diff --git a/interpreter/cling/LastKnownGoodLLVMSVNRevision.txt b/interpreter/cling/LastKnownGoodLLVMSVNRevision.txt index 612afc97d2629..273ce20a6d74d 100644 --- a/interpreter/cling/LastKnownGoodLLVMSVNRevision.txt +++ b/interpreter/cling/LastKnownGoodLLVMSVNRevision.txt @@ -1 +1 @@ -302975 +release_50 diff --git a/interpreter/cling/include/cling/Interpreter/ClingOptions.h b/interpreter/cling/include/cling/Interpreter/ClingOptions.h index 513218e47d9fc..13da700e54039 100644 --- a/interpreter/cling/include/cling/Interpreter/ClingOptions.h +++ b/interpreter/cling/include/cling/Interpreter/ClingOptions.h @@ -17,7 +17,7 @@ namespace clingoptions { OPT_INVALID = 0, // This is not an option ID. #define PREFIX(NAME, VALUE) #define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ - HELPTEXT, METAVAR) OPT_##ID, + HELPTEXT, METAVAR, VALUES) OPT_##ID, #include "cling/Interpreter/ClingOptions.inc" LastOption #undef OPTION diff --git a/interpreter/cling/include/cling/Interpreter/ClingOptions.inc b/interpreter/cling/include/cling/Interpreter/ClingOptions.inc index c3d1a9ab5e455..deedb3d56b6ac 100644 --- a/interpreter/cling/include/cling/Interpreter/ClingOptions.inc +++ b/interpreter/cling/include/cling/Interpreter/ClingOptions.inc @@ -16,26 +16,26 @@ PREFIX(prefix_2, {"--" COMMA 0}) #error "Define OPTION prior to including this file!" #endif -OPTION(prefix_0, "", INPUT, Input, INVALID, INVALID, 0, 0, 0, 0, 0) -OPTION(prefix_0, "", UNKNOWN, Unknown, INVALID, INVALID, 0, 0, 0, 0, 0) +OPTION(prefix_0, "", INPUT, Input, INVALID, INVALID, 0, 0, 0, 0, 0, 0) +OPTION(prefix_0, "", UNKNOWN, Unknown, INVALID, INVALID, 0, 0, 0, 0, 0, 0) OPTION(prefix_2, "errorout", _errorout, Flag, INVALID, INVALID, 0, 0, 0, - "Do not recover from input errors", 0) + "Do not recover from input errors", 0, 0) OPTION(prefix_3, "help", help, Flag, INVALID, INVALID, 0, 0, 0, - "Print this help text", 0) + "Print this help text", 0, 0) OPTION(prefix_1, "L", L, JoinedOrSeparate, INVALID, INVALID, 0, 0, 0, - "Add directory to library search path", "") + "Add directory to library search path", "", 0) // Re-implement to forward to our help OPTION(prefix_1, "l", l, JoinedOrSeparate, INVALID, INVALID, 0, 0, 0, - "Load a library before prompt", "") + "Load a library before prompt", "", 0) OPTION(prefix_2, "metastr=", _metastr_EQ, Joined, INVALID, INVALID, 0, 0, 0, - "Set the meta command tag, default '.'", 0) + "Set the meta command tag, default '.'", 0, 0) OPTION(prefix_2, "metastr", _metastr, Separate, INVALID, INVALID, 0, 0, 0, - "Set the meta command tag, default '.'", 0) + "Set the meta command tag, default '.'", 0, 0) OPTION(prefix_2, "nologo", _nologo, Flag, INVALID, INVALID, 0, 0, 0, - "Do not show startup-banner", 0) + "Do not show startup-banner", 0, 0) OPTION(prefix_3, "noruntime", noruntime, Flag, INVALID, INVALID, 0, 0, 0, - "Disable runtime support (no null checking, no value printing)", 0) + "Disable runtime support (no null checking, no value printing)", 0, 0) OPTION(prefix_3, "version", version, Flag, INVALID, INVALID, 0, 0, 0, - "Print the compiler version", 0) + "Print the compiler version", 0, 0) OPTION(prefix_1, "v", v, Flag, INVALID, INVALID, 0, 0, 0, - "Enable verbose output", 0) + "Enable verbose output", 0, 0) diff --git a/interpreter/cling/lib/Interpreter/BackendPasses.cpp b/interpreter/cling/lib/Interpreter/BackendPasses.cpp index db37819abb7e2..10dc61fafe60b 100644 --- a/interpreter/cling/lib/Interpreter/BackendPasses.cpp +++ b/interpreter/cling/lib/Interpreter/BackendPasses.cpp @@ -115,7 
+115,6 @@ void BackendPasses::CreatePasses(llvm::Module& M, int OptLevel) llvm::PassManagerBuilder PMBuilder; PMBuilder.OptLevel = OptLevel; PMBuilder.SizeLevel = m_CGOpts.OptimizeSize; - PMBuilder.BBVectorize = 0; // m_CGOpts.VectorizeBB; PMBuilder.SLPVectorize = OptLevel > 1 ? 1 : 0; // m_CGOpts.VectorizeSLP PMBuilder.LoopVectorize = OptLevel > 1 ? 1 : 0; // m_CGOpts.VectorizeLoop diff --git a/interpreter/cling/lib/Interpreter/DynamicLibraryManager.cpp b/interpreter/cling/lib/Interpreter/DynamicLibraryManager.cpp index ab32159e27747..1ac25789366e0 100644 --- a/interpreter/cling/lib/Interpreter/DynamicLibraryManager.cpp +++ b/interpreter/cling/lib/Interpreter/DynamicLibraryManager.cpp @@ -14,8 +14,8 @@ #include "cling/Utils/Platform.h" #include "cling/Utils/Output.h" +#include "llvm/BinaryFormat/Magic.h" #include "llvm/Support/DynamicLibrary.h" -#include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" #include <system_error> @@ -64,7 +64,7 @@ namespace cling { DynamicLibraryManager::~DynamicLibraryManager() {} static bool isSharedLib(llvm::StringRef LibName, bool* exists = 0) { - using namespace llvm::sys::fs; + using namespace llvm; file_magic Magic; const std::error_code Error = identify_magic(LibName, Magic); if (exists) diff --git a/interpreter/cling/lib/Interpreter/IncrementalExecutor.h b/interpreter/cling/lib/Interpreter/IncrementalExecutor.h index f8bec764553f8..61427482f760c 100644 --- a/interpreter/cling/lib/Interpreter/IncrementalExecutor.h +++ b/interpreter/cling/lib/Interpreter/IncrementalExecutor.h @@ -165,8 +165,9 @@ namespace cling { ///\brief Unload a set of JIT symbols. bool unloadModule(const std::shared_ptr<llvm::Module>& M) { - m_JIT->removeModule(M); - // FIXME: Propagate if we removed a module or not. + // FIXME: Propagate the error in a more verbose way. + if (auto Err = m_JIT->removeModule(M)) + return false; return true; } diff --git a/interpreter/cling/lib/Interpreter/IncrementalJIT.cpp b/interpreter/cling/lib/Interpreter/IncrementalJIT.cpp index 5c7cd52b5718f..963bbeb45febd 100644 --- a/interpreter/cling/lib/Interpreter/IncrementalJIT.cpp +++ b/interpreter/cling/lib/Interpreter/IncrementalJIT.cpp @@ -13,6 +13,7 @@ #include "cling/Utils/Platform.h" #include "llvm/ExecutionEngine/Orc/LambdaResolver.h" +#include "llvm/ExecutionEngine/SectionMemoryManager.h" #include "llvm/Support/DynamicLibrary.h" #ifdef __APPLE__ @@ -41,7 +42,7 @@ class ClingMemoryManager: public SectionMemoryManager { class NotifyFinalizedT { public: NotifyFinalizedT(cling::IncrementalJIT &jit) : m_JIT(jit) {} - void operator()(llvm::orc::RTDyldObjectLinkingLayerBase::ObjSetHandleT H) { + void operator()(llvm::orc::RTDyldObjectLinkingLayerBase::ObjHandleT H) { m_JIT.RemoveUnfinalizedSection(H); } @@ -220,9 +221,16 @@ class Azog: public RTDyldMemoryManager { } uint64_t getSymbolAddress(const std::string &Name) override { - return m_jit.getSymbolAddressWithoutMangling(Name, - true /*also use dlsym*/) - .getAddress(); + // FIXME: We should decide if we want to handle the error here or make the + // return type of the function llvm::Expected<uint64_t> relying on the + // users to decide how to handle the error.
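For reference, the migration pattern behind this hunk and the ones that follow: since LLVM 5.0, llvm::JITSymbol::getAddress() returns llvm::Expected<llvm::JITTargetAddress> instead of a raw address, so every caller must either dereference a valid result or consume the error. A minimal sketch of the unwrap-or-log idiom, assuming only that a JITSymbol is in hand (addressOrZero is an illustrative helper, not part of this patch):

    #include "llvm/ExecutionEngine/JITSymbol.h"
    #include "llvm/Support/Error.h"
    #include "llvm/Support/raw_ostream.h"

    // Sketch: unwrap the Expected<JITTargetAddress> from JITSymbol::getAddress().
    static llvm::JITTargetAddress addressOrZero(llvm::JITSymbol &Sym) {
      if (auto AddrOrErr = Sym.getAddress())
        return *AddrOrErr;                 // success: dereference the Expected
      else {
        // An Expected in the error state must be consumed, otherwise
        // assertion-enabled builds abort on the unchecked-error check.
        llvm::logAllUnhandledErrors(AddrOrErr.takeError(), llvm::errs(),
                                    "JIT symbol lookup failed: ");
        return 0;
      }
    }

The llvm_unreachable calls in the cling code below are placeholders until the FIXME above is resolved; a real policy would look roughly like this helper.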
+ if (auto Addr = m_jit.getSymbolAddressWithoutMangling(Name, + true /*also use dlsym*/) + .getAddress()) + return *Addr; + + llvm_unreachable("Handle the error case"); + return ~0U; } void *getPointerToNamedFunction(const std::string &Name, @@ -267,12 +275,29 @@ IncrementalJIT::IncrementalJIT(IncrementalExecutor& exe, m_Parent(exe), m_TM(std::move(TM)), m_TMDataLayout(m_TM->createDataLayout()), - m_ExeMM(llvm::make_unique<ClingMemoryManager>(m_Parent)), + m_ExeMM(std::make_shared<ClingMemoryManager>(m_Parent)), m_NotifyObjectLoaded(*this), - m_ObjectLayer(m_SymbolMap, m_NotifyObjectLoaded, NotifyFinalizedT(*this)), + m_ObjectLayer(m_SymbolMap, [this] () { return llvm::make_unique<Azog>(*this); }, + m_NotifyObjectLoaded, NotifyFinalizedT(*this)), m_CompileLayer(m_ObjectLayer, llvm::orc::SimpleCompiler(*m_TM)), m_LazyEmitLayer(m_CompileLayer) { + // Force the JIT to query for symbols local to itself, i.e. if it resides in a + // shared library it will resolve symbols from there first. This is done to + // implement our proto symbol versioning protection. Namely, if some other + // library provides llvm symbols, we want our JIT to avoid looking at them. + // + // FIXME: In general, this approach causes numerous issues when cling is + // embedded and the framework needs to provide its own set of symbols which + // exist in llvm, most notably if the framework links against a different + // version of the llvm libraries. For instance, if we want to provide + // a custom zlib in the framework the JIT will still resolve to llvm's version + // of libz, causing hard-to-debug bugs. In order to work around such cases we + // need to swap the llvm system libraries, which can be tricky for two + // reasons: (a) llvm's cmake doesn't really support it; (b) it only works if we + // build llvm from sources. + llvm::sys::DynamicLibrary::SearchOrder + = llvm::sys::DynamicLibrary::SO_LoadedFirst; // Enable JIT symbol resolution from the binary. llvm::sys::DynamicLibrary::LoadLibraryPermanently(0, 0); @@ -343,9 +368,12 @@ IncrementalJIT::getSymbolAddressWithoutMangling(const std::string& Name, return Sym; if (AlsoInProcess) { - if (llvm::JITSymbol SymInfo = m_ExeMM->findSymbol(Name)) - return llvm::JITSymbol(SymInfo.getAddress(), - llvm::JITSymbolFlags::Exported); + if (llvm::JITSymbol SymInfo = m_ExeMM->findSymbol(Name)) { + if (auto AddrOrErr = SymInfo.getAddress()) + return llvm::JITSymbol(*AddrOrErr, llvm::JITSymbolFlags::Exported); + else + llvm_unreachable("Handle the error case"); + } #ifdef LLVM_ON_WIN32 // FIXME: DLSym symbol lookup can overlap m_ExeMM->findSymbol wasting time // looking for a symbol in libs where it is already known not to exist. @@ -374,13 +402,21 @@ void IncrementalJIT::addModule(const std::shared_ptr<llvm::Module>& module) { // LLVM MERGE FIXME: update this to use new interfaces.
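Context for the resolver built in the next hunk: ORC's createLambdaResolver pairs two callbacks, where the first backs findSymbolInLogicalDylib (symbols treated as part of the JIT'd program itself) and the second backs findSymbol (genuinely external symbols). A stripped-down sketch of that shape, with externals resolved from the host process; everything here is illustrative rather than cling's actual logic:

    #include "llvm/ExecutionEngine/JITSymbol.h"
    #include "llvm/ExecutionEngine/Orc/LambdaResolver.h"
    #include "llvm/Support/DynamicLibrary.h"
    #include <cstdint>
    #include <memory>
    #include <string>

    // Sketch: a minimal LLVM 5.0 ORC symbol resolver.
    static std::shared_ptr<llvm::JITSymbolResolver> makeResolver() {
      return llvm::orc::createLambdaResolver(
          // findSymbolInLogicalDylib: JIT-local symbols.
          [](const std::string &Name) -> llvm::JITSymbol {
            return nullptr; // nothing JIT-local in this toy example
          },
          // findSymbol: external symbols, here looked up in the process.
          [](const std::string &Name) -> llvm::JITSymbol {
            if (void *Addr =
                    llvm::sys::DynamicLibrary::SearchForAddressOfSymbol(Name))
              return llvm::JITSymbol(
                  static_cast<llvm::JITTargetAddress>(
                      reinterpret_cast<std::uintptr_t>(Addr)),
                  llvm::JITSymbolFlags::Exported);
            return nullptr;
          });
    }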
auto Resolver = llvm::orc::createLambdaResolver( [&](const std::string &S) { - if (auto Sym = getInjectedSymbols(S)) - return JITSymbol((uint64_t)Sym.getAddress(), Sym.getFlags()); + if (auto Sym = getInjectedSymbols(S)) { + if (auto AddrOrErr = Sym.getAddress()) + return JITSymbol((uint64_t)*AddrOrErr, Sym.getFlags()); + else + llvm_unreachable("Handle the error case"); + } return m_ExeMM->findSymbol(S); }, [&](const std::string &Name) { - if (auto Sym = getSymbolAddressWithoutMangling(Name, true)) - return JITSymbol(Sym.getAddress(), Sym.getFlags()); + if (auto Sym = getSymbolAddressWithoutMangling(Name, true)) { + if (auto AddrOrErr = Sym.getAddress()) + return JITSymbol(*AddrOrErr, Sym.getFlags()); + else + llvm_unreachable("Handle the error case"); + } const std::string* NameNP = &Name; #ifdef MANGLE_PREFIX @@ -401,25 +437,23 @@ void IncrementalJIT::addModule(const std::shared_ptr<llvm::Module>& module) { return JITSymbol(addr, llvm::JITSymbolFlags::Weak); }); - std::vector<llvm::Module*> moduleSet; - moduleSet.push_back(module.get()); - ModuleSetHandleT MSHandle = - m_LazyEmitLayer.addModuleSet(std::move(moduleSet), - llvm::make_unique<Azog>(*this), - std::move(Resolver)); - m_UnloadPoints[module.get()] = MSHandle; + if (auto H = m_LazyEmitLayer.addModule(module, std::move(Resolver))) + m_UnloadPoints[module.get()] = *H; + else + llvm_unreachable("Handle the error case"); } -void IncrementalJIT::removeModule(const std::shared_ptr<llvm::Module>& module) { +llvm::Error +IncrementalJIT::removeModule(const std::shared_ptr<llvm::Module>& module) { // FIXME: Track down what calls this routine on a not-yet-added module. Once // this is resolved we can remove this check enabling the assert. auto IUnload = m_UnloadPoints.find(module.get()); if (IUnload == m_UnloadPoints.end()) - return; + return llvm::Error::success(); auto Handle = IUnload->second; assert(*Handle && "Trying to remove a non existent module!"); m_UnloadPoints.erase(IUnload); - m_LazyEmitLayer.removeModuleSet(Handle); + return m_LazyEmitLayer.removeModule(Handle); } }// end namespace cling diff --git a/interpreter/cling/lib/Interpreter/IncrementalJIT.h b/interpreter/cling/lib/Interpreter/IncrementalJIT.h index 3ef0742ef1deb..6ef575e5b7b0f 100644 --- a/interpreter/cling/lib/Interpreter/IncrementalJIT.h +++ b/interpreter/cling/lib/Interpreter/IncrementalJIT.h @@ -53,46 +53,41 @@ class IncrementalJIT { class NotifyObjectLoadedT { public: - typedef std::vector<std::unique_ptr<llvm::object::OwningBinary<llvm::object::ObjectFile>>> ObjListT; - typedef std::vector<std::unique_ptr<llvm::RuntimeDyld::LoadedObjectInfo>> - LoadedObjInfoListT; - NotifyObjectLoadedT(IncrementalJIT &jit) : m_JIT(jit) {} - - void operator()(llvm::orc::RTDyldObjectLinkingLayerBase::ObjSetHandleT H, - const ObjListT &Objects, - const LoadedObjInfoListT &Infos) const - { + void operator()(llvm::orc::RTDyldObjectLinkingLayerBase::ObjHandleT H, + const llvm::orc::RTDyldObjectLinkingLayer::ObjectPtr &Object, + const llvm::LoadedObjectInfo &Info) const { m_JIT.m_UnfinalizedSections[H] = std::move(m_JIT.m_SectionsAllocatedSinceLastLoad); m_JIT.m_SectionsAllocatedSinceLastLoad = SectionAddrSet(); - assert(Objects.size() == Infos.size() && - "Incorrect number of Infos for Objects."); - if (auto GDBListener = m_JIT.m_GDBListener) { - for (size_t I = 0, N = Objects.size(); I < N; ++I) - GDBListener->NotifyObjectEmitted(*Objects[I]->getBinary(), - *Infos[I]); - } - for (const auto &Object: Objects) { - for (const auto &Symbol: Object->getBinary()->symbols()) { - auto Flags = Symbol.getFlags(); - if (Flags & llvm::object::BasicSymbolRef::SF_Undefined) - continue; - // FIXME: this should be uncommented once we serve incremental - // modules from a TU module.
- //if (!(Flags & llvm::object::BasicSymbolRef::SF_Exported)) - // continue; - auto NameOrError = Symbol.getName(); - if (!NameOrError) - continue; - auto Name = NameOrError.get(); - if (m_JIT.m_SymbolMap.find(Name) == m_JIT.m_SymbolMap.end()) { - llvm::JITSymbol Sym - = m_JIT.m_CompileLayer.findSymbolIn(H, Name, true); - if (llvm::JITTargetAddress Addr = Sym.getAddress()) - m_JIT.m_SymbolMap[Name] = Addr; - } + // FIXME: NotifyObjectEmitted requires a RuntimeDyld::LoadedObjectInfo + // object. In order to get it one should call + // RTDyld.loadObject(*ObjToLoad->getBinary()) according to r306058. + // Moreover this should be done in the finalizer. Currently we are + // disabling this since we have globally disabled this functionality in + // IncrementalJIT.cpp (m_GDBListener = 0). + // + // if (auto GDBListener = m_JIT.m_GDBListener) + // GDBListener->NotifyObjectEmitted(*Object->getBinary(), Info); + + for (const auto &Symbol: Object->getBinary()->symbols()) { + auto Flags = Symbol.getFlags(); + if (Flags & llvm::object::BasicSymbolRef::SF_Undefined) + continue; + // FIXME: this should be uncommented once we serve incremental + // modules from a TU module. + //if (!(Flags & llvm::object::BasicSymbolRef::SF_Exported)) + // continue; + auto NameOrError = Symbol.getName(); + if (!NameOrError) + continue; + auto Name = NameOrError.get(); + if (m_JIT.m_SymbolMap.find(Name) == m_JIT.m_SymbolMap.end()) { + llvm::JITSymbol Sym + = m_JIT.m_CompileLayer.findSymbolIn(H, Name, true); + if (auto Addr = Sym.getAddress()) + m_JIT.m_SymbolMap[Name] = *Addr; } } } @@ -100,22 +95,21 @@ class IncrementalJIT { private: IncrementalJIT &m_JIT; }; - class RemovableObjectLinkingLayer: - public llvm::orc::RTDyldObjectLinkingLayer { + public llvm::orc::RTDyldObjectLinkingLayer { public: - using Base_t = llvm::orc::RTDyldObjectLinkingLayer; - using NotifyLoadedFtor = NotifyObjectLoadedT; + using Base_t = llvm::orc::RTDyldObjectLinkingLayer; using NotifyFinalizedFtor = Base_t::NotifyFinalizedFtor; RemovableObjectLinkingLayer(SymbolMapT &SymMap, + Base_t::MemoryManagerGetter MM, NotifyObjectLoadedT NotifyLoaded, - NotifyFinalizedFtor NotifyFinalized = NotifyFinalizedFtor()): - Base_t(NotifyLoaded, NotifyFinalized), m_SymbolMap(SymMap) + NotifyFinalizedFtor NotifyFinalized) + : Base_t(MM, NotifyLoaded, NotifyFinalized), m_SymbolMap(SymMap) {} - void - removeObjectSet(llvm::orc::RTDyldObjectLinkingLayerBase::ObjSetHandleT H) { - struct AccessSymbolTable: public LinkedObjectSet { + llvm::Error + removeObject(llvm::orc::RTDyldObjectLinkingLayerBase::ObjHandleT H) { + struct AccessSymbolTable: public LinkedObject { const llvm::StringMap<llvm::JITEvaluatedSymbol>& getSymbolTable() const { return SymbolTable; @@ -131,23 +125,24 @@ class IncrementalJIT { if (iterSymMap->second == NameSym.second.getAddress()) m_SymbolMap.erase(iterSymMap); } - llvm::orc::RTDyldObjectLinkingLayer::removeObjectSet(H); + return llvm::orc::RTDyldObjectLinkingLayer::removeObject(H); } private: SymbolMapT& m_SymbolMap; }; typedef RemovableObjectLinkingLayer ObjectLayerT; - typedef llvm::orc::IRCompileLayer<ObjectLayerT> CompileLayerT; + typedef llvm::orc::IRCompileLayer<ObjectLayerT, llvm::orc::SimpleCompiler> CompileLayerT; typedef llvm::orc::LazyEmittingLayer<CompileLayerT> LazyEmitLayerT; - typedef LazyEmitLayerT::ModuleSetHandleT ModuleSetHandleT; + typedef LazyEmitLayerT::ModuleHandleT ModuleHandleT; std::unique_ptr<llvm::TargetMachine> m_TM; llvm::DataLayout m_TMDataLayout; ///\brief The RTDyldMemoryManager used to communicate with the /// IncrementalExecutor to handle missing or special symbols.
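For orientation, the typedefs above assemble the standard LLVM 5.0 ORC layer stack; each layer's handle type is what addModule/removeModule traffic in. Written out without cling's symbol-map subclass, an equivalent plain stack is roughly:

    #include "llvm/ExecutionEngine/Orc/CompileUtils.h"
    #include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
    #include "llvm/ExecutionEngine/Orc/LazyEmittingLayer.h"
    #include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"

    // Sketch: the LLVM 5.0 ORC layer cake, bottom to top. The object layer
    // links emitted objects, the compile layer turns IR into objects, and
    // the lazy-emitting layer defers both until a symbol is actually needed.
    using ObjLayerT = llvm::orc::RTDyldObjectLinkingLayer;
    using CompileLayerT =
        llvm::orc::IRCompileLayer<ObjLayerT, llvm::orc::SimpleCompiler>;
    using LazyEmitLayerT = llvm::orc::LazyEmittingLayer<CompileLayerT>;

cling subclasses the bottom layer (RemovableObjectLinkingLayer above) so that removeObject can also scrub the removed object's entries out of its symbol cache.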
- std::unique_ptr<ClingMemoryManager> m_ExeMM; + std::shared_ptr<ClingMemoryManager> m_ExeMM; NotifyObjectLoadedT m_NotifyObjectLoaded; @@ -155,22 +150,22 @@ class IncrementalJIT { CompileLayerT m_CompileLayer; LazyEmitLayerT m_LazyEmitLayer; - // We need to store ObjLayerT::ObjSetHandles for each of the object sets + // We need to store ObjLayerT::ObjHandles for each of the object sets // that have been emitted but not yet finalized so that we can forward the // mapSectionAddress calls appropriately. typedef std::set SectionAddrSet; - struct ObjSetHandleCompare { - bool operator()(ObjectLayerT::ObjSetHandleT H1, - ObjectLayerT::ObjSetHandleT H2) const { + struct ObjHandleCompare { + bool operator()(ObjectLayerT::ObjHandleT H1, + ObjectLayerT::ObjHandleT H2) const { return &*H1 < &*H2; } }; SectionAddrSet m_SectionsAllocatedSinceLastLoad; - std::map<ObjectLayerT::ObjSetHandleT, SectionAddrSet, ObjSetHandleCompare> + std::map<ObjectLayerT::ObjHandleT, SectionAddrSet, ObjHandleCompare> m_UnfinalizedSections; - ///\brief Mapping between \c llvm::Module* and \c ModuleSetHandleT. - std::map<llvm::Module*, ModuleSetHandleT> m_UnloadPoints; + ///\brief Mapping between \c llvm::Module* and \c ModuleHandleT. + std::map<llvm::Module*, ModuleHandleT> m_UnloadPoints; std::string Mangle(llvm::StringRef Name) { stdstrstream MangledName; @@ -192,22 +187,31 @@ class IncrementalJIT { /// \param AlsoInProcess - Sometimes you only care about JITed symbols. If so, /// pass `false` here to not resolve the symbol through dlsym(). uint64_t getSymbolAddress(const std::string& Name, bool AlsoInProcess) { - return getSymbolAddressWithoutMangling(Mangle(Name), AlsoInProcess) - .getAddress(); + // FIXME: We should decide if we want to handle the error here or make the + // return type of the function llvm::Expected<uint64_t> relying on the + // users to decide how to handle the error. + if (auto S = getSymbolAddressWithoutMangling(Mangle(Name), AlsoInProcess)) { + if (auto AddrOrErr = S.getAddress()) + return *AddrOrErr; + else + llvm_unreachable("Handle the error case"); + } + + return 0; } ///\brief Get the address of a symbol from the JIT or the memory manager. /// Use this to resolve symbols of known, target-specific names. llvm::JITSymbol getSymbolAddressWithoutMangling(const std::string& Name, - bool AlsoInProcess); + bool AlsoInProcess); void addModule(const std::shared_ptr<llvm::Module>& module); - void removeModule(const std::shared_ptr<llvm::Module>& module); + llvm::Error removeModule(const std::shared_ptr<llvm::Module>& module); IncrementalExecutor& getParent() const { return m_Parent; } void RemoveUnfinalizedSection( - llvm::orc::RTDyldObjectLinkingLayerBase::ObjSetHandleT H) { + llvm::orc::RTDyldObjectLinkingLayerBase::ObjHandleT H) { m_UnfinalizedSections.erase(H); } diff --git a/interpreter/cling/lib/Interpreter/Interpreter.cpp b/interpreter/cling/lib/Interpreter/Interpreter.cpp index 6b1ae720d34d1..80cda212ef1c3 100644 --- a/interpreter/cling/lib/Interpreter/Interpreter.cpp +++ b/interpreter/cling/lib/Interpreter/Interpreter.cpp @@ -202,7 +202,7 @@ namespace cling { // Initialize the opt level to what CodeGenOpts says.
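A note on the Mangle() helper above before the diff moves on to Interpreter.cpp: JIT lookups must apply the same name decoration the target DataLayout implies (for example, a leading underscore on Darwin), which llvm::Mangler provides. A self-contained sketch, assuming DL comes from TargetMachine::createDataLayout() as m_TMDataLayout does here:

    #include "llvm/ADT/StringRef.h"
    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/Mangler.h"
    #include "llvm/Support/raw_ostream.h"
    #include <string>

    // Sketch: decorate a plain symbol name the way the JIT's DataLayout
    // expects before calling findSymbol()/getSymbolAddress().
    static std::string mangle(llvm::StringRef Name, const llvm::DataLayout &DL) {
      std::string Mangled;
      {
        llvm::raw_string_ostream Stream(Mangled);
        llvm::Mangler::getNameWithPrefix(Stream, Name, DL);
      } // stream flushes into Mangled at end of scope
      return Mangled;
    }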
if (m_OptLevel == -1) - m_OptLevel = getCI()->getCodeGenOpts().OptimizationLevel; + setDefaultOptLevel(getCI()->getCodeGenOpts().OptimizationLevel); Sema& SemaRef = getSema(); Preprocessor& PP = SemaRef.getPreprocessor(); diff --git a/interpreter/cling/lib/Interpreter/InterpreterCallbacks.cpp b/interpreter/cling/lib/Interpreter/InterpreterCallbacks.cpp index 6768192c74afb..bf9c32508269c 100644 --- a/interpreter/cling/lib/Interpreter/InterpreterCallbacks.cpp +++ b/interpreter/cling/lib/Interpreter/InterpreterCallbacks.cpp @@ -214,7 +214,7 @@ namespace cling { std::vector> Consumers; Consumers.push_back(std::move(wrapper)); - Consumers.push_back(std::move(m_Interpreter->getCI()->takeASTConsumer())); + Consumers.push_back(m_Interpreter->getCI()->takeASTConsumer()); std::unique_ptr multiConsumer( new clang::MultiplexConsumer(std::move(Consumers))); diff --git a/interpreter/cling/lib/Interpreter/InvocationOptions.cpp b/interpreter/cling/lib/Interpreter/InvocationOptions.cpp index 752542672d348..f3d7123e6dbad 100644 --- a/interpreter/cling/lib/Interpreter/InvocationOptions.cpp +++ b/interpreter/cling/lib/Interpreter/InvocationOptions.cpp @@ -42,7 +42,7 @@ static const char kNoStdInc[] = "-nostdinc"; #define PREFIX(NAME, VALUE) const char *const NAME[] = VALUE; #define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ - HELPTEXT, METAVAR) + HELPTEXT, METAVAR, VALUES) #include "cling/Interpreter/ClingOptions.inc" #undef OPTION #undef PREFIX @@ -50,9 +50,9 @@ static const char kNoStdInc[] = "-nostdinc"; static const OptTable::Info ClingInfoTable[] = { #define PREFIX(NAME, VALUE) #define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ - HELPTEXT, METAVAR) \ + HELPTEXT, METAVAR, VALUES) \ { PREFIX, NAME, HELPTEXT, METAVAR, OPT_##ID, Option::KIND##Class, PARAM, \ - FLAGS, OPT_##GROUP, OPT_##ALIAS, ALIASARGS }, + FLAGS, OPT_##GROUP, OPT_##ALIAS, ALIASARGS, VALUES }, #include "cling/Interpreter/ClingOptions.inc" #undef OPTION #undef PREFIX diff --git a/interpreter/cling/lib/Interpreter/LookupHelper.cpp b/interpreter/cling/lib/Interpreter/LookupHelper.cpp index b28370cf860b4..4df32ba572a0f 100644 --- a/interpreter/cling/lib/Interpreter/LookupHelper.cpp +++ b/interpreter/cling/lib/Interpreter/LookupHelper.cpp @@ -753,7 +753,7 @@ namespace cling { P.getCurToken().getAnnotationRange(), SS); if (SS.isValid()) { - P.ConsumeToken(); + P.ConsumeAnyToken(); if (!P.getCurToken().is(clang::tok::identifier)) { return 0; } diff --git a/interpreter/cling/lib/MetaProcessor/MetaProcessor.cpp b/interpreter/cling/lib/MetaProcessor/MetaProcessor.cpp index 8ea99ff360018..0b09db95a5c7b 100644 --- a/interpreter/cling/lib/MetaProcessor/MetaProcessor.cpp +++ b/interpreter/cling/lib/MetaProcessor/MetaProcessor.cpp @@ -22,6 +22,7 @@ #include "clang/Frontend/CompilerInstance.h" #include "clang/Lex/Preprocessor.h" +#include "llvm/BinaryFormat/Magic.h" #include "llvm/Support/Path.h" #include @@ -378,9 +379,9 @@ namespace cling { // heuristic unreliable. 
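Background for the hunk below: LLVM 5.0 moved file_magic and identify_magic out of llvm::sys::fs (llvm/Support/FileSystem.h) into the llvm namespace (llvm/BinaryFormat/Magic.h), which is why this patch touches every caller, including DynamicLibraryManager.cpp above. A small sketch of the buffer-based overload used here; the helper name and the chosen formats are illustrative only:

    #include "llvm/ADT/StringRef.h"
    #include "llvm/BinaryFormat/Magic.h"

    // Sketch: classify a buffer of leading file bytes (LLVM 5.0 spelling).
    static bool isKnownBinaryFormat(llvm::StringRef Buffer) {
      llvm::file_magic Magic = llvm::identify_magic(Buffer);
      return Magic == llvm::file_magic::elf_shared_object ||
             Magic == llvm::file_magic::macho_dynamically_linked_shared_lib ||
             Magic == llvm::file_magic::pecoff_executable;
    }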
if (!in.fail() && readMagic >= 300) { llvm::StringRef magicStr(magic,in.gcount()); - llvm::sys::fs::file_magic fileType - = llvm::sys::fs::identify_magic(magicStr); - if (fileType != llvm::sys::fs::file_magic::unknown) + llvm::file_magic fileType + = llvm::identify_magic(magicStr); + if (fileType != llvm::file_magic::unknown) return reportIOErr(filename, "read from binary"); unsigned printable = 0; diff --git a/interpreter/llvm/src/CMakeLists.txt b/interpreter/llvm/src/CMakeLists.txt index 7769f1834b8e5..8c0f511451397 100644 --- a/interpreter/llvm/src/CMakeLists.txt +++ b/interpreter/llvm/src/CMakeLists.txt @@ -29,7 +29,7 @@ if(NOT DEFINED LLVM_VERSION_PATCH) set(LLVM_VERSION_PATCH 0) endif() if(NOT DEFINED LLVM_VERSION_SUFFIX) - set(LLVM_VERSION_SUFFIX svn) + set(LLVM_VERSION_SUFFIX "") endif() if (POLICY CMP0048) @@ -44,6 +44,13 @@ if (NOT PACKAGE_VERSION) "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH}${LLVM_VERSION_SUFFIX}") endif() +if ((CMAKE_GENERATOR MATCHES "Visual Studio") AND (CMAKE_GENERATOR_TOOLSET STREQUAL "")) + message(WARNING "Visual Studio generators use the x86 host compiler by " + "default, even for 64-bit targets. This can result in linker " + "instability and out of memory errors. To use the 64-bit " + "host compiler, pass -Thost=x64 on the CMake command line.") +endif() + project(LLVM ${cmake_3_0_PROJ_VERSION} ${cmake_3_0_LANGUAGES} @@ -87,7 +94,7 @@ if(CMAKE_HOST_APPLE AND APPLE) set(LIBTOOL_NO_WARNING_FLAG "-no_warning_for_no_symbols") endif() endif() - + foreach(lang ${languages}) set(CMAKE_${lang}_CREATE_STATIC_LIBRARY "${CMAKE_LIBTOOL} -static ${LIBTOOL_NO_WARNING_FLAG} -o \ @@ -199,7 +206,7 @@ endif() include(VersionFromVCS) option(LLVM_APPEND_VC_REV - "Append the version control system revision id to LLVM version" OFF) + "Embed the version control system revision id in LLVM" ON) if( LLVM_APPEND_VC_REV ) add_version_info_from_vcs(PACKAGE_VERSION) @@ -281,6 +288,10 @@ set(LLVM_LIBDIR_SUFFIX "" CACHE STRING "Define suffix of library directory name set(LLVM_TOOLS_INSTALL_DIR "bin" CACHE STRING "Path for binary subdirectory (defaults to 'bin')") mark_as_advanced(LLVM_TOOLS_INSTALL_DIR) +set(LLVM_UTILS_INSTALL_DIR "bin" CACHE STRING + "Path to install LLVM utilities (enabled by LLVM_INSTALL_UTILS=ON) (defaults to LLVM_TOOLS_INSTALL_DIR)") +mark_as_advanced(LLVM_TOOLS_INSTALL_DIR) + # They are used as destination of target generators. set(LLVM_RUNTIME_OUTPUT_INTDIR ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR}/bin) set(LLVM_LIBRARY_OUTPUT_INTDIR ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR}/lib${LLVM_LIBDIR_SUFFIX}) @@ -303,6 +314,7 @@ set(LLVM_CMAKE_PATH ${LLVM_MAIN_SRC_DIR}/cmake/modules) set(LLVM_EXAMPLES_BINARY_DIR ${LLVM_BINARY_DIR}/examples) set(LLVM_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/include) +# List of all targets to be built by default: set(LLVM_ALL_TARGETS AArch64 AMDGPU @@ -314,7 +326,6 @@ set(LLVM_ALL_TARGETS MSP430 NVPTX PowerPC - RISCV Sparc SystemZ X86 @@ -563,6 +574,10 @@ if (LLVM_BUILD_STATIC) set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static") endif() +# Override the default target with an environment variable named by LLVM_TARGET_TRIPLE_ENV. +set(LLVM_TARGET_TRIPLE_ENV CACHE STRING "The name of environment variable to override default target. 
Disabled by blank.") +mark_as_advanced(LLVM_TARGET_TRIPLE_ENV) + # All options referred to from HandleLLVMOptions have to be specified # BEFORE this include, otherwise options will not be correctly set on # first cmake run @@ -793,7 +808,8 @@ if(${CMAKE_SYSTEM_NAME} MATCHES "(FreeBSD|DragonFly)") endif(${CMAKE_SYSTEM_NAME} MATCHES "(FreeBSD|DragonFly)") if( ${CMAKE_SYSTEM_NAME} MATCHES SunOS ) - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -include llvm/Support/Solaris.h") + # special hack for Solaris to handle crazy system sys/regset.h + include_directories("${LLVM_MAIN_INCLUDE_DIR}/llvm/Support/Solaris") endif( ${CMAKE_SYSTEM_NAME} MATCHES SunOS ) # Make sure we don't get -rdynamic in every binary. For those that need it, diff --git a/interpreter/llvm/src/CODE_OWNERS.TXT b/interpreter/llvm/src/CODE_OWNERS.TXT index ec4561d991693..619844256ada5 100644 --- a/interpreter/llvm/src/CODE_OWNERS.TXT +++ b/interpreter/llvm/src/CODE_OWNERS.TXT @@ -70,7 +70,7 @@ D: Branch weights and BlockFrequencyInfo N: Hal Finkel E: hfinkel@anl.gov -D: BBVectorize, the loop reroller, alias analysis and the PowerPC target +D: The loop reroller, alias analysis and the PowerPC target N: Dan Gohman E: sunfish@mozilla.com @@ -195,6 +195,7 @@ D: MemorySanitizer (LLVM part) N: Craig Topper E: craig.topper@gmail.com +E: craig.topper@intel.com D: X86 Backend N: Ulrich Weigand diff --git a/interpreter/llvm/src/CREDITS.TXT b/interpreter/llvm/src/CREDITS.TXT index 15d822a680911..bfc3482e4099e 100644 --- a/interpreter/llvm/src/CREDITS.TXT +++ b/interpreter/llvm/src/CREDITS.TXT @@ -220,7 +220,7 @@ W: http://randomhacks.net/ D: llvm-config script N: Anton Korobeynikov -E: asl@math.spbu.ru +E: anton at korobeynikov dot info D: Mingw32 fixes, cross-compiling support, stdcall/fastcall calling conv. D: x86/linux PIC codegen, aliases, regparm/visibility attributes D: Switch lowering refactoring @@ -265,7 +265,7 @@ D: Release manager (1.7+) N: Sylvestre Ledru E: sylvestre@debian.org W: http://sylvestre.ledru.info/ -W: http://llvm.org/apt/ +W: http://apt.llvm.org/ D: Debian and Ubuntu packaging D: Continuous integration with jenkins @@ -318,11 +318,12 @@ D: Support for implicit TLS model used with MS VC runtime D: Dumping of Win64 EH structures N: Takumi Nakamura +I: chapuni E: geek4civic@gmail.com E: chapuni@hf.rim.or.jp -D: Cygwin and MinGW support. -D: Win32 tweaks. 
-S: Yokohama, Japan +D: Maintaining the Git monorepo +W: https://github.com/llvm-project/ +S: Ebina, Japan N: Edward O'Callaghan E: eocallaghan@auroraux.org diff --git a/interpreter/llvm/src/RELEASE_TESTERS.TXT b/interpreter/llvm/src/RELEASE_TESTERS.TXT index 7bfa88c6cf0e8..9a01c725fb511 100644 --- a/interpreter/llvm/src/RELEASE_TESTERS.TXT +++ b/interpreter/llvm/src/RELEASE_TESTERS.TXT @@ -41,14 +41,9 @@ E: hans@chromium.org T: x86 O: Windows -N: Renato Golin -E: renato.golin@linaro.org -T: ARM -O: Linux - N: Diana Picus E: diana.picus@linaro.org -T: AArch64 +T: ARM, AArch64 O: Linux N: Simon Dardis diff --git a/interpreter/llvm/src/bindings/go/llvm/ir.go b/interpreter/llvm/src/bindings/go/llvm/ir.go index fe191beb38132..2220970343071 100644 --- a/interpreter/llvm/src/bindings/go/llvm/ir.go +++ b/interpreter/llvm/src/bindings/go/llvm/ir.go @@ -611,6 +611,12 @@ func (t Type) StructElementTypes() []Type { } // Operations on array, pointer, and vector types (sequence types) +func (t Type) Subtypes() (ret []Type) { + ret = make([]Type, C.LLVMGetNumContainedTypes(t.C)) + C.LLVMGetSubtypes(t.C, llvmTypeRefPtr(&ret[0])) + return +} + func ArrayType(elementType Type, elementCount int) (t Type) { t.C = C.LLVMArrayType(elementType.C, C.unsigned(elementCount)) return diff --git a/interpreter/llvm/src/bindings/go/llvm/ir_test.go b/interpreter/llvm/src/bindings/go/llvm/ir_test.go index c823615a4293c..325ee4890f4c1 100644 --- a/interpreter/llvm/src/bindings/go/llvm/ir_test.go +++ b/interpreter/llvm/src/bindings/go/llvm/ir_test.go @@ -134,3 +134,29 @@ func TestDebugLoc(t *testing.T) { t.Errorf("Got metadata %v as scope, though wanted %v", loc.Scope.C, scope.C) } } + +func TestSubtypes(t *testing.T) { + cont := NewContext() + defer cont.Dispose() + + int_pointer := PointerType(cont.Int32Type(), 0) + int_inner := int_pointer.Subtypes() + if len(int_inner) != 1 { + t.Errorf("Got size %d, though wanted 1", len(int_inner)) + } + if int_inner[0] != cont.Int32Type() { + t.Errorf("Expected int32 type") + } + + st_pointer := cont.StructType([]Type{cont.Int32Type(), cont.Int8Type()}, false) + st_inner := st_pointer.Subtypes() + if len(st_inner) != 2 { + t.Errorf("Got size %d, though wanted 2", len(st_inner)) + } + if st_inner[0] != cont.Int32Type() { + t.Errorf("Expected first struct field to be int32") + } + if st_inner[1] != cont.Int8Type() { + t.Errorf("Expected second struct field to be int8") + } +} diff --git a/interpreter/llvm/src/bindings/ocaml/llvm/llvm.ml b/interpreter/llvm/src/bindings/ocaml/llvm/llvm.ml index 399fd2d27c201..59f0f178c2881 100644 --- a/interpreter/llvm/src/bindings/ocaml/llvm/llvm.ml +++ b/interpreter/llvm/src/bindings/ocaml/llvm/llvm.ml @@ -20,6 +20,10 @@ type llattribute type llmemorybuffer type llmdkind +exception FeatureDisabled of string + +let () = Callback.register_exception "Llvm.FeatureDisabled" (FeatureDisabled "") + module TypeKind = struct type t = | Void @@ -459,6 +463,8 @@ external is_packed : lltype -> bool = "llvm_is_packed" external is_opaque : lltype -> bool = "llvm_is_opaque" (*--... 
Operations on pointer, vector, and array types .....................--*) + +external subtypes : lltype -> lltype array = "llvm_subtypes" external array_type : lltype -> int -> lltype = "llvm_array_type" external pointer_type : lltype -> lltype = "llvm_pointer_type" external qualified_pointer_type : lltype -> int -> lltype diff --git a/interpreter/llvm/src/bindings/ocaml/llvm/llvm.mli b/interpreter/llvm/src/bindings/ocaml/llvm/llvm.mli index 4068126e2cbf1..3387c1ec52fe9 100644 --- a/interpreter/llvm/src/bindings/ocaml/llvm/llvm.mli +++ b/interpreter/llvm/src/bindings/ocaml/llvm/llvm.mli @@ -371,6 +371,8 @@ type ('a, 'b) llrev_pos = (** {6 Exceptions} *) +exception FeatureDisabled of string + exception IoError of string @@ -658,6 +660,9 @@ val is_opaque : lltype -> bool (** {7 Operations on pointer, vector, and array types} *) +(** [subtypes ty] returns [ty]'s subtypes *) +val subtypes : lltype -> lltype array + (** [array_type ty n] returns the array type containing [n] elements of type [ty]. See the method [llvm::ArrayType::get]. *) val array_type : lltype -> int -> lltype diff --git a/interpreter/llvm/src/bindings/ocaml/llvm/llvm_ocaml.c b/interpreter/llvm/src/bindings/ocaml/llvm/llvm_ocaml.c index af04ea25c8ab4..137b17f26bfb2 100644 --- a/interpreter/llvm/src/bindings/ocaml/llvm/llvm_ocaml.c +++ b/interpreter/llvm/src/bindings/ocaml/llvm/llvm_ocaml.c @@ -336,7 +336,12 @@ CAMLprim LLVMContextRef llvm_type_context(LLVMTypeRef Ty) { /* lltype -> unit */ CAMLprim value llvm_dump_type(LLVMTypeRef Val) { +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVMDumpType(Val); +#else + caml_raise_with_arg(*caml_named_value("Llvm.FeatureDisabled"), + caml_copy_string("dump")); +#endif return Val_unit; } @@ -506,6 +511,20 @@ CAMLprim value llvm_is_opaque(LLVMTypeRef StructTy) { /*--... 
Operations on array, pointer, and vector types .....................--*/ +/* lltype -> lltype array */ +CAMLprim value llvm_subtypes(LLVMTypeRef Ty) { + CAMLparam0(); + CAMLlocal1(Arr); + + unsigned Size = LLVMGetNumContainedTypes(Ty); + + Arr = caml_alloc(Size, 0); + + LLVMGetSubtypes(Ty, (LLVMTypeRef *) Arr); + + CAMLreturn(Arr); +} + /* lltype -> int -> lltype */ CAMLprim LLVMTypeRef llvm_array_type(LLVMTypeRef ElementTy, value Count) { return LLVMArrayType(ElementTy, Int_val(Count)); diff --git a/interpreter/llvm/src/bindings/ocaml/target/target_ocaml.c b/interpreter/llvm/src/bindings/ocaml/target/target_ocaml.c index b63bef6d3d5b1..8872f42b5b68b 100644 --- a/interpreter/llvm/src/bindings/ocaml/target/target_ocaml.c +++ b/interpreter/llvm/src/bindings/ocaml/target/target_ocaml.c @@ -77,7 +77,7 @@ CAMLprim value llvm_datalayout_pointer_size(value DL) { /* Llvm.llcontext -> DataLayout.t -> Llvm.lltype */ CAMLprim LLVMTypeRef llvm_datalayout_intptr_type(LLVMContextRef C, value DL) { - return LLVMIntPtrTypeInContext(C, DataLayout_val(DL));; + return LLVMIntPtrTypeInContext(C, DataLayout_val(DL)); } /* int -> DataLayout.t -> int */ diff --git a/interpreter/llvm/src/cmake/config-ix.cmake b/interpreter/llvm/src/cmake/config-ix.cmake index 5e8adf4a71dab..de8e9bf9a4944 100644 --- a/interpreter/llvm/src/cmake/config-ix.cmake +++ b/interpreter/llvm/src/cmake/config-ix.cmake @@ -397,7 +397,7 @@ elseif (LLVM_NATIVE_ARCH MATCHES "msp430") set(LLVM_NATIVE_ARCH MSP430) elseif (LLVM_NATIVE_ARCH MATCHES "hexagon") set(LLVM_NATIVE_ARCH Hexagon) -elseif (LLVM_NATIVE_ARCH MATCHES "s390[x]") +elseif (LLVM_NATIVE_ARCH MATCHES "s390x") set(LLVM_NATIVE_ARCH SystemZ) elseif (LLVM_NATIVE_ARCH MATCHES "wasm32") set(LLVM_NATIVE_ARCH WebAssembly) diff --git a/interpreter/llvm/src/cmake/modules/AddLLVM.cmake b/interpreter/llvm/src/cmake/modules/AddLLVM.cmake index 2d227b48c594f..e60c253fdfb16 100644 --- a/interpreter/llvm/src/cmake/modules/AddLLVM.cmake +++ b/interpreter/llvm/src/cmake/modules/AddLLVM.cmake @@ -91,7 +91,7 @@ function(add_llvm_symbol_exports target_name export_file) DEPENDS ${export_file} VERBATIM COMMENT "Creating export file for ${target_name}") - if (${CMAKE_SYSTEM_NAME} MATCHES "SunOS") + if (${LLVM_LINKER_IS_SOLARISLD}) set_property(TARGET ${target_name} APPEND_STRING PROPERTY LINK_FLAGS " -Wl,-M,${CMAKE_CURRENT_BINARY_DIR}/${native_export_file}") else() @@ -148,13 +148,28 @@ function(add_llvm_symbol_exports target_name export_file) endfunction(add_llvm_symbol_exports) if(NOT WIN32 AND NOT APPLE) + # Detect what linker we have here execute_process( COMMAND ${CMAKE_C_COMPILER} -Wl,--version OUTPUT_VARIABLE stdout - ERROR_QUIET + ERROR_VARIABLE stderr ) + set(LLVM_LINKER_DETECTED ON) if("${stdout}" MATCHES "GNU gold") set(LLVM_LINKER_IS_GOLD ON) + message(STATUS "Linker detection: GNU Gold") + elseif("${stdout}" MATCHES "^LLD") + set(LLVM_LINKER_IS_LLD ON) + message(STATUS "Linker detection: LLD") + elseif("${stdout}" MATCHES "GNU ld") + set(LLVM_LINKER_IS_GNULD ON) + message(STATUS "Linker detection: GNU ld") + elseif("${stderr}" MATCHES "Solaris Link Editors") + set(LLVM_LINKER_IS_SOLARISLD ON) + message(STATUS "Linker detection: Solaris ld") + else() + set(LLVM_LINKER_DETECTED OFF) + message(STATUS "Linker detection: unknown") endif() endif() @@ -865,7 +880,7 @@ macro(add_llvm_utility name) set_target_properties(${name} PROPERTIES FOLDER "Utils") if( LLVM_INSTALL_UTILS AND LLVM_BUILD_UTILS ) install (TARGETS ${name} - RUNTIME DESTINATION bin + RUNTIME DESTINATION ${LLVM_UTILS_INSTALL_DIR} 
COMPONENT ${name}) if (NOT CMAKE_CONFIGURATION_TYPES) add_custom_target(install-${name} @@ -1133,6 +1148,19 @@ function(configure_lit_site_cfg input output) set(LIT_SITE_CFG_IN_HEADER "## Autogenerated from ${input}\n## Do not edit!") + # Override config_target_triple (and the env) + if(LLVM_TARGET_TRIPLE_ENV) + # This is expanded into the heading. + string(CONCAT LIT_SITE_CFG_IN_HEADER "${LIT_SITE_CFG_IN_HEADER}\n\n" + "import os\n" + "target_env = \"${LLVM_TARGET_TRIPLE_ENV}\"\n" + "config.target_triple = config.environment[target_env] = os.environ.get(target_env, \"${TARGET_TRIPLE}\")\n" + ) + + # This is expanded to; config.target_triple = ""+config.target_triple+"" + set(TARGET_TRIPLE "\"+config.target_triple+\"") + endif() + configure_file(${input} ${output} @ONLY) endfunction() @@ -1146,11 +1174,6 @@ function(add_lit_target target comment) list(APPEND LIT_ARGS --param build_mode=${CMAKE_CFG_INTDIR}) endif () if (EXISTS ${LLVM_MAIN_SRC_DIR}/utils/lit/lit.py) - # reset cache after erraneous r283029 - # TODO: remove this once all buildbots run - if (LIT_COMMAND STREQUAL "${PYTHON_EXECUTABLE} ${LLVM_MAIN_SRC_DIR}/utils/lit/lit.py") - unset(LIT_COMMAND CACHE) - endif() set (LIT_COMMAND "${PYTHON_EXECUTABLE};${LLVM_MAIN_SRC_DIR}/utils/lit/lit.py" CACHE STRING "Command used to spawn llvm-lit") else() diff --git a/interpreter/llvm/src/cmake/modules/AddOCaml.cmake b/interpreter/llvm/src/cmake/modules/AddOCaml.cmake index 1b805c0710a39..1d8094cc505f5 100644 --- a/interpreter/llvm/src/cmake/modules/AddOCaml.cmake +++ b/interpreter/llvm/src/cmake/modules/AddOCaml.cmake @@ -87,6 +87,11 @@ function(add_ocaml_library name) foreach( include_dir ${LLVM_INCLUDE_DIR} ${LLVM_MAIN_INCLUDE_DIR} ) set(c_flags "${c_flags} -I${include_dir}") endforeach() + # include -D/-UNDEBUG to match dump function visibility + # regex from HandleLLVMOptions.cmake + string(REGEX MATCH "(^| )[/-][UD] *NDEBUG($| )" flag_matches + "${CMAKE_C_FLAGS_${uppercase_CMAKE_BUILD_TYPE}} ${CMAKE_C_FLAGS}") + set(c_flags "${c_flags} ${flag_matches}") foreach( ocaml_file ${ARG_OCAML} ) list(APPEND sources "${ocaml_file}.mli" "${ocaml_file}.ml") @@ -199,7 +204,7 @@ function(add_ocaml_library name) PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE - DESTINATION "${LLVM_OCAML_INSTALL_PATH}/llvm") + DESTINATION "${LLVM_OCAML_INSTALL_PATH}/stublibs") foreach( install_file ${install_files} ${install_shlibs} ) get_filename_component(filename "${install_file}" NAME) diff --git a/interpreter/llvm/src/cmake/modules/AddSphinxTarget.cmake b/interpreter/llvm/src/cmake/modules/AddSphinxTarget.cmake index c3a676d3063da..4540c5c36c8e2 100644 --- a/interpreter/llvm/src/cmake/modules/AddSphinxTarget.cmake +++ b/interpreter/llvm/src/cmake/modules/AddSphinxTarget.cmake @@ -1,9 +1,9 @@ # Create sphinx target -if (LLVM_ENABLE_SPHINX AND NOT TARGET sphinx) +if (LLVM_ENABLE_SPHINX) message(STATUS "Sphinx enabled.") find_package(Sphinx REQUIRED) - if (LLVM_BUILD_DOCS) + if (LLVM_BUILD_DOCS AND NOT TARGET sphinx) add_custom_target(sphinx ALL) endif() else() diff --git a/interpreter/llvm/src/cmake/modules/HandleLLVMOptions.cmake b/interpreter/llvm/src/cmake/modules/HandleLLVMOptions.cmake index e91e951d41355..0676317acc684 100644 --- a/interpreter/llvm/src/cmake/modules/HandleLLVMOptions.cmake +++ b/interpreter/llvm/src/cmake/modules/HandleLLVMOptions.cmake @@ -101,6 +101,10 @@ else() message(FATAL_ERROR "Unknown value for LLVM_ABI_BREAKING_CHECKS: \"${LLVM_ABI_BREAKING_CHECKS}\"!") endif() +if( 
LLVM_REVERSE_ITERATION ) + set( LLVM_ENABLE_REVERSE_ITERATION 1 ) +endif() + if(WIN32) set(LLVM_HAVE_LINK_VERSION_SCRIPT 0) if(CYGWIN) @@ -638,6 +642,9 @@ if(LLVM_USE_SANITIZER) append_common_sanitizer_flags() append("-fsanitize=address,undefined -fno-sanitize=vptr,function -fno-sanitize-recover=all" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) + elseif (LLVM_USE_SANITIZER STREQUAL "Leaks") + append_common_sanitizer_flags() + append("-fsanitize=leak" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) else() message(FATAL_ERROR "Unsupported value of LLVM_USE_SANITIZER: ${LLVM_USE_SANITIZER}") endif() @@ -679,8 +686,8 @@ endif() # lld doesn't print colored diagnostics when invoked from Ninja if (UNIX AND CMAKE_GENERATOR STREQUAL "Ninja") include(CheckLinkerFlag) - check_linker_flag("-Wl,-color-diagnostics" LINKER_SUPPORTS_COLOR_DIAGNOSTICS) - append_if(LINKER_SUPPORTS_COLOR_DIAGNOSTICS "-Wl,-color-diagnostics" + check_linker_flag("-Wl,--color-diagnostics" LINKER_SUPPORTS_COLOR_DIAGNOSTICS) + append_if(LINKER_SUPPORTS_COLOR_DIAGNOSTICS "-Wl,--color-diagnostics" CMAKE_EXE_LINKER_FLAGS CMAKE_MODULE_LINKER_FLAGS CMAKE_SHARED_LINKER_FLAGS) endif() diff --git a/interpreter/llvm/src/cmake/modules/LLVMExternalProjectUtils.cmake b/interpreter/llvm/src/cmake/modules/LLVMExternalProjectUtils.cmake index d457389f3ca37..c851eb8dbf086 100644 --- a/interpreter/llvm/src/cmake/modules/LLVMExternalProjectUtils.cmake +++ b/interpreter/llvm/src/cmake/modules/LLVMExternalProjectUtils.cmake @@ -195,8 +195,16 @@ function(llvm_ExternalProject_Add name source_dir) # Add top-level targets foreach(target ${ARG_EXTRA_TARGETS}) + string(REPLACE ":" ";" target_list ${target}) + list(GET target_list 0 target) + list(LENGTH target_list target_list_len) + if(${target_list_len} GREATER 1) + list(GET target_list 1 target_name) + else() + set(target_name "${target}") + endif() llvm_ExternalProject_BuildCmd(build_runtime_cmd ${target} ${BINARY_DIR}) - add_custom_target(${target} + add_custom_target(${target_name} COMMAND ${build_runtime_cmd} DEPENDS ${name}-configure WORKING_DIRECTORY ${BINARY_DIR} diff --git a/interpreter/llvm/src/cmake/modules/TableGen.cmake b/interpreter/llvm/src/cmake/modules/TableGen.cmake index da0858e54d441..8c3e2d7d70047 100644 --- a/interpreter/llvm/src/cmake/modules/TableGen.cmake +++ b/interpreter/llvm/src/cmake/modules/TableGen.cmake @@ -14,8 +14,31 @@ function(tablegen project ofn) message(FATAL_ERROR "${project}_TABLEGEN_EXE not set") endif() - file(GLOB local_tds "*.td") - file(GLOB_RECURSE global_tds "${LLVM_MAIN_INCLUDE_DIR}/llvm/*.td") + # Use depfile instead of globbing arbitrary *.td(s) + # DEPFILE is available for Ninja Generator with CMake>=3.7. + if(CMAKE_GENERATOR STREQUAL "Ninja" AND NOT CMAKE_VERSION VERSION_LESS 3.7) + # Make output path relative to build.ninja, assuming located on + # ${CMAKE_BINARY_DIR}. + # CMake emits build targets as relative paths but Ninja doesn't identify + # absolute path (in *.d) as relative path (in build.ninja) + # Note that tblgen is executed on ${CMAKE_BINARY_DIR} as working directory. 
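The machinery being introduced here, in isolation: with the Ninja generator and CMake >= 3.7, add_custom_command can consume a Makefile-style .d file through its DEPFILE option, which replaces globbing every *.td as a dependency. A minimal sketch of the pattern outside of tablegen; mytool, in.td and out.inc are placeholders:

    # Sketch: Ninja-only dependency tracking via a tool-emitted depfile.
    # Ninja matches paths in the .d file against the paths in build.ninja,
    # which are relative to the top build directory, hence the relativized
    # output and the WORKING_DIRECTORY choice (mirroring the code below).
    file(RELATIVE_PATH out_rel ${CMAKE_BINARY_DIR}
         ${CMAKE_CURRENT_BINARY_DIR}/out.inc)
    add_custom_command(OUTPUT out.inc
      COMMAND mytool in.td -o ${out_rel} -d ${out_rel}.d
      DEPFILE ${CMAKE_CURRENT_BINARY_DIR}/out.inc.d
      WORKING_DIRECTORY ${CMAKE_BINARY_DIR})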
+ file(RELATIVE_PATH ofn_rel + ${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_BINARY_DIR}/${ofn}) + set(additional_cmdline + -o ${ofn_rel}.tmp + -d ${ofn_rel}.d + WORKING_DIRECTORY ${CMAKE_BINARY_DIR} + DEPFILE ${CMAKE_CURRENT_BINARY_DIR}/${ofn}.d + ) + set(local_tds) + set(global_tds) + else() + file(GLOB local_tds "*.td") + file(GLOB_RECURSE global_tds "${LLVM_MAIN_INCLUDE_DIR}/llvm/*.td") + set(additional_cmdline + -o ${CMAKE_CURRENT_BINARY_DIR}/${ofn}.tmp + ) + endif() if (IS_ABSOLUTE ${LLVM_TARGET_DEFINITIONS}) set(LLVM_TARGET_DEFINITIONS_ABSOLUTE ${LLVM_TARGET_DEFINITIONS}) @@ -30,16 +53,26 @@ function(tablegen project ofn) endif() endif() + # We need both _TABLEGEN_TARGET and _TABLEGEN_EXE in the DEPENDS list + # (both the target and the file) to have .inc files rebuilt on + # a tablegen change, as cmake does not propagate file-level dependencies + # of custom targets. See the following ticket for more information: + # https://cmake.org/Bug/view.php?id=15858 + # The dependency on both, the target and the file, produces the same + # dependency twice in the result file when + # ("${${project}_TABLEGEN_TARGET}" STREQUAL "${${project}_TABLEGEN_EXE}") + # but lets us have smaller and cleaner code here. add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${ofn}.tmp # Generate tablegen output in a temporary file. COMMAND ${${project}_TABLEGEN_EXE} ${ARGN} -I ${CMAKE_CURRENT_SOURCE_DIR} - ${LLVM_TABLEGEN_FLAGS} + ${LLVM_TABLEGEN_FLAGS} ${LLVM_TARGET_DEFINITIONS_ABSOLUTE} - -o ${CMAKE_CURRENT_BINARY_DIR}/${ofn}.tmp + ${additional_cmdline} # The file in LLVM_TARGET_DEFINITIONS may be not in the current # directory and local_tds may not contain it, so we must # explicitly list it here: - DEPENDS ${${project}_TABLEGEN_TARGET} ${local_tds} ${global_tds} + DEPENDS ${${project}_TABLEGEN_TARGET} ${${project}_TABLEGEN_EXE} + ${local_tds} ${global_tds} ${LLVM_TARGET_DEFINITIONS_ABSOLUTE} COMMENT "Building ${ofn}..." ) @@ -94,7 +127,8 @@ macro(add_tablegen target project) set(${target}_OLD_LLVM_LINK_COMPONENTS ${LLVM_LINK_COMPONENTS}) set(LLVM_LINK_COMPONENTS ${LLVM_LINK_COMPONENTS} TableGen) - if(NOT XCODE) + # CMake-3.9 doesn't let compilation units depend on their dependent libraries. + if(NOT (CMAKE_GENERATOR STREQUAL "Ninja" AND NOT CMAKE_VERSION VERSION_LESS 3.9) AND NOT XCODE) # FIXME: It leaks to user, callee of add_tablegen. set(LLVM_ENABLE_OBJLIB ON) endif() diff --git a/interpreter/llvm/src/docs/AMDGPUUsage.rst b/interpreter/llvm/src/docs/AMDGPUUsage.rst index 81c067b317d3a..41c7ecba527fa 100644 --- a/interpreter/llvm/src/docs/AMDGPUUsage.rst +++ b/interpreter/llvm/src/docs/AMDGPUUsage.rst @@ -1,109 +1,3439 @@ -============================== -User Guide for AMDGPU Back-end -============================== +============================= +User Guide for AMDGPU Backend +============================= + +.. contents:: + :local: Introduction ============ -The AMDGPU back-end provides ISA code generation for AMD GPUs, starting with -the R600 family up until the current Volcanic Islands (GCN Gen 3). +The AMDGPU backend provides ISA code generation for AMD GPUs, starting with the +R600 family up until the current GCN families. It lives in the +``lib/Target/AMDGPU`` directory. -Refer to `AMDGPU section in Architecture & Platform Information for Compiler Writers `_ -for additional documentation. +LLVM +==== -Conventions -=========== +.. _amdgpu-target-triples: + +Target Triples +-------------- + +Use the ``clang -target <Architecture>-<Vendor>-<OS>-<Environment>`` option to +specify the target triple: + + .. 
table:: AMDGPU Target Triples + :name: amdgpu-target-triples-table + + ============ ======== ========= =========== + Architecture Vendor OS Environment + ============ ======== ========= =========== + r600 amd + amdgcn amd + amdgcn amd amdhsa + amdgcn amd amdhsa opencl + amdgcn amd amdhsa amdgizcl + amdgcn amd amdhsa amdgiz + amdgcn amd amdhsa hcc + ============ ======== ========= =========== + +``r600-amd--`` + Supports AMD GPUs HD2XXX-HD6XXX for graphics and compute shaders executed on + the MESA runtime. + +``amdgcn-amd--`` + Supports AMD GPUs GCN 6 onwards for graphics and compute shaders executed on + the MESA runtime. + +``amdgcn-amd-amdhsa-`` + Supports AMD GCN GPUs GFX6 onwards for compute kernels executed on HSA [HSA]_ + compatible runtimes such as AMD's ROCm [AMD-ROCm]_. + +``amdgcn-amd-amdhsa-opencl`` + Supports AMD GCN GPUs GFX6 onwards for OpenCL compute kernels executed on HSA + [HSA]_ compatible runtimes such as AMD's ROCm [AMD-ROCm]_. See + :ref:`amdgpu-opencl`. + +``amdgcn-amd-amdhsa-amdgizcl`` + Same as ``amdgcn-amd-amdhsa-opencl`` except a different address space mapping + is used (see :ref:`amdgpu-address-spaces`). + +``amdgcn-amd-amdhsa-amdgiz`` + Same as ``amdgcn-amd-amdhsa-`` except a different address space mapping is + used (see :ref:`amdgpu-address-spaces`). + +``amdgcn-amd-amdhsa-hcc`` + Supports AMD GCN GPUs GFX6 onwards for AMD HC language compute kernels + executed on HSA [HSA]_ compatible runtimes such as AMD's ROCm [AMD-ROCm]_. See + :ref:`amdgpu-hcc`. + +.. _amdgpu-processors: + +Processors +---------- + +Use the ``clang -mcpu `` option to specify the AMD GPU processor. The +names from both the *Processor* and *Alternative Processor* can be used. + + .. table:: AMDGPU Processors + :name: amdgpu-processors-table + + ========== =========== ============ ===== ======= ================== + Processor Alternative Target dGPU/ Runtime Example + Processor Triple APU Support Products + Architecture + ========== =========== ============ ===== ======= ================== + **R600** [AMD-R6xx]_ + -------------------------------------------------------------------- + r600 r600 dGPU + r630 r600 dGPU + rs880 r600 dGPU + rv670 r600 dGPU + **R700** [AMD-R7xx]_ + -------------------------------------------------------------------- + rv710 r600 dGPU + rv730 r600 dGPU + rv770 r600 dGPU + **Evergreen** [AMD-Evergreen]_ + -------------------------------------------------------------------- + cedar r600 dGPU + redwood r600 dGPU + sumo r600 dGPU + juniper r600 dGPU + cypress r600 dGPU + **Northern Islands** [AMD-Cayman-Trinity]_ + -------------------------------------------------------------------- + barts r600 dGPU + turks r600 dGPU + caicos r600 dGPU + cayman r600 dGPU + **GCN GFX6 (Southern Islands (SI))** [AMD-Souther-Islands]_ + -------------------------------------------------------------------- + gfx600 - SI amdgcn dGPU + - tahiti + gfx601 - pitcairn amdgcn dGPU + - verde + - oland + - hainan + **GCN GFX7 (Sea Islands (CI))** [AMD-Sea-Islands]_ + -------------------------------------------------------------------- + gfx700 - bonaire amdgcn dGPU - Radeon HD 7790 + - Radeon HD 8770 + - R7 260 + - R7 260X + \ - kaveri amdgcn APU - A6-7000 + - A6 Pro-7050B + - A8-7100 + - A8 Pro-7150B + - A10-7300 + - A10 Pro-7350B + - FX-7500 + - A8-7200P + - A10-7400P + - FX-7600P + gfx701 - hawaii amdgcn dGPU ROCm - FirePro W8100 + - FirePro W9100 + - FirePro S9150 + - FirePro S9170 + gfx702 dGPU ROCm - Radeon R9 290 + - Radeon R9 290x + - Radeon R390 + - Radeon R390x + gfx703 - kabini amdgcn 
APU - E1-2100 + - mullins - E1-2200 + - E1-2500 + - E2-3000 + - E2-3800 + - A4-5000 + - A4-5100 + - A6-5200 + - A4 Pro-3340B + **GCN GFX8 (Volcanic Islands (VI))** [AMD-Volcanic-Islands]_ + -------------------------------------------------------------------- + gfx800 - iceland amdgcn dGPU - FirePro S7150 + - FirePro S7100 + - FirePro W7100 + - Radeon R285 + - Radeon R9 380 + - Radeon R9 385 + - Mobile FirePro + M7170 + gfx801 - carrizo amdgcn APU - A6-8500P + - Pro A6-8500B + - A8-8600P + - Pro A8-8600B + - FX-8800P + - Pro A12-8800B + \ amdgcn APU ROCm - A10-8700P + - Pro A10-8700B + - A10-8780P + \ amdgcn APU - A10-9600P + - A10-9630P + - A12-9700P + - A12-9730P + - FX-9800P + - FX-9830P + \ amdgcn APU - E2-9010 + - A6-9210 + - A9-9410 + gfx802 - tonga amdgcn dGPU ROCm Same as gfx800 + gfx803 - fiji amdgcn dGPU ROCm - Radeon R9 Nano + - Radeon R9 Fury + - Radeon R9 FuryX + - Radeon Pro Duo + - FirePro S9300x2 + \ - polaris10 amdgcn dGPU ROCm - Radeon RX 470 + - Radeon RX 480 + \ - polaris11 amdgcn dGPU ROCm - Radeon RX 460 + gfx804 amdgcn dGPU Same as gfx803 + gfx810 - stoney amdgcn APU + **GCN GFX9** + -------------------------------------------------------------------- + gfx900 amdgcn dGPU - Radeon Vega Frontier Edition + gfx901 amdgcn dGPU ROCm Same as gfx900 + except XNACK is + enabled + gfx902 amdgcn APU *TBA* + + .. TODO + Add product + names. + gfx903 amdgcn APU Same as gfx902 + except XNACK is + enabled + ========== =========== ============ ===== ======= ================== + +.. _amdgpu-address-spaces: Address Spaces -------------- -The AMDGPU back-end uses the following address space mapping: +The AMDGPU backend uses the following address space mappings. + +The memory space names used in the table, aside from the region memory space, are +from the OpenCL standard. + +The LLVM Address Space number is used throughout LLVM (for example, in LLVM IR). + + .. table:: Address Space Mapping + :name: amdgpu-address-space-mapping-table + + ================== ================= ================= ================= ================= + LLVM Address Space Memory Space + ------------------ ----------------------------------------------------------------------- + \ Current Default amdgiz/amdgizcl hcc Future Default + ================== ================= ================= ================= ================= + 0 Private (Scratch) Generic (Flat) Generic (Flat) Generic (Flat) + 1 Global Global Global Global + 2 Constant Constant Constant Region (GDS) + 3 Local (group/LDS) Local (group/LDS) Local (group/LDS) Local (group/LDS) + 4 Generic (Flat) Region (GDS) Region (GDS) Constant + 5 Region (GDS) Private (Scratch) Private (Scratch) Private (Scratch) + ================== ================= ================= ================= ================= + +Current Default + This is the current default address space mapping used for all languages + except hcc. This will shortly be deprecated. + +amdgiz/amdgizcl + This is the current address space mapping used when ``amdgiz`` or ``amdgizcl`` + is specified as the target triple environment value. + +hcc + This is the current address space mapping used when ``hcc`` is specified as + the target triple environment value. This will shortly be deprecated. + +Future Default + This will shortly be the only address space mapping for all languages using + the AMDGPU backend. + +.. _amdgpu-memory-scopes:
+ +Memory Scopes +------------- + +This section provides LLVM memory synchronization scopes supported by the AMDGPU +backend memory model when the target triple OS is ``amdhsa`` (see +:ref:`amdgpu-amdhsa-memory-model` and :ref:`amdgpu-target-triples`). + +The memory model supported is based on the HSA memory model [HSA]_ which is +based in turn on HRF-indirect with scope inclusion [HRF]_. The happens-before +relation is transitive over the synchronizes-with relation independent of scope, +and synchronizes-with allows the memory scope instances to be inclusive (see +table :ref:`amdgpu-amdhsa-llvm-sync-scopes-amdhsa-table`). + +This is different from the OpenCL [OpenCL]_ memory model which does not have scope +inclusion and requires the memory scopes to exactly match. However, this +is conservatively correct for OpenCL. + + .. table:: AMDHSA LLVM Sync Scopes for AMDHSA + :name: amdgpu-amdhsa-llvm-sync-scopes-amdhsa-table + + ================ ========================================================== + LLVM Sync Scope Description + ================ ========================================================== + *none* The default: ``system``. + + Synchronizes with, and participates in modification and + seq_cst total orderings with, other operations (except + image operations) for all address spaces (except private, + or generic that accesses private) provided the other + operation's sync scope is: + + - ``system``. + - ``agent`` and executed by a thread on the same agent. + - ``workgroup`` and executed by a thread in the same + workgroup. + - ``wavefront`` and executed by a thread in the same + wavefront. + + ``agent`` Synchronizes with, and participates in modification and + seq_cst total orderings with, other operations (except + image operations) for all address spaces (except private, + or generic that accesses private) provided the other + operation's sync scope is: + + - ``system`` or ``agent`` and executed by a thread on the + same agent. + - ``workgroup`` and executed by a thread in the same + workgroup. + - ``wavefront`` and executed by a thread in the same + wavefront. + + ``workgroup`` Synchronizes with, and participates in modification and + seq_cst total orderings with, other operations (except + image operations) for all address spaces (except private, + or generic that accesses private) provided the other + operation's sync scope is: + + - ``system``, ``agent`` or ``workgroup`` and executed by a + thread in the same workgroup. + - ``wavefront`` and executed by a thread in the same + wavefront. + + ``wavefront`` Synchronizes with, and participates in modification and + seq_cst total orderings with, other operations (except + image operations) for all address spaces (except private, + or generic that accesses private) provided the other + operation's sync scope is: + + - ``system``, ``agent``, ``workgroup`` or ``wavefront`` + and executed by a thread in the same wavefront. + + ``singlethread`` Only synchronizes with, and participates in modification + and seq_cst total orderings with, other operations (except + image operations) running in the same thread for all + address spaces (for example, in signal handlers). + ================ ========================================================== + +AMDGPU Intrinsics +----------------- + +The AMDGPU backend implements the following intrinsics. + +*This section is WIP.* + +.. TODO
+ List AMDGPU intrinsics + +Code Object +=========== + +The AMDGPU backend generates a standard ELF [ELF]_ relocatable code object that +can be linked by ``lld`` to produce a standard ELF shared code object which can +be loaded and executed on an AMDGPU target. + +Header +------ + +The AMDGPU backend uses the following ELF header: + + .. table:: AMDGPU ELF Header + :name: amdgpu-elf-header-table + + ========================== ========================= + Field Value + ========================== ========================= + ``e_ident[EI_CLASS]`` ``ELFCLASS64`` + ``e_ident[EI_DATA]`` ``ELFDATA2LSB`` + ``e_ident[EI_OSABI]`` ``ELFOSABI_AMDGPU_HSA`` + ``e_ident[EI_ABIVERSION]`` ``ELFABIVERSION_AMDGPU_HSA`` + ``e_type`` ``ET_REL`` or ``ET_DYN`` + ``e_machine`` ``EM_AMDGPU`` + ``e_entry`` 0 + ``e_flags`` 0 + ========================== ========================= + +.. + + .. table:: AMDGPU ELF Header Enumeration Values + :name: amdgpu-elf-header-enumeration-values-table + + ============================ ===== + Name Value + ============================ ===== + ``EM_AMDGPU`` 224 + ``ELFOSABI_AMDGPU_HSA`` 64 + ``ELFABIVERSION_AMDGPU_HSA`` 1 + ============================ ===== + +``e_ident[EI_CLASS]`` + The ELF class is always ``ELFCLASS64``. The AMDGPU backend only supports 64-bit + applications. + +``e_ident[EI_DATA]`` + All AMDGPU targets use ELFDATA2LSB for little-endian byte ordering. + +``e_ident[EI_OSABI]`` + The AMD GPU architecture specific OS ABI of ``ELFOSABI_AMDGPU_HSA`` is used to + specify that the code object conforms to the AMD HSA runtime ABI [HSA]_. + +``e_ident[EI_ABIVERSION]`` + The AMD GPU architecture specific OS ABI version of + ``ELFABIVERSION_AMDGPU_HSA`` is used to specify the version of AMD HSA runtime + ABI to which the code object conforms. + +``e_type`` + Can be one of the following values: + + + ``ET_REL`` + The type produced by the AMD GPU backend compiler as it is a relocatable code + object. + + ``ET_DYN`` + The type produced by the linker as it is a shared code object. + + The AMD HSA runtime loader requires an ``ET_DYN`` code object. + +``e_machine`` + The value ``EM_AMDGPU`` is used as the machine for all members of the AMD GPU + architecture family. The specific member is specified in the + ``NT_AMD_AMDGPU_ISA`` entry in the ``.note`` section (see + :ref:`amdgpu-note-records`). + +``e_entry`` + The entry point is 0 as the entry points for individual kernels must be + selected in order to invoke them through AQL packets. + +``e_flags`` + The value is 0 as no flags are used. + +Sections +-------- + +An AMDGPU target ELF code object has the standard ELF sections which include: + + .. table:: AMDGPU ELF Sections
+Sections +-------- + +An AMDGPU target ELF code object has the standard ELF sections which include: + + .. table:: AMDGPU ELF Sections + :name: amdgpu-elf-sections-table + + ================== ================ ================================= + Name Type Attributes + ================== ================ ================================= + ``.bss`` ``SHT_NOBITS`` ``SHF_ALLOC`` + ``SHF_WRITE`` + ``.data`` ``SHT_PROGBITS`` ``SHF_ALLOC`` + ``SHF_WRITE`` + ``.debug_``\ *\** ``SHT_PROGBITS`` *none* + ``.dynamic`` ``SHT_DYNAMIC`` ``SHF_ALLOC`` + ``.dynstr`` ``SHT_PROGBITS`` ``SHF_ALLOC`` + ``.dynsym`` ``SHT_PROGBITS`` ``SHF_ALLOC`` + ``.got`` ``SHT_PROGBITS`` ``SHF_ALLOC`` + ``SHF_WRITE`` + ``.hash`` ``SHT_HASH`` ``SHF_ALLOC`` + ``.note`` ``SHT_NOTE`` *none* + ``.rela``\ *name* ``SHT_RELA`` *none* + ``.rela.dyn`` ``SHT_RELA`` *none* + ``.rodata`` ``SHT_PROGBITS`` ``SHF_ALLOC`` + ``.shstrtab`` ``SHT_STRTAB`` *none* + ``.strtab`` ``SHT_STRTAB`` *none* + ``.symtab`` ``SHT_SYMTAB`` *none* + ``.text`` ``SHT_PROGBITS`` ``SHF_ALLOC`` + ``SHF_EXECINSTR`` + ================== ================ ================================= + +These sections have their standard meanings (see [ELF]_) and are only generated +if needed. + +``.debug_``\ *\** + The standard DWARF sections. See :ref:`amdgpu-dwarf` for information on the + DWARF produced by the AMDGPU backend. + +``.dynamic``, ``.dynstr``, ``.dynsym``, ``.hash`` + The standard sections used by a dynamic loader. + +``.note`` + See :ref:`amdgpu-note-records` for the note records supported by the AMDGPU + backend. + +``.rela``\ *name*, ``.rela.dyn`` + For relocatable code objects, *name* is the name of the section to which the + relocation records apply. For example, ``.rela.text`` is the section name for + relocation records associated with the ``.text`` section. + + For linked shared code objects, ``.rela.dyn`` contains all the relocation + records from each of the relocatable code object's ``.rela``\ *name* sections. + + See :ref:`amdgpu-relocation-records` for the relocation records supported by + the AMDGPU backend. + +``.text`` + The executable machine code for the kernels and functions they call. Generated + as position independent code. See :ref:`amdgpu-code-conventions` for + information on conventions used in the ISA generation. + +.. _amdgpu-note-records: + +Note Records +------------ + +As required by ``ELFCLASS64``, minimal zero byte padding must be generated after +the ``name`` field to ensure the ``desc`` field is 4 byte aligned. In addition, +minimal zero byte padding must be generated to ensure the ``desc`` field size is +a multiple of 4 bytes. The ``sh_addralign`` field of the ``.note`` section must +be at least 4 to indicate at least 4 byte alignment.
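+ +A sketch of this layout rule (helper names are illustrative): the 12 byte note +header holds the ``namesz``, ``descsz`` and ``type`` words, and both variable +size fields are zero padded to a 4 byte boundary: + +.. code-block:: c++ + + #include <cstdint> + + constexpr uint32_t alignTo4(uint32_t N) { return (N + 3) & ~uint32_t(3); } + + // Total size in bytes of one note record, including the 12 byte header + // and the zero padding generated after the name and desc fields. + constexpr uint32_t noteRecordSize(uint32_t NameSz, uint32_t DescSz) { + return 12 + alignTo4(NameSz) + alignTo4(DescSz); + } + + // Example: a record with a 4 byte name ("AMD" plus terminating null) and + // a 30 byte desc occupies 12 + 4 + 32 = 48 bytes. + static_assert(noteRecordSize(4, 30) == 48, "desc is padded to 32 bytes");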
+The AMDGPU backend code object uses the following ELF note records in the +``.note`` section. The *Description* column specifies the layout of the note +record’s ``desc`` field. All fields are consecutive bytes. Note records with +variable size strings have a corresponding ``*_size`` field that specifies the +number of bytes, including the terminating null character, in the string. The +string(s) come immediately after the preceding fields. + +Additional note records can be present. + + .. table:: AMDGPU ELF Note Records + :name: amdgpu-elf-note-records-table - ================== =================== ============== - LLVM Address Space DWARF Address Space Memory Space - ================== =================== ============== - 0 1 Private - 1 N/A Global - 2 N/A Constant - 3 2 Local - 4 N/A Generic (Flat) - 5 N/A Region - ================== =================== ============== + ===== ========================== ========================================== + Name Type Description + ===== ========================== ========================================== + "AMD" ``NT_AMD_AMDGPU_METADATA`` + "AMD" ``NT_AMD_AMDGPU_ISA`` + ===== ========================== ========================================== -The terminology in the table, aside from the region memory space, is from the -OpenCL standard. +.. -LLVM Address Space is used throughout LLVM (for example, in LLVM IR). DWARF -Address Space is emitted in DWARF, and is used by tools, such as debugger, -profiler and others. + .. table:: AMDGPU ELF Note Record Enumeration Values + :name: amdgpu-elf-note-record-enumeration-values-table + + ============================= ===== + Name Value + ============================= ===== + *reserved* 0-9 + ``NT_AMD_AMDGPU_METADATA`` 10 + ``NT_AMD_AMDGPU_ISA`` 11 + ============================= ===== + +``NT_AMD_AMDGPU_ISA`` + Specifies the instruction set architecture used by the machine code contained + in the code object. + + This note record is required for code objects containing machine code for + processors matching the ``amdgcn`` architecture in table + :ref:`amdgpu-processors`. + + The null terminated string has the following syntax: + + *architecture*\ ``-``\ *vendor*\ ``-``\ *os*\ ``-``\ *environment*\ ``-``\ *processor* + + where: + + *architecture* + The architecture from table :ref:`amdgpu-target-triples-table`. + + This is always ``amdgcn`` when the target triple OS is ``amdhsa`` (see + :ref:`amdgpu-target-triples`). + + *vendor* + The vendor from table :ref:`amdgpu-target-triples-table`. + + For the AMDGPU backend this is always ``amd``. + + *os* + The OS from table :ref:`amdgpu-target-triples-table`. + + *environment* + An environment from table :ref:`amdgpu-target-triples-table`, or blank if + the environment has no effect on the execution of the code object. + + For the AMDGPU backend this is currently always blank. + + *processor* + The processor from table :ref:`amdgpu-processors-table`. + + For example: + + ``amdgcn-amd-amdhsa--gfx901`` + +``NT_AMD_AMDGPU_METADATA`` + Specifies extensible metadata associated with the code object. See + :ref:`amdgpu-code-object-metadata` for the syntax of the code object metadata + string. + + This note record is required and must contain the minimum information + necessary to support the ROCm kernel queries. For example, the segment sizes + needed in a dispatch packet. In addition, a high level language runtime may + require other information to be included. For example, the AMD OpenCL runtime + records kernel argument information. + + .. TODO + Is the string null terminated? It probably should not if YAML allows it to + contain null characters, otherwise it should be. + +.. _amdgpu-code-object-metadata: + +Code Object Metadata +-------------------- + +The code object metadata is specified by the ``NT_AMD_AMDGPU_METADATA`` note +record (see :ref:`amdgpu-note-records`). + +The metadata is specified as a YAML formatted string (see [YAML]_ and +:doc:`YamlIO`).
+ +The metadata is represented as a single YAML document consisting of the mapping +defined in table :ref:`amdgpu-amdhsa-code-object-metadata-mapping-table` and +referenced tables. + +For boolean values, the string values of ``false`` and ``true`` are used for +false and true respectively. + +Additional information can be added to the mappings. To avoid conflicts, any +non-AMD key names should be prefixed by "*vendor-name*.". + + .. table:: AMDHSA Code Object Metadata Mapping + :name: amdgpu-amdhsa-code-object-metadata-mapping-table + + ========== ============== ========= ======================================= + String Key Value Type Required? Description + ========== ============== ========= ======================================= + "Version" sequence of Required - The first integer is the major + 2 integers version. Currently 1. + - The second integer is the minor + version. Currently 0. + "Printf" sequence of Each string encodes information + strings about a printf function call (see the + example following this table). The + encoded information is organized as + fields separated by colon (':'): + + ``ID:N:S[0]:S[1]:...:S[N-1]:FormatString`` + + where: + + ``ID`` + A 32 bit integer as a unique id for + each printf function call + + ``N`` + A 32 bit integer equal to the number + of arguments of printf function call + minus 1 + + ``S[i]`` (where i = 0, 1, ... , N-1) + 32 bit integers for the size in bytes + of the i-th FormatString argument of + the printf function call + + FormatString + The format string passed to the + printf function call. + "Kernels" sequence of Required Sequence of the mappings for each + mapping kernel in the code object. See + :ref:`amdgpu-amdhsa-code-object-kernel-metadata-mapping-table` + for the definition of the mapping. + ========== ============== ========= =======================================
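+ +As an illustration, one "Printf" string in the encoding above can be assembled +as follows (a sketch; the helper name is illustrative): + +.. code-block:: c++ + + #include <cstdint> + #include <string> + #include <vector> + + // Builds "ID:N:S[0]:...:S[N-1]:FormatString" where N is the number of + // printf arguments excluding the format string itself. + std::string encodePrintfInfo(uint32_t ID, + const std::vector<uint32_t> &ArgSizes, + const std::string &Format) { + std::string S = std::to_string(ID) + ':' + std::to_string(ArgSizes.size()); + for (uint32_t Sz : ArgSizes) // S[i]: byte size of the i-th argument + S += ':' + std::to_string(Sz); + return S + ':' + Format; // the format string comes last + } + + // encodePrintfInfo(1, {4, 8}, "%d %f\n") yields "1:2:4:8:%d %f\n".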
+.. + + .. table:: AMDHSA Code Object Kernel Metadata Mapping + :name: amdgpu-amdhsa-code-object-kernel-metadata-mapping-table + + ================= ============== ========= ================================ + String Key Value Type Required? Description + ================= ============== ========= ================================ + "Name" string Required Source name of the kernel. + "SymbolName" string Required Name of the kernel + descriptor ELF symbol. + "Language" string Source language of the kernel. + Values include: + + - "OpenCL C" + - "OpenCL C++" + - "HCC" + - "OpenMP" + + "LanguageVersion" sequence of - The first integer is the major + 2 integers version. + - The second integer is the + minor version. + "Attrs" mapping Mapping of kernel attributes. + See + :ref:`amdgpu-amdhsa-code-object-kernel-attribute-metadata-mapping-table` + for the mapping definition. + "Arguments" sequence of Sequence of mappings of the + mapping kernel arguments. See + :ref:`amdgpu-amdhsa-code-object-kernel-argument-metadata-mapping-table` + for the definition of the mapping. + "CodeProps" mapping Mapping of properties related to + the kernel code. See + :ref:`amdgpu-amdhsa-code-object-kernel-code-properties-metadata-mapping-table` + for the mapping definition. + "DebugProps" mapping Mapping of properties related to + the kernel debugging. See + :ref:`amdgpu-amdhsa-code-object-kernel-debug-properties-metadata-mapping-table` + for the mapping definition. + ================= ============== ========= ================================ + +.. + + .. table:: AMDHSA Code Object Kernel Attribute Metadata Mapping + :name: amdgpu-amdhsa-code-object-kernel-attribute-metadata-mapping-table + + =================== ============== ========= ============================== + String Key Value Type Required? Description + =================== ============== ========= ============================== + "ReqdWorkGroupSize" sequence of The dispatch work-group size + 3 integers X, Y, Z must correspond to the + specified values. + + Corresponds to the OpenCL + ``reqd_work_group_size`` + attribute. + "WorkGroupSizeHint" sequence of The dispatch work-group size + 3 integers X, Y, Z is likely to be the + specified values. + + Corresponds to the OpenCL + ``work_group_size_hint`` + attribute. + "VecTypeHint" string The name of a scalar or vector + type. + + Corresponds to the OpenCL + ``vec_type_hint`` attribute. + =================== ============== ========= ============================== + +.. + + .. table:: AMDHSA Code Object Kernel Argument Metadata Mapping + :name: amdgpu-amdhsa-code-object-kernel-argument-metadata-mapping-table + + ================= ============== ========= ================================ + String Key Value Type Required? Description + ================= ============== ========= ================================ + "Name" string Kernel argument name. + "TypeName" string Kernel argument type name. + "Size" integer Required Kernel argument size in bytes. + "Align" integer Required Kernel argument alignment in + bytes. Must be a power of two. + "ValueKind" string Required Kernel argument kind that + specifies how to set up the + corresponding argument. + Values include: + + "ByValue" + The argument is copied + directly into the kernarg. + + "GlobalBuffer" + A global address space pointer + to the buffer data is passed + in the kernarg. + + "DynamicSharedPointer" + A group address space pointer + to dynamically allocated LDS + is passed in the kernarg. + + "Sampler" + A global address space + pointer to an S# is passed in + the kernarg. + + "Image" + A global address space + pointer to a T# is passed in + the kernarg. + + "Pipe" + A global address space pointer + to an OpenCL pipe is passed in + the kernarg. + + "Queue" + A global address space pointer + to an OpenCL device enqueue + queue is passed in the + kernarg. + + "HiddenGlobalOffsetX" + The OpenCL grid dispatch + global offset for the X + dimension is passed in the + kernarg. + + "HiddenGlobalOffsetY" + The OpenCL grid dispatch + global offset for the Y + dimension is passed in the + kernarg. + + "HiddenGlobalOffsetZ" + The OpenCL grid dispatch + global offset for the Z + dimension is passed in the + kernarg. + + "HiddenNone" + An argument that is not used + by the kernel. Space needs to + be left for it, but it does + not need to be set up. + + "HiddenPrintfBuffer" + A global address space pointer + to the runtime printf buffer + is passed in kernarg. + + "HiddenDefaultQueue" + A global address space pointer + to the OpenCL device enqueue + queue that should be used by + the kernel by default is + passed in the kernarg. + + "HiddenCompletionAction" + *TBD* + + .. TODO + Add description. + + "ValueType" string Required Kernel argument value type. Only + present if "ValueKind" is + "ByValue". For vector data + types, the value is for the + element type. Values include: + + - "Struct" + - "I8" + - "U8" + - "I16" + - "U16" + - "F16" + - "I32" + - "U32" + - "F32" + - "I64" + - "U64" + - "F64" + + .. TODO + How can it be determined if a + vector type, and what size + vector?
+ "PointeeAlign" integer Alignment in bytes of pointee + type for pointer type kernel + argument. Must be a power + of 2. Only present if + "ValueKind" is + "DynamicSharedPointer". + "AddrSpaceQual" string Kernel argument address space + qualifier. Only present if + "ValueKind" is "GlobalBuffer" or + "DynamicSharedPointer". Values + are: + + - "Private" + - "Global" + - "Constant" + - "Local" + - "Generic" + - "Region" + + .. TODO + Is GlobalBuffer only Global + or Constant? Is + DynamicSharedPointer always + Local? Can HCC allow Generic? + How can Private or Region + ever happen? + "AccQual" string Kernel argument access + qualifier. Only present if + "ValueKind" is "Image" or + "Pipe". Values + are: + + - "ReadOnly" + - "WriteOnly" + - "ReadWrite" + + .. TODO + Does this apply to + GlobalBuffer? + "ActualAcc" string The actual memory accesses + performed by the kernel on the + kernel argument. Only present if + "ValueKind" is "GlobalBuffer", + "Image", or "Pipe". This may be + more restrictive than indicated + by "AccQual" to reflect what the + kernel actual does. If not + present then the runtime must + assume what is implied by + "AccQual" and "IsConst". Values + are: + + - "ReadOnly" + - "WriteOnly" + - "ReadWrite" + + "IsConst" boolean Indicates if the kernel argument + is const qualified. Only present + if "ValueKind" is + "GlobalBuffer". + + "IsRestrict" boolean Indicates if the kernel argument + is restrict qualified. Only + present if "ValueKind" is + "GlobalBuffer". + + "IsVolatile" boolean Indicates if the kernel argument + is volatile qualified. Only + present if "ValueKind" is + "GlobalBuffer". + + "IsPipe" boolean Indicates if the kernel argument + is pipe qualified. Only present + if "ValueKind" is "Pipe". + + .. TODO + Can GlobalBuffer be pipe + qualified? + ================= ============== ========= ================================ + +.. + + .. table:: AMDHSA Code Object Kernel Code Properties Metadata Mapping + :name: amdgpu-amdhsa-code-object-kernel-code-properties-metadata-mapping-table + + ============================ ============== ========= ===================== + String Key Value Type Required? Description + ============================ ============== ========= ===================== + "KernargSegmentSize" integer Required The size in bytes of + the kernarg segment + that holds the values + of the arguments to + the kernel. + "GroupSegmentFixedSize" integer Required The amount of group + segment memory + required by a + work-group in + bytes. This does not + include any + dynamically allocated + group segment memory + that may be added + when the kernel is + dispatched. + "PrivateSegmentFixedSize" integer Required The amount of fixed + private address space + memory required for a + work-item in + bytes. If + IsDynamicCallstack + is 1 then additional + space must be added + to this value for the + call stack. + "KernargSegmentAlign" integer Required The maximum byte + alignment of + arguments in the + kernarg segment. Must + be a power of 2. + "WavefrontSize" integer Required Wavefront size. Must + be a power of 2. + "NumSGPRs" integer Number of scalar + registers used by a + wavefront for + GFX6-GFX9. This + includes the special + SGPRs for VCC, Flat + Scratch (GFX7-GFX9) + and XNACK (for + GFX8-GFX9). It does + not include the 16 + SGPR added if a trap + handler is + enabled. It is not + rounded up to the + allocation + granularity. 
+ "NumVGPRs" integer Number of vector + registers used by + each work-item for + GFX6-GFX9 + "MaxFlatWorkgroupSize" integer Maximum flat + work-group size + supported by the + kernel in work-items. + "IsDynamicCallStack" boolean Indicates if the + generated machine + code is using a + dynamically sized + call stack. + "IsXNACKEnabled" boolean Indicates if the + generated machine + code is capable of + supporting XNACK. + ============================ ============== ========= ===================== + +.. + + .. table:: AMDHSA Code Object Kernel Debug Properties Metadata Mapping + :name: amdgpu-amdhsa-code-object-kernel-debug-properties-metadata-mapping-table + + =================================== ============== ========= ============== + String Key Value Type Required? Description + =================================== ============== ========= ============== + "DebuggerABIVersion" string + "ReservedNumVGPRs" integer + "ReservedFirstVGPR" integer + "PrivateSegmentBufferSGPR" integer + "WavefrontPrivateSegmentOffsetSGPR" integer + =================================== ============== ========= ============== + +.. TODO + Plan to remove the debug properties metadata. + +.. _amdgpu-symbols: + +Symbols +------- + +Symbols include the following: + + .. table:: AMDGPU ELF Symbols + :name: amdgpu-elf-symbols-table + + ===================== ============== ============= ================== + Name Type Section Description + ===================== ============== ============= ================== + *link-name* ``STT_OBJECT`` - ``.data`` Global variable + - ``.rodata`` + - ``.bss`` + *link-name*\ ``@kd`` ``STT_OBJECT`` - ``.rodata`` Kernel descriptor + *link-name* ``STT_FUNC`` - ``.text`` Kernel entry point + ===================== ============== ============= ================== + +Global variable + Global variables both used and defined by the compilation unit. + + If the symbol is defined in the compilation unit then it is allocated in the + appropriate section according to if it has initialized data or is readonly. + + If the symbol is external then its section is ``STN_UNDEF`` and the loader + will resolve relocations using the definition provided by another code object + or explicitly defined by the runtime. + + All global symbols, whether defined in the compilation unit or external, are + accessed by the machine code indirectly through a GOT table entry. This + allows them to be preemptable. The GOT table is only supported when the target + triple OS is ``amdhsa`` (see :ref:`amdgpu-target-triples`). + + .. TODO + Add description of linked shared object symbols. Seems undefined symbols + are marked as STT_NOTYPE. + +Kernel descriptor + Every HSA kernel has an associated kernel descriptor. It is the address of the + kernel descriptor that is used in the AQL dispatch packet used to invoke the + kernel, not the kernel entry point. The layout of the HSA kernel descriptor is + defined in :ref:`amdgpu-amdhsa-kernel-descriptor`. + +Kernel entry point + Every HSA kernel also has a symbol for its machine code entry point. + +.. _amdgpu-relocation-records: + +Relocation Records +------------------ + +AMDGPU backend generates ``Elf64_Rela`` relocation records. Supported +relocatable fields are: + +``word32`` + This specifies a 32-bit field occupying 4 bytes with arbitrary byte + alignment. These values use the same byte order as other word values in the + AMD GPU architecture. + +``word64`` + This specifies a 64-bit field occupying 8 bytes with arbitrary byte + alignment. 
These values use the same byte order as other word values in the + AMD GPU architecture. + +The following notations are used for specifying relocation calculations: + +**A** + Represents the addend used to compute the value of the relocatable field. + +**G** + Represents the offset into the global offset table at which the relocation + entry’s symbol will reside during execution. + +**GOT** + Represents the address of the global offset table. + +**P** + Represents the place (section offset for ``ET_REL`` or address for ``ET_DYN``) + of the storage unit being relocated (computed using ``r_offset``). + +**S** + Represents the value of the symbol whose index resides in the relocation + entry. + +The following relocation types are supported: + + .. table:: AMDGPU ELF Relocation Records + :name: amdgpu-elf-relocation-records-table + + ========================== ===== ========== ============================== + Relocation Type Value Field Calculation + ========================== ===== ========== ============================== + ``R_AMDGPU_NONE`` 0 *none* *none* + ``R_AMDGPU_ABS32_LO`` 1 ``word32`` (S + A) & 0xFFFFFFFF + ``R_AMDGPU_ABS32_HI`` 2 ``word32`` (S + A) >> 32 + ``R_AMDGPU_ABS64`` 3 ``word64`` S + A + ``R_AMDGPU_REL32`` 4 ``word32`` S + A - P + ``R_AMDGPU_REL64`` 5 ``word64`` S + A - P + ``R_AMDGPU_ABS32`` 6 ``word32`` S + A + ``R_AMDGPU_GOTPCREL`` 7 ``word32`` G + GOT + A - P + ``R_AMDGPU_GOTPCREL32_LO`` 8 ``word32`` (G + GOT + A - P) & 0xFFFFFFFF + ``R_AMDGPU_GOTPCREL32_HI`` 9 ``word32`` (G + GOT + A - P) >> 32 + ``R_AMDGPU_REL32_LO`` 10 ``word32`` (S + A - P) & 0xFFFFFFFF + ``R_AMDGPU_REL32_HI`` 11 ``word32`` (S + A - P) >> 32 + ========================== ===== ========== ==============================
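+ +For example, the split 32 bit calculations in the table above can be written +directly from their definitions (a sketch; helper names are illustrative): + +.. code-block:: c++ + + #include <cstdint> + + // R_AMDGPU_REL32_LO: (S + A - P) & 0xFFFFFFFF + uint32_t rel32Lo(uint64_t S, int64_t A, uint64_t P) { + return uint32_t((S + A - P) & 0xFFFFFFFF); + } + + // R_AMDGPU_REL32_HI: (S + A - P) >> 32 + uint32_t rel32Hi(uint64_t S, int64_t A, uint64_t P) { + return uint32_t((S + A - P) >> 32); + }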
+.. _amdgpu-dwarf: + +DWARF +----- + +Standard DWARF [DWARF]_ Version 2 sections can be generated. These contain +information that maps the code object executable code and data to the source +language constructs. It can be used by tools such as debuggers and profilers. + +Address Space Mapping +~~~~~~~~~~~~~~~~~~~~~ + +The following address space mapping is used: + + .. table:: AMDGPU DWARF Address Space Mapping + :name: amdgpu-dwarf-address-space-mapping-table + + =================== ================= + DWARF Address Space Memory Space + =================== ================= + 1 Private (Scratch) + 2 Local (group/LDS) + *omitted* Global + *omitted* Constant + *omitted* Generic (Flat) + *not supported* Region (GDS) + =================== ================= + +See :ref:`amdgpu-address-spaces` for information on the memory space terminology +used in the table. + +An ``address_class`` attribute is generated on pointer type DIEs to specify the +DWARF address space of the value of the pointer when it is in the *private* or +*local* address space. Otherwise the attribute is omitted. + +An ``XDEREF`` operation is generated in location list expressions for variables +that are allocated in the *private* and *local* address spaces. Otherwise no +``XDEREF`` is generated. + +Register Mapping +~~~~~~~~~~~~~~~~ + +*This section is WIP.* + +.. TODO + Define DWARF register enumeration. + + If want to present a wavefront state then should expose vector registers as + 64 wide (rather than per work-item view that LLVM uses). Either as separate + registers, or a 64x4 byte single register. In either case use a new LANE op + (akin to XDEREF) to select the current lane usage in a location + expression. This would also allow scalar register spilling to vector register + lanes to be expressed (currently no debug information is being generated for + spilling). If choose a wide single register approach then use LANE in + conjunction with PIECE operation to select the dword part of the register for + the current lane. If the separate register approach then use LANE to select + the register. + +Source Text +~~~~~~~~~~~ + +*This section is WIP.* + +.. TODO + DWARF extension to include runtime generated source text. + +.. _amdgpu-code-conventions: + +Code Conventions +================ + +AMDHSA +------ + +This section provides code conventions used when the target triple OS is +``amdhsa`` (see :ref:`amdgpu-target-triples`). + +Kernel Dispatch +~~~~~~~~~~~~~~~ + +The HSA architected queuing language (AQL) defines a user space memory interface +that can be used to control the dispatch of kernels, in an agent independent +way. An agent can have zero or more AQL queues created for it using the ROCm +runtime, in which AQL packets (all of which are 64 bytes) can be placed. See the +*HSA Platform System Architecture Specification* [HSA]_ for the AQL queue +mechanics and packet layouts. + +The packet processor of a kernel agent is responsible for detecting and +dispatching HSA kernels from the AQL queues associated with it. For AMD GPUs the +packet processor is implemented by the hardware command processor (CP), +asynchronous dispatch controller (ADC) and shader processor input controller +(SPI). + +The ROCm runtime can be used to allocate an AQL queue object. It uses the kernel +mode driver to initialize and register the AQL queue with CP. + +To dispatch a kernel the following actions are performed. This can occur in the +CPU host program, or from an HSA kernel executing on a GPU. + +1. A pointer to an AQL queue for the kernel agent on which the kernel is to be + executed is obtained. +2. A pointer to the kernel descriptor (see + :ref:`amdgpu-amdhsa-kernel-descriptor`) of the kernel to execute is + obtained. It must be for a kernel that is contained in a code object that + was loaded by the ROCm runtime on the kernel agent with which the AQL queue is + associated. +3. Space is allocated for the kernel arguments using the ROCm runtime allocator + for a memory region with the kernarg property for the kernel agent that will + execute the kernel. It must be at least 16 byte aligned. +4. Kernel argument values are assigned to the kernel argument memory + allocation. The layout is defined in the *HSA Programmer’s Language Reference* + [HSA]_. For AMDGPU the kernel execution directly accesses the kernel argument + memory in the same way constant memory is accessed. (Note that the HSA + specification allows an implementation to copy the kernel argument contents to + another location that is accessed by the kernel.) +5. An AQL kernel dispatch packet is created on the AQL queue. The ROCm runtime + API uses 64 bit atomic operations to reserve space in the AQL queue for the + packet. The packet must be set up, and the final write must use an atomic + store release to set the packet kind to ensure the packet contents are + visible to the kernel agent. AQL defines a doorbell signal mechanism to + notify the kernel agent that the AQL queue has been updated. These rules, and + the layout of the AQL queue and kernel dispatch packet, are defined in the *HSA + System Architecture Specification* [HSA]_. +6. 
A kernel dispatch packet includes information about the actual dispatch, + such as grid and work-group size, together with information from the code + object about the kernel, such as segment sizes. The ROCm runtime queries on + the kernel symbol can be used to obtain the code object values which are + recorded in the :ref:`amdgpu-code-object-metadata`. +7. CP executes micro-code and is responsible for detecting and setting up the + GPU to execute the wavefronts of a kernel dispatch. +8. CP ensures that when a wavefront starts executing the kernel machine + code, the scalar general purpose registers (SGPR) and vector general purpose + registers (VGPR) are set up as required by the machine code. The required + setup is defined in the :ref:`amdgpu-amdhsa-kernel-descriptor`. The initial + register state is defined in + :ref:`amdgpu-amdhsa-initial-kernel-execution-state`. +9. The prolog of the kernel machine code (see + :ref:`amdgpu-amdhsa-kernel-prolog`) sets up the machine state as necessary + before continuing executing the machine code that corresponds to the kernel. +10. When the kernel dispatch has completed execution, CP signals the completion + signal specified in the kernel dispatch packet if not 0. + +.. _amdgpu-amdhsa-memory-spaces: + +Memory Spaces +~~~~~~~~~~~~~ + +The memory space properties are: + + .. table:: AMDHSA Memory Spaces + :name: amdgpu-amdhsa-memory-spaces-table + + ================= =========== ======== ======= ================== + Memory Space Name HSA Segment Hardware Address NULL Value + Name Name Size + ================= =========== ======== ======= ================== + Private private scratch 32 0x00000000 + Local group LDS 32 0xFFFFFFFF + Global global global 64 0x0000000000000000 + Constant constant *same as 64 0x0000000000000000 + global* + Generic flat flat 64 0x0000000000000000 + Region N/A GDS 32 *not implemented + for AMDHSA* + ================= =========== ======== ======= ================== + +The global and constant memory spaces both use global virtual addresses, which +are the same virtual address space used by the CPU. However, some virtual +addresses may only be accessible to the CPU, some only accessible by the GPU, +and some by both. + +Using the constant memory space indicates that the data will not change during +the execution of the kernel. This allows scalar read instructions to be +used. The vector and scalar L1 caches are invalidated of volatile data before +each kernel dispatch execution to allow constant memory to change values between +kernel dispatches. + +The local memory space uses the hardware Local Data Store (LDS) which is +automatically allocated when the hardware creates work-groups of wavefronts, and +freed when all the wavefronts of a work-group have terminated. The data store +(DS) instructions can be used to access it. + +The private memory space uses the hardware scratch memory support. If the kernel +uses scratch, then the hardware allocates memory that is accessed using +wavefront lane dword (4 byte) interleaving. The mapping used from private +address to physical address is: + + ``wavefront-scratch-base + + (private-address * wavefront-size * 4) + + (wavefront-lane-id * 4)`` + +There are different ways that the wavefront scratch base address is determined +by a wavefront (see :ref:`amdgpu-amdhsa-initial-kernel-execution-state`). This +memory can be accessed in an interleaved manner using buffer instructions with +the scratch buffer descriptor and per wave scratch offset, by the scratch +instructions, or by flat instructions. If each lane of a wavefront accesses the +same private address, the interleaving results in adjacent dwords being accessed +and hence requires fewer cache lines to be fetched. Multi-dword access is not +supported except by flat and scratch instructions in GFX9.
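+ +A sketch of the private address computation above (the parameter and helper +names are illustrative; the body follows the formula given above): + +.. code-block:: c++ + + #include <cstdint> + + // wavefront-scratch-base + (private-address * wavefront-size * 4) + + // (wavefront-lane-id * 4) + uint64_t scratchPhysicalAddress(uint64_t WavefrontScratchBase, + uint64_t PrivateAddress, + uint32_t WavefrontSize, uint32_t LaneId) { + return WavefrontScratchBase + + PrivateAddress * WavefrontSize * 4 + // interleaved per wavefront + uint64_t(LaneId) * 4; // dword slot for this lane + }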
This +memory can be accessed in an interleaved manner using buffer instruction with +the scratch buffer descriptor and per wave scratch offset, by the scratch +instructions, or by flat instructions. If each lane of a wavefront accesses the +same private address, the interleaving results in adjacent dwords being accessed +and hence requires fewer cache lines to be fetched. Multi-dword access is not +supported except by flat and scratch instructions in GFX9. + +The generic address space uses the hardware flat address support available in +GFX7-GFX9. This uses two fixed ranges of virtual addresses (the private and +local appertures), that are outside the range of addressible global memory, to +map from a flat address to a private or local address. + +FLAT instructions can take a flat address and access global, private (scratch) +and group (LDS) memory depending in if the address is within one of the +apperture ranges. Flat access to scratch requires hardware aperture setup and +setup in the kernel prologue (see :ref:`amdgpu-amdhsa-flat-scratch`). Flat +access to LDS requires hardware aperture setup and M0 (GFX7-GFX8) register setup +(see :ref:`amdgpu-amdhsa-m0`). + +To convert between a segment address and a flat address the base address of the +appertures address can be used. For GFX7-GFX8 these are available in the +:ref:`amdgpu-amdhsa-hsa-aql-queue` the address of which can be obtained with +Queue Ptr SGPR (see :ref:`amdgpu-amdhsa-initial-kernel-execution-state`). For +GFX9 the appature base addresses are directly available as inline constant +registers ``SRC_SHARED_BASE/LIMIT`` and ``SRC_PRIVATE_BASE/LIMIT``. In 64 bit +address mode the apperture sizes are 2^32 bytes and the base is aligned to 2^32 +which makes it easier to convert from flat to segment or segment to flat. + +HSA Image and Samplers +~~~~~~~~~~~~~~~~~~~~~~ + +Image and sample handles created by the ROCm runtime are 64 bit addresses of a +hardware 32 byte V# and 48 byte S# object respectively. In order to support the +HSA ``query_sampler`` operations two extra dwords are used to store the HSA BRIG +enumeration values for the queries that are not trivially deducible from the S# +representation. + +HSA Signals +~~~~~~~~~~~ + +Signal handles created by the ROCm runtime are 64 bit addresses of a structure +allocated in memory accessible from both the CPU and GPU. The structure is +defined by the ROCm runtime and subject to change between releases (see +[AMD-ROCm-github]_). + +.. _amdgpu-amdhsa-hsa-aql-queue: + +HSA AQL Queue +~~~~~~~~~~~~~ + +The AQL queue structure is defined by the ROCm runtime and subject to change +between releases (see [AMD-ROCm-github]_). For some processors it contains +fields needed to implement certain language features such as the flat address +aperture bases. It also contains fields used by CP such as managing the +allocation of scratch memory. + +.. _amdgpu-amdhsa-kernel-descriptor: + +Kernel Descriptor +~~~~~~~~~~~~~~~~~ + +A kernel descriptor consists of the information needed by CP to initiate the +execution of a kernel, including the entry point address of the machine code +that implements the kernel. + +Kernel Descriptor for GFX6-GFX9 ++++++++++++++++++++++++++++++++ + +CP microcode requires the Kernel descritor to be allocated on 64 byte alignment. + + .. 
table:: Kernel Descriptor for GFX6-GFX9 + :name: amdgpu-amdhsa-kernel-descriptor-gfx6-gfx9-table + + ======= ======= =============================== =========================== + Bits Size Field Name Description + ======= ======= =============================== =========================== + 31:0 4 bytes group_segment_fixed_size The amount of fixed local + address space memory + required for a work-group + in bytes. This does not + include any dynamically + allocated local address + space memory that may be + added when the kernel is + dispatched. + 63:32 4 bytes private_segment_fixed_size The amount of fixed + private address space + memory required for a + work-item in bytes. If + is_dynamic_callstack is 1 + then additional space must + be added to this value for + the call stack. + 95:64 4 bytes max_flat_workgroup_size Maximum flat work-group + size supported by the + kernel in work-items. + 96 1 bit is_dynamic_call_stack Indicates if the generated + machine code is using a + dynamically sized call + stack. + 97 1 bit is_xnack_enabled Indicates if the generated + machine code is capable of + supporting XNACK. + 127:98 30 bits Reserved. Must be 0. + 191:128 8 bytes kernel_code_entry_byte_offset Byte offset (possibly + negative) from base + address of kernel + descriptor to kernel's + entry point instruction + which must be 256 byte + aligned. + 383:192 24 Reserved. Must be 0. + bytes + 415:384 4 bytes compute_pgm_rsrc1 Compute Shader (CS) + program settings used by + CP to set up + ``COMPUTE_PGM_RSRC1`` + configuration + register. See + :ref:`amdgpu-amdhsa-compute_pgm_rsrc1_t-gfx6-gfx9-table`. + 447:416 4 bytes compute_pgm_rsrc2 Compute Shader (CS) + program settings used by + CP to set up + ``COMPUTE_PGM_RSRC2`` + configuration + register. See + :ref:`amdgpu-amdhsa-compute_pgm_rsrc2-gfx6-gfx9-table`. + 448 1 bit enable_sgpr_private_segment Enable the setup of the + _buffer SGPR user data registers + (see + :ref:`amdgpu-amdhsa-initial-kernel-execution-state`). + + The total number of SGPR + user data registers + requested must not exceed + 16 and match value in + ``compute_pgm_rsrc2.user_sgpr.user_sgpr_count``. + Any requests beyond 16 + will be ignored. + 449 1 bit enable_sgpr_dispatch_ptr *see above* + 450 1 bit enable_sgpr_queue_ptr *see above* + 451 1 bit enable_sgpr_kernarg_segment_ptr *see above* + 452 1 bit enable_sgpr_dispatch_id *see above* + 453 1 bit enable_sgpr_flat_scratch_init *see above* + 454 1 bit enable_sgpr_private_segment *see above* + _size + 455 1 bit enable_sgpr_grid_workgroup Not implemented in CP and + _count_X should always be 0. + 456 1 bit enable_sgpr_grid_workgroup Not implemented in CP and + _count_Y should always be 0. + 457 1 bit enable_sgpr_grid_workgroup Not implemented in CP and + _count_Z should always be 0. + 463:458 6 bits Reserved. Must be 0. + 511:464 4 Reserved. Must be 0. + bytes + 512 **Total size 64 bytes.** + ======= =================================================================== + +.. + + .. 
table:: compute_pgm_rsrc1 for GFX6-GFX9 + :name: amdgpu-amdhsa-compute_pgm_rsrc1_t-gfx6-gfx9-table + + ======= ======= =============================== =========================================================================== + Bits Size Field Name Description + ======= ======= =============================== =========================================================================== + 5:0 6 bits granulated_workitem_vgpr_count Number of vector registers + used by each work-item, + granularity is device + specific: + + GFX6-GFX9 + roundup((max-vgpr + 1) + / 4) - 1 + + Used by CP to set up + ``COMPUTE_PGM_RSRC1.VGPRS``. + 9:6 4 bits granulated_wavefront_sgpr_count Number of scalar registers + used by a wavefront, + granularity is device + specific: + + GFX6-GFX8 + roundup((max-sgpr + 1) + / 8) - 1 + GFX9 + roundup((max-sgpr + 1) + / 16) - 1 + + Includes the special SGPRs + for VCC, Flat Scratch (for + GFX7 onwards) and XNACK + (for GFX8 onwards). It does + not include the 16 SGPR + added if a trap handler is + enabled. + + Used by CP to set up + ``COMPUTE_PGM_RSRC1.SGPRS``. + 11:10 2 bits priority Must be 0. + + Start executing wavefront + at the specified priority. + + CP is responsible for + filling in + ``COMPUTE_PGM_RSRC1.PRIORITY``. + 13:12 2 bits float_mode_round_32 Wavefront starts execution + with specified rounding + mode for single (32 + bit) floating point + precision floating point + operations. + + Floating point rounding + mode values are defined in + :ref:`amdgpu-amdhsa-floating-point-rounding-mode-enumeration-values-table`. + + Used by CP to set up + ``COMPUTE_PGM_RSRC1.FLOAT_MODE``. + 15:14 2 bits float_mode_round_16_64 Wavefront starts execution + with specified rounding + denorm mode for half/double (16 + and 64 bit) floating point + precision floating point + operations. + + Floating point rounding + mode values are defined in + :ref:`amdgpu-amdhsa-floating-point-rounding-mode-enumeration-values-table`. + + Used by CP to set up + ``COMPUTE_PGM_RSRC1.FLOAT_MODE``. + 17:16 2 bits float_mode_denorm_32 Wavefront starts execution + with specified denorm mode + for single (32 + bit) floating point + precision floating point + operations. + + Floating point denorm mode + values are defined in + :ref:`amdgpu-amdhsa-floating-point-denorm-mode-enumeration-values-table`. + + Used by CP to set up + ``COMPUTE_PGM_RSRC1.FLOAT_MODE``. + 19:18 2 bits float_mode_denorm_16_64 Wavefront starts execution + with specified denorm mode + for half/double (16 + and 64 bit) floating point + precision floating point + operations. + + Floating point denorm mode + values are defined in + :ref:`amdgpu-amdhsa-floating-point-denorm-mode-enumeration-values-table`. + + Used by CP to set up + ``COMPUTE_PGM_RSRC1.FLOAT_MODE``. + 20 1 bit priv Must be 0. + + Start executing wavefront + in privilege trap handler + mode. + + CP is responsible for + filling in + ``COMPUTE_PGM_RSRC1.PRIV``. + 21 1 bit enable_dx10_clamp Wavefront starts execution + with DX10 clamp mode + enabled. Used by the vector + ALU to force DX-10 style + treatment of NaN's (when + set, clamp NaN to zero, + otherwise pass NaN + through). + + Used by CP to set up + ``COMPUTE_PGM_RSRC1.DX10_CLAMP``. + 22 1 bit debug_mode Must be 0. + + Start executing wavefront + in single step mode. + + CP is responsible for + filling in + ``COMPUTE_PGM_RSRC1.DEBUG_MODE``. + 23 1 bit enable_ieee_mode Wavefront starts execution + with IEEE mode + enabled. 
Floating point + opcodes that support + exception flag gathering + will quiet and propagate + signaling-NaN inputs per + IEEE 754-2008. Min_dx10 and + max_dx10 become IEEE + 754-2008 compliant due to + signaling-NaN propagation + and quieting. + + Used by CP to set up + ``COMPUTE_PGM_RSRC1.IEEE_MODE``. + 24 1 bit bulky Must be 0. + + Only one work-group allowed + to execute on a compute + unit. + + CP is responsible for + filling in + ``COMPUTE_PGM_RSRC1.BULKY``. + 25 1 bit cdbg_user Must be 0. + + Flag that can be used to + control debugging code. + + CP is responsible for + filling in + ``COMPUTE_PGM_RSRC1.CDBG_USER``. + 31:26 6 bits Reserved. Must be 0. + 32 **Total size 4 bytes** + ======= ===================================================================================================================
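+ +For example, the granulated register counts in the table above can be computed +as follows (a sketch; helper names are illustrative): + +.. code-block:: c++ + + #include <cstdint> + + constexpr uint32_t roundUpDiv(uint32_t A, uint32_t B) { return (A + B - 1) / B; } + + // granulated_workitem_vgpr_count: roundup((max-vgpr + 1) / 4) - 1 + constexpr uint32_t granulatedVgprCount(uint32_t MaxVgpr) { + return roundUpDiv(MaxVgpr + 1, 4) - 1; + } + + // granulated_wavefront_sgpr_count: the granule is 8 for GFX6-GFX8 and 16 + // for GFX9. + constexpr uint32_t granulatedSgprCount(uint32_t MaxSgpr, bool IsGfx9) { + return roundUpDiv(MaxSgpr + 1, IsGfx9 ? 16 : 8) - 1; + } + + // A kernel whose highest used VGPR is v23 encodes roundup(24 / 4) - 1 = 5. + static_assert(granulatedVgprCount(23) == 5, "follows the formula above");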
+ +.. + + .. table:: compute_pgm_rsrc2 for GFX6-GFX9 + :name: amdgpu-amdhsa-compute_pgm_rsrc2-gfx6-gfx9-table + + ======= ======= =============================== =========================================================================== + Bits Size Field Name Description + ======= ======= =============================== =========================================================================== + 0 1 bit enable_sgpr_private_segment Enable the setup of the + _wave_offset SGPR wave scratch offset + system register (see + :ref:`amdgpu-amdhsa-initial-kernel-execution-state`). + + Used by CP to set up + ``COMPUTE_PGM_RSRC2.SCRATCH_EN``. + 5:1 5 bits user_sgpr_count The total number of SGPR + user data registers + requested. This number must + match the number of user + data registers enabled. + + Used by CP to set up + ``COMPUTE_PGM_RSRC2.USER_SGPR``. + 6 1 bit enable_trap_handler Set to 1 if code contains a + TRAP instruction which + requires a trap handler to + be enabled. + + CP sets + ``COMPUTE_PGM_RSRC2.TRAP_PRESENT`` + if the runtime has + installed a trap handler + regardless of the setting + of this field. + 7 1 bit enable_sgpr_workgroup_id_x Enable the setup of the + system SGPR register for + the work-group id in the X + dimension (see + :ref:`amdgpu-amdhsa-initial-kernel-execution-state`). + + Used by CP to set up + ``COMPUTE_PGM_RSRC2.TGID_X_EN``. + 8 1 bit enable_sgpr_workgroup_id_y Enable the setup of the + system SGPR register for + the work-group id in the Y + dimension (see + :ref:`amdgpu-amdhsa-initial-kernel-execution-state`). + + Used by CP to set up + ``COMPUTE_PGM_RSRC2.TGID_Y_EN``. + 9 1 bit enable_sgpr_workgroup_id_z Enable the setup of the + system SGPR register for + the work-group id in the Z + dimension (see + :ref:`amdgpu-amdhsa-initial-kernel-execution-state`). + + Used by CP to set up + ``COMPUTE_PGM_RSRC2.TGID_Z_EN``. + 10 1 bit enable_sgpr_workgroup_info Enable the setup of the + system SGPR register for + work-group information (see + :ref:`amdgpu-amdhsa-initial-kernel-execution-state`). + + Used by CP to set up + ``COMPUTE_PGM_RSRC2.TGID_SIZE_EN``. + 12:11 2 bits enable_vgpr_workitem_id Enable the setup of the + VGPR system registers used + for the work-item ID. + :ref:`amdgpu-amdhsa-system-vgpr-work-item-id-enumeration-values-table` + defines the values. + + Used by CP to set up + ``COMPUTE_PGM_RSRC2.TIDIG_CMP_CNT``. + 13 1 bit enable_exception_address_watch Must be 0. + + Wavefront starts execution + with address watch + exceptions enabled which + are generated when L1 has + witnessed a thread access + an *address of + interest*. + + CP is responsible for + filling in the address + watch bit in + ``COMPUTE_PGM_RSRC2.EXCP_EN_MSB`` + according to what the + runtime requests. + 14 1 bit enable_exception_memory Must be 0. + + Wavefront starts execution + with memory violation + exceptions enabled which + are generated + when a memory violation has + occurred for this wave from + L1 or LDS + (write-to-read-only-memory, + mis-aligned atomic, LDS + address out of range, + illegal address, etc.). + + CP sets the memory + violation bit in + ``COMPUTE_PGM_RSRC2.EXCP_EN_MSB`` + according to what the + runtime requests. + 23:15 9 bits granulated_lds_size Must be 0. + + CP uses the rounded value + from the dispatch packet, + not this value, as the + dispatch may contain + dynamically allocated group + segment memory. CP writes + directly to + ``COMPUTE_PGM_RSRC2.LDS_SIZE``. + + Amount of group segment + (LDS) to allocate for each + work-group. Granularity is + device specific: + + GFX6: + roundup(lds-size / (64 * 4)) + GFX7-GFX9: + roundup(lds-size / (128 * 4)) + + 24 1 bit enable_exception_ieee_754_fp Wavefront starts execution + _invalid_operation with specified exceptions + enabled. + + Used by CP to set up + ``COMPUTE_PGM_RSRC2.EXCP_EN`` + (set from bits 0..6). + + IEEE 754 FP Invalid + Operation + 25 1 bit enable_exception_fp_denormal FP Denormal one or more + _source input operands is a + denormal number + 26 1 bit enable_exception_ieee_754_fp IEEE 754 FP Division by + _division_by_zero Zero + 27 1 bit enable_exception_ieee_754_fp IEEE 754 FP Overflow + _overflow + 28 1 bit enable_exception_ieee_754_fp IEEE 754 FP Underflow + _underflow + 29 1 bit enable_exception_ieee_754_fp IEEE 754 FP Inexact + _inexact + 30 1 bit enable_exception_int_divide_by Integer Division by Zero + _zero (rcp_iflag_f32 instruction + only) + 31 1 bit Reserved. Must be 0. + 32 **Total size 4 bytes.** + ======= =================================================================================================================== + +.. + + .. table:: Floating Point Rounding Mode Enumeration Values + :name: amdgpu-amdhsa-floating-point-rounding-mode-enumeration-values-table + + ===================================== ===== =============================== + Enumeration Name Value Description + ===================================== ===== =============================== + AMD_FLOAT_ROUND_MODE_NEAR_EVEN 0 Round Ties To Even + AMD_FLOAT_ROUND_MODE_PLUS_INFINITY 1 Round Toward +infinity + AMD_FLOAT_ROUND_MODE_MINUS_INFINITY 2 Round Toward -infinity + AMD_FLOAT_ROUND_MODE_ZERO 3 Round Toward 0 + ===================================== ===== =============================== + +.. + + .. table:: Floating Point Denorm Mode Enumeration Values + :name: amdgpu-amdhsa-floating-point-denorm-mode-enumeration-values-table + + ===================================== ===== =============================== + Enumeration Name Value Description + ===================================== ===== =============================== + AMD_FLOAT_DENORM_MODE_FLUSH_SRC_DST 0 Flush Source and Destination + Denorms + AMD_FLOAT_DENORM_MODE_FLUSH_DST 1 Flush Output Denorms + AMD_FLOAT_DENORM_MODE_FLUSH_SRC 2 Flush Source Denorms + AMD_FLOAT_DENORM_MODE_FLUSH_NONE 3 No Flush + ===================================== ===== =============================== + +.. + + .. 
table:: System VGPR Work-Item ID Enumeration Values + :name: amdgpu-amdhsa-system-vgpr-work-item-id-enumeration-values-table + + ===================================== ===== =============================== + Enumeration Name Value Description + ===================================== ===== =============================== + AMD_SYSTEM_VGPR_WORKITEM_ID_X 0 Set work-item X dimension ID. + AMD_SYSTEM_VGPR_WORKITEM_ID_X_Y 1 Set work-item X and Y + dimensions ID. + AMD_SYSTEM_VGPR_WORKITEM_ID_X_Y_Z 2 Set work-item X, Y and Z + dimensions ID. + AMD_SYSTEM_VGPR_WORKITEM_ID_UNDEFINED 3 Undefined. + ===================================== ===== =============================== + +.. _amdgpu-amdhsa-initial-kernel-execution-state: + +Initial Kernel Execution State +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This section defines the register state that will be set up by the packet +processor prior to the start of execution of every wavefront. This is limited by +the constraints of the hardware controllers of CP/ADC/SPI. + +The order of the SGPR registers is defined, but the compiler can specify which +ones are actually setup in the kernel descriptor using the ``enable_sgpr_*`` bit +fields (see :ref:`amdgpu-amdhsa-kernel-descriptor`). The register numbers used +for enabled registers are dense starting at SGPR0: the first enabled register is +SGPR0, the next enabled register is SGPR1 etc.; disabled registers do not have +an SGPR number. + +The initial SGPRs comprise up to 16 User SGPRs that are set by CP and apply to +all waves of the grid. It is possible to specify more than 16 User SGPRs using +the ``enable_sgpr_*`` bit fields, in which case only the first 16 are actually +initialized. These are then immediately followed by the System SGPRs that are +set up by ADC/SPI and can have different values for each wave of the grid +dispatch. + +SGPR register initial state is defined in +:ref:`amdgpu-amdhsa-sgpr-register-set-up-order-table`. + + .. table:: SGPR Register Set Up Order + :name: amdgpu-amdhsa-sgpr-register-set-up-order-table + + ========== ========================== ====== ============================== + SGPR Order Name Number Description + (kernel descriptor enable of + field) SGPRs + ========== ========================== ====== ============================== + First Private Segment Buffer 4 V# that can be used, together + (enable_sgpr_private with Scratch Wave Offset as an + _segment_buffer) offset, to access the private + memory space using a segment + address. + + CP uses the value provided by + the runtime. + then Dispatch Ptr 2 64 bit address of AQL dispatch + (enable_sgpr_dispatch_ptr) packet for kernel dispatch + actually executing. + then Queue Ptr 2 64 bit address of amd_queue_t + (enable_sgpr_queue_ptr) object for AQL queue on which + the dispatch packet was + queued. + then Kernarg Segment Ptr 2 64 bit address of Kernarg + (enable_sgpr_kernarg segment. This is directly + _segment_ptr) copied from the + kernarg_address in the kernel + dispatch packet. + + Having CP load it once avoids + loading it at the beginning of + every wavefront. + then Dispatch Id 2 64 bit Dispatch ID of the + (enable_sgpr_dispatch_id) dispatch packet being + executed. + then Flat Scratch Init 2 This is 2 SGPRs: + (enable_sgpr_flat_scratch + _init) GFX6 + Not supported. + GFX7-GFX8 + The first SGPR is a 32 bit + byte offset from + ``SH_HIDDEN_PRIVATE_BASE_VIMID`` + to per SPI base of memory + for scratch for the queue + executing the kernel + dispatch. CP obtains this + from the runtime. 
+ + This is the same offset used + in computing the Scratch + Segment Buffer base + address. The value of + Scratch Wave Offset must be + added by the kernel machine + code and moved to SGPRn-4 + for use as the FLAT SCRATCH + BASE in flat memory + instructions. + + The second SGPR is 32 bit + byte size of a single + work-item’s scratch memory + usage. This is directly + loaded from the kernel + dispatch packet Private + Segment Byte Size and + rounded up to a multiple of + DWORD. + + The kernel code must move it + to SGPRn-3 for use as the FLAT + SCRATCH SIZE in flat memory + instructions. Having CP load + it once avoids loading it at + the beginning of every + wavefront. + GFX9 + This is the 64 bit base + address of the per SPI + scratch backing memory + managed by SPI for the queue + executing the kernel + dispatch. CP obtains this + from the runtime (and + divides it if there are + multiple Shader Arrays each + with its own SPI). The value + of Scratch Wave Offset must + be added by the kernel + machine code and moved to + SGPRn-4 and SGPRn-3 for use + as the FLAT SCRATCH BASE in + flat memory instructions. + then Private Segment Size 1 The 32 bit byte size of a + (enable_sgpr_private single work-item’s scratch + _segment_size) memory allocation. This is the + value from the kernel dispatch + packet Private Segment Byte + Size rounded up by CP to a + multiple of DWORD. + + Having CP load it once avoids + loading it at the beginning of + every wavefront. + + This is not used for + GFX7-GFX8 since it is the same + value as the second SGPR of + Flat Scratch Init. However, it + may be needed for GFX9 which + changes the meaning of the + Flat Scratch Init value. + then Grid Work-Group Count X 1 32 bit count of the number of + (enable_sgpr_grid work-groups in the X dimension + _workgroup_count_X) for the grid being + executed. Computed from the + fields in the kernel dispatch + packet as ((grid_size.x + + workgroup_size.x - 1) / + workgroup_size.x). + then Grid Work-Group Count Y 1 32 bit count of the number of + (enable_sgpr_grid work-groups in the Y dimension + _workgroup_count_Y && for the grid being + less than 16 previous executed. Computed from the + SGPRs) fields in the kernel dispatch + packet as ((grid_size.y + + workgroup_size.y - 1) / + workgroup_size.y). + + Only initialized if <16 + previous SGPRs initialized. + then Grid Work-Group Count Z 1 32 bit count of the number of + (enable_sgpr_grid work-groups in the Z dimension + _workgroup_count_Z && for the grid being + less than 16 previous executed. Computed from the + SGPRs) fields in the kernel dispatch + packet as ((grid_size.z + + workgroup_size.z - 1) / + workgroup_size.z). + + Only initialized if <16 + previous SGPRs initialized. + then Work-Group Id X 1 32 bit work-group id in X + (enable_sgpr_workgroup_id dimension of grid for + _X) wavefront. + then Work-Group Id Y 1 32 bit work-group id in Y + (enable_sgpr_workgroup_id dimension of grid for + _Y) wavefront. + then Work-Group Id Z 1 32 bit work-group id in Z + (enable_sgpr_workgroup_id dimension of grid for + _Z) wavefront. + then Work-Group Info 1 {first_wave, 14’b0000, + (enable_sgpr_workgroup ordered_append_term[10:0], + _info) threadgroup_size_in_waves[5:0]} + then Scratch Wave Offset 1 32 bit byte offset from base + (enable_sgpr_private of scratch base of queue + _segment_wave_offset) executing the kernel + dispatch. Must be used as an + offset with Private + segment address when using + Scratch Segment Buffer. 
It + must be used to set up FLAT + SCRATCH for flat addressing + (see + :ref:`amdgpu-amdhsa-flat-scratch`). + ========== ========================== ====== ============================== + +The order of the VGPR registers is defined, but the compiler can specify which +ones are actually setup in the kernel descriptor using the ``enable_vgpr*`` bit +fields (see :ref:`amdgpu-amdhsa-kernel-descriptor`). The register numbers used +for enabled registers are dense starting at VGPR0: the first enabled register is +VGPR0, the next enabled register is VGPR1 etc.; disabled registers do not have a +VGPR number. + +VGPR register initial state is defined in +:ref:`amdgpu-amdhsa-vgpr-register-set-up-order-table`. + + .. table:: VGPR Register Set Up Order + :name: amdgpu-amdhsa-vgpr-register-set-up-order-table + + ========== ========================== ====== ============================== + VGPR Order Name Number Description + (kernel descriptor enable of + field) VGPRs + ========== ========================== ====== ============================== + First Work-Item Id X 1 32 bit work item id in X + (Always initialized) dimension of work-group for + wavefront lane. + then Work-Item Id Y 1 32 bit work item id in Y + (enable_vgpr_workitem_id dimension of work-group for + > 0) wavefront lane. + then Work-Item Id Z 1 32 bit work item id in Z + (enable_vgpr_workitem_id dimension of work-group for + > 1) wavefront lane. + ========== ========================== ====== ============================== + +The setting of registers is done by GPU CP/ADC/SPI hardware as follows: + +1. SGPRs before the Work-Group Ids are set by CP using the 16 User Data + registers. +2. Work-group Id registers X, Y, Z are set by ADC which supports any + combination including none. +3. Scratch Wave Offset is set by SPI on a per wave basis, which is why its value + cannot be included with the flat scratch init value which is per queue. +4. The VGPRs are set by SPI which only supports specifying either (X), (X, Y) + or (X, Y, Z). + +The Flat Scratch registers are adjacent SGPRs so they can be moved as a 64 bit +value to the hardware required SGPRn-3 and SGPRn-4 respectively. + +The global segment can be accessed either using buffer instructions (GFX6 which +has V# 64 bit address support), flat instructions (GFX7-GFX9), or global +instructions (GFX9). + +If buffer operations are used then the compiler can generate a V# with the +following properties: + +* base address of 0 +* no swizzle +* ATC: 1 if IOMMU present (such as APU) +* ptr64: 1 +* MTYPE set to support memory coherence that matches the runtime (such as CC for + APU and NC for dGPU). + +.. _amdgpu-amdhsa-kernel-prolog: + +Kernel Prolog +~~~~~~~~~~~~~ + +.. _amdgpu-amdhsa-m0: + +M0 +++ + +GFX6-GFX8 + The M0 register must be initialized with a value that is at least the total + LDS size if the kernel may access LDS via DS or flat operations. The total LDS + size is available in the dispatch packet. For M0, it is also possible to use + the maximum possible value of LDS for the given target (0x7FFF for GFX6 and + 0xFFFF for GFX7-GFX8). +GFX9 + The M0 register is not used for range checking LDS accesses and so does not + need to be initialized in the prolog. + +.. _amdgpu-amdhsa-flat-scratch: + +Flat Scratch +++++++++++++ + +If the kernel may use flat operations to access scratch memory, the prolog code +must set up the FLAT_SCRATCH register pair (FLAT_SCRATCH_LO/FLAT_SCRATCH_HI which +are in SGPRn-4/SGPRn-3). 
Initialization uses Flat Scratch Init and Scratch Wave +Offset SGPR registers (see :ref:`amdgpu-amdhsa-initial-kernel-execution-state`): + +GFX6 + Flat scratch is not supported. + +GFX7-GFX8 + 1. The low word of Flat Scratch Init is the 32 bit byte offset from + ``SH_HIDDEN_PRIVATE_BASE_VIMID`` to the base of scratch backing memory + being managed by SPI for the queue executing the kernel dispatch. This is + the same value used in the Scratch Segment Buffer V# base address. The + prolog must add the value of Scratch Wave Offset to get the wave's byte + scratch backing memory offset from ``SH_HIDDEN_PRIVATE_BASE_VIMID``. Since + FLAT_SCRATCH_LO is in units of 256 bytes, the offset must be right shifted + by 8 before moving into FLAT_SCRATCH_LO. + 2. The second word of Flat Scratch Init is the 32 bit byte size of a single + work-item's scratch memory usage. This is directly loaded from the kernel + dispatch packet Private Segment Byte Size and rounded up to a multiple of + DWORD. Having CP load it once avoids loading it at the beginning of every + wavefront. The prolog must move it to FLAT_SCRATCH_HI for use as FLAT SCRATCH + SIZE. +GFX9 + The Flat Scratch Init is the 64 bit address of the base of scratch backing + memory being managed by SPI for the queue executing the kernel dispatch. The + prolog must add the value of Scratch Wave Offset and move the result to the + FLAT_SCRATCH pair for use as the flat scratch base in flat memory instructions. + +.. _amdgpu-amdhsa-memory-model: + +Memory Model +~~~~~~~~~~~~ + +This section describes the mapping of the LLVM memory model onto AMDGPU machine code +(see :ref:`memmodel`). *The implementation is WIP.* + +.. TODO + Update when implementation complete. + + Support more relaxed OpenCL memory model to be controlled by environment + component of target triple. + +The AMDGPU backend supports the memory synchronization scopes specified in +:ref:`amdgpu-memory-scopes`. + +The code sequences used to implement the memory model are defined in table +:ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx6-gfx9-table`. + +The sequences specify the order of instructions that a single thread must +execute. The ``s_waitcnt`` and ``buffer_wbinvl1_vol`` are defined with respect +to other memory instructions executed by the same thread. This allows them to be +moved earlier or later, which can allow them to be combined with other instances +of the same instruction, or hoisted/sunk out of loops to improve +performance. Only the instructions related to the memory model are given; +additional ``s_waitcnt`` instructions are required to ensure registers are +defined before being used. These may be able to be combined with the memory +model ``s_waitcnt`` instructions as described above. + +The AMDGPU memory model supports both the HSA [HSA]_ memory model and the +OpenCL [OpenCL]_ memory model. The HSA memory model uses a single happens-before +relation for all address spaces (see :ref:`amdgpu-address-spaces`). The OpenCL +memory model has separate happens-before relations for the global and +local address spaces; only a fence specifying both the global and local address +spaces joins the relations. Since the LLVM ``fence`` instruction does not +allow an address space to be specified, the OpenCL fence has to conservatively +assume both the local and global address spaces were specified. However, optimizations +can often be done to eliminate the additional ``s_waitcnt`` instructions when +there are no intervening corresponding ``ds/flat_load/store/atomic`` memory +instructions.
The code sequences in the table indicate what can be omitted for +the OpenCL memory. The target triple environment is used to determine if the +source language is OpenCL (see :ref:`amdgpu-opencl`). + +``ds/flat_load/store/atomic`` instructions to local memory are termed LDS +operations. + +``buffer/global/flat_load/store/atomic`` instructions to global memory are +termed vector memory operations. + +For GFX6-GFX9: + +* Each agent has multiple compute units (CU). +* Each CU has multiple SIMDs that execute wavefronts. +* The wavefronts for a single work-group are executed in the same CU but may be + executed by different SIMDs. +* Each CU has a single LDS memory shared by the wavefronts of the work-groups + executing on it. +* All LDS operations of a CU are performed as wavefront wide operations in a + global order and involve no caching. Completion is reported to a wavefront in + execution order. +* The LDS memory has multiple request queues shared by the SIMDs of a + CU. Therefore, the LDS operations performed by different waves of a work-group + can be reordered relative to each other, which can result in reordering the + visibility of vector memory operations with respect to LDS operations of other + wavefronts in the same work-group. A ``s_waitcnt lgkmcnt(0)`` is required to + ensure synchronization between LDS operations and vector memory operations + between waves of a work-group, but not between operations performed by the + same wavefront. +* The vector memory operations are performed as wavefront wide operations and + completion is reported to a wavefront in execution order. The exception is + that for GFX7-9 ``flat_load/store/atomic`` instructions can report out of + vector memory order if they access LDS memory, and out of LDS operation order + if they access global memory. +* The vector memory operations access a vector L1 cache shared by all wavefronts + on a CU. Therefore, no special action is required for coherence between + wavefronts in the same work-group. A ``buffer_wbinvl1_vol`` is required for + coherence between waves executing in different work-groups as they may be + executing on different CUs. +* The scalar memory operations access a scalar L1 cache shared by all wavefronts + on a group of CUs. The scalar and vector L1 caches are not coherent. However, + scalar operations are used in a restricted way so do not impact the memory + model. See :ref:`amdgpu-amdhsa-memory-spaces`. +* The vector and scalar memory operations use an L2 cache shared by all CUs on + the same agent. +* The L2 cache has independent channels to service disjoint ranges of virtual + addresses. +* Each CU has a separate request queue per channel. Therefore, the vector and + scalar memory operations performed by waves executing in different work-groups + (which may be executing on different CUs) of an agent can be reordered + relative to each other. A ``s_waitcnt vmcnt(0)`` is required to ensure + synchronization between vector memory operations of different CUs. It ensures a + previous vector memory operation has completed before executing a subsequent + vector memory or LDS operation and so can be used to meet the requirements of + acquire and release. +* The L2 cache can be kept coherent with other agents on some targets, or ranges + of virtual addresses can be set up to bypass it to ensure system coherence. + +Private address space uses ``buffer_load/store`` using the scratch V# (GFX6-8), +or ``scratch_load/store`` (GFX9). 
Since only a single thread is accessing the +memory, atomic memory orderings are not meaningful and all accesses are treated +as non-atomic. + +Constant address space uses ``buffer/global_load`` instructions (or equivalent +scalar memory instructions). Since the constant address space contents do not +change during the execution of a kernel dispatch, it is not legal to perform +stores, and atomic memory orderings are not meaningful and all accesses are +treated as non-atomic. + +A memory synchronization scope wider than work-group is not meaningful for the +group (LDS) address space and is treated as work-group. + +The memory model does not support the region address space which is treated as +non-atomic. + +Acquire memory ordering is not meaningful on store atomic instructions and is +treated as non-atomic. + +Release memory ordering is not meaningful on load atomic instructions and is +treated as non-atomic. + +Acquire-release memory ordering is not meaningful on load or store atomic +instructions and is treated as acquire and release respectively. + +The AMDGPU backend only uses scalar memory operations to access memory that is +proven to not change during the execution of the kernel dispatch. This includes +constant address space and global address space for program scope const +variables. Therefore the kernel machine code does not have to maintain the +scalar L1 cache to ensure it is coherent with the vector L1 cache. The scalar +and vector L1 caches are invalidated between kernel dispatches by CP since +constant address space data may change between kernel dispatch executions. See +:ref:`amdgpu-amdhsa-memory-spaces`. + +The one exception is if scalar writes are used to spill SGPR registers. In this +case the AMDGPU backend ensures the memory location used to spill is never +accessed by vector memory operations at the same time. If scalar writes are used +then a ``s_dcache_wb`` is inserted before the ``s_endpgm`` and before a function +return since the locations may be used for vector memory instructions by a +future wave that uses the same scratch area, or a function call that creates a +frame at the same address, respectively. There is no need for a ``s_dcache_inv`` +as all scalar writes are write-before-read in the same thread. + +Scratch backing memory (which is used for the private address space) is accessed +with MTYPE NC_NV (non-coherent non-volatile). Since the private address space +is only accessed by a single thread, and is always write-before-read, +there is never a need to invalidate these entries from the L1 cache. Hence all +cache invalidates are done as ``*_vol`` to only invalidate the volatile cache +lines. + +On dGPU the kernarg backing memory is accessed as UC (uncached) to avoid needing +to invalidate the L2 cache. This also causes it to be treated as non-volatile +and so is not invalidated by ``*_vol``. On APU it is accessed as CC (cache +coherent) and so the L2 cache will be coherent with the CPU and other agents. + + .. table:: AMDHSA Memory Model Code Sequences GFX6-GFX9 + :name: amdgpu-amdhsa-memory-model-code-sequences-gfx6-gfx9-table + + ============ ============ ============== ========== ======================= + LLVM Instr LLVM Memory LLVM Memory AMDGPU AMDGPU Machine Code + Ordering Sync Scope Address + Space + ============ ============ ============== ========== ======================= + **Non-Atomic** + --------------------------------------------------------------------------- + load *none* *none* - global non-volatile + - generic 1.
buffer/global/flat_load + volatile + 1. buffer/global/flat_load + glc=1 + load *none* *none* - local 1. ds_load + store *none* *none* - global 1. buffer/global/flat_store + - generic + store *none* *none* - local 1. ds_store + **Unordered Atomic** + --------------------------------------------------------------------------- + load atomic unordered *any* *any* *Same as non-atomic*. + store atomic unordered *any* *any* *Same as non-atomic*. + atomicrmw unordered *any* *any* *Same as monotonic + atomic*. + **Monotonic Atomic** + --------------------------------------------------------------------------- + load atomic monotonic - singlethread - global 1. buffer/global/flat_load + - wavefront - generic + - workgroup + load atomic monotonic - singlethread - local 1. ds_load + - wavefront + - workgroup + load atomic monotonic - agent - global 1. buffer/global/flat_load + - system - generic glc=1 + store atomic monotonic - singlethread - global 1. buffer/global/flat_store + - wavefront - generic + - workgroup + - agent + - system + store atomic monotonic - singlethread - local 1. ds_store + - wavefront + - workgroup + atomicrmw monotonic - singlethread - global 1. buffer/global/flat_atomic + - wavefront - generic + - workgroup + - agent + - system + atomicrmw monotonic - singlethread - local 1. ds_atomic + - wavefront + - workgroup + **Acquire Atomic** + --------------------------------------------------------------------------- + load atomic acquire - singlethread - global 1. buffer/global/ds/flat_load + - wavefront - local + - generic + load atomic acquire - workgroup - global 1. buffer/global_load + load atomic acquire - workgroup - local 1. ds/flat_load + - generic 2. s_waitcnt lgkmcnt(0) + + - If OpenCL, omit + waitcnt. + - Must happen before + any following + global/generic + load/load + atomic/store/store + atomic/atomicrmw. + - Ensures any + following global + data read is no + older than the load + atomic value being + acquired. + + load atomic acquire - agent - global 1. buffer/global_load + - system glc=1 + 2. s_waitcnt vmcnt(0) + + - Must happen before + following + buffer_wbinvl1_vol. + - Ensures the load + has completed + before invalidating + the cache. + + 3. buffer_wbinvl1_vol + + - Must happen before + any following + global/generic + load/load + atomic/atomicrmw. + - Ensures that + following + loads will not see + stale global data. + + load atomic acquire - agent - generic 1. flat_load glc=1 + - system 2. s_waitcnt vmcnt(0) & + lgkmcnt(0) + + - If OpenCL omit + lgkmcnt(0). + - Must happen before + following + buffer_wbinvl1_vol. + - Ensures the flat_load + has completed + before invalidating + the cache. + + 3. buffer_wbinvl1_vol + + - Must happen before + any following + global/generic + load/load + atomic/atomicrmw. + - Ensures that + following loads + will not see stale + global data. + + atomicrmw acquire - singlethread - global 1. buffer/global/ds/flat_atomic + - wavefront - local + - generic + atomicrmw acquire - workgroup - global 1. buffer/global_atomic + atomicrmw acquire - workgroup - local 1. ds/flat_atomic + - generic 2. waitcnt lgkmcnt(0) + + - If OpenCL, omit + waitcnt. + - Must happen before + any following + global/generic + load/load + atomic/store/store + atomic/atomicrmw. + - Ensures any + following global + data read is no + older than the + atomicrmw value + being acquired. + + atomicrmw acquire - agent - global 1. buffer/global_atomic + - system 2. s_waitcnt vmcnt(0) + + - Must happen before + following + buffer_wbinvl1_vol. 
+ - Ensures the + atomicrmw has + completed before + invalidating the + cache. + + 3. buffer_wbinvl1_vol + + - Must happen before + any following + global/generic + load/load + atomic/atomicrmw. + - Ensures that + following loads + will not see stale + global data. + + atomicrmw acquire - agent - generic 1. flat_atomic + - system 2. s_waitcnt vmcnt(0) & + lgkmcnt(0) + + - If OpenCL, omit + lgkmcnt(0). + - Must happen before + following + buffer_wbinvl1_vol. + - Ensures the + atomicrmw has + completed before + invalidating the + cache. + + 3. buffer_wbinvl1_vol + + - Must happen before + any following + global/generic + load/load + atomic/atomicrmw. + - Ensures that + following loads + will not see stale + global data. + + fence acquire - singlethread *none* *none* + - wavefront + fence acquire - workgroup *none* 1. s_waitcnt lgkmcnt(0) + + - If OpenCL and + address space is + not generic, omit + waitcnt. However, + since LLVM + currently has no + address space on + the fence need to + conservatively + always generate. If + fence had an + address space then + set to address + space of OpenCL + fence flag, or to + generic if both + local and global + flags are + specified. + - Must happen after + any preceding + local/generic load + atomic/atomicrmw + with an equal or + wider sync scope + and memory ordering + stronger than + unordered (this is + termed the + fence-paired-atomic). + - Must happen before + any following + global/generic + load/load + atomic/store/store + atomic/atomicrmw. + - Ensures any + following global + data read is no + older than the + value read by the + fence-paired-atomic. + + fence acquire - agent *none* 1. s_waitcnt vmcnt(0) & + - system lgkmcnt(0) + + - If OpenCL and + address space is + not generic, omit + lgkmcnt(0). + However, since LLVM + currently has no + address space on + the fence need to + conservatively + always generate + (see comment for + previous fence). + - Could be split into + separate s_waitcnt + vmcnt(0) and + s_waitcnt + lgkmcnt(0) to allow + them to be + independently moved + according to the + following rules. + - s_waitcnt vmcnt(0) + must happen after + any preceding + global/generic load + atomic/atomicrmw + with an equal or + wider sync scope + and memory ordering + stronger than + unordered (this is + termed the + fence-paired-atomic). + - s_waitcnt lgkmcnt(0) + must happen after + any preceding + group/generic load + atomic/atomicrmw + with an equal or + wider sync scope + and memory ordering + stronger than + unordered (this is + termed the + fence-paired-atomic). + - Must happen before + the following + buffer_wbinvl1_vol. + - Ensures that the + fence-paired atomic + has completed + before invalidating + the + cache. Therefore + any following + locations read must + be no older than + the value read by + the + fence-paired-atomic. + + 2. buffer_wbinvl1_vol + + - Must happen before + any following global/generic + load/load + atomic/store/store + atomic/atomicrmw. + - Ensures that + following loads + will not see stale + global data. + + **Release Atomic** + --------------------------------------------------------------------------- + store atomic release - singlethread - global 1. buffer/global/ds/flat_store + - wavefront - local + - generic + store atomic release - workgroup - global 1. s_waitcnt lgkmcnt(0) + - generic + - If OpenCL, omit + waitcnt. + - Must happen after + any preceding + local/generic + load/store/load + atomic/store + atomic/atomicrmw. + - Must happen before + the following + store. 
+ - Ensures that all + memory operations + to local have + completed before + performing the + store that is being + released. + + 2. buffer/global/flat_store + store atomic release - workgroup - local 1. ds_store + store atomic release - agent - global 1. s_waitcnt vmcnt(0) & + - system - generic lgkmcnt(0) + + - If OpenCL, omit + lgkmcnt(0). + - Could be split into + separate s_waitcnt + vmcnt(0) and + s_waitcnt + lgkmcnt(0) to allow + them to be + independently moved + according to the + following rules. + - s_waitcnt vmcnt(0) + must happen after + any preceding + global/generic + load/store/load + atomic/store + atomic/atomicrmw. + - s_waitcnt lgkmcnt(0) + must happen after + any preceding + local/generic + load/store/load + atomic/store + atomic/atomicrmw. + - Must happen before + the following + store. + - Ensures that all + memory operations + to global have + completed before + performing the + store that is being + released. + + 2. buffer/global/ds/flat_store + atomicrmw release - singlethread - global 1. buffer/global/ds/flat_atomic + - wavefront - local + - generic + atomicrmw release - workgroup - global 1. s_waitcnt lgkmcnt(0) + - generic + - If OpenCL, omit + waitcnt. + - Must happen after + any preceding + local/generic + load/store/load + atomic/store + atomic/atomicrmw. + - Must happen before + the following + atomicrmw. + - Ensures that all + memory operations + to local have + completed before + performing the + atomicrmw that is + being released. + + 2. buffer/global/flat_atomic + atomicrmw release - workgroup - local 1. ds_atomic + atomicrmw release - agent - global 1. s_waitcnt vmcnt(0) & + - system - generic lgkmcnt(0) + + - If OpenCL, omit + lgkmcnt(0). + - Could be split into + separate s_waitcnt + vmcnt(0) and + s_waitcnt + lgkmcnt(0) to allow + them to be + independently moved + according to the + following rules. + - s_waitcnt vmcnt(0) + must happen after + any preceding + global/generic + load/store/load + atomic/store + atomic/atomicrmw. + - s_waitcnt lgkmcnt(0) + must happen after + any preceding + local/generic + load/store/load + atomic/store + atomic/atomicrmw. + - Must happen before + the following + atomicrmw. + - Ensures that all + memory operations + to global and local + have completed + before performing + the atomicrmw that + is being released. + + 2. buffer/global/ds/flat_atomic* + fence release - singlethread *none* *none* + - wavefront + fence release - workgroup *none* 1. s_waitcnt lgkmcnt(0) + + - If OpenCL and + address space is + not generic, omit + waitcnt. However, + since LLVM + currently has no + address space on + the fence need to + conservatively + always generate + (see comment for + previous fence). + - Must happen after + any preceding + local/generic + load/load + atomic/store/store + atomic/atomicrmw. + - Must happen before + any following store + atomic/atomicrmw + with an equal or + wider sync scope + and memory ordering + stronger than + unordered (this is + termed the + fence-paired-atomic). + - Ensures that all + memory operations + to local have + completed before + performing the + following + fence-paired-atomic. + + fence release - agent *none* 1. s_waitcnt vmcnt(0) & + - system lgkmcnt(0) + + - If OpenCL and + address space is + not generic, omit + lgkmcnt(0). + However, since LLVM + currently has no + address space on + the fence need to + conservatively + always generate + (see comment for + previous fence). 
+ - Could be split into + separate s_waitcnt + vmcnt(0) and + s_waitcnt + lgkmcnt(0) to allow + them to be + independently moved + according to the + following rules. + - s_waitcnt vmcnt(0) + must happen after + any preceding + global/generic + load/store/load + atomic/store + atomic/atomicrmw. + - s_waitcnt lgkmcnt(0) + must happen after + any preceding + local/generic + load/store/load + atomic/store + atomic/atomicrmw. + - Must happen before + any following store + atomic/atomicrmw + with an equal or + wider sync scope + and memory ordering + stronger than + unordered (this is + termed the + fence-paired-atomic). + - Ensures that all + memory operations + to global have + completed before + performing the + following + fence-paired-atomic. + + **Acquire-Release Atomic** + --------------------------------------------------------------------------- + atomicrmw acq_rel - singlethread - global 1. buffer/global/ds/flat_atomic + - wavefront - local + - generic + atomicrmw acq_rel - workgroup - global 1. s_waitcnt lgkmcnt(0) + + - If OpenCL, omit + waitcnt. + - Must happen after + any preceding + local/generic + load/store/load + atomic/store + atomic/atomicrmw. + - Must happen before + the following + atomicrmw. + - Ensures that all + memory operations + to local have + completed before + performing the + atomicrmw that is + being released. + + 2. buffer/global_atomic + atomicrmw acq_rel - workgroup - local 1. ds_atomic + 2. s_waitcnt lgkmcnt(0) + + - If OpenCL, omit + waitcnt. + - Must happen before + any following + global/generic + load/load + atomic/store/store + atomic/atomicrmw. + - Ensures any + following global + data read is no + older than the load + atomic value being + acquired. + + atomicrmw acq_rel - workgroup - generic 1. s_waitcnt lgkmcnt(0) + + - If OpenCL, omit + waitcnt. + - Must happen after + any preceding + local/generic + load/store/load + atomic/store + atomic/atomicrmw. + - Must happen before + the following + atomicrmw. + - Ensures that all + memory operations + to local have + completed before + performing the + atomicrmw that is + being released. + + 2. flat_atomic + 3. s_waitcnt lgkmcnt(0) + + - If OpenCL, omit + waitcnt. + - Must happen before + any following + global/generic + load/load + atomic/store/store + atomic/atomicrmw. + - Ensures any + following global + data read is no + older than the load + atomic value being + acquired. + atomicrmw acq_rel - agent - global 1. s_waitcnt vmcnt(0) & + - system lgkmcnt(0) + + - If OpenCL, omit + lgkmcnt(0). + - Could be split into + separate s_waitcnt + vmcnt(0) and + s_waitcnt + lgkmcnt(0) to allow + them to be + independently moved + according to the + following rules. + - s_waitcnt vmcnt(0) + must happen after + any preceding + global/generic + load/store/load + atomic/store + atomic/atomicrmw. + - s_waitcnt lgkmcnt(0) + must happen after + any preceding + local/generic + load/store/load + atomic/store + atomic/atomicrmw. + - Must happen before + the following + atomicrmw. + - Ensures that all + memory operations + to global have + completed before + performing the + atomicrmw that is + being released. + + 2. buffer/global_atomic + 3. s_waitcnt vmcnt(0) + + - Must happen before + following + buffer_wbinvl1_vol. + - Ensures the + atomicrmw has + completed before + invalidating the + cache. + + 4. buffer_wbinvl1_vol + + - Must happen before + any following + global/generic + load/load + atomic/atomicrmw. + - Ensures that + following loads + will not see stale + global data. + + atomicrmw acq_rel - agent - generic 1. 
s_waitcnt vmcnt(0) & + - system lgkmcnt(0) + + - If OpenCL, omit + lgkmcnt(0). + - Could be split into + separate s_waitcnt + vmcnt(0) and + s_waitcnt + lgkmcnt(0) to allow + them to be + independently moved + according to the + following rules. + - s_waitcnt vmcnt(0) + must happen after + any preceding + global/generic + load/store/load + atomic/store + atomic/atomicrmw. + - s_waitcnt lgkmcnt(0) + must happen after + any preceding + local/generic + load/store/load + atomic/store + atomic/atomicrmw. + - Must happen before + the following + atomicrmw. + - Ensures that all + memory operations + to global have + completed before + performing the + atomicrmw that is + being released. + + 2. flat_atomic + 3. s_waitcnt vmcnt(0) & + lgkmcnt(0) + + - If OpenCL, omit + lgkmcnt(0). + - Must happen before + following + buffer_wbinvl1_vol. + - Ensures the + atomicrmw has + completed before + invalidating the + cache. + + 4. buffer_wbinvl1_vol + + - Must happen before + any following + global/generic + load/load + atomic/atomicrmw. + - Ensures that + following loads + will not see stale + global data. + + fence acq_rel - singlethread *none* *none* + - wavefront + fence acq_rel - workgroup *none* 1. s_waitcnt lgkmcnt(0) + + - If OpenCL and + address space is + not generic, omit + waitcnt. However, + since LLVM + currently has no + address space on + the fence need to + conservatively + always generate + (see comment for + previous fence). + - Must happen after + any preceding + local/generic + load/load + atomic/store/store + atomic/atomicrmw. + - Must happen before + any following + global/generic + load/load + atomic/store/store + atomic/atomicrmw. + - Ensures that all + memory operations + to local have + completed before + performing any + following global + memory operations. + - Ensures that the + preceding + local/generic load + atomic/atomicrmw + with an equal or + wider sync scope + and memory ordering + stronger than + unordered (this is + termed the + fence-paired-atomic) + has completed + before following + global memory + operations. This + satisfies the + requirements of + acquire. + - Ensures that all + previous memory + operations have + completed before a + following + local/generic store + atomic/atomicrmw + with an equal or + wider sync scope + and memory ordering + stronger than + unordered (this is + termed the + fence-paired-atomic). + This satisfies the + requirements of + release. + + fence acq_rel - agent *none* 1. s_waitcnt vmcnt(0) & + - system lgkmcnt(0) + + - If OpenCL and + address space is + not generic, omit + lgkmcnt(0). + However, since LLVM + currently has no + address space on + the fence need to + conservatively + always generate + (see comment for + previous fence). + - Could be split into + separate s_waitcnt + vmcnt(0) and + s_waitcnt + lgkmcnt(0) to allow + them to be + independently moved + according to the + following rules. + - s_waitcnt vmcnt(0) + must happen after + any preceding + global/generic + load/store/load + atomic/store + atomic/atomicrmw. + - s_waitcnt lgkmcnt(0) + must happen after + any preceding + local/generic + load/store/load + atomic/store + atomic/atomicrmw. + - Must happen before + the following + buffer_wbinvl1_vol. + - Ensures that the + preceding + global/local/generic + load + atomic/atomicrmw + with an equal or + wider sync scope + and memory ordering + stronger than + unordered (this is + termed the + fence-paired-atomic) + has completed + before invalidating + the cache. This + satisfies the + requirements of + acquire. 
+ - Ensures that all + previous memory + operations have + completed before a + following + global/local/generic + store + atomic/atomicrmw + with an equal or + wider sync scope + and memory ordering + stronger than + unordered (this is + termed the + fence-paired-atomic). + This satisfies the + requirements of + release. + + 2. buffer_wbinvl1_vol + + - Must happen before + any following + global/generic + load/load + atomic/store/store + atomic/atomicrmw. + - Ensures that + following loads + will not see stale + global data. This + satisfies the + requirements of + acquire. + + **Sequentially Consistent Atomic** + --------------------------------------------------------------------------- + load atomic seq_cst - singlethread - global *Same as corresponding + - wavefront - local load atomic acquire*. + - workgroup - generic + load atomic seq_cst - agent - global 1. s_waitcnt vmcnt(0) + - system - local + - generic - Must happen after + preceding + global/generic load + atomic/store + atomic/atomicrmw + with memory + ordering of seq_cst + and with equal or + wider sync scope. + (Note that seq_cst + fences have their + own s_waitcnt + vmcnt(0) and so do + not need to be + considered.) + - Ensures any + preceding + sequentially + consistent global + memory instructions + have completed + before executing + this sequentially + consistent + instruction. This + prevents reordering + a seq_cst store + followed by a + seq_cst load (Note + that seq_cst is + stronger than + acquire/release as + the reordering of + load acquire + followed by a store + release is + prevented by the + waitcnt vmcnt(0) of + the release, but + there is nothing + preventing a store + release followed by + load acquire from + completing out of + order.) + + 2. *Following + instructions same as + corresponding load + atomic acquire*. + + store atomic seq_cst - singlethread - global *Same as corresponding + - wavefront - local store atomic release*. + - workgroup - generic + store atomic seq_cst - agent - global *Same as corresponding + - system - generic store atomic release*. + atomicrmw seq_cst - singlethread - global *Same as corresponding + - wavefront - local atomicrmw acq_rel*. + - workgroup - generic + atomicrmw seq_cst - agent - global *Same as corresponding + - system - generic atomicrmw acq_rel*. + fence seq_cst - singlethread *none* *Same as corresponding + - wavefront fence acq_rel*. + - workgroup + - agent + - system + ============ ============ ============== ========== ======================= + +The memory order also adds the single thread optimization constraints defined in +table +:ref:`amdgpu-amdhsa-memory-model-single-thread-optimization-constraints-gfx6-gfx9-table`. + + .. table:: AMDHSA Memory Model Single Thread Optimization Constraints GFX6-GFX9 + :name: amdgpu-amdhsa-memory-model-single-thread-optimization-constraints-gfx6-gfx9-table + + ============ ============================================================== + LLVM Memory Optimization Constraints + Ordering + ============ ============================================================== + unordered *none* + monotonic *none* + acquire - If a load atomic/atomicrmw then no following load/load + atomic/store/store atomic/atomicrmw/fence instruction can + be moved before the acquire. + - If a fence then same as load atomic, plus no preceding + associated fence-paired-atomic can be moved after the fence. + release - If a store atomic/atomicrmw then no preceding load/load + atomic/store/store atomic/atomicrmw/fence instruction can + be moved after the release.
+ - If a fence then same as store atomic, plus no following + associated fence-paired-atomic can be moved before the + fence. + acq_rel Same constraints as both acquire and release. + seq_cst - If a load atomic then same constraints as acquire, plus no + preceding sequentially consistent load atomic/store + atomic/atomicrmw/fence instruction can be moved after the + seq_cst. + - If a store atomic then the same constraints as release, plus + no following sequentially consistent load atomic/store + atomic/atomicrmw/fence instruction can be moved before the + seq_cst. + - If an atomicrmw/fence then same constraints as acq_rel. + ============ ============================================================== + +Trap Handler ABI +~~~~~~~~~~~~~~~~ + +For code objects generated by AMDGPU backend for HSA [HSA]_ compatible runtimes +(such as ROCm [AMD-ROCm]_), the runtime installs a trap handler that supports +the ``s_trap`` instruction with the following usage: + + .. table:: AMDGPU Trap Handler for AMDHSA OS + :name: amdgpu-trap-handler-for-amdhsa-os-table + + =================== =============== =============== ======================= + Usage Code Sequence Trap Handler Description + Inputs + =================== =============== =============== ======================= + reserved ``s_trap 0x00`` Reserved by hardware. + ``debugtrap(arg)`` ``s_trap 0x01`` ``SGPR0-1``: Reserved for HSA + ``queue_ptr`` ``debugtrap`` + ``VGPR0``: intrinsic (not + ``arg`` implemented). + ``llvm.trap`` ``s_trap 0x02`` ``SGPR0-1``: Causes dispatch to be + ``queue_ptr`` terminated and its + associated queue put + into the error state. + ``llvm.debugtrap`` ``s_trap 0x03`` ``SGPR0-1``: If debugger not + ``queue_ptr`` installed handled + same as ``llvm.trap``. + debugger breakpoint ``s_trap 0x07`` Reserved for debugger + breakpoints. + debugger ``s_trap 0x08`` Reserved for debugger. + debugger ``s_trap 0xfe`` Reserved for debugger. + debugger ``s_trap 0xff`` Reserved for debugger. + =================== =============== =============== ======================= + +Non-AMDHSA +---------- Trap Handler ABI ----------------- -The OS element of the target triple controls the trap handler behavior. - -HSA OS -^^^^^^ -For code objects generated by AMDGPU back-end for the HSA OS, the runtime -installs a trap handler that supports the s_trap instruction with the following -usage: - - +--------------+-------------+-------------------+----------------------------+ - |Usage |Code Sequence|Trap Handler Inputs|Description | - +==============+=============+===================+============================+ - |reserved |s_trap 0x00 | |Reserved by hardware. | - +--------------+-------------+-------------------+----------------------------+ - |HSA debugtrap |s_trap 0x01 |SGPR0-1: queue_ptr |Reserved for HSA debugtrap | - |(arg) | |VGPR0: arg |intrinsic (not implemented).| - +--------------+-------------+-------------------+----------------------------+ - |llvm.trap |s_trap 0x02 |SGPR0-1: queue_ptr |Causes dispatch to be | - | | | |terminated and its | - | | | |associated queue put into | - | | | |the error state. | - +--------------+-------------+-------------------+----------------------------+ - |llvm.debugtrap| s_trap 0x03 |SGPR0-1: queue_ptr |If debugger not installed | - | | | |handled same as llvm.trap. | - +--------------+-------------+-------------------+----------------------------+ - |debugger |s_trap 0x07 | |Reserved for debugger | - |breakpoint | | |breakpoints. 
| -+--------------+-------------+-------------------+----------------------------+ -|debugger |s_trap 0x08 | |Reserved for debugger. | -+--------------+-------------+-------------------+----------------------------+ -|debugger |s_trap 0xfe | |Reserved for debugger. | -+--------------+-------------+-------------------+----------------------------+ -|debugger |s_trap 0xff | |Reserved for debugger. | -+--------------+-------------+-------------------+----------------------------+ - -Non-HSA OS -^^^^^^^^^^ -For code objects generated by AMDGPU back-end for non-HSA OS, the runtime does -not install a trap handler. The llvm.trap and llvm.debugtrap instructions are -handler as follows: - - =============== ============= =============================================== - Usage Code Sequence Description - =============== ============= =============================================== - llvm.trap s_endpgm Causes wavefront to be terminated. - llvm.debugtrap Nothing Compiler warning generated that there is no trap handler installed. - =============== ============= =============================================== +~~~~~~~~~~~~~~~~ + +For code objects generated by the AMDGPU backend for a non-amdhsa OS, the runtime does +not install a trap handler. The ``llvm.trap`` and ``llvm.debugtrap`` +instructions are handled as follows: + + .. table:: AMDGPU Trap Handler for Non-AMDHSA OS + :name: amdgpu-trap-handler-for-non-amdhsa-os-table + + =============== =============== =========================================== + Usage Code Sequence Description + =============== =============== =========================================== + llvm.trap s_endpgm Causes wavefront to be terminated. + llvm.debugtrap *none* Compiler warning given that there is no + trap handler installed. + =============== =============== =========================================== + +Source Languages +================ + +.. _amdgpu-opencl: + +OpenCL +------ + +When generating code for the OpenCL language the target triple environment +should be ``opencl`` or ``amdgizcl`` (see :ref:`amdgpu-target-triples`). + +When the language is OpenCL the following differences occur: + +1. The OpenCL memory model is used (see :ref:`amdgpu-amdhsa-memory-model`). +2. The AMDGPU backend adds additional arguments to the kernel. +3. Additional metadata is generated (see :ref:`amdgpu-code-object-metadata`). + +.. TODO + Specify what effect this has. Hidden arguments added. Additional metadata + generated. + +.. _amdgpu-hcc: + +HCC +--- + +When generating code for the HCC language the target triple environment +should be ``hcc`` (see :ref:`amdgpu-target-triples`). + +When the language is HCC the following differences occur: + +1. The HSA memory model is used (see :ref:`amdgpu-amdhsa-memory-model`). + +.. TODO + Specify what effect this has. Assembler -========= +--------- AMDGPU backend has LLVM-MC based assembler which is currently in development. -It supports Southern Islands ISA, Sea Islands and Volcanic Islands. +It supports AMDGCN GFX6-GFX8. -This document describes general syntax for instructions and operands. For more -information about instructions, their semantics and supported combinations -of operands, refer to one of Instruction Set Architecture manuals. +This section describes the general syntax for instructions and operands. For more +information about instructions, their semantics and supported combinations of +operands, refer to one of the instruction set architecture manuals +[AMD-Souther-Islands]_ [AMD-Sea-Islands]_ [AMD-Volcanic-Islands]_.
-An instruction has the following syntax (register operands are -normally comma-separated while extra operands are space-separated): +An instruction has the following syntax (register operands are normally +comma-separated while extra operands are space-separated): *<opcode> <operand0>, <operand1>... <modifier0> <modifier1>...* Operands --------- +~~~~~~~~ The following syntax for register operands is supported: @@ -140,8 +3470,11 @@ The following extra operands are supported: - dst_unused (UNUSED_PAD, UNUSED_SEXT, UNUSED_PRESERVE) - abs, neg, sext -DS Instructions Examples ------------------------- +Instruction Examples +~~~~~~~~~~~~~~~~~~~~ + +DS +++ .. code-block:: nasm @@ -153,8 +3486,8 @@ DS Instructions Examples For full list of supported instructions, refer to "LDS/GDS instructions" in ISA Manual. -FLAT Instruction Examples -------------------------- +FLAT +++++ .. code-block:: nasm @@ -166,8 +3499,8 @@ FLAT Instruction Examples For full list of supported instructions, refer to "FLAT instructions" in ISA Manual. -MUBUF Instruction Examples --------------------------- +MUBUF ++++++ .. code-block:: nasm @@ -179,8 +3512,8 @@ MUBUF Instruction Examples For full list of supported instructions, refer to "MUBUF Instructions" in ISA Manual. -SMRD/SMEM Instruction Examples ------------------------------- +SMRD/SMEM ++++++++++ .. code-block:: nasm @@ -192,8 +3525,8 @@ SMRD/SMEM Instruction Examples For full list of supported instructions, refer to "Scalar Memory Operations" in ISA Manual. -SOP1 Instruction Examples -------------------------- +SOP1 +++++ .. code-block:: nasm @@ -207,8 +3540,8 @@ SOP1 Instruction Examples For full list of supported instructions, refer to "SOP1 Instructions" in ISA Manual. -SOP2 Instruction Examples ------------------------- +SOP2 +++++ .. code-block:: nasm @@ -224,8 +3557,8 @@ SOP2 Instruction Examples For full list of supported instructions, refer to "SOP2 Instructions" in ISA Manual. -SOPC Instruction Examples -------------------------- +SOPC +++++ .. code-block:: nasm @@ -236,8 +3569,8 @@ SOPC Instruction Examples For full list of supported instructions, refer to "SOPC Instructions" in ISA Manual. -SOPP Instruction Examples -------------------------- +SOPP +++++ .. code-block:: nasm @@ -259,8 +3592,8 @@ Unless otherwise mentioned, little verification is performed on the operands of SOPP Instructions, so it is up to the programmer to be familiar with the range or acceptable values. -Vector ALU Instruction Examples ------------------------------- +VALU +++++ For vector ALU instruction opcodes (VOP1, VOP2, VOP3, VOPC, VOP_DPP, VOP_SDWA), the assembler will automatically use optimal encoding based on its operands. @@ -314,19 +3647,20 @@ VOP_SDWA examples: For full list of supported instructions, refer to "Vector ALU instructions". HSA Code Object Directives --------------------------- +~~~~~~~~~~~~~~~~~~~~~~~~~~ AMDGPU ABI defines auxiliary data in output code object. In assembly source, one can specify them with assembler directives. .hsa_code_object_version major, minor -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ++++++++++++++++++++++++++++++++++++++ *major* and *minor* are integers that specify the version of the HSA code object that will be generated by the assembler. .hsa_code_object_isa [major, minor, stepping, vendor, arch] -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + *major*, *minor*, and *stepping* are all integers that describe the instruction set architecture (ISA) version of the assembly program.
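+
+For illustration only, an assembly program targeting, say, GFX7 could begin
+with directives like the following (a sketch; the version numbers and the
+vendor/arch strings are placeholder values, not a definitive header)::
+
+  .hsa_code_object_version 2,0
+  .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"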
@@ -338,13 +3672,13 @@ By default, the assembler will derive the ISA version, *vendor*, and *arch* from the value of the -mcpu option that is passed to the assembler. .amdgpu_hsa_kernel (name) -^^^^^^^^^^^^^^^^^^^^^^^^^ ++++++++++++++++++++++++++ This directives specifies that the symbol with given name is a kernel entry point (label) and the object should contain corresponding symbol of type STT_AMDGPU_HSA_KERNEL. .amd_kernel_code_t -^^^^^^^^^^^^^^^^^^ +++++++++++++++++++ This directive marks the beginning of a list of key / value pairs that are used to specify the amd_kernel_code_t object that will be emitted by the assembler. @@ -403,3 +3737,25 @@ Here is an example of a minimal amd_kernel_code_t specification: s_endpgm .Lfunc_end0: .size hello_world, .Lfunc_end0-hello_world + +Additional Documentation +======================== + +.. [AMD-R6xx] `AMD R6xx shader ISA `__ +.. [AMD-R7xx] `AMD R7xx shader ISA `__ +.. [AMD-Evergreen] `AMD Evergreen shader ISA `__ +.. [AMD-Cayman-Trinity] `AMD Cayman/Trinity shader ISA `__ +.. [AMD-Souther-Islands] `AMD Southern Islands Series ISA `__ +.. [AMD-Sea-Islands] `AMD Sea Islands Series ISA `_ +.. [AMD-Volcanic-Islands] `AMD GCN3 Instruction Set Architecture `__ +.. [AMD-OpenCL_Programming-Guide] `AMD Accelerated Parallel Processing OpenCL Programming Guide `_ +.. [AMD-APP-SDK] `AMD Accelerated Parallel Processing APP SDK Documentation `__ +.. [AMD-ROCm] `ROCm: Open Platform for Development, Discovery and Education Around GPU Computing `__ +.. [AMD-ROCm-github] `ROCm github `__ +.. [HSA] `Heterogeneous System Architecture (HSA) Foundation `__ +.. [ELF] `Executable and Linkable Format (ELF) `__ +.. [DWARF] `DWARF Debugging Information Format `__ +.. [YAML] `YAML Ain’t Markup Language (YAML™) Version 1.2 `__ +.. [OpenCL] `The OpenCL Specification Version 2.0 `__ +.. [HRF] `Heterogeneous-race-free Memory Models `__ +.. [AMD-AMDGPU-Compute-Application-Binary-Interface] `AMDGPU Compute Application Binary Interface `__ diff --git a/interpreter/llvm/src/docs/AliasAnalysis.rst b/interpreter/llvm/src/docs/AliasAnalysis.rst index e201333f30070..0a5cb00a48d3b 100644 --- a/interpreter/llvm/src/docs/AliasAnalysis.rst +++ b/interpreter/llvm/src/docs/AliasAnalysis.rst @@ -132,7 +132,8 @@ The ``MayAlias`` response is used whenever the two pointers might refer to the same object. The ``PartialAlias`` response is used when the two memory objects are known to -be overlapping in some way, but do not start at the same address. +be overlapping in some way, regardless whether they start at the same address +or not. The ``MustAlias`` response may only be returned if the two memory objects are guaranteed to always start at exactly the same location. A ``MustAlias`` diff --git a/interpreter/llvm/src/docs/Benchmarking.rst b/interpreter/llvm/src/docs/Benchmarking.rst new file mode 100644 index 0000000000000..0f88db745a686 --- /dev/null +++ b/interpreter/llvm/src/docs/Benchmarking.rst @@ -0,0 +1,87 @@ +================================== +Benchmarking tips +================================== + + +Introduction +============ + +For benchmarking a patch we want to reduce all possible sources of +noise as much as possible. How to do that is very OS dependent. + +Note that low noise is required, but not sufficient. It does not +exclude measurement bias. See +https://www.cis.upenn.edu/~cis501/papers/producing-wrong-data.pdf for +example. + +General +================================ + +* Use a high resolution timer, e.g. perf under linux. 
+ +* Run the benchmark multiple times to be able to recognize noise. + +* Disable as many processes or services as possible on the target system. + +* Disable frequency scaling, turbo boost and address space + randomization (see OS specific section). + +* Static link if the OS supports it. That avoids any variation that + might be introduced by loading dynamic libraries. This can be done + by passing ``-DLLVM_BUILD_STATIC=ON`` to cmake. + +* Try to avoid storage. On some systems you can use tmpfs. Putting the + program, inputs and outputs on tmpfs avoids touching a real storage + system, which can have a pretty big variability. + + To mount it (on linux and freebsd at least):: + + mount -t tmpfs -o size=<XX>g none dir_to_mount + +Linux +===== + +* Disable address space randomization:: + + echo 0 > /proc/sys/kernel/randomize_va_space + +* Set scaling_governor to performance:: + + for i in /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor + do + echo performance > $i + done + +* Use https://github.com/lpechacek/cpuset to reserve cpus for just the + program you are benchmarking. If using perf, leave at least 2 cores + so that perf runs in one and your program in another:: + + cset shield -c N1,N2 -k on + + This will move all threads out of N1 and N2. The ``-k on`` means + that even kernel threads are moved out. + +* Disable the SMT pair of the cpus you will use for the benchmark. The + pair of cpu N can be found in + ``/sys/devices/system/cpu/cpuN/topology/thread_siblings_list`` and + disabled with:: + + echo 0 > /sys/devices/system/cpu/cpuX/online + + +* Run the program with:: + + cset shield --exec -- perf stat -r 10 <cmd> + + This will run the command after ``--`` in the isolated cpus. The + particular perf command runs the ``<cmd>`` 10 times and reports + statistics. + +With these in place you can expect perf variations of less than 0.1%. + +Linux Intel +----------- + +* Disable turbo mode:: + + echo 1 > /sys/devices/system/cpu/intel_pstate/no_turbo diff --git a/interpreter/llvm/src/docs/BranchWeightMetadata.rst b/interpreter/llvm/src/docs/BranchWeightMetadata.rst index b941d0d150506..9bd8bd4ae744a 100644 --- a/interpreter/llvm/src/docs/BranchWeightMetadata.rst +++ b/interpreter/llvm/src/docs/BranchWeightMetadata.rst @@ -64,6 +64,20 @@ Branch weights are assigned to every destination. [ , i32 ... ] } +``CallInst`` +^^^^^^^^^^^^^^^^^^ + +Calls may have branch weight metadata, containing the execution count of +the call. It is currently used in SamplePGO mode only, to augment the +block and entry counts which may not be accurate with sampling. + +.. code-block:: none + + !0 = metadata !{ + metadata !"branch_weights", + i32 <call_count> + } + Other ^^^^^ diff --git a/interpreter/llvm/src/docs/CMake.rst b/interpreter/llvm/src/docs/CMake.rst index 0a32d3957a53c..bf97e9173158f 100644 --- a/interpreter/llvm/src/docs/CMake.rst +++ b/interpreter/llvm/src/docs/CMake.rst @@ -186,8 +186,8 @@ CMake manual, or execute ``cmake --help-variable VARIABLE_NAME``. Sets the build type for ``make``-based generators. Possible values are Release, Debug, RelWithDebInfo and MinSizeRel. If you are using an IDE such as Visual Studio, you should use the IDE settings to set the build type. - Be aware that Release and RelWithDebInfo are not using the same optimization - level on most platform. + Be aware that Release and RelWithDebInfo use different optimization levels on + most platforms.
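+
+  For example, a build tree could be configured for a RelWithDebInfo build
+  with a command along these lines (the source path is illustrative)::
+
+    cmake -DCMAKE_BUILD_TYPE=RelWithDebInfo path/to/llvm/source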
**CMAKE_INSTALL_PREFIX**:PATH Path where LLVM will be installed if "make install" is invoked or the @@ -247,9 +247,10 @@ LLVM-specific variables tests. **LLVM_APPEND_VC_REV**:BOOL - Append version control revision info (svn revision number or Git revision id) - to LLVM version string (stored in the PACKAGE_VERSION macro). For this to work - cmake must be invoked before the build. Defaults to OFF. + Embed version control revision info (svn revision number or Git revision id). + This is used among other things in the LLVM version string (stored in the + PACKAGE_VERSION macro). For this to work cmake must be invoked before the + build. Defaults to ON. **LLVM_ENABLE_THREADS**:BOOL Build with threads support, if available. Defaults to ON. @@ -535,6 +536,11 @@ LLVM-specific variables during the build. Enabling this option can significantly speed up build times especially when building LLVM in Debug configurations. +**LLVM_REVERSE_ITERATION**:BOOL + If enabled, all supported unordered llvm containers would be iterated in + reverse order. This is useful for uncovering non-determinism caused by + iteration of unordered containers. + CMake Caches ============ diff --git a/interpreter/llvm/src/docs/CMakePrimer.rst b/interpreter/llvm/src/docs/CMakePrimer.rst index 1e3a09e4d98ab..c29d627ee62cf 100644 --- a/interpreter/llvm/src/docs/CMakePrimer.rst +++ b/interpreter/llvm/src/docs/CMakePrimer.rst @@ -112,33 +112,6 @@ In this example the ``extra_sources`` variable is only defined if you're targeting an Apple platform. For all other targets the ``extra_sources`` will be evaluated as empty before add_executable is given its arguments. -One big "Gotcha" with variable dereferencing is that ``if`` commands implicitly -dereference values. This has some unexpected results. For example: - -.. code-block:: cmake - - if("${SOME_VAR}" STREQUAL "MSVC") - -In this code sample MSVC will be implicitly dereferenced, which will result in -the if command comparing the value of the dereferenced variables ``SOME_VAR`` -and ``MSVC``. A common workaround to this solution is to prepend strings being -compared with an ``x``. - -.. code-block:: cmake - - if("x${SOME_VAR}" STREQUAL "xMSVC") - -This works because while ``MSVC`` is a defined variable, ``xMSVC`` is not. This -pattern is uncommon, but it does occur in LLVM's CMake scripts. - -.. note:: - - Once the LLVM project upgrades its minimum CMake version to 3.1 or later we - can prevent this behavior by setting CMP0054 to new. For more information on - CMake policies please see the cmake-policies manpage or the `cmake-policies - online documentation - `_. - Lists ----- diff --git a/interpreter/llvm/src/docs/CodeGenerator.rst b/interpreter/llvm/src/docs/CodeGenerator.rst index 106fc8456f616..bcdc722835665 100644 --- a/interpreter/llvm/src/docs/CodeGenerator.rst +++ b/interpreter/llvm/src/docs/CodeGenerator.rst @@ -2642,59 +2642,6 @@ to ensure valid register usage and operand types. The AMDGPU backend ------------------ -The AMDGPU code generator lives in the lib/Target/AMDGPU directory, and is an -open source native AMD GCN ISA code generator. - -Target triples supported -^^^^^^^^^^^^^^^^^^^^^^^^ - -The following are the known target triples that are supported by the AMDGPU -backend. 
- -* **amdgcn--** --- AMD GCN GPUs (AMDGPU.7.0.0+) -* **amdgcn--amdhsa** --- AMD GCN GPUs (AMDGPU.7.0.0+) with HSA support -* **r600--** --- AMD GPUs HD2XXX-HD6XXX - -Relocations -^^^^^^^^^^^ - -Supported relocatable fields are: - -* **word32** --- This specifies a 32-bit field occupying 4 bytes with arbitrary - byte alignment. These values use the same byte order as other word values in - the AMD GPU architecture -* **word64** --- This specifies a 64-bit field occupying 8 bytes with arbitrary - byte alignment. These values use the same byte order as other word values in - the AMD GPU architecture - -Following notations are used for specifying relocation calculations: - -* **A** --- Represents the addend used to compute the value of the relocatable - field -* **G** --- Represents the offset into the global offset table at which the - relocation entry’s symbol will reside during execution. -* **GOT** --- Represents the address of the global offset table. -* **P** --- Represents the place (section offset or address) of the storage unit - being relocated (computed using ``r_offset``) -* **S** --- Represents the value of the symbol whose index resides in the - relocation entry - -AMDGPU Backend generates *Elf64_Rela* relocation records with the following -supported relocation types: - - ========================== ===== ========== ============================== - Relocation type Value Field Calculation - ========================== ===== ========== ============================== - ``R_AMDGPU_NONE`` 0 ``none`` ``none`` - ``R_AMDGPU_ABS32_LO`` 1 ``word32`` (S + A) & 0xFFFFFFFF - ``R_AMDGPU_ABS32_HI`` 2 ``word32`` (S + A) >> 32 - ``R_AMDGPU_ABS64`` 3 ``word64`` S + A - ``R_AMDGPU_REL32`` 4 ``word32`` S + A - P - ``R_AMDGPU_REL64`` 5 ``word64`` S + A - P - ``R_AMDGPU_ABS32`` 6 ``word32`` S + A - ``R_AMDGPU_GOTPCREL`` 7 ``word32`` G + GOT + A - P - ``R_AMDGPU_GOTPCREL32_LO`` 8 ``word32`` (G + GOT + A - P) & 0xFFFFFFFF - ``R_AMDGPU_GOTPCREL32_HI`` 9 ``word32`` (G + GOT + A - P) >> 32 - ``R_AMDGPU_REL32_LO`` 10 ``word32`` (S + A - P) & 0xFFFFFFFF - ``R_AMDGPU_REL32_HI`` 11 ``word32`` (S + A - P) >> 32 - ========================== ===== ========== ============================== +The AMDGPU code generator lives in the ``lib/Target/AMDGPU`` +directory. This code generator is capable of targeting a variety of +AMD GPU processors. Refer to :doc:`AMDGPUUsage` for more information. diff --git a/interpreter/llvm/src/docs/CodingStandards.rst b/interpreter/llvm/src/docs/CodingStandards.rst index 722718bf4f163..fa41198755fd7 100644 --- a/interpreter/llvm/src/docs/CodingStandards.rst +++ b/interpreter/llvm/src/docs/CodingStandards.rst @@ -34,10 +34,10 @@ There are some conventions that are not uniformly followed in the code base (e.g. the naming convention). This is because they are relatively new, and a lot of code was written before they were put in place. Our long term goal is for the entire codebase to follow the convention, but we explicitly *do not* -want patches that do large-scale reformating of existing code. On the other +want patches that do large-scale reformatting of existing code. On the other hand, it is reasonable to rename the methods of a class if you're about to -change it in some other way. Just do the reformating as a separate commit from -the functionality change. +change it in some other way. Just do the reformatting as a separate commit +from the functionality change. The ultimate goal of these guidelines is to increase the readability and maintainability of our common source base. 
If you have suggestions for topics to diff --git a/interpreter/llvm/src/docs/CommandGuide/lit.rst b/interpreter/llvm/src/docs/CommandGuide/lit.rst index b8299d44d48ec..fbe1a9ab1843f 100644 --- a/interpreter/llvm/src/docs/CommandGuide/lit.rst +++ b/interpreter/llvm/src/docs/CommandGuide/lit.rst @@ -80,6 +80,13 @@ OUTPUT OPTIONS Show more information on test failures, for example the entire test output instead of just the test result. +.. option:: -vv, --echo-all-commands + + Echo all commands to stdout, as they are being executed. + This can be valuable for debugging test failures, as the last echoed command + will be the one which has failed. + This option implies ``--verbose``. + .. option:: -a, --show-all Show more information about all tests, for example the entire test @@ -169,6 +176,13 @@ SELECTION OPTIONS must be in the range ``1..M``. The environment variable ``LIT_RUN_SHARD`` can also be used in place of this option. +.. option:: --filter=REGEXP + + Run only those tests whose name matches the regular expression specified in + ``REGEXP``. The environment variable ``LIT_FILTER`` can be also used in place + of this option, which is especially useful in environments where the call + to ``lit`` is issued indirectly. + ADDITIONAL OPTIONS ------------------ diff --git a/interpreter/llvm/src/docs/CommandGuide/llvm-cov.rst b/interpreter/llvm/src/docs/CommandGuide/llvm-cov.rst index ea2e625bc4d27..47db8d04e0b2f 100644 --- a/interpreter/llvm/src/docs/CommandGuide/llvm-cov.rst +++ b/interpreter/llvm/src/docs/CommandGuide/llvm-cov.rst @@ -262,6 +262,12 @@ OPTIONS The demangler is expected to read a newline-separated list of symbols from stdin and write a newline-separated list of the same length to stdout. +.. option:: -num-threads=N, -j=N + + Use N threads to write file reports (only applicable when -output-dir is + specified). When N=0, llvm-cov auto-detects an appropriate number of threads to + use. This is the default. + .. option:: -line-coverage-gt= Show code coverage only for functions with line coverage greater than the diff --git a/interpreter/llvm/src/docs/CommandGuide/llvm-nm.rst b/interpreter/llvm/src/docs/CommandGuide/llvm-nm.rst index 319e6e6aecf15..da7edea4743b8 100644 --- a/interpreter/llvm/src/docs/CommandGuide/llvm-nm.rst +++ b/interpreter/llvm/src/docs/CommandGuide/llvm-nm.rst @@ -134,9 +134,6 @@ OPTIONS BUGS ---- - * :program:`llvm-nm` cannot demangle C++ mangled names, like GNU :program:`nm` - can. - * :program:`llvm-nm` does not support the full set of arguments that GNU :program:`nm` does. diff --git a/interpreter/llvm/src/docs/CommandGuide/llvm-profdata.rst b/interpreter/llvm/src/docs/CommandGuide/llvm-profdata.rst index f7aa8309485b1..5b6330b5dc405 100644 --- a/interpreter/llvm/src/docs/CommandGuide/llvm-profdata.rst +++ b/interpreter/llvm/src/docs/CommandGuide/llvm-profdata.rst @@ -192,6 +192,12 @@ OPTIONS information is dumped in a more human readable form (also in text) with annotations. +.. option:: -topn=n + + Instruct the profile dumper to show the top ``n`` functions with the + hottest basic blocks in the summary section. By default, the topn functions + are not dumped. + .. option:: -sample Specify that the input profile is a sample-based profile. 
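+
+For example, a hypothetical invocation exercising the ``-topn`` option
+described above (the profile file name is illustrative)::
+
+  llvm-profdata show -topn=10 foo.profdata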
diff --git a/interpreter/llvm/src/docs/CompilerWriterInfo.rst b/interpreter/llvm/src/docs/CompilerWriterInfo.rst index 8ce999033b7f6..24375fb70d4e8 100644 --- a/interpreter/llvm/src/docs/CompilerWriterInfo.rst +++ b/interpreter/llvm/src/docs/CompilerWriterInfo.rst @@ -72,16 +72,7 @@ Other documents, collections, notes AMDGPU ------ -* `AMD R6xx shader ISA `_ -* `AMD R7xx shader ISA `_ -* `AMD Evergreen shader ISA `_ -* `AMD Cayman/Trinity shader ISA `_ -* `AMD Southern Islands Series ISA `_ -* `AMD Sea Islands Series ISA `_ -* `AMD GCN3 Instruction Set Architecture `__ -* `AMD GPU Programming Guide `_ -* `AMD Compute Resources `_ -* `AMDGPU Compute Application Binary Interface `__ +Refer to :doc:`AMDGPUUsage` for additional documentation. RISC-V ------ diff --git a/interpreter/llvm/src/docs/Coroutines.rst b/interpreter/llvm/src/docs/Coroutines.rst index f7a38577fe8eb..1bea04ebdd2ac 100644 --- a/interpreter/llvm/src/docs/Coroutines.rst +++ b/interpreter/llvm/src/docs/Coroutines.rst @@ -846,7 +846,7 @@ Overview: """"""""" The '``llvm.coro.alloc``' intrinsic returns `true` if dynamic allocation is -required to obtain a memory for the corutine frame and `false` otherwise. +required to obtain memory for the coroutine frame and `false` otherwise. Arguments: """""""""" diff --git a/interpreter/llvm/src/docs/Docker.rst b/interpreter/llvm/src/docs/Docker.rst new file mode 100644 index 0000000000000..e606e1b71a2c0 --- /dev/null +++ b/interpreter/llvm/src/docs/Docker.rst @@ -0,0 +1,199 @@ +========================================= +A guide to Dockerfiles for building LLVM +========================================= + +Introduction +============ +You can find a number of sources to build docker images with LLVM components in +``llvm/utils/docker``. They can be used by anyone who wants to build the docker +images for their own use, or as a starting point for someone who wants to write +their own Dockerfiles. + +We currently provide Dockerfiles with ``debian8`` and ``nvidia-cuda`` base images. +We also provide an ``example`` image, which contains placeholders that one would need +to fill out in order to produce Dockerfiles for a new docker image. + +Why? +---- +Docker images provide a way to produce binary distributions of +software inside a controlled environment. Having Dockerfiles to build docker images +inside the LLVM repo makes them much more discoverable than putting them in any other +place. + +Docker basics +------------- +If you've never heard about Docker before, you might find this section helpful +to get a very basic explanation of it. +`Docker `_ is a popular solution for running programs in +an isolated and reproducible environment, especially to maintain releases for +software deployed to large distributed fleets. +It uses linux kernel namespaces and cgroups to provide lightweight isolation +inside the currently running linux kernel. +A single active instance of a dockerized environment is called a *docker +container*. +A snapshot of a docker container filesystem is called a *docker image*. +One can start a container from a prebuilt docker image. + +Docker images are built from a so-called *Dockerfile*, a source file written in +a specialized language that defines instructions to be used when building +the docker image (see `official +documentation `_ for more +details). A minimal Dockerfile typically contains a base image and a number +of RUN commands that have to be executed to build the image.
When building a new +image, docker will first download your base image, mount its filesystem as +read-only and then add a writable overlay on top of it to keep track of all +filesystem modifications performed while building your image. When the build +process is finished, a diff between your image's final filesystem state and the +base image's filesystem is stored in the resulting image. + +Overview +======== +The ``llvm/utils/docker`` folder contains Dockerfiles and simple bash scripts to +serve as a basis for anyone who wants to create their own Docker image with +LLVM components, compiled from sources. The sources are checked out from the +upstream svn repository when building the image. + +Inside each subfolder we host Dockerfiles for two images: + +- ``build/`` image is used to compile LLVM; it installs a system compiler and all + build dependencies of LLVM. After the build process is finished, the build + image will have an archive with compiled components at ``/tmp/clang.tar.gz``. +- ``release/`` image usually only contains LLVM components, compiled by the + ``build/`` image, and also libstdc++ and binutils to make the image minimally + useful for C++ development. The assumption is that you usually want clang to + be one of the provided components. + +To build both of those images, use the ``build_docker_image.sh`` script. +It will check out LLVM sources and build clang in the ``build`` container, copy the results +of the build to the local filesystem and then build the ``release`` container using +those. The ``build_docker_image.sh`` script accepts a list of LLVM repositories to +check out, and arguments for the CMake invocation. + +If you want to write your own docker image, start with the ``example/`` subfolder. +It provides incomplete Dockerfiles with (very few) FIXMEs explaining the steps +you need to take in order to make your Dockerfiles functional. + +Usage +===== +The ``llvm/utils/build_docker_image.sh`` script provides a rather high degree of +control over how to run the build. It allows you to specify the projects to +check out from svn and provide a list of CMake arguments to use when +building LLVM inside the docker container. + +Here's a very simple example of getting a docker image with a clang binary, +compiled by the system compiler in the debian8 image: + +.. code-block:: bash + + ./llvm/utils/docker/build_docker_image.sh \ --source debian8 \ --docker-repository clang-debian8 --docker-tag "staging" \ -p clang -i install-clang -i install-clang-headers \ -- \ -DCMAKE_BUILD_TYPE=Release + +Note that a build like that doesn't use the 2-stage build process that +you probably want for clang. Running a 2-stage build is a little more intricate; +this command will do that: + +.. code-block:: bash + + # Run a 2-stage build. # LLVM_TARGETS_TO_BUILD=Native is to reduce stage1 compile time. # Options starting with BOOTSTRAP_* are passed to the stage2 cmake invocation. ./build_docker_image.sh \ --source debian8 \ --docker-repository clang-debian8 --docker-tag "staging" \ -p clang -i stage2-install-clang -i stage2-install-clang-headers \ -- \ -DLLVM_TARGETS_TO_BUILD=Native -DCMAKE_BUILD_TYPE=Release \ -DBOOTSTRAP_CMAKE_BUILD_TYPE=Release \ -DCLANG_ENABLE_BOOTSTRAP=ON -DCLANG_BOOTSTRAP_TARGETS="install-clang;install-clang-headers" + +This will produce two images, a release image ``clang-debian8:staging`` and a +build image ``clang-debian8-build:staging`` from the latest upstream revision.
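+If docker is available locally, a quick sanity check is to list the images just +produced (a sketch; the repository names follow the flags used above): + +.. code-block:: bash + + docker images | grep clang-debian8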
+After the image is built you can run bash inside a container based on your +image like this: + +.. code-block:: bash + + docker run -ti clang-debian8:staging bash + +Now you can run bash commands as you normally would: + +.. code-block:: bash + + root@80f351b51825:/# clang -v clang version 5.0.0 (trunk 305064) Target: x86_64-unknown-linux-gnu Thread model: posix InstalledDir: /bin Found candidate GCC installation: /usr/lib/gcc/x86_64-linux-gnu/4.8 Found candidate GCC installation: /usr/lib/gcc/x86_64-linux-gnu/4.8.4 Found candidate GCC installation: /usr/lib/gcc/x86_64-linux-gnu/4.9 Found candidate GCC installation: /usr/lib/gcc/x86_64-linux-gnu/4.9.2 Selected GCC installation: /usr/lib/gcc/x86_64-linux-gnu/4.9 Candidate multilib: .;@m64 Selected multilib: .;@m64 + + +Which image should I choose? +============================ +We currently provide two images: debian8-based and nvidia-cuda-based. They +differ in the base image that they use, i.e. they have a different set of +preinstalled binaries. Debian8 is very minimal, nvidia-cuda is larger, but has +preinstalled CUDA libraries and allows you to access a GPU installed on your +machine. + +If you need a minimal linux distribution with only clang and libstdc++ included, +you should try the debian8-based image. + +If you want to use CUDA libraries and have access to a GPU on your machine, +you should choose the nvidia-cuda-based image and use `nvidia-docker +`_ to run your docker containers. Note +that you don't need nvidia-docker to build the images, but you need it in order +to have access to a GPU from a docker container that is running the built +image. + +If you have a different use-case, you could create your own image based on the +``example/`` folder. + +Any docker image can be built and run using only the docker binary, i.e. you can +run a debian8 build on Fedora or any other Linux distribution. You don't need to +install CMake, compilers or any other clang dependencies. It is all handled +during the build process inside Docker's isolated environment. + +Stable build +============ +If you want a somewhat recent and somewhat stable build, use the +``branches/google/stable`` branch, i.e. the following command will produce a +debian8-based image using the latest ``google/stable`` sources for you: + +.. code-block:: bash + + ./llvm/utils/docker/build_docker_image.sh \ -s debian8 -d clang-debian8 -t "staging" \ --branch branches/google/stable \ -p clang -i install-clang -i install-clang-headers \ -- \ -DCMAKE_BUILD_TYPE=Release + + +Minimizing docker image size +============================ +Due to Docker restrictions we use two images (i.e., build and release folders) +for the release image to be as small as possible. It's much easier to achieve +that using two images, because Docker would store a filesystem layer for each +command in the Dockerfile, i.e. if you install some packages in one command, +then remove those in a separate command, the size of the resulting image will +still be proportional to the size of an image with installed packages. +Therefore, we strive to provide a very simple release image which only copies +compiled clang and does not do anything else. + +Docker 1.13 added a ``--squash`` flag that allows flattening the layers of the +image, i.e. removing the parts that were actually deleted. That is an easier way +to produce the smallest images possible by using just a single image.
We do not +use it because, as of today, the flag is in the experimental stage and not everyone +may have the latest docker version available. When the flag is out of the +experimental stage, we should investigate replacing the two-image approach with +just a single image built using the ``--squash`` flag. diff --git a/interpreter/llvm/src/docs/GetElementPtr.rst b/interpreter/llvm/src/docs/GetElementPtr.rst index f39f1d9207a2a..b593871695fac 100644 --- a/interpreter/llvm/src/docs/GetElementPtr.rst +++ b/interpreter/llvm/src/docs/GetElementPtr.rst @@ -9,10 +9,11 @@ Introduction ============ This document seeks to dispel the mystery and confusion surrounding LLVM's -`GetElementPtr `_ (GEP) instruction. Questions -about the wily GEP instruction are probably the most frequently occurring -questions once a developer gets down to coding with LLVM. Here we lay out the -sources of confusion and show that the GEP instruction is really quite simple. +`GetElementPtr `_ (GEP) instruction. +Questions about the wily GEP instruction are probably the most frequently +occurring questions once a developer gets down to coding with LLVM. Here we lay +out the sources of confusion and show that the GEP instruction is really quite +simple. Address Computation =================== @@ -26,7 +27,7 @@ questions. What is the first index of the GEP instruction? ----------------------------------------------- -Quick answer: The index stepping through the first operand. +Quick answer: The index stepping through the second operand. The confusion with the first index usually arises from thinking about the GetElementPtr instruction as if it was a C index operator. They aren't the @@ -58,7 +59,7 @@ Sometimes this question gets rephrased as: won't be dereferenced?* The answer is simply because memory does not have to be accessed to perform the -computation. The first operand to the GEP instruction must be a value of a +computation. The second operand to the GEP instruction must be a value of a pointer type. The value of the pointer is provided directly to the GEP instruction as an operand without any need for accessing memory. It must, therefore be indexed and requires an index operand. Consider this example: @@ -79,8 +80,8 @@ therefore be indexed and requires an index operand. Consider this example: In this "C" example, the front end compiler (Clang) will generate three GEP instructions for the three indices through "P" in the assignment statement. The -function argument ``P`` will be the first operand of each of these GEP -instructions. The second operand indexes through that pointer. The third +function argument ``P`` will be the second operand of each of these GEP +instructions. The third operand indexes through that pointer. The fourth operand will be the field offset into the ``struct munger_struct`` type, for either the ``f1`` or ``f2`` field. So, in LLVM assembly the ``munge`` function looks like: @@ -99,8 +100,8 @@ looks like: ret void } -In each case the first operand is the pointer through which the GEP instruction -starts. The same is true whether the first operand is an argument, allocated +In each case the second operand is the pointer through which the GEP instruction +starts. The same is true whether the second operand is an argument, allocated memory, or a global variable. To make this clear, let's consider a more obtuse example: @@ -158,11 +159,11 @@ confusion: i32 }*``. That is, ``%MyStruct`` is a pointer to a structure containing a pointer to a ``float`` and an ``i32``. -#.
Point #1 is evidenced by noticing the type of the first operand of the GEP +#. Point #1 is evidenced by noticing the type of the second operand of the GEP instruction (``%MyStruct``) which is ``{ float*, i32 }*``. #. The first index, ``i64 0`` is required to step over the global variable - ``%MyStruct``. Since the first argument to the GEP instruction must always + ``%MyStruct``. Since the second argument to the GEP instruction must always be a value of pointer type, the first index steps through that pointer. A value of 0 means 0 elements offset from that pointer. @@ -266,7 +267,7 @@ in the IR. In the future, it will probably be outright disallowed. What effect do address spaces have on GEPs? ------------------------------------------- -None, except that the address space qualifier on the first operand pointer type +None, except that the address space qualifier on the second operand pointer type always matches the address space qualifier on the result type. How is GEP different from ``ptrtoint``, arithmetic, and ``inttoptr``? @@ -429,7 +430,8 @@ because LLVM has no restrictions on mixing types in addressing, loads or stores. LLVM's type-based alias analysis pass uses metadata to describe a different type system (such as the C type system), and performs type-based aliasing on top of -that. Further details are in the `language reference `_. +that. Further details are in the +`language reference `_. What happens if a GEP computation overflows? -------------------------------------------- @@ -524,7 +526,7 @@ instruction: #. The GEP instruction never accesses memory, it only provides pointer computations. -#. The first operand to the GEP instruction is always a pointer and it must be +#. The second operand to the GEP instruction is always a pointer and it must be indexed. #. There are no superfluous indices for the GEP instruction. diff --git a/interpreter/llvm/src/docs/GettingStarted.rst b/interpreter/llvm/src/docs/GettingStarted.rst index 133331880395b..0cb415ad764e5 100644 --- a/interpreter/llvm/src/docs/GettingStarted.rst +++ b/interpreter/llvm/src/docs/GettingStarted.rst @@ -706,7 +706,7 @@ To set up a clone of all the llvm projects using a unified repository: .. code-block:: console % export TOP_LEVEL_DIR=`pwd` - % git clone https://github.com/llvm-project/llvm-project-20170507/ + % git clone https://github.com/llvm-project/llvm-project-20170507/ llvm-project % cd llvm-project % git config branch.master.rebase true diff --git a/interpreter/llvm/src/docs/GettingStartedVS.rst b/interpreter/llvm/src/docs/GettingStartedVS.rst index 1e46767679393..50f7aa123c558 100644 --- a/interpreter/llvm/src/docs/GettingStartedVS.rst +++ b/interpreter/llvm/src/docs/GettingStartedVS.rst @@ -100,6 +100,10 @@ Here's the short story for getting up and running quickly with LLVM: * CMake generates project files for all build types. To select a specific build type, use the Configuration manager from the VS IDE or the ``/property:Configuration`` command line option when using MSBuild. + * By default, the Visual Studio project files generated by CMake use the + 32-bit toolset. If you are developing on a 64-bit version of Windows and + want to use the 64-bit toolset, pass the ``-Thost=x64`` flag when + generating the Visual Studio solution. This requires CMake 3.8.0 or later. 6. 
Start Visual Studio diff --git a/interpreter/llvm/src/docs/GoldPlugin.rst b/interpreter/llvm/src/docs/GoldPlugin.rst index 88b944a2a0fdd..78d38ccb32bd1 100644 --- a/interpreter/llvm/src/docs/GoldPlugin.rst +++ b/interpreter/llvm/src/docs/GoldPlugin.rst @@ -7,7 +7,7 @@ Introduction Building with link time optimization requires cooperation from the system linker. LTO support on Linux systems requires that you use the -`gold linker`_ which supports LTO via plugins. This is the same mechanism +`gold linker`_ or ld.bfd from binutils >= 2.21.51.0.2, as they support LTO via plugins. This is the same mechanism used by the `GCC LTO`_ project. The LLVM gold plugin implements the gold plugin interface on top of @@ -23,24 +23,22 @@ The LLVM gold plugin implements the gold plugin interface on top of How to build it =============== -You need to have gold with plugin support and build the LLVMgold plugin. -Check whether you have gold running ``/usr/bin/ld -v``. It will report "GNU -gold" or else "GNU ld" if not. If you have gold, check for plugin support -by running ``/usr/bin/ld -plugin``. If it complains "missing argument" then -you have plugin support. If not, such as an "unknown option" error then you -will either need to build gold or install a version with plugin support. +Check for plugin support by running ``/usr/bin/ld -plugin``. If it complains +"missing argument" then you have plugin support. If not, such as an "unknown option" +error then you will either need to build gold or install a recent version +of ld.bfd with plugin support and then build gold plugin. -* Download, configure and build gold with plugin support: +* Download, configure and build ld.bfd with plugin support: .. code-block:: bash $ git clone --depth 1 git://sourceware.org/git/binutils-gdb.git binutils $ mkdir build $ cd build - $ ../binutils/configure --enable-gold --enable-plugins --disable-werror - $ make all-gold + $ ../binutils/configure --disable-werror # ld.bfd includes plugin support by default + $ make all-ld - That should leave you with ``build/gold/ld-new`` which supports + That should leave you with ``build/ld/ld-new`` which supports the ``-plugin`` option. Running ``make`` will additionally build ``build/binutils/ar`` and ``nm-new`` binaries supporting plugins. diff --git a/interpreter/llvm/src/docs/HowToAddABuilder.rst b/interpreter/llvm/src/docs/HowToAddABuilder.rst index 08cbecdc2a579..201c71b213914 100644 --- a/interpreter/llvm/src/docs/HowToAddABuilder.rst +++ b/interpreter/llvm/src/docs/HowToAddABuilder.rst @@ -62,6 +62,9 @@ Here are the steps you can follow to do so: lab.llvm.org:9990 \ + To point a slave to silent master please use lab.llvm.org:9994 instead + of lab.llvm.org:9990. + #. Fill the buildslave description and admin name/e-mail. Here is an example of the buildslave description:: diff --git a/interpreter/llvm/src/docs/LangRef.rst b/interpreter/llvm/src/docs/LangRef.rst index 9ff47e8366dcb..5c65864e901e7 100644 --- a/interpreter/llvm/src/docs/LangRef.rst +++ b/interpreter/llvm/src/docs/LangRef.rst @@ -161,7 +161,7 @@ symbol table entries. Here is an example of the "hello world" module: ; Definition of main function define i32 @main() { ; i32()* - ; Convert [13 x i8]* to i8 *... + ; Convert [13 x i8]* to i8*... %cast210 = getelementptr [13 x i8], [13 x i8]* @.str, i64 0, i64 0 ; Call puts function to write out the string to stdout. @@ -1468,6 +1468,19 @@ example: This attribute by itself does not imply restrictions on inter-procedural optimizations. 
All of the semantic effects the patching may have to be separately conveyed via the linkage type. +``"probe-stack"`` + This attribute indicates that the function will trigger a guard region + at the end of the stack. It ensures that accesses to the stack are + no further apart than the size of the guard region from a previous + access of the stack. It takes one required string value, the name of + the stack probing function that will be called. + + If a function that has a ``"probe-stack"`` attribute is inlined into + a function with another ``"probe-stack"`` attribute, the resulting + function has the ``"probe-stack"`` attribute of the caller. If a + function that has a ``"probe-stack"`` attribute is inlined into a + function that has no ``"probe-stack"`` attribute at all, the resulting + function has the ``"probe-stack"`` attribute of the callee. ``readnone`` On a function, this attribute indicates that the function computes its result (or decides to unwind an exception) based strictly on its arguments, @@ -1498,6 +1511,21 @@ example: On an argument, this attribute indicates that the function does not write through this pointer argument, even though it may write to the memory that the pointer points to. +``"stack-probe-size"`` + This attribute controls the behavior of stack probes: either + the ``"probe-stack"`` attribute, or ABI-required stack probes, if any. + It defines the size of the guard region. It ensures that if the function + may use more stack space than the size of the guard region, a stack probing + sequence will be emitted. It takes one required integer value, which + is 4096 by default. + + If a function that has a ``"stack-probe-size"`` attribute is inlined into + a function with another ``"stack-probe-size"`` attribute, the resulting + function has the ``"stack-probe-size"`` attribute that has the lower + numeric value. If a function that has a ``"stack-probe-size"`` attribute is + inlined into a function that has no ``"stack-probe-size"`` attribute + at all, the resulting function has the ``"stack-probe-size"`` attribute + of the callee. ``writeonly`` On a function, this attribute indicates that the function may write to but does not read from memory. @@ -1989,7 +2017,7 @@ A pointer value is *based* on another pointer value according to the following rules: - A pointer value formed from a ``getelementptr`` operation is *based* - on the first value operand of the ``getelementptr``. + on the second value operand of the ``getelementptr``. - The result value of a ``bitcast`` is *based* on the operand of the ``bitcast``. - A pointer value formed by an ``inttoptr`` is *based* on all pointer @@ -2181,12 +2209,21 @@ For a simpler introduction to the ordering constraints, see the same address in this global order. This corresponds to the C++0x/C1x ``memory_order_seq_cst`` and Java volatile. -.. _singlethread: +.. _syncscope: -If an atomic operation is marked ``singlethread``, it only *synchronizes -with* or participates in modification and seq\_cst total orderings with -other operations running in the same thread (for example, in signal -handlers). +If an atomic operation is marked ``syncscope("singlethread")``, it only +*synchronizes with* and only participates in the seq\_cst total orderings of +other operations running in the same thread (for example, in signal handlers).
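+For example, a fence and a load that only need to be atomic with respect to a +signal handler running in the same thread can be written as follows (a sketch +using the ``syncscope`` syntax shown later in this document): + +.. code-block:: llvm + + fence syncscope("singlethread") seq_cst %val = load atomic i32, i32* %ptr syncscope("singlethread") acquire, align 4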
+ +If an atomic operation is marked ``syncscope("")``, where +```` is a target specific synchronization scope, then it is target +dependent if it *synchronizes with* and participates in the seq\_cst total +orderings of other operations. + +Otherwise, an atomic operation that is not marked ``syncscope("singlethread")`` +or ``syncscope("")`` *synchronizes with* and participates in the +seq\_cst total orderings of other operations that are not marked +``syncscope("singlethread")`` or ``syncscope("")``. .. _fastmath: @@ -3166,7 +3203,7 @@ The following is the syntax for constant expressions: ``getelementptr (TY, CSTPTR, IDX0, IDX1, ...)``, ``getelementptr inbounds (TY, CSTPTR, IDX0, IDX1, ...)`` Perform the :ref:`getelementptr operation ` on constants. As with the :ref:`getelementptr ` - instruction, the index list may have zero or more indexes, which are + instruction, the index list may have one or more indexes, which are required to make sense for the type of "pointer to TY". ``select (COND, VAL1, VAL2)`` Perform the :ref:`select operation ` on constants. @@ -4033,26 +4070,26 @@ DICompileUnit """"""""""""" ``DICompileUnit`` nodes represent a compile unit. The ``enums:``, -``retainedTypes:``, ``subprograms:``, ``globals:``, ``imports:`` and ``macros:`` -fields are tuples containing the debug info to be emitted along with the compile -unit, regardless of code optimizations (some nodes are only emitted if there are -references to them from instructions). The ``debugInfoForProfiling:`` field is a -boolean indicating whether or not line-table discriminators are updated to -provide more-accurate debug info for profiling results. +``retainedTypes:``, ``globals:``, ``imports:`` and ``macros:`` fields are tuples +containing the debug info to be emitted along with the compile unit, regardless +of code optimizations (some nodes are only emitted if there are references to +them from instructions). The ``debugInfoForProfiling:`` field is a boolean +indicating whether or not line-table discriminators are updated to provide +more-accurate debug info for profiling results. .. code-block:: text !0 = !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang", isOptimized: true, flags: "-O2", runtimeVersion: 2, splitDebugFilename: "abc.debug", emissionKind: FullDebug, - enums: !2, retainedTypes: !3, subprograms: !4, - globals: !5, imports: !6, macros: !7, dwoId: 0x0abcd) + enums: !2, retainedTypes: !3, globals: !4, imports: !5, + macros: !6, dwoId: 0x0abcd) Compile unit descriptors provide the root scope for objects declared in a -specific compilation unit. File descriptors are defined using this scope. -These descriptors are collected by a named metadata ``!llvm.dbg.cu``. They -keep track of subprograms, global variables, type information, and imported -entities (declarations and namespaces). +specific compilation unit. File descriptors are defined using this scope. These +descriptors are collected by a named metadata node ``!llvm.dbg.cu``. They keep +track of global variables, type information, and imported entities (declarations +and namespaces). .. _DIFile: @@ -4326,8 +4363,8 @@ and ``scope:``. containingType: !4, virtuality: DW_VIRTUALITY_pure_virtual, virtualIndex: 10, flags: DIFlagPrototyped, - isOptimized: true, templateParams: !5, - declaration: !6, variables: !7) + isOptimized: true, unit: !5, templateParams: !6, + declaration: !7, variables: !8, thrownTypes: !9) .. _DILexicalBlock: @@ -4404,7 +4441,12 @@ referenced LLVM variable relates to the source language variable. 
The current supported vocabulary is limited: - ``DW_OP_deref`` dereferences the top of the expression stack. -- ``DW_OP_plus, 93`` adds ``93`` to the working expression. +- ``DW_OP_plus`` pops the last two entries from the expression stack, adds + them together and appends the result to the expression stack. +- ``DW_OP_minus`` pops the last two entries from the expression stack, subtracts + the last entry from the second last entry and appends the result to the + expression stack. +- ``DW_OP_plus_uconst, 93`` adds ``93`` to the working expression. - ``DW_OP_LLVM_fragment, 16, 8`` specifies the offset and size (``16`` and ``8`` here, respectively) of the variable fragment from the working expression. Note that contrary to DW_OP_bit_piece, the offset is describing the the location @@ -4415,12 +4457,6 @@ The current supported vocabulary is limited: address space identifier. - ``DW_OP_stack_value`` marks a constant value. -DIExpression nodes that contain a ``DW_OP_stack_value`` operator are standalone -location descriptions that describe constant values. This form is used to -describe global constants that have been optimized away. All other expressions -are modifiers to another location: A debug intrinsic ties a location and a -DIExpression together. - DWARF specifies three kinds of simple location descriptions: Register, memory, and implicit location descriptions. Register and memory location descriptions describe the *location* of a source variable (in the sense that a debugger might @@ -4432,9 +4468,10 @@ combined with a concrete location. .. code-block:: llvm !0 = !DIExpression(DW_OP_deref) - !1 = !DIExpression(DW_OP_plus, 3) + !1 = !DIExpression(DW_OP_plus_uconst, 3) + !1 = !DIExpression(DW_OP_constu, 3, DW_OP_plus) !2 = !DIExpression(DW_OP_bit_piece, 3, 7) - !3 = !DIExpression(DW_OP_deref, DW_OP_plus, 3, DW_OP_LLVM_fragment, 3, 7) + !3 = !DIExpression(DW_OP_deref, DW_OP_constu, 3, DW_OP_plus, DW_OP_LLVM_fragment, 3, 7) !4 = !DIExpression(DW_OP_constu, 2, DW_OP_swap, DW_OP_xderef) !5 = !DIExpression(DW_OP_constu, 42, DW_OP_stack_value) @@ -5006,7 +5043,7 @@ which is the string ``llvm.loop.licm_versioning.disable``. For example: Loop distribution allows splitting a loop into multiple loops. Currently, this is only performed if the entire loop cannot be vectorized due to unsafe -memory dependencies. The transformation will atempt to isolate the unsafe +memory dependencies. The transformation will attempt to isolate the unsafe dependencies into their own loop. This metadata can be used to selectively enable or disable distribution of the @@ -5192,6 +5229,72 @@ Example: !0 = !{i32* @a} +'``prof``' Metadata +^^^^^^^^^^^^^^^^^^^ + +The ``prof`` metadata is used to record profile data in the IR. +The first operand of the metadata node indicates the profile metadata +type. There are currently 3 types: +:ref:`branch_weights`, +:ref:`function_entry_count`, and +:ref:`VP`. + +.. _prof_node_branch_weights: + +branch_weights +"""""""""""""" + +Branch weight metadata attached to a branch, select, switch or call instruction +represents the likeliness of the associated branch being taken. +For more information, see :doc:`BranchWeightMetadata`. + +.. _prof_node_function_entry_count: + +function_entry_count +"""""""""""""""""""" + +Function entry count metadata can be attached to function definitions +to record the number of times the function is called. Used with BFI +information, it is also used to derive the basic block profile count. +For more information, see :doc:`BranchWeightMetadata`. + +.. 
_prof_node_VP: + +VP +"" + +VP (value profile) metadata can be attached to instructions that have +value profile information. Currently this is indirect calls (where it +records the hottest callees) and calls to memory intrinsics such as memcpy, +memmove, and memset (where it records the hottest byte lengths). + +Each VP metadata node contains "VP" string, then a uint32_t value for the value +profiling kind, a uint64_t value for the total number of times the instruction +is executed, followed by uint64_t value and execution count pairs. +The value profiling kind is 0 for indirect call targets and 1 for memory +operations. For indirect call targets, each profile value is a hash +of the callee function name, and for memory operations each value is the +byte length. + +Note that the value counts do not need to add up to the total count +listed in the third operand (in practice only the top hottest values +are tracked and reported). + +Indirect call example: + +.. code-block:: llvm + + call void %f(), !prof !1 + !1 = !{!"VP", i32 0, i64 1600, i64 7651369219802541373, i64 1030, i64 -4377547752858689819, i64 410} + +Note that the VP type is 0 (the second operand), which indicates this is +an indirect call value profile data. The third operand indicates that the +indirect call executed 1600 times. The 4th and 6th operands give the +hashes of the 2 hottest target functions' names (this is the same hash used +to represent function names in the profile database), and the 5th and 7th +operands give the execution count that each of the respective prior target +functions was called. + Module Flags Metadata ===================== @@ -5266,6 +5369,10 @@ The following behaviors are supported: nodes. However, duplicate entries in the second list are dropped during the append operation. + * - 7 + - **Max** + Takes the max of the two values, which are required to be integers. + It is an error for a particular unique flag ID to have multiple behaviors, except in the case of **Require** (which adds restrictions on another metadata value) or **Override**. @@ -5358,40 +5465,6 @@ Some important flag interactions: - A module with ``Objective-C Garbage Collection`` set to 0 cannot be merged with a module with ``Objective-C GC Only`` set to 6. -Automatic Linker Flags Module Flags Metadata --------------------------------------------- - -Some targets support embedding flags to the linker inside individual object -files. Typically this is used in conjunction with language extensions which -allow source files to explicitly declare the libraries they depend on, and have -these automatically be transmitted to the linker via object files. - -These flags are encoded in the IR using metadata in the module flags section, -using the ``Linker Options`` key. The merge behavior for this flag is required -to be ``AppendUnique``, and the value for the key is expected to be a metadata -node which should be a list of other metadata nodes, each of which should be a -list of metadata strings defining linker options. 
- -For example, the following metadata section specifies two separate sets of -linker options, presumably to link against ``libz`` and the ``Cocoa`` -framework:: - - !0 = !{ i32 6, !"Linker Options", - !{ - !{ !"-lz" }, - !{ !"-framework", !"Cocoa" } } } - !llvm.module.flags = !{ !0 } - -The metadata encoding as lists of lists of options, as opposed to a collapsed -list of options, is chosen so that the IR encoding can use multiple option -strings to specify e.g., a single library, while still having that specifier be -preserved as an atomic element that can be recognized by a target specific -assembly writer or object file emitter. - -Each individual option is required to be either a valid option for the target's -linker, or an option that is reserved by the target specific assembly writer or -object file emitter. No other aspect of these options is defined by the IR. - C type width Module Flags Metadata ---------------------------------- @@ -5428,6 +5501,37 @@ enum is the smallest type which can represent all of its values:: !0 = !{i32 1, !"short_wchar", i32 1} !1 = !{i32 1, !"short_enum", i32 0} +Automatic Linker Flags Named Metadata +===================================== + +Some targets support embedding flags to the linker inside individual object +files. Typically this is used in conjunction with language extensions which +allow source files to explicitly declare the libraries they depend on, and have +these automatically be transmitted to the linker via object files. + +These flags are encoded in the IR using named metadata with the name +``!llvm.linker.options``. Each operand is expected to be a metadata node +which should be a list of other metadata nodes, each of which should be a +list of metadata strings defining linker options. + +For example, the following metadata section specifies two separate sets of +linker options, presumably to link against ``libz`` and the ``Cocoa`` +framework:: + + !0 = !{ !"-lz" } + !1 = !{ !"-framework", !"Cocoa" } + !llvm.linker.options = !{ !0, !1 } + +The metadata encoding as lists of lists of options, as opposed to a collapsed +list of options, is chosen so that the IR encoding can use multiple option +strings to specify e.g., a single library, while still having that specifier be +preserved as an atomic element that can be recognized by a target specific +assembly writer or object file emitter. + +Each individual option is required to be either a valid option for the target's +linker, or an option that is reserved by the target specific assembly writer or +object file emitter. No other aspect of these options is defined by the IR. + .. _intrinsicglobalvariables: Intrinsic Global Variables @@ -6697,15 +6801,14 @@ Semantics: The value produced is ``op1`` \* 2\ :sup:`op2` mod 2\ :sup:`n`, where ``n`` is the width of the result. If ``op2`` is (statically or dynamically) equal to or larger than the number of bits in -``op1``, the result is undefined. If the arguments are vectors, each -vector element of ``op1`` is shifted by the corresponding shift amount -in ``op2``. +``op1``, this instruction returns a :ref:`poison value `. +If the arguments are vectors, each vector element of ``op1`` is shifted +by the corresponding shift amount in ``op2``. -If the ``nuw`` keyword is present, then the shift produces a :ref:`poison -value ` if it shifts out any non-zero bits. If the -``nsw`` keyword is present, then the shift produces a :ref:`poison -value ` if it shifts out any bits that disagree with the -resultant sign bit.
+If the ``nuw`` keyword is present, then the shift produces a poison +value if it shifts out any non-zero bits. +If the ``nsw`` keyword is present, then the shift produces a poison +value if it shifts out any bits that disagree with the resultant sign bit. Example: """""""" @@ -6748,13 +6851,12 @@ Semantics: This instruction always performs a logical shift right operation. The most significant bits of the result will be filled with zero bits after the shift. If ``op2`` is (statically or dynamically) equal to or larger -than the number of bits in ``op1``, the result is undefined. If the -arguments are vectors, each vector element of ``op1`` is shifted by the -corresponding shift amount in ``op2``. +than the number of bits in ``op1``, this instruction returns a :ref:`poison +value `. If the arguments are vectors, each vector element +of ``op1`` is shifted by the corresponding shift amount in ``op2``. If the ``exact`` keyword is present, the result value of the ``lshr`` is -a :ref:`poison value ` if any of the bits shifted out are -non-zero. +a poison value if any of the bits shifted out are non-zero. Example: """""""" @@ -6799,13 +6901,12 @@ Semantics: This instruction always performs an arithmetic shift right operation, The most significant bits of the result will be filled with the sign bit of ``op1``. If ``op2`` is (statically or dynamically) equal to or larger -than the number of bits in ``op1``, the result is undefined. If the -arguments are vectors, each vector element of ``op1`` is shifted by the -corresponding shift amount in ``op2``. +than the number of bits in ``op1``, this instruction returns a :ref:`poison +value `. If the arguments are vectors, each vector element +of ``op1`` is shifted by the corresponding shift amount in ``op2``. If the ``exact`` keyword is present, the result value of the ``ashr`` is -a :ref:`poison value ` if any of the bits shifted out are -non-zero. +a poison value if any of the bits shifted out are non-zero. Example: """""""" @@ -7292,7 +7393,7 @@ Syntax: :: = load [volatile] , * [, align ][, !nontemporal !][, !invariant.load !][, !invariant.group !][, !nonnull !][, !dereferenceable !][, !dereferenceable_or_null !][, !align !] - = load atomic [volatile] , * [singlethread] , align [, !invariant.group !] + = load atomic [volatile] , * [syncscope("")] , align [, !invariant.group !] ! = !{ i32 1 } ! = !{i64 } ! = !{ i64 } @@ -7313,14 +7414,14 @@ modify the number or order of execution of this ``load`` with other :ref:`volatile operations `. If the ``load`` is marked as ``atomic``, it takes an extra :ref:`ordering -` and optional ``singlethread`` argument. The ``release`` and -``acq_rel`` orderings are not valid on ``load`` instructions. Atomic loads -produce :ref:`defined ` results when they may see multiple atomic -stores. The type of the pointee must be an integer, pointer, or floating-point -type whose bit width is a power of two greater than or equal to eight and less -than or equal to a target-specific size limit. ``align`` must be explicitly -specified on atomic loads, and the load has undefined behavior if the alignment -is not set to a value which is at least the size in bytes of the +` and optional ``syncscope("")`` argument. The +``release`` and ``acq_rel`` orderings are not valid on ``load`` instructions. +Atomic loads produce :ref:`defined ` results when they may see +multiple atomic stores.
The type of the pointee must be an integer, pointer, or +floating-point type whose bit width is a power of two greater than or equal to +eight and less than or equal to a target-specific size limit. ``align`` must be +explicitly specified on atomic loads, and the load has undefined behavior if the +alignment is not set to a value which is at least the size in bytes of the pointee. ``!nontemporal`` does not have any defined semantics for atomic loads. The optional constant ``align`` argument specifies the alignment of the @@ -7421,7 +7522,7 @@ Syntax: :: store [volatile] , * [, align ][, !nontemporal !][, !invariant.group !] ; yields void - store atomic [volatile] , * [singlethread] , align [, !invariant.group !] ; yields void + store atomic [volatile] , * [syncscope("")] , align [, !invariant.group !] ; yields void Overview: """"""""" @@ -7441,14 +7542,14 @@ allowed to modify the number or order of execution of this ``store`` with other structural type `) can be stored. If the ``store`` is marked as ``atomic``, it takes an extra :ref:`ordering -` and optional ``singlethread`` argument. The ``acquire`` and -``acq_rel`` orderings aren't valid on ``store`` instructions. Atomic loads -produce :ref:`defined ` results when they may see multiple atomic -stores. The type of the pointee must be an integer, pointer, or floating-point -type whose bit width is a power of two greater than or equal to eight and less -than or equal to a target-specific size limit. ``align`` must be explicitly -specified on atomic stores, and the store has undefined behavior if the -alignment is not set to a value which is at least the size in bytes of the +` and optional ``syncscope("")`` argument. The +``acquire`` and ``acq_rel`` orderings aren't valid on ``store`` instructions. +Atomic loads produce :ref:`defined ` results when they may see +multiple atomic stores. The type of the pointee must be an integer, pointer, or +floating-point type whose bit width is a power of two greater than or equal to +eight and less than or equal to a target-specific size limit. ``align`` must be +explicitly specified on atomic stores, and the store has undefined behavior if +the alignment is not set to a value which is at least the size in bytes of the pointee. ``!nontemporal`` does not have any defined semantics for atomic stores. The optional constant ``align`` argument specifies the alignment of the @@ -7509,7 +7610,7 @@ Syntax: :: - fence [singlethread] ; yields void + fence [syncscope("")] ; yields void Overview: """"""""" @@ -7543,17 +7644,17 @@ A ``fence`` which has ``seq_cst`` ordering, in addition to having both ``acquire`` and ``release`` semantics specified above, participates in the global program order of other ``seq_cst`` operations and/or fences. -The optional ":ref:`singlethread `" argument specifies -that the fence only synchronizes with other fences in the same thread. -(This is useful for interacting with signal handlers.) +A ``fence`` instruction can also take an optional +":ref:`syncscope `" argument. Example: """""""" .. code-block:: llvm - fence acquire ; yields void - fence singlethread seq_cst ; yields void + fence acquire ; yields void + fence syncscope("singlethread") seq_cst ; yields void + fence syncscope("agent") seq_cst ; yields void .. 
_i_cmpxchg: @@ -7565,7 +7666,7 @@ Syntax: :: - cmpxchg [weak] [volatile] * , , [singlethread] ; yields { ty, i1 } + cmpxchg [weak] [volatile] * , , [syncscope("")] ; yields { ty, i1 } Overview: """"""""" @@ -7594,10 +7695,8 @@ must be at least ``monotonic``, the ordering constraint on failure must be no stronger than that on success, and the failure ordering cannot be either ``release`` or ``acq_rel``. -The optional "``singlethread``" argument declares that the ``cmpxchg`` -is only atomic with respect to code (usually signal handlers) running in -the same thread as the ``cmpxchg``. Otherwise the cmpxchg is atomic with -respect to all other code in the system. +A ``cmpxchg`` instruction can also take an optional +":ref:`syncscope `" argument. The pointer passed into cmpxchg must have alignment greater than or equal to the size in memory of the operand. @@ -7651,7 +7750,7 @@ Syntax: :: - atomicrmw [volatile] * , [singlethread] ; yields ty + atomicrmw [volatile] * , [syncscope("")] ; yields ty Overview: """"""""" @@ -7685,6 +7784,9 @@ be a pointer to that type. If the ``atomicrmw`` is marked as order of execution of this ``atomicrmw`` with other :ref:`volatile operations `. +An ``atomicrmw`` instruction can also take an optional +":ref:`syncscope `" argument. + Semantics: """""""""" @@ -7745,7 +7847,7 @@ base address to start from. The remaining arguments are indices that indicate which of the elements of the aggregate object are indexed. The interpretation of each index is dependent on the type being indexed into. The first index always indexes the pointer value given as the -first argument, the second index indexes a value of the type pointed to +second argument, the second index indexes a value of the type pointed to (not necessarily the value directly pointed to, since the first index can be non-zero), etc. The first type indexed into must be a pointer value, subsequent types can be arrays, vectors, and structs. Note that @@ -9548,7 +9650,7 @@ Syntax: :: - declare i8 *@llvm.returnaddress(i32 ) + declare i8* @llvm.returnaddress(i32 ) Overview: """"""""" @@ -9586,7 +9688,7 @@ Syntax: :: - declare i8 *@llvm.addressofreturnaddress() + declare i8* @llvm.addressofreturnaddress() Overview: """"""""" @@ -10184,6 +10286,8 @@ overlap. It copies "len" bytes of memory over. If the argument is known to be aligned to some boundary, this can be specified as the fourth argument, otherwise it should be set to 0 or 1 (both meaning no alignment). +.. _int_memmove: + '``llvm.memmove``' Intrinsic ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -10239,6 +10343,8 @@ copies "len" bytes of memory over. If the argument is known to be aligned to some boundary, this can be specified as the fourth argument, otherwise it should be set to 0 or 1 (both meaning no alignment). +..
_int_memset: + '``llvm.memset.*``' Intrinsics ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -12722,7 +12828,7 @@ Syntax: declare @llvm.experimental.constrained.fadd( , , metadata , - metadata ) + metadata ) Overview: """"""""" @@ -12759,7 +12865,7 @@ Syntax: declare @llvm.experimental.constrained.fsub( , , metadata , - metadata ) + metadata ) Overview: """"""""" @@ -12796,7 +12902,7 @@ Syntax: declare @llvm.experimental.constrained.fmul( , , metadata , - metadata ) + metadata ) Overview: """"""""" @@ -12833,7 +12939,7 @@ Syntax: declare @llvm.experimental.constrained.fdiv( , , metadata , - metadata ) + metadata ) Overview: """"""""" @@ -12870,7 +12976,7 @@ Syntax: declare @llvm.experimental.constrained.frem( , , metadata , - metadata ) + metadata ) Overview: """"""""" @@ -12899,6 +13005,461 @@ value operands and has the same type as the operands. The remainder has the same sign as the dividend. +Constrained libm-equivalent Intrinsics +-------------------------------------- + +In addition to the basic floating point operations for which constrained +intrinsics are described above, there are constrained versions of various +operations which provide equivalent behavior to a corresponding libm function. +These intrinsics allow the precise behavior of these operations with respect to +rounding mode and exception behavior to be controlled. + +As with the basic constrained floating point intrinsics, the rounding mode +and exception behavior arguments only control the behavior of the optimizer. +They do not change the runtime floating point environment. + + +'``llvm.experimental.constrained.sqrt``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare + @llvm.experimental.constrained.sqrt( , + metadata , + metadata ) + +Overview: +""""""""" + +The '``llvm.experimental.constrained.sqrt``' intrinsic returns the square root +of the specified value, returning the same value as the libm '``sqrt``' +functions would, but without setting ``errno``. + +Arguments: +"""""""""" + +The first argument and the return type are floating point numbers of the same +type. + +The second and third arguments specify the rounding mode and exception +behavior as described above. + +Semantics: +"""""""""" + +This function returns the nonnegative square root of the specified value. +If the value is less than negative zero, a floating point exception occurs +and the return value is architecture specific. + + +'``llvm.experimental.constrained.pow``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare + @llvm.experimental.constrained.pow( , , + metadata , + metadata ) + +Overview: +""""""""" + +The '``llvm.experimental.constrained.pow``' intrinsic returns the first operand +raised to the (positive or negative) power specified by the second operand. + +Arguments: +"""""""""" + +The first two arguments and the return value are floating point numbers of the +same type. The second argument specifies the power to which the first argument +should be raised. + +The third and fourth arguments specify the rounding mode and exception +behavior as described above. + +Semantics: +"""""""""" + +This function returns the first value raised to the second power, +returning the same values as the libm ``pow`` functions would, and +handles error conditions in the same way.
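+For illustration, a double-precision call under dynamic rounding and strict +exception semantics might be written as the following sketch (the metadata +spellings ``"round.dynamic"`` and ``"fpexcept.strict"`` are assumed argument +values, not shown in the declarations above): + +.. code-block:: llvm + + %r = call double @llvm.experimental.constrained.pow.f64(double %x, double %y, metadata !"round.dynamic", metadata !"fpexcept.strict")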
+ + +'``llvm.experimental.constrained.powi``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare + @llvm.experimental.constrained.powi( , i32 , + metadata , + metadata ) + +Overview: +""""""""" + +The '``llvm.experimental.constrained.powi``' intrinsic returns the first operand +raised to the (positive or negative) power specified by the second operand. The +order of evaluation of multiplications is not defined. When a vector of floating +point type is used, the second argument remains a scalar integer value. + + +Arguments: +"""""""""" + +The first argument and the return value are floating point numbers of the same +type. The second argument is a 32-bit signed integer specifying the power to +which the first argument should be raised. + +The third and fourth arguments specify the rounding mode and exception +behavior as described above. + +Semantics: +"""""""""" + +This function returns the first value raised to the second power with an +unspecified sequence of rounding operations. + + +'``llvm.experimental.constrained.sin``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare + @llvm.experimental.constrained.sin( , + metadata , + metadata ) + +Overview: +""""""""" + +The '``llvm.experimental.constrained.sin``' intrinsic returns the sine of the +first operand. + +Arguments: +"""""""""" + +The first argument and the return type are floating point numbers of the same +type. + +The second and third arguments specify the rounding mode and exception +behavior as described above. + +Semantics: +"""""""""" + +This function returns the sine of the specified operand, returning the +same values as the libm ``sin`` functions would, and handles error +conditions in the same way. + + +'``llvm.experimental.constrained.cos``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare + @llvm.experimental.constrained.cos( , + metadata , + metadata ) + +Overview: +""""""""" + +The '``llvm.experimental.constrained.cos``' intrinsic returns the cosine of the +first operand. + +Arguments: +"""""""""" + +The first argument and the return type are floating point numbers of the same +type. + +The second and third arguments specify the rounding mode and exception +behavior as described above. + +Semantics: +"""""""""" + +This function returns the cosine of the specified operand, returning the +same values as the libm ``cos`` functions would, and handles error +conditions in the same way. + + +'``llvm.experimental.constrained.exp``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare + @llvm.experimental.constrained.exp( , + metadata , + metadata ) + +Overview: +""""""""" + +The '``llvm.experimental.constrained.exp``' intrinsic computes the base-e +exponential of the specified value. + +Arguments: +"""""""""" + +The first argument and the return value are floating point numbers of the same +type. + +The second and third arguments specify the rounding mode and exception +behavior as described above. + +Semantics: +"""""""""" + +This function returns the same values as the libm ``exp`` functions +would, and handles error conditions in the same way. 
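+For example, a sketch of an ``exp`` call with the same assumed metadata +spellings as the ``pow`` example earlier: + +.. code-block:: llvm + + %e = call double @llvm.experimental.constrained.exp.f64(double %x, metadata !"round.dynamic", metadata !"fpexcept.strict")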
+ + +'``llvm.experimental.constrained.exp2``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare + @llvm.experimental.constrained.exp2( , + metadata , + metadata ) + +Overview: +""""""""" + +The '``llvm.experimental.constrained.exp2``' intrinsic computes the base-2 +exponential of the specified value. + + +Arguments: +"""""""""" + +The first argument and the return value are floating point numbers of the same +type. + +The second and third arguments specify the rounding mode and exception +behavior as described above. + +Semantics: +"""""""""" + +This function returns the same values as the libm ``exp2`` functions +would, and handles error conditions in the same way. + + +'``llvm.experimental.constrained.log``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare + @llvm.experimental.constrained.log( , + metadata , + metadata ) + +Overview: +""""""""" + +The '``llvm.experimental.constrained.log``' intrinsic computes the base-e +logarithm of the specified value. + +Arguments: +"""""""""" + +The first argument and the return value are floating point numbers of the same +type. + +The second and third arguments specify the rounding mode and exception +behavior as described above. + + +Semantics: +"""""""""" + +This function returns the same values as the libm ``log`` functions +would, and handles error conditions in the same way. + + +'``llvm.experimental.constrained.log10``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare + @llvm.experimental.constrained.log10( , + metadata , + metadata ) + +Overview: +""""""""" + +The '``llvm.experimental.constrained.log10``' intrinsic computes the base-10 +logarithm of the specified value. + +Arguments: +"""""""""" + +The first argument and the return value are floating point numbers of the same +type. + +The second and third arguments specify the rounding mode and exception +behavior as described above. + +Semantics: +"""""""""" + +This function returns the same values as the libm ``log10`` functions +would, and handles error conditions in the same way. + + +'``llvm.experimental.constrained.log2``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare + @llvm.experimental.constrained.log2( , + metadata , + metadata ) + +Overview: +""""""""" + +The '``llvm.experimental.constrained.log2``' intrinsic computes the base-2 +logarithm of the specified value. + +Arguments: +"""""""""" + +The first argument and the return value are floating point numbers of the same +type. + +The second and third arguments specify the rounding mode and exception +behavior as described above. + +Semantics: +"""""""""" + +This function returns the same values as the libm ``log2`` functions +would, and handles error conditions in the same way. + + +'``llvm.experimental.constrained.rint``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare + @llvm.experimental.constrained.rint( , + metadata , + metadata ) + +Overview: +""""""""" + +The '``llvm.experimental.constrained.rint``' intrinsic returns the first +operand rounded to the nearest integer. It may raise an inexact floating point +exception if the operand is not an integer. + +Arguments: +"""""""""" + +The first argument and the return value are floating point numbers of the same +type. + +The second and third arguments specify the rounding mode and exception +behavior as described above. 
+
+Semantics:
+""""""""""
+
+This function returns the same values as the libm ``rint`` functions
+would, and handles error conditions in the same way. The rounding mode is
+described, not determined, by the rounding mode argument. The actual rounding
+mode is determined by the runtime floating point environment. The rounding
+mode argument is only intended as information to the compiler.
+
+
+'``llvm.experimental.constrained.nearbyint``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare <type>
+      @llvm.experimental.constrained.nearbyint(<type> <op1>,
+                                               metadata <rounding mode>,
+                                               metadata <exception behavior>)
+
+Overview:
+"""""""""
+
+The '``llvm.experimental.constrained.nearbyint``' intrinsic returns the first
+operand rounded to the nearest integer. It will not raise an inexact floating
+point exception if the operand is not an integer.
+
+
+Arguments:
+""""""""""
+
+The first argument and the return value are floating point numbers of the same
+type.
+
+The second and third arguments specify the rounding mode and exception
+behavior as described above.
+
+Semantics:
+""""""""""
+
+This function returns the same values as the libm ``nearbyint`` functions
+would, and handles error conditions in the same way. The rounding mode is
+described, not determined, by the rounding mode argument. The actual rounding
+mode is determined by the runtime floating point environment. The rounding
+mode argument is only intended as information to the compiler.
+
+
 General Intrinsics
 ------------------
 
@@ -13553,62 +14114,66 @@ Element Wise Atomic Memory Intrinsics
 These intrinsics are similar to the standard library memory intrinsics except
 that they perform memory transfer as a sequence of atomic memory accesses.
 
-.. _int_memcpy_element_atomic:
+.. _int_memcpy_element_unordered_atomic:
 
-'``llvm.memcpy.element.atomic``' Intrinsic
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+'``llvm.memcpy.element.unordered.atomic``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 Syntax:
 """""""
 
-This is an overloaded intrinsic. You can use ``llvm.memcpy.element.atomic`` on
+This is an overloaded intrinsic. You can use ``llvm.memcpy.element.unordered.atomic`` on
 any integer bit width and for different address spaces. Not all targets
 support all bit widths however.
 
 ::
 
-      declare void @llvm.memcpy.element.atomic.p0i8.p0i8(i8* <dest>, i8* <src>,
-                                                          i64 <num_elements>, i32 <element_size>)
+      declare void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* <dest>,
+                                                                       i8* <src>,
+                                                                       i32 <len>,
+                                                                       i32 <element_size>)
+      declare void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* <dest>,
+                                                                       i8* <src>,
+                                                                       i64 <len>,
+                                                                       i32 <element_size>)
 
 Overview:
 """""""""
 
-The '``llvm.memcpy.element.atomic.*``' intrinsic performs copy of a block of
-memory from the source location to the destination location as a sequence of
-unordered atomic memory accesses where each access is a multiple of
-``element_size`` bytes wide and aligned at an element size boundary. For example
-each element is accessed atomically in source and destination buffers.
+The '``llvm.memcpy.element.unordered.atomic.*``' intrinsic is a specialization of the
+'``llvm.memcpy.*``' intrinsic. It differs in that the ``dest`` and ``src`` are treated
+as arrays with elements that are exactly ``element_size`` bytes, and the copy between
+buffers uses a sequence of :ref:`unordered atomic <ordering>` load/store operations
+that are a positive integer multiple of the ``element_size`` in size.
 
 Arguments:
 """"""""""
 
-The first argument is a pointer to the destination, the second is a
-pointer to the source. The third argument is an integer argument
-specifying the number of elements to copy, the fourth argument is size of
-the single element in bytes.
+The first three arguments are the same as they are in the
+:ref:`@llvm.memcpy <int_memcpy>` intrinsic, with the added constraint that
+``len`` is required to be a positive integer multiple of the ``element_size``.
+If ``len`` is not a positive integer multiple of ``element_size``, then the
+behavior of the intrinsic is undefined.
 
-``element_size`` should be a power of two, greater than zero and less than
-a target-specific atomic access size limit.
+``element_size`` must be a compile-time constant positive power of two no
+greater than a target-specific atomic access size limit.
 
-For each of the input pointers ``align`` parameter attribute must be specified.
-It must be a power of two and greater than or equal to the ``element_size``.
-Caller guarantees that both the source and destination pointers are aligned to
-that boundary.
+For each of the input pointers the ``align`` parameter attribute must be
+specified. It must be a power of two no less than the ``element_size``. The
+caller guarantees that both the source and destination pointers are aligned to
+that boundary.
 
 Semantics:
 """"""""""
 
-The '``llvm.memcpy.element.atomic.*``' intrinsic copies
-'``num_elements`` * ``element_size``' bytes of memory from the source location to
-the destination location. These locations are not allowed to overlap. Memory copy
-is performed as a sequence of unordered atomic memory accesses where each access
-is guaranteed to be a multiple of ``element_size`` bytes wide and aligned at an
-element size boundary.
+The '``llvm.memcpy.element.unordered.atomic.*``' intrinsic copies ``len`` bytes of
+memory from the source location to the destination location. These locations are not
+allowed to overlap. The memory copy is performed as a sequence of load/store operations
+where each access is guaranteed to be a multiple of ``element_size`` bytes wide and
+aligned at an ``element_size`` boundary.
 
 The order of the copy is unspecified. The same value may be read from the source
 buffer many times, but only one write is issued to the destination buffer per
-element. It is well defined to have concurrent reads and writes to both source
-and destination provided those reads and writes are at least unordered atomic.
+element. It is well defined to have concurrent reads and writes to both source and
+destination provided those reads and writes are unordered atomic when specified.
 
 This intrinsic does not provide any additional ordering guarantees over those
 provided by a set of unordered loads from the source location and stores to the
@@ -13617,8 +14182,158 @@ destination.
 
 Lowering:
 """""""""
 
-In the most general case call to the '``llvm.memcpy.element.atomic.*``' is lowered
-to a call to the symbol ``__llvm_memcpy_element_atomic_*``. Where '*' is replaced
-with an actual element size.
+In the most general case, a call to the '``llvm.memcpy.element.unordered.atomic.*``'
+intrinsic is lowered to a call to the symbol
+``__llvm_memcpy_element_unordered_atomic_*``, where '*' is replaced with the
+actual element size.
 
 The optimizer is allowed to inline the memory copy when it's profitable to do so.
+
+'``llvm.memmove.element.unordered.atomic``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use
+``llvm.memmove.element.unordered.atomic`` on any integer bit width and for
+different address spaces. Not all targets support all bit widths however.
+
+::
+
+      declare void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* <dest>,
+                                                                        i8* <src>,
+                                                                        i32 <len>,
+                                                                        i32 <element_size>)
+      declare void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* <dest>,
+                                                                        i8* <src>,
+                                                                        i64 <len>,
+                                                                        i32 <element_size>)
+
+Overview:
+"""""""""
+
+The '``llvm.memmove.element.unordered.atomic.*``' intrinsic is a specialization
+of the '``llvm.memmove.*``' intrinsic. It differs in that the ``dest`` and
+``src`` are treated as arrays with elements that are exactly ``element_size``
+bytes, and the copy between buffers uses a sequence of
+:ref:`unordered atomic <ordering>` load/store operations that are a positive
+integer multiple of the ``element_size`` in size.
+
+Arguments:
+""""""""""
+
+The first three arguments are the same as they are in the
+:ref:`@llvm.memmove <int_memmove>` intrinsic, with the added constraint that
+``len`` is required to be a positive integer multiple of the ``element_size``.
+If ``len`` is not a positive integer multiple of ``element_size``, then the
+behavior of the intrinsic is undefined.
+
+``element_size`` must be a compile-time constant positive power of two no
+greater than a target-specific atomic access size limit.
+
+For each of the input pointers the ``align`` parameter attribute must be
+specified. It must be a power of two no less than the ``element_size``. The
+caller guarantees that both the source and destination pointers are aligned to
+that boundary.
+
+Semantics:
+""""""""""
+
+The '``llvm.memmove.element.unordered.atomic.*``' intrinsic copies ``len`` bytes
+of memory from the source location to the destination location. These locations
+are allowed to overlap. The memory copy is performed as a sequence of load/store
+operations where each access is guaranteed to be a multiple of ``element_size``
+bytes wide and aligned at an ``element_size`` boundary.
+
+The order of the copy is unspecified. The same value may be read from the source
+buffer many times, but only one write is issued to the destination buffer per
+element. It is well defined to have concurrent reads and writes to both source
+and destination provided those reads and writes are unordered atomic when
+specified.
+
+This intrinsic does not provide any additional ordering guarantees over those
+provided by a set of unordered loads from the source location and stores to the
+destination.
+
+Lowering:
+"""""""""
+
+In the most general case, a call to the
+'``llvm.memmove.element.unordered.atomic.*``' intrinsic is lowered to a call to
+the symbol ``__llvm_memmove_element_unordered_atomic_*``, where '*' is replaced
+with the actual element size.
+
+The optimizer is allowed to inline the memory copy when it's profitable to do so.
+
+.. _int_memset_element_unordered_atomic:
+
+'``llvm.memset.element.unordered.atomic``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.memset.element.unordered.atomic`` on
+any integer bit width and for different address spaces. Not all targets
+support all bit widths however.
+
+::
+
+      declare void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* <dest>,
+                                                                  i8 <value>,
+                                                                  i32 <len>,
+                                                                  i32 <element_size>)
+      declare void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* <dest>,
+                                                                  i8 <value>,
+                                                                  i64 <len>,
+                                                                  i32 <element_size>)
+
+Overview:
+"""""""""
+
+The '``llvm.memset.element.unordered.atomic.*``' intrinsic is a specialization of the
+'``llvm.memset.*``' intrinsic. It differs in that the ``dest`` is treated as an array
+with elements that are exactly ``element_size`` bytes, and the assignment to that
+array uses a sequence of :ref:`unordered atomic <ordering>` store operations that
+are a positive integer multiple of the ``element_size`` in size.
+
+Arguments:
+""""""""""
+
+The first three arguments are the same as they are in the
+:ref:`@llvm.memset <int_memset>` intrinsic, with the added constraint that
+``len`` is required to be a positive integer multiple of the ``element_size``.
+If ``len`` is not a positive integer multiple of ``element_size``, then the
+behavior of the intrinsic is undefined.
+
+``element_size`` must be a compile-time constant positive power of two no
+greater than a target-specific atomic access size limit.
+
+The ``dest`` input pointer must have the ``align`` parameter attribute specified. It
+must be a power of two no less than the ``element_size``. The caller guarantees that
+the destination pointer is aligned to that boundary.
+
+Semantics:
+""""""""""
+
+The '``llvm.memset.element.unordered.atomic.*``' intrinsic sets the ``len`` bytes of
+memory starting at the destination location to the given ``value``. The memory is
+set with a sequence of store operations where each access is guaranteed to be a
+multiple of ``element_size`` bytes wide and aligned at an ``element_size`` boundary.
+
+The order of the assignment is unspecified. Only one write is issued to the
+destination buffer per element. It is well defined to have concurrent reads and
+writes to the destination provided those reads and writes are unordered atomic
+when specified.
+
+This intrinsic does not provide any additional ordering guarantees over those
+provided by a set of unordered stores to the destination.
+
+Lowering:
+"""""""""
+
+In the most general case, a call to the '``llvm.memset.element.unordered.atomic.*``'
+intrinsic is lowered to a call to the symbol
+``__llvm_memset_element_unordered_atomic_*``, where '*' is replaced with the
+actual element size.
+
+The optimizer is allowed to inline the memory assignment when it's profitable to do so.
+
diff --git a/interpreter/llvm/src/docs/Lexicon.rst b/interpreter/llvm/src/docs/Lexicon.rst
index ebc3fb772e81b..ce7ed318fe4b6 100644
--- a/interpreter/llvm/src/docs/Lexicon.rst
+++ b/interpreter/llvm/src/docs/Lexicon.rst
@@ -109,6 +109,13 @@ G
    Garbage Collection. The practice of using reachability analysis instead of
    explicit memory management to reclaim unused memory.
 
+**GVN**
+  Global Value Numbering. GVN is a pass that partitions values computed by a
+  function into congruence classes. Values ending up in the same congruence
+  class are guaranteed to be the same for every execution of the program.
+  In that respect, congruency is a compile-time approximation of equivalence
+  of values at runtime.
+
 H
 -
 
diff --git a/interpreter/llvm/src/docs/LibFuzzer.rst b/interpreter/llvm/src/docs/LibFuzzer.rst
index 5acfa04ce1f45..0f0b0e2e6fbd2 100644
--- a/interpreter/llvm/src/docs/LibFuzzer.rst
+++ b/interpreter/llvm/src/docs/LibFuzzer.rst
@@ -587,7 +587,7 @@ The simplest way is to have a statically initialized global object inside
 
 Alternatively, you may define an optional init function and it will receive
 the program arguments that you can read and modify. Do this **only** if you
-realy need to access ``argv``/``argc``.
+really need to access ``argv``/``argc``.
 
 ..
code-block:: c++ diff --git a/interpreter/llvm/src/docs/Phabricator.rst b/interpreter/llvm/src/docs/Phabricator.rst index 8d1984b65cd99..cc8484cc1e3e3 100644 --- a/interpreter/llvm/src/docs/Phabricator.rst +++ b/interpreter/llvm/src/docs/Phabricator.rst @@ -54,7 +54,8 @@ reviewer understand your code. To get a full diff, use one of the following commands (or just use Arcanist to upload your patch): -* ``git diff -U999999 other-branch`` +* ``git show HEAD -U999999 > mypatch.patch`` +* ``git format-patch -U999999 @{u}`` * ``svn diff --diff-cmd=diff -x -U999999`` To upload a new patch: diff --git a/interpreter/llvm/src/docs/Proposals/VectorizationPlan.rst b/interpreter/llvm/src/docs/Proposals/VectorizationPlan.rst new file mode 100644 index 0000000000000..aed8e3d2b7935 --- /dev/null +++ b/interpreter/llvm/src/docs/Proposals/VectorizationPlan.rst @@ -0,0 +1,182 @@ +================== +Vectorization Plan +================== + +.. contents:: + :local: + +Abstract +======== +The vectorization transformation can be rather complicated, involving several +potential alternatives, especially for outer-loops [1]_ but also possibly for +innermost loops. These alternatives may have significant performance impact, +both positive and negative. A cost model is therefore employed to identify the +best alternative, including the alternative of avoiding any transformation +altogether. + +The Vectorization Plan is an explicit model for describing vectorization +candidates. It serves for both optimizing candidates including estimating their +cost reliably, and for performing their final translation into IR. This +facilitates dealing with multiple vectorization candidates. + +High-level Design +================= + +Vectorization Workflow +---------------------- +VPlan-based vectorization involves three major steps, taking a "scenario-based +approach" to vectorization planning: + +1. Legal Step: check if a loop can be legally vectorized; encode constraints and + artifacts if so. +2. Plan Step: + + a. Build initial VPlans following the constraints and decisions taken by + Legal Step 1, and compute their cost. + b. Apply optimizations to the VPlans, possibly forking additional VPlans. + Prune sub-optimal VPlans having relatively high cost. +3. Execute Step: materialize the best VPlan. Note that this is the only step + that modifies the IR. + +Design Guidelines +----------------- +In what follows, the term "input IR" refers to code that is fed into the +vectorizer whereas the term "output IR" refers to code that is generated by the +vectorizer. The output IR contains code that has been vectorized or "widened" +according to a loop Vectorization Factor (VF), and/or loop unroll-and-jammed +according to an Unroll Factor (UF). +The design of VPlan follows several high-level guidelines: + +1. Analysis-like: building and manipulating VPlans must not modify the input IR. + In particular, if the best option is not to vectorize at all, the + vectorization process terminates before reaching Step 3, and compilation + should proceed as if VPlans had not been built. + +2. Align Cost & Execute: each VPlan must support both estimating the cost and + generating the output IR code, such that the cost estimation evaluates the + to-be-generated code reliably. + +3. Support vectorizing additional constructs: + + a. Outer-loop vectorization. In particular, VPlan must be able to model the + control-flow of the output IR which may include multiple basic-blocks and + nested loops. + b. SLP vectorization. + c. 
Combinations of the above, including nested vectorization: vectorizing
+      both an inner loop and an outer-loop at the same time (each with its own
+      VF and UF), mixed vectorization: vectorizing a loop with SLP patterns
+      inside [4]_, (re)vectorizing input IR containing vector code.
+   d. Function vectorization [2]_.
+
+4. Support multiple candidates efficiently. In particular, similar candidates
+   related to a range of possible VF's and UF's must be represented efficiently.
+   Potential versioning needs to be supported efficiently.
+
+5. Support vectorizing idioms, such as interleaved groups of strided loads or
+   stores. This is achieved by modeling a sequence of output instructions using
+   a "Recipe", which is responsible for computing its cost and generating its
+   code.
+
+6. Encapsulate Single-Entry Single-Exit regions (SESE). During vectorization
+   such regions may need to be, for example, predicated and linearized, or
+   replicated VF*UF times to handle scalarized and predicated instructions.
+   Inner loops are also modelled as SESE regions.
+
+Low-level Design
+================
+The low-level design of VPlan comprises the following classes.
+
+:LoopVectorizationPlanner:
+  A LoopVectorizationPlanner is designed to handle the vectorization of a loop
+  or a loop nest. It can construct, optimize and discard one or more VPlans,
+  each VPlan modelling a distinct way to vectorize the loop or the loop nest.
+  Once the best VPlan is determined, including the best VF and UF, this VPlan
+  drives the generation of output IR.
+
+:VPlan:
+  A model of a vectorized candidate for a given input IR loop or loop nest. This
+  candidate is represented using a Hierarchical CFG. VPlan supports estimating
+  the cost and driving the generation of the output IR code it represents.
+
+:Hierarchical CFG:
+  A control-flow graph whose nodes are basic-blocks or Hierarchical CFG's. The
+  Hierarchical CFG data structure is similar to the Tile Tree [5]_, where
+  cross-Tile edges are lifted to connect Tiles instead of the original
+  basic-blocks as in Sharir [6]_, promoting the Tile encapsulation. The terms
+  Region and Block are used rather than Tile [5]_ to avoid confusion with loop
+  tiling.
+
+:VPBlockBase:
+  The building block of the Hierarchical CFG. A pure-virtual base-class of
+  VPBasicBlock and VPRegionBlock, see below. VPBlockBase models the hierarchical
+  control-flow relations with other VPBlocks. Note that in contrast to the IR
+  BasicBlock, a VPBlockBase models its control-flow successors and predecessors
+  directly, rather than through a Terminator branch or through predecessor
+  branches that "use" the VPBlockBase.
+
+:VPBasicBlock:
+  VPBasicBlock is a subclass of VPBlockBase, and serves as the leaves of the
+  Hierarchical CFG. It represents a sequence of output IR instructions that will
+  appear consecutively in an output IR basic-block. The instructions of this
+  basic-block originate from one or more VPBasicBlocks. VPBasicBlock holds a
+  sequence of zero or more VPRecipes that model the cost and generation of the
+  output IR instructions.
+
+:VPRegionBlock:
+  VPRegionBlock is a subclass of VPBlockBase. It models a collection of
+  VPBasicBlocks and VPRegionBlocks which form a SESE subgraph of the output IR
+  CFG. A VPRegionBlock may indicate that its contents are to be replicated a
+  constant number of times when output IR is generated, effectively representing
+  a loop with constant trip-count that will be completely unrolled.
This is used + to support scalarized and predicated instructions with a single model for + multiple candidate VF's and UF's. + +:VPRecipeBase: + A pure-virtual base class modeling a sequence of one or more output IR + instructions, possibly based on one or more input IR instructions. These + input IR instructions are referred to as "Ingredients" of the Recipe. A Recipe + may specify how its ingredients are to be transformed to produce the output IR + instructions; e.g., cloned once, replicated multiple times or widened + according to selected VF. + +:VPTransformState: + Stores information used for generating output IR, passed from + LoopVectorizationPlanner to its selected VPlan for execution, and used to pass + additional information down to VPBlocks and VPRecipes. + +Related LLVM components +----------------------- +1. SLP Vectorizer: one can compare the VPlan model with LLVM's existing SLP + tree, where TSLP [3]_ adds Plan Step 2.b. + +2. RegionInfo: one can compare VPlan's H-CFG with the Region Analysis as used by + Polly [7]_. + +References +---------- +.. [1] "Outer-loop vectorization: revisited for short SIMD architectures", Dorit + Nuzman and Ayal Zaks, PACT 2008. + +.. [2] "Proposal for function vectorization and loop vectorization with function + calls", Xinmin Tian, [`cfe-dev + `_]., + March 2, 2016. + See also `review `_. + +.. [3] "Throttling Automatic Vectorization: When Less is More", Vasileios + Porpodas and Tim Jones, PACT 2015 and LLVM Developers' Meeting 2015. + +.. [4] "Exploiting mixed SIMD parallelism by reducing data reorganization + overhead", Hao Zhou and Jingling Xue, CGO 2016. + +.. [5] "Register Allocation via Hierarchical Graph Coloring", David Callahan and + Brian Koblenz, PLDI 1991 + +.. [6] "Structural analysis: A new approach to flow analysis in optimizing + compilers", M. Sharir, Journal of Computer Languages, Jan. 1980 + +.. [7] "Enabling Polyhedral Optimizations in LLVM", Tobias Grosser, Diploma + thesis, 2011. + +.. [8] "Introducing VPlan to the Loop Vectorizer", Gil Rapaport and Ayal Zaks, + European LLVM Developers' Meeting 2017. diff --git a/interpreter/llvm/src/docs/ReleaseNotes.rst b/interpreter/llvm/src/docs/ReleaseNotes.rst index bc35e62189a28..4e91eea2cbc8f 100644 --- a/interpreter/llvm/src/docs/ReleaseNotes.rst +++ b/interpreter/llvm/src/docs/ReleaseNotes.rst @@ -5,11 +5,6 @@ LLVM 5.0.0 Release Notes .. contents:: :local: -.. warning:: - These are in-progress notes for the upcoming LLVM 5 release. - Release notes for previous releases can be found on - `the Download Page `_. - Introduction ============ @@ -26,81 +21,244 @@ have questions or comments, the `LLVM Developer's Mailing List `_ is a good place to send them. -Note that if you are reading this file from a Subversion checkout or the main -LLVM web page, this document applies to the *next* release, not the current -one. To see the release notes for a specific release, please see the `releases -page `_. - Non-comprehensive list of changes in this release ================================================= -.. NOTE - For small 1-3 sentence descriptions, just add an entry at the end of - this list. If your description won't fit comfortably in one bullet - point (e.g. maybe you would like to give an example of the - functionality, or simply have a lot to talk about), see the `NOTE` below - for adding a new subsection. * LLVM's ``WeakVH`` has been renamed to ``WeakTrackingVH`` and a new ``WeakVH`` has been introduced. 
The new ``WeakVH`` nulls itself out on deletion, but does not track values across RAUW. -* ... next change ... +* A new library named ``BinaryFormat`` has been created which holds a collection + of code which previously lived in ``Support``. This includes the + ``file_magic`` structure and ``identify_magic`` functions, as well as all the + structure and type definitions for DWARF, ELF, COFF, WASM, and MachO file + formats. + +* The tool ``llvm-pdbdump`` has been renamed ``llvm-pdbutil`` to better reflect + its nature as a general purpose PDB manipulation / diagnostics tool that does + more than just dumping contents. + +* The ``BBVectorize`` pass has been removed. It was fully replaced and no + longer used back in 2014 but we didn't get around to removing it. Now it is + gone. The SLP vectorizer is the suggested non-loop vectorization pass. -.. NOTE - If you would like to document a larger change, then you can add a - subsection about it right here. You can copy the following boilerplate - and un-indent it (the indentation causes it to be inside this comment). +* A new tool opt-viewer.py has been added to visualize optimization remarks in + HTML. The tool processes the YAML files produced by clang with the + -fsave-optimization-record option. - Special New Feature - ------------------- +* A new CMake macro ``LLVM_REVERSE_ITERATION`` has been added. If enabled, all + supported unordered LLVM containers would be iterated in reverse order. This + is useful for uncovering non-determinism caused by iteration of unordered + containers. Currently, it supports reverse iteration of SmallPtrSet and + DenseMap. + +* A new tool ``llvm-dlltool`` has been added to create short import libraries + from GNU style definition files. The tool utilizes the PE COFF SPEC Import + Library Format and PE COFF Auxiliary Weak Externals Format to achieve + compatibility with LLD and MSVC LINK. - Makes programs 10x faster by doing Special New Thing. Changes to the LLVM IR ---------------------- -Changes to the ARM Backend --------------------------- +* The datalayout string may now indicate an address space to use for + the pointer type of ``alloca`` rather than the default of 0. - During this release ... +* Added ``speculatable`` attribute indicating a function which has no + side-effects which could inhibit hoisting of calls. +Changes to the Arm Targets +-------------------------- + +During this release the AArch64 target has: + +* A much improved Global ISel at O0. +* Support for ARMv8.1 8.2 and 8.3 instructions. +* New scheduler information for ThunderX2. +* Some SVE type changes but not much more than that. +* Made instruction fusion more aggressive, resulting in speedups + for code making use of AArch64 AES instructions. AES fusion has been + enabled for most Cortex-A cores and the AArch64MacroFusion pass was moved + to the generic MacroFusion pass. +* Added preferred function alignments for most Cortex-A cores. +* OpenMP "offload-to-self" base support. + +During this release the ARM target has: + +* Improved, but still mostly broken, Global ISel. +* Scheduling models update, new schedule for Cortex-A57. +* Hardware breakpoint support in LLDB. +* New assembler error handling, with spelling corrections and multiple + suggestions on how to fix problems. +* Improved mixed ARM/Thumb code generation. Some cases in which wrong + relocations were emitted have been fixed. +* Added initial support for mixed ARM/Thumb link-time optimization, using the + thumb-mode target feature. 
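Before the target-specific notes below, a brief illustration of the ``speculatable`` attribute listed under "Changes to the LLVM IR" above. The sketch is not part of this patch; it only shows how the attribute can be attached through the C++ API, with a ``hasFnAttribute`` query added purely to demonstrate where the attribute lands.

.. code-block:: c++

    #include "llvm/IR/Attributes.h"
    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/Function.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;

    int main() {
      LLVMContext Ctx;
      Module M("speculatable-demo", Ctx);

      // A side-effect-free helper: marking it speculatable tells the
      // optimizer that calls to it may be hoisted past control flow
      // that might otherwise have skipped the call.
      Function *F = Function::Create(
          FunctionType::get(Type::getInt32Ty(Ctx), {Type::getInt32Ty(Ctx)},
                            false),
          Function::ExternalLinkage, "pure_helper", &M);
      F->addFnAttr(Attribute::Speculatable);

      outs() << "speculatable: "
             << F->hasFnAttribute(Attribute::Speculatable) << "\n";
      return 0;
    }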
Changes to the MIPS Target
 --------------------------
 
- During this release ...
+* The microMIPS64R6 backend is deprecated and will be removed in the next
+  release.
+
+* The MIPS backend now directly supports vector types for arguments and return
+  values (previously this required ABI specific LLVM IR).
+
+* Added documentation for how the MIPS backend handles address lowering.
+
+* Added a GCC compatible option -m(no-)madd4 to control the generation of four
+  operand multiply addition/subtraction instructions.
+
+* Added basic support for the XRay instrumentation system.
+
+* Added support for more assembly aliases and macros.
+
+* Added support for the ``micromips`` and ``nomicromips`` function attributes
+  which control micromips code generation on a per function basis.
+
+* Added the ``long-calls`` feature for non-pic environments. This feature is
+  used where the callee is out of range of the caller using a standard call
+  sequence. It must be enabled specifically.
+
+* Added support for performing microMIPS code generation via function
+  attributes.
+
+* Added experimental support for the static relocation model for the N64 ABI.
+
+* Added partial support for the MT ASE.
+
+* Added basic support for code size reduction for microMIPS.
+
+* Fixed numerous bugs including: multi-precision arithmetic support, various
+  vectorization bugs, debug information for thread local variables, debug
+  sections lacking the correct flags, crashing when disassembling sections
+  whose size is not a multiple of two or four.
 
 Changes to the PowerPC Target
 -----------------------------
 
- During this release ...
+* Additional support and exploitation of POWER ISA 3.0: vabsdub, vabsduh,
+  vabsduw, modsw, moduw, modsd, modud, lxv, stxv, vextublx, vextubrx, vextuhlx,
+  vextuhrx, vextuwlx, vextuwrx, vextsb2w, vextsb2d, vextsh2w, vextsh2d, and
+  vextsw2d.
+
+* Implemented Optimal Code Sequences from The PowerPC Compiler Writer's Guide.
+
+* Enabled -fomit-frame-pointer by default.
+
+* Improved handling of the bit reverse intrinsic.
+
+* Improved handling of memcpy and memcmp functions.
+
+* Improved handling of branches with static branch hints.
+
+* Improved codegen for atomic load_acquire.
+
+* Improved block placement during code layout.
+
+* Many improvements to instruction selection and code generation.
+
 Changes to the X86 Target
 -------------------------
 
- During this release ...
+* Added initial AMD Ryzen (znver1) scheduler support.
+
+* Added support for Intel Goldmont CPUs.
+
+* Added support for avx512vpopcntdq instructions.
+
+* Added heuristics to convert CMOV into branches when it may be profitable.
+
+* More aggressive inlining of memcmp calls.
+
+* Improved vXi64 shuffles on 32-bit targets.
+
+* Improved use of PMOVMSKB for any_of/all_of comparison reductions.
+
+* Improved Silvermont, Sandybridge, and Jaguar (btver2) schedulers.
+
+* Improved support for AVX512 vector rotations.
+
+* Added support for AMD Lightweight Profiling (LWP) instructions.
+
+* Avoid using slow LEA instructions.
+
+* Use alternative sequences for multiply by constant.
+
+* Improved lowering of strided shuffles.
+
+* Improved the AVX512 cost model used by the vectorizer.
+
+* Fixed scalar code performance when AVX512 is enabled by making i1's illegal.
+
+* Fixed many inline assembly bugs.
+
+* Preliminary support for tracing NetBSD processes and core files with a single
+  thread in LLDB.
 
 Changes to the AMDGPU Target
 -----------------------------
 
- During this release ...
+* Initial gfx9 support Changes to the AVR Target ----------------------------- - During this release ... +This release consists mainly of bugfixes and implementations of features +required for compiling basic Rust programs. -Changes to the OCaml bindings ------------------------------ +* Enable the branch relaxation pass so that we don't crash on large + stack load/stores + +* Add support for lowering bit-rotations to the native ``ror`` and ``rol`` + instructions + +* Fix bug where function pointers were treated as pointers to RAM and not + pointers to program memory + +* Fix broken code generation for shift-by-variable expressions + +* Support zero-sized types in argument lists; this is impossible in C, + but possible in Rust + + +Changes to the C API +-------------------- - During this release ... +* Deprecated the ``LLVMAddBBVectorizePass`` interface since the ``BBVectorize`` + pass has been removed. It is now a no-op and will be removed in the next + release. Use ``LLVMAddSLPVectorizePass`` instead to get the supported SLP + vectorizer. External Open Source Projects Using LLVM 5 ========================================== -* A project... +Zig Programming Language +------------------------ + +`Zig `_ is an open-source programming language designed +for robustness, optimality, and clarity. It integrates closely with C and is +intended to eventually take the place of C. It uses LLVM to produce highly +optimized native code and to cross-compile for any target out of the box. Zig +is in alpha; with a beta release expected in September. + +LDC - the LLVM-based D compiler +------------------------------- + +`D `_ is a language with C-like syntax and static typing. It +pragmatically combines efficiency, control, and modeling power, with safety and +programmer productivity. D supports powerful concepts like Compile-Time Function +Execution (CTFE) and Template Meta-Programming, provides an innovative approach +to concurrency and offers many classical paradigms. + +`LDC `_ uses the frontend from the reference compiler +combined with LLVM as backend to produce efficient native code. LDC targets +x86/x86_64 systems like Linux, OS X, FreeBSD and Windows and also Linux on ARM +and PowerPC (32/64 bit). Ports to other architectures like AArch64 and MIPS64 +are underway. Additional Information diff --git a/interpreter/llvm/src/docs/Vectorizers.rst b/interpreter/llvm/src/docs/Vectorizers.rst index 65c19aa2bc0cb..92d6200e169f8 100644 --- a/interpreter/llvm/src/docs/Vectorizers.rst +++ b/interpreter/llvm/src/docs/Vectorizers.rst @@ -44,12 +44,12 @@ Users can control the vectorization SIMD width using the command line flag "-for $ clang -mllvm -force-vector-width=8 ... $ opt -loop-vectorize -force-vector-width=8 ... -Users can control the unroll factor using the command line flag "-force-vector-unroll" +Users can control the unroll factor using the command line flag "-force-vector-interleave" .. code-block:: console - $ clang -mllvm -force-vector-unroll=2 ... - $ opt -loop-vectorize -force-vector-unroll=2 ... + $ clang -mllvm -force-vector-interleave=2 ... + $ opt -loop-vectorize -force-vector-interleave=2 ... Pragma loop hint directives ^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -99,7 +99,9 @@ Optimization remarks are enabled using: indicates if vectorization was specified. ``-Rpass-analysis=loop-vectorize`` identifies the statements that caused -vectorization to fail. +vectorization to fail. 
If in addition ``-fsave-optimization-record`` is +provided, multiple causes of vectorization failure may be listed (this behavior +might change in the future). Consider the following loop: @@ -380,6 +382,17 @@ And Linpack-pc with the same configuration. Result is Mflops, higher is better. .. image:: linpack-pc.png +Ongoing Development Directions +------------------------------ + +.. toctree:: + :hidden: + + Proposals/VectorizationPlan + +:doc:`Proposals/VectorizationPlan` + Modeling the process and upgrading the infrastructure of LLVM's Loop Vectorizer. + .. _slp-vectorizer: The SLP Vectorizer diff --git a/interpreter/llvm/src/docs/XRay.rst b/interpreter/llvm/src/docs/XRay.rst index d650319e99220..d61e4e6d99553 100644 --- a/interpreter/llvm/src/docs/XRay.rst +++ b/interpreter/llvm/src/docs/XRay.rst @@ -150,7 +150,7 @@ variable, where we list down the options and their defaults below. | xray_logfile_base | ``const char*`` | ``xray-log.`` | Filename base for the | | | | | XRay logfile. | +-------------------+-----------------+---------------+------------------------+ -| xray_fdr_log | ``bool`` | ``false`` | Wheter to install the | +| xray_fdr_log | ``bool`` | ``false`` | Whether to install the | | | | | Flight Data Recorder | | | | | (FDR) mode. | +-------------------+-----------------+---------------+------------------------+ diff --git a/interpreter/llvm/src/docs/index.rst b/interpreter/llvm/src/docs/index.rst index fe47eb1bcb7f7..5bc2368def510 100644 --- a/interpreter/llvm/src/docs/index.rst +++ b/interpreter/llvm/src/docs/index.rst @@ -1,11 +1,6 @@ Overview ======== -.. warning:: - - If you are using a released version of LLVM, see `the download page - `_ to find your documentation. - The LLVM compiler infrastructure supports a wide range of projects, from industrial strength compilers to specialized JIT applications to small research projects. @@ -90,6 +85,8 @@ representation. CodeOfConduct CompileCudaWithLLVM ReportingGuide + Benchmarking + Docker :doc:`GettingStarted` Discusses how to get up and running quickly with the LLVM infrastructure. @@ -160,6 +157,9 @@ representation. A collection of tips for frontend authors on how to generate IR which LLVM is able to effectively optimize. +:doc:`Docker` + A reference for using Dockerfiles provided with LLVM. + Programming Documentation ========================= @@ -359,10 +359,10 @@ For API clients and LLVM developers. Answers some questions about the new Attributes infrastructure. :doc:`NVPTXUsage` - This document describes using the NVPTX back-end to compile GPU kernels. + This document describes using the NVPTX backend to compile GPU kernels. :doc:`AMDGPUUsage` - This document describes how to use the AMDGPU back-end. + This document describes using the AMDGPU backend to compile GPU kernels. :doc:`StackMaps` LLVM support for mapping instruction addresses to the location of @@ -527,6 +527,7 @@ can be better. CodeOfConduct Proposals/GitHubMove + Proposals/VectorizationPlan :doc:`CodeOfConduct` Proposal to adopt a code of conduct on the LLVM social spaces (lists, events, @@ -535,6 +536,8 @@ can be better. :doc:`Proposals/GitHubMove` Proposal to move from SVN/Git to GitHub. +:doc:`Proposals/VectorizationPlan` + Proposal to model the process and upgrade the infrastructure of LLVM's Loop Vectorizer. 
Indices and tables ================== diff --git a/interpreter/llvm/src/docs/tutorial/BuildingAJIT1.rst b/interpreter/llvm/src/docs/tutorial/BuildingAJIT1.rst index 625cbbba1a5cc..88f7aa5abbc70 100644 --- a/interpreter/llvm/src/docs/tutorial/BuildingAJIT1.rst +++ b/interpreter/llvm/src/docs/tutorial/BuildingAJIT1.rst @@ -12,7 +12,7 @@ Welcome to Chapter 1 of the "Building an ORC-based JIT in LLVM" tutorial. This tutorial runs through the implementation of a JIT compiler using LLVM's On-Request-Compilation (ORC) APIs. It begins with a simplified version of the KaleidoscopeJIT class used in the -`Implementing a language with LLVM `_ tutorials and then +`Implementing a language with LLVM `_ tutorials and then introduces new features like optimization, lazy compilation and remote execution. @@ -41,7 +41,7 @@ The structure of the tutorial is: a remote process with reduced privileges using the JIT Remote APIs. To provide input for our JIT we will use the Kaleidoscope REPL from -`Chapter 7 `_ of the "Implementing a language in LLVM tutorial", +`Chapter 7 `_ of the "Implementing a language in LLVM tutorial", with one minor modification: We will remove the FunctionPassManager from the code for that chapter and replace it with optimization support in our JIT class in Chapter #2. @@ -91,8 +91,8 @@ KaleidoscopeJIT In the previous section we described our API, now we examine a simple implementation of it: The KaleidoscopeJIT class [1]_ that was used in the -`Implementing a language with LLVM `_ tutorials. We will use -the REPL code from `Chapter 7 `_ of that tutorial to supply the +`Implementing a language with LLVM `_ tutorials. We will use +the REPL code from `Chapter 7 `_ of that tutorial to supply the input for our JIT: Each time the user enters an expression the REPL will add a new IR module containing the code for that expression to the JIT. If the expression is a top-level expression like '1+1' or 'sin(x)', the REPL will also diff --git a/interpreter/llvm/src/docs/tutorial/BuildingAJIT2.rst b/interpreter/llvm/src/docs/tutorial/BuildingAJIT2.rst index 839875266a241..2f22bdad6c141 100644 --- a/interpreter/llvm/src/docs/tutorial/BuildingAJIT2.rst +++ b/interpreter/llvm/src/docs/tutorial/BuildingAJIT2.rst @@ -25,7 +25,7 @@ IRTransformLayer, to add IR optimization support to KaleidoscopeJIT. Optimizing Modules using the IRTransformLayer ============================================= -In `Chapter 4 `_ of the "Implementing a language with LLVM" +In `Chapter 4 `_ of the "Implementing a language with LLVM" tutorial series the llvm *FunctionPassManager* is introduced as a means for optimizing LLVM IR. Interested readers may read that chapter for details, but in short: to optimize a Module we create an llvm::FunctionPassManager @@ -148,7 +148,7 @@ At the bottom of our JIT we add a private method to do the actual optimization: *optimizeModule*. This function sets up a FunctionPassManager, adds some passes to it, runs it over every function in the module, and then returns the mutated module. The specific optimizations are the same ones used in -`Chapter 4 `_ of the "Implementing a language with LLVM" +`Chapter 4 `_ of the "Implementing a language with LLVM" tutorial series. Readers may visit that chapter for a more in-depth discussion of these, and of IR optimization in general. 
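For readers following the BuildingAJIT chapters without the tree checked out, the ``optimizeModule`` method both hunks above refer to boils down to the sketch below. The pass list matches the one the tutorial text describes; the surrounding JIT class is omitted, so treat this as a paraphrase of the tutorial code rather than a drop-in replacement.

.. code-block:: c++

    #include "llvm/IR/LegacyPassManager.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Transforms/Scalar.h"
    #include "llvm/Transforms/Scalar/GVN.h"

    #include <memory>

    // Run a small set of function-level cleanups over every function in
    // the module, then hand the mutated module back to the layer below.
    static std::shared_ptr<llvm::Module>
    optimizeModule(std::shared_ptr<llvm::Module> M) {
      llvm::legacy::FunctionPassManager FPM(M.get());

      FPM.add(llvm::createInstructionCombiningPass()); // peephole cleanups
      FPM.add(llvm::createReassociatePass());          // canonicalize expressions
      FPM.add(llvm::createGVNPass());                  // eliminate redundancies
      FPM.add(llvm::createCFGSimplificationPass());    // merge/delete blocks
      FPM.doInitialization();

      for (llvm::Function &F : *M)
        FPM.run(F);

      return M;
    }

Note that after this patch the module travels through the layers as a ``std::shared_ptr<Module>``, which is why the sketch takes and returns one rather than the ``std::unique_ptr`` the older tutorial text used.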
diff --git a/interpreter/llvm/src/docs/tutorial/LangImpl02.rst b/interpreter/llvm/src/docs/tutorial/LangImpl02.rst index 4be447eb5ba35..d72c8dc9add4e 100644 --- a/interpreter/llvm/src/docs/tutorial/LangImpl02.rst +++ b/interpreter/llvm/src/docs/tutorial/LangImpl02.rst @@ -10,7 +10,7 @@ Chapter 2 Introduction Welcome to Chapter 2 of the "`Implementing a language with LLVM `_" tutorial. This chapter shows you how to use the -lexer, built in `Chapter 1 `_, to build a full +lexer, built in `Chapter 1 `_, to build a full `parser `_ for our Kaleidoscope language. Once we have a parser, we'll define and build an `Abstract Syntax Tree `_ (AST). diff --git a/interpreter/llvm/src/docs/tutorial/LangImpl03.rst b/interpreter/llvm/src/docs/tutorial/LangImpl03.rst index 1dfe10175c747..fab2ddaf88292 100644 --- a/interpreter/llvm/src/docs/tutorial/LangImpl03.rst +++ b/interpreter/llvm/src/docs/tutorial/LangImpl03.rst @@ -10,7 +10,7 @@ Chapter 3 Introduction Welcome to Chapter 3 of the "`Implementing a language with LLVM `_" tutorial. This chapter shows you how to transform -the `Abstract Syntax Tree `_, built in Chapter 2, into +the `Abstract Syntax Tree `_, built in Chapter 2, into LLVM IR. This will teach you a little bit about how LLVM does things, as well as demonstrate how easy it is to use. It's much more work to build a lexer and parser than it is to generate LLVM IR code. :) @@ -362,7 +362,7 @@ end of the new basic block. Basic blocks in LLVM are an important part of functions that define the `Control Flow Graph `_. Since we don't have any control flow, our functions will only contain one block -at this point. We'll fix this in `Chapter 5 `_ :). +at this point. We'll fix this in `Chapter 5 `_ :). Next we add the function arguments to the NamedValues map (after first clearing it out) so that they're accessible to ``VariableExprAST`` nodes. @@ -540,7 +540,7 @@ functions referencing each other. This wraps up the third chapter of the Kaleidoscope tutorial. Up next, we'll describe how to `add JIT codegen and optimizer -support `_ to this so we can actually start running +support `_ to this so we can actually start running code! Full Code Listing diff --git a/interpreter/llvm/src/docs/tutorial/LangImpl04.rst b/interpreter/llvm/src/docs/tutorial/LangImpl04.rst index 16d7164ae15ee..921c4dcc21ad5 100644 --- a/interpreter/llvm/src/docs/tutorial/LangImpl04.rst +++ b/interpreter/llvm/src/docs/tutorial/LangImpl04.rst @@ -622,7 +622,7 @@ This completes the JIT and optimizer chapter of the Kaleidoscope tutorial. At this point, we can compile a non-Turing-complete programming language, optimize and JIT compile it in a user-driven way. Next up we'll look into `extending the language with control flow -constructs `_, tackling some interesting LLVM IR issues +constructs `_, tackling some interesting LLVM IR issues along the way. Full Code Listing diff --git a/interpreter/llvm/src/docs/tutorial/LangImpl05.rst b/interpreter/llvm/src/docs/tutorial/LangImpl05.rst index dcf45bcbf8d20..8650892e8f8bd 100644 --- a/interpreter/llvm/src/docs/tutorial/LangImpl05.rst +++ b/interpreter/llvm/src/docs/tutorial/LangImpl05.rst @@ -269,7 +269,7 @@ Phi nodes: #. Values that are implicit in the structure of your AST, such as the Phi node in this case. -In `Chapter 7 `_ of this tutorial ("mutable variables"), +In `Chapter 7 `_ of this tutorial ("mutable variables"), we'll talk about #1 in depth. For now, just believe me that you don't need SSA construction to handle this case. 
For #2, you have the choice of using the techniques that we will describe for #1, or you can insert @@ -790,7 +790,7 @@ of the tutorial. In this chapter we added two control flow constructs, and used them to motivate a couple of aspects of the LLVM IR that are important for front-end implementors to know. In the next chapter of our saga, we will get a bit crazier and add `user-defined -operators `_ to our poor innocent language. +operators `_ to our poor innocent language. Full Code Listing ================= diff --git a/interpreter/llvm/src/docs/tutorial/LangImpl06.rst b/interpreter/llvm/src/docs/tutorial/LangImpl06.rst index c1035bce85593..cb8ec766bb261 100644 --- a/interpreter/llvm/src/docs/tutorial/LangImpl06.rst +++ b/interpreter/llvm/src/docs/tutorial/LangImpl06.rst @@ -41,7 +41,7 @@ The point of going into user-defined operators in a tutorial like this is to show the power and flexibility of using a hand-written parser. Thus far, the parser we have been implementing uses recursive descent for most parts of the grammar and operator precedence parsing for the -expressions. See `Chapter 2 `_ for details. By +expressions. See `Chapter 2 `_ for details. By using operator precedence parsing, it is very easy to allow the programmer to introduce new operators into the grammar: the grammar is dynamically extensible as the JIT runs. @@ -734,7 +734,7 @@ side-effects, but it can't actually define and mutate a variable itself. Strikingly, variable mutation is an important feature of some languages, and it is not at all obvious how to `add support for mutable -variables `_ without having to add an "SSA construction" +variables `_ without having to add an "SSA construction" phase to your front-end. In the next chapter, we will describe how you can add variable mutation without building SSA in your front-end. diff --git a/interpreter/llvm/src/docs/tutorial/OCamlLangImpl5.rst b/interpreter/llvm/src/docs/tutorial/OCamlLangImpl5.rst index 6e17de4b2bde8..d06bf6ec252a8 100644 --- a/interpreter/llvm/src/docs/tutorial/OCamlLangImpl5.rst +++ b/interpreter/llvm/src/docs/tutorial/OCamlLangImpl5.rst @@ -258,7 +258,7 @@ a truth value as a 1-bit (bool) value. let then_bb = append_block context "then" the_function in position_at_end then_bb builder; -As opposed to the `C++ tutorial `_, we have to build our +As opposed to the `C++ tutorial `_, we have to build our basic blocks bottom up since we can't have dangling BasicBlocks. We start off by saving a pointer to the first block (which might not be the entry block), which we'll need to build a conditional branch later. 
We diff --git a/interpreter/llvm/src/examples/ExceptionDemo/ExceptionDemo.cpp b/interpreter/llvm/src/examples/ExceptionDemo/ExceptionDemo.cpp index a8b82e1da778a..d4c2a8cc5ad9b 100644 --- a/interpreter/llvm/src/examples/ExceptionDemo/ExceptionDemo.cpp +++ b/interpreter/llvm/src/examples/ExceptionDemo/ExceptionDemo.cpp @@ -49,7 +49,7 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/STLExtras.h" -#include "llvm/IR/Verifier.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/ExecutionEngine/MCJIT.h" #include "llvm/ExecutionEngine/SectionMemoryManager.h" #include "llvm/IR/DataLayout.h" @@ -59,7 +59,7 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Module.h" -#include "llvm/Support/Dwarf.h" +#include "llvm/IR/Verifier.h" #include "llvm/Support/TargetSelect.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Transforms/Scalar.h" diff --git a/interpreter/llvm/src/examples/Kaleidoscope/BuildingAJIT/Chapter1/KaleidoscopeJIT.h b/interpreter/llvm/src/examples/Kaleidoscope/BuildingAJIT/Chapter1/KaleidoscopeJIT.h index a14fd1dc20eca..5a2148a14a14e 100644 --- a/interpreter/llvm/src/examples/Kaleidoscope/BuildingAJIT/Chapter1/KaleidoscopeJIT.h +++ b/interpreter/llvm/src/examples/Kaleidoscope/BuildingAJIT/Chapter1/KaleidoscopeJIT.h @@ -1,4 +1,4 @@ -//===----- KaleidoscopeJIT.h - A simple JIT for Kaleidoscope ----*- C++ -*-===// +//===- KaleidoscopeJIT.h - A simple JIT for Kaleidoscope --------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -17,7 +17,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ExecutionEngine/ExecutionEngine.h" #include "llvm/ExecutionEngine/JITSymbol.h" -#include "llvm/ExecutionEngine/RuntimeDyld.h" +#include "llvm/ExecutionEngine/RTDyldMemoryManager.h" #include "llvm/ExecutionEngine/SectionMemoryManager.h" #include "llvm/ExecutionEngine/Orc/CompileUtils.h" #include "llvm/ExecutionEngine/Orc/IRCompileLayer.h" @@ -40,14 +40,15 @@ class KaleidoscopeJIT { private: std::unique_ptr TM; const DataLayout DL; - RTDyldObjectLinkingLayer<> ObjectLayer; - IRCompileLayer CompileLayer; + RTDyldObjectLinkingLayer ObjectLayer; + IRCompileLayer CompileLayer; public: - typedef decltype(CompileLayer)::ModuleSetHandleT ModuleHandle; + using ModuleHandle = decltype(CompileLayer)::ModuleHandleT; KaleidoscopeJIT() : TM(EngineBuilder().selectTarget()), DL(TM->createDataLayout()), + ObjectLayer([]() { return std::make_shared(); }), CompileLayer(ObjectLayer, SimpleCompiler(*TM)) { llvm::sys::DynamicLibrary::LoadLibraryPermanently(nullptr); } @@ -72,15 +73,10 @@ class KaleidoscopeJIT { return JITSymbol(nullptr); }); - // Build a singleton module set to hold our module. - std::vector> Ms; - Ms.push_back(std::move(M)); - // Add the set to the JIT with the resolver we created above and a newly // created SectionMemoryManager. 
- return CompileLayer.addModuleSet(std::move(Ms), - make_unique(), - std::move(Resolver)); + return cantFail(CompileLayer.addModule(std::move(M), + std::move(Resolver))); } JITSymbol findSymbol(const std::string Name) { @@ -91,7 +87,7 @@ class KaleidoscopeJIT { } void removeModule(ModuleHandle H) { - CompileLayer.removeModuleSet(H); + cantFail(CompileLayer.removeModule(H)); } }; diff --git a/interpreter/llvm/src/examples/Kaleidoscope/BuildingAJIT/Chapter1/toy.cpp b/interpreter/llvm/src/examples/Kaleidoscope/BuildingAJIT/Chapter1/toy.cpp index 163caa6872d7b..2471344c6d65f 100644 --- a/interpreter/llvm/src/examples/Kaleidoscope/BuildingAJIT/Chapter1/toy.cpp +++ b/interpreter/llvm/src/examples/Kaleidoscope/BuildingAJIT/Chapter1/toy.cpp @@ -1150,7 +1150,7 @@ static void HandleTopLevelExpression() { // Get the symbol's address and cast it to the right type (takes no // arguments, returns a double) so we can call it as a native function. - double (*FP)() = (double (*)())(intptr_t)ExprSymbol.getAddress(); + double (*FP)() = (double (*)())(intptr_t)cantFail(ExprSymbol.getAddress()); fprintf(stderr, "Evaluated to %f\n", FP()); // Delete the anonymous expression module from the JIT. diff --git a/interpreter/llvm/src/examples/Kaleidoscope/BuildingAJIT/Chapter2/KaleidoscopeJIT.h b/interpreter/llvm/src/examples/Kaleidoscope/BuildingAJIT/Chapter2/KaleidoscopeJIT.h index 2039be4571a59..9a295f1566cb4 100644 --- a/interpreter/llvm/src/examples/Kaleidoscope/BuildingAJIT/Chapter2/KaleidoscopeJIT.h +++ b/interpreter/llvm/src/examples/Kaleidoscope/BuildingAJIT/Chapter2/KaleidoscopeJIT.h @@ -1,4 +1,4 @@ -//===----- KaleidoscopeJIT.h - A simple JIT for Kaleidoscope ----*- C++ -*-===// +//===- KaleidoscopeJIT.h - A simple JIT for Kaleidoscope --------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -17,7 +17,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ExecutionEngine/ExecutionEngine.h" #include "llvm/ExecutionEngine/JITSymbol.h" -#include "llvm/ExecutionEngine/RuntimeDyld.h" +#include "llvm/ExecutionEngine/RTDyldMemoryManager.h" #include "llvm/ExecutionEngine/SectionMemoryManager.h" #include "llvm/ExecutionEngine/Orc/CompileUtils.h" #include "llvm/ExecutionEngine/Orc/IRCompileLayer.h" @@ -44,22 +44,23 @@ class KaleidoscopeJIT { private: std::unique_ptr TM; const DataLayout DL; - RTDyldObjectLinkingLayer<> ObjectLayer; - IRCompileLayer CompileLayer; + RTDyldObjectLinkingLayer ObjectLayer; + IRCompileLayer CompileLayer; - typedef std::function(std::unique_ptr)> - OptimizeFunction; + using OptimizeFunction = + std::function(std::shared_ptr)>; IRTransformLayer OptimizeLayer; public: - typedef decltype(OptimizeLayer)::ModuleSetHandleT ModuleHandle; + using ModuleHandle = decltype(OptimizeLayer)::ModuleHandleT; KaleidoscopeJIT() : TM(EngineBuilder().selectTarget()), DL(TM->createDataLayout()), + ObjectLayer([]() { return std::make_shared(); }), CompileLayer(ObjectLayer, SimpleCompiler(*TM)), OptimizeLayer(CompileLayer, - [this](std::unique_ptr M) { + [this](std::shared_ptr M) { return optimizeModule(std::move(M)); }) { llvm::sys::DynamicLibrary::LoadLibraryPermanently(nullptr); @@ -85,15 +86,10 @@ class KaleidoscopeJIT { return JITSymbol(nullptr); }); - // Build a singleton module set to hold our module. - std::vector> Ms; - Ms.push_back(std::move(M)); - // Add the set to the JIT with the resolver we created above and a newly // created SectionMemoryManager. 
- return OptimizeLayer.addModuleSet(std::move(Ms), - make_unique(), - std::move(Resolver)); + return cantFail(OptimizeLayer.addModule(std::move(M), + std::move(Resolver))); } JITSymbol findSymbol(const std::string Name) { @@ -104,11 +100,11 @@ class KaleidoscopeJIT { } void removeModule(ModuleHandle H) { - OptimizeLayer.removeModuleSet(H); + cantFail(OptimizeLayer.removeModule(H)); } private: - std::unique_ptr optimizeModule(std::unique_ptr M) { + std::shared_ptr optimizeModule(std::shared_ptr M) { // Create a function pass manager. auto FPM = llvm::make_unique(M.get()); diff --git a/interpreter/llvm/src/examples/Kaleidoscope/BuildingAJIT/Chapter2/toy.cpp b/interpreter/llvm/src/examples/Kaleidoscope/BuildingAJIT/Chapter2/toy.cpp index 163caa6872d7b..2471344c6d65f 100644 --- a/interpreter/llvm/src/examples/Kaleidoscope/BuildingAJIT/Chapter2/toy.cpp +++ b/interpreter/llvm/src/examples/Kaleidoscope/BuildingAJIT/Chapter2/toy.cpp @@ -1150,7 +1150,7 @@ static void HandleTopLevelExpression() { // Get the symbol's address and cast it to the right type (takes no // arguments, returns a double) so we can call it as a native function. - double (*FP)() = (double (*)())(intptr_t)ExprSymbol.getAddress(); + double (*FP)() = (double (*)())(intptr_t)cantFail(ExprSymbol.getAddress()); fprintf(stderr, "Evaluated to %f\n", FP()); // Delete the anonymous expression module from the JIT. diff --git a/interpreter/llvm/src/examples/Kaleidoscope/BuildingAJIT/Chapter3/KaleidoscopeJIT.h b/interpreter/llvm/src/examples/Kaleidoscope/BuildingAJIT/Chapter3/KaleidoscopeJIT.h index d22d41855072c..a03f5ce5e2383 100644 --- a/interpreter/llvm/src/examples/Kaleidoscope/BuildingAJIT/Chapter3/KaleidoscopeJIT.h +++ b/interpreter/llvm/src/examples/Kaleidoscope/BuildingAJIT/Chapter3/KaleidoscopeJIT.h @@ -1,4 +1,4 @@ -//===----- KaleidoscopeJIT.h - A simple JIT for Kaleidoscope ----*- C++ -*-===// +//===- KaleidoscopeJIT.h - A simple JIT for Kaleidoscope --------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -17,6 +17,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ExecutionEngine/ExecutionEngine.h" #include "llvm/ExecutionEngine/JITSymbol.h" +#include "llvm/ExecutionEngine/RTDyldMemoryManager.h" #include "llvm/ExecutionEngine/RuntimeDyld.h" #include "llvm/ExecutionEngine/SectionMemoryManager.h" #include "llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h" @@ -46,11 +47,11 @@ class KaleidoscopeJIT { private: std::unique_ptr TM; const DataLayout DL; - RTDyldObjectLinkingLayer<> ObjectLayer; - IRCompileLayer CompileLayer; + RTDyldObjectLinkingLayer ObjectLayer; + IRCompileLayer CompileLayer; - typedef std::function(std::unique_ptr)> - OptimizeFunction; + using OptimizeFunction = + std::function(std::shared_ptr)>; IRTransformLayer OptimizeLayer; @@ -58,13 +59,14 @@ class KaleidoscopeJIT { CompileOnDemandLayer CODLayer; public: - typedef decltype(CODLayer)::ModuleSetHandleT ModuleHandle; + using ModuleHandle = decltype(CODLayer)::ModuleHandleT; KaleidoscopeJIT() : TM(EngineBuilder().selectTarget()), DL(TM->createDataLayout()), + ObjectLayer([]() { return std::make_shared(); }), CompileLayer(ObjectLayer, SimpleCompiler(*TM)), OptimizeLayer(CompileLayer, - [this](std::unique_ptr M) { + [this](std::shared_ptr M) { return optimizeModule(std::move(M)); }), CompileCallbackManager( @@ -97,15 +99,9 @@ class KaleidoscopeJIT { return JITSymbol(nullptr); }); - // Build a singleton module set to hold our module. 
- std::vector<std::unique_ptr<Module>> Ms; - Ms.push_back(std::move(M)); - // Add the set to the JIT with the resolver we created above and a newly // created SectionMemoryManager. - return CODLayer.addModuleSet(std::move(Ms), - make_unique<SectionMemoryManager>(), - std::move(Resolver)); + return cantFail(CODLayer.addModule(std::move(M), std::move(Resolver))); } JITSymbol findSymbol(const std::string Name) { @@ -116,11 +112,11 @@ class KaleidoscopeJIT { } void removeModule(ModuleHandle H) { - CODLayer.removeModuleSet(H); + cantFail(CODLayer.removeModule(H)); } private: - std::unique_ptr<Module> optimizeModule(std::unique_ptr<Module> M) { + std::shared_ptr<Module> optimizeModule(std::shared_ptr<Module> M) { // Create a function pass manager. auto FPM = llvm::make_unique<legacy::FunctionPassManager>(M.get()); diff --git a/interpreter/llvm/src/examples/Kaleidoscope/BuildingAJIT/Chapter3/toy.cpp b/interpreter/llvm/src/examples/Kaleidoscope/BuildingAJIT/Chapter3/toy.cpp index 163caa6872d7b..2471344c6d65f 100644 --- a/interpreter/llvm/src/examples/Kaleidoscope/BuildingAJIT/Chapter3/toy.cpp +++ b/interpreter/llvm/src/examples/Kaleidoscope/BuildingAJIT/Chapter3/toy.cpp @@ -1150,7 +1150,7 @@ static void HandleTopLevelExpression() { // Get the symbol's address and cast it to the right type (takes no // arguments, returns a double) so we can call it as a native function. - double (*FP)() = (double (*)())(intptr_t)ExprSymbol.getAddress(); + double (*FP)() = (double (*)())(intptr_t)cantFail(ExprSymbol.getAddress()); fprintf(stderr, "Evaluated to %f\n", FP()); // Delete the anonymous expression module from the JIT. diff --git a/interpreter/llvm/src/examples/Kaleidoscope/BuildingAJIT/Chapter4/KaleidoscopeJIT.h b/interpreter/llvm/src/examples/Kaleidoscope/BuildingAJIT/Chapter4/KaleidoscopeJIT.h index e0a78410f7134..d10e4748f1a17 100644 --- a/interpreter/llvm/src/examples/Kaleidoscope/BuildingAJIT/Chapter4/KaleidoscopeJIT.h +++ b/interpreter/llvm/src/examples/Kaleidoscope/BuildingAJIT/Chapter4/KaleidoscopeJIT.h @@ -1,4 +1,4 @@ -//===----- KaleidoscopeJIT.h - A simple JIT for Kaleidoscope ----*- C++ -*-===// +//===- KaleidoscopeJIT.h - A simple JIT for Kaleidoscope --------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -17,10 +17,10 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ExecutionEngine/ExecutionEngine.h" #include "llvm/ExecutionEngine/JITSymbol.h" -#include "llvm/ExecutionEngine/RuntimeDyld.h" +#include "llvm/ExecutionEngine/RTDyldMemoryManager.h" #include "llvm/ExecutionEngine/SectionMemoryManager.h" -#include "llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h" #include "llvm/ExecutionEngine/Orc/CompileUtils.h" +#include "llvm/ExecutionEngine/Orc/IndirectionUtils.h" #include "llvm/ExecutionEngine/Orc/IRCompileLayer.h" #include "llvm/ExecutionEngine/Orc/IRTransformLayer.h" #include "llvm/ExecutionEngine/Orc/LambdaResolver.h" @@ -73,11 +73,11 @@ class KaleidoscopeJIT { private: std::unique_ptr<TargetMachine> TM; const DataLayout DL; - RTDyldObjectLinkingLayer<> ObjectLayer; - IRCompileLayer<decltype(ObjectLayer)> CompileLayer; + RTDyldObjectLinkingLayer ObjectLayer; + IRCompileLayer<decltype(ObjectLayer), SimpleCompiler> CompileLayer; - typedef std::function<std::unique_ptr<Module>(std::unique_ptr<Module>)> - OptimizeFunction; + using OptimizeFunction = + std::function<std::shared_ptr<Module>(std::shared_ptr<Module>)>; IRTransformLayer<decltype(CompileLayer), OptimizeFunction> OptimizeLayer; @@ -85,14 +85,15 @@ class KaleidoscopeJIT { std::unique_ptr<IndirectStubsManager> IndirectStubsMgr; public: - typedef decltype(OptimizeLayer)::ModuleSetHandleT ModuleHandle; + using ModuleHandle = decltype(OptimizeLayer)::ModuleHandleT; KaleidoscopeJIT() : TM(EngineBuilder().selectTarget()), DL(TM->createDataLayout()), + ObjectLayer([]() { return std::make_shared<SectionMemoryManager>(); }), CompileLayer(ObjectLayer,
SimpleCompiler(*TM)), OptimizeLayer(CompileLayer, - [this](std::unique_ptr<Module> M) { + [this](std::shared_ptr<Module> M) { return optimizeModule(std::move(M)); }), CompileCallbackMgr( @@ -106,7 +107,6 @@ class KaleidoscopeJIT { TargetMachine &getTargetMachine() { return *TM; } ModuleHandle addModule(std::unique_ptr<Module> M) { - // Build our symbol resolver: // Lambda 1: Look back into the JIT itself to find symbols that are part of // the same "logical dylib". @@ -126,15 +126,10 @@ class KaleidoscopeJIT { return JITSymbol(nullptr); }); - // Build a singleton module set to hold our module. - std::vector<std::unique_ptr<Module>> Ms; - Ms.push_back(std::move(M)); - // Add the set to the JIT with the resolver we created above and a newly // created SectionMemoryManager. - return OptimizeLayer.addModuleSet(std::move(Ms), - make_unique<SectionMemoryManager>(), - std::move(Resolver)); + return cantFail(OptimizeLayer.addModule(std::move(M), + std::move(Resolver))); } Error addFunctionAST(std::unique_ptr<FunctionAST> FnAST) { @@ -180,7 +175,7 @@ class KaleidoscopeJIT { addModule(std::move(M)); auto Sym = findSymbol(SharedFnAST->getName() + "$impl"); assert(Sym && "Couldn't find compiled function?"); - JITTargetAddress SymAddr = Sym.getAddress(); + JITTargetAddress SymAddr = cantFail(Sym.getAddress()); if (auto Err = IndirectStubsMgr->updatePointer(mangle(SharedFnAST->getName()), SymAddr)) { @@ -200,7 +195,7 @@ class KaleidoscopeJIT { } void removeModule(ModuleHandle H) { - OptimizeLayer.removeModuleSet(H); + cantFail(OptimizeLayer.removeModule(H)); } private: @@ -211,7 +206,7 @@ class KaleidoscopeJIT { return MangledNameStream.str(); } - std::unique_ptr<Module> optimizeModule(std::unique_ptr<Module> M) { + std::shared_ptr<Module> optimizeModule(std::shared_ptr<Module> M) { // Create a function pass manager. auto FPM = llvm::make_unique<legacy::FunctionPassManager>(M.get()); diff --git a/interpreter/llvm/src/examples/Kaleidoscope/BuildingAJIT/Chapter4/toy.cpp b/interpreter/llvm/src/examples/Kaleidoscope/BuildingAJIT/Chapter4/toy.cpp index ff4b5220105be..ed8ae31ba0fd4 100644 --- a/interpreter/llvm/src/examples/Kaleidoscope/BuildingAJIT/Chapter4/toy.cpp +++ b/interpreter/llvm/src/examples/Kaleidoscope/BuildingAJIT/Chapter4/toy.cpp @@ -1153,7 +1153,7 @@ static void HandleTopLevelExpression() { // Get the symbol's address and cast it to the right type (takes no // arguments, returns a double) so we can call it as a native function. - double (*FP)() = (double (*)())(intptr_t)ExprSymbol.getAddress(); + double (*FP)() = (double (*)())(intptr_t)cantFail(ExprSymbol.getAddress()); fprintf(stderr, "Evaluated to %f\n", FP()); // Delete the anonymous expression module from the JIT.
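The common thread in the BuildingAJIT hunks above is the LLVM 5.0 error-handling migration: addModule()/removeModule() and JITSymbol::getAddress() now return llvm::Expected<T> or llvm::Error instead of raw values, so every call site gains a cantFail() wrapper. A minimal standalone sketch of that idiom, assuming only LLVM 5.0's llvm/Support/Error.h; mightFail() and its values are hypothetical and not part of this patch:

#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

// A hypothetical fallible API in the style of addModule()/getAddress().
static Expected<int> mightFail(bool Fail) {
  if (Fail)
    return make_error<StringError>("lookup failed", inconvertibleErrorCode());
  return 42;
}

int main() {
  // cantFail() unwraps an Expected<T>, aborting if an error actually
  // occurred; the tutorials use it where failure is known to be impossible.
  int A = cantFail(mightFail(false));

  // The explicit form, for call sites that can genuinely fail.
  int B = 0;
  if (Expected<int> BOrErr = mightFail(true))
    B = *BOrErr;
  else
    logAllUnhandledErrors(BOrErr.takeError(), errs(), "mightFail: ");

  return A + B == 42 ? 0 : 1;
}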
diff --git a/interpreter/llvm/src/examples/Kaleidoscope/BuildingAJIT/Chapter5/KaleidoscopeJIT.h b/interpreter/llvm/src/examples/Kaleidoscope/BuildingAJIT/Chapter5/KaleidoscopeJIT.h index 70a896fe8f007..7ea535b3af537 100644 --- a/interpreter/llvm/src/examples/Kaleidoscope/BuildingAJIT/Chapter5/KaleidoscopeJIT.h +++ b/interpreter/llvm/src/examples/Kaleidoscope/BuildingAJIT/Chapter5/KaleidoscopeJIT.h @@ -1,4 +1,4 @@ -//===----- KaleidoscopeJIT.h - A simple JIT for Kaleidoscope ----*- C++ -*-===// +//===- KaleidoscopeJIT.h - A simple JIT for Kaleidoscope --------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -20,9 +20,8 @@ #include "llvm/ADT/Triple.h" #include "llvm/ExecutionEngine/ExecutionEngine.h" #include "llvm/ExecutionEngine/JITSymbol.h" -#include "llvm/ExecutionEngine/RuntimeDyld.h" -#include "llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h" #include "llvm/ExecutionEngine/Orc/CompileUtils.h" +#include "llvm/ExecutionEngine/Orc/IndirectionUtils.h" #include "llvm/ExecutionEngine/Orc/IRCompileLayer.h" #include "llvm/ExecutionEngine/Orc/IRTransformLayer.h" #include "llvm/ExecutionEngine/Orc/LambdaResolver.h" @@ -73,17 +72,17 @@ namespace llvm { namespace orc { // Typedef the remote-client API. -typedef remote::OrcRemoteTargetClient<FDRPCChannel> MyRemote; +using MyRemote = remote::OrcRemoteTargetClient<FDRPCChannel>; class KaleidoscopeJIT { private: std::unique_ptr<TargetMachine> TM; const DataLayout DL; - RTDyldObjectLinkingLayer<> ObjectLayer; - IRCompileLayer<decltype(ObjectLayer)> CompileLayer; + RTDyldObjectLinkingLayer ObjectLayer; + IRCompileLayer<decltype(ObjectLayer), SimpleCompiler> CompileLayer; - typedef std::function<std::unique_ptr<Module>(std::unique_ptr<Module>)> - OptimizeFunction; + using OptimizeFunction = + std::function<std::shared_ptr<Module>(std::shared_ptr<Module>)>; IRTransformLayer<decltype(CompileLayer), OptimizeFunction> OptimizeLayer; @@ -92,15 +91,24 @@ class KaleidoscopeJIT { MyRemote &Remote; public: - typedef decltype(OptimizeLayer)::ModuleSetHandleT ModuleHandle; + using ModuleHandle = decltype(OptimizeLayer)::ModuleHandleT; KaleidoscopeJIT(MyRemote &Remote) : TM(EngineBuilder().selectTarget(Triple(Remote.getTargetTriple()), "", "", SmallVector<std::string, 0>())), DL(TM->createDataLayout()), + ObjectLayer([&Remote]() { + std::unique_ptr<MyRemote::RCMemoryManager> MemMgr; + if (auto Err = Remote.createRemoteMemoryManager(MemMgr)) { + logAllUnhandledErrors(std::move(Err), errs(), + "Error creating remote memory manager:"); + exit(1); + } + return MemMgr; + }), CompileLayer(ObjectLayer, SimpleCompiler(*TM)), OptimizeLayer(CompileLayer, - [this](std::unique_ptr<Module> M) { + [this](std::shared_ptr<Module> M) { return optimizeModule(std::move(M)); }), Remote(Remote) { @@ -124,7 +132,6 @@ class KaleidoscopeJIT { TargetMachine &getTargetMachine() { return *TM; } ModuleHandle addModule(std::unique_ptr<Module> M) { - // Build our symbol resolver: // Lambda 1: Look back into the JIT itself to find symbols that are part of // the same "logical dylib". @@ -148,22 +155,10 @@ class KaleidoscopeJIT { return JITSymbol(nullptr); }); - std::unique_ptr<MyRemote::RCMemoryManager> MemMgr; - if (auto Err = Remote.createRemoteMemoryManager(MemMgr)) { - logAllUnhandledErrors(std::move(Err), errs(), - "Error creating remote memory manager:"); - exit(1); - } - - // Build a singleton module set to hold our module. - std::vector<std::unique_ptr<Module>> Ms; - Ms.push_back(std::move(M)); - // Add the set to the JIT with the resolver we created above and a newly // created SectionMemoryManager.
- return OptimizeLayer.addModuleSet(std::move(Ms), - std::move(MemMgr), - std::move(Resolver)); + return cantFail(OptimizeLayer.addModule(std::move(M), + std::move(Resolver))); } Error addFunctionAST(std::unique_ptr FnAST) { @@ -209,7 +204,7 @@ class KaleidoscopeJIT { addModule(std::move(M)); auto Sym = findSymbol(SharedFnAST->getName() + "$impl"); assert(Sym && "Couldn't find compiled function?"); - JITTargetAddress SymAddr = Sym.getAddress(); + JITTargetAddress SymAddr = cantFail(Sym.getAddress()); if (auto Err = IndirectStubsMgr->updatePointer(mangle(SharedFnAST->getName()), SymAddr)) { @@ -233,7 +228,7 @@ class KaleidoscopeJIT { } void removeModule(ModuleHandle H) { - OptimizeLayer.removeModuleSet(H); + cantFail(OptimizeLayer.removeModule(H)); } private: @@ -244,7 +239,7 @@ class KaleidoscopeJIT { return MangledNameStream.str(); } - std::unique_ptr optimizeModule(std::unique_ptr M) { + std::shared_ptr optimizeModule(std::shared_ptr M) { // Create a function pass manager. auto FPM = llvm::make_unique(M.get()); diff --git a/interpreter/llvm/src/examples/Kaleidoscope/BuildingAJIT/Chapter5/Server/server.cpp b/interpreter/llvm/src/examples/Kaleidoscope/BuildingAJIT/Chapter5/Server/server.cpp index da6e8ac652348..e50a7ecf96bcd 100644 --- a/interpreter/llvm/src/examples/Kaleidoscope/BuildingAJIT/Chapter5/Server/server.cpp +++ b/interpreter/llvm/src/examples/Kaleidoscope/BuildingAJIT/Chapter5/Server/server.cpp @@ -1,17 +1,19 @@ +#include "../RemoteJITUtils.h" +#include "llvm/ExecutionEngine/RTDyldMemoryManager.h" +#include "llvm/ExecutionEngine/Orc/OrcRemoteTargetServer.h" +#include "llvm/ExecutionEngine/Orc/OrcABISupport.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/DynamicLibrary.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Support/TargetSelect.h" -#include "llvm/ExecutionEngine/Orc/OrcRemoteTargetServer.h" -#include "llvm/ExecutionEngine/Orc/OrcABISupport.h" - -#include "../RemoteJITUtils.h" - +#include +#include #include -#include +#include #include #include - using namespace llvm; using namespace llvm::orc; @@ -22,7 +24,7 @@ cl::opt Port("port", ExitOnError ExitOnErr; -typedef int (*MainFun)(int, const char*[]); +using MainFun = int (*)(int, const char*[]); template NativePtrT MakeNative(uint64_t P) { @@ -36,7 +38,6 @@ void printExprResult(double Val) { // --- LAZY COMPILE TEST --- int main(int argc, char* argv[]) { - if (argc == 0) ExitOnErr.setBanner("jit_server: "); else @@ -59,14 +60,14 @@ int main(int argc, char* argv[]) { int sockfd = socket(PF_INET, SOCK_STREAM, 0); sockaddr_in servAddr, clientAddr; socklen_t clientAddrLen = sizeof(clientAddr); - bzero(&servAddr, sizeof(servAddr)); + memset(&servAddr, 0, sizeof(servAddr)); servAddr.sin_family = PF_INET; servAddr.sin_family = INADDR_ANY; servAddr.sin_port = htons(Port); { // avoid "Address already in use" error. 
- int yes=1; + int yes = 1; if (setsockopt(sockfd,SOL_SOCKET,SO_REUSEADDR,&yes,sizeof(int)) == -1) { errs() << "Error calling setsockopt.\n"; return 1; @@ -98,7 +99,8 @@ int main(int argc, char* argv[]) { }; FDRPCChannel TCPChannel(newsockfd, newsockfd); - typedef remote::OrcRemoteTargetServer MyServerT; + + using MyServerT = remote::OrcRemoteTargetServer; MyServerT Server(TCPChannel, SymbolLookup, RegisterEHFrames, DeregisterEHFrames); diff --git a/interpreter/llvm/src/examples/Kaleidoscope/BuildingAJIT/Chapter5/toy.cpp b/interpreter/llvm/src/examples/Kaleidoscope/BuildingAJIT/Chapter5/toy.cpp index edd050959d6b5..7bbc06a0958f3 100644 --- a/interpreter/llvm/src/examples/Kaleidoscope/BuildingAJIT/Chapter5/toy.cpp +++ b/interpreter/llvm/src/examples/Kaleidoscope/BuildingAJIT/Chapter5/toy.cpp @@ -1177,7 +1177,7 @@ static void HandleTopLevelExpression() { // Get the symbol's address and cast it to the right type (takes no // arguments, returns a double) so we can call it as a native function. - ExitOnErr(TheJIT->executeRemoteExpr(ExprSymbol.getAddress())); + ExitOnErr(TheJIT->executeRemoteExpr(cantFail(ExprSymbol.getAddress()))); // Delete the anonymous expression module from the JIT. TheJIT->removeModule(H); diff --git a/interpreter/llvm/src/examples/Kaleidoscope/Chapter4/toy.cpp b/interpreter/llvm/src/examples/Kaleidoscope/Chapter4/toy.cpp index cf7d6c2bee04e..921fa8908040f 100644 --- a/interpreter/llvm/src/examples/Kaleidoscope/Chapter4/toy.cpp +++ b/interpreter/llvm/src/examples/Kaleidoscope/Chapter4/toy.cpp @@ -611,7 +611,7 @@ static void HandleTopLevelExpression() { // Get the symbol's address and cast it to the right type (takes no // arguments, returns a double) so we can call it as a native function. - double (*FP)() = (double (*)())(intptr_t)ExprSymbol.getAddress(); + double (*FP)() = (double (*)())(intptr_t)cantFail(ExprSymbol.getAddress()); fprintf(stderr, "Evaluated to %f\n", FP()); // Delete the anonymous expression module from the JIT. diff --git a/interpreter/llvm/src/examples/Kaleidoscope/Chapter5/toy.cpp b/interpreter/llvm/src/examples/Kaleidoscope/Chapter5/toy.cpp index 6852973bae400..2d23bdb26c21e 100644 --- a/interpreter/llvm/src/examples/Kaleidoscope/Chapter5/toy.cpp +++ b/interpreter/llvm/src/examples/Kaleidoscope/Chapter5/toy.cpp @@ -885,7 +885,7 @@ static void HandleTopLevelExpression() { // Get the symbol's address and cast it to the right type (takes no // arguments, returns a double) so we can call it as a native function. - double (*FP)() = (double (*)())(intptr_t)ExprSymbol.getAddress(); + double (*FP)() = (double (*)())(intptr_t)cantFail(ExprSymbol.getAddress()); fprintf(stderr, "Evaluated to %f\n", FP()); // Delete the anonymous expression module from the JIT. diff --git a/interpreter/llvm/src/examples/Kaleidoscope/Chapter6/toy.cpp b/interpreter/llvm/src/examples/Kaleidoscope/Chapter6/toy.cpp index 0c2221735589c..b5e4495539fc1 100644 --- a/interpreter/llvm/src/examples/Kaleidoscope/Chapter6/toy.cpp +++ b/interpreter/llvm/src/examples/Kaleidoscope/Chapter6/toy.cpp @@ -1004,7 +1004,7 @@ static void HandleTopLevelExpression() { // Get the symbol's address and cast it to the right type (takes no // arguments, returns a double) so we can call it as a native function. - double (*FP)() = (double (*)())(intptr_t)ExprSymbol.getAddress(); + double (*FP)() = (double (*)())(intptr_t)cantFail(ExprSymbol.getAddress()); fprintf(stderr, "Evaluated to %f\n", FP()); // Delete the anonymous expression module from the JIT. 
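Another pattern running through the hunks above: RTDyldObjectLinkingLayer lost its template parameter and no longer receives a memory manager with each addModule() call; its constructor now takes a factory that produces one memory manager per emitted object, which is why the updated constructors gain a make_shared<SectionMemoryManager> lambda. A sketch of wiring the bottom two layers under that API, assuming LLVM 5.0 headers and an initialized native target; this mirrors, but is not copied from, the tutorial code:

#include "llvm/ExecutionEngine/ExecutionEngine.h"
#include "llvm/ExecutionEngine/SectionMemoryManager.h"
#include "llvm/ExecutionEngine/Orc/CompileUtils.h"
#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
#include "llvm/Support/TargetSelect.h"
#include <memory>

using namespace llvm;
using namespace llvm::orc;

int main() {
  InitializeNativeTarget();
  InitializeNativeTargetAsmPrinter();

  std::unique_ptr<TargetMachine> TM(EngineBuilder().selectTarget());

  // The object layer is built from a factory: one SectionMemoryManager
  // is created for every object it finalizes.
  RTDyldObjectLinkingLayer ObjectLayer(
      []() { return std::make_shared<SectionMemoryManager>(); });

  // IRCompileLayer is now parameterized on the compile functor as well.
  IRCompileLayer<decltype(ObjectLayer), SimpleCompiler> CompileLayer(
      ObjectLayer, SimpleCompiler(*TM));
  (void)CompileLayer; // Modules would be added via CompileLayer.addModule().
  return 0;
}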
diff --git a/interpreter/llvm/src/examples/Kaleidoscope/Chapter7/toy.cpp b/interpreter/llvm/src/examples/Kaleidoscope/Chapter7/toy.cpp index 79ac7b33d7a16..32f4a658c5d27 100644 --- a/interpreter/llvm/src/examples/Kaleidoscope/Chapter7/toy.cpp +++ b/interpreter/llvm/src/examples/Kaleidoscope/Chapter7/toy.cpp @@ -1173,7 +1173,7 @@ static void HandleTopLevelExpression() { // Get the symbol's address and cast it to the right type (takes no // arguments, returns a double) so we can call it as a native function. - double (*FP)() = (double (*)())(intptr_t)ExprSymbol.getAddress(); + double (*FP)() = (double (*)())(intptr_t)cantFail(ExprSymbol.getAddress()); fprintf(stderr, "Evaluated to %f\n", FP()); // Delete the anonymous expression module from the JIT. diff --git a/interpreter/llvm/src/examples/Kaleidoscope/include/KaleidoscopeJIT.h b/interpreter/llvm/src/examples/Kaleidoscope/include/KaleidoscopeJIT.h index 1dca39deba3c3..215ce03af99b9 100644 --- a/interpreter/llvm/src/examples/Kaleidoscope/include/KaleidoscopeJIT.h +++ b/interpreter/llvm/src/examples/Kaleidoscope/include/KaleidoscopeJIT.h @@ -1,4 +1,4 @@ -//===----- KaleidoscopeJIT.h - A simple JIT for Kaleidoscope ----*- C++ -*-===// +//===- KaleidoscopeJIT.h - A simple JIT for Kaleidoscope --------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -19,7 +19,6 @@ #include "llvm/ExecutionEngine/ExecutionEngine.h" #include "llvm/ExecutionEngine/JITSymbol.h" #include "llvm/ExecutionEngine/RTDyldMemoryManager.h" -#include "llvm/ExecutionEngine/RuntimeDyld.h" #include "llvm/ExecutionEngine/SectionMemoryManager.h" #include "llvm/ExecutionEngine/Orc/CompileUtils.h" #include "llvm/ExecutionEngine/Orc/IRCompileLayer.h" @@ -40,12 +39,13 @@ namespace orc { class KaleidoscopeJIT { public: - typedef RTDyldObjectLinkingLayer<> ObjLayerT; - typedef IRCompileLayer<ObjLayerT> CompileLayerT; - typedef CompileLayerT::ModuleSetHandleT ModuleHandleT; + using ObjLayerT = RTDyldObjectLinkingLayer; + using CompileLayerT = IRCompileLayer<ObjLayerT, SimpleCompiler>; + using ModuleHandleT = CompileLayerT::ModuleHandleT; KaleidoscopeJIT() : TM(EngineBuilder().selectTarget()), DL(TM->createDataLayout()), + ObjectLayer([]() { return std::make_shared<SectionMemoryManager>(); }), CompileLayer(ObjectLayer, SimpleCompiler(*TM)) { llvm::sys::DynamicLibrary::LoadLibraryPermanently(nullptr); } @@ -63,9 +63,8 @@ class KaleidoscopeJIT { return JITSymbol(nullptr); }, [](const std::string &S) { return nullptr; }); - auto H = CompileLayer.addModuleSet(singletonSet(std::move(M)), - make_unique<SectionMemoryManager>(), - std::move(Resolver)); + auto H = cantFail(CompileLayer.addModule(std::move(M), + std::move(Resolver))); ModuleHandles.push_back(H); return H; @@ -73,7 +72,7 @@ class KaleidoscopeJIT { void removeModule(ModuleHandleT H) { ModuleHandles.erase(find(ModuleHandles, H)); - CompileLayer.removeModuleSet(H); + cantFail(CompileLayer.removeModule(H)); } JITSymbol findSymbol(const std::string Name) { @@ -90,12 +89,6 @@ class KaleidoscopeJIT { return MangledName; } - template <typename T> static std::vector<T> singletonSet(T t) { - std::vector<T> Vec; - Vec.push_back(std::move(t)); - return Vec; - } - JITSymbol findMangledSymbol(const std::string &Name) { #ifdef LLVM_ON_WIN32 // The symbol lookup of ObjectLinkingLayer uses the SymbolRef::SF_Exported @@ -122,7 +115,7 @@ class KaleidoscopeJIT { return JITSymbol(SymAddr, JITSymbolFlags::Exported); #ifdef LLVM_ON_WIN32 - // For Windows retry without "_" at begining, as RTDyldMemoryManager uses + // For Windows retry without "_" at beginning, as RTDyldMemoryManager uses // GetProcAddress and standard libraries like
msvcrt.dll use names // with and without "_" (for example "_itoa" but "sin"). if (Name.length() > 2 && Name[0] == '_') diff --git a/interpreter/llvm/src/examples/ParallelJIT/ParallelJIT.cpp b/interpreter/llvm/src/examples/ParallelJIT/ParallelJIT.cpp index f1932d2471cb8..ff44375e3921e 100644 --- a/interpreter/llvm/src/examples/ParallelJIT/ParallelJIT.cpp +++ b/interpreter/llvm/src/examples/ParallelJIT/ParallelJIT.cpp @@ -145,6 +145,7 @@ class WaitForThreads waitFor = 0; int result = pthread_cond_init( &condition, nullptr ); + (void)result; assert( result == 0 ); result = pthread_mutex_init( &mutex, nullptr ); diff --git a/interpreter/llvm/src/include/llvm-c/Core.h b/interpreter/llvm/src/include/llvm-c/Core.h index 0a1d8faf99b76..22cef23007c36 100644 --- a/interpreter/llvm/src/include/llvm-c/Core.h +++ b/interpreter/llvm/src/include/llvm-c/Core.h @@ -1039,6 +1039,20 @@ LLVMBool LLVMIsOpaqueStruct(LLVMTypeRef StructTy); */ LLVMTypeRef LLVMGetElementType(LLVMTypeRef Ty); +/** + * Returns type's subtypes + * + * @see llvm::Type::subtypes() + */ +void LLVMGetSubtypes(LLVMTypeRef Tp, LLVMTypeRef *Arr); + +/** + * Return the number of types in the derived type. + * + * @see llvm::Type::getNumContainedTypes() + */ +unsigned LLVMGetNumContainedTypes(LLVMTypeRef Tp); + /** * Create a fixed size array type that refers to a specific type. * diff --git a/interpreter/llvm/src/include/llvm-c/ExecutionEngine.h b/interpreter/llvm/src/include/llvm-c/ExecutionEngine.h index b72a91a8b137e..51830fe139c6e 100644 --- a/interpreter/llvm/src/include/llvm-c/ExecutionEngine.h +++ b/interpreter/llvm/src/include/llvm-c/ExecutionEngine.h @@ -19,9 +19,9 @@ #ifndef LLVM_C_EXECUTIONENGINE_H #define LLVM_C_EXECUTIONENGINE_H -#include "llvm-c/Types.h" #include "llvm-c/Target.h" #include "llvm-c/TargetMachine.h" +#include "llvm-c/Types.h" #ifdef __cplusplus extern "C" { diff --git a/interpreter/llvm/src/include/llvm-c/OrcBindings.h b/interpreter/llvm/src/include/llvm-c/OrcBindings.h index de2969ab1c9bb..7ee3954313584 100644 --- a/interpreter/llvm/src/include/llvm-c/OrcBindings.h +++ b/interpreter/llvm/src/include/llvm-c/OrcBindings.h @@ -29,6 +29,8 @@ extern "C" { #endif +typedef struct LLVMOpaqueSharedModule *LLVMSharedModuleRef; +typedef struct LLVMOpaqueSharedObjectBuffer *LLVMSharedObjectBufferRef; typedef struct LLVMOrcOpaqueJITStack *LLVMOrcJITStackRef; typedef uint32_t LLVMOrcModuleHandle; typedef uint64_t LLVMOrcTargetAddress; @@ -38,6 +40,45 @@ typedef uint64_t (*LLVMOrcLazyCompileCallbackFn)(LLVMOrcJITStackRef JITStack, typedef enum { LLVMOrcErrSuccess = 0, LLVMOrcErrGeneric } LLVMOrcErrorCode; +/** + * Turn an LLVMModuleRef into an LLVMSharedModuleRef. + * + * The JIT uses shared ownership for LLVM modules, since it is generally + * difficult to know when the JIT will be finished with a module (and the JIT + * has no way of knowing when a user may be finished with one). + * + * Calling this method with an LLVMModuleRef creates a shared-pointer to the + * module, and returns a reference to this shared pointer. + * + * The shared module should be disposed when finished with by calling + * LLVMOrcDisposeSharedModule (not LLVMDisposeModule). The Module will be + * deleted when the last shared pointer owner relinquishes it. + */ + +LLVMSharedModuleRef LLVMOrcMakeSharedModule(LLVMModuleRef Mod); + +/** + * Dispose of a shared module. + * + * The module should not be accessed after this call. The module will be + * deleted once all clients (including the JIT itself) have released their + * shared pointers. 
+ */ + +void LLVMOrcDisposeSharedModuleRef(LLVMSharedModuleRef SharedMod); + +/** + * Get an LLVMSharedObjectBufferRef from an LLVMMemoryBufferRef. + */ +LLVMSharedObjectBufferRef +LLVMOrcMakeSharedObjectBuffer(LLVMMemoryBufferRef ObjBuffer); + +/** + * Dispose of a shared object buffer. + */ +void +LLVMOrcDisposeSharedObjectBufferRef(LLVMSharedObjectBufferRef SharedObjBuffer); + /** * Create an ORC JIT stack. * @@ -72,8 +113,9 @@ void LLVMOrcDisposeMangledSymbol(char *MangledSymbol); /** * Create a lazy compile callback. */ -LLVMOrcTargetAddress +LLVMOrcErrorCode LLVMOrcCreateLazyCompileCallback(LLVMOrcJITStackRef JITStack, + LLVMOrcTargetAddress *RetAddr, LLVMOrcLazyCompileCallbackFn Callback, void *CallbackCtx); @@ -94,26 +136,31 @@ LLVMOrcErrorCode LLVMOrcSetIndirectStubPointer(LLVMOrcJITStackRef JITStack, /** * Add module to be eagerly compiled. */ -LLVMOrcModuleHandle -LLVMOrcAddEagerlyCompiledIR(LLVMOrcJITStackRef JITStack, LLVMModuleRef Mod, +LLVMOrcErrorCode +LLVMOrcAddEagerlyCompiledIR(LLVMOrcJITStackRef JITStack, + LLVMOrcModuleHandle *RetHandle, + LLVMSharedModuleRef Mod, LLVMOrcSymbolResolverFn SymbolResolver, void *SymbolResolverCtx); /** * Add module to be lazily compiled one function at a time. */ -LLVMOrcModuleHandle -LLVMOrcAddLazilyCompiledIR(LLVMOrcJITStackRef JITStack, LLVMModuleRef Mod, +LLVMOrcErrorCode +LLVMOrcAddLazilyCompiledIR(LLVMOrcJITStackRef JITStack, + LLVMOrcModuleHandle *RetHandle, + LLVMSharedModuleRef Mod, LLVMOrcSymbolResolverFn SymbolResolver, void *SymbolResolverCtx); /** * Add an object file. */ -LLVMOrcModuleHandle LLVMOrcAddObjectFile(LLVMOrcJITStackRef JITStack, - LLVMObjectFileRef Obj, - LLVMOrcSymbolResolverFn SymbolResolver, - void *SymbolResolverCtx); +LLVMOrcErrorCode LLVMOrcAddObjectFile(LLVMOrcJITStackRef JITStack, + LLVMOrcModuleHandle *RetHandle, + LLVMSharedObjectBufferRef Obj, + LLVMOrcSymbolResolverFn SymbolResolver, + void *SymbolResolverCtx); /** * Remove a module set from the JIT. @@ -121,18 +168,20 @@ LLVMOrcModuleHandle LLVMOrcAddObjectFile(LLVMOrcJITStackRef JITStack, * This works for all modules that can be added via OrcAdd*, including object * files. */ -void LLVMOrcRemoveModule(LLVMOrcJITStackRef JITStack, LLVMOrcModuleHandle H); +LLVMOrcErrorCode LLVMOrcRemoveModule(LLVMOrcJITStackRef JITStack, + LLVMOrcModuleHandle H); /** * Get symbol address from JIT instance. */ -LLVMOrcTargetAddress LLVMOrcGetSymbolAddress(LLVMOrcJITStackRef JITStack, - const char *SymbolName); +LLVMOrcErrorCode LLVMOrcGetSymbolAddress(LLVMOrcJITStackRef JITStack, + LLVMOrcTargetAddress *RetAddr, + const char *SymbolName); /** * Dispose of an ORC JIT stack. 
*/ -void LLVMOrcDisposeInstance(LLVMOrcJITStackRef JITStack); +LLVMOrcErrorCode LLVMOrcDisposeInstance(LLVMOrcJITStackRef JITStack); #ifdef __cplusplus } diff --git a/interpreter/llvm/src/include/llvm-c/Support.h b/interpreter/llvm/src/include/llvm-c/Support.h index 735d1fbc78cc4..6de184ccab49e 100644 --- a/interpreter/llvm/src/include/llvm-c/Support.h +++ b/interpreter/llvm/src/include/llvm-c/Support.h @@ -14,8 +14,8 @@ #ifndef LLVM_C_SUPPORT_H #define LLVM_C_SUPPORT_H -#include "llvm/Support/DataTypes.h" #include "llvm-c/Types.h" +#include "llvm/Support/DataTypes.h" #ifdef __cplusplus extern "C" { diff --git a/interpreter/llvm/src/include/llvm-c/TargetMachine.h b/interpreter/llvm/src/include/llvm-c/TargetMachine.h index 1d1f61f1a5b48..f4f7f7698c45b 100644 --- a/interpreter/llvm/src/include/llvm-c/TargetMachine.h +++ b/interpreter/llvm/src/include/llvm-c/TargetMachine.h @@ -19,8 +19,8 @@ #ifndef LLVM_C_TARGETMACHINE_H #define LLVM_C_TARGETMACHINE_H -#include "llvm-c/Types.h" #include "llvm-c/Target.h" +#include "llvm-c/Types.h" #ifdef __cplusplus extern "C" { diff --git a/interpreter/llvm/src/include/llvm-c/Transforms/Vectorize.h b/interpreter/llvm/src/include/llvm-c/Transforms/Vectorize.h index a82ef49cb1672..cf8306aee762f 100644 --- a/interpreter/llvm/src/include/llvm-c/Transforms/Vectorize.h +++ b/interpreter/llvm/src/include/llvm-c/Transforms/Vectorize.h @@ -33,7 +33,7 @@ extern "C" { * @{ */ -/** See llvm::createBBVectorizePass function. */ +/** DEPRECATED - Use LLVMAddSLPVectorizePass */ void LLVMAddBBVectorizePass(LLVMPassManagerRef PM); /** See llvm::createLoopVectorizePass function. */ diff --git a/interpreter/llvm/src/include/llvm/ADT/APFloat.h b/interpreter/llvm/src/include/llvm/ADT/APFloat.h index bef6efde1f012..9c5e392c48087 100644 --- a/interpreter/llvm/src/include/llvm/ADT/APFloat.h +++ b/interpreter/llvm/src/include/llvm/ADT/APFloat.h @@ -140,8 +140,8 @@ enum lostFraction { // Example of truncated bits: // implementation classes. This struct should not define any non-static data // members. struct APFloatBase { - // TODO remove this and use APInt typedef directly. typedef APInt::WordType integerPart; + static const unsigned integerPartWidth = APInt::APINT_BITS_PER_WORD; /// A signed type to represent a floating point numbers unbiased exponent. typedef signed short ExponentType; diff --git a/interpreter/llvm/src/include/llvm/ADT/APInt.h b/interpreter/llvm/src/include/llvm/ADT/APInt.h index 94fbd1a29bf9c..a1cce6e5fe170 100644 --- a/interpreter/llvm/src/include/llvm/ADT/APInt.h +++ b/interpreter/llvm/src/include/llvm/ADT/APInt.h @@ -182,8 +182,9 @@ class LLVM_NODISCARD APInt { /// provides a more convenient form of divide for internal use since KnuthDiv /// has specific constraints on its inputs. If those constraints are not met /// then it provides a simpler form of divide. - static void divide(const APInt &LHS, unsigned lhsWords, const APInt &RHS, - unsigned rhsWords, APInt *Quotient, APInt *Remainder); + static void divide(const WordType *LHS, unsigned lhsWords, + const WordType *RHS, unsigned rhsWords, WordType *Quotient, + WordType *Remainder); /// out-of-line slow case for inline constructor void initSlowCase(uint64_t val, bool isSigned); @@ -212,6 +213,12 @@ class LLVM_NODISCARD APInt { /// out-of-line slow case for countLeadingZeros unsigned countLeadingZerosSlowCase() const LLVM_READONLY; + /// out-of-line slow case for countLeadingOnes. + unsigned countLeadingOnesSlowCase() const LLVM_READONLY; + + /// out-of-line slow case for countTrailingZeros. 
+ unsigned countTrailingZerosSlowCase() const LLVM_READONLY; + /// out-of-line slow case for countTrailingOnes unsigned countTrailingOnesSlowCase() const LLVM_READONLY; @@ -382,7 +389,7 @@ class LLVM_NODISCARD APInt { bool isAllOnesValue() const { if (isSingleWord()) return U.VAL == WORD_MAX >> (APINT_BITS_PER_WORD - BitWidth); - return countPopulationSlowCase() == BitWidth; + return countTrailingOnesSlowCase() == BitWidth; } /// \brief Determine if all bits are clear @@ -391,6 +398,15 @@ class LLVM_NODISCARD APInt { /// not. bool isNullValue() const { return !*this; } + /// \brief Determine if this is a value of 1. + /// + /// This checks to see if the value of this APInt is one. + bool isOneValue() const { + if (isSingleWord()) + return U.VAL == 1; + return countLeadingZerosSlowCase() == BitWidth - 1; + } + /// \brief Determine if this is the largest unsigned value. /// /// This checks to see if the value of this APInt is the maximum unsigned @@ -402,7 +418,9 @@ class LLVM_NODISCARD APInt { /// This checks to see if the value of this APInt is the maximum signed /// value for the APInt's bit width. bool isMaxSignedValue() const { - return !isNegative() && countPopulation() == BitWidth - 1; + if (isSingleWord()) + return U.VAL == ((WordType(1) << (BitWidth - 1)) - 1); + return !isNegative() && countTrailingOnesSlowCase() == BitWidth - 1; } /// \brief Determine if this is the smallest unsigned value. @@ -416,7 +434,9 @@ class LLVM_NODISCARD APInt { /// This checks to see if the value of this APInt is the minimum signed /// value for the APInt's bit width. bool isMinSignedValue() const { - return isNegative() && isPowerOf2(); + if (isSingleWord()) + return U.VAL == (WordType(1) << (BitWidth - 1)); + return isNegative() && countTrailingZerosSlowCase() == BitWidth - 1; } /// \brief Check if this APInt has an N-bits unsigned integer value. @@ -1016,11 +1036,13 @@ class LLVM_NODISCARD APInt { /// /// \returns a new APInt value containing the division result APInt udiv(const APInt &RHS) const; + APInt udiv(uint64_t RHS) const; /// \brief Signed division function for APInt. /// /// Signed divide this APInt by APInt RHS. APInt sdiv(const APInt &RHS) const; + APInt sdiv(int64_t RHS) const; /// \brief Unsigned remainder operation. /// @@ -1032,11 +1054,13 @@ class LLVM_NODISCARD APInt { /// /// \returns a new APInt value containing the remainder result APInt urem(const APInt &RHS) const; + uint64_t urem(uint64_t RHS) const; /// \brief Function for signed remainder operation. /// /// Signed remainder operation on APInt. APInt srem(const APInt &RHS) const; + int64_t srem(int64_t RHS) const; /// \brief Dual division/remainder interface. /// @@ -1047,9 +1071,13 @@ class LLVM_NODISCARD APInt { /// udivrem(X, Y, X, Y), for example. static void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder); + static void udivrem(const APInt &LHS, uint64_t RHS, APInt &Quotient, + uint64_t &Remainder); static void sdivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder); + static void sdivrem(const APInt &LHS, int64_t RHS, APInt &Quotient, + int64_t &Remainder); // Operations that return overflow indicators. APInt sadd_ov(const APInt &RHS, bool &Overflow) const; @@ -1067,9 +1095,7 @@ class LLVM_NODISCARD APInt { /// \returns the bit value at bitPosition bool operator[](unsigned bitPosition) const { assert(bitPosition < getBitWidth() && "Bit position out of bounds!"); - return (maskBit(bitPosition) & - (isSingleWord() ? 
U.VAL : U.pVal[whichWord(bitPosition)])) != - 0; + return (maskBit(bitPosition) & getWord(bitPosition)) != 0; } /// @} @@ -1562,7 +1588,11 @@ class LLVM_NODISCARD APInt { /// /// \returns 0 if the high order bit is not set, otherwise returns the number /// of 1 bits from the most significant to the least - unsigned countLeadingOnes() const LLVM_READONLY; + unsigned countLeadingOnes() const { + if (isSingleWord()) + return llvm::countLeadingOnes(U.VAL << (APINT_BITS_PER_WORD - BitWidth)); + return countLeadingOnesSlowCase(); + } /// Computes the number of leading bits of this APInt that are equal to its /// sign bit. @@ -1578,7 +1608,11 @@ class LLVM_NODISCARD APInt { /// /// \returns BitWidth if the value is zero, otherwise returns the number of /// zeros from the least significant bit to the first one bit. - unsigned countTrailingZeros() const LLVM_READONLY; + unsigned countTrailingZeros() const { + if (isSingleWord()) + return std::min(unsigned(llvm::countTrailingZeros(U.VAL)), BitWidth); + return countTrailingZerosSlowCase(); + } /// \brief Count the number of trailing one bits. /// @@ -2017,7 +2051,7 @@ inline APInt operator-(APInt a, const APInt &b) { } inline APInt operator-(const APInt &a, APInt &&b) { - b = -std::move(b); + b.negate(); b += a; return std::move(b); } @@ -2028,7 +2062,7 @@ inline APInt operator-(APInt a, uint64_t RHS) { } inline APInt operator-(uint64_t LHS, APInt b) { - b = -std::move(b); + b.negate(); b += LHS; return b; } diff --git a/interpreter/llvm/src/include/llvm/ADT/AllocatorList.h b/interpreter/llvm/src/include/llvm/ADT/AllocatorList.h index 05a549f96ec70..178c6742a87b9 100644 --- a/interpreter/llvm/src/include/llvm/ADT/AllocatorList.h +++ b/interpreter/llvm/src/include/llvm/ADT/AllocatorList.h @@ -10,10 +10,16 @@ #ifndef LLVM_ADT_ALLOCATORLIST_H #define LLVM_ADT_ALLOCATORLIST_H +#include "llvm/ADT/ilist_node.h" #include "llvm/ADT/iterator.h" #include "llvm/ADT/simple_ilist.h" #include "llvm/Support/Allocator.h" +#include +#include +#include +#include #include +#include namespace llvm { @@ -39,7 +45,8 @@ template class AllocatorList : AllocatorT { T V; }; - typedef simple_ilist list_type; + using list_type = simple_ilist; + list_type List; AllocatorT &getAlloc() { return *this; } @@ -51,13 +58,17 @@ template class AllocatorList : AllocatorT { struct Cloner { AllocatorList &AL; + Cloner(AllocatorList &AL) : AL(AL) {} + Node *operator()(const Node &N) const { return AL.create(N.V); } }; struct Disposer { AllocatorList &AL; + Disposer(AllocatorList &AL) : AL(AL) {} + void operator()(Node *N) const { N->~Node(); AL.getAlloc().Deallocate(N); @@ -65,13 +76,13 @@ template class AllocatorList : AllocatorT { }; public: - typedef T value_type; - typedef T *pointer; - typedef T &reference; - typedef const T *const_pointer; - typedef const T &const_reference; - typedef typename list_type::size_type size_type; - typedef typename list_type::difference_type difference_type; + using value_type = T; + using pointer = T *; + using reference = T &; + using const_pointer = const T *; + using const_reference = const T &; + using size_type = typename list_type::size_type; + using difference_type = typename list_type::difference_type; private: template @@ -83,20 +94,18 @@ template class AllocatorList : AllocatorT { friend class IteratorImpl; friend AllocatorList; - typedef iterator_adaptor_base, - IteratorBase, std::bidirectional_iterator_tag, - ValueT> - base_type; + using base_type = + iterator_adaptor_base, IteratorBase, + std::bidirectional_iterator_tag, ValueT>; public: - 
typedef ValueT value_type; - typedef ValueT *pointer; - typedef ValueT &reference; + using value_type = ValueT; + using pointer = ValueT *; + using reference = ValueT &; IteratorImpl() = default; IteratorImpl(const IteratorImpl &) = default; IteratorImpl &operator=(const IteratorImpl &) = default; - ~IteratorImpl() = default; explicit IteratorImpl(const IteratorBase &I) : base_type(I) {} @@ -106,6 +115,8 @@ template class AllocatorList : AllocatorT { OtherIteratorBase, IteratorBase>::value>::type * = nullptr) : base_type(X.wrapped()) {} + ~IteratorImpl() = default; + reference operator*() const { return base_type::wrapped()->V; } pointer operator->() const { return &operator*(); } @@ -118,30 +129,34 @@ template class AllocatorList : AllocatorT { }; public: - typedef IteratorImpl iterator; - typedef IteratorImpl - reverse_iterator; - typedef IteratorImpl - const_iterator; - typedef IteratorImpl - const_reverse_iterator; + using iterator = IteratorImpl; + using reverse_iterator = + IteratorImpl; + using const_iterator = + IteratorImpl; + using const_reverse_iterator = + IteratorImpl; AllocatorList() = default; AllocatorList(AllocatorList &&X) : AllocatorT(std::move(X.getAlloc())), List(std::move(X.List)) {} + AllocatorList(const AllocatorList &X) { List.cloneFrom(X.List, Cloner(*this), Disposer(*this)); } + AllocatorList &operator=(AllocatorList &&X) { clear(); // Dispose of current nodes explicitly. List = std::move(X.List); getAlloc() = std::move(X.getAlloc()); return *this; } + AllocatorList &operator=(const AllocatorList &X) { List.cloneFrom(X.List, Cloner(*this), Disposer(*this)); return *this; } + ~AllocatorList() { clear(); } void swap(AllocatorList &RHS) { diff --git a/interpreter/llvm/src/include/llvm/ADT/ArrayRef.h b/interpreter/llvm/src/include/llvm/ADT/ArrayRef.h index 6b35d0aec8b2b..925ebafc3feda 100644 --- a/interpreter/llvm/src/include/llvm/ADT/ArrayRef.h +++ b/interpreter/llvm/src/include/llvm/ADT/ArrayRef.h @@ -1,4 +1,4 @@ -//===--- ArrayRef.h - Array Reference Wrapper -------------------*- C++ -*-===// +//===- ArrayRef.h - Array Reference Wrapper ---------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -12,12 +12,21 @@ #include "llvm/ADT/Hashing.h" #include "llvm/ADT/None.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/Compiler.h" +#include #include +#include +#include +#include +#include +#include +#include #include namespace llvm { + /// ArrayRef - Represent a constant reference to an array (0 or more elements /// consecutively in memory), i.e. a start pointer and a length. It allows /// various APIs to take consecutive elements easily and conveniently. @@ -32,28 +41,27 @@ namespace llvm { template class LLVM_NODISCARD ArrayRef { public: - typedef const T *iterator; - typedef const T *const_iterator; - typedef size_t size_type; - - typedef std::reverse_iterator reverse_iterator; + using iterator = const T *; + using const_iterator = const T *; + using size_type = size_t; + using reverse_iterator = std::reverse_iterator; private: /// The start of the array, in an external buffer. - const T *Data; + const T *Data = nullptr; /// The number of elements. - size_type Length; + size_type Length = 0; public: /// @name Constructors /// @{ /// Construct an empty ArrayRef. - /*implicit*/ ArrayRef() : Data(nullptr), Length(0) {} + /*implicit*/ ArrayRef() = default; /// Construct an empty ArrayRef from None. 
- /*implicit*/ ArrayRef(NoneType) : Data(nullptr), Length(0) {} + /*implicit*/ ArrayRef(NoneType) {} /// Construct an ArrayRef from a single element. /*implicit*/ ArrayRef(const T &OneElt) @@ -282,9 +290,8 @@ namespace llvm { template class LLVM_NODISCARD MutableArrayRef : public ArrayRef { public: - typedef T *iterator; - - typedef std::reverse_iterator reverse_iterator; + using iterator = T *; + using reverse_iterator = std::reverse_iterator; /// Construct an empty MutableArrayRef. /*implicit*/ MutableArrayRef() : ArrayRef() {} @@ -416,19 +423,23 @@ namespace llvm { /// This is a MutableArrayRef that owns its array. template class OwningArrayRef : public MutableArrayRef { public: - OwningArrayRef() {} + OwningArrayRef() = default; OwningArrayRef(size_t Size) : MutableArrayRef(new T[Size], Size) {} + OwningArrayRef(ArrayRef Data) : MutableArrayRef(new T[Data.size()], Data.size()) { std::copy(Data.begin(), Data.end(), this->begin()); } + OwningArrayRef(OwningArrayRef &&Other) { *this = Other; } + OwningArrayRef &operator=(OwningArrayRef &&Other) { delete[] this->data(); this->MutableArrayRef::operator=(Other); Other.MutableArrayRef::operator=(MutableArrayRef()); return *this; } + ~OwningArrayRef() { delete[] this->data(); } }; @@ -517,13 +528,14 @@ namespace llvm { // ArrayRefs can be treated like a POD type. template struct isPodLike; - template struct isPodLike > { + template struct isPodLike> { static const bool value = true; }; template hash_code hash_value(ArrayRef S) { return hash_combine_range(S.begin(), S.end()); } + } // end namespace llvm #endif // LLVM_ADT_ARRAYREF_H diff --git a/interpreter/llvm/src/include/llvm/ADT/BitVector.h b/interpreter/llvm/src/include/llvm/ADT/BitVector.h index 4a2af7cd68a6d..e68ef5f53d106 100644 --- a/interpreter/llvm/src/include/llvm/ADT/BitVector.h +++ b/interpreter/llvm/src/include/llvm/ADT/BitVector.h @@ -15,6 +15,7 @@ #define LLVM_ADT_BITVECTOR_H #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Support/MathExtras.h" #include #include @@ -26,6 +27,50 @@ namespace llvm { +/// ForwardIterator for the bits that are set. +/// Iterators get invalidated when resize / reserve is called. 
+template <typename BitVectorT> class const_set_bits_iterator_impl { + const BitVectorT &Parent; + int Current = 0; + + void advance() { + assert(Current != -1 && "Trying to advance past end."); + Current = Parent.find_next(Current); + } + +public: + const_set_bits_iterator_impl(const BitVectorT &Parent, int Current) + : Parent(Parent), Current(Current) {} + explicit const_set_bits_iterator_impl(const BitVectorT &Parent) + : const_set_bits_iterator_impl(Parent, Parent.find_first()) {} + const_set_bits_iterator_impl(const const_set_bits_iterator_impl &) = default; + + const_set_bits_iterator_impl operator++(int) { + auto Prev = *this; + advance(); + return Prev; + } + + const_set_bits_iterator_impl &operator++() { + advance(); + return *this; + } + + unsigned operator*() const { return Current; } + + bool operator==(const const_set_bits_iterator_impl &Other) const { + assert(&Parent == &Other.Parent && + "Comparing iterators from different BitVectors"); + return Current == Other.Current; + } + + bool operator!=(const const_set_bits_iterator_impl &Other) const { + assert(&Parent == &Other.Parent && + "Comparing iterators from different BitVectors"); + return Current != Other.Current; + } +}; + class BitVector { typedef unsigned long BitWord; @@ -73,6 +118,18 @@ class BitVector { } }; + typedef const_set_bits_iterator_impl<BitVector> const_set_bits_iterator; + typedef const_set_bits_iterator set_iterator; + + const_set_bits_iterator set_bits_begin() const { + return const_set_bits_iterator(*this); + } + const_set_bits_iterator set_bits_end() const { + return const_set_bits_iterator(*this, -1); + } + iterator_range<const_set_bits_iterator> set_bits() const { + return make_range(set_bits_begin(), set_bits_end()); + } /// BitVector default ctor - Creates an empty bitvector. BitVector() : Size(0) {} @@ -146,138 +203,164 @@ class BitVector { return !any(); } - /// find_first - Returns the index of the first set bit, -1 if none - /// of the bits are set. - int find_first() const { - for (unsigned i = 0; i < NumBitWords(size()); ++i) - if (Bits[i] != 0) - return i * BITWORD_SIZE + countTrailingZeros(Bits[i]); - return -1; - } - - /// find_last - Returns the index of the last set bit, -1 if none of the bits - /// are set. - int find_last() const { - if (Size == 0) + /// find_first_in - Returns the index of the first set bit in the range + /// [Begin, End). Returns -1 if all bits in the range are unset. + int find_first_in(unsigned Begin, unsigned End) const { + assert(Begin <= End && End <= Size); + if (Begin == End) return -1; - unsigned N = NumBitWords(size()); - assert(N > 0); + unsigned FirstWord = Begin / BITWORD_SIZE; + unsigned LastWord = (End - 1) / BITWORD_SIZE; - unsigned i = N - 1; - while (i > 0 && Bits[i] == BitWord(0)) - --i; + // Check subsequent words. + for (unsigned i = FirstWord; i <= LastWord; ++i) { + BitWord Copy = Bits[i]; - return int((i + 1) * BITWORD_SIZE - countLeadingZeros(Bits[i])) - 1; - } + if (i == FirstWord) { + unsigned FirstBit = Begin % BITWORD_SIZE; + Copy &= maskTrailingZeros<BitWord>(FirstBit); + } - /// find_first_unset - Returns the index of the first unset bit, -1 if all - /// of the bits are set. - int find_first_unset() const { - for (unsigned i = 0; i < NumBitWords(size()); ++i) - if (Bits[i] != ~0UL) { - unsigned Result = i * BITWORD_SIZE + countTrailingOnes(Bits[i]); - return Result < size() ?
Result : -1; + if (i == LastWord) { + unsigned LastBit = (End - 1) % BITWORD_SIZE; + Copy &= maskTrailingOnes(LastBit + 1); } + if (Copy != 0) + return i * BITWORD_SIZE + countTrailingZeros(Copy); + } return -1; } - /// find_last_unset - Returns the index of the last unset bit, -1 if all of - /// the bits are set. - int find_last_unset() const { - if (Size == 0) + /// find_last_in - Returns the index of the last set bit in the range + /// [Begin, End). Returns -1 if all bits in the range are unset. + int find_last_in(unsigned Begin, unsigned End) const { + assert(Begin <= End && End <= Size); + if (Begin == End) return -1; - const unsigned N = NumBitWords(size()); - assert(N > 0); + unsigned LastWord = (End - 1) / BITWORD_SIZE; + unsigned FirstWord = Begin / BITWORD_SIZE; - unsigned i = N - 1; - BitWord W = Bits[i]; + for (unsigned i = LastWord + 1; i >= FirstWord + 1; --i) { + unsigned CurrentWord = i - 1; - // The last word in the BitVector has some unused bits, so we need to set - // them all to 1 first. Set them all to 1 so they don't get treated as - // valid unset bits. - unsigned UnusedCount = BITWORD_SIZE - size() % BITWORD_SIZE; - W |= maskLeadingOnes(UnusedCount); + BitWord Copy = Bits[CurrentWord]; + if (CurrentWord == LastWord) { + unsigned LastBit = (End - 1) % BITWORD_SIZE; + Copy &= maskTrailingOnes(LastBit + 1); + } - while (W == ~BitWord(0) && --i > 0) - W = Bits[i]; + if (CurrentWord == FirstWord) { + unsigned FirstBit = Begin % BITWORD_SIZE; + Copy &= maskTrailingZeros(FirstBit); + } + + if (Copy != 0) + return (CurrentWord + 1) * BITWORD_SIZE - countLeadingZeros(Copy) - 1; + } - return int((i + 1) * BITWORD_SIZE - countLeadingOnes(W)) - 1; + return -1; } - /// find_next - Returns the index of the next set bit following the - /// "Prev" bit. Returns -1 if the next set bit is not found. - int find_next(unsigned Prev) const { - ++Prev; - if (Prev >= Size) + /// find_first_unset_in - Returns the index of the first unset bit in the + /// range [Begin, End). Returns -1 if all bits in the range are set. + int find_first_unset_in(unsigned Begin, unsigned End) const { + assert(Begin <= End && End <= Size); + if (Begin == End) return -1; - unsigned WordPos = Prev / BITWORD_SIZE; - unsigned BitPos = Prev % BITWORD_SIZE; - BitWord Copy = Bits[WordPos]; - // Mask off previous bits. - Copy &= maskTrailingZeros(BitPos); - - if (Copy != 0) - return WordPos * BITWORD_SIZE + countTrailingZeros(Copy); + unsigned FirstWord = Begin / BITWORD_SIZE; + unsigned LastWord = (End - 1) / BITWORD_SIZE; // Check subsequent words. - for (unsigned i = WordPos+1; i < NumBitWords(size()); ++i) - if (Bits[i] != 0) - return i * BITWORD_SIZE + countTrailingZeros(Bits[i]); + for (unsigned i = FirstWord; i <= LastWord; ++i) { + BitWord Copy = Bits[i]; + + if (i == FirstWord) { + unsigned FirstBit = Begin % BITWORD_SIZE; + Copy |= maskTrailingOnes(FirstBit); + } + + if (i == LastWord) { + unsigned LastBit = (End - 1) % BITWORD_SIZE; + Copy |= maskTrailingZeros(LastBit + 1); + } + if (Copy != ~0UL) { + unsigned Result = i * BITWORD_SIZE + countTrailingOnes(Copy); + return Result < size() ? Result : -1; + } + } return -1; } - /// find_next_unset - Returns the index of the next unset bit following the - /// "Prev" bit. Returns -1 if all remaining bits are set. - int find_next_unset(unsigned Prev) const { - ++Prev; - if (Prev >= Size) + /// find_last_unset_in - Returns the index of the last unset bit in the + /// range [Begin, End). Returns -1 if all bits in the range are set. 
+ int find_last_unset_in(unsigned Begin, unsigned End) const { + assert(Begin <= End && End <= Size); + if (Begin == End) return -1; - unsigned WordPos = Prev / BITWORD_SIZE; - unsigned BitPos = Prev % BITWORD_SIZE; - BitWord Copy = Bits[WordPos]; - // Mask in previous bits. - BitWord Mask = (1 << BitPos) - 1; - Copy |= Mask; + unsigned LastWord = (End - 1) / BITWORD_SIZE; + unsigned FirstWord = Begin / BITWORD_SIZE; - if (Copy != ~0UL) - return next_unset_in_word(WordPos, Copy); + for (unsigned i = LastWord + 1; i >= FirstWord + 1; --i) { + unsigned CurrentWord = i - 1; - // Check subsequent words. - for (unsigned i = WordPos + 1; i < NumBitWords(size()); ++i) - if (Bits[i] != ~0UL) - return next_unset_in_word(i, Bits[i]); + BitWord Copy = Bits[CurrentWord]; + if (CurrentWord == LastWord) { + unsigned LastBit = (End - 1) % BITWORD_SIZE; + Copy |= maskTrailingZeros(LastBit + 1); + } + + if (CurrentWord == FirstWord) { + unsigned FirstBit = Begin % BITWORD_SIZE; + Copy |= maskTrailingOnes(FirstBit); + } + + if (Copy != ~0UL) { + unsigned Result = + (CurrentWord + 1) * BITWORD_SIZE - countLeadingOnes(Copy) - 1; + return Result < Size ? Result : -1; + } + } return -1; } + /// find_first - Returns the index of the first set bit, -1 if none + /// of the bits are set. + int find_first() const { return find_first_in(0, Size); } + + /// find_last - Returns the index of the last set bit, -1 if none of the bits + /// are set. + int find_last() const { return find_last_in(0, Size); } + + /// find_next - Returns the index of the next set bit following the + /// "Prev" bit. Returns -1 if the next set bit is not found. + int find_next(unsigned Prev) const { return find_first_in(Prev + 1, Size); } + /// find_prev - Returns the index of the first set bit that precedes the /// the bit at \p PriorTo. Returns -1 if all previous bits are unset. - int find_prev(unsigned PriorTo) const { - if (PriorTo == 0) - return -1; + int find_prev(unsigned PriorTo) const { return find_last_in(0, PriorTo); } - --PriorTo; + /// find_first_unset - Returns the index of the first unset bit, -1 if all + /// of the bits are set. + int find_first_unset() const { return find_first_unset_in(0, Size); } - unsigned WordPos = PriorTo / BITWORD_SIZE; - unsigned BitPos = PriorTo % BITWORD_SIZE; - BitWord Copy = Bits[WordPos]; - // Mask off next bits. - Copy &= maskTrailingOnes(BitPos + 1); + /// find_next_unset - Returns the index of the next unset bit following the + /// "Prev" bit. Returns -1 if all remaining bits are set. + int find_next_unset(unsigned Prev) const { + return find_first_unset_in(Prev + 1, Size); + } - if (Copy != 0) - return (WordPos + 1) * BITWORD_SIZE - countLeadingZeros(Copy) - 1; + /// find_last_unset - Returns the index of the last unset bit, -1 if all of + /// the bits are set. + int find_last_unset() const { return find_last_unset_in(0, Size); } - // Check previous words. - for (unsigned i = 1; i <= WordPos; ++i) { - unsigned Index = WordPos - i; - if (Bits[Index] == 0) - continue; - return (Index + 1) * BITWORD_SIZE - countLeadingZeros(Bits[Index]) - 1; - } - return -1; + /// find_prev_unset - Returns the index of the first unset bit that precedes + /// the bit at \p PriorTo. Returns -1 if all previous bits are set. + int find_prev_unset(unsigned PriorTo) { + return find_last_unset_in(0, PriorTo); } /// clear - Removes all bits from the bitvector. Does not change capacity. 
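The BitVector hunks above fold the old open-coded find_first/find_next loops into range-bounded find_*_in primitives and expose the set bits as an iterator range via set_bits(). A small usage sketch of our own (not from the patch), assuming the post-patch llvm/ADT/BitVector.h:

#include "llvm/ADT/BitVector.h"
#include "llvm/Support/raw_ostream.h"

int main() {
  llvm::BitVector BV(16);
  BV.set(3);
  BV.set(7);
  BV.set(12);

  // set_bits() wraps find_first()/find_next() in a forward iterator,
  // so the set bits can be visited with a range-based for loop.
  for (unsigned Idx : BV.set_bits())
    llvm::outs() << Idx << ' ';           // prints: 3 7 12
  llvm::outs() << '\n';

  // The new find_*_in variants search only within [Begin, End).
  llvm::outs() << BV.find_first_in(4, 16)              // 7: first set bit >= 4
               << ' ' << BV.find_last_in(0, 12) << '\n'; // 7: bit 12 excluded
  return 0;
}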
diff --git a/interpreter/llvm/src/include/llvm/ADT/BreadthFirstIterator.h b/interpreter/llvm/src/include/llvm/ADT/BreadthFirstIterator.h index eaeecb6e057ff..6bc63c283b097 100644 --- a/interpreter/llvm/src/include/llvm/ADT/BreadthFirstIterator.h +++ b/interpreter/llvm/src/include/llvm/ADT/BreadthFirstIterator.h @@ -25,7 +25,6 @@ #include "llvm/ADT/iterator_range.h" #include #include -#include #include namespace llvm { @@ -49,13 +48,13 @@ template , public bf_iterator_storage { - typedef std::iterator super; + using super = std::iterator; - typedef typename GT::NodeRef NodeRef; - typedef typename GT::ChildIteratorType ChildItTy; + using NodeRef = typename GT::NodeRef; + using ChildItTy = typename GT::ChildIteratorType; // First element is the node reference, second is the next child to visit. - typedef std::pair> QueueElement; + using QueueElement = std::pair>; // Visit queue - used to maintain BFS ordering. // Optional<> because we need markers for levels. @@ -109,7 +108,7 @@ class bf_iterator } public: - typedef typename super::pointer pointer; + using pointer = typename super::pointer; // Provide static begin and end methods as our public "constructors" static bf_iterator begin(const GraphT &G) { diff --git a/interpreter/llvm/src/include/llvm/ADT/DAGDeltaAlgorithm.h b/interpreter/llvm/src/include/llvm/ADT/DAGDeltaAlgorithm.h index 5ea0fe8728682..41fdd43efb8a3 100644 --- a/interpreter/llvm/src/include/llvm/ADT/DAGDeltaAlgorithm.h +++ b/interpreter/llvm/src/include/llvm/ADT/DAGDeltaAlgorithm.h @@ -1,4 +1,4 @@ -//===--- DAGDeltaAlgorithm.h - A DAG Minimization Algorithm ----*- C++ -*--===// +//===- DAGDeltaAlgorithm.h - A DAG Minimization Algorithm ------*- C++ -*--===// // // The LLVM Compiler Infrastructure // @@ -40,12 +40,12 @@ class DAGDeltaAlgorithm { virtual void anchor(); public: - typedef unsigned change_ty; - typedef std::pair edge_ty; + using change_ty = unsigned; + using edge_ty = std::pair; // FIXME: Use a decent data structure. - typedef std::set changeset_ty; - typedef std::vector changesetlist_ty; + using changeset_ty = std::set; + using changesetlist_ty = std::vector; public: virtual ~DAGDeltaAlgorithm() = default; diff --git a/interpreter/llvm/src/include/llvm/ADT/DeltaAlgorithm.h b/interpreter/llvm/src/include/llvm/ADT/DeltaAlgorithm.h index a26f37dfdc7dc..6becb2a601044 100644 --- a/interpreter/llvm/src/include/llvm/ADT/DeltaAlgorithm.h +++ b/interpreter/llvm/src/include/llvm/ADT/DeltaAlgorithm.h @@ -1,4 +1,4 @@ -//===--- DeltaAlgorithm.h - A Set Minimization Algorithm -------*- C++ -*--===// +//===- DeltaAlgorithm.h - A Set Minimization Algorithm ---------*- C++ -*--===// // // The LLVM Compiler Infrastructure // @@ -35,10 +35,10 @@ namespace llvm { /// predicate. class DeltaAlgorithm { public: - typedef unsigned change_ty; + using change_ty = unsigned; // FIXME: Use a decent data structure. - typedef std::set changeset_ty; - typedef std::vector changesetlist_ty; + using changeset_ty = std::set; + using changesetlist_ty = std::vector; private: /// Cache of failed test results. 
Successful test results are never cached @@ -90,4 +90,4 @@ class DeltaAlgorithm { } // end namespace llvm -#endif +#endif // LLVM_ADT_DELTAALGORITHM_H diff --git a/interpreter/llvm/src/include/llvm/ADT/DenseMap.h b/interpreter/llvm/src/include/llvm/ADT/DenseMap.h index fd8d3bf368a88..b311e69ec9d37 100644 --- a/interpreter/llvm/src/include/llvm/ADT/DenseMap.h +++ b/interpreter/llvm/src/include/llvm/ADT/DenseMap.h @@ -25,8 +25,8 @@ #include #include #include -#include #include +#include #include namespace llvm { @@ -57,14 +57,15 @@ class DenseMapBase : public DebugEpochBase { using const_arg_type_t = typename const_pointer_or_const_ref::type; public: - typedef unsigned size_type; - typedef KeyT key_type; - typedef ValueT mapped_type; - typedef BucketT value_type; - - typedef DenseMapIterator iterator; - typedef DenseMapIterator - const_iterator; + using size_type = unsigned; + using key_type = KeyT; + using mapped_type = ValueT; + using value_type = BucketT; + + using iterator = DenseMapIterator; + using const_iterator = + DenseMapIterator; + inline iterator begin() { // When the map is empty, avoid the overhead of AdvancePastEmptyBuckets(). return empty() ? end() : iterator(getBuckets(), getBucketsEnd(), *this); @@ -387,15 +388,18 @@ class DenseMapBase : public DebugEpochBase { static unsigned getHashValue(const KeyT &Val) { return KeyInfoT::getHashValue(Val); } + template static unsigned getHashValue(const LookupKeyT &Val) { return KeyInfoT::getHashValue(Val); } + static const KeyT getEmptyKey() { static_assert(std::is_base_of::value, "Must pass the derived type to this template!"); return KeyInfoT::getEmptyKey(); } + static const KeyT getTombstoneKey() { return KeyInfoT::getTombstoneKey(); } @@ -404,39 +408,51 @@ class DenseMapBase : public DebugEpochBase { unsigned getNumEntries() const { return static_cast(this)->getNumEntries(); } + void setNumEntries(unsigned Num) { static_cast(this)->setNumEntries(Num); } + void incrementNumEntries() { setNumEntries(getNumEntries() + 1); } + void decrementNumEntries() { setNumEntries(getNumEntries() - 1); } + unsigned getNumTombstones() const { return static_cast(this)->getNumTombstones(); } + void setNumTombstones(unsigned Num) { static_cast(this)->setNumTombstones(Num); } + void incrementNumTombstones() { setNumTombstones(getNumTombstones() + 1); } + void decrementNumTombstones() { setNumTombstones(getNumTombstones() - 1); } + const BucketT *getBuckets() const { return static_cast(this)->getBuckets(); } + BucketT *getBuckets() { return static_cast(this)->getBuckets(); } + unsigned getNumBuckets() const { return static_cast(this)->getNumBuckets(); } + BucketT *getBucketsEnd() { return getBuckets() + getNumBuckets(); } + const BucketT *getBucketsEnd() const { return getBuckets() + getNumBuckets(); } @@ -587,10 +603,11 @@ template > class DenseMap : public DenseMapBase, KeyT, ValueT, KeyInfoT, BucketT> { + friend class DenseMapBase; + // Lift some types from the dependent base class into this class for // simplicity of referring to them. 
- typedef DenseMapBase BaseT; - friend class DenseMapBase; + using BaseT = DenseMapBase; BucketT *Buckets; unsigned NumEntries; @@ -705,6 +722,7 @@ class DenseMap : public DenseMapBase, unsigned getNumEntries() const { return NumEntries; } + void setNumEntries(unsigned Num) { NumEntries = Num; } @@ -712,6 +730,7 @@ class DenseMap : public DenseMapBase, unsigned getNumTombstones() const { return NumTombstones; } + void setNumTombstones(unsigned Num) { NumTombstones = Num; } @@ -743,10 +762,12 @@ class SmallDenseMap : public DenseMapBase< SmallDenseMap, KeyT, ValueT, KeyInfoT, BucketT> { + friend class DenseMapBase; + // Lift some types from the dependent base class into this class for // simplicity of referring to them. - typedef DenseMapBase BaseT; - friend class DenseMapBase; + using BaseT = DenseMapBase; + static_assert(isPowerOf2_64(InlineBuckets), "InlineBuckets must be a power of 2."); @@ -972,6 +993,7 @@ class SmallDenseMap unsigned getNumEntries() const { return NumEntries; } + void setNumEntries(unsigned Num) { // NumEntries is hardcoded to be 31 bits wide. assert(Num < (1U << 31) && "Cannot support more than 1<<31 entries"); @@ -981,6 +1003,7 @@ class SmallDenseMap unsigned getNumTombstones() const { return NumTombstones; } + void setNumTombstones(unsigned Num) { NumTombstones = Num; } @@ -992,15 +1015,18 @@ class SmallDenseMap // 'storage.buffer' static type is 'char *'. return reinterpret_cast(storage.buffer); } + BucketT *getInlineBuckets() { return const_cast( const_cast(this)->getInlineBuckets()); } + const LargeRep *getLargeRep() const { assert(!Small); // Note, same rule about aliasing as with getInlineBuckets. return reinterpret_cast(storage.buffer); } + LargeRep *getLargeRep() { return const_cast( const_cast(this)->getLargeRep()); @@ -1009,10 +1035,12 @@ class SmallDenseMap const BucketT *getBuckets() const { return Small ? getInlineBuckets() : getLargeRep()->Buckets; } + BucketT *getBuckets() { return const_cast( const_cast(this)->getBuckets()); } + unsigned getNumBuckets() const { return Small ? 
InlineBuckets : getLargeRep()->NumBuckets; } @@ -1037,23 +1065,25 @@ class SmallDenseMap template class DenseMapIterator : DebugEpochBase::HandleBase { - typedef DenseMapIterator ConstIterator; friend class DenseMapIterator; friend class DenseMapIterator; + using ConstIterator = DenseMapIterator; + public: - typedef ptrdiff_t difference_type; - typedef typename std::conditional::type - value_type; - typedef value_type *pointer; - typedef value_type &reference; - typedef std::forward_iterator_tag iterator_category; + using difference_type = ptrdiff_t; + using value_type = + typename std::conditional::type; + using pointer = value_type *; + using reference = value_type &; + using iterator_category = std::forward_iterator_tag; private: - pointer Ptr, End; + pointer Ptr = nullptr; + pointer End = nullptr; public: - DenseMapIterator() : Ptr(nullptr), End(nullptr) {} + DenseMapIterator() = default; DenseMapIterator(pointer Pos, pointer E, const DebugEpochBase &Epoch, bool NoAdvance = false) diff --git a/interpreter/llvm/src/include/llvm/ADT/DenseMapInfo.h b/interpreter/llvm/src/include/llvm/ADT/DenseMapInfo.h index bb973ac650634..a96904c7dbbf6 100644 --- a/interpreter/llvm/src/include/llvm/ADT/DenseMapInfo.h +++ b/interpreter/llvm/src/include/llvm/ADT/DenseMapInfo.h @@ -18,7 +18,10 @@ #include "llvm/ADT/Hashing.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/PointerLikeTypeTraits.h" -#include "llvm/Support/type_traits.h" +#include +#include +#include +#include namespace llvm { @@ -38,15 +41,18 @@ struct DenseMapInfo { Val <<= PointerLikeTypeTraits::NumLowBitsAvailable; return reinterpret_cast(Val); } + static inline T* getTombstoneKey() { uintptr_t Val = static_cast(-2); Val <<= PointerLikeTypeTraits::NumLowBitsAvailable; return reinterpret_cast(Val); } + static unsigned getHashValue(const T *PtrVal) { return (unsigned((uintptr_t)PtrVal) >> 4) ^ (unsigned((uintptr_t)PtrVal) >> 9); } + static bool isEqual(const T *LHS, const T *RHS) { return LHS == RHS; } }; @@ -55,6 +61,7 @@ template<> struct DenseMapInfo { static inline char getEmptyKey() { return ~0; } static inline char getTombstoneKey() { return ~0 - 1; } static unsigned getHashValue(const char& Val) { return Val * 37U; } + static bool isEqual(const char &LHS, const char &RHS) { return LHS == RHS; } @@ -65,6 +72,7 @@ template <> struct DenseMapInfo { static inline unsigned short getEmptyKey() { return 0xFFFF; } static inline unsigned short getTombstoneKey() { return 0xFFFF - 1; } static unsigned getHashValue(const unsigned short &Val) { return Val * 37U; } + static bool isEqual(const unsigned short &LHS, const unsigned short &RHS) { return LHS == RHS; } @@ -75,6 +83,7 @@ template<> struct DenseMapInfo { static inline unsigned getEmptyKey() { return ~0U; } static inline unsigned getTombstoneKey() { return ~0U - 1; } static unsigned getHashValue(const unsigned& Val) { return Val * 37U; } + static bool isEqual(const unsigned& LHS, const unsigned& RHS) { return LHS == RHS; } @@ -84,9 +93,11 @@ template<> struct DenseMapInfo { template<> struct DenseMapInfo { static inline unsigned long getEmptyKey() { return ~0UL; } static inline unsigned long getTombstoneKey() { return ~0UL - 1L; } + static unsigned getHashValue(const unsigned long& Val) { return (unsigned)(Val * 37UL); } + static bool isEqual(const unsigned long& LHS, const unsigned long& RHS) { return LHS == RHS; } @@ -96,9 +107,11 @@ template<> struct DenseMapInfo { template<> struct DenseMapInfo { static inline unsigned long long getEmptyKey() { return ~0ULL; } static inline 
unsigned long long getTombstoneKey() { return ~0ULL - 1ULL; } + static unsigned getHashValue(const unsigned long long& Val) { return (unsigned)(Val * 37ULL); } + static bool isEqual(const unsigned long long& LHS, const unsigned long long& RHS) { return LHS == RHS; @@ -118,6 +131,7 @@ template<> struct DenseMapInfo { static inline int getEmptyKey() { return 0x7fffffff; } static inline int getTombstoneKey() { return -0x7fffffff - 1; } static unsigned getHashValue(const int& Val) { return (unsigned)(Val * 37U); } + static bool isEqual(const int& LHS, const int& RHS) { return LHS == RHS; } @@ -128,10 +142,13 @@ template<> struct DenseMapInfo { static inline long getEmptyKey() { return (1UL << (sizeof(long) * 8 - 1)) - 1UL; } + static inline long getTombstoneKey() { return getEmptyKey() - 1L; } + static unsigned getHashValue(const long& Val) { return (unsigned)(Val * 37UL); } + static bool isEqual(const long& LHS, const long& RHS) { return LHS == RHS; } @@ -141,9 +158,11 @@ template<> struct DenseMapInfo { template<> struct DenseMapInfo { static inline long long getEmptyKey() { return 0x7fffffffffffffffLL; } static inline long long getTombstoneKey() { return -0x7fffffffffffffffLL-1; } + static unsigned getHashValue(const long long& Val) { return (unsigned)(Val * 37ULL); } + static bool isEqual(const long long& LHS, const long long& RHS) { return LHS == RHS; @@ -152,19 +171,21 @@ template<> struct DenseMapInfo { // Provide DenseMapInfo for all pairs whose members have info. template -struct DenseMapInfo > { - typedef std::pair Pair; - typedef DenseMapInfo FirstInfo; - typedef DenseMapInfo SecondInfo; +struct DenseMapInfo> { + using Pair = std::pair; + using FirstInfo = DenseMapInfo; + using SecondInfo = DenseMapInfo; static inline Pair getEmptyKey() { return std::make_pair(FirstInfo::getEmptyKey(), SecondInfo::getEmptyKey()); } + static inline Pair getTombstoneKey() { return std::make_pair(FirstInfo::getTombstoneKey(), SecondInfo::getTombstoneKey()); } + static unsigned getHashValue(const Pair& PairVal) { uint64_t key = (uint64_t)FirstInfo::getHashValue(PairVal.first) << 32 | (uint64_t)SecondInfo::getHashValue(PairVal.second); @@ -178,6 +199,7 @@ struct DenseMapInfo > { key ^= (key >> 31); return (unsigned)key; } + static bool isEqual(const Pair &LHS, const Pair &RHS) { return FirstInfo::isEqual(LHS.first, RHS.first) && SecondInfo::isEqual(LHS.second, RHS.second); @@ -190,16 +212,19 @@ template <> struct DenseMapInfo { return StringRef(reinterpret_cast(~static_cast(0)), 0); } + static inline StringRef getTombstoneKey() { return StringRef(reinterpret_cast(~static_cast(1)), 0); } + static unsigned getHashValue(StringRef Val) { assert(Val.data() != getEmptyKey().data() && "Cannot hash the empty key!"); assert(Val.data() != getTombstoneKey().data() && "Cannot hash the tombstone key!"); return (unsigned)(hash_value(Val)); } + static bool isEqual(StringRef LHS, StringRef RHS) { if (RHS.data() == getEmptyKey().data()) return LHS.data() == getEmptyKey().data(); @@ -215,16 +240,19 @@ template struct DenseMapInfo> { return ArrayRef(reinterpret_cast(~static_cast(0)), size_t(0)); } + static inline ArrayRef getTombstoneKey() { return ArrayRef(reinterpret_cast(~static_cast(1)), size_t(0)); } + static unsigned getHashValue(ArrayRef Val) { assert(Val.data() != getEmptyKey().data() && "Cannot hash the empty key!"); assert(Val.data() != getTombstoneKey().data() && "Cannot hash the tombstone key!"); return (unsigned)(hash_value(Val)); } + static bool isEqual(ArrayRef LHS, ArrayRef RHS) { if (RHS.data() == 
getEmptyKey().data()) return LHS.data() == getEmptyKey().data(); @@ -236,4 +264,4 @@ template struct DenseMapInfo> { } // end namespace llvm -#endif +#endif // LLVM_ADT_DENSEMAPINFO_H diff --git a/interpreter/llvm/src/include/llvm/ADT/DenseSet.h b/interpreter/llvm/src/include/llvm/ADT/DenseSet.h index fcf304c3ecc41..7e5171c3f3a44 100644 --- a/interpreter/llvm/src/include/llvm/ADT/DenseSet.h +++ b/interpreter/llvm/src/include/llvm/ADT/DenseSet.h @@ -15,11 +15,18 @@ #define LLVM_ADT_DENSESET_H #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/Support/type_traits.h" +#include +#include #include +#include +#include namespace llvm { namespace detail { + struct DenseSetEmpty {}; // Use the empty base class trick so we can create a DenseMap where the buckets @@ -48,13 +55,14 @@ class DenseSetImpl { static_assert(sizeof(typename MapTy::value_type) == sizeof(ValueT), "DenseMap buckets unexpectedly large!"); MapTy TheMap; + template using const_arg_type_t = typename const_pointer_or_const_ref::type; public: - typedef ValueT key_type; - typedef ValueT value_type; - typedef unsigned size_type; + using key_type = ValueT; + using value_type = ValueT; + using size_type = unsigned; explicit DenseSetImpl(unsigned InitialReserve = 0) : TheMap(InitialReserve) {} @@ -100,11 +108,11 @@ class DenseSetImpl { friend class ConstIterator; public: - typedef typename MapTy::iterator::difference_type difference_type; - typedef ValueT value_type; - typedef value_type *pointer; - typedef value_type &reference; - typedef std::forward_iterator_tag iterator_category; + using difference_type = typename MapTy::iterator::difference_type; + using value_type = ValueT; + using pointer = value_type *; + using reference = value_type &; + using iterator_category = std::forward_iterator_tag; Iterator() = default; Iterator(const typename MapTy::iterator &i) : I(i) {} @@ -126,16 +134,14 @@ class DenseSetImpl { friend class Iterator; public: - typedef typename MapTy::const_iterator::difference_type difference_type; - typedef ValueT value_type; - typedef value_type *pointer; - typedef value_type &reference; - typedef std::forward_iterator_tag iterator_category; - - ConstIterator(const Iterator &B) : I(B.I) {} + using difference_type = typename MapTy::const_iterator::difference_type; + using value_type = ValueT; + using pointer = value_type *; + using reference = value_type &; + using iterator_category = std::forward_iterator_tag; ConstIterator() = default; - + ConstIterator(const Iterator &B) : I(B.I) {} ConstIterator(const typename MapTy::const_iterator &i) : I(i) {} const ValueT &operator*() const { return I->getFirst(); } @@ -147,8 +153,8 @@ class DenseSetImpl { bool operator!=(const ConstIterator& X) const { return I != X.I; } }; - typedef Iterator iterator; - typedef ConstIterator const_iterator; + using iterator = Iterator; + using const_iterator = ConstIterator; iterator begin() { return Iterator(TheMap.begin()); } iterator end() { return Iterator(TheMap.end()); } @@ -208,7 +214,7 @@ class DenseSetImpl { } }; -} // namespace detail +} // end namespace detail /// Implements a dense probed hash-table based set. 
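
The four-function contract spelled out in these specializations (empty key, tombstone key, hash, equality) is also the extension point for user types. A sketch for a hypothetical FileID wrapper, mirroring the integer specializations above:

#include "llvm/ADT/DenseMap.h"

struct FileID {
  unsigned Id;
};

namespace llvm {
template <> struct DenseMapInfo<FileID> {
  // Two values real keys never take, at the top of the value range,
  // exactly as in the unsigned specialization above.
  static inline FileID getEmptyKey() { return {~0U}; }
  static inline FileID getTombstoneKey() { return {~0U - 1}; }
  static unsigned getHashValue(const FileID &V) { return V.Id * 37U; }
  static bool isEqual(const FileID &L, const FileID &R) {
    return L.Id == R.Id;
  }
};
} // end namespace llvm

// With the specialization in scope, FileID can key a DenseMap or DenseSet:
// llvm::DenseMap<FileID, const char *> Names;
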
template > @@ -246,4 +252,4 @@ class SmallDenseSet } // end namespace llvm -#endif +#endif // LLVM_ADT_DENSESET_H diff --git a/interpreter/llvm/src/include/llvm/ADT/DepthFirstIterator.h b/interpreter/llvm/src/include/llvm/ADT/DepthFirstIterator.h index b020d48cb3f08..e964d7fa23911 100644 --- a/interpreter/llvm/src/include/llvm/ADT/DepthFirstIterator.h +++ b/interpreter/llvm/src/include/llvm/ADT/DepthFirstIterator.h @@ -68,13 +68,14 @@ class df_iterator_storage { // cross edges in the spanning tree but is not used in the common case. template struct df_iterator_default_set : public SmallPtrSet { - typedef SmallPtrSet BaseSet; - typedef typename BaseSet::iterator iterator; - std::pair insert(NodeRef N) { return BaseSet::insert(N) ; } + using BaseSet = SmallPtrSet; + using iterator = typename BaseSet::iterator; + + std::pair insert(NodeRef N) { return BaseSet::insert(N); } template void insert(IterT Begin, IterT End) { BaseSet::insert(Begin,End); } - void completed(NodeRef) { } + void completed(NodeRef) {} }; // Generic Depth First Iterator @@ -85,15 +86,14 @@ template , public df_iterator_storage { - typedef std::iterator super; - - typedef typename GT::NodeRef NodeRef; - typedef typename GT::ChildIteratorType ChildItTy; + using super = std::iterator; + using NodeRef = typename GT::NodeRef; + using ChildItTy = typename GT::ChildIteratorType; // First element is node reference, second is the 'next child' to visit. // The second child is initialized lazily to pick up graph changes during the // DFS. - typedef std::pair> StackElement; + using StackElement = std::pair>; // VisitStack - Used to maintain the ordering. Top = current block std::vector VisitStack; @@ -103,12 +103,15 @@ class df_iterator this->Visited.insert(Node); VisitStack.push_back(StackElement(Node, None)); } + inline df_iterator() = default; // End is when stack is empty + inline df_iterator(NodeRef Node, SetType &S) : df_iterator_storage(S) { if (this->Visited.insert(Node).second) VisitStack.push_back(StackElement(Node, None)); } + inline df_iterator(SetType &S) : df_iterator_storage(S) { // End is when stack is empty @@ -142,7 +145,7 @@ class df_iterator } public: - typedef typename super::pointer pointer; + using pointer = typename super::pointer; // Provide static begin and end methods as our public "constructors" static df_iterator begin(const GraphT &G) { diff --git a/interpreter/llvm/src/include/llvm/ADT/EquivalenceClasses.h b/interpreter/llvm/src/include/llvm/ADT/EquivalenceClasses.h index 8fcac178ffc97..af293d4c1422a 100644 --- a/interpreter/llvm/src/include/llvm/ADT/EquivalenceClasses.h +++ b/interpreter/llvm/src/include/llvm/ADT/EquivalenceClasses.h @@ -1,4 +1,4 @@ -//===-- llvm/ADT/EquivalenceClasses.h - Generic Equiv. Classes --*- C++ -*-===// +//===- llvm/ADT/EquivalenceClasses.h - Generic Equiv. Classes ---*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -69,6 +69,7 @@ class EquivalenceClasses { /// leader is determined by a bit stolen from one of the pointers. class ECValue { friend class EquivalenceClasses; + mutable const ECValue *Leader, *Next; ElemTy Data; @@ -141,14 +142,14 @@ class EquivalenceClasses { // /// iterator* - Provides a way to iterate over all values in the set. - typedef typename std::set::const_iterator iterator; + using iterator = typename std::set::const_iterator; + iterator begin() const { return TheMapping.begin(); } iterator end() const { return TheMapping.end(); } bool empty() const { return TheMapping.empty(); } /// member_* Iterate over the members of an equivalence class. 
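
A short sketch of the union-find behavior behind this interface, assuming the conventional EquivalenceClasses members (insert, unionSets, getLeaderValue), which this patch does not modify:

#include "llvm/ADT/EquivalenceClasses.h"

void equivalenceDemo() {
  llvm::EquivalenceClasses<int> EC;
  EC.unionSets(1, 2); // classes: {1, 2}
  EC.insert(4);       // classes: {1, 2} {4}
  EC.unionSets(2, 4); // classes: {1, 2, 4}
  // All three members now report the same leader value.
  int L1 = EC.getLeaderValue(1);
  int L4 = EC.getLeaderValue(4);
  (void)(L1 == L4); // true
}
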
- /// class member_iterator; member_iterator member_begin(iterator I) const { // Only leaders provide anything to iterate over. @@ -204,7 +205,6 @@ class EquivalenceClasses { /// equivalence class it is in. This does the path-compression part that /// makes union-find "union findy". This returns an end iterator if the value /// is not in the equivalence class. - /// member_iterator findLeader(iterator I) const { if (I == TheMapping.end()) return member_end(); return member_iterator(I->getLeader()); @@ -241,15 +241,17 @@ class EquivalenceClasses { class member_iterator : public std::iterator { - typedef std::iterator super; - const ECValue *Node; friend class EquivalenceClasses; + using super = std::iterator; + + const ECValue *Node; + public: - typedef size_t size_type; - typedef typename super::pointer pointer; - typedef typename super::reference reference; + using size_type = size_t; + using pointer = typename super::pointer; + using reference = typename super::reference; explicit member_iterator() = default; explicit member_iterator(const ECValue *N) : Node(N) {} diff --git a/interpreter/llvm/src/include/llvm/ADT/FoldingSet.h b/interpreter/llvm/src/include/llvm/ADT/FoldingSet.h index dab18297dd3b4..c5987a947e182 100644 --- a/interpreter/llvm/src/include/llvm/ADT/FoldingSet.h +++ b/interpreter/llvm/src/include/llvm/ADT/FoldingSet.h @@ -40,7 +40,7 @@ namespace llvm { /// FoldingSetNode. The node class must also define a Profile method used to /// establish the unique bits of data for the node. The Profile method is /// passed a FoldingSetNodeID object which is used to gather the bits. Just -/// call one of the Add* functions defined in the FoldingSetImpl::NodeID class. +/// call one of the Add* functions defined in the FoldingSetBase::NodeID class. /// NOTE: That the folding set does not own the nodes and it is the /// responsibility of the user to dispose of the nodes. /// @@ -104,13 +104,13 @@ class FoldingSetNodeID; class StringRef; //===----------------------------------------------------------------------===// -/// FoldingSetImpl - Implements the folding set functionality. The main +/// FoldingSetBase - Implements the folding set functionality. The main /// structure is an array of buckets. Each bucket is indexed by the hash of /// the nodes it contains. The bucket itself points to the nodes contained /// in the bucket via a singly linked list. The last node in the list points /// back to the bucket to facilitate node removal. /// -class FoldingSetImpl { +class FoldingSetBase { virtual void anchor(); // Out of line virtual method. protected: @@ -126,10 +126,10 @@ class FoldingSetImpl { /// is greater than twice the number of buckets. unsigned NumNodes; - explicit FoldingSetImpl(unsigned Log2InitSize = 6); - FoldingSetImpl(FoldingSetImpl &&Arg); - FoldingSetImpl &operator=(FoldingSetImpl &&RHS); - ~FoldingSetImpl(); + explicit FoldingSetBase(unsigned Log2InitSize = 6); + FoldingSetBase(FoldingSetBase &&Arg); + FoldingSetBase &operator=(FoldingSetBase &&RHS); + ~FoldingSetBase(); public: //===--------------------------------------------------------------------===// @@ -152,33 +152,6 @@ class FoldingSetImpl { /// clear - Remove all nodes from the folding set. void clear(); - /// RemoveNode - Remove a node from the folding set, returning true if one - /// was removed or false if the node was not in the folding set. - bool RemoveNode(Node *N); - - /// GetOrInsertNode - If there is an existing simple Node exactly - /// equal to the specified node, return it. 
Otherwise, insert 'N' and return - /// it instead. - Node *GetOrInsertNode(Node *N); - - /// FindNodeOrInsertPos - Look up the node specified by ID. If it exists, - /// return it. If not, return the insertion token that will make insertion - /// faster. - Node *FindNodeOrInsertPos(const FoldingSetNodeID &ID, void *&InsertPos); - - /// InsertNode - Insert the specified node into the folding set, knowing that - /// it is not already in the folding set. InsertPos must be obtained from - /// FindNodeOrInsertPos. - void InsertNode(Node *N, void *InsertPos); - - /// InsertNode - Insert the specified node into the folding set, knowing that - /// it is not already in the folding set. - void InsertNode(Node *N) { - Node *Inserted = GetOrInsertNode(N); - (void)Inserted; - assert(Inserted == N && "Node already inserted!"); - } - /// size - Returns the number of nodes in the folding set. unsigned size() const { return NumNodes; } @@ -220,6 +193,28 @@ class FoldingSetImpl { /// ComputeNodeHash - Instantiations of the FoldingSet template implement /// this function to compute a hash value for the given node. virtual unsigned ComputeNodeHash(Node *N, FoldingSetNodeID &TempID) const = 0; + + // The below methods are protected to encourage subclasses to provide a more + // type-safe API. + + /// RemoveNode - Remove a node from the folding set, returning true if one + /// was removed or false if the node was not in the folding set. + bool RemoveNode(Node *N); + + /// GetOrInsertNode - If there is an existing simple Node exactly + /// equal to the specified node, return it. Otherwise, insert 'N' and return + /// it instead. + Node *GetOrInsertNode(Node *N); + + /// FindNodeOrInsertPos - Look up the node specified by ID. If it exists, + /// return it. If not, return the insertion token that will make insertion + /// faster. + Node *FindNodeOrInsertPos(const FoldingSetNodeID &ID, void *&InsertPos); + + /// InsertNode - Insert the specified node into the folding set, knowing that + /// it is not already in the folding set. InsertPos must be obtained from + /// FindNodeOrInsertPos. + void InsertNode(Node *N, void *InsertPos); }; //===----------------------------------------------------------------------===// @@ -293,7 +288,7 @@ class FoldingSetNodeIDRef { FoldingSetNodeIDRef(const unsigned *D, size_t S) : Data(D), Size(S) {} /// ComputeHash - Compute a strong hash value for this FoldingSetNodeIDRef, - /// used to lookup the node in the FoldingSetImpl. + /// used to lookup the node in the FoldingSetBase. unsigned ComputeHash() const; bool operator==(FoldingSetNodeIDRef) const; @@ -345,7 +340,7 @@ class FoldingSetNodeID { inline void clear() { Bits.clear(); } /// ComputeHash - Compute a strong hash value for this FoldingSetNodeID, used - /// to lookup the node in the FoldingSetImpl. + /// to lookup the node in the FoldingSetBase. unsigned ComputeHash() const; /// operator== - Used to compare two nodes to each other. @@ -368,7 +363,7 @@ class FoldingSetNodeID { }; // Convenience type to hide the implementation of the folding set. -typedef FoldingSetImpl::Node FoldingSetNode; +typedef FoldingSetBase::Node FoldingSetNode; template class FoldingSetIterator; template class FoldingSetBucketIterator; @@ -407,6 +402,71 @@ DefaultContextualFoldingSetTrait::ComputeHash(T &X, return TempID.ComputeHash(); } +//===----------------------------------------------------------------------===// +/// FoldingSetImpl - An implementation detail that lets us share code between +/// FoldingSet and ContextualFoldingSet. 
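
The client idiom these classes exist for is unchanged by the restructuring; only the home of the type-safe wrappers moves, into the shared FoldingSetImpl. A sketch with a hypothetical MyNode, using the FindNodeOrInsertPos/InsertNode pair from the interface above:

#include "llvm/ADT/FoldingSet.h"

struct MyNode : llvm::FoldingSetNode {
  unsigned Value;
  explicit MyNode(unsigned V) : Value(V) {}
  // Profile() feeds the bits that make this node unique into the ID.
  void Profile(llvm::FoldingSetNodeID &ID) const { ID.AddInteger(Value); }
};

MyNode *getOrCreate(llvm::FoldingSet<MyNode> &Set, unsigned V) {
  llvm::FoldingSetNodeID ID;
  ID.AddInteger(V);
  void *InsertPos = nullptr;
  if (MyNode *N = Set.FindNodeOrInsertPos(ID, InsertPos))
    return N; // a structurally equal node already exists
  MyNode *N = new MyNode(V); // the set does not own its nodes
  Set.InsertNode(N, InsertPos);
  return N;
}
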
+template class FoldingSetImpl : public FoldingSetBase { +protected: + explicit FoldingSetImpl(unsigned Log2InitSize) + : FoldingSetBase(Log2InitSize) {} + + FoldingSetImpl(FoldingSetImpl &&Arg) = default; + FoldingSetImpl &operator=(FoldingSetImpl &&RHS) = default; + ~FoldingSetImpl() = default; + +public: + typedef FoldingSetIterator iterator; + iterator begin() { return iterator(Buckets); } + iterator end() { return iterator(Buckets+NumBuckets); } + + typedef FoldingSetIterator const_iterator; + const_iterator begin() const { return const_iterator(Buckets); } + const_iterator end() const { return const_iterator(Buckets+NumBuckets); } + + typedef FoldingSetBucketIterator bucket_iterator; + + bucket_iterator bucket_begin(unsigned hash) { + return bucket_iterator(Buckets + (hash & (NumBuckets-1))); + } + + bucket_iterator bucket_end(unsigned hash) { + return bucket_iterator(Buckets + (hash & (NumBuckets-1)), true); + } + + /// RemoveNode - Remove a node from the folding set, returning true if one + /// was removed or false if the node was not in the folding set. + bool RemoveNode(T *N) { return FoldingSetBase::RemoveNode(N); } + + /// GetOrInsertNode - If there is an existing simple Node exactly + /// equal to the specified node, return it. Otherwise, insert 'N' and + /// return it instead. + T *GetOrInsertNode(T *N) { + return static_cast(FoldingSetBase::GetOrInsertNode(N)); + } + + /// FindNodeOrInsertPos - Look up the node specified by ID. If it exists, + /// return it. If not, return the insertion token that will make insertion + /// faster. + T *FindNodeOrInsertPos(const FoldingSetNodeID &ID, void *&InsertPos) { + return static_cast(FoldingSetBase::FindNodeOrInsertPos(ID, InsertPos)); + } + + /// InsertNode - Insert the specified node into the folding set, knowing that + /// it is not already in the folding set. InsertPos must be obtained from + /// FindNodeOrInsertPos. + void InsertNode(T *N, void *InsertPos) { + FoldingSetBase::InsertNode(N, InsertPos); + } + + /// InsertNode - Insert the specified node into the folding set, knowing that + /// it is not already in the folding set. + void InsertNode(T *N) { + T *Inserted = GetOrInsertNode(N); + (void)Inserted; + assert(Inserted == N && "Node already inserted!"); + } +}; + //===----------------------------------------------------------------------===// /// FoldingSet - This template class is used to instantiate a specialized /// implementation of the folding set to the node class T. T must be a @@ -416,8 +476,10 @@ DefaultContextualFoldingSetTrait::ComputeHash(T &X, /// moved-from state is not a valid state for anything other than /// move-assigning and destroying. This is primarily to enable movable APIs /// that incorporate these objects. -template class FoldingSet final : public FoldingSetImpl { -private: +template class FoldingSet final : public FoldingSetImpl { + using Super = FoldingSetImpl; + using Node = typename Super::Node; + /// GetNodeProfile - Each instantiatation of the FoldingSet needs to provide a /// way to convert nodes into a unique specifier. 
void GetNodeProfile(Node *N, FoldingSetNodeID &ID) const override { @@ -442,45 +504,10 @@ template class FoldingSet final : public FoldingSetImpl { public: explicit FoldingSet(unsigned Log2InitSize = 6) - : FoldingSetImpl(Log2InitSize) {} - - FoldingSet(FoldingSet &&Arg) : FoldingSetImpl(std::move(Arg)) {} - FoldingSet &operator=(FoldingSet &&RHS) { - (void)FoldingSetImpl::operator=(std::move(RHS)); - return *this; - } - - typedef FoldingSetIterator iterator; - iterator begin() { return iterator(Buckets); } - iterator end() { return iterator(Buckets+NumBuckets); } - - typedef FoldingSetIterator const_iterator; - const_iterator begin() const { return const_iterator(Buckets); } - const_iterator end() const { return const_iterator(Buckets+NumBuckets); } - - typedef FoldingSetBucketIterator bucket_iterator; - - bucket_iterator bucket_begin(unsigned hash) { - return bucket_iterator(Buckets + (hash & (NumBuckets-1))); - } - - bucket_iterator bucket_end(unsigned hash) { - return bucket_iterator(Buckets + (hash & (NumBuckets-1)), true); - } + : Super(Log2InitSize) {} - /// GetOrInsertNode - If there is an existing simple Node exactly - /// equal to the specified node, return it. Otherwise, insert 'N' and - /// return it instead. - T *GetOrInsertNode(Node *N) { - return static_cast(FoldingSetImpl::GetOrInsertNode(N)); - } - - /// FindNodeOrInsertPos - Look up the node specified by ID. If it exists, - /// return it. If not, return the insertion token that will make insertion - /// faster. - T *FindNodeOrInsertPos(const FoldingSetNodeID &ID, void *&InsertPos) { - return static_cast(FoldingSetImpl::FindNodeOrInsertPos(ID, InsertPos)); - } + FoldingSet(FoldingSet &&Arg) = default; + FoldingSet &operator=(FoldingSet &&RHS) = default; }; //===----------------------------------------------------------------------===// @@ -493,74 +520,42 @@ template class FoldingSet final : public FoldingSetImpl { /// function with signature /// void Profile(FoldingSetNodeID &, Ctx); template -class ContextualFoldingSet final : public FoldingSetImpl { +class ContextualFoldingSet final : public FoldingSetImpl { // Unfortunately, this can't derive from FoldingSet because the - // construction vtable for FoldingSet requires + // construction of the vtable for FoldingSet requires // FoldingSet::GetNodeProfile to be instantiated, which in turn // requires a single-argument T::Profile(). -private: + using Super = FoldingSetImpl; + using Node = typename Super::Node; + Ctx Context; /// GetNodeProfile - Each instantiatation of the FoldingSet needs to provide a /// way to convert nodes into a unique specifier. 
- void GetNodeProfile(FoldingSetImpl::Node *N, - FoldingSetNodeID &ID) const override { + void GetNodeProfile(Node *N, FoldingSetNodeID &ID) const override { T *TN = static_cast(N); ContextualFoldingSetTrait::Profile(*TN, ID, Context); } - bool NodeEquals(FoldingSetImpl::Node *N, const FoldingSetNodeID &ID, - unsigned IDHash, FoldingSetNodeID &TempID) const override { + bool NodeEquals(Node *N, const FoldingSetNodeID &ID, unsigned IDHash, + FoldingSetNodeID &TempID) const override { T *TN = static_cast(N); return ContextualFoldingSetTrait::Equals(*TN, ID, IDHash, TempID, Context); } - unsigned ComputeNodeHash(FoldingSetImpl::Node *N, - FoldingSetNodeID &TempID) const override { + unsigned ComputeNodeHash(Node *N, FoldingSetNodeID &TempID) const override { T *TN = static_cast(N); return ContextualFoldingSetTrait::ComputeHash(*TN, TempID, Context); } public: explicit ContextualFoldingSet(Ctx Context, unsigned Log2InitSize = 6) - : FoldingSetImpl(Log2InitSize), Context(Context) + : Super(Log2InitSize), Context(Context) {} Ctx getContext() const { return Context; } - - typedef FoldingSetIterator iterator; - iterator begin() { return iterator(Buckets); } - iterator end() { return iterator(Buckets+NumBuckets); } - - typedef FoldingSetIterator const_iterator; - const_iterator begin() const { return const_iterator(Buckets); } - const_iterator end() const { return const_iterator(Buckets+NumBuckets); } - - typedef FoldingSetBucketIterator bucket_iterator; - - bucket_iterator bucket_begin(unsigned hash) { - return bucket_iterator(Buckets + (hash & (NumBuckets-1))); - } - - bucket_iterator bucket_end(unsigned hash) { - return bucket_iterator(Buckets + (hash & (NumBuckets-1)), true); - } - - /// GetOrInsertNode - If there is an existing simple Node exactly - /// equal to the specified node, return it. Otherwise, insert 'N' - /// and return it instead. - T *GetOrInsertNode(Node *N) { - return static_cast(FoldingSetImpl::GetOrInsertNode(N)); - } - - /// FindNodeOrInsertPos - Look up the node specified by ID. If it - /// exists, return it. If not, return the insertion token that will - /// make insertion faster. - T *FindNodeOrInsertPos(const FoldingSetNodeID &ID, void *&InsertPos) { - return static_cast(FoldingSetImpl::FindNodeOrInsertPos(ID, InsertPos)); - } }; //===----------------------------------------------------------------------===// diff --git a/interpreter/llvm/src/include/llvm/ADT/GraphTraits.h b/interpreter/llvm/src/include/llvm/ADT/GraphTraits.h index 2c88c4271b489..225d9eb847f00 100644 --- a/interpreter/llvm/src/include/llvm/ADT/GraphTraits.h +++ b/interpreter/llvm/src/include/llvm/ADT/GraphTraits.h @@ -1,4 +1,4 @@ -//===-- llvm/ADT/GraphTraits.h - Graph traits template ----------*- C++ -*-===// +//===- llvm/ADT/GraphTraits.h - Graph traits template -----------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -41,7 +41,6 @@ struct GraphTraits { // static ChildIteratorType child_end (NodeRef) // Return iterators that point to the beginning and ending of the child // node list for the specified node. - // // typedef ...iterator nodes_iterator; - dereference to a NodeRef // static nodes_iterator nodes_begin(GraphType *G) @@ -50,8 +49,6 @@ struct GraphTraits { // static unsigned size (GraphType *G) // Return total number of nodes in the graph - // - // If anyone tries to use this class without having an appropriate // specialization, make an error. If you get this error, it's because you @@ -59,11 +56,9 @@ struct GraphTraits { // graph, or you need to define it for a new graph type. 
Either that or // your argument to XXX_begin(...) is unknown or needs to have the proper .h // file #include'd. - // - typedef typename GraphType::UnknownGraphTypeError NodeRef; + using NodeRef = typename GraphType::UnknownGraphTypeError; }; - // Inverse - This class is used as a little marker class to tell the graph // iterator to iterate over the graph in a graph defined "Inverse" ordering. // Not all graphs define an inverse ordering, and if they do, it depends on @@ -74,7 +69,7 @@ struct GraphTraits { // for (; I != E; ++I) { ... } // // Which is equivalent to: -// df_iterator > I = idf_begin(M), E = idf_end(M); +// df_iterator> I = idf_begin(M), E = idf_end(M); // for (; I != E; ++I) { ... } // template @@ -115,6 +110,7 @@ inverse_children(const typename GraphTraits::NodeRef &G) { return make_range(GraphTraits>::child_begin(G), GraphTraits>::child_end(G)); } -} // End llvm namespace -#endif +} // end namespace llvm + +#endif // LLVM_ADT_GRAPHTRAITS_H diff --git a/interpreter/llvm/src/include/llvm/ADT/ImmutableList.h b/interpreter/llvm/src/include/llvm/ADT/ImmutableList.h index e5f51bafe995d..60d63e09d4268 100644 --- a/interpreter/llvm/src/include/llvm/ADT/ImmutableList.h +++ b/interpreter/llvm/src/include/llvm/ADT/ImmutableList.h @@ -63,8 +63,8 @@ class ImmutableListImpl : public FoldingSetNode { template class ImmutableList { public: - typedef T value_type; - typedef ImmutableListFactory Factory; + using value_type = T; + using Factory = ImmutableListFactory; private: const ImmutableListImpl* X; @@ -141,8 +141,8 @@ class ImmutableList { template class ImmutableListFactory { - typedef ImmutableListImpl ListTy; - typedef FoldingSet CacheTy; + using ListTy = ImmutableListImpl; + using CacheTy = FoldingSet; CacheTy Cache; uintptr_t Allocator; diff --git a/interpreter/llvm/src/include/llvm/ADT/ImmutableMap.h b/interpreter/llvm/src/include/llvm/ADT/ImmutableMap.h index f197d407ba3bc..10d1e1f0139ba 100644 --- a/interpreter/llvm/src/include/llvm/ADT/ImmutableMap.h +++ b/interpreter/llvm/src/include/llvm/ADT/ImmutableMap.h @@ -26,12 +26,12 @@ namespace llvm { /// only the first element (the key) is used by isEqual and isLess. 
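
A sketch of the persistent-map usage that the retain/release pairs reordered here support; the Factory::add and lookup calls are the conventional ImmutableMap interface, not shown verbatim in this hunk:

#include "llvm/ADT/ImmutableMap.h"

void immutableMapDemo() {
  llvm::ImmutableMap<int, int>::Factory F;
  llvm::ImmutableMap<int, int> Empty = F.getEmptyMap();
  llvm::ImmutableMap<int, int> One = F.add(Empty, 1, 10);
  llvm::ImmutableMap<int, int> Two = F.add(One, 2, 20);
  // Empty and One remain valid, unchanged maps sharing tree structure.
  const int *V = Two.lookup(2); // points at the mapped 20, or nullptr
  (void)V;
}
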
template struct ImutKeyValueInfo { - typedef const std::pair value_type; - typedef const value_type& value_type_ref; - typedef const T key_type; - typedef const T& key_type_ref; - typedef const S data_type; - typedef const S& data_type_ref; + using value_type = const std::pair; + using value_type_ref = const value_type&; + using key_type = const T; + using key_type_ref = const T&; + using data_type = const S; + using data_type_ref = const S&; static inline key_type_ref KeyOfValue(value_type_ref V) { return V.first; @@ -62,13 +62,13 @@ template > class ImmutableMap { public: - typedef typename ValInfo::value_type value_type; - typedef typename ValInfo::value_type_ref value_type_ref; - typedef typename ValInfo::key_type key_type; - typedef typename ValInfo::key_type_ref key_type_ref; - typedef typename ValInfo::data_type data_type; - typedef typename ValInfo::data_type_ref data_type_ref; - typedef ImutAVLTree TreeTy; + using value_type = typename ValInfo::value_type; + using value_type_ref = typename ValInfo::value_type_ref; + using key_type = typename ValInfo::key_type; + using key_type_ref = typename ValInfo::key_type_ref; + using data_type = typename ValInfo::data_type; + using data_type_ref = typename ValInfo::data_type_ref; + using TreeTy = ImutAVLTree; protected: TreeTy* Root; @@ -86,6 +86,10 @@ class ImmutableMap { if (Root) { Root->retain(); } } + ~ImmutableMap() { + if (Root) { Root->release(); } + } + ImmutableMap &operator=(const ImmutableMap &X) { if (Root != X.Root) { if (X.Root) { X.Root->retain(); } @@ -95,10 +99,6 @@ class ImmutableMap { return *this; } - ~ImmutableMap() { - if (Root) { Root->release(); } - } - class Factory { typename TreeTy::Factory F; const bool Canonicalize; @@ -166,12 +166,14 @@ class ImmutableMap { template struct CBWrapper { Callback C; + void operator()(value_type_ref V) { C(V.first,V.second); } }; template struct CBWrapperRef { Callback &C; + CBWrapperRef(Callback& c) : C(c) {} void operator()(value_type_ref V) { C(V.first,V.second); } @@ -254,14 +256,14 @@ template > class ImmutableMapRef { public: - typedef typename ValInfo::value_type value_type; - typedef typename ValInfo::value_type_ref value_type_ref; - typedef typename ValInfo::key_type key_type; - typedef typename ValInfo::key_type_ref key_type_ref; - typedef typename ValInfo::data_type data_type; - typedef typename ValInfo::data_type_ref data_type_ref; - typedef ImutAVLTree TreeTy; - typedef typename TreeTy::Factory FactoryTy; + using value_type = typename ValInfo::value_type; + using value_type_ref = typename ValInfo::value_type_ref; + using key_type = typename ValInfo::key_type; + using key_type_ref = typename ValInfo::key_type_ref; + using data_type = typename ValInfo::data_type; + using data_type_ref = typename ValInfo::data_type_ref; + using TreeTy = ImutAVLTree; + using FactoryTy = typename TreeTy::Factory; protected: TreeTy *Root; @@ -292,6 +294,11 @@ class ImmutableMapRef { } } + ~ImmutableMapRef() { + if (Root) + Root->release(); + } + ImmutableMapRef &operator=(const ImmutableMapRef &X) { if (Root != X.Root) { if (X.Root) @@ -306,11 +313,6 @@ class ImmutableMapRef { return *this; } - ~ImmutableMapRef() { - if (Root) - Root->release(); - } - static inline ImmutableMapRef getEmptyMap(FactoryTy *F) { return ImmutableMapRef(0, F); } diff --git a/interpreter/llvm/src/include/llvm/ADT/ImmutableSet.h b/interpreter/llvm/src/include/llvm/ADT/ImmutableSet.h index 0724a28306a03..9d580c5a3d416 100644 --- a/interpreter/llvm/src/include/llvm/ADT/ImmutableSet.h +++ 
b/interpreter/llvm/src/include/llvm/ADT/ImmutableSet.h @@ -16,16 +16,16 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/FoldingSet.h" -#include "llvm/ADT/iterator.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/iterator.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/ErrorHandling.h" #include -#include -#include #include +#include #include #include +#include namespace llvm { @@ -41,18 +41,16 @@ template class ImutAVLTreeGenericIterator; template class ImutAVLTree { public: - typedef typename ImutInfo::key_type_ref key_type_ref; - typedef typename ImutInfo::value_type value_type; - typedef typename ImutInfo::value_type_ref value_type_ref; + using key_type_ref = typename ImutInfo::key_type_ref; + using value_type = typename ImutInfo::value_type; + using value_type_ref = typename ImutInfo::value_type_ref; + using Factory = ImutAVLFactory; + using iterator = ImutAVLTreeInOrderIterator; - typedef ImutAVLFactory Factory; friend class ImutAVLFactory; friend class ImutIntervalAVLFactory; - friend class ImutAVLTreeGenericIterator; - typedef ImutAVLTreeInOrderIterator iterator; - //===----------------------------------------------------===// // Public Interface. //===----------------------------------------------------===// @@ -225,17 +223,17 @@ class ImutAVLTree { Factory *factory; ImutAVLTree *left; ImutAVLTree *right; - ImutAVLTree *prev; - ImutAVLTree *next; + ImutAVLTree *prev = nullptr; + ImutAVLTree *next = nullptr; - unsigned height : 28; - unsigned IsMutable : 1; - unsigned IsDigestCached : 1; - unsigned IsCanonicalized : 1; + unsigned height : 28; + bool IsMutable : 1; + bool IsDigestCached : 1; + bool IsCanonicalized : 1; value_type value; - uint32_t digest; - uint32_t refCount; + uint32_t digest = 0; + uint32_t refCount = 0; //===----------------------------------------------------===// // Internal methods (node manipulation; used by Factory). @@ -246,9 +244,8 @@ class ImutAVLTree { /// ImutAVLFactory. 
ImutAVLTree(Factory *f, ImutAVLTree* l, ImutAVLTree* r, value_type_ref v, unsigned height) - : factory(f), left(l), right(r), prev(nullptr), next(nullptr), - height(height), IsMutable(true), IsDigestCached(false), - IsCanonicalized(0), value(v), digest(0), refCount(0) + : factory(f), left(l), right(r), height(height), IsMutable(true), + IsDigestCached(false), IsCanonicalized(false), value(v) { if (left) left->retain(); if (right) right->retain(); @@ -369,11 +366,11 @@ class ImutAVLTree { template class ImutAVLFactory { friend class ImutAVLTree; - typedef ImutAVLTree TreeTy; - typedef typename TreeTy::value_type_ref value_type_ref; - typedef typename TreeTy::key_type_ref key_type_ref; - typedef DenseMap CacheTy; + using TreeTy = ImutAVLTree; + using value_type_ref = typename TreeTy::value_type_ref; + using key_type_ref = typename TreeTy::key_type_ref; + using CacheTy = DenseMap; CacheTy Cache; uintptr_t Allocator; @@ -659,7 +656,7 @@ class ImutAVLTreeGenericIterator enum VisitFlag { VisitedNone=0x0, VisitedLeft=0x1, VisitedRight=0x3, Flags=0x3 }; - typedef ImutAVLTree TreeTy; + using TreeTy = ImutAVLTree; ImutAVLTreeGenericIterator() = default; ImutAVLTreeGenericIterator(const TreeTy *Root) { @@ -764,11 +761,12 @@ template class ImutAVLTreeInOrderIterator : public std::iterator> { - typedef ImutAVLTreeGenericIterator InternalIteratorTy; + using InternalIteratorTy = ImutAVLTreeGenericIterator; + InternalIteratorTy InternalItr; public: - typedef ImutAVLTree TreeTy; + using TreeTy = ImutAVLTree; ImutAVLTreeInOrderIterator(const TreeTy* Root) : InternalItr(Root) { if (Root) @@ -840,8 +838,8 @@ struct ImutAVLValueIterator /// and generic handling of pointers is done below. template struct ImutProfileInfo { - typedef const T value_type; - typedef const T& value_type_ref; + using value_type = const T; + using value_type_ref = const T&; static void Profile(FoldingSetNodeID &ID, value_type_ref X) { FoldingSetTrait::Profile(X,ID); @@ -851,8 +849,8 @@ struct ImutProfileInfo { /// Profile traits for integers. template struct ImutProfileInteger { - typedef const T value_type; - typedef const T& value_type_ref; + using value_type = const T; + using value_type_ref = const T&; static void Profile(FoldingSetNodeID &ID, value_type_ref X) { ID.AddInteger(X); @@ -878,8 +876,8 @@ PROFILE_INTEGER_INFO(unsigned long long) /// Profile traits for booleans. template <> struct ImutProfileInfo { - typedef const bool value_type; - typedef const bool& value_type_ref; + using value_type = const bool; + using value_type_ref = const bool&; static void Profile(FoldingSetNodeID &ID, value_type_ref X) { ID.AddBoolean(X); @@ -890,8 +888,8 @@ struct ImutProfileInfo { /// references to unique objects. template struct ImutProfileInfo { - typedef const T* value_type; - typedef value_type value_type_ref; + using value_type = const T*; + using value_type_ref = value_type; static void Profile(FoldingSetNodeID &ID, value_type_ref X) { ID.AddPointer(X); @@ -910,12 +908,12 @@ struct ImutProfileInfo { /// std::equal_to<> and std::less<> to perform comparison of elements. 
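
The same factory pattern applies to sets built on these traits; a sketch of ImmutableSet usage, assuming the conventional Factory::add and contains members:

#include "llvm/ADT/ImmutableSet.h"

void immutableSetDemo() {
  llvm::ImmutableSet<int>::Factory F;
  llvm::ImmutableSet<int> S = F.getEmptySet();
  S = F.add(S, 3);
  S = F.add(S, 7);
  bool Has = S.contains(3); // true; each add built a new tree that
  (void)Has;                // shares unchanged nodes with the old one
}
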
template struct ImutContainerInfo : public ImutProfileInfo { - typedef typename ImutProfileInfo::value_type value_type; - typedef typename ImutProfileInfo::value_type_ref value_type_ref; - typedef value_type key_type; - typedef value_type_ref key_type_ref; - typedef bool data_type; - typedef bool data_type_ref; + using value_type = typename ImutProfileInfo::value_type; + using value_type_ref = typename ImutProfileInfo::value_type_ref; + using key_type = value_type; + using key_type_ref = value_type_ref; + using data_type = bool; + using data_type_ref = bool; static key_type_ref KeyOfValue(value_type_ref D) { return D; } static data_type_ref DataOfValue(value_type_ref) { return true; } @@ -936,12 +934,12 @@ struct ImutContainerInfo : public ImutProfileInfo { /// their addresses. template struct ImutContainerInfo : public ImutProfileInfo { - typedef typename ImutProfileInfo::value_type value_type; - typedef typename ImutProfileInfo::value_type_ref value_type_ref; - typedef value_type key_type; - typedef value_type_ref key_type_ref; - typedef bool data_type; - typedef bool data_type_ref; + using value_type = typename ImutProfileInfo::value_type; + using value_type_ref = typename ImutProfileInfo::value_type_ref; + using key_type = value_type; + using key_type_ref = value_type_ref; + using data_type = bool; + using data_type_ref = bool; static key_type_ref KeyOfValue(value_type_ref D) { return D; } static data_type_ref DataOfValue(value_type_ref) { return true; } @@ -960,9 +958,9 @@ struct ImutContainerInfo : public ImutProfileInfo { template > class ImmutableSet { public: - typedef typename ValInfo::value_type value_type; - typedef typename ValInfo::value_type_ref value_type_ref; - typedef ImutAVLTree TreeTy; + using value_type = typename ValInfo::value_type; + using value_type_ref = typename ValInfo::value_type_ref; + using TreeTy = ImutAVLTree; private: TreeTy *Root; @@ -980,6 +978,10 @@ class ImmutableSet { if (Root) { Root->retain(); } } + ~ImmutableSet() { + if (Root) { Root->release(); } + } + ImmutableSet &operator=(const ImmutableSet &X) { if (Root != X.Root) { if (X.Root) { X.Root->retain(); } @@ -989,10 +991,6 @@ class ImmutableSet { return *this; } - ~ImmutableSet() { - if (Root) { Root->release(); } - } - class Factory { typename TreeTy::Factory F; const bool Canonicalize; @@ -1084,7 +1082,7 @@ class ImmutableSet { // Iterators. 
//===--------------------------------------------------===// - typedef ImutAVLValueIterator iterator; + using iterator = ImutAVLValueIterator; iterator begin() const { return iterator(Root); } iterator end() const { return iterator(); } @@ -1112,10 +1110,10 @@ class ImmutableSet { template > class ImmutableSetRef { public: - typedef typename ValInfo::value_type value_type; - typedef typename ValInfo::value_type_ref value_type_ref; - typedef ImutAVLTree TreeTy; - typedef typename TreeTy::Factory FactoryTy; + using value_type = typename ValInfo::value_type; + using value_type_ref = typename ValInfo::value_type_ref; + using TreeTy = ImutAVLTree; + using FactoryTy = typename TreeTy::Factory; private: TreeTy *Root; @@ -1138,6 +1136,10 @@ class ImmutableSetRef { if (Root) { Root->retain(); } } + ~ImmutableSetRef() { + if (Root) { Root->release(); } + } + ImmutableSetRef &operator=(const ImmutableSetRef &X) { if (Root != X.Root) { if (X.Root) { X.Root->retain(); } @@ -1147,9 +1149,6 @@ class ImmutableSetRef { } return *this; } - ~ImmutableSetRef() { - if (Root) { Root->release(); } - } static ImmutableSetRef getEmptySet(FactoryTy *F) { return ImmutableSetRef(0, F); @@ -1196,7 +1195,7 @@ class ImmutableSetRef { // Iterators. //===--------------------------------------------------===// - typedef ImutAVLValueIterator iterator; + using iterator = ImutAVLValueIterator; iterator begin() const { return iterator(Root); } iterator end() const { return iterator(); } diff --git a/interpreter/llvm/src/include/llvm/ADT/IndexedMap.h b/interpreter/llvm/src/include/llvm/ADT/IndexedMap.h index 5ba85c0279209..2ee80d2cde63a 100644 --- a/interpreter/llvm/src/include/llvm/ADT/IndexedMap.h +++ b/interpreter/llvm/src/include/llvm/ADT/IndexedMap.h @@ -20,28 +20,28 @@ #ifndef LLVM_ADT_INDEXEDMAP_H #define LLVM_ADT_INDEXEDMAP_H -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/STLExtras.h" #include -#include namespace llvm { -template > +template > class IndexedMap { - typedef typename ToIndexT::argument_type IndexT; + using IndexT = typename ToIndexT::argument_type; // Prefer SmallVector with zero inline storage over std::vector. IndexedMaps // can grow very large and SmallVector grows more efficiently as long as T // is trivially copyable. - typedef SmallVector StorageT; + using StorageT = SmallVector; + StorageT storage_; T nullVal_; ToIndexT toIndex_; public: - IndexedMap() : nullVal_(T()) { } + IndexedMap() : nullVal_(T()) {} - explicit IndexedMap(const T& val) : nullVal_(val) { } + explicit IndexedMap(const T& val) : nullVal_(val) {} typename StorageT::reference operator[](IndexT n) { assert(toIndex_(n) < storage_.size() && "index out of bounds!"); @@ -80,6 +80,6 @@ template > } }; -} // End llvm namespace +} // end namespace llvm -#endif +#endif // LLVM_ADT_INDEXEDMAP_H diff --git a/interpreter/llvm/src/include/llvm/ADT/IntervalMap.h b/interpreter/llvm/src/include/llvm/ADT/IntervalMap.h index 430b9671bd1d7..f71366811218b 100644 --- a/interpreter/llvm/src/include/llvm/ADT/IntervalMap.h +++ b/interpreter/llvm/src/include/llvm/ADT/IntervalMap.h @@ -106,6 +106,7 @@ #include "llvm/Support/RecyclingAllocator.h" #include #include +#include #include #include #include @@ -186,7 +187,7 @@ struct IntervalMapHalfOpenInfo { /// It should be considered private to the implementation. 
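
Before the implementation details, a sketch of the public behavior all of this machinery provides, assuming the conventional insert/lookup interface of IntervalMap:

#include "llvm/ADT/IntervalMap.h"

void intervalMapDemo() {
  llvm::IntervalMap<unsigned, char>::Allocator Alloc;
  llvm::IntervalMap<unsigned, char> M(Alloc);
  M.insert(10, 20, 'a'); // closed interval [10, 20] -> 'a'
  M.insert(21, 30, 'a'); // adjacent, equal value: coalesced with the above
  char V = M.lookup(15, '\0'); // 'a'; the second argument is the miss value
  (void)V;
}
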
namespace IntervalMapImpl { -typedef std::pair IdxPair; +using IdxPair = std::pair; //===----------------------------------------------------------------------===// //--- IntervalMapImpl::NodeBase ---// @@ -445,7 +446,7 @@ struct NodeSizer { LeafSize = DesiredLeafSize > MinLeafSize ? DesiredLeafSize : MinLeafSize }; - typedef NodeBase, ValT, LeafSize> LeafBase; + using LeafBase = NodeBase, ValT, LeafSize>; enum { // Now that we have the leaf branching factor, compute the actual allocation @@ -461,8 +462,8 @@ struct NodeSizer { /// This typedef is very likely to be identical for all IntervalMaps with /// reasonably sized entries, so the same allocator can be shared among /// different kinds of maps. - typedef RecyclingAllocator Allocator; + using Allocator = + RecyclingAllocator; }; //===----------------------------------------------------------------------===// @@ -930,12 +931,12 @@ template ::LeafSize, typename Traits = IntervalMapInfo> class IntervalMap { - typedef IntervalMapImpl::NodeSizer Sizer; - typedef IntervalMapImpl::LeafNode Leaf; - typedef IntervalMapImpl::BranchNode - Branch; - typedef IntervalMapImpl::LeafNode RootLeaf; - typedef IntervalMapImpl::IdxPair IdxPair; + using Sizer = IntervalMapImpl::NodeSizer; + using Leaf = IntervalMapImpl::LeafNode; + using Branch = + IntervalMapImpl::BranchNode; + using RootLeaf = IntervalMapImpl::LeafNode; + using IdxPair = IntervalMapImpl::IdxPair; // The RootLeaf capacity is given as a template parameter. We must compute the // corresponding RootBranch capacity. @@ -945,8 +946,8 @@ class IntervalMap { RootBranchCap = DesiredRootBranchCap ? DesiredRootBranchCap : 1 }; - typedef IntervalMapImpl::BranchNode - RootBranch; + using RootBranch = + IntervalMapImpl::BranchNode; // When branched, we store a global start key as well as the branch node. struct RootBranchData { @@ -955,10 +956,10 @@ class IntervalMap { }; public: - typedef typename Sizer::Allocator Allocator; - typedef KeyT KeyType; - typedef ValT ValueType; - typedef Traits KeyTraits; + using Allocator = typename Sizer::Allocator; + using KeyType = KeyT; + using ValueType = ValT; + using KeyTraits = Traits; private: // The root data is either a RootLeaf or a RootBranchData instance. @@ -1290,7 +1291,7 @@ class IntervalMap::const_iterator : friend class IntervalMap; // The map referred to. - IntervalMap *map; + IntervalMap *map = nullptr; // We store a full path from the root to the current position. // The path may be partially filled, but never between iterator calls. @@ -1338,7 +1339,7 @@ class IntervalMap::const_iterator : public: /// const_iterator - Create an iterator that isn't pointing anywhere. - const_iterator() : map(nullptr) {} + const_iterator() = default; /// setMap - Change the map iterated over. This call must be followed by a /// call to goToBegin(), goToEnd(), or find() @@ -1509,7 +1510,8 @@ const_iterator::treeAdvanceTo(KeyT x) { template class IntervalMap::iterator : public const_iterator { friend class IntervalMap; - typedef IntervalMapImpl::IdxPair IdxPair; + + using IdxPair = IntervalMapImpl::IdxPair; explicit iterator(IntervalMap &map) : const_iterator(map) {} @@ -2003,7 +2005,7 @@ iterator::overflow(unsigned Level) { // Elements have been rearranged, now update node sizes and stops. 
bool SplitRoot = false; unsigned Pos = 0; - for (;;) { + while (true) { KeyT Stop = Node[Pos]->stop(NewSize[Pos]-1); if (NewNode && Pos == NewNode) { SplitRoot = insertNode(Level, NodeRef(Node[Pos], NewSize[Pos]), Stop); @@ -2045,8 +2047,9 @@ iterator::overflow(unsigned Level) { /// template class IntervalMapOverlaps { - typedef typename MapA::KeyType KeyType; - typedef typename MapA::KeyTraits Traits; + using KeyType = typename MapA::KeyType; + using Traits = typename MapA::KeyTraits; + typename MapA::const_iterator posA; typename MapB::const_iterator posB; @@ -2071,7 +2074,7 @@ class IntervalMapOverlaps { // Already overlapping. return; - for (;;) { + while (true) { // Make a.end > b.start. posA.advanceTo(posB.start()); if (!posA.valid() || !Traits::stopLess(posB.stop(), posA.start())) diff --git a/interpreter/llvm/src/include/llvm/ADT/IntrusiveRefCntPtr.h b/interpreter/llvm/src/include/llvm/ADT/IntrusiveRefCntPtr.h index a77cf04ea4d1d..430ef86afbd95 100644 --- a/interpreter/llvm/src/include/llvm/ADT/IntrusiveRefCntPtr.h +++ b/interpreter/llvm/src/include/llvm/ADT/IntrusiveRefCntPtr.h @@ -1,4 +1,4 @@ -//== llvm/ADT/IntrusiveRefCntPtr.h - Smart Refcounting Pointer ---*- C++ -*-==// +//==- llvm/ADT/IntrusiveRefCntPtr.h - Smart Refcounting Pointer --*- C++ -*-==// // // The LLVM Compiler Infrastructure // @@ -73,9 +73,10 @@ template class RefCountedBase { public: RefCountedBase() = default; - RefCountedBase(const RefCountedBase &) : RefCount(0) {} + RefCountedBase(const RefCountedBase &) {} void Retain() const { ++RefCount; } + void Release() const { assert(RefCount > 0 && "Reference count is already zero."); if (--RefCount == 0) @@ -136,7 +137,7 @@ template class IntrusiveRefCntPtr { T *Obj = nullptr; public: - typedef T element_type; + using element_type = T; explicit IntrusiveRefCntPtr() = default; IntrusiveRefCntPtr(T *obj) : Obj(obj) { retain(); } @@ -153,13 +154,13 @@ template class IntrusiveRefCntPtr { retain(); } + ~IntrusiveRefCntPtr() { release(); } + IntrusiveRefCntPtr &operator=(IntrusiveRefCntPtr S) { swap(S); return *this; } - ~IntrusiveRefCntPtr() { release(); } - T &operator*() const { return *Obj; } T *operator->() const { return Obj; } T *get() const { return Obj; } @@ -183,6 +184,7 @@ template class IntrusiveRefCntPtr { if (Obj) IntrusiveRefCntPtrInfo::retain(Obj); } + void release() { if (Obj) IntrusiveRefCntPtrInfo::release(Obj); @@ -248,14 +250,16 @@ bool operator!=(const IntrusiveRefCntPtr &A, std::nullptr_t B) { template struct simplify_type; template struct simplify_type> { - typedef T *SimpleType; + using SimpleType = T *; + static SimpleType getSimplifiedValue(IntrusiveRefCntPtr &Val) { return Val.get(); } }; template struct simplify_type> { - typedef /*const*/ T *SimpleType; + using SimpleType = /*const*/ T *; + static SimpleType getSimplifiedValue(const IntrusiveRefCntPtr &Val) { return Val.get(); } diff --git a/interpreter/llvm/src/include/llvm/ADT/MapVector.h b/interpreter/llvm/src/include/llvm/ADT/MapVector.h index ac1885758cb9c..26a555ee1d3bd 100644 --- a/interpreter/llvm/src/include/llvm/ADT/MapVector.h +++ b/interpreter/llvm/src/include/llvm/ADT/MapVector.h @@ -19,6 +19,12 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" +#include +#include +#include +#include +#include +#include #include namespace llvm { @@ -27,20 +33,20 @@ namespace llvm { /// in a deterministic order. The values are kept in a std::vector and the /// mapping is done with DenseMap from Keys to indexes in that vector. 
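
A sketch of what the DenseMap-plus-vector split buys in practice, map-style access with deterministic, insertion-ordered iteration; insert and operator[] are the conventional MapVector members:

#include "llvm/ADT/MapVector.h"
#include <string>

void mapVectorDemo() {
  llvm::MapVector<int, std::string> MV;
  MV.insert({3, "three"});
  MV.insert({1, "one"});
  MV[2] = "two";
  // Visits keys 3, 1, 2: insertion order, independent of key order.
  for (const auto &KV : MV)
    (void)KV.second;
}
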
template, - typename VectorType = std::vector > > + typename MapType = DenseMap, + typename VectorType = std::vector>> class MapVector { - typedef typename VectorType::value_type value_type; - typedef typename VectorType::size_type size_type; + using value_type = typename VectorType::value_type; + using size_type = typename VectorType::size_type; MapType Map; VectorType Vector; public: - typedef typename VectorType::iterator iterator; - typedef typename VectorType::const_iterator const_iterator; - typedef typename VectorType::reverse_iterator reverse_iterator; - typedef typename VectorType::const_reverse_iterator const_reverse_iterator; + using iterator = typename VectorType::iterator; + using const_iterator = typename VectorType::const_iterator; + using reverse_iterator = typename VectorType::reverse_iterator; + using const_reverse_iterator = typename VectorType::const_reverse_iterator; /// Clear the MapVector and return the underlying vector. VectorType takeVector() { @@ -220,4 +226,4 @@ struct SmallMapVector } // end namespace llvm -#endif +#endif // LLVM_ADT_MAPVECTOR_H diff --git a/interpreter/llvm/src/include/llvm/ADT/Optional.h b/interpreter/llvm/src/include/llvm/ADT/Optional.h index 701872c9f63fc..b782d9da17ac4 100644 --- a/interpreter/llvm/src/include/llvm/ADT/Optional.h +++ b/interpreter/llvm/src/include/llvm/ADT/Optional.h @@ -1,4 +1,4 @@ -//===-- Optional.h - Simple variant for passing optional values ---*- C++ -*-=// +//===- Optional.h - Simple variant for passing optional values --*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -19,6 +19,8 @@ #include "llvm/ADT/None.h" #include "llvm/Support/AlignOf.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/type_traits.h" +#include #include #include #include @@ -28,15 +30,18 @@ namespace llvm { template class Optional { AlignedCharArrayUnion storage; - bool hasVal; + bool hasVal = false; + public: - typedef T value_type; + using value_type = T; + + Optional(NoneType) {} + explicit Optional() {} - Optional(NoneType) : hasVal(false) {} - explicit Optional() : hasVal(false) {} Optional(const T &y) : hasVal(true) { new (storage.buffer) T(y); } + Optional(const Optional &O) : hasVal(O.hasVal) { if (hasVal) new (storage.buffer) T(*O); @@ -45,12 +50,18 @@ class Optional { Optional(T &&y) : hasVal(true) { new (storage.buffer) T(std::forward(y)); } + Optional(Optional &&O) : hasVal(O) { if (O) { new (storage.buffer) T(std::move(*O)); O.reset(); } } + + ~Optional() { + reset(); + } + Optional &operator=(T &&y) { if (hasVal) **this = std::move(y); @@ -60,6 +71,7 @@ class Optional { } return *this; } + Optional &operator=(Optional &&O) { if (!O) reset(); @@ -112,10 +124,6 @@ class Optional { } } - ~Optional() { - reset(); - } - const T* getPointer() const { assert(hasVal); return reinterpret_cast(storage.buffer); } T* getPointer() { assert(hasVal); return reinterpret_cast(storage.buffer); } const T& getValue() const LLVM_LVALUE_FUNCTION { assert(hasVal); return *getPointer(); } @@ -144,8 +152,7 @@ class Optional { #endif }; -template struct isPodLike; -template struct isPodLike > { +template struct isPodLike> { // An Optional is pod-like if T is. 
static const bool value = isPodLike::value; }; @@ -284,6 +291,6 @@ template bool operator>=(const T &X, const Optional &Y) { return !(X < Y); } -} // end llvm namespace +} // end namespace llvm -#endif +#endif // LLVM_ADT_OPTIONAL_H diff --git a/interpreter/llvm/src/include/llvm/ADT/PackedVector.h b/interpreter/llvm/src/include/llvm/ADT/PackedVector.h index 8f925f1ff5cbc..95adc2926813b 100644 --- a/interpreter/llvm/src/include/llvm/ADT/PackedVector.h +++ b/interpreter/llvm/src/include/llvm/ADT/PackedVector.h @@ -76,8 +76,8 @@ template class PackedVector : public PackedVectorBase::is_signed> { BitVectorTy Bits; - typedef PackedVectorBase::is_signed> base; + using base = PackedVectorBase::is_signed>; public: class reference { @@ -99,7 +99,7 @@ class PackedVector : public PackedVectorBase #include +#include +#include namespace llvm { @@ -29,7 +32,7 @@ namespace llvm { /// Also, the default constructed value zero initializes the integer. template class PointerEmbeddedInt { - uintptr_t Value; + uintptr_t Value = 0; // Note: This '<' is correct; using '<=' would result in some shifts // overflowing their storage types. @@ -54,15 +57,12 @@ class PointerEmbeddedInt { explicit PointerEmbeddedInt(uintptr_t Value, RawValueTag) : Value(Value) {} public: - PointerEmbeddedInt() : Value(0) {} + PointerEmbeddedInt() = default; - PointerEmbeddedInt(IntT I) { - *this = I; - } + PointerEmbeddedInt(IntT I) { *this = I; } PointerEmbeddedInt &operator=(IntT I) { - assert((std::is_signed::value ? llvm::isInt(I) - : llvm::isUInt(I)) && + assert((std::is_signed::value ? isInt(I) : isUInt(I)) && "Integer has bits outside those preserved!"); Value = static_cast(I) << Shift; return *this; @@ -81,15 +81,17 @@ class PointerEmbeddedInt { // types. template class PointerLikeTypeTraits> { - typedef PointerEmbeddedInt T; + using T = PointerEmbeddedInt; public: static inline void *getAsVoidPointer(const T &P) { return reinterpret_cast(P.Value); } + static inline T getFromVoidPointer(void *P) { return T(reinterpret_cast(P), typename T::RawValueTag()); } + static inline T getFromVoidPointer(const void *P) { return T(reinterpret_cast(P), typename T::RawValueTag()); } @@ -101,17 +103,19 @@ class PointerLikeTypeTraits> { // itself can be a key. template struct DenseMapInfo> { - typedef PointerEmbeddedInt T; - - typedef DenseMapInfo IntInfo; + using T = PointerEmbeddedInt; + using IntInfo = DenseMapInfo; static inline T getEmptyKey() { return IntInfo::getEmptyKey(); } static inline T getTombstoneKey() { return IntInfo::getTombstoneKey(); } + static unsigned getHashValue(const T &Arg) { return IntInfo::getHashValue(Arg); } + static bool isEqual(const T &LHS, const T &RHS) { return LHS == RHS; } }; -} -#endif +} // end namespace llvm + +#endif // LLVM_ADT_POINTEREMBEDDEDINT_H diff --git a/interpreter/llvm/src/include/llvm/ADT/PointerUnion.h b/interpreter/llvm/src/include/llvm/ADT/PointerUnion.h index 9eb15524c0f30..aeab641f5715a 100644 --- a/interpreter/llvm/src/include/llvm/ADT/PointerUnion.h +++ b/interpreter/llvm/src/include/llvm/ADT/PointerUnion.h @@ -19,8 +19,8 @@ #include "llvm/ADT/PointerIntPair.h" #include "llvm/Support/PointerLikeTypeTraits.h" #include -#include #include +#include namespace llvm { @@ -158,7 +158,7 @@ template class PointerUnion { assert( get() == Val.getPointer() && "Can't get the address because PointerLikeTypeTraits changes the ptr"); - return (PT1 *)Val.getAddrOfPointer(); + return const_cast(reinterpret_cast(Val.getAddrOfPointer())); } /// Assignment from nullptr which just clears the union. 
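Two of the hunks above lend themselves to short usage notes: the Optional.h changes keep the documented move contract (moving out leaves the source disengaged), and PointerEmbeddedInt stores a small integer shifted into pointer-width storage so it can stand in wherever a pointer-like type is expected. A hedged sketch; demo code only, not part of the patch:

    #include "llvm/ADT/Optional.h"
    #include "llvm/ADT/PointerEmbeddedInt.h"
    #include <string>
    #include <utility>

    void demo() {
      llvm::Optional<std::string> A(std::string("hello"));
      llvm::Optional<std::string> B = std::move(A);
      // *B == "hello"; A.hasValue() is now false, because Optional's move
      // constructor calls O.reset() on the moved-from object.

      llvm::PointerEmbeddedInt<int, 8> E; // default-constructed: value 0
      E = 42;     // fits in 8 bits; a value needing more bits would assert
      int V = E;  // converts back through the implicit operator IntT
      (void)B; (void)V;
    }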
diff --git a/interpreter/llvm/src/include/llvm/ADT/PostOrderIterator.h b/interpreter/llvm/src/include/llvm/ADT/PostOrderIterator.h index 8fc08eb252eb2..dc8a9b6e78b20 100644 --- a/interpreter/llvm/src/include/llvm/ADT/PostOrderIterator.h +++ b/interpreter/llvm/src/include/llvm/ADT/PostOrderIterator.h @@ -17,9 +17,9 @@ #define LLVM_ADT_POSTORDERITERATOR_H #include "llvm/ADT/GraphTraits.h" -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/iterator_range.h" #include #include #include @@ -96,24 +96,14 @@ template , public po_iterator_storage { - typedef std::iterator super; - typedef typename GT::NodeRef NodeRef; - typedef typename GT::ChildIteratorType ChildItTy; + using super = std::iterator; + using NodeRef = typename GT::NodeRef; + using ChildItTy = typename GT::ChildIteratorType; // VisitStack - Used to maintain the ordering. Top = current block // First element is basic block pointer, second is the 'next child' to visit std::vector> VisitStack; - void traverseChild() { - while (VisitStack.back().second != GT::child_end(VisitStack.back().first)) { - NodeRef BB = *VisitStack.back().second++; - if (this->insertEdge(Optional(VisitStack.back().first), BB)) { - // If the block is not visited... - VisitStack.push_back(std::make_pair(BB, GT::child_begin(BB))); - } - } - } - po_iterator(NodeRef BB) { this->insertEdge(Optional(), BB); VisitStack.push_back(std::make_pair(BB, GT::child_begin(BB))); @@ -134,8 +124,18 @@ class po_iterator : po_iterator_storage(S) { } // End is when stack is empty. + void traverseChild() { + while (VisitStack.back().second != GT::child_end(VisitStack.back().first)) { + NodeRef BB = *VisitStack.back().second++; + if (this->insertEdge(Optional(VisitStack.back().first), BB)) { + // If the block is not visited... + VisitStack.push_back(std::make_pair(BB, GT::child_begin(BB))); + } + } + } + public: - typedef typename super::pointer pointer; + using pointer = typename super::pointer; // Provide static "constructors"... 
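The PostOrderIterator.h hunk above only reorders members (traverseChild moves below the constructors), but the traversal contract is worth a sketch before the rest of the class continues below: post_order visits a node only after all of its DFS children. Assuming the usual GraphTraits from llvm/IR/CFG.h; the function name is illustrative:

    #include "llvm/ADT/PostOrderIterator.h"
    #include "llvm/IR/CFG.h"
    #include "llvm/IR/Function.h"

    unsigned countBlocksPostOrder(llvm::Function &F) {
      unsigned N = 0;
      for (llvm::BasicBlock *BB : llvm::post_order(&F)) {
        (void)BB; // each successor pushed on the DFS stack was visited first
        ++N;
      }
      return N;
    }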
static po_iterator begin(GraphT G) { @@ -286,7 +286,8 @@ inverse_post_order_ext(const T &G, SetType &S) { template> class ReversePostOrderTraversal { - typedef typename GT::NodeRef NodeRef; + using NodeRef = typename GT::NodeRef; + std::vector Blocks; // Block list in normal PO order void Initialize(NodeRef BB) { @@ -294,7 +295,7 @@ class ReversePostOrderTraversal { } public: - typedef typename std::vector::reverse_iterator rpo_iterator; + using rpo_iterator = typename std::vector::reverse_iterator; ReversePostOrderTraversal(GraphT G) { Initialize(GT::getEntryNode(G)); } diff --git a/interpreter/llvm/src/include/llvm/ADT/PriorityWorklist.h b/interpreter/llvm/src/include/llvm/ADT/PriorityWorklist.h index 3198dd438700d..aa531f3337d9c 100644 --- a/interpreter/llvm/src/include/llvm/ADT/PriorityWorklist.h +++ b/interpreter/llvm/src/include/llvm/ADT/PriorityWorklist.h @@ -18,12 +18,13 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/Sequence.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/Compiler.h" #include #include #include +#include +#include #include namespace llvm { @@ -55,11 +56,11 @@ template , typename MapT = DenseMap> class PriorityWorklist { public: - typedef T value_type; - typedef T key_type; - typedef T& reference; - typedef const T& const_reference; - typedef typename MapT::size_type size_type; + using value_type = T; + using key_type = T; + using reference = T&; + using const_reference = const T&; + using size_type = typename MapT::size_type; /// Construct an empty PriorityWorklist PriorityWorklist() = default; diff --git a/interpreter/llvm/src/include/llvm/ADT/SCCIterator.h b/interpreter/llvm/src/include/llvm/ADT/SCCIterator.h index 9a8a7b168fce2..784a58dc002f5 100644 --- a/interpreter/llvm/src/include/llvm/ADT/SCCIterator.h +++ b/interpreter/llvm/src/include/llvm/ADT/SCCIterator.h @@ -1,4 +1,4 @@ -//===---- ADT/SCCIterator.h - Strongly Connected Comp. Iter. ----*- C++ -*-===// +//===- ADT/SCCIterator.h - Strongly Connected Comp. Iter. -------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -43,10 +43,10 @@ template > class scc_iterator : public iterator_facade_base< scc_iterator, std::forward_iterator_tag, const std::vector, ptrdiff_t> { - typedef typename GT::NodeRef NodeRef; - typedef typename GT::ChildIteratorType ChildItTy; - typedef std::vector SccTy; - typedef typename scc_iterator::reference reference; + using NodeRef = typename GT::NodeRef; + using ChildItTy = typename GT::ChildIteratorType; + using SccTy = std::vector; + using reference = typename scc_iterator::reference; /// Element of VisitStack during DFS. struct StackElement { @@ -232,16 +232,6 @@ template scc_iterator scc_end(const T &G) { return scc_iterator::end(G); } -/// \brief Construct the begin iterator for a deduced graph type T's Inverse. -template scc_iterator> scc_begin(const Inverse &G) { - return scc_iterator>::begin(G); -} - -/// \brief Construct the end iterator for a deduced graph type T's Inverse. 
-template scc_iterator> scc_end(const Inverse &G) { - return scc_iterator>::end(G); -} - } // end namespace llvm #endif // LLVM_ADT_SCCITERATOR_H diff --git a/interpreter/llvm/src/include/llvm/ADT/STLExtras.h b/interpreter/llvm/src/include/llvm/ADT/STLExtras.h index 8c28412bb6078..83f289c42a23a 100644 --- a/interpreter/llvm/src/include/llvm/ADT/STLExtras.h +++ b/interpreter/llvm/src/include/llvm/ADT/STLExtras.h @@ -100,6 +100,8 @@ class function_ref { } public: + function_ref() : callback(nullptr) {} + template function_ref(Callable &&callable, typename std::enable_if< @@ -110,6 +112,8 @@ class function_ref { Ret operator()(Params ...params) const { return callback(callable, std::forward(params)...); } + + operator bool() const { return callback; } }; // deleter - Very very very simple method that is used to invoke operator diff --git a/interpreter/llvm/src/include/llvm/ADT/ScopedHashTable.h b/interpreter/llvm/src/include/llvm/ADT/ScopedHashTable.h index d52128e294a32..22b0c1bdaf4d0 100644 --- a/interpreter/llvm/src/include/llvm/ADT/ScopedHashTable.h +++ b/interpreter/llvm/src/include/llvm/ADT/ScopedHashTable.h @@ -109,6 +109,7 @@ class ScopedHashTableScope { ScopedHashTableVal *getLastValInScope() { return LastValInScope; } + void setLastValInScope(ScopedHashTableVal *Val) { LastValInScope = Val; } @@ -151,13 +152,14 @@ class ScopedHashTable { public: /// ScopeTy - This is a helpful typedef that allows clients to get easy access /// to the name of the scope for this hash table. - typedef ScopedHashTableScope ScopeTy; - typedef unsigned size_type; + using ScopeTy = ScopedHashTableScope; + using size_type = unsigned; private: friend class ScopedHashTableScope; - typedef ScopedHashTableVal ValTy; + using ValTy = ScopedHashTableVal; + DenseMap TopLevelMap; ScopeTy *CurScope = nullptr; @@ -165,7 +167,7 @@ class ScopedHashTable { public: ScopedHashTable() = default; - ScopedHashTable(AllocatorTy A) : CurScope(0), Allocator(A) {} + ScopedHashTable(AllocatorTy A) : Allocator(A) {} ScopedHashTable(const ScopedHashTable &) = delete; ScopedHashTable &operator=(const ScopedHashTable &) = delete; @@ -194,7 +196,7 @@ class ScopedHashTable { insertIntoScope(CurScope, Key, Val); } - typedef ScopedHashTableIterator iterator; + using iterator = ScopedHashTableIterator; iterator end() { return iterator(0); } diff --git a/interpreter/llvm/src/include/llvm/ADT/Sequence.h b/interpreter/llvm/src/include/llvm/ADT/Sequence.h index 5d36831cc128e..3d4a897bf9a9e 100644 --- a/interpreter/llvm/src/include/llvm/ADT/Sequence.h +++ b/interpreter/llvm/src/include/llvm/ADT/Sequence.h @@ -13,27 +13,31 @@ /// //===----------------------------------------------------------------------===// -#ifndef LLVM_ADT_SEQ_H -#define LLVM_ADT_SEQ_H +#ifndef LLVM_ADT_SEQUENCE_H +#define LLVM_ADT_SEQUENCE_H #include "llvm/ADT/iterator.h" #include "llvm/ADT/iterator_range.h" +#include +#include +#include namespace llvm { namespace detail { + template class value_sequence_iterator : public iterator_facade_base, std::random_access_iterator_tag, const ValueT> { - typedef typename value_sequence_iterator::iterator_facade_base BaseT; + using BaseT = typename value_sequence_iterator::iterator_facade_base; ValueT Value; public: - typedef typename BaseT::difference_type difference_type; - typedef typename BaseT::reference reference; + using difference_type = typename BaseT::difference_type; + using reference = typename BaseT::reference; value_sequence_iterator() = default; value_sequence_iterator(const value_sequence_iterator &) = default; @@ -65,7 
+69,8 @@ class value_sequence_iterator reference operator*() const { return Value; } }; -} // End detail namespace. + +} // end namespace detail template iterator_range> seq(ValueT Begin, @@ -74,6 +79,6 @@ iterator_range> seq(ValueT Begin, detail::value_sequence_iterator(End)); } -} +} // end namespace llvm -#endif +#endif // LLVM_ADT_SEQUENCE_H diff --git a/interpreter/llvm/src/include/llvm/ADT/SetVector.h b/interpreter/llvm/src/include/llvm/ADT/SetVector.h index 13378aa3a04ef..04ed52fc543f3 100644 --- a/interpreter/llvm/src/include/llvm/ADT/SetVector.h +++ b/interpreter/llvm/src/include/llvm/ADT/SetVector.h @@ -40,17 +40,17 @@ template , typename Set = DenseSet> class SetVector { public: - typedef T value_type; - typedef T key_type; - typedef T& reference; - typedef const T& const_reference; - typedef Set set_type; - typedef Vector vector_type; - typedef typename vector_type::const_iterator iterator; - typedef typename vector_type::const_iterator const_iterator; - typedef typename vector_type::const_reverse_iterator reverse_iterator; - typedef typename vector_type::const_reverse_iterator const_reverse_iterator; - typedef typename vector_type::size_type size_type; + using value_type = T; + using key_type = T; + using reference = T&; + using const_reference = const T&; + using set_type = Set; + using vector_type = Vector; + using iterator = typename vector_type::const_iterator; + using const_iterator = typename vector_type::const_iterator; + using reverse_iterator = typename vector_type::const_reverse_iterator; + using const_reverse_iterator = typename vector_type::const_reverse_iterator; + using size_type = typename vector_type::size_type; /// \brief Construct an empty SetVector SetVector() = default; diff --git a/interpreter/llvm/src/include/llvm/ADT/SmallBitVector.h b/interpreter/llvm/src/include/llvm/ADT/SmallBitVector.h index 0eeacc162543e..b6391746639b0 100644 --- a/interpreter/llvm/src/include/llvm/ADT/SmallBitVector.h +++ b/interpreter/llvm/src/include/llvm/ADT/SmallBitVector.h @@ -15,8 +15,15 @@ #define LLVM_ADT_SMALLBITVECTOR_H #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Support/MathExtras.h" +#include #include +#include +#include +#include +#include +#include namespace llvm { @@ -29,7 +36,7 @@ class SmallBitVector { // TODO: In "large" mode, a pointer to a BitVector is used, leading to an // unnecessary level of indirection. It would be more efficient to use a // pointer to memory containing size, allocation size, and the array of bits. - uintptr_t X; + uintptr_t X = 1; enum { // The number of bits in this class. @@ -54,7 +61,8 @@ class SmallBitVector { "Unsupported word size"); public: - typedef unsigned size_type; + using size_type = unsigned; + // Encapsulation of a single bit. class reference { SmallBitVector &TheVector; @@ -135,7 +143,7 @@ class SmallBitVector { public: /// Creates an empty bitvector. - SmallBitVector() : X(1) {} + SmallBitVector() = default; /// Creates a bitvector of specified number of bits. All bits are initialized /// to the specified value. 
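Two additions just above merit usage notes: function_ref in STLExtras.h gains a default constructor and an operator bool, so callbacks can now be optional, and Sequence.h's seq builds a lazy half-open integer range. A sketch under those headers; `runOr` and `demo` are illustrative names:

    #include "llvm/ADT/STLExtras.h"
    #include "llvm/ADT/Sequence.h"

    int runOr(int Fallback, llvm::function_ref<int()> F = {}) {
      if (!F)            // the new operator bool: null callback detected
        return Fallback;
      return F();
    }

    int demo() {
      int Sum = runOr(7) + runOr(0, [] { return 42; }); // 7 + 42
      for (int I : llvm::seq(0, 4))                     // yields 0, 1, 2, 3
        Sum += I;
      return Sum; // 55
    }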
@@ -163,6 +171,21 @@ class SmallBitVector { delete getPointer(); } + using const_set_bits_iterator = const_set_bits_iterator_impl; + using set_iterator = const_set_bits_iterator; + + const_set_bits_iterator set_bits_begin() const { + return const_set_bits_iterator(*this); + } + + const_set_bits_iterator set_bits_end() const { + return const_set_bits_iterator(*this, -1); + } + + iterator_range set_bits() const { + return make_range(set_bits_begin(), set_bits_end()); + } + /// Tests whether there are no bits in this bitvector. bool empty() const { return isSmall() ? getSmallSize() == 0 : getPointer()->empty(); @@ -664,14 +687,16 @@ operator^(const SmallBitVector &LHS, const SmallBitVector &RHS) { return Result; } -} // End llvm namespace +} // end namespace llvm namespace std { - /// Implement std::swap in terms of BitVector swap. - inline void - swap(llvm::SmallBitVector &LHS, llvm::SmallBitVector &RHS) { - LHS.swap(RHS); - } + +/// Implement std::swap in terms of BitVector swap. +inline void +swap(llvm::SmallBitVector &LHS, llvm::SmallBitVector &RHS) { + LHS.swap(RHS); } -#endif +} // end namespace std + +#endif // LLVM_ADT_SMALLBITVECTOR_H diff --git a/interpreter/llvm/src/include/llvm/ADT/SmallPtrSet.h b/interpreter/llvm/src/include/llvm/ADT/SmallPtrSet.h index 196ab6338047c..4e8a2490ee3c5 100644 --- a/interpreter/llvm/src/include/llvm/ADT/SmallPtrSet.h +++ b/interpreter/llvm/src/include/llvm/ADT/SmallPtrSet.h @@ -15,25 +15,18 @@ #ifndef LLVM_ADT_SMALLPTRSET_H #define LLVM_ADT_SMALLPTRSET_H -#include "llvm/Config/abi-breaking.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/PointerLikeTypeTraits.h" +#include "llvm/Support/ReverseIteration.h" #include "llvm/Support/type_traits.h" #include #include -#include #include +#include #include #include #include -#if LLVM_ENABLE_ABI_BREAKING_CHECKS -namespace llvm { -template struct ReverseIterate { static bool value; }; -template bool ReverseIterate::value = false; -} -#endif - namespace llvm { /// SmallPtrSetImplBase - This is the common code shared among all the @@ -92,7 +85,7 @@ class SmallPtrSetImplBase { } public: - typedef unsigned size_type; + using size_type = unsigned; SmallPtrSetImplBase &operator=(const SmallPtrSetImplBase &) = delete; @@ -273,14 +266,14 @@ class SmallPtrSetIteratorImpl { /// SmallPtrSetIterator - This implements a const_iterator for SmallPtrSet. template class SmallPtrSetIterator : public SmallPtrSetIteratorImpl { - typedef PointerLikeTypeTraits PtrTraits; + using PtrTraits = PointerLikeTypeTraits; public: - typedef PtrTy value_type; - typedef PtrTy reference; - typedef PtrTy pointer; - typedef std::ptrdiff_t difference_type; - typedef std::forward_iterator_tag iterator_category; + using value_type = PtrTy; + using reference = PtrTy; + using pointer = PtrTy; + using difference_type = std::ptrdiff_t; + using iterator_category = std::forward_iterator_tag; explicit SmallPtrSetIterator(const void *const *BP, const void *const *E) : SmallPtrSetIteratorImpl(BP, E) {} @@ -351,8 +344,8 @@ struct RoundUpToPowerOfTwo { template class SmallPtrSetImpl : public SmallPtrSetImplBase { using ConstPtrType = typename add_const_past_pointer::type; - typedef PointerLikeTypeTraits PtrTraits; - typedef PointerLikeTypeTraits ConstPtrTraits; + using PtrTraits = PointerLikeTypeTraits; + using ConstPtrTraits = PointerLikeTypeTraits; protected: // Constructors that forward to the base. 
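The set_bits() range added to SmallBitVector above mirrors BitVector's interface: it visits only the indices whose bits are set. An illustrative sketch:

    #include "llvm/ADT/SmallBitVector.h"

    unsigned sumSetIndices() {
      llvm::SmallBitVector BV(16);
      BV.set(3);
      BV.set(11);
      unsigned Sum = 0;
      for (unsigned Idx : BV.set_bits()) // visits 3, then 11
        Sum += Idx;
      return Sum; // 14
    }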
@@ -365,8 +358,10 @@ class SmallPtrSetImpl : public SmallPtrSetImplBase { : SmallPtrSetImplBase(SmallStorage, SmallSize) {} public: - typedef SmallPtrSetIterator iterator; - typedef SmallPtrSetIterator const_iterator; + using iterator = SmallPtrSetIterator; + using const_iterator = SmallPtrSetIterator; + using key_type = ConstPtrType; + using value_type = PtrType; SmallPtrSetImpl(const SmallPtrSetImpl &) = delete; @@ -431,7 +426,7 @@ class SmallPtrSet : public SmallPtrSetImpl { // DenseSet<> instead if you expect many elements in the set. static_assert(SmallSize <= 32, "SmallSize should be small"); - typedef SmallPtrSetImpl BaseT; + using BaseT = SmallPtrSetImpl; // Make sure that SmallSize is a power of two, round up if not. enum { SmallSizePowTwo = RoundUpToPowerOfTwo::Val }; diff --git a/interpreter/llvm/src/include/llvm/ADT/SmallSet.h b/interpreter/llvm/src/include/llvm/ADT/SmallSet.h index 6dac1677b7a26..d52d0f07f9a63 100644 --- a/interpreter/llvm/src/include/llvm/ADT/SmallSet.h +++ b/interpreter/llvm/src/include/llvm/ADT/SmallSet.h @@ -39,8 +39,9 @@ class SmallSet { /// we will never use. SmallVector Vector; std::set Set; - typedef typename SmallVector::const_iterator VIterator; - typedef typename SmallVector::iterator mutable_iterator; + + using VIterator = typename SmallVector::const_iterator; + using mutable_iterator = typename SmallVector::iterator; // In small mode SmallPtrSet uses linear search for the elements, so it is // not a good idea to choose this value too high. You may consider using a @@ -48,7 +49,7 @@ class SmallSet { static_assert(N <= 32, "N should be small"); public: - typedef size_t size_type; + using size_type = size_t; SmallSet() = default; diff --git a/interpreter/llvm/src/include/llvm/ADT/SmallVector.h b/interpreter/llvm/src/include/llvm/ADT/SmallVector.h index b9588214023cc..bf2a62f43affc 100644 --- a/interpreter/llvm/src/include/llvm/ADT/SmallVector.h +++ b/interpreter/llvm/src/include/llvm/ADT/SmallVector.h @@ -71,7 +71,7 @@ class SmallVectorTemplateCommon : public SmallVectorBase { // Allocate raw space for N elements of type T. If T has a ctor or dtor, we // don't want it to be automatically run, so we need to represent the space as // something else. Use an array of char of sufficient alignment. - typedef AlignedCharArrayUnion U; + using U = AlignedCharArrayUnion; U FirstEl; // Space after 'FirstEl' is clobbered, do not add any instance vars after it. @@ -96,19 +96,19 @@ class SmallVectorTemplateCommon : public SmallVectorBase { void setEnd(T *P) { this->EndX = P; } public: - typedef size_t size_type; - typedef ptrdiff_t difference_type; - typedef T value_type; - typedef T *iterator; - typedef const T *const_iterator; + using size_type = size_t; + using difference_type = ptrdiff_t; + using value_type = T; + using iterator = T *; + using const_iterator = const T *; - typedef std::reverse_iterator const_reverse_iterator; - typedef std::reverse_iterator reverse_iterator; + using const_reverse_iterator = std::reverse_iterator; + using reverse_iterator = std::reverse_iterator; - typedef T &reference; - typedef const T &const_reference; - typedef T *pointer; - typedef const T *const_pointer; + using reference = T &; + using const_reference = const T &; + using pointer = T *; + using const_pointer = const T *; // forward iterator creation methods. LLVM_ATTRIBUTE_ALWAYS_INLINE @@ -319,12 +319,12 @@ class SmallVectorTemplateBase : public SmallVectorTemplateCommon { /// reduce code duplication based on the SmallVector 'N' template parameter. 
template class SmallVectorImpl : public SmallVectorTemplateBase::value> { - typedef SmallVectorTemplateBase::value > SuperClass; + using SuperClass = SmallVectorTemplateBase::value>; public: - typedef typename SuperClass::iterator iterator; - typedef typename SuperClass::const_iterator const_iterator; - typedef typename SuperClass::size_type size_type; + using iterator = typename SuperClass::iterator; + using const_iterator = typename SuperClass::const_iterator; + using size_type = typename SuperClass::size_type; protected: // Default ctor - Initialize to empty. @@ -388,7 +388,10 @@ class SmallVectorImpl : public SmallVectorTemplateBase::value> { void swap(SmallVectorImpl &RHS); /// Add the specified range to the end of the SmallVector. - template + template ::iterator_category, + std::input_iterator_tag>::value>::type> void append(in_iter in_start, in_iter in_end) { size_type NumInputs = std::distance(in_start, in_end); // Grow allocated space if needed. @@ -415,6 +418,9 @@ class SmallVectorImpl : public SmallVectorTemplateBase::value> { append(IL.begin(), IL.end()); } + // FIXME: Consider assigning over existing elements, rather than clearing & + // re-initializing them - for all assign(...) variants. + void assign(size_type NumElts, const T &Elt) { clear(); if (this->capacity() < NumElts) @@ -423,6 +429,15 @@ class SmallVectorImpl : public SmallVectorTemplateBase::value> { std::uninitialized_fill(this->begin(), this->end(), Elt); } + template ::iterator_category, + std::input_iterator_tag>::value>::type> + void assign(in_iter in_start, in_iter in_end) { + clear(); + append(in_start, in_end); + } + void assign(std::initializer_list IL) { clear(); append(IL); @@ -571,7 +586,10 @@ class SmallVectorImpl : public SmallVectorTemplateBase::value> { return I; } - template + template ::iterator_category, + std::input_iterator_tag>::value>::type> iterator insert(iterator I, ItTy From, ItTy To) { // Convert iterator to elt# to avoid invalidating iterator when we reserve() size_t InsertElt = I - this->begin(); @@ -845,15 +863,17 @@ class SmallVector : public SmallVectorImpl { SmallVectorStorage Storage; public: - SmallVector() : SmallVectorImpl(N) { - } + SmallVector() : SmallVectorImpl(N) {} explicit SmallVector(size_t Size, const T &Value = T()) : SmallVectorImpl(N) { this->assign(Size, Value); } - template + template ::iterator_category, + std::input_iterator_tag>::value>::type> SmallVector(ItTy S, ItTy E) : SmallVectorImpl(N) { this->append(S, E); } @@ -883,16 +903,16 @@ class SmallVector : public SmallVectorImpl { SmallVectorImpl::operator=(::std::move(RHS)); } - const SmallVector &operator=(SmallVector &&RHS) { - SmallVectorImpl::operator=(::std::move(RHS)); - return *this; - } - SmallVector(SmallVectorImpl &&RHS) : SmallVectorImpl(N) { if (!RHS.empty()) SmallVectorImpl::operator=(::std::move(RHS)); } + const SmallVector &operator=(SmallVector &&RHS) { + SmallVectorImpl::operator=(::std::move(RHS)); + return *this; + } + const SmallVector &operator=(SmallVectorImpl &&RHS) { SmallVectorImpl::operator=(::std::move(RHS)); return *this; diff --git a/interpreter/llvm/src/include/llvm/ADT/SparseBitVector.h b/interpreter/llvm/src/include/llvm/ADT/SparseBitVector.h index a82cef6028f94..4cbf40c76805e 100644 --- a/interpreter/llvm/src/include/llvm/ADT/SparseBitVector.h +++ b/interpreter/llvm/src/include/llvm/ADT/SparseBitVector.h @@ -1,4 +1,4 @@ -//===- llvm/ADT/SparseBitVector.h - Efficient Sparse BitVector -*- C++ -*- ===// +//===- llvm/ADT/SparseBitVector.h - Efficient Sparse BitVector --*- C++ 
-*-===// // // The LLVM Compiler Infrastructure // @@ -41,8 +41,8 @@ namespace llvm { template struct SparseBitVectorElement { public: - typedef unsigned long BitWord; - typedef unsigned size_type; + using BitWord = unsigned long; + using size_type = unsigned; enum { BITWORD_SIZE = sizeof(BitWord) * CHAR_BIT, BITWORDS_PER_ELEMENT = (ElementSize + BITWORD_SIZE - 1) / BITWORD_SIZE, @@ -100,7 +100,7 @@ template struct SparseBitVectorElement { Bits[Idx / BITWORD_SIZE] |= 1L << (Idx % BITWORD_SIZE); } - bool test_and_set (unsigned Idx) { + bool test_and_set(unsigned Idx) { bool old = test(Idx); if (!old) { set(Idx); @@ -254,9 +254,9 @@ template struct SparseBitVectorElement { template class SparseBitVector { - typedef std::list> ElementList; - typedef typename ElementList::iterator ElementListIter; - typedef typename ElementList::const_iterator ElementListConstIter; + using ElementList = std::list>; + using ElementListIter = typename ElementList::iterator; + using ElementListConstIter = typename ElementList::const_iterator; enum { BITWORD_SIZE = SparseBitVectorElement::BITWORD_SIZE }; @@ -421,14 +421,12 @@ class SparseBitVector { }; public: - typedef SparseBitVectorIterator iterator; + using iterator = SparseBitVectorIterator; SparseBitVector() { CurrElementIter = Elements.begin(); } - ~SparseBitVector() = default; - // SparseBitVector copy ctor. SparseBitVector(const SparseBitVector &RHS) { ElementListConstIter ElementIter = RHS.Elements.begin(); @@ -440,6 +438,8 @@ class SparseBitVector { CurrElementIter = Elements.begin (); } + ~SparseBitVector() = default; + // Clear. void clear() { Elements.clear(); diff --git a/interpreter/llvm/src/include/llvm/ADT/SparseMultiSet.h b/interpreter/llvm/src/include/llvm/ADT/SparseMultiSet.h index 08da4b68ebaac..c91e0d70f65a3 100644 --- a/interpreter/llvm/src/include/llvm/ADT/SparseMultiSet.h +++ b/interpreter/llvm/src/include/llvm/ADT/SparseMultiSet.h @@ -1,4 +1,4 @@ -//===--- llvm/ADT/SparseMultiSet.h - Sparse multiset ------------*- C++ -*-===// +//===- llvm/ADT/SparseMultiSet.h - Sparse multiset --------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -21,9 +21,9 @@ #ifndef LLVM_ADT_SPARSEMULTISET_H #define LLVM_ADT_SPARSEMULTISET_H +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/SparseSet.h" -#include "llvm/ADT/STLExtras.h" #include #include #include @@ -101,7 +101,7 @@ class SparseMultiSet { unsigned Prev; unsigned Next; - SMSNode(ValueT D, unsigned P, unsigned N) : Data(D), Prev(P), Next(N) { } + SMSNode(ValueT D, unsigned P, unsigned N) : Data(D), Prev(P), Next(N) {} /// List tails have invalid Nexts. 
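Looking back at the SmallVector.h hunk above: the new std::enable_if guards on the iterator-pair overloads of append, assign, insert, and the constructor keep integral arguments from selecting the template, so the (count, value) forms resolve cleanly. A sketch of both resolutions:

    #include "llvm/ADT/SmallVector.h"

    void demo() {
      llvm::SmallVector<int, 8> V;
      V.assign(5, 0);         // two ints: picks assign(size_type, const T&)
      int Raw[] = {1, 2, 3};
      V.assign(Raw, Raw + 3); // pointer pair: picks the iterator overload
      V.append(Raw, Raw + 3); // likewise for append
    }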
bool isTail() const { @@ -118,8 +118,8 @@ class SparseMultiSet { bool isValid() const { return Prev != INVALID; } }; - typedef typename KeyFunctorT::argument_type KeyT; - typedef SmallVector DenseT; + using KeyT = typename KeyFunctorT::argument_type; + using DenseT = SmallVector; DenseT Dense; SparseT *Sparse = nullptr; unsigned Universe = 0; @@ -183,12 +183,12 @@ class SparseMultiSet { } public: - typedef ValueT value_type; - typedef ValueT &reference; - typedef const ValueT &const_reference; - typedef ValueT *pointer; - typedef const ValueT *const_pointer; - typedef unsigned size_type; + using value_type = ValueT; + using reference = ValueT &; + using const_reference = const ValueT &; + using pointer = ValueT *; + using const_pointer = const ValueT *; + using size_type = unsigned; SparseMultiSet() = default; SparseMultiSet(const SparseMultiSet &) = delete; @@ -227,7 +227,7 @@ class SparseMultiSet { unsigned SparseIdx; iterator_base(SMSPtrTy P, unsigned I, unsigned SI) - : SMS(P), Idx(I), SparseIdx(SI) { } + : SMS(P), Idx(I), SparseIdx(SI) {} /// Whether our iterator has fallen outside our dense vector. bool isEnd() const { @@ -248,11 +248,11 @@ class SparseMultiSet { void setNext(unsigned N) { SMS->Dense[Idx].Next = N; } public: - typedef std::iterator super; - typedef typename super::value_type value_type; - typedef typename super::difference_type difference_type; - typedef typename super::pointer pointer; - typedef typename super::reference reference; + using super = std::iterator; + using value_type = typename super::value_type; + using difference_type = typename super::difference_type; + using pointer = typename super::pointer; + using reference = typename super::reference; reference operator*() const { assert(isKeyed() && SMS->sparseIndex(SMS->Dense[Idx].Data) == SparseIdx && @@ -308,11 +308,12 @@ class SparseMultiSet { return I; } }; - typedef iterator_base iterator; - typedef iterator_base const_iterator; + + using iterator = iterator_base; + using const_iterator = iterator_base; // Convenience types - typedef std::pair RangePair; + using RangePair = std::pair; /// Returns an iterator past this container. Note that such an iterator cannot /// be decremented, but will compare equal to other end iterators. 
diff --git a/interpreter/llvm/src/include/llvm/ADT/SparseSet.h b/interpreter/llvm/src/include/llvm/ADT/SparseSet.h index 00c18c743219a..25ade8831922f 100644 --- a/interpreter/llvm/src/include/llvm/ADT/SparseSet.h +++ b/interpreter/llvm/src/include/llvm/ADT/SparseSet.h @@ -1,4 +1,4 @@ -//===--- llvm/ADT/SparseSet.h - Sparse set ----------------------*- C++ -*-===// +//===- llvm/ADT/SparseSet.h - Sparse set ------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -125,9 +125,9 @@ class SparseSet { !std::numeric_limits::is_signed, "SparseT must be an unsigned integer type"); - typedef typename KeyFunctorT::argument_type KeyT; - typedef SmallVector DenseT; - typedef unsigned size_type; + using KeyT = typename KeyFunctorT::argument_type; + using DenseT = SmallVector; + using size_type = unsigned; DenseT Dense; SparseT *Sparse = nullptr; unsigned Universe = 0; @@ -135,11 +135,11 @@ class SparseSet { SparseSetValFunctor ValIndexOf; public: - typedef ValueT value_type; - typedef ValueT &reference; - typedef const ValueT &const_reference; - typedef ValueT *pointer; - typedef const ValueT *const_pointer; + using value_type = ValueT; + using reference = ValueT &; + using const_reference = const ValueT &; + using pointer = ValueT *; + using const_pointer = const ValueT *; SparseSet() = default; SparseSet(const SparseSet &) = delete; @@ -168,8 +168,8 @@ class SparseSet { } // Import trivial vector stuff from DenseT. - typedef typename DenseT::iterator iterator; - typedef typename DenseT::const_iterator const_iterator; + using iterator = typename DenseT::iterator; + using const_iterator = typename DenseT::const_iterator; const_iterator begin() const { return Dense.begin(); } const_iterator end() const { return Dense.end(); } diff --git a/interpreter/llvm/src/include/llvm/ADT/Statistic.h b/interpreter/llvm/src/include/llvm/ADT/Statistic.h index 53fa2a50fcbaf..d5ebba409c3d3 100644 --- a/interpreter/llvm/src/include/llvm/ADT/Statistic.h +++ b/interpreter/llvm/src/include/llvm/ADT/Statistic.h @@ -101,6 +101,16 @@ class Statistic { return init(); } + void updateMax(unsigned V) { + unsigned PrevMax = Value.load(std::memory_order_relaxed); + // Keep trying to update max until we succeed or another thread produces + // a bigger max than us. + while (V > PrevMax && !Value.compare_exchange_weak( + PrevMax, V, std::memory_order_relaxed)) { + } + init(); + } + #else // Statistics are disabled in release builds. 
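The updateMax addition above is the standard lock-free compare-and-swap maximum idiom: compare_exchange_weak reloads the expected value on failure, so the loop exits once this thread publishes V or another thread has already stored something at least as large. The same pattern on a plain std::atomic, for illustration only:

    #include <atomic>

    void updateMax(std::atomic<unsigned> &Max, unsigned V) {
      unsigned Prev = Max.load(std::memory_order_relaxed);
      // On CAS failure Prev is refreshed with the current value, so we keep
      // retrying only while V is still strictly greater than the stored max.
      while (V > Prev &&
             !Max.compare_exchange_weak(Prev, V, std::memory_order_relaxed)) {
      }
    }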
const Statistic &operator=(unsigned Val) { @@ -131,6 +141,8 @@ class Statistic { return *this; } + void updateMax(unsigned V) {} + #endif // !defined(NDEBUG) || defined(LLVM_ENABLE_STATS) protected: diff --git a/interpreter/llvm/src/include/llvm/ADT/StringExtras.h b/interpreter/llvm/src/include/llvm/ADT/StringExtras.h index 26f11924b7718..cc32bf43f29c8 100644 --- a/interpreter/llvm/src/include/llvm/ADT/StringExtras.h +++ b/interpreter/llvm/src/include/llvm/ADT/StringExtras.h @@ -1,4 +1,4 @@ -//===-- llvm/ADT/StringExtras.h - Useful string functions -------*- C++ -*-===// +//===- llvm/ADT/StringExtras.h - Useful string functions --------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -14,13 +14,22 @@ #ifndef LLVM_ADT_STRINGEXTRAS_H #define LLVM_ADT_STRINGEXTRAS_H +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringRef.h" -#include "llvm/Support/DataTypes.h" +#include +#include +#include +#include +#include #include +#include +#include namespace llvm { -class raw_ostream; + template class SmallVectorImpl; +class raw_ostream; /// hexdigit - Return the hexadecimal character for the /// given number \p X (which should be less than 16). @@ -34,6 +43,11 @@ static inline StringRef toStringRef(bool B) { return StringRef(B ? "true" : "false"); } +/// Construct a string ref from an array ref of unsigned chars. +static inline StringRef toStringRef(ArrayRef Input) { + return StringRef(reinterpret_cast(Input.begin()), Input.size()); +} + /// Interpret the given character \p C as a hexadecimal digit and return its /// value. /// @@ -62,7 +76,7 @@ static inline std::string utohexstr(uint64_t X, bool LowerCase = false) { /// Convert buffer \p Input to its hexadecimal representation. /// The returned string is double the size of \p Input. -static inline std::string toHex(StringRef Input) { +inline std::string toHex(StringRef Input) { static const char *const LUT = "0123456789ABCDEF"; size_t Length = Input.size(); @@ -76,6 +90,10 @@ static inline std::string toHex(StringRef Input) { return Output; } +inline std::string toHex(ArrayRef Input) { + return toHex(toStringRef(Input)); +} + static inline uint8_t hexFromNibbles(char MSB, char LSB) { unsigned U1 = hexDigitValue(MSB); unsigned U2 = hexDigitValue(LSB); @@ -106,6 +124,39 @@ static inline std::string fromHex(StringRef Input) { return Output; } +/// \brief Convert the string \p S to an integer of the specified type using +/// the radix \p Base. If \p Base is 0, auto-detects the radix. +/// Returns true if the number was successfully converted, false otherwise. 
+template <typename N> bool to_integer(StringRef S, N &Num, unsigned Base = 0) {
+  return !S.getAsInteger(Base, Num);
+}
+
+namespace detail {
+template <typename N>
+inline bool to_float(const Twine &T, N &Num, N (*StrTo)(const char *, char **)) {
+  SmallString<32> Storage;
+  StringRef S = T.toNullTerminatedStringRef(Storage);
+  char *End;
+  N Temp = StrTo(S.data(), &End);
+  if (*End != '\0')
+    return false;
+  Num = Temp;
+  return true;
+}
+}
+
+inline bool to_float(const Twine &T, float &Num) {
+  return detail::to_float(T, Num, strtof);
+}
+
+inline bool to_float(const Twine &T, double &Num) {
+  return detail::to_float(T, Num, strtod);
+}
+
+inline bool to_float(const Twine &T, long double &Num) {
+  return detail::to_float(T, Num, strtold);
+}
+
 static inline std::string utostr(uint64_t X, bool isNeg = false) {
   char Buffer[21];
   char *BufPtr = std::end(Buffer);
@@ -121,7 +172,6 @@ static inline std::string utostr(uint64_t X, bool isNeg = false) {
   return std::string(BufPtr, std::end(Buffer));
 }
 
-
 static inline std::string itostr(int64_t X) {
   if (X < 0)
     return utostr(static_cast<uint64_t>(-X), true);
@@ -254,13 +304,14 @@ template <typename A1, typename... Args>
 inline size_t join_items_size(const A1 &A, Args &&... Items) {
   return join_one_item_size(A) + join_items_size(std::forward<Args>(Items)...);
 }
-}
+
+} // end namespace detail
 
 /// Joins the strings in the range [Begin, End), adding Separator between
 /// the elements.
 template <typename IteratorT>
 inline std::string join(IteratorT Begin, IteratorT End, StringRef Separator) {
-  typedef typename std::iterator_traits<IteratorT>::iterator_category tag;
+  using tag = typename std::iterator_traits<IteratorT>::iterator_category;
   return detail::join_impl(Begin, End, Separator, tag());
 }
 
@@ -288,6 +339,6 @@ inline std::string join_items(Sep Separator, Args &&... Items) {
   return Result;
 }
 
-} // End llvm namespace
+} // end namespace llvm
 
-#endif
+#endif // LLVM_ADT_STRINGEXTRAS_H
diff --git a/interpreter/llvm/src/include/llvm/ADT/StringMap.h b/interpreter/llvm/src/include/llvm/ADT/StringMap.h
index c36fda7d69065..d573148665a1a 100644
--- a/interpreter/llvm/src/include/llvm/ADT/StringMap.h
+++ b/interpreter/llvm/src/include/llvm/ADT/StringMap.h
@@ -1,4 +1,4 @@
-//===--- StringMap.h - String Hash table map interface ----------*- C++ -*-===//
+//===- StringMap.h - String Hash table map interface ------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -16,25 +16,23 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/iterator.h"
+#include "llvm/ADT/iterator_range.h"
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/PointerLikeTypeTraits.h"
+#include
 #include
 #include
 #include
 #include
 #include
-#include
+#include
 #include
 
 namespace llvm {
 
-template <typename ValueTy>
-class StringMapConstIterator;
-template <typename ValueTy>
-class StringMapIterator;
-template <typename ValueTy> class StringMapKeyIterator;
-template <typename ValueTy>
-class StringMapEntry;
+template <typename ValueTy> class StringMapConstIterator;
+template <typename ValueTy> class StringMapIterator;
+template <typename ValueTy> class StringMapKeyIterator;
 
 /// StringMapEntryBase - Shared base class of StringMapEntry instances.
 class StringMapEntryBase {
@@ -53,17 +51,15 @@ class StringMapImpl {
   // Array of NumBuckets pointers to entries, null pointers are holes.
   // TheTable[NumBuckets] contains a sentinel value for easy iteration. Followed
   // by an array of the actual hash values as unsigned integers.
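Before the StringMap.h changes continue below, a usage sketch for the number-parsing helpers added to StringExtras.h above; both reject trailing garbage rather than converting a prefix. Demo code only:

    #include "llvm/ADT/StringExtras.h"

    bool demo() {
      int I;
      double D;
      bool OkI = llvm::to_integer("0x1f", I); // Base = 0 auto-detects: I == 31
      bool OkD = llvm::to_float("2.5", D);    // D == 2.5
      bool Bad = llvm::to_float("2.5xyz", D); // false: trailing characters
      return OkI && OkD && !Bad;
    }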
- StringMapEntryBase **TheTable; - unsigned NumBuckets; - unsigned NumItems; - unsigned NumTombstones; + StringMapEntryBase **TheTable = nullptr; + unsigned NumBuckets = 0; + unsigned NumItems = 0; + unsigned NumTombstones = 0; unsigned ItemSize; protected: explicit StringMapImpl(unsigned itemSize) - : TheTable(nullptr), - // Initialize the map with zero buckets to allocation. - NumBuckets(0), NumItems(0), NumTombstones(0), ItemSize(itemSize) {} + : ItemSize(itemSize) {} StringMapImpl(StringMapImpl &&RHS) : TheTable(RHS.TheTable), NumBuckets(RHS.NumBuckets), NumItems(RHS.NumItems), NumTombstones(RHS.NumTombstones), @@ -225,9 +221,10 @@ class StringMap : public StringMapImpl { AllocatorTy Allocator; public: - typedef StringMapEntry MapEntryTy; + using MapEntryTy = StringMapEntry; StringMap() : StringMapImpl(static_cast(sizeof(MapEntryTy))) {} + explicit StringMap(unsigned InitialSize) : StringMapImpl(InitialSize, static_cast(sizeof(MapEntryTy))) {} @@ -248,12 +245,6 @@ class StringMap : public StringMapImpl { StringMap(StringMap &&RHS) : StringMapImpl(std::move(RHS)), Allocator(std::move(RHS.Allocator)) {} - StringMap &operator=(StringMap RHS) { - StringMapImpl::swap(RHS); - std::swap(Allocator, RHS.Allocator); - return *this; - } - StringMap(const StringMap &RHS) : StringMapImpl(static_cast(sizeof(MapEntryTy))), Allocator(RHS.Allocator) { @@ -289,16 +280,37 @@ class StringMap : public StringMapImpl { // not worthwhile. } + StringMap &operator=(StringMap RHS) { + StringMapImpl::swap(RHS); + std::swap(Allocator, RHS.Allocator); + return *this; + } + + ~StringMap() { + // Delete all the elements in the map, but don't reset the elements + // to default values. This is a copy of clear(), but avoids unnecessary + // work not required in the destructor. + if (!empty()) { + for (unsigned I = 0, E = NumBuckets; I != E; ++I) { + StringMapEntryBase *Bucket = TheTable[I]; + if (Bucket && Bucket != getTombstoneVal()) { + static_cast(Bucket)->Destroy(Allocator); + } + } + } + free(TheTable); + } + AllocatorTy &getAllocator() { return Allocator; } const AllocatorTy &getAllocator() const { return Allocator; } - typedef const char* key_type; - typedef ValueTy mapped_type; - typedef StringMapEntry value_type; - typedef size_t size_type; + using key_type = const char*; + using mapped_type = ValueTy; + using value_type = StringMapEntry; + using size_type = size_t; - typedef StringMapConstIterator const_iterator; - typedef StringMapIterator iterator; + using const_iterator = StringMapConstIterator; + using iterator = StringMapIterator; iterator begin() { return iterator(TheTable, NumBuckets == 0); @@ -313,7 +325,7 @@ class StringMap : public StringMapImpl { return const_iterator(TheTable+NumBuckets, true); } - llvm::iterator_range> keys() const { + iterator_range> keys() const { return make_range(StringMapKeyIterator(begin()), StringMapKeyIterator(end())); } @@ -433,21 +445,6 @@ class StringMap : public StringMapImpl { erase(I); return true; } - - ~StringMap() { - // Delete all the elements in the map, but don't reset the elements - // to default values. This is a copy of clear(), but avoids unnecessary - // work not required in the destructor. 
- if (!empty()) { - for (unsigned I = 0, E = NumBuckets; I != E; ++I) { - StringMapEntryBase *Bucket = TheTable[I]; - if (Bucket && Bucket != getTombstoneVal()) { - static_cast(Bucket)->Destroy(Allocator); - } - } - } - free(TheTable); - } }; template @@ -542,7 +539,6 @@ class StringMapKeyIterator public: StringMapKeyIterator() = default; - explicit StringMapKeyIterator(StringMapConstIterator Iter) : base(std::move(Iter)) {} diff --git a/interpreter/llvm/src/include/llvm/ADT/StringRef.h b/interpreter/llvm/src/include/llvm/ADT/StringRef.h index ce48f6d3bad32..f6c93a858db1d 100644 --- a/interpreter/llvm/src/include/llvm/ADT/StringRef.h +++ b/interpreter/llvm/src/include/llvm/ADT/StringRef.h @@ -1,4 +1,4 @@ -//===--- StringRef.h - Constant String Reference Wrapper --------*- C++ -*-===// +//===- StringRef.h - Constant String Reference Wrapper ----------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -15,16 +15,18 @@ #include "llvm/Support/Compiler.h" #include #include +#include #include #include #include +#include #include namespace llvm { - template - class SmallVectorImpl; + class APInt; class hash_code; + template class SmallVectorImpl; class StringRef; /// Helper functions for StringRef::getAsInteger. @@ -46,10 +48,11 @@ namespace llvm { /// general safe to store a StringRef. class StringRef { public: - typedef const char *iterator; - typedef const char *const_iterator; static const size_t npos = ~size_t(0); - typedef size_t size_type; + + using iterator = const char *; + using const_iterator = const char *; + using size_type = size_t; private: /// The start of the string, in an external buffer. @@ -906,6 +909,7 @@ namespace llvm { // StringRefs can be treated like a POD type. template struct isPodLike; template <> struct isPodLike { static const bool value = true; }; -} -#endif +} // end namespace llvm + +#endif // LLVM_ADT_STRINGREF_H diff --git a/interpreter/llvm/src/include/llvm/ADT/StringSet.h b/interpreter/llvm/src/include/llvm/ADT/StringSet.h index c32c2a4974385..9af44c07df795 100644 --- a/interpreter/llvm/src/include/llvm/ADT/StringSet.h +++ b/interpreter/llvm/src/include/llvm/ADT/StringSet.h @@ -1,4 +1,4 @@ -//===--- StringSet.h - The LLVM Compiler Driver -----------------*- C++ -*-===// +//===- StringSet.h - The LLVM Compiler Driver -------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -15,13 +15,19 @@ #define LLVM_ADT_STRINGSET_H #include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Allocator.h" +#include +#include +#include namespace llvm { /// StringSet - A wrapper for StringMap that provides set-like functionality. 
- template - class StringSet : public llvm::StringMap { - typedef llvm::StringMap base; + template + class StringSet : public StringMap { + using base = StringMap; + public: StringSet() = default; StringSet(std::initializer_list S) { @@ -40,6 +46,7 @@ namespace llvm { base::insert(std::make_pair(*It, '\0')); } }; -} + +} // end namespace llvm #endif // LLVM_ADT_STRINGSET_H diff --git a/interpreter/llvm/src/include/llvm/ADT/TinyPtrVector.h b/interpreter/llvm/src/include/llvm/ADT/TinyPtrVector.h index ca43b6046193d..79740713f75b0 100644 --- a/interpreter/llvm/src/include/llvm/ADT/TinyPtrVector.h +++ b/interpreter/llvm/src/include/llvm/ADT/TinyPtrVector.h @@ -30,9 +30,9 @@ namespace llvm { template class TinyPtrVector { public: - typedef SmallVector VecTy; - typedef typename VecTy::value_type value_type; - typedef PointerUnion PtrUnion; + using VecTy = SmallVector; + using value_type = typename VecTy::value_type; + using PtrUnion = PointerUnion; private: PtrUnion Val; @@ -167,10 +167,10 @@ class TinyPtrVector { return Val.template get()->size(); } - typedef EltTy *iterator; - typedef const EltTy *const_iterator; - typedef std::reverse_iterator reverse_iterator; - typedef std::reverse_iterator const_reverse_iterator; + using iterator = EltTy *; + using const_iterator = const EltTy *; + using reverse_iterator = std::reverse_iterator; + using const_reverse_iterator = std::reverse_iterator; iterator begin() { if (Val.template is()) diff --git a/interpreter/llvm/src/include/llvm/ADT/Triple.h b/interpreter/llvm/src/include/llvm/ADT/Triple.h index e3a8a31ba9bc3..cd560658ca4ec 100644 --- a/interpreter/llvm/src/include/llvm/ADT/Triple.h +++ b/interpreter/llvm/src/include/llvm/ADT/Triple.h @@ -59,6 +59,7 @@ class Triple { mips64, // MIPS64: mips64 mips64el, // MIPS64EL: mips64el msp430, // MSP430: msp430 + nios2, // NIOSII: nios2 ppc, // PPC: powerpc ppc64, // PPC64: powerpc64, ppu ppc64le, // PPC64LE: powerpc64le @@ -146,6 +147,7 @@ class Triple { enum OSType { UnknownOS, + Ananas, CloudABI, Darwin, DragonFly, @@ -238,7 +240,9 @@ class Triple { /// Default constructor is the same as an empty string and leaves all /// triple fields unknown. - Triple() : Data(), Arch(), Vendor(), OS(), Environment(), ObjectFormat() {} + Triple() + : Data(), Arch(), SubArch(), Vendor(), OS(), Environment(), + ObjectFormat() {} explicit Triple(const Twine &Str); Triple(const Twine &ArchStr, const Twine &VendorStr, const Twine &OSStr); @@ -252,6 +256,10 @@ class Triple { ObjectFormat == Other.ObjectFormat; } + bool operator!=(const Triple &Other) const { + return !(*this == Other); + } + /// @} /// @name Normalization /// @{ @@ -722,6 +730,12 @@ class Triple { /// \returns true if the triple is little endian, false otherwise. bool isLittleEndian() const; + /// Test whether target triples are compatible. + bool isCompatibleWith(const Triple &Other) const; + + /// Merge target triples. + std::string merge(const Triple &Other) const; + /// @} /// @name Static helpers for IDs. 
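The Triple.h hunk above adds operator!=, isCompatibleWith, and merge. The exact compatibility rules live in lib/Support/Triple.cpp and are not part of this diff, so the sketch below only demonstrates the call shapes; `mergeTriples` is an illustrative helper, not patch code:

    #include "llvm/ADT/Triple.h"
    #include <string>

    bool mergeTriples(const llvm::Triple &A, const llvm::Triple &B,
                      std::string &Out) {
      if (A != B && !A.isCompatibleWith(B)) // new operator!= plus the query
        return false;
      Out = A.merge(B); // one triple string for the combined module
      return true;
    }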
/// @{ diff --git a/interpreter/llvm/src/include/llvm/ADT/UniqueVector.h b/interpreter/llvm/src/include/llvm/ADT/UniqueVector.h index e1ab4b56023f8..b17fb2392bafc 100644 --- a/interpreter/llvm/src/include/llvm/ADT/UniqueVector.h +++ b/interpreter/llvm/src/include/llvm/ADT/UniqueVector.h @@ -1,4 +1,4 @@ -//===-- llvm/ADT/UniqueVector.h ---------------------------------*- C++ -*-===// +//===- llvm/ADT/UniqueVector.h ----------------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -24,16 +24,15 @@ namespace llvm { /// Entries can be fetched using operator[] with the entry ID. template class UniqueVector { public: - typedef typename std::vector VectorType; - typedef typename VectorType::iterator iterator; - typedef typename VectorType::const_iterator const_iterator; + using VectorType = typename std::vector; + using iterator = typename VectorType::iterator; + using const_iterator = typename VectorType::const_iterator; private: // Map - Used to handle the correspondence of entry to ID. std::map Map; // Vector - ID ordered vector of entries. Entries can be indexed by ID - 1. - // VectorType Vector; public: @@ -68,7 +67,6 @@ template class UniqueVector { } /// operator[] - Returns a reference to the entry with the specified ID. - /// const T &operator[](unsigned ID) const { assert(ID-1 < size() && "ID is 0 or out of range!"); return Vector[ID - 1]; @@ -87,21 +85,18 @@ template class UniqueVector { const_iterator end() const { return Vector.end(); } /// size - Returns the number of entries in the vector. - /// size_t size() const { return Vector.size(); } /// empty - Returns true if the vector is empty. - /// bool empty() const { return Vector.empty(); } /// reset - Clears all the entries. - /// void reset() { Map.clear(); Vector.resize(0, 0); } }; -} // End of namespace llvm +} // end namespace llvm #endif // LLVM_ADT_UNIQUEVECTOR_H diff --git a/interpreter/llvm/src/include/llvm/ADT/ilist_base.h b/interpreter/llvm/src/include/llvm/ADT/ilist_base.h index 1ffc864bea2f3..3d818a48d41d4 100644 --- a/interpreter/llvm/src/include/llvm/ADT/ilist_base.h +++ b/interpreter/llvm/src/include/llvm/ADT/ilist_base.h @@ -1,4 +1,4 @@ -//===- llvm/ADT/ilist_base.h - Intrusive List Base ---------------*- C++ -*-==// +//===- llvm/ADT/ilist_base.h - Intrusive List Base --------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -12,15 +12,13 @@ #include "llvm/ADT/ilist_node_base.h" #include -#include -#include namespace llvm { /// Implementations of list algorithms using ilist_node_base. template class ilist_base { public: - typedef ilist_node_base node_base_type; + using node_base_type = ilist_node_base; static void insertBeforeImpl(node_base_type &Next, node_base_type &N) { node_base_type &Prev = *Next.getPrev(); diff --git a/interpreter/llvm/src/include/llvm/ADT/ilist_iterator.h b/interpreter/llvm/src/include/llvm/ADT/ilist_iterator.h index c848d1a134f19..671e644e01542 100644 --- a/interpreter/llvm/src/include/llvm/ADT/ilist_iterator.h +++ b/interpreter/llvm/src/include/llvm/ADT/ilist_iterator.h @@ -1,4 +1,4 @@ -//===- llvm/ADT/ilist_iterator.h - Intrusive List Iterator -------*- C++ -*-==// +//===- llvm/ADT/ilist_iterator.h - Intrusive List Iterator ------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -23,28 +23,30 @@ namespace ilist_detail { /// Find const-correct node types. 
template struct IteratorTraits; template struct IteratorTraits { - typedef typename OptionsT::value_type value_type; - typedef typename OptionsT::pointer pointer; - typedef typename OptionsT::reference reference; - typedef ilist_node_impl *node_pointer; - typedef ilist_node_impl &node_reference; + using value_type = typename OptionsT::value_type; + using pointer = typename OptionsT::pointer; + using reference = typename OptionsT::reference; + using node_pointer = ilist_node_impl *; + using node_reference = ilist_node_impl &; }; template struct IteratorTraits { - typedef const typename OptionsT::value_type value_type; - typedef typename OptionsT::const_pointer pointer; - typedef typename OptionsT::const_reference reference; - typedef const ilist_node_impl *node_pointer; - typedef const ilist_node_impl &node_reference; + using value_type = const typename OptionsT::value_type; + using pointer = typename OptionsT::const_pointer; + using reference = typename OptionsT::const_reference; + using node_pointer = const ilist_node_impl *; + using node_reference = const ilist_node_impl &; }; template struct IteratorHelper; template <> struct IteratorHelper : ilist_detail::NodeAccess { - typedef ilist_detail::NodeAccess Access; + using Access = ilist_detail::NodeAccess; + template static void increment(T *&I) { I = Access::getNext(*I); } template static void decrement(T *&I) { I = Access::getPrev(*I); } }; template <> struct IteratorHelper : ilist_detail::NodeAccess { - typedef ilist_detail::NodeAccess Access; + using Access = ilist_detail::NodeAccess; + template static void increment(T *&I) { I = Access::getPrev(*I); } template static void decrement(T *&I) { I = Access::getNext(*I); } }; @@ -58,24 +60,23 @@ class ilist_iterator : ilist_detail::SpecificNodeAccess { friend ilist_iterator; friend ilist_iterator; - typedef ilist_detail::IteratorTraits Traits; - typedef ilist_detail::SpecificNodeAccess Access; + using Traits = ilist_detail::IteratorTraits; + using Access = ilist_detail::SpecificNodeAccess; public: - typedef typename Traits::value_type value_type; - typedef typename Traits::pointer pointer; - typedef typename Traits::reference reference; - typedef ptrdiff_t difference_type; - typedef std::bidirectional_iterator_tag iterator_category; - - typedef typename OptionsT::const_pointer const_pointer; - typedef typename OptionsT::const_reference const_reference; + using value_type = typename Traits::value_type; + using pointer = typename Traits::pointer; + using reference = typename Traits::reference; + using difference_type = ptrdiff_t; + using iterator_category = std::bidirectional_iterator_tag; + using const_pointer = typename OptionsT::const_pointer; + using const_reference = typename OptionsT::const_reference; private: - typedef typename Traits::node_pointer node_pointer; - typedef typename Traits::node_reference node_reference; + using node_pointer = typename Traits::node_pointer; + using node_reference = typename Traits::node_reference; - node_pointer NodePtr; + node_pointer NodePtr = nullptr; public: /// Create from an ilist_node. @@ -83,7 +84,7 @@ class ilist_iterator : ilist_detail::SpecificNodeAccess { explicit ilist_iterator(pointer NP) : NodePtr(Access::getNodePtr(NP)) {} explicit ilist_iterator(reference NR) : NodePtr(Access::getNodePtr(&NR)) {} - ilist_iterator() : NodePtr(nullptr) {} + ilist_iterator() = default; // This is templated so that we can allow constructing a const iterator from // a nonconst iterator... 
@@ -184,8 +185,8 @@ template struct simplify_type; /// FIXME: remove this, since there is no implicit conversion to NodeTy. template struct simplify_type> { - typedef ilist_iterator iterator; - typedef typename iterator::pointer SimpleType; + using iterator = ilist_iterator; + using SimpleType = typename iterator::pointer; static SimpleType getSimplifiedValue(const iterator &Node) { return &*Node; } }; diff --git a/interpreter/llvm/src/include/llvm/ADT/ilist_node.h b/interpreter/llvm/src/include/llvm/ADT/ilist_node.h index 7244d0f405860..3362611697cb0 100644 --- a/interpreter/llvm/src/include/llvm/ADT/ilist_node.h +++ b/interpreter/llvm/src/include/llvm/ADT/ilist_node.h @@ -1,4 +1,4 @@ -//==-- llvm/ADT/ilist_node.h - Intrusive Linked List Helper ------*- C++ -*-==// +//===- llvm/ADT/ilist_node.h - Intrusive Linked List Helper -----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -21,11 +21,10 @@ namespace llvm { namespace ilist_detail { + struct NodeAccess; -} // end namespace ilist_detail -template -struct ilist_traits; +} // end namespace ilist_detail template class ilist_iterator; template class ilist_sentinel; @@ -39,9 +38,9 @@ template class ilist_sentinel; /// provide type safety: you can't insert nodes of \a ilist_node_impl into the /// wrong \a simple_ilist or \a iplist. template class ilist_node_impl : OptionsT::node_base_type { - typedef typename OptionsT::value_type value_type; - typedef typename OptionsT::node_base_type node_base_type; - typedef typename OptionsT::list_base_type list_base_type; + using value_type = typename OptionsT::value_type; + using node_base_type = typename OptionsT::node_base_type; + using list_base_type = typename OptionsT::list_base_type; friend typename OptionsT::list_base_type; friend struct ilist_detail::NodeAccess; @@ -52,17 +51,18 @@ template class ilist_node_impl : OptionsT::node_base_type { friend class ilist_iterator; protected: - ilist_node_impl() = default; + using self_iterator = ilist_iterator; + using const_self_iterator = ilist_iterator; + using reverse_self_iterator = ilist_iterator; + using const_reverse_self_iterator = ilist_iterator; - typedef ilist_iterator self_iterator; - typedef ilist_iterator const_self_iterator; - typedef ilist_iterator reverse_self_iterator; - typedef ilist_iterator const_reverse_self_iterator; + ilist_node_impl() = default; private: ilist_node_impl *getPrev() { return static_cast(node_base_type::getPrev()); } + ilist_node_impl *getNext() { return static_cast(node_base_type::getNext()); } @@ -70,6 +70,7 @@ template class ilist_node_impl : OptionsT::node_base_type { const ilist_node_impl *getPrev() const { return static_cast(node_base_type::getPrev()); } + const ilist_node_impl *getNext() const { return static_cast(node_base_type::getNext()); } @@ -80,9 +81,11 @@ template class ilist_node_impl : OptionsT::node_base_type { public: self_iterator getIterator() { return self_iterator(*this); } const_self_iterator getIterator() const { return const_self_iterator(*this); } + reverse_self_iterator getReverseIterator() { return reverse_self_iterator(*this); } + const_reverse_self_iterator getReverseIterator() const { return const_reverse_self_iterator(*this); } @@ -151,6 +154,7 @@ class ilist_node }; namespace ilist_detail { + /// An access class for ilist_node private API. /// /// This gives access to the private parts of ilist nodes. 
Nodes for an ilist @@ -163,15 +167,18 @@ struct NodeAccess { static ilist_node_impl *getNodePtr(typename OptionsT::pointer N) { return N; } + template static const ilist_node_impl * getNodePtr(typename OptionsT::const_pointer N) { return N; } + template static typename OptionsT::pointer getValuePtr(ilist_node_impl *N) { return static_cast(N); } + template static typename OptionsT::const_pointer getValuePtr(const ilist_node_impl *N) { @@ -182,15 +189,18 @@ struct NodeAccess { static ilist_node_impl *getPrev(ilist_node_impl &N) { return N.getPrev(); } + template static ilist_node_impl *getNext(ilist_node_impl &N) { return N.getNext(); } + template static const ilist_node_impl * getPrev(const ilist_node_impl &N) { return N.getPrev(); } + template static const ilist_node_impl * getNext(const ilist_node_impl &N) { @@ -200,23 +210,27 @@ struct NodeAccess { template struct SpecificNodeAccess : NodeAccess { protected: - typedef typename OptionsT::pointer pointer; - typedef typename OptionsT::const_pointer const_pointer; - typedef ilist_node_impl node_type; + using pointer = typename OptionsT::pointer; + using const_pointer = typename OptionsT::const_pointer; + using node_type = ilist_node_impl; static node_type *getNodePtr(pointer N) { return NodeAccess::getNodePtr(N); } + static const node_type *getNodePtr(const_pointer N) { return NodeAccess::getNodePtr(N); } + static pointer getValuePtr(node_type *N) { return NodeAccess::getValuePtr(N); } + static const_pointer getValuePtr(const node_type *N) { return NodeAccess::getValuePtr(N); } }; + } // end namespace ilist_detail template @@ -265,6 +279,7 @@ class ilist_node_with_parent : public ilist_node { getNodeParent()->*(ParentTy::getSublistAccess((NodeTy *)nullptr)); return List.getPrevNode(*static_cast(this)); } + /// \brief Get the previous node, or \c nullptr for the list head. const NodeTy *getPrevNode() const { return const_cast(this)->getPrevNode(); @@ -278,6 +293,7 @@ class ilist_node_with_parent : public ilist_node { getNodeParent()->*(ParentTy::getSublistAccess((NodeTy *)nullptr)); return List.getNextNode(*static_cast(this)); } + /// \brief Get the next node, or \c nullptr for the list tail. 
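ilist_detail::NodeAccess above acts as a gatekeeper: nodes befriend this one struct, and iterators reach the private links through it rather than each iterator type being a friend itself. A minimal standalone sketch of the pattern (names hypothetical):

// Nodes befriend a single access struct instead of every client type.
struct Access;

class Node {
  Node *Prev = nullptr;
  Node *Next = nullptr;
  friend struct Access;  // the only way into the private links
};

struct Access {
  static Node *getPrev(Node &N) { return N.Prev; }
  static Node *getNext(Node &N) { return N.Next; }
};

int main() {
  Node N;
  return Access::getNext(N) == nullptr ? 0 : 1;  // links start out null
}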
const NodeTy *getNextNode() const { return const_cast(this)->getNextNode(); @@ -285,6 +301,6 @@ class ilist_node_with_parent : public ilist_node { /// @} }; -} // End llvm namespace +} // end namespace llvm -#endif +#endif // LLVM_ADT_ILIST_NODE_H diff --git a/interpreter/llvm/src/include/llvm/ADT/iterator.h b/interpreter/llvm/src/include/llvm/ADT/iterator.h index 28dcdf9613ef2..15720a67c047b 100644 --- a/interpreter/llvm/src/include/llvm/ADT/iterator.h +++ b/interpreter/llvm/src/include/llvm/ADT/iterator.h @@ -11,9 +11,11 @@ #define LLVM_ADT_ITERATOR_H #include "llvm/ADT/iterator_range.h" +#include #include #include #include +#include namespace llvm { @@ -206,7 +208,7 @@ template < class iterator_adaptor_base : public iterator_facade_base { - typedef typename iterator_adaptor_base::iterator_facade_base BaseT; + using BaseT = typename iterator_adaptor_base::iterator_facade_base; protected: WrappedIteratorT I; @@ -221,7 +223,7 @@ class iterator_adaptor_base const WrappedIteratorT &wrapped() const { return I; } public: - typedef DifferenceTypeT difference_type; + using difference_type = DifferenceTypeT; DerivedT &operator+=(difference_type n) { static_assert( @@ -279,7 +281,7 @@ class iterator_adaptor_base /// which is implemented with some iterator over T*s: /// /// \code -/// typedef pointee_iterator::iterator> iterator; +/// using iterator = pointee_iterator::iterator>; /// \endcode template #include +#include namespace llvm { diff --git a/interpreter/llvm/src/include/llvm/ADT/simple_ilist.h b/interpreter/llvm/src/include/llvm/ADT/simple_ilist.h index a1ab59170840f..4c7598a1acb4e 100644 --- a/interpreter/llvm/src/include/llvm/ADT/simple_ilist.h +++ b/interpreter/llvm/src/include/llvm/ADT/simple_ilist.h @@ -13,9 +13,14 @@ #include "llvm/ADT/ilist_base.h" #include "llvm/ADT/ilist_iterator.h" #include "llvm/ADT/ilist_node.h" +#include "llvm/ADT/ilist_node_options.h" +#include "llvm/Support/Compiler.h" #include #include #include +#include +#include +#include namespace llvm { @@ -77,23 +82,23 @@ class simple_ilist typename ilist_detail::compute_node_options::type> { static_assert(ilist_detail::check_options::value, "Unrecognized node option!"); - typedef - typename ilist_detail::compute_node_options::type OptionsT; - typedef typename OptionsT::list_base_type list_base_type; + using OptionsT = + typename ilist_detail::compute_node_options::type; + using list_base_type = typename OptionsT::list_base_type; ilist_sentinel Sentinel; public: - typedef typename OptionsT::value_type value_type; - typedef typename OptionsT::pointer pointer; - typedef typename OptionsT::reference reference; - typedef typename OptionsT::const_pointer const_pointer; - typedef typename OptionsT::const_reference const_reference; - typedef ilist_iterator iterator; - typedef ilist_iterator const_iterator; - typedef ilist_iterator reverse_iterator; - typedef ilist_iterator const_reverse_iterator; - typedef size_t size_type; - typedef ptrdiff_t difference_type; + using value_type = typename OptionsT::value_type; + using pointer = typename OptionsT::pointer; + using reference = typename OptionsT::reference; + using const_pointer = typename OptionsT::const_pointer; + using const_reference = typename OptionsT::const_reference; + using iterator = ilist_iterator; + using const_iterator = ilist_iterator; + using reverse_iterator = ilist_iterator; + using const_reverse_iterator = ilist_iterator; + using size_type = size_t; + using difference_type = ptrdiff_t; simple_ilist() = default; ~simple_ilist() = default; diff --git 
a/interpreter/llvm/src/include/llvm/Analysis/AliasAnalysis.h b/interpreter/llvm/src/include/llvm/Analysis/AliasAnalysis.h index 1b8b9751faa19..e00ae4f3beece 100644 --- a/interpreter/llvm/src/include/llvm/Analysis/AliasAnalysis.h +++ b/interpreter/llvm/src/include/llvm/Analysis/AliasAnalysis.h @@ -38,11 +38,11 @@ #ifndef LLVM_ANALYSIS_ALIASANALYSIS_H #define LLVM_ANALYSIS_ALIASANALYSIS_H +#include "llvm/Analysis/MemoryLocation.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/PassManager.h" -#include "llvm/Analysis/MemoryLocation.h" -#include "llvm/Analysis/TargetLibraryInfo.h" namespace llvm { class BasicAAResult; diff --git a/interpreter/llvm/src/include/llvm/Analysis/AliasSetTracker.h b/interpreter/llvm/src/include/llvm/Analysis/AliasSetTracker.h index eac97501c759c..daafd2fabe781 100644 --- a/interpreter/llvm/src/include/llvm/Analysis/AliasSetTracker.h +++ b/interpreter/llvm/src/include/llvm/Analysis/AliasSetTracker.h @@ -69,10 +69,15 @@ class AliasSet : public ilist_node { if (AAInfo == DenseMapInfo::getEmptyKey()) // We don't have a AAInfo yet. Set it to NewAAInfo. AAInfo = NewAAInfo; - else if (AAInfo != NewAAInfo) - // NewAAInfo conflicts with AAInfo. - AAInfo = DenseMapInfo::getTombstoneKey(); - + else { + AAMDNodes Intersection(AAInfo.intersect(NewAAInfo)); + if (!Intersection) { + // NewAAInfo conflicts with AAInfo. + AAInfo = DenseMapInfo::getTombstoneKey(); + return SizeChanged; + } + AAInfo = Intersection; + } return SizeChanged; } diff --git a/interpreter/llvm/src/include/llvm/Analysis/AssumptionCache.h b/interpreter/llvm/src/include/llvm/Analysis/AssumptionCache.h index 04c6fd70e07f7..58d72afdc1b6f 100644 --- a/interpreter/llvm/src/include/llvm/Analysis/AssumptionCache.h +++ b/interpreter/llvm/src/include/llvm/Analysis/AssumptionCache.h @@ -21,8 +21,8 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/ValueHandle.h" #include "llvm/IR/PassManager.h" +#include "llvm/IR/ValueHandle.h" #include "llvm/Pass.h" #include diff --git a/interpreter/llvm/src/include/llvm/Analysis/BlockFrequencyInfoImpl.h b/interpreter/llvm/src/include/llvm/Analysis/BlockFrequencyInfoImpl.h index 3e05e09900a5f..5de3821242e0f 100644 --- a/interpreter/llvm/src/include/llvm/Analysis/BlockFrequencyInfoImpl.h +++ b/interpreter/llvm/src/include/llvm/Analysis/BlockFrequencyInfoImpl.h @@ -1353,4 +1353,4 @@ struct BFIDOTGraphTraitsBase : public DefaultDOTGraphTraits { #undef DEBUG_TYPE -#endif +#endif // LLVM_ANALYSIS_BLOCKFREQUENCYINFOIMPL_H diff --git a/interpreter/llvm/src/include/llvm/Analysis/BranchProbabilityInfo.h b/interpreter/llvm/src/include/llvm/Analysis/BranchProbabilityInfo.h index 6a876679543d4..94d3d4de6c9dd 100644 --- a/interpreter/llvm/src/include/llvm/Analysis/BranchProbabilityInfo.h +++ b/interpreter/llvm/src/include/llvm/Analysis/BranchProbabilityInfo.h @@ -26,6 +26,7 @@ namespace llvm { class LoopInfo; +class TargetLibraryInfo; class raw_ostream; /// \brief Analysis providing branch probability information. @@ -43,8 +44,9 @@ class raw_ostream; class BranchProbabilityInfo { public: BranchProbabilityInfo() {} - BranchProbabilityInfo(const Function &F, const LoopInfo &LI) { - calculate(F, LI); + BranchProbabilityInfo(const Function &F, const LoopInfo &LI, + const TargetLibraryInfo *TLI = nullptr) { + calculate(F, LI, TLI); } BranchProbabilityInfo(BranchProbabilityInfo &&Arg) @@ -116,7 +118,8 @@ class BranchProbabilityInfo { return IsLikely ? 
LikelyProb : LikelyProb.getCompl(); } - void calculate(const Function &F, const LoopInfo &LI); + void calculate(const Function &F, const LoopInfo &LI, + const TargetLibraryInfo *TLI = nullptr); /// Forget analysis results for the given basic block. void eraseBlock(const BasicBlock *BB); @@ -171,7 +174,7 @@ class BranchProbabilityInfo { bool calcColdCallHeuristics(const BasicBlock *BB); bool calcPointerHeuristics(const BasicBlock *BB); bool calcLoopBranchHeuristics(const BasicBlock *BB, const LoopInfo &LI); - bool calcZeroHeuristics(const BasicBlock *BB); + bool calcZeroHeuristics(const BasicBlock *BB, const TargetLibraryInfo *TLI); bool calcFloatingPointHeuristics(const BasicBlock *BB); bool calcInvokeHeuristics(const BasicBlock *BB); }; diff --git a/interpreter/llvm/src/include/llvm/Analysis/CFLAliasAnalysisUtils.h b/interpreter/llvm/src/include/llvm/Analysis/CFLAliasAnalysisUtils.h new file mode 100644 index 0000000000000..981a8ddc2289a --- /dev/null +++ b/interpreter/llvm/src/include/llvm/Analysis/CFLAliasAnalysisUtils.h @@ -0,0 +1,58 @@ +//=- CFLAliasAnalysisUtils.h - Utilities for CFL Alias Analysis ----*- C++-*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// \file +// These are the utilities/helpers used by the CFL Alias Analyses available in +// tree, i.e. Steensgaard's and Andersen's. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_CFLALIASANALYSISUTILS_H +#define LLVM_ANALYSIS_CFLALIASANALYSISUTILS_H + +#include "llvm/IR/Function.h" +#include "llvm/IR/ValueHandle.h" + +namespace llvm { +namespace cflaa { + +template <typename AAResult> struct FunctionHandle final : public CallbackVH { + FunctionHandle(Function *Fn, AAResult *Result) + : CallbackVH(Fn), Result(Result) { + assert(Fn != nullptr); + assert(Result != nullptr); + } + + void deleted() override { removeSelfFromCache(); } + void allUsesReplacedWith(Value *) override { removeSelfFromCache(); } + +private: + AAResult *Result; + + void removeSelfFromCache() { + assert(Result != nullptr); + auto *Val = getValPtr(); + Result->evict(cast<Function>(Val)); + setValPtr(nullptr); + } +}; + +static inline const Function *parentFunctionOfValue(const Value *Val) { + if (auto *Inst = dyn_cast<Instruction>(Val)) { + auto *Bb = Inst->getParent(); + return Bb->getParent(); + } + + if (auto *Arg = dyn_cast<Argument>(Val)) + return Arg->getParent(); + return nullptr; +} +} // namespace cflaa +} // namespace llvm + +#endif // LLVM_ANALYSIS_CFLALIASANALYSISUTILS_H diff --git a/interpreter/llvm/src/include/llvm/Analysis/CFLAndersAliasAnalysis.h b/interpreter/llvm/src/include/llvm/Analysis/CFLAndersAliasAnalysis.h index f3520aa3fe829..4146ad4d18ac0 100644 --- a/interpreter/llvm/src/include/llvm/Analysis/CFLAndersAliasAnalysis.h +++ b/interpreter/llvm/src/include/llvm/Analysis/CFLAndersAliasAnalysis.h @@ -18,8 +18,8 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Optional.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/CFLAliasAnalysisUtils.h" #include "llvm/IR/Function.h" -#include "llvm/IR/ValueHandle.h" #include "llvm/Pass.h" #include <forward_list> @@ -47,7 +47,7 @@ class CFLAndersAAResult : public AAResultBase<CFLAndersAAResult> { return false; } /// Evict the given function from cache - void evict(const Function &Fn); + void evict(const Function *Fn); /// \brief Get the alias summary for the given function /// Return nullptr if the
summary is not found or not available @@ -57,27 +57,6 @@ class CFLAndersAAResult : public AAResultBase { AliasResult alias(const MemoryLocation &, const MemoryLocation &); private: - struct FunctionHandle final : public CallbackVH { - FunctionHandle(Function *Fn, CFLAndersAAResult *Result) - : CallbackVH(Fn), Result(Result) { - assert(Fn != nullptr); - assert(Result != nullptr); - } - - void deleted() override { removeSelfFromCache(); } - void allUsesReplacedWith(Value *) override { removeSelfFromCache(); } - - private: - CFLAndersAAResult *Result; - - void removeSelfFromCache() { - assert(Result != nullptr); - auto *Val = getValPtr(); - Result->evict(*cast(Val)); - setValPtr(nullptr); - } - }; - /// \brief Ensures that the given function is available in the cache. /// Returns the appropriate entry from the cache. const Optional &ensureCached(const Function &); @@ -97,7 +76,7 @@ class CFLAndersAAResult : public AAResultBase { /// that simply has empty sets. DenseMap> Cache; - std::forward_list Handles; + std::forward_list> Handles; }; /// Analysis pass providing a never-invalidated alias analysis result. diff --git a/interpreter/llvm/src/include/llvm/Analysis/CFLSteensAliasAnalysis.h b/interpreter/llvm/src/include/llvm/Analysis/CFLSteensAliasAnalysis.h index 3aae9a1e9b2e5..fd3fa5febcdfb 100644 --- a/interpreter/llvm/src/include/llvm/Analysis/CFLSteensAliasAnalysis.h +++ b/interpreter/llvm/src/include/llvm/Analysis/CFLSteensAliasAnalysis.h @@ -19,6 +19,7 @@ #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/CFLAliasAnalysisUtils.h" #include "llvm/IR/Function.h" #include "llvm/IR/Module.h" #include "llvm/IR/ValueHandle.h" @@ -85,27 +86,6 @@ class CFLSteensAAResult : public AAResultBase { } private: - struct FunctionHandle final : public CallbackVH { - FunctionHandle(Function *Fn, CFLSteensAAResult *Result) - : CallbackVH(Fn), Result(Result) { - assert(Fn != nullptr); - assert(Result != nullptr); - } - - void deleted() override { removeSelfFromCache(); } - void allUsesReplacedWith(Value *) override { removeSelfFromCache(); } - - private: - CFLSteensAAResult *Result; - - void removeSelfFromCache() { - assert(Result != nullptr); - auto *Val = getValPtr(); - Result->evict(cast(Val)); - setValPtr(nullptr); - } - }; - const TargetLibraryInfo &TLI; /// \brief Cached mapping of Functions to their StratifiedSets. @@ -114,7 +94,7 @@ class CFLSteensAAResult : public AAResultBase { /// have any kind of recursion, it is discernable from a function /// that simply has empty sets. DenseMap> Cache; - std::forward_list Handles; + std::forward_list> Handles; FunctionInfo buildSetsFrom(Function *F); }; diff --git a/interpreter/llvm/src/include/llvm/Analysis/CGSCCPassManager.h b/interpreter/llvm/src/include/llvm/Analysis/CGSCCPassManager.h index a15a9e18c8153..32868cbecdcf0 100644 --- a/interpreter/llvm/src/include/llvm/Analysis/CGSCCPassManager.h +++ b/interpreter/llvm/src/include/llvm/Analysis/CGSCCPassManager.h @@ -577,12 +577,17 @@ class CGSCCToFunctionPassAdaptor // analyses will eventually occur when the module pass completes. PA.intersect(std::move(PassPA)); - // Update the call graph based on this function pass. This may also - // update the current SCC to point to a smaller, more refined SCC. 
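The two FunctionHandle classes deleted above were identical except for the owning result type, which is why they can collapse into the single cflaa::FunctionHandle<AAResult> template. A standalone sketch of the deduplication (Handle, ResultA, and ResultB are hypothetical stand-ins):

#include <cassert>

// One handle templated over its owner replaces two copy-pasted nested classes.
template <typename Owner> struct Handle {
  explicit Handle(Owner *O) : O(O) {}
  void onDeleted() { O->evict(); }  // forwards to whichever owner type
private:
  Owner *O;
};

struct ResultA { bool Evicted = false; void evict() { Evicted = true; } };
struct ResultB { bool Evicted = false; void evict() { Evicted = true; } };

int main() {
  ResultA RA;
  ResultB RB;
  Handle<ResultA>(&RA).onDeleted();
  Handle<ResultB>(&RB).onDeleted();
  assert(RA.Evicted && RB.Evicted);
  return 0;
}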
- CurrentC = &updateCGAndAnalysisManagerForFunctionPass( - CG, *CurrentC, *N, AM, UR, DebugLogging); - assert(CG.lookupSCC(*N) == CurrentC && - "Current SCC not updated to the SCC containing the current node!"); + // If the call graph hasn't been preserved, update it based on this + // function pass. This may also update the current SCC to point to + // a smaller, more refined SCC. + auto PAC = PA.getChecker(); + if (!PAC.preserved() && !PAC.preservedSet>()) { + CurrentC = &updateCGAndAnalysisManagerForFunctionPass( + CG, *CurrentC, *N, AM, UR, DebugLogging); + assert( + CG.lookupSCC(*N) == CurrentC && + "Current SCC not updated to the SCC containing the current node!"); + } } // By definition we preserve the proxy. And we preserve all analyses on diff --git a/interpreter/llvm/src/include/llvm/Analysis/ConstantFolding.h b/interpreter/llvm/src/include/llvm/Analysis/ConstantFolding.h index ff6ca1959153a..42034741b8e3c 100644 --- a/interpreter/llvm/src/include/llvm/Analysis/ConstantFolding.h +++ b/interpreter/llvm/src/include/llvm/Analysis/ConstantFolding.h @@ -31,6 +31,7 @@ class DataLayout; class Function; class GlobalValue; class Instruction; +class ImmutableCallSite; class TargetLibraryInfo; class Type; @@ -125,11 +126,12 @@ Constant *ConstantFoldLoadThroughGEPIndices(Constant *C, /// canConstantFoldCallTo - Return true if its even possible to fold a call to /// the specified function. -bool canConstantFoldCallTo(const Function *F); +bool canConstantFoldCallTo(ImmutableCallSite CS, const Function *F); /// ConstantFoldCall - Attempt to constant fold a call to the specified function /// with the specified arguments, returning null if unsuccessful. -Constant *ConstantFoldCall(Function *F, ArrayRef Operands, +Constant *ConstantFoldCall(ImmutableCallSite CS, Function *F, + ArrayRef Operands, const TargetLibraryInfo *TLI = nullptr); /// \brief Check whether the given call has no side-effects. diff --git a/interpreter/llvm/src/include/llvm/Analysis/DemandedBits.h b/interpreter/llvm/src/include/llvm/Analysis/DemandedBits.h index e5fd8a0007fed..e52c66f361c3d 100644 --- a/interpreter/llvm/src/include/llvm/Analysis/DemandedBits.h +++ b/interpreter/llvm/src/include/llvm/Analysis/DemandedBits.h @@ -22,11 +22,11 @@ #ifndef LLVM_ANALYSIS_DEMANDED_BITS_H #define LLVM_ANALYSIS_DEMANDED_BITS_H -#include "llvm/Pass.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" namespace llvm { diff --git a/interpreter/llvm/src/include/llvm/Analysis/DominanceFrontier.h b/interpreter/llvm/src/include/llvm/Analysis/DominanceFrontier.h index 8cae63c3c8697..b566aeaf1fd61 100644 --- a/interpreter/llvm/src/include/llvm/Analysis/DominanceFrontier.h +++ b/interpreter/llvm/src/include/llvm/Analysis/DominanceFrontier.h @@ -29,9 +29,9 @@ namespace llvm { /// DominanceFrontierBase - Common base class for computing forward and inverse /// dominance frontiers for a function. /// -template +template class DominanceFrontierBase { -public: + public: typedef std::set DomSetType; // Dom set for a bb typedef std::map DomSetMapType; // Dom set map @@ -40,10 +40,10 @@ class DominanceFrontierBase { DomSetMapType Frontiers; std::vector Roots; - const bool IsPostDominators; + static constexpr bool IsPostDominators = IsPostDom; -public: - DominanceFrontierBase(bool isPostDom) : IsPostDominators(isPostDom) {} + public: + DominanceFrontierBase() {} /// getRoots - Return the root blocks of the current CFG. 
This may include /// multiple blocks if we are computing post dominators. For forward @@ -96,7 +96,7 @@ class DominanceFrontierBase { /// compare - Return true if the other dominance frontier base matches /// this dominance frontier base. Otherwise return false. - bool compare(DominanceFrontierBase &Other) const; + bool compare(DominanceFrontierBase &Other) const; /// print - Convert to human readable form /// @@ -113,22 +113,21 @@ class DominanceFrontierBase { /// used to compute a forward dominator frontiers. /// template -class ForwardDominanceFrontierBase : public DominanceFrontierBase { -private: +class ForwardDominanceFrontierBase + : public DominanceFrontierBase { + private: typedef GraphTraits BlockTraits; public: - typedef DominatorTreeBase DomTreeT; - typedef DomTreeNodeBase DomTreeNodeT; - typedef typename DominanceFrontierBase::DomSetType DomSetType; - - ForwardDominanceFrontierBase() : DominanceFrontierBase(false) {} - - void analyze(DomTreeT &DT) { - this->Roots = DT.getRoots(); - assert(this->Roots.size() == 1 && - "Only one entry block for forward domfronts!"); - calculate(DT, DT[this->Roots[0]]); + typedef DomTreeBase DomTreeT; + typedef DomTreeNodeBase DomTreeNodeT; + typedef typename DominanceFrontierBase::DomSetType DomSetType; + + void analyze(DomTreeT &DT) { + this->Roots = DT.getRoots(); + assert(this->Roots.size() == 1 && + "Only one entry block for forward domfronts!"); + calculate(DT, DT[this->Roots[0]]); } const DomSetType &calculate(const DomTreeT &DT, const DomTreeNodeT *Node); @@ -136,15 +135,16 @@ class ForwardDominanceFrontierBase : public DominanceFrontierBase { class DominanceFrontier : public ForwardDominanceFrontierBase { public: - typedef DominatorTreeBase DomTreeT; - typedef DomTreeNodeBase DomTreeNodeT; - typedef DominanceFrontierBase::DomSetType DomSetType; - typedef DominanceFrontierBase::iterator iterator; - typedef DominanceFrontierBase::const_iterator const_iterator; - - /// Handle invalidation explicitly. - bool invalidate(Function &F, const PreservedAnalyses &PA, - FunctionAnalysisManager::Invalidator &); + typedef DomTreeBase DomTreeT; + typedef DomTreeNodeBase DomTreeNodeT; + typedef DominanceFrontierBase::DomSetType DomSetType; + typedef DominanceFrontierBase::iterator iterator; + typedef DominanceFrontierBase::const_iterator + const_iterator; + + /// Handle invalidation explicitly. + bool invalidate(Function &F, const PreservedAnalyses &PA, + FunctionAnalysisManager::Invalidator &); }; class DominanceFrontierWrapperPass : public FunctionPass { @@ -168,7 +168,8 @@ class DominanceFrontierWrapperPass : public FunctionPass { void dump() const; }; -extern template class DominanceFrontierBase; +extern template class DominanceFrontierBase; +extern template class DominanceFrontierBase; extern template class ForwardDominanceFrontierBase; /// \brief Analysis pass which computes a \c DominanceFrontier. 
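The DominanceFrontierBase change above turns the post-dominator flag from a per-object runtime bool into a compile-time template parameter, and the extern template lines declare explicit instantiations for both flag values. A standalone sketch of both pieces, with hypothetical names:

#include <cassert>

// The flag becomes part of the type: no per-object storage, and forward and
// post-dominator frontiers are now distinct, separately instantiable types.
template <class BlockT, bool IsPostDom> class FrontierBase {
public:
  static constexpr bool IsPostDominators = IsPostDom;
  bool isPostDominator() const { return IsPostDominators; }
};

struct Block {};

// Explicit instantiation definitions; a header would pair these with
// 'extern template' declarations, as in the hunk above, so other translation
// units link against them instead of re-instantiating the template.
template class FrontierBase<Block *, false>;
template class FrontierBase<Block *, true>;

int main() {
  FrontierBase<Block *, false> Forward;
  FrontierBase<Block *, true> Post;
  assert(!Forward.isPostDominator() && Post.isPostDominator());
  return 0;
}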
diff --git a/interpreter/llvm/src/include/llvm/Analysis/DominanceFrontierImpl.h b/interpreter/llvm/src/include/llvm/Analysis/DominanceFrontierImpl.h index 9f8cacc24f2ce..5093b975e7091 100644 --- a/interpreter/llvm/src/include/llvm/Analysis/DominanceFrontierImpl.h +++ b/interpreter/llvm/src/include/llvm/Analysis/DominanceFrontierImpl.h @@ -39,33 +39,33 @@ class DFCalculateWorkObject { const DomTreeNodeT *parentNode; }; -template -void DominanceFrontierBase::removeBlock(BlockT *BB) { +template +void DominanceFrontierBase::removeBlock(BlockT *BB) { assert(find(BB) != end() && "Block is not in DominanceFrontier!"); for (iterator I = begin(), E = end(); I != E; ++I) I->second.erase(BB); Frontiers.erase(BB); } -template -void DominanceFrontierBase::addToFrontier(iterator I, - BlockT *Node) { +template +void DominanceFrontierBase::addToFrontier(iterator I, + BlockT *Node) { assert(I != end() && "BB is not in DominanceFrontier!"); assert(I->second.count(Node) && "Node is not in DominanceFrontier of BB"); I->second.erase(Node); } -template -void DominanceFrontierBase::removeFromFrontier(iterator I, - BlockT *Node) { +template +void DominanceFrontierBase::removeFromFrontier( + iterator I, BlockT *Node) { assert(I != end() && "BB is not in DominanceFrontier!"); assert(I->second.count(Node) && "Node is not in DominanceFrontier of BB"); I->second.erase(Node); } -template -bool DominanceFrontierBase::compareDomSet(DomSetType &DS1, - const DomSetType &DS2) const { +template +bool DominanceFrontierBase::compareDomSet( + DomSetType &DS1, const DomSetType &DS2) const { std::set tmpSet; for (BlockT *BB : DS2) tmpSet.insert(BB); @@ -88,9 +88,9 @@ bool DominanceFrontierBase::compareDomSet(DomSetType &DS1, return false; } -template -bool DominanceFrontierBase::compare( - DominanceFrontierBase &Other) const { +template +bool DominanceFrontierBase::compare( + DominanceFrontierBase &Other) const { DomSetMapType tmpFrontiers; for (typename DomSetMapType::const_iterator I = Other.begin(), E = Other.end(); @@ -118,8 +118,8 @@ bool DominanceFrontierBase::compare( return false; } -template -void DominanceFrontierBase::print(raw_ostream &OS) const { +template +void DominanceFrontierBase::print(raw_ostream &OS) const { for (const_iterator I = begin(), E = end(); I != E; ++I) { OS << " DomFrontier for BB "; if (I->first) @@ -142,8 +142,8 @@ void DominanceFrontierBase::print(raw_ostream &OS) const { } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -template -void DominanceFrontierBase::dump() const { +template +void DominanceFrontierBase::dump() const { print(dbgs()); } #endif diff --git a/interpreter/llvm/src/include/llvm/Analysis/InlineCost.h b/interpreter/llvm/src/include/llvm/Analysis/InlineCost.h index d91d08a524dc9..f33a2de5a5f4a 100644 --- a/interpreter/llvm/src/include/llvm/Analysis/InlineCost.h +++ b/interpreter/llvm/src/include/llvm/Analysis/InlineCost.h @@ -14,8 +14,8 @@ #ifndef LLVM_ANALYSIS_INLINECOST_H #define LLVM_ANALYSIS_INLINECOST_H -#include "llvm/Analysis/CallGraphSCCPass.h" #include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/CallGraphSCCPass.h" #include #include @@ -160,7 +160,7 @@ InlineParams getInlineParams(int Threshold); /// the -Oz flag. InlineParams getInlineParams(unsigned OptLevel, unsigned SizeOptLevel); -/// Return the cost associated with a callsite, including paramater passing +/// Return the cost associated with a callsite, including parameter passing /// and the call/return instruction. 
int getCallsiteCost(CallSite CS, const DataLayout &DL); diff --git a/interpreter/llvm/src/include/llvm/Analysis/InstructionSimplify.h b/interpreter/llvm/src/include/llvm/Analysis/InstructionSimplify.h index bf73e099a2bfd..be0f32ef444a4 100644 --- a/interpreter/llvm/src/include/llvm/Analysis/InstructionSimplify.h +++ b/interpreter/llvm/src/include/llvm/Analysis/InstructionSimplify.h @@ -41,6 +41,7 @@ template class ArrayRef; class AssumptionCache; class DominatorTree; class Instruction; +class ImmutableCallSite; class DataLayout; class FastMathFlags; struct LoopStandardAnalysisResults; @@ -70,174 +71,174 @@ struct SimplifyQuery { Copy.CxtI = I; return Copy; } - }; +}; - // NOTE: the explicit multiple argument versions of these functions are - // deprecated. - // Please use the SimplifyQuery versions in new code. +// NOTE: the explicit multiple argument versions of these functions are +// deprecated. +// Please use the SimplifyQuery versions in new code. - /// Given operands for an Add, fold the result or return null. - Value *SimplifyAddInst(Value *LHS, Value *RHS, bool isNSW, bool isNUW, +/// Given operands for an Add, fold the result or return null. +Value *SimplifyAddInst(Value *LHS, Value *RHS, bool isNSW, bool isNUW, const SimplifyQuery &Q); - /// Given operands for a Sub, fold the result or return null. - Value *SimplifySubInst(Value *LHS, Value *RHS, bool isNSW, bool isNUW, - const SimplifyQuery &Q); +/// Given operands for a Sub, fold the result or return null. +Value *SimplifySubInst(Value *LHS, Value *RHS, bool isNSW, bool isNUW, + const SimplifyQuery &Q); - /// Given operands for an FAdd, fold the result or return null. - Value *SimplifyFAddInst(Value *LHS, Value *RHS, FastMathFlags FMF, - const SimplifyQuery &Q); +/// Given operands for an FAdd, fold the result or return null. +Value *SimplifyFAddInst(Value *LHS, Value *RHS, FastMathFlags FMF, + const SimplifyQuery &Q); - /// Given operands for an FSub, fold the result or return null. - Value *SimplifyFSubInst(Value *LHS, Value *RHS, FastMathFlags FMF, - const SimplifyQuery &Q); +/// Given operands for an FSub, fold the result or return null. +Value *SimplifyFSubInst(Value *LHS, Value *RHS, FastMathFlags FMF, + const SimplifyQuery &Q); - /// Given operands for an FMul, fold the result or return null. - Value *SimplifyFMulInst(Value *LHS, Value *RHS, FastMathFlags FMF, - const SimplifyQuery &Q); +/// Given operands for an FMul, fold the result or return null. +Value *SimplifyFMulInst(Value *LHS, Value *RHS, FastMathFlags FMF, + const SimplifyQuery &Q); - /// Given operands for a Mul, fold the result or return null. - Value *SimplifyMulInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); +/// Given operands for a Mul, fold the result or return null. +Value *SimplifyMulInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); - /// Given operands for an SDiv, fold the result or return null. - Value *SimplifySDivInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); +/// Given operands for an SDiv, fold the result or return null. +Value *SimplifySDivInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); - /// Given operands for a UDiv, fold the result or return null. - Value *SimplifyUDivInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); +/// Given operands for a UDiv, fold the result or return null. +Value *SimplifyUDivInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); - /// Given operands for an FDiv, fold the result or return null. 
- Value *SimplifyFDivInst(Value *LHS, Value *RHS, FastMathFlags FMF, - const SimplifyQuery &Q); +/// Given operands for an FDiv, fold the result or return null. +Value *SimplifyFDivInst(Value *LHS, Value *RHS, FastMathFlags FMF, + const SimplifyQuery &Q); - /// Given operands for an SRem, fold the result or return null. - Value *SimplifySRemInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); +/// Given operands for an SRem, fold the result or return null. +Value *SimplifySRemInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); - /// Given operands for a URem, fold the result or return null. - Value *SimplifyURemInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); +/// Given operands for a URem, fold the result or return null. +Value *SimplifyURemInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); - /// Given operands for an FRem, fold the result or return null. - Value *SimplifyFRemInst(Value *LHS, Value *RHS, FastMathFlags FMF, - const SimplifyQuery &Q); - - /// Given operands for a Shl, fold the result or return null. - Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, - const SimplifyQuery &Q); - - /// Given operands for a LShr, fold the result or return null. - Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact, - const SimplifyQuery &Q); +/// Given operands for an FRem, fold the result or return null. +Value *SimplifyFRemInst(Value *LHS, Value *RHS, FastMathFlags FMF, + const SimplifyQuery &Q); - /// Given operands for a AShr, fold the result or return nulll. - Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact, - const SimplifyQuery &Q); +/// Given operands for a Shl, fold the result or return null. +Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, + const SimplifyQuery &Q); - /// Given operands for an And, fold the result or return null. - Value *SimplifyAndInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); +/// Given operands for a LShr, fold the result or return null. +Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact, + const SimplifyQuery &Q); - /// Given operands for an Or, fold the result or return null. - Value *SimplifyOrInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); +/// Given operands for an AShr, fold the result or return null. +Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact, + const SimplifyQuery &Q); - /// Given operands for an Xor, fold the result or return null. - Value *SimplifyXorInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); +/// Given operands for an And, fold the result or return null. +Value *SimplifyAndInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); - /// Given operands for an ICmpInst, fold the result or return null. - Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, - const SimplifyQuery &Q); +/// Given operands for an Or, fold the result or return null. +Value *SimplifyOrInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); - /// Given operands for an FCmpInst, fold the result or return null. - Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, - FastMathFlags FMF, const SimplifyQuery &Q); +/// Given operands for an Xor, fold the result or return null. +Value *SimplifyXorInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); - /// Given operands for a SelectInst, fold the result or return null. - Value *SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal, - const SimplifyQuery &Q); +/// Given operands for an ICmpInst, fold the result or return null.
+Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, + const SimplifyQuery &Q); - /// Given operands for a GetElementPtrInst, fold the result or return null. - Value *SimplifyGEPInst(Type *SrcTy, ArrayRef Ops, - const SimplifyQuery &Q); +/// Given operands for an FCmpInst, fold the result or return null. +Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, + FastMathFlags FMF, const SimplifyQuery &Q); - /// Given operands for an InsertValueInst, fold the result or return null. - Value *SimplifyInsertValueInst(Value *Agg, Value *Val, - ArrayRef Idxs, - const SimplifyQuery &Q); +/// Given operands for a SelectInst, fold the result or return null. +Value *SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal, + const SimplifyQuery &Q); - /// Given operands for an ExtractValueInst, fold the result or return null. - Value *SimplifyExtractValueInst(Value *Agg, ArrayRef Idxs, - const SimplifyQuery &Q); +/// Given operands for a GetElementPtrInst, fold the result or return null. +Value *SimplifyGEPInst(Type *SrcTy, ArrayRef Ops, + const SimplifyQuery &Q); - /// Given operands for an ExtractElementInst, fold the result or return null. - Value *SimplifyExtractElementInst(Value *Vec, Value *Idx, - const SimplifyQuery &Q); +/// Given operands for an InsertValueInst, fold the result or return null. +Value *SimplifyInsertValueInst(Value *Agg, Value *Val, ArrayRef Idxs, + const SimplifyQuery &Q); - /// Given operands for a CastInst, fold the result or return null. - Value *SimplifyCastInst(unsigned CastOpc, Value *Op, Type *Ty, - const SimplifyQuery &Q); +/// Given operands for an ExtractValueInst, fold the result or return null. +Value *SimplifyExtractValueInst(Value *Agg, ArrayRef Idxs, + const SimplifyQuery &Q); - /// Given operands for a ShuffleVectorInst, fold the result or return null. - Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask, - Type *RetTy, const SimplifyQuery &Q); +/// Given operands for an ExtractElementInst, fold the result or return null. +Value *SimplifyExtractElementInst(Value *Vec, Value *Idx, + const SimplifyQuery &Q); - //=== Helper functions for higher up the class hierarchy. +/// Given operands for a CastInst, fold the result or return null. +Value *SimplifyCastInst(unsigned CastOpc, Value *Op, Type *Ty, + const SimplifyQuery &Q); +/// Given operands for a ShuffleVectorInst, fold the result or return null. +Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask, + Type *RetTy, const SimplifyQuery &Q); - /// Given operands for a CmpInst, fold the result or return null. - Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, - const SimplifyQuery &Q); +//=== Helper functions for higher up the class hierarchy. - /// Given operands for a BinaryOperator, fold the result or return null. - Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, +/// Given operands for a CmpInst, fold the result or return null. +Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, const SimplifyQuery &Q); - /// Given operands for an FP BinaryOperator, fold the result or return null. - /// In contrast to SimplifyBinOp, try to use FastMathFlag when folding the - /// result. In case we don't need FastMathFlags, simply fall to SimplifyBinOp. - Value *SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS, - FastMathFlags FMF, const SimplifyQuery &Q); - - /// Given a function and iterators over arguments, fold the result or return - /// null. 
- Value *SimplifyCall(Value *V, User::op_iterator ArgBegin, - User::op_iterator ArgEnd, const SimplifyQuery &Q); - - /// Given a function and set of arguments, fold the result or return null. - Value *SimplifyCall(Value *V, ArrayRef Args, const SimplifyQuery &Q); - - /// See if we can compute a simplified version of this instruction. If not, - /// return null. - Value *SimplifyInstruction(Instruction *I, const SimplifyQuery &Q, - OptimizationRemarkEmitter *ORE = nullptr); - - /// Replace all uses of 'I' with 'SimpleV' and simplify the uses recursively. - /// - /// This first performs a normal RAUW of I with SimpleV. It then recursively - /// attempts to simplify those users updated by the operation. The 'I' - /// instruction must not be equal to the simplified value 'SimpleV'. - /// - /// The function returns true if any simplifications were performed. - bool replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV, - const TargetLibraryInfo *TLI = nullptr, - const DominatorTree *DT = nullptr, - AssumptionCache *AC = nullptr); - - /// Recursively attempt to simplify an instruction. - /// - /// This routine uses SimplifyInstruction to simplify 'I', and if successful - /// replaces uses of 'I' with the simplified value. It then recurses on each - /// of the users impacted. It returns true if any simplifications were - /// performed. - bool recursivelySimplifyInstruction(Instruction *I, - const TargetLibraryInfo *TLI = nullptr, - const DominatorTree *DT = nullptr, - AssumptionCache *AC = nullptr); - // These helper functions return a SimplifyQuery structure that contains as - // many of the optional analysis we use as are currently valid. This is the - // strongly preferred way of constructing SimplifyQuery in passes. - const SimplifyQuery getBestSimplifyQuery(Pass &, Function &); - template - const SimplifyQuery getBestSimplifyQuery(AnalysisManager &, - Function &); - const SimplifyQuery getBestSimplifyQuery(LoopStandardAnalysisResults &, - const DataLayout &); +/// Given operands for a BinaryOperator, fold the result or return null. +Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, + const SimplifyQuery &Q); + +/// Given operands for an FP BinaryOperator, fold the result or return null. +/// In contrast to SimplifyBinOp, try to use FastMathFlag when folding the +/// result. In case we don't need FastMathFlags, simply fall to SimplifyBinOp. +Value *SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS, + FastMathFlags FMF, const SimplifyQuery &Q); + +/// Given a function and iterators over arguments, fold the result or return +/// null. +Value *SimplifyCall(ImmutableCallSite CS, Value *V, User::op_iterator ArgBegin, + User::op_iterator ArgEnd, const SimplifyQuery &Q); + +/// Given a function and set of arguments, fold the result or return null. +Value *SimplifyCall(ImmutableCallSite CS, Value *V, ArrayRef Args, + const SimplifyQuery &Q); + +/// See if we can compute a simplified version of this instruction. If not, +/// return null. +Value *SimplifyInstruction(Instruction *I, const SimplifyQuery &Q, + OptimizationRemarkEmitter *ORE = nullptr); + +/// Replace all uses of 'I' with 'SimpleV' and simplify the uses recursively. +/// +/// This first performs a normal RAUW of I with SimpleV. It then recursively +/// attempts to simplify those users updated by the operation. The 'I' +/// instruction must not be equal to the simplified value 'SimpleV'. +/// +/// The function returns true if any simplifications were performed. 
+bool replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV, + const TargetLibraryInfo *TLI = nullptr, + const DominatorTree *DT = nullptr, + AssumptionCache *AC = nullptr); + +/// Recursively attempt to simplify an instruction. +/// +/// This routine uses SimplifyInstruction to simplify 'I', and if successful +/// replaces uses of 'I' with the simplified value. It then recurses on each +/// of the users impacted. It returns true if any simplifications were +/// performed. +bool recursivelySimplifyInstruction(Instruction *I, + const TargetLibraryInfo *TLI = nullptr, + const DominatorTree *DT = nullptr, + AssumptionCache *AC = nullptr); + +// These helper functions return a SimplifyQuery structure that contains as +// many of the optional analyses we use as are currently valid. This is the +// strongly preferred way of constructing SimplifyQuery in passes. +const SimplifyQuery getBestSimplifyQuery(Pass &, Function &); +template <class T, class... TArgs> +const SimplifyQuery getBestSimplifyQuery(AnalysisManager<T, TArgs...> &, + Function &); +const SimplifyQuery getBestSimplifyQuery(LoopStandardAnalysisResults &, + const DataLayout &); } // end namespace llvm #endif diff --git a/interpreter/llvm/src/include/llvm/Analysis/IteratedDominanceFrontier.h b/interpreter/llvm/src/include/llvm/Analysis/IteratedDominanceFrontier.h index af788c818f806..edaf4e9025bc8 100644 --- a/interpreter/llvm/src/include/llvm/Analysis/IteratedDominanceFrontier.h +++ b/interpreter/llvm/src/include/llvm/Analysis/IteratedDominanceFrontier.h @@ -42,11 +42,11 @@ namespace llvm { /// By default, liveness is not used to prune the IDF computation. /// The template parameters should be either BasicBlock* or Inverse<BasicBlock*>, depending on if you want the forward or reverse IDF. -template <class NodeTy> +template <class NodeTy, bool IsPostDom> class IDFCalculator { - -public: - IDFCalculator(DominatorTreeBase<BasicBlock> &DT) : DT(DT), useLiveIn(false) {} + public: + IDFCalculator(DominatorTreeBase<BasicBlock, IsPostDom> &DT) + : DT(DT), useLiveIn(false) {} /// \brief Give the IDF calculator the set of blocks in which the value is /// defined. This is equivalent to the set of starting blocks it should be @@ -84,13 +84,12 @@ class IDFCalculator { void calculate(SmallVectorImpl<BasicBlock *> &IDFBlocks); private: - DominatorTreeBase<BasicBlock> &DT; - bool useLiveIn; - DenseMap<DomTreeNode *, unsigned> DomLevels; - const SmallPtrSetImpl<BasicBlock *> *LiveInBlocks; - const SmallPtrSetImpl<BasicBlock *> *DefBlocks; + DominatorTreeBase<BasicBlock, IsPostDom> &DT; + bool useLiveIn; + const SmallPtrSetImpl<BasicBlock *> *LiveInBlocks; + const SmallPtrSetImpl<BasicBlock *> *DefBlocks; }; -typedef IDFCalculator<BasicBlock *> ForwardIDFCalculator; -typedef IDFCalculator<Inverse<BasicBlock *>> ReverseIDFCalculator; +typedef IDFCalculator<BasicBlock *, false> ForwardIDFCalculator; +typedef IDFCalculator<Inverse<BasicBlock *>, true> ReverseIDFCalculator; } #endif diff --git a/interpreter/llvm/src/include/llvm/Analysis/LazyBranchProbabilityInfo.h b/interpreter/llvm/src/include/llvm/Analysis/LazyBranchProbabilityInfo.h index 067d7ebfd1f53..e1d404b1ada23 100644 --- a/interpreter/llvm/src/include/llvm/Analysis/LazyBranchProbabilityInfo.h +++ b/interpreter/llvm/src/include/llvm/Analysis/LazyBranchProbabilityInfo.h @@ -24,6 +24,7 @@ namespace llvm { class AnalysisUsage; class Function; class LoopInfo; +class TargetLibraryInfo; /// \brief This is an alternative analysis pass to /// BranchProbabilityInfoWrapperPass. The difference is that with this pass the @@ -55,14 +56,15 @@ class LazyBranchProbabilityInfoPass : public FunctionPass { /// analysis without paying for the overhead if BPI doesn't end up being used.
class LazyBranchProbabilityInfo { public: - LazyBranchProbabilityInfo(const Function *F, const LoopInfo *LI) - : Calculated(false), F(F), LI(LI) {} + LazyBranchProbabilityInfo(const Function *F, const LoopInfo *LI, + const TargetLibraryInfo *TLI) + : Calculated(false), F(F), LI(LI), TLI(TLI) {} /// Retrieve the BPI with the branch probabilities computed. BranchProbabilityInfo &getCalculated() { if (!Calculated) { assert(F && LI && "call setAnalysis"); - BPI.calculate(*F, *LI); + BPI.calculate(*F, *LI, TLI); Calculated = true; } return BPI; @@ -77,6 +79,7 @@ class LazyBranchProbabilityInfoPass : public FunctionPass { bool Calculated; const Function *F; const LoopInfo *LI; + const TargetLibraryInfo *TLI; }; std::unique_ptr<LazyBranchProbabilityInfo> LBPI; diff --git a/interpreter/llvm/src/include/llvm/Analysis/LazyCallGraph.h b/interpreter/llvm/src/include/llvm/Analysis/LazyCallGraph.h index ad7f5c80549fc..a025f2275fb42 100644 --- a/interpreter/llvm/src/include/llvm/Analysis/LazyCallGraph.h +++ b/interpreter/llvm/src/include/llvm/Analysis/LazyCallGraph.h @@ -43,6 +43,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/iterator.h" #include "llvm/ADT/iterator_range.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" @@ -652,17 +653,23 @@ class LazyCallGraph { /// Make an existing internal ref edge into a call edge. /// /// This may form a larger cycle and thus collapse SCCs into TargetN's SCC. - /// If that happens, the deleted SCC pointers are returned. These SCCs are - /// not in a valid state any longer but the pointers will remain valid - /// until destruction of the parent graph instance for the purpose of - /// clearing cached information. + /// If that happens, the optional callback \p MergeCB will be invoked (if + /// provided) on the SCCs being merged away prior to actually performing + /// the merge. Note that this will never include the target SCC as that + /// will be the SCC functions are merged into to resolve the cycle. Once + /// this function returns, these merged SCCs are not in a valid state but + /// the pointers will remain valid until destruction of the parent graph + /// instance for the purpose of clearing cached information. This function + /// also returns 'true' if a cycle was formed and some SCCs merged away as + /// a convenience. /// /// After this operation, both SourceN's SCC and TargetN's SCC may move /// position within this RefSCC's postorder list. Any SCCs merged are /// merged into the TargetN's SCC in order to preserve reachability analyses /// which took place on that SCC. - SmallVector<SCC *, 1> switchInternalEdgeToCall(Node &SourceN, - Node &TargetN); + bool switchInternalEdgeToCall( + Node &SourceN, Node &TargetN, + function_ref<void(ArrayRef<SCC *> MergedSCCs)> MergeCB = {}); /// Make an existing internal call edge between separate SCCs into a ref /// edge. @@ -902,7 +909,7 @@ class LazyCallGraph { /// This sets up the graph and computes all of the entry points of the graph. /// No function definitions are scanned until their nodes in the graph are /// requested during traversal. - LazyCallGraph(Module &M); + LazyCallGraph(Module &M, TargetLibraryInfo &TLI); LazyCallGraph(LazyCallGraph &&G); LazyCallGraph &operator=(LazyCallGraph &&RHS); @@ -960,6 +967,22 @@ class LazyCallGraph { return insertInto(F, N); } + /// Get the sequence of known and defined library functions. + /// + /// These functions, because they are known to LLVM, can have calls + /// introduced out of thin air from arbitrary IR.
+ ArrayRef<Function *> getLibFunctions() const { + return LibFunctions.getArrayRef(); + } + + /// Test whether a function is a known and defined library function tracked by + /// the call graph. + /// + /// Because these functions are known to LLVM they are specially modeled in + /// the call graph and even when all IR-level references have been removed + /// remain active and reachable. + bool isLibFunction(Function &F) const { return LibFunctions.count(&F); } + ///@{ /// \name Pre-SCC Mutation API /// @@ -1094,6 +1117,11 @@ class LazyCallGraph { /// These are all of the RefSCCs which have no children. SmallVector<RefSCC *, 4> LeafRefSCCs; + /// Defined functions that are also known library functions which the + /// optimizer can reason about and therefore might introduce calls to out of + /// thin air. + SmallSetVector<Function *, 4> LibFunctions; + /// Helper to insert a new function, with an already looked-up entry in /// the NodeMap. Node &insertInto(Function &F, Node *&MappedN); @@ -1210,8 +1238,8 @@ class LazyCallGraphAnalysis : public AnalysisInfoMixin<LazyCallGraphAnalysis> { /// /// This just builds the set of entry points to the call graph. The rest is /// built lazily as it is walked. - LazyCallGraph run(Module &M, ModuleAnalysisManager &) { - return LazyCallGraph(M); + LazyCallGraph run(Module &M, ModuleAnalysisManager &AM) { + return LazyCallGraph(M, AM.getResult<TargetLibraryAnalysis>(M)); } }; diff --git a/interpreter/llvm/src/include/llvm/Analysis/LazyValueInfo.h b/interpreter/llvm/src/include/llvm/Analysis/LazyValueInfo.h index 49e088e533dc1..787c88cc6ec1a 100644 --- a/interpreter/llvm/src/include/llvm/Analysis/LazyValueInfo.h +++ b/interpreter/llvm/src/include/llvm/Analysis/LazyValueInfo.h @@ -93,6 +93,13 @@ class LazyValueInfo { Constant *getConstantOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB, Instruction *CxtI = nullptr); + /// Return the ConstantRange constraint that is known to hold for the + /// specified value on the specified edge. This may only be called + /// on integer-typed Values. + ConstantRange getConstantRangeOnEdge(Value *V, BasicBlock *FromBB, + BasicBlock *ToBB, + Instruction *CxtI = nullptr); + /// Inform the analysis cache that we have threaded an edge from /// PredBB to OldSucc to be from PredBB to NewSucc instead. void threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc, BasicBlock *NewSucc); @@ -100,8 +107,11 @@ class LazyValueInfo { /// Inform the analysis cache that we have erased a block. void eraseBlock(BasicBlock *BB); - /// Print the \LazyValueInfoCache. - void printCache(Function &F, raw_ostream &OS); + /// Print the \LazyValueInfo Analysis. + /// We pass in the DTree that is required for identifying which basic blocks + /// we can solve/print for, in the LVIPrinter. The DT is optional + /// in LVI, so we need to pass it here as an argument. + void printLVI(Function &F, DominatorTree &DTree, raw_ostream &OS); // For old PM pass. Delete once LazyValueInfoWrapperPass is gone. void releaseMemory(); diff --git a/interpreter/llvm/src/include/llvm/Analysis/Loads.h b/interpreter/llvm/src/include/llvm/Analysis/Loads.h index a59c1f88e229f..f110c28bfc6d2 100644 --- a/interpreter/llvm/src/include/llvm/Analysis/Loads.h +++ b/interpreter/llvm/src/include/llvm/Analysis/Loads.h @@ -39,6 +39,15 @@ bool isDereferenceableAndAlignedPointer(const Value *V, unsigned Align, const Instruction *CtxI = nullptr, const DominatorTree *DT = nullptr); +/// Returns true if V is always dereferenceable for Size bytes with alignment +/// greater than or equal to that requested.
If the context instruction is specified, +/// this performs context-sensitive analysis and returns true if the pointer is +/// dereferenceable at the specified instruction. +bool isDereferenceableAndAlignedPointer(const Value *V, unsigned Align, + const APInt &Size, const DataLayout &DL, + const Instruction *CtxI = nullptr, + const DominatorTree *DT = nullptr); + /// Return true if we know that executing a load from this value cannot trap. /// /// If DT and ScanFrom are specified this method performs context-sensitive diff --git a/interpreter/llvm/src/include/llvm/Analysis/LoopInfo.h b/interpreter/llvm/src/include/llvm/Analysis/LoopInfo.h index 096df1e421a77..70ce9a8705175 100644 --- a/interpreter/llvm/src/include/llvm/Analysis/LoopInfo.h +++ b/interpreter/llvm/src/include/llvm/Analysis/LoopInfo.h @@ -56,7 +56,8 @@ class Loop; class MDNode; class PHINode; class raw_ostream; -template <class N> class DominatorTreeBase; +template <typename T, bool IsPostDom> +class DominatorTreeBase; template <class N, class M> class LoopInfoBase; template <class N, class M> class LoopBase; @@ -663,12 +664,12 @@ class LoopInfoBase { } /// Create the loop forest using a stable algorithm. - void analyze(const DominatorTreeBase<BlockT> &DomTree); + void analyze(const DominatorTreeBase<BlockT, false> &DomTree); // Debugging void print(raw_ostream &OS) const; - void verify(const DominatorTreeBase<BlockT> &DomTree) const; + void verify(const DominatorTreeBase<BlockT, false> &DomTree) const; }; // Implementation in LoopInfoImpl.h @@ -683,7 +684,7 @@ class LoopInfo : public LoopInfoBase<BasicBlock, Loop> { LoopInfo(const LoopInfo &) = delete; public: LoopInfo() {} - explicit LoopInfo(const DominatorTreeBase<BasicBlock> &DomTree); + explicit LoopInfo(const DominatorTreeBase<BasicBlock, false> &DomTree); LoopInfo(LoopInfo &&Arg) : BaseT(std::move(static_cast<BaseT &>(Arg))) {} LoopInfo &operator=(LoopInfo &&RHS) { diff --git a/interpreter/llvm/src/include/llvm/Analysis/LoopInfoImpl.h b/interpreter/llvm/src/include/llvm/Analysis/LoopInfoImpl.h index 249fa572c024f..e9177e68ed77d 100644 --- a/interpreter/llvm/src/include/llvm/Analysis/LoopInfoImpl.h +++ b/interpreter/llvm/src/include/llvm/Analysis/LoopInfoImpl.h @@ -17,8 +17,8 @@ #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/PostOrderIterator.h" -#include "llvm/ADT/SetVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/IR/Dominators.h" @@ -91,8 +91,9 @@ getExitEdges(SmallVectorImpl<Edge> &ExitEdges) const { /// getLoopPreheader - If there is a preheader for this loop, return it. A /// loop has a preheader if there is only one edge to the header of the loop -/// from outside of the loop. If this is the case, the block branching to the -/// header of the loop is the preheader node. +/// from outside of the loop and it is legal to hoist instructions into the +/// predecessor. If this is the case, the block branching to the header of the +/// loop is the preheader node. /// /// This method returns null if there is no preheader for the loop. /// @@ -102,6 +103,10 @@ BlockT *LoopBase<BlockT, LoopT>::getLoopPreheader() const { BlockT *Out = getLoopPredecessor(); if (!Out) return nullptr; + // Make sure we are allowed to hoist instructions into the predecessor. + if (!Out->isLegalToHoistInto()) + return nullptr; + // Make sure there is only one exit out of the preheader. typedef GraphTraits<BlockT *> BlockTraits; typename BlockTraits::ChildIteratorType SI = BlockTraits::child_begin(Out); @@ -335,10 +340,10 @@ void LoopBase<BlockT, LoopT>::print(raw_ostream &OS, unsigned Depth, /// Discover a subloop with the specified backedges such that: All blocks within /// this loop are mapped to this loop or a subloop.
And all subloops within this /// loop have their parent loop set to this loop or a subloop. -template -static void discoverAndMapSubloop(LoopT *L, ArrayRef Backedges, - LoopInfoBase *LI, - const DominatorTreeBase &DomTree) { +template +static void discoverAndMapSubloop( + LoopT *L, ArrayRef Backedges, LoopInfoBase *LI, + const DomTreeBase &DomTree) { typedef GraphTraits > InvBlockTraits; unsigned NumBlocks = 0; @@ -457,10 +462,9 @@ void PopulateLoopsDFS::insertIntoLoop(BlockT *Block) { /// /// The Block vectors are inclusive, so step 3 requires loop-depth number of /// insertions per block. -template -void LoopInfoBase:: -analyze(const DominatorTreeBase &DomTree) { - +template +void LoopInfoBase::analyze( + const DomTreeBase &DomTree) { // Postorder traversal of the dominator tree. const DomTreeNodeBase *DomRoot = DomTree.getRootNode(); for (auto DomNode : post_order(DomRoot)) { @@ -602,7 +606,7 @@ static void compareLoops(const LoopT *L, const LoopT *OtherL, template void LoopInfoBase::verify( - const DominatorTreeBase &DomTree) const { + const DomTreeBase &DomTree) const { DenseSet Loops; for (iterator I = begin(), E = end(); I != E; ++I) { assert(!(*I)->getParentLoop() && "Top-level loop has a parent!"); diff --git a/interpreter/llvm/src/include/llvm/Analysis/LoopPass.h b/interpreter/llvm/src/include/llvm/Analysis/LoopPass.h index 496ae189e57ba..75e7688bbdc26 100644 --- a/interpreter/llvm/src/include/llvm/Analysis/LoopPass.h +++ b/interpreter/llvm/src/include/llvm/Analysis/LoopPass.h @@ -126,9 +126,8 @@ class LPPassManager : public FunctionPass, public PMDataManager { } public: - // Add a new loop into the loop queue as a child of the given parent, or at - // the top level if \c ParentLoop is null. - Loop &addLoop(Loop *ParentLoop); + // Add a new loop into the loop queue. 
+ void addLoop(Loop &L); //===--------------------------------------------------------------------===// /// SimpleAnalysis - Provides simple interface to update analysis info diff --git a/interpreter/llvm/src/include/llvm/Analysis/MemoryBuiltins.h b/interpreter/llvm/src/include/llvm/Analysis/MemoryBuiltins.h index 60dafccd84bdf..23ab372703eea 100644 --- a/interpreter/llvm/src/include/llvm/Analysis/MemoryBuiltins.h +++ b/interpreter/llvm/src/include/llvm/Analysis/MemoryBuiltins.h @@ -224,6 +224,9 @@ class ObjectSizeOffsetVisitor SizeOffsetType visitSelectInst(SelectInst &I); SizeOffsetType visitUndefValue(UndefValue&); SizeOffsetType visitInstruction(Instruction &I); + +private: + bool CheckedZextOrTrunc(APInt &I); }; typedef std::pair SizeOffsetEvalType; diff --git a/interpreter/llvm/src/include/llvm/Analysis/MemoryDependenceAnalysis.h b/interpreter/llvm/src/include/llvm/Analysis/MemoryDependenceAnalysis.h index a401887016c94..1dbbf6cc6addf 100644 --- a/interpreter/llvm/src/include/llvm/Analysis/MemoryDependenceAnalysis.h +++ b/interpreter/llvm/src/include/llvm/Analysis/MemoryDependenceAnalysis.h @@ -15,8 +15,8 @@ #define LLVM_ANALYSIS_MEMORYDEPENDENCEANALYSIS_H #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/PointerSumType.h" #include "llvm/ADT/PointerEmbeddedInt.h" +#include "llvm/ADT/PointerSumType.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/IR/BasicBlock.h" diff --git a/interpreter/llvm/src/include/llvm/Analysis/MemorySSA.h b/interpreter/llvm/src/include/llvm/Analysis/MemorySSA.h index db31ae9f4f109..5cec2bfb0cfbf 100644 --- a/interpreter/llvm/src/include/llvm/Analysis/MemorySSA.h +++ b/interpreter/llvm/src/include/llvm/Analysis/MemorySSA.h @@ -84,6 +84,7 @@ #include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/PHITransAddr.h" #include "llvm/IR/BasicBlock.h" +#include "llvm/IR/DerivedUser.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Module.h" #include "llvm/IR/OperandTraits.h" @@ -127,7 +128,7 @@ using const_memoryaccess_def_iterator = // \brief The base for all memory accesses. All memory accesses in a block are // linked together using an intrusive list. class MemoryAccess - : public User, + : public DerivedUser, public ilist_node>, public ilist_node> { public: @@ -138,22 +139,20 @@ class MemoryAccess // Methods for support type inquiry through isa, cast, and // dyn_cast - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { unsigned ID = V->getValueID(); return ID == MemoryUseVal || ID == MemoryPhiVal || ID == MemoryDefVal; } MemoryAccess(const MemoryAccess &) = delete; MemoryAccess &operator=(const MemoryAccess &) = delete; - ~MemoryAccess() override; - void *operator new(size_t, unsigned) = delete; void *operator new(size_t) = delete; BasicBlock *getBlock() const { return Block; } - virtual void print(raw_ostream &OS) const = 0; - virtual void dump() const; + void print(raw_ostream &OS) const; + void dump() const; /// \brief The user iterators for a memory access typedef user_iterator iterator; @@ -207,11 +206,12 @@ class MemoryAccess /// \brief Used for debugging and tracking things about MemoryAccesses. /// Guaranteed unique among MemoryAccesses, no guarantees otherwise. 
- virtual unsigned getID() const = 0; + inline unsigned getID() const; - MemoryAccess(LLVMContext &C, unsigned Vty, BasicBlock *BB, - unsigned NumOperands) - : User(Type::getVoidTy(C), Vty, nullptr, NumOperands), Block(BB) {} + MemoryAccess(LLVMContext &C, unsigned Vty, DeleteValueTy DeleteValue, + BasicBlock *BB, unsigned NumOperands) + : DerivedUser(Type::getVoidTy(C), Vty, nullptr, NumOperands, DeleteValue), + Block(BB) {} private: BasicBlock *Block; @@ -231,7 +231,6 @@ inline raw_ostream &operator<<(raw_ostream &OS, const MemoryAccess &MA) { /// MemoryDef instead. class MemoryUseOrDef : public MemoryAccess { public: - void *operator new(size_t, unsigned) = delete; void *operator new(size_t) = delete; DECLARE_TRANSPARENT_OPERAND_ACCESSORS(MemoryAccess); @@ -242,27 +241,27 @@ class MemoryUseOrDef : public MemoryAccess { /// \brief Get the access that produces the memory state used by this Use. MemoryAccess *getDefiningAccess() const { return getOperand(0); } - static inline bool classof(const Value *MA) { + static bool classof(const Value *MA) { return MA->getValueID() == MemoryUseVal || MA->getValueID() == MemoryDefVal; } // Sadly, these have to be public because they are needed in some of the // iterators. - virtual bool isOptimized() const = 0; - virtual MemoryAccess *getOptimized() const = 0; - virtual void setOptimized(MemoryAccess *) = 0; + inline bool isOptimized() const; + inline MemoryAccess *getOptimized() const; + inline void setOptimized(MemoryAccess *); /// \brief Reset the ID of what this MemoryUse was optimized to, causing it to /// be rewalked by the walker if necessary. /// This really should only be called by tests. - virtual void resetOptimized() = 0; + inline void resetOptimized(); protected: friend class MemorySSA; friend class MemorySSAUpdater; MemoryUseOrDef(LLVMContext &C, MemoryAccess *DMA, unsigned Vty, - Instruction *MI, BasicBlock *BB) - : MemoryAccess(C, Vty, BB, 1), MemoryInst(MI) { + DeleteValueTy DeleteValue, Instruction *MI, BasicBlock *BB) + : MemoryAccess(C, Vty, DeleteValue, BB, 1), MemoryInst(MI) { setDefiningAccess(DMA); } void setDefiningAccess(MemoryAccess *DMA, bool Optimized = false) { @@ -292,42 +291,40 @@ class MemoryUse final : public MemoryUseOrDef { DECLARE_TRANSPARENT_OPERAND_ACCESSORS(MemoryAccess); MemoryUse(LLVMContext &C, MemoryAccess *DMA, Instruction *MI, BasicBlock *BB) - : MemoryUseOrDef(C, DMA, MemoryUseVal, MI, BB), OptimizedID(0) {} + : MemoryUseOrDef(C, DMA, MemoryUseVal, deleteMe, MI, BB), + OptimizedID(0) {} // allocate space for exactly one operand void *operator new(size_t s) { return User::operator new(s, 1); } - void *operator new(size_t, unsigned) = delete; - static inline bool classof(const Value *MA) { + static bool classof(const Value *MA) { return MA->getValueID() == MemoryUseVal; } - void print(raw_ostream &OS) const override; + void print(raw_ostream &OS) const; - virtual void setOptimized(MemoryAccess *DMA) override { + void setOptimized(MemoryAccess *DMA) { OptimizedID = DMA->getID(); setOperand(0, DMA); } - virtual bool isOptimized() const override { + bool isOptimized() const { return getDefiningAccess() && OptimizedID == getDefiningAccess()->getID(); } - virtual MemoryAccess *getOptimized() const override { + MemoryAccess *getOptimized() const { return getDefiningAccess(); } - virtual void resetOptimized() override { + void resetOptimized() { OptimizedID = INVALID_MEMORYACCESS_ID; } protected: friend class MemorySSA; - unsigned getID() const override { - llvm_unreachable("MemoryUses do not have IDs"); - } - 
private: + static void deleteMe(DerivedUser *Self); + unsigned int OptimizedID; }; @@ -350,38 +347,38 @@ class MemoryDef final : public MemoryUseOrDef { MemoryDef(LLVMContext &C, MemoryAccess *DMA, Instruction *MI, BasicBlock *BB, unsigned Ver) - : MemoryUseOrDef(C, DMA, MemoryDefVal, MI, BB), ID(Ver), - Optimized(nullptr), OptimizedID(INVALID_MEMORYACCESS_ID) {} + : MemoryUseOrDef(C, DMA, MemoryDefVal, deleteMe, MI, BB), + ID(Ver), Optimized(nullptr), OptimizedID(INVALID_MEMORYACCESS_ID) {} // allocate space for exactly one operand void *operator new(size_t s) { return User::operator new(s, 1); } - void *operator new(size_t, unsigned) = delete; - static inline bool classof(const Value *MA) { + static bool classof(const Value *MA) { return MA->getValueID() == MemoryDefVal; } - virtual void setOptimized(MemoryAccess *MA) override { + void setOptimized(MemoryAccess *MA) { Optimized = MA; OptimizedID = getDefiningAccess()->getID(); } - virtual MemoryAccess *getOptimized() const override { return Optimized; } - virtual bool isOptimized() const override { + MemoryAccess *getOptimized() const { return Optimized; } + bool isOptimized() const { return getOptimized() && getDefiningAccess() && OptimizedID == getDefiningAccess()->getID(); } - virtual void resetOptimized() override { + void resetOptimized() { OptimizedID = INVALID_MEMORYACCESS_ID; } - void print(raw_ostream &OS) const override; + void print(raw_ostream &OS) const; -protected: friend class MemorySSA; - unsigned getID() const override { return ID; } + unsigned getID() const { return ID; } private: + static void deleteMe(DerivedUser *Self); + const unsigned ID; MemoryAccess *Optimized; unsigned int OptimizedID; @@ -432,12 +429,11 @@ class MemoryPhi final : public MemoryAccess { DECLARE_TRANSPARENT_OPERAND_ACCESSORS(MemoryAccess); MemoryPhi(LLVMContext &C, BasicBlock *BB, unsigned Ver, unsigned NumPreds = 0) - : MemoryAccess(C, MemoryPhiVal, BB, 0), ID(Ver), ReservedSpace(NumPreds) { + : MemoryAccess(C, MemoryPhiVal, deleteMe, BB, 0), ID(Ver), + ReservedSpace(NumPreds) { allocHungoffUses(ReservedSpace); } - void *operator new(size_t, unsigned) = delete; - // Block iterator interface. This provides access to the list of incoming // basic blocks, which parallels the list of incoming values. 
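The MemorySSA hunks above replace virtual dispatch (virtual print/getID, a virtual destructor) with a deletion callback stored in the DerivedUser base: each subclass passes a static deleteMe to the base constructor. A minimal standalone sketch of that pattern, under the assumption that destruction is always routed through the callback (names with the "Like" suffix are stand-ins, not the real classes):

```cpp
#include <cstdio>

// The base stores a plain function pointer supplied by each subclass;
// deleting through the base invokes it, so the correct derived destructor
// and operator delete run without any vtable.
struct DerivedUserLike {
  using DeleteValueTy = void (*)(DerivedUserLike *);
  explicit DerivedUserLike(DeleteValueTy D) : DeleteValue(D) {}
  void destroy() { DeleteValue(this); } // called instead of `delete base`
private:
  DeleteValueTy DeleteValue;
};

struct MemoryUseLike : DerivedUserLike {
  MemoryUseLike() : DerivedUserLike(deleteMe) {}
  ~MemoryUseLike() { std::puts("~MemoryUseLike"); }
private:
  static void deleteMe(DerivedUserLike *Self) {
    delete static_cast<MemoryUseLike *>(Self); // recovers the dynamic type
  }
};

int main() {
  DerivedUserLike *MA = new MemoryUseLike();
  MA->destroy(); // runs ~MemoryUseLike with no virtual dispatch involved
}
```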
typedef BasicBlock **block_iterator; @@ -530,11 +526,13 @@ class MemoryPhi final : public MemoryAccess { return getIncomingValue(Idx); } - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return V->getValueID() == MemoryPhiVal; } - void print(raw_ostream &OS) const override; + void print(raw_ostream &OS) const; + + unsigned getID() const { return ID; } protected: friend class MemorySSA; @@ -546,8 +544,6 @@ class MemoryPhi final : public MemoryAccess { User::allocHungoffUses(N, /* IsPhi */ true); } - unsigned getID() const final { return ID; } - private: // For debugging only const unsigned ID; @@ -561,8 +557,45 @@ class MemoryPhi final : public MemoryAccess { ReservedSpace = std::max(E + E / 2, 2u); growHungoffUses(ReservedSpace, /* IsPhi */ true); } + + static void deleteMe(DerivedUser *Self); }; +inline unsigned MemoryAccess::getID() const { + assert((isa(this) || isa(this)) && + "only memory defs and phis have ids"); + if (const auto *MD = dyn_cast(this)) + return MD->getID(); + return cast(this)->getID(); +} + +inline bool MemoryUseOrDef::isOptimized() const { + if (const auto *MD = dyn_cast(this)) + return MD->isOptimized(); + return cast(this)->isOptimized(); +} + +inline MemoryAccess *MemoryUseOrDef::getOptimized() const { + if (const auto *MD = dyn_cast(this)) + return MD->getOptimized(); + return cast(this)->getOptimized(); +} + +inline void MemoryUseOrDef::setOptimized(MemoryAccess *MA) { + if (auto *MD = dyn_cast(this)) + MD->setOptimized(MA); + else + cast(this)->setOptimized(MA); +} + +inline void MemoryUseOrDef::resetOptimized() { + if (auto *MD = dyn_cast(this)) + MD->resetOptimized(); + else + cast(this)->resetOptimized(); +} + + template <> struct OperandTraits : public HungoffOperandTraits<2> {}; DEFINE_TRANSPARENT_OPERAND_ACCESSORS(MemoryPhi, MemoryAccess) diff --git a/interpreter/llvm/src/include/llvm/Analysis/MemorySSAUpdater.h b/interpreter/llvm/src/include/llvm/Analysis/MemorySSAUpdater.h index d30eeeaa95b6a..b36b2f01dac62 100644 --- a/interpreter/llvm/src/include/llvm/Analysis/MemorySSAUpdater.h +++ b/interpreter/llvm/src/include/llvm/Analysis/MemorySSAUpdater.h @@ -34,6 +34,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/MemorySSA.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Module.h" @@ -45,7 +46,6 @@ #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Analysis/MemorySSA.h" namespace llvm { diff --git a/interpreter/llvm/src/include/llvm/Analysis/ObjCARCAnalysisUtils.h b/interpreter/llvm/src/include/llvm/Analysis/ObjCARCAnalysisUtils.h index 5f4d8ecbbfbbe..e80412a305641 100644 --- a/interpreter/llvm/src/include/llvm/Analysis/ObjCARCAnalysisUtils.h +++ b/interpreter/llvm/src/include/llvm/Analysis/ObjCARCAnalysisUtils.h @@ -23,8 +23,8 @@ #ifndef LLVM_LIB_ANALYSIS_OBJCARCANALYSISUTILS_H #define LLVM_LIB_ANALYSIS_OBJCARCANALYSISUTILS_H -#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Optional.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/ObjCARCInstKind.h" #include "llvm/Analysis/Passes.h" diff --git a/interpreter/llvm/src/include/llvm/Analysis/ObjCARCInstKind.h b/interpreter/llvm/src/include/llvm/Analysis/ObjCARCInstKind.h index 3b37ddf78f587..02ff035782388 100644 --- a/interpreter/llvm/src/include/llvm/Analysis/ObjCARCInstKind.h +++ b/interpreter/llvm/src/include/llvm/Analysis/ObjCARCInstKind.h @@ -10,8 +10,8 @@ #ifndef 
LLVM_ANALYSIS_OBJCARCINSTKIND_H #define LLVM_ANALYSIS_OBJCARCINSTKIND_H -#include "llvm/IR/Instructions.h" #include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" namespace llvm { namespace objcarc { diff --git a/interpreter/llvm/src/include/llvm/Analysis/OptimizationDiagnosticInfo.h b/interpreter/llvm/src/include/llvm/Analysis/OptimizationDiagnosticInfo.h index edd9140a3493b..64dd0737a1123 100644 --- a/interpreter/llvm/src/include/llvm/Analysis/OptimizationDiagnosticInfo.h +++ b/interpreter/llvm/src/include/llvm/Analysis/OptimizationDiagnosticInfo.h @@ -34,7 +34,7 @@ class Value; /// /// It allows reporting when optimizations are performed and when they are not /// along with the reasons for it. Hotness information of the corresponding -/// code region can be included in the remark if DiagnosticHotnessRequested is +/// code region can be included in the remark if DiagnosticsHotnessRequested is /// enabled in the LLVM context. class OptimizationRemarkEmitter { public: @@ -45,10 +45,10 @@ class OptimizationRemarkEmitter { /// analysis pass). /// /// Note that this ctor has a very different cost depending on whether - /// F->getContext().getDiagnosticHotnessRequested() is on or not. If it's off + /// F->getContext().getDiagnosticsHotnessRequested() is on or not. If it's off /// the operation is free. /// - /// Whereas if DiagnosticHotnessRequested is on, it is fairly expensive + /// Whereas if DiagnosticsHotnessRequested is on, it is fairly expensive /// operation since BFI and all its required analyses are computed. This is /// for example useful for CGSCC passes that can't use function analyses /// passes in the old PM. diff --git a/interpreter/llvm/src/include/llvm/Analysis/OrderedBasicBlock.h b/interpreter/llvm/src/include/llvm/Analysis/OrderedBasicBlock.h index 5aa813eb48324..2e716af1f60dd 100644 --- a/interpreter/llvm/src/include/llvm/Analysis/OrderedBasicBlock.h +++ b/interpreter/llvm/src/include/llvm/Analysis/OrderedBasicBlock.h @@ -58,6 +58,7 @@ class OrderedBasicBlock { /// comes before \p B in \p BB. This is a simplification that considers /// cached instruction positions and ignores other basic blocks, being /// only relevant to compare relative instructions positions inside \p BB. + /// Returns false for A == B. bool dominates(const Instruction *A, const Instruction *B); }; diff --git a/interpreter/llvm/src/include/llvm/Analysis/PostDominators.h b/interpreter/llvm/src/include/llvm/Analysis/PostDominators.h index 94ee3b03bb86f..17f2e8eaf4a25 100644 --- a/interpreter/llvm/src/include/llvm/Analysis/PostDominators.h +++ b/interpreter/llvm/src/include/llvm/Analysis/PostDominators.h @@ -22,10 +22,8 @@ namespace llvm { /// PostDominatorTree Class - Concrete subclass of DominatorTree that is used to /// compute the post-dominator tree. /// -struct PostDominatorTree : public DominatorTreeBase { - typedef DominatorTreeBase Base; - - PostDominatorTree() : DominatorTreeBase(true) {} +struct PostDominatorTree : public PostDomTreeBase { + typedef PostDomTreeBase Base; /// Handle invalidation explicitly. 
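The clarified OrderedBasicBlock contract ("Returns false for A == B") is easiest to see against the caching scheme it documents: instruction positions are numbered lazily, so repeated intra-block dominance queries avoid rescanning the instruction list. A standalone sketch with ints standing in for Instructions (OrderedBlock is illustrative, not the LLVM class):

```cpp
#include <cassert>
#include <unordered_map>
#include <vector>

class OrderedBlock {
  const std::vector<int> &Insts;         // the block's instruction sequence
  std::unordered_map<int, unsigned> Pos; // cached instruction -> index
  unsigned NextIdx = 0;

  unsigned indexOf(int I) {
    auto It = Pos.find(I);
    if (It != Pos.end()) return It->second; // already numbered
    // Extend the numbering only as far as needed to reach I.
    while (NextIdx < Insts.size()) {
      unsigned Idx = NextIdx;
      Pos[Insts[NextIdx++]] = Idx;
      if (Insts[Idx] == I) return Idx;
    }
    assert(false && "instruction not in block");
    return 0;
  }

public:
  explicit OrderedBlock(const std::vector<int> &I) : Insts(I) {}
  // Mirrors the documented contract: strict ordering, so false when A == B.
  bool dominates(int A, int B) { return indexOf(A) < indexOf(B); }
};

int main() {
  std::vector<int> BB = {10, 20, 30};
  OrderedBlock OBB(BB);
  assert(OBB.dominates(10, 30));
  assert(!OBB.dominates(20, 20)); // A == B -> false
}
```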
bool invalidate(Function &F, const PreservedAnalyses &PA, diff --git a/interpreter/llvm/src/include/llvm/Analysis/ProfileSummaryInfo.h b/interpreter/llvm/src/include/llvm/Analysis/ProfileSummaryInfo.h index c5f97083af4d0..6aaabe1d18890 100644 --- a/interpreter/llvm/src/include/llvm/Analysis/ProfileSummaryInfo.h +++ b/interpreter/llvm/src/include/llvm/Analysis/ProfileSummaryInfo.h @@ -55,6 +55,21 @@ class ProfileSummaryInfo { ProfileSummaryInfo(ProfileSummaryInfo &&Arg) : M(Arg.M), Summary(std::move(Arg.Summary)) {} + /// \brief Returns true if profile summary is available. + bool hasProfileSummary() { return computeSummary(); } + + /// \brief Returns true if module \c M has sample profile. + bool hasSampleProfile() { + return hasProfileSummary() && + Summary->getKind() == ProfileSummary::PSK_Sample; + } + + /// \brief Returns true if module \c M has instrumentation profile. + bool hasInstrumentationProfile() { + return hasProfileSummary() && + Summary->getKind() == ProfileSummary::PSK_Instr; + } + /// Handle the invalidation of this information. /// /// When used as a result of \c ProfileSummaryAnalysis this method will be diff --git a/interpreter/llvm/src/include/llvm/Analysis/RegionInfo.h b/interpreter/llvm/src/include/llvm/Analysis/RegionInfo.h index 16ee07fa31771..2e34928b28adf 100644 --- a/interpreter/llvm/src/include/llvm/Analysis/RegionInfo.h +++ b/interpreter/llvm/src/include/llvm/Analysis/RegionInfo.h @@ -37,18 +37,38 @@ #ifndef LLVM_ANALYSIS_REGIONINFO_H #define LLVM_ANALYSIS_REGIONINFO_H +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/GraphTraits.h" #include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/iterator_range.h" -#include "llvm/IR/CFG.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" +#include "llvm/Support/raw_ostream.h" +#include +#include #include #include #include +#include +#include +#include namespace llvm { +class DominanceFrontier; +class DominatorTree; +class Loop; +class LoopInfo; +struct PostDominatorTree; +class Region; +template class RegionBase; +class RegionInfo; +template class RegionInfoBase; +class RegionNode; + // Class to be specialized for different users of RegionInfo // (i.e. BasicBlocks or MachineBasicBlocks). This is only to avoid needing to // pass around an unreasonable number of template parameters. 
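The new ProfileSummaryInfo queries let passes distinguish "no profile", "sample profile", and "instrumentation profile" without poking at the summary directly. A minimal stand-in showing the intended call pattern (ProfileSummary and the PSK_* kinds follow the diff; the surrounding scaffolding is hypothetical):

```cpp
#include <cstdio>
#include <memory>

struct ProfileSummary {
  enum Kind { PSK_Instr, PSK_Sample };
  Kind K;
};

struct ProfileSummaryInfoLike {
  std::unique_ptr<ProfileSummary> Summary;
  bool hasProfileSummary() const { return Summary != nullptr; }
  bool hasSampleProfile() const {
    return hasProfileSummary() && Summary->K == ProfileSummary::PSK_Sample;
  }
  bool hasInstrumentationProfile() const {
    return hasProfileSummary() && Summary->K == ProfileSummary::PSK_Instr;
  }
};

int main() {
  ProfileSummaryInfoLike PSI;
  PSI.Summary =
      std::make_unique<ProfileSummary>(ProfileSummary{ProfileSummary::PSK_Sample});
  // A transform might relax its heuristics only when sample data is present:
  if (PSI.hasSampleProfile())
    std::puts("tune thresholds for sample-based (approximate) counts");
}
```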
@@ -59,37 +79,23 @@ struct RegionTraits { // RegionT // RegionNodeT // RegionInfoT - typedef typename FuncT_::UnknownRegionTypeError BrokenT; + using BrokenT = typename FuncT_::UnknownRegionTypeError; }; -class DominatorTree; -class DominanceFrontier; -class Loop; -class LoopInfo; -struct PostDominatorTree; -class raw_ostream; -class Region; -template -class RegionBase; -class RegionNode; -class RegionInfo; -template -class RegionInfoBase; - template <> struct RegionTraits { - typedef Function FuncT; - typedef BasicBlock BlockT; - typedef Region RegionT; - typedef RegionNode RegionNodeT; - typedef RegionInfo RegionInfoT; - typedef DominatorTree DomTreeT; - typedef DomTreeNode DomTreeNodeT; - typedef DominanceFrontier DomFrontierT; - typedef PostDominatorTree PostDomTreeT; - typedef Instruction InstT; - typedef Loop LoopT; - typedef LoopInfo LoopInfoT; + using FuncT = Function; + using BlockT = BasicBlock; + using RegionT = Region; + using RegionNodeT = RegionNode; + using RegionInfoT = RegionInfo; + using DomTreeT = DominatorTree; + using DomTreeNodeT = DomTreeNode; + using DomFrontierT = DominanceFrontier; + using PostDomTreeT = PostDominatorTree; + using InstT = Instruction; + using LoopT = Loop; + using LoopInfoT = LoopInfo; static unsigned getNumSuccessors(BasicBlock *BB) { return BB->getTerminator()->getNumSuccessors(); @@ -113,13 +119,10 @@ class RegionNodeBase { friend class RegionBase; public: - typedef typename Tr::BlockT BlockT; - typedef typename Tr::RegionT RegionT; + using BlockT = typename Tr::BlockT; + using RegionT = typename Tr::RegionT; private: - RegionNodeBase(const RegionNodeBase &) = delete; - const RegionNodeBase &operator=(const RegionNodeBase &) = delete; - /// This is the entry basic block that starts this region node. If this is a /// BasicBlock RegionNode, then entry is just the basic block, that this /// RegionNode represents. Otherwise it is the entry of this (Sub)RegionNode. @@ -150,6 +153,9 @@ class RegionNodeBase { : entry(Entry, isSubRegion), parent(Parent) {} public: + RegionNodeBase(const RegionNodeBase &) = delete; + RegionNodeBase &operator=(const RegionNodeBase &) = delete; + /// @brief Get the parent Region of this RegionNode. /// /// The parent Region is the Region this RegionNode belongs to. If for @@ -247,24 +253,22 @@ class RegionNodeBase { /// tree, the second one creates a graphical representation using graphviz. 
template class RegionBase : public RegionNodeBase { - typedef typename Tr::FuncT FuncT; - typedef typename Tr::BlockT BlockT; - typedef typename Tr::RegionInfoT RegionInfoT; - typedef typename Tr::RegionT RegionT; - typedef typename Tr::RegionNodeT RegionNodeT; - typedef typename Tr::DomTreeT DomTreeT; - typedef typename Tr::LoopT LoopT; - typedef typename Tr::LoopInfoT LoopInfoT; - typedef typename Tr::InstT InstT; - - typedef GraphTraits BlockTraits; - typedef GraphTraits> InvBlockTraits; - typedef typename BlockTraits::ChildIteratorType SuccIterTy; - typedef typename InvBlockTraits::ChildIteratorType PredIterTy; - friend class RegionInfoBase; - RegionBase(const RegionBase &) = delete; - const RegionBase &operator=(const RegionBase &) = delete; + + using FuncT = typename Tr::FuncT; + using BlockT = typename Tr::BlockT; + using RegionInfoT = typename Tr::RegionInfoT; + using RegionT = typename Tr::RegionT; + using RegionNodeT = typename Tr::RegionNodeT; + using DomTreeT = typename Tr::DomTreeT; + using LoopT = typename Tr::LoopT; + using LoopInfoT = typename Tr::LoopInfoT; + using InstT = typename Tr::InstT; + + using BlockTraits = GraphTraits; + using InvBlockTraits = GraphTraits>; + using SuccIterTy = typename BlockTraits::ChildIteratorType; + using PredIterTy = typename InvBlockTraits::ChildIteratorType; // Information necessary to manage this Region. RegionInfoT *RI; @@ -274,12 +278,12 @@ class RegionBase : public RegionNodeBase { // (The entry BasicBlock is part of RegionNode) BlockT *exit; - typedef std::vector> RegionSet; + using RegionSet = std::vector>; // The subregions of this region. RegionSet children; - typedef std::map> BBNodeMapT; + using BBNodeMapT = std::map>; // Save the BasicBlock RegionNodes that are element of this Region. mutable BBNodeMapT BBNodeMap; @@ -308,6 +312,9 @@ class RegionBase : public RegionNodeBase { RegionBase(BlockT *Entry, BlockT *Exit, RegionInfoT *RI, DomTreeT *DT, RegionT *Parent = nullptr); + RegionBase(const RegionBase &) = delete; + RegionBase &operator=(const RegionBase &) = delete; + /// Delete the Region and all its subregions. ~RegionBase(); @@ -543,8 +550,8 @@ class RegionBase : public RegionNodeBase { /// /// These iterators iterator over all subregions of this Region. //@{ - typedef typename RegionSet::iterator iterator; - typedef typename RegionSet::const_iterator const_iterator; + using iterator = typename RegionSet::iterator; + using const_iterator = typename RegionSet::const_iterator; iterator begin() { return children.begin(); } iterator end() { return children.end(); } @@ -563,12 +570,13 @@ class RegionBase : public RegionNodeBase { class block_iterator_wrapper : public df_iterator< typename std::conditional::type *> { - typedef df_iterator< - typename std::conditional::type *> super; + using super = + df_iterator< + typename std::conditional::type *>; public: - typedef block_iterator_wrapper Self; - typedef typename super::value_type value_type; + using Self = block_iterator_wrapper; + using value_type = typename super::value_type; // Construct the begin iterator. 
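The RegionInfo hunks mechanically replace typedef with using. The two forms name the same type, but only using also supports alias templates, which is the usual motivation for standardizing on it; a small self-contained illustration:

```cpp
#include <map>
#include <string>

typedef std::map<std::string, int> OldStyle; // typedef form
using NewStyle = std::map<std::string, int>; // using form, identical type

// Alias templates are only expressible with 'using':
template <typename V>
using StringMapLike = std::map<std::string, V>;

int main() {
  OldStyle A{{"x", 1}};
  NewStyle B = A;            // same type, so plain copy works
  StringMapLike<int> M = B;  // and so does the alias-template spelling
  return M["x"] - 1;
}
```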
block_iterator_wrapper(value_type Entry, value_type Exit) @@ -592,8 +600,8 @@ class RegionBase : public RegionNodeBase { } }; - typedef block_iterator_wrapper block_iterator; - typedef block_iterator_wrapper const_block_iterator; + using block_iterator = block_iterator_wrapper; + using const_block_iterator = block_iterator_wrapper; block_iterator block_begin() { return block_iterator(getEntry(), getExit()); } @@ -604,8 +612,8 @@ class RegionBase : public RegionNodeBase { } const_block_iterator block_end() const { return const_block_iterator(); } - typedef iterator_range block_range; - typedef iterator_range const_block_range; + using block_range = iterator_range; + using const_block_range = iterator_range; /// @brief Returns a range view of the basic blocks in the region. inline block_range blocks() { @@ -626,14 +634,14 @@ class RegionBase : public RegionNodeBase { /// are direct children of this Region. It does not iterate over any /// RegionNodes that are also element of a subregion of this Region. //@{ - typedef df_iterator, - false, GraphTraits> - element_iterator; + using element_iterator = + df_iterator, false, + GraphTraits>; - typedef df_iterator, false, - GraphTraits> - const_element_iterator; + using const_element_iterator = + df_iterator, false, + GraphTraits>; element_iterator element_begin(); element_iterator element_end(); @@ -661,29 +669,26 @@ inline raw_ostream &operator<<(raw_ostream &OS, const RegionNodeBase &Node); /// Tree. template class RegionInfoBase { - typedef typename Tr::BlockT BlockT; - typedef typename Tr::FuncT FuncT; - typedef typename Tr::RegionT RegionT; - typedef typename Tr::RegionInfoT RegionInfoT; - typedef typename Tr::DomTreeT DomTreeT; - typedef typename Tr::DomTreeNodeT DomTreeNodeT; - typedef typename Tr::PostDomTreeT PostDomTreeT; - typedef typename Tr::DomFrontierT DomFrontierT; - typedef GraphTraits BlockTraits; - typedef GraphTraits> InvBlockTraits; - typedef typename BlockTraits::ChildIteratorType SuccIterTy; - typedef typename InvBlockTraits::ChildIteratorType PredIterTy; - friend class RegionInfo; friend class MachineRegionInfo; - typedef DenseMap BBtoBBMap; - typedef DenseMap BBtoRegionMap; - RegionInfoBase(); - virtual ~RegionInfoBase(); + using BlockT = typename Tr::BlockT; + using FuncT = typename Tr::FuncT; + using RegionT = typename Tr::RegionT; + using RegionInfoT = typename Tr::RegionInfoT; + using DomTreeT = typename Tr::DomTreeT; + using DomTreeNodeT = typename Tr::DomTreeNodeT; + using PostDomTreeT = typename Tr::PostDomTreeT; + using DomFrontierT = typename Tr::DomFrontierT; + using BlockTraits = GraphTraits; + using InvBlockTraits = GraphTraits>; + using SuccIterTy = typename BlockTraits::ChildIteratorType; + using PredIterTy = typename InvBlockTraits::ChildIteratorType; + + using BBtoBBMap = DenseMap; + using BBtoRegionMap = DenseMap; - RegionInfoBase(const RegionInfoBase &) = delete; - const RegionInfoBase &operator=(const RegionInfoBase &) = delete; + RegionInfoBase(); RegionInfoBase(RegionInfoBase &&Arg) : DT(std::move(Arg.DT)), PDT(std::move(Arg.PDT)), DF(std::move(Arg.DF)), @@ -691,6 +696,7 @@ class RegionInfoBase { BBtoRegion(std::move(Arg.BBtoRegion)) { Arg.wipe(); } + RegionInfoBase &operator=(RegionInfoBase &&RHS) { DT = std::move(RHS.DT); PDT = std::move(RHS.PDT); @@ -701,12 +707,14 @@ class RegionInfoBase { return *this; } + virtual ~RegionInfoBase(); + DomTreeT *DT; PostDomTreeT *PDT; DomFrontierT *DF; /// The top level region. 
- RegionT *TopLevelRegion; + RegionT *TopLevelRegion = nullptr; /// Map every BB to the smallest region, that contains BB. BBtoRegionMap BBtoRegion; @@ -785,6 +793,9 @@ class RegionInfoBase { void calculate(FuncT &F); public: + RegionInfoBase(const RegionInfoBase &) = delete; + RegionInfoBase &operator=(const RegionInfoBase &) = delete; + static bool VerifyRegionInfo; static typename RegionT::PrintStyle printStyle; @@ -887,21 +898,22 @@ class Region : public RegionBase> { class RegionInfo : public RegionInfoBase> { public: - typedef RegionInfoBase> Base; + using Base = RegionInfoBase>; explicit RegionInfo(); - ~RegionInfo() override; - RegionInfo(RegionInfo &&Arg) : Base(std::move(static_cast(Arg))) { updateRegionTree(*this, TopLevelRegion); } + RegionInfo &operator=(RegionInfo &&RHS) { Base::operator=(std::move(static_cast(RHS))); updateRegionTree(*this, TopLevelRegion); return *this; } + ~RegionInfo() override; + /// Handle invalidation explicitly. bool invalidate(Function &F, const PreservedAnalyses &PA, FunctionAnalysisManager::Invalidator &); @@ -931,8 +943,8 @@ class RegionInfoPass : public FunctionPass { public: static char ID; - explicit RegionInfoPass(); + explicit RegionInfoPass(); ~RegionInfoPass() override; RegionInfo &getRegionInfo() { return RI; } @@ -953,10 +965,11 @@ class RegionInfoPass : public FunctionPass { /// \brief Analysis pass that exposes the \c RegionInfo for a function. class RegionInfoAnalysis : public AnalysisInfoMixin { friend AnalysisInfoMixin; + static AnalysisKey Key; public: - typedef RegionInfo Result; + using Result = RegionInfo; RegionInfo run(Function &F, FunctionAnalysisManager &AM); }; @@ -967,6 +980,7 @@ class RegionInfoPrinterPass : public PassInfoMixin { public: explicit RegionInfoPrinterPass(raw_ostream &OS); + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); }; @@ -995,8 +1009,8 @@ RegionNodeBase>::getNodeAs() const { template inline raw_ostream &operator<<(raw_ostream &OS, const RegionNodeBase &Node) { - typedef typename Tr::BlockT BlockT; - typedef typename Tr::RegionT RegionT; + using BlockT = typename Tr::BlockT; + using RegionT = typename Tr::RegionT; if (Node.isSubRegion()) return OS << Node.template getNodeAs()->getNameStr(); @@ -1008,5 +1022,6 @@ extern template class RegionBase>; extern template class RegionNodeBase>; extern template class RegionInfoBase>; -} // End llvm namespace -#endif +} // end namespace llvm + +#endif // LLVM_ANALYSIS_REGIONINFO_H diff --git a/interpreter/llvm/src/include/llvm/Analysis/RegionInfoImpl.h b/interpreter/llvm/src/include/llvm/Analysis/RegionInfoImpl.h index a16c534484b35..cd4ec0a03a9ee 100644 --- a/interpreter/llvm/src/include/llvm/Analysis/RegionInfoImpl.h +++ b/interpreter/llvm/src/include/llvm/Analysis/RegionInfoImpl.h @@ -12,7 +12,11 @@ #ifndef LLVM_ANALYSIS_REGIONINFOIMPL_H #define LLVM_ANALYSIS_REGIONINFOIMPL_H +#include "llvm/ADT/GraphTraits.h" #include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/DominanceFrontier.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/PostDominators.h" @@ -20,14 +24,20 @@ #include "llvm/Analysis/RegionIterator.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include +#include #include +#include #include - -namespace llvm { +#include +#include +#include #define DEBUG_TYPE "region" +namespace llvm { + 
//===----------------------------------------------------------------------===// /// RegionBase Implementation template @@ -303,7 +313,8 @@ RegionBase::element_end() const { template typename Tr::RegionT *RegionBase::getSubRegionNode(BlockT *BB) const { - typedef typename Tr::RegionT RegionT; + using RegionT = typename Tr::RegionT; + RegionT *R = RI->getRegionFor(BB); if (!R || R == this) @@ -330,7 +341,8 @@ typename Tr::RegionNodeT *RegionBase::getBBNode(BlockT *BB) const { if (at == BBNodeMap.end()) { auto Deconst = const_cast *>(this); typename BBNodeMapT::value_type V = { - BB, make_unique(static_cast(Deconst), BB)}; + BB, + llvm::make_unique(static_cast(Deconst), BB)}; at = BBNodeMap.insert(std::move(V)).first; } return at->second.get(); @@ -357,10 +369,10 @@ void RegionBase::transferChildrenTo(RegionT *To) { template void RegionBase::addSubRegion(RegionT *SubRegion, bool moveChildren) { assert(!SubRegion->parent && "SubRegion already has a parent!"); - assert(find_if(*this, - [&](const std::unique_ptr &R) { - return R.get() == SubRegion; - }) == children.end() && + assert(llvm::find_if(*this, + [&](const std::unique_ptr &R) { + return R.get() == SubRegion; + }) == children.end() && "Subregion already exists!"); SubRegion->parent = static_cast(this); @@ -402,7 +414,7 @@ typename Tr::RegionT *RegionBase::removeSubRegion(RegionT *Child) { assert(Child->parent == this && "Child is not a child of this region!"); Child->parent = nullptr; typename RegionSet::iterator I = - find_if(children, [&](const std::unique_ptr &R) { + llvm::find_if(children, [&](const std::unique_ptr &R) { return R.get() == Child; }); assert(I != children.end() && "Region does not exit. Unable to remove."); @@ -505,8 +517,7 @@ void RegionBase::clearNodeCache() { // template -RegionInfoBase::RegionInfoBase() - : TopLevelRegion(nullptr) {} +RegionInfoBase::RegionInfoBase() = default; template RegionInfoBase::~RegionInfoBase() { @@ -543,7 +554,8 @@ bool RegionInfoBase::isCommonDomFrontier(BlockT *BB, BlockT *entry, template bool RegionInfoBase::isRegion(BlockT *entry, BlockT *exit) const { assert(entry && exit && "entry and exit must not be null!"); - typedef typename DomFrontierT::DomSetType DST; + + using DST = typename DomFrontierT::DomSetType; DST *entrySuccs = &DF->find(entry)->second; @@ -689,7 +701,8 @@ void RegionInfoBase::findRegionsWithEntry(BlockT *entry, template void RegionInfoBase::scanForRegions(FuncT &F, BBtoBBMap *ShortCut) { - typedef typename std::add_pointer::type FuncPtrT; + using FuncPtrT = typename std::add_pointer::type; + BlockT *entry = GraphTraits::getEntryNode(&F); DomTreeNodeT *N = DT->getNode(entry); @@ -876,7 +889,7 @@ RegionInfoBase::getCommonRegion(SmallVectorImpl &BBs) const { template void RegionInfoBase::calculate(FuncT &F) { - typedef typename std::add_pointer::type FuncPtrT; + using FuncPtrT = typename std::add_pointer::type; // ShortCut a function where for every BB the exit of the largest region // starting with BB is stored. These regions can be threated as single BBS. 
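The hunks above qualify find_if and make_unique as llvm::find_if / llvm::make_unique. The point is to avoid ambiguity once both the llvm and std versions of a name are visible (via using-directives or ADL). A standalone reproduction of the problem and the fix, with a hypothetical namespace mylib standing in for llvm:

```cpp
#include <memory>

namespace mylib {
// Same shape as the C++14 std::make_unique, as llvm::make_unique had.
template <typename T, typename... Args>
std::unique_ptr<T> make_unique(Args &&...A) {
  return std::unique_ptr<T>(new T(std::forward<Args>(A)...));
}
} // namespace mylib

using namespace std;
using namespace mylib;

int main() {
  // auto P = make_unique<int>(1);     // error in C++14: ambiguous
  auto P = mylib::make_unique<int>(1); // OK: explicit qualification
  return *P - 1;
}
```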
@@ -888,8 +901,8 @@ void RegionInfoBase::calculate(FuncT &F) { buildRegionsTree(DT->getNode(BB), TopLevelRegion); } -#undef DEBUG_TYPE - } // end namespace llvm -#endif +#undef DEBUG_TYPE + +#endif // LLVM_ANALYSIS_REGIONINFOIMPL_H diff --git a/interpreter/llvm/src/include/llvm/Analysis/RegionIterator.h b/interpreter/llvm/src/include/llvm/Analysis/RegionIterator.h index de2f3bf3f12b6..4f823cc822103 100644 --- a/interpreter/llvm/src/include/llvm/Analysis/RegionIterator.h +++ b/interpreter/llvm/src/include/llvm/Analysis/RegionIterator.h @@ -8,17 +8,23 @@ //===----------------------------------------------------------------------===// // This file defines the iterators to iterate over the elements of a Region. //===----------------------------------------------------------------------===// + #ifndef LLVM_ANALYSIS_REGIONITERATOR_H #define LLVM_ANALYSIS_REGIONITERATOR_H +#include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/GraphTraits.h" #include "llvm/ADT/PointerIntPair.h" -#include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/RegionInfo.h" #include "llvm/IR/CFG.h" -#include "llvm/Support/raw_ostream.h" +#include +#include +#include namespace llvm { + +class BasicBlock; + //===----------------------------------------------------------------------===// /// @brief Hierarchical RegionNode successor iterator. /// @@ -33,10 +39,9 @@ namespace llvm { template class RNSuccIterator : public std::iterator { - typedef std::iterator super; - - typedef GraphTraits BlockTraits; - typedef typename BlockTraits::ChildIteratorType SuccIterTy; + using super = std::iterator; + using BlockTraits = GraphTraits; + using SuccIterTy = typename BlockTraits::ChildIteratorType; // The iterator works in two modes, bb mode or region mode. enum ItMode { @@ -92,16 +97,15 @@ class RNSuccIterator inline bool isExit(BlockT* BB) const { return getNode()->getParent()->getExit() == BB; } -public: - typedef RNSuccIterator Self; - typedef typename super::value_type value_type; +public: + using Self = RNSuccIterator; + using value_type = typename super::value_type; /// @brief Create begin iterator of a RegionNode. inline RNSuccIterator(NodeRef node) : Node(node, node->isSubRegion() ? ItRgBegin : ItBB), BItor(BlockTraits::child_begin(node->getEntry())) { - // Skip the exit block if (!isRegionMode()) while (BlockTraits::child_end(node->getEntry()) != BItor && isExit(*BItor)) @@ -153,7 +157,6 @@ class RNSuccIterator } }; - //===----------------------------------------------------------------------===// /// @brief Flat RegionNode iterator. /// @@ -163,16 +166,16 @@ class RNSuccIterator template class RNSuccIterator, BlockT, RegionT> : public std::iterator { - typedef std::iterator super; - typedef GraphTraits BlockTraits; - typedef typename BlockTraits::ChildIteratorType SuccIterTy; + using super = std::iterator; + using BlockTraits = GraphTraits; + using SuccIterTy = typename BlockTraits::ChildIteratorType; NodeRef Node; SuccIterTy Itor; public: - typedef RNSuccIterator, BlockT, RegionT> Self; - typedef typename super::value_type value_type; + using Self = RNSuccIterator, BlockT, RegionT>; + using value_type = typename super::value_type; /// @brief Create the iterator from a RegionNode. 
/// @@ -255,8 +258,8 @@ inline RNSuccIterator succ_end(NodeRef Node) { #define RegionNodeGraphTraits(NodeT, BlockT, RegionT) \ template <> struct GraphTraits { \ - typedef NodeT *NodeRef; \ - typedef RNSuccIterator ChildIteratorType; \ + using NodeRef = NodeT *; \ + using ChildIteratorType = RNSuccIterator; \ static NodeRef getEntryNode(NodeRef N) { return N; } \ static inline ChildIteratorType child_begin(NodeRef N) { \ return RNSuccIterator(N); \ @@ -266,9 +269,9 @@ inline RNSuccIterator succ_end(NodeRef Node) { } \ }; \ template <> struct GraphTraits> { \ - typedef NodeT *NodeRef; \ - typedef RNSuccIterator, BlockT, RegionT> \ - ChildIteratorType; \ + using NodeRef = NodeT *; \ + using ChildIteratorType = \ + RNSuccIterator, BlockT, RegionT>; \ static NodeRef getEntryNode(NodeRef N) { return N; } \ static inline ChildIteratorType child_begin(NodeRef N) { \ return RNSuccIterator, BlockT, RegionT>(N); \ @@ -280,7 +283,7 @@ inline RNSuccIterator succ_end(NodeRef Node) { #define RegionGraphTraits(RegionT, NodeT) \ template <> struct GraphTraits : public GraphTraits { \ - typedef df_iterator nodes_iterator; \ + using nodes_iterator = df_iterator; \ static NodeRef getEntryNode(RegionT *R) { \ return R->getNode(R->getEntry()); \ } \ @@ -294,9 +297,9 @@ inline RNSuccIterator succ_end(NodeRef Node) { template <> \ struct GraphTraits> \ : public GraphTraits> { \ - typedef df_iterator, false, \ - GraphTraits>> \ - nodes_iterator; \ + using nodes_iterator = \ + df_iterator, false, \ + GraphTraits>>; \ static NodeRef getEntryNode(RegionT *R) { \ return R->getBBNode(R->getEntry()); \ } \ @@ -315,17 +318,19 @@ RegionGraphTraits(Region, RegionNode); RegionGraphTraits(const Region, const RegionNode); template <> struct GraphTraits - : public GraphTraits > { - typedef df_iterator, false, - GraphTraits>> - nodes_iterator; + : public GraphTraits> { + using nodes_iterator = + df_iterator, false, + GraphTraits>>; static NodeRef getEntryNode(RegionInfo *RI) { - return GraphTraits >::getEntryNode(RI->getTopLevelRegion()); + return GraphTraits>::getEntryNode(RI->getTopLevelRegion()); } + static nodes_iterator nodes_begin(RegionInfo* RI) { return nodes_iterator::begin(getEntryNode(RI)); } + static nodes_iterator nodes_end(RegionInfo *RI) { return nodes_iterator::end(getEntryNode(RI)); } @@ -333,21 +338,23 @@ template <> struct GraphTraits template <> struct GraphTraits : public GraphTraits { - typedef df_iterator, false, - GraphTraits>> - nodes_iterator; + using nodes_iterator = + df_iterator, false, + GraphTraits>>; static NodeRef getEntryNode(RegionInfoPass *RI) { return GraphTraits::getEntryNode(&RI->getRegionInfo()); } + static nodes_iterator nodes_begin(RegionInfoPass* RI) { return GraphTraits::nodes_begin(&RI->getRegionInfo()); } + static nodes_iterator nodes_end(RegionInfoPass *RI) { return GraphTraits::nodes_end(&RI->getRegionInfo()); } }; -} // End namespace llvm +} // end namespace llvm -#endif +#endif // LLVM_ANALYSIS_REGIONITERATOR_H diff --git a/interpreter/llvm/src/include/llvm/Analysis/RegionPass.h b/interpreter/llvm/src/include/llvm/Analysis/RegionPass.h index b5f38139abf20..515b362e54071 100644 --- a/interpreter/llvm/src/include/llvm/Analysis/RegionPass.h +++ b/interpreter/llvm/src/include/llvm/Analysis/RegionPass.h @@ -78,6 +78,11 @@ class RegionPass : public Pass { return PMT_RegionPassManager; } //@} + +protected: + /// Optional passes call this function to check whether the pass should be + /// skipped. This is the case when optimization bisect is over the limit. 
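The RegionNodeGraphTraits/RegionGraphTraits macros above stamp out GraphTraits specializations so generic graph algorithms (df_iterator, post_order, and friends) can walk regions. A simplified standalone sketch of the pattern, using children() in place of the real child_begin/child_end interface (GraphTraitsLike and TinyGraph are stand-ins):

```cpp
#include <cstdio>
#include <vector>

// Algorithms are written against a traits class; each graph-like type gets
// a specialization mapping it onto a node handle and a child range.
template <typename GraphT> struct GraphTraitsLike; // primary: never defined

struct TinyGraph { std::vector<std::vector<int>> Succs; };

template <> struct GraphTraitsLike<TinyGraph> {
  using NodeRef = int;
  static NodeRef getEntryNode(const TinyGraph &) { return 0; }
  static const std::vector<int> &children(const TinyGraph &G, NodeRef N) {
    return G.Succs[N];
  }
};

// A generic traversal that only knows the traits interface.
template <typename GraphT> void printSuccessorsOfEntry(const GraphT &G) {
  using GT = GraphTraitsLike<GraphT>;
  for (int C : GT::children(G, GT::getEntryNode(G)))
    std::printf("succ %d\n", C);
}

int main() {
  TinyGraph G{{{1, 2}, {}, {}}};
  printSuccessorsOfEntry(G); // succ 1, succ 2
}
```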
+ bool skipRegion(Region &R) const; }; /// @brief The pass manager to schedule RegionPasses. diff --git a/interpreter/llvm/src/include/llvm/Analysis/ScalarEvolution.h b/interpreter/llvm/src/include/llvm/Analysis/ScalarEvolution.h index 919c766ae7bf2..d1b182755cf81 100644 --- a/interpreter/llvm/src/include/llvm/Analysis/ScalarEvolution.h +++ b/interpreter/llvm/src/include/llvm/Analysis/ScalarEvolution.h @@ -237,17 +237,15 @@ struct FoldingSetTrait : DefaultFoldingSetTrait { }; /// This class represents an assumption that two SCEV expressions are equal, -/// and this can be checked at run-time. We assume that the left hand side is -/// a SCEVUnknown and the right hand side a constant. +/// and this can be checked at run-time. class SCEVEqualPredicate final : public SCEVPredicate { - /// We assume that LHS == RHS, where LHS is a SCEVUnknown and RHS a - /// constant. - const SCEVUnknown *LHS; - const SCEVConstant *RHS; + /// We assume that LHS == RHS. + const SCEV *LHS; + const SCEV *RHS; public: - SCEVEqualPredicate(const FoldingSetNodeIDRef ID, const SCEVUnknown *LHS, - const SCEVConstant *RHS); + SCEVEqualPredicate(const FoldingSetNodeIDRef ID, const SCEV *LHS, + const SCEV *RHS); /// Implementation of the SCEVPredicate interface bool implies(const SCEVPredicate *N) const override; @@ -256,13 +254,13 @@ class SCEVEqualPredicate final : public SCEVPredicate { const SCEV *getExpr() const override; /// Returns the left hand side of the equality. - const SCEVUnknown *getLHS() const { return LHS; } + const SCEV *getLHS() const { return LHS; } /// Returns the right hand side of the equality. - const SCEVConstant *getRHS() const { return RHS; } + const SCEV *getRHS() const { return RHS; } /// Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const SCEVPredicate *P) { + static bool classof(const SCEVPredicate *P) { return P->getKind() == P_Equal; } }; @@ -360,7 +358,7 @@ class SCEVWrapPredicate final : public SCEVPredicate { bool isAlwaysTrue() const override; /// Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const SCEVPredicate *P) { + static bool classof(const SCEVPredicate *P) { return P->getKind() == P_Wrap; } }; @@ -406,7 +404,7 @@ class SCEVUnionPredicate final : public SCEVPredicate { unsigned getComplexity() const override { return Preds.size(); } /// Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const SCEVPredicate *P) { + static bool classof(const SCEVPredicate *P) { return P->getKind() == P_Union; } }; @@ -568,27 +566,16 @@ class ScalarEvolution { Predicates.insert(P); } - /*implicit*/ ExitLimit(const SCEV *E) - : ExactNotTaken(E), MaxNotTaken(E), MaxOrZero(false) {} + /*implicit*/ ExitLimit(const SCEV *E); ExitLimit( const SCEV *E, const SCEV *M, bool MaxOrZero, - ArrayRef *> PredSetList) - : ExactNotTaken(E), MaxNotTaken(M), MaxOrZero(MaxOrZero) { - assert((isa(ExactNotTaken) || - !isa(MaxNotTaken)) && - "Exact is not allowed to be less precise than Max"); - for (auto *PredSet : PredSetList) - for (auto *P : *PredSet) - addPredicate(P); - } + ArrayRef *> PredSetList); ExitLimit(const SCEV *E, const SCEV *M, bool MaxOrZero, - const SmallPtrSetImpl &PredSet) - : ExitLimit(E, M, MaxOrZero, {&PredSet}) {} + const SmallPtrSetImpl &PredSet); - ExitLimit(const SCEV *E, const SCEV *M, bool MaxOrZero) - : ExitLimit(E, M, MaxOrZero, None) {} + ExitLimit(const SCEV *E, const SCEV *M, bool MaxOrZero); /// Test whether this ExitLimit contains any computed 
information, or /// whether it's all SCEVCouldNotCompute values. @@ -647,7 +634,7 @@ class ScalarEvolution { /// @} public: - BackedgeTakenInfo() : MaxAndComplete(nullptr, 0) {} + BackedgeTakenInfo() : MaxAndComplete(nullptr, 0), MaxOrZero(false) {} BackedgeTakenInfo(BackedgeTakenInfo &&) = default; BackedgeTakenInfo &operator=(BackedgeTakenInfo &&) = default; @@ -667,10 +654,12 @@ class ScalarEvolution { /// Test whether this BackedgeTakenInfo contains complete information. bool hasFullInfo() const { return isComplete(); } - /// Return an expression indicating the exact backedge-taken count of the - /// loop if it is known or SCEVCouldNotCompute otherwise. This is the - /// number of times the loop header can be guaranteed to execute, minus - /// one. + /// Return an expression indicating the exact *backedge-taken* + /// count of the loop if it is known or SCEVCouldNotCompute + /// otherwise. If execution makes it to the backedge on every + /// iteration (i.e. there are no abnormal exists like exception + /// throws and thread exits) then this is the number of times the + /// loop header will execute minus one. /// /// If the SCEV predicate associated with the answer can be different /// from AlwaysTrue, we must add a (non null) Predicates argument. @@ -793,7 +782,9 @@ class ScalarEvolution { } /// Determine the range for a particular SCEV. - ConstantRange getRange(const SCEV *S, RangeSignHint Hint); + /// NOTE: This returns a reference to an entry in a cache. It must be + /// copied if its needed for longer. + const ConstantRange &getRangeRef(const SCEV *S, RangeSignHint Hint); /// Determines the range for the affine SCEVAddRecExpr {\p Start,+,\p Stop}. /// Helper for \c getRange. @@ -1204,45 +1195,38 @@ class ScalarEvolution { const SCEV *getConstant(const APInt &Val); const SCEV *getConstant(Type *Ty, uint64_t V, bool isSigned = false); const SCEV *getTruncateExpr(const SCEV *Op, Type *Ty); - - typedef SmallDenseMap, const SCEV *, 8> - ExtendCacheTy; - const SCEV *getZeroExtendExpr(const SCEV *Op, Type *Ty); - const SCEV *getZeroExtendExprCached(const SCEV *Op, Type *Ty, - ExtendCacheTy &Cache); - const SCEV *getZeroExtendExprImpl(const SCEV *Op, Type *Ty, - ExtendCacheTy &Cache); - - const SCEV *getSignExtendExpr(const SCEV *Op, Type *Ty); - const SCEV *getSignExtendExprCached(const SCEV *Op, Type *Ty, - ExtendCacheTy &Cache); - const SCEV *getSignExtendExprImpl(const SCEV *Op, Type *Ty, - ExtendCacheTy &Cache); + const SCEV *getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth = 0); + const SCEV *getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth = 0); const SCEV *getAnyExtendExpr(const SCEV *Op, Type *Ty); const SCEV *getAddExpr(SmallVectorImpl &Ops, SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap, unsigned Depth = 0); const SCEV *getAddExpr(const SCEV *LHS, const SCEV *RHS, - SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap) { + SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap, + unsigned Depth = 0) { SmallVector Ops = {LHS, RHS}; - return getAddExpr(Ops, Flags); + return getAddExpr(Ops, Flags, Depth); } const SCEV *getAddExpr(const SCEV *Op0, const SCEV *Op1, const SCEV *Op2, - SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap) { + SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap, + unsigned Depth = 0) { SmallVector Ops = {Op0, Op1, Op2}; - return getAddExpr(Ops, Flags); + return getAddExpr(Ops, Flags, Depth); } const SCEV *getMulExpr(SmallVectorImpl &Ops, - SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap); + SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap, + unsigned Depth = 0); const SCEV 
*getMulExpr(const SCEV *LHS, const SCEV *RHS, - SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap) { + SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap, + unsigned Depth = 0) { SmallVector Ops = {LHS, RHS}; - return getMulExpr(Ops, Flags); + return getMulExpr(Ops, Flags, Depth); } const SCEV *getMulExpr(const SCEV *Op0, const SCEV *Op1, const SCEV *Op2, - SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap) { + SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap, + unsigned Depth = 0) { SmallVector Ops = {Op0, Op1, Op2}; - return getMulExpr(Ops, Flags); + return getMulExpr(Ops, Flags, Depth); } const SCEV *getUDivExpr(const SCEV *LHS, const SCEV *RHS); const SCEV *getUDivExactExpr(const SCEV *LHS, const SCEV *RHS); @@ -1255,6 +1239,14 @@ class ScalarEvolution { SmallVector NewOp(Operands.begin(), Operands.end()); return getAddRecExpr(NewOp, L, Flags); } + + /// Checks if \p SymbolicPHI can be rewritten as an AddRecExpr under some + /// Predicates. If successful return these ; + /// The function is intended to be called from PSCEV (the caller will decide + /// whether to actually add the predicates and carry out the rewrites). + Optional>> + createAddRecFromPHIWithCasts(const SCEVUnknown *SymbolicPHI); + /// Returns an expression for a GEP /// /// \p GEP The GEP. The indices contained in the GEP itself are ignored, @@ -1296,7 +1288,8 @@ class ScalarEvolution { /// Return LHS-RHS. Minus is represented in SCEV as A+B*-1. const SCEV *getMinusSCEV(const SCEV *LHS, const SCEV *RHS, - SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap); + SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap, + unsigned Depth = 0); /// Return a SCEV corresponding to a conversion of the input value to the /// specified type. If the type must be extended, it is zero extended. @@ -1409,11 +1402,11 @@ class ScalarEvolution { const SCEV *getExitCount(const Loop *L, BasicBlock *ExitingBlock); /// If the specified loop has a predictable backedge-taken count, return it, - /// otherwise return a SCEVCouldNotCompute object. The backedge-taken count - /// is the number of times the loop header will be branched to from within - /// the loop. This is one less than the trip count of the loop, since it - /// doesn't count the first iteration, when the header is branched to from - /// outside the loop. + /// otherwise return a SCEVCouldNotCompute object. The backedge-taken count is + /// the number of times the loop header will be branched to from within the + /// loop, assuming there are no abnormal exists like exception throws. This is + /// one less than the trip count of the loop, since it doesn't count the first + /// iteration, when the header is branched to from outside the loop. /// /// Note that it is not valid to call this method on a loop without a /// loop-invariant backedge-taken count (see @@ -1428,8 +1421,10 @@ class ScalarEvolution { const SCEV *getPredicatedBackedgeTakenCount(const Loop *L, SCEVUnionPredicate &Predicates); - /// Similar to getBackedgeTakenCount, except return the least SCEV value - /// that is known never to be less than the actual backedge taken count. + /// When successful, this returns a SCEVConstant that is greater than or equal + /// to (i.e. a "conservative over-approximation") of the value returend by + /// getBackedgeTakenCount. If such a value cannot be computed, it returns the + /// SCEVCouldNotCompute object. 
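The reworded comments above pin down the relationship the docs rely on: when control reaches the backedge on every iteration (no exception throws or thread exits), the header executes exactly backedge-taken-count + 1 times. Counting it out for a trivial three-iteration loop:

```cpp
#include <cassert>

int main() {
  unsigned BackedgeTaken = 0, HeaderExecutions = 0;
  for (int i = 0; i < 3; ++i) { // no early exits, no exceptions
    ++HeaderExecutions;
    if (i + 1 < 3)
      ++BackedgeTaken; // the branch back to the loop header
  }
  assert(BackedgeTaken == 2);
  assert(HeaderExecutions == BackedgeTaken + 1); // trip count relation
}
```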
const SCEV *getMaxBackedgeTakenCount(const Loop *L); /// Return true if the backedge taken count is either the value returned by @@ -1465,15 +1460,35 @@ class ScalarEvolution { uint32_t GetMinTrailingZeros(const SCEV *S); /// Determine the unsigned range for a particular SCEV. - /// + /// NOTE: This returns a copy of the reference returned by getRangeRef. ConstantRange getUnsignedRange(const SCEV *S) { - return getRange(S, HINT_RANGE_UNSIGNED); + return getRangeRef(S, HINT_RANGE_UNSIGNED); + } + + /// Determine the min of the unsigned range for a particular SCEV. + APInt getUnsignedRangeMin(const SCEV *S) { + return getRangeRef(S, HINT_RANGE_UNSIGNED).getUnsignedMin(); + } + + /// Determine the max of the unsigned range for a particular SCEV. + APInt getUnsignedRangeMax(const SCEV *S) { + return getRangeRef(S, HINT_RANGE_UNSIGNED).getUnsignedMax(); } /// Determine the signed range for a particular SCEV. - /// + /// NOTE: This returns a copy of the reference returned by getRangeRef. ConstantRange getSignedRange(const SCEV *S) { - return getRange(S, HINT_RANGE_SIGNED); + return getRangeRef(S, HINT_RANGE_SIGNED); + } + + /// Determine the min of the signed range for a particular SCEV. + APInt getSignedRangeMin(const SCEV *S) { + return getRangeRef(S, HINT_RANGE_SIGNED).getSignedMin(); + } + + /// Determine the max of the signed range for a particular SCEV. + APInt getSignedRangeMax(const SCEV *S) { + return getRangeRef(S, HINT_RANGE_SIGNED).getSignedMax(); } /// Test if the given expression is known to be negative. @@ -1540,6 +1555,11 @@ class ScalarEvolution { /// specified loop. bool isLoopInvariant(const SCEV *S, const Loop *L); + /// Determine if the SCEV can be evaluated at loop's entry. It is true if it + /// doesn't depend on a SCEVUnknown of an instruction which is dominated by + /// the header of loop L. + bool isAvailableAtLoopEntry(const SCEV *S, const Loop *L); + /// Return true if the given SCEV changes value in a known way in the /// specified loop. This property being true implies that the value is /// variant in the loop AND that we can emit an expression to compute the @@ -1661,8 +1681,7 @@ class ScalarEvolution { return F.getParent()->getDataLayout(); } - const SCEVPredicate *getEqualPredicate(const SCEVUnknown *LHS, - const SCEVConstant *RHS); + const SCEVPredicate *getEqualPredicate(const SCEV *LHS, const SCEV *RHS); const SCEVPredicate * getWrapPredicate(const SCEVAddRecExpr *AR, @@ -1678,6 +1697,19 @@ class ScalarEvolution { SmallPtrSetImpl &Preds); private: + /// Similar to createAddRecFromPHI, but with the additional flexibility of + /// suggesting runtime overflow checks in case casts are encountered. + /// If successful, the analysis records that for this loop, \p SymbolicPHI, + /// which is the UnknownSCEV currently representing the PHI, can be rewritten + /// into an AddRec, assuming some predicates; The function then returns the + /// AddRec and the predicates as a pair, and caches this pair in + /// PredicatedSCEVRewrites. + /// If the analysis is not successful, a mapping from the \p SymbolicPHI to + /// itself (with no predicates) is recorded, and a nullptr with an empty + /// predicates vector is returned as a pair. + Optional>> + createAddRecFromPHIWithCastsImpl(const SCEVUnknown *SymbolicPHI); + /// Compute the backedge taken count knowing the interval difference, the /// stride and presence of the equality in the comparison. 
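The new getUnsignedRangeMin/Max and getSignedRangeMin/Max helpers exist because getRangeRef now hands back a reference into a cache: callers that only want a bound should take the bound, not hold the reference. A stand-in showing the shape of the API and the defensive copy (RangeLike/ScalarEvolutionLike are illustrative only):

```cpp
#include <cassert>
#include <cstdint>

struct RangeLike {
  uint64_t Lo, Hi; // closed interval [Lo, Hi]
  uint64_t getUnsignedMin() const { return Lo; }
  uint64_t getUnsignedMax() const { return Hi; }
};

struct ScalarEvolutionLike {
  RangeLike Cached{4, 10}; // imagine this entry lives in a per-SCEV cache
  // Reference into the cache: must be copied if kept across queries.
  const RangeLike &getRangeRef() const { return Cached; }
  // Copying wrapper, as documented for getUnsignedRange/getSignedRange.
  RangeLike getUnsignedRange() const { return getRangeRef(); }
  // Bound-only accessors avoid exposing the reference at all.
  uint64_t getUnsignedRangeMin() const { return getRangeRef().getUnsignedMin(); }
  uint64_t getUnsignedRangeMax() const { return getRangeRef().getUnsignedMax(); }
};

int main() {
  ScalarEvolutionLike SE;
  assert(SE.getUnsignedRangeMin() == 4);
  assert(SE.getUnsignedRangeMax() == 10);
}
```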
const SCEV *computeBECount(const SCEV *Delta, const SCEV *Stride, @@ -1695,15 +1727,25 @@ class ScalarEvolution { bool doesIVOverflowOnGT(const SCEV *RHS, const SCEV *Stride, bool IsSigned, bool NoWrap); - /// Get add expr already created or create a new one + /// Get add expr already created or create a new one. const SCEV *getOrCreateAddExpr(SmallVectorImpl &Ops, SCEV::NoWrapFlags Flags); + /// Get mul expr already created or create a new one. + const SCEV *getOrCreateMulExpr(SmallVectorImpl &Ops, + SCEV::NoWrapFlags Flags); + private: FoldingSet UniqueSCEVs; FoldingSet UniquePreds; BumpPtrAllocator SCEVAllocator; + /// Cache tentative mappings from UnknownSCEVs in a Loop, to a SCEV expression + /// they can be rewritten into under certain predicates. + DenseMap, + std::pair>> + PredicatedSCEVRewrites; + /// The head of a linked list of all SCEVUnknown values that have been /// allocated. This is used by releaseMemory to locate them all and call /// their destructors. diff --git a/interpreter/llvm/src/include/llvm/Analysis/ScalarEvolutionExpressions.h b/interpreter/llvm/src/include/llvm/Analysis/ScalarEvolutionExpressions.h index 2c693bceb24db..56ddb5028d6d0 100644 --- a/interpreter/llvm/src/include/llvm/Analysis/ScalarEvolutionExpressions.h +++ b/interpreter/llvm/src/include/llvm/Analysis/ScalarEvolutionExpressions.h @@ -46,7 +46,7 @@ namespace llvm { Type *getType() const { return V->getType(); } /// Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const SCEV *S) { + static bool classof(const SCEV *S) { return S->getSCEVType() == scConstant; } }; @@ -65,7 +65,7 @@ namespace llvm { Type *getType() const { return Ty; } /// Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const SCEV *S) { + static bool classof(const SCEV *S) { return S->getSCEVType() == scTruncate || S->getSCEVType() == scZeroExtend || S->getSCEVType() == scSignExtend; @@ -82,7 +82,7 @@ namespace llvm { public: /// Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const SCEV *S) { + static bool classof(const SCEV *S) { return S->getSCEVType() == scTruncate; } }; @@ -97,7 +97,7 @@ namespace llvm { public: /// Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const SCEV *S) { + static bool classof(const SCEV *S) { return S->getSCEVType() == scZeroExtend; } }; @@ -112,7 +112,7 @@ namespace llvm { public: /// Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const SCEV *S) { + static bool classof(const SCEV *S) { return S->getSCEVType() == scSignExtend; } }; @@ -167,7 +167,7 @@ namespace llvm { } /// Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const SCEV *S) { + static bool classof(const SCEV *S) { return S->getSCEVType() == scAddExpr || S->getSCEVType() == scMulExpr || S->getSCEVType() == scSMaxExpr || @@ -185,7 +185,7 @@ namespace llvm { public: /// Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const SCEV *S) { + static bool classof(const SCEV *S) { return S->getSCEVType() == scAddExpr || S->getSCEVType() == scMulExpr || S->getSCEVType() == scSMaxExpr || @@ -217,7 +217,7 @@ namespace llvm { } /// Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const SCEV *S) { + static bool classof(const SCEV *S) { return S->getSCEVType() == scAddExpr; } }; @@ -234,7 +234,7 
@@ namespace llvm { public: /// Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const SCEV *S) { + static bool classof(const SCEV *S) { return S->getSCEVType() == scMulExpr; } }; @@ -263,7 +263,7 @@ namespace llvm { } /// Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const SCEV *S) { + static bool classof(const SCEV *S) { return S->getSCEVType() == scUDivExpr; } }; @@ -345,7 +345,7 @@ namespace llvm { } /// Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const SCEV *S) { + static bool classof(const SCEV *S) { return S->getSCEVType() == scAddRecExpr; } }; @@ -363,7 +363,7 @@ namespace llvm { public: /// Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const SCEV *S) { + static bool classof(const SCEV *S) { return S->getSCEVType() == scSMaxExpr; } }; @@ -382,7 +382,7 @@ namespace llvm { public: /// Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const SCEV *S) { + static bool classof(const SCEV *S) { return S->getSCEVType() == scUMaxExpr; } }; @@ -428,7 +428,7 @@ namespace llvm { Type *getType() const { return getValPtr()->getType(); } /// Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const SCEV *S) { + static bool classof(const SCEV *S) { return S->getSCEVType() == scUnknown; } }; diff --git a/interpreter/llvm/src/include/llvm/Analysis/ScalarEvolutionNormalization.h b/interpreter/llvm/src/include/llvm/Analysis/ScalarEvolutionNormalization.h index b73ad95278a00..51c92121c8f0e 100644 --- a/interpreter/llvm/src/include/llvm/Analysis/ScalarEvolutionNormalization.h +++ b/interpreter/llvm/src/include/llvm/Analysis/ScalarEvolutionNormalization.h @@ -36,8 +36,8 @@ #ifndef LLVM_ANALYSIS_SCALAREVOLUTIONNORMALIZATION_H #define LLVM_ANALYSIS_SCALAREVOLUTIONNORMALIZATION_H -#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" namespace llvm { diff --git a/interpreter/llvm/src/include/llvm/Analysis/TargetLibraryInfo.h b/interpreter/llvm/src/include/llvm/Analysis/TargetLibraryInfo.h index 944250cfd6ac1..d75e7833279b0 100644 --- a/interpreter/llvm/src/include/llvm/Analysis/TargetLibraryInfo.h +++ b/interpreter/llvm/src/include/llvm/Analysis/TargetLibraryInfo.h @@ -13,6 +13,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/Triple.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/Function.h" #include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" @@ -191,6 +192,14 @@ class TargetLibraryInfoImpl { void setShouldSignExtI32Param(bool Val) { ShouldSignExtI32Param = Val; } + + /// Returns the size of the wchar_t type in bytes. + unsigned getWCharSize(const Module &M) const; + + /// Returns size of the default wchar_t type on target \p T. This is mostly + /// intended to verify that the size in the frontend matches LLVM. All other + /// queries should use getWCharSize() instead. + static unsigned getTargetWCharSize(const Triple &T); }; /// Provides information about what library functions are available for @@ -231,6 +240,13 @@ class TargetLibraryInfo { return Impl->getLibFunc(FDecl, F); } + /// If a callsite does not have the 'nobuiltin' attribute, return if the + /// called function is a known library function and set F to that function. 
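The classof hunks only drop a redundant inline (member functions defined in-class are implicitly inline), but the pattern they touch is worth spelling out: a kind tag plus a static classof drives isa/cast/dyn_cast without C++ RTTI. A self-contained sketch, with "Like"-suffixed stand-ins rather than the real SCEV hierarchy:

```cpp
#include <cassert>

struct SCEVLike {
  enum Kind { scConstant, scUnknown };
  Kind K;
  explicit SCEVLike(Kind K) : K(K) {}
};

struct SCEVConstantLike : SCEVLike {
  SCEVConstantLike() : SCEVLike(scConstant) {}
  // isa<>/dyn_cast<> consult this to test the dynamic kind.
  static bool classof(const SCEVLike *S) { return S->K == scConstant; }
};

// Simplified analogue of llvm::isa<>.
template <typename To, typename From> bool isaLike(const From *V) {
  return To::classof(V);
}

int main() {
  SCEVConstantLike C;
  const SCEVLike *S = &C;
  assert(isaLike<SCEVConstantLike>(S));
}
```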
+ bool getLibFunc(ImmutableCallSite CS, LibFunc &F) const { + return !CS.isNoBuiltin() && CS.getCalledFunction() && + getLibFunc(*(CS.getCalledFunction()), F); + } + /// Tests whether a library function is available. bool has(LibFunc F) const { return Impl->getState(F) != TargetLibraryInfoImpl::Unavailable; @@ -307,6 +323,11 @@ class TargetLibraryInfo { return Attribute::None; } + /// \copydoc TargetLibraryInfoImpl::getWCharSize() + unsigned getWCharSize(const Module &M) const { + return Impl->getWCharSize(M); + } + /// Handle invalidation from the pass manager. /// /// If we try to invalidate this info, just return false. It cannot become diff --git a/interpreter/llvm/src/include/llvm/Analysis/TargetTransformInfo.h b/interpreter/llvm/src/include/llvm/Analysis/TargetTransformInfo.h index ee40a36ccafa8..24edd3826a2e2 100644 --- a/interpreter/llvm/src/include/llvm/Analysis/TargetTransformInfo.h +++ b/interpreter/llvm/src/include/llvm/Analysis/TargetTransformInfo.h @@ -155,6 +155,13 @@ class TargetTransformInfo { int getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef<const Value *> Operands) const; + /// \brief Estimate the cost of an EXT operation when lowered. + /// + /// The contract for this function is the same as \c getOperationCost except + /// that it supports an interface that provides extra information specific to + /// the EXT operation. + int getExtCost(const Instruction *I, const Value *Src) const; + /// \brief Estimate the cost of a function call when lowered. /// /// The contract for this is the same as \c getOperationCost except that it @@ -216,9 +223,23 @@ class TargetTransformInfo { /// other context they may not be folded. This routine can distinguish such /// cases. /// + /// \p Operands is a list of operands which can be a result of transformations + /// of the current operands. The number of the operands on the list must equal + /// the number of the current operands the IR user has. Their order on the + /// list must be the same as the order of the current operands the IR user + /// has. + /// /// The returned cost is defined in terms of \c TargetCostConstants, see its /// comments for a detailed explanation of the cost values. - int getUserCost(const User *U) const; + int getUserCost(const User *U, ArrayRef<const Value *> Operands) const; + + /// \brief This is a helper function which calls the two-argument getUserCost + /// with \p Operands which are the current operands U has. + int getUserCost(const User *U) const { + SmallVector<const Value *, 4> Operands(U->value_op_begin(), + U->value_op_end()); + return getUserCost(U, Operands); + } /// \brief Return true if branch divergence exists. /// @@ -235,6 +256,11 @@ class TargetTransformInfo { /// starting with the sources of divergence. bool isSourceOfDivergence(const Value *V) const; + /// \brief Returns true for the target-specific set of operations which + /// produce a uniform result even when taking non-uniform arguments. + bool isAlwaysUniform(const Value *V) const; + /// Returns the address space ID for a target's 'flat' address space. Note /// this is not necessarily the same as addrspace(0), which LLVM sometimes /// refers to as the generic address space. The flat address space is a @@ -267,6 +293,19 @@ class TargetTransformInfo { /// incurs significant execution cost. bool isLoweredToCall(const Function *F) const; + struct LSRCost { + /// TODO: Some of these could be merged. Also, a lexical ordering + /// isn't always optimal.
+ unsigned Insns; + unsigned NumRegs; + unsigned AddRecCost; + unsigned NumIVMuls; + unsigned NumBaseAdds; + unsigned ImmCost; + unsigned SetupCost; + unsigned ScaleCost; + }; + /// Parameters that control the generic loop unrolling transformation. struct UnrollingPreferences { /// The cost threshold for the unrolled loop. Should be relative to the @@ -348,7 +387,8 @@ class TargetTransformInfo { /// \brief Get target-customized preferences for the generic loop unrolling /// transformation. The caller will initialize UP with the current /// target-independent defaults. - void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const; + void getUnrollingPreferences(Loop *L, ScalarEvolution &, + UnrollingPreferences &UP) const; /// @} @@ -385,6 +425,10 @@ class TargetTransformInfo { bool HasBaseReg, int64_t Scale, unsigned AddrSpace = 0) const; + /// \brief Return true if LSR cost of C1 is lower than C2. + bool isLSRCostLess(TargetTransformInfo::LSRCost &C1, + TargetTransformInfo::LSRCost &C2) const; + /// \brief Return true if the target supports masked load/store /// AVX2 and AVX-512 targets allow masks for consecutive load and store bool isLegalMaskedStore(Type *DataType) const; @@ -396,6 +440,9 @@ class TargetTransformInfo { bool isLegalMaskedScatter(Type *DataType) const; bool isLegalMaskedGather(Type *DataType) const; + /// Return true if target doesn't mind addresses in vectors. + bool prefersVectorizedAddressing() const; + /// \brief Return the cost of the scaling factor used in the addressing /// mode represented by AM for this target, for a load/store /// of the specified type. @@ -451,6 +498,9 @@ class TargetTransformInfo { /// \brief Don't restrict interleaved unrolling to small loops. bool enableAggressiveInterleaving(bool LoopHasReductions) const; + /// \brief Enable inline expansion of memcmp. + bool expandMemCmp(Instruction *I, unsigned &MaxLoadSize) const; + /// \brief Enable matching of interleaved access groups. bool enableInterleavedAccessVectorization() const; @@ -537,6 +587,9 @@ class TargetTransformInfo { /// \return The width of the largest scalar or vector register type. unsigned getRegisterBitWidth(bool Vector) const; + /// \return The width of the smallest vector register type. + unsigned getMinVectorRegisterBitWidth() const; + /// \return True if it should be considered for address type promotion. /// \p AllowPromotionWithoutCommonHeader Set true if promoting \p I is /// profitable without finding other extensions fed by the same input. @@ -696,6 +749,10 @@ class TargetTransformInfo { /// if false is returned. bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const; + /// \returns The maximum element size, in bytes, for an element + /// unordered-atomic memory intrinsic. + unsigned getAtomicMemIntrinsicMaxElementSize() const; + /// \returns A value which is the result of the given memory intrinsic. New /// instructions may be created to extract the result from the given intrinsic /// memory operation. Returns nullptr if the target cannot create a result /// from the given intrinsic. @@ -703,6 +760,28 @@ Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType) const; + /// \returns The type to use in a loop expansion of a memcpy call. + Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, + unsigned SrcAlign, unsigned DestAlign) const; + + /// \param[out] OpsOut The operand types to copy RemainingBytes of memory. + /// \param RemainingBytes The number of bytes to copy.
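+ /// \param SrcAlign The known alignment of the source, in bytes.
+ /// \param DestAlign The known alignment of the destination, in bytes.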
+ /// + /// Calculates the operand types to use when copying \p RemainingBytes of + /// memory, where source and destination alignments are \p SrcAlign and + /// \p DestAlign respectively. + void getMemcpyLoopResidualLoweringType(SmallVectorImpl &OpsOut, + LLVMContext &Context, + unsigned RemainingBytes, + unsigned SrcAlign, + unsigned DestAlign) const; + + /// \returns True if we want to test the new memcpy lowering functionality in + /// Transform/Utils. + /// Temporary. Will be removed once we move to the new functionality and + /// remove the old. + bool useWideIRMemcpyLoopLowering() const; + /// \returns True if the two functions have compatible attributes for inlining /// purposes. bool areInlineCompatible(const Function *Caller, @@ -777,6 +856,7 @@ class TargetTransformInfo::Concept { virtual int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) = 0; virtual int getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef Operands) = 0; + virtual int getExtCost(const Instruction *I, const Value *Src) = 0; virtual int getCallCost(FunctionType *FTy, int NumArgs) = 0; virtual int getCallCost(const Function *F, int NumArgs) = 0; virtual int getCallCost(const Function *F, @@ -788,22 +868,28 @@ class TargetTransformInfo::Concept { ArrayRef Arguments) = 0; virtual unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize) = 0; - virtual int getUserCost(const User *U) = 0; + virtual int + getUserCost(const User *U, ArrayRef Operands) = 0; virtual bool hasBranchDivergence() = 0; virtual bool isSourceOfDivergence(const Value *V) = 0; + virtual bool isAlwaysUniform(const Value *V) = 0; virtual unsigned getFlatAddressSpace() = 0; virtual bool isLoweredToCall(const Function *F) = 0; - virtual void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) = 0; + virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &, + UnrollingPreferences &UP) = 0; virtual bool isLegalAddImmediate(int64_t Imm) = 0; virtual bool isLegalICmpImmediate(int64_t Imm) = 0; virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) = 0; + virtual bool isLSRCostLess(TargetTransformInfo::LSRCost &C1, + TargetTransformInfo::LSRCost &C2) = 0; virtual bool isLegalMaskedStore(Type *DataType) = 0; virtual bool isLegalMaskedLoad(Type *DataType) = 0; virtual bool isLegalMaskedScatter(Type *DataType) = 0; virtual bool isLegalMaskedGather(Type *DataType) = 0; + virtual bool prefersVectorizedAddressing() = 0; virtual int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) = 0; @@ -821,6 +907,7 @@ class TargetTransformInfo::Concept { unsigned VF) = 0; virtual bool supportsEfficientVectorElementLoadStore() = 0; virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0; + virtual bool expandMemCmp(Instruction *I, unsigned &MaxLoadSize) = 0; virtual bool enableInterleavedAccessVectorization() = 0; virtual bool isFPVectorizationPotentiallyUnsafe() = 0; virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context, @@ -839,7 +926,8 @@ class TargetTransformInfo::Concept { virtual int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty) = 0; virtual unsigned getNumberOfRegisters(bool Vector) = 0; - virtual unsigned getRegisterBitWidth(bool Vector) = 0; + virtual unsigned getRegisterBitWidth(bool Vector) const = 0; + virtual unsigned getMinVectorRegisterBitWidth() = 0; virtual bool shouldConsiderAddressTypePromotion( 
const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0; virtual unsigned getCacheLineSize() = 0; @@ -892,8 +980,15 @@ class TargetTransformInfo::Concept { virtual unsigned getCostOfKeepingLiveOverCall(ArrayRef Tys) = 0; virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) = 0; + virtual unsigned getAtomicMemIntrinsicMaxElementSize() const = 0; virtual Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType) = 0; + virtual Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, + unsigned SrcAlign, + unsigned DestAlign) const = 0; + virtual void getMemcpyLoopResidualLoweringType( + SmallVectorImpl &OpsOut, LLVMContext &Context, + unsigned RemainingBytes, unsigned SrcAlign, unsigned DestAlign) const = 0; virtual bool areInlineCompatible(const Function *Caller, const Function *Callee) const = 0; virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const = 0; @@ -935,6 +1030,9 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept { ArrayRef Operands) override { return Impl.getGEPCost(PointeeType, Ptr, Operands); } + int getExtCost(const Instruction *I, const Value *Src) override { + return Impl.getExtCost(I, Src); + } int getCallCost(FunctionType *FTy, int NumArgs) override { return Impl.getCallCost(FTy, NumArgs); } @@ -956,12 +1054,18 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept { ArrayRef Arguments) override { return Impl.getIntrinsicCost(IID, RetTy, Arguments); } - int getUserCost(const User *U) override { return Impl.getUserCost(U); } + int getUserCost(const User *U, ArrayRef Operands) override { + return Impl.getUserCost(U, Operands); + } bool hasBranchDivergence() override { return Impl.hasBranchDivergence(); } bool isSourceOfDivergence(const Value *V) override { return Impl.isSourceOfDivergence(V); } + bool isAlwaysUniform(const Value *V) override { + return Impl.isAlwaysUniform(V); + } + unsigned getFlatAddressSpace() override { return Impl.getFlatAddressSpace(); } @@ -969,8 +1073,9 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept { bool isLoweredToCall(const Function *F) override { return Impl.isLoweredToCall(F); } - void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) override { - return Impl.getUnrollingPreferences(L, UP); + void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, + UnrollingPreferences &UP) override { + return Impl.getUnrollingPreferences(L, SE, UP); } bool isLegalAddImmediate(int64_t Imm) override { return Impl.isLegalAddImmediate(Imm); @@ -984,6 +1089,10 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept { return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale, AddrSpace); } + bool isLSRCostLess(TargetTransformInfo::LSRCost &C1, + TargetTransformInfo::LSRCost &C2) override { + return Impl.isLSRCostLess(C1, C2); + } bool isLegalMaskedStore(Type *DataType) override { return Impl.isLegalMaskedStore(DataType); } @@ -996,6 +1105,9 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept { bool isLegalMaskedGather(Type *DataType) override { return Impl.isLegalMaskedGather(DataType); } + bool prefersVectorizedAddressing() override { + return Impl.prefersVectorizedAddressing(); + } int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) override { @@ -1036,6 +1148,9 @@ class TargetTransformInfo::Model final : public 
TargetTransformInfo::Concept { bool enableAggressiveInterleaving(bool LoopHasReductions) override { return Impl.enableAggressiveInterleaving(LoopHasReductions); } + bool expandMemCmp(Instruction *I, unsigned &MaxLoadSize) override { + return Impl.expandMemCmp(I, MaxLoadSize); + } bool enableInterleavedAccessVectorization() override { return Impl.enableInterleavedAccessVectorization(); } @@ -1073,9 +1188,12 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept { unsigned getNumberOfRegisters(bool Vector) override { return Impl.getNumberOfRegisters(Vector); } - unsigned getRegisterBitWidth(bool Vector) override { + unsigned getRegisterBitWidth(bool Vector) const override { return Impl.getRegisterBitWidth(Vector); } + unsigned getMinVectorRegisterBitWidth() override { + return Impl.getMinVectorRegisterBitWidth(); + } bool shouldConsiderAddressTypePromotion( const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override { return Impl.shouldConsiderAddressTypePromotion( @@ -1180,10 +1298,26 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept { MemIntrinsicInfo &Info) override { return Impl.getTgtMemIntrinsic(Inst, Info); } + unsigned getAtomicMemIntrinsicMaxElementSize() const override { + return Impl.getAtomicMemIntrinsicMaxElementSize(); + } Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType) override { return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType); } + Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, + unsigned SrcAlign, + unsigned DestAlign) const override { + return Impl.getMemcpyLoopLoweringType(Context, Length, SrcAlign, DestAlign); + } + void getMemcpyLoopResidualLoweringType(SmallVectorImpl &OpsOut, + LLVMContext &Context, + unsigned RemainingBytes, + unsigned SrcAlign, + unsigned DestAlign) const override { + Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes, + SrcAlign, DestAlign); + } bool areInlineCompatible(const Function *Caller, const Function *Callee) const override { return Impl.areInlineCompatible(Caller, Callee); diff --git a/interpreter/llvm/src/include/llvm/Analysis/TargetTransformInfoImpl.h b/interpreter/llvm/src/include/llvm/Analysis/TargetTransformInfoImpl.h index 1760dbf254836..0b07fe9aa2323 100644 --- a/interpreter/llvm/src/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/interpreter/llvm/src/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -17,13 +17,13 @@ #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/Operator.h" #include "llvm/IR/Type.h" -#include "llvm/Analysis/VectorUtils.h" namespace llvm { @@ -120,6 +120,10 @@ class TargetTransformInfoImplBase { return SI.getNumCases(); } + int getExtCost(const Instruction *I, const Value *Src) { + return TTI::TCC_Basic; + } + unsigned getCallCost(FunctionType *FTy, int NumArgs) { assert(FTy && "FunctionType must be provided to this routine."); @@ -177,6 +181,8 @@ class TargetTransformInfoImplBase { bool isSourceOfDivergence(const Value *V) { return false; } + bool isAlwaysUniform(const Value *V) { return false; } + unsigned getFlatAddressSpace () { return -1; } @@ -215,7 +221,8 @@ class TargetTransformInfoImplBase { return true; } - void getUnrollingPreferences(Loop *, TTI::UnrollingPreferences &) {} + void 
getUnrollingPreferences(Loop *, ScalarEvolution &, + TTI::UnrollingPreferences &) {} bool isLegalAddImmediate(int64_t Imm) { return false; } @@ -229,6 +236,13 @@ class TargetTransformInfoImplBase { return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1); } + bool isLSRCostLess(TTI::LSRCost &C1, TTI::LSRCost &C2) { + return std::tie(C1.NumRegs, C1.AddRecCost, C1.NumIVMuls, C1.NumBaseAdds, + C1.ScaleCost, C1.ImmCost, C1.SetupCost) < + std::tie(C2.NumRegs, C2.AddRecCost, C2.NumIVMuls, C2.NumBaseAdds, + C2.ScaleCost, C2.ImmCost, C2.SetupCost); + } + bool isLegalMaskedStore(Type *DataType) { return false; } bool isLegalMaskedLoad(Type *DataType) { return false; } @@ -237,6 +251,8 @@ class TargetTransformInfoImplBase { bool isLegalMaskedGather(Type *DataType) { return false; } + bool prefersVectorizedAddressing() { return true; } + int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) { // Guess that all legal addressing mode are free. @@ -272,6 +288,8 @@ class TargetTransformInfoImplBase { bool enableAggressiveInterleaving(bool LoopHasReductions) { return false; } + bool expandMemCmp(Instruction *I, unsigned &MaxLoadSize) { return false; } + bool enableInterleavedAccessVectorization() { return false; } bool isFPVectorizationPotentiallyUnsafe() { return false; } @@ -309,7 +327,9 @@ class TargetTransformInfoImplBase { unsigned getNumberOfRegisters(bool Vector) { return 8; } - unsigned getRegisterBitWidth(bool Vector) { return 32; } + unsigned getRegisterBitWidth(bool Vector) const { return 32; } + + unsigned getMinVectorRegisterBitWidth() { return 128; } bool shouldConsiderAddressTypePromotion(const Instruction &I, @@ -414,11 +434,34 @@ class TargetTransformInfoImplBase { return false; } + unsigned getAtomicMemIntrinsicMaxElementSize() const { + // Note for overrides: You must ensure for all element unordered-atomic + // memory intrinsics that all power-of-2 element sizes up to, and + // including, the return value of this method have a corresponding + // runtime lib call. These runtime lib call definitions can be found + // in RuntimeLibcalls.h + return 0; + } + Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType) { return nullptr; } + Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, + unsigned SrcAlign, unsigned DestAlign) const { + return Type::getInt8Ty(Context); + } + + void getMemcpyLoopResidualLoweringType(SmallVectorImpl &OpsOut, + LLVMContext &Context, + unsigned RemainingBytes, + unsigned SrcAlign, + unsigned DestAlign) const { + for (unsigned i = 0; i != RemainingBytes; ++i) + OpsOut.push_back(Type::getInt8Ty(Context)); + } + bool areInlineCompatible(const Function *Caller, const Function *Callee) const { return (Caller->getFnAttribute("target-cpu") == @@ -660,14 +703,14 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase { return static_cast(this)->getIntrinsicCost(IID, RetTy, ParamTys); } - unsigned getUserCost(const User *U) { + unsigned getUserCost(const User *U, ArrayRef Operands) { if (isa(U)) return TTI::TCC_Free; // Model all PHI nodes as free. 
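// For a GEP, the caller-supplied Operands mirror the user's current operands,
// so dropping the first entry (the pointer operand) leaves exactly the index
// list that getGEPCost expects.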
if (const GEPOperator *GEP = dyn_cast(U)) { - SmallVector Indices(GEP->idx_begin(), GEP->idx_end()); - return static_cast(this)->getGEPCost( - GEP->getSourceElementType(), GEP->getPointerOperand(), Indices); + return static_cast(this)->getGEPCost(GEP->getSourceElementType(), + GEP->getPointerOperand(), + Operands.drop_front()); } if (auto CS = ImmutableCallSite(U)) { @@ -689,6 +732,8 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase { // nop on most sane targets. if (isa(CI->getOperand(0))) return TTI::TCC_Free; + if (isa(CI) || isa(CI) || isa(CI)) + return static_cast(this)->getExtCost(CI, Operands.back()); } return static_cast(this)->getOperationCost( diff --git a/interpreter/llvm/src/include/llvm/Analysis/TypeMetadataUtils.h b/interpreter/llvm/src/include/llvm/Analysis/TypeMetadataUtils.h index 17906ba4e3926..422e153a5a78c 100644 --- a/interpreter/llvm/src/include/llvm/Analysis/TypeMetadataUtils.h +++ b/interpreter/llvm/src/include/llvm/Analysis/TypeMetadataUtils.h @@ -20,6 +20,13 @@ namespace llvm { +/// The type of CFI jumptable needed for a function. +enum CfiFunctionLinkage { + CFL_Definition = 0, + CFL_Declaration = 1, + CFL_WeakDeclaration = 2 +}; + /// A call site that could be devirtualized. struct DevirtCallSite { /// The offset from the address point to the virtual function. diff --git a/interpreter/llvm/src/include/llvm/Analysis/ValueTracking.h b/interpreter/llvm/src/include/llvm/Analysis/ValueTracking.h index 18901ecf8c33e..da058b1d3918a 100644 --- a/interpreter/llvm/src/include/llvm/Analysis/ValueTracking.h +++ b/interpreter/llvm/src/include/llvm/Analysis/ValueTracking.h @@ -60,7 +60,8 @@ template class ArrayRef; KnownBits computeKnownBits(const Value *V, const DataLayout &DL, unsigned Depth = 0, AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr, - const DominatorTree *DT = nullptr); + const DominatorTree *DT = nullptr, + OptimizationRemarkEmitter *ORE = nullptr); /// Compute known bits from the range metadata. /// \p KnownZero the set of bits that are known to be zero /// \p KnownOne the set of bits that are known to be one @@ -73,14 +74,6 @@ template class ArrayRef; const Instruction *CxtI = nullptr, const DominatorTree *DT = nullptr); - /// Determine whether the sign bit is known to be zero or one. Convenience - /// wrapper around computeKnownBits. - void ComputeSignBit(const Value *V, bool &KnownZero, bool &KnownOne, - const DataLayout &DL, unsigned Depth = 0, - AssumptionCache *AC = nullptr, - const Instruction *CxtI = nullptr, - const DominatorTree *DT = nullptr); - /// Return true if the given value is known to have exactly one bit set when /// defined. For vectors return true if every element is known to be a power /// of two when defined. Supports values with integer or pointer type and @@ -92,6 +85,8 @@ template class ArrayRef; const Instruction *CxtI = nullptr, const DominatorTree *DT = nullptr); + bool isOnlyUsedInZeroEqualityComparison(const Instruction *CxtI); + /// Return true if the given value is known to be non-zero when defined. For /// vectors, return true if every element is known to be non-zero when /// defined. For pointers, if the context instruction and dominator tree are @@ -226,9 +221,38 @@ template class ArrayRef; DL); } - /// Returns true if the GEP is based on a pointer to a string (array of i8), - /// and is indexing into this string. 
- bool isGEPBasedOnPointerToString(const GEPOperator *GEP); + /// Returns true if the GEP is based on a pointer to a string (array of + /// \p CharSize integers) and is indexing into this string. + bool isGEPBasedOnPointerToString(const GEPOperator *GEP, + unsigned CharSize = 8); + + /// Represents offset+length into a ConstantDataArray. + struct ConstantDataArraySlice { + /// ConstantDataArray pointer. nullptr indicates a zeroinitializer (a valid + /// initializer, it just doesn't fit the ConstantDataArray interface). + const ConstantDataArray *Array; + /// Slice starts at this Offset. + uint64_t Offset; + /// Length of the slice. + uint64_t Length; + + /// Moves the Offset and adjusts Length accordingly. + void move(uint64_t Delta) { + assert(Delta < Length); + Offset += Delta; + Length -= Delta; + } + /// Convenience accessor for elements in the slice. + uint64_t operator[](unsigned I) const { + return Array == nullptr ? 0 : Array->getElementAsInteger(I + Offset); + } + }; + + /// Returns true if the value \p V is a pointer into a ConstantDataArray. + /// If successful \p Slice will point to a ConstantDataArray info object + /// with an appropriate offset. + bool getConstantDataArrayInfo(const Value *V, ConstantDataArraySlice &Slice, + unsigned ElementSize, uint64_t Offset = 0); /// This function computes the length of a null-terminated C string pointed to /// by V. If successful, it returns true and returns the string in Str. If @@ -241,7 +265,7 @@ template <typename T> class ArrayRef; /// If we can compute the length of the string pointed to by the specified /// pointer, return 'len+1'. If we can't, return 0. - uint64_t GetStringLength(const Value *V); + uint64_t GetStringLength(const Value *V, unsigned CharSize = 8); /// This method strips off any GEP address adjustments and pointer casts from /// the specified value, returning the original object being addressed. Note @@ -288,6 +312,12 @@ template <typename T> class ArrayRef; const DataLayout &DL, LoopInfo *LI = nullptr, unsigned MaxLookup = 6); + /// This is a wrapper around GetUnderlyingObjects and adds support for basic + /// ptrtoint+arithmetic+inttoptr sequences. + void getUnderlyingObjectsForCodeGen(const Value *V, + SmallVectorImpl<Value *> &Objects, + const DataLayout &DL); + /// Return true if the only users of this pointer are lifetime markers. bool onlyUsedByLifetimeMarkers(const Value *V); @@ -499,8 +529,7 @@ template <typename T> class ArrayRef; /// (A) Optional<bool> isImpliedCondition(const Value *LHS, const Value *RHS, const DataLayout &DL, - bool InvertAPred = false, - unsigned Depth = 0, + bool LHSIsFalse = false, unsigned Depth = 0, AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr, const DominatorTree *DT = nullptr); diff --git a/interpreter/llvm/src/include/llvm/BinaryFormat/COFF.h b/interpreter/llvm/src/include/llvm/BinaryFormat/COFF.h new file mode 100644 index 0000000000000..b395db6eaa838 --- /dev/null +++ b/interpreter/llvm/src/include/llvm/BinaryFormat/COFF.h @@ -0,0 +1,719 @@ +//===-- llvm/BinaryFormat/COFF.h --------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains definitions used in Windows COFF files.
+// +// Structures and enums defined within this file were created using +// information from Microsoft's publicly available PE/COFF format document: +// +// Microsoft Portable Executable and Common Object File Format Specification +// Revision 8.1 - February 15, 2008 +// +// As of 5/2/2010, hosted by Microsoft at: +// http://www.microsoft.com/whdc/system/platform/firmware/pecoff.mspx +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_BINARYFORMAT_COFF_H +#define LLVM_BINARYFORMAT_COFF_H + +#include "llvm/Support/DataTypes.h" +#include <cassert> +#include <cstring> + +namespace llvm { +namespace COFF { + +// The maximum number of sections that a COFF object can have (inclusive). +const int32_t MaxNumberOfSections16 = 65279; + +// The PE signature bytes that follow the DOS stub header. +static const char PEMagic[] = {'P', 'E', '\0', '\0'}; + +static const char BigObjMagic[] = { + '\xc7', '\xa1', '\xba', '\xd1', '\xee', '\xba', '\xa9', '\x4b', + '\xaf', '\x20', '\xfa', '\xf6', '\x6a', '\xa4', '\xdc', '\xb8', +}; + +static const char ClGlObjMagic[] = { + '\x38', '\xfe', '\xb3', '\x0c', '\xa5', '\xd9', '\xab', '\x4d', + '\xac', '\x9b', '\xd6', '\xb6', '\x22', '\x26', '\x53', '\xc2', +}; + +// The signature bytes that start a .res file. +static const char WinResMagic[] = { + '\x00', '\x00', '\x00', '\x00', '\x20', '\x00', '\x00', '\x00', + '\xff', '\xff', '\x00', '\x00', '\xff', '\xff', '\x00', '\x00', +}; + +// Sizes in bytes of various things in the COFF format. +enum { + Header16Size = 20, + Header32Size = 56, + NameSize = 8, + Symbol16Size = 18, + Symbol32Size = 20, + SectionSize = 40, + RelocationSize = 10 +}; + +struct header { + uint16_t Machine; + int32_t NumberOfSections; + uint32_t TimeDateStamp; + uint32_t PointerToSymbolTable; + uint32_t NumberOfSymbols; + uint16_t SizeOfOptionalHeader; + uint16_t Characteristics; +}; + +struct BigObjHeader { + enum : uint16_t { MinBigObjectVersion = 2 }; + + uint16_t Sig1; ///< Must be IMAGE_FILE_MACHINE_UNKNOWN (0). + uint16_t Sig2; ///< Must be 0xFFFF. + uint16_t Version; + uint16_t Machine; + uint32_t TimeDateStamp; + uint8_t UUID[16]; + uint32_t unused1; + uint32_t unused2; + uint32_t unused3; + uint32_t unused4; + uint32_t NumberOfSections; + uint32_t PointerToSymbolTable; + uint32_t NumberOfSymbols; +}; + +enum MachineTypes { + MT_Invalid = 0xffff, + + IMAGE_FILE_MACHINE_UNKNOWN = 0x0, + IMAGE_FILE_MACHINE_AM33 = 0x13, + IMAGE_FILE_MACHINE_AMD64 = 0x8664, + IMAGE_FILE_MACHINE_ARM = 0x1C0, + IMAGE_FILE_MACHINE_ARMNT = 0x1C4, + IMAGE_FILE_MACHINE_ARM64 = 0xAA64, + IMAGE_FILE_MACHINE_EBC = 0xEBC, + IMAGE_FILE_MACHINE_I386 = 0x14C, + IMAGE_FILE_MACHINE_IA64 = 0x200, + IMAGE_FILE_MACHINE_M32R = 0x9041, + IMAGE_FILE_MACHINE_MIPS16 = 0x266, + IMAGE_FILE_MACHINE_MIPSFPU = 0x366, + IMAGE_FILE_MACHINE_MIPSFPU16 = 0x466, + IMAGE_FILE_MACHINE_POWERPC = 0x1F0, + IMAGE_FILE_MACHINE_POWERPCFP = 0x1F1, + IMAGE_FILE_MACHINE_R4000 = 0x166, + IMAGE_FILE_MACHINE_SH3 = 0x1A2, + IMAGE_FILE_MACHINE_SH3DSP = 0x1A3, + IMAGE_FILE_MACHINE_SH4 = 0x1A6, + IMAGE_FILE_MACHINE_SH5 = 0x1A8, + IMAGE_FILE_MACHINE_THUMB = 0x1C2, + IMAGE_FILE_MACHINE_WCEMIPSV2 = 0x169 +}; + +enum Characteristics { + C_Invalid = 0, + + /// The file does not contain base relocations and must be loaded at its + /// preferred base. If this cannot be done, the loader will error. + IMAGE_FILE_RELOCS_STRIPPED = 0x0001, + /// The file is valid and can be run. + IMAGE_FILE_EXECUTABLE_IMAGE = 0x0002, + /// COFF line numbers have been stripped.
This is deprecated and should be + /// 0. + IMAGE_FILE_LINE_NUMS_STRIPPED = 0x0004, + /// COFF symbol table entries for local symbols have been removed. This is + /// deprecated and should be 0. + IMAGE_FILE_LOCAL_SYMS_STRIPPED = 0x0008, + /// Aggressively trim working set. This is deprecated and must be 0. + IMAGE_FILE_AGGRESSIVE_WS_TRIM = 0x0010, + /// Image can handle > 2GiB addresses. + IMAGE_FILE_LARGE_ADDRESS_AWARE = 0x0020, + /// Little endian: the LSB precedes the MSB in memory. This is deprecated + /// and should be 0. + IMAGE_FILE_BYTES_REVERSED_LO = 0x0080, + /// Machine is based on a 32bit word architecture. + IMAGE_FILE_32BIT_MACHINE = 0x0100, + /// Debugging info has been removed. + IMAGE_FILE_DEBUG_STRIPPED = 0x0200, + /// If the image is on removable media, fully load it and copy it to swap. + IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP = 0x0400, + /// If the image is on network media, fully load it and copy it to swap. + IMAGE_FILE_NET_RUN_FROM_SWAP = 0x0800, + /// The image file is a system file, not a user program. + IMAGE_FILE_SYSTEM = 0x1000, + /// The image file is a DLL. + IMAGE_FILE_DLL = 0x2000, + /// This file should only be run on a uniprocessor machine. + IMAGE_FILE_UP_SYSTEM_ONLY = 0x4000, + /// Big endian: the MSB precedes the LSB in memory. This is deprecated + /// and should be 0. + IMAGE_FILE_BYTES_REVERSED_HI = 0x8000 +}; + +enum ResourceTypeID { + RID_Cursor = 1, + RID_Bitmap = 2, + RID_Icon = 3, + RID_Menu = 4, + RID_Dialog = 5, + RID_String = 6, + RID_FontDir = 7, + RID_Font = 8, + RID_Accelerator = 9, + RID_RCData = 10, + RID_MessageTable = 11, + RID_Group_Cursor = 12, + RID_Group_Icon = 14, + RID_Version = 16, + RID_DLGInclude = 17, + RID_PlugPlay = 19, + RID_VXD = 20, + RID_AniCursor = 21, + RID_AniIcon = 22, + RID_HTML = 23, + RID_Manifest = 24, +}; + +struct symbol { + char Name[NameSize]; + uint32_t Value; + int32_t SectionNumber; + uint16_t Type; + uint8_t StorageClass; + uint8_t NumberOfAuxSymbols; +}; + +enum SymbolSectionNumber : int32_t { + IMAGE_SYM_DEBUG = -2, + IMAGE_SYM_ABSOLUTE = -1, + IMAGE_SYM_UNDEFINED = 0 +}; + +/// Storage class tells where and what the symbol represents +enum SymbolStorageClass { + SSC_Invalid = 0xff, + + IMAGE_SYM_CLASS_END_OF_FUNCTION = -1, ///< Physical end of function + IMAGE_SYM_CLASS_NULL = 0, ///< No symbol + IMAGE_SYM_CLASS_AUTOMATIC = 1, ///< Stack variable + IMAGE_SYM_CLASS_EXTERNAL = 2, ///< External symbol + IMAGE_SYM_CLASS_STATIC = 3, ///< Static + IMAGE_SYM_CLASS_REGISTER = 4, ///< Register variable + IMAGE_SYM_CLASS_EXTERNAL_DEF = 5, ///< External definition + IMAGE_SYM_CLASS_LABEL = 6, ///< Label + IMAGE_SYM_CLASS_UNDEFINED_LABEL = 7, ///< Undefined label + IMAGE_SYM_CLASS_MEMBER_OF_STRUCT = 8, ///< Member of structure + IMAGE_SYM_CLASS_ARGUMENT = 9, ///< Function argument + IMAGE_SYM_CLASS_STRUCT_TAG = 10, ///< Structure tag + IMAGE_SYM_CLASS_MEMBER_OF_UNION = 11, ///< Member of union + IMAGE_SYM_CLASS_UNION_TAG = 12, ///< Union tag + IMAGE_SYM_CLASS_TYPE_DEFINITION = 13, ///< Type definition + IMAGE_SYM_CLASS_UNDEFINED_STATIC = 14, ///< Undefined static + IMAGE_SYM_CLASS_ENUM_TAG = 15, ///< Enumeration tag + IMAGE_SYM_CLASS_MEMBER_OF_ENUM = 16, ///< Member of enumeration + IMAGE_SYM_CLASS_REGISTER_PARAM = 17, ///< Register parameter + IMAGE_SYM_CLASS_BIT_FIELD = 18, ///< Bit field + /// ".bb" or ".eb" - beginning or end of block + IMAGE_SYM_CLASS_BLOCK = 100, + /// ".bf" or ".ef" - beginning or end of function + IMAGE_SYM_CLASS_FUNCTION = 101, + IMAGE_SYM_CLASS_END_OF_STRUCT = 102, ///< End of structure + 
IMAGE_SYM_CLASS_FILE = 103, ///< File name + /// Line number, reformatted as symbol + IMAGE_SYM_CLASS_SECTION = 104, + IMAGE_SYM_CLASS_WEAK_EXTERNAL = 105, ///< Duplicate tag + /// External symbol in dmert public lib + IMAGE_SYM_CLASS_CLR_TOKEN = 107 +}; + +enum SymbolBaseType { + IMAGE_SYM_TYPE_NULL = 0, ///< No type information or unknown base type. + IMAGE_SYM_TYPE_VOID = 1, ///< Used with void pointers and functions. + IMAGE_SYM_TYPE_CHAR = 2, ///< A character (signed byte). + IMAGE_SYM_TYPE_SHORT = 3, ///< A 2-byte signed integer. + IMAGE_SYM_TYPE_INT = 4, ///< A natural integer type on the target. + IMAGE_SYM_TYPE_LONG = 5, ///< A 4-byte signed integer. + IMAGE_SYM_TYPE_FLOAT = 6, ///< A 4-byte floating-point number. + IMAGE_SYM_TYPE_DOUBLE = 7, ///< An 8-byte floating-point number. + IMAGE_SYM_TYPE_STRUCT = 8, ///< A structure. + IMAGE_SYM_TYPE_UNION = 9, ///< An union. + IMAGE_SYM_TYPE_ENUM = 10, ///< An enumerated type. + IMAGE_SYM_TYPE_MOE = 11, ///< A member of enumeration (a specific value). + IMAGE_SYM_TYPE_BYTE = 12, ///< A byte; unsigned 1-byte integer. + IMAGE_SYM_TYPE_WORD = 13, ///< A word; unsigned 2-byte integer. + IMAGE_SYM_TYPE_UINT = 14, ///< An unsigned integer of natural size. + IMAGE_SYM_TYPE_DWORD = 15 ///< An unsigned 4-byte integer. +}; + +enum SymbolComplexType { + IMAGE_SYM_DTYPE_NULL = 0, ///< No complex type; simple scalar variable. + IMAGE_SYM_DTYPE_POINTER = 1, ///< A pointer to base type. + IMAGE_SYM_DTYPE_FUNCTION = 2, ///< A function that returns a base type. + IMAGE_SYM_DTYPE_ARRAY = 3, ///< An array of base type. + + /// Type is formed as (base + (derived << SCT_COMPLEX_TYPE_SHIFT)) + SCT_COMPLEX_TYPE_SHIFT = 4 +}; + +enum AuxSymbolType { IMAGE_AUX_SYMBOL_TYPE_TOKEN_DEF = 1 }; + +struct section { + char Name[NameSize]; + uint32_t VirtualSize; + uint32_t VirtualAddress; + uint32_t SizeOfRawData; + uint32_t PointerToRawData; + uint32_t PointerToRelocations; + uint32_t PointerToLineNumbers; + uint16_t NumberOfRelocations; + uint16_t NumberOfLineNumbers; + uint32_t Characteristics; +}; + +enum SectionCharacteristics : uint32_t { + SC_Invalid = 0xffffffff, + + IMAGE_SCN_TYPE_NOLOAD = 0x00000002, + IMAGE_SCN_TYPE_NO_PAD = 0x00000008, + IMAGE_SCN_CNT_CODE = 0x00000020, + IMAGE_SCN_CNT_INITIALIZED_DATA = 0x00000040, + IMAGE_SCN_CNT_UNINITIALIZED_DATA = 0x00000080, + IMAGE_SCN_LNK_OTHER = 0x00000100, + IMAGE_SCN_LNK_INFO = 0x00000200, + IMAGE_SCN_LNK_REMOVE = 0x00000800, + IMAGE_SCN_LNK_COMDAT = 0x00001000, + IMAGE_SCN_GPREL = 0x00008000, + IMAGE_SCN_MEM_PURGEABLE = 0x00020000, + IMAGE_SCN_MEM_16BIT = 0x00020000, + IMAGE_SCN_MEM_LOCKED = 0x00040000, + IMAGE_SCN_MEM_PRELOAD = 0x00080000, + IMAGE_SCN_ALIGN_1BYTES = 0x00100000, + IMAGE_SCN_ALIGN_2BYTES = 0x00200000, + IMAGE_SCN_ALIGN_4BYTES = 0x00300000, + IMAGE_SCN_ALIGN_8BYTES = 0x00400000, + IMAGE_SCN_ALIGN_16BYTES = 0x00500000, + IMAGE_SCN_ALIGN_32BYTES = 0x00600000, + IMAGE_SCN_ALIGN_64BYTES = 0x00700000, + IMAGE_SCN_ALIGN_128BYTES = 0x00800000, + IMAGE_SCN_ALIGN_256BYTES = 0x00900000, + IMAGE_SCN_ALIGN_512BYTES = 0x00A00000, + IMAGE_SCN_ALIGN_1024BYTES = 0x00B00000, + IMAGE_SCN_ALIGN_2048BYTES = 0x00C00000, + IMAGE_SCN_ALIGN_4096BYTES = 0x00D00000, + IMAGE_SCN_ALIGN_8192BYTES = 0x00E00000, + IMAGE_SCN_LNK_NRELOC_OVFL = 0x01000000, + IMAGE_SCN_MEM_DISCARDABLE = 0x02000000, + IMAGE_SCN_MEM_NOT_CACHED = 0x04000000, + IMAGE_SCN_MEM_NOT_PAGED = 0x08000000, + IMAGE_SCN_MEM_SHARED = 0x10000000, + IMAGE_SCN_MEM_EXECUTE = 0x20000000, + IMAGE_SCN_MEM_READ = 0x40000000, + IMAGE_SCN_MEM_WRITE = 0x80000000 +}; + 
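The IMAGE_SCN_ALIGN_* values above are not independent flag bits: bits 20-23 of Characteristics form a 4-bit field whose value n encodes an alignment of 2^(n-1) bytes. A minimal decoder, given here only as an illustrative sketch and not part of this patch, makes the encoding concrete:

#include <cstdint>

// Extract the section alignment encoded in bits 20-23 of Characteristics.
// A field value of n means an alignment of 2^(n-1) bytes; a field value of
// 0 means the alignment is not specified.
inline uint32_t getSectionAlignment(uint32_t Characteristics) {
  uint32_t Field = (Characteristics >> 20) & 0xF;
  return Field ? 1u << (Field - 1) : 0;
}

For example, IMAGE_SCN_ALIGN_16BYTES (0x00500000) decodes to 1 << (5 - 1) = 16.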
+struct relocation { + uint32_t VirtualAddress; + uint32_t SymbolTableIndex; + uint16_t Type; +}; + +enum RelocationTypeI386 { + IMAGE_REL_I386_ABSOLUTE = 0x0000, + IMAGE_REL_I386_DIR16 = 0x0001, + IMAGE_REL_I386_REL16 = 0x0002, + IMAGE_REL_I386_DIR32 = 0x0006, + IMAGE_REL_I386_DIR32NB = 0x0007, + IMAGE_REL_I386_SEG12 = 0x0009, + IMAGE_REL_I386_SECTION = 0x000A, + IMAGE_REL_I386_SECREL = 0x000B, + IMAGE_REL_I386_TOKEN = 0x000C, + IMAGE_REL_I386_SECREL7 = 0x000D, + IMAGE_REL_I386_REL32 = 0x0014 +}; + +enum RelocationTypeAMD64 { + IMAGE_REL_AMD64_ABSOLUTE = 0x0000, + IMAGE_REL_AMD64_ADDR64 = 0x0001, + IMAGE_REL_AMD64_ADDR32 = 0x0002, + IMAGE_REL_AMD64_ADDR32NB = 0x0003, + IMAGE_REL_AMD64_REL32 = 0x0004, + IMAGE_REL_AMD64_REL32_1 = 0x0005, + IMAGE_REL_AMD64_REL32_2 = 0x0006, + IMAGE_REL_AMD64_REL32_3 = 0x0007, + IMAGE_REL_AMD64_REL32_4 = 0x0008, + IMAGE_REL_AMD64_REL32_5 = 0x0009, + IMAGE_REL_AMD64_SECTION = 0x000A, + IMAGE_REL_AMD64_SECREL = 0x000B, + IMAGE_REL_AMD64_SECREL7 = 0x000C, + IMAGE_REL_AMD64_TOKEN = 0x000D, + IMAGE_REL_AMD64_SREL32 = 0x000E, + IMAGE_REL_AMD64_PAIR = 0x000F, + IMAGE_REL_AMD64_SSPAN32 = 0x0010 +}; + +enum RelocationTypesARM { + IMAGE_REL_ARM_ABSOLUTE = 0x0000, + IMAGE_REL_ARM_ADDR32 = 0x0001, + IMAGE_REL_ARM_ADDR32NB = 0x0002, + IMAGE_REL_ARM_BRANCH24 = 0x0003, + IMAGE_REL_ARM_BRANCH11 = 0x0004, + IMAGE_REL_ARM_TOKEN = 0x0005, + IMAGE_REL_ARM_BLX24 = 0x0008, + IMAGE_REL_ARM_BLX11 = 0x0009, + IMAGE_REL_ARM_SECTION = 0x000E, + IMAGE_REL_ARM_SECREL = 0x000F, + IMAGE_REL_ARM_MOV32A = 0x0010, + IMAGE_REL_ARM_MOV32T = 0x0011, + IMAGE_REL_ARM_BRANCH20T = 0x0012, + IMAGE_REL_ARM_BRANCH24T = 0x0014, + IMAGE_REL_ARM_BLX23T = 0x0015 +}; + +enum RelocationTypesARM64 { + IMAGE_REL_ARM64_ABSOLUTE = 0x0000, + IMAGE_REL_ARM64_ADDR32 = 0x0001, + IMAGE_REL_ARM64_ADDR32NB = 0x0002, + IMAGE_REL_ARM64_BRANCH26 = 0x0003, + IMAGE_REL_ARM64_PAGEBASE_REL21 = 0x0004, + IMAGE_REL_ARM64_REL21 = 0x0005, + IMAGE_REL_ARM64_PAGEOFFSET_12A = 0x0006, + IMAGE_REL_ARM64_PAGEOFFSET_12L = 0x0007, + IMAGE_REL_ARM64_SECREL = 0x0008, + IMAGE_REL_ARM64_SECREL_LOW12A = 0x0009, + IMAGE_REL_ARM64_SECREL_HIGH12A = 0x000A, + IMAGE_REL_ARM64_SECREL_LOW12L = 0x000B, + IMAGE_REL_ARM64_TOKEN = 0x000C, + IMAGE_REL_ARM64_SECTION = 0x000D, + IMAGE_REL_ARM64_ADDR64 = 0x000E, + IMAGE_REL_ARM64_BRANCH19 = 0x000F, + IMAGE_REL_ARM64_BRANCH14 = 0x0010, +}; + +enum COMDATType { + IMAGE_COMDAT_SELECT_NODUPLICATES = 1, + IMAGE_COMDAT_SELECT_ANY, + IMAGE_COMDAT_SELECT_SAME_SIZE, + IMAGE_COMDAT_SELECT_EXACT_MATCH, + IMAGE_COMDAT_SELECT_ASSOCIATIVE, + IMAGE_COMDAT_SELECT_LARGEST, + IMAGE_COMDAT_SELECT_NEWEST +}; + +// Auxiliary Symbol Formats +struct AuxiliaryFunctionDefinition { + uint32_t TagIndex; + uint32_t TotalSize; + uint32_t PointerToLinenumber; + uint32_t PointerToNextFunction; + char unused[2]; +}; + +struct AuxiliarybfAndefSymbol { + uint8_t unused1[4]; + uint16_t Linenumber; + uint8_t unused2[6]; + uint32_t PointerToNextFunction; + uint8_t unused3[2]; +}; + +struct AuxiliaryWeakExternal { + uint32_t TagIndex; + uint32_t Characteristics; + uint8_t unused[10]; +}; + +enum WeakExternalCharacteristics { + IMAGE_WEAK_EXTERN_SEARCH_NOLIBRARY = 1, + IMAGE_WEAK_EXTERN_SEARCH_LIBRARY = 2, + IMAGE_WEAK_EXTERN_SEARCH_ALIAS = 3 +}; + +struct AuxiliarySectionDefinition { + uint32_t Length; + uint16_t NumberOfRelocations; + uint16_t NumberOfLinenumbers; + uint32_t CheckSum; + uint32_t Number; + uint8_t Selection; + char unused; +}; + +struct AuxiliaryCLRToken { + uint8_t AuxType; + uint8_t unused1; + uint32_t SymbolTableIndex; + 
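+ // The 12 unused bytes below pad the record to 18 bytes, the size of a
+ // classic (non-bigobj) COFF symbol table entry (Symbol16Size above); each
+ // auxiliary record occupies one symbol-table slot.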
char unused2[12]; +}; + +union Auxiliary { + AuxiliaryFunctionDefinition FunctionDefinition; + AuxiliarybfAndefSymbol bfAndefSymbol; + AuxiliaryWeakExternal WeakExternal; + AuxiliarySectionDefinition SectionDefinition; +}; + +/// @brief The Import Directory Table. +/// +/// There is a single array of these and one entry per imported DLL. +struct ImportDirectoryTableEntry { + uint32_t ImportLookupTableRVA; + uint32_t TimeDateStamp; + uint32_t ForwarderChain; + uint32_t NameRVA; + uint32_t ImportAddressTableRVA; +}; + +/// @brief The PE32 Import Lookup Table. +/// +/// There is an array of these for each imported DLL. It represents either +/// the ordinal to import from the target DLL, or a name to lookup and import +/// from the target DLL. +/// +/// This also happens to be the same format used by the Import Address Table +/// when it is initially written out to the image. +struct ImportLookupTableEntry32 { + uint32_t data; + + /// @brief Is this entry specified by ordinal, or name? + bool isOrdinal() const { return data & 0x80000000; } + + /// @brief Get the ordinal value of this entry. isOrdinal must be true. + uint16_t getOrdinal() const { + assert(isOrdinal() && "ILT entry is not an ordinal!"); + return data & 0xFFFF; + } + + /// @brief Set the ordinal value and set isOrdinal to true. + void setOrdinal(uint16_t o) { + data = o; + data |= 0x80000000; + } + + /// @brief Get the Hint/Name entry RVA. isOrdinal must be false. + uint32_t getHintNameRVA() const { + assert(!isOrdinal() && "ILT entry is not a Hint/Name RVA!"); + return data; + } + + /// @brief Set the Hint/Name entry RVA and set isOrdinal to false. + void setHintNameRVA(uint32_t rva) { data = rva; } +}; + +/// @brief The DOS compatible header at the front of all PEs. +struct DOSHeader { + uint16_t Magic; + uint16_t UsedBytesInTheLastPage; + uint16_t FileSizeInPages; + uint16_t NumberOfRelocationItems; + uint16_t HeaderSizeInParagraphs; + uint16_t MinimumExtraParagraphs; + uint16_t MaximumExtraParagraphs; + uint16_t InitialRelativeSS; + uint16_t InitialSP; + uint16_t Checksum; + uint16_t InitialIP; + uint16_t InitialRelativeCS; + uint16_t AddressOfRelocationTable; + uint16_t OverlayNumber; + uint16_t Reserved[4]; + uint16_t OEMid; + uint16_t OEMinfo; + uint16_t Reserved2[10]; + uint32_t AddressOfNewExeHeader; +}; + +struct PE32Header { + enum { PE32 = 0x10b, PE32_PLUS = 0x20b }; + + uint16_t Magic; + uint8_t MajorLinkerVersion; + uint8_t MinorLinkerVersion; + uint32_t SizeOfCode; + uint32_t SizeOfInitializedData; + uint32_t SizeOfUninitializedData; + uint32_t AddressOfEntryPoint; // RVA + uint32_t BaseOfCode; // RVA + uint32_t BaseOfData; // RVA + uint32_t ImageBase; + uint32_t SectionAlignment; + uint32_t FileAlignment; + uint16_t MajorOperatingSystemVersion; + uint16_t MinorOperatingSystemVersion; + uint16_t MajorImageVersion; + uint16_t MinorImageVersion; + uint16_t MajorSubsystemVersion; + uint16_t MinorSubsystemVersion; + uint32_t Win32VersionValue; + uint32_t SizeOfImage; + uint32_t SizeOfHeaders; + uint32_t CheckSum; + uint16_t Subsystem; + // FIXME: This should be DllCharacteristics to match the COFF spec. + uint16_t DLLCharacteristics; + uint32_t SizeOfStackReserve; + uint32_t SizeOfStackCommit; + uint32_t SizeOfHeapReserve; + uint32_t SizeOfHeapCommit; + uint32_t LoaderFlags; + // FIXME: This should be NumberOfRvaAndSizes to match the COFF spec. 
+ uint32_t NumberOfRvaAndSize; +}; + +struct DataDirectory { + uint32_t RelativeVirtualAddress; + uint32_t Size; +}; + +enum DataDirectoryIndex { + EXPORT_TABLE = 0, + IMPORT_TABLE, + RESOURCE_TABLE, + EXCEPTION_TABLE, + CERTIFICATE_TABLE, + BASE_RELOCATION_TABLE, + DEBUG_DIRECTORY, + ARCHITECTURE, + GLOBAL_PTR, + TLS_TABLE, + LOAD_CONFIG_TABLE, + BOUND_IMPORT, + IAT, + DELAY_IMPORT_DESCRIPTOR, + CLR_RUNTIME_HEADER, + + NUM_DATA_DIRECTORIES +}; + +enum WindowsSubsystem { + IMAGE_SUBSYSTEM_UNKNOWN = 0, ///< An unknown subsystem. + IMAGE_SUBSYSTEM_NATIVE = 1, ///< Device drivers and native Windows processes. + IMAGE_SUBSYSTEM_WINDOWS_GUI = 2, ///< The Windows GUI subsystem. + IMAGE_SUBSYSTEM_WINDOWS_CUI = 3, ///< The Windows character subsystem. + IMAGE_SUBSYSTEM_OS2_CUI = 5, ///< The OS/2 character subsystem. + IMAGE_SUBSYSTEM_POSIX_CUI = 7, ///< The POSIX character subsystem. + IMAGE_SUBSYSTEM_NATIVE_WINDOWS = 8, ///< Native Windows 9x driver. + IMAGE_SUBSYSTEM_WINDOWS_CE_GUI = 9, ///< Windows CE. + IMAGE_SUBSYSTEM_EFI_APPLICATION = 10, ///< An EFI application. + IMAGE_SUBSYSTEM_EFI_BOOT_SERVICE_DRIVER = 11, ///< An EFI driver with boot + /// services. + IMAGE_SUBSYSTEM_EFI_RUNTIME_DRIVER = 12, ///< An EFI driver with run-time + /// services. + IMAGE_SUBSYSTEM_EFI_ROM = 13, ///< An EFI ROM image. + IMAGE_SUBSYSTEM_XBOX = 14, ///< XBOX. + IMAGE_SUBSYSTEM_WINDOWS_BOOT_APPLICATION = 16 ///< A BCD application. +}; + +enum DLLCharacteristics { + /// ASLR with 64 bit address space. + IMAGE_DLL_CHARACTERISTICS_HIGH_ENTROPY_VA = 0x0020, + /// DLL can be relocated at load time. + IMAGE_DLL_CHARACTERISTICS_DYNAMIC_BASE = 0x0040, + /// Code integrity checks are enforced. + IMAGE_DLL_CHARACTERISTICS_FORCE_INTEGRITY = 0x0080, + /// Image is NX compatible. + IMAGE_DLL_CHARACTERISTICS_NX_COMPAT = 0x0100, + /// Isolation aware, but do not isolate the image. + IMAGE_DLL_CHARACTERISTICS_NO_ISOLATION = 0x0200, + /// Does not use structured exception handling (SEH). No SEH handler may be + /// called in this image. + IMAGE_DLL_CHARACTERISTICS_NO_SEH = 0x0400, + /// Do not bind the image. + IMAGE_DLL_CHARACTERISTICS_NO_BIND = 0x0800, + /// Image should execute in an AppContainer. + IMAGE_DLL_CHARACTERISTICS_APPCONTAINER = 0x1000, + /// A WDM driver. + IMAGE_DLL_CHARACTERISTICS_WDM_DRIVER = 0x2000, + /// Image supports Control Flow Guard. + IMAGE_DLL_CHARACTERISTICS_GUARD_CF = 0x4000, + /// Terminal Server aware. + IMAGE_DLL_CHARACTERISTICS_TERMINAL_SERVER_AWARE = 0x8000 +}; + +enum DebugType { + IMAGE_DEBUG_TYPE_UNKNOWN = 0, + IMAGE_DEBUG_TYPE_COFF = 1, + IMAGE_DEBUG_TYPE_CODEVIEW = 2, + IMAGE_DEBUG_TYPE_FPO = 3, + IMAGE_DEBUG_TYPE_MISC = 4, + IMAGE_DEBUG_TYPE_EXCEPTION = 5, + IMAGE_DEBUG_TYPE_FIXUP = 6, + IMAGE_DEBUG_TYPE_OMAP_TO_SRC = 7, + IMAGE_DEBUG_TYPE_OMAP_FROM_SRC = 8, + IMAGE_DEBUG_TYPE_BORLAND = 9, + IMAGE_DEBUG_TYPE_RESERVED10 = 10, + IMAGE_DEBUG_TYPE_CLSID = 11, + IMAGE_DEBUG_TYPE_VC_FEATURE = 12, + IMAGE_DEBUG_TYPE_POGO = 13, + IMAGE_DEBUG_TYPE_ILTCG = 14, + IMAGE_DEBUG_TYPE_MPX = 15, + IMAGE_DEBUG_TYPE_REPRO = 16, +}; + +enum BaseRelocationType { + IMAGE_REL_BASED_ABSOLUTE = 0, + IMAGE_REL_BASED_HIGH = 1, + IMAGE_REL_BASED_LOW = 2, + IMAGE_REL_BASED_HIGHLOW = 3, + IMAGE_REL_BASED_HIGHADJ = 4, + IMAGE_REL_BASED_MIPS_JMPADDR = 5, + IMAGE_REL_BASED_ARM_MOV32A = 5, + IMAGE_REL_BASED_ARM_MOV32T = 7, + IMAGE_REL_BASED_MIPS_JMPADDR16 = 9, + IMAGE_REL_BASED_DIR64 = 10 +}; + +enum ImportType { IMPORT_CODE = 0, IMPORT_DATA = 1, IMPORT_CONST = 2 }; + +enum ImportNameType { + /// Import is by ordinal.
This indicates that the value in the Ordinal/Hint + /// field of the import header is the import's ordinal. If this constant is + /// not specified, then the Ordinal/Hint field should always be interpreted + /// as the import's hint. + IMPORT_ORDINAL = 0, + /// The import name is identical to the public symbol name + IMPORT_NAME = 1, + /// The import name is the public symbol name, but skipping the leading ?, + /// @, or optionally _. + IMPORT_NAME_NOPREFIX = 2, + /// The import name is the public symbol name, but skipping the leading ?, + /// @, or optionally _, and truncating at the first @. + IMPORT_NAME_UNDECORATE = 3 +}; + +struct ImportHeader { + uint16_t Sig1; ///< Must be IMAGE_FILE_MACHINE_UNKNOWN (0). + uint16_t Sig2; ///< Must be 0xFFFF. + uint16_t Version; + uint16_t Machine; + uint32_t TimeDateStamp; + uint32_t SizeOfData; + uint16_t OrdinalHint; + uint16_t TypeInfo; + + ImportType getType() const { return static_cast(TypeInfo & 0x3); } + + ImportNameType getNameType() const { + return static_cast((TypeInfo & 0x1C) >> 2); + } +}; + +enum CodeViewIdentifiers { + DEBUG_SECTION_MAGIC = 0x4, +}; + +inline bool isReservedSectionNumber(int32_t SectionNumber) { + return SectionNumber <= 0; +} + +} // End namespace COFF. +} // End namespace llvm. + +#endif diff --git a/interpreter/llvm/src/include/llvm/BinaryFormat/Dwarf.def b/interpreter/llvm/src/include/llvm/BinaryFormat/Dwarf.def new file mode 100644 index 0000000000000..3df3300de4668 --- /dev/null +++ b/interpreter/llvm/src/include/llvm/BinaryFormat/Dwarf.def @@ -0,0 +1,838 @@ +//===- llvm/Support/Dwarf.def - Dwarf definitions ---------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Macros for running through Dwarf enumerators. +// +//===----------------------------------------------------------------------===// + +// TODO: Add other DW-based macros. 
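Dwarf.def is an X-macro table: a client defines whichever HANDLE_DW_* hook it needs, includes the file, and the preprocessor stamps out one expansion per table entry, while the #ifndef block below supplies no-op defaults for every hook the client leaves undefined. A hypothetical consumer, sketched here for illustration only, that maps tag values to their names:

// Sketch of the usual consumption pattern (hypothetical helper, not part
// of this patch): expand each HANDLE_DW_TAG entry into a switch case.
const char *getTagName(unsigned Tag) {
  switch (Tag) {
#define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR)                               \
  case ID:                                                                     \
    return "DW_TAG_" #NAME;
#include "llvm/BinaryFormat/Dwarf.def"
  default:
    return nullptr;
  }
}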
+#if !(defined HANDLE_DW_TAG || defined HANDLE_DW_AT || \ + defined HANDLE_DW_FORM || defined HANDLE_DW_OP || \ + defined HANDLE_DW_LANG || defined HANDLE_DW_ATE || \ + defined HANDLE_DW_VIRTUALITY || defined HANDLE_DW_DEFAULTED || \ + defined HANDLE_DW_CC || defined HANDLE_DW_LNS || \ + defined HANDLE_DW_LNE || defined HANDLE_DW_LNCT || \ + defined HANDLE_DW_MACRO || defined HANDLE_DW_RLE || \ + defined HANDLE_DW_CFA || defined HANDLE_DW_APPLE_PROPERTY || \ + defined HANDLE_DW_UT) +#error "Missing macro definition of HANDLE_DW*" +#endif + +#ifndef HANDLE_DW_TAG +#define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR) +#endif + +#ifndef HANDLE_DW_AT +#define HANDLE_DW_AT(ID, NAME, VERSION, VENDOR) +#endif + +#ifndef HANDLE_DW_FORM +#define HANDLE_DW_FORM(ID, NAME, VERSION, VENDOR) +#endif + +#ifndef HANDLE_DW_OP +#define HANDLE_DW_OP(ID, NAME, VERSION, VENDOR) +#endif + +#ifndef HANDLE_DW_LANG +#define HANDLE_DW_LANG(ID, NAME, VERSION, VENDOR) +#endif + +#ifndef HANDLE_DW_ATE +#define HANDLE_DW_ATE(ID, NAME, VERSION, VENDOR) +#endif + +#ifndef HANDLE_DW_VIRTUALITY +#define HANDLE_DW_VIRTUALITY(ID, NAME) +#endif + +#ifndef HANDLE_DW_DEFAULTED +#define HANDLE_DW_DEFAULTED(ID, NAME) +#endif + +#ifndef HANDLE_DW_CC +#define HANDLE_DW_CC(ID, NAME) +#endif + +#ifndef HANDLE_DW_LNS +#define HANDLE_DW_LNS(ID, NAME) +#endif + +#ifndef HANDLE_DW_LNE +#define HANDLE_DW_LNE(ID, NAME) +#endif + +#ifndef HANDLE_DW_LNCT +#define HANDLE_DW_LNCT(ID, NAME) +#endif + +#ifndef HANDLE_DW_MACRO +#define HANDLE_DW_MACRO(ID, NAME) +#endif + +#ifndef HANDLE_DW_RLE +#define HANDLE_DW_RLE(ID, NAME) +#endif + +#ifndef HANDLE_DW_CFA +#define HANDLE_DW_CFA(ID, NAME) +#endif + +#ifndef HANDLE_DW_APPLE_PROPERTY +#define HANDLE_DW_APPLE_PROPERTY(ID, NAME) +#endif + +#ifndef HANDLE_DW_UT +#define HANDLE_DW_UT(ID, NAME) +#endif + +HANDLE_DW_TAG(0x0000, null, 2, DWARF) +HANDLE_DW_TAG(0x0001, array_type, 2, DWARF) +HANDLE_DW_TAG(0x0002, class_type, 2, DWARF) +HANDLE_DW_TAG(0x0003, entry_point, 2, DWARF) +HANDLE_DW_TAG(0x0004, enumeration_type, 2, DWARF) +HANDLE_DW_TAG(0x0005, formal_parameter, 2, DWARF) +HANDLE_DW_TAG(0x0008, imported_declaration, 2, DWARF) +HANDLE_DW_TAG(0x000a, label, 2, DWARF) +HANDLE_DW_TAG(0x000b, lexical_block, 2, DWARF) +HANDLE_DW_TAG(0x000d, member, 2, DWARF) +HANDLE_DW_TAG(0x000f, pointer_type, 2, DWARF) +HANDLE_DW_TAG(0x0010, reference_type, 2, DWARF) +HANDLE_DW_TAG(0x0011, compile_unit, 2, DWARF) +HANDLE_DW_TAG(0x0012, string_type, 2, DWARF) +HANDLE_DW_TAG(0x0013, structure_type, 2, DWARF) +HANDLE_DW_TAG(0x0015, subroutine_type, 2, DWARF) +HANDLE_DW_TAG(0x0016, typedef, 2, DWARF) +HANDLE_DW_TAG(0x0017, union_type, 2, DWARF) +HANDLE_DW_TAG(0x0018, unspecified_parameters, 2, DWARF) +HANDLE_DW_TAG(0x0019, variant, 2, DWARF) +HANDLE_DW_TAG(0x001a, common_block, 2, DWARF) +HANDLE_DW_TAG(0x001b, common_inclusion, 2, DWARF) +HANDLE_DW_TAG(0x001c, inheritance, 2, DWARF) +HANDLE_DW_TAG(0x001d, inlined_subroutine, 2, DWARF) +HANDLE_DW_TAG(0x001e, module, 2, DWARF) +HANDLE_DW_TAG(0x001f, ptr_to_member_type, 2, DWARF) +HANDLE_DW_TAG(0x0020, set_type, 2, DWARF) +HANDLE_DW_TAG(0x0021, subrange_type, 2, DWARF) +HANDLE_DW_TAG(0x0022, with_stmt, 2, DWARF) +HANDLE_DW_TAG(0x0023, access_declaration, 2, DWARF) +HANDLE_DW_TAG(0x0024, base_type, 2, DWARF) +HANDLE_DW_TAG(0x0025, catch_block, 2, DWARF) +HANDLE_DW_TAG(0x0026, const_type, 2, DWARF) +HANDLE_DW_TAG(0x0027, constant, 2, DWARF) +HANDLE_DW_TAG(0x0028, enumerator, 2, DWARF) +HANDLE_DW_TAG(0x0029, file_type, 2, DWARF) +HANDLE_DW_TAG(0x002a, friend, 2, DWARF) 
+HANDLE_DW_TAG(0x002b, namelist, 2, DWARF) +HANDLE_DW_TAG(0x002c, namelist_item, 2, DWARF) +HANDLE_DW_TAG(0x002d, packed_type, 2, DWARF) +HANDLE_DW_TAG(0x002e, subprogram, 2, DWARF) +HANDLE_DW_TAG(0x002f, template_type_parameter, 2, DWARF) +HANDLE_DW_TAG(0x0030, template_value_parameter, 2, DWARF) +HANDLE_DW_TAG(0x0031, thrown_type, 2, DWARF) +HANDLE_DW_TAG(0x0032, try_block, 2, DWARF) +HANDLE_DW_TAG(0x0033, variant_part, 2, DWARF) +HANDLE_DW_TAG(0x0034, variable, 2, DWARF) +HANDLE_DW_TAG(0x0035, volatile_type, 2, DWARF) +// New in DWARF v3: +HANDLE_DW_TAG(0x0036, dwarf_procedure, 3, DWARF) +HANDLE_DW_TAG(0x0037, restrict_type, 3, DWARF) +HANDLE_DW_TAG(0x0038, interface_type, 3, DWARF) +HANDLE_DW_TAG(0x0039, namespace, 3, DWARF) +HANDLE_DW_TAG(0x003a, imported_module, 3, DWARF) +HANDLE_DW_TAG(0x003b, unspecified_type, 3, DWARF) +HANDLE_DW_TAG(0x003c, partial_unit, 3, DWARF) +HANDLE_DW_TAG(0x003d, imported_unit, 3, DWARF) +HANDLE_DW_TAG(0x003f, condition, 3, DWARF) +HANDLE_DW_TAG(0x0040, shared_type, 3, DWARF) +// New in DWARF v4: +HANDLE_DW_TAG(0x0041, type_unit, 4, DWARF) +HANDLE_DW_TAG(0x0042, rvalue_reference_type, 4, DWARF) +HANDLE_DW_TAG(0x0043, template_alias, 4, DWARF) +// New in DWARF v5: +HANDLE_DW_TAG(0x0044, coarray_type, 5, DWARF) +HANDLE_DW_TAG(0x0045, generic_subrange, 5, DWARF) +HANDLE_DW_TAG(0x0046, dynamic_type, 5, DWARF) +HANDLE_DW_TAG(0x0047, atomic_type, 5, DWARF) +HANDLE_DW_TAG(0x0048, call_site, 5, DWARF) +HANDLE_DW_TAG(0x0049, call_site_parameter, 5, DWARF) +HANDLE_DW_TAG(0x004a, skeleton_unit, 5, DWARF) +HANDLE_DW_TAG(0x004b, immutable_type, 5, DWARF) +// Vendor extensions: +HANDLE_DW_TAG(0x4081, MIPS_loop, 0, MIPS) +HANDLE_DW_TAG(0x4101, format_label, 0, GNU) +HANDLE_DW_TAG(0x4102, function_template, 0, GNU) +HANDLE_DW_TAG(0x4103, class_template, 0, GNU) +HANDLE_DW_TAG(0x4106, GNU_template_template_param, 0, GNU) +HANDLE_DW_TAG(0x4107, GNU_template_parameter_pack, 0, GNU) +HANDLE_DW_TAG(0x4108, GNU_formal_parameter_pack, 0, GNU) +HANDLE_DW_TAG(0x4200, APPLE_property, 0, APPLE) +HANDLE_DW_TAG(0xb000, BORLAND_property, 0, BORLAND) +HANDLE_DW_TAG(0xb001, BORLAND_Delphi_string, 0, BORLAND) +HANDLE_DW_TAG(0xb002, BORLAND_Delphi_dynamic_array, 0, BORLAND) +HANDLE_DW_TAG(0xb003, BORLAND_Delphi_set, 0, BORLAND) +HANDLE_DW_TAG(0xb004, BORLAND_Delphi_variant, 0, BORLAND) + +// Attributes. 
+HANDLE_DW_AT(0x01, sibling, 2, DWARF) +HANDLE_DW_AT(0x02, location, 2, DWARF) +HANDLE_DW_AT(0x03, name, 2, DWARF) +HANDLE_DW_AT(0x09, ordering, 2, DWARF) +HANDLE_DW_AT(0x0b, byte_size, 2, DWARF) +HANDLE_DW_AT(0x0c, bit_offset, 2, DWARF) +HANDLE_DW_AT(0x0d, bit_size, 2, DWARF) +HANDLE_DW_AT(0x10, stmt_list, 2, DWARF) +HANDLE_DW_AT(0x11, low_pc, 2, DWARF) +HANDLE_DW_AT(0x12, high_pc, 2, DWARF) +HANDLE_DW_AT(0x13, language, 2, DWARF) +HANDLE_DW_AT(0x15, discr, 2, DWARF) +HANDLE_DW_AT(0x16, discr_value, 2, DWARF) +HANDLE_DW_AT(0x17, visibility, 2, DWARF) +HANDLE_DW_AT(0x18, import, 2, DWARF) +HANDLE_DW_AT(0x19, string_length, 2, DWARF) +HANDLE_DW_AT(0x1a, common_reference, 2, DWARF) +HANDLE_DW_AT(0x1b, comp_dir, 2, DWARF) +HANDLE_DW_AT(0x1c, const_value, 2, DWARF) +HANDLE_DW_AT(0x1d, containing_type, 2, DWARF) +HANDLE_DW_AT(0x1e, default_value, 2, DWARF) +HANDLE_DW_AT(0x20, inline, 2, DWARF) +HANDLE_DW_AT(0x21, is_optional, 2, DWARF) +HANDLE_DW_AT(0x22, lower_bound, 2, DWARF) +HANDLE_DW_AT(0x25, producer, 2, DWARF) +HANDLE_DW_AT(0x27, prototyped, 2, DWARF) +HANDLE_DW_AT(0x2a, return_addr, 2, DWARF) +HANDLE_DW_AT(0x2c, start_scope, 2, DWARF) +HANDLE_DW_AT(0x2e, bit_stride, 2, DWARF) +HANDLE_DW_AT(0x2f, upper_bound, 2, DWARF) +HANDLE_DW_AT(0x31, abstract_origin, 2, DWARF) +HANDLE_DW_AT(0x32, accessibility, 2, DWARF) +HANDLE_DW_AT(0x33, address_class, 2, DWARF) +HANDLE_DW_AT(0x34, artificial, 2, DWARF) +HANDLE_DW_AT(0x35, base_types, 2, DWARF) +HANDLE_DW_AT(0x36, calling_convention, 2, DWARF) +HANDLE_DW_AT(0x37, count, 2, DWARF) +HANDLE_DW_AT(0x38, data_member_location, 2, DWARF) +HANDLE_DW_AT(0x39, decl_column, 2, DWARF) +HANDLE_DW_AT(0x3a, decl_file, 2, DWARF) +HANDLE_DW_AT(0x3b, decl_line, 2, DWARF) +HANDLE_DW_AT(0x3c, declaration, 2, DWARF) +HANDLE_DW_AT(0x3d, discr_list, 2, DWARF) +HANDLE_DW_AT(0x3e, encoding, 2, DWARF) +HANDLE_DW_AT(0x3f, external, 2, DWARF) +HANDLE_DW_AT(0x40, frame_base, 2, DWARF) +HANDLE_DW_AT(0x41, friend, 2, DWARF) +HANDLE_DW_AT(0x42, identifier_case, 2, DWARF) +HANDLE_DW_AT(0x43, macro_info, 2, DWARF) +HANDLE_DW_AT(0x44, namelist_item, 2, DWARF) +HANDLE_DW_AT(0x45, priority, 2, DWARF) +HANDLE_DW_AT(0x46, segment, 2, DWARF) +HANDLE_DW_AT(0x47, specification, 2, DWARF) +HANDLE_DW_AT(0x48, static_link, 2, DWARF) +HANDLE_DW_AT(0x49, type, 2, DWARF) +HANDLE_DW_AT(0x4a, use_location, 2, DWARF) +HANDLE_DW_AT(0x4b, variable_parameter, 2, DWARF) +HANDLE_DW_AT(0x4c, virtuality, 2, DWARF) +HANDLE_DW_AT(0x4d, vtable_elem_location, 2, DWARF) +// New in DWARF v3: +HANDLE_DW_AT(0x4e, allocated, 3, DWARF) +HANDLE_DW_AT(0x4f, associated, 3, DWARF) +HANDLE_DW_AT(0x50, data_location, 3, DWARF) +HANDLE_DW_AT(0x51, byte_stride, 3, DWARF) +HANDLE_DW_AT(0x52, entry_pc, 3, DWARF) +HANDLE_DW_AT(0x53, use_UTF8, 3, DWARF) +HANDLE_DW_AT(0x54, extension, 3, DWARF) +HANDLE_DW_AT(0x55, ranges, 3, DWARF) +HANDLE_DW_AT(0x56, trampoline, 3, DWARF) +HANDLE_DW_AT(0x57, call_column, 3, DWARF) +HANDLE_DW_AT(0x58, call_file, 3, DWARF) +HANDLE_DW_AT(0x59, call_line, 3, DWARF) +HANDLE_DW_AT(0x5a, description, 3, DWARF) +HANDLE_DW_AT(0x5b, binary_scale, 3, DWARF) +HANDLE_DW_AT(0x5c, decimal_scale, 3, DWARF) +HANDLE_DW_AT(0x5d, small, 3, DWARF) +HANDLE_DW_AT(0x5e, decimal_sign, 3, DWARF) +HANDLE_DW_AT(0x5f, digit_count, 3, DWARF) +HANDLE_DW_AT(0x60, picture_string, 3, DWARF) +HANDLE_DW_AT(0x61, mutable, 3, DWARF) +HANDLE_DW_AT(0x62, threads_scaled, 3, DWARF) +HANDLE_DW_AT(0x63, explicit, 3, DWARF) +HANDLE_DW_AT(0x64, object_pointer, 3, DWARF) +HANDLE_DW_AT(0x65, endianity, 3, DWARF) +HANDLE_DW_AT(0x66, 
elemental, 3, DWARF) +HANDLE_DW_AT(0x67, pure, 3, DWARF) +HANDLE_DW_AT(0x68, recursive, 3, DWARF) +// New in DWARF v4: +HANDLE_DW_AT(0x69, signature, 4, DWARF) +HANDLE_DW_AT(0x6a, main_subprogram, 4, DWARF) +HANDLE_DW_AT(0x6b, data_bit_offset, 4, DWARF) +HANDLE_DW_AT(0x6c, const_expr, 4, DWARF) +HANDLE_DW_AT(0x6d, enum_class, 4, DWARF) +HANDLE_DW_AT(0x6e, linkage_name, 4, DWARF) +// New in DWARF v5: +HANDLE_DW_AT(0x6f, string_length_bit_size, 5, DWARF) +HANDLE_DW_AT(0x70, string_length_byte_size, 5, DWARF) +HANDLE_DW_AT(0x71, rank, 5, DWARF) +HANDLE_DW_AT(0x72, str_offsets_base, 5, DWARF) +HANDLE_DW_AT(0x73, addr_base, 5, DWARF) +HANDLE_DW_AT(0x74, rnglists_base, 5, DWARF) +HANDLE_DW_AT(0x75, dwo_id, 0, DWARF) ///< Retracted from DWARF v5. +HANDLE_DW_AT(0x76, dwo_name, 5, DWARF) +HANDLE_DW_AT(0x77, reference, 5, DWARF) +HANDLE_DW_AT(0x78, rvalue_reference, 5, DWARF) +HANDLE_DW_AT(0x79, macros, 5, DWARF) +HANDLE_DW_AT(0x7a, call_all_calls, 5, DWARF) +HANDLE_DW_AT(0x7b, call_all_source_calls, 5, DWARF) +HANDLE_DW_AT(0x7c, call_all_tail_calls, 5, DWARF) +HANDLE_DW_AT(0x7d, call_return_pc, 5, DWARF) +HANDLE_DW_AT(0x7e, call_value, 5, DWARF) +HANDLE_DW_AT(0x7f, call_origin, 5, DWARF) +HANDLE_DW_AT(0x80, call_parameter, 5, DWARF) +HANDLE_DW_AT(0x81, call_pc, 5, DWARF) +HANDLE_DW_AT(0x82, call_tail_call, 5, DWARF) +HANDLE_DW_AT(0x83, call_target, 5, DWARF) +HANDLE_DW_AT(0x84, call_target_clobbered, 5, DWARF) +HANDLE_DW_AT(0x85, call_data_location, 5, DWARF) +HANDLE_DW_AT(0x86, call_data_value, 5, DWARF) +HANDLE_DW_AT(0x87, noreturn, 5, DWARF) +HANDLE_DW_AT(0x88, alignment, 5, DWARF) +HANDLE_DW_AT(0x89, export_symbols, 5, DWARF) +HANDLE_DW_AT(0x8a, deleted, 5, DWARF) +HANDLE_DW_AT(0x8b, defaulted, 5, DWARF) +HANDLE_DW_AT(0x8c, loclists_base, 5, DWARF) +// Vendor extensions: +HANDLE_DW_AT(0x2002, MIPS_loop_begin, 0, MIPS) +HANDLE_DW_AT(0x2003, MIPS_tail_loop_begin, 0, MIPS) +HANDLE_DW_AT(0x2004, MIPS_epilog_begin, 0, MIPS) +HANDLE_DW_AT(0x2005, MIPS_loop_unroll_factor, 0, MIPS) +HANDLE_DW_AT(0x2006, MIPS_software_pipeline_depth, 0, MIPS) +HANDLE_DW_AT(0x2007, MIPS_linkage_name, 0, MIPS) +HANDLE_DW_AT(0x2008, MIPS_stride, 0, MIPS) +HANDLE_DW_AT(0x2009, MIPS_abstract_name, 0, MIPS) +HANDLE_DW_AT(0x200a, MIPS_clone_origin, 0, MIPS) +HANDLE_DW_AT(0x200b, MIPS_has_inlines, 0, MIPS) +HANDLE_DW_AT(0x200c, MIPS_stride_byte, 0, MIPS) +HANDLE_DW_AT(0x200d, MIPS_stride_elem, 0, MIPS) +HANDLE_DW_AT(0x200e, MIPS_ptr_dopetype, 0, MIPS) +HANDLE_DW_AT(0x200f, MIPS_allocatable_dopetype, 0, MIPS) +HANDLE_DW_AT(0x2010, MIPS_assumed_shape_dopetype, 0, MIPS) +// This one appears to have only been implemented by Open64 for +// fortran and may conflict with other extensions. +HANDLE_DW_AT(0x2011, MIPS_assumed_size, 0, MIPS) +// GNU extensions +HANDLE_DW_AT(0x2101, sf_names, 0, GNU) +HANDLE_DW_AT(0x2102, src_info, 0, GNU) +HANDLE_DW_AT(0x2103, mac_info, 0, GNU) +HANDLE_DW_AT(0x2104, src_coords, 0, GNU) +HANDLE_DW_AT(0x2105, body_begin, 0, GNU) +HANDLE_DW_AT(0x2106, body_end, 0, GNU) +HANDLE_DW_AT(0x2107, GNU_vector, 0, GNU) +HANDLE_DW_AT(0x2110, GNU_template_name, 0, GNU) +HANDLE_DW_AT(0x210f, GNU_odr_signature, 0, GNU) +HANDLE_DW_AT(0x2119, GNU_macros, 0, GNU) +// Extensions for Fission proposal. 
+HANDLE_DW_AT(0x2130, GNU_dwo_name, 0, GNU) +HANDLE_DW_AT(0x2131, GNU_dwo_id, 0, GNU) +HANDLE_DW_AT(0x2132, GNU_ranges_base, 0, GNU) +HANDLE_DW_AT(0x2133, GNU_addr_base, 0, GNU) +HANDLE_DW_AT(0x2134, GNU_pubnames, 0, GNU) +HANDLE_DW_AT(0x2135, GNU_pubtypes, 0, GNU) +HANDLE_DW_AT(0x2136, GNU_discriminator, 0, GNU) +// Borland extensions. +HANDLE_DW_AT(0x3b11, BORLAND_property_read, 0, BORLAND) +HANDLE_DW_AT(0x3b12, BORLAND_property_write, 0, BORLAND) +HANDLE_DW_AT(0x3b13, BORLAND_property_implements, 0, BORLAND) +HANDLE_DW_AT(0x3b14, BORLAND_property_index, 0, BORLAND) +HANDLE_DW_AT(0x3b15, BORLAND_property_default, 0, BORLAND) +HANDLE_DW_AT(0x3b20, BORLAND_Delphi_unit, 0, BORLAND) +HANDLE_DW_AT(0x3b21, BORLAND_Delphi_class, 0, BORLAND) +HANDLE_DW_AT(0x3b22, BORLAND_Delphi_record, 0, BORLAND) +HANDLE_DW_AT(0x3b23, BORLAND_Delphi_metaclass, 0, BORLAND) +HANDLE_DW_AT(0x3b24, BORLAND_Delphi_constructor, 0, BORLAND) +HANDLE_DW_AT(0x3b25, BORLAND_Delphi_destructor, 0, BORLAND) +HANDLE_DW_AT(0x3b26, BORLAND_Delphi_anonymous_method, 0, BORLAND) +HANDLE_DW_AT(0x3b27, BORLAND_Delphi_interface, 0, BORLAND) +HANDLE_DW_AT(0x3b28, BORLAND_Delphi_ABI, 0, BORLAND) +HANDLE_DW_AT(0x3b29, BORLAND_Delphi_return, 0, BORLAND) +HANDLE_DW_AT(0x3b30, BORLAND_Delphi_frameptr, 0, BORLAND) +HANDLE_DW_AT(0x3b31, BORLAND_closure, 0, BORLAND) +// LLVM project extensions. +HANDLE_DW_AT(0x3e00, LLVM_include_path, 0, LLVM) +HANDLE_DW_AT(0x3e01, LLVM_config_macros, 0, LLVM) +HANDLE_DW_AT(0x3e02, LLVM_isysroot, 0, LLVM) +// Apple extensions. +HANDLE_DW_AT(0x3fe1, APPLE_optimized, 0, APPLE) +HANDLE_DW_AT(0x3fe2, APPLE_flags, 0, APPLE) +HANDLE_DW_AT(0x3fe3, APPLE_isa, 0, APPLE) +HANDLE_DW_AT(0x3fe4, APPLE_block, 0, APPLE) +HANDLE_DW_AT(0x3fe5, APPLE_major_runtime_vers, 0, APPLE) +HANDLE_DW_AT(0x3fe6, APPLE_runtime_class, 0, APPLE) +HANDLE_DW_AT(0x3fe7, APPLE_omit_frame_ptr, 0, APPLE) +HANDLE_DW_AT(0x3fe8, APPLE_property_name, 0, APPLE) +HANDLE_DW_AT(0x3fe9, APPLE_property_getter, 0, APPLE) +HANDLE_DW_AT(0x3fea, APPLE_property_setter, 0, APPLE) +HANDLE_DW_AT(0x3feb, APPLE_property_attribute, 0, APPLE) +HANDLE_DW_AT(0x3fec, APPLE_objc_complete_type, 0, APPLE) +HANDLE_DW_AT(0x3fed, APPLE_property, 0, APPLE) + +// Attribute form encodings. +HANDLE_DW_FORM(0x01, addr, 2, DWARF) +HANDLE_DW_FORM(0x03, block2, 2, DWARF) +HANDLE_DW_FORM(0x04, block4, 2, DWARF) +HANDLE_DW_FORM(0x05, data2, 2, DWARF) +HANDLE_DW_FORM(0x06, data4, 2, DWARF) +HANDLE_DW_FORM(0x07, data8, 2, DWARF) +HANDLE_DW_FORM(0x08, string, 2, DWARF) +HANDLE_DW_FORM(0x09, block, 2, DWARF) +HANDLE_DW_FORM(0x0a, block1, 2, DWARF) +HANDLE_DW_FORM(0x0b, data1, 2, DWARF) +HANDLE_DW_FORM(0x0c, flag, 2, DWARF) +HANDLE_DW_FORM(0x0d, sdata, 2, DWARF) +HANDLE_DW_FORM(0x0e, strp, 2, DWARF) +HANDLE_DW_FORM(0x0f, udata, 2, DWARF) +HANDLE_DW_FORM(0x10, ref_addr, 2, DWARF) +HANDLE_DW_FORM(0x11, ref1, 2, DWARF) +HANDLE_DW_FORM(0x12, ref2, 2, DWARF) +HANDLE_DW_FORM(0x13, ref4, 2, DWARF) +HANDLE_DW_FORM(0x14, ref8, 2, DWARF) +HANDLE_DW_FORM(0x15, ref_udata, 2, DWARF) +HANDLE_DW_FORM(0x16, indirect, 2, DWARF) +// New in DWARF v4: +HANDLE_DW_FORM(0x17, sec_offset, 4, DWARF) +HANDLE_DW_FORM(0x18, exprloc, 4, DWARF) +HANDLE_DW_FORM(0x19, flag_present, 4, DWARF) +// This was defined out of sequence. 
+HANDLE_DW_FORM(0x20, ref_sig8, 4, DWARF) +// New in DWARF v5: +HANDLE_DW_FORM(0x1a, strx, 5, DWARF) +HANDLE_DW_FORM(0x1b, addrx, 5, DWARF) +HANDLE_DW_FORM(0x1c, ref_sup4, 5, DWARF) +HANDLE_DW_FORM(0x1d, strp_sup, 5, DWARF) +HANDLE_DW_FORM(0x1e, data16, 5, DWARF) +HANDLE_DW_FORM(0x1f, line_strp, 5, DWARF) +HANDLE_DW_FORM(0x21, implicit_const, 5, DWARF) +HANDLE_DW_FORM(0x22, loclistx, 5, DWARF) +HANDLE_DW_FORM(0x23, rnglistx, 5, DWARF) +HANDLE_DW_FORM(0x24, ref_sup8, 5, DWARF) +HANDLE_DW_FORM(0x25, strx1, 5, DWARF) +HANDLE_DW_FORM(0x26, strx2, 5, DWARF) +HANDLE_DW_FORM(0x27, strx3, 5, DWARF) +HANDLE_DW_FORM(0x28, strx4, 5, DWARF) +HANDLE_DW_FORM(0x29, addrx1, 5, DWARF) +HANDLE_DW_FORM(0x2a, addrx2, 5, DWARF) +HANDLE_DW_FORM(0x2b, addrx3, 5, DWARF) +HANDLE_DW_FORM(0x2c, addrx4, 5, DWARF) +// Extensions for Fission proposal +HANDLE_DW_FORM(0x1f01, GNU_addr_index, 0, GNU) +HANDLE_DW_FORM(0x1f02, GNU_str_index, 0, GNU) +// Alternate debug sections proposal (output of "dwz" tool). +HANDLE_DW_FORM(0x1f20, GNU_ref_alt, 0, GNU) +HANDLE_DW_FORM(0x1f21, GNU_strp_alt, 0, GNU) + +// DWARF Expression operators. +HANDLE_DW_OP(0x03, addr, 2, DWARF) +HANDLE_DW_OP(0x06, deref, 2, DWARF) +HANDLE_DW_OP(0x08, const1u, 2, DWARF) +HANDLE_DW_OP(0x09, const1s, 2, DWARF) +HANDLE_DW_OP(0x0a, const2u, 2, DWARF) +HANDLE_DW_OP(0x0b, const2s, 2, DWARF) +HANDLE_DW_OP(0x0c, const4u, 2, DWARF) +HANDLE_DW_OP(0x0d, const4s, 2, DWARF) +HANDLE_DW_OP(0x0e, const8u, 2, DWARF) +HANDLE_DW_OP(0x0f, const8s, 2, DWARF) +HANDLE_DW_OP(0x10, constu, 2, DWARF) +HANDLE_DW_OP(0x11, consts, 2, DWARF) +HANDLE_DW_OP(0x12, dup, 2, DWARF) +HANDLE_DW_OP(0x13, drop, 2, DWARF) +HANDLE_DW_OP(0x14, over, 2, DWARF) +HANDLE_DW_OP(0x15, pick, 2, DWARF) +HANDLE_DW_OP(0x16, swap, 2, DWARF) +HANDLE_DW_OP(0x17, rot, 2, DWARF) +HANDLE_DW_OP(0x18, xderef, 2, DWARF) +HANDLE_DW_OP(0x19, abs, 2, DWARF) +HANDLE_DW_OP(0x1a, and, 2, DWARF) +HANDLE_DW_OP(0x1b, div, 2, DWARF) +HANDLE_DW_OP(0x1c, minus, 2, DWARF) +HANDLE_DW_OP(0x1d, mod, 2, DWARF) +HANDLE_DW_OP(0x1e, mul, 2, DWARF) +HANDLE_DW_OP(0x1f, neg, 2, DWARF) +HANDLE_DW_OP(0x20, not, 2, DWARF) +HANDLE_DW_OP(0x21, or, 2, DWARF) +HANDLE_DW_OP(0x22, plus, 2, DWARF) +HANDLE_DW_OP(0x23, plus_uconst, 2, DWARF) +HANDLE_DW_OP(0x24, shl, 2, DWARF) +HANDLE_DW_OP(0x25, shr, 2, DWARF) +HANDLE_DW_OP(0x26, shra, 2, DWARF) +HANDLE_DW_OP(0x27, xor, 2, DWARF) +HANDLE_DW_OP(0x28, bra, 2, DWARF) +HANDLE_DW_OP(0x29, eq, 2, DWARF) +HANDLE_DW_OP(0x2a, ge, 2, DWARF) +HANDLE_DW_OP(0x2b, gt, 2, DWARF) +HANDLE_DW_OP(0x2c, le, 2, DWARF) +HANDLE_DW_OP(0x2d, lt, 2, DWARF) +HANDLE_DW_OP(0x2e, ne, 2, DWARF) +HANDLE_DW_OP(0x2f, skip, 2, DWARF) +HANDLE_DW_OP(0x30, lit0, 2, DWARF) +HANDLE_DW_OP(0x31, lit1, 2, DWARF) +HANDLE_DW_OP(0x32, lit2, 2, DWARF) +HANDLE_DW_OP(0x33, lit3, 2, DWARF) +HANDLE_DW_OP(0x34, lit4, 2, DWARF) +HANDLE_DW_OP(0x35, lit5, 2, DWARF) +HANDLE_DW_OP(0x36, lit6, 2, DWARF) +HANDLE_DW_OP(0x37, lit7, 2, DWARF) +HANDLE_DW_OP(0x38, lit8, 2, DWARF) +HANDLE_DW_OP(0x39, lit9, 2, DWARF) +HANDLE_DW_OP(0x3a, lit10, 2, DWARF) +HANDLE_DW_OP(0x3b, lit11, 2, DWARF) +HANDLE_DW_OP(0x3c, lit12, 2, DWARF) +HANDLE_DW_OP(0x3d, lit13, 2, DWARF) +HANDLE_DW_OP(0x3e, lit14, 2, DWARF) +HANDLE_DW_OP(0x3f, lit15, 2, DWARF) +HANDLE_DW_OP(0x40, lit16, 2, DWARF) +HANDLE_DW_OP(0x41, lit17, 2, DWARF) +HANDLE_DW_OP(0x42, lit18, 2, DWARF) +HANDLE_DW_OP(0x43, lit19, 2, DWARF) +HANDLE_DW_OP(0x44, lit20, 2, DWARF) +HANDLE_DW_OP(0x45, lit21, 2, DWARF) +HANDLE_DW_OP(0x46, lit22, 2, DWARF) +HANDLE_DW_OP(0x47, lit23, 2, DWARF) +HANDLE_DW_OP(0x48, lit24, 2, DWARF) 
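// NOTE: the lit0..lit31, reg0..reg31 and breg0..breg31 runs in this table
// occupy consecutive opcodes, so consumers usually recover the embedded
// operand arithmetically instead of switching on each name. A minimal
// sketch, assuming only the DW_OP_* enumerators generated from this table
// (Op, Literal, RegNo and BaseReg are illustrative names, not part of the
// patch):
//
//   unsigned Op = /* one opcode read from a DWARF expression */;
//   if (Op >= DW_OP_lit0 && Op <= DW_OP_lit31) {
//     unsigned Literal = Op - DW_OP_lit0;  // pushes the literal 0..31
//   } else if (Op >= DW_OP_reg0 && Op <= DW_OP_reg31) {
//     unsigned RegNo = Op - DW_OP_reg0;    // names register 0..31
//   } else if (Op >= DW_OP_breg0 && Op <= DW_OP_breg31) {
//     unsigned BaseReg = Op - DW_OP_breg0; // base register; a SLEB128
//   }                                      // offset operand follows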
+HANDLE_DW_OP(0x49, lit25, 2, DWARF) +HANDLE_DW_OP(0x4a, lit26, 2, DWARF) +HANDLE_DW_OP(0x4b, lit27, 2, DWARF) +HANDLE_DW_OP(0x4c, lit28, 2, DWARF) +HANDLE_DW_OP(0x4d, lit29, 2, DWARF) +HANDLE_DW_OP(0x4e, lit30, 2, DWARF) +HANDLE_DW_OP(0x4f, lit31, 2, DWARF) +HANDLE_DW_OP(0x50, reg0, 2, DWARF) +HANDLE_DW_OP(0x51, reg1, 2, DWARF) +HANDLE_DW_OP(0x52, reg2, 2, DWARF) +HANDLE_DW_OP(0x53, reg3, 2, DWARF) +HANDLE_DW_OP(0x54, reg4, 2, DWARF) +HANDLE_DW_OP(0x55, reg5, 2, DWARF) +HANDLE_DW_OP(0x56, reg6, 2, DWARF) +HANDLE_DW_OP(0x57, reg7, 2, DWARF) +HANDLE_DW_OP(0x58, reg8, 2, DWARF) +HANDLE_DW_OP(0x59, reg9, 2, DWARF) +HANDLE_DW_OP(0x5a, reg10, 2, DWARF) +HANDLE_DW_OP(0x5b, reg11, 2, DWARF) +HANDLE_DW_OP(0x5c, reg12, 2, DWARF) +HANDLE_DW_OP(0x5d, reg13, 2, DWARF) +HANDLE_DW_OP(0x5e, reg14, 2, DWARF) +HANDLE_DW_OP(0x5f, reg15, 2, DWARF) +HANDLE_DW_OP(0x60, reg16, 2, DWARF) +HANDLE_DW_OP(0x61, reg17, 2, DWARF) +HANDLE_DW_OP(0x62, reg18, 2, DWARF) +HANDLE_DW_OP(0x63, reg19, 2, DWARF) +HANDLE_DW_OP(0x64, reg20, 2, DWARF) +HANDLE_DW_OP(0x65, reg21, 2, DWARF) +HANDLE_DW_OP(0x66, reg22, 2, DWARF) +HANDLE_DW_OP(0x67, reg23, 2, DWARF) +HANDLE_DW_OP(0x68, reg24, 2, DWARF) +HANDLE_DW_OP(0x69, reg25, 2, DWARF) +HANDLE_DW_OP(0x6a, reg26, 2, DWARF) +HANDLE_DW_OP(0x6b, reg27, 2, DWARF) +HANDLE_DW_OP(0x6c, reg28, 2, DWARF) +HANDLE_DW_OP(0x6d, reg29, 2, DWARF) +HANDLE_DW_OP(0x6e, reg30, 2, DWARF) +HANDLE_DW_OP(0x6f, reg31, 2, DWARF) +HANDLE_DW_OP(0x70, breg0, 2, DWARF) +HANDLE_DW_OP(0x71, breg1, 2, DWARF) +HANDLE_DW_OP(0x72, breg2, 2, DWARF) +HANDLE_DW_OP(0x73, breg3, 2, DWARF) +HANDLE_DW_OP(0x74, breg4, 2, DWARF) +HANDLE_DW_OP(0x75, breg5, 2, DWARF) +HANDLE_DW_OP(0x76, breg6, 2, DWARF) +HANDLE_DW_OP(0x77, breg7, 2, DWARF) +HANDLE_DW_OP(0x78, breg8, 2, DWARF) +HANDLE_DW_OP(0x79, breg9, 2, DWARF) +HANDLE_DW_OP(0x7a, breg10, 2, DWARF) +HANDLE_DW_OP(0x7b, breg11, 2, DWARF) +HANDLE_DW_OP(0x7c, breg12, 2, DWARF) +HANDLE_DW_OP(0x7d, breg13, 2, DWARF) +HANDLE_DW_OP(0x7e, breg14, 2, DWARF) +HANDLE_DW_OP(0x7f, breg15, 2, DWARF) +HANDLE_DW_OP(0x80, breg16, 2, DWARF) +HANDLE_DW_OP(0x81, breg17, 2, DWARF) +HANDLE_DW_OP(0x82, breg18, 2, DWARF) +HANDLE_DW_OP(0x83, breg19, 2, DWARF) +HANDLE_DW_OP(0x84, breg20, 2, DWARF) +HANDLE_DW_OP(0x85, breg21, 2, DWARF) +HANDLE_DW_OP(0x86, breg22, 2, DWARF) +HANDLE_DW_OP(0x87, breg23, 2, DWARF) +HANDLE_DW_OP(0x88, breg24, 2, DWARF) +HANDLE_DW_OP(0x89, breg25, 2, DWARF) +HANDLE_DW_OP(0x8a, breg26, 2, DWARF) +HANDLE_DW_OP(0x8b, breg27, 2, DWARF) +HANDLE_DW_OP(0x8c, breg28, 2, DWARF) +HANDLE_DW_OP(0x8d, breg29, 2, DWARF) +HANDLE_DW_OP(0x8e, breg30, 2, DWARF) +HANDLE_DW_OP(0x8f, breg31, 2, DWARF) +HANDLE_DW_OP(0x90, regx, 2, DWARF) +HANDLE_DW_OP(0x91, fbreg, 2, DWARF) +HANDLE_DW_OP(0x92, bregx, 2, DWARF) +HANDLE_DW_OP(0x93, piece, 2, DWARF) +HANDLE_DW_OP(0x94, deref_size, 2, DWARF) +HANDLE_DW_OP(0x95, xderef_size, 2, DWARF) +HANDLE_DW_OP(0x96, nop, 2, DWARF) +// New in DWARF v3: +HANDLE_DW_OP(0x97, push_object_address, 3, DWARF) +HANDLE_DW_OP(0x98, call2, 3, DWARF) +HANDLE_DW_OP(0x99, call4, 3, DWARF) +HANDLE_DW_OP(0x9a, call_ref, 3, DWARF) +HANDLE_DW_OP(0x9b, form_tls_address, 3, DWARF) +HANDLE_DW_OP(0x9c, call_frame_cfa, 3, DWARF) +HANDLE_DW_OP(0x9d, bit_piece, 3, DWARF) +// New in DWARF v4: +HANDLE_DW_OP(0x9e, implicit_value, 4, DWARF) +HANDLE_DW_OP(0x9f, stack_value, 4, DWARF) +// New in DWARF v5: +HANDLE_DW_OP(0xa0, implicit_pointer, 5, DWARF) +HANDLE_DW_OP(0xa1, addrx, 5, DWARF) +HANDLE_DW_OP(0xa2, constx, 5, DWARF) +HANDLE_DW_OP(0xa3, entry_value, 5, DWARF) +HANDLE_DW_OP(0xa4, 
const_type, 5, DWARF) +HANDLE_DW_OP(0xa5, regval_type, 5, DWARF) +HANDLE_DW_OP(0xa6, deref_type, 5, DWARF) +HANDLE_DW_OP(0xa7, xderef_type, 5, DWARF) +HANDLE_DW_OP(0xa8, convert, 5, DWARF) +HANDLE_DW_OP(0xa9, reinterpret, 5, DWARF) +// Vendor extensions: +// Extensions for GNU-style thread-local storage. +HANDLE_DW_OP(0xe0, GNU_push_tls_address, 0, GNU) +// Extensions for Fission proposal. +HANDLE_DW_OP(0xfb, GNU_addr_index, 0, GNU) +HANDLE_DW_OP(0xfc, GNU_const_index, 0, GNU) + +// DWARF languages. +HANDLE_DW_LANG(0x0001, C89, 2, DWARF) +HANDLE_DW_LANG(0x0002, C, 2, DWARF) +HANDLE_DW_LANG(0x0003, Ada83, 2, DWARF) +HANDLE_DW_LANG(0x0004, C_plus_plus, 2, DWARF) +HANDLE_DW_LANG(0x0005, Cobol74, 2, DWARF) +HANDLE_DW_LANG(0x0006, Cobol85, 2, DWARF) +HANDLE_DW_LANG(0x0007, Fortran77, 2, DWARF) +HANDLE_DW_LANG(0x0008, Fortran90, 2, DWARF) +HANDLE_DW_LANG(0x0009, Pascal83, 2, DWARF) +HANDLE_DW_LANG(0x000a, Modula2, 2, DWARF) +// New in DWARF v3: +HANDLE_DW_LANG(0x000b, Java, 3, DWARF) +HANDLE_DW_LANG(0x000c, C99, 3, DWARF) +HANDLE_DW_LANG(0x000d, Ada95, 3, DWARF) +HANDLE_DW_LANG(0x000e, Fortran95, 3, DWARF) +HANDLE_DW_LANG(0x000f, PLI, 3, DWARF) +HANDLE_DW_LANG(0x0010, ObjC, 3, DWARF) +HANDLE_DW_LANG(0x0011, ObjC_plus_plus, 3, DWARF) +HANDLE_DW_LANG(0x0012, UPC, 3, DWARF) +HANDLE_DW_LANG(0x0013, D, 3, DWARF) +// New in DWARF v4: +HANDLE_DW_LANG(0x0014, Python, 4, DWARF) +// New in DWARF v5: +HANDLE_DW_LANG(0x0015, OpenCL, 5, DWARF) +HANDLE_DW_LANG(0x0016, Go, 5, DWARF) +HANDLE_DW_LANG(0x0017, Modula3, 5, DWARF) +HANDLE_DW_LANG(0x0018, Haskell, 5, DWARF) +HANDLE_DW_LANG(0x0019, C_plus_plus_03, 5, DWARF) +HANDLE_DW_LANG(0x001a, C_plus_plus_11, 5, DWARF) +HANDLE_DW_LANG(0x001b, OCaml, 5, DWARF) +HANDLE_DW_LANG(0x001c, Rust, 5, DWARF) +HANDLE_DW_LANG(0x001d, C11, 5, DWARF) +HANDLE_DW_LANG(0x001e, Swift, 5, DWARF) +HANDLE_DW_LANG(0x001f, Julia, 5, DWARF) +HANDLE_DW_LANG(0x0020, Dylan, 5, DWARF) +HANDLE_DW_LANG(0x0021, C_plus_plus_14, 5, DWARF) +HANDLE_DW_LANG(0x0022, Fortran03, 5, DWARF) +HANDLE_DW_LANG(0x0023, Fortran08, 5, DWARF) +HANDLE_DW_LANG(0x0024, RenderScript, 5, DWARF) +HANDLE_DW_LANG(0x0025, BLISS, 5, DWARF) +// Vendor extensions: +HANDLE_DW_LANG(0x8001, Mips_Assembler, 0, MIPS) +HANDLE_DW_LANG(0x8e57, GOOGLE_RenderScript, 0, GOOGLE) +HANDLE_DW_LANG(0xb000, BORLAND_Delphi, 0, BORLAND) + +// DWARF attribute type encodings. +HANDLE_DW_ATE(0x01, address, 2, DWARF) +HANDLE_DW_ATE(0x02, boolean, 2, DWARF) +HANDLE_DW_ATE(0x03, complex_float, 2, DWARF) +HANDLE_DW_ATE(0x04, float, 2, DWARF) +HANDLE_DW_ATE(0x05, signed, 2, DWARF) +HANDLE_DW_ATE(0x06, signed_char, 2, DWARF) +HANDLE_DW_ATE(0x07, unsigned, 2, DWARF) +HANDLE_DW_ATE(0x08, unsigned_char, 2, DWARF) +// New in DWARF v3: +HANDLE_DW_ATE(0x09, imaginary_float, 3, DWARF) +HANDLE_DW_ATE(0x0a, packed_decimal, 3, DWARF) +HANDLE_DW_ATE(0x0b, numeric_string, 3, DWARF) +HANDLE_DW_ATE(0x0c, edited, 3, DWARF) +HANDLE_DW_ATE(0x0d, signed_fixed, 3, DWARF) +HANDLE_DW_ATE(0x0e, unsigned_fixed, 3, DWARF) +HANDLE_DW_ATE(0x0f, decimal_float, 3, DWARF) +// New in DWARF v4: +HANDLE_DW_ATE(0x10, UTF, 4, DWARF) +// New in DWARF v5: +HANDLE_DW_ATE(0x11, UCS, 5, DWARF) +HANDLE_DW_ATE(0x12, ASCII, 5, DWARF) + +// DWARF virtuality codes. +HANDLE_DW_VIRTUALITY(0x00, none) +HANDLE_DW_VIRTUALITY(0x01, virtual) +HANDLE_DW_VIRTUALITY(0x02, pure_virtual) + +// DWARF v5 Defaulted Member Encodings. +HANDLE_DW_DEFAULTED(0x00, no) +HANDLE_DW_DEFAULTED(0x01, in_class) +HANDLE_DW_DEFAULTED(0x02, out_of_class) + +// DWARF calling convention codes. 
+HANDLE_DW_CC(0x01, normal) +HANDLE_DW_CC(0x02, program) +HANDLE_DW_CC(0x03, nocall) +// New in DWARF v5: +HANDLE_DW_CC(0x04, pass_by_reference) +HANDLE_DW_CC(0x05, pass_by_value) +// Vendor extensions: +HANDLE_DW_CC(0x41, GNU_borland_fastcall_i386) +HANDLE_DW_CC(0xb0, BORLAND_safecall) +HANDLE_DW_CC(0xb1, BORLAND_stdcall) +HANDLE_DW_CC(0xb2, BORLAND_pascal) +HANDLE_DW_CC(0xb3, BORLAND_msfastcall) +HANDLE_DW_CC(0xb4, BORLAND_msreturn) +HANDLE_DW_CC(0xb5, BORLAND_thiscall) +HANDLE_DW_CC(0xb6, BORLAND_fastcall) +HANDLE_DW_CC(0xc0, LLVM_vectorcall) + +// Line Number Extended Opcode Encodings +HANDLE_DW_LNE(0x01, end_sequence) +HANDLE_DW_LNE(0x02, set_address) +HANDLE_DW_LNE(0x03, define_file) +// New in DWARF v4: +HANDLE_DW_LNE(0x04, set_discriminator) + +// Line Number Standard Opcode Encodings. +HANDLE_DW_LNS(0x00, extended_op) +HANDLE_DW_LNS(0x01, copy) +HANDLE_DW_LNS(0x02, advance_pc) +HANDLE_DW_LNS(0x03, advance_line) +HANDLE_DW_LNS(0x04, set_file) +HANDLE_DW_LNS(0x05, set_column) +HANDLE_DW_LNS(0x06, negate_stmt) +HANDLE_DW_LNS(0x07, set_basic_block) +HANDLE_DW_LNS(0x08, const_add_pc) +HANDLE_DW_LNS(0x09, fixed_advance_pc) +// New in DWARF v3: +HANDLE_DW_LNS(0x0a, set_prologue_end) +HANDLE_DW_LNS(0x0b, set_epilogue_begin) +HANDLE_DW_LNS(0x0c, set_isa) + +// DWARF v5 Line number header entry format. +HANDLE_DW_LNCT(0x01, path) +HANDLE_DW_LNCT(0x02, directory_index) +HANDLE_DW_LNCT(0x03, timestamp) +HANDLE_DW_LNCT(0x04, size) +HANDLE_DW_LNCT(0x05, MD5) + +// DWARF v5 Macro information. +HANDLE_DW_MACRO(0x01, define) +HANDLE_DW_MACRO(0x02, undef) +HANDLE_DW_MACRO(0x03, start_file) +HANDLE_DW_MACRO(0x04, end_file) +HANDLE_DW_MACRO(0x05, define_strp) +HANDLE_DW_MACRO(0x06, undef_strp) +HANDLE_DW_MACRO(0x07, import) +HANDLE_DW_MACRO(0x08, define_sup) +HANDLE_DW_MACRO(0x09, undef_sup) +HANDLE_DW_MACRO(0x0a, import_sup) +HANDLE_DW_MACRO(0x0b, define_strx) +HANDLE_DW_MACRO(0x0c, undef_strx) + +// DWARF v5 Range List Entry encoding values. +HANDLE_DW_RLE(0x00, end_of_list) +HANDLE_DW_RLE(0x01, base_addressx) +HANDLE_DW_RLE(0x02, startx_endx) +HANDLE_DW_RLE(0x03, startx_length) +HANDLE_DW_RLE(0x04, offset_pair) +HANDLE_DW_RLE(0x05, base_address) +HANDLE_DW_RLE(0x06, start_end) +HANDLE_DW_RLE(0x07, start_length) + +// Call frame instruction encodings. +HANDLE_DW_CFA(0x00, nop) +HANDLE_DW_CFA(0x40, advance_loc) +HANDLE_DW_CFA(0x80, offset) +HANDLE_DW_CFA(0xc0, restore) +HANDLE_DW_CFA(0x01, set_loc) +HANDLE_DW_CFA(0x02, advance_loc1) +HANDLE_DW_CFA(0x03, advance_loc2) +HANDLE_DW_CFA(0x04, advance_loc4) +HANDLE_DW_CFA(0x05, offset_extended) +HANDLE_DW_CFA(0x06, restore_extended) +HANDLE_DW_CFA(0x07, undefined) +HANDLE_DW_CFA(0x08, same_value) +HANDLE_DW_CFA(0x09, register) +HANDLE_DW_CFA(0x0a, remember_state) +HANDLE_DW_CFA(0x0b, restore_state) +HANDLE_DW_CFA(0x0c, def_cfa) +HANDLE_DW_CFA(0x0d, def_cfa_register) +HANDLE_DW_CFA(0x0e, def_cfa_offset) +// New in DWARF v3: +HANDLE_DW_CFA(0x0f, def_cfa_expression) +HANDLE_DW_CFA(0x10, expression) +HANDLE_DW_CFA(0x11, offset_extended_sf) +HANDLE_DW_CFA(0x12, def_cfa_sf) +HANDLE_DW_CFA(0x13, def_cfa_offset_sf) +HANDLE_DW_CFA(0x14, val_offset) +HANDLE_DW_CFA(0x15, val_offset_sf) +HANDLE_DW_CFA(0x16, val_expression) +// Vendor extensions: +HANDLE_DW_CFA(0x1d, MIPS_advance_loc8) +HANDLE_DW_CFA(0x2d, GNU_window_save) +HANDLE_DW_CFA(0x2e, GNU_args_size) + +// Apple Objective-C Property Attributes. +// Keep this list in sync with clang's DeclSpec.h ObjCPropertyAttributeKind! 
+HANDLE_DW_APPLE_PROPERTY(0x01, readonly)
+HANDLE_DW_APPLE_PROPERTY(0x02, getter)
+HANDLE_DW_APPLE_PROPERTY(0x04, assign)
+HANDLE_DW_APPLE_PROPERTY(0x08, readwrite)
+HANDLE_DW_APPLE_PROPERTY(0x10, retain)
+HANDLE_DW_APPLE_PROPERTY(0x20, copy)
+HANDLE_DW_APPLE_PROPERTY(0x40, nonatomic)
+HANDLE_DW_APPLE_PROPERTY(0x80, setter)
+HANDLE_DW_APPLE_PROPERTY(0x100, atomic)
+HANDLE_DW_APPLE_PROPERTY(0x200, weak)
+HANDLE_DW_APPLE_PROPERTY(0x400, strong)
+HANDLE_DW_APPLE_PROPERTY(0x800, unsafe_unretained)
+HANDLE_DW_APPLE_PROPERTY(0x1000, nullability)
+HANDLE_DW_APPLE_PROPERTY(0x2000, null_resettable)
+HANDLE_DW_APPLE_PROPERTY(0x4000, class)
+
+// DWARF v5 Unit Types.
+HANDLE_DW_UT(0x01, compile)
+HANDLE_DW_UT(0x02, type)
+HANDLE_DW_UT(0x03, partial)
+HANDLE_DW_UT(0x04, skeleton)
+HANDLE_DW_UT(0x05, split_compile)
+HANDLE_DW_UT(0x06, split_type)
+
+#undef HANDLE_DW_TAG
+#undef HANDLE_DW_AT
+#undef HANDLE_DW_FORM
+#undef HANDLE_DW_OP
+#undef HANDLE_DW_LANG
+#undef HANDLE_DW_ATE
+#undef HANDLE_DW_VIRTUALITY
+#undef HANDLE_DW_DEFAULTED
+#undef HANDLE_DW_CC
+#undef HANDLE_DW_LNS
+#undef HANDLE_DW_LNE
+#undef HANDLE_DW_LNCT
+#undef HANDLE_DW_MACRO
+#undef HANDLE_DW_RLE
+#undef HANDLE_DW_CFA
+#undef HANDLE_DW_APPLE_PROPERTY
+#undef HANDLE_DW_UT
diff --git a/interpreter/llvm/src/include/llvm/BinaryFormat/Dwarf.h b/interpreter/llvm/src/include/llvm/BinaryFormat/Dwarf.h
new file mode 100644
index 0000000000000..80456a0808f2d
--- /dev/null
+++ b/interpreter/llvm/src/include/llvm/BinaryFormat/Dwarf.h
@@ -0,0 +1,496 @@
+//===-- llvm/BinaryFormat/Dwarf.h ---Dwarf Constants-------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// \file
+// \brief This file contains constants used for implementing Dwarf
+// debug support.
+//
+// For details on the Dwarf specification see the latest DWARF Debugging
+// Information Format standard document on http://www.dwarfstd.org. This
+// file often includes support for non-released standard features.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_BINARYFORMAT_DWARF_H
+#define LLVM_BINARYFORMAT_DWARF_H
+
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+class StringRef;
+
+namespace dwarf {
+
+//===----------------------------------------------------------------------===//
+// DWARF constants as gleaned from the DWARF Debugging Information Format V.5
+// reference manual http://www.dwarfstd.org/.
+//
+
+// Do not mix the following two enumeration sets. DW_TAG_invalid changes the
+// enumeration base type.
+
+enum LLVMConstants : uint32_t {
+  // LLVM mock tags (see also llvm/BinaryFormat/Dwarf.def).
+  DW_TAG_invalid = ~0U,        // Tag for invalid results.
+  DW_VIRTUALITY_invalid = ~0U, // Virtuality for invalid results.
+  DW_MACINFO_invalid = ~0U,    // Macinfo type for invalid results.
+
+  // Other constants.
+  DWARF_VERSION = 4,       // Default dwarf version we output.
+  DW_PUBTYPES_VERSION = 2, // Section version number for .debug_pubtypes.
+  DW_PUBNAMES_VERSION = 2, // Section version number for .debug_pubnames.
+  DW_ARANGES_VERSION = 2,  // Section version number for .debug_aranges.
+  // Identifiers we use to distinguish vendor extensions.
+  DWARF_VENDOR_DWARF = 0, // Defined in v2 or later of the DWARF standard.
+ DWARF_VENDOR_APPLE = 1, + DWARF_VENDOR_BORLAND = 2, + DWARF_VENDOR_GNU = 3, + DWARF_VENDOR_GOOGLE = 4, + DWARF_VENDOR_LLVM = 5, + DWARF_VENDOR_MIPS = 6 +}; + +// Special ID values that distinguish a CIE from a FDE in DWARF CFI. +// Not inside an enum because a 64-bit value is needed. +const uint32_t DW_CIE_ID = UINT32_MAX; +const uint64_t DW64_CIE_ID = UINT64_MAX; + +// Identifier of an invalid DIE offset in the .debug_info section. +const uint32_t DW_INVALID_OFFSET = UINT32_MAX; + +enum Tag : uint16_t { +#define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR) DW_TAG_##NAME = ID, +#include "llvm/BinaryFormat/Dwarf.def" + DW_TAG_lo_user = 0x4080, + DW_TAG_hi_user = 0xffff, + DW_TAG_user_base = 0x1000 // Recommended base for user tags. +}; + +inline bool isType(Tag T) { + switch (T) { + case DW_TAG_array_type: + case DW_TAG_class_type: + case DW_TAG_interface_type: + case DW_TAG_enumeration_type: + case DW_TAG_pointer_type: + case DW_TAG_reference_type: + case DW_TAG_rvalue_reference_type: + case DW_TAG_string_type: + case DW_TAG_structure_type: + case DW_TAG_subroutine_type: + case DW_TAG_union_type: + case DW_TAG_ptr_to_member_type: + case DW_TAG_set_type: + case DW_TAG_subrange_type: + case DW_TAG_base_type: + case DW_TAG_const_type: + case DW_TAG_file_type: + case DW_TAG_packed_type: + case DW_TAG_volatile_type: + case DW_TAG_typedef: + return true; + default: + return false; + } +} + +/// Attributes. +enum Attribute : uint16_t { +#define HANDLE_DW_AT(ID, NAME, VERSION, VENDOR) DW_AT_##NAME = ID, +#include "llvm/BinaryFormat/Dwarf.def" + DW_AT_lo_user = 0x2000, + DW_AT_hi_user = 0x3fff, +}; + +enum Form : uint16_t { +#define HANDLE_DW_FORM(ID, NAME, VERSION, VENDOR) DW_FORM_##NAME = ID, +#include "llvm/BinaryFormat/Dwarf.def" + DW_FORM_lo_user = 0x1f00, ///< Not specified by DWARF. +}; + +enum LocationAtom { +#define HANDLE_DW_OP(ID, NAME, VERSION, VENDOR) DW_OP_##NAME = ID, +#include "llvm/BinaryFormat/Dwarf.def" + DW_OP_lo_user = 0xe0, + DW_OP_hi_user = 0xff, + DW_OP_LLVM_fragment = 0x1000 ///< Only used in LLVM metadata. 
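// NOTE: the HANDLE_DW_* definitions paired with
// #include "llvm/BinaryFormat/Dwarf.def" above are the classic "X macro"
// pattern: the .def file is re-included with a different macro body each
// time, so one table drives every enum here as well as the string, version
// and vendor lookups declared further down in this header. A minimal sketch
// of expanding the same table yourself (MyTagName is a hypothetical helper,
// not part of this patch; Dwarf.def appears to supply empty fallbacks for
// the HANDLE_DW_* macros left undefined):
//
//   const char *MyTagName(unsigned Tag) {
//     switch (Tag) {
//   #define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR)                        \
//     case ID:                                                              \
//       return "DW_TAG_" #NAME;
//   #include "llvm/BinaryFormat/Dwarf.def"
//     default:
//       return nullptr;
//     }
//   }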
+}; + +enum TypeKind { +#define HANDLE_DW_ATE(ID, NAME, VERSION, VENDOR) DW_ATE_##NAME = ID, +#include "llvm/BinaryFormat/Dwarf.def" + DW_ATE_lo_user = 0x80, + DW_ATE_hi_user = 0xff +}; + +enum DecimalSignEncoding { + // Decimal sign attribute values + DW_DS_unsigned = 0x01, + DW_DS_leading_overpunch = 0x02, + DW_DS_trailing_overpunch = 0x03, + DW_DS_leading_separate = 0x04, + DW_DS_trailing_separate = 0x05 +}; + +enum EndianityEncoding { + // Endianity attribute values + DW_END_default = 0x00, + DW_END_big = 0x01, + DW_END_little = 0x02, + DW_END_lo_user = 0x40, + DW_END_hi_user = 0xff +}; + +enum AccessAttribute { + // Accessibility codes + DW_ACCESS_public = 0x01, + DW_ACCESS_protected = 0x02, + DW_ACCESS_private = 0x03 +}; + +enum VisibilityAttribute { + // Visibility codes + DW_VIS_local = 0x01, + DW_VIS_exported = 0x02, + DW_VIS_qualified = 0x03 +}; + +enum VirtualityAttribute { +#define HANDLE_DW_VIRTUALITY(ID, NAME) DW_VIRTUALITY_##NAME = ID, +#include "llvm/BinaryFormat/Dwarf.def" + DW_VIRTUALITY_max = 0x02 +}; + +enum DefaultedMemberAttribute { +#define HANDLE_DW_DEFAULTED(ID, NAME) DW_DEFAULTED_##NAME = ID, +#include "llvm/BinaryFormat/Dwarf.def" + DW_DEFAULTED_max = 0x02 +}; + +enum SourceLanguage { +#define HANDLE_DW_LANG(ID, NAME, VERSION, VENDOR) DW_LANG_##NAME = ID, +#include "llvm/BinaryFormat/Dwarf.def" + DW_LANG_lo_user = 0x8000, + DW_LANG_hi_user = 0xffff +}; + +enum CaseSensitivity { + // Identifier case codes + DW_ID_case_sensitive = 0x00, + DW_ID_up_case = 0x01, + DW_ID_down_case = 0x02, + DW_ID_case_insensitive = 0x03 +}; + +enum CallingConvention { +// Calling convention codes +#define HANDLE_DW_CC(ID, NAME) DW_CC_##NAME = ID, +#include "llvm/BinaryFormat/Dwarf.def" + DW_CC_lo_user = 0x40, + DW_CC_hi_user = 0xff +}; + +enum InlineAttribute { + // Inline codes + DW_INL_not_inlined = 0x00, + DW_INL_inlined = 0x01, + DW_INL_declared_not_inlined = 0x02, + DW_INL_declared_inlined = 0x03 +}; + +enum ArrayDimensionOrdering { + // Array ordering + DW_ORD_row_major = 0x00, + DW_ORD_col_major = 0x01 +}; + +enum DiscriminantList { + // Discriminant descriptor values + DW_DSC_label = 0x00, + DW_DSC_range = 0x01 +}; + +/// Line Number Standard Opcode Encodings. +enum LineNumberOps : uint8_t { +#define HANDLE_DW_LNS(ID, NAME) DW_LNS_##NAME = ID, +#include "llvm/BinaryFormat/Dwarf.def" +}; + +/// Line Number Extended Opcode Encodings. +enum LineNumberExtendedOps { +#define HANDLE_DW_LNE(ID, NAME) DW_LNE_##NAME = ID, +#include "llvm/BinaryFormat/Dwarf.def" + DW_LNE_lo_user = 0x80, + DW_LNE_hi_user = 0xff +}; + +enum LineNumberEntryFormat { +#define HANDLE_DW_LNCT(ID, NAME) DW_LNCT_##NAME = ID, +#include "llvm/BinaryFormat/Dwarf.def" + DW_LNCT_lo_user = 0x2000, + DW_LNCT_hi_user = 0x3fff, +}; + +enum MacinfoRecordType { + // Macinfo Type Encodings + DW_MACINFO_define = 0x01, + DW_MACINFO_undef = 0x02, + DW_MACINFO_start_file = 0x03, + DW_MACINFO_end_file = 0x04, + DW_MACINFO_vendor_ext = 0xff +}; + +/// DWARF v5 macro information entry type encodings. +enum MacroEntryType { +#define HANDLE_DW_MACRO(ID, NAME) DW_MACRO_##NAME = ID, +#include "llvm/BinaryFormat/Dwarf.def" + DW_MACRO_lo_user = 0xe0, + DW_MACRO_hi_user = 0xff +}; + +/// DWARF v5 range list entry encoding values. +enum RangeListEntries { +#define HANDLE_DW_RLE(ID, NAME) DW_RLE_##NAME = ID, +#include "llvm/BinaryFormat/Dwarf.def" +}; + +/// Call frame instruction encodings. 
+enum CallFrameInfo { +#define HANDLE_DW_CFA(ID, NAME) DW_CFA_##NAME = ID, +#include "llvm/BinaryFormat/Dwarf.def" + DW_CFA_extended = 0x00, + + DW_CFA_lo_user = 0x1c, + DW_CFA_hi_user = 0x3f +}; + +enum Constants { + // Children flag + DW_CHILDREN_no = 0x00, + DW_CHILDREN_yes = 0x01, + + DW_EH_PE_absptr = 0x00, + DW_EH_PE_omit = 0xff, + DW_EH_PE_uleb128 = 0x01, + DW_EH_PE_udata2 = 0x02, + DW_EH_PE_udata4 = 0x03, + DW_EH_PE_udata8 = 0x04, + DW_EH_PE_sleb128 = 0x09, + DW_EH_PE_sdata2 = 0x0A, + DW_EH_PE_sdata4 = 0x0B, + DW_EH_PE_sdata8 = 0x0C, + DW_EH_PE_signed = 0x08, + DW_EH_PE_pcrel = 0x10, + DW_EH_PE_textrel = 0x20, + DW_EH_PE_datarel = 0x30, + DW_EH_PE_funcrel = 0x40, + DW_EH_PE_aligned = 0x50, + DW_EH_PE_indirect = 0x80 +}; + +/// Constants for location lists in DWARF v5. +enum LocationListEntry : unsigned char { + DW_LLE_end_of_list = 0x00, + DW_LLE_base_addressx = 0x01, + DW_LLE_startx_endx = 0x02, + DW_LLE_startx_length = 0x03, + DW_LLE_offset_pair = 0x04, + DW_LLE_default_location = 0x05, + DW_LLE_base_address = 0x06, + DW_LLE_start_end = 0x07, + DW_LLE_start_length = 0x08 +}; + +/// Constants for the DW_APPLE_PROPERTY_attributes attribute. +/// Keep this list in sync with clang's DeclSpec.h ObjCPropertyAttributeKind! +enum ApplePropertyAttributes { +#define HANDLE_DW_APPLE_PROPERTY(ID, NAME) DW_APPLE_PROPERTY_##NAME = ID, +#include "llvm/BinaryFormat/Dwarf.def" +}; + +/// Constants for unit types in DWARF v5. +enum UnitType : unsigned char { +#define HANDLE_DW_UT(ID, NAME) DW_UT_##NAME = ID, +#include "llvm/BinaryFormat/Dwarf.def" + DW_UT_lo_user = 0x80, + DW_UT_hi_user = 0xff +}; + +// Constants for the DWARF v5 Accelerator Table Proposal +enum AcceleratorTable { + // Data layout descriptors. + DW_ATOM_null = 0u, // Marker as the end of a list of atoms. + DW_ATOM_die_offset = 1u, // DIE offset in the debug_info section. + DW_ATOM_cu_offset = 2u, // Offset of the compile unit header that contains the + // item in question. + DW_ATOM_die_tag = 3u, // A tag entry. + DW_ATOM_type_flags = 4u, // Set of flags for a type. + + // DW_ATOM_type_flags values. + + // Always set for C++, only set for ObjC if this is the @implementation for a + // class. + DW_FLAG_type_implementation = 2u, + + // Hash functions. + + // Daniel J. Bernstein hash. + DW_hash_function_djb = 0u +}; + +// Constants for the GNU pubnames/pubtypes extensions supporting gdb index. +enum GDBIndexEntryKind { + GIEK_NONE, + GIEK_TYPE, + GIEK_VARIABLE, + GIEK_FUNCTION, + GIEK_OTHER, + GIEK_UNUSED5, + GIEK_UNUSED6, + GIEK_UNUSED7 +}; + +enum GDBIndexEntryLinkage { GIEL_EXTERNAL, GIEL_STATIC }; + +/// \defgroup DwarfConstantsDumping Dwarf constants dumping functions +/// +/// All these functions map their argument's value back to the +/// corresponding enumerator name or return nullptr if the value isn't +/// known. 
+/// +/// @{ +StringRef TagString(unsigned Tag); +StringRef ChildrenString(unsigned Children); +StringRef AttributeString(unsigned Attribute); +StringRef FormEncodingString(unsigned Encoding); +StringRef OperationEncodingString(unsigned Encoding); +StringRef AttributeEncodingString(unsigned Encoding); +StringRef DecimalSignString(unsigned Sign); +StringRef EndianityString(unsigned Endian); +StringRef AccessibilityString(unsigned Access); +StringRef VisibilityString(unsigned Visibility); +StringRef VirtualityString(unsigned Virtuality); +StringRef LanguageString(unsigned Language); +StringRef CaseString(unsigned Case); +StringRef ConventionString(unsigned Convention); +StringRef InlineCodeString(unsigned Code); +StringRef ArrayOrderString(unsigned Order); +StringRef DiscriminantString(unsigned Discriminant); +StringRef LNStandardString(unsigned Standard); +StringRef LNExtendedString(unsigned Encoding); +StringRef MacinfoString(unsigned Encoding); +StringRef CallFrameString(unsigned Encoding); +StringRef ApplePropertyString(unsigned); +StringRef UnitTypeString(unsigned); +StringRef AtomTypeString(unsigned Atom); +StringRef GDBIndexEntryKindString(GDBIndexEntryKind Kind); +StringRef GDBIndexEntryLinkageString(GDBIndexEntryLinkage Linkage); +/// @} + +/// \defgroup DwarfConstantsParsing Dwarf constants parsing functions +/// +/// These functions map their strings back to the corresponding enumeration +/// value or return 0 if there is none, except for these exceptions: +/// +/// \li \a getTag() returns \a DW_TAG_invalid on invalid input. +/// \li \a getVirtuality() returns \a DW_VIRTUALITY_invalid on invalid input. +/// \li \a getMacinfo() returns \a DW_MACINFO_invalid on invalid input. +/// +/// @{ +unsigned getTag(StringRef TagString); +unsigned getOperationEncoding(StringRef OperationEncodingString); +unsigned getVirtuality(StringRef VirtualityString); +unsigned getLanguage(StringRef LanguageString); +unsigned getCallingConvention(StringRef LanguageString); +unsigned getAttributeEncoding(StringRef EncodingString); +unsigned getMacinfo(StringRef MacinfoString); +/// @} + +/// \defgroup DwarfConstantsVersioning Dwarf version for constants +/// +/// For constants defined by DWARF, returns the DWARF version when the constant +/// was first defined. For vendor extensions, if there is a version-related +/// policy for when to emit it, returns a version number for that policy. +/// Otherwise returns 0. +/// +/// @{ +unsigned TagVersion(Tag T); +unsigned AttributeVersion(Attribute A); +unsigned FormVersion(Form F); +unsigned OperationVersion(LocationAtom O); +unsigned AttributeEncodingVersion(TypeKind E); +unsigned LanguageVersion(SourceLanguage L); +/// @} + +/// \defgroup DwarfConstantsVendor Dwarf "vendor" for constants +/// +/// These functions return an identifier describing "who" defined the constant, +/// either the DWARF standard itself or the vendor who defined the extension. +/// +/// @{ +unsigned TagVendor(Tag T); +unsigned AttributeVendor(Attribute A); +unsigned FormVendor(Form F); +unsigned OperationVendor(LocationAtom O); +unsigned AttributeEncodingVendor(TypeKind E); +unsigned LanguageVendor(SourceLanguage L); +/// @} + +/// Tells whether the specified form is defined in the specified version, +/// or is an extension if extensions are allowed. +bool isValidFormForVersion(Form F, unsigned Version, bool ExtensionsOk = true); + +/// \brief Returns the symbolic string representing Val when used as a value +/// for attribute Attr. 
+StringRef AttributeValueString(uint16_t Attr, unsigned Val);
+
+/// \brief Describes an entry of the various gnu_pub* debug sections.
+///
+/// The gnu_pub* kind looks like:
+///
+/// 0-3  reserved
+/// 4-6  symbol kind
+/// 7    0 == global, 1 == static
+///
+/// A gdb_index descriptor includes the above kind, shifted 24 bits up with the
+/// offset of the cu within the debug_info section stored in those 24 bits.
+struct PubIndexEntryDescriptor {
+  GDBIndexEntryKind Kind;
+  GDBIndexEntryLinkage Linkage;
+  PubIndexEntryDescriptor(GDBIndexEntryKind Kind, GDBIndexEntryLinkage Linkage)
+      : Kind(Kind), Linkage(Linkage) {}
+  /* implicit */ PubIndexEntryDescriptor(GDBIndexEntryKind Kind)
+      : Kind(Kind), Linkage(GIEL_EXTERNAL) {}
+  explicit PubIndexEntryDescriptor(uint8_t Value)
+      : Kind(
+            static_cast<GDBIndexEntryKind>((Value & KIND_MASK) >> KIND_OFFSET)),
+        Linkage(static_cast<GDBIndexEntryLinkage>((Value & LINKAGE_MASK) >>
+                                                  LINKAGE_OFFSET)) {}
+  uint8_t toBits() const {
+    return Kind << KIND_OFFSET | Linkage << LINKAGE_OFFSET;
+  }
+
+private:
+  enum {
+    KIND_OFFSET = 4,
+    KIND_MASK = 7 << KIND_OFFSET,
+    LINKAGE_OFFSET = 7,
+    LINKAGE_MASK = 1 << LINKAGE_OFFSET
+  };
+};
+
+/// Constants that define the DWARF format as 32 or 64 bit.
+enum DwarfFormat : uint8_t { DWARF32, DWARF64 };
+
+} // End of namespace dwarf
+
+} // End of namespace llvm
+
+#endif
diff --git a/interpreter/llvm/src/include/llvm/BinaryFormat/ELF.h b/interpreter/llvm/src/include/llvm/BinaryFormat/ELF.h
new file mode 100644
index 0000000000000..a4450ee13b409
--- /dev/null
+++ b/interpreter/llvm/src/include/llvm/BinaryFormat/ELF.h
@@ -0,0 +1,1364 @@
+//===- llvm/BinaryFormat/ELF.h - ELF constants and structures ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This header contains common, non-processor-specific data structures and
+// constants for the ELF file format.
+//
+// The details of the ELF32 bits in this file are largely based on the Tool
+// Interface Standard (TIS) Executable and Linking Format (ELF) Specification
+// Version 1.2, May 1995. The ELF64 stuff is based on ELF-64 Object File Format
+// Version 1.5, Draft 2, May 1998 as well as OpenBSD header files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_BINARYFORMAT_ELF_H
+#define LLVM_BINARYFORMAT_ELF_H
+
+#include <cstdint>
+#include <cstring>
+
+namespace llvm {
+namespace ELF {
+
+using Elf32_Addr = uint32_t; // Program address
+using Elf32_Off = uint32_t;  // File offset
+using Elf32_Half = uint16_t;
+using Elf32_Word = uint32_t;
+using Elf32_Sword = int32_t;
+
+using Elf64_Addr = uint64_t;
+using Elf64_Off = uint64_t;
+using Elf64_Half = uint16_t;
+using Elf64_Word = uint32_t;
+using Elf64_Sword = int32_t;
+using Elf64_Xword = uint64_t;
+using Elf64_Sxword = int64_t;
+
+// Object file magic string.
+static const char ElfMagic[] = {0x7f, 'E', 'L', 'F', '\0'};
+
+// e_ident size and indices.
+enum {
+  EI_MAG0 = 0,       // File identification index.
+  EI_MAG1 = 1,       // File identification index.
+  EI_MAG2 = 2,       // File identification index.
+  EI_MAG3 = 3,       // File identification index.
+  EI_CLASS = 4,      // File class.
+  EI_DATA = 5,       // Data encoding.
+  EI_VERSION = 6,    // File version.
+  EI_OSABI = 7,      // OS/ABI identification.
+  EI_ABIVERSION = 8, // ABI version.
+  EI_PAD = 9,        // Start of padding bytes.
+  EI_NIDENT = 16     // Number of bytes in e_ident.
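// NOTE: a worked example of the PubIndexEntryDescriptor packing defined in
// Dwarf.h above, using KIND_OFFSET = 4, LINKAGE_OFFSET = 7, and the
// enumerator values GIEK_FUNCTION = 3 and GIEL_STATIC = 1 (Desc, Bits and
// Back are illustrative names, not part of the patch):
//
//   dwarf::PubIndexEntryDescriptor Desc(dwarf::GIEK_FUNCTION,
//                                       dwarf::GIEL_STATIC);
//   uint8_t Bits = Desc.toBits();              // (3 << 4) | (1 << 7) == 0xb0
//   dwarf::PubIndexEntryDescriptor Back(Bits); // recovers kind and linkage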
+}; + +struct Elf32_Ehdr { + unsigned char e_ident[EI_NIDENT]; // ELF Identification bytes + Elf32_Half e_type; // Type of file (see ET_* below) + Elf32_Half e_machine; // Required architecture for this file (see EM_*) + Elf32_Word e_version; // Must be equal to 1 + Elf32_Addr e_entry; // Address to jump to in order to start program + Elf32_Off e_phoff; // Program header table's file offset, in bytes + Elf32_Off e_shoff; // Section header table's file offset, in bytes + Elf32_Word e_flags; // Processor-specific flags + Elf32_Half e_ehsize; // Size of ELF header, in bytes + Elf32_Half e_phentsize; // Size of an entry in the program header table + Elf32_Half e_phnum; // Number of entries in the program header table + Elf32_Half e_shentsize; // Size of an entry in the section header table + Elf32_Half e_shnum; // Number of entries in the section header table + Elf32_Half e_shstrndx; // Sect hdr table index of sect name string table + + bool checkMagic() const { + return (memcmp(e_ident, ElfMagic, strlen(ElfMagic))) == 0; + } + + unsigned char getFileClass() const { return e_ident[EI_CLASS]; } + unsigned char getDataEncoding() const { return e_ident[EI_DATA]; } +}; + +// 64-bit ELF header. Fields are the same as for ELF32, but with different +// types (see above). +struct Elf64_Ehdr { + unsigned char e_ident[EI_NIDENT]; + Elf64_Half e_type; + Elf64_Half e_machine; + Elf64_Word e_version; + Elf64_Addr e_entry; + Elf64_Off e_phoff; + Elf64_Off e_shoff; + Elf64_Word e_flags; + Elf64_Half e_ehsize; + Elf64_Half e_phentsize; + Elf64_Half e_phnum; + Elf64_Half e_shentsize; + Elf64_Half e_shnum; + Elf64_Half e_shstrndx; + + bool checkMagic() const { + return (memcmp(e_ident, ElfMagic, strlen(ElfMagic))) == 0; + } + + unsigned char getFileClass() const { return e_ident[EI_CLASS]; } + unsigned char getDataEncoding() const { return e_ident[EI_DATA]; } +}; + +// File types +enum { + ET_NONE = 0, // No file type + ET_REL = 1, // Relocatable file + ET_EXEC = 2, // Executable file + ET_DYN = 3, // Shared object file + ET_CORE = 4, // Core file + ET_LOPROC = 0xff00, // Beginning of processor-specific codes + ET_HIPROC = 0xffff // Processor-specific +}; + +// Versioning +enum { EV_NONE = 0, EV_CURRENT = 1 }; + +// Machine architectures +// See current registered ELF machine architectures at: +// http://www.uxsglobal.com/developers/gabi/latest/ch4.eheader.html +enum { + EM_NONE = 0, // No machine + EM_M32 = 1, // AT&T WE 32100 + EM_SPARC = 2, // SPARC + EM_386 = 3, // Intel 386 + EM_68K = 4, // Motorola 68000 + EM_88K = 5, // Motorola 88000 + EM_IAMCU = 6, // Intel MCU + EM_860 = 7, // Intel 80860 + EM_MIPS = 8, // MIPS R3000 + EM_S370 = 9, // IBM System/370 + EM_MIPS_RS3_LE = 10, // MIPS RS3000 Little-endian + EM_PARISC = 15, // Hewlett-Packard PA-RISC + EM_VPP500 = 17, // Fujitsu VPP500 + EM_SPARC32PLUS = 18, // Enhanced instruction set SPARC + EM_960 = 19, // Intel 80960 + EM_PPC = 20, // PowerPC + EM_PPC64 = 21, // PowerPC64 + EM_S390 = 22, // IBM System/390 + EM_SPU = 23, // IBM SPU/SPC + EM_V800 = 36, // NEC V800 + EM_FR20 = 37, // Fujitsu FR20 + EM_RH32 = 38, // TRW RH-32 + EM_RCE = 39, // Motorola RCE + EM_ARM = 40, // ARM + EM_ALPHA = 41, // DEC Alpha + EM_SH = 42, // Hitachi SH + EM_SPARCV9 = 43, // SPARC V9 + EM_TRICORE = 44, // Siemens TriCore + EM_ARC = 45, // Argonaut RISC Core + EM_H8_300 = 46, // Hitachi H8/300 + EM_H8_300H = 47, // Hitachi H8/300H + EM_H8S = 48, // Hitachi H8S + EM_H8_500 = 49, // Hitachi H8/500 + EM_IA_64 = 50, // Intel IA-64 processor architecture + EM_MIPS_X = 51, // 
Stanford MIPS-X + EM_COLDFIRE = 52, // Motorola ColdFire + EM_68HC12 = 53, // Motorola M68HC12 + EM_MMA = 54, // Fujitsu MMA Multimedia Accelerator + EM_PCP = 55, // Siemens PCP + EM_NCPU = 56, // Sony nCPU embedded RISC processor + EM_NDR1 = 57, // Denso NDR1 microprocessor + EM_STARCORE = 58, // Motorola Star*Core processor + EM_ME16 = 59, // Toyota ME16 processor + EM_ST100 = 60, // STMicroelectronics ST100 processor + EM_TINYJ = 61, // Advanced Logic Corp. TinyJ embedded processor family + EM_X86_64 = 62, // AMD x86-64 architecture + EM_PDSP = 63, // Sony DSP Processor + EM_PDP10 = 64, // Digital Equipment Corp. PDP-10 + EM_PDP11 = 65, // Digital Equipment Corp. PDP-11 + EM_FX66 = 66, // Siemens FX66 microcontroller + EM_ST9PLUS = 67, // STMicroelectronics ST9+ 8/16 bit microcontroller + EM_ST7 = 68, // STMicroelectronics ST7 8-bit microcontroller + EM_68HC16 = 69, // Motorola MC68HC16 Microcontroller + EM_68HC11 = 70, // Motorola MC68HC11 Microcontroller + EM_68HC08 = 71, // Motorola MC68HC08 Microcontroller + EM_68HC05 = 72, // Motorola MC68HC05 Microcontroller + EM_SVX = 73, // Silicon Graphics SVx + EM_ST19 = 74, // STMicroelectronics ST19 8-bit microcontroller + EM_VAX = 75, // Digital VAX + EM_CRIS = 76, // Axis Communications 32-bit embedded processor + EM_JAVELIN = 77, // Infineon Technologies 32-bit embedded processor + EM_FIREPATH = 78, // Element 14 64-bit DSP Processor + EM_ZSP = 79, // LSI Logic 16-bit DSP Processor + EM_MMIX = 80, // Donald Knuth's educational 64-bit processor + EM_HUANY = 81, // Harvard University machine-independent object files + EM_PRISM = 82, // SiTera Prism + EM_AVR = 83, // Atmel AVR 8-bit microcontroller + EM_FR30 = 84, // Fujitsu FR30 + EM_D10V = 85, // Mitsubishi D10V + EM_D30V = 86, // Mitsubishi D30V + EM_V850 = 87, // NEC v850 + EM_M32R = 88, // Mitsubishi M32R + EM_MN10300 = 89, // Matsushita MN10300 + EM_MN10200 = 90, // Matsushita MN10200 + EM_PJ = 91, // picoJava + EM_OPENRISC = 92, // OpenRISC 32-bit embedded processor + EM_ARC_COMPACT = 93, // ARC International ARCompact processor (old + // spelling/synonym: EM_ARC_A5) + EM_XTENSA = 94, // Tensilica Xtensa Architecture + EM_VIDEOCORE = 95, // Alphamosaic VideoCore processor + EM_TMM_GPP = 96, // Thompson Multimedia General Purpose Processor + EM_NS32K = 97, // National Semiconductor 32000 series + EM_TPC = 98, // Tenor Network TPC processor + EM_SNP1K = 99, // Trebia SNP 1000 processor + EM_ST200 = 100, // STMicroelectronics (www.st.com) ST200 + EM_IP2K = 101, // Ubicom IP2xxx microcontroller family + EM_MAX = 102, // MAX Processor + EM_CR = 103, // National Semiconductor CompactRISC microprocessor + EM_F2MC16 = 104, // Fujitsu F2MC16 + EM_MSP430 = 105, // Texas Instruments embedded microcontroller msp430 + EM_BLACKFIN = 106, // Analog Devices Blackfin (DSP) processor + EM_SE_C33 = 107, // S1C33 Family of Seiko Epson processors + EM_SEP = 108, // Sharp embedded microprocessor + EM_ARCA = 109, // Arca RISC Microprocessor + EM_UNICORE = 110, // Microprocessor series from PKU-Unity Ltd. and MPRC + // of Peking University + EM_EXCESS = 111, // eXcess: 16/32/64-bit configurable embedded CPU + EM_DXP = 112, // Icera Semiconductor Inc. 
Deep Execution Processor + EM_ALTERA_NIOS2 = 113, // Altera Nios II soft-core processor + EM_CRX = 114, // National Semiconductor CompactRISC CRX + EM_XGATE = 115, // Motorola XGATE embedded processor + EM_C166 = 116, // Infineon C16x/XC16x processor + EM_M16C = 117, // Renesas M16C series microprocessors + EM_DSPIC30F = 118, // Microchip Technology dsPIC30F Digital Signal + // Controller + EM_CE = 119, // Freescale Communication Engine RISC core + EM_M32C = 120, // Renesas M32C series microprocessors + EM_TSK3000 = 131, // Altium TSK3000 core + EM_RS08 = 132, // Freescale RS08 embedded processor + EM_SHARC = 133, // Analog Devices SHARC family of 32-bit DSP + // processors + EM_ECOG2 = 134, // Cyan Technology eCOG2 microprocessor + EM_SCORE7 = 135, // Sunplus S+core7 RISC processor + EM_DSP24 = 136, // New Japan Radio (NJR) 24-bit DSP Processor + EM_VIDEOCORE3 = 137, // Broadcom VideoCore III processor + EM_LATTICEMICO32 = 138, // RISC processor for Lattice FPGA architecture + EM_SE_C17 = 139, // Seiko Epson C17 family + EM_TI_C6000 = 140, // The Texas Instruments TMS320C6000 DSP family + EM_TI_C2000 = 141, // The Texas Instruments TMS320C2000 DSP family + EM_TI_C5500 = 142, // The Texas Instruments TMS320C55x DSP family + EM_MMDSP_PLUS = 160, // STMicroelectronics 64bit VLIW Data Signal Processor + EM_CYPRESS_M8C = 161, // Cypress M8C microprocessor + EM_R32C = 162, // Renesas R32C series microprocessors + EM_TRIMEDIA = 163, // NXP Semiconductors TriMedia architecture family + EM_HEXAGON = 164, // Qualcomm Hexagon processor + EM_8051 = 165, // Intel 8051 and variants + EM_STXP7X = 166, // STMicroelectronics STxP7x family of configurable + // and extensible RISC processors + EM_NDS32 = 167, // Andes Technology compact code size embedded RISC + // processor family + EM_ECOG1 = 168, // Cyan Technology eCOG1X family + EM_ECOG1X = 168, // Cyan Technology eCOG1X family + EM_MAXQ30 = 169, // Dallas Semiconductor MAXQ30 Core Micro-controllers + EM_XIMO16 = 170, // New Japan Radio (NJR) 16-bit DSP Processor + EM_MANIK = 171, // M2000 Reconfigurable RISC Microprocessor + EM_CRAYNV2 = 172, // Cray Inc. 
NV2 vector architecture
+  EM_RX = 173,           // Renesas RX family
+  EM_METAG = 174,        // Imagination Technologies META processor
+                         // architecture
+  EM_MCST_ELBRUS = 175,  // MCST Elbrus general purpose hardware architecture
+  EM_ECOG16 = 176,       // Cyan Technology eCOG16 family
+  EM_CR16 = 177,         // National Semiconductor CompactRISC CR16 16-bit
+                         // microprocessor
+  EM_ETPU = 178,         // Freescale Extended Time Processing Unit
+  EM_SLE9X = 179,        // Infineon Technologies SLE9X core
+  EM_L10M = 180,         // Intel L10M
+  EM_K10M = 181,         // Intel K10M
+  EM_AARCH64 = 183,      // ARM AArch64
+  EM_AVR32 = 185,        // Atmel Corporation 32-bit microprocessor family
+  EM_STM8 = 186,         // STMicroelectronics STM8 8-bit microcontroller
+  EM_TILE64 = 187,       // Tilera TILE64 multicore architecture family
+  EM_TILEPRO = 188,      // Tilera TILEPro multicore architecture family
+  EM_CUDA = 190,         // NVIDIA CUDA architecture
+  EM_TILEGX = 191,       // Tilera TILE-Gx multicore architecture family
+  EM_CLOUDSHIELD = 192,  // CloudShield architecture family
+  EM_COREA_1ST = 193,    // KIPO-KAIST Core-A 1st generation processor family
+  EM_COREA_2ND = 194,    // KIPO-KAIST Core-A 2nd generation processor family
+  EM_ARC_COMPACT2 = 195, // Synopsys ARCompact V2
+  EM_OPEN8 = 196,        // Open8 8-bit RISC soft processor core
+  EM_RL78 = 197,         // Renesas RL78 family
+  EM_VIDEOCORE5 = 198,   // Broadcom VideoCore V processor
+  EM_78KOR = 199,        // Renesas 78KOR family
+  EM_56800EX = 200,      // Freescale 56800EX Digital Signal Controller (DSC)
+  EM_BA1 = 201,          // Beyond BA1 CPU architecture
+  EM_BA2 = 202,          // Beyond BA2 CPU architecture
+  EM_XCORE = 203,        // XMOS xCORE processor family
+  EM_MCHP_PIC = 204,     // Microchip 8-bit PIC(r) family
+  EM_INTEL205 = 205,     // Reserved by Intel
+  EM_INTEL206 = 206,     // Reserved by Intel
+  EM_INTEL207 = 207,     // Reserved by Intel
+  EM_INTEL208 = 208,     // Reserved by Intel
+  EM_INTEL209 = 209,     // Reserved by Intel
+  EM_KM32 = 210,         // KM211 KM32 32-bit processor
+  EM_KMX32 = 211,        // KM211 KMX32 32-bit processor
+  EM_KMX16 = 212,        // KM211 KMX16 16-bit processor
+  EM_KMX8 = 213,         // KM211 KMX8 8-bit processor
+  EM_KVARC = 214,        // KM211 KVARC processor
+  EM_CDP = 215,          // Paneve CDP architecture family
+  EM_COGE = 216,         // Cognitive Smart Memory Processor
+  EM_COOL = 217,         // iCelero CoolEngine
+  EM_NORC = 218,         // Nanoradio Optimized RISC
+  EM_CSR_KALIMBA = 219,  // CSR Kalimba architecture family
+  EM_AMDGPU = 224,       // AMD GPU architecture
+  EM_RISCV = 243,        // RISC-V
+  EM_LANAI = 244,        // Lanai 32-bit processor
+  EM_BPF = 247,          // Linux kernel bpf virtual machine
+
+  // A request has been made to the maintainer of the official registry for
+  // such numbers for an official value for WebAssembly. As soon as one is
+  // allocated, this enum will be updated to use it.
+  EM_WEBASSEMBLY = 0x4157, // WebAssembly architecture
+};
+
+// Object file classes.
+enum {
+  ELFCLASSNONE = 0,
+  ELFCLASS32 = 1, // 32-bit object file
+  ELFCLASS64 = 2  // 64-bit object file
+};
+
+// Object file byte orderings.
+enum {
+  ELFDATANONE = 0, // Invalid data encoding.
+  ELFDATA2LSB = 1, // Little-endian object file
+  ELFDATA2MSB = 2  // Big-endian object file
+};
+
+// OS ABI identification.
+enum {
+  ELFOSABI_NONE = 0,   // UNIX System V ABI
+  ELFOSABI_HPUX = 1,   // HP-UX operating system
+  ELFOSABI_NETBSD = 2, // NetBSD
+  ELFOSABI_GNU = 3,    // GNU/Linux
+  ELFOSABI_LINUX = 3,  // Historical alias for ELFOSABI_GNU.
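// NOTE: a minimal sketch of how the Elf64_Ehdr defined above is typically
// consumed together with the ELFCLASS*/ELFDATA* constants (Buf is a
// hypothetical pointer to a memory-mapped object file; size checks omitted):
//
//   const Elf64_Ehdr *H = reinterpret_cast<const Elf64_Ehdr *>(Buf);
//   if (H->checkMagic() && H->getFileClass() == ELFCLASS64 &&
//       H->getDataEncoding() == ELFDATA2LSB) {
//     // a little-endian 64-bit ELF file
//   }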
+ ELFOSABI_HURD = 4, // GNU/Hurd + ELFOSABI_SOLARIS = 6, // Solaris + ELFOSABI_AIX = 7, // AIX + ELFOSABI_IRIX = 8, // IRIX + ELFOSABI_FREEBSD = 9, // FreeBSD + ELFOSABI_TRU64 = 10, // TRU64 UNIX + ELFOSABI_MODESTO = 11, // Novell Modesto + ELFOSABI_OPENBSD = 12, // OpenBSD + ELFOSABI_OPENVMS = 13, // OpenVMS + ELFOSABI_NSK = 14, // Hewlett-Packard Non-Stop Kernel + ELFOSABI_AROS = 15, // AROS + ELFOSABI_FENIXOS = 16, // FenixOS + ELFOSABI_CLOUDABI = 17, // Nuxi CloudABI + ELFOSABI_C6000_ELFABI = 64, // Bare-metal TMS320C6000 + ELFOSABI_AMDGPU_HSA = 64, // AMD HSA runtime + ELFOSABI_C6000_LINUX = 65, // Linux TMS320C6000 + ELFOSABI_ARM = 97, // ARM + ELFOSABI_STANDALONE = 255 // Standalone (embedded) application +}; + +#define ELF_RELOC(name, value) name = value, + +// X86_64 relocations. +enum { +#include "ELFRelocs/x86_64.def" +}; + +// i386 relocations. +enum { +#include "ELFRelocs/i386.def" +}; + +// ELF Relocation types for PPC32 +enum { +#include "ELFRelocs/PowerPC.def" +}; + +// Specific e_flags for PPC64 +enum { + // e_flags bits specifying ABI: + // 1 for original ABI using function descriptors, + // 2 for revised ABI without function descriptors, + // 0 for unspecified or not using any features affected by the differences. + EF_PPC64_ABI = 3 +}; + +// Special values for the st_other field in the symbol table entry for PPC64. +enum { + STO_PPC64_LOCAL_BIT = 5, + STO_PPC64_LOCAL_MASK = (7 << STO_PPC64_LOCAL_BIT) +}; +static inline int64_t decodePPC64LocalEntryOffset(unsigned Other) { + unsigned Val = (Other & STO_PPC64_LOCAL_MASK) >> STO_PPC64_LOCAL_BIT; + return ((1 << Val) >> 2) << 2; +} +static inline unsigned encodePPC64LocalEntryOffset(int64_t Offset) { + unsigned Val = + (Offset >= 4 * 4 ? (Offset >= 8 * 4 ? (Offset >= 16 * 4 ? 6 : 5) : 4) + : (Offset >= 2 * 4 ? 3 : (Offset >= 1 * 4 ? 
2 : 0)));
+  return Val << STO_PPC64_LOCAL_BIT;
+}
+
+// ELF Relocation types for PPC64
+enum {
+#include "ELFRelocs/PowerPC64.def"
+};
+
+// ELF Relocation types for AArch64
+enum {
+#include "ELFRelocs/AArch64.def"
+};
+
+// ARM Specific e_flags
+enum : unsigned {
+  EF_ARM_SOFT_FLOAT = 0x00000200U,
+  EF_ARM_VFP_FLOAT = 0x00000400U,
+  EF_ARM_EABI_UNKNOWN = 0x00000000U,
+  EF_ARM_EABI_VER1 = 0x01000000U,
+  EF_ARM_EABI_VER2 = 0x02000000U,
+  EF_ARM_EABI_VER3 = 0x03000000U,
+  EF_ARM_EABI_VER4 = 0x04000000U,
+  EF_ARM_EABI_VER5 = 0x05000000U,
+  EF_ARM_EABIMASK = 0xFF000000U
+};
+
+// ELF Relocation types for ARM
+enum {
+#include "ELFRelocs/ARM.def"
+};
+
+// AVR specific e_flags
+enum : unsigned {
+  EF_AVR_ARCH_AVR1 = 1,
+  EF_AVR_ARCH_AVR2 = 2,
+  EF_AVR_ARCH_AVR25 = 25,
+  EF_AVR_ARCH_AVR3 = 3,
+  EF_AVR_ARCH_AVR31 = 31,
+  EF_AVR_ARCH_AVR35 = 35,
+  EF_AVR_ARCH_AVR4 = 4,
+  EF_AVR_ARCH_AVR5 = 5,
+  EF_AVR_ARCH_AVR51 = 51,
+  EF_AVR_ARCH_AVR6 = 6,
+  EF_AVR_ARCH_AVRTINY = 100,
+  EF_AVR_ARCH_XMEGA1 = 101,
+  EF_AVR_ARCH_XMEGA2 = 102,
+  EF_AVR_ARCH_XMEGA3 = 103,
+  EF_AVR_ARCH_XMEGA4 = 104,
+  EF_AVR_ARCH_XMEGA5 = 105,
+  EF_AVR_ARCH_XMEGA6 = 106,
+  EF_AVR_ARCH_XMEGA7 = 107
+};
+
+// ELF Relocation types for AVR
+enum {
+#include "ELFRelocs/AVR.def"
+};
+
+// Mips Specific e_flags
+enum : unsigned {
+  EF_MIPS_NOREORDER = 0x00000001, // Don't reorder instructions
+  EF_MIPS_PIC = 0x00000002,       // Position independent code
+  EF_MIPS_CPIC = 0x00000004,      // Call object with Position independent code
+  EF_MIPS_ABI2 = 0x00000020,      // File uses N32 ABI
+  EF_MIPS_32BITMODE = 0x00000100, // Code compiled for a 64-bit machine
+                                  // in 32-bit mode
+  EF_MIPS_FP64 = 0x00000200,      // Code compiled for a 32-bit machine
+                                  // but uses 64-bit FP registers
+  EF_MIPS_NAN2008 = 0x00000400,   // Uses IEEE 754-2008 NaN encoding
+
+  // ABI flags
+  EF_MIPS_ABI_O32 = 0x00001000,    // This file follows the first MIPS 32 bit ABI
+  EF_MIPS_ABI_O64 = 0x00002000,    // O32 ABI extended for 64-bit architecture.
+  EF_MIPS_ABI_EABI32 = 0x00003000, // EABI in 32 bit mode.
+  EF_MIPS_ABI_EABI64 = 0x00004000, // EABI in 64 bit mode.
+  EF_MIPS_ABI = 0x0000f000,        // Mask for selecting EF_MIPS_ABI_ variant.
+
+  // MIPS machine variant
+  EF_MIPS_MACH_NONE = 0x00000000, // A standard MIPS implementation.
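// NOTE: values such as EF_MIPS_ABI and EF_ARM_EABIMASK above are bit masks
// rather than ordinary enumerators; a specific variant is tested by masking
// e_flags first. A minimal sketch (Flags is a hypothetical e_flags value
// read from an ELF header):
//
//   bool IsO32 = (Flags & EF_MIPS_ABI) == EF_MIPS_ABI_O32;
//   bool IsEABIv5 = (Flags & EF_ARM_EABIMASK) == EF_ARM_EABI_VER5;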
+ EF_MIPS_MACH_3900 = 0x00810000, // Toshiba R3900 + EF_MIPS_MACH_4010 = 0x00820000, // LSI R4010 + EF_MIPS_MACH_4100 = 0x00830000, // NEC VR4100 + EF_MIPS_MACH_4650 = 0x00850000, // MIPS R4650 + EF_MIPS_MACH_4120 = 0x00870000, // NEC VR4120 + EF_MIPS_MACH_4111 = 0x00880000, // NEC VR4111/VR4181 + EF_MIPS_MACH_SB1 = 0x008a0000, // Broadcom SB-1 + EF_MIPS_MACH_OCTEON = 0x008b0000, // Cavium Networks Octeon + EF_MIPS_MACH_XLR = 0x008c0000, // RMI Xlr + EF_MIPS_MACH_OCTEON2 = 0x008d0000, // Cavium Networks Octeon2 + EF_MIPS_MACH_OCTEON3 = 0x008e0000, // Cavium Networks Octeon3 + EF_MIPS_MACH_5400 = 0x00910000, // NEC VR5400 + EF_MIPS_MACH_5900 = 0x00920000, // MIPS R5900 + EF_MIPS_MACH_5500 = 0x00980000, // NEC VR5500 + EF_MIPS_MACH_9000 = 0x00990000, // Unknown + EF_MIPS_MACH_LS2E = 0x00a00000, // ST Microelectronics Loongson 2E + EF_MIPS_MACH_LS2F = 0x00a10000, // ST Microelectronics Loongson 2F + EF_MIPS_MACH_LS3A = 0x00a20000, // Loongson 3A + EF_MIPS_MACH = 0x00ff0000, // EF_MIPS_MACH_xxx selection mask + + // ARCH_ASE + EF_MIPS_MICROMIPS = 0x02000000, // microMIPS + EF_MIPS_ARCH_ASE_M16 = 0x04000000, // Has Mips-16 ISA extensions + EF_MIPS_ARCH_ASE_MDMX = 0x08000000, // Has MDMX multimedia extensions + EF_MIPS_ARCH_ASE = 0x0f000000, // Mask for EF_MIPS_ARCH_ASE_xxx flags + + // ARCH + EF_MIPS_ARCH_1 = 0x00000000, // MIPS1 instruction set + EF_MIPS_ARCH_2 = 0x10000000, // MIPS2 instruction set + EF_MIPS_ARCH_3 = 0x20000000, // MIPS3 instruction set + EF_MIPS_ARCH_4 = 0x30000000, // MIPS4 instruction set + EF_MIPS_ARCH_5 = 0x40000000, // MIPS5 instruction set + EF_MIPS_ARCH_32 = 0x50000000, // MIPS32 instruction set per linux not elf.h + EF_MIPS_ARCH_64 = 0x60000000, // MIPS64 instruction set per linux not elf.h + EF_MIPS_ARCH_32R2 = 0x70000000, // mips32r2, mips32r3, mips32r5 + EF_MIPS_ARCH_64R2 = 0x80000000, // mips64r2, mips64r3, mips64r5 + EF_MIPS_ARCH_32R6 = 0x90000000, // mips32r6 + EF_MIPS_ARCH_64R6 = 0xa0000000, // mips64r6 + EF_MIPS_ARCH = 0xf0000000 // Mask for applying EF_MIPS_ARCH_ variant +}; + +// ELF Relocation types for Mips +enum { +#include "ELFRelocs/Mips.def" +}; + +// Special values for the st_other field in the symbol table entry for MIPS. 
+enum { + STO_MIPS_OPTIONAL = 0x04, // Symbol whose definition is optional + STO_MIPS_PLT = 0x08, // PLT entry related dynamic table record + STO_MIPS_PIC = 0x20, // PIC func in an object mixes PIC/non-PIC + STO_MIPS_MICROMIPS = 0x80, // MIPS Specific ISA for MicroMips + STO_MIPS_MIPS16 = 0xf0 // MIPS Specific ISA for Mips16 +}; + +// .MIPS.options section descriptor kinds +enum { + ODK_NULL = 0, // Undefined + ODK_REGINFO = 1, // Register usage information + ODK_EXCEPTIONS = 2, // Exception processing options + ODK_PAD = 3, // Section padding options + ODK_HWPATCH = 4, // Hardware patches applied + ODK_FILL = 5, // Linker fill value + ODK_TAGS = 6, // Space for tool identification + ODK_HWAND = 7, // Hardware AND patches applied + ODK_HWOR = 8, // Hardware OR patches applied + ODK_GP_GROUP = 9, // GP group to use for text/data sections + ODK_IDENT = 10, // ID information + ODK_PAGESIZE = 11 // Page size information +}; + +// Hexagon-specific e_flags +enum { + // Object processor version flags, bits[11:0] + EF_HEXAGON_MACH_V2 = 0x00000001, // Hexagon V2 + EF_HEXAGON_MACH_V3 = 0x00000002, // Hexagon V3 + EF_HEXAGON_MACH_V4 = 0x00000003, // Hexagon V4 + EF_HEXAGON_MACH_V5 = 0x00000004, // Hexagon V5 + EF_HEXAGON_MACH_V55 = 0x00000005, // Hexagon V55 + EF_HEXAGON_MACH_V60 = 0x00000060, // Hexagon V60 + EF_HEXAGON_MACH_V62 = 0x00000062, // Hexagon V62 + + // Highest ISA version flags + EF_HEXAGON_ISA_MACH = 0x00000000, // Same as specified in bits[11:0] + // of e_flags + EF_HEXAGON_ISA_V2 = 0x00000010, // Hexagon V2 ISA + EF_HEXAGON_ISA_V3 = 0x00000020, // Hexagon V3 ISA + EF_HEXAGON_ISA_V4 = 0x00000030, // Hexagon V4 ISA + EF_HEXAGON_ISA_V5 = 0x00000040, // Hexagon V5 ISA + EF_HEXAGON_ISA_V55 = 0x00000050, // Hexagon V55 ISA + EF_HEXAGON_ISA_V60 = 0x00000060, // Hexagon V60 ISA + EF_HEXAGON_ISA_V62 = 0x00000062, // Hexagon V62 ISA +}; + +// Hexagon-specific section indexes for common small data +enum { + SHN_HEXAGON_SCOMMON = 0xff00, // Other access sizes + SHN_HEXAGON_SCOMMON_1 = 0xff01, // Byte-sized access + SHN_HEXAGON_SCOMMON_2 = 0xff02, // Half-word-sized access + SHN_HEXAGON_SCOMMON_4 = 0xff03, // Word-sized access + SHN_HEXAGON_SCOMMON_8 = 0xff04 // Double-word-size access +}; + +// ELF Relocation types for Hexagon +enum { +#include "ELFRelocs/Hexagon.def" +}; + +// ELF Relocation type for Lanai. +enum { +#include "ELFRelocs/Lanai.def" +}; + +// ELF Relocation types for RISC-V +enum { +#include "ELFRelocs/RISCV.def" +}; + +// ELF Relocation types for S390/zSeries +enum { +#include "ELFRelocs/SystemZ.def" +}; + +// ELF Relocation type for Sparc. +enum { +#include "ELFRelocs/Sparc.def" +}; + +// ELF Relocation types for WebAssembly +enum { +#include "ELFRelocs/WebAssembly.def" +}; + +// ELF Relocation types for AMDGPU +enum { +#include "ELFRelocs/AMDGPU.def" +}; + +// ELF Relocation types for BPF +enum { +#include "ELFRelocs/BPF.def" +}; + +#undef ELF_RELOC + +// Section header. 
+struct Elf32_Shdr { + Elf32_Word sh_name; // Section name (index into string table) + Elf32_Word sh_type; // Section type (SHT_*) + Elf32_Word sh_flags; // Section flags (SHF_*) + Elf32_Addr sh_addr; // Address where section is to be loaded + Elf32_Off sh_offset; // File offset of section data, in bytes + Elf32_Word sh_size; // Size of section, in bytes + Elf32_Word sh_link; // Section type-specific header table index link + Elf32_Word sh_info; // Section type-specific extra information + Elf32_Word sh_addralign; // Section address alignment + Elf32_Word sh_entsize; // Size of records contained within the section +}; + +// Section header for ELF64 - same fields as ELF32, different types. +struct Elf64_Shdr { + Elf64_Word sh_name; + Elf64_Word sh_type; + Elf64_Xword sh_flags; + Elf64_Addr sh_addr; + Elf64_Off sh_offset; + Elf64_Xword sh_size; + Elf64_Word sh_link; + Elf64_Word sh_info; + Elf64_Xword sh_addralign; + Elf64_Xword sh_entsize; +}; + +// Special section indices. +enum { + SHN_UNDEF = 0, // Undefined, missing, irrelevant, or meaningless + SHN_LORESERVE = 0xff00, // Lowest reserved index + SHN_LOPROC = 0xff00, // Lowest processor-specific index + SHN_HIPROC = 0xff1f, // Highest processor-specific index + SHN_LOOS = 0xff20, // Lowest operating system-specific index + SHN_HIOS = 0xff3f, // Highest operating system-specific index + SHN_ABS = 0xfff1, // Symbol has absolute value; does not need relocation + SHN_COMMON = 0xfff2, // FORTRAN COMMON or C external global variables + SHN_XINDEX = 0xffff, // Mark that the index is >= SHN_LORESERVE + SHN_HIRESERVE = 0xffff // Highest reserved index +}; + +// Section types. +enum : unsigned { + SHT_NULL = 0, // No associated section (inactive entry). + SHT_PROGBITS = 1, // Program-defined contents. + SHT_SYMTAB = 2, // Symbol table. + SHT_STRTAB = 3, // String table. + SHT_RELA = 4, // Relocation entries; explicit addends. + SHT_HASH = 5, // Symbol hash table. + SHT_DYNAMIC = 6, // Information for dynamic linking. + SHT_NOTE = 7, // Information about the file. + SHT_NOBITS = 8, // Data occupies no space in the file. + SHT_REL = 9, // Relocation entries; no explicit addends. + SHT_SHLIB = 10, // Reserved. + SHT_DYNSYM = 11, // Symbol table. + SHT_INIT_ARRAY = 14, // Pointers to initialization functions. + SHT_FINI_ARRAY = 15, // Pointers to termination functions. + SHT_PREINIT_ARRAY = 16, // Pointers to pre-init functions. + SHT_GROUP = 17, // Section group. + SHT_SYMTAB_SHNDX = 18, // Indices for SHN_XINDEX entries. + SHT_LOOS = 0x60000000, // Lowest operating system-specific type. + SHT_LLVM_ODRTAB = 0x6fff4c00, // LLVM ODR table. + SHT_GNU_ATTRIBUTES = 0x6ffffff5, // Object attributes. + SHT_GNU_HASH = 0x6ffffff6, // GNU-style hash table. + SHT_GNU_verdef = 0x6ffffffd, // GNU version definitions. + SHT_GNU_verneed = 0x6ffffffe, // GNU version references. + SHT_GNU_versym = 0x6fffffff, // GNU symbol versions table. + SHT_HIOS = 0x6fffffff, // Highest operating system-specific type. + SHT_LOPROC = 0x70000000, // Lowest processor arch-specific type. + // Fixme: All this is duplicated in MCSectionELF. Why?? 
+ // Exception Index table + SHT_ARM_EXIDX = 0x70000001U, + // BPABI DLL dynamic linking pre-emption map + SHT_ARM_PREEMPTMAP = 0x70000002U, + // Object file compatibility attributes + SHT_ARM_ATTRIBUTES = 0x70000003U, + SHT_ARM_DEBUGOVERLAY = 0x70000004U, + SHT_ARM_OVERLAYSECTION = 0x70000005U, + SHT_HEX_ORDERED = 0x70000000, // Link editor is to sort the entries in + // this section based on their sizes + SHT_X86_64_UNWIND = 0x70000001, // Unwind information + + SHT_MIPS_REGINFO = 0x70000006, // Register usage information + SHT_MIPS_OPTIONS = 0x7000000d, // General options + SHT_MIPS_DWARF = 0x7000001e, // DWARF debugging section. + SHT_MIPS_ABIFLAGS = 0x7000002a, // ABI information. + + SHT_HIPROC = 0x7fffffff, // Highest processor arch-specific type. + SHT_LOUSER = 0x80000000, // Lowest type reserved for applications. + SHT_HIUSER = 0xffffffff // Highest type reserved for applications. +}; + +// Section flags. +enum : unsigned { + // Section data should be writable during execution. + SHF_WRITE = 0x1, + + // Section occupies memory during program execution. + SHF_ALLOC = 0x2, + + // Section contains executable machine instructions. + SHF_EXECINSTR = 0x4, + + // The data in this section may be merged. + SHF_MERGE = 0x10, + + // The data in this section is null-terminated strings. + SHF_STRINGS = 0x20, + + // A field in this section holds a section header table index. + SHF_INFO_LINK = 0x40U, + + // Adds special ordering requirements for link editors. + SHF_LINK_ORDER = 0x80U, + + // This section requires special OS-specific processing to avoid incorrect + // behavior. + SHF_OS_NONCONFORMING = 0x100U, + + // This section is a member of a section group. + SHF_GROUP = 0x200U, + + // This section holds Thread-Local Storage. + SHF_TLS = 0x400U, + + // Identifies a section containing compressed data. + SHF_COMPRESSED = 0x800U, + + // This section is excluded from the final executable or shared library. + SHF_EXCLUDE = 0x80000000U, + + // Start of target-specific flags. + + SHF_MASKOS = 0x0ff00000, + + // Bits indicating processor-specific flags. + SHF_MASKPROC = 0xf0000000, + + /// All sections with the "d" flag are grouped together by the linker to form + /// the data section and the dp register is set to the start of the section by + /// the boot code. + XCORE_SHF_DP_SECTION = 0x10000000, + + /// All sections with the "c" flag are grouped together by the linker to form + /// the constant pool and the cp register is set to the start of the constant + /// pool by the boot code. + XCORE_SHF_CP_SECTION = 0x20000000, + + // If an object file section does not have this flag set, then it may not hold + // more than 2GB and can be freely referred to in objects using smaller code + // models. Otherwise, only objects using larger code models can refer to them. + // For example, a medium code model object can refer to data in a section that + // sets this flag besides being able to refer to data in a section that does + // not set it; likewise, a small code model object can refer only to code in a + // section that does not set this flag. + SHF_X86_64_LARGE = 0x10000000, + + // All sections with the GPREL flag are grouped into a global data area + // for faster accesses + SHF_HEX_GPREL = 0x10000000, + + // Section contains text/data which may be replicated in other sections. + // Linker must retain only one copy. + SHF_MIPS_NODUPES = 0x01000000, + + // Linker must generate implicit hidden weak names. + SHF_MIPS_NAMES = 0x02000000, + + // Section data local to process. 
+ SHF_MIPS_LOCAL = 0x04000000, + + // Do not strip this section. + SHF_MIPS_NOSTRIP = 0x08000000, + + // Section must be part of global data area. + SHF_MIPS_GPREL = 0x10000000, + + // This section should be merged. + SHF_MIPS_MERGE = 0x20000000, + + // Address size to be inferred from section entry size. + SHF_MIPS_ADDR = 0x40000000, + + // Section data is string data by default. + SHF_MIPS_STRING = 0x80000000, + + // Make code section unreadable when in execute-only mode + SHF_ARM_PURECODE = 0x20000000 +}; + +// Section Group Flags +enum : unsigned { + GRP_COMDAT = 0x1, + GRP_MASKOS = 0x0ff00000, + GRP_MASKPROC = 0xf0000000 +}; + +// Symbol table entries for ELF32. +struct Elf32_Sym { + Elf32_Word st_name; // Symbol name (index into string table) + Elf32_Addr st_value; // Value or address associated with the symbol + Elf32_Word st_size; // Size of the symbol + unsigned char st_info; // Symbol's type and binding attributes + unsigned char st_other; // Must be zero; reserved + Elf32_Half st_shndx; // Which section (header table index) it's defined in + + // These accessors and mutators correspond to the ELF32_ST_BIND, + // ELF32_ST_TYPE, and ELF32_ST_INFO macros defined in the ELF specification: + unsigned char getBinding() const { return st_info >> 4; } + unsigned char getType() const { return st_info & 0x0f; } + void setBinding(unsigned char b) { setBindingAndType(b, getType()); } + void setType(unsigned char t) { setBindingAndType(getBinding(), t); } + void setBindingAndType(unsigned char b, unsigned char t) { + st_info = (b << 4) + (t & 0x0f); + } +}; + +// Symbol table entries for ELF64. +struct Elf64_Sym { + Elf64_Word st_name; // Symbol name (index into string table) + unsigned char st_info; // Symbol's type and binding attributes + unsigned char st_other; // Must be zero; reserved + Elf64_Half st_shndx; // Which section (header tbl index) it's defined in + Elf64_Addr st_value; // Value or address associated with the symbol + Elf64_Xword st_size; // Size of the symbol + + // These accessors and mutators are identical to those defined for ELF32 + // symbol table entries. + unsigned char getBinding() const { return st_info >> 4; } + unsigned char getType() const { return st_info & 0x0f; } + void setBinding(unsigned char b) { setBindingAndType(b, getType()); } + void setType(unsigned char t) { setBindingAndType(getBinding(), t); } + void setBindingAndType(unsigned char b, unsigned char t) { + st_info = (b << 4) + (t & 0x0f); + } +}; + +// The size (in bytes) of symbol table entries. +enum { + SYMENTRY_SIZE32 = 16, // 32-bit symbol entry size + SYMENTRY_SIZE64 = 24 // 64-bit symbol entry size. +}; + +// Symbol bindings. +enum { + STB_LOCAL = 0, // Local symbol, not visible outside obj file containing def + STB_GLOBAL = 1, // Global symbol, visible to all object files being combined + STB_WEAK = 2, // Weak symbol, like global but lower-precedence + STB_GNU_UNIQUE = 10, + STB_LOOS = 10, // Lowest operating system-specific binding type + STB_HIOS = 12, // Highest operating system-specific binding type + STB_LOPROC = 13, // Lowest processor-specific binding type + STB_HIPROC = 15 // Highest processor-specific binding type +}; + +// Symbol types. +enum { + STT_NOTYPE = 0, // Symbol's type is not specified + STT_OBJECT = 1, // Symbol is a data object (variable, array, etc.) + STT_FUNC = 2, // Symbol is executable code (function, etc.) 
+ STT_SECTION = 3, // Symbol refers to a section + STT_FILE = 4, // Local, absolute symbol that refers to a file + STT_COMMON = 5, // An uninitialized common block + STT_TLS = 6, // Thread local data object + STT_GNU_IFUNC = 10, // GNU indirect function + STT_LOOS = 10, // Lowest operating system-specific symbol type + STT_HIOS = 12, // Highest operating system-specific symbol type + STT_LOPROC = 13, // Lowest processor-specific symbol type + STT_HIPROC = 15, // Highest processor-specific symbol type + + // AMDGPU symbol types + STT_AMDGPU_HSA_KERNEL = 10 +}; + +enum { + STV_DEFAULT = 0, // Visibility is specified by binding type + STV_INTERNAL = 1, // Defined by processor supplements + STV_HIDDEN = 2, // Not visible to other components + STV_PROTECTED = 3 // Visible in other components but not preemptable +}; + +// Symbol number. +enum { STN_UNDEF = 0 }; + +// Special relocation symbols used in the MIPS64 ELF relocation entries +enum { + RSS_UNDEF = 0, // None + RSS_GP = 1, // Value of gp + RSS_GP0 = 2, // Value of gp used to create object being relocated + RSS_LOC = 3 // Address of location being relocated +}; + +// Relocation entry, without explicit addend. +struct Elf32_Rel { + Elf32_Addr r_offset; // Location (file byte offset, or program virtual addr) + Elf32_Word r_info; // Symbol table index and type of relocation to apply + + // These accessors and mutators correspond to the ELF32_R_SYM, ELF32_R_TYPE, + // and ELF32_R_INFO macros defined in the ELF specification: + Elf32_Word getSymbol() const { return (r_info >> 8); } + unsigned char getType() const { return (unsigned char)(r_info & 0x0ff); } + void setSymbol(Elf32_Word s) { setSymbolAndType(s, getType()); } + void setType(unsigned char t) { setSymbolAndType(getSymbol(), t); } + void setSymbolAndType(Elf32_Word s, unsigned char t) { + r_info = (s << 8) + t; + } +}; + +// Relocation entry with explicit addend. +struct Elf32_Rela { + Elf32_Addr r_offset; // Location (file byte offset, or program virtual addr) + Elf32_Word r_info; // Symbol table index and type of relocation to apply + Elf32_Sword r_addend; // Compute value for relocatable field by adding this + + // These accessors and mutators correspond to the ELF32_R_SYM, ELF32_R_TYPE, + // and ELF32_R_INFO macros defined in the ELF specification: + Elf32_Word getSymbol() const { return (r_info >> 8); } + unsigned char getType() const { return (unsigned char)(r_info & 0x0ff); } + void setSymbol(Elf32_Word s) { setSymbolAndType(s, getType()); } + void setType(unsigned char t) { setSymbolAndType(getSymbol(), t); } + void setSymbolAndType(Elf32_Word s, unsigned char t) { + r_info = (s << 8) + t; + } +}; + +// Relocation entry, without explicit addend. +struct Elf64_Rel { + Elf64_Addr r_offset; // Location (file byte offset, or program virtual addr). + Elf64_Xword r_info; // Symbol table index and type of relocation to apply. + + // These accessors and mutators correspond to the ELF64_R_SYM, ELF64_R_TYPE, + // and ELF64_R_INFO macros defined in the ELF specification: + Elf64_Word getSymbol() const { return (r_info >> 32); } + Elf64_Word getType() const { return (Elf64_Word)(r_info & 0xffffffffL); } + void setSymbol(Elf64_Word s) { setSymbolAndType(s, getType()); } + void setType(Elf64_Word t) { setSymbolAndType(getSymbol(), t); } + void setSymbolAndType(Elf64_Word s, Elf64_Word t) { + r_info = ((Elf64_Xword)s << 32) + (t & 0xffffffffL); + } +}; + +// Relocation entry with explicit addend. 
+struct Elf64_Rela { + Elf64_Addr r_offset; // Location (file byte offset, or program virtual addr). + Elf64_Xword r_info; // Symbol table index and type of relocation to apply. + Elf64_Sxword r_addend; // Compute value for relocatable field by adding this. + + // These accessors and mutators correspond to the ELF64_R_SYM, ELF64_R_TYPE, + // and ELF64_R_INFO macros defined in the ELF specification: + Elf64_Word getSymbol() const { return (r_info >> 32); } + Elf64_Word getType() const { return (Elf64_Word)(r_info & 0xffffffffL); } + void setSymbol(Elf64_Word s) { setSymbolAndType(s, getType()); } + void setType(Elf64_Word t) { setSymbolAndType(getSymbol(), t); } + void setSymbolAndType(Elf64_Word s, Elf64_Word t) { + r_info = ((Elf64_Xword)s << 32) + (t & 0xffffffffL); + } +}; + +// Program header for ELF32. +struct Elf32_Phdr { + Elf32_Word p_type; // Type of segment + Elf32_Off p_offset; // File offset where segment is located, in bytes + Elf32_Addr p_vaddr; // Virtual address of beginning of segment + Elf32_Addr p_paddr; // Physical address of beginning of segment (OS-specific) + Elf32_Word p_filesz; // Num. of bytes in file image of segment (may be zero) + Elf32_Word p_memsz; // Num. of bytes in mem image of segment (may be zero) + Elf32_Word p_flags; // Segment flags + Elf32_Word p_align; // Segment alignment constraint +}; + +// Program header for ELF64. +struct Elf64_Phdr { + Elf64_Word p_type; // Type of segment + Elf64_Word p_flags; // Segment flags + Elf64_Off p_offset; // File offset where segment is located, in bytes + Elf64_Addr p_vaddr; // Virtual address of beginning of segment + Elf64_Addr p_paddr; // Physical addr of beginning of segment (OS-specific) + Elf64_Xword p_filesz; // Num. of bytes in file image of segment (may be zero) + Elf64_Xword p_memsz; // Num. of bytes in mem image of segment (may be zero) + Elf64_Xword p_align; // Segment alignment constraint +}; + +// Segment types. +enum { + PT_NULL = 0, // Unused segment. + PT_LOAD = 1, // Loadable segment. + PT_DYNAMIC = 2, // Dynamic linking information. + PT_INTERP = 3, // Interpreter pathname. + PT_NOTE = 4, // Auxiliary information. + PT_SHLIB = 5, // Reserved. + PT_PHDR = 6, // The program header table itself. + PT_TLS = 7, // The thread-local storage template. + PT_LOOS = 0x60000000, // Lowest operating system-specific pt entry type. + PT_HIOS = 0x6fffffff, // Highest operating system-specific pt entry type. + PT_LOPROC = 0x70000000, // Lowest processor-specific program hdr entry type. + PT_HIPROC = 0x7fffffff, // Highest processor-specific program hdr entry type. + + // x86-64 program header types. + // These all contain stack unwind tables. + PT_GNU_EH_FRAME = 0x6474e550, + PT_SUNW_EH_FRAME = 0x6474e550, + PT_SUNW_UNWIND = 0x6464e550, + + PT_GNU_STACK = 0x6474e551, // Indicates stack executability. + PT_GNU_RELRO = 0x6474e552, // Read-only after relocation. + + PT_OPENBSD_RANDOMIZE = 0x65a3dbe6, // Fill with random data. + PT_OPENBSD_WXNEEDED = 0x65a3dbe7, // Program does W^X violations. + PT_OPENBSD_BOOTDATA = 0x65a41be6, // Section for boot arguments. + + // ARM program header types. + PT_ARM_ARCHEXT = 0x70000000, // Platform architecture compatibility info + // These all contain stack unwind tables. + PT_ARM_EXIDX = 0x70000001, + PT_ARM_UNWIND = 0x70000001, + + // MIPS program header types. + PT_MIPS_REGINFO = 0x70000000, // Register usage information. + PT_MIPS_RTPROC = 0x70000001, // Runtime procedure table. + PT_MIPS_OPTIONS = 0x70000002, // Options segment. 
+ PT_MIPS_ABIFLAGS = 0x70000003, // Abiflags segment. + + // WebAssembly program header types. + PT_WEBASSEMBLY_FUNCTIONS = PT_LOPROC + 0, // Function definitions. +}; + +// Segment flag bits. +enum : unsigned { + PF_X = 1, // Execute + PF_W = 2, // Write + PF_R = 4, // Read + PF_MASKOS = 0x0ff00000, // Bits for operating system-specific semantics. + PF_MASKPROC = 0xf0000000 // Bits for processor-specific semantics. +}; + +// Dynamic table entry for ELF32. +struct Elf32_Dyn { + Elf32_Sword d_tag; // Type of dynamic table entry. + union { + Elf32_Word d_val; // Integer value of entry. + Elf32_Addr d_ptr; // Pointer value of entry. + } d_un; +}; + +// Dynamic table entry for ELF64. +struct Elf64_Dyn { + Elf64_Sxword d_tag; // Type of dynamic table entry. + union { + Elf64_Xword d_val; // Integer value of entry. + Elf64_Addr d_ptr; // Pointer value of entry. + } d_un; +}; + +// Dynamic table entry tags. +enum { + DT_NULL = 0, // Marks end of dynamic array. + DT_NEEDED = 1, // String table offset of needed library. + DT_PLTRELSZ = 2, // Size of relocation entries in PLT. + DT_PLTGOT = 3, // Address associated with linkage table. + DT_HASH = 4, // Address of symbolic hash table. + DT_STRTAB = 5, // Address of dynamic string table. + DT_SYMTAB = 6, // Address of dynamic symbol table. + DT_RELA = 7, // Address of relocation table (Rela entries). + DT_RELASZ = 8, // Size of Rela relocation table. + DT_RELAENT = 9, // Size of a Rela relocation entry. + DT_STRSZ = 10, // Total size of the string table. + DT_SYMENT = 11, // Size of a symbol table entry. + DT_INIT = 12, // Address of initialization function. + DT_FINI = 13, // Address of termination function. + DT_SONAME = 14, // String table offset of a shared objects name. + DT_RPATH = 15, // String table offset of library search path. + DT_SYMBOLIC = 16, // Changes symbol resolution algorithm. + DT_REL = 17, // Address of relocation table (Rel entries). + DT_RELSZ = 18, // Size of Rel relocation table. + DT_RELENT = 19, // Size of a Rel relocation entry. + DT_PLTREL = 20, // Type of relocation entry used for linking. + DT_DEBUG = 21, // Reserved for debugger. + DT_TEXTREL = 22, // Relocations exist for non-writable segments. + DT_JMPREL = 23, // Address of relocations associated with PLT. + DT_BIND_NOW = 24, // Process all relocations before execution. + DT_INIT_ARRAY = 25, // Pointer to array of initialization functions. + DT_FINI_ARRAY = 26, // Pointer to array of termination functions. + DT_INIT_ARRAYSZ = 27, // Size of DT_INIT_ARRAY. + DT_FINI_ARRAYSZ = 28, // Size of DT_FINI_ARRAY. + DT_RUNPATH = 29, // String table offset of lib search path. + DT_FLAGS = 30, // Flags. + DT_ENCODING = 32, // Values from here to DT_LOOS follow the rules + // for the interpretation of the d_un union. + + DT_PREINIT_ARRAY = 32, // Pointer to array of preinit functions. + DT_PREINIT_ARRAYSZ = 33, // Size of the DT_PREINIT_ARRAY array. + + DT_LOOS = 0x60000000, // Start of environment specific tags. + DT_HIOS = 0x6FFFFFFF, // End of environment specific tags. + DT_LOPROC = 0x70000000, // Start of processor specific tags. + DT_HIPROC = 0x7FFFFFFF, // End of processor specific tags. + + DT_GNU_HASH = 0x6FFFFEF5, // Reference to the GNU hash table. + DT_TLSDESC_PLT = + 0x6FFFFEF6, // Location of PLT entry for TLS descriptor resolver calls. + DT_TLSDESC_GOT = 0x6FFFFEF7, // Location of GOT entry used by TLS descriptor + // resolver PLT entry. + DT_RELACOUNT = 0x6FFFFFF9, // ELF32_Rela count. + DT_RELCOUNT = 0x6FFFFFFA, // ELF32_Rel count. 
+ + DT_FLAGS_1 = 0X6FFFFFFB, // Flags_1. + DT_VERSYM = 0x6FFFFFF0, // The address of .gnu.version section. + DT_VERDEF = 0X6FFFFFFC, // The address of the version definition table. + DT_VERDEFNUM = 0X6FFFFFFD, // The number of entries in DT_VERDEF. + DT_VERNEED = 0X6FFFFFFE, // The address of the version Dependency table. + DT_VERNEEDNUM = 0X6FFFFFFF, // The number of entries in DT_VERNEED. + + // Hexagon specific dynamic table entries + DT_HEXAGON_SYMSZ = 0x70000000, + DT_HEXAGON_VER = 0x70000001, + DT_HEXAGON_PLT = 0x70000002, + + // Mips specific dynamic table entry tags. + DT_MIPS_RLD_VERSION = 0x70000001, // 32 bit version number for runtime + // linker interface. + DT_MIPS_TIME_STAMP = 0x70000002, // Time stamp. + DT_MIPS_ICHECKSUM = 0x70000003, // Checksum of external strings + // and common sizes. + DT_MIPS_IVERSION = 0x70000004, // Index of version string + // in string table. + DT_MIPS_FLAGS = 0x70000005, // 32 bits of flags. + DT_MIPS_BASE_ADDRESS = 0x70000006, // Base address of the segment. + DT_MIPS_MSYM = 0x70000007, // Address of .msym section. + DT_MIPS_CONFLICT = 0x70000008, // Address of .conflict section. + DT_MIPS_LIBLIST = 0x70000009, // Address of .liblist section. + DT_MIPS_LOCAL_GOTNO = 0x7000000a, // Number of local global offset + // table entries. + DT_MIPS_CONFLICTNO = 0x7000000b, // Number of entries + // in the .conflict section. + DT_MIPS_LIBLISTNO = 0x70000010, // Number of entries + // in the .liblist section. + DT_MIPS_SYMTABNO = 0x70000011, // Number of entries + // in the .dynsym section. + DT_MIPS_UNREFEXTNO = 0x70000012, // Index of first external dynamic symbol + // not referenced locally. + DT_MIPS_GOTSYM = 0x70000013, // Index of first dynamic symbol + // in global offset table. + DT_MIPS_HIPAGENO = 0x70000014, // Number of page table entries + // in global offset table. + DT_MIPS_RLD_MAP = 0x70000016, // Address of run time loader map, + // used for debugging. + DT_MIPS_DELTA_CLASS = 0x70000017, // Delta C++ class definition. + DT_MIPS_DELTA_CLASS_NO = 0x70000018, // Number of entries + // in DT_MIPS_DELTA_CLASS. + DT_MIPS_DELTA_INSTANCE = 0x70000019, // Delta C++ class instances. + DT_MIPS_DELTA_INSTANCE_NO = 0x7000001A, // Number of entries + // in DT_MIPS_DELTA_INSTANCE. + DT_MIPS_DELTA_RELOC = 0x7000001B, // Delta relocations. + DT_MIPS_DELTA_RELOC_NO = 0x7000001C, // Number of entries + // in DT_MIPS_DELTA_RELOC. + DT_MIPS_DELTA_SYM = 0x7000001D, // Delta symbols that Delta + // relocations refer to. + DT_MIPS_DELTA_SYM_NO = 0x7000001E, // Number of entries + // in DT_MIPS_DELTA_SYM. + DT_MIPS_DELTA_CLASSSYM = 0x70000020, // Delta symbols that hold + // class declarations. + DT_MIPS_DELTA_CLASSSYM_NO = 0x70000021, // Number of entries + // in DT_MIPS_DELTA_CLASSSYM. + DT_MIPS_CXX_FLAGS = 0x70000022, // Flags indicating information + // about C++ flavor. + DT_MIPS_PIXIE_INIT = 0x70000023, // Pixie information. + DT_MIPS_SYMBOL_LIB = 0x70000024, // Address of .MIPS.symlib + DT_MIPS_LOCALPAGE_GOTIDX = 0x70000025, // The GOT index of the first PTE + // for a segment + DT_MIPS_LOCAL_GOTIDX = 0x70000026, // The GOT index of the first PTE + // for a local symbol + DT_MIPS_HIDDEN_GOTIDX = 0x70000027, // The GOT index of the first PTE + // for a hidden symbol + DT_MIPS_PROTECTED_GOTIDX = 0x70000028, // The GOT index of the first PTE + // for a protected symbol + DT_MIPS_OPTIONS = 0x70000029, // Address of `.MIPS.options'. + DT_MIPS_INTERFACE = 0x7000002A, // Address of `.interface'. + DT_MIPS_DYNSTR_ALIGN = 0x7000002B, // Unknown. 
+ DT_MIPS_INTERFACE_SIZE = 0x7000002C, // Size of the .interface section. + DT_MIPS_RLD_TEXT_RESOLVE_ADDR = 0x7000002D, // Size of rld_text_resolve + // function stored in the GOT. + DT_MIPS_PERF_SUFFIX = 0x7000002E, // Default suffix of DSO to be added + // by rld on dlopen() calls. + DT_MIPS_COMPACT_SIZE = 0x7000002F, // Size of compact relocation + // section (O32). + DT_MIPS_GP_VALUE = 0x70000030, // GP value for auxiliary GOTs. + DT_MIPS_AUX_DYNAMIC = 0x70000031, // Address of auxiliary .dynamic. + DT_MIPS_PLTGOT = 0x70000032, // Address of the base of the PLTGOT. + DT_MIPS_RWPLT = 0x70000034, // Points to the base + // of a writable PLT. + DT_MIPS_RLD_MAP_REL = 0x70000035, // Relative offset of run time loader + // map, used for debugging. + + // Sun machine-independent extensions. + DT_AUXILIARY = 0x7FFFFFFD, // Shared object to load before self + DT_FILTER = 0x7FFFFFFF // Shared object to get values from +}; + +// DT_FLAGS values. +enum { + DF_ORIGIN = 0x01, // The object may reference $ORIGIN. + DF_SYMBOLIC = 0x02, // Search the shared lib before searching the exe. + DF_TEXTREL = 0x04, // Relocations may modify a non-writable segment. + DF_BIND_NOW = 0x08, // Process all relocations on load. + DF_STATIC_TLS = 0x10 // Reject attempts to load dynamically. +}; + +// State flags selectable in the `d_un.d_val' element of the DT_FLAGS_1 entry. +enum { + DF_1_NOW = 0x00000001, // Set RTLD_NOW for this object. + DF_1_GLOBAL = 0x00000002, // Set RTLD_GLOBAL for this object. + DF_1_GROUP = 0x00000004, // Set RTLD_GROUP for this object. + DF_1_NODELETE = 0x00000008, // Set RTLD_NODELETE for this object. + DF_1_LOADFLTR = 0x00000010, // Trigger filtee loading at runtime. + DF_1_INITFIRST = 0x00000020, // Set RTLD_INITFIRST for this object. + DF_1_NOOPEN = 0x00000040, // Set RTLD_NOOPEN for this object. + DF_1_ORIGIN = 0x00000080, // $ORIGIN must be handled. + DF_1_DIRECT = 0x00000100, // Direct binding enabled. + DF_1_TRANS = 0x00000200, + DF_1_INTERPOSE = 0x00000400, // Object is used to interpose. + DF_1_NODEFLIB = 0x00000800, // Ignore default lib search path. + DF_1_NODUMP = 0x00001000, // Object can't be dldump'ed. + DF_1_CONFALT = 0x00002000, // Configuration alternative created. + DF_1_ENDFILTEE = 0x00004000, // Filtee terminates filters search. + DF_1_DISPRELDNE = 0x00008000, // Disp reloc applied at build time. + DF_1_DISPRELPND = 0x00010000, // Disp reloc applied at run-time. + DF_1_NODIRECT = 0x00020000, // Object has no-direct binding. + DF_1_IGNMULDEF = 0x00040000, + DF_1_NOKSYMS = 0x00080000, + DF_1_NOHDR = 0x00100000, + DF_1_EDITED = 0x00200000, // Object is modified after built. + DF_1_NORELOC = 0x00400000, + DF_1_SYMINTPOSE = 0x00800000, // Object has individual interposers. + DF_1_GLOBAUDIT = 0x01000000, // Global auditing required. + DF_1_SINGLETON = 0x02000000 // Singleton symbols are used. +}; + +// DT_MIPS_FLAGS values. +enum { + RHF_NONE = 0x00000000, // No flags. + RHF_QUICKSTART = 0x00000001, // Uses shortcut pointers. + RHF_NOTPOT = 0x00000002, // Hash size is not a power of two. + RHS_NO_LIBRARY_REPLACEMENT = 0x00000004, // Ignore LD_LIBRARY_PATH. + RHF_NO_MOVE = 0x00000008, // DSO address may not be relocated. + RHF_SGI_ONLY = 0x00000010, // SGI specific features. + RHF_GUARANTEE_INIT = 0x00000020, // Guarantee that .init will finish + // executing before any non-init + // code in DSO is called. + RHF_DELTA_C_PLUS_PLUS = 0x00000040, // Contains Delta C++ code. 
+ RHF_GUARANTEE_START_INIT = 0x00000080, // Guarantee that .init will start + // executing before any non-init + // code in DSO is called. + RHF_PIXIE = 0x00000100, // Generated by pixie. + RHF_DEFAULT_DELAY_LOAD = 0x00000200, // Delay-load DSO by default. + RHF_REQUICKSTART = 0x00000400, // Object may be requickstarted + RHF_REQUICKSTARTED = 0x00000800, // Object has been requickstarted + RHF_CORD = 0x00001000, // Generated by cord. + RHF_NO_UNRES_UNDEF = 0x00002000, // Object contains no unresolved + // undef symbols. + RHF_RLD_ORDER_SAFE = 0x00004000 // Symbol table is in a safe order. +}; + +// ElfXX_VerDef structure version (GNU versioning) +enum { VER_DEF_NONE = 0, VER_DEF_CURRENT = 1 }; + +// VerDef Flags (ElfXX_VerDef::vd_flags) +enum { VER_FLG_BASE = 0x1, VER_FLG_WEAK = 0x2, VER_FLG_INFO = 0x4 }; + +// Special constants for the version table. (SHT_GNU_versym/.gnu.version) +enum { + VER_NDX_LOCAL = 0, // Unversioned local symbol + VER_NDX_GLOBAL = 1, // Unversioned global symbol + VERSYM_VERSION = 0x7fff, // Version Index mask + VERSYM_HIDDEN = 0x8000 // Hidden bit (non-default version) +}; + +// ElfXX_VerNeed structure version (GNU versioning) +enum { VER_NEED_NONE = 0, VER_NEED_CURRENT = 1 }; + +// SHT_NOTE section types +enum { + NT_FREEBSD_THRMISC = 7, + NT_FREEBSD_PROCSTAT_PROC = 8, + NT_FREEBSD_PROCSTAT_FILES = 9, + NT_FREEBSD_PROCSTAT_VMMAP = 10, + NT_FREEBSD_PROCSTAT_GROUPS = 11, + NT_FREEBSD_PROCSTAT_UMASK = 12, + NT_FREEBSD_PROCSTAT_RLIMIT = 13, + NT_FREEBSD_PROCSTAT_OSREL = 14, + NT_FREEBSD_PROCSTAT_PSSTRINGS = 15, + NT_FREEBSD_PROCSTAT_AUXV = 16, +}; + +enum { + NT_GNU_ABI_TAG = 1, + NT_GNU_HWCAP = 2, + NT_GNU_BUILD_ID = 3, + NT_GNU_GOLD_VERSION = 4, +}; + +enum { + GNU_ABI_TAG_LINUX = 0, + GNU_ABI_TAG_HURD = 1, + GNU_ABI_TAG_SOLARIS = 2, + GNU_ABI_TAG_FREEBSD = 3, + GNU_ABI_TAG_NETBSD = 4, + GNU_ABI_TAG_SYLLABLE = 5, + GNU_ABI_TAG_NACL = 6, +}; + +// Compressed section header for ELF32. +struct Elf32_Chdr { + Elf32_Word ch_type; + Elf32_Word ch_size; + Elf32_Word ch_addralign; +}; + +// Compressed section header for ELF64. +struct Elf64_Chdr { + Elf64_Word ch_type; + Elf64_Word ch_reserved; + Elf64_Xword ch_size; + Elf64_Xword ch_addralign; +}; + +// Legal values for ch_type field of compressed section header. +enum { + ELFCOMPRESS_ZLIB = 1, // ZLIB/DEFLATE algorithm. + ELFCOMPRESS_LOOS = 0x60000000, // Start of OS-specific. + ELFCOMPRESS_HIOS = 0x6fffffff, // End of OS-specific. + ELFCOMPRESS_LOPROC = 0x70000000, // Start of processor-specific. + ELFCOMPRESS_HIPROC = 0x7fffffff // End of processor-specific. +}; + +} // end namespace ELF +} // end namespace llvm + +#endif // LLVM_BINARYFORMAT_ELF_H diff --git a/interpreter/llvm/src/include/llvm/BinaryFormat/ELFRelocs/AArch64.def b/interpreter/llvm/src/include/llvm/BinaryFormat/ELFRelocs/AArch64.def new file mode 100644 index 0000000000000..4afcd7d1f0939 --- /dev/null +++ b/interpreter/llvm/src/include/llvm/BinaryFormat/ELFRelocs/AArch64.def @@ -0,0 +1,218 @@ + +#ifndef ELF_RELOC +#error "ELF_RELOC must be defined" +#endif + +// Based on ABI release 1.1-beta, dated 6 November 2013. NB: The cover page of +// this document, IHI0056C_beta_aaelf64.pdf, on infocenter.arm.com, still +// labels this as release 1.0. 
+ELF_RELOC(R_AARCH64_NONE, 0) +ELF_RELOC(R_AARCH64_ABS64, 0x101) +ELF_RELOC(R_AARCH64_ABS32, 0x102) +ELF_RELOC(R_AARCH64_ABS16, 0x103) +ELF_RELOC(R_AARCH64_PREL64, 0x104) +ELF_RELOC(R_AARCH64_PREL32, 0x105) +ELF_RELOC(R_AARCH64_PREL16, 0x106) +ELF_RELOC(R_AARCH64_MOVW_UABS_G0, 0x107) +ELF_RELOC(R_AARCH64_MOVW_UABS_G0_NC, 0x108) +ELF_RELOC(R_AARCH64_MOVW_UABS_G1, 0x109) +ELF_RELOC(R_AARCH64_MOVW_UABS_G1_NC, 0x10a) +ELF_RELOC(R_AARCH64_MOVW_UABS_G2, 0x10b) +ELF_RELOC(R_AARCH64_MOVW_UABS_G2_NC, 0x10c) +ELF_RELOC(R_AARCH64_MOVW_UABS_G3, 0x10d) +ELF_RELOC(R_AARCH64_MOVW_SABS_G0, 0x10e) +ELF_RELOC(R_AARCH64_MOVW_SABS_G1, 0x10f) +ELF_RELOC(R_AARCH64_MOVW_SABS_G2, 0x110) +ELF_RELOC(R_AARCH64_LD_PREL_LO19, 0x111) +ELF_RELOC(R_AARCH64_ADR_PREL_LO21, 0x112) +ELF_RELOC(R_AARCH64_ADR_PREL_PG_HI21, 0x113) +ELF_RELOC(R_AARCH64_ADR_PREL_PG_HI21_NC, 0x114) +ELF_RELOC(R_AARCH64_ADD_ABS_LO12_NC, 0x115) +ELF_RELOC(R_AARCH64_LDST8_ABS_LO12_NC, 0x116) +ELF_RELOC(R_AARCH64_TSTBR14, 0x117) +ELF_RELOC(R_AARCH64_CONDBR19, 0x118) +ELF_RELOC(R_AARCH64_JUMP26, 0x11a) +ELF_RELOC(R_AARCH64_CALL26, 0x11b) +ELF_RELOC(R_AARCH64_LDST16_ABS_LO12_NC, 0x11c) +ELF_RELOC(R_AARCH64_LDST32_ABS_LO12_NC, 0x11d) +ELF_RELOC(R_AARCH64_LDST64_ABS_LO12_NC, 0x11e) +ELF_RELOC(R_AARCH64_MOVW_PREL_G0, 0x11f) +ELF_RELOC(R_AARCH64_MOVW_PREL_G0_NC, 0x120) +ELF_RELOC(R_AARCH64_MOVW_PREL_G1, 0x121) +ELF_RELOC(R_AARCH64_MOVW_PREL_G1_NC, 0x122) +ELF_RELOC(R_AARCH64_MOVW_PREL_G2, 0x123) +ELF_RELOC(R_AARCH64_MOVW_PREL_G2_NC, 0x124) +ELF_RELOC(R_AARCH64_MOVW_PREL_G3, 0x125) +ELF_RELOC(R_AARCH64_LDST128_ABS_LO12_NC, 0x12b) +ELF_RELOC(R_AARCH64_MOVW_GOTOFF_G0, 0x12c) +ELF_RELOC(R_AARCH64_MOVW_GOTOFF_G0_NC, 0x12d) +ELF_RELOC(R_AARCH64_MOVW_GOTOFF_G1, 0x12e) +ELF_RELOC(R_AARCH64_MOVW_GOTOFF_G1_NC, 0x12f) +ELF_RELOC(R_AARCH64_MOVW_GOTOFF_G2, 0x130) +ELF_RELOC(R_AARCH64_MOVW_GOTOFF_G2_NC, 0x131) +ELF_RELOC(R_AARCH64_MOVW_GOTOFF_G3, 0x132) +ELF_RELOC(R_AARCH64_GOTREL64, 0x133) +ELF_RELOC(R_AARCH64_GOTREL32, 0x134) +ELF_RELOC(R_AARCH64_GOT_LD_PREL19, 0x135) +ELF_RELOC(R_AARCH64_LD64_GOTOFF_LO15, 0x136) +ELF_RELOC(R_AARCH64_ADR_GOT_PAGE, 0x137) +ELF_RELOC(R_AARCH64_LD64_GOT_LO12_NC, 0x138) +ELF_RELOC(R_AARCH64_LD64_GOTPAGE_LO15, 0x139) +ELF_RELOC(R_AARCH64_TLSGD_ADR_PREL21, 0x200) +ELF_RELOC(R_AARCH64_TLSGD_ADR_PAGE21, 0x201) +ELF_RELOC(R_AARCH64_TLSGD_ADD_LO12_NC, 0x202) +ELF_RELOC(R_AARCH64_TLSGD_MOVW_G1, 0x203) +ELF_RELOC(R_AARCH64_TLSGD_MOVW_G0_NC, 0x204) +ELF_RELOC(R_AARCH64_TLSLD_ADR_PREL21, 0x205) +ELF_RELOC(R_AARCH64_TLSLD_ADR_PAGE21, 0x206) +ELF_RELOC(R_AARCH64_TLSLD_ADD_LO12_NC, 0x207) +ELF_RELOC(R_AARCH64_TLSLD_MOVW_G1, 0x208) +ELF_RELOC(R_AARCH64_TLSLD_MOVW_G0_NC, 0x209) +ELF_RELOC(R_AARCH64_TLSLD_LD_PREL19, 0x20a) +ELF_RELOC(R_AARCH64_TLSLD_MOVW_DTPREL_G2, 0x20b) +ELF_RELOC(R_AARCH64_TLSLD_MOVW_DTPREL_G1, 0x20c) +ELF_RELOC(R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC, 0x20d) +ELF_RELOC(R_AARCH64_TLSLD_MOVW_DTPREL_G0, 0x20e) +ELF_RELOC(R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC, 0x20f) +ELF_RELOC(R_AARCH64_TLSLD_ADD_DTPREL_HI12, 0x210) +ELF_RELOC(R_AARCH64_TLSLD_ADD_DTPREL_LO12, 0x211) +ELF_RELOC(R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC, 0x212) +ELF_RELOC(R_AARCH64_TLSLD_LDST8_DTPREL_LO12, 0x213) +ELF_RELOC(R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC, 0x214) +ELF_RELOC(R_AARCH64_TLSLD_LDST16_DTPREL_LO12, 0x215) +ELF_RELOC(R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC, 0x216) +ELF_RELOC(R_AARCH64_TLSLD_LDST32_DTPREL_LO12, 0x217) +ELF_RELOC(R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC, 0x218) +ELF_RELOC(R_AARCH64_TLSLD_LDST64_DTPREL_LO12, 0x219) +ELF_RELOC(R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC, 
0x21a) +ELF_RELOC(R_AARCH64_TLSIE_MOVW_GOTTPREL_G1, 0x21b) +ELF_RELOC(R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC, 0x21c) +ELF_RELOC(R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21, 0x21d) +ELF_RELOC(R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC, 0x21e) +ELF_RELOC(R_AARCH64_TLSIE_LD_GOTTPREL_PREL19, 0x21f) +ELF_RELOC(R_AARCH64_TLSLE_MOVW_TPREL_G2, 0x220) +ELF_RELOC(R_AARCH64_TLSLE_MOVW_TPREL_G1, 0x221) +ELF_RELOC(R_AARCH64_TLSLE_MOVW_TPREL_G1_NC, 0x222) +ELF_RELOC(R_AARCH64_TLSLE_MOVW_TPREL_G0, 0x223) +ELF_RELOC(R_AARCH64_TLSLE_MOVW_TPREL_G0_NC, 0x224) +ELF_RELOC(R_AARCH64_TLSLE_ADD_TPREL_HI12, 0x225) +ELF_RELOC(R_AARCH64_TLSLE_ADD_TPREL_LO12, 0x226) +ELF_RELOC(R_AARCH64_TLSLE_ADD_TPREL_LO12_NC, 0x227) +ELF_RELOC(R_AARCH64_TLSLE_LDST8_TPREL_LO12, 0x228) +ELF_RELOC(R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC, 0x229) +ELF_RELOC(R_AARCH64_TLSLE_LDST16_TPREL_LO12, 0x22a) +ELF_RELOC(R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC, 0x22b) +ELF_RELOC(R_AARCH64_TLSLE_LDST32_TPREL_LO12, 0x22c) +ELF_RELOC(R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC, 0x22d) +ELF_RELOC(R_AARCH64_TLSLE_LDST64_TPREL_LO12, 0x22e) +ELF_RELOC(R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC, 0x22f) +ELF_RELOC(R_AARCH64_TLSDESC_LD_PREL19, 0x230) +ELF_RELOC(R_AARCH64_TLSDESC_ADR_PREL21, 0x231) +ELF_RELOC(R_AARCH64_TLSDESC_ADR_PAGE21, 0x232) +ELF_RELOC(R_AARCH64_TLSDESC_LD64_LO12, 0x233) +ELF_RELOC(R_AARCH64_TLSDESC_ADD_LO12, 0x234) +ELF_RELOC(R_AARCH64_TLSDESC_OFF_G1, 0x235) +ELF_RELOC(R_AARCH64_TLSDESC_OFF_G0_NC, 0x236) +ELF_RELOC(R_AARCH64_TLSDESC_LDR, 0x237) +ELF_RELOC(R_AARCH64_TLSDESC_ADD, 0x238) +ELF_RELOC(R_AARCH64_TLSDESC_CALL, 0x239) +ELF_RELOC(R_AARCH64_TLSLE_LDST128_TPREL_LO12, 0x23a) +ELF_RELOC(R_AARCH64_TLSLE_LDST128_TPREL_LO12_NC, 0x23b) +ELF_RELOC(R_AARCH64_TLSLD_LDST128_DTPREL_LO12, 0x23c) +ELF_RELOC(R_AARCH64_TLSLD_LDST128_DTPREL_LO12_NC, 0x23d) +ELF_RELOC(R_AARCH64_COPY, 0x400) +ELF_RELOC(R_AARCH64_GLOB_DAT, 0x401) +ELF_RELOC(R_AARCH64_JUMP_SLOT, 0x402) +ELF_RELOC(R_AARCH64_RELATIVE, 0x403) +ELF_RELOC(R_AARCH64_TLS_DTPREL64, 0x404) +ELF_RELOC(R_AARCH64_TLS_DTPMOD64, 0x405) +ELF_RELOC(R_AARCH64_TLS_TPREL64, 0x406) +ELF_RELOC(R_AARCH64_TLSDESC, 0x407) +ELF_RELOC(R_AARCH64_IRELATIVE, 0x408) + +// ELF_RELOC(R_AARCH64_P32_NONE, 0) +ELF_RELOC(R_AARCH64_P32_ABS32, 0x001) +ELF_RELOC(R_AARCH64_P32_ABS16, 0x002) +ELF_RELOC(R_AARCH64_P32_PREL32, 0x003) +ELF_RELOC(R_AARCH64_P32_PREL16, 0x004) +ELF_RELOC(R_AARCH64_P32_MOVW_UABS_G0, 0x005) +ELF_RELOC(R_AARCH64_P32_MOVW_UABS_G0_NC, 0x006) +ELF_RELOC(R_AARCH64_P32_MOVW_UABS_G1, 0x007) +ELF_RELOC(R_AARCH64_P32_MOVW_SABS_G0, 0x008) +ELF_RELOC(R_AARCH64_P32_LD_PREL_LO19, 0x009) +ELF_RELOC(R_AARCH64_P32_ADR_PREL_LO21, 0x00a) +ELF_RELOC(R_AARCH64_P32_ADR_PREL_PG_HI21, 0x00b) +ELF_RELOC(R_AARCH64_P32_ADD_ABS_LO12_NC, 0x00c) +ELF_RELOC(R_AARCH64_P32_LDST8_ABS_LO12_NC, 0x00d) +ELF_RELOC(R_AARCH64_P32_LDST16_ABS_LO12_NC, 0x00e) +ELF_RELOC(R_AARCH64_P32_LDST32_ABS_LO12_NC, 0x00f) +ELF_RELOC(R_AARCH64_P32_LDST64_ABS_LO12_NC, 0x010) +ELF_RELOC(R_AARCH64_P32_LDST128_ABS_LO12_NC, 0x011) +ELF_RELOC(R_AARCH64_P32_TSTBR14, 0x012) +ELF_RELOC(R_AARCH64_P32_CONDBR19, 0x013) +ELF_RELOC(R_AARCH64_P32_JUMP26, 0x014) +ELF_RELOC(R_AARCH64_P32_CALL26, 0x015) +ELF_RELOC(R_AARCH64_P32_MOVW_PREL_G0, 0x016) +ELF_RELOC(R_AARCH64_P32_MOVW_PREL_G0_NC, 0x017) +ELF_RELOC(R_AARCH64_P32_MOVW_PREL_G1, 0x018) +ELF_RELOC(R_AARCH64_P32_GOT_LD_PREL19, 0x019) +ELF_RELOC(R_AARCH64_P32_ADR_GOT_PAGE, 0x01a) +ELF_RELOC(R_AARCH64_P32_LD32_GOT_LO12_NC, 0x01b) +ELF_RELOC(R_AARCH64_P32_LD32_GOTPAGE_LO14, 0x01c) +ELF_RELOC(R_AARCH64_P32_TLSGD_ADR_PREL21, 0x050) 
+ELF_RELOC(R_AARCH64_P32_TLSGD_ADR_PAGE21, 0x051) +ELF_RELOC(R_AARCH64_P32_TLSGD_ADD_LO12_NC, 0x052) +ELF_RELOC(R_AARCH64_P32_TLSLD_ADR_PREL21, 0x053) +ELF_RELOC(R_AARCH64_P32_TLSLD_ADR_PAGE21, 0x054) +ELF_RELOC(R_AARCH64_P32_TLSLD_ADD_LO12_NC, 0x055) +ELF_RELOC(R_AARCH64_P32_TLSLD_LD_PREL19, 0x056) +ELF_RELOC(R_AARCH64_P32_TLSLD_MOVW_DTPREL_G1, 0x057) +ELF_RELOC(R_AARCH64_P32_TLSLD_MOVW_DTPREL_G0, 0x058) +ELF_RELOC(R_AARCH64_P32_TLSLD_MOVW_DTPREL_G0_NC, 0x059) +ELF_RELOC(R_AARCH64_P32_TLSLD_ADD_DTPREL_HI12, 0x05a) +ELF_RELOC(R_AARCH64_P32_TLSLD_ADD_DTPREL_LO12, 0x05b) +ELF_RELOC(R_AARCH64_P32_TLSLD_ADD_DTPREL_LO12_NC, 0x05c) +ELF_RELOC(R_AARCH64_P32_TLSLD_LDST8_DTPREL_LO12, 0x05d) +ELF_RELOC(R_AARCH64_P32_TLSLD_LDST8_DTPREL_LO12_NC, 0x05e) +ELF_RELOC(R_AARCH64_P32_TLSLD_LDST16_DTPREL_LO12, 0x05f) +ELF_RELOC(R_AARCH64_P32_TLSLD_LDST16_DTPREL_LO12_NC, 0x060) +ELF_RELOC(R_AARCH64_P32_TLSLD_LDST32_DTPREL_LO12, 0x061) +ELF_RELOC(R_AARCH64_P32_TLSLD_LDST32_DTPREL_LO12_NC, 0x062) +ELF_RELOC(R_AARCH64_P32_TLSLD_LDST64_DTPREL_LO12, 0x063) +ELF_RELOC(R_AARCH64_P32_TLSLD_LDST64_DTPREL_LO12_NC, 0x064) +ELF_RELOC(R_AARCH64_P32_TLSLD_LDST128_DTPREL_LO12, 0x065) +ELF_RELOC(R_AARCH64_P32_TLSLD_LDST128_DTPREL_LO12_NC,0x066) +ELF_RELOC(R_AARCH64_P32_TLSIE_ADR_GOTTPREL_PAGE21, 0x067) +ELF_RELOC(R_AARCH64_P32_TLSIE_LD32_GOTTPREL_LO12_NC, 0x068) +ELF_RELOC(R_AARCH64_P32_TLSIE_LD_GOTTPREL_PREL19, 0x069) +ELF_RELOC(R_AARCH64_P32_TLSLE_MOVW_TPREL_G1, 0x06a) +ELF_RELOC(R_AARCH64_P32_TLSLE_MOVW_TPREL_G0, 0x06b) +ELF_RELOC(R_AARCH64_P32_TLSLE_MOVW_TPREL_G0_NC, 0x06c) +ELF_RELOC(R_AARCH64_P32_TLSLE_ADD_TPREL_HI12, 0x06d) +ELF_RELOC(R_AARCH64_P32_TLSLE_ADD_TPREL_LO12, 0x06e) +ELF_RELOC(R_AARCH64_P32_TLSLE_ADD_TPREL_LO12_NC, 0x06f) +ELF_RELOC(R_AARCH64_P32_TLSLE_LDST8_TPREL_LO12, 0x070) +ELF_RELOC(R_AARCH64_P32_TLSLE_LDST8_TPREL_LO12_NC, 0x071) +ELF_RELOC(R_AARCH64_P32_TLSLE_LDST16_TPREL_LO12, 0x072) +ELF_RELOC(R_AARCH64_P32_TLSLE_LDST16_TPREL_LO12_NC, 0x073) +ELF_RELOC(R_AARCH64_P32_TLSLE_LDST32_TPREL_LO12, 0x074) +ELF_RELOC(R_AARCH64_P32_TLSLE_LDST32_TPREL_LO12_NC, 0x075) +ELF_RELOC(R_AARCH64_P32_TLSLE_LDST64_TPREL_LO12, 0x076) +ELF_RELOC(R_AARCH64_P32_TLSLE_LDST64_TPREL_LO12_NC, 0x077) +ELF_RELOC(R_AARCH64_P32_TLSLE_LDST128_TPREL_LO12, 0x078) +ELF_RELOC(R_AARCH64_P32_TLSLE_LDST128_TPREL_LO12_NC, 0x079) +ELF_RELOC(R_AARCH64_P32_TLSDESC_LD_PREL19, 0x07a) +ELF_RELOC(R_AARCH64_P32_TLSDESC_ADR_PREL21, 0x07b) +ELF_RELOC(R_AARCH64_P32_TLSDESC_ADR_PAGE21, 0x07c) +ELF_RELOC(R_AARCH64_P32_TLSDESC_LD32_LO12, 0x07d) +ELF_RELOC(R_AARCH64_P32_TLSDESC_ADD_LO12, 0x07e) +ELF_RELOC(R_AARCH64_P32_TLSDESC_CALL, 0x07f) +ELF_RELOC(R_AARCH64_P32_COPY, 0x0b4) +ELF_RELOC(R_AARCH64_P32_GLOB_DAT, 0x0b5) +ELF_RELOC(R_AARCH64_P32_JUMP_SLOT, 0x0b6) +ELF_RELOC(R_AARCH64_P32_RELATIVE, 0x0b7) +ELF_RELOC(R_AARCH64_P32_TLS_DTPREL, 0x0b8) +ELF_RELOC(R_AARCH64_P32_TLS_DTPMOD, 0x0b9) +ELF_RELOC(R_AARCH64_P32_TLS_TPREL, 0x0ba) +ELF_RELOC(R_AARCH64_P32_TLSDESC, 0x0bb) +ELF_RELOC(R_AARCH64_P32_IRELATIVE, 0x0bc) diff --git a/interpreter/llvm/src/include/llvm/BinaryFormat/ELFRelocs/AMDGPU.def b/interpreter/llvm/src/include/llvm/BinaryFormat/ELFRelocs/AMDGPU.def new file mode 100644 index 0000000000000..c66f88d14ec71 --- /dev/null +++ b/interpreter/llvm/src/include/llvm/BinaryFormat/ELFRelocs/AMDGPU.def @@ -0,0 +1,16 @@ +#ifndef ELF_RELOC +#error "ELF_RELOC must be defined" +#endif + +ELF_RELOC(R_AMDGPU_NONE, 0) +ELF_RELOC(R_AMDGPU_ABS32_LO, 1) +ELF_RELOC(R_AMDGPU_ABS32_HI, 2) +ELF_RELOC(R_AMDGPU_ABS64, 3) +ELF_RELOC(R_AMDGPU_REL32, 4) 
+ELF_RELOC(R_AMDGPU_REL64, 5) +ELF_RELOC(R_AMDGPU_ABS32, 6) +ELF_RELOC(R_AMDGPU_GOTPCREL, 7) +ELF_RELOC(R_AMDGPU_GOTPCREL32_LO, 8) +ELF_RELOC(R_AMDGPU_GOTPCREL32_HI, 9) +ELF_RELOC(R_AMDGPU_REL32_LO, 10) +ELF_RELOC(R_AMDGPU_REL32_HI, 11) diff --git a/interpreter/llvm/src/include/llvm/BinaryFormat/ELFRelocs/ARM.def b/interpreter/llvm/src/include/llvm/BinaryFormat/ELFRelocs/ARM.def new file mode 100644 index 0000000000000..730fc5b8836c8 --- /dev/null +++ b/interpreter/llvm/src/include/llvm/BinaryFormat/ELFRelocs/ARM.def @@ -0,0 +1,138 @@ + +#ifndef ELF_RELOC +#error "ELF_RELOC must be defined" +#endif + +// Meets 2.09 ABI Specs. +ELF_RELOC(R_ARM_NONE, 0x00) +ELF_RELOC(R_ARM_PC24, 0x01) +ELF_RELOC(R_ARM_ABS32, 0x02) +ELF_RELOC(R_ARM_REL32, 0x03) +ELF_RELOC(R_ARM_LDR_PC_G0, 0x04) +ELF_RELOC(R_ARM_ABS16, 0x05) +ELF_RELOC(R_ARM_ABS12, 0x06) +ELF_RELOC(R_ARM_THM_ABS5, 0x07) +ELF_RELOC(R_ARM_ABS8, 0x08) +ELF_RELOC(R_ARM_SBREL32, 0x09) +ELF_RELOC(R_ARM_THM_CALL, 0x0a) +ELF_RELOC(R_ARM_THM_PC8, 0x0b) +ELF_RELOC(R_ARM_BREL_ADJ, 0x0c) +ELF_RELOC(R_ARM_TLS_DESC, 0x0d) +ELF_RELOC(R_ARM_THM_SWI8, 0x0e) +ELF_RELOC(R_ARM_XPC25, 0x0f) +ELF_RELOC(R_ARM_THM_XPC22, 0x10) +ELF_RELOC(R_ARM_TLS_DTPMOD32, 0x11) +ELF_RELOC(R_ARM_TLS_DTPOFF32, 0x12) +ELF_RELOC(R_ARM_TLS_TPOFF32, 0x13) +ELF_RELOC(R_ARM_COPY, 0x14) +ELF_RELOC(R_ARM_GLOB_DAT, 0x15) +ELF_RELOC(R_ARM_JUMP_SLOT, 0x16) +ELF_RELOC(R_ARM_RELATIVE, 0x17) +ELF_RELOC(R_ARM_GOTOFF32, 0x18) +ELF_RELOC(R_ARM_BASE_PREL, 0x19) +ELF_RELOC(R_ARM_GOT_BREL, 0x1a) +ELF_RELOC(R_ARM_PLT32, 0x1b) +ELF_RELOC(R_ARM_CALL, 0x1c) +ELF_RELOC(R_ARM_JUMP24, 0x1d) +ELF_RELOC(R_ARM_THM_JUMP24, 0x1e) +ELF_RELOC(R_ARM_BASE_ABS, 0x1f) +ELF_RELOC(R_ARM_ALU_PCREL_7_0, 0x20) +ELF_RELOC(R_ARM_ALU_PCREL_15_8, 0x21) +ELF_RELOC(R_ARM_ALU_PCREL_23_15, 0x22) +ELF_RELOC(R_ARM_LDR_SBREL_11_0_NC, 0x23) +ELF_RELOC(R_ARM_ALU_SBREL_19_12_NC, 0x24) +ELF_RELOC(R_ARM_ALU_SBREL_27_20_CK, 0x25) +ELF_RELOC(R_ARM_TARGET1, 0x26) +ELF_RELOC(R_ARM_SBREL31, 0x27) +ELF_RELOC(R_ARM_V4BX, 0x28) +ELF_RELOC(R_ARM_TARGET2, 0x29) +ELF_RELOC(R_ARM_PREL31, 0x2a) +ELF_RELOC(R_ARM_MOVW_ABS_NC, 0x2b) +ELF_RELOC(R_ARM_MOVT_ABS, 0x2c) +ELF_RELOC(R_ARM_MOVW_PREL_NC, 0x2d) +ELF_RELOC(R_ARM_MOVT_PREL, 0x2e) +ELF_RELOC(R_ARM_THM_MOVW_ABS_NC, 0x2f) +ELF_RELOC(R_ARM_THM_MOVT_ABS, 0x30) +ELF_RELOC(R_ARM_THM_MOVW_PREL_NC, 0x31) +ELF_RELOC(R_ARM_THM_MOVT_PREL, 0x32) +ELF_RELOC(R_ARM_THM_JUMP19, 0x33) +ELF_RELOC(R_ARM_THM_JUMP6, 0x34) +ELF_RELOC(R_ARM_THM_ALU_PREL_11_0, 0x35) +ELF_RELOC(R_ARM_THM_PC12, 0x36) +ELF_RELOC(R_ARM_ABS32_NOI, 0x37) +ELF_RELOC(R_ARM_REL32_NOI, 0x38) +ELF_RELOC(R_ARM_ALU_PC_G0_NC, 0x39) +ELF_RELOC(R_ARM_ALU_PC_G0, 0x3a) +ELF_RELOC(R_ARM_ALU_PC_G1_NC, 0x3b) +ELF_RELOC(R_ARM_ALU_PC_G1, 0x3c) +ELF_RELOC(R_ARM_ALU_PC_G2, 0x3d) +ELF_RELOC(R_ARM_LDR_PC_G1, 0x3e) +ELF_RELOC(R_ARM_LDR_PC_G2, 0x3f) +ELF_RELOC(R_ARM_LDRS_PC_G0, 0x40) +ELF_RELOC(R_ARM_LDRS_PC_G1, 0x41) +ELF_RELOC(R_ARM_LDRS_PC_G2, 0x42) +ELF_RELOC(R_ARM_LDC_PC_G0, 0x43) +ELF_RELOC(R_ARM_LDC_PC_G1, 0x44) +ELF_RELOC(R_ARM_LDC_PC_G2, 0x45) +ELF_RELOC(R_ARM_ALU_SB_G0_NC, 0x46) +ELF_RELOC(R_ARM_ALU_SB_G0, 0x47) +ELF_RELOC(R_ARM_ALU_SB_G1_NC, 0x48) +ELF_RELOC(R_ARM_ALU_SB_G1, 0x49) +ELF_RELOC(R_ARM_ALU_SB_G2, 0x4a) +ELF_RELOC(R_ARM_LDR_SB_G0, 0x4b) +ELF_RELOC(R_ARM_LDR_SB_G1, 0x4c) +ELF_RELOC(R_ARM_LDR_SB_G2, 0x4d) +ELF_RELOC(R_ARM_LDRS_SB_G0, 0x4e) +ELF_RELOC(R_ARM_LDRS_SB_G1, 0x4f) +ELF_RELOC(R_ARM_LDRS_SB_G2, 0x50) +ELF_RELOC(R_ARM_LDC_SB_G0, 0x51) +ELF_RELOC(R_ARM_LDC_SB_G1, 0x52) +ELF_RELOC(R_ARM_LDC_SB_G2, 0x53) +ELF_RELOC(R_ARM_MOVW_BREL_NC, 0x54) 
+ELF_RELOC(R_ARM_MOVT_BREL, 0x55) +ELF_RELOC(R_ARM_MOVW_BREL, 0x56) +ELF_RELOC(R_ARM_THM_MOVW_BREL_NC, 0x57) +ELF_RELOC(R_ARM_THM_MOVT_BREL, 0x58) +ELF_RELOC(R_ARM_THM_MOVW_BREL, 0x59) +ELF_RELOC(R_ARM_TLS_GOTDESC, 0x5a) +ELF_RELOC(R_ARM_TLS_CALL, 0x5b) +ELF_RELOC(R_ARM_TLS_DESCSEQ, 0x5c) +ELF_RELOC(R_ARM_THM_TLS_CALL, 0x5d) +ELF_RELOC(R_ARM_PLT32_ABS, 0x5e) +ELF_RELOC(R_ARM_GOT_ABS, 0x5f) +ELF_RELOC(R_ARM_GOT_PREL, 0x60) +ELF_RELOC(R_ARM_GOT_BREL12, 0x61) +ELF_RELOC(R_ARM_GOTOFF12, 0x62) +ELF_RELOC(R_ARM_GOTRELAX, 0x63) +ELF_RELOC(R_ARM_GNU_VTENTRY, 0x64) +ELF_RELOC(R_ARM_GNU_VTINHERIT, 0x65) +ELF_RELOC(R_ARM_THM_JUMP11, 0x66) +ELF_RELOC(R_ARM_THM_JUMP8, 0x67) +ELF_RELOC(R_ARM_TLS_GD32, 0x68) +ELF_RELOC(R_ARM_TLS_LDM32, 0x69) +ELF_RELOC(R_ARM_TLS_LDO32, 0x6a) +ELF_RELOC(R_ARM_TLS_IE32, 0x6b) +ELF_RELOC(R_ARM_TLS_LE32, 0x6c) +ELF_RELOC(R_ARM_TLS_LDO12, 0x6d) +ELF_RELOC(R_ARM_TLS_LE12, 0x6e) +ELF_RELOC(R_ARM_TLS_IE12GP, 0x6f) +ELF_RELOC(R_ARM_PRIVATE_0, 0x70) +ELF_RELOC(R_ARM_PRIVATE_1, 0x71) +ELF_RELOC(R_ARM_PRIVATE_2, 0x72) +ELF_RELOC(R_ARM_PRIVATE_3, 0x73) +ELF_RELOC(R_ARM_PRIVATE_4, 0x74) +ELF_RELOC(R_ARM_PRIVATE_5, 0x75) +ELF_RELOC(R_ARM_PRIVATE_6, 0x76) +ELF_RELOC(R_ARM_PRIVATE_7, 0x77) +ELF_RELOC(R_ARM_PRIVATE_8, 0x78) +ELF_RELOC(R_ARM_PRIVATE_9, 0x79) +ELF_RELOC(R_ARM_PRIVATE_10, 0x7a) +ELF_RELOC(R_ARM_PRIVATE_11, 0x7b) +ELF_RELOC(R_ARM_PRIVATE_12, 0x7c) +ELF_RELOC(R_ARM_PRIVATE_13, 0x7d) +ELF_RELOC(R_ARM_PRIVATE_14, 0x7e) +ELF_RELOC(R_ARM_PRIVATE_15, 0x7f) +ELF_RELOC(R_ARM_ME_TOO, 0x80) +ELF_RELOC(R_ARM_THM_TLS_DESCSEQ16, 0x81) +ELF_RELOC(R_ARM_THM_TLS_DESCSEQ32, 0x82) +ELF_RELOC(R_ARM_IRELATIVE, 0xa0) diff --git a/interpreter/llvm/src/include/llvm/BinaryFormat/ELFRelocs/AVR.def b/interpreter/llvm/src/include/llvm/BinaryFormat/ELFRelocs/AVR.def new file mode 100644 index 0000000000000..5692d6cb9aa07 --- /dev/null +++ b/interpreter/llvm/src/include/llvm/BinaryFormat/ELFRelocs/AVR.def @@ -0,0 +1,40 @@ + +#ifndef ELF_RELOC +#error "ELF_RELOC must be defined" +#endif + +ELF_RELOC(R_AVR_NONE, 0) +ELF_RELOC(R_AVR_32, 1) +ELF_RELOC(R_AVR_7_PCREL, 2) +ELF_RELOC(R_AVR_13_PCREL, 3) +ELF_RELOC(R_AVR_16, 4) +ELF_RELOC(R_AVR_16_PM, 5) +ELF_RELOC(R_AVR_LO8_LDI, 6) +ELF_RELOC(R_AVR_HI8_LDI, 7) +ELF_RELOC(R_AVR_HH8_LDI, 8) +ELF_RELOC(R_AVR_LO8_LDI_NEG, 9) +ELF_RELOC(R_AVR_HI8_LDI_NEG, 10) +ELF_RELOC(R_AVR_HH8_LDI_NEG, 11) +ELF_RELOC(R_AVR_LO8_LDI_PM, 12) +ELF_RELOC(R_AVR_HI8_LDI_PM, 13) +ELF_RELOC(R_AVR_HH8_LDI_PM, 14) +ELF_RELOC(R_AVR_LO8_LDI_PM_NEG, 15) +ELF_RELOC(R_AVR_HI8_LDI_PM_NEG, 16) +ELF_RELOC(R_AVR_HH8_LDI_PM_NEG, 17) +ELF_RELOC(R_AVR_CALL, 18) +ELF_RELOC(R_AVR_LDI, 19) +ELF_RELOC(R_AVR_6, 20) +ELF_RELOC(R_AVR_6_ADIW, 21) +ELF_RELOC(R_AVR_MS8_LDI, 22) +ELF_RELOC(R_AVR_MS8_LDI_NEG, 23) +ELF_RELOC(R_AVR_LO8_LDI_GS, 24) +ELF_RELOC(R_AVR_HI8_LDI_GS, 25) +ELF_RELOC(R_AVR_8, 26) +ELF_RELOC(R_AVR_8_LO8, 27) +ELF_RELOC(R_AVR_8_HI8, 28) +ELF_RELOC(R_AVR_8_HLO8, 29) +ELF_RELOC(R_AVR_SYM_DIFF, 30) +ELF_RELOC(R_AVR_16_LDST, 31) +ELF_RELOC(R_AVR_LDS_STS_16, 33) +ELF_RELOC(R_AVR_PORT6, 34) +ELF_RELOC(R_AVR_PORT5, 35) diff --git a/interpreter/llvm/src/include/llvm/BinaryFormat/ELFRelocs/BPF.def b/interpreter/llvm/src/include/llvm/BinaryFormat/ELFRelocs/BPF.def new file mode 100644 index 0000000000000..5dd7f70b6963a --- /dev/null +++ b/interpreter/llvm/src/include/llvm/BinaryFormat/ELFRelocs/BPF.def @@ -0,0 +1,8 @@ +#ifndef ELF_RELOC +#error "ELF_RELOC must be defined" +#endif + +// No relocation +ELF_RELOC(R_BPF_NONE, 0) +ELF_RELOC(R_BPF_64_64, 1) +ELF_RELOC(R_BPF_64_32, 10) diff --git 
a/interpreter/llvm/src/include/llvm/BinaryFormat/ELFRelocs/Hexagon.def b/interpreter/llvm/src/include/llvm/BinaryFormat/ELFRelocs/Hexagon.def new file mode 100644 index 0000000000000..5021e2b26ce5f --- /dev/null +++ b/interpreter/llvm/src/include/llvm/BinaryFormat/ELFRelocs/Hexagon.def @@ -0,0 +1,106 @@ + +#ifndef ELF_RELOC +#error "ELF_RELOC must be defined" +#endif + +// Release 5 ABI +ELF_RELOC(R_HEX_NONE, 0) +ELF_RELOC(R_HEX_B22_PCREL, 1) +ELF_RELOC(R_HEX_B15_PCREL, 2) +ELF_RELOC(R_HEX_B7_PCREL, 3) +ELF_RELOC(R_HEX_LO16, 4) +ELF_RELOC(R_HEX_HI16, 5) +ELF_RELOC(R_HEX_32, 6) +ELF_RELOC(R_HEX_16, 7) +ELF_RELOC(R_HEX_8, 8) +ELF_RELOC(R_HEX_GPREL16_0, 9) +ELF_RELOC(R_HEX_GPREL16_1, 10) +ELF_RELOC(R_HEX_GPREL16_2, 11) +ELF_RELOC(R_HEX_GPREL16_3, 12) +ELF_RELOC(R_HEX_HL16, 13) +ELF_RELOC(R_HEX_B13_PCREL, 14) +ELF_RELOC(R_HEX_B9_PCREL, 15) +ELF_RELOC(R_HEX_B32_PCREL_X, 16) +ELF_RELOC(R_HEX_32_6_X, 17) +ELF_RELOC(R_HEX_B22_PCREL_X, 18) +ELF_RELOC(R_HEX_B15_PCREL_X, 19) +ELF_RELOC(R_HEX_B13_PCREL_X, 20) +ELF_RELOC(R_HEX_B9_PCREL_X, 21) +ELF_RELOC(R_HEX_B7_PCREL_X, 22) +ELF_RELOC(R_HEX_16_X, 23) +ELF_RELOC(R_HEX_12_X, 24) +ELF_RELOC(R_HEX_11_X, 25) +ELF_RELOC(R_HEX_10_X, 26) +ELF_RELOC(R_HEX_9_X, 27) +ELF_RELOC(R_HEX_8_X, 28) +ELF_RELOC(R_HEX_7_X, 29) +ELF_RELOC(R_HEX_6_X, 30) +ELF_RELOC(R_HEX_32_PCREL, 31) +ELF_RELOC(R_HEX_COPY, 32) +ELF_RELOC(R_HEX_GLOB_DAT, 33) +ELF_RELOC(R_HEX_JMP_SLOT, 34) +ELF_RELOC(R_HEX_RELATIVE, 35) +ELF_RELOC(R_HEX_PLT_B22_PCREL, 36) +ELF_RELOC(R_HEX_GOTREL_LO16, 37) +ELF_RELOC(R_HEX_GOTREL_HI16, 38) +ELF_RELOC(R_HEX_GOTREL_32, 39) +ELF_RELOC(R_HEX_GOT_LO16, 40) +ELF_RELOC(R_HEX_GOT_HI16, 41) +ELF_RELOC(R_HEX_GOT_32, 42) +ELF_RELOC(R_HEX_GOT_16, 43) +ELF_RELOC(R_HEX_DTPMOD_32, 44) +ELF_RELOC(R_HEX_DTPREL_LO16, 45) +ELF_RELOC(R_HEX_DTPREL_HI16, 46) +ELF_RELOC(R_HEX_DTPREL_32, 47) +ELF_RELOC(R_HEX_DTPREL_16, 48) +ELF_RELOC(R_HEX_GD_PLT_B22_PCREL, 49) +ELF_RELOC(R_HEX_GD_GOT_LO16, 50) +ELF_RELOC(R_HEX_GD_GOT_HI16, 51) +ELF_RELOC(R_HEX_GD_GOT_32, 52) +ELF_RELOC(R_HEX_GD_GOT_16, 53) +ELF_RELOC(R_HEX_IE_LO16, 54) +ELF_RELOC(R_HEX_IE_HI16, 55) +ELF_RELOC(R_HEX_IE_32, 56) +ELF_RELOC(R_HEX_IE_GOT_LO16, 57) +ELF_RELOC(R_HEX_IE_GOT_HI16, 58) +ELF_RELOC(R_HEX_IE_GOT_32, 59) +ELF_RELOC(R_HEX_IE_GOT_16, 60) +ELF_RELOC(R_HEX_TPREL_LO16, 61) +ELF_RELOC(R_HEX_TPREL_HI16, 62) +ELF_RELOC(R_HEX_TPREL_32, 63) +ELF_RELOC(R_HEX_TPREL_16, 64) +ELF_RELOC(R_HEX_6_PCREL_X, 65) +ELF_RELOC(R_HEX_GOTREL_32_6_X, 66) +ELF_RELOC(R_HEX_GOTREL_16_X, 67) +ELF_RELOC(R_HEX_GOTREL_11_X, 68) +ELF_RELOC(R_HEX_GOT_32_6_X, 69) +ELF_RELOC(R_HEX_GOT_16_X, 70) +ELF_RELOC(R_HEX_GOT_11_X, 71) +ELF_RELOC(R_HEX_DTPREL_32_6_X, 72) +ELF_RELOC(R_HEX_DTPREL_16_X, 73) +ELF_RELOC(R_HEX_DTPREL_11_X, 74) +ELF_RELOC(R_HEX_GD_GOT_32_6_X, 75) +ELF_RELOC(R_HEX_GD_GOT_16_X, 76) +ELF_RELOC(R_HEX_GD_GOT_11_X, 77) +ELF_RELOC(R_HEX_IE_32_6_X, 78) +ELF_RELOC(R_HEX_IE_16_X, 79) +ELF_RELOC(R_HEX_IE_GOT_32_6_X, 80) +ELF_RELOC(R_HEX_IE_GOT_16_X, 81) +ELF_RELOC(R_HEX_IE_GOT_11_X, 82) +ELF_RELOC(R_HEX_TPREL_32_6_X, 83) +ELF_RELOC(R_HEX_TPREL_16_X, 84) +ELF_RELOC(R_HEX_TPREL_11_X, 85) +ELF_RELOC(R_HEX_LD_PLT_B22_PCREL, 86) +ELF_RELOC(R_HEX_LD_GOT_LO16, 87) +ELF_RELOC(R_HEX_LD_GOT_HI16, 88) +ELF_RELOC(R_HEX_LD_GOT_32, 89) +ELF_RELOC(R_HEX_LD_GOT_16, 90) +ELF_RELOC(R_HEX_LD_GOT_32_6_X, 91) +ELF_RELOC(R_HEX_LD_GOT_16_X, 92) +ELF_RELOC(R_HEX_LD_GOT_11_X, 93) +ELF_RELOC(R_HEX_23_REG, 94) +ELF_RELOC(R_HEX_GD_PLT_B22_PCREL_X, 95) +ELF_RELOC(R_HEX_GD_PLT_B32_PCREL_X, 96) +ELF_RELOC(R_HEX_LD_PLT_B22_PCREL_X, 97) +ELF_RELOC(R_HEX_LD_PLT_B32_PCREL_X, 98) 
+ELF_RELOC(R_HEX_27_REG, 99) diff --git a/interpreter/llvm/src/include/llvm/BinaryFormat/ELFRelocs/Lanai.def b/interpreter/llvm/src/include/llvm/BinaryFormat/ELFRelocs/Lanai.def new file mode 100644 index 0000000000000..77ecb048403d3 --- /dev/null +++ b/interpreter/llvm/src/include/llvm/BinaryFormat/ELFRelocs/Lanai.def @@ -0,0 +1,19 @@ + +#ifndef ELF_RELOC +#error "ELF_RELOC must be defined" +#endif + +// No relocation +ELF_RELOC(R_LANAI_NONE, 0) +// 21-bit symbol relocation +ELF_RELOC(R_LANAI_21, 1) +// 21-bit symbol relocation with last two bits masked to 0 +ELF_RELOC(R_LANAI_21_F, 2) +// 25-bit branch targets +ELF_RELOC(R_LANAI_25, 3) +// General 32-bit relocation +ELF_RELOC(R_LANAI_32, 4) +// Upper 16-bits of a symbolic relocation +ELF_RELOC(R_LANAI_HI16, 5) +// Lower 16-bits of a symbolic relocation +ELF_RELOC(R_LANAI_LO16, 6) diff --git a/interpreter/llvm/src/include/llvm/BinaryFormat/ELFRelocs/Mips.def b/interpreter/llvm/src/include/llvm/BinaryFormat/ELFRelocs/Mips.def new file mode 100644 index 0000000000000..bc0088dff3f43 --- /dev/null +++ b/interpreter/llvm/src/include/llvm/BinaryFormat/ELFRelocs/Mips.def @@ -0,0 +1,117 @@ + +#ifndef ELF_RELOC +#error "ELF_RELOC must be defined" +#endif + +ELF_RELOC(R_MIPS_NONE, 0) +ELF_RELOC(R_MIPS_16, 1) +ELF_RELOC(R_MIPS_32, 2) +ELF_RELOC(R_MIPS_REL32, 3) +ELF_RELOC(R_MIPS_26, 4) +ELF_RELOC(R_MIPS_HI16, 5) +ELF_RELOC(R_MIPS_LO16, 6) +ELF_RELOC(R_MIPS_GPREL16, 7) +ELF_RELOC(R_MIPS_LITERAL, 8) +ELF_RELOC(R_MIPS_GOT16, 9) +ELF_RELOC(R_MIPS_PC16, 10) +ELF_RELOC(R_MIPS_CALL16, 11) +ELF_RELOC(R_MIPS_GPREL32, 12) +ELF_RELOC(R_MIPS_UNUSED1, 13) +ELF_RELOC(R_MIPS_UNUSED2, 14) +ELF_RELOC(R_MIPS_UNUSED3, 15) +ELF_RELOC(R_MIPS_SHIFT5, 16) +ELF_RELOC(R_MIPS_SHIFT6, 17) +ELF_RELOC(R_MIPS_64, 18) +ELF_RELOC(R_MIPS_GOT_DISP, 19) +ELF_RELOC(R_MIPS_GOT_PAGE, 20) +ELF_RELOC(R_MIPS_GOT_OFST, 21) +ELF_RELOC(R_MIPS_GOT_HI16, 22) +ELF_RELOC(R_MIPS_GOT_LO16, 23) +ELF_RELOC(R_MIPS_SUB, 24) +ELF_RELOC(R_MIPS_INSERT_A, 25) +ELF_RELOC(R_MIPS_INSERT_B, 26) +ELF_RELOC(R_MIPS_DELETE, 27) +ELF_RELOC(R_MIPS_HIGHER, 28) +ELF_RELOC(R_MIPS_HIGHEST, 29) +ELF_RELOC(R_MIPS_CALL_HI16, 30) +ELF_RELOC(R_MIPS_CALL_LO16, 31) +ELF_RELOC(R_MIPS_SCN_DISP, 32) +ELF_RELOC(R_MIPS_REL16, 33) +ELF_RELOC(R_MIPS_ADD_IMMEDIATE, 34) +ELF_RELOC(R_MIPS_PJUMP, 35) +ELF_RELOC(R_MIPS_RELGOT, 36) +ELF_RELOC(R_MIPS_JALR, 37) +ELF_RELOC(R_MIPS_TLS_DTPMOD32, 38) +ELF_RELOC(R_MIPS_TLS_DTPREL32, 39) +ELF_RELOC(R_MIPS_TLS_DTPMOD64, 40) +ELF_RELOC(R_MIPS_TLS_DTPREL64, 41) +ELF_RELOC(R_MIPS_TLS_GD, 42) +ELF_RELOC(R_MIPS_TLS_LDM, 43) +ELF_RELOC(R_MIPS_TLS_DTPREL_HI16, 44) +ELF_RELOC(R_MIPS_TLS_DTPREL_LO16, 45) +ELF_RELOC(R_MIPS_TLS_GOTTPREL, 46) +ELF_RELOC(R_MIPS_TLS_TPREL32, 47) +ELF_RELOC(R_MIPS_TLS_TPREL64, 48) +ELF_RELOC(R_MIPS_TLS_TPREL_HI16, 49) +ELF_RELOC(R_MIPS_TLS_TPREL_LO16, 50) +ELF_RELOC(R_MIPS_GLOB_DAT, 51) +ELF_RELOC(R_MIPS_PC21_S2, 60) +ELF_RELOC(R_MIPS_PC26_S2, 61) +ELF_RELOC(R_MIPS_PC18_S3, 62) +ELF_RELOC(R_MIPS_PC19_S2, 63) +ELF_RELOC(R_MIPS_PCHI16, 64) +ELF_RELOC(R_MIPS_PCLO16, 65) +ELF_RELOC(R_MIPS16_26, 100) +ELF_RELOC(R_MIPS16_GPREL, 101) +ELF_RELOC(R_MIPS16_GOT16, 102) +ELF_RELOC(R_MIPS16_CALL16, 103) +ELF_RELOC(R_MIPS16_HI16, 104) +ELF_RELOC(R_MIPS16_LO16, 105) +ELF_RELOC(R_MIPS16_TLS_GD, 106) +ELF_RELOC(R_MIPS16_TLS_LDM, 107) +ELF_RELOC(R_MIPS16_TLS_DTPREL_HI16, 108) +ELF_RELOC(R_MIPS16_TLS_DTPREL_LO16, 109) +ELF_RELOC(R_MIPS16_TLS_GOTTPREL, 110) +ELF_RELOC(R_MIPS16_TLS_TPREL_HI16, 111) +ELF_RELOC(R_MIPS16_TLS_TPREL_LO16, 112) +ELF_RELOC(R_MIPS_COPY, 126) +ELF_RELOC(R_MIPS_JUMP_SLOT, 127) 
+ELF_RELOC(R_MICROMIPS_26_S1, 133) +ELF_RELOC(R_MICROMIPS_HI16, 134) +ELF_RELOC(R_MICROMIPS_LO16, 135) +ELF_RELOC(R_MICROMIPS_GPREL16, 136) +ELF_RELOC(R_MICROMIPS_LITERAL, 137) +ELF_RELOC(R_MICROMIPS_GOT16, 138) +ELF_RELOC(R_MICROMIPS_PC7_S1, 139) +ELF_RELOC(R_MICROMIPS_PC10_S1, 140) +ELF_RELOC(R_MICROMIPS_PC16_S1, 141) +ELF_RELOC(R_MICROMIPS_CALL16, 142) +ELF_RELOC(R_MICROMIPS_GOT_DISP, 145) +ELF_RELOC(R_MICROMIPS_GOT_PAGE, 146) +ELF_RELOC(R_MICROMIPS_GOT_OFST, 147) +ELF_RELOC(R_MICROMIPS_GOT_HI16, 148) +ELF_RELOC(R_MICROMIPS_GOT_LO16, 149) +ELF_RELOC(R_MICROMIPS_SUB, 150) +ELF_RELOC(R_MICROMIPS_HIGHER, 151) +ELF_RELOC(R_MICROMIPS_HIGHEST, 152) +ELF_RELOC(R_MICROMIPS_CALL_HI16, 153) +ELF_RELOC(R_MICROMIPS_CALL_LO16, 154) +ELF_RELOC(R_MICROMIPS_SCN_DISP, 155) +ELF_RELOC(R_MICROMIPS_JALR, 156) +ELF_RELOC(R_MICROMIPS_HI0_LO16, 157) +ELF_RELOC(R_MICROMIPS_TLS_GD, 162) +ELF_RELOC(R_MICROMIPS_TLS_LDM, 163) +ELF_RELOC(R_MICROMIPS_TLS_DTPREL_HI16, 164) +ELF_RELOC(R_MICROMIPS_TLS_DTPREL_LO16, 165) +ELF_RELOC(R_MICROMIPS_TLS_GOTTPREL, 166) +ELF_RELOC(R_MICROMIPS_TLS_TPREL_HI16, 169) +ELF_RELOC(R_MICROMIPS_TLS_TPREL_LO16, 170) +ELF_RELOC(R_MICROMIPS_GPREL7_S2, 172) +ELF_RELOC(R_MICROMIPS_PC23_S2, 173) +ELF_RELOC(R_MICROMIPS_PC21_S1, 174) +ELF_RELOC(R_MICROMIPS_PC26_S1, 175) +ELF_RELOC(R_MICROMIPS_PC18_S3, 176) +ELF_RELOC(R_MICROMIPS_PC19_S2, 177) +ELF_RELOC(R_MIPS_NUM, 218) +ELF_RELOC(R_MIPS_PC32, 248) +ELF_RELOC(R_MIPS_EH, 249) diff --git a/interpreter/llvm/src/include/llvm/BinaryFormat/ELFRelocs/PowerPC.def b/interpreter/llvm/src/include/llvm/BinaryFormat/ELFRelocs/PowerPC.def new file mode 100644 index 0000000000000..e4f8ee0ebe2b8 --- /dev/null +++ b/interpreter/llvm/src/include/llvm/BinaryFormat/ELFRelocs/PowerPC.def @@ -0,0 +1,123 @@ + +#ifndef ELF_RELOC +#error "ELF_RELOC must be defined" +#endif + +// glibc's PowerPC asm/sigcontext.h, when compiling for PPC64, has the +// unfortunate behavior of including asm/elf.h, which defines R_PPC_NONE, etc. +// to their corresponding integer values. As a result, we need to undef them +// here before continuing. + +#undef R_PPC_NONE +#undef R_PPC_ADDR32 +#undef R_PPC_ADDR24 +#undef R_PPC_ADDR16 +#undef R_PPC_ADDR16_LO +#undef R_PPC_ADDR16_HI +#undef R_PPC_ADDR16_HA +#undef R_PPC_ADDR14 +#undef R_PPC_ADDR14_BRTAKEN +#undef R_PPC_ADDR14_BRNTAKEN +#undef R_PPC_REL24 +#undef R_PPC_REL14 +#undef R_PPC_REL14_BRTAKEN +#undef R_PPC_REL14_BRNTAKEN +#undef R_PPC_GOT16 +#undef R_PPC_GOT16_LO +#undef R_PPC_GOT16_HI +#undef R_PPC_GOT16_HA +#undef R_PPC_PLTREL24 +#undef R_PPC_JMP_SLOT +#undef R_PPC_LOCAL24PC +#undef R_PPC_REL32 +#undef R_PPC_TLS +#undef R_PPC_DTPMOD32 +#undef R_PPC_TPREL16 +#undef R_PPC_TPREL16_LO +#undef R_PPC_TPREL16_HI +#undef R_PPC_TPREL16_HA +#undef R_PPC_TPREL32 +#undef R_PPC_DTPREL16 +#undef R_PPC_DTPREL16_LO +#undef R_PPC_DTPREL16_HI +#undef R_PPC_DTPREL16_HA +#undef R_PPC_DTPREL32 +#undef R_PPC_GOT_TLSGD16 +#undef R_PPC_GOT_TLSGD16_LO +#undef R_PPC_GOT_TLSGD16_HI +#undef R_PPC_GOT_TLSGD16_HA +#undef R_PPC_GOT_TLSLD16 +#undef R_PPC_GOT_TLSLD16_LO +#undef R_PPC_GOT_TLSLD16_HI +#undef R_PPC_GOT_TLSLD16_HA +#undef R_PPC_GOT_TPREL16 +#undef R_PPC_GOT_TPREL16_LO +#undef R_PPC_GOT_TPREL16_HI +#undef R_PPC_GOT_TPREL16_HA +#undef R_PPC_GOT_DTPREL16 +#undef R_PPC_GOT_DTPREL16_LO +#undef R_PPC_GOT_DTPREL16_HI +#undef R_PPC_GOT_DTPREL16_HA +#undef R_PPC_TLSGD +#undef R_PPC_TLSLD +#undef R_PPC_REL16 +#undef R_PPC_REL16_LO +#undef R_PPC_REL16_HI +#undef R_PPC_REL16_HA + +ELF_RELOC(R_PPC_NONE, 0) /* No relocation. 
*/ +ELF_RELOC(R_PPC_ADDR32, 1) +ELF_RELOC(R_PPC_ADDR24, 2) +ELF_RELOC(R_PPC_ADDR16, 3) +ELF_RELOC(R_PPC_ADDR16_LO, 4) +ELF_RELOC(R_PPC_ADDR16_HI, 5) +ELF_RELOC(R_PPC_ADDR16_HA, 6) +ELF_RELOC(R_PPC_ADDR14, 7) +ELF_RELOC(R_PPC_ADDR14_BRTAKEN, 8) +ELF_RELOC(R_PPC_ADDR14_BRNTAKEN, 9) +ELF_RELOC(R_PPC_REL24, 10) +ELF_RELOC(R_PPC_REL14, 11) +ELF_RELOC(R_PPC_REL14_BRTAKEN, 12) +ELF_RELOC(R_PPC_REL14_BRNTAKEN, 13) +ELF_RELOC(R_PPC_GOT16, 14) +ELF_RELOC(R_PPC_GOT16_LO, 15) +ELF_RELOC(R_PPC_GOT16_HI, 16) +ELF_RELOC(R_PPC_GOT16_HA, 17) +ELF_RELOC(R_PPC_PLTREL24, 18) +ELF_RELOC(R_PPC_JMP_SLOT, 21) +ELF_RELOC(R_PPC_LOCAL24PC, 23) +ELF_RELOC(R_PPC_REL32, 26) +ELF_RELOC(R_PPC_TLS, 67) +ELF_RELOC(R_PPC_DTPMOD32, 68) +ELF_RELOC(R_PPC_TPREL16, 69) +ELF_RELOC(R_PPC_TPREL16_LO, 70) +ELF_RELOC(R_PPC_TPREL16_HI, 71) +ELF_RELOC(R_PPC_TPREL16_HA, 72) +ELF_RELOC(R_PPC_TPREL32, 73) +ELF_RELOC(R_PPC_DTPREL16, 74) +ELF_RELOC(R_PPC_DTPREL16_LO, 75) +ELF_RELOC(R_PPC_DTPREL16_HI, 76) +ELF_RELOC(R_PPC_DTPREL16_HA, 77) +ELF_RELOC(R_PPC_DTPREL32, 78) +ELF_RELOC(R_PPC_GOT_TLSGD16, 79) +ELF_RELOC(R_PPC_GOT_TLSGD16_LO, 80) +ELF_RELOC(R_PPC_GOT_TLSGD16_HI, 81) +ELF_RELOC(R_PPC_GOT_TLSGD16_HA, 82) +ELF_RELOC(R_PPC_GOT_TLSLD16, 83) +ELF_RELOC(R_PPC_GOT_TLSLD16_LO, 84) +ELF_RELOC(R_PPC_GOT_TLSLD16_HI, 85) +ELF_RELOC(R_PPC_GOT_TLSLD16_HA, 86) +ELF_RELOC(R_PPC_GOT_TPREL16, 87) +ELF_RELOC(R_PPC_GOT_TPREL16_LO, 88) +ELF_RELOC(R_PPC_GOT_TPREL16_HI, 89) +ELF_RELOC(R_PPC_GOT_TPREL16_HA, 90) +ELF_RELOC(R_PPC_GOT_DTPREL16, 91) +ELF_RELOC(R_PPC_GOT_DTPREL16_LO, 92) +ELF_RELOC(R_PPC_GOT_DTPREL16_HI, 93) +ELF_RELOC(R_PPC_GOT_DTPREL16_HA, 94) +ELF_RELOC(R_PPC_TLSGD, 95) +ELF_RELOC(R_PPC_TLSLD, 96) +ELF_RELOC(R_PPC_REL16, 249) +ELF_RELOC(R_PPC_REL16_LO, 250) +ELF_RELOC(R_PPC_REL16_HI, 251) +ELF_RELOC(R_PPC_REL16_HA, 252) diff --git a/interpreter/llvm/src/include/llvm/BinaryFormat/ELFRelocs/PowerPC64.def b/interpreter/llvm/src/include/llvm/BinaryFormat/ELFRelocs/PowerPC64.def new file mode 100644 index 0000000000000..3a47c5a07574b --- /dev/null +++ b/interpreter/llvm/src/include/llvm/BinaryFormat/ELFRelocs/PowerPC64.def @@ -0,0 +1,181 @@ + +#ifndef ELF_RELOC +#error "ELF_RELOC must be defined" +#endif + +// glibc's PowerPC asm/sigcontext.h, when compiling for PPC64, has the +// unfortunate behavior of including asm/elf.h, which defines R_PPC_NONE, etc. +// to their corresponding integer values. As a result, we need to undef them +// here before continuing. 
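The comment at the top of PowerPC.def above (and repeated for PowerPC64.def) deserves a concrete illustration: glibc's asm/elf.h defines the R_PPC_* and R_PPC64_* names as object-like macros, so without the #undef runs in these files the preprocessor would substitute integers for the enumerator names before the compiler ever sees them. A hedged four-line sketch of the failure mode (the macro value here is made up):

#define R_PPC_ADDR32 1      // what a system header may already have done
// enum { R_PPC_ADDR32 = 1 };  // would expand to "enum { 1 = 1 };" -- ill-formed
#undef R_PPC_ADDR32         // what the .def files' #undef blocks do
enum { R_PPC_ADDR32 = 1 };  // now compiles: the name is an identifier again

The PowerPC64 #undef block follows.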
+ +#undef R_PPC64_NONE +#undef R_PPC64_ADDR32 +#undef R_PPC64_ADDR24 +#undef R_PPC64_ADDR16 +#undef R_PPC64_ADDR16_LO +#undef R_PPC64_ADDR16_HI +#undef R_PPC64_ADDR16_HA +#undef R_PPC64_ADDR14 +#undef R_PPC64_ADDR14_BRTAKEN +#undef R_PPC64_ADDR14_BRNTAKEN +#undef R_PPC64_REL24 +#undef R_PPC64_REL14 +#undef R_PPC64_REL14_BRTAKEN +#undef R_PPC64_REL14_BRNTAKEN +#undef R_PPC64_GOT16 +#undef R_PPC64_GOT16_LO +#undef R_PPC64_GOT16_HI +#undef R_PPC64_GOT16_HA +#undef R_PPC64_GLOB_DAT +#undef R_PPC64_JMP_SLOT +#undef R_PPC64_RELATIVE +#undef R_PPC64_REL32 +#undef R_PPC64_ADDR64 +#undef R_PPC64_ADDR16_HIGHER +#undef R_PPC64_ADDR16_HIGHERA +#undef R_PPC64_ADDR16_HIGHEST +#undef R_PPC64_ADDR16_HIGHESTA +#undef R_PPC64_REL64 +#undef R_PPC64_TOC16 +#undef R_PPC64_TOC16_LO +#undef R_PPC64_TOC16_HI +#undef R_PPC64_TOC16_HA +#undef R_PPC64_TOC +#undef R_PPC64_ADDR16_DS +#undef R_PPC64_ADDR16_LO_DS +#undef R_PPC64_GOT16_DS +#undef R_PPC64_GOT16_LO_DS +#undef R_PPC64_TOC16_DS +#undef R_PPC64_TOC16_LO_DS +#undef R_PPC64_TLS +#undef R_PPC64_DTPMOD64 +#undef R_PPC64_TPREL16 +#undef R_PPC64_TPREL16_LO +#undef R_PPC64_TPREL16_HI +#undef R_PPC64_TPREL16_HA +#undef R_PPC64_TPREL64 +#undef R_PPC64_DTPREL16 +#undef R_PPC64_DTPREL16_LO +#undef R_PPC64_DTPREL16_HI +#undef R_PPC64_DTPREL16_HA +#undef R_PPC64_DTPREL64 +#undef R_PPC64_GOT_TLSGD16 +#undef R_PPC64_GOT_TLSGD16_LO +#undef R_PPC64_GOT_TLSGD16_HI +#undef R_PPC64_GOT_TLSGD16_HA +#undef R_PPC64_GOT_TLSLD16 +#undef R_PPC64_GOT_TLSLD16_LO +#undef R_PPC64_GOT_TLSLD16_HI +#undef R_PPC64_GOT_TLSLD16_HA +#undef R_PPC64_GOT_TPREL16_DS +#undef R_PPC64_GOT_TPREL16_LO_DS +#undef R_PPC64_GOT_TPREL16_HI +#undef R_PPC64_GOT_TPREL16_HA +#undef R_PPC64_GOT_DTPREL16_DS +#undef R_PPC64_GOT_DTPREL16_LO_DS +#undef R_PPC64_GOT_DTPREL16_HI +#undef R_PPC64_GOT_DTPREL16_HA +#undef R_PPC64_TPREL16_DS +#undef R_PPC64_TPREL16_LO_DS +#undef R_PPC64_TPREL16_HIGHER +#undef R_PPC64_TPREL16_HIGHERA +#undef R_PPC64_TPREL16_HIGHEST +#undef R_PPC64_TPREL16_HIGHESTA +#undef R_PPC64_DTPREL16_DS +#undef R_PPC64_DTPREL16_LO_DS +#undef R_PPC64_DTPREL16_HIGHER +#undef R_PPC64_DTPREL16_HIGHERA +#undef R_PPC64_DTPREL16_HIGHEST +#undef R_PPC64_DTPREL16_HIGHESTA +#undef R_PPC64_TLSGD +#undef R_PPC64_TLSLD +#undef R_PPC64_REL16 +#undef R_PPC64_REL16_LO +#undef R_PPC64_REL16_HI +#undef R_PPC64_REL16_HA + +ELF_RELOC(R_PPC64_NONE, 0) +ELF_RELOC(R_PPC64_ADDR32, 1) +ELF_RELOC(R_PPC64_ADDR24, 2) +ELF_RELOC(R_PPC64_ADDR16, 3) +ELF_RELOC(R_PPC64_ADDR16_LO, 4) +ELF_RELOC(R_PPC64_ADDR16_HI, 5) +ELF_RELOC(R_PPC64_ADDR16_HA, 6) +ELF_RELOC(R_PPC64_ADDR14, 7) +ELF_RELOC(R_PPC64_ADDR14_BRTAKEN, 8) +ELF_RELOC(R_PPC64_ADDR14_BRNTAKEN, 9) +ELF_RELOC(R_PPC64_REL24, 10) +ELF_RELOC(R_PPC64_REL14, 11) +ELF_RELOC(R_PPC64_REL14_BRTAKEN, 12) +ELF_RELOC(R_PPC64_REL14_BRNTAKEN, 13) +ELF_RELOC(R_PPC64_GOT16, 14) +ELF_RELOC(R_PPC64_GOT16_LO, 15) +ELF_RELOC(R_PPC64_GOT16_HI, 16) +ELF_RELOC(R_PPC64_GOT16_HA, 17) +ELF_RELOC(R_PPC64_GLOB_DAT, 20) +ELF_RELOC(R_PPC64_JMP_SLOT, 21) +ELF_RELOC(R_PPC64_RELATIVE, 22) +ELF_RELOC(R_PPC64_REL32, 26) +ELF_RELOC(R_PPC64_ADDR64, 38) +ELF_RELOC(R_PPC64_ADDR16_HIGHER, 39) +ELF_RELOC(R_PPC64_ADDR16_HIGHERA, 40) +ELF_RELOC(R_PPC64_ADDR16_HIGHEST, 41) +ELF_RELOC(R_PPC64_ADDR16_HIGHESTA, 42) +ELF_RELOC(R_PPC64_REL64, 44) +ELF_RELOC(R_PPC64_TOC16, 47) +ELF_RELOC(R_PPC64_TOC16_LO, 48) +ELF_RELOC(R_PPC64_TOC16_HI, 49) +ELF_RELOC(R_PPC64_TOC16_HA, 50) +ELF_RELOC(R_PPC64_TOC, 51) +ELF_RELOC(R_PPC64_ADDR16_DS, 56) +ELF_RELOC(R_PPC64_ADDR16_LO_DS, 57) +ELF_RELOC(R_PPC64_GOT16_DS, 58) 
+ELF_RELOC(R_PPC64_GOT16_LO_DS, 59) +ELF_RELOC(R_PPC64_TOC16_DS, 63) +ELF_RELOC(R_PPC64_TOC16_LO_DS, 64) +ELF_RELOC(R_PPC64_TLS, 67) +ELF_RELOC(R_PPC64_DTPMOD64, 68) +ELF_RELOC(R_PPC64_TPREL16, 69) +ELF_RELOC(R_PPC64_TPREL16_LO, 70) +ELF_RELOC(R_PPC64_TPREL16_HI, 71) +ELF_RELOC(R_PPC64_TPREL16_HA, 72) +ELF_RELOC(R_PPC64_TPREL64, 73) +ELF_RELOC(R_PPC64_DTPREL16, 74) +ELF_RELOC(R_PPC64_DTPREL16_LO, 75) +ELF_RELOC(R_PPC64_DTPREL16_HI, 76) +ELF_RELOC(R_PPC64_DTPREL16_HA, 77) +ELF_RELOC(R_PPC64_DTPREL64, 78) +ELF_RELOC(R_PPC64_GOT_TLSGD16, 79) +ELF_RELOC(R_PPC64_GOT_TLSGD16_LO, 80) +ELF_RELOC(R_PPC64_GOT_TLSGD16_HI, 81) +ELF_RELOC(R_PPC64_GOT_TLSGD16_HA, 82) +ELF_RELOC(R_PPC64_GOT_TLSLD16, 83) +ELF_RELOC(R_PPC64_GOT_TLSLD16_LO, 84) +ELF_RELOC(R_PPC64_GOT_TLSLD16_HI, 85) +ELF_RELOC(R_PPC64_GOT_TLSLD16_HA, 86) +ELF_RELOC(R_PPC64_GOT_TPREL16_DS, 87) +ELF_RELOC(R_PPC64_GOT_TPREL16_LO_DS, 88) +ELF_RELOC(R_PPC64_GOT_TPREL16_HI, 89) +ELF_RELOC(R_PPC64_GOT_TPREL16_HA, 90) +ELF_RELOC(R_PPC64_GOT_DTPREL16_DS, 91) +ELF_RELOC(R_PPC64_GOT_DTPREL16_LO_DS, 92) +ELF_RELOC(R_PPC64_GOT_DTPREL16_HI, 93) +ELF_RELOC(R_PPC64_GOT_DTPREL16_HA, 94) +ELF_RELOC(R_PPC64_TPREL16_DS, 95) +ELF_RELOC(R_PPC64_TPREL16_LO_DS, 96) +ELF_RELOC(R_PPC64_TPREL16_HIGHER, 97) +ELF_RELOC(R_PPC64_TPREL16_HIGHERA, 98) +ELF_RELOC(R_PPC64_TPREL16_HIGHEST, 99) +ELF_RELOC(R_PPC64_TPREL16_HIGHESTA, 100) +ELF_RELOC(R_PPC64_DTPREL16_DS, 101) +ELF_RELOC(R_PPC64_DTPREL16_LO_DS, 102) +ELF_RELOC(R_PPC64_DTPREL16_HIGHER, 103) +ELF_RELOC(R_PPC64_DTPREL16_HIGHERA, 104) +ELF_RELOC(R_PPC64_DTPREL16_HIGHEST, 105) +ELF_RELOC(R_PPC64_DTPREL16_HIGHESTA, 106) +ELF_RELOC(R_PPC64_TLSGD, 107) +ELF_RELOC(R_PPC64_TLSLD, 108) +ELF_RELOC(R_PPC64_REL16, 249) +ELF_RELOC(R_PPC64_REL16_LO, 250) +ELF_RELOC(R_PPC64_REL16_HI, 251) +ELF_RELOC(R_PPC64_REL16_HA, 252) diff --git a/interpreter/llvm/src/include/llvm/BinaryFormat/ELFRelocs/RISCV.def b/interpreter/llvm/src/include/llvm/BinaryFormat/ELFRelocs/RISCV.def new file mode 100644 index 0000000000000..9ec4955d26dba --- /dev/null +++ b/interpreter/llvm/src/include/llvm/BinaryFormat/ELFRelocs/RISCV.def @@ -0,0 +1,50 @@ + +#ifndef ELF_RELOC +#error "ELF_RELOC must be defined" +#endif + +ELF_RELOC(R_RISCV_NONE, 0) +ELF_RELOC(R_RISCV_32, 1) +ELF_RELOC(R_RISCV_64, 2) +ELF_RELOC(R_RISCV_RELATIVE, 3) +ELF_RELOC(R_RISCV_COPY, 4) +ELF_RELOC(R_RISCV_JUMP_SLOT, 5) +ELF_RELOC(R_RISCV_TLS_DTPMOD32, 6) +ELF_RELOC(R_RISCV_TLS_DTPMOD64, 7) +ELF_RELOC(R_RISCV_TLS_DTPREL32, 8) +ELF_RELOC(R_RISCV_TLS_DTPREL64, 9) +ELF_RELOC(R_RISCV_TLS_TPREL32, 10) +ELF_RELOC(R_RISCV_TLS_TPREL64, 11) +ELF_RELOC(R_RISCV_BRANCH, 16) +ELF_RELOC(R_RISCV_JAL, 17) +ELF_RELOC(R_RISCV_CALL, 18) +ELF_RELOC(R_RISCV_CALL_PLT, 19) +ELF_RELOC(R_RISCV_GOT_HI20, 20) +ELF_RELOC(R_RISCV_TLS_GOT_HI20, 21) +ELF_RELOC(R_RISCV_TLS_GD_HI20, 22) +ELF_RELOC(R_RISCV_PCREL_HI20, 23) +ELF_RELOC(R_RISCV_PCREL_LO12_I, 24) +ELF_RELOC(R_RISCV_PCREL_LO12_S, 25) +ELF_RELOC(R_RISCV_HI20, 26) +ELF_RELOC(R_RISCV_LO12_I, 27) +ELF_RELOC(R_RISCV_LO12_S, 28) +ELF_RELOC(R_RISCV_TPREL_HI20, 29) +ELF_RELOC(R_RISCV_TPREL_LO12_I, 30) +ELF_RELOC(R_RISCV_TPREL_LO12_S, 31) +ELF_RELOC(R_RISCV_TPREL_ADD, 32) +ELF_RELOC(R_RISCV_ADD8, 33) +ELF_RELOC(R_RISCV_ADD16, 34) +ELF_RELOC(R_RISCV_ADD32, 35) +ELF_RELOC(R_RISCV_ADD64, 36) +ELF_RELOC(R_RISCV_SUB8, 37) +ELF_RELOC(R_RISCV_SUB16, 38) +ELF_RELOC(R_RISCV_SUB32, 39) +ELF_RELOC(R_RISCV_SUB64, 40) +ELF_RELOC(R_RISCV_GNU_VTINHERIT, 41) +ELF_RELOC(R_RISCV_GNU_VTENTRY, 42) +ELF_RELOC(R_RISCV_ALIGN, 43) +ELF_RELOC(R_RISCV_RVC_BRANCH, 44) +ELF_RELOC(R_RISCV_RVC_JUMP, 
45) +ELF_RELOC(R_RISCV_RVC_LUI, 46) +ELF_RELOC(R_RISCV_GPREL_I, 47) +ELF_RELOC(R_RISCV_GPREL_S, 48) diff --git a/interpreter/llvm/src/include/llvm/BinaryFormat/ELFRelocs/Sparc.def b/interpreter/llvm/src/include/llvm/BinaryFormat/ELFRelocs/Sparc.def new file mode 100644 index 0000000000000..7e01a4a8a0a06 --- /dev/null +++ b/interpreter/llvm/src/include/llvm/BinaryFormat/ELFRelocs/Sparc.def @@ -0,0 +1,89 @@ + +#ifndef ELF_RELOC +#error "ELF_RELOC must be defined" +#endif + +ELF_RELOC(R_SPARC_NONE, 0) +ELF_RELOC(R_SPARC_8, 1) +ELF_RELOC(R_SPARC_16, 2) +ELF_RELOC(R_SPARC_32, 3) +ELF_RELOC(R_SPARC_DISP8, 4) +ELF_RELOC(R_SPARC_DISP16, 5) +ELF_RELOC(R_SPARC_DISP32, 6) +ELF_RELOC(R_SPARC_WDISP30, 7) +ELF_RELOC(R_SPARC_WDISP22, 8) +ELF_RELOC(R_SPARC_HI22, 9) +ELF_RELOC(R_SPARC_22, 10) +ELF_RELOC(R_SPARC_13, 11) +ELF_RELOC(R_SPARC_LO10, 12) +ELF_RELOC(R_SPARC_GOT10, 13) +ELF_RELOC(R_SPARC_GOT13, 14) +ELF_RELOC(R_SPARC_GOT22, 15) +ELF_RELOC(R_SPARC_PC10, 16) +ELF_RELOC(R_SPARC_PC22, 17) +ELF_RELOC(R_SPARC_WPLT30, 18) +ELF_RELOC(R_SPARC_COPY, 19) +ELF_RELOC(R_SPARC_GLOB_DAT, 20) +ELF_RELOC(R_SPARC_JMP_SLOT, 21) +ELF_RELOC(R_SPARC_RELATIVE, 22) +ELF_RELOC(R_SPARC_UA32, 23) +ELF_RELOC(R_SPARC_PLT32, 24) +ELF_RELOC(R_SPARC_HIPLT22, 25) +ELF_RELOC(R_SPARC_LOPLT10, 26) +ELF_RELOC(R_SPARC_PCPLT32, 27) +ELF_RELOC(R_SPARC_PCPLT22, 28) +ELF_RELOC(R_SPARC_PCPLT10, 29) +ELF_RELOC(R_SPARC_10, 30) +ELF_RELOC(R_SPARC_11, 31) +ELF_RELOC(R_SPARC_64, 32) +ELF_RELOC(R_SPARC_OLO10, 33) +ELF_RELOC(R_SPARC_HH22, 34) +ELF_RELOC(R_SPARC_HM10, 35) +ELF_RELOC(R_SPARC_LM22, 36) +ELF_RELOC(R_SPARC_PC_HH22, 37) +ELF_RELOC(R_SPARC_PC_HM10, 38) +ELF_RELOC(R_SPARC_PC_LM22, 39) +ELF_RELOC(R_SPARC_WDISP16, 40) +ELF_RELOC(R_SPARC_WDISP19, 41) +ELF_RELOC(R_SPARC_7, 43) +ELF_RELOC(R_SPARC_5, 44) +ELF_RELOC(R_SPARC_6, 45) +ELF_RELOC(R_SPARC_DISP64, 46) +ELF_RELOC(R_SPARC_PLT64, 47) +ELF_RELOC(R_SPARC_HIX22, 48) +ELF_RELOC(R_SPARC_LOX10, 49) +ELF_RELOC(R_SPARC_H44, 50) +ELF_RELOC(R_SPARC_M44, 51) +ELF_RELOC(R_SPARC_L44, 52) +ELF_RELOC(R_SPARC_REGISTER, 53) +ELF_RELOC(R_SPARC_UA64, 54) +ELF_RELOC(R_SPARC_UA16, 55) +ELF_RELOC(R_SPARC_TLS_GD_HI22, 56) +ELF_RELOC(R_SPARC_TLS_GD_LO10, 57) +ELF_RELOC(R_SPARC_TLS_GD_ADD, 58) +ELF_RELOC(R_SPARC_TLS_GD_CALL, 59) +ELF_RELOC(R_SPARC_TLS_LDM_HI22, 60) +ELF_RELOC(R_SPARC_TLS_LDM_LO10, 61) +ELF_RELOC(R_SPARC_TLS_LDM_ADD, 62) +ELF_RELOC(R_SPARC_TLS_LDM_CALL, 63) +ELF_RELOC(R_SPARC_TLS_LDO_HIX22, 64) +ELF_RELOC(R_SPARC_TLS_LDO_LOX10, 65) +ELF_RELOC(R_SPARC_TLS_LDO_ADD, 66) +ELF_RELOC(R_SPARC_TLS_IE_HI22, 67) +ELF_RELOC(R_SPARC_TLS_IE_LO10, 68) +ELF_RELOC(R_SPARC_TLS_IE_LD, 69) +ELF_RELOC(R_SPARC_TLS_IE_LDX, 70) +ELF_RELOC(R_SPARC_TLS_IE_ADD, 71) +ELF_RELOC(R_SPARC_TLS_LE_HIX22, 72) +ELF_RELOC(R_SPARC_TLS_LE_LOX10, 73) +ELF_RELOC(R_SPARC_TLS_DTPMOD32, 74) +ELF_RELOC(R_SPARC_TLS_DTPMOD64, 75) +ELF_RELOC(R_SPARC_TLS_DTPOFF32, 76) +ELF_RELOC(R_SPARC_TLS_DTPOFF64, 77) +ELF_RELOC(R_SPARC_TLS_TPOFF32, 78) +ELF_RELOC(R_SPARC_TLS_TPOFF64, 79) +ELF_RELOC(R_SPARC_GOTDATA_HIX22, 80) +ELF_RELOC(R_SPARC_GOTDATA_LOX10, 81) +ELF_RELOC(R_SPARC_GOTDATA_OP_HIX22, 82) +ELF_RELOC(R_SPARC_GOTDATA_OP_LOX10, 83) +ELF_RELOC(R_SPARC_GOTDATA_OP, 84) diff --git a/interpreter/llvm/src/include/llvm/BinaryFormat/ELFRelocs/SystemZ.def b/interpreter/llvm/src/include/llvm/BinaryFormat/ELFRelocs/SystemZ.def new file mode 100644 index 0000000000000..d6c0b79d40abe --- /dev/null +++ b/interpreter/llvm/src/include/llvm/BinaryFormat/ELFRelocs/SystemZ.def @@ -0,0 +1,71 @@ + +#ifndef ELF_RELOC +#error "ELF_RELOC must be defined" +#endif + 
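One property of these tables worth noting before the SystemZ entries below: the assigned values are sparse (Sparc above jumps from R_SPARC_WDISP19 = 41 to R_SPARC_7 = 43, and MIPS leaves 52-59 unassigned), so a consumer that maps raw relocation numbers back to names needs an explicit miss path rather than plain array indexing. A minimal sketch under that assumption; sparcRelocName is my name, and two Sparc entries stand in for the real #include:

#include <cstdint>
#include <map>
#include <string>

static const std::map<uint32_t, std::string> SparcRelocNames = {
#define ELF_RELOC(name, value) {value, #name},
  ELF_RELOC(R_SPARC_WDISP19, 41)
  ELF_RELOC(R_SPARC_7, 43)
  // real code: #include "llvm/BinaryFormat/ELFRelocs/Sparc.def"
#undef ELF_RELOC
};

// Fall back gracefully for unassigned values such as 42.
std::string sparcRelocName(uint32_t V) {
  auto It = SparcRelocNames.find(V);
  return It != SparcRelocNames.end() ? It->second
                                     : "unknown(" + std::to_string(V) + ")";
}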
+ELF_RELOC(R_390_NONE, 0) +ELF_RELOC(R_390_8, 1) +ELF_RELOC(R_390_12, 2) +ELF_RELOC(R_390_16, 3) +ELF_RELOC(R_390_32, 4) +ELF_RELOC(R_390_PC32, 5) +ELF_RELOC(R_390_GOT12, 6) +ELF_RELOC(R_390_GOT32, 7) +ELF_RELOC(R_390_PLT32, 8) +ELF_RELOC(R_390_COPY, 9) +ELF_RELOC(R_390_GLOB_DAT, 10) +ELF_RELOC(R_390_JMP_SLOT, 11) +ELF_RELOC(R_390_RELATIVE, 12) +ELF_RELOC(R_390_GOTOFF, 13) +ELF_RELOC(R_390_GOTPC, 14) +ELF_RELOC(R_390_GOT16, 15) +ELF_RELOC(R_390_PC16, 16) +ELF_RELOC(R_390_PC16DBL, 17) +ELF_RELOC(R_390_PLT16DBL, 18) +ELF_RELOC(R_390_PC32DBL, 19) +ELF_RELOC(R_390_PLT32DBL, 20) +ELF_RELOC(R_390_GOTPCDBL, 21) +ELF_RELOC(R_390_64, 22) +ELF_RELOC(R_390_PC64, 23) +ELF_RELOC(R_390_GOT64, 24) +ELF_RELOC(R_390_PLT64, 25) +ELF_RELOC(R_390_GOTENT, 26) +ELF_RELOC(R_390_GOTOFF16, 27) +ELF_RELOC(R_390_GOTOFF64, 28) +ELF_RELOC(R_390_GOTPLT12, 29) +ELF_RELOC(R_390_GOTPLT16, 30) +ELF_RELOC(R_390_GOTPLT32, 31) +ELF_RELOC(R_390_GOTPLT64, 32) +ELF_RELOC(R_390_GOTPLTENT, 33) +ELF_RELOC(R_390_PLTOFF16, 34) +ELF_RELOC(R_390_PLTOFF32, 35) +ELF_RELOC(R_390_PLTOFF64, 36) +ELF_RELOC(R_390_TLS_LOAD, 37) +ELF_RELOC(R_390_TLS_GDCALL, 38) +ELF_RELOC(R_390_TLS_LDCALL, 39) +ELF_RELOC(R_390_TLS_GD32, 40) +ELF_RELOC(R_390_TLS_GD64, 41) +ELF_RELOC(R_390_TLS_GOTIE12, 42) +ELF_RELOC(R_390_TLS_GOTIE32, 43) +ELF_RELOC(R_390_TLS_GOTIE64, 44) +ELF_RELOC(R_390_TLS_LDM32, 45) +ELF_RELOC(R_390_TLS_LDM64, 46) +ELF_RELOC(R_390_TLS_IE32, 47) +ELF_RELOC(R_390_TLS_IE64, 48) +ELF_RELOC(R_390_TLS_IEENT, 49) +ELF_RELOC(R_390_TLS_LE32, 50) +ELF_RELOC(R_390_TLS_LE64, 51) +ELF_RELOC(R_390_TLS_LDO32, 52) +ELF_RELOC(R_390_TLS_LDO64, 53) +ELF_RELOC(R_390_TLS_DTPMOD, 54) +ELF_RELOC(R_390_TLS_DTPOFF, 55) +ELF_RELOC(R_390_TLS_TPOFF, 56) +ELF_RELOC(R_390_20, 57) +ELF_RELOC(R_390_GOT20, 58) +ELF_RELOC(R_390_GOTPLT20, 59) +ELF_RELOC(R_390_TLS_GOTIE20, 60) +ELF_RELOC(R_390_IRELATIVE, 61) +ELF_RELOC(R_390_PC12DBL, 62) +ELF_RELOC(R_390_PLT12DBL, 63) +ELF_RELOC(R_390_PC24DBL, 64) +ELF_RELOC(R_390_PLT24DBL, 65) diff --git a/interpreter/llvm/src/include/llvm/BinaryFormat/ELFRelocs/WebAssembly.def b/interpreter/llvm/src/include/llvm/BinaryFormat/ELFRelocs/WebAssembly.def new file mode 100644 index 0000000000000..9a34349efb969 --- /dev/null +++ b/interpreter/llvm/src/include/llvm/BinaryFormat/ELFRelocs/WebAssembly.def @@ -0,0 +1,8 @@ + +#ifndef ELF_RELOC +#error "ELF_RELOC must be defined" +#endif + +ELF_RELOC(R_WEBASSEMBLY_NONE, 0) +ELF_RELOC(R_WEBASSEMBLY_DATA, 1) +ELF_RELOC(R_WEBASSEMBLY_FUNCTION, 2) diff --git a/interpreter/llvm/src/include/llvm/BinaryFormat/ELFRelocs/i386.def b/interpreter/llvm/src/include/llvm/BinaryFormat/ELFRelocs/i386.def new file mode 100644 index 0000000000000..1d28cf595cd5c --- /dev/null +++ b/interpreter/llvm/src/include/llvm/BinaryFormat/ELFRelocs/i386.def @@ -0,0 +1,47 @@ + +#ifndef ELF_RELOC +#error "ELF_RELOC must be defined" +#endif + +// TODO: this is just a subset +ELF_RELOC(R_386_NONE, 0) +ELF_RELOC(R_386_32, 1) +ELF_RELOC(R_386_PC32, 2) +ELF_RELOC(R_386_GOT32, 3) +ELF_RELOC(R_386_PLT32, 4) +ELF_RELOC(R_386_COPY, 5) +ELF_RELOC(R_386_GLOB_DAT, 6) +ELF_RELOC(R_386_JUMP_SLOT, 7) +ELF_RELOC(R_386_RELATIVE, 8) +ELF_RELOC(R_386_GOTOFF, 9) +ELF_RELOC(R_386_GOTPC, 10) +ELF_RELOC(R_386_32PLT, 11) +ELF_RELOC(R_386_TLS_TPOFF, 14) +ELF_RELOC(R_386_TLS_IE, 15) +ELF_RELOC(R_386_TLS_GOTIE, 16) +ELF_RELOC(R_386_TLS_LE, 17) +ELF_RELOC(R_386_TLS_GD, 18) +ELF_RELOC(R_386_TLS_LDM, 19) +ELF_RELOC(R_386_16, 20) +ELF_RELOC(R_386_PC16, 21) +ELF_RELOC(R_386_8, 22) +ELF_RELOC(R_386_PC8, 23) +ELF_RELOC(R_386_TLS_GD_32, 24) +ELF_RELOC(R_386_TLS_GD_PUSH, 
25) +ELF_RELOC(R_386_TLS_GD_CALL, 26) +ELF_RELOC(R_386_TLS_GD_POP, 27) +ELF_RELOC(R_386_TLS_LDM_32, 28) +ELF_RELOC(R_386_TLS_LDM_PUSH, 29) +ELF_RELOC(R_386_TLS_LDM_CALL, 30) +ELF_RELOC(R_386_TLS_LDM_POP, 31) +ELF_RELOC(R_386_TLS_LDO_32, 32) +ELF_RELOC(R_386_TLS_IE_32, 33) +ELF_RELOC(R_386_TLS_LE_32, 34) +ELF_RELOC(R_386_TLS_DTPMOD32, 35) +ELF_RELOC(R_386_TLS_DTPOFF32, 36) +ELF_RELOC(R_386_TLS_TPOFF32, 37) +ELF_RELOC(R_386_TLS_GOTDESC, 39) +ELF_RELOC(R_386_TLS_DESC_CALL, 40) +ELF_RELOC(R_386_TLS_DESC, 41) +ELF_RELOC(R_386_IRELATIVE, 42) +ELF_RELOC(R_386_GOT32X, 43) diff --git a/interpreter/llvm/src/include/llvm/BinaryFormat/ELFRelocs/x86_64.def b/interpreter/llvm/src/include/llvm/BinaryFormat/ELFRelocs/x86_64.def new file mode 100644 index 0000000000000..18fdcf9472dc4 --- /dev/null +++ b/interpreter/llvm/src/include/llvm/BinaryFormat/ELFRelocs/x86_64.def @@ -0,0 +1,45 @@ + +#ifndef ELF_RELOC +#error "ELF_RELOC must be defined" +#endif + +ELF_RELOC(R_X86_64_NONE, 0) +ELF_RELOC(R_X86_64_64, 1) +ELF_RELOC(R_X86_64_PC32, 2) +ELF_RELOC(R_X86_64_GOT32, 3) +ELF_RELOC(R_X86_64_PLT32, 4) +ELF_RELOC(R_X86_64_COPY, 5) +ELF_RELOC(R_X86_64_GLOB_DAT, 6) +ELF_RELOC(R_X86_64_JUMP_SLOT, 7) +ELF_RELOC(R_X86_64_RELATIVE, 8) +ELF_RELOC(R_X86_64_GOTPCREL, 9) +ELF_RELOC(R_X86_64_32, 10) +ELF_RELOC(R_X86_64_32S, 11) +ELF_RELOC(R_X86_64_16, 12) +ELF_RELOC(R_X86_64_PC16, 13) +ELF_RELOC(R_X86_64_8, 14) +ELF_RELOC(R_X86_64_PC8, 15) +ELF_RELOC(R_X86_64_DTPMOD64, 16) +ELF_RELOC(R_X86_64_DTPOFF64, 17) +ELF_RELOC(R_X86_64_TPOFF64, 18) +ELF_RELOC(R_X86_64_TLSGD, 19) +ELF_RELOC(R_X86_64_TLSLD, 20) +ELF_RELOC(R_X86_64_DTPOFF32, 21) +ELF_RELOC(R_X86_64_GOTTPOFF, 22) +ELF_RELOC(R_X86_64_TPOFF32, 23) +ELF_RELOC(R_X86_64_PC64, 24) +ELF_RELOC(R_X86_64_GOTOFF64, 25) +ELF_RELOC(R_X86_64_GOTPC32, 26) +ELF_RELOC(R_X86_64_GOT64, 27) +ELF_RELOC(R_X86_64_GOTPCREL64, 28) +ELF_RELOC(R_X86_64_GOTPC64, 29) +ELF_RELOC(R_X86_64_GOTPLT64, 30) +ELF_RELOC(R_X86_64_PLTOFF64, 31) +ELF_RELOC(R_X86_64_SIZE32, 32) +ELF_RELOC(R_X86_64_SIZE64, 33) +ELF_RELOC(R_X86_64_GOTPC32_TLSDESC, 34) +ELF_RELOC(R_X86_64_TLSDESC_CALL, 35) +ELF_RELOC(R_X86_64_TLSDESC, 36) +ELF_RELOC(R_X86_64_IRELATIVE, 37) +ELF_RELOC(R_X86_64_GOTPCRELX, 41) +ELF_RELOC(R_X86_64_REX_GOTPCRELX, 42) diff --git a/interpreter/llvm/src/include/llvm/BinaryFormat/MachO.def b/interpreter/llvm/src/include/llvm/BinaryFormat/MachO.def new file mode 100644 index 0000000000000..95de48d2b19eb --- /dev/null +++ b/interpreter/llvm/src/include/llvm/BinaryFormat/MachO.def @@ -0,0 +1,120 @@ +//===-- llvm/Support/MachO.def - The MachO file definitions -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Definitions for MachO files +// +//===----------------------------------------------------------------------===// + +#ifdef HANDLE_LOAD_COMMAND + +HANDLE_LOAD_COMMAND(LC_SEGMENT, 0x00000001u, segment_command) +HANDLE_LOAD_COMMAND(LC_SYMTAB, 0x00000002u, symtab_command) +// LC_SYMSEG is obsolete and no longer supported. +HANDLE_LOAD_COMMAND(LC_SYMSEG, 0x00000003u, symseg_command) +HANDLE_LOAD_COMMAND(LC_THREAD, 0x00000004u, thread_command) +HANDLE_LOAD_COMMAND(LC_UNIXTHREAD, 0x00000005u, thread_command) +// LC_LOADFVMLIB is obsolete and no longer supported. +HANDLE_LOAD_COMMAND(LC_LOADFVMLIB, 0x00000006u, fvmlib_command) +// LC_IDFVMLIB is obsolete and no longer supported.
+HANDLE_LOAD_COMMAND(LC_IDFVMLIB, 0x00000007u, fvmlib_command) +// LC_IDENT is obsolete and no longer supported. +HANDLE_LOAD_COMMAND(LC_IDENT, 0x00000008u, ident_command) +// LC_FVMFILE is obsolete and no longer supported. +HANDLE_LOAD_COMMAND(LC_FVMFILE, 0x00000009u, fvmfile_command) +// LC_PREPAGE is obsolete and no longer supported. +HANDLE_LOAD_COMMAND(LC_PREPAGE, 0x0000000Au, load_command) +HANDLE_LOAD_COMMAND(LC_DYSYMTAB, 0x0000000Bu, dysymtab_command) +HANDLE_LOAD_COMMAND(LC_LOAD_DYLIB, 0x0000000Cu, dylib_command) +HANDLE_LOAD_COMMAND(LC_ID_DYLIB, 0x0000000Du, dylib_command) +HANDLE_LOAD_COMMAND(LC_LOAD_DYLINKER, 0x0000000Eu, dylinker_command) +HANDLE_LOAD_COMMAND(LC_ID_DYLINKER, 0x0000000Fu, dylinker_command) +// LC_PREBOUND_DYLIB is obsolete and no longer supported. +HANDLE_LOAD_COMMAND(LC_PREBOUND_DYLIB, 0x00000010u, prebound_dylib_command) +HANDLE_LOAD_COMMAND(LC_ROUTINES, 0x00000011u, routines_command) +HANDLE_LOAD_COMMAND(LC_SUB_FRAMEWORK, 0x00000012u, sub_framework_command) +HANDLE_LOAD_COMMAND(LC_SUB_UMBRELLA, 0x00000013u, sub_umbrella_command) +HANDLE_LOAD_COMMAND(LC_SUB_CLIENT, 0x00000014u, sub_client_command) +HANDLE_LOAD_COMMAND(LC_SUB_LIBRARY, 0x00000015u, sub_library_command) +// LC_TWOLEVEL_HINTS is obsolete and no longer supported. +HANDLE_LOAD_COMMAND(LC_TWOLEVEL_HINTS, 0x00000016u, twolevel_hints_command) +// LC_PREBIND_CKSUM is obsolete and no longer supported. +HANDLE_LOAD_COMMAND(LC_PREBIND_CKSUM, 0x00000017u, prebind_cksum_command) +// LC_LOAD_WEAK_DYLIB is obsolete and no longer supported. +HANDLE_LOAD_COMMAND(LC_LOAD_WEAK_DYLIB, 0x80000018u, dylib_command) +HANDLE_LOAD_COMMAND(LC_SEGMENT_64, 0x00000019u, segment_command_64) +HANDLE_LOAD_COMMAND(LC_ROUTINES_64, 0x0000001Au, routines_command_64) +HANDLE_LOAD_COMMAND(LC_UUID, 0x0000001Bu, uuid_command) +HANDLE_LOAD_COMMAND(LC_RPATH, 0x8000001Cu, rpath_command) +HANDLE_LOAD_COMMAND(LC_CODE_SIGNATURE, 0x0000001Du, linkedit_data_command) +HANDLE_LOAD_COMMAND(LC_SEGMENT_SPLIT_INFO, 0x0000001Eu, linkedit_data_command) +HANDLE_LOAD_COMMAND(LC_REEXPORT_DYLIB, 0x8000001Fu, dylib_command) +HANDLE_LOAD_COMMAND(LC_LAZY_LOAD_DYLIB, 0x00000020u, dylib_command) +HANDLE_LOAD_COMMAND(LC_ENCRYPTION_INFO, 0x00000021u, encryption_info_command) +HANDLE_LOAD_COMMAND(LC_DYLD_INFO, 0x00000022u, dyld_info_command) +HANDLE_LOAD_COMMAND(LC_DYLD_INFO_ONLY, 0x80000022u, dyld_info_command) +HANDLE_LOAD_COMMAND(LC_LOAD_UPWARD_DYLIB, 0x80000023u, dylib_command) +HANDLE_LOAD_COMMAND(LC_VERSION_MIN_MACOSX, 0x00000024u, version_min_command) +HANDLE_LOAD_COMMAND(LC_VERSION_MIN_IPHONEOS, 0x00000025u, version_min_command) +HANDLE_LOAD_COMMAND(LC_FUNCTION_STARTS, 0x00000026u, linkedit_data_command) +HANDLE_LOAD_COMMAND(LC_DYLD_ENVIRONMENT, 0x00000027u, dylinker_command) +HANDLE_LOAD_COMMAND(LC_MAIN, 0x80000028u, entry_point_command) +HANDLE_LOAD_COMMAND(LC_DATA_IN_CODE, 0x00000029u, linkedit_data_command) +HANDLE_LOAD_COMMAND(LC_SOURCE_VERSION, 0x0000002Au, source_version_command) +HANDLE_LOAD_COMMAND(LC_DYLIB_CODE_SIGN_DRS, 0x0000002Bu, linkedit_data_command) +HANDLE_LOAD_COMMAND(LC_ENCRYPTION_INFO_64, 0x0000002Cu, + encryption_info_command_64) +HANDLE_LOAD_COMMAND(LC_LINKER_OPTION, 0x0000002Du, linker_option_command) +HANDLE_LOAD_COMMAND(LC_LINKER_OPTIMIZATION_HINT, 0x0000002Eu, linkedit_data_command) +HANDLE_LOAD_COMMAND(LC_VERSION_MIN_TVOS, 0x0000002Fu, version_min_command) +HANDLE_LOAD_COMMAND(LC_VERSION_MIN_WATCHOS, 0x00000030u, version_min_command) +HANDLE_LOAD_COMMAND(LC_NOTE, 0x00000031u, note_command) 
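MachO.def, whose entries continue just below, is the same X-macro scheme with two hooks: HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) and LOAD_COMMAND_STRUCT(LCStruct). MachO.h, further down in this diff, instantiates the first as "LCName = LCValue," to build its LoadCommandType enum; another plausible consumer, sketched here with two entries inlined in place of the real #include (loadCommandName is my name, not part of the diff), maps a cmd value to a string for diagnostics:

#include <cstdint>

const char *loadCommandName(uint32_t Cmd) {
  switch (Cmd) {
#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct)                         \
  case LCValue:                                                                \
    return #LCName;
  HANDLE_LOAD_COMMAND(LC_SEGMENT, 0x00000001u, segment_command)
  HANDLE_LOAD_COMMAND(LC_SYMTAB, 0x00000002u, symtab_command)
  // real code: #include "llvm/BinaryFormat/MachO.def"
#undef HANDLE_LOAD_COMMAND
  default:
    return "unknown load command";
  }
}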
+HANDLE_LOAD_COMMAND(LC_BUILD_VERSION, 0x00000032u, build_version_command) + +#endif + +#ifdef LOAD_COMMAND_STRUCT + +LOAD_COMMAND_STRUCT(dyld_info_command) +LOAD_COMMAND_STRUCT(dylib_command) +LOAD_COMMAND_STRUCT(dylinker_command) +LOAD_COMMAND_STRUCT(dysymtab_command) +LOAD_COMMAND_STRUCT(encryption_info_command) +LOAD_COMMAND_STRUCT(encryption_info_command_64) +LOAD_COMMAND_STRUCT(entry_point_command) +LOAD_COMMAND_STRUCT(fvmfile_command) +LOAD_COMMAND_STRUCT(fvmlib_command) +LOAD_COMMAND_STRUCT(ident_command) +LOAD_COMMAND_STRUCT(linkedit_data_command) +LOAD_COMMAND_STRUCT(linker_option_command) +LOAD_COMMAND_STRUCT(load_command) +LOAD_COMMAND_STRUCT(prebind_cksum_command) +LOAD_COMMAND_STRUCT(prebound_dylib_command) +LOAD_COMMAND_STRUCT(routines_command) +LOAD_COMMAND_STRUCT(routines_command_64) +LOAD_COMMAND_STRUCT(rpath_command) +LOAD_COMMAND_STRUCT(segment_command) +LOAD_COMMAND_STRUCT(segment_command_64) +LOAD_COMMAND_STRUCT(source_version_command) +LOAD_COMMAND_STRUCT(sub_client_command) +LOAD_COMMAND_STRUCT(sub_framework_command) +LOAD_COMMAND_STRUCT(sub_library_command) +LOAD_COMMAND_STRUCT(sub_umbrella_command) +LOAD_COMMAND_STRUCT(symseg_command) +LOAD_COMMAND_STRUCT(symtab_command) +LOAD_COMMAND_STRUCT(thread_command) +LOAD_COMMAND_STRUCT(twolevel_hints_command) +LOAD_COMMAND_STRUCT(uuid_command) +LOAD_COMMAND_STRUCT(version_min_command) +LOAD_COMMAND_STRUCT(note_command) +LOAD_COMMAND_STRUCT(build_version_command) + +#endif + +#undef HANDLE_LOAD_COMMAND +#undef LOAD_COMMAND_STRUCT diff --git a/interpreter/llvm/src/include/llvm/BinaryFormat/MachO.h b/interpreter/llvm/src/include/llvm/BinaryFormat/MachO.h new file mode 100644 index 0000000000000..3529c72acd9d6 --- /dev/null +++ b/interpreter/llvm/src/include/llvm/BinaryFormat/MachO.h @@ -0,0 +1,1985 @@ +//===-- llvm/BinaryFormat/MachO.h - The MachO file format -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines manifest constants for the MachO object file format.
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_BINARYFORMAT_MACHO_H +#define LLVM_BINARYFORMAT_MACHO_H + +#include "llvm/Support/Compiler.h" +#include "llvm/Support/DataTypes.h" +#include "llvm/Support/Host.h" + +namespace llvm { +namespace MachO { +// Enums from +enum : uint32_t { + // Constants for the "magic" field in llvm::MachO::mach_header and + // llvm::MachO::mach_header_64 + MH_MAGIC = 0xFEEDFACEu, + MH_CIGAM = 0xCEFAEDFEu, + MH_MAGIC_64 = 0xFEEDFACFu, + MH_CIGAM_64 = 0xCFFAEDFEu, + FAT_MAGIC = 0xCAFEBABEu, + FAT_CIGAM = 0xBEBAFECAu, + FAT_MAGIC_64 = 0xCAFEBABFu, + FAT_CIGAM_64 = 0xBFBAFECAu +}; + +enum HeaderFileType { + // Constants for the "filetype" field in llvm::MachO::mach_header and + // llvm::MachO::mach_header_64 + MH_OBJECT = 0x1u, + MH_EXECUTE = 0x2u, + MH_FVMLIB = 0x3u, + MH_CORE = 0x4u, + MH_PRELOAD = 0x5u, + MH_DYLIB = 0x6u, + MH_DYLINKER = 0x7u, + MH_BUNDLE = 0x8u, + MH_DYLIB_STUB = 0x9u, + MH_DSYM = 0xAu, + MH_KEXT_BUNDLE = 0xBu +}; + +enum { + // Constant bits for the "flags" field in llvm::MachO::mach_header and + // llvm::MachO::mach_header_64 + MH_NOUNDEFS = 0x00000001u, + MH_INCRLINK = 0x00000002u, + MH_DYLDLINK = 0x00000004u, + MH_BINDATLOAD = 0x00000008u, + MH_PREBOUND = 0x00000010u, + MH_SPLIT_SEGS = 0x00000020u, + MH_LAZY_INIT = 0x00000040u, + MH_TWOLEVEL = 0x00000080u, + MH_FORCE_FLAT = 0x00000100u, + MH_NOMULTIDEFS = 0x00000200u, + MH_NOFIXPREBINDING = 0x00000400u, + MH_PREBINDABLE = 0x00000800u, + MH_ALLMODSBOUND = 0x00001000u, + MH_SUBSECTIONS_VIA_SYMBOLS = 0x00002000u, + MH_CANONICAL = 0x00004000u, + MH_WEAK_DEFINES = 0x00008000u, + MH_BINDS_TO_WEAK = 0x00010000u, + MH_ALLOW_STACK_EXECUTION = 0x00020000u, + MH_ROOT_SAFE = 0x00040000u, + MH_SETUID_SAFE = 0x00080000u, + MH_NO_REEXPORTED_DYLIBS = 0x00100000u, + MH_PIE = 0x00200000u, + MH_DEAD_STRIPPABLE_DYLIB = 0x00400000u, + MH_HAS_TLV_DESCRIPTORS = 0x00800000u, + MH_NO_HEAP_EXECUTION = 0x01000000u, + MH_APP_EXTENSION_SAFE = 0x02000000u, + MH_NLIST_OUTOFSYNC_WITH_DYLDINFO = 0x04000000u +}; + +enum : uint32_t { + // Flags for the "cmd" field in llvm::MachO::load_command + LC_REQ_DYLD = 0x80000000u +}; + +#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) LCName = LCValue, + +enum LoadCommandType : uint32_t { +#include "llvm/BinaryFormat/MachO.def" +}; + +#undef HANDLE_LOAD_COMMAND + +enum : uint32_t { + // Constant bits for the "flags" field in llvm::MachO::segment_command + SG_HIGHVM = 0x1u, + SG_FVMLIB = 0x2u, + SG_NORELOC = 0x4u, + SG_PROTECTED_VERSION_1 = 0x8u, + + // Constant masks for the "flags" field in llvm::MachO::section and + // llvm::MachO::section_64 + SECTION_TYPE = 0x000000ffu, // SECTION_TYPE + SECTION_ATTRIBUTES = 0xffffff00u, // SECTION_ATTRIBUTES + SECTION_ATTRIBUTES_USR = 0xff000000u, // SECTION_ATTRIBUTES_USR + SECTION_ATTRIBUTES_SYS = 0x00ffff00u // SECTION_ATTRIBUTES_SYS +}; + +/// These are the section type and attributes fields. A MachO section can +/// have only one Type, but can have any of the attributes specified. +enum SectionType : uint32_t { + // Constant masks for the "flags[7:0]" field in llvm::MachO::section and + // llvm::MachO::section_64 (mask "flags" with SECTION_TYPE) + + /// S_REGULAR - Regular section. + S_REGULAR = 0x00u, + /// S_ZEROFILL - Zero fill on demand section. + S_ZEROFILL = 0x01u, + /// S_CSTRING_LITERALS - Section with literal C strings. + S_CSTRING_LITERALS = 0x02u, + /// S_4BYTE_LITERALS - Section with 4 byte literals. 
+ S_4BYTE_LITERALS = 0x03u, + /// S_8BYTE_LITERALS - Section with 8 byte literals. + S_8BYTE_LITERALS = 0x04u, + /// S_LITERAL_POINTERS - Section with pointers to literals. + S_LITERAL_POINTERS = 0x05u, + /// S_NON_LAZY_SYMBOL_POINTERS - Section with non-lazy symbol pointers. + S_NON_LAZY_SYMBOL_POINTERS = 0x06u, + /// S_LAZY_SYMBOL_POINTERS - Section with lazy symbol pointers. + S_LAZY_SYMBOL_POINTERS = 0x07u, + /// S_SYMBOL_STUBS - Section with symbol stubs, byte size of stub in + /// the Reserved2 field. + S_SYMBOL_STUBS = 0x08u, + /// S_MOD_INIT_FUNC_POINTERS - Section with only function pointers for + /// initialization. + S_MOD_INIT_FUNC_POINTERS = 0x09u, + /// S_MOD_TERM_FUNC_POINTERS - Section with only function pointers for + /// termination. + S_MOD_TERM_FUNC_POINTERS = 0x0au, + /// S_COALESCED - Section contains symbols that are to be coalesced. + S_COALESCED = 0x0bu, + /// S_GB_ZEROFILL - Zero fill on demand section (that can be larger than 4 + /// gigabytes). + S_GB_ZEROFILL = 0x0cu, + /// S_INTERPOSING - Section with only pairs of function pointers for + /// interposing. + S_INTERPOSING = 0x0du, + /// S_16BYTE_LITERALS - Section with only 16 byte literals. + S_16BYTE_LITERALS = 0x0eu, + /// S_DTRACE_DOF - Section contains DTrace Object Format. + S_DTRACE_DOF = 0x0fu, + /// S_LAZY_DYLIB_SYMBOL_POINTERS - Section with lazy symbol pointers to + /// lazy loaded dylibs. + S_LAZY_DYLIB_SYMBOL_POINTERS = 0x10u, + /// S_THREAD_LOCAL_REGULAR - Thread local data section. + S_THREAD_LOCAL_REGULAR = 0x11u, + /// S_THREAD_LOCAL_ZEROFILL - Thread local zerofill section. + S_THREAD_LOCAL_ZEROFILL = 0x12u, + /// S_THREAD_LOCAL_VARIABLES - Section with thread local variable + /// structure data. + S_THREAD_LOCAL_VARIABLES = 0x13u, + /// S_THREAD_LOCAL_VARIABLE_POINTERS - Section with pointers to thread + /// local structures. + S_THREAD_LOCAL_VARIABLE_POINTERS = 0x14u, + /// S_THREAD_LOCAL_INIT_FUNCTION_POINTERS - Section with thread local + /// variable initialization pointers to functions. + S_THREAD_LOCAL_INIT_FUNCTION_POINTERS = 0x15u, + + LAST_KNOWN_SECTION_TYPE = S_THREAD_LOCAL_INIT_FUNCTION_POINTERS +}; + +enum : uint32_t { + // Constant masks for the "flags[31:24]" field in llvm::MachO::section and + // llvm::MachO::section_64 (mask "flags" with SECTION_ATTRIBUTES_USR) + + /// S_ATTR_PURE_INSTRUCTIONS - Section contains only true machine + /// instructions. + S_ATTR_PURE_INSTRUCTIONS = 0x80000000u, + /// S_ATTR_NO_TOC - Section contains coalesced symbols that are not to be + /// in a ranlib table of contents. + S_ATTR_NO_TOC = 0x40000000u, + /// S_ATTR_STRIP_STATIC_SYMS - Ok to strip static symbols in this section + /// in files with the MH_DYLDLINK flag. + S_ATTR_STRIP_STATIC_SYMS = 0x20000000u, + /// S_ATTR_NO_DEAD_STRIP - No dead stripping. + S_ATTR_NO_DEAD_STRIP = 0x10000000u, + /// S_ATTR_LIVE_SUPPORT - Blocks are live if they reference live blocks. + S_ATTR_LIVE_SUPPORT = 0x08000000u, + /// S_ATTR_SELF_MODIFYING_CODE - Used with i386 code stubs written on by + /// dyld. + S_ATTR_SELF_MODIFYING_CODE = 0x04000000u, + /// S_ATTR_DEBUG - A debug section. + S_ATTR_DEBUG = 0x02000000u, + + // Constant masks for the "flags[23:8]" field in llvm::MachO::section and + // llvm::MachO::section_64 (mask "flags" with SECTION_ATTRIBUTES_SYS) + + /// S_ATTR_SOME_INSTRUCTIONS - Section contains some machine instructions. + S_ATTR_SOME_INSTRUCTIONS = 0x00000400u, + /// S_ATTR_EXT_RELOC - Section has external relocation entries.
+ S_ATTR_EXT_RELOC = 0x00000200u, + /// S_ATTR_LOC_RELOC - Section has local relocation entries. + S_ATTR_LOC_RELOC = 0x00000100u, + + // Constant masks for the value of an indirect symbol in an indirect + // symbol table + INDIRECT_SYMBOL_LOCAL = 0x80000000u, + INDIRECT_SYMBOL_ABS = 0x40000000u +}; + +enum DataRegionType { + // Constants for the "kind" field in a data_in_code_entry structure + DICE_KIND_DATA = 1u, + DICE_KIND_JUMP_TABLE8 = 2u, + DICE_KIND_JUMP_TABLE16 = 3u, + DICE_KIND_JUMP_TABLE32 = 4u, + DICE_KIND_ABS_JUMP_TABLE32 = 5u +}; + +enum RebaseType { + REBASE_TYPE_POINTER = 1u, + REBASE_TYPE_TEXT_ABSOLUTE32 = 2u, + REBASE_TYPE_TEXT_PCREL32 = 3u +}; + +enum { REBASE_OPCODE_MASK = 0xF0u, REBASE_IMMEDIATE_MASK = 0x0Fu }; + +enum RebaseOpcode { + REBASE_OPCODE_DONE = 0x00u, + REBASE_OPCODE_SET_TYPE_IMM = 0x10u, + REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB = 0x20u, + REBASE_OPCODE_ADD_ADDR_ULEB = 0x30u, + REBASE_OPCODE_ADD_ADDR_IMM_SCALED = 0x40u, + REBASE_OPCODE_DO_REBASE_IMM_TIMES = 0x50u, + REBASE_OPCODE_DO_REBASE_ULEB_TIMES = 0x60u, + REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB = 0x70u, + REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB = 0x80u +}; + +enum BindType { + BIND_TYPE_POINTER = 1u, + BIND_TYPE_TEXT_ABSOLUTE32 = 2u, + BIND_TYPE_TEXT_PCREL32 = 3u +}; + +enum BindSpecialDylib { + BIND_SPECIAL_DYLIB_SELF = 0, + BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE = -1, + BIND_SPECIAL_DYLIB_FLAT_LOOKUP = -2 +}; + +enum { + BIND_SYMBOL_FLAGS_WEAK_IMPORT = 0x1u, + BIND_SYMBOL_FLAGS_NON_WEAK_DEFINITION = 0x8u, + + BIND_OPCODE_MASK = 0xF0u, + BIND_IMMEDIATE_MASK = 0x0Fu +}; + +enum BindOpcode { + BIND_OPCODE_DONE = 0x00u, + BIND_OPCODE_SET_DYLIB_ORDINAL_IMM = 0x10u, + BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB = 0x20u, + BIND_OPCODE_SET_DYLIB_SPECIAL_IMM = 0x30u, + BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM = 0x40u, + BIND_OPCODE_SET_TYPE_IMM = 0x50u, + BIND_OPCODE_SET_ADDEND_SLEB = 0x60u, + BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB = 0x70u, + BIND_OPCODE_ADD_ADDR_ULEB = 0x80u, + BIND_OPCODE_DO_BIND = 0x90u, + BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB = 0xA0u, + BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED = 0xB0u, + BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB = 0xC0u +}; + +enum { + EXPORT_SYMBOL_FLAGS_KIND_MASK = 0x03u, + EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION = 0x04u, + EXPORT_SYMBOL_FLAGS_REEXPORT = 0x08u, + EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER = 0x10u +}; + +enum ExportSymbolKind { + EXPORT_SYMBOL_FLAGS_KIND_REGULAR = 0x00u, + EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL = 0x01u, + EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE = 0x02u +}; + +enum { + // Constant masks for the "n_type" field in llvm::MachO::nlist and + // llvm::MachO::nlist_64 + N_STAB = 0xe0, + N_PEXT = 0x10, + N_TYPE = 0x0e, + N_EXT = 0x01 +}; + +enum NListType : uint8_t { + // Constants for the "n_type & N_TYPE" llvm::MachO::nlist and + // llvm::MachO::nlist_64 + N_UNDF = 0x0u, + N_ABS = 0x2u, + N_SECT = 0xeu, + N_PBUD = 0xcu, + N_INDR = 0xau +}; + +enum SectionOrdinal { + // Constants for the "n_sect" field in llvm::MachO::nlist and + // llvm::MachO::nlist_64 + NO_SECT = 0u, + MAX_SECT = 0xffu +}; + +enum { + // Constant masks for the "n_desc" field in llvm::MachO::nlist and + // llvm::MachO::nlist_64 + // The low 3 bits are for the REFERENCE_TYPE.
+ REFERENCE_TYPE = 0x7, + REFERENCE_FLAG_UNDEFINED_NON_LAZY = 0, + REFERENCE_FLAG_UNDEFINED_LAZY = 1, + REFERENCE_FLAG_DEFINED = 2, + REFERENCE_FLAG_PRIVATE_DEFINED = 3, + REFERENCE_FLAG_PRIVATE_UNDEFINED_NON_LAZY = 4, + REFERENCE_FLAG_PRIVATE_UNDEFINED_LAZY = 5, + // Flag bits (some overlap with the library ordinal bits). + N_ARM_THUMB_DEF = 0x0008u, + REFERENCED_DYNAMICALLY = 0x0010u, + N_NO_DEAD_STRIP = 0x0020u, + N_WEAK_REF = 0x0040u, + N_WEAK_DEF = 0x0080u, + N_SYMBOL_RESOLVER = 0x0100u, + N_ALT_ENTRY = 0x0200u, + // For undefined symbols coming from libraries, see GET_LIBRARY_ORDINAL() + // as these are in the top 8 bits. + SELF_LIBRARY_ORDINAL = 0x0, + MAX_LIBRARY_ORDINAL = 0xfd, + DYNAMIC_LOOKUP_ORDINAL = 0xfe, + EXECUTABLE_ORDINAL = 0xff +}; + +enum StabType { + // Constant values for the "n_type" field in llvm::MachO::nlist and + // llvm::MachO::nlist_64 when "(n_type & N_STAB) != 0" + N_GSYM = 0x20u, + N_FNAME = 0x22u, + N_FUN = 0x24u, + N_STSYM = 0x26u, + N_LCSYM = 0x28u, + N_BNSYM = 0x2Eu, + N_PC = 0x30u, + N_AST = 0x32u, + N_OPT = 0x3Cu, + N_RSYM = 0x40u, + N_SLINE = 0x44u, + N_ENSYM = 0x4Eu, + N_SSYM = 0x60u, + N_SO = 0x64u, + N_OSO = 0x66u, + N_LSYM = 0x80u, + N_BINCL = 0x82u, + N_SOL = 0x84u, + N_PARAMS = 0x86u, + N_VERSION = 0x88u, + N_OLEVEL = 0x8Au, + N_PSYM = 0xA0u, + N_EINCL = 0xA2u, + N_ENTRY = 0xA4u, + N_LBRAC = 0xC0u, + N_EXCL = 0xC2u, + N_RBRAC = 0xE0u, + N_BCOMM = 0xE2u, + N_ECOMM = 0xE4u, + N_ECOML = 0xE8u, + N_LENG = 0xFEu +}; + +enum : uint32_t { + // Constant values for the r_symbolnum field in an + // llvm::MachO::relocation_info structure when r_extern is 0. + R_ABS = 0, + + // Constant bits for the r_address field in an + // llvm::MachO::relocation_info structure. + R_SCATTERED = 0x80000000 +}; + +enum RelocationInfoType { + // Constant values for the r_type field in an + // llvm::MachO::relocation_info or llvm::MachO::scattered_relocation_info + // structure. + GENERIC_RELOC_VANILLA = 0, + GENERIC_RELOC_PAIR = 1, + GENERIC_RELOC_SECTDIFF = 2, + GENERIC_RELOC_PB_LA_PTR = 3, + GENERIC_RELOC_LOCAL_SECTDIFF = 4, + GENERIC_RELOC_TLV = 5, + + // Constant values for the r_type field in a PowerPC architecture + // llvm::MachO::relocation_info or llvm::MachO::scattered_relocation_info + // structure. + PPC_RELOC_VANILLA = GENERIC_RELOC_VANILLA, + PPC_RELOC_PAIR = GENERIC_RELOC_PAIR, + PPC_RELOC_BR14 = 2, + PPC_RELOC_BR24 = 3, + PPC_RELOC_HI16 = 4, + PPC_RELOC_LO16 = 5, + PPC_RELOC_HA16 = 6, + PPC_RELOC_LO14 = 7, + PPC_RELOC_SECTDIFF = 8, + PPC_RELOC_PB_LA_PTR = 9, + PPC_RELOC_HI16_SECTDIFF = 10, + PPC_RELOC_LO16_SECTDIFF = 11, + PPC_RELOC_HA16_SECTDIFF = 12, + PPC_RELOC_JBSR = 13, + PPC_RELOC_LO14_SECTDIFF = 14, + PPC_RELOC_LOCAL_SECTDIFF = 15, + + // Constant values for the r_type field in an ARM architecture + // llvm::MachO::relocation_info or llvm::MachO::scattered_relocation_info + // structure. + ARM_RELOC_VANILLA = GENERIC_RELOC_VANILLA, + ARM_RELOC_PAIR = GENERIC_RELOC_PAIR, + ARM_RELOC_SECTDIFF = GENERIC_RELOC_SECTDIFF, + ARM_RELOC_LOCAL_SECTDIFF = 3, + ARM_RELOC_PB_LA_PTR = 4, + ARM_RELOC_BR24 = 5, + ARM_THUMB_RELOC_BR22 = 6, + ARM_THUMB_32BIT_BRANCH = 7, // obsolete + ARM_RELOC_HALF = 8, + ARM_RELOC_HALF_SECTDIFF = 9, + + // Constant values for the r_type field in an ARM64 architecture + // llvm::MachO::relocation_info or llvm::MachO::scattered_relocation_info + // structure. + + // For pointers. + ARM64_RELOC_UNSIGNED = 0, + // Must be followed by an ARM64_RELOC_UNSIGNED + ARM64_RELOC_SUBTRACTOR = 1, + // A B/BL instruction with 26-bit displacement. 
+ ARM64_RELOC_BRANCH26 = 2, + // PC-rel distance to page of target. + ARM64_RELOC_PAGE21 = 3, + // Offset within page, scaled by r_length. + ARM64_RELOC_PAGEOFF12 = 4, + // PC-rel distance to page of GOT slot. + ARM64_RELOC_GOT_LOAD_PAGE21 = 5, + // Offset within page of GOT slot, scaled by r_length. + ARM64_RELOC_GOT_LOAD_PAGEOFF12 = 6, + // For pointers to GOT slots. + ARM64_RELOC_POINTER_TO_GOT = 7, + // PC-rel distance to page of TLVP slot. + ARM64_RELOC_TLVP_LOAD_PAGE21 = 8, + // Offset within page of TLVP slot, scaled by r_length. + ARM64_RELOC_TLVP_LOAD_PAGEOFF12 = 9, + // Must be followed by ARM64_RELOC_PAGE21 or ARM64_RELOC_PAGEOFF12. + ARM64_RELOC_ADDEND = 10, + + // Constant values for the r_type field in an x86_64 architecture + // llvm::MachO::relocation_info or llvm::MachO::scattered_relocation_info + // structure + X86_64_RELOC_UNSIGNED = 0, + X86_64_RELOC_SIGNED = 1, + X86_64_RELOC_BRANCH = 2, + X86_64_RELOC_GOT_LOAD = 3, + X86_64_RELOC_GOT = 4, + X86_64_RELOC_SUBTRACTOR = 5, + X86_64_RELOC_SIGNED_1 = 6, + X86_64_RELOC_SIGNED_2 = 7, + X86_64_RELOC_SIGNED_4 = 8, + X86_64_RELOC_TLV = 9 +}; + +// Values for segment_command.initprot. +// From +enum { VM_PROT_READ = 0x1, VM_PROT_WRITE = 0x2, VM_PROT_EXECUTE = 0x4 }; + +// Values for platform field in build_version_command. +enum { + PLATFORM_MACOS = 1, + PLATFORM_IOS = 2, + PLATFORM_TVOS = 3, + PLATFORM_WATCHOS = 4, + PLATFORM_BRIDGEOS = 5 +}; + +// Values for tools enum in build_tool_version. +enum { TOOL_CLANG = 1, TOOL_SWIFT = 2, TOOL_LD = 3 }; + +// Structs from + +struct mach_header { + uint32_t magic; + uint32_t cputype; + uint32_t cpusubtype; + uint32_t filetype; + uint32_t ncmds; + uint32_t sizeofcmds; + uint32_t flags; +}; + +struct mach_header_64 { + uint32_t magic; + uint32_t cputype; + uint32_t cpusubtype; + uint32_t filetype; + uint32_t ncmds; + uint32_t sizeofcmds; + uint32_t flags; + uint32_t reserved; +}; + +struct load_command { + uint32_t cmd; + uint32_t cmdsize; +}; + +struct segment_command { + uint32_t cmd; + uint32_t cmdsize; + char segname[16]; + uint32_t vmaddr; + uint32_t vmsize; + uint32_t fileoff; + uint32_t filesize; + uint32_t maxprot; + uint32_t initprot; + uint32_t nsects; + uint32_t flags; +}; + +struct segment_command_64 { + uint32_t cmd; + uint32_t cmdsize; + char segname[16]; + uint64_t vmaddr; + uint64_t vmsize; + uint64_t fileoff; + uint64_t filesize; + uint32_t maxprot; + uint32_t initprot; + uint32_t nsects; + uint32_t flags; +}; + +struct section { + char sectname[16]; + char segname[16]; + uint32_t addr; + uint32_t size; + uint32_t offset; + uint32_t align; + uint32_t reloff; + uint32_t nreloc; + uint32_t flags; + uint32_t reserved1; + uint32_t reserved2; +}; + +struct section_64 { + char sectname[16]; + char segname[16]; + uint64_t addr; + uint64_t size; + uint32_t offset; + uint32_t align; + uint32_t reloff; + uint32_t nreloc; + uint32_t flags; + uint32_t reserved1; + uint32_t reserved2; + uint32_t reserved3; +}; + +struct fvmlib { + uint32_t name; + uint32_t minor_version; + uint32_t header_addr; +}; + +// The fvmlib_command is obsolete and no longer supported. 
+struct fvmlib_command { + uint32_t cmd; + uint32_t cmdsize; + struct fvmlib fvmlib; +}; + +struct dylib { + uint32_t name; + uint32_t timestamp; + uint32_t current_version; + uint32_t compatibility_version; +}; + +struct dylib_command { + uint32_t cmd; + uint32_t cmdsize; + struct dylib dylib; +}; + +struct sub_framework_command { + uint32_t cmd; + uint32_t cmdsize; + uint32_t umbrella; +}; + +struct sub_client_command { + uint32_t cmd; + uint32_t cmdsize; + uint32_t client; +}; + +struct sub_umbrella_command { + uint32_t cmd; + uint32_t cmdsize; + uint32_t sub_umbrella; +}; + +struct sub_library_command { + uint32_t cmd; + uint32_t cmdsize; + uint32_t sub_library; +}; + +// The prebound_dylib_command is obsolete and no longer supported. +struct prebound_dylib_command { + uint32_t cmd; + uint32_t cmdsize; + uint32_t name; + uint32_t nmodules; + uint32_t linked_modules; +}; + +struct dylinker_command { + uint32_t cmd; + uint32_t cmdsize; + uint32_t name; +}; + +struct thread_command { + uint32_t cmd; + uint32_t cmdsize; +}; + +struct routines_command { + uint32_t cmd; + uint32_t cmdsize; + uint32_t init_address; + uint32_t init_module; + uint32_t reserved1; + uint32_t reserved2; + uint32_t reserved3; + uint32_t reserved4; + uint32_t reserved5; + uint32_t reserved6; +}; + +struct routines_command_64 { + uint32_t cmd; + uint32_t cmdsize; + uint64_t init_address; + uint64_t init_module; + uint64_t reserved1; + uint64_t reserved2; + uint64_t reserved3; + uint64_t reserved4; + uint64_t reserved5; + uint64_t reserved6; +}; + +struct symtab_command { + uint32_t cmd; + uint32_t cmdsize; + uint32_t symoff; + uint32_t nsyms; + uint32_t stroff; + uint32_t strsize; +}; + +struct dysymtab_command { + uint32_t cmd; + uint32_t cmdsize; + uint32_t ilocalsym; + uint32_t nlocalsym; + uint32_t iextdefsym; + uint32_t nextdefsym; + uint32_t iundefsym; + uint32_t nundefsym; + uint32_t tocoff; + uint32_t ntoc; + uint32_t modtaboff; + uint32_t nmodtab; + uint32_t extrefsymoff; + uint32_t nextrefsyms; + uint32_t indirectsymoff; + uint32_t nindirectsyms; + uint32_t extreloff; + uint32_t nextrel; + uint32_t locreloff; + uint32_t nlocrel; +}; + +struct dylib_table_of_contents { + uint32_t symbol_index; + uint32_t module_index; +}; + +struct dylib_module { + uint32_t module_name; + uint32_t iextdefsym; + uint32_t nextdefsym; + uint32_t irefsym; + uint32_t nrefsym; + uint32_t ilocalsym; + uint32_t nlocalsym; + uint32_t iextrel; + uint32_t nextrel; + uint32_t iinit_iterm; + uint32_t ninit_nterm; + uint32_t objc_module_info_addr; + uint32_t objc_module_info_size; +}; + +struct dylib_module_64 { + uint32_t module_name; + uint32_t iextdefsym; + uint32_t nextdefsym; + uint32_t irefsym; + uint32_t nrefsym; + uint32_t ilocalsym; + uint32_t nlocalsym; + uint32_t iextrel; + uint32_t nextrel; + uint32_t iinit_iterm; + uint32_t ninit_nterm; + uint32_t objc_module_info_size; + uint64_t objc_module_info_addr; +}; + +struct dylib_reference { + uint32_t isym : 24, flags : 8; +}; + +// The twolevel_hints_command is obsolete and no longer supported. +struct twolevel_hints_command { + uint32_t cmd; + uint32_t cmdsize; + uint32_t offset; + uint32_t nhints; +}; + +// The twolevel_hints_command is obsolete and no longer supported. +struct twolevel_hint { + uint32_t isub_image : 8, itoc : 24; +}; + +// The prebind_cksum_command is obsolete and no longer supported. 
+struct prebind_cksum_command { + uint32_t cmd; + uint32_t cmdsize; + uint32_t cksum; +}; + +struct uuid_command { + uint32_t cmd; + uint32_t cmdsize; + uint8_t uuid[16]; +}; + +struct rpath_command { + uint32_t cmd; + uint32_t cmdsize; + uint32_t path; +}; + +struct linkedit_data_command { + uint32_t cmd; + uint32_t cmdsize; + uint32_t dataoff; + uint32_t datasize; +}; + +struct data_in_code_entry { + uint32_t offset; + uint16_t length; + uint16_t kind; +}; + +struct source_version_command { + uint32_t cmd; + uint32_t cmdsize; + uint64_t version; +}; + +struct encryption_info_command { + uint32_t cmd; + uint32_t cmdsize; + uint32_t cryptoff; + uint32_t cryptsize; + uint32_t cryptid; +}; + +struct encryption_info_command_64 { + uint32_t cmd; + uint32_t cmdsize; + uint32_t cryptoff; + uint32_t cryptsize; + uint32_t cryptid; + uint32_t pad; +}; + +struct version_min_command { + uint32_t cmd; // LC_VERSION_MIN_MACOSX or + // LC_VERSION_MIN_IPHONEOS + uint32_t cmdsize; // sizeof(struct version_min_command) + uint32_t version; // X.Y.Z is encoded in nibbles xxxx.yy.zz + uint32_t sdk; // X.Y.Z is encoded in nibbles xxxx.yy.zz +}; + +struct note_command { + uint32_t cmd; // LC_NOTE + uint32_t cmdsize; // sizeof(struct note_command) + char data_owner[16]; // owner name for this LC_NOTE + uint64_t offset; // file offset of this data + uint64_t size; // length of data region +}; + +struct build_tool_version { + uint32_t tool; // enum for the tool + uint32_t version; // version of the tool +}; + +struct build_version_command { + uint32_t cmd; // LC_BUILD_VERSION + uint32_t cmdsize; // sizeof(struct build_version_command) + + // ntools * sizeof(struct build_tool_version) + uint32_t platform; // platform + uint32_t minos; // X.Y.Z is encoded in nibbles xxxx.yy.zz + uint32_t sdk; // X.Y.Z is encoded in nibbles xxxx.yy.zz + uint32_t ntools; // number of tool entries following this +}; + +struct dyld_info_command { + uint32_t cmd; + uint32_t cmdsize; + uint32_t rebase_off; + uint32_t rebase_size; + uint32_t bind_off; + uint32_t bind_size; + uint32_t weak_bind_off; + uint32_t weak_bind_size; + uint32_t lazy_bind_off; + uint32_t lazy_bind_size; + uint32_t export_off; + uint32_t export_size; +}; + +struct linker_option_command { + uint32_t cmd; + uint32_t cmdsize; + uint32_t count; +}; + +// The symseg_command is obsolete and no longer supported. +struct symseg_command { + uint32_t cmd; + uint32_t cmdsize; + uint32_t offset; + uint32_t size; +}; + +// The ident_command is obsolete and no longer supported. +struct ident_command { + uint32_t cmd; + uint32_t cmdsize; +}; + +// The fvmfile_command is obsolete and no longer supported. 
+struct fvmfile_command { + uint32_t cmd; + uint32_t cmdsize; + uint32_t name; + uint32_t header_addr; +}; + +struct tlv_descriptor_32 { + uint32_t thunk; + uint32_t key; + uint32_t offset; +}; + +struct tlv_descriptor_64 { + uint64_t thunk; + uint64_t key; + uint64_t offset; +}; + +struct tlv_descriptor { + uintptr_t thunk; + uintptr_t key; + uintptr_t offset; +}; + +struct entry_point_command { + uint32_t cmd; + uint32_t cmdsize; + uint64_t entryoff; + uint64_t stacksize; +}; + +// Structs from +struct fat_header { + uint32_t magic; + uint32_t nfat_arch; +}; + +struct fat_arch { + uint32_t cputype; + uint32_t cpusubtype; + uint32_t offset; + uint32_t size; + uint32_t align; +}; + +struct fat_arch_64 { + uint32_t cputype; + uint32_t cpusubtype; + uint64_t offset; + uint64_t size; + uint32_t align; + uint32_t reserved; +}; + +// Structs from +struct relocation_info { + int32_t r_address; + uint32_t r_symbolnum : 24, r_pcrel : 1, r_length : 2, r_extern : 1, + r_type : 4; +}; + +struct scattered_relocation_info { +#if defined(BYTE_ORDER) && defined(BIG_ENDIAN) && (BYTE_ORDER == BIG_ENDIAN) + uint32_t r_scattered : 1, r_pcrel : 1, r_length : 2, r_type : 4, + r_address : 24; +#else + uint32_t r_address : 24, r_type : 4, r_length : 2, r_pcrel : 1, + r_scattered : 1; +#endif + int32_t r_value; +}; + +// Structs NOT from , but that make LLVM's life easier +struct any_relocation_info { + uint32_t r_word0, r_word1; +}; + +// Structs from +struct nlist_base { + uint32_t n_strx; + uint8_t n_type; + uint8_t n_sect; + uint16_t n_desc; +}; + +struct nlist { + uint32_t n_strx; + uint8_t n_type; + uint8_t n_sect; + int16_t n_desc; + uint32_t n_value; +}; + +struct nlist_64 { + uint32_t n_strx; + uint8_t n_type; + uint8_t n_sect; + uint16_t n_desc; + uint64_t n_value; +}; + +// Byte order swapping functions for MachO structs + +inline void swapStruct(fat_header &mh) { + sys::swapByteOrder(mh.magic); + sys::swapByteOrder(mh.nfat_arch); +} + +inline void swapStruct(fat_arch &mh) { + sys::swapByteOrder(mh.cputype); + sys::swapByteOrder(mh.cpusubtype); + sys::swapByteOrder(mh.offset); + sys::swapByteOrder(mh.size); + sys::swapByteOrder(mh.align); +} + +inline void swapStruct(fat_arch_64 &mh) { + sys::swapByteOrder(mh.cputype); + sys::swapByteOrder(mh.cpusubtype); + sys::swapByteOrder(mh.offset); + sys::swapByteOrder(mh.size); + sys::swapByteOrder(mh.align); + sys::swapByteOrder(mh.reserved); +} + +inline void swapStruct(mach_header &mh) { + sys::swapByteOrder(mh.magic); + sys::swapByteOrder(mh.cputype); + sys::swapByteOrder(mh.cpusubtype); + sys::swapByteOrder(mh.filetype); + sys::swapByteOrder(mh.ncmds); + sys::swapByteOrder(mh.sizeofcmds); + sys::swapByteOrder(mh.flags); +} + +inline void swapStruct(mach_header_64 &H) { + sys::swapByteOrder(H.magic); + sys::swapByteOrder(H.cputype); + sys::swapByteOrder(H.cpusubtype); + sys::swapByteOrder(H.filetype); + sys::swapByteOrder(H.ncmds); + sys::swapByteOrder(H.sizeofcmds); + sys::swapByteOrder(H.flags); + sys::swapByteOrder(H.reserved); +} + +inline void swapStruct(load_command &lc) { + sys::swapByteOrder(lc.cmd); + sys::swapByteOrder(lc.cmdsize); +} + +inline void swapStruct(symtab_command &lc) { + sys::swapByteOrder(lc.cmd); + sys::swapByteOrder(lc.cmdsize); + sys::swapByteOrder(lc.symoff); + sys::swapByteOrder(lc.nsyms); + sys::swapByteOrder(lc.stroff); + sys::swapByteOrder(lc.strsize); +} + +inline void swapStruct(segment_command_64 &seg) { + sys::swapByteOrder(seg.cmd); + sys::swapByteOrder(seg.cmdsize); + sys::swapByteOrder(seg.vmaddr); + 
sys::swapByteOrder(seg.vmsize); + sys::swapByteOrder(seg.fileoff); + sys::swapByteOrder(seg.filesize); + sys::swapByteOrder(seg.maxprot); + sys::swapByteOrder(seg.initprot); + sys::swapByteOrder(seg.nsects); + sys::swapByteOrder(seg.flags); +} + +inline void swapStruct(segment_command &seg) { + sys::swapByteOrder(seg.cmd); + sys::swapByteOrder(seg.cmdsize); + sys::swapByteOrder(seg.vmaddr); + sys::swapByteOrder(seg.vmsize); + sys::swapByteOrder(seg.fileoff); + sys::swapByteOrder(seg.filesize); + sys::swapByteOrder(seg.maxprot); + sys::swapByteOrder(seg.initprot); + sys::swapByteOrder(seg.nsects); + sys::swapByteOrder(seg.flags); +} + +inline void swapStruct(section_64 &sect) { + sys::swapByteOrder(sect.addr); + sys::swapByteOrder(sect.size); + sys::swapByteOrder(sect.offset); + sys::swapByteOrder(sect.align); + sys::swapByteOrder(sect.reloff); + sys::swapByteOrder(sect.nreloc); + sys::swapByteOrder(sect.flags); + sys::swapByteOrder(sect.reserved1); + sys::swapByteOrder(sect.reserved2); +} + +inline void swapStruct(section &sect) { + sys::swapByteOrder(sect.addr); + sys::swapByteOrder(sect.size); + sys::swapByteOrder(sect.offset); + sys::swapByteOrder(sect.align); + sys::swapByteOrder(sect.reloff); + sys::swapByteOrder(sect.nreloc); + sys::swapByteOrder(sect.flags); + sys::swapByteOrder(sect.reserved1); + sys::swapByteOrder(sect.reserved2); +} + +inline void swapStruct(dyld_info_command &info) { + sys::swapByteOrder(info.cmd); + sys::swapByteOrder(info.cmdsize); + sys::swapByteOrder(info.rebase_off); + sys::swapByteOrder(info.rebase_size); + sys::swapByteOrder(info.bind_off); + sys::swapByteOrder(info.bind_size); + sys::swapByteOrder(info.weak_bind_off); + sys::swapByteOrder(info.weak_bind_size); + sys::swapByteOrder(info.lazy_bind_off); + sys::swapByteOrder(info.lazy_bind_size); + sys::swapByteOrder(info.export_off); + sys::swapByteOrder(info.export_size); +} + +inline void swapStruct(dylib_command &d) { + sys::swapByteOrder(d.cmd); + sys::swapByteOrder(d.cmdsize); + sys::swapByteOrder(d.dylib.name); + sys::swapByteOrder(d.dylib.timestamp); + sys::swapByteOrder(d.dylib.current_version); + sys::swapByteOrder(d.dylib.compatibility_version); +} + +inline void swapStruct(sub_framework_command &s) { + sys::swapByteOrder(s.cmd); + sys::swapByteOrder(s.cmdsize); + sys::swapByteOrder(s.umbrella); +} + +inline void swapStruct(sub_umbrella_command &s) { + sys::swapByteOrder(s.cmd); + sys::swapByteOrder(s.cmdsize); + sys::swapByteOrder(s.sub_umbrella); +} + +inline void swapStruct(sub_library_command &s) { + sys::swapByteOrder(s.cmd); + sys::swapByteOrder(s.cmdsize); + sys::swapByteOrder(s.sub_library); +} + +inline void swapStruct(sub_client_command &s) { + sys::swapByteOrder(s.cmd); + sys::swapByteOrder(s.cmdsize); + sys::swapByteOrder(s.client); +} + +inline void swapStruct(routines_command &r) { + sys::swapByteOrder(r.cmd); + sys::swapByteOrder(r.cmdsize); + sys::swapByteOrder(r.init_address); + sys::swapByteOrder(r.init_module); + sys::swapByteOrder(r.reserved1); + sys::swapByteOrder(r.reserved2); + sys::swapByteOrder(r.reserved3); + sys::swapByteOrder(r.reserved4); + sys::swapByteOrder(r.reserved5); + sys::swapByteOrder(r.reserved6); +} + +inline void swapStruct(routines_command_64 &r) { + sys::swapByteOrder(r.cmd); + sys::swapByteOrder(r.cmdsize); + sys::swapByteOrder(r.init_address); + sys::swapByteOrder(r.init_module); + sys::swapByteOrder(r.reserved1); + sys::swapByteOrder(r.reserved2); + sys::swapByteOrder(r.reserved3); + sys::swapByteOrder(r.reserved4); + sys::swapByteOrder(r.reserved5); +
sys::swapByteOrder(r.reserved6); +} + +inline void swapStruct(thread_command &t) { + sys::swapByteOrder(t.cmd); + sys::swapByteOrder(t.cmdsize); +} + +inline void swapStruct(dylinker_command &d) { + sys::swapByteOrder(d.cmd); + sys::swapByteOrder(d.cmdsize); + sys::swapByteOrder(d.name); +} + +inline void swapStruct(uuid_command &u) { + sys::swapByteOrder(u.cmd); + sys::swapByteOrder(u.cmdsize); +} + +inline void swapStruct(rpath_command &r) { + sys::swapByteOrder(r.cmd); + sys::swapByteOrder(r.cmdsize); + sys::swapByteOrder(r.path); +} + +inline void swapStruct(source_version_command &s) { + sys::swapByteOrder(s.cmd); + sys::swapByteOrder(s.cmdsize); + sys::swapByteOrder(s.version); +} + +inline void swapStruct(entry_point_command &e) { + sys::swapByteOrder(e.cmd); + sys::swapByteOrder(e.cmdsize); + sys::swapByteOrder(e.entryoff); + sys::swapByteOrder(e.stacksize); +} + +inline void swapStruct(encryption_info_command &e) { + sys::swapByteOrder(e.cmd); + sys::swapByteOrder(e.cmdsize); + sys::swapByteOrder(e.cryptoff); + sys::swapByteOrder(e.cryptsize); + sys::swapByteOrder(e.cryptid); +} + +inline void swapStruct(encryption_info_command_64 &e) { + sys::swapByteOrder(e.cmd); + sys::swapByteOrder(e.cmdsize); + sys::swapByteOrder(e.cryptoff); + sys::swapByteOrder(e.cryptsize); + sys::swapByteOrder(e.cryptid); + sys::swapByteOrder(e.pad); +} + +inline void swapStruct(dysymtab_command &dst) { + sys::swapByteOrder(dst.cmd); + sys::swapByteOrder(dst.cmdsize); + sys::swapByteOrder(dst.ilocalsym); + sys::swapByteOrder(dst.nlocalsym); + sys::swapByteOrder(dst.iextdefsym); + sys::swapByteOrder(dst.nextdefsym); + sys::swapByteOrder(dst.iundefsym); + sys::swapByteOrder(dst.nundefsym); + sys::swapByteOrder(dst.tocoff); + sys::swapByteOrder(dst.ntoc); + sys::swapByteOrder(dst.modtaboff); + sys::swapByteOrder(dst.nmodtab); + sys::swapByteOrder(dst.extrefsymoff); + sys::swapByteOrder(dst.nextrefsyms); + sys::swapByteOrder(dst.indirectsymoff); + sys::swapByteOrder(dst.nindirectsyms); + sys::swapByteOrder(dst.extreloff); + sys::swapByteOrder(dst.nextrel); + sys::swapByteOrder(dst.locreloff); + sys::swapByteOrder(dst.nlocrel); +} + +inline void swapStruct(any_relocation_info &reloc) { + sys::swapByteOrder(reloc.r_word0); + sys::swapByteOrder(reloc.r_word1); +} + +inline void swapStruct(nlist_base &S) { + sys::swapByteOrder(S.n_strx); + sys::swapByteOrder(S.n_desc); +} + +inline void swapStruct(nlist &sym) { + sys::swapByteOrder(sym.n_strx); + sys::swapByteOrder(sym.n_desc); + sys::swapByteOrder(sym.n_value); +} + +inline void swapStruct(nlist_64 &sym) { + sys::swapByteOrder(sym.n_strx); + sys::swapByteOrder(sym.n_desc); + sys::swapByteOrder(sym.n_value); +} + +inline void swapStruct(linkedit_data_command &C) { + sys::swapByteOrder(C.cmd); + sys::swapByteOrder(C.cmdsize); + sys::swapByteOrder(C.dataoff); + sys::swapByteOrder(C.datasize); +} + +inline void swapStruct(linker_option_command &C) { + sys::swapByteOrder(C.cmd); + sys::swapByteOrder(C.cmdsize); + sys::swapByteOrder(C.count); +} + +inline void swapStruct(version_min_command &C) { + sys::swapByteOrder(C.cmd); + sys::swapByteOrder(C.cmdsize); + sys::swapByteOrder(C.version); + sys::swapByteOrder(C.sdk); +} + +inline void swapStruct(note_command &C) { + sys::swapByteOrder(C.cmd); + sys::swapByteOrder(C.cmdsize); + sys::swapByteOrder(C.offset); + sys::swapByteOrder(C.size); +} + +inline void swapStruct(build_version_command &C) { + sys::swapByteOrder(C.cmd); + sys::swapByteOrder(C.cmdsize); + sys::swapByteOrder(C.platform); + sys::swapByteOrder(C.minos); 
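// --- Editor's aside: illustrative sketch, not part of the patch. ---
// Every variable-length command starts with the load_command prefix defined in
// this hunk, and cmdsize gives the stride, so a parser can walk the whole
// command list generically. Shown in comments because this aside sits
// mid-hunk; a hypothetical, self-contained helper for a native-endian image:
//
//   #include <cstdint>
//   #include <cstring>
//
//   // Visits each load command; Visit(cmd, cmdsize, p) decides which
//   // concrete *_command struct to overlay on the payload at p.
//   template <typename Fn>
//   void forEachLoadCommand(const uint8_t *P, const uint8_t *End,
//                           uint32_t NCmds, Fn Visit) {
//     for (uint32_t I = 0; I < NCmds && P + 8 <= End; ++I) {
//       uint32_t Cmd, CmdSize;
//       std::memcpy(&Cmd, P, 4);         // load_command::cmd
//       std::memcpy(&CmdSize, P + 4, 4); // load_command::cmdsize
//       if (CmdSize < 8 || P + CmdSize > End)
//         return;                        // malformed: stride would not advance
//       Visit(Cmd, CmdSize, P);
//       P += CmdSize;                    // commands are laid out back to back
//     }
//   }
// --- End aside. ---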
+ sys::swapByteOrder(C.sdk); + sys::swapByteOrder(C.ntools); +} + +inline void swapStruct(build_tool_version &C) { + sys::swapByteOrder(C.tool); + sys::swapByteOrder(C.version); +} + +inline void swapStruct(data_in_code_entry &C) { + sys::swapByteOrder(C.offset); + sys::swapByteOrder(C.length); + sys::swapByteOrder(C.kind); +} + +inline void swapStruct(uint32_t &C) { sys::swapByteOrder(C); } + +// The prebind_cksum_command is obsolete and no longer supported. +inline void swapStruct(prebind_cksum_command &C) { + sys::swapByteOrder(C.cmd); + sys::swapByteOrder(C.cmdsize); + sys::swapByteOrder(C.cksum); +} + +// The twolevel_hints_command is obsolete and no longer supported. +inline void swapStruct(twolevel_hints_command &C) { + sys::swapByteOrder(C.cmd); + sys::swapByteOrder(C.cmdsize); + sys::swapByteOrder(C.offset); + sys::swapByteOrder(C.nhints); +} + +// The prebound_dylib_command is obsolete and no longer supported. +inline void swapStruct(prebound_dylib_command &C) { + sys::swapByteOrder(C.cmd); + sys::swapByteOrder(C.cmdsize); + sys::swapByteOrder(C.name); + sys::swapByteOrder(C.nmodules); + sys::swapByteOrder(C.linked_modules); +} + +// The fvmfile_command is obsolete and no longer supported. +inline void swapStruct(fvmfile_command &C) { + sys::swapByteOrder(C.cmd); + sys::swapByteOrder(C.cmdsize); + sys::swapByteOrder(C.name); + sys::swapByteOrder(C.header_addr); +} + +// The symseg_command is obsolete and no longer supported. +inline void swapStruct(symseg_command &C) { + sys::swapByteOrder(C.cmd); + sys::swapByteOrder(C.cmdsize); + sys::swapByteOrder(C.offset); + sys::swapByteOrder(C.size); +} + +// The ident_command is obsolete and no longer supported. +inline void swapStruct(ident_command &C) { + sys::swapByteOrder(C.cmd); + sys::swapByteOrder(C.cmdsize); +} + +inline void swapStruct(fvmlib &C) { + sys::swapByteOrder(C.name); + sys::swapByteOrder(C.minor_version); + sys::swapByteOrder(C.header_addr); +} + +// The fvmlib_command is obsolete and no longer supported. +inline void swapStruct(fvmlib_command &C) { + sys::swapByteOrder(C.cmd); + sys::swapByteOrder(C.cmdsize); + swapStruct(C.fvmlib); +} + +// Get/Set functions from + +static inline uint16_t GET_LIBRARY_ORDINAL(uint16_t n_desc) { + return (((n_desc) >> 8u) & 0xffu); +} + +static inline void SET_LIBRARY_ORDINAL(uint16_t &n_desc, uint8_t ordinal) { + n_desc = (((n_desc)&0x00ff) | (((ordinal)&0xff) << 8)); +} + +static inline uint8_t GET_COMM_ALIGN(uint16_t n_desc) { + return (n_desc >> 8u) & 0x0fu; +} + +static inline void SET_COMM_ALIGN(uint16_t &n_desc, uint8_t align) { + n_desc = ((n_desc & 0xf0ffu) | ((align & 0x0fu) << 8u)); +} + +// Enums from +enum : uint32_t { + // Capability bits used in the definition of cpu_type. + CPU_ARCH_MASK = 0xff000000, // Mask for architecture bits + CPU_ARCH_ABI64 = 0x01000000 // 64 bit ABI +}; + +// Constants for the cputype field. +enum CPUType { + CPU_TYPE_ANY = -1, + CPU_TYPE_X86 = 7, + CPU_TYPE_I386 = CPU_TYPE_X86, + CPU_TYPE_X86_64 = CPU_TYPE_X86 | CPU_ARCH_ABI64, + /* CPU_TYPE_MIPS = 8, */ + CPU_TYPE_MC98000 = 10, // Old Motorola PowerPC + CPU_TYPE_ARM = 12, + CPU_TYPE_ARM64 = CPU_TYPE_ARM | CPU_ARCH_ABI64, + CPU_TYPE_SPARC = 14, + CPU_TYPE_POWERPC = 18, + CPU_TYPE_POWERPC64 = CPU_TYPE_POWERPC | CPU_ARCH_ABI64 +}; + +enum : uint32_t { + // Capability bits used in the definition of cpusubtype. + CPU_SUBTYPE_MASK = 0xff000000, // Mask for architecture bits + CPU_SUBTYPE_LIB64 = 0x80000000, // 64 bit libraries + + // Special CPU subtype constants. 
+ CPU_SUBTYPE_MULTIPLE = ~0u +}; + +// Constants for the cpusubtype field. +enum CPUSubTypeX86 { + CPU_SUBTYPE_I386_ALL = 3, + CPU_SUBTYPE_386 = 3, + CPU_SUBTYPE_486 = 4, + CPU_SUBTYPE_486SX = 0x84, + CPU_SUBTYPE_586 = 5, + CPU_SUBTYPE_PENT = CPU_SUBTYPE_586, + CPU_SUBTYPE_PENTPRO = 0x16, + CPU_SUBTYPE_PENTII_M3 = 0x36, + CPU_SUBTYPE_PENTII_M5 = 0x56, + CPU_SUBTYPE_CELERON = 0x67, + CPU_SUBTYPE_CELERON_MOBILE = 0x77, + CPU_SUBTYPE_PENTIUM_3 = 0x08, + CPU_SUBTYPE_PENTIUM_3_M = 0x18, + CPU_SUBTYPE_PENTIUM_3_XEON = 0x28, + CPU_SUBTYPE_PENTIUM_M = 0x09, + CPU_SUBTYPE_PENTIUM_4 = 0x0a, + CPU_SUBTYPE_PENTIUM_4_M = 0x1a, + CPU_SUBTYPE_ITANIUM = 0x0b, + CPU_SUBTYPE_ITANIUM_2 = 0x1b, + CPU_SUBTYPE_XEON = 0x0c, + CPU_SUBTYPE_XEON_MP = 0x1c, + + CPU_SUBTYPE_X86_ALL = 3, + CPU_SUBTYPE_X86_64_ALL = 3, + CPU_SUBTYPE_X86_ARCH1 = 4, + CPU_SUBTYPE_X86_64_H = 8 +}; +static inline int CPU_SUBTYPE_INTEL(int Family, int Model) { + return Family | (Model << 4); +} +static inline int CPU_SUBTYPE_INTEL_FAMILY(CPUSubTypeX86 ST) { + return ((int)ST) & 0x0f; +} +static inline int CPU_SUBTYPE_INTEL_MODEL(CPUSubTypeX86 ST) { + return ((int)ST) >> 4; +} +enum { CPU_SUBTYPE_INTEL_FAMILY_MAX = 15, CPU_SUBTYPE_INTEL_MODEL_ALL = 0 }; + +enum CPUSubTypeARM { + CPU_SUBTYPE_ARM_ALL = 0, + CPU_SUBTYPE_ARM_V4T = 5, + CPU_SUBTYPE_ARM_V6 = 6, + CPU_SUBTYPE_ARM_V5 = 7, + CPU_SUBTYPE_ARM_V5TEJ = 7, + CPU_SUBTYPE_ARM_XSCALE = 8, + CPU_SUBTYPE_ARM_V7 = 9, + // unused ARM_V7F = 10, + CPU_SUBTYPE_ARM_V7S = 11, + CPU_SUBTYPE_ARM_V7K = 12, + CPU_SUBTYPE_ARM_V6M = 14, + CPU_SUBTYPE_ARM_V7M = 15, + CPU_SUBTYPE_ARM_V7EM = 16 +}; + +enum CPUSubTypeARM64 { CPU_SUBTYPE_ARM64_ALL = 0 }; + +enum CPUSubTypeSPARC { CPU_SUBTYPE_SPARC_ALL = 0 }; + +enum CPUSubTypePowerPC { + CPU_SUBTYPE_POWERPC_ALL = 0, + CPU_SUBTYPE_POWERPC_601 = 1, + CPU_SUBTYPE_POWERPC_602 = 2, + CPU_SUBTYPE_POWERPC_603 = 3, + CPU_SUBTYPE_POWERPC_603e = 4, + CPU_SUBTYPE_POWERPC_603ev = 5, + CPU_SUBTYPE_POWERPC_604 = 6, + CPU_SUBTYPE_POWERPC_604e = 7, + CPU_SUBTYPE_POWERPC_620 = 8, + CPU_SUBTYPE_POWERPC_750 = 9, + CPU_SUBTYPE_POWERPC_7400 = 10, + CPU_SUBTYPE_POWERPC_7450 = 11, + CPU_SUBTYPE_POWERPC_970 = 100, + + CPU_SUBTYPE_MC980000_ALL = CPU_SUBTYPE_POWERPC_ALL, + CPU_SUBTYPE_MC98601 = CPU_SUBTYPE_POWERPC_601 +}; + +struct x86_thread_state32_t { + uint32_t eax; + uint32_t ebx; + uint32_t ecx; + uint32_t edx; + uint32_t edi; + uint32_t esi; + uint32_t ebp; + uint32_t esp; + uint32_t ss; + uint32_t eflags; + uint32_t eip; + uint32_t cs; + uint32_t ds; + uint32_t es; + uint32_t fs; + uint32_t gs; +}; + +struct x86_thread_state64_t { + uint64_t rax; + uint64_t rbx; + uint64_t rcx; + uint64_t rdx; + uint64_t rdi; + uint64_t rsi; + uint64_t rbp; + uint64_t rsp; + uint64_t r8; + uint64_t r9; + uint64_t r10; + uint64_t r11; + uint64_t r12; + uint64_t r13; + uint64_t r14; + uint64_t r15; + uint64_t rip; + uint64_t rflags; + uint64_t cs; + uint64_t fs; + uint64_t gs; +}; + +enum x86_fp_control_precis { + x86_FP_PREC_24B = 0, + x86_FP_PREC_53B = 2, + x86_FP_PREC_64B = 3 +}; + +enum x86_fp_control_rc { + x86_FP_RND_NEAR = 0, + x86_FP_RND_DOWN = 1, + x86_FP_RND_UP = 2, + x86_FP_CHOP = 3 +}; + +struct fp_control_t { + unsigned short invalid : 1, denorm : 1, zdiv : 1, ovrfl : 1, undfl : 1, + precis : 1, : 2, pc : 2, rc : 2, : 1, : 3; +}; + +struct fp_status_t { + unsigned short invalid : 1, denorm : 1, zdiv : 1, ovrfl : 1, undfl : 1, + precis : 1, stkflt : 1, errsumm : 1, c0 : 1, c1 : 1, c2 : 1, tos : 3, + c3 : 1, busy : 1; +}; + +struct mmst_reg_t { + char mmst_reg[10]; + char mmst_rsrv[6]; 
+}; + +struct xmm_reg_t { + char xmm_reg[16]; +}; + +struct x86_float_state64_t { + int32_t fpu_reserved[2]; + fp_control_t fpu_fcw; + fp_status_t fpu_fsw; + uint8_t fpu_ftw; + uint8_t fpu_rsrv1; + uint16_t fpu_fop; + uint32_t fpu_ip; + uint16_t fpu_cs; + uint16_t fpu_rsrv2; + uint32_t fpu_dp; + uint16_t fpu_ds; + uint16_t fpu_rsrv3; + uint32_t fpu_mxcsr; + uint32_t fpu_mxcsrmask; + mmst_reg_t fpu_stmm0; + mmst_reg_t fpu_stmm1; + mmst_reg_t fpu_stmm2; + mmst_reg_t fpu_stmm3; + mmst_reg_t fpu_stmm4; + mmst_reg_t fpu_stmm5; + mmst_reg_t fpu_stmm6; + mmst_reg_t fpu_stmm7; + xmm_reg_t fpu_xmm0; + xmm_reg_t fpu_xmm1; + xmm_reg_t fpu_xmm2; + xmm_reg_t fpu_xmm3; + xmm_reg_t fpu_xmm4; + xmm_reg_t fpu_xmm5; + xmm_reg_t fpu_xmm6; + xmm_reg_t fpu_xmm7; + xmm_reg_t fpu_xmm8; + xmm_reg_t fpu_xmm9; + xmm_reg_t fpu_xmm10; + xmm_reg_t fpu_xmm11; + xmm_reg_t fpu_xmm12; + xmm_reg_t fpu_xmm13; + xmm_reg_t fpu_xmm14; + xmm_reg_t fpu_xmm15; + char fpu_rsrv4[6 * 16]; + uint32_t fpu_reserved1; +}; + +struct x86_exception_state64_t { + uint16_t trapno; + uint16_t cpu; + uint32_t err; + uint64_t faultvaddr; +}; + +inline void swapStruct(x86_thread_state32_t &x) { + sys::swapByteOrder(x.eax); + sys::swapByteOrder(x.ebx); + sys::swapByteOrder(x.ecx); + sys::swapByteOrder(x.edx); + sys::swapByteOrder(x.edi); + sys::swapByteOrder(x.esi); + sys::swapByteOrder(x.ebp); + sys::swapByteOrder(x.esp); + sys::swapByteOrder(x.ss); + sys::swapByteOrder(x.eflags); + sys::swapByteOrder(x.eip); + sys::swapByteOrder(x.cs); + sys::swapByteOrder(x.ds); + sys::swapByteOrder(x.es); + sys::swapByteOrder(x.fs); + sys::swapByteOrder(x.gs); +} + +inline void swapStruct(x86_thread_state64_t &x) { + sys::swapByteOrder(x.rax); + sys::swapByteOrder(x.rbx); + sys::swapByteOrder(x.rcx); + sys::swapByteOrder(x.rdx); + sys::swapByteOrder(x.rdi); + sys::swapByteOrder(x.rsi); + sys::swapByteOrder(x.rbp); + sys::swapByteOrder(x.rsp); + sys::swapByteOrder(x.r8); + sys::swapByteOrder(x.r9); + sys::swapByteOrder(x.r10); + sys::swapByteOrder(x.r11); + sys::swapByteOrder(x.r12); + sys::swapByteOrder(x.r13); + sys::swapByteOrder(x.r14); + sys::swapByteOrder(x.r15); + sys::swapByteOrder(x.rip); + sys::swapByteOrder(x.rflags); + sys::swapByteOrder(x.cs); + sys::swapByteOrder(x.fs); + sys::swapByteOrder(x.gs); +} + +inline void swapStruct(x86_float_state64_t &x) { + sys::swapByteOrder(x.fpu_reserved[0]); + sys::swapByteOrder(x.fpu_reserved[1]); + // TODO swap: fp_control_t fpu_fcw; + // TODO swap: fp_status_t fpu_fsw; + sys::swapByteOrder(x.fpu_fop); + sys::swapByteOrder(x.fpu_ip); + sys::swapByteOrder(x.fpu_cs); + sys::swapByteOrder(x.fpu_rsrv2); + sys::swapByteOrder(x.fpu_dp); + sys::swapByteOrder(x.fpu_ds); + sys::swapByteOrder(x.fpu_rsrv3); + sys::swapByteOrder(x.fpu_mxcsr); + sys::swapByteOrder(x.fpu_mxcsrmask); + sys::swapByteOrder(x.fpu_reserved1); +} + +inline void swapStruct(x86_exception_state64_t &x) { + sys::swapByteOrder(x.trapno); + sys::swapByteOrder(x.cpu); + sys::swapByteOrder(x.err); + sys::swapByteOrder(x.faultvaddr); +} + +struct x86_state_hdr_t { + uint32_t flavor; + uint32_t count; +}; + +struct x86_thread_state_t { + x86_state_hdr_t tsh; + union { + x86_thread_state64_t ts64; + x86_thread_state32_t ts32; + } uts; +}; + +struct x86_float_state_t { + x86_state_hdr_t fsh; + union { + x86_float_state64_t fs64; + } ufs; +}; + +struct x86_exception_state_t { + x86_state_hdr_t esh; + union { + x86_exception_state64_t es64; + } ues; +}; + +inline void swapStruct(x86_state_hdr_t &x) { + sys::swapByteOrder(x.flavor); + 
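// --- Editor's aside: illustrative worked example, not part of the patch. ---
// The CPU_SUBTYPE_INTEL helpers earlier in this hunk pack the family into the
// low nibble and the model above it, so the named constants round-trip:
//   CPU_SUBTYPE_INTEL(6, 5) == (6 | (5 << 4)) == 0x56 == CPU_SUBTYPE_PENTII_M5
//   CPU_SUBTYPE_INTEL_FAMILY(CPU_SUBTYPE_PENTII_M5) == (0x56 & 0x0f) == 6
//   CPU_SUBTYPE_INTEL_MODEL(CPU_SUBTYPE_PENTII_M5)  == (0x56 >> 4)   == 5
// --- End aside. ---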
sys::swapByteOrder(x.count); +} + +enum X86ThreadFlavors { + x86_THREAD_STATE32 = 1, + x86_FLOAT_STATE32 = 2, + x86_EXCEPTION_STATE32 = 3, + x86_THREAD_STATE64 = 4, + x86_FLOAT_STATE64 = 5, + x86_EXCEPTION_STATE64 = 6, + x86_THREAD_STATE = 7, + x86_FLOAT_STATE = 8, + x86_EXCEPTION_STATE = 9, + x86_DEBUG_STATE32 = 10, + x86_DEBUG_STATE64 = 11, + x86_DEBUG_STATE = 12 +}; + +inline void swapStruct(x86_thread_state_t &x) { + swapStruct(x.tsh); + if (x.tsh.flavor == x86_THREAD_STATE64) + swapStruct(x.uts.ts64); +} + +inline void swapStruct(x86_float_state_t &x) { + swapStruct(x.fsh); + if (x.fsh.flavor == x86_FLOAT_STATE64) + swapStruct(x.ufs.fs64); +} + +inline void swapStruct(x86_exception_state_t &x) { + swapStruct(x.esh); + if (x.esh.flavor == x86_EXCEPTION_STATE64) + swapStruct(x.ues.es64); +} + +const uint32_t x86_THREAD_STATE32_COUNT = + sizeof(x86_thread_state32_t) / sizeof(uint32_t); + +const uint32_t x86_THREAD_STATE64_COUNT = + sizeof(x86_thread_state64_t) / sizeof(uint32_t); +const uint32_t x86_FLOAT_STATE64_COUNT = + sizeof(x86_float_state64_t) / sizeof(uint32_t); +const uint32_t x86_EXCEPTION_STATE64_COUNT = + sizeof(x86_exception_state64_t) / sizeof(uint32_t); + +const uint32_t x86_THREAD_STATE_COUNT = + sizeof(x86_thread_state_t) / sizeof(uint32_t); +const uint32_t x86_FLOAT_STATE_COUNT = + sizeof(x86_float_state_t) / sizeof(uint32_t); +const uint32_t x86_EXCEPTION_STATE_COUNT = + sizeof(x86_exception_state_t) / sizeof(uint32_t); + +struct arm_thread_state32_t { + uint32_t r[13]; + uint32_t sp; + uint32_t lr; + uint32_t pc; + uint32_t cpsr; +}; + +inline void swapStruct(arm_thread_state32_t &x) { + for (int i = 0; i < 13; i++) + sys::swapByteOrder(x.r[i]); + sys::swapByteOrder(x.sp); + sys::swapByteOrder(x.lr); + sys::swapByteOrder(x.pc); + sys::swapByteOrder(x.cpsr); +} + +struct arm_thread_state64_t { + uint64_t x[29]; + uint64_t fp; + uint64_t lr; + uint64_t sp; + uint64_t pc; + uint32_t cpsr; + uint32_t pad; +}; + +inline void swapStruct(arm_thread_state64_t &x) { + for (int i = 0; i < 29; i++) + sys::swapByteOrder(x.x[i]); + sys::swapByteOrder(x.fp); + sys::swapByteOrder(x.lr); + sys::swapByteOrder(x.sp); + sys::swapByteOrder(x.pc); + sys::swapByteOrder(x.cpsr); +} + +struct arm_state_hdr_t { + uint32_t flavor; + uint32_t count; +}; + +struct arm_thread_state_t { + arm_state_hdr_t tsh; + union { + arm_thread_state32_t ts32; + } uts; +}; + +inline void swapStruct(arm_state_hdr_t &x) { + sys::swapByteOrder(x.flavor); + sys::swapByteOrder(x.count); +} + +enum ARMThreadFlavors { + ARM_THREAD_STATE = 1, + ARM_VFP_STATE = 2, + ARM_EXCEPTION_STATE = 3, + ARM_DEBUG_STATE = 4, + ARN_THREAD_STATE_NONE = 5, + ARM_THREAD_STATE64 = 6, + ARM_EXCEPTION_STATE64 = 7 +}; + +inline void swapStruct(arm_thread_state_t &x) { + swapStruct(x.tsh); + if (x.tsh.flavor == ARM_THREAD_STATE) + swapStruct(x.uts.ts32); +} + +const uint32_t ARM_THREAD_STATE_COUNT = + sizeof(arm_thread_state32_t) / sizeof(uint32_t); + +const uint32_t ARM_THREAD_STATE64_COUNT = + sizeof(arm_thread_state64_t) / sizeof(uint32_t); + +struct ppc_thread_state32_t { + uint32_t srr0; + uint32_t srr1; + uint32_t r0; + uint32_t r1; + uint32_t r2; + uint32_t r3; + uint32_t r4; + uint32_t r5; + uint32_t r6; + uint32_t r7; + uint32_t r8; + uint32_t r9; + uint32_t r10; + uint32_t r11; + uint32_t r12; + uint32_t r13; + uint32_t r14; + uint32_t r15; + uint32_t r16; + uint32_t r17; + uint32_t r18; + uint32_t r19; + uint32_t r20; + uint32_t r21; + uint32_t r22; + uint32_t r23; + uint32_t r24; + uint32_t r25; + uint32_t r26; + uint32_t r27; 
+ uint32_t r28; + uint32_t r29; + uint32_t r30; + uint32_t r31; + uint32_t ct; + uint32_t xer; + uint32_t lr; + uint32_t ctr; + uint32_t mq; + uint32_t vrsave; +}; + +inline void swapStruct(ppc_thread_state32_t &x) { + sys::swapByteOrder(x.srr0); + sys::swapByteOrder(x.srr1); + sys::swapByteOrder(x.r0); + sys::swapByteOrder(x.r1); + sys::swapByteOrder(x.r2); + sys::swapByteOrder(x.r3); + sys::swapByteOrder(x.r4); + sys::swapByteOrder(x.r5); + sys::swapByteOrder(x.r6); + sys::swapByteOrder(x.r7); + sys::swapByteOrder(x.r8); + sys::swapByteOrder(x.r9); + sys::swapByteOrder(x.r10); + sys::swapByteOrder(x.r11); + sys::swapByteOrder(x.r12); + sys::swapByteOrder(x.r13); + sys::swapByteOrder(x.r14); + sys::swapByteOrder(x.r15); + sys::swapByteOrder(x.r16); + sys::swapByteOrder(x.r17); + sys::swapByteOrder(x.r18); + sys::swapByteOrder(x.r19); + sys::swapByteOrder(x.r20); + sys::swapByteOrder(x.r21); + sys::swapByteOrder(x.r22); + sys::swapByteOrder(x.r23); + sys::swapByteOrder(x.r24); + sys::swapByteOrder(x.r25); + sys::swapByteOrder(x.r26); + sys::swapByteOrder(x.r27); + sys::swapByteOrder(x.r28); + sys::swapByteOrder(x.r29); + sys::swapByteOrder(x.r30); + sys::swapByteOrder(x.r31); + sys::swapByteOrder(x.ct); + sys::swapByteOrder(x.xer); + sys::swapByteOrder(x.lr); + sys::swapByteOrder(x.ctr); + sys::swapByteOrder(x.mq); + sys::swapByteOrder(x.vrsave); +} + +struct ppc_state_hdr_t { + uint32_t flavor; + uint32_t count; +}; + +struct ppc_thread_state_t { + ppc_state_hdr_t tsh; + union { + ppc_thread_state32_t ts32; + } uts; +}; + +inline void swapStruct(ppc_state_hdr_t &x) { + sys::swapByteOrder(x.flavor); + sys::swapByteOrder(x.count); +} + +enum PPCThreadFlavors { + PPC_THREAD_STATE = 1, + PPC_FLOAT_STATE = 2, + PPC_EXCEPTION_STATE = 3, + PPC_VECTOR_STATE = 4, + PPC_THREAD_STATE64 = 5, + PPC_EXCEPTION_STATE64 = 6, + PPC_THREAD_STATE_NONE = 7 +}; + +inline void swapStruct(ppc_thread_state_t &x) { + swapStruct(x.tsh); + if (x.tsh.flavor == PPC_THREAD_STATE) + swapStruct(x.uts.ts32); +} + +const uint32_t PPC_THREAD_STATE_COUNT = + sizeof(ppc_thread_state32_t) / sizeof(uint32_t); + +// Define a union of all load command structs +#define LOAD_COMMAND_STRUCT(LCStruct) LCStruct LCStruct##_data; + +union macho_load_command { +#include "llvm/BinaryFormat/MachO.def" +}; + +} // end namespace MachO +} // end namespace llvm + +#endif diff --git a/interpreter/llvm/src/include/llvm/BinaryFormat/Magic.h b/interpreter/llvm/src/include/llvm/BinaryFormat/Magic.h new file mode 100644 index 0000000000000..c0e23db5e1aee --- /dev/null +++ b/interpreter/llvm/src/include/llvm/BinaryFormat/Magic.h @@ -0,0 +1,73 @@ +//===- llvm/BinaryFormat/Magic.h - File magic identification ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_BINARYFORMAT_MAGIC_H +#define LLVM_BINARYFORMAT_MAGIC_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" + +#include + +namespace llvm { +/// file_magic - An "enum class" enumeration of file types based on magic (the +/// first N bytes of the file). 
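// --- Editor's aside: illustrative sketch, not part of the patch. ---
// Typical use of the two identify_magic overloads declared just below: the
// Twine overload reads the file for you, the StringRef overload classifies
// bytes already in memory. A hedged sketch; the helper name and the chosen
// set of "shared library" kinds are this editor's, not the patch's:
#include "llvm/BinaryFormat/Magic.h"

// Returns true if Path names something a dynamic loader could load.
inline bool looksLikeSharedLib(llvm::StringRef Path) {
  llvm::file_magic Magic;
  if (llvm::identify_magic(llvm::Twine(Path), Magic)) // non-success error_code
    return false;
  return Magic == llvm::file_magic::macho_dynamically_linked_shared_lib ||
         Magic == llvm::file_magic::elf_shared_object ||
         Magic == llvm::file_magic::pecoff_executable;
}
// --- End aside. ---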
+struct file_magic { + enum Impl { + unknown = 0, ///< Unrecognized file + bitcode, ///< Bitcode file + archive, ///< ar style archive file + elf, ///< ELF Unknown type + elf_relocatable, ///< ELF Relocatable object file + elf_executable, ///< ELF Executable image + elf_shared_object, ///< ELF dynamically linked shared lib + elf_core, ///< ELF core image + macho_object, ///< Mach-O Object file + macho_executable, ///< Mach-O Executable + macho_fixed_virtual_memory_shared_lib, ///< Mach-O Shared Lib, FVM + macho_core, ///< Mach-O Core File + macho_preload_executable, ///< Mach-O Preloaded Executable + macho_dynamically_linked_shared_lib, ///< Mach-O dynlinked shared lib + macho_dynamic_linker, ///< The Mach-O dynamic linker + macho_bundle, ///< Mach-O Bundle file + macho_dynamically_linked_shared_lib_stub, ///< Mach-O Shared lib stub + macho_dsym_companion, ///< Mach-O dSYM companion file + macho_kext_bundle, ///< Mach-O kext bundle file + macho_universal_binary, ///< Mach-O universal binary + coff_cl_gl_object, ///< Microsoft cl.exe's intermediate code file + coff_object, ///< COFF object file + coff_import_library, ///< COFF import library + pecoff_executable, ///< PECOFF executable file + windows_resource, ///< Windows compiled resource file (.res) + wasm_object ///< WebAssembly Object file + }; + + bool is_object() const { return V != unknown; } + + file_magic() = default; + file_magic(Impl V) : V(V) {} + operator Impl() const { return V; } + +private: + Impl V = unknown; +}; + +/// @brief Identify the type of a binary file based on how magical it is. +file_magic identify_magic(StringRef magic); + +/// @brief Get and identify \a path's type based on its content. +/// +/// @param path Input path. +/// @param result Set to the type of file, or file_magic::unknown. +/// @returns errc::success if result has been successfully set, otherwise a +/// platform-specific error_code. +std::error_code identify_magic(const Twine &path, file_magic &result); +} // namespace llvm + +#endif diff --git a/interpreter/llvm/src/include/llvm/BinaryFormat/Wasm.h b/interpreter/llvm/src/include/llvm/BinaryFormat/Wasm.h new file mode 100644 index 0000000000000..23e30b7a868d9 --- /dev/null +++ b/interpreter/llvm/src/include/llvm/BinaryFormat/Wasm.h @@ -0,0 +1,217 @@ +//===- Wasm.h - Wasm object file format -------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines manifest constants for the wasm object file format. +// See: https://github.com/WebAssembly/design/blob/master/BinaryEncoding.md +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_BINARYFORMAT_WASM_H +#define LLVM_BINARYFORMAT_WASM_H + +#include "llvm/ADT/ArrayRef.h" + +namespace llvm { +namespace wasm { + +// Object file magic string. 
+const char WasmMagic[] = {'\0', 'a', 's', 'm'};
+// Wasm binary format version
+const uint32_t WasmVersion = 0x1;
+// Wasm uses a 64k page size
+const uint32_t WasmPageSize = 65536;
+
+struct WasmObjectHeader {
+  StringRef Magic;
+  uint32_t Version;
+};
+
+struct WasmSignature {
+  std::vector<int32_t> ParamTypes;
+  int32_t ReturnType;
+};
+
+struct WasmExport {
+  StringRef Name;
+  uint32_t Kind;
+  uint32_t Index;
+};
+
+struct WasmLimits {
+  uint32_t Flags;
+  uint32_t Initial;
+  uint32_t Maximum;
+};
+
+struct WasmTable {
+  int32_t ElemType;
+  WasmLimits Limits;
+};
+
+struct WasmInitExpr {
+  uint8_t Opcode;
+  union {
+    int32_t Int32;
+    int64_t Int64;
+    int32_t Float32;
+    int64_t Float64;
+    uint32_t Global;
+  } Value;
+};
+
+struct WasmGlobal {
+  int32_t Type;
+  bool Mutable;
+  WasmInitExpr InitExpr;
+};
+
+struct WasmImport {
+  StringRef Module;
+  StringRef Field;
+  uint32_t Kind;
+  union {
+    uint32_t SigIndex;
+    WasmGlobal Global;
+    WasmTable Table;
+    WasmLimits Memory;
+  };
+};
+
+struct WasmLocalDecl {
+  int32_t Type;
+  uint32_t Count;
+};
+
+struct WasmFunction {
+  std::vector<WasmLocalDecl> Locals;
+  ArrayRef<uint8_t> Body;
+};
+
+struct WasmDataSegment {
+  uint32_t MemoryIndex;
+  WasmInitExpr Offset;
+  ArrayRef<uint8_t> Content;
+};
+
+struct WasmElemSegment {
+  uint32_t TableIndex;
+  WasmInitExpr Offset;
+  std::vector<uint32_t> Functions;
+};
+
+struct WasmRelocation {
+  uint32_t Type;   // The type of the relocation.
+  uint32_t Index;  // Index into function to global index space.
+  uint64_t Offset; // Offset from the start of the section.
+  int64_t Addend;  // A value to add to the symbol.
+};
+
+struct WasmLinkingData {
+  uint32_t DataSize;
+  uint32_t DataAlignment;
+};
+
+enum : unsigned {
+  WASM_SEC_CUSTOM = 0,   // Custom / User-defined section
+  WASM_SEC_TYPE = 1,     // Function signature declarations
+  WASM_SEC_IMPORT = 2,   // Import declarations
+  WASM_SEC_FUNCTION = 3, // Function declarations
+  WASM_SEC_TABLE = 4,    // Indirect function table and other tables
+  WASM_SEC_MEMORY = 5,   // Memory attributes
+  WASM_SEC_GLOBAL = 6,   // Global declarations
+  WASM_SEC_EXPORT = 7,   // Exports
+  WASM_SEC_START = 8,    // Start function declaration
+  WASM_SEC_ELEM = 9,     // Elements section
+  WASM_SEC_CODE = 10,    // Function bodies (code)
+  WASM_SEC_DATA = 11     // Data segments
+};
+
+// Type immediate encodings used in various contexts.
+enum {
+  WASM_TYPE_I32 = -0x01,
+  WASM_TYPE_I64 = -0x02,
+  WASM_TYPE_F32 = -0x03,
+  WASM_TYPE_F64 = -0x04,
+  WASM_TYPE_ANYFUNC = -0x10,
+  WASM_TYPE_FUNC = -0x20,
+  WASM_TYPE_NORESULT = -0x40, // for blocks with no result values
+};
+
+// Kinds of externals (for imports and exports).
+enum : unsigned {
+  WASM_EXTERNAL_FUNCTION = 0x0,
+  WASM_EXTERNAL_TABLE = 0x1,
+  WASM_EXTERNAL_MEMORY = 0x2,
+  WASM_EXTERNAL_GLOBAL = 0x3,
+};
+
+// Opcodes used in initializer expressions.
+enum : unsigned {
+  WASM_OPCODE_END = 0x0b,
+  WASM_OPCODE_GET_GLOBAL = 0x23,
+  WASM_OPCODE_I32_CONST = 0x41,
+  WASM_OPCODE_I64_CONST = 0x42,
+  WASM_OPCODE_F32_CONST = 0x43,
+  WASM_OPCODE_F64_CONST = 0x44,
+};
+
+enum : unsigned {
+  WASM_NAMES_FUNCTION = 0x1,
+  WASM_NAMES_LOCAL = 0x2,
+};
+
+enum : unsigned {
+  WASM_LIMITS_FLAG_HAS_MAX = 0x1,
+};
+
+// Subset of types that a value can have
+enum class ValType {
+  I32 = WASM_TYPE_I32,
+  I64 = WASM_TYPE_I64,
+  F32 = WASM_TYPE_F32,
+  F64 = WASM_TYPE_F64,
+};
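// --- Editor's aside: illustrative sketch, not part of the patch. ---
// A WasmInitExpr holds the single constant instruction wasm allows for data
// segment offsets and global initializers; consumers switch on Opcode and
// read the matching union member. A hedged helper against the structs above
// (the function name is this editor's):
#include <cstdint>

// Returns the value of a plain i32 init expr, or Fallback when the expr is
// something else (e.g. WASM_OPCODE_GET_GLOBAL referencing an import).
inline int32_t evalI32InitExpr(const llvm::wasm::WasmInitExpr &E,
                               int32_t Fallback) {
  return E.Opcode == llvm::wasm::WASM_OPCODE_I32_CONST ? E.Value.Int32
                                                       : Fallback;
}
// --- End aside. ---
+
+// Linking metadata kinds.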
+enum : unsigned {
+  WASM_STACK_POINTER = 0x1,
+  WASM_SYMBOL_INFO = 0x2,
+  WASM_DATA_SIZE = 0x3,
+  WASM_DATA_ALIGNMENT = 0x4,
+};
+
+enum : unsigned {
+  WASM_SYMBOL_FLAG_WEAK = 0x1,
+};
+
+#define WASM_RELOC(name, value) name = value,
+
+enum : unsigned {
+#include "WasmRelocs/WebAssembly.def"
+};
+
+#undef WASM_RELOC
+
+struct Global {
+  ValType Type;
+  bool Mutable;
+
+  // The initial value for this global is either the value of an imported
+  // global, in which case InitialModule and InitialName specify the global
+  // import, or a value, in which case InitialModule is empty and InitialValue
+  // holds the value.
+  StringRef InitialModule;
+  StringRef InitialName;
+  uint64_t InitialValue;
+};
+
+} // end namespace wasm
+} // end namespace llvm
+
+#endif
diff --git a/interpreter/llvm/src/include/llvm/BinaryFormat/WasmRelocs/WebAssembly.def b/interpreter/llvm/src/include/llvm/BinaryFormat/WasmRelocs/WebAssembly.def
new file mode 100644
index 0000000000000..da64e025478de
--- /dev/null
+++ b/interpreter/llvm/src/include/llvm/BinaryFormat/WasmRelocs/WebAssembly.def
@@ -0,0 +1,13 @@
+
+#ifndef WASM_RELOC
+#error "WASM_RELOC must be defined"
+#endif
+
+WASM_RELOC(R_WEBASSEMBLY_FUNCTION_INDEX_LEB, 0)
+WASM_RELOC(R_WEBASSEMBLY_TABLE_INDEX_SLEB, 1)
+WASM_RELOC(R_WEBASSEMBLY_TABLE_INDEX_I32, 2)
+WASM_RELOC(R_WEBASSEMBLY_GLOBAL_ADDR_LEB, 3)
+WASM_RELOC(R_WEBASSEMBLY_GLOBAL_ADDR_SLEB, 4)
+WASM_RELOC(R_WEBASSEMBLY_GLOBAL_ADDR_I32, 5)
+WASM_RELOC(R_WEBASSEMBLY_TYPE_INDEX_LEB, 6)
+WASM_RELOC(R_WEBASSEMBLY_GLOBAL_INDEX_LEB, 7)
diff --git a/interpreter/llvm/src/include/llvm/Bitcode/BitcodeReader.h b/interpreter/llvm/src/include/llvm/Bitcode/BitcodeReader.h
index 31ffb7645f3a6..160ddad5761f8 100644
--- a/interpreter/llvm/src/include/llvm/Bitcode/BitcodeReader.h
+++ b/interpreter/llvm/src/include/llvm/Bitcode/BitcodeReader.h
@@ -40,6 +40,14 @@ namespace llvm {
     return std::move(*Val);
   }
+
+  struct BitcodeFileContents;
+
+  /// Basic information extracted from a bitcode module to be used for LTO.
+  struct BitcodeLTOInfo {
+    bool IsThinLTO;
+    bool HasSummary;
+  };
+
   /// Represents a module in a bitcode file.
   class BitcodeModule {
     // This covers the identification (if present) and module blocks.
@@ -61,8 +69,8 @@
           IdentificationBit(IdentificationBit), ModuleBit(ModuleBit) {}

     // Calls the ctor.
-    friend Expected<std::vector<BitcodeModule>>
-    getBitcodeModuleList(MemoryBufferRef Buffer);
+    friend Expected<BitcodeFileContents>
+    getBitcodeFileContents(MemoryBufferRef Buffer);

     Expected<std::unique_ptr<Module>> getModuleImpl(LLVMContext &Context,
                                                     bool MaterializeAll,
@@ -88,17 +96,31 @@
     /// Read the entire bitcode module and return it.
     Expected<std::unique_ptr<Module>> parseModule(LLVMContext &Context);

-    /// Check if the given bitcode buffer contains a summary block.
-    Expected<bool> hasSummary();
+    /// Returns information about the module to be used for LTO: whether to
+    /// compile with ThinLTO, and whether it has a summary.
+    Expected<BitcodeLTOInfo> getLTOInfo();

     /// Parse the specified bitcode buffer, returning the module summary index.
     Expected<std::unique_ptr<ModuleSummaryIndex>> getSummary();

     /// Parse the specified bitcode buffer and merge its module summary index
     /// into CombinedIndex.
-    Error readSummary(ModuleSummaryIndex &CombinedIndex, unsigned ModuleId);
+    Error readSummary(ModuleSummaryIndex &CombinedIndex, StringRef ModulePath,
+                      uint64_t ModuleId);
   };

+  struct BitcodeFileContents {
+    std::vector<BitcodeModule> Mods;
+    StringRef Symtab, StrtabForSymtab;
+  };
+
+  /// Returns the contents of a bitcode file. This includes the raw contents of
+  /// the symbol table embedded in the bitcode file. Clients which require a
+  /// symbol table should prefer to use irsymtab::read instead of this function
+  /// because it creates a reader for the irsymtab and handles upgrading bitcode
+  /// files without a symbol table or with an old symbol table.
+  Expected<BitcodeFileContents> getBitcodeFileContents(MemoryBufferRef Buffer);
+
   /// Returns a list of modules in the specified bitcode buffer.
   Expected<std::vector<BitcodeModule>>
   getBitcodeModuleList(MemoryBufferRef Buffer);
@@ -138,8 +160,8 @@
   Expected<std::unique_ptr<Module>> parseBitcodeFile(MemoryBufferRef Buffer,
                                                      LLVMContext &Context);

-  /// Check if the given bitcode buffer contains a summary block.
-  Expected<bool> hasGlobalValueSummary(MemoryBufferRef Buffer);
+  /// Returns LTO information for the specified bitcode file.
+  Expected<BitcodeLTOInfo> getBitcodeLTOInfo(MemoryBufferRef Buffer);

   /// Parse the specified bitcode buffer, returning the module summary index.
   Expected<std::unique_ptr<ModuleSummaryIndex>>
@@ -148,7 +170,7 @@
   /// Parse the specified bitcode buffer and merge the index into CombinedIndex.
   Error readModuleSummaryIndex(MemoryBufferRef Buffer,
                                ModuleSummaryIndex &CombinedIndex,
-                               unsigned ModuleId);
+                               uint64_t ModuleId);

   /// Parse the module summary index out of an IR file and return the module
   /// summary index object if found, or an empty summary if not. If Path refers
diff --git a/interpreter/llvm/src/include/llvm/Bitcode/BitcodeWriter.h b/interpreter/llvm/src/include/llvm/Bitcode/BitcodeWriter.h
index 23b5ae87b2787..f8b7fb341e881 100644
--- a/interpreter/llvm/src/include/llvm/Bitcode/BitcodeWriter.h
+++ b/interpreter/llvm/src/include/llvm/Bitcode/BitcodeWriter.h
@@ -28,18 +28,34 @@ namespace llvm {
     std::unique_ptr<BitstreamWriter> Stream;

     StringTableBuilder StrtabBuilder{StringTableBuilder::RAW};
-    bool WroteStrtab = false;
+
+    // Owns any strings created by the irsymtab writer until we create the
+    // string table.
+    BumpPtrAllocator Alloc;
+
+    bool WroteStrtab = false, WroteSymtab = false;

     void writeBlob(unsigned Block, unsigned Record, StringRef Blob);

+    std::vector<Module *> Mods;
+
   public:
     /// Create a BitcodeWriter that writes to Buffer.
     BitcodeWriter(SmallVectorImpl<char> &Buffer);

     ~BitcodeWriter();

+    /// Attempt to write a symbol table to the bitcode file. This must be called
+    /// at most once after all modules have been written.
+    ///
+    /// A reader does not require a symbol table to interpret a bitcode file;
+    /// the symbol table is needed only to improve link-time performance. So
+    /// this function may decide not to write a symbol table. It may so decide
+    /// if, for example, the target is unregistered or the IR is malformed.
+    void writeSymtab();
+
     /// Write the bitcode file's string table. This must be called exactly once
-    /// after all modules have been written.
+    /// after all modules and the optional symbol table have been written.
     void writeStrtab();

     /// Copy the string table for another module into this bitcode file. This
@@ -67,6 +83,10 @@
     void writeModule(const Module *M, bool ShouldPreserveUseListOrder = false,
                      const ModuleSummaryIndex *Index = nullptr,
                      bool GenerateHash = false, ModuleHash *ModHash = nullptr);
+
+    void writeIndex(
+        const ModuleSummaryIndex *Index,
+        const std::map<std::string, GVSummaryMapTy> *ModuleToSummariesForIndex);
   };

   /// \brief Write the specified module to the specified raw output stream.
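// --- Editor's aside: illustrative sketch, not part of the patch. ---
// The new writeSymtab() slots between the existing steps; per the doc comments
// above, the order is: all modules, then the optional symbol table, then
// exactly one writeStrtab(). A minimal emitter under those assumptions (the
// function name is this editor's):
#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/IR/Module.h"

inline void emitBitcode(const llvm::Module &M,
                        llvm::SmallVectorImpl<char> &Buf) {
  llvm::BitcodeWriter Writer(Buf);
  Writer.writeModule(&M); // repeat for each module in a multi-module file
  Writer.writeSymtab();   // optional; the writer may decide to emit nothing
  Writer.writeStrtab();   // required last step, exactly once
}
// --- End aside. ---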
diff --git a/interpreter/llvm/src/include/llvm/Bitcode/LLVMBitCodes.h b/interpreter/llvm/src/include/llvm/Bitcode/LLVMBitCodes.h index 8ee1e4b583b6c..3777f956cf279 100644 --- a/interpreter/llvm/src/include/llvm/Bitcode/LLVMBitCodes.h +++ b/interpreter/llvm/src/include/llvm/Bitcode/LLVMBitCodes.h @@ -22,7 +22,7 @@ namespace llvm { namespace bitc { -// The only top-level block types are MODULE, IDENTIFICATION and STRTAB. +// The only top-level block types are MODULE, IDENTIFICATION, STRTAB and SYMTAB. enum BlockIDs { // Blocks MODULE_BLOCK_ID = FIRST_APPLICATION_BLOCKID, @@ -55,6 +55,12 @@ enum BlockIDs { METADATA_KIND_BLOCK_ID, STRTAB_BLOCK_ID, + + FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID, + + SYMTAB_BLOCK_ID, + + SYNC_SCOPE_NAMES_BLOCK_ID, }; /// Identification block contains a string that describes the producer details, @@ -168,6 +174,10 @@ enum OperandBundleTagCode { OPERAND_BUNDLE_TAG = 1, // TAG: [strchr x N] }; +enum SyncScopeNameCode { + SYNC_SCOPE_NAME = 1, +}; + // Value symbol table codes. enum ValueSymtabCodes { VST_CODE_ENTRY = 1, // VST_ENTRY: [valueid, namechar x N] @@ -238,6 +248,14 @@ enum GlobalValueSummarySymtabCodes { // summaries, but it can also appear in per-module summaries for PGO data. // [valueid, guid] FS_VALUE_GUID = 16, + // The list of local functions with CFI jump tables. Function names are + // strings in strtab. + // [n * name] + FS_CFI_FUNCTION_DEFS = 17, + // The list of external functions with CFI jump tables. Function names are + // strings in strtab. + // [n * name] + FS_CFI_FUNCTION_DECLS = 18, }; enum MetadataCodes { @@ -392,12 +410,6 @@ enum AtomicOrderingCodes { ORDERING_SEQCST = 6 }; -/// Encoded SynchronizationScope values. -enum AtomicSynchScopeCodes { - SYNCHSCOPE_SINGLETHREAD = 0, - SYNCHSCOPE_CROSSTHREAD = 1 -}; - /// Markers and flags for call instruction. enum CallMarkersFlags { CALL_TAIL = 0, @@ -561,6 +573,10 @@ enum StrtabCodes { STRTAB_BLOB = 1, }; +enum SymtabCodes { + SYMTAB_BLOB = 1, +}; + } // End bitc namespace } // End llvm namespace diff --git a/interpreter/llvm/src/include/llvm/CodeGen/AsmPrinter.h b/interpreter/llvm/src/include/llvm/CodeGen/AsmPrinter.h index 180c0b5792488..60bbc9aaa5bd4 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/AsmPrinter.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/AsmPrinter.h @@ -34,6 +34,7 @@ namespace llvm { class AsmPrinterHandler; +class BasicBlock; class BlockAddress; class Constant; class ConstantArray; @@ -43,6 +44,7 @@ class DIEAbbrev; class DwarfDebug; class GCMetadataPrinter; class GlobalIndirectSymbol; +class GlobalObject; class GlobalValue; class GlobalVariable; class GCStrategy; @@ -65,6 +67,8 @@ class MCSubtargetInfo; class MCSymbol; class MCTargetOptions; class MDNode; +class Module; +class raw_ostream; class TargetLoweringObjectFile; class TargetMachine; @@ -109,7 +113,7 @@ class AsmPrinter : public MachineFunctionPass { /// Map global GOT equivalent MCSymbols to GlobalVariables and keep track of /// its number of uses by other globals. - typedef std::pair GOTEquivUsePair; + using GOTEquivUsePair = std::pair; MapVector GlobalGOTEquivs; /// Enable print [latency:throughput] in output @@ -604,8 +608,8 @@ class AsmPrinter : public MachineFunctionPass { // Internal Implementation Details //===------------------------------------------------------------------===// - /// This emits visibility information about symbol, if this is suported by the - /// target. + /// This emits visibility information about symbol, if this is supported by + /// the target. 
void EmitVisibility(MCSymbol *Sym, unsigned Visibility, bool IsDefinition = true) const; diff --git a/interpreter/llvm/src/include/llvm/CodeGen/AtomicExpandUtils.h b/interpreter/llvm/src/include/llvm/CodeGen/AtomicExpandUtils.h index ac18eac8a1cef..1f9c96b18e1b4 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/AtomicExpandUtils.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/AtomicExpandUtils.h @@ -1,4 +1,4 @@ -//===-- AtomicExpandUtils.h - Utilities for expanding atomic instructions -===// +//===- AtomicExpandUtils.h - Utilities for expanding atomic instructions --===// // // The LLVM Compiler Infrastructure // @@ -7,19 +7,24 @@ // //===----------------------------------------------------------------------===// +#ifndef LLVM_CODEGEN_ATOMICEXPANDUTILS_H +#define LLVM_CODEGEN_ATOMICEXPANDUTILS_H + #include "llvm/ADT/STLExtras.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/Support/AtomicOrdering.h" namespace llvm { -class Value; -class AtomicRMWInst; +class AtomicRMWInst; +class Value; /// Parameters (see the expansion example below): /// (the builder, %addr, %loaded, %new_val, ordering, /// /* OUT */ %success, /* OUT */ %new_loaded) -typedef function_ref &, Value *, Value *, Value *, - AtomicOrdering, Value *&, Value *&)> CreateCmpXchgInstFun; +using CreateCmpXchgInstFun = + function_ref &, Value *, Value *, Value *, AtomicOrdering, + Value *&, Value *&)>; /// \brief Expand an atomic RMW instruction into a loop utilizing /// cmpxchg. You'll want to make sure your target machine likes cmpxchg @@ -42,7 +47,8 @@ typedef function_ref &, Value *, Value *, Value *, /// loop: /// %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ] /// %new = some_op iN %loaded, %incr -/// ; This is what -atomic-expand will produce using this function on i686 targets: +/// ; This is what -atomic-expand will produce using this function on i686 +/// targets: /// %pair = cmpxchg iN* %addr, iN %loaded, iN %new_val /// %new_loaded = extractvalue { iN, i1 } %pair, 0 /// %success = extractvalue { iN, i1 } %pair, 1 @@ -52,6 +58,8 @@ typedef function_ref &, Value *, Value *, Value *, /// [...] /// /// Returns true if the containing function was modified. -bool -expandAtomicRMWToCmpXchg(AtomicRMWInst *AI, CreateCmpXchgInstFun Factory); -} +bool expandAtomicRMWToCmpXchg(AtomicRMWInst *AI, CreateCmpXchgInstFun Factory); + +} // end namespace llvm + +#endif // LLVM_CODEGEN_ATOMICEXPANDUTILS_H diff --git a/interpreter/llvm/src/include/llvm/CodeGen/BasicTTIImpl.h b/interpreter/llvm/src/include/llvm/CodeGen/BasicTTIImpl.h index 32542fa87463f..6331070247928 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/BasicTTIImpl.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/BasicTTIImpl.h @@ -17,11 +17,11 @@ #define LLVM_CODEGEN_BASICTTIIMPL_H #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfoImpl.h" #include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetSubtargetInfo.h" -#include "llvm/Analysis/TargetLibraryInfo.h" namespace llvm { @@ -93,6 +93,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { bool isSourceOfDivergence(const Value *V) { return false; } + bool isAlwaysUniform(const Value *V) { return false; } + unsigned getFlatAddressSpace() { // Return an invalid address space. 
return -1; @@ -117,6 +119,10 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace); } + bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2) { + return TargetTransformInfoImplBase::isLSRCostLess(C1, C2); + } + int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) { TargetLoweringBase::AddrMode AM; @@ -149,6 +155,18 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { return BaseT::getGEPCost(PointeeType, Ptr, Operands); } + int getExtCost(const Instruction *I, const Value *Src) { + if (getTLI()->isExtFree(I)) + return TargetTransformInfo::TCC_Free; + + if (isa(I) || isa(I)) + if (const LoadInst *LI = dyn_cast(Src)) + if (getTLI()->isExtLoad(LI, I, DL)) + return TargetTransformInfo::TCC_Free; + + return TargetTransformInfo::TCC_Basic; + } + unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, ArrayRef Arguments) { return BaseT::getIntrinsicCost(IID, RetTy, Arguments); @@ -271,7 +289,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { unsigned getInliningThresholdMultiplier() { return 1; } - void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP) { + void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, + TTI::UnrollingPreferences &UP) { // This unrolling functionality is target independent, but to provide some // motivation for its intended use, for x86: @@ -342,7 +361,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { unsigned getNumberOfRegisters(bool Vector) { return Vector ? 0 : 1; } - unsigned getRegisterBitWidth(bool Vector) { return 32; } + unsigned getRegisterBitWidth(bool Vector) const { return 32; } /// Estimate the overhead of scalarizing an instruction. Insert and Extract /// are set if the result needs to be inserted and/or extracted from vectors. @@ -421,7 +440,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { std::pair LT = TLI->getTypeLegalizationCost(DL, Ty); - bool IsFloat = Ty->getScalarType()->isFloatingPointTy(); + bool IsFloat = Ty->isFPOrFPVectorTy(); // Assume that floating point arithmetic operations cost twice as much as // integer operations. unsigned OpCost = (IsFloat ? 2 : 1); @@ -1080,46 +1099,46 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { return 0; } + /// Try to calculate arithmetic and shuffle op costs for reduction operations. + /// We're assuming that reduction operation are performing the following way: + /// 1. Non-pairwise reduction + /// %val1 = shufflevector %val, %undef, + /// + /// \----------------v-------------/ \----------v------------/ + /// n/2 elements n/2 elements + /// %red1 = op %val, val1 + /// After this operation we have a vector %red1 where only the first n/2 + /// elements are meaningful, the second n/2 elements are undefined and can be + /// dropped. All other operations are actually working with the vector of + /// length n/2, not n, though the real vector length is still n. + /// %val2 = shufflevector %red1, %undef, + /// + /// \----------------v-------------/ \----------v------------/ + /// n/4 elements 3*n/4 elements + /// %red2 = op %red1, val2 - working with the vector of + /// length n/2, the resulting vector has length n/4 etc. + /// 2. Pairwise reduction: + /// Everything is the same except for an additional shuffle operation which + /// is used to produce operands for pairwise kind of reductions. 
+ /// %val1 = shufflevector %val, %undef, + /// + /// \-------------v----------/ \----------v------------/ + /// n/2 elements n/2 elements + /// %val2 = shufflevector %val, %undef, + /// + /// \-------------v----------/ \----------v------------/ + /// n/2 elements n/2 elements + /// %red1 = op %val1, val2 + /// Again, the operation is performed on vector, but the resulting + /// vector %red1 is vector. + /// + /// The cost model should take into account that the actual length of the + /// vector is reduced on each iteration. unsigned getReductionCost(unsigned Opcode, Type *Ty, bool IsPairwise) { assert(Ty->isVectorTy() && "Expect a vector type"); Type *ScalarTy = Ty->getVectorElementType(); unsigned NumVecElts = Ty->getVectorNumElements(); unsigned NumReduxLevels = Log2_32(NumVecElts); - // Try to calculate arithmetic and shuffle op costs for reduction operations. - // We're assuming that reduction operation are performing the following way: - // 1. Non-pairwise reduction - // %val1 = shufflevector %val, %undef, - // - // \----------------v-------------/ \----------v------------/ - // n/2 elements n/2 elements - // %red1 = op %val, val1 - // After this operation we have a vector %red1 with only maningfull the - // first n/2 elements, the second n/2 elements are undefined and can be - // dropped. All other operations are actually working with the vector of - // length n/2, not n. though the real vector length is still n. - // %val2 = shufflevector %red1, %undef, - // - // \----------------v-------------/ \----------v------------/ - // n/4 elements 3*n/4 elements - // %red2 = op %red1, val2 - working with the vector of - // length n/2, the resulting vector has length n/4 etc. - // 2. Pairwise reduction: - // Everything is the same except for an additional shuffle operation which - // is used to produce operands for pairwise kind of reductions. - // %val1 = shufflevector %val, %undef, - // - // \-------------v----------/ \----------v------------/ - // n/2 elements n/2 elements - // %val2 = shufflevector %val, %undef, - // - // \-------------v----------/ \----------v------------/ - // n/2 elements n/2 elements - // %red1 = op %val1, val2 - // Again, the operation is performed on vector, but the resulting - // vector %red1 is vector. - // - // The cost model should take into account that the actual length of the - // vector is reduced on each iteration. 
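// --- Editor's aside: illustrative worked example, not part of the patch. ---
// Concretely, for a <8 x i32> reduction NumReduxLevels = Log2_32(8) = 3: the
// shuffle ladder narrows the live lanes 8 -> 4 -> 2 -> 1, so the model below
// charges three shuffle/arithmetic pairs (the pairwise form adds one extra
// shuffle per level) plus the final extractelement, even though every
// intermediate value is still a full 8-lane vector.
// --- End aside. ---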
unsigned ArithCost = 0; unsigned ShuffleCost = 0; auto *ConcreteTTI = static_cast(this); diff --git a/interpreter/llvm/src/include/llvm/CodeGen/DFAPacketizer.h b/interpreter/llvm/src/include/llvm/CodeGen/DFAPacketizer.h index 8de140e91bf37..77c37ac7abeae 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/DFAPacketizer.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/DFAPacketizer.h @@ -1,4 +1,4 @@ -//=- llvm/CodeGen/DFAPacketizer.h - DFA Packetizer for VLIW ---*- C++ -*-=====// +//===- llvm/CodeGen/DFAPacketizer.h - DFA Packetizer for VLIW ---*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -29,17 +29,22 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/ScheduleDAGMutation.h" +#include #include +#include +#include +#include namespace llvm { -class MCInstrDesc; +class DefaultVLIWScheduler; +class InstrItineraryData; +class MachineFunction; class MachineInstr; class MachineLoopInfo; -class MachineDominatorTree; -class InstrItineraryData; -class DefaultVLIWScheduler; +class MCInstrDesc; class SUnit; +class TargetInstrInfo; // -------------------------------------------------------------------- // Definitions shared between DFAPacketizer.cpp and DFAPacketizerEmitter.cpp @@ -64,17 +69,18 @@ class SUnit; #define DFA_MAX_RESTERMS 4 // The max # of AND'ed resource terms. #define DFA_MAX_RESOURCES 16 // The max # of resource bits in one term. -typedef uint64_t DFAInput; -typedef int64_t DFAStateInput; +using DFAInput = uint64_t; +using DFAStateInput = int64_t; + #define DFA_TBLTYPE "int64_t" // For generating DFAStateInputTable. // -------------------------------------------------------------------- class DFAPacketizer { private: - typedef std::pair UnsignPair; + using UnsignPair = std::pair; const InstrItineraryData *InstrItins; - int CurrentState; + int CurrentState = 0; const DFAStateInput (*DFAStateInputTable)[2]; const unsigned *DFAStateEntryTable; @@ -101,24 +107,23 @@ class DFAPacketizer { // Check if the resources occupied by a MCInstrDesc are available in // the current state. - bool canReserveResources(const llvm::MCInstrDesc *MID); + bool canReserveResources(const MCInstrDesc *MID); // Reserve the resources occupied by a MCInstrDesc and change the current // state to reflect that change. - void reserveResources(const llvm::MCInstrDesc *MID); + void reserveResources(const MCInstrDesc *MID); // Check if the resources occupied by a machine instruction are available // in the current state. - bool canReserveResources(llvm::MachineInstr &MI); + bool canReserveResources(MachineInstr &MI); // Reserve the resources occupied by a machine instruction and change the // current state to reflect that change. - void reserveResources(llvm::MachineInstr &MI); + void reserveResources(MachineInstr &MI); const InstrItineraryData *getInstrItins() const { return InstrItins; } }; - // VLIWPacketizerList implements a simple VLIW packetizer using DFA. The // packetizer works on machine basic blocks. 
For each instruction I in BB, // the packetizer consults the DFA to see if machine resources are available @@ -205,6 +210,6 @@ class VLIWPacketizerList { void addMutation(std::unique_ptr Mutation); }; -} // namespace llvm +} // end namespace llvm -#endif +#endif // LLVM_CODEGEN_DFAPACKETIZER_H diff --git a/interpreter/llvm/src/include/llvm/CodeGen/DIE.h b/interpreter/llvm/src/include/llvm/CodeGen/DIE.h index a40147336fe2c..f809fc97fe593 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/DIE.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/DIE.h @@ -1,4 +1,4 @@ -//===--- lib/CodeGen/DIE.h - DWARF Info Entries -----------------*- C++ -*-===// +//===- lib/CodeGen/DIE.h - DWARF Info Entries -------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -21,16 +21,17 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/iterator.h" #include "llvm/ADT/iterator_range.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/DwarfStringPoolEntry.h" #include "llvm/Support/AlignOf.h" #include "llvm/Support/Allocator.h" -#include "llvm/Support/Dwarf.h" #include #include #include #include #include #include +#include #include namespace llvm { @@ -53,11 +54,11 @@ class DIEAbbrevData { dwarf::Form Form; /// Dwarf attribute value for DW_FORM_implicit_const - int64_t Value; + int64_t Value = 0; public: DIEAbbrevData(dwarf::Attribute A, dwarf::Form F) - : Attribute(A), Form(F), Value(0) {} + : Attribute(A), Form(F) {} DIEAbbrevData(dwarf::Attribute A, int64_t V) : Attribute(A), Form(dwarf::DW_FORM_implicit_const), Value(V) {} @@ -120,8 +121,8 @@ class DIEAbbrev : public FoldingSetNode { /// Print the abbreviation using the specified asm printer. void Emit(const AsmPrinter *AP) const; - void print(raw_ostream &O); - void dump(); + void print(raw_ostream &O) const; + void dump() const; }; //===--------------------------------------------------------------------===// @@ -136,13 +137,14 @@ class DIEAbbrevSet { /// storage container. BumpPtrAllocator &Alloc; /// \brief FoldingSet that uniques the abbreviations. - llvm::FoldingSet AbbreviationsSet; + FoldingSet AbbreviationsSet; /// A list of all the unique abbreviations in use. std::vector Abbreviations; public: DIEAbbrevSet(BumpPtrAllocator &A) : Alloc(A) {} ~DIEAbbrevSet(); + /// Generate the abbreviation declaration for a DIE and return a pointer to /// the generated abbreviation. /// @@ -289,13 +291,11 @@ class DIEInlineString { /// A pointer to another debug information entry. An instance of this class can /// also be used as a proxy for a debug information entry not yet defined /// (ie. types.) -class DIE; class DIEEntry { DIE *Entry; - DIEEntry() = delete; - public: + DIEEntry() = delete; explicit DIEEntry(DIE &E) : Entry(&E) {} DIE &getEntry() const { return *Entry; } @@ -348,10 +348,10 @@ class DIEValue { /// /// All values that aren't standard layout (or are larger than 8 bytes) /// should be stored by reference instead of by value. 
- typedef AlignedCharArrayUnion - ValTy; + using ValTy = AlignedCharArrayUnion; + static_assert(sizeof(ValTy) <= sizeof(uint64_t) || sizeof(ValTy) <= sizeof(void *), "Expected all large types to be stored via pointer"); @@ -383,11 +383,11 @@ class DIEValue { return; #define HANDLE_DIEVALUE_SMALL(T) \ case is##T: \ - destruct(); + destruct(); \ return; #define HANDLE_DIEVALUE_LARGE(T) \ case is##T: \ - destruct(); + destruct(); \ return; #include "llvm/CodeGen/DIEValue.def" } @@ -486,10 +486,12 @@ struct IntrusiveBackListNode { }; struct IntrusiveBackListBase { - typedef IntrusiveBackListNode Node; + using Node = IntrusiveBackListNode; + Node *Last = nullptr; bool empty() const { return !Last; } + void push_back(Node &N) { assert(N.Next.getPointer() == &N && "Expected unlinked node"); assert(N.Next.getInt() == true && "Expected unlinked node"); @@ -505,6 +507,7 @@ struct IntrusiveBackListBase { template class IntrusiveBackList : IntrusiveBackListBase { public: using IntrusiveBackListBase::empty; + void push_back(T &N) { IntrusiveBackListBase::push_back(N); } T &back() { return *static_cast(Last); } const T &back() const { return *static_cast(Last); } @@ -513,6 +516,7 @@ template class IntrusiveBackList : IntrusiveBackListBase { class iterator : public iterator_facade_base { friend class const_iterator; + Node *N = nullptr; public: @@ -585,10 +589,12 @@ template class IntrusiveBackList : IntrusiveBackListBase { class DIEValueList { struct Node : IntrusiveBackListNode { DIEValue V; + explicit Node(DIEValue V) : V(V) {} }; - typedef IntrusiveBackList ListTy; + using ListTy = IntrusiveBackList; + ListTy List; public: @@ -597,9 +603,10 @@ class DIEValueList { : public iterator_adaptor_base { friend class const_value_iterator; - typedef iterator_adaptor_base iterator_adaptor; + + using iterator_adaptor = + iterator_adaptor_base; public: value_iterator() = default; @@ -612,9 +619,9 @@ class DIEValueList { class const_value_iterator : public iterator_adaptor_base< const_value_iterator, ListTy::const_iterator, std::forward_iterator_tag, const DIEValue> { - typedef iterator_adaptor_base iterator_adaptor; + using iterator_adaptor = + iterator_adaptor_base; public: const_value_iterator() = default; @@ -627,8 +634,8 @@ class DIEValueList { const DIEValue &operator*() const { return wrapped()->V; } }; - typedef iterator_range value_range; - typedef iterator_range const_value_range; + using value_range = iterator_range; + using const_value_range = iterator_range; value_iterator addValue(BumpPtrAllocator &Alloc, const DIEValue &V) { List.push_back(*new (Alloc) Node(V)); @@ -657,15 +664,15 @@ class DIE : IntrusiveBackListNode, public DIEValueList { friend class DIEUnit; /// Dwarf unit relative offset. - unsigned Offset; + unsigned Offset = 0; /// Size of instance + children. - unsigned Size; + unsigned Size = 0; unsigned AbbrevNumber = ~0u; /// Dwarf tag code. dwarf::Tag Tag = (dwarf::Tag)0; /// Set to true to force a DIE to emit an abbreviation that says it has /// children even when it doesn't. This is used for unit testing purposes. - bool ForceChildren; + bool ForceChildren = false; /// Children DIEs. IntrusiveBackList Children; @@ -673,20 +680,19 @@ class DIE : IntrusiveBackListNode, public DIEValueList { /// DIEUnit which contains this DIE as its unit DIE. 
PointerUnion Owner; - DIE() = delete; - explicit DIE(dwarf::Tag Tag) : Offset(0), Size(0), Tag(Tag), - ForceChildren(false) {} + explicit DIE(dwarf::Tag Tag) : Tag(Tag) {} public: + DIE() = delete; + DIE(const DIE &RHS) = delete; + DIE(DIE &&RHS) = delete; + DIE &operator=(const DIE &RHS) = delete; + DIE &operator=(const DIE &&RHS) = delete; + static DIE *get(BumpPtrAllocator &Alloc, dwarf::Tag Tag) { return new (Alloc) DIE(Tag); } - DIE(const DIE &RHS) = delete; - DIE(DIE &&RHS) = delete; - void operator=(const DIE &RHS) = delete; - void operator=(const DIE &&RHS) = delete; - // Accessors. unsigned getAbbrevNumber() const { return AbbrevNumber; } dwarf::Tag getTag() const { return Tag; } @@ -696,10 +702,10 @@ class DIE : IntrusiveBackListNode, public DIEValueList { bool hasChildren() const { return ForceChildren || !Children.empty(); } void setForceChildren(bool B) { ForceChildren = B; } - typedef IntrusiveBackList::iterator child_iterator; - typedef IntrusiveBackList::const_iterator const_child_iterator; - typedef iterator_range child_range; - typedef iterator_range const_child_range; + using child_iterator = IntrusiveBackList::iterator; + using const_child_iterator = IntrusiveBackList::const_iterator; + using child_range = iterator_range; + using const_child_range = iterator_range; child_range children() { return make_range(Children.begin(), Children.end()); @@ -774,7 +780,7 @@ class DIE : IntrusiveBackListNode, public DIEValueList { DIEValue findAttribute(dwarf::Attribute Attribute) const; void print(raw_ostream &O, unsigned IndentCount = 0) const; - void dump(); + void dump() const; }; //===--------------------------------------------------------------------===// @@ -838,10 +844,10 @@ struct BasicDIEUnit final : DIEUnit { /// DIELoc - Represents an expression location. // class DIELoc : public DIEValueList { - mutable unsigned Size; // Size in bytes excluding size header. + mutable unsigned Size = 0; // Size in bytes excluding size header. public: - DIELoc() : Size(0) {} + DIELoc() = default; /// ComputeSize - Calculate the size of the location expression. /// @@ -872,10 +878,10 @@ class DIELoc : public DIEValueList { /// DIEBlock - Represents a block of values. // class DIEBlock : public DIEValueList { - mutable unsigned Size; // Size in bytes excluding size header. + mutable unsigned Size = 0; // Size in bytes excluding size header. public: - DIEBlock() : Size(0) {} + DIEBlock() = default; /// ComputeSize - Calculate the size of the location expression. 
/// diff --git a/interpreter/llvm/src/include/llvm/CodeGen/ExecutionDepsFix.h b/interpreter/llvm/src/include/llvm/CodeGen/ExecutionDepsFix.h index 1d5b9684e1055..f4db8b7322dae 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/ExecutionDepsFix.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/ExecutionDepsFix.h @@ -1,4 +1,4 @@ -//===- llvm/CodeGen/ExecutionDepsFix.h - Execution Dependency Fix -*- C++ -*-=// +//==- llvm/CodeGen/ExecutionDepsFix.h - Execution Dependency Fix -*- C++ -*-==// // // The LLVM Compiler Infrastructure // @@ -20,19 +20,30 @@ // //===----------------------------------------------------------------------===// - #ifndef LLVM_CODEGEN_EXECUTIONDEPSFIX_H #define LLVM_CODEGEN_EXECUTIONDEPSFIX_H +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/LivePhysRegs.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/RegisterClassInfo.h" +#include "llvm/Pass.h" #include "llvm/Support/Allocator.h" +#include "llvm/Support/MathExtras.h" +#include +#include +#include #include namespace llvm { +class MachineBasicBlock; +class MachineInstr; +class TargetInstrInfo; + /// A DomainValue is a bit like LiveIntervals' ValNo, but it also keeps track /// of execution domains. /// @@ -50,7 +61,7 @@ namespace llvm { /// domains. struct DomainValue { // Basic reference counting. - unsigned Refs; + unsigned Refs = 0; // Bitmask of available domains. For an open DomainValue, it is the still // possible domains for collapsing. For a collapsed DomainValue it is the @@ -65,6 +76,8 @@ struct DomainValue { // Twiddleable instructions using or defining these registers. SmallVector Instrs; + DomainValue() { clear(); } + // A collapsed DomainValue has no instructions to twiddle - it simply keeps // track of the domains where the registers are already available. bool isCollapsed() const { return Instrs.empty(); } @@ -97,8 +110,6 @@ struct DomainValue { return countTrailingZeros(AvailableDomains); } - DomainValue() : Refs(0) { clear(); } - // Clear this DomainValue and point to next which has all its data. void clear() { AvailableDomains = 0; @@ -136,29 +147,27 @@ class ExecutionDepsFix : public MachineFunctionPass { // Keeps clearance and domain information for all registers. Note that this // is different from the usual definition notion of liveness. The CPU // doesn't care whether or not we consider a register killed. - LiveReg *OutRegs; + LiveReg *OutRegs = nullptr; // Whether we have gotten to this block in primary processing yet. - bool PrimaryCompleted; + bool PrimaryCompleted = false; // The number of predecessors for which primary processing has completed - unsigned IncomingProcessed; + unsigned IncomingProcessed = 0; // The value of `IncomingProcessed` at the start of primary processing - unsigned PrimaryIncoming; + unsigned PrimaryIncoming = 0; // The number of predecessors for which all processing steps are done. - unsigned IncomingCompleted; + unsigned IncomingCompleted = 0; - MBBInfo() - : OutRegs(nullptr), PrimaryCompleted(false), IncomingProcessed(0), - PrimaryIncoming(0), IncomingCompleted(0) {} + MBBInfo() = default; }; - typedef DenseMap MBBInfoMap; + using MBBInfoMap = DenseMap; MBBInfoMap MBBInfos; /// List of undefined register reads in this block in forward order. - std::vector > UndefReads; + std::vector> UndefReads; /// Storage for register unit liveness. 
LivePhysRegs LiveRegSet; @@ -166,6 +175,7 @@ class ExecutionDepsFix : public MachineFunctionPass { /// Current instruction number. /// The first instruction in each basic block is 0. int CurInstr; + public: ExecutionDepsFix(char &PassID, const TargetRegisterClass &RC) : MachineFunctionPass(PassID), RC(&RC), NumRegs(RC.getNumRegs()) {} @@ -217,4 +227,4 @@ class ExecutionDepsFix : public MachineFunctionPass { } // end namespace llvm -#endif +#endif // LLVM_CODEGEN_EXECUTIONDEPSFIX_H diff --git a/interpreter/llvm/src/include/llvm/CodeGen/FastISel.h b/interpreter/llvm/src/include/llvm/CodeGen/FastISel.h index 57fa0c73d2722..74e4179e73e98 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/FastISel.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/FastISel.h @@ -17,11 +17,12 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineValueType.h" #include "llvm/IR/Attributes.h" -#include "llvm/IR/CallingConv.h" #include "llvm/IR/CallSite.h" +#include "llvm/IR/CallingConv.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/InstrTypes.h" @@ -30,19 +31,43 @@ #include #include #include -#include namespace llvm { +class AllocaInst; +class BasicBlock; +class CallInst; +class Constant; +class ConstantFP; +class DataLayout; +class FunctionLoweringInfo; +class LoadInst; class MachineConstantPool; +class MachineFrameInfo; +class MachineFunction; +class MachineInstr; +class MachineMemOperand; +class MachineOperand; +class MachineRegisterInfo; +class MCContext; +class MCInstrDesc; +class MCSymbol; +class TargetInstrInfo; +class TargetLibraryInfo; +class TargetMachine; +class TargetRegisterClass; +class TargetRegisterInfo; +class Type; +class User; +class Value; /// \brief This is a fast-path instruction selection class that generates poor /// code and doesn't support illegal types or non-trivial lowering, but runs /// quickly. class FastISel { public: - typedef TargetLoweringBase::ArgListEntry ArgListEntry; - typedef TargetLoweringBase::ArgListTy ArgListTy; + using ArgListEntry = TargetLoweringBase::ArgListEntry; + using ArgListTy = TargetLoweringBase::ArgListTy; struct CallLoweringInfo { Type *RetTy = nullptr; bool RetSExt : 1; @@ -202,6 +227,8 @@ class FastISel { MachineInstr *EmitStartPt; public: + virtual ~FastISel(); + /// \brief Return the position of the last instruction emitted for /// materializing constants for use in the current block. MachineInstr *getLastLocalValue() { return LastLocalValue; } @@ -293,8 +320,6 @@ class FastISel { /// \brief Reset InsertPt to the given old insert position. void leaveLocalValueArea(SavePoint Old); - virtual ~FastISel(); - protected: explicit FastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo, @@ -334,7 +359,7 @@ class FastISel { /// \brief This method is called by target-independent code to request that an /// instruction with the given type, opcode, and register and immediate - // operands be emitted. + /// operands be emitted.
virtual unsigned fastEmit_ri(MVT VT, MVT RetVT, unsigned Opcode, unsigned Op0, bool Op0IsKill, uint64_t Imm); diff --git a/interpreter/llvm/src/include/llvm/CodeGen/FaultMaps.h b/interpreter/llvm/src/include/llvm/CodeGen/FaultMaps.h index 0f0005b83c543..98ff526dfe946 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/FaultMaps.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/FaultMaps.h @@ -56,7 +56,7 @@ class FaultMaps { HandlerOffsetExpr(HandlerOffset) {} }; - typedef std::vector FunctionFaultInfos; + using FunctionFaultInfos = std::vector; // We'd like to keep a stable iteration order for FunctionInfos to help // FileCheck based testing. @@ -78,20 +78,17 @@ class FaultMaps { /// generated by the version of LLVM that includes it. No guarantees are made /// with respect to forward or backward compatibility. class FaultMapParser { - typedef uint8_t FaultMapVersionType; - static const size_t FaultMapVersionOffset = 0; + using FaultMapVersionType = uint8_t; + using Reserved0Type = uint8_t; + using Reserved1Type = uint16_t; + using NumFunctionsType = uint32_t; - typedef uint8_t Reserved0Type; + static const size_t FaultMapVersionOffset = 0; static const size_t Reserved0Offset = FaultMapVersionOffset + sizeof(FaultMapVersionType); - - typedef uint16_t Reserved1Type; static const size_t Reserved1Offset = Reserved0Offset + sizeof(Reserved0Type); - - typedef uint32_t NumFunctionsType; static const size_t NumFunctionsOffset = Reserved1Offset + sizeof(Reserved1Type); - static const size_t FunctionInfosOffset = NumFunctionsOffset + sizeof(NumFunctionsType); @@ -105,14 +102,13 @@ class FaultMapParser { public: class FunctionFaultInfoAccessor { - typedef uint32_t FaultKindType; - static const size_t FaultKindOffset = 0; + using FaultKindType = uint32_t; + using FaultingPCOffsetType = uint32_t; + using HandlerPCOffsetType = uint32_t; - typedef uint32_t FaultingPCOffsetType; + static const size_t FaultKindOffset = 0; static const size_t FaultingPCOffsetOffset = FaultKindOffset + sizeof(FaultKindType); - - typedef uint32_t HandlerPCOffsetType; static const size_t HandlerPCOffsetOffset = FaultingPCOffsetOffset + sizeof(FaultingPCOffsetType); @@ -140,20 +136,17 @@ class FaultMapParser { }; class FunctionInfoAccessor { - typedef uint64_t FunctionAddrType; - static const size_t FunctionAddrOffset = 0; + using FunctionAddrType = uint64_t; + using NumFaultingPCsType = uint32_t; + using ReservedType = uint32_t; - typedef uint32_t NumFaultingPCsType; + static const size_t FunctionAddrOffset = 0; static const size_t NumFaultingPCsOffset = FunctionAddrOffset + sizeof(FunctionAddrType); - - typedef uint32_t ReservedType; static const size_t ReservedOffset = NumFaultingPCsOffset + sizeof(NumFaultingPCsType); - static const size_t FunctionFaultInfosOffset = ReservedOffset + sizeof(ReservedType); - static const size_t FunctionInfoHeaderSize = FunctionFaultInfosOffset; const uint8_t *P = nullptr; diff --git a/interpreter/llvm/src/include/llvm/CodeGen/FunctionLoweringInfo.h b/interpreter/llvm/src/include/llvm/CodeGen/FunctionLoweringInfo.h index e7544bd7b70c8..f32a58915118f 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/FunctionLoweringInfo.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/FunctionLoweringInfo.h @@ -1,4 +1,4 @@ -//===-- FunctionLoweringInfo.h - Lower functions from LLVM IR to CodeGen --===// +//===- FunctionLoweringInfo.h - Lower functions from LLVM IR to CodeGen ---===// // // The LLVM Compiler Infrastructure // @@ -23,29 +23,28 @@ #include "llvm/ADT/SmallVector.h" #include 
"llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/IR/InlineAsm.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" #include "llvm/Support/KnownBits.h" #include "llvm/Target/TargetRegisterInfo.h" +#include +#include #include namespace llvm { -class AllocaInst; +class Argument; class BasicBlock; class BranchProbabilityInfo; class Function; -class GlobalVariable; class Instruction; -class MachineInstr; -class MachineBasicBlock; class MachineFunction; -class MachineModuleInfo; +class MachineInstr; class MachineRegisterInfo; -class SelectionDAG; class MVT; +class SelectionDAG; class TargetLowering; -class Value; //===--------------------------------------------------------------------===// /// FunctionLoweringInfo - This contains information that is global to a @@ -74,25 +73,29 @@ class FunctionLoweringInfo { /// A map from swifterror value in a basic block to the virtual register it is /// currently represented by. - llvm::DenseMap, unsigned> + DenseMap, unsigned> SwiftErrorVRegDefMap; /// A list of upward exposed vreg uses that need to be satisfied by either a /// copy def or a phi node at the beginning of the basic block representing /// the predecessor(s) swifterror value. - llvm::DenseMap, unsigned> + DenseMap, unsigned> SwiftErrorVRegUpwardsUse; + /// A map from instructions that define/use a swifterror value to the virtual + /// register that represents that def/use. + llvm::DenseMap, unsigned> + SwiftErrorVRegDefUses; + /// The swifterror argument of the current function. const Value *SwiftErrorArg; - typedef SmallVector SwiftErrorValues; + using SwiftErrorValues = SmallVector; /// A function can only have a single swifterror argument. And if it does /// have a swifterror argument, it must be the first entry in /// SwiftErrorVals. SwiftErrorValues SwiftErrorVals; - /// Get or create the swifterror value virtual register in /// SwiftErrorVRegDefMap for this basic block. unsigned getOrCreateSwiftErrorVReg(const MachineBasicBlock *, @@ -103,6 +106,13 @@ class FunctionLoweringInfo { void setCurrentSwiftErrorVReg(const MachineBasicBlock *MBB, const Value *, unsigned); + /// Get or create the swifterror value virtual register for a def of a + /// swifterror by an instruction. + std::pair getOrCreateSwiftErrorVRegDefAt(const Instruction *); + std::pair + getOrCreateSwiftErrorVRegUseAt(const Instruction *, const MachineBasicBlock *, + const Value *); + /// ValueMap - Since we emit code for the function a basic block at a time, /// we must remember which virtual registers hold the values for /// cross-basic-block values. @@ -118,7 +128,7 @@ class FunctionLoweringInfo { /// slot), and we track that here. struct StatepointSpillMap { - typedef DenseMap> SlotMapTy; + using SlotMapTy = DenseMap>; /// Maps uniqued llvm IR values to the slots they were spilled in. 
If a /// value is mapped to None it means we visited the value but didn't spill @@ -172,8 +182,9 @@ class FunctionLoweringInfo { struct LiveOutInfo { unsigned NumSignBits : 31; unsigned IsValid : 1; - KnownBits Known; - LiveOutInfo() : NumSignBits(0), IsValid(true), Known(1) {} + KnownBits Known = 1; + + LiveOutInfo() : NumSignBits(0), IsValid(true) {} }; /// Record the preferred extend type (ISD::SIGN_EXTEND or ISD::ZERO_EXTEND) @@ -298,4 +309,4 @@ class FunctionLoweringInfo { } // end namespace llvm -#endif +#endif // LLVM_CODEGEN_FUNCTIONLOWERINGINFO_H diff --git a/interpreter/llvm/src/include/llvm/CodeGen/GCMetadata.h b/interpreter/llvm/src/include/llvm/CodeGen/GCMetadata.h index e6afcbc8ded28..ad2599fc120e5 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/GCMetadata.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/GCMetadata.h @@ -1,4 +1,4 @@ -//===-- GCMetadata.h - Garbage collector metadata ---------------*- C++ -*-===// +//===- GCMetadata.h - Garbage collector metadata ----------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -36,15 +36,20 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/GCStrategy.h" #include "llvm/IR/DebugLoc.h" #include "llvm/Pass.h" +#include +#include +#include #include -#include +#include namespace llvm { -class AsmPrinter; + class Constant; +class Function; class MCSymbol; /// GCPoint - Metadata for a collector-safe point in machine code. @@ -62,20 +67,20 @@ struct GCPoint { /// collector. struct GCRoot { int Num; ///< Usually a frame index. - int StackOffset; ///< Offset from the stack pointer. + int StackOffset = -1; ///< Offset from the stack pointer. const Constant *Metadata; ///< Metadata straight from the call ///< to llvm.gcroot. - GCRoot(int N, const Constant *MD) : Num(N), StackOffset(-1), Metadata(MD) {} + GCRoot(int N, const Constant *MD) : Num(N), Metadata(MD) {} }; /// Garbage collection metadata for a single function. Currently, this /// information only applies to GCStrategies which use GCRoot. class GCFunctionInfo { public: - typedef std::vector::iterator iterator; - typedef std::vector::iterator roots_iterator; - typedef std::vector::const_iterator live_iterator; + using iterator = std::vector::iterator; + using roots_iterator = std::vector::iterator; + using live_iterator = std::vector::const_iterator; private: const Function &F; @@ -99,11 +104,9 @@ class GCFunctionInfo { ~GCFunctionInfo(); /// getFunction - Return the function to which this metadata applies. - /// const Function &getFunction() const { return F; } /// getStrategy - Return the GC strategy for the function. - /// GCStrategy &getStrategy() { return S; } /// addStackRoot - Registers a root that lives on the stack. Num is the @@ -126,24 +129,20 @@ class GCFunctionInfo { } /// getFrameSize/setFrameSize - Records the function's frame size. - /// uint64_t getFrameSize() const { return FrameSize; } void setFrameSize(uint64_t S) { FrameSize = S; } /// begin/end - Iterators for safe points. - /// iterator begin() { return SafePoints.begin(); } iterator end() { return SafePoints.end(); } size_t size() const { return SafePoints.size(); } /// roots_begin/roots_end - Iterators for all roots in the function. - /// roots_iterator roots_begin() { return Roots.begin(); } roots_iterator roots_end() { return Roots.end(); } size_t roots_size() const { return Roots.size(); } /// live_begin/live_end - Iterators for live roots at a given safe point. 
- /// live_iterator live_begin(const iterator &p) { return roots_begin(); } live_iterator live_end(const iterator &p) { return roots_end(); } size_t live_size(const iterator &p) const { return roots_size(); } @@ -166,7 +165,7 @@ class GCModuleInfo : public ImmutablePass { /// List of per function info objects. In theory, Each of these /// may be associated with a different GC. - typedef std::vector> FuncInfoVec; + using FuncInfoVec = std::vector>; FuncInfoVec::iterator funcinfo_begin() { return Functions.begin(); } FuncInfoVec::iterator funcinfo_end() { return Functions.end(); } @@ -177,11 +176,11 @@ class GCModuleInfo : public ImmutablePass { /// Non-owning map to bypass linear search when finding the GCFunctionInfo /// associated with a particular Function. - typedef DenseMap finfo_map_type; + using finfo_map_type = DenseMap; finfo_map_type FInfoMap; public: - typedef SmallVector,1>::const_iterator iterator; + using iterator = SmallVector, 1>::const_iterator; static char ID; @@ -202,6 +201,7 @@ class GCModuleInfo : public ImmutablePass { /// will soon change. GCFunctionInfo &getFunctionInfo(const Function &F); }; -} -#endif +} // end namespace llvm + +#endif // LLVM_CODEGEN_GCMETADATA_H diff --git a/interpreter/llvm/src/include/llvm/CodeGen/GCMetadataPrinter.h b/interpreter/llvm/src/include/llvm/CodeGen/GCMetadataPrinter.h index 2208470291138..1cc69a7b71af5 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/GCMetadataPrinter.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/GCMetadataPrinter.h @@ -1,4 +1,4 @@ -//===-- llvm/CodeGen/GCMetadataPrinter.h - Prints asm GC tables -*- C++ -*-===// +//===- llvm/CodeGen/GCMetadataPrinter.h - Prints asm GC tables --*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -20,45 +20,48 @@ #ifndef LLVM_CODEGEN_GCMETADATAPRINTER_H #define LLVM_CODEGEN_GCMETADATAPRINTER_H -#include "llvm/CodeGen/GCMetadata.h" -#include "llvm/CodeGen/GCStrategy.h" #include "llvm/Support/Registry.h" namespace llvm { +class AsmPrinter; class GCMetadataPrinter; +class GCModuleInfo; +class GCStrategy; +class Module; /// GCMetadataPrinterRegistry - The GC assembly printer registry uses all the /// defaults from Registry. -typedef Registry GCMetadataPrinterRegistry; +using GCMetadataPrinterRegistry = Registry; /// GCMetadataPrinter - Emits GC metadata as assembly code. Instances are /// created, managed, and owned by the AsmPrinter. class GCMetadataPrinter { private: - GCStrategy *S; friend class AsmPrinter; + GCStrategy *S; + protected: // May only be subclassed. GCMetadataPrinter(); -private: +public: GCMetadataPrinter(const GCMetadataPrinter &) = delete; GCMetadataPrinter &operator=(const GCMetadataPrinter &) = delete; + virtual ~GCMetadataPrinter(); -public: GCStrategy &getStrategy() { return *S; } /// Called before the assembly for the module is generated by /// the AsmPrinter (but after target specific hooks.) 
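// An illustrative sketch, not part of this patch: a concrete printer subclass
// would typically override the two hooks below and register itself with the
// GCMetadataPrinterRegistry; the class name and strategy string here are
// hypothetical:
//
//   class MyGCPrinter : public GCMetadataPrinter {
//   public:
//     void beginAssembly(Module &M, GCModuleInfo &Info,
//                        AsmPrinter &AP) override { /* emit a prologue */ }
//     void finishAssembly(Module &M, GCModuleInfo &Info,
//                         AsmPrinter &AP) override {
//       // Walk each function's metadata and emit its stack-root table.
//       for (auto &FI : Info) { /* use FI->getFunction(), FI->roots_begin() */ }
//     }
//   };
//   static GCMetadataPrinterRegistry::Add<MyGCPrinter>
//       X("my-gc", "example collector metadata printer");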
virtual void beginAssembly(Module &M, GCModuleInfo &Info, AsmPrinter &AP) {} + /// Called after the assembly for the module is generated by /// the AsmPrinter (but before target specific hooks) virtual void finishAssembly(Module &M, GCModuleInfo &Info, AsmPrinter &AP) {} - - virtual ~GCMetadataPrinter(); }; -} -#endif +} // end namespace llvm + +#endif // LLVM_CODEGEN_GCMETADATAPRINTER_H diff --git a/interpreter/llvm/src/include/llvm/CodeGen/GCStrategy.h b/interpreter/llvm/src/include/llvm/CodeGen/GCStrategy.h index 5b1fafea25b57..16168e785f812 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/GCStrategy.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/GCStrategy.h @@ -174,7 +174,7 @@ class GCStrategy { /// Note that to use a custom GCMetadataPrinter w/gc.roots, you must also /// register your GCMetadataPrinter subclass with the /// GCMetadataPrinterRegistry as well. -typedef Registry GCRegistry; +using GCRegistry = Registry; } // end namespace llvm diff --git a/interpreter/llvm/src/include/llvm/CodeGen/GlobalISel/CallLowering.h b/interpreter/llvm/src/include/llvm/CodeGen/GlobalISel/CallLowering.h index 3e9a9d514cb87..e7ce1946889e3 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/GlobalISel/CallLowering.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/GlobalISel/CallLowering.h @@ -1,4 +1,4 @@ -//===-- llvm/CodeGen/GlobalISel/CallLowering.h - Call lowering --*- C++ -*-===// +//===- llvm/CodeGen/GlobalISel/CallLowering.h - Call lowering ---*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -15,21 +15,31 @@ #ifndef LLVM_CODEGEN_GLOBALISEL_CALLLOWERING_H #define LLVM_CODEGEN_GLOBALISEL_CALLLOWERING_H -#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/CodeGen/CallingConvLower.h" -#include "llvm/CodeGen/ValueTypes.h" -#include "llvm/IR/Function.h" +#include "llvm/CodeGen/MachineValueType.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetCallingConv.h" +#include +#include namespace llvm { -// Forward declarations. + +class DataLayout; +class Function; class MachineIRBuilder; class MachineOperand; +struct MachinePointerInfo; +class MachineRegisterInfo; class TargetLowering; +class Type; class Value; class CallLowering { const TargetLowering *TLI; + public: struct ArgInfo { unsigned Reg; @@ -49,6 +59,12 @@ class CallLowering { /// argument should go, exactly what happens can vary slightly. This /// class abstracts the differences. struct ValueHandler { + ValueHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, + CCAssignFn *AssignFn) + : MIRBuilder(MIRBuilder), MRI(MRI), AssignFn(AssignFn) {} + + virtual ~ValueHandler() = default; + /// Materialize a VReg containing the address of the specified /// stack-based object. This is either based on a FrameIndex or /// direct SP manipulation, depending on the context.
\p MPO @@ -89,12 +105,6 @@ class CallLowering { return AssignFn(ValNo, ValVT, LocVT, LocInfo, Info.Flags, State); } - ValueHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, - CCAssignFn *AssignFn) - : MIRBuilder(MIRBuilder), MRI(MRI), AssignFn(AssignFn) {} - - virtual ~ValueHandler() {} - MachineIRBuilder &MIRBuilder; MachineRegisterInfo &MRI; CCAssignFn *AssignFn; @@ -112,7 +122,6 @@ class CallLowering { return static_cast(TLI); } - template void setArgFlags(ArgInfo &Arg, unsigned OpNum, const DataLayout &DL, const FuncInfoTy &FuncInfo) const; @@ -126,7 +135,7 @@ class CallLowering { public: CallLowering(const TargetLowering *TLI) : TLI(TLI) {} - virtual ~CallLowering() {} + virtual ~CallLowering() = default; /// This hook must be implemented to lower outgoing return values, described /// by \p Val, into the specified virtual register \p VReg. @@ -200,6 +209,7 @@ class CallLowering { unsigned ResReg, ArrayRef ArgRegs, std::function GetCalleeReg) const; }; -} // End namespace llvm. -#endif +} // end namespace llvm + +#endif // LLVM_CODEGEN_GLOBALISEL_CALLLOWERING_H diff --git a/interpreter/llvm/src/include/llvm/CodeGen/GlobalISel/IRTranslator.h b/interpreter/llvm/src/include/llvm/CodeGen/GlobalISel/IRTranslator.h index e292e8913db06..7061c014d9b7f 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/GlobalISel/IRTranslator.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/GlobalISel/IRTranslator.h @@ -1,4 +1,4 @@ -//===-- llvm/CodeGen/GlobalISel/IRTranslator.h - IRTranslator ---*- C++ -*-===// +//===- llvm/CodeGen/GlobalISel/IRTranslator.h - IRTranslator ----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -19,24 +19,33 @@ #ifndef LLVM_CODEGEN_GLOBALISEL_IRTRANSLATOR_H #define LLVM_CODEGEN_GLOBALISEL_IRTRANSLATOR_H -#include "Types.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/CodeGen/GlobalISel/Types.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/IR/Intrinsics.h" +#include +#include namespace llvm { -// Forward declarations. + +class AllocaInst; class BasicBlock; +class CallInst; class CallLowering; class Constant; +class DataLayout; class Instruction; class MachineBasicBlock; class MachineFunction; class MachineInstr; -class OptimizationRemarkEmitter; class MachineRegisterInfo; +class OptimizationRemarkEmitter; +class PHINode; class TargetPassConfig; +class User; +class Value; // Technically the pass should run on a hypothetical MachineModule, // since it should translate Global into some sort of MachineGlobal. @@ -53,6 +62,7 @@ class IRTranslator : public MachineFunctionPass { private: /// Interface used to lower everything related to calls. const CallLowering *CLI; + /// Mapping of the values of the current LLVM IR function /// to the related virtual registers. ValueToVReg ValToVReg; @@ -67,7 +77,7 @@ // a mapping between the edges arriving at the BasicBlock and the corresponding // created MachineBasicBlocks. Some BasicBlocks that get translated to a // single MachineBasicBlock may also end up in this Map. - typedef std::pair CFGEdge; + using CFGEdge = std::pair; DenseMap> MachinePreds; // List of stubbed PHI instructions, for values and basic blocks to be filled @@ -165,7 +175,6 @@ return translateCompare(U, MIRBuilder); } - /// Add remaining operands onto phis we've translated.
Executed after all /// MachineBasicBlocks for the function have been created. void finishPendingPhis(); @@ -356,7 +365,7 @@ class IRTranslator : public MachineFunctionPass { MachineFunction *MF; /// MachineRegisterInfo used to create virtual registers. - MachineRegisterInfo *MRI; + MachineRegisterInfo *MRI = nullptr; const DataLayout *DL; @@ -430,5 +439,6 @@ class IRTranslator : public MachineFunctionPass { bool runOnMachineFunction(MachineFunction &MF) override; }; -} // End namespace llvm. -#endif +} // end namespace llvm + +#endif // LLVM_CODEGEN_GLOBALISEL_IRTRANSLATOR_H diff --git a/interpreter/llvm/src/include/llvm/CodeGen/GlobalISel/InstructionSelector.h b/interpreter/llvm/src/include/llvm/CodeGen/GlobalISel/InstructionSelector.h index 45f25f96ec1ff..1060d8fd667e6 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/GlobalISel/InstructionSelector.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/GlobalISel/InstructionSelector.h @@ -1,4 +1,4 @@ -//==-- llvm/CodeGen/GlobalISel/InstructionSelector.h -------------*- C++ -*-==// +//===- llvm/CodeGen/GlobalISel/InstructionSelector.h ------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -16,26 +16,32 @@ #ifndef LLVM_CODEGEN_GLOBALISEL_INSTRUCTIONSELECTOR_H #define LLVM_CODEGEN_GLOBALISEL_INSTRUCTIONSELECTOR_H -#include "llvm/ADT/Optional.h" -#include +#include "llvm/ADT/SmallVector.h" #include +#include +#include #include +#include +#include namespace llvm { + +class LLT; class MachineInstr; class MachineInstrBuilder; -class MachineFunction; class MachineOperand; class MachineRegisterInfo; class RegisterBankInfo; class TargetInstrInfo; +class TargetRegisterClass; class TargetRegisterInfo; /// Container class for CodeGen predicate results. /// This is convenient because std::bitset does not have a constructor /// with an initializer list of set bits. /// -/// Each InstructionSelector subclass should define a PredicateBitset class with: +/// Each InstructionSelector subclass should define a PredicateBitset class +/// with: /// const unsigned MAX_SUBTARGET_PREDICATES = 192; /// using PredicateBitset = PredicateBitsetImpl; /// and updating the constant to suit the target. 
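/// For illustration, a hypothetical target's selector might therefore contain:
///   class MyTargetInstructionSelector : public InstructionSelector {
///     static const unsigned MAX_SUBTARGET_PREDICATES = 192;
///     using PredicateBitset = PredicateBitsetImpl<MAX_SUBTARGET_PREDICATES>;
///   };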
Tablegen provides a suitable @@ -56,10 +62,136 @@ class PredicateBitsetImpl : public std::bitset { } }; +enum { + /// Record the specified instruction + /// - NewInsnID - Instruction ID to define + /// - InsnID - Instruction ID + /// - OpIdx - Operand index + GIM_RecordInsn, + + /// Check the feature bits + /// - Expected features + GIM_CheckFeatures, + + /// Check the opcode on the specified instruction + /// - InsnID - Instruction ID + /// - Expected opcode + GIM_CheckOpcode, + /// Check the instruction has the right number of operands + /// - InsnID - Instruction ID + /// - Expected number of operands + GIM_CheckNumOperands, + + /// Check the type for the specified operand + /// - InsnID - Instruction ID + /// - OpIdx - Operand index + /// - Expected type + GIM_CheckType, + /// Check the register bank for the specified operand + /// - InsnID - Instruction ID + /// - OpIdx - Operand index + /// - Expected register bank (specified as a register class) + GIM_CheckRegBankForClass, + /// Check the operand matches a complex predicate + /// - InsnID - Instruction ID + /// - OpIdx - Operand index + /// - RendererID - The renderer to hold the result + /// - Complex predicate ID + GIM_CheckComplexPattern, + /// Check the operand is a specific integer + /// - InsnID - Instruction ID + /// - OpIdx - Operand index + /// - Expected integer + GIM_CheckConstantInt, + /// Check the operand is a specific literal integer (i.e. MO.isImm() or + /// MO.isCImm() is true). + /// - InsnID - Instruction ID + /// - OpIdx - Operand index + /// - Expected integer + GIM_CheckLiteralInt, + /// Check the operand is a specific intrinsic ID + /// - InsnID - Instruction ID + /// - OpIdx - Operand index + /// - Expected Intrinsic ID + GIM_CheckIntrinsicID, + /// Check the specified operand is an MBB + /// - InsnID - Instruction ID + /// - OpIdx - Operand index + GIM_CheckIsMBB, + + /// Check if the specified operand is safe to fold into the current + /// instruction. 
+ /// - InsnID - Instruction ID + GIM_CheckIsSafeToFold, + + //=== Renderers === + + /// Mutate an instruction + /// - NewInsnID - Instruction ID to define + /// - OldInsnID - Instruction ID to mutate + /// - NewOpcode - The new opcode to use + GIR_MutateOpcode, + /// Build a new instruction + /// - InsnID - Instruction ID to define + /// - Opcode - The new opcode to use + GIR_BuildMI, + + /// Copy an operand to the specified instruction + /// - NewInsnID - Instruction ID to modify + /// - OldInsnID - Instruction ID to copy from + /// - OpIdx - The operand to copy + GIR_Copy, + /// Copy an operand to the specified instruction + /// - NewInsnID - Instruction ID to modify + /// - OldInsnID - Instruction ID to copy from + /// - OpIdx - The operand to copy + /// - SubRegIdx - The subregister to copy + GIR_CopySubReg, + /// Add an implicit register def to the specified instruction + /// - InsnID - Instruction ID to modify + /// - RegNum - The register to add + GIR_AddImplicitDef, + /// Add an implicit register use to the specified instruction + /// - InsnID - Instruction ID to modify + /// - RegNum - The register to add + GIR_AddImplicitUse, + /// Add a register to the specified instruction + /// - InsnID - Instruction ID to modify + /// - RegNum - The register to add + GIR_AddRegister, + /// Add an immediate to the specified instruction + /// - InsnID - Instruction ID to modify + /// - Imm - The immediate to add + GIR_AddImm, + /// Render complex operands to the specified instruction + /// - InsnID - Instruction ID to modify + /// - RendererID - The renderer to call + GIR_ComplexRenderer, + + /// Constrain an instruction operand to a register class. + /// - InsnID - Instruction ID to modify + /// - OpIdx - Operand index + /// - RCEnum - Register class enumeration value + GIR_ConstrainOperandRC, + /// Constrain an instruction's operands according to the instruction + /// description. + /// - InsnID - Instruction ID to modify + GIR_ConstrainSelectedInstOperands, + /// Merge all memory operands into instruction. + /// - InsnID - Instruction ID to modify + GIR_MergeMemOperands, + /// Erase from parent. + /// - InsnID - Instruction ID to erase + GIR_EraseFromParent, + + /// A successful emission + GIR_Done, +}; + /// Provides the logic to select generic machine instructions. class InstructionSelector { public: - virtual ~InstructionSelector() {} + virtual ~InstructionSelector() = default; /// Select the (possibly generic) instruction \p I to only use target-specific /// opcodes. It is OK to insert multiple instructions, but they cannot be @@ -75,10 +207,50 @@ class InstructionSelector { virtual bool select(MachineInstr &I) const = 0; protected: - typedef std::function ComplexRendererFn; + using ComplexRendererFn = std::function; + using RecordedMIVector = SmallVector; + using NewMIVector = SmallVector; + struct MatcherState { + std::vector Renderers; + RecordedMIVector MIs; + + MatcherState(unsigned MaxRenderers); + }; + +public: + template + struct MatcherInfoTy { + const LLT *TypeObjects; + const PredicateBitset *FeatureBitsets; + const std::vector ComplexPredicates; + }; + +protected: InstructionSelector(); + /// Execute a given matcher table and return true if the match was successful + /// and false otherwise.
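// An illustrative sketch, not part of this patch: a match table is a flat
// array of int64_t opcodes and operands drawn from the GIM_*/GIR_* enum
// above. A rule that matches a G_ADD of two s32 operands and mutates it into
// a target add (the target opcode and TypeID values are hypothetical) could
// be encoded as:
//
//   static const int64_t MatchTable[] = {
//     GIM_CheckOpcode, /*InsnID*/ 0, TargetOpcode::G_ADD,
//     GIM_CheckNumOperands, /*InsnID*/ 0, /*Expected*/ 3,
//     GIM_CheckType, /*InsnID*/ 0, /*OpIdx*/ 1, /*TypeID for s32*/ 0,
//     GIM_CheckType, /*InsnID*/ 0, /*OpIdx*/ 2, /*TypeID for s32*/ 0,
//     GIR_MutateOpcode, /*OldInsnID*/ 0, /*NewInsnID*/ 0, /*MyTgt::ADDrr*/ 42,
//     GIR_ConstrainSelectedInstOperands, /*InsnID*/ 0,
//     GIR_Done,
//   };
//
// executeMatchTable() below walks such an array one command at a time.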
+ template + bool executeMatchTable( + TgtInstructionSelector &ISel, NewMIVector &OutMIs, MatcherState &State, + const MatcherInfoTy &MatcherInfo, + const int64_t *MatchTable, const TargetInstrInfo &TII, + MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, + const RegisterBankInfo &RBI, + const PredicateBitset &AvailableFeatures) const; + + /// Constrain a register operand of an instruction \p I to a specified + /// register class. This could involve inserting COPYs before (for uses) or + /// after (for defs) and may replace the operand of \p I. + /// \returns whether operand regclass constraining succeeded. + bool constrainOperandRegToRegClass(MachineInstr &I, unsigned OpIdx, + const TargetRegisterClass &RC, + const TargetInstrInfo &TII, + const TargetRegisterInfo &TRI, + const RegisterBankInfo &RBI) const; + /// Mutate the newly-selected instruction \p I to constrain its (possibly /// generic) virtual register operands to the instruction's register class. /// This could involve inserting COPYs before (for uses) or after (for defs). @@ -99,6 +271,6 @@ class InstructionSelector { bool isObviouslySafeToFold(MachineInstr &MI) const; }; -} // End namespace llvm. +} // end namespace llvm -#endif +#endif // LLVM_CODEGEN_GLOBALISEL_INSTRUCTIONSELECTOR_H diff --git a/interpreter/llvm/src/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h b/interpreter/llvm/src/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h new file mode 100644 index 0000000000000..98b6b859b9e26 --- /dev/null +++ b/interpreter/llvm/src/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h @@ -0,0 +1,337 @@ +//==-- llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h ---------*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file This file declares the API for the instruction selector. +/// This class is responsible for selecting machine instructions. +/// It's implemented by the target. It's used by the InstructionSelect pass. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_GLOBALISEL_INSTRUCTIONSELECTORIMPL_H +#define LLVM_CODEGEN_GLOBALISEL_INSTRUCTIONSELECTORIMPL_H + +namespace llvm { +template +bool InstructionSelector::executeMatchTable( + TgtInstructionSelector &ISel, NewMIVector &OutMIs, MatcherState &State, + const MatcherInfoTy &MatcherInfo, + const int64_t *MatchTable, const TargetInstrInfo &TII, + MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, + const RegisterBankInfo &RBI, + const PredicateBitset &AvailableFeatures) const { + const int64_t *Command = MatchTable; + while (true) { + switch (*Command++) { + case GIM_RecordInsn: { + int64_t NewInsnID = *Command++; + int64_t InsnID = *Command++; + int64_t OpIdx = *Command++; + + // As an optimisation we require that MIs[0] is always the root. Refuse + // any attempt to modify it. 
+ assert(NewInsnID != 0 && "Refusing to modify MIs[0]"); + (void)NewInsnID; + + MachineOperand &MO = State.MIs[InsnID]->getOperand(OpIdx); + if (!MO.isReg()) { + DEBUG(dbgs() << "Rejected (not a register)\n"); + return false; + } + if (TRI.isPhysicalRegister(MO.getReg())) { + DEBUG(dbgs() << "Rejected (is a physical register)\n"); + return false; + } + + assert((size_t)NewInsnID == State.MIs.size() && + "Expected to store MIs in order"); + State.MIs.push_back(MRI.getVRegDef(MO.getReg())); + DEBUG(dbgs() << "MIs[" << NewInsnID << "] = GIM_RecordInsn(" << InsnID + << ", " << OpIdx << ")\n"); + break; + } + + case GIM_CheckFeatures: { + int64_t ExpectedBitsetID = *Command++; + DEBUG(dbgs() << "GIM_CheckFeatures(ExpectedBitsetID=" << ExpectedBitsetID + << ")\n"); + if ((AvailableFeatures & MatcherInfo.FeatureBitsets[ExpectedBitsetID]) != + MatcherInfo.FeatureBitsets[ExpectedBitsetID]) { + DEBUG(dbgs() << "Rejected\n"); + return false; + } + break; + } + + case GIM_CheckOpcode: { + int64_t InsnID = *Command++; + int64_t Expected = *Command++; + + unsigned Opcode = State.MIs[InsnID]->getOpcode(); + DEBUG(dbgs() << "GIM_CheckOpcode(MIs[" << InsnID << "], ExpectedOpcode=" + << Expected << ") // Got=" << Opcode << "\n"); + assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); + if (Opcode != Expected) + return false; + break; + } + case GIM_CheckNumOperands: { + int64_t InsnID = *Command++; + int64_t Expected = *Command++; + DEBUG(dbgs() << "GIM_CheckNumOperands(MIs[" << InsnID + << "], Expected=" << Expected << ")\n"); + assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); + if (State.MIs[InsnID]->getNumOperands() != Expected) + return false; + break; + } + + case GIM_CheckType: { + int64_t InsnID = *Command++; + int64_t OpIdx = *Command++; + int64_t TypeID = *Command++; + DEBUG(dbgs() << "GIM_CheckType(MIs[" << InsnID << "]->getOperand(" + << OpIdx << "), TypeID=" << TypeID << ")\n"); + assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); + if (MRI.getType(State.MIs[InsnID]->getOperand(OpIdx).getReg()) != + MatcherInfo.TypeObjects[TypeID]) + return false; + break; + } + case GIM_CheckRegBankForClass: { + int64_t InsnID = *Command++; + int64_t OpIdx = *Command++; + int64_t RCEnum = *Command++; + DEBUG(dbgs() << "GIM_CheckRegBankForClass(MIs[" << InsnID + << "]->getOperand(" << OpIdx << "), RCEnum=" << RCEnum + << ")\n"); + assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); + if (&RBI.getRegBankFromRegClass(*TRI.getRegClass(RCEnum)) != + RBI.getRegBank(State.MIs[InsnID]->getOperand(OpIdx).getReg(), MRI, TRI)) + return false; + break; + } + case GIM_CheckComplexPattern: { + int64_t InsnID = *Command++; + int64_t OpIdx = *Command++; + int64_t RendererID = *Command++; + int64_t ComplexPredicateID = *Command++; + DEBUG(dbgs() << "State.Renderers[" << RendererID + << "] = GIM_CheckComplexPattern(MIs[" << InsnID + << "]->getOperand(" << OpIdx + << "), ComplexPredicateID=" << ComplexPredicateID << ")\n"); + assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); + // FIXME: Use std::invoke() when it's available. 
+ if (!(State.Renderers[RendererID] = + (ISel.*MatcherInfo.ComplexPredicates[ComplexPredicateID])( + State.MIs[InsnID]->getOperand(OpIdx)))) + return false; + break; + } + case GIM_CheckConstantInt: { + int64_t InsnID = *Command++; + int64_t OpIdx = *Command++; + int64_t Value = *Command++; + DEBUG(dbgs() << "GIM_CheckConstantInt(MIs[" << InsnID << "]->getOperand(" + << OpIdx << "), Value=" << Value << ")\n"); + assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); + if (!isOperandImmEqual(State.MIs[InsnID]->getOperand(OpIdx), Value, MRI)) + return false; + break; + } + case GIM_CheckLiteralInt: { + int64_t InsnID = *Command++; + int64_t OpIdx = *Command++; + int64_t Value = *Command++; + DEBUG(dbgs() << "GIM_CheckLiteralInt(MIs[" << InsnID << "]->getOperand(" << OpIdx + << "), Value=" << Value << ")\n"); + assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); + MachineOperand &OM = State.MIs[InsnID]->getOperand(OpIdx); + if (!OM.isCImm() || !OM.getCImm()->equalsInt(Value)) + return false; + break; + } + case GIM_CheckIntrinsicID: { + int64_t InsnID = *Command++; + int64_t OpIdx = *Command++; + int64_t Value = *Command++; + DEBUG(dbgs() << "GIM_CheckIntrinsicID(MIs[" << InsnID << "]->getOperand(" << OpIdx + << "), Value=" << Value << ")\n"); + assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); + MachineOperand &OM = State.MIs[InsnID]->getOperand(OpIdx); + if (!OM.isIntrinsicID() || OM.getIntrinsicID() != Value) + return false; + break; + } + case GIM_CheckIsMBB: { + int64_t InsnID = *Command++; + int64_t OpIdx = *Command++; + DEBUG(dbgs() << "GIM_CheckIsMBB(MIs[" << InsnID << "]->getOperand(" + << OpIdx << "))\n"); + assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); + if (!State.MIs[InsnID]->getOperand(OpIdx).isMBB()) + return false; + break; + } + + case GIM_CheckIsSafeToFold: { + int64_t InsnID = *Command++; + DEBUG(dbgs() << "GIM_CheckIsSafeToFold(MIs[" << InsnID << "])\n"); + assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); + if (!isObviouslySafeToFold(*State.MIs[InsnID])) + return false; + break; + } + + case GIR_MutateOpcode: { + int64_t OldInsnID = *Command++; + int64_t NewInsnID = *Command++; + int64_t NewOpcode = *Command++; + assert((size_t)NewInsnID == OutMIs.size() && + "Expected to store MIs in order"); + OutMIs.push_back( + MachineInstrBuilder(*State.MIs[OldInsnID]->getParent()->getParent(), + State.MIs[OldInsnID])); + OutMIs[NewInsnID]->setDesc(TII.get(NewOpcode)); + DEBUG(dbgs() << "GIR_MutateOpcode(OutMIs[" << NewInsnID << "], MIs[" + << OldInsnID << "], " << NewOpcode << ")\n"); + break; + } + case GIR_BuildMI: { + int64_t InsnID = *Command++; + int64_t Opcode = *Command++; + assert((size_t)InsnID == OutMIs.size() && + "Expected to store MIs in order"); + (void)InsnID; + OutMIs.push_back(BuildMI(*State.MIs[0]->getParent(), State.MIs[0], + State.MIs[0]->getDebugLoc(), TII.get(Opcode))); + DEBUG(dbgs() << "GIR_BuildMI(OutMIs[" << InsnID << "], " << Opcode + << ")\n"); + break; + } + + case GIR_Copy: { + int64_t NewInsnID = *Command++; + int64_t OldInsnID = *Command++; + int64_t OpIdx = *Command++; + assert(OutMIs[NewInsnID] && "Attempted to add to undefined instruction"); + OutMIs[NewInsnID].add(State.MIs[OldInsnID]->getOperand(OpIdx)); + DEBUG(dbgs() << "GIR_Copy(OutMIs[" << NewInsnID << "], MIs[" << OldInsnID + << "], " << OpIdx << ")\n"); + break; + } + case GIR_CopySubReg: { + int64_t NewInsnID = *Command++; + int64_t OldInsnID = *Command++; + int64_t OpIdx = *Command++; + int64_t 
SubRegIdx = *Command++; + assert(OutMIs[NewInsnID] && "Attempted to add to undefined instruction"); + OutMIs[NewInsnID].addReg(State.MIs[OldInsnID]->getOperand(OpIdx).getReg(), + 0, SubRegIdx); + DEBUG(dbgs() << "GIR_CopySubReg(OutMIs[" << NewInsnID << "], MIs[" + << OldInsnID << "], " << OpIdx << ", " << SubRegIdx + << ")\n"); + break; + } + case GIR_AddImplicitDef: { + int64_t InsnID = *Command++; + int64_t RegNum = *Command++; + assert(OutMIs[InsnID] && "Attempted to add to undefined instruction"); + OutMIs[InsnID].addDef(RegNum, RegState::Implicit); + DEBUG(dbgs() << "GIR_AddImplicitDef(OutMIs[" << InsnID << "], " << RegNum + << ")\n"); + break; + } + case GIR_AddImplicitUse: { + int64_t InsnID = *Command++; + int64_t RegNum = *Command++; + assert(OutMIs[InsnID] && "Attempted to add to undefined instruction"); + OutMIs[InsnID].addUse(RegNum, RegState::Implicit); + DEBUG(dbgs() << "GIR_AddImplicitUse(OutMIs[" << InsnID << "], " << RegNum + << ")\n"); + break; + } + case GIR_AddRegister: { + int64_t InsnID = *Command++; + int64_t RegNum = *Command++; + assert(OutMIs[InsnID] && "Attempted to add to undefined instruction"); + OutMIs[InsnID].addReg(RegNum); + DEBUG(dbgs() << "GIR_AddRegister(OutMIs[" << InsnID << "], " << RegNum + << ")\n"); + break; + } + case GIR_AddImm: { + int64_t InsnID = *Command++; + int64_t Imm = *Command++; + assert(OutMIs[InsnID] && "Attempted to add to undefined instruction"); + OutMIs[InsnID].addImm(Imm); + DEBUG(dbgs() << "GIR_AddImm(OutMIs[" << InsnID << "], " << Imm << ")\n"); + break; + } + case GIR_ComplexRenderer: { + int64_t InsnID = *Command++; + int64_t RendererID = *Command++; + assert(OutMIs[InsnID] && "Attempted to add to undefined instruction"); + State.Renderers[RendererID](OutMIs[InsnID]); + DEBUG(dbgs() << "GIR_ComplexRenderer(OutMIs[" << InsnID << "], " + << RendererID << ")\n"); + break; + } + + case GIR_ConstrainOperandRC: { + int64_t InsnID = *Command++; + int64_t OpIdx = *Command++; + int64_t RCEnum = *Command++; + assert(OutMIs[InsnID] && "Attempted to add to undefined instruction"); + constrainOperandRegToRegClass(*OutMIs[InsnID].getInstr(), OpIdx, + *TRI.getRegClass(RCEnum), TII, TRI, RBI); + DEBUG(dbgs() << "GIR_ConstrainOperandRC(OutMIs[" << InsnID << "], " + << OpIdx << ", " << RCEnum << ")\n"); + break; + } + case GIR_ConstrainSelectedInstOperands: { + int64_t InsnID = *Command++; + assert(OutMIs[InsnID] && "Attempted to add to undefined instruction"); + constrainSelectedInstRegOperands(*OutMIs[InsnID].getInstr(), TII, TRI, + RBI); + DEBUG(dbgs() << "GIR_ConstrainSelectedInstOperands(OutMIs[" << InsnID + << "])\n"); + break; + } + case GIR_MergeMemOperands: { + int64_t InsnID = *Command++; + assert(OutMIs[InsnID] && "Attempted to add to undefined instruction"); + for (const auto *FromMI : State.MIs) + for (const auto &MMO : FromMI->memoperands()) + OutMIs[InsnID].addMemOperand(MMO); + DEBUG(dbgs() << "GIR_MergeMemOperands(OutMIs[" << InsnID << "])\n"); + break; + } + case GIR_EraseFromParent: { + int64_t InsnID = *Command++; + assert(State.MIs[InsnID] && + "Attempted to erase an undefined instruction"); + State.MIs[InsnID]->eraseFromParent(); + DEBUG(dbgs() << "GIR_EraseFromParent(MIs[" << InsnID << "])\n"); + break; + } + + case GIR_Done: + DEBUG(dbgs() << "GIR_Done"); + return true; + + default: + llvm_unreachable("Unexpected command"); + } + } +} + +} // end namespace llvm + +#endif // LLVM_CODEGEN_GLOBALISEL_INSTRUCTIONSELECTORIMPL_H diff --git a/interpreter/llvm/src/include/llvm/CodeGen/GlobalISel/Legalizer.h 
b/interpreter/llvm/src/include/llvm/CodeGen/GlobalISel/Legalizer.h index bed7230cc013b..9b9b8b563a30e 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/GlobalISel/Legalizer.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/GlobalISel/Legalizer.h @@ -59,7 +59,7 @@ class Legalizer : public MachineFunctionPass { const TargetInstrInfo &TII); bool combineMerges(MachineInstr &MI, MachineRegisterInfo &MRI, - const TargetInstrInfo &TII); + const TargetInstrInfo &TII, MachineIRBuilder &MIRBuilder); bool runOnMachineFunction(MachineFunction &MF) override; }; diff --git a/interpreter/llvm/src/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/interpreter/llvm/src/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h index 8fecafdc08d0e..1fd45b52e3ac7 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h @@ -21,9 +21,11 @@ #ifndef LLVM_CODEGEN_GLOBALISEL_MACHINELEGALIZEHELPER_H #define LLVM_CODEGEN_GLOBALISEL_MACHINELEGALIZEHELPER_H +#include "llvm/CodeGen/GlobalISel/CallLowering.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" -#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/LowLevelType.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/RuntimeLibcalls.h" namespace llvm { // Forward declarations. @@ -99,6 +101,12 @@ class LegalizerHelper { const LegalizerInfo &LI; }; +/// Helper function that creates the given libcall. +LegalizerHelper::LegalizeResult +createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall, + const CallLowering::ArgInfo &Result, + ArrayRef Args); + } // End namespace llvm. #endif diff --git a/interpreter/llvm/src/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h b/interpreter/llvm/src/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h index 21354ae20ed14..c259e93fdd366 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h @@ -1,4 +1,4 @@ -//==-- llvm/CodeGen/GlobalISel/LegalizerInfo.h -------------------*- C++ -*-==// +//===- llvm/CodeGen/GlobalISel/LegalizerInfo.h ------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -12,33 +12,36 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CODEGEN_GLOBALISEL_MACHINELEGALIZER_H -#define LLVM_CODEGEN_GLOBALISEL_MACHINELEGALIZER_H +#ifndef LLVM_CODEGEN_GLOBALISEL_LEGALIZERINFO_H +#define LLVM_CODEGEN_GLOBALISEL_LEGALIZERINFO_H #include "llvm/ADT/DenseMap.h" -#include "llvm/CodeGen/LowLevelType.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/LowLevelTypeImpl.h" #include "llvm/Target/TargetOpcodes.h" - #include -#include +#include +#include +#include namespace llvm { -class LLVMContext; + class MachineInstr; class MachineIRBuilder; class MachineRegisterInfo; -class Type; -class VectorType; /// Legalization is decided based on an instruction's opcode, which type slot /// we're considering, and what the existing type is. These aspects are gathered /// together for convenience in the InstrAspect class. 
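// An illustrative sketch, not part of this patch: a target's LegalizerInfo
// constructor typically fills these tables keyed on (opcode, type index, type)
// aspects and then freezes them, e.g.:
//
//   setAction({TargetOpcode::G_ADD, 0, LLT::scalar(8)}, WidenScalar);
//   setAction({TargetOpcode::G_ADD, 0, LLT::scalar(32)}, Legal);
//   computeTables();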
struct InstrAspect { unsigned Opcode; - unsigned Idx; + unsigned Idx = 0; LLT Type; - InstrAspect(unsigned Opcode, LLT Type) : Opcode(Opcode), Idx(0), Type(Type) {} + InstrAspect(unsigned Opcode, LLT Type) : Opcode(Opcode), Type(Type) {} InstrAspect(unsigned Opcode, unsigned Idx, LLT Type) : Opcode(Opcode), Idx(Idx), Type(Type) {} @@ -104,6 +107,19 @@ class LegalizerInfo { /// before any query is made or incorrect results may be returned. void computeTables(); + static bool needsLegalizingToDifferentSize(const LegalizeAction Action) { + switch (Action) { + case NarrowScalar: + case WidenScalar: + case FewerElements: + case MoreElements: + case Unsupported: + return true; + default: + return false; + } + } + /// More friendly way to set an action for common types that have an LLT /// representation. void setAction(const InstrAspect &Aspect, LegalizeAction Action) { @@ -125,7 +141,6 @@ class LegalizerInfo { ScalarInVectorActions[std::make_pair(Opcode, ScalarTy)] = Action; } - /// Determine what action should be taken to legalize the given generic /// instruction opcode, type-index and type. Requires computeTables to have /// been called. @@ -145,8 +160,8 @@ class LegalizerInfo { /// Iterate the given function (typically something like doubling the width) /// on Ty until we find a legal type for this operation. - Optional findLegalType(const InstrAspect &Aspect, - function_ref NextType) const { + Optional findLegalizableSize(const InstrAspect &Aspect, + function_ref NextType) const { LegalizeAction Action; const TypeMap &Map = Actions[Aspect.Opcode - FirstOp][Aspect.Idx]; LLT Ty = Aspect.Type; do { Ty = NextType(Ty); auto ActionIt = Map.find(Ty); if (ActionIt == Map.end()) { auto DefaultIt = DefaultActions.find(Aspect.Opcode); if (DefaultIt == DefaultActions.end()) return None; Action = DefaultIt->second; - } - else + } else Action = ActionIt->second; - } while(Action != Legal); + } while (needsLegalizingToDifferentSize(Action)); return Ty; } @@ -203,18 +217,17 @@ class LegalizerInfo { static const int FirstOp = TargetOpcode::PRE_ISEL_GENERIC_OPCODE_START; static const int LastOp = TargetOpcode::PRE_ISEL_GENERIC_OPCODE_END; - typedef DenseMap TypeMap; - typedef DenseMap, LegalizeAction> SIVActionMap; + using TypeMap = DenseMap; + using SIVActionMap = DenseMap, LegalizeAction>; SmallVector Actions[LastOp - FirstOp + 1]; SIVActionMap ScalarInVectorActions; DenseMap, uint16_t> MaxLegalVectorElts; DenseMap DefaultActions; - bool TablesInitialized; + bool TablesInitialized = false; }; +} // end namespace llvm -} // End namespace llvm. - -#endif +#endif // LLVM_CODEGEN_GLOBALISEL_LEGALIZERINFO_H diff --git a/interpreter/llvm/src/include/llvm/CodeGen/GlobalISel/Localizer.h b/interpreter/llvm/src/include/llvm/CodeGen/GlobalISel/Localizer.h new file mode 100644 index 0000000000000..0a46eb9e7840d --- /dev/null +++ b/interpreter/llvm/src/include/llvm/CodeGen/GlobalISel/Localizer.h @@ -0,0 +1,78 @@ +//== llvm/CodeGen/GlobalISel/Localizer.h - Localizer -------------*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file This file describes the interface of the Localizer pass. +/// This pass moves/duplicates constant-like instructions close to their uses. +/// Its primary goal is to work around the deficiencies of the fast register +/// allocator. +/// With GlobalISel constants are all materialized in the entry block of +/// a function.
However, the fast allocator cannot rematerialize constants and +/// has a lot more live-ranges to deal with and will most likely end up +/// spilling a lot. +/// By pushing the constants close to their use, we only create small +/// live-ranges. +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_GLOBALISEL_LOCALIZER_H +#define LLVM_CODEGEN_GLOBALISEL_LOCALIZER_H + +#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/CodeGen/MachineFunctionPass.h" + +namespace llvm { +// Forward declarations. +class MachineRegisterInfo; + +/// This pass implements the localization mechanism described at the +/// top of this file. One specificity of the implementation is that +/// it will materialize one and only one instance of a constant per +/// basic block, thus enabling reuse of that constant within that block. +/// Moreover, it only materializes constants in blocks where they +/// are used. PHI uses are considered to happen at the end of the +/// related predecessor. +class Localizer : public MachineFunctionPass { +public: + static char ID; + +private: + /// MRI contains all the register class/bank information that this + /// pass uses and updates. + MachineRegisterInfo *MRI; + + /// Check whether or not \p MI needs to be moved close to its uses. + static bool shouldLocalize(const MachineInstr &MI); + + /// Check if \p MOUse is used in the same basic block as \p Def. + /// If the use is in the same block, we say it is local. + /// When the use is not local, \p InsertMBB will contain the basic + /// block where to insert \p Def to have a local use. + static bool isLocalUse(MachineOperand &MOUse, const MachineInstr &Def, + MachineBasicBlock *&InsertMBB); + + /// Initialize the field members using \p MF. + void init(MachineFunction &MF); + +public: + Localizer(); + + StringRef getPassName() const override { return "Localizer"; } + + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties() + .set(MachineFunctionProperties::Property::IsSSA) + .set(MachineFunctionProperties::Property::Legalized) + .set(MachineFunctionProperties::Property::RegBankSelected); + } + + bool runOnMachineFunction(MachineFunction &MF) override; +}; + +} // End namespace llvm. + +#endif diff --git a/interpreter/llvm/src/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/interpreter/llvm/src/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h index 6b662a7f74136..85e6fef1f3c26 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h @@ -16,9 +16,10 @@ #include "llvm/CodeGen/GlobalISel/Types.h" +#include "llvm/CodeGen/LowLevelType.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/LowLevelType.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugLoc.h" @@ -40,8 +41,8 @@ class MachineIRBuilder { MachineFunction *MF; /// Information used to access the description of the opcodes. const TargetInstrInfo *TII; - /// Information used to verify types are consistent. - const MachineRegisterInfo *MRI; + /// Information used to verify types are consistent and to create virtual registers. + MachineRegisterInfo *MRI; /// Debug location to be set to any instruction we create.
DebugLoc DL; @@ -59,6 +60,21 @@ class MachineIRBuilder { } void validateTruncExt(unsigned Dst, unsigned Src, bool IsExtend); + MachineInstrBuilder buildBinaryOp(unsigned Opcode, unsigned Res, unsigned Op0, unsigned Op1); + + unsigned getDestFromArg(unsigned Reg) { return Reg; } + unsigned getDestFromArg(LLT Ty) { + return getMF().getRegInfo().createGenericVirtualRegister(Ty); + } + unsigned getDestFromArg(const TargetRegisterClass *RC) { + return getMF().getRegInfo().createVirtualRegister(RC); + } + + unsigned getRegFromArg(unsigned Reg) { return Reg; } + + unsigned getRegFromArg(const MachineInstrBuilder &MIB) { + return MIB->getOperand(0).getReg(); + } public: /// Getter for the function we currently build. @@ -120,6 +136,22 @@ class MachineIRBuilder { /// \return a MachineInstrBuilder for the newly created instruction. MachineInstrBuilder buildInstr(unsigned Opcode); + /// DAG-like generic method for building arbitrary instructions as above. + /// \Opc opcode for the instruction. + /// \Ty Either LLT/TargetRegisterClass/unsigned types for Dst + /// \Args Variadic list of uses of types (unsigned/MachineInstrBuilder) + /// Uses of type MachineInstrBuilder will perform + /// getOperand(0).getReg() to convert to register. + template + MachineInstrBuilder buildInstr(unsigned Opc, DstTy &&Ty, + UseArgsTy &&... Args) { + auto MIB = buildInstr(Opc).addDef(getDestFromArg(Ty)); + unsigned It[] = {(getRegFromArg(Args))...}; + for (const auto &i : It) + MIB.addUse(i); + return MIB; + } + /// Build but don't insert = \p Opcode . /// /// \pre setMF, setBasicBlock or setMI must have been called. @@ -188,6 +220,11 @@ class MachineIRBuilder { /// \return a MachineInstrBuilder for the newly created instruction. MachineInstrBuilder buildAdd(unsigned Res, unsigned Op0, unsigned Op1); + template + MachineInstrBuilder buildAdd(DstTy &&Ty, UseArgsTy &&... UseArgs) { + unsigned Res = getDestFromArg(Ty); + return buildAdd(Res, (getRegFromArg(UseArgs))...); + } /// Build and insert \p Res = G_SUB \p Op0, \p Op1 /// @@ -229,6 +266,26 @@ class MachineIRBuilder { MachineInstrBuilder buildGEP(unsigned Res, unsigned Op0, unsigned Op1); + /// Materialize and insert \p Res = G_GEP \p Op0, (G_CONSTANT \p Value) + /// + /// G_GEP adds \p Value bytes to the pointer specified by \p Op0, + /// storing the resulting pointer in \p Res. If \p Value is zero then no + /// G_GEP or G_CONSTANT will be created and \p Op0 will be assigned to + /// \p Res. + /// + /// \pre setBasicBlock or setMI must have been called. + /// \pre \p Op0 must be a generic virtual register with pointer type. + /// \pre \p ValueTy must be a scalar type. + /// \pre \p Res must be 0. This is to detect confusion between + /// materializeGEP() and buildGEP(). + /// \post \p Res will either be a new generic virtual register of the same + /// type as \p Op0 or \p Op0 itself. + /// + /// \return a MachineInstrBuilder for the newly created instruction. + Optional materializeGEP(unsigned &Res, unsigned Op0, + const LLT &ValueTy, + uint64_t Value); + /// Build and insert \p Res = G_PTR_MASK \p Op0, \p NumBits /// /// G_PTR_MASK clears the low bits of a pointer operand without destroying its @@ -275,6 +332,18 @@ class MachineIRBuilder { MachineInstrBuilder buildAnd(unsigned Res, unsigned Op0, unsigned Op1); + /// Build and insert \p Res = G_OR \p Op0, \p Op1 + /// + /// G_OR sets \p Res to the bitwise or of integer parameters \p Op0 and \p + /// Op1. + /// + /// \pre setBasicBlock or setMI must have been called.
+ /// \pre \p Res, \p Op0 and \p Op1 must be generic virtual registers + /// with the same (scalar or vector) type. + /// + /// \return a MachineInstrBuilder for the newly created instruction. + MachineInstrBuilder buildOr(unsigned Res, unsigned Op0, unsigned Op1); + /// Build and insert \p Res = G_ANYEXT \p Op0 /// /// G_ANYEXT produces a register of the specified width, with bits 0 to @@ -396,6 +465,10 @@ class MachineIRBuilder { /// \return The newly created instruction. MachineInstrBuilder buildConstant(unsigned Res, int64_t Val); + template <typename DstType> + MachineInstrBuilder buildConstant(DstType &&Res, int64_t Val) { + return buildConstant(getDestFromArg(Res), Val); + } /// Build and insert \p Res = G_FCONSTANT \p Val /// /// G_FCONSTANT is a floating-point constant with the specified size and @@ -451,10 +524,12 @@ class MachineIRBuilder { /// Build and insert \p Res = IMPLICIT_DEF. MachineInstrBuilder buildUndef(unsigned Dst); - /// Build and insert \p Res = G_SEQUENCE \p Op0, \p Idx0... + /// Build and insert instructions to put \p Ops together at the specified \p + /// Indices to form a larger register. /// - /// G_SEQUENCE inserts each element of Ops into an IMPLICIT_DEF register, - /// where each entry starts at the bit-index specified by \p Indices. + /// If the types of the input registers are uniform and cover the entirety of + /// \p Res then a G_MERGE_VALUES will be produced. Otherwise, an IMPLICIT_DEF + /// followed by a sequence of G_INSERT instructions is produced. /// /// \pre setBasicBlock or setMI must have been called. /// \pre The final element of the sequence must not extend past the end of the @@ -462,11 +537,8 @@ class MachineIRBuilder { /// \pre The bits defined by each Op (derived from index and scalar size) must /// not overlap. /// \pre \p Indices must be in ascending order of bit position. - /// - /// \return a MachineInstrBuilder for the newly created instruction. - MachineInstrBuilder buildSequence(unsigned Res, - ArrayRef<unsigned> Ops, - ArrayRef<unsigned> Indices); + void buildSequence(unsigned Res, ArrayRef<unsigned> Ops, + ArrayRef<uint64_t> Indices); /// Build and insert \p Res = G_MERGE_VALUES \p Op0, ... /// @@ -493,24 +565,6 @@ class MachineIRBuilder { /// \return a MachineInstrBuilder for the newly created instruction. MachineInstrBuilder buildUnmerge(ArrayRef<unsigned> Res, unsigned Op); - void addUsesWithIndices(MachineInstrBuilder MIB) {} - - template <typename... ArgTys> - void addUsesWithIndices(MachineInstrBuilder MIB, unsigned Reg, - unsigned BitIndex, ArgTys... Args) { - MIB.addUse(Reg).addImm(BitIndex); - addUsesWithIndices(MIB, Args...); - } - - template <typename... ArgTys> - MachineInstrBuilder buildSequence(unsigned Res, unsigned Op, - unsigned Index, ArgTys...
Args) { - MachineInstrBuilder MIB = - buildInstr(TargetOpcode::G_SEQUENCE).addDef(Res); - addUsesWithIndices(MIB, Op, Index, Args...); - return MIB; - } - MachineInstrBuilder buildInsert(unsigned Res, unsigned Src, unsigned Op, unsigned Index); diff --git a/interpreter/llvm/src/include/llvm/CodeGen/GlobalISel/RegBankSelect.h b/interpreter/llvm/src/include/llvm/CodeGen/GlobalISel/RegBankSelect.h index f610bc02b6f26..676955c33fe9c 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/GlobalISel/RegBankSelect.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/GlobalISel/RegBankSelect.h @@ -1,4 +1,4 @@ -//== llvm/CodeGen/GlobalISel/RegBankSelect.h - Reg Bank Selector -*- C++ -*-==// +//=- llvm/CodeGen/GlobalISel/RegBankSelect.h - Reg Bank Selector --*- C++ -*-=// // // The LLVM Compiler Infrastructure // @@ -64,20 +64,27 @@ #ifndef LLVM_CODEGEN_GLOBALISEL_REGBANKSELECT_H #define LLVM_CODEGEN_GLOBALISEL_REGBANKSELECT_H +#include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" +#include +#include +#include namespace llvm { -// Forward declarations. + class BlockFrequency; -class MachineBranchProbabilityInfo; class MachineBlockFrequencyInfo; +class MachineBranchProbabilityInfo; +class MachineOperand; class MachineRegisterInfo; +class Pass; +class raw_ostream; class TargetPassConfig; class TargetRegisterInfo; -class raw_ostream; /// This pass implements the reg bank selector pass used in the GlobalISel /// pipeline. At the end of this pass, all register operands have been assigned @@ -105,6 +112,7 @@ class RegBankSelect : public MachineFunctionPass { protected: /// Tell if the insert point has already been materialized. bool WasMaterialized = false; + /// Materialize the insertion point. /// /// If isSplit() is true, this involves actually splitting @@ -128,7 +136,7 @@ class RegBankSelect : public MachineFunctionPass { virtual MachineBasicBlock::iterator getPointImpl() = 0; public: - virtual ~InsertPoint() {} + virtual ~InsertPoint() = default; /// The first call to this method will cause the splitting to /// happen if need be, then subsequent calls just return @@ -197,6 +205,7 @@ private: /// Insertion point. MachineInstr &Instr; + /// Whether the insertion point is before or after Instr. bool Before; @@ -216,6 +225,7 @@ public: /// Create an insertion point before (\p Before=true) or after \p Instr. InstrInsertPoint(MachineInstr &Instr, bool Before = true); + bool isSplit() const override; uint64_t frequency(const Pass &P) const override; @@ -228,6 +238,7 @@ private: /// Insertion point. MachineBasicBlock &MBB; + /// Whether the insertion point is at the beginning or end of MBB. bool Beginning; @@ -252,6 +263,7 @@ assert((Beginning || MBB.getFirstTerminator() == MBB.end()) && "Invalid end point"); } + bool isSplit() const override { return false; } uint64_t frequency(const Pass &P) const override; bool canMaterialize() const override { return true; }; @@ -262,10 +274,12 @@ private: /// Source of the edge. MachineBasicBlock &Src; + /// Destination of the edge.
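// [Editor's note] Illustrative sketch, not part of the patch: how the
// DAG-like variadic builders added to MachineIRBuilder in the hunks above
// compose. An LLT destination creates a fresh generic vreg via
// getDestFromArg(), and a MachineInstrBuilder argument decays to its def
// register via getRegFromArg(). Src0 and Src1 are hypothetical vregs.
static void buildExample(MachineIRBuilder &MIRBuilder, unsigned Src0,
                         unsigned Src1) {
  LLT S32 = LLT::scalar(32);
  auto Cst = MIRBuilder.buildConstant(S32, 42);    // def lands in a new vreg
  auto Sum = MIRBuilder.buildAdd(S32, Src0, Cst);  // mixes unsigned and MIB
  MIRBuilder.buildInstr(TargetOpcode::G_AND, S32, Sum, Src1);
}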
/// After the materialization is done, this holds the basic block /// that resulted from the splitting. MachineBasicBlock *DstOrSplit; + /// P is used to update the analysis passes as applicable. Pass &P; @@ -286,9 +300,11 @@ public: EdgeInsertPoint(MachineBasicBlock &Src, MachineBasicBlock &Dst, Pass &P) : InsertPoint(), Src(Src), DstOrSplit(&Dst), P(P) {} + bool isSplit() const override { return Src.succ_size() > 1 && DstOrSplit->pred_size() > 1; } + uint64_t frequency(const Pass &P) const override; bool canMaterialize() const override; }; @@ -311,9 +327,9 @@ class RegBankSelect : public MachineFunctionPass { /// \name Convenient types for a list of insertion points. /// @{ - typedef SmallVector<std::unique_ptr<InsertPoint>, 2> InsertionPoints; - typedef InsertionPoints::iterator insertpt_iterator; - typedef InsertionPoints::const_iterator const_insertpt_iterator; + using InsertionPoints = SmallVector<std::unique_ptr<InsertPoint>, 2>; + using insertpt_iterator = InsertionPoints::iterator; + using const_insertpt_iterator = InsertionPoints::const_iterator; /// @} private: @@ -324,7 +340,7 @@ class RegBankSelect : public MachineFunctionPass { /// Are all the insert points materializeable? bool CanMaterialize; /// Do any of the insert points need splitting? - bool HasSplit; + bool HasSplit = false; /// Insertion point for the repair code. /// The repairing code needs to happen just before these points. InsertionPoints InsertPoints; @@ -407,10 +423,10 @@ private: /// Cost of the local instructions. /// This cost is free of basic block frequency. - uint64_t LocalCost; + uint64_t LocalCost = 0; /// Cost of the non-local instructions. /// This cost should include the frequency of the related blocks. - uint64_t NonLocalCost; + uint64_t NonLocalCost = 0; /// Frequency of the block where the local instructions live. uint64_t LocalFreq; @@ -468,22 +484,22 @@ /// Interface to the target lowering info related /// to register banks. - const RegisterBankInfo *RBI; + const RegisterBankInfo *RBI = nullptr; /// MRI contains all the register class/bank information that this /// pass uses and updates. - MachineRegisterInfo *MRI; + MachineRegisterInfo *MRI = nullptr; /// Information on the register classes for the current function. - const TargetRegisterInfo *TRI; + const TargetRegisterInfo *TRI = nullptr; /// Get the frequency of blocks. /// This is required for non-fast mode. - MachineBlockFrequencyInfo *MBFI; + MachineBlockFrequencyInfo *MBFI = nullptr; /// Get the frequency of the edges. /// This is required for non-fast mode. - MachineBranchProbabilityInfo *MBPI; + MachineBranchProbabilityInfo *MBPI = nullptr; /// Current optimization remark emitter. Used to report failures. std::unique_ptr<MachineOptimizationRemarkEmitter> MORE; @@ -644,6 +660,6 @@ class RegBankSelect : public MachineFunctionPass { bool runOnMachineFunction(MachineFunction &MF) override; }; -} // End namespace llvm.
+} // end namespace llvm -#endif +#endif // LLVM_CODEGEN_GLOBALISEL_REGBANKSELECT_H diff --git a/interpreter/llvm/src/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h b/interpreter/llvm/src/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h index f32233b3a9e44..60905c7ec226d 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h @@ -1,4 +1,4 @@ -//==-- llvm/CodeGen/GlobalISel/RegisterBankInfo.h ----------------*- C++ -*-==// +//===- llvm/CodeGen/GlobalISel/RegisterBankInfo.h ---------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -12,26 +12,27 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CODEGEN_GLOBALISEL_REGBANKINFO_H -#define LLVM_CODEGEN_GLOBALISEL_REGBANKINFO_H +#ifndef LLVM_CODEGEN_GLOBALISEL_REGISTERBANKINFO_H +#define LLVM_CODEGEN_GLOBALISEL_REGISTERBANKINFO_H -#include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Hashing.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/CodeGen/GlobalISel/RegisterBank.h" -#include "llvm/CodeGen/MachineValueType.h" // For SimpleValueType. +#include "llvm/ADT/iterator_range.h" #include "llvm/Support/ErrorHandling.h" - #include -#include <memory> // For unique_ptr. +#include +#include namespace llvm { + class MachineInstr; class MachineRegisterInfo; +class raw_ostream; +class RegisterBank; class TargetInstrInfo; +class TargetRegisterClass; class TargetRegisterInfo; -class raw_ostream; /// Holds all the information related to register banks. class RegisterBankInfo { @@ -48,10 +49,12 @@ class RegisterBankInfo { /// original value. The bits are counted from less significant /// bits to most significant bits. unsigned StartIdx; + /// Length of this mapping in bits. This is how many bits this /// partial mapping covers in the original value: /// from StartIdx to StartIdx + Length -1. unsigned Length; + /// Register bank where the partial value lives. const RegisterBank *RegBank; @@ -180,13 +183,16 @@ class RegisterBankInfo { /// Identifier of the mapping. /// This is used to communicate between the target and the optimizers /// which mapping should be realized. - unsigned ID; + unsigned ID = InvalidMappingID; + /// Cost of this mapping. - unsigned Cost; + unsigned Cost = 0; + /// Mapping of all the operands. const ValueMapping *OperandsMapping; + /// Number of operands. - unsigned NumOperands; + unsigned NumOperands = 0; const ValueMapping &getOperandMapping(unsigned i) { assert(i < getNumOperands() && "Out of bound operand"); @@ -213,7 +219,7 @@ class RegisterBankInfo { /// Default constructor. /// Use this constructor to express that the mapping is invalid. - InstructionMapping() : ID(InvalidMappingID), Cost(0), NumOperands(0) {} + InstructionMapping() = default; /// Get the cost. unsigned getCost() const { return Cost; } @@ -264,7 +270,7 @@ class RegisterBankInfo { /// Convenient type to represent the alternatives for mapping an /// instruction. /// \todo When we move to TableGen this should be an array ref. - typedef SmallVector<const InstructionMapping *, 4> InstructionMappings; + using InstructionMappings = SmallVector<const InstructionMapping *, 4>; /// Helper class used to get/create the virtual registers that will be used /// to replace the MachineOperand when applying a mapping. @@ -273,12 +279,16 @@ class RegisterBankInfo { /// OpIdx-th operand starts. -1 means we do not have such mapping yet. /// Note: We use a SmallVector to avoid heap allocation for most cases.
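// [Editor's note] Illustrative sketch, not part of the patch: a worked
// example of the PartialMapping struct documented above. A 64-bit value
// split into two 32-bit halves on a GPR bank is described by two partial
// mappings; StartIdx counts from the least significant bit. GPRBank is a
// hypothetical RegisterBank instance.
static void partialMappingExample(const RegisterBank &GPRBank) {
  // Bits [0, 31] of the original value.
  RegisterBankInfo::PartialMapping LoHalf(/*StartIdx=*/0, /*Length=*/32,
                                          GPRBank);
  // Bits [32, 63] of the original value.
  RegisterBankInfo::PartialMapping HiHalf(/*StartIdx=*/32, /*Length=*/32,
                                          GPRBank);
  (void)LoHalf;
  (void)HiHalf;
}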
SmallVector<int, 8> OpToNewVRegIdx; + /// Hold the registers that will be used to map MI with InstrMapping. SmallVector<unsigned, 8> NewVRegs; + /// Current MachineRegisterInfo, used to create new virtual registers. MachineRegisterInfo &MRI; + /// Instruction being remapped. MachineInstr &MI; + /// New mapping of the instruction. const InstructionMapping &InstrMapping; @@ -373,6 +383,7 @@ class RegisterBankInfo { protected: /// Hold the set of supported register banks. RegisterBank **RegBanks; + /// Total number of register banks. unsigned NumRegBanks; @@ -396,7 +407,7 @@ class RegisterBankInfo { mutable DenseMap<unsigned, std::unique_ptr<const InstructionMapping>> MapOfInstructionMappings; - /// Create a RegisterBankInfo that can accomodate up to \p NumRegBanks + /// Create a RegisterBankInfo that can accommodate up to \p NumRegBanks /// RegisterBank instances. RegisterBankInfo(RegisterBank **RegBanks, unsigned NumRegBanks); @@ -729,6 +740,7 @@ operator<<(raw_ostream &OS, const RegisterBankInfo::OperandsMapper &OpdMapper) { /// Hashing function for PartialMapping. /// It is required for the hashing of ValueMapping. hash_code hash_value(const RegisterBankInfo::PartialMapping &PartMapping); -} // End namespace llvm. -#endif +} // end namespace llvm + +#endif // LLVM_CODEGEN_GLOBALISEL_REGISTERBANKINFO_H diff --git a/interpreter/llvm/src/include/llvm/CodeGen/GlobalISel/Types.h b/interpreter/llvm/src/include/llvm/CodeGen/GlobalISel/Types.h index 7d974878d3b9f..7b22e343a7f86 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/GlobalISel/Types.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/GlobalISel/Types.h @@ -1,4 +1,4 @@ -//===-- llvm/CodeGen/GlobalISel/Types.h - Types used by GISel ----*- C++ -*-===// +//===- llvm/CodeGen/GlobalISel/Types.h - Types used by GISel ----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -16,17 +16,19 @@ #define LLVM_CODEGEN_GLOBALISEL_TYPES_H #include "llvm/ADT/DenseMap.h" -#include "llvm/IR/Value.h" namespace llvm { +class Value; + /// Map a value to a virtual register. /// For now, we chose to map aggregate types to one single virtual /// register. This might be revisited if it turns out to be inefficient. /// PR26161 tracks that. /// Note: We need to expose this type to the target hooks for things like /// ABI lowering that would be used during IRTranslation. -typedef DenseMap<const Value *, unsigned> ValueToVReg; +using ValueToVReg = DenseMap<const Value *, unsigned>; + +} // end namespace llvm -} // End namespace llvm. -#endif +#endif // LLVM_CODEGEN_GLOBALISEL_TYPES_H diff --git a/interpreter/llvm/src/include/llvm/CodeGen/GlobalISel/Utils.h b/interpreter/llvm/src/include/llvm/CodeGen/GlobalISel/Utils.h index 69d5070698082..50ddbeb9432a3 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/GlobalISel/Utils.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/GlobalISel/Utils.h @@ -29,13 +29,26 @@ class RegisterBankInfo; class TargetInstrInfo; class TargetPassConfig; class TargetRegisterInfo; +class TargetRegisterClass; class Twine; class ConstantFP; +/// Try to constrain Reg to the specified register class. If this fails, +/// create a new virtual register in the correct class and insert a COPY before +/// \p InsertPt. The debug location of \p InsertPt is used for the new copy. +/// +/// \return The virtual register constrained to the right register class. +unsigned constrainRegToClass(MachineRegisterInfo &MRI, + const TargetInstrInfo &TII, + const RegisterBankInfo &RBI, + MachineInstr &InsertPt, unsigned Reg, + const TargetRegisterClass &RegClass); + /// Try to constrain Reg so that it is usable by argument OpIdx of the /// provided MCInstrDesc \p II.
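// [Editor's note] Illustrative sketch, not part of the patch: typical use of
// the constrainRegToClass() helper declared above, e.g. from an instruction
// selector. All values are hypothetical; the signature is taken verbatim
// from the declaration.
static void constrainExample(MachineRegisterInfo &MRI,
                             const TargetInstrInfo &TII,
                             const RegisterBankInfo &RBI,
                             MachineInstr &InsertPt, unsigned Reg,
                             const TargetRegisterClass &RC) {
  // Returns Reg itself if it can be constrained to RC, otherwise a fresh
  // virtual register of class RC with a COPY inserted before InsertPt.
  unsigned Constrained = constrainRegToClass(MRI, TII, RBI, InsertPt, Reg, RC);
  (void)Constrained;
}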
If this fails, create a new virtual /// register in the correct class and insert a COPY before \p InsertPt. -/// The debug location of \p InsertPt is used for the new copy. +/// This is equivalent to constrainRegToClass() with RegClass obtained from the +/// MCInstrDesc. The debug location of \p InsertPt is used for the new copy. /// /// \return The virtual register constrained to the right register class. unsigned constrainOperandRegClass(const MachineFunction &MF, diff --git a/interpreter/llvm/src/include/llvm/CodeGen/ISDOpcodes.h b/interpreter/llvm/src/include/llvm/CodeGen/ISDOpcodes.h index f2a9a9f73ca6a..bc5d2353f63e3 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/ISDOpcodes.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/ISDOpcodes.h @@ -264,6 +264,14 @@ namespace ISD { /// optimized. STRICT_FADD, STRICT_FSUB, STRICT_FMUL, STRICT_FDIV, STRICT_FREM, + /// Constrained versions of libm-equivalent floating point intrinsics. + /// These will be lowered to the equivalent non-constrained pseudo-op + /// (or expanded to the equivalent library call) before final selection. + /// They are used to limit optimizations while the DAG is being optimized. + STRICT_FSQRT, STRICT_FPOW, STRICT_FPOWI, STRICT_FSIN, STRICT_FCOS, + STRICT_FEXP, STRICT_FEXP2, STRICT_FLOG, STRICT_FLOG10, STRICT_FLOG2, + STRICT_FRINT, STRICT_FNEARBYINT, + /// FMA - Perform a * b + c with no intermediate rounding step. FMA, @@ -402,12 +410,22 @@ namespace ISD { /// then the result type must also be a vector type. SETCC, - /// Like SetCC, ops #0 and #1 are the LHS and RHS operands to compare, but + /// Like SetCC, ops #0 and #1 are the LHS and RHS operands to compare, and /// op #2 is a *carry value*. This operator checks the result of /// "LHS - RHS - Carry", and can be used to compare two wide integers: /// (setcce lhshi rhshi (subc lhslo rhslo) cc). Only valid for integers. + /// FIXME: This node is deprecated in favor of SETCCCARRY. + /// It is kept around for now to provide a smooth transition path + /// toward the use of SETCCCARRY and will eventually be removed. SETCCE, + /// Like SetCC, ops #0 and #1 are the LHS and RHS operands to compare, but + /// op #2 is a boolean indicating if there is an incoming carry. This + /// operator checks the result of "LHS - RHS - Carry", and can be used to + /// compare two wide integers: (setcccarry lhshi rhshi (subcarry lhslo rhslo) cc). + /// Only valid for integers. + SETCCCARRY, + /// SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded /// integer shift operations. The operation ordering is: /// [Lo,Hi] = op [LoLHS,HiLHS], Amt diff --git a/interpreter/llvm/src/include/llvm/CodeGen/LexicalScopes.h b/interpreter/llvm/src/include/llvm/CodeGen/LexicalScopes.h index 6c35832f963c7..3ba503487823d 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/LexicalScopes.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/LexicalScopes.h @@ -31,12 +31,13 @@ namespace llvm { class MachineBasicBlock; class MachineFunction; class MachineInstr; +class MDNode; //===----------------------------------------------------------------------===// /// InsnRange - This is used to track range of instructions with identical /// lexical scope. /// -typedef std::pair<const MachineInstr *, const MachineInstr *> InsnRange; +using InsnRange = std::pair<const MachineInstr *, const MachineInstr *>; //===----------------------------------------------------------------------===// /// LexicalScope - This class is used to track scope information. @@ -195,7 +196,7 @@ class LexicalScopes { } /// dump - Print data structures to dbgs().
- void dump(); + void dump() const; /// getOrCreateAbstractScope - Find or create an abstract lexical scope. LexicalScope *getOrCreateAbstractScope(const DILocalScope *Scope); diff --git a/interpreter/llvm/src/include/llvm/CodeGen/LiveInterval.h b/interpreter/llvm/src/include/llvm/CodeGen/LiveInterval.h index b792cba4b78a5..f4fa872c7f5bb 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/LiveInterval.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/LiveInterval.h @@ -1,4 +1,4 @@ -//===-- llvm/CodeGen/LiveInterval.h - Interval representation ---*- C++ -*-===// +//===- llvm/CodeGen/LiveInterval.h - Interval representation ----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -21,22 +21,30 @@ #ifndef LLVM_CODEGEN_LIVEINTERVAL_H #define LLVM_CODEGEN_LIVEINTERVAL_H +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/IntEqClasses.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/MC/LaneBitmask.h" #include "llvm/Support/Allocator.h" -#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/MathExtras.h" +#include #include -#include +#include +#include +#include #include +#include +#include namespace llvm { + class CoalescerPair; class LiveIntervals; - class MachineInstr; class MachineRegisterInfo; - class TargetRegisterInfo; class raw_ostream; - template <typename T, unsigned N> class SmallPtrSet; /// VNInfo - Value Number Information. /// This class holds information about machine level values, including /// class VNInfo { public: - typedef BumpPtrAllocator Allocator; + using Allocator = BumpPtrAllocator; /// The ID number of this value. unsigned id; @@ -53,14 +61,10 @@ namespace llvm { SlotIndex def; /// VNInfo constructor. - VNInfo(unsigned i, SlotIndex d) - : id(i), def(d) - { } + VNInfo(unsigned i, SlotIndex d) : id(i), def(d) {} /// VNInfo constructor, copies values from orig, except for the value number. - VNInfo(unsigned i, const VNInfo &orig) - : id(i), def(orig.def) - { } + VNInfo(unsigned i, const VNInfo &orig) : id(i), def(orig.def) {} /// Copy from the parameter into this VNInfo. void copyFrom(VNInfo &src) { @@ -152,16 +156,16 @@ namespace llvm { /// segment with a new value number is used. class LiveRange { public: - /// This represents a simple continuous liveness interval for a value. /// The start point is inclusive, the end point exclusive. These intervals /// are rendered as [start,end). struct Segment { SlotIndex start; // Start point of the interval (inclusive) SlotIndex end; // End point of the interval (exclusive) - VNInfo *valno; // identifier for the value contained in this segment. + VNInfo *valno = nullptr; // identifier for the value contained in this + // segment. - Segment() : valno(nullptr) {} + Segment() = default; Segment(SlotIndex S, SlotIndex E, VNInfo *V) : start(S), end(E), valno(V) { @@ -189,8 +193,8 @@ namespace llvm { void dump() const; }; - typedef SmallVector<Segment, 2> Segments; - typedef SmallVector<VNInfo *, 2> VNInfoList; + using Segments = SmallVector<Segment, 2>; + using VNInfoList = SmallVector<VNInfo *, 2>; Segments segments; // the liveness segments VNInfoList valnos; // value#'s @@ -198,22 +202,24 @@ namespace llvm { // The segment set is used temporarily to accelerate initial computation // of live ranges of physical registers in computeRegUnitRange. // After that the set is flushed to the segment vector and deleted.
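// [Editor's note] Illustrative sketch, not part of the patch: the half-open
// [start, end) convention documented above in miniature, as a standalone toy
// using plain integers instead of SlotIndex.
#include <cassert>
struct ToySegment {
  unsigned start, end; // [start, end)
  bool contains(unsigned I) const { return start <= I && I < end; }
  bool overlaps(const ToySegment &O) const {
    return start < O.end && O.start < end;
  }
};
int main() {
  ToySegment A{0, 8}, B{8, 16};
  assert(!A.contains(8)); // the end point is exclusive
  assert(!A.overlaps(B)); // adjacent segments touch but do not overlap
}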
- typedef std::set<Segment> SegmentSet; + using SegmentSet = std::set<Segment>; std::unique_ptr<SegmentSet> segmentSet; - typedef Segments::iterator iterator; + using iterator = Segments::iterator; + using const_iterator = Segments::const_iterator; + iterator begin() { return segments.begin(); } iterator end() { return segments.end(); } - typedef Segments::const_iterator const_iterator; const_iterator begin() const { return segments.begin(); } const_iterator end() const { return segments.end(); } - typedef VNInfoList::iterator vni_iterator; + using vni_iterator = VNInfoList::iterator; + using const_vni_iterator = VNInfoList::const_iterator; + vni_iterator vni_begin() { return valnos.begin(); } vni_iterator vni_end() { return valnos.end(); } - typedef VNInfoList::const_iterator const_vni_iterator; const_vni_iterator vni_begin() const { return valnos.begin(); } const_vni_iterator vni_end() const { return valnos.end(); } @@ -631,40 +637,37 @@ namespace llvm { /// or stack slot. class LiveInterval : public LiveRange { public: - typedef LiveRange super; + using super = LiveRange; /// A live range for subregisters. The LaneMask specifies which parts of the /// super register are covered by the interval. /// (@sa TargetRegisterInfo::getSubRegIndexLaneMask()). class SubRange : public LiveRange { public: - SubRange *Next; + SubRange *Next = nullptr; LaneBitmask LaneMask; /// Constructs a new SubRange object. - SubRange(LaneBitmask LaneMask) - : Next(nullptr), LaneMask(LaneMask) { - } + SubRange(LaneBitmask LaneMask) : LaneMask(LaneMask) {} /// Constructs a new SubRange object by copying liveness from @p Other. SubRange(LaneBitmask LaneMask, const LiveRange &Other, BumpPtrAllocator &Allocator) - : LiveRange(Other, Allocator), Next(nullptr), LaneMask(LaneMask) { - } + : LiveRange(Other, Allocator), LaneMask(LaneMask) {} void print(raw_ostream &OS) const; void dump() const; }; private: - SubRange *SubRanges; ///< Single linked list of subregister live ranges. + SubRange *SubRanges = nullptr; ///< Single linked list of subregister live + /// ranges. public: const unsigned reg; // the register or stack slot of this interval. float weight; // weight of this interval - LiveInterval(unsigned Reg, float Weight) - : SubRanges(nullptr), reg(Reg), weight(Weight) {} + LiveInterval(unsigned Reg, float Weight) : reg(Reg), weight(Weight) {} ~LiveInterval() { clearSubRanges(); @@ -673,8 +676,10 @@ namespace llvm { template <typename T> class SingleLinkedListIterator { T *P; + public: SingleLinkedListIterator(T *P) : P(P) {} + SingleLinkedListIterator &operator++() { P = P->Next; return *this; @@ -698,7 +703,9 @@ namespace llvm { } }; - typedef SingleLinkedListIterator<SubRange> subrange_iterator; + using subrange_iterator = SingleLinkedListIterator<SubRange>; + using const_subrange_iterator = SingleLinkedListIterator<const SubRange>; + subrange_iterator subrange_begin() { return subrange_iterator(SubRanges); } @@ -706,7 +713,6 @@ namespace llvm { return subrange_iterator(nullptr); } - typedef SingleLinkedListIterator<const SubRange> const_subrange_iterator; const_subrange_iterator subrange_begin() const { return const_subrange_iterator(SubRanges); } @@ -759,12 +765,12 @@ namespace llvm { /// isSpillable - Can this interval be spilled?
bool isSpillable() const { - return weight != llvm::huge_valf; + return weight != huge_valf; } /// markNotSpillable - Mark interval as not spillable void markNotSpillable() { - weight = llvm::huge_valf; + weight = huge_valf; } /// For a given lane mask @p LaneMask, compute indexes at which the /// @@ -931,5 +937,7 @@ namespace llvm { void Distribute(LiveInterval &LI, LiveInterval *LIV[], MachineRegisterInfo &MRI); }; -} -#endif + +} // end namespace llvm + +#endif // LLVM_CODEGEN_LIVEINTERVAL_H diff --git a/interpreter/llvm/src/include/llvm/CodeGen/LiveIntervalAnalysis.h b/interpreter/llvm/src/include/llvm/CodeGen/LiveIntervalAnalysis.h index f5b1f87720ad3..820e883624837 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/LiveIntervalAnalysis.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/LiveIntervalAnalysis.h @@ -1,4 +1,4 @@ -//===-- LiveIntervalAnalysis.h - Live Interval Analysis ---------*- C++ -*-===// +//===- LiveIntervalAnalysis.h - Live Interval Analysis ----------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -20,6 +20,7 @@ #ifndef LLVM_CODEGEN_LIVEINTERVALANALYSIS_H #define LLVM_CODEGEN_LIVEINTERVALANALYSIS_H +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/IndexedMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/AliasAnalysis.h" @@ -27,27 +28,29 @@ #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/SlotIndexes.h" -#include "llvm/Support/Allocator.h" +#include "llvm/MC/LaneBitmask.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetRegisterInfo.h" -#include +#include +#include +#include namespace llvm { extern cl::opt<bool> UseSegmentSetForPhysRegs; - class BitVector; - class BlockFrequency; - class LiveRangeCalc; - class LiveVariables; - class MachineDominatorTree; - class MachineLoopInfo; - class TargetRegisterInfo; - class MachineRegisterInfo; - class TargetInstrInfo; - class TargetRegisterClass; - class VirtRegMap; - class MachineBlockFrequencyInfo; +class BitVector; +class LiveRangeCalc; +class MachineBlockFrequencyInfo; +class MachineDominatorTree; +class MachineFunction; +class MachineInstr; +class MachineRegisterInfo; +class raw_ostream; +class TargetInstrInfo; +class VirtRegMap; class LiveIntervals : public MachineFunctionPass { MachineFunction* MF; @@ -56,8 +59,8 @@ extern cl::opt<bool> UseSegmentSetForPhysRegs; const TargetInstrInfo* TII; AliasAnalysis *AA; SlotIndexes* Indexes; - MachineDominatorTree *DomTree; - LiveRangeCalc *LRCalc; + MachineDominatorTree *DomTree = nullptr; + LiveRangeCalc *LRCalc = nullptr; /// Special pool allocator for VNInfo's (LiveInterval val#). VNInfo::Allocator VNInfoAllocator; @@ -95,6 +98,7 @@ extern cl::opt<bool> UseSegmentSetForPhysRegs; public: static char ID; + LiveIntervals(); ~LiveIntervals() override; @@ -189,7 +193,7 @@ extern cl::opt<bool> UseSegmentSetForPhysRegs; void pruneValue(LiveRange &LR, SlotIndex Kill, SmallVectorImpl<SlotIndex> *EndPoints); - /// This function should be used. Its intend is to tell you that + /// This function should not be used. Its intent is to tell you that /// you are doing something wrong if you call pruneValue directly on a /// LiveInterval. Indeed, you are supposed to call pruneValue on the main /// LiveRange and all the LiveRange of the subranges if any.
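// [Editor's note] Illustrative sketch, not part of the patch: the weight
// field doubles as a sentinel. markNotSpillable() stores huge_valf, and
// isSpillable() simply compares against it, as shown in the hunk above.
// assert() is from <cassert>.
static void spillQueryExample(LiveInterval &LI) {
  LI.markNotSpillable();     // weight = huge_valf
  assert(!LI.isSpillable()); // weight == huge_valf, so never spilled
}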
@@ -466,6 +470,7 @@ extern cl::opt<bool> UseSegmentSetForPhysRegs; class HMEditor; }; -} // End llvm namespace -#endif +} // end namespace llvm + +#endif // LLVM_CODEGEN_LIVEINTERVALANALYSIS_H diff --git a/interpreter/llvm/src/include/llvm/CodeGen/LiveIntervalUnion.h b/interpreter/llvm/src/include/llvm/CodeGen/LiveIntervalUnion.h index 57e3deb038af3..b922e543c8560 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/LiveIntervalUnion.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/LiveIntervalUnion.h @@ -26,12 +26,14 @@ namespace llvm { +class raw_ostream; class TargetRegisterInfo; #ifndef NDEBUG // forward declaration template <unsigned Element> class SparseBitVector; -typedef SparseBitVector<128> LiveVirtRegBitSet; + +using LiveVirtRegBitSet = SparseBitVector<128>; #endif /// Union of live intervals that are strong candidates for coalescing into a @@ -42,19 +44,19 @@ class LiveIntervalUnion { // A set of live virtual register segments that supports fast insertion, // intersection, and removal. // Mapping SlotIndex intervals to virtual register numbers. - typedef IntervalMap<SlotIndex, LiveInterval*> LiveSegments; + using LiveSegments = IntervalMap<SlotIndex, LiveInterval*>; public: // SegmentIter can advance to the next segment ordered by starting position // which may belong to a different live virtual register. We also must be able // to reach the current segment's containing virtual register. - typedef LiveSegments::iterator SegmentIter; + using SegmentIter = LiveSegments::iterator; /// Const version of SegmentIter. - typedef LiveSegments::const_iterator ConstSegmentIter; + using ConstSegmentIter = LiveSegments::const_iterator; // LiveIntervalUnions share an external allocator. - typedef LiveSegments::Allocator Allocator; + using Allocator = LiveSegments::Allocator; private: unsigned Tag = 0; // unique tag for current contents. @@ -76,7 +78,7 @@ class LiveIntervalUnion { SlotIndex startIndex() const { return Segments.start(); } // Provide public access to the underlying map to allow overlap iteration. - typedef LiveSegments Map; + using Map = LiveSegments; const Map &getMap() const { return Segments; } /// getTag - Return an opaque tag representing the current state of the union. diff --git a/interpreter/llvm/src/include/llvm/CodeGen/LivePhysRegs.h b/interpreter/llvm/src/include/llvm/CodeGen/LivePhysRegs.h index 9e04c467fadc0..f9c741dd75b2d 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/LivePhysRegs.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/LivePhysRegs.h @@ -7,23 +7,24 @@ // //===----------------------------------------------------------------------===// // -// This file implements the LivePhysRegs utility for tracking liveness of -// physical registers. This can be used for ad-hoc liveness tracking after -// register allocation. You can start with the live-ins/live-outs at the -// beginning/end of a block and update the information while walking the -// instructions inside the block. This implementation tracks the liveness on a -// sub-register granularity. -// -// We assume that the high bits of a physical super-register are not preserved -// unless the instruction has an implicit-use operand reading the super- -// register. -// -// X86 Example: -// %YMM0 = ... -// %XMM0 = ... (Kills %XMM0, all %XMM0s sub-registers, and %YMM0) -// -// %YMM0 = ... -// %XMM0 = ..., %YMM0 (%YMM0 and all its sub-registers are alive) +/// \file +/// This file implements the LivePhysRegs utility for tracking liveness of +/// physical registers. This can be used for ad-hoc liveness tracking after +/// register allocation.
You can start with the live-ins/live-outs at the +/// beginning/end of a block and update the information while walking the +/// instructions inside the block. This implementation tracks the liveness on a +/// sub-register granularity. +/// +/// We assume that the high bits of a physical super-register are not preserved +/// unless the instruction has an implicit-use operand reading the super- +/// register. +/// +/// X86 Example: +/// %YMM0 = ... +/// %XMM0 = ... (Kills %XMM0, all %XMM0's sub-registers, and %YMM0) +/// +/// %YMM0 = ... +/// %XMM0 = ..., %YMM0 (%YMM0 and all its sub-registers are alive) //===----------------------------------------------------------------------===// #ifndef LLVM_CODEGEN_LIVEPHYSREGS_H @@ -39,40 +40,42 @@ namespace llvm { class MachineInstr; +class MachineOperand; +class MachineRegisterInfo; +class raw_ostream; -/// \brief A set of live physical registers with functions to track liveness +/// \brief A set of physical registers with utility functions to track liveness /// when walking backward/forward through a basic block. class LivePhysRegs { const TargetRegisterInfo *TRI = nullptr; SparseSet<unsigned> LiveRegs; - LivePhysRegs(const LivePhysRegs&) = delete; - LivePhysRegs &operator=(const LivePhysRegs&) = delete; - public: - /// \brief Constructs a new empty LivePhysRegs set. + /// Constructs an uninitialized set. init() needs to be called to initialize it. LivePhysRegs() = default; - /// \brief Constructs and initialize an empty LivePhysRegs set. - LivePhysRegs(const TargetRegisterInfo *TRI) : TRI(TRI) { - assert(TRI && "Invalid TargetRegisterInfo pointer."); - LiveRegs.setUniverse(TRI->getNumRegs()); + /// Constructs and initializes an empty set. + LivePhysRegs(const TargetRegisterInfo &TRI) : TRI(&TRI) { + LiveRegs.setUniverse(TRI.getNumRegs()); } - /// \brief Clear and initialize the LivePhysRegs set. + LivePhysRegs(const LivePhysRegs&) = delete; + LivePhysRegs &operator=(const LivePhysRegs&) = delete; + + /// (Re-)initializes and clears the set. void init(const TargetRegisterInfo &TRI) { this->TRI = &TRI; LiveRegs.clear(); LiveRegs.setUniverse(TRI.getNumRegs()); } - /// \brief Clears the LivePhysRegs set. + /// Clears the set. void clear() { LiveRegs.clear(); } - /// \brief Returns true if the set is empty. + /// Returns true if the set is empty. bool empty() const { return LiveRegs.empty(); } - /// \brief Adds a physical register and all its sub-registers to the set. + /// Adds a physical register and all its sub-registers to the set. void addReg(unsigned Reg) { assert(TRI && "LivePhysRegs is not initialized."); assert(Reg <= TRI->getNumRegs() && "Expected a physical register."); @@ -90,12 +93,13 @@ class LivePhysRegs { LiveRegs.erase(*R); } - /// \brief Removes physical registers clobbered by the regmask operand @p MO. + /// Removes physical registers clobbered by the regmask operand \p MO. void removeRegsInMask(const MachineOperand &MO, - SmallVectorImpl<std::pair<unsigned, const MachineInstr*>> *Clobbers); + SmallVectorImpl<std::pair<unsigned, const MachineInstr*>> *Clobbers = + nullptr); - /// \brief Returns true if register @p Reg is contained in the set. This also - /// works if only the super register of @p Reg has been defined, because + /// \brief Returns true if register \p Reg is contained in the set. This also + /// works if only the super register of \p Reg has been defined, because /// addReg() always adds all sub-registers to the set as well. /// Note: Returns false if just some sub registers are live, use available() /// when searching a free register.
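// [Editor's note] Illustrative sketch, not part of the patch: the
// recommended backward walk with LivePhysRegs. Seed the set with the block's
// live-outs, then stepBackward() removes defs and adds uses per instruction;
// afterwards the set holds the block's live-ins.
static void backwardLivenessExample(MachineBasicBlock &MBB,
                                    const TargetRegisterInfo &TRI) {
  LivePhysRegs LiveRegs(TRI); // uses the new reference-taking constructor
  LiveRegs.addLiveOuts(MBB);
  for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend()))
    LiveRegs.stepBackward(MI);
}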
@@ -104,48 +108,48 @@ class LivePhysRegs { /// Returns true if register \p Reg and no aliasing register is in the set. bool available(const MachineRegisterInfo &MRI, unsigned Reg) const; - /// \brief Simulates liveness when stepping backwards over an - /// instruction(bundle): Remove Defs, add uses. This is the recommended way of - /// calculating liveness. + /// Simulates liveness when stepping backwards over an instruction (bundle). + /// Remove Defs, add uses. This is the recommended way of calculating + /// liveness. void stepBackward(const MachineInstr &MI); - /// \brief Simulates liveness when stepping forward over an - /// instruction(bundle): Remove killed-uses, add defs. This is the not - /// recommended way, because it depends on accurate kill flags. If possible - /// use stepBackward() instead of this function. - /// The clobbers set will be the list of registers either defined or clobbered - /// by a regmask. The operand will identify whether this is a regmask or - /// register operand. + /// Simulates liveness when stepping forward over an instruction (bundle). + /// Remove killed-uses, add defs. This is not the recommended way, because it + /// depends on accurate kill flags. If possible use stepBackward() instead of + /// this function. The clobbers set will be the list of registers either + /// defined or clobbered by a regmask. The operand will identify whether this + /// is a regmask or register operand. void stepForward(const MachineInstr &MI, SmallVectorImpl<std::pair<unsigned, const MachineInstr*>> &Clobbers); - /// Adds all live-in registers of basic block @p MBB. + /// Adds all live-in registers of basic block \p MBB. /// Live in registers are the registers in the blocks live-in list and the /// pristine registers. void addLiveIns(const MachineBasicBlock &MBB); - /// Adds all live-out registers of basic block @p MBB. + /// Adds all live-out registers of basic block \p MBB. /// Live out registers are the union of the live-in registers of the successor /// blocks and pristine registers. Live out registers of the end block are the /// callee saved registers. void addLiveOuts(const MachineBasicBlock &MBB); - /// Like addLiveOuts() but does not add pristine registers/callee saved + /// Adds all live-out registers of basic block \p MBB but skips pristine /// registers. void addLiveOutsNoPristines(const MachineBasicBlock &MBB); - typedef SparseSet<unsigned>::const_iterator const_iterator; + using const_iterator = SparseSet<unsigned>::const_iterator; + const_iterator begin() const { return LiveRegs.begin(); } const_iterator end() const { return LiveRegs.end(); } - /// \brief Prints the currently live registers to @p OS. + /// Prints the currently live registers to \p OS. void print(raw_ostream &OS) const; - /// \brief Dumps the currently live registers to the debug output. + /// Dumps the currently live registers to the debug output. void dump() const; private: - /// Adds live-in registers from basic block @p MBB, taking associated + /// \brief Adds live-in registers from basic block \p MBB, taking associated /// lane masks into consideration. void addBlockLiveIns(const MachineBasicBlock &MBB); }; @@ -155,11 +159,11 @@ inline raw_ostream &operator<<(raw_ostream &OS, const LivePhysRegs& LR) { return OS; } -/// Compute the live-in list for \p MBB assuming all of its successors live-in -/// lists are up-to-date. Uses the given LivePhysReg instance \p LiveRegs; This -/// is just here to avoid repeated heap allocations when calling this multiple -/// times in a pass.
-void computeLiveIns(LivePhysRegs &LiveRegs, const TargetRegisterInfo &TRI, +/// \brief Computes the live-in list for \p MBB assuming all of its successors' +/// live-in lists are up-to-date. Uses the given LivePhysRegs instance \p +/// LiveRegs; this is just here to avoid repeated heap allocations when calling +/// this multiple times in a pass. +void computeLiveIns(LivePhysRegs &LiveRegs, const MachineRegisterInfo &MRI, MachineBasicBlock &MBB); } // end namespace llvm diff --git a/interpreter/llvm/src/include/llvm/CodeGen/LiveRangeEdit.h b/interpreter/llvm/src/include/llvm/CodeGen/LiveRangeEdit.h index 4250777682ba5..362d9854a271a 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/LiveRangeEdit.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/LiveRangeEdit.h @@ -1,4 +1,4 @@ -//===---- LiveRangeEdit.h - Basic tools for split and spill -----*- C++ -*-===// +//===- LiveRangeEdit.h - Basic tools for split and spill --------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -19,19 +19,28 @@ #define LLVM_CODEGEN_LIVERANGEEDIT_H #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/None.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/LiveInterval.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Target/TargetMachine.h" +#include "llvm/CodeGen/SlotIndexes.h" #include "llvm/Target/TargetSubtargetInfo.h" +#include namespace llvm { class LiveIntervals; class MachineBlockFrequencyInfo; +class MachineInstr; class MachineLoopInfo; +class MachineOperand; +class TargetInstrInfo; +class TargetRegisterInfo; class VirtRegMap; class LiveRangeEdit : private MachineRegisterInfo::Delegate { @@ -39,7 +48,10 @@ class LiveRangeEdit : private MachineRegisterInfo::Delegate { /// Callback methods for LiveRangeEdit owners. class Delegate { virtual void anchor(); + public: + virtual ~Delegate() = default; + /// Called immediately before erasing a dead machine instruction. virtual void LRE_WillEraseInstruction(MachineInstr *MI) {} @@ -53,8 +65,6 @@ class LiveRangeEdit : private MachineRegisterInfo::Delegate { /// Called after cloning a virtual register. /// This is used for new registers representing connected components of Old. virtual void LRE_DidCloneVirtReg(unsigned New, unsigned Old) {} - - virtual ~Delegate() {} }; private: @@ -70,7 +80,7 @@ class LiveRangeEdit : private MachineRegisterInfo::Delegate { const unsigned FirstNew; /// ScannedRemattable - true when remattable values have been identified. - bool ScannedRemattable; + bool ScannedRemattable = false; /// DeadRemats - The saved instructions which have already been dead after /// rematerialization but not deleted yet -- to be done in postOptimization. @@ -78,11 +88,11 @@ class LiveRangeEdit : private MachineRegisterInfo::Delegate { /// Remattable - Values defined by remattable instructions as identified by /// tii.isTriviallyReMaterializable(). - SmallPtrSet<const VNInfo*,4> Remattable; + SmallPtrSet<const VNInfo *, 4> Remattable; /// Rematted - Values that were actually rematted, and so need to have their /// live range trimmed or entirely removed. - SmallPtrSet<const VNInfo*,4> Rematted; + SmallPtrSet<const VNInfo *, 4> Rematted; /// scanRemattable - Identify the Parent values that may rematerialize.
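// [Editor's note] Illustrative sketch, not part of the patch: using the
// re-signed computeLiveIns() above, which now takes a MachineRegisterInfo
// instead of a TargetRegisterInfo. The LivePhysRegs instance is reused across
// calls purely to avoid repeated heap allocations; per the doc, each block's
// successors must have up-to-date live-in lists when it is visited, which
// this naive forward loop simply assumes for brevity.
static void recomputeLiveInsExample(MachineFunction &MF) {
  LivePhysRegs LiveRegs; // uninitialized; computeLiveIns() (re)inits it
  for (MachineBasicBlock &MBB : MF)
    computeLiveIns(LiveRegs, MF.getRegInfo(), MBB);
}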
void scanRemattable(AliasAnalysis *aa); @@ -94,11 +104,11 @@ class LiveRangeEdit : private MachineRegisterInfo::Delegate { /// foldAsLoad - If LI has a single use and a single def that can be folded as /// a load, eliminate the register by folding the def into the use. - bool foldAsLoad(LiveInterval *LI, SmallVectorImpl<MachineInstr*> &Dead); + bool foldAsLoad(LiveInterval *LI, SmallVectorImpl<MachineInstr *> &Dead); + + using ToShrinkSet = SetVector<LiveInterval *, SmallVector<LiveInterval *, 8>, + SmallPtrSet<LiveInterval *, 8>>; - typedef SetVector<LiveInterval*, SmallVector<LiveInterval*, 8>, - SmallPtrSet<LiveInterval*, 8> > ToShrinkSet; /// Helper for eliminateDeadDefs. void eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink, AliasAnalysis *AA); @@ -129,26 +139,26 @@ class LiveRangeEdit : private MachineRegisterInfo::Delegate { SmallPtrSet<MachineInstr *, 32> *deadRemats = nullptr) : Parent(parent), NewRegs(newRegs), MRI(MF.getRegInfo()), LIS(lis), VRM(vrm), TII(*MF.getSubtarget().getInstrInfo()), TheDelegate(delegate), - FirstNew(newRegs.size()), ScannedRemattable(false), - DeadRemats(deadRemats) { + FirstNew(newRegs.size()), DeadRemats(deadRemats) { MRI.setDelegate(this); } ~LiveRangeEdit() override { MRI.resetDelegate(this); } LiveInterval &getParent() const { - assert(Parent && "No parent LiveInterval"); - return *Parent; + assert(Parent && "No parent LiveInterval"); + return *Parent; } + unsigned getReg() const { return getParent().reg; } /// Iterator for accessing the new registers added by this edit. - typedef SmallVectorImpl<unsigned>::const_iterator iterator; - iterator begin() const { return NewRegs.begin()+FirstNew; } + using iterator = SmallVectorImpl<unsigned>::const_iterator; + iterator begin() const { return NewRegs.begin() + FirstNew; } iterator end() const { return NewRegs.end(); } - unsigned size() const { return NewRegs.size()-FirstNew; } + unsigned size() const { return NewRegs.size() - FirstNew; } bool empty() const { return size() == 0; } - unsigned get(unsigned idx) const { return NewRegs[idx+FirstNew]; } + unsigned get(unsigned idx) const { return NewRegs[idx + FirstNew]; } /// pop_back - It allows LiveRangeEdit users to drop new registers. /// The context is when an original def instruction of a register is @@ -176,26 +186,25 @@ class LiveRangeEdit : private MachineRegisterInfo::Delegate { return createEmptyIntervalFrom(getReg()); } - unsigned create() { - return createFrom(getReg()); - } + unsigned create() { return createFrom(getReg()); } /// anyRematerializable - Return true if any parent values may be /// rematerializable. /// This function must be called before any rematerialization is attempted. - bool anyRematerializable(AliasAnalysis*); + bool anyRematerializable(AliasAnalysis *); /// checkRematerializable - Manually add VNI to the list of rematerializable /// values if DefMI may be rematerializable. bool checkRematerializable(VNInfo *VNI, const MachineInstr *DefMI, - AliasAnalysis*); + AliasAnalysis *); /// Remat - Information needed to rematerialize at a specific location. struct Remat { - VNInfo *ParentVNI; // parent_'s value at the remat location. - MachineInstr *OrigMI; // Instruction defining OrigVNI. It contains the - // real expr for remat. - explicit Remat(VNInfo *ParentVNI) : ParentVNI(ParentVNI), OrigMI(nullptr) {} + VNInfo *ParentVNI; // parent_'s value at the remat location. + MachineInstr *OrigMI = nullptr; // Instruction defining OrigVNI. It contains + // the real expr for remat. + + explicit Remat(VNInfo *ParentVNI) : ParentVNI(ParentVNI) {} }; /// canRematerializeAt - Determine if ParentVNI can be rematerialized at @@ -209,10 +218,8 @@ class LiveRangeEdit : private MachineRegisterInfo::Delegate { /// liveness is not updated.
/// Return the SlotIndex of the new instruction. SlotIndex rematerializeAt(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - unsigned DestReg, - const Remat &RM, - const TargetRegisterInfo&, + MachineBasicBlock::iterator MI, unsigned DestReg, + const Remat &RM, const TargetRegisterInfo &, bool Late = false); /// markRematerialized - explicitly mark a value as rematerialized after doing /// @@ -248,11 +255,10 @@ class LiveRangeEdit : private MachineRegisterInfo::Delegate { /// calculateRegClassAndHint - Recompute register class and hint for each new /// register. - void calculateRegClassAndHint(MachineFunction&, - const MachineLoopInfo&, - const MachineBlockFrequencyInfo&); + void calculateRegClassAndHint(MachineFunction &, const MachineLoopInfo &, + const MachineBlockFrequencyInfo &); }; -} +} // end namespace llvm -#endif +#endif // LLVM_CODEGEN_LIVERANGEEDIT_H diff --git a/interpreter/llvm/src/include/llvm/CodeGen/LiveRegUnits.h b/interpreter/llvm/src/include/llvm/CodeGen/LiveRegUnits.h index 5de76c8b87bf7..c28b1a06854fc 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/LiveRegUnits.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/LiveRegUnits.h @@ -16,9 +16,9 @@ #define LLVM_CODEGEN_LIVEREGUNITS_H #include "llvm/ADT/BitVector.h" -#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" #include namespace llvm { @@ -93,12 +93,14 @@ class LiveRegUnits { } /// Updates liveness when stepping backwards over the instruction \p MI. + /// This removes all register units defined or clobbered in \p MI and then + /// adds the units used (as in use operands) in \p MI. void stepBackward(const MachineInstr &MI); - /// Mark all register units live during instruction \p MI. - /// This can be used to accumulate live/unoccupied registers over a range of - /// instructions. - void accumulateBackward(const MachineInstr &MI); + /// Adds all register units used, defined or clobbered in \p MI. + /// This is useful when walking over a range of instructions to find registers + /// unused over the whole range. + void accumulate(const MachineInstr &MI); /// Adds registers living out of block \p MBB. /// Live out registers are the union of the live-in registers of the successor diff --git a/interpreter/llvm/src/include/llvm/CodeGen/LiveStackAnalysis.h b/interpreter/llvm/src/include/llvm/CodeGen/LiveStackAnalysis.h index 3ffbe3d775b42..c90ae7b184f4e 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/LiveStackAnalysis.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/LiveStackAnalysis.h @@ -1,4 +1,4 @@ -//===-- LiveStackAnalysis.h - Live Stack Slot Analysis ----------*- C++ -*-===// +//===- LiveStackAnalysis.h - Live Stack Slot Analysis -----------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -18,13 +18,16 @@ #include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/Support/Allocator.h" -#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Pass.h" +#include #include #include namespace llvm { +class TargetRegisterClass; +class TargetRegisterInfo; + class LiveStacks : public MachineFunctionPass { const TargetRegisterInfo *TRI; @@ -33,8 +36,7 @@ class LiveStacks : public MachineFunctionPass { VNInfo::Allocator VNInfoAllocator; /// S2IMap - Stack slot indices to live interval mapping.
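// [Editor's note] Illustrative sketch, not part of the patch: the intended
// rematerialization handshake around the Remat struct and rematerializeAt()
// above. The canRematerializeAt() signature is an assumption based on this
// tree; all values are hypothetical.
static void rematExample(LiveRangeEdit &Edit, VNInfo *ParentVNI,
                         VNInfo *OrigVNI, SlotIndex UseIdx,
                         MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator MI, unsigned DestReg,
                         const TargetRegisterInfo &TRI) {
  LiveRangeEdit::Remat RM(ParentVNI); // OrigMI is filled in by the query
  if (Edit.canRematerializeAt(RM, OrigVNI, UseIdx, /*cheapAsAMove=*/false))
    Edit.rematerializeAt(MBB, MI, DestReg, RM, TRI); // returns new SlotIndex
}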
- /// - typedef std::unordered_map<int, LiveInterval> SS2IntervalMap; + using SS2IntervalMap = std::unordered_map<int, LiveInterval>; SS2IntervalMap S2IMap; /// S2RCMap - Stack slot indices to register class mapping. @@ -42,12 +44,14 @@ public: static char ID; // Pass identification, replacement for typeid + LiveStacks() : MachineFunctionPass(ID) { initializeLiveStacksPass(*PassRegistry::getPassRegistry()); } - typedef SS2IntervalMap::iterator iterator; - typedef SS2IntervalMap::const_iterator const_iterator; + using iterator = SS2IntervalMap::iterator; + using const_iterator = SS2IntervalMap::const_iterator; + const_iterator begin() const { return S2IMap.begin(); } const_iterator end() const { return S2IMap.end(); } iterator begin() { return S2IMap.begin(); } @@ -93,6 +97,7 @@ /// print - Implement the dump method. void print(raw_ostream &O, const Module * = nullptr) const override; }; -} -#endif /* LLVM_CODEGEN_LIVESTACK_ANALYSIS_H */ +} // end namespace llvm + +#endif // LLVM_CODEGEN_LIVESTACK_ANALYSIS_H diff --git a/interpreter/llvm/src/include/llvm/CodeGen/MIRParser/MIRParser.h b/interpreter/llvm/src/include/llvm/CodeGen/MIRParser/MIRParser.h index dd0780397f429..b631a8c0122a2 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/MIRParser/MIRParser.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/MIRParser/MIRParser.h @@ -18,7 +18,6 @@ #ifndef LLVM_CODEGEN_MIRPARSER_MIRPARSER_H #define LLVM_CODEGEN_MIRPARSER_MIRPARSER_H -#include "llvm/CodeGen/MachineFunctionInitializer.h" #include "llvm/IR/Module.h" #include "llvm/Support/MemoryBuffer.h" #include @@ -27,29 +26,30 @@ namespace llvm { class StringRef; class MIRParserImpl; +class MachineModuleInfo; class SMDiagnostic; /// This class initializes machine functions by applying the state loaded from /// a MIR file. -class MIRParser : public MachineFunctionInitializer { +class MIRParser { std::unique_ptr<MIRParserImpl> Impl; public: MIRParser(std::unique_ptr<MIRParserImpl> Impl); MIRParser(const MIRParser &) = delete; - ~MIRParser() override; + ~MIRParser(); - /// Parse the optional LLVM IR module that's embedded in the MIR file. + /// Parses the optional LLVM IR module in the MIR file. /// /// A new, empty module is created if the LLVM IR isn't present. - /// Returns null if a parsing error occurred. - std::unique_ptr<Module> parseLLVMModule(); + /// \returns nullptr if a parsing error occurred. + std::unique_ptr<Module> parseIRModule(); - /// Initialize the machine function to the state that's described in the MIR - /// file. + /// \brief Parses MachineFunctions in the MIR file and adds them to the given + /// MachineModuleInfo \p MMI. /// - /// Return true if error occurred. - bool initializeMachineFunction(MachineFunction &MF) override; + /// \returns true if an error occurred. + bool parseMachineFunctions(Module &M, MachineModuleInfo &MMI); }; /// This function is the main interface to the MIR serialization format parser.
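// [Editor's note] Illustrative sketch, not part of the patch: the new
// two-step MIRParser flow. createMIRParserFromFile() and the
// MachineModuleInfo(const TargetMachine *) constructor are assumed from this
// tree; error handling is reduced to a bool for brevity.
static bool loadMIRExample(StringRef File, LLVMContext &Ctx,
                           const TargetMachine &TM) {
  SMDiagnostic Error;
  std::unique_ptr<MIRParser> Parser =
      createMIRParserFromFile(File, Error, Ctx);
  if (!Parser)
    return true;
  std::unique_ptr<Module> M = Parser->parseIRModule(); // empty module if none
  if (!M)
    return true;
  MachineModuleInfo MMI(&TM);
  return Parser->parseMachineFunctions(*M, MMI); // true on error
}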
diff --git a/interpreter/llvm/src/include/llvm/CodeGen/MIRYamlMapping.h b/interpreter/llvm/src/include/llvm/CodeGen/MIRYamlMapping.h index 47b40de6fe1f6..1b1ba6a05837c 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/MIRYamlMapping.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/MIRYamlMapping.h @@ -72,6 +72,9 @@ template <> struct ScalarTraits<StringValue> { struct BlockStringValue { StringValue Value; + bool operator==(const BlockStringValue &Other) const { + return Value == Other.Value; + } }; template <> struct BlockScalarTraits<BlockStringValue> { @@ -146,6 +149,10 @@ struct VirtualRegisterDefinition { StringValue Class; StringValue PreferredRegister; // TODO: Serialize the target specific register hints. + bool operator==(const VirtualRegisterDefinition &Other) const { + return ID == Other.ID && Class == Other.Class && + PreferredRegister == Other.PreferredRegister; + } }; template <> struct MappingTraits<VirtualRegisterDefinition> { @@ -162,6 +169,10 @@ template <> struct MappingTraits<MachineFunctionLiveIn> { struct MachineFunctionLiveIn { StringValue Register; StringValue VirtualRegister; + bool operator==(const MachineFunctionLiveIn &Other) const { + return Register == Other.Register && + VirtualRegister == Other.VirtualRegister; + } }; template <> struct MappingTraits<MachineFunctionLiveIn> { @@ -196,6 +207,14 @@ struct MachineStackObject { StringValue DebugVar; StringValue DebugExpr; StringValue DebugLoc; + bool operator==(const MachineStackObject &Other) const { + return ID == Other.ID && Name == Other.Name && Type == Other.Type && + Offset == Other.Offset && Size == Other.Size && + Alignment == Other.Alignment && + CalleeSavedRegister == Other.CalleeSavedRegister && + LocalOffset == Other.LocalOffset && DebugVar == Other.DebugVar && + DebugExpr == Other.DebugExpr && DebugLoc == Other.DebugLoc; + } }; template <> struct ScalarEnumerationTraits<MachineStackObject::ObjectType> { @@ -214,13 +233,13 @@ template <> struct MappingTraits<MachineStackObject> { YamlIO.mapOptional( "type", Object.Type, MachineStackObject::DefaultType); // Don't print the default type. - YamlIO.mapOptional("offset", Object.Offset); + YamlIO.mapOptional("offset", Object.Offset, (int64_t)0); if (Object.Type != MachineStackObject::VariableSized) YamlIO.mapRequired("size", Object.Size); - YamlIO.mapOptional("alignment", Object.Alignment); + YamlIO.mapOptional("alignment", Object.Alignment, (unsigned)0); YamlIO.mapOptional("callee-saved-register", Object.CalleeSavedRegister, StringValue()); // Don't print it out when it's empty. - YamlIO.mapOptional("local-offset", Object.LocalOffset); + YamlIO.mapOptional("local-offset", Object.LocalOffset, Optional<int64_t>()); YamlIO.mapOptional("di-variable", Object.DebugVar, StringValue()); // Don't print it out when it's empty. YamlIO.mapOptional("di-expression", Object.DebugExpr, @@ -244,6 +263,12 @@ struct FixedMachineStackObject { bool IsImmutable = false; bool IsAliased = false; StringValue CalleeSavedRegister; + bool operator==(const FixedMachineStackObject &Other) const { + return ID == Other.ID && Type == Other.Type && Offset == Other.Offset && + Size == Other.Size && Alignment == Other.Alignment && + IsImmutable == Other.IsImmutable && IsAliased == Other.IsAliased && + CalleeSavedRegister == Other.CalleeSavedRegister; + } }; template <> @@ -261,12 +286,12 @@ template <> struct MappingTraits<FixedMachineStackObject> { YamlIO.mapOptional( "type", Object.Type, FixedMachineStackObject::DefaultType); // Don't print the default type.
- YamlIO.mapOptional("offset", Object.Offset); - YamlIO.mapOptional("size", Object.Size); - YamlIO.mapOptional("alignment", Object.Alignment); + YamlIO.mapOptional("offset", Object.Offset, (int64_t)0); + YamlIO.mapOptional("size", Object.Size, (uint64_t)0); + YamlIO.mapOptional("alignment", Object.Alignment, (unsigned)0); if (Object.Type != FixedMachineStackObject::SpillSlot) { - YamlIO.mapOptional("isImmutable", Object.IsImmutable); - YamlIO.mapOptional("isAliased", Object.IsAliased); + YamlIO.mapOptional("isImmutable", Object.IsImmutable, false); + YamlIO.mapOptional("isAliased", Object.IsAliased, false); } YamlIO.mapOptional("callee-saved-register", Object.CalleeSavedRegister, StringValue()); // Don't print it out when it's empty. @@ -279,13 +304,17 @@ struct MachineConstantPoolValue { UnsignedValue ID; StringValue Value; unsigned Alignment = 0; + bool operator==(const MachineConstantPoolValue &Other) const { + return ID == Other.ID && Value == Other.Value && + Alignment == Other.Alignment; + } }; template <> struct MappingTraits { static void mapping(IO &YamlIO, MachineConstantPoolValue &Constant) { YamlIO.mapRequired("id", Constant.ID); - YamlIO.mapOptional("value", Constant.Value); - YamlIO.mapOptional("alignment", Constant.Alignment); + YamlIO.mapOptional("value", Constant.Value, StringValue()); + YamlIO.mapOptional("alignment", Constant.Alignment, (unsigned)0); } }; @@ -293,16 +322,22 @@ struct MachineJumpTable { struct Entry { UnsignedValue ID; std::vector Blocks; + bool operator==(const Entry &Other) const { + return ID == Other.ID && Blocks == Other.Blocks; + } }; MachineJumpTableInfo::JTEntryKind Kind = MachineJumpTableInfo::EK_Custom32; std::vector Entries; + bool operator==(const MachineJumpTable &Other) const { + return Kind == Other.Kind && Entries == Other.Entries; + } }; template <> struct MappingTraits { static void mapping(IO &YamlIO, MachineJumpTable::Entry &Entry) { YamlIO.mapRequired("id", Entry.ID); - YamlIO.mapOptional("blocks", Entry.Blocks); + YamlIO.mapOptional("blocks", Entry.Blocks, std::vector()); } }; @@ -322,7 +357,8 @@ namespace yaml { template <> struct MappingTraits { static void mapping(IO &YamlIO, MachineJumpTable &JT) { YamlIO.mapRequired("kind", JT.Kind); - YamlIO.mapOptional("entries", JT.Entries); + YamlIO.mapOptional("entries", JT.Entries, + std::vector()); } }; @@ -351,25 +387,43 @@ struct MachineFrameInfo { bool HasMustTailInVarArgFunc = false; StringValue SavePoint; StringValue RestorePoint; + bool operator==(const MachineFrameInfo &Other) const { + return IsFrameAddressTaken == Other.IsFrameAddressTaken && + IsReturnAddressTaken == Other.IsReturnAddressTaken && + HasStackMap == Other.HasStackMap && + HasPatchPoint == Other.HasPatchPoint && + StackSize == Other.StackSize && + OffsetAdjustment == Other.OffsetAdjustment && + MaxAlignment == Other.MaxAlignment && + AdjustsStack == Other.AdjustsStack && HasCalls == Other.HasCalls && + StackProtector == Other.StackProtector && + MaxCallFrameSize == Other.MaxCallFrameSize && + HasOpaqueSPAdjustment == Other.HasOpaqueSPAdjustment && + HasVAStart == Other.HasVAStart && + HasMustTailInVarArgFunc == Other.HasMustTailInVarArgFunc && + SavePoint == Other.SavePoint && RestorePoint == Other.RestorePoint; + } }; template <> struct MappingTraits { static void mapping(IO &YamlIO, MachineFrameInfo &MFI) { - YamlIO.mapOptional("isFrameAddressTaken", MFI.IsFrameAddressTaken); - YamlIO.mapOptional("isReturnAddressTaken", MFI.IsReturnAddressTaken); - YamlIO.mapOptional("hasStackMap", MFI.HasStackMap); - 
YamlIO.mapOptional("hasPatchPoint", MFI.HasPatchPoint); - YamlIO.mapOptional("stackSize", MFI.StackSize); - YamlIO.mapOptional("offsetAdjustment", MFI.OffsetAdjustment); - YamlIO.mapOptional("maxAlignment", MFI.MaxAlignment); - YamlIO.mapOptional("adjustsStack", MFI.AdjustsStack); - YamlIO.mapOptional("hasCalls", MFI.HasCalls); + YamlIO.mapOptional("isFrameAddressTaken", MFI.IsFrameAddressTaken, false); + YamlIO.mapOptional("isReturnAddressTaken", MFI.IsReturnAddressTaken, false); + YamlIO.mapOptional("hasStackMap", MFI.HasStackMap, false); + YamlIO.mapOptional("hasPatchPoint", MFI.HasPatchPoint, false); + YamlIO.mapOptional("stackSize", MFI.StackSize, (uint64_t)0); + YamlIO.mapOptional("offsetAdjustment", MFI.OffsetAdjustment, (int)0); + YamlIO.mapOptional("maxAlignment", MFI.MaxAlignment, (unsigned)0); + YamlIO.mapOptional("adjustsStack", MFI.AdjustsStack, false); + YamlIO.mapOptional("hasCalls", MFI.HasCalls, false); YamlIO.mapOptional("stackProtector", MFI.StackProtector, StringValue()); // Don't print it out when it's empty. - YamlIO.mapOptional("maxCallFrameSize", MFI.MaxCallFrameSize, ~0u); - YamlIO.mapOptional("hasOpaqueSPAdjustment", MFI.HasOpaqueSPAdjustment); - YamlIO.mapOptional("hasVAStart", MFI.HasVAStart); - YamlIO.mapOptional("hasMustTailInVarArgFunc", MFI.HasMustTailInVarArgFunc); + YamlIO.mapOptional("maxCallFrameSize", MFI.MaxCallFrameSize, (unsigned)~0); + YamlIO.mapOptional("hasOpaqueSPAdjustment", MFI.HasOpaqueSPAdjustment, + false); + YamlIO.mapOptional("hasVAStart", MFI.HasVAStart, false); + YamlIO.mapOptional("hasMustTailInVarArgFunc", MFI.HasMustTailInVarArgFunc, + false); YamlIO.mapOptional("savePoint", MFI.SavePoint, StringValue()); // Don't print it out when it's empty. YamlIO.mapOptional("restorePoint", MFI.RestorePoint, @@ -381,7 +435,6 @@ struct MachineFunction { StringRef Name; unsigned Alignment = 0; bool ExposesReturnsTwice = false; - bool NoVRegs; // GISel MachineFunctionProperties. 
bool Legalized = false; bool RegBankSelected = false; @@ -404,23 +457,28 @@ struct MachineFunction { template <> struct MappingTraits { static void mapping(IO &YamlIO, MachineFunction &MF) { YamlIO.mapRequired("name", MF.Name); - YamlIO.mapOptional("alignment", MF.Alignment); - YamlIO.mapOptional("exposesReturnsTwice", MF.ExposesReturnsTwice); - YamlIO.mapOptional("noVRegs", MF.NoVRegs); - YamlIO.mapOptional("legalized", MF.Legalized); - YamlIO.mapOptional("regBankSelected", MF.RegBankSelected); - YamlIO.mapOptional("selected", MF.Selected); - YamlIO.mapOptional("tracksRegLiveness", MF.TracksRegLiveness); - YamlIO.mapOptional("registers", MF.VirtualRegisters); - YamlIO.mapOptional("liveins", MF.LiveIns); - YamlIO.mapOptional("calleeSavedRegisters", MF.CalleeSavedRegisters); - YamlIO.mapOptional("frameInfo", MF.FrameInfo); - YamlIO.mapOptional("fixedStack", MF.FixedStackObjects); - YamlIO.mapOptional("stack", MF.StackObjects); - YamlIO.mapOptional("constants", MF.Constants); + YamlIO.mapOptional("alignment", MF.Alignment, (unsigned)0); + YamlIO.mapOptional("exposesReturnsTwice", MF.ExposesReturnsTwice, false); + YamlIO.mapOptional("legalized", MF.Legalized, false); + YamlIO.mapOptional("regBankSelected", MF.RegBankSelected, false); + YamlIO.mapOptional("selected", MF.Selected, false); + YamlIO.mapOptional("tracksRegLiveness", MF.TracksRegLiveness, false); + YamlIO.mapOptional("registers", MF.VirtualRegisters, + std::vector()); + YamlIO.mapOptional("liveins", MF.LiveIns, + std::vector()); + YamlIO.mapOptional("calleeSavedRegisters", MF.CalleeSavedRegisters, + Optional>()); + YamlIO.mapOptional("frameInfo", MF.FrameInfo, MachineFrameInfo()); + YamlIO.mapOptional("fixedStack", MF.FixedStackObjects, + std::vector()); + YamlIO.mapOptional("stack", MF.StackObjects, + std::vector()); + YamlIO.mapOptional("constants", MF.Constants, + std::vector()); if (!YamlIO.outputting() || !MF.JumpTableInfo.Entries.empty()) - YamlIO.mapOptional("jumpTable", MF.JumpTableInfo); - YamlIO.mapOptional("body", MF.Body); + YamlIO.mapOptional("jumpTable", MF.JumpTableInfo, MachineJumpTable()); + YamlIO.mapOptional("body", MF.Body, BlockStringValue()); } }; diff --git a/interpreter/llvm/src/include/llvm/CodeGen/MachineBasicBlock.h b/interpreter/llvm/src/include/llvm/CodeGen/MachineBasicBlock.h index 18d40564856d5..97a49ce4dc4fa 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/MachineBasicBlock.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/MachineBasicBlock.h @@ -1,4 +1,4 @@ -//===-- llvm/CodeGen/MachineBasicBlock.h ------------------------*- C++ -*-===// +//===- llvm/CodeGen/MachineBasicBlock.h -------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -15,41 +15,50 @@ #define LLVM_CODEGEN_MACHINEBASICBLOCK_H #include "llvm/ADT/GraphTraits.h" +#include "llvm/ADT/ilist.h" +#include "llvm/ADT/ilist_node.h" #include "llvm/ADT/iterator_range.h" -#include "llvm/CodeGen/MachineInstrBundleIterator.h" +#include "llvm/ADT/simple_ilist.h" #include "llvm/CodeGen/MachineInstr.h" -#include "llvm/Support/BranchProbability.h" +#include "llvm/CodeGen/MachineInstrBundleIterator.h" +#include "llvm/IR/DebugLoc.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/MC/MCRegisterInfo.h" -#include "llvm/Support/DataTypes.h" +#include "llvm/Support/BranchProbability.h" +#include +#include #include +#include +#include +#include namespace llvm { -class Pass; class BasicBlock; class MachineFunction; class MCSymbol; -class MIPrinter; +class ModuleSlotTracker; +class Pass; class SlotIndexes; class StringRef; class 
raw_ostream; -class MachineBranchProbabilityInfo; +class TargetRegisterClass; +class TargetRegisterInfo; template <> struct ilist_traits { private: friend class MachineBasicBlock; // Set by the owning MachineBasicBlock. + MachineBasicBlock *Parent; - typedef simple_ilist>::iterator - instr_iterator; + using instr_iterator = + simple_ilist>::iterator; public: void addNodeToList(MachineInstr *N); void removeNodeFromList(MachineInstr *N); void transferNodesFromList(ilist_traits &OldList, instr_iterator First, instr_iterator Last); - void deleteNode(MachineInstr *MI); }; @@ -69,7 +78,8 @@ class MachineBasicBlock }; private: - typedef ilist> Instructions; + using Instructions = ilist>; + Instructions Insts; const BasicBlock *BB; int Number; @@ -83,12 +93,12 @@ class MachineBasicBlock /// same order as Successors, or it is empty if we don't use it (disable /// optimization). std::vector Probs; - typedef std::vector::iterator probability_iterator; - typedef std::vector::const_iterator - const_probability_iterator; + using probability_iterator = std::vector::iterator; + using const_probability_iterator = + std::vector::const_iterator; /// Keep track of the physical registers that are livein of the basicblock. - typedef std::vector LiveInVector; + using LiveInVector = std::vector; LiveInVector LiveIns; /// Alignment of the basic block. Zero if the basic block does not need to be @@ -113,7 +123,7 @@ class MachineBasicBlock mutable MCSymbol *CachedMCSymbol = nullptr; // Intrusive list support - MachineBasicBlock() {} + MachineBasicBlock() = default; explicit MachineBasicBlock(MachineFunction &MF, const BasicBlock *BB); @@ -145,16 +155,16 @@ class MachineBasicBlock const MachineFunction *getParent() const { return xParent; } MachineFunction *getParent() { return xParent; } - typedef Instructions::iterator instr_iterator; - typedef Instructions::const_iterator const_instr_iterator; - typedef Instructions::reverse_iterator reverse_instr_iterator; - typedef Instructions::const_reverse_iterator const_reverse_instr_iterator; + using instr_iterator = Instructions::iterator; + using const_instr_iterator = Instructions::const_iterator; + using reverse_instr_iterator = Instructions::reverse_iterator; + using const_reverse_instr_iterator = Instructions::const_reverse_iterator; - typedef MachineInstrBundleIterator iterator; - typedef MachineInstrBundleIterator const_iterator; - typedef MachineInstrBundleIterator reverse_iterator; - typedef MachineInstrBundleIterator - const_reverse_iterator; + using iterator = MachineInstrBundleIterator; + using const_iterator = MachineInstrBundleIterator; + using reverse_iterator = MachineInstrBundleIterator; + using const_reverse_iterator = + MachineInstrBundleIterator; unsigned size() const { return (unsigned)Insts.size(); } bool empty() const { return Insts.empty(); } @@ -178,8 +188,8 @@ class MachineBasicBlock reverse_instr_iterator instr_rend () { return Insts.rend(); } const_reverse_instr_iterator instr_rend () const { return Insts.rend(); } - typedef iterator_range instr_range; - typedef iterator_range const_instr_range; + using instr_range = iterator_range; + using const_instr_range = iterator_range; instr_range instrs() { return instr_range(instr_begin(), instr_end()); } const_instr_range instrs() const { return const_instr_range(instr_begin(), instr_end()); @@ -213,18 +223,18 @@ class MachineBasicBlock } // Machine-CFG iterators - typedef std::vector::iterator pred_iterator; - typedef std::vector::const_iterator const_pred_iterator; - typedef std::vector::iterator 
succ_iterator; - typedef std::vector::const_iterator const_succ_iterator; - typedef std::vector::reverse_iterator - pred_reverse_iterator; - typedef std::vector::const_reverse_iterator - const_pred_reverse_iterator; - typedef std::vector::reverse_iterator - succ_reverse_iterator; - typedef std::vector::const_reverse_iterator - const_succ_reverse_iterator; + using pred_iterator = std::vector::iterator; + using const_pred_iterator = std::vector::const_iterator; + using succ_iterator = std::vector::iterator; + using const_succ_iterator = std::vector::const_iterator; + using pred_reverse_iterator = + std::vector::reverse_iterator; + using const_pred_reverse_iterator = + std::vector::const_reverse_iterator; + using succ_reverse_iterator = + std::vector::reverse_iterator; + using const_succ_reverse_iterator = + std::vector::const_reverse_iterator; pred_iterator pred_begin() { return Predecessors.begin(); } const_pred_iterator pred_begin() const { return Predecessors.begin(); } pred_iterator pred_end() { return Predecessors.end(); } @@ -307,7 +317,7 @@ class MachineBasicBlock // Iteration support for live in sets. These sets are kept in sorted // order by their register number. - typedef LiveInVector::const_iterator livein_iterator; + using livein_iterator = LiveInVector::const_iterator; #ifndef NDEBUG /// Unlike livein_begin, this method does not check that the liveness /// information is accurate. Still for debug purposes it may be useful @@ -325,6 +335,9 @@ class MachineBasicBlock return make_range(livein_begin(), livein_end()); } + /// Remove entry from the livein set and return iterator to the next. + livein_iterator removeLiveIn(livein_iterator I); + /// Get the clobber mask for the start of this basic block. Funclets use this /// to prevent register allocation across funclet transitions. const uint32_t *getBeginClobberMask(const TargetRegisterInfo *TRI) const; @@ -363,6 +376,9 @@ class MachineBasicBlock /// Indicates if this is the entry block of a cleanup funclet. void setIsCleanupFuncletEntry(bool V = true) { IsCleanupFuncletEntry = V; } + /// Returns true if it is legal to hoist instructions into this block. + bool isLegalToHoistInto() const; + // Code Layout methods. /// Move 'this' block before or after the specified block. This only moves @@ -455,7 +471,6 @@ class MachineBasicBlock /// other block. bool isLayoutSuccessor(const MachineBasicBlock *MBB) const; - /// Return the fallthrough block if the block can implicitly /// transfer control to the block after it by falling off the end of /// it. This should return null if it can reach the block after @@ -695,7 +710,7 @@ class MachineBasicBlock LivenessQueryResult computeRegisterLiveness(const TargetRegisterInfo *TRI, unsigned Reg, const_iterator Before, - unsigned Neighborhood=10) const; + unsigned Neighborhood = 10) const; // Debugging methods. void dump() const; @@ -714,7 +729,6 @@ class MachineBasicBlock /// Return the MCSymbol for this basic block. MCSymbol *getSymbol() const; - private: /// Return probability iterator corresponding to the I successor iterator. 
probability_iterator getProbabilityIterator(succ_iterator I); @@ -764,8 +778,8 @@ struct MBB2NumberFunctor : // template <> struct GraphTraits { - typedef MachineBasicBlock *NodeRef; - typedef MachineBasicBlock::succ_iterator ChildIteratorType; + using NodeRef = MachineBasicBlock *; + using ChildIteratorType = MachineBasicBlock::succ_iterator; static NodeRef getEntryNode(MachineBasicBlock *BB) { return BB; } static ChildIteratorType child_begin(NodeRef N) { return N->succ_begin(); } @@ -773,8 +787,8 @@ template <> struct GraphTraits { }; template <> struct GraphTraits { - typedef const MachineBasicBlock *NodeRef; - typedef MachineBasicBlock::const_succ_iterator ChildIteratorType; + using NodeRef = const MachineBasicBlock *; + using ChildIteratorType = MachineBasicBlock::const_succ_iterator; static NodeRef getEntryNode(const MachineBasicBlock *BB) { return BB; } static ChildIteratorType child_begin(NodeRef N) { return N->succ_begin(); } @@ -787,28 +801,30 @@ template <> struct GraphTraits { // to be when traversing the predecessor edges of a MBB // instead of the successor edges. // -template <> struct GraphTraits > { - typedef MachineBasicBlock *NodeRef; - typedef MachineBasicBlock::pred_iterator ChildIteratorType; +template <> struct GraphTraits> { + using NodeRef = MachineBasicBlock *; + using ChildIteratorType = MachineBasicBlock::pred_iterator; + static NodeRef getEntryNode(Inverse G) { return G.Graph; } + static ChildIteratorType child_begin(NodeRef N) { return N->pred_begin(); } static ChildIteratorType child_end(NodeRef N) { return N->pred_end(); } }; -template <> struct GraphTraits > { - typedef const MachineBasicBlock *NodeRef; - typedef MachineBasicBlock::const_pred_iterator ChildIteratorType; +template <> struct GraphTraits> { + using NodeRef = const MachineBasicBlock *; + using ChildIteratorType = MachineBasicBlock::const_pred_iterator; + static NodeRef getEntryNode(Inverse G) { return G.Graph; } + static ChildIteratorType child_begin(NodeRef N) { return N->pred_begin(); } static ChildIteratorType child_end(NodeRef N) { return N->pred_end(); } }; - - /// MachineInstrSpan provides an interface to get an iteration range /// containing the instruction it was initialized with, along with all /// those instructions inserted prior to or following that instruction @@ -816,6 +832,7 @@ template <> struct GraphTraits > { class MachineInstrSpan { MachineBasicBlock &MBB; MachineBasicBlock::iterator I, B, E; + public: MachineInstrSpan(MachineBasicBlock::iterator I) : MBB(*I->getParent()), @@ -854,6 +871,6 @@ inline IterT skipDebugInstructionsBackward(IterT It, IterT Begin) { return It; } -} // End llvm namespace +} // end namespace llvm -#endif +#endif // LLVM_CODEGEN_MACHINEBASICBLOCK_H diff --git a/interpreter/llvm/src/include/llvm/CodeGen/MachineBlockFrequencyInfo.h b/interpreter/llvm/src/include/llvm/CodeGen/MachineBlockFrequencyInfo.h index cd1c204981ed8..cba79c818a761 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/MachineBlockFrequencyInfo.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/MachineBlockFrequencyInfo.h @@ -1,4 +1,4 @@ -//===- MachineBlockFrequencyInfo.h - MBB Frequency Analysis -*- C++ -*-----===// +//===- MachineBlockFrequencyInfo.h - MBB Frequency Analysis -----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -17,26 +17,28 @@ #include "llvm/ADT/Optional.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/Support/BlockFrequency.h" -#include +#include +#include namespace llvm { +template class BlockFrequencyInfoImpl; class 
MachineBasicBlock; class MachineBranchProbabilityInfo; +class MachineFunction; class MachineLoopInfo; -template class BlockFrequencyInfoImpl; +class raw_ostream; /// MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation /// to estimate machine basic block frequencies. class MachineBlockFrequencyInfo : public MachineFunctionPass { - typedef BlockFrequencyInfoImpl ImplType; + using ImplType = BlockFrequencyInfoImpl; std::unique_ptr MBFI; public: static char ID; MachineBlockFrequencyInfo(); - ~MachineBlockFrequencyInfo() override; void getAnalysisUsage(AnalysisUsage &AU) const override; @@ -74,9 +76,8 @@ class MachineBlockFrequencyInfo : public MachineFunctionPass { const MachineBasicBlock *MBB) const; uint64_t getEntryFreq() const; - }; -} +} // end namespace llvm -#endif +#endif // LLVM_CODEGEN_MACHINEBLOCKFREQUENCYINFO_H diff --git a/interpreter/llvm/src/include/llvm/CodeGen/MachineConstantPool.h b/interpreter/llvm/src/include/llvm/CodeGen/MachineConstantPool.h index d2036c4a29a55..1705a0f7e59b3 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/MachineConstantPool.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/MachineConstantPool.h @@ -1,4 +1,4 @@ -//===-- CodeGen/MachineConstantPool.h - Abstract Constant Pool --*- C++ -*-===// +//===- CodeGen/MachineConstantPool.h - Abstract Constant Pool ---*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -18,29 +18,28 @@ #include "llvm/ADT/DenseSet.h" #include "llvm/MC/SectionKind.h" -#include #include #include namespace llvm { class Constant; -class FoldingSetNodeID; class DataLayout; -class TargetMachine; -class Type; +class FoldingSetNodeID; class MachineConstantPool; class raw_ostream; +class Type; /// Abstract base class for all machine specific constantpool value subclasses. /// class MachineConstantPoolValue { virtual void anchor(); + Type *Ty; public: explicit MachineConstantPoolValue(Type *ty) : Ty(ty) {} - virtual ~MachineConstantPoolValue() {} + virtual ~MachineConstantPoolValue() = default; /// getType - get type of this MachineConstantPoolValue. /// @@ -81,6 +80,7 @@ class MachineConstantPoolEntry { : Alignment(A) { Val.ConstVal = V; } + MachineConstantPoolEntry(MachineConstantPoolValue *V, unsigned A) : Alignment(A) { Val.MachineCPVal = V; @@ -153,13 +153,12 @@ class MachineConstantPool { /// print - Used by the MachineFunction printer to print information about /// constant pool objects. Implemented in MachineFunction.cpp - /// void print(raw_ostream &OS) const; /// dump - Call print(cerr) to be called from the debugger. 
void dump() const; }; -} // End llvm namespace +} // end namespace llvm -#endif +#endif // LLVM_CODEGEN_MACHINECONSTANTPOOL_H diff --git a/interpreter/llvm/src/include/llvm/CodeGen/MachineDominanceFrontier.h b/interpreter/llvm/src/include/llvm/CodeGen/MachineDominanceFrontier.h index 4131194a0c0fc..6efeefd9a7217 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/MachineDominanceFrontier.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/MachineDominanceFrontier.h @@ -11,34 +11,36 @@ #define LLVM_CODEGEN_MACHINEDOMINANCEFRONTIER_H #include "llvm/Analysis/DominanceFrontier.h" +#include "llvm/Analysis/DominanceFrontierImpl.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunctionPass.h" - +#include "llvm/Support/GenericDomTree.h" +#include namespace llvm { class MachineDominanceFrontier : public MachineFunctionPass { ForwardDominanceFrontierBase Base; + public: - typedef DominatorTreeBase DomTreeT; - typedef DomTreeNodeBase DomTreeNodeT; - typedef DominanceFrontierBase::DomSetType DomSetType; - typedef DominanceFrontierBase::iterator iterator; - typedef DominanceFrontierBase::const_iterator const_iterator; + using DomTreeT = DomTreeBase; + using DomTreeNodeT = DomTreeNodeBase; + using DomSetType = DominanceFrontierBase::DomSetType; + using iterator = DominanceFrontierBase::iterator; + using const_iterator = + DominanceFrontierBase::const_iterator; - void operator=(const MachineDominanceFrontier &) = delete; - MachineDominanceFrontier(const MachineDominanceFrontier &) = delete; + MachineDominanceFrontier(const MachineDominanceFrontier &) = delete; + MachineDominanceFrontier &operator=(const MachineDominanceFrontier &) = delete; - static char ID; + static char ID; - MachineDominanceFrontier(); + MachineDominanceFrontier(); - DominanceFrontierBase &getBase() { - return Base; - } + DominanceFrontierBase &getBase() { return Base; } - inline const std::vector &getRoots() const { - return Base.getRoots(); + inline const std::vector &getRoots() const { + return Base.getRoots(); } MachineBasicBlock *getRoot() const { @@ -93,7 +95,7 @@ class MachineDominanceFrontier : public MachineFunctionPass { return Base.compareDomSet(DS1, DS2); } - bool compare(DominanceFrontierBase &Other) const { + bool compare(DominanceFrontierBase &Other) const { return Base.compare(Other); } @@ -104,6 +106,6 @@ class MachineDominanceFrontier : public MachineFunctionPass { void getAnalysisUsage(AnalysisUsage &AU) const override; }; -} +} // end namespace llvm -#endif +#endif // LLVM_CODEGEN_MACHINEDOMINANCEFRONTIER_H diff --git a/interpreter/llvm/src/include/llvm/CodeGen/MachineDominators.h b/interpreter/llvm/src/include/llvm/CodeGen/MachineDominators.h index 30b6cfdd1c36d..8bf98f6064956 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/MachineDominators.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/MachineDominators.h @@ -1,4 +1,4 @@ -//=- llvm/CodeGen/MachineDominators.h - Machine Dom Calculation --*- C++ -*-==// +//==- llvm/CodeGen/MachineDominators.h - Machine Dom Calculation -*- C++ -*-==// // // The LLVM Compiler Infrastructure // @@ -16,24 +16,29 @@ #define LLVM_CODEGEN_MACHINEDOMINATORS_H #include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/Support/GenericDomTree.h" #include "llvm/Support/GenericDomTreeConstruction.h" +#include #include +#include namespace llvm { -template<> -inline 
void DominatorTreeBase::addRoot(MachineBasicBlock* MBB) { +template <> +inline void DominatorTreeBase::addRoot( + MachineBasicBlock *MBB) { this->Roots.push_back(MBB); } extern template class DomTreeNodeBase; -extern template class DominatorTreeBase; +extern template class DominatorTreeBase; // DomTree +extern template class DominatorTreeBase; // PostDomTree -typedef DomTreeNodeBase MachineDomTreeNode; +using MachineDomTreeNode = DomTreeNodeBase; //===------------------------------------- /// DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to @@ -52,6 +57,7 @@ class MachineDominatorTree : public MachineFunctionPass { /// The splitting of a critical edge is local and thus, it is possible /// to apply several of those changes at the same time. mutable SmallVector CriticalEdgesToSplit; + /// \brief Remember all the basic blocks that are inserted during /// edge splitting. /// Invariant: NewBBs == all the basic blocks contained in the NewBB @@ -61,7 +67,7 @@ class MachineDominatorTree : public MachineFunctionPass { mutable SmallSet NewBBs; /// The DominatorTreeBase that is used to compute a normal dominator tree - std::unique_ptr> DT; + std::unique_ptr> DT; /// \brief Apply all the recorded critical edges to the DT. /// This updates the underlying DT information in a way that uses @@ -75,9 +81,8 @@ class MachineDominatorTree : public MachineFunctionPass { MachineDominatorTree(); - DominatorTreeBase &getBase() { - if (!DT) - DT.reset(new DominatorTreeBase(false)); + DomTreeBase &getBase() { + if (!DT) DT.reset(new DomTreeBase()); applySplitCriticalEdges(); return *DT; } @@ -259,8 +264,8 @@ class MachineDominatorTree : public MachineFunctionPass { template struct MachineDomTreeGraphTraitsBase { - typedef Node *NodeRef; - typedef ChildIterator ChildIteratorType; + using NodeRef = Node *; + using ChildIteratorType = ChildIterator; static NodeRef getEntryNode(NodeRef N) { return N; } static ChildIteratorType child_begin(NodeRef N) { return N->begin(); } @@ -287,6 +292,6 @@ template <> struct GraphTraits } }; -} +} // end namespace llvm -#endif +#endif // LLVM_CODEGEN_MACHINEDOMINATORS_H diff --git a/interpreter/llvm/src/include/llvm/CodeGen/MachineFunction.h b/interpreter/llvm/src/include/llvm/CodeGen/MachineFunction.h index 5859a4e61fdd5..010d7032c516a 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/MachineFunction.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/MachineFunction.h @@ -1,4 +1,4 @@ -//===-- llvm/CodeGen/MachineFunction.h --------------------------*- C++ -*-===// +//===- llvm/CodeGen/MachineFunction.h ---------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -18,38 +18,61 @@ #ifndef LLVM_CODEGEN_MACHINEFUNCTION_H #define LLVM_CODEGEN_MACHINEFUNCTION_H +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" -#include "llvm/ADT/ilist.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/GraphTraits.h" #include "llvm/ADT/Optional.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/ilist.h" +#include "llvm/ADT/iterator.h" #include "llvm/Analysis/EHPersonalities.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/IR/DebugLoc.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/Metadata.h" #include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/ArrayRecycler.h" +#include "llvm/Support/AtomicOrdering.h" #include 
"llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Recycler.h" +#include +#include +#include +#include +#include namespace llvm { -class Value; +class BasicBlock; +class BlockAddress; +class DataLayout; +class DIExpression; +class DILocalVariable; +class DILocation; class Function; -class GCModuleInfo; -class MachineRegisterInfo; -class MachineFrameInfo; +class GlobalValue; class MachineConstantPool; +class MachineFrameInfo; +class MachineFunction; class MachineJumpTableInfo; class MachineModuleInfo; +class MachineRegisterInfo; class MCContext; +class MCInstrDesc; class Pass; class PseudoSourceValueManager; +class raw_ostream; +class SlotIndexes; class TargetMachine; -class TargetSubtargetInfo; class TargetRegisterClass; -struct MachinePointerInfo; +class TargetSubtargetInfo; struct WinEHFuncInfo; template <> struct ilist_alloc_traits { @@ -137,27 +160,33 @@ class MachineFunctionProperties { bool hasProperty(Property P) const { return Properties[static_cast(P)]; } + MachineFunctionProperties &set(Property P) { Properties.set(static_cast(P)); return *this; } + MachineFunctionProperties &reset(Property P) { Properties.reset(static_cast(P)); return *this; } + /// Reset all the properties. MachineFunctionProperties &reset() { Properties.reset(); return *this; } + MachineFunctionProperties &set(const MachineFunctionProperties &MFP) { Properties |= MFP.Properties; return *this; } + MachineFunctionProperties &reset(const MachineFunctionProperties &MFP) { Properties.reset(MFP.Properties); return *this; } + // Returns true if all properties set in V (i.e. required by a pass) are set // in this. bool verifyRequiredProperties(const MachineFunctionProperties &V) const { @@ -180,18 +209,17 @@ struct SEHHandler { const BlockAddress *RecoverBA; }; - /// This structure is used to retain landing pad info for the current function. struct LandingPadInfo { MachineBasicBlock *LandingPadBlock; // Landing pad block. SmallVector BeginLabels; // Labels prior to invoke. SmallVector EndLabels; // Labels after invoke. SmallVector SEHHandlers; // SEH handlers active at this lpad. - MCSymbol *LandingPadLabel; // Label at beginning of landing pad. - std::vector TypeIds; // List of type ids (filters negative). + MCSymbol *LandingPadLabel = nullptr; // Label at beginning of landing pad. + std::vector TypeIds; // List of type ids (filters negative). explicit LandingPadInfo(MachineBasicBlock *MBB) - : LandingPadBlock(MBB), LandingPadLabel(nullptr) {} + : LandingPadBlock(MBB) {} }; class MachineFunction { @@ -239,7 +267,7 @@ class MachineFunction { Recycler BasicBlockRecycler; // List of machine basic blocks in function - typedef ilist BasicBlockListType; + using BasicBlockListType = ilist; BasicBlockListType BasicBlocks; /// FunctionNumber - This provides a unique ID for each function emitted in @@ -281,7 +309,7 @@ class MachineFunction { std::vector LandingPads; /// Map a landing pad's EH symbol to the call site indexes. - DenseMap > LPadToCallSiteMap; + DenseMap> LPadToCallSiteMap; /// Map of invoke call site index values to associated begin EH_LABEL. DenseMap CallSiteMap; @@ -303,9 +331,6 @@ class MachineFunction { /// \} - MachineFunction(const MachineFunction &) = delete; - void operator=(const MachineFunction&) = delete; - /// Clear all the members of this MachineFunction, but the ones used /// to initialize again the MachineFunction. 
/// More specifically, this deallocates all the dynamically allocated @@ -316,8 +341,8 @@ class MachineFunction { /// In particular, the XXXInfo data structure. /// \pre Fn, Target, MMI, and FunctionNumber are properly set. void init(); -public: +public: struct VariableDbgInfo { const DILocalVariable *Var; const DIExpression *Expr; @@ -328,11 +353,13 @@ class MachineFunction { unsigned Slot, const DILocation *Loc) : Var(Var), Expr(Expr), Slot(Slot), Loc(Loc) {} }; - typedef SmallVector VariableDbgInfoMapTy; + using VariableDbgInfoMapTy = SmallVector; VariableDbgInfoMapTy VariableDbgInfos; MachineFunction(const Function *Fn, const TargetMachine &TM, unsigned FunctionNum, MachineModuleInfo &MMI); + MachineFunction(const MachineFunction &) = delete; + MachineFunction &operator=(const MachineFunction &) = delete; ~MachineFunction(); /// Reset the instance as if it was just created. @@ -350,19 +377,15 @@ class MachineFunction { const DataLayout &getDataLayout() const; /// getFunction - Return the LLVM function that this machine code represents - /// const Function *getFunction() const { return Fn; } /// getName - Return the name of the corresponding LLVM function. - /// StringRef getName() const; /// getFunctionNumber - Return a unique ID for the current function. - /// unsigned getFunctionNumber() const { return FunctionNumber; } /// getTarget - Return the target machine this machine code is compiled with - /// const TargetMachine &getTarget() const { return Target; } /// getSubtarget - Return the subtarget for which this machine code is being @@ -378,14 +401,12 @@ class MachineFunction { } /// getRegInfo - Return information about the registers currently in use. - /// MachineRegisterInfo &getRegInfo() { return *RegInfo; } const MachineRegisterInfo &getRegInfo() const { return *RegInfo; } /// getFrameInfo - Return the frame info object for the current function. /// This object contains information about objects allocated on the stack /// frame of the current function in an abstract way. - /// MachineFrameInfo &getFrameInfo() { return *FrameInfo; } const MachineFrameInfo &getFrameInfo() const { return *FrameInfo; } @@ -402,7 +423,6 @@ class MachineFunction { /// getConstantPool - Return the constant pool object for the current /// function. - /// MachineConstantPool *getConstantPool() { return ConstantPool; } const MachineConstantPool *getConstantPool() const { return ConstantPool; } @@ -413,11 +433,9 @@ class MachineFunction { WinEHFuncInfo *getWinEHFuncInfo() { return WinEHInfo; } /// getAlignment - Return the alignment (log2, not bytes) of the function. - /// unsigned getAlignment() const { return Alignment; } /// setAlignment - Set the alignment (log2, not bytes) of the function. - /// void setAlignment(unsigned A) { Alignment = A; } /// ensureAlignment - Make sure the function is at least 1 << A bytes aligned. @@ -487,7 +505,6 @@ class MachineFunction { bool shouldSplitStack() const; /// getNumBlockIDs - Return the number of MBB ID's allocated. - /// unsigned getNumBlockIDs() const { return (unsigned)MBBNumbering.size(); } /// RenumberBlocks - This discards all of the MachineBasicBlock numbers and @@ -499,7 +516,6 @@ class MachineFunction { /// print - Print out the MachineFunction in a format suitable for debugging /// to the specified stream. - /// void print(raw_ostream &OS, const SlotIndexes* = nullptr) const; /// viewCFG - This function is meant for use from the debugger. 
You can just @@ -507,7 +523,6 @@ class MachineFunction { /// program, displaying the CFG of the current function with the code for each /// basic block inside. This depends on there being a 'dot' and 'gv' program /// in your path. - /// void viewCFG() const; /// viewCFGOnly - This function is meant for use from the debugger. It works @@ -518,7 +533,6 @@ class MachineFunction { void viewCFGOnly() const; /// dump - Print the current MachineFunction to cerr, useful for debugger use. - /// void dump() const; /// Run the current MachineFunction through the machine code verifier, useful @@ -528,10 +542,10 @@ class MachineFunction { bool AbortOnError = true) const; // Provide accessors for the MachineBasicBlock list... - typedef BasicBlockListType::iterator iterator; - typedef BasicBlockListType::const_iterator const_iterator; - typedef BasicBlockListType::const_reverse_iterator const_reverse_iterator; - typedef BasicBlockListType::reverse_iterator reverse_iterator; + using iterator = BasicBlockListType::iterator; + using const_iterator = BasicBlockListType::const_iterator; + using const_reverse_iterator = BasicBlockListType::const_reverse_iterator; + using reverse_iterator = BasicBlockListType::reverse_iterator; /// Support for MachineBasicBlock::getNextNode(). static BasicBlockListType MachineFunction::* @@ -590,11 +604,9 @@ class MachineFunction { //===--------------------------------------------------------------------===// // Internal functions used to automatically number MachineBasicBlocks - // /// \brief Adds the MBB to the internal numbering. Returns the unique number /// assigned to the MBB. - /// unsigned addToMBBNumbering(MachineBasicBlock *MBB) { MBBNumbering.push_back(MBB); return (unsigned)MBBNumbering.size()-1; @@ -610,7 +622,6 @@ class MachineFunction { /// CreateMachineInstr - Allocate a new MachineInstr. Use this instead /// of `new MachineInstr'. - /// MachineInstr *CreateMachineInstr(const MCInstrDesc &MCID, const DebugLoc &DL, bool NoImp = false); @@ -623,16 +634,13 @@ class MachineFunction { MachineInstr *CloneMachineInstr(const MachineInstr *Orig); /// DeleteMachineInstr - Delete the given MachineInstr. - /// void DeleteMachineInstr(MachineInstr *MI); /// CreateMachineBasicBlock - Allocate a new MachineBasicBlock. Use this /// instead of `new MachineBasicBlock'. - /// MachineBasicBlock *CreateMachineBasicBlock(const BasicBlock *bb = nullptr); /// DeleteMachineBasicBlock - Delete the given MachineBasicBlock. - /// void DeleteMachineBasicBlock(MachineBasicBlock *MBB); /// getMachineMemOperand - Allocate a new MachineMemOperand. @@ -642,7 +650,7 @@ class MachineFunction { MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, unsigned base_alignment, const AAMDNodes &AAInfo = AAMDNodes(), const MDNode *Ranges = nullptr, - SynchronizationScope SynchScope = CrossThread, + SyncScope::ID SSID = SyncScope::System, AtomicOrdering Ordering = AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic); @@ -653,7 +661,13 @@ class MachineFunction { MachineMemOperand *getMachineMemOperand(const MachineMemOperand *MMO, int64_t Offset, uint64_t Size); - typedef ArrayRecycler::Capacity OperandCapacity; + /// Allocate a new MachineMemOperand by copying an existing one, + /// replacing only AliasAnalysis information. MachineMemOperands are owned + /// by the MachineFunction and need not be explicitly deallocated. 
+ MachineMemOperand *getMachineMemOperand(const MachineMemOperand *MMO, + const AAMDNodes &AAInfo); + + using OperandCapacity = ArrayRecycler::Capacity; /// Allocate an array of MachineOperands. This is only intended for use by /// internal MachineInstr functions. @@ -700,7 +714,6 @@ class MachineFunction { //===--------------------------------------------------------------------===// // Label Manipulation. - // /// getJTISymbol - Return the MCSymbol for the specified non-empty jump table. /// If isLinkerPrivate is specified, an 'l' label is returned, otherwise a @@ -858,13 +871,16 @@ template <> struct GraphTraits : static NodeRef getEntryNode(MachineFunction *F) { return &F->front(); } // nodes_iterator/begin/end - Allow iteration over all nodes in the graph - typedef pointer_iterator nodes_iterator; + using nodes_iterator = pointer_iterator; + static nodes_iterator nodes_begin(MachineFunction *F) { return nodes_iterator(F->begin()); } + static nodes_iterator nodes_end(MachineFunction *F) { return nodes_iterator(F->end()); } + static unsigned size (MachineFunction *F) { return F->size(); } }; template <> struct GraphTraits : @@ -872,37 +888,39 @@ template <> struct GraphTraits : static NodeRef getEntryNode(const MachineFunction *F) { return &F->front(); } // nodes_iterator/begin/end - Allow iteration over all nodes in the graph - typedef pointer_iterator nodes_iterator; + using nodes_iterator = pointer_iterator; + static nodes_iterator nodes_begin(const MachineFunction *F) { return nodes_iterator(F->begin()); } + static nodes_iterator nodes_end (const MachineFunction *F) { return nodes_iterator(F->end()); } + static unsigned size (const MachineFunction *F) { return F->size(); } }; - // Provide specializations of GraphTraits to be able to treat a function as a // graph of basic blocks... and to walk it in inverse order. Inverse order for // a function is considered to be when traversing the predecessor edges of a BB // instead of the successor edges. 
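The GraphTraits specializations being modernized here are the hooks that let LLVM's generic graph algorithms (depth-first walks, reverse post-order, dominance computations) traverse a MachineFunction, and the Inverse<> forms walk the same CFG backwards over predecessor edges. A minimal sketch of the mechanism for a hypothetical node type (assumes llvm/ADT/DepthFirstIterator.h):

#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/GraphTraits.h"
#include <vector>

// Hypothetical CFG node, for illustration only.
struct Node {
  std::vector<Node *> Succs;
};

namespace llvm {
template <> struct GraphTraits<Node *> {
  using NodeRef = Node *;
  using ChildIteratorType = std::vector<Node *>::iterator;

  static NodeRef getEntryNode(Node *N) { return N; }
  static ChildIteratorType child_begin(NodeRef N) { return N->Succs.begin(); }
  static ChildIteratorType child_end(NodeRef N) { return N->Succs.end(); }
};
} // end namespace llvm

// Any GraphTraits-based algorithm now works on Node graphs.
unsigned countReachable(Node *Entry) {
  unsigned Count = 0;
  for (Node *N : llvm::depth_first(Entry)) {
    (void)N;
    ++Count;
  }
  return Count;
}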
// -template <> struct GraphTraits > : - public GraphTraits > { +template <> struct GraphTraits> : + public GraphTraits> { static NodeRef getEntryNode(Inverse G) { return &G.Graph->front(); } }; -template <> struct GraphTraits > : - public GraphTraits > { +template <> struct GraphTraits> : + public GraphTraits> { static NodeRef getEntryNode(Inverse G) { return &G.Graph->front(); } }; -} // End llvm namespace +} // end namespace llvm -#endif +#endif // LLVM_CODEGEN_MACHINEFUNCTION_H diff --git a/interpreter/llvm/src/include/llvm/CodeGen/MachineFunctionPass.h b/interpreter/llvm/src/include/llvm/CodeGen/MachineFunctionPass.h index 653d1175d04b4..6d978daa20181 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/MachineFunctionPass.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/MachineFunctionPass.h @@ -19,8 +19,8 @@ #ifndef LLVM_CODEGEN_MACHINEFUNCTIONPASS_H #define LLVM_CODEGEN_MACHINEFUNCTIONPASS_H -#include "llvm/Pass.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/Pass.h" namespace llvm { diff --git a/interpreter/llvm/src/include/llvm/CodeGen/MachineInstr.h b/interpreter/llvm/src/include/llvm/CodeGen/MachineInstr.h index e7e728c1be28b..b87aff102d478 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/MachineInstr.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/MachineInstr.h @@ -1,4 +1,4 @@ -//===-- llvm/CodeGen/MachineInstr.h - MachineInstr class --------*- C++ -*-===// +//===- llvm/CodeGen/MachineInstr.h - MachineInstr class ---------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -17,7 +17,6 @@ #define LLVM_CODEGEN_MACHINEINSTR_H #include "llvm/ADT/DenseMapInfo.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/ilist.h" #include "llvm/ADT/ilist_node.h" #include "llvm/ADT/iterator_range.h" @@ -28,19 +27,27 @@ #include "llvm/MC/MCInstrDesc.h" #include "llvm/Support/ArrayRecycler.h" #include "llvm/Target/TargetOpcodes.h" +#include +#include +#include +#include namespace llvm { -class StringRef; template class ArrayRef; -template class SmallVectorImpl; -class DILocalVariable; class DIExpression; +class DILocalVariable; +class MachineBasicBlock; +class MachineFunction; +class MachineMemOperand; +class MachineRegisterInfo; +class ModuleSlotTracker; +class raw_ostream; +template class SmallVectorImpl; +class StringRef; class TargetInstrInfo; class TargetRegisterClass; class TargetRegisterInfo; -class MachineFunction; -class MachineMemOperand; //===----------------------------------------------------------------------===// /// Representation of each machine instruction. @@ -53,7 +60,7 @@ class MachineInstr : public ilist_node_with_parent> { public: - typedef MachineMemOperand **mmo_iterator; + using mmo_iterator = MachineMemOperand **; /// Flags to specify different kinds of comments to output in /// assembly code. These flags carry semantic information not @@ -72,43 +79,39 @@ class MachineInstr BundledPred = 1 << 2, // Instruction has bundled predecessors. BundledSucc = 1 << 3 // Instruction has bundled successors. }; + private: const MCInstrDesc *MCID; // Instruction descriptor. - MachineBasicBlock *Parent; // Pointer to the owning basic block. + MachineBasicBlock *Parent = nullptr; // Pointer to the owning basic block. // Operands are allocated by an ArrayRecycler. - MachineOperand *Operands; // Pointer to the first operand. - unsigned NumOperands; // Number of operands on instruction. - typedef ArrayRecycler::Capacity OperandCapacity; + MachineOperand *Operands = nullptr; // Pointer to the first operand. 
+ unsigned NumOperands = 0; // Number of operands on instruction. + using OperandCapacity = ArrayRecycler::Capacity; OperandCapacity CapOperands; // Capacity of the Operands array. - uint8_t Flags; // Various bits of additional + uint8_t Flags = 0; // Various bits of additional // information about machine // instruction. - uint8_t AsmPrinterFlags; // Various bits of information used by + uint8_t AsmPrinterFlags = 0; // Various bits of information used by // the AsmPrinter to emit helpful // comments. This is *not* semantic // information. Do not use this for // anything other than to convey comment // information to AsmPrinter. - uint8_t NumMemRefs; // Information on memory references. + uint8_t NumMemRefs = 0; // Information on memory references. // Note that MemRefs == nullptr, means 'don't know', not 'no memory access'. // Calling code must treat missing information conservatively. If the number // of memory operands required to be precise exceeds the maximum value of // NumMemRefs - currently 256 - we remove the operands entirely. Note also // that this is a non-owning reference to a shared copy on write buffer owned // by the MachineFunction and created via MF.allocateMemRefsArray. - mmo_iterator MemRefs; + mmo_iterator MemRefs = nullptr; DebugLoc debugLoc; // Source line information. - MachineInstr(const MachineInstr&) = delete; - void operator=(const MachineInstr&) = delete; - // Use MachineFunction::DeleteMachineInstr() instead. - ~MachineInstr() = delete; - // Intrusive list support friend struct ilist_traits; friend struct ilist_callback_traits; @@ -128,6 +131,11 @@ class MachineInstr friend class MachineFunction; public: + MachineInstr(const MachineInstr &) = delete; + MachineInstr &operator=(const MachineInstr &) = delete; + // Use MachineFunction::DeleteMachineInstr() instead. + ~MachineInstr() = delete; + const MachineBasicBlock* getParent() const { return Parent; } MachineBasicBlock* getParent() { return Parent; } @@ -178,7 +186,6 @@ class MachineInstr Flags &= ~((uint8_t)Flag); } - /// Return true if MI is in a bundle (but not the first MI in a bundle). /// /// A bundle looks like this before it's finalized: @@ -263,7 +270,6 @@ class MachineInstr /// earlier. /// /// If this method returns, the caller should try to recover from the error. - /// void emitError(StringRef Msg) const; /// Returns the target instruction descriptor of this MachineInstr. @@ -273,7 +279,6 @@ class MachineInstr unsigned getOpcode() const { return MCID->Opcode; } /// Access to explicit operands of the instruction. - /// unsigned getNumOperands() const { return NumOperands; } const MachineOperand& getOperand(unsigned i) const { @@ -289,8 +294,8 @@ class MachineInstr unsigned getNumExplicitOperands() const; /// iterator/begin/end - Iterate over all operands of a machine instruction. - typedef MachineOperand *mop_iterator; - typedef const MachineOperand *const_mop_iterator; + using mop_iterator = MachineOperand *; + using const_mop_iterator = const MachineOperand *; mop_iterator operands_begin() { return Operands; } mop_iterator operands_end() { return Operands + NumOperands; } @@ -374,6 +379,9 @@ class MachineInstr return NumMemRefs == 1; } + /// Return the number of memory operands. + unsigned getNumMemOperands() const { return NumMemRefs; } + /// API for querying MachineInstr properties. They are the same as MCInstrDesc /// queries but they are bundle aware. 
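The same hunk moves MachineInstr's deleted copy constructor, copy assignment, and destructor from the private section to the public one. The enforcement is identical, but publicly deleted members produce a direct "use of deleted function" diagnostic instead of a misleading access error. Sketch:

class Uncopyable {
public:
  Uncopyable() = default;
  // Publicly deleted: misuse is reported as a call to a deleted function
  // rather than as a private-access violation.
  Uncopyable(const Uncopyable &) = delete;
  Uncopyable &operator=(const Uncopyable &) = delete;
};

// Uncopyable A, B;
// B = A; // error: overload resolution selects the deleted operator=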
@@ -713,7 +721,6 @@ class MachineInstr return hasProperty(MCID::ExtraDefRegAllocReq, Type); } - enum MICheckType { CheckDefs, // Check all operands for equality CheckKillDead, // Check all operands including kill / dead markers @@ -767,6 +774,7 @@ class MachineInstr /// Returns true if the MachineInstr represents a label. bool isLabel() const { return isEHLabel() || isGCLabel(); } + bool isCFIInstruction() const { return getOpcode() == TargetOpcode::CFI_INSTRUCTION; } @@ -775,6 +783,7 @@ class MachineInstr bool isPosition() const { return isLabel() || isCFIInstruction(); } bool isDebugValue() const { return getOpcode() == TargetOpcode::DBG_VALUE; } + /// A DBG_VALUE is indirect iff the first operand is a register and /// the second operand is an immediate. bool isIndirectDebugValue() const { @@ -787,29 +796,38 @@ class MachineInstr bool isKill() const { return getOpcode() == TargetOpcode::KILL; } bool isImplicitDef() const { return getOpcode()==TargetOpcode::IMPLICIT_DEF; } bool isInlineAsm() const { return getOpcode() == TargetOpcode::INLINEASM; } + bool isMSInlineAsm() const { return getOpcode() == TargetOpcode::INLINEASM && getInlineAsmDialect(); } + bool isStackAligningInlineAsm() const; InlineAsm::AsmDialect getInlineAsmDialect() const; + bool isInsertSubreg() const { return getOpcode() == TargetOpcode::INSERT_SUBREG; } + bool isSubregToReg() const { return getOpcode() == TargetOpcode::SUBREG_TO_REG; } + bool isRegSequence() const { return getOpcode() == TargetOpcode::REG_SEQUENCE; } + bool isBundle() const { return getOpcode() == TargetOpcode::BUNDLE; } + bool isCopy() const { return getOpcode() == TargetOpcode::COPY; } + bool isFullCopy() const { return isCopy() && !getOperand(0).getSubReg() && !getOperand(1).getSubReg(); } + bool isExtractSubreg() const { return getOpcode() == TargetOpcode::EXTRACT_SUBREG; } @@ -826,26 +844,35 @@ class MachineInstr getOperand(0).getSubReg() == getOperand(1).getSubReg(); } - /// Return true if this is a transient instruction that is - /// either very likely to be eliminated during register allocation (such as - /// copy-like instructions), or if this instruction doesn't have an - /// execution-time cost. + /// Return true if this instruction doesn't produce any output in the form of + /// executable instructions. + bool isMetaInstruction() const { + switch (getOpcode()) { + default: + return false; + case TargetOpcode::IMPLICIT_DEF: + case TargetOpcode::KILL: + case TargetOpcode::CFI_INSTRUCTION: + case TargetOpcode::EH_LABEL: + case TargetOpcode::GC_LABEL: + case TargetOpcode::DBG_VALUE: + return true; + } + } + + /// Return true if this is a transient instruction that is either very likely + /// to be eliminated during register allocation (such as copy-like + /// instructions), or if this instruction doesn't have an execution-time cost. bool isTransient() const { - switch(getOpcode()) { - default: return false; + switch (getOpcode()) { + default: + return isMetaInstruction(); // Copy-like instructions are usually eliminated during register allocation. case TargetOpcode::PHI: case TargetOpcode::COPY: case TargetOpcode::INSERT_SUBREG: case TargetOpcode::SUBREG_TO_REG: case TargetOpcode::REG_SEQUENCE: - // Pseudo-instructions that don't produce any real output. 
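The isTransient() refactoring in this hunk (the remaining deleted case labels follow below) moves the pseudo-opcodes that emit no executable code into the new isMetaInstruction() predicate, and isTransient() now defers to it from its default case, so the two switches cannot drift apart. The dispatch pattern, reduced to a toy enum:

enum class Op { Phi, Copy, DbgValue, Label, Add };

// True for opcodes that produce no executable output.
bool isMeta(Op O) {
  switch (O) {
  default:
    return false;
  case Op::DbgValue:
  case Op::Label:
    return true;
  }
}

// True for instructions that are likely free at run time; meta
// instructions qualify automatically through the default case.
bool isTransient(Op O) {
  switch (O) {
  default:
    return isMeta(O);
  case Op::Phi:
  case Op::Copy:
    return true;
  }
}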
- case TargetOpcode::IMPLICIT_DEF: - case TargetOpcode::KILL: - case TargetOpcode::CFI_INSTRUCTION: - case TargetOpcode::EH_LABEL: - case TargetOpcode::GC_LABEL: - case TargetOpcode::DBG_VALUE: return true; } } @@ -969,7 +996,6 @@ class MachineInstr /// /// The flag operand is an immediate that can be decoded with methods like /// InlineAsm::hasRegClassConstraint(). - /// int findInlineAsmFlagIdx(unsigned OpIdx, unsigned *GroupNo = nullptr) const; /// Compute the static register class constraint for operand OpIdx. @@ -978,7 +1004,6 @@ class MachineInstr /// /// Returns NULL if the static register class constraint cannot be /// determined. - /// const TargetRegisterClass* getRegClassConstraint(unsigned OpIdx, const TargetInstrInfo *TII, @@ -1319,6 +1344,6 @@ inline raw_ostream& operator<<(raw_ostream &OS, const MachineInstr &MI) { return OS; } -} // End llvm namespace +} // end namespace llvm -#endif +#endif // LLVM_CODEGEN_MACHINEINSTR_H diff --git a/interpreter/llvm/src/include/llvm/CodeGen/MachineInstrBundleIterator.h b/interpreter/llvm/src/include/llvm/CodeGen/MachineInstrBundleIterator.h index 3104185385eaf..5fe4964ff1165 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/MachineInstrBundleIterator.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/MachineInstrBundleIterator.h @@ -15,34 +15,37 @@ #define LLVM_CODEGEN_MACHINEINSTRBUNDLEITERATOR_H #include "llvm/ADT/ilist.h" +#include "llvm/ADT/simple_ilist.h" +#include #include +#include namespace llvm { template struct MachineInstrBundleIteratorTraits; template struct MachineInstrBundleIteratorTraits { - typedef simple_ilist> list_type; - typedef typename list_type::iterator instr_iterator; - typedef typename list_type::iterator nonconst_instr_iterator; - typedef typename list_type::const_iterator const_instr_iterator; + using list_type = simple_ilist>; + using instr_iterator = typename list_type::iterator; + using nonconst_instr_iterator = typename list_type::iterator; + using const_instr_iterator = typename list_type::const_iterator; }; template struct MachineInstrBundleIteratorTraits { - typedef simple_ilist> list_type; - typedef typename list_type::reverse_iterator instr_iterator; - typedef typename list_type::reverse_iterator nonconst_instr_iterator; - typedef typename list_type::const_reverse_iterator const_instr_iterator; + using list_type = simple_ilist>; + using instr_iterator = typename list_type::reverse_iterator; + using nonconst_instr_iterator = typename list_type::reverse_iterator; + using const_instr_iterator = typename list_type::const_reverse_iterator; }; template struct MachineInstrBundleIteratorTraits { - typedef simple_ilist> list_type; - typedef typename list_type::const_iterator instr_iterator; - typedef typename list_type::iterator nonconst_instr_iterator; - typedef typename list_type::const_iterator const_instr_iterator; + using list_type = simple_ilist>; + using instr_iterator = typename list_type::const_iterator; + using nonconst_instr_iterator = typename list_type::iterator; + using const_instr_iterator = typename list_type::const_iterator; }; template struct MachineInstrBundleIteratorTraits { - typedef simple_ilist> list_type; - typedef typename list_type::const_reverse_iterator instr_iterator; - typedef typename list_type::reverse_iterator nonconst_instr_iterator; - typedef typename list_type::const_reverse_iterator const_instr_iterator; + using list_type = simple_ilist>; + using instr_iterator = typename list_type::const_reverse_iterator; + using nonconst_instr_iterator = typename 
list_type::reverse_iterator; + using const_instr_iterator = typename list_type::const_reverse_iterator; }; template struct MachineInstrBundleIteratorHelper; @@ -104,27 +107,27 @@ template <> struct MachineInstrBundleIteratorHelper { /// inside bundles (i.e. walk top level MIs only). template class MachineInstrBundleIterator : MachineInstrBundleIteratorHelper { - typedef MachineInstrBundleIteratorTraits Traits; - typedef typename Traits::instr_iterator instr_iterator; + using Traits = MachineInstrBundleIteratorTraits; + using instr_iterator = typename Traits::instr_iterator; + instr_iterator MII; public: - typedef typename instr_iterator::value_type value_type; - typedef typename instr_iterator::difference_type difference_type; - typedef typename instr_iterator::pointer pointer; - typedef typename instr_iterator::reference reference; - typedef std::bidirectional_iterator_tag iterator_category; - - typedef typename instr_iterator::const_pointer const_pointer; - typedef typename instr_iterator::const_reference const_reference; + using value_type = typename instr_iterator::value_type; + using difference_type = typename instr_iterator::difference_type; + using pointer = typename instr_iterator::pointer; + using reference = typename instr_iterator::reference; + using const_pointer = typename instr_iterator::const_pointer; + using const_reference = typename instr_iterator::const_reference; + using iterator_category = std::bidirectional_iterator_tag; private: - typedef typename Traits::nonconst_instr_iterator nonconst_instr_iterator; - typedef typename Traits::const_instr_iterator const_instr_iterator; - typedef MachineInstrBundleIterator< - typename nonconst_instr_iterator::value_type, IsReverse> - nonconst_iterator; - typedef MachineInstrBundleIterator reverse_iterator; + using nonconst_instr_iterator = typename Traits::nonconst_instr_iterator; + using const_instr_iterator = typename Traits::const_instr_iterator; + using nonconst_iterator = + MachineInstrBundleIterator; + using reverse_iterator = MachineInstrBundleIterator; public: MachineInstrBundleIterator(instr_iterator MI) : MII(MI) { @@ -138,12 +141,14 @@ class MachineInstrBundleIterator : MachineInstrBundleIteratorHelper { "MachineInstrBundleIterator with a " "bundled MI"); } + MachineInstrBundleIterator(pointer MI) : MII(MI) { // FIXME: This conversion should be explicit. assert((!MI || !MI->isBundledWithPred()) && "It's not legal to initialize " "MachineInstrBundleIterator " "with a bundled MI"); } + // Template allows conversion from const to nonconst. template MachineInstrBundleIterator( @@ -151,6 +156,7 @@ class MachineInstrBundleIterator : MachineInstrBundleIteratorHelper { typename std::enable_if::value, void *>::type = nullptr) : MII(I.getInstrIterator()) {} + MachineInstrBundleIterator() : MII(nullptr) {} /// Explicit conversion between forward/reverse iterators. 
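Most of the churn in MachineInstrBundleIterator.h, as in the other headers touched by this patch, is the mechanical typedef-to-using conversion. The two forms are equivalent, but an alias declaration reads left-to-right and, unlike a typedef, can be templated. A small self-contained sketch:

#include <type_traits>
#include <vector>

// Equivalent declarations; the alias form reads assignment-style.
typedef std::vector<int>::const_iterator const_int_iterator_t;
using const_int_iterator = std::vector<int>::const_iterator;

// Only an alias declaration can take template parameters.
template <typename T>
using const_iterator_of = typename std::vector<T>::const_iterator;

static_assert(std::is_same<const_iterator_of<int>, const_int_iterator>::value,
              "the alias template instantiates to the same type");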
@@ -280,4 +286,4 @@ class MachineInstrBundleIterator : MachineInstrBundleIteratorHelper { } // end namespace llvm -#endif +#endif // LLVM_CODEGEN_MACHINEINSTRBUNDLEITERATOR_H diff --git a/interpreter/llvm/src/include/llvm/CodeGen/MachineLoopInfo.h b/interpreter/llvm/src/include/llvm/CodeGen/MachineLoopInfo.h index 5c814f22f99b2..58cffaade9d2a 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/MachineLoopInfo.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/MachineLoopInfo.h @@ -33,6 +33,8 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/Pass.h" namespace llvm { @@ -71,6 +73,7 @@ class MachineLoop : public LoopBase { private: friend class LoopInfoBase; + explicit MachineLoop(MachineBasicBlock *MBB) : LoopBase(MBB) {} }; @@ -79,11 +82,9 @@ class MachineLoop : public LoopBase { extern template class LoopInfoBase; class MachineLoopInfo : public MachineFunctionPass { - LoopInfoBase LI; friend class LoopBase; - void operator=(const MachineLoopInfo &) = delete; - MachineLoopInfo(const MachineLoopInfo &) = delete; + LoopInfoBase LI; public: static char ID; // Pass identification, replacement for typeid @@ -91,6 +92,8 @@ class MachineLoopInfo : public MachineFunctionPass { MachineLoopInfo() : MachineFunctionPass(ID) { initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry()); } + MachineLoopInfo(const MachineLoopInfo &) = delete; + MachineLoopInfo &operator=(const MachineLoopInfo &) = delete; LoopInfoBase& getBase() { return LI; } @@ -103,7 +106,7 @@ class MachineLoopInfo : public MachineFunctionPass { bool SpeculativePreheader = false) const; /// The iterator interface to the top-level loops in the current function. - typedef LoopInfoBase::iterator iterator; + using iterator = LoopInfoBase::iterator; inline iterator begin() const { return LI.begin(); } inline iterator end() const { return LI.end(); } bool empty() const { return LI.empty(); } @@ -166,11 +169,10 @@ class MachineLoopInfo : public MachineFunctionPass { } }; - // Allow clients to walk the list of nested loops... 
template <> struct GraphTraits { - typedef const MachineLoop *NodeRef; - typedef MachineLoopInfo::iterator ChildIteratorType; + using NodeRef = const MachineLoop *; + using ChildIteratorType = MachineLoopInfo::iterator; static NodeRef getEntryNode(const MachineLoop *L) { return L; } static ChildIteratorType child_begin(NodeRef N) { return N->begin(); } @@ -178,14 +180,14 @@ template <> struct GraphTraits { }; template <> struct GraphTraits { - typedef MachineLoop *NodeRef; - typedef MachineLoopInfo::iterator ChildIteratorType; + using NodeRef = MachineLoop *; + using ChildIteratorType = MachineLoopInfo::iterator; static NodeRef getEntryNode(MachineLoop *L) { return L; } static ChildIteratorType child_begin(NodeRef N) { return N->begin(); } static ChildIteratorType child_end(NodeRef N) { return N->end(); } }; -} // End llvm namespace +} // end namespace llvm -#endif +#endif // LLVM_CODEGEN_MACHINELOOPINFO_H diff --git a/interpreter/llvm/src/include/llvm/CodeGen/MachineMemOperand.h b/interpreter/llvm/src/include/llvm/CodeGen/MachineMemOperand.h index a311124a35bae..a9de0db05d72c 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/MachineMemOperand.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/MachineMemOperand.h @@ -21,7 +21,7 @@ #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Metadata.h" -#include "llvm/IR/Value.h" // PointerLikeTypeTraits +#include "llvm/IR/Value.h" // PointerLikeTypeTraits #include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/DataTypes.h" @@ -59,6 +59,11 @@ struct MachinePointerInfo { return MachinePointerInfo(V.get(), Offset+O); } + /// Return true if memory region [V, V+Offset+Size) is known to be + /// dereferenceable. + bool isDereferenceable(unsigned Size, LLVMContext &C, + const DataLayout &DL) const; + /// Return the LLVM IR address space number that this pointer points into. unsigned getAddrSpace() const; @@ -109,6 +114,9 @@ class MachineMemOperand { MOInvariant = 1u << 5, // Reserved for use by target-specific passes. + // Targets may override getSerializableMachineMemOperandTargetFlags() to + // enable MIR serialization/parsing of these flags. If more of these flags + // are added, the MIR printing/parsing code will need to be updated as well. MOTargetFlag1 = 1u << 6, MOTargetFlag2 = 1u << 7, MOTargetFlag3 = 1u << 8, @@ -119,8 +127,8 @@ class MachineMemOperand { private: /// Atomic information for this memory operation. struct MachineAtomicInfo { - /// Synchronization scope for this memory operation. - unsigned SynchScope : 1; // enum SynchronizationScope + /// Synchronization scope ID for this memory operation. + unsigned SSID : 8; // SyncScope::ID /// Atomic ordering requirements for this memory operation. For cmpxchg /// atomic operations, atomic ordering requirements when store occurs. unsigned Ordering : 4; // enum AtomicOrdering @@ -147,7 +155,7 @@ class MachineMemOperand { unsigned base_alignment, const AAMDNodes &AAInfo = AAMDNodes(), const MDNode *Ranges = nullptr, - SynchronizationScope SynchScope = CrossThread, + SyncScope::ID SSID = SyncScope::System, AtomicOrdering Ordering = AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic); @@ -197,9 +205,9 @@ class MachineMemOperand { /// Return the range tag for the memory reference. const MDNode *getRanges() const { return Ranges; } - /// Return the synchronization scope for this memory operation. 
-  SynchronizationScope getSynchScope() const {
-    return static_cast<SynchronizationScope>(AtomicInfo.SynchScope);
+  /// Returns the synchronization scope ID for this memory operation.
+  SyncScope::ID getSyncScopeID() const {
+    return static_cast<SyncScope::ID>(AtomicInfo.SSID);
   }
 
   /// Return the atomic ordering requirements for this memory operation. For
diff --git a/interpreter/llvm/src/include/llvm/CodeGen/MachineModuleInfo.h b/interpreter/llvm/src/include/llvm/CodeGen/MachineModuleInfo.h
index f46ef41879d17..d64941a9e725a 100644
--- a/interpreter/llvm/src/include/llvm/CodeGen/MachineModuleInfo.h
+++ b/interpreter/llvm/src/include/llvm/CodeGen/MachineModuleInfo.h
@@ -31,35 +31,25 @@
 #ifndef LLVM_CODEGEN_MACHINEMODULEINFO_H
 #define LLVM_CODEGEN_MACHINEMODULEINFO_H
 
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/PointerIntPair.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/IR/DebugLoc.h"
-#include "llvm/IR/ValueHandle.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/MachineLocation.h"
 #include "llvm/Pass.h"
-#include "llvm/Support/DataTypes.h"
+#include <memory>
+#include <utility>
+#include <vector>
 
 namespace llvm {
 
-//===----------------------------------------------------------------------===//
-// Forward declarations.
-class BlockAddress;
+class BasicBlock;
 class CallInst;
-class Constant;
-class GlobalVariable;
-class LandingPadInst;
-class MDNode;
-class MMIAddrLabelMap;
-class MachineBasicBlock;
+class Function;
 class MachineFunction;
-class MachineFunctionInitializer;
+class MMIAddrLabelMap;
 class Module;
-class PointerType;
-class StructType;
+class TargetMachine;
 
 //===----------------------------------------------------------------------===//
 /// This class can be derived from and used by targets to hold private
@@ -69,11 +59,12 @@ class StructType;
 ///
 class MachineModuleInfoImpl {
 public:
-  typedef PointerIntPair<MCSymbol *, 1, bool> StubValueTy;
+  using StubValueTy = PointerIntPair<MCSymbol *, 1, bool>;
+  using SymbolListTy = std::vector<std::pair<MCSymbol *, StubValueTy>>;
+
   virtual ~MachineModuleInfoImpl();
-  typedef std::vector<std::pair<MCSymbol *, StubValueTy> > SymbolListTy;
-protected:
 
+protected:
   /// Return the entries from a DenseMap in a deterministic sorted order.
   /// Clears the map.
   static SymbolListTy getSortedStubs(DenseMap<MCSymbol *, StubValueTy> &);
@@ -134,7 +125,6 @@ class MachineModuleInfo : public ImmutablePass {
   /// comments in lib/Target/X86/X86FrameLowering.cpp for more details.
   bool UsesMorestackAddr;
 
-  MachineFunctionInitializer *MFInitializer;
   /// Maps IR Functions to their corresponding MachineFunctions.
   DenseMap<const Function*, std::unique_ptr<MachineFunction>> MachineFunctions;
   /// Next unique number available for a MachineFunction.
@@ -158,14 +148,13 @@ class MachineModuleInfo : public ImmutablePass {
   void setModule(const Module *M) { TheModule = M; }
   const Module *getModule() const { return TheModule; }
 
-  void setMachineFunctionInitializer(MachineFunctionInitializer *MFInit) {
-    MFInitializer = MFInit;
-  }
-
   /// Returns the MachineFunction constructed for the IR function \p F.
-  /// Creates a new MachineFunction and runs the MachineFunctionInitializer
-  /// if none exists yet.
-  MachineFunction &getMachineFunction(const Function &F);
+  /// Creates a new MachineFunction if none exists yet.
+  MachineFunction &getOrCreateMachineFunction(const Function &F);
+
+  /// \brief Returns the MachineFunction associated with the IR function \p F
+  /// if there is one, otherwise nullptr.
+  MachineFunction *getMachineFunction(const Function &F) const;
 
   /// Delete the MachineFunction \p MF and reset the link in the IR Function to
   /// Machine Function map.
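The MachineModuleInfo hunk above replaces the create-on-demand accessor with an explicit lookup/creation pair. A minimal caller-side sketch, assuming an MMI and F are already in scope (the pass boilerplate around them is elided, and the helper name is hypothetical):

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"

using namespace llvm;

// Hypothetical helper contrasting the two accessors after the split.
void inspectOrBuild(MachineModuleInfo &MMI, const Function &F) {
  // Pure lookup: returns nullptr when no MachineFunction exists yet, so
  // read-only clients no longer create one as a side effect.
  if (MachineFunction *Existing = MMI.getMachineFunction(F))
    Existing->verify();

  // Explicit creation point: constructs the MachineFunction on first use.
  MachineFunction &MF = MMI.getOrCreateMachineFunction(F);
  (void)MF;
}

Code that relied on the old getMachineFunction() creating the function implicitly now has to opt into creation through getOrCreateMachineFunction().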
@@ -252,6 +241,6 @@ class MachineModuleInfo : public ImmutablePass { /// which will link in MSVCRT's floating-point support. void computeUsesVAFloatArgument(const CallInst &I, MachineModuleInfo &MMI); -} // End llvm namespace +} // end namespace llvm -#endif +#endif // LLVM_CODEGEN_MACHINEMODULEINFO_H diff --git a/interpreter/llvm/src/include/llvm/CodeGen/MachineModuleInfoImpls.h b/interpreter/llvm/src/include/llvm/CodeGen/MachineModuleInfoImpls.h index f28a79c5b5cca..34b21ceddd434 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/MachineModuleInfoImpls.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/MachineModuleInfoImpls.h @@ -15,9 +15,9 @@ #ifndef LLVM_CODEGEN_MACHINEMODULEINFOIMPLS_H #define LLVM_CODEGEN_MACHINEMODULEINFOIMPLS_H -#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/BinaryFormat/Wasm.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/Support/Wasm.h" +#include "llvm/CodeGen/ValueTypes.h" namespace llvm { class MCSymbol; @@ -77,33 +77,6 @@ class MachineModuleInfoELF : public MachineModuleInfoImpl { SymbolListTy GetGVStubList() { return getSortedStubs(GVStubs); } }; -/// MachineModuleInfoWasm - This is a MachineModuleInfoImpl implementation -/// for Wasm targets. -class MachineModuleInfoWasm : public MachineModuleInfoImpl { - /// WebAssembly global variables defined by CodeGen. - std::vector Globals; - - /// The WebAssembly global variable which is the stack pointer. - unsigned StackPointerGlobal; - - virtual void anchor(); // Out of line virtual method. -public: - MachineModuleInfoWasm(const MachineModuleInfo &) - : StackPointerGlobal(-1U) {} - - void addGlobal(const wasm::Global &G) { Globals.push_back(G); } - const std::vector &getGlobals() const { return Globals; } - - bool hasStackPointerGlobal() const { - return StackPointerGlobal != -1U; - } - unsigned getStackPointerGlobal() const { - assert(hasStackPointerGlobal() && "Stack ptr global hasn't been set"); - return StackPointerGlobal; - } - void setStackPointerGlobal(unsigned Global) { StackPointerGlobal = Global; } -}; - } // end namespace llvm #endif diff --git a/interpreter/llvm/src/include/llvm/CodeGen/MachineOperand.h b/interpreter/llvm/src/include/llvm/CodeGen/MachineOperand.h index e163540882968..2560399bcf545 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/MachineOperand.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/MachineOperand.h @@ -14,8 +14,8 @@ #ifndef LLVM_CODEGEN_MACHINEOPERAND_H #define LLVM_CODEGEN_MACHINEOPERAND_H -#include "llvm/Support/DataTypes.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/Support/DataTypes.h" #include namespace llvm { diff --git a/interpreter/llvm/src/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h b/interpreter/llvm/src/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h index da8fdcdf5a33d..6ad5de533d13d 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h @@ -134,7 +134,7 @@ using MNV = DiagnosticInfoMIROptimization::MachineArgument; /// /// It allows reporting when optimizations are performed and when they are not /// along with the reasons for it. Hotness information of the corresponding -/// code region can be included in the remark if DiagnosticHotnessRequested is +/// code region can be included in the remark if DiagnosticsHotnessRequested is /// enabled in the LLVM context. 
 class MachineOptimizationRemarkEmitter {
 public:
diff --git a/interpreter/llvm/src/include/llvm/CodeGen/MachinePassRegistry.h b/interpreter/llvm/src/include/llvm/CodeGen/MachinePassRegistry.h
index db914b1f8bc71..3aba0bba7d1a3 100644
--- a/interpreter/llvm/src/include/llvm/CodeGen/MachinePassRegistry.h
+++ b/interpreter/llvm/src/include/llvm/CodeGen/MachinePassRegistry.h
@@ -1,4 +1,4 @@
-//===-- llvm/CodeGen/MachinePassRegistry.h ----------------------*- C++ -*-===//
+//===- llvm/CodeGen/MachinePassRegistry.h -----------------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -18,13 +18,13 @@
 #ifndef LLVM_CODEGEN_MACHINEPASSREGISTRY_H
 #define LLVM_CODEGEN_MACHINEPASSREGISTRY_H
 
+#include "llvm/ADT/StringRef.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/Support/CommandLine.h"
 
 namespace llvm {
 
-typedef void *(*MachinePassCtor)();
-
+using MachinePassCtor = void *(*)();
 
 //===----------------------------------------------------------------------===//
 ///
@@ -34,36 +34,30 @@ typedef void *(*MachinePassCtor)();
 //===----------------------------------------------------------------------===//
 class MachinePassRegistryListener {
   virtual void anchor();
+
 public:
-  MachinePassRegistryListener() {}
-  virtual ~MachinePassRegistryListener() {}
+  MachinePassRegistryListener() = default;
+  virtual ~MachinePassRegistryListener() = default;
+
   virtual void NotifyAdd(StringRef N, MachinePassCtor C, StringRef D) = 0;
   virtual void NotifyRemove(StringRef N) = 0;
 };
 
-
 //===----------------------------------------------------------------------===//
 ///
 /// MachinePassRegistryNode - Machine pass node stored in registration list.
 ///
 //===----------------------------------------------------------------------===//
 class MachinePassRegistryNode {
-
 private:
-
-  MachinePassRegistryNode *Next;        // Next function pass in list.
+  MachinePassRegistryNode *Next = nullptr; // Next function pass in list.
   StringRef Name;                       // Name of function pass.
   StringRef Description;                // Description string.
   MachinePassCtor Ctor;                 // Function pass creator.
 
 public:
-
   MachinePassRegistryNode(const char *N, const char *D, MachinePassCtor C)
-  : Next(nullptr)
-  , Name(N)
-  , Description(D)
-  , Ctor(C)
-  {}
+      : Name(N), Description(D), Ctor(C) {}
 
   // Accessors
   MachinePassRegistryNode *getNext()      const { return Next; }
@@ -72,25 +66,20 @@ class MachinePassRegistryNode {
   StringRef getDescription()              const { return Description; }
   MachinePassCtor getCtor()               const { return Ctor; }
   void setNext(MachinePassRegistryNode *N)      { Next = N; }
-
 };
 
-
 //===----------------------------------------------------------------------===//
 ///
 /// MachinePassRegistry - Track the registration of machine passes.
 ///
 //===----------------------------------------------------------------------===//
 class MachinePassRegistry {
-
 private:
-
   MachinePassRegistryNode *List;        // List of registry nodes.
   MachinePassCtor Default;              // Default function pass creator.
-  MachinePassRegistryListener* Listener;// Listener for list adds are removes.
+  MachinePassRegistryListener *Listener; // Listener for list adds and removes.
 
 public:
-
   // NO CONSTRUCTOR - we don't want static constructor ordering to mess
   // with the registry.
 
@@ -109,10 +98,8 @@ class MachinePassRegistry {
   /// Remove - Removes a function pass from the registration list.
   ///
   void Remove(MachinePassRegistryNode *Node);
-
 };
 
-
 //===----------------------------------------------------------------------===//
 ///
 /// RegisterPassParser class - Handle the addition of new machine passes.
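Since MachinePassRegistry deliberately has no constructor, registration happens through nodes like the one above, with RegisterPassParser (next hunk) observing additions and removals through the listener interface. A hedged sketch of the pattern; the factory and names are illustrative, not from this patch:

#include "llvm/CodeGen/MachinePassRegistry.h"

namespace {
// Illustrative factory matching MachinePassCtor, i.e. void *(*)(); the
// registry is intentionally ignorant of the concrete pass type.
void *createDummyMachinePass() { return nullptr; /* would allocate a pass */ }
} // end anonymous namespace

// A node bundles name, description and constructor; concrete registries
// such as MachineSchedRegistry subclass MachinePassRegistryNode and call
// Registry.Add(this) from their constructors.
static llvm::MachinePassRegistryNode
    DummyNode("dummy", "Illustrative registry node", createDummyMachinePass);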
@@ -142,7 +129,6 @@ class RegisterPassParser : public MachinePassRegistryListener, } // Implement the MachinePassRegistryListener callbacks. - // void NotifyAdd(StringRef N, MachinePassCtor C, StringRef D) override { this->addLiteralOption(N, (typename RegistryClass::FunctionPassCtor)C, D); } @@ -151,7 +137,6 @@ class RegisterPassParser : public MachinePassRegistryListener, } }; - } // end namespace llvm -#endif +#endif // LLVM_CODEGEN_MACHINEPASSREGISTRY_H diff --git a/interpreter/llvm/src/include/llvm/CodeGen/MachinePostDominators.h b/interpreter/llvm/src/include/llvm/CodeGen/MachinePostDominators.h index 70bdb191ad343..d29d2d85cb0a5 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/MachinePostDominators.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/MachinePostDominators.h @@ -26,7 +26,7 @@ namespace llvm { /// struct MachinePostDominatorTree : public MachineFunctionPass { private: - DominatorTreeBase *DT; + PostDomTreeBase *DT; public: static char ID; diff --git a/interpreter/llvm/src/include/llvm/CodeGen/MachineRegionInfo.h b/interpreter/llvm/src/include/llvm/CodeGen/MachineRegionInfo.h index 21f847c7e5bab..8394b58d0a16c 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/MachineRegionInfo.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/MachineRegionInfo.h @@ -10,83 +10,77 @@ #ifndef LLVM_CODEGEN_MACHINEREGIONINFO_H #define LLVM_CODEGEN_MACHINEREGIONINFO_H +#include "llvm/ADT/DepthFirstIterator.h" #include "llvm/Analysis/RegionInfo.h" #include "llvm/Analysis/RegionIterator.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineDominanceFrontier.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineLoopInfo.h" - +#include namespace llvm { -class MachineDominatorTree; struct MachinePostDominatorTree; class MachineRegion; class MachineRegionNode; class MachineRegionInfo; -template<> -struct RegionTraits { - typedef MachineFunction FuncT; - typedef MachineBasicBlock BlockT; - typedef MachineRegion RegionT; - typedef MachineRegionNode RegionNodeT; - typedef MachineRegionInfo RegionInfoT; - typedef MachineDominatorTree DomTreeT; - typedef MachineDomTreeNode DomTreeNodeT; - typedef MachinePostDominatorTree PostDomTreeT; - typedef MachineDominanceFrontier DomFrontierT; - typedef MachineInstr InstT; - typedef MachineLoop LoopT; - typedef MachineLoopInfo LoopInfoT; +template <> struct RegionTraits { + using FuncT = MachineFunction; + using BlockT = MachineBasicBlock; + using RegionT = MachineRegion; + using RegionNodeT = MachineRegionNode; + using RegionInfoT = MachineRegionInfo; + using DomTreeT = MachineDominatorTree; + using DomTreeNodeT = MachineDomTreeNode; + using PostDomTreeT = MachinePostDominatorTree; + using DomFrontierT = MachineDominanceFrontier; + using InstT = MachineInstr; + using LoopT = MachineLoop; + using LoopInfoT = MachineLoopInfo; static unsigned getNumSuccessors(MachineBasicBlock *BB) { return BB->succ_size(); } }; - class MachineRegionNode : public RegionNodeBase> { public: - inline MachineRegionNode(MachineRegion *Parent, - MachineBasicBlock *Entry, + inline MachineRegionNode(MachineRegion *Parent, MachineBasicBlock *Entry, bool isSubRegion = false) - : RegionNodeBase>(Parent, Entry, isSubRegion) { - - } + : RegionNodeBase>(Parent, Entry, + isSubRegion) {} bool operator==(const MachineRegion &RN) const { - return this == reinterpret_cast(&RN); + return this == reinterpret_cast(&RN); } }; class MachineRegion : public 
RegionBase> { public: MachineRegion(MachineBasicBlock *Entry, MachineBasicBlock *Exit, - MachineRegionInfo* RI, - MachineDominatorTree *DT, MachineRegion *Parent = nullptr); + MachineRegionInfo *RI, MachineDominatorTree *DT, + MachineRegion *Parent = nullptr); ~MachineRegion(); bool operator==(const MachineRegionNode &RN) const { - return &RN == reinterpret_cast(this); + return &RN == reinterpret_cast(this); } }; class MachineRegionInfo : public RegionInfoBase> { public: explicit MachineRegionInfo(); - ~MachineRegionInfo() override; // updateStatistics - Update statistic about created regions. void updateStatistics(MachineRegion *R) final; - void recalculate(MachineFunction &F, - MachineDominatorTree *DT, - MachinePostDominatorTree *PDT, - MachineDominanceFrontier *DF); + void recalculate(MachineFunction &F, MachineDominatorTree *DT, + MachinePostDominatorTree *PDT, MachineDominanceFrontier *DF); }; class MachineRegionInfoPass : public MachineFunctionPass { @@ -94,17 +88,13 @@ class MachineRegionInfoPass : public MachineFunctionPass { public: static char ID; - explicit MachineRegionInfoPass(); + explicit MachineRegionInfoPass(); ~MachineRegionInfoPass() override; - MachineRegionInfo &getRegionInfo() { - return RI; - } + MachineRegionInfo &getRegionInfo() { return RI; } - const MachineRegionInfo &getRegionInfo() const { - return RI; - } + const MachineRegionInfo &getRegionInfo() const { return RI; } /// @name MachineFunctionPass interface //@{ @@ -117,66 +107,76 @@ class MachineRegionInfoPass : public MachineFunctionPass { //@} }; - template <> template <> -inline MachineBasicBlock* RegionNodeBase>::getNodeAs() const { +inline MachineBasicBlock * +RegionNodeBase>::getNodeAs() + const { assert(!isSubRegion() && "This is not a MachineBasicBlock RegionNode!"); return getEntry(); } -template<> -template<> -inline MachineRegion* RegionNodeBase>::getNodeAs() const { +template <> +template <> +inline MachineRegion * +RegionNodeBase>::getNodeAs() + const { assert(isSubRegion() && "This is not a subregion RegionNode!"); - auto Unconst = const_cast>*>(this); - return reinterpret_cast(Unconst); + auto Unconst = + const_cast> *>(this); + return reinterpret_cast(Unconst); } - RegionNodeGraphTraits(MachineRegionNode, MachineBasicBlock, MachineRegion); -RegionNodeGraphTraits(const MachineRegionNode, MachineBasicBlock, MachineRegion); +RegionNodeGraphTraits(const MachineRegionNode, MachineBasicBlock, + MachineRegion); RegionGraphTraits(MachineRegion, MachineRegionNode); RegionGraphTraits(const MachineRegion, const MachineRegionNode); -template <> struct GraphTraits - : public GraphTraits > { - typedef df_iterator, false, - GraphTraits>> - nodes_iterator; +template <> +struct GraphTraits + : public GraphTraits> { + using nodes_iterator = df_iterator, + false, GraphTraits>>; static NodeRef getEntryNode(MachineRegionInfo *RI) { - return GraphTraits >::getEntryNode(RI->getTopLevelRegion()); + return GraphTraits>::getEntryNode( + RI->getTopLevelRegion()); } - static nodes_iterator nodes_begin(MachineRegionInfo* RI) { + + static nodes_iterator nodes_begin(MachineRegionInfo *RI) { return nodes_iterator::begin(getEntryNode(RI)); } + static nodes_iterator nodes_end(MachineRegionInfo *RI) { return nodes_iterator::end(getEntryNode(RI)); } }; -template <> struct GraphTraits - : public GraphTraits { - typedef df_iterator, false, - GraphTraits>> - nodes_iterator; +template <> +struct GraphTraits + : public GraphTraits { + using nodes_iterator = df_iterator, + false, GraphTraits>>; static NodeRef 
getEntryNode(MachineRegionInfoPass *RI) { - return GraphTraits::getEntryNode(&RI->getRegionInfo()); + return GraphTraits::getEntryNode(&RI->getRegionInfo()); } - static nodes_iterator nodes_begin(MachineRegionInfoPass* RI) { - return GraphTraits::nodes_begin(&RI->getRegionInfo()); + + static nodes_iterator nodes_begin(MachineRegionInfoPass *RI) { + return GraphTraits::nodes_begin(&RI->getRegionInfo()); } + static nodes_iterator nodes_end(MachineRegionInfoPass *RI) { - return GraphTraits::nodes_end(&RI->getRegionInfo()); + return GraphTraits::nodes_end(&RI->getRegionInfo()); } }; extern template class RegionBase>; extern template class RegionNodeBase>; extern template class RegionInfoBase>; -} -#endif +} // end namespace llvm + +#endif // LLVM_CODEGEN_MACHINEREGIONINFO_H diff --git a/interpreter/llvm/src/include/llvm/CodeGen/MachineRegisterInfo.h b/interpreter/llvm/src/include/llvm/CodeGen/MachineRegisterInfo.h index 6e5c6473ff4a4..8347f00cbc7a4 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/MachineRegisterInfo.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/MachineRegisterInfo.h @@ -14,11 +14,13 @@ #ifndef LLVM_CODEGEN_MACHINEREGISTERINFO_H #define LLVM_CODEGEN_MACHINEREGISTERINFO_H +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/IndexedMap.h" -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/PointerUnion.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/CodeGen/GlobalISel/RegisterBank.h" #include "llvm/CodeGen/LowLevelType.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -28,21 +30,21 @@ #include "llvm/MC/LaneBitmask.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" -#include #include #include #include #include #include #include +#include namespace llvm { class PSetIterator; /// Convenient type to represent either a register class or a register bank. -typedef PointerUnion - RegClassOrRegBank; +using RegClassOrRegBank = + PointerUnion; /// MachineRegisterInfo - Keep track of information for virtual and physical /// registers, including vreg register classes, use/def chains for registers, @@ -125,7 +127,7 @@ class MachineRegisterInfo { /// started. BitVector ReservedRegs; - typedef DenseMap VRegToTypeMap; + using VRegToTypeMap = DenseMap; /// Map generic virtual registers to their actual size. mutable std::unique_ptr VRegToType; @@ -266,8 +268,8 @@ class MachineRegisterInfo { /// reg_iterator/reg_begin/reg_end - Walk all defs and uses of the specified /// register. - typedef defusechain_iterator - reg_iterator; + using reg_iterator = + defusechain_iterator; reg_iterator reg_begin(unsigned RegNo) const { return reg_iterator(getRegUseDefListHead(RegNo)); } @@ -279,8 +281,8 @@ class MachineRegisterInfo { /// reg_instr_iterator/reg_instr_begin/reg_instr_end - Walk all defs and uses /// of the specified register, stepping by MachineInstr. - typedef defusechain_instr_iterator - reg_instr_iterator; + using reg_instr_iterator = + defusechain_instr_iterator; reg_instr_iterator reg_instr_begin(unsigned RegNo) const { return reg_instr_iterator(getRegUseDefListHead(RegNo)); } @@ -295,8 +297,8 @@ class MachineRegisterInfo { /// reg_bundle_iterator/reg_bundle_begin/reg_bundle_end - Walk all defs and uses /// of the specified register, stepping by bundle. 
- typedef defusechain_instr_iterator - reg_bundle_iterator; + using reg_bundle_iterator = + defusechain_instr_iterator; reg_bundle_iterator reg_bundle_begin(unsigned RegNo) const { return reg_bundle_iterator(getRegUseDefListHead(RegNo)); } @@ -314,8 +316,8 @@ class MachineRegisterInfo { /// reg_nodbg_iterator/reg_nodbg_begin/reg_nodbg_end - Walk all defs and uses /// of the specified register, skipping those marked as Debug. - typedef defusechain_iterator - reg_nodbg_iterator; + using reg_nodbg_iterator = + defusechain_iterator; reg_nodbg_iterator reg_nodbg_begin(unsigned RegNo) const { return reg_nodbg_iterator(getRegUseDefListHead(RegNo)); } @@ -331,8 +333,8 @@ class MachineRegisterInfo { /// reg_instr_nodbg_iterator/reg_instr_nodbg_begin/reg_instr_nodbg_end - Walk /// all defs and uses of the specified register, stepping by MachineInstr, /// skipping those marked as Debug. - typedef defusechain_instr_iterator - reg_instr_nodbg_iterator; + using reg_instr_nodbg_iterator = + defusechain_instr_iterator; reg_instr_nodbg_iterator reg_instr_nodbg_begin(unsigned RegNo) const { return reg_instr_nodbg_iterator(getRegUseDefListHead(RegNo)); } @@ -348,8 +350,8 @@ class MachineRegisterInfo { /// reg_bundle_nodbg_iterator/reg_bundle_nodbg_begin/reg_bundle_nodbg_end - Walk /// all defs and uses of the specified register, stepping by bundle, /// skipping those marked as Debug. - typedef defusechain_instr_iterator - reg_bundle_nodbg_iterator; + using reg_bundle_nodbg_iterator = + defusechain_instr_iterator; reg_bundle_nodbg_iterator reg_bundle_nodbg_begin(unsigned RegNo) const { return reg_bundle_nodbg_iterator(getRegUseDefListHead(RegNo)); } @@ -369,8 +371,8 @@ class MachineRegisterInfo { } /// def_iterator/def_begin/def_end - Walk all defs of the specified register. - typedef defusechain_iterator - def_iterator; + using def_iterator = + defusechain_iterator; def_iterator def_begin(unsigned RegNo) const { return def_iterator(getRegUseDefListHead(RegNo)); } @@ -382,8 +384,8 @@ class MachineRegisterInfo { /// def_instr_iterator/def_instr_begin/def_instr_end - Walk all defs of the /// specified register, stepping by MachineInst. - typedef defusechain_instr_iterator - def_instr_iterator; + using def_instr_iterator = + defusechain_instr_iterator; def_instr_iterator def_instr_begin(unsigned RegNo) const { return def_instr_iterator(getRegUseDefListHead(RegNo)); } @@ -398,8 +400,8 @@ class MachineRegisterInfo { /// def_bundle_iterator/def_bundle_begin/def_bundle_end - Walk all defs of the /// specified register, stepping by bundle. - typedef defusechain_instr_iterator - def_bundle_iterator; + using def_bundle_iterator = + defusechain_instr_iterator; def_bundle_iterator def_bundle_begin(unsigned RegNo) const { return def_bundle_iterator(getRegUseDefListHead(RegNo)); } @@ -425,8 +427,8 @@ class MachineRegisterInfo { } /// use_iterator/use_begin/use_end - Walk all uses of the specified register. - typedef defusechain_iterator - use_iterator; + using use_iterator = + defusechain_iterator; use_iterator use_begin(unsigned RegNo) const { return use_iterator(getRegUseDefListHead(RegNo)); } @@ -438,8 +440,8 @@ class MachineRegisterInfo { /// use_instr_iterator/use_instr_begin/use_instr_end - Walk all uses of the /// specified register, stepping by MachineInstr. 
- typedef defusechain_instr_iterator - use_instr_iterator; + using use_instr_iterator = + defusechain_instr_iterator; use_instr_iterator use_instr_begin(unsigned RegNo) const { return use_instr_iterator(getRegUseDefListHead(RegNo)); } @@ -454,8 +456,8 @@ class MachineRegisterInfo { /// use_bundle_iterator/use_bundle_begin/use_bundle_end - Walk all uses of the /// specified register, stepping by bundle. - typedef defusechain_instr_iterator - use_bundle_iterator; + using use_bundle_iterator = + defusechain_instr_iterator; use_bundle_iterator use_bundle_begin(unsigned RegNo) const { return use_bundle_iterator(getRegUseDefListHead(RegNo)); } @@ -482,8 +484,8 @@ class MachineRegisterInfo { /// use_nodbg_iterator/use_nodbg_begin/use_nodbg_end - Walk all uses of the /// specified register, skipping those marked as Debug. - typedef defusechain_iterator - use_nodbg_iterator; + using use_nodbg_iterator = + defusechain_iterator; use_nodbg_iterator use_nodbg_begin(unsigned RegNo) const { return use_nodbg_iterator(getRegUseDefListHead(RegNo)); } @@ -499,8 +501,8 @@ class MachineRegisterInfo { /// use_instr_nodbg_iterator/use_instr_nodbg_begin/use_instr_nodbg_end - Walk /// all uses of the specified register, stepping by MachineInstr, skipping /// those marked as Debug. - typedef defusechain_instr_iterator - use_instr_nodbg_iterator; + using use_instr_nodbg_iterator = + defusechain_instr_iterator; use_instr_nodbg_iterator use_instr_nodbg_begin(unsigned RegNo) const { return use_instr_nodbg_iterator(getRegUseDefListHead(RegNo)); } @@ -516,8 +518,8 @@ class MachineRegisterInfo { /// use_bundle_nodbg_iterator/use_bundle_nodbg_begin/use_bundle_nodbg_end - Walk /// all uses of the specified register, stepping by bundle, skipping /// those marked as Debug. - typedef defusechain_instr_iterator - use_bundle_nodbg_iterator; + using use_bundle_nodbg_iterator = + defusechain_instr_iterator; use_bundle_nodbg_iterator use_bundle_nodbg_begin(unsigned RegNo) const { return use_bundle_nodbg_iterator(getRegUseDefListHead(RegNo)); } @@ -593,7 +595,6 @@ class MachineRegisterInfo { /// Return the register class of the specified virtual register. /// This shouldn't be used directly unless \p Reg has a register class. /// \see getRegClassOrNull when this might happen. - /// const TargetRegisterClass *getRegClass(unsigned Reg) const { assert(VRegInfo[Reg].first.is() && "Register class not set, wrong accessor"); @@ -620,7 +621,6 @@ class MachineRegisterInfo { /// a register bank or has been assigned a register class. /// \note It is possible to get the register bank from the register class via /// RegisterBankInfo::getRegBankFromRegClass. - /// const RegisterBank *getRegBankOrNull(unsigned Reg) const { const RegClassOrRegBank &Val = VRegInfo[Reg].first; return Val.dyn_cast(); @@ -629,26 +629,27 @@ class MachineRegisterInfo { /// Return the register bank or register class of \p Reg. /// \note Before the register bank gets assigned (i.e., before the /// RegBankSelect pass) \p Reg may not have either. - /// const RegClassOrRegBank &getRegClassOrRegBank(unsigned Reg) const { return VRegInfo[Reg].first; } /// setRegClass - Set the register class of the specified virtual register. - /// void setRegClass(unsigned Reg, const TargetRegisterClass *RC); /// Set the register bank to \p RegBank for \p Reg. 
- /// void setRegBank(unsigned Reg, const RegisterBank &RegBank); + void setRegClassOrRegBank(unsigned Reg, + const RegClassOrRegBank &RCOrRB){ + VRegInfo[Reg].first = RCOrRB; + } + /// constrainRegClass - Constrain the register class of the specified virtual /// register to be a common subclass of RC and the current register class, /// but only if the new class has at least MinNumRegs registers. Return the /// new register class, or NULL if no such class exists. /// This should only be used when the constraint is known to be trivial, like /// GR32 -> GR32_NOSP. Beware of increasing register pressure. - /// const TargetRegisterClass *constrainRegClass(unsigned Reg, const TargetRegisterClass *RC, unsigned MinNumRegs = 0); @@ -660,12 +661,10 @@ class MachineRegisterInfo { /// This method can be used after constraints have been removed from a /// virtual register, for example after removing instructions or splitting /// the live range. - /// bool recomputeRegClass(unsigned Reg); /// createVirtualRegister - Create and return a new virtual register in the /// function with the specified register class. - /// unsigned createVirtualRegister(const TargetRegisterClass *RegClass); /// Accessor for VRegToType. This accessor should only be used @@ -699,7 +698,6 @@ class MachineRegisterInfo { unsigned createIncompleteVirtualRegister(); /// getNumVirtRegs - Return the number of virtual registers created. - /// unsigned getNumVirtRegs() const { return VRegInfo.size(); } /// clearVirtRegs - Remove all virtual registers (after physreg assignment). @@ -805,7 +803,6 @@ class MachineRegisterInfo { /// /// Reserved registers may belong to an allocatable register class, but the /// target has explicitly requested that they are not used. - /// bool isReserved(unsigned PhysReg) const { return getReservedRegs().test(PhysReg); } @@ -833,8 +830,8 @@ class MachineRegisterInfo { // Iteration support for the live-ins set. It's kept in sorted order // by register number. 
-  typedef std::vector<std::pair<unsigned,unsigned>>::const_iterator
-  livein_iterator;
+  using livein_iterator =
+      std::vector<std::pair<unsigned,unsigned>>::const_iterator;
   livein_iterator livein_begin() const { return LiveIns.begin(); }
   livein_iterator livein_end()   const { return LiveIns.end(); }
   bool            livein_empty() const { return LiveIns.empty(); }
@@ -905,10 +902,10 @@
     }
 
   public:
-    typedef std::iterator<std::forward_iterator_tag,
-                          MachineInstr, ptrdiff_t>::reference reference;
-    typedef std::iterator<std::forward_iterator_tag,
-                          MachineInstr, ptrdiff_t>::pointer pointer;
+    using reference = std::iterator<std::forward_iterator_tag,
+                                    MachineInstr, ptrdiff_t>::reference;
+    using pointer = std::iterator<std::forward_iterator_tag,
+                                  MachineInstr, ptrdiff_t>::pointer;
 
     defusechain_iterator() = default;
 
@@ -1011,10 +1008,10 @@
     }
 
   public:
-    typedef std::iterator<std::forward_iterator_tag,
-                          MachineInstr, ptrdiff_t>::reference reference;
-    typedef std::iterator<std::forward_iterator_tag,
-                          MachineInstr, ptrdiff_t>::pointer pointer;
+    using reference = std::iterator<std::forward_iterator_tag,
+                                    MachineInstr, ptrdiff_t>::reference;
+    using pointer = std::iterator<std::forward_iterator_tag,
+                                  MachineInstr, ptrdiff_t>::pointer;
 
     defusechain_instr_iterator() = default;
 
diff --git a/interpreter/llvm/src/include/llvm/CodeGen/MachineScheduler.h b/interpreter/llvm/src/include/llvm/CodeGen/MachineScheduler.h
index 6b2a16e1d36e6..8590b7a348cfc 100644
--- a/interpreter/llvm/src/include/llvm/CodeGen/MachineScheduler.h
+++ b/interpreter/llvm/src/include/llvm/CodeGen/MachineScheduler.h
@@ -32,7 +32,7 @@
 //
 // ScheduleDAGInstrs *<Target>PassConfig::
 // createMachineScheduler(MachineSchedContext *C) {
-//   return new ScheduleDAGMI(C, CustomStrategy(C));
+//   return new ScheduleDAGMILive(C, CustomStrategy(C));
 // }
 //
 // The DAG builder can also be customized in a sense by adding DAG mutations
@@ -104,10 +104,15 @@ extern cl::opt<bool> ForceBottomUp;
 
 class LiveIntervals;
 class MachineDominatorTree;
+class MachineFunction;
+class MachineInstr;
 class MachineLoopInfo;
 class RegisterClassInfo;
 class SchedDFSResult;
 class ScheduleHazardRecognizer;
+class TargetInstrInfo;
+class TargetPassConfig;
+class TargetRegisterInfo;
 
 /// MachineSchedContext provides enough context from the MachineScheduler pass
 /// for the target to instantiate a scheduler.
@@ -129,10 +134,10 @@ struct MachineSchedContext {
 /// schedulers.
 class MachineSchedRegistry : public MachinePassRegistryNode {
 public:
-  typedef ScheduleDAGInstrs *(*ScheduleDAGCtor)(MachineSchedContext *);
+  using ScheduleDAGCtor = ScheduleDAGInstrs *(*)(MachineSchedContext *);
 
   // RegisterPassParser requires a (misnamed) FunctionPassCtor type.
-  typedef ScheduleDAGCtor FunctionPassCtor;
+  using FunctionPassCtor = ScheduleDAGCtor;
 
   static MachinePassRegistry Registry;
 
@@ -198,7 +203,7 @@ class MachineSchedStrategy {
                           MachineBasicBlock::iterator End,
                           unsigned NumRegionInstrs) {}
 
-  virtual void dumpPolicy() {}
+  virtual void dumpPolicy() const {}
 
   /// Check if pressure tracking is needed before building the DAG and
   /// initializing this strategy. Called after initPolicy.
@@ -527,7 +532,7 @@ class ReadyQueue {
 
   unsigned size() const { return Queue.size(); }
 
-  typedef std::vector<SUnit*>::iterator iterator;
+  using iterator = std::vector<SUnit*>::iterator;
 
   iterator begin() { return Queue.begin(); }
 
@@ -550,7 +555,7 @@
     return Queue.begin() + idx;
   }
 
-  void dump();
+  void dump() const;
 };
 
 /// Summarize the unscheduled region.
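The comment fix at the top of this file (ScheduleDAGMI becomes ScheduleDAGMILive) is worth noting: only the Live variant tracks register pressure, so it is the right base for custom strategies that care about liveness. A rough sketch of the registration pattern MachineSchedRegistry supports, using createGenericSchedLive as a stand-in for a real custom DAG factory:

#include "llvm/CodeGen/MachineScheduler.h"

using namespace llvm;

// Factory with the ScheduleDAGCtor signature; a real implementation would
// construct a ScheduleDAGMILive around a custom MachineSchedStrategy.
static ScheduleDAGInstrs *createIllustrativeSched(MachineSchedContext *C) {
  return createGenericSchedLive(C);
}

// Registering under a name makes the scheduler selectable from the command
// line, e.g. -misched=illustrative.
static MachineSchedRegistry
    IllustrativeRegistry("illustrative", "Illustrative scheduler registration",
                         createIllustrativeSched);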
@@ -751,7 +756,7 @@ class SchedBoundary { SUnit *pickOnlyChoice(); #ifndef NDEBUG - void dumpScheduledState(); + void dumpScheduledState() const; #endif }; @@ -885,7 +890,7 @@ class GenericScheduler : public GenericSchedulerBase { MachineBasicBlock::iterator End, unsigned NumRegionInstrs) override; - void dumpPolicy() override; + void dumpPolicy() const override; bool shouldTrackPressure() const override { return RegionPolicy.ShouldTrackPressure; diff --git a/interpreter/llvm/src/include/llvm/CodeGen/MachineValueType.h b/interpreter/llvm/src/include/llvm/CodeGen/MachineValueType.h index a90fe96227b99..0bdb38bfcbec8 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/MachineValueType.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/MachineValueType.h @@ -18,6 +18,7 @@ #include "llvm/ADT/iterator_range.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" +#include namespace llvm { @@ -26,7 +27,7 @@ namespace llvm { /// Machine Value Type. Every type that is supported natively by some /// processor targeted by LLVM occurs here. This means that any legal value /// type can be represented by an MVT. -class MVT { + class MVT { public: enum SimpleValueType : uint8_t { // Simple value types that aren't explicitly part of this enumeration @@ -56,117 +57,119 @@ class MVT { FIRST_FP_VALUETYPE = f16, LAST_FP_VALUETYPE = ppcf128, - v2i1 = 14, // 2 x i1 - v4i1 = 15, // 4 x i1 - v8i1 = 16, // 8 x i1 - v16i1 = 17, // 16 x i1 - v32i1 = 18, // 32 x i1 - v64i1 = 19, // 64 x i1 - v512i1 = 20, // 512 x i1 - v1024i1 = 21, // 1024 x i1 - - v1i8 = 22, // 1 x i8 - v2i8 = 23, // 2 x i8 - v4i8 = 24, // 4 x i8 - v8i8 = 25, // 8 x i8 - v16i8 = 26, // 16 x i8 - v32i8 = 27, // 32 x i8 - v64i8 = 28, // 64 x i8 - v128i8 = 29, //128 x i8 - v256i8 = 30, //256 x i8 - - v1i16 = 31, // 1 x i16 - v2i16 = 32, // 2 x i16 - v4i16 = 33, // 4 x i16 - v8i16 = 34, // 8 x i16 - v16i16 = 35, // 16 x i16 - v32i16 = 36, // 32 x i16 - v64i16 = 37, // 64 x i16 - v128i16 = 38, //128 x i16 - - v1i32 = 39, // 1 x i32 - v2i32 = 40, // 2 x i32 - v4i32 = 41, // 4 x i32 - v8i32 = 42, // 8 x i32 - v16i32 = 43, // 16 x i32 - v32i32 = 44, // 32 x i32 - v64i32 = 45, // 64 x i32 - - v1i64 = 46, // 1 x i64 - v2i64 = 47, // 2 x i64 - v4i64 = 48, // 4 x i64 - v8i64 = 49, // 8 x i64 - v16i64 = 50, // 16 x i64 - v32i64 = 51, // 32 x i64 - - v1i128 = 52, // 1 x i128 + v1i1 = 14, // 1 x i1 + v2i1 = 15, // 2 x i1 + v4i1 = 16, // 4 x i1 + v8i1 = 17, // 8 x i1 + v16i1 = 18, // 16 x i1 + v32i1 = 19, // 32 x i1 + v64i1 = 20, // 64 x i1 + v512i1 = 21, // 512 x i1 + v1024i1 = 22, // 1024 x i1 + + v1i8 = 23, // 1 x i8 + v2i8 = 24, // 2 x i8 + v4i8 = 25, // 4 x i8 + v8i8 = 26, // 8 x i8 + v16i8 = 27, // 16 x i8 + v32i8 = 28, // 32 x i8 + v64i8 = 29, // 64 x i8 + v128i8 = 30, //128 x i8 + v256i8 = 31, //256 x i8 + + v1i16 = 32, // 1 x i16 + v2i16 = 33, // 2 x i16 + v4i16 = 34, // 4 x i16 + v8i16 = 35, // 8 x i16 + v16i16 = 36, // 16 x i16 + v32i16 = 37, // 32 x i16 + v64i16 = 38, // 64 x i16 + v128i16 = 39, //128 x i16 + + v1i32 = 40, // 1 x i32 + v2i32 = 41, // 2 x i32 + v4i32 = 42, // 4 x i32 + v8i32 = 43, // 8 x i32 + v16i32 = 44, // 16 x i32 + v32i32 = 45, // 32 x i32 + v64i32 = 46, // 64 x i32 + + v1i64 = 47, // 1 x i64 + v2i64 = 48, // 2 x i64 + v4i64 = 49, // 4 x i64 + v8i64 = 50, // 8 x i64 + v16i64 = 51, // 16 x i64 + v32i64 = 52, // 32 x i64 + + v1i128 = 53, // 1 x i128 // Scalable integer types - nxv2i1 = 53, // n x 2 x i1 - nxv4i1 = 54, // n x 4 x i1 - nxv8i1 = 55, // n x 8 x i1 - nxv16i1 = 56, // n x 16 x i1 - nxv32i1 = 57, 
// n x 32 x i1 - - nxv1i8 = 58, // n x 1 x i8 - nxv2i8 = 59, // n x 2 x i8 - nxv4i8 = 60, // n x 4 x i8 - nxv8i8 = 61, // n x 8 x i8 - nxv16i8 = 62, // n x 16 x i8 - nxv32i8 = 63, // n x 32 x i8 - - nxv1i16 = 64, // n x 1 x i16 - nxv2i16 = 65, // n x 2 x i16 - nxv4i16 = 66, // n x 4 x i16 - nxv8i16 = 67, // n x 8 x i16 - nxv16i16 = 68, // n x 16 x i16 - nxv32i16 = 69, // n x 32 x i16 - - nxv1i32 = 70, // n x 1 x i32 - nxv2i32 = 71, // n x 2 x i32 - nxv4i32 = 72, // n x 4 x i32 - nxv8i32 = 73, // n x 8 x i32 - nxv16i32 = 74, // n x 16 x i32 - nxv32i32 = 75, // n x 32 x i32 - - nxv1i64 = 76, // n x 1 x i64 - nxv2i64 = 77, // n x 2 x i64 - nxv4i64 = 78, // n x 4 x i64 - nxv8i64 = 79, // n x 8 x i64 - nxv16i64 = 80, // n x 16 x i64 - nxv32i64 = 81, // n x 32 x i64 - - FIRST_INTEGER_VECTOR_VALUETYPE = v2i1, + nxv1i1 = 54, // n x 1 x i1 + nxv2i1 = 55, // n x 2 x i1 + nxv4i1 = 56, // n x 4 x i1 + nxv8i1 = 57, // n x 8 x i1 + nxv16i1 = 58, // n x 16 x i1 + nxv32i1 = 59, // n x 32 x i1 + + nxv1i8 = 60, // n x 1 x i8 + nxv2i8 = 61, // n x 2 x i8 + nxv4i8 = 62, // n x 4 x i8 + nxv8i8 = 63, // n x 8 x i8 + nxv16i8 = 64, // n x 16 x i8 + nxv32i8 = 65, // n x 32 x i8 + + nxv1i16 = 66, // n x 1 x i16 + nxv2i16 = 67, // n x 2 x i16 + nxv4i16 = 68, // n x 4 x i16 + nxv8i16 = 69, // n x 8 x i16 + nxv16i16 = 70, // n x 16 x i16 + nxv32i16 = 71, // n x 32 x i16 + + nxv1i32 = 72, // n x 1 x i32 + nxv2i32 = 73, // n x 2 x i32 + nxv4i32 = 74, // n x 4 x i32 + nxv8i32 = 75, // n x 8 x i32 + nxv16i32 = 76, // n x 16 x i32 + nxv32i32 = 77, // n x 32 x i32 + + nxv1i64 = 78, // n x 1 x i64 + nxv2i64 = 79, // n x 2 x i64 + nxv4i64 = 80, // n x 4 x i64 + nxv8i64 = 81, // n x 8 x i64 + nxv16i64 = 82, // n x 16 x i64 + nxv32i64 = 83, // n x 32 x i64 + + FIRST_INTEGER_VECTOR_VALUETYPE = v1i1, LAST_INTEGER_VECTOR_VALUETYPE = nxv32i64, - FIRST_INTEGER_SCALABLE_VALUETYPE = nxv2i1, + FIRST_INTEGER_SCALABLE_VALUETYPE = nxv1i1, LAST_INTEGER_SCALABLE_VALUETYPE = nxv32i64, - v2f16 = 82, // 2 x f16 - v4f16 = 83, // 4 x f16 - v8f16 = 84, // 8 x f16 - v1f32 = 85, // 1 x f32 - v2f32 = 86, // 2 x f32 - v4f32 = 87, // 4 x f32 - v8f32 = 88, // 8 x f32 - v16f32 = 89, // 16 x f32 - v1f64 = 90, // 1 x f64 - v2f64 = 91, // 2 x f64 - v4f64 = 92, // 4 x f64 - v8f64 = 93, // 8 x f64 - - nxv2f16 = 94, // n x 2 x f16 - nxv4f16 = 95, // n x 4 x f16 - nxv8f16 = 96, // n x 8 x f16 - nxv1f32 = 97, // n x 1 x f32 - nxv2f32 = 98, // n x 2 x f32 - nxv4f32 = 99, // n x 4 x f32 - nxv8f32 = 100, // n x 8 x f32 - nxv16f32 = 101, // n x 16 x f32 - nxv1f64 = 102, // n x 1 x f64 - nxv2f64 = 103, // n x 2 x f64 - nxv4f64 = 104, // n x 4 x f64 - nxv8f64 = 105, // n x 8 x f64 + v2f16 = 84, // 2 x f16 + v4f16 = 85, // 4 x f16 + v8f16 = 86, // 8 x f16 + v1f32 = 87, // 1 x f32 + v2f32 = 88, // 2 x f32 + v4f32 = 89, // 4 x f32 + v8f32 = 90, // 8 x f32 + v16f32 = 91, // 16 x f32 + v1f64 = 92, // 1 x f64 + v2f64 = 93, // 2 x f64 + v4f64 = 94, // 4 x f64 + v8f64 = 95, // 8 x f64 + + nxv2f16 = 96, // n x 2 x f16 + nxv4f16 = 97, // n x 4 x f16 + nxv8f16 = 98, // n x 8 x f16 + nxv1f32 = 99, // n x 1 x f32 + nxv2f32 = 100, // n x 2 x f32 + nxv4f32 = 101, // n x 4 x f32 + nxv8f32 = 102, // n x 8 x f32 + nxv16f32 = 103, // n x 16 x f32 + nxv1f64 = 104, // n x 1 x f64 + nxv2f64 = 105, // n x 2 x f64 + nxv4f64 = 106, // n x 4 x f64 + nxv8f64 = 107, // n x 8 x f64 FIRST_FP_VECTOR_VALUETYPE = v2f16, LAST_FP_VECTOR_VALUETYPE = nxv8f64, @@ -174,21 +177,21 @@ class MVT { FIRST_FP_SCALABLE_VALUETYPE = nxv2f16, LAST_FP_SCALABLE_VALUETYPE = nxv8f64, - FIRST_VECTOR_VALUETYPE = v2i1, + 
FIRST_VECTOR_VALUETYPE = v1i1, LAST_VECTOR_VALUETYPE = nxv8f64, - x86mmx = 106, // This is an X86 MMX value + x86mmx = 108, // This is an X86 MMX value - Glue = 107, // This glues nodes together during pre-RA sched + Glue = 109, // This glues nodes together during pre-RA sched - isVoid = 108, // This has no value + isVoid = 110, // This has no value - Untyped = 109, // This value takes a register, but has + Untyped = 111, // This value takes a register, but has // unspecified type. The register class // will be determined by the opcode. FIRST_VALUETYPE = 1, // This is always the beginning of the list. - LAST_VALUETYPE = 110, // This always remains at the end of the list. + LAST_VALUETYPE = 112, // This always remains at the end of the list. // This is the current maximum for LAST_VALUETYPE. // MVT::MAX_ALLOWED_VALUETYPE is used for asserts and to size bit vectors @@ -230,8 +233,7 @@ class MVT { Any = 255 }; - SimpleValueType SimpleTy; - + SimpleValueType SimpleTy = INVALID_SIMPLE_VALUE_TYPE; // A class to represent the number of elements in a vector // @@ -268,7 +270,7 @@ class MVT { } }; - constexpr MVT() : SimpleTy(INVALID_SIMPLE_VALUE_TYPE) {} + constexpr MVT() = default; constexpr MVT(SimpleValueType SVT) : SimpleTy(SVT) {} bool operator>(const MVT& S) const { return SimpleTy > S.SimpleTy; } @@ -411,6 +413,7 @@ class MVT { switch (SimpleTy) { default: llvm_unreachable("Not a vector MVT!"); + case v1i1: case v2i1: case v4i1: case v8i1: @@ -419,6 +422,7 @@ class MVT { case v64i1: case v512i1: case v1024i1: + case nxv1i1: case nxv2i1: case nxv4i1: case nxv8i1: @@ -589,6 +593,7 @@ class MVT { case nxv2f16: case nxv2f32: case nxv2f64: return 2; + case v1i1: case v1i8: case v1i16: case v1i32: @@ -596,6 +601,7 @@ class MVT { case v1i128: case v1f32: case v1f64: + case nxv1i1: case nxv1i8: case nxv1i16: case nxv1i32: @@ -628,7 +634,9 @@ class MVT { "in codegen and has no size"); case Metadata: llvm_unreachable("Value type is metadata."); - case i1 : return 1; + case i1: + case v1i1: + case nxv1i1: return 1; case v2i1: case nxv2i1: return 2; case v4i1: @@ -772,7 +780,6 @@ class MVT { return getSizeInBits() <= VT.getSizeInBits(); } - static MVT getFloatingPointVT(unsigned BitWidth) { switch (BitWidth) { default: @@ -814,6 +821,7 @@ class MVT { default: break; case MVT::i1: + if (NumElements == 1) return MVT::v1i1; if (NumElements == 2) return MVT::v2i1; if (NumElements == 4) return MVT::v4i1; if (NumElements == 8) return MVT::v8i1; @@ -891,6 +899,7 @@ class MVT { default: break; case MVT::i1: + if (NumElements == 1) return MVT::nxv1i1; if (NumElements == 2) return MVT::nxv2i1; if (NumElements == 4) return MVT::nxv4i1; if (NumElements == 8) return MVT::nxv8i1; @@ -972,9 +981,12 @@ class MVT { /// A simple iterator over the MVT::SimpleValueType enum. struct mvt_iterator { SimpleValueType VT; + mvt_iterator(SimpleValueType VT) : VT(VT) {} + MVT operator*() const { return VT; } bool operator!=(const mvt_iterator &LHS) const { return VT != LHS.VT; } + mvt_iterator& operator++() { VT = (MVT::SimpleValueType)((int)VT + 1); assert((int)VT <= MVT::MAX_ALLOWED_VALUETYPE && @@ -982,8 +994,9 @@ class MVT { return *this; } }; + /// A range of the MVT::SimpleValueType enum. 
- typedef iterator_range mvt_range; + using mvt_range = iterator_range; public: /// SimpleValueType Iteration @@ -991,32 +1004,39 @@ class MVT { static mvt_range all_valuetypes() { return mvt_range(MVT::FIRST_VALUETYPE, MVT::LAST_VALUETYPE); } + static mvt_range integer_valuetypes() { return mvt_range(MVT::FIRST_INTEGER_VALUETYPE, (MVT::SimpleValueType)(MVT::LAST_INTEGER_VALUETYPE + 1)); } + static mvt_range fp_valuetypes() { return mvt_range(MVT::FIRST_FP_VALUETYPE, (MVT::SimpleValueType)(MVT::LAST_FP_VALUETYPE + 1)); } + static mvt_range vector_valuetypes() { return mvt_range(MVT::FIRST_VECTOR_VALUETYPE, (MVT::SimpleValueType)(MVT::LAST_VECTOR_VALUETYPE + 1)); } + static mvt_range integer_vector_valuetypes() { return mvt_range( MVT::FIRST_INTEGER_VECTOR_VALUETYPE, (MVT::SimpleValueType)(MVT::LAST_INTEGER_VECTOR_VALUETYPE + 1)); } + static mvt_range fp_vector_valuetypes() { return mvt_range( MVT::FIRST_FP_VECTOR_VALUETYPE, (MVT::SimpleValueType)(MVT::LAST_FP_VECTOR_VALUETYPE + 1)); } + static mvt_range integer_scalable_vector_valuetypes() { return mvt_range(MVT::FIRST_INTEGER_SCALABLE_VALUETYPE, (MVT::SimpleValueType)(MVT::LAST_INTEGER_SCALABLE_VALUETYPE + 1)); } + static mvt_range fp_scalable_vector_valuetypes() { return mvt_range(MVT::FIRST_FP_SCALABLE_VALUETYPE, (MVT::SimpleValueType)(MVT::LAST_FP_SCALABLE_VALUETYPE + 1)); @@ -1024,6 +1044,6 @@ class MVT { /// @} }; -} // End llvm namespace +} // end namespace llvm -#endif +#endif // LLVM_CODEGEN_MACHINEVALUETYPE_H diff --git a/interpreter/llvm/src/include/llvm/CodeGen/MacroFusion.h b/interpreter/llvm/src/include/llvm/CodeGen/MacroFusion.h new file mode 100644 index 0000000000000..dc105fdc68fd8 --- /dev/null +++ b/interpreter/llvm/src/include/llvm/CodeGen/MacroFusion.h @@ -0,0 +1,50 @@ +//===- MacroFusion.h - Macro Fusion -----------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file This file contains the definition of the DAG scheduling mutation to +/// pair instructions back to back. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_MACROFUSION_H +#define LLVM_CODEGEN_MACROFUSION_H + +#include +#include + +namespace llvm { + +class MachineInstr; +class ScheduleDAGMutation; +class TargetInstrInfo; +class TargetSubtargetInfo; + +/// \brief Check if the instr pair, FirstMI and SecondMI, should be fused +/// together. Given SecondMI, when FirstMI is unspecified, then check if +/// SecondMI may be part of a fused pair at all. +using ShouldSchedulePredTy = std::function; + +/// \brief Create a DAG scheduling mutation to pair instructions back to back +/// for instructions that benefit according to the target-specific +/// shouldScheduleAdjacent predicate function. +std::unique_ptr +createMacroFusionDAGMutation(ShouldSchedulePredTy shouldScheduleAdjacent); + +/// \brief Create a DAG scheduling mutation to pair branch instructions with one +/// of their predecessors back to back for instructions that benefit according +/// to the target-specific shouldScheduleAdjacent predicate function. 
+std::unique_ptr +createBranchMacroFusionDAGMutation(ShouldSchedulePredTy shouldScheduleAdjacent); + +} // end namespace llvm + +#endif // LLVM_CODEGEN_MACROFUSION_H diff --git a/interpreter/llvm/src/include/llvm/CodeGen/PBQP/CostAllocator.h b/interpreter/llvm/src/include/llvm/CodeGen/PBQP/CostAllocator.h index 02d39fe383f1f..bde451ae1fccd 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/PBQP/CostAllocator.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/PBQP/CostAllocator.h @@ -1,4 +1,4 @@ -//===---------- CostAllocator.h - PBQP Cost Allocator -----------*- C++ -*-===// +//===- CostAllocator.h - PBQP Cost Allocator --------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -19,26 +19,28 @@ #define LLVM_CODEGEN_PBQP_COSTALLOCATOR_H #include "llvm/ADT/DenseSet.h" +#include +#include #include -#include namespace llvm { namespace PBQP { -template -class ValuePool { +template class ValuePool { public: - typedef std::shared_ptr PoolRef; + using PoolRef = std::shared_ptr; private: - class PoolEntry : public std::enable_shared_from_this { public: template PoolEntry(ValuePool &Pool, ValueKeyT Value) : Pool(Pool), Value(std::move(Value)) {} + ~PoolEntry() { Pool.removeEntry(this); } - const ValueT& getValue() const { return Value; } + + const ValueT &getValue() const { return Value; } + private: ValuePool &Pool; ValueT Value; @@ -46,10 +48,10 @@ class ValuePool { class PoolEntryDSInfo { public: - static inline PoolEntry* getEmptyKey() { return nullptr; } + static inline PoolEntry *getEmptyKey() { return nullptr; } - static inline PoolEntry* getTombstoneKey() { - return reinterpret_cast(static_cast(1)); + static inline PoolEntry *getTombstoneKey() { + return reinterpret_cast(static_cast(1)); } template @@ -66,8 +68,7 @@ class ValuePool { } template - static - bool isEqual(const ValueKeyT1 &C1, const ValueKeyT2 &C2) { + static bool isEqual(const ValueKeyT1 &C1, const ValueKeyT2 &C2) { return C1 == C2; } @@ -83,10 +84,9 @@ class ValuePool { return P1 == P2; return isEqual(P1->getValue(), P2); } - }; - typedef DenseSet EntrySetT; + using EntrySetT = DenseSet; EntrySetT EntrySet; @@ -105,28 +105,31 @@ class ValuePool { } }; -template -class PoolCostAllocator { +template class PoolCostAllocator { private: - typedef ValuePool VectorCostPool; - typedef ValuePool MatrixCostPool; + using VectorCostPool = ValuePool; + using MatrixCostPool = ValuePool; + public: - typedef VectorT Vector; - typedef MatrixT Matrix; - typedef typename VectorCostPool::PoolRef VectorPtr; - typedef typename MatrixCostPool::PoolRef MatrixPtr; + using Vector = VectorT; + using Matrix = MatrixT; + using VectorPtr = typename VectorCostPool::PoolRef; + using MatrixPtr = typename MatrixCostPool::PoolRef; + + template VectorPtr getVector(VectorKeyT v) { + return VectorPool.getValue(std::move(v)); + } - template - VectorPtr getVector(VectorKeyT v) { return VectorPool.getValue(std::move(v)); } + template MatrixPtr getMatrix(MatrixKeyT m) { + return MatrixPool.getValue(std::move(m)); + } - template - MatrixPtr getMatrix(MatrixKeyT m) { return MatrixPool.getValue(std::move(m)); } private: VectorCostPool VectorPool; MatrixCostPool MatrixPool; }; -} // namespace PBQP -} // namespace llvm +} // end namespace PBQP +} // end namespace llvm -#endif +#endif // LLVM_CODEGEN_PBQP_COSTALLOCATOR_H diff --git a/interpreter/llvm/src/include/llvm/CodeGen/PBQP/Graph.h b/interpreter/llvm/src/include/llvm/CodeGen/PBQP/Graph.h index 83487e6a808af..e94878ced10dc 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/PBQP/Graph.h +++ 
b/interpreter/llvm/src/include/llvm/CodeGen/PBQP/Graph.h @@ -1,4 +1,4 @@ -//===-------------------- Graph.h - PBQP Graph ------------------*- C++ -*-===// +//===- Graph.h - PBQP Graph -------------------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -11,16 +11,14 @@ // //===----------------------------------------------------------------------===// - #ifndef LLVM_CODEGEN_PBQP_GRAPH_H #define LLVM_CODEGEN_PBQP_GRAPH_H #include "llvm/ADT/STLExtras.h" -#include "llvm/Support/Debug.h" #include #include +#include #include -#include #include namespace llvm { @@ -28,8 +26,8 @@ namespace PBQP { class GraphBase { public: - typedef unsigned NodeId; - typedef unsigned EdgeId; + using NodeId = unsigned; + using EdgeId = unsigned; /// @brief Returns a value representing an invalid (non-existent) node. static NodeId invalidNodeId() { @@ -48,32 +46,32 @@ namespace PBQP { template class Graph : public GraphBase { private: - typedef typename SolverT::CostAllocator CostAllocator; + using CostAllocator = typename SolverT::CostAllocator; + public: - typedef typename SolverT::RawVector RawVector; - typedef typename SolverT::RawMatrix RawMatrix; - typedef typename SolverT::Vector Vector; - typedef typename SolverT::Matrix Matrix; - typedef typename CostAllocator::VectorPtr VectorPtr; - typedef typename CostAllocator::MatrixPtr MatrixPtr; - typedef typename SolverT::NodeMetadata NodeMetadata; - typedef typename SolverT::EdgeMetadata EdgeMetadata; - typedef typename SolverT::GraphMetadata GraphMetadata; + using RawVector = typename SolverT::RawVector; + using RawMatrix = typename SolverT::RawMatrix; + using Vector = typename SolverT::Vector; + using Matrix = typename SolverT::Matrix; + using VectorPtr = typename CostAllocator::VectorPtr; + using MatrixPtr = typename CostAllocator::MatrixPtr; + using NodeMetadata = typename SolverT::NodeMetadata; + using EdgeMetadata = typename SolverT::EdgeMetadata; + using GraphMetadata = typename SolverT::GraphMetadata; private: - class NodeEntry { public: - typedef std::vector AdjEdgeList; - typedef AdjEdgeList::size_type AdjEdgeIdx; - typedef AdjEdgeList::const_iterator AdjEdgeItr; + using AdjEdgeList = std::vector; + using AdjEdgeIdx = AdjEdgeList::size_type; + using AdjEdgeItr = AdjEdgeList::const_iterator; + + NodeEntry(VectorPtr Costs) : Costs(std::move(Costs)) {} static AdjEdgeIdx getInvalidAdjEdgeIdx() { return std::numeric_limits::max(); } - NodeEntry(VectorPtr Costs) : Costs(std::move(Costs)) {} - AdjEdgeIdx addAdjEdgeId(EdgeId EId) { AdjEdgeIdx Idx = AdjEdgeIds.size(); AdjEdgeIds.push_back(EId); @@ -96,6 +94,7 @@ namespace PBQP { VectorPtr Costs; NodeMetadata Metadata; + private: AdjEdgeList AdjEdgeIds; }; @@ -150,8 +149,10 @@ namespace PBQP { NodeId getN1Id() const { return NIds[0]; } NodeId getN2Id() const { return NIds[1]; } + MatrixPtr Costs; EdgeMetadata Metadata; + private: NodeId NIds[2]; typename NodeEntry::AdjEdgeIdx ThisEdgeAdjIdxs[2]; @@ -161,18 +162,20 @@ namespace PBQP { GraphMetadata Metadata; CostAllocator CostAlloc; - SolverT *Solver; + SolverT *Solver = nullptr; - typedef std::vector NodeVector; - typedef std::vector FreeNodeVector; + using NodeVector = std::vector; + using FreeNodeVector = std::vector; NodeVector Nodes; FreeNodeVector FreeNodeIds; - typedef std::vector EdgeVector; - typedef std::vector FreeEdgeVector; + using EdgeVector = std::vector; + using FreeEdgeVector = std::vector; EdgeVector Edges; FreeEdgeVector FreeEdgeIds; + Graph(const Graph &Other) {} + // ----- INTERNAL METHODS ----- NodeEntry 
&getNode(NodeId NId) { @@ -220,20 +223,18 @@ namespace PBQP { return EId; } - Graph(const Graph &Other) {} void operator=(const Graph &Other) {} public: - - typedef typename NodeEntry::AdjEdgeItr AdjEdgeItr; + using AdjEdgeItr = typename NodeEntry::AdjEdgeItr; class NodeItr { public: - typedef std::forward_iterator_tag iterator_category; - typedef NodeId value_type; - typedef int difference_type; - typedef NodeId* pointer; - typedef NodeId& reference; + using iterator_category = std::forward_iterator_tag; + using value_type = NodeId; + using difference_type = int; + using pointer = NodeId *; + using reference = NodeId &; NodeItr(NodeId CurNId, const Graph &G) : CurNId(CurNId), EndNId(G.Nodes.size()), FreeNodeIds(G.FreeNodeIds) { @@ -283,53 +284,65 @@ namespace PBQP { class NodeIdSet { public: - NodeIdSet(const Graph &G) : G(G) { } + NodeIdSet(const Graph &G) : G(G) {} + NodeItr begin() const { return NodeItr(0, G); } NodeItr end() const { return NodeItr(G.Nodes.size(), G); } + bool empty() const { return G.Nodes.empty(); } + typename NodeVector::size_type size() const { return G.Nodes.size() - G.FreeNodeIds.size(); } + private: const Graph& G; }; class EdgeIdSet { public: - EdgeIdSet(const Graph &G) : G(G) { } + EdgeIdSet(const Graph &G) : G(G) {} + EdgeItr begin() const { return EdgeItr(0, G); } EdgeItr end() const { return EdgeItr(G.Edges.size(), G); } + bool empty() const { return G.Edges.empty(); } + typename NodeVector::size_type size() const { return G.Edges.size() - G.FreeEdgeIds.size(); } + private: const Graph& G; }; class AdjEdgeIdSet { public: - AdjEdgeIdSet(const NodeEntry &NE) : NE(NE) { } + AdjEdgeIdSet(const NodeEntry &NE) : NE(NE) {} + typename NodeEntry::AdjEdgeItr begin() const { return NE.getAdjEdgeIds().begin(); } + typename NodeEntry::AdjEdgeItr end() const { return NE.getAdjEdgeIds().end(); } + bool empty() const { return NE.getAdjEdgeIds().empty(); } + typename NodeEntry::AdjEdgeList::size_type size() const { return NE.getAdjEdgeIds().size(); } + private: const NodeEntry &NE; }; /// @brief Construct an empty PBQP graph. - Graph() : Solver(nullptr) {} + Graph() = default; /// @brief Construct an empty PBQP graph with the given graph metadata. - Graph(GraphMetadata Metadata) - : Metadata(std::move(Metadata)), Solver(nullptr) {} + Graph(GraphMetadata Metadata) : Metadata(std::move(Metadata)) {} /// @brief Get a reference to the graph metadata. GraphMetadata& getMetadata() { return Metadata; } @@ -656,7 +669,7 @@ namespace PBQP { } }; -} // namespace PBQP -} // namespace llvm +} // end namespace PBQP +} // end namespace llvm #endif // LLVM_CODEGEN_PBQP_GRAPH_HPP diff --git a/interpreter/llvm/src/include/llvm/CodeGen/PBQP/Math.h b/interpreter/llvm/src/include/llvm/CodeGen/PBQP/Math.h index 278787550a43c..ba405e816d107 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/PBQP/Math.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/PBQP/Math.h @@ -1,4 +1,4 @@ -//===------ Math.h - PBQP Vector and Matrix classes -------------*- C++ -*-===// +//===- Math.h - PBQP Vector and Matrix classes ------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -11,20 +11,22 @@ #define LLVM_CODEGEN_PBQP_MATH_H #include "llvm/ADT/Hashing.h" +#include "llvm/ADT/STLExtras.h" #include #include #include +#include namespace llvm { namespace PBQP { -typedef float PBQPNum; +using PBQPNum = float; /// \brief PBQP Vector class. class Vector { friend hash_code hash_value(const Vector &); -public: +public: /// \brief Construct a PBQP vector of the given size. 
explicit Vector(unsigned Length) : Length(Length), Data(llvm::make_unique(Length)) {} @@ -120,8 +122,8 @@ OStream& operator<<(OStream &OS, const Vector &V) { class Matrix { private: friend hash_code hash_value(const Matrix &); -public: +public: /// \brief Construct a PBQP Matrix with the given dimensions. Matrix(unsigned Rows, unsigned Cols) : Rows(Rows), Cols(Cols), Data(llvm::make_unique(Rows * Cols)) { @@ -253,9 +255,11 @@ OStream& operator<<(OStream &OS, const Matrix &M) { template class MDVector : public Vector { public: - MDVector(const Vector &v) : Vector(v), md(*this) { } + MDVector(const Vector &v) : Vector(v), md(*this) {} MDVector(Vector &&v) : Vector(std::move(v)), md(*this) { } + const Metadata& getMetadata() const { return md; } + private: Metadata md; }; @@ -268,9 +272,11 @@ inline hash_code hash_value(const MDVector &V) { template class MDMatrix : public Matrix { public: - MDMatrix(const Matrix &m) : Matrix(m), md(*this) { } + MDMatrix(const Matrix &m) : Matrix(m), md(*this) {} MDMatrix(Matrix &&m) : Matrix(std::move(m)), md(*this) { } + const Metadata& getMetadata() const { return md; } + private: Metadata md; }; @@ -280,7 +286,7 @@ inline hash_code hash_value(const MDMatrix &M) { return hash_value(static_cast(M)); } -} // namespace PBQP -} // namespace llvm +} // end namespace PBQP +} // end namespace llvm #endif // LLVM_CODEGEN_PBQP_MATH_H diff --git a/interpreter/llvm/src/include/llvm/CodeGen/PBQP/ReductionRules.h b/interpreter/llvm/src/include/llvm/CodeGen/PBQP/ReductionRules.h index d4a544bfe7214..8aeb519367608 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/PBQP/ReductionRules.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/PBQP/ReductionRules.h @@ -1,4 +1,4 @@ -//===----------- ReductionRules.h - Reduction Rules -------------*- C++ -*-===// +//===- ReductionRules.h - Reduction Rules -----------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -17,6 +17,8 @@ #include "Graph.h" #include "Math.h" #include "Solution.h" +#include +#include namespace llvm { namespace PBQP { @@ -27,11 +29,11 @@ namespace PBQP { /// neighbor. Notify the problem domain. template void applyR1(GraphT &G, typename GraphT::NodeId NId) { - typedef typename GraphT::NodeId NodeId; - typedef typename GraphT::EdgeId EdgeId; - typedef typename GraphT::Vector Vector; - typedef typename GraphT::Matrix Matrix; - typedef typename GraphT::RawVector RawVector; + using NodeId = typename GraphT::NodeId; + using EdgeId = typename GraphT::EdgeId; + using Vector = typename GraphT::Vector; + using Matrix = typename GraphT::Matrix; + using RawVector = typename GraphT::RawVector; assert(G.getNodeDegree(NId) == 1 && "R1 applied to node with degree != 1."); @@ -71,11 +73,11 @@ namespace PBQP { template void applyR2(GraphT &G, typename GraphT::NodeId NId) { - typedef typename GraphT::NodeId NodeId; - typedef typename GraphT::EdgeId EdgeId; - typedef typename GraphT::Vector Vector; - typedef typename GraphT::Matrix Matrix; - typedef typename GraphT::RawMatrix RawMatrix; + using NodeId = typename GraphT::NodeId; + using EdgeId = typename GraphT::EdgeId; + using Vector = typename GraphT::Vector; + using Matrix = typename GraphT::Matrix; + using RawMatrix = typename GraphT::RawMatrix; assert(G.getNodeDegree(NId) == 2 && "R2 applied to node with degree != 2."); @@ -177,9 +179,9 @@ namespace PBQP { // state. 
template Solution backpropagate(GraphT& G, StackT stack) { - typedef GraphBase::NodeId NodeId; - typedef typename GraphT::Matrix Matrix; - typedef typename GraphT::RawVector RawVector; + using NodeId = GraphBase::NodeId; + using Matrix = typename GraphT::Matrix; + using RawVector = typename GraphT::RawVector; Solution s; @@ -215,7 +217,7 @@ namespace PBQP { return s; } -} // namespace PBQP -} // namespace llvm +} // end namespace PBQP +} // end namespace llvm -#endif +#endif // LLVM_CODEGEN_PBQP_REDUCTIONRULES_H diff --git a/interpreter/llvm/src/include/llvm/CodeGen/PBQP/Solution.h b/interpreter/llvm/src/include/llvm/CodeGen/PBQP/Solution.h index d96b5eac45200..8d5d2374679d3 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/PBQP/Solution.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/PBQP/Solution.h @@ -26,7 +26,7 @@ namespace PBQP { /// To get the selection for each node in the problem use the getSelection method. class Solution { private: - typedef std::map SelectionsMap; + using SelectionsMap = std::map; SelectionsMap selections; unsigned r0Reductions = 0; diff --git a/interpreter/llvm/src/include/llvm/CodeGen/PBQPRAConstraint.h b/interpreter/llvm/src/include/llvm/CodeGen/PBQPRAConstraint.h index 833b9bad613ff..269b7a7b3a35d 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/PBQPRAConstraint.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/PBQPRAConstraint.h @@ -1,4 +1,4 @@ -//===-- RegAllocPBQP.h ------------------------------------------*- C++ -*-===// +//===- RegAllocPBQP.h -------------------------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -16,23 +16,22 @@ #ifndef LLVM_CODEGEN_PBQPRACONSTRAINT_H #define LLVM_CODEGEN_PBQPRACONSTRAINT_H +#include #include #include namespace llvm { + namespace PBQP { namespace RegAlloc { + // Forward declare PBQP graph class. class PBQPRAGraph; -} -} -class LiveIntervals; -class MachineBlockFrequencyInfo; -class MachineFunction; -class TargetRegisterInfo; +} // end namespace RegAlloc +} // end namespace PBQP -typedef PBQP::RegAlloc::PBQPRAGraph PBQPRAGraph; +using PBQPRAGraph = PBQP::RegAlloc::PBQPRAGraph; /// @brief Abstract base for classes implementing PBQP register allocation /// constraints (e.g. Spill-costs, interference, coalescing). @@ -40,6 +39,7 @@ class PBQPRAConstraint { public: virtual ~PBQPRAConstraint() = 0; virtual void apply(PBQPRAGraph &G) = 0; + private: virtual void anchor(); }; @@ -59,11 +59,13 @@ class PBQPRAConstraintList : public PBQPRAConstraint { if (C) Constraints.push_back(std::move(C)); } + private: std::vector> Constraints; + void anchor() override; }; -} +} // end namespace llvm -#endif /* LLVM_CODEGEN_PBQPRACONSTRAINT_H */ +#endif // LLVM_CODEGEN_PBQPRACONSTRAINT_H diff --git a/interpreter/llvm/src/include/llvm/CodeGen/Passes.h b/interpreter/llvm/src/include/llvm/CodeGen/Passes.h index 2735a566d4704..96cfce5b84dfe 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/Passes.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/Passes.h @@ -33,7 +33,7 @@ class raw_ostream; /// List of target independent CodeGen pass IDs. namespace llvm { - FunctionPass *createAtomicExpandPass(const TargetMachine *TM); + FunctionPass *createAtomicExpandPass(); /// createUnreachableBlockEliminationPass - The LLVM code generator does not /// work well with unreachable basic blocks (what live ranges make sense for a @@ -66,7 +66,11 @@ namespace llvm { /// createCodeGenPreparePass - Transform the code to expose more pattern /// matching during instruction selection. 
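Most of the churn in these PBQP and CodeGen headers is mechanical typedef-to-using conversion. As an aside (not part of the patch), the practical payoff of the new spelling is that a using declaration can be templated, which typedef cannot express directly; a minimal self-contained sketch:

    #include <memory>
    #include <vector>

    // 'using' can introduce an alias template...
    template <typename T>
    using OwningVec = std::vector<std::unique_ptr<T>>;

    // ...whereas 'typedef' needs a wrapper struct and a ::type member:
    template <typename T>
    struct OwningVecT {
      typedef std::vector<std::unique_ptr<T>> type;
    };

    OwningVec<int> Direct;            // C++11 alias template
    OwningVecT<int>::type Indirect;   // pre-C++11 workaround

The name-first, reads-like-assignment syntax is also why the cleanups convert even non-template typedefs.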
- FunctionPass *createCodeGenPreparePass(const TargetMachine *TM = nullptr); + FunctionPass *createCodeGenPreparePass(); + + /// createScalarizeMaskedMemIntrinPass - Replace masked load, store, gather + /// and scatter intrinsics with scalar code when target doesn't support them. + FunctionPass *createScalarizeMaskedMemIntrinPass(); /// AtomicExpandID -- Lowers atomic operations in terms of either cmpxchg /// load-linked/store-conditional loops. @@ -136,6 +140,9 @@ namespace llvm { /// Greedy register allocator. extern char &RAGreedyID; + /// Basic register allocator. + extern char &RABasicID; + /// VirtRegRewriter pass. Rewrite virtual registers to physical registers as /// assigned in VirtRegMap. extern char &VirtRegRewriterID; @@ -173,7 +180,7 @@ namespace llvm { /// PrologEpilogCodeInserter - This pass inserts prolog and epilog code, /// and eliminates abstract frame references. extern char &PrologEpilogCodeInserterID; - MachineFunctionPass *createPrologEpilogInserterPass(const TargetMachine *TM); + MachineFunctionPass *createPrologEpilogInserterPass(); /// ExpandPostRAPseudos - This pass expands pseudo instructions after /// register allocation. @@ -301,7 +308,7 @@ namespace llvm { /// createStackProtectorPass - This pass adds stack protectors to functions. /// - FunctionPass *createStackProtectorPass(const TargetMachine *TM); + FunctionPass *createStackProtectorPass(); /// createMachineVerifierPass - This pass verifies generated machine code /// instructions for correctness. @@ -310,11 +317,11 @@ namespace llvm { /// createDwarfEHPass - This pass mulches exception handling code into a form /// adapted to code generation. Required if using dwarf exception handling. - FunctionPass *createDwarfEHPass(const TargetMachine *TM); + FunctionPass *createDwarfEHPass(); /// createWinEHPass - Prepares personality functions used by MSVC on Windows, /// in addition to the Itanium LSDA based personalities. - FunctionPass *createWinEHPass(const TargetMachine *TM); + FunctionPass *createWinEHPass(); /// createSjLjEHPreparePass - This pass adapts exception handling code to use /// the GCC-style builtin setjmp/longjmp (sjlj) to handling EH control flow. @@ -358,12 +365,12 @@ namespace llvm { /// InterleavedAccess Pass - This pass identifies and matches interleaved /// memory accesses to target specific intrinsics. /// - FunctionPass *createInterleavedAccessPass(const TargetMachine *TM); + FunctionPass *createInterleavedAccessPass(); /// LowerEmuTLS - This pass generates __emutls_[vt].xyz variables for all /// TLS variables for the emulated TLS model. /// - ModulePass *createLowerEmuTLSPass(const TargetMachine *TM); + ModulePass *createLowerEmuTLSPass(); /// This pass lowers the @llvm.load.relative intrinsic to instructions. /// This is unsafe to do earlier because a pass may combine the constant @@ -380,7 +387,7 @@ namespace llvm { /// This pass splits the stack into a safe stack and an unsafe stack to /// protect against stack-based overflow vulnerabilities. - FunctionPass *createSafeStackPass(const TargetMachine *TM = nullptr); + FunctionPass *createSafeStackPass(); /// This pass detects subregister lanes in a virtual register that are used /// independently of other lanes and splits them into separate virtual @@ -415,33 +422,4 @@ namespace llvm { } // End llvm namespace -/// Target machine pass initializer for passes with dependencies. Use with -/// INITIALIZE_TM_PASS_END. -#define INITIALIZE_TM_PASS_BEGIN INITIALIZE_PASS_BEGIN - -/// Target machine pass initializer for passes with dependencies.
Use with -/// INITIALIZE_TM_PASS_BEGIN. -#define INITIALIZE_TM_PASS_END(passName, arg, name, cfg, analysis) \ - PassInfo *PI = new PassInfo( \ - name, arg, &passName::ID, \ - PassInfo::NormalCtor_t(callDefaultCtor), cfg, analysis, \ - PassInfo::TargetMachineCtor_t(callTargetMachineCtor)); \ - Registry.registerPass(*PI, true); \ - return PI; \ - } \ - static llvm::once_flag Initialize##passName##PassFlag; \ - void llvm::initialize##passName##Pass(PassRegistry &Registry) { \ - llvm::call_once(Initialize##passName##PassFlag, \ - initialize##passName##PassOnce, std::ref(Registry)); \ - } - -/// This initializer registers TargetMachine constructor, so the pass being -/// initialized can use target dependent interfaces. Please do not move this -/// macro to be together with INITIALIZE_PASS, which is a complete target -/// independent initializer, and we don't want to make libScalarOpts depend -/// on libCodeGen. -#define INITIALIZE_TM_PASS(passName, arg, name, cfg, analysis) \ - INITIALIZE_TM_PASS_BEGIN(passName, arg, name, cfg, analysis) \ - INITIALIZE_TM_PASS_END(passName, arg, name, cfg, analysis) - #endif diff --git a/interpreter/llvm/src/include/llvm/CodeGen/PseudoSourceValue.h b/interpreter/llvm/src/include/llvm/CodeGen/PseudoSourceValue.h index 681ccb4b997cb..f5aedb07e4d2b 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/PseudoSourceValue.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/PseudoSourceValue.h @@ -94,7 +94,7 @@ class FixedStackPseudoSourceValue : public PseudoSourceValue { explicit FixedStackPseudoSourceValue(int FI) : PseudoSourceValue(FixedStack), FI(FI) {} - static inline bool classof(const PseudoSourceValue *V) { + static bool classof(const PseudoSourceValue *V) { return V->kind() == FixedStack; } @@ -126,7 +126,7 @@ class GlobalValuePseudoSourceValue : public CallEntryPseudoSourceValue { public: GlobalValuePseudoSourceValue(const GlobalValue *GV); - static inline bool classof(const PseudoSourceValue *V) { + static bool classof(const PseudoSourceValue *V) { return V->kind() == GlobalValueCallEntry; } @@ -140,7 +140,7 @@ class ExternalSymbolPseudoSourceValue : public CallEntryPseudoSourceValue { public: ExternalSymbolPseudoSourceValue(const char *ES); - static inline bool classof(const PseudoSourceValue *V) { + static bool classof(const PseudoSourceValue *V) { return V->kind() == ExternalSymbolCallEntry; } diff --git a/interpreter/llvm/src/include/llvm/CodeGen/RegAllocPBQP.h b/interpreter/llvm/src/include/llvm/CodeGen/RegAllocPBQP.h index 8872a5dc54a1d..5b342863eb50d 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/RegAllocPBQP.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/RegAllocPBQP.h @@ -130,10 +130,10 @@ inline hash_code hash_value(const AllowedRegVector &OptRegs) { /// \brief Holds graph-level metadata relevant to PBQP RA problems. class GraphMetadata { private: - typedef ValuePool AllowedRegVecPool; + using AllowedRegVecPool = ValuePool; public: - typedef AllowedRegVecPool::PoolRef AllowedRegVecRef; + using AllowedRegVecRef = AllowedRegVecPool::PoolRef; GraphMetadata(MachineFunction &MF, LiveIntervals &LIS, @@ -167,17 +167,17 @@ class GraphMetadata { /// \brief Holds solver state and other metadata relevant to each PBQP RA node. class NodeMetadata { public: - typedef RegAlloc::AllowedRegVector AllowedRegVector; + using AllowedRegVector = RegAlloc::AllowedRegVector; // The node's reduction state. The order in this enum is important, // as it is assumed nodes can only progress up (i.e. towards being // optimally reducible) when reducing the graph. 
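The INITIALIZE_TM_PASS machinery deleted above existed solely to hand a TargetMachine to a pass at construction time; with the create*Pass() functions above losing their const TargetMachine *TM parameters, a pass that still needs the target now queries it at run time. A hedged sketch of that replacement pattern (MyPass is hypothetical; the header path is where TargetPassConfig lives in this LLVM release):

    #include "llvm/Target/TargetPassConfig.h"

    // Assumes getAnalysisUsage() declared AU.addRequired<TargetPassConfig>().
    bool MyPass::runOnFunction(Function &F) {
      const TargetMachine &TM =
          getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
      // ... consult TM (subtarget, options) as the old ctor argument did.
      return false;
    }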
- typedef enum { + using ReductionState = enum { Unprocessed, NotProvablyAllocatable, ConservativelyAllocatable, OptimallyReducible - } ReductionState; + }; NodeMetadata() = default; @@ -267,23 +267,23 @@ class NodeMetadata { class RegAllocSolverImpl { private: - typedef MDMatrix RAMatrix; + using RAMatrix = MDMatrix; public: - typedef PBQP::Vector RawVector; - typedef PBQP::Matrix RawMatrix; - typedef PBQP::Vector Vector; - typedef RAMatrix Matrix; - typedef PBQP::PoolCostAllocator CostAllocator; + using RawVector = PBQP::Vector; + using RawMatrix = PBQP::Matrix; + using Vector = PBQP::Vector; + using Matrix = RAMatrix; + using CostAllocator = PBQP::PoolCostAllocator; - typedef GraphBase::NodeId NodeId; - typedef GraphBase::EdgeId EdgeId; + using NodeId = GraphBase::NodeId; + using EdgeId = GraphBase::EdgeId; - typedef RegAlloc::NodeMetadata NodeMetadata; - struct EdgeMetadata { }; - typedef RegAlloc::GraphMetadata GraphMetadata; + using NodeMetadata = RegAlloc::NodeMetadata; + struct EdgeMetadata {}; + using GraphMetadata = RegAlloc::GraphMetadata; - typedef PBQP::Graph Graph; + using Graph = PBQP::Graph; RegAllocSolverImpl(Graph &G) : G(G) {} @@ -426,7 +426,7 @@ class RegAllocSolverImpl { std::vector reduce() { assert(!G.empty() && "Cannot reduce empty graph."); - typedef GraphBase::NodeId NodeId; + using NodeId = GraphBase::NodeId; std::vector NodeStack; // Consume worklists. @@ -459,7 +459,6 @@ class RegAllocSolverImpl { ConservativelyAllocatableNodes.erase(NItr); NodeStack.push_back(NId); G.disconnectAllNeighborsFromNode(NId); - } else if (!NotProvablyAllocatableNodes.empty()) { NodeSet::iterator NItr = std::min_element(NotProvablyAllocatableNodes.begin(), @@ -493,7 +492,7 @@ class RegAllocSolverImpl { }; Graph& G; - typedef std::set NodeSet; + using NodeSet = std::set; NodeSet OptimallyReducibleNodes; NodeSet ConservativelyAllocatableNodes; NodeSet NotProvablyAllocatableNodes; @@ -501,7 +500,7 @@ class RegAllocSolverImpl { class PBQPRAGraph : public PBQP::Graph { private: - typedef PBQP::Graph BaseT; + using BaseT = PBQP::Graph; public: PBQPRAGraph(GraphMetadata Metadata) : BaseT(std::move(Metadata)) {} diff --git a/interpreter/llvm/src/include/llvm/CodeGen/RegAllocRegistry.h b/interpreter/llvm/src/include/llvm/CodeGen/RegAllocRegistry.h index 5c7e9999cc9a3..481747dc163ee 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/RegAllocRegistry.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/RegAllocRegistry.h @@ -1,4 +1,4 @@ -//===-- llvm/CodeGen/RegAllocRegistry.h -------------------------*- C++ -*-===// +//===- llvm/CodeGen/RegAllocRegistry.h --------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -19,16 +19,16 @@ namespace llvm { +class FunctionPass; + //===----------------------------------------------------------------------===// /// /// RegisterRegAlloc class - Track the registration of register allocators. /// //===----------------------------------------------------------------------===// class RegisterRegAlloc : public MachinePassRegistryNode { - public: - - typedef FunctionPass *(*FunctionPassCtor)(); + using FunctionPassCtor = FunctionPass *(*)(); static MachinePassRegistry Registry; @@ -36,22 +36,26 @@ class RegisterRegAlloc : public MachinePassRegistryNode { : MachinePassRegistryNode(N, D, (MachinePassCtor)C) { Registry.Add(this); } + ~RegisterRegAlloc() { Registry.Remove(this); } // Accessors. 
- // RegisterRegAlloc *getNext() const { return (RegisterRegAlloc *)MachinePassRegistryNode::getNext(); } + static RegisterRegAlloc *getList() { return (RegisterRegAlloc *)Registry.getList(); } + static FunctionPassCtor getDefault() { return (FunctionPassCtor)Registry.getDefault(); } + static void setDefault(FunctionPassCtor C) { Registry.setDefault((MachinePassCtor)C); } + static void setListener(MachinePassRegistryListener *L) { Registry.setListener(L); } @@ -59,5 +63,4 @@ class RegisterRegAlloc : public MachinePassRegistryNode { } // end namespace llvm - -#endif +#endif // LLVM_CODEGEN_REGALLOCREGISTRY_H diff --git a/interpreter/llvm/src/include/llvm/CodeGen/RegisterPressure.h b/interpreter/llvm/src/include/llvm/CodeGen/RegisterPressure.h index a3ea41d5236e3..e997aaf269e31 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/RegisterPressure.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/RegisterPressure.h @@ -32,7 +32,9 @@ namespace llvm { class LiveIntervals; +class MachineFunction; class MachineInstr; +class MachineRegisterInfo; class RegisterClassInfo; struct RegisterMaskPair { @@ -147,12 +149,14 @@ class PressureDiff { PressureChange PressureChanges[MaxPSets]; - typedef PressureChange* iterator; + using iterator = PressureChange *; + iterator nonconst_begin() { return &PressureChanges[0]; } iterator nonconst_end() { return &PressureChanges[MaxPSets]; } public: - typedef const PressureChange* const_iterator; + using const_iterator = const PressureChange *; + const_iterator begin() const { return &PressureChanges[0]; } const_iterator end() const { return &PressureChanges[MaxPSets]; } @@ -269,7 +273,7 @@ class LiveRegSet { } }; - typedef SparseSet RegSet; + using RegSet = SparseSet; RegSet Regs; unsigned NumRegUnits; diff --git a/interpreter/llvm/src/include/llvm/CodeGen/RegisterScavenging.h b/interpreter/llvm/src/include/llvm/CodeGen/RegisterScavenging.h index 1f939e72e1396..489c72b81a985 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/RegisterScavenging.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/RegisterScavenging.h @@ -156,12 +156,24 @@ class RegScavenger { /// available and do the appropriate bookkeeping. SPAdj is the stack /// adjustment due to call frame, it's passed along to eliminateFrameIndex(). /// Returns the scavenged register. + /// This is deprecated as it depends on the quality of the kill flags being + /// present; Use scavengeRegisterBackwards() instead! unsigned scavengeRegister(const TargetRegisterClass *RegClass, MachineBasicBlock::iterator I, int SPAdj); unsigned scavengeRegister(const TargetRegisterClass *RegClass, int SPAdj) { return scavengeRegister(RegClass, MBBI, SPAdj); } + /// Make a register of the specific register class available from the current + /// position backwards to the place before \p To. If \p RestoreAfter is true + /// this includes the instruction following the current position. + /// SPAdj is the stack adjustment due to call frame, it's passed along to + /// eliminateFrameIndex(). + /// Returns the scavenged register. + unsigned scavengeRegisterBackwards(const TargetRegisterClass &RC, + MachineBasicBlock::iterator To, + bool RestoreAfter, int SPAdj); + /// Tell the scavenger a register is used. void setRegUsed(unsigned Reg, LaneBitmask LaneMask = LaneBitmask::getAll()); @@ -202,8 +214,18 @@ class RegScavenger { /// Mark live-in registers of basic block as used. void setLiveInsUsed(const MachineBasicBlock &MBB); + + /// Spill a register after position \p After and reload it before position + /// \p UseMI. 
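scavengeRegisterBackwards(), declared in the RegisterScavenging.h hunk above, searches from the scavenger's current position backwards, so unlike the now-deprecated forward scavengeRegister() it does not depend on the accuracy of kill flags. A hedged sketch of a call site (the surrounding frame-index-elimination context is hypothetical):

    // Make a register of class RC usable at MI; the scavenger spills and
    // reloads around the range if nothing is free.
    unsigned getScratchReg(RegScavenger &RS, const TargetRegisterClass &RC,
                           MachineBasicBlock::iterator MI, int SPAdj) {
      return RS.scavengeRegisterBackwards(RC, MI, /*RestoreAfter=*/false,
                                          SPAdj);
    }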
+ ScavengedInfo &spill(unsigned Reg, const TargetRegisterClass &RC, int SPAdj, + MachineBasicBlock::iterator After, + MachineBasicBlock::iterator &UseMI); }; +/// Replaces all frame index virtual registers with physical registers. Uses the +/// register scavenger to find an appropriate register to use. +void scavengeFrameVirtualRegs(MachineFunction &MF, RegScavenger &RS); + } // end namespace llvm #endif // LLVM_CODEGEN_REGISTERSCAVENGING_H diff --git a/interpreter/llvm/src/include/llvm/CodeGen/RegisterUsageInfo.h b/interpreter/llvm/src/include/llvm/CodeGen/RegisterUsageInfo.h index 3f88032cb6385..0a04bc6a89f4d 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/RegisterUsageInfo.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/RegisterUsageInfo.h @@ -1,4 +1,4 @@ -//==- RegisterUsageInfo.h - Register Usage Informartion Storage -*- C++ -*-===// +//==- RegisterUsageInfo.h - Register Usage Informartion Storage --*- C++ -*-==// // // The LLVM Compiler Infrastructure // @@ -20,15 +20,15 @@ #define LLVM_CODEGEN_PHYSICALREGISTERUSAGEINFO_H #include "llvm/ADT/DenseMap.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/Module.h" #include "llvm/Pass.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/raw_ostream.h" +#include +#include namespace llvm { +class Function; +class TargetMachine; + class PhysicalRegisterUsageInfo : public ImmutablePass { virtual void anchor(); @@ -70,6 +70,7 @@ class PhysicalRegisterUsageInfo : public ImmutablePass { const TargetMachine *TM; }; -} -#endif +} // end namespace llvm + +#endif // LLVM_CODEGEN_PHYSICALREGISTERUSAGEINFO_H diff --git a/interpreter/llvm/src/include/llvm/CodeGen/RuntimeLibcalls.h b/interpreter/llvm/src/include/llvm/CodeGen/RuntimeLibcalls.h index ddfabb0c44d63..08151be110832 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/RuntimeLibcalls.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/RuntimeLibcalls.h @@ -333,12 +333,24 @@ namespace RTLIB { MEMSET, MEMMOVE, - // ELEMENT-WISE ATOMIC MEMORY - MEMCPY_ELEMENT_ATOMIC_1, - MEMCPY_ELEMENT_ATOMIC_2, - MEMCPY_ELEMENT_ATOMIC_4, - MEMCPY_ELEMENT_ATOMIC_8, - MEMCPY_ELEMENT_ATOMIC_16, + // ELEMENT-WISE UNORDERED-ATOMIC MEMORY of different element sizes + MEMCPY_ELEMENT_UNORDERED_ATOMIC_1, + MEMCPY_ELEMENT_UNORDERED_ATOMIC_2, + MEMCPY_ELEMENT_UNORDERED_ATOMIC_4, + MEMCPY_ELEMENT_UNORDERED_ATOMIC_8, + MEMCPY_ELEMENT_UNORDERED_ATOMIC_16, + + MEMMOVE_ELEMENT_UNORDERED_ATOMIC_1, + MEMMOVE_ELEMENT_UNORDERED_ATOMIC_2, + MEMMOVE_ELEMENT_UNORDERED_ATOMIC_4, + MEMMOVE_ELEMENT_UNORDERED_ATOMIC_8, + MEMMOVE_ELEMENT_UNORDERED_ATOMIC_16, + + MEMSET_ELEMENT_UNORDERED_ATOMIC_1, + MEMSET_ELEMENT_UNORDERED_ATOMIC_2, + MEMSET_ELEMENT_UNORDERED_ATOMIC_4, + MEMSET_ELEMENT_UNORDERED_ATOMIC_8, + MEMSET_ELEMENT_UNORDERED_ATOMIC_16, // EXCEPTION HANDLING UNWIND_RESUME, @@ -511,9 +523,21 @@ namespace RTLIB { /// UNKNOWN_LIBCALL if there is none. Libcall getSYNC(unsigned Opc, MVT VT); - /// getMEMCPY_ELEMENT_ATOMIC - Return MEMCPY_ELEMENT_ATOMIC_* value for the - /// given element size or UNKNOW_LIBCALL if there is none. - Libcall getMEMCPY_ELEMENT_ATOMIC(uint64_t ElementSize); + /// getMEMCPY_ELEMENT_UNORDERED_ATOMIC - Return + /// MEMCPY_ELEMENT_UNORDERED_ATOMIC_* value for the given element size or + /// UNKNOW_LIBCALL if there is none. 
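The three families of RTLIB accessors being added here all follow the same shape: map a power-of-two element size onto the matching libcall enumerator, falling back to UNKNOWN_LIBCALL. A hedged re-sketch of that mapping (the real implementations live in a .cpp this patch does not show; the function name here is illustrative):

    RTLIB::Libcall memcpyElementUnorderedAtomic(uint64_t ElementSize) {
      switch (ElementSize) {
      case 1:  return RTLIB::MEMCPY_ELEMENT_UNORDERED_ATOMIC_1;
      case 2:  return RTLIB::MEMCPY_ELEMENT_UNORDERED_ATOMIC_2;
      case 4:  return RTLIB::MEMCPY_ELEMENT_UNORDERED_ATOMIC_4;
      case 8:  return RTLIB::MEMCPY_ELEMENT_UNORDERED_ATOMIC_8;
      case 16: return RTLIB::MEMCPY_ELEMENT_UNORDERED_ATOMIC_16;
      default: return RTLIB::UNKNOWN_LIBCALL;
      }
    }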
+ Libcall getMEMCPY_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize); + + /// getMEMMOVE_ELEMENT_UNORDERED_ATOMIC - Return + /// MEMMOVE_ELEMENT_UNORDERED_ATOMIC_* value for the given element size or + /// UNKNOW_LIBCALL if there is none. + Libcall getMEMMOVE_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize); + + /// getMEMSET_ELEMENT_UNORDERED_ATOMIC - Return + /// MEMSET_ELEMENT_UNORDERED_ATOMIC_* value for the given element size or + /// UNKNOW_LIBCALL if there is none. + Libcall getMEMSET_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize); + } } diff --git a/interpreter/llvm/src/include/llvm/CodeGen/ScheduleDAG.h b/interpreter/llvm/src/include/llvm/CodeGen/ScheduleDAG.h index 99afd8c5c9ab9..25afc5b506df6 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/ScheduleDAG.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/ScheduleDAG.h @@ -18,9 +18,9 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/GraphTraits.h" -#include "llvm/ADT/iterator.h" #include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/iterator.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetLowering.h" @@ -52,14 +52,14 @@ class TargetRegisterInfo; /// These are the different kinds of scheduling dependencies. enum Kind { Data, ///< Regular data dependence (aka true-dependence). - Anti, ///< A register anti-dependedence (aka WAR). + Anti, ///< A register anti-dependence (aka WAR). Output, ///< A register output-dependence (aka WAW). Order ///< Any other ordering dependency. }; // Strong dependencies must be respected by the scheduler. Artificial // dependencies may be removed only if they are redundant with another - // strong depedence. + // strong dependence. // // Weak dependencies may be violated by the scheduling strategy, but only if // the strategy can prove it is correct to do so. @@ -235,6 +235,9 @@ class TargetRegisterInfo; "SDep::Output edge cannot use the zero register!"); Contents.Reg = Reg; } + + raw_ostream &print(raw_ostream &O, + const TargetRegisterInfo *TRI = nullptr) const; }; template <> @@ -342,7 +345,7 @@ class TargetRegisterInfo; /// BoundaryNodes can have DAG edges, including Data edges, but they do not /// correspond to schedulable entities (e.g. instructions) and do not have a /// valid ID. Consequently, always check for boundary nodes before accessing - /// an assoicative data structure keyed on node ID. + /// an associative data structure keyed on node ID. bool isBoundaryNode() const { return NodeNum == BoundaryID; } /// Assigns the representative SDNode for this SUnit. 
This may be used @@ -458,7 +461,10 @@ class TargetRegisterInfo; void dump(const ScheduleDAG *G) const; void dumpAll(const ScheduleDAG *G) const; - void print(raw_ostream &O, const ScheduleDAG *G) const; + raw_ostream &print(raw_ostream &O, + const SUnit *N = nullptr, + const SUnit *X = nullptr) const; + raw_ostream &print(raw_ostream &O, const ScheduleDAG *G) const; private: void ComputeDepth(); diff --git a/interpreter/llvm/src/include/llvm/CodeGen/ScheduleDAGInstrs.h b/interpreter/llvm/src/include/llvm/CodeGen/ScheduleDAGInstrs.h index 21e1740aa6b88..218e22e402349 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/ScheduleDAGInstrs.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/ScheduleDAGInstrs.h @@ -1,4 +1,4 @@ -//==- ScheduleDAGInstrs.h - MachineInstr Scheduling --------------*- C++ -*-==// +//===- ScheduleDAGInstrs.h - MachineInstr Scheduling ------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -15,21 +15,38 @@ #ifndef LLVM_CODEGEN_SCHEDULEDAGINSTRS_H #define LLVM_CODEGEN_SCHEDULEDAGINSTRS_H -#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/PointerIntPair.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/SparseMultiSet.h" #include "llvm/ADT/SparseSet.h" +#include "llvm/CodeGen/LivePhysRegs.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/TargetSchedule.h" -#include "llvm/Support/Compiler.h" +#include "llvm/MC/LaneBitmask.h" #include "llvm/Target/TargetRegisterInfo.h" +#include +#include #include +#include +#include namespace llvm { + + class LiveIntervals; class MachineFrameInfo; + class MachineFunction; + class MachineInstr; class MachineLoopInfo; - class MachineDominatorTree; - class RegPressureTracker; + class MachineOperand; + struct MCSchedClassDesc; class PressureDiffs; + class PseudoSourceValue; + class RegPressureTracker; + class UndefValue; + class Value; /// An individual mapping from virtual register number to SUnit. struct VReg2SUnit { @@ -69,31 +86,34 @@ namespace llvm { /// Use a SparseMultiSet to track physical registers. Storage is only /// allocated once for the pass. It can be cleared in constant time and reused /// without any frees. - typedef SparseMultiSet, uint16_t> - Reg2SUnitsMap; + using Reg2SUnitsMap = + SparseMultiSet, uint16_t>; /// Use SparseSet as a SparseMap by relying on the fact that it never /// compares ValueT's, only unsigned keys. This allows the set to be cleared /// between scheduling regions in constant time as long as ValueT does not /// require a destructor. - typedef SparseSet VReg2SUnitMap; + using VReg2SUnitMap = SparseSet; /// Track local uses of virtual registers. These uses are gathered by the DAG /// builder and may be consulted by the scheduler to avoid iterating an entire /// vreg use list. - typedef SparseMultiSet VReg2SUnitMultiMap; + using VReg2SUnitMultiMap = SparseMultiSet; + + using VReg2SUnitOperIdxMultiMap = + SparseMultiSet; - typedef SparseMultiSet - VReg2SUnitOperIdxMultiMap; + using ValueType = PointerUnion; - typedef PointerUnion ValueType; struct UnderlyingObject : PointerIntPair { UnderlyingObject(ValueType V, bool MayAlias) : PointerIntPair(V, MayAlias) {} + ValueType getValue() const { return getPointer(); } bool mayAlias() const { return getInt(); } }; - typedef SmallVector UnderlyingObjectsVector; + + using UnderlyingObjectsVector = SmallVector; /// A ScheduleDAG for scheduling lists of MachineInstr. 
class ScheduleDAGInstrs : public ScheduleDAG { @@ -113,10 +133,10 @@ namespace llvm { /// reordering. A specialized scheduler can override /// TargetInstrInfo::isSchedulingBoundary then enable this flag to indicate /// it has taken responsibility for scheduling the terminator correctly. - bool CanHandleTerminators; + bool CanHandleTerminators = false; /// Whether lane masks should get tracked. - bool TrackLaneMasks; + bool TrackLaneMasks = false; // State specific to the current scheduling region. // ------------------------------------------------ @@ -154,12 +174,12 @@ namespace llvm { /// Tracks the last instructions in this region using each virtual register. VReg2SUnitOperIdxMultiMap CurrentVRegUses; - AliasAnalysis *AAForDep; + AliasAnalysis *AAForDep = nullptr; /// Remember a generic side-effecting instruction as we proceed. /// No other SU ever gets scheduled around it (except in the special /// case of a huge region that gets reduced). - SUnit *BarrierChain; + SUnit *BarrierChain = nullptr; public: /// A list of SUnits, used in Value2SUsMap, during DAG construction. @@ -167,7 +187,7 @@ namespace llvm { /// implementation of this data structure, such as a singly linked list /// with a memory pool (SmallVector was tried but slow and SparseSet is not /// applicable). - typedef std::list SUList; + using SUList = std::list; protected: /// \brief A map from ValueType to SUList, used during DAG construction, as @@ -215,23 +235,23 @@ namespace llvm { /// For an unanalyzable memory access, this Value is used in maps. UndefValue *UnknownValue; - typedef std::vector> - DbgValueVector; + using DbgValueVector = + std::vector>; /// Remember instruction that precedes DBG_VALUE. /// These are generated by buildSchedGraph but persist so they can be /// referenced when emitting the final schedule. DbgValueVector DbgValues; - MachineInstr *FirstDbgValue; + MachineInstr *FirstDbgValue = nullptr; /// Set of live physical registers for updating kill flags. - BitVector LiveRegs; + LivePhysRegs LiveRegs; public: explicit ScheduleDAGInstrs(MachineFunction &mf, const MachineLoopInfo *mli, bool RemoveKillFlags = false); - ~ScheduleDAGInstrs() override {} + ~ScheduleDAGInstrs() override = default; /// Gets the machine model for instruction scheduling. const TargetSchedModel *getSchedModel() const { return &SchedModel; } @@ -311,7 +331,7 @@ namespace llvm { std::string getDAGName() const override; /// Fixes register kill flags that scheduling has made invalid. 
- void fixupKills(MachineBasicBlock *MBB); + void fixupKills(MachineBasicBlock &MBB); protected: void initSUnits(); @@ -353,6 +373,7 @@ namespace llvm { return nullptr; return I->second; } + } // end namespace llvm -#endif +#endif // LLVM_CODEGEN_SCHEDULEDAGINSTRS_H diff --git a/interpreter/llvm/src/include/llvm/CodeGen/ScheduleDFS.h b/interpreter/llvm/src/include/llvm/CodeGen/ScheduleDFS.h index c2013661cfff8..d6a8c791392ca 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/ScheduleDFS.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/ScheduleDFS.h @@ -17,9 +17,9 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/ScheduleDAG.h" -#include #include #include +#include namespace llvm { diff --git a/interpreter/llvm/src/include/llvm/CodeGen/SchedulerRegistry.h b/interpreter/llvm/src/include/llvm/CodeGen/SchedulerRegistry.h index a7a6227664dee..badf927d0e956 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/SchedulerRegistry.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/SchedulerRegistry.h @@ -1,4 +1,4 @@ -//===-- llvm/CodeGen/SchedulerRegistry.h ------------------------*- C++ -*-===// +//===- llvm/CodeGen/SchedulerRegistry.h -------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -16,7 +16,7 @@ #define LLVM_CODEGEN_SCHEDULERREGISTRY_H #include "llvm/CodeGen/MachinePassRegistry.h" -#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/CodeGen.h" namespace llvm { @@ -26,15 +26,13 @@ namespace llvm { /// //===----------------------------------------------------------------------===// -class SelectionDAGISel; class ScheduleDAGSDNodes; -class SelectionDAG; -class MachineBasicBlock; +class SelectionDAGISel; class RegisterScheduler : public MachinePassRegistryNode { public: - typedef ScheduleDAGSDNodes *(*FunctionPassCtor)(SelectionDAGISel*, - CodeGenOpt::Level); + using FunctionPassCtor = ScheduleDAGSDNodes *(*)(SelectionDAGISel*, + CodeGenOpt::Level); static MachinePassRegistry Registry; @@ -45,13 +43,14 @@ class RegisterScheduler : public MachinePassRegistryNode { // Accessors. 
- // RegisterScheduler *getNext() const { return (RegisterScheduler *)MachinePassRegistryNode::getNext(); } + static RegisterScheduler *getList() { return (RegisterScheduler *)Registry.getList(); } + static void setListener(MachinePassRegistryListener *L) { Registry.setListener(L); } @@ -103,4 +102,4 @@ ScheduleDAGSDNodes *createDAGLinearizer(SelectionDAGISel *IS, } // end namespace llvm -#endif +#endif // LLVM_CODEGEN_SCHEDULERREGISTRY_H diff --git a/interpreter/llvm/src/include/llvm/CodeGen/SelectionDAG.h b/interpreter/llvm/src/include/llvm/CodeGen/SelectionDAG.h index d761661f763ea..d6851f7143a51 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/SelectionDAG.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/SelectionDAG.h @@ -1,4 +1,4 @@ -//===-- llvm/CodeGen/SelectionDAG.h - InstSelection DAG ---------*- C++ -*-===// +//===- llvm/CodeGen/SelectionDAG.h - InstSelection DAG ----------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -15,35 +15,72 @@ #ifndef LLVM_CODEGEN_SELECTIONDAG_H #define LLVM_CODEGEN_SELECTIONDAG_H +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/ilist.h" +#include "llvm/ADT/iterator.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/DAGCombine.h" +#include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Metadata.h" +#include "llvm/Support/Allocator.h" #include "llvm/Support/ArrayRecycler.h" +#include "llvm/Support/AtomicOrdering.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CodeGen.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/RecyclingAllocator.h" -#include "llvm/Target/TargetMachine.h" +#include #include +#include +#include #include #include +#include +#include #include namespace llvm { +class BlockAddress; +class Constant; +class ConstantFP; +class ConstantInt; +class DataLayout; +struct fltSemantics; +class GlobalValue; struct KnownBits; +class LLVMContext; +class MachineBasicBlock; class MachineConstantPoolValue; -class MachineFunction; -class MDNode; +class MCSymbol; class OptimizationRemarkEmitter; class SDDbgValue; -class TargetLowering; +class SelectionDAG; class SelectionDAGTargetInfo; +class TargetLowering; +class TargetMachine; +class TargetSubtargetInfo; +class Value; class SDVTListNode : public FoldingSetNode { friend struct FoldingSetTrait; + /// A reference to an Interned FoldingSetNodeID for this node. /// The Allocator in SelectionDAG holds the data. /// SDVTList contains all types which are frequently accessed in SelectionDAG. @@ -55,11 +92,13 @@ class SDVTListNode : public FoldingSetNode { /// The hash value for SDVTList is fixed, so cache it to avoid /// hash calculation. 
unsigned HashValue; + public: SDVTListNode(const FoldingSetNodeIDRef ID, const EVT *VT, unsigned int Num) : FastID(ID), VTs(VT), NumVTs(Num) { HashValue = ID.ComputeHash(); } + SDVTList getSDVTList() { SDVTList result = {VTs, NumVTs}; return result; @@ -72,12 +111,14 @@ template<> struct FoldingSetTrait : DefaultFoldingSetTrait DbgValues; SmallVector ByvalParmDbgValues; - typedef DenseMap > DbgValMapType; + using DbgValMapType = DenseMap>; DbgValMapType DbgValMap; - void operator=(const SDDbgInfo&) = delete; - SDDbgInfo(const SDDbgInfo&) = delete; public: - SDDbgInfo() {} + SDDbgInfo() = default; + SDDbgInfo(const SDDbgInfo &) = delete; + SDDbgInfo &operator=(const SDDbgInfo &) = delete; void add(SDDbgValue *V, const SDNode *Node, bool isParameter) { if (isParameter) { @@ -144,14 +185,14 @@ class SDDbgInfo { return ArrayRef(); } - typedef SmallVectorImpl::iterator DbgIterator; + using DbgIterator = SmallVectorImpl::iterator; + DbgIterator DbgBegin() { return DbgValues.begin(); } DbgIterator DbgEnd() { return DbgValues.end(); } DbgIterator ByvalParmDbgBegin() { return ByvalParmDbgValues.begin(); } DbgIterator ByvalParmDbgEnd() { return ByvalParmDbgValues.end(); } }; -class SelectionDAG; void checkForCycles(const SelectionDAG *DAG, bool force = false); /// This is used to represent a portion of an LLVM function in a low-level @@ -167,8 +208,8 @@ void checkForCycles(const SelectionDAG *DAG, bool force = false); /// class SelectionDAG { const TargetMachine &TM; - const SelectionDAGTargetInfo *TSI; - const TargetLowering *TLI; + const SelectionDAGTargetInfo *TSI = nullptr; + const TargetLowering *TLI = nullptr; MachineFunction *MF; LLVMContext *Context; CodeGenOpt::Level OptLevel; @@ -188,9 +229,9 @@ class SelectionDAG { /// The AllocatorType for allocating SDNodes. We use /// pool allocation with recycling. - typedef RecyclingAllocator - NodeAllocatorType; + using NodeAllocatorType = RecyclingAllocator; /// Pool allocation for nodes. NodeAllocatorType NodeAllocator; @@ -243,9 +284,11 @@ class SelectionDAG { struct DAGNodeDeletedListener : public DAGUpdateListener { std::function Callback; + DAGNodeDeletedListener(SelectionDAG &DAG, std::function Callback) : DAGUpdateListener(DAG), Callback(std::move(Callback)) {} + void NodeDeleted(SDNode *N, SDNode *E) override { Callback(N, E); } }; @@ -254,7 +297,7 @@ class SelectionDAG { /// have legal types. This is important after type legalization since /// any illegally typed nodes generated after this point will not experience /// type legalization. - bool NewNodesMustHaveLegalTypes; + bool NewNodesMustHaveLegalTypes = false; private: /// DAGUpdateListener is a friend so it can manipulate the listener stack. @@ -262,7 +305,7 @@ class SelectionDAG { /// Linked list of registered DAGUpdateListener instances. /// This stack is maintained by DAGUpdateListener RAII. - DAGUpdateListener *UpdateListeners; + DAGUpdateListener *UpdateListeners = nullptr; /// Implementation of setSubgraphColor. /// Return whether we had to truncate the search. @@ -316,11 +359,10 @@ class SelectionDAG { Node->OperandList = nullptr; } - void operator=(const SelectionDAG&) = delete; - SelectionDAG(const SelectionDAG&) = delete; - public: - explicit SelectionDAG(const TargetMachine &TM, llvm::CodeGenOpt::Level); + explicit SelectionDAG(const TargetMachine &TM, CodeGenOpt::Level); + SelectionDAG(const SelectionDAG &) = delete; + SelectionDAG &operator=(const SelectionDAG &) = delete; ~SelectionDAG(); /// Prepare this SelectionDAG to process code in the given MachineFunction. 
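The SDDbgInfo and SelectionDAG hunks here move deleted copy operations out of the private section and into the public one, the modern idiom: with public deleted members the compiler reports "call to deleted function" rather than the misleading "member is private". A minimal illustration (not from the patch):

    class Uncopyable {
    public:
      Uncopyable() = default;
      // Public deleted members give the clearest diagnostics.
      Uncopyable(const Uncopyable &) = delete;
      Uncopyable &operator=(const Uncopyable &) = delete;
    };

    // Uncopyable A, B(A);   // error: call to deleted constructor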
@@ -364,12 +406,16 @@ class SelectionDAG { /// Convenience for setting subgraph color attribute. void setSubgraphColor(SDNode *N, const char *Color); - typedef ilist::const_iterator allnodes_const_iterator; + using allnodes_const_iterator = ilist::const_iterator; + allnodes_const_iterator allnodes_begin() const { return AllNodes.begin(); } allnodes_const_iterator allnodes_end() const { return AllNodes.end(); } - typedef ilist::iterator allnodes_iterator; + + using allnodes_iterator = ilist::iterator; + allnodes_iterator allnodes_begin() { return AllNodes.begin(); } allnodes_iterator allnodes_end() { return AllNodes.end(); } + ilist::size_type allnodes_size() const { return AllNodes.size(); } @@ -475,7 +521,6 @@ class SelectionDAG { //===--------------------------------------------------------------------===// // Node creation methods. - // /// \brief Create a ConstantSDNode wrapping a constant value. /// If VT is a vector type, the constant is splatted into a BUILD_VECTOR. @@ -882,7 +927,7 @@ class SelectionDAG { SDValue Cmp, SDValue Swp, MachinePointerInfo PtrInfo, unsigned Alignment, AtomicOrdering SuccessOrdering, AtomicOrdering FailureOrdering, - SynchronizationScope SynchScope); + SyncScope::ID SSID); SDValue getAtomicCmpSwap(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDVTList VTs, SDValue Chain, SDValue Ptr, SDValue Cmp, SDValue Swp, MachineMemOperand *MMO); @@ -892,7 +937,7 @@ class SelectionDAG { SDValue getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Val, const Value *PtrVal, unsigned Alignment, AtomicOrdering Ordering, - SynchronizationScope SynchScope); + SyncScope::ID SSID); SDValue getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Val, MachineMemOperand *MMO); @@ -1070,6 +1115,11 @@ class SelectionDAG { SDNode *MorphNodeTo(SDNode *N, unsigned Opc, SDVTList VTs, ArrayRef Ops); + /// Mutate the specified strict FP node to its non-strict equivalent, + /// unlinking the node from its chain and dropping the metadata arguments. + /// The node must be a strict FP node. + SDNode *mutateStrictFPToFP(SDNode *Node); + /// These are used for target selectors to create a new node /// with specified return type(s), MachineInstr opcode, and operands. /// @@ -1167,6 +1217,13 @@ class SelectionDAG { void ReplaceAllUsesOfValuesWith(const SDValue *From, const SDValue *To, unsigned Num); + /// If an existing load has uses of its chain, create a token factor node with + /// that chain and the new memory node's chain and update users of the old + /// chain to the token factor. This ensures that the new memory node will have + /// the same relative memory dependency position as the old load. Returns the + /// new merged load chain. + SDValue makeEquivalentMemoryOrdering(LoadSDNode *Old, SDValue New); + /// Topological-sort the AllNodes list and a /// assign a unique node id for each node in the DAG based on their /// topological order. Returns the number of nodes. @@ -1179,39 +1236,6 @@ class SelectionDAG { AllNodes.insert(Position, AllNodes.remove(N)); } - /// Returns true if the opcode is a commutative binary operation. - static bool isCommutativeBinOp(unsigned Opcode) { - // FIXME: This should get its info from the td file, so that we can include - // target info. 
- switch (Opcode) { - case ISD::ADD: - case ISD::SMIN: - case ISD::SMAX: - case ISD::UMIN: - case ISD::UMAX: - case ISD::MUL: - case ISD::MULHU: - case ISD::MULHS: - case ISD::SMUL_LOHI: - case ISD::UMUL_LOHI: - case ISD::FADD: - case ISD::FMUL: - case ISD::AND: - case ISD::OR: - case ISD::XOR: - case ISD::SADDO: - case ISD::UADDO: - case ISD::ADDC: - case ISD::ADDE: - case ISD::FMINNUM: - case ISD::FMAXNUM: - case ISD::FMINNAN: - case ISD::FMAXNAN: - return true; - default: return false; - } - } - /// Returns an APFloat semantics tag appropriate for the given type. If VT is /// a vector type, the element semantics are returned. static const fltSemantics &EVTToAPFloatSemantics(EVT VT) { @@ -1246,9 +1270,11 @@ class SelectionDAG { SDDbgInfo::DbgIterator DbgBegin() { return DbgInfo->DbgBegin(); } SDDbgInfo::DbgIterator DbgEnd() { return DbgInfo->DbgEnd(); } + SDDbgInfo::DbgIterator ByvalParmDbgBegin() { return DbgInfo->ByvalParmDbgBegin(); } + SDDbgInfo::DbgIterator ByvalParmDbgEnd() { return DbgInfo->ByvalParmDbgEnd(); } @@ -1474,10 +1500,12 @@ class SelectionDAG { }; template <> struct GraphTraits : public GraphTraits { - typedef pointer_iterator nodes_iterator; + using nodes_iterator = pointer_iterator; + static nodes_iterator nodes_begin(SelectionDAG *G) { return nodes_iterator(G->allnodes_begin()); } + static nodes_iterator nodes_end(SelectionDAG *G) { return nodes_iterator(G->allnodes_end()); } @@ -1488,7 +1516,6 @@ SDValue SelectionDAG::getTargetMemSDNode(SDVTList VTs, ArrayRef Ops, const SDLoc &dl, EVT MemVT, MachineMemOperand *MMO) { - /// Compose node ID and try to find an existing node. FoldingSetNodeID ID; unsigned Opcode = @@ -1519,6 +1546,6 @@ SDValue SelectionDAG::getTargetMemSDNode(SDVTList VTs, return SDValue(N, 0); } -} // end namespace llvm +} // end namespace llvm -#endif +#endif // LLVM_CODEGEN_SELECTIONDAG_H diff --git a/interpreter/llvm/src/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h b/interpreter/llvm/src/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h new file mode 100644 index 0000000000000..2107e5a313819 --- /dev/null +++ b/interpreter/llvm/src/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h @@ -0,0 +1,64 @@ +//===-- llvm/CodeGen/SelectionDAGAddressAnalysis.h ------- DAG Address Analysis +//---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// + +#ifndef LLVM_CODEGEN_SELECTIONDAGADDRESSANALYSIS_H +#define LLVM_CODEGEN_SELECTIONDAGADDRESSANALYSIS_H + +#include "llvm/CodeGen/ISDOpcodes.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" + +namespace llvm { +/// Helper struct to parse and store a memory address as base + index + offset. +/// We ignore sign extensions when it is safe to do so. +/// The following two expressions are not equivalent. To differentiate we need +/// to store whether there was a sign extension involved in the index +/// computation. 
+/// (load (i64 add (i64 copyfromreg %c) +/// (i64 signextend (add (i8 load %index) +/// (i8 1)))) +/// vs +/// +/// (load (i64 add (i64 copyfromreg %c) +/// (i64 signextend (i32 add (i32 signextend (i8 load %index)) +/// (i32 1))))) +class BaseIndexOffset { +private: + SDValue Base; + SDValue Index; + int64_t Offset; + bool IsIndexSignExt; + +public: + BaseIndexOffset() : Offset(0), IsIndexSignExt(false) {} + + BaseIndexOffset(SDValue Base, SDValue Index, int64_t Offset, + bool IsIndexSignExt) + : Base(Base), Index(Index), Offset(Offset), + IsIndexSignExt(IsIndexSignExt) {} + + SDValue getBase() { return Base; } + SDValue getIndex() { return Index; } + + bool equalBaseIndex(BaseIndexOffset &Other, const SelectionDAG &DAG) { + int64_t Off; + return equalBaseIndex(Other, DAG, Off); + } + + bool equalBaseIndex(BaseIndexOffset &Other, const SelectionDAG &DAG, + int64_t &Off); + + /// Parses tree in Ptr for base, index, offset addresses. + static BaseIndexOffset match(SDValue Ptr, const SelectionDAG &DAG); +}; +} // namespace llvm + +#endif diff --git a/interpreter/llvm/src/include/llvm/CodeGen/SelectionDAGNodes.h b/interpreter/llvm/src/include/llvm/CodeGen/SelectionDAGNodes.h index 35ddcf80c91f1..5fb69ae232af8 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/SelectionDAGNodes.h @@ -24,11 +24,11 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/GraphTraits.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/ilist_node.h" #include "llvm/ADT/iterator.h" #include "llvm/ADT/iterator_range.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineValueType.h" @@ -37,6 +37,7 @@ #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/Metadata.h" #include "llvm/Support/AlignOf.h" #include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/Casting.h" @@ -53,14 +54,18 @@ namespace llvm { -class SelectionDAG; +class APInt; +class Constant; +template struct DenseMapInfo; class GlobalValue; class MachineBasicBlock; class MachineConstantPoolValue; +class MCSymbol; +class raw_ostream; class SDNode; +class SelectionDAG; +class Type; class Value; -class MCSymbol; -template struct DenseMapInfo; void checkForCycles(const SDNode *N, const SelectionDAG *DAG = nullptr, bool force = false); @@ -80,7 +85,10 @@ namespace ISD { /// If N is a BUILD_VECTOR node whose elements are all the same constant or /// undefined, return true and return the constant value in \p SplatValue. - bool isConstantSplatVector(const SDNode *N, APInt &SplatValue); + /// This sets \p SplatValue to the smallest possible splat unless AllowShrink + /// is set to false. + bool isConstantSplatVector(const SDNode *N, APInt &SplatValue, + bool AllowShrink = true); /// Return true if the specified node is a BUILD_VECTOR where all of the /// elements are ~0 or undef. @@ -229,13 +237,15 @@ template <> struct isPodLike { static const bool value = true; }; /// Allow casting operators to work directly on /// SDValues as if they were SDNode*'s. 
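BaseIndexOffset, introduced in the new header above, decomposes a pointer SDValue into base + index + constant offset (tracking whether the index was sign-extended) so that store-merging style combines can compare addresses. A hedged usage sketch (St1, St2 and DAG are assumed to come from surrounding DAGCombiner-like code):

    BaseIndexOffset B1 = BaseIndexOffset::match(St1->getBasePtr(), DAG);
    BaseIndexOffset B2 = BaseIndexOffset::match(St2->getBasePtr(), DAG);
    int64_t ByteDist;
    if (B1.equalBaseIndex(B2, DAG, ByteDist)) {
      // Same base and index: the accesses are ByteDist bytes apart and
      // are candidates for merging or alias disambiguation.
    }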
template<> struct simplify_type { - typedef SDNode* SimpleType; + using SimpleType = SDNode *; + static SimpleType getSimplifiedValue(SDValue &Val) { return Val.getNode(); } }; template<> struct simplify_type { - typedef /*const*/ SDNode* SimpleType; + using SimpleType = /*const*/ SDNode *; + static SimpleType getSimplifiedValue(const SDValue &Val) { return Val.getNode(); } @@ -330,7 +340,8 @@ class SDUse { /// simplify_type specializations - Allow casting operators to work directly on /// SDValues as if they were SDNode*'s. template<> struct simplify_type { - typedef SDNode* SimpleType; + using SimpleType = SDNode *; + static SimpleType getSimplifiedValue(SDUse &Val) { return Val.getNode(); } @@ -612,6 +623,32 @@ class SDNode : public FoldingSetNode, public ilist_node { SDNodeBits.IsMemIntrinsic; } + /// Test if this node is a strict floating point pseudo-op. + bool isStrictFPOpcode() { + switch (NodeType) { + default: + return false; + case ISD::STRICT_FADD: + case ISD::STRICT_FSUB: + case ISD::STRICT_FMUL: + case ISD::STRICT_FDIV: + case ISD::STRICT_FREM: + case ISD::STRICT_FSQRT: + case ISD::STRICT_FPOW: + case ISD::STRICT_FPOWI: + case ISD::STRICT_FSIN: + case ISD::STRICT_FCOS: + case ISD::STRICT_FEXP: + case ISD::STRICT_FEXP2: + case ISD::STRICT_FLOG: + case ISD::STRICT_FLOG10: + case ISD::STRICT_FLOG2: + case ISD::STRICT_FRINT: + case ISD::STRICT_FNEARBYINT: + return true; + } + } + /// Test if this node has a post-isel opcode, directly /// corresponding to a MachineInstr opcode. bool isMachineOpcode() const { return NodeType < 0; } @@ -669,10 +706,10 @@ class SDNode : public FoldingSetNode, public ilist_node { explicit use_iterator(SDUse *op) : Op(op) {} public: - typedef std::iterator::reference reference; - typedef std::iterator::pointer pointer; + using reference = std::iterator::reference; + using pointer = std::iterator::pointer; use_iterator() = default; use_iterator(const use_iterator &I) : Op(I.Op) {} @@ -764,7 +801,8 @@ class SDNode : public FoldingSetNode, public ilist_node { /// if DAG changes. static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl &Visited, - SmallVectorImpl &Worklist) { + SmallVectorImpl &Worklist, + unsigned int MaxSteps = 0) { if (Visited.count(N)) return true; while (!Worklist.empty()) { @@ -779,6 +817,8 @@ class SDNode : public FoldingSetNode, public ilist_node { } if (Found) return true; + if (MaxSteps != 0 && Visited.size() >= MaxSteps) + return false; } return false; } @@ -798,7 +838,7 @@ class SDNode : public FoldingSetNode, public ilist_node { return OperandList[Num]; } - typedef SDUse* op_iterator; + using op_iterator = SDUse *; op_iterator op_begin() const { return OperandList; } op_iterator op_end() const { return OperandList+NumOperands; } @@ -870,7 +910,8 @@ class SDNode : public FoldingSetNode, public ilist_node { return getValueType(ResNo).getSizeInBits(); } - typedef const EVT* value_iterator; + using value_iterator = const EVT *; + value_iterator value_begin() const { return ValueList; } value_iterator value_end() const { return ValueList+NumValues; } @@ -1178,8 +1219,8 @@ class MemSDNode : public SDNode { /// Returns the Ranges that describes the dereference. const MDNode *getRanges() const { return MMO->getRanges(); } - /// Return the synchronization scope for this memory operation. - SynchronizationScope getSynchScope() const { return MMO->getSynchScope(); } + /// Returns the synchronization scope ID for this memory operation. 
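The new MaxSteps parameter on hasPredecessorHelper() above caps the predecessor walk; once the visited set reaches the cap the helper conservatively returns false. A hedged caller sketch (the 8192 budget is made up for illustration):

    #include "llvm/ADT/SmallPtrSet.h"
    #include "llvm/ADT/SmallVector.h"

    // Is N a (transitive) predecessor of Candidate, giving up -- and
    // answering 'no' -- after visiting 8192 nodes?
    bool reachesWithinBudget(const SDNode *N, const SDNode *Candidate) {
      SmallPtrSet<const SDNode *, 32> Visited;
      SmallVector<const SDNode *, 16> Worklist;
      Worklist.push_back(Candidate);
      return SDNode::hasPredecessorHelper(N, Visited, Worklist,
                                          /*MaxSteps=*/8192);
    }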
+ SyncScope::ID getSyncScopeID() const { return MMO->getSyncScopeID(); } /// Return the atomic ordering requirements for this memory operation. For /// cmpxchg atomic operations, return the atomic ordering requirements when @@ -1397,8 +1438,8 @@ class ConstantSDNode : public SDNode { int64_t getSExtValue() const { return Value->getSExtValue(); } bool isOne() const { return Value->isOne(); } - bool isNullValue() const { return Value->isNullValue(); } - bool isAllOnesValue() const { return Value->isAllOnesValue(); } + bool isNullValue() const { return Value->isZero(); } + bool isAllOnesValue() const { return Value->isMinusOne(); } bool isOpaque() const { return ConstantSDNodeBits.IsOpaque; } @@ -1708,7 +1749,7 @@ class BuildVectorSDNode : public SDNode { bool isConstant() const; - static inline bool classof(const SDNode *N) { + static bool classof(const SDNode *N) { return N->getOpcode() == ISD::BUILD_VECTOR; } }; @@ -1796,8 +1837,7 @@ class BlockAddressSDNode : public SDNode { BlockAddressSDNode(unsigned NodeTy, EVT VT, const BlockAddress *ba, int64_t o, unsigned char Flags) : SDNode(NodeTy, 0, DebugLoc(), getSDVTList(VT)), - BA(ba), Offset(o), TargetFlags(Flags) { - } + BA(ba), Offset(o), TargetFlags(Flags) {} public: const BlockAddress *getBlockAddress() const { return BA; } @@ -2073,7 +2113,7 @@ class MaskedGatherScatterSDNode : public MemSDNode { public: friend class SelectionDAG; - MaskedGatherScatterSDNode(ISD::NodeType NodeTy, unsigned Order, + MaskedGatherScatterSDNode(unsigned NodeTy, unsigned Order, const DebugLoc &dl, SDVTList VTs, EVT MemVT, MachineMemOperand *MMO) : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {} @@ -2128,7 +2168,7 @@ class MaskedScatterSDNode : public MaskedGatherScatterSDNode { /// instruction selection proper phase. class MachineSDNode : public SDNode { public: - typedef MachineMemOperand **mmo_iterator; + using mmo_iterator = MachineMemOperand **; private: friend class SelectionDAG; @@ -2200,8 +2240,8 @@ class SDNodeIterator : public std::iterator struct GraphTraits { - typedef SDNode *NodeRef; - typedef SDNodeIterator ChildIteratorType; + using NodeRef = SDNode *; + using ChildIteratorType = SDNodeIterator; static NodeRef getEntryNode(SDNode *N) { return N; } @@ -2218,12 +2258,12 @@ template <> struct GraphTraits { /// /// This needs to be a union because the largest node differs on 32 bit systems /// with 4 and 8 byte pointer alignment, respectively. -typedef AlignedCharArrayUnion - LargestSDNode; +using LargestSDNode = AlignedCharArrayUnion; /// The SDNode class with the greatest alignment requirement. 
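getSynchScope() becoming getSyncScopeID() here tracks the IR-side replacement of the fixed CrossThread/SingleThread SynchronizationScope enum with extensible SyncScope::ID values. A hedged sketch of a check written against the new spelling (AN is an assumed AtomicSDNode; SyncScope lives in llvm/IR/LLVMContext.h in this release):

    // Single-thread atomics need no ordering visible to other threads.
    bool needsCrossThreadOrdering(const AtomicSDNode *AN) {
      return AN->getSyncScopeID() != SyncScope::SingleThread;
    }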
-typedef GlobalAddressSDNode MostAlignedSDNode; +using MostAlignedSDNode = GlobalAddressSDNode; namespace ISD { diff --git a/interpreter/llvm/src/include/llvm/CodeGen/SlotIndexes.h b/interpreter/llvm/src/include/llvm/CodeGen/SlotIndexes.h index 14fc3a499a082..a7b16e7a9ed22 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/SlotIndexes.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/SlotIndexes.h @@ -24,13 +24,22 @@ #include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/ilist.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBundle.h" +#include "llvm/Pass.h" #include "llvm/Support/Allocator.h" +#include +#include +#include +#include namespace llvm { +class raw_ostream; + /// This class represents an entry in the slot index list held in the /// SlotIndexes pass. It should not be used directly. See the /// SlotIndex & SlotIndexes classes for the public interface to this @@ -40,7 +49,6 @@ namespace llvm { unsigned index; public: - IndexListEntry(MachineInstr *mi, unsigned index) : mi(mi), index(index) {} MachineInstr* getInstr() const { return mi; } @@ -301,7 +309,7 @@ namespace llvm { return os; } - typedef std::pair IdxMBBPair; + using IdxMBBPair = std::pair; inline bool operator<(SlotIndex V, const IdxMBBPair &IM) { return V < IM.first; @@ -325,7 +333,7 @@ namespace llvm { // IndexListEntry allocator. BumpPtrAllocator ileAllocator; - typedef ilist IndexList; + using IndexList = ilist; IndexList indexList; #ifdef EXPENSIVE_CHECKS @@ -334,7 +342,7 @@ namespace llvm { MachineFunction *mf; - typedef DenseMap Mi2IndexMap; + using Mi2IndexMap = DenseMap; Mi2IndexMap mi2iMap; /// MBBRanges - Map MBB number to (start, stop) indexes. @@ -436,7 +444,7 @@ namespace llvm { const MachineBasicBlock *MBB = MI.getParent(); assert(MBB && "MI must be inserted inside a basic block"); MachineBasicBlock::const_iterator I = MI, B = MBB->begin(); - for (;;) { + while (true) { if (I == B) return getMBBStartIdx(MBB); --I; @@ -453,7 +461,7 @@ namespace llvm { const MachineBasicBlock *MBB = MI.getParent(); assert(MBB && "MI must be inserted inside a basic block"); MachineBasicBlock::const_iterator I = MI, E = MBB->end(); - for (;;) { + while (true) { ++I; if (I == E) return getMBBEndIdx(MBB); @@ -497,21 +505,25 @@ namespace llvm { /// Iterator over the idx2MBBMap (sorted pairs of slot index of basic block /// begin and basic block) - typedef SmallVectorImpl::const_iterator MBBIndexIterator; + using MBBIndexIterator = SmallVectorImpl::const_iterator; + /// Move iterator to the next IdxMBBPair where the SlotIndex is greater or /// equal to \p To. MBBIndexIterator advanceMBBIndex(MBBIndexIterator I, SlotIndex To) const { return std::lower_bound(I, idx2MBBMap.end(), To); } + /// Get an iterator pointing to the IdxMBBPair with the biggest SlotIndex /// that is greater or equal to \p Idx. MBBIndexIterator findMBBIndex(SlotIndex Idx) const { return advanceMBBIndex(idx2MBBMap.begin(), Idx); } + /// Returns an iterator for the begin of the idx2MBBMap. MBBIndexIterator MBBIndexBegin() const { return idx2MBBMap.begin(); } + /// Return an iterator for the end of the idx2MBBMap.
MBBIndexIterator MBBIndexEnd() const { return idx2MBBMap.end(); diff --git a/interpreter/llvm/src/include/llvm/CodeGen/StackMaps.h b/interpreter/llvm/src/include/llvm/CodeGen/StackMaps.h index a18936feea7b0..8263946ed9280 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/StackMaps.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/StackMaps.h @@ -145,21 +145,27 @@ class PatchPointOpers { /// /// Statepoint operands take the form: /// , , , , -/// [call arguments], , , +/// [call arguments...], +/// , , /// , , -/// , , [other args], -/// [gc values] +/// , , [deopt args...], +/// +/// Note that the last two sets of arguments are not currently length +/// prefixed. class StatepointOpers { -private: + // TODO:: we should change the STATEPOINT representation so that CC and + // Flags should be part of meta operands, with args and deopt operands, and + // gc operands all prefixed by their length and a type code. This would be + // much more consistent. +public: // These values are absolute offsets into the operands of the statepoint // instruction. enum { IDPos, NBytesPos, NCallArgsPos, CallTargetPos, MetaEnd }; // These values are relative offsets from the start of the statepoint meta // arguments (i.e. the end of the call arguments). - enum { CCOffset = 1, FlagsOffset = 3, NumVMSArgsOffset = 5 }; + enum { CCOffset = 1, FlagsOffset = 3, NumDeoptOperandsOffset = 5 }; -public: explicit StatepointOpers(const MachineInstr *MI) : MI(MI) {} /// Get starting index of non call related arguments @@ -220,7 +226,7 @@ class StackMaps { // OpTypes are used to encode information about the following logical // operand (which may consist of several MachineOperands) for the // OpParser. - typedef enum { DirectMemRefOp, IndirectMemRefOp, ConstantOp } OpType; + using OpType = enum { DirectMemRefOp, IndirectMemRefOp, ConstantOp }; StackMaps(AsmPrinter &AP); @@ -248,9 +254,10 @@ class StackMaps { private: static const char *WSMP; - typedef SmallVector LocationVec; - typedef SmallVector LiveOutVec; - typedef MapVector ConstantPool; + + using LocationVec = SmallVector; + using LiveOutVec = SmallVector; + using ConstantPool = MapVector; struct FunctionInfo { uint64_t StackSize = 0; @@ -273,8 +280,8 @@ class StackMaps { LiveOuts(std::move(LiveOuts)) {} }; - typedef MapVector FnInfoMap; - typedef std::vector CallsiteInfoList; + using FnInfoMap = MapVector; + using CallsiteInfoList = std::vector; AsmPrinter &AP; CallsiteInfoList CSInfos; diff --git a/interpreter/llvm/src/include/llvm/CodeGen/StackProtector.h b/interpreter/llvm/src/include/llvm/CodeGen/StackProtector.h index 0655f19a323e4..72de212d0df9a 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/StackProtector.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/StackProtector.h @@ -19,17 +19,20 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Triple.h" -#include "llvm/IR/Dominators.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/ValueMap.h" #include "llvm/Pass.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetMachine.h" namespace llvm { +class BasicBlock; +class DominatorTree; class Function; +class Instruction; class Module; -class PHINode; +class TargetLoweringBase; +class TargetMachine; +class Type; class StackProtector : public FunctionPass { public: @@ -47,7 +50,7 @@ class StackProtector { }; /// A mapping of AllocaInsts to their required SSP layout.
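// For illustration: with the enums made public above, the StatepointOpers
// offsets are plain operand positions -- IDPos..CallTargetPos index the
// leading meta operands, while CCOffset/FlagsOffset/NumDeoptOperandsOffset
// are relative to where the meta arguments begin. A hedged client sketch
// (assumes MI is a valid STATEPOINT MachineInstr; not part of this patch):
//
//   uint64_t readStatepointID(const MachineInstr *MI) {
//     // The statepoint ID is the immediate at fixed position IDPos.
//     return MI->getOperand(StatepointOpers::IDPos).getImm();
//   }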
- typedef ValueMap SSPLayoutMap; + using SSPLayoutMap = ValueMap; private: const TargetMachine *TM = nullptr; @@ -55,7 +58,7 @@ class StackProtector : public FunctionPass { /// TLI - Keep a pointer of a TargetLowering to consult for determining /// target type sizes. const TargetLoweringBase *TLI = nullptr; - const Triple Trip; + Triple Trip; Function *F; Module *M; @@ -114,19 +117,11 @@ class StackProtector : public FunctionPass { public: static char ID; // Pass identification, replacement for typeid. - StackProtector() : FunctionPass(ID) { + StackProtector() : FunctionPass(ID), SSPBufferSize(8) { initializeStackProtectorPass(*PassRegistry::getPassRegistry()); } - StackProtector(const TargetMachine *TM) - : FunctionPass(ID), TM(TM), Trip(TM->getTargetTriple()), - SSPBufferSize(8) { - initializeStackProtectorPass(*PassRegistry::getPassRegistry()); - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addPreserved(); - } + void getAnalysisUsage(AnalysisUsage &AU) const override; SSPLayoutKind getSSPLayout(const AllocaInst *AI) const; diff --git a/interpreter/llvm/src/include/llvm/CodeGen/TailDuplicator.h b/interpreter/llvm/src/include/llvm/CodeGen/TailDuplicator.h index b667245fd3c0a..483c0ab1eec9e 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/TailDuplicator.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/TailDuplicator.h @@ -1,4 +1,4 @@ -//===-- llvm/CodeGen/TailDuplicator.h ---------------------------*- C++ -*-===// +//===- llvm/CodeGen/TailDuplicator.h ----------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -15,19 +15,27 @@ #ifndef LLVM_CODEGEN_TAILDUPLICATOR_H #define LLVM_CODEGEN_TAILDUPLICATOR_H +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" -#include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/MachineSSAUpdater.h" #include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" +#include +#include namespace llvm { -extern cl::opt TailDupIndirectBranchSize; +class MachineBasicBlock; +class MachineBranchProbabilityInfo; +class MachineFunction; +class MachineInstr; +class MachineModuleInfo; +class MachineRegisterInfo; +class TargetRegisterInfo; /// Utility class to perform tail duplication. class TailDuplicator { @@ -46,7 +54,7 @@ class TailDuplicator { // For each virtual register in SSAUpdateVals keep a list of source virtual // registers. - typedef std::vector> AvailableValsTy; + using AvailableValsTy = std::vector>; DenseMap SSAUpdateVals; @@ -62,11 +70,14 @@ class TailDuplicator { void initMF(MachineFunction &MF, const MachineBranchProbabilityInfo *MBPI, bool LayoutMode, unsigned TailDupSize = 0); + bool tailDuplicateBlocks(); static bool isSimpleBB(MachineBasicBlock *TailBB); bool shouldTailDuplicate(bool IsSimple, MachineBasicBlock &TailBB); + /// Returns true if TailBB can successfully be duplicated into PredBB bool canTailDuplicate(MachineBasicBlock *TailBB, MachineBasicBlock *PredBB); + /// Tail duplicate a single basic block into its predecessors, and then clean /// up. 
/// If \p DuplicatePreds is not null, it will be updated to contain the list @@ -77,10 +88,10 @@ class TailDuplicator { bool IsSimple, MachineBasicBlock *MBB, MachineBasicBlock *ForcedLayoutPred, SmallVectorImpl *DuplicatedPreds = nullptr, - llvm::function_ref *RemovalCallback = nullptr); + function_ref *RemovalCallback = nullptr); private: - typedef TargetInstrInfo::RegSubRegPair RegSubRegPair; + using RegSubRegPair = TargetInstrInfo::RegSubRegPair; void addSSAUpdateEntry(unsigned OrigReg, unsigned NewReg, MachineBasicBlock *BB); @@ -112,9 +123,9 @@ class TailDuplicator { void removeDeadBlock( MachineBasicBlock *MBB, - llvm::function_ref *RemovalCallback = nullptr); + function_ref *RemovalCallback = nullptr); }; -} // End llvm namespace +} // end namespace llvm -#endif +#endif // LLVM_CODEGEN_TAILDUPLICATOR_H diff --git a/interpreter/llvm/src/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h b/interpreter/llvm/src/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h index adf2b3ea1c9b3..e4d3cc9cecfcc 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h @@ -41,6 +41,10 @@ class TargetLoweringObjectFileELF : public TargetLoweringObjectFile { TargetLoweringObjectFileELF() = default; ~TargetLoweringObjectFileELF() override = default; + /// Emit Obj-C garbage collection and linker options. + void emitModuleMetadata(MCStreamer &Streamer, Module &M, + const TargetMachine &TM) const override; + void emitPersonalityValue(MCStreamer &Streamer, const DataLayout &TM, const MCSymbol *Sym) const override; @@ -94,9 +98,8 @@ class TargetLoweringObjectFileMachO : public TargetLoweringObjectFile { void Initialize(MCContext &Ctx, const TargetMachine &TM) override; /// Emit the module flags that specify the garbage collection information. - void emitModuleFlags(MCStreamer &Streamer, - ArrayRef ModuleFlags, - const TargetMachine &TM) const override; + void emitModuleMetadata(MCStreamer &Streamer, Module &M, + const TargetMachine &TM) const override; MCSection *SelectSectionForGlobal(const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const override; @@ -149,11 +152,9 @@ class TargetLoweringObjectFileCOFF : public TargetLoweringObjectFile { MCSection *getSectionForJumpTable(const Function &F, const TargetMachine &TM) const override; - /// Emit Obj-C garbage collection and linker options. Only linker option - /// emission is implemented for COFF. - void emitModuleFlags(MCStreamer &Streamer, - ArrayRef ModuleFlags, - const TargetMachine &TM) const override; + /// Emit Obj-C garbage collection and linker options. 
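// For illustration: the RemovalCallback parameter above is an
// llvm::function_ref, a non-owning callable reference that is cheap to pass
// and valid only while the referenced callable is alive -- a good fit for a
// synchronous notify-on-removal hook. A minimal sketch (hypothetical names,
// not part of this patch):
//
//   #include "llvm/ADT/STLExtras.h" // llvm::function_ref
//
//   void notifyOnRemoval(llvm::function_ref<void(int)> Callback) {
//     Callback(42); // invoke whatever callable the caller handed us
//   }
//
//   void countRemovals() {
//     int Removed = 0;
//     // The lambda outlives the call, so passing it by function_ref is safe.
//     notifyOnRemoval([&Removed](int) { ++Removed; });
//   }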
+ void emitModuleMetadata(MCStreamer &Streamer, Module &M, + const TargetMachine &TM) const override; MCSection *getStaticCtorSection(unsigned Priority, const MCSymbol *KeySym) const override; diff --git a/interpreter/llvm/src/include/llvm/CodeGen/TargetPassConfig.h b/interpreter/llvm/src/include/llvm/CodeGen/TargetPassConfig.h index f0c826dc1d457..aaf0ab5d5481d 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/TargetPassConfig.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/TargetPassConfig.h @@ -1,4 +1,4 @@ -//===-- TargetPassConfig.h - Code Generation pass options -------*- C++ -*-===// +//===- TargetPassConfig.h - Code Generation pass options --------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -16,19 +16,23 @@ #include "llvm/Pass.h" #include "llvm/Support/CodeGen.h" +#include #include namespace llvm { +class LLVMTargetMachine; +struct MachineSchedContext; class PassConfigImpl; class ScheduleDAGInstrs; -class TargetMachine; -struct MachineSchedContext; // The old pass manager infrastructure is hidden in a legacy namespace now. namespace legacy { + class PassManagerBase; -} + +} // end namespace legacy + using legacy::PassManagerBase; /// Discriminated union of Pass ID types. @@ -50,10 +54,11 @@ class IdentifyingPassPtr { AnalysisID ID; Pass *P; }; - bool IsInstance; + bool IsInstance = false; + public: - IdentifyingPassPtr() : P(nullptr), IsInstance(false) {} - IdentifyingPassPtr(AnalysisID IDPtr) : ID(IDPtr), IsInstance(false) {} + IdentifyingPassPtr() : P(nullptr) {} + IdentifyingPassPtr(AnalysisID IDPtr) : ID(IDPtr) {} IdentifyingPassPtr(Pass *InstancePtr) : P(InstancePtr), IsInstance(true) {} bool isValid() const { return P; } @@ -63,6 +68,7 @@ class IdentifyingPassPtr { assert(!IsInstance && "Not a Pass ID"); return ID; } + Pass *getInstance() const { assert(IsInstance && "Not a Pass Instance"); return P; @@ -93,34 +99,37 @@ class TargetPassConfig : public ImmutablePass { static char PostRAMachineLICMID; private: - PassManagerBase *PM; + PassManagerBase *PM = nullptr; AnalysisID StartBefore = nullptr; AnalysisID StartAfter = nullptr; AnalysisID StopBefore = nullptr; AnalysisID StopAfter = nullptr; - bool Started; - bool Stopped; - bool AddingMachinePasses; + bool Started = true; + bool Stopped = false; + bool AddingMachinePasses = false; protected: - TargetMachine *TM; - PassConfigImpl *Impl; // Internal data structures - bool Initialized; // Flagged after all passes are configured. + LLVMTargetMachine *TM; + PassConfigImpl *Impl = nullptr; // Internal data structures + bool Initialized = false; // Flagged after all passes are configured. // Target Pass Options // Targets provide a default setting, user flags override. - // - bool DisableVerify; + bool DisableVerify = false; /// Default setting for -enable-tail-merge on this target. - bool EnableTailMerge; + bool EnableTailMerge = true; /// Require processing of functions such that callees are generated before /// callers. - bool RequireCodeGenSCCOrder; + bool RequireCodeGenSCCOrder = false; + + /// Add the actual instruction selection passes. This does not include + /// preparation passes on IR. + bool addCoreISelPasses(); public: - TargetPassConfig(TargetMachine *tm, PassManagerBase &pm); + TargetPassConfig(LLVMTargetMachine &TM, PassManagerBase &pm); // Dummy constructor. TargetPassConfig(); @@ -206,6 +215,13 @@ class TargetPassConfig : public ImmutablePass { /// has not be overriden on the command line with '-regalloc=...' 
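// For illustration: IdentifyingPassPtr above is a small discriminated union;
// IsInstance records whether the AnalysisID or the Pass* member of the
// anonymous union is active, and the patch moves its default into a member
// initializer so only the Pass* constructor has to set it. The same pattern
// in miniature (hypothetical type, not part of this patch):
//
//   #include <cassert>
//
//   struct IntOrPtr {
//     union { int I; void *P; };
//     bool HasPtr = false;                          // default member init
//     IntOrPtr() : I(0) {}                          // HasPtr stays false
//     IntOrPtr(void *Ptr) : P(Ptr), HasPtr(true) {}
//     int getInt() const { assert(!HasPtr); return I; }
//   };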
bool usingDefaultRegAlloc() const; + /// High level function that adds all passes necessary to go from llvm IR + /// representation to the MI representation. + /// Adds IR based lowering and target specific optimization passes and finally + /// the core instruction selection passes. + /// \returns true if an error occured, false otherwise. + bool addISelPasses(); + /// Add common target configurable passes that perform LLVM IR to IR /// transforms following machine independent optimization. virtual void addIRPasses(); @@ -285,7 +301,6 @@ class TargetPassConfig : public ImmutablePass { /// printAndVerify - Add a pass to dump then verify the machine function, if /// those steps are enabled. - /// void printAndVerify(const std::string &Banner); /// Add a pass to print the machine function if printing is enabled. @@ -419,4 +434,4 @@ class TargetPassConfig : public ImmutablePass { } // end namespace llvm -#endif +#endif // LLVM_CODEGEN_TARGETPASSCONFIG_H diff --git a/interpreter/llvm/src/include/llvm/CodeGen/TargetSchedule.h b/interpreter/llvm/src/include/llvm/CodeGen/TargetSchedule.h index 1992412120aaf..f236679764688 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/TargetSchedule.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/TargetSchedule.h @@ -16,6 +16,7 @@ #ifndef LLVM_CODEGEN_TARGETSCHEDULE_H #define LLVM_CODEGEN_TARGETSCHEDULE_H +#include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/MC/MCSchedule.h" @@ -55,6 +56,9 @@ class TargetSchedModel { /// Return the MCSchedClassDesc for this instruction. const MCSchedClassDesc *resolveSchedClass(const MachineInstr *MI) const; + /// \brief TargetSubtargetInfo getter. + const TargetSubtargetInfo *getSubtargetInfo() const { return STI; } + /// \brief TargetInstrInfo getter. const TargetInstrInfo *getInstrInfo() const { return TII; } @@ -120,7 +124,7 @@ class TargetSchedModel { } #endif - typedef const MCWriteProcResEntry *ProcResIter; + using ProcResIter = const MCWriteProcResEntry *; // \brief Get an iterator into the processor resources consumed by this // scheduling class. diff --git a/interpreter/llvm/src/include/llvm/CodeGen/ValueTypes.h b/interpreter/llvm/src/include/llvm/CodeGen/ValueTypes.h index b404b4ca701f9..40d501edde10b 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/ValueTypes.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/ValueTypes.h @@ -17,7 +17,10 @@ #define LLVM_CODEGEN_VALUETYPES_H #include "llvm/CodeGen/MachineValueType.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/MathExtras.h" #include +#include #include namespace llvm { @@ -30,13 +33,13 @@ namespace llvm { /// can represent. struct EVT { private: - MVT V; - Type *LLVMTy; + MVT V = MVT::INVALID_SIMPLE_VALUE_TYPE; + Type *LLVMTy = nullptr; public: - constexpr EVT() : V(MVT::INVALID_SIMPLE_VALUE_TYPE), LLVMTy(nullptr) {} - constexpr EVT(MVT::SimpleValueType SVT) : V(SVT), LLVMTy(nullptr) {} - constexpr EVT(MVT S) : V(S), LLVMTy(nullptr) {} + constexpr EVT() = default; + constexpr EVT(MVT::SimpleValueType SVT) : V(SVT) {} + constexpr EVT(MVT S) : V(S) {} bool operator==(EVT VT) const { return !(*this != VT); @@ -246,7 +249,6 @@ namespace llvm { return getSizeInBits() <= VT.getSizeInBits(); } - /// Return the SimpleValueType held in the specified simple EVT. 
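// For illustration: the EVT hunk above moves defaults from constructor
// init-lists into default member initializers, which is what allows the
// default constructor to be '= default' while remaining constexpr. In
// miniature (hypothetical type, not part of this patch):
//
//   struct Val {
//     int Kind = -1;                    // defaults live with the members
//     const void *Ty = nullptr;
//     constexpr Val() = default;        // still constexpr-constructible
//     constexpr Val(int K) : Kind(K) {} // Ty keeps its default
//   };
//   static_assert(Val().Kind == -1, "default member initializer applies");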
MVT getSimpleVT() const { assert(isSimple() && "Expected a SimpleValueType!"); @@ -430,6 +432,6 @@ namespace llvm { unsigned getExtendedSizeInBits() const LLVM_READONLY; }; -} // End llvm namespace +} // end namespace llvm -#endif +#endif // LLVM_CODEGEN_VALUETYPES_H diff --git a/interpreter/llvm/src/include/llvm/CodeGen/ValueTypes.td b/interpreter/llvm/src/include/llvm/CodeGen/ValueTypes.td index b87a5e56699eb..b1e62daa5aaeb 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/ValueTypes.td +++ b/interpreter/llvm/src/include/llvm/CodeGen/ValueTypes.td @@ -33,115 +33,117 @@ def f80 : ValueType<80 , 11>; // 80-bit floating point value def f128 : ValueType<128, 12>; // 128-bit floating point value def ppcf128: ValueType<128, 13>; // PPC 128-bit floating point value -def v2i1 : ValueType<2 , 14>; // 2 x i1 vector value -def v4i1 : ValueType<4 , 15>; // 4 x i1 vector value -def v8i1 : ValueType<8 , 16>; // 8 x i1 vector value -def v16i1 : ValueType<16, 17>; // 16 x i1 vector value -def v32i1 : ValueType<32 , 18>; // 32 x i1 vector value -def v64i1 : ValueType<64 , 19>; // 64 x i1 vector value -def v512i1 : ValueType<512, 20>; // 512 x i1 vector value -def v1024i1: ValueType<1024,21>; //1024 x i1 vector value - -def v1i8 : ValueType<8, 22>; // 1 x i8 vector value -def v2i8 : ValueType<16 , 23>; // 2 x i8 vector value -def v4i8 : ValueType<32 , 24>; // 4 x i8 vector value -def v8i8 : ValueType<64 , 25>; // 8 x i8 vector value -def v16i8 : ValueType<128, 26>; // 16 x i8 vector value -def v32i8 : ValueType<256, 27>; // 32 x i8 vector value -def v64i8 : ValueType<512, 28>; // 64 x i8 vector value -def v128i8 : ValueType<1024,29>; //128 x i8 vector value -def v256i8 : ValueType<2048,30>; //256 x i8 vector value - -def v1i16 : ValueType<16 , 31>; // 1 x i16 vector value -def v2i16 : ValueType<32 , 32>; // 2 x i16 vector value -def v4i16 : ValueType<64 , 33>; // 4 x i16 vector value -def v8i16 : ValueType<128, 34>; // 8 x i16 vector value -def v16i16 : ValueType<256, 35>; // 16 x i16 vector value -def v32i16 : ValueType<512, 36>; // 32 x i16 vector value -def v64i16 : ValueType<1024,37>; // 64 x i16 vector value -def v128i16: ValueType<2048,38>; //128 x i16 vector value - -def v1i32 : ValueType<32 , 39>; // 1 x i32 vector value -def v2i32 : ValueType<64 , 40>; // 2 x i32 vector value -def v4i32 : ValueType<128, 41>; // 4 x i32 vector value -def v8i32 : ValueType<256, 42>; // 8 x i32 vector value -def v16i32 : ValueType<512, 43>; // 16 x i32 vector value -def v32i32 : ValueType<1024,44>; // 32 x i32 vector value -def v64i32 : ValueType<2048,45>; // 32 x i32 vector value - -def v1i64 : ValueType<64 , 46>; // 1 x i64 vector value -def v2i64 : ValueType<128, 47>; // 2 x i64 vector value -def v4i64 : ValueType<256, 48>; // 4 x i64 vector value -def v8i64 : ValueType<512, 49>; // 8 x i64 vector value -def v16i64 : ValueType<1024,50>; // 16 x i64 vector value -def v32i64 : ValueType<2048,51>; // 32 x i64 vector value - -def v1i128 : ValueType<128, 52>; // 1 x i128 vector value - -def nxv2i1 : ValueType<2, 53>; // n x 2 x i1 vector value -def nxv4i1 : ValueType<4, 54>; // n x 4 x i1 vector value -def nxv8i1 : ValueType<8, 55>; // n x 8 x i1 vector value -def nxv16i1 : ValueType<16, 56>; // n x 16 x i1 vector value -def nxv32i1 : ValueType<32, 57>; // n x 32 x i1 vector value - -def nxv1i8 : ValueType<8, 58>; // n x 1 x i8 vector value -def nxv2i8 : ValueType<16, 59>; // n x 2 x i8 vector value -def nxv4i8 : ValueType<32, 60>; // n x 4 x i8 vector value -def nxv8i8 : ValueType<64, 61>; // n x 8 x i8 vector 
value -def nxv16i8 : ValueType<128, 62>; // n x 16 x i8 vector value -def nxv32i8 : ValueType<256, 63>; // n x 32 x i8 vector value - -def nxv1i16 : ValueType<16, 64>; // n x 1 x i16 vector value -def nxv2i16 : ValueType<32, 65>; // n x 2 x i16 vector value -def nxv4i16 : ValueType<64, 66>; // n x 4 x i16 vector value -def nxv8i16 : ValueType<128, 67>; // n x 8 x i16 vector value -def nxv16i16: ValueType<256, 68>; // n x 16 x i16 vector value -def nxv32i16: ValueType<512, 69>; // n x 32 x i16 vector value - -def nxv1i32 : ValueType<32, 70>; // n x 1 x i32 vector value -def nxv2i32 : ValueType<64, 71>; // n x 2 x i32 vector value -def nxv4i32 : ValueType<128, 72>; // n x 4 x i32 vector value -def nxv8i32 : ValueType<256, 73>; // n x 8 x i32 vector value -def nxv16i32: ValueType<512, 74>; // n x 16 x i32 vector value -def nxv32i32: ValueType<1024,75>; // n x 32 x i32 vector value - -def nxv1i64 : ValueType<64, 76>; // n x 1 x i64 vector value -def nxv2i64 : ValueType<128, 77>; // n x 2 x i64 vector value -def nxv4i64 : ValueType<256, 78>; // n x 4 x i64 vector value -def nxv8i64 : ValueType<512, 79>; // n x 8 x i64 vector value -def nxv16i64: ValueType<1024,80>; // n x 16 x i64 vector value -def nxv32i64: ValueType<2048,81>; // n x 32 x i64 vector value - -def v2f16 : ValueType<32 , 82>; // 2 x f16 vector value -def v4f16 : ValueType<64 , 83>; // 4 x f16 vector value -def v8f16 : ValueType<128, 84>; // 8 x f16 vector value -def v1f32 : ValueType<32 , 85>; // 1 x f32 vector value -def v2f32 : ValueType<64 , 86>; // 2 x f32 vector value -def v4f32 : ValueType<128, 87>; // 4 x f32 vector value -def v8f32 : ValueType<256, 88>; // 8 x f32 vector value -def v16f32 : ValueType<512, 89>; // 16 x f32 vector value -def v1f64 : ValueType<64, 90>; // 1 x f64 vector value -def v2f64 : ValueType<128, 91>; // 2 x f64 vector value -def v4f64 : ValueType<256, 92>; // 4 x f64 vector value -def v8f64 : ValueType<512, 93>; // 8 x f64 vector value - -def nxv2f16 : ValueType<32 , 94>; // n x 2 x f16 vector value -def nxv4f16 : ValueType<64 , 95>; // n x 4 x f16 vector value -def nxv8f16 : ValueType<128, 96>; // n x 8 x f16 vector value -def nxv1f32 : ValueType<32 , 97>; // n x 1 x f32 vector value -def nxv2f32 : ValueType<64 , 98>; // n x 2 x f32 vector value -def nxv4f32 : ValueType<128, 99>; // n x 4 x f32 vector value -def nxv8f32 : ValueType<256, 100>; // n x 8 x f32 vector value -def nxv16f32 : ValueType<512, 101>; // n x 16 x f32 vector value -def nxv1f64 : ValueType<64, 102>; // n x 1 x f64 vector value -def nxv2f64 : ValueType<128, 103>; // n x 2 x f64 vector value -def nxv4f64 : ValueType<256, 104>; // n x 4 x f64 vector value -def nxv8f64 : ValueType<512, 105>; // n x 8 x f64 vector value - -def x86mmx : ValueType<64 , 106>; // X86 MMX value -def FlagVT : ValueType<0 , 107>; // Pre-RA sched glue -def isVoid : ValueType<0 , 108>; // Produces no value -def untyped: ValueType<8 , 109>; // Produces an untyped value +def v1i1 : ValueType<1 , 14>; // 1 x i1 vector value +def v2i1 : ValueType<2 , 15>; // 2 x i1 vector value +def v4i1 : ValueType<4 , 16>; // 4 x i1 vector value +def v8i1 : ValueType<8 , 17>; // 8 x i1 vector value +def v16i1 : ValueType<16, 18>; // 16 x i1 vector value +def v32i1 : ValueType<32 , 19>; // 32 x i1 vector value +def v64i1 : ValueType<64 , 20>; // 64 x i1 vector value +def v512i1 : ValueType<512, 21>; // 512 x i1 vector value +def v1024i1: ValueType<1024,22>; //1024 x i1 vector value + +def v1i8 : ValueType<8, 23>; // 1 x i8 vector value +def v2i8 : ValueType<16 , 24>; // 2 x i8 
vector value +def v4i8 : ValueType<32 , 25>; // 4 x i8 vector value +def v8i8 : ValueType<64 , 26>; // 8 x i8 vector value +def v16i8 : ValueType<128, 27>; // 16 x i8 vector value +def v32i8 : ValueType<256, 28>; // 32 x i8 vector value +def v64i8 : ValueType<512, 29>; // 64 x i8 vector value +def v128i8 : ValueType<1024,30>; //128 x i8 vector value +def v256i8 : ValueType<2048,31>; //256 x i8 vector value + +def v1i16 : ValueType<16 , 32>; // 1 x i16 vector value +def v2i16 : ValueType<32 , 33>; // 2 x i16 vector value +def v4i16 : ValueType<64 , 34>; // 4 x i16 vector value +def v8i16 : ValueType<128, 35>; // 8 x i16 vector value +def v16i16 : ValueType<256, 36>; // 16 x i16 vector value +def v32i16 : ValueType<512, 37>; // 32 x i16 vector value +def v64i16 : ValueType<1024,38>; // 64 x i16 vector value +def v128i16: ValueType<2048,39>; //128 x i16 vector value + +def v1i32 : ValueType<32 , 40>; // 1 x i32 vector value +def v2i32 : ValueType<64 , 41>; // 2 x i32 vector value +def v4i32 : ValueType<128, 42>; // 4 x i32 vector value +def v8i32 : ValueType<256, 43>; // 8 x i32 vector value +def v16i32 : ValueType<512, 44>; // 16 x i32 vector value +def v32i32 : ValueType<1024,45>; // 32 x i32 vector value +def v64i32 : ValueType<2048,46>; // 32 x i32 vector value + +def v1i64 : ValueType<64 , 47>; // 1 x i64 vector value +def v2i64 : ValueType<128, 48>; // 2 x i64 vector value +def v4i64 : ValueType<256, 49>; // 4 x i64 vector value +def v8i64 : ValueType<512, 50>; // 8 x i64 vector value +def v16i64 : ValueType<1024,51>; // 16 x i64 vector value +def v32i64 : ValueType<2048,52>; // 32 x i64 vector value + +def v1i128 : ValueType<128, 53>; // 1 x i128 vector value + +def nxv1i1 : ValueType<1, 54>; // n x 1 x i1 vector value +def nxv2i1 : ValueType<2, 55>; // n x 2 x i1 vector value +def nxv4i1 : ValueType<4, 56>; // n x 4 x i1 vector value +def nxv8i1 : ValueType<8, 57>; // n x 8 x i1 vector value +def nxv16i1 : ValueType<16, 58>; // n x 16 x i1 vector value +def nxv32i1 : ValueType<32, 59>; // n x 32 x i1 vector value + +def nxv1i8 : ValueType<8, 60>; // n x 1 x i8 vector value +def nxv2i8 : ValueType<16, 61>; // n x 2 x i8 vector value +def nxv4i8 : ValueType<32, 62>; // n x 4 x i8 vector value +def nxv8i8 : ValueType<64, 63>; // n x 8 x i8 vector value +def nxv16i8 : ValueType<128, 64>; // n x 16 x i8 vector value +def nxv32i8 : ValueType<256, 65>; // n x 32 x i8 vector value + +def nxv1i16 : ValueType<16, 66>; // n x 1 x i16 vector value +def nxv2i16 : ValueType<32, 67>; // n x 2 x i16 vector value +def nxv4i16 : ValueType<64, 68>; // n x 4 x i16 vector value +def nxv8i16 : ValueType<128, 69>; // n x 8 x i16 vector value +def nxv16i16: ValueType<256, 70>; // n x 16 x i16 vector value +def nxv32i16: ValueType<512, 71>; // n x 32 x i16 vector value + +def nxv1i32 : ValueType<32, 72>; // n x 1 x i32 vector value +def nxv2i32 : ValueType<64, 73>; // n x 2 x i32 vector value +def nxv4i32 : ValueType<128, 74>; // n x 4 x i32 vector value +def nxv8i32 : ValueType<256, 75>; // n x 8 x i32 vector value +def nxv16i32: ValueType<512, 76>; // n x 16 x i32 vector value +def nxv32i32: ValueType<1024,77>; // n x 32 x i32 vector value + +def nxv1i64 : ValueType<64, 78>; // n x 1 x i64 vector value +def nxv2i64 : ValueType<128, 79>; // n x 2 x i64 vector value +def nxv4i64 : ValueType<256, 80>; // n x 4 x i64 vector value +def nxv8i64 : ValueType<512, 81>; // n x 8 x i64 vector value +def nxv16i64: ValueType<1024,82>; // n x 16 x i64 vector value +def nxv32i64: ValueType<2048,83>; // n x 32 x i64 vector 
value + +def v2f16 : ValueType<32 , 84>; // 2 x f16 vector value +def v4f16 : ValueType<64 , 85>; // 4 x f16 vector value +def v8f16 : ValueType<128, 86>; // 8 x f16 vector value +def v1f32 : ValueType<32 , 87>; // 1 x f32 vector value +def v2f32 : ValueType<64 , 88>; // 2 x f32 vector value +def v4f32 : ValueType<128, 89>; // 4 x f32 vector value +def v8f32 : ValueType<256, 90>; // 8 x f32 vector value +def v16f32 : ValueType<512, 91>; // 16 x f32 vector value +def v1f64 : ValueType<64, 92>; // 1 x f64 vector value +def v2f64 : ValueType<128, 93>; // 2 x f64 vector value +def v4f64 : ValueType<256, 94>; // 4 x f64 vector value +def v8f64 : ValueType<512, 95>; // 8 x f64 vector value + +def nxv2f16 : ValueType<32 , 96>; // n x 2 x f16 vector value +def nxv4f16 : ValueType<64 , 97>; // n x 4 x f16 vector value +def nxv8f16 : ValueType<128, 98>; // n x 8 x f16 vector value +def nxv1f32 : ValueType<32 , 99>; // n x 1 x f32 vector value +def nxv2f32 : ValueType<64 , 100>; // n x 2 x f32 vector value +def nxv4f32 : ValueType<128, 101>; // n x 4 x f32 vector value +def nxv8f32 : ValueType<256, 102>; // n x 8 x f32 vector value +def nxv16f32 : ValueType<512, 103>; // n x 16 x f32 vector value +def nxv1f64 : ValueType<64, 104>; // n x 1 x f64 vector value +def nxv2f64 : ValueType<128, 105>; // n x 2 x f64 vector value +def nxv4f64 : ValueType<256, 106>; // n x 4 x f64 vector value +def nxv8f64 : ValueType<512, 107>; // n x 8 x f64 vector value + +def x86mmx : ValueType<64 , 108>; // X86 MMX value +def FlagVT : ValueType<0 , 109>; // Pre-RA sched glue +def isVoid : ValueType<0 , 110>; // Produces no value +def untyped: ValueType<8 , 111>; // Produces an untyped value def token : ValueType<0 , 248>; // TokenTy def MetadataVT: ValueType<0, 249>; // Metadata diff --git a/interpreter/llvm/src/include/llvm/CodeGen/VirtRegMap.h b/interpreter/llvm/src/include/llvm/CodeGen/VirtRegMap.h index d7e92094877d1..b9076353fd07d 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/VirtRegMap.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/VirtRegMap.h @@ -102,14 +102,7 @@ namespace llvm { /// @brief creates a mapping for the specified virtual register to /// the specified physical register - void assignVirt2Phys(unsigned virtReg, unsigned physReg) { - assert(TargetRegisterInfo::isVirtualRegister(virtReg) && - TargetRegisterInfo::isPhysicalRegister(physReg)); - assert(Virt2PhysMap[virtReg] == NO_PHYS_REG && - "attempt to assign physical register to already mapped " - "virtual register"); - Virt2PhysMap[virtReg] = physReg; - } + void assignVirt2Phys(unsigned virtReg, MCPhysReg physReg); /// @brief clears the specified virtual register's, physical /// register mapping diff --git a/interpreter/llvm/src/include/llvm/CodeGen/WinEHFuncInfo.h b/interpreter/llvm/src/include/llvm/CodeGen/WinEHFuncInfo.h index dd730495a5f61..8043024626a0c 100644 --- a/interpreter/llvm/src/include/llvm/CodeGen/WinEHFuncInfo.h +++ b/interpreter/llvm/src/include/llvm/CodeGen/WinEHFuncInfo.h @@ -1,4 +1,4 @@ -//===-- llvm/CodeGen/WinEHFuncInfo.h ----------------------------*- C++ -*-===// +//===- llvm/CodeGen/WinEHFuncInfo.h -----------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -17,28 +17,26 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/PointerUnion.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/TinyPtrVector.h" -#include "llvm/IR/Instructions.h" +#include +#include +#include namespace llvm { + class AllocaInst; class BasicBlock; -class CatchReturnInst; -class Constant; +class 
FuncletPadInst; class Function; class GlobalVariable; +class Instruction; class InvokeInst; -class IntrinsicInst; -class LandingPadInst; -class MCExpr; -class MCSymbol; class MachineBasicBlock; -class Value; +class MCSymbol; // The following structs respresent the .xdata tables for various // Windows-related EH personalities. -typedef PointerUnion MBBOrBasicBlock; +using MBBOrBasicBlock = PointerUnion; struct CxxUnwindMapEntry { int ToState; @@ -99,18 +97,18 @@ struct WinEHFuncInfo { SmallVector TryBlockMap; SmallVector SEHUnwindMap; SmallVector ClrEHUnwindMap; - int UnwindHelpFrameIdx = INT_MAX; - int PSPSymFrameIdx = INT_MAX; + int UnwindHelpFrameIdx = std::numeric_limits::max(); + int PSPSymFrameIdx = std::numeric_limits::max(); int getLastStateNumber() const { return CxxUnwindMap.size() - 1; } void addIPToStateRange(const InvokeInst *II, MCSymbol *InvokeBegin, MCSymbol *InvokeEnd); - int EHRegNodeFrameIndex = INT_MAX; - int EHRegNodeEndOffset = INT_MAX; - int EHGuardFrameIndex = INT_MAX; - int SEHSetFrameOffset = INT_MAX; + int EHRegNodeFrameIndex = std::numeric_limits::max(); + int EHRegNodeEndOffset = std::numeric_limits::max(); + int EHGuardFrameIndex = std::numeric_limits::max(); + int SEHSetFrameOffset = std::numeric_limits::max(); WinEHFuncInfo(); }; @@ -125,5 +123,7 @@ void calculateSEHStateNumbers(const Function *ParentFn, WinEHFuncInfo &FuncInfo); void calculateClrEHStateNumbers(const Function *Fn, WinEHFuncInfo &FuncInfo); -} + +} // end namespace llvm + #endif // LLVM_CODEGEN_WINEHFUNCINFO_H diff --git a/interpreter/llvm/src/include/llvm/Config/abi-breaking.h.cmake b/interpreter/llvm/src/include/llvm/Config/abi-breaking.h.cmake index 4ce487b8f5f3c..7ae401e5b8a8c 100644 --- a/interpreter/llvm/src/include/llvm/Config/abi-breaking.h.cmake +++ b/interpreter/llvm/src/include/llvm/Config/abi-breaking.h.cmake @@ -15,6 +15,9 @@ /* Define to enable checks that alter the LLVM C++ ABI */ #cmakedefine01 LLVM_ENABLE_ABI_BREAKING_CHECKS +/* Define to enable reverse iteration of unordered llvm containers */ +#cmakedefine01 LLVM_ENABLE_REVERSE_ITERATION + /* Allow selectively disabling link-time mismatch checking so that header-only ADT content from LLVM can be used without linking libSupport. 
*/ #if !LLVM_DISABLE_ABI_BREAKING_CHECKS_ENFORCING diff --git a/interpreter/llvm/src/include/llvm/Config/config.h.cmake b/interpreter/llvm/src/include/llvm/Config/config.h.cmake index a64e208fa7846..1289551f0739a 100644 --- a/interpreter/llvm/src/include/llvm/Config/config.h.cmake +++ b/interpreter/llvm/src/include/llvm/Config/config.h.cmake @@ -350,33 +350,15 @@ /* Host triple LLVM will be executed on */ #cmakedefine LLVM_HOST_TRIPLE "${LLVM_HOST_TRIPLE}" -/* LLVM architecture name for the native architecture, if available */ -#cmakedefine LLVM_NATIVE_ARCH ${LLVM_NATIVE_ARCH} - -/* LLVM name for the native AsmParser init function, if available */ -#cmakedefine LLVM_NATIVE_ASMPARSER LLVMInitialize${LLVM_NATIVE_ARCH}AsmParser - -/* LLVM name for the native AsmPrinter init function, if available */ -#cmakedefine LLVM_NATIVE_ASMPRINTER LLVMInitialize${LLVM_NATIVE_ARCH}AsmPrinter - -/* LLVM name for the native Disassembler init function, if available */ -#cmakedefine LLVM_NATIVE_DISASSEMBLER LLVMInitialize${LLVM_NATIVE_ARCH}Disassembler - -/* LLVM name for the native Target init function, if available */ -#cmakedefine LLVM_NATIVE_TARGET LLVMInitialize${LLVM_NATIVE_ARCH}Target - -/* LLVM name for the native TargetInfo init function, if available */ -#cmakedefine LLVM_NATIVE_TARGETINFO LLVMInitialize${LLVM_NATIVE_ARCH}TargetInfo - -/* LLVM name for the native target MC init function, if available */ -#cmakedefine LLVM_NATIVE_TARGETMC LLVMInitialize${LLVM_NATIVE_ARCH}TargetMC - /* Define if this is Unixish platform */ #cmakedefine LLVM_ON_UNIX ${LLVM_ON_UNIX} /* Define if this is Win32ish platform */ #cmakedefine LLVM_ON_WIN32 ${LLVM_ON_WIN32} +/* Define if overriding target triple is enabled */ +#cmakedefine LLVM_TARGET_TRIPLE_ENV "${LLVM_TARGET_TRIPLE_ENV}" + /* Define if we have the Intel JIT API runtime support library */ #cmakedefine01 LLVM_USE_INTEL_JITEVENTS diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/CVRecord.h b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/CVRecord.h index ac8aaafeadc1b..44040e04388af 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/CVRecord.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/CVRecord.h @@ -14,6 +14,7 @@ #include "llvm/ADT/Optional.h" #include "llvm/DebugInfo/CodeView/CodeViewError.h" #include "llvm/DebugInfo/CodeView/RecordSerialization.h" +#include "llvm/DebugInfo/CodeView/TypeIndex.h" #include "llvm/Support/BinaryStreamReader.h" #include "llvm/Support/BinaryStreamRef.h" #include "llvm/Support/Endian.h" @@ -26,12 +27,19 @@ namespace codeview { template class CVRecord { public: - CVRecord() = default; + CVRecord() : Type(static_cast(0)) {} + CVRecord(Kind K, ArrayRef Data) : Type(K), RecordData(Data) {} + bool valid() const { return Type != static_cast(0); } + uint32_t length() const { return RecordData.size(); } Kind kind() const { return Type; } ArrayRef data() const { return RecordData; } + StringRef str_data() const { + return StringRef(reinterpret_cast(RecordData.data()), + RecordData.size()); + } ArrayRef content() const { return RecordData.drop_front(sizeof(RecordPrefix)); @@ -46,14 +54,19 @@ template class CVRecord { Optional Hash; }; +template struct RemappedRecord { + explicit RemappedRecord(const CVRecord &R) : OriginalRecord(R) {} + + CVRecord OriginalRecord; + SmallVector, 8> Mappings; +}; + } // end namespace codeview template struct VarStreamArrayExtractor> { - typedef void ContextType; - - static Error extract(BinaryStreamRef Stream, uint32_t &Len, - codeview::CVRecord &Item) { + 
Error operator()(BinaryStreamRef Stream, uint32_t &Len, + codeview::CVRecord &Item) { using namespace codeview; const RecordPrefix *Prefix = nullptr; BinaryStreamReader Reader(Stream); diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/CVSymbolVisitor.h b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/CVSymbolVisitor.h index b2d3f5ea34a84..7c8cd121751a2 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/CVSymbolVisitor.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/CVSymbolVisitor.h @@ -25,7 +25,9 @@ class CVSymbolVisitor { CVSymbolVisitor(SymbolVisitorCallbacks &Callbacks); Error visitSymbolRecord(CVSymbol &Record); + Error visitSymbolRecord(CVSymbol &Record, uint32_t Offset); Error visitSymbolStream(const CVSymbolArray &Symbols); + Error visitSymbolStream(const CVSymbolArray &Symbols, uint32_t InitialOffset); private: SymbolVisitorCallbacks &Callbacks; diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/CVTypeVisitor.h b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/CVTypeVisitor.h index f3122f0bf7f0f..df55e181364ca 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/CVTypeVisitor.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/CVTypeVisitor.h @@ -10,42 +10,42 @@ #ifndef LLVM_DEBUGINFO_CODEVIEW_CVTYPEVISITOR_H #define LLVM_DEBUGINFO_CODEVIEW_CVTYPEVISITOR_H -#include "llvm/ADT/TinyPtrVector.h" #include "llvm/DebugInfo/CodeView/CVRecord.h" #include "llvm/DebugInfo/CodeView/TypeRecord.h" -#include "llvm/DebugInfo/CodeView/TypeServerHandler.h" -#include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h" #include "llvm/Support/Error.h" namespace llvm { namespace codeview { +class TypeCollection; +class TypeVisitorCallbacks; + +enum VisitorDataSource { + VDS_BytesPresent, // The record bytes are passed into the the visitation + // function. The algorithm should first deserialize them + // before passing them on through the pipeline. + VDS_BytesExternal // The record bytes are not present, and it is the + // responsibility of the visitor callback interface to + // supply the bytes. +}; -class CVTypeVisitor { -public: - explicit CVTypeVisitor(TypeVisitorCallbacks &Callbacks); - - void addTypeServerHandler(TypeServerHandler &Handler); - - Error visitTypeRecord(CVType &Record, TypeIndex Index); - Error visitTypeRecord(CVType &Record); - Error visitMemberRecord(CVMemberRecord &Record); - - /// Visits the type records in Data. Sets the error flag on parse failures. - Error visitTypeStream(const CVTypeArray &Types); - Error visitTypeStream(CVTypeRange Types); - - Error visitFieldListMemberStream(ArrayRef FieldList); - Error visitFieldListMemberStream(BinaryStreamReader Reader); +Error visitTypeRecord(CVType &Record, TypeIndex Index, + TypeVisitorCallbacks &Callbacks, + VisitorDataSource Source = VDS_BytesPresent); +Error visitTypeRecord(CVType &Record, TypeVisitorCallbacks &Callbacks, + VisitorDataSource Source = VDS_BytesPresent); -private: - Expected handleTypeServer(CVType &Record); - Error finishVisitation(CVType &Record); +Error visitMemberRecord(CVMemberRecord Record, TypeVisitorCallbacks &Callbacks, + VisitorDataSource Source = VDS_BytesPresent); +Error visitMemberRecord(TypeLeafKind Kind, ArrayRef Record, + TypeVisitorCallbacks &Callbacks); - /// The interface to the class that gets notified of each visitation. 
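// For illustration: the hunk above dissolves the CVTypeVisitor class into
// free functions that take the callback interface explicitly. A hedged usage
// sketch (MyCallbacks is hypothetical; the visitTypeStream signature is the
// one declared above):
//
//   struct MyCallbacks : public TypeVisitorCallbacks {
//     // ...override the visit* hooks of interest...
//   };
//
//   Error walkTypes(const CVTypeArray &Types) {
//     MyCallbacks Callbacks;
//     // VDS_BytesPresent (the default) deserializes each record before it
//     // is handed to the callbacks.
//     return visitTypeStream(Types, Callbacks);
//   }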
- TypeVisitorCallbacks &Callbacks; +Error visitMemberRecordStream(ArrayRef FieldList, + TypeVisitorCallbacks &Callbacks); - TinyPtrVector Handlers; -}; +Error visitTypeStream(const CVTypeArray &Types, TypeVisitorCallbacks &Callbacks, + VisitorDataSource Source = VDS_BytesPresent); +Error visitTypeStream(CVTypeRange Types, TypeVisitorCallbacks &Callbacks); +Error visitTypeStream(TypeCollection &Types, TypeVisitorCallbacks &Callbacks); } // end namespace codeview } // end namespace llvm diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/CodeView.h b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/CodeView.h index f881ad0c9d805..b7a7e33abadf8 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/CodeView.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/CodeView.h @@ -6,6 +6,10 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// +// +// Defines constants and basic types describing CodeView debug information. +// +//===----------------------------------------------------------------------===// #ifndef LLVM_DEBUGINFO_CODEVIEW_CODEVIEW_H #define LLVM_DEBUGINFO_CODEVIEW_CODEVIEW_H @@ -13,6 +17,8 @@ #include #include +#include "llvm/Support/Endian.h" + namespace llvm { namespace codeview { @@ -20,28 +26,28 @@ namespace codeview { /// documentation and headers talk about this as the "leaf" type. enum class TypeRecordKind : uint16_t { #define TYPE_RECORD(lf_ename, value, name) name = value, -#include "TypeRecords.def" +#include "CodeViewTypes.def" }; /// Duplicate copy of the above enum, but using the official CV names. Useful /// for reference purposes and when dealing with unknown record types. enum TypeLeafKind : uint16_t { #define CV_TYPE(name, val) name = val, -#include "TypeRecords.def" +#include "CodeViewTypes.def" }; /// Distinguishes individual records in the Symbols subsection of a .debug$S /// section. Equivalent to SYM_ENUM_e in cvinfo.h. enum class SymbolRecordKind : uint16_t { #define SYMBOL_RECORD(lf_ename, value, name) name = value, -#include "CVSymbolTypes.def" +#include "CodeViewSymbols.def" }; /// Duplicate copy of the above enum, but using the official CV names. Useful /// for reference purposes and when dealing with unknown record types. enum SymbolKind : uint16_t { #define CV_SYMBOL(name, val) name = val, -#include "CVSymbolTypes.def" +#include "CodeViewSymbols.def" }; #define CV_DEFINE_ENUM_CLASS_FLAGS_OPERATORS(Class) \ @@ -278,7 +284,7 @@ CV_DEFINE_ENUM_CLASS_FLAGS_OPERATORS(MethodOptions) /// Equivalent to CV_LABEL_TYPE_e. enum class LabelType : uint16_t { Near = 0x0, - Far = 0x4, + Far = 0x4, }; /// Equivalent to CV_modifier_t. @@ -291,7 +297,7 @@ enum class ModifierOptions : uint16_t { }; CV_DEFINE_ENUM_CLASS_FLAGS_OPERATORS(ModifierOptions) -enum class ModuleDebugFragmentKind : uint32_t { +enum class DebugSubsectionKind : uint32_t { None = 0, Symbols = 0xf1, Lines = 0xf2, @@ -396,6 +402,16 @@ enum class LocalSymFlags : uint16_t { }; CV_DEFINE_ENUM_CLASS_FLAGS_OPERATORS(LocalSymFlags) +/// Corresponds to the CV_PUBSYMFLAGS bitfield. +enum class PublicSymFlags : uint32_t { + None = 0, + Code = 1 << 0, + Function = 1 << 1, + Managed = 1 << 2, + MSIL = 1 << 3, +}; +CV_DEFINE_ENUM_CLASS_FLAGS_OPERATORS(PublicSymFlags) + /// Corresponds to the CV_PROCFLAGS bitfield. enum class ProcSymFlags : uint8_t { None = 0, @@ -412,6 +428,8 @@ CV_DEFINE_ENUM_CLASS_FLAGS_OPERATORS(ProcSymFlags) /// Corresponds to COMPILESYM2::Flags bitfield. 
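// For illustration: the renamed .def files included above are X-macro lists;
// the same record list is included repeatedly with CV_TYPE/TYPE_RECORD (or
// CV_SYMBOL/SYMBOL_RECORD) redefined at each inclusion site to expand into an
// enum, a switch, a string table, and so on. The pattern in miniature
// (hypothetical colors.def, not part of this patch):
//
//   // colors.def -- no include guard on purpose; ends with #undef COLOR
//   COLOR(Red, 0)
//   COLOR(Green, 1)
//   #undef COLOR
//
//   // client.cpp
//   enum Color {
//   #define COLOR(name, value) name = value,
//   #include "colors.def"
//   };
//
//   const char *toString(Color C) {
//     switch (C) {
//   #define COLOR(name, value) case name: return #name;
//   #include "colors.def"
//     }
//     return "unknown";
//   }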
enum class CompileSym2Flags : uint32_t { + None = 0, + SourceLanguageMask = 0xFF, EC = 1 << 8, NoDbgInfo = 1 << 9, LTCG = 1 << 10, @@ -426,6 +444,8 @@ CV_DEFINE_ENUM_CLASS_FLAGS_OPERATORS(CompileSym2Flags) /// Corresponds to COMPILESYM3::Flags bitfield. enum class CompileSym3Flags : uint32_t { + None = 0, + SourceLanguageMask = 0xFF, EC = 1 << 8, NoDbgInfo = 1 << 9, LTCG = 1 << 10, @@ -442,6 +462,7 @@ enum class CompileSym3Flags : uint32_t { CV_DEFINE_ENUM_CLASS_FLAGS_OPERATORS(CompileSym3Flags) enum class ExportFlags : uint16_t { + None = 0, IsConstant = 1 << 0, IsData = 1 << 1, IsPrivate = 1 << 2, @@ -550,6 +571,50 @@ enum LineFlags : uint16_t { LF_None = 0, LF_HaveColumns = 1, // CV_LINES_HAVE_COLUMNS }; + +/// Data in the the SUBSEC_FRAMEDATA subection. +struct FrameData { + support::ulittle32_t RvaStart; + support::ulittle32_t CodeSize; + support::ulittle32_t LocalSize; + support::ulittle32_t ParamsSize; + support::ulittle32_t MaxStackSize; + support::ulittle32_t FrameFunc; + support::ulittle16_t PrologSize; + support::ulittle16_t SavedRegsSize; + support::ulittle32_t Flags; + enum : uint32_t { + HasSEH = 1 << 0, + HasEH = 1 << 1, + IsFunctionStart = 1 << 2, + }; +}; + +// Corresponds to LocalIdAndGlobalIdPair structure. +// This structure information allows cross-referencing between PDBs. For +// example, when a PDB is being built during compilation it is not yet known +// what other modules may end up in the PDB at link time. So certain types of +// IDs may clash between the various compile time PDBs. For each affected +// module, a subsection would be put into the PDB containing a mapping from its +// local IDs to a single ID namespace for all items in the PDB file. +struct CrossModuleExport { + support::ulittle32_t Local; + support::ulittle32_t Global; +}; + +struct CrossModuleImport { + support::ulittle32_t ModuleNameOffset; + support::ulittle32_t Count; // Number of elements + // support::ulittle32_t ids[Count]; // id from referenced module +}; + +enum class CodeViewContainer { ObjectFile, Pdb }; + +inline uint32_t alignOf(CodeViewContainer Container) { + if (Container == CodeViewContainer::ObjectFile) + return 1; + return 4; +} } } diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h index b3976826a316c..94f104ff772c2 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h @@ -84,7 +84,7 @@ class CodeViewRecordIO { Error mapEncodedInteger(uint64_t &Value); Error mapEncodedInteger(APSInt &Value); Error mapStringZ(StringRef &Value); - Error mapGuid(StringRef &Guid); + Error mapGuid(GUID &Guid); Error mapStringZVectorZ(std::vector &Value); @@ -136,6 +136,7 @@ class CodeViewRecordIO { Error mapByteVectorTail(ArrayRef &Bytes); Error mapByteVectorTail(std::vector &Bytes); + Error padToAlignment(uint32_t Align); Error skipPadding(); private: diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/CodeViewSymbols.def b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/CodeViewSymbols.def new file mode 100644 index 0000000000000..32813d861d909 --- /dev/null +++ b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/CodeViewSymbols.def @@ -0,0 +1,258 @@ +//===-- CVLeafTypes.def - All CodeView leaf types ---------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// See LEAF_ENUM_e in cvinfo.h. This should match the constants there. +// +//===----------------------------------------------------------------------===// + +#ifndef CV_SYMBOL +#define CV_SYMBOL(ename, value) +#endif + +#ifndef SYMBOL_RECORD +#define SYMBOL_RECORD(lf_ename, value, name) CV_SYMBOL(lf_ename, value) +#endif + +#ifndef SYMBOL_RECORD_ALIAS +#define SYMBOL_RECORD_ALIAS(lf_ename, value, name, alias_name) \ + SYMBOL_RECORD(lf_ename, value, name) +#endif + +// 16 bit symbol types. Not very useful, provided only for reference. +CV_SYMBOL(S_COMPILE , 0x0001) +CV_SYMBOL(S_REGISTER_16t , 0x0002) +CV_SYMBOL(S_CONSTANT_16t , 0x0003) +CV_SYMBOL(S_UDT_16t , 0x0004) +CV_SYMBOL(S_SSEARCH , 0x0005) +CV_SYMBOL(S_SKIP , 0x0007) +CV_SYMBOL(S_CVRESERVE , 0x0008) +CV_SYMBOL(S_OBJNAME_ST , 0x0009) +CV_SYMBOL(S_ENDARG , 0x000a) +CV_SYMBOL(S_COBOLUDT_16t , 0x000b) +CV_SYMBOL(S_MANYREG_16t , 0x000c) +CV_SYMBOL(S_RETURN , 0x000d) +CV_SYMBOL(S_ENTRYTHIS , 0x000e) +CV_SYMBOL(S_BPREL16 , 0x0100) +CV_SYMBOL(S_LDATA16 , 0x0101) +CV_SYMBOL(S_GDATA16 , 0x0102) +CV_SYMBOL(S_PUB16 , 0x0103) +CV_SYMBOL(S_LPROC16 , 0x0104) +CV_SYMBOL(S_GPROC16 , 0x0105) +CV_SYMBOL(S_THUNK16 , 0x0106) +CV_SYMBOL(S_BLOCK16 , 0x0107) +CV_SYMBOL(S_WITH16 , 0x0108) +CV_SYMBOL(S_LABEL16 , 0x0109) +CV_SYMBOL(S_CEXMODEL16 , 0x010a) +CV_SYMBOL(S_VFTABLE16 , 0x010b) +CV_SYMBOL(S_REGREL16 , 0x010c) +CV_SYMBOL(S_BPREL32_16t , 0x0200) +CV_SYMBOL(S_LDATA32_16t , 0x0201) +CV_SYMBOL(S_GDATA32_16t , 0x0202) +CV_SYMBOL(S_PUB32_16t , 0x0203) +CV_SYMBOL(S_LPROC32_16t , 0x0204) +CV_SYMBOL(S_GPROC32_16t , 0x0205) +CV_SYMBOL(S_THUNK32_ST , 0x0206) +CV_SYMBOL(S_BLOCK32_ST , 0x0207) +CV_SYMBOL(S_WITH32_ST , 0x0208) +CV_SYMBOL(S_LABEL32_ST , 0x0209) +CV_SYMBOL(S_CEXMODEL32 , 0x020a) +CV_SYMBOL(S_VFTABLE32_16t , 0x020b) +CV_SYMBOL(S_REGREL32_16t , 0x020c) +CV_SYMBOL(S_LTHREAD32_16t , 0x020d) +CV_SYMBOL(S_GTHREAD32_16t , 0x020e) +CV_SYMBOL(S_SLINK32 , 0x020f) +CV_SYMBOL(S_LPROCMIPS_16t , 0x0300) +CV_SYMBOL(S_GPROCMIPS_16t , 0x0301) +CV_SYMBOL(S_PROCREF_ST , 0x0400) +CV_SYMBOL(S_DATAREF_ST , 0x0401) +CV_SYMBOL(S_ALIGN , 0x0402) +CV_SYMBOL(S_LPROCREF_ST , 0x0403) +CV_SYMBOL(S_OEM , 0x0404) + +// All post 16 bit symbol types have the 0x1000 bit set. +CV_SYMBOL(S_TI16_MAX , 0x1000) + +// Mostly unused "start" symbol types. 
+CV_SYMBOL(S_REGISTER_ST , 0x1001) +CV_SYMBOL(S_CONSTANT_ST , 0x1002) +CV_SYMBOL(S_UDT_ST , 0x1003) +CV_SYMBOL(S_COBOLUDT_ST , 0x1004) +CV_SYMBOL(S_MANYREG_ST , 0x1005) +CV_SYMBOL(S_BPREL32_ST , 0x1006) +CV_SYMBOL(S_LDATA32_ST , 0x1007) +CV_SYMBOL(S_GDATA32_ST , 0x1008) +CV_SYMBOL(S_PUB32_ST , 0x1009) +CV_SYMBOL(S_LPROC32_ST , 0x100a) +CV_SYMBOL(S_GPROC32_ST , 0x100b) +CV_SYMBOL(S_VFTABLE32 , 0x100c) +CV_SYMBOL(S_REGREL32_ST , 0x100d) +CV_SYMBOL(S_LTHREAD32_ST , 0x100e) +CV_SYMBOL(S_GTHREAD32_ST , 0x100f) +CV_SYMBOL(S_LPROCMIPS_ST , 0x1010) +CV_SYMBOL(S_GPROCMIPS_ST , 0x1011) + +CV_SYMBOL(S_COMPILE2_ST , 0x1013) +CV_SYMBOL(S_MANYREG2_ST , 0x1014) +CV_SYMBOL(S_LPROCIA64_ST , 0x1015) +CV_SYMBOL(S_GPROCIA64_ST , 0x1016) +CV_SYMBOL(S_LOCALSLOT_ST , 0x1017) +CV_SYMBOL(S_PARAMSLOT_ST , 0x1018) +CV_SYMBOL(S_ANNOTATION , 0x1019) +CV_SYMBOL(S_GMANPROC_ST , 0x101a) +CV_SYMBOL(S_LMANPROC_ST , 0x101b) +CV_SYMBOL(S_RESERVED1 , 0x101c) +CV_SYMBOL(S_RESERVED2 , 0x101d) +CV_SYMBOL(S_RESERVED3 , 0x101e) +CV_SYMBOL(S_RESERVED4 , 0x101f) +CV_SYMBOL(S_LMANDATA_ST , 0x1020) +CV_SYMBOL(S_GMANDATA_ST , 0x1021) +CV_SYMBOL(S_MANFRAMEREL_ST, 0x1022) +CV_SYMBOL(S_MANREGISTER_ST, 0x1023) +CV_SYMBOL(S_MANSLOT_ST , 0x1024) +CV_SYMBOL(S_MANMANYREG_ST , 0x1025) +CV_SYMBOL(S_MANREGREL_ST , 0x1026) +CV_SYMBOL(S_MANMANYREG2_ST, 0x1027) +CV_SYMBOL(S_MANTYPREF , 0x1028) +CV_SYMBOL(S_UNAMESPACE_ST , 0x1029) + +// End of S_*_ST symbols, which do not appear to be generated by modern +// compilers. +CV_SYMBOL(S_ST_MAX , 0x1100) + + +CV_SYMBOL(S_WITH32 , 0x1104) +CV_SYMBOL(S_MANYREG , 0x110a) +CV_SYMBOL(S_LPROCMIPS , 0x1114) +CV_SYMBOL(S_GPROCMIPS , 0x1115) +CV_SYMBOL(S_MANYREG2 , 0x1117) +CV_SYMBOL(S_LPROCIA64 , 0x1118) +CV_SYMBOL(S_GPROCIA64 , 0x1119) +CV_SYMBOL(S_LOCALSLOT , 0x111a) +CV_SYMBOL(S_PARAMSLOT , 0x111b) + +// Managed code symbols. +CV_SYMBOL(S_MANFRAMEREL , 0x111e) +CV_SYMBOL(S_MANREGISTER , 0x111f) +CV_SYMBOL(S_MANSLOT , 0x1120) +CV_SYMBOL(S_MANMANYREG , 0x1121) +CV_SYMBOL(S_MANREGREL , 0x1122) +CV_SYMBOL(S_MANMANYREG2 , 0x1123) +CV_SYMBOL(S_UNAMESPACE , 0x1124) +CV_SYMBOL(S_DATAREF , 0x1126) +CV_SYMBOL(S_ANNOTATIONREF , 0x1128) +CV_SYMBOL(S_TOKENREF , 0x1129) +CV_SYMBOL(S_GMANPROC , 0x112a) +CV_SYMBOL(S_LMANPROC , 0x112b) +CV_SYMBOL(S_ATTR_FRAMEREL , 0x112e) +CV_SYMBOL(S_ATTR_REGISTER , 0x112f) +CV_SYMBOL(S_ATTR_REGREL , 0x1130) +CV_SYMBOL(S_ATTR_MANYREG , 0x1131) + + +CV_SYMBOL(S_SEPCODE , 0x1132) +CV_SYMBOL(S_LOCAL_2005 , 0x1133) +CV_SYMBOL(S_DEFRANGE_2005 , 0x1134) +CV_SYMBOL(S_DEFRANGE2_2005, 0x1135) +CV_SYMBOL(S_DISCARDED , 0x113b) + +// Current symbol types for most procedures as of this writing. 
+CV_SYMBOL(S_LPROCMIPS_ID , 0x1148) +CV_SYMBOL(S_GPROCMIPS_ID , 0x1149) +CV_SYMBOL(S_LPROCIA64_ID , 0x114a) +CV_SYMBOL(S_GPROCIA64_ID , 0x114b) + +CV_SYMBOL(S_DEFRANGE_HLSL , 0x1150) +CV_SYMBOL(S_GDATA_HLSL , 0x1151) +CV_SYMBOL(S_LDATA_HLSL , 0x1152) +CV_SYMBOL(S_LOCAL_DPC_GROUPSHARED, 0x1154) +CV_SYMBOL(S_DEFRANGE_DPC_PTR_TAG, 0x1157) +CV_SYMBOL(S_DPC_SYM_TAG_MAP, 0x1158) +CV_SYMBOL(S_ARMSWITCHTABLE , 0x1159) +CV_SYMBOL(S_POGODATA , 0x115c) +CV_SYMBOL(S_INLINESITE2 , 0x115d) +CV_SYMBOL(S_MOD_TYPEREF , 0x115f) +CV_SYMBOL(S_REF_MINIPDB , 0x1160) +CV_SYMBOL(S_PDBMAP , 0x1161) +CV_SYMBOL(S_GDATA_HLSL32 , 0x1162) +CV_SYMBOL(S_LDATA_HLSL32 , 0x1163) +CV_SYMBOL(S_GDATA_HLSL32_EX, 0x1164) +CV_SYMBOL(S_LDATA_HLSL32_EX, 0x1165) + +// Known symbol types +SYMBOL_RECORD(S_END , 0x0006, ScopeEndSym) +SYMBOL_RECORD_ALIAS(S_INLINESITE_END , 0x114e, InlineSiteEnd, ScopeEndSym) +SYMBOL_RECORD_ALIAS(S_PROC_ID_END , 0x114f, ProcEnd, ScopeEndSym) + +SYMBOL_RECORD(S_THUNK32 , 0x1102, Thunk32Sym) +SYMBOL_RECORD(S_TRAMPOLINE , 0x112c, TrampolineSym) +SYMBOL_RECORD(S_SECTION , 0x1136, SectionSym) +SYMBOL_RECORD(S_COFFGROUP , 0x1137, CoffGroupSym) +SYMBOL_RECORD(S_EXPORT , 0x1138, ExportSym) + +SYMBOL_RECORD(S_LPROC32 , 0x110f, ProcSym) +SYMBOL_RECORD_ALIAS(S_GPROC32 , 0x1110, GlobalProcSym, ProcSym) +SYMBOL_RECORD_ALIAS(S_LPROC32_ID , 0x1146, ProcIdSym, ProcSym) +SYMBOL_RECORD_ALIAS(S_GPROC32_ID , 0x1147, GlobalProcIdSym, ProcSym) +SYMBOL_RECORD_ALIAS(S_LPROC32_DPC , 0x1155, DPCProcSym, ProcSym) +SYMBOL_RECORD_ALIAS(S_LPROC32_DPC_ID , 0x1156, DPCProcIdSym, ProcSym) + +SYMBOL_RECORD(S_REGISTER , 0x1106, RegisterSym) +SYMBOL_RECORD(S_PUB32 , 0x110e, PublicSym32) + +SYMBOL_RECORD(S_PROCREF , 0x1125, ProcRefSym) +SYMBOL_RECORD_ALIAS(S_LPROCREF, 0x1127, LocalProcRef, ProcRefSym) + + +SYMBOL_RECORD(S_ENVBLOCK , 0x113d, EnvBlockSym) + +SYMBOL_RECORD(S_INLINESITE , 0x114d, InlineSiteSym) +SYMBOL_RECORD(S_LOCAL , 0x113e, LocalSym) +SYMBOL_RECORD(S_DEFRANGE , 0x113f, DefRangeSym) +SYMBOL_RECORD(S_DEFRANGE_SUBFIELD, 0x1140, DefRangeSubfieldSym) +SYMBOL_RECORD(S_DEFRANGE_REGISTER, 0x1141, DefRangeRegisterSym) +SYMBOL_RECORD(S_DEFRANGE_FRAMEPOINTER_REL, 0x1142, DefRangeFramePointerRelSym) +SYMBOL_RECORD(S_DEFRANGE_SUBFIELD_REGISTER, 0x1143, DefRangeSubfieldRegisterSym) +SYMBOL_RECORD(S_DEFRANGE_FRAMEPOINTER_REL_FULL_SCOPE, 0x1144, DefRangeFramePointerRelFullScopeSym) +SYMBOL_RECORD(S_DEFRANGE_REGISTER_REL, 0x1145, DefRangeRegisterRelSym) +SYMBOL_RECORD(S_BLOCK32 , 0x1103, BlockSym) +SYMBOL_RECORD(S_LABEL32 , 0x1105, LabelSym) +SYMBOL_RECORD(S_OBJNAME , 0x1101, ObjNameSym) +SYMBOL_RECORD(S_COMPILE2 , 0x1116, Compile2Sym) +SYMBOL_RECORD(S_COMPILE3 , 0x113c, Compile3Sym) +SYMBOL_RECORD(S_FRAMEPROC , 0x1012, FrameProcSym) +SYMBOL_RECORD(S_CALLSITEINFO , 0x1139, CallSiteInfoSym) +SYMBOL_RECORD(S_FILESTATIC , 0x1153, FileStaticSym) +SYMBOL_RECORD(S_HEAPALLOCSITE , 0x115e, HeapAllocationSiteSym) +SYMBOL_RECORD(S_FRAMECOOKIE , 0x113a, FrameCookieSym) + +SYMBOL_RECORD(S_CALLEES , 0x115a, CallerSym) +SYMBOL_RECORD_ALIAS(S_CALLERS , 0x115b, CalleeSym, CallerSym) + +SYMBOL_RECORD(S_UDT , 0x1108, UDTSym) +SYMBOL_RECORD_ALIAS(S_COBOLUDT , 0x1109, CobolUDT, UDTSym) + +SYMBOL_RECORD(S_BUILDINFO , 0x114c, BuildInfoSym) +SYMBOL_RECORD(S_BPREL32 , 0x110b, BPRelativeSym) +SYMBOL_RECORD(S_REGREL32 , 0x1111, RegRelativeSym) + +SYMBOL_RECORD(S_CONSTANT , 0x1107, ConstantSym) +SYMBOL_RECORD_ALIAS(S_MANCONSTANT , 0x112d, ManagedConstant, ConstantSym) + +SYMBOL_RECORD(S_LDATA32 , 0x110c, DataSym) +SYMBOL_RECORD_ALIAS(S_GDATA32 , 0x110d, GlobalData, 
DataSym) +SYMBOL_RECORD_ALIAS(S_LMANDATA , 0x111c, ManagedLocalData, DataSym) +SYMBOL_RECORD_ALIAS(S_GMANDATA , 0x111d, ManagedGlobalData, DataSym) + +SYMBOL_RECORD(S_LTHREAD32 , 0x1112, ThreadLocalDataSym) +SYMBOL_RECORD_ALIAS(S_GTHREAD32 , 0x1113, GlobalTLS, ThreadLocalDataSym) + + +#undef CV_SYMBOL +#undef SYMBOL_RECORD +#undef SYMBOL_RECORD_ALIAS diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/CodeViewTypes.def b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/CodeViewTypes.def new file mode 100644 index 0000000000000..8c193bb13cb7e --- /dev/null +++ b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/CodeViewTypes.def @@ -0,0 +1,251 @@ + +//===-- CVLeafTypes.def - All CodeView leaf types ---------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// See LEAF_ENUM_e in cvinfo.h. This should match the constants there. +// +//===----------------------------------------------------------------------===// + +// If the type is known, then we have a record describing it in TypeRecord.h. + +#ifndef CV_TYPE +#define CV_TYPE(lf_ename, value) +#endif + +// If the type is known, then we have a record describing it in TypeRecord.h. +#ifndef TYPE_RECORD +#define TYPE_RECORD(lf_ename, value, name) CV_TYPE(lf_ename, value) +#endif + +#ifndef TYPE_RECORD_ALIAS +#define TYPE_RECORD_ALIAS(lf_ename, value, name, alias_name) \ + TYPE_RECORD(lf_ename, value, name) +#endif + +#ifndef MEMBER_RECORD +#define MEMBER_RECORD(lf_ename, value, name) TYPE_RECORD(lf_ename, value, name) +#endif + +#ifndef MEMBER_RECORD_ALIAS +#define MEMBER_RECORD_ALIAS(lf_ename, value, name, alias_name) \ + MEMBER_RECORD(lf_ename, value, name) +#endif + +TYPE_RECORD(LF_POINTER, 0x1002, Pointer) +TYPE_RECORD(LF_MODIFIER, 0x1001, Modifier) +TYPE_RECORD(LF_PROCEDURE, 0x1008, Procedure) +TYPE_RECORD(LF_MFUNCTION, 0x1009, MemberFunction) +TYPE_RECORD(LF_LABEL, 0x000e, Label) +TYPE_RECORD(LF_ARGLIST, 0x1201, ArgList) + +TYPE_RECORD(LF_FIELDLIST, 0x1203, FieldList) + +TYPE_RECORD(LF_ARRAY, 0x1503, Array) +TYPE_RECORD(LF_CLASS, 0x1504, Class) +TYPE_RECORD_ALIAS(LF_STRUCTURE, 0x1505, Struct, Class) +TYPE_RECORD_ALIAS(LF_INTERFACE, 0x1519, Interface, Class) +TYPE_RECORD(LF_UNION, 0x1506, Union) +TYPE_RECORD(LF_ENUM, 0x1507, Enum) +TYPE_RECORD(LF_TYPESERVER2, 0x1515, TypeServer2) +TYPE_RECORD(LF_VFTABLE, 0x151d, VFTable) +TYPE_RECORD(LF_VTSHAPE, 0x000a, VFTableShape) + +TYPE_RECORD(LF_BITFIELD, 0x1205, BitField) + +// Member type records. These are generally not length prefixed, and appear +// inside of a field list record. +MEMBER_RECORD(LF_BCLASS, 0x1400, BaseClass) +MEMBER_RECORD_ALIAS(LF_BINTERFACE, 0x151a, BaseInterface, BaseClass) + +MEMBER_RECORD(LF_VBCLASS, 0x1401, VirtualBaseClass) +MEMBER_RECORD_ALIAS(LF_IVBCLASS, 0x1402, IndirectVirtualBaseClass, + VirtualBaseClass) + +MEMBER_RECORD(LF_VFUNCTAB, 0x1409, VFPtr) +MEMBER_RECORD(LF_STMEMBER, 0x150e, StaticDataMember) +MEMBER_RECORD(LF_METHOD, 0x150f, OverloadedMethod) +MEMBER_RECORD(LF_MEMBER, 0x150d, DataMember) +MEMBER_RECORD(LF_NESTTYPE, 0x1510, NestedType) +MEMBER_RECORD(LF_ONEMETHOD, 0x1511, OneMethod) +MEMBER_RECORD(LF_ENUMERATE, 0x1502, Enumerator) +MEMBER_RECORD(LF_INDEX, 0x1404, ListContinuation) + +// ID leaf records. Subsequent leaf types may be referenced from .debug$S. 
+TYPE_RECORD(LF_FUNC_ID, 0x1601, FuncId) +TYPE_RECORD(LF_MFUNC_ID, 0x1602, MemberFuncId) +TYPE_RECORD(LF_BUILDINFO, 0x1603, BuildInfo) +TYPE_RECORD(LF_SUBSTR_LIST, 0x1604, StringList) +TYPE_RECORD(LF_STRING_ID, 0x1605, StringId) +TYPE_RECORD(LF_UDT_SRC_LINE, 0x1606, UdtSourceLine) +TYPE_RECORD(LF_UDT_MOD_SRC_LINE, 0x1607, UdtModSourceLine) + + +TYPE_RECORD(LF_METHODLIST, 0x1206, MethodOverloadList) + + +// 16 bit type records. +CV_TYPE(LF_MODIFIER_16t, 0x0001) +CV_TYPE(LF_POINTER_16t, 0x0002) +CV_TYPE(LF_ARRAY_16t, 0x0003) +CV_TYPE(LF_CLASS_16t, 0x0004) +CV_TYPE(LF_STRUCTURE_16t, 0x0005) +CV_TYPE(LF_UNION_16t, 0x0006) +CV_TYPE(LF_ENUM_16t, 0x0007) +CV_TYPE(LF_PROCEDURE_16t, 0x0008) +CV_TYPE(LF_MFUNCTION_16t, 0x0009) +CV_TYPE(LF_COBOL0_16t, 0x000b) +CV_TYPE(LF_COBOL1, 0x000c) +CV_TYPE(LF_BARRAY_16t, 0x000d) +CV_TYPE(LF_NULLLEAF, 0x000f) // LF_NULL +CV_TYPE(LF_NOTTRAN, 0x0010) +CV_TYPE(LF_DIMARRAY_16t, 0x0011) +CV_TYPE(LF_VFTPATH_16t, 0x0012) +CV_TYPE(LF_PRECOMP_16t, 0x0013) +CV_TYPE(LF_ENDPRECOMP, 0x0014) +CV_TYPE(LF_OEM_16t, 0x0015) +CV_TYPE(LF_TYPESERVER_ST, 0x0016) + +CV_TYPE(LF_SKIP_16t, 0x0200) +CV_TYPE(LF_ARGLIST_16t, 0x0201) +CV_TYPE(LF_DEFARG_16t, 0x0202) +CV_TYPE(LF_LIST, 0x0203) +CV_TYPE(LF_FIELDLIST_16t, 0x0204) +CV_TYPE(LF_DERIVED_16t, 0x0205) +CV_TYPE(LF_BITFIELD_16t, 0x0206) +CV_TYPE(LF_METHODLIST_16t, 0x0207) +CV_TYPE(LF_DIMCONU_16t, 0x0208) +CV_TYPE(LF_DIMCONLU_16t, 0x0209) +CV_TYPE(LF_DIMVARU_16t, 0x020a) +CV_TYPE(LF_DIMVARLU_16t, 0x020b) +CV_TYPE(LF_REFSYM, 0x020c) + +// 16 bit member types. Generally not length prefixed. +CV_TYPE(LF_BCLASS_16t, 0x0400) +CV_TYPE(LF_VBCLASS_16t, 0x0401) +CV_TYPE(LF_IVBCLASS_16t, 0x0402) +CV_TYPE(LF_ENUMERATE_ST, 0x0403) +CV_TYPE(LF_FRIENDFCN_16t, 0x0404) +CV_TYPE(LF_INDEX_16t, 0x0405) +CV_TYPE(LF_MEMBER_16t, 0x0406) +CV_TYPE(LF_STMEMBER_16t, 0x0407) +CV_TYPE(LF_METHOD_16t, 0x0408) +CV_TYPE(LF_NESTTYPE_16t, 0x0409) +CV_TYPE(LF_VFUNCTAB_16t, 0x040a) +CV_TYPE(LF_FRIENDCLS_16t, 0x040b) +CV_TYPE(LF_ONEMETHOD_16t, 0x040c) +CV_TYPE(LF_VFUNCOFF_16t, 0x040d) + +CV_TYPE(LF_TI16_MAX, 0x1000) + +CV_TYPE(LF_ARRAY_ST, 0x1003) +CV_TYPE(LF_CLASS_ST, 0x1004) +CV_TYPE(LF_STRUCTURE_ST, 0x1005) +CV_TYPE(LF_UNION_ST, 0x1006) +CV_TYPE(LF_ENUM_ST, 0x1007) +CV_TYPE(LF_COBOL0, 0x100a) +CV_TYPE(LF_BARRAY, 0x100b) +CV_TYPE(LF_DIMARRAY_ST, 0x100c) +CV_TYPE(LF_VFTPATH, 0x100d) +CV_TYPE(LF_PRECOMP_ST, 0x100e) +CV_TYPE(LF_OEM, 0x100f) +CV_TYPE(LF_ALIAS_ST, 0x1010) +CV_TYPE(LF_OEM2, 0x1011) + +CV_TYPE(LF_SKIP, 0x1200) +CV_TYPE(LF_DEFARG_ST, 0x1202) +CV_TYPE(LF_DERIVED, 0x1204) +CV_TYPE(LF_DIMCONU, 0x1207) +CV_TYPE(LF_DIMCONLU, 0x1208) +CV_TYPE(LF_DIMVARU, 0x1209) +CV_TYPE(LF_DIMVARLU, 0x120a) + +// Member type records. These are generally not length prefixed, and appear +// inside of a field list record. 
+CV_TYPE(LF_FRIENDFCN_ST, 0x1403) +CV_TYPE(LF_MEMBER_ST, 0x1405) +CV_TYPE(LF_STMEMBER_ST, 0x1406) +CV_TYPE(LF_METHOD_ST, 0x1407) +CV_TYPE(LF_NESTTYPE_ST, 0x1408) +CV_TYPE(LF_FRIENDCLS, 0x140a) +CV_TYPE(LF_ONEMETHOD_ST, 0x140b) +CV_TYPE(LF_VFUNCOFF, 0x140c) +CV_TYPE(LF_NESTTYPEEX_ST, 0x140d) +CV_TYPE(LF_MEMBERMODIFY_ST, 0x140e) +CV_TYPE(LF_MANAGED_ST, 0x140f) + +CV_TYPE(LF_ST_MAX, 0x1500) +CV_TYPE(LF_TYPESERVER, 0x1501) +CV_TYPE(LF_DIMARRAY, 0x1508) +CV_TYPE(LF_PRECOMP, 0x1509) +CV_TYPE(LF_ALIAS, 0x150a) +CV_TYPE(LF_DEFARG, 0x150b) +CV_TYPE(LF_FRIENDFCN, 0x150c) +CV_TYPE(LF_NESTTYPEEX, 0x1512) +CV_TYPE(LF_MEMBERMODIFY, 0x1513) +CV_TYPE(LF_MANAGED, 0x1514) +CV_TYPE(LF_STRIDED_ARRAY, 0x1516) +CV_TYPE(LF_HLSL, 0x1517) +CV_TYPE(LF_MODIFIER_EX, 0x1518) +CV_TYPE(LF_VECTOR, 0x151b) +CV_TYPE(LF_MATRIX, 0x151c) + +// ID leaf records. Subsequent leaf types may be referenced from .debug$S. + +// Numeric leaf types. These are generally contained in other records, and not +// encountered in the main type stream. + +CV_TYPE(LF_NUMERIC, 0x8000) +CV_TYPE(LF_CHAR, 0x8000) +CV_TYPE(LF_SHORT, 0x8001) +CV_TYPE(LF_USHORT, 0x8002) +CV_TYPE(LF_LONG, 0x8003) +CV_TYPE(LF_ULONG, 0x8004) +CV_TYPE(LF_REAL32, 0x8005) +CV_TYPE(LF_REAL64, 0x8006) +CV_TYPE(LF_REAL80, 0x8007) +CV_TYPE(LF_REAL128, 0x8008) +CV_TYPE(LF_QUADWORD, 0x8009) +CV_TYPE(LF_UQUADWORD, 0x800a) +CV_TYPE(LF_REAL48, 0x800b) +CV_TYPE(LF_COMPLEX32, 0x800c) +CV_TYPE(LF_COMPLEX64, 0x800d) +CV_TYPE(LF_COMPLEX80, 0x800e) +CV_TYPE(LF_COMPLEX128, 0x800f) +CV_TYPE(LF_VARSTRING, 0x8010) +CV_TYPE(LF_OCTWORD, 0x8017) +CV_TYPE(LF_UOCTWORD, 0x8018) +CV_TYPE(LF_DECIMAL, 0x8019) +CV_TYPE(LF_DATE, 0x801a) +CV_TYPE(LF_UTF8STRING, 0x801b) +CV_TYPE(LF_REAL16, 0x801c) + +// Padding bytes. These are emitted into alignment bytes in the type stream. + +CV_TYPE(LF_PAD0, 0xf0) +CV_TYPE(LF_PAD1, 0xf1) +CV_TYPE(LF_PAD2, 0xf2) +CV_TYPE(LF_PAD3, 0xf3) +CV_TYPE(LF_PAD4, 0xf4) +CV_TYPE(LF_PAD5, 0xf5) +CV_TYPE(LF_PAD6, 0xf6) +CV_TYPE(LF_PAD7, 0xf7) +CV_TYPE(LF_PAD8, 0xf8) +CV_TYPE(LF_PAD9, 0xf9) +CV_TYPE(LF_PAD10, 0xfa) +CV_TYPE(LF_PAD11, 0xfb) +CV_TYPE(LF_PAD12, 0xfc) +CV_TYPE(LF_PAD13, 0xfd) +CV_TYPE(LF_PAD14, 0xfe) +CV_TYPE(LF_PAD15, 0xff) + +#undef CV_TYPE +#undef TYPE_RECORD +#undef TYPE_RECORD_ALIAS +#undef MEMBER_RECORD +#undef MEMBER_RECORD_ALIAS diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h new file mode 100644 index 0000000000000..78b284563afd9 --- /dev/null +++ b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h @@ -0,0 +1,104 @@ +//===- DebugChecksumsSubsection.h -------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_DEBUGCHECKSUMSSUBSECTION_H +#define LLVM_DEBUGINFO_CODEVIEW_DEBUGCHECKSUMSSUBSECTION_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/DebugSubsection.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/BinaryStreamArray.h" +#include "llvm/Support/BinaryStreamReader.h" +#include "llvm/Support/BinaryStreamRef.h" +#include "llvm/Support/Error.h" +#include +#include + +namespace llvm { + +namespace codeview { + +class DebugStringTableSubsection; + +struct FileChecksumEntry { + uint32_t FileNameOffset; // Byte offset of filename in global stringtable. + FileChecksumKind Kind; // The type of checksum. + ArrayRef Checksum; // The bytes of the checksum. +}; + +} // end namespace codeview + +template <> struct VarStreamArrayExtractor { +public: + using ContextType = void; + + Error operator()(BinaryStreamRef Stream, uint32_t &Len, + codeview::FileChecksumEntry &Item); +}; + +namespace codeview { + +class DebugChecksumsSubsectionRef final : public DebugSubsectionRef { + using FileChecksumArray = VarStreamArray; + using Iterator = FileChecksumArray::Iterator; + +public: + DebugChecksumsSubsectionRef() + : DebugSubsectionRef(DebugSubsectionKind::FileChecksums) {} + + static bool classof(const DebugSubsectionRef *S) { + return S->kind() == DebugSubsectionKind::FileChecksums; + } + + bool valid() const { return Checksums.valid(); } + + Error initialize(BinaryStreamReader Reader); + Error initialize(BinaryStreamRef Stream); + + Iterator begin() const { return Checksums.begin(); } + Iterator end() const { return Checksums.end(); } + + const FileChecksumArray &getArray() const { return Checksums; } + +private: + FileChecksumArray Checksums; +}; + +class DebugChecksumsSubsection final : public DebugSubsection { +public: + explicit DebugChecksumsSubsection(DebugStringTableSubsection &Strings); + + static bool classof(const DebugSubsection *S) { + return S->kind() == DebugSubsectionKind::FileChecksums; + } + + void addChecksum(StringRef FileName, FileChecksumKind Kind, + ArrayRef Bytes); + + uint32_t calculateSerializedSize() const override; + Error commit(BinaryStreamWriter &Writer) const override; + uint32_t mapChecksumOffset(StringRef FileName) const; + +private: + DebugStringTableSubsection &Strings; + + DenseMap OffsetMap; + uint32_t SerializedSize = 0; + BumpPtrAllocator Storage; + std::vector Checksums; +}; + +} // end namespace codeview + +} // end namespace llvm + +#endif // LLVM_DEBUGINFO_CODEVIEW_DEBUGCHECKSUMSSUBSECTION_H diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/DebugCrossExSubsection.h b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/DebugCrossExSubsection.h new file mode 100644 index 0000000000000..2f9e9814d998d --- /dev/null +++ b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/DebugCrossExSubsection.h @@ -0,0 +1,68 @@ +//===- DebugCrossExSubsection.h ---------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_CODEVIEW_DEBUGCROSSEXSUBSECTION_H
+#define LLVM_DEBUGINFO_CODEVIEW_DEBUGCROSSEXSUBSECTION_H
+
+#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/CodeView/DebugSubsection.h"
+#include "llvm/Support/BinaryStreamArray.h"
+#include "llvm/Support/BinaryStreamReader.h"
+#include "llvm/Support/BinaryStreamRef.h"
+#include "llvm/Support/Error.h"
+#include <cstdint>
+#include <map>
+
+namespace llvm {
+namespace codeview {
+
+class DebugCrossModuleExportsSubsectionRef final : public DebugSubsectionRef {
+  using ReferenceArray = FixedStreamArray<CrossModuleExport>;
+  using Iterator = ReferenceArray::Iterator;
+
+public:
+  DebugCrossModuleExportsSubsectionRef()
+      : DebugSubsectionRef(DebugSubsectionKind::CrossScopeExports) {}
+
+  static bool classof(const DebugSubsectionRef *S) {
+    return S->kind() == DebugSubsectionKind::CrossScopeExports;
+  }
+
+  Error initialize(BinaryStreamReader Reader);
+  Error initialize(BinaryStreamRef Stream);
+
+  Iterator begin() const { return References.begin(); }
+  Iterator end() const { return References.end(); }
+
+private:
+  FixedStreamArray<CrossModuleExport> References;
+};
+
+class DebugCrossModuleExportsSubsection final : public DebugSubsection {
+public:
+  DebugCrossModuleExportsSubsection()
+      : DebugSubsection(DebugSubsectionKind::CrossScopeExports) {}
+
+  static bool classof(const DebugSubsection *S) {
+    return S->kind() == DebugSubsectionKind::CrossScopeExports;
+  }
+
+  void addMapping(uint32_t Local, uint32_t Global);
+
+  uint32_t calculateSerializedSize() const override;
+  Error commit(BinaryStreamWriter &Writer) const override;
+
+private:
+  std::map<uint32_t, uint32_t> Mappings;
+};
+
+} // end namespace codeview
+} // end namespace llvm
+
+#endif // LLVM_DEBUGINFO_CODEVIEW_DEBUGCROSSEXSUBSECTION_H
diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/DebugCrossImpSubsection.h b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/DebugCrossImpSubsection.h
new file mode 100644
index 0000000000000..8be7ef265c82e
--- /dev/null
+++ b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/DebugCrossImpSubsection.h
@@ -0,0 +1,95 @@
+//===- DebugCrossImpSubsection.h --------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_DEBUGCROSSIMPSUBSECTION_H +#define LLVM_DEBUGINFO_CODEVIEW_DEBUGCROSSIMPSUBSECTION_H + +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/DebugSubsection.h" +#include "llvm/Support/BinaryStreamArray.h" +#include "llvm/Support/BinaryStreamReader.h" +#include "llvm/Support/BinaryStreamRef.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/Error.h" +#include +#include + +namespace llvm { + +namespace codeview { + +struct CrossModuleImportItem { + const CrossModuleImport *Header = nullptr; + FixedStreamArray Imports; +}; + +} // end namespace codeview + +template <> struct VarStreamArrayExtractor { +public: + using ContextType = void; + + Error operator()(BinaryStreamRef Stream, uint32_t &Len, + codeview::CrossModuleImportItem &Item); +}; + +namespace codeview { + +class DebugStringTableSubsection; + +class DebugCrossModuleImportsSubsectionRef final : public DebugSubsectionRef { + using ReferenceArray = VarStreamArray; + using Iterator = ReferenceArray::Iterator; + +public: + DebugCrossModuleImportsSubsectionRef() + : DebugSubsectionRef(DebugSubsectionKind::CrossScopeImports) {} + + static bool classof(const DebugSubsectionRef *S) { + return S->kind() == DebugSubsectionKind::CrossScopeImports; + } + + Error initialize(BinaryStreamReader Reader); + Error initialize(BinaryStreamRef Stream); + + Iterator begin() const { return References.begin(); } + Iterator end() const { return References.end(); } + +private: + ReferenceArray References; +}; + +class DebugCrossModuleImportsSubsection final : public DebugSubsection { +public: + explicit DebugCrossModuleImportsSubsection( + DebugStringTableSubsection &Strings) + : DebugSubsection(DebugSubsectionKind::CrossScopeImports), + Strings(Strings) {} + + static bool classof(const DebugSubsection *S) { + return S->kind() == DebugSubsectionKind::CrossScopeImports; + } + + void addImport(StringRef Module, uint32_t ImportId); + + uint32_t calculateSerializedSize() const override; + Error commit(BinaryStreamWriter &Writer) const override; + +private: + DebugStringTableSubsection &Strings; + StringMap> Mappings; +}; + +} // end namespace codeview + +} // end namespace llvm + +#endif // LLVM_DEBUGINFO_CODEVIEW_DEBUGCROSSIMPSUBSECTION_H diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/DebugFrameDataSubsection.h b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/DebugFrameDataSubsection.h new file mode 100644 index 0000000000000..1e329c7c3f141 --- /dev/null +++ b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/DebugFrameDataSubsection.h @@ -0,0 +1,60 @@ +//===- DebugFrameDataSubsection.h ------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_DEBUGFRAMEDATASUBSECTION_H +#define LLVM_DEBUGINFO_CODEVIEW_DEBUGFRAMEDATASUBSECTION_H + +#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/DebugSubsection.h" +#include "llvm/Support/BinaryStreamReader.h" +#include "llvm/Support/Error.h" + +namespace llvm { +namespace codeview { +class DebugFrameDataSubsectionRef final : public DebugSubsectionRef { +public: + DebugFrameDataSubsectionRef() + : DebugSubsectionRef(DebugSubsectionKind::FrameData) {} + static bool classof(const DebugSubsection *S) { + return S->kind() == DebugSubsectionKind::FrameData; + } + + Error initialize(BinaryStreamReader Reader); + + FixedStreamArray::Iterator begin() const { return Frames.begin(); } + FixedStreamArray::Iterator end() const { return Frames.end(); } + + const void *getRelocPtr() const { return RelocPtr; } + +private: + const uint32_t *RelocPtr = nullptr; + FixedStreamArray Frames; +}; + +class DebugFrameDataSubsection final : public DebugSubsection { +public: + DebugFrameDataSubsection() + : DebugSubsection(DebugSubsectionKind::FrameData) {} + static bool classof(const DebugSubsection *S) { + return S->kind() == DebugSubsectionKind::FrameData; + } + + uint32_t calculateSerializedSize() const override; + Error commit(BinaryStreamWriter &Writer) const override; + + void addFrameData(const FrameData &Frame); + void setFrames(ArrayRef Frames); + +private: + std::vector Frames; +}; +} +} + +#endif diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h new file mode 100644 index 0000000000000..b88c0eae1de28 --- /dev/null +++ b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h @@ -0,0 +1,121 @@ +//===- DebugInlineeLinesSubsection.h ----------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_DEBUGINLINEELINESSUBSECTION_H +#define LLVM_DEBUGINFO_CODEVIEW_DEBUGINLINEELINESSUBSECTION_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/DebugSubsection.h" +#include "llvm/DebugInfo/CodeView/Line.h" +#include "llvm/DebugInfo/CodeView/TypeIndex.h" +#include "llvm/Support/BinaryStreamArray.h" +#include "llvm/Support/BinaryStreamReader.h" +#include "llvm/Support/BinaryStreamRef.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/Error.h" +#include +#include + +namespace llvm { + +namespace codeview { + +class DebugChecksumsSubsection; + +enum class InlineeLinesSignature : uint32_t { + Normal, // CV_INLINEE_SOURCE_LINE_SIGNATURE + ExtraFiles // CV_INLINEE_SOURCE_LINE_SIGNATURE_EX +}; + +struct InlineeSourceLineHeader { + TypeIndex Inlinee; // ID of the function that was inlined. + support::ulittle32_t FileID; // Offset into FileChecksums subsection. + support::ulittle32_t SourceLineNum; // First line of inlined code. 
+ // If extra files present: + // ulittle32_t ExtraFileCount; + // ulittle32_t Files[]; +}; + +struct InlineeSourceLine { + const InlineeSourceLineHeader *Header; + FixedStreamArray ExtraFiles; +}; + +} // end namespace codeview + +template <> struct VarStreamArrayExtractor { + Error operator()(BinaryStreamRef Stream, uint32_t &Len, + codeview::InlineeSourceLine &Item); + + bool HasExtraFiles = false; +}; + +namespace codeview { + +class DebugInlineeLinesSubsectionRef final : public DebugSubsectionRef { + using LinesArray = VarStreamArray; + using Iterator = LinesArray::Iterator; + +public: + DebugInlineeLinesSubsectionRef(); + + static bool classof(const DebugSubsectionRef *S) { + return S->kind() == DebugSubsectionKind::InlineeLines; + } + + Error initialize(BinaryStreamReader Reader); + bool hasExtraFiles() const; + + Iterator begin() const { return Lines.begin(); } + Iterator end() const { return Lines.end(); } + +private: + InlineeLinesSignature Signature; + VarStreamArray Lines; +}; + +class DebugInlineeLinesSubsection final : public DebugSubsection { +public: + struct Entry { + std::vector ExtraFiles; + InlineeSourceLineHeader Header; + }; + + DebugInlineeLinesSubsection(DebugChecksumsSubsection &Checksums, + bool HasExtraFiles = false); + + static bool classof(const DebugSubsection *S) { + return S->kind() == DebugSubsectionKind::InlineeLines; + } + + Error commit(BinaryStreamWriter &Writer) const override; + uint32_t calculateSerializedSize() const override; + + void addInlineSite(TypeIndex FuncId, StringRef FileName, uint32_t SourceLine); + void addExtraFile(StringRef FileName); + + bool hasExtraFiles() const { return HasExtraFiles; } + void setHasExtraFiles(bool Has) { HasExtraFiles = Has; } + + std::vector::const_iterator begin() const { return Entries.begin(); } + std::vector::const_iterator end() const { return Entries.end(); } + +private: + DebugChecksumsSubsection &Checksums; + bool HasExtraFiles = false; + uint32_t ExtraFileCount = 0; + std::vector Entries; +}; + +} // end namespace codeview + +} // end namespace llvm + +#endif // LLVM_DEBUGINFO_CODEVIEW_DEBUGINLINEELINESSUBSECTION_H diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/DebugLinesSubsection.h b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/DebugLinesSubsection.h new file mode 100644 index 0000000000000..53044b6c3dc8b --- /dev/null +++ b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/DebugLinesSubsection.h @@ -0,0 +1,150 @@ +//===- DebugLinesSubsection.h -----------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_DEBUGLINESSUBSECTION_H +#define LLVM_DEBUGINFO_CODEVIEW_DEBUGLINESSUBSECTION_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/DebugSubsection.h" +#include "llvm/DebugInfo/CodeView/Line.h" +#include "llvm/Support/BinaryStreamArray.h" +#include "llvm/Support/BinaryStreamReader.h" +#include "llvm/Support/BinaryStreamRef.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/Error.h" +#include +#include + +namespace llvm { +namespace codeview { + +class DebugChecksumsSubsection; +class DebugStringTableSubsection; + +// Corresponds to the `CV_DebugSLinesHeader_t` structure. 
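// Editor's sketch, not part of the patch (a standalone snippet, not meant to
// be spliced in here): the writer-side subsections above compose. A checksum
// entry points into the string table, and each inlinee site points at a
// checksum entry by file name. The digest bytes and the 0x1003 type index
// are made-up illustration values; the first argument of addInlineSite would
// normally be the LF_FUNC_ID index of the inlined function.

#include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h"
#include "llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h"
#include "llvm/DebugInfo/CodeView/DebugStringTableSubsection.h"
#include <cstdint>

static void
buildInlineeLines(llvm::codeview::DebugStringTableSubsection &Strings) {
  using namespace llvm::codeview;
  DebugChecksumsSubsection Checksums(Strings);
  uint8_t Digest[16] = {}; // placeholder MD5 bytes
  Checksums.addChecksum("inlined.h", FileChecksumKind::MD5, Digest);
  DebugInlineeLinesSubsection Inlinees(Checksums, /*HasExtraFiles=*/false);
  Inlinees.addInlineSite(TypeIndex(0x1003), "inlined.h", /*SourceLine=*/42);
}

// Back to the patch: the LineFragmentHeader that the comment above refers to
// follows.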
+struct LineFragmentHeader { + support::ulittle32_t RelocOffset; // Code offset of line contribution. + support::ulittle16_t RelocSegment; // Code segment of line contribution. + support::ulittle16_t Flags; // See LineFlags enumeration. + support::ulittle32_t CodeSize; // Code size of this line contribution. +}; + +// Corresponds to the `CV_DebugSLinesFileBlockHeader_t` structure. +struct LineBlockFragmentHeader { + support::ulittle32_t NameIndex; // Offset of FileChecksum entry in File + // checksums buffer. The checksum entry then + // contains another offset into the string + // table of the actual name. + support::ulittle32_t NumLines; // Number of lines + support::ulittle32_t BlockSize; // Code size of block, in bytes. + // The following two variable length arrays appear immediately after the + // header. The structure definitions follow. + // LineNumberEntry Lines[NumLines]; + // ColumnNumberEntry Columns[NumLines]; +}; + +// Corresponds to `CV_Line_t` structure +struct LineNumberEntry { + support::ulittle32_t Offset; // Offset to start of code bytes for line number + support::ulittle32_t Flags; // Start:24, End:7, IsStatement:1 +}; + +// Corresponds to `CV_Column_t` structure +struct ColumnNumberEntry { + support::ulittle16_t StartColumn; + support::ulittle16_t EndColumn; +}; + +struct LineColumnEntry { + support::ulittle32_t NameIndex; + FixedStreamArray LineNumbers; + FixedStreamArray Columns; +}; + +class LineColumnExtractor { +public: + Error operator()(BinaryStreamRef Stream, uint32_t &Len, + LineColumnEntry &Item); + + const LineFragmentHeader *Header = nullptr; +}; + +class DebugLinesSubsectionRef final : public DebugSubsectionRef { + friend class LineColumnExtractor; + + using LineInfoArray = VarStreamArray; + using Iterator = LineInfoArray::Iterator; + +public: + DebugLinesSubsectionRef(); + + static bool classof(const DebugSubsectionRef *S) { + return S->kind() == DebugSubsectionKind::Lines; + } + + Error initialize(BinaryStreamReader Reader); + + Iterator begin() const { return LinesAndColumns.begin(); } + Iterator end() const { return LinesAndColumns.end(); } + + const LineFragmentHeader *header() const { return Header; } + + bool hasColumnInfo() const; + +private: + const LineFragmentHeader *Header = nullptr; + LineInfoArray LinesAndColumns; +}; + +class DebugLinesSubsection final : public DebugSubsection { + struct Block { + Block(uint32_t ChecksumBufferOffset) + : ChecksumBufferOffset(ChecksumBufferOffset) {} + + uint32_t ChecksumBufferOffset; + std::vector Lines; + std::vector Columns; + }; + +public: + DebugLinesSubsection(DebugChecksumsSubsection &Checksums, + DebugStringTableSubsection &Strings); + + static bool classof(const DebugSubsection *S) { + return S->kind() == DebugSubsectionKind::Lines; + } + + void createBlock(StringRef FileName); + void addLineInfo(uint32_t Offset, const LineInfo &Line); + void addLineAndColumnInfo(uint32_t Offset, const LineInfo &Line, + uint32_t ColStart, uint32_t ColEnd); + + uint32_t calculateSerializedSize() const override; + Error commit(BinaryStreamWriter &Writer) const override; + + void setRelocationAddress(uint16_t Segment, uint32_t Offset); + void setCodeSize(uint32_t Size); + void setFlags(LineFlags Flags); + + bool hasColumnInfo() const; + +private: + DebugChecksumsSubsection &Checksums; + uint32_t RelocOffset = 0; + uint16_t RelocSegment = 0; + uint32_t CodeSize = 0; + LineFlags Flags = LF_None; + std::vector Blocks; +}; + +} // end namespace codeview +} // end namespace llvm + +#endif // 
LLVM_DEBUGINFO_CODEVIEW_DEBUGLINESSUBSECTION_H diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/DebugStringTableSubsection.h b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/DebugStringTableSubsection.h new file mode 100644 index 0000000000000..7f0f10e4fbfa2 --- /dev/null +++ b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/DebugStringTableSubsection.h @@ -0,0 +1,89 @@ +//===- DebugStringTableSubsection.h - CodeView String Table -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_DEBUGSTRINGTABLESUBSECTION_H +#define LLVM_DEBUGINFO_CODEVIEW_DEBUGSTRINGTABLESUBSECTION_H + +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/DebugSubsection.h" +#include "llvm/Support/BinaryStreamRef.h" +#include "llvm/Support/Error.h" +#include + +namespace llvm { + +class BinaryStreamReader; + +namespace codeview { + +/// Represents a read-only view of a CodeView string table. This is a very +/// simple flat buffer consisting of null-terminated strings, where strings +/// are retrieved by their offset in the buffer. DebugStringTableSubsectionRef +/// does not own the underlying storage for the buffer. +class DebugStringTableSubsectionRef : public DebugSubsectionRef { +public: + DebugStringTableSubsectionRef(); + + static bool classof(const DebugSubsectionRef *S) { + return S->kind() == DebugSubsectionKind::StringTable; + } + + Error initialize(BinaryStreamRef Contents); + Error initialize(BinaryStreamReader &Reader); + + Expected getString(uint32_t Offset) const; + + bool valid() const { return Stream.valid(); } + + BinaryStreamRef getBuffer() const { return Stream; } + +private: + BinaryStreamRef Stream; +}; + +/// Represents a read-write view of a CodeView string table. +/// DebugStringTableSubsection owns the underlying storage for the table, and is +/// capable of serializing the string table into a format understood by +/// DebugStringTableSubsectionRef. +class DebugStringTableSubsection : public DebugSubsection { +public: + DebugStringTableSubsection(); + + static bool classof(const DebugSubsection *S) { + return S->kind() == DebugSubsectionKind::StringTable; + } + + // If string S does not exist in the string table, insert it. + // Returns the ID for S. + uint32_t insert(StringRef S); + + // Return the ID for string S. Assumes S exists in the table. 
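// Editor's sketch, not part of the patch: a typical round trip through the
// writable table, kept as commented code since this point sits inside the
// class body. The comments above imply insert() is idempotent per string,
// and the returned ID is what checksum and line records later store.
//
//   #include "llvm/DebugInfo/CodeView/DebugStringTableSubsection.h"
//   #include <cassert>
//
//   void internExample(llvm::codeview::DebugStringTableSubsection &Table) {
//     uint32_t A = Table.insert("main.cpp");
//     uint32_t B = Table.insert("main.cpp"); // no duplicate entry created
//     assert(A == B && B == Table.getStringId("main.cpp"));
//   }
//
// The declaration the comment above documents: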
+ uint32_t getStringId(StringRef S) const; + + uint32_t calculateSerializedSize() const override; + Error commit(BinaryStreamWriter &Writer) const override; + + uint32_t size() const; + + StringMap::const_iterator begin() const { return Strings.begin(); } + + StringMap::const_iterator end() const { return Strings.end(); } + +private: + StringMap Strings; + uint32_t StringSize = 1; +}; + +} // end namespace codeview + +} // end namespace llvm + +#endif // LLVM_DEBUGINFO_CODEVIEW_DEBUGSTRINGTABLESUBSECTION_H diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/DebugSubsection.h b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/DebugSubsection.h new file mode 100644 index 0000000000000..e427e0006a55b --- /dev/null +++ b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/DebugSubsection.h @@ -0,0 +1,52 @@ +//===- DebugSubsection.h ------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_MODULEDEBUGFRAGMENT_H +#define LLVM_DEBUGINFO_CODEVIEW_MODULEDEBUGFRAGMENT_H + +#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/Support/BinaryStreamWriter.h" +#include "llvm/Support/Casting.h" + +namespace llvm { +namespace codeview { + +class DebugSubsectionRef { +public: + explicit DebugSubsectionRef(DebugSubsectionKind Kind) : Kind(Kind) {} + virtual ~DebugSubsectionRef(); + + static bool classof(const DebugSubsectionRef *S) { return true; } + + DebugSubsectionKind kind() const { return Kind; } + +protected: + DebugSubsectionKind Kind; +}; + +class DebugSubsection { +public: + explicit DebugSubsection(DebugSubsectionKind Kind) : Kind(Kind) {} + virtual ~DebugSubsection(); + + static bool classof(const DebugSubsection *S) { return true; } + + DebugSubsectionKind kind() const { return Kind; } + + virtual Error commit(BinaryStreamWriter &Writer) const = 0; + virtual uint32_t calculateSerializedSize() const = 0; + +protected: + DebugSubsectionKind Kind; +}; + +} // namespace codeview +} // namespace llvm + +#endif // LLVM_DEBUGINFO_CODEVIEW_MODULEDEBUGFRAGMENT_H diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/DebugSubsectionRecord.h b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/DebugSubsectionRecord.h new file mode 100644 index 0000000000000..fc0cf0d1d90ec --- /dev/null +++ b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/DebugSubsectionRecord.h @@ -0,0 +1,103 @@ +//===- DebugSubsectionRecord.h ----------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_DEBUGSUBSECTIONRECORD_H +#define LLVM_DEBUGINFO_CODEVIEW_DEBUGSUBSECTIONRECORD_H + +#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/Support/BinaryStreamArray.h" +#include "llvm/Support/BinaryStreamRef.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/MathExtras.h" +#include +#include + +namespace llvm { + +class BinaryStreamWriter; + +namespace codeview { + +class DebugSubsection; + +// Corresponds to the `CV_DebugSSubsectionHeader_t` structure. 
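// Editor's aside, not part of the patch (standalone sketch): every subsection
// is framed by the header below, and the extractor later in this file rounds
// the advertised length up to a 4-byte boundary for the PDB container it
// hard-codes. The framing arithmetic, under that alignment assumption:

#include <cstdint>

constexpr uint32_t subsectionBytes(uint32_t PayloadSize) {
  // Kind and Length fields, then the payload padded to a multiple of 4.
  return 2 * sizeof(uint32_t) + ((PayloadSize + 3u) & ~3u);
}
static_assert(subsectionBytes(5) == 16,
              "5 payload bytes pad to 8, plus 8 bytes of header");

// The header fields themselves: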
+struct DebugSubsectionHeader { + support::ulittle32_t Kind; // codeview::DebugSubsectionKind enum + support::ulittle32_t Length; // number of bytes occupied by this record. +}; + +class DebugSubsectionRecord { +public: + DebugSubsectionRecord(); + DebugSubsectionRecord(DebugSubsectionKind Kind, BinaryStreamRef Data, + CodeViewContainer Container); + + static Error initialize(BinaryStreamRef Stream, DebugSubsectionRecord &Info, + CodeViewContainer Container); + + uint32_t getRecordLength() const; + DebugSubsectionKind kind() const; + BinaryStreamRef getRecordData() const; + +private: + CodeViewContainer Container = CodeViewContainer::ObjectFile; + DebugSubsectionKind Kind = DebugSubsectionKind::None; + BinaryStreamRef Data; +}; + +class DebugSubsectionRecordBuilder { +public: + DebugSubsectionRecordBuilder(std::shared_ptr Subsection, + CodeViewContainer Container); + + /// Use this to copy existing subsections directly from source to destination. + /// For example, line table subsections in an object file only need to be + /// relocated before being copied into the PDB. + DebugSubsectionRecordBuilder(const DebugSubsectionRecord &Contents, + CodeViewContainer Container); + + uint32_t calculateSerializedLength(); + Error commit(BinaryStreamWriter &Writer) const; + +private: + /// The subsection to build. Will be null if Contents is non-empty. + std::shared_ptr Subsection; + + /// The bytes of the subsection. Only non-empty if Subsection is null. + DebugSubsectionRecord Contents; + + CodeViewContainer Container; +}; + +} // end namespace codeview + +template <> struct VarStreamArrayExtractor { + Error operator()(BinaryStreamRef Stream, uint32_t &Length, + codeview::DebugSubsectionRecord &Info) { + // FIXME: We need to pass the container type through to this function. In + // practice this isn't super important since the subsection header describes + // its length and we can just skip it. It's more important when writing. + if (auto EC = codeview::DebugSubsectionRecord::initialize( + Stream, Info, codeview::CodeViewContainer::Pdb)) + return EC; + Length = alignTo(Info.getRecordLength(), 4); + return Error::success(); + } +}; + +namespace codeview { + +using DebugSubsectionArray = VarStreamArray; + +} // end namespace codeview + +} // end namespace llvm + +#endif // LLVM_DEBUGINFO_CODEVIEW_DEBUGSUBSECTIONRECORD_H diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/DebugSubsectionVisitor.h b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/DebugSubsectionVisitor.h new file mode 100644 index 0000000000000..75f749dfa9334 --- /dev/null +++ b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/DebugSubsectionVisitor.h @@ -0,0 +1,114 @@ +//===- DebugSubsectionVisitor.h -----------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_MODULEDEBUGFRAGMENTVISITOR_H +#define LLVM_DEBUGINFO_CODEVIEW_MODULEDEBUGFRAGMENTVISITOR_H + +#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h" +#include "llvm/DebugInfo/CodeView/StringsAndChecksums.h" +#include "llvm/Support/Error.h" +#include + +namespace llvm { + +namespace codeview { + +class DebugChecksumsSubsectionRef; +class DebugSubsectionRecord; +class DebugInlineeLinesSubsectionRef; +class DebugCrossModuleExportsSubsectionRef; +class DebugCrossModuleImportsSubsectionRef; +class DebugFrameDataSubsectionRef; +class DebugLinesSubsectionRef; +class DebugStringTableSubsectionRef; +class DebugSymbolRVASubsectionRef; +class DebugSymbolsSubsectionRef; +class DebugUnknownSubsectionRef; +class StringsAndChecksumsRef; + +class DebugSubsectionVisitor { +public: + virtual ~DebugSubsectionVisitor() = default; + + virtual Error visitUnknown(DebugUnknownSubsectionRef &Unknown) { + return Error::success(); + } + virtual Error visitLines(DebugLinesSubsectionRef &Lines, + const StringsAndChecksumsRef &State) = 0; + virtual Error visitFileChecksums(DebugChecksumsSubsectionRef &Checksums, + const StringsAndChecksumsRef &State) = 0; + virtual Error visitInlineeLines(DebugInlineeLinesSubsectionRef &Inlinees, + const StringsAndChecksumsRef &State) = 0; + virtual Error + visitCrossModuleExports(DebugCrossModuleExportsSubsectionRef &CSE, + const StringsAndChecksumsRef &State) = 0; + virtual Error + visitCrossModuleImports(DebugCrossModuleImportsSubsectionRef &CSE, + const StringsAndChecksumsRef &State) = 0; + + virtual Error visitStringTable(DebugStringTableSubsectionRef &ST, + const StringsAndChecksumsRef &State) = 0; + + virtual Error visitSymbols(DebugSymbolsSubsectionRef &CSE, + const StringsAndChecksumsRef &State) = 0; + + virtual Error visitFrameData(DebugFrameDataSubsectionRef &FD, + const StringsAndChecksumsRef &State) = 0; + virtual Error visitCOFFSymbolRVAs(DebugSymbolRVASubsectionRef &RVAs, + const StringsAndChecksumsRef &State) = 0; +}; + +Error visitDebugSubsection(const DebugSubsectionRecord &R, + DebugSubsectionVisitor &V, + const StringsAndChecksumsRef &State); + +namespace detail { +template +Error visitDebugSubsections(T &&FragmentRange, DebugSubsectionVisitor &V, + StringsAndChecksumsRef &State) { + State.initialize(std::forward(FragmentRange)); + + for (const DebugSubsectionRecord &L : FragmentRange) { + if (auto EC = visitDebugSubsection(L, V, State)) + return EC; + } + return Error::success(); +} +} // namespace detail + +template +Error visitDebugSubsections(T &&FragmentRange, DebugSubsectionVisitor &V) { + StringsAndChecksumsRef State; + return detail::visitDebugSubsections(std::forward(FragmentRange), V, + State); +} + +template +Error visitDebugSubsections(T &&FragmentRange, DebugSubsectionVisitor &V, + const DebugStringTableSubsectionRef &Strings) { + StringsAndChecksumsRef State(Strings); + return detail::visitDebugSubsections(std::forward(FragmentRange), V, + State); +} + +template +Error visitDebugSubsections(T &&FragmentRange, DebugSubsectionVisitor &V, + const DebugStringTableSubsectionRef &Strings, + const DebugChecksumsSubsectionRef &Checksums) { + StringsAndChecksumsRef State(Strings, Checksums); + return detail::visitDebugSubsections(std::forward(FragmentRange), V, + State); +} + +} // end namespace codeview + +} // end namespace llvm + +#endif // 
LLVM_DEBUGINFO_CODEVIEW_MODULEDEBUGFRAGMENTVISITOR_H diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/DebugSymbolRVASubsection.h b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/DebugSymbolRVASubsection.h new file mode 100644 index 0000000000000..a4c04b55eb4c2 --- /dev/null +++ b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/DebugSymbolRVASubsection.h @@ -0,0 +1,67 @@ +//===- DebugSymbolRVASubsection.h -------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_DEBUGSYMBOLRVASUBSECTION_H +#define LLVM_DEBUGINFO_CODEVIEW_DEBUGSYMBOLRVASUBSECTION_H + +#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/DebugSubsection.h" +#include "llvm/Support/BinaryStreamArray.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/Error.h" +#include +#include + +namespace llvm { + +class BinaryStreamReader; + +namespace codeview { + +class DebugSymbolRVASubsectionRef final : public DebugSubsectionRef { +public: + using ArrayType = FixedStreamArray; + + DebugSymbolRVASubsectionRef(); + + static bool classof(const DebugSubsectionRef *S) { + return S->kind() == DebugSubsectionKind::CoffSymbolRVA; + } + + ArrayType::Iterator begin() const { return RVAs.begin(); } + ArrayType::Iterator end() const { return RVAs.end(); } + + Error initialize(BinaryStreamReader &Reader); + +private: + ArrayType RVAs; +}; + +class DebugSymbolRVASubsection final : public DebugSubsection { +public: + DebugSymbolRVASubsection(); + + static bool classof(const DebugSubsection *S) { + return S->kind() == DebugSubsectionKind::CoffSymbolRVA; + } + + Error commit(BinaryStreamWriter &Writer) const override; + uint32_t calculateSerializedSize() const override; + + void addRVA(uint32_t RVA) { RVAs.push_back(support::ulittle32_t(RVA)); } + +private: + std::vector RVAs; +}; + +} // end namespace codeview + +} // end namespace llvm + +#endif // LLVM_DEBUGINFO_CODEVIEW_DEBUGSYMBOLRVASUBSECTION_H diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/DebugSymbolsSubsection.h b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/DebugSymbolsSubsection.h new file mode 100644 index 0000000000000..dfda7deb6cb41 --- /dev/null +++ b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/DebugSymbolsSubsection.h @@ -0,0 +1,56 @@ +//===- DebugSymbolsSubsection.h --------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_DEBUGSYMBOLSSUBSECTION_H +#define LLVM_DEBUGINFO_CODEVIEW_DEBUGSYMBOLSSUBSECTION_H + +#include "llvm/DebugInfo/CodeView/DebugSubsection.h" +#include "llvm/DebugInfo/CodeView/SymbolRecord.h" +#include "llvm/Support/Error.h" + +namespace llvm { +namespace codeview { +class DebugSymbolsSubsectionRef final : public DebugSubsectionRef { +public: + DebugSymbolsSubsectionRef() + : DebugSubsectionRef(DebugSubsectionKind::Symbols) {} + + static bool classof(const DebugSubsectionRef *S) { + return S->kind() == DebugSubsectionKind::Symbols; + } + + Error initialize(BinaryStreamReader Reader); + + CVSymbolArray::Iterator begin() const { return Records.begin(); } + CVSymbolArray::Iterator end() const { return Records.end(); } + +private: + CVSymbolArray Records; +}; + +class DebugSymbolsSubsection final : public DebugSubsection { +public: + DebugSymbolsSubsection() : DebugSubsection(DebugSubsectionKind::Symbols) {} + static bool classof(const DebugSubsection *S) { + return S->kind() == DebugSubsectionKind::Symbols; + } + + uint32_t calculateSerializedSize() const override; + Error commit(BinaryStreamWriter &Writer) const override; + + void addSymbol(CVSymbol Symbol); + +private: + uint32_t Length = 0; + std::vector Records; +}; +} +} + +#endif diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/DebugUnknownSubsection.h b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/DebugUnknownSubsection.h new file mode 100644 index 0000000000000..ea9a96ca8d68e --- /dev/null +++ b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/DebugUnknownSubsection.h @@ -0,0 +1,32 @@ +//===- DebugUnknownSubsection.h -----------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_MODULEDEBUGUNKNOWNFRAGMENT_H +#define LLVM_DEBUGINFO_CODEVIEW_MODULEDEBUGUNKNOWNFRAGMENT_H + +#include "llvm/DebugInfo/CodeView/DebugSubsection.h" +#include "llvm/Support/BinaryStreamRef.h" + +namespace llvm { +namespace codeview { + +class DebugUnknownSubsectionRef final : public DebugSubsectionRef { +public: + DebugUnknownSubsectionRef(DebugSubsectionKind Kind, BinaryStreamRef Data) + : DebugSubsectionRef(Kind), Data(Data) {} + + BinaryStreamRef getData() const { return Data; } + +private: + BinaryStreamRef Data; +}; +} +} + +#endif diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/EnumTables.h b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/EnumTables.h index 10d1c581a1966..ee0f0f7c6023a 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/EnumTables.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/EnumTables.h @@ -1,4 +1,4 @@ -//===- EnumTables.h Enum to string conversion tables ------------*- C++ -*-===// +//===- EnumTables.h - Enum to string conversion tables ----------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -11,17 +11,18 @@ #define LLVM_DEBUGINFO_CODEVIEW_ENUMTABLES_H #include "llvm/ADT/ArrayRef.h" +#include "llvm/BinaryFormat/COFF.h" #include "llvm/DebugInfo/CodeView/CodeView.h" -#include "llvm/Support/COFF.h" #include "llvm/Support/ScopedPrinter.h" - -#include +#include namespace llvm { namespace codeview { + ArrayRef> getSymbolTypeNames(); ArrayRef> getTypeLeafNames(); ArrayRef> getRegisterNames(); +ArrayRef> getPublicSymFlagNames(); ArrayRef> getProcSymFlagNames(); ArrayRef> getLocalFlagNames(); ArrayRef> getFrameCookieKindNames(); @@ -37,7 +38,8 @@ ArrayRef> getThunkOrdinalNames(); ArrayRef> getTrampolineNames(); ArrayRef> getImageSectionCharacteristicNames(); -} // namespace codeview -} // namespace llvm + +} // end namespace codeview +} // end namespace llvm #endif // LLVM_DEBUGINFO_CODEVIEW_ENUMTABLES_H diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/Formatters.h b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/Formatters.h index 37a91098a8b65..278ad02a39cd7 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/Formatters.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/Formatters.h @@ -12,20 +12,30 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" +#include "llvm/DebugInfo/CodeView/GUID.h" +#include "llvm/DebugInfo/CodeView/TypeIndex.h" #include "llvm/Support/FormatAdapters.h" +#include "llvm/Support/FormatVariadic.h" +#include "llvm/Support/raw_ostream.h" +#include namespace llvm { + namespace codeview { + namespace detail { -class GuidAdapter final : public llvm::FormatAdapter> { + +class GuidAdapter final : public FormatAdapter> { ArrayRef Guid; public: explicit GuidAdapter(ArrayRef Guid); explicit GuidAdapter(StringRef Guid); - void format(llvm::raw_ostream &Stream, StringRef Style); + + void format(raw_ostream &Stream, StringRef Style) override; }; -} + +} // end namespace detail inline detail::GuidAdapter fmt_guid(StringRef Item) { return detail::GuidAdapter(Item); @@ -34,7 +44,30 @@ inline detail::GuidAdapter fmt_guid(StringRef Item) { inline detail::GuidAdapter fmt_guid(ArrayRef Item) { return detail::GuidAdapter(Item); } -} -} -#endif +} // end namespace codeview + +template <> struct format_provider { +public: + static void format(const codeview::TypeIndex &V, raw_ostream &Stream, + StringRef Style) { + if 
(V.isNoneType()) + Stream << ""; + else { + Stream << formatv("{0:X+4}", V.getIndex()); + if (V.isSimple()) + Stream << " (" << codeview::TypeIndex::simpleTypeName(V) << ")"; + } + } +}; + +template <> struct format_provider { + static void format(const codeview::GUID &V, llvm::raw_ostream &Stream, + StringRef Style) { + Stream << V; + } +}; + +} // end namespace llvm + +#endif // LLVM_DEBUGINFO_CODEVIEW_FORMATTERS_H diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/GUID.h b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/GUID.h new file mode 100644 index 0000000000000..a055ce9e2e451 --- /dev/null +++ b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/GUID.h @@ -0,0 +1,55 @@ +//===- GUID.h ---------------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_GUID_H +#define LLVM_DEBUGINFO_CODEVIEW_GUID_H + +#include +#include + +namespace llvm { +class raw_ostream; + +namespace codeview { + +/// This represents the 'GUID' type from windows.h. +struct GUID { + uint8_t Guid[16]; +}; + +inline bool operator==(const GUID &LHS, const GUID &RHS) { + return 0 == ::memcmp(LHS.Guid, RHS.Guid, sizeof(LHS.Guid)); +} + +inline bool operator<(const GUID &LHS, const GUID &RHS) { + return ::memcmp(LHS.Guid, RHS.Guid, sizeof(LHS.Guid)) < 0; +} + +inline bool operator<=(const GUID &LHS, const GUID &RHS) { + return ::memcmp(LHS.Guid, RHS.Guid, sizeof(LHS.Guid)) <= 0; +} + +inline bool operator>(const GUID &LHS, const GUID &RHS) { + return !(LHS <= RHS); +} + +inline bool operator>=(const GUID &LHS, const GUID &RHS) { + return !(LHS < RHS); +} + +inline bool operator!=(const GUID &LHS, const GUID &RHS) { + return !(LHS == RHS); +} + +raw_ostream &operator<<(raw_ostream &OS, const GUID &Guid); + +} // namespace codeview +} // namespace llvm + +#endif diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h new file mode 100644 index 0000000000000..cc0c24301d498 --- /dev/null +++ b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h @@ -0,0 +1,113 @@ +//===- LazyRandomTypeCollection.h -------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_LAZYRANDOMTYPECOLLECTION_H +#define LLVM_DEBUGINFO_CODEVIEW_LAZYRANDOMTYPECOLLECTION_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/DebugInfo/CodeView/TypeCollection.h" +#include "llvm/DebugInfo/CodeView/TypeIndex.h" +#include "llvm/DebugInfo/CodeView/TypeRecord.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/BinaryStreamArray.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/StringSaver.h" +#include +#include + +namespace llvm { +namespace codeview { + +/// \brief Provides amortized O(1) random access to a CodeView type stream. 
+/// Normally to access a type from a type stream, you must know its byte +/// offset into the type stream, because type records are variable-lengthed. +/// However, this is not the way we prefer to access them. For example, given +/// a symbol record one of the fields may be the TypeIndex of the symbol's +/// type record. Or given a type record such as an array type, there might +/// be a TypeIndex for the element type. Sequential access is perfect when +/// we're just dumping every entry, but it's very poor for real world usage. +/// +/// Type streams in PDBs contain an additional field which is a list of pairs +/// containing indices and their corresponding offsets, roughly every ~8KB of +/// record data. This general idea need not be confined to PDBs though. By +/// supplying such an array, the producer of a type stream can allow the +/// consumer much better access time, because the consumer can find the nearest +/// index in this array, and do a linear scan forward only from there. +/// +/// LazyRandomTypeCollection implements this algorithm, but additionally goes +/// one step further by caching offsets of every record that has been visited at +/// least once. This way, even repeated visits of the same record will never +/// require more than one linear scan. For a type stream of N elements divided +/// into M chunks of roughly equal size, this yields a worst case lookup time +/// of O(N/M) and an amortized time of O(1). +class LazyRandomTypeCollection : public TypeCollection { + using PartialOffsetArray = FixedStreamArray; + + struct CacheEntry { + CVType Type; + uint32_t Offset; + StringRef Name; + }; + +public: + explicit LazyRandomTypeCollection(uint32_t RecordCountHint); + LazyRandomTypeCollection(StringRef Data, uint32_t RecordCountHint); + LazyRandomTypeCollection(ArrayRef Data, uint32_t RecordCountHint); + LazyRandomTypeCollection(const CVTypeArray &Types, uint32_t RecordCountHint, + PartialOffsetArray PartialOffsets); + LazyRandomTypeCollection(const CVTypeArray &Types, uint32_t RecordCountHint); + + void reset(ArrayRef Data, uint32_t RecordCountHint); + void reset(StringRef Data, uint32_t RecordCountHint); + + uint32_t getOffsetOfType(TypeIndex Index); + + CVType getType(TypeIndex Index) override; + StringRef getTypeName(TypeIndex Index) override; + bool contains(TypeIndex Index) override; + uint32_t size() override; + uint32_t capacity() override; + Optional getFirst() override; + Optional getNext(TypeIndex Prev) override; + +private: + Error ensureTypeExists(TypeIndex Index); + void ensureCapacityFor(TypeIndex Index); + + Error visitRangeForType(TypeIndex TI); + Error fullScanForType(TypeIndex TI); + void visitRange(TypeIndex Begin, uint32_t BeginOffset, TypeIndex End); + + /// Number of actual records. + uint32_t Count = 0; + + /// The largest type index which we've visited. + TypeIndex LargestTypeIndex = TypeIndex::None(); + + BumpPtrAllocator Allocator; + StringSaver NameStorage; + + /// The type array to allow random access visitation of. + CVTypeArray Types; + + std::vector Records; + + /// An array of index offsets for the given type stream, allowing log(N) + /// lookups of a type record by index. Similar to KnownOffsets but only + /// contains offsets for some type indices, some of which may not have + /// ever been visited. 
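// Editor's sketch, not part of the patch: the lookup strategy the comment
// block above describes, reduced to its core and written as commented code
// since this point sits inside the class body. This is not the class's
// actual implementation; plain integer pairs stand in for the partial
// offset entries.
//
//   #include <algorithm>
//   #include <cstdint>
//   #include <iterator>
//   #include <utility>
//   #include <vector>
//
//   // Byte offset from which a linear scan for type index TI should start,
//   // given sparse (index, offset) hints sorted by index.
//   uint32_t scanStartOffset(
//       const std::vector<std::pair<uint32_t, uint32_t>> &Hints,
//       uint32_t TI) {
//     auto It = std::upper_bound(
//         Hints.begin(), Hints.end(), TI,
//         [](uint32_t V, const std::pair<uint32_t, uint32_t> &E) {
//           return V < E.first;
//         });
//     if (It == Hints.begin())
//       return 0; // no hint at or before TI: scan from the stream start
//     return std::prev(It)->second; // nearest hint at or before TI
//   }
//
// Every record passed during the scan is then cached, which is what yields
// the amortized O(1) behavior claimed above. The member this doc comment
// belongs to follows: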
+ PartialOffsetArray PartialOffsets; +}; + +} // end namespace codeview +} // end namespace llvm + +#endif // LLVM_DEBUGINFO_CODEVIEW_LAZYRANDOMTYPECOLLECTION_H diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/StringsAndChecksums.h b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/StringsAndChecksums.h new file mode 100644 index 0000000000000..1a83882246652 --- /dev/null +++ b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/StringsAndChecksums.h @@ -0,0 +1,102 @@ +//===- StringsAndChecksums.h ------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_STRINGSANDCHECKSUMS_H +#define LLVM_DEBUGINFO_CODEVIEW_STRINGSANDCHECKSUMS_H + +#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h" +#include "llvm/DebugInfo/CodeView/DebugStringTableSubsection.h" +#include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h" +#include + +namespace llvm { +namespace codeview { + +class StringsAndChecksumsRef { +public: + // If no subsections are known about initially, we find as much as we can. + StringsAndChecksumsRef(); + + // If only a string table subsection is given, we find a checksums subsection. + explicit StringsAndChecksumsRef(const DebugStringTableSubsectionRef &Strings); + + // If both subsections are given, we don't need to find anything. + StringsAndChecksumsRef(const DebugStringTableSubsectionRef &Strings, + const DebugChecksumsSubsectionRef &Checksums); + + void setChecksums(const DebugChecksumsSubsectionRef &CS); + + template void initialize(T &&FragmentRange) { + for (const DebugSubsectionRecord &R : FragmentRange) { + if (Strings && Checksums) + return; + if (R.kind() == DebugSubsectionKind::FileChecksums) { + initializeChecksums(R); + continue; + } + if (R.kind() == DebugSubsectionKind::StringTable && !Strings) { + // While in practice we should never encounter a string table even + // though the string table is already initialized, in theory it's + // possible. PDBs are supposed to have one global string table and + // then this subsection should not appear. Whereas object files are + // supposed to have this subsection appear exactly once. However, + // for testing purposes it's nice to be able to test this subsection + // independently of one format or the other, so for some tests we + // manually construct a PDB that contains this subsection in addition + // to a global string table. + initializeStrings(R); + continue; + } + } + } + + const DebugStringTableSubsectionRef &strings() const { return *Strings; } + const DebugChecksumsSubsectionRef &checksums() const { return *Checksums; } + + bool hasStrings() const { return Strings != nullptr; } + bool hasChecksums() const { return Checksums != nullptr; } + +private: + void initializeStrings(const DebugSubsectionRecord &SR); + void initializeChecksums(const DebugSubsectionRecord &FCR); + + std::unique_ptr OwnedStrings; + std::unique_ptr OwnedChecksums; + + const DebugStringTableSubsectionRef *Strings = nullptr; + const DebugChecksumsSubsectionRef *Checksums = nullptr; +}; + +class StringsAndChecksums { +public: + using StringsPtr = std::shared_ptr; + using ChecksumsPtr = std::shared_ptr; + + // If no subsections are known about initially, we find as much as we can. 
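// Editor's sketch, not part of the patch: how the reader-side
// StringsAndChecksumsRef above plugs into the visitor entry points declared
// in DebugSubsectionVisitor.h earlier in this patch. Written as commented
// code since this point sits inside the class body; dumpModule is a
// hypothetical caller.
//
//   #include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h"
//   #include "llvm/DebugInfo/CodeView/DebugSubsectionVisitor.h"
//
//   llvm::Error dumpModule(const llvm::codeview::DebugSubsectionArray &Subs,
//                          llvm::codeview::DebugSubsectionVisitor &V) {
//     // Nothing known up front: the no-argument overload builds a
//     // StringsAndChecksumsRef whose initialize() discovers the string
//     // table and checksums while iterating, as explained above.
//     return llvm::codeview::visitDebugSubsections(Subs, V);
//   }
//
// Back to the owning, writer-side class: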
+ StringsAndChecksums() = default; + + void setStrings(const StringsPtr &SP) { Strings = SP; } + void setChecksums(const ChecksumsPtr &CP) { Checksums = CP; } + + const StringsPtr &strings() const { return Strings; } + const ChecksumsPtr &checksums() const { return Checksums; } + + bool hasStrings() const { return Strings != nullptr; } + bool hasChecksums() const { return Checksums != nullptr; } + +private: + StringsPtr Strings; + ChecksumsPtr Checksums; +}; + +} // end namespace codeview +} // end namespace llvm + +#endif // LLVM_DEBUGINFO_CODEVIEW_STRINGSANDCHECKSUMS_H diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/SymbolDeserializer.h b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/SymbolDeserializer.h index c1a5152930fff..5b6599d8c1db0 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/SymbolDeserializer.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/SymbolDeserializer.h @@ -24,9 +24,9 @@ namespace codeview { class SymbolVisitorDelegate; class SymbolDeserializer : public SymbolVisitorCallbacks { struct MappingInfo { - explicit MappingInfo(ArrayRef RecordData) + MappingInfo(ArrayRef RecordData, CodeViewContainer Container) : Stream(RecordData, llvm::support::little), Reader(Stream), - Mapping(Reader) {} + Mapping(Reader, Container) {} BinaryByteStream Stream; BinaryStreamReader Reader; @@ -34,12 +34,30 @@ class SymbolDeserializer : public SymbolVisitorCallbacks { }; public: - explicit SymbolDeserializer(SymbolVisitorDelegate *Delegate) - : Delegate(Delegate) {} + template static Error deserializeAs(CVSymbol Symbol, T &Record) { + // If we're just deserializing one record, then don't worry about alignment + // as there's nothing that comes after. + SymbolDeserializer S(nullptr, CodeViewContainer::ObjectFile); + if (auto EC = S.visitSymbolBegin(Symbol)) + return EC; + if (auto EC = S.visitKnownRecord(Symbol, Record)) + return EC; + if (auto EC = S.visitSymbolEnd(Symbol)) + return EC; + return Error::success(); + } + + explicit SymbolDeserializer(SymbolVisitorDelegate *Delegate, + CodeViewContainer Container) + : Delegate(Delegate), Container(Container) {} + + Error visitSymbolBegin(CVSymbol &Record, uint32_t Offset) override { + return visitSymbolBegin(Record); + } Error visitSymbolBegin(CVSymbol &Record) override { assert(!Mapping && "Already in a symbol mapping!"); - Mapping = llvm::make_unique(Record.content()); + Mapping = llvm::make_unique(Record.content(), Container); return Mapping->Mapping.visitSymbolBegin(Record); } Error visitSymbolEnd(CVSymbol &Record) override { @@ -54,7 +72,7 @@ class SymbolDeserializer : public SymbolVisitorCallbacks { return visitKnownRecordImpl(CVR, Record); \ } #define SYMBOL_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) -#include "CVSymbolTypes.def" +#include "llvm/DebugInfo/CodeView/CodeViewSymbols.def" private: template Error visitKnownRecordImpl(CVSymbol &CVR, T &Record) { @@ -67,6 +85,7 @@ class SymbolDeserializer : public SymbolVisitorCallbacks { } SymbolVisitorDelegate *Delegate; + CodeViewContainer Container; std::unique_ptr Mapping; }; } diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/SymbolDumper.h b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/SymbolDumper.h index a5419b37e7761..293daa851bddf 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/SymbolDumper.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/SymbolDumper.h @@ -20,15 +20,17 @@ namespace llvm { class ScopedPrinter; namespace codeview { -class TypeDatabase; +class 
TypeCollection; /// Dumper for CodeView symbol streams found in COFF object files and PDB files. class CVSymbolDumper { public: - CVSymbolDumper(ScopedPrinter &W, TypeDatabase &TypeDB, + CVSymbolDumper(ScopedPrinter &W, TypeCollection &Types, + CodeViewContainer Container, std::unique_ptr ObjDelegate, bool PrintRecordBytes) - : W(W), TypeDB(TypeDB), ObjDelegate(std::move(ObjDelegate)), + : W(W), Types(Types), Container(Container), + ObjDelegate(std::move(ObjDelegate)), PrintRecordBytes(PrintRecordBytes) {} /// Dumps one type record. Returns false if there was a type parsing error, @@ -43,7 +45,8 @@ class CVSymbolDumper { private: ScopedPrinter &W; - TypeDatabase &TypeDB; + TypeCollection &Types; + CodeViewContainer Container; std::unique_ptr ObjDelegate; bool PrintRecordBytes; diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/SymbolRecord.h b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/SymbolRecord.h index c5a5549bf818a..f3086cf3dbb91 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/SymbolRecord.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/SymbolRecord.h @@ -21,8 +21,6 @@ #include "llvm/DebugInfo/CodeView/TypeIndex.h" #include "llvm/Support/BinaryStreamArray.h" #include "llvm/Support/Endian.h" -#include "llvm/Support/Error.h" -#include #include #include @@ -36,7 +34,6 @@ class SymbolRecord { public: SymbolRecordKind getKind() const { return Kind; } -private: SymbolRecordKind Kind; }; @@ -155,6 +152,7 @@ class CallerSym : public SymbolRecord { : SymbolRecord(Kind), RecordOffset(RecordOffset) {} std::vector Indices; + uint32_t RecordOffset; }; @@ -167,8 +165,8 @@ struct BinaryAnnotationIterator { int32_t S1; }; - BinaryAnnotationIterator(ArrayRef Annotations) : Data(Annotations) {} BinaryAnnotationIterator() = default; + BinaryAnnotationIterator(ArrayRef Annotations) : Data(Annotations) {} BinaryAnnotationIterator(const BinaryAnnotationIterator &Other) : Data(Other.Data) {} @@ -344,9 +342,9 @@ class InlineSiteSym : public SymbolRecord { : SymbolRecord(SymbolRecordKind::InlineSiteSym), RecordOffset(RecordOffset) {} - llvm::iterator_range annotations() const { - return llvm::make_range(BinaryAnnotationIterator(AnnotationData), - BinaryAnnotationIterator()); + iterator_range annotations() const { + return make_range(BinaryAnnotationIterator(AnnotationData), + BinaryAnnotationIterator()); } uint32_t Parent; @@ -365,7 +363,7 @@ class PublicSym32 : public SymbolRecord { : SymbolRecord(SymbolRecordKind::PublicSym32), RecordOffset(RecordOffset) {} - uint32_t Index; + PublicSymFlags Flags; uint32_t Offset; uint16_t Segment; StringRef Name; @@ -381,7 +379,7 @@ class RegisterSym : public SymbolRecord { : SymbolRecord(SymbolRecordKind::RegisterSym), RecordOffset(RecordOffset) {} - uint32_t Index; + TypeIndex Index; RegisterId Register; StringRef Name; @@ -481,6 +479,7 @@ class DefRangeRegisterSym : public SymbolRecord { ulittle16_t Register; ulittle16_t MayHaveNoName; }; + explicit DefRangeRegisterSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {} DefRangeRegisterSym(uint32_t RecordOffset) : SymbolRecord(SymbolRecordKind::DefRangeRegisterSym), @@ -503,6 +502,7 @@ class DefRangeSubfieldRegisterSym : public SymbolRecord { ulittle16_t MayHaveNoName; ulittle32_t OffsetInParent; }; + explicit DefRangeSubfieldRegisterSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {} DefRangeSubfieldRegisterSym(uint32_t RecordOffset) @@ -548,6 +548,7 @@ class DefRangeRegisterRelSym : public SymbolRecord { ulittle16_t Flags; little32_t BasePointerOffset; }; + explicit 
DefRangeRegisterRelSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {} explicit DefRangeRegisterRelSym(uint32_t RecordOffset) : SymbolRecord(SymbolRecordKind::DefRangeRegisterRelSym), @@ -681,7 +682,7 @@ class FileStaticSym : public SymbolRecord { : SymbolRecord(SymbolRecordKind::FileStaticSym), RecordOffset(RecordOffset) {} - uint32_t Index; + TypeIndex Index; uint32_t ModFilenameOffset; LocalSymFlags Flags; StringRef Name; @@ -734,6 +735,10 @@ class Compile3Sym : public SymbolRecord { uint16_t VersionBackendQFE; StringRef Version; + void setLanguage(SourceLanguage Lang) { + Flags = CompileSym3Flags((uint32_t(Flags) & 0xFFFFFF00) | uint32_t(Lang)); + } + uint8_t getLanguage() const { return static_cast(Flags) & 0xFF; } uint32_t getFlags() const { return static_cast(Flags) & ~0xFF; } @@ -816,7 +821,7 @@ class FrameCookieSym : public SymbolRecord { uint32_t CodeOffset; uint16_t Register; - uint8_t CookieKind; + FrameCookieKind CookieKind; uint8_t Flags; uint32_t RecordOffset; @@ -843,7 +848,7 @@ class BuildInfoSym : public SymbolRecord { : SymbolRecord(SymbolRecordKind::BuildInfoSym), RecordOffset(RecordOffset) {} - uint32_t BuildId; + TypeIndex BuildId; uint32_t RecordOffset; }; @@ -873,7 +878,7 @@ class RegRelativeSym : public SymbolRecord { uint32_t Offset; TypeIndex Type; - uint16_t Register; + RegisterId Register; StringRef Name; uint32_t RecordOffset; @@ -937,8 +942,8 @@ class ThreadLocalDataSym : public SymbolRecord { uint32_t RecordOffset; }; -typedef CVRecord CVSymbol; -typedef VarStreamArray CVSymbolArray; +using CVSymbol = CVRecord; +using CVSymbolArray = VarStreamArray; } // end namespace codeview } // end namespace llvm diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/SymbolRecordMapping.h b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/SymbolRecordMapping.h index 0a1837a0d935f..391e8f127665a 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/SymbolRecordMapping.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/SymbolRecordMapping.h @@ -20,8 +20,12 @@ class BinaryStreamWriter; namespace codeview { class SymbolRecordMapping : public SymbolVisitorCallbacks { public: - explicit SymbolRecordMapping(BinaryStreamReader &Reader) : IO(Reader) {} - explicit SymbolRecordMapping(BinaryStreamWriter &Writer) : IO(Writer) {} + explicit SymbolRecordMapping(BinaryStreamReader &Reader, + CodeViewContainer Container) + : IO(Reader), Container(Container) {} + explicit SymbolRecordMapping(BinaryStreamWriter &Writer, + CodeViewContainer Container) + : IO(Writer), Container(Container) {} Error visitSymbolBegin(CVSymbol &Record) override; Error visitSymbolEnd(CVSymbol &Record) override; @@ -29,12 +33,13 @@ class SymbolRecordMapping : public SymbolVisitorCallbacks { #define SYMBOL_RECORD(EnumName, EnumVal, Name) \ Error visitKnownRecord(CVSymbol &CVR, Name &Record) override; #define SYMBOL_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) -#include "CVSymbolTypes.def" +#include "llvm/DebugInfo/CodeView/CodeViewSymbols.def" private: Optional Kind; CodeViewRecordIO IO; + CodeViewContainer Container; }; } } diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/SymbolSerializer.h b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/SymbolSerializer.h index f2e99bd833260..b63ced5217b44 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/SymbolSerializer.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/SymbolSerializer.h @@ -1,4 +1,4 @@ -//===- symbolSerializer.h ---------------------------------------*- C++ -*-===// 
+//===- SymbolSerializer.h ---------------------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -10,21 +10,20 @@ #ifndef LLVM_DEBUGINFO_CODEVIEW_SYMBOLSERIALIZER_H #define LLVM_DEBUGINFO_CODEVIEW_SYMBOLSERIALIZER_H +#include "llvm/ADT/Optional.h" +#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/RecordSerialization.h" +#include "llvm/DebugInfo/CodeView/SymbolRecord.h" #include "llvm/DebugInfo/CodeView/SymbolRecordMapping.h" #include "llvm/DebugInfo/CodeView/SymbolVisitorCallbacks.h" - -#include "llvm/ADT/Optional.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringMap.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/ADT/iterator_range.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/BinaryByteStream.h" #include "llvm/Support/BinaryStreamWriter.h" #include "llvm/Support/Error.h" +#include <cstdint> +#include <vector> namespace llvm { -class BinaryStreamWriter; namespace codeview { class SymbolSerializer : public SymbolVisitorCallbacks { @@ -45,17 +44,29 @@ class SymbolSerializer : public SymbolVisitorCallbacks { } public: - explicit SymbolSerializer(BumpPtrAllocator &Storage); + SymbolSerializer(BumpPtrAllocator &Storage, CodeViewContainer Container); - virtual Error visitSymbolBegin(CVSymbol &Record) override; - virtual Error visitSymbolEnd(CVSymbol &Record) override; + template <typename SymType> + static CVSymbol writeOneSymbol(SymType &Sym, BumpPtrAllocator &Storage, + CodeViewContainer Container) { + CVSymbol Result; + Result.Type = static_cast<SymbolKind>(Sym.Kind); + SymbolSerializer Serializer(Storage, Container); + consumeError(Serializer.visitSymbolBegin(Result)); + consumeError(Serializer.visitKnownRecord(Result, Sym)); + consumeError(Serializer.visitSymbolEnd(Result)); + return Result; + } + + Error visitSymbolBegin(CVSymbol &Record) override; + Error visitSymbolEnd(CVSymbol &Record) override; #define SYMBOL_RECORD(EnumName, EnumVal, Name) \ - virtual Error visitKnownRecord(CVSymbol &CVR, Name &Record) override { \ + Error visitKnownRecord(CVSymbol &CVR, Name &Record) override { \ return visitKnownRecordImpl(CVR, Record); \ } #define
SYMBOL_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) -#include "llvm/DebugInfo/CodeView/CVSymbolTypes.def" +#include "llvm/DebugInfo/CodeView/CodeViewSymbols.def" private: std::vector Pipeline; diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/SymbolVisitorCallbacks.h b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/SymbolVisitorCallbacks.h index aaa9d2e85e136..0816f7c62656f 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/SymbolVisitorCallbacks.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/SymbolVisitorCallbacks.h @@ -29,8 +29,10 @@ class SymbolVisitorCallbacks { /// Paired begin/end actions for all symbols. Receives all record data, /// including the fixed-length record prefix. visitSymbolBegin() should - /// return - /// the type of the Symbol, or an error if it cannot be determined. + /// return the type of the Symbol, or an error if it cannot be determined. + virtual Error visitSymbolBegin(CVSymbol &Record, uint32_t Offset) { + return Error::success(); + } virtual Error visitSymbolBegin(CVSymbol &Record) { return Error::success(); } virtual Error visitSymbolEnd(CVSymbol &Record) { return Error::success(); } @@ -39,7 +41,7 @@ class SymbolVisitorCallbacks { return Error::success(); \ } #define SYMBOL_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) -#include "CVSymbolTypes.def" +#include "llvm/DebugInfo/CodeView/CodeViewSymbols.def" }; } // end namespace codeview diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/SymbolVisitorDelegate.h b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/SymbolVisitorDelegate.h index 96c8a47a36690..a2a3c6f18fba1 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/SymbolVisitorDelegate.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/SymbolVisitorDelegate.h @@ -19,7 +19,7 @@ class BinaryStreamReader; namespace codeview { -class StringTableRef; +class DebugStringTableSubsectionRef; class SymbolVisitorDelegate { public: @@ -27,7 +27,7 @@ class SymbolVisitorDelegate { virtual uint32_t getRecordOffset(BinaryStreamReader Reader) = 0; virtual StringRef getFileNameForFileOffset(uint32_t FileOffset) = 0; - virtual StringTableRef getStringTable() = 0; + virtual DebugStringTableSubsectionRef getStringTable() = 0; }; } // end namespace codeview diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/TypeCollection.h b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/TypeCollection.h new file mode 100644 index 0000000000000..0f856f57a7275 --- /dev/null +++ b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/TypeCollection.h @@ -0,0 +1,38 @@ +//===- TypeCollection.h - A collection of CodeView type records -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_TYPECOLLECTION_H +#define LLVM_DEBUGINFO_CODEVIEW_TYPECOLLECTION_H + +#include "llvm/ADT/StringRef.h" + +#include "llvm/DebugInfo/CodeView/TypeIndex.h" +#include "llvm/DebugInfo/CodeView/TypeRecord.h" + +namespace llvm { +namespace codeview { +class TypeCollection { +public: + virtual ~TypeCollection() = default; + + bool empty() { return size() == 0; } + + virtual Optional<TypeIndex> getFirst() = 0; + virtual Optional<TypeIndex> getNext(TypeIndex Prev) = 0; + + virtual CVType getType(TypeIndex Index) = 0; + virtual StringRef getTypeName(TypeIndex Index) = 0; + virtual bool contains(TypeIndex Index) = 0; + virtual uint32_t size() = 0; + virtual uint32_t capacity() = 0; +}; +} +} + +#endif diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/TypeDeserializer.h b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/TypeDeserializer.h index 2142d4a2dec70..965cdfd85f489 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/TypeDeserializer.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/TypeDeserializer.h @@ -40,6 +40,18 @@ class TypeDeserializer : public TypeVisitorCallbacks { public: TypeDeserializer() = default; + template <typename T> static Error deserializeAs(CVType &CVT, T &Record) { + Record.Kind = static_cast<TypeRecordKind>(CVT.kind()); + MappingInfo I(CVT.content()); + if (auto EC = I.Mapping.visitTypeBegin(CVT)) + return EC; + if (auto EC = I.Mapping.visitKnownRecord(CVT, Record)) + return EC; + if (auto EC = I.Mapping.visitTypeEnd(CVT)) + return EC; + return Error::success(); + } + Error visitTypeBegin(CVType &Record) override { assert(!Mapping && "Already in a type mapping!"); Mapping = llvm::make_unique<MappingInfo>(Record.content()); @@ -64,7 +76,7 @@ class TypeDeserializer : public TypeVisitorCallbacks { #define MEMBER_RECORD(EnumName, EnumVal, Name) #define TYPE_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) #define MEMBER_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) -#include "TypeRecords.def" +#include "llvm/DebugInfo/CodeView/CodeViewTypes.def" private: template <typename T> @@ -116,7 +128,7 @@ class FieldListDeserializer : public TypeVisitorCallbacks { } #define TYPE_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) #define MEMBER_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) -#include "TypeRecords.def" +#include "llvm/DebugInfo/CodeView/CodeViewTypes.def" private: template <typename T> diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/TypeDumpVisitor.h b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/TypeDumpVisitor.h index 6f10afb30d606..afb8b3636361b 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/TypeDumpVisitor.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/TypeDumpVisitor.h @@ -12,7 +12,6 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringSet.h" -#include "llvm/DebugInfo/CodeView/TypeDatabase.h" #include "llvm/DebugInfo/CodeView/TypeIndex.h" #include "llvm/DebugInfo/CodeView/TypeRecord.h" #include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h" @@ -22,17 +21,20 @@ class ScopedPrinter; namespace codeview { +class TypeCollection; + /// Dumper for CodeView type streams found in COFF object files and PDB files.
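///
/// Illustrative usage (an editorial sketch, not part of this header): given a
/// populated TypeCollection `Types` and a ScopedPrinter `W`, the dumper is
/// typically driven through the generic type-stream visitor, assuming the
/// visitTypeStream overload taking a TypeCollection from CVTypeVisitor.h:
///
///   TypeDumpVisitor TDV(Types, &W, /*PrintRecordBytes=*/false);
///   if (auto EC = codeview::visitTypeStream(Types, TDV))
///     consumeError(std::move(EC));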
class TypeDumpVisitor : public TypeVisitorCallbacks { public: - TypeDumpVisitor(TypeDatabase &TypeDB, ScopedPrinter *W, bool PrintRecordBytes) - : W(W), PrintRecordBytes(PrintRecordBytes), TypeDB(TypeDB) {} + TypeDumpVisitor(TypeCollection &TpiTypes, ScopedPrinter *W, + bool PrintRecordBytes) + : W(W), PrintRecordBytes(PrintRecordBytes), TpiTypes(TpiTypes) {} /// When dumping types from an IPI stream in a PDB, a type index may refer to /// a type or an item ID. The dumper will look up the "name" of the index in /// the item collection if appropriate. If IpiTypes is null, it will use /// TpiTypes, which is correct when dumping types from an object file (/Z7). - void setItemDB(TypeDatabase &DB) { ItemDB = &DB; } + void setIpiTypes(TypeCollection &Types) { IpiTypes = &Types; } void printTypeIndex(StringRef FieldName, TypeIndex TI) const; @@ -56,7 +58,7 @@ class TypeDumpVisitor : public TypeVisitorCallbacks { Error visitKnownMember(CVMemberRecord &CVR, Name##Record &Record) override; #define TYPE_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) #define MEMBER_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) -#include "TypeRecords.def" +#include "llvm/DebugInfo/CodeView/CodeViewTypes.def" private: void printMemberAttributes(MemberAttributes Attrs); @@ -66,14 +68,16 @@ class TypeDumpVisitor : public TypeVisitorCallbacks { /// Get the type collection for the stream that we are dumping. If IpiTypes /// is set, then we must be dumping an item (IPI) stream. This will also /// always get the appropriate collection for printing item names. - TypeDatabase &getSourceDB() const { return ItemDB ? *ItemDB : TypeDB; } + TypeCollection &getSourceTypes() const { + return IpiTypes ? *IpiTypes : TpiTypes; + } ScopedPrinter *W; bool PrintRecordBytes = false; - TypeDatabase &TypeDB; - TypeDatabase *ItemDB = nullptr; + TypeCollection &TpiTypes; + TypeCollection *IpiTypes = nullptr; }; } // end namespace codeview diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/TypeIndex.h b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/TypeIndex.h index b5d695fc49d5b..e0c2226bdbd74 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/TypeIndex.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/TypeIndex.h @@ -10,13 +10,20 @@ #ifndef LLVM_DEBUGINFO_CODEVIEW_TYPEINDEX_H #define LLVM_DEBUGINFO_CODEVIEW_TYPEINDEX_H +#include "llvm/ADT/DenseMapInfo.h" #include "llvm/Support/Endian.h" #include <cassert> #include <cinttypes> +#include <functional> namespace llvm { + +class ScopedPrinter; + namespace codeview { +class TypeCollection; + enum class SimpleTypeKind : uint32_t { None = 0x0000, // uncharacterized type (no type) Void = 0x0003, // void @@ -238,6 +245,13 @@ class TypeIndex { return Result; } + friend inline uint32_t operator-(const TypeIndex &A, const TypeIndex &B) { + assert(A >= B); + return A.toArrayIndex() - B.toArrayIndex(); + } + + static StringRef simpleTypeName(TypeIndex TI); + private: support::ulittle32_t Index; }; @@ -249,7 +263,27 @@ struct TypeIndexOffset { TypeIndex Type; support::ulittle32_t Offset; }; + +void printTypeIndex(ScopedPrinter &Printer, StringRef FieldName, TypeIndex TI, + TypeCollection &Types); } -} + +template <> struct DenseMapInfo<codeview::TypeIndex> { + static inline codeview::TypeIndex getEmptyKey() { + return codeview::TypeIndex{DenseMapInfo<uint32_t>::getEmptyKey()}; + } + static inline codeview::TypeIndex getTombstoneKey() { + return codeview::TypeIndex{DenseMapInfo<uint32_t>::getTombstoneKey()}; + } + static unsigned getHashValue(const codeview::TypeIndex &TI) { + return DenseMapInfo<uint32_t>::getHashValue(TI.getIndex()); + } + static bool
isEqual(const codeview::TypeIndex &LHS, + const codeview::TypeIndex &RHS) { + return LHS == RHS; + } +}; + +} // namespace llvm #endif diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/TypeIndexDiscovery.h b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/TypeIndexDiscovery.h new file mode 100644 index 0000000000000..afe8942159e84 --- /dev/null +++ b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/TypeIndexDiscovery.h @@ -0,0 +1,41 @@ +//===- TypeIndexDiscovery.h -------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_TYPEINDEXDISCOVERY_H +#define LLVM_DEBUGINFO_CODEVIEW_TYPEINDEXDISCOVERY_H + +#include "llvm/ADT/SmallVector.h" +#include "llvm/DebugInfo/CodeView/SymbolRecord.h" +#include "llvm/DebugInfo/CodeView/TypeRecord.h" +#include "llvm/Support/Error.h" + +namespace llvm { +namespace codeview { +enum class TiRefKind { TypeRef, IndexRef }; +struct TiReference { + TiRefKind Kind; + uint32_t Offset; + uint32_t Count; +}; + +void discoverTypeIndices(ArrayRef<uint8_t> RecordData, + SmallVectorImpl<TiReference> &Refs); +void discoverTypeIndices(const CVType &Type, + SmallVectorImpl<TiReference> &Refs); +void discoverTypeIndices(const CVType &Type, + SmallVectorImpl<TypeIndex> &Indices); + +/// Discover type indices in symbol records. Returns false if this is an unknown +/// record. +bool discoverTypeIndices(const CVSymbol &Symbol, + SmallVectorImpl<TiReference> &Refs); +} +} + +#endif diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/TypeName.h b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/TypeName.h new file mode 100644 index 0000000000000..a987b4afd283a --- /dev/null +++ b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/TypeName.h @@ -0,0 +1,22 @@ +//===- TypeName.h --------------------------------------------- *- C++ --*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_TYPENAME_H +#define LLVM_DEBUGINFO_CODEVIEW_TYPENAME_H + +#include "llvm/DebugInfo/CodeView/TypeCollection.h" +#include "llvm/DebugInfo/CodeView/TypeIndex.h" + +namespace llvm { +namespace codeview { +std::string computeTypeName(TypeCollection &Types, TypeIndex Index); +} +} // namespace llvm + +#endif diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/TypeRecord.h b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/TypeRecord.h index 1f10872c87680..7942c0c0bc215 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/TypeRecord.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/TypeRecord.h @@ -15,8 +15,10 @@ #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/DebugInfo/CodeView/CVRecord.h" #include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/GUID.h" #include "llvm/DebugInfo/CodeView/TypeIndex.h" #include "llvm/Support/BinaryStreamArray.h" #include "llvm/Support/Endian.h" @@ -25,30 +27,30 @@ #include namespace llvm { - -class BinaryStreamReader; - namespace codeview { using support::little32_t; using support::ulittle16_t; using support::ulittle32_t; -typedef CVRecord CVType; +using CVType = CVRecord; +using RemappedType = RemappedRecord; struct CVMemberRecord { TypeLeafKind Kind; ArrayRef Data; }; -typedef VarStreamArray CVTypeArray; -typedef iterator_range CVTypeRange; +using CVTypeArray = VarStreamArray; +using CVTypeRange = iterator_range; /// Equvalent to CV_fldattr_t in cvinfo.h. struct MemberAttributes { uint16_t Attrs = 0; + enum { MethodKindShift = 2, }; + MemberAttributes() = default; explicit MemberAttributes(MemberAccess Access) @@ -122,13 +124,13 @@ class TypeRecord { public: TypeRecordKind getKind() const { return Kind; } -private: TypeRecordKind Kind; }; // LF_MODIFIER class ModifierRecord : public TypeRecord { public: + ModifierRecord() = default; explicit ModifierRecord(TypeRecordKind Kind) : TypeRecord(Kind) {} ModifierRecord(TypeIndex ModifiedType, ModifierOptions Modifiers) : TypeRecord(TypeRecordKind::Modifier), ModifiedType(ModifiedType), @@ -144,6 +146,7 @@ class ModifierRecord : public TypeRecord { // LF_PROCEDURE class ProcedureRecord : public TypeRecord { public: + ProcedureRecord() = default; explicit ProcedureRecord(TypeRecordKind Kind) : TypeRecord(Kind) {} ProcedureRecord(TypeIndex ReturnType, CallingConvention CallConv, FunctionOptions Options, uint16_t ParameterCount, @@ -168,6 +171,7 @@ class ProcedureRecord : public TypeRecord { // LF_MFUNCTION class MemberFunctionRecord : public TypeRecord { public: + MemberFunctionRecord() = default; explicit MemberFunctionRecord(TypeRecordKind Kind) : TypeRecord(Kind) {} MemberFunctionRecord(TypeIndex ReturnType, TypeIndex ClassType, @@ -202,6 +206,7 @@ class MemberFunctionRecord : public TypeRecord { // LF_LABEL class LabelRecord : public TypeRecord { public: + LabelRecord() = default; explicit LabelRecord(TypeRecordKind Kind) : TypeRecord(Kind) {} LabelRecord(LabelType Mode) : TypeRecord(TypeRecordKind::Label), Mode(Mode) {} @@ -212,6 +217,7 @@ class LabelRecord : public TypeRecord { // LF_MFUNC_ID class MemberFuncIdRecord : public TypeRecord { public: + MemberFuncIdRecord() = default; explicit MemberFuncIdRecord(TypeRecordKind Kind) : TypeRecord(Kind) {} MemberFuncIdRecord(TypeIndex ClassType, TypeIndex FunctionType, StringRef Name) @@ 
-221,6 +227,7 @@ class MemberFuncIdRecord : public TypeRecord { TypeIndex getClassType() const { return ClassType; } TypeIndex getFunctionType() const { return FunctionType; } StringRef getName() const { return Name; } + TypeIndex ClassType; TypeIndex FunctionType; StringRef Name; @@ -229,6 +236,7 @@ class MemberFuncIdRecord : public TypeRecord { // LF_ARGLIST class ArgListRecord : public TypeRecord { public: + ArgListRecord() = default; explicit ArgListRecord(TypeRecordKind Kind) : TypeRecord(Kind) {} ArgListRecord(TypeRecordKind Kind, ArrayRef Indices) @@ -242,6 +250,7 @@ class ArgListRecord : public TypeRecord { // LF_SUBSTR_LIST class StringListRecord : public TypeRecord { public: + StringListRecord() = default; explicit StringListRecord(TypeRecordKind Kind) : TypeRecord(Kind) {} StringListRecord(TypeRecordKind Kind, ArrayRef Indices) @@ -266,6 +275,7 @@ class PointerRecord : public TypeRecord { static const uint32_t PointerSizeShift = 13; static const uint32_t PointerSizeMask = 0xFF; + PointerRecord() = default; explicit PointerRecord(TypeRecordKind Kind) : TypeRecord(Kind) {} PointerRecord(TypeIndex ReferentType, uint32_t Attrs) @@ -278,15 +288,9 @@ class PointerRecord : public TypeRecord { Attrs(calcAttrs(PK, PM, PO, Size)) {} PointerRecord(TypeIndex ReferentType, PointerKind PK, PointerMode PM, - PointerOptions PO, uint8_t Size, - const MemberPointerInfo &Member) + PointerOptions PO, uint8_t Size, const MemberPointerInfo &MPI) : TypeRecord(TypeRecordKind::Pointer), ReferentType(ReferentType), - Attrs(calcAttrs(PK, PM, PO, Size)), MemberInfo(Member) {} - - PointerRecord(TypeIndex ReferentType, uint32_t Attrs, - const MemberPointerInfo &Member) - : TypeRecord(TypeRecordKind::Pointer), ReferentType(ReferentType), - Attrs(Attrs), MemberInfo(Member) {} + Attrs(calcAttrs(PK, PM, PO, Size)), MemberInfo(MPI) {} TypeIndex getReferentType() const { return ReferentType; } @@ -328,7 +332,6 @@ class PointerRecord : public TypeRecord { TypeIndex ReferentType; uint32_t Attrs; - Optional MemberInfo; private: @@ -346,6 +349,7 @@ class PointerRecord : public TypeRecord { // LF_NESTTYPE class NestedTypeRecord : public TypeRecord { public: + NestedTypeRecord() = default; explicit NestedTypeRecord(TypeRecordKind Kind) : TypeRecord(Kind) {} NestedTypeRecord(TypeIndex Type, StringRef Name) : TypeRecord(TypeRecordKind::NestedType), Type(Type), Name(Name) {} @@ -360,6 +364,7 @@ class NestedTypeRecord : public TypeRecord { // LF_FIELDLIST class FieldListRecord : public TypeRecord { public: + FieldListRecord() = default; explicit FieldListRecord(TypeRecordKind Kind) : TypeRecord(Kind) {} explicit FieldListRecord(ArrayRef Data) : TypeRecord(TypeRecordKind::FieldList), Data(Data) {} @@ -370,6 +375,7 @@ class FieldListRecord : public TypeRecord { // LF_ARRAY class ArrayRecord : public TypeRecord { public: + ArrayRecord() = default; explicit ArrayRecord(TypeRecordKind Kind) : TypeRecord(Kind) {} ArrayRecord(TypeIndex ElementType, TypeIndex IndexType, uint64_t Size, StringRef Name) @@ -389,6 +395,7 @@ class ArrayRecord : public TypeRecord { class TagRecord : public TypeRecord { protected: + TagRecord() = default; explicit TagRecord(TypeRecordKind Kind) : TypeRecord(Kind) {} TagRecord(TypeRecordKind Kind, uint16_t MemberCount, ClassOptions Options, TypeIndex FieldList, StringRef Name, StringRef UniqueName) @@ -421,6 +428,7 @@ class TagRecord : public TypeRecord { // LF_CLASS, LF_STRUCTURE, LF_INTERFACE class ClassRecord : public TagRecord { public: + ClassRecord() = default; explicit ClassRecord(TypeRecordKind 
Kind) : TagRecord(Kind) {} ClassRecord(TypeRecordKind Kind, uint16_t MemberCount, ClassOptions Options, TypeIndex FieldList, TypeIndex DerivationList, @@ -452,6 +460,7 @@ class ClassRecord : public TagRecord { // LF_UNION struct UnionRecord : public TagRecord { + UnionRecord() = default; explicit UnionRecord(TypeRecordKind Kind) : TagRecord(Kind) {} UnionRecord(uint16_t MemberCount, ClassOptions Options, TypeIndex FieldList, uint64_t Size, StringRef Name, StringRef UniqueName) @@ -473,6 +482,7 @@ struct UnionRecord : public TagRecord { // LF_ENUM class EnumRecord : public TagRecord { public: + EnumRecord() = default; explicit EnumRecord(TypeRecordKind Kind) : TagRecord(Kind) {} EnumRecord(uint16_t MemberCount, ClassOptions Options, TypeIndex FieldList, StringRef Name, StringRef UniqueName, TypeIndex UnderlyingType) @@ -481,12 +491,14 @@ class EnumRecord : public TagRecord { UnderlyingType(UnderlyingType) {} TypeIndex getUnderlyingType() const { return UnderlyingType; } + TypeIndex UnderlyingType; }; // LF_BITFIELD class BitFieldRecord : public TypeRecord { public: + BitFieldRecord() = default; explicit BitFieldRecord(TypeRecordKind Kind) : TypeRecord(Kind) {} BitFieldRecord(TypeIndex Type, uint8_t BitSize, uint8_t BitOffset) : TypeRecord(TypeRecordKind::BitField), Type(Type), BitSize(BitSize), @@ -495,6 +507,7 @@ class BitFieldRecord : public TypeRecord { TypeIndex getType() const { return Type; } uint8_t getBitOffset() const { return BitOffset; } uint8_t getBitSize() const { return BitSize; } + TypeIndex Type; uint8_t BitSize; uint8_t BitOffset; @@ -503,6 +516,7 @@ class BitFieldRecord : public TypeRecord { // LF_VTSHAPE class VFTableShapeRecord : public TypeRecord { public: + VFTableShapeRecord() = default; explicit VFTableShapeRecord(TypeRecordKind Kind) : TypeRecord(Kind) {} explicit VFTableShapeRecord(ArrayRef Slots) : TypeRecord(TypeRecordKind::VFTableShape), SlotsRef(Slots) {} @@ -516,6 +530,7 @@ class VFTableShapeRecord : public TypeRecord { } uint32_t getEntryCount() const { return getSlots().size(); } + ArrayRef SlotsRef; std::vector Slots; }; @@ -523,18 +538,19 @@ class VFTableShapeRecord : public TypeRecord { // LF_TYPESERVER2 class TypeServer2Record : public TypeRecord { public: + TypeServer2Record() = default; explicit TypeServer2Record(TypeRecordKind Kind) : TypeRecord(Kind) {} - TypeServer2Record(StringRef Guid, uint32_t Age, StringRef Name) - : TypeRecord(TypeRecordKind::TypeServer2), Guid(Guid), Age(Age), - Name(Name) {} - - StringRef getGuid() const { return Guid; } + TypeServer2Record(StringRef GuidStr, uint32_t Age, StringRef Name) + : TypeRecord(TypeRecordKind::TypeServer2), Age(Age), Name(Name) { + assert(GuidStr.size() == 16 && "guid isn't 16 bytes"); + ::memcpy(Guid.Guid, GuidStr.data(), 16); + } + const GUID &getGuid() const { return Guid; } uint32_t getAge() const { return Age; } - StringRef getName() const { return Name; } - StringRef Guid; + GUID Guid; uint32_t Age; StringRef Name; }; @@ -542,13 +558,14 @@ class TypeServer2Record : public TypeRecord { // LF_STRING_ID class StringIdRecord : public TypeRecord { public: + StringIdRecord() = default; explicit StringIdRecord(TypeRecordKind Kind) : TypeRecord(Kind) {} StringIdRecord(TypeIndex Id, StringRef String) : TypeRecord(TypeRecordKind::StringId), Id(Id), String(String) {} TypeIndex getId() const { return Id; } - StringRef getString() const { return String; } + TypeIndex Id; StringRef String; }; @@ -556,15 +573,14 @@ class StringIdRecord : public TypeRecord { // LF_FUNC_ID class FuncIdRecord : public TypeRecord 
{ public: + FuncIdRecord() = default; explicit FuncIdRecord(TypeRecordKind Kind) : TypeRecord(Kind) {} FuncIdRecord(TypeIndex ParentScope, TypeIndex FunctionType, StringRef Name) : TypeRecord(TypeRecordKind::FuncId), ParentScope(ParentScope), FunctionType(FunctionType), Name(Name) {} TypeIndex getParentScope() const { return ParentScope; } - TypeIndex getFunctionType() const { return FunctionType; } - StringRef getName() const { return Name; } TypeIndex ParentScope; @@ -575,6 +591,7 @@ class FuncIdRecord : public TypeRecord { // LF_UDT_SRC_LINE class UdtSourceLineRecord : public TypeRecord { public: + UdtSourceLineRecord() = default; explicit UdtSourceLineRecord(TypeRecordKind Kind) : TypeRecord(Kind) {} UdtSourceLineRecord(TypeIndex UDT, TypeIndex SourceFile, uint32_t LineNumber) : TypeRecord(TypeRecordKind::UdtSourceLine), UDT(UDT), @@ -592,6 +609,7 @@ class UdtSourceLineRecord : public TypeRecord { // LF_UDT_MOD_SRC_LINE class UdtModSourceLineRecord : public TypeRecord { public: + UdtModSourceLineRecord() = default; explicit UdtModSourceLineRecord(TypeRecordKind Kind) : TypeRecord(Kind) {} UdtModSourceLineRecord(TypeIndex UDT, TypeIndex SourceFile, uint32_t LineNumber, uint16_t Module) @@ -612,18 +630,21 @@ class UdtModSourceLineRecord : public TypeRecord { // LF_BUILDINFO class BuildInfoRecord : public TypeRecord { public: + BuildInfoRecord() = default; explicit BuildInfoRecord(TypeRecordKind Kind) : TypeRecord(Kind) {} BuildInfoRecord(ArrayRef ArgIndices) : TypeRecord(TypeRecordKind::BuildInfo), ArgIndices(ArgIndices.begin(), ArgIndices.end()) {} ArrayRef getArgs() const { return ArgIndices; } + SmallVector ArgIndices; }; // LF_VFTABLE class VFTableRecord : public TypeRecord { public: + VFTableRecord() = default; explicit VFTableRecord(TypeRecordKind Kind) : TypeRecord(Kind) {} VFTableRecord(TypeIndex CompleteClass, TypeIndex OverriddenVFTable, uint32_t VFPtrOffset, StringRef Name, @@ -638,6 +659,7 @@ class VFTableRecord : public TypeRecord { TypeIndex getOverriddenVTable() const { return OverriddenVFTable; } uint32_t getVFPtrOffset() const { return VFPtrOffset; } StringRef getName() const { return makeArrayRef(MethodNames).front(); } + ArrayRef getMethodNames() const { return makeArrayRef(MethodNames).drop_front(); } @@ -651,7 +673,7 @@ class VFTableRecord : public TypeRecord { // LF_ONEMETHOD class OneMethodRecord : public TypeRecord { public: - OneMethodRecord() : TypeRecord(TypeRecordKind::OneMethod) {} + OneMethodRecord() = default; explicit OneMethodRecord(TypeRecordKind Kind) : TypeRecord(Kind) {} OneMethodRecord(TypeIndex Type, MemberAttributes Attrs, int32_t VFTableOffset, StringRef Name) @@ -683,17 +705,20 @@ class OneMethodRecord : public TypeRecord { // LF_METHODLIST class MethodOverloadListRecord : public TypeRecord { public: + MethodOverloadListRecord() = default; explicit MethodOverloadListRecord(TypeRecordKind Kind) : TypeRecord(Kind) {} MethodOverloadListRecord(ArrayRef Methods) : TypeRecord(TypeRecordKind::MethodOverloadList), Methods(Methods) {} ArrayRef getMethods() const { return Methods; } + std::vector Methods; }; /// For method overload sets. 
LF_METHOD class OverloadedMethodRecord : public TypeRecord { public: + OverloadedMethodRecord() = default; explicit OverloadedMethodRecord(TypeRecordKind Kind) : TypeRecord(Kind) {} OverloadedMethodRecord(uint16_t NumOverloads, TypeIndex MethodList, StringRef Name) @@ -703,6 +728,7 @@ class OverloadedMethodRecord : public TypeRecord { uint16_t getNumOverloads() const { return NumOverloads; } TypeIndex getMethodList() const { return MethodList; } StringRef getName() const { return Name; } + uint16_t NumOverloads; TypeIndex MethodList; StringRef Name; @@ -711,6 +737,7 @@ class OverloadedMethodRecord : public TypeRecord { // LF_MEMBER class DataMemberRecord : public TypeRecord { public: + DataMemberRecord() = default; explicit DataMemberRecord(TypeRecordKind Kind) : TypeRecord(Kind) {} DataMemberRecord(MemberAttributes Attrs, TypeIndex Type, uint64_t Offset, StringRef Name) @@ -735,6 +762,7 @@ class DataMemberRecord : public TypeRecord { // LF_STMEMBER class StaticDataMemberRecord : public TypeRecord { public: + StaticDataMemberRecord() = default; explicit StaticDataMemberRecord(TypeRecordKind Kind) : TypeRecord(Kind) {} StaticDataMemberRecord(MemberAttributes Attrs, TypeIndex Type, StringRef Name) : TypeRecord(TypeRecordKind::StaticDataMember), Attrs(Attrs), Type(Type), @@ -755,6 +783,7 @@ class StaticDataMemberRecord : public TypeRecord { // LF_ENUMERATE class EnumeratorRecord : public TypeRecord { public: + EnumeratorRecord() = default; explicit EnumeratorRecord(TypeRecordKind Kind) : TypeRecord(Kind) {} EnumeratorRecord(MemberAttributes Attrs, APSInt Value, StringRef Name) : TypeRecord(TypeRecordKind::Enumerator), Attrs(Attrs), @@ -775,6 +804,7 @@ class EnumeratorRecord : public TypeRecord { // LF_VFUNCTAB class VFPtrRecord : public TypeRecord { public: + VFPtrRecord() = default; explicit VFPtrRecord(TypeRecordKind Kind) : TypeRecord(Kind) {} VFPtrRecord(TypeIndex Type) : TypeRecord(TypeRecordKind::VFPtr), Type(Type) {} @@ -787,6 +817,7 @@ class VFPtrRecord : public TypeRecord { // LF_BCLASS, LF_BINTERFACE class BaseClassRecord : public TypeRecord { public: + BaseClassRecord() = default; explicit BaseClassRecord(TypeRecordKind Kind) : TypeRecord(Kind) {} BaseClassRecord(MemberAttributes Attrs, TypeIndex Type, uint64_t Offset) : TypeRecord(TypeRecordKind::BaseClass), Attrs(Attrs), Type(Type), @@ -807,6 +838,7 @@ class BaseClassRecord : public TypeRecord { // LF_VBCLASS, LF_IVBCLASS class VirtualBaseClassRecord : public TypeRecord { public: + VirtualBaseClassRecord() = default; explicit VirtualBaseClassRecord(TypeRecordKind Kind) : TypeRecord(Kind) {} VirtualBaseClassRecord(TypeRecordKind Kind, MemberAttributes Attrs, TypeIndex BaseType, TypeIndex VBPtrType, @@ -836,6 +868,7 @@ class VirtualBaseClassRecord : public TypeRecord { /// together. The first will end in an LF_INDEX record that points to the next. 
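/// Schematically (editorial illustration): FieldList #1 = [M0, M1, ...,
/// LF_INDEX -> #2], FieldList #2 = [Mk, ...]; a reader follows the LF_INDEX
/// link to stitch the logical member list back together.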
class ListContinuationRecord : public TypeRecord { public: + ListContinuationRecord() = default; explicit ListContinuationRecord(TypeRecordKind Kind) : TypeRecord(Kind) {} ListContinuationRecord(TypeIndex ContinuationIndex) : TypeRecord(TypeRecordKind::ListContinuation), @@ -847,7 +880,6 @@ class ListContinuationRecord : public TypeRecord { }; } // end namespace codeview - } // end namespace llvm #endif // LLVM_DEBUGINFO_CODEVIEW_TYPERECORD_H diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/TypeRecordMapping.h b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/TypeRecordMapping.h index 924ca0470fad4..cbe8d6066bb9f 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/TypeRecordMapping.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/TypeRecordMapping.h @@ -25,6 +25,7 @@ class TypeRecordMapping : public TypeVisitorCallbacks { explicit TypeRecordMapping(BinaryStreamReader &Reader) : IO(Reader) {} explicit TypeRecordMapping(BinaryStreamWriter &Writer) : IO(Writer) {} + using TypeVisitorCallbacks::visitTypeBegin; Error visitTypeBegin(CVType &Record) override; Error visitTypeEnd(CVType &Record) override; @@ -37,7 +38,7 @@ class TypeRecordMapping : public TypeVisitorCallbacks { Error visitKnownMember(CVMemberRecord &CVR, Name##Record &Record) override; #define TYPE_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) #define MEMBER_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) -#include "TypeRecords.def" +#include "llvm/DebugInfo/CodeView/CodeViewTypes.def" private: Optional TypeKind; diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/TypeSerializer.h b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/TypeSerializer.h index 1f4873c4f9693..0e734a8170bdf 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/TypeSerializer.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/TypeSerializer.h @@ -10,22 +10,29 @@ #ifndef LLVM_DEBUGINFO_CODEVIEW_TYPESERIALIZER_H #define LLVM_DEBUGINFO_CODEVIEW_TYPESERIALIZER_H +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/RecordSerialization.h" +#include "llvm/DebugInfo/CodeView/TypeIndex.h" +#include "llvm/DebugInfo/CodeView/TypeRecord.h" #include "llvm/DebugInfo/CodeView/TypeRecordMapping.h" #include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h" +#include "llvm/Support/Allocator.h" #include "llvm/Support/BinaryByteStream.h" #include "llvm/Support/BinaryStreamWriter.h" - -#include "llvm/ADT/Optional.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringMap.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/Support/Allocator.h" #include "llvm/Support/Error.h" +#include +#include +#include +#include namespace llvm { - namespace codeview { +class TypeHasher; + class TypeSerializer : public TypeVisitorCallbacks { struct SubRecord { SubRecord(TypeLeafKind K, uint32_t S) : Kind(K), Size(S) {} @@ -45,14 +52,13 @@ class TypeSerializer : public TypeVisitorCallbacks { } }; - typedef SmallVector, 2> RecordList; + using MutableRecordList = SmallVector, 2>; static constexpr uint8_t ContinuationLength = 8; BumpPtrAllocator &RecordStorage; RecordSegment CurrentSegment; - RecordList FieldListSegments; + MutableRecordList FieldListSegments; - TypeIndex LastTypeIndex; Optional TypeKind; Optional MemberKind; std::vector RecordBuffer; @@ -60,28 +66,40 @@ class TypeSerializer : public TypeVisitorCallbacks { BinaryStreamWriter Writer; TypeRecordMapping Mapping; 
- RecordList SeenRecords; - StringMap HashedRecords; + /// Private type record hashing implementation details are handled here. + std::unique_ptr Hasher; + + /// Contains a list of all records indexed by TypeIndex.toArrayIndex(). + SmallVector, 2> SeenRecords; + + /// Temporary storage that we use to copy a record's data while re-writing + /// its type indices. + SmallVector RemapStorage; + + TypeIndex nextTypeIndex() const; bool isInFieldList() const; - TypeIndex calcNextTypeIndex() const; - TypeIndex incrementTypeIndex(); MutableArrayRef getCurrentSubRecordData(); MutableArrayRef getCurrentRecordData(); Error writeRecordPrefix(TypeLeafKind Kind); - TypeIndex insertRecordBytesPrivate(MutableArrayRef Record); Expected> addPadding(MutableArrayRef Record); public: - explicit TypeSerializer(BumpPtrAllocator &Storage); + explicit TypeSerializer(BumpPtrAllocator &Storage, bool Hash = true); + ~TypeSerializer() override; - ArrayRef> records() const; - TypeIndex getLastTypeIndex() const; - TypeIndex insertRecordBytes(MutableArrayRef Record); + void reset(); + + BumpPtrAllocator &getAllocator() { return RecordStorage; } + + ArrayRef> records() const; + TypeIndex insertRecordBytes(ArrayRef &Record); + TypeIndex insertRecord(const RemappedType &Record); Expected visitTypeEndGetIndex(CVType &Record); + using TypeVisitorCallbacks::visitTypeBegin; Error visitTypeBegin(CVType &Record) override; Error visitTypeEnd(CVType &Record) override; Error visitMemberBegin(CVMemberRecord &Record) override; @@ -97,7 +115,7 @@ class TypeSerializer : public TypeVisitorCallbacks { return visitKnownMemberImpl(CVR, Record); \ } #define MEMBER_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) -#include "llvm/DebugInfo/CodeView/TypeRecords.def" +#include "llvm/DebugInfo/CodeView/CodeViewTypes.def" private: template @@ -134,7 +152,8 @@ class TypeSerializer : public TypeVisitorCallbacks { return Error::success(); } }; -} -} -#endif +} // end namespace codeview +} // end namespace llvm + +#endif // LLVM_DEBUGINFO_CODEVIEW_TYPESERIALIZER_H diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/TypeStreamMerger.h b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/TypeStreamMerger.h index 2246f197e7843..d78fab47db668 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/TypeStreamMerger.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/TypeStreamMerger.h @@ -12,18 +12,76 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/DebugInfo/CodeView/TypeTableBuilder.h" +#include "llvm/DebugInfo/CodeView/TypeRecord.h" #include "llvm/Support/Error.h" namespace llvm { namespace codeview { -class TypeServerHandler; +class TypeIndex; +class TypeTableBuilder; -/// Merges one type stream into another. Returns true on success. -Error mergeTypeStreams(TypeTableBuilder &DestIdStream, - TypeTableBuilder &DestTypeStream, - TypeServerHandler *Handler, const CVTypeArray &Types); +/// \brief Merge one set of type records into another. This method assumes +/// that all records are type records, and there are no Id records present. +/// +/// \param Dest The table to store the re-written type records into. +/// +/// \param SourceToDest A vector, indexed by the TypeIndex in the source +/// type stream, that contains the index of the corresponding type record +/// in the destination stream. +/// +/// \param Types The collection of types to merge in. +/// +/// \returns Error::success() if the operation succeeded, otherwise an +/// appropriate error code. 
+Error mergeTypeRecords(TypeTableBuilder &Dest, + SmallVectorImpl<TypeIndex> &SourceToDest, + const CVTypeArray &Types); + +/// \brief Merge one set of id records into another. This method assumes +/// that all records are id records, and there are no Type records present. +/// However, since Id records can refer back to Type records, this method +/// assumes that the referenced type records have also been merged into +/// another type stream (for example using the above method), and accepts +/// the mapping from source to dest for that stream so that it can re-write +/// the type record mappings accordingly. +/// +/// \param Dest The table to store the re-written id records into. +/// +/// \param Types The mapping to use for the type records that these id +/// records refer to. +/// +/// \param SourceToDest A vector, indexed by the TypeIndex in the source +/// id stream, that contains the index of the corresponding id record +/// in the destination stream. +/// +/// \param Ids The collection of id records to merge in. +/// +/// \returns Error::success() if the operation succeeded, otherwise an +/// appropriate error code. +Error mergeIdRecords(TypeTableBuilder &Dest, ArrayRef<TypeIndex> Types, + SmallVectorImpl<TypeIndex> &SourceToDest, + const CVTypeArray &Ids); + +/// \brief Merge a unified set of type and id records, splitting them into +/// separate output streams. +/// +/// \param DestIds The table to store the re-written id records into. +/// +/// \param DestTypes The table to store the re-written type records into. +/// +/// \param SourceToDest A vector, indexed by the TypeIndex in the source +/// id stream, that contains the index of the corresponding id record +/// in the destination stream. +/// +/// \param IdsAndTypes The unified collection of type and id records to merge in. +/// +/// \returns Error::success() if the operation succeeded, otherwise an +/// appropriate error code.
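///
/// Illustrative call pattern (an editorial sketch, not part of this header),
/// assuming DestIds and DestTypes are TypeTableBuilders and IdsAndTypes is a
/// CVTypeArray read from an object file's .debug$T section:
///
///   SmallVector<TypeIndex, 128> SourceToDest;
///   if (auto Err = mergeTypeAndIdRecords(DestIds, DestTypes, SourceToDest,
///                                        IdsAndTypes))
///     return Err;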
+Error mergeTypeAndIdRecords(TypeTableBuilder &DestIds, + TypeTableBuilder &DestTypes, + SmallVectorImpl &SourceToDest, + const CVTypeArray &IdsAndTypes); } // end namespace codeview } // end namespace llvm diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/TypeTableBuilder.h b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/TypeTableBuilder.h index 102bee4b0801e..1069dcd453349 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/TypeTableBuilder.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/TypeTableBuilder.h @@ -13,8 +13,8 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/DebugInfo/CodeView/CodeView.h" #include "llvm/DebugInfo/CodeView/TypeIndex.h" -#include "llvm/DebugInfo/CodeView/TypeSerializer.h" #include "llvm/DebugInfo/CodeView/TypeRecord.h" +#include "llvm/DebugInfo/CodeView/TypeSerializer.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Error.h" #include @@ -37,8 +37,9 @@ class TypeTableBuilder { TypeSerializer Serializer; public: - explicit TypeTableBuilder(BumpPtrAllocator &Allocator) - : Allocator(Allocator), Serializer(Allocator) {} + explicit TypeTableBuilder(BumpPtrAllocator &Allocator, + bool WriteUnique = true) + : Allocator(Allocator), Serializer(Allocator, WriteUnique) {} TypeTableBuilder(const TypeTableBuilder &) = delete; TypeTableBuilder &operator=(const TypeTableBuilder &) = delete; @@ -64,10 +65,14 @@ class TypeTableBuilder { return *ExpectedIndex; } - TypeIndex writeSerializedRecord(MutableArrayRef Record) { + TypeIndex writeSerializedRecord(ArrayRef Record) { return Serializer.insertRecordBytes(Record); } + TypeIndex writeSerializedRecord(const RemappedType &Record) { + return Serializer.insertRecord(Record); + } + template void ForEachRecord(TFunc Func) { uint32_t Index = TypeIndex::FirstNonSimpleIndex; @@ -77,23 +82,24 @@ class TypeTableBuilder { } } - ArrayRef> records() const { - return Serializer.records(); - } + ArrayRef> records() const { return Serializer.records(); } }; class FieldListRecordBuilder { TypeTableBuilder &TypeTable; + BumpPtrAllocator Allocator; TypeSerializer TempSerializer; CVType Type; public: explicit FieldListRecordBuilder(TypeTableBuilder &TypeTable) - : TypeTable(TypeTable), TempSerializer(TypeTable.getAllocator()) { + : TypeTable(TypeTable), TempSerializer(Allocator, false) { Type.Type = TypeLeafKind::LF_FIELDLIST; } void begin() { + TempSerializer.reset(); + if (auto EC = TempSerializer.visitTypeBegin(Type)) consumeError(std::move(EC)); } @@ -109,23 +115,19 @@ class FieldListRecordBuilder { consumeError(std::move(EC)); } - TypeIndex end() { + TypeIndex end(bool Write) { + TypeIndex Index; if (auto EC = TempSerializer.visitTypeEnd(Type)) { consumeError(std::move(EC)); return TypeIndex(); } - TypeIndex Index; - for (auto Record : TempSerializer.records()) { - Index = TypeTable.writeSerializedRecord(Record); + if (Write) { + for (auto Record : TempSerializer.records()) + Index = TypeTable.writeSerializedRecord(Record); } - return Index; - } - /// Stop building the record. 
- void reset() { - if (auto EC = TempSerializer.visitTypeEnd(Type)) - consumeError(std::move(EC)); + return Index; } }; diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/TypeTableCollection.h b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/TypeTableCollection.h new file mode 100644 index 0000000000000..80326a0ffd39d --- /dev/null +++ b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/TypeTableCollection.h @@ -0,0 +1,43 @@ +//===- TypeTableCollection.h ---------------------------------- *- C++ --*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_TYPETABLECOLLECTION_H +#define LLVM_DEBUGINFO_CODEVIEW_TYPETABLECOLLECTION_H + +#include "llvm/DebugInfo/CodeView/TypeCollection.h" +#include "llvm/Support/StringSaver.h" + +#include + +namespace llvm { +namespace codeview { + +class TypeTableCollection : public TypeCollection { +public: + explicit TypeTableCollection(ArrayRef> Records); + + Optional getFirst() override; + Optional getNext(TypeIndex Prev) override; + + CVType getType(TypeIndex Index) override; + StringRef getTypeName(TypeIndex Index) override; + bool contains(TypeIndex Index) override; + uint32_t size() override; + uint32_t capacity() override; + +private: + BumpPtrAllocator Allocator; + StringSaver NameStorage; + std::vector Names; + ArrayRef> Records; +}; +} +} + +#endif diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h index ed48df33249f4..126fb8abb0da8 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h @@ -94,7 +94,7 @@ class TypeVisitorCallbackPipeline : public TypeVisitorCallbacks { } #define TYPE_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) #define MEMBER_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) -#include "llvm/DebugInfo/CodeView/TypeRecords.def" +#include "llvm/DebugInfo/CodeView/CodeViewTypes.def" private: template Error visitKnownRecordImpl(CVType &CVR, T &Record) { diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h index 2950c7d27cb68..d7a473306bc20 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h @@ -17,8 +17,6 @@ namespace llvm { namespace codeview { class TypeVisitorCallbacks { - friend class CVTypeVisitor; - public: virtual ~TypeVisitorCallbacks() = default; @@ -60,7 +58,11 @@ class TypeVisitorCallbacks { #define TYPE_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) #define MEMBER_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) -#include "TypeRecords.def" +#include "llvm/DebugInfo/CodeView/CodeViewTypes.def" +#undef TYPE_RECORD +#undef TYPE_RECORD_ALIAS +#undef MEMBER_RECORD +#undef MEMBER_RECORD_ALIAS }; } // end namespace codeview diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/DIContext.h b/interpreter/llvm/src/include/llvm/DebugInfo/DIContext.h index d51408122fc9b..936813dc6abc0 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/DIContext.h +++ 
b/interpreter/llvm/src/include/llvm/DebugInfo/DIContext.h @@ -57,7 +57,7 @@ struct DILineInfo { } }; -typedef SmallVector, 16> DILineInfoTable; +using DILineInfoTable = SmallVector, 16>; /// DIInliningInfo - a format-neutral container for inlined code description. class DIInliningInfo { @@ -102,7 +102,7 @@ enum class DINameKind { None, ShortName, LinkageName }; /// should be filled with data. struct DILineInfoSpecifier { enum class FileLineInfoKind { None, Default, AbsoluteFilePath }; - typedef DINameKind FunctionNameKind; + using FunctionNameKind = DINameKind; FileLineInfoKind FLIKind; FunctionNameKind FNKind; @@ -135,6 +135,7 @@ enum DIDumpType { DIDT_GnuPubnames, DIDT_GnuPubtypes, DIDT_Str, + DIDT_StrOffsets, DIDT_StrDwo, DIDT_StrOffsetsDwo, DIDT_AppleNames, @@ -146,6 +147,15 @@ enum DIDumpType { DIDT_TUIndex, }; +/// Container for dump options that control which debug information will be +/// dumped. +struct DIDumpOptions { + DIDumpType DumpType = DIDT_All; + bool DumpEH = false; + bool SummarizeTypes = false; + bool Brief = false; +}; + class DIContext { public: enum DIContextKind { @@ -158,13 +168,13 @@ class DIContext { DIContextKind getKind() const { return Kind; } - virtual void dump(raw_ostream &OS, DIDumpType DumpType = DIDT_All, - bool DumpEH = false, bool SummarizeTypes = false) = 0; + virtual void dump(raw_ostream &OS, DIDumpOptions DumpOpts) = 0; virtual bool verify(raw_ostream &OS, DIDumpType DumpType = DIDT_All) { // No verifier? Just say things went well. return true; } + virtual DILineInfo getLineInfoForAddress(uint64_t Address, DILineInfoSpecifier Specifier = DILineInfoSpecifier()) = 0; virtual DILineInfoTable getLineInfoForAddressRange(uint64_t Address, @@ -194,7 +204,9 @@ class LoadedObjectInfo { /// need to be consistent with the addresses used to query the DIContext and /// the output of this function should be deterministic, i.e. repeated calls with /// the same Sec should give the same address. - virtual uint64_t getSectionLoadAddress(const object::SectionRef &Sec) const = 0; + virtual uint64_t getSectionLoadAddress(const object::SectionRef &Sec) const { + return 0; + } /// If conveniently available, return the content of the given Section. /// @@ -211,12 +223,28 @@ class LoadedObjectInfo { return false; } + // FIXME: This is untested and unused anywhere in the LLVM project, it's + // used/needed by Julia (an external project). It should have some coverage + // (at least tests, but ideally example functionality). /// Obtain a copy of this LoadedObjectInfo. - /// - /// The caller is responsible for deallocation once the copy is no longer required. virtual std::unique_ptr clone() const = 0; }; +template +struct LoadedObjectInfoHelper : Base { +protected: + LoadedObjectInfoHelper(const LoadedObjectInfoHelper &) = default; + LoadedObjectInfoHelper() = default; + +public: + template + LoadedObjectInfoHelper(Ts &&... Args) : Base(std::forward(Args)...) 
diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h b/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h index 7324f6e3eb387..190a69b757390 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h @@ -10,11 +10,11 @@ #ifndef LLVM_DEBUGINFO_DWARFABBREVIATIONDECLARATION_H #define LLVM_DEBUGINFO_DWARFABBREVIATIONDECLARATION_H -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/Support/DataExtractor.h" -#include "llvm/Support/Dwarf.h" #include <cstddef> #include <cstdint> #include <vector> @@ -33,6 +33,7 @@ class DWARFAbbreviationDeclaration { dwarf::Attribute Attr; dwarf::Form Form; + /// The following field is used for ByteSize for non-implicit_const /// attributes and as value for implicit_const ones, indicated by /// Form == DW_FORM_implicit_const. @@ -58,7 +59,7 @@ class DWARFAbbreviationDeclaration { /// the ByteSize member. Optional<int64_t> getByteSize(const DWARFUnit &U) const; }; - typedef SmallVector<AttributeSpec, 8> AttributeSpecVector; + using AttributeSpecVector = SmallVector<AttributeSpec, 8>; DWARFAbbreviationDeclaration(); @@ -67,8 +68,8 @@ class DWARFAbbreviationDeclaration { dwarf::Tag getTag() const { return Tag; } bool hasChildren() const { return HasChildren; } - typedef iterator_range<AttributeSpecVector::const_iterator> - attr_iterator_range; + using attr_iterator_range = + iterator_range<AttributeSpecVector::const_iterator>; attr_iterator_range attributes() const { return attr_iterator_range(AttributeSpecs.begin(), AttributeSpecs.end());
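A small sketch of the attributes() range in use; AbbrevDecl stands for a DWARFAbbreviationDeclaration obtained from an abbreviation declaration set (an assumed input, not defined in this patch):

// Print each attribute/form pair declared by one abbreviation.
for (const auto &Spec : AbbrevDecl.attributes())
  llvm::outs() << llvm::dwarf::AttributeString(Spec.Attr) << " "
               << llvm::dwarf::FormEncodingString(Spec.Form) << "\n";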
diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h b/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h index f95a013d75523..eb6d0f541c1ed 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h @@ -11,9 +11,9 @@ #define LLVM_DEBUGINFO_DWARFACCELERATORTABLE_H #include "llvm/ADT/SmallVector.h" +#include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h" #include "llvm/DebugInfo/DWARF/DWARFRelocMap.h" -#include "llvm/Support/DataExtractor.h" -#include "llvm/Support/Dwarf.h" #include <cstdint> #include <utility> @@ -32,24 +32,40 @@ class DWARFAcceleratorTable { }; struct HeaderData { - typedef uint16_t AtomType; - typedef dwarf::Form Form; + using AtomType = uint16_t; + using Form = dwarf::Form; + uint32_t DIEOffsetBase; SmallVector<std::pair<AtomType, Form>, 3> Atoms; }; struct Header Hdr; struct HeaderData HdrData; - DataExtractor AccelSection; + DWARFDataExtractor AccelSection; DataExtractor StringSection; - const RelocAddrMap& Relocs; public: - DWARFAcceleratorTable(DataExtractor AccelSection, DataExtractor StringSection, - const RelocAddrMap &Relocs) - : AccelSection(AccelSection), StringSection(StringSection), Relocs(Relocs) {} + DWARFAcceleratorTable(const DWARFDataExtractor &AccelSection, + DataExtractor StringSection) + : AccelSection(AccelSection), StringSection(StringSection) {} bool extract(); + uint32_t getNumBuckets(); + uint32_t getNumHashes(); + uint32_t getSizeHdr(); + uint32_t getHeaderDataLength(); + ArrayRef<std::pair<HeaderData::AtomType, HeaderData::Form>> getAtomsDesc(); + bool validateForms(); + + /// Return information related to the DWARF DIE we're looking for when + /// performing a lookup by name. + /// + /// \param HashDataOffset an offset into the hash data table + /// \returns DIEOffset the offset into the .debug_info section for the DIE + /// related to the input hash data offset. Currently this function returns + /// only the DIEOffset but it can be modified to return more data regarding + /// the DIE + uint32_t readAtoms(uint32_t &HashDataOffset); void dump(raw_ostream &OS) const; };
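The two-argument constructor replaces the old three-argument form because the relocation map now travels inside the DWARFDataExtractor. A hedged sketch of constructing and dumping a table; AppleNamesSection (a DWARFSection), StrSectionData, IsLittleEndian and AddrSize are assumed inputs:

llvm::DWARFDataExtractor AccelData(AppleNamesSection, IsLittleEndian, AddrSize);
llvm::DataExtractor StrData(StrSectionData, IsLittleEndian, AddrSize);
llvm::DWARFAcceleratorTable Accel(AccelData, StrData);
// validateForms() rejects atom forms the reader cannot interpret.
if (Accel.extract() && Accel.validateForms())
  Accel.dump(llvm::outs());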
diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFAttribute.h b/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFAttribute.h index 5919aaddea409..f0672bb0ca758 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFAttribute.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFAttribute.h @@ -10,8 +10,8 @@ #ifndef LLVM_DEBUGINFO_DWARFATTRIBUTE_H #define LLVM_DEBUGINFO_DWARFATTRIBUTE_H +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" -#include "llvm/Support/Dwarf.h" #include <cstdint> namespace llvm { @@ -31,10 +31,10 @@ struct DWARFAttribute { dwarf::Attribute Attr; /// The form and value for this attribute. DWARFFormValue Value; - + DWARFAttribute(uint32_t O, dwarf::Attribute A = dwarf::Attribute(0), dwarf::Form F = dwarf::Form(0)) : Attr(A), Value(F) {} - + bool isValid() const { return Offset != 0 && Attr != dwarf::Attribute(0); } diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h b/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h index a46d46a5bff31..a18adf87bf8e1 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h @@ -19,8 +19,9 @@ class DWARFCompileUnit : public DWARFUnit { public: DWARFCompileUnit(DWARFContext &Context, const DWARFSection &Section, const DWARFDebugAbbrev *DA, const DWARFSection *RS, - StringRef SS, StringRef SOS, StringRef AOS, StringRef LS, - bool LE, bool IsDWO, const DWARFUnitSectionBase &UnitSection, + StringRef SS, const DWARFSection &SOS, + const DWARFSection *AOS, const DWARFSection &LS, bool LE, + bool IsDWO, const DWARFUnitSectionBase &UnitSection, const DWARFUnitIndex::Entry *Entry) : DWARFUnit(Context, Section, DA, RS, SS, SOS, AOS, LS, LE, IsDWO, UnitSection, Entry) {} @@ -28,7 +29,7 @@ class DWARFCompileUnit : public DWARFUnit { // VTable anchor. ~DWARFCompileUnit() override; - void dump(raw_ostream &OS); + void dump(raw_ostream &OS, DIDumpOptions DumpOpts); static const DWARFSectionKind Section = DW_SECT_INFO; };
diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFContext.h b/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFContext.h index 3fae8b4414394..ee2e805050c01 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFContext.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFContext.h @@ -10,13 +10,12 @@ #ifndef LLVM_DEBUGINFO_DWARF_DWARFCONTEXT_H #define LLVM_DEBUGINFO_DWARF_DWARFCONTEXT_H -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/DebugInfo/DIContext.h" #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h" #include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h" @@ -25,36 +24,27 @@ #include "llvm/DebugInfo/DWARF/DWARFDebugLine.h" #include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h" #include "llvm/DebugInfo/DWARF/DWARFDebugMacro.h" +#include "llvm/DebugInfo/DWARF/DWARFDie.h" #include "llvm/DebugInfo/DWARF/DWARFGdbIndex.h" #include "llvm/DebugInfo/DWARF/DWARFSection.h" #include "llvm/DebugInfo/DWARF/DWARFTypeUnit.h" #include "llvm/DebugInfo/DWARF/DWARFUnit.h" #include "llvm/DebugInfo/DWARF/DWARFUnitIndex.h" +#include "llvm/Object/Binary.h" #include "llvm/Object/ObjectFile.h" +#include "llvm/Support/Error.h" #include "llvm/Support/Host.h" #include <cstdint> #include <deque> #include <map> #include <utility> -#include <vector> namespace llvm { +class DataExtractor; class MemoryBuffer; class raw_ostream; -// In place of applying the relocations to the data we've read from disk we use -// a separate mapping table to the side and checking that at locations in the -// dwarf where we expect relocated values. This adds a bit of complexity to the -// dwarf parsing/extraction at the benefit of not allocating memory for the -// entire size of the debug info sections. -typedef DenseMap<uint64_t, std::pair<uint8_t, int64_t>> RelocAddrMap; - -/// Reads a value from data extractor and applies a relocation to the result if -/// one exists for the given offset. -uint64_t getRelocatedValue(const DataExtractor &Data, uint32_t Size, - uint32_t *Off, const RelocAddrMap *Relocs); - /// DWARFContext /// This data structure is the top level entity that deals with dwarf debug /// information parsing. The actual data is supplied through pure virtual @@ -78,6 +68,17 @@ class DWARFContext : public DIContext { std::unique_ptr<DWARFDebugAbbrev> AbbrevDWO; std::unique_ptr<DWARFDebugLocDWO> LocDWO; + /// The maximum DWARF version of all units. + unsigned MaxVersion = 0; + + struct DWOFile { + object::OwningBinary<object::ObjectFile> File; + std::unique_ptr<DWARFContext> Context; + }; + StringMap<std::weak_ptr<DWOFile>> DWOFiles; + std::weak_ptr<DWOFile> DWP; + bool CheckedForDWP = false; + /// Read compile units from the debug_info section (if necessary) /// and store them in CUs. void parseCompileUnits(); @@ -103,14 +104,13 @@ class DWARFContext : public DIContext { return DICtx->getKind() == CK_DWARF; } - void dump(raw_ostream &OS, DIDumpType DumpType = DIDT_All, - bool DumpEH = false, bool SummarizeTypes = false) override; + void dump(raw_ostream &OS, DIDumpOptions DumpOpts) override; bool verify(raw_ostream &OS, DIDumpType DumpType = DIDT_All) override; - typedef DWARFUnitSection<DWARFCompileUnit>::iterator_range cu_iterator_range; - typedef DWARFUnitSection<DWARFTypeUnit>::iterator_range tu_iterator_range; - typedef iterator_range<decltype(TUs)::iterator> tu_section_iterator_range; + using cu_iterator_range = DWARFUnitSection<DWARFCompileUnit>::iterator_range; + using tu_iterator_range = DWARFUnitSection<DWARFTypeUnit>::iterator_range; + using tu_section_iterator_range = iterator_range<decltype(TUs)::iterator>; /// Get compile units in this context. cu_iterator_range compile_units() { @@ -172,9 +172,18 @@ class DWARFContext : public DIContext { return DWOCUs[index].get(); } + DWARFCompileUnit *getDWOCompileUnitForHash(uint64_t Hash); + /// Get a DIE given an exact offset. DWARFDie getDIEForOffset(uint32_t Offset); + unsigned getMaxVersion() const { return MaxVersion; } + + void setMaxVersionIfGreater(unsigned Version) { + if (Version > MaxVersion) + MaxVersion = Version; + } + const DWARFUnitIndex &getCUIndex(); DWARFGdbIndex &getGdbIndex(); const DWARFUnitIndex &getTUIndex(); @@ -213,12 +222,11 @@ class DWARFContext : public DIContext { DIInliningInfo getInliningInfoForAddress(uint64_t Address, DILineInfoSpecifier Specifier = DILineInfoSpecifier()) override; + virtual StringRef getFileName() const = 0; virtual bool isLittleEndian() const = 0; virtual uint8_t getAddressSize() const = 0; virtual const DWARFSection &getInfoSection() = 0; - typedef MapVector<object::SectionRef, DWARFSection, std::map<object::SectionRef, unsigned>> TypeSectionMap; - virtual const TypeSectionMap &getTypesSections() = 0; + virtual void forEachTypesSections(function_ref<void(DWARFSection &)> F) = 0; virtual StringRef getAbbrevSection() = 0; virtual const DWARFSection &getLocSection() = 0; virtual StringRef getARangeSection() = 0; @@ -233,16 +241,22 @@ class DWARFContext : public DIContext { virtual StringRef getGnuPubNamesSection() = 0; virtual StringRef getGnuPubTypesSection() = 0; + /// DWARF v5 + /// @{ + virtual const DWARFSection &getStringOffsetSection() = 0; + /// @} + // Sections for DWARF5 split dwarf proposal. virtual const DWARFSection &getInfoDWOSection() = 0; - virtual const TypeSectionMap &getTypesDWOSections() = 0; + virtual void + forEachTypesDWOSections(function_ref<void(DWARFSection &)> F) = 0; virtual StringRef getAbbrevDWOSection() = 0; virtual const DWARFSection &getLineDWOSection() = 0; virtual const DWARFSection &getLocDWOSection() = 0; virtual StringRef getStringDWOSection() = 0; - virtual StringRef getStringOffsetDWOSection() = 0; + virtual const DWARFSection &getStringOffsetDWOSection() = 0; virtual const DWARFSection &getRangeDWOSection() = 0; - virtual StringRef getAddrSection() = 0; + virtual const DWARFSection &getAddrSection() = 0; virtual const DWARFSection& getAppleNamesSection() = 0; virtual const DWARFSection& getAppleTypesSection() = 0; virtual const DWARFSection& getAppleNamespacesSection() = 0; @@ -255,6 +269,8 @@ class DWARFContext : public DIContext { return version == 2 || version == 3 || version == 4 || version == 5; } + std::shared_ptr<DWARFContext> getDWOContext(StringRef AbsolutePath); + private: /// Return the compile unit that includes an offset (relative to .debug_info).
DWARFCompileUnit *getCompileUnitForOffset(uint32_t Offset); @@ -264,12 +280,21 @@ DWARFCompileUnit *getCompileUnitForAddress(uint64_t Address); }; +/// Used as a return value for an error callback passed to DWARF context. +/// The callback should return Halt if the client application wants to stop +/// object parsing, or Continue otherwise. +enum class ErrorPolicy { Halt, Continue }; +
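A sketch of the callback in use, assuming Obj is an object::ObjectFile the caller has already loaded. The handler below logs each parse error but keeps going; returning Halt would abort object parsing instead:

auto HandleError = [](llvm::Error E) -> llvm::ErrorPolicy {
  llvm::logAllUnhandledErrors(std::move(E), llvm::errs(), "DWARF error: ");
  return llvm::ErrorPolicy::Continue;
};
llvm::DWARFContextInMemory DICtx(Obj, /*L=*/nullptr, HandleError);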
/// DWARFContextInMemory is the simplest possible implementation of a /// DWARFContext. It assumes all content is available in memory and stores /// pointers to it. class DWARFContextInMemory : public DWARFContext { virtual void anchor(); + using TypeSectionMap = MapVector<object::SectionRef, DWARFSection, + std::map<object::SectionRef, unsigned>>; + + StringRef FileName; bool IsLittleEndian; uint8_t AddressSize; DWARFSection InfoSection; @@ -288,6 +313,11 @@ class DWARFContextInMemory : public DWARFContext { StringRef GnuPubNamesSection; StringRef GnuPubTypesSection; + /// DWARF v5 + /// @{ + DWARFSection StringOffsetSection; + /// @} + // Sections for DWARF5 split dwarf proposal. DWARFSection InfoDWOSection; TypeSectionMap TypesDWOSections; @@ -295,9 +325,9 @@ class DWARFContextInMemory : public DWARFContext { DWARFSection LineDWOSection; DWARFSection LocDWOSection; StringRef StringDWOSection; - StringRef StringOffsetDWOSection; + DWARFSection StringOffsetDWOSection; DWARFSection RangeDWOSection; - StringRef AddrSection; + DWARFSection AddrSection; DWARFSection AppleNamesSection; DWARFSection AppleTypesSection; DWARFSection AppleNamespacesSection; @@ -308,25 +338,35 @@ class DWARFContextInMemory : public DWARFContext { SmallVector<SmallString<32>, 4> UncompressedSections; - StringRef *MapSectionToMember(StringRef Name); + DWARFSection *mapNameToDWARFSection(StringRef Name); + StringRef *mapSectionToMember(StringRef Name); /// If Sec is a compressed section, decompresses it and updates its contents /// provided by Data. Otherwise leaves it unchanged. Error maybeDecompress(const object::SectionRef &Sec, StringRef Name, StringRef &Data); + /// Function used to handle the default error reporting policy. Prints an + /// error message and returns Continue, so the DWARF context ignores the + /// error. + static ErrorPolicy defaultErrorHandler(Error E); + public: - DWARFContextInMemory(const object::ObjectFile &Obj, - const LoadedObjectInfo *L = nullptr); + DWARFContextInMemory( + const object::ObjectFile &Obj, const LoadedObjectInfo *L = nullptr, + function_ref<ErrorPolicy(Error)> HandleError = defaultErrorHandler); DWARFContextInMemory(const StringMap<std::unique_ptr<MemoryBuffer>> &Sections, uint8_t AddrSize, bool isLittleEndian = sys::IsLittleEndianHost); + StringRef getFileName() const override { return FileName; } bool isLittleEndian() const override { return IsLittleEndian; } uint8_t getAddressSize() const override { return AddressSize; } const DWARFSection &getInfoSection() override { return InfoSection; } - const TypeSectionMap &getTypesSections() override { return TypesSections; } + void forEachTypesSections(function_ref<void(DWARFSection &)> F) override { + for (auto &P : TypesSections) + F(P.second); + } StringRef getAbbrevSection() override { return AbbrevSection; } const DWARFSection &getLocSection() override { return LocSection; } StringRef getARangeSection() override { return ARangeSection; } @@ -345,11 +385,17 @@ class DWARFContextInMemory : public DWARFContext { const DWARFSection& getAppleNamespacesSection() override { return AppleNamespacesSection; } const DWARFSection& getAppleObjCSection() override { return AppleObjCSection; } + // DWARF v5 + const DWARFSection &getStringOffsetSection() override { + return StringOffsetSection; + } + // Sections for DWARF5 split dwarf proposal. const DWARFSection &getInfoDWOSection() override { return InfoDWOSection; } - const TypeSectionMap &getTypesDWOSections() override { - return TypesDWOSections; + void forEachTypesDWOSections(function_ref<void(DWARFSection &)> F) override { + for (auto &P : TypesDWOSections) + F(P.second); } StringRef getAbbrevDWOSection() override { return AbbrevDWOSection; } @@ -357,15 +403,13 @@ class DWARFContextInMemory : public DWARFContext { const DWARFSection &getLocDWOSection() override { return LocDWOSection; } StringRef getStringDWOSection() override { return StringDWOSection; } - StringRef getStringOffsetDWOSection() override { + const DWARFSection &getStringOffsetDWOSection() override { return StringOffsetDWOSection; } const DWARFSection &getRangeDWOSection() override { return RangeDWOSection; } - StringRef getAddrSection() override { - return AddrSection; - } + const DWARFSection &getAddrSection() override { return AddrSection; } StringRef getCUIndexSection() override { return CUIndexSection; } StringRef getGdbIndexSection() override { return GdbIndexSection; }
diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFDataExtractor.h b/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFDataExtractor.h new file mode 100644 index 0000000000000..ef4360f666218 --- /dev/null +++ b/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFDataExtractor.h @@ -0,0 +1,48 @@ +//===- DWARFDataExtractor.h -------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_DWARFDATAEXTRACTOR_H +#define LLVM_DEBUGINFO_DWARFDATAEXTRACTOR_H + +#include "llvm/DebugInfo/DWARF/DWARFSection.h" +#include "llvm/Support/DataExtractor.h" + +namespace llvm { + +/// A DataExtractor (typically for an in-memory copy of an object-file section) +/// plus a relocation map for that section, if there is one. +class DWARFDataExtractor : public DataExtractor { + const RelocAddrMap *RelocMap = nullptr; +public: + /// Constructor for the normal case of extracting data from a DWARF section. + /// The DWARFSection's lifetime must be at least as long as the extractor's. + DWARFDataExtractor(const DWARFSection &Section, bool IsLittleEndian, + uint8_t AddressSize) + : DataExtractor(Section.Data, IsLittleEndian, AddressSize), + RelocMap(&Section.Relocs) {} + + /// Constructor for cases when there are no relocations. + DWARFDataExtractor(StringRef Data, bool IsLittleEndian, uint8_t AddressSize) + : DataExtractor(Data, IsLittleEndian, AddressSize) {} + + /// Extracts a value and applies a relocation to the result if + /// one exists for the given offset. + uint64_t getRelocatedValue(uint32_t Size, uint32_t *Off, + uint64_t *SectionIndex = nullptr) const; + + /// Extracts an address-sized value and applies a relocation to the result if + /// one exists for the given offset. + uint64_t getRelocatedAddress(uint32_t *Off, uint64_t *SecIx = nullptr) const { + return getRelocatedValue(getAddressSize(), Off, SecIx); + } +}; + +} // end namespace llvm + +#endif // LLVM_DEBUGINFO_DWARFDATAEXTRACTOR_H
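Because the extractor carries the section's relocation map, callers no longer thread a RelocAddrMap through every read. A short usage sketch; InfoSection (a DWARFSection with its Relocs map filled), IsLittleEndian and AddrSize are assumed inputs:

llvm::DWARFDataExtractor Data(InfoSection, IsLittleEndian, AddrSize);
uint32_t Offset = 0;
uint64_t SectionIndex;
// Reads an address-sized value, applying the relocation at Offset if one
// exists, and reports which section the relocation target lives in.
uint64_t Addr = Data.getRelocatedAddress(&Offset, &SectionIndex);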
diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h b/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h index 9f86fe5083896..65571598d7432 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h @@ -18,6 +18,8 @@ namespace llvm { +class raw_ostream; + class DWARFAbbreviationDeclarationSet { uint32_t Offset; /// Code of the first abbreviation, if all abbreviations in the set have @@ -25,8 +27,8 @@ class DWARFAbbreviationDeclarationSet { uint32_t FirstAbbrCode; std::vector<DWARFAbbreviationDeclaration> Decls; - typedef std::vector<DWARFAbbreviationDeclaration>::const_iterator - const_iterator; + using const_iterator = + std::vector<DWARFAbbreviationDeclaration>::const_iterator; public: DWARFAbbreviationDeclarationSet(); @@ -51,8 +53,8 @@ class DWARFAbbreviationDeclarationSet { }; class DWARFDebugAbbrev { - typedef std::map<uint64_t, DWARFAbbreviationDeclarationSet> - DWARFAbbreviationDeclarationSetMap; + using DWARFAbbreviationDeclarationSetMap = + std::map<uint64_t, DWARFAbbreviationDeclarationSet>; DWARFAbbreviationDeclarationSetMap AbbrDeclSets; mutable DWARFAbbreviationDeclarationSetMap::const_iterator PrevAbbrOffsetPos; diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h b/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h index 40eb7e9a88364..dfbbb95076e81 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h @@ -22,19 +22,19 @@ class raw_ostream; class DWARFDebugArangeSet { public: struct Header { - // The total length of the entries for that set, not including the length - // field itself. + /// The total length of the entries for that set, not including the length + /// field itself. uint32_t Length; - // The offset from the beginning of the .debug_info section of the - // compilation unit entry referenced by the table. + /// The offset from the beginning of the .debug_info section of the + /// compilation unit entry referenced by the table. uint32_t CuOffset; - // The DWARF version number. + /// The DWARF version number. uint16_t Version; - // The size in bytes of an address on the target architecture. For segmented - // addressing, this is the size of the offset portion of the address. + /// The size in bytes of an address on the target architecture. For segmented + /// addressing, this is the size of the offset portion of the address. uint8_t AddrSize; - // The size in bytes of a segment descriptor on the target architecture. - // If the target system uses a flat address space, this value is 0. + /// The size in bytes of a segment descriptor on the target architecture. + /// If the target system uses a flat address space, this value is 0. uint8_t SegSize; }; @@ -46,8 +46,8 @@ class DWARFDebugArangeSet { }; private: - typedef std::vector<Descriptor> DescriptorColl; - typedef iterator_range<DescriptorColl::const_iterator> desc_iterator_range; + using DescriptorColl = std::vector<Descriptor>; + using desc_iterator_range = iterator_range<DescriptorColl::const_iterator>; uint32_t Offset; Header HeaderData; diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFDebugAranges.h b/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFDebugAranges.h index c06771d6afb43..ea71a50f3270d 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFDebugAranges.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFDebugAranges.h @@ -28,7 +28,7 @@ class DWARFDebugAranges { void clear(); void extract(DataExtractor DebugArangesData); - // Call appendRange multiple times and then call construct. + /// Call appendRange multiple times and then call construct. void appendRange(uint32_t CUOffset, uint64_t LowPC, uint64_t HighPC); void construct(); @@ -58,9 +58,9 @@ class DWARFDebugAranges { return LowPC < other.LowPC; } - uint64_t LowPC; // Start of address range. - uint32_t Length; // End of address range (not including this address). - uint32_t CUOffset; // Offset of the compile unit or die. + uint64_t LowPC; /// Start of address range. + uint32_t Length; /// End of address range (not including this address). + uint32_t CUOffset; /// Offset of the compile unit or die. }; struct RangeEndpoint { @@ -76,8 +76,8 @@ } }; - typedef std::vector<Range> RangeColl; - typedef RangeColl::const_iterator RangeCollIterator; + using RangeColl = std::vector<Range>; + using RangeCollIterator = RangeColl::const_iterator; std::vector<RangeEndpoint> Endpoints; RangeColl Aranges;
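The comment above describes the intended build protocol: appendRange once per unit range, then construct() to sort and merge. Both members are internal; external code drives the same machinery through generate() and findAddress(), roughly as follows (DICtx and PC are assumed inputs):

llvm::DWARFDebugAranges Aranges;
Aranges.generate(&DICtx);                    // builds ranges for all CUs
uint32_t CUOffset = Aranges.findAddress(PC); // owning CU offset, -1U if unknown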
diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h b/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h index fc2423a2708b8..88c8f57bc33ca 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h @@ -10,8 +10,9 @@ #ifndef LLVM_DEBUGINFO_DWARFDEBUGINFOENTRY_H #define LLVM_DEBUGINFO_DWARFDEBUGINFOENTRY_H +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h" -#include "llvm/Support/Dwarf.h" +#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h" #include <cstdint> namespace llvm { @@ -40,8 +41,7 @@ class DWARFDebugInfoEntry { /// High performance extraction should use this call. bool extractFast(const DWARFUnit &U, uint32_t *OffsetPtr, - const DataExtractor &DebugInfoData, - uint32_t UEndOffset, + const DWARFDataExtractor &DebugInfoData, uint32_t UEndOffset, uint32_t Depth); uint32_t getOffset() const { return Offset; }
diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h b/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h index 39a7ef71de97d..0c8f98aa62f9e 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h @@ -10,9 +10,11 @@ #ifndef LLVM_DEBUGINFO_DWARFDEBUGLINE_H #define LLVM_DEBUGINFO_DWARFDEBUGLINE_H +#include "llvm/ADT/StringRef.h" #include "llvm/DebugInfo/DIContext.h" +#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h" +#include "llvm/DebugInfo/DWARF/DWARFFormValue.h" #include "llvm/DebugInfo/DWARF/DWARFRelocMap.h" -#include "llvm/Support/DataExtractor.h" #include #include #include @@ -24,9 +26,6 @@ class raw_ostream; class DWARFDebugLine { public: - DWARFDebugLine(const RelocAddrMap *LineInfoRelocMap) - : RelocMap(LineInfoRelocMap) {} - struct FileNameEntry { FileNameEntry() = default; @@ -42,10 +41,10 @@ class DWARFDebugLine { /// The size in bytes of the statement information for this compilation unit /// (not including the total_length field itself). uint64_t TotalLength; - /// Version identifier for the statement information format. - uint16_t Version; - /// In v5, size in bytes of an address (or segment offset). - uint8_t AddressSize; + /// Version, address size (starting in v5), and DWARF32/64 format; these + /// parameters affect interpretation of forms (used in the directory and + /// file tables starting with v5). + DWARFFormParams FormParams; /// In v5, size in bytes of a segment selector. uint8_t SegSelectorSize; /// The number of bytes following the prologue_length field to the beginning @@ -70,15 +69,18 @@ class DWARFDebugLine { std::vector<StringRef> IncludeDirectories; std::vector<FileNameEntry> FileNames; - bool IsDWARF64; + const DWARFFormParams getFormParams() const { return FormParams; } + uint16_t getVersion() const { return FormParams.Version; } + uint8_t getAddressSize() const { return FormParams.AddrSize; } + bool isDWARF64() const { return FormParams.Format == dwarf::DWARF64; } - uint32_t sizeofTotalLength() const { return IsDWARF64 ? 12 : 4; } + uint32_t sizeofTotalLength() const { return isDWARF64() ? 12 : 4; } - uint32_t sizeofPrologueLength() const { return IsDWARF64 ? 8 : 4; } + uint32_t sizeofPrologueLength() const { return isDWARF64() ? 8 : 4; } /// Length of the prologue in bytes. uint32_t getLength() const { return PrologueLength + sizeofTotalLength() + sizeof(getVersion()) + sizeofPrologueLength(); } @@ -93,7 +95,7 @@ class DWARFDebugLine { void clear(); void dump(raw_ostream &OS) const; - bool parse(DataExtractor DebugLineData, uint32_t *OffsetPtr); + bool parse(const DWARFDataExtractor &DebugLineData, uint32_t *OffsetPtr); }; /// Standard .debug_line state machine structure. @@ -104,7 +106,9 @@ class DWARFDebugLine { void postAppend(); void reset(bool DefaultIsStmt); void dump(raw_ostream &OS) const; + static void dumpTableHeader(raw_ostream &OS); + static bool orderByAddress(const Row &LHS, const Row &RHS) { return LHS.Address < RHS.Address; } @@ -213,14 +217,14 @@ class DWARFDebugLine { void clear(); /// Parse prologue and all rows. - bool parse(DataExtractor DebugLineData, const RelocAddrMap *RMap, - uint32_t *OffsetPtr); + bool parse(const DWARFDataExtractor &DebugLineData, uint32_t *OffsetPtr); + + using RowVector = std::vector<Row>; + using RowIter = RowVector::const_iterator; + using SequenceVector = std::vector<Sequence>; + using SequenceIter = SequenceVector::const_iterator; struct Prologue Prologue; - typedef std::vector<Row> RowVector; - typedef RowVector::const_iterator RowIter; - typedef std::vector<Sequence> SequenceVector; - typedef SequenceVector::const_iterator SequenceIter; RowVector Rows; SequenceVector Sequences; @@ -230,7 +234,7 @@ class DWARFDebugLine { }; const LineTable *getLineTable(uint32_t Offset) const; - const LineTable *getOrParseLineTable(DataExtractor DebugLineData, + const LineTable *getOrParseLineTable(const DWARFDataExtractor &DebugLineData, uint32_t Offset); private: @@ -244,16 +248,15 @@ class DWARFDebugLine { struct LineTable *LineTable; /// The row number that starts at zero for the prologue, and increases for /// each row added to the matrix. - unsigned RowNumber; + unsigned RowNumber = 0; struct Row Row; struct Sequence Sequence; }; - typedef std::map<uint32_t, LineTable> LineTableMapTy; - typedef LineTableMapTy::iterator LineTableIter; - typedef LineTableMapTy::const_iterator LineTableConstIter; + using LineTableMapTy = std::map<uint32_t, LineTable>; + using LineTableIter = LineTableMapTy::iterator; + using LineTableConstIter = LineTableMapTy::const_iterator; - const RelocAddrMap *RelocMap; LineTableMapTy LineTableMap; };
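A hedged sketch of parsing one unit's line table with the extractor-based API; LineSection (the .debug_line DWARFSection), StmtOffset (the unit's DW_AT_stmt_list value), IsLittleEndian and U (a DWARFUnit pointer) are assumed inputs:

llvm::DWARFDebugLine Line;  // default-constructible now; no RelocAddrMap
llvm::DWARFDataExtractor LineData(LineSection, IsLittleEndian,
                                  U->getAddressByteSize());
if (const auto *LT = Line.getOrParseLineTable(LineData, StmtOffset))
  LT->dump(llvm::outs());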
diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h b/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h index 6d4cd8d1b5a3c..c2b8d0cd73d82 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h @@ -11,8 +11,8 @@ #define LLVM_DEBUGINFO_DWARF_DWARFDEBUGLOC_H #include "llvm/ADT/SmallVector.h" +#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h" #include "llvm/DebugInfo/DWARF/DWARFRelocMap.h" -#include "llvm/Support/DataExtractor.h" #include <cstdint> namespace llvm { @@ -39,24 +39,19 @@ class DWARFDebugLoc { SmallVector<Entry, 2> Entries; }; - typedef SmallVector<LocationList, 4> LocationLists; + using LocationLists = SmallVector<LocationList, 4>; /// A list of all the variables in the debug_loc section, each one describing /// the locations in which the variable is stored. LocationLists Locations; - /// A map used to resolve binary relocations. - const RelocAddrMap &RelocMap; public: - DWARFDebugLoc(const RelocAddrMap &LocRelocMap) : RelocMap(LocRelocMap) {} - /// Print the location lists found within the debug_loc section. void dump(raw_ostream &OS) const; /// Parse the debug_loc section accessible via the 'data' parameter using the - /// specified address size to interpret the address ranges. + /// address size also given in 'data' to interpret the address ranges. + void parse(const DWARFDataExtractor &data); }; class DWARFDebugLocDWO { @@ -71,7 +66,7 @@ class DWARFDebugLocDWO { SmallVector<Entry, 2> Entries; }; - typedef SmallVector<LocationList, 4> LocationLists; + using LocationLists = SmallVector<LocationList, 4>; LocationLists Locations; diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFDebugMacro.h b/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFDebugMacro.h index 85d98b45afcd5..135c50761e36a 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFDebugMacro.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFDebugMacro.h @@ -40,7 +40,7 @@ class DWARFDebugMacro { }; }; - typedef SmallVector<Entry, 4> MacroList; + using MacroList = SmallVector<Entry, 4>; /// A list of all the macro entries in the debug_macinfo section. MacroList Macros; diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFDebugPubTable.h b/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFDebugPubTable.h index 9d36bb7ad211c..a309fd104f938 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFDebugPubTable.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFDebugPubTable.h @@ -12,7 +12,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" -#include "llvm/Support/Dwarf.h" +#include "llvm/BinaryFormat/Dwarf.h" #include <cstdint> #include <vector> diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h b/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h index 9172df5bfac6f..bcba14b1630d1 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h @@ -10,49 +10,55 @@ #ifndef LLVM_DEBUGINFO_DWARF_DWARFDEBUGRANGELIST_H #define LLVM_DEBUGINFO_DWARF_DWARFDEBUGRANGELIST_H -#include "llvm/Support/DataExtractor.h" +#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h" #include "llvm/DebugInfo/DWARF/DWARFRelocMap.h" - #include <cassert> #include <cstdint> -#include <utility> #include <vector> namespace llvm { class raw_ostream; +struct DWARFAddressRange { + uint64_t LowPC; + uint64_t HighPC; + uint64_t SectionIndex; +}; + /// DWARFAddressRangesVector - represents a set of absolute address ranges. -typedef std::vector<std::pair<uint64_t, uint64_t>> DWARFAddressRangesVector; +using DWARFAddressRangesVector = std::vector<DWARFAddressRange>; class DWARFDebugRangeList { public: struct RangeListEntry { - // A beginning address offset. This address offset has the size of an - // address and is relative to the applicable base address of the - // compilation unit referencing this range list. It marks the beginning - // of an address range. + /// A beginning address offset. This address offset has the size of an + /// address and is relative to the applicable base address of the - // wait + /// compilation unit referencing this range list. It marks the beginning + /// of an address range. uint64_t StartAddress; - // An ending address offset. This address offset again has the size of - // an address and is relative to the applicable base address of the - // compilation unit referencing this range list. It marks the first - // address past the end of the address range. The ending address must - // be greater than or equal to the beginning address. + /// An ending address offset. This address offset again has the size of + /// an address and is relative to the applicable base address of the + /// compilation unit referencing this range list. It marks the first + /// address past the end of the address range. The ending address must + /// be greater than or equal to the beginning address.
uint64_t EndAddress; + /// A section index this range belongs to. + uint64_t SectionIndex; - // The end of any given range list is marked by an end of list entry, - // which consists of a 0 for the beginning address offset - // and a 0 for the ending address offset. + /// The end of any given range list is marked by an end of list entry, + /// which consists of a 0 for the beginning address offset + /// and a 0 for the ending address offset. bool isEndOfListEntry() const { return (StartAddress == 0) && (EndAddress == 0); } - // A base address selection entry consists of: - // 1. The value of the largest representable address offset - // (for example, 0xffffffff when the size of an address is 32 bits). - // 2. An address, which defines the appropriate base address for - // use in interpreting the beginning and ending address offsets of - // subsequent entries of the location list. + /// A base address selection entry consists of: + /// 1. The value of the largest representable address offset + /// (for example, 0xffffffff when the size of an address is 32 bits). + /// 2. An address, which defines the appropriate base address for + /// use in interpreting the beginning and ending address offsets of + /// subsequent entries of the location list. bool isBaseAddressSelectionEntry(uint8_t AddressSize) const { assert(AddressSize == 4 || AddressSize == 8); if (AddressSize == 4) @@ -63,7 +69,7 @@ class DWARFDebugRangeList { }; private: - // Offset in .debug_ranges section. + /// Offset in .debug_ranges section. uint32_t Offset; uint8_t AddressSize; std::vector<RangeListEntry> Entries; @@ -73,7 +79,7 @@ class DWARFDebugRangeList { void clear(); void dump(raw_ostream &OS) const; - bool extract(DataExtractor data, uint32_t *offset_ptr, const RelocAddrMap& Relocs); + bool extract(const DWARFDataExtractor &data, uint32_t *offset_ptr); const std::vector<RangeListEntry> &getEntries() { return Entries; } /// getAbsoluteRanges - Returns absolute address ranges defined by this range diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFDie.h b/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFDie.h index ee06125ea2786..b216491b615a2 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFDie.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFDie.h @@ -11,23 +11,23 @@ #define LLVM_DEBUGINFO_DWARFDIE_H #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/iterator.h" #include "llvm/ADT/iterator_range.h" -#include "llvm/ADT/Optional.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/DebugInfo/DIContext.h" #include "llvm/DebugInfo/DWARF/DWARFAttribute.h" #include "llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h" #include "llvm/DebugInfo/DWARF/DWARFDebugRangeList.h" -#include "llvm/Support/Dwarf.h" #include <cassert> #include <cstdint> #include <iterator> namespace llvm { - + class DWARFUnit; class raw_ostream; - + //===----------------------------------------------------------------------===// /// Utility class that carries the DWARF compile/type unit and the debug info /// entry in an object.
@@ -47,7 +47,7 @@ class DWARFDie { public: DWARFDie() = default; DWARFDie(DWARFUnit *Unit, const DWARFDebugInfoEntry * D) : U(Unit), Die(D) {} - + bool isValid() const { return U && Die; } explicit operator bool() const { return isValid(); } const DWARFDebugInfoEntry *getDebugInfoEntry() const { return Die; } @@ -68,7 +68,7 @@ class DWARFDie { assert(isValid() && "must check validity prior to calling"); return Die->getOffset(); } - + dwarf::Tag getTag() const { auto AbbrevDecl = getAbbreviationDeclarationPtr(); if (AbbrevDecl) @@ -80,7 +80,7 @@ class DWARFDie { assert(isValid() && "must check validity prior to calling"); return Die->hasChildren(); } - + /// Returns true for a valid DIE that terminates a sibling chain. bool isNULL() const { return getAbbreviationDeclarationPtr() == nullptr; @@ -97,13 +97,13 @@ class DWARFDie { /// \returns a valid DWARFDie instance if this object has a parent or an /// invalid DWARFDie instance if it doesn't. DWARFDie getParent() const; - + /// Get the sibling of this DIE object. /// /// \returns a valid DWARFDie instance if this object has a sibling or an /// invalid DWARFDie instance if it doesn't. DWARFDie getSibling() const; - + /// Get the first child of this DIE object. /// /// \returns a valid DWARFDie instance if this object has children or an @@ -113,15 +113,16 @@ class DWARFDie { return DWARFDie(U, Die + 1); return DWARFDie(); } - + /// Dump the DIE and all of its attributes to the supplied stream. /// /// \param OS the stream to use for output. /// \param recurseDepth the depth to recurse to when dumping this DIE and its /// children. /// \param indent the number of characters to indent each line that is output. - void dump(raw_ostream &OS, unsigned recurseDepth, unsigned indent = 0) const; - + void dump(raw_ostream &OS, unsigned recurseDepth, unsigned indent = 0, + DIDumpOptions DumpOpts = DIDumpOptions()) const; + /// Extract the specified attribute from this DIE. /// /// Extract an attribute value from this DIE only. This call doesn't look @@ -132,7 +133,7 @@ /// \returns an optional DWARFFormValue that will have the form value if the /// attribute was successfully extracted. Optional<DWARFFormValue> find(dwarf::Attribute Attr) const; - + /// Extract the first value of any attribute in Attrs from this DIE. /// /// Extract the first attribute that matches from this DIE only. This call @@ -180,7 +181,7 @@ /// /// \returns an optional absolute section offset value for the attribute. Optional<uint64_t> getRangesBaseAttribute() const; - + /// Get the DW_AT_high_pc attribute value as an address. /// /// In DWARF version 4 and later the high PC can be encoded as an offset from /// @@ -195,8 +196,9 @@ /// Retrieves DW_AT_low_pc and DW_AT_high_pc from CU. /// Returns true if both attributes are present. - bool getLowAndHighPC(uint64_t &LowPC, uint64_t &HighPC) const; - + bool getLowAndHighPC(uint64_t &LowPC, uint64_t &HighPC, + uint64_t &SectionIndex) const; + /// Get the address ranges for this DIE. /// /// Get the hi/low PC range if both attributes are available or extracts the @@ -208,7 +210,7 @@ /// \returns an address range vector that might be empty if no address range /// information is available. DWARFAddressRangesVector getAddressRanges() const; - + /// Get all address ranges for any DW_TAG_subprogram DIEs in this DIE or any /// of its children. /// @@ -218,19 +220,19 @@ /// /// \param Ranges the address range vector to fill in.
void collectChildrenAddressRanges(DWARFAddressRangesVector &Ranges) const; - + bool addressRangeContainsAddress(const uint64_t Address) const; - + /// If a DIE represents a subprogram (or inlined subroutine), returns its /// mangled name (or short name, if mangled is missing). This name may be /// fetched from specification or abstract origin for this subprogram. /// Returns null if no name is found. const char *getSubroutineName(DINameKind Kind) const; - + /// Return the DIE name resolving DW_AT_specification or DW_AT_abstract_origin /// references if necessary. Returns null if no name is found. const char *getName(DINameKind Kind) const; - + /// Returns the declaration line (start line) for a DIE, assuming it specifies /// a subprogram. This may be fetched from specification or abstract origin /// for this subprogram by resolving DW_AT_specification or @@ -251,21 +253,21 @@ /// there is no DW_AT_GNU_discriminator attribute in this DIE. void getCallerFrame(uint32_t &CallFile, uint32_t &CallLine, uint32_t &CallColumn, uint32_t &CallDiscriminator) const; - + class attribute_iterator; /// Get an iterator range to all attributes in the current DIE only. /// /// \returns an iterator range for the attributes of the current DIE. iterator_range<attribute_iterator> attributes() const; - + class iterator; - + iterator begin() const; iterator end() const; iterator_range<iterator> children() const; }; - + class DWARFDie::attribute_iterator : public iterator_facade_base<attribute_iterator, std::forward_iterator_tag, const DWARFAttribute> { @@ -275,7 +277,7 @@ /// Die. DWARFAttribute AttrValue; /// The attribute index within the abbreviation declaration in Die. uint32_t Index; - + /// Update the attribute index and attempt to read the attribute value. If the /// attribute is able to be read, update AttrValue and the Index member /// variable. If the attribute value is not able to be read, an appropriate
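A sketch of a recursive walk over the DIE tree using the ranges defined above; the function name is illustrative and Die is assumed to come from a DWARFUnit:

// Print a DIE's tag, its attributes, and then recurse into its children.
void visit(const llvm::DWARFDie &Die, unsigned Depth = 0) {
  llvm::outs().indent(Depth * 2)
      << llvm::dwarf::TagString(Die.getTag()) << "\n";
  for (const llvm::DWARFAttribute &A : Die.attributes())
    llvm::outs().indent(Depth * 2 + 2)
        << llvm::dwarf::AttributeString(A.Attr) << "\n";
  for (llvm::DWARFDie Child : Die.children())
    visit(Child, Depth + 1);
}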
diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFFormValue.h b/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFFormValue.h index f3516ebdecba0..008dba9b42acd 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFFormValue.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFFormValue.h @@ -13,8 +13,8 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" -#include "llvm/Support/DataExtractor.h" -#include "llvm/Support/Dwarf.h" +#include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h" #include <cstdint> namespace llvm { @@ -22,6 +22,35 @@ class DWARFUnit; class raw_ostream; +/// A helper struct for DWARFFormValue methods, providing information that +/// allows it to know the byte size of DW_FORM values that vary in size +/// depending on the DWARF version, address byte size, or DWARF32/DWARF64. +struct DWARFFormParams { + uint16_t Version; + uint8_t AddrSize; + dwarf::DwarfFormat Format; + + /// The definition of the size of form DW_FORM_ref_addr depends on the + /// version. In DWARF v2 it's the size of an address; after that, it's the + /// size of a reference. + uint8_t getRefAddrByteSize() const { + if (Version == 2) + return AddrSize; + return getDwarfOffsetByteSize(); + } + + /// The size of a reference is determined by the DWARF 32/64-bit format. + uint8_t getDwarfOffsetByteSize() const { + switch (Format) { + case dwarf::DwarfFormat::DWARF32: + return 4; + case dwarf::DwarfFormat::DWARF64: + return 8; + } + llvm_unreachable("Invalid Format value"); + } +}; + class DWARFFormValue { public: enum FormClass { @@ -47,17 +76,19 @@ class DWARFFormValue { const char *cstr; }; const uint8_t *data = nullptr; + uint64_t SectionIndex; /// Section index for reference forms. }; - dwarf::Form Form; // Form for this value. - ValueType Value; // Contains all data for the form. - const DWARFUnit *U = nullptr; // Remember the DWARFUnit at extract time. + dwarf::Form Form; /// Form for this value. + ValueType Value; /// Contains all data for the form. + const DWARFUnit *U = nullptr; /// Remember the DWARFUnit at extract time. public: DWARFFormValue(dwarf::Form F = dwarf::Form(0)) : Form(F) {} dwarf::Form getForm() const { return Form; } uint64_t getRawUValue() const { return Value.uval; } + uint64_t getSectionIndex() const { return Value.SectionIndex; } void setForm(dwarf::Form F) { Form = F; } void setUValue(uint64_t V) { Value.uval = V; } void setSValue(int64_t V) { Value.sval = V; } @@ -72,13 +103,15 @@ class DWARFFormValue { const DWARFUnit *getUnit() const { return U; } void dump(raw_ostream &OS) const; - /// \brief extracts a value in data at offset *offset_ptr. + /// Extracts a value in \p Data at offset \p *OffsetPtr. /// - /// The passed DWARFUnit is allowed to be nullptr, in which - /// case no relocation processing will be performed and some + /// The passed DWARFUnit is allowed to be nullptr, in which case some /// kind of forms that depend on Unit information are disallowed. + /// \param Data The DWARFDataExtractor to use. + /// \param OffsetPtr The offset within \p Data where the data starts. + /// \param U The optional DWARFUnit supplying information for some forms. /// \returns whether the extraction succeeded. - bool extractValue(const DataExtractor &Data, uint32_t *OffsetPtr, + bool extractValue(const DWARFDataExtractor &Data, uint32_t *OffsetPtr, const DWARFUnit *U); bool isInlinedCStr() const { @@ -99,79 +132,43 @@ class DWARFFormValue { /// Get the fixed byte size for a given form. /// - /// If the form always has a fixed valid byte size that doesn't depend on a - /// DWARFUnit, then an Optional with a value will be returned. If the form - /// can vary in size depending on the DWARFUnit (DWARF version, address byte - /// size, or DWARF 32/64) and the DWARFUnit is valid, then an Optional with a - /// valid value is returned. If the form is always encoded using a variable - /// length storage format (ULEB or SLEB numbers or blocks) or the size - /// depends on a DWARFUnit and the DWARFUnit is NULL, then None will be - /// returned. - /// \param Form The DWARF form to get the fixed byte size for - /// \param U The DWARFUnit that can be used to help determine the byte size. - /// - /// \returns Optional value with the fixed byte size or None if - /// \p Form doesn't have a fixed byte size or a DWARFUnit wasn't supplied - /// and was needed to calculate the byte size. - static Optional<uint8_t> getFixedByteSize(dwarf::Form Form, - const DWARFUnit *U = nullptr); - - /// Get the fixed byte size for a given form. - /// - /// If the form has a fixed byte size given a valid DWARF version and address - /// byte size, then an Optional with a valid value is returned. If the form - /// is always encoded using a variable length storage format (ULEB or SLEB /// numbers or blocks) then None will be returned.
+ /// If the form has a fixed byte size, then an Optional with a value will be + /// returned. If the form is always encoded using a variable length storage + /// format (ULEB or SLEB numbers or blocks) then None will be returned. /// - /// \param Form DWARF form to get the fixed byte size for - /// \param Version DWARF version number. - /// \param AddrSize size of an address in bytes. - /// \param Format enum value from llvm::dwarf::DwarfFormat. + /// \param Form DWARF form to get the fixed byte size for. + /// \param FormParams DWARF parameters to help interpret forms. /// \returns Optional value with the fixed byte size or None if /// \p Form doesn't have a fixed byte size. - static Optional<uint8_t> getFixedByteSize(dwarf::Form Form, uint16_t Version, - uint8_t AddrSize, - llvm::dwarf::DwarfFormat Format); + static Optional<uint8_t> getFixedByteSize(dwarf::Form Form, + const DWARFFormParams FormParams); - /// Skip a form in \p DebugInfoData at offset specified by \p OffsetPtr. + /// Skip a form's value in \p DebugInfoData at the offset specified by + /// \p OffsetPtr. /// - /// Skips the bytes for this form in the debug info and updates the offset. + /// Skips the bytes for the current form and updates the offset. /// - /// \param DebugInfoData the .debug_info data to use to skip the value. - /// \param OffsetPtr a reference to the offset that will be updated. - /// \param U the DWARFUnit to use when skipping the form in case the form - /// size differs according to data in the DWARFUnit. + /// \param DebugInfoData The data where we want to skip the value. + /// \param OffsetPtr A reference to the offset that will be updated. + /// \param Params DWARF parameters to help interpret forms. /// \returns true on success, false if the form was not skipped. bool skipValue(DataExtractor DebugInfoData, uint32_t *OffsetPtr, - const DWARFUnit *U) const; - - /// Skip a form in \p DebugInfoData at offset specified by \p OffsetPtr. - /// - /// Skips the bytes for this form in the debug info and updates the offset. - /// - /// \param Form the DW_FORM enumeration that indicates the form to skip. - /// \param DebugInfoData the .debug_info data to use to skip the value. - /// \param OffsetPtr a reference to the offset that will be updated. - /// \param U the DWARFUnit to use when skipping the form in case the form - /// size differs according to data in the DWARFUnit. - /// \returns true on success, false if the form was not skipped. - static bool skipValue(dwarf::Form Form, DataExtractor DebugInfoData, - uint32_t *OffsetPtr, const DWARFUnit *U); + const DWARFFormParams Params) const { + return DWARFFormValue::skipValue(Form, DebugInfoData, OffsetPtr, Params); + } - /// Skip a form in \p DebugInfoData at offset specified by \p OffsetPtr. + /// Skip a form's value in \p DebugInfoData at the offset specified by + /// \p OffsetPtr. /// - /// Skips the bytes for this form in the debug info and updates the offset. + /// Skips the bytes for the specified form and updates the offset. /// - /// \param Form the DW_FORM enumeration that indicates the form to skip. - /// \param DebugInfoData the .debug_info data to use to skip the value. - /// \param OffsetPtr a reference to the offset that will be updated. - /// \param Version DWARF version number. - /// \param AddrSize size of an address in bytes. - /// \param Format enum value from llvm::dwarf::DwarfFormat. + /// \param Form The DW_FORM enumeration that indicates the form to skip. + /// \param DebugInfoData The data where we want to skip the value.
+ /// \param OffsetPtr A reference to the offset that will be updated. + /// \param FormParams DWARF parameters to help interpret forms. /// \returns true on success, false if the form was not skipped. static bool skipValue(dwarf::Form Form, DataExtractor DebugInfoData, - uint32_t *OffsetPtr, uint16_t Version, uint8_t AddrSize, - llvm::dwarf::DwarfFormat Format); + uint32_t *OffsetPtr, const DWARFFormParams FormParams); private: void dumpString(raw_ostream &OS) const;
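A worked sketch of DWARFFormParams: for a DWARF v4, 32-bit-format unit with 8-byte addresses, DW_FORM_ref_addr is offset-sized (4 bytes), whereas under v2 rules it would be address-sized (8 bytes):

llvm::DWARFFormParams FP{/*Version=*/4, /*AddrSize=*/8,
                         llvm::dwarf::DwarfFormat::DWARF32};
uint8_t RefSize = FP.getRefAddrByteSize(); // 4 for v3 and later
llvm::Optional<uint8_t> Size =
    llvm::DWARFFormValue::getFixedByteSize(llvm::dwarf::DW_FORM_ref_addr, FP);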
diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFGdbIndex.h b/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFGdbIndex.h index 7a52218663b9d..8d1ac5c83c234 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFGdbIndex.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFGdbIndex.h @@ -29,25 +29,25 @@ class DWARFGdbIndex { uint32_t ConstantPoolOffset; struct CompUnitEntry { - uint64_t Offset; // Offset of a CU in the .debug_info section. - uint64_t Length; // Length of that CU. + uint64_t Offset; /// Offset of a CU in the .debug_info section. + uint64_t Length; /// Length of that CU. }; SmallVector<CompUnitEntry, 0> CuList; struct AddressEntry { - uint64_t LowAddress; // The low address. - uint64_t HighAddress; // The high address. - uint32_t CuIndex; // The CU index. + uint64_t LowAddress; /// The low address. + uint64_t HighAddress; /// The high address. + uint32_t CuIndex; /// The CU index. }; SmallVector<AddressEntry, 0> AddressArea; struct SymTableEntry { - uint32_t NameOffset; // Offset of the symbol's name in the constant pool. - uint32_t VecOffset; // Offset of the CU vector in the constant pool. + uint32_t NameOffset; /// Offset of the symbol's name in the constant pool. + uint32_t VecOffset; /// Offset of the CU vector in the constant pool. }; SmallVector<SymTableEntry, 0> SymbolTable; - // Each value is CU index + attributes. + /// Each value is CU index + attributes. SmallVector<std::pair<uint32_t, SmallVector<uint32_t, 0>>, 0> ConstantPoolVectors; diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFRelocMap.h b/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFRelocMap.h index af01bddeed153..f51838424614a 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFRelocMap.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFRelocMap.h @@ -12,11 +12,22 @@ #include "llvm/ADT/DenseMap.h" #include <cstdint> -#include <utility> namespace llvm { -typedef DenseMap<uint64_t, std::pair<uint8_t, int64_t>> RelocAddrMap; +/// RelocAddrEntry contains the relocated value and the section index. +/// The section index is -1LL if the relocation points to an absolute symbol. +struct RelocAddrEntry { + uint64_t SectionIndex; + uint64_t Value; +}; + +/// In place of applying the relocations to the data we've read from disk we use +/// a separate mapping table to the side, which we consult at the locations in +/// the DWARF where we expect relocated values. This adds a bit of complexity +/// to the DWARF parsing/extraction but has the benefit of not allocating +/// memory for the entire size of the debug info sections. +using RelocAddrMap = DenseMap<uint64_t, RelocAddrEntry>; } // end namespace llvm
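A sketch of how a producer might fill the map while walking an object file's relocations; the offset and values below are illustrative only:

llvm::RelocAddrMap Relocs;
// Key: offset within the section; value: target section index plus the
// fully relocated value. A DWARFDataExtractor built over this section will
// consult the map when it reads the value at offset 0x10.
Relocs[0x10] = llvm::RelocAddrEntry{/*SectionIndex=*/2, /*Value=*/0x401000};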
DWARFSection *AOS, + const DWARFSection &LS, bool LE, bool IsDWO) override { if (Parsed) return; const auto &Index = getDWARFUnitIndex(Context, UnitType::Section); @@ -111,49 +112,42 @@ class DWARFUnitSection final : public SmallVector<std::unique_ptr<UnitType>, 1>, class DWARFUnit { DWARFContext &Context; - // Section containing this DWARFUnit. + /// Section containing this DWARFUnit. const DWARFSection &InfoSection; const DWARFDebugAbbrev *Abbrev; const DWARFSection *RangeSection; uint32_t RangeSectionBase; - StringRef LineSection; + const DWARFSection &LineSection; StringRef StringSection; - StringRef StringOffsetSection; - StringRef AddrOffsetSection; + const DWARFSection &StringOffsetSection; + uint64_t StringOffsetSectionBase = 0; + const DWARFSection *AddrOffsetSection; uint32_t AddrOffsetSectionBase; bool isLittleEndian; bool isDWO; const DWARFUnitSectionBase &UnitSection; + // Version, address size, and DWARF format. + DWARFFormParams FormParams; + uint32_t Offset; uint32_t Length; const DWARFAbbreviationDeclarationSet *Abbrevs; - uint16_t Version; uint8_t UnitType; - uint8_t AddrSize; uint64_t BaseAddr; - // The compile unit debug information entry items. + /// The compile unit debug information entry items. std::vector<DWARFDebugInfoEntry> DieArray; - // Map from range's start address to end address and corresponding DIE. - // IntervalMap does not support range removal, as a result, we use the - // std::map::upper_bound for address range lookup. + /// Map from range's start address to end address and corresponding DIE. + /// IntervalMap does not support range removal; as a result, we use + /// std::map::upper_bound for address range lookup. std::map<uint64_t, std::pair<uint64_t, DWARFDie>> AddrDieMap; - typedef iterator_range<std::vector<DWARFDebugInfoEntry>::iterator> die_iterator_range; - class DWOHolder { - object::OwningBinary<object::ObjectFile> DWOFile; - std::unique_ptr<DWARFContext> DWOContext; - DWARFUnit *DWOU = nullptr; + using die_iterator_range = + iterator_range<std::vector<DWARFDebugInfoEntry>::iterator>; - public: - DWOHolder(StringRef DWOPath); - - DWARFUnit *getUnit() const { return DWOU; } - }; - std::unique_ptr<DWOHolder> DWO; + std::shared_ptr<DWARFUnit> DWO; const DWARFUnitIndex::Entry *IndexEntry; @@ -167,12 +161,13 @@ class DWARFUnit { virtual bool extractImpl(DataExtractor debug_info, uint32_t *offset_ptr); /// Size in bytes of the unit header. - virtual uint32_t getHeaderSize() const { return Version <= 4 ? 11 : 12; } + virtual uint32_t getHeaderSize() const { return getVersion() <= 4 ? 11 : 12; } public: DWARFUnit(DWARFContext &Context, const DWARFSection &Section, const DWARFDebugAbbrev *DA, const DWARFSection *RS, StringRef SS, - StringRef SOS, StringRef AOS, StringRef LS, bool LE, bool IsDWO, + const DWARFSection &SOS, const DWARFSection *AOS, + const DWARFSection &LS, bool LE, bool IsDWO, const DWARFUnitSectionBase &UnitSection, const DWARFUnitIndex::Entry *IndexEntry = nullptr); @@ -180,16 +175,18 @@ class DWARFUnit { DWARFContext& getContext() const { return Context; } - StringRef getLineSection() const { return LineSection; } + const DWARFSection &getLineSection() const { return LineSection; } StringRef getStringSection() const { return StringSection; } - StringRef getStringOffsetSection() const { return StringOffsetSection; } + const DWARFSection &getStringOffsetSection() const { + return StringOffsetSection; + } - void setAddrOffsetSection(StringRef AOS, uint32_t Base) { + void setAddrOffsetSection(const DWARFSection *AOS, uint32_t Base) { AddrOffsetSection = AOS; AddrOffsetSectionBase = Base; } - // Recursively update address to Die map. + /// Recursively update address to Die map. 
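Since the comment above motivates the std::map-based design, a minimal sketch of the resulting query (illustrative only; `Address` is assumed in scope, and populating the map is what updateAddressDieMap below is for):

    // Find the DIE whose range covers Address, if any, in a
    // std::map<uint64_t, std::pair<uint64_t, DWARFDie>> keyed by start address.
    auto It = AddrDieMap.upper_bound(Address); // first range starting past Address
    if (It != AddrDieMap.begin()) {
      --It;                                    // last range starting at or before Address
      if (Address < It->second.first)          // still before the range's end address?
        return It->second.second;              // Address is covered by this DIE.
    }
    return DWARFDie();                         // no covering range found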
void updateAddressDieMap(DWARFDie Die); void setRangesSection(const DWARFSection *RS, uint32_t Base) { @@ -198,11 +195,11 @@ class DWARFUnit { } bool getAddrOffsetSectionItem(uint32_t Index, uint64_t &Result) const; - // FIXME: Result should be uint64_t in DWARF64. - bool getStringOffsetSectionItem(uint32_t Index, uint32_t &Result) const; + bool getStringOffsetSectionItem(uint32_t Index, uint64_t &Result) const; - DataExtractor getDebugInfoExtractor() const { - return DataExtractor(InfoSection.Data, isLittleEndian, AddrSize); + DWARFDataExtractor getDebugInfoExtractor() const { + return DWARFDataExtractor(InfoSection, isLittleEndian, + getAddressByteSize()); } DataExtractor getStringExtractor() const { @@ -210,6 +207,9 @@ class DWARFUnit { } const RelocAddrMap *getRelocMap() const { return &InfoSection.Relocs; } + const RelocAddrMap &getStringOffsetsRelocMap() const { + return StringOffsetSection.Relocs; + } bool extract(DataExtractor debug_info, uint32_t* offset_ptr); @@ -222,10 +222,14 @@ class DWARFUnit { uint32_t getOffset() const { return Offset; } uint32_t getNextUnitOffset() const { return Offset + Length + 4; } uint32_t getLength() const { return Length; } - uint16_t getVersion() const { return Version; } - dwarf::DwarfFormat getFormat() const { - return dwarf::DwarfFormat::DWARF32; // FIXME: Support DWARF64. + const DWARFFormParams &getFormParams() const { return FormParams; } + uint16_t getVersion() const { return FormParams.Version; } + dwarf::DwarfFormat getFormat() const { return FormParams.Format; } + uint8_t getAddressByteSize() const { return FormParams.AddrSize; } + uint8_t getRefAddrByteSize() const { return FormParams.getRefAddrByteSize(); } + uint8_t getDwarfOffsetByteSize() const { + return FormParams.getDwarfOffsetByteSize(); } const DWARFAbbreviationDeclarationSet *getAbbreviations() const { @@ -233,18 +237,33 @@ class DWARFUnit { } uint8_t getUnitType() const { return UnitType; } - uint8_t getAddressByteSize() const { return AddrSize; } - uint8_t getRefAddrByteSize() const { - if (Version == 2) - return AddrSize; - return getDwarfOffsetByteSize(); + static bool isValidUnitType(uint8_t UnitType) { + return UnitType == dwarf::DW_UT_compile || UnitType == dwarf::DW_UT_type || + UnitType == dwarf::DW_UT_partial || + UnitType == dwarf::DW_UT_skeleton || + UnitType == dwarf::DW_UT_split_compile || + UnitType == dwarf::DW_UT_split_type; } - uint8_t getDwarfOffsetByteSize() const { - if (getFormat() == dwarf::DwarfFormat::DWARF64) - return 8; - return 4; + /// \brief Return the number of bytes for the header of a unit of + /// UnitType type. + /// + /// This function must be called with a valid unit type which in + /// DWARF5 is defined as one of the following six types. 
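The constants in the switch that follows fall directly out of the DWARF5 unit-header layouts; a rough byte accounting, for the 32-bit DWARF format assumed here, looks like this:

    // unit_length(4) + version(2) + unit_type(1) + address_size(1)
    //               + debug_abbrev_offset(4)              = 12  (compile, partial)
    // 12 + dwo_id(8)                                      = 20  (skeleton, split_compile)
    // 12 + type_signature(8) + type_offset(4)             = 24  (type, split_type)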
+ static uint32_t getDWARF5HeaderSize(uint8_t UnitType) { + switch (UnitType) { + case dwarf::DW_UT_compile: + case dwarf::DW_UT_partial: + return 12; + case dwarf::DW_UT_skeleton: + case dwarf::DW_UT_split_compile: + return 20; + case dwarf::DW_UT_type: + case dwarf::DW_UT_split_type: + return 24; + } + llvm_unreachable("Invalid UnitType."); } uint64_t getBaseAddress() const { return BaseAddr; } diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFVerifier.h b/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFVerifier.h index b9f14be859268..c0291a83ed973 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFVerifier.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/DWARF/DWARFVerifier.h @@ -20,6 +20,8 @@ struct DWARFAttribute; class DWARFContext; class DWARFDie; class DWARFUnit; +class DWARFAcceleratorTable; +class DWARFDataExtractor; /// A class that verifies DWARF debug information given a DWARF Context. class DWARFVerifier { @@ -29,18 +31,48 @@ class DWARFVerifier { /// can verify each reference points to a valid DIE and not an offset that /// lies between two valid DIEs. std::map<uint64_t, std::set<uint32_t>> ReferenceToDIEOffsets; - uint32_t NumDebugInfoErrors; - uint32_t NumDebugLineErrors; + uint32_t NumDebugLineErrors = 0; + uint32_t NumAppleNamesErrors = 0; + /// Verifies the header of a unit in the .debug_info section. + /// + /// This function currently checks for: + /// - Unit is in 32-bit DWARF format. The function can be modified to + /// support 64-bit format. + /// - The DWARF version is valid + /// - The unit type is valid (if unit is in version >=5) + /// - The unit doesn't extend beyond the .debug_info section + /// - The address size is valid + /// - The offset in the .debug_abbrev section is valid + /// + /// \param DebugInfoData The .debug_info section data + /// \param Offset A pointer to the offset of the start of the unit. The offset will + /// be updated to point to the next unit in .debug_info + /// \param UnitIndex The index of the unit to be verified + /// \param UnitType A reference to the type of the unit + /// \param isUnitDWARF64 A reference to a flag that shows whether the unit is + /// in 64-bit format. + /// + /// \returns true if the header is verified successfully, false otherwise. + bool verifyUnitHeader(const DWARFDataExtractor DebugInfoData, + uint32_t *Offset, unsigned UnitIndex, uint8_t &UnitType, + bool &isUnitDWARF64); + + + bool verifyUnitContents(DWARFUnit Unit); /// Verifies a DIE's DWARF attribute and its value. /// /// This function currently checks for: /// - DW_AT_ranges value is a valid .debug_ranges offset /// - DW_AT_stmt_list is a valid .debug_line offset /// - /// @param Die The DWARF DIE that owns the attribute value - /// @param AttrValue The DWARF attribute value to check - void verifyDebugInfoAttribute(const DWARFDie &Die, DWARFAttribute &AttrValue); + /// \param Die The DWARF DIE that owns the attribute value + /// \param AttrValue The DWARF attribute value to check + /// + /// \returns NumErrors The number of errors that occurred during verification of + /// attributes' values in a .debug_info section unit + unsigned verifyDebugInfoAttribute(const DWARFDie &Die, + DWARFAttribute &AttrValue); /// Verifies the attribute's DWARF form. 
/// @@ -49,9 +81,12 @@ class DWARFVerifier { /// - All DW_FORM_ref_addr values have valid .debug_info offsets /// - All DW_FORM_strp values have valid .debug_str offsets /// - /// @param Die The DWARF DIE that owns the attribute value - /// @param AttrValue The DWARF attribute value to check - void verifyDebugInfoForm(const DWARFDie &Die, DWARFAttribute &AttrValue); + /// \param Die The DWARF DIE that owns the attribute value + /// \param AttrValue The DWARF attribute value to check + /// + /// \returns NumErrors The number of errors that occurred during verification of + /// attributes' forms in a .debug_info section unit + unsigned verifyDebugInfoForm(const DWARFDie &Die, DWARFAttribute &AttrValue); /// Verifies all valid references that were found when iterating through /// all of the DIE attributes. @@ -60,7 +95,10 @@ class DWARFVerifier { /// offset matches. This helps to ensure that, if a DWARF link phase moved things /// around, it didn't create invalid references by failing to relocate /// CU-relative and absolute references. - void verifyDebugInfoReferences(); + /// + /// \returns NumErrors The number of errors that occurred during verification of + /// references for the .debug_info section + unsigned verifyDebugInfoReferences(); /// Verify the DW_AT_stmt_list encoding and value and ensure that no two /// compile units have the same DW_AT_stmt_list value. @@ -75,13 +113,13 @@ class DWARFVerifier { public: DWARFVerifier(raw_ostream &S, DWARFContext &D) - : OS(S), DCtx(D), NumDebugInfoErrors(0), NumDebugLineErrors(0) {} + : OS(S), DCtx(D) {} /// Verify the information in the .debug_info section. /// /// Any errors are reported to the stream that this object was /// constructed with. /// - /// @return True if the .debug_info verifies successfully, false otherwise. + /// \returns true if the .debug_info verifies successfully, false otherwise. bool handleDebugInfo(); /// Verify the information in the .debug_line section. @@ -89,8 +127,16 @@ class DWARFVerifier { /// Any errors are reported to the stream that this object was /// constructed with. /// - /// @return True if the .debug_line verifies successfully, false otherwise. + /// \returns true if the .debug_line verifies successfully, false otherwise. bool handleDebugLine(); + + /// Verify the information in the .apple_names accelerator table. + /// + /// Any errors are reported to the stream that this object was + /// constructed with. + /// + /// \returns true if the .apple_names verifies successfully, false otherwise. + bool handleAppleNames(); }; } // end namespace llvm diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/MSF/MSFBuilder.h b/interpreter/llvm/src/include/llvm/DebugInfo/MSF/MSFBuilder.h index 6d067cc1c2381..b2c8f2d1c20da 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/MSF/MSFBuilder.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/MSF/MSFBuilder.h @@ -12,18 +12,16 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" - #include "llvm/DebugInfo/MSF/MSFCommon.h" - #include "llvm/Support/Allocator.h" -#include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" - +#include <cstdint> #include <utility> #include <vector> namespace llvm { namespace msf { + class MSFBuilder { public: /// \brief Create a new `MSFBuilder`. 
@@ -122,7 +120,7 @@ class MSFBuilder { Error allocateBlocks(uint32_t NumBlocks, MutableArrayRef Blocks); uint32_t computeDirectoryByteSize() const; - typedef std::vector BlockList; + using BlockList = std::vector; BumpPtrAllocator &Allocator; @@ -136,7 +134,8 @@ class MSFBuilder { std::vector DirectoryBlocks; std::vector> StreamData; }; -} // namespace msf -} // namespace llvm + +} // end namespace msf +} // end namespace llvm #endif // LLVM_DEBUGINFO_MSF_MSFBUILDER_H diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/MSF/MSFCommon.h b/interpreter/llvm/src/include/llvm/DebugInfo/MSF/MSFCommon.h index 93a9c808b7368..eca1b8b89ebd8 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/MSF/MSFCommon.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/MSF/MSFCommon.h @@ -12,15 +12,15 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" - #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" #include "llvm/Support/MathExtras.h" - +#include #include namespace llvm { namespace msf { + static const char Magic[] = {'M', 'i', 'c', 'r', 'o', 's', 'o', 'f', 't', ' ', 'C', '/', 'C', '+', '+', ' ', 'M', 'S', 'F', ' ', '7', '.', '0', '0', @@ -50,8 +50,9 @@ struct SuperBlock { }; struct MSFLayout { - MSFLayout() : SB(nullptr) {} - const SuperBlock *SB; + MSFLayout() = default; + + const SuperBlock *SB = nullptr; BitVector FreePageMap; ArrayRef DirectoryBlocks; ArrayRef StreamSizes; @@ -90,15 +91,16 @@ inline uint32_t getFpmIntervalLength(const MSFLayout &L) { inline uint32_t getNumFpmIntervals(const MSFLayout &L) { uint32_t Length = getFpmIntervalLength(L); - return llvm::alignTo(L.SB->NumBlocks, Length) / Length; + return alignTo(L.SB->NumBlocks, Length) / Length; } inline uint32_t getFullFpmByteSize(const MSFLayout &L) { - return llvm::alignTo(L.SB->NumBlocks, 8) / 8; + return alignTo(L.SB->NumBlocks, 8) / 8; } Error validateSuperBlock(const SuperBlock &SB); -} // namespace msf -} // namespace llvm + +} // end namespace msf +} // end namespace llvm #endif // LLVM_DEBUGINFO_MSF_MSFCOMMON_H diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/MSF/MappedBlockStream.h b/interpreter/llvm/src/include/llvm/DebugInfo/MSF/MappedBlockStream.h index c91f6f725c806..6d88d2be85c9b 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/MSF/MappedBlockStream.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/MSF/MappedBlockStream.h @@ -1,5 +1,4 @@ -//===- MappedBlockStream.h - Discontiguous stream data in an MSF -*- C++ -//-*-===// +//==- MappedBlockStream.h - Discontiguous stream data in an MSF --*- C++ -*-==// // // The LLVM Compiler Infrastructure // @@ -13,15 +12,14 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/DebugInfo/MSF/MSFStreamLayout.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/BinaryStream.h" -#include "llvm/Support/BinaryStream.h" #include "llvm/Support/BinaryStreamRef.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" #include +#include #include namespace llvm { @@ -41,23 +39,26 @@ struct MSFLayout; /// of bytes. 
class MappedBlockStream : public BinaryStream { friend class WritableMappedBlockStream; + public: static std::unique_ptr - createStream(uint32_t BlockSize, uint32_t NumBlocks, - const MSFStreamLayout &Layout, BinaryStreamRef MsfData); + createStream(uint32_t BlockSize, const MSFStreamLayout &Layout, + BinaryStreamRef MsfData, BumpPtrAllocator &Allocator); static std::unique_ptr createIndexedStream(const MSFLayout &Layout, BinaryStreamRef MsfData, - uint32_t StreamIndex); + uint32_t StreamIndex, BumpPtrAllocator &Allocator); static std::unique_ptr - createFpmStream(const MSFLayout &Layout, BinaryStreamRef MsfData); + createFpmStream(const MSFLayout &Layout, BinaryStreamRef MsfData, + BumpPtrAllocator &Allocator); static std::unique_ptr - createDirectoryStream(const MSFLayout &Layout, BinaryStreamRef MsfData); + createDirectoryStream(const MSFLayout &Layout, BinaryStreamRef MsfData, + BumpPtrAllocator &Allocator); - llvm::support::endianness getEndian() const override { - return llvm::support::little; + support::endianness getEndian() const override { + return support::little; } Error readBytes(uint32_t Offset, uint32_t Size, @@ -67,20 +68,17 @@ class MappedBlockStream : public BinaryStream { uint32_t getLength() override; - uint32_t getNumBytesCopied() const; - - llvm::BumpPtrAllocator &getAllocator() { return Pool; } + BumpPtrAllocator &getAllocator() { return Allocator; } void invalidateCache(); uint32_t getBlockSize() const { return BlockSize; } - uint32_t getNumBlocks() const { return NumBlocks; } + uint32_t getNumBlocks() const { return StreamLayout.Blocks.size(); } uint32_t getStreamLength() const { return StreamLayout.Length; } protected: - MappedBlockStream(uint32_t BlockSize, uint32_t NumBlocks, - const MSFStreamLayout &StreamLayout, - BinaryStreamRef MsfData); + MappedBlockStream(uint32_t BlockSize, const MSFStreamLayout &StreamLayout, + BinaryStreamRef MsfData, BumpPtrAllocator &Allocator); private: const MSFStreamLayout &getStreamLayout() const { return StreamLayout; } @@ -91,34 +89,43 @@ class MappedBlockStream : public BinaryStream { ArrayRef &Buffer); const uint32_t BlockSize; - const uint32_t NumBlocks; const MSFStreamLayout StreamLayout; BinaryStreamRef MsfData; - typedef MutableArrayRef CacheEntry; - llvm::BumpPtrAllocator Pool; + using CacheEntry = MutableArrayRef; + + // We just store the allocator by reference. We use this to allocate + // contiguous memory for things like arrays or strings that cross a block + // boundary, and this memory is expected to outlive the stream. For example, + // someone could create a stream, read some stuff, then close the stream, and + // we would like outstanding references to fields to remain valid since the + // entire file is mapped anyway. Because of that, the user must supply the + // allocator to allocate broken records from. 
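That ownership contract is easier to see in use; a minimal sketch (illustrative only — `Layout`, `MsfData`, and `StreamIndex` stand for values the caller already has):

    llvm::BumpPtrAllocator Allocator;  // must outlive data read from the stream
    auto Stream = MappedBlockStream::createIndexedStream(
        Layout, MsfData, StreamIndex, Allocator);
    // Records that straddle a block boundary get copied into Allocator,
    // so they remain valid even after Stream itself is destroyed.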
+ BumpPtrAllocator &Allocator; DenseMap> CacheMap; }; class WritableMappedBlockStream : public WritableBinaryStream { public: static std::unique_ptr - createStream(uint32_t BlockSize, uint32_t NumBlocks, - const MSFStreamLayout &Layout, WritableBinaryStreamRef MsfData); + createStream(uint32_t BlockSize, const MSFStreamLayout &Layout, + WritableBinaryStreamRef MsfData, BumpPtrAllocator &Allocator); static std::unique_ptr createIndexedStream(const MSFLayout &Layout, WritableBinaryStreamRef MsfData, - uint32_t StreamIndex); + uint32_t StreamIndex, BumpPtrAllocator &Allocator); static std::unique_ptr createDirectoryStream(const MSFLayout &Layout, - WritableBinaryStreamRef MsfData); + WritableBinaryStreamRef MsfData, + BumpPtrAllocator &Allocator); static std::unique_ptr - createFpmStream(const MSFLayout &Layout, WritableBinaryStreamRef MsfData); + createFpmStream(const MSFLayout &Layout, WritableBinaryStreamRef MsfData, + BumpPtrAllocator &Allocator); - llvm::support::endianness getEndian() const override { - return llvm::support::little; + support::endianness getEndian() const override { + return support::little; } Error readBytes(uint32_t Offset, uint32_t Size, @@ -134,18 +141,19 @@ class WritableMappedBlockStream : public WritableBinaryStream { const MSFStreamLayout &getStreamLayout() const { return ReadInterface.getStreamLayout(); } + uint32_t getBlockSize() const { return ReadInterface.getBlockSize(); } uint32_t getNumBlocks() const { return ReadInterface.getNumBlocks(); } uint32_t getStreamLength() const { return ReadInterface.getStreamLength(); } protected: - WritableMappedBlockStream(uint32_t BlockSize, uint32_t NumBlocks, + WritableMappedBlockStream(uint32_t BlockSize, const MSFStreamLayout &StreamLayout, - WritableBinaryStreamRef MsfData); + WritableBinaryStreamRef MsfData, + BumpPtrAllocator &Allocator); private: MappedBlockStream ReadInterface; - WritableBinaryStreamRef WriteInterface; }; diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/DIA/DIAEnumDebugStreams.h b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/DIA/DIAEnumDebugStreams.h index 941e16a35fac2..ffae6645e94b7 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/DIA/DIAEnumDebugStreams.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/DIA/DIAEnumDebugStreams.h @@ -11,6 +11,7 @@ #define LLVM_DEBUGINFO_PDB_DIA_DIAENUMDEBUGSTREAMS_H #include "DIASupport.h" +#include "llvm/DebugInfo/PDB/IPDBDataStream.h" #include "llvm/DebugInfo/PDB/IPDBEnumChildren.h" namespace llvm { diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/DIA/DIAEnumLineNumbers.h b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/DIA/DIAEnumLineNumbers.h index 106b84cecfffa..08f0de124ede5 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/DIA/DIAEnumLineNumbers.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/DIA/DIAEnumLineNumbers.h @@ -12,6 +12,7 @@ #include "DIASupport.h" #include "llvm/DebugInfo/PDB/IPDBEnumChildren.h" +#include "llvm/DebugInfo/PDB/IPDBLineNumber.h" namespace llvm { namespace pdb { diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/DIA/DIAEnumSourceFiles.h b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/DIA/DIAEnumSourceFiles.h index 6c00d6a5e29d3..e69d18f5ba370 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/DIA/DIAEnumSourceFiles.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/DIA/DIAEnumSourceFiles.h @@ -12,6 +12,7 @@ #include "DIASupport.h" #include "llvm/DebugInfo/PDB/IPDBEnumChildren.h" +#include "llvm/DebugInfo/PDB/IPDBSourceFile.h" namespace llvm { 
namespace pdb { diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/DIA/DIAEnumSymbols.h b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/DIA/DIAEnumSymbols.h index b206ff59a6a49..f779cd1f4be35 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/DIA/DIAEnumSymbols.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/DIA/DIAEnumSymbols.h @@ -12,6 +12,7 @@ #include "DIASupport.h" #include "llvm/DebugInfo/PDB/IPDBEnumChildren.h" +#include "llvm/DebugInfo/PDB/PDBSymbol.h" namespace llvm { namespace pdb { diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/DIA/DIARawSymbol.h b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/DIA/DIARawSymbol.h index 3710eb29e7f98..d37b48540ffa2 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/DIA/DIARawSymbol.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/DIA/DIARawSymbol.h @@ -106,7 +106,7 @@ class DIARawSymbol : public IPDBRawSymbol { getVirtualBaseTableType() const override; PDB_DataKind getDataKind() const override; PDB_SymType getSymTag() const override; - PDB_UniqueId getGuid() const override; + codeview::GUID getGuid() const override; int32_t getOffset() const override; int32_t getThisAdjust() const override; int32_t getVirtualBasePointerOffset() const override; diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/DIA/DIASession.h b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/DIA/DIASession.h index 3f5818631e7bc..350442556bef8 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/DIA/DIASession.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/DIA/DIASession.h @@ -31,7 +31,7 @@ class DIASession : public IPDBSession { uint64_t getLoadAddress() const override; void setLoadAddress(uint64_t Address) override; - std::unique_ptr getGlobalScope() const override; + std::unique_ptr getGlobalScope() override; std::unique_ptr getSymbolById(uint32_t SymbolId) const override; std::unique_ptr diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/GenericError.h b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/GenericError.h index 466cb455651b1..03205a986f1a5 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/GenericError.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/GenericError.h @@ -19,6 +19,7 @@ namespace pdb { enum class generic_error_code { invalid_path = 1, dia_sdk_not_present, + type_server_not_found, unspecified, }; diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/IPDBDataStream.h b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/IPDBDataStream.h index 9594dc1591a76..67b5a06d7c59e 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/IPDBDataStream.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/IPDBDataStream.h @@ -1,4 +1,4 @@ -//===- IPDBDataStream.h - base interface for child enumerator -*- C++ ---*-===// +//===- IPDBDataStream.h - base interface for child enumerator ---*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -10,9 +10,10 @@ #ifndef LLVM_DEBUGINFO_PDB_IPDBDATASTREAM_H #define LLVM_DEBUGINFO_PDB_IPDBDATASTREAM_H -#include "PDBTypes.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" +#include +#include namespace llvm { namespace pdb { @@ -22,18 +23,19 @@ namespace pdb { /// stream type. 
class IPDBDataStream { public: - typedef llvm::SmallVector RecordType; + using RecordType = SmallVector; virtual ~IPDBDataStream(); virtual uint32_t getRecordCount() const = 0; virtual std::string getName() const = 0; - virtual llvm::Optional getItemAtIndex(uint32_t Index) const = 0; + virtual Optional getItemAtIndex(uint32_t Index) const = 0; virtual bool getNext(RecordType &Record) = 0; virtual void reset() = 0; virtual IPDBDataStream *clone() const = 0; }; -} -} -#endif +} // end namespace pdb +} // end namespace llvm + +#endif // LLVM_DEBUGINFO_PDB_IPDBDATASTREAM_H diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/IPDBEnumChildren.h b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/IPDBEnumChildren.h index e48dc250822e0..b6b7d95f6282d 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/IPDBEnumChildren.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/IPDBEnumChildren.h @@ -18,8 +18,8 @@ namespace pdb { template class IPDBEnumChildren { public: - typedef std::unique_ptr ChildTypePtr; - typedef IPDBEnumChildren MyType; + using ChildTypePtr = std::unique_ptr; + using MyType = IPDBEnumChildren; virtual ~IPDBEnumChildren() = default; diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/IPDBRawSymbol.h b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/IPDBRawSymbol.h index fab086c62c72e..eefc365187288 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/IPDBRawSymbol.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/IPDBRawSymbol.h @@ -118,7 +118,7 @@ class IPDBRawSymbol { virtual uint32_t getVirtualTableShapeId() const = 0; virtual PDB_DataKind getDataKind() const = 0; virtual PDB_SymType getSymTag() const = 0; - virtual PDB_UniqueId getGuid() const = 0; + virtual codeview::GUID getGuid() const = 0; virtual int32_t getOffset() const = 0; virtual int32_t getThisAdjust() const = 0; virtual int32_t getVirtualBasePointerOffset() const = 0; diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/IPDBSession.h b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/IPDBSession.h index 85d9fe1248599..cf195095c8d22 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/IPDBSession.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/IPDBSession.h @@ -29,7 +29,7 @@ class IPDBSession { virtual uint64_t getLoadAddress() const = 0; virtual void setLoadAddress(uint64_t Address) = 0; - virtual std::unique_ptr getGlobalScope() const = 0; + virtual std::unique_ptr getGlobalScope() = 0; virtual std::unique_ptr getSymbolById(uint32_t SymbolId) const = 0; template diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h index 7e77f5a3eef92..8200f51e3da9d 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h @@ -56,9 +56,8 @@ class DbiModuleDescriptor { } // end namespace pdb template <> struct VarStreamArrayExtractor { - typedef void ContextType; - static Error extract(BinaryStreamRef Stream, uint32_t &Length, - pdb::DbiModuleDescriptor &Info) { + Error operator()(BinaryStreamRef Stream, uint32_t &Length, + pdb::DbiModuleDescriptor &Info) { if (auto EC = pdb::DbiModuleDescriptor::initialize(Stream, Info)) return EC; Length = Info.getRecordLength(); diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h 
b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h index 8cc5db981f565..c918a5d5e976d 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h @@ -11,9 +11,10 @@ #define LLVM_DEBUGINFO_PDB_RAW_DBIMODULEDESCRIPTORBUILDER_H #include "llvm/ADT/StringRef.h" -#include "llvm/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.h" -#include "llvm/DebugInfo/CodeView/ModuleDebugInlineeLinesFragment.h" -#include "llvm/DebugInfo/CodeView/ModuleDebugLineFragment.h" +#include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h" +#include "llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h" +#include "llvm/DebugInfo/CodeView/DebugLinesSubsection.h" +#include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h" #include "llvm/DebugInfo/CodeView/SymbolRecord.h" #include "llvm/DebugInfo/PDB/Native/RawTypes.h" #include "llvm/Support/Error.h" @@ -25,7 +26,7 @@ namespace llvm { class BinaryStreamWriter; namespace codeview { -class ModuleDebugFragmentRecordBuilder; +class DebugSubsectionRecordBuilder; } namespace msf { @@ -46,25 +47,32 @@ class DbiModuleDescriptorBuilder { DbiModuleDescriptorBuilder & operator=(const DbiModuleDescriptorBuilder &) = delete; + void setPdbFilePathNI(uint32_t NI); void setObjFileName(StringRef Name); void addSymbol(codeview::CVSymbol Symbol); - void addC13Fragment(std::unique_ptr Lines); - void addC13Fragment( - std::unique_ptr Inlinees); - void setC13FileChecksums( - std::unique_ptr Checksums); + void + addDebugSubsection(std::shared_ptr Subsection); + + void + addDebugSubsection(const codeview::DebugSubsectionRecord &SubsectionContents); uint16_t getStreamIndex() const; StringRef getModuleName() const { return ModuleName; } StringRef getObjFileName() const { return ObjFileName; } + unsigned getModuleIndex() const { return Layout.Mod; } + ArrayRef source_files() const { return makeArrayRef(SourceFiles); } uint32_t calculateSerializedLength() const; + /// Return the offset within the module symbol stream of the next symbol + /// record passed to addSymbol. Add four to account for the signature. 
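A quick sanity check of that arithmetic (illustrative): with no symbols added yet, SymbolByteSize is 0 and the next record lands at offset 0 + 4 = 4, just past the 4-byte signature; after one 16-byte record it lands at 16 + 4 = 20.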
+ uint32_t getNextSymbolOffset() const { return SymbolByteSize + 4; } + void finalize(); Error finalizeMsfLayout(); @@ -78,17 +86,13 @@ class DbiModuleDescriptorBuilder { msf::MSFBuilder &MSF; uint32_t SymbolByteSize = 0; + uint32_t PdbFilePathNI = 0; std::string ModuleName; std::string ObjFileName; std::vector SourceFiles; std::vector Symbols; - std::unique_ptr ChecksumInfo; - std::vector> LineInfo; - std::vector> - Inlinees; - - std::vector> + std::vector> C13Builders; ModuleInfoHeader Layout; diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/DbiModuleList.h b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/DbiModuleList.h index bcf1cff8f6e5d..5f6e7ab92a967 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/DbiModuleList.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/DbiModuleList.h @@ -7,17 +7,20 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_DEBUGINFO_PDB_RAW_DBIMODULELIST_H -#define LLVM_DEBUGINFO_PDB_RAW_DBIMODULELIST_H +#ifndef LLVM_DEBUGINFO_PDB_NATIVE_DBIMODULELIST_H +#define LLVM_DEBUGINFO_PDB_NATIVE_DBIMODULELIST_H #include "llvm/ADT/StringRef.h" #include "llvm/ADT/iterator.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h" #include "llvm/Support/BinaryStreamArray.h" #include "llvm/Support/BinaryStreamRef.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" +#include #include +#include #include namespace llvm { @@ -29,9 +32,9 @@ struct FileInfoSubstreamHeader; class DbiModuleSourceFilesIterator : public iterator_facade_base { - typedef iterator_facade_base - BaseType; + using BaseType = + iterator_facade_base; public: DbiModuleSourceFilesIterator(const DbiModuleList &Modules, uint32_t Modi, @@ -108,7 +111,8 @@ class DbiModuleList { BinaryStreamRef FileInfoSubstream; BinaryStreamRef NamesBuffer; }; -} -} -#endif // LLVM_DEBUGINFO_PDB_RAW_DBIMODULELIST_H \ No newline at end of file +} // end namespace pdb +} // end namespace llvm + +#endif // LLVM_DEBUGINFO_PDB_NATIVE_DBIMODULELIST_H diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/DbiStream.h b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/DbiStream.h index 8f95481f41521..4be113f28d6f5 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/DbiStream.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/DbiStream.h @@ -10,7 +10,7 @@ #ifndef LLVM_DEBUGINFO_PDB_RAW_PDBDBISTREAM_H #define LLVM_DEBUGINFO_PDB_RAW_PDBDBISTREAM_H -#include "llvm/DebugInfo/CodeView/ModuleDebugFragment.h" +#include "llvm/DebugInfo/CodeView/DebugSubsection.h" #include "llvm/DebugInfo/MSF/MappedBlockStream.h" #include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h" #include "llvm/DebugInfo/PDB/Native/DbiModuleList.h" @@ -19,8 +19,6 @@ #include "llvm/DebugInfo/PDB/Native/RawTypes.h" #include "llvm/DebugInfo/PDB/PDBTypes.h" #include "llvm/Support/BinaryStreamArray.h" -#include "llvm/Support/BinaryStreamArray.h" -#include "llvm/Support/BinaryStreamRef.h" #include "llvm/Support/BinaryStreamRef.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" @@ -65,6 +63,13 @@ class DbiStream { PDB_Machine getMachineType() const; + BinarySubstreamRef getSectionContributionData() const; + BinarySubstreamRef getSecMapSubstreamData() const; + BinarySubstreamRef getModiSubstreamData() const; + BinarySubstreamRef getFileInfoSubstreamData() const; + BinarySubstreamRef getTypeServerMapSubstreamData() const; + BinarySubstreamRef 
getECSubstreamData() const; + /// If the given stream type is present, returns its stream index. If it is /// not present, returns InvalidStreamIndex. uint32_t getDebugStreamIndex(DbgHeaderType Type) const; @@ -78,6 +83,8 @@ class DbiStream { FixedStreamArray getSectionMap() const; void visitSectionContributions(ISectionContribVisitor &Visitor) const; + Expected getECName(uint32_t NI) const; + private: Error initializeSectionContributionData(); Error initializeSectionHeadersData(); @@ -89,16 +96,19 @@ class DbiStream { PDBStringTable ECNames; - BinaryStreamRef SecContrSubstream; - BinaryStreamRef SecMapSubstream; - BinaryStreamRef TypeServerMapSubstream; - BinaryStreamRef ECSubstream; + BinarySubstreamRef SecContrSubstream; + BinarySubstreamRef SecMapSubstream; + BinarySubstreamRef ModiSubstream; + BinarySubstreamRef FileInfoSubstream; + BinarySubstreamRef TypeServerMapSubstream; + BinarySubstreamRef ECSubstream; DbiModuleList Modules; FixedStreamArray DbgStreams; - PdbRaw_DbiSecContribVer SectionContribVersion; + PdbRaw_DbiSecContribVer SectionContribVersion = + PdbRaw_DbiSecContribVer::DbiSecContribVer60; FixedStreamArray SectionContribs; FixedStreamArray SectionContribs2; FixedStreamArray SectionMap; diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h index bcac182e2145b..63eb34f0326af 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h @@ -15,6 +15,7 @@ #include "llvm/Support/Error.h" #include "llvm/DebugInfo/PDB/Native/PDBFile.h" +#include "llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h" #include "llvm/DebugInfo/PDB/Native/RawConstants.h" #include "llvm/DebugInfo/PDB/PDBTypes.h" #include "llvm/Support/BinaryByteStream.h" @@ -49,26 +50,29 @@ class DbiStreamBuilder { void setPdbDllRbld(uint16_t R); void setFlags(uint16_t F); void setMachineType(PDB_Machine M); - void setSectionContribs(ArrayRef SecMap); void setSectionMap(ArrayRef SecMap); // Add given bytes as a new stream. Error addDbgStream(pdb::DbgHeaderType Type, ArrayRef Data); + uint32_t addECName(StringRef Name); + uint32_t calculateSerializedLength() const; + void setPublicsStreamIndex(uint32_t Index); + void setSymbolRecordStreamIndex(uint32_t Index); + Expected addModuleInfo(StringRef ModuleName); Error addModuleSourceFile(StringRef Module, StringRef File); + Error addModuleSourceFile(DbiModuleDescriptorBuilder &Module, StringRef File); Expected getSourceFileNameIndex(StringRef FileName); Error finalizeMsfLayout(); Error commit(const msf::MSFLayout &Layout, WritableBinaryStreamRef MsfBuffer); - // A helper function to create Section Contributions from COFF input - // section headers. - static std::vector - createSectionContribs(ArrayRef SecHdrs); + void addSectionContrib(DbiModuleDescriptorBuilder *ModuleDbi, + const llvm::object::coff_section *SecHdr); // A helper function to create a Section Map from a COFF section header. 
static std::vector @@ -77,18 +81,18 @@ class DbiStreamBuilder { private: struct DebugStream { ArrayRef Data; - uint16_t StreamNumber = 0; + uint16_t StreamNumber = kInvalidStreamIndex; }; Error finalize(); uint32_t calculateModiSubstreamSize() const; + uint32_t calculateNamesOffset() const; uint32_t calculateSectionContribsStreamSize() const; uint32_t calculateSectionMapStreamSize() const; uint32_t calculateFileInfoSubstreamSize() const; uint32_t calculateNamesBufferSize() const; uint32_t calculateDbgStreamsSize() const; - Error generateModiSubstream(); Error generateFileInfoSubstream(); msf::MSFBuilder &Msf; @@ -101,6 +105,8 @@ class DbiStreamBuilder { uint16_t PdbDllRbld; uint16_t Flags; PDB_Machine MachineType; + uint32_t PublicsStreamIndex = kInvalidStreamIndex; + uint32_t SymRecordStreamIndex = kInvalidStreamIndex; const DbiStreamHeader *Header; @@ -109,9 +115,10 @@ class DbiStreamBuilder { StringMap SourceFileNames; + PDBStringTableBuilder ECNamesBuilder; WritableBinaryStreamRef NamesBuffer; MutableBinaryByteStream FileInfoBuffer; - ArrayRef SectionContribs; + std::vector SectionContribs; ArrayRef SectionMap; llvm::SmallVector DbgStreams; }; diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/Formatters.h b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/Formatters.h index 183f0ad8307e3..7d5eab2e2a090 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/Formatters.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/Formatters.h @@ -23,13 +23,6 @@ break; namespace llvm { -template <> struct format_provider { - static void format(const pdb::PDB_UniqueId &V, llvm::raw_ostream &Stream, - StringRef Style) { - codeview::fmt_guid(V.Guid).format(Stream, Style); - } -}; - template <> struct format_provider { static void format(const pdb::PdbRaw_ImplVer &V, llvm::raw_ostream &Stream, StringRef Style) { diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/Hash.h b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/Hash.h index 0340554d7b0b3..1f11d43ecdd4e 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/Hash.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/Hash.h @@ -7,19 +7,21 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_DEBUGINFO_PDB_RAW_HASH_H -#define LLVM_DEBUGINFO_PDB_RAW_HASH_H +#ifndef LLVM_DEBUGINFO_PDB_NATIVE_HASH_H +#define LLVM_DEBUGINFO_PDB_NATIVE_HASH_H #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" -#include +#include namespace llvm { namespace pdb { + uint32_t hashStringV1(StringRef Str); uint32_t hashStringV2(StringRef Str); uint32_t hashBufferV8(ArrayRef Data); -} -} -#endif +} // end namespace pdb +} // end namespace llvm + +#endif // LLVM_DEBUGINFO_PDB_NATIVE_HASH_H diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/HashTable.h b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/HashTable.h index 46eefa968e523..05c70c4f2175a 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/HashTable.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/HashTable.h @@ -7,36 +7,36 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_DEBUGINFO_PDB_RAW_HASHTABLE_H -#define LLVM_DEBUGINFO_PDB_RAW_HASHTABLE_H +#ifndef LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H +#define LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H -#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SparseBitVector.h" -#include "llvm/ADT/StringRef.h" #include 
"llvm/ADT/iterator.h" -#include "llvm/Support/BinaryStreamArray.h" -#include "llvm/Support/BinaryStreamReader.h" -#include "llvm/Support/BinaryStreamWriter.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" -#include "llvm/Support/MathExtras.h" - #include +#include #include +#include namespace llvm { + +class BinaryStreamReader; +class BinaryStreamWriter; + namespace pdb { class HashTableIterator; class HashTable { friend class HashTableIterator; + struct Header { support::ulittle32_t Size; support::ulittle32_t Capacity; }; - typedef std::vector> BucketList; + using BucketList = std::vector>; public: HashTable(); @@ -63,6 +63,7 @@ class HashTable { protected: bool isPresent(uint32_t K) const { return Present.test(K); } bool isDeleted(uint32_t K) const { return Deleted.test(K); } + BucketList Buckets; mutable SparseBitVector<> Present; mutable SparseBitVector<> Deleted; @@ -81,6 +82,7 @@ class HashTableIterator : public iterator_facade_base> { friend class HashTable; + HashTableIterator(const HashTable &Map, uint32_t Index, bool IsEnd); public: @@ -101,6 +103,7 @@ class HashTableIterator }; } // end namespace pdb + } // end namespace llvm -#endif // LLVM_DEBUGINFO_PDB_RAW_HASHTABLE_H +#endif // LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/InfoStream.h b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/InfoStream.h index 1c38c2b6194fc..fb8271cb5ebca 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/InfoStream.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/InfoStream.h @@ -12,6 +12,7 @@ #include "llvm/ADT/BitmaskEnum.h" #include "llvm/ADT/StringMap.h" +#include "llvm/DebugInfo/CodeView/GUID.h" #include "llvm/DebugInfo/MSF/MappedBlockStream.h" #include "llvm/DebugInfo/PDB/Native/NamedStreamMap.h" #include "llvm/DebugInfo/PDB/Native/RawConstants.h" @@ -35,10 +36,11 @@ class InfoStream { uint32_t getStreamSize() const; + bool containsIdStream() const; PdbRaw_ImplVer getVersion() const; uint32_t getSignature() const; uint32_t getAge() const; - PDB_UniqueId getGuid() const; + codeview::GUID getGuid() const; uint32_t getNamedStreamMapByteSize() const; PdbRaw_Features getFeatures() const; @@ -46,6 +48,8 @@ class InfoStream { const NamedStreamMap &getNamedStreams() const; + BinarySubstreamRef getNamedStreamsBuffer() const; + uint32_t getNamedStreamIndex(llvm::StringRef Name) const; iterator_range> named_streams() const; @@ -68,7 +72,9 @@ class InfoStream { // Due to the aforementioned limitations with `Signature`, this is a new // signature present on VC70 and higher PDBs which is guaranteed to be // universally unique. 
- PDB_UniqueId Guid; + codeview::GUID Guid; + + BinarySubstreamRef SubNamedStreams; std::vector FeatureSignatures; PdbRaw_Features Features = PdbFeatureNone; diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/InfoStreamBuilder.h b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/InfoStreamBuilder.h index 90c28a90d2523..c6cb0e221e700 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/InfoStreamBuilder.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/InfoStreamBuilder.h @@ -37,7 +37,7 @@ class InfoStreamBuilder { void setVersion(PdbRaw_ImplVer V); void setSignature(uint32_t S); void setAge(uint32_t A); - void setGuid(PDB_UniqueId G); + void setGuid(codeview::GUID G); void addFeature(PdbRaw_FeatureSig Sig); uint32_t finalize(); @@ -54,7 +54,7 @@ class InfoStreamBuilder { PdbRaw_ImplVer Ver; uint32_t Sig; uint32_t Age; - PDB_UniqueId Guid; + codeview::GUID Guid; NamedStreamMap &NamedStreams; }; diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/ModuleDebugStream.h b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/ModuleDebugStream.h index 2c95690ed5806..f413fd1b336ec 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/ModuleDebugStream.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/ModuleDebugStream.h @@ -1,4 +1,4 @@ -//===- ModuleDebugStream.h - PDB Module Info Stream Access ----------------===// +//===- ModuleDebugStream.h - PDB Module Info Stream Access ------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,30 +7,31 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_DEBUGINFO_PDB_RAW_MODULEDEBUGSTREAM_H -#define LLVM_DEBUGINFO_PDB_RAW_MODULEDEBUGSTREAM_H +#ifndef LLVM_DEBUGINFO_PDB_NATIVE_MODULEDEBUGSTREAM_H +#define LLVM_DEBUGINFO_PDB_NATIVE_MODULEDEBUGSTREAM_H #include "llvm/ADT/iterator_range.h" -#include "llvm/DebugInfo/CodeView/CVRecord.h" -#include "llvm/DebugInfo/CodeView/ModuleDebugFragmentRecord.h" +#include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h" +#include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h" #include "llvm/DebugInfo/CodeView/SymbolRecord.h" #include "llvm/DebugInfo/MSF/MappedBlockStream.h" -#include "llvm/Support/BinaryStreamArray.h" #include "llvm/Support/BinaryStreamRef.h" #include "llvm/Support/Error.h" +#include +#include namespace llvm { namespace pdb { -class PDBFile; + class DbiModuleDescriptor; class ModuleDebugStreamRef { - typedef codeview::ModuleDebugFragmentArray::Iterator - LinesAndChecksumsIterator; + using DebugSubsectionIterator = codeview::DebugSubsectionArray::Iterator; public: ModuleDebugStreamRef(const DbiModuleDescriptor &Module, std::unique_ptr Stream); + ModuleDebugStreamRef(ModuleDebugStreamRef &&Other) = default; ~ModuleDebugStreamRef(); Error reload(); @@ -40,27 +41,42 @@ class ModuleDebugStreamRef { iterator_range symbols(bool *HadError) const; - llvm::iterator_range linesAndChecksums() const; + const codeview::CVSymbolArray &getSymbolArray() const { return SymbolArray; } + + BinarySubstreamRef getSymbolsSubstream() const; + BinarySubstreamRef getC11LinesSubstream() const; + BinarySubstreamRef getC13LinesSubstream() const; + BinarySubstreamRef getGlobalRefsSubstream() const; + + ModuleDebugStreamRef &operator=(ModuleDebugStreamRef &&Other) = default; + + iterator_range subsections() const; - bool hasLineInfo() const; + bool hasDebugSubsections() const; Error commit(); + Expected + findChecksumsSubsection() const; + private: const DbiModuleDescriptor &Mod; 
uint32_t Signature; - std::unique_ptr<msf::MappedBlockStream> Stream; + std::shared_ptr<msf::MappedBlockStream> Stream; + + codeview::CVSymbolArray SymbolArray; - codeview::CVSymbolArray SymbolsSubstream; - BinaryStreamRef C11LinesSubstream; - BinaryStreamRef C13LinesSubstream; - BinaryStreamRef GlobalRefsSubstream; + BinarySubstreamRef SymbolsSubstream; + BinarySubstreamRef C11LinesSubstream; + BinarySubstreamRef C13LinesSubstream; + BinarySubstreamRef GlobalRefsSubstream; - codeview::ModuleDebugFragmentArray LinesAndChecksums; + codeview::DebugSubsectionArray Subsections; }; -} -} -#endif +} // end namespace pdb +} // end namespace llvm + +#endif // LLVM_DEBUGINFO_PDB_NATIVE_MODULEDEBUGSTREAM_H diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/NamedStreamMap.h b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/NamedStreamMap.h index d4206503e7dca..17a82b7ce12db 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/NamedStreamMap.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/NamedStreamMap.h @@ -7,27 +7,31 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_DEBUGINFO_PDB_RAW_PDBNAMEDSTREAMMAP_H -#define LLVM_DEBUGINFO_PDB_RAW_PDBNAMEDSTREAMMAP_H +#ifndef LLVM_DEBUGINFO_PDB_NATIVE_NAMEDSTREAMMAP_H +#define LLVM_DEBUGINFO_PDB_NATIVE_NAMEDSTREAMMAP_H +#include "llvm/ADT/Optional.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/DebugInfo/PDB/Native/HashTable.h" #include "llvm/Support/Error.h" #include <cstdint> namespace llvm { + class BinaryStreamReader; class BinaryStreamWriter; namespace pdb { -class NamedStreamMapBuilder; + class NamedStreamMap { + friend class NamedStreamMapBuilder; + struct FinalizationInfo { uint32_t StringDataBytes = 0; uint32_t SerializedLength = 0; }; - friend NamedStreamMapBuilder; public: NamedStreamMap(); @@ -40,7 +44,7 @@ class NamedStreamMap { bool get(StringRef Stream, uint32_t &StreamNo) const; void set(StringRef Stream, uint32_t StreamNo); void remove(StringRef Stream); - + const StringMap<uint32_t> &getStringMap() const { return Mapping; } iterator_range<StringMapConstIterator<uint32_t>> entries() const; private: @@ -50,6 +54,7 @@ class NamedStreamMap { }; } // end namespace pdb + } // end namespace llvm -#endif // LLVM_DEBUGINFO_PDB_RAW_PDBNAMEDSTREAMMAP_H +#endif // LLVM_DEBUGINFO_PDB_NATIVE_NAMEDSTREAMMAP_H diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/NativeBuiltinSymbol.h b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/NativeBuiltinSymbol.h new file mode 100644 index 0000000000000..4f532c6e38299 --- /dev/null +++ b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/NativeBuiltinSymbol.h @@ -0,0 +1,49 @@ +//===- NativeBuiltinSymbol.h ------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_PDB_NATIVE_NATIVEBUILTINSYMBOL_H +#define LLVM_DEBUGINFO_PDB_NATIVE_NATIVEBUILTINSYMBOL_H + +#include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h" + +#include "llvm/DebugInfo/PDB/PDBTypes.h" + +namespace llvm { +namespace pdb { + +class NativeSession; + +class NativeBuiltinSymbol : public NativeRawSymbol { +public: + NativeBuiltinSymbol(NativeSession &PDBSession, SymIndexId Id, + PDB_BuiltinType T, uint64_t L); + ~NativeBuiltinSymbol() override; + + virtual std::unique_ptr clone() const override; + + void dump(raw_ostream &OS, int Indent) const override; + + PDB_SymType getSymTag() const override; + + PDB_BuiltinType getBuiltinType() const override; + bool isConstType() const override; + uint64_t getLength() const override; + bool isUnalignedType() const override; + bool isVolatileType() const override; + +protected: + NativeSession &Session; + PDB_BuiltinType Type; + uint64_t Length; +}; + +} // namespace pdb +} // namespace llvm + +#endif diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h index 22ed61910d94a..bd5c09e5ff76d 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h @@ -18,7 +18,11 @@ namespace pdb { class NativeCompilandSymbol : public NativeRawSymbol { public: - NativeCompilandSymbol(NativeSession &Session, DbiModuleDescriptor MI); + NativeCompilandSymbol(NativeSession &Session, SymIndexId SymbolId, + DbiModuleDescriptor MI); + + std::unique_ptr clone() const override; + PDB_SymType getSymTag() const override; bool isEditAndContinueEnabled() const override; uint32_t getLexicalParentId() const override; diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/NativeExeSymbol.h b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/NativeExeSymbol.h index 9516810539b6b..587c7ff2b0927 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/NativeExeSymbol.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/NativeExeSymbol.h @@ -18,14 +18,16 @@ namespace pdb { class NativeExeSymbol : public NativeRawSymbol { public: - NativeExeSymbol(NativeSession &Session); + NativeExeSymbol(NativeSession &Session, SymIndexId SymbolId); + + std::unique_ptr clone() const override; std::unique_ptr findChildren(PDB_SymType Type) const override; uint32_t getAge() const override; std::string getSymbolsFileName() const override; - PDB_UniqueId getGuid() const override; + codeview::GUID getGuid() const override; bool hasCTypes() const override; bool hasPrivateSymbols() const override; diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/NativeRawSymbol.h b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/NativeRawSymbol.h index e1e78035ff389..2c6548dcce21f 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/NativeRawSymbol.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/NativeRawSymbol.h @@ -1,4 +1,4 @@ -//===- NativeRawSymbol.h - Native implementation of IPDBRawSymbol - C++ -*-===// +//==- NativeRawSymbol.h - Native implementation of IPDBRawSymbol -*- C++ -*-==// // // The LLVM Compiler Infrastructure // @@ -11,15 +11,21 @@ #define LLVM_DEBUGINFO_PDB_NATIVE_NATIVERAWSYMBOL_H #include "llvm/DebugInfo/PDB/IPDBRawSymbol.h" +#include +#include namespace llvm { 
namespace pdb { class NativeSession; +typedef uint32_t SymIndexId; + class NativeRawSymbol : public IPDBRawSymbol { public: - explicit NativeRawSymbol(NativeSession &PDBSession); + NativeRawSymbol(NativeSession &PDBSession, SymIndexId SymbolId); + + virtual std::unique_ptr clone() const = 0; void dump(raw_ostream &OS, int Indent) const override; @@ -34,7 +40,7 @@ class NativeRawSymbol : public IPDBRawSymbol { std::unique_ptr findInlineFramesByRVA(uint32_t RVA) const override; - void getDataBytes(llvm::SmallVector &Bytes) const override; + void getDataBytes(SmallVector &Bytes) const override; void getFrontEndVersion(VersionInfo &Version) const override; void getBackEndVersion(VersionInfo &Version) const override; PDB_MemberAccess getAccess() const override; @@ -105,7 +111,7 @@ class NativeRawSymbol : public IPDBRawSymbol { getVirtualBaseTableType() const override; PDB_DataKind getDataKind() const override; PDB_SymType getSymTag() const override; - PDB_UniqueId getGuid() const override; + codeview::GUID getGuid() const override; int32_t getOffset() const override; int32_t getThisAdjust() const override; int32_t getVirtualBasePointerOffset() const override; @@ -201,9 +207,10 @@ class NativeRawSymbol : public IPDBRawSymbol { protected: NativeSession &Session; + SymIndexId SymbolId; }; -} -} +} // end namespace pdb +} // end namespace llvm -#endif +#endif // LLVM_DEBUGINFO_PDB_NATIVE_NATIVERAWSYMBOL_H diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/NativeSession.h b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/NativeSession.h index e6da266f796d5..b16ce231c349c 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/NativeSession.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/NativeSession.h @@ -7,11 +7,17 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_DEBUGINFO_PDB_RAW_RAWSESSION_H -#define LLVM_DEBUGINFO_PDB_RAW_RAWSESSION_H +#ifndef LLVM_DEBUGINFO_PDB_NATIVE_NATIVESESSION_H +#define LLVM_DEBUGINFO_PDB_NATIVE_NATIVESESSION_H +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/StringRef.h" +#include "llvm/DebugInfo/CodeView/TypeIndex.h" +#include "llvm/DebugInfo/PDB/IPDBRawSymbol.h" #include "llvm/DebugInfo/PDB/IPDBSession.h" +#include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h" +#include "llvm/DebugInfo/PDB/Native/NativeBuiltinSymbol.h" +#include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Error.h" @@ -30,9 +36,14 @@ class NativeSession : public IPDBSession { static Error createFromExe(StringRef Path, std::unique_ptr &Session); + std::unique_ptr + createCompilandSymbol(DbiModuleDescriptor MI); + + SymIndexId findSymbolByTypeIndex(codeview::TypeIndex TI); + uint64_t getLoadAddress() const override; void setLoadAddress(uint64_t Address) override; - std::unique_ptr getGlobalScope() const override; + std::unique_ptr getGlobalScope() override; std::unique_ptr getSymbolById(uint32_t SymbolId) const override; std::unique_ptr @@ -71,6 +82,8 @@ class NativeSession : public IPDBSession { private: std::unique_ptr Pdb; std::unique_ptr Allocator; + std::vector> SymbolCache; + DenseMap TypeIndexToSymbolId; }; } } diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/PDBFile.h b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/PDBFile.h index 3bed67141c56a..4f6ad115e7dfd 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/PDBFile.h +++ 
b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/PDBFile.h @@ -13,6 +13,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/DebugInfo/MSF/IMSFFile.h" #include "llvm/DebugInfo/MSF/MSFCommon.h" +#include "llvm/DebugInfo/MSF/MSFStreamLayout.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/BinaryStreamRef.h" #include "llvm/Support/Endian.h" @@ -85,6 +86,8 @@ class PDBFile : public msf::IMSFFile { ArrayRef getDirectoryBlockArray() const; + msf::MSFStreamLayout getStreamLayout(uint32_t StreamIdx) const; + Error parseFileHeaders(); Error parseStreamData(); @@ -108,6 +111,8 @@ class PDBFile : public msf::IMSFFile { bool hasPDBTpiStream() const; bool hasPDBStringTable(); + uint32_t getPointerSize(); + private: Expected> safelyCreateIndexedStream(const msf::MSFLayout &Layout, diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h index cd7d3b0637933..2dc23f819d3bd 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h @@ -31,11 +31,13 @@ class MSFBuilder; namespace pdb { class DbiStreamBuilder; class InfoStreamBuilder; +class PublicsStreamBuilder; class TpiStreamBuilder; class PDBFileBuilder { public: explicit PDBFileBuilder(BumpPtrAllocator &Allocator); + ~PDBFileBuilder(); PDBFileBuilder(const PDBFileBuilder &) = delete; PDBFileBuilder &operator=(const PDBFileBuilder &) = delete; @@ -47,6 +49,7 @@ class PDBFileBuilder { TpiStreamBuilder &getTpiBuilder(); TpiStreamBuilder &getIpiBuilder(); PDBStringTableBuilder &getStringTableBuilder(); + PublicsStreamBuilder &getPublicsBuilder(); Error commit(StringRef Filename); @@ -61,6 +64,7 @@ class PDBFileBuilder { std::unique_ptr Msf; std::unique_ptr Info; std::unique_ptr Dbi; + std::unique_ptr Publics; std::unique_ptr Tpi; std::unique_ptr Ipi; diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/PDBStringTable.h b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/PDBStringTable.h index 7c7f16bd1c732..29167c966d427 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/PDBStringTable.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/PDBStringTable.h @@ -12,7 +12,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" -#include "llvm/DebugInfo/CodeView/StringTable.h" +#include "llvm/DebugInfo/CodeView/DebugStringTableSubsection.h" #include "llvm/Support/BinaryStreamArray.h" #include "llvm/Support/BinaryStreamRef.h" #include "llvm/Support/Endian.h" @@ -45,6 +45,8 @@ class PDBStringTable { FixedStreamArray name_ids() const; + const codeview::DebugStringTableSubsectionRef &getStringTable() const; + private: Error readHeader(BinaryStreamReader &Reader); Error readStrings(BinaryStreamReader &Reader); @@ -52,9 +54,8 @@ class PDBStringTable { Error readEpilogue(BinaryStreamReader &Reader); const PDBStringTableHeader *Header = nullptr; - codeview::StringTableRef Strings; + codeview::DebugStringTableSubsectionRef Strings; FixedStreamArray IDs; - uint32_t ByteSize = 0; uint32_t NameCount = 0; }; diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h index 6f85e7a4a0741..b57707ee79231 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h @@ -16,7 
+16,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/StringRef.h" -#include "llvm/DebugInfo/CodeView/StringTable.h" +#include "llvm/DebugInfo/CodeView/DebugStringTableSubsection.h" #include "llvm/Support/Error.h" #include @@ -41,8 +41,7 @@ class PDBStringTableBuilder { uint32_t calculateSerializedSize() const; Error commit(BinaryStreamWriter &Writer) const; - codeview::StringTable &getStrings() { return Strings; } - const codeview::StringTable &getStrings() const { return Strings; } + void setStrings(const codeview::DebugStringTableSubsection &Strings); private: uint32_t calculateHashTableSize() const; @@ -51,7 +50,7 @@ class PDBStringTableBuilder { Error writeHashTable(BinaryStreamWriter &Writer) const; Error writeEpilogue(BinaryStreamWriter &Writer) const; - codeview::StringTable Strings; + codeview::DebugStringTableSubsection Strings; }; } // end namespace pdb diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/PublicsStream.h b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/PublicsStream.h index 4a541edd6a7b4..9ace826bd8f71 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/PublicsStream.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/PublicsStream.h @@ -25,8 +25,6 @@ struct GSIHashHeader; class PDBFile; class PublicsStream { - struct HeaderInfo; - public: PublicsStream(PDBFile &File, std::unique_ptr Stream); ~PublicsStream(); @@ -35,6 +33,7 @@ class PublicsStream { uint32_t getSymHash() const; uint32_t getAddrMap() const; uint32_t getNumBuckets() const { return NumBuckets; } + Expected getSymbolArray() const; iterator_range getSymbols(bool *HadError) const; FixedStreamArray getHashBuckets() const { @@ -64,7 +63,7 @@ class PublicsStream { FixedStreamArray ThunkMap; FixedStreamArray SectionOffsets; - const HeaderInfo *Header; + const PublicsStreamHeader *Header; const GSIHashHeader *HashHdr; }; } diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/PublicsStreamBuilder.h b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/PublicsStreamBuilder.h new file mode 100644 index 0000000000000..5ab57ebef53d4 --- /dev/null +++ b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/PublicsStreamBuilder.h @@ -0,0 +1,54 @@ +//===- PublicsStreamBuilder.h - PDB Publics Stream Creation -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_PDB_RAW_PDBPUBLICSTREAMBUILDER_H
+#define LLVM_DEBUGINFO_PDB_RAW_PDBPUBLICSTREAMBUILDER_H
+
+#include "llvm/DebugInfo/PDB/Native/RawConstants.h"
+#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
+#include "llvm/Support/BinaryByteStream.h"
+#include "llvm/Support/BinaryStreamRef.h"
+#include "llvm/Support/BinaryStreamWriter.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/Error.h"
+
+namespace llvm {
+namespace msf {
+class MSFBuilder;
+}
+namespace pdb {
+class PublicsStream;
+struct PublicsStreamHeader;
+
+class PublicsStreamBuilder {
+public:
+  explicit PublicsStreamBuilder(msf::MSFBuilder &Msf);
+  ~PublicsStreamBuilder();
+
+  PublicsStreamBuilder(const PublicsStreamBuilder &) = delete;
+  PublicsStreamBuilder &operator=(const PublicsStreamBuilder &) = delete;
+
+  Error finalizeMsfLayout();
+  uint32_t calculateSerializedLength() const;
+
+  Error commit(BinaryStreamWriter &PublicsWriter);
+
+  uint32_t getStreamIndex() const { return StreamIdx; }
+  uint32_t getRecordStreamIdx() const { return RecordStreamIdx; }
+
+private:
+  uint32_t StreamIdx = kInvalidStreamIndex;
+  uint32_t RecordStreamIdx = kInvalidStreamIndex;
+  std::vector<PSHashRecord> HashRecords;
+  msf::MSFBuilder &Msf;
+};
+} // namespace pdb
+} // namespace llvm
+
+#endif
diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/RawConstants.h b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/RawConstants.h
index f5d4df8feb2ed..bb1d097b5123f 100644
--- a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/RawConstants.h
+++ b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/RawConstants.h
@@ -12,7 +12,6 @@
 
 #include "llvm/ADT/BitmaskEnum.h"
 #include "llvm/DebugInfo/CodeView/CodeView.h"
-
 #include <cstdint>
 
 namespace llvm {
@@ -99,15 +98,19 @@ enum class DbgHeaderType : uint16_t {
 };
 
 enum class OMFSegDescFlags : uint16_t {
+  None = 0,
   Read = 1 << 0,              // Segment is readable.
   Write = 1 << 1,             // Segment is writable.
   Execute = 1 << 2,           // Segment is executable.
   AddressIs32Bit = 1 << 3,    // Descriptor describes a 32-bit linear address.
   IsSelector = 1 << 8,        // Frame represents a selector.
   IsAbsoluteAddress = 1 << 9, // Frame represents an absolute address.
-  IsGroup = 1 << 10           // If set, descriptor represents a group.
+  IsGroup = 1 << 10,          // If set, descriptor represents a group.
+  LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue = */ IsGroup)
 };
 
+LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
+
 } // end namespace pdb
 } // end namespace llvm
 
diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/RawTypes.h b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/RawTypes.h
index 771272d6a47d1..b6321cbf45a82 100644
--- a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/RawTypes.h
+++ b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/RawTypes.h
@@ -10,6 +10,7 @@
 #ifndef LLVM_DEBUGINFO_PDB_RAW_RAWTYPES_H
 #define LLVM_DEBUGINFO_PDB_RAW_RAWTYPES_H
 
+#include "llvm/DebugInfo/CodeView/GUID.h"
 #include "llvm/DebugInfo/CodeView/TypeRecord.h"
 #include "llvm/Support/Endian.h"
 
@@ -255,17 +256,19 @@ struct ModuleInfoHeader {
   /// char ObjFileName[];
 };
 
-/// Defines a 128-bit unique identifier. This maps to a GUID on Windows, but
-/// is abstracted here for the purposes of non-Windows platforms that don't have
-/// the GUID structure defined.
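// --- [Editor's illustrative sketch; not part of the patch] ---
// LLVM_MARK_AS_BITMASK_ENUM plus LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE()
// (both from llvm/ADT/BitmaskEnum.h) give OMFSegDescFlags the usual bitwise
// operators without explicit casts, e.g.:
//
//   using namespace llvm::pdb;
//   OMFSegDescFlags F = OMFSegDescFlags::Read | OMFSegDescFlags::Write;
//   F &= ~OMFSegDescFlags::Write;  // clear the Write bit
//   bool CanRead = (F & OMFSegDescFlags::Read) == OMFSegDescFlags::Read;
//
// The newly added None member simply names the empty flag set.
// --- [End sketch] ---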
-struct PDB_UniqueId { - uint8_t Guid[16]; +// This is PSGSIHDR struct defined in +// https://github.com/Microsoft/microsoft-pdb/blob/master/PDB/dbi/gsi.h +struct PublicsStreamHeader { + support::ulittle32_t SymHash; + support::ulittle32_t AddrMap; + support::ulittle32_t NumThunks; + support::ulittle32_t SizeOfThunk; + support::ulittle16_t ISectThunkTable; + char Padding[2]; + support::ulittle32_t OffThunkTable; + support::ulittle32_t NumSections; }; -inline bool operator==(const PDB_UniqueId &LHS, const PDB_UniqueId &RHS) { - return 0 == ::memcmp(LHS.Guid, RHS.Guid, sizeof(LHS.Guid)); -} - // The header preceeding the global TPI stream. // This corresponds to `HDR` in PDB/dbi/tpi.h. struct TpiStreamHeader { @@ -299,7 +302,7 @@ struct InfoStreamHeader { support::ulittle32_t Version; support::ulittle32_t Signature; support::ulittle32_t Age; - PDB_UniqueId Guid; + codeview::GUID Guid; }; /// The header preceeding the /names stream. diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/SymbolStream.h b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/SymbolStream.h index 41d5e6ad64a0e..17695f587849e 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/SymbolStream.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/SymbolStream.h @@ -27,6 +27,10 @@ class SymbolStream { ~SymbolStream(); Error reload(); + const codeview::CVSymbolArray &getSymbolArray() const { + return SymbolRecords; + } + iterator_range getSymbols(bool *HadError) const; diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/TpiHashing.h b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/TpiHashing.h index dd2698c354a20..c1edec7a26fec 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/TpiHashing.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/TpiHashing.h @@ -10,84 +10,13 @@ #ifndef LLVM_DEBUGINFO_PDB_TPIHASHING_H #define LLVM_DEBUGINFO_PDB_TPIHASHING_H -#include "llvm/ADT/Optional.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/DebugInfo/CodeView/TypeIndex.h" #include "llvm/DebugInfo/CodeView/TypeRecord.h" -#include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h" -#include "llvm/DebugInfo/PDB/Native/RawError.h" -#include "llvm/Support/BinaryStreamArray.h" -#include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" -#include -#include namespace llvm { namespace pdb { -class TpiHashUpdater : public codeview::TypeVisitorCallbacks { -public: - TpiHashUpdater() = default; - -#define TYPE_RECORD(EnumName, EnumVal, Name) \ - virtual Error visitKnownRecord(codeview::CVType &CVR, \ - codeview::Name##Record &Record) override { \ - visitKnownRecordImpl(CVR, Record); \ - return Error::success(); \ - } -#define TYPE_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) -#define MEMBER_RECORD(EnumName, EnumVal, Name) -#define MEMBER_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) -#include "llvm/DebugInfo/CodeView/TypeRecords.def" - -private: - template - void visitKnownRecordImpl(codeview::CVType &CVR, RecordKind &Record) { - CVR.Hash = 0; - } - - void visitKnownRecordImpl(codeview::CVType &CVR, - codeview::UdtSourceLineRecord &Rec); - void visitKnownRecordImpl(codeview::CVType &CVR, - codeview::UdtModSourceLineRecord &Rec); - void visitKnownRecordImpl(codeview::CVType &CVR, codeview::ClassRecord &Rec); - void visitKnownRecordImpl(codeview::CVType &CVR, codeview::EnumRecord &Rec); - void visitKnownRecordImpl(codeview::CVType &CVR, codeview::UnionRecord &Rec); -}; - -class TpiHashVerifier : public 
codeview::TypeVisitorCallbacks { -public: - TpiHashVerifier(FixedStreamArray &HashValues, - uint32_t NumHashBuckets) - : HashValues(HashValues), NumHashBuckets(NumHashBuckets) {} - - Error visitKnownRecord(codeview::CVType &CVR, - codeview::UdtSourceLineRecord &Rec) override; - Error visitKnownRecord(codeview::CVType &CVR, - codeview::UdtModSourceLineRecord &Rec) override; - Error visitKnownRecord(codeview::CVType &CVR, - codeview::ClassRecord &Rec) override; - Error visitKnownRecord(codeview::CVType &CVR, - codeview::EnumRecord &Rec) override; - Error visitKnownRecord(codeview::CVType &CVR, - codeview::UnionRecord &Rec) override; - Error visitTypeBegin(codeview::CVType &CVR) override; - -private: - Error verifySourceLine(codeview::TypeIndex TI); - - Error errorInvalidHash() { - return make_error( - raw_error_code::invalid_tpi_hash, - "Type index is 0x" + - utohexstr(codeview::TypeIndex::FirstNonSimpleIndex + Index)); - } - - FixedStreamArray HashValues; - codeview::CVType RawRecord; - uint32_t NumHashBuckets; - uint32_t Index = -1; -}; +Expected hashTypeRecord(const llvm::codeview::CVType &Type); } // end namespace pdb } // end namespace llvm diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/TpiStream.h b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/TpiStream.h index 4579cbf4227b7..d3475205a6c26 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/TpiStream.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/TpiStream.h @@ -16,11 +16,15 @@ #include "llvm/DebugInfo/PDB/Native/RawTypes.h" #include "llvm/DebugInfo/PDB/PDBTypes.h" #include "llvm/Support/BinaryStreamArray.h" +#include "llvm/Support/BinaryStreamRef.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/Error.h" namespace llvm { +namespace codeview { +class LazyRandomTypeCollection; +} namespace msf { class MappedBlockStream; } @@ -31,8 +35,7 @@ class TpiStream { friend class TpiStreamBuilder; public: - TpiStream(const PDBFile &File, - std::unique_ptr Stream); + TpiStream(PDBFile &File, std::unique_ptr Stream); ~TpiStream(); Error reload(); @@ -51,13 +54,22 @@ class TpiStream { HashTable &getHashAdjusters(); codeview::CVTypeRange types(bool *HadError) const; + const codeview::CVTypeArray &typeArray() const { return TypeRecords; } + + codeview::LazyRandomTypeCollection &typeCollection() { return *Types; } + + BinarySubstreamRef getTypeRecordsSubstream() const; Error commit(); private: - const PDBFile &Pdb; + PDBFile &Pdb; std::unique_ptr Stream; + std::unique_ptr Types; + + BinarySubstreamRef TypeRecordsSubstream; + codeview::CVTypeArray TypeRecords; std::unique_ptr HashStream; diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h index 6c609c34665ca..411720d6f56b5 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h @@ -58,6 +58,8 @@ class TpiStreamBuilder { Error finalizeMsfLayout(); + uint32_t getRecordCount() const { return TypeRecords.size(); } + Error commit(const msf::MSFLayout &Layout, WritableBinaryStreamRef Buffer); uint32_t calculateSerializedLength(); @@ -72,7 +74,7 @@ class TpiStreamBuilder { size_t TypeRecordBytes = 0; - Optional VerHeader; + PdbRaw_TpiVer VerHeader = PdbRaw_TpiVer::PdbTpiV80; std::vector> TypeRecords; std::vector TypeHashes; std::vector TypeIndexOffsets; diff --git 
a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/PDB.h b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/PDB.h index 1f5a066b9a1bd..9f9da39ca6ccd 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/PDB.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/PDB.h @@ -10,21 +10,23 @@ #ifndef LLVM_DEBUGINFO_PDB_PDB_H #define LLVM_DEBUGINFO_PDB_PDB_H -#include "PDBTypes.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/DebugInfo/PDB/PDBTypes.h" #include "llvm/Support/Error.h" #include -#include namespace llvm { -class StringRef; - namespace pdb { +class IPDBSession; + Error loadDataForPDB(PDB_ReaderType Type, StringRef Path, std::unique_ptr &Session); Error loadDataForEXE(PDB_ReaderType Type, StringRef Path, std::unique_ptr &Session); -} -} -#endif + +} // end namespace pdb +} // end namespace llvm + +#endif // LLVM_DEBUGINFO_PDB_PDB_H diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/PDBContext.h b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/PDBContext.h index 84ab8ed173cb0..0ce49f5ef9223 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/PDBContext.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/PDBContext.h @@ -41,8 +41,7 @@ namespace pdb { return DICtx->getKind() == CK_PDB; } - void dump(raw_ostream &OS, DIDumpType DumpType = DIDT_All, - bool DumpEH = false, bool SummarizeTypes = false) override; + void dump(raw_ostream &OS, DIDumpOptions DIDumpOpts) override; DILineInfo getLineInfoForAddress( uint64_t Address, diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/PDBExtras.h b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/PDBExtras.h index fc5787556a6d1..778121c8eb79c 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/PDBExtras.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/PDBExtras.h @@ -1,4 +1,4 @@ -//===- PDBExtras.h - helper functions and classes for PDBs -------*- C++-*-===// +//===- PDBExtras.h - helper functions and classes for PDBs ------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -10,15 +10,17 @@ #ifndef LLVM_DEBUGINFO_PDB_PDBEXTRAS_H #define LLVM_DEBUGINFO_PDB_PDBEXTRAS_H -#include "PDBTypes.h" #include "llvm/DebugInfo/CodeView/CodeView.h" -#include "llvm/Support/raw_ostream.h" +#include "llvm/DebugInfo/PDB/PDBTypes.h" #include namespace llvm { +class raw_ostream; + namespace pdb { -typedef std::unordered_map TagStats; + +using TagStats = std::unordered_map; raw_ostream &operator<<(raw_ostream &OS, const PDB_VariantType &Value); raw_ostream &operator<<(raw_ostream &OS, const PDB_CallingConv &Conv); @@ -30,14 +32,15 @@ raw_ostream &operator<<(raw_ostream &OS, const PDB_Checksum &Checksum); raw_ostream &operator<<(raw_ostream &OS, const PDB_Lang &Lang); raw_ostream &operator<<(raw_ostream &OS, const PDB_SymType &Tag); raw_ostream &operator<<(raw_ostream &OS, const PDB_MemberAccess &Access); -raw_ostream &operator<<(raw_ostream &OS, const PDB_UniqueId &Guid); raw_ostream &operator<<(raw_ostream &OS, const PDB_UdtType &Type); raw_ostream &operator<<(raw_ostream &OS, const PDB_Machine &Machine); raw_ostream &operator<<(raw_ostream &OS, const Variant &Value); raw_ostream &operator<<(raw_ostream &OS, const VersionInfo &Version); raw_ostream &operator<<(raw_ostream &OS, const TagStats &Stats); -} -} -#endif +} // end namespace pdb + +} // end namespace llvm + +#endif // LLVM_DEBUGINFO_PDB_PDBEXTRAS_H diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/PDBSymbol.h b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/PDBSymbol.h index b114b7afb0b03..9e883d2f99a7a 100644 --- 
a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/PDBSymbol.h
+++ b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/PDBSymbol.h
@@ -89,6 +89,8 @@ class PDBSymbol {
   template <typename T>
   std::unique_ptr<T> findOneChild() const {
     auto Enumerator(findAllChildren<T>());
+    if (!Enumerator)
+      return nullptr;
     return Enumerator->getNext();
   }
 
@@ -97,6 +99,8 @@ class PDBSymbol {
   template <typename T>
   std::unique_ptr<ConcreteSymbolEnumerator<T>> findAllChildren() const {
     auto BaseIter = RawSymbol->findChildren(T::Tag);
+    if (!BaseIter)
+      return nullptr;
     return llvm::make_unique<ConcreteSymbolEnumerator<T>>(std::move(BaseIter));
   }
   std::unique_ptr<IPDBEnumSymbols> findAllChildren(PDB_SymType Type) const;
diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/PDBTypes.h b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/PDBTypes.h
index dd2fc4f2c55f7..79ec7ce906d57 100644
--- a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/PDBTypes.h
+++ b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/PDBTypes.h
@@ -1,4 +1,4 @@
-//===- PDBTypes.h - Defines enums for various fields contained in PDB ---*-===//
+//===- PDBTypes.h - Defines enums for various fields contained in PDB ----====//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -10,9 +10,10 @@
 #ifndef LLVM_DEBUGINFO_PDB_PDBTYPES_H
 #define LLVM_DEBUGINFO_PDB_PDBTYPES_H
 
-#include "llvm/Config/llvm-config.h"
 #include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
 #include "llvm/DebugInfo/PDB/Native/RawTypes.h"
+#include
 #include <cstdint>
 #include <cstring>
 #include <functional>
@@ -20,21 +21,11 @@ namespace llvm {
 
 namespace pdb {
 
-class PDBSymDumper;
-class PDBSymbol;
-
 class IPDBDataStream;
-template <typename ChildType> class IPDBEnumChildren;
 class IPDBLineNumber;
-class IPDBRawSymbol;
-class IPDBSession;
 class IPDBSourceFile;
-
-typedef IPDBEnumChildren<PDBSymbol> IPDBEnumSymbols;
-typedef IPDBEnumChildren<IPDBSourceFile> IPDBEnumSourceFiles;
-typedef IPDBEnumChildren<IPDBDataStream> IPDBEnumDataStreams;
-typedef IPDBEnumChildren<IPDBLineNumber> IPDBEnumLineNumbers;
-
+class PDBSymDumper;
+class PDBSymbol;
 class PDBSymbolExe;
 class PDBSymbolCompiland;
 class PDBSymbolCompilandDetails;
@@ -67,6 +58,11 @@ class PDBSymbolTypeManaged;
 class PDBSymbolTypeDimension;
 class PDBSymbolUnknown;
 
+using IPDBEnumSymbols = IPDBEnumChildren<PDBSymbol>;
+using IPDBEnumSourceFiles = IPDBEnumChildren<IPDBSourceFile>;
+using IPDBEnumDataStreams = IPDBEnumChildren<IPDBDataStream>;
+using IPDBEnumLineNumbers = IPDBEnumChildren<IPDBLineNumber>;
+
 /// Specifies which PDB reader implementation is to be used.  Only a value
 /// of PDB_ReaderType::DIA is currently supported, but Native is in the works.
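// --- [Editor's illustrative sketch; not part of the patch] ---
// With the null checks added above, findAllChildren<T>() can itself return
// nullptr, so callers should test the enumerator before iterating. Assuming
// Exe is a PDBSymbolExe obtained from a session's global scope:
//
//   if (auto Compilands = Exe->findAllChildren<PDBSymbolCompiland>()) {
//     while (auto Compiland = Compilands->getNext()) {
//       // ... inspect *Compiland ...
//     }
//   }
// --- [End sketch] ---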
enum class PDB_ReaderType { @@ -104,7 +100,7 @@ enum class PDB_Checksum { None = 0, MD5 = 1, SHA1 = 2 }; /// These values correspond to the CV_CPU_TYPE_e enumeration, and are documented /// here: https://msdn.microsoft.com/en-us/library/b2fc64ek.aspx -typedef codeview::CPUType PDB_Cpu; +using PDB_Cpu = codeview::CPUType; enum class PDB_Machine { Invalid = 0xffff, @@ -135,12 +131,11 @@ enum class PDB_Machine { /// at the following locations: /// https://msdn.microsoft.com/en-us/library/b2fc64ek.aspx /// https://msdn.microsoft.com/en-us/library/windows/desktop/ms680207(v=vs.85).aspx -/// -typedef codeview::CallingConvention PDB_CallingConv; +using PDB_CallingConv = codeview::CallingConvention; /// These values correspond to the CV_CFL_LANG enumeration, and are documented /// here: https://msdn.microsoft.com/en-us/library/bw3aekw6.aspx -typedef codeview::SourceLanguage PDB_Lang; +using PDB_Lang = codeview::SourceLanguage; /// These values correspond to the DataKind enumeration, and are documented /// here: https://msdn.microsoft.com/en-us/library/b2x2t313.aspx @@ -273,9 +268,9 @@ enum PDB_VariantType { }; struct Variant { - Variant() : Type(PDB_VariantType::Empty) {} + Variant() = default; - Variant(const Variant &Other) : Type(PDB_VariantType::Empty) { + Variant(const Variant &Other) { *this = Other; } @@ -284,7 +279,7 @@ struct Variant { delete[] Value.String; } - PDB_VariantType Type; + PDB_VariantType Type = PDB_VariantType::Empty; union { bool Bool; int8_t Int8; @@ -344,18 +339,20 @@ struct Variant { } }; +} // end namespace pdb } // end namespace llvm -} namespace std { + template <> struct hash { - typedef llvm::pdb::PDB_SymType argument_type; - typedef std::size_t result_type; + using argument_type = llvm::pdb::PDB_SymType; + using result_type = std::size_t; result_type operator()(const argument_type &Arg) const { return std::hash()(static_cast(Arg)); } }; + } // end namespace std #endif // LLVM_DEBUGINFO_PDB_PDBTYPES_H diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/UDTLayout.h b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/UDTLayout.h index 6bc3660fbe517..c4234c191e21c 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/PDB/UDTLayout.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/PDB/UDTLayout.h @@ -10,30 +10,26 @@ #ifndef LLVM_DEBUGINFO_PDB_UDTLAYOUT_H #define LLVM_DEBUGINFO_PDB_UDTLAYOUT_H -#include "PDBSymbol.h" -#include "PDBTypes.h" - #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" -#include "llvm/ADT/DenseMap.h" - -#include +#include "llvm/ADT/StringRef.h" +#include "llvm/DebugInfo/PDB/PDBSymbol.h" +#include "llvm/DebugInfo/PDB/PDBSymbolData.h" +#include "llvm/DebugInfo/PDB/PDBSymbolTypeBaseClass.h" +#include "llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h" +#include "llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h" +#include "llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h" +#include "llvm/DebugInfo/PDB/PDBTypes.h" +#include #include +#include +#include namespace llvm { - -class raw_ostream; - namespace pdb { -class PDBSymTypeBaseClass; -class PDBSymbolData; -class PDBSymbolTypeUDT; -class PDBSymbolTypeVTable; - -class ClassLayout; class BaseClassLayout; -class LayoutItemBase; +class ClassLayout; class UDTLayoutBase; class LayoutItemBase { @@ -41,7 +37,7 @@ class LayoutItemBase { LayoutItemBase(const UDTLayoutBase *Parent, const PDBSymbol *Symbol, const std::string &Name, uint32_t OffsetInParent, uint32_t Size, bool IsElided); - virtual ~LayoutItemBase() {} + virtual ~LayoutItemBase() = default; uint32_t deepPaddingSize() const; virtual uint32_t 
immediatePadding() const { return 0; } @@ -79,7 +75,8 @@ class VBPtrLayoutItem : public LayoutItemBase { VBPtrLayoutItem(const UDTLayoutBase &Parent, std::unique_ptr Sym, uint32_t Offset, uint32_t Size); - virtual bool isVBPtr() const { return true; } + + bool isVBPtr() const override { return true; } private: std::unique_ptr Type; @@ -120,17 +117,12 @@ class UDTLayoutBase : public LayoutItemBase { bool IsElided); uint32_t tailPadding() const override; - ArrayRef layout_items() const { return LayoutItems; } - ArrayRef bases() const { return AllBases; } ArrayRef regular_bases() const { return NonVirtualBases; } ArrayRef virtual_bases() const { return VirtualBases; } - uint32_t directVirtualBaseCount() const { return DirectVBaseCount; } - ArrayRef> funcs() const { return Funcs; } - ArrayRef> other_items() const { return Other; } protected: @@ -183,7 +175,8 @@ class ClassLayout : public UDTLayoutBase { std::unique_ptr OwnedStorage; const PDBSymbolTypeUDT &UDT; }; -} -} // namespace llvm + +} // end namespace pdb +} // end namespace llvm #endif // LLVM_DEBUGINFO_PDB_UDTLAYOUT_H diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/Symbolize/SymbolizableModule.h b/interpreter/llvm/src/include/llvm/DebugInfo/Symbolize/SymbolizableModule.h index e0bec6f6cf859..e576a91e887cc 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/Symbolize/SymbolizableModule.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/Symbolize/SymbolizableModule.h @@ -1,4 +1,4 @@ -//===-- SymbolizableModule.h ------------------------------------ C++ -----===// +//===- SymbolizableModule.h -------------------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -14,12 +14,7 @@ #define LLVM_DEBUGINFO_SYMBOLIZE_SYMBOLIZABLEMODULE_H #include "llvm/DebugInfo/DIContext.h" - -namespace llvm { -namespace object { -class ObjectFile; -} -} +#include namespace llvm { namespace symbolize { @@ -28,7 +23,8 @@ using FunctionNameKind = DILineInfoSpecifier::FunctionNameKind; class SymbolizableModule { public: - virtual ~SymbolizableModule() {} + virtual ~SymbolizableModule() = default; + virtual DILineInfo symbolizeCode(uint64_t ModuleOffset, FunctionNameKind FNKind, bool UseSymbolTable) const = 0; @@ -45,7 +41,7 @@ class SymbolizableModule { virtual uint64_t getModulePreferredBase() const = 0; }; -} // namespace symbolize -} // namespace llvm +} // end namespace symbolize +} // end namespace llvm #endif // LLVM_DEBUGINFO_SYMBOLIZE_SYMBOLIZABLEMODULE_H diff --git a/interpreter/llvm/src/include/llvm/DebugInfo/Symbolize/Symbolize.h b/interpreter/llvm/src/include/llvm/DebugInfo/Symbolize/Symbolize.h index 5103cc03a6bdb..d98d49b24bca2 100644 --- a/interpreter/llvm/src/include/llvm/DebugInfo/Symbolize/Symbolize.h +++ b/interpreter/llvm/src/include/llvm/DebugInfo/Symbolize/Symbolize.h @@ -1,4 +1,4 @@ -//===-- Symbolize.h --------------------------------------------- C++ -----===// +//===- Symbolize.h ----------------------------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -10,21 +10,27 @@ // Header for LLVM symbolization library. 
// //===----------------------------------------------------------------------===// + #ifndef LLVM_DEBUGINFO_SYMBOLIZE_SYMBOLIZE_H #define LLVM_DEBUGINFO_SYMBOLIZE_SYMBOLIZE_H #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h" +#include "llvm/Object/Binary.h" #include "llvm/Object/ObjectFile.h" -#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/Error.h" +#include +#include #include #include #include #include +#include namespace llvm { namespace symbolize { using namespace object; + using FunctionNameKind = DILineInfoSpecifier::FunctionNameKind; class LLVMSymbolizer { @@ -36,6 +42,7 @@ class LLVMSymbolizer { bool RelativeAddresses : 1; std::string DefaultArch; std::vector DsymHints; + Options(FunctionNameKind PrintFunctions = FunctionNameKind::LinkageName, bool UseSymbolTable = true, bool Demangle = true, bool RelativeAddresses = false, std::string DefaultArch = "") @@ -45,6 +52,7 @@ class LLVMSymbolizer { }; LLVMSymbolizer(const Options &Opts = Options()) : Opts(Opts) {} + ~LLVMSymbolizer() { flush(); } @@ -56,6 +64,7 @@ class LLVMSymbolizer { Expected symbolizeData(const std::string &ModuleName, uint64_t ModuleOffset); void flush(); + static std::string DemangleName(const std::string &Name, const SymbolizableModule *DbiModuleDescriptor); @@ -63,7 +72,7 @@ class LLVMSymbolizer { private: // Bundles together object file with code/data and object file with // corresponding debug info. These objects can be the same. - typedef std::pair ObjectPair; + using ObjectPair = std::pair; /// Returns a SymbolizableModule or an error if loading debug info failed. /// Only one attempt is made to load a module, and errors during loading are @@ -106,7 +115,7 @@ class LLVMSymbolizer { Options Opts; }; -} // namespace symbolize -} // namespace llvm +} // end namespace symbolize +} // end namespace llvm -#endif +#endif // LLVM_DEBUGINFO_SYMBOLIZE_SYMBOLIZE_H diff --git a/interpreter/llvm/src/include/llvm/ExecutionEngine/ExecutionEngine.h b/interpreter/llvm/src/include/llvm/ExecutionEngine/ExecutionEngine.h index f68337c432718..2830a26287536 100644 --- a/interpreter/llvm/src/include/llvm/ExecutionEngine/ExecutionEngine.h +++ b/interpreter/llvm/src/include/llvm/ExecutionEngine/ExecutionEngine.h @@ -15,54 +15,58 @@ #ifndef LLVM_EXECUTIONENGINE_EXECUTIONENGINE_H #define LLVM_EXECUTIONENGINE_EXECUTIONENGINE_H -#include "RuntimeDyld.h" #include "llvm-c/ExecutionEngine.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ExecutionEngine/JITSymbol.h" +#include "llvm/IR/DataLayout.h" #include "llvm/IR/Module.h" -#include "llvm/IR/ValueHandle.h" -#include "llvm/IR/ValueMap.h" #include "llvm/Object/Binary.h" +#include "llvm/Support/CBindingWrapping.h" +#include "llvm/Support/CodeGen.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Mutex.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" +#include +#include +#include #include +#include #include #include -#include namespace llvm { -struct GenericValue; class Constant; -class DataLayout; -class ExecutionEngine; class Function; -class GlobalVariable; +struct GenericValue; class GlobalValue; +class GlobalVariable; class JITEventListener; -class MachineCodeInfo; class MCJITMemoryManager; -class MutexGuard; class ObjectCache; class RTDyldMemoryManager; class Triple; class Type; namespace object { - class Archive; - class ObjectFile; -} + +class Archive; +class ObjectFile; + +} // end 
namespace object /// \brief Helper class for helping synchronize access to the global address map /// table. Access to this class should be serialized under a mutex. class ExecutionEngineState { public: - typedef StringMap GlobalAddressMapTy; + using GlobalAddressMapTy = StringMap; private: - /// GlobalAddressMap - A mapping between LLVM global symbol names values and /// their actualized version... GlobalAddressMapTy GlobalAddressMap; @@ -74,7 +78,6 @@ class ExecutionEngineState { std::map GlobalAddressReverseMap; public: - GlobalAddressMapTy &getGlobalAddressMap() { return GlobalAddressMap; } @@ -509,13 +512,15 @@ class ExecutionEngine { }; namespace EngineKind { + // These are actually bitmasks that get or-ed together. enum Kind { JIT = 0x1, Interpreter = 0x2 }; const static Kind Either = (Kind)(JIT | Interpreter); -} + +} // end namespace EngineKind /// Builder class for ExecutionEngines. Use this by stack-allocating a builder, /// chaining the various set* methods, and terminating it with a .create() @@ -655,6 +660,6 @@ class EngineBuilder { // Create wrappers for C Binding types (see CBindingWrapping.h). DEFINE_SIMPLE_CONVERSION_FUNCTIONS(ExecutionEngine, LLVMExecutionEngineRef) -} // End llvm namespace +} // end namespace llvm -#endif +#endif // LLVM_EXECUTIONENGINE_EXECUTIONENGINE_H diff --git a/interpreter/llvm/src/include/llvm/ExecutionEngine/GenericValue.h b/interpreter/llvm/src/include/llvm/ExecutionEngine/GenericValue.h index 537745519ddb6..504e30a018b69 100644 --- a/interpreter/llvm/src/include/llvm/ExecutionEngine/GenericValue.h +++ b/interpreter/llvm/src/include/llvm/ExecutionEngine/GenericValue.h @@ -1,4 +1,4 @@ -//===-- GenericValue.h - Represent any type of LLVM value -------*- C++ -*-===// +//===- GenericValue.h - Represent any type of LLVM value --------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -11,18 +11,15 @@ // //===----------------------------------------------------------------------===// - #ifndef LLVM_EXECUTIONENGINE_GENERICVALUE_H #define LLVM_EXECUTIONENGINE_GENERICVALUE_H #include "llvm/ADT/APInt.h" -#include "llvm/Support/DataTypes.h" #include namespace llvm { -typedef void* PointerTy; -class APInt; +using PointerTy = void *; struct GenericValue { struct IntPair { @@ -30,25 +27,29 @@ struct GenericValue { unsigned int second; }; union { - double DoubleVal; - float FloatVal; - PointerTy PointerVal; - struct IntPair UIntPairVal; - unsigned char Untyped[8]; + double DoubleVal; + float FloatVal; + PointerTy PointerVal; + struct IntPair UIntPairVal; + unsigned char Untyped[8]; }; - APInt IntVal; // also used for long doubles. + APInt IntVal; // also used for long doubles. // For aggregate data types. std::vector AggregateVal; // to make code faster, set GenericValue to zero could be omitted, but it is // potentially can cause problems, since GenericValue to store garbage // instead of zero. - GenericValue() : IntVal(1,0) {UIntPairVal.first = 0; UIntPairVal.second = 0;} - explicit GenericValue(void *V) : PointerVal(V), IntVal(1,0) { } + GenericValue() : IntVal(1, 0) { + UIntPairVal.first = 0; + UIntPairVal.second = 0; + } + explicit GenericValue(void *V) : PointerVal(V), IntVal(1, 0) {} }; inline GenericValue PTOGV(void *P) { return GenericValue(P); } -inline void* GVTOP(const GenericValue &GV) { return GV.PointerVal; } +inline void *GVTOP(const GenericValue &GV) { return GV.PointerVal; } + +} // end namespace llvm -} // End llvm namespace. 
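// --- [Editor's illustrative sketch; not part of the patch] ---
// GenericValue is the interpreter's untyped value carrier; PTOGV and GVTOP
// just round-trip a pointer through it. The hunk above only reflows the
// code; the behavior is unchanged:
//
//   llvm::GenericValue GV;
//   GV.IntVal = llvm::APInt(32, 42);           // hold an i32
//   int Buf[4];
//   void *P = llvm::GVTOP(llvm::PTOGV(Buf));   // P == Buf
// --- [End sketch] ---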
-#endif +#endif // LLVM_EXECUTIONENGINE_GENERICVALUE_H diff --git a/interpreter/llvm/src/include/llvm/ExecutionEngine/JITEventListener.h b/interpreter/llvm/src/include/llvm/ExecutionEngine/JITEventListener.h index 94ec4e36a199e..ff7840f00a44a 100644 --- a/interpreter/llvm/src/include/llvm/ExecutionEngine/JITEventListener.h +++ b/interpreter/llvm/src/include/llvm/ExecutionEngine/JITEventListener.h @@ -15,8 +15,8 @@ #ifndef LLVM_EXECUTIONENGINE_JITEVENTLISTENER_H #define LLVM_EXECUTIONENGINE_JITEVENTLISTENER_H -#include "RuntimeDyld.h" #include "llvm/Config/llvm-config.h" +#include "llvm/ExecutionEngine/RuntimeDyld.h" #include "llvm/IR/DebugLoc.h" #include #include @@ -28,7 +28,9 @@ class MachineFunction; class OProfileWrapper; namespace object { - class ObjectFile; + +class ObjectFile; + } // end namespace object /// JITEvent_EmittedFunctionDetails - Helper struct for containing information @@ -57,7 +59,7 @@ struct JITEvent_EmittedFunctionDetails { /// The default implementation of each method does nothing. class JITEventListener { public: - typedef JITEvent_EmittedFunctionDetails EmittedFunctionDetails; + using EmittedFunctionDetails = JITEvent_EmittedFunctionDetails; public: JITEventListener() = default; diff --git a/interpreter/llvm/src/include/llvm/ExecutionEngine/JITSymbol.h b/interpreter/llvm/src/include/llvm/ExecutionEngine/JITSymbol.h index 88929482ce765..4172f240ba392 100644 --- a/interpreter/llvm/src/include/llvm/ExecutionEngine/JITSymbol.h +++ b/interpreter/llvm/src/include/llvm/ExecutionEngine/JITSymbol.h @@ -1,4 +1,4 @@ -//===----------- JITSymbol.h - JIT symbol abstraction -----------*- C++ -*-===// +//===- JITSymbol.h - JIT symbol abstraction ---------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -21,36 +21,46 @@ #include #include +#include "llvm/Support/Error.h" + namespace llvm { class GlobalValue; namespace object { - class BasicSymbolRef; + +class BasicSymbolRef; + } // end namespace object /// @brief Represents an address in the target process's address space. -typedef uint64_t JITTargetAddress; +using JITTargetAddress = uint64_t; /// @brief Flags for symbols in the JIT. class JITSymbolFlags { public: - typedef uint8_t UnderlyingType; + using UnderlyingType = uint8_t; enum FlagNames : UnderlyingType { None = 0, - Weak = 1U << 0, - Common = 1U << 1, - Absolute = 1U << 2, - Exported = 1U << 3 + HasError = 1U << 0, + Weak = 1U << 1, + Common = 1U << 2, + Absolute = 1U << 3, + Exported = 1U << 4 }; /// @brief Default-construct a JITSymbolFlags instance. - JITSymbolFlags() : Flags(None) {} + JITSymbolFlags() = default; /// @brief Construct a JITSymbolFlags instance from the given flags. JITSymbolFlags(FlagNames Flags) : Flags(Flags) {} + /// @brief Return true if there was an error retrieving this symbol. + bool hasError() const { + return (Flags & HasError) == HasError; + } + /// @brief Returns true is the Weak flag is set. bool isWeak() const { return (Flags & Weak) == Weak; @@ -81,15 +91,14 @@ class JITSymbolFlags { static JITSymbolFlags fromObjectSymbol(const object::BasicSymbolRef &Symbol); private: - UnderlyingType Flags; + UnderlyingType Flags = None; }; /// @brief Represents a symbol that has been evaluated to an address already. class JITEvaluatedSymbol { public: /// @brief Create a 'null' symbol. - JITEvaluatedSymbol(std::nullptr_t) - : Address(0) {} + JITEvaluatedSymbol(std::nullptr_t) {} /// @brief Create a symbol for the given address and flags. 
JITEvaluatedSymbol(JITTargetAddress Address, JITSymbolFlags Flags) @@ -105,20 +114,25 @@ class JITEvaluatedSymbol { JITSymbolFlags getFlags() const { return Flags; } private: - JITTargetAddress Address; + JITTargetAddress Address = 0; JITSymbolFlags Flags; }; /// @brief Represents a symbol in the JIT. class JITSymbol { public: - typedef std::function GetAddressFtor; + using GetAddressFtor = std::function()>; - /// @brief Create a 'null' symbol that represents failure to find a symbol - /// definition. + /// @brief Create a 'null' symbol, used to represent a "symbol not found" + /// result from a successful (non-erroneous) lookup. JITSymbol(std::nullptr_t) : CachedAddr(0) {} + /// @brief Create a JITSymbol representing an error in the symbol lookup + /// process (e.g. a network failure during a remote lookup). + JITSymbol(Error Err) + : Err(std::move(Err)), Flags(JITSymbolFlags::HasError) {} + /// @brief Create a symbol for a definition with a known address. JITSymbol(JITTargetAddress Addr, JITSymbolFlags Flags) : CachedAddr(Addr), Flags(Flags) {} @@ -139,16 +153,57 @@ class JITSymbol { JITSymbol(GetAddressFtor GetAddress, JITSymbolFlags Flags) : GetAddress(std::move(GetAddress)), CachedAddr(0), Flags(Flags) {} + JITSymbol(const JITSymbol&) = delete; + JITSymbol& operator=(const JITSymbol&) = delete; + + JITSymbol(JITSymbol &&Other) + : GetAddress(std::move(Other.GetAddress)), Flags(std::move(Other.Flags)) { + if (Flags.hasError()) + Err = std::move(Other.Err); + else + CachedAddr = std::move(Other.CachedAddr); + } + + JITSymbol& operator=(JITSymbol &&Other) { + GetAddress = std::move(Other.GetAddress); + Flags = std::move(Other.Flags); + if (Flags.hasError()) + Err = std::move(Other.Err); + else + CachedAddr = std::move(Other.CachedAddr); + return *this; + } + + ~JITSymbol() { + if (Flags.hasError()) + Err.~Error(); + else + CachedAddr.~JITTargetAddress(); + } + /// @brief Returns true if the symbol exists, false otherwise. - explicit operator bool() const { return CachedAddr || GetAddress; } + explicit operator bool() const { + return !Flags.hasError() && (CachedAddr || GetAddress); + } + + /// @brief Move the error field value out of this JITSymbol. + Error takeError() { + if (Flags.hasError()) + return std::move(Err); + return Error::success(); + } /// @brief Get the address of the symbol in the target address space. Returns /// '0' if the symbol does not exist. 
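// --- [Editor's illustrative sketch; not part of the patch] ---
// Since getAddress() now returns Expected<JITTargetAddress> (next hunk), a
// lookup becomes a two-step check: first that the symbol exists, then that
// it materialized. Layer and doSomethingWith are placeholders here:
//
//   if (auto Sym = Layer.findSymbol("foo", /*ExportedSymbolsOnly=*/true)) {
//     if (auto AddrOrErr = Sym.getAddress())
//       doSomethingWith(*AddrOrErr);        // symbol resolved to an address
//     else
//       return AddrOrErr.takeError();       // materialization failed
//   } else if (auto Err = Sym.takeError())
//     return Err;                           // the lookup itself errored
// --- [End sketch] ---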
- JITTargetAddress getAddress() { + Expected getAddress() { + assert(!Flags.hasError() && "getAddress called on error value"); if (GetAddress) { - CachedAddr = GetAddress(); - assert(CachedAddr && "Symbol could not be materialized."); - GetAddress = nullptr; + if (auto CachedAddrOrErr = GetAddress()) { + GetAddress = nullptr; + CachedAddr = *CachedAddrOrErr; + assert(CachedAddr && "Symbol could not be materialized."); + } else + return CachedAddrOrErr.takeError(); } return CachedAddr; } @@ -157,7 +212,10 @@ class JITSymbol { private: GetAddressFtor GetAddress; - JITTargetAddress CachedAddr; + union { + JITTargetAddress CachedAddr; + Error Err; + }; JITSymbolFlags Flags; }; diff --git a/interpreter/llvm/src/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h b/interpreter/llvm/src/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h index 1bb911d09cfb4..27b5457fc8ffd 100644 --- a/interpreter/llvm/src/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h +++ b/interpreter/llvm/src/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h @@ -20,20 +20,25 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/ExecutionEngine/JITSymbol.h" -#include "llvm/ExecutionEngine/RuntimeDyld.h" #include "llvm/ExecutionEngine/Orc/IndirectionUtils.h" #include "llvm/ExecutionEngine/Orc/LambdaResolver.h" +#include "llvm/ExecutionEngine/Orc/OrcError.h" +#include "llvm/ExecutionEngine/RuntimeDyld.h" #include "llvm/IR/Attributes.h" +#include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Mangler.h" #include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" #include "llvm/Support/Casting.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/ValueMapper.h" #include #include #include @@ -46,6 +51,9 @@ #include namespace llvm { + +class Value; + namespace orc { /// @brief Compile-on-demand layer. @@ -77,15 +85,15 @@ class CompileOnDemandLayer { return LambdaMaterializer(std::move(M)); } - typedef typename BaseLayerT::ModuleSetHandleT BaseLayerModuleSetHandleT; + using BaseLayerModuleHandleT = typename BaseLayerT::ModuleHandleT; // Provide type-erasure for the Modules and MemoryManagers. 
template class ResourceOwner { public: ResourceOwner() = default; - ResourceOwner(const ResourceOwner&) = delete; - ResourceOwner& operator=(const ResourceOwner&) = delete; + ResourceOwner(const ResourceOwner &) = delete; + ResourceOwner &operator=(const ResourceOwner &) = delete; virtual ~ResourceOwner() = default; virtual ResourceT& getResource() const = 0; @@ -106,7 +114,7 @@ class CompileOnDemandLayer { template std::unique_ptr> wrapOwnership(ResourcePtrT ResourcePtr) { - typedef ResourceOwnerImpl RO; + using RO = ResourceOwnerImpl; return llvm::make_unique(std::move(ResourcePtr)); } @@ -130,24 +138,24 @@ class CompileOnDemandLayer { }; struct LogicalDylib { - typedef std::function SymbolResolverFtor; + using SymbolResolverFtor = std::function; - typedef std::function, - std::unique_ptr)> - ModuleAdderFtor; + using ModuleAdderFtor = + std::function, + std::unique_ptr)>; struct SourceModuleEntry { - std::unique_ptr> SourceMod; + std::shared_ptr SourceMod; std::set StubsToClone; }; - typedef std::vector SourceModulesList; - typedef typename SourceModulesList::size_type SourceModuleHandle; + using SourceModulesList = std::vector; + using SourceModuleHandle = typename SourceModulesList::size_type; SourceModuleHandle - addSourceModule(std::unique_ptr> M) { + addSourceModule(std::shared_ptr M) { SourceModuleHandle H = SourceModules.size(); SourceModules.push_back(SourceModuleEntry()); SourceModules.back().SourceMod = std::move(M); @@ -155,7 +163,7 @@ class CompileOnDemandLayer { } Module& getSourceModule(SourceModuleHandle H) { - return SourceModules[H].SourceMod->getResource(); + return *SourceModules[H].SourceMod; } std::set& getStubsToClone(SourceModuleHandle H) { @@ -169,35 +177,38 @@ class CompileOnDemandLayer { for (auto BLH : BaseLayerHandles) if (auto Sym = BaseLayer.findSymbolIn(BLH, Name, ExportedSymbolsOnly)) return Sym; + else if (auto Err = Sym.takeError()) + return std::move(Err); return nullptr; } - void removeModulesFromBaseLayer(BaseLayerT &BaseLayer) { + Error removeModulesFromBaseLayer(BaseLayerT &BaseLayer) { for (auto &BLH : BaseLayerHandles) - BaseLayer.removeModuleSet(BLH); + if (auto Err = BaseLayer.removeModule(BLH)) + return Err; + return Error::success(); } - std::unique_ptr ExternalSymbolResolver; - std::unique_ptr> MemMgr; + std::shared_ptr ExternalSymbolResolver; std::unique_ptr StubsMgr; StaticGlobalRenamer StaticRenamer; - ModuleAdderFtor ModuleAdder; SourceModulesList SourceModules; - std::vector BaseLayerHandles; + std::vector BaseLayerHandles; }; - typedef std::list LogicalDylibList; + using LogicalDylibList = std::list; public: - /// @brief Handle to a set of loaded modules. - typedef typename LogicalDylibList::iterator ModuleSetHandleT; + + /// @brief Handle to loaded module. + using ModuleHandleT = typename LogicalDylibList::iterator; /// @brief Module partitioning functor. - typedef std::function(Function&)> PartitioningFtor; + using PartitioningFtor = std::function(Function&)>; /// @brief Builder for IndirectStubsManagers. - typedef std::function()> - IndirectStubsManagerBuilderT; + using IndirectStubsManagerBuilderT = + std::function()>; /// @brief Construct a compile-on-demand layer instance. CompileOnDemandLayer(BaseLayerT &BaseLayer, PartitioningFtor Partition, @@ -210,47 +221,41 @@ class CompileOnDemandLayer { CloneStubsIntoPartitions(CloneStubsIntoPartitions) {} ~CompileOnDemandLayer() { + // FIXME: Report error on log. 
while (!LogicalDylibs.empty()) - removeModuleSet(LogicalDylibs.begin()); + consumeError(removeModule(LogicalDylibs.begin())); } - + /// @brief Add a module to the compile-on-demand layer. - template - ModuleSetHandleT addModuleSet(ModuleSetT Ms, - MemoryManagerPtrT MemMgr, - SymbolResolverPtrT Resolver) { + Expected + addModule(std::shared_ptr M, + std::shared_ptr Resolver) { LogicalDylibs.push_back(LogicalDylib()); auto &LD = LogicalDylibs.back(); LD.ExternalSymbolResolver = std::move(Resolver); LD.StubsMgr = CreateIndirectStubsManager(); - auto &MemMgrRef = *MemMgr; - LD.MemMgr = wrapOwnership(std::move(MemMgr)); - - LD.ModuleAdder = - [&MemMgrRef](BaseLayerT &B, std::unique_ptr M, - std::unique_ptr R) { - std::vector> Ms; - Ms.push_back(std::move(M)); - return B.addModuleSet(std::move(Ms), &MemMgrRef, std::move(R)); - }; - // Process each of the modules in this module set. - for (auto &M : Ms) - addLogicalModule(LogicalDylibs.back(), std::move(M)); + if (auto Err = addLogicalModule(LD, std::move(M))) + return std::move(Err); return std::prev(LogicalDylibs.end()); } + /// @brief Add extra modules to an existing logical module. + Error addExtraModule(ModuleHandleT H, std::shared_ptr M) { + return addLogicalModule(*H, std::move(M)); + } + /// @brief Remove the module represented by the given handle. /// /// This will remove all modules in the layers below that were derived from /// the module represented by H. - void removeModuleSet(ModuleSetHandleT H) { - H->removeModulesFromBaseLayer(BaseLayer); + Error removeModule(ModuleHandleT H) { + auto Err = H->removeModulesFromBaseLayer(BaseLayer); LogicalDylibs.erase(H); + return Err; } /// @brief Search for the given named symbol. @@ -264,13 +269,15 @@ class CompileOnDemandLayer { return Sym; if (auto Sym = findSymbolIn(LDI, Name, ExportedSymbolsOnly)) return Sym; + else if (auto Err = Sym.takeError()) + return std::move(Err); } return BaseLayer.findSymbol(Name, ExportedSymbolsOnly); } /// @brief Get the address of a symbol provided by this layer, or some layer /// below this one. - JITSymbol findSymbolIn(ModuleSetHandleT H, const std::string &Name, + JITSymbol findSymbolIn(ModuleHandleT H, const std::string &Name, bool ExportedSymbolsOnly) { return H->findSymbol(BaseLayer, Name, ExportedSymbolsOnly); } @@ -283,26 +290,26 @@ class CompileOnDemandLayer { // FIXME: We should track and free associated resources (unused compile // callbacks, uncompiled IR, and no-longer-needed/reachable function // implementations). - // FIXME: Return Error once the JIT APIs are Errorized. 
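// --- [Editor's illustrative sketch; not part of the patch] ---
// The layer interface is now errorized end to end: addModule() returns
// Expected<ModuleHandleT>, while removeModule() and updatePointer() return
// Error. Client code threads the errors through (CODLayer, M and Resolver
// are placeholder names):
//
//   auto HandleOrErr = CODLayer.addModule(std::move(M), std::move(Resolver));
//   if (!HandleOrErr)
//     return HandleOrErr.takeError();
//   auto H = *HandleOrErr;
//   // ... execute JIT'd code ...
//   if (auto Err = CODLayer.removeModule(H))
//     return Err;
// --- [End sketch] ---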
- bool updatePointer(std::string FuncName, JITTargetAddress FnBodyAddr) { + Error updatePointer(std::string FuncName, JITTargetAddress FnBodyAddr) { //Find out which logical dylib contains our symbol auto LDI = LogicalDylibs.begin(); for (auto LDE = LogicalDylibs.end(); LDI != LDE; ++LDI) { - if (auto LMResources = LDI->getLogicalModuleResourcesForSymbol(FuncName, false)) { + if (auto LMResources = + LDI->getLogicalModuleResourcesForSymbol(FuncName, false)) { Module &SrcM = LMResources->SourceModule->getResource(); std::string CalledFnName = mangle(FuncName, SrcM.getDataLayout()); - if (auto EC = LMResources->StubsMgr->updatePointer(CalledFnName, FnBodyAddr)) - return false; - else - return true; + if (auto Err = LMResources->StubsMgr->updatePointer(CalledFnName, + FnBodyAddr)) + return Err; + return Error::success(); } } - return false; + return make_error(FuncName); } private: - template - void addLogicalModule(LogicalDylib &LD, ModulePtrT SrcMPtr) { + + Error addLogicalModule(LogicalDylib &LD, std::shared_ptr SrcMPtr) { // Rename all static functions / globals to $static.X : // This will unique the names across all modules in the logical dylib, @@ -315,7 +322,7 @@ class CompileOnDemandLayer { // Create a logical module handle for SrcM within the logical dylib. Module &SrcM = *SrcMPtr; - auto LMId = LD.addSourceModule(wrapOwnership(std::move(SrcMPtr))); + auto LMId = LD.addSourceModule(std::move(SrcMPtr)); // Create stub functions. const DataLayout &DL = SrcM.getDataLayout(); @@ -328,9 +335,12 @@ class CompileOnDemandLayer { // Skip weak functions for which we already have definitions. auto MangledName = mangle(F.getName(), DL); - if (F.hasWeakLinkage() || F.hasLinkOnceLinkage()) + if (F.hasWeakLinkage() || F.hasLinkOnceLinkage()) { if (auto Sym = LD.findSymbol(BaseLayer, MangledName, false)) continue; + else if (auto Err = Sym.takeError()) + return std::move(Err); + } // Record all functions defined by this module. if (CloneStubsIntoPartitions) @@ -343,16 +353,19 @@ class CompileOnDemandLayer { StubInits[MangledName] = std::make_pair(CCInfo.getAddress(), JITSymbolFlags::fromGlobalValue(F)); - CCInfo.setCompileAction([this, &LD, LMId, &F]() { - return this->extractAndCompile(LD, LMId, F); - }); + CCInfo.setCompileAction([this, &LD, LMId, &F]() -> JITTargetAddress { + if (auto FnImplAddrOrErr = this->extractAndCompile(LD, LMId, F)) + return *FnImplAddrOrErr; + else { + // FIXME: Report error, return to 'abort' or something similar. + consumeError(FnImplAddrOrErr.takeError()); + return 0; + } + }); } - auto EC = LD.StubsMgr->createStubs(StubInits); - (void)EC; - // FIXME: This should be propagated back to the user. Stub creation may - // fail for remote JITs. - assert(!EC && "Error generating stubs"); + if (auto Err = LD.StubsMgr->createStubs(StubInits)) + return Err; } // If this module doesn't contain any globals, aliases, or module flags then @@ -360,7 +373,7 @@ class CompileOnDemandLayer { // empty globals module. if (SrcM.global_empty() && SrcM.alias_empty() && !SrcM.getModuleFlagsMetadata()) - return; + return Error::success(); // Create the GlobalValues module. auto GVsM = llvm::make_unique((SrcM.getName() + ".globals").str(), @@ -386,8 +399,9 @@ class CompileOnDemandLayer { // Initializers may refer to functions declared (but not defined) in this // module. Build a materializer to clone decls on demand. 
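// --- [Editor's illustrative sketch; not part of the patch] ---
// The MaterializerErrors variable introduced below follows the standard
// llvm::Error accumulation idiom: start from Error::success() and fold each
// failure in with joinErrors(), so no partial failure is dropped:
//
//   llvm::Error All = llvm::Error::success();
//   for (auto &Item : Items)                  // Items is hypothetical
//     if (llvm::Error E = processItem(Item))  // as is processItem()
//       All = llvm::joinErrors(std::move(All), std::move(E));
//   return All;                               // success iff nothing failed
//
// joinErrors() builds an ErrorList, so every recorded error is preserved.
// --- [End sketch] ---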
+ Error MaterializerErrors = Error::success(); auto Materializer = createLambdaMaterializer( - [&LD, &GVsM](Value *V) -> Value* { + [&LD, &GVsM, &MaterializerErrors](Value *V) -> Value* { if (auto *F = dyn_cast(V)) { // Decls in the original module just get cloned. if (F->isDeclaration()) @@ -398,13 +412,24 @@ class CompileOnDemandLayer { // instead. const DataLayout &DL = GVsM->getDataLayout(); std::string FName = mangle(F->getName(), DL); - auto StubSym = LD.StubsMgr->findStub(FName, false); unsigned PtrBitWidth = DL.getPointerTypeSizeInBits(F->getType()); - ConstantInt *StubAddr = - ConstantInt::get(GVsM->getContext(), - APInt(PtrBitWidth, StubSym.getAddress())); + JITTargetAddress StubAddr = 0; + + // Get the address for the stub. If we encounter an error while + // doing so, stash it in the MaterializerErrors variable and use a + // null address as a placeholder. + if (auto StubSym = LD.StubsMgr->findStub(FName, false)) { + if (auto StubAddrOrErr = StubSym.getAddress()) + StubAddr = *StubAddrOrErr; + else + MaterializerErrors = joinErrors(std::move(MaterializerErrors), + StubAddrOrErr.takeError()); + } + + ConstantInt *StubAddrCI = + ConstantInt::get(GVsM->getContext(), APInt(PtrBitWidth, StubAddr)); Constant *Init = ConstantExpr::getCast(Instruction::IntToPtr, - StubAddr, F->getType()); + StubAddrCI, F->getType()); return GlobalAlias::create(F->getFunctionType(), F->getType()->getAddressSpace(), F->getLinkage(), F->getName(), @@ -428,22 +453,31 @@ class CompileOnDemandLayer { NewA->setAliasee(cast(Init)); } + if (MaterializerErrors) + return MaterializerErrors; + // Build a resolver for the globals module and add it to the base layer. auto GVsResolver = createLambdaResolver( - [this, &LD](const std::string &Name) { + [this, &LD](const std::string &Name) -> JITSymbol { if (auto Sym = LD.StubsMgr->findStub(Name, false)) return Sym; if (auto Sym = LD.findSymbol(BaseLayer, Name, false)) return Sym; + else if (auto Err = Sym.takeError()) + return std::move(Err); return LD.ExternalSymbolResolver->findSymbolInLogicalDylib(Name); }, [&LD](const std::string &Name) { return LD.ExternalSymbolResolver->findSymbol(Name); }); - auto GVsH = LD.ModuleAdder(BaseLayer, std::move(GVsM), - std::move(GVsResolver)); - LD.BaseLayerHandles.push_back(GVsH); + if (auto GVsHOrErr = + BaseLayer.addModule(std::move(GVsM), std::move(GVsResolver))) + LD.BaseLayerHandles.push_back(*GVsHOrErr); + else + return GVsHOrErr.takeError(); + + return Error::success(); } static std::string mangle(StringRef Name, const DataLayout &DL) { @@ -455,7 +489,7 @@ class CompileOnDemandLayer { return MangledName; } - JITTargetAddress + Expected extractAndCompile(LogicalDylib &LD, typename LogicalDylib::SourceModuleHandle LMId, Function &F) { @@ -468,34 +502,42 @@ class CompileOnDemandLayer { // Grab the name of the function being called here. std::string CalledFnName = mangle(F.getName(), SrcM.getDataLayout()); - auto Part = Partition(F); - auto PartH = emitPartition(LD, LMId, Part); - JITTargetAddress CalledAddr = 0; - for (auto *SubF : Part) { - std::string FnName = mangle(SubF->getName(), SrcM.getDataLayout()); - auto FnBodySym = BaseLayer.findSymbolIn(PartH, FnName, false); - assert(FnBodySym && "Couldn't find function body."); - - JITTargetAddress FnBodyAddr = FnBodySym.getAddress(); - - // If this is the function we're calling record the address so we can - // return it from this function. - if (SubF == &F) - CalledAddr = FnBodyAddr; - - // Update the function body pointer for the stub. 
- if (auto EC = LD.StubsMgr->updatePointer(FnName, FnBodyAddr)) - return 0; - } + auto Part = Partition(F); + if (auto PartHOrErr = emitPartition(LD, LMId, Part)) { + auto &PartH = *PartHOrErr; + for (auto *SubF : Part) { + std::string FnName = mangle(SubF->getName(), SrcM.getDataLayout()); + if (auto FnBodySym = BaseLayer.findSymbolIn(PartH, FnName, false)) { + if (auto FnBodyAddrOrErr = FnBodySym.getAddress()) { + JITTargetAddress FnBodyAddr = *FnBodyAddrOrErr; + + // If this is the function we're calling record the address so we can + // return it from this function. + if (SubF == &F) + CalledAddr = FnBodyAddr; + + // Update the function body pointer for the stub. + if (auto EC = LD.StubsMgr->updatePointer(FnName, FnBodyAddr)) + return 0; + + } else + return FnBodyAddrOrErr.takeError(); + } else if (auto Err = FnBodySym.takeError()) + return std::move(Err); + else + llvm_unreachable("Function not emitted for partition"); + } - LD.BaseLayerHandles.push_back(PartH); + LD.BaseLayerHandles.push_back(PartH); + } else + return PartHOrErr.takeError(); return CalledAddr; } template - BaseLayerModuleSetHandleT + Expected emitPartition(LogicalDylib &LD, typename LogicalDylib::SourceModuleHandle LMId, const PartitionT &Part) { @@ -559,16 +601,18 @@ class CompileOnDemandLayer { // Create memory manager and symbol resolver. auto Resolver = createLambdaResolver( - [this, &LD](const std::string &Name) { + [this, &LD](const std::string &Name) -> JITSymbol { if (auto Sym = LD.findSymbol(BaseLayer, Name, false)) return Sym; + else if (auto Err = Sym.takeError()) + return std::move(Err); return LD.ExternalSymbolResolver->findSymbolInLogicalDylib(Name); }, [&LD](const std::string &Name) { return LD.ExternalSymbolResolver->findSymbol(Name); }); - return LD.ModuleAdder(BaseLayer, std::move(M), std::move(Resolver)); + return BaseLayer.addModule(std::move(M), std::move(Resolver)); } BaseLayerT &BaseLayer; @@ -581,6 +625,7 @@ class CompileOnDemandLayer { }; } // end namespace orc + } // end namespace llvm #endif // LLVM_EXECUTIONENGINE_ORC_COMPILEONDEMANDLAYER_H diff --git a/interpreter/llvm/src/include/llvm/ExecutionEngine/Orc/CompileUtils.h b/interpreter/llvm/src/include/llvm/ExecutionEngine/Orc/CompileUtils.h index ce0864fbd9c9a..b9f7d6accc308 100644 --- a/interpreter/llvm/src/include/llvm/ExecutionEngine/Orc/CompileUtils.h +++ b/interpreter/llvm/src/include/llvm/ExecutionEngine/Orc/CompileUtils.h @@ -1,4 +1,4 @@ -//===-- CompileUtils.h - Utilities for compiling IR in the JIT --*- C++ -*-===// +//===- CompileUtils.h - Utilities for compiling IR in the JIT ---*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -14,24 +14,47 @@ #ifndef LLVM_EXECUTIONENGINE_ORC_COMPILEUTILS_H #define LLVM_EXECUTIONENGINE_ORC_COMPILEUTILS_H +#include "llvm/ADT/SmallVector.h" +#include "llvm/ExecutionEngine/ObjectCache.h" #include "llvm/ExecutionEngine/ObjectMemoryBuffer.h" #include "llvm/IR/LegacyPassManager.h" -#include "llvm/MC/MCContext.h" +#include "llvm/Object/Binary.h" #include "llvm/Object/ObjectFile.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" +#include +#include namespace llvm { + +class MCContext; +class Module; + namespace orc { /// @brief Simple compile functor: Takes a single IR module and returns an /// ObjectFile. 
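The CompileOnDemandLayer hunks above are representative of the whole patch: boolean and assert-based failure paths become llvm::Error / llvm::Expected values that callers must inspect. Before the SimpleCompiler definition that follows, here is a minimal sketch of the consumption idiom those hunks rely on; the helper name getAddrOrPropagate is illustrative, not part of the patch:

    #include "llvm/ExecutionEngine/JITSymbol.h"
    #include "llvm/Support/Error.h"

    // Illustrative helper: 'Sym' stands for any JITSymbol returned by a
    // layer lookup after this patch.
    llvm::Expected<llvm::JITTargetAddress>
    getAddrOrPropagate(llvm::JITSymbol &Sym) {
      if (auto AddrOrErr = Sym.getAddress()) // Expected<JITTargetAddress>
        return *AddrOrErr;                   // success: dereference the value
      else
        return AddrOrErr.takeError();        // failure: move the Error out
    }

An Expected that is never checked aborts in debug builds, which is why the hunks above either dereference, takeError(), or explicitly consumeError() every result.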
class SimpleCompiler { public: + + using CompileResult = object::OwningBinary; + /// @brief Construct a simple compile functor with the given target. - SimpleCompiler(TargetMachine &TM) : TM(TM) {} + SimpleCompiler(TargetMachine &TM, ObjectCache *ObjCache = nullptr) + : TM(TM), ObjCache(ObjCache) {} + + /// @brief Set an ObjectCache to query before compiling. + void setObjectCache(ObjectCache *NewCache) { ObjCache = NewCache; } /// @brief Compile a Module to an ObjectFile. - object::OwningBinary operator()(Module &M) const { + CompileResult operator()(Module &M) { + CompileResult CachedObject = tryToLoadFromObjectCache(M); + if (CachedObject.getBinary()) + return CachedObject; + SmallVector ObjBufferSV; raw_svector_ostream ObjStream(ObjBufferSV); @@ -44,19 +67,47 @@ class SimpleCompiler { new ObjectMemoryBuffer(std::move(ObjBufferSV))); Expected> Obj = object::ObjectFile::createObjectFile(ObjBuffer->getMemBufferRef()); - typedef object::OwningBinary OwningObj; - if (Obj) - return OwningObj(std::move(*Obj), std::move(ObjBuffer)); + if (Obj) { + notifyObjectCompiled(M, *ObjBuffer); + return CompileResult(std::move(*Obj), std::move(ObjBuffer)); + } // TODO: Actually report errors helpfully. consumeError(Obj.takeError()); - return OwningObj(nullptr, nullptr); + return CompileResult(nullptr, nullptr); } private: + + CompileResult tryToLoadFromObjectCache(const Module &M) { + if (!ObjCache) + return CompileResult(); + + std::unique_ptr ObjBuffer = ObjCache->getObject(&M); + if (!ObjBuffer) + return CompileResult(); + + Expected> Obj = + object::ObjectFile::createObjectFile(ObjBuffer->getMemBufferRef()); + if (!Obj) { + // TODO: Actually report errors helpfully. + consumeError(Obj.takeError()); + return CompileResult(); + } + + return CompileResult(std::move(*Obj), std::move(ObjBuffer)); + } + + void notifyObjectCompiled(const Module &M, const MemoryBuffer &ObjBuffer) { + if (ObjCache) + ObjCache->notifyObjectCompiled(&M, ObjBuffer.getMemBufferRef()); + } + TargetMachine &TM; + ObjectCache *ObjCache = nullptr; }; -} // End namespace orc. -} // End namespace llvm. +} // end namespace orc + +} // end namespace llvm #endif // LLVM_EXECUTIONENGINE_ORC_COMPILEUTILS_H diff --git a/interpreter/llvm/src/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h b/interpreter/llvm/src/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h index a32278b8a81ec..d9b45c6a1e297 100644 --- a/interpreter/llvm/src/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h +++ b/interpreter/llvm/src/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h @@ -1,4 +1,4 @@ -//===-- ExecutionUtils.h - Utilities for executing code in Orc --*- C++ -*-===// +//===- ExecutionUtils.h - Utilities for executing code in Orc ---*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -14,11 +14,16 @@ #ifndef LLVM_EXECUTIONENGINE_ORC_EXECUTIONUTILS_H #define LLVM_EXECUTIONENGINE_ORC_EXECUTIONUTILS_H -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/StringMap.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/ExecutionEngine/JITSymbol.h" #include "llvm/ExecutionEngine/RuntimeDyld.h" +#include "llvm/ExecutionEngine/Orc/OrcError.h" +#include +#include +#include #include +#include namespace llvm { @@ -37,7 +42,6 @@ namespace orc { /// getConstructors/getDestructors functions. class CtorDtorIterator { public: - /// @brief Accessor for an element of the global_ctors/global_dtors array. 
/// /// This class provides a read-only view of the element with any casts on @@ -89,33 +93,37 @@ iterator_range getDestructors(const Module &M); template class CtorDtorRunner { public: - /// @brief Construct a CtorDtorRunner for the given range using the given /// name mangling function. CtorDtorRunner(std::vector CtorDtorNames, - typename JITLayerT::ModuleSetHandleT H) + typename JITLayerT::ModuleHandleT H) : CtorDtorNames(std::move(CtorDtorNames)), H(H) {} /// @brief Run the recorded constructors/destructors through the given JIT /// layer. - bool runViaLayer(JITLayerT &JITLayer) const { - typedef void (*CtorDtorTy)(); + Error runViaLayer(JITLayerT &JITLayer) const { + using CtorDtorTy = void (*)(); - bool Error = false; for (const auto &CtorDtorName : CtorDtorNames) if (auto CtorDtorSym = JITLayer.findSymbolIn(H, CtorDtorName, false)) { - CtorDtorTy CtorDtor = - reinterpret_cast( - static_cast(CtorDtorSym.getAddress())); - CtorDtor(); - } else - Error = true; - return !Error; + if (auto AddrOrErr = CtorDtorSym.getAddress()) { + CtorDtorTy CtorDtor = + reinterpret_cast(static_cast(*AddrOrErr)); + CtorDtor(); + } else + return AddrOrErr.takeError(); + } else { + if (auto Err = CtorDtorSym.takeError()) + return Err; + else + return make_error(CtorDtorName); + } + return Error::success(); } private: std::vector CtorDtorNames; - typename JITLayerT::ModuleSetHandleT H; + typename JITLayerT::ModuleHandleT H; }; /// @brief Support class for static dtor execution. For hosted (in-process) JITs @@ -135,7 +143,6 @@ class CtorDtorRunner { /// called. class LocalCXXRuntimeOverrides { public: - /// Create a runtime-overrides class. template LocalCXXRuntimeOverrides(const MangleFtorT &Mangle) { @@ -156,7 +163,6 @@ class LocalCXXRuntimeOverrides { void runDestructors(); private: - template JITTargetAddress toTargetAddress(PtrTy* P) { return static_cast(reinterpret_cast(P)); @@ -168,15 +174,16 @@ class LocalCXXRuntimeOverrides { StringMap CXXRuntimeOverrides; - typedef void (*DestructorPtr)(void*); - typedef std::pair CXXDestructorDataPair; - typedef std::vector CXXDestructorDataPairList; + using DestructorPtr = void (*)(void *); + using CXXDestructorDataPair = std::pair; + using CXXDestructorDataPairList = std::vector; CXXDestructorDataPairList DSOHandleOverride; static int CXAAtExitOverride(DestructorPtr Destructor, void *Arg, void *DSOHandle); }; -} // End namespace orc. -} // End namespace llvm. +} // end namespace orc + +} // end namespace llvm #endif // LLVM_EXECUTIONENGINE_ORC_EXECUTIONUTILS_H diff --git a/interpreter/llvm/src/include/llvm/ExecutionEngine/Orc/GlobalMappingLayer.h b/interpreter/llvm/src/include/llvm/ExecutionEngine/Orc/GlobalMappingLayer.h index 634d1480ae4c0..ff54ef625ebb7 100644 --- a/interpreter/llvm/src/include/llvm/ExecutionEngine/Orc/GlobalMappingLayer.h +++ b/interpreter/llvm/src/include/llvm/ExecutionEngine/Orc/GlobalMappingLayer.h @@ -1,4 +1,4 @@ -//===---- GlobalMappingLayer.h - Run all IR through a functor ---*- C++ -*-===// +//===- GlobalMappingLayer.h - Run all IR through a functor ------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -17,8 +17,14 @@ #include "llvm/ExecutionEngine/JITSymbol.h" #include +#include +#include namespace llvm { + +class Module; +class JITSymbolResolver; + namespace orc { /// @brief Global mapping layer. @@ -31,25 +37,22 @@ namespace orc { template class GlobalMappingLayer { public: - /// @brief Handle to a set of added modules. 
- typedef typename BaseLayerT::ModuleSetHandleT ModuleSetHandleT; + + /// @brief Handle to an added module. + using ModuleHandleT = typename BaseLayerT::ModuleHandleT; /// @brief Construct a GlobalMappingLayer with the given BaseLayer GlobalMappingLayer(BaseLayerT &BaseLayer) : BaseLayer(BaseLayer) {} - /// @brief Add the given module set to the JIT. + /// @brief Add the given module to the JIT. /// @return A handle for the added module. - template - ModuleSetHandleT addModuleSet(ModuleSetT Ms, - MemoryManagerPtrT MemMgr, - SymbolResolverPtrT Resolver) { - return BaseLayer.addModuleSet(std::move(Ms), std::move(MemMgr), - std::move(Resolver)); + ModuleHandleT addModule(std::shared_ptr M, + std::shared_ptr Resolver) { + return BaseLayer.addModule(std::move(M), std::move(Resolver)); } /// @brief Remove the module set associated with the handle H. - void removeModuleSet(ModuleSetHandleT H) { BaseLayer.removeModuleSet(H); } + void removeModule(ModuleHandleT H) { BaseLayer.removeModule(H); } /// @brief Manually set the address to return for the given symbol. void setGlobalMapping(const std::string &Name, JITTargetAddress Addr) { @@ -77,15 +80,15 @@ class GlobalMappingLayer { return BaseLayer.findSymbol(Name, ExportedSymbolsOnly); } - /// @brief Get the address of the given symbol in the context of the set of - /// modules represented by the handle H. This call is forwarded to the + /// @brief Get the address of the given symbol in the context of the + /// module represented by the handle H. This call is forwarded to the /// base layer's implementation. - /// @param H The handle for the module set to search in. + /// @param H The handle for the module to search in. /// @param Name The name of the symbol to search for. /// @param ExportedSymbolsOnly If true, search only for exported symbols. /// @return A handle for the given named symbol, if it is found in the - /// given module set. - JITSymbol findSymbolIn(ModuleSetHandleT H, const std::string &Name, + /// given module. + JITSymbol findSymbolIn(ModuleHandleT H, const std::string &Name, bool ExportedSymbolsOnly) { return BaseLayer.findSymbolIn(H, Name, ExportedSymbolsOnly); } @@ -93,7 +96,7 @@ class GlobalMappingLayer { /// @brief Immediately emit and finalize the module set represented by the /// given handle. /// @param H Handle for module set to emit/finalize. - void emitAndFinalize(ModuleSetHandleT H) { + void emitAndFinalize(ModuleHandleT H) { BaseLayer.emitAndFinalize(H); } @@ -102,7 +105,7 @@ class GlobalMappingLayer { std::map SymbolTable; }; -} // End namespace orc. -} // End namespace llvm.
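Note that, unlike the Error-returning layers elsewhere in this patch, GlobalMappingLayer::addModule still returns a plain handle. A hypothetical client-side sketch of the new single-module interface; BaseLayerT, M, Resolver and HostHook are illustrative stand-ins for a real layer stack, not code from this patch:

    #include "llvm/ExecutionEngine/Orc/GlobalMappingLayer.h"
    #include <cstdint>
    #include <memory>

    template <typename BaseLayerT>
    void wireHostHook(BaseLayerT &BaseLayer,
                      std::shared_ptr<llvm::Module> M,
                      std::shared_ptr<llvm::JITSymbolResolver> Resolver,
                      void (*HostHook)()) {
      llvm::orc::GlobalMappingLayer<BaseLayerT> Mapping(BaseLayer);

      // Pin a JIT-visible symbol to a known host-process address.
      Mapping.setGlobalMapping(
          "host_hook", static_cast<llvm::JITTargetAddress>(
                           reinterpret_cast<std::uintptr_t>(HostHook)));

      // Modules now go in one at a time, as shared_ptrs plus a resolver;
      // the per-set memory manager parameter is gone from this interface.
      auto H = Mapping.addModule(std::move(M), std::move(Resolver));
      (void)H; // would be kept around for a later removeModule(H)
    }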
+} // end namespace orc +} // end namespace llvm #endif // LLVM_EXECUTIONENGINE_ORC_GLOBALMAPPINGLAYER_H diff --git a/interpreter/llvm/src/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h b/interpreter/llvm/src/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h index f16dd021ea518..fadd334bed0f1 100644 --- a/interpreter/llvm/src/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h +++ b/interpreter/llvm/src/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h @@ -1,4 +1,4 @@ -//===------ IRCompileLayer.h -- Eagerly compile IR for JIT ------*- C++ -*-===// +//===- IRCompileLayer.h -- Eagerly compile IR for JIT -----------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -14,79 +14,54 @@ #ifndef LLVM_EXECUTIONENGINE_ORC_IRCOMPILELAYER_H #define LLVM_EXECUTIONENGINE_ORC_IRCOMPILELAYER_H -#include "llvm/ExecutionEngine/ObjectCache.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ExecutionEngine/JITSymbol.h" -#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/Error.h" #include +#include namespace llvm { + +class Module; + namespace orc { /// @brief Eager IR compiling layer. /// -/// This layer accepts sets of LLVM IR Modules (via addModuleSet). It -/// immediately compiles each IR module to an object file (each IR Module is -/// compiled separately). The resulting set of object files is then added to -/// the layer below, which must implement the object layer concept. -template class IRCompileLayer { +/// This layer immediately compiles each IR module added via addModule to an +/// object file and adds this object file to the layer below, which must +/// implement the object layer concept. +template +class IRCompileLayer { public: - typedef std::function(Module &)> - CompileFtor; -private: - typedef typename BaseLayerT::ObjSetHandleT ObjSetHandleT; - -public: - /// @brief Handle to a set of compiled modules. - typedef ObjSetHandleT ModuleSetHandleT; + /// @brief Handle to a compiled module. + using ModuleHandleT = typename BaseLayerT::ObjHandleT; /// @brief Construct an IRCompileLayer with the given BaseLayer, which must /// implement the ObjectLayer concept. IRCompileLayer(BaseLayerT &BaseLayer, CompileFtor Compile) - : BaseLayer(BaseLayer), Compile(std::move(Compile)), ObjCache(nullptr) {} + : BaseLayer(BaseLayer), Compile(std::move(Compile)) {} - /// @brief Set an ObjectCache to query before compiling. - void setObjectCache(ObjectCache *NewCache) { ObjCache = NewCache; } + /// @brief Get a reference to the compiler functor. + CompileFtor& getCompiler() { return Compile; } - /// @brief Compile each module in the given module set, then add the resulting - /// set of objects to the base layer along with the memory manager and - /// symbol resolver. + /// @brief Compile the module, and add the resulting object to the base layer + /// along with the given memory manager and symbol resolver. /// - /// @return A handle for the added modules.
- template - ModuleSetHandleT addModuleSet(ModuleSetT Ms, - MemoryManagerPtrT MemMgr, - SymbolResolverPtrT Resolver) { - std::vector>> - Objects; - - for (const auto &M : Ms) { - auto Object = - llvm::make_unique>(); - - if (ObjCache) - *Object = tryToLoadFromObjectCache(*M); - - if (!Object->getBinary()) { - *Object = Compile(*M); - if (ObjCache) - ObjCache->notifyObjectCompiled(&*M, - Object->getBinary()->getMemoryBufferRef()); - } - - Objects.push_back(std::move(Object)); - } - - ModuleSetHandleT H = - BaseLayer.addObjectSet(std::move(Objects), std::move(MemMgr), - std::move(Resolver)); - - return H; + /// @return A handle for the added module. + Expected + addModule(std::shared_ptr M, + std::shared_ptr Resolver) { + using CompileResult = decltype(Compile(*M)); + auto Obj = std::make_shared(Compile(*M)); + return BaseLayer.addObject(std::move(Obj), std::move(Resolver)); } - /// @brief Remove the module set associated with the handle H. - void removeModuleSet(ModuleSetHandleT H) { BaseLayer.removeObjectSet(H); } + /// @brief Remove the module associated with the handle H. + Error removeModule(ModuleHandleT H) { + return BaseLayer.removeObject(H); + } /// @brief Search for the given named symbol. /// @param Name The name of the symbol to search for. @@ -96,51 +71,33 @@ template class IRCompileLayer { return BaseLayer.findSymbol(Name, ExportedSymbolsOnly); } - /// @brief Get the address of the given symbol in the context of the set of - /// compiled modules represented by the handle H. This call is - /// forwarded to the base layer's implementation. - /// @param H The handle for the module set to search in. + /// @brief Get the address of the given symbol in the compiled module + /// represented by the handle H. This call is forwarded to the base layer's + /// implementation. + /// @param H The handle for the module to search in. /// @param Name The name of the symbol to search for. /// @param ExportedSymbolsOnly If true, search only for exported symbols. /// @return A handle for the given named symbol, if it is found in the - /// given module set. - JITSymbol findSymbolIn(ModuleSetHandleT H, const std::string &Name, + /// given module. + JITSymbol findSymbolIn(ModuleHandleT H, const std::string &Name, bool ExportedSymbolsOnly) { return BaseLayer.findSymbolIn(H, Name, ExportedSymbolsOnly); } - /// @brief Immediately emit and finalize the moduleOB set represented by the - /// given handle. - /// @param H Handle for module set to emit/finalize. - void emitAndFinalize(ModuleSetHandleT H) { - BaseLayer.emitAndFinalize(H); + /// @brief Immediately emit and finalize the module represented by the given + /// handle. + /// @param H Handle for module to emit/finalize. + Error emitAndFinalize(ModuleHandleT H) { + return BaseLayer.emitAndFinalize(H); } private: - object::OwningBinary - tryToLoadFromObjectCache(const Module &M) { - std::unique_ptr ObjBuffer = ObjCache->getObject(&M); - if (!ObjBuffer) - return object::OwningBinary(); - - Expected> Obj = - object::ObjectFile::createObjectFile(ObjBuffer->getMemBufferRef()); - if (!Obj) { - // TODO: Actually report errors helpfully. - consumeError(Obj.takeError()); - return object::OwningBinary(); - } - - return object::OwningBinary(std::move(*Obj), - std::move(ObjBuffer)); - } - BaseLayerT &BaseLayer; CompileFtor Compile; - ObjectCache *ObjCache; };
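The object-cache logic removed here did not disappear: it moved into SimpleCompiler, as shown in the CompileUtils.h hunk earlier. A sketch of the new wiring under the post-patch headers; TM and TheCache are placeholders for objects created elsewhere:

    #include "llvm/ExecutionEngine/Orc/CompileUtils.h"
    #include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
    #include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
    #include "llvm/ExecutionEngine/SectionMemoryManager.h"
    #include <memory>

    void buildStack(llvm::TargetMachine &TM, llvm::ObjectCache &TheCache) {
      using namespace llvm;
      using namespace llvm::orc;

      // The object layer now takes a memory-manager factory (see the
      // RTDyldObjectLinkingLayer hunks near the end of this patch).
      RTDyldObjectLinkingLayer ObjLayer(
          []() { return std::make_shared<SectionMemoryManager>(); });

      // The compiler functor, not the layer, owns the cache now...
      IRCompileLayer<decltype(ObjLayer), SimpleCompiler> CompileLayer(
          ObjLayer, SimpleCompiler(TM, &TheCache));

      // ...and late wiring goes through getCompiler() instead of the
      // removed IRCompileLayer::setObjectCache.
      CompileLayer.getCompiler().setObjectCache(&TheCache);
    }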
+} // end namespace orc + +} // end namespace llvm #endif // LLVM_EXECUTIONENGINE_ORC_IRCOMPILINGLAYER_H diff --git a/interpreter/llvm/src/include/llvm/ExecutionEngine/Orc/IRTransformLayer.h b/interpreter/llvm/src/include/llvm/ExecutionEngine/Orc/IRTransformLayer.h index c67297b111b93..476061afda599 100644 --- a/interpreter/llvm/src/include/llvm/ExecutionEngine/Orc/IRTransformLayer.h +++ b/interpreter/llvm/src/include/llvm/ExecutionEngine/Orc/IRTransformLayer.h @@ -1,4 +1,4 @@ -//===----- IRTransformLayer.h - Run all IR through a functor ----*- C++ -*-===// +//===- IRTransformLayer.h - Run all IR through a functor --------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -15,46 +15,41 @@ #define LLVM_EXECUTIONENGINE_ORC_IRTRANSFORMLAYER_H #include "llvm/ExecutionEngine/JITSymbol.h" +#include +#include namespace llvm { +class Module; namespace orc { /// @brief IR mutating layer. /// -/// This layer accepts sets of LLVM IR Modules (via addModuleSet). It -/// immediately applies the user supplied functor to each module, then adds -/// the set of transformed modules to the layer below. +/// This layer applies a user supplied transform to each module that is added, +/// then adds the transformed module to the layer below. template class IRTransformLayer { public: + /// @brief Handle to a set of added modules. - typedef typename BaseLayerT::ModuleSetHandleT ModuleSetHandleT; + using ModuleHandleT = typename BaseLayerT::ModuleHandleT; /// @brief Construct an IRTransformLayer with the given BaseLayer IRTransformLayer(BaseLayerT &BaseLayer, TransformFtor Transform = TransformFtor()) : BaseLayer(BaseLayer), Transform(std::move(Transform)) {} - /// @brief Apply the transform functor to each module in the module set, then - /// add the resulting set of modules to the base layer, along with the - /// memory manager and symbol resolver. + /// @brief Apply the transform functor to the module, then add the module to + /// the layer below, along with the memory manager and symbol resolver. /// /// @return A handle for the added modules. - template - ModuleSetHandleT addModuleSet(ModuleSetT Ms, - MemoryManagerPtrT MemMgr, - SymbolResolverPtrT Resolver) { - - for (auto I = Ms.begin(), E = Ms.end(); I != E; ++I) - *I = Transform(std::move(*I)); - - return BaseLayer.addModuleSet(std::move(Ms), std::move(MemMgr), - std::move(Resolver)); + Expected + addModule(std::shared_ptr M, + std::shared_ptr Resolver) { + return BaseLayer.addModule(Transform(std::move(M)), std::move(Resolver)); } - /// @brief Remove the module set associated with the handle H. - void removeModuleSet(ModuleSetHandleT H) { BaseLayer.removeModuleSet(H); } + /// @brief Remove the module associated with the handle H. + Error removeModule(ModuleHandleT H) { return BaseLayer.removeModule(H); } /// @brief Search for the given named symbol. /// @param Name The name of the symbol to search for. @@ -64,24 +59,24 @@ class IRTransformLayer { return BaseLayer.findSymbol(Name, ExportedSymbolsOnly); } - /// @brief Get the address of the given symbol in the context of the set of - /// modules represented by the handle H. This call is forwarded to the - /// base layer's implementation. - /// @param H The handle for the module set to search in. + /// @brief Get the address of the given symbol in the context of the module + /// represented by the handle H. This call is forwarded to the base + /// layer's implementation. + /// @param H The handle for the module to search in. /// @param Name The name of the symbol to search for. 
/// @param ExportedSymbolsOnly If true, search only for exported symbols. /// @return A handle for the given named symbol, if it is found in the - /// given module set. - JITSymbol findSymbolIn(ModuleSetHandleT H, const std::string &Name, + /// given module. + JITSymbol findSymbolIn(ModuleHandleT H, const std::string &Name, bool ExportedSymbolsOnly) { return BaseLayer.findSymbolIn(H, Name, ExportedSymbolsOnly); } - /// @brief Immediately emit and finalize the module set represented by the - /// given handle. - /// @param H Handle for module set to emit/finalize. - void emitAndFinalize(ModuleSetHandleT H) { - BaseLayer.emitAndFinalize(H); + /// @brief Immediately emit and finalize the module represented by the given + /// handle. + /// @param H Handle for module to emit/finalize. + Error emitAndFinalize(ModuleHandleT H) { + return BaseLayer.emitAndFinalize(H); } /// @brief Access the transform functor directly. @@ -95,7 +90,7 @@ class IRTransformLayer { TransformFtor Transform; }; -} // End namespace orc. -} // End namespace llvm. +} // end namespace orc +} // end namespace llvm #endif // LLVM_EXECUTIONENGINE_ORC_IRTRANSFORMLAYER_H diff --git a/interpreter/llvm/src/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h b/interpreter/llvm/src/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h index 07bbd921dad62..e038093d7628c 100644 --- a/interpreter/llvm/src/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h +++ b/interpreter/llvm/src/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h @@ -1,4 +1,4 @@ -//===-- IndirectionUtils.h - Utilities for adding indirections --*- C++ -*-===// +//===- IndirectionUtils.h - Utilities for adding indirections ---*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -18,9 +18,6 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/ExecutionEngine/JITSymbol.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/Mangler.h" -#include "llvm/IR/Module.h" #include "llvm/Support/Error.h" #include "llvm/Support/Memory.h" #include "llvm/Support/Process.h" @@ -36,12 +33,23 @@ #include namespace llvm { + +class Constant; +class Function; +class FunctionType; +class GlobalAlias; +class GlobalVariable; +class Module; +class PointerType; +class Triple; +class Value; + namespace orc { /// @brief Target-independent base class for compile callback management. class JITCompileCallbackManager { public: - typedef std::function CompileFtor; + using CompileFtor = std::function; /// @brief Handle to a newly created compile callback. Can be used to get an /// IR constant representing the address of the trampoline, and to set @@ -125,7 +133,7 @@ class JITCompileCallbackManager { protected: JITTargetAddress ErrorHandlerAddress; - typedef std::map TrampolineMapT; + using TrampolineMapT = std::map; TrampolineMapT ActiveTrampolines; std::vector AvailableTrampolines; @@ -155,7 +163,6 @@ class LocalJITCompileCallbackManager : public JITCompileCallbackManager { /// process to be used if a compile callback fails. LocalJITCompileCallbackManager(JITTargetAddress ErrorHandlerAddress) : JITCompileCallbackManager(ErrorHandlerAddress) { - /// Set up the resolver block. std::error_code EC; ResolverBlock = sys::OwningMemoryBlock(sys::Memory::allocateMappedMemory( @@ -220,7 +227,7 @@ class LocalJITCompileCallbackManager : public JITCompileCallbackManager { class IndirectStubsManager { public: /// @brief Map type for initializing the manager. See init. 
- typedef StringMap> StubInitsMap; + using StubInitsMap = StringMap>; virtual ~IndirectStubsManager() = default; @@ -336,7 +343,7 @@ class LocalIndirectStubsManager : public IndirectStubsManager { } std::vector IndirectStubsInfos; - typedef std::pair StubKey; + using StubKey = std::pair; std::vector FreeStubs; StringMap> StubIndexes; }; @@ -432,6 +439,7 @@ void cloneModuleFlagsMetadata(Module &Dst, const Module &Src, ValueToValueMapTy &VMap); } // end namespace orc + } // end namespace llvm #endif // LLVM_EXECUTIONENGINE_ORC_INDIRECTIONUTILS_H diff --git a/interpreter/llvm/src/include/llvm/ExecutionEngine/Orc/LambdaResolver.h b/interpreter/llvm/src/include/llvm/ExecutionEngine/Orc/LambdaResolver.h index cbe2a80edf1c1..228392ae0d4ac 100644 --- a/interpreter/llvm/src/include/llvm/ExecutionEngine/Orc/LambdaResolver.h +++ b/interpreter/llvm/src/include/llvm/ExecutionEngine/Orc/LambdaResolver.h @@ -1,4 +1,4 @@ -//===-- LambdaResolverMM - Redirect symbol lookup via a functor -*- C++ -*-===// +//===- LambdaResolverMM - Redirect symbol lookup via a functor --*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -16,7 +16,7 @@ #define LLVM_EXECUTIONENGINE_ORC_LAMBDARESOLVER_H #include "llvm/ADT/STLExtras.h" -#include "llvm/ExecutionEngine/RuntimeDyld.h" +#include "llvm/ExecutionEngine/JITSymbol.h" #include namespace llvm { @@ -25,7 +25,6 @@ namespace orc { template class LambdaResolver : public JITSymbolResolver { public: - LambdaResolver(DylibLookupFtorT DylibLookupFtor, ExternalLookupFtorT ExternalLookupFtor) : DylibLookupFtor(DylibLookupFtor), @@ -46,15 +45,15 @@ class LambdaResolver : public JITSymbolResolver { template -std::unique_ptr> +std::shared_ptr> createLambdaResolver(DylibLookupFtorT DylibLookupFtor, ExternalLookupFtorT ExternalLookupFtor) { - typedef LambdaResolver LR; + using LR = LambdaResolver; return make_unique(std::move(DylibLookupFtor), std::move(ExternalLookupFtor)); } -} // End namespace orc. -} // End namespace llvm. +} // end namespace orc +} // end namespace llvm #endif // LLVM_EXECUTIONENGINE_ORC_LAMBDARESOLVER_H diff --git a/interpreter/llvm/src/include/llvm/ExecutionEngine/Orc/LazyEmittingLayer.h b/interpreter/llvm/src/include/llvm/ExecutionEngine/Orc/LazyEmittingLayer.h index 53d4c0cfe5d4d..b7e462e85d9dc 100644 --- a/interpreter/llvm/src/include/llvm/ExecutionEngine/Orc/LazyEmittingLayer.h +++ b/interpreter/llvm/src/include/llvm/ExecutionEngine/Orc/LazyEmittingLayer.h @@ -34,19 +34,21 @@ namespace orc { /// @brief Lazy-emitting IR layer. /// -/// This layer accepts sets of LLVM IR Modules (via addModuleSet), but does -/// not immediately emit them the layer below. Instead, emissing to the base -/// layer is deferred until the first time the client requests the address -/// (via JITSymbol::getAddress) for a symbol contained in this layer. +/// This layer accepts LLVM IR Modules (via addModule), but does not +/// immediately emit them to the layer below. Instead, emitting to the base +/// layer is deferred until the first time the client requests the address (via +/// JITSymbol::getAddress) for a symbol contained in this layer.
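A hypothetical lookup helper showing that deferral in action against the class defined below; lazyLookup and its parameters are illustrative, and JITSymbolNotFound is the error class added in the OrcError.h hunk further down:

    #include "llvm/ExecutionEngine/Orc/LazyEmittingLayer.h"
    #include "llvm/ExecutionEngine/Orc/OrcError.h"
    #include <memory>
    #include <string>

    template <typename BaseLayerT>
    llvm::Expected<llvm::JITTargetAddress>
    lazyLookup(BaseLayerT &Base, std::shared_ptr<llvm::Module> M,
               std::shared_ptr<llvm::JITSymbolResolver> Resolver,
               const std::string &MangledName) {
      llvm::orc::LazyEmittingLayer<BaseLayerT> LazyLayer(Base);

      // addModule now returns Expected<ModuleHandleT>; nothing is emitted yet.
      auto HOrErr = LazyLayer.addModule(std::move(M), std::move(Resolver));
      if (!HOrErr)
        return HOrErr.takeError();

      // The first address request is what triggers emission to the base layer.
      if (auto Sym = LazyLayer.findSymbol(MangledName, true))
        return Sym.getAddress();
      return llvm::make_error<llvm::orc::JITSymbolNotFound>(MangledName);
    }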
template class LazyEmittingLayer { public: - typedef typename BaseLayerT::ModuleSetHandleT BaseLayerHandleT; + + using BaseLayerHandleT = typename BaseLayerT::ModuleHandleT; private: - class EmissionDeferredSet { + class EmissionDeferredModule { public: - EmissionDeferredSet() = default; - virtual ~EmissionDeferredSet() = default; + EmissionDeferredModule(std::shared_ptr M, + std::shared_ptr Resolver) + : M(std::move(M)), Resolver(std::move(Resolver)) {} JITSymbol find(StringRef Name, bool ExportedSymbolsOnly, BaseLayerT &B) { switch (EmitState) { @@ -58,16 +60,24 @@ template class LazyEmittingLayer { std::string PName = Name; JITSymbolFlags Flags = JITSymbolFlags::fromGlobalValue(*GV); auto GetAddress = - [this, ExportedSymbolsOnly, PName, &B]() -> JITTargetAddress { + [this, ExportedSymbolsOnly, PName, &B]() -> Expected { if (this->EmitState == Emitting) return 0; else if (this->EmitState == NotEmitted) { this->EmitState = Emitting; - Handle = this->emitToBaseLayer(B); + if (auto HandleOrErr = this->emitToBaseLayer(B)) + Handle = std::move(*HandleOrErr); + else + return HandleOrErr.takeError(); this->EmitState = Emitted; } - auto Sym = B.findSymbolIn(Handle, PName, ExportedSymbolsOnly); - return Sym.getAddress(); + if (auto Sym = B.findSymbolIn(Handle, PName, ExportedSymbolsOnly)) + return Sym.getAddress(); + else if (auto Err = Sym.takeError()) + return std::move(Err); + else + llvm_unreachable("Successful symbol lookup should return " + "definition address here"); }; return JITSymbol(std::move(GetAddress), Flags); } else @@ -84,9 +94,9 @@ template class LazyEmittingLayer { llvm_unreachable("Invalid emit-state."); } - void removeModulesFromBaseLayer(BaseLayerT &BaseLayer) { - if (EmitState != NotEmitted) - BaseLayer.removeModuleSet(Handle); + Error removeModuleFromBaseLayer(BaseLayerT& BaseLayer) { + return EmitState != NotEmitted ? BaseLayer.removeModule(Handle) + : Error::success(); } void emitAndFinalize(BaseLayerT &BaseLayer) { @@ -100,35 +110,10 @@ template class LazyEmittingLayer { BaseLayer.emitAndFinalize(Handle); } - template - static std::unique_ptr - create(BaseLayerT &B, ModuleSetT Ms, MemoryManagerPtrT MemMgr, - SymbolResolverPtrT Resolver); - - protected: - virtual const GlobalValue* searchGVs(StringRef Name, - bool ExportedSymbolsOnly) const = 0; - virtual BaseLayerHandleT emitToBaseLayer(BaseLayerT &BaseLayer) = 0; - private: - enum { NotEmitted, Emitting, Emitted } EmitState = NotEmitted; - BaseLayerHandleT Handle; - }; - - template - class EmissionDeferredSetImpl : public EmissionDeferredSet { - public: - EmissionDeferredSetImpl(ModuleSetT Ms, - MemoryManagerPtrT MemMgr, - SymbolResolverPtrT Resolver) - : Ms(std::move(Ms)), MemMgr(std::move(MemMgr)), - Resolver(std::move(Resolver)) {} - protected: const GlobalValue* searchGVs(StringRef Name, - bool ExportedSymbolsOnly) const override { + bool ExportedSymbolsOnly) const { // FIXME: We could clean all this up if we had a way to reliably demangle // names: We could just demangle name and search, rather than // mangling everything else. @@ -150,15 +135,13 @@ template class LazyEmittingLayer { return buildMangledSymbols(Name, ExportedSymbolsOnly); } - BaseLayerHandleT emitToBaseLayer(BaseLayerT &BaseLayer) override { + Expected emitToBaseLayer(BaseLayerT &BaseLayer) { // We don't need the mangled names set any more: Once we've emitted this // to the base layer we'll just look for symbols there. 
MangledSymbols.reset(); - return BaseLayer.addModuleSet(std::move(Ms), std::move(MemMgr), - std::move(Resolver)); + return BaseLayer.addModule(std::move(M), std::move(Resolver)); } - private: // If the mangled name of the given GlobalValue matches the given search // name (and its visibility conforms to the ExportedSymbolsOnly flag) then // return the symbol. Otherwise, add the mangled name to the Names map and @@ -197,56 +180,55 @@ template class LazyEmittingLayer { auto Symbols = llvm::make_unique>(); - for (const auto &M : Ms) { - Mangler Mang; + Mangler Mang; - for (const auto &GO : M->global_objects()) + for (const auto &GO : M->global_objects()) if (auto GV = addGlobalValue(*Symbols, GO, Mang, SearchName, ExportedSymbolsOnly)) return GV; - } MangledSymbols = std::move(Symbols); return nullptr; } - ModuleSetT Ms; - MemoryManagerPtrT MemMgr; - SymbolResolverPtrT Resolver; + enum { NotEmitted, Emitting, Emitted } EmitState = NotEmitted; + BaseLayerHandleT Handle; + std::shared_ptr M; + std::shared_ptr Resolver; mutable std::unique_ptr> MangledSymbols; }; - typedef std::list> ModuleSetListT; + using ModuleListT = std::list>; BaseLayerT &BaseLayer; - ModuleSetListT ModuleSetList; + ModuleListT ModuleList; public: - /// @brief Handle to a set of loaded modules. - typedef typename ModuleSetListT::iterator ModuleSetHandleT; + + /// @brief Handle to a loaded module. + using ModuleHandleT = typename ModuleListT::iterator; /// @brief Construct a lazy emitting layer. LazyEmittingLayer(BaseLayerT &BaseLayer) : BaseLayer(BaseLayer) {} - /// @brief Add the given set of modules to the lazy emitting layer. - template - ModuleSetHandleT addModuleSet(ModuleSetT Ms, - MemoryManagerPtrT MemMgr, - SymbolResolverPtrT Resolver) { - return ModuleSetList.insert( - ModuleSetList.end(), - EmissionDeferredSet::create(BaseLayer, std::move(Ms), std::move(MemMgr), - std::move(Resolver))); + /// @brief Add the given module to the lazy emitting layer. + Expected + addModule(std::shared_ptr M, + std::shared_ptr Resolver) { + return ModuleList.insert( + ModuleList.end(), + llvm::make_unique(std::move(M), + std::move(Resolver))); } - /// @brief Remove the module set represented by the given handle. + /// @brief Remove the module represented by the given handle. /// - /// This method will free the memory associated with the given module set, - /// both in this layer, and the base layer. - void removeModuleSet(ModuleSetHandleT H) { - (*H)->removeModulesFromBaseLayer(BaseLayer); - ModuleSetList.erase(H); + /// This method will free the memory associated with the given module, both + /// in this layer, and the base layer. + Error removeModule(ModuleHandleT H) { + Error Err = (*H)->removeModuleFromBaseLayer(BaseLayer); + ModuleList.erase(H); + return Err; } /// @brief Search for the given named symbol. @@ -258,45 +240,32 @@ template class LazyEmittingLayer { if (auto Symbol = BaseLayer.findSymbol(Name, ExportedSymbolsOnly)) return Symbol; - // If not found then search the deferred sets. If any of these contain a + // If not found then search the deferred modules. If any of these contain a // definition of 'Name' then they will return a JITSymbol that will emit // the corresponding module when the symbol address is requested. 
- for (auto &DeferredSet : ModuleSetList) - if (auto Symbol = DeferredSet->find(Name, ExportedSymbolsOnly, BaseLayer)) + for (auto &DeferredMod : ModuleList) + if (auto Symbol = DeferredMod->find(Name, ExportedSymbolsOnly, BaseLayer)) return Symbol; // If no definition found anywhere return a null symbol. return nullptr; } - /// @brief Get the address of the given symbol in the context of the set of + /// @brief Get the address of the given symbol in the context of the /// compiled modules represented by the handle H. - JITSymbol findSymbolIn(ModuleSetHandleT H, const std::string &Name, + JITSymbol findSymbolIn(ModuleHandleT H, const std::string &Name, bool ExportedSymbolsOnly) { return (*H)->find(Name, ExportedSymbolsOnly, BaseLayer); } - /// @brief Immediately emit and finalize the moduleOB set represented by the - /// given handle. - /// @param H Handle for module set to emit/finalize. - void emitAndFinalize(ModuleSetHandleT H) { - (*H)->emitAndFinalize(BaseLayer); + /// @brief Immediately emit and finalize the module represented by the given + /// handle. + /// @param H Handle for module to emit/finalize. + Error emitAndFinalize(ModuleHandleT H) { + return (*H)->emitAndFinalize(BaseLayer); } }; -template -template -std::unique_ptr::EmissionDeferredSet> -LazyEmittingLayer::EmissionDeferredSet::create( - BaseLayerT &B, ModuleSetT Ms, MemoryManagerPtrT MemMgr, - SymbolResolverPtrT Resolver) { - typedef EmissionDeferredSetImpl - EDS; - return llvm::make_unique(std::move(Ms), std::move(MemMgr), - std::move(Resolver)); -} - } // end namespace orc } // end namespace llvm diff --git a/interpreter/llvm/src/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h b/interpreter/llvm/src/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h index 173c106cd3ec9..cb47e7520b1ab 100644 --- a/interpreter/llvm/src/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h +++ b/interpreter/llvm/src/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h @@ -15,20 +15,23 @@ #define LLVM_EXECUTIONENGINE_ORC_OBJECTTRANSFORMLAYER_H #include "llvm/ExecutionEngine/JITSymbol.h" +#include +#include +#include namespace llvm { namespace orc { /// @brief Object mutating layer. /// -/// This layer accepts sets of ObjectFiles (via addObjectSet). It +/// This layer accepts sets of ObjectFiles (via addObject). It /// immediately applies the user supplied functor to each object, then adds /// the set of transformed objects to the layer below. template class ObjectTransformLayer { public: /// @brief Handle to a set of added objects. - typedef typename BaseLayerT::ObjSetHandleT ObjSetHandleT; + using ObjHandleT = typename BaseLayerT::ObjHandleT; /// @brief Construct an ObjectTransformLayer with the given BaseLayer ObjectTransformLayer(BaseLayerT &BaseLayer, @@ -40,20 +43,14 @@ class ObjectTransformLayer { /// memory manager and symbol resolver. /// /// @return A handle for the added objects. - template - ObjSetHandleT addObjectSet(ObjSetT Objects, MemoryManagerPtrT MemMgr, - SymbolResolverPtrT Resolver) { - - for (auto I = Objects.begin(), E = Objects.end(); I != E; ++I) - *I = Transform(std::move(*I)); - - return BaseLayer.addObjectSet(std::move(Objects), std::move(MemMgr), - std::move(Resolver)); + template + Expected addObject(ObjectPtr Obj, + std::shared_ptr Resolver) { + return BaseLayer.addObject(Transform(std::move(Obj)), std::move(Resolver)); } /// @brief Remove the object set associated with the handle H.
- void removeObjectSet(ObjSetHandleT H) { BaseLayer.removeObjectSet(H); } + Error removeObject(ObjHandleT H) { return BaseLayer.removeObject(H); } /// @brief Search for the given named symbol. /// @param Name The name of the symbol to search for. @@ -71,7 +68,7 @@ class ObjectTransformLayer { /// @param ExportedSymbolsOnly If true, search only for exported symbols. /// @return A handle for the given named symbol, if it is found in the /// given object set. - JITSymbol findSymbolIn(ObjSetHandleT H, const std::string &Name, + JITSymbol findSymbolIn(ObjHandleT H, const std::string &Name, bool ExportedSymbolsOnly) { return BaseLayer.findSymbolIn(H, Name, ExportedSymbolsOnly); } @@ -79,10 +76,12 @@ class ObjectTransformLayer { /// @brief Immediately emit and finalize the object set represented by the /// given handle. /// @param H Handle for object set to emit/finalize. - void emitAndFinalize(ObjSetHandleT H) { BaseLayer.emitAndFinalize(H); } + Error emitAndFinalize(ObjHandleT H) { + return BaseLayer.emitAndFinalize(H); + } /// @brief Map section addresses for the objects associated with the handle H. - void mapSectionAddress(ObjSetHandleT H, const void *LocalAddress, + void mapSectionAddress(ObjHandleT H, const void *LocalAddress, JITTargetAddress TargetAddr) { BaseLayer.mapSectionAddress(H, LocalAddress, TargetAddr); } @@ -98,7 +97,7 @@ class ObjectTransformLayer { TransformFtor Transform; }; -} // End namespace orc. -} // End namespace llvm. +} // end namespace orc +} // end namespace llvm #endif // LLVM_EXECUTIONENGINE_ORC_OBJECTTRANSFORMLAYER_H diff --git a/interpreter/llvm/src/include/llvm/ExecutionEngine/Orc/OrcABISupport.h b/interpreter/llvm/src/include/llvm/ExecutionEngine/Orc/OrcABISupport.h index fa236b0de88a3..e1b55649b9f2c 100644 --- a/interpreter/llvm/src/include/llvm/ExecutionEngine/Orc/OrcABISupport.h +++ b/interpreter/llvm/src/include/llvm/ExecutionEngine/Orc/OrcABISupport.h @@ -1,4 +1,4 @@ -//===-------------- OrcABISupport.h - ABI support code ---------*- C++ -*-===// +//===- OrcABISupport.h - ABI support code -----------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -18,9 +18,12 @@ #ifndef LLVM_EXECUTIONENGINE_ORC_ORCABISUPPORT_H #define LLVM_EXECUTIONENGINE_ORC_ORCABISUPPORT_H -#include "IndirectionUtils.h" +#include "llvm/ExecutionEngine/JITSymbol.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Memory.h" -#include "llvm/Support/Process.h" +#include +#include namespace llvm { namespace orc { @@ -37,8 +40,8 @@ class OrcGenericABI { static const unsigned TrampolineSize = 1; static const unsigned ResolverCodeSize = 1; - typedef JITTargetAddress (*JITReentryFn)(void *CallbackMgr, - void *TrampolineId); + using JITReentryFn = JITTargetAddress (*)(void *CallbackMgr, + void *TrampolineId); static void writeResolverCode(uint8_t *ResolveMem, JITReentryFn Reentry, void *CallbackMgr) { @@ -55,6 +58,7 @@ class OrcGenericABI { class IndirectStubsInfo { public: const static unsigned StubSize = 1; + unsigned getNumStubs() const { llvm_unreachable("Not supported"); } void *getStub(unsigned Idx) const { llvm_unreachable("Not supported"); } void **getPtr(unsigned Idx) const { llvm_unreachable("Not supported"); } @@ -73,13 +77,14 @@ template class GenericIndirectStubsInfo { public: const static unsigned StubSize = StubSizeVal; - GenericIndirectStubsInfo() : NumStubs(0) {} + GenericIndirectStubsInfo() = default; GenericIndirectStubsInfo(unsigned NumStubs, sys::OwningMemoryBlock StubsMem) : NumStubs(NumStubs), 
StubsMem(std::move(StubsMem)) {} GenericIndirectStubsInfo(GenericIndirectStubsInfo &&Other) : NumStubs(Other.NumStubs), StubsMem(std::move(Other.StubsMem)) { Other.NumStubs = 0; } + GenericIndirectStubsInfo &operator=(GenericIndirectStubsInfo &&Other) { NumStubs = Other.NumStubs; Other.NumStubs = 0; @@ -104,7 +109,7 @@ template class GenericIndirectStubsInfo { } private: - unsigned NumStubs; + unsigned NumStubs = 0; sys::OwningMemoryBlock StubsMem; }; @@ -114,10 +119,10 @@ class OrcAArch64 { static const unsigned TrampolineSize = 12; static const unsigned ResolverCodeSize = 0x120; - typedef GenericIndirectStubsInfo<8> IndirectStubsInfo; + using IndirectStubsInfo = GenericIndirectStubsInfo<8>; - typedef JITTargetAddress (*JITReentryFn)(void *CallbackMgr, - void *TrampolineId); + using JITReentryFn = JITTargetAddress (*)(void *CallbackMgr, + void *TrampolineId); /// @brief Write the resolver code into the given memory. The user is /// responsible for allocating the memory and setting permissions. @@ -148,7 +153,7 @@ class OrcX86_64_Base { static const unsigned PointerSize = 8; static const unsigned TrampolineSize = 8; - typedef GenericIndirectStubsInfo<8> IndirectStubsInfo; + using IndirectStubsInfo = GenericIndirectStubsInfo<8>; /// @brief Write the requested number of trampolines into the given memory, /// which must be big enough to hold 1 pointer, plus NumTrampolines @@ -172,8 +177,9 @@ class OrcX86_64_Base { class OrcX86_64_SysV : public OrcX86_64_Base { public: static const unsigned ResolverCodeSize = 0x6C; - typedef JITTargetAddress (*JITReentryFn)(void *CallbackMgr, - void *TrampolineId); + + using JITReentryFn = JITTargetAddress (*)(void *CallbackMgr, + void *TrampolineId); /// @brief Write the resolver code into the given memory. The user is /// responsible for allocating the memory and setting permissions. @@ -187,8 +193,9 @@ class OrcX86_64_SysV : public OrcX86_64_Base { class OrcX86_64_Win32 : public OrcX86_64_Base { public: static const unsigned ResolverCodeSize = 0x74; - typedef JITTargetAddress (*JITReentryFn)(void *CallbackMgr, - void *TrampolineId); + + using JITReentryFn = JITTargetAddress (*)(void *CallbackMgr, + void *TrampolineId); /// @brief Write the resolver code into the given memory. The user is /// responsible for allocating the memory and setting permissions. @@ -205,10 +212,10 @@ class OrcI386 { static const unsigned TrampolineSize = 8; static const unsigned ResolverCodeSize = 0x4a; - typedef GenericIndirectStubsInfo<8> IndirectStubsInfo; + using IndirectStubsInfo = GenericIndirectStubsInfo<8>; - typedef JITTargetAddress (*JITReentryFn)(void *CallbackMgr, - void *TrampolineId); + using JITReentryFn = JITTargetAddress (*)(void *CallbackMgr, + void *TrampolineId); /// @brief Write the resolver code into the given memory. The user is /// responsible for allocating the memory and setting permissions. @@ -231,7 +238,7 @@ class OrcI386 { unsigned MinStubs, void *InitialPtrVal); }; -} // End namespace orc. -} // End namespace llvm.
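Aside from the Error plumbing, most of the churn in this header is the mechanical typedef-to-using conversion seen throughout the patch. Both spellings below declare the same function-pointer type; the alias form reads left-to-right and, unlike typedef, can also be templated. The alias names are illustrative, not from the patch:

    #include "llvm/ADT/StringMap.h"
    #include "llvm/ExecutionEngine/JITSymbol.h"
    #include <type_traits>
    #include <utility>

    // Old and new spellings of the same reentry-function pointer type:
    typedef llvm::JITTargetAddress (*ReentryFnOld)(void *CallbackMgr,
                                                   void *TrampolineId);
    using ReentryFnNew = llvm::JITTargetAddress (*)(void *CallbackMgr,
                                                    void *TrampolineId);
    static_assert(std::is_same<ReentryFnOld, ReentryFnNew>::value,
                  "both spellings name the same type");

    // Only the alias form supports template parameters:
    template <typename T>
    using AddressMap = llvm::StringMap<std::pair<T, llvm::JITSymbolFlags>>;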
+} // end namespace orc +} // end namespace llvm #endif // LLVM_EXECUTIONENGINE_ORC_ORCABISUPPORT_H diff --git a/interpreter/llvm/src/include/llvm/ExecutionEngine/Orc/OrcError.h b/interpreter/llvm/src/include/llvm/ExecutionEngine/Orc/OrcError.h index cbb40fad02230..e6374b70967ae 100644 --- a/interpreter/llvm/src/include/llvm/ExecutionEngine/Orc/OrcError.h +++ b/interpreter/llvm/src/include/llvm/ExecutionEngine/Orc/OrcError.h @@ -22,7 +22,8 @@ namespace orc { enum class OrcErrorCode : int { // RPC Errors - RemoteAllocatorDoesNotExist = 1, + JITSymbolNotFound = 1, + RemoteAllocatorDoesNotExist, RemoteAllocatorIdAlreadyInUse, RemoteMProtectAddrUnrecognized, RemoteIndirectStubsOwnerDoesNotExist, @@ -37,6 +38,18 @@ enum class OrcErrorCode : int { std::error_code orcError(OrcErrorCode ErrCode); +class JITSymbolNotFound : public ErrorInfo { +public: + static char ID; + + JITSymbolNotFound(std::string SymbolName); + std::error_code convertToErrorCode() const override; + void log(raw_ostream &OS) const override; + const std::string &getSymbolName() const; +private: + std::string SymbolName; +}; + } // End namespace orc. } // End namespace llvm. diff --git a/interpreter/llvm/src/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h b/interpreter/llvm/src/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h index a19c30631c573..da02250ba1692 100644 --- a/interpreter/llvm/src/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h +++ b/interpreter/llvm/src/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h @@ -1,4 +1,4 @@ -//===---- OrcRemoteTargetClient.h - Orc Remote-target Client ----*- C++ -*-===// +//===- OrcRemoteTargetClient.h - Orc Remote-target Client -------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -16,10 +16,29 @@ #ifndef LLVM_EXECUTIONENGINE_ORC_ORCREMOTETARGETCLIENT_H #define LLVM_EXECUTIONENGINE_ORC_ORCREMOTETARGETCLIENT_H -#include "IndirectionUtils.h" -#include "OrcRemoteTargetRPCAPI.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ExecutionEngine/JITSymbol.h" +#include "llvm/ExecutionEngine/Orc/IndirectionUtils.h" +#include "llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h" #include "llvm/ExecutionEngine/RuntimeDyld.h" -#include +#include "llvm/Support/Debug.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/Memory.h" +#include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include +#include +#include +#include +#include #define DEBUG_TYPE "orc-remote" @@ -207,7 +226,6 @@ class OrcRemoteTargetClient : public OrcRemoteTargetRPCAPI { DEBUG(dbgs() << "Allocator " << Id << " finalizing:\n"); for (auto &ObjAllocs : Unfinalized) { - for (auto &Alloc : ObjAllocs.CodeAllocs) { DEBUG(dbgs() << " copying code: " << static_cast(Alloc.getLocalAddress()) << " -> " @@ -469,7 +487,7 @@ class OrcRemoteTargetClient : public OrcRemoteTargetRPCAPI { OrcRemoteTargetClient &Remote; ResourceIdMgr::ResourceId Id; std::vector RemoteIndirectStubsInfos; - typedef std::pair StubKey; + using StubKey = std::pair; std::vector FreeStubs; StringMap> StubIndexes; @@ -710,7 +728,6 @@ class OrcRemoteTargetClient : public OrcRemoteTargetRPCAPI { Expected reserveMem(ResourceIdMgr::ResourceId Id, uint64_t Size, uint32_t Align) { - // Check for an 'out-of-band' error, e.g. from an MM destructor. 
if (ExistingError) return std::move(ExistingError); diff --git a/interpreter/llvm/src/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h b/interpreter/llvm/src/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h index 3086ef0cdf803..07ae7f04d1a02 100644 --- a/interpreter/llvm/src/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h +++ b/interpreter/llvm/src/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h @@ -1,4 +1,4 @@ -//===--- OrcRemoteTargetRPCAPI.h - Orc Remote-target RPC API ----*- C++ -*-===// +//===- OrcRemoteTargetRPCAPI.h - Orc Remote-target RPC API ------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -16,12 +16,13 @@ #ifndef LLVM_EXECUTIONENGINE_ORC_ORCREMOTETARGETRPCAPI_H #define LLVM_EXECUTIONENGINE_ORC_ORCREMOTETARGETRPCAPI_H -#include "RPCUtils.h" -#include "RawByteChannel.h" #include "llvm/ExecutionEngine/JITSymbol.h" +#include "llvm/ExecutionEngine/Orc/RPCUtils.h" +#include "llvm/ExecutionEngine/Orc/RawByteChannel.h" namespace llvm { namespace orc { + namespace remote { class DirectBufferWriter { @@ -72,7 +73,7 @@ class SerializationTraits< return EC; char *Addr = reinterpret_cast(static_cast(Dst)); - DBW = remote::DirectBufferWriter(0, Dst, Size); + DBW = remote::DirectBufferWriter(nullptr, Dst, Size); return C.readBytes(Addr, Size); } @@ -87,7 +88,7 @@ class OrcRemoteTargetRPCAPI protected: class ResourceIdMgr { public: - typedef uint64_t ResourceId; + using ResourceId = uint64_t; static const ResourceId InvalidId = ~0U; ResourceId getNext() { @@ -98,6 +99,7 @@ class OrcRemoteTargetRPCAPI } return NextId++; } + void release(ResourceId I) { FreeIds.push_back(I); } private: @@ -261,7 +263,8 @@ class OrcRemoteTargetRPCAPI }; } // end namespace remote + } // end namespace orc } // end namespace llvm -#endif +#endif // LLVM_EXECUTIONENGINE_ORC_ORCREMOTETARGETRPCAPI_H diff --git a/interpreter/llvm/src/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetServer.h b/interpreter/llvm/src/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetServer.h index a61ff102be0b0..e7b6d64931b6a 100644 --- a/interpreter/llvm/src/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetServer.h +++ b/interpreter/llvm/src/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetServer.h @@ -1,4 +1,4 @@ -//===---- OrcRemoteTargetServer.h - Orc Remote-target Server ----*- C++ -*-===// +//===- OrcRemoteTargetServer.h - Orc Remote-target Server -------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -15,10 +15,9 @@ #ifndef LLVM_EXECUTIONENGINE_ORC_ORCREMOTETARGETSERVER_H #define LLVM_EXECUTIONENGINE_ORC_ORCREMOTETARGETSERVER_H -#include "OrcRemoteTargetRPCAPI.h" #include "llvm/ExecutionEngine/JITSymbol.h" #include "llvm/ExecutionEngine/Orc/OrcError.h" -#include "llvm/ExecutionEngine/RTDyldMemoryManager.h" +#include "llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Error.h" #include "llvm/Support/Format.h" @@ -48,20 +47,18 @@ namespace remote { template class OrcRemoteTargetServer : public OrcRemoteTargetRPCAPI { public: - typedef std::function - SymbolLookupFtor; + using SymbolLookupFtor = + std::function; - typedef std::function - EHFrameRegistrationFtor; + using EHFrameRegistrationFtor = + std::function; OrcRemoteTargetServer(ChannelT &Channel, SymbolLookupFtor SymbolLookup, EHFrameRegistrationFtor EHFramesRegister, EHFrameRegistrationFtor EHFramesDeregister) : OrcRemoteTargetRPCAPI(Channel), SymbolLookup(std::move(SymbolLookup)), EHFramesRegister(std::move(EHFramesRegister)), - 
EHFramesDeregister(std::move(EHFramesDeregister)), - TerminateFlag(false) { - + EHFramesDeregister(std::move(EHFramesDeregister)) { using ThisT = typename std::remove_reference::type; addHandler(*this, &ThisT::handleCallIntVoid); addHandler(*this, &ThisT::handleCallMain); @@ -106,6 +103,7 @@ class OrcRemoteTargetServer : public OrcRemoteTargetRPCAPI { struct Allocator { Allocator() = default; Allocator(Allocator &&Other) : Allocs(std::move(Other.Allocs)) {} + Allocator &operator=(Allocator &&Other) { Allocs = std::move(Other.Allocs); return *this; @@ -153,7 +151,8 @@ class OrcRemoteTargetServer : public OrcRemoteTargetRPCAPI { } Expected handleCallIntVoid(JITTargetAddress Addr) { - typedef int (*IntVoidFnTy)(); + using IntVoidFnTy = int (*)(); + IntVoidFnTy Fn = reinterpret_cast(static_cast(Addr)); @@ -166,7 +165,7 @@ class OrcRemoteTargetServer : public OrcRemoteTargetRPCAPI { Expected handleCallMain(JITTargetAddress Addr, std::vector Args) { - typedef int (*MainFnTy)(int, const char *[]); + using MainFnTy = int (*)(int, const char *[]); MainFnTy Fn = reinterpret_cast(static_cast(Addr)); int ArgC = Args.size() + 1; @@ -175,6 +174,12 @@ class OrcRemoteTargetServer : public OrcRemoteTargetRPCAPI { ArgV[0] = ""; for (auto &Arg : Args) ArgV[Idx++] = Arg.c_str(); + ArgV[ArgC] = 0; + DEBUG( + for (int Idx = 0; Idx < ArgC; ++Idx) { + llvm::dbgs() << "Arg " << Idx << ": " << ArgV[Idx] << "\n"; + } + ); DEBUG(dbgs() << " Calling " << format("0x%016x", Addr) << "\n"); int Result = Fn(ArgC, ArgV.get()); @@ -184,7 +189,8 @@ class OrcRemoteTargetServer : public OrcRemoteTargetRPCAPI { } Error handleCallVoidVoid(JITTargetAddress Addr) { - typedef void (*VoidVoidFnTy)(); + using VoidVoidFnTy = void (*)(); + VoidVoidFnTy Fn = reinterpret_cast(static_cast(Addr)); @@ -420,11 +426,11 @@ class OrcRemoteTargetServer : public OrcRemoteTargetRPCAPI { SymbolLookupFtor SymbolLookup; EHFrameRegistrationFtor EHFramesRegister, EHFramesDeregister; std::map Allocators; - typedef std::vector ISBlockOwnerList; + using ISBlockOwnerList = std::vector; std::map IndirectStubsOwners; sys::OwningMemoryBlock ResolverBlock; std::vector TrampolineBlocks; - bool TerminateFlag; + bool TerminateFlag = false; }; } // end namespace remote @@ -433,4 +439,4 @@ class OrcRemoteTargetServer : public OrcRemoteTargetRPCAPI { #undef DEBUG_TYPE -#endif +#endif // LLVM_EXECUTIONENGINE_ORC_ORCREMOTETARGETSERVER_H diff --git a/interpreter/llvm/src/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h b/interpreter/llvm/src/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h index 5b3426afe584b..e1016ef95f0c2 100644 --- a/interpreter/llvm/src/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h +++ b/interpreter/llvm/src/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h @@ -1,4 +1,4 @@ -//===-- RTDyldObjectLinkingLayer.h - RTDyld-based jit linking --*- C++ -*-===// +//===- RTDyldObjectLinkingLayer.h - RTDyld-based jit linking ---*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -17,14 +17,12 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" -#include "llvm/ExecutionEngine/ExecutionEngine.h" #include "llvm/ExecutionEngine/JITSymbol.h" #include "llvm/ExecutionEngine/RuntimeDyld.h" -#include "llvm/ExecutionEngine/SectionMemoryManager.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Support/Error.h" -#include #include +#include #include #include #include @@ -36,20 +34,26 @@ namespace llvm { namespace orc { class RTDyldObjectLinkingLayerBase { +public: + + 
diff --git a/interpreter/llvm/src/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h b/interpreter/llvm/src/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h
index 5b3426afe584b..e1016ef95f0c2 100644
--- a/interpreter/llvm/src/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h
+++ b/interpreter/llvm/src/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h
@@ -1,4 +1,4 @@
-//===-- RTDyldObjectLinkingLayer.h - RTDyld-based jit linking --*- C++ -*-===//
+//===- RTDyldObjectLinkingLayer.h - RTDyld-based jit linking ---*- C++ -*-===//
 //
 // The LLVM Compiler Infrastructure
 //
@@ -17,14 +17,12 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
-#include "llvm/ExecutionEngine/ExecutionEngine.h"
 #include "llvm/ExecutionEngine/JITSymbol.h"
 #include "llvm/ExecutionEngine/RuntimeDyld.h"
-#include "llvm/ExecutionEngine/SectionMemoryManager.h"
 #include "llvm/Object/ObjectFile.h"
 #include "llvm/Support/Error.h"
-#include <algorithm>
 #include <list>
+#include <memory>
 #include <string>
 #include <utility>
 #include <vector>
@@ -36,20 +34,26 @@ namespace llvm {
 namespace orc {

 class RTDyldObjectLinkingLayerBase {
+public:
+
+  using ObjectPtr =
+    std::shared_ptr<object::OwningBinary<object::ObjectFile>>;
+
 protected:
+
   /// @brief Holds a set of objects to be allocated/linked as a unit in the JIT.
   ///
   /// An instance of this class will be created for each set of objects added
-  /// via JITObjectLayer::addObjectSet. Deleting the instance (via
-  /// removeObjectSet) frees its memory, removing all symbol definitions that
+  /// via JITObjectLayer::addObject. Deleting the instance (via
+  /// removeObject) frees its memory, removing all symbol definitions that
   /// had been provided by this instance. Higher level layers are responsible
   /// for taking any action required to handle the missing symbols.
-  class LinkedObjectSet {
+  class LinkedObject {
   public:
-    LinkedObjectSet() = default;
-    LinkedObjectSet(const LinkedObjectSet&) = delete;
-    void operator=(const LinkedObjectSet&) = delete;
-    virtual ~LinkedObjectSet() = default;
+    LinkedObject() = default;
+    LinkedObject(const LinkedObject&) = delete;
+    void operator=(const LinkedObject&) = delete;
+    virtual ~LinkedObject() = default;

     virtual void finalize() = 0;
@@ -76,19 +80,11 @@ class RTDyldObjectLinkingLayerBase {
     bool Finalized = false;
   };

-  typedef std::list<std::unique_ptr<LinkedObjectSet>> LinkedObjectSetListT;
+  using LinkedObjectListT = std::list<std::unique_ptr<LinkedObject>>;

 public:
   /// @brief Handle to a set of loaded objects.
-  typedef LinkedObjectSetListT::iterator ObjSetHandleT;
-};
-
-/// @brief Default (no-op) action to perform when loading objects.
-class DoNothingOnNotifyLoaded {
-public:
-  template <typename ObjSetT, typename LoadResult>
-  void operator()(RTDyldObjectLinkingLayerBase::ObjSetHandleT, const ObjSetT &,
-                  const LoadResult &) {}
+  using ObjHandleT = LinkedObjectListT::iterator;
 };

 /// @brief Bare bones object linking layer.
@@ -97,46 +93,54 @@ class DoNothingOnNotifyLoaded {
 /// object files to be loaded into memory, linked, and the addresses of their
 /// symbols queried. All objects added to this layer can see each other's
 /// symbols.
-template <typename NotifyLoadedFtor = DoNothingOnNotifyLoaded>
 class RTDyldObjectLinkingLayer : public RTDyldObjectLinkingLayerBase {
 public:
+
+  using RTDyldObjectLinkingLayerBase::ObjectPtr;
+
+  /// @brief Functor for receiving object-loaded notifications.
+  using NotifyLoadedFtor =
+      std::function<void(ObjHandleT, const ObjectPtr &Obj,
+                         const RuntimeDyld::LoadedObjectInfo &)>;
+
   /// @brief Functor for receiving finalization notifications.
-  typedef std::function<void(ObjSetHandleT)> NotifyFinalizedFtor;
+  using NotifyFinalizedFtor = std::function<void(ObjHandleT)>;

 private:
-  template <typename ObjSetT, typename MemoryManagerPtrT,
-            typename SymbolResolverPtrT, typename FinalizerFtor>
-  class ConcreteLinkedObjectSet : public LinkedObjectSet {
+
+  template <typename MemoryManagerPtrT, typename SymbolResolverPtrT,
+            typename FinalizerFtor>
+  class ConcreteLinkedObject : public LinkedObject {
   public:
-    ConcreteLinkedObjectSet(ObjSetT Objects, MemoryManagerPtrT MemMgr,
-                            SymbolResolverPtrT Resolver,
-                            FinalizerFtor Finalizer,
-                            bool ProcessAllSections)
+    ConcreteLinkedObject(ObjectPtr Obj, MemoryManagerPtrT MemMgr,
+                         SymbolResolverPtrT Resolver,
+                         FinalizerFtor Finalizer,
+                         bool ProcessAllSections)
       : MemMgr(std::move(MemMgr)),
-        PFC(llvm::make_unique<PreFinalizeContents>(std::move(Objects),
+        PFC(llvm::make_unique<PreFinalizeContents>(std::move(Obj),
                                                    std::move(Resolver),
                                                    std::move(Finalizer),
                                                    ProcessAllSections)) {
-      buildInitialSymbolTable(PFC->Objects);
+      buildInitialSymbolTable(PFC->Obj);
    }

-    ~ConcreteLinkedObjectSet() override {
+    ~ConcreteLinkedObject() override {
      MemMgr->deregisterEHFrames();
    }

-    void setHandle(ObjSetHandleT H) {
+    void setHandle(ObjHandleT H) {
      PFC->Handle = H;
    }

    void finalize() override {
-      assert(PFC && "mapSectionAddress called on finalized LinkedObjectSet");
+      assert(PFC && "mapSectionAddress called on finalized LinkedObject");

      RuntimeDyld RTDyld(*MemMgr, *PFC->Resolver);
      RTDyld.setProcessAllSections(PFC->ProcessAllSections);
      PFC->RTDyld = &RTDyld;

      this->Finalized = true;
-      PFC->Finalizer(PFC->Handle, RTDyld, std::move(PFC->Objects),
+      PFC->Finalizer(PFC->Handle, RTDyld, std::move(PFC->Obj),
                     [&]() {
                       this->updateSymbolTable(RTDyld);
                     });
@@ -158,27 +162,27 @@ class RTDyldObjectLinkingLayer : public RTDyldObjectLinkingLayerBase {

    void mapSectionAddress(const void *LocalAddress,
                           JITTargetAddress TargetAddr) const override {
-      assert(PFC && "mapSectionAddress called on finalized LinkedObjectSet");
-      assert(PFC->RTDyld && "mapSectionAddress called on raw LinkedObjectSet");
+      assert(PFC && "mapSectionAddress called on finalized LinkedObject");
+      assert(PFC->RTDyld && "mapSectionAddress called on raw LinkedObject");
      PFC->RTDyld->mapSectionAddress(LocalAddress, TargetAddr);
    }

  private:
-    void buildInitialSymbolTable(const ObjSetT &Objects) {
-      for (const auto &Obj : Objects)
-        for (auto &Symbol : getObject(*Obj).symbols()) {
-          if (Symbol.getFlags() & object::SymbolRef::SF_Undefined)
-            continue;
-          Expected<StringRef> SymbolName = Symbol.getName();
-          // FIXME: Raise an error for bad symbols.
-          if (!SymbolName) {
-            consumeError(SymbolName.takeError());
-            continue;
-          }
-          auto Flags = JITSymbolFlags::fromObjectSymbol(Symbol);
-          SymbolTable.insert(
-            std::make_pair(*SymbolName, JITEvaluatedSymbol(0, Flags)));
+
+    void buildInitialSymbolTable(const ObjectPtr &Obj) {
+      for (auto &Symbol : Obj->getBinary()->symbols()) {
+        if (Symbol.getFlags() & object::SymbolRef::SF_Undefined)
+          continue;
+        Expected<StringRef> SymbolName = Symbol.getName();
+        // FIXME: Raise an error for bad symbols.
+        if (!SymbolName) {
+          consumeError(SymbolName.takeError());
+          continue;
        }
+        auto Flags = JITSymbolFlags::fromObjectSymbol(Symbol);
+        SymbolTable.insert(
+          std::make_pair(*SymbolName, JITEvaluatedSymbol(0, Flags)));
+      }
    }

    void updateSymbolTable(const RuntimeDyld &RTDyld) {
@@ -189,17 +193,17 @@ class RTDyldObjectLinkingLayer : public RTDyldObjectLinkingLayerBase {

    // Contains the information needed prior to finalization: the object files,
    // memory manager, resolver, and flags needed for RuntimeDyld.
    struct PreFinalizeContents {
-      PreFinalizeContents(ObjSetT Objects, SymbolResolverPtrT Resolver,
+      PreFinalizeContents(ObjectPtr Obj, SymbolResolverPtrT Resolver,
                          FinalizerFtor Finalizer, bool ProcessAllSections)
-        : Objects(std::move(Objects)), Resolver(std::move(Resolver)),
+        : Obj(std::move(Obj)), Resolver(std::move(Resolver)),
          Finalizer(std::move(Finalizer)),
          ProcessAllSections(ProcessAllSections) {}

-      ObjSetT Objects;
+      ObjectPtr Obj;
      SymbolResolverPtrT Resolver;
      FinalizerFtor Finalizer;
      bool ProcessAllSections;
-      ObjSetHandleT Handle;
+      ObjHandleT Handle;
      RuntimeDyld *RTDyld;
    };

@@ -207,34 +211,35 @@ class RTDyldObjectLinkingLayer : public RTDyldObjectLinkingLayerBase {
    std::unique_ptr<PreFinalizeContents> PFC;
  };

-  template <typename ObjSetT, typename MemoryManagerPtrT,
-            typename SymbolResolverPtrT, typename FinalizerFtor>
+  template <typename MemoryManagerPtrT, typename SymbolResolverPtrT,
+            typename FinalizerFtor>
  std::unique_ptr<
-    ConcreteLinkedObjectSet<ObjSetT, MemoryManagerPtrT,
-                            SymbolResolverPtrT, FinalizerFtor>>
-  createLinkedObjectSet(ObjSetT Objects, MemoryManagerPtrT MemMgr,
-                        SymbolResolverPtrT Resolver,
-                        FinalizerFtor Finalizer,
-                        bool ProcessAllSections) {
-    typedef ConcreteLinkedObjectSet<ObjSetT, MemoryManagerPtrT,
-                                    SymbolResolverPtrT, FinalizerFtor> LOS;
-    return llvm::make_unique<LOS>(std::move(Objects), std::move(MemMgr),
+    ConcreteLinkedObject<MemoryManagerPtrT, SymbolResolverPtrT, FinalizerFtor>>
+  createLinkedObject(ObjectPtr Obj, MemoryManagerPtrT MemMgr,
+                     SymbolResolverPtrT Resolver,
+                     FinalizerFtor Finalizer,
+                     bool ProcessAllSections) {
+    using LOS = ConcreteLinkedObject<MemoryManagerPtrT, SymbolResolverPtrT,
+                                     FinalizerFtor>;
+    return llvm::make_unique<LOS>(std::move(Obj), std::move(MemMgr),
                                  std::move(Resolver), std::move(Finalizer),
                                  ProcessAllSections);
  }

 public:
-  /// @brief LoadedObjectInfo list. Contains a list of owning pointers to
-  /// RuntimeDyld::LoadedObjectInfo instances.
-  typedef std::vector<std::unique_ptr<RuntimeDyld::LoadedObjectInfo>>
-      LoadedObjInfoList;
+
+  /// @brief Functor for creating memory managers.
+  using MemoryManagerGetter =
+      std::function<std::shared_ptr<RuntimeDyld::MemoryManager>()>;

  /// @brief Construct an ObjectLinkingLayer with the given NotifyLoaded,
  ///        and NotifyFinalized functors.
  RTDyldObjectLinkingLayer(
+      MemoryManagerGetter GetMemMgr,
      NotifyLoadedFtor NotifyLoaded = NotifyLoadedFtor(),
      NotifyFinalizedFtor NotifyFinalized = NotifyFinalizedFtor())
-      : NotifyLoaded(std::move(NotifyLoaded)),
+      : GetMemMgr(GetMemMgr),
+        NotifyLoaded(std::move(NotifyLoaded)),
        NotifyFinalized(std::move(NotifyFinalized)),
        ProcessAllSections(false) {}

@@ -253,23 +258,18 @@ class RTDyldObjectLinkingLayer : public RTDyldObjectLinkingLayerBase {
  ///
  /// @return A handle that can be used to refer to the loaded objects (for
  ///         symbol searching, finalization, freeing memory, etc.).
-  template <typename ObjSetT, typename MemoryManagerPtrT,
-            typename SymbolResolverPtrT>
-  ObjSetHandleT addObjectSet(ObjSetT Objects,
-                             MemoryManagerPtrT MemMgr,
-                             SymbolResolverPtrT Resolver) {
-    auto Finalizer = [&](ObjSetHandleT H, RuntimeDyld &RTDyld,
-                         const ObjSetT &Objs,
+  Expected<ObjHandleT> addObject(ObjectPtr Obj,
+                                 std::shared_ptr<JITSymbolResolver> Resolver) {
+    auto Finalizer = [&](ObjHandleT H, RuntimeDyld &RTDyld,
+                         const ObjectPtr &ObjToLoad,
                         std::function<void()> LOSHandleLoad) {
-      LoadedObjInfoList LoadedObjInfos;
-
-      for (auto &Obj : Objs)
-        LoadedObjInfos.push_back(RTDyld.loadObject(this->getObject(*Obj)));
+      std::unique_ptr<RuntimeDyld::LoadedObjectInfo> Info =
+        RTDyld.loadObject(*ObjToLoad->getBinary());

      LOSHandleLoad();

-      this->NotifyLoaded(H, Objs, LoadedObjInfos);
+      if (this->NotifyLoaded)
+        this->NotifyLoaded(H, ObjToLoad, *Info);

      RTDyld.finalizeWithMemoryManagerLocking();

@@ -277,17 +277,16 @@ class RTDyldObjectLinkingLayer : public RTDyldObjectLinkingLayerBase {
        this->NotifyFinalized(H);
    };

-    auto LOS =
-      createLinkedObjectSet(std::move(Objects), std::move(MemMgr),
-                            std::move(Resolver), std::move(Finalizer),
-                            ProcessAllSections);
+    auto LO =
+      createLinkedObject(std::move(Obj), GetMemMgr(),
+                         std::move(Resolver), std::move(Finalizer),
+                         ProcessAllSections);
-    // LOS is an owning-ptr. Keep a non-owning one so that we can set the handle
-    // below.
-    auto *LOSPtr = LOS.get();
+    // LO is an owning-ptr. Keep a non-owning one so that we can set the handle
+    // below.
+    auto *LOPtr = LO.get();

-    ObjSetHandleT Handle = LinkedObjSetList.insert(LinkedObjSetList.end(),
-                                                   std::move(LOS));
-    LOSPtr->setHandle(Handle);
+    ObjHandleT Handle = LinkedObjList.insert(LinkedObjList.end(), std::move(LO));
+    LOPtr->setHandle(Handle);

    return Handle;
  }

@@ -300,9 +299,10 @@ class RTDyldObjectLinkingLayer : public RTDyldObjectLinkingLayerBase {
  /// indirectly) will result in undefined behavior. If dependence tracking is
  /// required to detect or resolve such issues it should be added at a higher
  /// layer.
-  void removeObjectSet(ObjSetHandleT H) {
+  Error removeObject(ObjHandleT H) {
    // How do we invalidate the symbols in H?
-    LinkedObjSetList.erase(H);
+    LinkedObjList.erase(H);
+    return Error::success();
  }

  /// @brief Search for the given named symbol.
@@ -310,7 +310,7 @@ class RTDyldObjectLinkingLayer : public RTDyldObjectLinkingLayerBase {
  /// @param ExportedSymbolsOnly If true, search only for exported symbols.
  /// @return A handle for the given named symbol, if it exists.
  JITSymbol findSymbol(StringRef Name, bool ExportedSymbolsOnly) {
-    for (auto I = LinkedObjSetList.begin(), E = LinkedObjSetList.end(); I != E;
+    for (auto I = LinkedObjList.begin(), E = LinkedObjList.end(); I != E;
         ++I)
      if (auto Symbol = findSymbolIn(I, Name, ExportedSymbolsOnly))
        return Symbol;
@@ -325,13 +325,13 @@ class RTDyldObjectLinkingLayer : public RTDyldObjectLinkingLayerBase {
  /// @param ExportedSymbolsOnly If true, search only for exported symbols.
  /// @return A handle for the given named symbol, if it is found in the
  ///         given object set.
-  JITSymbol findSymbolIn(ObjSetHandleT H, StringRef Name,
+  JITSymbol findSymbolIn(ObjHandleT H, StringRef Name,
                         bool ExportedSymbolsOnly) {
    return (*H)->getSymbol(Name, ExportedSymbolsOnly);
  }

  /// @brief Map section addresses for the objects associated with the handle H.
-  void mapSectionAddress(ObjSetHandleT H, const void *LocalAddress,
+  void mapSectionAddress(ObjHandleT H, const void *LocalAddress,
                         JITTargetAddress TargetAddr) {
    (*H)->mapSectionAddress(LocalAddress, TargetAddr);
  }

  /// @brief Immediately emit and finalize the object set represented by the
  ///        given handle.
  /// @param H Handle for object set to emit/finalize.
-  void emitAndFinalize(ObjSetHandleT H) {
+  Error emitAndFinalize(ObjHandleT H) {
    (*H)->finalize();
+    return Error::success();
  }

 private:
-  static const object::ObjectFile& getObject(const object::ObjectFile &Obj) {
-    return Obj;
-  }
-
-  template <typename ObjT>
-  static const object::ObjectFile&
-  getObject(const object::OwningBinary<ObjT> &Obj) {
-    return *Obj.getBinary();
-  }
-
-  LinkedObjectSetListT LinkedObjSetList;
+  LinkedObjectListT LinkedObjList;
+  MemoryManagerGetter GetMemMgr;
  NotifyLoadedFtor NotifyLoaded;
  NotifyFinalizedFtor NotifyFinalized;
-  bool ProcessAllSections;
+  bool ProcessAllSections = false;
 };

 } // end namespace orc
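To make the API shift concrete, here is a minimal sketch of a client of the new single-object interface. The function is hypothetical (ObjPtr and Resolver are assumed to come from the surrounding JIT setup) and only exercises the calls introduced above: construction from a memory-manager getter, the Expected-returning addObject, and the Error-returning emitAndFinalize/removeObject.

    #include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
    #include "llvm/ExecutionEngine/SectionMemoryManager.h"

    using namespace llvm;
    using namespace llvm::orc;

    static Error addAndLookUp(RTDyldObjectLinkingLayer::ObjectPtr ObjPtr,
                              std::shared_ptr<JITSymbolResolver> Resolver) {
      // Each added object asks this getter for a fresh memory manager.
      RTDyldObjectLinkingLayer Layer(
          []() { return std::make_shared<SectionMemoryManager>(); });

      auto Handle = Layer.addObject(std::move(ObjPtr), std::move(Resolver));
      if (!Handle)
        return Handle.takeError();
      if (auto Err = Layer.emitAndFinalize(*Handle))
        return Err;
      JITSymbol Main = Layer.findSymbolIn(*Handle, "main",
                                          /*ExportedSymbolsOnly=*/true);
      (void)Main; // a real client would resolve and call it here
      return Layer.removeObject(*Handle);
    }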
diff --git a/interpreter/llvm/src/include/llvm/ExecutionEngine/Orc/RawByteChannel.h b/interpreter/llvm/src/include/llvm/ExecutionEngine/Orc/RawByteChannel.h
index 52a546f7c6eb9..db810f4ef2e53 100644
--- a/interpreter/llvm/src/include/llvm/ExecutionEngine/Orc/RawByteChannel.h
+++ b/interpreter/llvm/src/include/llvm/ExecutionEngine/Orc/RawByteChannel.h
@@ -10,20 +10,14 @@
 #ifndef LLVM_EXECUTIONENGINE_ORC_RAWBYTECHANNEL_H
 #define LLVM_EXECUTIONENGINE_ORC_RAWBYTECHANNEL_H

-#include "OrcError.h"
-#include "RPCSerialization.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ExecutionEngine/Orc/RPCSerialization.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/Error.h"
-#include <cstddef>
 #include <cstdint>
 #include <mutex>
 #include <string>
-#include <tuple>
 #include <type_traits>
-#include <vector>

 namespace llvm {
 namespace orc {
@@ -32,7 +26,7 @@ namespace rpc {
 /// Interface for byte-streams to be used with RPC.
 class RawByteChannel {
 public:
-  virtual ~RawByteChannel() {}
+  virtual ~RawByteChannel() = default;

   /// Read Size bytes from the stream into *Dst.
   virtual Error readBytes(char *Dst, unsigned Size) = 0;
diff --git a/interpreter/llvm/src/include/llvm/ExecutionEngine/RTDyldMemoryManager.h b/interpreter/llvm/src/include/llvm/ExecutionEngine/RTDyldMemoryManager.h
index 74535fe948fff..0c1862c5c3ea7 100644
--- a/interpreter/llvm/src/include/llvm/ExecutionEngine/RTDyldMemoryManager.h
+++ b/interpreter/llvm/src/include/llvm/ExecutionEngine/RTDyldMemoryManager.h
@@ -14,10 +14,10 @@
 #ifndef LLVM_EXECUTIONENGINE_RTDYLDMEMORYMANAGER_H
 #define LLVM_EXECUTIONENGINE_RTDYLDMEMORYMANAGER_H

+#include "llvm-c/ExecutionEngine.h"
 #include "llvm/ExecutionEngine/JITSymbol.h"
 #include "llvm/ExecutionEngine/RuntimeDyld.h"
 #include "llvm/Support/CBindingWrapping.h"
-#include "llvm-c/ExecutionEngine.h"
 #include <cstddef>
 #include <cstdint>
 #include <string>
@@ -135,12 +135,13 @@ class RTDyldMemoryManager : public MCJITMemoryManager,
   virtual void *getPointerToNamedFunction(const std::string &Name,
                                           bool AbortOnFailure = true);

-private:
+protected:
   struct EHFrame {
     uint8_t *Addr;
     size_t Size;
   };
-  std::vector<EHFrame> EHFrames;
+  typedef std::vector<EHFrame> EHFrameInfos;
+  EHFrameInfos EHFrames;
 };

 // Create wrappers for C Binding types (see CBindingWrapping.h).
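The RTDyldMemoryManager hunk widens EHFrames from private to protected and names its type EHFrameInfos, so subclasses can walk the frames they registered. A hypothetical subclass (sketch only; it assumes the deregisterEHFramesInProcess helper that RTDyldMemoryManager provides):

    #include "llvm/ExecutionEngine/SectionMemoryManager.h"

    class FrameWalkingMemoryManager : public llvm::SectionMemoryManager {
    public:
      void deregisterEHFrames() override {
        // EHFrames (of type EHFrameInfos) is visible here after this patch.
        for (auto &Frame : EHFrames)
          deregisterEHFramesInProcess(Frame.Addr, Frame.Size);
        EHFrames.clear();
      }
    };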
diff --git a/interpreter/llvm/src/include/llvm/ExecutionEngine/RuntimeDyld.h b/interpreter/llvm/src/include/llvm/ExecutionEngine/RuntimeDyld.h
index 9470866dc0d6f..56aa04ce694a6 100644
--- a/interpreter/llvm/src/include/llvm/ExecutionEngine/RuntimeDyld.h
+++ b/interpreter/llvm/src/include/llvm/ExecutionEngine/RuntimeDyld.h
@@ -1,4 +1,4 @@
-//===-- RuntimeDyld.h - Run-time dynamic linker for MC-JIT ------*- C++ -*-===//
+//===- RuntimeDyld.h - Run-time dynamic linker for MC-JIT -------*- C++ -*-===//
 //
 // The LLVM Compiler Infrastructure
 //
@@ -32,7 +32,9 @@ namespace llvm {

 namespace object {
-  template <typename T> class OwningBinary;
+
+template <typename T> class OwningBinary;
+
 } // end namespace object

 /// Base class for errors originating in RuntimeDyld, e.g. missing relocation
@@ -51,8 +53,8 @@ class RuntimeDyldError : public ErrorInfo<RuntimeDyldError> {
   std::string ErrMsg;
 };

-class RuntimeDyldImpl;
 class RuntimeDyldCheckerImpl;
+class RuntimeDyldImpl;

 class RuntimeDyld {
   friend class RuntimeDyldCheckerImpl;
@@ -68,7 +70,7 @@ class RuntimeDyld {
     friend class RuntimeDyldImpl;

   public:
-    typedef std::map<object::SectionRef, unsigned> ObjSectionToIDMap;
+    using ObjSectionToIDMap = std::map<object::SectionRef, unsigned>;

     LoadedObjectInfo(RuntimeDyldImpl &RTDyld, ObjSectionToIDMap ObjSecToIDMap)
         : RTDyld(RTDyld), ObjSecToIDMap(std::move(ObjSecToIDMap)) {}
@@ -86,21 +88,6 @@ class RuntimeDyld {
     ObjSectionToIDMap ObjSecToIDMap;
   };

-  template <typename Derived> struct LoadedObjectInfoHelper : LoadedObjectInfo {
-  protected:
-    LoadedObjectInfoHelper(const LoadedObjectInfoHelper &) = default;
-    LoadedObjectInfoHelper() = default;
-
-  public:
-    LoadedObjectInfoHelper(RuntimeDyldImpl &RTDyld,
-                           LoadedObjectInfo::ObjSectionToIDMap ObjSecToIDMap)
-        : LoadedObjectInfo(RTDyld, std::move(ObjSecToIDMap)) {}
-
-    std::unique_ptr<llvm::LoadedObjectInfo> clone() const override {
-      return llvm::make_unique<Derived>(static_cast<const Derived &>(*this));
-    }
-  };
-
   /// \brief Memory Management.
   class MemoryManager {
     friend class RuntimeDyld;
@@ -186,7 +173,7 @@ class RuntimeDyld {
   /// \brief Construct a RuntimeDyld instance.
   RuntimeDyld(MemoryManager &MemMgr, JITSymbolResolver &Resolver);
   RuntimeDyld(const RuntimeDyld &) = delete;
-  void operator=(const RuntimeDyld &) = delete;
+  RuntimeDyld &operator=(const RuntimeDyld &) = delete;
   ~RuntimeDyld();

   /// Add the referenced object file to the list of objects to be loaded and
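The operator= fix above is cosmetic but worth imitating: a deleted copy-assignment should keep the canonical T &operator=(const T &) shape so the deleted declaration matches the implicitly generated one. A minimal sketch of the convention (illustration only, not from the patch):

    class NonCopyable {
    public:
      NonCopyable() = default;
      NonCopyable(const NonCopyable &) = delete;
      NonCopyable &operator=(const NonCopyable &) = delete; // not 'void'
    };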
diff --git a/interpreter/llvm/src/include/llvm/IR/Argument.h b/interpreter/llvm/src/include/llvm/IR/Argument.h
index 5ed6d030c9843..497dca44547cd 100644
--- a/interpreter/llvm/src/include/llvm/IR/Argument.h
+++ b/interpreter/llvm/src/include/llvm/IR/Argument.h
@@ -27,8 +27,7 @@ namespace llvm {
 /// for a specific function. When used in the body of said function, the
 /// argument of course represents the value of the actual argument that the
 /// function was called with.
-class Argument : public Value {
-  virtual void anchor();
+class Argument final : public Value {
   Function *Parent;
   unsigned ArgNo;

@@ -121,7 +120,7 @@ class Argument : public Value {
   bool hasAttribute(Attribute::AttrKind Kind) const;

   /// Method for support type inquiry through isa, cast, and dyn_cast.
-  static inline bool classof(const Value *V) {
+  static bool classof(const Value *V) {
     return V->getValueID() == ArgumentVal;
   }
 };
diff --git a/interpreter/llvm/src/include/llvm/IR/Attributes.h b/interpreter/llvm/src/include/llvm/IR/Attributes.h
index d4a896c018676..0cab8bbb8eade 100644
--- a/interpreter/llvm/src/include/llvm/IR/Attributes.h
+++ b/interpreter/llvm/src/include/llvm/IR/Attributes.h
@@ -16,13 +16,13 @@
 #ifndef LLVM_IR_ATTRIBUTES_H
 #define LLVM_IR_ATTRIBUTES_H

+#include "llvm-c/Types.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/FoldingSet.h"
-#include "llvm/ADT/iterator_range.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator_range.h"
 #include "llvm/Support/PointerLikeTypeTraits.h"
-#include "llvm-c/Types.h"
 #include <bitset>
 #include <cassert>
 #include <cstdint>
@@ -228,34 +228,31 @@ class AttributeSet {
   bool operator==(const AttributeSet &O) { return SetNode == O.SetNode; }
   bool operator!=(const AttributeSet &O) { return !(*this == O); }

-  /// Add an argument attribute. Because
-  /// attribute sets are immutable, this returns a new set.
-  AttributeSet addAttribute(LLVMContext &C,
-                            Attribute::AttrKind Kind) const;
+  /// Add an argument attribute. Returns a new set because attribute sets are
+  /// immutable.
+  AttributeSet addAttribute(LLVMContext &C, Attribute::AttrKind Kind) const;

-  /// Add a target-dependent attribute. Because
-  /// attribute sets are immutable, this returns a new set.
+  /// Add a target-dependent attribute. Returns a new set because attribute sets
+  /// are immutable.
   AttributeSet addAttribute(LLVMContext &C, StringRef Kind,
                             StringRef Value = StringRef()) const;

-  /// Add attributes to the attribute set. Because
-  /// attribute sets are immutable, this returns a new set.
+  /// Add attributes to the attribute set. Returns a new set because attribute
+  /// sets are immutable.
   AttributeSet addAttributes(LLVMContext &C, AttributeSet AS) const;

-  /// Remove the specified attribute from this set. Because
-  /// attribute sets are immutable, this returns a new set.
-  AttributeSet removeAttribute(LLVMContext &C,
-                               Attribute::AttrKind Kind) const;
+  /// Remove the specified attribute from this set. Returns a new set because
+  /// attribute sets are immutable.
+  AttributeSet removeAttribute(LLVMContext &C, Attribute::AttrKind Kind) const;

-  /// Remove the specified attribute from this set. Because
-  /// attribute sets are immutable, this returns a new set.
-  AttributeSet removeAttribute(LLVMContext &C,
-                               StringRef Kind) const;
+  /// Remove the specified attribute from this set. Returns a new set because
+  /// attribute sets are immutable.
+  AttributeSet removeAttribute(LLVMContext &C, StringRef Kind) const;

-  /// Remove the specified attributes from this set. Because
-  /// attribute sets are immutable, this returns a new set.
+  /// Remove the specified attributes from this set. Returns a new set because
+  /// attribute sets are immutable.
   AttributeSet removeAttributes(LLVMContext &C,
-                               const AttrBuilder &AttrsToRemove) const;
+                                const AttrBuilder &AttrsToRemove) const;

   /// Return the number of attributes in this set.
   unsigned getNumAttributes() const;
@@ -322,7 +319,7 @@ template <> struct DenseMapInfo<AttributeSet> {
 /// the AttributeList object. The function attributes are at index
 /// `AttributeList::FunctionIndex', the return value is at index
 /// `AttributeList::ReturnIndex', and the attributes for the parameters start at
-/// index `1'.
+/// index `AttributeList::FirstArgIndex'.
 class AttributeList {
 public:
   enum AttrIndex : unsigned {
@@ -347,8 +344,8 @@ class AttributeList {
   /// \brief Create an AttributeList with the specified parameters in it.
   static AttributeList get(LLVMContext &C,
                            ArrayRef<std::pair<unsigned, Attribute>> Attrs);
-  static AttributeList
-  get(LLVMContext &C, ArrayRef<std::pair<unsigned, AttributeSet>> Attrs);
+  static AttributeList get(LLVMContext &C,
+                           ArrayRef<std::pair<unsigned, AttributeSet>> Attrs);

   /// \brief Create an AttributeList from attribute sets for a function, its
   /// return value, and all of its arguments.
@@ -356,13 +353,11 @@ class AttributeList {
                            AttributeSet RetAttrs,
                            ArrayRef<AttributeSet> ArgAttrs);

-  static AttributeList
-  getImpl(LLVMContext &C,
-          ArrayRef<std::pair<unsigned, AttributeSet>> Attrs);
-
 private:
   explicit AttributeList(AttributeListImpl *LI) : pImpl(LI) {}

+  static AttributeList getImpl(LLVMContext &C, ArrayRef<AttributeSet> AttrSets);
+
 public:
   AttributeList() = default;

@@ -379,73 +374,138 @@ class AttributeList {
   static AttributeList get(LLVMContext &C, unsigned Index,
                            const AttrBuilder &B);

-  /// Add an argument attribute to the list. Returns a new list because
-  /// attribute lists are immutable.
-  AttributeList addParamAttribute(LLVMContext &C, unsigned ArgNo,
-                                  Attribute::AttrKind Kind) const {
-    return addAttribute(C, ArgNo + FirstArgIndex, Kind);
-  }
-
-  /// \brief Add an attribute to the attribute set at the given index. Because
-  /// attribute sets are immutable, this returns a new set.
+  /// \brief Add an attribute to the attribute set at the given index.
+  /// Returns a new list because attribute lists are immutable.
   AttributeList addAttribute(LLVMContext &C, unsigned Index,
                              Attribute::AttrKind Kind) const;

-  /// \brief Add an attribute to the attribute set at the given index. Because
-  /// attribute sets are immutable, this returns a new set.
+  /// \brief Add an attribute to the attribute set at the given index.
+  /// Returns a new list because attribute lists are immutable.
   AttributeList addAttribute(LLVMContext &C, unsigned Index, StringRef Kind,
                              StringRef Value = StringRef()) const;

-  /// Add an attribute to the attribute set at the given indices. Because
-  /// attribute sets are immutable, this returns a new set.
-  AttributeList addAttribute(LLVMContext &C, ArrayRef<unsigned> Indices,
-                             Attribute A) const;
+  /// Add an attribute to the attribute set at the given index.
+  /// Returns a new list because attribute lists are immutable.
+  AttributeList addAttribute(LLVMContext &C, unsigned Index, Attribute A) const;

-  /// \brief Add attributes to the attribute set at the given index. Because
-  /// attribute sets are immutable, this returns a new set.
+  /// \brief Add attributes to the attribute set at the given index.
+  /// Returns a new list because attribute lists are immutable.
   AttributeList addAttributes(LLVMContext &C, unsigned Index,
                               const AttrBuilder &B) const;

+  /// Add an argument attribute to the list. Returns a new list because
+  /// attribute lists are immutable.
+  AttributeList addParamAttribute(LLVMContext &C, unsigned ArgNo,
+                                  Attribute::AttrKind Kind) const {
+    return addAttribute(C, ArgNo + FirstArgIndex, Kind);
+  }
+
+  /// Add an argument attribute to the list. Returns a new list because
+  /// attribute lists are immutable.
+  AttributeList addParamAttribute(LLVMContext &C, unsigned ArgNo,
+                                  StringRef Kind,
+                                  StringRef Value = StringRef()) const {
+    return addAttribute(C, ArgNo + FirstArgIndex, Kind, Value);
+  }
+
+  /// Add an attribute to the attribute list at the given arg indices. Returns a
+  /// new list because attribute lists are immutable.
+  AttributeList addParamAttribute(LLVMContext &C, ArrayRef<unsigned> ArgNos,
+                                  Attribute A) const;
+
+  /// Add an argument attribute to the list. Returns a new list because
+  /// attribute lists are immutable.
+  AttributeList addParamAttributes(LLVMContext &C, unsigned ArgNo,
+                                   const AttrBuilder &B) const {
+    return addAttributes(C, ArgNo + FirstArgIndex, B);
+  }
+
   /// \brief Remove the specified attribute at the specified index from this
-  /// attribute list. Because attribute lists are immutable, this returns the
-  /// new list.
+  /// attribute list. Returns a new list because attribute lists are immutable.
   AttributeList removeAttribute(LLVMContext &C, unsigned Index,
                                 Attribute::AttrKind Kind) const;

   /// \brief Remove the specified attribute at the specified index from this
-  /// attribute list. Because attribute lists are immutable, this returns the
-  /// new list.
+  /// attribute list. Returns a new list because attribute lists are immutable.
   AttributeList removeAttribute(LLVMContext &C, unsigned Index,
                                 StringRef Kind) const;

   /// \brief Remove the specified attributes at the specified index from this
-  /// attribute list. Because attribute lists are immutable, this returns the
-  /// new list.
+  /// attribute list. Returns a new list because attribute lists are immutable.
   AttributeList removeAttributes(LLVMContext &C, unsigned Index,
                                  const AttrBuilder &AttrsToRemove) const;

   /// \brief Remove all attributes at the specified index from this
-  /// attribute list. Because attribute lists are immutable, this returns the
-  /// new list.
+  /// attribute list. Returns a new list because attribute lists are immutable.
   AttributeList removeAttributes(LLVMContext &C, unsigned Index) const;

-  /// \brief Add the dereferenceable attribute to the attribute set at the given
-  /// index. Because attribute sets are immutable, this returns a new set.
+  /// \brief Remove the specified attribute at the specified arg index from this
+  /// attribute list. Returns a new list because attribute lists are immutable.
+  AttributeList removeParamAttribute(LLVMContext &C, unsigned ArgNo,
+                                     Attribute::AttrKind Kind) const {
+    return removeAttribute(C, ArgNo + FirstArgIndex, Kind);
+  }
+
+  /// \brief Remove the specified attribute at the specified arg index from this
+  /// attribute list. Returns a new list because attribute lists are immutable.
+  AttributeList removeParamAttribute(LLVMContext &C, unsigned ArgNo,
+                                     StringRef Kind) const {
+    return removeAttribute(C, ArgNo + FirstArgIndex, Kind);
+  }
+
+  /// \brief Remove the specified attribute at the specified arg index from this
+  /// attribute list. Returns a new list because attribute lists are immutable.
+  AttributeList removeParamAttributes(LLVMContext &C, unsigned ArgNo,
+                                      const AttrBuilder &AttrsToRemove) const {
+    return removeAttributes(C, ArgNo + FirstArgIndex, AttrsToRemove);
+  }
+
+  /// \brief Remove all attributes at the specified arg index from this
+  /// attribute list. Returns a new list because attribute lists are immutable.
+  AttributeList removeParamAttributes(LLVMContext &C, unsigned ArgNo) const {
+    return removeAttributes(C, ArgNo + FirstArgIndex);
+  }
+
+  /// \brief Add the dereferenceable attribute to the attribute set at the given
+  /// index. Returns a new list because attribute lists are immutable.
   AttributeList addDereferenceableAttr(LLVMContext &C, unsigned Index,
                                        uint64_t Bytes) const;

+  /// \brief Add the dereferenceable attribute to the attribute set at the given
+  /// arg index. Returns a new list because attribute lists are immutable.
+  AttributeList addDereferenceableParamAttr(LLVMContext &C, unsigned ArgNo,
+                                            uint64_t Bytes) const {
+    return addDereferenceableAttr(C, ArgNo + FirstArgIndex, Bytes);
+  }
+
   /// \brief Add the dereferenceable_or_null attribute to the attribute set at
-  /// the given index. Because attribute sets are immutable, this returns a new
-  /// set.
+  /// the given index. Returns a new list because attribute lists are immutable.
   AttributeList addDereferenceableOrNullAttr(LLVMContext &C, unsigned Index,
                                              uint64_t Bytes) const;

+  /// \brief Add the dereferenceable_or_null attribute to the attribute set at
+  /// the given arg index. Returns a new list because attribute lists are
+  /// immutable.
+  AttributeList addDereferenceableOrNullParamAttr(LLVMContext &C,
+                                                  unsigned ArgNo,
+                                                  uint64_t Bytes) const {
+    return addDereferenceableOrNullAttr(C, ArgNo + FirstArgIndex, Bytes);
+  }
+
   /// Add the allocsize attribute to the attribute set at the given index.
-  /// Because attribute sets are immutable, this returns a new set.
+  /// Returns a new list because attribute lists are immutable.
   AttributeList addAllocSizeAttr(LLVMContext &C, unsigned Index,
                                  unsigned ElemSizeArg,
                                  const Optional<unsigned> &NumElemsArg);

+  /// Add the allocsize attribute to the attribute set at the given arg index.
+  /// Returns a new list because attribute lists are immutable.
+  AttributeList addAllocSizeParamAttr(LLVMContext &C, unsigned ArgNo,
+                                      unsigned ElemSizeArg,
+                                      const Optional<unsigned> &NumElemsArg) {
+    return addAllocSizeAttr(C, ArgNo + FirstArgIndex, ElemSizeArg, NumElemsArg);
+  }
+
   //===--------------------------------------------------------------------===//
   // AttributeList Accessors
   //===--------------------------------------------------------------------===//
@@ -475,6 +535,21 @@ class AttributeList {
   /// \brief Return true if attribute exists at the given index.
   bool hasAttributes(unsigned Index) const;

+  /// \brief Return true if the attribute exists for the given argument.
+  bool hasParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) const {
+    return hasAttribute(ArgNo + FirstArgIndex, Kind);
+  }
+
+  /// \brief Return true if the attribute exists for the given argument.
+  bool hasParamAttr(unsigned ArgNo, StringRef Kind) const {
+    return hasAttribute(ArgNo + FirstArgIndex, Kind);
+  }
+
+  /// \brief Return true if attributes exist for the given argument.
+  bool hasParamAttrs(unsigned ArgNo) const {
+    return hasAttributes(ArgNo + FirstArgIndex);
+  }
+
   /// \brief Equivalent to hasAttribute(AttributeList::FunctionIndex, Kind) but
   /// may be faster.
   bool hasFnAttribute(Attribute::AttrKind Kind) const;
@@ -498,6 +573,16 @@ class AttributeList {
   /// \brief Return the attribute object that exists at the given index.
   Attribute getAttribute(unsigned Index, StringRef Kind) const;

+  /// \brief Return the attribute object that exists at the given arg index.
+  Attribute getParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) const {
+    return getAttribute(ArgNo + FirstArgIndex, Kind);
+  }
+
+  /// \brief Return the attribute object that exists at the given arg index.
+  Attribute getParamAttr(unsigned ArgNo, StringRef Kind) const {
+    return getAttribute(ArgNo + FirstArgIndex, Kind);
+  }
+
   /// \brief Return the alignment of the return value.
   unsigned getRetAlignment() const;

@@ -510,10 +595,22 @@ class AttributeList {
   /// \brief Get the number of dereferenceable bytes (or zero if unknown).
   uint64_t getDereferenceableBytes(unsigned Index) const;

+  /// \brief Get the number of dereferenceable bytes (or zero if unknown) of an
+  /// arg.
+  uint64_t getParamDereferenceableBytes(unsigned ArgNo) const {
+    return getDereferenceableBytes(ArgNo + FirstArgIndex);
+  }
+
   /// \brief Get the number of dereferenceable_or_null bytes (or zero if
   /// unknown).
   uint64_t getDereferenceableOrNullBytes(unsigned Index) const;

+  /// \brief Get the number of dereferenceable_or_null bytes (or zero if
+  /// unknown) of an arg.
+  uint64_t getParamDereferenceableOrNullBytes(unsigned ArgNo) const {
+    return getDereferenceableOrNullBytes(ArgNo + FirstArgIndex);
+  }
+
   /// Get the allocsize argument numbers (or pair(0, 0) if unknown).
   std::pair<unsigned, Optional<unsigned>>
   getAllocSizeArgs(unsigned Index) const;

@@ -521,39 +618,32 @@ class AttributeList {
   /// \brief Return the attributes at the index as a string.
   std::string getAsString(unsigned Index, bool InAttrGrp = false) const;

-  using iterator = ArrayRef<Attribute>::iterator;
+  //===--------------------------------------------------------------------===//
+  // AttributeList Introspection
+  //===--------------------------------------------------------------------===//
+
+  using iterator = const AttributeSet *;
+
+  iterator begin() const;
+  iterator end() const;
+
+  unsigned getNumAttrSets() const;

-  iterator begin(unsigned Slot) const;
-  iterator end(unsigned Slot) const;
+  /// Use these to iterate over the valid attribute indices.
+  unsigned index_begin() const { return AttributeList::FunctionIndex; }
+  unsigned index_end() const { return getNumAttrSets() - 1; }

   /// operator==/!= - Provide equality predicates.
   bool operator==(const AttributeList &RHS) const { return pImpl == RHS.pImpl; }
   bool operator!=(const AttributeList &RHS) const { return pImpl != RHS.pImpl; }

-  //===--------------------------------------------------------------------===//
-  // AttributeList Introspection
-  //===--------------------------------------------------------------------===//
-
   /// \brief Return a raw pointer that uniquely identifies this attribute list.
   void *getRawPointer() const { return pImpl; }

   /// \brief Return true if there are no attributes.
-  bool isEmpty() const {
-    return getNumSlots() == 0;
-  }
-
-  /// \brief Return the number of slots used in this attribute list. This is
-  /// the number of arguments that have an attribute set on them (including the
-  /// function itself).
-  unsigned getNumSlots() const;
-
-  /// \brief Return the index for the given slot.
-  unsigned getSlotIndex(unsigned Slot) const;
-
-  /// \brief Return the attributes at the given slot.
-  AttributeSet getSlotAttributes(unsigned Slot) const;
+  bool isEmpty() const { return pImpl == nullptr; }

   void dump() const;
 };
@@ -741,8 +831,8 @@ class AttrBuilder {
 bool areInlineCompatible(const Function &Caller, const Function &Callee);

 /// \brief Merge caller's and callee's attributes.
 void mergeAttributesForInlining(Function &Caller, const Function &Callee);

-} // end AttributeFuncs namespace
+} // end namespace AttributeFuncs

-} // end llvm namespace
+} // end namespace llvm

 #endif // LLVM_IR_ATTRIBUTES_H
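The bulk of the Attributes.h changes introduce *ParamAttr convenience wrappers that hide the FirstArgIndex offset callers previously applied by hand. A hypothetical helper showing the new calls (illustration only):

    #include "llvm/IR/Attributes.h"
    #include "llvm/IR/LLVMContext.h"
    #include <cassert>

    using namespace llvm;

    static AttributeList markArgNonNull(LLVMContext &Ctx, AttributeList AL,
                                        unsigned ArgNo) {
      // Same effect as:
      //   AL.addAttribute(Ctx, ArgNo + AttributeList::FirstArgIndex,
      //                   Attribute::NonNull);
      AL = AL.addParamAttribute(Ctx, ArgNo, Attribute::NonNull);
      assert(AL.hasParamAttr(ArgNo, Attribute::NonNull));
      return AL; // lists are immutable; the updated list is a new value
    }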
diff --git a/interpreter/llvm/src/include/llvm/IR/Attributes.td b/interpreter/llvm/src/include/llvm/IR/Attributes.td
index 75867a6e58335..616387816bf81 100644
--- a/interpreter/llvm/src/include/llvm/IR/Attributes.td
+++ b/interpreter/llvm/src/include/llvm/IR/Attributes.td
@@ -214,3 +214,5 @@ def : MergeRule<"setAND<UnsafeFPMathAttr>">;
 def : MergeRule<"setOR<NoImplicitFloatAttr>">;
 def : MergeRule<"setOR<NoJumpTablesAttr>">;
 def : MergeRule<"adjustCallerSSPLevel">;
+def : MergeRule<"adjustCallerStackProbes">;
+def : MergeRule<"adjustCallerStackProbeSize">;
diff --git a/interpreter/llvm/src/include/llvm/IR/BasicBlock.h b/interpreter/llvm/src/include/llvm/IR/BasicBlock.h
index 97989cf5c6525..6714f2c97473f 100644
--- a/interpreter/llvm/src/include/llvm/IR/BasicBlock.h
+++ b/interpreter/llvm/src/include/llvm/IR/BasicBlock.h
@@ -1,4 +1,4 @@
-//===-- llvm/BasicBlock.h - Represent a basic block in the VM ---*- C++ -*-===//
+//===- llvm/BasicBlock.h - Represent a basic block in the VM ----*- C++ -*-===//
 //
 // The LLVM Compiler Infrastructure
 //
@@ -14,17 +14,21 @@
 #ifndef LLVM_IR_BASICBLOCK_H
 #define LLVM_IR_BASICBLOCK_H

+#include "llvm-c/Types.h"
+#include "llvm/ADT/Twine.h"
 #include "llvm/ADT/ilist.h"
 #include "llvm/ADT/ilist_node.h"
-#include "llvm/ADT/Twine.h"
+#include "llvm/ADT/iterator.h"
+#include "llvm/ADT/iterator_range.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/SymbolTableListTraits.h"
 #include "llvm/IR/Value.h"
 #include "llvm/Support/CBindingWrapping.h"
+#include "llvm/Support/Casting.h"
 #include "llvm/Support/Compiler.h"
-#include "llvm-c/Types.h"
 #include <cassert>
 #include <cstddef>
+#include <iterator>

 namespace llvm {

@@ -33,6 +37,7 @@ class Function;
 class LandingPadInst;
 class LLVMContext;
 class Module;
+class PHINode;
 class TerminatorInst;
 class ValueSymbolTable;

@@ -51,8 +56,8 @@ class ValueSymbolTable;
 /// occur because it may be useful in the intermediate stage of constructing or
 /// modifying a program. However, the verifier will ensure that basic blocks
 /// are "well formed".
-class BasicBlock : public Value, // Basic blocks are data objects also
-                   public ilist_node_with_parent<BasicBlock, Function> {
+class BasicBlock final : public Value, // Basic blocks are data objects also
+                         public ilist_node_with_parent<BasicBlock, Function> {
 public:
   using InstListType = SymbolTableList<Instruction>;

@@ -77,7 +82,7 @@ class BasicBlock : public Value, // Basic blocks are data objects also
 public:
   BasicBlock(const BasicBlock &) = delete;
   BasicBlock &operator=(const BasicBlock &) = delete;
-  ~BasicBlock() override;
+  ~BasicBlock();

   /// \brief Get the context in which this basic block lives.
   LLVMContext &getContext() const;
@@ -261,6 +266,50 @@ class BasicBlock : public Value, // Basic blocks are data objects also
   inline const Instruction &back() const { return InstList.back(); }
   inline Instruction       &back() { return InstList.back(); }

+  /// Iterator to walk just the phi nodes in the basic block.
+  template <typename PHINodeT = PHINode, typename BBIteratorT = iterator>
+  class phi_iterator_impl
+      : public iterator_facade_base<phi_iterator_impl<PHINodeT, BBIteratorT>,
+                                    std::forward_iterator_tag, PHINodeT> {
+    friend BasicBlock;
+
+    PHINodeT *PN;
+
+    phi_iterator_impl(PHINodeT *PN) : PN(PN) {}
+
+  public:
+    // Allow default construction to build variables, but this doesn't build
+    // a useful iterator.
+    phi_iterator_impl() = default;
+
+    // Allow conversion between instantiations where valid.
+    template <typename PHINodeU, typename BBIteratorU>
+    phi_iterator_impl(const phi_iterator_impl<PHINodeU, BBIteratorU> &Arg)
+        : PN(Arg.PN) {}
+
+    bool operator==(const phi_iterator_impl &Arg) const { return PN == Arg.PN; }
+
+    PHINodeT &operator*() const { return *PN; }
+
+    using phi_iterator_impl::iterator_facade_base::operator++;
+    phi_iterator_impl &operator++() {
+      assert(PN && "Cannot increment the end iterator!");
+      PN = dyn_cast<PHINodeT>(std::next(BBIteratorT(PN)));
+      return *this;
+    }
+  };
+  using phi_iterator = phi_iterator_impl<>;
+  using const_phi_iterator =
+      phi_iterator_impl<const PHINode, BasicBlock::const_iterator>;
+
+  /// Returns a range that iterates over the phis in the basic block.
+  ///
+  /// Note that this cannot be used with basic blocks that have no terminator.
+  iterator_range<const_phi_iterator> phis() const {
+    return const_cast<BasicBlock *>(this)->phis();
+  }
+  iterator_range<phi_iterator> phis();
+
   /// \brief Return the underlying instruction list container.
   ///
   /// Currently you need to access the underlying instruction list container
@@ -277,7 +326,7 @@ class BasicBlock : public Value, // Basic blocks are data objects also
   ValueSymbolTable *getValueSymbolTable();

   /// \brief Methods for support type inquiry through isa, cast, and dyn_cast.
-  static inline bool classof(const Value *V) {
+  static bool classof(const Value *V) {
     return V->getValueID() == Value::BasicBlockVal;
   }

@@ -346,6 +395,9 @@ class BasicBlock : public Value, // Basic blocks are data objects also
                     static_cast<const BasicBlock *>(this)->getLandingPadInst());
   }

+  /// \brief Return true if it is legal to hoist instructions into this block.
+  bool isLegalToHoistInto() const;
+
 private:
   /// \brief Increment the internal refcount of the number of BlockAddresses
   /// referencing this BasicBlock by \p Amt.
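The new BasicBlock::phis() range replaces the common pattern of iterating instructions and dyn_cast'ing until the first non-PHI. A hypothetical use (note the documented requirement that the block have a terminator):

    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Instructions.h"

    using namespace llvm;

    static unsigned countIncomingFrom(BasicBlock &BB, BasicBlock *Pred) {
      unsigned N = 0;
      for (PHINode &PN : BB.phis()) // walks only the leading PHI nodes
        if (PN.getBasicBlockIndex(Pred) >= 0)
          ++N;
      return N;
    }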
diff --git a/interpreter/llvm/src/include/llvm/IR/CallSite.h b/interpreter/llvm/src/include/llvm/IR/CallSite.h
index 4a806ab501e57..96fbebf42c38b 100644
--- a/interpreter/llvm/src/include/llvm/IR/CallSite.h
+++ b/interpreter/llvm/src/include/llvm/IR/CallSite.h
@@ -26,9 +26,9 @@
 #ifndef LLVM_IR_CALLSITE_H
 #define LLVM_IR_CALLSITE_H

-#include "llvm/ADT/iterator_range.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/iterator_range.h"
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/CallingConv.h"
 #include "llvm/IR/Function.h"
@@ -36,10 +36,10 @@
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Intrinsics.h"
-#include "llvm/Support/Casting.h"
 #include "llvm/IR/Use.h"
 #include "llvm/IR/User.h"
 #include "llvm/IR/Value.h"
+#include "llvm/Support/Casting.h"
 #include <cassert>
 #include <cstdint>
 #include <iterator>
diff --git a/interpreter/llvm/src/include/llvm/IR/CallingConv.h b/interpreter/llvm/src/include/llvm/IR/CallingConv.h
index 801e88aba4d1b..850964afc3076 100644
--- a/interpreter/llvm/src/include/llvm/IR/CallingConv.h
+++ b/interpreter/llvm/src/include/llvm/IR/CallingConv.h
@@ -143,11 +143,15 @@ namespace CallingConv {
     /// System V ABI, used on most non-Windows systems.
     X86_64_SysV = 78,

-    /// \brief The C convention as implemented on Windows/x86-64. This
-    /// convention differs from the more common \c X86_64_SysV convention
-    /// in a number of ways, most notably in that XMM registers used to pass
-    /// arguments are shadowed by GPRs, and vice versa.
-    X86_64_Win64 = 79,
+    /// \brief The C convention as implemented on Windows/x86-64 and
+    /// AArch64. This convention differs from the more common
+    /// \c X86_64_SysV convention in a number of ways, most notably in
+    /// that XMM registers used to pass arguments are shadowed by GPRs,
+    /// and vice versa.
+    /// On AArch64, this is identical to the normal C (AAPCS) calling
+    /// convention for normal functions, but floats are passed in integer
+    /// registers to variadic functions.
+    Win64 = 79,

     /// \brief MSVC calling convention that passes vectors and vector aggregates
     /// in SSE registers.
diff --git a/interpreter/llvm/src/include/llvm/IR/Constant.h b/interpreter/llvm/src/include/llvm/IR/Constant.h
index 3b3694e7e60d0..9daeac6ad6e7d 100644
--- a/interpreter/llvm/src/include/llvm/IR/Constant.h
+++ b/interpreter/llvm/src/include/llvm/IR/Constant.h
@@ -40,8 +40,6 @@ class APInt;
 /// don't have to worry about the lifetime of the objects.
 /// @brief LLVM Constant Representation
 class Constant : public User {
-  void anchor() override;
-
 protected:
   Constant(Type *ty, ValueTy vty, Use *Ops, unsigned NumOps)
     : User(ty, vty, Ops, NumOps) {}
@@ -118,7 +116,7 @@ class Constant : public User {
   void destroyConstant();

   //// Methods for support type inquiry through isa, cast, and dyn_cast:
-  static inline bool classof(const Value *V) {
+  static bool classof(const Value *V) {
     return V->getValueID() >= ConstantFirstVal &&
            V->getValueID() <= ConstantLastVal;
   }
diff --git a/interpreter/llvm/src/include/llvm/IR/ConstantRange.h b/interpreter/llvm/src/include/llvm/IR/ConstantRange.h
index 6a50a8801f86f..ff6495e7f0757 100644
--- a/interpreter/llvm/src/include/llvm/IR/ConstantRange.h
+++ b/interpreter/llvm/src/include/llvm/IR/ConstantRange.h
@@ -34,11 +34,14 @@

 #include "llvm/ADT/APInt.h"
 #include "llvm/IR/InstrTypes.h"
-#include "llvm/Support/DataTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/Support/Compiler.h"
+#include <cstdint>

 namespace llvm {

 class MDNode;
+class raw_ostream;

 /// This class represents a range of values.
 class LLVM_NODISCARD ConstantRange {
@@ -330,6 +333,6 @@ inline raw_ostream &operator<<(raw_ostream &OS, const ConstantRange &CR) {
 /// E.g. if RangeMD is !{i32 0, i32 10, i32 15, i32 20} then return [0, 20).
 ConstantRange getConstantRangeFromMetadata(const MDNode &RangeMD);

-} // End llvm namespace
+} // end namespace llvm

-#endif
+#endif // LLVM_IR_CONSTANTRANGE_H
diff --git a/interpreter/llvm/src/include/llvm/IR/Constants.h b/interpreter/llvm/src/include/llvm/IR/Constants.h
index 5db9b3bb50483..0094fd54992a6 100644
--- a/interpreter/llvm/src/include/llvm/IR/Constants.h
+++ b/interpreter/llvm/src/include/llvm/IR/Constants.h
@@ -58,8 +58,6 @@ template <class ConstantClass> struct ConstantAggrKeyType;
 class ConstantData : public Constant {
   friend class Constant;

-  void anchor() override;
-
   Value *handleOperandChangeImpl(Value *From, Value *To) {
     llvm_unreachable("Constant data does not have operands!");
   }
@@ -70,11 +68,8 @@ class ConstantData : public Constant {
   void *operator new(size_t s) { return User::operator new(s, 0); }

 public:
-  ConstantData() = delete;
   ConstantData(const ConstantData &) = delete;

-  void *operator new(size_t, unsigned) = delete;
-
   /// Methods to support type inquiry through isa, cast, and dyn_cast.
   static bool classof(const Value *V) {
     return V->getValueID() >= ConstantDataFirstVal &&
@@ -93,7 +88,6 @@ class ConstantInt final : public ConstantData {

   ConstantInt(IntegerType *Ty, const APInt& V);

-  void anchor() override;
   void destroyConstantImpl();

 public:
@@ -197,7 +191,7 @@ class ConstantInt final : public ConstantData {
   /// common code. It also correctly performs the comparison without the
   /// potential for an assertion from getZExtValue().
   bool isZero() const {
-    return Val == 0;
+    return Val.isNullValue();
   }

   /// This is just a convenience method to make client code smaller for a
@@ -205,7 +199,7 @@ class ConstantInt final : public ConstantData {
   /// potential for an assertion from getZExtValue().
   /// @brief Determine if the value is one.
   bool isOne() const {
-    return Val == 1;
+    return Val.isOneValue();
   }

   /// This function will return true iff every bit in this constant is set
@@ -246,7 +240,7 @@ class ConstantInt final : public ConstantData {
   /// @returns true iff this constant is greater or equal to the given number.
   /// @brief Determine if the value is greater or equal to the given number.
   bool uge(uint64_t Num) const {
-    return Val.getActiveBits() > 64 || Val.getZExtValue() >= Num;
+    return Val.uge(Num);
   }

   /// getLimitedValue - If the value is smaller than the specified limit,
@@ -274,7 +268,6 @@ class ConstantFP final : public ConstantData {

   ConstantFP(Type *Ty, const APFloat& V);

-  void anchor() override;
   void destroyConstantImpl();

 public:
@@ -588,7 +581,7 @@ class ConstantDataSequential : public ConstantData {
 protected:
   explicit ConstantDataSequential(Type *ty, ValueTy VT, const char *Data)
       : ConstantData(ty, VT), DataElements(Data), Next(nullptr) {}
-  ~ConstantDataSequential() override { delete Next; }
+  ~ConstantDataSequential() { delete Next; }

   static Constant *getImpl(StringRef Bytes, Type *Ty);

@@ -605,6 +598,10 @@ class ConstantDataSequential : public ConstantData {
   /// specified element in the low bits of a uint64_t.
   uint64_t getElementAsInteger(unsigned i) const;

+  /// If this is a sequential container of integers (of any size), return the
+  /// specified element as an APInt.
+  APInt getElementAsAPInt(unsigned i) const;
+
   /// If this is a sequential container of floating point type, return the
   /// specified element as an APFloat.
   APFloat getElementAsAPFloat(unsigned i) const;
@@ -638,8 +635,8 @@ class ConstantDataSequential : public ConstantData {
   /// The size of the elements is known to be a multiple of one byte.
   uint64_t getElementByteSize() const;

-  /// This method returns true if this is an array of i8.
-  bool isString() const;
+  /// This method returns true if this is an array of \p CharSize integers.
+  bool isString(unsigned CharSize = 8) const;

   /// This method returns true if the array "isString", ends with a null byte,
   /// and does not contain any other null bytes.
@@ -687,18 +684,9 @@ class ConstantDataArray final : public ConstantDataSequential {
   explicit ConstantDataArray(Type *ty, const char *Data)
       : ConstantDataSequential(ty, ConstantDataArrayVal, Data) {}

-  /// Allocate space for exactly zero operands.
-  void *operator new(size_t s) {
-    return User::operator new(s, 0);
-  }
-
-  void anchor() override;
-
 public:
   ConstantDataArray(const ConstantDataArray &) = delete;

-  void *operator new(size_t, unsigned) = delete;
-
   /// get() constructors - Return a constant with array type with an element
   /// count and element type matching the ArrayRef passed in. Note that this
   /// can return a ConstantAggregateZero object.
@@ -750,18 +738,9 @@ class ConstantDataVector final : public ConstantDataSequential {
   explicit ConstantDataVector(Type *ty, const char *Data)
       : ConstantDataSequential(ty, ConstantDataVectorVal, Data) {}

-  // allocate space for exactly zero operands.
-  void *operator new(size_t s) {
-    return User::operator new(s, 0);
-  }
-
-  void anchor() override;
-
 public:
   ConstantDataVector(const ConstantDataVector &) = delete;

-  void *operator new(size_t, unsigned) = delete;
-
   /// get() constructors - Return a constant with vector type with an element
   /// count and element type matching the ArrayRef passed in. Note that this
   /// can return a ConstantAggregateZero object.
@@ -786,6 +765,10 @@ class ConstantDataVector final : public ConstantDataSequential {
   /// i32/i64/float/double) and must be a ConstantFP or ConstantInt.
   static Constant *getSplat(unsigned NumElts, Constant *Elt);

+  /// Returns true if this is a splat constant, meaning that all elements have
+  /// the same value.
+  bool isSplat() const;
+
   /// If this is a splat constant, meaning that all of the elements have the
   /// same value, return that value. Otherwise return NULL.
   Constant *getSplatValue() const;
@@ -838,8 +821,6 @@ class BlockAddress final : public Constant {
   Value *handleOperandChangeImpl(Value *From, Value *To);

 public:
-  void *operator new(size_t, unsigned) = delete;
-
   /// Return a BlockAddress for the specified function and basic block.
   static BlockAddress *get(Function *F, BasicBlock *BB);

@@ -859,7 +840,7 @@ class BlockAddress final : public Constant {
   BasicBlock *getBasicBlock() const { return (BasicBlock*)Op<1>().get(); }

   /// Methods for support type inquiry through isa, cast, and dyn_cast:
-  static inline bool classof(const Value *V) {
+  static bool classof(const Value *V) {
     return V->getValueID() == BlockAddressVal;
   }
 };
@@ -1234,7 +1215,7 @@ class ConstantExpr : public Constant {
   Instruction *getAsInstruction();

   /// Methods for support type inquiry through isa, cast, and dyn_cast:
-  static inline bool classof(const Value *V) {
+  static bool classof(const Value *V) {
     return V->getValueID() == ConstantExprVal;
   }
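ConstantDataVector::isSplat, added above, lets callers ask the yes/no question directly; getSplatValue already existed and returns null for non-splats. A small sketch (hypothetical helper, not from the patch):

    #include "llvm/IR/Constants.h"

    using namespace llvm;

    static Constant *laneValueIfSplat(const ConstantDataVector *CDV) {
      // When isSplat() is true, every element holds the same constant and
      // getSplatValue() is guaranteed to be non-null.
      return CDV->isSplat() ? CDV->getSplatValue() : nullptr;
    }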
diff --git a/interpreter/llvm/src/include/llvm/IR/DIBuilder.h b/interpreter/llvm/src/include/llvm/IR/DIBuilder.h
index 4afb5d9d63b25..6a14f783005d6 100644
--- a/interpreter/llvm/src/include/llvm/IR/DIBuilder.h
+++ b/interpreter/llvm/src/include/llvm/IR/DIBuilder.h
@@ -86,6 +86,10 @@ namespace llvm {
     /// Construct any deferred debug info descriptors.
     void finalize();

+    /// Finalize a specific subprogram - no new variables may be added to this
+    /// subprogram afterwards.
+    void finalizeSubprogram(DISubprogram *SP);
+
     /// A CompileUnit provides an anchor for all debugging
     /// information generated during this instance of compilation.
     /// \param Lang     Source programming language, eg. dwarf::DW_LANG_C99
@@ -670,32 +674,37 @@ namespace llvm {
     /// Create a descriptor for an imported module.
     /// \param Context The scope this module is imported into
-    /// \param NS      The namespace being imported here
-    /// \param Line    Line number
+    /// \param NS      The namespace being imported here.
+    /// \param File    File where the declaration is located.
+    /// \param Line    Line number of the declaration.
     DIImportedEntity *createImportedModule(DIScope *Context, DINamespace *NS,
-                                           unsigned Line);
+                                           DIFile *File, unsigned Line);

     /// Create a descriptor for an imported module.
-    /// \param Context The scope this module is imported into
-    /// \param NS      An aliased namespace
-    /// \param Line    Line number
+    /// \param Context The scope this module is imported into.
+    /// \param NS      An aliased namespace.
+    /// \param File    File where the declaration is located.
+    /// \param Line    Line number of the declaration.
     DIImportedEntity *createImportedModule(DIScope *Context,
-                                           DIImportedEntity *NS, unsigned Line);
+                                           DIImportedEntity *NS, DIFile *File,
+                                           unsigned Line);

     /// Create a descriptor for an imported module.
-    /// \param Context The scope this module is imported into
-    /// \param M       The module being imported here
-    /// \param Line    Line number
+    /// \param Context The scope this module is imported into.
+    /// \param M       The module being imported here.
+    /// \param File    File where the declaration is located.
+    /// \param Line    Line number of the declaration.
     DIImportedEntity *createImportedModule(DIScope *Context, DIModule *M,
-                                           unsigned Line);
+                                           DIFile *File, unsigned Line);

     /// Create a descriptor for an imported function.
-    /// \param Context The scope this module is imported into
-    /// \param Decl    The declaration (or definition) of a function, type, or
-    ///                variable
-    /// \param Line    Line number
+    /// \param Context The scope this module is imported into.
+    /// \param Decl    The declaration (or definition) of a function, type, or
+    ///                variable.
+    /// \param File    File where the declaration is located.
+    /// \param Line    Line number of the declaration.
     DIImportedEntity *createImportedDeclaration(DIScope *Context, DINode *Decl,
-                                                unsigned Line,
+                                                DIFile *File, unsigned Line,
                                                 StringRef Name = "");

     /// Insert a new llvm.dbg.declare intrinsic call.
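Frontends calling DIBuilder must now thread the declaring DIFile through the createImported* entry points. A hypothetical call site (DIB, Scope, NS and CUFile are assumed to come from the surrounding debug-info setup):

    // The new DIFile argument records where the import itself is declared.
    DIImportedEntity *IE =
        DIB.createImportedModule(Scope, NS, /*File=*/CUFile, /*Line=*/42);
    (void)IE;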
diff --git a/interpreter/llvm/src/include/llvm/IR/DataLayout.h b/interpreter/llvm/src/include/llvm/IR/DataLayout.h
index c1d398f17b59e..daf8f8da689d6 100644
--- a/interpreter/llvm/src/include/llvm/IR/DataLayout.h
+++ b/interpreter/llvm/src/include/llvm/IR/DataLayout.h
@@ -21,8 +21,8 @@
 #define LLVM_IR_DATALAYOUT_H

 #include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Type.h"
diff --git a/interpreter/llvm/src/include/llvm/IR/DebugInfoMetadata.h b/interpreter/llvm/src/include/llvm/IR/DebugInfoMetadata.h
index 36bc5ce91b693..678a43ae7926d 100644
--- a/interpreter/llvm/src/include/llvm/IR/DebugInfoMetadata.h
+++ b/interpreter/llvm/src/include/llvm/IR/DebugInfoMetadata.h
@@ -17,11 +17,14 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/BitmaskEnum.h"
 #include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/Support/Casting.h"
-#include "llvm/Support/Dwarf.h"
 #include <cassert>
 #include <climits>
 #include <cstddef>
@@ -56,10 +59,6 @@ namespace llvm {

-class DIBuilder;
-
-template <typename T> class Optional;
-
 /// Holds a subclass of DINode.
 ///
 /// FIXME: This class doesn't currently make much sense. Previously it was a
@@ -94,9 +93,9 @@ template <class T> class TypedDINodeRef {
   bool operator!=(const TypedDINodeRef<T> &X) const { return MD != X.MD; }
 };

-typedef TypedDINodeRef<DINode> DINodeRef;
-typedef TypedDINodeRef<DIScope> DIScopeRef;
-typedef TypedDINodeRef<DIType> DITypeRef;
+using DINodeRef = TypedDINodeRef<DINode>;
+using DIScopeRef = TypedDINodeRef<DIScope>;
+using DITypeRef = TypedDINodeRef<DIType>;

 class DITypeRefArray {
   const MDTuple *N = nullptr;
@@ -149,7 +148,7 @@ class DITypeRefArray {
 /// Tagged DWARF-like metadata node.
 ///
 /// A metadata node with a DWARF tag (i.e., a constant named \c DW_TAG_*,
-/// defined in llvm/Support/Dwarf.h). Called \a DINode because it's
+/// defined in llvm/BinaryFormat/Dwarf.h). Called \a DINode because it's
 /// potentially used for non-DWARF output.
 class DINode : public MDNode {
   friend class LLVMContextImpl;
@@ -240,7 +239,8 @@ class DINode : public MDNode {
 };

 template <class T> struct simplify_type<TypedDINodeRef<T>> {
-  typedef Metadata *SimpleType;
+  using SimpleType = Metadata *;
+
   static SimpleType getSimplifiedValue(const TypedDINodeRef<T> &MD) {
     return MD;
   }
@@ -435,10 +435,10 @@ class DIScope : public DINode {

   /// Return the raw underlying file.
   ///
-  /// A \a DIFile is a \a DIScope, but it doesn't point at a separate file
-  /// (it \em is the file). If \c this is an \a DIFile, we need to return \c
-  /// this. Otherwise, return the first operand, which is where all other
-  /// subclasses store their file pointer.
+  /// A \a DIFile is a \a DIScope, but it doesn't point at a separate file (it
+  /// \em is the file). If \c this is an \a DIFile, we need to return \c this.
+  /// Otherwise, return the first operand, which is where all other subclasses
+  /// store their file pointer.
   Metadata *getRawFile() const {
     return isa<DIFile>(this) ? const_cast<DIScope *>(this)
                              : static_cast<Metadata *>(getOperand(0));
@@ -799,15 +799,18 @@ class DIDerivedType : public DIType {
     assert(getTag() == dwarf::DW_TAG_ptr_to_member_type);
     return DITypeRef(getExtraData());
   }
+
   DIObjCProperty *getObjCProperty() const {
     return dyn_cast_or_null<DIObjCProperty>(getExtraData());
   }
+
   Constant *getStorageOffsetInBits() const {
     assert(getTag() == dwarf::DW_TAG_member && isBitField());
     if (auto *C = cast_or_null<ConstantAsMetadata>(getExtraData()))
       return C->getValue();
     return nullptr;
   }
+
   Constant *getConstant() const {
     assert(getTag() == dwarf::DW_TAG_member && isStaticMember());
     if (auto *C = cast_or_null<ConstantAsMetadata>(getExtraData()))
@@ -970,9 +973,11 @@ class DICompositeType : public DIType {
 #endif
     replaceOperandWith(4, Elements.get());
   }
+
   void replaceVTableHolder(DITypeRef VTableHolder) {
     replaceOperandWith(5, VTableHolder);
   }
+
   void replaceTemplateParams(DITemplateParameterArray TemplateParams) {
     replaceOperandWith(6, TemplateParams.get());
   }
@@ -1031,6 +1036,7 @@ class DISubroutineType : public DIType {
   DITypeRefArray getTypeArray() const {
     return cast_or_null<MDTuple>(getRawTypeArray());
   }
+
   Metadata *getRawTypeArray() const { return getOperand(3); }

   static bool classof(const Metadata *MD) {
@@ -1319,6 +1325,7 @@ class DILocation : public MDNode {
   unsigned getLine() const { return SubclassData32; }
   unsigned getColumn() const { return SubclassData16; }
   DILocalScope *getScope() const { return cast<DILocalScope>(getRawScope()); }
+
   DILocation *getInlinedAt() const {
     return cast_or_null<DILocation>(getRawInlinedAt());
   }
@@ -1452,7 +1459,6 @@ class DILocation : public MDNode {
   static bool classof(const Metadata *MD) {
     return MD->getMetadataID() == DILocationKind;
   }
-
 };

 /// Subprogram description.
@@ -2087,6 +2093,7 @@ class DIVariable : public DINode {
       return F->getFilename();
     return "";
   }
+
   StringRef getDirectory() const {
     if (auto *F = getFile())
       return F->getDirectory();
@@ -2110,9 +2117,6 @@ class DIVariable : public DINode {
 /// variable, or the location of a single piece of a variable, or (when using
 /// DW_OP_stack_value) is the constant variable value.
 ///
-/// FIXME: Instead of DW_OP_plus taking an argument, this should use DW_OP_const
-/// and have DW_OP_plus consume the topmost elements on the stack.
-///
 /// TODO: Co-allocate the expression elements.
 /// TODO: Separate from MDNode, or otherwise drop Distinct and Temporary
 ///       storage types.
@@ -2143,6 +2147,7 @@ class DIExpression : public MDNode {

   ArrayRef<uint64_t> getElements() const { return Elements; }
   unsigned getNumElements() const { return Elements.size(); }
+
   uint64_t getElement(unsigned I) const {
     assert(I < Elements.size() && "Index out of range");
     return Elements[I];
@@ -2151,7 +2156,8 @@ class DIExpression : public MDNode {
   /// Determine whether this represents a standalone constant value.
   bool isConstant() const;

-  typedef ArrayRef<uint64_t>::iterator element_iterator;
+  using element_iterator = ArrayRef<uint64_t>::iterator;
+
   element_iterator elements_begin() const { return getElements().begin(); }
   element_iterator elements_end() const { return getElements().end(); }
@@ -2513,6 +2519,7 @@ class DIObjCProperty : public DINode {
       return F->getFilename();
     return "";
   }
+
   StringRef getDirectory() const {
     if (auto *F = getFile())
       return F->getDirectory();
@@ -2544,32 +2551,32 @@ class DIImportedEntity : public DINode {

   static DIImportedEntity *getImpl(LLVMContext &Context, unsigned Tag,
                                    DIScope *Scope, DINodeRef Entity,
-                                   unsigned Line, StringRef Name,
+                                   DIFile *File, unsigned Line, StringRef Name,
                                    StorageType Storage,
                                    bool ShouldCreate = true) {
-    return getImpl(Context, Tag, Scope, Entity, Line,
+    return getImpl(Context, Tag, Scope, Entity, File, Line,
                    getCanonicalMDString(Context, Name), Storage, ShouldCreate);
   }
   static DIImportedEntity *getImpl(LLVMContext &Context, unsigned Tag,
                                    Metadata *Scope, Metadata *Entity,
-                                   unsigned Line, MDString *Name,
-                                   StorageType Storage,
+                                   Metadata *File, unsigned Line,
+                                   MDString *Name, StorageType Storage,
                                    bool ShouldCreate = true);

   TempDIImportedEntity cloneImpl() const {
     return getTemporary(getContext(), getTag(), getScope(), getEntity(),
-                        getLine(), getName());
+                        getFile(), getLine(), getName());
   }

 public:
   DEFINE_MDNODE_GET(DIImportedEntity,
                     (unsigned Tag, DIScope *Scope, DINodeRef Entity,
-                     unsigned Line, StringRef Name = ""),
-                    (Tag, Scope, Entity, Line, Name))
+                     DIFile *File, unsigned Line, StringRef Name = ""),
+                    (Tag, Scope, Entity, File, Line, Name))
   DEFINE_MDNODE_GET(DIImportedEntity,
                     (unsigned Tag, Metadata *Scope, Metadata *Entity,
-                     unsigned Line, MDString *Name),
-                    (Tag, Scope, Entity, Line, Name))
+                     Metadata *File, unsigned Line, MDString *Name),
+                    (Tag, Scope, Entity, File, Line, Name))

   TempDIImportedEntity clone() const { return cloneImpl(); }

@@ -2577,10 +2584,12 @@ class DIImportedEntity : public DINode {
   DIScope *getScope() const { return cast_or_null<DIScope>(getRawScope()); }
   DINodeRef getEntity() const { return DINodeRef(getRawEntity()); }
   StringRef getName() const { return getStringOperand(2); }
+  DIFile *getFile() const { return cast_or_null<DIFile>(getRawFile()); }

   Metadata *getRawScope() const { return getOperand(0); }
   Metadata *getRawEntity() const { return getOperand(1); }
   MDString *getRawName() const { return getOperandAs<MDString>(2); }
+  Metadata *getRawFile() const { return getOperand(3); }

   static bool classof(const Metadata *MD) {
     return MD->getMetadataID() == DIImportedEntityKind;
@@ -2613,10 +2622,13 @@ class DIGlobalVariableExpression : public MDNode {
   TempDIGlobalVariableExpression clone() const { return cloneImpl(); }

   Metadata *getRawVariable() const { return getOperand(0); }
+
   DIGlobalVariable *getVariable() const {
     return cast_or_null<DIGlobalVariable>(getRawVariable());
   }
+
   Metadata *getRawExpression() const { return getOperand(1); }
+
   DIExpression *getExpression() const {
     return cast_or_null<DIExpression>(getRawExpression());
   }
@@ -2629,7 +2641,8 @@ class DIGlobalVariableExpression : public MDNode {
 /// Macro Info DWARF-like metadata node.
 ///
 /// A metadata node with a DWARF macro info (i.e., a constant named
-/// \c DW_MACINFO_*, defined in llvm/Support/Dwarf.h). Called \a DIMacroNode
+/// \c DW_MACINFO_*, defined in llvm/BinaryFormat/Dwarf.h). Called \a
+/// DIMacroNode
 /// because it's potentially used for non-DWARF output.
 class DIMacroNode : public MDNode {
   friend class LLVMContextImpl;
diff --git a/interpreter/llvm/src/include/llvm/IR/DebugLoc.h b/interpreter/llvm/src/include/llvm/IR/DebugLoc.h
index aa74f361cda25..eef1212abc4b7 100644
--- a/interpreter/llvm/src/include/llvm/IR/DebugLoc.h
+++ b/interpreter/llvm/src/include/llvm/IR/DebugLoc.h
@@ -90,12 +90,6 @@ namespace llvm {
                                 DenseMap<const MDNode *, MDNode *> &Cache,
                                 bool ReplaceLast = false);

-    /// Reparent all debug locations referenced by \c I that belong to \c OrigSP
-    /// to become (possibly indirect) children of \c NewSP.
-    static void reparentDebugInfo(Instruction &I, DISubprogram *OrigSP,
-                                  DISubprogram *NewSP,
-                                  DenseMap<DISubprogram *, DISubprogram *> &Cache);
-
     unsigned getLine() const;
     unsigned getCol() const;
     MDNode *getScope() const;
- static inline bool classof(const Type *T) { + static bool classof(const Type *T) { return T->getTypeID() == VectorTyID; } }; @@ -495,7 +495,7 @@ class PointerType : public Type { inline unsigned getAddressSpace() const { return getSubclassData(); } /// Implement support type inquiry through isa, cast, and dyn_cast. - static inline bool classof(const Type *T) { + static bool classof(const Type *T) { return T->getTypeID() == PointerTyID; } }; diff --git a/interpreter/llvm/src/include/llvm/IR/DerivedUser.h b/interpreter/llvm/src/include/llvm/IR/DerivedUser.h new file mode 100644 index 0000000000000..67c483d3c4977 --- /dev/null +++ b/interpreter/llvm/src/include/llvm/IR/DerivedUser.h @@ -0,0 +1,45 @@ +//===- DerivedUser.h - Base for non-IR Users --------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_IR_DERIVEDUSER_H +#define LLVM_IR_DERIVEDUSER_H + +#include "llvm/IR/User.h" + +namespace llvm { + +class Type; +class Use; + +/// Extension point for the Value hierarchy. All classes outside of lib/IR +/// that wish to inherit from User should instead inherit from DerivedUser +/// instead. Inheriting from this class is discouraged. +/// +/// Generally speaking, Value is the base of a closed class hierarchy +/// that can't be extended by code outside of lib/IR. This class creates a +/// loophole that allows classes outside of lib/IR to extend User to leverage +/// its use/def list machinery. +class DerivedUser : public User { +protected: + using DeleteValueTy = void (*)(DerivedUser *); + +private: + friend class Value; + + DeleteValueTy DeleteValue; + +public: + DerivedUser(Type *Ty, unsigned VK, Use *U, unsigned NumOps, + DeleteValueTy DeleteValue) + : User(Ty, VK, U, NumOps), DeleteValue(DeleteValue) {} +}; + +} // end namespace llvm + +#endif // LLVM_IR_DERIVEDUSER_H diff --git a/interpreter/llvm/src/include/llvm/IR/DiagnosticInfo.h b/interpreter/llvm/src/include/llvm/IR/DiagnosticInfo.h index 458c3cf29b0d1..15d3325771136 100644 --- a/interpreter/llvm/src/include/llvm/IR/DiagnosticInfo.h +++ b/interpreter/llvm/src/include/llvm/IR/DiagnosticInfo.h @@ -15,7 +15,7 @@ #ifndef LLVM_IR_DIAGNOSTICINFO_H #define LLVM_IR_DIAGNOSTICINFO_H -#include "llvm/ADT/None.h" +#include "llvm-c/Types.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" @@ -23,10 +23,9 @@ #include "llvm/IR/DebugLoc.h" #include "llvm/Support/CBindingWrapping.h" #include "llvm/Support/YAMLTraits.h" -#include "llvm-c/Types.h" -#include #include #include +#include #include #include @@ -120,18 +119,18 @@ class DiagnosticInfo { virtual void print(DiagnosticPrinter &DP) const = 0; }; -typedef std::function DiagnosticHandlerFunction; +using DiagnosticHandlerFunction = std::function; /// Diagnostic information for inline asm reporting. /// This is basically a message and an optional location. class DiagnosticInfoInlineAsm : public DiagnosticInfo { private: /// Optional line information. 0 if not set. - unsigned LocCookie; + unsigned LocCookie = 0; /// Message to be reported. const Twine &MsgStr; /// Optional origin of the problem. - const Instruction *Instr; + const Instruction *Instr = nullptr; public: /// \p MsgStr is the message to be reported to the frontend. 
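The new DerivedUser.h above explains the intent of the extension point but not the mechanics: a subclass passes its own deleter so Value::deleteValue() can destroy it without Value needing a virtual destructor. A sketch of how an out-of-tree User would hook in; the class name, use of the void type, and value-kind handling are hypothetical (in-tree, the value kind must be an ID reserved for the subclass):

```cpp
#include "llvm/IR/DerivedUser.h"
#include "llvm/IR/Type.h"

namespace {
// Hypothetical extension class living outside lib/IR.
class MyUser : public llvm::DerivedUser {
  // Called through Value::deleteValue(); casts back to the real type.
  static void deleteMe(llvm::DerivedUser *Self) {
    delete static_cast<MyUser *>(Self);
  }

public:
  MyUser(llvm::LLVMContext &C, unsigned ValueKind)
      : DerivedUser(llvm::Type::getVoidTy(C), ValueKind,
                    /*Use list*/ nullptr, /*NumOps*/ 0, deleteMe) {}
};
} // end anonymous namespace
```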
@@ -139,8 +138,7 @@ class DiagnosticInfoInlineAsm : public DiagnosticInfo { /// for the whole life time of the Diagnostic. DiagnosticInfoInlineAsm(const Twine &MsgStr, DiagnosticSeverity Severity = DS_Error) - : DiagnosticInfo(DK_InlineAsm, Severity), LocCookie(0), MsgStr(MsgStr), - Instr(nullptr) {} + : DiagnosticInfo(DK_InlineAsm, Severity), MsgStr(MsgStr) {} /// \p LocCookie if non-zero gives the line number for this report. /// \p MsgStr gives the message. @@ -149,7 +147,7 @@ class DiagnosticInfoInlineAsm : public DiagnosticInfo { DiagnosticInfoInlineAsm(unsigned LocCookie, const Twine &MsgStr, DiagnosticSeverity Severity = DS_Error) : DiagnosticInfo(DK_InlineAsm, Severity), LocCookie(LocCookie), - MsgStr(MsgStr), Instr(nullptr) {} + MsgStr(MsgStr) {} /// \p Instr gives the original instruction that triggered the diagnostic. /// \p MsgStr gives the message. @@ -294,10 +292,10 @@ class DiagnosticInfoSampleProfile : public DiagnosticInfo { DiagnosticInfoSampleProfile(StringRef FileName, const Twine &Msg, DiagnosticSeverity Severity = DS_Error) : DiagnosticInfo(DK_SampleProfile, Severity), FileName(FileName), - LineNum(0), Msg(Msg) {} + Msg(Msg) {} DiagnosticInfoSampleProfile(const Twine &Msg, DiagnosticSeverity Severity = DS_Error) - : DiagnosticInfo(DK_SampleProfile, Severity), LineNum(0), Msg(Msg) {} + : DiagnosticInfo(DK_SampleProfile, Severity), Msg(Msg) {} /// \see DiagnosticInfo::print. void print(DiagnosticPrinter &DP) const override; @@ -316,7 +314,7 @@ class DiagnosticInfoSampleProfile : public DiagnosticInfo { /// Line number where the diagnostic occurred. If 0, no line number will /// be emitted in the message. - unsigned LineNum; + unsigned LineNum = 0; /// Message to report. const Twine &Msg; @@ -351,8 +349,9 @@ class DiagnosticLocation { StringRef Filename; unsigned Line = 0; unsigned Column = 0; + public: - DiagnosticLocation() {} + DiagnosticLocation() = default; DiagnosticLocation(const DebugLoc &DL); DiagnosticLocation(const DISubprogram *SP); @@ -796,6 +795,7 @@ class OptimizationRemarkAnalysisFPCommute : public OptimizationRemarkAnalysis { const Twine &Msg) : OptimizationRemarkAnalysis(DK_OptimizationRemarkAnalysisFPCommute, PassName, Fn, Loc, Msg) {} + friend void emitOptimizationRemarkAnalysisFPCommute( LLVMContext &Ctx, const char *PassName, const Function &Fn, const DiagnosticLocation &Loc, const Twine &Msg); @@ -1012,6 +1012,7 @@ class DiagnosticInfoUnsupported : public DiagnosticInfoWithLocationBase { void print(DiagnosticPrinter &DP) const override; }; + } // end namespace llvm #endif // LLVM_IR_DIAGNOSTICINFO_H diff --git a/interpreter/llvm/src/include/llvm/IR/Dominators.h b/interpreter/llvm/src/include/llvm/IR/Dominators.h index def91e73eb1d1..5b21a2c83e4a8 100644 --- a/interpreter/llvm/src/include/llvm/IR/Dominators.h +++ b/interpreter/llvm/src/include/llvm/IR/Dominators.h @@ -34,13 +34,32 @@ class Module; class raw_ostream; extern template class DomTreeNodeBase; -extern template class DominatorTreeBase; +extern template class DominatorTreeBase; // DomTree +extern template class DominatorTreeBase; // PostDomTree -extern template void Calculate( - DominatorTreeBaseByGraphTraits> &DT, Function &F); -extern template void Calculate>( - DominatorTreeBaseByGraphTraits>> &DT, - Function &F); +namespace DomTreeBuilder { +using BBDomTree = DomTreeBase; +using BBPostDomTree = PostDomTreeBase; + +extern template void Calculate(BBDomTree &DT, Function &F); +extern template void Calculate(BBPostDomTree &DT, + Function &F); + +extern template void InsertEdge(BBDomTree 
&DT, BasicBlock *From, + BasicBlock *To); +extern template void InsertEdge(BBPostDomTree &DT, + BasicBlock *From, + BasicBlock *To); + +extern template void DeleteEdge(BBDomTree &DT, BasicBlock *From, + BasicBlock *To); +extern template void DeleteEdge(BBPostDomTree &DT, + BasicBlock *From, + BasicBlock *To); + +extern template bool Verify(const BBDomTree &DT); +extern template bool Verify(const BBPostDomTree &DT); +} // namespace DomTreeBuilder using DomTreeNode = DomTreeNodeBase; @@ -66,6 +85,7 @@ class BasicBlockEdge { return End; } + /// Check if this is the only edge between Start and End. bool isSingleEdge() const; }; @@ -111,14 +131,12 @@ template <> struct DenseMapInfo { /// the dominator tree is initially constructed may still exist in the tree, /// even if the tree is properly updated. Calling code should not rely on the /// preceding statements; this is stated only to assist human understanding. -class DominatorTree : public DominatorTreeBase { -public: - using Base = DominatorTreeBase; +class DominatorTree : public DominatorTreeBase { + public: + using Base = DominatorTreeBase; - DominatorTree() : DominatorTreeBase(false) {} - explicit DominatorTree(Function &F) : DominatorTreeBase(false) { - recalculate(F); - } + DominatorTree() = default; + explicit DominatorTree(Function &F) { recalculate(F); } /// Handle invalidation explicitly. bool invalidate(Function &F, const PreservedAnalyses &PA, @@ -143,6 +161,11 @@ class DominatorTree : public DominatorTreeBase { bool dominates(const Instruction *Def, const Use &U) const; bool dominates(const Instruction *Def, const Instruction *User) const; bool dominates(const Instruction *Def, const BasicBlock *BB) const; + + /// Return true if an edge dominates a use. + /// + /// If BBE is not a unique edge between start and end of the edge, it can + /// never dominate the use. bool dominates(const BasicBlockEdge &BBE, const Use &U) const; bool dominates(const BasicBlockEdge &BBE, const BasicBlock *BB) const; diff --git a/interpreter/llvm/src/include/llvm/IR/Function.h b/interpreter/llvm/src/include/llvm/IR/Function.h index 8a2a6ed87eb28..75fccc135daeb 100644 --- a/interpreter/llvm/src/include/llvm/IR/Function.h +++ b/interpreter/llvm/src/include/llvm/IR/Function.h @@ -19,10 +19,10 @@ #define LLVM_IR_FUNCTION_H #include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/ilist_node.h" -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" +#include "llvm/ADT/ilist_node.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/IR/Argument.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" @@ -123,7 +123,7 @@ class Function : public GlobalObject, public ilist_node { public: Function(const Function&) = delete; void operator=(const Function&) = delete; - ~Function() override; + ~Function(); static Function *Create(FunctionType *Ty, LinkageTypes Linkage, const Twine &N = "", Module *M = nullptr) { @@ -214,10 +214,6 @@ class Function : public GlobalObject, public ilist_node { addAttribute(AttributeList::FunctionIndex, Attr); } - void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) { - addAttribute(ArgNo + AttributeList::FirstArgIndex, Kind); - } - /// @brief Remove function attributes from this function. 
void removeFnAttr(Attribute::AttrKind Kind) { removeAttribute(AttributeList::FunctionIndex, Kind); @@ -229,10 +225,6 @@ class Function : public GlobalObject, public ilist_node { getContext(), AttributeList::FunctionIndex, Kind)); } - void removeParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) { - removeAttribute(ArgNo + AttributeList::FirstArgIndex, Kind); - } - /// \brief Set the entry count for this function. /// /// Entry count is the number of times this function was executed based on @@ -299,6 +291,15 @@ class Function : public GlobalObject, public ilist_node { /// @brief adds the attributes to the list of attributes. void addAttributes(unsigned i, const AttrBuilder &Attrs); + /// @brief adds the attribute to the list of attributes for the given arg. + void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind); + + /// @brief adds the attribute to the list of attributes for the given arg. + void addParamAttr(unsigned ArgNo, Attribute Attr); + + /// @brief adds the attributes to the list of attributes for the given arg. + void addParamAttrs(unsigned ArgNo, const AttrBuilder &Attrs); + /// @brief removes the attribute from the list of attributes. void removeAttribute(unsigned i, Attribute::AttrKind Kind); @@ -308,6 +309,15 @@ class Function : public GlobalObject, public ilist_node { /// @brief removes the attributes from the list of attributes. void removeAttributes(unsigned i, const AttrBuilder &Attrs); + /// @brief removes the attribute from the list of attributes. + void removeParamAttr(unsigned ArgNo, Attribute::AttrKind Kind); + + /// @brief removes the attribute from the list of attributes. + void removeParamAttr(unsigned ArgNo, StringRef Kind); + + /// @brief removes the attribute from the list of attributes. + void removeParamAttrs(unsigned ArgNo, const AttrBuilder &Attrs); + /// @brief check if an attributes is in the list of attributes. bool hasAttribute(unsigned i, Attribute::AttrKind Kind) const { return getAttributes().hasAttribute(i, Kind); @@ -329,10 +339,18 @@ class Function : public GlobalObject, public ilist_node { /// @brief adds the dereferenceable attribute to the list of attributes. void addDereferenceableAttr(unsigned i, uint64_t Bytes); + /// @brief adds the dereferenceable attribute to the list of attributes for + /// the given arg. + void addDereferenceableParamAttr(unsigned ArgNo, uint64_t Bytes); + /// @brief adds the dereferenceable_or_null attribute to the list of /// attributes. void addDereferenceableOrNullAttr(unsigned i, uint64_t Bytes); + /// @brief adds the dereferenceable_or_null attribute to the list of + /// attributes for the given arg. + void addDereferenceableOrNullParamAttr(unsigned ArgNo, uint64_t Bytes); + /// @brief Extract the alignment for a call or parameter (0=unknown). unsigned getParamAlignment(unsigned ArgNo) const { return AttributeSets.getParamAlignment(ArgNo); @@ -345,6 +363,12 @@ class Function : public GlobalObject, public ilist_node { return AttributeSets.getDereferenceableBytes(i); } + /// @brief Extract the number of dereferenceable bytes for a parameter. + /// @param ArgNo Index of an argument, with 0 being the first function arg. + uint64_t getParamDereferenceableBytes(unsigned ArgNo) const { + return AttributeSets.getParamDereferenceableBytes(ArgNo); + } + /// @brief Extract the number of dereferenceable_or_null bytes for a call or /// parameter (0=unknown). /// @param i AttributeList index, referring to a return value or argument. 
@@ -352,6 +376,13 @@ class Function : public GlobalObject, public ilist_node { return AttributeSets.getDereferenceableOrNullBytes(i); } + /// @brief Extract the number of dereferenceable_or_null bytes for a + /// parameter. + /// @param ArgNo AttributeList ArgNo, referring to an argument. + uint64_t getParamDereferenceableOrNullBytes(unsigned ArgNo) const { + return AttributeSets.getParamDereferenceableOrNullBytes(ArgNo); + } + /// @brief Determine if the function does not access memory. bool doesNotAccessMemory() const { return hasFnAttribute(Attribute::ReadNone); @@ -640,7 +671,7 @@ class Function : public GlobalObject, public ilist_node { void viewCFGOnly() const; /// Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return V->getValueID() == Value::FunctionVal; } diff --git a/interpreter/llvm/src/include/llvm/IR/GetElementPtrTypeIterator.h b/interpreter/llvm/src/include/llvm/IR/GetElementPtrTypeIterator.h index f017a449d33f4..3c143ea5f703e 100644 --- a/interpreter/llvm/src/include/llvm/IR/GetElementPtrTypeIterator.h +++ b/interpreter/llvm/src/include/llvm/IR/GetElementPtrTypeIterator.h @@ -21,9 +21,9 @@ #include "llvm/IR/Operator.h" #include "llvm/IR/User.h" #include "llvm/Support/Casting.h" -#include +#include #include -#include +#include #include namespace llvm { diff --git a/interpreter/llvm/src/include/llvm/IR/GlobalAlias.h b/interpreter/llvm/src/include/llvm/IR/GlobalAlias.h index d4bf0d7e1ed41..450583baaa3c1 100644 --- a/interpreter/llvm/src/include/llvm/IR/GlobalAlias.h +++ b/interpreter/llvm/src/include/llvm/IR/GlobalAlias.h @@ -88,7 +88,7 @@ class GlobalAlias : public GlobalIndirectSymbol, } // Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return V->getValueID() == Value::GlobalAliasVal; } }; diff --git a/interpreter/llvm/src/include/llvm/IR/GlobalIFunc.h b/interpreter/llvm/src/include/llvm/IR/GlobalIFunc.h index d90c7c78ed268..ef51315a6f5d9 100644 --- a/interpreter/llvm/src/include/llvm/IR/GlobalIFunc.h +++ b/interpreter/llvm/src/include/llvm/IR/GlobalIFunc.h @@ -70,7 +70,7 @@ class GlobalIFunc final : public GlobalIndirectSymbol, } // Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return V->getValueID() == Value::GlobalIFuncVal; } }; diff --git a/interpreter/llvm/src/include/llvm/IR/GlobalIndirectSymbol.h b/interpreter/llvm/src/include/llvm/IR/GlobalIndirectSymbol.h index 212703af71019..22c00686c549f 100644 --- a/interpreter/llvm/src/include/llvm/IR/GlobalIndirectSymbol.h +++ b/interpreter/llvm/src/include/llvm/IR/GlobalIndirectSymbol.h @@ -75,7 +75,7 @@ class GlobalIndirectSymbol : public GlobalValue { } // Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return V->getValueID() == Value::GlobalAliasVal || V->getValueID() == Value::GlobalIFuncVal; } diff --git a/interpreter/llvm/src/include/llvm/IR/GlobalObject.h b/interpreter/llvm/src/include/llvm/IR/GlobalObject.h index fc38f698027b1..278b193567f1f 100644 --- a/interpreter/llvm/src/include/llvm/IR/GlobalObject.h +++ b/interpreter/llvm/src/include/llvm/IR/GlobalObject.h @@ -155,7 +155,7 @@ class GlobalObject : public GlobalValue { public: // Methods for support type inquiry through isa, cast, and dyn_cast: - 
static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return V->getValueID() == Value::FunctionVal || V->getValueID() == Value::GlobalVariableVal; } diff --git a/interpreter/llvm/src/include/llvm/IR/GlobalValue.h b/interpreter/llvm/src/include/llvm/IR/GlobalValue.h index b595c225ce89e..d65d43cc5957d 100644 --- a/interpreter/llvm/src/include/llvm/IR/GlobalValue.h +++ b/interpreter/llvm/src/include/llvm/IR/GlobalValue.h @@ -23,9 +23,9 @@ #include "llvm/IR/Constant.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Value.h" -#include "llvm/Support/MD5.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MD5.h" #include #include #include @@ -161,6 +161,10 @@ class GlobalValue : public Constant { Parent = parent; } + ~GlobalValue() { + removeDeadConstantUsers(); // remove any dead constants using this. + } + public: enum ThreadLocalMode { NotThreadLocal = 0, @@ -172,10 +176,6 @@ class GlobalValue : public Constant { GlobalValue(const GlobalValue &) = delete; - ~GlobalValue() override { - removeDeadConstantUsers(); // remove any dead constants using this. - } - unsigned getAlignment() const; enum class UnnamedAddr { @@ -441,10 +441,14 @@ class GlobalValue : public Constant { void copyAttributesFrom(const GlobalValue *Src); public: - /// If special LLVM prefix that is used to inform the asm printer to not emit - /// usual symbol prefix before the symbol name is used then return linkage - /// name after skipping this special LLVM prefix. - static StringRef getRealLinkageName(StringRef Name) { + /// If the given string begins with the GlobalValue name mangling escape + /// character '\1', drop it. + /// + /// This function applies a specific mangling that is used in PGO profiles, + /// among other things. If you're trying to get a symbol name for an + /// arbitrary GlobalValue, this is not the function you're looking for; see + /// Mangler.h. 
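The rename above (getRealLinkageName to dropLLVMManglingEscape) keeps the behavior — strip a leading '\1' if present — while the new comment warns that it is not a general symbol-name query. A one-line migration sketch (displayName is a hypothetical caller):

```cpp
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/GlobalValue.h"
using namespace llvm;

// Before: GlobalValue::getRealLinkageName(Sym)
StringRef displayName(StringRef Sym) {
  return GlobalValue::dropLLVMManglingEscape(Sym); // "\1foo" -> "foo"
}
```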
+ static StringRef dropLLVMManglingEscape(StringRef Name) { if (!Name.empty() && Name[0] == '\1') return Name.substr(1); return Name; diff --git a/interpreter/llvm/src/include/llvm/IR/GlobalVariable.h b/interpreter/llvm/src/include/llvm/IR/GlobalVariable.h index 21d334c8f01db..34ace6f2b4f4b 100644 --- a/interpreter/llvm/src/include/llvm/IR/GlobalVariable.h +++ b/interpreter/llvm/src/include/llvm/IR/GlobalVariable.h @@ -23,8 +23,8 @@ #include "llvm/ADT/PointerUnion.h" #include "llvm/ADT/Twine.h" #include "llvm/ADT/ilist_node.h" -#include "llvm/IR/GlobalObject.h" #include "llvm/IR/Attributes.h" +#include "llvm/IR/GlobalObject.h" #include "llvm/IR/OperandTraits.h" #include "llvm/IR/Value.h" #include @@ -66,7 +66,7 @@ class GlobalVariable : public GlobalObject, public ilist_node { GlobalVariable(const GlobalVariable &) = delete; GlobalVariable &operator=(const GlobalVariable &) = delete; - ~GlobalVariable() override { + ~GlobalVariable() { dropAllReferences(); // FIXME: needed by operator delete @@ -78,8 +78,6 @@ class GlobalVariable : public GlobalObject, public ilist_node { return User::operator new(s, 1); } - void *operator new(size_t, unsigned) = delete; - /// Provide fast operand accessors DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); @@ -235,8 +233,15 @@ class GlobalVariable : public GlobalObject, public ilist_node { Attrs = A; } + /// Check if section name is present + bool hasImplicitSection() const { + return getAttributes().hasAttribute("bss-section") || + getAttributes().hasAttribute("data-section") || + getAttributes().hasAttribute("rodata-section"); + } + // Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return V->getValueID() == Value::GlobalVariableVal; } }; diff --git a/interpreter/llvm/src/include/llvm/IR/IRBuilder.h b/interpreter/llvm/src/include/llvm/IR/IRBuilder.h index 9d4c13c29f68e..5344a93efb335 100644 --- a/interpreter/llvm/src/include/llvm/IR/IRBuilder.h +++ b/interpreter/llvm/src/include/llvm/IR/IRBuilder.h @@ -15,6 +15,7 @@ #ifndef LLVM_IR_IRBUILDER_H #define LLVM_IR_IRBUILDER_H +#include "llvm-c/Types.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/None.h" #include "llvm/ADT/StringRef.h" @@ -41,11 +42,10 @@ #include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/CBindingWrapping.h" #include "llvm/Support/Casting.h" -#include "llvm-c/Types.h" +#include #include #include #include -#include #include namespace llvm { @@ -435,6 +435,26 @@ class IRBuilderBase { MDNode *ScopeTag = nullptr, MDNode *NoAliasTag = nullptr); + /// \brief Create and insert an element unordered-atomic memcpy between the + /// specified pointers. + /// + /// If the pointers aren't i8*, they will be converted. If a TBAA tag is + /// specified, it will be added to the instruction. Likewise with alias.scope + /// and noalias tags. 
+ CallInst *CreateElementUnorderedAtomicMemCpy( + Value *Dst, Value *Src, uint64_t Size, uint32_t ElementSize, + MDNode *TBAATag = nullptr, MDNode *TBAAStructTag = nullptr, + MDNode *ScopeTag = nullptr, MDNode *NoAliasTag = nullptr) { + return CreateElementUnorderedAtomicMemCpy( + Dst, Src, getInt64(Size), ElementSize, TBAATag, TBAAStructTag, ScopeTag, + NoAliasTag); + } + + CallInst *CreateElementUnorderedAtomicMemCpy( + Value *Dst, Value *Src, Value *Size, uint32_t ElementSize, + MDNode *TBAATag = nullptr, MDNode *TBAAStructTag = nullptr, + MDNode *ScopeTag = nullptr, MDNode *NoAliasTag = nullptr); + /// \brief Create and insert a memmove between the specified /// pointers. /// @@ -1042,7 +1062,7 @@ class IRBuilder : public IRBuilderBase, public Inserter { Value *CreateAnd(Value *LHS, Value *RHS, const Twine &Name = "") { if (Constant *RC = dyn_cast(RHS)) { - if (isa(RC) && cast(RC)->isAllOnesValue()) + if (isa(RC) && cast(RC)->isMinusOne()) return LHS; // LHS & -1 -> LHS if (Constant *LC = dyn_cast(LHS)) return Insert(Folder.CreateAnd(LC, RC), Name); @@ -1183,22 +1203,22 @@ class IRBuilder : public IRBuilderBase, public Inserter { return SI; } FenceInst *CreateFence(AtomicOrdering Ordering, - SynchronizationScope SynchScope = CrossThread, + SyncScope::ID SSID = SyncScope::System, const Twine &Name = "") { - return Insert(new FenceInst(Context, Ordering, SynchScope), Name); + return Insert(new FenceInst(Context, Ordering, SSID), Name); } AtomicCmpXchgInst * CreateAtomicCmpXchg(Value *Ptr, Value *Cmp, Value *New, AtomicOrdering SuccessOrdering, AtomicOrdering FailureOrdering, - SynchronizationScope SynchScope = CrossThread) { + SyncScope::ID SSID = SyncScope::System) { return Insert(new AtomicCmpXchgInst(Ptr, Cmp, New, SuccessOrdering, - FailureOrdering, SynchScope)); + FailureOrdering, SSID)); } AtomicRMWInst *CreateAtomicRMW(AtomicRMWInst::BinOp Op, Value *Ptr, Value *Val, AtomicOrdering Ordering, - SynchronizationScope SynchScope = CrossThread) { - return Insert(new AtomicRMWInst(Op, Ptr, Val, Ordering, SynchScope)); + SyncScope::ID SSID = SyncScope::System) { + return Insert(new AtomicRMWInst(Op, Ptr, Val, Ordering, SSID)); } Value *CreateGEP(Value *Ptr, ArrayRef IdxList, const Twine &Name = "") { @@ -1497,11 +1517,9 @@ class IRBuilder : public IRBuilderBase, public Inserter { const Twine &Name = "") { if (V->getType() == DestTy) return V; - if (V->getType()->getScalarType()->isPointerTy() && - DestTy->getScalarType()->isIntegerTy()) + if (V->getType()->isPtrOrPtrVectorTy() && DestTy->isIntOrIntVectorTy()) return CreatePtrToInt(V, DestTy, Name); - if (V->getType()->getScalarType()->isIntegerTy() && - DestTy->getScalarType()->isPointerTy()) + if (V->getType()->isIntOrIntVectorTy() && DestTy->isPtrOrPtrVectorTy()) return CreateIntToPtr(V, DestTy, Name); return CreateBitCast(V, DestTy, Name); diff --git a/interpreter/llvm/src/include/llvm/IR/InlineAsm.h b/interpreter/llvm/src/include/llvm/IR/InlineAsm.h index a57e7d63012b3..59874b05b0cef 100644 --- a/interpreter/llvm/src/include/llvm/IR/InlineAsm.h +++ b/interpreter/llvm/src/include/llvm/IR/InlineAsm.h @@ -28,7 +28,7 @@ class FunctionType; class PointerType; template class ConstantUniqueMap; -class InlineAsm : public Value { +class InlineAsm final : public Value { public: enum AsmDialect { AD_ATT, @@ -48,7 +48,6 @@ class InlineAsm : public Value { InlineAsm(FunctionType *Ty, const std::string &AsmString, const std::string &Constraints, bool hasSideEffects, bool isAlignStack, AsmDialect asmDialect); - ~InlineAsm() override; /// When the 
ConstantUniqueMap merges two types and makes two InlineAsms /// identical, it destroys one of them with this method. @@ -184,7 +183,7 @@ class InlineAsm : public Value { } // Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return V->getValueID() == Value::InlineAsmVal; } diff --git a/interpreter/llvm/src/include/llvm/IR/InstrTypes.h b/interpreter/llvm/src/include/llvm/IR/InstrTypes.h index 61ca90de7393c..d749077fd34a1 100644 --- a/interpreter/llvm/src/include/llvm/IR/InstrTypes.h +++ b/interpreter/llvm/src/include/llvm/IR/InstrTypes.h @@ -17,13 +17,13 @@ #define LLVM_IR_INSTRTYPES_H #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Instruction.h" @@ -62,9 +62,6 @@ class TerminatorInst : public Instruction { Use *Ops, unsigned NumOps, BasicBlock *InsertAtEnd) : Instruction(Ty, iType, Ops, NumOps, InsertAtEnd) {} - // Out of line virtual method, so the vtable, etc has a home. - ~TerminatorInst() override; - public: /// Return the number of successors that this terminator has. unsigned getNumSuccessors() const; @@ -76,10 +73,10 @@ class TerminatorInst : public Instruction { void setSuccessor(unsigned idx, BasicBlock *B); // Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const Instruction *I) { + static bool classof(const Instruction *I) { return I->isTerminator(); } - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return isa(V) && classof(cast(V)); } @@ -297,23 +294,18 @@ class UnaryInstruction : public Instruction { return User::operator new(s, 1); } - void *operator new(size_t, unsigned) = delete; - - // Out of line virtual method, so the vtable, etc has a home. - ~UnaryInstruction() override; - /// Transparently provide more efficient getOperand methods. DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); // Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const Instruction *I) { + static bool classof(const Instruction *I) { return I->getOpcode() == Instruction::Alloca || I->getOpcode() == Instruction::Load || I->getOpcode() == Instruction::VAArg || I->getOpcode() == Instruction::ExtractValue || (I->getOpcode() >= CastOpsBegin && I->getOpcode() < CastOpsEnd); } - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return isa(V) && classof(cast(V)); } }; @@ -330,14 +322,14 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(UnaryInstruction, Value) //===----------------------------------------------------------------------===// class BinaryOperator : public Instruction { + void AssertOK(); + protected: BinaryOperator(BinaryOps iType, Value *S1, Value *S2, Type *Ty, const Twine &Name, Instruction *InsertBefore); BinaryOperator(BinaryOps iType, Value *S1, Value *S2, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd); - void init(BinaryOps iType); - // Note: Instruction needs to be a friend here to call cloneImpl. 
friend class Instruction; @@ -349,8 +341,6 @@ class BinaryOperator : public Instruction { return User::operator new(s, 2); } - void *operator new(size_t, unsigned) = delete; - /// Transparently provide more efficient getOperand methods. DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); @@ -542,10 +532,10 @@ class BinaryOperator : public Instruction { bool swapOperands(); // Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const Instruction *I) { + static bool classof(const Instruction *I) { return I->isBinaryOp(); } - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return isa(V) && classof(cast(V)); } }; @@ -568,8 +558,6 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(BinaryOperator, Value) /// if (isa(Instr)) { ... } /// @brief Base class of casting instructions. class CastInst : public UnaryInstruction { - void anchor() override; - protected: /// @brief Constructor with insert-before-instruction semantics for subclasses CastInst(Type *Ty, unsigned iType, Value *S, @@ -845,10 +833,10 @@ class CastInst : public UnaryInstruction { static bool castIsValid(Instruction::CastOps op, Value *S, Type *DstTy); /// @brief Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const Instruction *I) { + static bool classof(const Instruction *I) { return I->isCast(); } - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return isa(V) && classof(cast(V)); } }; @@ -914,18 +902,12 @@ class CmpInst : public Instruction { Value *LHS, Value *RHS, const Twine &Name, BasicBlock *InsertAtEnd); - void anchor() override; // Out of line virtual method. - public: - CmpInst() = delete; - // allocate space for exactly two operands void *operator new(size_t s) { return User::operator new(s, 2); } - void *operator new(size_t, unsigned) = delete; - /// Construct a compare instruction, given the opcode, the predicate and /// the two operands. Optionally (if InstBefore is specified) insert the /// instruction into a BasicBlock right before the specified instruction. 
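The IRBuilder hunks above replace the two-value SynchronizationScope with the target-extensible SyncScope::ID throughout the atomic-instruction creators; the old CrossThread default corresponds to SyncScope::System. A small before/after sketch (emitAtomics is hypothetical):

```cpp
#include "llvm/IR/IRBuilder.h"
using namespace llvm;

void emitAtomics(IRBuilder<> &B, Value *Ptr, Value *Val) {
  // Before: B.CreateFence(AtomicOrdering::SequentiallyConsistent, CrossThread);
  B.CreateFence(AtomicOrdering::SequentiallyConsistent, SyncScope::System);

  // A fence ordered only against signal handlers in the same thread.
  B.CreateFence(AtomicOrdering::Acquire, SyncScope::SingleThread);

  B.CreateAtomicRMW(AtomicRMWInst::Add, Ptr, Val,
                    AtomicOrdering::SequentiallyConsistent, SyncScope::System);
}
```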
@@ -1080,11 +1062,11 @@ class CmpInst : public Instruction { static bool isImpliedFalseByMatchingCmp(Predicate Pred1, Predicate Pred2); /// @brief Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const Instruction *I) { + static bool classof(const Instruction *I) { return I->getOpcode() == Instruction::ICmp || I->getOpcode() == Instruction::FCmp; } - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return isa(V) && classof(cast(V)); } @@ -1170,8 +1152,8 @@ class FuncletPadInst : public Instruction { } // Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const Instruction *I) { return I->isFuncletPad(); } - static inline bool classof(const Value *V) { + static bool classof(const Instruction *I) { return I->isFuncletPad(); } + static bool classof(const Value *V) { return isa(V) && classof(cast(V)); } }; diff --git a/interpreter/llvm/src/include/llvm/IR/Instruction.def b/interpreter/llvm/src/include/llvm/IR/Instruction.def index 18711abb8060d..86617299c44ac 100644 --- a/interpreter/llvm/src/include/llvm/IR/Instruction.def +++ b/interpreter/llvm/src/include/llvm/IR/Instruction.def @@ -102,6 +102,10 @@ #define LAST_OTHER_INST(num) #endif +#ifndef HANDLE_USER_INST +#define HANDLE_USER_INST(num, opc, Class) HANDLE_OTHER_INST(num, opc, Class) +#endif + // Terminator Instructions - These instructions are used to terminate a basic // block of the program. Every basic block must end with one of these // instructions for it to be a well formed basic block. @@ -185,8 +189,8 @@ HANDLE_OTHER_INST(52, FCmp , FCmpInst ) // Floating point comparison instr. HANDLE_OTHER_INST(53, PHI , PHINode ) // PHI node instruction HANDLE_OTHER_INST(54, Call , CallInst ) // Call a function HANDLE_OTHER_INST(55, Select , SelectInst ) // select instruction -HANDLE_OTHER_INST(56, UserOp1, Instruction) // May be used internally in a pass -HANDLE_OTHER_INST(57, UserOp2, Instruction) // Internal to passes only +HANDLE_USER_INST (56, UserOp1, Instruction) // May be used internally in a pass +HANDLE_USER_INST (57, UserOp2, Instruction) // Internal to passes only HANDLE_OTHER_INST(58, VAArg , VAArgInst ) // vaarg instruction HANDLE_OTHER_INST(59, ExtractElement, ExtractElementInst)// extract from vector HANDLE_OTHER_INST(60, InsertElement, InsertElementInst) // insert into vector @@ -220,6 +224,8 @@ HANDLE_OTHER_INST(64, LandingPad, LandingPadInst) // Landing pad instruction. 
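The HANDLE_USER_INST hook added to Instruction.def above lets .def consumers treat the pass-internal UserOp1/UserOp2 opcodes specially, while leaving it undefined falls back to HANDLE_OTHER_INST as before. A sketch of a consumer that skips them while building an opcode-name table (illustrative; assumes the .def file's usual default-macro chaining):

```cpp
#include <cstdio>

// Defining HANDLE_USER_INST empty drops UserOp1/UserOp2 from the table;
// all other opcodes still flow through HANDLE_INST.
static const char *const OpcodeNames[] = {
#define HANDLE_INST(num, opcode, Class) #opcode,
#define HANDLE_USER_INST(num, opcode, Class) /* skip pass-internal ops */
#include "llvm/IR/Instruction.def"
};

int main() {
  for (const char *Name : OpcodeNames)
    std::printf("%s\n", Name);
}
```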
#undef HANDLE_OTHER_INST #undef LAST_OTHER_INST +#undef HANDLE_USER_INST + #ifdef HANDLE_INST #undef HANDLE_INST #endif diff --git a/interpreter/llvm/src/include/llvm/IR/Instruction.h b/interpreter/llvm/src/include/llvm/IR/Instruction.h index fca29900f4c29..8dc02111b8663 100644 --- a/interpreter/llvm/src/include/llvm/IR/Instruction.h +++ b/interpreter/llvm/src/include/llvm/IR/Instruction.h @@ -16,9 +16,9 @@ #define LLVM_IR_INSTRUCTION_H #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/ilist_node.h" #include "llvm/ADT/None.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/ilist_node.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/SymbolTableListTraits.h" #include "llvm/IR/User.h" @@ -36,6 +36,10 @@ class FastMathFlags; class MDNode; struct AAMDNodes; +template <> struct ilist_alloc_traits { + static inline void deleteNode(Instruction *V); +}; + class Instruction : public User, public ilist_node_with_parent { BasicBlock *Parent; @@ -47,13 +51,13 @@ class Instruction : public User, HasMetadataBit = 1 << 15 }; +protected: + ~Instruction(); // Use deleteValue() to delete a generic Instruction. + public: Instruction(const Instruction &) = delete; Instruction &operator=(const Instruction &) = delete; - // Out of line virtual method, so the vtable, etc has a home. - ~Instruction() override; - /// Specialize the methods defined in Value, as we know that an instruction /// can only be used by other instructions. Instruction *user_back() { return cast(*user_begin());} @@ -148,9 +152,14 @@ class Instruction : public User, return getOpcode() == AShr; } + /// Determine if the Opcode is and/or/xor. + static inline bool isBitwiseLogicOp(unsigned Opcode) { + return Opcode == And || Opcode == Or || Opcode == Xor; + } + /// Return true if this is and/or/xor. inline bool isBitwiseLogicOp() const { - return getOpcode() == And || getOpcode() == Or || getOpcode() == Xor; + return isBitwiseLogicOp(getOpcode()); } /// Determine if the OpCode is one of the CastInst instructions. @@ -356,9 +365,9 @@ class Instruction : public User, /// Copy I's fast-math flags void copyFastMathFlags(const Instruction *I); - /// Convenience method to copy supported wrapping, exact, and fast-math flags - /// from V to this instruction. - void copyIRFlags(const Value *V); + /// Convenience method to copy supported exact, fast-math, and (optionally) + /// wrapping flags from V to this instruction. + void copyIRFlags(const Value *V, bool IncludeWrapFlags = true); /// Logical 'and' of any supported wrapping, exact, and fast-math flags of /// V and this instruction. 
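Two Instruction.h changes above are worth illustrating: the static isBitwiseLogicOp overload decouples the and/or/xor test from having an Instruction in hand (the member version now forwards to it), and copyIRFlags gains an IncludeWrapFlags toggle. Minimal usage sketch (mightFoldAsLogic is hypothetical):

```cpp
#include "llvm/IR/Instruction.h"
using namespace llvm;

// The static form needs only an opcode, not an instruction.
bool mightFoldAsLogic(unsigned Opcode) {
  return Instruction::isBitwiseLogicOp(Opcode); // true for And, Or, Xor
}
```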
@@ -547,7 +556,7 @@ class Instruction : public User, /// Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return V->getValueID() >= Value::InstructionVal; } @@ -640,6 +649,10 @@ class Instruction : public User, Instruction *cloneImpl() const; }; +inline void ilist_alloc_traits::deleteNode(Instruction *V) { + V->deleteValue(); +} + } // end namespace llvm #endif // LLVM_IR_INSTRUCTION_H diff --git a/interpreter/llvm/src/include/llvm/IR/Instructions.h b/interpreter/llvm/src/include/llvm/IR/Instructions.h index 28245e46aa23e..60ae98869e552 100644 --- a/interpreter/llvm/src/include/llvm/IR/Instructions.h +++ b/interpreter/llvm/src/include/llvm/IR/Instructions.h @@ -1,4 +1,4 @@ -//===-- llvm/Instructions.h - Instruction subclass definitions --*- C++ -*-===// +//===- llvm/Instructions.h - Instruction subclass definitions ---*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -17,28 +17,33 @@ #define LLVM_IR_INSTRUCTIONS_H #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/None.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" +#include "llvm/ADT/iterator.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constant.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/OperandTraits.h" #include "llvm/IR/Type.h" #include "llvm/IR/Use.h" #include "llvm/IR/User.h" +#include "llvm/IR/Value.h" #include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" #include #include #include +#include namespace llvm { @@ -47,11 +52,6 @@ class ConstantInt; class DataLayout; class LLVMContext; -enum SynchronizationScope { - SingleThread = 0, - CrossThread = 1 -}; - //===----------------------------------------------------------------------===// // AllocaInst Class //===----------------------------------------------------------------------===// @@ -84,9 +84,6 @@ class AllocaInst : public UnaryInstruction { AllocaInst(Type *Ty, unsigned AddrSpace, Value *ArraySize, unsigned Align, const Twine &Name, BasicBlock *InsertAtEnd); - // Out of line virtual method, so the vtable, etc. has a home. - ~AllocaInst() override; - /// Return true if there is an allocation size parameter to the allocation /// instruction that is not 1. 
bool isArrayAllocation() const; @@ -143,10 +140,10 @@ class AllocaInst : public UnaryInstruction { } // Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const Instruction *I) { + static bool classof(const Instruction *I) { return (I->getOpcode() == Instruction::Alloca); } - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return isa(V) && classof(cast(V)); } @@ -193,17 +190,16 @@ class LoadInst : public UnaryInstruction { LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile, unsigned Align, BasicBlock *InsertAtEnd); LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile, unsigned Align, - AtomicOrdering Order, SynchronizationScope SynchScope = CrossThread, + AtomicOrdering Order, SyncScope::ID SSID = SyncScope::System, Instruction *InsertBefore = nullptr) : LoadInst(cast(Ptr->getType())->getElementType(), Ptr, - NameStr, isVolatile, Align, Order, SynchScope, InsertBefore) {} + NameStr, isVolatile, Align, Order, SSID, InsertBefore) {} LoadInst(Type *Ty, Value *Ptr, const Twine &NameStr, bool isVolatile, unsigned Align, AtomicOrdering Order, - SynchronizationScope SynchScope = CrossThread, + SyncScope::ID SSID = SyncScope::System, Instruction *InsertBefore = nullptr); LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile, - unsigned Align, AtomicOrdering Order, - SynchronizationScope SynchScope, + unsigned Align, AtomicOrdering Order, SyncScope::ID SSID, BasicBlock *InsertAtEnd); LoadInst(Value *Ptr, const char *NameStr, Instruction *InsertBefore); LoadInst(Value *Ptr, const char *NameStr, BasicBlock *InsertAtEnd); @@ -233,37 +229,38 @@ class LoadInst : public UnaryInstruction { void setAlignment(unsigned Align); - /// Returns the ordering effect of this fence. + /// Returns the ordering constraint of this load instruction. AtomicOrdering getOrdering() const { return AtomicOrdering((getSubclassDataFromInstruction() >> 7) & 7); } - /// Set the ordering constraint on this load. May not be Release or - /// AcquireRelease. + /// Sets the ordering constraint of this load instruction. May not be Release + /// or AcquireRelease. void setOrdering(AtomicOrdering Ordering) { setInstructionSubclassData((getSubclassDataFromInstruction() & ~(7 << 7)) | ((unsigned)Ordering << 7)); } - SynchronizationScope getSynchScope() const { - return SynchronizationScope((getSubclassDataFromInstruction() >> 6) & 1); + /// Returns the synchronization scope ID of this load instruction. + SyncScope::ID getSyncScopeID() const { + return SSID; } - /// Specify whether this load is ordered with respect to all - /// concurrently executing threads, or only with respect to signal handlers - /// executing in the same thread. - void setSynchScope(SynchronizationScope xthread) { - setInstructionSubclassData((getSubclassDataFromInstruction() & ~(1 << 6)) | - (xthread << 6)); + /// Sets the synchronization scope ID of this load instruction. + void setSyncScopeID(SyncScope::ID SSID) { + this->SSID = SSID; } + /// Sets the ordering constraint and the synchronization scope ID of this load + /// instruction. 
void setAtomic(AtomicOrdering Ordering, - SynchronizationScope SynchScope = CrossThread) { + SyncScope::ID SSID = SyncScope::System) { setOrdering(Ordering); - setSynchScope(SynchScope); + setSyncScopeID(SSID); } bool isSimple() const { return !isAtomic() && !isVolatile(); } + bool isUnordered() const { return (getOrdering() == AtomicOrdering::NotAtomic || getOrdering() == AtomicOrdering::Unordered) && @@ -281,10 +278,10 @@ class LoadInst : public UnaryInstruction { } // Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const Instruction *I) { + static bool classof(const Instruction *I) { return I->getOpcode() == Instruction::Load; } - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return isa(V) && classof(cast(V)); } @@ -294,6 +291,11 @@ class LoadInst : public UnaryInstruction { void setInstructionSubclassData(unsigned short D) { Instruction::setInstructionSubclassData(D); } + + /// The synchronization scope ID of this load instruction. Not quite enough + /// room in SubClassData for everything, so synchronization scope ID gets its + /// own field. + SyncScope::ID SSID; }; //===----------------------------------------------------------------------===// @@ -322,11 +324,10 @@ class StoreInst : public Instruction { unsigned Align, BasicBlock *InsertAtEnd); StoreInst(Value *Val, Value *Ptr, bool isVolatile, unsigned Align, AtomicOrdering Order, - SynchronizationScope SynchScope = CrossThread, + SyncScope::ID SSID = SyncScope::System, Instruction *InsertBefore = nullptr); StoreInst(Value *Val, Value *Ptr, bool isVolatile, - unsigned Align, AtomicOrdering Order, - SynchronizationScope SynchScope, + unsigned Align, AtomicOrdering Order, SyncScope::ID SSID, BasicBlock *InsertAtEnd); // allocate space for exactly two operands @@ -334,8 +335,6 @@ class StoreInst : public Instruction { return User::operator new(s, 2); } - void *operator new(size_t, unsigned) = delete; - /// Return true if this is a store to a volatile memory location. bool isVolatile() const { return getSubclassDataFromInstruction() & 1; } @@ -355,37 +354,38 @@ class StoreInst : public Instruction { void setAlignment(unsigned Align); - /// Returns the ordering effect of this store. + /// Returns the ordering constraint of this store instruction. AtomicOrdering getOrdering() const { return AtomicOrdering((getSubclassDataFromInstruction() >> 7) & 7); } - /// Set the ordering constraint on this store. May not be Acquire or - /// AcquireRelease. + /// Sets the ordering constraint of this store instruction. May not be + /// Acquire or AcquireRelease. void setOrdering(AtomicOrdering Ordering) { setInstructionSubclassData((getSubclassDataFromInstruction() & ~(7 << 7)) | ((unsigned)Ordering << 7)); } - SynchronizationScope getSynchScope() const { - return SynchronizationScope((getSubclassDataFromInstruction() >> 6) & 1); + /// Returns the synchronization scope ID of this store instruction. + SyncScope::ID getSyncScopeID() const { + return SSID; } - /// Specify whether this store instruction is ordered with respect to all - /// concurrently executing threads, or only with respect to signal handlers - /// executing in the same thread. - void setSynchScope(SynchronizationScope xthread) { - setInstructionSubclassData((getSubclassDataFromInstruction() & ~(1 << 6)) | - (xthread << 6)); + /// Sets the synchronization scope ID of this store instruction. 
+ void setSyncScopeID(SyncScope::ID SSID) { + this->SSID = SSID; } + /// Sets the ordering constraint and the synchronization scope ID of this + /// store instruction. void setAtomic(AtomicOrdering Ordering, - SynchronizationScope SynchScope = CrossThread) { + SyncScope::ID SSID = SyncScope::System) { setOrdering(Ordering); - setSynchScope(SynchScope); + setSyncScopeID(SSID); } bool isSimple() const { return !isAtomic() && !isVolatile(); } + bool isUnordered() const { return (getOrdering() == AtomicOrdering::NotAtomic || getOrdering() == AtomicOrdering::Unordered) && @@ -406,10 +406,10 @@ class StoreInst : public Instruction { } // Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const Instruction *I) { + static bool classof(const Instruction *I) { return I->getOpcode() == Instruction::Store; } - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return isa(V) && classof(cast(V)); } @@ -419,6 +419,11 @@ class StoreInst : public Instruction { void setInstructionSubclassData(unsigned short D) { Instruction::setInstructionSubclassData(D); } + + /// The synchronization scope ID of this store instruction. Not quite enough + /// room in SubClassData for everything, so synchronization scope ID gets its + /// own field. + SyncScope::ID SSID; }; template <> @@ -433,7 +438,7 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(StoreInst, Value) /// An instruction for ordering other memory operations. class FenceInst : public Instruction { - void Init(AtomicOrdering Ordering, SynchronizationScope SynchScope); + void Init(AtomicOrdering Ordering, SyncScope::ID SSID); protected: // Note: Instruction needs to be a friend here to call cloneImpl. @@ -445,10 +450,9 @@ class FenceInst : public Instruction { // Ordering may only be Acquire, Release, AcquireRelease, or // SequentiallyConsistent. FenceInst(LLVMContext &C, AtomicOrdering Ordering, - SynchronizationScope SynchScope = CrossThread, + SyncScope::ID SSID = SyncScope::System, Instruction *InsertBefore = nullptr); - FenceInst(LLVMContext &C, AtomicOrdering Ordering, - SynchronizationScope SynchScope, + FenceInst(LLVMContext &C, AtomicOrdering Ordering, SyncScope::ID SSID, BasicBlock *InsertAtEnd); // allocate space for exactly zero operands @@ -456,37 +460,33 @@ class FenceInst : public Instruction { return User::operator new(s, 0); } - void *operator new(size_t, unsigned) = delete; - - /// Returns the ordering effect of this fence. + /// Returns the ordering constraint of this fence instruction. AtomicOrdering getOrdering() const { return AtomicOrdering(getSubclassDataFromInstruction() >> 1); } - /// Set the ordering constraint on this fence. May only be Acquire, Release, - /// AcquireRelease, or SequentiallyConsistent. + /// Sets the ordering constraint of this fence instruction. May only be + /// Acquire, Release, AcquireRelease, or SequentiallyConsistent. void setOrdering(AtomicOrdering Ordering) { setInstructionSubclassData((getSubclassDataFromInstruction() & 1) | ((unsigned)Ordering << 1)); } - SynchronizationScope getSynchScope() const { - return SynchronizationScope(getSubclassDataFromInstruction() & 1); + /// Returns the synchronization scope ID of this fence instruction. + SyncScope::ID getSyncScopeID() const { + return SSID; } - /// Specify whether this fence orders other operations with respect to all - /// concurrently executing threads, or only with respect to signal handlers - /// executing in the same thread. 
- void setSynchScope(SynchronizationScope xthread) { - setInstructionSubclassData((getSubclassDataFromInstruction() & ~1) | - xthread); + /// Sets the synchronization scope ID of this fence instruction. + void setSyncScopeID(SyncScope::ID SSID) { + this->SSID = SSID; } // Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const Instruction *I) { + static bool classof(const Instruction *I) { return I->getOpcode() == Instruction::Fence; } - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return isa(V) && classof(cast(V)); } @@ -496,6 +496,11 @@ class FenceInst : public Instruction { void setInstructionSubclassData(unsigned short D) { Instruction::setInstructionSubclassData(D); } + + /// The synchronization scope ID of this fence instruction. Not quite enough + /// room in SubClassData for everything, so synchronization scope ID gets its + /// own field. + SyncScope::ID SSID; }; //===----------------------------------------------------------------------===// @@ -509,7 +514,7 @@ class FenceInst : public Instruction { class AtomicCmpXchgInst : public Instruction { void Init(Value *Ptr, Value *Cmp, Value *NewVal, AtomicOrdering SuccessOrdering, AtomicOrdering FailureOrdering, - SynchronizationScope SynchScope); + SyncScope::ID SSID); protected: // Note: Instruction needs to be a friend here to call cloneImpl. @@ -521,21 +526,17 @@ class AtomicCmpXchgInst : public Instruction { AtomicCmpXchgInst(Value *Ptr, Value *Cmp, Value *NewVal, AtomicOrdering SuccessOrdering, AtomicOrdering FailureOrdering, - SynchronizationScope SynchScope, - Instruction *InsertBefore = nullptr); + SyncScope::ID SSID, Instruction *InsertBefore = nullptr); AtomicCmpXchgInst(Value *Ptr, Value *Cmp, Value *NewVal, AtomicOrdering SuccessOrdering, AtomicOrdering FailureOrdering, - SynchronizationScope SynchScope, - BasicBlock *InsertAtEnd); + SyncScope::ID SSID, BasicBlock *InsertAtEnd); // allocate space for exactly three operands void *operator new(size_t s) { return User::operator new(s, 3); } - void *operator new(size_t, unsigned) = delete; - /// Return true if this is a cmpxchg from a volatile memory /// location. /// @@ -563,7 +564,12 @@ class AtomicCmpXchgInst : public Instruction { /// Transparently provide more efficient getOperand methods. DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); - /// Set the ordering constraint on this cmpxchg. + /// Returns the success ordering constraint of this cmpxchg instruction. + AtomicOrdering getSuccessOrdering() const { + return AtomicOrdering((getSubclassDataFromInstruction() >> 2) & 7); + } + + /// Sets the success ordering constraint of this cmpxchg instruction. void setSuccessOrdering(AtomicOrdering Ordering) { assert(Ordering != AtomicOrdering::NotAtomic && "CmpXchg instructions can only be atomic."); @@ -571,6 +577,12 @@ class AtomicCmpXchgInst : public Instruction { ((unsigned)Ordering << 2)); } + /// Returns the failure ordering constraint of this cmpxchg instruction. + AtomicOrdering getFailureOrdering() const { + return AtomicOrdering((getSubclassDataFromInstruction() >> 5) & 7); + } + + /// Sets the failure ordering constraint of this cmpxchg instruction. 
void setFailureOrdering(AtomicOrdering Ordering) { assert(Ordering != AtomicOrdering::NotAtomic && "CmpXchg instructions can only be atomic."); @@ -578,28 +590,14 @@ class AtomicCmpXchgInst : public Instruction { ((unsigned)Ordering << 5)); } - /// Specify whether this cmpxchg is atomic and orders other operations with - /// respect to all concurrently executing threads, or only with respect to - /// signal handlers executing in the same thread. - void setSynchScope(SynchronizationScope SynchScope) { - setInstructionSubclassData((getSubclassDataFromInstruction() & ~2) | - (SynchScope << 1)); - } - - /// Returns the ordering constraint on this cmpxchg. - AtomicOrdering getSuccessOrdering() const { - return AtomicOrdering((getSubclassDataFromInstruction() >> 2) & 7); - } - - /// Returns the ordering constraint on this cmpxchg. - AtomicOrdering getFailureOrdering() const { - return AtomicOrdering((getSubclassDataFromInstruction() >> 5) & 7); + /// Returns the synchronization scope ID of this cmpxchg instruction. + SyncScope::ID getSyncScopeID() const { + return SSID; } - /// Returns whether this cmpxchg is atomic between threads or only within a - /// single thread. - SynchronizationScope getSynchScope() const { - return SynchronizationScope((getSubclassDataFromInstruction() & 2) >> 1); + /// Sets the synchronization scope ID of this cmpxchg instruction. + void setSyncScopeID(SyncScope::ID SSID) { + this->SSID = SSID; } Value *getPointerOperand() { return getOperand(0); } @@ -641,10 +639,10 @@ class AtomicCmpXchgInst : public Instruction { } // Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const Instruction *I) { + static bool classof(const Instruction *I) { return I->getOpcode() == Instruction::AtomicCmpXchg; } - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return isa(V) && classof(cast(V)); } @@ -654,6 +652,11 @@ class AtomicCmpXchgInst : public Instruction { void setInstructionSubclassData(unsigned short D) { Instruction::setInstructionSubclassData(D); } + + /// The synchronization scope ID of this cmpxchg instruction. Not quite + /// enough room in SubClassData for everything, so synchronization scope ID + /// gets its own field. + SyncScope::ID SSID; }; template <> @@ -713,10 +716,10 @@ class AtomicRMWInst : public Instruction { }; AtomicRMWInst(BinOp Operation, Value *Ptr, Value *Val, - AtomicOrdering Ordering, SynchronizationScope SynchScope, + AtomicOrdering Ordering, SyncScope::ID SSID, Instruction *InsertBefore = nullptr); AtomicRMWInst(BinOp Operation, Value *Ptr, Value *Val, - AtomicOrdering Ordering, SynchronizationScope SynchScope, + AtomicOrdering Ordering, SyncScope::ID SSID, BasicBlock *InsertAtEnd); // allocate space for exactly two operands @@ -724,8 +727,6 @@ class AtomicRMWInst : public Instruction { return User::operator new(s, 2); } - void *operator new(size_t, unsigned) = delete; - BinOp getOperation() const { return static_cast(getSubclassDataFromInstruction() >> 5); } @@ -752,7 +753,12 @@ class AtomicRMWInst : public Instruction { /// Transparently provide more efficient getOperand methods. DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); - /// Set the ordering constraint on this RMW. + /// Returns the ordering constraint of this rmw instruction. + AtomicOrdering getOrdering() const { + return AtomicOrdering((getSubclassDataFromInstruction() >> 2) & 7); + } + + /// Sets the ordering constraint of this rmw instruction. 
void setOrdering(AtomicOrdering Ordering) { assert(Ordering != AtomicOrdering::NotAtomic && "atomicrmw instructions can only be atomic."); @@ -760,23 +766,14 @@ class AtomicRMWInst : public Instruction { ((unsigned)Ordering << 2)); } - /// Specify whether this RMW orders other operations with respect to all - /// concurrently executing threads, or only with respect to signal handlers - /// executing in the same thread. - void setSynchScope(SynchronizationScope SynchScope) { - setInstructionSubclassData((getSubclassDataFromInstruction() & ~2) | - (SynchScope << 1)); + /// Returns the synchronization scope ID of this rmw instruction. + SyncScope::ID getSyncScopeID() const { + return SSID; } - /// Returns the ordering constraint on this RMW. - AtomicOrdering getOrdering() const { - return AtomicOrdering((getSubclassDataFromInstruction() >> 2) & 7); - } - - /// Returns whether this RMW is atomic between threads or only within a - /// single thread. - SynchronizationScope getSynchScope() const { - return SynchronizationScope((getSubclassDataFromInstruction() & 2) >> 1); + /// Sets the synchronization scope ID of this rmw instruction. + void setSyncScopeID(SyncScope::ID SSID) { + this->SSID = SSID; } Value *getPointerOperand() { return getOperand(0); } @@ -792,22 +789,27 @@ class AtomicRMWInst : public Instruction { } // Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const Instruction *I) { + static bool classof(const Instruction *I) { return I->getOpcode() == Instruction::AtomicRMW; } - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return isa(V) && classof(cast(V)); } private: void Init(BinOp Operation, Value *Ptr, Value *Val, - AtomicOrdering Ordering, SynchronizationScope SynchScope); + AtomicOrdering Ordering, SyncScope::ID SSID); // Shadow Instruction::setInstructionSubclassData with a private forwarding // method so that subclasses cannot accidentally use it. void setInstructionSubclassData(unsigned short D) { Instruction::setInstructionSubclassData(D); } + + /// The synchronization scope ID of this rmw instruction. Not quite enough + /// room in SubClassData for everything, so synchronization scope ID gets its + /// own field. + SyncScope::ID SSID; }; template <> @@ -836,10 +838,7 @@ class GetElementPtrInst : public Instruction { Type *SourceElementType; Type *ResultElementType; - void anchor() override; - GetElementPtrInst(const GetElementPtrInst &GEPI); - void init(Value *Ptr, ArrayRef IdxList, const Twine &NameStr); /// Constructors - Create a getelementptr instruction with a base pointer an /// list of indices. The first ctor can optionally insert before an existing @@ -852,6 +851,8 @@ class GetElementPtrInst : public Instruction { ArrayRef IdxList, unsigned Values, const Twine &NameStr, BasicBlock *InsertAtEnd); + void init(Value *Ptr, ArrayRef IdxList, const Twine &NameStr); + protected: // Note: Instruction needs to be a friend here to call cloneImpl. 
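The same pattern applies to atomicrmw, whose BinOp stays in the high subclass-data bits while the scope moves to the SSID member; a sketch under the same LLVM 5.0 assumption (helper name illustrative):

#include "llvm/IR/IRBuilder.h"

using namespace llvm;

static Value *emitFetchAdd(IRBuilder<> &B, Value *Counter, Value *Delta) {
  return B.CreateAtomicRMW(AtomicRMWInst::Add, Counter, Delta,
                           AtomicOrdering::AcquireRelease, SyncScope::System);
}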
friend class Instruction; @@ -1053,10 +1054,10 @@ class GetElementPtrInst : public Instruction { bool accumulateConstantOffset(const DataLayout &DL, APInt &Offset) const; // Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const Instruction *I) { + static bool classof(const Instruction *I) { return (I->getOpcode() == Instruction::GetElementPtr); } - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return isa(V) && classof(cast(V)); } }; @@ -1105,11 +1106,8 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(GetElementPtrInst, Value) /// must be identical types. /// Represent an integer comparison operator. class ICmpInst: public CmpInst { - void anchor() override; - void AssertOK() { - assert(getPredicate() >= CmpInst::FIRST_ICMP_PREDICATE && - getPredicate() <= CmpInst::LAST_ICMP_PREDICATE && + assert(isIntPredicate() && "Invalid ICmp predicate value"); assert(getOperand(0)->getType() == getOperand(1)->getType() && "Both operands to ICmp instruction are not of the same type!"); @@ -1233,10 +1231,10 @@ class ICmpInst: public CmpInst { } // Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const Instruction *I) { + static bool classof(const Instruction *I) { return I->getOpcode() == Instruction::ICmp; } - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return isa(V) && classof(cast(V)); } }; @@ -1250,6 +1248,15 @@ class ICmpInst: public CmpInst { /// vectors of floating point values. The operands must be identical types. /// Represents a floating point comparison operator. class FCmpInst: public CmpInst { + void AssertOK() { + assert(isFPPredicate() && "Invalid FCmp predicate value"); + assert(getOperand(0)->getType() == getOperand(1)->getType() && + "Both operands to FCmp instruction are not of the same type!"); + // Check that the operands are the right type + assert(getOperand(0)->getType()->isFPOrFPVectorTy() && + "Invalid operand types for FCmp instruction"); + } + protected: // Note: Instruction needs to be a friend here to call cloneImpl. friend class Instruction; @@ -1268,13 +1275,7 @@ class FCmpInst: public CmpInst { ) : CmpInst(makeCmpResultType(LHS->getType()), Instruction::FCmp, pred, LHS, RHS, NameStr, InsertBefore) { - assert(pred <= FCmpInst::LAST_FCMP_PREDICATE && - "Invalid FCmp predicate value"); - assert(getOperand(0)->getType() == getOperand(1)->getType() && - "Both operands to FCmp instruction are not of the same type!"); - // Check that the operands are the right type - assert(getOperand(0)->getType()->isFPOrFPVectorTy() && - "Invalid operand types for FCmp instruction"); + AssertOK(); } /// Constructor with insert-at-end semantics. 
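The duplicated predicate-range asserts collapse into AssertOK, and the open-coded FIRST_*/LAST_* comparisons become isIntPredicate()/isFPPredicate(). The same helpers are usable from client code; a small sketch (helper name illustrative):

#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

static bool predicateAgreesWithOpcode(const CmpInst *CI) {
  // Mirrors what the new AssertOK bodies check.
  return isa<ICmpInst>(CI) ? CI->isIntPredicate() : CI->isFPPredicate();
}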
@@ -1287,13 +1288,7 @@ class FCmpInst: public CmpInst { ) : CmpInst(makeCmpResultType(LHS->getType()), Instruction::FCmp, pred, LHS, RHS, NameStr, &InsertAtEnd) { - assert(pred <= FCmpInst::LAST_FCMP_PREDICATE && - "Invalid FCmp predicate value"); - assert(getOperand(0)->getType() == getOperand(1)->getType() && - "Both operands to FCmp instruction are not of the same type!"); - // Check that the operands are the right type - assert(getOperand(0)->getType()->isFPOrFPVectorTy() && - "Invalid operand types for FCmp instruction"); + AssertOK(); } /// Constructor with no-insertion semantics @@ -1304,13 +1299,7 @@ class FCmpInst: public CmpInst { const Twine &NameStr = "" ///< Name of the instruction ) : CmpInst(makeCmpResultType(LHS->getType()), Instruction::FCmp, pred, LHS, RHS, NameStr) { - assert(pred <= FCmpInst::LAST_FCMP_PREDICATE && - "Invalid FCmp predicate value"); - assert(getOperand(0)->getType() == getOperand(1)->getType() && - "Both operands to FCmp instruction are not of the same type!"); - // Check that the operands are the right type - assert(getOperand(0)->getType()->isFPOrFPVectorTy() && - "Invalid operand types for FCmp instruction"); + AssertOK(); } /// @returns true if the predicate of this instruction is EQ or NE. @@ -1349,10 +1338,10 @@ class FCmpInst: public CmpInst { } /// Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const Instruction *I) { + static bool classof(const Instruction *I) { return I->getOpcode() == Instruction::FCmp; } - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return isa(V) && classof(cast(V)); } }; @@ -1419,8 +1408,6 @@ class CallInst : public Instruction, CallInst *cloneImpl() const; public: - ~CallInst() override; - static CallInst *Create(Value *Func, ArrayRef Args, ArrayRef Bundles = None, const Twine &NameStr = "", @@ -1661,6 +1648,9 @@ class CallInst : public Instruction, /// Adds the attribute to the indicated argument void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind); + /// Adds the attribute to the indicated argument + void addParamAttr(unsigned ArgNo, Attribute Attr); + /// removes the attribute from the list of attributes. void removeAttribute(unsigned i, Attribute::AttrKind Kind); @@ -1670,6 +1660,9 @@ class CallInst : public Instruction, /// Removes the attribute from the given argument void removeParamAttr(unsigned ArgNo, Attribute::AttrKind Kind); + /// Removes the attribute from the given argument + void removeParamAttr(unsigned ArgNo, StringRef Kind); + /// adds the dereferenceable attribute to the list of attributes. void addDereferenceableAttr(unsigned i, uint64_t Bytes); @@ -1705,6 +1698,18 @@ class CallInst : public Instruction, return getAttributes().getAttribute(i, Kind); } + /// Get the attribute of a given kind from a given arg + Attribute getParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) const { + assert(ArgNo < getNumArgOperands() && "Out of bounds"); + return getAttributes().getParamAttr(ArgNo, Kind); + } + + /// Get the attribute of a given kind from a given arg + Attribute getParamAttr(unsigned ArgNo, StringRef Kind) const { + assert(ArgNo < getNumArgOperands() && "Out of bounds"); + return getAttributes().getParamAttr(ArgNo, Kind); + } + /// Return true if the data operand at index \p i has the attribute \p /// A. 
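The argument-indexed attribute helpers added above take a 0-based ArgNo directly, instead of the old attribute-list index arithmetic. A usage sketch built only on APIs from this hunk (helper name and the 8-byte figure are illustrative; assumes the call has at least one argument):

#include "llvm/IR/Instructions.h"

using namespace llvm;

static bool markAndCheckFirstArg(CallInst *CI) {
  CI->addParamAttr(0, Attribute::NonNull);  // by enum kind
  CI->addParamAttr(                         // by full Attribute, the new overload
      0, Attribute::getWithDereferenceableBytes(CI->getContext(), 8));
  return CI->getParamAttr(0, Attribute::NonNull)
      .hasAttribute(Attribute::NonNull);
}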
/// @@ -1872,10 +1877,10 @@ class CallInst : public Instruction, } // Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const Instruction *I) { + static bool classof(const Instruction *I) { return I->getOpcode() == Instruction::Call; } - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return isa(V) && classof(cast(V)); } @@ -2010,10 +2015,10 @@ class SelectInst : public Instruction { } // Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const Instruction *I) { + static bool classof(const Instruction *I) { return I->getOpcode() == Instruction::Select; } - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return isa(V) && classof(cast(V)); } }; @@ -2056,10 +2061,10 @@ class VAArgInst : public UnaryInstruction { static unsigned getPointerOperandIndex() { return 0U; } // Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const Instruction *I) { + static bool classof(const Instruction *I) { return I->getOpcode() == VAArg; } - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return isa(V) && classof(cast(V)); } }; @@ -2113,10 +2118,10 @@ class ExtractElementInst : public Instruction { DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); // Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const Instruction *I) { + static bool classof(const Instruction *I) { return I->getOpcode() == Instruction::ExtractElement; } - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return isa(V) && classof(cast(V)); } }; @@ -2176,10 +2181,10 @@ class InsertElementInst : public Instruction { DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); // Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const Instruction *I) { + static bool classof(const Instruction *I) { return I->getOpcode() == Instruction::InsertElement; } - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return isa(V) && classof(cast(V)); } }; @@ -2275,10 +2280,10 @@ class ShuffleVectorInst : public Instruction { } // Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const Instruction *I) { + static bool classof(const Instruction *I) { return I->getOpcode() == Instruction::ShuffleVector; } - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return isa(V) && classof(cast(V)); } }; @@ -2301,6 +2306,7 @@ class ExtractValueInst : public UnaryInstruction { SmallVector Indices; ExtractValueInst(const ExtractValueInst &EVI); + /// Constructors - Create a extractvalue instruction with a base aggregate /// value and a list of indices. The first ctor can optionally insert before /// an existing instruction, the second appends the new instruction to the @@ -2313,9 +2319,6 @@ class ExtractValueInst : public UnaryInstruction { ArrayRef Idxs, const Twine &NameStr, BasicBlock *InsertAtEnd); - // allocate space for exactly one operand - void *operator new(size_t s) { return User::operator new(s, 1); } - void init(ArrayRef Idxs, const Twine &NameStr); protected: @@ -2346,7 +2349,8 @@ class ExtractValueInst : public UnaryInstruction { /// Null is returned if the indices are invalid for the specified type. 
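All of these classof hunks are the same mechanical cleanup: a static member function defined inside its class is implicitly inline, so the keyword was redundant. The casting machinery is unaffected; a sketch (helper name illustrative):

#include "llvm/IR/Instructions.h"
#include "llvm/Support/Casting.h"

using namespace llvm;

static const SelectInst *asSelect(const Value *V) {
  return dyn_cast<SelectInst>(V); // still routed through SelectInst::classof
}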
static Type *getIndexedType(Type *Agg, ArrayRef Idxs); - typedef const unsigned* idx_iterator; + using idx_iterator = const unsigned*; + inline idx_iterator idx_begin() const { return Indices.begin(); } inline idx_iterator idx_end() const { return Indices.end(); } inline iterator_range indices() const { @@ -2376,10 +2380,10 @@ class ExtractValueInst : public UnaryInstruction { } // Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const Instruction *I) { + static bool classof(const Instruction *I) { return I->getOpcode() == Instruction::ExtractValue; } - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return isa(V) && classof(cast(V)); } }; @@ -2449,8 +2453,6 @@ class InsertValueInst : public Instruction { return User::operator new(s, 2); } - void *operator new(size_t, unsigned) = delete; - static InsertValueInst *Create(Value *Agg, Value *Val, ArrayRef Idxs, const Twine &NameStr = "", @@ -2468,7 +2470,8 @@ class InsertValueInst : public Instruction { /// Transparently provide more efficient getOperand methods. DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); - typedef const unsigned* idx_iterator; + using idx_iterator = const unsigned*; + inline idx_iterator idx_begin() const { return Indices.begin(); } inline idx_iterator idx_end() const { return Indices.end(); } inline iterator_range indices() const { @@ -2508,10 +2511,10 @@ class InsertValueInst : public Instruction { } // Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const Instruction *I) { + static bool classof(const Instruction *I) { return I->getOpcode() == Instruction::InsertValue; } - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return isa(V) && classof(cast(V)); } }; @@ -2559,7 +2562,6 @@ class PHINode : public Instruction { unsigned ReservedSpace; PHINode(const PHINode &PN); - // allocate space for exactly zero operands explicit PHINode(Type *Ty, unsigned NumReservedValues, const Twine &NameStr = "", @@ -2578,12 +2580,6 @@ class PHINode : public Instruction { allocHungoffUses(ReservedSpace); } - void *operator new(size_t s) { - return User::operator new(s); - } - - void anchor() override; - protected: // Note: Instruction needs to be a friend here to call cloneImpl. friend class Instruction; @@ -2598,8 +2594,6 @@ class PHINode : public Instruction { } public: - void *operator new(size_t, unsigned) = delete; - /// Constructors - NumReservedValues is a hint for the number of incoming /// edges that this phi node will have (use 0 if you really have no idea). static PHINode *Create(Type *Ty, unsigned NumReservedValues, @@ -2619,8 +2613,8 @@ class PHINode : public Instruction { // Block iterator interface. This provides access to the list of incoming // basic blocks, which parallels the list of incoming values. 
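The idx_iterator alias above (ExtractValueInst and InsertValueInst) is normally consumed through indices(); a minimal sketch assuming the LLVM 5.0 headers (helper name illustrative):

#include "llvm/IR/Instructions.h"

using namespace llvm;

static bool extractsThroughIndex(const ExtractValueInst &EVI, unsigned Idx) {
  for (unsigned I : EVI.indices())
    if (I == Idx)
      return true;
  return false;
}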
- typedef BasicBlock **block_iterator; - typedef BasicBlock * const *const_block_iterator; + using block_iterator = BasicBlock **; + using const_block_iterator = BasicBlock * const *; block_iterator block_begin() { Use::UserRef *ref = @@ -2669,9 +2663,11 @@ class PHINode : public Instruction { "All operands to PHI node must be the same type as the PHI node!"); setOperand(i, V); } + static unsigned getOperandNumForIncomingValue(unsigned i) { return i; } + static unsigned getIncomingValueNumForOperand(unsigned i) { return i; } @@ -2755,10 +2751,10 @@ class PHINode : public Instruction { bool hasConstantOrUndefValue() const; /// Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const Instruction *I) { + static bool classof(const Instruction *I) { return I->getOpcode() == Instruction::PHI; } - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return isa(V) && classof(cast(V)); } @@ -2815,8 +2811,6 @@ class LandingPadInst : public Instruction { LandingPadInst *cloneImpl() const; public: - void *operator new(size_t, unsigned) = delete; - /// Constructors - NumReservedClauses is a hint for the number of incoming /// clauses that this landingpad will have (use 0 if you really have no idea). static LandingPadInst *Create(Type *RetTy, unsigned NumReservedClauses, @@ -2866,10 +2860,10 @@ class LandingPadInst : public Instruction { void reserveClauses(unsigned Size) { growOperands(Size); } // Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const Instruction *I) { + static bool classof(const Instruction *I) { return I->getOpcode() == Instruction::LandingPad; } - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return isa(V) && classof(cast(V)); } }; @@ -2915,8 +2909,6 @@ class ReturnInst : public TerminatorInst { ReturnInst *cloneImpl() const; public: - ~ReturnInst() override; - static ReturnInst* Create(LLVMContext &C, Value *retVal = nullptr, Instruction *InsertBefore = nullptr) { return new(!!retVal) ReturnInst(C, retVal, InsertBefore); @@ -2942,18 +2934,23 @@ class ReturnInst : public TerminatorInst { unsigned getNumSuccessors() const { return 0; } // Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const Instruction *I) { + static bool classof(const Instruction *I) { return (I->getOpcode() == Instruction::Ret); } - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return isa(V) && classof(cast(V)); } private: friend TerminatorInst; - BasicBlock *getSuccessorV(unsigned idx) const; - unsigned getNumSuccessorsV() const; - void setSuccessorV(unsigned idx, BasicBlock *B); + + BasicBlock *getSuccessor(unsigned idx) const { + llvm_unreachable("ReturnInst has no successors!"); + } + + void setSuccessor(unsigned idx, BasicBlock *B) { + llvm_unreachable("ReturnInst has no successors!"); + } }; template <> @@ -3053,18 +3050,12 @@ class BranchInst : public TerminatorInst { void swapSuccessors(); // Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const Instruction *I) { + static bool classof(const Instruction *I) { return (I->getOpcode() == Instruction::Br); } - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return isa(V) && classof(cast(V)); } - -private: - friend TerminatorInst; - BasicBlock *getSuccessorV(unsigned idx) const; - unsigned getNumSuccessorsV() const; - void 
setSuccessorV(unsigned idx, BasicBlock *B); }; template <> @@ -3118,8 +3109,6 @@ class SwitchInst : public TerminatorInst { SwitchInst *cloneImpl() const; public: - void *operator new(size_t, unsigned) = delete; - // -2 static const unsigned DefaultPseudoIndex = static_cast(~0L-1); @@ -3138,7 +3127,7 @@ class SwitchInst : public TerminatorInst { protected: // Expose the switch type we're parameterized with to the iterator. - typedef SwitchInstT SwitchInstType; + using SwitchInstType = SwitchInstT; SwitchInstT *SI; ptrdiff_t Index; @@ -3179,8 +3168,8 @@ class SwitchInst : public TerminatorInst { } }; - typedef CaseHandleImpl - ConstCaseHandle; + using ConstCaseHandle = + CaseHandleImpl; class CaseHandle : public CaseHandleImpl { @@ -3207,7 +3196,7 @@ class SwitchInst : public TerminatorInst { : public iterator_facade_base, std::random_access_iterator_tag, CaseHandleT> { - typedef typename CaseHandleT::SwitchInstType SwitchInstT; + using SwitchInstT = typename CaseHandleT::SwitchInstType; CaseHandleT Case; @@ -3269,8 +3258,8 @@ class SwitchInst : public TerminatorInst { const CaseHandleT &operator*() const { return Case; } }; - typedef CaseIteratorImpl CaseIt; - typedef CaseIteratorImpl ConstCaseIt; + using CaseIt = CaseIteratorImpl; + using ConstCaseIt = CaseIteratorImpl; static SwitchInst *Create(Value *Value, BasicBlock *Default, unsigned NumCases, @@ -3418,18 +3407,12 @@ class SwitchInst : public TerminatorInst { } // Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const Instruction *I) { + static bool classof(const Instruction *I) { return I->getOpcode() == Instruction::Switch; } - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return isa(V) && classof(cast(V)); } - -private: - friend TerminatorInst; - BasicBlock *getSuccessorV(unsigned idx) const; - unsigned getNumSuccessorsV() const; - void setSuccessorV(unsigned idx, BasicBlock *B); }; template <> @@ -3479,8 +3462,6 @@ class IndirectBrInst : public TerminatorInst { IndirectBrInst *cloneImpl() const; public: - void *operator new(size_t, unsigned) = delete; - static IndirectBrInst *Create(Value *Address, unsigned NumDests, Instruction *InsertBefore = nullptr) { return new IndirectBrInst(Address, NumDests, InsertBefore); @@ -3524,18 +3505,12 @@ class IndirectBrInst : public TerminatorInst { } // Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const Instruction *I) { + static bool classof(const Instruction *I) { return I->getOpcode() == Instruction::IndirectBr; } - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return isa(V) && classof(cast(V)); } - -private: - friend TerminatorInst; - BasicBlock *getSuccessorV(unsigned idx) const; - unsigned getNumSuccessorsV() const; - void setSuccessorV(unsigned idx, BasicBlock *B); }; template <> @@ -3656,6 +3631,7 @@ class InvokeInst : public TerminatorInst, return new (Values) InvokeInst(Func, IfNormal, IfException, Args, None, Values, NameStr, InsertAtEnd); } + static InvokeInst *Create(Value *Func, BasicBlock *IfNormal, BasicBlock *IfException, ArrayRef Args, ArrayRef Bundles, @@ -4005,19 +3981,14 @@ class InvokeInst : public TerminatorInst, unsigned getNumSuccessors() const { return 2; } // Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const Instruction *I) { + static bool classof(const Instruction *I) { return (I->getOpcode() == Instruction::Invoke); } - static inline bool 
classof(const Value *V) { + static bool classof(const Value *V) { return isa(V) && classof(cast(V)); } private: - friend TerminatorInst; - BasicBlock *getSuccessorV(unsigned idx) const; - unsigned getNumSuccessorsV() const; - void setSuccessorV(unsigned idx, BasicBlock *B); - template bool hasFnAttrImpl(AttrKind Kind) const { if (Attrs.hasAttribute(AttributeList::FunctionIndex, Kind)) return true; @@ -4105,18 +4076,23 @@ class ResumeInst : public TerminatorInst { unsigned getNumSuccessors() const { return 0; } // Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const Instruction *I) { + static bool classof(const Instruction *I) { return I->getOpcode() == Instruction::Resume; } - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return isa(V) && classof(cast(V)); } private: friend TerminatorInst; - BasicBlock *getSuccessorV(unsigned idx) const; - unsigned getNumSuccessorsV() const; - void setSuccessorV(unsigned idx, BasicBlock *B); + + BasicBlock *getSuccessor(unsigned idx) const { + llvm_unreachable("ResumeInst has no successors!"); + } + + void setSuccessor(unsigned idx, BasicBlock *NewSucc) { + llvm_unreachable("ResumeInst has no successors!"); + } }; template <> @@ -4168,8 +4144,6 @@ class CatchSwitchInst : public TerminatorInst { CatchSwitchInst *cloneImpl() const; public: - void *operator new(size_t, unsigned) = delete; - static CatchSwitchInst *Create(Value *ParentPad, BasicBlock *UnwindDest, unsigned NumHandlers, const Twine &NameStr = "", @@ -4221,13 +4195,14 @@ class CatchSwitchInst : public TerminatorInst { } public: - typedef std::pointer_to_unary_function DerefFnTy; - typedef mapped_iterator handler_iterator; - typedef iterator_range handler_range; - typedef std::pointer_to_unary_function - ConstDerefFnTy; - typedef mapped_iterator const_handler_iterator; - typedef iterator_range const_handler_range; + using DerefFnTy = std::pointer_to_unary_function; + using handler_iterator = mapped_iterator; + using handler_range = iterator_range; + using ConstDerefFnTy = + std::pointer_to_unary_function; + using const_handler_iterator = + mapped_iterator; + using const_handler_range = iterator_range; /// Returns an iterator that points to the first handler in CatchSwitchInst. 
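The handler accessors defined next consume the mapped_iterator aliases above; a small consumer sketch, assuming the LLVM 5.0 headers (helper name illustrative):

#include "llvm/IR/Instructions.h"

using namespace llvm;

static bool isHandlerOf(const CatchSwitchInst &CSI, const BasicBlock *BB) {
  for (const BasicBlock *Handler : CSI.handlers()) // const_handler_range
    if (Handler == BB)
      return true;
  return false;
}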
handler_iterator handler_begin() { @@ -4289,18 +4264,12 @@ class CatchSwitchInst : public TerminatorInst { } // Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const Instruction *I) { + static bool classof(const Instruction *I) { return I->getOpcode() == Instruction::CatchSwitch; } - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return isa(V) && classof(cast(V)); } - -private: - friend TerminatorInst; - BasicBlock *getSuccessorV(unsigned Idx) const; - unsigned getNumSuccessorsV() const; - void setSuccessorV(unsigned Idx, BasicBlock *B); }; template <> @@ -4341,10 +4310,10 @@ class CleanupPadInst : public FuncletPadInst { } /// Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const Instruction *I) { + static bool classof(const Instruction *I) { return I->getOpcode() == Instruction::CleanupPad; } - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return isa(V) && classof(cast(V)); } }; @@ -4391,10 +4360,10 @@ class CatchPadInst : public FuncletPadInst { } /// Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const Instruction *I) { + static bool classof(const Instruction *I) { return I->getOpcode() == Instruction::CatchPad; } - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return isa(V) && classof(cast(V)); } }; @@ -4455,18 +4424,25 @@ class CatchReturnInst : public TerminatorInst { } // Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const Instruction *I) { + static bool classof(const Instruction *I) { return (I->getOpcode() == Instruction::CatchRet); } - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return isa(V) && classof(cast(V)); } private: friend TerminatorInst; - BasicBlock *getSuccessorV(unsigned Idx) const; - unsigned getNumSuccessorsV() const; - void setSuccessorV(unsigned Idx, BasicBlock *B); + + BasicBlock *getSuccessor(unsigned Idx) const { + assert(Idx < getNumSuccessors() && "Successor # out of range for catchret!"); + return getSuccessor(); + } + + void setSuccessor(unsigned Idx, BasicBlock *B) { + assert(Idx < getNumSuccessors() && "Successor # out of range for catchret!"); + setSuccessor(B); + } }; template <> @@ -4544,18 +4520,25 @@ class CleanupReturnInst : public TerminatorInst { } // Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const Instruction *I) { + static bool classof(const Instruction *I) { return (I->getOpcode() == Instruction::CleanupRet); } - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return isa(V) && classof(cast(V)); } private: friend TerminatorInst; - BasicBlock *getSuccessorV(unsigned Idx) const; - unsigned getNumSuccessorsV() const; - void setSuccessorV(unsigned Idx, BasicBlock *B); + + BasicBlock *getSuccessor(unsigned Idx) const { + assert(Idx == 0); + return getUnwindDest(); + } + + void setSuccessor(unsigned Idx, BasicBlock *B) { + assert(Idx == 0); + setUnwindDest(B); + } // Shadow Instruction::setInstructionSubclassData with a private forwarding // method so that subclasses cannot accidentally use it. 
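Throughout these hunks the virtual getSuccessorV/getNumSuccessorsV/setSuccessorV hooks disappear; TerminatorInst now dispatches on the opcode to each subclass's non-virtual accessors, which is why successor-less terminators carry llvm_unreachable bodies and catchret/cleanupret assert on the index. Client code is unchanged; a sketch (helper name illustrative):

#include "llvm/IR/InstrTypes.h"

using namespace llvm;

static unsigned countEdgesTo(const TerminatorInst *T, const BasicBlock *Dest) {
  unsigned N = 0;
  for (unsigned I = 0, E = T->getNumSuccessors(); I != E; ++I)
    if (T->getSuccessor(I) == Dest) // opcode-based dispatch, no vtable hop
      ++N;
  return N;
}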
@@ -4595,23 +4578,26 @@ class UnreachableInst : public TerminatorInst { return User::operator new(s, 0); } - void *operator new(size_t, unsigned) = delete; - unsigned getNumSuccessors() const { return 0; } // Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const Instruction *I) { + static bool classof(const Instruction *I) { return I->getOpcode() == Instruction::Unreachable; } - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return isa(V) && classof(cast(V)); } private: friend TerminatorInst; - BasicBlock *getSuccessorV(unsigned idx) const; - unsigned getNumSuccessorsV() const; - void setSuccessorV(unsigned idx, BasicBlock *B); + + BasicBlock *getSuccessor(unsigned idx) const { + llvm_unreachable("UnreachableInst has no successors!"); + } + + void setSuccessor(unsigned idx, BasicBlock *B) { + llvm_unreachable("UnreachableInst has no successors!"); + } }; //===----------------------------------------------------------------------===// @@ -4645,10 +4631,10 @@ class TruncInst : public CastInst { ); /// Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const Instruction *I) { + static bool classof(const Instruction *I) { return I->getOpcode() == Trunc; } - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return isa(V) && classof(cast(V)); } }; @@ -4684,10 +4670,10 @@ class ZExtInst : public CastInst { ); /// Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const Instruction *I) { + static bool classof(const Instruction *I) { return I->getOpcode() == ZExt; } - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return isa(V) && classof(cast(V)); } }; @@ -4723,10 +4709,10 @@ class SExtInst : public CastInst { ); /// Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const Instruction *I) { + static bool classof(const Instruction *I) { return I->getOpcode() == SExt; } - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return isa(V) && classof(cast(V)); } }; @@ -4762,10 +4748,10 @@ class FPTruncInst : public CastInst { ); /// Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const Instruction *I) { + static bool classof(const Instruction *I) { return I->getOpcode() == FPTrunc; } - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return isa(V) && classof(cast(V)); } }; @@ -4801,10 +4787,10 @@ class FPExtInst : public CastInst { ); /// Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const Instruction *I) { + static bool classof(const Instruction *I) { return I->getOpcode() == FPExt; } - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return isa(V) && classof(cast(V)); } }; @@ -4840,10 +4826,10 @@ class UIToFPInst : public CastInst { ); /// Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const Instruction *I) { + static bool classof(const Instruction *I) { return I->getOpcode() == UIToFP; } - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return isa(V) && classof(cast(V)); } }; @@ -4879,10 +4865,10 @@ class SIToFPInst : public CastInst { ); /// Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool 
classof(const Instruction *I) { + static bool classof(const Instruction *I) { return I->getOpcode() == SIToFP; } - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return isa(V) && classof(cast(V)); } }; @@ -4918,10 +4904,10 @@ class FPToUIInst : public CastInst { ); /// Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const Instruction *I) { + static bool classof(const Instruction *I) { return I->getOpcode() == FPToUI; } - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return isa(V) && classof(cast(V)); } }; @@ -4957,10 +4943,10 @@ class FPToSIInst : public CastInst { ); /// Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const Instruction *I) { + static bool classof(const Instruction *I) { return I->getOpcode() == FPToSI; } - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return isa(V) && classof(cast(V)); } }; @@ -5000,10 +4986,10 @@ class IntToPtrInst : public CastInst { } // Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const Instruction *I) { + static bool classof(const Instruction *I) { return I->getOpcode() == IntToPtr; } - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return isa(V) && classof(cast(V)); } }; @@ -5051,10 +5037,10 @@ class PtrToIntInst : public CastInst { } // Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const Instruction *I) { + static bool classof(const Instruction *I) { return I->getOpcode() == PtrToInt; } - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return isa(V) && classof(cast(V)); } }; @@ -5090,10 +5076,10 @@ class BitCastInst : public CastInst { ); // Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const Instruction *I) { + static bool classof(const Instruction *I) { return I->getOpcode() == BitCast; } - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return isa(V) && classof(cast(V)); } }; @@ -5130,10 +5116,10 @@ class AddrSpaceCastInst : public CastInst { ); // Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const Instruction *I) { + static bool classof(const Instruction *I) { return I->getOpcode() == AddrSpaceCast; } - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return isa(V) && classof(cast(V)); } diff --git a/interpreter/llvm/src/include/llvm/IR/IntrinsicInst.h b/interpreter/llvm/src/include/llvm/IR/IntrinsicInst.h index 05e3315cbab2f..f55d17ec72c8a 100644 --- a/interpreter/llvm/src/include/llvm/IR/IntrinsicInst.h +++ b/interpreter/llvm/src/include/llvm/IR/IntrinsicInst.h @@ -53,12 +53,12 @@ namespace llvm { } // Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const CallInst *I) { + static bool classof(const CallInst *I) { if (const Function *CF = I->getCalledFunction()) return CF->isIntrinsic(); return false; } - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return isa(V) && classof(cast(V)); } }; @@ -72,7 +72,7 @@ namespace llvm { Value *getVariableLocation(bool AllowNullOp = true) const; // Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const IntrinsicInst *I) { + static 
bool classof(const IntrinsicInst *I) { switch (I->getIntrinsicID()) { case Intrinsic::dbg_declare: case Intrinsic::dbg_value: @@ -80,7 +80,7 @@ namespace llvm { default: return false; } } - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return isa(V) && classof(cast(V)); } }; @@ -107,10 +107,10 @@ namespace llvm { } // Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const IntrinsicInst *I) { + static bool classof(const IntrinsicInst *I) { return I->getIntrinsicID() == Intrinsic::dbg_declare; } - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return isa(V) && classof(cast(V)); } }; @@ -144,10 +144,10 @@ namespace llvm { } // Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const IntrinsicInst *I) { + static bool classof(const IntrinsicInst *I) { return I->getIntrinsicID() == Intrinsic::dbg_value; } - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return isa(V) && classof(cast(V)); } }; @@ -171,46 +171,294 @@ namespace llvm { ebStrict }; + bool isUnaryOp() const; RoundingMode getRoundingMode() const; ExceptionBehavior getExceptionBehavior() const; // Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const IntrinsicInst *I) { + static bool classof(const IntrinsicInst *I) { switch (I->getIntrinsicID()) { case Intrinsic::experimental_constrained_fadd: case Intrinsic::experimental_constrained_fsub: case Intrinsic::experimental_constrained_fmul: case Intrinsic::experimental_constrained_fdiv: case Intrinsic::experimental_constrained_frem: + case Intrinsic::experimental_constrained_sqrt: + case Intrinsic::experimental_constrained_pow: + case Intrinsic::experimental_constrained_powi: + case Intrinsic::experimental_constrained_sin: + case Intrinsic::experimental_constrained_cos: + case Intrinsic::experimental_constrained_exp: + case Intrinsic::experimental_constrained_exp2: + case Intrinsic::experimental_constrained_log: + case Intrinsic::experimental_constrained_log10: + case Intrinsic::experimental_constrained_log2: + case Intrinsic::experimental_constrained_rint: + case Intrinsic::experimental_constrained_nearbyint: return true; default: return false; } } - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return isa(V) && classof(cast(V)); } }; /// This class represents atomic memcpy intrinsic - /// TODO: Integrate this class into MemIntrinsic hierarchy. - class ElementAtomicMemCpyInst : public IntrinsicInst { + /// TODO: Integrate this class into MemIntrinsic hierarchy; for now this is + /// C&P of all methods from that hierarchy + class ElementUnorderedAtomicMemCpyInst : public IntrinsicInst { + private: + enum { ARG_DEST = 0, ARG_SOURCE = 1, ARG_LENGTH = 2, ARG_ELEMENTSIZE = 3 }; + + public: + Value *getRawDest() const { + return const_cast(getArgOperand(ARG_DEST)); + } + const Use &getRawDestUse() const { return getArgOperandUse(ARG_DEST); } + Use &getRawDestUse() { return getArgOperandUse(ARG_DEST); } + + /// Return the arguments to the instruction. 
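With the ConstrainedFPIntrinsic classof whitelist widened above to cover the new constrained math intrinsics, passes can recognize the whole family through one dyn_cast; a sketch assuming the LLVM 5.0 headers (helper name illustrative):

#include "llvm/IR/IntrinsicInst.h"

using namespace llvm;

static bool needsStrictFPHandling(const Instruction &I) {
  if (auto *CFP = dyn_cast<ConstrainedFPIntrinsic>(&I))
    return CFP->getExceptionBehavior() != ConstrainedFPIntrinsic::ebIgnore;
  return false;
}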
+ Value *getRawSource() const { + return const_cast(getArgOperand(ARG_SOURCE)); + } + const Use &getRawSourceUse() const { return getArgOperandUse(ARG_SOURCE); } + Use &getRawSourceUse() { return getArgOperandUse(ARG_SOURCE); } + + Value *getLength() const { + return const_cast(getArgOperand(ARG_LENGTH)); + } + const Use &getLengthUse() const { return getArgOperandUse(ARG_LENGTH); } + Use &getLengthUse() { return getArgOperandUse(ARG_LENGTH); } + + bool isVolatile() const { return false; } + + Value *getRawElementSizeInBytes() const { + return const_cast(getArgOperand(ARG_ELEMENTSIZE)); + } + + ConstantInt *getElementSizeInBytesCst() const { + return cast(getRawElementSizeInBytes()); + } + + uint32_t getElementSizeInBytes() const { + return getElementSizeInBytesCst()->getZExtValue(); + } + + /// This is just like getRawDest, but it strips off any cast + /// instructions that feed it, giving the original input. The returned + /// value is guaranteed to be a pointer. + Value *getDest() const { return getRawDest()->stripPointerCasts(); } + + /// This is just like getRawSource, but it strips off any cast + /// instructions that feed it, giving the original input. The returned + /// value is guaranteed to be a pointer. + Value *getSource() const { return getRawSource()->stripPointerCasts(); } + + unsigned getDestAddressSpace() const { + return cast(getRawDest()->getType())->getAddressSpace(); + } + + unsigned getSourceAddressSpace() const { + return cast(getRawSource()->getType())->getAddressSpace(); + } + + /// Set the specified arguments of the instruction. + void setDest(Value *Ptr) { + assert(getRawDest()->getType() == Ptr->getType() && + "setDest called with pointer of wrong type!"); + setArgOperand(ARG_DEST, Ptr); + } + + void setSource(Value *Ptr) { + assert(getRawSource()->getType() == Ptr->getType() && + "setSource called with pointer of wrong type!"); + setArgOperand(ARG_SOURCE, Ptr); + } + + void setLength(Value *L) { + assert(getLength()->getType() == L->getType() && + "setLength called with value of wrong type!"); + setArgOperand(ARG_LENGTH, L); + } + + void setElementSizeInBytes(Constant *V) { + assert(V->getType() == Type::getInt8Ty(getContext()) && + "setElementSizeInBytes called with value of wrong type!"); + setArgOperand(ARG_ELEMENTSIZE, V); + } + + static bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::memcpy_element_unordered_atomic; + } + static bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } + }; + + class ElementUnorderedAtomicMemMoveInst : public IntrinsicInst { + private: + enum { ARG_DEST = 0, ARG_SOURCE = 1, ARG_LENGTH = 2, ARG_ELEMENTSIZE = 3 }; + + public: + Value *getRawDest() const { + return const_cast(getArgOperand(ARG_DEST)); + } + const Use &getRawDestUse() const { return getArgOperandUse(ARG_DEST); } + Use &getRawDestUse() { return getArgOperandUse(ARG_DEST); } + + /// Return the arguments to the instruction. 
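Unlike the old getNumElements interface, the length in the unordered-atomic classes counts bytes (per LangRef it must be a multiple of the constant, power-of-two elementsize). A consumer sketch using the accessors above (helper name and the 4-element bound are illustrative):

#include "llvm/IR/Constants.h"
#include "llvm/IR/IntrinsicInst.h"

using namespace llvm;

static bool isSmallElementAtomicCopy(const Instruction &I) {
  auto *MCI = dyn_cast<ElementUnorderedAtomicMemCpyInst>(&I);
  if (!MCI)
    return false;
  auto *Len = dyn_cast<ConstantInt>(MCI->getLength());
  return Len && Len->getZExtValue() <= 4 * MCI->getElementSizeInBytes();
}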
+ Value *getRawSource() const { + return const_cast(getArgOperand(ARG_SOURCE)); + } + const Use &getRawSourceUse() const { return getArgOperandUse(ARG_SOURCE); } + Use &getRawSourceUse() { return getArgOperandUse(ARG_SOURCE); } + + Value *getLength() const { + return const_cast(getArgOperand(ARG_LENGTH)); + } + const Use &getLengthUse() const { return getArgOperandUse(ARG_LENGTH); } + Use &getLengthUse() { return getArgOperandUse(ARG_LENGTH); } + + bool isVolatile() const { return false; } + + Value *getRawElementSizeInBytes() const { + return const_cast(getArgOperand(ARG_ELEMENTSIZE)); + } + + ConstantInt *getElementSizeInBytesCst() const { + return cast(getRawElementSizeInBytes()); + } + + uint32_t getElementSizeInBytes() const { + return getElementSizeInBytesCst()->getZExtValue(); + } + + /// This is just like getRawDest, but it strips off any cast + /// instructions that feed it, giving the original input. The returned + /// value is guaranteed to be a pointer. + Value *getDest() const { return getRawDest()->stripPointerCasts(); } + + /// This is just like getRawSource, but it strips off any cast + /// instructions that feed it, giving the original input. The returned + /// value is guaranteed to be a pointer. + Value *getSource() const { return getRawSource()->stripPointerCasts(); } + + unsigned getDestAddressSpace() const { + return cast(getRawDest()->getType())->getAddressSpace(); + } + + unsigned getSourceAddressSpace() const { + return cast(getRawSource()->getType())->getAddressSpace(); + } + + /// Set the specified arguments of the instruction. + void setDest(Value *Ptr) { + assert(getRawDest()->getType() == Ptr->getType() && + "setDest called with pointer of wrong type!"); + setArgOperand(ARG_DEST, Ptr); + } + + void setSource(Value *Ptr) { + assert(getRawSource()->getType() == Ptr->getType() && + "setSource called with pointer of wrong type!"); + setArgOperand(ARG_SOURCE, Ptr); + } + + void setLength(Value *L) { + assert(getLength()->getType() == L->getType() && + "setLength called with value of wrong type!"); + setArgOperand(ARG_LENGTH, L); + } + + void setElementSizeInBytes(Constant *V) { + assert(V->getType() == Type::getInt8Ty(getContext()) && + "setElementSizeInBytes called with value of wrong type!"); + setArgOperand(ARG_ELEMENTSIZE, V); + } + + static inline bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::memmove_element_unordered_atomic; + } + static inline bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } + }; + + /// This class represents atomic memset intrinsic + /// TODO: Integrate this class into MemIntrinsic hierarchy; for now this is + /// C&P of all methods from that hierarchy + class ElementUnorderedAtomicMemSetInst : public IntrinsicInst { + private: + enum { ARG_DEST = 0, ARG_VALUE = 1, ARG_LENGTH = 2, ARG_ELEMENTSIZE = 3 }; + public: - Value *getRawDest() const { return getArgOperand(0); } - Value *getRawSource() const { return getArgOperand(1); } + Value *getRawDest() const { + return const_cast(getArgOperand(ARG_DEST)); + } + const Use &getRawDestUse() const { return getArgOperandUse(ARG_DEST); } + Use &getRawDestUse() { return getArgOperandUse(ARG_DEST); } + + Value *getValue() const { return const_cast(getArgOperand(ARG_VALUE)); } + const Use &getValueUse() const { return getArgOperandUse(ARG_VALUE); } + Use &getValueUse() { return getArgOperandUse(ARG_VALUE); } + + Value *getLength() const { + return const_cast(getArgOperand(ARG_LENGTH)); + } + const Use &getLengthUse() const { return 
getArgOperandUse(ARG_LENGTH); } + Use &getLengthUse() { return getArgOperandUse(ARG_LENGTH); } - Value *getNumElements() const { return getArgOperand(2); } - void setNumElements(Value *V) { setArgOperand(2, V); } + bool isVolatile() const { return false; } - uint64_t getSrcAlignment() const { return getParamAlignment(0); } - uint64_t getDstAlignment() const { return getParamAlignment(1); } + Value *getRawElementSizeInBytes() const { + return const_cast(getArgOperand(ARG_ELEMENTSIZE)); + } - uint64_t getElementSizeInBytes() const { - Value *Arg = getArgOperand(3); - return cast(Arg)->getZExtValue(); + ConstantInt *getElementSizeInBytesCst() const { + return cast(getRawElementSizeInBytes()); + } + + uint32_t getElementSizeInBytes() const { + return getElementSizeInBytesCst()->getZExtValue(); + } + + /// This is just like getRawDest, but it strips off any cast + /// instructions that feed it, giving the original input. The returned + /// value is guaranteed to be a pointer. + Value *getDest() const { return getRawDest()->stripPointerCasts(); } + + unsigned getDestAddressSpace() const { + return cast(getRawDest()->getType())->getAddressSpace(); + } + + /// Set the specified arguments of the instruction. + void setDest(Value *Ptr) { + assert(getRawDest()->getType() == Ptr->getType() && + "setDest called with pointer of wrong type!"); + setArgOperand(ARG_DEST, Ptr); + } + + void setValue(Value *Val) { + assert(getValue()->getType() == Val->getType() && + "setValue called with value of wrong type!"); + setArgOperand(ARG_VALUE, Val); + } + + void setLength(Value *L) { + assert(getLength()->getType() == L->getType() && + "setLength called with value of wrong type!"); + setArgOperand(ARG_LENGTH, L); + } + + void setElementSizeInBytes(Constant *V) { + assert(V->getType() == Type::getInt8Ty(getContext()) && + "setElementSizeInBytes called with value of wrong type!"); + setArgOperand(ARG_ELEMENTSIZE, V); } static inline bool classof(const IntrinsicInst *I) { - return I->getIntrinsicID() == Intrinsic::memcpy_element_atomic; + return I->getIntrinsicID() == Intrinsic::memset_element_unordered_atomic; } static inline bool classof(const Value *V) { return isa(V) && classof(cast(V)); @@ -279,7 +527,7 @@ namespace llvm { } // Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const IntrinsicInst *I) { + static bool classof(const IntrinsicInst *I) { switch (I->getIntrinsicID()) { case Intrinsic::memcpy: case Intrinsic::memmove: @@ -288,7 +536,7 @@ namespace llvm { default: return false; } } - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return isa(V) && classof(cast(V)); } }; @@ -308,10 +556,10 @@ namespace llvm { } // Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const IntrinsicInst *I) { + static bool classof(const IntrinsicInst *I) { return I->getIntrinsicID() == Intrinsic::memset; } - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return isa(V) && classof(cast(V)); } }; @@ -340,11 +588,11 @@ namespace llvm { } // Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const IntrinsicInst *I) { + static bool classof(const IntrinsicInst *I) { return I->getIntrinsicID() == Intrinsic::memcpy || I->getIntrinsicID() == Intrinsic::memmove; } - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return isa(V) && classof(cast(V)); } }; @@ -353,10 +601,10 @@ namespace llvm { 
class MemCpyInst : public MemTransferInst {
  public:
    // Methods for support type inquiry through isa, cast, and dyn_cast:
-    static inline bool classof(const IntrinsicInst *I) {
+    static bool classof(const IntrinsicInst *I) {
      return I->getIntrinsicID() == Intrinsic::memcpy;
    }
-    static inline bool classof(const Value *V) {
+    static bool classof(const Value *V) {
      return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
    }
  };
@@ -365,10 +613,10 @@ namespace llvm {
  class MemMoveInst : public MemTransferInst {
  public:
    // Methods for support type inquiry through isa, cast, and dyn_cast:
-    static inline bool classof(const IntrinsicInst *I) {
+    static bool classof(const IntrinsicInst *I) {
      return I->getIntrinsicID() == Intrinsic::memmove;
    }
-    static inline bool classof(const Value *V) {
+    static bool classof(const Value *V) {
      return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
    }
  };
@@ -376,10 +624,10 @@ namespace llvm {
  /// This represents the llvm.va_start intrinsic.
  class VAStartInst : public IntrinsicInst {
  public:
-    static inline bool classof(const IntrinsicInst *I) {
+    static bool classof(const IntrinsicInst *I) {
      return I->getIntrinsicID() == Intrinsic::vastart;
    }
-    static inline bool classof(const Value *V) {
+    static bool classof(const Value *V) {
      return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
    }
@@ -389,10 +637,10 @@ namespace llvm {
  /// This represents the llvm.va_end intrinsic.
  class VAEndInst : public IntrinsicInst {
  public:
-    static inline bool classof(const IntrinsicInst *I) {
+    static bool classof(const IntrinsicInst *I) {
      return I->getIntrinsicID() == Intrinsic::vaend;
    }
-    static inline bool classof(const Value *V) {
+    static bool classof(const Value *V) {
      return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
    }
@@ -402,10 +650,10 @@ namespace llvm {
  /// This represents the llvm.va_copy intrinsic.
  class VACopyInst : public IntrinsicInst {
  public:
-    static inline bool classof(const IntrinsicInst *I) {
+    static bool classof(const IntrinsicInst *I) {
      return I->getIntrinsicID() == Intrinsic::vacopy;
    }
-    static inline bool classof(const Value *V) {
+    static bool classof(const Value *V) {
      return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
    }
@@ -416,10 +664,10 @@ namespace llvm {
  /// This represents the llvm.instrprof_increment intrinsic.
  class InstrProfIncrementInst : public IntrinsicInst {
  public:
-    static inline bool classof(const IntrinsicInst *I) {
+    static bool classof(const IntrinsicInst *I) {
      return I->getIntrinsicID() == Intrinsic::instrprof_increment;
    }
-    static inline bool classof(const Value *V) {
+    static bool classof(const Value *V) {
      return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
    }
@@ -445,10 +693,10 @@
  class InstrProfIncrementInstStep : public InstrProfIncrementInst {
  public:
-    static inline bool classof(const IntrinsicInst *I) {
+    static bool classof(const IntrinsicInst *I) {
      return I->getIntrinsicID() == Intrinsic::instrprof_increment_step;
    }
-    static inline bool classof(const Value *V) {
+    static bool classof(const Value *V) {
      return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
    }
  };
@@ -456,10 +704,10 @@ namespace llvm {
  /// This represents the llvm.instrprof_value_profile intrinsic.
class InstrProfValueProfileInst : public IntrinsicInst { public: - static inline bool classof(const IntrinsicInst *I) { + static bool classof(const IntrinsicInst *I) { return I->getIntrinsicID() == Intrinsic::instrprof_value_profile; } - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return isa(V) && classof(cast(V)); } diff --git a/interpreter/llvm/src/include/llvm/IR/Intrinsics.td b/interpreter/llvm/src/include/llvm/IR/Intrinsics.td index 19f6045568f49..14c88e5194356 100644 --- a/interpreter/llvm/src/include/llvm/IR/Intrinsics.td +++ b/interpreter/llvm/src/include/llvm/IR/Intrinsics.td @@ -489,8 +489,64 @@ let IntrProperties = [IntrInaccessibleMemOnly] in { LLVMMatchType<0>, llvm_metadata_ty, llvm_metadata_ty ]>; + + // These intrinsics are sensitive to the rounding mode so we need constrained + // versions of each of them. When strict rounding and exception control are + // not required the non-constrained versions of these intrinsics should be + // used. + def int_experimental_constrained_sqrt : Intrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0>, + llvm_metadata_ty, + llvm_metadata_ty ]>; + def int_experimental_constrained_powi : Intrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0>, + llvm_i32_ty, + llvm_metadata_ty, + llvm_metadata_ty ]>; + def int_experimental_constrained_sin : Intrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0>, + llvm_metadata_ty, + llvm_metadata_ty ]>; + def int_experimental_constrained_cos : Intrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0>, + llvm_metadata_ty, + llvm_metadata_ty ]>; + def int_experimental_constrained_pow : Intrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0>, + LLVMMatchType<0>, + llvm_metadata_ty, + llvm_metadata_ty ]>; + def int_experimental_constrained_log : Intrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0>, + llvm_metadata_ty, + llvm_metadata_ty ]>; + def int_experimental_constrained_log10: Intrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0>, + llvm_metadata_ty, + llvm_metadata_ty ]>; + def int_experimental_constrained_log2 : Intrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0>, + llvm_metadata_ty, + llvm_metadata_ty ]>; + def int_experimental_constrained_exp : Intrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0>, + llvm_metadata_ty, + llvm_metadata_ty ]>; + def int_experimental_constrained_exp2 : Intrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0>, + llvm_metadata_ty, + llvm_metadata_ty ]>; + def int_experimental_constrained_rint : Intrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0>, + llvm_metadata_ty, + llvm_metadata_ty ]>; + def int_experimental_constrained_nearbyint : Intrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0>, + llvm_metadata_ty, + llvm_metadata_ty ]>; } -// FIXME: Add intrinsic for fcmp, fptrunc, fpext, fptoui and fptosi. +// FIXME: Add intrinsics for fcmp, fptrunc, fpext, fptoui and fptosi. +// FIXME: Add intrinsics for fabs, copysign, floor, ceil, trunc and round? 
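IRBuilder has no convenience wrapper for the constrained intrinsics defined above at this point, so calls are materialized by hand; the metadata spellings ("round.dynamic", "fpexcept.strict") follow LangRef. A hedged sketch (helper name illustrative):

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"

using namespace llvm;

static CallInst *emitConstrainedSqrt(IRBuilder<> &B, Module &M, Value *X) {
  LLVMContext &Ctx = M.getContext();
  Function *Sqrt = Intrinsic::getDeclaration(
      &M, Intrinsic::experimental_constrained_sqrt, {X->getType()});
  Value *Round = MetadataAsValue::get(Ctx, MDString::get(Ctx, "round.dynamic"));
  Value *Except =
      MetadataAsValue::get(Ctx, MDString::get(Ctx, "fpexcept.strict"));
  return B.CreateCall(Sqrt, {X, Round, Except});
}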
//===------------------------- Expect Intrinsics --------------------------===// @@ -806,11 +862,32 @@ def int_xray_customevent : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], //===------ Memory intrinsics with element-wise atomicity guarantees ------===// // -def int_memcpy_element_atomic : Intrinsic<[], - [llvm_anyptr_ty, llvm_anyptr_ty, - llvm_i64_ty, llvm_i32_ty], - [IntrArgMemOnly, NoCapture<0>, NoCapture<1>, - WriteOnly<0>, ReadOnly<1>]>; +// @llvm.memcpy.element.unordered.atomic.*(dest, src, length, elementsize) +def int_memcpy_element_unordered_atomic + : Intrinsic<[], + [ + llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty, llvm_i32_ty + ], + [ + IntrArgMemOnly, NoCapture<0>, NoCapture<1>, WriteOnly<0>, + ReadOnly<1> + ]>; + +// @llvm.memmove.element.unordered.atomic.*(dest, src, length, elementsize) +def int_memmove_element_unordered_atomic + : Intrinsic<[], + [ + llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty, llvm_i32_ty + ], + [ + IntrArgMemOnly, NoCapture<0>, NoCapture<1>, WriteOnly<0>, + ReadOnly<1> + ]>; + +// @llvm.memset.element.unordered.atomic.*(dest, value, length, elementsize) +def int_memset_element_unordered_atomic + : Intrinsic<[], [ llvm_anyptr_ty, llvm_i8_ty, llvm_anyint_ty, llvm_i32_ty ], + [ IntrArgMemOnly, NoCapture<0>, WriteOnly<0> ]>; //===------------------------ Reduction Intrinsics ------------------------===// // diff --git a/interpreter/llvm/src/include/llvm/IR/IntrinsicsAMDGPU.td b/interpreter/llvm/src/include/llvm/IR/IntrinsicsAMDGPU.td index d7413fe9e56f8..4e0529a32d29d 100644 --- a/interpreter/llvm/src/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/interpreter/llvm/src/include/llvm/IR/IntrinsicsAMDGPU.td @@ -475,6 +475,33 @@ class AMDGPUBufferStore : Intrinsic < def int_amdgcn_buffer_store_format : AMDGPUBufferStore; def int_amdgcn_buffer_store : AMDGPUBufferStore; +def int_amdgcn_tbuffer_load : Intrinsic < + [llvm_any_ty], // overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32 + [llvm_v4i32_ty, // rsrc(SGPR) + llvm_i32_ty, // vindex(VGPR) + llvm_i32_ty, // voffset(VGPR) + llvm_i32_ty, // soffset(SGPR) + llvm_i32_ty, // offset(imm) + llvm_i32_ty, // dfmt(imm) + llvm_i32_ty, // nfmt(imm) + llvm_i1_ty, // glc(imm) + llvm_i1_ty], // slc(imm) + []>; + +def int_amdgcn_tbuffer_store : Intrinsic < + [], + [llvm_any_ty, // vdata(VGPR), overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32 + llvm_v4i32_ty, // rsrc(SGPR) + llvm_i32_ty, // vindex(VGPR) + llvm_i32_ty, // voffset(VGPR) + llvm_i32_ty, // soffset(SGPR) + llvm_i32_ty, // offset(imm) + llvm_i32_ty, // dfmt(imm) + llvm_i32_ty, // nfmt(imm) + llvm_i1_ty, // glc(imm) + llvm_i1_ty], // slc(imm) + []>; + class AMDGPUBufferAtomic : Intrinsic < [llvm_i32_ty], [llvm_i32_ty, // vdata(VGPR) @@ -566,6 +593,16 @@ def int_amdgcn_s_getreg : [IntrReadMem, IntrSpeculatable] >; +// int_amdgcn_s_getpc is provided to allow a specific style of position +// independent code to determine the high part of its address when it is +// known (through convention) that the code and any data of interest does +// not cross a 4Gb address boundary. Use for any other purpose may not +// produce the desired results as optimizations may cause code movement, +// especially as we explicitly use IntrNoMem to allow optimizations. 
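Back to the @llvm.*.element.unordered.atomic definitions at the top of this hunk: a hedged sketch building the memcpy flavor by hand, with the overloaded types taken from the anyptr/anyptr/anyint slots above (no IRBuilder helper is assumed; helper name illustrative):

#include <cstdint>

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"

using namespace llvm;

static CallInst *emitElementAtomicMemCpy(IRBuilder<> &B, Module &M, Value *Dst,
                                         Value *Src, Value *Len,
                                         uint32_t ElementSize) {
  Function *F = Intrinsic::getDeclaration(
      &M, Intrinsic::memcpy_element_unordered_atomic,
      {Dst->getType(), Src->getType(), Len->getType()});
  CallInst *CI = B.CreateCall(F, {Dst, Src, Len, B.getInt32(ElementSize)});
  // Assumption: the lowering expects align attributes of at least elementsize
  // on both pointers; set them with the argument-indexed helper added above.
  CI->addParamAttr(0, Attribute::getWithAlignment(M.getContext(), ElementSize));
  CI->addParamAttr(1, Attribute::getWithAlignment(M.getContext(), ElementSize));
  return CI;
}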
+def int_amdgcn_s_getpc : + GCCBuiltin<"__builtin_amdgcn_s_getpc">, + Intrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrSpeculatable]>; + // __builtin_amdgcn_interp_mov , , , // param values: 0 = P10, 1 = P20, 2 = P0 def int_amdgcn_interp_mov : @@ -693,6 +730,16 @@ def int_amdgcn_readlane : GCCBuiltin<"__builtin_amdgcn_readlane">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrConvergent]>; +def int_amdgcn_alignbit : Intrinsic<[llvm_i32_ty], + [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable] +>; + +def int_amdgcn_alignbyte : Intrinsic<[llvm_i32_ty], + [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable] +>; + //===----------------------------------------------------------------------===// // CI+ Intrinsics //===----------------------------------------------------------------------===// diff --git a/interpreter/llvm/src/include/llvm/IR/IntrinsicsHexagon.td b/interpreter/llvm/src/include/llvm/IR/IntrinsicsHexagon.td index 8ac56e03be6a6..0982453447259 100644 --- a/interpreter/llvm/src/include/llvm/IR/IntrinsicsHexagon.td +++ b/interpreter/llvm/src/include/llvm/IR/IntrinsicsHexagon.td @@ -32,16 +32,6 @@ class Hexagon_qi_mem_Intrinsic : Hexagon_Intrinsic; - -// -// DEF_FUNCTION_TYPE_1(void_ftype_SI,BT_VOID,BT_INT) -> -// Hexagon_void_si_Intrinsic -// -class Hexagon_void_si_Intrinsic - : Hexagon_Intrinsic; - // // DEF_FUNCTION_TYPE_1(HI_ftype_SI,BT_I16,BT_INT) -> // Hexagon_hi_si_Intrinsic @@ -4959,11 +4949,25 @@ Hexagon_di_di_Intrinsic<"HEXAGON_S2_interleave">; // def int_hexagon_S2_deinterleave : Hexagon_di_di_Intrinsic<"HEXAGON_S2_deinterleave">; + // // BUILTIN_INFO(HEXAGON.dcfetch_A,v_ftype_DI*,1) // def int_hexagon_prefetch : -Hexagon_void_si_Intrinsic<"HEXAGON_prefetch">; +Hexagon_Intrinsic<"HEXAGON_prefetch", [], [llvm_ptr_ty], []>; +def int_hexagon_Y2_dccleana : +Hexagon_Intrinsic<"HEXAGON_Y2_dccleana", [], [llvm_ptr_ty], []>; +def int_hexagon_Y2_dccleaninva : +Hexagon_Intrinsic<"HEXAGON_Y2_dccleaninva", [], [llvm_ptr_ty], []>; +def int_hexagon_Y2_dcinva : +Hexagon_Intrinsic<"HEXAGON_Y2_dcinva", [], [llvm_ptr_ty], []>; +def int_hexagon_Y2_dczeroa : +Hexagon_Intrinsic<"HEXAGON_Y2_dczeroa", [], [llvm_ptr_ty], + [IntrWriteMem, IntrArgMemOnly, IntrHasSideEffects]>; +def int_hexagon_Y4_l2fetch : +Hexagon_Intrinsic<"HEXAGON_Y4_l2fetch", [], [llvm_ptr_ty, llvm_i32_ty], []>; +def int_hexagon_Y5_l2fetch : +Hexagon_Intrinsic<"HEXAGON_Y5_l2fetch", [], [llvm_ptr_ty, llvm_i64_ty], []>; def llvm_ptr32_ty : LLVMPointerType; def llvm_ptr64_ty : LLVMPointerType; diff --git a/interpreter/llvm/src/include/llvm/IR/IntrinsicsPowerPC.td b/interpreter/llvm/src/include/llvm/IR/IntrinsicsPowerPC.td index 64240a9297826..6321bb81b8cbc 100644 --- a/interpreter/llvm/src/include/llvm/IR/IntrinsicsPowerPC.td +++ b/interpreter/llvm/src/include/llvm/IR/IntrinsicsPowerPC.td @@ -1132,4 +1132,6 @@ def int_ppc_tsuspend : GCCBuiltin<"__builtin_tsuspend">, def int_ppc_ttest : GCCBuiltin<"__builtin_ttest">, Intrinsic<[llvm_i64_ty], [], []>; + +def int_ppc_cfence : Intrinsic<[], [llvm_anyint_ty], []>; } diff --git a/interpreter/llvm/src/include/llvm/IR/IntrinsicsSystemZ.td b/interpreter/llvm/src/include/llvm/IR/IntrinsicsSystemZ.td index 9be37d3645b22..98065bc51d992 100644 --- a/interpreter/llvm/src/include/llvm/IR/IntrinsicsSystemZ.td +++ b/interpreter/llvm/src/include/llvm/IR/IntrinsicsSystemZ.td @@ -373,6 +373,49 @@ let TargetPrefix = "s390" in { def int_s390_vfidb : Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + + // 
Instructions from the Vector Enhancements Facility 1 + def int_s390_vbperm : SystemZBinaryConv<"vbperm", llvm_v2i64_ty, + llvm_v16i8_ty>; + + def int_s390_vmslg : GCCBuiltin<"__builtin_s390_vmslg">, + Intrinsic<[llvm_v16i8_ty], + [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v16i8_ty, + llvm_i32_ty], [IntrNoMem]>; + + def int_s390_vfmaxdb : Intrinsic<[llvm_v2f64_ty], + [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i32_ty], + [IntrNoMem]>; + def int_s390_vfmindb : Intrinsic<[llvm_v2f64_ty], + [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i32_ty], + [IntrNoMem]>; + def int_s390_vfmaxsb : Intrinsic<[llvm_v4f32_ty], + [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty], + [IntrNoMem]>; + def int_s390_vfminsb : Intrinsic<[llvm_v4f32_ty], + [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty], + [IntrNoMem]>; + + def int_s390_vfcesbs : SystemZBinaryConvCC; + def int_s390_vfchsbs : SystemZBinaryConvCC; + def int_s390_vfchesbs : SystemZBinaryConvCC; + + def int_s390_vftcisb : SystemZBinaryConvIntCC; + + def int_s390_vfisb : Intrinsic<[llvm_v4f32_ty], + [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem]>; + + // Instructions from the Vector Packed Decimal Facility + def int_s390_vlrl : GCCBuiltin<"__builtin_s390_vlrl">, + Intrinsic<[llvm_v16i8_ty], [llvm_i32_ty, llvm_ptr_ty], + [IntrReadMem, IntrArgMemOnly]>; + + def int_s390_vstrl : GCCBuiltin<"__builtin_s390_vstrl">, + Intrinsic<[], [llvm_v16i8_ty, llvm_i32_ty, llvm_ptr_ty], + // In fact write-only but there's no property + // for that. + [IntrArgMemOnly]>; } //===----------------------------------------------------------------------===// diff --git a/interpreter/llvm/src/include/llvm/IR/IntrinsicsWebAssembly.td b/interpreter/llvm/src/include/llvm/IR/IntrinsicsWebAssembly.td index 3a0957dfa39bc..640ef627bc466 100644 --- a/interpreter/llvm/src/include/llvm/IR/IntrinsicsWebAssembly.td +++ b/interpreter/llvm/src/include/llvm/IR/IntrinsicsWebAssembly.td @@ -19,4 +19,8 @@ let TargetPrefix = "wasm" in { // All intrinsics start with "llvm.wasm.". 
def int_wasm_current_memory : Intrinsic<[llvm_anyint_ty], [], [IntrReadMem]>; def int_wasm_grow_memory : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], []>; +// Exception handling intrinsics +def int_wasm_throw: Intrinsic<[], [llvm_i32_ty, llvm_ptr_ty], [Throws]>; +def int_wasm_rethrow: Intrinsic<[], [], [Throws]>; + } diff --git a/interpreter/llvm/src/include/llvm/IR/IntrinsicsX86.td b/interpreter/llvm/src/include/llvm/IR/IntrinsicsX86.td index 1c466e73eb1bb..80c528768dc7f 100644 --- a/interpreter/llvm/src/include/llvm/IR/IntrinsicsX86.td +++ b/interpreter/llvm/src/include/llvm/IR/IntrinsicsX86.td @@ -5420,86 +5420,6 @@ let TargetPrefix = "x86" in { def int_x86_avx512_vcomi_ss : GCCBuiltin<"__builtin_ia32_vcomiss">, Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; - - def int_x86_avx512_mask_cmp_b_512: - Intrinsic<[llvm_i64_ty], [llvm_v64i8_ty, llvm_v64i8_ty, llvm_i32_ty, - llvm_i64_ty], [IntrNoMem]>; - def int_x86_avx512_mask_cmp_w_512: - Intrinsic<[llvm_i32_ty], [llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty, - llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_cmp_d_512: - Intrinsic<[llvm_i16_ty], [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i32_ty, - llvm_i16_ty], [IntrNoMem ]>; - def int_x86_avx512_mask_cmp_q_512: - Intrinsic<[llvm_i8_ty], [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i32_ty, - llvm_i8_ty], [IntrNoMem]>; - - def int_x86_avx512_mask_ucmp_b_512: - Intrinsic<[llvm_i64_ty], [llvm_v64i8_ty, llvm_v64i8_ty, llvm_i32_ty, - llvm_i64_ty], [IntrNoMem]>; - def int_x86_avx512_mask_ucmp_w_512: - Intrinsic<[llvm_i32_ty], [llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty, - llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_ucmp_d_512: - Intrinsic<[llvm_i16_ty], [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i32_ty, - llvm_i16_ty], [IntrNoMem]>; - def int_x86_avx512_mask_ucmp_q_512: - Intrinsic<[llvm_i8_ty], [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i32_ty, - llvm_i8_ty], [IntrNoMem]>; - - // 256-bit - def int_x86_avx512_mask_cmp_b_256: - Intrinsic<[llvm_i32_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty, - llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_cmp_w_256: - Intrinsic<[llvm_i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty, - llvm_i16_ty], [IntrNoMem]>; - def int_x86_avx512_mask_cmp_d_256: - Intrinsic<[llvm_i8_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty, - llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_cmp_q_256: - Intrinsic<[llvm_i8_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty, - llvm_i8_ty], [IntrNoMem]>; - - def int_x86_avx512_mask_ucmp_b_256: - Intrinsic<[llvm_i32_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty, - llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_ucmp_w_256: - Intrinsic<[llvm_i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty, - llvm_i16_ty], [IntrNoMem]>; - def int_x86_avx512_mask_ucmp_d_256: - Intrinsic<[llvm_i8_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty, - llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_ucmp_q_256: - Intrinsic<[llvm_i8_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty, - llvm_i8_ty], [IntrNoMem]>; - - // 128-bit - def int_x86_avx512_mask_cmp_b_128: - Intrinsic<[llvm_i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty, - llvm_i16_ty], [IntrNoMem]>; - def int_x86_avx512_mask_cmp_w_128: - Intrinsic<[llvm_i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty, - llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_cmp_d_128: - Intrinsic<[llvm_i8_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty, - llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_cmp_q_128: - Intrinsic<[llvm_i8_ty], 
[llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty, - llvm_i8_ty], [IntrNoMem]>; - - def int_x86_avx512_mask_ucmp_b_128: - Intrinsic<[llvm_i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty, - llvm_i16_ty], [IntrNoMem]>; - def int_x86_avx512_mask_ucmp_w_128: - Intrinsic<[llvm_i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty, - llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_ucmp_d_128: - Intrinsic<[llvm_i8_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty, - llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_ucmp_q_128: - Intrinsic<[llvm_i8_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty, - llvm_i8_ty], [IntrNoMem]>; } // Compress, Expand diff --git a/interpreter/llvm/src/include/llvm/IR/LLVMContext.h b/interpreter/llvm/src/include/llvm/IR/LLVMContext.h index ad011fb72e6a1..4cb77701f7620 100644 --- a/interpreter/llvm/src/include/llvm/IR/LLVMContext.h +++ b/interpreter/llvm/src/include/llvm/IR/LLVMContext.h @@ -42,6 +42,24 @@ class Output; } // end namespace yaml +namespace SyncScope { + +typedef uint8_t ID; + +/// Known synchronization scope IDs, which always have the same value. All +/// synchronization scope IDs that LLVM has special knowledge of are listed +/// here. Additionally, this scheme allows LLVM to efficiently check for +/// specific synchronization scope ID without comparing strings. +enum { + /// Synchronized with respect to signal handlers executing in the same thread. + SingleThread = 0, + + /// Synchronized with respect to all concurrently executing threads. + System = 1 +}; + +} // end namespace SyncScope + /// This is an important class for using LLVM in a threaded context. It /// (opaquely) owns and manages the core "global" data of LLVM's core /// infrastructure, including the type and constant uniquing tables. @@ -111,6 +129,16 @@ class LLVMContext { /// tag registered with an LLVMContext has an unique ID. uint32_t getOperandBundleTagID(StringRef Tag) const; + /// getOrInsertSyncScopeID - Maps synchronization scope name to + /// synchronization scope ID. Every synchronization scope registered with + /// LLVMContext has unique ID except pre-defined ones. + SyncScope::ID getOrInsertSyncScopeID(StringRef SSN); + + /// getSyncScopeNames - Populates client supplied SmallVector with + /// synchronization scope names registered with LLVMContext. Synchronization + /// scope names are ordered by increasing synchronization scope IDs. + void getSyncScopeNames(SmallVectorImpl &SSNs) const; + /// Define the GC for a function void setGC(const Function &Fn, std::string GCName); @@ -188,10 +216,19 @@ class LLVMContext { /// \brief Return if a code hotness metric should be included in optimization /// diagnostics. - bool getDiagnosticHotnessRequested() const; + bool getDiagnosticsHotnessRequested() const; /// \brief Set if a code hotness metric should be included in optimization /// diagnostics. - void setDiagnosticHotnessRequested(bool Requested); + void setDiagnosticsHotnessRequested(bool Requested); + + /// \brief Return the minimum hotness value a diagnostic would need in order + /// to be included in optimization diagnostics. If there is no minimum, this + /// returns None. + uint64_t getDiagnosticsHotnessThreshold() const; + + /// \brief Set the minimum hotness value a diagnostic needs in order to be + /// included in optimization diagnostics. + void setDiagnosticsHotnessThreshold(uint64_t Threshold); /// \brief Return the YAML file used by the backend to save optimization /// diagnostics. 
If null, diagnostics are not saved in a file but only diff --git a/interpreter/llvm/src/include/llvm/IR/LegacyPassManager.h b/interpreter/llvm/src/include/llvm/IR/LegacyPassManager.h index 5257a0eed488c..9a376a151505e 100644 --- a/interpreter/llvm/src/include/llvm/IR/LegacyPassManager.h +++ b/interpreter/llvm/src/include/llvm/IR/LegacyPassManager.h @@ -98,6 +98,9 @@ class FunctionPassManager : public PassManagerBase { // Create wrappers for C Binding types (see CBindingWrapping.h). DEFINE_STDCXX_CONVERSION_FUNCTIONS(legacy::PassManagerBase, LLVMPassManagerRef) +/// If -time-passes has been specified, report the timings immediately and then +/// reset the timers to zero. +void reportAndResetTimings(); } // End llvm namespace #endif diff --git a/interpreter/llvm/src/include/llvm/IR/LegacyPassNameParser.h b/interpreter/llvm/src/include/llvm/IR/LegacyPassNameParser.h index fd9d468b06cba..4cec081964081 100644 --- a/interpreter/llvm/src/include/llvm/IR/LegacyPassNameParser.h +++ b/interpreter/llvm/src/include/llvm/IR/LegacyPassNameParser.h @@ -81,15 +81,15 @@ class PassNameParser : public PassRegistrationListener, // default implementation to sort the table before we print... void printOptionInfo(const cl::Option &O, size_t GlobalWidth) const override { PassNameParser *PNP = const_cast(this); - array_pod_sort(PNP->Values.begin(), PNP->Values.end(), ValLessThan); + array_pod_sort(PNP->Values.begin(), PNP->Values.end(), ValCompare); cl::parser::printOptionInfo(O, GlobalWidth); } private: - // ValLessThan - Provide a sorting comparator for Values elements... - static int ValLessThan(const PassNameParser::OptionInfo *VT1, - const PassNameParser::OptionInfo *VT2) { - return VT1->Name < VT2->Name; + // ValCompare - Provide a sorting comparator for Values elements... + static int ValCompare(const PassNameParser::OptionInfo *VT1, + const PassNameParser::OptionInfo *VT2) { + return VT1->Name.compare(VT2->Name); } }; diff --git a/interpreter/llvm/src/include/llvm/IR/Metadata.h b/interpreter/llvm/src/include/llvm/IR/Metadata.h index 8f24a6a1d69d8..3462cc02fd27f 100644 --- a/interpreter/llvm/src/include/llvm/IR/Metadata.h +++ b/interpreter/llvm/src/include/llvm/IR/Metadata.h @@ -19,18 +19,18 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseMapInfo.h" -#include "llvm/ADT/ilist_node.h" -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/None.h" #include "llvm/ADT/PointerUnion.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/ilist_node.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/IR/Constant.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Value.h" -#include "llvm/Support/Casting.h" #include "llvm/Support/CBindingWrapping.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" #include #include @@ -45,6 +45,8 @@ namespace llvm { class Module; class ModuleSlotTracker; +class raw_ostream; +class Type; enum LLVMConstants : uint32_t { DEBUG_METADATA_VERSION = 3 // Current debug info version number. @@ -67,8 +69,8 @@ class Metadata { unsigned char Storage; // TODO: expose remaining bits to subclasses. 
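An aside on the LLVMContext.h hunk above, before the Metadata class body continues: the new SyncScope machinery interns scope names into stable IDs, with SingleThread and System pre-registered. A minimal sketch of the intended use, assuming a hypothetical target-specific scope named "agent" and a load that already satisfies the usual atomic-access constraints:

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"

// Sketch: intern an (assumed, target-specific) scope name, make a load
// atomic within that scope, then enumerate all registered scope names.
void tagWithScope(llvm::LLVMContext &Ctx, llvm::LoadInst &LI) {
  llvm::SyncScope::ID Agent = Ctx.getOrInsertSyncScopeID("agent");
  LI.setAtomic(llvm::AtomicOrdering::SequentiallyConsistent, Agent);

  llvm::SmallVector<llvm::StringRef, 4> Names;
  Ctx.getSyncScopeNames(Names); // ordered by increasing SyncScope::ID
}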
- unsigned short SubclassData16; - unsigned SubclassData32; + unsigned short SubclassData16 = 0; + unsigned SubclassData32 = 0; public: enum MetadataKind { @@ -78,7 +80,7 @@ class Metadata { protected: Metadata(unsigned ID, StorageType Storage) - : SubclassID(ID), Storage(Storage), SubclassData16(0), SubclassData32(0) { + : SubclassID(ID), Storage(Storage) { static_assert(sizeof(*this) == 8, "Metadata fields poorly packed"); } @@ -174,14 +176,16 @@ class MetadataAsValue : public Value { Metadata *MD; MetadataAsValue(Type *Ty, Metadata *MD); - ~MetadataAsValue() override; /// \brief Drop use of metadata (during teardown). void dropUse() { MD = nullptr; } public: + ~MetadataAsValue(); + static MetadataAsValue *get(LLVMContext &Context, Metadata *MD); static MetadataAsValue *getIfExists(LLVMContext &Context, Metadata *MD); + Metadata *getMetadata() const { return MD; } static bool classof(const Value *V) { @@ -256,7 +260,7 @@ class MetadataTracking { /// \brief Check whether metadata is replaceable. static bool isReplaceable(const Metadata &MD); - typedef PointerUnion OwnerTy; + using OwnerTy = PointerUnion; private: /// \brief Track a reference to metadata for an owner. @@ -274,7 +278,7 @@ class ReplaceableMetadataImpl { friend class MetadataTracking; public: - typedef MetadataTracking::OwnerTy OwnerTy; + using OwnerTy = MetadataTracking::OwnerTy; private: LLVMContext &Context; @@ -351,17 +355,21 @@ class ValueAsMetadata : public Metadata, ReplaceableMetadataImpl { public: static ValueAsMetadata *get(Value *V); + static ConstantAsMetadata *getConstant(Value *C) { return cast(get(C)); } + static LocalAsMetadata *getLocal(Value *Local) { return cast(get(Local)); } static ValueAsMetadata *getIfExists(Value *V); + static ConstantAsMetadata *getConstantIfExists(Value *C) { return cast_or_null(getIfExists(C)); } + static LocalAsMetadata *getLocalIfExists(Value *Local) { return cast_or_null(getIfExists(Local)); } @@ -490,8 +498,8 @@ namespace detail { template T &make(); template struct HasDereference { - typedef char Yes[1]; - typedef char No[2]; + using Yes = char[1]; + using No = char[2]; template struct SFINAE {}; template @@ -612,7 +620,7 @@ class MDString : public Metadata { unsigned getLength() const { return (unsigned)getString().size(); } - typedef StringRef::iterator iterator; + using iterator = StringRef::iterator; /// \brief Pointer to the first byte of the string. iterator begin() const { return getString().begin(); } @@ -652,6 +660,19 @@ struct AAMDNodes { /// \brief The tag specifying the noalias scope. MDNode *NoAlias; + + /// \brief Given two sets of AAMDNodes that apply to the same pointer, + /// give the best AAMDNodes that are compatible with both (i.e. a set of + /// nodes whose allowable aliasing conclusions are a subset of those + /// allowable by both of the inputs). However, for efficiency + /// reasons, do not create any new MDNodes. + AAMDNodes intersect(const AAMDNodes &Other) { + AAMDNodes Result; + Result.TBAA = Other.TBAA == TBAA ? TBAA : nullptr; + Result.Scope = Other.Scope == Scope ? Scope : nullptr; + Result.NoAlias = Other.NoAlias == NoAlias ? NoAlias : nullptr; + return Result; + } }; // Specialize DenseMapInfo for AAMDNodes. 
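The intersect() helper above computes the strongest alias metadata that is still valid for both inputs by keeping only the nodes the two sets agree on; anything else degrades to null, i.e. "no information". A sketch of the typical use when one instruction replaces two memory accesses (illustrative, not from the patch):

#include "llvm/IR/Instruction.h"
#include "llvm/IR/Metadata.h"

// Sketch: compute AA metadata that is safe to place on an instruction
// replacing both A and B. Nodes the two accesses disagree on are dropped.
llvm::AAMDNodes mergedAATags(const llvm::Instruction &A,
                             const llvm::Instruction &B) {
  llvm::AAMDNodes TagsA, TagsB;
  A.getAAMetadata(TagsA);
  B.getAAMetadata(TagsB);
  return TagsA.intersect(TagsB);
}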
@@ -729,12 +750,14 @@ class MDOperand { }; template <> struct simplify_type { - typedef Metadata *SimpleType; + using SimpleType = Metadata *; + static SimpleType getSimplifiedValue(MDOperand &MD) { return MD.get(); } }; template <> struct simplify_type { - typedef Metadata *SimpleType; + using SimpleType = Metadata *; + static SimpleType getSimplifiedValue(const MDOperand &MD) { return MD.get(); } }; @@ -816,7 +839,7 @@ struct TempMDNodeDeleter { }; #define HANDLE_MDNODE_LEAF(CLASS) \ - typedef std::unique_ptr Temp##CLASS; + using Temp##CLASS = std::unique_ptr; #define HANDLE_MDNODE_BRANCH(CLASS) HANDLE_MDNODE_LEAF(CLASS) #include "llvm/IR/Metadata.def" @@ -846,6 +869,10 @@ class MDNode : public Metadata { ContextAndReplaceableUses Context; protected: + MDNode(LLVMContext &Context, unsigned ID, StorageType Storage, + ArrayRef Ops1, ArrayRef Ops2 = None); + ~MDNode() = default; + void *operator new(size_t Size, unsigned NumOps); void operator delete(void *Mem); @@ -859,16 +886,13 @@ class MDNode : public Metadata { llvm_unreachable("Constructor throws?"); } - MDNode(LLVMContext &Context, unsigned ID, StorageType Storage, - ArrayRef Ops1, ArrayRef Ops2 = None); - ~MDNode() = default; - void dropAllReferences(); MDOperand *mutable_begin() { return mutable_end() - NumOperands; } MDOperand *mutable_end() { return reinterpret_cast(this); } - typedef iterator_range mutable_op_range; + using mutable_op_range = iterator_range; + mutable_op_range mutable_operands() { return mutable_op_range(mutable_begin(), mutable_end()); } @@ -1027,8 +1051,8 @@ class MDNode : public Metadata { static void dispatchResetHash(NodeTy *, std::false_type) {} public: - typedef const MDOperand *op_iterator; - typedef iterator_range op_range; + using op_iterator = const MDOperand *; + using op_range = iterator_range; op_iterator op_begin() const { return const_cast(this)->mutable_begin(); @@ -1222,16 +1246,18 @@ template class MDTupleTypedArrayWrapper { // FIXME: Fix callers and remove condition on N. unsigned size() const { return N ? N->getNumOperands() : 0u; } + bool empty() const { return N ? N->getNumOperands() == 0 : true; } T *operator[](unsigned I) const { return cast_or_null(N->getOperand(I)); } // FIXME: Fix callers and remove condition on N. - typedef TypedMDOperandIterator iterator; + using iterator = TypedMDOperandIterator; + iterator begin() const { return N ? iterator(N->op_begin()) : iterator(); } iterator end() const { return N ? iterator(N->op_end()) : iterator(); } }; #define HANDLE_METADATA(CLASS) \ - typedef MDTupleTypedArrayWrapper CLASS##Array; + using CLASS##Array = MDTupleTypedArrayWrapper; #include "llvm/IR/Metadata.def" /// Placeholder metadata for operands of distinct MDNodes. @@ -1302,12 +1328,12 @@ class NamedMDNode : public ilist_node { template class op_iterator_impl : public std::iterator { + friend class NamedMDNode; + const NamedMDNode *Node = nullptr; unsigned Idx = 0; - op_iterator_impl(const NamedMDNode *N, unsigned i) : Node(N), Idx(i) { } - - friend class NamedMDNode; + op_iterator_impl(const NamedMDNode *N, unsigned i) : Node(N), Idx(i) {} public: op_iterator_impl() = default; @@ -1369,11 +1395,13 @@ class NamedMDNode : public ilist_node { // --------------------------------------------------------------------------- // Operand Iterator interface... 
// - typedef op_iterator_impl op_iterator; + using op_iterator = op_iterator_impl; + op_iterator op_begin() { return op_iterator(this, 0); } op_iterator op_end() { return op_iterator(this, getNumOperands()); } - typedef op_iterator_impl const_op_iterator; + using const_op_iterator = op_iterator_impl; + const_op_iterator op_begin() const { return const_op_iterator(this, 0); } const_op_iterator op_end() const { return const_op_iterator(this, getNumOperands()); } diff --git a/interpreter/llvm/src/include/llvm/IR/Module.h b/interpreter/llvm/src/include/llvm/IR/Module.h index 3024d9e27a2fc..196e32e3615c0 100644 --- a/interpreter/llvm/src/include/llvm/IR/Module.h +++ b/interpreter/llvm/src/include/llvm/IR/Module.h @@ -15,10 +15,11 @@ #ifndef LLVM_IR_MODULE_H #define LLVM_IR_MODULE_H -#include "llvm/ADT/iterator_range.h" +#include "llvm-c/Types.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Comdat.h" #include "llvm/IR/DataLayout.h" @@ -30,7 +31,6 @@ #include "llvm/IR/SymbolTableListTraits.h" #include "llvm/Support/CBindingWrapping.h" #include "llvm/Support/CodeGen.h" -#include "llvm-c/Types.h" #include #include #include @@ -139,9 +139,12 @@ class Module { /// during the append operation. AppendUnique = 6, + /// Takes the max of the two values, which are required to be integers. + Max = 7, + // Markers: ModFlagBehaviorFirstVal = Error, - ModFlagBehaviorLastVal = AppendUnique + ModFlagBehaviorLastVal = Max }; /// Checks if Metadata represents a valid ModFlagBehavior, and stores the @@ -246,7 +249,7 @@ class Module { /// when other randomness consuming passes are added or removed. In /// addition, the random stream will be reproducible across LLVM /// versions when the pass does not change. - RandomNumberGenerator *createRNG(const Pass* P) const; + std::unique_ptr createRNG(const Pass* P) const; /// @} /// @name Module Level Mutators diff --git a/interpreter/llvm/src/include/llvm/IR/ModuleSummaryIndex.h b/interpreter/llvm/src/include/llvm/IR/ModuleSummaryIndex.h index c46c609609e22..4aa8a0199ab15 100644 --- a/interpreter/llvm/src/include/llvm/IR/ModuleSummaryIndex.h +++ b/interpreter/llvm/src/include/llvm/IR/ModuleSummaryIndex.h @@ -18,13 +18,13 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallString.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" -#include "llvm/IR/Module.h" #include "llvm/IR/GlobalValue.h" +#include "llvm/IR/Module.h" #include #include #include @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -46,7 +47,13 @@ template struct MappingTraits; /// \brief Class to accumulate and hold information about a callee. struct CalleeInfo { - enum class HotnessType : uint8_t { Unknown = 0, Cold = 1, None = 2, Hot = 3 }; + enum class HotnessType : uint8_t { + Unknown = 0, + Cold = 1, + None = 2, + Hot = 3, + Critical = 4 + }; HotnessType Hotness = HotnessType::Unknown; CalleeInfo() = default; @@ -134,16 +141,18 @@ class GlobalValueSummary { /// be renamed or references something that can't be renamed). unsigned NotEligibleToImport : 1; - /// Indicate that the global value must be considered a live root for - /// index-based liveness analysis. Used for special LLVM values such as - /// llvm.global_ctors that the linker does not know about. 
- unsigned LiveRoot : 1; + /// In per-module summary, indicate that the global value must be considered + /// a live root for index-based liveness analysis. Used for special LLVM + /// values such as llvm.global_ctors that the linker does not know about. + /// + /// In combined summary, indicate that the global value is live. + unsigned Live : 1; /// Convenience Constructors explicit GVFlags(GlobalValue::LinkageTypes Linkage, - bool NotEligibleToImport, bool LiveRoot) + bool NotEligibleToImport, bool Live) : Linkage(Linkage), NotEligibleToImport(NotEligibleToImport), - LiveRoot(LiveRoot) {} + Live(Live) {} }; private: @@ -172,6 +181,8 @@ class GlobalValueSummary { /// are listed in the derived FunctionSummary object. std::vector RefEdgeList; + bool isLive() const { return Flags.Live; } + protected: GlobalValueSummary(SummaryKind K, GVFlags Flags, std::vector Refs) : Kind(K), Flags(Flags), RefEdgeList(std::move(Refs)) {} @@ -213,19 +224,17 @@ class GlobalValueSummary { /// Return true if this global value can't be imported. bool notEligibleToImport() const { return Flags.NotEligibleToImport; } - /// Return true if this global value must be considered a root for live - /// value analysis on the index. - bool liveRoot() const { return Flags.LiveRoot; } - - /// Flag that this global value must be considered a root for live - /// value analysis on the index. - void setLiveRoot() { Flags.LiveRoot = true; } + void setLive(bool Live) { Flags.Live = Live; } /// Flag that this global value cannot be imported. void setNotEligibleToImport() { Flags.NotEligibleToImport = true; } /// Return the list of values referenced by this global value definition. ArrayRef refs() const { return RefEdgeList; } + + friend class ModuleSummaryIndex; + friend void computeDeadSymbols(class ModuleSummaryIndex &, + const DenseSet &); }; /// \brief Alias summary information. @@ -513,7 +522,7 @@ using ModulePathStringTableTy = StringMap>; /// Map of global value GUID to its summary, used to identify values defined in /// a particular module, and provide efficient access to their summary. -using GVSummaryMapTy = std::map; +using GVSummaryMapTy = DenseMap; /// Class to hold module path string table and global value map, /// and encapsulate methods for operating on them. @@ -535,6 +544,14 @@ class ModuleSummaryIndex { /// GUIDs, it will be mapped to 0. std::map OidGuidMap; + /// Indicates that summary-based GlobalValue GC has run, and values with + /// GVFlags::Live==false are really dead. Otherwise, all values must be + /// considered live. + bool WithGlobalValueDeadStripping = false; + + std::set CfiFunctionDefs; + std::set CfiFunctionDecls; + // YAML I/O support. friend yaml::MappingTraits; @@ -550,6 +567,18 @@ class ModuleSummaryIndex { const_gvsummary_iterator end() const { return GlobalValueMap.end(); } size_t size() const { return GlobalValueMap.size(); } + bool withGlobalValueDeadStripping() const { + return WithGlobalValueDeadStripping; + } + void setWithGlobalValueDeadStripping() { + WithGlobalValueDeadStripping = true; + } + + bool isGlobalValueLive(const GlobalValueSummary *GVS) const { + return !WithGlobalValueDeadStripping || GVS->isLive(); + } + bool isGUIDLive(GlobalValue::GUID GUID) const; + /// Return a ValueInfo for GUID if it exists, otherwise return ValueInfo(). ValueInfo getValueInfo(GlobalValue::GUID GUID) const { auto I = GlobalValueMap.find(GUID); @@ -574,6 +603,12 @@ class ModuleSummaryIndex { return I == OidGuidMap.end() ? 
0 : I->second; } + std::set &cfiFunctionDefs() { return CfiFunctionDefs; } + const std::set &cfiFunctionDefs() const { return CfiFunctionDefs; } + + std::set &cfiFunctionDecls() { return CfiFunctionDecls; } + const std::set &cfiFunctionDecls() const { return CfiFunctionDecls; } + /// Add a global value summary for a value of the given name. void addGlobalValueSummary(StringRef ValueName, std::unique_ptr Summary) { @@ -673,14 +708,13 @@ class ModuleSummaryIndex { return Pair.first; } - /// Add a new module path with the given \p Hash, mapped to the given \p - /// ModID, and return an iterator to the entry in the index. - ModulePathStringTableTy::iterator - addModulePath(StringRef ModPath, uint64_t ModId, - ModuleHash Hash = ModuleHash{{0}}) { - return ModulePathStringTable.insert(std::make_pair( - ModPath, - std::make_pair(ModId, Hash))).first; + typedef ModulePathStringTableTy::value_type ModuleInfo; + + /// Add a new module with the given \p Hash, mapped to the given \p + /// ModID, and return a reference to the module. + ModuleInfo *addModule(StringRef ModPath, uint64_t ModId, + ModuleHash Hash = ModuleHash{{0}}) { + return &*ModulePathStringTable.insert({ModPath, {ModId, Hash}}).first; } /// Check if the given Module has any functions available for exporting diff --git a/interpreter/llvm/src/include/llvm/IR/ModuleSummaryIndexYAML.h b/interpreter/llvm/src/include/llvm/IR/ModuleSummaryIndexYAML.h index 78fdb602027d6..7f6cb5bee5a62 100644 --- a/interpreter/llvm/src/include/llvm/IR/ModuleSummaryIndexYAML.h +++ b/interpreter/llvm/src/include/llvm/IR/ModuleSummaryIndexYAML.h @@ -128,6 +128,8 @@ template <> struct MappingTraits { }; struct FunctionSummaryYaml { + unsigned Linkage; + bool NotEligibleToImport, Live; std::vector TypeTests; std::vector TypeTestAssumeVCalls, TypeCheckedLoadVCalls; @@ -138,8 +140,6 @@ struct FunctionSummaryYaml { } // End yaml namespace } // End llvm namespace -LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(uint64_t) - namespace llvm { namespace yaml { @@ -168,6 +168,9 @@ namespace yaml { template <> struct MappingTraits { static void mapping(IO &io, FunctionSummaryYaml& summary) { + io.mapOptional("Linkage", summary.Linkage); + io.mapOptional("NotEligibleToImport", summary.NotEligibleToImport); + io.mapOptional("Live", summary.Live); io.mapOptional("TypeTests", summary.TypeTests); io.mapOptional("TypeTestAssumeVCalls", summary.TypeTestAssumeVCalls); io.mapOptional("TypeCheckedLoadVCalls", summary.TypeCheckedLoadVCalls); @@ -199,12 +202,12 @@ template <> struct CustomMappingTraits { } auto &Elem = V[KeyInt]; for (auto &FSum : FSums) { - GlobalValueSummary::GVFlags GVFlags(GlobalValue::ExternalLinkage, false, - false); Elem.SummaryList.push_back(llvm::make_unique( - GVFlags, 0, ArrayRef{}, - ArrayRef{}, std::move(FSum.TypeTests), - std::move(FSum.TypeTestAssumeVCalls), + GlobalValueSummary::GVFlags( + static_cast(FSum.Linkage), + FSum.NotEligibleToImport, FSum.Live), + 0, ArrayRef{}, ArrayRef{}, + std::move(FSum.TypeTests), std::move(FSum.TypeTestAssumeVCalls), std::move(FSum.TypeCheckedLoadVCalls), std::move(FSum.TypeTestAssumeConstVCalls), std::move(FSum.TypeCheckedLoadConstVCalls))); @@ -216,8 +219,10 @@ template <> struct CustomMappingTraits { for (auto &Sum : P.second.SummaryList) { if (auto *FSum = dyn_cast(Sum.get())) FSums.push_back(FunctionSummaryYaml{ - FSum->type_tests(), FSum->type_test_assume_vcalls(), - FSum->type_checked_load_vcalls(), + FSum->flags().Linkage, + static_cast(FSum->flags().NotEligibleToImport), + static_cast(FSum->flags().Live), FSum->type_tests(), + 
FSum->type_test_assume_vcalls(), FSum->type_checked_load_vcalls(), FSum->type_test_assume_const_vcalls(), FSum->type_checked_load_const_vcalls()}); } @@ -231,6 +236,25 @@ template <> struct MappingTraits { static void mapping(IO &io, ModuleSummaryIndex& index) { io.mapOptional("GlobalValueMap", index.GlobalValueMap); io.mapOptional("TypeIdMap", index.TypeIdMap); + io.mapOptional("WithGlobalValueDeadStripping", + index.WithGlobalValueDeadStripping); + + if (io.outputting()) { + std::vector CfiFunctionDefs(index.CfiFunctionDefs.begin(), + index.CfiFunctionDefs.end()); + io.mapOptional("CfiFunctionDefs", CfiFunctionDefs); + std::vector CfiFunctionDecls(index.CfiFunctionDecls.begin(), + index.CfiFunctionDecls.end()); + io.mapOptional("CfiFunctionDecls", CfiFunctionDecls); + } else { + std::vector CfiFunctionDefs; + io.mapOptional("CfiFunctionDefs", CfiFunctionDefs); + index.CfiFunctionDefs = {CfiFunctionDefs.begin(), CfiFunctionDefs.end()}; + std::vector CfiFunctionDecls; + io.mapOptional("CfiFunctionDecls", CfiFunctionDecls); + index.CfiFunctionDecls = {CfiFunctionDecls.begin(), + CfiFunctionDecls.end()}; + } } }; diff --git a/interpreter/llvm/src/include/llvm/IR/OperandTraits.h b/interpreter/llvm/src/include/llvm/IR/OperandTraits.h index e97a8009ccc03..c618aff3df9ae 100644 --- a/interpreter/llvm/src/include/llvm/IR/OperandTraits.h +++ b/interpreter/llvm/src/include/llvm/IR/OperandTraits.h @@ -30,6 +30,9 @@ namespace llvm { template struct FixedNumOperandTraits { static Use *op_begin(SubClass* U) { + static_assert( + !std::is_polymorphic::value, + "adding virtual methods to subclasses of User breaks use lists"); return reinterpret_cast(U) - ARITY; } static Use *op_end(SubClass* U) { @@ -65,6 +68,9 @@ struct OptionalOperandTraits : public FixedNumOperandTraits { template struct VariadicOperandTraits { static Use *op_begin(SubClass* U) { + static_assert( + !std::is_polymorphic::value, + "adding virtual methods to subclasses of User breaks use lists"); return reinterpret_cast(U) - static_cast(U)->getNumOperands(); } static Use *op_end(SubClass* U) { @@ -82,9 +88,6 @@ struct VariadicOperandTraits { /// HungoffOperandTraits - determine the allocation regime of the Use array /// when it is not a prefix to the User object, but allocated at an unrelated /// heap address. -/// Assumes that the User subclass that is determined by this traits class -/// has an OperandList member of type User::op_iterator. [Note: this is now -/// trivially satisfied, because User has that member for historic reasons.] /// /// This is the traits class that is needed when the Use array must be /// resizable. diff --git a/interpreter/llvm/src/include/llvm/IR/Operator.h b/interpreter/llvm/src/include/llvm/IR/Operator.h index 997a85340c259..9df6bfc54cd46 100644 --- a/interpreter/llvm/src/include/llvm/IR/Operator.h +++ b/interpreter/llvm/src/include/llvm/IR/Operator.h @@ -29,18 +29,12 @@ namespace llvm { /// This is a utility class that provides an abstraction for the common /// functionality between Instructions and ConstantExprs. class Operator : public User { -protected: - // NOTE: Cannot use = delete because it's not legal to delete - // an overridden method that's not deleted in the base class. Cannot leave - // this unimplemented because that leads to an ODR-violation. - ~Operator() override; - public: // The Operator class is intended to be used as a utility, and is never itself // instantiated. 
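A quick aside on the static_asserts added to OperandTraits above (the Operator class declaration resumes after this sketch): they defend the invariant that a fixed-arity User's Use array is co-allocated immediately before the object, so operand access is bare pointer arithmetic from `this`. A vtable pointer in a subclass would silently shift that layout, which is why polymorphic User subclasses are now rejected at compile time. A hypothetical illustration of what op_begin() expands to for a two-operand user:

#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Use.h"

// Sketch of the layout assumption: BinaryOperator has arity 2, so its two
// Use slots sit directly in front of the object in memory.
llvm::Use *firstOperandSlot(llvm::BinaryOperator *BO) {
  return reinterpret_cast<llvm::Use *>(BO) - 2;
}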
Operator() = delete; + ~Operator() = delete; - void *operator new(size_t, unsigned) = delete; void *operator new(size_t s) = delete; /// Return the opcode for this Instruction or ConstantExpr. @@ -60,9 +54,9 @@ class Operator : public User { return Instruction::UserOp1; } - static inline bool classof(const Instruction *) { return true; } - static inline bool classof(const ConstantExpr *) { return true; } - static inline bool classof(const Value *V) { + static bool classof(const Instruction *) { return true; } + static bool classof(const ConstantExpr *) { return true; } + static bool classof(const Value *V) { return isa(V) || isa(V); } }; @@ -103,19 +97,19 @@ class OverflowingBinaryOperator : public Operator { return (SubclassOptionalData & NoSignedWrap) != 0; } - static inline bool classof(const Instruction *I) { + static bool classof(const Instruction *I) { return I->getOpcode() == Instruction::Add || I->getOpcode() == Instruction::Sub || I->getOpcode() == Instruction::Mul || I->getOpcode() == Instruction::Shl; } - static inline bool classof(const ConstantExpr *CE) { + static bool classof(const ConstantExpr *CE) { return CE->getOpcode() == Instruction::Add || CE->getOpcode() == Instruction::Sub || CE->getOpcode() == Instruction::Mul || CE->getOpcode() == Instruction::Shl; } - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return (isa(V) && classof(cast(V))) || (isa(V) && classof(cast(V))); } @@ -150,13 +144,13 @@ class PossiblyExactOperator : public Operator { OpC == Instruction::LShr; } - static inline bool classof(const ConstantExpr *CE) { + static bool classof(const ConstantExpr *CE) { return isPossiblyExactOpcode(CE->getOpcode()); } - static inline bool classof(const Instruction *I) { + static bool classof(const Instruction *I) { return isPossiblyExactOpcode(I->getOpcode()); } - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return (isa(V) && classof(cast(V))) || (isa(V) && classof(cast(V))); } @@ -330,12 +324,19 @@ class FPMathOperator : public Operator { /// precision. 
float getFPAccuracy() const; - static inline bool classof(const Instruction *I) { + static bool classof(const Instruction *I) { return I->getType()->isFPOrFPVectorTy() || I->getOpcode() == Instruction::FCmp; } - static inline bool classof(const Value *V) { - return isa(V) && classof(cast(V)); + + static bool classof(const ConstantExpr *CE) { + return CE->getType()->isFPOrFPVectorTy() || + CE->getOpcode() == Instruction::FCmp; + } + + static bool classof(const Value *V) { + return (isa(V) && classof(cast(V))) || + (isa(V) && classof(cast(V))); } }; @@ -343,13 +344,13 @@ class FPMathOperator : public Operator { template class ConcreteOperator : public SuperClass { public: - static inline bool classof(const Instruction *I) { + static bool classof(const Instruction *I) { return I->getOpcode() == Opc; } - static inline bool classof(const ConstantExpr *CE) { + static bool classof(const ConstantExpr *CE) { return CE->getOpcode() == Opc; } - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return (isa(V) && classof(cast(V))) || (isa(V) && classof(cast(V))); } diff --git a/interpreter/llvm/src/include/llvm/IR/PassManager.h b/interpreter/llvm/src/include/llvm/IR/PassManager.h index d03b7b65f81e2..393175675034d 100644 --- a/interpreter/llvm/src/include/llvm/IR/PassManager.h +++ b/interpreter/llvm/src/include/llvm/IR/PassManager.h @@ -162,6 +162,14 @@ class PreservedAnalyses { return PA; } + /// \brief Construct a preserved analyses object with a single preserved set. + template + static PreservedAnalyses allInSet() { + PreservedAnalyses PA; + PA.preserveSet(); + return PA; + } + /// Mark an analysis as preserved. template void preserve() { preserve(AnalysisT::ID()); } @@ -1062,10 +1070,27 @@ class OuterAnalysisManagerProxy const AnalysisManagerT &getManager() const { return *AM; } - /// \brief Handle invalidation by ignoring it; this pass is immutable. + /// When invalidation occurs, remove any registered invalidation events. bool invalidate( - IRUnitT &, const PreservedAnalyses &, - typename AnalysisManager::Invalidator &) { + IRUnitT &IRUnit, const PreservedAnalyses &PA, + typename AnalysisManager::Invalidator &Inv) { + // Loop over the set of registered outer invalidation mappings and if any + // of them map to an analysis that is now invalid, clear it out. + SmallVector DeadKeys; + for (auto &KeyValuePair : OuterAnalysisInvalidationMap) { + AnalysisKey *OuterID = KeyValuePair.first; + auto &InnerIDs = KeyValuePair.second; + InnerIDs.erase(llvm::remove_if(InnerIDs, [&](AnalysisKey *InnerID) { + return Inv.invalidate(InnerID, IRUnit, PA); }), + InnerIDs.end()); + if (InnerIDs.empty()) + DeadKeys.push_back(OuterID); + } + + for (auto OuterID : DeadKeys) + OuterAnalysisInvalidationMap.erase(OuterID); + + // The proxy itself remains valid regardless of anything else. return false; } diff --git a/interpreter/llvm/src/include/llvm/IR/PatternMatch.h b/interpreter/llvm/src/include/llvm/IR/PatternMatch.h index 6b2b22e82b95c..acb8952116441 100644 --- a/interpreter/llvm/src/include/llvm/IR/PatternMatch.h +++ b/interpreter/llvm/src/include/llvm/IR/PatternMatch.h @@ -158,19 +158,23 @@ struct match_neg_zero { /// zero inline match_neg_zero m_NegZero() { return match_neg_zero(); } +struct match_any_zero { + template bool match(ITy *V) { + if (const auto *C = dyn_cast(V)) + return C->isZeroValue(); + return false; + } +}; + /// \brief - Match an arbitrary zero/null constant. This includes /// zero_initializer for vectors and ConstantPointerNull for pointers. 
For /// floating point constants, this will match negative zero and positive zero -inline match_combine_or m_AnyZero() { - return m_CombineOr(m_Zero(), m_NegZero()); -} +inline match_any_zero m_AnyZero() { return match_any_zero(); } struct match_nan { template bool match(ITy *V) { - if (const auto *C = dyn_cast(V)) { - const APFloat &APF = C->getValueAPF(); - return APF.isNaN(); - } + if (const auto *C = dyn_cast(V)) + return C->isNaN(); return false; } }; @@ -178,6 +182,39 @@ struct match_nan { /// Match an arbitrary NaN constant. This includes quiet and signalling nans. inline match_nan m_NaN() { return match_nan(); } +struct match_one { + template bool match(ITy *V) { + if (const auto *C = dyn_cast(V)) + return C->isOneValue(); + return false; + } +}; + +/// \brief Match an integer 1 or a vector with all elements equal to 1. +inline match_one m_One() { return match_one(); } + +struct match_all_ones { + template bool match(ITy *V) { + if (const auto *C = dyn_cast(V)) + return C->isAllOnesValue(); + return false; + } +}; + +/// \brief Match an integer or vector with all bits set to true. +inline match_all_ones m_AllOnes() { return match_all_ones(); } + +struct match_sign_mask { + template bool match(ITy *V) { + if (const auto *C = dyn_cast(V)) + return C->isMinSignedValue(); + return false; + } +}; + +/// \brief Match an integer or vector with only the sign bit(s) set. +inline match_sign_mask m_SignMask() { return match_sign_mask(); } + struct apint_match { const APInt *&Res; @@ -261,34 +298,6 @@ template struct api_pred_ty : public Predicate { } }; -struct is_one { - bool isValue(const APInt &C) { return C == 1; } -}; - -/// \brief Match an integer 1 or a vector with all elements equal to 1. -inline cst_pred_ty m_One() { return cst_pred_ty(); } -inline api_pred_ty m_One(const APInt *&V) { return V; } - -struct is_all_ones { - bool isValue(const APInt &C) { return C.isAllOnesValue(); } -}; - -/// \brief Match an integer or vector with all bits set to true. -inline cst_pred_ty m_AllOnes() { - return cst_pred_ty(); -} -inline api_pred_ty m_AllOnes(const APInt *&V) { return V; } - -struct is_sign_mask { - bool isValue(const APInt &C) { return C.isSignMask(); } -}; - -/// \brief Match an integer or vector with only the sign bit(s) set. -inline cst_pred_ty m_SignMask() { - return cst_pred_ty(); -} -inline api_pred_ty m_SignMask(const APInt *&V) { return V; } - struct is_power2 { bool isValue(const APInt &C) { return C.isPowerOf2(); } }; @@ -380,7 +389,7 @@ struct bind_const_intval_ty { template bool match(ITy *V) { if (const auto *CV = dyn_cast(V)) - if (CV->getBitWidth() <= 64) { + if (CV->getValue().ule(UINT64_MAX)) { VR = CV->getZExtValue(); return true; } @@ -401,10 +410,7 @@ struct specific_intval { if (const auto *C = dyn_cast(V)) CI = dyn_cast_or_null(C->getSplatValue()); - if (CI && CI->getBitWidth() <= 64) - return CI->getZExtValue() == Val; - - return false; + return CI && CI->getValue() == Val; } }; @@ -419,7 +425,8 @@ inline bind_const_intval_ty m_ConstantInt(uint64_t &V) { return V; } //===----------------------------------------------------------------------===// // Matcher for any binary operator. 
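Before the binary-operator matchers that follow: the constant matchers reworked above now dispatch through Constant::isZeroValue/isOneValue/isAllOnesValue/isMinSignedValue instead of APInt predicates, so they also fire on vector splat constants. A sketch of InstSimplify-style use (the names V and X are illustrative):

#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"
using namespace llvm;
using namespace llvm::PatternMatch;

// Sketch: fold x * 1 -> x and x & all-ones -> x; with the reworked
// matchers both patterns also fire on vector splats.
Value *simplifyIdentity(Value *V) {
  Value *X;
  if (match(V, m_Mul(m_Value(X), m_One())))
    return X;
  if (match(V, m_And(m_Value(X), m_AllOnes())))
    return X;
  return nullptr;
}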
// -template struct AnyBinaryOp_match { +template +struct AnyBinaryOp_match { LHS_t L; RHS_t R; @@ -427,7 +434,9 @@ template struct AnyBinaryOp_match { template bool match(OpTy *V) { if (auto *I = dyn_cast(V)) - return L.match(I->getOperand(0)) && R.match(I->getOperand(1)); + return (L.match(I->getOperand(0)) && R.match(I->getOperand(1))) || + (Commutable && R.match(I->getOperand(0)) && + L.match(I->getOperand(1))); return false; } }; @@ -441,7 +450,8 @@ inline AnyBinaryOp_match m_BinOp(const LHS &L, const RHS &R) { // Matchers for specific binary operators. // -template +template struct BinaryOp_match { LHS_t L; RHS_t R; @@ -451,11 +461,15 @@ struct BinaryOp_match { template bool match(OpTy *V) { if (V->getValueID() == Value::InstructionVal + Opcode) { auto *I = cast(V); - return L.match(I->getOperand(0)) && R.match(I->getOperand(1)); + return (L.match(I->getOperand(0)) && R.match(I->getOperand(1))) || + (Commutable && R.match(I->getOperand(0)) && + L.match(I->getOperand(1))); } if (auto *CE = dyn_cast(V)) - return CE->getOpcode() == Opcode && L.match(CE->getOperand(0)) && - R.match(CE->getOperand(1)); + return CE->getOpcode() == Opcode && + ((L.match(CE->getOperand(0)) && R.match(CE->getOperand(1))) || + (Commutable && R.match(CE->getOperand(0)) && + L.match(CE->getOperand(1)))); return false; } }; @@ -660,47 +674,87 @@ m_NUWShl(const LHS &L, const RHS &R) { } //===----------------------------------------------------------------------===// -// Class that matches two different binary ops. +// Class that matches a group of binary opcodes. // -template -struct BinOp2_match { +template +struct BinOpPred_match : Predicate { LHS_t L; RHS_t R; - BinOp2_match(const LHS_t &LHS, const RHS_t &RHS) : L(LHS), R(RHS) {} + BinOpPred_match(const LHS_t &LHS, const RHS_t &RHS) : L(LHS), R(RHS) {} template bool match(OpTy *V) { - if (V->getValueID() == Value::InstructionVal + Opc1 || - V->getValueID() == Value::InstructionVal + Opc2) { - auto *I = cast(V); - return L.match(I->getOperand(0)) && R.match(I->getOperand(1)); - } + if (auto *I = dyn_cast(V)) + return this->isOpType(I->getOpcode()) && L.match(I->getOperand(0)) && + R.match(I->getOperand(1)); if (auto *CE = dyn_cast(V)) - return (CE->getOpcode() == Opc1 || CE->getOpcode() == Opc2) && - L.match(CE->getOperand(0)) && R.match(CE->getOperand(1)); + return this->isOpType(CE->getOpcode()) && L.match(CE->getOperand(0)) && + R.match(CE->getOperand(1)); return false; } }; -/// \brief Matches LShr or AShr. +struct is_shift_op { + bool isOpType(unsigned Opcode) { return Instruction::isShift(Opcode); } +}; + +struct is_right_shift_op { + bool isOpType(unsigned Opcode) { + return Opcode == Instruction::LShr || Opcode == Instruction::AShr; + } +}; + +struct is_logical_shift_op { + bool isOpType(unsigned Opcode) { + return Opcode == Instruction::LShr || Opcode == Instruction::Shl; + } +}; + +struct is_bitwiselogic_op { + bool isOpType(unsigned Opcode) { + return Instruction::isBitwiseLogicOp(Opcode); + } +}; + +struct is_idiv_op { + bool isOpType(unsigned Opcode) { + return Opcode == Instruction::SDiv || Opcode == Instruction::UDiv; + } +}; + +/// \brief Matches shift operations. template -inline BinOp2_match -m_Shr(const LHS &L, const RHS &R) { - return BinOp2_match(L, R); +inline BinOpPred_match m_Shift(const LHS &L, + const RHS &R) { + return BinOpPred_match(L, R); } -/// \brief Matches LShr or Shl. +/// \brief Matches logical shift operations. 
template -inline BinOp2_match +inline BinOpPred_match m_Shr(const LHS &L, + const RHS &R) { + return BinOpPred_match(L, R); +} + +/// \brief Matches logical shift operations. +template +inline BinOpPred_match m_LogicalShift(const LHS &L, const RHS &R) { - return BinOp2_match(L, R); + return BinOpPred_match(L, R); } -/// \brief Matches UDiv and SDiv. +/// \brief Matches bitwise logic operations. template -inline BinOp2_match -m_IDiv(const LHS &L, const RHS &R) { - return BinOp2_match(L, R); +inline BinOpPred_match +m_BitwiseLogic(const LHS &L, const RHS &R) { + return BinOpPred_match(L, R); +} + +/// \brief Matches integer division operations. +template +inline BinOpPred_match m_IDiv(const LHS &L, + const RHS &R) { + return BinOpPred_match(L, R); } //===----------------------------------------------------------------------===// @@ -726,7 +780,8 @@ template inline Exact_match m_Exact(const T &SubPattern) { // Matchers for CmpInst classes // -template +template struct CmpClass_match { PredicateTy &Predicate; LHS_t L; @@ -737,7 +792,9 @@ struct CmpClass_match { template bool match(OpTy *V) { if (auto *I = dyn_cast(V)) - if (L.match(I->getOperand(0)) && R.match(I->getOperand(1))) { + if ((L.match(I->getOperand(0)) && R.match(I->getOperand(1))) || + (Commutable && R.match(I->getOperand(0)) && + L.match(I->getOperand(1)))) { Predicate = I->getPredicate(); return true; } @@ -886,17 +943,18 @@ template struct not_match { template bool match(OpTy *V) { if (auto *O = dyn_cast(V)) - if (O->getOpcode() == Instruction::Xor) - return matchIfNot(O->getOperand(0), O->getOperand(1)); + if (O->getOpcode() == Instruction::Xor) { + if (isAllOnes(O->getOperand(1))) + return L.match(O->getOperand(0)); + if (isAllOnes(O->getOperand(0))) + return L.match(O->getOperand(1)); + } return false; } private: - bool matchIfNot(Value *LHS, Value *RHS) { - return (isa(RHS) || isa(RHS) || - // FIXME: Remove CV. - isa(RHS)) && - cast(RHS)->isAllOnesValue() && L.match(LHS); + bool isAllOnes(Value *V) { + return isa(V) && cast(V)->isAllOnesValue(); } }; @@ -998,7 +1056,8 @@ inline brc_match m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F) { // Matchers for max/min idioms, eg: "select (sgt x, y), x, y" -> smax(x,y). // -template +template struct MaxMin_match { LHS_t L; RHS_t R; @@ -1023,12 +1082,13 @@ struct MaxMin_match { (TrueVal != RHS || FalseVal != LHS)) return false; typename CmpInst_t::Predicate Pred = - LHS == TrueVal ? Cmp->getPredicate() : Cmp->getSwappedPredicate(); + LHS == TrueVal ? Cmp->getPredicate() : Cmp->getInversePredicate(); // Does "(x pred y) ? x : y" represent the desired max/min operation? if (!Pred_t::match(Pred)) return false; // It does! Bind the operands. - return L.match(LHS) && R.match(RHS); + return (L.match(LHS) && R.match(RHS)) || + (Commutable && R.match(LHS) && L.match(RHS)); } }; @@ -1134,7 +1194,7 @@ inline MaxMin_match m_OrdFMax(const LHS &L, /// semantics. In the presence of 'NaN' we have to preserve the original /// select(fcmp(olt/le, L, R), L, R) semantics matched by this predicate. /// -/// max(L, R) iff L and R are not NaN +/// min(L, R) iff L and R are not NaN /// m_OrdFMin(L, R) = R iff L or R are NaN template inline MaxMin_match m_OrdFMin(const LHS &L, @@ -1150,13 +1210,28 @@ inline MaxMin_match m_OrdFMin(const LHS &L, /// select(fcmp(ugt/ge, L, R), L, R) semantics matched by this predicate. 
/// /// max(L, R) iff L and R are not NaN -/// m_UnordFMin(L, R) = L iff L or R are NaN +/// m_UnordFMax(L, R) = L iff L or R are NaN template inline MaxMin_match m_UnordFMax(const LHS &L, const RHS &R) { return MaxMin_match(L, R); } +/// \brief Match an 'unordered' floating point minimum function. +/// Floating point has one special value 'NaN'. Therefore, there is no total +/// order. However, if we can ignore the 'NaN' value (for example, because of a +/// 'no-nans-float-math' flag) a combination of a fcmp and select has 'minimum' +/// semantics. In the presence of 'NaN' we have to preserve the original +/// select(fcmp(ult/le, L, R), L, R) semantics matched by this predicate. +/// +/// min(L, R) iff L and R are not NaN +/// m_UnordFMin(L, R) = L iff L or R are NaN +template +inline MaxMin_match +m_UnordFMin(const LHS &L, const RHS &R) { + return MaxMin_match(L, R); +} + //===----------------------------------------------------------------------===// // Matchers for overflow check patterns: e.g. (a + b) u< a // @@ -1203,21 +1278,6 @@ m_UAddWithOverflow(const LHS_t &L, const RHS_t &R, const Sum_t &S) { return UAddWithOverflow_match(L, R, S); } -/// \brief Match an 'unordered' floating point minimum function. -/// Floating point has one special value 'NaN'. Therefore, there is no total -/// order. However, if we can ignore the 'NaN' value (for example, because of a -/// 'no-nans-float-math' flag) a combination of a fcmp and select has 'minimum' -/// semantics. In the presence of 'NaN' we have to preserve the original -/// select(fcmp(ult/le, L, R), L, R) semantics matched by this predicate. -/// -/// max(L, R) iff L and R are not NaN -/// m_UnordFMin(L, R) = L iff L or R are NaN -template -inline MaxMin_match -m_UnordFMin(const LHS &L, const RHS &R) { - return MaxMin_match(L, R); -} - template struct Argument_match { unsigned OpI; Opnd_t Val; @@ -1310,6 +1370,11 @@ m_Intrinsic(const T0 &Op0, const T1 &Op1, const T2 &Op2, const T3 &Op3) { } // Helper intrinsic matching specializations. +template +inline typename m_Intrinsic_Ty::Ty m_BitReverse(const Opnd0 &Op0) { + return m_Intrinsic(Op0); +} + template inline typename m_Intrinsic_Ty::Ty m_BSwap(const Opnd0 &Op0) { return m_Intrinsic(Op0); @@ -1372,89 +1437,78 @@ template inline Signum_match m_Signum(const Val_t &V) { // /// \brief Matches a BinaryOperator with LHS and RHS in either order. -template -inline match_combine_or, - AnyBinaryOp_match> -m_c_BinOp(const LHS &L, const RHS &R) { - return m_CombineOr(m_BinOp(L, R), m_BinOp(R, L)); +template +inline AnyBinaryOp_match m_c_BinOp(const LHS &L, const RHS &R) { + return AnyBinaryOp_match(L, R); } /// \brief Matches an ICmp with a predicate over LHS and RHS in either order. /// Does not swap the predicate. -template -inline match_combine_or, - CmpClass_match> +template +inline CmpClass_match m_c_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R) { - return m_CombineOr(m_ICmp(Pred, L, R), m_ICmp(Pred, R, L)); + return CmpClass_match(Pred, L, + R); } /// \brief Matches a Add with LHS and RHS in either order. -template -inline match_combine_or, - BinaryOp_match> -m_c_Add(const LHS &L, const RHS &R) { - return m_CombineOr(m_Add(L, R), m_Add(R, L)); +template +inline BinaryOp_match m_c_Add(const LHS &L, + const RHS &R) { + return BinaryOp_match(L, R); } /// \brief Matches a Mul with LHS and RHS in either order. 
-template -inline match_combine_or, - BinaryOp_match> -m_c_Mul(const LHS &L, const RHS &R) { - return m_CombineOr(m_Mul(L, R), m_Mul(R, L)); +template +inline BinaryOp_match m_c_Mul(const LHS &L, + const RHS &R) { + return BinaryOp_match(L, R); } /// \brief Matches an And with LHS and RHS in either order. -template -inline match_combine_or, - BinaryOp_match> -m_c_And(const LHS &L, const RHS &R) { - return m_CombineOr(m_And(L, R), m_And(R, L)); +template +inline BinaryOp_match m_c_And(const LHS &L, + const RHS &R) { + return BinaryOp_match(L, R); } /// \brief Matches an Or with LHS and RHS in either order. -template -inline match_combine_or, - BinaryOp_match> -m_c_Or(const LHS &L, const RHS &R) { - return m_CombineOr(m_Or(L, R), m_Or(R, L)); +template +inline BinaryOp_match m_c_Or(const LHS &L, + const RHS &R) { + return BinaryOp_match(L, R); } /// \brief Matches an Xor with LHS and RHS in either order. -template -inline match_combine_or, - BinaryOp_match> -m_c_Xor(const LHS &L, const RHS &R) { - return m_CombineOr(m_Xor(L, R), m_Xor(R, L)); +template +inline BinaryOp_match m_c_Xor(const LHS &L, + const RHS &R) { + return BinaryOp_match(L, R); } /// Matches an SMin with LHS and RHS in either order. template -inline match_combine_or, - MaxMin_match> +inline MaxMin_match m_c_SMin(const LHS &L, const RHS &R) { - return m_CombineOr(m_SMin(L, R), m_SMin(R, L)); + return MaxMin_match(L, R); } /// Matches an SMax with LHS and RHS in either order. template -inline match_combine_or, - MaxMin_match> +inline MaxMin_match m_c_SMax(const LHS &L, const RHS &R) { - return m_CombineOr(m_SMax(L, R), m_SMax(R, L)); + return MaxMin_match(L, R); } /// Matches a UMin with LHS and RHS in either order. template -inline match_combine_or, - MaxMin_match> +inline MaxMin_match m_c_UMin(const LHS &L, const RHS &R) { - return m_CombineOr(m_UMin(L, R), m_UMin(R, L)); + return MaxMin_match(L, R); } /// Matches a UMax with LHS and RHS in either order. template -inline match_combine_or, - MaxMin_match> +inline MaxMin_match m_c_UMax(const LHS &L, const RHS &R) { - return m_CombineOr(m_UMax(L, R), m_UMax(R, L)); + return MaxMin_match(L, R); } } // end namespace PatternMatch diff --git a/interpreter/llvm/src/include/llvm/IR/SafepointIRVerifier.h b/interpreter/llvm/src/include/llvm/IR/SafepointIRVerifier.h new file mode 100644 index 0000000000000..092050d1d2079 --- /dev/null +++ b/interpreter/llvm/src/include/llvm/IR/SafepointIRVerifier.h @@ -0,0 +1,35 @@ +//===- SafepointIRVerifier.h - Checks for GC relocation problems *- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines a verifier which is useful for enforcing the relocation +// properties required by a relocating GC. Specifically, it looks for uses of +// the unrelocated value of pointer SSA values after a possible safepoint. It +// attempts to report no false negatives, but may end up reporting false +// positives in rare cases (see the note at the top of the corresponding cpp +// file.) +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_IR_SAFEPOINT_IR_VERIFIER +#define LLVM_IR_SAFEPOINT_IR_VERIFIER + +namespace llvm { + +class Function; +class FunctionPass; + +/// Run the safepoint verifier over a single function. Crashes on failure. 
+void verifySafepointIR(Function &F); + +/// Create an instance of the safepoint verifier pass which can be added to +/// a pass pipeline to check for relocation bugs. +FunctionPass *createSafepointIRVerifierPass(); +} + +#endif // LLVM_IR_SAFEPOINT_IR_VERIFIER diff --git a/interpreter/llvm/src/include/llvm/IR/Statepoint.h b/interpreter/llvm/src/include/llvm/IR/Statepoint.h index f01607614a0c3..ad9537e9762ed 100644 --- a/interpreter/llvm/src/include/llvm/IR/Statepoint.h +++ b/interpreter/llvm/src/include/llvm/IR/Statepoint.h @@ -17,8 +17,8 @@ #ifndef LLVM_IR_STATEPOINT_H #define LLVM_IR_STATEPOINT_H -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/Optional.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CallSite.h" @@ -62,7 +62,10 @@ bool isStatepoint(const Value *V); bool isStatepoint(const Value &V); bool isGCRelocate(ImmutableCallSite CS); +bool isGCRelocate(const Value *V); + bool isGCResult(ImmutableCallSite CS); +bool isGCResult(const Value *V); /// Analogous to CallSiteBase, this provides most of the actual /// functionality for Statepoint and ImmutableStatepoint. It is @@ -228,24 +231,24 @@ class StatepointBase { return cast(NumVMSArgs)->getZExtValue(); } - typename CallSiteTy::arg_iterator vm_state_begin() const { + typename CallSiteTy::arg_iterator deopt_begin() const { auto I = gc_transition_args_end() + 1; assert((getCallSite().arg_end() - I) >= 0); return I; } - typename CallSiteTy::arg_iterator vm_state_end() const { - auto I = vm_state_begin() + getNumTotalVMSArgs(); + typename CallSiteTy::arg_iterator deopt_end() const { + auto I = deopt_begin() + getNumTotalVMSArgs(); assert((getCallSite().arg_end() - I) >= 0); return I; } /// range adapter for vm state arguments - iterator_range vm_state_args() const { - return make_range(vm_state_begin(), vm_state_end()); + iterator_range deopt_operands() const { + return make_range(deopt_begin(), deopt_end()); } typename CallSiteTy::arg_iterator gc_args_begin() const { - return vm_state_end(); + return deopt_end(); } typename CallSiteTy::arg_iterator gc_args_end() const { return getCallSite().arg_end(); @@ -289,8 +292,8 @@ class StatepointBase { (void)arg_end(); (void)gc_transition_args_begin(); (void)gc_transition_args_end(); - (void)vm_state_begin(); - (void)vm_state_end(); + (void)deopt_begin(); + (void)deopt_end(); (void)gc_args_begin(); (void)gc_args_end(); } @@ -326,12 +329,12 @@ class Statepoint /// Currently, the only projections available are gc.result and gc.relocate. class GCProjectionInst : public IntrinsicInst { public: - static inline bool classof(const IntrinsicInst *I) { + static bool classof(const IntrinsicInst *I) { return I->getIntrinsicID() == Intrinsic::experimental_gc_relocate || I->getIntrinsicID() == Intrinsic::experimental_gc_result; } - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return isa(V) && classof(cast(V)); } @@ -370,11 +373,11 @@ class GCProjectionInst : public IntrinsicInst { /// Represents calls to the gc.relocate intrinsic. 
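Aside (editor's sketch, not from the patch): to illustrate the vm_state_* to deopt_* rename above, a small helper that counts the deoptimization operands of a statepoint.

#include "llvm/IR/Statepoint.h"

// deopt_begin()/deopt_end() replace vm_state_begin()/vm_state_end(), and
// deopt_operands() replaces the vm_state_args() range adapter.
static unsigned countDeoptOperands(const llvm::ImmutableStatepoint &ISP) {
  return unsigned(ISP.deopt_end() - ISP.deopt_begin());
}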
class GCRelocateInst : public GCProjectionInst { public: - static inline bool classof(const IntrinsicInst *I) { + static bool classof(const IntrinsicInst *I) { return I->getIntrinsicID() == Intrinsic::experimental_gc_relocate; } - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return isa(V) && classof(cast(V)); } @@ -405,11 +408,11 @@ class GCRelocateInst : public GCProjectionInst { /// Represents calls to the gc.result intrinsic. class GCResultInst : public GCProjectionInst { public: - static inline bool classof(const IntrinsicInst *I) { + static bool classof(const IntrinsicInst *I) { return I->getIntrinsicID() == Intrinsic::experimental_gc_result; } - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return isa(V) && classof(cast(V)); } }; diff --git a/interpreter/llvm/src/include/llvm/IR/Type.h b/interpreter/llvm/src/include/llvm/IR/Type.h index 82362107e41ea..ef7801266777c 100644 --- a/interpreter/llvm/src/include/llvm/IR/Type.h +++ b/interpreter/llvm/src/include/llvm/IR/Type.h @@ -18,8 +18,8 @@ #include "llvm/ADT/APFloat.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/Support/Casting.h" #include "llvm/Support/CBindingWrapping.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" #include @@ -202,6 +202,12 @@ class Type { /// Return true if this is an integer type or a vector of integer types. bool isIntOrIntVectorTy() const { return getScalarType()->isIntegerTy(); } + /// Return true if this is an integer type or a vector of integer types of + /// the given width. + bool isIntOrIntVectorTy(unsigned BitWidth) const { + return getScalarType()->isIntegerTy(BitWidth); + } + /// True if this is an instance of FunctionType. bool isFunctionTy() const { return getTypeID() == FunctionTyID; } diff --git a/interpreter/llvm/src/include/llvm/IR/Use.h b/interpreter/llvm/src/include/llvm/IR/Use.h index d3a59d8a060e8..0ac13935c7cea 100644 --- a/interpreter/llvm/src/include/llvm/IR/Use.h +++ b/interpreter/llvm/src/include/llvm/IR/Use.h @@ -25,10 +25,10 @@ #ifndef LLVM_IR_USE_H #define LLVM_IR_USE_H +#include "llvm-c/Types.h" #include "llvm/ADT/PointerIntPair.h" #include "llvm/Support/CBindingWrapping.h" #include "llvm/Support/Compiler.h" -#include "llvm-c/Types.h" namespace llvm { diff --git a/interpreter/llvm/src/include/llvm/IR/User.h b/interpreter/llvm/src/include/llvm/IR/User.h index 7b9d451aaf537..4dfa19cf241fa 100644 --- a/interpreter/llvm/src/include/llvm/IR/User.h +++ b/interpreter/llvm/src/include/llvm/IR/User.h @@ -46,8 +46,6 @@ class User : public Value { template friend struct HungoffOperandTraits; - virtual void anchor(); - LLVM_ATTRIBUTE_ALWAYS_INLINE inline static void * allocateFixedOperandUser(size_t, unsigned, unsigned); @@ -93,9 +91,11 @@ class User : public Value { /// should be called if there are no uses. void growHungoffUses(unsigned N, bool IsPhi = false); +protected: + ~User() = default; // Use deleteValue() to delete a generic Instruction. + public: User(const User &) = delete; - ~User() override = default; /// \brief Free memory allocated for User and Use objects. 
void operator delete(void *Usr); @@ -288,7 +288,7 @@ class User : public Value { void replaceUsesOfWith(Value *From, Value *To); // Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return isa(V) || isa(V); } }; diff --git a/interpreter/llvm/src/include/llvm/IR/Value.def b/interpreter/llvm/src/include/llvm/IR/Value.def index 48842d7f9cd56..cebd7f7297ef3 100644 --- a/interpreter/llvm/src/include/llvm/IR/Value.def +++ b/interpreter/llvm/src/include/llvm/IR/Value.def @@ -20,10 +20,14 @@ #if !(defined HANDLE_GLOBAL_VALUE || defined HANDLE_CONSTANT || \ defined HANDLE_INSTRUCTION || defined HANDLE_INLINE_ASM_VALUE || \ defined HANDLE_METADATA_VALUE || defined HANDLE_VALUE || \ - defined HANDLE_CONSTANT_MARKER) + defined HANDLE_CONSTANT_MARKER || defined HANDLE_MEMORY_VALUE) #error "Missing macro definition of HANDLE_VALUE*" #endif +#ifndef HANDLE_MEMORY_VALUE +#define HANDLE_MEMORY_VALUE(ValueName) HANDLE_VALUE(ValueName) +#endif + #ifndef HANDLE_GLOBAL_VALUE #define HANDLE_GLOBAL_VALUE(ValueName) HANDLE_CONSTANT(ValueName) #endif @@ -54,9 +58,13 @@ HANDLE_VALUE(Argument) HANDLE_VALUE(BasicBlock) -HANDLE_VALUE(MemoryUse) -HANDLE_VALUE(MemoryDef) -HANDLE_VALUE(MemoryPhi) + +// FIXME: It's awkward that Value.def knows about classes in Analysis. While +// this doesn't introduce a strict link or include dependency, we should remove +// the circular dependency eventually. +HANDLE_MEMORY_VALUE(MemoryUse) +HANDLE_MEMORY_VALUE(MemoryDef) +HANDLE_MEMORY_VALUE(MemoryPhi) HANDLE_GLOBAL_VALUE(Function) HANDLE_GLOBAL_VALUE(GlobalAlias) @@ -94,6 +102,7 @@ HANDLE_CONSTANT_MARKER(ConstantDataLastVal, ConstantTokenNone) HANDLE_CONSTANT_MARKER(ConstantAggregateFirstVal, ConstantArray) HANDLE_CONSTANT_MARKER(ConstantAggregateLastVal, ConstantVector) +#undef HANDLE_MEMORY_VALUE #undef HANDLE_GLOBAL_VALUE #undef HANDLE_CONSTANT #undef HANDLE_INSTRUCTION diff --git a/interpreter/llvm/src/include/llvm/IR/Value.h b/interpreter/llvm/src/include/llvm/IR/Value.h index 96a370dcc35f0..9e4914973edf9 100644 --- a/interpreter/llvm/src/include/llvm/IR/Value.h +++ b/interpreter/llvm/src/include/llvm/IR/Value.h @@ -14,13 +14,14 @@ #ifndef LLVM_IR_VALUE_H #define LLVM_IR_VALUE_H +#include "llvm-c/Types.h" #include "llvm/ADT/iterator_range.h" #include "llvm/IR/Use.h" #include "llvm/Support/CBindingWrapping.h" #include "llvm/Support/Casting.h" -#include "llvm-c/Types.h" #include #include +#include namespace llvm { @@ -48,8 +49,9 @@ template class StringMapEntry; class StringRef; class Twine; class Type; +class User; -using ValueName = StringMapEntry; +using ValueName = StringMapEntry; //===----------------------------------------------------------------------===// // Value Class @@ -69,6 +71,8 @@ using ValueName = StringMapEntry; /// objects that watch it and listen to RAUW and Destroy events. See /// llvm/IR/ValueHandle.h for details. class Value { + // The least-significant bit of the first word of Value *must* be zero: + // http://www.llvm.org/docs/ProgrammersManual.html#the-waymarking-algorithm Type *VTy; Use *UseList; @@ -200,10 +204,19 @@ class Value { protected: Value(Type *Ty, unsigned scid); + /// Value's destructor should be virtual by design, but that would require + /// that Value and all of its subclasses have a vtable that effectively + /// duplicates the information in the value ID. As a size optimization, the + /// destructor has been protected, and the caller should manually call + /// deleteValue. 
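Aside (editor's sketch): the ownership protocol this comment describes, in two lines; the protected destructor declaration itself follows below.

#include "llvm/IR/Value.h"

// With ~Value() protected and non-virtual, plain `delete V` no longer
// compiles for a generic Value; deletion goes through deleteValue(), which
// dispatches on the value ID instead of a vtable.
static void destroy(llvm::Value *V) {
  V->deleteValue();
}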
+ ~Value(); // Use deleteValue() to delete a generic Value. + public: Value(const Value &) = delete; - void operator=(const Value &) = delete; - virtual ~Value(); + Value &operator=(const Value &) = delete; + + /// Delete a pointer to a generic Value. + void deleteValue(); /// \brief Support for debugging, callable in GDB: V->dump() void dump() const; @@ -643,6 +656,13 @@ class Value { void setValueSubclassData(unsigned short D) { SubclassData = D; } }; +struct ValueDeleter { void operator()(Value *V) { V->deleteValue(); } }; + +/// Use this instead of std::unique_ptr or std::unique_ptr. +/// Those don't work because Value and Instruction's destructors are protected, +/// aren't virtual, and won't destroy the complete object. +using unique_value = std::unique_ptr; + inline raw_ostream &operator<<(raw_ostream &OS, const Value &V) { V.print(OS); return OS; diff --git a/interpreter/llvm/src/include/llvm/InitializePasses.h b/interpreter/llvm/src/include/llvm/InitializePasses.h index 49ca8acabc226..39ac4649b70d4 100644 --- a/interpreter/llvm/src/include/llvm/InitializePasses.h +++ b/interpreter/llvm/src/include/llvm/InitializePasses.h @@ -70,7 +70,6 @@ void initializeAlwaysInlinerLegacyPassPass(PassRegistry&); void initializeArgPromotionPass(PassRegistry&); void initializeAssumptionCacheTrackerPass(PassRegistry&); void initializeAtomicExpandPass(PassRegistry&); -void initializeBBVectorizePass(PassRegistry&); void initializeBDCELegacyPassPass(PassRegistry&); void initializeBarrierNoopPass(PassRegistry&); void initializeBasicAAWrapperPassPass(PassRegistry&); @@ -86,7 +85,6 @@ void initializeCFGOnlyPrinterLegacyPassPass(PassRegistry&); void initializeCFGOnlyViewerLegacyPassPass(PassRegistry&); void initializeCFGPrinterLegacyPassPass(PassRegistry&); void initializeCFGSimplifyPassPass(PassRegistry&); -void initializeLateCFGSimplifyPassPass(PassRegistry&); void initializeCFGViewerLegacyPassPass(PassRegistry&); void initializeCFLAndersAAWrapperPassPass(PassRegistry&); void initializeCFLSteensAAWrapperPassPass(PassRegistry&); @@ -145,6 +143,7 @@ void initializeGCModuleInfoPass(PassRegistry&); void initializeGCOVProfilerLegacyPassPass(PassRegistry&); void initializeGVNHoistLegacyPassPass(PassRegistry&); void initializeGVNLegacyPassPass(PassRegistry&); +void initializeGVNSinkLegacyPassPass(PassRegistry&); void initializeGlobalDCELegacyPassPass(PassRegistry&); void initializeGlobalMergePass(PassRegistry&); void initializeGlobalOptLegacyPassPass(PassRegistry&); @@ -174,13 +173,14 @@ void initializeIntervalPartitionPass(PassRegistry&); void initializeJumpThreadingPass(PassRegistry&); void initializeLCSSAVerificationPassPass(PassRegistry&); void initializeLCSSAWrapperPassPass(PassRegistry&); +void initializeLateCFGSimplifyPassPass(PassRegistry&); void initializeLazyBlockFrequencyInfoPassPass(PassRegistry&); void initializeLazyBranchProbabilityInfoPassPass(PassRegistry&); void initializeLazyMachineBlockFrequencyInfoPassPass(PassRegistry&); +void initializeLazyValueInfoPrinterPass(PassRegistry&); void initializeLazyValueInfoWrapperPassPass(PassRegistry&); void initializeLegacyLICMPassPass(PassRegistry&); void initializeLegacyLoopSinkPassPass(PassRegistry&); -void initializeLazyValueInfoPrinterPass(PassRegistry&); void initializeLegalizerPass(PassRegistry&); void initializeLibCallsShrinkWrapLegacyPassPass(PassRegistry&); void initializeLintPass(PassRegistry&); @@ -191,10 +191,10 @@ void initializeLiveRangeShrinkPass(PassRegistry&); void initializeLiveRegMatrixPass(PassRegistry&); void 
initializeLiveStacksPass(PassRegistry&); void initializeLiveVariablesPass(PassRegistry&); -void initializeLoadCombinePass(PassRegistry&); void initializeLoadStoreVectorizerPass(PassRegistry&); void initializeLoaderPassPass(PassRegistry&); void initializeLocalStackSlotPassPass(PassRegistry&); +void initializeLocalizerPass(PassRegistry&); void initializeLoopAccessLegacyAnalysisPass(PassRegistry&); void initializeLoopDataPrefetchLegacyPassPass(PassRegistry&); void initializeLoopDeletionLegacyPassPass(PassRegistry&); @@ -302,6 +302,8 @@ void initializeProcessImplicitDefsPass(PassRegistry&); void initializeProfileSummaryInfoWrapperPassPass(PassRegistry&); void initializePromoteLegacyPassPass(PassRegistry&); void initializePruneEHPass(PassRegistry&); +void initializeRABasicPass(PassRegistry&); +void initializeRAFastPass(PassRegistry&); void initializeRAGreedyPass(PassRegistry&); void initializeReassociateLegacyPassPass(PassRegistry&); void initializeRegBankSelectPass(PassRegistry&); @@ -317,6 +319,7 @@ void initializeResetMachineFunctionPass(PassRegistry&); void initializeReversePostOrderFunctionAttrsLegacyPassPass(PassRegistry&); void initializeRewriteStatepointsForGCPass(PassRegistry&); void initializeRewriteSymbolsLegacyPassPass(PassRegistry&); +void initializeSafepointIRVerifierPass(PassRegistry&); void initializeSCCPLegacyPassPass(PassRegistry&); void initializeSCEVAAWrapperPassPass(PassRegistry&); void initializeSLPVectorizerPass(PassRegistry&); @@ -325,7 +328,9 @@ void initializeSafeStackLegacyPassPass(PassRegistry&); void initializeSampleProfileLoaderLegacyPassPass(PassRegistry&); void initializeSanitizerCoverageModulePass(PassRegistry&); void initializeScalarEvolutionWrapperPassPass(PassRegistry&); +void initializeScalarizeMaskedMemIntrinPass(PassRegistry&); void initializeScalarizerPass(PassRegistry&); +void initializeScavengerTestPass(PassRegistry&); void initializeScopedNoAliasAAWrapperPassPass(PassRegistry&); void initializeSeparateConstOffsetFromGEPPass(PassRegistry&); void initializeShadowStackGCLoweringPass(PassRegistry&); diff --git a/interpreter/llvm/src/include/llvm/LTO/Config.h b/interpreter/llvm/src/include/llvm/LTO/Config.h index ede6637dfa4dd..73106f77ca55c 100644 --- a/interpreter/llvm/src/include/llvm/LTO/Config.h +++ b/interpreter/llvm/src/include/llvm/LTO/Config.h @@ -39,13 +39,16 @@ struct Config { std::string CPU; TargetOptions Options; std::vector MAttrs; - Reloc::Model RelocModel = Reloc::PIC_; + Optional RelocModel = Reloc::PIC_; CodeModel::Model CodeModel = CodeModel::Default; CodeGenOpt::Level CGOptLevel = CodeGenOpt::Default; TargetMachine::CodeGenFileType CGFileType = TargetMachine::CGFT_ObjectFile; unsigned OptLevel = 2; bool DisableVerify = false; + /// Use the new pass manager + bool UseNewPM = false; + /// Disable entirely the optimizer, including importing for ThinLTO bool CodeGenOnly = false; diff --git a/interpreter/llvm/src/include/llvm/LTO/LTO.h b/interpreter/llvm/src/include/llvm/LTO/LTO.h index 3772592757bec..d678a68ed8605 100644 --- a/interpreter/llvm/src/include/llvm/LTO/LTO.h +++ b/interpreter/llvm/src/include/llvm/LTO/LTO.h @@ -281,6 +281,16 @@ class LTO { bool HasModule = false; std::unique_ptr CombinedModule; std::unique_ptr Mover; + + // This stores the information about a regular LTO module that we have added + // to the link. It will either be linked immediately (for modules without + // summaries) or after summary-based dead stripping (for modules with + // summaries). 
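Aside (editor's sketch, heavily elided): for context on the state being added here, a hypothetical driver of the resolution-based LTO API; the AddedModule struct it feeds follows just below. Real code must supply meaningful symbol resolutions and report errors instead of consuming them.

#include "llvm/LTO/LTO.h"

static void runLink(std::unique_ptr<llvm::lto::InputFile> File,
                    llvm::ArrayRef<llvm::lto::SymbolResolution> Res,
                    llvm::lto::AddStreamFn AddStream) {
  llvm::lto::LTO Link(llvm::lto::Config(),
                      llvm::lto::createInProcessThinBackend(1));
  if (llvm::Error E = Link.add(std::move(File), Res)) {
    llvm::consumeError(std::move(E)); // real code: diagnose this
    return;
  }
  llvm::consumeError(Link.run(AddStream)); // codegen through AddStream
}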
+ struct AddedModule { + std::unique_ptr M; + std::vector Keep; + }; + std::vector ModsWithSummaries; } RegularLTO; struct ThinLTOState { @@ -303,9 +313,10 @@ class LTO { /// The unmangled name of the global. std::string IRName; - /// Keep track if the symbol is visible outside of ThinLTO (i.e. in - /// either a regular object or the regular LTO partition). - bool VisibleOutsideThinLTO = false; + /// Keep track if the symbol is visible outside of a module with a summary + /// (i.e. in either a regular object or a regular LTO module without a + /// summary). + bool VisibleOutsideSummary = false; bool UnnamedAddr = true; @@ -339,8 +350,9 @@ class LTO { // Global mapping from mangled symbol names to resolutions. StringMap GlobalResolutions; - void addSymbolToGlobalRes(const InputFile::Symbol &Sym, SymbolResolution Res, - unsigned Partition); + void addModuleToGlobalRes(ArrayRef Syms, + ArrayRef Res, unsigned Partition, + bool InSummary); // These functions take a range of symbol resolutions [ResI, ResE) and consume // the resolutions used by a single input module by incrementing ResI. After @@ -348,10 +360,13 @@ class LTO { // the remaining modules in the InputFile. Error addModule(InputFile &Input, unsigned ModI, const SymbolResolution *&ResI, const SymbolResolution *ResE); - Error addRegularLTO(BitcodeModule BM, - ArrayRef Syms, - const SymbolResolution *&ResI, - const SymbolResolution *ResE); + + Expected + addRegularLTO(BitcodeModule BM, ArrayRef Syms, + const SymbolResolution *&ResI, const SymbolResolution *ResE); + Error linkRegularLTO(RegularLTOState::AddedModule Mod, + bool LivenessFromIndex); + Error addThinLTO(BitcodeModule BM, ArrayRef Syms, const SymbolResolution *&ResI, const SymbolResolution *ResE); @@ -366,8 +381,9 @@ class LTO { /// each global symbol based on its internal resolution of that symbol. struct SymbolResolution { SymbolResolution() - : Prevailing(0), FinalDefinitionInLinkageUnit(0), VisibleToRegularObj(0) { - } + : Prevailing(0), FinalDefinitionInLinkageUnit(0), VisibleToRegularObj(0), + LinkerRedefined(0) {} + /// The linker has chosen this definition of the symbol. unsigned Prevailing : 1; @@ -377,6 +393,10 @@ struct SymbolResolution { /// The definition of this symbol is visible outside of the LTO unit. unsigned VisibleToRegularObj : 1; + + /// Linker redefined version of the symbol which appeared in -wrap or -defsym + /// linker option. 
+ unsigned LinkerRedefined : 1; }; } // namespace lto diff --git a/interpreter/llvm/src/include/llvm/LTO/legacy/LTOModule.h b/interpreter/llvm/src/include/llvm/LTO/legacy/LTOModule.h index 2a8758587a112..017e223ed8a61 100644 --- a/interpreter/llvm/src/include/llvm/LTO/legacy/LTOModule.h +++ b/interpreter/llvm/src/include/llvm/LTO/legacy/LTOModule.h @@ -158,7 +158,7 @@ struct LTOModule { private: /// Parse metadata from the module - // FIXME: it only parses "Linker Options" metadata at the moment + // FIXME: it only parses "llvm.linker.options" metadata at the moment void parseMetadata(); /// Parse the symbols from the module and model-level ASM and add them to diff --git a/interpreter/llvm/src/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h b/interpreter/llvm/src/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h index f9545333aabdf..14f0c48266f0b 100644 --- a/interpreter/llvm/src/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h +++ b/interpreter/llvm/src/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h @@ -177,7 +177,7 @@ class ThinLTOCodeGenerator { */ void setMaxCacheSizeRelativeToAvailableSpace(unsigned Percentage) { if (Percentage) - CacheOptions.Policy.PercentageOfAvailableSpace = Percentage; + CacheOptions.Policy.MaxSizePercentageOfAvailableSpace = Percentage; } /**@}*/ diff --git a/interpreter/llvm/src/include/llvm/LinkAllIR.h b/interpreter/llvm/src/include/llvm/LinkAllIR.h index f078c73f979ea..de1d305f8e772 100644 --- a/interpreter/llvm/src/include/llvm/LinkAllIR.h +++ b/interpreter/llvm/src/include/llvm/LinkAllIR.h @@ -16,13 +16,13 @@ #ifndef LLVM_LINKALLIR_H #define LLVM_LINKALLIR_H +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IR/Verifier.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Support/DynamicLibrary.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/Memory.h" diff --git a/interpreter/llvm/src/include/llvm/LinkAllPasses.h b/interpreter/llvm/src/include/llvm/LinkAllPasses.h index 39a86e838bde3..d07c15c1013bb 100644 --- a/interpreter/llvm/src/include/llvm/LinkAllPasses.h +++ b/interpreter/llvm/src/include/llvm/LinkAllPasses.h @@ -16,8 +16,8 @@ #define LLVM_LINKALLPASSES_H #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/AliasSetTracker.h" #include "llvm/Analysis/AliasAnalysisEvaluator.h" +#include "llvm/Analysis/AliasSetTracker.h" #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/CFLAndersAliasAnalysis.h" #include "llvm/Analysis/CFLSteensAliasAnalysis.h" @@ -38,6 +38,7 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRPrintingPasses.h" +#include "llvm/Support/Valgrind.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/AlwaysInliner.h" #include "llvm/Transforms/IPO/FunctionAttrs.h" @@ -48,7 +49,6 @@ #include "llvm/Transforms/Utils/SymbolRewriter.h" #include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h" #include "llvm/Transforms/Vectorize.h" -#include "llvm/Support/Valgrind.h" #include namespace { @@ -195,7 +195,6 @@ namespace { (void) llvm::createLoopVectorizePass(); (void) llvm::createSLPVectorizerPass(); (void) llvm::createLoadStoreVectorizerPass(); - (void) llvm::createBBVectorizePass(); (void) llvm::createPartiallyInlineLibCallsPass(); (void) llvm::createScalarizerPass(); (void) llvm::createSeparateConstOffsetFromGEPPass(); @@ -206,6 +205,7 @@ namespace { (void) llvm::createMemDerefPrinter(); (void) 
llvm::createFloat2IntPass();
      (void) llvm::createEliminateAvailableExternallyPass();
+      (void) llvm::createScalarizeMaskedMemIntrinPass();

       (void)new llvm::IntervalPartition();
       (void)new llvm::ScalarEvolutionWrapperPass();
diff --git a/interpreter/llvm/src/include/llvm/MC/ConstantPools.h b/interpreter/llvm/src/include/llvm/MC/ConstantPools.h
index 5d4e32a672ddd..ef33250204ecf 100644
--- a/interpreter/llvm/src/include/llvm/MC/ConstantPools.h
+++ b/interpreter/llvm/src/include/llvm/MC/ConstantPools.h
@@ -19,6 +19,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/SMLoc.h"
 #include <cstdint>
+#include <map>

 namespace llvm {

@@ -44,7 +45,7 @@ struct ConstantPoolEntry {
 class ConstantPool {
   using EntryVecTy = SmallVector<ConstantPoolEntry, 4>;
   EntryVecTy Entries;
-  DenseMap<int64_t, const MCSymbolRefExpr *> CachedEntries;
+  std::map<int64_t, const MCSymbolRefExpr *> CachedEntries;

 public:
   // Initialize a new empty constant pool
diff --git a/interpreter/llvm/src/include/llvm/MC/LaneBitmask.h b/interpreter/llvm/src/include/llvm/MC/LaneBitmask.h
index 5ca06d1148e28..73b987b074dba 100644
--- a/interpreter/llvm/src/include/llvm/MC/LaneBitmask.h
+++ b/interpreter/llvm/src/include/llvm/MC/LaneBitmask.h
@@ -75,6 +75,9 @@ namespace llvm {
     static LaneBitmask getNone() { return LaneBitmask(0); }
     static LaneBitmask getAll()  { return ~LaneBitmask(0); }
+    static LaneBitmask getLane(unsigned Lane) {
+      return LaneBitmask(Type(1) << Lane);
+    }

   private:
     Type Mask = 0;
diff --git a/interpreter/llvm/src/include/llvm/MC/MCAsmBackend.h b/interpreter/llvm/src/include/llvm/MC/MCAsmBackend.h
index fb21e195b1dfe..5a8e29d08ad23 100644
--- a/interpreter/llvm/src/include/llvm/MC/MCAsmBackend.h
+++ b/interpreter/llvm/src/include/llvm/MC/MCAsmBackend.h
@@ -60,22 +60,20 @@ class MCAsmBackend {
   /// Get information on a fixup kind.
   virtual const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const;

-  /// Target hook to adjust the literal value of a fixup if necessary.
-  /// IsResolved signals whether the caller believes a relocation is needed; the
-  /// target can modify the value. The default does nothing.
-  virtual void processFixupValue(const MCAssembler &Asm,
-                                 const MCAsmLayout &Layout,
-                                 const MCFixup &Fixup, const MCFragment *DF,
-                                 const MCValue &Target, uint64_t &Value,
-                                 bool &IsResolved) {}
+  /// Hook to check if a relocation is needed for some target specific reason.
+  virtual bool shouldForceRelocation(const MCAssembler &Asm,
+                                     const MCFixup &Fixup,
+                                     const MCValue &Target) {
+    return false;
+  }

   /// Apply the \p Value for given \p Fixup into the provided data fragment, at
   /// the offset specified by the fixup and following the fixup kind as
   /// appropriate. Errors (such as an out of range fixup value) should be
   /// reported via \p Ctx.
- virtual void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, - uint64_t Value, bool IsPCRel, - MCContext &Ctx) const = 0; + virtual void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target, MutableArrayRef Data, + uint64_t Value, bool IsResolved) const = 0; /// @} diff --git a/interpreter/llvm/src/include/llvm/MC/MCAsmInfo.h b/interpreter/llvm/src/include/llvm/MC/MCAsmInfo.h index 869706c454834..234762f36dd47 100644 --- a/interpreter/llvm/src/include/llvm/MC/MCAsmInfo.h +++ b/interpreter/llvm/src/include/llvm/MC/MCAsmInfo.h @@ -51,12 +51,6 @@ enum LCOMMType { NoAlignment, ByteAlignment, Log2Alignment }; } // end namespace LCOMM -enum class DebugCompressionType { - DCT_None, // no compression - DCT_Zlib, // zlib style complession - DCT_ZlibGnu // zlib-gnu style compression -}; - /// This class is intended to be used as a base class for asm /// properties and features specific to the target. class MCAsmInfo { @@ -366,7 +360,7 @@ class MCAsmInfo { bool PreserveAsmComments; /// Compress DWARF debug sections. Defaults to no compression. - DebugCompressionType CompressDebugSections = DebugCompressionType::DCT_None; + DebugCompressionType CompressDebugSections = DebugCompressionType::None; /// True if the integrated assembler should interpret 'a >> b' constant /// expressions as logical rather than arithmetic. diff --git a/interpreter/llvm/src/include/llvm/MC/MCAssembler.h b/interpreter/llvm/src/include/llvm/MC/MCAssembler.h index 185b892d96214..4f1b5a8b3d72e 100644 --- a/interpreter/llvm/src/include/llvm/MC/MCAssembler.h +++ b/interpreter/llvm/src/include/llvm/MC/MCAssembler.h @@ -11,11 +11,11 @@ #define LLVM_MC_MCASSEMBLER_H #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/iterator.h" -#include "llvm/ADT/iterator_range.h" -#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/MC/MCDirectives.h" #include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCFixup.h" @@ -195,8 +195,8 @@ class MCAssembler { /// finishLayout - Finalize a layout, including fragment lowering. void finishLayout(MCAsmLayout &Layout); - std::pair handleFixup(const MCAsmLayout &Layout, - MCFragment &F, const MCFixup &Fixup); + std::tuple + handleFixup(const MCAsmLayout &Layout, MCFragment &F, const MCFixup &Fixup); public: /// Construct a new assembler instance. 
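Aside (editor's sketch): to make the MCAsmBackend migration above concrete, a hypothetical backend port. MyAsmBackend and fixup_my_tls are illustrative names only, and the remaining pure virtual members of MCAsmBackend are omitted.

#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCFixup.h"

class MyAsmBackend : public llvm::MCAsmBackend {
  enum { fixup_my_tls = llvm::FirstTargetFixupKind }; // made-up kind

  // Replaces the value-twiddling processFixupValue hook: it only answers
  // whether the linker must always see a relocation for this fixup.
  bool shouldForceRelocation(const llvm::MCAssembler &Asm,
                             const llvm::MCFixup &Fixup,
                             const llvm::MCValue &Target) override {
    return unsigned(Fixup.getKind()) == fixup_my_tls;
  }

  // Data now covers the whole fragment, so writes index by the fixup
  // offset instead of assuming Data points at the encoded instruction.
  void applyFixup(const llvm::MCAssembler &Asm, const llvm::MCFixup &Fixup,
                  const llvm::MCValue &Target,
                  llvm::MutableArrayRef<char> Data, uint64_t Value,
                  bool IsResolved) const override {
    unsigned Off = Fixup.getOffset();
    for (unsigned I = 0; I != 4; ++I) // little-endian 32-bit patch
      Data[Off + I] = char(Value >> (8 * I));
  }
};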
@@ -413,7 +413,7 @@ class MCAssembler { /// @} - void dump(); + void dump() const; }; /// \brief Compute the amount of padding required before the fragment \p F to diff --git a/interpreter/llvm/src/include/llvm/MC/MCCodeView.h b/interpreter/llvm/src/include/llvm/MC/MCCodeView.h index 41521a6549b86..c3f1cecc97f48 100644 --- a/interpreter/llvm/src/include/llvm/MC/MCCodeView.h +++ b/interpreter/llvm/src/include/llvm/MC/MCCodeView.h @@ -14,10 +14,10 @@ #ifndef LLVM_MC_MCCODEVIEW_H #define LLVM_MC_MCCODEVIEW_H -#include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringMap.h" -#include "llvm/MC/MCObjectStreamer.h" +#include "llvm/ADT/StringRef.h" #include "llvm/MC/MCFragment.h" +#include "llvm/MC/MCObjectStreamer.h" #include #include diff --git a/interpreter/llvm/src/include/llvm/MC/MCContext.h b/interpreter/llvm/src/include/llvm/MC/MCContext.h index 9bea196313036..2c60014adf239 100644 --- a/interpreter/llvm/src/include/llvm/MC/MCContext.h +++ b/interpreter/llvm/src/include/llvm/MC/MCContext.h @@ -17,12 +17,12 @@ #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/SectionKind.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Compiler.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Support/raw_ostream.h" #include #include diff --git a/interpreter/llvm/src/include/llvm/MC/MCDisassembler/MCDisassembler.h b/interpreter/llvm/src/include/llvm/MC/MCDisassembler/MCDisassembler.h index 5e626f1869861..7f09c05ccf2ab 100644 --- a/interpreter/llvm/src/include/llvm/MC/MCDisassembler/MCDisassembler.h +++ b/interpreter/llvm/src/include/llvm/MC/MCDisassembler/MCDisassembler.h @@ -68,6 +68,7 @@ class MCDisassembler { /// an invalid instruction. /// \param Address - The address, in the memory space of region, of the first /// byte of the instruction. + /// \param Bytes - A reference to the actual bytes of the instruction. /// \param VStream - The stream to print warnings and diagnostic messages on. /// \param CStream - The stream to print comments and annotations on. /// \return - MCDisassembler::Success if the instruction is valid, diff --git a/interpreter/llvm/src/include/llvm/MC/MCELFObjectWriter.h b/interpreter/llvm/src/include/llvm/MC/MCELFObjectWriter.h index f22fc11f9b073..2efd37924e2ef 100644 --- a/interpreter/llvm/src/include/llvm/MC/MCELFObjectWriter.h +++ b/interpreter/llvm/src/include/llvm/MC/MCELFObjectWriter.h @@ -11,8 +11,8 @@ #define LLVM_MC_MCELFOBJECTWRITER_H #include "llvm/ADT/Triple.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/raw_ostream.h" #include #include diff --git a/interpreter/llvm/src/include/llvm/MC/MCFixup.h b/interpreter/llvm/src/include/llvm/MC/MCFixup.h index b493ca0b0ea7c..b83086c327f2e 100644 --- a/interpreter/llvm/src/include/llvm/MC/MCFixup.h +++ b/interpreter/llvm/src/include/llvm/MC/MCFixup.h @@ -69,7 +69,7 @@ class MCFixup { /// an instruction or an assembler directive. const MCExpr *Value; - /// The byte index of start of the relocation inside the encoded instruction. + /// The byte index of start of the relocation inside the MCFragment. uint32_t Offset; /// The target dependent kind of fixup item this is. 
The kind is used to diff --git a/interpreter/llvm/src/include/llvm/MC/MCFragment.h b/interpreter/llvm/src/include/llvm/MC/MCFragment.h index 0ca530c451028..284ca50e19d5b 100644 --- a/interpreter/llvm/src/include/llvm/MC/MCFragment.h +++ b/interpreter/llvm/src/include/llvm/MC/MCFragment.h @@ -11,10 +11,10 @@ #define LLVM_MC_MCFRAGMENT_H #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/ilist_node.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/ilist_node.h" #include "llvm/MC/MCFixup.h" #include "llvm/MC/MCInst.h" #include "llvm/Support/SMLoc.h" @@ -130,7 +130,7 @@ class MCFragment : public ilist_node_with_parent { /// \brief Return true if given frgment has FT_Dummy type. bool isDummy() const { return Kind == FT_Dummy; } - void dump(); + void dump() const; }; class MCDummyFragment : public MCFragment { diff --git a/interpreter/llvm/src/include/llvm/MC/MCInstrDesc.h b/interpreter/llvm/src/include/llvm/MC/MCInstrDesc.h index 340d8253b8c99..9150a8b5c80a1 100644 --- a/interpreter/llvm/src/include/llvm/MC/MCInstrDesc.h +++ b/interpreter/llvm/src/include/llvm/MC/MCInstrDesc.h @@ -209,6 +209,15 @@ class MCInstrDesc { /// well. unsigned getNumOperands() const { return NumOperands; } + using const_opInfo_iterator = const MCOperandInfo *; + + const_opInfo_iterator opInfo_begin() const { return OpInfo; } + const_opInfo_iterator opInfo_end() const { return OpInfo + NumOperands; } + + iterator_range operands() const { + return make_range(opInfo_begin(), opInfo_end()); + } + /// \brief Return the number of MachineOperands that are register /// definitions. Register definitions always occur at the start of the /// machine operand list. This is the number of "outs" in the .td file, diff --git a/interpreter/llvm/src/include/llvm/MC/MCMachObjectWriter.h b/interpreter/llvm/src/include/llvm/MC/MCMachObjectWriter.h index b93638f864084..42dc90da3049a 100644 --- a/interpreter/llvm/src/include/llvm/MC/MCMachObjectWriter.h +++ b/interpreter/llvm/src/include/llvm/MC/MCMachObjectWriter.h @@ -12,11 +12,11 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/MachO.h" #include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCSection.h" #include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCSection.h" #include "llvm/MC/StringTableBuilder.h" -#include "llvm/Support/MachO.h" #include #include #include @@ -233,8 +233,7 @@ class MachObjectWriter : public MCObjectWriter { void recordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, const MCFixup &Fixup, - MCValue Target, bool &IsPCRel, - uint64_t &FixedValue) override; + MCValue Target, uint64_t &FixedValue) override; void bindIndirectSymbols(MCAssembler &Asm); diff --git a/interpreter/llvm/src/include/llvm/MC/MCObjectFileInfo.h b/interpreter/llvm/src/include/llvm/MC/MCObjectFileInfo.h index 8b2a1261b2204..4d634447987b2 100644 --- a/interpreter/llvm/src/include/llvm/MC/MCObjectFileInfo.h +++ b/interpreter/llvm/src/include/llvm/MC/MCObjectFileInfo.h @@ -109,6 +109,9 @@ class MCObjectFileInfo { MCSection *DwarfLineDWOSection; MCSection *DwarfLocDWOSection; MCSection *DwarfStrOffDWOSection; + + /// The DWARF v5 string offset and address table sections. + MCSection *DwarfStrOffSection; MCSection *DwarfAddrSection; // These are for Fission DWP files. 
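Aside (editor's illustration, not from the patch): the MCOperandInfo range added to MCInstrDesc above enables range-for iteration over an instruction's operand descriptions.

#include "llvm/MC/MCInstrDesc.h"

// Count register operands via the new operands() range, which wraps
// opInfo_begin()/opInfo_end().
static unsigned countRegisterOperands(const llvm::MCInstrDesc &Desc) {
  unsigned Regs = 0;
  for (const llvm::MCOperandInfo &Op : Desc.operands())
    if (Op.OperandType == llvm::MCOI::OPERAND_REGISTER)
      ++Regs;
  return Regs;
}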
@@ -260,6 +263,7 @@ class MCObjectFileInfo { MCSection *getDwarfLineDWOSection() const { return DwarfLineDWOSection; } MCSection *getDwarfLocDWOSection() const { return DwarfLocDWOSection; } MCSection *getDwarfStrOffDWOSection() const { return DwarfStrOffDWOSection; } + MCSection *getDwarfStrOffSection() const { return DwarfStrOffSection; } MCSection *getDwarfAddrSection() const { return DwarfAddrSection; } MCSection *getDwarfCUIndexSection() const { return DwarfCUIndexSection; } MCSection *getDwarfTUIndexSection() const { return DwarfTUIndexSection; } diff --git a/interpreter/llvm/src/include/llvm/MC/MCObjectWriter.h b/interpreter/llvm/src/include/llvm/MC/MCObjectWriter.h index 86bcbb6861d7f..cd90690fb1863 100644 --- a/interpreter/llvm/src/include/llvm/MC/MCObjectWriter.h +++ b/interpreter/llvm/src/include/llvm/MC/MCObjectWriter.h @@ -86,7 +86,7 @@ class MCObjectWriter { virtual void recordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target, - bool &IsPCRel, uint64_t &FixedValue) = 0; + uint64_t &FixedValue) = 0; /// Check whether the difference (A - B) between two symbol references is /// fully resolved. diff --git a/interpreter/llvm/src/include/llvm/MC/MCParser/MCAsmParser.h b/interpreter/llvm/src/include/llvm/MC/MCParser/MCAsmParser.h index 75d45f490bde4..3a659f048ccf6 100644 --- a/interpreter/llvm/src/include/llvm/MC/MCParser/MCAsmParser.h +++ b/interpreter/llvm/src/include/llvm/MC/MCParser/MCAsmParser.h @@ -11,9 +11,9 @@ #define LLVM_MC_MCPARSER_MCASMPARSER_H #include "llvm/ADT/None.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/MC/MCParser/MCAsmLexer.h" diff --git a/interpreter/llvm/src/include/llvm/MC/MCSection.h b/interpreter/llvm/src/include/llvm/MC/MCSection.h index 7bfffbcdb7c2e..2771b1e67eaba 100644 --- a/interpreter/llvm/src/include/llvm/MC/MCSection.h +++ b/interpreter/llvm/src/include/llvm/MC/MCSection.h @@ -14,8 +14,8 @@ #ifndef LLVM_MC_MCSECTION_H #define LLVM_MC_MCSECTION_H -#include "llvm/ADT/ilist.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/ilist.h" #include "llvm/MC/MCFragment.h" #include "llvm/MC/SectionKind.h" #include @@ -167,7 +167,7 @@ class MCSection { MCSection::iterator getSubsectionInsertionPoint(unsigned Subsection); - void dump(); + void dump() const; virtual void PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T, raw_ostream &OS, diff --git a/interpreter/llvm/src/include/llvm/MC/MCSectionMachO.h b/interpreter/llvm/src/include/llvm/MC/MCSectionMachO.h index 3bc5408a4f75e..89db09cbdbdc1 100644 --- a/interpreter/llvm/src/include/llvm/MC/MCSectionMachO.h +++ b/interpreter/llvm/src/include/llvm/MC/MCSectionMachO.h @@ -15,8 +15,8 @@ #define LLVM_MC_MCSECTIONMACHO_H #include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/MachO.h" #include "llvm/MC/MCSection.h" -#include "llvm/Support/MachO.h" namespace llvm { diff --git a/interpreter/llvm/src/include/llvm/MC/MCSymbolWasm.h b/interpreter/llvm/src/include/llvm/MC/MCSymbolWasm.h index 4445be006eb0d..9bae6c582faa3 100644 --- a/interpreter/llvm/src/include/llvm/MC/MCSymbolWasm.h +++ b/interpreter/llvm/src/include/llvm/MC/MCSymbolWasm.h @@ -9,16 +9,20 @@ #ifndef LLVM_MC_MCSYMBOLWASM_H #define LLVM_MC_MCSYMBOLWASM_H +#include "llvm/BinaryFormat/Wasm.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/Support/Wasm.h" namespace llvm { + class MCSymbolWasm : public 
MCSymbol { private: bool IsFunction = false; + bool IsWeak = false; std::string ModuleName; SmallVector Returns; SmallVector Params; + bool ParamsSet = false; + bool ReturnsSet = false; /// An expression describing how to calculate the size of a symbol. If a /// symbol has no size this field will be NULL. @@ -38,20 +42,32 @@ class MCSymbolWasm : public MCSymbol { bool isFunction() const { return IsFunction; } void setIsFunction(bool isFunc) { IsFunction = isFunc; } + bool isWeak() const { return IsWeak; } + void setWeak(bool isWeak) { IsWeak = isWeak; } + const StringRef getModuleName() const { return ModuleName; } - const SmallVector &getReturns() const { return Returns; } + const SmallVector &getReturns() const { + assert(ReturnsSet); + return Returns; + } void setReturns(SmallVectorImpl &&Rets) { + ReturnsSet = true; Returns = std::move(Rets); } - const SmallVector &getParams() const { return Params; } + const SmallVector &getParams() const { + assert(ParamsSet); + return Params; + } void setParams(SmallVectorImpl &&Pars) { + ParamsSet = true; Params = std::move(Pars); } }; -} -#endif +} // end namespace llvm + +#endif // LLVM_MC_MCSYMBOLWASM_H diff --git a/interpreter/llvm/src/include/llvm/MC/MCTargetOptions.h b/interpreter/llvm/src/include/llvm/MC/MCTargetOptions.h index ab027ab27a411..5509bb3bdc7c7 100644 --- a/interpreter/llvm/src/include/llvm/MC/MCTargetOptions.h +++ b/interpreter/llvm/src/include/llvm/MC/MCTargetOptions.h @@ -23,6 +23,12 @@ enum class ExceptionHandling { WinEH, /// Windows Exception Handling }; +enum class DebugCompressionType { + None, /// No compression + GNU, /// zlib-gnu style compression + Z, /// zlib style complession +}; + class StringRef; class MCTargetOptions { diff --git a/interpreter/llvm/src/include/llvm/MC/MCValue.h b/interpreter/llvm/src/include/llvm/MC/MCValue.h index ead08fd90ca06..aa1eaf022c555 100644 --- a/interpreter/llvm/src/include/llvm/MC/MCValue.h +++ b/interpreter/llvm/src/include/llvm/MC/MCValue.h @@ -42,7 +42,7 @@ class MCValue { int64_t Cst; uint32_t RefKind; public: - + MCValue() : SymA(nullptr), SymB(nullptr), Cst(0), RefKind(0) {} int64_t getConstant() const { return Cst; } const MCSymbolRefExpr *getSymA() const { return SymA; } const MCSymbolRefExpr *getSymB() const { return SymB; } diff --git a/interpreter/llvm/src/include/llvm/MC/MCWasmObjectWriter.h b/interpreter/llvm/src/include/llvm/MC/MCWasmObjectWriter.h index a4dd382706d78..bebc0a8258100 100644 --- a/interpreter/llvm/src/include/llvm/MC/MCWasmObjectWriter.h +++ b/interpreter/llvm/src/include/llvm/MC/MCWasmObjectWriter.h @@ -11,44 +11,16 @@ #define LLVM_MC_MCWASMOBJECTWRITER_H #include "llvm/ADT/Triple.h" -#include "llvm/MC/MCValue.h" +#include "llvm/BinaryFormat/Wasm.h" #include "llvm/Support/DataTypes.h" -#include "llvm/Support/raw_ostream.h" -#include namespace llvm { -class MCAssembler; -class MCContext; + class MCFixup; -class MCFragment; class MCObjectWriter; -class MCSectionWasm; -class MCSymbol; -class MCSymbolWasm; class MCValue; class raw_pwrite_stream; -// Information about a single relocation. -struct WasmRelocationEntry { - uint64_t Offset; // Where is the relocation. - const MCSymbolWasm *Symbol; // The symbol to relocate with. - int64_t Addend; // A value to add to the symbol. - unsigned Type; // The type of the relocation. - MCSectionWasm *FixupSection;// The section the relocation is targeting. 
- - WasmRelocationEntry(uint64_t Offset, const MCSymbolWasm *Symbol, - int64_t Addend, unsigned Type, - MCSectionWasm *FixupSection) - : Offset(Offset), Symbol(Symbol), Addend(Addend), Type(Type), - FixupSection(FixupSection) {} - - void print(raw_ostream &Out) const { - Out << "Off=" << Offset << ", Sym=" << Symbol << ", Addend=" << Addend - << ", Type=" << Type << ", FixupSection=" << FixupSection; - } - void dump() const { print(errs()); } -}; - class MCWasmObjectTargetWriter { const unsigned Is64Bit : 1; @@ -56,16 +28,10 @@ class MCWasmObjectTargetWriter { explicit MCWasmObjectTargetWriter(bool Is64Bit_); public: - virtual ~MCWasmObjectTargetWriter() {} - - virtual unsigned getRelocType(MCContext &Ctx, const MCValue &Target, - const MCFixup &Fixup, bool IsPCRel) const = 0; + virtual ~MCWasmObjectTargetWriter(); - virtual bool needsRelocateWithSymbol(const MCSymbol &Sym, - unsigned Type) const; - - virtual void sortRelocs(const MCAssembler &Asm, - std::vector &Relocs); + virtual unsigned getRelocType(const MCValue &Target, + const MCFixup &Fixup) const = 0; /// \name Accessors /// @{ @@ -80,6 +46,7 @@ class MCWasmObjectTargetWriter { /// \returns The constructed object writer. MCObjectWriter *createWasmObjectWriter(MCWasmObjectTargetWriter *MOTW, raw_pwrite_stream &OS); + } // End llvm namespace #endif diff --git a/interpreter/llvm/src/include/llvm/MC/MCWinCOFFObjectWriter.h b/interpreter/llvm/src/include/llvm/MC/MCWinCOFFObjectWriter.h index 57bed213aad47..198a08b5f5394 100644 --- a/interpreter/llvm/src/include/llvm/MC/MCWinCOFFObjectWriter.h +++ b/interpreter/llvm/src/include/llvm/MC/MCWinCOFFObjectWriter.h @@ -13,6 +13,7 @@ namespace llvm { class MCAsmBackend; +class MCContext; class MCFixup; class MCObjectWriter; class MCValue; @@ -30,8 +31,8 @@ class raw_pwrite_stream; virtual ~MCWinCOFFObjectTargetWriter() = default; unsigned getMachine() const { return Machine; } - virtual unsigned getRelocType(const MCValue &Target, const MCFixup &Fixup, - bool IsCrossSection, + virtual unsigned getRelocType(MCContext &Ctx, const MCValue &Target, + const MCFixup &Fixup, bool IsCrossSection, const MCAsmBackend &MAB) const = 0; virtual bool recordRelocation(const MCFixup &) const { return true; } }; diff --git a/interpreter/llvm/src/include/llvm/Object/Archive.h b/interpreter/llvm/src/include/llvm/Object/Archive.h index 807508107c56d..e56e8e464de3f 100644 --- a/interpreter/llvm/src/include/llvm/Object/Archive.h +++ b/interpreter/llvm/src/include/llvm/Object/Archive.h @@ -14,9 +14,9 @@ #ifndef LLVM_OBJECT_ARCHIVE_H #define LLVM_OBJECT_ARCHIVE_H -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Object/Binary.h" #include "llvm/Support/Chrono.h" #include "llvm/Support/Error.h" @@ -253,7 +253,7 @@ class Archive : public Binary { } // Cast methods. 
- static inline bool classof(Binary const *v) { + static bool classof(Binary const *v) { return v->isArchive(); } diff --git a/interpreter/llvm/src/include/llvm/Object/ArchiveWriter.h b/interpreter/llvm/src/include/llvm/Object/ArchiveWriter.h index 3e84a5814d796..1ed758d40df2e 100644 --- a/interpreter/llvm/src/include/llvm/Object/ArchiveWriter.h +++ b/interpreter/llvm/src/include/llvm/Object/ArchiveWriter.h @@ -22,6 +22,7 @@ namespace llvm { struct NewArchiveMember { std::unique_ptr Buf; + StringRef MemberName; sys::TimePoint ModTime; unsigned UID = 0, GID = 0, Perms = 0644; diff --git a/interpreter/llvm/src/include/llvm/Object/Binary.h b/interpreter/llvm/src/include/llvm/Object/Binary.h index f42048e48ee3a..3f5a233c1ee18 100644 --- a/interpreter/llvm/src/include/llvm/Object/Binary.h +++ b/interpreter/llvm/src/include/llvm/Object/Binary.h @@ -57,6 +57,8 @@ class Binary { ID_MachO64L, // MachO 64-bit, little endian ID_MachO64B, // MachO 64-bit, big endian + ID_WinRes, // Windows resource (.res) file. + ID_Wasm, ID_EndObjects @@ -93,9 +95,7 @@ class Binary { return TypeID > ID_StartObjects && TypeID < ID_EndObjects; } - bool isSymbolic() const { - return isIR() || isObject(); - } + bool isSymbolic() const { return isIR() || isObject() || isCOFFImportFile(); } bool isArchive() const { return TypeID == ID_Archive; @@ -132,6 +132,8 @@ class Binary { TypeID == ID_MachO32B || TypeID == ID_MachO64B); } + bool isWinRes() const { return TypeID == ID_WinRes; } + Triple::ObjectFormatType getTripleObjectFormat() const { if (isCOFF()) return Triple::COFF; diff --git a/interpreter/llvm/src/include/llvm/Object/COFF.h b/interpreter/llvm/src/include/llvm/Object/COFF.h index 8b9b497371705..89c1ba6be35f0 100644 --- a/interpreter/llvm/src/include/llvm/Object/COFF.h +++ b/interpreter/llvm/src/include/llvm/Object/COFF.h @@ -15,13 +15,13 @@ #define LLVM_OBJECT_COFF_H #include "llvm/ADT/iterator_range.h" +#include "llvm/BinaryFormat/COFF.h" #include "llvm/DebugInfo/CodeView/CVDebugRecord.h" #include "llvm/MC/SubtargetFeature.h" #include "llvm/Object/Binary.h" #include "llvm/Object/Error.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Support/BinaryByteStream.h" -#include "llvm/Support/COFF.h" #include "llvm/Support/ConvertUTF.h" #include "llvm/Support/Endian.h" #include "llvm/Support/ErrorHandling.h" @@ -562,8 +562,26 @@ struct coff_tls_directory { using coff_tls_directory32 = coff_tls_directory; using coff_tls_directory64 = coff_tls_directory; +/// Bits in control flow guard flags as we understand them. 
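Aside (editor's illustration): testing one of these bits, assuming Flags comes from a parsed load config's GuardFlags field; the enum itself follows below.

#include "llvm/Object/COFF.h"

static bool isCFInstrumented(uint32_t Flags) {
  using llvm::object::coff_guard_flags;
  return (Flags & uint32_t(coff_guard_flags::CFInstrumented)) != 0;
}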
+enum class coff_guard_flags : uint32_t { + CFInstrumented = 0x00000100, + HasFidTable = 0x00000400, + ProtectDelayLoadIAT = 0x00001000, + DelayLoadIATSection = 0x00002000, // Delay load in separate section + HasLongJmpTable = 0x00010000, + FidTableHasFlags = 0x10000000, // Indicates that fid tables are 5 bytes +}; + +struct coff_load_config_code_integrity { + support::ulittle16_t Flags; + support::ulittle16_t Catalog; + support::ulittle32_t CatalogOffset; + support::ulittle32_t Reserved; +}; + +/// 32-bit load config (IMAGE_LOAD_CONFIG_DIRECTORY32) struct coff_load_configuration32 { - support::ulittle32_t Characteristics; + support::ulittle32_t Size; support::ulittle32_t TimeDateStamp; support::ulittle16_t MajorVersion; support::ulittle16_t MinorVersion; @@ -578,34 +596,81 @@ struct coff_load_configuration32 { support::ulittle32_t ProcessAffinityMask; support::ulittle32_t ProcessHeapFlags; support::ulittle16_t CSDVersion; - support::ulittle16_t Reserved; + support::ulittle16_t DependentLoadFlags; support::ulittle32_t EditList; support::ulittle32_t SecurityCookie; support::ulittle32_t SEHandlerTable; support::ulittle32_t SEHandlerCount; + + // Added in MSVC 2015 for /guard:cf. + support::ulittle32_t GuardCFCheckFunction; + support::ulittle32_t GuardCFCheckDispatch; + support::ulittle32_t GuardCFFunctionTable; + support::ulittle32_t GuardCFFunctionCount; + support::ulittle32_t GuardFlags; // coff_guard_flags + + // Added in MSVC 2017 + coff_load_config_code_integrity CodeIntegrity; + support::ulittle32_t GuardAddressTakenIatEntryTable; + support::ulittle32_t GuardAddressTakenIatEntryCount; + support::ulittle32_t GuardLongJumpTargetTable; + support::ulittle32_t GuardLongJumpTargetCount; + support::ulittle32_t DynamicValueRelocTable; + support::ulittle32_t CHPEMetadataPointer; + support::ulittle32_t GuardRFFailureRoutine; + support::ulittle32_t GuardRFFailureRoutineFunctionPointer; + support::ulittle32_t DynamicValueRelocTableOffset; + support::ulittle16_t DynamicValueRelocTableSection; + support::ulittle16_t Reserved2; + support::ulittle32_t GuardRFVerifyStackPointerFunctionPointer; + support::ulittle32_t HotPatchTableOffset; }; +/// 64-bit load config (IMAGE_LOAD_CONFIG_DIRECTORY64) struct coff_load_configuration64 { - support::ulittle32_t Characteristics; + support::ulittle32_t Size; support::ulittle32_t TimeDateStamp; support::ulittle16_t MajorVersion; support::ulittle16_t MinorVersion; support::ulittle32_t GlobalFlagsClear; support::ulittle32_t GlobalFlagsSet; support::ulittle32_t CriticalSectionDefaultTimeout; - support::ulittle32_t DeCommitFreeBlockThreshold; - support::ulittle32_t DeCommitTotalFreeThreshold; - support::ulittle32_t LockPrefixTable; - support::ulittle32_t MaximumAllocationSize; - support::ulittle32_t VirtualMemoryThreshold; - support::ulittle32_t ProcessAffinityMask; + support::ulittle64_t DeCommitFreeBlockThreshold; + support::ulittle64_t DeCommitTotalFreeThreshold; + support::ulittle64_t LockPrefixTable; + support::ulittle64_t MaximumAllocationSize; + support::ulittle64_t VirtualMemoryThreshold; + support::ulittle64_t ProcessAffinityMask; support::ulittle32_t ProcessHeapFlags; support::ulittle16_t CSDVersion; - support::ulittle16_t Reserved; - support::ulittle32_t EditList; + support::ulittle16_t DependentLoadFlags; + support::ulittle64_t EditList; support::ulittle64_t SecurityCookie; support::ulittle64_t SEHandlerTable; support::ulittle64_t SEHandlerCount; + + // Added in MSVC 2015 for /guard:cf. 
+ support::ulittle64_t GuardCFCheckFunction; + support::ulittle64_t GuardCFCheckDispatch; + support::ulittle64_t GuardCFFunctionTable; + support::ulittle64_t GuardCFFunctionCount; + support::ulittle32_t GuardFlags; + + // Added in MSVC 2017 + coff_load_config_code_integrity CodeIntegrity; + support::ulittle64_t GuardAddressTakenIatEntryTable; + support::ulittle64_t GuardAddressTakenIatEntryCount; + support::ulittle64_t GuardLongJumpTargetTable; + support::ulittle64_t GuardLongJumpTargetCount; + support::ulittle64_t DynamicValueRelocTable; + support::ulittle64_t CHPEMetadataPointer; + support::ulittle64_t GuardRFFailureRoutine; + support::ulittle64_t GuardRFFailureRoutineFunctionPointer; + support::ulittle32_t DynamicValueRelocTableOffset; + support::ulittle16_t DynamicValueRelocTableSection; + support::ulittle16_t Reserved2; + support::ulittle64_t GuardRFVerifyStackPointerFunctionPointer; + support::ulittle32_t HotPatchTableOffset; }; struct coff_runtime_function_x64 { @@ -633,6 +698,9 @@ struct coff_resource_dir_entry { uint32_t getNameOffset() const { return maskTrailingOnes(31) & NameOffset; } + // Even though the PE/COFF spec doesn't mention this, the high bit of a name + // offset is set. + void setNameOffset(uint32_t Offset) { NameOffset = Offset | (1 << 31); } } Identifier; union { support::ulittle32_t DataEntryOffset; @@ -646,6 +714,13 @@ struct coff_resource_dir_entry { } Offset; }; +struct coff_resource_data_entry { + support::ulittle32_t DataRVA; + support::ulittle32_t DataSize; + support::ulittle32_t Codepage; + support::ulittle32_t Reserved; +}; + struct coff_resource_dir_table { support::ulittle32_t Characteristics; support::ulittle32_t TimeDateStamp; @@ -677,6 +752,8 @@ class COFFObjectFile : public ObjectFile { const coff_base_reloc_block_header *BaseRelocEnd; const debug_directory *DebugDirectoryBegin; const debug_directory *DebugDirectoryEnd; + // Either coff_load_configuration32 or coff_load_configuration64. + const void *LoadConfig; std::error_code getString(uint32_t offset, StringRef &Res) const; @@ -691,6 +768,7 @@ class COFFObjectFile : public ObjectFile { std::error_code initExportTablePtr(); std::error_code initBaseRelocPtr(); std::error_code initDebugDirectoryPtr(); + std::error_code initLoadConfigPtr(); public: uintptr_t getSymbolTable() const { @@ -768,6 +846,16 @@ class COFFObjectFile : public ObjectFile { return getRawNumberOfSymbols(); } + const coff_load_configuration32 *getLoadConfig32() const { + assert(!is64()); + return reinterpret_cast(LoadConfig); + } + + const coff_load_configuration64 *getLoadConfig64() const { + assert(is64()); + return reinterpret_cast(LoadConfig); + } + protected: void moveSymbolNext(DataRefImpl &Symb) const override; Expected getSymbolName(DataRefImpl Symb) const override; @@ -782,6 +870,7 @@ class COFFObjectFile : public ObjectFile { std::error_code getSectionName(DataRefImpl Sec, StringRef &Res) const override; uint64_t getSectionAddress(DataRefImpl Sec) const override; + uint64_t getSectionIndex(DataRefImpl Sec) const override; uint64_t getSectionSize(DataRefImpl Sec) const override; std::error_code getSectionContents(DataRefImpl Sec, StringRef &Res) const override; @@ -940,7 +1029,7 @@ class COFFObjectFile : public ObjectFile { bool isRelocatableObject() const override; bool is64() const { return PE32PlusHeader; } - static inline bool classof(const Binary *v) { return v->isCOFF(); } + static bool classof(const Binary *v) { return v->isCOFF(); } }; // The iterator for the import directory table. 
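Before moving on to COFFImportFile.h, an editor's sketch of reading the guard flags through the load config accessors added above; the null checks assume an object may lack a load config directory.

#include "llvm/Object/COFF.h"

static uint32_t getGuardFlags(const llvm::object::COFFObjectFile &Obj) {
  if (Obj.is64()) {
    if (const auto *LC = Obj.getLoadConfig64())
      return LC->GuardFlags; // coff_guard_flags bits
  } else if (const auto *LC = Obj.getLoadConfig32()) {
    return LC->GuardFlags;
  }
  return 0;
}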
diff --git a/interpreter/llvm/src/include/llvm/Object/COFFImportFile.h b/interpreter/llvm/src/include/llvm/Object/COFFImportFile.h index 78d9d679acd31..cf9c80a06f49a 100644 --- a/interpreter/llvm/src/include/llvm/Object/COFFImportFile.h +++ b/interpreter/llvm/src/include/llvm/Object/COFFImportFile.h @@ -9,13 +9,15 @@ // // COFF short import file is a special kind of file which contains // only symbol names for DLL-exported symbols. This class implements -// SymbolicFile interface for the file. +// exporting of Symbols to create libraries and a SymbolicFile +// interface for the file type. // //===----------------------------------------------------------------------===// #ifndef LLVM_OBJECT_COFF_IMPORT_FILE_H #define LLVM_OBJECT_COFF_IMPORT_FILE_H +#include "llvm/ADT/ArrayRef.h" #include "llvm/Object/COFF.h" #include "llvm/Object/IRObjectFile.h" #include "llvm/Object/ObjectFile.h" @@ -31,7 +33,7 @@ class COFFImportFile : public SymbolicFile { COFFImportFile(MemoryBufferRef Source) : SymbolicFile(ID_COFFImportFile, Source) {} - static inline bool classof(Binary const *V) { return V->isCOFFImportFile(); } + static bool classof(Binary const *V) { return V->isCOFFImportFile(); } void moveSymbolNext(DataRefImpl &Symb) const override { ++Symb.p; } @@ -68,6 +70,38 @@ class COFFImportFile : public SymbolicFile { } }; +struct COFFShortExport { + std::string Name; + std::string ExtName; + std::string SymbolName; + + uint16_t Ordinal = 0; + bool Noname = false; + bool Data = false; + bool Private = false; + bool Constant = false; + + bool isWeak() { + return ExtName.size() && ExtName != Name; + } + + friend bool operator==(const COFFShortExport &L, const COFFShortExport &R) { + return L.Name == R.Name && L.ExtName == R.ExtName && + L.Ordinal == R.Ordinal && L.Noname == R.Noname && + L.Data == R.Data && L.Private == R.Private; + } + + friend bool operator!=(const COFFShortExport &L, const COFFShortExport &R) { + return !(L == R); + } +}; + +std::error_code writeImportLibrary(StringRef ImportName, + StringRef Path, + ArrayRef Exports, + COFF::MachineTypes Machine, + bool MakeWeakAliases); + } // namespace object } // namespace llvm diff --git a/interpreter/llvm/src/include/llvm/Object/COFFModuleDefinition.h b/interpreter/llvm/src/include/llvm/Object/COFFModuleDefinition.h new file mode 100644 index 0000000000000..be139a2833b0b --- /dev/null +++ b/interpreter/llvm/src/include/llvm/Object/COFFModuleDefinition.h @@ -0,0 +1,53 @@ +//===--- COFFModuleDefinition.h ---------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Windows-specific. +// A parser for the module-definition file (.def file). +// Parsed results are directly written to Config global variable. 
+// +// The format of module-definition files is described in this document: +// https://msdn.microsoft.com/en-us/library/28d6s79h.aspx +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_OBJECT_COFF_MODULE_DEFINITION_H +#define LLVM_OBJECT_COFF_MODULE_DEFINITION_H + +#include "llvm/Object/COFF.h" +#include "llvm/Object/COFFImportFile.h" + +namespace llvm { +namespace object { + +struct COFFModuleDefinition { + std::vector<COFFShortExport> Exports; + std::string OutputFile; + std::string ImportName; + uint64_t ImageBase = 0; + uint64_t StackReserve = 0; + uint64_t StackCommit = 0; + uint64_t HeapReserve = 0; + uint64_t HeapCommit = 0; + uint32_t MajorImageVersion = 0; + uint32_t MinorImageVersion = 0; + uint32_t MajorOSVersion = 0; + uint32_t MinorOSVersion = 0; +}; + +// mingw and wine def files do not mangle _ for x86 which +// is a consequence of legacy binutils' dlltool functionality. +// This MingwDef flag should be removed once mingw stops this practice. +Expected<COFFModuleDefinition> +parseCOFFModuleDefinition(MemoryBufferRef MB, COFF::MachineTypes Machine, + bool MingwDef = false); + +} // End namespace object. +} // End namespace llvm. + +#endif diff --git a/interpreter/llvm/src/include/llvm/Object/Decompressor.h b/interpreter/llvm/src/include/llvm/Object/Decompressor.h index a11857d546aae..c8e888d285e4a 100644 --- a/interpreter/llvm/src/include/llvm/Object/Decompressor.h +++ b/interpreter/llvm/src/include/llvm/Object/Decompressor.h @@ -10,8 +10,8 @@ #ifndef LLVM_OBJECT_DECOMPRESSOR_H #define LLVM_OBJECT_DECOMPRESSOR_H -#include "llvm/ADT/StringRef.h" #include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Object/ObjectFile.h" namespace llvm { @@ -30,7 +30,10 @@ class Decompressor { /// @brief Resize the buffer and uncompress section data into it. /// @param Out Destination buffer. - Error decompress(SmallString<32> &Out); + template <class T> Error resizeAndDecompress(T &Out) { + Out.resize(DecompressedSize); + return decompress({Out.data(), (size_t)DecompressedSize}); + } /// @brief Uncompress section data to raw buffer provided. /// @param Buffer Destination buffer.
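For illustration, a minimal sketch (not part of the patch): the templated resizeAndDecompress() above works with any contiguous buffer exposing resize()/data(), not just SmallString<32>. The Decompressor::create() signature is assumed from the LLVM 5.0 API of this header:

#include "llvm/ADT/SmallString.h"
#include "llvm/Object/Decompressor.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;
using namespace llvm::object;

static Error dumpDecompressed(StringRef SectionName, StringRef Contents,
                              bool IsLittleEndian, bool Is64Bit) {
  // create() inspects the section name/header to pick the compression scheme.
  Expected<Decompressor> D =
      Decompressor::create(SectionName, Contents, IsLittleEndian, Is64Bit);
  if (!D)
    return D.takeError();
  // Any resizable destination works; std::string would do equally well.
  SmallString<0> Out;
  if (Error E = D->resizeAndDecompress(Out))
    return E;
  outs() << Out;
  return Error::success();
}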
diff --git a/interpreter/llvm/src/include/llvm/Object/ELF.h b/interpreter/llvm/src/include/llvm/Object/ELF.h index 42fdfe3e5a744..670c0bbce3ac6 100644 --- a/interpreter/llvm/src/include/llvm/Object/ELF.h +++ b/interpreter/llvm/src/include/llvm/Object/ELF.h @@ -17,9 +17,9 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/Object/ELFTypes.h" #include "llvm/Object/Error.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" #include @@ -235,10 +235,7 @@ ELFFile<ELFT>::getSection(const Elf_Sym *Sym, Elf_Sym_Range Symbols, uint32_t Index = *IndexOrErr; if (Index == 0) return nullptr; - auto SectionsOrErr = sections(); - if (!SectionsOrErr) - return SectionsOrErr.takeError(); - return object::getSection<ELFT>(*SectionsOrErr, Index); + return getSection(Index); } template <class ELFT> diff --git a/interpreter/llvm/src/include/llvm/Object/ELFObjectFile.h b/interpreter/llvm/src/include/llvm/Object/ELFObjectFile.h index d8b58b8079fa0..73011f6f9fe1a 100644 --- a/interpreter/llvm/src/include/llvm/Object/ELFObjectFile.h +++ b/interpreter/llvm/src/include/llvm/Object/ELFObjectFile.h @@ -19,6 +19,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/iterator_range.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/SubtargetFeature.h" #include "llvm/Object/Binary.h" #include "llvm/Object/ELF.h" @@ -29,7 +30,6 @@ #include "llvm/Support/ARMAttributeParser.h" #include "llvm/Support/ARMBuildAttributes.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" @@ -70,7 +70,7 @@ class ELFObjectFileBase : public ObjectFile { elf_symbol_iterator_range symbols() const; - static inline bool classof(const Binary *v) { return v->isELF(); } + static bool classof(const Binary *v) { return v->isELF(); } SubtargetFeatures getFeatures() const override; @@ -235,6 +235,7 @@ template <class ELFT> class ELFObjectFile : public ELFObjectFileBase { std::error_code getSectionName(DataRefImpl Sec, StringRef &Res) const override; uint64_t getSectionAddress(DataRefImpl Sec) const override; + uint64_t getSectionIndex(DataRefImpl Sec) const override; uint64_t getSectionSize(DataRefImpl Sec) const override; std::error_code getSectionContents(DataRefImpl Sec, StringRef &Res) const override; @@ -388,7 +389,7 @@ template <class ELFT> class ELFObjectFile : public ELFObjectFileBase { const ELFFile<ELFT> *getELFFile() const { return &EF; } bool isDyldType() const { return isDyldELFObject; } - static inline bool classof(const Binary *v) { + static bool classof(const Binary *v) { return v->getType() == getELFType(ELFT::TargetEndianness == support::little, ELFT::Is64Bits); } @@ -645,6 +646,17 @@ uint64_t ELFObjectFile<ELFT>::getSectionAddress(DataRefImpl Sec) const { return getSection(Sec)->sh_addr; } +template <class ELFT> +uint64_t ELFObjectFile<ELFT>::getSectionIndex(DataRefImpl Sec) const { + auto SectionsOrErr = EF.sections(); + handleAllErrors(std::move(SectionsOrErr.takeError()), + [](const ErrorInfoBase &) { + llvm_unreachable("unable to get section index"); + }); + const Elf_Shdr *First = SectionsOrErr->begin(); + return getSection(Sec) - First; +} + template <class ELFT> uint64_t ELFObjectFile<ELFT>::getSectionSize(DataRefImpl Sec) const { return getSection(Sec)->sh_size; diff --git a/interpreter/llvm/src/include/llvm/Object/ELFTypes.h b/interpreter/llvm/src/include/llvm/Object/ELFTypes.h index 99346fe1a882c..808144694acb8 100644 ---
a/interpreter/llvm/src/include/llvm/Object/ELFTypes.h +++ b/interpreter/llvm/src/include/llvm/Object/ELFTypes.h @@ -12,8 +12,8 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/Object/Error.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" #include diff --git a/interpreter/llvm/src/include/llvm/Object/IRObjectFile.h b/interpreter/llvm/src/include/llvm/Object/IRObjectFile.h index 0ea89011e8836..9a696bffd1f0b 100644 --- a/interpreter/llvm/src/include/llvm/Object/IRObjectFile.h +++ b/interpreter/llvm/src/include/llvm/Object/IRObjectFile.h @@ -15,10 +15,12 @@ #define LLVM_OBJECT_IROBJECTFILE_H #include "llvm/ADT/PointerUnion.h" +#include "llvm/Object/IRSymtab.h" #include "llvm/Object/ModuleSymbolTable.h" #include "llvm/Object/SymbolicFile.h" namespace llvm { +class BitcodeModule; class Mangler; class Module; class GlobalValue; @@ -44,7 +46,7 @@ class IRObjectFile : public SymbolicFile { StringRef getTargetTriple() const; - static inline bool classof(const Binary *v) { + static bool classof(const Binary *v) { return v->isIR(); } @@ -61,7 +63,20 @@ class IRObjectFile : public SymbolicFile { static Expected<std::unique_ptr<IRObjectFile>> create(MemoryBufferRef Object, LLVMContext &Context); }; + +/// The contents of a bitcode file and its irsymtab. Any underlying data +/// for the irsymtab are owned by Symtab and Strtab. +struct IRSymtabFile { + std::vector<BitcodeModule> Mods; + SmallVector<char, 0> Symtab, Strtab; + irsymtab::Reader TheReader; +}; + +/// Reads a bitcode file, creating its irsymtab if necessary. +Expected<IRSymtabFile> readIRSymtab(MemoryBufferRef MBRef); + } + } #endif diff --git a/interpreter/llvm/src/include/llvm/Object/IRSymtab.h b/interpreter/llvm/src/include/llvm/Object/IRSymtab.h index b425543bf637d..824a67a672faf 100644 --- a/interpreter/llvm/src/include/llvm/Object/IRSymtab.h +++ b/interpreter/llvm/src/include/llvm/Object/IRSymtab.h @@ -25,8 +25,8 @@ #define LLVM_OBJECT_IRSYMTAB_H #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/IR/GlobalValue.h" #include "llvm/Object/SymbolicFile.h" #include "llvm/Support/Endian.h" @@ -36,6 +36,10 @@ #include namespace llvm { + +struct BitcodeFileContents; +class StringTableBuilder; + namespace irsymtab { namespace storage { @@ -120,6 +124,18 @@ struct Uncommon { }; struct Header { + /// Version number of the symtab format. This number should be incremented + /// when the format changes, but it does not need to be incremented if a + /// change to LLVM would cause it to create a different symbol table. + Word Version; + enum { kCurrentVersion = 0 }; + + /// The producer's version string (LLVM_VERSION_STRING " " LLVM_REVISION). + /// Consumers should rebuild the symbol table from IR if the producer's + /// version does not match the consumer's version due to potential differences + /// in symbol table format, symbol enumeration order and so on. + Str Producer; + Range<Module> Modules; Range<Comdat> Comdats; Range<Symbol> Symbols; @@ -133,9 +149,10 @@ struct Header { } // end namespace storage -/// Fills in Symtab and Strtab with a valid symbol and string table for Mods. +/// Fills in Symtab and StrtabBuilder with a valid symbol and string table for +/// Mods. Error build(ArrayRef<Module *> Mods, SmallVector<char, 0> &Symtab, - SmallVector<char, 0> &Strtab); + StringTableBuilder &StrtabBuilder, BumpPtrAllocator &Alloc); /// This represents a symbol that has been read from a storage::Symbol and /// possibly a storage::Uncommon.
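For illustration, a minimal sketch (not part of the patch) of consuming the new readIRSymtab() entry point declared above; the symbols() iteration and the getName() accessor are assumed from the irsymtab::Reader interface in this header:

#include "llvm/Object/IRObjectFile.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;
using namespace llvm::object;

static Error printIRSymbolNames(MemoryBufferRef MB) {
  // readIRSymtab() parses the bitcode and rebuilds the irsymtab on the fly if
  // the file does not embed one (or embeds one whose Version or Producer
  // string no longer matches this consumer).
  Expected<IRSymtabFile> F = readIRSymtab(MB);
  if (!F)
    return F.takeError();
  for (const irsymtab::Reader::SymbolRef &Sym : F->TheReader.symbols())
    outs() << Sym.getName() << '\n';
  return Error::success();
}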
@@ -238,6 +255,8 @@ class Reader { /// copied into an irsymtab::Symbol object. symbol_range symbols() const; + size_t getNumModules() const { return Modules.size(); } + /// Returns a slice of the symbol table for the I'th module in the file. /// The symbols enumerated by this method are ephemeral, but they can be /// copied into an irsymtab::Symbol object. @@ -314,6 +333,16 @@ inline Reader::symbol_range Reader::module_symbols(unsigned I) const { SymbolRef(MEnd, MEnd, nullptr, this)}; } +/// The contents of the irsymtab in a bitcode file. Any underlying data for the +/// irsymtab are owned by Symtab and Strtab. +struct FileContents { + SmallVector Symtab, Strtab; + Reader TheReader; +}; + +/// Reads the contents of a bitcode file, creating its irsymtab if necessary. +Expected readBitcode(const BitcodeFileContents &BFC); + } // end namespace irsymtab } // end namespace llvm diff --git a/interpreter/llvm/src/include/llvm/Object/MachO.h b/interpreter/llvm/src/include/llvm/Object/MachO.h index 29553558f72f5..2c3c89d10546f 100644 --- a/interpreter/llvm/src/include/llvm/Object/MachO.h +++ b/interpreter/llvm/src/include/llvm/Object/MachO.h @@ -16,19 +16,19 @@ #define LLVM_OBJECT_MACHO_H #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/BinaryFormat/MachO.h" #include "llvm/MC/SubtargetFeature.h" #include "llvm/Object/Binary.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Object/SymbolicFile.h" #include "llvm/Support/Error.h" #include "llvm/Support/Format.h" -#include "llvm/Support/MachO.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/raw_ostream.h" #include @@ -290,6 +290,7 @@ class MachOObjectFile : public ObjectFile { std::error_code getSectionName(DataRefImpl Sec, StringRef &Res) const override; uint64_t getSectionAddress(DataRefImpl Sec) const override; + uint64_t getSectionIndex(DataRefImpl Sec) const override; uint64_t getSectionSize(DataRefImpl Sec) const override; std::error_code getSectionContents(DataRefImpl Sec, StringRef &Res) const override; @@ -303,6 +304,12 @@ class MachOObjectFile : public ObjectFile { relocation_iterator section_rel_begin(DataRefImpl Sec) const override; relocation_iterator section_rel_end(DataRefImpl Sec) const override; + relocation_iterator extrel_begin() const; + relocation_iterator extrel_end() const; + iterator_range external_relocations() const { + return make_range(extrel_begin(), extrel_end()); + } + void moveRelocationNext(DataRefImpl &Rel) const override; uint64_t getRelocationOffset(DataRefImpl Rel) const override; symbol_iterator getRelocationSymbol(DataRefImpl Rel) const override; @@ -549,6 +556,8 @@ class MachOObjectFile : public ObjectFile { bool isRelocatableObject() const override; + StringRef mapDebugSectionName(StringRef Name) const override; + bool hasPageZeroSegment() const { return HasPageZeroSegment; } static bool classof(const Binary *v) { diff --git a/interpreter/llvm/src/include/llvm/Object/MachOUniversal.h b/interpreter/llvm/src/include/llvm/Object/MachOUniversal.h index a14c4ca012237..72837d0970c4b 100644 --- a/interpreter/llvm/src/include/llvm/Object/MachOUniversal.h +++ b/interpreter/llvm/src/include/llvm/Object/MachOUniversal.h @@ -16,10 +16,10 @@ #include "llvm/ADT/Triple.h" #include "llvm/ADT/iterator_range.h" +#include "llvm/BinaryFormat/MachO.h" #include 
"llvm/Object/Archive.h" #include "llvm/Object/Binary.h" #include "llvm/Object/MachO.h" -#include "llvm/Support/MachO.h" namespace llvm { class StringRef; @@ -154,7 +154,7 @@ class MachOUniversalBinary : public Binary { uint32_t getNumberOfObjects() const { return NumberOfObjects; } // Cast methods. - static inline bool classof(Binary const *V) { + static bool classof(Binary const *V) { return V->isMachOUniversalBinary(); } diff --git a/interpreter/llvm/src/include/llvm/Object/ObjectFile.h b/interpreter/llvm/src/include/llvm/Object/ObjectFile.h index 9a7bc618ffd0a..afcad3090703b 100644 --- a/interpreter/llvm/src/include/llvm/Object/ObjectFile.h +++ b/interpreter/llvm/src/include/llvm/Object/ObjectFile.h @@ -14,8 +14,9 @@ #ifndef LLVM_OBJECT_OBJECTFILE_H #define LLVM_OBJECT_OBJECTFILE_H -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/BinaryFormat/Magic.h" #include "llvm/MC/SubtargetFeature.h" #include "llvm/Object/Binary.h" #include "llvm/Object/Error.h" @@ -95,6 +96,7 @@ class SectionRef { std::error_code getName(StringRef &Result) const; uint64_t getAddress() const; + uint64_t getIndex() const; uint64_t getSize() const; std::error_code getContents(StringRef &Result) const; @@ -222,6 +224,7 @@ class ObjectFile : public SymbolicFile { virtual std::error_code getSectionName(DataRefImpl Sec, StringRef &Res) const = 0; virtual uint64_t getSectionAddress(DataRefImpl Sec) const = 0; + virtual uint64_t getSectionIndex(DataRefImpl Sec) const = 0; virtual uint64_t getSectionSize(DataRefImpl Sec) const = 0; virtual std::error_code getSectionContents(DataRefImpl Sec, StringRef &Res) const = 0; @@ -290,6 +293,9 @@ class ObjectFile : public SymbolicFile { return std::error_code(); } + /// Maps a debug section name to a standard DWARF section name. + virtual StringRef mapDebugSectionName(StringRef Name) const { return Name; } + /// True if this is a relocatable object (.o/.obj). 
virtual bool isRelocatableObject() const = 0; @@ -301,13 +307,13 @@ class ObjectFile : public SymbolicFile { createObjectFile(StringRef ObjectPath); static Expected<std::unique_ptr<ObjectFile>> - createObjectFile(MemoryBufferRef Object, sys::fs::file_magic Type); + createObjectFile(MemoryBufferRef Object, llvm::file_magic Type); static Expected<std::unique_ptr<ObjectFile>> createObjectFile(MemoryBufferRef Object) { - return createObjectFile(Object, sys::fs::file_magic::unknown); + return createObjectFile(Object, llvm::file_magic::unknown); } - static inline bool classof(const Binary *v) { + static bool classof(const Binary *v) { return v->isObject(); } @@ -393,6 +399,10 @@ inline uint64_t SectionRef::getAddress() const { return OwningObject->getSectionAddress(SectionPimpl); } +inline uint64_t SectionRef::getIndex() const { + return OwningObject->getSectionIndex(SectionPimpl); +} + inline uint64_t SectionRef::getSize() const { return OwningObject->getSectionSize(SectionPimpl); } diff --git a/interpreter/llvm/src/include/llvm/Object/RelocVisitor.h b/interpreter/llvm/src/include/llvm/Object/RelocVisitor.h index 73c7ce367cb0c..c358d39964352 100644 --- a/interpreter/llvm/src/include/llvm/Object/RelocVisitor.h +++ b/interpreter/llvm/src/include/llvm/Object/RelocVisitor.h @@ -17,33 +17,21 @@ #define LLVM_OBJECT_RELOCVISITOR_H #include "llvm/ADT/Triple.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/BinaryFormat/MachO.h" #include "llvm/Object/COFF.h" #include "llvm/Object/ELFObjectFile.h" #include "llvm/Object/MachO.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ErrorOr.h" -#include "llvm/Support/MachO.h" #include #include namespace llvm { namespace object { -struct RelocToApply { - // The computed value after applying the relevant relocations. - int64_t Value = 0; - - // The width of the value; how many bytes to touch when applying the - // relocation. - char Width = 0; - - RelocToApply() = default; - RelocToApply(int64_t Value, char Width) : Value(Value), Width(Width) {} -}; - /// @brief Base class for object file relocation visitors. class RelocVisitor { public: @@ -52,16 +40,16 @@ class RelocVisitor { // TODO: Should handle multiple applied relocations via either passing in the // previously computed value or just count paired relocations as a single // visit.
- RelocToApply visit(uint32_t RelocType, RelocationRef R, uint64_t Value = 0) { + uint64_t visit(uint32_t Rel, RelocationRef R, uint64_t Value = 0) { if (isa(ObjToVisit)) - return visitELF(RelocType, R, Value); + return visitELF(Rel, R, Value); if (isa(ObjToVisit)) - return visitCOFF(RelocType, R, Value); + return visitCOFF(Rel, R, Value); if (isa(ObjToVisit)) - return visitMachO(RelocType, R, Value); + return visitMachO(Rel, R, Value); HasError = true; - return RelocToApply(); + return 0; } bool error() { return HasError; } @@ -70,214 +58,60 @@ class RelocVisitor { const ObjectFile &ObjToVisit; bool HasError = false; - RelocToApply visitELF(uint32_t RelocType, RelocationRef R, uint64_t Value) { + uint64_t visitELF(uint32_t Rel, RelocationRef R, uint64_t Value) { if (ObjToVisit.getBytesInAddress() == 8) { // 64-bit object file switch (ObjToVisit.getArch()) { case Triple::x86_64: - switch (RelocType) { - case ELF::R_X86_64_NONE: - return visitELF_X86_64_NONE(R); - case ELF::R_X86_64_64: - return visitELF_X86_64_64(R, Value); - case ELF::R_X86_64_PC32: - return visitELF_X86_64_PC32(R, Value); - case ELF::R_X86_64_32: - return visitELF_X86_64_32(R, Value); - case ELF::R_X86_64_32S: - return visitELF_X86_64_32S(R, Value); - default: - HasError = true; - return RelocToApply(); - } + return visitX86_64(Rel, R, Value); case Triple::aarch64: case Triple::aarch64_be: - switch (RelocType) { - case ELF::R_AARCH64_ABS32: - return visitELF_AARCH64_ABS32(R, Value); - case ELF::R_AARCH64_ABS64: - return visitELF_AARCH64_ABS64(R, Value); - default: - HasError = true; - return RelocToApply(); - } + return visitAarch64(Rel, R, Value); case Triple::bpfel: case Triple::bpfeb: - switch (RelocType) { - case ELF::R_BPF_64_64: - return visitELF_BPF_64_64(R, Value); - case ELF::R_BPF_64_32: - return visitELF_BPF_64_32(R, Value); - default: - HasError = true; - return RelocToApply(); - } + return visitBpf(Rel, R, Value); case Triple::mips64el: case Triple::mips64: - switch (RelocType) { - case ELF::R_MIPS_32: - return visitELF_MIPS64_32(R, Value); - case ELF::R_MIPS_64: - return visitELF_MIPS64_64(R, Value); - default: - HasError = true; - return RelocToApply(); - } + return visitMips64(Rel, R, Value); case Triple::ppc64le: case Triple::ppc64: - switch (RelocType) { - case ELF::R_PPC64_ADDR32: - return visitELF_PPC64_ADDR32(R, Value); - case ELF::R_PPC64_ADDR64: - return visitELF_PPC64_ADDR64(R, Value); - default: - HasError = true; - return RelocToApply(); - } + return visitPPC64(Rel, R, Value); case Triple::systemz: - switch (RelocType) { - case ELF::R_390_32: - return visitELF_390_32(R, Value); - case ELF::R_390_64: - return visitELF_390_64(R, Value); - default: - HasError = true; - return RelocToApply(); - } + return visitSystemz(Rel, R, Value); case Triple::sparcv9: - switch (RelocType) { - case ELF::R_SPARC_32: - case ELF::R_SPARC_UA32: - return visitELF_SPARCV9_32(R, Value); - case ELF::R_SPARC_64: - case ELF::R_SPARC_UA64: - return visitELF_SPARCV9_64(R, Value); - default: - HasError = true; - return RelocToApply(); - } + return visitSparc64(Rel, R, Value); case Triple::amdgcn: - switch (RelocType) { - case ELF::R_AMDGPU_ABS32: - return visitELF_AMDGPU_ABS32(R, Value); - case ELF::R_AMDGPU_ABS64: - return visitELF_AMDGPU_ABS64(R, Value); - default: - HasError = true; - return RelocToApply(); - } - default: - HasError = true; - return RelocToApply(); - } - } else if (ObjToVisit.getBytesInAddress() == 4) { // 32-bit object file - switch (ObjToVisit.getArch()) { - case Triple::x86: - switch (RelocType) { - 
case ELF::R_386_NONE: - return visitELF_386_NONE(R); - case ELF::R_386_32: - return visitELF_386_32(R, Value); - case ELF::R_386_PC32: - return visitELF_386_PC32(R, Value); - default: - HasError = true; - return RelocToApply(); - } - case Triple::ppc: - switch (RelocType) { - case ELF::R_PPC_ADDR32: - return visitELF_PPC_ADDR32(R, Value); - default: - HasError = true; - return RelocToApply(); - } - case Triple::arm: - case Triple::armeb: - switch (RelocType) { - default: - HasError = true; - return RelocToApply(); - case ELF::R_ARM_ABS32: - return visitELF_ARM_ABS32(R, Value); - } - case Triple::lanai: - switch (RelocType) { - case ELF::R_LANAI_32: - return visitELF_Lanai_32(R, Value); - default: - HasError = true; - return RelocToApply(); - } - case Triple::mipsel: - case Triple::mips: - switch (RelocType) { - case ELF::R_MIPS_32: - return visitELF_MIPS_32(R, Value); - default: - HasError = true; - return RelocToApply(); - } - case Triple::sparc: - switch (RelocType) { - case ELF::R_SPARC_32: - case ELF::R_SPARC_UA32: - return visitELF_SPARC_32(R, Value); - default: - HasError = true; - return RelocToApply(); - } - case Triple::hexagon: - switch (RelocType) { - case ELF::R_HEX_32: - return visitELF_HEX_32(R, Value); - default: - HasError = true; - return RelocToApply(); - } + return visitAmdgpu(Rel, R, Value); default: HasError = true; - return RelocToApply(); + return 0; } - } else { - report_fatal_error("Invalid word size in object file"); } - } - RelocToApply visitCOFF(uint32_t RelocType, RelocationRef R, uint64_t Value) { - switch (ObjToVisit.getArch()) { - case Triple::x86: - switch (RelocType) { - case COFF::IMAGE_REL_I386_SECREL: - return visitCOFF_I386_SECREL(R, Value); - case COFF::IMAGE_REL_I386_DIR32: - return visitCOFF_I386_DIR32(R, Value); - } - break; - case Triple::x86_64: - switch (RelocType) { - case COFF::IMAGE_REL_AMD64_SECREL: - return visitCOFF_AMD64_SECREL(R, Value); - case COFF::IMAGE_REL_AMD64_ADDR64: - return visitCOFF_AMD64_ADDR64(R, Value); - } - break; - } - HasError = true; - return RelocToApply(); - } + // 32-bit object file + assert(ObjToVisit.getBytesInAddress() == 4 && + "Invalid word size in object file"); - RelocToApply visitMachO(uint32_t RelocType, RelocationRef R, uint64_t Value) { switch (ObjToVisit.getArch()) { - default: break; - case Triple::x86_64: - switch (RelocType) { - default: break; - case MachO::X86_64_RELOC_UNSIGNED: - return visitMACHO_X86_64_UNSIGNED(R, Value); - } + case Triple::x86: + return visitX86(Rel, R, Value); + case Triple::ppc: + return visitPPC32(Rel, R, Value); + case Triple::arm: + case Triple::armeb: + return visitARM(Rel, R, Value); + case Triple::lanai: + return visitLanai(Rel, R, Value); + case Triple::mipsel: + case Triple::mips: + return visitMips32(Rel, R, Value); + case Triple::sparc: + return visitSparc32(Rel, R, Value); + case Triple::hexagon: + return visitHexagon(Rel, R, Value); + default: + HasError = true; + return 0; } - HasError = true; - return RelocToApply(); } int64_t getELFAddend(RelocationRef R) { @@ -287,203 +121,193 @@ class RelocVisitor { return *AddendOrErr; } - uint8_t getLengthMachO64(RelocationRef R) { - const MachOObjectFile *Obj = cast(R.getObject()); - return Obj->getRelocationLength(R.getRawDataRefImpl()); - } - - /// Operations - - /// 386-ELF - RelocToApply visitELF_386_NONE(RelocationRef R) { - return RelocToApply(0, 0); - } - - // Ideally the Addend here will be the addend in the data for - // the relocation. It's not actually the case for Rel relocations. 
- RelocToApply visitELF_386_32(RelocationRef R, uint64_t Value) { - return RelocToApply(Value, 4); - } - - RelocToApply visitELF_386_PC32(RelocationRef R, uint64_t Value) { - uint64_t Address = R.getOffset(); - return RelocToApply(Value - Address, 4); - } - - /// X86-64 ELF - RelocToApply visitELF_X86_64_NONE(RelocationRef R) { - return RelocToApply(0, 0); - } - RelocToApply visitELF_X86_64_64(RelocationRef R, uint64_t Value) { - int64_t Addend = getELFAddend(R); - return RelocToApply(Value + Addend, 8); - } - RelocToApply visitELF_X86_64_PC32(RelocationRef R, uint64_t Value) { - int64_t Addend = getELFAddend(R); - uint64_t Address = R.getOffset(); - return RelocToApply(Value + Addend - Address, 4); - } - RelocToApply visitELF_X86_64_32(RelocationRef R, uint64_t Value) { - int64_t Addend = getELFAddend(R); - uint32_t Res = (Value + Addend) & 0xFFFFFFFF; - return RelocToApply(Res, 4); - } - RelocToApply visitELF_X86_64_32S(RelocationRef R, uint64_t Value) { - int64_t Addend = getELFAddend(R); - int32_t Res = (Value + Addend) & 0xFFFFFFFF; - return RelocToApply(Res, 4); - } - - /// BPF ELF - RelocToApply visitELF_BPF_64_32(RelocationRef R, uint64_t Value) { - uint32_t Res = Value & 0xFFFFFFFF; - return RelocToApply(Res, 4); - } - RelocToApply visitELF_BPF_64_64(RelocationRef R, uint64_t Value) { - return RelocToApply(Value, 8); - } - - /// PPC64 ELF - RelocToApply visitELF_PPC64_ADDR32(RelocationRef R, uint64_t Value) { - int64_t Addend = getELFAddend(R); - uint32_t Res = (Value + Addend) & 0xFFFFFFFF; - return RelocToApply(Res, 4); - } - RelocToApply visitELF_PPC64_ADDR64(RelocationRef R, uint64_t Value) { - int64_t Addend = getELFAddend(R); - return RelocToApply(Value + Addend, 8); - } - - /// PPC32 ELF - RelocToApply visitELF_PPC_ADDR32(RelocationRef R, uint64_t Value) { - int64_t Addend = getELFAddend(R); - uint32_t Res = (Value + Addend) & 0xFFFFFFFF; - return RelocToApply(Res, 4); - } - - /// Lanai ELF - RelocToApply visitELF_Lanai_32(RelocationRef R, uint64_t Value) { - int64_t Addend = getELFAddend(R); - uint32_t Res = (Value + Addend) & 0xFFFFFFFF; - return RelocToApply(Res, 4); - } - - /// MIPS ELF - RelocToApply visitELF_MIPS_32(RelocationRef R, uint64_t Value) { - uint32_t Res = Value & 0xFFFFFFFF; - return RelocToApply(Res, 4); - } - - /// MIPS64 ELF - RelocToApply visitELF_MIPS64_32(RelocationRef R, uint64_t Value) { - int64_t Addend = getELFAddend(R); - uint32_t Res = (Value + Addend) & 0xFFFFFFFF; - return RelocToApply(Res, 4); - } - - RelocToApply visitELF_MIPS64_64(RelocationRef R, uint64_t Value) { - int64_t Addend = getELFAddend(R); - uint64_t Res = (Value + Addend); - return RelocToApply(Res, 8); + uint64_t visitX86_64(uint32_t Rel, RelocationRef R, uint64_t Value) { + switch (Rel) { + case ELF::R_X86_64_NONE: + return 0; + case ELF::R_X86_64_64: + return Value + getELFAddend(R); + case ELF::R_X86_64_PC32: + return Value + getELFAddend(R) - R.getOffset(); + case ELF::R_X86_64_32: + case ELF::R_X86_64_32S: + return (Value + getELFAddend(R)) & 0xFFFFFFFF; + } + HasError = true; + return 0; } - // AArch64 ELF - RelocToApply visitELF_AARCH64_ABS32(RelocationRef R, uint64_t Value) { - int64_t Addend = getELFAddend(R); - int64_t Res = Value + Addend; - - // Overflow check allows for both signed and unsigned interpretation. 
- if (Res < INT32_MIN || Res > UINT32_MAX) - HasError = true; - - return RelocToApply(static_cast(Res), 4); + uint64_t visitAarch64(uint32_t Rel, RelocationRef R, uint64_t Value) { + switch (Rel) { + case ELF::R_AARCH64_ABS32: { + int64_t Res = Value + getELFAddend(R); + if (Res < INT32_MIN || Res > UINT32_MAX) + HasError = true; + return static_cast(Res); + } + case ELF::R_AARCH64_ABS64: + return Value + getELFAddend(R); + } + HasError = true; + return 0; } - RelocToApply visitELF_AARCH64_ABS64(RelocationRef R, uint64_t Value) { - int64_t Addend = getELFAddend(R); - return RelocToApply(Value + Addend, 8); + uint64_t visitBpf(uint32_t Rel, RelocationRef R, uint64_t Value) { + switch (Rel) { + case ELF::R_BPF_64_32: + return Value & 0xFFFFFFFF; + case ELF::R_BPF_64_64: + return Value; + } + HasError = true; + return 0; } - // SystemZ ELF - RelocToApply visitELF_390_32(RelocationRef R, uint64_t Value) { - int64_t Addend = getELFAddend(R); - int64_t Res = Value + Addend; - - // Overflow check allows for both signed and unsigned interpretation. - if (Res < INT32_MIN || Res > UINT32_MAX) - HasError = true; - - return RelocToApply(static_cast(Res), 4); + uint64_t visitMips64(uint32_t Rel, RelocationRef R, uint64_t Value) { + switch (Rel) { + case ELF::R_MIPS_32: + return (Value + getELFAddend(R)) & 0xFFFFFFFF; + case ELF::R_MIPS_64: + return Value + getELFAddend(R); + } + HasError = true; + return 0; } - RelocToApply visitELF_390_64(RelocationRef R, uint64_t Value) { - int64_t Addend = getELFAddend(R); - return RelocToApply(Value + Addend, 8); + uint64_t visitPPC64(uint32_t Rel, RelocationRef R, uint64_t Value) { + switch (Rel) { + case ELF::R_PPC64_ADDR32: + return (Value + getELFAddend(R)) & 0xFFFFFFFF; + case ELF::R_PPC64_ADDR64: + return Value + getELFAddend(R); + } + HasError = true; + return 0; } - RelocToApply visitELF_SPARC_32(RelocationRef R, uint32_t Value) { - int32_t Addend = getELFAddend(R); - return RelocToApply(Value + Addend, 4); + uint64_t visitSystemz(uint32_t Rel, RelocationRef R, uint64_t Value) { + switch (Rel) { + case ELF::R_390_32: { + int64_t Res = Value + getELFAddend(R); + if (Res < INT32_MIN || Res > UINT32_MAX) + HasError = true; + return static_cast(Res); + } + case ELF::R_390_64: + return Value + getELFAddend(R); + } + HasError = true; + return 0; } - RelocToApply visitELF_SPARCV9_32(RelocationRef R, uint64_t Value) { - int32_t Addend = getELFAddend(R); - return RelocToApply(Value + Addend, 4); + uint64_t visitSparc64(uint32_t Rel, RelocationRef R, uint64_t Value) { + switch (Rel) { + case ELF::R_SPARC_32: + case ELF::R_SPARC_64: + case ELF::R_SPARC_UA32: + case ELF::R_SPARC_UA64: + return Value + getELFAddend(R); + } + HasError = true; + return 0; } - RelocToApply visitELF_SPARCV9_64(RelocationRef R, uint64_t Value) { - int64_t Addend = getELFAddend(R); - return RelocToApply(Value + Addend, 8); + uint64_t visitAmdgpu(uint32_t Rel, RelocationRef R, uint64_t Value) { + switch (Rel) { + case ELF::R_AMDGPU_ABS32: + case ELF::R_AMDGPU_ABS64: + return Value + getELFAddend(R); + } + HasError = true; + return 0; } - RelocToApply visitELF_ARM_ABS32(RelocationRef R, uint64_t Value) { - int64_t Res = Value; - - // Overflow check allows for both signed and unsigned interpretation. 
- if (Res < INT32_MIN || Res > UINT32_MAX) - HasError = true; - - return RelocToApply(static_cast(Res), 4); + uint64_t visitX86(uint32_t Rel, RelocationRef R, uint64_t Value) { + switch (Rel) { + case ELF::R_386_NONE: + return 0; + case ELF::R_386_32: + return Value; + case ELF::R_386_PC32: + return Value - R.getOffset(); + } + HasError = true; + return 0; } - RelocToApply visitELF_HEX_32(RelocationRef R, uint64_t Value) { - int64_t Addend = getELFAddend(R); - return RelocToApply(Value + Addend, 4); + uint64_t visitPPC32(uint32_t Rel, RelocationRef R, uint64_t Value) { + if (Rel == ELF::R_PPC_ADDR32) + return (Value + getELFAddend(R)) & 0xFFFFFFFF; + HasError = true; + return 0; } - RelocToApply visitELF_AMDGPU_ABS32(RelocationRef R, uint64_t Value) { - int64_t Addend = getELFAddend(R); - return RelocToApply(Value + Addend, 4); + uint64_t visitARM(uint32_t Rel, RelocationRef R, uint64_t Value) { + if (Rel == ELF::R_ARM_ABS32) { + if ((int64_t)Value < INT32_MIN || (int64_t)Value > UINT32_MAX) + HasError = true; + return static_cast(Value); + } + HasError = true; + return 0; } - RelocToApply visitELF_AMDGPU_ABS64(RelocationRef R, uint64_t Value) { - int64_t Addend = getELFAddend(R); - return RelocToApply(Value + Addend, 8); + uint64_t visitLanai(uint32_t Rel, RelocationRef R, uint64_t Value) { + if (Rel == ELF::R_LANAI_32) + return (Value + getELFAddend(R)) & 0xFFFFFFFF; + HasError = true; + return 0; } - /// I386 COFF - RelocToApply visitCOFF_I386_SECREL(RelocationRef R, uint64_t Value) { - return RelocToApply(static_cast(Value), /*Width=*/4); + uint64_t visitMips32(uint32_t Rel, RelocationRef R, uint64_t Value) { + if (Rel == ELF::R_MIPS_32) + return Value & 0xFFFFFFFF; + HasError = true; + return 0; } - RelocToApply visitCOFF_I386_DIR32(RelocationRef R, uint64_t Value) { - return RelocToApply(static_cast(Value), /*Width=*/4); + uint64_t visitSparc32(uint32_t Rel, RelocationRef R, uint64_t Value) { + if (Rel == ELF::R_SPARC_32 || Rel == ELF::R_SPARC_UA32) + return Value + getELFAddend(R); + HasError = true; + return 0; } - /// AMD64 COFF - RelocToApply visitCOFF_AMD64_SECREL(RelocationRef R, uint64_t Value) { - return RelocToApply(static_cast(Value), /*Width=*/4); + uint64_t visitHexagon(uint32_t Rel, RelocationRef R, uint64_t Value) { + if (Rel == ELF::R_HEX_32) + return Value + getELFAddend(R); + HasError = true; + return 0; } - RelocToApply visitCOFF_AMD64_ADDR64(RelocationRef R, uint64_t Value) { - return RelocToApply(Value, /*Width=*/8); + uint64_t visitCOFF(uint32_t Rel, RelocationRef R, uint64_t Value) { + switch (ObjToVisit.getArch()) { + case Triple::x86: + switch (Rel) { + case COFF::IMAGE_REL_I386_SECREL: + case COFF::IMAGE_REL_I386_DIR32: + return static_cast(Value); + } + break; + case Triple::x86_64: + switch (Rel) { + case COFF::IMAGE_REL_AMD64_SECREL: + return static_cast(Value); + case COFF::IMAGE_REL_AMD64_ADDR64: + return Value; + } + break; + } + HasError = true; + return 0; } - // X86_64 MachO - RelocToApply visitMACHO_X86_64_UNSIGNED(RelocationRef R, uint64_t Value) { - uint8_t Length = getLengthMachO64(R); - Length = 1<> - createSymbolicFile(MemoryBufferRef Object, sys::fs::file_magic Type, + createSymbolicFile(MemoryBufferRef Object, llvm::file_magic Type, LLVMContext *Context); static Expected> createSymbolicFile(MemoryBufferRef Object) { - return createSymbolicFile(Object, sys::fs::file_magic::unknown, nullptr); + return createSymbolicFile(Object, llvm::file_magic::unknown, nullptr); } static Expected> createSymbolicFile(StringRef ObjectPath); - static inline bool 
classof(const Binary *v) { + static bool classof(const Binary *v) { return v->isSymbolic(); } }; diff --git a/interpreter/llvm/src/include/llvm/Object/Wasm.h b/interpreter/llvm/src/include/llvm/Object/Wasm.h index d200d4a148e35..07ee4a4d6c4da 100644 --- a/interpreter/llvm/src/include/llvm/Object/Wasm.h +++ b/interpreter/llvm/src/include/llvm/Object/Wasm.h @@ -19,11 +19,12 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/BinaryFormat/Wasm.h" #include "llvm/Object/Binary.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Support/Error.h" #include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/Wasm.h" #include #include #include @@ -48,11 +49,27 @@ class WasmSymbol { StringRef Name; SymbolType Type; uint32_t Section; + uint32_t Flags = 0; + + // Index into the imports, exports or functions array of the object depending + // on the type uint32_t ElementIndex; + + bool isWeak() const { + return Flags & wasm::WASM_SYMBOL_FLAG_WEAK; + } + + void print(raw_ostream &Out) const { + Out << "Name=" << Name << ", Type=" << static_cast(Type) + << ", Flags=" << Flags << " ElemIndex=" << ElementIndex; + } + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + LLVM_DUMP_METHOD void dump() const { print(dbgs()); } +#endif }; -class WasmSection { -public: +struct WasmSection { WasmSection() = default; uint32_t Type = 0; // Section type (See below) @@ -62,7 +79,13 @@ class WasmSection { std::vector Relocations; // Relocations for this section }; +struct WasmSegment { + uint32_t SectionOffset; + wasm::WasmDataSegment Data; +}; + class WasmObjectFile : public ObjectFile { + public: WasmObjectFile(MemoryBufferRef Object, Error &Err); @@ -81,6 +104,7 @@ class WasmObjectFile : public ObjectFile { const std::vector& memories() const { return Memories; } const std::vector& globals() const { return Globals; } const std::vector& exports() const { return Exports; } + const wasm::WasmLinkingData& linkingData() const { return LinkingData; } uint32_t getNumberOfSymbols() const { return Symbols.size(); @@ -90,7 +114,7 @@ class WasmObjectFile : public ObjectFile { return ElemSegments; } - const std::vector& dataSegments() const { + const std::vector& dataSegments() const { return DataSegments; } @@ -119,6 +143,7 @@ class WasmObjectFile : public ObjectFile { std::error_code getSectionName(DataRefImpl Sec, StringRef &Res) const override; uint64_t getSectionAddress(DataRefImpl Sec) const override; + uint64_t getSectionIndex(DataRefImpl Sec) const override; uint64_t getSectionSize(DataRefImpl Sec) const override; std::error_code getSectionContents(DataRefImpl Sec, StringRef &Res) const override; @@ -175,6 +200,7 @@ class WasmObjectFile : public ObjectFile { // Custom section types Error parseNameSection(const uint8_t *Ptr, const uint8_t *End); + Error parseLinkingSection(const uint8_t *Ptr, const uint8_t *End); Error parseRelocSection(StringRef Name, const uint8_t *Ptr, const uint8_t *End); @@ -188,14 +214,25 @@ class WasmObjectFile : public ObjectFile { std::vector Imports; std::vector Exports; std::vector ElemSegments; - std::vector DataSegments; - std::vector Symbols; + std::vector DataSegments; std::vector Functions; + std::vector Symbols; ArrayRef CodeSection; uint32_t StartFunction = -1; + bool HasLinkingSection = false; + wasm::WasmLinkingData LinkingData; + + StringMap SymbolMap; }; } // end namespace object + +inline raw_ostream &operator<<(raw_ostream &OS, + const object::WasmSymbol &Sym) { + Sym.print(OS); + return OS; +} + } // end namespace 
llvm #endif // LLVM_OBJECT_WASM_H diff --git a/interpreter/llvm/src/include/llvm/Object/WindowsResource.h b/interpreter/llvm/src/include/llvm/Object/WindowsResource.h new file mode 100644 index 0000000000000..3d32409fd4aca --- /dev/null +++ b/interpreter/llvm/src/include/llvm/Object/WindowsResource.h @@ -0,0 +1,219 @@ +//===-- WindowsResource.h ---------------------------------------*- C++-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===---------------------------------------------------------------------===// +// +// This file declares the .res file class. .res files are intermediate +// products of the typical resource-compilation process on Windows. This +// process is as follows: +// +// .rc file(s) ---(rc.exe)---> .res file(s) ---(cvtres.exe)---> COFF file +// +// .rc files are human-readable scripts that list all resources a program uses. +// +// They are compiled into .res files, which are a list of the resources in +// binary form. +// +// Finally the data stored in the .res is compiled into a COFF file, where it +// is organized in a directory tree structure for optimized access by the +// program during runtime. +// +// Ref: msdn.microsoft.com/en-us/library/windows/desktop/ms648007(v=vs.85).aspx +// +//===---------------------------------------------------------------------===// + +#ifndef LLVM_INCLUDE_LLVM_OBJECT_RESFILE_H +#define LLVM_INCLUDE_LLVM_OBJECT_RESFILE_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/BinaryFormat/COFF.h" +#include "llvm/Object/Binary.h" +#include "llvm/Object/Error.h" +#include "llvm/Support/BinaryByteStream.h" +#include "llvm/Support/BinaryStreamReader.h" +#include "llvm/Support/ConvertUTF.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ScopedPrinter.h" + +#include + +namespace llvm { +namespace object { + +class WindowsResource; + +const size_t WIN_RES_MAGIC_SIZE = 16; +const size_t WIN_RES_NULL_ENTRY_SIZE = 16; +const uint32_t WIN_RES_HEADER_ALIGNMENT = 4; +const uint32_t WIN_RES_DATA_ALIGNMENT = 4; +const uint16_t WIN_RES_PURE_MOVEABLE = 0x0030; + +struct WinResHeaderPrefix { + support::ulittle32_t DataSize; + support::ulittle32_t HeaderSize; +}; + +// Type and Name may each either be an integer ID or a string. This struct is +// only used in the case where they are both IDs. 
+struct WinResIDs { + uint16_t TypeFlag; + support::ulittle16_t TypeID; + uint16_t NameFlag; + support::ulittle16_t NameID; + + void setType(uint16_t ID) { + TypeFlag = 0xffff; + TypeID = ID; + } + + void setName(uint16_t ID) { + NameFlag = 0xffff; + NameID = ID; + } +}; + +struct WinResHeaderSuffix { + support::ulittle32_t DataVersion; + support::ulittle16_t MemoryFlags; + support::ulittle16_t Language; + support::ulittle32_t Version; + support::ulittle32_t Characteristics; +}; + +class ResourceEntryRef { +public: + Error moveNext(bool &End); + bool checkTypeString() const { return IsStringType; } + ArrayRef getTypeString() const { return Type; } + uint16_t getTypeID() const { return TypeID; } + bool checkNameString() const { return IsStringName; } + ArrayRef getNameString() const { return Name; } + uint16_t getNameID() const { return NameID; } + uint16_t getLanguage() const { return Suffix->Language; } + uint16_t getMajorVersion() const { return Suffix->Version >> 16; } + uint16_t getMinorVersion() const { return Suffix->Version; } + uint32_t getCharacteristics() const { return Suffix->Characteristics; } + ArrayRef getData() const { return Data; } + +private: + friend class WindowsResource; + + ResourceEntryRef(BinaryStreamRef Ref, const WindowsResource *Owner, + Error &Err); + + Error loadNext(); + + BinaryStreamReader Reader; + bool IsStringType; + ArrayRef Type; + uint16_t TypeID; + bool IsStringName; + ArrayRef Name; + uint16_t NameID; + const WinResHeaderSuffix *Suffix = nullptr; + ArrayRef Data; + const WindowsResource *OwningRes = nullptr; +}; + +class WindowsResource : public Binary { +public: + Expected getHeadEntry(); + + static bool classof(const Binary *V) { return V->isWinRes(); } + + static Expected> + createWindowsResource(MemoryBufferRef Source); + +private: + friend class ResourceEntryRef; + + WindowsResource(MemoryBufferRef Source); + + BinaryByteStream BBS; +}; + +class WindowsResourceParser { +public: + class TreeNode; + WindowsResourceParser(); + Error parse(WindowsResource *WR); + void printTree(raw_ostream &OS) const; + const TreeNode &getTree() const { return Root; } + const ArrayRef> getData() const { return Data; } + const ArrayRef> getStringTable() const { + return StringTable; + } + + class TreeNode { + public: + template + using Children = std::map>; + + void print(ScopedPrinter &Writer, StringRef Name) const; + uint32_t getTreeSize() const; + uint32_t getStringIndex() const { return StringIndex; } + uint32_t getDataIndex() const { return DataIndex; } + uint16_t getMajorVersion() const { return MajorVersion; } + uint16_t getMinorVersion() const { return MinorVersion; } + uint32_t getCharacteristics() const { return Characteristics; } + bool checkIsDataNode() const { return IsDataNode; } + const Children &getIDChildren() const { return IDChildren; } + const Children &getStringChildren() const { + return StringChildren; + } + + private: + friend class WindowsResourceParser; + + static uint32_t StringCount; + static uint32_t DataCount; + + static std::unique_ptr createStringNode(); + static std::unique_ptr createIDNode(); + static std::unique_ptr createDataNode(uint16_t MajorVersion, + uint16_t MinorVersion, + uint32_t Characteristics); + + explicit TreeNode(bool IsStringNode); + TreeNode(uint16_t MajorVersion, uint16_t MinorVersion, + uint32_t Characteristics); + + void addEntry(const ResourceEntryRef &Entry, bool &IsNewTypeString, + bool &IsNewNameString); + TreeNode &addTypeNode(const ResourceEntryRef &Entry, bool &IsNewTypeString); + TreeNode 
&addNameNode(const ResourceEntryRef &Entry, bool &IsNewNameString); + TreeNode &addLanguageNode(const ResourceEntryRef &Entry); + TreeNode &addChild(uint32_t ID, bool IsDataNode = false, + uint16_t MajorVersion = 0, uint16_t MinorVersion = 0, + uint32_t Characteristics = 0); + TreeNode &addChild(ArrayRef NameRef, bool &IsNewString); + + bool IsDataNode = false; + uint32_t StringIndex; + uint32_t DataIndex; + Children IDChildren; + Children StringChildren; + uint16_t MajorVersion = 0; + uint16_t MinorVersion = 0; + uint32_t Characteristics = 0; + }; + +private: + TreeNode Root; + std::vector> Data; + std::vector> StringTable; +}; + +Expected> +writeWindowsResourceCOFF(llvm::COFF::MachineTypes MachineType, + const WindowsResourceParser &Parser); + +} // namespace object +} // namespace llvm + +#endif diff --git a/interpreter/llvm/src/include/llvm/ObjectYAML/COFFYAML.h b/interpreter/llvm/src/include/llvm/ObjectYAML/COFFYAML.h index 65ad1dde67f52..bbceefac3d947 100644 --- a/interpreter/llvm/src/include/llvm/ObjectYAML/COFFYAML.h +++ b/interpreter/llvm/src/include/llvm/ObjectYAML/COFFYAML.h @@ -15,12 +15,18 @@ #define LLVM_OBJECTYAML_COFFYAML_H #include "llvm/ADT/Optional.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/COFF.h" +#include "llvm/ObjectYAML/CodeViewYAMLDebugSections.h" +#include "llvm/ObjectYAML/CodeViewYAMLTypes.h" #include "llvm/ObjectYAML/YAML.h" -#include "llvm/Support/COFF.h" +#include +#include namespace llvm { namespace COFF { + inline Characteristics operator|(Characteristics a, Characteristics b) { uint32_t Ret = static_cast(a) | static_cast(b); return static_cast(Ret); @@ -37,58 +43,67 @@ inline DLLCharacteristics operator|(DLLCharacteristics a, uint16_t Ret = static_cast(a) | static_cast(b); return static_cast(Ret); } -} + +} // end namespace COFF // The structure of the yaml files is not an exact 1:1 match to COFF. In order // to use yaml::IO, we use these structures which are closer to the source. namespace COFFYAML { - LLVM_YAML_STRONG_TYPEDEF(uint8_t, COMDATType) - LLVM_YAML_STRONG_TYPEDEF(uint32_t, WeakExternalCharacteristics) - LLVM_YAML_STRONG_TYPEDEF(uint8_t, AuxSymbolType) - - struct Relocation { - uint32_t VirtualAddress; - uint16_t Type; - StringRef SymbolName; - }; - - struct Section { - COFF::section Header; - unsigned Alignment = 0; - yaml::BinaryRef SectionData; - std::vector Relocations; - StringRef Name; - Section(); - }; - - struct Symbol { - COFF::symbol Header; - COFF::SymbolBaseType SimpleType = COFF::IMAGE_SYM_TYPE_NULL; - COFF::SymbolComplexType ComplexType = COFF::IMAGE_SYM_DTYPE_NULL; - Optional FunctionDefinition; - Optional bfAndefSymbol; - Optional WeakExternal; - StringRef File; - Optional SectionDefinition; - Optional CLRToken; - StringRef Name; - Symbol(); - }; - - struct PEHeader { - COFF::PE32Header Header; - Optional DataDirectories[COFF::NUM_DATA_DIRECTORIES]; - }; - - struct Object { - Optional OptionalHeader; - COFF::header Header; - std::vector
Sections; - std::vector Symbols; - Object(); - }; -} -} + +LLVM_YAML_STRONG_TYPEDEF(uint8_t, COMDATType) +LLVM_YAML_STRONG_TYPEDEF(uint32_t, WeakExternalCharacteristics) +LLVM_YAML_STRONG_TYPEDEF(uint8_t, AuxSymbolType) + +struct Relocation { + uint32_t VirtualAddress; + uint16_t Type; + StringRef SymbolName; +}; + +struct Section { + COFF::section Header; + unsigned Alignment = 0; + yaml::BinaryRef SectionData; + std::vector DebugS; + std::vector DebugT; + std::vector Relocations; + StringRef Name; + + Section(); +}; + +struct Symbol { + COFF::symbol Header; + COFF::SymbolBaseType SimpleType = COFF::IMAGE_SYM_TYPE_NULL; + COFF::SymbolComplexType ComplexType = COFF::IMAGE_SYM_DTYPE_NULL; + Optional FunctionDefinition; + Optional bfAndefSymbol; + Optional WeakExternal; + StringRef File; + Optional SectionDefinition; + Optional CLRToken; + StringRef Name; + + Symbol(); +}; + +struct PEHeader { + COFF::PE32Header Header; + Optional DataDirectories[COFF::NUM_DATA_DIRECTORIES]; +}; + +struct Object { + Optional OptionalHeader; + COFF::header Header; + std::vector
Sections; + std::vector Symbols; + + Object(); +}; + +} // end namespace COFFYAML + +} // end namespace llvm LLVM_YAML_IS_SEQUENCE_VECTOR(COFFYAML::Section) LLVM_YAML_IS_SEQUENCE_VECTOR(COFFYAML::Symbol) @@ -220,4 +235,4 @@ struct MappingTraits { } // end namespace yaml } // end namespace llvm -#endif +#endif // LLVM_OBJECTYAML_COFFYAML_H diff --git a/interpreter/llvm/src/include/llvm/ObjectYAML/CodeViewYAMLDebugSections.h b/interpreter/llvm/src/include/llvm/ObjectYAML/CodeViewYAMLDebugSections.h new file mode 100644 index 0000000000000..d620008e22d21 --- /dev/null +++ b/interpreter/llvm/src/include/llvm/ObjectYAML/CodeViewYAMLDebugSections.h @@ -0,0 +1,140 @@ +//=- CodeViewYAMLDebugSections.h - CodeView YAMLIO debug sections -*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines classes for handling the YAML representation of CodeView +// Debug Info. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_OBJECTYAML_CODEVIEWYAMLDEBUGSECTIONS_H +#define LLVM_OBJECTYAML_CODEVIEWYAMLDEBUGSECTIONS_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/DebugSubsection.h" +#include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/YAMLTraits.h" +#include +#include +#include + +namespace llvm { + +namespace codeview { + +class StringsAndChecksums; +class StringsAndChecksumsRef; + +} // end namespace codeview + +namespace CodeViewYAML { + +namespace detail { + +struct YAMLSubsectionBase; + +} // end namespace detail + +struct YAMLFrameData { + uint32_t RvaStart; + uint32_t CodeSize; + uint32_t LocalSize; + uint32_t ParamsSize; + uint32_t MaxStackSize; + StringRef FrameFunc; + uint32_t PrologSize; + uint32_t SavedRegsSize; + uint32_t Flags; +}; + +struct YAMLCrossModuleImport { + StringRef ModuleName; + std::vector ImportIds; +}; + +struct SourceLineEntry { + uint32_t Offset; + uint32_t LineStart; + uint32_t EndDelta; + bool IsStatement; +}; + +struct SourceColumnEntry { + uint16_t StartColumn; + uint16_t EndColumn; +}; + +struct SourceLineBlock { + StringRef FileName; + std::vector Lines; + std::vector Columns; +}; + +struct HexFormattedString { + std::vector Bytes; +}; + +struct SourceFileChecksumEntry { + StringRef FileName; + codeview::FileChecksumKind Kind; + HexFormattedString ChecksumBytes; +}; + +struct SourceLineInfo { + uint32_t RelocOffset; + uint32_t RelocSegment; + codeview::LineFlags Flags; + uint32_t CodeSize; + std::vector Blocks; +}; + +struct InlineeSite { + uint32_t Inlinee; + StringRef FileName; + uint32_t SourceLineNum; + std::vector ExtraFiles; +}; + +struct InlineeInfo { + bool HasExtraFiles; + std::vector Sites; +}; + +struct YAMLDebugSubsection { + static Expected + fromCodeViewSubection(const codeview::StringsAndChecksumsRef &SC, + const codeview::DebugSubsectionRecord &SS); + + std::shared_ptr Subsection; +}; + +struct DebugSubsectionState {}; + +Expected>> +toCodeViewSubsectionList(BumpPtrAllocator &Allocator, + ArrayRef Subsections, + const codeview::StringsAndChecksums &SC); + +std::vector +fromDebugS(ArrayRef Data, const codeview::StringsAndChecksumsRef &SC); + +void initializeStringsAndChecksums(ArrayRef Sections, + 
codeview::StringsAndChecksums &SC); + +} // end namespace CodeViewYAML + +} // end namespace llvm + +LLVM_YAML_DECLARE_MAPPING_TRAITS(CodeViewYAML::YAMLDebugSubsection) + +LLVM_YAML_IS_SEQUENCE_VECTOR(CodeViewYAML::YAMLDebugSubsection) + +#endif // LLVM_OBJECTYAML_CODEVIEWYAMLDEBUGSECTIONS_H diff --git a/interpreter/llvm/src/include/llvm/ObjectYAML/CodeViewYAMLSymbols.h b/interpreter/llvm/src/include/llvm/ObjectYAML/CodeViewYAMLSymbols.h new file mode 100644 index 0000000000000..791193c78f193 --- /dev/null +++ b/interpreter/llvm/src/include/llvm/ObjectYAML/CodeViewYAMLSymbols.h @@ -0,0 +1,49 @@ +//===- CodeViewYAMLSymbols.h - CodeView YAMLIO Symbol implementation ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines classes for handling the YAML representation of CodeView +// Debug Info. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_OBJECTYAML_CODEVIEWYAMLSYMBOLS_H +#define LLVM_OBJECTYAML_CODEVIEWYAMLSYMBOLS_H + +#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/SymbolRecord.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/YAMLTraits.h" +#include + +namespace llvm { +namespace CodeViewYAML { + +namespace detail { + +struct SymbolRecordBase; + +} // end namespace detail + +struct SymbolRecord { + std::shared_ptr Symbol; + + codeview::CVSymbol + toCodeViewSymbol(BumpPtrAllocator &Allocator, + codeview::CodeViewContainer Container) const; + + static Expected fromCodeViewSymbol(codeview::CVSymbol Symbol); +}; + +} // end namespace CodeViewYAML +} // end namespace llvm + +LLVM_YAML_DECLARE_MAPPING_TRAITS(CodeViewYAML::SymbolRecord) +LLVM_YAML_IS_SEQUENCE_VECTOR(CodeViewYAML::SymbolRecord) + +#endif // LLVM_OBJECTYAML_CODEVIEWYAMLSYMBOLS_H diff --git a/interpreter/llvm/src/include/llvm/ObjectYAML/CodeViewYAMLTypes.h b/interpreter/llvm/src/include/llvm/ObjectYAML/CodeViewYAMLTypes.h new file mode 100644 index 0000000000000..88a5668f0a14f --- /dev/null +++ b/interpreter/llvm/src/include/llvm/ObjectYAML/CodeViewYAMLTypes.h @@ -0,0 +1,71 @@ +//==- CodeViewYAMLTypes.h - CodeView YAMLIO Type implementation --*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines classes for handling the YAML representation of CodeView +// Debug Info. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_OBJECTYAML_CODEVIEWYAMLTYPES_H +#define LLVM_OBJECTYAML_CODEVIEWYAMLTYPES_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/DebugInfo/CodeView/TypeRecord.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/YAMLTraits.h" +#include +#include +#include + +namespace llvm { + +namespace codeview { + +class TypeTableBuilder; + +} // end namespace codeview + +namespace CodeViewYAML { + +namespace detail { + +struct LeafRecordBase; +struct MemberRecordBase; + +} // end namespace detail + +struct MemberRecord { + std::shared_ptr Member; +}; + +struct LeafRecord { + std::shared_ptr Leaf; + + codeview::CVType toCodeViewRecord(BumpPtrAllocator &Allocator) const; + codeview::CVType toCodeViewRecord(codeview::TypeTableBuilder &TS) const; + static Expected fromCodeViewRecord(codeview::CVType Type); +}; + +std::vector fromDebugT(ArrayRef DebugT); +ArrayRef toDebugT(ArrayRef, BumpPtrAllocator &Alloc); + +} // end namespace CodeViewYAML + +} // end namespace llvm + +LLVM_YAML_DECLARE_SCALAR_TRAITS(codeview::GUID, true) + +LLVM_YAML_DECLARE_MAPPING_TRAITS(CodeViewYAML::LeafRecord) +LLVM_YAML_DECLARE_MAPPING_TRAITS(CodeViewYAML::MemberRecord) + +LLVM_YAML_IS_SEQUENCE_VECTOR(CodeViewYAML::LeafRecord) +LLVM_YAML_IS_SEQUENCE_VECTOR(CodeViewYAML::MemberRecord) + +#endif // LLVM_OBJECTYAML_CODEVIEWYAMLTYPES_H diff --git a/interpreter/llvm/src/include/llvm/ObjectYAML/DWARFEmitter.h b/interpreter/llvm/src/include/llvm/ObjectYAML/DWARFEmitter.h index ce231cc0ce685..0d7d8b4efbdf6 100644 --- a/interpreter/llvm/src/include/llvm/ObjectYAML/DWARFEmitter.h +++ b/interpreter/llvm/src/include/llvm/ObjectYAML/DWARFEmitter.h @@ -1,5 +1,4 @@ -//===--- DWARFEmitter.h - -------------------------------------------*- C++ -//-*-===// +//===--- DWARFEmitter.h - ---------------------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -10,6 +9,7 @@ /// \file /// \brief Common declarations for yaml2obj //===----------------------------------------------------------------------===// + #ifndef LLVM_OBJECTYAML_DWARFEMITTER_H #define LLVM_OBJECTYAML_DWARFEMITTER_H @@ -19,30 +19,31 @@ #include "llvm/Support/Host.h" #include "llvm/Support/MemoryBuffer.h" #include -#include namespace llvm { + class raw_ostream; namespace DWARFYAML { + struct Data; struct PubSection; -void EmitDebugAbbrev(llvm::raw_ostream &OS, const llvm::DWARFYAML::Data &DI); -void EmitDebugStr(llvm::raw_ostream &OS, const llvm::DWARFYAML::Data &DI); +void EmitDebugAbbrev(raw_ostream &OS, const Data &DI); +void EmitDebugStr(raw_ostream &OS, const Data &DI); -void EmitDebugAranges(llvm::raw_ostream &OS, const llvm::DWARFYAML::Data &DI); -void EmitPubSection(llvm::raw_ostream &OS, - const llvm::DWARFYAML::PubSection &Sect, +void EmitDebugAranges(raw_ostream &OS, const Data &DI); +void EmitPubSection(raw_ostream &OS, const PubSection &Sect, bool IsLittleEndian); -void EmitDebugInfo(llvm::raw_ostream &OS, const llvm::DWARFYAML::Data &DI); -void EmitDebugLine(llvm::raw_ostream &OS, const llvm::DWARFYAML::Data &DI); +void EmitDebugInfo(raw_ostream &OS, const Data &DI); +void EmitDebugLine(raw_ostream &OS, const Data &DI); Expected>> EmitDebugSections(StringRef YAMLString, bool IsLittleEndian = sys::IsLittleEndianHost); -} // namespace DWARFYAML -} // namespace llvm +} // end namespace DWARFYAML + +} // end namespace llvm -#endif +#endif // LLVM_OBJECTYAML_DWARFEMITTER_H diff --git 
a/interpreter/llvm/src/include/llvm/ObjectYAML/DWARFYAML.h b/interpreter/llvm/src/include/llvm/ObjectYAML/DWARFYAML.h index 3f39cfc7bb3d7..2162f0fef8520 100644 --- a/interpreter/llvm/src/include/llvm/ObjectYAML/DWARFYAML.h +++ b/interpreter/llvm/src/include/llvm/ObjectYAML/DWARFYAML.h @@ -16,8 +16,11 @@ #ifndef LLVM_OBJECTYAML_DWARFYAML_H #define LLVM_OBJECTYAML_DWARFYAML_H -#include "llvm/ObjectYAML/YAML.h" -#include "llvm/Support/Dwarf.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/Support/YAMLTraits.h" +#include +#include namespace llvm { namespace DWARFYAML { @@ -76,13 +79,11 @@ struct PubEntry { }; struct PubSection { - PubSection() : IsGNUStyle(false) {} - InitialLength Length; uint16_t Version; uint32_t UnitOffset; uint32_t UnitSize; - bool IsGNUStyle; + bool IsGNUStyle = false; std::vector Entries; }; @@ -158,12 +159,10 @@ struct Data { bool isEmpty() const; }; -} // namespace llvm::DWARFYAML -} // namespace llvm +} // end namespace DWARFYAML +} // end namespace llvm -LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(uint8_t) LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::Hex64) -LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::StringRef) LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::Hex8) LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::DWARFYAML::AttributeAbbrev) LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::DWARFYAML::Abbrev) @@ -241,7 +240,7 @@ template <> struct MappingTraits { template <> struct ScalarEnumerationTraits { static void enumeration(IO &io, dwarf::Tag &value) { -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" io.enumFallback(value); } }; @@ -251,7 +250,7 @@ template <> struct ScalarEnumerationTraits { template <> struct ScalarEnumerationTraits { static void enumeration(IO &io, dwarf::LineNumberOps &value) { -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" io.enumFallback(value); } }; @@ -261,7 +260,7 @@ template <> struct ScalarEnumerationTraits { template <> struct ScalarEnumerationTraits { static void enumeration(IO &io, dwarf::LineNumberExtendedOps &value) { -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" io.enumFallback(value); } }; @@ -271,7 +270,7 @@ template <> struct ScalarEnumerationTraits { template <> struct ScalarEnumerationTraits { static void enumeration(IO &io, dwarf::Attribute &value) { -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" io.enumFallback(value); } }; @@ -281,7 +280,7 @@ template <> struct ScalarEnumerationTraits { template <> struct ScalarEnumerationTraits { static void enumeration(IO &io, dwarf::Form &value) { -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" io.enumFallback(value); } }; @@ -291,7 +290,7 @@ template <> struct ScalarEnumerationTraits { template <> struct ScalarEnumerationTraits { static void enumeration(IO &io, dwarf::UnitType &value) { -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" io.enumFallback(value); } }; @@ -304,7 +303,7 @@ template <> struct ScalarEnumerationTraits { } }; -} // namespace llvm::yaml -} // namespace llvm +} // end namespace yaml +} // end namespace llvm -#endif +#endif // LLVM_OBJECTYAML_DWARFYAML_H diff --git a/interpreter/llvm/src/include/llvm/ObjectYAML/ELFYAML.h b/interpreter/llvm/src/include/llvm/ObjectYAML/ELFYAML.h index 81a4ec28c94fa..ed455311696ea 100644 --- a/interpreter/llvm/src/include/llvm/ObjectYAML/ELFYAML.h +++ b/interpreter/llvm/src/include/llvm/ObjectYAML/ELFYAML.h @@ -16,8 +16,12 @@ #ifndef LLVM_OBJECTYAML_ELFYAML_H #define 
LLVM_OBJECTYAML_ELFYAML_H +#include "llvm/ADT/StringRef.h" #include "llvm/ObjectYAML/YAML.h" -#include "llvm/Support/ELF.h" +#include "llvm/Support/YAMLTraits.h" +#include +#include +#include namespace llvm { namespace ELFYAML { @@ -66,6 +70,7 @@ struct FileHeader { ELF_EF Flags; llvm::yaml::Hex64 Entry; }; + struct Symbol { StringRef Name; ELF_STT Type; @@ -74,6 +79,7 @@ struct Symbol { llvm::yaml::Hex64 Size; uint8_t Other; }; + struct LocalGlobalWeakSymbols { std::vector Local; std::vector Global; @@ -100,13 +106,16 @@ struct Section { StringRef Link; StringRef Info; llvm::yaml::Hex64 AddressAlign; + Section(SectionKind Kind) : Kind(Kind) {} virtual ~Section(); }; struct RawContentSection : Section { yaml::BinaryRef Content; llvm::yaml::Hex64 Size; + RawContentSection() : Section(SectionKind::RawContent) {} + static bool classof(const Section *S) { return S->Kind == SectionKind::RawContent; } @@ -114,7 +123,9 @@ struct RawContentSection : Section { struct NoBitsSection : Section { llvm::yaml::Hex64 Size; + NoBitsSection() : Section(SectionKind::NoBits) {} + static bool classof(const Section *S) { return S->Kind == SectionKind::NoBits; } @@ -124,7 +135,9 @@ struct Group : Section { // Members of a group contain a flag and a list of section indices // that are part of the group. std::vector Members; + Group() : Section(SectionKind::Group) {} + static bool classof(const Section *S) { return S->Kind == SectionKind::Group; } @@ -136,9 +149,12 @@ struct Relocation { ELF_REL Type; StringRef Symbol; }; + struct RelocationSection : Section { std::vector Relocations; + RelocationSection() : Section(SectionKind::Relocation) {} + static bool classof(const Section *S) { return S->Kind == SectionKind::Relocation; } @@ -157,7 +173,9 @@ struct MipsABIFlags : Section { MIPS_AFL_ASE ASEs; MIPS_AFL_FLAGS1 Flags1; llvm::yaml::Hex32 Flags2; + MipsABIFlags() : Section(SectionKind::MipsABIFlags) {} + static bool classof(const Section *S) { return S->Kind == SectionKind::MipsABIFlags; } @@ -316,4 +334,4 @@ template <> struct MappingTraits { } // end namespace yaml } // end namespace llvm -#endif +#endif // LLVM_OBJECTYAML_ELFYAML_H diff --git a/interpreter/llvm/src/include/llvm/ObjectYAML/MachOYAML.h b/interpreter/llvm/src/include/llvm/ObjectYAML/MachOYAML.h index ae858c8f4aafd..305497b6aa6a6 100644 --- a/interpreter/llvm/src/include/llvm/ObjectYAML/MachOYAML.h +++ b/interpreter/llvm/src/include/llvm/ObjectYAML/MachOYAML.h @@ -16,9 +16,13 @@ #ifndef LLVM_OBJECTYAML_MACHOYAML_H #define LLVM_OBJECTYAML_MACHOYAML_H -#include "llvm/ObjectYAML/YAML.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/MachO.h" #include "llvm/ObjectYAML/DWARFYAML.h" -#include "llvm/Support/MachO.h" +#include "llvm/Support/YAMLTraits.h" +#include +#include +#include namespace llvm { namespace MachOYAML { @@ -51,6 +55,7 @@ struct FileHeader { struct LoadCommand { virtual ~LoadCommand(); + llvm::MachO::macho_load_command Data; std::vector
Sections; std::vector Tools; @@ -66,6 +71,7 @@ struct NListEntry { uint16_t n_desc; uint64_t n_value; }; + struct RebaseOpcode { MachO::RebaseOpcode Opcode; uint8_t Imm; @@ -81,15 +87,12 @@ struct BindOpcode { }; struct ExportEntry { - ExportEntry() - : TerminalSize(0), NodeOffset(0), Name(), Flags(0), Address(0), Other(0), - ImportName(), Children() {} - uint64_t TerminalSize; - uint64_t NodeOffset; + uint64_t TerminalSize = 0; + uint64_t NodeOffset = 0; std::string Name; - llvm::yaml::Hex64 Flags; - llvm::yaml::Hex64 Address; - llvm::yaml::Hex64 Other; + llvm::yaml::Hex64 Flags = 0; + llvm::yaml::Hex64 Address = 0; + llvm::yaml::Hex64 Other = 0; std::string ImportName; std::vector Children; }; @@ -135,12 +138,11 @@ struct UniversalBinary { std::vector Slices; }; -} // namespace llvm::MachOYAML -} // namespace llvm +} // end namespace MachOYAML +} // end namespace llvm LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MachOYAML::LoadCommand) LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MachOYAML::Section) -LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(int64_t) LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MachOYAML::RebaseOpcode) LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MachOYAML::BindOpcode) LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MachOYAML::ExportEntry) @@ -150,6 +152,9 @@ LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MachOYAML::FatArch) LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MachO::build_tool_version) namespace llvm { + +class raw_ostream; + namespace yaml { template <> struct MappingTraits { @@ -209,7 +214,7 @@ template <> struct MappingTraits { template <> struct ScalarEnumerationTraits { static void enumeration(IO &io, MachO::LoadCommandType &value) { -#include "llvm/Support/MachO.def" +#include "llvm/BinaryFormat/MachO.def" io.enumFallback(value); } }; @@ -251,22 +256,20 @@ template <> struct ScalarEnumerationTraits { }; // This trait is used for 16-byte chars in Mach structures used for strings -typedef char char_16[16]; +using char_16 = char[16]; template <> struct ScalarTraits { - static void output(const char_16 &Val, void *, llvm::raw_ostream &Out); - + static void output(const char_16 &Val, void *, raw_ostream &Out); static StringRef input(StringRef Scalar, void *, char_16 &Val); static bool mustQuote(StringRef S); }; // This trait is used for UUIDs. It reads and writes them matching otool's // formatting style. 
-typedef uint8_t uuid_t[16]; +using uuid_t = uint8_t[16]; template <> struct ScalarTraits { - static void output(const uuid_t &Val, void *, llvm::raw_ostream &Out); - + static void output(const uuid_t &Val, void *, raw_ostream &Out); static StringRef input(StringRef Scalar, void *, uuid_t &Val); static bool mustQuote(StringRef S); }; @@ -278,7 +281,7 @@ template <> struct ScalarTraits { static void mapping(IO &IO, MachO::LCStruct &LoadCommand); \ }; -#include "llvm/Support/MachO.def" +#include "llvm/BinaryFormat/MachO.def" // Extra structures used by load commands template <> struct MappingTraits { @@ -297,8 +300,8 @@ template <> struct MappingTraits { static void mapping(IO &IO, MachO::section_64 &LoadCommand); }; -} // namespace llvm::yaml +} // end namespace yaml -} // namespace llvm +} // end namespace llvm -#endif +#endif // LLVM_OBJECTYAML_MACHOYAML_H diff --git a/interpreter/llvm/src/include/llvm/ObjectYAML/ObjectYAML.h b/interpreter/llvm/src/include/llvm/ObjectYAML/ObjectYAML.h index 36d6ed5417cf5..00ce86430fca4 100644 --- a/interpreter/llvm/src/include/llvm/ObjectYAML/ObjectYAML.h +++ b/interpreter/llvm/src/include/llvm/ObjectYAML/ObjectYAML.h @@ -15,10 +15,13 @@ #include "llvm/ObjectYAML/MachOYAML.h" #include "llvm/ObjectYAML/WasmYAML.h" #include "llvm/Support/YAMLTraits.h" +#include namespace llvm { namespace yaml { +class IO; + struct YamlObjectFile { std::unique_ptr Elf; std::unique_ptr Coff; @@ -31,7 +34,7 @@ template <> struct MappingTraits { static void mapping(IO &IO, YamlObjectFile &ObjectFile); }; -} // namespace yaml -} // namespace llvm +} // end namespace yaml +} // end namespace llvm -#endif +#endif // LLVM_OBJECTYAML_OBJECTYAML_H diff --git a/interpreter/llvm/src/include/llvm/ObjectYAML/WasmYAML.h b/interpreter/llvm/src/include/llvm/ObjectYAML/WasmYAML.h index 7b70c9537827a..709ad8ec3b776 100644 --- a/interpreter/llvm/src/include/llvm/ObjectYAML/WasmYAML.h +++ b/interpreter/llvm/src/include/llvm/ObjectYAML/WasmYAML.h @@ -16,8 +16,13 @@ #ifndef LLVM_OBJECTYAML_WASMYAML_H #define LLVM_OBJECTYAML_WASMYAML_H +#include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/Wasm.h" #include "llvm/ObjectYAML/YAML.h" -#include "llvm/Support/Wasm.h" +#include "llvm/Support/Casting.h" +#include +#include +#include namespace llvm { namespace WasmYAML { @@ -93,7 +98,8 @@ struct Relocation { }; struct DataSegment { - uint32_t Index; + uint32_t MemoryIndex; + uint32_t SectionOffset; wasm::WasmInitExpr Offset; yaml::BinaryRef Content; }; @@ -104,16 +110,19 @@ struct NameEntry { }; struct Signature { - Signature() : Form(wasm::WASM_TYPE_FUNC) {} - uint32_t Index; - SignatureForm Form; + SignatureForm Form = wasm::WASM_TYPE_FUNC; std::vector ParamTypes; ValueType ReturnType; }; +struct SymbolInfo { + StringRef Name; + uint32_t Flags; +}; + struct Section { - Section(SectionType SecType) : Type(SecType) {} + explicit Section(SectionType SecType) : Type(SecType) {} virtual ~Section(); SectionType Type; @@ -121,22 +130,44 @@ struct Section { }; struct CustomSection : Section { - CustomSection() : Section(wasm::WASM_SEC_CUSTOM) {} + explicit CustomSection(StringRef Name) + : Section(wasm::WASM_SEC_CUSTOM), Name(Name) {} + static bool classof(const Section *S) { return S->Type == wasm::WASM_SEC_CUSTOM; } StringRef Name; yaml::BinaryRef Payload; +}; + +struct NameSection : CustomSection { + NameSection() : CustomSection("name") {} + + static bool classof(const Section *S) { + auto C = dyn_cast(S); + return C && C->Name == "name"; + } - // The follow is used by the "name" custom section. 
- // TODO(sbc): Add support for more then just functions names. The wasm - // name section can support multiple sub-sections. std::vector FunctionNames; }; +struct LinkingSection : CustomSection { + LinkingSection() : CustomSection("linking") {} + + static bool classof(const Section *S) { + auto C = dyn_cast(S); + return C && C->Name == "linking"; + } + + std::vector SymbolInfos; + uint32_t DataSize; + uint32_t DataAlignment; +}; + struct TypeSection : Section { TypeSection() : Section(wasm::WASM_SEC_TYPE) {} + static bool classof(const Section *S) { return S->Type == wasm::WASM_SEC_TYPE; } @@ -146,6 +177,7 @@ struct TypeSection : Section { struct ImportSection : Section { ImportSection() : Section(wasm::WASM_SEC_IMPORT) {} + static bool classof(const Section *S) { return S->Type == wasm::WASM_SEC_IMPORT; } @@ -155,6 +187,7 @@ struct ImportSection : Section { struct FunctionSection : Section { FunctionSection() : Section(wasm::WASM_SEC_FUNCTION) {} + static bool classof(const Section *S) { return S->Type == wasm::WASM_SEC_FUNCTION; } @@ -164,6 +197,7 @@ struct FunctionSection : Section { struct TableSection : Section { TableSection() : Section(wasm::WASM_SEC_TABLE) {} + static bool classof(const Section *S) { return S->Type == wasm::WASM_SEC_TABLE; } @@ -173,6 +207,7 @@ struct TableSection : Section { struct MemorySection : Section { MemorySection() : Section(wasm::WASM_SEC_MEMORY) {} + static bool classof(const Section *S) { return S->Type == wasm::WASM_SEC_MEMORY; } @@ -182,6 +217,7 @@ struct MemorySection : Section { struct GlobalSection : Section { GlobalSection() : Section(wasm::WASM_SEC_GLOBAL) {} + static bool classof(const Section *S) { return S->Type == wasm::WASM_SEC_GLOBAL; } @@ -191,6 +227,7 @@ struct GlobalSection : Section { struct ExportSection : Section { ExportSection() : Section(wasm::WASM_SEC_EXPORT) {} + static bool classof(const Section *S) { return S->Type == wasm::WASM_SEC_EXPORT; } @@ -200,6 +237,7 @@ struct ExportSection : Section { struct StartSection : Section { StartSection() : Section(wasm::WASM_SEC_START) {} + static bool classof(const Section *S) { return S->Type == wasm::WASM_SEC_START; } @@ -209,6 +247,7 @@ struct StartSection : Section { struct ElemSection : Section { ElemSection() : Section(wasm::WASM_SEC_ELEM) {} + static bool classof(const Section *S) { return S->Type == wasm::WASM_SEC_ELEM; } @@ -218,6 +257,7 @@ struct ElemSection : Section { struct CodeSection : Section { CodeSection() : Section(wasm::WASM_SEC_CODE) {} + static bool classof(const Section *S) { return S->Type == wasm::WASM_SEC_CODE; } @@ -227,6 +267,7 @@ struct CodeSection : Section { struct DataSection : Section { DataSection() : Section(wasm::WASM_SEC_DATA) {} + static bool classof(const Section *S) { return S->Type == wasm::WASM_SEC_DATA; } @@ -256,7 +297,7 @@ LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::WasmYAML::Function) LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::WasmYAML::LocalDecl) LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::WasmYAML::Relocation) LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::WasmYAML::NameEntry) -LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(uint32_t) +LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::WasmYAML::SymbolInfo) namespace llvm { namespace yaml { @@ -329,6 +370,10 @@ template <> struct MappingTraits { static void mapping(IO &IO, WasmYAML::ElemSegment &Segment); }; +template <> struct MappingTraits { + static void mapping(IO &IO, WasmYAML::SymbolInfo &Info); +}; + template <> struct ScalarEnumerationTraits { static void enumeration(IO &IO, WasmYAML::ValueType &Type); }; @@ -352,4 +397,4 @@ template <> struct 
ScalarEnumerationTraits { } // end namespace yaml } // end namespace llvm -#endif +#endif // LLVM_OBJECTYAML_WASMYAML_H diff --git a/interpreter/llvm/src/include/llvm/ObjectYAML/YAML.h b/interpreter/llvm/src/include/llvm/ObjectYAML/YAML.h index 7f6836809b6d9..29151a269df09 100644 --- a/interpreter/llvm/src/include/llvm/ObjectYAML/YAML.h +++ b/interpreter/llvm/src/include/llvm/ObjectYAML/YAML.h @@ -10,10 +10,17 @@ #ifndef LLVM_OBJECTYAML_YAML_H #define LLVM_OBJECTYAML_YAML_H +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Support/YAMLTraits.h" +#include namespace llvm { + +class raw_ostream; + namespace yaml { + /// \brief Specialized YAMLIO scalar type for representing a binary blob. /// /// A typical use case would be to represent the content of a section in a @@ -56,18 +63,20 @@ namespace yaml { /// \endcode class BinaryRef { friend bool operator==(const BinaryRef &LHS, const BinaryRef &RHS); + /// \brief Either raw binary data, or a string of hex bytes (must always /// be an even number of characters). ArrayRef Data; + /// \brief Discriminator between the two states of the `Data` member. - bool DataIsHexString; + bool DataIsHexString = true; public: + BinaryRef() = default; BinaryRef(ArrayRef Data) : Data(Data), DataIsHexString(false) {} BinaryRef(StringRef Data) - : Data(reinterpret_cast(Data.data()), Data.size()), - DataIsHexString(true) {} - BinaryRef() : DataIsHexString(true) {} + : Data(reinterpret_cast(Data.data()), Data.size()) {} + /// \brief The number of bytes that are represented by this BinaryRef. /// This is the number of bytes that writeAsBinary() will write. ArrayRef::size_type binary_size() const { @@ -75,9 +84,11 @@ class BinaryRef { return Data.size() / 2; return Data.size(); } + /// \brief Write the contents (regardless of whether it is binary or a /// hex string) as binary to the given raw_ostream. void writeAsBinary(raw_ostream &OS) const; + /// \brief Write the contents (regardless of whether it is binary or a /// hex string) as hex to the given raw_ostream. /// @@ -94,10 +105,13 @@ inline bool operator==(const BinaryRef &LHS, const BinaryRef &RHS) { } template <> struct ScalarTraits { - static void output(const BinaryRef &, void *, llvm::raw_ostream &); + static void output(const BinaryRef &, void *, raw_ostream &); static StringRef input(StringRef, void *, BinaryRef &); static bool mustQuote(StringRef S) { return needsQuotes(S); } }; -} -} -#endif + +} // end namespace yaml + +} // end namespace llvm + +#endif // LLVM_OBJECTYAML_YAML_H diff --git a/interpreter/llvm/src/include/llvm/Option/Arg.h b/interpreter/llvm/src/include/llvm/Option/Arg.h index 99d329693de2e..c519a4a824c51 100644 --- a/interpreter/llvm/src/include/llvm/Option/Arg.h +++ b/interpreter/llvm/src/include/llvm/Option/Arg.h @@ -1,4 +1,4 @@ -//===--- Arg.h - Parsed Argument Classes ------------------------*- C++ -*-===// +//===- Arg.h - Parsed Argument Classes --------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -21,7 +21,11 @@ #include namespace llvm { + +class raw_ostream; + namespace opt { + class ArgList; /// \brief A concrete instance of a particular driver option. @@ -29,9 +33,6 @@ class ArgList; /// The Arg class encodes just enough information to be able to /// derive the argument values efficiently. class Arg { - Arg(const Arg &) = delete; - void operator=(const Arg &) = delete; - private: /// \brief The option this argument is an instance of. 
const Option Opt; @@ -65,6 +66,8 @@ class Arg { const char *Value0, const Arg *BaseArg = nullptr); Arg(const Option Opt, StringRef Spelling, unsigned Index, const char *Value0, const char *Value1, const Arg *BaseArg = nullptr); + Arg(const Arg &) = delete; + Arg &operator=(const Arg &) = delete; ~Arg(); const Option &getOption() const { return Opt; } @@ -89,6 +92,7 @@ class Arg { void claim() const { getBaseArg().Claimed = true; } unsigned getNumValues() const { return Values.size(); } + const char *getValue(unsigned N = 0) const { return Values[N]; } @@ -122,6 +126,7 @@ class Arg { }; } // end namespace opt + } // end namespace llvm -#endif +#endif // LLVM_OPTION_ARG_H diff --git a/interpreter/llvm/src/include/llvm/Option/ArgList.h b/interpreter/llvm/src/include/llvm/Option/ArgList.h index 4ed28d7a852b4..aaea68bf8e278 100644 --- a/interpreter/llvm/src/include/llvm/Option/ArgList.h +++ b/interpreter/llvm/src/include/llvm/Option/ArgList.h @@ -1,4 +1,4 @@ -//===--- ArgList.h - Argument List Management -------------------*- C++ -*-===// +//===- ArgList.h - Argument List Management ---------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -10,23 +10,31 @@ #ifndef LLVM_OPTION_ARGLIST_H #define LLVM_OPTION_ARGLIST_H +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/Option/Arg.h" #include "llvm/Option/OptSpecifier.h" #include "llvm/Option/Option.h" +#include +#include +#include +#include #include #include #include +#include #include namespace llvm { + +class raw_ostream; + namespace opt { -class ArgList; -class Option; /// arg_iterator - Iterates through arguments stored inside an ArgList. template @@ -59,14 +67,14 @@ class arg_iterator { } } - typedef std::iterator_traits Traits; + using Traits = std::iterator_traits; public: - typedef typename Traits::value_type value_type; - typedef typename Traits::reference reference; - typedef typename Traits::pointer pointer; - typedef std::forward_iterator_tag iterator_category; - typedef std::ptrdiff_t difference_type; + using value_type = typename Traits::value_type; + using reference = typename Traits::reference; + using pointer = typename Traits::pointer; + using iterator_category = std::forward_iterator_tag; + using difference_type = std::ptrdiff_t; arg_iterator( BaseIter Current, BaseIter End, @@ -111,12 +119,12 @@ class arg_iterator { /// and to iterate over groups of arguments. class ArgList { public: - typedef SmallVector arglist_type; - typedef arg_iterator iterator; - typedef arg_iterator const_iterator; - typedef arg_iterator reverse_iterator; - typedef arg_iterator - const_reverse_iterator; + using arglist_type = SmallVector; + using iterator = arg_iterator; + using const_iterator = arg_iterator; + using reverse_iterator = arg_iterator; + using const_reverse_iterator = + arg_iterator; template using filtered_iterator = arg_iterator; @@ -127,7 +135,7 @@ class ArgList { /// The internal list of arguments. arglist_type Args; - typedef std::pair OptRange; + using OptRange = std::pair; static OptRange emptyRange() { return {-1u, 0u}; } /// The first and last index of each different OptSpecifier ID. @@ -142,6 +150,7 @@ class ArgList { // derived objects, but can still be used by derived objects to implement // their own special members. 
ArgList() = default; + // Explicit move operations to ensure the container is cleared post-move // otherwise it could lead to a double-delete in the case of moving of an // InputArgList which deletes the contents of the container. If we could fix @@ -152,6 +161,7 @@ class ArgList { RHS.Args.clear(); RHS.OptRanges.clear(); } + ArgList &operator=(ArgList &&RHS) { Args = std::move(RHS.Args); RHS.Args.clear(); @@ -159,6 +169,7 @@ class ArgList { RHS.OptRanges.clear(); return *this; } + // Protect the dtor to ensure this type is never destroyed polymorphically. ~ArgList() = default; @@ -380,10 +391,12 @@ class InputArgList final : public ArgList { public: InputArgList(const char* const *ArgBegin, const char* const *ArgEnd); + InputArgList(InputArgList &&RHS) : ArgList(std::move(RHS)), ArgStrings(std::move(RHS.ArgStrings)), SynthesizedStrings(std::move(RHS.SynthesizedStrings)), NumInputArgStrings(RHS.NumInputArgStrings) {} + InputArgList &operator=(InputArgList &&RHS) { releaseMemory(); ArgList::operator=(std::move(RHS)); @@ -392,6 +405,7 @@ class InputArgList final : public ArgList { NumInputArgStrings = RHS.NumInputArgStrings; return *this; } + ~InputArgList() { releaseMemory(); } const char *getArgString(unsigned Index) const override { @@ -464,7 +478,6 @@ class DerivedArgList final : public ArgList { append(MakePositionalArg(BaseArg, Opt, Value)); } - /// AddSeparateArg - Construct a new Positional arg for the given option /// \p Id, with the provided \p Value and append it to the argument /// list. @@ -473,7 +486,6 @@ class DerivedArgList final : public ArgList { append(MakeSeparateArg(BaseArg, Opt, Value)); } - /// AddJoinedArg - Construct a new Positional arg for the given option /// \p Id, with the provided \p Value and append it to the argument list. void AddJoinedArg(const Arg *BaseArg, const Option Opt, @@ -481,7 +493,6 @@ class DerivedArgList final : public ArgList { append(MakeJoinedArg(BaseArg, Opt, Value)); } - /// MakeFlagArg - Construct a new FlagArg for the given option \p Id. Arg *MakeFlagArg(const Arg *BaseArg, const Option Opt) const; @@ -504,6 +515,7 @@ class DerivedArgList final : public ArgList { }; } // end namespace opt + } // end namespace llvm -#endif +#endif // LLVM_OPTION_ARGLIST_H diff --git a/interpreter/llvm/src/include/llvm/Option/OptParser.td b/interpreter/llvm/src/include/llvm/Option/OptParser.td index 4da86f09750db..481223698719b 100644 --- a/interpreter/llvm/src/include/llvm/Option/OptParser.td +++ b/interpreter/llvm/src/include/llvm/Option/OptParser.td @@ -92,6 +92,7 @@ class Option prefixes, string name, OptionKind kind> { int NumArgs = 0; string HelpText = ?; string MetaVarName = ?; + string Values = ?; list Flags = []; OptionGroup Group = ?; Option Alias = ?; @@ -126,6 +127,7 @@ class Flags flags> { list Flags = flags; } class Group { OptionGroup Group = group; } class HelpText { string HelpText = text; } class MetaVarName { string MetaVarName = name; } +class Values { string Values = value; } // Predefined options. 
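The new option metadata threads through two layers shown in this patch: the TableGen `Values` class above, and the `const char *Values` member that `OptTable::Info` gains just below. As a hedged sketch (the option ID `OPT_stdlib_EQ`, the prefix table, and the value list are invented for illustration, and the usual OptTable includes are assumed; the initializer order follows the `Info` layout visible in this patch), a hand-written table row would now look like:

static const char *const Prefix1[] = {"-", nullptr};
// Prefixes, Name, HelpText, MetaVar, ID, Kind, Param, Flags, GroupID,
// AliasID, AliasArgs, Values -- the trailing Values string is the new
// column; pass nullptr for options that define no completion values.
static const llvm::opt::OptTable::Info InfoTable[] = {
    {Prefix1, "stdlib=", "C++ standard library to use", nullptr,
     OPT_stdlib_EQ, llvm::opt::Option::JoinedClass, 0, 0, 0, 0, nullptr,
     "libc++,libstdc++,platform"}};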
diff --git a/interpreter/llvm/src/include/llvm/Option/OptSpecifier.h b/interpreter/llvm/src/include/llvm/Option/OptSpecifier.h index 0b2aaaec3afc8..84c3cf8ad534d 100644 --- a/interpreter/llvm/src/include/llvm/Option/OptSpecifier.h +++ b/interpreter/llvm/src/include/llvm/Option/OptSpecifier.h @@ -1,4 +1,4 @@ -//===--- OptSpecifier.h - Option Specifiers ---------------------*- C++ -*-===// +//===- OptSpecifier.h - Option Specifiers -----------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -10,32 +10,30 @@ #ifndef LLVM_OPTION_OPTSPECIFIER_H #define LLVM_OPTION_OPTSPECIFIER_H -#include "llvm/Support/Compiler.h" - namespace llvm { namespace opt { - class Option; - /// OptSpecifier - Wrapper class for abstracting references to option IDs. - class OptSpecifier { - unsigned ID; +class Option; + +/// OptSpecifier - Wrapper class for abstracting references to option IDs. +class OptSpecifier { + unsigned ID = 0; - private: - explicit OptSpecifier(bool) = delete; +public: + OptSpecifier() = default; + explicit OptSpecifier(bool) = delete; + /*implicit*/ OptSpecifier(unsigned ID) : ID(ID) {} + /*implicit*/ OptSpecifier(const Option *Opt); - public: - OptSpecifier() : ID(0) {} - /*implicit*/ OptSpecifier(unsigned ID) : ID(ID) {} - /*implicit*/ OptSpecifier(const Option *Opt); + bool isValid() const { return ID != 0; } - bool isValid() const { return ID != 0; } + unsigned getID() const { return ID; } - unsigned getID() const { return ID; } + bool operator==(OptSpecifier Opt) const { return ID == Opt.getID(); } + bool operator!=(OptSpecifier Opt) const { return !(*this == Opt); } +}; - bool operator==(OptSpecifier Opt) const { return ID == Opt.getID(); } - bool operator!=(OptSpecifier Opt) const { return !(*this == Opt); } - }; -} -} +} // end namespace opt +} // end namespace llvm -#endif +#endif // LLVM_OPTION_OPTSPECIFIER_H diff --git a/interpreter/llvm/src/include/llvm/Option/OptTable.h b/interpreter/llvm/src/include/llvm/Option/OptTable.h index 390e52774fea5..a35e182f00e53 100644 --- a/interpreter/llvm/src/include/llvm/Option/OptTable.h +++ b/interpreter/llvm/src/include/llvm/Option/OptTable.h @@ -1,4 +1,4 @@ -//===--- OptTable.h - Option Table ------------------------------*- C++ -*-===// +//===- OptTable.h - Option Table --------------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -11,12 +11,19 @@ #define LLVM_OPTION_OPTTABLE_H #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSet.h" #include "llvm/Option/OptSpecifier.h" +#include +#include +#include namespace llvm { + class raw_ostream; + namespace opt { + class Arg; class ArgList; class InputArgList; @@ -46,6 +53,7 @@ class OptTable { unsigned short GroupID; unsigned short AliasID; const char *AliasArgs; + const char *Values; }; private: @@ -53,12 +61,12 @@ class OptTable { ArrayRef OptionInfos; bool IgnoreCase; - unsigned TheInputOptionID; - unsigned TheUnknownOptionID; + unsigned TheInputOptionID = 0; + unsigned TheUnknownOptionID = 0; /// The index of the first option which can be parsed (i.e., is not a /// special option like 'input' or 'unknown', and is not an option group). - unsigned FirstSearchableIndex; + unsigned FirstSearchableIndex = 0; /// The union of all option prefixes. If an argument does not begin with /// one of these, it is an input. @@ -113,6 +121,28 @@ class OptTable { return getInfo(id).MetaVar; } + /// Find possible value for given flags. This is used for shell + /// autocompletion. 
+ /// + /// \param [in] Option - Key flag like "-stdlib=" when "-stdlib=l" + /// was passed to clang. + /// + /// \param [in] Arg - Value which we want to autocomplete like "l" + /// when "-stdlib=l" was passed to clang. + /// + /// \return The vector of possible values. + std::vector<std::string> suggestValueCompletions(StringRef Option, + StringRef Arg) const; + + /// Find flags from OptTable which start with Cur. + /// + /// \param [in] Cur - String prefix that all returned flags need + /// to start with. + /// + /// \return The vector of flags which start with Cur. + std::vector<std::string> findByPrefix(StringRef Cur, + unsigned short DisableFlags) const; + /// \brief Parse a single argument; returning the new argument and /// updating Index. /// @@ -168,7 +198,9 @@ class OptTable { void PrintHelp(raw_ostream &OS, const char *Name, const char *Title, bool ShowHidden = false) const; }; + } // end namespace opt + } // end namespace llvm -#endif +#endif // LLVM_OPTION_OPTTABLE_H diff --git a/interpreter/llvm/src/include/llvm/Option/Option.h b/interpreter/llvm/src/include/llvm/Option/Option.h index 139f281b3c4ce..d9aebd5b07570 100644 --- a/interpreter/llvm/src/include/llvm/Option/Option.h +++ b/interpreter/llvm/src/include/llvm/Option/Option.h @@ -1,4 +1,4 @@ -//===--- Option.h - Abstract Driver Options ---------------------*- C++ -*-===// +//===- Option.h - Abstract Driver Options -----------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -12,15 +12,23 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Option/OptSpecifier.h" #include "llvm/Option/OptTable.h" #include "llvm/Support/ErrorHandling.h" +#include <cassert> +#include <string> namespace llvm { + +class raw_ostream; + namespace opt { + class Arg; class ArgList; + /// ArgStringList - Type used for constructing argv lists for subprocesses. -typedef SmallVector<const char *, 16> ArgStringList; +using ArgStringList = SmallVector<const char *, 16>; /// Base flags for all options. Custom flags may be added after. enum DriverFlag { @@ -49,6 +57,7 @@ class Option { UnknownClass, FlagClass, JoinedClass, + ValuesClass, SeparateClass, RemainingArgsClass, RemainingArgsJoinedClass, @@ -147,6 +156,7 @@ class Option { case CommaJoinedClass: return RenderCommaJoinedStyle; case FlagClass: + case ValuesClass: case SeparateClass: case MultiArgClass: case JoinedOrSeparateClass: @@ -202,6 +212,7 @@ class Option { }; } // end namespace opt + } // end namespace llvm -#endif +#endif // LLVM_OPTION_OPTION_H diff --git a/interpreter/llvm/src/include/llvm/Pass.h b/interpreter/llvm/src/include/llvm/Pass.h index e9c8ca3072c71..2dd6935cf01c6 100644 --- a/interpreter/llvm/src/include/llvm/Pass.h +++ b/interpreter/llvm/src/include/llvm/Pass.h @@ -384,7 +384,7 @@ extern bool isFunctionInPrintList(StringRef FunctionName); // Include support files that contain important APIs commonly used by Passes, // but that we want to separate out to make it easier to read the header files.
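Taken together, the per-option `Values` strings and the two OptTable helpers declared above are enough for a small shell-completion routine. A minimal sketch, not part of the patch (`Opts` is an already-constructed option table and `Typed` is the fragment the shell hands over):

std::vector<std::string> complete(const llvm::opt::OptTable &Opts,
                                  llvm::StringRef Typed) {
  // For "-stdlib=l", complete the value part against the flag's Values list.
  size_t Eq = Typed.find('=');
  if (Eq != llvm::StringRef::npos)
    return Opts.suggestValueCompletions(Typed.take_front(Eq + 1),
                                        Typed.drop_front(Eq + 1));
  // Otherwise complete the flag name itself by prefix.
  return Opts.findByPrefix(Typed, /*DisableFlags=*/0);
}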
// -#include "llvm/PassSupport.h" #include "llvm/PassAnalysisSupport.h" +#include "llvm/PassSupport.h" #endif diff --git a/interpreter/llvm/src/include/llvm/PassInfo.h b/interpreter/llvm/src/include/llvm/PassInfo.h index 21ade85b682fb..81dface3c9a05 100644 --- a/interpreter/llvm/src/include/llvm/PassInfo.h +++ b/interpreter/llvm/src/include/llvm/PassInfo.h @@ -32,7 +32,6 @@ class TargetMachine; class PassInfo { public: typedef Pass* (*NormalCtor_t)(); - typedef Pass *(*TargetMachineCtor_t)(TargetMachine *); private: StringRef PassName; // Nice name for Pass @@ -44,24 +43,20 @@ class PassInfo { std::vector ItfImpl; // Interfaces implemented by this pass NormalCtor_t NormalCtor; - TargetMachineCtor_t TargetMachineCtor; public: /// PassInfo ctor - Do not call this directly, this should only be invoked /// through RegisterPass. PassInfo(StringRef name, StringRef arg, const void *pi, NormalCtor_t normal, - bool isCFGOnly, bool is_analysis, - TargetMachineCtor_t machine = nullptr) + bool isCFGOnly, bool is_analysis) : PassName(name), PassArgument(arg), PassID(pi), IsCFGOnlyPass(isCFGOnly), - IsAnalysis(is_analysis), IsAnalysisGroup(false), NormalCtor(normal), - TargetMachineCtor(machine) {} + IsAnalysis(is_analysis), IsAnalysisGroup(false), NormalCtor(normal) {} /// PassInfo ctor - Do not call this directly, this should only be invoked /// through RegisterPass. This version is for use by analysis groups; it /// does not auto-register the pass. PassInfo(StringRef name, const void *pi) : PassName(name), PassArgument(""), PassID(pi), IsCFGOnlyPass(false), - IsAnalysis(false), IsAnalysisGroup(true), NormalCtor(nullptr), - TargetMachineCtor(nullptr) {} + IsAnalysis(false), IsAnalysisGroup(true), NormalCtor(nullptr) {} /// getPassName - Return the friendly name for the pass, never returns null /// @@ -101,16 +96,6 @@ class PassInfo { NormalCtor = Ctor; } - /// getTargetMachineCtor - Return a pointer to a function, that when called - /// with a TargetMachine, creates an instance of the pass and returns it. - /// This pointer may be null if there is no constructor with a TargetMachine - /// for the pass. - /// - TargetMachineCtor_t getTargetMachineCtor() const { return TargetMachineCtor; } - void setTargetMachineCtor(TargetMachineCtor_t Ctor) { - TargetMachineCtor = Ctor; - } - /// createPass() - Use this method to create an instance of this pass. 
Pass *createPass() const { assert((!isAnalysisGroup() || NormalCtor) && diff --git a/interpreter/llvm/src/include/llvm/PassSupport.h b/interpreter/llvm/src/include/llvm/PassSupport.h index 50e6b498fb462..602f45ac51787 100644 --- a/interpreter/llvm/src/include/llvm/PassSupport.h +++ b/interpreter/llvm/src/include/llvm/PassSupport.h @@ -31,8 +31,6 @@ namespace llvm { -class TargetMachine; - #define INITIALIZE_PASS(passName, arg, name, cfg, analysis) \ static void *initialize##passName##PassOnce(PassRegistry &Registry) { \ PassInfo *PI = new PassInfo( \ @@ -78,10 +76,6 @@ class TargetMachine; template <typename PassName> Pass *callDefaultCtor() { return new PassName(); } -template <typename PassName> Pass *callTargetMachineCtor(TargetMachine *TM) { - return new PassName(TM); -} - //===--------------------------------------------------------------------------- /// RegisterPass template - This template class is used to notify the system /// that a Pass is available for use, and registers it into the internal diff --git a/interpreter/llvm/src/include/llvm/Passes/PassBuilder.h b/interpreter/llvm/src/include/llvm/Passes/PassBuilder.h index efa36d957fbd6..33433f6b4a104 100644 --- a/interpreter/llvm/src/include/llvm/Passes/PassBuilder.h +++ b/interpreter/llvm/src/include/llvm/Passes/PassBuilder.h @@ -31,8 +31,8 @@ class TargetMachine; struct PGOOptions { std::string ProfileGenFile = ""; std::string ProfileUseFile = ""; + std::string SampleProfileFile = ""; bool RunProfileGen = false; - bool SamplePGO = false; }; /// \brief This class provides access to building LLVM's passes. @@ -46,6 +46,19 @@ class PassBuilder { Optional<PGOOptions> PGOOpt; public: + /// \brief A struct to capture parsed pass pipeline names. + /// + /// A pipeline is defined as a series of names, each of which may in itself + /// recursively contain a nested pipeline. A name is either the name of a pass + /// (e.g. "instcombine") or the name of a pipeline type (e.g. "cgscc"). If the + /// name is the name of a pass, the InnerPipeline is empty, since passes + /// cannot contain inner pipelines. See parsePassPipeline() for a more + /// detailed description of the textual pipeline format. + struct PipelineElement { + StringRef Name; + std::vector<PipelineElement> InnerPipeline; + }; + /// \brief LLVM-provided high-level optimization levels. /// /// This enumerates the LLVM-provided high-level optimization levels. Each @@ -188,9 +201,52 @@ class PassBuilder { /// only intended for use when attempting to optimize code. If frontends /// require some transformations for semantic reasons, they should explicitly /// build them. + /// + /// \p PrepareForThinLTO indicates whether this is invoked in the + /// PrepareForThinLTO phase. Special handling is needed for sample PGO to + /// ensure profile accuracy in the backend profile annotation phase. FunctionPassManager buildFunctionSimplificationPipeline(OptimizationLevel Level, - bool DebugLogging = false); + bool DebugLogging = false, + bool PrepareForThinLTO = false); + + /// Construct the core LLVM module canonicalization and simplification + /// pipeline. + /// + /// This pipeline focuses on canonicalizing and simplifying the entire module + /// of IR. Much like the function simplification pipeline above, it is + /// suitable to run repeatedly over the IR and is not expected to destroy + /// important information. It does, however, perform inlining and other + /// heuristic-based simplifications that are not strictly reversible. + /// + /// Note that \p Level cannot be `O0` here.
The pipelines produced are + /// only intended for use when attempting to optimize code. If frontends + /// require some transformations for semantic reasons, they should explicitly + /// build them. + /// + /// \p PrepareForThinLTO indicates whether this is invoked in the + /// PrepareForThinLTO phase. Special handling is needed for sample PGO to + /// ensure profile accuracy in the backend profile annotation phase. + ModulePassManager + buildModuleSimplificationPipeline(OptimizationLevel Level, + bool DebugLogging = false, + bool PrepareForThinLTO = false); + + /// Construct the core LLVM module optimization pipeline. + /// + /// This pipeline focuses on optimizing the execution speed of the IR. It + /// uses cost modeling and thresholds to balance code growth against runtime + /// improvements. It includes vectorization and other information-destroying + /// transformations. It also cannot generally be run repeatedly on a module + /// without potentially seriously regressing the runtime performance of the + /// code or seriously growing code size. + /// + /// Note that \p Level cannot be `O0` here. The pipelines produced are + /// only intended for use when attempting to optimize code. If frontends + /// require some transformations for semantic reasons, they should explicitly + /// build them. + ModulePassManager buildModuleOptimizationPipeline(OptimizationLevel Level, + bool DebugLogging = false); /// Build a per-module default optimization pipeline. /// @@ -206,6 +262,36 @@ class PassBuilder { ModulePassManager buildPerModuleDefaultPipeline(OptimizationLevel Level, bool DebugLogging = false); + /// Build a pre-link, ThinLTO-targeting default optimization pipeline to + /// a pass manager. + /// + /// This adds the pre-link optimizations tuned to prepare a module for + /// a ThinLTO run. It works to minimize the IR which needs to be analyzed + /// without making irreversible decisions which could be made better during + /// the LTO run. + /// + /// Note that \p Level cannot be `O0` here. The pipelines produced are + /// only intended for use when attempting to optimize code. If frontends + /// require some transformations for semantic reasons, they should explicitly + /// build them. + ModulePassManager + buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level, + bool DebugLogging = false); + + /// Build a ThinLTO default optimization pipeline to a pass manager. + /// + /// This provides a good default optimization pipeline for link-time + /// optimization and code generation. It is particularly tuned to fit well + /// when IR coming into the LTO phase was first run through \c + /// addPreLinkLTODefaultPipeline, and the two coordinate closely. + /// + /// Note that \p Level cannot be `O0` here. The pipelines produced are + /// only intended for use when attempting to optimize code. If frontends + /// require some transformations for semantic reasons, they should explicitly + /// build them. + ModulePassManager buildThinLTODefaultPipeline(OptimizationLevel Level, + bool DebugLogging = false); + /// Build a pre-link, LTO-targeting default optimization pipeline to a pass /// manager. /// @@ -239,7 +325,8 @@ class PassBuilder { /// registered. AAManager buildDefaultAAPipeline(); - /// \brief Parse a textual pass pipeline description into a \c ModulePassManager. + /// \brief Parse a textual pass pipeline description into a \c + /// ModulePassManager.
/// /// The format of the textual pass pipeline description looks something like: /// @@ -249,8 +336,8 @@ class PassBuilder { /// are comma separated. As a special shortcut, if the very first pass is not /// a module pass (as a module pass manager is), this will automatically form /// the shortest stack of pass managers that allow inserting that first pass. - /// So, assuming function passes 'fpassN', CGSCC passes 'cgpassN', and loop passes - /// 'lpassN', all of these are valid: + /// So, assuming function passes 'fpassN', CGSCC passes 'cgpassN', and loop + /// passes 'lpassN', all of these are valid: /// /// fpass1,fpass2,fpass3 /// cgpass1,cgpass2,cgpass3 @@ -263,13 +350,28 @@ class PassBuilder { /// module(function(loop(lpass1,lpass2,lpass3))) /// /// This shortcut is especially useful for debugging and testing small pass - /// combinations. Note that these shortcuts don't introduce any other magic. If - /// the sequence of passes aren't all the exact same kind of pass, it will be - /// an error. You cannot mix different levels implicitly, you must explicitly - /// form a pass manager in which to nest passes. + /// combinations. Note that these shortcuts don't introduce any other magic. + /// If the passes in the sequence aren't all the exact same kind of pass, it + /// will be an error. You cannot mix different levels implicitly, you must + /// explicitly form a pass manager in which to nest passes. bool parsePassPipeline(ModulePassManager &MPM, StringRef PipelineText, bool VerifyEachPass = true, bool DebugLogging = false); + /// {{@ Parse a textual pass pipeline description into a specific PassManager + /// + /// Automatic deduction of an appropriate pass manager stack is not supported. + /// For example, to insert a loop pass 'lpass' into a FunctionPassManager, + /// this is the valid pipeline text: + /// + /// function(lpass) + bool parsePassPipeline(CGSCCPassManager &CGPM, StringRef PipelineText, + bool VerifyEachPass = true, bool DebugLogging = false); + bool parsePassPipeline(FunctionPassManager &FPM, StringRef PipelineText, + bool VerifyEachPass = true, bool DebugLogging = false); + bool parsePassPipeline(LoopPassManager &LPM, StringRef PipelineText, + bool VerifyEachPass = true, bool DebugLogging = false); + /// @}} + /// Parse a textual alias analysis pipeline into the provided AA manager. /// /// The format of the textual AA pipeline is a comma separated list of AA @@ -287,13 +389,139 @@ class PassBuilder { /// returns false. bool parseAAPipeline(AAManager &AA, StringRef PipelineText); -private: - /// A struct to capture parsed pass pipeline names. - struct PipelineElement { - StringRef Name; - std::vector<PipelineElement> InnerPipeline; - }; + /// \brief Register a callback for a default optimizer pipeline extension + /// point + /// + /// This extension point allows adding passes that perform peephole + /// optimizations similar to the instruction combiner. These passes will be + /// inserted after each instance of the instruction combiner pass. + void registerPeepholeEPCallback( + const std::function<void(FunctionPassManager &, OptimizationLevel)> &C) { + PeepholeEPCallbacks.push_back(C); + } + + /// \brief Register a callback for a default optimizer pipeline extension + /// point + /// + /// This extension point allows adding late loop canonicalization and + /// simplification passes. This is the last point in the loop optimization + /// pipeline before loop deletion. Each pass added + /// here must be an instance of LoopPass.
+ /// This is the place to add passes that can remove loops, such as target- + /// specific loop idiom recognition. + void registerLateLoopOptimizationsEPCallback( + const std::function &C) { + LateLoopOptimizationsEPCallbacks.push_back(C); + } + + /// \brief Register a callback for a default optimizer pipeline extension + /// point + /// + /// This extension point allows adding loop passes to the end of the loop + /// optimizer. + void registerLoopOptimizerEndEPCallback( + const std::function &C) { + LoopOptimizerEndEPCallbacks.push_back(C); + } + + /// \brief Register a callback for a default optimizer pipeline extension + /// point + /// + /// This extension point allows adding optimization passes after most of the + /// main optimizations, but before the last cleanup-ish optimizations. + void registerScalarOptimizerLateEPCallback( + const std::function &C) { + ScalarOptimizerLateEPCallbacks.push_back(C); + } + + /// \brief Register a callback for a default optimizer pipeline extension + /// point + /// + /// This extension point allows adding CallGraphSCC passes at the end of the + /// main CallGraphSCC passes and before any function simplification passes run + /// by CGPassManager. + void registerCGSCCOptimizerLateEPCallback( + const std::function &C) { + CGSCCOptimizerLateEPCallbacks.push_back(C); + } + + /// \brief Register a callback for a default optimizer pipeline extension + /// point + /// + /// This extension point allows adding optimization passes before the + /// vectorizer and other highly target specific optimization passes are + /// executed. + void registerVectorizerStartEPCallback( + const std::function &C) { + VectorizerStartEPCallbacks.push_back(C); + } + + /// \brief Register a callback for parsing an AliasAnalysis Name to populate + /// the given AAManager \p AA + void registerParseAACallback( + const std::function &C) { + AAParsingCallbacks.push_back(C); + } + + /// {{@ Register callbacks for analysis registration with this PassBuilder + /// instance. + /// Callees register their analyses with the given AnalysisManager objects. + void registerAnalysisRegistrationCallback( + const std::function &C) { + CGSCCAnalysisRegistrationCallbacks.push_back(C); + } + void registerAnalysisRegistrationCallback( + const std::function &C) { + FunctionAnalysisRegistrationCallbacks.push_back(C); + } + void registerAnalysisRegistrationCallback( + const std::function &C) { + LoopAnalysisRegistrationCallbacks.push_back(C); + } + void registerAnalysisRegistrationCallback( + const std::function &C) { + ModuleAnalysisRegistrationCallbacks.push_back(C); + } + /// @}} + + /// {{@ Register pipeline parsing callbacks with this pass builder instance. + /// Using these callbacks, callers can parse both a single pass name, as well + /// as entire sub-pipelines, and populate the PassManager instance + /// accordingly. + void registerPipelineParsingCallback( + const std::function)> &C) { + CGSCCPipelineParsingCallbacks.push_back(C); + } + void registerPipelineParsingCallback( + const std::function)> &C) { + FunctionPipelineParsingCallbacks.push_back(C); + } + void registerPipelineParsingCallback( + const std::function)> &C) { + LoopPipelineParsingCallbacks.push_back(C); + } + void registerPipelineParsingCallback( + const std::function)> &C) { + ModulePipelineParsingCallbacks.push_back(C); + } + /// @}} + + /// \brief Register a callback for a top-level pipeline entry. 
+ /// + /// If the PassManager type is not given at the top level of the pipeline + /// text, this Callback should be used to determine the appropriate stack of + /// PassManagers and populate the passed ModulePassManager. + void registerParseTopLevelPipelineCallback( + const std::function, + bool VerifyEachPass, bool DebugLogging)> &C) { + TopLevelPipelineParsingCallbacks.push_back(C); + } +private: static Optional> parsePipelineText(StringRef Text); @@ -319,7 +547,106 @@ class PassBuilder { bool parseModulePassPipeline(ModulePassManager &MPM, ArrayRef Pipeline, bool VerifyEachPass, bool DebugLogging); + + void addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging, + OptimizationLevel Level, bool RunProfileGen, + std::string ProfileGenFile, + std::string ProfileUseFile); + + void invokePeepholeEPCallbacks(FunctionPassManager &, OptimizationLevel); + + // Extension Point callbacks + SmallVector, 2> + PeepholeEPCallbacks; + SmallVector, 2> + LateLoopOptimizationsEPCallbacks; + SmallVector, 2> + LoopOptimizerEndEPCallbacks; + SmallVector, 2> + ScalarOptimizerLateEPCallbacks; + SmallVector, 2> + CGSCCOptimizerLateEPCallbacks; + SmallVector, 2> + VectorizerStartEPCallbacks; + // Module callbacks + SmallVector, 2> + ModuleAnalysisRegistrationCallbacks; + SmallVector)>, + 2> + ModulePipelineParsingCallbacks; + SmallVector, + bool VerifyEachPass, bool DebugLogging)>, + 2> + TopLevelPipelineParsingCallbacks; + // CGSCC callbacks + SmallVector, 2> + CGSCCAnalysisRegistrationCallbacks; + SmallVector)>, + 2> + CGSCCPipelineParsingCallbacks; + // Function callbacks + SmallVector, 2> + FunctionAnalysisRegistrationCallbacks; + SmallVector)>, + 2> + FunctionPipelineParsingCallbacks; + // Loop callbacks + SmallVector, 2> + LoopAnalysisRegistrationCallbacks; + SmallVector)>, + 2> + LoopPipelineParsingCallbacks; + // AA callbacks + SmallVector, 2> + AAParsingCallbacks; }; + +/// This utility template takes care of adding require<> and invalidate<> +/// passes for an analysis to a given \c PassManager. It is intended to be used +/// during parsing of a pass pipeline when parsing a single PipelineName. 
+/// When registering a new function analysis FancyAnalysis with the pass +/// pipeline name "fancy-analysis", a matching ParsePipelineCallback could look +/// like this: +/// +/// static bool parseFunctionPipeline(StringRef Name, FunctionPassManager &FPM, +/// ArrayRef<PipelineElement> P) { +/// if (parseAnalysisUtilityPasses<FancyAnalysis>("fancy-analysis", Name, +/// FPM)) +/// return true; +/// return false; +/// } +template <typename AnalysisT, typename IRUnitT, typename AnalysisManagerT, +          typename... ExtraArgTs> +bool parseAnalysisUtilityPasses( + StringRef AnalysisName, StringRef PipelineName, + PassManager<IRUnitT, AnalysisManagerT, ExtraArgTs...> &PM) { + if (!PipelineName.endswith(">")) + return false; + // See if this is an invalidate<> pass name + if (PipelineName.startswith("invalidate<")) { + PipelineName = PipelineName.substr(11, PipelineName.size() - 12); + if (PipelineName != AnalysisName) + return false; + PM.addPass(InvalidateAnalysisPass<AnalysisT>()); + return true; + } + + // See if this is a require<> pass name + if (PipelineName.startswith("require<")) { + PipelineName = PipelineName.substr(8, PipelineName.size() - 9); + if (PipelineName != AnalysisName) + return false; + PM.addPass(RequireAnalysisPass<AnalysisT, IRUnitT, AnalysisManagerT, + ExtraArgTs...>()); + return true; + } + + return false; +} } #endif diff --git a/interpreter/llvm/src/include/llvm/ProfileData/Coverage/CoverageMapping.h b/interpreter/llvm/src/include/llvm/ProfileData/Coverage/CoverageMapping.h index b9a9f53776984..fa9a87aed6806 100644 --- a/interpreter/llvm/src/include/llvm/ProfileData/Coverage/CoverageMapping.h +++ b/interpreter/llvm/src/include/llvm/ProfileData/Coverage/CoverageMapping.h @@ -18,11 +18,11 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Hashing.h" -#include "llvm/ADT/iterator.h" -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/None.h" -#include "llvm/ADT/StringSet.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSet.h" +#include "llvm/ADT/iterator.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/ProfileData/InstrProf.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" @@ -168,13 +168,21 @@ class CounterExpressionBuilder { /// expression is added to the builder's collection of expressions. Counter get(const CounterExpression &E); + /// Represents a term in a counter expression tree. + struct Term { + unsigned CounterID; + int Factor; + + Term(unsigned CounterID, int Factor) + : CounterID(CounterID), Factor(Factor) {} + }; + /// \brief Gather the terms of the expression tree for processing. /// /// This collects each addition and subtraction referenced by the counter into /// a sequence that can be sorted and combined to build a simplified counter /// expression. - void extractTerms(Counter C, int Sign, - SmallVectorImpl<std::pair<unsigned, int>> &Terms); + void extractTerms(Counter C, int Sign, SmallVectorImpl<Term> &Terms); /// \brief Simplifies the given expression tree /// by getting rid of algebraically redundant operations. @@ -411,9 +419,11 @@ class CoverageData { std::vector<CoverageSegment>::const_iterator begin() const { return Segments.begin(); } + std::vector<CoverageSegment>::const_iterator end() const { return Segments.end(); } + bool empty() const { return Segments.empty(); } /// \brief Expansions that can be further processed. @@ -430,6 +440,7 @@ class CoverageMapping { unsigned MismatchedFunctionCount = 0; CoverageMapping() = default; + /// \brief Add a function record corresponding to \p Record. Error loadFunctionRecord(const CoverageMappingRecord &Record, IndexedInstrProfReader &ProfileReader); @@ -439,21 +450,10 @@ class CoverageMapping { CoverageMapping &operator=(const CoverageMapping &) = delete; /// \brief Load the coverage mapping using the given readers.
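With the single-file convenience overloads removed just below, coverage is loaded through the reader- or filename-array entry points that remain. A hedged usage sketch against the surviving ArrayRef-based overload (the object and profile file names are placeholders):

// Sketch only: load coverage for one binary against an indexed profile.
llvm::SmallVector<llvm::StringRef, 1> Objs = {"a.out"};
auto CovOrErr =
    llvm::coverage::CoverageMapping::load(Objs, "default.profdata");
if (!CovOrErr)
  llvm::report_fatal_error(llvm::toString(CovOrErr.takeError()));
std::unique_ptr<llvm::coverage::CoverageMapping> Coverage =
    std::move(*CovOrErr);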
- static Expected> - load(CoverageMappingReader &CoverageReader, - IndexedInstrProfReader &ProfileReader); - static Expected> load(ArrayRef> CoverageReaders, IndexedInstrProfReader &ProfileReader); - /// \brief Load the coverage mapping from the given files. - static Expected> - load(StringRef ObjectFilename, StringRef ProfileFilename, - StringRef Arch = StringRef()) { - return load(ArrayRef(ObjectFilename), ProfileFilename, Arch); - } - static Expected> load(ArrayRef ObjectFilenames, StringRef ProfileFilename, StringRef Arch = StringRef()); @@ -607,13 +607,13 @@ enum CovMapVersion { }; template struct CovMapTraits { - typedef CovMapFunctionRecord CovMapFuncRecordType; - typedef uint64_t NameRefType; + using CovMapFuncRecordType = CovMapFunctionRecord; + using NameRefType = uint64_t; }; template struct CovMapTraits { - typedef CovMapFunctionRecordV1 CovMapFuncRecordType; - typedef IntPtrT NameRefType; + using CovMapFuncRecordType = CovMapFunctionRecordV1; + using NameRefType = IntPtrT; }; } // end namespace coverage @@ -622,6 +622,7 @@ template struct CovMapTraits { template<> struct DenseMapInfo { static inline coverage::CounterExpression getEmptyKey() { using namespace coverage; + return CounterExpression(CounterExpression::ExprKind::Subtract, Counter::getCounter(~0U), Counter::getCounter(~0U)); @@ -629,6 +630,7 @@ template<> struct DenseMapInfo { static inline coverage::CounterExpression getTombstoneKey() { using namespace coverage; + return CounterExpression(CounterExpression::ExprKind::Add, Counter::getCounter(~0U), Counter::getCounter(~0U)); diff --git a/interpreter/llvm/src/include/llvm/ProfileData/InstrProf.h b/interpreter/llvm/src/include/llvm/ProfileData/InstrProf.h index 1b07c33746e76..772187f70153c 100644 --- a/interpreter/llvm/src/include/llvm/ProfileData/InstrProf.h +++ b/interpreter/llvm/src/include/llvm/ProfileData/InstrProf.h @@ -212,12 +212,12 @@ StringRef getFuncNameWithoutPrefix(StringRef PGOFuncName, /// third field is the uncompressed strings; otherwise it is the /// compressed string. When the string compression is off, the /// second field will have value zero. -Error collectPGOFuncNameStrings(const std::vector &NameStrs, +Error collectPGOFuncNameStrings(ArrayRef NameStrs, bool doCompression, std::string &Result); /// Produce \c Result string with the same format described above. The input /// is vector of PGO function name variables that are referenced. -Error collectPGOFuncNameStrings(const std::vector &NameVars, +Error collectPGOFuncNameStrings(ArrayRef NameVars, std::string &Result, bool doCompression = true); /// \c NameStrings is a string composed of one of more sub-strings encoded in @@ -249,9 +249,8 @@ void annotateValueSite(Module &M, Instruction &Inst, /// Same as the above interface but using an ArrayRef, as well as \p Sum. void annotateValueSite(Module &M, Instruction &Inst, - ArrayRef VDs, - uint64_t Sum, InstrProfValueKind ValueKind, - uint32_t MaxMDCount); + ArrayRef VDs, uint64_t Sum, + InstrProfValueKind ValueKind, uint32_t MaxMDCount); /// Extract the value profile data from \p Inst which is annotated with /// value profile meta data. Return false if there is no value data annotated, @@ -410,7 +409,7 @@ uint64_t ComputeHash(StringRef K); /// on how PGO name is formed. class InstrProfSymtab { public: - typedef std::vector> AddrHashMap; + using AddrHashMap = std::vector>; private: StringRef Data; @@ -450,11 +449,11 @@ class InstrProfSymtab { /// decls from module \c M. 
This interface is used by transformation /// passes such as indirect function call promotion. Variable \c InLTO /// indicates if this is called from LTO optimization passes. - void create(Module &M, bool InLTO = false); + Error create(Module &M, bool InLTO = false); /// Create InstrProfSymtab from a set of names iteratable from /// \p IterRange. This interface is used by IndexedProfReader. - template void create(const NameIterRange &IterRange); + template Error create(const NameIterRange &IterRange); // If the symtab is created by a series of calls to \c addFuncName, \c // finalizeSymtab needs to be called before looking up function names. @@ -464,11 +463,14 @@ class InstrProfSymtab { /// Update the symtab by adding \p FuncName to the table. This interface /// is used by the raw and text profile readers. - void addFuncName(StringRef FuncName) { + Error addFuncName(StringRef FuncName) { + if (FuncName.empty()) + return make_error(instrprof_error::malformed); auto Ins = NameTab.insert(FuncName); if (Ins.second) MD5NameMap.push_back(std::make_pair( IndexedInstrProf::ComputeHash(FuncName), Ins.first->getKey())); + return Error::success(); } /// Map a function address to its name's MD5 hash. This interface @@ -511,11 +513,13 @@ Error InstrProfSymtab::create(StringRef NameStrings) { } template -void InstrProfSymtab::create(const NameIterRange &IterRange) { +Error InstrProfSymtab::create(const NameIterRange &IterRange) { for (auto Name : IterRange) - addFuncName(Name); + if (Error E = addFuncName(Name)) + return E; finalizeSymtab(); + return Error::success(); } void InstrProfSymtab::finalizeSymtab() { @@ -577,29 +581,43 @@ struct InstrProfValueSiteRecord { /// Merge data from another InstrProfValueSiteRecord /// Optionally scale merged counts by \p Weight. - void merge(SoftInstrProfErrors &SIPE, InstrProfValueSiteRecord &Input, - uint64_t Weight = 1); + void merge(InstrProfValueSiteRecord &Input, uint64_t Weight, + function_ref Warn); /// Scale up value profile data counts. - void scale(SoftInstrProfErrors &SIPE, uint64_t Weight); + void scale(uint64_t Weight, function_ref Warn); }; /// Profiling information for a single function. struct InstrProfRecord { - StringRef Name; - uint64_t Hash; std::vector Counts; - SoftInstrProfErrors SIPE; InstrProfRecord() = default; - InstrProfRecord(StringRef Name, uint64_t Hash, std::vector Counts) - : Name(Name), Hash(Hash), Counts(std::move(Counts)) {} + InstrProfRecord(std::vector Counts) : Counts(std::move(Counts)) {} + InstrProfRecord(InstrProfRecord &&) = default; + InstrProfRecord(const InstrProfRecord &RHS) + : Counts(RHS.Counts), + ValueData(RHS.ValueData + ? llvm::make_unique(*RHS.ValueData) + : nullptr) {} + InstrProfRecord &operator=(InstrProfRecord &&) = default; + InstrProfRecord &operator=(const InstrProfRecord &RHS) { + Counts = RHS.Counts; + if (!RHS.ValueData) { + ValueData = nullptr; + return *this; + } + if (!ValueData) + ValueData = llvm::make_unique(*RHS.ValueData); + else + *ValueData = *RHS.ValueData; + return *this; + } - typedef std::vector> ValueMapType; + using ValueMapType = std::vector>; /// Return the number of value profile kinds with non-zero number /// of profile sites. inline uint32_t getNumValueKinds() const; - /// Return the number of instrumented sites for ValueKind. inline uint32_t getNumValueSites(uint32_t ValueKind) const; @@ -634,20 +652,18 @@ struct InstrProfRecord { /// Merge the counts in \p Other into this one. /// Optionally scale merged counts by \p Weight. 
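Since create() and addFuncName() now return llvm::Error, callers must consume or propagate the result. A minimal sketch (buildSymtab and the name list are illustrative, not part of the patch):

#include "llvm/ProfileData/InstrProf.h"
using namespace llvm;

static Error buildSymtab(InstrProfSymtab &Symtab, ArrayRef<StringRef> Names) {
  for (StringRef Name : Names)
    // addFuncName now rejects empty names with instrprof_error::malformed.
    if (Error E = Symtab.addFuncName(Name))
      return E;
  // finalizeSymtab() must still run before any name lookups.
  Symtab.finalizeSymtab();
  return Error::success();
}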
-  void merge(InstrProfRecord &Other, uint64_t Weight = 1);
+  void merge(InstrProfRecord &Other, uint64_t Weight,
+             function_ref<void(instrprof_error)> Warn);

   /// Scale up profile counts (including value profile data) by
   /// \p Weight.
-  void scale(uint64_t Weight);
+  void scale(uint64_t Weight, function_ref<void(instrprof_error)> Warn);

   /// Sort value profile data (per site) by count.
   void sortValueData() {
-    for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) {
-      std::vector<InstrProfValueSiteRecord> &SiteRecords =
-          getValueSitesForKind(Kind);
-      for (auto &SR : SiteRecords)
+    for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
+      for (auto &SR : getValueSitesForKind(Kind))
         SR.sortByCount();
-    }
   }

   /// Clear value data entries and edge counters.
@@ -657,36 +673,51 @@ struct InstrProfRecord {
   }

   /// Clear value data entries
-  void clearValueData() {
-    for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
-      getValueSitesForKind(Kind).clear();
-  }
-
-  /// Get the error contained within the record's soft error counter.
-  Error takeError() { return SIPE.takeError(); }
+  void clearValueData() { ValueData = nullptr; }

 private:
-  std::vector<InstrProfValueSiteRecord> IndirectCallSites;
-  std::vector<InstrProfValueSiteRecord> MemOPSizes;
-  const std::vector<InstrProfValueSiteRecord> &
+  struct ValueProfData {
+    std::vector<InstrProfValueSiteRecord> IndirectCallSites;
+    std::vector<InstrProfValueSiteRecord> MemOPSizes;
+  };
+  std::unique_ptr<ValueProfData> ValueData;
+
+  MutableArrayRef<InstrProfValueSiteRecord>
+  getValueSitesForKind(uint32_t ValueKind) {
+    // Cast to /add/ const (should be an implicit_cast, ideally, if that's ever
+    // implemented in LLVM) to call the const overload of this function, then
+    // cast away the constness from the result.
+    auto AR = const_cast<const InstrProfRecord *>(this)->getValueSitesForKind(
+        ValueKind);
+    return makeMutableArrayRef(
+        const_cast<InstrProfValueSiteRecord *>(AR.data()), AR.size());
+  }
+  ArrayRef<InstrProfValueSiteRecord>
+  getValueSitesForKind(uint32_t ValueKind) const {
+    if (!ValueData)
+      return None;
     switch (ValueKind) {
     case IPVK_IndirectCallTarget:
-      return IndirectCallSites;
+      return ValueData->IndirectCallSites;
     case IPVK_MemOPSize:
-      return MemOPSizes;
+      return ValueData->MemOPSizes;
     default:
       llvm_unreachable("Unknown value kind!");
     }
-    return IndirectCallSites;
   }

   std::vector<InstrProfValueSiteRecord> &
-  getValueSitesForKind(uint32_t ValueKind) {
-    return const_cast<std::vector<InstrProfValueSiteRecord> &>(
-        const_cast<const InstrProfRecord *>(this)
-            ->getValueSitesForKind(ValueKind));
+  getOrCreateValueSitesForKind(uint32_t ValueKind) {
+    if (!ValueData)
+      ValueData = llvm::make_unique<ValueProfData>();
+    switch (ValueKind) {
+    case IPVK_IndirectCallTarget:
+      return ValueData->IndirectCallSites;
+    case IPVK_MemOPSize:
+      return ValueData->MemOPSizes;
+    default:
+      llvm_unreachable("Unknown value kind!");
+    }
+  }

   // Map indirect call target name hash to name string.
@@ -695,11 +726,23 @@ struct InstrProfRecord {

   // Merge Value Profile data from Src record to this record for ValueKind.
   // Scale merged value counts by \p Weight.
-  void mergeValueProfData(uint32_t ValueKind, InstrProfRecord &Src,
-                          uint64_t Weight);
+  void mergeValueProfData(uint32_t ValueKind, InstrProfRecord &Src,
+                          uint64_t Weight,
+                          function_ref<void(instrprof_error)> Warn);

   // Scale up value profile data count.
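With SoftInstrProfErrors removed, soft failures during merge and scale now surface through the Warn callback. A sketch of a caller, with the diagnostic handling purely illustrative:

#include "llvm/ProfileData/InstrProf.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

static void mergeCounts(InstrProfRecord &Dst, InstrProfRecord &Src) {
  Dst.merge(Src, /*Weight=*/1, [](instrprof_error E) {
    // E reports soft conditions such as counter overflow; a real caller
    // would likely map it onto its own diagnostics.
    errs() << "warning: soft instrprof error while merging\n";
  });
}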
- void scaleValueProfData(uint32_t ValueKind, uint64_t Weight); + void scaleValueProfData(uint32_t ValueKind, uint64_t Weight, + function_ref Warn); +}; + +struct NamedInstrProfRecord : InstrProfRecord { + StringRef Name; + uint64_t Hash; + + NamedInstrProfRecord() = default; + NamedInstrProfRecord(StringRef Name, uint64_t Hash, + std::vector Counts) + : InstrProfRecord(std::move(Counts)), Name(Name), Hash(Hash) {} }; uint32_t InstrProfRecord::getNumValueKinds() const { @@ -711,11 +754,8 @@ uint32_t InstrProfRecord::getNumValueKinds() const { uint32_t InstrProfRecord::getNumValueData(uint32_t ValueKind) const { uint32_t N = 0; - const std::vector &SiteRecords = - getValueSitesForKind(ValueKind); - for (auto &SR : SiteRecords) { + for (auto &SR : getValueSitesForKind(ValueKind)) N += SR.ValueData.size(); - } return N; } @@ -760,9 +800,9 @@ uint64_t InstrProfRecord::getValueForSite(InstrProfValueData Dest[], } void InstrProfRecord::reserveSites(uint32_t ValueKind, uint32_t NumValueSites) { - std::vector &ValueSites = - getValueSitesForKind(ValueKind); - ValueSites.reserve(NumValueSites); + if (!NumValueSites) + return; + getOrCreateValueSitesForKind(ValueKind).reserve(NumValueSites); } inline support::endianness getHostEndianness() { @@ -873,6 +913,11 @@ struct Summary { // The number of Cutoff Entries (Summary::Entry) following summary fields. uint64_t NumCutoffEntries; + Summary() = delete; + Summary(uint32_t Size) { memset(this, 0, Size); } + + void operator delete(void *ptr) { ::operator delete(ptr); } + static uint32_t getSize(uint32_t NumSumFields, uint32_t NumCutoffEntries) { return sizeof(Summary) + NumCutoffEntries * sizeof(Entry) + NumSumFields * sizeof(uint64_t); @@ -911,11 +956,6 @@ struct Summary { ER.MinBlockCount = E.MinCount; ER.NumBlocks = E.NumCounts; } - - Summary(uint32_t Size) { memset(this, 0, Size); } - void operator delete(void *ptr) { ::operator delete(ptr); } - - Summary() = delete; }; inline std::unique_ptr allocSummary(uint32_t TotalSize) { @@ -967,7 +1007,7 @@ struct Header { } // end namespace RawInstrProf // Parse MemOP Size range option. -void getMemOPSizeRangeFromOption(std::string Str, int64_t &RangeStart, +void getMemOPSizeRangeFromOption(StringRef Str, int64_t &RangeStart, int64_t &RangeLast); } // end namespace llvm diff --git a/interpreter/llvm/src/include/llvm/ProfileData/InstrProfReader.h b/interpreter/llvm/src/include/llvm/ProfileData/InstrProfReader.h index 1d85a7149afc8..424360e0f7655 100644 --- a/interpreter/llvm/src/include/llvm/ProfileData/InstrProfReader.h +++ b/interpreter/llvm/src/include/llvm/ProfileData/InstrProfReader.h @@ -40,9 +40,9 @@ class InstrProfReader; /// A file format agnostic iterator over profiling data. class InstrProfIterator : public std::iterator { + NamedInstrProfRecord> { InstrProfReader *Reader = nullptr; - InstrProfRecord Record; + value_type Record; void Increment(); @@ -53,12 +53,12 @@ class InstrProfIterator : public std::iterator() { return &Record; } + value_type &operator*() { return Record; } + value_type *operator->() { return &Record; } }; /// Base class and interface for reading profiling data of any known instrprof -/// format. Provides an iterator over InstrProfRecords. +/// format. Provides an iterator over NamedInstrProfRecords. class InstrProfReader { instrprof_error LastError = instrprof_error::success; @@ -70,7 +70,7 @@ class InstrProfReader { virtual Error readHeader() = 0; /// Read a single record. 
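Name and Hash now live in the NamedInstrProfRecord subclass rather than in InstrProfRecord itself, and that is what the iterator above hands out. A sketch, assuming Reader is an already-constructed InstrProfReader:

#include "llvm/ProfileData/InstrProfReader.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

static void dumpRecords(InstrProfReader &Reader) {
  // InstrProfIterator's value_type is now NamedInstrProfRecord.
  for (NamedInstrProfRecord &Rec : Reader)
    outs() << Rec.Name << ": hash " << Rec.Hash << ", "
           << Rec.Counts.size() << " counters\n";
}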
- virtual Error readNextRecord(InstrProfRecord &Record) = 0; + virtual Error readNextRecord(NamedInstrProfRecord &Record) = 0; /// Iterator over profile data. InstrProfIterator begin() { return InstrProfIterator(this); } @@ -92,6 +92,7 @@ class InstrProfReader { protected: std::unique_ptr Symtab; + /// Set the current error and return same. Error error(instrprof_error Err) { LastError = Err; @@ -160,7 +161,7 @@ class TextInstrProfReader : public InstrProfReader { Error readHeader() override; /// Read a single record. - Error readNextRecord(InstrProfRecord &Record) override; + Error readNextRecord(NamedInstrProfRecord &Record) override; InstrProfSymtab &getSymtab() override { assert(Symtab.get()); @@ -202,13 +203,13 @@ class RawInstrProfReader : public InstrProfReader { public: RawInstrProfReader(std::unique_ptr DataBuffer) - : DataBuffer(std::move(DataBuffer)) { } + : DataBuffer(std::move(DataBuffer)) {} RawInstrProfReader(const RawInstrProfReader &) = delete; RawInstrProfReader &operator=(const RawInstrProfReader &) = delete; static bool hasFormat(const MemoryBuffer &DataBuffer); Error readHeader() override; - Error readNextRecord(InstrProfRecord &Record) override; + Error readNextRecord(NamedInstrProfRecord &Record) override; bool isIRLevelProfile() const override { return (Version & VARIANT_MASK_IR_PROF) != 0; @@ -242,8 +243,8 @@ class RawInstrProfReader : public InstrProfReader { return 7 & (sizeof(uint64_t) - SizeInBytes % sizeof(uint64_t)); } - Error readName(InstrProfRecord &Record); - Error readFuncHash(InstrProfRecord &Record); + Error readName(NamedInstrProfRecord &Record); + Error readFuncHash(NamedInstrProfRecord &Record); Error readRawCounts(InstrProfRecord &Record); Error readValueProfilingData(InstrProfRecord &Record); bool atEnd() const { return Data == DataEnd; } @@ -268,8 +269,8 @@ class RawInstrProfReader : public InstrProfReader { } }; -typedef RawInstrProfReader RawInstrProfReader32; -typedef RawInstrProfReader RawInstrProfReader64; +using RawInstrProfReader32 = RawInstrProfReader; +using RawInstrProfReader64 = RawInstrProfReader; namespace IndexedInstrProf { @@ -280,7 +281,7 @@ enum class HashT : uint32_t; /// Trait for lookups into the on-disk hash table for the binary instrprof /// format. class InstrProfLookupTrait { - std::vector DataBuffer; + std::vector DataBuffer; IndexedInstrProf::HashT HashType; unsigned FormatVersion; // Endianness of the input value profile data. @@ -292,12 +293,12 @@ class InstrProfLookupTrait { InstrProfLookupTrait(IndexedInstrProf::HashT HashType, unsigned FormatVersion) : HashType(HashType), FormatVersion(FormatVersion) {} - typedef ArrayRef data_type; + using data_type = ArrayRef; - typedef StringRef internal_key_type; - typedef StringRef external_key_type; - typedef uint64_t hash_value_type; - typedef uint64_t offset_type; + using internal_key_type = StringRef; + using external_key_type = StringRef; + using hash_value_type = uint64_t; + using offset_type = uint64_t; static bool EqualKey(StringRef A, StringRef B) { return A == B; } static StringRef GetInternalKey(StringRef K) { return K; } @@ -333,25 +334,24 @@ struct InstrProfReaderIndexBase { // Read all the profile records with the same key pointed to the current // iterator. 
- virtual Error getRecords(ArrayRef &Data) = 0; + virtual Error getRecords(ArrayRef &Data) = 0; // Read all the profile records with the key equal to FuncName virtual Error getRecords(StringRef FuncName, - ArrayRef &Data) = 0; + ArrayRef &Data) = 0; virtual void advanceToNextKey() = 0; virtual bool atEnd() const = 0; virtual void setValueProfDataEndianness(support::endianness Endianness) = 0; virtual uint64_t getVersion() const = 0; virtual bool isIRLevelProfile() const = 0; - virtual void populateSymtab(InstrProfSymtab &) = 0; + virtual Error populateSymtab(InstrProfSymtab &) = 0; }; -typedef OnDiskIterableChainedHashTable - OnDiskHashTableImplV3; +using OnDiskHashTableImplV3 = + OnDiskIterableChainedHashTable; template class InstrProfReaderIndex : public InstrProfReaderIndexBase { - private: std::unique_ptr HashTable; typename HashTableImpl::data_iterator RecordIterator; @@ -364,9 +364,9 @@ class InstrProfReaderIndex : public InstrProfReaderIndexBase { IndexedInstrProf::HashT HashType, uint64_t Version); ~InstrProfReaderIndex() override = default; - Error getRecords(ArrayRef &Data) override; + Error getRecords(ArrayRef &Data) override; Error getRecords(StringRef FuncName, - ArrayRef &Data) override; + ArrayRef &Data) override; void advanceToNextKey() override { RecordIterator++; } bool atEnd() const override { @@ -383,8 +383,8 @@ class InstrProfReaderIndex : public InstrProfReaderIndexBase { return (FormatVersion & VARIANT_MASK_IR_PROF) != 0; } - void populateSymtab(InstrProfSymtab &Symtab) override { - Symtab.create(HashTable->keys()); + Error populateSymtab(InstrProfSymtab &Symtab) override { + return Symtab.create(HashTable->keys()); } }; @@ -419,10 +419,9 @@ class IndexedInstrProfReader : public InstrProfReader { /// Read the file header. Error readHeader() override; /// Read a single record. - Error readNextRecord(InstrProfRecord &Record) override; + Error readNextRecord(NamedInstrProfRecord &Record) override; - /// Return the pointer to InstrProfRecord associated with FuncName - /// and FuncHash + /// Return the NamedInstrProfRecord associated with FuncName and FuncHash Expected getInstrProfRecord(StringRef FuncName, uint64_t FuncHash); diff --git a/interpreter/llvm/src/include/llvm/ProfileData/InstrProfWriter.h b/interpreter/llvm/src/include/llvm/ProfileData/InstrProfWriter.h index 10742c0228ebe..8107ab386fe23 100644 --- a/interpreter/llvm/src/include/llvm/ProfileData/InstrProfWriter.h +++ b/interpreter/llvm/src/include/llvm/ProfileData/InstrProfWriter.h @@ -29,10 +29,11 @@ namespace llvm { /// Writer for instrumentation based profile data. class InstrProfRecordWriterTrait; class ProfOStream; +class raw_fd_ostream; class InstrProfWriter { public: - typedef SmallDenseMap ProfilingData; + using ProfilingData = SmallDenseMap; enum ProfKind { PF_Unknown = 0, PF_FE, PF_IRLevel }; private: @@ -49,19 +50,25 @@ class InstrProfWriter { /// Add function counts for the given function. If there are already counts /// for this function and the hash and number of counts match, each counter is /// summed. Optionally scale counts by \p Weight. - Error addRecord(InstrProfRecord &&I, uint64_t Weight = 1); + void addRecord(NamedInstrProfRecord &&I, uint64_t Weight, + function_ref Warn); + void addRecord(NamedInstrProfRecord &&I, function_ref Warn) { + addRecord(std::move(I), 1, Warn); + } /// Merge existing function counts from the given writer. 
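addRecord() and mergeRecordsFromWriter() (below) likewise trade their Error return for a warning callback. A sketch of a caller, with the consumeError policy illustrative only:

#include "llvm/ProfileData/InstrProfWriter.h"
using namespace llvm;

static void addCounts(InstrProfWriter &Writer, NamedInstrProfRecord &&Rec) {
  // Hard failures (e.g. a count mismatch) now arrive as Errors on the
  // callback instead of being returned to the caller.
  Writer.addRecord(std::move(Rec), [](Error E) {
    consumeError(std::move(E));
  });
}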
- Error mergeRecordsFromWriter(InstrProfWriter &&IPW); + void mergeRecordsFromWriter(InstrProfWriter &&IPW, + function_ref Warn); /// Write the profile to \c OS void write(raw_fd_ostream &OS); /// Write the profile in text format to \c OS - void writeText(raw_fd_ostream &OS); + Error writeText(raw_fd_ostream &OS); /// Write \c Record in text format to \c OS - static void writeRecordInText(const InstrProfRecord &Record, + static void writeRecordInText(StringRef Name, uint64_t Hash, + const InstrProfRecord &Counters, InstrProfSymtab &Symtab, raw_fd_ostream &OS); /// Write the profile, returning the raw data. For testing. @@ -84,6 +91,8 @@ class InstrProfWriter { void setOutputSparse(bool Sparse); private: + void addRecord(StringRef Name, uint64_t Hash, InstrProfRecord &&I, + uint64_t Weight, function_ref Warn); bool shouldEncodeData(const ProfilingData &PD); void writeImpl(ProfOStream &OS); }; diff --git a/interpreter/llvm/src/include/llvm/ProfileData/ProfileCommon.h b/interpreter/llvm/src/include/llvm/ProfileData/ProfileCommon.h index 987e3160ccae2..51b065bcdb700 100644 --- a/interpreter/llvm/src/include/llvm/ProfileData/ProfileCommon.h +++ b/interpreter/llvm/src/include/llvm/ProfileData/ProfileCommon.h @@ -17,6 +17,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/IR/ProfileSummary.h" +#include "llvm/ProfileData/InstrProf.h" #include "llvm/Support/Error.h" #include #include @@ -27,8 +28,6 @@ namespace llvm { -struct InstrProfRecord; - namespace sampleprof { class FunctionSamples; diff --git a/interpreter/llvm/src/include/llvm/ProfileData/SampleProf.h b/interpreter/llvm/src/include/llvm/ProfileData/SampleProf.h index 7a705ca5416da..7fc258831be88 100644 --- a/interpreter/llvm/src/include/llvm/ProfileData/SampleProf.h +++ b/interpreter/llvm/src/include/llvm/ProfileData/SampleProf.h @@ -125,7 +125,7 @@ raw_ostream &operator<<(raw_ostream &OS, const LineLocation &Loc); /// will be a list of one or more functions. class SampleRecord { public: - typedef StringMap CallTargetMap; + using CallTargetMap = StringMap; SampleRecord() = default; @@ -182,10 +182,11 @@ class SampleRecord { raw_ostream &operator<<(raw_ostream &OS, const SampleRecord &Sample); -typedef std::map BodySampleMap; class FunctionSamples; -typedef StringMap FunctionSamplesMap; -typedef std::map CallsiteSampleMap; + +using BodySampleMap = std::map; +using FunctionSamplesMap = StringMap; +using CallsiteSampleMap = std::map; /// Representation of the samples collected for a function. /// @@ -398,8 +399,8 @@ raw_ostream &operator<<(raw_ostream &OS, const FunctionSamples &FS); /// order of LocationT. 
template class SampleSorter { public: - typedef std::pair SamplesWithLoc; - typedef SmallVector SamplesWithLocList; + using SamplesWithLoc = std::pair; + using SamplesWithLocList = SmallVector; SampleSorter(const std::map &Samples) { for (const auto &I : Samples) diff --git a/interpreter/llvm/src/include/llvm/ProfileData/SampleProfReader.h b/interpreter/llvm/src/include/llvm/ProfileData/SampleProfReader.h index 29e3aba3e0e76..9c1f357cbbd16 100644 --- a/interpreter/llvm/src/include/llvm/ProfileData/SampleProfReader.h +++ b/interpreter/llvm/src/include/llvm/ProfileData/SampleProfReader.h @@ -350,7 +350,7 @@ class SampleProfileReaderText : public SampleProfileReader { class SampleProfileReaderBinary : public SampleProfileReader { public: SampleProfileReaderBinary(std::unique_ptr B, LLVMContext &C) - : SampleProfileReader(std::move(B), C), Data(nullptr), End(nullptr) {} + : SampleProfileReader(std::move(B), C) {} /// \brief Read and validate the file header. std::error_code readHeader() override; @@ -388,10 +388,10 @@ class SampleProfileReaderBinary : public SampleProfileReader { std::error_code readProfile(FunctionSamples &FProfile); /// \brief Points to the current location in the buffer. - const uint8_t *Data; + const uint8_t *Data = nullptr; /// \brief Points to the end of the buffer. - const uint8_t *End; + const uint8_t *End = nullptr; /// Function name table. std::vector NameTable; @@ -403,7 +403,7 @@ class SampleProfileReaderBinary : public SampleProfileReader { std::error_code readSummary(); }; -typedef SmallVector InlineCallStack; +using InlineCallStack = SmallVector; // Supported histogram types in GCC. Currently, we only need support for // call target histograms. diff --git a/interpreter/llvm/src/include/llvm/Support/AArch64TargetParser.def b/interpreter/llvm/src/include/llvm/Support/AArch64TargetParser.def index 8eccebcd932a0..09f9602a24d94 100644 --- a/interpreter/llvm/src/include/llvm/Support/AArch64TargetParser.def +++ b/interpreter/llvm/src/include/llvm/Support/AArch64TargetParser.def @@ -43,8 +43,9 @@ AARCH64_ARCH_EXT_NAME("crypto", AArch64::AEK_CRYPTO, "+crypto","-crypto") AARCH64_ARCH_EXT_NAME("fp", AArch64::AEK_FP, "+fp-armv8", "-fp-armv8") AARCH64_ARCH_EXT_NAME("simd", AArch64::AEK_SIMD, "+neon", "-neon") AARCH64_ARCH_EXT_NAME("fp16", AArch64::AEK_FP16, "+fullfp16", "-fullfp16") -AARCH64_ARCH_EXT_NAME("profile", AArch64::AEK_PROFILE, "+spe", "-spe") -AARCH64_ARCH_EXT_NAME("ras", AArch64::AEK_RAS, "+ras", "-ras") +AARCH64_ARCH_EXT_NAME("profile", AArch64::AEK_PROFILE, "+spe", "-spe") +AARCH64_ARCH_EXT_NAME("ras", AArch64::AEK_RAS, "+ras", "-ras") +AARCH64_ARCH_EXT_NAME("sve", AArch64::AEK_SVE, "+sve", "-sve") #undef AARCH64_ARCH_EXT_NAME #ifndef AARCH64_CPU_NAME diff --git a/interpreter/llvm/src/include/llvm/Support/AMDGPUCodeObjectMetadata.h b/interpreter/llvm/src/include/llvm/Support/AMDGPUCodeObjectMetadata.h new file mode 100644 index 0000000000000..d274c5ee91842 --- /dev/null +++ b/interpreter/llvm/src/include/llvm/Support/AMDGPUCodeObjectMetadata.h @@ -0,0 +1,422 @@ +//===--- AMDGPUCodeObjectMetadata.h -----------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// \brief AMDGPU Code Object Metadata definitions and in-memory +/// representations. 
+/// +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_AMDGPUCODEOBJECTMETADATA_H +#define LLVM_SUPPORT_AMDGPUCODEOBJECTMETADATA_H + +#include +#include +#include +#include + +namespace llvm { +namespace AMDGPU { + +//===----------------------------------------------------------------------===// +// Code Object Metadata. +//===----------------------------------------------------------------------===// +namespace CodeObject { + +/// \brief Code object metadata major version. +constexpr uint32_t MetadataVersionMajor = 1; +/// \brief Code object metadata minor version. +constexpr uint32_t MetadataVersionMinor = 0; + +/// \brief Code object metadata beginning assembler directive. +constexpr char MetadataAssemblerDirectiveBegin[] = + ".amdgpu_code_object_metadata"; +/// \brief Code object metadata ending assembler directive. +constexpr char MetadataAssemblerDirectiveEnd[] = + ".end_amdgpu_code_object_metadata"; + +/// \brief Access qualifiers. +enum class AccessQualifier : uint8_t { + Default = 0, + ReadOnly = 1, + WriteOnly = 2, + ReadWrite = 3, + Unknown = 0xff +}; + +/// \brief Address space qualifiers. +enum class AddressSpaceQualifier : uint8_t { + Private = 0, + Global = 1, + Constant = 2, + Local = 3, + Generic = 4, + Region = 5, + Unknown = 0xff +}; + +/// \brief Value kinds. +enum class ValueKind : uint8_t { + ByValue = 0, + GlobalBuffer = 1, + DynamicSharedPointer = 2, + Sampler = 3, + Image = 4, + Pipe = 5, + Queue = 6, + HiddenGlobalOffsetX = 7, + HiddenGlobalOffsetY = 8, + HiddenGlobalOffsetZ = 9, + HiddenNone = 10, + HiddenPrintfBuffer = 11, + HiddenDefaultQueue = 12, + HiddenCompletionAction = 13, + Unknown = 0xff +}; + +/// \brief Value types. +enum class ValueType : uint8_t { + Struct = 0, + I8 = 1, + U8 = 2, + I16 = 3, + U16 = 4, + F16 = 5, + I32 = 6, + U32 = 7, + F32 = 8, + I64 = 9, + U64 = 10, + F64 = 11, + Unknown = 0xff +}; + +//===----------------------------------------------------------------------===// +// Kernel Metadata. +//===----------------------------------------------------------------------===// +namespace Kernel { + +//===----------------------------------------------------------------------===// +// Kernel Attributes Metadata. +//===----------------------------------------------------------------------===// +namespace Attrs { + +namespace Key { +/// \brief Key for Kernel::Attr::Metadata::mReqdWorkGroupSize. +constexpr char ReqdWorkGroupSize[] = "ReqdWorkGroupSize"; +/// \brief Key for Kernel::Attr::Metadata::mWorkGroupSizeHint. +constexpr char WorkGroupSizeHint[] = "WorkGroupSizeHint"; +/// \brief Key for Kernel::Attr::Metadata::mVecTypeHint. +constexpr char VecTypeHint[] = "VecTypeHint"; +} // end namespace Key + +/// \brief In-memory representation of kernel attributes metadata. +struct Metadata final { + /// \brief 'reqd_work_group_size' attribute. Optional. + std::vector mReqdWorkGroupSize = std::vector(); + /// \brief 'work_group_size_hint' attribute. Optional. + std::vector mWorkGroupSizeHint = std::vector(); + /// \brief 'vec_type_hint' attribute. Optional. + std::string mVecTypeHint = std::string(); + + /// \brief Default constructor. + Metadata() = default; + + /// \returns True if kernel attributes metadata is empty, false otherwise. + bool empty() const { + return mReqdWorkGroupSize.empty() && + mWorkGroupSizeHint.empty() && + mVecTypeHint.empty(); + } + + /// \returns True if kernel attributes metadata is not empty, false otherwise. 
+ bool notEmpty() const { + return !empty(); + } +}; + +} // end namespace Attrs + +//===----------------------------------------------------------------------===// +// Kernel Argument Metadata. +//===----------------------------------------------------------------------===// +namespace Arg { + +namespace Key { +/// \brief Key for Kernel::Arg::Metadata::mSize. +constexpr char Size[] = "Size"; +/// \brief Key for Kernel::Arg::Metadata::mAlign. +constexpr char Align[] = "Align"; +/// \brief Key for Kernel::Arg::Metadata::mValueKind. +constexpr char ValueKind[] = "ValueKind"; +/// \brief Key for Kernel::Arg::Metadata::mValueType. +constexpr char ValueType[] = "ValueType"; +/// \brief Key for Kernel::Arg::Metadata::mPointeeAlign. +constexpr char PointeeAlign[] = "PointeeAlign"; +/// \brief Key for Kernel::Arg::Metadata::mAccQual. +constexpr char AccQual[] = "AccQual"; +/// \brief Key for Kernel::Arg::Metadata::mAddrSpaceQual. +constexpr char AddrSpaceQual[] = "AddrSpaceQual"; +/// \brief Key for Kernel::Arg::Metadata::mIsConst. +constexpr char IsConst[] = "IsConst"; +/// \brief Key for Kernel::Arg::Metadata::mIsPipe. +constexpr char IsPipe[] = "IsPipe"; +/// \brief Key for Kernel::Arg::Metadata::mIsRestrict. +constexpr char IsRestrict[] = "IsRestrict"; +/// \brief Key for Kernel::Arg::Metadata::mIsVolatile. +constexpr char IsVolatile[] = "IsVolatile"; +/// \brief Key for Kernel::Arg::Metadata::mName. +constexpr char Name[] = "Name"; +/// \brief Key for Kernel::Arg::Metadata::mTypeName. +constexpr char TypeName[] = "TypeName"; +} // end namespace Key + +/// \brief In-memory representation of kernel argument metadata. +struct Metadata final { + /// \brief Size in bytes. Required. + uint32_t mSize = 0; + /// \brief Alignment in bytes. Required. + uint32_t mAlign = 0; + /// \brief Value kind. Required. + ValueKind mValueKind = ValueKind::Unknown; + /// \brief Value type. Required. + ValueType mValueType = ValueType::Unknown; + /// \brief Pointee alignment in bytes. Optional. + uint32_t mPointeeAlign = 0; + /// \brief Access qualifier. Optional. + AccessQualifier mAccQual = AccessQualifier::Unknown; + /// \brief Address space qualifier. Optional. + AddressSpaceQualifier mAddrSpaceQual = AddressSpaceQualifier::Unknown; + /// \brief True if 'const' qualifier is specified. Optional. + bool mIsConst = false; + /// \brief True if 'pipe' qualifier is specified. Optional. + bool mIsPipe = false; + /// \brief True if 'restrict' qualifier is specified. Optional. + bool mIsRestrict = false; + /// \brief True if 'volatile' qualifier is specified. Optional. + bool mIsVolatile = false; + /// \brief Name. Optional. + std::string mName = std::string(); + /// \brief Type name. Optional. + std::string mTypeName = std::string(); + + /// \brief Default constructor. + Metadata() = default; +}; + +} // end namespace Arg + +//===----------------------------------------------------------------------===// +// Kernel Code Properties Metadata. +//===----------------------------------------------------------------------===// +namespace CodeProps { + +namespace Key { +/// \brief Key for Kernel::CodeProps::Metadata::mKernargSegmentSize. +constexpr char KernargSegmentSize[] = "KernargSegmentSize"; +/// \brief Key for Kernel::CodeProps::Metadata::mWorkgroupGroupSegmentSize. +constexpr char WorkgroupGroupSegmentSize[] = "WorkgroupGroupSegmentSize"; +/// \brief Key for Kernel::CodeProps::Metadata::mWorkitemPrivateSegmentSize. 
+constexpr char WorkitemPrivateSegmentSize[] = "WorkitemPrivateSegmentSize"; +/// \brief Key for Kernel::CodeProps::Metadata::mWavefrontNumSGPRs. +constexpr char WavefrontNumSGPRs[] = "WavefrontNumSGPRs"; +/// \brief Key for Kernel::CodeProps::Metadata::mWorkitemNumVGPRs. +constexpr char WorkitemNumVGPRs[] = "WorkitemNumVGPRs"; +/// \brief Key for Kernel::CodeProps::Metadata::mKernargSegmentAlign. +constexpr char KernargSegmentAlign[] = "KernargSegmentAlign"; +/// \brief Key for Kernel::CodeProps::Metadata::mGroupSegmentAlign. +constexpr char GroupSegmentAlign[] = "GroupSegmentAlign"; +/// \brief Key for Kernel::CodeProps::Metadata::mPrivateSegmentAlign. +constexpr char PrivateSegmentAlign[] = "PrivateSegmentAlign"; +/// \brief Key for Kernel::CodeProps::Metadata::mWavefrontSize. +constexpr char WavefrontSize[] = "WavefrontSize"; +} // end namespace Key + +/// \brief In-memory representation of kernel code properties metadata. +struct Metadata final { + /// \brief Size in bytes of the kernarg segment memory. Kernarg segment memory + /// holds the values of the arguments to the kernel. Optional. + uint64_t mKernargSegmentSize = 0; + /// \brief Size in bytes of the group segment memory required by a workgroup. + /// This value does not include any dynamically allocated group segment memory + /// that may be added when the kernel is dispatched. Optional. + uint32_t mWorkgroupGroupSegmentSize = 0; + /// \brief Size in bytes of the private segment memory required by a workitem. + /// Private segment memory includes arg, spill and private segments. Optional. + uint32_t mWorkitemPrivateSegmentSize = 0; + /// \brief Total number of SGPRs used by a wavefront. Optional. + uint16_t mWavefrontNumSGPRs = 0; + /// \brief Total number of VGPRs used by a workitem. Optional. + uint16_t mWorkitemNumVGPRs = 0; + /// \brief Maximum byte alignment of variables used by the kernel in the + /// kernarg memory segment. Expressed as a power of two. Optional. + uint8_t mKernargSegmentAlign = 0; + /// \brief Maximum byte alignment of variables used by the kernel in the + /// group memory segment. Expressed as a power of two. Optional. + uint8_t mGroupSegmentAlign = 0; + /// \brief Maximum byte alignment of variables used by the kernel in the + /// private memory segment. Expressed as a power of two. Optional. + uint8_t mPrivateSegmentAlign = 0; + /// \brief Wavefront size. Expressed as a power of two. Optional. + uint8_t mWavefrontSize = 0; + + /// \brief Default constructor. + Metadata() = default; + + /// \returns True if kernel code properties metadata is empty, false + /// otherwise. + bool empty() const { + return !notEmpty(); + } + + /// \returns True if kernel code properties metadata is not empty, false + /// otherwise. + bool notEmpty() const { + return mKernargSegmentSize || mWorkgroupGroupSegmentSize || + mWorkitemPrivateSegmentSize || mWavefrontNumSGPRs || + mWorkitemNumVGPRs || mKernargSegmentAlign || mGroupSegmentAlign || + mPrivateSegmentAlign || mWavefrontSize; + } +}; + +} // end namespace CodeProps + +//===----------------------------------------------------------------------===// +// Kernel Debug Properties Metadata. +//===----------------------------------------------------------------------===// +namespace DebugProps { + +namespace Key { +/// \brief Key for Kernel::DebugProps::Metadata::mDebuggerABIVersion. +constexpr char DebuggerABIVersion[] = "DebuggerABIVersion"; +/// \brief Key for Kernel::DebugProps::Metadata::mReservedNumVGPRs. 
+constexpr char ReservedNumVGPRs[] = "ReservedNumVGPRs"; +/// \brief Key for Kernel::DebugProps::Metadata::mReservedFirstVGPR. +constexpr char ReservedFirstVGPR[] = "ReservedFirstVGPR"; +/// \brief Key for Kernel::DebugProps::Metadata::mPrivateSegmentBufferSGPR. +constexpr char PrivateSegmentBufferSGPR[] = "PrivateSegmentBufferSGPR"; +/// \brief Key for +/// Kernel::DebugProps::Metadata::mWavefrontPrivateSegmentOffsetSGPR. +constexpr char WavefrontPrivateSegmentOffsetSGPR[] = + "WavefrontPrivateSegmentOffsetSGPR"; +} // end namespace Key + +/// \brief In-memory representation of kernel debug properties metadata. +struct Metadata final { + /// \brief Debugger ABI version. Optional. + std::vector mDebuggerABIVersion = std::vector(); + /// \brief Consecutive number of VGPRs reserved for debugger use. Must be 0 if + /// mDebuggerABIVersion is not set. Optional. + uint16_t mReservedNumVGPRs = 0; + /// \brief First fixed VGPR reserved. Must be uint16_t(-1) if + /// mDebuggerABIVersion is not set or mReservedFirstVGPR is 0. Optional. + uint16_t mReservedFirstVGPR = uint16_t(-1); + /// \brief Fixed SGPR of the first of 4 SGPRs used to hold the scratch V# used + /// for the entire kernel execution. Must be uint16_t(-1) if + /// mDebuggerABIVersion is not set or SGPR not used or not known. Optional. + uint16_t mPrivateSegmentBufferSGPR = uint16_t(-1); + /// \brief Fixed SGPR used to hold the wave scratch offset for the entire + /// kernel execution. Must be uint16_t(-1) if mDebuggerABIVersion is not set + /// or SGPR is not used or not known. Optional. + uint16_t mWavefrontPrivateSegmentOffsetSGPR = uint16_t(-1); + + /// \brief Default constructor. + Metadata() = default; + + /// \returns True if kernel debug properties metadata is empty, false + /// otherwise. + bool empty() const { + return !notEmpty(); + } + + /// \returns True if kernel debug properties metadata is not empty, false + /// otherwise. + bool notEmpty() const { + return !mDebuggerABIVersion.empty(); + } +}; + +} // end namespace DebugProps + +namespace Key { +/// \brief Key for Kernel::Metadata::mName. +constexpr char Name[] = "Name"; +/// \brief Key for Kernel::Metadata::mLanguage. +constexpr char Language[] = "Language"; +/// \brief Key for Kernel::Metadata::mLanguageVersion. +constexpr char LanguageVersion[] = "LanguageVersion"; +/// \brief Key for Kernel::Metadata::mAttrs. +constexpr char Attrs[] = "Attrs"; +/// \brief Key for Kernel::Metadata::mArgs. +constexpr char Args[] = "Args"; +/// \brief Key for Kernel::Metadata::mCodeProps. +constexpr char CodeProps[] = "CodeProps"; +/// \brief Key for Kernel::Metadata::mDebugProps. +constexpr char DebugProps[] = "DebugProps"; +} // end namespace Key + +/// \brief In-memory representation of kernel metadata. +struct Metadata final { + /// \brief Name. Required. + std::string mName = std::string(); + /// \brief Language. Optional. + std::string mLanguage = std::string(); + /// \brief Language version. Optional. + std::vector mLanguageVersion = std::vector(); + /// \brief Attributes metadata. Optional. + Attrs::Metadata mAttrs = Attrs::Metadata(); + /// \brief Arguments metadata. Optional. + std::vector mArgs = std::vector(); + /// \brief Code properties metadata. Optional. + CodeProps::Metadata mCodeProps = CodeProps::Metadata(); + /// \brief Debug properties metadata. Optional. + DebugProps::Metadata mDebugProps = DebugProps::Metadata(); + + /// \brief Default constructor. 
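Putting the pieces together, a sketch of populating the in-memory kernel metadata declared in this header; every field value here is invented:

#include "llvm/Support/AMDGPUCodeObjectMetadata.h"
using namespace llvm::AMDGPU::CodeObject;

static Kernel::Metadata makeKernelMetadata() {
  Kernel::Metadata MD;
  MD.mName = "add_vectors";     // Required.
  MD.mLanguage = "OpenCL C";    // Optional.
  MD.mLanguageVersion = {2, 0}; // Optional.
  MD.mCodeProps.mKernargSegmentSize = 24;
  // Per the doc comment, alignments are expressed as a power of two:
  // 3 here means 2^3 = 8-byte alignment.
  MD.mCodeProps.mKernargSegmentAlign = 3;
  return MD;
}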
+ Metadata() = default; +}; + +} // end namespace Kernel + +namespace Key { +/// \brief Key for CodeObject::Metadata::mVersion. +constexpr char Version[] = "Version"; +/// \brief Key for CodeObject::Metadata::mPrintf. +constexpr char Printf[] = "Printf"; +/// \brief Key for CodeObject::Metadata::mKernels. +constexpr char Kernels[] = "Kernels"; +} // end namespace Key + +/// \brief In-memory representation of code object metadata. +struct Metadata final { + /// \brief Code object metadata version. Required. + std::vector mVersion = std::vector(); + /// \brief Printf metadata. Optional. + std::vector mPrintf = std::vector(); + /// \brief Kernels metadata. Optional. + std::vector mKernels = std::vector(); + + /// \brief Default constructor. + Metadata() = default; + + /// \brief Converts \p YamlString to \p CodeObjectMetadata. + static std::error_code fromYamlString(std::string YamlString, + Metadata &CodeObjectMetadata); + + /// \brief Converts \p CodeObjectMetadata to \p YamlString. + static std::error_code toYamlString(Metadata CodeObjectMetadata, + std::string &YamlString); +}; + +} // end namespace CodeObject +} // end namespace AMDGPU +} // end namespace llvm + +#endif // LLVM_SUPPORT_AMDGPUCODEOBJECTMETADATA_H diff --git a/interpreter/llvm/src/include/llvm/Support/ARMTargetParser.def b/interpreter/llvm/src/include/llvm/Support/ARMTargetParser.def index 32dc57a0fedf5..65cb2715a6a5e 100644 --- a/interpreter/llvm/src/include/llvm/Support/ARMTargetParser.def +++ b/interpreter/llvm/src/include/llvm/Support/ARMTargetParser.def @@ -206,7 +206,7 @@ ARM_CPU_NAME("cortex-a5", AK_ARMV7A, FK_NEON_VFPV4, false, ARM_CPU_NAME("cortex-a7", AK_ARMV7A, FK_NEON_VFPV4, false, (ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB)) -ARM_CPU_NAME("cortex-a8", AK_ARMV7A, FK_NEON, true, ARM::AEK_SEC) +ARM_CPU_NAME("cortex-a8", AK_ARMV7A, FK_NEON, false, ARM::AEK_SEC) ARM_CPU_NAME("cortex-a9", AK_ARMV7A, FK_NEON_FP16, false, (ARM::AEK_SEC | ARM::AEK_MP)) ARM_CPU_NAME("cortex-a12", AK_ARMV7A, FK_NEON_VFPV4, false, (ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM | @@ -236,7 +236,7 @@ ARM_CPU_NAME("cortex-m23", AK_ARMV8MBaseline, FK_NONE, false, ARM::AEK_NONE) ARM_CPU_NAME("cortex-m33", AK_ARMV8MMainline, FK_FPV5_SP_D16, false, ARM::AEK_DSP) ARM_CPU_NAME("cortex-a32", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_CRC) ARM_CPU_NAME("cortex-a35", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_CRC) -ARM_CPU_NAME("cortex-a53", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, true, ARM::AEK_CRC) +ARM_CPU_NAME("cortex-a53", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_CRC) ARM_CPU_NAME("cortex-a57", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_CRC) ARM_CPU_NAME("cortex-a72", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_CRC) ARM_CPU_NAME("cortex-a73", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_CRC) diff --git a/interpreter/llvm/src/include/llvm/Support/BinaryItemStream.h b/interpreter/llvm/src/include/llvm/Support/BinaryItemStream.h index f4b319217819e..fe7e6caeaafb7 100644 --- a/interpreter/llvm/src/include/llvm/Support/BinaryItemStream.h +++ b/interpreter/llvm/src/include/llvm/Support/BinaryItemStream.h @@ -62,32 +62,45 @@ class BinaryItemStream : public BinaryStream { return Error::success(); } - void setItems(ArrayRef ItemArray) { Items = ItemArray; } + void setItems(ArrayRef ItemArray) { + Items = ItemArray; + computeItemOffsets(); + } uint32_t getLength() override { - uint32_t Size = 0; - for (const auto &Item : Items) - Size += 
Traits::length(Item); - return Size; + return ItemEndOffsets.empty() ? 0 : ItemEndOffsets.back(); } private: - Expected translateOffsetIndex(uint32_t Offset) const { + void computeItemOffsets() { + ItemEndOffsets.clear(); + ItemEndOffsets.reserve(Items.size()); uint32_t CurrentOffset = 0; - uint32_t CurrentIndex = 0; for (const auto &Item : Items) { - if (CurrentOffset >= Offset) - break; - CurrentOffset += Traits::length(Item); - ++CurrentIndex; + uint32_t Len = Traits::length(Item); + assert(Len > 0 && "no empty items"); + CurrentOffset += Len; + ItemEndOffsets.push_back(CurrentOffset); } - if (CurrentOffset != Offset) + } + + Expected translateOffsetIndex(uint32_t Offset) { + // Make sure the offset is somewhere in our items array. + if (Offset >= getLength()) return make_error(stream_error_code::stream_too_short); - return CurrentIndex; + ++Offset; + auto Iter = + std::lower_bound(ItemEndOffsets.begin(), ItemEndOffsets.end(), Offset); + size_t Idx = std::distance(ItemEndOffsets.begin(), Iter); + assert(Idx < Items.size() && "binary search for offset failed"); + return Idx; } llvm::support::endianness Endian; ArrayRef Items; + + // Sorted vector of offsets to accelerate lookup. + std::vector ItemEndOffsets; }; } // end namespace llvm diff --git a/interpreter/llvm/src/include/llvm/Support/BinaryStreamArray.h b/interpreter/llvm/src/include/llvm/Support/BinaryStreamArray.h index 77c99ffff919b..3f5562ba75195 100644 --- a/interpreter/llvm/src/include/llvm/Support/BinaryStreamArray.h +++ b/interpreter/llvm/src/include/llvm/Support/BinaryStreamArray.h @@ -42,36 +42,114 @@ namespace llvm { /// having to specify a second template argument to VarStreamArray (documented /// below). template struct VarStreamArrayExtractor { - struct ContextType {}; - // Method intentionally deleted. You must provide an explicit specialization - // with one of the following two methods implemented. - static Error extract(BinaryStreamRef Stream, uint32_t &Len, T &Item) = delete; + // with the following method implemented. + Error operator()(BinaryStreamRef Stream, uint32_t &Len, + T &Item) const = delete; +}; + +/// VarStreamArray represents an array of variable length records backed by a +/// stream. This could be a contiguous sequence of bytes in memory, it could +/// be a file on disk, or it could be a PDB stream where bytes are stored as +/// discontiguous blocks in a file. Usually it is desirable to treat arrays +/// as contiguous blocks of memory, but doing so with large PDB files, for +/// example, could mean allocating huge amounts of memory just to allow +/// re-ordering of stream data to be contiguous before iterating over it. By +/// abstracting this out, we need not duplicate this memory, and we can +/// iterate over arrays in arbitrarily formatted streams. Elements are parsed +/// lazily on iteration, so there is no upfront cost associated with building +/// or copying a VarStreamArray, no matter how large it may be. +/// +/// You create a VarStreamArray by specifying a ValueType and an Extractor type. +/// If you do not specify an Extractor type, you are expected to specialize +/// VarStreamArrayExtractor for your ValueType. +/// +/// By default an Extractor is default constructed in the class, but in some +/// cases you might find it useful for an Extractor to maintain state across +/// extractions. In this case you can provide your own Extractor through a +/// secondary constructor. The following examples show various ways of +/// creating a VarStreamArray. 
+/// +/// // Will use VarStreamArrayExtractor as the extractor. +/// VarStreamArray MyTypeArray; +/// +/// // Will use a default-constructed MyExtractor as the extractor. +/// VarStreamArray MyTypeArray2; +/// +/// // Will use the specific instance of MyExtractor provided. +/// // MyExtractor need not be default-constructible in this case. +/// MyExtractor E(SomeContext); +/// VarStreamArray MyTypeArray3(E); +/// + +template class VarStreamArrayIterator; + +template > +class VarStreamArray { + friend class VarStreamArrayIterator; + +public: + typedef VarStreamArrayIterator Iterator; + + VarStreamArray() = default; + + explicit VarStreamArray(const Extractor &E) : E(E) {} + + explicit VarStreamArray(BinaryStreamRef Stream) : Stream(Stream) {} + + VarStreamArray(BinaryStreamRef Stream, const Extractor &E) + : Stream(Stream), E(E) {} - static Error extract(BinaryStreamRef Stream, uint32_t &Len, T &Item, - const ContextType &Ctx) = delete; + Iterator begin(bool *HadError = nullptr) const { + return Iterator(*this, E, HadError); + } + + bool valid() const { return Stream.valid(); } + + Iterator end() const { return Iterator(E); } + + bool empty() const { return Stream.getLength() == 0; } + + /// \brief given an offset into the array's underlying stream, return an + /// iterator to the record at that offset. This is considered unsafe + /// since the behavior is undefined if \p Offset does not refer to the + /// beginning of a valid record. + Iterator at(uint32_t Offset) const { + return Iterator(*this, E, Offset, nullptr); + } + + const Extractor &getExtractor() const { return E; } + Extractor &getExtractor() { return E; } + + BinaryStreamRef getUnderlyingStream() const { return Stream; } + void setUnderlyingStream(BinaryStreamRef S) { Stream = S; } + +private: + BinaryStreamRef Stream; + Extractor E; }; -template +template class VarStreamArrayIterator - : public iterator_facade_base< - VarStreamArrayIterator, - std::forward_iterator_tag, Value> { - typedef VarStreamArrayIterator - IterType; + : public iterator_facade_base, + std::forward_iterator_tag, ValueType> { + typedef VarStreamArrayIterator IterType; + typedef VarStreamArray ArrayType; public: - VarStreamArrayIterator() = default; - VarStreamArrayIterator(const ArrayType &Array, const WrappedCtx &Ctx, - BinaryStreamRef Stream, bool *HadError = nullptr, - uint32_t Offset = 0) - : IterRef(Stream), Ctx(&Ctx), Array(&Array), AbsOffset(Offset), - HadError(HadError) { + VarStreamArrayIterator(const ArrayType &Array, const Extractor &E, + bool *HadError) + : VarStreamArrayIterator(Array, E, 0, HadError) {} + + VarStreamArrayIterator(const ArrayType &Array, const Extractor &E, + uint32_t Offset, bool *HadError) + : IterRef(Array.Stream.drop_front(Offset)), Extract(E), + Array(&Array), AbsOffset(Offset), HadError(HadError) { if (IterRef.getLength() == 0) moveToEnd(); else { - auto EC = Ctx.template invoke(IterRef, ThisLen, ThisValue); + auto EC = Extract(IterRef, ThisLen, ThisValue); if (EC) { consumeError(std::move(EC)); markError(); @@ -79,13 +157,8 @@ class VarStreamArrayIterator } } - VarStreamArrayIterator(const ArrayType &Array, const WrappedCtx &Ctx, - bool *HadError = nullptr) - : VarStreamArrayIterator(Array, Ctx, Array.Stream, HadError) {} - - VarStreamArrayIterator(const WrappedCtx &Ctx) : Ctx(&Ctx) {} - VarStreamArrayIterator(const VarStreamArrayIterator &Other) = default; - + VarStreamArrayIterator() = default; + explicit VarStreamArrayIterator(const Extractor &E) : Extract(E) {} ~VarStreamArrayIterator() = default; bool 
operator==(const IterType &R) const { @@ -103,12 +176,12 @@ class VarStreamArrayIterator return false; } - const Value &operator*() const { + const ValueType &operator*() const { assert(Array && !HasError); return ThisValue; } - Value &operator*() { + ValueType &operator*() { assert(Array && !HasError); return ThisValue; } @@ -125,7 +198,7 @@ class VarStreamArrayIterator moveToEnd(); } else { // There is some data after the current record. - auto EC = Ctx->template invoke(IterRef, ThisLen, ThisValue); + auto EC = Extract(IterRef, ThisLen, ThisValue); if (EC) { consumeError(std::move(EC)); markError(); @@ -153,9 +226,9 @@ class VarStreamArrayIterator *HadError = true; } - Value ThisValue; + ValueType ThisValue; BinaryStreamRef IterRef; - const WrappedCtx *Ctx{nullptr}; + Extractor Extract; const ArrayType *Array{nullptr}; uint32_t ThisLen{0}; uint32_t AbsOffset{0}; @@ -163,127 +236,6 @@ class VarStreamArrayIterator bool *HadError{nullptr}; }; -template struct ContextWrapper { - ContextWrapper() = default; - - explicit ContextWrapper(Context &&Ctx) : Ctx(Ctx) {} - - template - Error invoke(BinaryStreamRef Stream, uint32_t &Len, T &Item) const { - return Extractor::extract(Stream, Len, Item, Ctx); - } - - Context Ctx; -}; - -template struct ContextWrapper { - ContextWrapper() = default; - - template - Error invoke(BinaryStreamRef Stream, uint32_t &Len, T &Item) const { - return Extractor::extract(Stream, Len, Item); - } -}; - -/// VarStreamArray represents an array of variable length records backed by a -/// stream. This could be a contiguous sequence of bytes in memory, it could -/// be a file on disk, or it could be a PDB stream where bytes are stored as -/// discontiguous blocks in a file. Usually it is desirable to treat arrays -/// as contiguous blocks of memory, but doing so with large PDB files, for -/// example, could mean allocating huge amounts of memory just to allow -/// re-ordering of stream data to be contiguous before iterating over it. By -/// abstracting this out, we need not duplicate this memory, and we can -/// iterate over arrays in arbitrarily formatted streams. Elements are parsed -/// lazily on iteration, so there is no upfront cost associated with building -/// or copying a VarStreamArray, no matter how large it may be. -/// -/// You create a VarStreamArray by specifying a ValueType and an Extractor type. -/// If you do not specify an Extractor type, you are expected to specialize -/// VarStreamArrayExtractor for your ValueType. -/// -/// The default extractor type is stateless, but by specializing -/// VarStreamArrayExtractor or defining your own custom extractor type and -/// adding the appropriate ContextType typedef to the class, you can pass a -/// context field during construction of the VarStreamArray that will be -/// passed to each call to extract. 
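Under the new protocol a custom extractor is just a callable (possibly stateful) object, replacing the static extract()/ContextType machinery removed below. A sketch with an invented record format, assuming BinaryStreamReader's readInteger/readBytes/getOffset helpers:

#include "llvm/Support/BinaryStreamArray.h"
#include "llvm/Support/BinaryStreamReader.h"
using namespace llvm;

// Hypothetical record: a 16-bit size prefix, a 16-bit kind, then payload.
struct MyRecord { uint16_t Kind; ArrayRef<uint8_t> Payload; };

struct MyExtractor {
  // Any state carried across extractions would live here.
  Error operator()(BinaryStreamRef Stream, uint32_t &Len, MyRecord &Item) const {
    BinaryStreamReader Reader(Stream);
    uint16_t Size;
    if (auto EC = Reader.readInteger(Size))
      return EC;
    if (auto EC = Reader.readInteger(Item.Kind))
      return EC;
    if (auto EC = Reader.readBytes(Item.Payload, Size))
      return EC;
    Len = Reader.getOffset(); // total bytes consumed by this record
    return Error::success();
  }
};

// Usage: VarStreamArray<MyRecord, MyExtractor> Records(Stream);
//        for (const MyRecord &R : Records) ...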
-/// -template -class VarStreamArrayBase { - typedef VarStreamArrayBase MyType; - -public: - typedef VarStreamArrayIterator Iterator; - friend Iterator; - - VarStreamArrayBase() = default; - - VarStreamArrayBase(BinaryStreamRef Stream, const WrappedCtx &Ctx) - : Stream(Stream), Ctx(Ctx) {} - - VarStreamArrayBase(const MyType &Other) - : Stream(Other.Stream), Ctx(Other.Ctx) {} - - Iterator begin(bool *HadError = nullptr) const { - if (empty()) - return end(); - - return Iterator(*this, Ctx, Stream, HadError); - } - - bool valid() const { return Stream.valid(); } - - Iterator end() const { return Iterator(Ctx); } - - bool empty() const { return Stream.getLength() == 0; } - - /// \brief given an offset into the array's underlying stream, return an - /// iterator to the record at that offset. This is considered unsafe - /// since the behavior is undefined if \p Offset does not refer to the - /// beginning of a valid record. - Iterator at(uint32_t Offset) const { - return Iterator(*this, Ctx, Stream.drop_front(Offset), nullptr, Offset); - } - - BinaryStreamRef getUnderlyingStream() const { return Stream; } - -private: - BinaryStreamRef Stream; - WrappedCtx Ctx; -}; - -template -class VarStreamArrayImpl - : public VarStreamArrayBase> { - typedef ContextWrapper WrappedContext; - typedef VarStreamArrayImpl MyType; - typedef VarStreamArrayBase BaseType; - -public: - typedef Context ContextType; - - VarStreamArrayImpl() = default; - VarStreamArrayImpl(BinaryStreamRef Stream, Context &&Ctx) - : BaseType(Stream, WrappedContext(std::forward(Ctx))) {} -}; - -template -class VarStreamArrayImpl - : public VarStreamArrayBase> { - typedef ContextWrapper WrappedContext; - typedef VarStreamArrayImpl MyType; - typedef VarStreamArrayBase BaseType; - -public: - VarStreamArrayImpl() = default; - VarStreamArrayImpl(BinaryStreamRef Stream) - : BaseType(Stream, WrappedContext()) {} -}; - -template > -using VarStreamArray = - VarStreamArrayImpl; - template class FixedStreamArrayIterator; /// FixedStreamArray is similar to VarStreamArray, except with each record @@ -338,6 +290,12 @@ template class FixedStreamArray { return FixedStreamArrayIterator(*this, size()); } + const T &front() const { return *begin(); } + const T &back() const { + FixedStreamArrayIterator I = end(); + return *(--I); + } + BinaryStreamRef getUnderlyingStream() const { return Stream; } private: diff --git a/interpreter/llvm/src/include/llvm/Support/BinaryStreamReader.h b/interpreter/llvm/src/include/llvm/Support/BinaryStreamReader.h index 77738077f5ffe..ae5ebb2c36286 100644 --- a/interpreter/llvm/src/include/llvm/Support/BinaryStreamReader.h +++ b/interpreter/llvm/src/include/llvm/Support/BinaryStreamReader.h @@ -14,9 +14,9 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/Support/BinaryStreamArray.h" #include "llvm/Support/BinaryStreamRef.h" +#include "llvm/Support/ConvertUTF.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" -#include "llvm/Support/MathExtras.h" #include "llvm/Support/type_traits.h" #include @@ -32,7 +32,21 @@ namespace llvm { class BinaryStreamReader { public: BinaryStreamReader() = default; - explicit BinaryStreamReader(BinaryStreamRef Stream); + explicit BinaryStreamReader(BinaryStreamRef Ref); + explicit BinaryStreamReader(BinaryStream &Stream); + explicit BinaryStreamReader(ArrayRef Data, + llvm::support::endianness Endian); + explicit BinaryStreamReader(StringRef Data, llvm::support::endianness Endian); + + BinaryStreamReader(const BinaryStreamReader &Other) + : Stream(Other.Stream), Offset(Other.Offset) 
{} + + BinaryStreamReader &operator=(const BinaryStreamReader &Other) { + Stream = Other.Stream; + Offset = Other.Offset; + return *this; + } + virtual ~BinaryStreamReader() {} /// Read as much as possible from the underlying string at the current offset @@ -91,6 +105,13 @@ class BinaryStreamReader { /// returns an appropriate error code. Error readCString(StringRef &Dest); + /// Similar to readCString, however read a null-terminated UTF16 string + /// instead. + /// + /// \returns a success error code if the data was successfully read, otherwise + /// returns an appropriate error code. + Error readWideString(ArrayRef &Dest); + /// Read a \p Length byte string into \p Dest. Whether a copy occurs depends /// on the implementation of the underlying stream. Updates the stream's /// offset to point after the newly read data. @@ -116,6 +137,15 @@ class BinaryStreamReader { /// returns an appropriate error code. Error readStreamRef(BinaryStreamRef &Ref, uint32_t Length); + /// Read \p Length bytes from the underlying stream into \p Stream. This is + /// equivalent to calling getUnderlyingStream().slice(Offset, Length). + /// Updates the stream's offset to point after the newly read object. Never + /// causes a copy. + /// + /// \returns a success error code if the data was successfully read, otherwise + /// returns an appropriate error code. + Error readSubstream(BinarySubstreamRef &Stream, uint32_t Size); + /// Get a pointer to an object of type T from the underlying stream, as if by /// memcpy, and store the result into \p Dest. It is up to the caller to /// ensure that objects of type T can be safely treated in this manner. @@ -177,25 +207,7 @@ class BinaryStreamReader { BinaryStreamRef S; if (auto EC = readStreamRef(S, Size)) return EC; - Array = VarStreamArray(S); - return Error::success(); - } - - /// Read a VarStreamArray of size \p Size bytes and store the result into - /// \p Array. Updates the stream's offset to point after the newly read - /// array. Never causes a copy (although iterating the elements of the - /// VarStreamArray may, depending upon the implementation of the underlying - /// stream). - /// - /// \returns a success error code if the data was successfully read, otherwise - /// returns an appropriate error code. - template - Error readArray(VarStreamArray &Array, uint32_t Size, - ContextType &&Context) { - BinaryStreamRef S; - if (auto EC = readStreamRef(S, Size)) - return EC; - Array = VarStreamArray(S, std::move(Context)); + Array.setUnderlyingStream(S); return Error::success(); } @@ -244,12 +256,14 @@ class BinaryStreamReader { /// \returns the next byte in the stream. uint8_t peek() const; + Error padToAlignment(uint32_t Align); + std::pair split(uint32_t Offset) const; private: BinaryStreamRef Stream; - uint32_t Offset; + uint32_t Offset = 0; }; } // namespace llvm diff --git a/interpreter/llvm/src/include/llvm/Support/BinaryStreamRef.h b/interpreter/llvm/src/include/llvm/Support/BinaryStreamRef.h index 465e724a68861..6d5135cb258dd 100644 --- a/interpreter/llvm/src/include/llvm/Support/BinaryStreamRef.h +++ b/interpreter/llvm/src/include/llvm/Support/BinaryStreamRef.h @@ -16,36 +16,74 @@ #include "llvm/Support/Error.h" #include #include +#include namespace llvm { /// Common stuff for mutable and immutable StreamRefs. 
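The new buffer-taking constructors make the common case direct. A sketch (the field layout is invented; readInteger is the existing integer helper from this header):

#include "llvm/Support/BinaryStreamReader.h"
using namespace llvm;

static Error parseBlob(ArrayRef<uint8_t> Buffer) {
  BinaryStreamReader Reader(Buffer, support::little);
  uint32_t Magic;
  if (auto EC = Reader.readInteger(Magic))
    return EC;
  StringRef Name;
  if (auto EC = Reader.readCString(Name))
    return EC;
  // padToAlignment (added above) skips ahead to the next 4-byte boundary.
  return Reader.padToAlignment(4);
}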
-template class BinaryStreamRefBase { -public: - BinaryStreamRefBase() : Stream(nullptr), ViewOffset(0), Length(0) {} - BinaryStreamRefBase(StreamType &Stream, uint32_t Offset, uint32_t Length) - : Stream(&Stream), ViewOffset(Offset), Length(Length) {} +template class BinaryStreamRefBase { +protected: + BinaryStreamRefBase() = default; + BinaryStreamRefBase(std::shared_ptr SharedImpl, uint32_t Offset, + uint32_t Length) + : SharedImpl(SharedImpl), BorrowedImpl(SharedImpl.get()), + ViewOffset(Offset), Length(Length) {} + BinaryStreamRefBase(StreamType &BorrowedImpl, uint32_t Offset, + uint32_t Length) + : BorrowedImpl(&BorrowedImpl), ViewOffset(Offset), Length(Length) {} + BinaryStreamRefBase(const BinaryStreamRefBase &Other) { + SharedImpl = Other.SharedImpl; + BorrowedImpl = Other.BorrowedImpl; + ViewOffset = Other.ViewOffset; + Length = Other.Length; + } - llvm::support::endianness getEndian() const { return Stream->getEndian(); } +public: + llvm::support::endianness getEndian() const { + return BorrowedImpl->getEndian(); + } uint32_t getLength() const { return Length; } - const StreamType *getStream() const { return Stream; } /// Return a new BinaryStreamRef with the first \p N elements removed. RefType drop_front(uint32_t N) const { - if (!Stream) + if (!BorrowedImpl) return RefType(); N = std::min(N, Length); - return RefType(*Stream, ViewOffset + N, Length - N); + RefType Result(static_cast(*this)); + Result.ViewOffset += N; + Result.Length -= N; + return Result; } - /// Return a new BinaryStreamRef with only the first \p N elements remaining. - RefType keep_front(uint32_t N) const { - if (!Stream) + /// Return a new BinaryStreamRef with the first \p N elements removed. + RefType drop_back(uint32_t N) const { + if (!BorrowedImpl) return RefType(); + N = std::min(N, Length); - return RefType(*Stream, ViewOffset, N); + RefType Result(static_cast(*this)); + Result.Length -= N; + return Result; + } + + /// Return a new BinaryStreamRef with only the first \p N elements remaining. + RefType keep_front(uint32_t N) const { + assert(N <= getLength()); + return drop_back(getLength() - N); + } + + /// Return a new BinaryStreamRef with only the last \p N elements remaining. + RefType keep_back(uint32_t N) const { + assert(N <= getLength()); + return drop_front(getLength() - N); + } + + /// Return a new BinaryStreamRef with the first and last \p N elements + /// removed. + RefType drop_symmetric(uint32_t N) const { + return drop_front(N).drop_back(N); } /// Return a new BinaryStreamRef with the first \p Offset elements removed, @@ -54,8 +92,10 @@ template class BinaryStreamRefBase { return drop_front(Offset).keep_front(Len); } + bool valid() const { return BorrowedImpl != nullptr; } + bool operator==(const RefType &Other) const { - if (Stream != Other.Stream) + if (BorrowedImpl != Other.BorrowedImpl) return false; if (ViewOffset != Other.ViewOffset) return false; @@ -73,9 +113,10 @@ template class BinaryStreamRefBase { return Error::success(); } - StreamType *Stream; - uint32_t ViewOffset; - uint32_t Length; + std::shared_ptr SharedImpl; + StreamType *BorrowedImpl = nullptr; + uint32_t ViewOffset = 0; + uint32_t Length = 0; }; /// \brief BinaryStreamRef is to BinaryStream what ArrayRef is to an Array. It @@ -86,21 +127,27 @@ template class BinaryStreamRefBase { /// and use inheritance to achieve polymorphism. Instead, you should pass /// around BinaryStreamRefs by value and achieve polymorphism that way. 
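The view operations above compose without copying; each call returns a new view whose offsets are relative to the current view, not the underlying stream. A short sketch of the semantics (Data is an arbitrary byte buffer):

#include "llvm/Support/BinaryStreamRef.h"
using namespace llvm;

static void sliceViews(ArrayRef<uint8_t> Data) {
  BinaryStreamRef Ref(Data, support::little);
  BinaryStreamRef Body = Ref.drop_front(16); // everything after a header
  BinaryStreamRef Tail = Ref.keep_back(4);   // the last four bytes
  // slice(Offset, Len) == drop_front(Offset).keep_front(Len).
  BinaryStreamRef Mid = Ref.slice(16, 32);
  (void)Body; (void)Tail; (void)Mid;
}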
class BinaryStreamRef - : public BinaryStreamRefBase { + : public BinaryStreamRefBase { + friend BinaryStreamRefBase; + friend class WritableBinaryStreamRef; + BinaryStreamRef(std::shared_ptr Impl, uint32_t ViewOffset, + uint32_t Length) + : BinaryStreamRefBase(Impl, ViewOffset, Length) {} + public: BinaryStreamRef() = default; - BinaryStreamRef(BinaryStream &Stream) - : BinaryStreamRefBase(Stream, 0, Stream.getLength()) {} - BinaryStreamRef(BinaryStream &Stream, uint32_t Offset, uint32_t Length) - : BinaryStreamRefBase(Stream, Offset, Length) {} + BinaryStreamRef(BinaryStream &Stream); + BinaryStreamRef(BinaryStream &Stream, uint32_t Offset, uint32_t Length); + explicit BinaryStreamRef(ArrayRef Data, + llvm::support::endianness Endian); + explicit BinaryStreamRef(StringRef Data, llvm::support::endianness Endian); + + BinaryStreamRef(const BinaryStreamRef &Other); // Use BinaryStreamRef.slice() instead. BinaryStreamRef(BinaryStreamRef &S, uint32_t Offset, uint32_t Length) = delete; - /// Check if a Stream is valid. - bool valid() const { return Stream != nullptr; } - /// Given an Offset into this StreamRef and a Size, return a reference to a /// buffer owned by the stream. /// @@ -108,12 +155,7 @@ class BinaryStreamRef /// bounds of this BinaryStreamRef's view and the implementation could read /// the data, and an appropriate error code otherwise. Error readBytes(uint32_t Offset, uint32_t Size, - ArrayRef &Buffer) const { - if (auto EC = checkOffset(Offset, Size)) - return EC; - - return Stream->readBytes(ViewOffset + Offset, Size, Buffer); - } + ArrayRef &Buffer) const; /// Given an Offset into this BinaryStreamRef, return a reference to the /// largest buffer the stream could support without necessitating a copy. @@ -121,33 +163,47 @@ class BinaryStreamRef /// \returns a success error code if implementation could read the data, /// and an appropriate error code otherwise. Error readLongestContiguousChunk(uint32_t Offset, - ArrayRef &Buffer) const { - if (auto EC = checkOffset(Offset, 1)) - return EC; - - if (auto EC = - Stream->readLongestContiguousChunk(ViewOffset + Offset, Buffer)) - return EC; - // This StreamRef might refer to a smaller window over a larger stream. In - // that case we will have read out more bytes than we should return, because - // we should not read past the end of the current view. 
- uint32_t MaxLength = Length - Offset; - if (Buffer.size() > MaxLength) - Buffer = Buffer.slice(0, MaxLength); - return Error::success(); + ArrayRef<uint8_t> &Buffer) const; +}; + +struct BinarySubstreamRef { + uint32_t Offset; // Offset in the parent stream + BinaryStreamRef StreamData; // Stream Data + + BinarySubstreamRef slice(uint32_t Off, uint32_t Size) const { + BinaryStreamRef SubSub = StreamData.slice(Off, Size); + return {Off + Offset, SubSub}; + } + BinarySubstreamRef drop_front(uint32_t N) const { + return slice(N, size() - N); + } + BinarySubstreamRef keep_front(uint32_t N) const { return slice(0, N); } + + std::pair<BinarySubstreamRef, BinarySubstreamRef> + split(uint32_t Offset) const { + return std::make_pair(keep_front(Offset), drop_front(Offset)); } + + uint32_t size() const { return StreamData.getLength(); } + bool empty() const { return size() == 0; } }; class WritableBinaryStreamRef - : public BinaryStreamRefBase { + : public BinaryStreamRefBase<WritableBinaryStreamRef, WritableBinaryStream> { + friend BinaryStreamRefBase<WritableBinaryStreamRef, WritableBinaryStream>; + WritableBinaryStreamRef(std::shared_ptr<WritableBinaryStream> Impl, + uint32_t ViewOffset, uint32_t Length) + : BinaryStreamRefBase(Impl, ViewOffset, Length) {} + public: WritableBinaryStreamRef() = default; - WritableBinaryStreamRef(WritableBinaryStream &Stream) - : BinaryStreamRefBase(Stream, 0, Stream.getLength()) {} + WritableBinaryStreamRef(WritableBinaryStream &Stream); WritableBinaryStreamRef(WritableBinaryStream &Stream, uint32_t Offset, - uint32_t Length) - : BinaryStreamRefBase(Stream, Offset, Length) {} + uint32_t Length); + explicit WritableBinaryStreamRef(MutableArrayRef<uint8_t> Data, + llvm::support::endianness Endian); + WritableBinaryStreamRef(const WritableBinaryStreamRef &Other); // Use WritableBinaryStreamRef.slice() instead. WritableBinaryStreamRef(WritableBinaryStreamRef &S, uint32_t Offset, @@ -159,17 +215,13 @@ class WritableBinaryStreamRef /// \returns a success error code if the data could fit within the underlying /// stream at the specified location and the implementation could write the /// data, and an appropriate error code otherwise. - Error writeBytes(uint32_t Offset, ArrayRef<uint8_t> Data) const { - if (auto EC = checkOffset(Offset, Data.size())) - return EC; - - return Stream->writeBytes(ViewOffset + Offset, Data); - } + Error writeBytes(uint32_t Offset, ArrayRef<uint8_t> Data) const; - operator BinaryStreamRef() { return BinaryStreamRef(*Stream); } + /// Convert this WritableBinaryStreamRef to a read-only BinaryStreamRef. + operator BinaryStreamRef() const; /// \brief For buffered streams, commits changes to the backing store.
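To make the new BinarySubstreamRef concrete — a hedged sketch; HeaderSize and splitHeader are illustrative only:

  #include "llvm/Support/BinaryStreamRef.h"
  #include <cstdint>

  using namespace llvm;

  // Carve a fixed-size header off a substream. slice() keeps each half's
  // Offset expressed relative to the parent stream.
  void splitHeader(BinarySubstreamRef Sub) {
    const uint32_t HeaderSize = 8;
    if (Sub.size() < HeaderSize)
      return;
    auto Parts = Sub.split(HeaderSize);
    BinarySubstreamRef Header = Parts.first;   // Header.Offset == Sub.Offset
    BinarySubstreamRef Payload = Parts.second; // Payload.Offset == Sub.Offset + HeaderSize
    (void)Header;
    (void)Payload;
  }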
- Error commit() { return Stream->commit(); } + Error commit(); }; } // end namespace llvm diff --git a/interpreter/llvm/src/include/llvm/Support/BinaryStreamWriter.h b/interpreter/llvm/src/include/llvm/Support/BinaryStreamWriter.h index 1b61c32a25418..a4495a1ce27d4 100644 --- a/interpreter/llvm/src/include/llvm/Support/BinaryStreamWriter.h +++ b/interpreter/llvm/src/include/llvm/Support/BinaryStreamWriter.h @@ -32,7 +32,20 @@ namespace llvm { class BinaryStreamWriter { public: BinaryStreamWriter() = default; - explicit BinaryStreamWriter(WritableBinaryStreamRef Stream); + explicit BinaryStreamWriter(WritableBinaryStreamRef Ref); + explicit BinaryStreamWriter(WritableBinaryStream &Stream); + explicit BinaryStreamWriter(MutableArrayRef Data, + llvm::support::endianness Endian); + + BinaryStreamWriter(const BinaryStreamWriter &Other) + : Stream(Other.Stream), Offset(Other.Offset) {} + + BinaryStreamWriter &operator=(const BinaryStreamWriter &Other) { + Stream = Other.Stream; + Offset = Other.Offset; + return *this; + } + virtual ~BinaryStreamWriter() {} /// Write the bytes specified in \p Buffer to the underlying stream. diff --git a/interpreter/llvm/src/include/llvm/Support/BlockFrequency.h b/interpreter/llvm/src/include/llvm/Support/BlockFrequency.h index 1b45cc52973f6..2e75cbdd29c16 100644 --- a/interpreter/llvm/src/include/llvm/Support/BlockFrequency.h +++ b/interpreter/llvm/src/include/llvm/Support/BlockFrequency.h @@ -71,6 +71,10 @@ class BlockFrequency { bool operator>=(BlockFrequency RHS) const { return Frequency >= RHS.Frequency; } + + bool operator==(BlockFrequency RHS) const { + return Frequency == RHS.Frequency; + } }; } diff --git a/interpreter/llvm/src/include/llvm/Support/CBindingWrapping.h b/interpreter/llvm/src/include/llvm/Support/CBindingWrapping.h index d4633aa7d3c6c..f60f99d376ad4 100644 --- a/interpreter/llvm/src/include/llvm/Support/CBindingWrapping.h +++ b/interpreter/llvm/src/include/llvm/Support/CBindingWrapping.h @@ -14,8 +14,8 @@ #ifndef LLVM_SUPPORT_CBINDINGWRAPPING_H #define LLVM_SUPPORT_CBINDINGWRAPPING_H -#include "llvm/Support/Casting.h" #include "llvm-c/Types.h" +#include "llvm/Support/Casting.h" #define DEFINE_SIMPLE_CONVERSION_FUNCTIONS(ty, ref) \ inline ty *unwrap(ref P) { \ diff --git a/interpreter/llvm/src/include/llvm/Support/CMakeLists.txt b/interpreter/llvm/src/include/llvm/Support/CMakeLists.txt index 8f7aeec9d7f71..95752cf018567 100644 --- a/interpreter/llvm/src/include/llvm/Support/CMakeLists.txt +++ b/interpreter/llvm/src/include/llvm/Support/CMakeLists.txt @@ -9,25 +9,27 @@ function(find_first_existing_file out_var) endfunction() macro(find_first_existing_vc_file out_var path) - find_program(git_executable NAMES git git.exe git.cmd) - # Run from a subdirectory to force git to print an absolute path. - execute_process(COMMAND ${git_executable} rev-parse --git-dir - WORKING_DIRECTORY ${path}/cmake - RESULT_VARIABLE git_result - OUTPUT_VARIABLE git_dir - ERROR_QUIET) - if(git_result EQUAL 0) - string(STRIP "${git_dir}" git_dir) - set(${out_var} "${git_dir}/logs/HEAD") - # some branchless cases (e.g. 'repo') may not yet have .git/logs/HEAD - if (NOT EXISTS "${git_dir}/logs/HEAD") - file(WRITE "${git_dir}/logs/HEAD" "") + if ( LLVM_APPEND_VC_REV ) + find_program(git_executable NAMES git git.exe git.cmd) + # Run from a subdirectory to force git to print an absolute path. 
+ execute_process(COMMAND ${git_executable} rev-parse --git-dir + WORKING_DIRECTORY ${path}/cmake + RESULT_VARIABLE git_result + OUTPUT_VARIABLE git_dir + ERROR_QUIET) + if(git_result EQUAL 0) + string(STRIP "${git_dir}" git_dir) + set(${out_var} "${git_dir}/logs/HEAD") + # some branchless cases (e.g. 'repo') may not yet have .git/logs/HEAD + if (NOT EXISTS "${git_dir}/logs/HEAD") + file(WRITE "${git_dir}/logs/HEAD" "") + endif() + else() + find_first_existing_file(${out_var} + "${path}/.svn/wc.db" # SVN 1.7 + "${path}/.svn/entries" # SVN 1.6 + ) endif() - else() - find_first_existing_file(${out_var} - "${path}/.svn/wc.db" # SVN 1.7 - "${path}/.svn/entries" # SVN 1.6 - ) endif() endmacro() @@ -38,13 +40,6 @@ set(version_inc "${CMAKE_CURRENT_BINARY_DIR}/VCSRevision.h") set(get_svn_script "${LLVM_CMAKE_PATH}/GenerateVersionFromCVS.cmake") -# Ugly hack to prevent rebuilding LLVM whenever the git HEAD timestamp -# changes. This is properly solved by setting LLVM_APPEND_VC_REV to OFF -# but this only really works once this review is included in our LLVM -# version: https://reviews.llvm.org/D35377 -# Once our LLVM version includes this review, this code can be removed -# as we properly set LLVM_APPEND_VC_REV in interpreter/CMakeLists.txt. -set(llvm_vc) if(DEFINED llvm_vc) # Create custom target to generate the VC revision include. add_custom_command(OUTPUT "${version_inc}" @@ -60,9 +55,7 @@ if(DEFINED llvm_vc) PROPERTIES GENERATED TRUE HEADER_FILE_ONLY TRUE) else() - if(NOT EXISTS "${version_inc}") - file(WRITE "${version_inc}" "") - endif() + file(WRITE "${version_inc}" "") endif() add_custom_target(llvm_vcsrevision_h DEPENDS "${version_inc}") diff --git a/interpreter/llvm/src/include/llvm/Support/CachePruning.h b/interpreter/llvm/src/include/llvm/Support/CachePruning.h index e826938878e50..46e34358573bf 100644 --- a/interpreter/llvm/src/include/llvm/Support/CachePruning.h +++ b/interpreter/llvm/src/include/llvm/Support/CachePruning.h @@ -39,8 +39,13 @@ struct CachePruningPolicy { /// available space on the the disk. Set to 100 to indicate no limit, 50 to /// indicate that the cache size will not be left over half the available disk /// space. A value over 100 will be reduced to 100. A value of 0 disables the - /// size-based pruning. - unsigned PercentageOfAvailableSpace = 75; + /// percentage size-based pruning. + unsigned MaxSizePercentageOfAvailableSpace = 75; + + /// The maximum size for the cache directory in bytes. A value over the amount + /// of available space on the disk will be reduced to the amount of available + /// space. A value of 0 disables the absolute size-based pruning. + uint64_t MaxSizeBytes = 0; }; /// Parse the given string as a cache pruning policy. Defaults are taken from a diff --git a/interpreter/llvm/src/include/llvm/Support/Casting.h b/interpreter/llvm/src/include/llvm/Support/Casting.h index 89d2af052dc16..baa2a814e9a16 100644 --- a/interpreter/llvm/src/include/llvm/Support/Casting.h +++ b/interpreter/llvm/src/include/llvm/Support/Casting.h @@ -1,4 +1,4 @@ -//===-- llvm/Support/Casting.h - Allow flexible, checked, casts -*- C++ -*-===// +//===- llvm/Support/Casting.h - Allow flexible, checked, casts --*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -19,6 +19,7 @@ #include "llvm/Support/type_traits.h" #include #include +#include namespace llvm { @@ -31,18 +32,19 @@ namespace llvm { // template selection process... the default implementation is a noop. // template struct simplify_type { - typedef From SimpleType; // The real type this represents... 
+ using SimpleType = From; // The real type this represents... // An accessor to get the real value... static SimpleType &getSimplifiedValue(From &Val) { return Val; } }; template struct simplify_type { - typedef typename simplify_type::SimpleType NonConstSimpleType; - typedef typename add_const_past_pointer::type - SimpleType; - typedef typename add_lvalue_reference_if_not_pointer::type - RetType; + using NonConstSimpleType = typename simplify_type::SimpleType; + using SimpleType = + typename add_const_past_pointer::type; + using RetType = + typename add_lvalue_reference_if_not_pointer::type; + static RetType getSimplifiedValue(const From& Val) { return simplify_type::getSimplifiedValue(const_cast(Val)); } @@ -148,36 +150,35 @@ template LLVM_NODISCARD inline bool isa(const Y &Val) { template struct cast_retty; - // Calculate what type the 'cast' function should return, based on a requested // type of To and a source type of From. template struct cast_retty_impl { - typedef To& ret_type; // Normal case, return Ty& + using ret_type = To &; // Normal case, return Ty& }; template struct cast_retty_impl { - typedef const To &ret_type; // Normal case, return Ty& + using ret_type = const To &; // Normal case, return Ty& }; template struct cast_retty_impl { - typedef To* ret_type; // Pointer arg case, return Ty* + using ret_type = To *; // Pointer arg case, return Ty* }; template struct cast_retty_impl { - typedef const To* ret_type; // Constant pointer arg case, return const Ty* + using ret_type = const To *; // Constant pointer arg case, return const Ty* }; template struct cast_retty_impl { - typedef const To* ret_type; // Constant pointer arg case, return const Ty* + using ret_type = const To *; // Constant pointer arg case, return const Ty* }; template struct cast_retty_impl> { private: - typedef typename cast_retty_impl::ret_type PointerType; - typedef typename std::remove_pointer::type ResultType; + using PointerType = typename cast_retty_impl::ret_type; + using ResultType = typename std::remove_pointer::type; public: - typedef std::unique_ptr ret_type; + using ret_type = std::unique_ptr; }; template @@ -185,19 +186,19 @@ struct cast_retty_wrap { // When the simplified type and the from type are not the same, use the type // simplifier to reduce the type, then reuse cast_retty_impl to get the // resultant type. - typedef typename cast_retty::ret_type ret_type; + using ret_type = typename cast_retty::ret_type; }; template struct cast_retty_wrap { // When the simplified type is equal to the from type, use it directly. 
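What cast_retty computes is easiest to see at a call site; a minimal sketch with a hypothetical hierarchy (Shape and Circle are not part of the patch):

  #include "llvm/Support/Casting.h"
  #include <memory>

  using namespace llvm;

  // LLVM-style RTTI: a Kind tag plus classof(), no vtables required.
  struct Shape {
    enum Kind { SK_Circle, SK_Square };
    Kind K;
    Shape(Kind K) : K(K) {}
  };
  struct Circle : Shape {
    Circle() : Shape(SK_Circle) {}
    static bool classof(const Shape *S) { return S->K == SK_Circle; }
  };

  void demo(Shape *S) {
    // cast_retty derives the result type from the argument: Shape* -> Circle*.
    if (auto *C = dyn_cast<Circle>(S))
      (void)C;
    // For std::unique_ptr arguments it yields a unique_ptr of the target
    // type; unique_dyn_cast moves ownership out on success.
    std::unique_ptr<Shape> Owned(new Circle());
    if (auto OwnedCircle = unique_dyn_cast<Circle>(Owned))
      (void)OwnedCircle;
  }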
- typedef typename cast_retty_impl::ret_type ret_type; + using ret_type = typename cast_retty_impl::ret_type; }; template struct cast_retty { - typedef typename cast_retty_wrap::SimpleType>::ret_type ret_type; + using ret_type = typename cast_retty_wrap< + To, From, typename simplify_type::SimpleType>::ret_type; }; // Ensure the non-simple values are converted using the simplify_type template @@ -393,6 +394,6 @@ LLVM_NODISCARD inline auto unique_dyn_cast_or_null(std::unique_ptr &&Val) return unique_dyn_cast_or_null(Val); } -} // End llvm namespace +} // end namespace llvm -#endif +#endif // LLVM_SUPPORT_CASTING_H diff --git a/interpreter/llvm/src/include/llvm/Support/CommandLine.h b/interpreter/llvm/src/include/llvm/Support/CommandLine.h index ae32e20d6daba..71d2f02930831 100644 --- a/interpreter/llvm/src/include/llvm/Support/CommandLine.h +++ b/interpreter/llvm/src/include/llvm/Support/CommandLine.h @@ -21,18 +21,19 @@ #define LLVM_SUPPORT_COMMANDLINE_H #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/iterator_range.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ManagedStatic.h" #include #include #include +#include #include #include #include @@ -41,6 +42,7 @@ namespace llvm { class StringSaver; +class raw_ostream; /// cl Namespace - This namespace contains all of the command line option /// processing machinery. It is intentionally a short name to make qualified @@ -242,7 +244,7 @@ class Option { // Out of line virtual function to provide home for the class. virtual void anchor(); - int NumOccurrences; // The number of times specified + int NumOccurrences = 0; // The number of times specified // Occurrences, HiddenFlag, and Formatting are all enum types but to avoid // problems with signed enums in bitfields. unsigned Occurrences : 3; // enum NumOccurrencesFlag @@ -252,8 +254,8 @@ class Option { unsigned HiddenFlag : 2; // enum OptionHidden unsigned Formatting : 2; // enum FormattingFlags unsigned Misc : 3; - unsigned Position; // Position of last occurrence of the option - unsigned AdditionalVals; // Greater than 0 for multi-valued option. + unsigned Position = 0; // Position of last occurrence of the option + unsigned AdditionalVals = 0; // Greater than 0 for multi-valued option. public: StringRef ArgStr; // The argument string itself (ex: "help", "o") @@ -261,7 +263,7 @@ class Option { StringRef ValueStr; // String describing what the value of this option is OptionCategory *Category; // The Category this option belongs to SmallPtrSet Subs; // The subcommands this option belongs to. - bool FullyInitialized; // Has addArgument been called? + bool FullyInitialized = false; // Has addArgument been called?
inline enum NumOccurrencesFlag getNumOccurrencesFlag() const { return (enum NumOccurrencesFlag)Occurrences; @@ -316,10 +318,8 @@ class Option { protected: explicit Option(enum NumOccurrencesFlag OccurrencesFlag, enum OptionHidden Hidden) - : NumOccurrences(0), Occurrences(OccurrencesFlag), Value(0), - HiddenFlag(Hidden), Formatting(NormalFormatting), Misc(0), Position(0), - AdditionalVals(0), Category(&GeneralCategory), FullyInitialized(false) { - } + : Occurrences(OccurrencesFlag), Value(0), HiddenFlag(Hidden), + Formatting(NormalFormatting), Misc(0), Category(&GeneralCategory) {} inline void setNumAdditionalVals(unsigned n) { AdditionalVals = n; } @@ -447,8 +447,8 @@ struct GenericOptionValue { protected: GenericOptionValue() = default; GenericOptionValue(const GenericOptionValue&) = default; - ~GenericOptionValue() = default; GenericOptionValue &operator=(const GenericOptionValue &) = default; + ~GenericOptionValue() = default; private: virtual void anchor(); @@ -461,7 +461,7 @@ template struct OptionValue; template struct OptionValueBase : public GenericOptionValue { // Temporary storage for argument passing. - typedef OptionValue WrapperType; + using WrapperType = OptionValue; bool hasValue() const { return false; } @@ -487,8 +487,8 @@ template class OptionValueCopy : public GenericOptionValue { protected: OptionValueCopy(const OptionValueCopy&) = default; + OptionValueCopy &operator=(const OptionValueCopy &) = default; ~OptionValueCopy() = default; - OptionValueCopy &operator=(const OptionValueCopy&) = default; public: OptionValueCopy() = default; @@ -519,13 +519,13 @@ template class OptionValueCopy : public GenericOptionValue { // Non-class option values. template struct OptionValueBase : OptionValueCopy { - typedef DataType WrapperType; + using WrapperType = DataType; protected: OptionValueBase() = default; OptionValueBase(const OptionValueBase&) = default; + OptionValueBase &operator=(const OptionValueBase &) = default; ~OptionValueBase() = default; - OptionValueBase &operator=(const OptionValueBase&) = default; }; // Top-level option class. 
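The in-class initializers above are exercised by any ordinary option declaration; a minimal, hypothetical tool for reference (the flag names are illustrative only):

  #include "llvm/Support/CommandLine.h"
  #include "llvm/Support/raw_ostream.h"

  using namespace llvm;

  static cl::opt<std::string> OutputFilename("o", cl::desc("Output file"),
                                             cl::value_desc("filename"),
                                             cl::init("-"));
  static cl::opt<bool> Verbose("verbose", cl::desc("Enable verbose output"));

  int main(int argc, char **argv) {
    cl::ParseCommandLineOptions(argc, argv, "toy driver\n");
    if (Verbose)
      errs() << "writing to " << OutputFilename << "\n";
    return 0;
  }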
@@ -548,7 +548,7 @@ enum boolOrDefault { BOU_UNSET, BOU_TRUE, BOU_FALSE }; template <> struct OptionValue final : OptionValueCopy { - typedef cl::boolOrDefault WrapperType; + using WrapperType = cl::boolOrDefault; OptionValue() = default; @@ -565,7 +565,7 @@ struct OptionValue final template <> struct OptionValue final : OptionValueCopy { - typedef StringRef WrapperType; + using WrapperType = StringRef; OptionValue() = default; @@ -736,13 +736,15 @@ template class parser : public generic_parser_base { public: OptionInfo(StringRef name, DataType v, StringRef helpStr) : GenericOptionInfo(name, helpStr), V(v) {} + OptionValue V; }; SmallVector Values; public: parser(Option &O) : generic_parser_base(O) {} - typedef DataType parser_data_type; + + using parser_data_type = DataType; // Implement virtual functions needed by generic_parser_base unsigned getNumOptions() const override { return unsigned(Values.size()); } @@ -837,10 +839,10 @@ class basic_parser_impl { // non-template implementation of basic_parser // template class basic_parser : public basic_parser_impl { public: - basic_parser(Option &O) : basic_parser_impl(O) {} + using parser_data_type = DataType; + using OptVal = OptionValue; - typedef DataType parser_data_type; - typedef OptionValue OptVal; + basic_parser(Option &O) : basic_parser_impl(O) {} protected: ~basic_parser() = default; @@ -1292,6 +1294,7 @@ class opt : public Option, enum ValueExpected getValueExpectedFlagDefault() const override { return Parser.getValueExpectedFlagDefault(); } + void getExtraOptionNames(SmallVectorImpl &OptionNames) override { return Parser.getExtraOptionNames(OptionNames); } @@ -1300,6 +1303,7 @@ class opt : public Option, size_t getOptionWidth() const override { return Parser.getOptionWidth(*this); } + void printOptionInfo(size_t GlobalWidth) const override { Parser.printOptionInfo(*this, GlobalWidth); } @@ -1384,16 +1388,18 @@ template class list_storage { std::vector Storage; public: - typedef typename std::vector::iterator iterator; + using iterator = typename std::vector::iterator; iterator begin() { return Storage.begin(); } iterator end() { return Storage.end(); } - typedef typename std::vector::const_iterator const_iterator; + using const_iterator = typename std::vector::const_iterator; + const_iterator begin() const { return Storage.begin(); } const_iterator end() const { return Storage.end(); } - typedef typename std::vector::size_type size_type; + using size_type = typename std::vector::size_type; + size_type size() const { return Storage.size(); } bool empty() const { return Storage.empty(); } @@ -1401,8 +1407,9 @@ template class list_storage { void push_back(const DataType &value) { Storage.push_back(value); } void push_back(DataType &&value) { Storage.push_back(value); } - typedef typename std::vector::reference reference; - typedef typename std::vector::const_reference const_reference; + using reference = typename std::vector::reference; + using const_reference = typename std::vector::const_reference; + reference operator[](size_type pos) { return Storage[pos]; } const_reference operator[](size_type pos) const { return Storage[pos]; } @@ -1453,6 +1460,7 @@ class list : public Option, public list_storage { enum ValueExpected getValueExpectedFlagDefault() const override { return Parser.getValueExpectedFlagDefault(); } + void getExtraOptionNames(SmallVectorImpl &OptionNames) override { return Parser.getExtraOptionNames(OptionNames); } @@ -1473,6 +1481,7 @@ class list : public Option, public list_storage { size_t getOptionWidth() const 
override { return Parser.getOptionWidth(*this); } + void printOptionInfo(size_t GlobalWidth) const override { Parser.printOptionInfo(*this, GlobalWidth); } @@ -1592,6 +1601,7 @@ class bits : public Option, public bits_storage { enum ValueExpected getValueExpectedFlagDefault() const override { return Parser.getValueExpectedFlagDefault(); } + void getExtraOptionNames(SmallVectorImpl &OptionNames) override { return Parser.getExtraOptionNames(OptionNames); } @@ -1612,6 +1622,7 @@ class bits : public Option, public bits_storage { size_t getOptionWidth() const override { return Parser.getOptionWidth(*this); } + void printOptionInfo(size_t GlobalWidth) const override { Parser.printOptionInfo(*this, GlobalWidth); } @@ -1824,9 +1835,9 @@ void TokenizeWindowsCommandLine(StringRef Source, StringSaver &Saver, /// \brief String tokenization function type. Should be compatible with either /// Windows or Unix command line tokenizers. -typedef void (*TokenizerCallback)(StringRef Source, StringSaver &Saver, - SmallVectorImpl &NewArgv, - bool MarkEOLs); +using TokenizerCallback = void (*)(StringRef Source, StringSaver &Saver, + SmallVectorImpl &NewArgv, + bool MarkEOLs); /// \brief Expand response files on a command line recursively using the given /// StringSaver and tokenization strategy. Argv should contain the command line @@ -1880,6 +1891,7 @@ void ResetAllOptionOccurrences(); void ResetCommandLineParser(); } // end namespace cl + } // end namespace llvm #endif // LLVM_SUPPORT_COMMANDLINE_H diff --git a/interpreter/llvm/src/include/llvm/Support/Compiler.h b/interpreter/llvm/src/include/llvm/Support/Compiler.h index be9e465400165..b19e37235df57 100644 --- a/interpreter/llvm/src/include/llvm/Support/Compiler.h +++ b/interpreter/llvm/src/include/llvm/Support/Compiler.h @@ -493,4 +493,14 @@ void AnnotateIgnoreWritesEnd(const char *file, int line); #define LLVM_THREAD_LOCAL #endif +/// \macro LLVM_ENABLE_EXCEPTIONS +/// \brief Whether LLVM is built with exception support. +#if __has_feature(cxx_exceptions) +#define LLVM_ENABLE_EXCEPTIONS 1 +#elif defined(__GNUC__) && defined(__EXCEPTIONS) +#define LLVM_ENABLE_EXCEPTIONS 1 +#elif defined(_MSC_VER) && defined(_CPPUNWIND) +#define LLVM_ENABLE_EXCEPTIONS 1 +#endif + #endif diff --git a/interpreter/llvm/src/include/llvm/Support/ConvertUTF.h b/interpreter/llvm/src/include/llvm/Support/ConvertUTF.h index f714c0ed997ed..bd439f3602169 100644 --- a/interpreter/llvm/src/include/llvm/Support/ConvertUTF.h +++ b/interpreter/llvm/src/include/llvm/Support/ConvertUTF.h @@ -90,8 +90,8 @@ #ifndef LLVM_SUPPORT_CONVERTUTF_H #define LLVM_SUPPORT_CONVERTUTF_H -#include #include +#include // Wrap everything in namespace llvm so that programs can link with llvm and // their own version of the unicode libraries. diff --git a/interpreter/llvm/src/include/llvm/Support/DataExtractor.h b/interpreter/llvm/src/include/llvm/Support/DataExtractor.h index 380b628fd95ff..31447882a9196 100644 --- a/interpreter/llvm/src/include/llvm/Support/DataExtractor.h +++ b/interpreter/llvm/src/include/llvm/Support/DataExtractor.h @@ -14,6 +14,30 @@ #include "llvm/Support/DataTypes.h" namespace llvm { + +/// An auxiliary type to facilitate extraction of 3-byte entities. +struct Uint24 { + uint8_t Bytes[3]; + Uint24(uint8_t U) { + Bytes[0] = Bytes[1] = Bytes[2] = U; + } + Uint24(uint8_t U0, uint8_t U1, uint8_t U2) { + Bytes[0] = U0; Bytes[1] = U1; Bytes[2] = U2; + } + uint32_t getAsUint32(bool IsLittleEndian) const { + int LoIx = IsLittleEndian ? 
0 : 2; + return Bytes[LoIx] + (Bytes[1] << 8) + (Bytes[2-LoIx] << 16); + } +}; + +using uint24_t = Uint24; +static_assert(sizeof(uint24_t) == 3, "sizeof(uint24_t) != 3"); + +/// Needed by swapByteOrder(). +inline uint24_t getSwappedBytes(uint24_t C) { + return uint24_t(C.Bytes[2], C.Bytes[1], C.Bytes[0]); +} + class DataExtractor { StringRef Data; uint8_t IsLittleEndian; @@ -236,6 +260,23 @@ class DataExtractor { /// NULL otherise. uint16_t *getU16(uint32_t *offset_ptr, uint16_t *dst, uint32_t count) const; + /// Extract a 24-bit unsigned value from \a *offset_ptr and return it + /// in a uint32_t. + /// + /// Extract 3 bytes from the binary data at the offset pointed to by + /// \a offset_ptr, construct a uint32_t from them and update the offset + /// on success. + /// + /// @param[in,out] offset_ptr + /// A pointer to an offset within the data that will be advanced + /// by the 3 bytes if the value is extracted correctly. If the offset + /// is out of bounds or there are not enough bytes to extract this value, + /// the offset will be left unmodified. + /// + /// @return + /// The extracted 24-bit value represented in a uint32_t. + uint32_t getU24(uint32_t *offset_ptr) const; + /// Extract a uint32_t value from \a *offset_ptr. /// /// Extract a single uint32_t from the binary data at the offset diff --git a/interpreter/llvm/src/include/llvm/Support/DataTypes.h.cmake b/interpreter/llvm/src/include/llvm/Support/DataTypes.h.cmake index 541dbc3d635d7..a58e2e454b7d1 100644 --- a/interpreter/llvm/src/include/llvm/Support/DataTypes.h.cmake +++ b/interpreter/llvm/src/include/llvm/Support/DataTypes.h.cmake @@ -85,11 +85,11 @@ typedef u_int64_t uint64_t; #else /* _MSC_VER */ #ifdef __cplusplus -#include #include +#include #else -#include #include +#include #endif #include diff --git a/interpreter/llvm/src/include/llvm/Support/DebugCounter.h b/interpreter/llvm/src/include/llvm/Support/DebugCounter.h index 9687cb7b9d95f..a533feae7fa38 100644 --- a/interpreter/llvm/src/include/llvm/Support/DebugCounter.h +++ b/interpreter/llvm/src/include/llvm/Support/DebugCounter.h @@ -121,10 +121,10 @@ class DebugCounter { Us.Counters[ID] = Val; } - // Dump or print the current counter set. - LLVM_DUMP_METHOD void dump() { print(dbgs()); } + // Dump or print the current counter set into llvm::dbgs(). + LLVM_DUMP_METHOD void dump() const; - void print(raw_ostream &OS); + void print(raw_ostream &OS) const; // Get the counter ID for a given named counter, or return 0 if none is found. unsigned getCounterId(const std::string &Name) const { diff --git a/interpreter/llvm/src/include/llvm/Support/DynamicLibrary.h b/interpreter/llvm/src/include/llvm/Support/DynamicLibrary.h index aa9bb8938ad3b..469d5dfad0627 100644 --- a/interpreter/llvm/src/include/llvm/Support/DynamicLibrary.h +++ b/interpreter/llvm/src/include/llvm/Support/DynamicLibrary.h @@ -58,7 +58,7 @@ namespace sys { void *getAddressOfSymbol(const char *symbolName); /// This function permanently loads the dynamic library at the given path. - /// The library will only be unloaded when the program terminates. + /// The library will only be unloaded when llvm_shutdown() is called. /// This returns a valid DynamicLibrary instance on success and an invalid /// instance on failure (see isValid()). \p *errMsg will only be modified /// if the library fails to load. @@ -71,7 +71,8 @@ namespace sys { /// Registers an externally loaded library. The library will be unloaded /// when the program terminates. 
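A short sketch of the new 24-bit accessor from the DataExtractor hunk above (readLen24 is hypothetical; per the contract above, the offset is only advanced when all three bytes are available):

  #include "llvm/ADT/StringRef.h"
  #include "llvm/Support/DataExtractor.h"
  #include <cstdint>

  using namespace llvm;

  // Read a 3-byte little-endian length field from the start of a buffer.
  uint32_t readLen24(StringRef Data) {
    DataExtractor DE(Data, /*IsLittleEndian=*/true, /*AddressSize=*/4);
    uint32_t Offset = 0;
    return DE.getU24(&Offset); // Offset becomes 3 on success
  }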
/// - /// It is safe to call this function multiple times for the same library. + /// It is safe to call this function multiple times for the same library, + /// though ownership is only taken if there was no error. /// /// \returns An empty \p DynamicLibrary if the library was already loaded. static DynamicLibrary addPermanentLibrary(void *handle, @@ -87,6 +88,22 @@ namespace sys { return !getPermanentLibrary(Filename, ErrMsg).isValid(); } + enum SearchOrdering { + /// SO_Linker - Search as a call to dlsym(dlopen(NULL)) would when + /// DynamicLibrary::getPermanentLibrary(NULL) has been called or + /// search the list of explicitly loaded symbols if not. + SO_Linker, + /// SO_LoadedFirst - Search all loaded libraries, then as SO_Linker would. + SO_LoadedFirst, + /// SO_LoadedLast - Search as SO_Linker would, then loaded libraries. + /// Only useful to search if libraries with RTLD_LOCAL have been added. + SO_LoadedLast, + /// SO_LoadOrder - Or this in to search libraries in the order loaded. + /// The default behaviour is to search loaded libraries in reverse. + SO_LoadOrder = 4 + }; + static SearchOrdering SearchOrder; // = SO_Linker + /// This function will search through all previously loaded dynamic /// libraries for the symbol \p symbolName. If it is found, the address of /// that symbol is returned. If not, null is returned. Note that this will @@ -106,6 +123,8 @@ namespace sys { /// libraries. /// @brief Add searchable symbol/value pair. static void AddSymbol(StringRef symbolName, void *symbolValue); + + class HandleSet; }; } // End sys namespace diff --git a/interpreter/llvm/src/include/llvm/Support/Endian.h b/interpreter/llvm/src/include/llvm/Support/Endian.h index 06e089ffa166f..f50d9b502dafb 100644 --- a/interpreter/llvm/src/include/llvm/Support/Endian.h +++ b/interpreter/llvm/src/include/llvm/Support/Endian.h @@ -14,27 +14,36 @@ #ifndef LLVM_SUPPORT_ENDIAN_H #define LLVM_SUPPORT_ENDIAN_H +#include "llvm/Support/AlignOf.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Host.h" #include "llvm/Support/SwapByteOrder.h" - -#include <stdint.h> +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <cstring> +#include <type_traits> namespace llvm { namespace support { + enum endianness {big, little, native}; // These are named values for common alignments. enum {aligned = 0, unaligned = 1}; namespace detail { - /// \brief ::value is either alignment, or alignof(T) if alignment is 0. - template - struct PickAlignment { - enum { value = alignment == 0 ? alignof(T) : alignment }; - }; + +/// \brief ::value is either alignment, or alignof(T) if alignment is 0. +template +struct PickAlignment { + enum { value = alignment == 0 ? alignof(T) : alignment }; +}; + } // end namespace detail namespace endian { + constexpr endianness system_endianness() { return sys::IsBigEndianHost ?
big : little; } @@ -190,9 +199,11 @@ inline void writeAtBitAlignment(void *memory, value_type value, &val[0], sizeof(value_type) * 2); } } + } // end namespace endian namespace detail { + template @@ -254,77 +265,78 @@ struct packed_endian_specific_integral { } // end namespace detail -typedef detail::packed_endian_specific_integral - ulittle16_t; -typedef detail::packed_endian_specific_integral - ulittle32_t; -typedef detail::packed_endian_specific_integral - ulittle64_t; - -typedef detail::packed_endian_specific_integral - little16_t; -typedef detail::packed_endian_specific_integral - little32_t; -typedef detail::packed_endian_specific_integral - little64_t; - -typedef detail::packed_endian_specific_integral - aligned_ulittle16_t; -typedef detail::packed_endian_specific_integral - aligned_ulittle32_t; -typedef detail::packed_endian_specific_integral - aligned_ulittle64_t; - -typedef detail::packed_endian_specific_integral - aligned_little16_t; -typedef detail::packed_endian_specific_integral - aligned_little32_t; -typedef detail::packed_endian_specific_integral - aligned_little64_t; - -typedef detail::packed_endian_specific_integral - ubig16_t; -typedef detail::packed_endian_specific_integral - ubig32_t; -typedef detail::packed_endian_specific_integral - ubig64_t; - -typedef detail::packed_endian_specific_integral - big16_t; -typedef detail::packed_endian_specific_integral - big32_t; -typedef detail::packed_endian_specific_integral - big64_t; - -typedef detail::packed_endian_specific_integral - aligned_ubig16_t; -typedef detail::packed_endian_specific_integral - aligned_ubig32_t; -typedef detail::packed_endian_specific_integral - aligned_ubig64_t; - -typedef detail::packed_endian_specific_integral - aligned_big16_t; -typedef detail::packed_endian_specific_integral - aligned_big32_t; -typedef detail::packed_endian_specific_integral - aligned_big64_t; - -typedef detail::packed_endian_specific_integral - unaligned_uint16_t; -typedef detail::packed_endian_specific_integral - unaligned_uint32_t; -typedef detail::packed_endian_specific_integral - unaligned_uint64_t; - -typedef detail::packed_endian_specific_integral - unaligned_int16_t; -typedef detail::packed_endian_specific_integral - unaligned_int32_t; -typedef detail::packed_endian_specific_integral - unaligned_int64_t; +using ulittle16_t = + detail::packed_endian_specific_integral; +using ulittle32_t = + detail::packed_endian_specific_integral; +using ulittle64_t = + detail::packed_endian_specific_integral; + +using little16_t = + detail::packed_endian_specific_integral; +using little32_t = + detail::packed_endian_specific_integral; +using little64_t = + detail::packed_endian_specific_integral; + +using aligned_ulittle16_t = + detail::packed_endian_specific_integral; +using aligned_ulittle32_t = + detail::packed_endian_specific_integral; +using aligned_ulittle64_t = + detail::packed_endian_specific_integral; + +using aligned_little16_t = + detail::packed_endian_specific_integral; +using aligned_little32_t = + detail::packed_endian_specific_integral; +using aligned_little64_t = + detail::packed_endian_specific_integral; + +using ubig16_t = + detail::packed_endian_specific_integral; +using ubig32_t = + detail::packed_endian_specific_integral; +using ubig64_t = + detail::packed_endian_specific_integral; + +using big16_t = + detail::packed_endian_specific_integral; +using big32_t = + detail::packed_endian_specific_integral; +using big64_t = + detail::packed_endian_specific_integral; + +using aligned_ubig16_t = + 
detail::packed_endian_specific_integral; +using aligned_ubig32_t = + detail::packed_endian_specific_integral; +using aligned_ubig64_t = + detail::packed_endian_specific_integral; + +using aligned_big16_t = + detail::packed_endian_specific_integral; +using aligned_big32_t = + detail::packed_endian_specific_integral; +using aligned_big64_t = + detail::packed_endian_specific_integral; + +using unaligned_uint16_t = + detail::packed_endian_specific_integral; +using unaligned_uint32_t = + detail::packed_endian_specific_integral; +using unaligned_uint64_t = + detail::packed_endian_specific_integral; + +using unaligned_int16_t = + detail::packed_endian_specific_integral; +using unaligned_int32_t = + detail::packed_endian_specific_integral; +using unaligned_int64_t = + detail::packed_endian_specific_integral; namespace endian { + template inline T read(const void *P, endianness E) { return read(P, E); } @@ -394,8 +406,10 @@ inline void write64le(void *P, uint64_t V) { write64(P, V); } inline void write16be(void *P, uint16_t V) { write16(P, V); } inline void write32be(void *P, uint32_t V) { write32(P, V); } inline void write64be(void *P, uint64_t V) { write64(P, V); } + } // end namespace endian + } // end namespace support } // end namespace llvm -#endif +#endif // LLVM_SUPPORT_ENDIAN_H diff --git a/interpreter/llvm/src/include/llvm/Support/Errno.h b/interpreter/llvm/src/include/llvm/Support/Errno.h index 8e145c7b0b512..35dc1ea7cf84f 100644 --- a/interpreter/llvm/src/include/llvm/Support/Errno.h +++ b/interpreter/llvm/src/include/llvm/Support/Errno.h @@ -14,7 +14,9 @@ #ifndef LLVM_SUPPORT_ERRNO_H #define LLVM_SUPPORT_ERRNO_H +#include #include +#include namespace llvm { namespace sys { @@ -28,6 +30,16 @@ std::string StrError(); /// Like the no-argument version above, but uses \p errnum instead of errno. std::string StrError(int errnum); +template +inline auto RetryAfterSignal(const FailT &Fail, const Fun &F, + const Args &... As) -> decltype(F(As...)) { + decltype(F(As...)) Res; + do + Res = F(As...); + while (Res == Fail && errno == EINTR); + return Res; +} + } // namespace sys } // namespace llvm diff --git a/interpreter/llvm/src/include/llvm/Support/Error.h b/interpreter/llvm/src/include/llvm/Support/Error.h index a3482f5a58b53..9a7fa0ae6356c 100644 --- a/interpreter/llvm/src/include/llvm/Support/Error.h +++ b/interpreter/llvm/src/include/llvm/Support/Error.h @@ -1,4 +1,4 @@ -//===----- llvm/Support/Error.h - Recoverable error handling ----*- C++ -*-===// +//===- llvm/Support/Error.h - Recoverable error handling --------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -22,6 +22,7 @@ #include "llvm/Support/AlignOf.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ErrorOr.h" #include "llvm/Support/raw_ostream.h" #include @@ -167,7 +168,7 @@ class LLVM_NODISCARD Error { protected: /// Create a success value. Prefer using 'Error::success()' for readability - Error() : Payload(nullptr) { + Error() { setPtr(nullptr); setChecked(false); } @@ -182,7 +183,7 @@ class LLVM_NODISCARD Error { /// Move-construct an error value. The newly constructed error is considered /// unchecked, even if the source error had been checked. The original error /// becomes a checked Success value, regardless of its original state. 
- Error(Error &&Other) : Payload(nullptr) { + Error(Error &&Other) { setChecked(true); *this = std::move(Other); } @@ -299,7 +300,7 @@ class LLVM_NODISCARD Error { return Tmp; } - ErrorInfoBase *Payload; + ErrorInfoBase *Payload = nullptr; }; /// Subclass of Error for the sole purpose of identifying the success path in @@ -327,7 +328,6 @@ template Error make_error(ArgTs &&... Args) { template class ErrorInfo : public ParentErrT { public: - static const void *classID() { return &ThisErrT::ID; } const void *dynamicClassID() const override { return &ThisErrT::ID; } @@ -645,20 +645,22 @@ class ErrorAsOutParameter { template class LLVM_NODISCARD Expected { template friend class ExpectedAsOutParameter; template friend class Expected; + static const bool isRef = std::is_reference::value; - typedef ReferenceStorage::type> wrap; - typedef std::unique_ptr error_type; + using wrap = ReferenceStorage::type>; + + using error_type = std::unique_ptr; public: - typedef typename std::conditional::type storage_type; - typedef T value_type; + using storage_type = typename std::conditional::type; + using value_type = T; private: - typedef typename std::remove_reference::type &reference; - typedef const typename std::remove_reference::type &const_reference; - typedef typename std::remove_reference::type *pointer; - typedef const typename std::remove_reference::type *const_pointer; + using reference = typename std::remove_reference::type &; + using const_reference = const typename std::remove_reference::type &; + using pointer = typename std::remove_reference::type *; + using const_pointer = const typename std::remove_reference::type *; public: /// Create an Expected error value from the given Error. @@ -891,7 +893,6 @@ template class LLVM_NODISCARD Expected { template class ExpectedAsOutParameter { public: - ExpectedAsOutParameter(Expected *ValOrErr) : ValOrErr(ValOrErr) { if (ValOrErr) @@ -1075,6 +1076,27 @@ T cantFail(Expected ValOrErr) { llvm_unreachable("Failure value returned from cantFail wrapped call"); } +/// Report a fatal error if ValOrErr is a failure value, otherwise unwraps and +/// returns the contained reference. +/// +/// This function can be used to wrap calls to fallible functions ONLY when it +/// is known that the Error will always be a success value. E.g. +/// +/// @code{.cpp} +/// // foo only attempts the fallible operation if DoFallibleOperation is +/// // true. If DoFallibleOperation is false then foo always returns a Bar&. +/// Expected foo(bool DoFallibleOperation); +/// +/// Bar &X = cantFail(foo(false)); +/// @endcode +template +T& cantFail(Expected ValOrErr) { + if (ValOrErr) + return *ValOrErr; + else + llvm_unreachable("Failure value returned from cantFail wrapped call"); +} + } // end namespace llvm #endif // LLVM_SUPPORT_ERROR_H diff --git a/interpreter/llvm/src/include/llvm/Support/ErrorHandling.h b/interpreter/llvm/src/include/llvm/Support/ErrorHandling.h index 7c1edd8015712..b45f6348390e2 100644 --- a/interpreter/llvm/src/include/llvm/Support/ErrorHandling.h +++ b/interpreter/llvm/src/include/llvm/Support/ErrorHandling.h @@ -78,12 +78,48 @@ LLVM_ATTRIBUTE_NORETURN void report_fatal_error(StringRef reason, LLVM_ATTRIBUTE_NORETURN void report_fatal_error(const Twine &reason, bool gen_crash_diag = true); - /// This function calls abort(), and prints the optional message to stderr. - /// Use the llvm_unreachable macro (that adds location info), instead of - /// calling this function directly. 
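The new reference overload mirrors the existing value overload of cantFail; a minimal sketch in the spirit of the doc comment above (maybeCompute is hypothetical):

  #include "llvm/Support/Error.h"

  using namespace llvm;

  // Fallible only when DoFallibleOperation is true.
  Expected<int> maybeCompute(bool DoFallibleOperation) {
    if (DoFallibleOperation)
      return make_error<StringError>("failed", inconvertibleErrorCode());
    return 42;
  }

  int demo() {
    // Known-safe call site: cantFail unwraps, or hits llvm_unreachable if the
    // "can't fail" assumption is ever violated.
    return cantFail(maybeCompute(false));
  }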
- LLVM_ATTRIBUTE_NORETURN void - llvm_unreachable_internal(const char *msg=nullptr, const char *file=nullptr, - unsigned line=0); +/// Installs a new bad alloc error handler that should be used whenever a +/// bad alloc error, e.g. failing malloc/calloc, is encountered by LLVM. +/// +/// The user can install a bad alloc handler, in order to define the behavior +/// in case of failing allocations, e.g. throwing an exception. Note that this +/// handler must not trigger any additional allocations itself. +/// +/// If no error handler is installed the default is to print the error message +/// to stderr, and call exit(1). If an error handler is installed then it is +/// the handler's responsibility to log the message, it will no longer be +/// printed to stderr. If the error handler returns, then exit(1) will be +/// called. +/// +/// +/// \param user_data - An argument which will be passed to the installed error +/// handler. +void install_bad_alloc_error_handler(fatal_error_handler_t handler, + void *user_data = nullptr); + +/// Restores default bad alloc error handling behavior. +void remove_bad_alloc_error_handler(); + +/// Reports a bad alloc error, calling any user defined bad alloc +/// error handler. In contrast to the generic 'report_fatal_error' +/// functions, this function is expected to return, e.g. the user +/// defined error handler throws an exception. +/// +/// Note: When throwing an exception in the bad alloc handler, make sure that +/// the following unwind succeeds, e.g. do not trigger additional allocations +/// in the unwind chain. +/// +/// If no error handler is installed (default), then a bad_alloc exception +/// is thrown if LLVM is compiled with exception support, otherwise an assertion +/// is called. +void report_bad_alloc_error(const char *Reason, bool GenCrashDiag = true); + +/// This function calls abort(), and prints the optional message to stderr. +/// Use the llvm_unreachable macro (that adds location info), instead of +/// calling this function directly. +LLVM_ATTRIBUTE_NORETURN void +llvm_unreachable_internal(const char *msg = nullptr, const char *file = nullptr, + unsigned line = 0); } /// Marks that the current location is not supposed to be reachable. diff --git a/interpreter/llvm/src/include/llvm/Support/ErrorOr.h b/interpreter/llvm/src/include/llvm/Support/ErrorOr.h index 877f4063cd232..061fb65db465c 100644 --- a/interpreter/llvm/src/include/llvm/Support/ErrorOr.h +++ b/interpreter/llvm/src/include/llvm/Support/ErrorOr.h @@ -16,13 +16,14 @@ #ifndef LLVM_SUPPORT_ERROROR_H #define LLVM_SUPPORT_ERROROR_H -#include "llvm/ADT/PointerIntPair.h" #include "llvm/Support/AlignOf.h" #include #include #include +#include namespace llvm { + /// \brief Stores a reference that can be changed. 
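Usage of the new bad-alloc hooks above, as a hedged sketch (onBadAlloc is hypothetical; note it must not allocate, per the comment above):

  #include "llvm/Support/ErrorHandling.h"
  #include <cstdio>
  #include <cstdlib>
  #include <string>

  // Emit a fixed message and terminate; no allocation in the handler.
  static void onBadAlloc(void *UserData, const std::string &Reason,
                         bool GenCrashDiag) {
    (void)UserData;
    (void)Reason;
    (void)GenCrashDiag;
    std::fputs("LLVM: out of memory\n", stderr);
    std::abort();
  }

  int main() {
    llvm::install_bad_alloc_error_handler(onBadAlloc, /*user_data=*/nullptr);
    // ... allocation-heavy LLVM work ...
    llvm::remove_bad_alloc_error_handler();
    return 0;
  }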
template class ReferenceStorage { @@ -67,17 +68,19 @@ class ReferenceStorage { template class ErrorOr { template friend class ErrorOr; + static const bool isRef = std::is_reference::value; - typedef ReferenceStorage::type> wrap; + + using wrap = ReferenceStorage::type>; public: - typedef typename std::conditional::type storage_type; + using storage_type = typename std::conditional::type; private: - typedef typename std::remove_reference::type &reference; - typedef const typename std::remove_reference::type &const_reference; - typedef typename std::remove_reference::type *pointer; - typedef const typename std::remove_reference::type *const_pointer; + using reference = typename std::remove_reference::type &; + using const_reference = const typename std::remove_reference::type &; + using pointer = typename std::remove_reference::type *; + using const_pointer = const typename std::remove_reference::type *; public: template @@ -282,6 +285,7 @@ typename std::enable_if::value || operator==(const ErrorOr &Err, E Code) { return Err.getError() == Code; } + } // end namespace llvm #endif // LLVM_SUPPORT_ERROROR_H diff --git a/interpreter/llvm/src/include/llvm/Support/FileSystem.h b/interpreter/llvm/src/include/llvm/Support/FileSystem.h index e3c5de7fbe642..21c5fcdb71450 100644 --- a/interpreter/llvm/src/include/llvm/Support/FileSystem.h +++ b/interpreter/llvm/src/include/llvm/Support/FileSystem.h @@ -233,50 +233,6 @@ class file_status void permissions(perms p) { Perms = p; } }; -/// file_magic - An "enum class" enumeration of file types based on magic (the first -/// N bytes of the file). -struct file_magic { - enum Impl { - unknown = 0, ///< Unrecognized file - bitcode, ///< Bitcode file - archive, ///< ar style archive file - elf, ///< ELF Unknown type - elf_relocatable, ///< ELF Relocatable object file - elf_executable, ///< ELF Executable image - elf_shared_object, ///< ELF dynamically linked shared lib - elf_core, ///< ELF core image - macho_object, ///< Mach-O Object file - macho_executable, ///< Mach-O Executable - macho_fixed_virtual_memory_shared_lib, ///< Mach-O Shared Lib, FVM - macho_core, ///< Mach-O Core File - macho_preload_executable, ///< Mach-O Preloaded Executable - macho_dynamically_linked_shared_lib, ///< Mach-O dynlinked shared lib - macho_dynamic_linker, ///< The Mach-O dynamic linker - macho_bundle, ///< Mach-O Bundle file - macho_dynamically_linked_shared_lib_stub, ///< Mach-O Shared lib stub - macho_dsym_companion, ///< Mach-O dSYM companion file - macho_kext_bundle, ///< Mach-O kext bundle file - macho_universal_binary, ///< Mach-O universal binary - coff_cl_gl_object, ///< Microsoft cl.exe's intermediate code file - coff_object, ///< COFF object file - coff_import_library, ///< COFF import library - pecoff_executable, ///< PECOFF executable file - windows_resource, ///< Windows compiled resource file (.rc) - wasm_object ///< WebAssembly Object file - }; - - bool is_object() const { - return V != unknown; - } - - file_magic() = default; - file_magic(Impl V) : V(V) {} - operator Impl() const { return V; } - -private: - Impl V = unknown; -}; - /// @} /// @name Physical Operators /// @{ @@ -770,17 +726,6 @@ std::error_code openFileForWrite(const Twine &Name, int &ResultFD, std::error_code openFileForRead(const Twine &Name, int &ResultFD, SmallVectorImpl *RealPath = nullptr); -/// @brief Identify the type of a binary file based on how magical it is. -file_magic identify_magic(StringRef magic); - -/// @brief Get and identify \a path's type based on its content. 
-/// - /// @param path Input path. - /// @param result Set to the type of file, or file_magic::unknown. - /// @returns errc::success if result has been successfully set, otherwise a - /// platform-specific error_code. -std::error_code identify_magic(const Twine &path, file_magic &result); - std::error_code getUniqueID(const Twine Path, UniqueID &Result); /// @brief Get disk space usage information. diff --git a/interpreter/llvm/src/include/llvm/Support/Format.h b/interpreter/llvm/src/include/llvm/Support/Format.h index 017b4973f1ffc..bcbd2bec57228 100644 --- a/interpreter/llvm/src/include/llvm/Support/Format.h +++ b/interpreter/llvm/src/include/llvm/Support/Format.h @@ -125,30 +125,39 @@ inline format_object format(const char *Fmt, const Ts &... Vals) { return format_object(Fmt, Vals...); } -/// This is a helper class used for left_justify() and right_justify(). +/// This is a helper class for left_justify, right_justify, and center_justify. class FormattedString { +public: + enum Justification { JustifyNone, JustifyLeft, JustifyRight, JustifyCenter }; + FormattedString(StringRef S, unsigned W, Justification J) + : Str(S), Width(W), Justify(J) {} + +private: StringRef Str; unsigned Width; - bool RightJustify; + Justification Justify; friend class raw_ostream; - -public: - FormattedString(StringRef S, unsigned W, bool R) - : Str(S), Width(W), RightJustify(R) { } }; /// left_justify - append spaces after string so total output is /// \p Width characters. If \p Str is larger than \p Width, full string /// is written with no padding. inline FormattedString left_justify(StringRef Str, unsigned Width) { - return FormattedString(Str, Width, false); + return FormattedString(Str, Width, FormattedString::JustifyLeft); } /// right_justify - add spaces before string so total output is /// \p Width characters. If \p Str is larger than \p Width, full string /// is written with no padding. inline FormattedString right_justify(StringRef Str, unsigned Width) { - return FormattedString(Str, Width, true); + return FormattedString(Str, Width, FormattedString::JustifyRight); +} + +/// center_justify - add spaces before and after string so total output is +/// \p Width characters. If \p Str is larger than \p Width, full string +/// is written with no padding. +inline FormattedString center_justify(StringRef Str, unsigned Width) { + return FormattedString(Str, Width, FormattedString::JustifyCenter); } /// This is a helper class used for format_hex() and format_decimal().
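For reference, the three justify helpers compose naturally in table-style output; a small sketch (printRow is hypothetical):

  #include "llvm/ADT/StringRef.h"
  #include "llvm/Support/Format.h"
  #include "llvm/Support/raw_ostream.h"

  using namespace llvm;

  // Name flush left in 16 columns, value centered in 10.
  void printRow(StringRef Name, StringRef Value) {
    outs() << left_justify(Name, 16) << center_justify(Value, 10) << '\n';
  }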
diff --git a/interpreter/llvm/src/include/llvm/Support/FormatAdapters.h b/interpreter/llvm/src/include/llvm/Support/FormatAdapters.h index 698e134b328de..197beb7363dfc 100644 --- a/interpreter/llvm/src/include/llvm/Support/FormatAdapters.h +++ b/interpreter/llvm/src/include/llvm/Support/FormatAdapters.h @@ -28,14 +28,16 @@ namespace detail { template class AlignAdapter final : public FormatAdapter { AlignStyle Where; size_t Amount; + char Fill; public: - AlignAdapter(T &&Item, AlignStyle Where, size_t Amount) - : FormatAdapter(std::forward(Item)), Where(Where), Amount(Amount) {} + AlignAdapter(T &&Item, AlignStyle Where, size_t Amount, char Fill) + : FormatAdapter(std::forward(Item)), Where(Where), Amount(Amount), + Fill(Fill) {} void format(llvm::raw_ostream &Stream, StringRef Style) { auto Adapter = detail::build_format_adapter(std::forward(this->Item)); - FmtAlign(Adapter, Where, Amount).format(Stream, Style); + FmtAlign(Adapter, Where, Amount, Fill).format(Stream, Style); } }; @@ -72,8 +74,9 @@ template class RepeatAdapter final : public FormatAdapter { } template -detail::AlignAdapter fmt_align(T &&Item, AlignStyle Where, size_t Amount) { - return detail::AlignAdapter(std::forward(Item), Where, Amount); +detail::AlignAdapter fmt_align(T &&Item, AlignStyle Where, size_t Amount, + char Fill = ' ') { + return detail::AlignAdapter(std::forward(Item), Where, Amount, Fill); } template diff --git a/interpreter/llvm/src/include/llvm/Support/FormatCommon.h b/interpreter/llvm/src/include/llvm/Support/FormatCommon.h index a8c5fdeb6bffa..36fbad296c3f2 100644 --- a/interpreter/llvm/src/include/llvm/Support/FormatCommon.h +++ b/interpreter/llvm/src/include/llvm/Support/FormatCommon.h @@ -21,9 +21,11 @@ struct FmtAlign { detail::format_adapter &Adapter; AlignStyle Where; size_t Amount; + char Fill; - FmtAlign(detail::format_adapter &Adapter, AlignStyle Where, size_t Amount) - : Adapter(Adapter), Where(Where), Amount(Amount) {} + FmtAlign(detail::format_adapter &Adapter, AlignStyle Where, size_t Amount, + char Fill = ' ') + : Adapter(Adapter), Where(Where), Amount(Amount), Fill(Fill) {} void format(raw_ostream &S, StringRef Options) { // If we don't need to align, we can format straight into the underlying @@ -48,21 +50,27 @@ struct FmtAlign { switch (Where) { case AlignStyle::Left: S << Item; - S.indent(PadAmount); + fill(S, PadAmount); break; case AlignStyle::Center: { size_t X = PadAmount / 2; - S.indent(X); + fill(S, X); S << Item; - S.indent(PadAmount - X); + fill(S, PadAmount - X); break; } default: - S.indent(PadAmount); + fill(S, PadAmount); S << Item; break; } } + +private: + void fill(llvm::raw_ostream &S, uint32_t Count) { + for (uint32_t I = 0; I < Count; ++I) + S << Fill; + } }; } diff --git a/interpreter/llvm/src/include/llvm/Support/FormatVariadic.h b/interpreter/llvm/src/include/llvm/Support/FormatVariadic.h index 3a4668687cc94..c1153e84dfb56 100644 --- a/interpreter/llvm/src/include/llvm/Support/FormatVariadic.h +++ b/interpreter/llvm/src/include/llvm/Support/FormatVariadic.h @@ -27,8 +27,8 @@ #define LLVM_SUPPORT_FORMATVARIADIC_H #include "llvm/ADT/Optional.h" -#include "llvm/ADT/SmallString.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/FormatCommon.h" #include "llvm/Support/FormatProviders.h" diff --git a/interpreter/llvm/src/include/llvm/Support/GCOV.h b/interpreter/llvm/src/include/llvm/Support/GCOV.h index 73fddca8e35bb..02016e7dbd624 100644 --- a/interpreter/llvm/src/include/llvm/Support/GCOV.h +++ 
b/interpreter/llvm/src/include/llvm/Support/GCOV.h @@ -16,12 +16,12 @@ #define LLVM_SUPPORT_GCOV_H #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/iterator.h" -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/raw_ostream.h" #include @@ -271,8 +271,8 @@ struct GCOVEdge { /// GCOVFunction - Collects function information. class GCOVFunction { public: - typedef pointee_iterator>::const_iterator> BlockIterator; + using BlockIterator = pointee_iterator>::const_iterator>; GCOVFunction(GCOVFile &P) : Parent(P) {} @@ -321,7 +321,7 @@ class GCOVBlock { }; public: - typedef SmallVectorImpl::const_iterator EdgeIterator; + using EdgeIterator = SmallVectorImpl::const_iterator; GCOVBlock(GCOVFunction &P, uint32_t N) : Parent(P), Number(N) {} ~GCOVBlock(); @@ -381,10 +381,10 @@ class FileInfo { // Therefore this typedef allows LineData.Functions to store multiple // functions // per instance. This is rare, however, so optimize for the common case. - typedef SmallVector FunctionVector; - typedef DenseMap FunctionLines; - typedef SmallVector BlockVector; - typedef DenseMap BlockLines; + using FunctionVector = SmallVector; + using FunctionLines = DenseMap; + using BlockVector = SmallVector; + using BlockLines = DenseMap; struct LineData { LineData() = default; @@ -448,8 +448,8 @@ class FileInfo { uint32_t RunCount = 0; uint32_t ProgramCount = 0; - typedef SmallVector, 4> FileCoverageList; - typedef MapVector FuncCoverageMap; + using FileCoverageList = SmallVector, 4>; + using FuncCoverageMap = MapVector; FileCoverageList FileCoverages; FuncCoverageMap FuncCoverages; diff --git a/interpreter/llvm/src/include/llvm/Support/GenericDomTree.h b/interpreter/llvm/src/include/llvm/Support/GenericDomTree.h index 851ff7d80403f..706320fed9a72 100644 --- a/interpreter/llvm/src/include/llvm/Support/GenericDomTree.h +++ b/interpreter/llvm/src/include/llvm/Support/GenericDomTree.h @@ -26,9 +26,9 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/GraphTraits.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/Support/raw_ostream.h" #include #include @@ -41,76 +41,36 @@ namespace llvm { -template class DominatorTreeBase; - -namespace detail { - -template struct DominatorTreeBaseTraits { - static_assert(std::is_pointer::value, - "Currently NodeRef must be a pointer type."); - using type = DominatorTreeBase< - typename std::remove_pointer::type>; -}; - -} // end namespace detail - -template -using DominatorTreeBaseByGraphTraits = - typename detail::DominatorTreeBaseTraits::type; - -/// \brief Base class that other, more interesting dominator analyses -/// inherit from. -template class DominatorBase { -protected: - std::vector Roots; - bool IsPostDominators; - - explicit DominatorBase(bool isPostDom) - : Roots(), IsPostDominators(isPostDom) {} +template +class DominatorTreeBase; - DominatorBase(DominatorBase &&Arg) - : Roots(std::move(Arg.Roots)), - IsPostDominators(std::move(Arg.IsPostDominators)) { - Arg.Roots.clear(); - } - - DominatorBase &operator=(DominatorBase &&RHS) { - Roots = std::move(RHS.Roots); - IsPostDominators = std::move(RHS.IsPostDominators); - RHS.Roots.clear(); - return *this; - } - -public: - /// getRoots - Return the root blocks of the current CFG. 
This may include - /// multiple blocks if we are computing post dominators. For forward - /// dominators, this will always be a single block (the entry node). - /// - const std::vector &getRoots() const { return Roots; } - - /// isPostDominator - Returns true if analysis based of postdoms - /// - bool isPostDominator() const { return IsPostDominators; } -}; +namespace DomTreeBuilder { +template +struct SemiNCAInfo; +} // namespace DomTreeBuilder /// \brief Base class for the actual dominator tree node. template class DomTreeNodeBase { friend struct PostDominatorTree; - template friend class DominatorTreeBase; + friend class DominatorTreeBase; + friend class DominatorTreeBase; + friend struct DomTreeBuilder::SemiNCAInfo>; + friend struct DomTreeBuilder::SemiNCAInfo>; NodeT *TheBB; - DomTreeNodeBase *IDom; - std::vector *> Children; - mutable int DFSNumIn = -1; - mutable int DFSNumOut = -1; + DomTreeNodeBase *IDom; + unsigned Level; + std::vector Children; + mutable unsigned DFSNumIn = ~0; + mutable unsigned DFSNumOut = ~0; -public: - DomTreeNodeBase(NodeT *BB, DomTreeNodeBase *iDom) - : TheBB(BB), IDom(iDom) {} + public: + DomTreeNodeBase(NodeT *BB, DomTreeNodeBase *iDom) + : TheBB(BB), IDom(iDom), Level(IDom ? IDom->Level + 1 : 0) {} - typedef typename std::vector *>::iterator iterator; - typedef typename std::vector *>::const_iterator - const_iterator; + using iterator = typename std::vector::iterator; + using const_iterator = + typename std::vector::const_iterator; iterator begin() { return Children.begin(); } iterator end() { return Children.end(); } @@ -118,14 +78,13 @@ template class DomTreeNodeBase { const_iterator end() const { return Children.end(); } NodeT *getBlock() const { return TheBB; } - DomTreeNodeBase *getIDom() const { return IDom; } + DomTreeNodeBase *getIDom() const { return IDom; } + unsigned getLevel() const { return Level; } - const std::vector *> &getChildren() const { - return Children; - } + const std::vector &getChildren() const { return Children; } - std::unique_ptr> - addChild(std::unique_ptr> C) { + std::unique_ptr addChild( + std::unique_ptr C) { Children.push_back(C.get()); return C; } @@ -134,10 +93,12 @@ template class DomTreeNodeBase { void clearAllChildren() { Children.clear(); } - bool compare(const DomTreeNodeBase *Other) const { + bool compare(const DomTreeNodeBase *Other) const { if (getNumChildren() != Other->getNumChildren()) return true; + if (Level != Other->Level) return true; + SmallPtrSet OtherChildren; for (const DomTreeNodeBase *I : *Other) { const NodeT *Nd = I->getBlock(); @@ -152,20 +113,21 @@ template class DomTreeNodeBase { return false; } - void setIDom(DomTreeNodeBase *NewIDom) { + void setIDom(DomTreeNodeBase *NewIDom) { assert(IDom && "No immediate dominator?"); - if (IDom != NewIDom) { - typename std::vector *>::iterator I = - find(IDom->Children, this); - assert(I != IDom->Children.end() && - "Not in immediate dominator children set!"); - // I am no longer your child... - IDom->Children.erase(I); + if (IDom == NewIDom) return; - // Switch to new dominator - IDom = NewIDom; - IDom->Children.push_back(this); - } + auto I = find(IDom->Children, this); + assert(I != IDom->Children.end() && + "Not in immediate dominator children set!"); + // I am no longer your child... 
+ IDom->Children.erase(I); + + // Switch to new dominator + IDom = NewIDom; + IDom->Children.push_back(this); + + UpdateLevel(); } /// getDFSNumIn/getDFSNumOut - These return the DFS visitation order for nodes @@ -177,186 +139,116 @@ template class DomTreeNodeBase { private: // Return true if this node is dominated by other. Use this only if DFS info // is valid. - bool DominatedBy(const DomTreeNodeBase *other) const { + bool DominatedBy(const DomTreeNodeBase *other) const { return this->DFSNumIn >= other->DFSNumIn && this->DFSNumOut <= other->DFSNumOut; } + + void UpdateLevel() { + assert(IDom); + if (Level == IDom->Level + 1) return; + + SmallVector WorkStack = {this}; + + while (!WorkStack.empty()) { + DomTreeNodeBase *Current = WorkStack.pop_back_val(); + Current->Level = Current->IDom->Level + 1; + + for (DomTreeNodeBase *C : *Current) { + assert(C->IDom); + if (C->Level != C->IDom->Level + 1) WorkStack.push_back(C); + } + } + } }; template -raw_ostream &operator<<(raw_ostream &o, const DomTreeNodeBase *Node) { +raw_ostream &operator<<(raw_ostream &O, const DomTreeNodeBase *Node) { if (Node->getBlock()) - Node->getBlock()->printAsOperand(o, false); + Node->getBlock()->printAsOperand(O, false); else - o << " <>"; + O << " <>"; - o << " {" << Node->getDFSNumIn() << "," << Node->getDFSNumOut() << "}"; + O << " {" << Node->getDFSNumIn() << "," << Node->getDFSNumOut() << "} [" + << Node->getLevel() << "]\n"; - return o << "\n"; + return O; } template -void PrintDomTree(const DomTreeNodeBase *N, raw_ostream &o, +void PrintDomTree(const DomTreeNodeBase *N, raw_ostream &O, unsigned Lev) { - o.indent(2 * Lev) << "[" << Lev << "] " << N; + O.indent(2 * Lev) << "[" << Lev << "] " << N; for (typename DomTreeNodeBase::const_iterator I = N->begin(), E = N->end(); I != E; ++I) - PrintDomTree(*I, o, Lev + 1); + PrintDomTree(*I, O, Lev + 1); } -// The calculate routine is provided in a separate header but referenced here. -template -void Calculate(DominatorTreeBaseByGraphTraits> &DT, FuncT &F); +namespace DomTreeBuilder { +// The routines below are provided in a separate header but referenced here. +template +void Calculate(DomTreeT &DT, FuncT &F); + +template +void InsertEdge(DomTreeT &DT, typename DomTreeT::NodePtr From, + typename DomTreeT::NodePtr To); + +template +void DeleteEdge(DomTreeT &DT, typename DomTreeT::NodePtr From, + typename DomTreeT::NodePtr To); + +template +bool Verify(const DomTreeT &DT); +} // namespace DomTreeBuilder /// \brief Core dominator tree base class. /// /// This class is a generic template over graph nodes. It is instantiated for /// various graphs in the LLVM IR or in the code generator. -template class DominatorTreeBase : public DominatorBase { - bool dominatedBySlowTreeWalk(const DomTreeNodeBase *A, - const DomTreeNodeBase *B) const { - assert(A != B); - assert(isReachableFromEntry(B)); - assert(isReachableFromEntry(A)); - - const DomTreeNodeBase *IDom; - while ((IDom = B->getIDom()) != nullptr && IDom != A && IDom != B) - B = IDom; // Walk up the tree - return IDom != nullptr; - } - - /// \brief Wipe this tree's state without releasing any resources. - /// - /// This is essentially a post-move helper only. It leaves the object in an - /// assignable and destroyable state, but otherwise invalid. 
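Aside: the new Level field caches each node's depth below the tree root so later queries and updates can compare plain integers. Below is a minimal standalone sketch of the same worklist scheme, using a hypothetical Node type rather than the actual DomTreeNodeBase (it assumes the node's IDom is non-null, as UpdateLevel() does):

    #include <vector>

    struct Node {
      Node *IDom = nullptr;          // immediate dominator (tree parent)
      unsigned Level = 0;            // distance from the tree root
      std::vector<Node *> Children;  // tree children
    };

    // After N's IDom changed, re-derive levels in N's subtree. The real
    // method additionally early-exits when N's level is already consistent.
    void updateLevel(Node *N) {
      std::vector<Node *> Work = {N};
      while (!Work.empty()) {
        Node *Cur = Work.back();
        Work.pop_back();
        Cur->Level = Cur->IDom->Level + 1;
        for (Node *C : Cur->Children)
          if (C->Level != C->IDom->Level + 1)  // only descend into stale nodes
            Work.push_back(C);
      }
    }

As in UpdateLevel() above, only children whose cached level is stale get pushed, so subtrees whose levels are already consistent are never revisited.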
- void wipe() { - DomTreeNodes.clear(); - IDoms.clear(); - Vertex.clear(); - Info.clear(); - RootNode = nullptr; - } +template +class DominatorTreeBase { + protected: + std::vector Roots; -protected: - typedef DenseMap>> - DomTreeNodeMapType; + using DomTreeNodeMapType = + DenseMap>>; DomTreeNodeMapType DomTreeNodes; DomTreeNodeBase *RootNode; + using ParentPtr = decltype(std::declval()->getParent()); + ParentPtr Parent = nullptr; mutable bool DFSInfoValid = false; mutable unsigned int SlowQueries = 0; - // Information record used during immediate dominators computation. - struct InfoRec { - unsigned DFSNum = 0; - unsigned Parent = 0; - unsigned Semi = 0; - NodeT *Label = nullptr; - - InfoRec() = default; - }; - - DenseMap IDoms; - // Vertex - Map the DFS number to the NodeT* - std::vector Vertex; + friend struct DomTreeBuilder::SemiNCAInfo; - // Info - Collection of information used during the computation of idoms. - DenseMap Info; - - void reset() { - DomTreeNodes.clear(); - IDoms.clear(); - this->Roots.clear(); - Vertex.clear(); - RootNode = nullptr; - DFSInfoValid = false; - SlowQueries = 0; - } + public: + static_assert(std::is_pointer::NodeRef>::value, + "Currently DominatorTreeBase supports only pointer nodes"); + using NodeType = NodeT; + using NodePtr = NodeT *; + static constexpr bool IsPostDominator = IsPostDom; - // NewBB is split and now it has one successor. Update dominator tree to - // reflect this change. - template - void Split(typename GraphTraits::NodeRef NewBB) { - using GraphT = GraphTraits; - using NodeRef = typename GraphT::NodeRef; - assert(std::distance(GraphT::child_begin(NewBB), - GraphT::child_end(NewBB)) == 1 && - "NewBB should have a single successor!"); - NodeRef NewBBSucc = *GraphT::child_begin(NewBB); - - std::vector PredBlocks; - for (const auto &Pred : children>(NewBB)) - PredBlocks.push_back(Pred); - - assert(!PredBlocks.empty() && "No predblocks?"); - - bool NewBBDominatesNewBBSucc = true; - for (const auto &Pred : children>(NewBBSucc)) { - if (Pred != NewBB && !dominates(NewBBSucc, Pred) && - isReachableFromEntry(Pred)) { - NewBBDominatesNewBBSucc = false; - break; - } - } - - // Find NewBB's immediate dominator and create new dominator tree node for - // NewBB. - NodeT *NewBBIDom = nullptr; - unsigned i = 0; - for (i = 0; i < PredBlocks.size(); ++i) - if (isReachableFromEntry(PredBlocks[i])) { - NewBBIDom = PredBlocks[i]; - break; - } - - // It's possible that none of the predecessors of NewBB are reachable; - // in that case, NewBB itself is unreachable, so nothing needs to be - // changed. - if (!NewBBIDom) - return; - - for (i = i + 1; i < PredBlocks.size(); ++i) { - if (isReachableFromEntry(PredBlocks[i])) - NewBBIDom = findNearestCommonDominator(NewBBIDom, PredBlocks[i]); - } - - // Create the new dominator tree node... and set the idom of NewBB. - DomTreeNodeBase *NewBBNode = addNewBlock(NewBB, NewBBIDom); - - // If NewBB strictly dominates other blocks, then it is now the immediate - // dominator of NewBBSucc. Update the dominator tree as appropriate. 
- if (NewBBDominatesNewBBSucc) { - DomTreeNodeBase *NewBBSuccNode = getNode(NewBBSucc); - changeImmediateDominator(NewBBSuccNode, NewBBNode); - } - } - -public: - explicit DominatorTreeBase(bool isPostDom) - : DominatorBase(isPostDom) {} + DominatorTreeBase() {} DominatorTreeBase(DominatorTreeBase &&Arg) - : DominatorBase( - std::move(static_cast &>(Arg))), + : Roots(std::move(Arg.Roots)), DomTreeNodes(std::move(Arg.DomTreeNodes)), - RootNode(std::move(Arg.RootNode)), - DFSInfoValid(std::move(Arg.DFSInfoValid)), - SlowQueries(std::move(Arg.SlowQueries)), IDoms(std::move(Arg.IDoms)), - Vertex(std::move(Arg.Vertex)), Info(std::move(Arg.Info)) { + RootNode(Arg.RootNode), + Parent(Arg.Parent), + DFSInfoValid(Arg.DFSInfoValid), + SlowQueries(Arg.SlowQueries) { Arg.wipe(); } DominatorTreeBase &operator=(DominatorTreeBase &&RHS) { - DominatorBase::operator=( - std::move(static_cast &>(RHS))); + Roots = std::move(RHS.Roots); DomTreeNodes = std::move(RHS.DomTreeNodes); - RootNode = std::move(RHS.RootNode); - DFSInfoValid = std::move(RHS.DFSInfoValid); - SlowQueries = std::move(RHS.SlowQueries); - IDoms = std::move(RHS.IDoms); - Vertex = std::move(RHS.Vertex); - Info = std::move(RHS.Info); + RootNode = RHS.RootNode; + Parent = RHS.Parent; + DFSInfoValid = RHS.DFSInfoValid; + SlowQueries = RHS.SlowQueries; RHS.wipe(); return *this; } @@ -364,9 +256,20 @@ template class DominatorTreeBase : public DominatorBase { DominatorTreeBase(const DominatorTreeBase &) = delete; DominatorTreeBase &operator=(const DominatorTreeBase &) = delete; + /// getRoots - Return the root blocks of the current CFG. This may include + /// multiple blocks if we are computing post dominators. For forward + /// dominators, this will always be a single block (the entry node). + /// + const std::vector &getRoots() const { return Roots; } + + /// isPostDominator - Returns true if analysis based of postdoms + /// + bool isPostDominator() const { return IsPostDominator; } + /// compare - Return false if the other dominator tree base matches this /// dominator tree base. Otherwise return true. bool compare(const DominatorTreeBase &Other) const { + if (Parent != Other.Parent) return true; const DomTreeNodeMapType &OtherDomTreeNodes = Other.DomTreeNodes; if (DomTreeNodes.size() != OtherDomTreeNodes.size()) @@ -472,6 +375,13 @@ template class DominatorTreeBase : public DominatorBase { if (!isReachableFromEntry(A)) return false; + if (B->getIDom() == A) return true; + + if (A->getIDom() == B) return false; + + // A can only dominate B if it is higher in the tree. + if (A->getLevel() >= B->getLevel()) return false; + // Compare the result of the tree walk and the dfs numbers, if expensive // checks are enabled. #ifdef EXPENSIVE_CHECKS @@ -503,7 +413,7 @@ template class DominatorTreeBase : public DominatorBase { /// findNearestCommonDominator - Find nearest common dominator basic block /// for basic block A and B. If there is no such block then return NULL. - NodeT *findNearestCommonDominator(NodeT *A, NodeT *B) { + NodeT *findNearestCommonDominator(NodeT *A, NodeT *B) const { assert(A->getParent() == B->getParent() && "Two blocks are not in same function"); @@ -515,64 +425,74 @@ template class DominatorTreeBase : public DominatorBase { return &Entry; } - // If B dominates A then B is nearest common dominator. - if (dominates(B, A)) - return B; - - // If A dominates B then A is nearest common dominator. 
- if (dominates(A, B)) - return A; - DomTreeNodeBase<NodeT> *NodeA = getNode(A); DomTreeNodeBase<NodeT> *NodeB = getNode(B); - // If we have DFS info, then we can avoid all allocations by just querying - // it from each IDom. Note that because we call 'dominates' twice above, we - // expect to call through this code at most 16 times in a row without - // building valid DFS information. This is important as below is a *very* - // slow tree walk. - if (DFSInfoValid) { - DomTreeNodeBase<NodeT> *IDomA = NodeA->getIDom(); - while (IDomA) { - if (NodeB->DominatedBy(IDomA)) - return IDomA->getBlock(); - IDomA = IDomA->getIDom(); - } - return nullptr; - } + if (!NodeA || !NodeB) return nullptr; - // Collect NodeA dominators set. - SmallPtrSet<const DomTreeNodeBase<NodeT> *, 16> NodeADoms; - NodeADoms.insert(NodeA); - DomTreeNodeBase<NodeT> *IDomA = NodeA->getIDom(); - while (IDomA) { - NodeADoms.insert(IDomA); - IDomA = IDomA->getIDom(); - } - - // Walk NodeB immediate dominators chain and find common dominator node. - DomTreeNodeBase<NodeT> *IDomB = NodeB->getIDom(); - while (IDomB) { - if (NodeADoms.count(IDomB) != 0) - return IDomB->getBlock(); + // Use level information to go up the tree until the levels match. Then + // continue going up until we arrive at the same node. + while (NodeA && NodeA != NodeB) { + if (NodeA->getLevel() < NodeB->getLevel()) std::swap(NodeA, NodeB); - IDomB = IDomB->getIDom(); + NodeA = NodeA->IDom; } - return nullptr; + return NodeA ? NodeA->getBlock() : nullptr; } - const NodeT *findNearestCommonDominator(const NodeT *A, const NodeT *B) { + const NodeT *findNearestCommonDominator(const NodeT *A, + const NodeT *B) const { // Cast away the const qualifiers here. This is ok since // const is re-introduced on the return type. return findNearestCommonDominator(const_cast<NodeT *>(A), const_cast<NodeT *>(B)); } + bool isVirtualRoot(const DomTreeNodeBase<NodeT> *A) const { + return isPostDominator() && !A->getBlock(); + } + //===--------------------------------------------------------------------===// // API to update (Post)DominatorTree information based on modifications to // the CFG... + /// Inform the dominator tree about a CFG edge insertion and update the tree. + /// + /// This function has to be called just before or just after making the update + /// on the actual CFG. There cannot be any other updates that the dominator + /// tree doesn't know about. + /// + /// Note that for postdominators it automatically takes care of inserting + /// a reverse edge internally (so there's no need to swap the parameters). + /// + void insertEdge(NodeT *From, NodeT *To) { + assert(From); + assert(To); + assert(From->getParent() == Parent); + assert(To->getParent() == Parent); + DomTreeBuilder::InsertEdge(*this, From, To); + } + + /// Inform the dominator tree about a CFG edge deletion and update the tree. + /// + /// This function has to be called just after making the update + /// on the actual CFG. There cannot be any other updates that the dominator + /// tree doesn't know about. The only exception is when the deletion that the + /// tree is informed about makes some (dominator) subtree unreachable -- in + /// this case, it is fine to perform deletions within this subtree. + /// + /// Note that for postdominators it automatically takes care of deleting + /// a reverse edge internally (so there's no need to swap the parameters). 
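Aside: a minimal usage sketch of the insertEdge API documented above, on LLVM IR. It assumes `DT` was built over the function containing `From` and `To` and that `From` has no terminator yet; the helper name is hypothetical, not code from the patch:

    #include <cassert>
    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Dominators.h"
    #include "llvm/IR/IRBuilder.h"
    using namespace llvm;

    void connectAndUpdate(DominatorTree &DT, BasicBlock *From, BasicBlock *To) {
      IRBuilder<> B(From);
      B.CreateBr(To);           // first mutate the CFG...
      DT.insertEdge(From, To);  // ...then inform the tree instead of recomputing
      assert(DT.verify() && "incremental update went out of sync");
    }
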
+ /// + void deleteEdge(NodeT *From, NodeT *To) { + assert(From); + assert(To); + assert(From->getParent() == Parent); + assert(To->getParent() == Parent); + DomTreeBuilder::DeleteEdge(*this, From, To); + } + /// Add a new node to the dominator tree information. /// /// This creates a new node as a child of DomBB dominator node, linking it @@ -601,7 +521,6 @@ template class DominatorTreeBase : public DominatorBase { assert(!this->isPostDominator() && "Cannot change root of post-dominator tree"); DFSInfoValid = false; - auto &Roots = DominatorBase::Roots; DomTreeNodeBase *NewNode = (DomTreeNodes[BB] = llvm::make_unique>(BB, nullptr)).get(); if (Roots.empty()) { @@ -609,8 +528,10 @@ template class DominatorTreeBase : public DominatorBase { } else { assert(Roots.size() == 1); NodeT *OldRoot = Roots.front(); - DomTreeNodes[OldRoot] = - NewNode->addChild(std::move(DomTreeNodes[OldRoot])); + auto &OldNode = DomTreeNodes[OldRoot]; + OldNode = NewNode->addChild(std::move(DomTreeNodes[OldRoot])); + OldNode->IDom = NewNode; + OldNode->UpdateLevel(); Roots[0] = BB; } return RootNode = NewNode; @@ -655,7 +576,7 @@ template class DominatorTreeBase : public DominatorBase { /// splitBlock - BB is split and now it has one successor. Update dominator /// tree to reflect this change. void splitBlock(NodeT *NewBB) { - if (this->IsPostDominators) + if (IsPostDominator) Split>(NewBB); else Split(NewBB); @@ -663,60 +584,20 @@ template class DominatorTreeBase : public DominatorBase { /// print - Convert to human readable form /// - void print(raw_ostream &o) const { - o << "=============================--------------------------------\n"; + void print(raw_ostream &O) const { + O << "=============================--------------------------------\n"; if (this->isPostDominator()) - o << "Inorder PostDominator Tree: "; + O << "Inorder PostDominator Tree: "; else - o << "Inorder Dominator Tree: "; + O << "Inorder Dominator Tree: "; if (!DFSInfoValid) - o << "DFSNumbers invalid: " << SlowQueries << " slow queries."; - o << "\n"; + O << "DFSNumbers invalid: " << SlowQueries << " slow queries."; + O << "\n"; // The postdom tree can have a null root if there are no returns. - if (getRootNode()) - PrintDomTree(getRootNode(), o, 1); + if (getRootNode()) PrintDomTree(getRootNode(), O, 1); } -protected: - template - friend typename GraphT::NodeRef - Eval(DominatorTreeBaseByGraphTraits &DT, typename GraphT::NodeRef V, - unsigned LastLinked); - - template - friend unsigned ReverseDFSPass(DominatorTreeBaseByGraphTraits &DT, - typename GraphT::NodeRef V, unsigned N); - - template - friend unsigned DFSPass(DominatorTreeBaseByGraphTraits &DT, - typename GraphT::NodeRef V, unsigned N); - - template - friend void Calculate(DominatorTreeBaseByGraphTraits> &DT, - FuncT &F); - - DomTreeNodeBase *getNodeForBlock(NodeT *BB) { - if (DomTreeNodeBase *Node = getNode(BB)) - return Node; - - // Haven't calculated this node yet? Get or calculate the node for the - // immediate dominator. - NodeT *IDom = getIDom(BB); - - assert(IDom || DomTreeNodes[nullptr]); - DomTreeNodeBase *IDomNode = getNodeForBlock(IDom); - - // Add a new tree node for this NodeT, and link it as a child of - // IDomNode - return (DomTreeNodes[BB] = IDomNode->addChild( - llvm::make_unique>(BB, IDomNode))).get(); - } - - NodeT *getIDom(NodeT *BB) const { return IDoms.lookup(BB); } - - void addRoot(NodeT *BB) { this->Roots.push_back(BB); } - public: /// updateDFSNumbers - Assign In and Out numbers to the nodes while walking /// dominator tree in dfs order. 
@@ -770,31 +651,131 @@ template class DominatorTreeBase : public DominatorBase { /// recalculate - compute a dominator tree for the given function template void recalculate(FT &F) { - typedef GraphTraits TraitsTy; + using TraitsTy = GraphTraits; reset(); - Vertex.push_back(nullptr); + Parent = &F; - if (!this->IsPostDominators) { + if (!IsPostDominator) { // Initialize root NodeT *entry = TraitsTy::getEntryNode(&F); addRoot(entry); - - Calculate(*this, F); } else { // Initialize the roots list for (auto *Node : nodes(&F)) if (TraitsTy::child_begin(Node) == TraitsTy::child_end(Node)) addRoot(Node); + } + + DomTreeBuilder::Calculate(*this, F); + } + + /// verify - check parent and sibling property + bool verify() const { return DomTreeBuilder::Verify(*this); } + + protected: + void addRoot(NodeT *BB) { this->Roots.push_back(BB); } + + void reset() { + DomTreeNodes.clear(); + Roots.clear(); + RootNode = nullptr; + Parent = nullptr; + DFSInfoValid = false; + SlowQueries = 0; + } + + // NewBB is split and now it has one successor. Update dominator tree to + // reflect this change. + template + void Split(typename GraphTraits::NodeRef NewBB) { + using GraphT = GraphTraits; + using NodeRef = typename GraphT::NodeRef; + assert(std::distance(GraphT::child_begin(NewBB), + GraphT::child_end(NewBB)) == 1 && + "NewBB should have a single successor!"); + NodeRef NewBBSucc = *GraphT::child_begin(NewBB); + + std::vector PredBlocks; + for (const auto &Pred : children>(NewBB)) + PredBlocks.push_back(Pred); + + assert(!PredBlocks.empty() && "No predblocks?"); + + bool NewBBDominatesNewBBSucc = true; + for (const auto &Pred : children>(NewBBSucc)) { + if (Pred != NewBB && !dominates(NewBBSucc, Pred) && + isReachableFromEntry(Pred)) { + NewBBDominatesNewBBSucc = false; + break; + } + } - Calculate>(*this, F); + // Find NewBB's immediate dominator and create new dominator tree node for + // NewBB. + NodeT *NewBBIDom = nullptr; + unsigned i = 0; + for (i = 0; i < PredBlocks.size(); ++i) + if (isReachableFromEntry(PredBlocks[i])) { + NewBBIDom = PredBlocks[i]; + break; + } + + // It's possible that none of the predecessors of NewBB are reachable; + // in that case, NewBB itself is unreachable, so nothing needs to be + // changed. + if (!NewBBIDom) return; + + for (i = i + 1; i < PredBlocks.size(); ++i) { + if (isReachableFromEntry(PredBlocks[i])) + NewBBIDom = findNearestCommonDominator(NewBBIDom, PredBlocks[i]); } + + // Create the new dominator tree node... and set the idom of NewBB. + DomTreeNodeBase *NewBBNode = addNewBlock(NewBB, NewBBIDom); + + // If NewBB strictly dominates other blocks, then it is now the immediate + // dominator of NewBBSucc. Update the dominator tree as appropriate. + if (NewBBDominatesNewBBSucc) { + DomTreeNodeBase *NewBBSuccNode = getNode(NewBBSucc); + changeImmediateDominator(NewBBSuccNode, NewBBNode); + } + } + + private: + bool dominatedBySlowTreeWalk(const DomTreeNodeBase *A, + const DomTreeNodeBase *B) const { + assert(A != B); + assert(isReachableFromEntry(B)); + assert(isReachableFromEntry(A)); + + const DomTreeNodeBase *IDom; + while ((IDom = B->getIDom()) != nullptr && IDom != A && IDom != B) + B = IDom; // Walk up the tree + return IDom != nullptr; + } + + /// \brief Wipe this tree's state without releasing any resources. + /// + /// This is essentially a post-move helper only. It leaves the object in an + /// assignable and destroyable state, but otherwise invalid. 
+ void wipe() { + DomTreeNodes.clear(); + RootNode = nullptr; + Parent = nullptr; } }; +template <typename NodeT> +using DomTreeBase = DominatorTreeBase<NodeT, false>; + +template <typename NodeT> +using PostDomTreeBase = DominatorTreeBase<NodeT, true>; + // These two functions are declared out of line as a workaround for building // with old (< r147295) versions of clang because of pr11642. -template <class NodeT> -bool DominatorTreeBase<NodeT>::dominates(const NodeT *A, const NodeT *B) const { +template <typename NodeT, bool IsPostDom> +bool DominatorTreeBase<NodeT, IsPostDom>::dominates(const NodeT *A, + const NodeT *B) const { if (A == B) return true; @@ -804,9 +785,9 @@ bool DominatorTreeBase<NodeT>::dominates(const NodeT *A, const NodeT *B) const { return dominates(getNode(const_cast<NodeT *>(A)), getNode(const_cast<NodeT *>(B))); } -template <class NodeT> -bool DominatorTreeBase<NodeT>::properlyDominates(const NodeT *A, - const NodeT *B) const { +template <typename NodeT, bool IsPostDom> +bool DominatorTreeBase<NodeT, IsPostDom>::properlyDominates( + const NodeT *A, const NodeT *B) const { if (A == B) return false; diff --git a/interpreter/llvm/src/include/llvm/Support/GenericDomTreeConstruction.h b/interpreter/llvm/src/include/llvm/Support/GenericDomTreeConstruction.h index c1d757f3ab6a3..be90afa4c3c8e 100644 --- a/interpreter/llvm/src/include/llvm/Support/GenericDomTreeConstruction.h +++ b/interpreter/llvm/src/include/llvm/Support/GenericDomTreeConstruction.h @@ -10,272 +10,973 @@ /// /// Generic dominator tree construction - This file provides routines to /// construct immediate dominator information for a flow-graph based on the -/// algorithm described in this document: +/// Semi-NCA algorithm described in this dissertation: /// -/// A Fast Algorithm for Finding Dominators in a Flowgraph -/// T. Lengauer & R. Tarjan, ACM TOPLAS July 1979, pgs 121-141. +/// Linear-Time Algorithms for Dominators and Related Problems +/// Loukas Georgiadis, Princeton University, November 2005, pp. 21-23: +/// ftp://ftp.cs.princeton.edu/reports/2005/737.pdf /// /// This implements the O(n*log(n)) versions of EVAL and LINK, because it turns /// out that the theoretically slower O(n*log(n)) implementation is actually /// faster than the almost-linear O(n*alpha(n)) version, even for large CFGs. /// +/// The file uses the Depth Based Search algorithm to perform incremental +/// updates (insertions and deletions). The implemented algorithm is based on +/// this publication: +/// +/// An Experimental Study of Dynamic Dominators +/// Loukas Georgiadis, et al., April 12 2016, pp. 5-7, 9-10: +/// https://arxiv.org/pdf/1604.02711.pdf +/// //===----------------------------------------------------------------------===// #ifndef LLVM_SUPPORT_GENERICDOMTREECONSTRUCTION_H #define LLVM_SUPPORT_GENERICDOMTREECONSTRUCTION_H +#include <queue> +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/GenericDomTree.h" +#define DEBUG_TYPE "dom-tree-builder" + namespace llvm { +namespace DomTreeBuilder { -// External storage for depth first iterator that reuses the info lookup map -// domtree already has. We don't have a set, but a map instead, so we are -// converting the one argument insert calls. 
-template struct df_iterator_dom_storage { -public: - typedef DenseMap BaseSet; - df_iterator_dom_storage(BaseSet &Storage) : Storage(Storage) {} - - typedef typename BaseSet::iterator iterator; - std::pair insert(NodeRef N) { - return Storage.insert({N, InfoType()}); +template +struct ChildrenGetter { + static auto Get(NodePtr N) -> decltype(reverse(children(N))) { + return reverse(children(N)); } - void completed(NodeRef) {} +}; -private: - BaseSet &Storage; +template +struct ChildrenGetter { + static auto Get(NodePtr N) -> decltype(inverse_children(N)) { + return inverse_children(N); + } }; -template -unsigned ReverseDFSPass(DominatorTreeBaseByGraphTraits &DT, - typename GraphT::NodeRef V, unsigned N) { - df_iterator_dom_storage< - typename GraphT::NodeRef, - typename DominatorTreeBaseByGraphTraits::InfoRec> - DFStorage(DT.Info); - bool IsChildOfArtificialExit = (N != 0); - for (auto I = idf_ext_begin(V, DFStorage), E = idf_ext_end(V, DFStorage); - I != E; ++I) { - typename GraphT::NodeRef BB = *I; - auto &BBInfo = DT.Info[BB]; - BBInfo.DFSNum = BBInfo.Semi = ++N; - BBInfo.Label = BB; - // Set the parent to the top of the visited stack. The stack includes us, - // and is 1 based, so we subtract to account for both of these. - if (I.getPathLength() > 1) - BBInfo.Parent = DT.Info[I.getPath(I.getPathLength() - 2)].DFSNum; - DT.Vertex.push_back(BB); // Vertex[n] = V; - - if (IsChildOfArtificialExit) - BBInfo.Parent = 1; - - IsChildOfArtificialExit = false; +template +struct SemiNCAInfo { + using NodePtr = typename DomTreeT::NodePtr; + using NodeT = typename DomTreeT::NodeType; + using TreeNodePtr = DomTreeNodeBase *; + static constexpr bool IsPostDom = DomTreeT::IsPostDominator; + + // Information record used by Semi-NCA during tree construction. + struct InfoRec { + unsigned DFSNum = 0; + unsigned Parent = 0; + unsigned Semi = 0; + NodePtr Label = nullptr; + NodePtr IDom = nullptr; + SmallVector ReverseChildren; + }; + + // Number to node mapping is 1-based. Initialize the mapping to start with + // a dummy element. + std::vector NumToNode = {nullptr}; + DenseMap NodeToInfo; + + void clear() { + NumToNode = {nullptr}; // Restore to initial state with a dummy start node. + NodeToInfo.clear(); } - return N; -} -template -unsigned DFSPass(DominatorTreeBaseByGraphTraits &DT, - typename GraphT::NodeRef V, unsigned N) { - df_iterator_dom_storage< - typename GraphT::NodeRef, - typename DominatorTreeBaseByGraphTraits::InfoRec> - DFStorage(DT.Info); - for (auto I = df_ext_begin(V, DFStorage), E = df_ext_end(V, DFStorage); - I != E; ++I) { - typename GraphT::NodeRef BB = *I; - auto &BBInfo = DT.Info[BB]; - BBInfo.DFSNum = BBInfo.Semi = ++N; - BBInfo.Label = BB; - // Set the parent to the top of the visited stack. The stack includes us, - // and is 1 based, so we subtract to account for both of these. 
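Aside: the ChildrenGetter template above picks forward or inverse CFG edges at compile time. The same dispatch in miniature, with a hypothetical Node type instead of GraphTraits and llvm::children/inverse_children:

    #include <vector>

    struct Node { std::vector<Node *> Succs, Preds; };

    template <bool Inverse> struct ChildrenGetter;

    // Forward edges: the node's successors.
    template <> struct ChildrenGetter<false> {
      static const std::vector<Node *> &Get(const Node *N) { return N->Succs; }
    };

    // Inverse edges: the node's predecessors.
    template <> struct ChildrenGetter<true> {
      static const std::vector<Node *> &Get(const Node *N) { return N->Preds; }
    };
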
- if (I.getPathLength() > 1) - BBInfo.Parent = DT.Info[I.getPath(I.getPathLength() - 2)].DFSNum; - DT.Vertex.push_back(BB); // Vertex[n] = V; + + NodePtr getIDom(NodePtr BB) const { + auto InfoIt = NodeToInfo.find(BB); + if (InfoIt == NodeToInfo.end()) return nullptr; + + return InfoIt->second.IDom; } - return N; -} -template -typename GraphT::NodeRef Eval(DominatorTreeBaseByGraphTraits &DT, - typename GraphT::NodeRef VIn, - unsigned LastLinked) { - auto &VInInfo = DT.Info[VIn]; - if (VInInfo.DFSNum < LastLinked) - return VIn; - - SmallVector Work; - SmallPtrSet Visited; - - if (VInInfo.Parent >= LastLinked) - Work.push_back(VIn); - - while (!Work.empty()) { - typename GraphT::NodeRef V = Work.back(); - auto &VInfo = DT.Info[V]; - typename GraphT::NodeRef VAncestor = DT.Vertex[VInfo.Parent]; - - // Process Ancestor first - if (Visited.insert(VAncestor).second && VInfo.Parent >= LastLinked) { - Work.push_back(VAncestor); - continue; + TreeNodePtr getNodeForBlock(NodePtr BB, DomTreeT &DT) { + if (TreeNodePtr Node = DT.getNode(BB)) return Node; + + // Haven't calculated this node yet? Get or calculate the node for the + // immediate dominator. + NodePtr IDom = getIDom(BB); + + assert(IDom || DT.DomTreeNodes[nullptr]); + TreeNodePtr IDomNode = getNodeForBlock(IDom, DT); + + // Add a new tree node for this NodeT, and link it as a child of + // IDomNode + return (DT.DomTreeNodes[BB] = IDomNode->addChild( + llvm::make_unique<DomTreeNodeBase<NodeT>>(BB, IDomNode))) + .get(); + } + + static bool AlwaysDescend(NodePtr, NodePtr) { return true; } + + struct BlockNamePrinter { + NodePtr N; + + BlockNamePrinter(NodePtr Block) : N(Block) {} + BlockNamePrinter(TreeNodePtr TN) : N(TN ? TN->getBlock() : nullptr) {} + + friend raw_ostream &operator<<(raw_ostream &O, const BlockNamePrinter &BP) { + if (!BP.N) + O << "nullptr" + else + BP.N->printAsOperand(O, false); + + return O; + } + }; + + // Custom DFS implementation which can skip nodes based on a provided + // predicate. It also collects ReverseChildren so that we don't have to spend + // time getting predecessors in SemiNCA. + template <bool Inverse, typename DescendCondition> + unsigned runDFS(NodePtr V, unsigned LastNum, DescendCondition Condition, + unsigned AttachToNum) { + assert(V); + SmallVector<NodePtr, 64> WorkList = {V}; + if (NodeToInfo.count(V) != 0) NodeToInfo[V].Parent = AttachToNum; + + while (!WorkList.empty()) { + const NodePtr BB = WorkList.pop_back_val(); + auto &BBInfo = NodeToInfo[BB]; + + // Visited nodes always have positive DFS numbers. + if (BBInfo.DFSNum != 0) continue; + BBInfo.DFSNum = BBInfo.Semi = ++LastNum; + BBInfo.Label = BB; + NumToNode.push_back(BB); + + for (const NodePtr Succ : ChildrenGetter<NodePtr, Inverse>::Get(BB)) { + const auto SIT = NodeToInfo.find(Succ); + // Don't visit nodes more than once but remember to collect + // ReverseChildren. + if (SIT != NodeToInfo.end() && SIT->second.DFSNum != 0) { + if (Succ != BB) SIT->second.ReverseChildren.push_back(BB); + continue; + } + + if (!Condition(BB, Succ)) continue; + + // It's fine to add Succ to the map, because we know that it will be + // visited later. 
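Aside: runDFS above is a stack-based preorder walk gated by a caller-supplied predicate. A self-contained miniature of the same shape (hypothetical Node type, no reverse-children bookkeeping):

    #include <functional>
    #include <unordered_map>
    #include <vector>

    struct Node { std::vector<Node *> Succs; };

    // Assign 1-based preorder numbers starting at Root; Descend(From, To)
    // decides whether the walk may follow the edge From -> To.
    unsigned runDFS(Node *Root,
                    const std::function<bool(Node *, Node *)> &Descend,
                    std::unordered_map<Node *, unsigned> &DFSNum) {
      unsigned Last = 0;
      std::vector<Node *> WorkList = {Root};
      while (!WorkList.empty()) {
        Node *BB = WorkList.back();
        WorkList.pop_back();
        if (DFSNum.count(BB)) continue;  // already numbered
        DFSNum[BB] = ++Last;             // 1-based preorder number
        for (Node *Succ : BB->Succs)
          if (!DFSNum.count(Succ) && Descend(BB, Succ))
            WorkList.push_back(Succ);
      }
      return Last;
    }
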
+ auto &SuccInfo = NodeToInfo[Succ]; + WorkList.push_back(Succ); + SuccInfo.Parent = LastNum; + SuccInfo.ReverseChildren.push_back(BB); + } } - Work.pop_back(); - - // Update VInfo based on Ancestor info - if (VInfo.Parent < LastLinked) - continue; - - auto &VAInfo = DT.Info[VAncestor]; - typename GraphT::NodeRef VAncestorLabel = VAInfo.Label; - typename GraphT::NodeRef VLabel = VInfo.Label; - if (DT.Info[VAncestorLabel].Semi < DT.Info[VLabel].Semi) - VInfo.Label = VAncestorLabel; - VInfo.Parent = VAInfo.Parent; + + return LastNum; } - return VInInfo.Label; -} + NodePtr eval(NodePtr VIn, unsigned LastLinked) { + auto &VInInfo = NodeToInfo[VIn]; + if (VInInfo.DFSNum < LastLinked) + return VIn; + + SmallVector Work; + SmallPtrSet Visited; + + if (VInInfo.Parent >= LastLinked) + Work.push_back(VIn); + + while (!Work.empty()) { + NodePtr V = Work.back(); + auto &VInfo = NodeToInfo[V]; + NodePtr VAncestor = NumToNode[VInfo.Parent]; + + // Process Ancestor first + if (Visited.insert(VAncestor).second && VInfo.Parent >= LastLinked) { + Work.push_back(VAncestor); + continue; + } + Work.pop_back(); + + // Update VInfo based on Ancestor info + if (VInfo.Parent < LastLinked) + continue; + + auto &VAInfo = NodeToInfo[VAncestor]; + NodePtr VAncestorLabel = VAInfo.Label; + NodePtr VLabel = VInfo.Label; + if (NodeToInfo[VAncestorLabel].Semi < NodeToInfo[VLabel].Semi) + VInfo.Label = VAncestorLabel; + VInfo.Parent = VAInfo.Parent; + } -template -void Calculate(DominatorTreeBaseByGraphTraits> &DT, - FuncT &F) { - typedef GraphTraits GraphT; - static_assert(std::is_pointer::value, - "NodeRef should be pointer type"); - typedef typename std::remove_pointer::type NodeType; - - unsigned N = 0; - bool MultipleRoots = (DT.Roots.size() > 1); - if (MultipleRoots) { - auto &BBInfo = DT.Info[nullptr]; - BBInfo.DFSNum = BBInfo.Semi = ++N; - BBInfo.Label = nullptr; - - DT.Vertex.push_back(nullptr); // Vertex[n] = V; + return VInInfo.Label; } - // Step #1: Number blocks in depth-first order and initialize variables used - // in later stages of the algorithm. - if (DT.isPostDominator()){ - for (unsigned i = 0, e = static_cast(DT.Roots.size()); - i != e; ++i) - N = ReverseDFSPass(DT, DT.Roots[i], N); - } else { - N = DFSPass(DT, DT.Roots[0], N); + // This function requires DFS to be run before calling it. + void runSemiNCA(DomTreeT &DT, const unsigned MinLevel = 0) { + const unsigned NextDFSNum(NumToNode.size()); + // Initialize IDoms to spanning tree parents. + for (unsigned i = 1; i < NextDFSNum; ++i) { + const NodePtr V = NumToNode[i]; + auto &VInfo = NodeToInfo[V]; + VInfo.IDom = NumToNode[VInfo.Parent]; + } + + // Step #1: Calculate the semidominators of all vertices. + for (unsigned i = NextDFSNum - 1; i >= 2; --i) { + NodePtr W = NumToNode[i]; + auto &WInfo = NodeToInfo[W]; + + // Initialize the semi dominator to point to the parent node. + WInfo.Semi = WInfo.Parent; + for (const auto &N : WInfo.ReverseChildren) { + if (NodeToInfo.count(N) == 0) // Skip unreachable predecessors. + continue; + + const TreeNodePtr TN = DT.getNode(N); + // Skip predecessors whose level is above the subtree we are processing. + if (TN && TN->getLevel() < MinLevel) + continue; + + unsigned SemiU = NodeToInfo[eval(N, i + 1)].Semi; + if (SemiU < WInfo.Semi) WInfo.Semi = SemiU; + } + } + + // Step #2: Explicitly define the immediate dominator of each vertex. + // IDom[i] = NCA(SDom[i], SpanningTreeParent(i)). + // Note that the parents were stored in IDoms and later got invalidated + // during path compression in Eval. 
+ for (unsigned i = 2; i < NextDFSNum; ++i) { + const NodePtr W = NumToNode[i]; + auto &WInfo = NodeToInfo[W]; + const unsigned SDomNum = NodeToInfo[NumToNode[WInfo.Semi]].DFSNum; + NodePtr WIDomCandidate = WInfo.IDom; + while (NodeToInfo[WIDomCandidate].DFSNum > SDomNum) + WIDomCandidate = NodeToInfo[WIDomCandidate].IDom; + + WInfo.IDom = WIDomCandidate; + } } - // it might be that some blocks did not get a DFS number (e.g., blocks of - // infinite loops). In these cases an artificial exit node is required. - MultipleRoots |= (DT.isPostDominator() && N != GraphTraits::size(&F)); + template + unsigned doFullDFSWalk(const DomTreeT &DT, DescendCondition DC) { + unsigned Num = 0; - // When naively implemented, the Lengauer-Tarjan algorithm requires a separate - // bucket for each vertex. However, this is unnecessary, because each vertex - // is only placed into a single bucket (that of its semidominator), and each - // vertex's bucket is processed before it is added to any bucket itself. - // - // Instead of using a bucket per vertex, we use a single array Buckets that - // has two purposes. Before the vertex V with preorder number i is processed, - // Buckets[i] stores the index of the first element in V's bucket. After V's - // bucket is processed, Buckets[i] stores the index of the next element in the - // bucket containing V, if any. - SmallVector Buckets; - Buckets.resize(N + 1); - for (unsigned i = 1; i <= N; ++i) - Buckets[i] = i; - - for (unsigned i = N; i >= 2; --i) { - typename GraphT::NodeRef W = DT.Vertex[i]; - auto &WInfo = DT.Info[W]; - - // Step #2: Implicitly define the immediate dominator of vertices - for (unsigned j = i; Buckets[j] != i; j = Buckets[j]) { - typename GraphT::NodeRef V = DT.Vertex[Buckets[j]]; - typename GraphT::NodeRef U = Eval(DT, V, i + 1); - DT.IDoms[V] = DT.Info[U].Semi < i ? U : W; + if (DT.Roots.size() > 1) { + auto &BBInfo = NodeToInfo[nullptr]; + BBInfo.DFSNum = BBInfo.Semi = ++Num; + BBInfo.Label = nullptr; + + NumToNode.push_back(nullptr); // NumToNode[n] = V; } - // Step #3: Calculate the semidominators of all vertices + if (DT.isPostDominator()) { + for (auto *Root : DT.Roots) Num = runDFS(Root, Num, DC, 1); + } else { + assert(DT.Roots.size() == 1); + Num = runDFS(DT.Roots[0], Num, DC, Num); + } + + return Num; + } + + void calculateFromScratch(DomTreeT &DT, const unsigned NumBlocks) { + // Step #0: Number blocks in depth-first order and initialize variables used + // in later stages of the algorithm. + const unsigned LastDFSNum = doFullDFSWalk(DT, AlwaysDescend); + + runSemiNCA(DT); + + if (DT.Roots.empty()) return; + + // Add a node for the root. This node might be the actual root, if there is + // one exit block, or it may be the virtual exit (denoted by + // (BasicBlock *)0) which postdominates all real exits if there are multiple + // exit blocks, or an infinite loop. + // It might be that some blocks did not get a DFS number (e.g., blocks of + // infinite loops). In these cases an artificial exit node is required. + const bool MultipleRoots = DT.Roots.size() > 1 || (DT.isPostDominator() && + LastDFSNum != NumBlocks); + NodePtr Root = !MultipleRoots ? DT.Roots[0] : nullptr; + + DT.RootNode = (DT.DomTreeNodes[Root] = + llvm::make_unique>(Root, nullptr)) + .get(); + attachNewSubtree(DT, DT.RootNode); + } + + void attachNewSubtree(DomTreeT& DT, const TreeNodePtr AttachTo) { + // Attach the first unreachable block to AttachTo. + NodeToInfo[NumToNode[1]].IDom = AttachTo->getBlock(); + // Loop over all of the discovered blocks in the function... 
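Aside: after this patch a client rebuilds a tree through recalculate(), which funnels into DomTreeBuilder::Calculate and the Semi-NCA code above. A small sketch with LLVM IR names (`F` is a placeholder function):

    #include <cassert>
    #include "llvm/IR/Dominators.h"
    using namespace llvm;

    void rebuild(Function &F) {
      DominatorTree DT(F);  // the ctor runs recalculate(F), i.e. full Semi-NCA
      DomTreeNode *Entry = DT.getNode(&F.getEntryBlock());
      assert(Entry && Entry->getLevel() == 0 && "entry block is the root");
    }
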
+ for (size_t i = 1, e = NumToNode.size(); i != e; ++i) { + NodePtr W = NumToNode[i]; + DEBUG(dbgs() << "\tdiscovered a new reachable node " + << BlockNamePrinter(W) << "\n"); + + // Don't replace this with 'count', the insertion side effect is important + if (DT.DomTreeNodes[W]) continue; // Haven't calculated this node yet? + + NodePtr ImmDom = getIDom(W); + + // Get or calculate the node for the immediate dominator + TreeNodePtr IDomNode = getNodeForBlock(ImmDom, DT); - // initialize the semi dominator to point to the parent node - WInfo.Semi = WInfo.Parent; - for (const auto &N : inverse_children(W)) - if (DT.Info.count(N)) { // Only if this predecessor is reachable! - unsigned SemiU = DT.Info[Eval(DT, N, i + 1)].Semi; - if (SemiU < WInfo.Semi) - WInfo.Semi = SemiU; + // Add a new tree node for this BasicBlock, and link it as a child of + // IDomNode + DT.DomTreeNodes[W] = IDomNode->addChild( + llvm::make_unique>(W, IDomNode)); + } + } + + void reattachExistingSubtree(DomTreeT &DT, const TreeNodePtr AttachTo) { + NodeToInfo[NumToNode[1]].IDom = AttachTo->getBlock(); + for (size_t i = 1, e = NumToNode.size(); i != e; ++i) { + const NodePtr N = NumToNode[i]; + const TreeNodePtr TN = DT.getNode(N); + assert(TN); + const TreeNodePtr NewIDom = DT.getNode(NodeToInfo[N].IDom); + TN->setIDom(NewIDom); + } + } + + // Helper struct used during edge insertions. + struct InsertionInfo { + using BucketElementTy = std::pair; + struct DecreasingLevel { + bool operator()(const BucketElementTy &First, + const BucketElementTy &Second) const { + return First.first > Second.first; } + }; + + std::priority_queue, + DecreasingLevel> + Bucket; // Queue of tree nodes sorted by level in descending order. + SmallDenseSet Affected; + SmallDenseSet Visited; + SmallVector AffectedQueue; + SmallVector VisitedNotAffectedQueue; + }; + + static void InsertEdge(DomTreeT &DT, const NodePtr From, const NodePtr To) { + assert(From && To && "Cannot connect nullptrs"); + DEBUG(dbgs() << "Inserting edge " << BlockNamePrinter(From) << " -> " + << BlockNamePrinter(To) << "\n"); + const TreeNodePtr FromTN = DT.getNode(From); + + // Ignore edges from unreachable nodes. + if (!FromTN) return; + + DT.DFSInfoValid = false; + + const TreeNodePtr ToTN = DT.getNode(To); + if (!ToTN) + InsertUnreachable(DT, FromTN, To); + else + InsertReachable(DT, FromTN, ToTN); + } - // If V is a non-root vertex and sdom(V) = parent(V), then idom(V) is - // necessarily parent(V). In this case, set idom(V) here and avoid placing - // V into a bucket. - if (WInfo.Semi == WInfo.Parent) { - DT.IDoms[W] = DT.Vertex[WInfo.Parent]; - } else { - Buckets[i] = Buckets[WInfo.Semi]; - Buckets[WInfo.Semi] = i; + // Handles insertion to a node already in the dominator tree. + static void InsertReachable(DomTreeT &DT, const TreeNodePtr From, + const TreeNodePtr To) { + DEBUG(dbgs() << "\tReachable " << BlockNamePrinter(From->getBlock()) + << " -> " << BlockNamePrinter(To->getBlock()) << "\n"); + const NodePtr NCDBlock = + DT.findNearestCommonDominator(From->getBlock(), To->getBlock()); + assert(NCDBlock || DT.isPostDominator()); + const TreeNodePtr NCD = DT.getNode(NCDBlock); + assert(NCD); + + DEBUG(dbgs() << "\t\tNCA == " << BlockNamePrinter(NCD) << "\n"); + const TreeNodePtr ToIDom = To->getIDom(); + + // Nothing affected -- NCA property holds. + // (Based on the lemma 2.5 from the second paper.) + if (NCD == To || NCD == ToIDom) return; + + // Identify and collect affected nodes. 
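Aside: std::priority_queue comparator direction is easy to get backwards, so it is worth pinning down what a DecreasingLevel-style comparator actually yields. A self-contained check, with plain unsigned levels standing in for the {level, node} pairs:

    #include <cassert>
    #include <queue>
    #include <vector>

    int main() {
      auto Cmp = [](unsigned A, unsigned B) { return A > B; };
      std::priority_queue<unsigned, std::vector<unsigned>, decltype(Cmp)> Q(Cmp);
      for (unsigned L : {3u, 1u, 2u}) Q.push(L);
      // A greater-than style comparator turns the default max-heap into a
      // min-heap: top() returns the smallest queued level.
      assert(Q.top() == 1);
      return 0;
    }
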
+ InsertionInfo II; + DEBUG(dbgs() << "Marking " << BlockNamePrinter(To) << " as affected\n"); + II.Affected.insert(To); + const unsigned ToLevel = To->getLevel(); + DEBUG(dbgs() << "Putting " << BlockNamePrinter(To) << " into a Bucket\n"); + II.Bucket.push({ToLevel, To}); + + while (!II.Bucket.empty()) { + const TreeNodePtr CurrentNode = II.Bucket.top().second; + II.Bucket.pop(); + DEBUG(dbgs() << "\tAdding to Visited and AffectedQueue: " + << BlockNamePrinter(CurrentNode) << "\n"); + II.Visited.insert(CurrentNode); + II.AffectedQueue.push_back(CurrentNode); + + // Discover and collect affected successors of the current node. + VisitInsertion(DT, CurrentNode, CurrentNode->getLevel(), NCD, II); } + + // Finish by updating immediate dominators and levels. + UpdateInsertion(DT, NCD, II); } - if (N >= 1) { - typename GraphT::NodeRef Root = DT.Vertex[1]; - for (unsigned j = 1; Buckets[j] != 1; j = Buckets[j]) { - typename GraphT::NodeRef V = DT.Vertex[Buckets[j]]; - DT.IDoms[V] = Root; + // Visits an affected node and collect its affected successors. + static void VisitInsertion(DomTreeT &DT, const TreeNodePtr TN, + const unsigned RootLevel, const TreeNodePtr NCD, + InsertionInfo &II) { + const unsigned NCDLevel = NCD->getLevel(); + DEBUG(dbgs() << "Visiting " << BlockNamePrinter(TN) << "\n"); + + assert(TN->getBlock()); + for (const NodePtr Succ : + ChildrenGetter::Get(TN->getBlock())) { + const TreeNodePtr SuccTN = DT.getNode(Succ); + assert(SuccTN && "Unreachable successor found at reachable insertion"); + const unsigned SuccLevel = SuccTN->getLevel(); + + DEBUG(dbgs() << "\tSuccessor " << BlockNamePrinter(Succ) + << ", level = " << SuccLevel << "\n"); + + // Succ dominated by subtree From -- not affected. + // (Based on the lemma 2.5 from the second paper.) + if (SuccLevel > RootLevel) { + DEBUG(dbgs() << "\t\tDominated by subtree From\n"); + if (II.Visited.count(SuccTN) != 0) continue; + + DEBUG(dbgs() << "\t\tMarking visited not affected " + << BlockNamePrinter(Succ) << "\n"); + II.Visited.insert(SuccTN); + II.VisitedNotAffectedQueue.push_back(SuccTN); + VisitInsertion(DT, SuccTN, RootLevel, NCD, II); + } else if ((SuccLevel > NCDLevel + 1) && II.Affected.count(SuccTN) == 0) { + DEBUG(dbgs() << "\t\tMarking affected and adding " + << BlockNamePrinter(Succ) << " to a Bucket\n"); + II.Affected.insert(SuccTN); + II.Bucket.push({SuccLevel, SuccTN}); + } } } - // Step #4: Explicitly define the immediate dominator of each vertex - for (unsigned i = 2; i <= N; ++i) { - typename GraphT::NodeRef W = DT.Vertex[i]; - typename GraphT::NodeRef &WIDom = DT.IDoms[W]; - if (WIDom != DT.Vertex[DT.Info[W].Semi]) - WIDom = DT.IDoms[WIDom]; + // Updates immediate dominators and levels after insertion. + static void UpdateInsertion(DomTreeT &DT, const TreeNodePtr NCD, + InsertionInfo &II) { + DEBUG(dbgs() << "Updating NCD = " << BlockNamePrinter(NCD) << "\n"); + + for (const TreeNodePtr TN : II.AffectedQueue) { + DEBUG(dbgs() << "\tIDom(" << BlockNamePrinter(TN) + << ") = " << BlockNamePrinter(NCD) << "\n"); + TN->setIDom(NCD); + } + + UpdateLevelsAfterInsertion(II); } - if (DT.Roots.empty()) return; + static void UpdateLevelsAfterInsertion(InsertionInfo &II) { + DEBUG(dbgs() << "Updating levels for visited but not affected nodes\n"); - // Add a node for the root. This node might be the actual root, if there is - // one exit block, or it may be the virtual exit (denoted by (BasicBlock *)0) - // which postdominates all real exits if there are multiple exit blocks, or - // an infinite loop. 
- typename GraphT::NodeRef Root = !MultipleRoots ? DT.Roots[0] : nullptr; + for (const TreeNodePtr TN : II.VisitedNotAffectedQueue) { + DEBUG(dbgs() << "\tlevel(" << BlockNamePrinter(TN) << ") = (" + << BlockNamePrinter(TN->getIDom()) << ") " + << TN->getIDom()->getLevel() << " + 1\n"); + TN->UpdateLevel(); + } + } - DT.RootNode = - (DT.DomTreeNodes[Root] = - llvm::make_unique>(Root, nullptr)) - .get(); + // Handles insertion to previously unreachable nodes. + static void InsertUnreachable(DomTreeT &DT, const TreeNodePtr From, + const NodePtr To) { + DEBUG(dbgs() << "Inserting " << BlockNamePrinter(From) + << " -> (unreachable) " << BlockNamePrinter(To) << "\n"); + + // Collect discovered edges to already reachable nodes. + SmallVector<std::pair<NodePtr, TreeNodePtr>, 8> DiscoveredEdgesToReachable; + // Discover and connect nodes that became reachable with the insertion. + ComputeUnreachableDominators(DT, To, From, DiscoveredEdgesToReachable); + + DEBUG(dbgs() << "Inserted " << BlockNamePrinter(From) + << " -> (prev unreachable) " << BlockNamePrinter(To) << "\n"); + + DEBUG(DT.print(dbgs())); + + // Use the discovered edges and insert discovered connecting (incoming) + // edges. + for (const auto &Edge : DiscoveredEdgesToReachable) { + DEBUG(dbgs() << "\tInserting discovered connecting edge " + << BlockNamePrinter(Edge.first) << " -> " + << BlockNamePrinter(Edge.second) << "\n"); + InsertReachable(DT, DT.getNode(Edge.first), Edge.second); + } + } - // Loop over all of the reachable blocks in the function... - for (unsigned i = 2; i <= N; ++i) { - typename GraphT::NodeRef W = DT.Vertex[i]; + // Connects nodes that become reachable with an insertion. + static void ComputeUnreachableDominators( + DomTreeT &DT, const NodePtr Root, const TreeNodePtr Incoming, + SmallVectorImpl<std::pair<NodePtr, TreeNodePtr>> + &DiscoveredConnectingEdges) { + assert(!DT.getNode(Root) && "Root must not be reachable"); + + // Visit only previously unreachable nodes. + auto UnreachableDescender = [&DT, &DiscoveredConnectingEdges](NodePtr From, + NodePtr To) { + const TreeNodePtr ToTN = DT.getNode(To); + if (!ToTN) return true; + + DiscoveredConnectingEdges.push_back({From, ToTN}); + return false; + }; + + SemiNCAInfo SNCA; + SNCA.runDFS<IsPostDom>(Root, 0, UnreachableDescender, 0); + SNCA.runSemiNCA(DT); + SNCA.attachNewSubtree(DT, Incoming); + + DEBUG(dbgs() << "After adding unreachable nodes\n"); + DEBUG(DT.print(dbgs())); + } - // Don't replace this with 'count', the insertion side effect is important - if (DT.DomTreeNodes[W]) - continue; // Haven't calculated this node yet? + // Checks if the tree contains all reachable nodes in the input graph. + bool verifyReachability(const DomTreeT &DT) { + clear(); + doFullDFSWalk(DT, AlwaysDescend); - typename GraphT::NodeRef ImmDom = DT.getIDom(W); + for (auto &NodeToTN : DT.DomTreeNodes) { + const TreeNodePtr TN = NodeToTN.second.get(); + const NodePtr BB = TN->getBlock(); - assert(ImmDom || DT.DomTreeNodes[nullptr]); + // Virtual root has a corresponding virtual CFG node. 
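Aside: the verifiers that follow are all reachable through the new verify() entry point, and a freshly constructed tree should always pass. A short sketch (LLVM IR names; `F` is a placeholder):

    #include <cassert>
    #include "llvm/IR/Dominators.h"
    using namespace llvm;

    void checkFresh(Function &F) {
      DominatorTree DT(F);   // construct-and-calculate
      assert(DT.verify() &&  // reachability, levels, NCD, parent/sibling
             "a freshly computed tree must verify");
    }
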
+ if (DT.isVirtualRoot(TN)) continue; - // Get or calculate the node for the immediate dominator - DomTreeNodeBase *IDomNode = DT.getNodeForBlock(ImmDom); + if (NodeToInfo.count(BB) == 0) { + errs() << "DomTree node " << BlockNamePrinter(BB) + << " not found by DFS walk!\n"; + errs().flush(); - // Add a new tree node for this BasicBlock, and link it as a child of - // IDomNode - DT.DomTreeNodes[W] = IDomNode->addChild( - llvm::make_unique>(W, IDomNode)); + return false; + } + } + + for (const NodePtr N : NumToNode) { + if (N && !DT.getNode(N)) { + errs() << "CFG node " << BlockNamePrinter(N) + << " not found in the DomTree!\n"; + errs().flush(); + + return false; + } + } + + return true; + } + + static void DeleteEdge(DomTreeT &DT, const NodePtr From, const NodePtr To) { + assert(From && To && "Cannot disconnect nullptrs"); + DEBUG(dbgs() << "Deleting edge " << BlockNamePrinter(From) << " -> " + << BlockNamePrinter(To) << "\n"); + +#ifndef NDEBUG + // Ensure that the edge was in fact deleted from the CFG before informing + // the DomTree about it. + // The check is O(N), so run it only in debug configuration. + auto IsSuccessor = [](const NodePtr SuccCandidate, const NodePtr Of) { + auto Successors = ChildrenGetter::Get(Of); + return llvm::find(Successors, SuccCandidate) != Successors.end(); + }; + (void)IsSuccessor; + assert(!IsSuccessor(To, From) && "Deleted edge still exists in the CFG!"); +#endif + + const TreeNodePtr FromTN = DT.getNode(From); + // Deletion in an unreachable subtree -- nothing to do. + if (!FromTN) return; + + const TreeNodePtr ToTN = DT.getNode(To); + assert(ToTN && "To already unreachable -- there is no edge to delete"); + const NodePtr NCDBlock = DT.findNearestCommonDominator(From, To); + const TreeNodePtr NCD = DT.getNode(NCDBlock); + + // To dominates From -- nothing to do. + if (ToTN == NCD) return; + + const TreeNodePtr ToIDom = ToTN->getIDom(); + DEBUG(dbgs() << "\tNCD " << BlockNamePrinter(NCD) << ", ToIDom " + << BlockNamePrinter(ToIDom) << "\n"); + + // To remains reachable after deletion. + // (Based on the caption under Figure 4. from the second paper.) + if (FromTN != ToIDom || HasProperSupport(DT, ToTN)) + DeleteReachable(DT, FromTN, ToTN); + else + DeleteUnreachable(DT, ToTN); + } + + // Handles deletions that leave destination nodes reachable. + static void DeleteReachable(DomTreeT &DT, const TreeNodePtr FromTN, + const TreeNodePtr ToTN) { + DEBUG(dbgs() << "Deleting reachable " << BlockNamePrinter(FromTN) << " -> " + << BlockNamePrinter(ToTN) << "\n"); + DEBUG(dbgs() << "\tRebuilding subtree\n"); + + // Find the top of the subtree that needs to be rebuilt. + // (Based on the lemma 2.6 from the second paper.) + const NodePtr ToIDom = + DT.findNearestCommonDominator(FromTN->getBlock(), ToTN->getBlock()); + assert(ToIDom || DT.isPostDominator()); + const TreeNodePtr ToIDomTN = DT.getNode(ToIDom); + assert(ToIDomTN); + const TreeNodePtr PrevIDomSubTree = ToIDomTN->getIDom(); + // Top of the subtree to rebuild is the root node. Rebuild the tree from + // scratch. + if (!PrevIDomSubTree) { + DEBUG(dbgs() << "The entire tree needs to be rebuilt\n"); + DT.recalculate(*DT.Parent); + return; + } + + // Only visit nodes in the subtree starting at To. 
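Aside: the NDEBUG-guarded assertion above encodes the contract of deleteEdge: mutate the CFG first, then inform the tree. A sketch of retargeting one edge under that contract (IR names are placeholders; it assumes `OldTo` occurs exactly once in the terminator):

    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Dominators.h"
    using namespace llvm;

    void retargetEdge(DominatorTree &DT, BasicBlock *From, BasicBlock *OldTo,
                      BasicBlock *NewTo) {
      // Rewrite the CFG edge From -> OldTo into From -> NewTo.
      From->getTerminator()->replaceUsesOfWith(OldTo, NewTo);
      DT.deleteEdge(From, OldTo);  // the edge is already gone from the CFG
      DT.insertEdge(From, NewTo);  // the new edge is already present
    }
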
+ const unsigned Level = ToIDomTN->getLevel(); + auto DescendBelow = [Level, &DT](NodePtr, NodePtr To) { + return DT.getNode(To)->getLevel() > Level; + }; + + DEBUG(dbgs() << "\tTop of subtree: " << BlockNamePrinter(ToIDomTN) << "\n"); + + SemiNCAInfo SNCA; + SNCA.runDFS<IsPostDom>(ToIDom, 0, DescendBelow, 0); + DEBUG(dbgs() << "\tRunning Semi-NCA\n"); + SNCA.runSemiNCA(DT, Level); + SNCA.reattachExistingSubtree(DT, PrevIDomSubTree); + } + + // Checks if a node has proper support, as defined on page 3 and later + // explained on page 7 of the second paper. + static bool HasProperSupport(DomTreeT &DT, const TreeNodePtr TN) { + DEBUG(dbgs() << "IsReachableFromIDom " << BlockNamePrinter(TN) << "\n"); + for (const NodePtr Pred : + ChildrenGetter<NodePtr, !IsPostDom>::Get(TN->getBlock())) { + DEBUG(dbgs() << "\tPred " << BlockNamePrinter(Pred) << "\n"); + if (!DT.getNode(Pred)) continue; + + const NodePtr Support = + DT.findNearestCommonDominator(TN->getBlock(), Pred); + DEBUG(dbgs() << "\tSupport " << BlockNamePrinter(Support) << "\n"); + if (Support != TN->getBlock()) { + DEBUG(dbgs() << "\t" << BlockNamePrinter(TN) + << " is reachable from support " + << BlockNamePrinter(Support) << "\n"); + return true; + } + } + + return false; + } + + // Handle deletions that make destination node unreachable. + // (Based on the lemma 2.7 from the second paper.) + static void DeleteUnreachable(DomTreeT &DT, const TreeNodePtr ToTN) { + DEBUG(dbgs() << "Deleting unreachable subtree " << BlockNamePrinter(ToTN) + << "\n"); + assert(ToTN); + assert(ToTN->getBlock()); + + SmallVector<NodePtr, 8> AffectedQueue; + const unsigned Level = ToTN->getLevel(); + + // Traverse destination node's descendants with greater level in the tree + // and collect visited nodes. + auto DescendAndCollect = [Level, &AffectedQueue, &DT](NodePtr, NodePtr To) { + const TreeNodePtr TN = DT.getNode(To); + assert(TN); + if (TN->getLevel() > Level) return true; + if (llvm::find(AffectedQueue, To) == AffectedQueue.end()) + AffectedQueue.push_back(To); + + return false; + }; + + SemiNCAInfo SNCA; + unsigned LastDFSNum = + SNCA.runDFS<IsPostDom>(ToTN->getBlock(), 0, DescendAndCollect, 0); + + TreeNodePtr MinNode = ToTN; + + // Identify the top of the subtree to rebuild by finding the NCD of all + // the affected nodes. + for (const NodePtr N : AffectedQueue) { + const TreeNodePtr TN = DT.getNode(N); + const NodePtr NCDBlock = + DT.findNearestCommonDominator(TN->getBlock(), ToTN->getBlock()); + assert(NCDBlock || DT.isPostDominator()); + const TreeNodePtr NCD = DT.getNode(NCDBlock); + assert(NCD); + + DEBUG(dbgs() << "Processing affected node " << BlockNamePrinter(TN) + << " with NCD = " << BlockNamePrinter(NCD) + << ", MinNode =" << BlockNamePrinter(MinNode) << "\n"); + if (NCD != TN && NCD->getLevel() < MinNode->getLevel()) MinNode = NCD; + } + + // Root reached, rebuild the whole tree from scratch. + if (!MinNode->getIDom()) { + DEBUG(dbgs() << "The entire tree needs to be rebuilt\n"); + DT.recalculate(*DT.Parent); + return; + } + + // Erase the unreachable subtree in reverse preorder to process all children + // before deleting their parent. + for (unsigned i = LastDFSNum; i > 0; --i) { + const NodePtr N = SNCA.NumToNode[i]; + const TreeNodePtr TN = DT.getNode(N); + DEBUG(dbgs() << "Erasing node " << BlockNamePrinter(TN) << "\n"); + + EraseNode(DT, TN); + } + + // The affected subtree starts at the To node -- there's no extra work to do. 
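Aside: the MinNode loop above is essentially an NCD fold over the affected set. The same idiom written against the now-const findNearestCommonDominator (assumes a non-empty array; the helper name is hypothetical):

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/IR/Dominators.h"
    using namespace llvm;

    BasicBlock *ncdOfSet(const DominatorTree &DT, ArrayRef<BasicBlock *> BBs) {
      BasicBlock *NCD = BBs.front();
      for (BasicBlock *BB : BBs.drop_front())
        NCD = DT.findNearestCommonDominator(NCD, BB);  // const-qualified now
      return NCD;
    }
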
+ if (MinNode == ToTN) return; + + DEBUG(dbgs() << "DeleteUnreachable: running DFS with MinNode = " + << BlockNamePrinter(MinNode) << "\n"); + const unsigned MinLevel = MinNode->getLevel(); + const TreeNodePtr PrevIDom = MinNode->getIDom(); + assert(PrevIDom); + SNCA.clear(); + + // Identify nodes that remain in the affected subtree. + auto DescendBelow = [MinLevel, &DT](NodePtr, NodePtr To) { + const TreeNodePtr ToTN = DT.getNode(To); + return ToTN && ToTN->getLevel() > MinLevel; + }; + SNCA.runDFS(MinNode->getBlock(), 0, DescendBelow, 0); + + DEBUG(dbgs() << "Previous IDom(MinNode) = " << BlockNamePrinter(PrevIDom) + << "\nRunning Semi-NCA\n"); + + // Rebuild the remaining part of affected subtree. + SNCA.runSemiNCA(DT, MinLevel); + SNCA.reattachExistingSubtree(DT, PrevIDom); + } + + // Removes leaf tree nodes from the dominator tree. + static void EraseNode(DomTreeT &DT, const TreeNodePtr TN) { + assert(TN); + assert(TN->getNumChildren() == 0 && "Not a tree leaf"); + + const TreeNodePtr IDom = TN->getIDom(); + assert(IDom); + + auto ChIt = llvm::find(IDom->Children, TN); + assert(ChIt != IDom->Children.end()); + std::swap(*ChIt, IDom->Children.back()); + IDom->Children.pop_back(); + + DT.DomTreeNodes.erase(TN->getBlock()); + } + + //~~ + //===--------------- DomTree correctness verification ---------------------=== + //~~ + + // Check if for every parent with a level L in the tree all of its children + // have level L + 1. + static bool VerifyLevels(const DomTreeT &DT) { + for (auto &NodeToTN : DT.DomTreeNodes) { + const TreeNodePtr TN = NodeToTN.second.get(); + const NodePtr BB = TN->getBlock(); + if (!BB) continue; + + const TreeNodePtr IDom = TN->getIDom(); + if (!IDom && TN->getLevel() != 0) { + errs() << "Node without an IDom " << BlockNamePrinter(BB) + << " has a nonzero level " << TN->getLevel() << "!\n"; + errs().flush(); + + return false; + } + + if (IDom && TN->getLevel() != IDom->getLevel() + 1) { + errs() << "Node " << BlockNamePrinter(BB) << " has level " + << TN->getLevel() << " while its IDom " + << BlockNamePrinter(IDom->getBlock()) << " has level " + << IDom->getLevel() << "!\n"; + errs().flush(); + + return false; + } + } + + return true; } - // Free temporary memory used to construct idom's - DT.IDoms.clear(); - DT.Info.clear(); - DT.Vertex.clear(); - DT.Vertex.shrink_to_fit(); + // Checks if for every edge From -> To in the graph + // NCD(From, To) == IDom(To) or To. + bool verifyNCD(const DomTreeT &DT) { + clear(); + doFullDFSWalk(DT, AlwaysDescend); + + for (auto &BlockToInfo : NodeToInfo) { + auto &Info = BlockToInfo.second; + + const NodePtr From = NumToNode[Info.Parent]; + if (!From) continue; + + const NodePtr To = BlockToInfo.first; + const TreeNodePtr ToTN = DT.getNode(To); + assert(ToTN); + + const NodePtr NCD = DT.findNearestCommonDominator(From, To); + const TreeNodePtr NCDTN = DT.getNode(NCD); + const TreeNodePtr ToIDom = ToTN->getIDom(); + if (NCDTN != ToTN && NCDTN != ToIDom) { + errs() << "NearestCommonDominator verification failed:\n\tNCD(From:" + << BlockNamePrinter(From) << ", To:" << BlockNamePrinter(To) + << ") = " << BlockNamePrinter(NCD) + << ",\t (should be To or IDom[To]: " << BlockNamePrinter(ToIDom) + << ")\n"; + errs().flush(); + + return false; + } + } + + return true; + } + + // The below routines verify the correctness of the dominator tree relative to + // the CFG it's coming from. A tree is a dominator tree iff it has two + // properties, called the parent property and the sibling property. 
+  // and Lengauer prove (but don't explicitly name) the properties as part of
+  // the proofs in their 1972 paper, but the proofs are mostly part of proving
+  // things about semidominators and idoms, and some of them are simply asserted
+  // based on even earlier papers (see, e.g., lemma 2). Some papers refer to
+  // these properties as "valid" and "co-valid". See, e.g., "Dominators,
+  // directed bipolar orders, and independent spanning trees" by Loukas
+  // Georgiadis and Robert E. Tarjan, as well as "Dominator Tree Verification
+  // and Vertex-Disjoint Paths" by the same authors.
+
+  // A very simple and direct explanation of these properties can be found in
+  // "An Experimental Study of Dynamic Dominators", found at
+  // https://arxiv.org/abs/1604.02711
+
+  // The easiest way to think of the parent property is that it's a requirement
+  // of being a dominator. Let's just take immediate dominators. For PARENT to
+  // be an immediate dominator of CHILD, all paths in the CFG must go through
+  // PARENT before they hit CHILD. This implies that if you were to cut PARENT
+  // out of the CFG, there should be no paths to CHILD that are reachable. If
+  // there are, then you now have a path to CHILD that goes around PARENT and
+  // still reaches CHILD, which, by definition, means PARENT can't be a
+  // dominator of CHILD (let alone an immediate one).
+
+  // The sibling property is similar. It says that for each pair of sibling
+  // nodes in the dominator tree (LEFT and RIGHT), they must not dominate each
+  // other. If sibling LEFT dominated sibling RIGHT, it means there are no
+  // paths in the CFG from sibling LEFT to sibling RIGHT that do not go through
+  // LEFT, and thus, LEFT is really an ancestor (in the dominator tree) of
+  // RIGHT, not a sibling.
+
+  // It is possible to verify the parent and sibling properties in
+  // linear time, but the algorithms are complex. Instead, we do it in a
+  // straightforward N^2 and N^3 way below, using direct path reachability.
+
+
+  // Checks if the tree has the parent property: if for all edges from V to W in
+  // the input graph, such that V is reachable, the parent of W in the tree is
+  // an ancestor of V in the tree.
+  //
+  // This means that if a node gets disconnected from the graph, then all of
+  // the nodes it dominated previously will now become unreachable.
+  bool verifyParentProperty(const DomTreeT &DT) {
+    for (auto &NodeToTN : DT.DomTreeNodes) {
+      const TreeNodePtr TN = NodeToTN.second.get();
+      const NodePtr BB = TN->getBlock();
+      if (!BB || TN->getChildren().empty()) continue;
+
+      clear();
+      doFullDFSWalk(DT, [BB](NodePtr From, NodePtr To) {
+        return From != BB && To != BB;
+      });
+
+      for (TreeNodePtr Child : TN->getChildren())
+        if (NodeToInfo.count(Child->getBlock()) != 0) {
+          errs() << "Child " << BlockNamePrinter(Child)
+                 << " reachable after its parent " << BlockNamePrinter(BB)
+                 << " is removed!\n";
+          errs().flush();
+
+          return false;
+        }
+    }
-  DT.updateDFSNumbers();
+    return true;
+  }
+
+  // Check if the tree has the sibling property: if a node V does not dominate a
+  // node W for all siblings V and W in the tree.
+  //
+  // This means that if a node gets disconnected from the graph, then all of its
+  // siblings will now still be reachable.
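To make the two properties concrete before the sibling-property checker below, here is a small standalone sketch (illustrative only, not part of this patch; it uses plain STL containers instead of GraphTraits). It applies the same brute-force idea as verifyParentProperty and verifySiblingProperty: cut one node out of a toy CFG and re-run reachability.

// Standalone illustration (not LLVM code): brute-force parent/sibling checks
// on the diamond CFG  0 -> 1, 0 -> 2, 1 -> 3, 2 -> 3,  where IDom(3) == 0 and
// the dominator tree therefore makes 1, 2, and 3 siblings under 0.
#include <cstdio>
#include <map>
#include <set>
#include <vector>

using Graph = std::map<int, std::vector<int>>; // node -> successors

// Nodes reachable from Entry once Cut is deleted from the graph.
static std::set<int> reachableWithout(const Graph &G, int Entry, int Cut) {
  std::set<int> Seen;
  std::vector<int> Stack;
  if (Entry != Cut)
    Stack.push_back(Entry);
  while (!Stack.empty()) {
    int N = Stack.back();
    Stack.pop_back();
    if (!Seen.insert(N).second)
      continue;
    auto It = G.find(N);
    if (It == G.end())
      continue;
    for (int S : It->second)
      if (S != Cut)
        Stack.push_back(S);
  }
  return Seen;
}

int main() {
  Graph G = {{0, {1, 2}}, {1, {3}}, {2, {3}}, {3, {}}};

  // Parent property for the tree edge 0 -> 3: with 0 cut out, 3 must be gone.
  bool P = reachableWithout(G, 0, 0).count(3) == 0;
  std::printf("parent property holds for (0, 3): %s\n", P ? "yes" : "no");

  // Sibling property for siblings 1 and 3: with 1 cut out, 3 must survive
  // (it is still reachable through 2), i.e. 1 does not dominate 3.
  bool S = reachableWithout(G, 0, 1).count(3) == 1;
  std::printf("sibling property holds for (1, 3): %s\n", S ? "yes" : "no");
}

This is exactly the N^2/N^3 strategy described above; the real checkers just run the same cut-and-walk over GraphTraits-based graphs.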
+  bool verifySiblingProperty(const DomTreeT &DT) {
+    for (auto &NodeToTN : DT.DomTreeNodes) {
+      const TreeNodePtr TN = NodeToTN.second.get();
+      const NodePtr BB = TN->getBlock();
+      if (!BB || TN->getChildren().empty()) continue;
+
+      const auto &Siblings = TN->getChildren();
+      for (const TreeNodePtr N : Siblings) {
+        clear();
+        NodePtr BBN = N->getBlock();
+        doFullDFSWalk(DT, [BBN](NodePtr From, NodePtr To) {
+          return From != BBN && To != BBN;
+        });
+
+        for (const TreeNodePtr S : Siblings) {
+          if (S == N) continue;
+
+          if (NodeToInfo.count(S->getBlock()) == 0) {
+            errs() << "Node " << BlockNamePrinter(S)
+                   << " not reachable when its sibling " << BlockNamePrinter(N)
+                   << " is removed!\n";
+            errs().flush();
+
+            return false;
+          }
+        }
+      }
+    }
+
+    return true;
+  }
+};
+
+
+template <class DomTreeT, class FuncT>
+void Calculate(DomTreeT &DT, FuncT &F) {
+  SemiNCAInfo<DomTreeT> SNCA;
+  SNCA.calculateFromScratch(DT, GraphTraits<FuncT *>::size(&F));
+}
+
+template <class DomTreeT>
+void InsertEdge(DomTreeT &DT, typename DomTreeT::NodePtr From,
+                typename DomTreeT::NodePtr To) {
+  if (DT.isPostDominator()) std::swap(From, To);
+  SemiNCAInfo<DomTreeT>::InsertEdge(DT, From, To);
+}
+
+template <class DomTreeT>
+void DeleteEdge(DomTreeT &DT, typename DomTreeT::NodePtr From,
+                typename DomTreeT::NodePtr To) {
+  if (DT.isPostDominator()) std::swap(From, To);
+  SemiNCAInfo<DomTreeT>::DeleteEdge(DT, From, To);
 }
+
+template <class DomTreeT>
+bool Verify(const DomTreeT &DT) {
+  SemiNCAInfo<DomTreeT> SNCA;
+  return SNCA.verifyReachability(DT) && SNCA.VerifyLevels(DT) &&
+         SNCA.verifyNCD(DT) && SNCA.verifyParentProperty(DT) &&
+         SNCA.verifySiblingProperty(DT);
 }
+} // namespace DomTreeBuilder
+} // namespace llvm
+
+#undef DEBUG_TYPE
+
 #endif
diff --git a/interpreter/llvm/src/include/llvm/Support/GraphWriter.h b/interpreter/llvm/src/include/llvm/Support/GraphWriter.h
index c318fea536511..3df5c867f7d33 100644
--- a/interpreter/llvm/src/include/llvm/Support/GraphWriter.h
+++ b/interpreter/llvm/src/include/llvm/Support/GraphWriter.h
@@ -1,4 +1,4 @@
-//===-- llvm/Support/GraphWriter.h - Write graph to a .dot file -*- C++ -*-===//
+//===- llvm/Support/GraphWriter.h - Write graph to a .dot file --*- C++ -*-===//
 //
 // The LLVM Compiler Infrastructure
 //
@@ -24,30 +24,40 @@
 #define LLVM_SUPPORT_GRAPHWRITER_H
 
 #include "llvm/ADT/GraphTraits.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Support/DOTGraphTraits.h"
 #include "llvm/Support/raw_ostream.h"
+#include
+#include
+#include
+#include
+#include
 #include
 
 namespace llvm {
 
 namespace DOT {  // Private functions...
-  std::string EscapeString(const std::string &Label);
-  /// \brief Get a color string for this node number. Simply round-robin selects
-  /// from a reasonable number of colors.
-  StringRef getColorString(unsigned NodeNumber);
-}
+std::string EscapeString(const std::string &Label);
+
+/// \brief Get a color string for this node number. Simply round-robin selects
+/// from a reasonable number of colors.
+StringRef getColorString(unsigned NodeNumber); + +} // end namespace DOT namespace GraphProgram { - enum Name { - DOT, - FDP, - NEATO, - TWOPI, - CIRCO - }; -} + +enum Name { + DOT, + FDP, + NEATO, + TWOPI, + CIRCO +}; + +} // end namespace GraphProgram bool DisplayGraph(StringRef Filename, bool wait = true, GraphProgram::Name program = GraphProgram::DOT); @@ -57,11 +67,11 @@ class GraphWriter { raw_ostream &O; const GraphType &G; - typedef DOTGraphTraits DOTTraits; - typedef GraphTraits GTraits; - typedef typename GTraits::NodeRef NodeRef; - typedef typename GTraits::nodes_iterator node_iterator; - typedef typename GTraits::ChildIteratorType child_iterator; + using DOTTraits = DOTGraphTraits; + using GTraits = GraphTraits; + using NodeRef = typename GTraits::NodeRef; + using node_iterator = typename GTraits::nodes_iterator; + using child_iterator = typename GTraits::ChildIteratorType; DOTTraits DTraits; static_assert(std::is_pointer::value, @@ -346,6 +356,6 @@ void ViewGraph(const GraphType &G, const Twine &Name, DisplayGraph(Filename, false, Program); } -} // End llvm namespace +} // end namespace llvm -#endif +#endif // LLVM_SUPPORT_GRAPHWRITER_H diff --git a/interpreter/llvm/src/include/llvm/Support/Host.h b/interpreter/llvm/src/include/llvm/Support/Host.h index 89986fdae9713..be93dd99032e2 100644 --- a/interpreter/llvm/src/include/llvm/Support/Host.h +++ b/interpreter/llvm/src/include/llvm/Support/Host.h @@ -21,6 +21,16 @@ #include #elif defined(_AIX) #include +#elif defined(__sun) +/* Solaris provides _BIG_ENDIAN/_LITTLE_ENDIAN selector in sys/types.h */ +#include +#define BIG_ENDIAN 4321 +#define LITTLE_ENDIAN 1234 +#if defined(_BIG_ENDIAN) +#define BYTE_ORDER BIG_ENDIAN +#else +#define BYTE_ORDER LITTLE_ENDIAN +#endif #else #if !defined(BYTE_ORDER) && !defined(LLVM_ON_WIN32) #include diff --git a/interpreter/llvm/src/include/llvm/Support/LowLevelTypeImpl.h b/interpreter/llvm/src/include/llvm/Support/LowLevelTypeImpl.h index e18e58b7b5b23..c79dd0c295079 100644 --- a/interpreter/llvm/src/include/llvm/Support/LowLevelTypeImpl.h +++ b/interpreter/llvm/src/include/llvm/Support/LowLevelTypeImpl.h @@ -27,9 +27,9 @@ #ifndef LLVM_SUPPORT_LOWLEVELTYPEIMPL_H #define LLVM_SUPPORT_LOWLEVELTYPEIMPL_H -#include #include "llvm/ADT/DenseMapInfo.h" #include "llvm/CodeGen/MachineValueType.h" +#include namespace llvm { diff --git a/interpreter/llvm/src/include/llvm/Support/ManagedStatic.h b/interpreter/llvm/src/include/llvm/Support/ManagedStatic.h index 7ce86eee95d24..b4bf3210cc738 100644 --- a/interpreter/llvm/src/include/llvm/Support/ManagedStatic.h +++ b/interpreter/llvm/src/include/llvm/Support/ManagedStatic.h @@ -14,25 +14,22 @@ #ifndef LLVM_SUPPORT_MANAGEDSTATIC_H #define LLVM_SUPPORT_MANAGEDSTATIC_H -#include "llvm/Support/Compiler.h" #include #include namespace llvm { /// object_creator - Helper method for ManagedStatic. -template -LLVM_LIBRARY_VISIBILITY void* object_creator() { - return new C(); -} +template struct object_creator { + static void *call() { return new C(); } +}; /// object_deleter - Helper method for ManagedStatic. /// -template struct LLVM_LIBRARY_VISIBILITY object_deleter { +template struct object_deleter { static void call(void *Ptr) { delete (T *)Ptr; } }; -template -struct LLVM_LIBRARY_VISIBILITY object_deleter { +template struct object_deleter { static void call(void *Ptr) { delete[](T *)Ptr; } }; @@ -59,14 +56,15 @@ class ManagedStaticBase { /// libraries that link in LLVM components) and for making destruction be /// explicit through the llvm_shutdown() function call. 
 ///
-template<class C>
+template <class C, class Creator = object_creator<C>,
+          class Deleter = object_deleter<C>>
 class ManagedStatic : public ManagedStaticBase {
 public:
   // Accessors.
   C &operator*() {
     void *Tmp = Ptr.load(std::memory_order_acquire);
     if (!Tmp)
-      RegisterManagedStatic(object_creator<C>, object_deleter<C>::call);
+      RegisterManagedStatic(Creator::call, Deleter::call);
 
     return *static_cast<C *>(Ptr.load(std::memory_order_relaxed));
   }
 
@@ -76,7 +74,7 @@ class ManagedStatic : public ManagedStaticBase {
   const C &operator*() const {
     void *Tmp = Ptr.load(std::memory_order_acquire);
     if (!Tmp)
-      RegisterManagedStatic(object_creator<C>, object_deleter<C>::call);
+      RegisterManagedStatic(Creator::call, Deleter::call);
 
     return *static_cast<C *>(Ptr.load(std::memory_order_relaxed));
   }
 
diff --git a/interpreter/llvm/src/include/llvm/Support/MathExtras.h b/interpreter/llvm/src/include/llvm/Support/MathExtras.h
index 7f07e8cc3a51e..fd29865c8475e 100644
--- a/interpreter/llvm/src/include/llvm/Support/MathExtras.h
+++ b/interpreter/llvm/src/include/llvm/Support/MathExtras.h
@@ -20,8 +20,8 @@
 #include
 #include
 #include
-#include
 #include
+#include
 
 #ifdef _MSC_VER
 #include
@@ -272,23 +272,22 @@ T reverseBits(T Val) {
 // type overloading so that signed and unsigned integers can be used without
 // ambiguity.
 
-/// Hi_32 - This function returns the high 32 bits of a 64 bit value.
+/// Return the high 32 bits of a 64 bit value.
 constexpr inline uint32_t Hi_32(uint64_t Value) {
   return static_cast<uint32_t>(Value >> 32);
 }
 
-/// Lo_32 - This function returns the low 32 bits of a 64 bit value.
+/// Return the low 32 bits of a 64 bit value.
 constexpr inline uint32_t Lo_32(uint64_t Value) {
   return static_cast<uint32_t>(Value);
 }
 
-/// Make_64 - This functions makes a 64-bit integer from a high / low pair of
-/// 32-bit integers.
+/// Make a 64-bit integer from a high / low pair of 32-bit integers.
 constexpr inline uint64_t Make_64(uint32_t High, uint32_t Low) {
   return ((uint64_t)High << 32) | (uint64_t)Low;
 }
 
-/// isInt - Checks if an integer fits into the given bit width.
+/// Checks if an integer fits into the given bit width.
 template <unsigned N> constexpr inline bool isInt(int64_t x) {
   return N >= 64 || (-(INT64_C(1)<<(N-1)) <= x && x < (INT64_C(1)<<(N-1)));
 }
@@ -303,8 +302,7 @@ template <> constexpr inline bool isInt<32>(int64_t x) {
   return static_cast<int32_t>(x) == x;
 }
 
-/// isShiftedInt - Checks if a signed integer is an N bit number shifted
-/// left by S.
+/// Checks if a signed integer is an N bit number shifted left by S.
 template <unsigned N, unsigned S>
 constexpr inline bool isShiftedInt(int64_t x) {
   static_assert(
@@ -313,7 +311,7 @@ constexpr inline bool isShiftedInt(int64_t x) {
   return isInt<N + S>(x) && (x % (UINT64_C(1) << S) == 0);
 }
 
-/// isUInt - Checks if an unsigned integer fits into the given bit width.
+/// Checks if an unsigned integer fits into the given bit width.
 ///
 /// This is written as two functions rather than as simply
 ///
@@ -383,71 +381,63 @@ inline int64_t maxIntN(int64_t N) {
   return (UINT64_C(1) << (N - 1)) - 1;
 }
 
-/// isUIntN - Checks if an unsigned integer fits into the given (dynamic)
-/// bit width.
+/// Checks if an unsigned integer fits into the given (dynamic) bit width.
 inline bool isUIntN(unsigned N, uint64_t x) {
   return N >= 64 || x <= maxUIntN(N);
 }
 
-/// isIntN - Checks if an signed integer fits into the given (dynamic)
-/// bit width.
+/// Checks if a signed integer fits into the given (dynamic) bit width.
inline bool isIntN(unsigned N, int64_t x) { return N >= 64 || (minIntN(N) <= x && x <= maxIntN(N)); } -/// isMask_32 - This function returns true if the argument is a non-empty -/// sequence of ones starting at the least significant bit with the remainder -/// zero (32 bit version). Ex. isMask_32(0x0000FFFFU) == true. +/// Return true if the argument is a non-empty sequence of ones starting at the +/// least significant bit with the remainder zero (32 bit version). +/// Ex. isMask_32(0x0000FFFFU) == true. constexpr inline bool isMask_32(uint32_t Value) { return Value && ((Value + 1) & Value) == 0; } -/// isMask_64 - This function returns true if the argument is a non-empty -/// sequence of ones starting at the least significant bit with the remainder -/// zero (64 bit version). +/// Return true if the argument is a non-empty sequence of ones starting at the +/// least significant bit with the remainder zero (64 bit version). constexpr inline bool isMask_64(uint64_t Value) { return Value && ((Value + 1) & Value) == 0; } -/// isShiftedMask_32 - This function returns true if the argument contains a -/// non-empty sequence of ones with the remainder zero (32 bit version.) -/// Ex. isShiftedMask_32(0x0000FF00U) == true. +/// Return true if the argument contains a non-empty sequence of ones with the +/// remainder zero (32 bit version.) Ex. isShiftedMask_32(0x0000FF00U) == true. constexpr inline bool isShiftedMask_32(uint32_t Value) { return Value && isMask_32((Value - 1) | Value); } -/// isShiftedMask_64 - This function returns true if the argument contains a -/// non-empty sequence of ones with the remainder zero (64 bit version.) +/// Return true if the argument contains a non-empty sequence of ones with the +/// remainder zero (64 bit version.) constexpr inline bool isShiftedMask_64(uint64_t Value) { return Value && isMask_64((Value - 1) | Value); } -/// isPowerOf2_32 - This function returns true if the argument is a power of -/// two > 0. Ex. isPowerOf2_32(0x00100000U) == true (32 bit edition.) +/// Return true if the argument is a power of two > 0. +/// Ex. isPowerOf2_32(0x00100000U) == true (32 bit edition.) constexpr inline bool isPowerOf2_32(uint32_t Value) { return Value && !(Value & (Value - 1)); } -/// isPowerOf2_64 - This function returns true if the argument is a power of two -/// > 0 (64 bit edition.) +/// Return true if the argument is a power of two > 0 (64 bit edition.) constexpr inline bool isPowerOf2_64(uint64_t Value) { return Value && !(Value & (Value - int64_t(1L))); } -/// ByteSwap_16 - This function returns a byte-swapped representation of the -/// 16-bit argument, Value. +/// Return a byte-swapped representation of the 16-bit argument. inline uint16_t ByteSwap_16(uint16_t Value) { return sys::SwapByteOrder_16(Value); } -/// ByteSwap_32 - This function returns a byte-swapped representation of the -/// 32-bit argument, Value. +/// Return a byte-swapped representation of the 32-bit argument. inline uint32_t ByteSwap_32(uint32_t Value) { return sys::SwapByteOrder_32(Value); } -/// ByteSwap_64 - This function returns a byte-swapped representation of the -/// 64-bit argument, Value. +/// Return a byte-swapped representation of the 64-bit argument. inline uint64_t ByteSwap_64(uint64_t Value) { return sys::SwapByteOrder_64(Value); } @@ -455,7 +445,7 @@ inline uint64_t ByteSwap_64(uint64_t Value) { /// \brief Count the number of ones from the most significant bit to the first /// zero bit. /// -/// Ex. CountLeadingOnes(0xFF0FFF00) == 8. +/// Ex. 
countLeadingOnes(0xFF0FFF00) == 8. /// Only unsigned integral types are allowed. /// /// \param ZB the behavior on an input of all ones. Only ZB_Width and @@ -526,7 +516,7 @@ inline unsigned countPopulation(T Value) { return detail::PopulationCounter::count(Value); } -/// Log2 - This function returns the log base 2 of the specified value +/// Return the log base 2 of the specified value. inline double Log2(double Value) { #if defined(__ANDROID_API__) && __ANDROID_API__ < 18 return __builtin_log(Value) / __builtin_log(2.0); @@ -535,34 +525,33 @@ inline double Log2(double Value) { #endif } -/// Log2_32 - This function returns the floor log base 2 of the specified value, -/// -1 if the value is zero. (32 bit edition.) +/// Return the floor log base 2 of the specified value, -1 if the value is zero. +/// (32 bit edition.) /// Ex. Log2_32(32) == 5, Log2_32(1) == 0, Log2_32(0) == -1, Log2_32(6) == 2 inline unsigned Log2_32(uint32_t Value) { return 31 - countLeadingZeros(Value); } -/// Log2_64 - This function returns the floor log base 2 of the specified value, -/// -1 if the value is zero. (64 bit edition.) +/// Return the floor log base 2 of the specified value, -1 if the value is zero. +/// (64 bit edition.) inline unsigned Log2_64(uint64_t Value) { return 63 - countLeadingZeros(Value); } -/// Log2_32_Ceil - This function returns the ceil log base 2 of the specified -/// value, 32 if the value is zero. (32 bit edition). +/// Return the ceil log base 2 of the specified value, 32 if the value is zero. +/// (32 bit edition). /// Ex. Log2_32_Ceil(32) == 5, Log2_32_Ceil(1) == 0, Log2_32_Ceil(6) == 3 inline unsigned Log2_32_Ceil(uint32_t Value) { return 32 - countLeadingZeros(Value - 1); } -/// Log2_64_Ceil - This function returns the ceil log base 2 of the specified -/// value, 64 if the value is zero. (64 bit edition.) +/// Return the ceil log base 2 of the specified value, 64 if the value is zero. +/// (64 bit edition.) inline unsigned Log2_64_Ceil(uint64_t Value) { return 64 - countLeadingZeros(Value - 1); } -/// GreatestCommonDivisor64 - Return the greatest common divisor of the two -/// values using Euclid's algorithm. +/// Return the greatest common divisor of the values using Euclid's algorithm. inline uint64_t GreatestCommonDivisor64(uint64_t A, uint64_t B) { while (B) { uint64_t T = B; @@ -572,8 +561,7 @@ inline uint64_t GreatestCommonDivisor64(uint64_t A, uint64_t B) { return A; } -/// BitsToDouble - This function takes a 64-bit integer and returns the bit -/// equivalent double. +/// This function takes a 64-bit integer and returns the bit equivalent double. inline double BitsToDouble(uint64_t Bits) { double D; static_assert(sizeof(uint64_t) == sizeof(double), "Unexpected type sizes"); @@ -581,8 +569,7 @@ inline double BitsToDouble(uint64_t Bits) { return D; } -/// BitsToFloat - This function takes a 32-bit integer and returns the bit -/// equivalent float. +/// This function takes a 32-bit integer and returns the bit equivalent float. inline float BitsToFloat(uint32_t Bits) { float F; static_assert(sizeof(uint32_t) == sizeof(float), "Unexpected type sizes"); @@ -590,10 +577,9 @@ inline float BitsToFloat(uint32_t Bits) { return F; } -/// DoubleToBits - This function takes a double and returns the bit -/// equivalent 64-bit integer. Note that copying doubles around -/// changes the bits of NaNs on some hosts, notably x86, so this -/// routine cannot be used if these bits are needed. +/// This function takes a double and returns the bit equivalent 64-bit integer. 
+/// Note that copying doubles around changes the bits of NaNs on some hosts, +/// notably x86, so this routine cannot be used if these bits are needed. inline uint64_t DoubleToBits(double Double) { uint64_t Bits; static_assert(sizeof(uint64_t) == sizeof(double), "Unexpected type sizes"); @@ -601,10 +587,9 @@ inline uint64_t DoubleToBits(double Double) { return Bits; } -/// FloatToBits - This function takes a float and returns the bit -/// equivalent 32-bit integer. Note that copying floats around -/// changes the bits of NaNs on some hosts, notably x86, so this -/// routine cannot be used if these bits are needed. +/// This function takes a float and returns the bit equivalent 32-bit integer. +/// Note that copying floats around changes the bits of NaNs on some hosts, +/// notably x86, so this routine cannot be used if these bits are needed. inline uint32_t FloatToBits(float Float) { uint32_t Bits; static_assert(sizeof(uint32_t) == sizeof(float), "Unexpected type sizes"); @@ -612,8 +597,8 @@ inline uint32_t FloatToBits(float Float) { return Bits; } -/// MinAlign - A and B are either alignments or offsets. Return the minimum -/// alignment that may be assumed after adding the two together. +/// A and B are either alignments or offsets. Return the minimum alignment that +/// may be assumed after adding the two together. constexpr inline uint64_t MinAlign(uint64_t A, uint64_t B) { // The largest power of 2 that divides both A and B. // @@ -642,8 +627,8 @@ inline size_t alignmentAdjustment(const void *Ptr, size_t Alignment) { return alignAddr(Ptr, Alignment) - (uintptr_t)Ptr; } -/// NextPowerOf2 - Returns the next power of two (in 64-bits) -/// that is strictly greater than A. Returns zero on overflow. +/// Returns the next power of two (in 64-bits) that is strictly greater than A. +/// Returns zero on overflow. inline uint64_t NextPowerOf2(uint64_t A) { A |= (A >> 1); A |= (A >> 2); diff --git a/interpreter/llvm/src/include/llvm/Support/MemoryBuffer.h b/interpreter/llvm/src/include/llvm/Support/MemoryBuffer.h index e8bdc3e89fa7c..73f0251a6b6e3 100644 --- a/interpreter/llvm/src/include/llvm/Support/MemoryBuffer.h +++ b/interpreter/llvm/src/include/llvm/Support/MemoryBuffer.h @@ -14,14 +14,14 @@ #ifndef LLVM_SUPPORT_MEMORYBUFFER_H #define LLVM_SUPPORT_MEMORYBUFFER_H +#include "llvm-c/Types.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/CBindingWrapping.h" #include "llvm/Support/ErrorOr.h" -#include "llvm-c/Types.h" -#include #include #include +#include namespace llvm { diff --git a/interpreter/llvm/src/include/llvm/Support/Path.h b/interpreter/llvm/src/include/llvm/Support/Path.h index 6ac51195519eb..e5979674cf1c7 100644 --- a/interpreter/llvm/src/include/llvm/Support/Path.h +++ b/interpreter/llvm/src/include/llvm/Support/Path.h @@ -17,6 +17,7 @@ #define LLVM_SUPPORT_PATH_H #include "llvm/ADT/Twine.h" +#include "llvm/ADT/iterator.h" #include "llvm/Support/DataTypes.h" #include @@ -49,7 +50,8 @@ enum class Style { windows, posix, native }; /// C:\foo\bar => C:,/,foo,bar /// @endcode class const_iterator - : public std::iterator { + : public iterator_facade_base { StringRef Path; ///< The entire path. StringRef Component; ///< The current component. Not necessarily in Path. size_t Position; ///< The iterators current position within Path. 
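As context for the two iterator classes being reworked here: they are normally obtained through llvm::sys::path::begin()/end(), and with iterator_facade_base the operator-> and operator!= overloads deleted below are supplied by the base class instead. A minimal usage sketch (assuming the post-patch API in this tree, including the Style parameter shown above):

// Sketch: decomposing a path into components, as in the class comment above
// ("C:\foo\bar => C:,/,foo,bar").
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"

int main() {
  llvm::StringRef P = "C:\\foo\\bar";
  for (auto I = llvm::sys::path::begin(P, llvm::sys::path::Style::windows),
            E = llvm::sys::path::end(P);
       I != E; ++I)              // operator!= now comes from the facade
    llvm::outs() << *I << "\n";  // prints: C:  /  foo  bar
}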
@@ -61,10 +63,8 @@ class const_iterator public: reference operator*() const { return Component; } - pointer operator->() const { return &Component; } const_iterator &operator++(); // preincrement bool operator==(const const_iterator &RHS) const; - bool operator!=(const const_iterator &RHS) const { return !(*this == RHS); } /// @brief Difference in bytes between this and RHS. ptrdiff_t operator-(const const_iterator &RHS) const; @@ -76,7 +76,8 @@ class const_iterator /// \a path in reverse order. The traversal order is exactly reversed from that /// of \a const_iterator class reverse_iterator - : public std::iterator { + : public iterator_facade_base { StringRef Path; ///< The entire path. StringRef Component; ///< The current component. Not necessarily in Path. size_t Position; ///< The iterators current position within Path. @@ -87,10 +88,8 @@ class reverse_iterator public: reference operator*() const { return Component; } - pointer operator->() const { return &Component; } reverse_iterator &operator++(); // preincrement bool operator==(const reverse_iterator &RHS) const; - bool operator!=(const reverse_iterator &RHS) const { return !(*this == RHS); } /// @brief Difference in bytes between this and RHS. ptrdiff_t operator-(const reverse_iterator &RHS) const; diff --git a/interpreter/llvm/src/include/llvm/Support/ReverseIteration.h b/interpreter/llvm/src/include/llvm/Support/ReverseIteration.h new file mode 100644 index 0000000000000..cb97b60f06dd9 --- /dev/null +++ b/interpreter/llvm/src/include/llvm/Support/ReverseIteration.h @@ -0,0 +1,17 @@ +#ifndef LLVM_SUPPORT_REVERSEITERATION_H +#define LLVM_SUPPORT_REVERSEITERATION_H + +#include "llvm/Config/abi-breaking.h" + +namespace llvm { +#if LLVM_ENABLE_ABI_BREAKING_CHECKS +template struct ReverseIterate { static bool value; }; +#if LLVM_ENABLE_REVERSE_ITERATION +template bool ReverseIterate::value = true; +#else +template bool ReverseIterate::value = false; +#endif +#endif +} + +#endif diff --git a/interpreter/llvm/src/include/llvm/Support/Solaris/sys/regset.h b/interpreter/llvm/src/include/llvm/Support/Solaris/sys/regset.h new file mode 100644 index 0000000000000..6a69ebe718a19 --- /dev/null +++ b/interpreter/llvm/src/include/llvm/Support/Solaris/sys/regset.h @@ -0,0 +1,39 @@ +/*===- llvm/Support/Solaris/sys/regset.h ------------------------*- C++ -*-===* + * + * The LLVM Compiler Infrastructure + * + * This file is distributed under the University of Illinois Open Source + * License. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===* + * + * This file works around excessive name space pollution from the system header + * on Solaris hosts. + * + *===----------------------------------------------------------------------===*/ + +#ifndef LLVM_SUPPORT_SOLARIS_SYS_REGSET_H + +#include_next + +#undef CS +#undef DS +#undef ES +#undef FS +#undef GS +#undef SS +#undef EAX +#undef ECX +#undef EDX +#undef EBX +#undef ESP +#undef EBP +#undef ESI +#undef EDI +#undef EIP +#undef UESP +#undef EFL +#undef ERR +#undef TRAPNO + +#endif diff --git a/interpreter/llvm/src/include/llvm/Support/SourceMgr.h b/interpreter/llvm/src/include/llvm/Support/SourceMgr.h index cb90d968c44c5..399f8dcd76fca 100644 --- a/interpreter/llvm/src/include/llvm/Support/SourceMgr.h +++ b/interpreter/llvm/src/include/llvm/Support/SourceMgr.h @@ -49,7 +49,7 @@ class SourceMgr { /// Clients that want to handle their own diagnostics in a custom way can /// register a function pointer+context as a diagnostic handler. 
/// It gets called each time PrintMessage is invoked. - typedef void (*DiagHandlerTy)(const SMDiagnostic &, void *Context); + using DiagHandlerTy = void (*)(const SMDiagnostic &, void *Context); private: struct SrcBuffer { diff --git a/interpreter/llvm/src/include/llvm/Support/StringPool.h b/interpreter/llvm/src/include/llvm/Support/StringPool.h index 2ec0c3b76c11f..bb5fd07f0d009 100644 --- a/interpreter/llvm/src/include/llvm/Support/StringPool.h +++ b/interpreter/llvm/src/include/llvm/Support/StringPool.h @@ -1,4 +1,4 @@ -//===-- StringPool.h - Interned string pool ---------------------*- C++ -*-===// +//===- StringPool.h - Interned string pool ----------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -30,6 +30,7 @@ #define LLVM_SUPPORT_STRINGPOOL_H #include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" #include namespace llvm { @@ -43,17 +44,17 @@ namespace llvm { /// PooledString - This is the value of an entry in the pool's interning /// table. struct PooledString { - StringPool *Pool; ///< So the string can remove itself. - unsigned Refcount; ///< Number of referencing PooledStringPtrs. + StringPool *Pool = nullptr; ///< So the string can remove itself. + unsigned Refcount = 0; ///< Number of referencing PooledStringPtrs. public: - PooledString() : Pool(nullptr), Refcount(0) { } + PooledString() = default; }; friend class PooledStringPtr; - typedef StringMap table_t; - typedef StringMapEntry entry_t; + using table_t = StringMap; + using entry_t = StringMapEntry; table_t InternTable; public: @@ -76,11 +77,12 @@ namespace llvm { /// a single pointer, but it does have reference-counting overhead when /// copied. class PooledStringPtr { - typedef StringPool::entry_t entry_t; - entry_t *S; + using entry_t = StringPool::entry_t; + + entry_t *S = nullptr; public: - PooledStringPtr() : S(nullptr) {} + PooledStringPtr() = default; explicit PooledStringPtr(entry_t *E) : S(E) { if (S) ++S->getValue().Refcount; @@ -133,6 +135,6 @@ namespace llvm { inline bool operator!=(const PooledStringPtr &That) const { return S != That.S; } }; -} // End llvm namespace +} // end namespace llvm -#endif +#endif // LLVM_SUPPORT_STRINGPOOL_H diff --git a/interpreter/llvm/src/include/llvm/Support/TargetParser.h b/interpreter/llvm/src/include/llvm/Support/TargetParser.h index f29cc40ffdd55..e13582f6a6d3d 100644 --- a/interpreter/llvm/src/include/llvm/Support/TargetParser.h +++ b/interpreter/llvm/src/include/llvm/Support/TargetParser.h @@ -17,6 +17,7 @@ // FIXME: vector is used because that's what clang uses for subtarget feature // lists, but SmallVector would probably be better +#include "llvm/ADT/Triple.h" #include namespace llvm { @@ -84,6 +85,7 @@ enum ArchExtKind : unsigned { AEK_DSP = 0x400, AEK_FP16 = 0x800, AEK_RAS = 0x1000, + AEK_SVE = 0x2000, // Unsupported extensions. AEK_OS = 0x8000000, AEK_IWMMXT = 0x10000000, @@ -140,6 +142,8 @@ unsigned parseArchEndian(StringRef Arch); unsigned parseArchProfile(StringRef Arch); unsigned parseArchVersion(StringRef Arch); +StringRef computeDefaultTargetABI(const Triple &TT, StringRef CPU); + } // namespace ARM // FIXME:This should be made into class design,to avoid dupplication. 
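Stepping back to the StringPool hunk above: the pool interns strings into a StringMap and hands out reference-counted PooledStringPtrs, so two pointers interned from equal strings compare equal. A minimal usage sketch (based on the API as it stands in this header; the surrounding code is illustrative):

// Sketch: interning with llvm::StringPool; equal strings share one entry.
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/StringPool.h"
#include <cassert>

int main() {
  llvm::StringPool Pool;
  llvm::PooledStringPtr A = Pool.intern("hello");
  llvm::PooledStringPtr B = Pool.intern("hello");
  assert(A == B);                         // same interned entry
  assert(llvm::StringRef(*A) == "hello"); // operator* yields a const char *
  // When A and B go out of scope, the Refcount drops to zero and the entry
  // removes itself from the pool (that is what the Pool back-pointer is for).
}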
@@ -163,7 +167,8 @@ enum ArchExtKind : unsigned { AEK_FP16 = 0x20, AEK_PROFILE = 0x40, AEK_RAS = 0x80, - AEK_LSE = 0x100 + AEK_LSE = 0x100, + AEK_SVE = 0x200 }; StringRef getCanonicalArchName(StringRef Arch); diff --git a/interpreter/llvm/src/include/llvm/Support/TargetRegistry.h b/interpreter/llvm/src/include/llvm/Support/TargetRegistry.h index bd68d24144875..8454b27b6f04f 100644 --- a/interpreter/llvm/src/include/llvm/Support/TargetRegistry.h +++ b/interpreter/llvm/src/include/llvm/Support/TargetRegistry.h @@ -20,10 +20,10 @@ #define LLVM_SUPPORT_TARGETREGISTRY_H #include "llvm-c/Disassembler.h" -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" @@ -54,6 +54,7 @@ class MCSymbolizer; class MCTargetAsmParser; class MCTargetOptions; class MCTargetStreamer; +class raw_ostream; class raw_pwrite_stream; class TargetMachine; class TargetOptions; @@ -96,75 +97,75 @@ class Target { public: friend struct TargetRegistry; - typedef bool (*ArchMatchFnTy)(Triple::ArchType Arch); + using ArchMatchFnTy = bool (*)(Triple::ArchType Arch); - typedef MCAsmInfo *(*MCAsmInfoCtorFnTy)(const MCRegisterInfo &MRI, - const Triple &TT); - typedef void (*MCAdjustCodeGenOptsFnTy)(const Triple &TT, Reloc::Model RM, - CodeModel::Model &CM); + using MCAsmInfoCtorFnTy = MCAsmInfo *(*)(const MCRegisterInfo &MRI, + const Triple &TT); + using MCAdjustCodeGenOptsFnTy = void (*)(const Triple &TT, Reloc::Model RM, + CodeModel::Model &CM); - typedef MCInstrInfo *(*MCInstrInfoCtorFnTy)(void); - typedef MCInstrAnalysis *(*MCInstrAnalysisCtorFnTy)(const MCInstrInfo *Info); - typedef MCRegisterInfo *(*MCRegInfoCtorFnTy)(const Triple &TT); - typedef MCSubtargetInfo *(*MCSubtargetInfoCtorFnTy)(const Triple &TT, - StringRef CPU, - StringRef Features); - typedef TargetMachine *(*TargetMachineCtorTy)( + using MCInstrInfoCtorFnTy = MCInstrInfo *(*)(); + using MCInstrAnalysisCtorFnTy = MCInstrAnalysis *(*)(const MCInstrInfo *Info); + using MCRegInfoCtorFnTy = MCRegisterInfo *(*)(const Triple &TT); + using MCSubtargetInfoCtorFnTy = MCSubtargetInfo *(*)(const Triple &TT, + StringRef CPU, + StringRef Features); + using TargetMachineCtorTy = TargetMachine *(*)( const Target &T, const Triple &TT, StringRef CPU, StringRef Features, const TargetOptions &Options, Optional RM, CodeModel::Model CM, CodeGenOpt::Level OL); // If it weren't for layering issues (this header is in llvm/Support, but // depends on MC?) this should take the Streamer by value rather than rvalue // reference. 
- typedef AsmPrinter *(*AsmPrinterCtorTy)( + using AsmPrinterCtorTy = AsmPrinter *(*)( TargetMachine &TM, std::unique_ptr &&Streamer); - typedef MCAsmBackend *(*MCAsmBackendCtorTy)(const Target &T, - const MCRegisterInfo &MRI, - const Triple &TT, StringRef CPU, - const MCTargetOptions &Options); - typedef MCTargetAsmParser *(*MCAsmParserCtorTy)( + using MCAsmBackendCtorTy = MCAsmBackend *(*)(const Target &T, + const MCRegisterInfo &MRI, + const Triple &TT, StringRef CPU, + const MCTargetOptions &Options); + using MCAsmParserCtorTy = MCTargetAsmParser *(*)( const MCSubtargetInfo &STI, MCAsmParser &P, const MCInstrInfo &MII, const MCTargetOptions &Options); - typedef MCDisassembler *(*MCDisassemblerCtorTy)(const Target &T, - const MCSubtargetInfo &STI, - MCContext &Ctx); - typedef MCInstPrinter *(*MCInstPrinterCtorTy)(const Triple &T, - unsigned SyntaxVariant, - const MCAsmInfo &MAI, - const MCInstrInfo &MII, - const MCRegisterInfo &MRI); - typedef MCCodeEmitter *(*MCCodeEmitterCtorTy)(const MCInstrInfo &II, - const MCRegisterInfo &MRI, - MCContext &Ctx); - typedef MCStreamer *(*ELFStreamerCtorTy)(const Triple &T, MCContext &Ctx, - MCAsmBackend &TAB, - raw_pwrite_stream &OS, - MCCodeEmitter *Emitter, - bool RelaxAll); - typedef MCStreamer *(*MachOStreamerCtorTy)(MCContext &Ctx, MCAsmBackend &TAB, - raw_pwrite_stream &OS, - MCCodeEmitter *Emitter, - bool RelaxAll, - bool DWARFMustBeAtTheEnd); - typedef MCStreamer *(*COFFStreamerCtorTy)(MCContext &Ctx, MCAsmBackend &TAB, - raw_pwrite_stream &OS, - MCCodeEmitter *Emitter, - bool RelaxAll, - bool IncrementalLinkerCompatible); - typedef MCStreamer *(*WasmStreamerCtorTy)(const Triple &T, MCContext &Ctx, + using MCDisassemblerCtorTy = MCDisassembler *(*)(const Target &T, + const MCSubtargetInfo &STI, + MCContext &Ctx); + using MCInstPrinterCtorTy = MCInstPrinter *(*)(const Triple &T, + unsigned SyntaxVariant, + const MCAsmInfo &MAI, + const MCInstrInfo &MII, + const MCRegisterInfo &MRI); + using MCCodeEmitterCtorTy = MCCodeEmitter *(*)(const MCInstrInfo &II, + const MCRegisterInfo &MRI, + MCContext &Ctx); + using ELFStreamerCtorTy = MCStreamer *(*)(const Triple &T, MCContext &Ctx, MCAsmBackend &TAB, raw_pwrite_stream &OS, MCCodeEmitter *Emitter, bool RelaxAll); - typedef MCTargetStreamer *(*NullTargetStreamerCtorTy)(MCStreamer &S); - typedef MCTargetStreamer *(*AsmTargetStreamerCtorTy)( + using MachOStreamerCtorTy = MCStreamer *(*)(MCContext &Ctx, MCAsmBackend &TAB, + raw_pwrite_stream &OS, + MCCodeEmitter *Emitter, + bool RelaxAll, + bool DWARFMustBeAtTheEnd); + using COFFStreamerCtorTy = MCStreamer *(*)(MCContext &Ctx, MCAsmBackend &TAB, + raw_pwrite_stream &OS, + MCCodeEmitter *Emitter, + bool RelaxAll, + bool IncrementalLinkerCompatible); + using WasmStreamerCtorTy = MCStreamer *(*)(const Triple &T, MCContext &Ctx, + MCAsmBackend &TAB, + raw_pwrite_stream &OS, + MCCodeEmitter *Emitter, + bool RelaxAll); + using NullTargetStreamerCtorTy = MCTargetStreamer *(*)(MCStreamer &S); + using AsmTargetStreamerCtorTy = MCTargetStreamer *(*)( MCStreamer &S, formatted_raw_ostream &OS, MCInstPrinter *InstPrint, bool IsVerboseAsm); - typedef MCTargetStreamer *(*ObjectTargetStreamerCtorTy)( + using ObjectTargetStreamerCtorTy = MCTargetStreamer *(*)( MCStreamer &S, const MCSubtargetInfo &STI); - typedef MCRelocationInfo *(*MCRelocationInfoCtorTy)(const Triple &TT, - MCContext &Ctx); - typedef MCSymbolizer *(*MCSymbolizerCtorTy)( + using MCRelocationInfoCtorTy = MCRelocationInfo *(*)(const Triple &TT, + MCContext &Ctx); + using MCSymbolizerCtorTy = MCSymbolizer 
*(*)( const Triple &TT, LLVMOpInfoCallback GetOpInfo, LLVMSymbolLookupCallback SymbolLookUp, void *DisInfo, MCContext *Ctx, std::unique_ptr &&RelInfo); diff --git a/interpreter/llvm/src/include/llvm/Support/ThreadPool.h b/interpreter/llvm/src/include/llvm/Support/ThreadPool.h index f0e3ffa0999c2..9ada946c6dae3 100644 --- a/interpreter/llvm/src/include/llvm/Support/ThreadPool.h +++ b/interpreter/llvm/src/include/llvm/Support/ThreadPool.h @@ -35,17 +35,8 @@ namespace llvm { /// for some work to become available. class ThreadPool { public: -#ifndef _MSC_VER - using VoidTy = void; using TaskTy = std::function; using PackagedTaskTy = std::packaged_task; -#else - // MSVC 2013 has a bug and can't use std::packaged_task; - // We force it to use bool(bool) instead. - using VoidTy = bool; - using TaskTy = std::function; - using PackagedTaskTy = std::packaged_task; -#endif /// Construct a pool with the number of core available on the system (or /// whatever the value returned by std::thread::hardware_concurrency() is). @@ -60,30 +51,17 @@ class ThreadPool { /// Asynchronous submission of a task to the pool. The returned future can be /// used to wait for the task to finish and is *non-blocking* on destruction. template - inline std::shared_future async(Function &&F, Args &&... ArgList) { + inline std::shared_future async(Function &&F, Args &&... ArgList) { auto Task = std::bind(std::forward(F), std::forward(ArgList)...); -#ifndef _MSC_VER return asyncImpl(std::move(Task)); -#else - // This lambda has to be marked mutable because MSVC 2013's std::bind call - // operator isn't const qualified. - return asyncImpl([Task](VoidTy) mutable -> VoidTy { - Task(); - return VoidTy(); - }); -#endif } /// Asynchronous submission of a task to the pool. The returned future can be /// used to wait for the task to finish and is *non-blocking* on destruction. template - inline std::shared_future async(Function &&F) { -#ifndef _MSC_VER + inline std::shared_future async(Function &&F) { return asyncImpl(std::forward(F)); -#else - return asyncImpl([F] (VoidTy) -> VoidTy { F(); return VoidTy(); }); -#endif } /// Blocking wait for all the threads to complete and the queue to be empty. @@ -93,7 +71,7 @@ class ThreadPool { private: /// Asynchronous submission of a task to the pool. The returned future can be /// used to wait for the task to finish and is *non-blocking* on destruction. - std::shared_future asyncImpl(TaskTy F); + std::shared_future asyncImpl(TaskTy F); /// Threads in flight std::vector Threads; diff --git a/interpreter/llvm/src/include/llvm/Support/UnicodeCharRanges.h b/interpreter/llvm/src/include/llvm/Support/UnicodeCharRanges.h index d4d4d8eb84a4b..4c655833b3967 100644 --- a/interpreter/llvm/src/include/llvm/Support/UnicodeCharRanges.h +++ b/interpreter/llvm/src/include/llvm/Support/UnicodeCharRanges.h @@ -18,11 +18,11 @@ #include "llvm/Support/raw_ostream.h" #include +#define DEBUG_TYPE "unicode" + namespace llvm { namespace sys { -#define DEBUG_TYPE "unicode" - /// \brief Represents a closed range of Unicode code points [Lower, Upper]. 
struct UnicodeCharRange { uint32_t Lower; @@ -99,10 +99,9 @@ class UnicodeCharSet { const CharRanges Ranges; }; -#undef DEBUG_TYPE // "unicode" - } // namespace sys } // namespace llvm +#undef DEBUG_TYPE // "unicode" #endif // LLVM_SUPPORT_UNICODECHARRANGES_H diff --git a/interpreter/llvm/src/include/llvm/Support/YAMLParser.h b/interpreter/llvm/src/include/llvm/Support/YAMLParser.h index b9e3fa47752ce..549da3ccad51f 100644 --- a/interpreter/llvm/src/include/llvm/Support/YAMLParser.h +++ b/interpreter/llvm/src/include/llvm/Support/YAMLParser.h @@ -1,4 +1,4 @@ -//===--- YAMLParser.h - Simple YAML parser --------------------------------===// +//===- YAMLParser.h - Simple YAML parser ------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -41,20 +41,25 @@ #include "llvm/ADT/StringRef.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/SMLoc.h" +#include +#include +#include #include +#include +#include #include -#include namespace llvm { + class MemoryBufferRef; class SourceMgr; -class Twine; class raw_ostream; +class Twine; namespace yaml { -class document_iterator; class Document; +class document_iterator; class Node; class Scanner; struct Token; @@ -87,6 +92,7 @@ class Stream { document_iterator end(); void skip(); bool failed(); + bool validate() { skip(); return !failed(); @@ -95,10 +101,10 @@ class Stream { void printError(Node *N, const Twine &Msg); private: + friend class Document; + std::unique_ptr scanner; std::unique_ptr CurrentDoc; - - friend class Document; }; /// \brief Abstract base class for all Nodes. @@ -119,6 +125,18 @@ class Node { Node(unsigned int Type, std::unique_ptr &, StringRef Anchor, StringRef Tag); + void *operator new(size_t Size, BumpPtrAllocator &Alloc, + size_t Alignment = 16) noexcept { + return Alloc.Allocate(Size, Alignment); + } + + void operator delete(void *Ptr, BumpPtrAllocator &Alloc, + size_t Size) noexcept { + Alloc.Deallocate(Ptr, Size); + } + + void operator delete(void *) noexcept = delete; + /// \brief Get the value of the anchor attached to this node. If it does not /// have one, getAnchor().size() will be 0. StringRef getAnchor() const { return Anchor; } @@ -146,22 +164,10 @@ class Node { unsigned int getType() const { return TypeID; } - void *operator new(size_t Size, BumpPtrAllocator &Alloc, - size_t Alignment = 16) noexcept { - return Alloc.Allocate(Size, Alignment); - } - - void operator delete(void *Ptr, BumpPtrAllocator &Alloc, - size_t Size) noexcept { - Alloc.Deallocate(Ptr, Size); - } - protected: std::unique_ptr &Doc; SMRange SourceRange; - void operator delete(void *) noexcept = delete; - ~Node() = default; private: @@ -182,7 +188,7 @@ class NullNode final : public Node { NullNode(std::unique_ptr &D) : Node(NK_Null, D, StringRef(), StringRef()) {} - static inline bool classof(const Node *N) { return N->getType() == NK_Null; } + static bool classof(const Node *N) { return N->getType() == NK_Null; } }; /// \brief A scalar node is an opaque datum that can be presented as a @@ -214,7 +220,7 @@ class ScalarNode final : public Node { /// This happens with escaped characters and multi-line literals. StringRef getValue(SmallVectorImpl &Storage) const; - static inline bool classof(const Node *N) { + static bool classof(const Node *N) { return N->getType() == NK_Scalar; } @@ -248,7 +254,7 @@ class BlockScalarNode final : public Node { /// \brief Gets the value of this node as a StringRef. 
StringRef getValue() const { return Value; } - static inline bool classof(const Node *N) { + static bool classof(const Node *N) { return N->getType() == NK_BlockScalar; } @@ -268,8 +274,7 @@ class KeyValueNode final : public Node { public: KeyValueNode(std::unique_ptr &D) - : Node(NK_KeyValue, D, StringRef(), StringRef()), Key(nullptr), - Value(nullptr) {} + : Node(NK_KeyValue, D, StringRef(), StringRef()) {} /// \brief Parse and return the key. /// @@ -291,13 +296,13 @@ class KeyValueNode final : public Node { Val->skip(); } - static inline bool classof(const Node *N) { + static bool classof(const Node *N) { return N->getType() == NK_KeyValue; } private: - Node *Key; - Node *Value; + Node *Key = nullptr; + Node *Value = nullptr; }; /// \brief This is an iterator abstraction over YAML collections shared by both @@ -309,7 +314,7 @@ template class basic_collection_iterator : public std::iterator { public: - basic_collection_iterator() : Base(nullptr) {} + basic_collection_iterator() = default; basic_collection_iterator(BaseT *B) : Base(B) {} ValueT *operator->() const { @@ -358,7 +363,7 @@ class basic_collection_iterator } private: - BaseT *Base; + BaseT *Base = nullptr; }; // The following two templates are used for both MappingNode and Sequence Node. @@ -399,11 +404,12 @@ class MappingNode final : public Node { MappingNode(std::unique_ptr &D, StringRef Anchor, StringRef Tag, MappingType MT) - : Node(NK_Mapping, D, Anchor, Tag), Type(MT), IsAtBeginning(true), - IsAtEnd(false), CurrentEntry(nullptr) {} + : Node(NK_Mapping, D, Anchor, Tag), Type(MT) {} friend class basic_collection_iterator; - typedef basic_collection_iterator iterator; + + using iterator = basic_collection_iterator; + template friend typename T::iterator yaml::begin(T &); template friend void yaml::skip(T &); @@ -413,15 +419,15 @@ class MappingNode final : public Node { void skip() override { yaml::skip(*this); } - static inline bool classof(const Node *N) { + static bool classof(const Node *N) { return N->getType() == NK_Mapping; } private: MappingType Type; - bool IsAtBeginning; - bool IsAtEnd; - KeyValueNode *CurrentEntry; + bool IsAtBeginning = true; + bool IsAtEnd = false; + KeyValueNode *CurrentEntry = nullptr; void increment(); }; @@ -453,13 +459,12 @@ class SequenceNode final : public Node { SequenceNode(std::unique_ptr &D, StringRef Anchor, StringRef Tag, SequenceType ST) - : Node(NK_Sequence, D, Anchor, Tag), SeqType(ST), IsAtBeginning(true), - IsAtEnd(false), - WasPreviousTokenFlowEntry(true), // Start with an imaginary ','. - CurrentEntry(nullptr) {} + : Node(NK_Sequence, D, Anchor, Tag), SeqType(ST) {} friend class basic_collection_iterator; - typedef basic_collection_iterator iterator; + + using iterator = basic_collection_iterator; + template friend typename T::iterator yaml::begin(T &); template friend void yaml::skip(T &); @@ -471,16 +476,16 @@ class SequenceNode final : public Node { void skip() override { yaml::skip(*this); } - static inline bool classof(const Node *N) { + static bool classof(const Node *N) { return N->getType() == NK_Sequence; } private: SequenceType SeqType; - bool IsAtBeginning; - bool IsAtEnd; - bool WasPreviousTokenFlowEntry; - Node *CurrentEntry; + bool IsAtBeginning = true; + bool IsAtEnd = false; + bool WasPreviousTokenFlowEntry = true; // Start with an imaginary ','. + Node *CurrentEntry = nullptr; }; /// \brief Represents an alias to a Node with an anchor. 
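Since the hunks above and below touch every node class in YAMLParser.h, a short reminder of how they fit together may help. The sketch below (illustrative, error handling omitted) parses a flow sequence and relies on the classof() methods these hunks rewrite to drive dyn_cast:

// Sketch: walking a YAML sequence with llvm::yaml::Stream.
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/YAMLParser.h"
#include "llvm/Support/raw_ostream.h"

int main() {
  llvm::SourceMgr SM;
  llvm::yaml::Stream YS("[ a, b, c ]", SM);
  for (llvm::yaml::Document &Doc : YS) {
    auto *Seq = llvm::dyn_cast_or_null<llvm::yaml::SequenceNode>(Doc.getRoot());
    if (!Seq)
      continue;
    for (llvm::yaml::Node &N : *Seq)
      if (auto *S = llvm::dyn_cast<llvm::yaml::ScalarNode>(&N)) {
        llvm::SmallString<16> Storage;
        llvm::outs() << S->getValue(Storage) << "\n"; // a, b, c
      }
  }
}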
@@ -497,7 +502,7 @@ class AliasNode final : public Node { StringRef getName() const { return Name; } Node *getTarget(); - static inline bool classof(const Node *N) { return N->getType() == NK_Alias; } + static bool classof(const Node *N) { return N->getType() == NK_Alias; } private: StringRef Name; @@ -507,11 +512,11 @@ class AliasNode final : public Node { /// node. class Document { public: + Document(Stream &ParentStream); + /// \brief Root for parsing a node. Returns a single node. Node *parseBlockNode(); - Document(Stream &ParentStream); - /// \brief Finish parsing the current document and return true if there are /// more. Return false otherwise. bool skip(); @@ -564,7 +569,7 @@ class Document { /// \brief Iterator abstraction for Documents over a Stream. class document_iterator { public: - document_iterator() : Doc(nullptr) {} + document_iterator() = default; document_iterator(std::unique_ptr &D) : Doc(&D) {} bool operator==(const document_iterator &Other) { @@ -593,11 +598,11 @@ class document_iterator { private: bool isAtEnd() const { return !Doc || !*Doc; } - std::unique_ptr *Doc; + std::unique_ptr *Doc = nullptr; }; -} // End namespace yaml. +} // end namespace yaml -} // End namespace llvm. +} // end namespace llvm -#endif +#endif // LLVM_SUPPORT_YAMLPARSER_H diff --git a/interpreter/llvm/src/include/llvm/Support/YAMLTraits.h b/interpreter/llvm/src/include/llvm/Support/YAMLTraits.h index ffea679fab828..71fdf47f1979a 100644 --- a/interpreter/llvm/src/include/llvm/Support/YAMLTraits.h +++ b/interpreter/llvm/src/include/llvm/Support/YAMLTraits.h @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -179,17 +180,17 @@ struct BlockScalarTraits { /// to/from a YAML sequence. For example: /// /// template<> -/// struct SequenceTraits< std::vector> { -/// static size_t size(IO &io, std::vector &seq) { +/// struct SequenceTraits { +/// static size_t size(IO &io, MyContainer &seq) { /// return seq.size(); /// } -/// static MyType& element(IO &, std::vector &seq, size_t index) { +/// static MyType& element(IO &, MyContainer &seq, size_t index) { /// if ( index >= seq.size() ) /// seq.resize(index+1); /// return seq[index]; /// } /// }; -template +template struct SequenceTraits { // Must provide: // static size_t size(IO &io, T &seq); @@ -200,6 +201,14 @@ struct SequenceTraits { // static const bool flow = true; }; +/// This class should be specialized by any type for which vectors of that +/// type need to be converted to/from a YAML sequence. +template +struct SequenceElementTraits { + // Must provide: + // static const bool flow; +}; + /// This class should be specialized by any type that needs to be converted /// to/from a list of YAML documents. 
template @@ -226,7 +235,7 @@ struct MissingTrait; template struct has_ScalarEnumerationTraits { - typedef void (*Signature_enumeration)(class IO&, T&); + using Signature_enumeration = void (*)(class IO&, T&); template static char test(SameType*); @@ -243,7 +252,7 @@ struct has_ScalarEnumerationTraits template struct has_ScalarBitSetTraits { - typedef void (*Signature_bitset)(class IO&, T&); + using Signature_bitset = void (*)(class IO&, T&); template static char test(SameType*); @@ -259,9 +268,9 @@ struct has_ScalarBitSetTraits template struct has_ScalarTraits { - typedef StringRef (*Signature_input)(StringRef, void*, T&); - typedef void (*Signature_output)(const T&, void*, llvm::raw_ostream&); - typedef bool (*Signature_mustQuote)(StringRef); + using Signature_input = StringRef (*)(StringRef, void*, T&); + using Signature_output = void (*)(const T&, void*, raw_ostream&); + using Signature_mustQuote = bool (*)(StringRef); template static char test(SameType *, @@ -280,8 +289,8 @@ struct has_ScalarTraits template struct has_BlockScalarTraits { - typedef StringRef (*Signature_input)(StringRef, void *, T &); - typedef void (*Signature_output)(const T &, void *, llvm::raw_ostream &); + using Signature_input = StringRef (*)(StringRef, void *, T &); + using Signature_output = void (*)(const T &, void *, raw_ostream &); template static char test(SameType *, @@ -297,7 +306,7 @@ struct has_BlockScalarTraits // Test if MappingContextTraits is defined on type T. template struct has_MappingTraits { - typedef void (*Signature_mapping)(class IO &, T &, Context &); + using Signature_mapping = void (*)(class IO &, T &, Context &); template static char test(SameType*); @@ -312,7 +321,7 @@ template struct has_MappingTraits { // Test if MappingTraits is defined on type T. template struct has_MappingTraits { - typedef void (*Signature_mapping)(class IO &, T &); + using Signature_mapping = void (*)(class IO &, T &); template static char test(SameType *); @@ -325,7 +334,7 @@ template struct has_MappingTraits { // Test if MappingContextTraits::validate() is defined on type T. template struct has_MappingValidateTraits { - typedef StringRef (*Signature_validate)(class IO &, T &, Context &); + using Signature_validate = StringRef (*)(class IO &, T &, Context &); template static char test(SameType*); @@ -340,7 +349,7 @@ template struct has_MappingValidateTraits { // Test if MappingTraits::validate() is defined on type T. template struct has_MappingValidateTraits { - typedef StringRef (*Signature_validate)(class IO &, T &); + using Signature_validate = StringRef (*)(class IO &, T &); template static char test(SameType *); @@ -355,7 +364,7 @@ template struct has_MappingValidateTraits { template struct has_SequenceMethodTraits { - typedef size_t (*Signature_size)(class IO&, T&); + using Signature_size = size_t (*)(class IO&, T&); template static char test(SameType*); @@ -371,7 +380,7 @@ struct has_SequenceMethodTraits template struct has_CustomMappingTraits { - typedef void (*Signature_input)(IO &io, StringRef key, T &v); + using Signature_input = void (*)(IO &io, StringRef key, T &v); template static char test(SameType*); @@ -422,7 +431,7 @@ struct has_SequenceTraits : public std::integral_constant struct has_DocumentListTraits { - typedef size_t (*Signature_size)(class IO&, T&); + using Signature_size = size_t (*)(class IO &, T &); template static char test(SameType*); @@ -537,7 +546,7 @@ struct unvalidatedMappingTraits // Base class for Input and Output. 
class IO { public: - IO(void *Ctxt=nullptr); + IO(void *Ctxt = nullptr); virtual ~IO(); virtual bool outputting() = 0; @@ -638,6 +647,7 @@ class IO { EmptyContext Ctx; this->processKey(Key, Val, true, Ctx); } + template void mapRequired(const char *Key, T &Val, Context &Ctx) { this->processKey(Key, Val, true, Ctx); @@ -773,7 +783,7 @@ typename std::enable_if::value, void>::type yamlize(IO &io, T &Val, bool, EmptyContext &Ctx) { if ( io.outputting() ) { std::string Storage; - llvm::raw_string_ostream Buffer(Storage); + raw_string_ostream Buffer(Storage); ScalarTraits::output(Val, io.getContext(), Buffer); StringRef Str = Buffer.str(); io.scalarString(Str, ScalarTraits::mustQuote(Str)); @@ -783,7 +793,7 @@ yamlize(IO &io, T &Val, bool, EmptyContext &Ctx) { io.scalarString(Str, ScalarTraits::mustQuote(Str)); StringRef Result = ScalarTraits::input(Str, io.getContext(), Val); if ( !Result.empty() ) { - io.setError(llvm::Twine(Result)); + io.setError(Twine(Result)); } } } @@ -793,7 +803,7 @@ typename std::enable_if::value, void>::type yamlize(IO &YamlIO, T &Val, bool, EmptyContext &Ctx) { if (YamlIO.outputting()) { std::string Storage; - llvm::raw_string_ostream Buffer(Storage); + raw_string_ostream Buffer(Storage); BlockScalarTraits::output(Val, YamlIO.getContext(), Buffer); StringRef Str = Buffer.str(); YamlIO.blockScalarString(Str); @@ -803,7 +813,7 @@ yamlize(IO &YamlIO, T &Val, bool, EmptyContext &Ctx) { StringRef Result = BlockScalarTraits::input(Str, YamlIO.getContext(), Val); if (!Result.empty()) - YamlIO.setError(llvm::Twine(Result)); + YamlIO.setError(Twine(Result)); } } @@ -817,7 +827,7 @@ yamlize(IO &io, T &Val, bool, Context &Ctx) { if (io.outputting()) { StringRef Err = MappingTraits::validate(io, Val); if (!Err.empty()) { - llvm::errs() << Err << "\n"; + errs() << Err << "\n"; assert(Err.empty() && "invalid struct trying to be written as yaml"); } } @@ -871,7 +881,7 @@ yamlize(IO &io, T &Val, bool, EmptyContext &Ctx) { template typename std::enable_if::value, void>::type yamlize(IO &io, T &Seq, bool, Context &Ctx) { - if ( has_FlowTraits< SequenceTraits >::value ) { + if ( has_FlowTraits< SequenceTraits>::value ) { unsigned incnt = io.beginFlowSequence(); unsigned count = io.outputting() ? 
SequenceTraits<T>::size(io, Seq) : incnt; for(unsigned i=0; i < count; ++i) {
@@ -899,92 +909,92 @@ yamlize(IO &io, T &Seq, bool, Context &Ctx) {
template<> struct ScalarTraits<bool> { - static void output(const bool &, void*, llvm::raw_ostream &); - static StringRef input(StringRef, void*, bool &); + static void output(const bool &, void *, raw_ostream &); + static StringRef input(StringRef, void *, bool &); static bool mustQuote(StringRef) { return false; } };
template<> struct ScalarTraits<StringRef> { - static void output(const StringRef &, void*, llvm::raw_ostream &); - static StringRef input(StringRef, void*, StringRef &); + static void output(const StringRef &, void *, raw_ostream &); + static StringRef input(StringRef, void *, StringRef &); static bool mustQuote(StringRef S) { return needsQuotes(S); } };
template<> struct ScalarTraits<std::string> { - static void output(const std::string &, void*, llvm::raw_ostream &); - static StringRef input(StringRef, void*, std::string &); + static void output(const std::string &, void *, raw_ostream &); + static StringRef input(StringRef, void *, std::string &); static bool mustQuote(StringRef S) { return needsQuotes(S); } };
template<> struct ScalarTraits<uint8_t> { - static void output(const uint8_t &, void*, llvm::raw_ostream &); - static StringRef input(StringRef, void*, uint8_t &); + static void output(const uint8_t &, void *, raw_ostream &); + static StringRef input(StringRef, void *, uint8_t &); static bool mustQuote(StringRef) { return false; } };
template<> struct ScalarTraits<uint16_t> { - static void output(const uint16_t &, void*, llvm::raw_ostream &); - static StringRef input(StringRef, void*, uint16_t &); + static void output(const uint16_t &, void *, raw_ostream &); + static StringRef input(StringRef, void *, uint16_t &); static bool mustQuote(StringRef) { return false; } };
template<> struct ScalarTraits<uint32_t> { - static void output(const uint32_t &, void*, llvm::raw_ostream &); - static StringRef input(StringRef, void*, uint32_t &); + static void output(const uint32_t &, void *, raw_ostream &); + static StringRef input(StringRef, void *, uint32_t &); static bool mustQuote(StringRef) { return false; } };
template<> struct ScalarTraits<uint64_t> { - static void output(const uint64_t &, void*, llvm::raw_ostream &); - static StringRef input(StringRef, void*, uint64_t &); + static void output(const uint64_t &, void *, raw_ostream &); + static StringRef input(StringRef, void *, uint64_t &); static bool mustQuote(StringRef) { return false; } };
template<> struct ScalarTraits<int8_t> { - static void output(const int8_t &, void*, llvm::raw_ostream &); - static StringRef input(StringRef, void*, int8_t &); + static void output(const int8_t &, void *, raw_ostream &); + static StringRef input(StringRef, void *, int8_t &); static bool mustQuote(StringRef) { return false; } };
template<> struct ScalarTraits<int16_t> { - static void output(const int16_t &, void*, llvm::raw_ostream &); - static StringRef input(StringRef, void*, int16_t &); + static void output(const int16_t &, void *, raw_ostream &); + static StringRef input(StringRef, void *, int16_t &); static bool mustQuote(StringRef) { return false; } };
template<> struct ScalarTraits<int32_t> { - static void output(const int32_t &, void*, llvm::raw_ostream &); - static StringRef input(StringRef, void*, int32_t &); + static void output(const int32_t &, void *, raw_ostream &); + static StringRef input(StringRef, void *, int32_t &); static bool mustQuote(StringRef) { return false; } };
template<> struct ScalarTraits<int64_t> { - static void output(const int64_t &, void*, llvm::raw_ostream &); - static StringRef input(StringRef, void*, int64_t &); + static void output(const int64_t &, void *, raw_ostream &); + static StringRef input(StringRef, void *, int64_t &); static bool mustQuote(StringRef) { return false; } };
template<> struct ScalarTraits<float> { - static void output(const float &, void*, llvm::raw_ostream &); - static StringRef input(StringRef, void*, float &); + static void output(const float &, void *, raw_ostream &); + static StringRef input(StringRef, void *, float &); static bool mustQuote(StringRef) { return false; } };
template<> struct ScalarTraits<double> { - static void output(const double &, void*, llvm::raw_ostream &); - static StringRef input(StringRef, void*, double &); + static void output(const double &, void *, raw_ostream &); + static StringRef input(StringRef, void *, double &); static bool mustQuote(StringRef) { return false; } };
@@ -994,12 +1004,11 @@ struct ScalarTraits<double> { template <typename value_type, support::endianness endian, size_t alignment> struct ScalarTraits<support::detail::packed_endian_specific_integral<value_type, endian, alignment>> { - typedef support::detail::packed_endian_specific_integral<value_type, endian, alignment> - endian_type; + using endian_type = + support::detail::packed_endian_specific_integral<value_type, endian, alignment>; - static void output(const endian_type &E, void *Ctx, - llvm::raw_ostream &Stream) { + static void output(const endian_type &E, void *Ctx, raw_ostream &Stream) { ScalarTraits<value_type>::output(static_cast<value_type>(E), Ctx, Stream); }
@@ -1039,7 +1048,7 @@ struct MappingNormalization { TNorm* operator->() { return BufPtr; } private: - typedef llvm::AlignedCharArrayUnion<TNorm> Storage; + using Storage = AlignedCharArrayUnion<TNorm>; Storage Buffer; IO &io;
@@ -1051,9 +1060,8 @@ struct MappingNormalization { // to [de]normalize an object for use with YAML conversion. template <typename TNorm, typename TFinal> struct MappingNormalizationHeap { - MappingNormalizationHeap(IO &i_o, TFinal &Obj, - llvm::BumpPtrAllocator *allocator) - : io(i_o), BufPtr(nullptr), Result(Obj) { + MappingNormalizationHeap(IO &i_o, TFinal &Obj, BumpPtrAllocator *allocator) + : io(i_o), Result(Obj) { if ( io.outputting() ) { BufPtr = new (&Buffer) TNorm(io, Obj); }
@@ -1077,11 +1085,11 @@ struct MappingNormalizationHeap { TNorm* operator->() { return BufPtr; } private: - typedef llvm::AlignedCharArrayUnion<TNorm> Storage; + using Storage = AlignedCharArrayUnion<TNorm>; Storage Buffer; IO &io; - TNorm *BufPtr; + TNorm *BufPtr = nullptr; TFinal &Result; };
@@ -1106,6 +1114,10 @@ class Input : public IO { void *Ctxt = nullptr, SourceMgr::DiagHandlerTy DiagHandler = nullptr, void *DiagHandlerCtxt = nullptr); + Input(MemoryBufferRef Input, + void *Ctxt = nullptr, + SourceMgr::DiagHandlerTy DiagHandler = nullptr, + void *DiagHandlerCtxt = nullptr); ~Input() override; // Check if there was a syntax or semantic error during parsing.
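Not part of the patch, but for orientation: a minimal sketch of how the Input/Output classes touched in these hunks are typically driven. The `Point` type and its field names are hypothetical; `llvm::yaml::Input`, `llvm::yaml::Output`, and `MappingTraits` are the real YAML I/O API from the header being patched.

```cpp
#include "llvm/Support/YAMLTraits.h"
#include "llvm/Support/raw_ostream.h"

struct Point { int X = 0; int Y = 0; }; // hypothetical user type

namespace llvm {
namespace yaml {
template <> struct MappingTraits<Point> {
  static void mapping(IO &Io, Point &P) {
    Io.mapRequired("x", P.X);
    Io.mapRequired("y", P.Y);
  }
};
} // end namespace yaml
} // end namespace llvm

int main() {
  Point P;
  llvm::yaml::Input Yin("{ x: 1, y: 2 }"); // parse from a string
  Yin >> P;
  if (Yin.error())
    return 1;
  llvm::yaml::Output Yout(llvm::outs());   // re-emit as YAML
  Yout << P;
}
```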
@@ -1148,7 +1160,7 @@ class Input : public IO { HNode(Node *n) : _node(n) { } virtual ~HNode() = default; - static inline bool classof(const HNode *) { return true; } + static bool classof(const HNode *) { return true; } Node *_node; };
@@ -1159,11 +1171,9 @@ class Input : public IO { public: EmptyHNode(Node *n) : HNode(n) { } - static inline bool classof(const HNode *n) { - return NullNode::classof(n->_node); - } + static bool classof(const HNode *n) { return NullNode::classof(n->_node); } - static inline bool classof(const EmptyHNode *) { return true; } + static bool classof(const EmptyHNode *) { return true; } }; class ScalarHNode : public HNode {
@@ -1174,12 +1184,12 @@ class Input : public IO { StringRef value() const { return _value; } - static inline bool classof(const HNode *n) { + static bool classof(const HNode *n) { return ScalarNode::classof(n->_node) || BlockScalarNode::classof(n->_node); } - static inline bool classof(const ScalarHNode *) { return true; } + static bool classof(const ScalarHNode *) { return true; } protected: StringRef _value;
@@ -1191,16 +1201,16 @@ class Input : public IO { public: MapHNode(Node *n) : HNode(n) { } - static inline bool classof(const HNode *n) { + static bool classof(const HNode *n) { return MappingNode::classof(n->_node); } - static inline bool classof(const MapHNode *) { return true; } + static bool classof(const MapHNode *) { return true; } - typedef llvm::StringMap<std::unique_ptr<HNode>> NameToNode; + using NameToNode = StringMap<std::unique_ptr<HNode>>; - NameToNode Mapping; - llvm::SmallVector<const char *, 6> ValidKeys; + NameToNode Mapping; + SmallVector<const char *, 6> ValidKeys; }; class SequenceHNode : public HNode {
@@ -1209,11 +1219,11 @@ class Input : public IO { public: SequenceHNode(Node *n) : HNode(n) { } - static inline bool classof(const HNode *n) { + static bool classof(const HNode *n) { return SequenceNode::classof(n->_node); } - static inline bool classof(const SequenceHNode *) { return true; } + static bool classof(const SequenceHNode *) { return true; } std::vector<std::unique_ptr<HNode>> Entries; };
@@ -1232,14 +1242,14 @@ class Input : public IO { const Node *getCurrentNode() const; private: - llvm::SourceMgr SrcMgr; // must be before Strm + SourceMgr SrcMgr; // must be before Strm std::unique_ptr<llvm::yaml::Stream> Strm; std::unique_ptr<HNode> TopNode; std::error_code EC; - llvm::BumpPtrAllocator StringAllocator; - llvm::yaml::document_iterator DocIterator; + BumpPtrAllocator StringAllocator; + document_iterator DocIterator; std::vector<bool> BitValuesUsed; - HNode *CurrentNode; + HNode *CurrentNode = nullptr; bool ScalarMatchFound; };
@@ -1249,7 +1259,7 @@ class Input : public IO { /// class Output : public IO { public: - Output(llvm::raw_ostream &, void *Ctxt = nullptr, int WrapColumn = 70); + Output(raw_ostream &, void *Ctxt = nullptr, int WrapColumn = 70); ~Output() override; /// \brief Set whether or not to output optional values which are equal
@@ -1312,17 +1322,17 @@ class Output : public IO { inFlowMapOtherKey }; - llvm::raw_ostream &Out; - int WrapColumn; - SmallVector<InState, 8> StateStack; - int Column; - int ColumnAtFlowStart; - int ColumnAtMapFlowStart; - bool NeedBitValueComma; - bool NeedFlowSequenceComma; - bool EnumerationMatchFound; - bool NeedsNewLine; - bool WriteDefaultValues; + raw_ostream &Out; + int WrapColumn; + SmallVector<InState, 8> StateStack; + int Column = 0; + int ColumnAtFlowStart = 0; + int ColumnAtMapFlowStart = 0; + bool NeedBitValueComma = false; + bool NeedFlowSequenceComma = false; + bool EnumerationMatchFound = false; + bool NeedsNewLine = false; + bool WriteDefaultValues = false; }; /// YAML I/O does conversion based on types.
But often native data types
@@ -1345,7 +1355,7 @@ class Output : public IO { bool operator==(const _base &rhs) const { return value == rhs; } \ bool operator<(const _type &rhs) const { return value < rhs.value; } \ _base value; \ - typedef _base BaseType; \ + using BaseType = _base; \ }; ///
@@ -1359,29 +1369,29 @@ LLVM_YAML_STRONG_TYPEDEF(uint64_t, Hex64) template<> struct ScalarTraits<Hex8> { - static void output(const Hex8 &, void*, llvm::raw_ostream &); - static StringRef input(StringRef, void*, Hex8 &); + static void output(const Hex8 &, void *, raw_ostream &); + static StringRef input(StringRef, void *, Hex8 &); static bool mustQuote(StringRef) { return false; } };
template<> struct ScalarTraits<Hex16> { - static void output(const Hex16 &, void*, llvm::raw_ostream &); - static StringRef input(StringRef, void*, Hex16 &); + static void output(const Hex16 &, void *, raw_ostream &); + static StringRef input(StringRef, void *, Hex16 &); static bool mustQuote(StringRef) { return false; } };
template<> struct ScalarTraits<Hex32> { - static void output(const Hex32 &, void*, llvm::raw_ostream &); - static StringRef input(StringRef, void*, Hex32 &); + static void output(const Hex32 &, void *, raw_ostream &); + static StringRef input(StringRef, void *, Hex32 &); static bool mustQuote(StringRef) { return false; } };
template<> struct ScalarTraits<Hex64> { - static void output(const Hex64 &, void*, llvm::raw_ostream &); - static StringRef input(StringRef, void*, Hex64 &); + static void output(const Hex64 &, void *, raw_ostream &); + static StringRef input(StringRef, void *, Hex64 &); static bool mustQuote(StringRef) { return false; } };
@@ -1544,22 +1554,67 @@ operator<<(Output &yout, T &seq) { return yout; } -template <typename T> struct SequenceTraitsImpl { - typedef typename T::value_type _type; +template <bool B> struct IsFlowSequenceBase {}; +template <> struct IsFlowSequenceBase<true> { static const bool flow = true; }; + +template <typename T, bool Flow> +struct SequenceTraitsImpl : IsFlowSequenceBase<Flow> { +private: + using type = typename T::value_type; + +public: static size_t size(IO &io, T &seq) { return seq.size(); } - static _type &element(IO &io, T &seq, size_t index) { + + static type &element(IO &io, T &seq, size_t index) { if (index >= seq.size()) seq.resize(index + 1); return seq[index]; } };
+// Simple helper to check an expression can be used as a bool-valued template
+// argument.
+template <bool> struct CheckIsBool { static const bool value = true; };
+
+// If T has SequenceElementTraits, then vector<T> and SmallVector<T, N> have
+// SequenceTraits that do the obvious thing.
+template <typename T>
+struct SequenceTraits<std::vector<T>,
+                      typename std::enable_if<CheckIsBool<SequenceElementTraits<T>::flow>::value>::type>
+    : SequenceTraitsImpl<std::vector<T>, SequenceElementTraits<T>::flow> {};
+template <typename T, unsigned N>
+struct SequenceTraits<SmallVector<T, N>,
+                      typename std::enable_if<CheckIsBool<SequenceElementTraits<T>::flow>::value>::type>
+    : SequenceTraitsImpl<SmallVector<T, N>, SequenceElementTraits<T>::flow> {};
+
+// Sequences of fundamental types use flow formatting.
+template <typename T>
+struct SequenceElementTraits<
+    T, typename std::enable_if<std::is_fundamental<T>::value>::type> {
+  static const bool flow = true;
+};
+
+// Sequences of strings use block formatting.
+template<> struct SequenceElementTraits<std::string> {
+  static const bool flow = false;
+};
+template<> struct SequenceElementTraits<StringRef> {
+  static const bool flow = false;
+};
+template<> struct SequenceElementTraits<std::pair<std::string, std::string>> {
+  static const bool flow = false;
+};
+
/// Implementation of CustomMappingTraits for std::map.
template <typename T> struct StdMapStringCustomMappingTraitsImpl { - typedef std::map<std::string, T> map_type; + using map_type = std::map<std::string, T>; + static void inputOne(IO &io, StringRef key, map_type &v) { io.mapRequired(key.str().c_str(), v[key]); } + static void output(IO &io, map_type &v) { for (auto &p : v) io.mapRequired(p.first.c_str(), p.second);
@@ -1569,39 +1624,64 @@ template <typename T> struct StdMapStringCustomMappingTraitsImpl { } // end namespace yaml } // end namespace llvm -/// Utility for declaring that a std::vector of a particular type -/// should be considered a YAML sequence. -#define LLVM_YAML_IS_SEQUENCE_VECTOR(_type) \ +#define LLVM_YAML_IS_SEQUENCE_VECTOR_IMPL(TYPE, FLOW) \ namespace llvm { \ namespace yaml { \ - template <> \ - struct SequenceTraits<std::vector<_type>> \ - : public SequenceTraitsImpl<std::vector<_type>> {}; \ - template <unsigned N> \ - struct SequenceTraits<SmallVector<_type, N>> \ - : public SequenceTraitsImpl<SmallVector<_type, N>> {}; \ + static_assert( \ + !std::is_fundamental<TYPE>::value && \ + !std::is_same<TYPE, std::string>::value && \ + !std::is_same<TYPE, llvm::StringRef>::value, \ + "only use LLVM_YAML_IS_SEQUENCE_VECTOR for types you control"); \ + template <> struct SequenceElementTraits<TYPE> { \ + static const bool flow = FLOW; \ + }; \ } \ }
+/// Utility for declaring that a std::vector of a particular type
+/// should be considered a YAML sequence.
+#define LLVM_YAML_IS_SEQUENCE_VECTOR(type) \ + LLVM_YAML_IS_SEQUENCE_VECTOR_IMPL(type, false) + /// Utility for declaring that a std::vector of a particular type /// should be considered a YAML flow sequence. -/// We need to do a partial specialization on the vector version, not a full. -/// If this is a full specialization, the compiler is a bit too "smart" and -/// decides to warn on -Wunused-const-variable. This workaround can be -/// removed and we can do a full specialization on std::vector once -/// PR28878 is fixed. -#define LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(_type) \ +#define LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(type) \ + LLVM_YAML_IS_SEQUENCE_VECTOR_IMPL(type, true) + +#define LLVM_YAML_DECLARE_MAPPING_TRAITS(Type) \ namespace llvm { \ namespace yaml { \ - template <typename Allocator> \ - struct SequenceTraits<std::vector<_type, Allocator>> \ - : public SequenceTraitsImpl<std::vector<_type, Allocator>> { \ - static const bool flow = true; \ + template <> struct MappingTraits<Type> { \ + static void mapping(IO &IO, Type &Obj); \ + }; \ + } \ + }
+
+#define LLVM_YAML_DECLARE_ENUM_TRAITS(Type) \ + namespace llvm { \ + namespace yaml { \ + template <> struct ScalarEnumerationTraits<Type> { \ + static void enumeration(IO &io, Type &Value); \ }; \ - template <unsigned N> \ - struct SequenceTraits<SmallVector<_type, N>> \ - : public SequenceTraitsImpl<SmallVector<_type, N>> { \ - static const bool flow = true; \ + } \ + }
+
+#define LLVM_YAML_DECLARE_BITSET_TRAITS(Type) \ + namespace llvm { \ namespace yaml { \ + template <> struct ScalarBitSetTraits<Type> { \ + static void bitset(IO &IO, Type &Options); \ + }; \ + } \ + }
+
+#define LLVM_YAML_DECLARE_SCALAR_TRAITS(Type, MustQuote) \ + namespace llvm { \ namespace yaml { \ + template <> struct ScalarTraits<Type> { \ + static void output(const Type &Value, void *ctx, raw_ostream &Out); \ + static StringRef input(StringRef Scalar, void *ctxt, Type &Value); \ + static bool mustQuote(StringRef) { return MustQuote; } \ }; \ } \ }
@@ -1613,10 +1693,10 @@ template <typename T> struct StdMapStringCustomMappingTraitsImpl { namespace yaml { \ template <unsigned N> \ struct DocumentListTraits<SmallVector<_type, N>> \ - : public SequenceTraitsImpl<SmallVector<_type, N>> {}; \ + : public SequenceTraitsImpl<SmallVector<_type, N>, false> {}; \ template <> \ struct DocumentListTraits<std::vector<_type>> \ - : public SequenceTraitsImpl<std::vector<_type>> {}; \ + : public SequenceTraitsImpl<std::vector<_type>, false> {}; \ } \ }
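A quick sketch (mine, not the patch's) of what the reworked macros buy: declaring the element trait once now covers `std::vector<T>` and every `SmallVector<T, N>`, while the `static_assert` rejects fundamental and string types, whose formatting the header already chooses. `Point` is the same hypothetical type as in the earlier sketch.

```cpp
// Block-style sequences of Point in both vector flavors come from one line:
LLVM_YAML_IS_SEQUENCE_VECTOR(Point)

// For a flow-style (inline, bracketed) sequence one would instead write:
// LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(Point)

void emitBoth(std::vector<Point> &Pts, llvm::SmallVector<Point, 4> &Small,
              llvm::raw_ostream &OS) {
  llvm::yaml::Output Yout(OS);
  Yout << Pts;   // resolved via SequenceElementTraits<Point>
  Yout << Small; // the same declaration covers SmallVector
}
```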
diff --git a/interpreter/llvm/src/include/llvm/Support/raw_sha1_ostream.h b/interpreter/llvm/src/include/llvm/Support/raw_sha1_ostream.h index 329ef9fd069bc..bd55d98b7c1d6 100644 --- a/interpreter/llvm/src/include/llvm/Support/raw_sha1_ostream.h +++ b/interpreter/llvm/src/include/llvm/Support/raw_sha1_ostream.h
@@ -14,9 +14,9 @@ #ifndef LLVM_SUPPORT_RAW_SHA1_OSTREAM_H #define LLVM_SUPPORT_RAW_SHA1_OSTREAM_H -#include "llvm/Support/raw_ostream.h" -#include "llvm/Support/SHA1.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/Support/SHA1.h" +#include "llvm/Support/raw_ostream.h" namespace llvm {
diff --git a/interpreter/llvm/src/include/llvm/Support/type_traits.h b/interpreter/llvm/src/include/llvm/Support/type_traits.h index ce4bbf8cb2cc5..cc08783588009 100644 --- a/interpreter/llvm/src/include/llvm/Support/type_traits.h +++ b/interpreter/llvm/src/include/llvm/Support/type_traits.h
@@ -14,11 +14,10 @@ #ifndef LLVM_SUPPORT_TYPE_TRAITS_H #define LLVM_SUPPORT_TYPE_TRAITS_H +#include "llvm/Support/Compiler.h" #include <type_traits> #include <utility> -#include "llvm/Support/Compiler.h" - #ifndef __has_feature #define LLVM_DEFINED_HAS_FEATURE #define __has_feature(x) 0
@@ -51,7 +50,7 @@ struct isPodLike { // std::pair's are pod-like if their elements are. template <typename T, typename U> -struct isPodLike<std::pair<T, U> > { +struct isPodLike<std::pair<T, U>> { static const bool value = isPodLike<T>::value && isPodLike<U>::value; };
@@ -63,7 +62,7 @@ struct isPodLike<std::pair<T, U> > { /// Also note that enum classes aren't implicitly convertible to integral types, /// the value may therefore need to be explicitly converted before being used. template <typename T> class is_integral_or_enum { - typedef typename std::remove_reference<T>::type UnderlyingT; + using UnderlyingT = typename std::remove_reference<T>::type; public: static const bool value =
@@ -76,23 +75,23 @@ template <typename T> class is_integral_or_enum { /// \brief If T is a pointer, just return it. If it is not, return T&. template <typename T> -struct add_lvalue_reference_if_not_pointer { typedef T &type; }; +struct add_lvalue_reference_if_not_pointer { using type = T &; }; template <typename T> struct add_lvalue_reference_if_not_pointer< T, typename std::enable_if<std::is_pointer<T>::value>::type> { - typedef T type; + using type = T; }; /// \brief If T is a pointer to X, return a pointer to const X. If it is not, /// return const T. template <typename T> -struct add_const_past_pointer { typedef const T type; }; +struct add_const_past_pointer { using type = const T; }; template <typename T> struct add_const_past_pointer< T, typename std::enable_if<std::is_pointer<T>::value>::type> { - typedef const typename std::remove_pointer<T>::type *type; + using type = const typename std::remove_pointer<T>::type *; }; template <typename T>
@@ -104,7 +103,8 @@ struct const_pointer_or_const_ref< T, typename std::enable_if<std::is_pointer<T>::value>::type> { using type = typename add_const_past_pointer<T>::type; }; -} + +} // end namespace llvm // If the compiler supports detecting whether a class is final, define // an LLVM_IS_FINAL macro. If it cannot be defined properly, this
@@ -119,4 +119,4 @@ struct const_pointer_or_const_ref< #undef __has_feature #endif -#endif +#endif // LLVM_SUPPORT_TYPE_TRAITS_H
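To make the two pointer-aware helpers above concrete, here is a small compile-time check (an illustration of mine, not part of the patch) of what they produce:

```cpp
#include "llvm/Support/type_traits.h"
#include <type_traits>

// add_lvalue_reference_if_not_pointer: pointers pass through, everything
// else becomes an lvalue reference.
static_assert(std::is_same<llvm::add_lvalue_reference_if_not_pointer<int>::type,
                           int &>::value, "non-pointer -> T&");
static_assert(std::is_same<llvm::add_lvalue_reference_if_not_pointer<int *>::type,
                           int *>::value, "pointer -> unchanged");

// add_const_past_pointer: const lands on the pointee for pointers,
// on the type itself otherwise.
static_assert(std::is_same<llvm::add_const_past_pointer<int>::type,
                           const int>::value, "non-pointer -> const T");
static_assert(std::is_same<llvm::add_const_past_pointer<int *>::type,
                           const int *>::value, "T* -> const T*");
```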
diff --git a/interpreter/llvm/src/include/llvm/TableGen/Main.h b/interpreter/llvm/src/include/llvm/TableGen/Main.h index 866b9868deb55..670572dc81034 100644 --- a/interpreter/llvm/src/include/llvm/TableGen/Main.h +++ b/interpreter/llvm/src/include/llvm/TableGen/Main.h
@@ -16,13 +16,15 @@ namespace llvm { -class RecordKeeper; class raw_ostream; -/// \brief Perform the action using Records, and write output to OS. -/// \returns true on error, false otherwise -typedef bool TableGenMainFn(raw_ostream &OS, RecordKeeper &Records); +class RecordKeeper; + +/// Perform the action using Records, and write output to OS. +/// Returns true on error, false otherwise. +using TableGenMainFn = bool (raw_ostream &OS, RecordKeeper &Records); int TableGenMain(char *argv0, TableGenMainFn *MainFn); -} -#endif +} // end namespace llvm + +#endif // LLVM_TABLEGEN_MAIN_H
diff --git a/interpreter/llvm/src/include/llvm/TableGen/Record.h b/interpreter/llvm/src/include/llvm/TableGen/Record.h index fef5bf3045666..fa9ca285bcde9 100644 --- a/interpreter/llvm/src/include/llvm/TableGen/Record.h +++ b/interpreter/llvm/src/include/llvm/TableGen/Record.h
@@ -38,11 +38,11 @@ namespace llvm { class ListRecTy; +struct MultiClass; class Record; class RecordKeeper; class RecordVal; class StringInit; -struct MultiClass; //===----------------------------------------------------------------------===// // Type Classes
@@ -90,7 +90,6 @@ inline raw_ostream &operator<<(raw_ostream &OS, const RecTy &Ty) { } /// 'bit' - Represent a single bit -/// class BitRecTy : public RecTy { static BitRecTy Shared;
@@ -109,7 +108,6 @@ class BitRecTy : public RecTy { }; /// 'bits' - Represent a fixed number of bits -/// class BitsRecTy : public RecTy { unsigned Size;
@@ -130,7 +128,6 @@ class BitsRecTy : public RecTy { }; /// 'code' - Represent a code fragment -/// class CodeRecTy : public RecTy { static CodeRecTy Shared;
@@ -147,7 +144,6 @@ class CodeRecTy : public RecTy { }; /// 'int' - Represent an integer value of no particular size -/// class IntRecTy : public RecTy { static IntRecTy Shared;
@@ -166,7 +162,6 @@ class IntRecTy : public RecTy { }; /// 'string' - Represent a string value -/// class StringRecTy : public RecTy { static StringRecTy Shared;
@@ -185,14 +180,13 @@ class StringRecTy : public RecTy { /// 'list' - Represent a list of values, all of which must be of /// the specified type. -/// class ListRecTy : public RecTy { + friend ListRecTy *RecTy::getListTy(); + RecTy *Ty; explicit ListRecTy(RecTy *T) : RecTy(ListRecTyKind), Ty(T) {} - friend ListRecTy *RecTy::getListTy(); - public: static bool classof(const RecTy *RT) { return RT->getRecTyKind() == ListRecTyKind;
@@ -207,7 +201,6 @@ class ListRecTy : public RecTy { }; /// 'dag' - Represent a dag fragment -/// class DagRecTy : public RecTy { static DagRecTy Shared;
@@ -225,14 +218,13 @@ class DagRecTy : public RecTy { /// '[classname]' - Represent an instance of a class, such as: /// (R32 X = EAX). -/// class RecordRecTy : public RecTy { + friend class Record; + Record *Rec; explicit RecordRecTy(Record *R) : RecTy(RecordRecTyKind), Rec(R) {} - friend class Record; - public: static bool classof(const RecTy *RT) { return RT->getRecTyKind() == RecordRecTyKind;
@@ -249,7 +241,6 @@ class RecordRecTy : public RecTy { /// Find a common type that T1 and T2 convert to. /// Return 0 if no such type exists. -/// RecTy *resolveTypes(RecTy *T1, RecTy *T2); //===----------------------------------------------------------------------===//
@@ -341,7 +332,6 @@ class Init { /// selection operator. Given an initializer, it selects the specified bits /// out, returning them as a new init of bits type. If it is not legal to use /// the bit subscript operator on this initializer, return null. - /// virtual Init *convertInitializerBitRange(ArrayRef<unsigned> Bits) const { return nullptr; }
@@ -350,7 +340,6 @@ class Init { /// This method is used to implement the list slice /// selection operator.
Given an initializer, it selects the specified list /// elements, returning them as a new init of list type. If it is not legal /// to take a slice of this, return null. - /// virtual Init *convertInitListSlice(ArrayRef Elements) const { return nullptr; } @@ -358,7 +347,6 @@ class Init { /// This method is used to implement the FieldInit class. /// Implementors of this method should return the type of the named field if /// they are of record type. - /// virtual RecTy *getFieldType(StringInit *FieldName) const { return nullptr; } @@ -366,7 +354,6 @@ class Init { /// This method complements getFieldType to return the /// initializer for the specified field. If getFieldType returns non-null /// this method should return non-null, otherwise it returns null. - /// virtual Init *getFieldInit(Record &R, const RecordVal *RV, StringInit *FieldName) const { return nullptr; @@ -376,7 +363,6 @@ class Init { /// variables which may not be defined at the time the expression is formed. /// If a value is set for the variable later, this method will be called on /// users of the value to allow the value to propagate out. - /// virtual Init *resolveReferences(Record &R, const RecordVal *RV) const { return const_cast(this); } @@ -400,7 +386,6 @@ inline raw_ostream &operator<<(raw_ostream &OS, const Init &I) { /// This is the common super-class of types that have a specific, /// explicit, type. -/// class TypedInit : public Init { RecTy *Ty; @@ -409,8 +394,8 @@ class TypedInit : public Init { : Init(K, Opc), Ty(T) {} public: - TypedInit(const TypedInit &Other) = delete; - TypedInit &operator=(const TypedInit &Other) = delete; + TypedInit(const TypedInit &) = delete; + TypedInit &operator=(const TypedInit &) = delete; static bool classof(const Init *I) { return I->getKind() >= IK_FirstTypedInit && @@ -438,13 +423,12 @@ class TypedInit : public Init { }; /// '?' - Represents an uninitialized value -/// class UnsetInit : public Init { UnsetInit() : Init(IK_UnsetInit) {} public: UnsetInit(const UnsetInit &) = delete; - UnsetInit &operator=(const UnsetInit &Other) = delete; + UnsetInit &operator=(const UnsetInit &) = delete; static bool classof(const Init *I) { return I->getKind() == IK_UnsetInit; @@ -463,15 +447,14 @@ class UnsetInit : public Init { }; /// 'true'/'false' - Represent a concrete initializer for a bit. -/// class BitInit : public Init { bool Value; explicit BitInit(bool V) : Init(IK_BitInit), Value(V) {} public: - BitInit(const BitInit &Other) = delete; - BitInit &operator=(BitInit &Other) = delete; + BitInit(const BitInit &) = delete; + BitInit &operator=(BitInit &) = delete; static bool classof(const Init *I) { return I->getKind() == IK_BitInit; @@ -493,7 +476,6 @@ class BitInit : public Init { /// '{ a, b, c }' - Represents an initializer for a BitsRecTy value. /// It contains a vector of bits, whose size is determined by the type. -/// class BitsInit final : public TypedInit, public FoldingSetNode, public TrailingObjects { unsigned NumBits; @@ -502,8 +484,8 @@ class BitsInit final : public TypedInit, public FoldingSetNode, : TypedInit(IK_BitsInit, BitsRecTy::get(N)), NumBits(N) {} public: - BitsInit(const BitsInit &Other) = delete; - BitsInit &operator=(const BitsInit &Other) = delete; + BitsInit(const BitsInit &) = delete; + BitsInit &operator=(const BitsInit &) = delete; // Do not use sized deallocation due to trailing objects. 
void operator delete(void *p) { ::operator delete(p); } @@ -552,7 +534,6 @@ class BitsInit final : public TypedInit, public FoldingSetNode, }; /// '7' - Represent an initialization by a literal integer value. -/// class IntInit : public TypedInit { int64_t Value; @@ -560,8 +541,8 @@ class IntInit : public TypedInit { : TypedInit(IK_IntInit, IntRecTy::get()), Value(V) {} public: - IntInit(const IntInit &Other) = delete; - IntInit &operator=(const IntInit &Other) = delete; + IntInit(const IntInit &) = delete; + IntInit &operator=(const IntInit &) = delete; static bool classof(const Init *I) { return I->getKind() == IK_IntInit; @@ -590,7 +571,6 @@ class IntInit : public TypedInit { }; /// "foo" - Represent an initialization by a string value. -/// class StringInit : public TypedInit { StringRef Value; @@ -598,8 +578,8 @@ class StringInit : public TypedInit { : TypedInit(IK_StringInit, StringRecTy::get()), Value(V) {} public: - StringInit(const StringInit &Other) = delete; - StringInit &operator=(const StringInit &Other) = delete; + StringInit(const StringInit &) = delete; + StringInit &operator=(const StringInit &) = delete; static bool classof(const Init *I) { return I->getKind() == IK_StringInit; @@ -636,8 +616,8 @@ class CodeInit : public TypedInit { Value(V) {} public: - CodeInit(const StringInit &Other) = delete; - CodeInit &operator=(const StringInit &Other) = delete; + CodeInit(const StringInit &) = delete; + CodeInit &operator=(const StringInit &) = delete; static bool classof(const Init *I) { return I->getKind() == IK_CodeInit; @@ -671,19 +651,19 @@ class CodeInit : public TypedInit { /// [AL, AH, CL] - Represent a list of defs /// class ListInit final : public TypedInit, public FoldingSetNode, - public TrailingObjects { + public TrailingObjects { unsigned NumValues; public: - typedef Init *const *const_iterator; + using const_iterator = Init *const *; private: explicit ListInit(unsigned N, RecTy *EltTy) : TypedInit(IK_ListInit, ListRecTy::get(EltTy)), NumValues(N) {} public: - ListInit(const ListInit &Other) = delete; - ListInit &operator=(const ListInit &Other) = delete; + ListInit(const ListInit &) = delete; + ListInit &operator=(const ListInit &) = delete; // Do not use sized deallocation due to trailing objects. void operator delete(void *p) { ::operator delete(p); } @@ -744,8 +724,8 @@ class OpInit : public TypedInit { : TypedInit(K, Type, Opc) {} public: - OpInit(const OpInit &Other) = delete; - OpInit &operator=(OpInit &Other) = delete; + OpInit(const OpInit &) = delete; + OpInit &operator=(OpInit &) = delete; static bool classof(const Init *I) { return I->getKind() >= IK_FirstOpInit && @@ -781,8 +761,8 @@ class UnOpInit : public OpInit, public FoldingSetNode { : OpInit(IK_UnOpInit, Type, opc), LHS(lhs) {} public: - UnOpInit(const UnOpInit &Other) = delete; - UnOpInit &operator=(const UnOpInit &Other) = delete; + UnOpInit(const UnOpInit &) = delete; + UnOpInit &operator=(const UnOpInit &) = delete; static bool classof(const Init *I) { return I->getKind() == IK_UnOpInit; @@ -819,7 +799,6 @@ class UnOpInit : public OpInit, public FoldingSetNode { }; /// !op (X, Y) - Combine two inits. 
-/// class BinOpInit : public OpInit, public FoldingSetNode { public: enum BinaryOp : uint8_t { ADD, AND, OR, SHL, SRA, SRL, LISTCONCAT, @@ -832,8 +811,8 @@ class BinOpInit : public OpInit, public FoldingSetNode { OpInit(IK_BinOpInit, Type, opc), LHS(lhs), RHS(rhs) {} public: - BinOpInit(const BinOpInit &Other) = delete; - BinOpInit &operator=(const BinOpInit &Other) = delete; + BinOpInit(const BinOpInit &) = delete; + BinOpInit &operator=(const BinOpInit &) = delete; static bool classof(const Init *I) { return I->getKind() == IK_BinOpInit; @@ -874,7 +853,6 @@ class BinOpInit : public OpInit, public FoldingSetNode { }; /// !op (X, Y, Z) - Combine two inits. -/// class TernOpInit : public OpInit, public FoldingSetNode { public: enum TernaryOp : uint8_t { SUBST, FOREACH, IF }; @@ -887,8 +865,8 @@ class TernOpInit : public OpInit, public FoldingSetNode { OpInit(IK_TernOpInit, Type, opc), LHS(lhs), MHS(mhs), RHS(rhs) {} public: - TernOpInit(const TernOpInit &Other) = delete; - TernOpInit &operator=(const TernOpInit &Other) = delete; + TernOpInit(const TernOpInit &) = delete; + TernOpInit &operator=(const TernOpInit &) = delete; static bool classof(const Init *I) { return I->getKind() == IK_TernOpInit; @@ -935,7 +913,6 @@ class TernOpInit : public OpInit, public FoldingSetNode { }; /// 'Opcode' - Represent a reference to an entire variable object. -/// class VarInit : public TypedInit { Init *VarName; @@ -943,8 +920,8 @@ class VarInit : public TypedInit { : TypedInit(IK_VarInit, T), VarName(VN) {} public: - VarInit(const VarInit &Other) = delete; - VarInit &operator=(const VarInit &Other) = delete; + VarInit(const VarInit &) = delete; + VarInit &operator=(const VarInit &) = delete; static bool classof(const Init *I) { return I->getKind() == IK_VarInit; @@ -980,7 +957,6 @@ class VarInit : public TypedInit { }; /// Opcode{0} - Represent access to one bit of a variable or field. 
-/// class VarBitInit : public Init { TypedInit *TI; unsigned Bit; @@ -994,8 +970,8 @@ class VarBitInit : public Init { } public: - VarBitInit(const VarBitInit &Other) = delete; - VarBitInit &operator=(const VarBitInit &Other) = delete; + VarBitInit(const VarBitInit &) = delete; + VarBitInit &operator=(const VarBitInit &) = delete; static bool classof(const Init *I) { return I->getKind() == IK_VarBitInit; @@ -1032,8 +1008,8 @@ class VarListElementInit : public TypedInit { } public: - VarListElementInit(const VarListElementInit &Other) = delete; - void operator=(const VarListElementInit &Other) = delete; + VarListElementInit(const VarListElementInit &) = delete; + VarListElementInit &operator=(const VarListElementInit &) = delete; static bool classof(const Init *I) { return I->getKind() == IK_VarListElementInit; @@ -1057,17 +1033,16 @@ class VarListElementInit : public TypedInit { }; /// AL - Represent a reference to a 'def' in the description -/// class DefInit : public TypedInit { + friend class Record; + Record *Def; DefInit(Record *D, RecordRecTy *T) : TypedInit(IK_DefInit, T), Def(D) {} - friend class Record; - public: - DefInit(const DefInit &Other) = delete; - DefInit &operator=(const DefInit &Other) = delete; + DefInit(const DefInit &) = delete; + DefInit &operator=(const DefInit &) = delete; static bool classof(const Init *I) { return I->getKind() == IK_DefInit; @@ -1101,7 +1076,6 @@ class DefInit : public TypedInit { }; /// X.Y - Represent a reference to a subfield of a variable -/// class FieldInit : public TypedInit { Init *Rec; // Record we are referring to StringInit *FieldName; // Field we are accessing @@ -1112,8 +1086,8 @@ class FieldInit : public TypedInit { } public: - FieldInit(const FieldInit &Other) = delete; - FieldInit &operator=(const FieldInit &Other) = delete; + FieldInit(const FieldInit &) = delete; + FieldInit &operator=(const FieldInit &) = delete; static bool classof(const Init *I) { return I->getKind() == IK_FieldInit; @@ -1136,22 +1110,24 @@ class FieldInit : public TypedInit { /// (v a, b) - Represent a DAG tree value. DAG inits are required /// to have at least one value then a (possibly empty) list of arguments. Each /// argument can have a name associated with it. -/// -class DagInit : public TypedInit, public FoldingSetNode { +class DagInit final : public TypedInit, public FoldingSetNode, + public TrailingObjects { + friend TrailingObjects; + Init *Val; StringInit *ValName; - SmallVector Args; - SmallVector ArgNames; + unsigned NumArgs; + unsigned NumArgNames; - DagInit(Init *V, StringInit *VN, ArrayRef ArgRange, - ArrayRef NameRange) + DagInit(Init *V, StringInit *VN, unsigned NumArgs, unsigned NumArgNames) : TypedInit(IK_DagInit, DagRecTy::get()), Val(V), ValName(VN), - Args(ArgRange.begin(), ArgRange.end()), - ArgNames(NameRange.begin(), NameRange.end()) {} + NumArgs(NumArgs), NumArgNames(NumArgNames) {} + + size_t numTrailingObjects(OverloadToken) const { return NumArgs; } public: - DagInit(const DagInit &Other) = delete; - DagInit &operator=(const DagInit &Other) = delete; + DagInit(const DagInit &) = delete; + DagInit &operator=(const DagInit &) = delete; static bool classof(const Init *I) { return I->getKind() == IK_DagInit; @@ -1169,45 +1145,54 @@ class DagInit : public TypedInit, public FoldingSetNode { Init *getOperator() const { return Val; } StringInit *getName() const { return ValName; } + StringRef getNameStr() const { return ValName ? 
ValName->getValue() : StringRef(); } - unsigned getNumArgs() const { return Args.size(); } + unsigned getNumArgs() const { return NumArgs; } + Init *getArg(unsigned Num) const { - assert(Num < Args.size() && "Arg number out of range!"); - return Args[Num]; + assert(Num < NumArgs && "Arg number out of range!"); + return getTrailingObjects<Init *>()[Num]; } + StringInit *getArgName(unsigned Num) const { - assert(Num < ArgNames.size() && "Arg number out of range!"); - return ArgNames[Num]; + assert(Num < NumArgNames && "Arg number out of range!"); + return getTrailingObjects<StringInit *>()[Num]; } + StringRef getArgNameStr(unsigned Num) const { StringInit *Init = getArgName(Num); return Init ? Init->getValue() : StringRef(); } + ArrayRef<Init *> getArgs() const { + return makeArrayRef(getTrailingObjects<Init *>(), NumArgs); + } + + ArrayRef<StringInit *> getArgNames() const { + return makeArrayRef(getTrailingObjects<StringInit *>(), NumArgNames); + } + Init *resolveReferences(Record &R, const RecordVal *RV) const override; std::string getAsString() const override; - typedef SmallVectorImpl<Init*>::const_iterator const_arg_iterator; - typedef SmallVectorImpl<StringInit*>::const_iterator const_name_iterator; + using const_arg_iterator = SmallVectorImpl<Init*>::const_iterator; + using const_name_iterator = SmallVectorImpl<StringInit*>::const_iterator; - inline const_arg_iterator arg_begin() const { return Args.begin(); } - inline const_arg_iterator arg_end () const { return Args.end(); } - inline iterator_range<const_arg_iterator> args() const { - return llvm::make_range(arg_begin(), arg_end()); - } + inline const_arg_iterator arg_begin() const { return getArgs().begin(); } + inline const_arg_iterator arg_end () const { return getArgs().end(); } - inline size_t arg_size () const { return Args.size(); } - inline bool arg_empty() const { return Args.empty(); } + inline size_t arg_size () const { return NumArgs; } + inline bool arg_empty() const { return NumArgs == 0; } - inline const_name_iterator name_begin() const { return ArgNames.begin(); } - inline const_name_iterator name_end () const { return ArgNames.end(); } + inline const_name_iterator name_begin() const { return getArgNames().begin();} + inline const_name_iterator name_end () const { return getArgNames().end(); } - inline size_t name_size () const { return ArgNames.size(); } - inline bool name_empty() const { return ArgNames.empty(); } + inline size_t name_size () const { return NumArgNames; } + inline bool name_empty() const { return NumArgNames == 0; } Init *getBit(unsigned Bit) const override { llvm_unreachable("Illegal bit reference off dag");
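The hunk above moves the dag arguments out of two member `SmallVector`s into a single trailing allocation. A sketch of reading them back through the accessors this hunk introduces or adjusts (the printing function itself is mine, not the patch's):

```cpp
#include "llvm/TableGen/Record.h"
#include "llvm/Support/raw_ostream.h"

// Dump the operator and each "arg : name" pair of a dag value.
// getNumArgs(), getArg(), and getArgNameStr() now index into the
// TrailingObjects storage shown above.
void dumpDag(const llvm::DagInit *D, llvm::raw_ostream &OS) {
  OS << D->getOperator()->getAsString() << "\n";
  for (unsigned I = 0, E = D->getNumArgs(); I != E; ++I)
    OS << "  " << D->getArg(I)->getAsString()
       << " : " << D->getArgNameStr(I) << "\n";
}
```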
@@ -1225,13 +1210,13 @@ class DagInit : public TypedInit, public FoldingSetNode { class RecordVal { friend class Record; + Init *Name; PointerIntPair<RecTy *, 1, bool> TyAndPrefix; Init *Value; public: RecordVal(Init *N, RecTy *T, bool P); - RecordVal(StringRef N, RecTy *T, bool P); StringRef getName() const; Init *getNameInit() const { return Name; }
@@ -1293,7 +1278,7 @@ class Record { // definitions that use them (e.g. Def). However, inside a multiclass they // can't be immediately resolved so we mark them ResolveFirst to fully // resolve them later as soon as the multiclass is instantiated. - bool ResolveFirst; + bool ResolveFirst = false; void init(); void checkName();
@@ -1303,7 +1288,7 @@ class Record { explicit Record(Init *N, ArrayRef<SMLoc> locs, RecordKeeper &records, bool Anonymous = false) : Name(N), Locs(locs.begin(), locs.end()), TrackedRecords(records), - ID(LastID++), IsAnonymous(Anonymous), ResolveFirst(false) { + ID(LastID++), IsAnonymous(Anonymous) { init(); }
@@ -1325,6 +1310,7 @@ class Record { unsigned getID() const { return ID; } StringRef getName() const; + Init *getNameInit() const { return Name; }
@@ -1334,7 +1320,6 @@ class Record { } void setName(Init *Name); // Also updates RecordKeeper. - void setName(StringRef Name); // Also updates RecordKeeper. ArrayRef<SMLoc> getLoc() const { return Locs; }
@@ -1357,10 +1342,6 @@ class Record { return false; } - bool isTemplateArg(StringRef Name) const { - return isTemplateArg(StringInit::get(Name)); - } - const RecordVal *getValue(const Init *Name) const { for (const RecordVal &Val : Values) if (Val.Name == Name) return &Val;
@@ -1372,13 +1353,11 @@ class Record { } RecordVal *getValue(const Init *Name) { - for (RecordVal &Val : Values) - if (Val.Name == Name) return &Val; - return nullptr; + return const_cast<RecordVal *>(static_cast<const Record *>(this)->getValue(Name)); } RecordVal *getValue(StringRef Name) { - return getValue(StringInit::get(Name)); + return const_cast<RecordVal *>(static_cast<const Record *>(this)->getValue(Name)); } void addTemplateArg(Init *Name) {
@@ -1386,10 +1365,6 @@ class Record { TemplateArgs.push_back(Name); } - void addTemplateArg(StringRef Name) { - addTemplateArg(StringInit::get(Name)); - } - void addValue(const RecordVal &RV) { assert(getValue(RV.getNameInit()) == nullptr && "Value already added!"); Values.push_back(RV);
@@ -1441,7 +1416,6 @@ class Record { /// If there are any field references that refer to fields /// that have been filled in, we can propagate the values now. - /// void resolveReferences() { resolveReferencesTo(nullptr); } /// If anything in this record refers to RV, replace the
@@ -1474,7 +1448,6 @@ class Record { /// Return the initializer for a value with the specified name, /// or throw an exception if the field does not exist. - /// Init *getValueInit(StringRef FieldName) const; /// Return true if the named field is unset.
@@ -1485,67 +1458,56 @@ class Record { /// This method looks up the specified field and returns /// its value as a string, throwing an exception if the field does not exist /// or if the value is not a string. - /// - std::string getValueAsString(StringRef FieldName) const; + StringRef getValueAsString(StringRef FieldName) const; /// This method looks up the specified field and returns /// its value as a BitsInit, throwing an exception if the field does not exist /// or if the value is not the right type. - /// BitsInit *getValueAsBitsInit(StringRef FieldName) const; /// This method looks up the specified field and returns /// its value as a ListInit, throwing an exception if the field does not exist /// or if the value is not the right type. - /// ListInit *getValueAsListInit(StringRef FieldName) const; /// This method looks up the specified field and /// returns its value as a vector of records, throwing an exception if the /// field does not exist or if the value is not the right type. - /// std::vector<Record*> getValueAsListOfDefs(StringRef FieldName) const; /// This method looks up the specified field and /// returns its value as a vector of integers, throwing an exception if the /// field does not exist or if the value is not the right type.
- /// std::vector<int64_t> getValueAsListOfInts(StringRef FieldName) const; /// This method looks up the specified field and /// returns its value as a vector of strings, throwing an exception if the /// field does not exist or if the value is not the right type. - /// - std::vector<std::string> getValueAsListOfStrings(StringRef FieldName) const; + std::vector<StringRef> getValueAsListOfStrings(StringRef FieldName) const; /// This method looks up the specified field and returns its /// value as a Record, throwing an exception if the field does not exist or if /// the value is not the right type. - /// Record *getValueAsDef(StringRef FieldName) const; /// This method looks up the specified field and returns its /// value as a bit, throwing an exception if the field does not exist or if /// the value is not the right type. - /// bool getValueAsBit(StringRef FieldName) const; /// This method looks up the specified field and /// returns its value as a bit. If the field is unset, sets Unset to true and /// returns false. - /// bool getValueAsBitOrUnset(StringRef FieldName, bool &Unset) const; /// This method looks up the specified field and returns its /// value as an int64_t, throwing an exception if the field does not exist or /// if the value is not the right type. - /// int64_t getValueAsInt(StringRef FieldName) const; /// This method looks up the specified field and returns its /// value as a Dag, throwing an exception if the field does not exist or if /// the value is not the right type. - /// DagInit *getValueAsDag(StringRef FieldName) const; };
@@ -1553,7 +1515,7 @@ raw_ostream &operator<<(raw_ostream &OS, const Record &R); struct MultiClass { Record Rec; // Placeholder for template args and Name. - typedef std::vector<std::unique_ptr<Record>> RecordVector; + using RecordVector = std::vector<std::unique_ptr<Record>>; RecordVector DefPrototypes; void dump() const;
@@ -1563,7 +1525,7 @@ struct MultiClass { }; class RecordKeeper { - typedef std::map<std::string, std::unique_ptr<Record>> RecordMap; + using RecordMap = std::map<std::string, std::unique_ptr<Record>>; RecordMap Classes, Defs; public:
@@ -1606,7 +1568,6 @@ class RecordKeeper { }; /// Sorting predicate to sort record pointers by name. -/// struct LessRecord { bool operator()(const Record *Rec1, const Record *Rec2) const { return StringRef(Rec1->getName()).compare_numeric(Rec2->getName()) < 0;
@@ -1625,7 +1586,6 @@ struct LessRecordByID { /// Sorting predicate to sort record pointers by their /// name field. -/// struct LessRecordFieldName { bool operator()(const Record *Rec1, const Record *Rec2) const { return Rec1->getValueAsString("Name") < Rec2->getValueAsString("Name");
diff --git a/interpreter/llvm/src/include/llvm/TableGen/SetTheory.h b/interpreter/llvm/src/include/llvm/TableGen/SetTheory.h index 818b0549b66a8..4b32f9e3da8fb 100644 --- a/interpreter/llvm/src/include/llvm/TableGen/SetTheory.h +++ b/interpreter/llvm/src/include/llvm/TableGen/SetTheory.h
@@ -64,8 +64,8 @@ class Record; class SetTheory { public: - typedef std::vector<Record*> RecVec; - typedef SmallSetVector<Record*, 16> RecSet; + using RecVec = std::vector<Record*>; + using RecSet = SmallSetVector<Record*, 16>; /// Operator - A callback representing a DAG operator. class Operator {
@@ -95,7 +95,7 @@ class SetTheory { private: // Map set defs to their fully expanded contents. This serves as a memoization // cache and it makes it possible to return const references on queries. - typedef std::map<Record*, RecSet> ExpandMap; + using ExpandMap = std::map<Record*, RecSet>; ExpandMap Expansions; // Known DAG operators by name.
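One behavioral note worth calling out from the hunk above: `getValueAsString` now hands back a `StringRef` into the record's storage instead of materializing a fresh `std::string`. A caller-side sketch (the wrapper function is made up for illustration):

```cpp
#include "llvm/TableGen/Record.h"

// Cheap after this patch: no std::string copy is made. The returned
// StringRef stays valid only as long as the Record (and its RecordKeeper)
// are alive, so don't cache it past their lifetime.
llvm::StringRef getNameField(const llvm::Record &R) {
  return R.getValueAsString("Name");
}
```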
diff --git a/interpreter/llvm/src/include/llvm/TableGen/StringMatcher.h b/interpreter/llvm/src/include/llvm/TableGen/StringMatcher.h index 11a8ad8183aad..7c919ffec7b61 100644 --- a/interpreter/llvm/src/include/llvm/TableGen/StringMatcher.h +++ b/interpreter/llvm/src/include/llvm/TableGen/StringMatcher.h
@@ -20,7 +20,8 @@ #include <vector> namespace llvm { - class raw_ostream; + +class raw_ostream; /// StringMatcher - Given a list of strings and code to execute when they match, /// output a simple switch tree to classify the input string.
@@ -30,7 +31,7 @@ namespace llvm { /// class StringMatcher { public: - typedef std::pair<std::string, std::string> StringPair; + using StringPair = std::pair<std::string, std::string>; private: StringRef StrVariableName;
@@ -49,6 +50,6 @@ class StringMatcher { unsigned CharNo, unsigned IndentCount) const; }; -} // end llvm namespace. +} // end namespace llvm -#endif +#endif // LLVM_TABLEGEN_STRINGMATCHER_H
diff --git a/interpreter/llvm/src/include/llvm/Target/GenericOpcodes.td b/interpreter/llvm/src/include/llvm/Target/GenericOpcodes.td index de3796cd4ee56..e35bcb015d6a0 100644 --- a/interpreter/llvm/src/include/llvm/Target/GenericOpcodes.td +++ b/interpreter/llvm/src/include/llvm/Target/GenericOpcodes.td
@@ -49,6 +49,12 @@ def G_TRUNC : Instruction { let hasSideEffects = 0; } +def G_IMPLICIT_DEF : Instruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins); + let hasSideEffects = 0; +} + def G_FRAME_INDEX : Instruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins unknown:$src2);
@@ -386,6 +392,15 @@ def G_FMUL : Instruction { let isCommutable = 1; } +// Generic fused multiply-add instruction. +// Behaves like the llvm.fma intrinsic, i.e. dst = src1 * src2 + src3. +def G_FMA : Instruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type0:$src1, type0:$src2, type0:$src3); + let hasSideEffects = 0; + let isCommutable = 0; +} + // Generic FP division. def G_FDIV : Instruction { let OutOperandList = (outs type0:$dst);
@@ -407,6 +422,34 @@ def G_FPOW : Instruction { let hasSideEffects = 0; } +// Floating point base-e exponential of a value. +def G_FEXP : Instruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type0:$src1); + let hasSideEffects = 0; +} + +// Floating point base-2 exponential of a value. +def G_FEXP2 : Instruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type0:$src1); + let hasSideEffects = 0; +} + +// Floating point base-e (natural) logarithm of a value. +def G_FLOG : Instruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type0:$src1); + let hasSideEffects = 0; +} + +// Floating point base-2 logarithm of a value. +def G_FLOG2 : Instruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type0:$src1); + let hasSideEffects = 0; +} + //------------------------------------------------------------------------------ // Memory ops //------------------------------------------------------------------------------
@@ -456,15 +499,6 @@ def G_INSERT : Instruction { let hasSideEffects = 0; } -// Combine a sequence of generic vregs into a single larger value (starting at -// bit 0). Essentially a G_INSERT where $src is an IMPLICIT_DEF, but it's so -// important to legalization it probably deserves its own instruction.
-def G_SEQUENCE : Instruction { - let OutOperandList = (outs type0:$dst); - let InOperandList = (ins variable_ops); - let hasSideEffects = 0; -} - def G_MERGE_VALUES : Instruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins variable_ops); diff --git a/interpreter/llvm/src/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/interpreter/llvm/src/include/llvm/Target/GlobalISel/SelectionDAGCompat.td index a06c67fe814c8..50de41fd13208 100644 --- a/interpreter/llvm/src/include/llvm/Target/GlobalISel/SelectionDAGCompat.td +++ b/interpreter/llvm/src/include/llvm/Target/GlobalISel/SelectionDAGCompat.td @@ -58,10 +58,14 @@ def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; +def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; def : GINodeEquiv; // Specifies the GlobalISel equivalents for SelectionDAG's ComplexPattern. diff --git a/interpreter/llvm/src/include/llvm/Target/Target.td b/interpreter/llvm/src/include/llvm/Target/Target.td index fc35b4527bc37..6f44292c47ed0 100644 --- a/interpreter/llvm/src/include/llvm/Target/Target.td +++ b/interpreter/llvm/src/include/llvm/Target/Target.td @@ -680,6 +680,11 @@ class RegisterOperand // this type. The method normally will just use an alt-name index to look // up the name to print. Default to the generic printOperand(). string PrintMethod = pm; + + // EncoderMethod - The target method name to call to encode this register + // operand. + string EncoderMethod = ""; + // ParserMatchClass - The "match class" that operands of this type fit // in. Match classes are used to define the order in which instructions are // match, to ensure that which instructions gets matched is deterministic. 
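For readers not steeped in GlobalISel: the new G_FMA opcode added above is a single fused operation with one rounding step, matching the llvm.fma intrinsic rather than a multiply followed by a separate add. A scalar C++ model of the semantics (illustration only, not part of the patch):

```cpp
#include <cmath>

// dst = src1 * src2 + src3 computed with a single rounding, which is what
// distinguishes G_FMA from a separate G_FMUL followed by G_FADD.
double gFmaModel(double Src1, double Src2, double Src3) {
  return std::fma(Src1, Src2, Src3);
}
```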
diff --git a/interpreter/llvm/src/include/llvm/Target/TargetInstrInfo.h b/interpreter/llvm/src/include/llvm/Target/TargetInstrInfo.h index 97a6f0c6e3ae0..1843a2eed9bff 100644 --- a/interpreter/llvm/src/include/llvm/Target/TargetInstrInfo.h +++ b/interpreter/llvm/src/include/llvm/Target/TargetInstrInfo.h @@ -1,4 +1,4 @@ -//===-- llvm/Target/TargetInstrInfo.h - Instruction Info --------*- C++ -*-===// +//===- llvm/Target/TargetInstrInfo.h - Instruction Info ---------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -14,36 +14,46 @@ #ifndef LLVM_TARGET_TARGETINSTRINFO_H #define LLVM_TARGET_TARGETINSTRINFO_H +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/ADT/None.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineCombinerPattern.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/Support/BranchProbability.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/Support/ErrorHandling.h" +#include +#include +#include +#include +#include namespace llvm { +class DFAPacketizer; class InstrItineraryData; class LiveVariables; -class MCAsmInfo; class MachineMemOperand; class MachineRegisterInfo; -class MDNode; +class MCAsmInfo; class MCInst; struct MCSchedModel; -class MCSymbolRefExpr; -class SDNode; +class Module; +class ScheduleDAG; class ScheduleHazardRecognizer; +class SDNode; class SelectionDAG; -class ScheduleDAG; +class RegScavenger; class TargetRegisterClass; class TargetRegisterInfo; -class TargetSubtargetInfo; class TargetSchedModel; -class DFAPacketizer; +class TargetSubtargetInfo; template class SmallVectorImpl; @@ -52,8 +62,6 @@ template class SmallVectorImpl; /// TargetInstrInfo - Interface to description of machine instruction set /// class TargetInstrInfo : public MCInstrInfo { - TargetInstrInfo(const TargetInstrInfo &) = delete; - void operator=(const TargetInstrInfo &) = delete; public: TargetInstrInfo(unsigned CFSetupOpcode = ~0u, unsigned CFDestroyOpcode = ~0u, unsigned CatchRetOpcode = ~0u, unsigned ReturnOpcode = ~0u) @@ -61,7 +69,8 @@ class TargetInstrInfo : public MCInstrInfo { CallFrameDestroyOpcode(CFDestroyOpcode), CatchRetOpcode(CatchRetOpcode), ReturnOpcode(ReturnOpcode) {} - + TargetInstrInfo(const TargetInstrInfo &) = delete; + TargetInstrInfo &operator=(const TargetInstrInfo &) = delete; virtual ~TargetInstrInfo(); static bool isGenericOpcode(unsigned Opc) { @@ -396,14 +405,17 @@ class TargetInstrInfo : public MCInstrInfo { struct RegSubRegPair { unsigned Reg; unsigned SubReg; + RegSubRegPair(unsigned Reg = 0, unsigned SubReg = 0) : Reg(Reg), SubReg(SubReg) {} }; + /// A pair composed of a pair of a register and a sub-register index, /// and another sub-register index. /// Used to give some type checking when modeling Reg:SubReg1, SubReg2. struct RegSubRegPairAndIdx : RegSubRegPair { unsigned SubIdx; + RegSubRegPairAndIdx(unsigned Reg = 0, unsigned SubReg = 0, unsigned SubIdx = 0) : RegSubRegPair(Reg, SubReg), SubIdx(SubIdx) {} @@ -469,7 +481,6 @@ class TargetInstrInfo : public MCInstrInfo { RegSubRegPair &BaseReg, RegSubRegPairAndIdx &InsertedReg) const; - /// Return true if two machine instructions would produce identical values. 
/// By default, this is only true when the two instructions /// are deemed identical except for defs. If this function is called when the @@ -551,23 +562,19 @@ class TargetInstrInfo : public MCInstrInfo { PRED_INVALID // Sentinel value }; - ComparePredicate Predicate; - MachineOperand LHS; - MachineOperand RHS; - MachineBasicBlock *TrueDest; - MachineBasicBlock *FalseDest; - MachineInstr *ConditionDef; + ComparePredicate Predicate = PRED_INVALID; + MachineOperand LHS = MachineOperand::CreateImm(0); + MachineOperand RHS = MachineOperand::CreateImm(0); + MachineBasicBlock *TrueDest = nullptr; + MachineBasicBlock *FalseDest = nullptr; + MachineInstr *ConditionDef = nullptr; /// SingleUseCondition is true if ConditionDef is dead except for the /// branch(es) at the end of the basic block. /// - bool SingleUseCondition; + bool SingleUseCondition = false; - explicit MachineBranchPredicate() - : Predicate(PRED_INVALID), LHS(MachineOperand::CreateImm(0)), - RHS(MachineOperand::CreateImm(0)), TrueDest(nullptr), - FalseDest(nullptr), ConditionDef(nullptr), SingleUseCondition(false) { - } + explicit MachineBranchPredicate() = default; }; /// Analyze the branching code at the end of MBB and parse it into the @@ -1117,7 +1124,6 @@ class TargetInstrInfo : public MCInstrInfo { virtual void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const; - /// Return the noop instruction to use for a noop. virtual void getNoop(MCInst &NopInst) const; @@ -1539,6 +1545,16 @@ class TargetInstrInfo : public MCInstrInfo { return None; } + /// Return an array that contains the MMO target flag values and their + /// names. + /// + /// MIR Serialization is able to serialize only the MMO target flags that are + /// defined by this method. + virtual ArrayRef> + getSerializableMachineMemOperandTargetFlags() const { + return None; + } + /// Determines whether \p Inst is a tail call instruction. Override this /// method on targets that do not properly set MCID::Return and MCID::Call on /// tail call instructions." @@ -1621,16 +1637,18 @@ class TargetInstrInfo : public MCInstrInfo { /// \brief Provide DenseMapInfo for TargetInstrInfo::RegSubRegPair. template<> struct DenseMapInfo { - typedef DenseMapInfo RegInfo; + using RegInfo = DenseMapInfo; static inline TargetInstrInfo::RegSubRegPair getEmptyKey() { return TargetInstrInfo::RegSubRegPair(RegInfo::getEmptyKey(), RegInfo::getEmptyKey()); } + static inline TargetInstrInfo::RegSubRegPair getTombstoneKey() { return TargetInstrInfo::RegSubRegPair(RegInfo::getTombstoneKey(), RegInfo::getTombstoneKey()); } + /// \brief Reuse getHashValue implementation from /// std::pair. 
static unsigned getHashValue(const TargetInstrInfo::RegSubRegPair &Val) { @@ -1638,6 +1656,7 @@ struct DenseMapInfo { std::make_pair(Val.Reg, Val.SubReg); return DenseMapInfo>::getHashValue(PairVal); } + static bool isEqual(const TargetInstrInfo::RegSubRegPair &LHS, const TargetInstrInfo::RegSubRegPair &RHS) { return RegInfo::isEqual(LHS.Reg, RHS.Reg) && diff --git a/interpreter/llvm/src/include/llvm/Target/TargetLowering.h b/interpreter/llvm/src/include/llvm/Target/TargetLowering.h index 1ca32d4c35898..23711d636c9a0 100644 --- a/interpreter/llvm/src/include/llvm/Target/TargetLowering.h +++ b/interpreter/llvm/src/include/llvm/Target/TargetLowering.h @@ -1,4 +1,4 @@ -//===-- llvm/Target/TargetLowering.h - Target Lowering Info -----*- C++ -*-===// +//===- llvm/Target/TargetLowering.h - Target Lowering Info ------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -23,6 +23,7 @@ #ifndef LLVM_TARGET_TARGETLOWERING_H #define LLVM_TARGET_TARGETLOWERING_H +#include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" @@ -40,6 +41,7 @@ #include "llvm/IR/CallingConv.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Instruction.h" @@ -66,10 +68,13 @@ namespace llvm { class BranchProbability; class CCState; class CCValAssign; +class Constant; class FastISel; class FunctionLoweringInfo; +class GlobalValue; class IntrinsicInst; struct KnownBits; +class LLVMContext; class MachineBasicBlock; class MachineFunction; class MachineInstr; @@ -78,6 +83,7 @@ class MachineLoop; class MachineRegisterInfo; class MCContext; class MCExpr; +class Module; class TargetRegisterClass; class TargetLibraryInfo; class TargetRegisterInfo; @@ -127,7 +133,7 @@ class TargetLoweringBase { /// LegalizeKind holds the legalization kind that needs to happen to EVT /// in order to type-legalize it. - typedef std::pair LegalizeKind; + using LegalizeKind = std::pair; /// Enum that describes how the target represents true/false values. enum BooleanContent { @@ -189,7 +195,7 @@ class TargetLoweringBase { void setAttributes(ImmutableCallSite *CS, unsigned ArgIdx); }; - typedef std::vector ArgListTy; + using ArgListTy = std::vector; virtual void markLibCallAttributes(MachineFunction *MF, unsigned CC, ArgListTy &Args) const {}; @@ -211,8 +217,8 @@ class TargetLoweringBase { /// NOTE: The TargetMachine owns TLOF. explicit TargetLoweringBase(const TargetMachine &TM); - TargetLoweringBase(const TargetLoweringBase&) = delete; - void operator=(const TargetLoweringBase&) = delete; + TargetLoweringBase(const TargetLoweringBase &) = delete; + TargetLoweringBase &operator=(const TargetLoweringBase &) = delete; virtual ~TargetLoweringBase() = default; protected: @@ -404,8 +410,15 @@ class TargetLoweringBase { return false; } + /// Should we merge stores after Legalization (generally + /// better quality) or before (simpler) + virtual bool mergeStoresAfterLegalization() const { return false; } + /// Returns if it's reasonable to merge stores to MemVT size. - virtual bool canMergeStoresTo(EVT MemVT) const { return true; } + virtual bool canMergeStoresTo(unsigned AS, EVT MemVT, + const SelectionDAG &DAG) const { + return true; + } /// \brief Return true if it is cheap to speculate a call to intrinsic cttz. 
virtual bool isCheapToSpeculateCttz() const {
@@ -675,6 +688,16 @@ class TargetLoweringBase { unsigned &NumIntermediates, MVT &RegisterVT) const; + /// Certain targets such as MIPS require that some types such as vectors are + /// always broken down into scalars in some contexts. This occurs even if the + /// vector type is legal. + virtual unsigned getVectorTypeBreakdownForCallingConv( + LLVMContext &Context, EVT VT, EVT &IntermediateVT, + unsigned &NumIntermediates, MVT &RegisterVT) const { + return getVectorTypeBreakdown(Context, VT, IntermediateVT, NumIntermediates, + RegisterVT); + } + struct IntrinsicInfo { unsigned opc = 0; // target opcode EVT memVT; // memory VT
@@ -736,7 +759,7 @@ class TargetLoweringBase { if (VT.isExtended()) return Expand; // If a target-specific SDNode requires legalization, require the target // to provide custom legalization for it. - if (Op > array_lengthof(OpActions[0])) return Custom; + if (Op >= array_lengthof(OpActions[0])) return Custom; return OpActions[(unsigned)VT.getSimpleVT().SimpleTy][Op]; }
@@ -1083,6 +1106,33 @@ class TargetLoweringBase { llvm_unreachable("Unsupported extended type!"); } + /// Certain combinations of ABIs, Targets and features require that types + /// are legal for some operations and not for other operations. + /// For MIPS all vector types must be passed through the integer register set. + virtual MVT getRegisterTypeForCallingConv(MVT VT) const { + return getRegisterType(VT); + } + + virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, + EVT VT) const { + return getRegisterType(Context, VT); + } + + /// Certain targets require unusual breakdowns of certain types. For MIPS, + /// this occurs when a vector type is used, as vectors are passed through the + /// integer register set. + virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, + EVT VT) const { + return getNumRegisters(Context, VT); + } + + /// Certain targets have context sensitive alignment requirements, where one + /// type has the alignment requirement of another type. + virtual unsigned getABIAlignmentForCallingConv(Type *ArgTy, + DataLayout DL) const { + return DL.getABITypeAlignment(ArgTy); + } + /// If true, then instruction selection should seek to shrink the FP constant /// of the specified type to a smaller type in order to save space and / or /// reduce runtime.
@@ -1141,6 +1191,16 @@ class TargetLoweringBase { return OptSize ? MaxStoresPerMemcpyOptSize : MaxStoresPerMemcpy; } + /// Get maximum # of load operations permitted for memcmp + /// + /// This function returns the maximum number of load operations permitted + /// to replace a call to memcmp. The value is set by the target at the + /// performance threshold for such a replacement. If OptSize is true, + /// return the limit for functions that have OptSize attribute. + unsigned getMaxExpandSizeMemcmp(bool OptSize) const { + return OptSize ? MaxLoadsPerMemcmpOptSize : MaxLoadsPerMemcmp; + } + /// \brief Get maximum # of store operations permitted for llvm.memmove /// /// This function returns the maximum number of store operations permitted
@@ -1315,6 +1375,12 @@ class TargetLoweringBase { /// Returns the target-specific address of the unsafe stack pointer. virtual Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const;
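How the two new memcmp knobs are meant to be consumed: a target opts in by setting them in its lowering constructor, and the generic code queries `getMaxExpandSizeMemcmp` above. A sketch under stated assumptions; everything named `MyTarget` here is hypothetical, and the member names are the ones added in the next hunk:

```cpp
// Hypothetical target constructor snippet: enable memcmp expansion by
// budgeting the loads that may replace a libcall.
MyTargetLowering::MyTargetLowering(const llvm::TargetMachine &TM)
    : llvm::TargetLowering(TM) {
  MaxLoadsPerMemcmp = 8;        // up to 8 loads when optimizing for speed
  MaxLoadsPerMemcmpOptSize = 4; // tighter budget under OptSize
}
```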
+ virtual StringRef getStackProbeSymbolName(MachineFunction &MF) const { + return ""; + } + /// Returns true if a cast between SrcAS and DestAS is a noop. virtual bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const { return false; @@ -1864,6 +1930,38 @@ class TargetLoweringBase { return false; } + /// Returns true if the opcode is a commutative binary operation. + virtual bool isCommutativeBinOp(unsigned Opcode) const { + // FIXME: This should get its info from the td file. + switch (Opcode) { + case ISD::ADD: + case ISD::SMIN: + case ISD::SMAX: + case ISD::UMIN: + case ISD::UMAX: + case ISD::MUL: + case ISD::MULHU: + case ISD::MULHS: + case ISD::SMUL_LOHI: + case ISD::UMUL_LOHI: + case ISD::FADD: + case ISD::FMUL: + case ISD::AND: + case ISD::OR: + case ISD::XOR: + case ISD::SADDO: + case ISD::UADDO: + case ISD::ADDC: + case ISD::ADDE: + case ISD::FMINNUM: + case ISD::FMAXNUM: + case ISD::FMINNAN: + case ISD::FMAXNAN: + return true; + default: return false; + } + } + /// Return true if it's free to truncate a value of type FromTy to type /// ToTy. e.g. On x86 it's free to truncate a i32 value in register EAX to i16 /// by referencing its sub-register AX. @@ -1914,6 +2012,35 @@ class TargetLoweringBase { return isExtFreeImpl(I); } + /// Return true if \p Load and \p Ext can form an ExtLoad. + /// For example, in AArch64 + /// %L = load i8, i8* %ptr + /// %E = zext i8 %L to i32 + /// can be lowered into one load instruction + /// ldrb w0, [x0] + bool isExtLoad(const LoadInst *Load, const Instruction *Ext, + const DataLayout &DL) const { + EVT VT = getValueType(DL, Ext->getType()); + EVT LoadVT = getValueType(DL, Load->getType()); + + // If the load has other users and the truncate is not free, the ext + // probably isn't free. + if (!Load->hasOneUse() && (isTypeLegal(LoadVT) || !isTypeLegal(VT)) && + !isTruncateFree(Ext->getType(), Load->getType())) + return false; + + // Check whether the target supports casts folded into loads. + unsigned LType; + if (isa(Ext)) + LType = ISD::ZEXTLOAD; + else { + assert(isa(Ext) && "Unexpected ext type!"); + LType = ISD::SEXTLOAD; + } + + return isLoadExtLegal(LType, VT, LoadVT); + } + /// Return true if any actual instruction that defines a value of type FromTy /// implicitly zero-extends the value to ToTy in the result register. /// @@ -1954,7 +2081,7 @@ class TargetLoweringBase { /// this information should not be provided because it will generate more /// loads. virtual bool hasPairedLoad(EVT /*LoadedType*/, - unsigned & /*RequiredAligment*/) const { + unsigned & /*RequiredAlignment*/) const { return false; } @@ -2328,6 +2455,8 @@ class TargetLoweringBase { /// Maximum number of store operations that may be substituted for a call to /// memcpy, used for functions with OptSize attribute. unsigned MaxStoresPerMemcpyOptSize; + unsigned MaxLoadsPerMemcmp; + unsigned MaxLoadsPerMemcmpOptSize; /// \brief Specify maximum bytes of store instructions per memmove call. /// @@ -2372,8 +2501,8 @@ class TargetLowering : public TargetLoweringBase { public: struct DAGCombinerInfo; - TargetLowering(const TargetLowering&) = delete; - void operator=(const TargetLowering&) = delete; + TargetLowering(const TargetLowering &) = delete; + TargetLowering &operator=(const TargetLowering &) = delete; /// NOTE: The TargetMachine owns TLOF. 
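getStackProbeSymbolName() generalizes what had been x86-specific Windows stack-probing logic: returning an empty string means "no probing". A hedged sketch of an override; the class name and the symbol choice are illustrative, and real targets pick per-subtarget names:

#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/Target/TargetLowering.h"

class MyWindowsTargetLowering : public llvm::TargetLowering {
public:
  using TargetLowering::TargetLowering;

  llvm::StringRef
  getStackProbeSymbolName(llvm::MachineFunction &MF) const override {
    // Only probe for Windows-style environments; empty means "none".
    if (!MF.getTarget().getTargetTriple().isOSWindows())
      return "";
    return "_chkstk"; // illustrative symbol name
  }
};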
explicit TargetLowering(const TargetMachine &TM); @@ -2623,10 +2752,22 @@ class TargetLowering : public TargetLoweringBase { // This transformation may not be desirable if it disrupts a particularly // auspicious target-specific tree (e.g. bitfield extraction in AArch64). // By default, it returns true. - virtual bool isDesirableToCommuteWithShift(const SDNode *N /*Op*/) const { + virtual bool isDesirableToCommuteWithShift(const SDNode *N) const { return true; } + // Return true if it is profitable to combine a BUILD_VECTOR to a TRUNCATE. + // Example of such a combine: + // v4i32 build_vector((extract_elt V, 0), + // (extract_elt V, 2), + // (extract_elt V, 4), + // (extract_elt V, 6)) + // --> + // v4i32 truncate (bitcast V to v4i64) + virtual bool isDesirableToCombineBuildVectorToTruncate() const { + return false; + } + /// Return true if the target has native support for the specified value type /// and it is 'desirable' to use the type for the given node type. e.g. On x86 /// i16 is legal, but undesirable since i16 instruction encodings are longer @@ -2689,7 +2830,6 @@ class TargetLowering : public TargetLoweringBase { /// described by the Ins array, into the specified DAG. The implementation /// should fill in the InVals array with legal-type argument values, and /// return the resulting token chain value. - /// virtual SDValue LowerFormalArguments( SDValue /*Chain*/, CallingConv::ID /*CallConv*/, bool /*isVarArg*/, const SmallVectorImpl & /*Ins*/, const SDLoc & /*dl*/, @@ -2703,7 +2843,7 @@ class TargetLowering : public TargetLoweringBase { /// implementation. struct CallLoweringInfo { SDValue Chain; - Type *RetTy; + Type *RetTy = nullptr; bool RetSExt : 1; bool RetZExt : 1; bool IsVarArg : 1; @@ -2711,30 +2851,31 @@ class TargetLowering : public TargetLoweringBase { bool DoesNotReturn : 1; bool IsReturnValueUsed : 1; bool IsConvergent : 1; + bool IsPatchPoint : 1; // IsTailCall should be modified by implementations of // TargetLowering::LowerCall that perform tail call conversions. - bool IsTailCall; + bool IsTailCall = false; - unsigned NumFixedArgs; - CallingConv::ID CallConv; + // Is Call lowering done post SelectionDAG type legalization. + bool IsPostTypeLegalization = false; + + unsigned NumFixedArgs = -1; + CallingConv::ID CallConv = CallingConv::C; SDValue Callee; ArgListTy Args; SelectionDAG &DAG; SDLoc DL; - ImmutableCallSite *CS; - bool IsPatchPoint; + ImmutableCallSite *CS = nullptr; SmallVector Outs; SmallVector OutVals; SmallVector Ins; SmallVector InVals; CallLoweringInfo(SelectionDAG &DAG) - : RetTy(nullptr), RetSExt(false), RetZExt(false), IsVarArg(false), - IsInReg(false), DoesNotReturn(false), IsReturnValueUsed(true), - IsConvergent(false), IsTailCall(false), NumFixedArgs(-1), - CallConv(CallingConv::C), DAG(DAG), CS(nullptr), IsPatchPoint(false) { - } + : RetSExt(false), RetZExt(false), IsVarArg(false), IsInReg(false), + DoesNotReturn(false), IsReturnValueUsed(true), IsConvergent(false), + IsPatchPoint(false), DAG(DAG) {} CallLoweringInfo &setDebugLoc(const SDLoc &dl) { DL = dl; @@ -2841,6 +2982,11 @@ class TargetLowering : public TargetLoweringBase { return *this; } + CallLoweringInfo &setIsPostTypeLegalization(bool Value=true) { + IsPostTypeLegalization = Value; + return *this; + } + ArgListTy &getArgs() { return Args; } @@ -2959,6 +3105,13 @@ class TargetLowering : public TargetLoweringBase { return Chain; } + /// This callback is used to inspect load/store instructions and add + /// target-specific MachineMemOperand flags to them. 
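The CallLoweringInfo cleanup above converts most scalar members to C++11 default member initializers but deliberately leaves the bool bit-fields (RetSExt, RetZExt, ...) in the constructor's init list: default member initializers for bit-fields only became legal in C++20, and this code predates that. The pattern in miniature:

struct Flags {
  // Bit-fields could not have default member initializers before C++20,
  // so they must stay in the constructor's init list...
  bool RetSExt : 1;
  bool RetZExt : 1;

  // ...while ordinary members can be initialized in-class, which is what
  // the CallLoweringInfo change does.
  bool IsTailCall = false;
  unsigned NumFixedArgs = -1; // wraps to UINT_MAX, as in the patch

  Flags() : RetSExt(false), RetZExt(false) {}
};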
The default + /// implementation does nothing. + virtual MachineMemOperand::Flags getMMOFlags(const Instruction &I) const { + return MachineMemOperand::MONone; + } + /// This callback is invoked by the type legalizer to legalize nodes with an /// illegal operand type but legal result types. It replaces the /// LowerOperation callback in the type Legalizer. The reason we can not do @@ -3008,7 +3161,6 @@ class TargetLowering : public TargetLoweringBase { return nullptr; } - bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const; @@ -3057,15 +3209,19 @@ class TargetLowering : public TargetLoweringBase { /// Information about the constraint code, e.g. Register, RegisterClass, /// Memory, Other, Unknown. - TargetLowering::ConstraintType ConstraintType; + TargetLowering::ConstraintType ConstraintType = TargetLowering::C_Unknown; /// If this is the result output operand or a clobber, this is null, /// otherwise it is the incoming operand to the CallInst. This gets /// modified as the asm is processed. - Value *CallOperandVal; + Value *CallOperandVal = nullptr; /// The ValueType for the operand value. - MVT ConstraintVT; + MVT ConstraintVT = MVT::Other; + + /// Copy constructor for copying from a ConstraintInfo. + AsmOperandInfo(InlineAsm::ConstraintInfo Info) + : InlineAsm::ConstraintInfo(std::move(Info)) {} /// Return true of this is an input operand that is a matching constraint /// like "4". @@ -3074,15 +3230,9 @@ class TargetLowering : public TargetLoweringBase { /// If this is an input matching constraint, this method returns the output /// operand it matches. unsigned getMatchedOperand() const; - - /// Copy constructor for copying from a ConstraintInfo. - AsmOperandInfo(InlineAsm::ConstraintInfo Info) - : InlineAsm::ConstraintInfo(std::move(Info)), - ConstraintType(TargetLowering::C_Unknown), CallOperandVal(nullptr), - ConstraintVT(MVT::Other) {} }; - typedef std::vector AsmOperandInfoVector; + using AsmOperandInfoVector = std::vector; /// Split up the constraint string from the inline assembly value into the /// specific constraints and their prefixes, and also tie in the associated diff --git a/interpreter/llvm/src/include/llvm/Target/TargetLoweringObjectFile.h b/interpreter/llvm/src/include/llvm/Target/TargetLoweringObjectFile.h index 0ffd4b7f8c786..80d4d8e42e519 100644 --- a/interpreter/llvm/src/include/llvm/Target/TargetLoweringObjectFile.h +++ b/interpreter/llvm/src/include/llvm/Target/TargetLoweringObjectFile.h @@ -70,10 +70,9 @@ class TargetLoweringObjectFile : public MCObjectFileInfo { virtual void emitPersonalityValue(MCStreamer &Streamer, const DataLayout &TM, const MCSymbol *Sym) const; - /// Emit the module flags that the platform cares about. - virtual void emitModuleFlags(MCStreamer &Streamer, - ArrayRef Flags, - const TargetMachine &TM) const {} + /// Emit the module-level metadata that the platform cares about. + virtual void emitModuleMetadata(MCStreamer &Streamer, Module &M, + const TargetMachine &TM) const {} /// Given a constant with the SectionKind, return a section that it should be /// placed in. 
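getMMOFlags() gives a target a chance to attach its own MachineMemOperand flags while IR loads and stores are being translated. A sketch, assuming a target that repurposes one of the MOTargetFlag bits reserved for target use and keys off !nontemporal metadata; both choices are illustrative:

#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Target/TargetLowering.h"

class MyTargetLowering : public llvm::TargetLowering {
public:
  using TargetLowering::TargetLowering;

  llvm::MachineMemOperand::Flags
  getMMOFlags(const llvm::Instruction &I) const override {
    // Pretend MOTargetFlag1 means "use the streaming store unit".
    if (I.getMetadata(llvm::LLVMContext::MD_nontemporal))
      return llvm::MachineMemOperand::MOTargetFlag1;
    return llvm::MachineMemOperand::MONone;
  }
};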
diff --git a/interpreter/llvm/src/include/llvm/Target/TargetMachine.h b/interpreter/llvm/src/include/llvm/Target/TargetMachine.h
index 73ae2ad129881..933c6c87b0bea 100644
--- a/interpreter/llvm/src/include/llvm/Target/TargetMachine.h
+++ b/interpreter/llvm/src/include/llvm/Target/TargetMachine.h
@@ -25,7 +25,6 @@ namespace llvm {
 class GlobalValue;
-class MachineFunctionInitializer;
 class Mangler;
 class MCAsmInfo;
 class MCContext;
@@ -227,8 +226,7 @@ class TargetMachine {
       PassManagerBase &, raw_pwrite_stream &, CodeGenFileType,
       bool /*DisableVerify*/ = true, AnalysisID /*StartBefore*/ = nullptr,
       AnalysisID /*StartAfter*/ = nullptr, AnalysisID /*StopBefore*/ = nullptr,
-      AnalysisID /*StopAfter*/ = nullptr,
-      MachineFunctionInitializer * /*MFInitializer*/ = nullptr) {
+      AnalysisID /*StopAfter*/ = nullptr) {
     return true;
   }
@@ -289,8 +287,7 @@ class LLVMTargetMachine : public TargetMachine {
       PassManagerBase &PM, raw_pwrite_stream &Out, CodeGenFileType FileType,
       bool DisableVerify = true, AnalysisID StartBefore = nullptr,
       AnalysisID StartAfter = nullptr, AnalysisID StopBefore = nullptr,
-      AnalysisID StopAfter = nullptr,
-      MachineFunctionInitializer *MFInitializer = nullptr) override;
+      AnalysisID StopAfter = nullptr) override;
 
   /// Add passes to the specified pass manager to get machine code emitted with
   /// the MCJIT. This method returns true if machine code is not supported. It
@@ -299,6 +296,17 @@ class LLVMTargetMachine : public TargetMachine {
   bool addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx,
                          raw_pwrite_stream &OS,
                          bool DisableVerify = true) override;
+
+  /// Returns true if the target is expected to pass all machine verifier
+  /// checks. This is a stopgap measure to fix targets one by one. We will
+  /// remove this at some point and always enable the verifier when
+  /// EXPENSIVE_CHECKS is enabled.
+  virtual bool isMachineVerifierClean() const { return true; }
+
+  /// \brief Adds an AsmPrinter pass to the pipeline that prints assembly or
+  /// machine code from the MI representation.
+  bool addAsmPrinter(PassManagerBase &PM, raw_pwrite_stream &Out,
+                     CodeGenFileType FileType, MCContext &Context);
 };
 
 } // end namespace llvm
diff --git a/interpreter/llvm/src/include/llvm/Target/TargetOpcodes.def b/interpreter/llvm/src/include/llvm/Target/TargetOpcodes.def
index 36764249632da..cadf86058f0cb 100644
--- a/interpreter/llvm/src/include/llvm/Target/TargetOpcodes.def
+++ b/interpreter/llvm/src/include/llvm/Target/TargetOpcodes.def
@@ -222,6 +222,8 @@ HANDLE_TARGET_OPCODE(G_OR)
 HANDLE_TARGET_OPCODE(G_XOR)
 
+HANDLE_TARGET_OPCODE(G_IMPLICIT_DEF)
+
 /// Generic instruction to materialize the address of an alloca or other
 /// stack-based object.
 HANDLE_TARGET_OPCODE(G_FRAME_INDEX)
@@ -241,8 +243,6 @@ HANDLE_TARGET_OPCODE(G_INSERT)
 /// Generic instruction to paste a variable number of components together into a
 /// larger register.
-HANDLE_TARGET_OPCODE(G_SEQUENCE)
-
 HANDLE_TARGET_OPCODE(G_MERGE_VALUES)
 
 /// Generic pointer to int conversion.
@@ -359,6 +359,9 @@ HANDLE_TARGET_OPCODE(G_FSUB)
 /// Generic FP multiplication.
 HANDLE_TARGET_OPCODE(G_FMUL)
 
+/// Generic FMA multiplication. Behaves like the llvm fma intrinsic.
+HANDLE_TARGET_OPCODE(G_FMA)
+
 /// Generic FP division.
 HANDLE_TARGET_OPCODE(G_FDIV)
 
@@ -368,6 +371,18 @@ HANDLE_TARGET_OPCODE(G_FREM)
 /// Generic FP exponentiation.
 HANDLE_TARGET_OPCODE(G_FPOW)
 
+/// Generic base-e exponential of a value.
+HANDLE_TARGET_OPCODE(G_FEXP)
+
+/// Generic base-2 exponential of a value.
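New .def entries like G_IMPLICIT_DEF and G_FMA propagate to every consumer through the X-macro pattern: each includer defines HANDLE_TARGET_OPCODE to whatever it needs before including the file. A sketch of the enum-building expansion (the real TargetOpcodes.h also defines a marker variant, elided here):

// Build an enum from the .def file; adding one HANDLE_TARGET_OPCODE line
// to the .def automatically adds an enumerator for every consumer.
namespace TargetOpcode {
enum {
#define HANDLE_TARGET_OPCODE(OPC) OPC,
#include "llvm/Target/TargetOpcodes.def"
};
} // end namespace TargetOpcode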
+HANDLE_TARGET_OPCODE(G_FEXP2) + +/// Floating point base-e logarithm of a value. +HANDLE_TARGET_OPCODE(G_FLOG) + +/// Floating point base-2 logarithm of a value. +HANDLE_TARGET_OPCODE(G_FLOG2) + /// Generic FP negation. HANDLE_TARGET_OPCODE(G_FNEG) diff --git a/interpreter/llvm/src/include/llvm/Target/TargetOptions.h b/interpreter/llvm/src/include/llvm/Target/TargetOptions.h index 7cc33f2fdccbb..5c2063880f8be 100644 --- a/interpreter/llvm/src/include/llvm/Target/TargetOptions.h +++ b/interpreter/llvm/src/include/llvm/Target/TargetOptions.h @@ -105,10 +105,10 @@ namespace llvm { HonorSignDependentRoundingFPMathOption(false), NoZerosInBSS(false), GuaranteedTailCallOpt(false), StackSymbolOrdering(true), EnableFastISel(false), UseInitArray(false), - DisableIntegratedAS(false), CompressDebugSections(false), - RelaxELFRelocations(false), FunctionSections(false), - DataSections(false), UniqueSectionNames(true), TrapUnreachable(false), - EmulatedTLS(false), EnableIPRA(false) {} + DisableIntegratedAS(false), RelaxELFRelocations(false), + FunctionSections(false), DataSections(false), + UniqueSectionNames(true), TrapUnreachable(false), EmulatedTLS(false), + EnableIPRA(false) {} /// PrintMachineCode - This flag is enabled when the -print-machineinstrs /// option is specified on the command line, and should enable debugging @@ -194,7 +194,7 @@ namespace llvm { unsigned DisableIntegratedAS : 1; /// Compress DWARF debug sections. - unsigned CompressDebugSections : 1; + DebugCompressionType CompressDebugSections = DebugCompressionType::None; unsigned RelaxELFRelocations : 1; diff --git a/interpreter/llvm/src/include/llvm/Target/TargetRegisterInfo.h b/interpreter/llvm/src/include/llvm/Target/TargetRegisterInfo.h index 4ce6d2ff5e26b..b6839dad106f1 100644 --- a/interpreter/llvm/src/include/llvm/Target/TargetRegisterInfo.h +++ b/interpreter/llvm/src/include/llvm/Target/TargetRegisterInfo.h @@ -1,4 +1,4 @@ -//=== Target/TargetRegisterInfo.h - Target Register Information -*- C++ -*-===// +//==- Target/TargetRegisterInfo.h - Target Register Information --*- C++ -*-==// // // The LLVM Compiler Infrastructure // @@ -17,30 +17,35 @@ #define LLVM_TARGET_TARGETREGISTERINFO_H #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" #include "llvm/ADT/iterator_range.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineValueType.h" #include "llvm/IR/CallingConv.h" +#include "llvm/MC/LaneBitmask.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/Printable.h" #include +#include #include namespace llvm { class BitVector; +class LiveRegMatrix; class MachineFunction; +class MachineInstr; class RegScavenger; -template class SmallVectorImpl; class VirtRegMap; -class raw_ostream; -class LiveRegMatrix; class TargetRegisterClass { public: - typedef const MCPhysReg* iterator; - typedef const MCPhysReg* const_iterator; - typedef const TargetRegisterClass* const * sc_iterator; + using iterator = const MCPhysReg *; + using const_iterator = const MCPhysReg *; + using sc_iterator = const TargetRegisterClass* const *; // Instance variables filled by tablegen, do not use! const MCRegisterClass *MC; @@ -151,7 +156,6 @@ class TargetRegisterClass { /// There exists SuperRC where: /// For all Reg in SuperRC: /// this->contains(Reg:Idx) - /// const uint16_t *getSuperRegIndices() const { return SuperRegIndices; } @@ -182,7 +186,6 @@ class TargetRegisterClass { /// other criteria. 
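The typedef-to-using churn in this header is mechanical but not pointless: alias-declarations read left to right and, unlike typedef, can be templated. In miniature (the alias names are illustrative):

#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCRegisterInfo.h"

// typedef and using are interchangeable for plain aliases:
typedef const llvm::MCPhysReg *iterator_td;
using iterator_us = const llvm::MCPhysReg *;

// ...but only 'using' can introduce a *template* alias, one motivation
// for the LLVM-wide migration:
template <typename T> using Vec4 = llvm::SmallVector<T, 4>;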
/// /// By default, this method returns all registers in the class. - /// ArrayRef getRawAllocationOrder(const MachineFunction &MF) const { return OrderFunc ? OrderFunc(MF) : makeArrayRef(begin(), getNumRegs()); } @@ -217,8 +220,9 @@ struct RegClassWeight { /// class TargetRegisterInfo : public MCRegisterInfo { public: - typedef const TargetRegisterClass * const * regclass_iterator; - typedef const MVT::SimpleValueType* vt_iterator; + using regclass_iterator = const TargetRegisterClass * const *; + using vt_iterator = const MVT::SimpleValueType *; + private: const TargetRegisterInfoDesc *InfoDesc; // Extra desc array for codegen const char *const *SubRegIndexNames; // Names of subreg indexes. @@ -236,8 +240,8 @@ class TargetRegisterInfo : public MCRegisterInfo { const LaneBitmask *SRILaneMasks, LaneBitmask CoveringLanes); virtual ~TargetRegisterInfo(); -public: +public: // Register numbers can represent physical registers, virtual registers, and // sometimes stack slots. The unsigned values are divided into these ranges: // @@ -497,10 +501,20 @@ class TargetRegisterInfo : public MCRegisterInfo { /// function. Used by MachineRegisterInfo::isConstantPhysReg(). virtual bool isConstantPhysReg(unsigned PhysReg) const { return false; } + /// Physical registers that may be modified within a function but are + /// guaranteed to be restored before any uses. This is useful for targets that + /// have call sequences where a GOT register may be updated by the caller + /// prior to a call and is guaranteed to be restored (also by the caller) + /// after the call. + virtual bool isCallerPreservedPhysReg(unsigned PhysReg, + const MachineFunction &MF) const { + return false; + } + /// Prior to adding the live-out mask to a stackmap or patchpoint /// instruction, provide the target the opportunity to adjust it (mainly to /// remove pseudo-registers that should be ignored). - virtual void adjustStackMapLiveOutMask(uint32_t *Mask) const { } + virtual void adjustStackMapLiveOutMask(uint32_t *Mask) const {} /// Return a super-register of the specified register /// Reg so its sub-register of index SubIdx is Reg. @@ -558,7 +572,6 @@ class TargetRegisterInfo : public MCRegisterInfo { /// The ARM register Q0 has two D subregs dsub_0:D0 and dsub_1:D1. It also has /// ssub_0:S0 - ssub_3:S3 subregs. /// If you compose subreg indices dsub_1, ssub_0 you get ssub_2. - /// unsigned composeSubRegIndices(unsigned a, unsigned b) const { if (!a) return b; if (!b) return a; @@ -633,7 +646,6 @@ class TargetRegisterInfo : public MCRegisterInfo { /// corresponding argument register class. /// /// The function returns NULL if no register class can be found. - /// const TargetRegisterClass* getCommonSuperRegClass(const TargetRegisterClass *RCA, unsigned SubA, const TargetRegisterClass *RCB, unsigned SubB, @@ -644,7 +656,6 @@ class TargetRegisterInfo : public MCRegisterInfo { // /// Register class iterators - /// regclass_iterator regclass_begin() const { return RegClassBegin; } regclass_iterator regclass_end() const { return RegClassEnd; } iterator_range regclasses() const { @@ -899,7 +910,6 @@ class TargetRegisterInfo : public MCRegisterInfo { /// Return true if the register was spilled, false otherwise. /// If this function does not spill the register, the scavenger /// will instead spill it to the emergency spill slot. 
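isCallerPreservedPhysReg() models a register that callees may observe being updated but that every caller restores around calls (a GOT/TOC-style register, per the comment above). A sketch of a use site combining it with the existing isConstantPhysReg() query; the helper function is mine, not in-tree:

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/Target/TargetRegisterInfo.h"

// A read of PhysReg may be moved or rematerialized across a call if the
// register is either truly constant or guaranteed-restored by callers.
bool canReadAcrossCall(const llvm::TargetRegisterInfo &TRI, unsigned PhysReg,
                       const llvm::MachineFunction &MF) {
  return TRI.isConstantPhysReg(PhysReg) ||
         TRI.isCallerPreservedPhysReg(PhysReg, MF);
}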
- /// virtual bool saveScavengerRegister(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineBasicBlock::iterator &UseMI, @@ -958,7 +968,6 @@ class TargetRegisterInfo : public MCRegisterInfo { ArrayRef Exceptions = ArrayRef()) const; }; - //===----------------------------------------------------------------------===// // SuperRegClassIterator //===----------------------------------------------------------------------===// @@ -977,7 +986,7 @@ class TargetRegisterInfo : public MCRegisterInfo { // class SuperRegClassIterator { const unsigned RCMaskWords; - unsigned SubReg; + unsigned SubReg = 0; const uint16_t *Idx; const uint32_t *Mask; @@ -988,9 +997,7 @@ class SuperRegClassIterator { const TargetRegisterInfo *TRI, bool IncludeSelf = false) : RCMaskWords((TRI->getNumRegClasses() + 31) / 32), - SubReg(0), - Idx(RC->getSuperRegIndices()), - Mask(RC->getSubClassMask()) { + Idx(RC->getSuperRegIndices()), Mask(RC->getSubClassMask()) { if (!IncludeSelf) ++*this; } @@ -1029,12 +1036,12 @@ class BitMaskClassIterator { /// Base index of CurrentChunk. /// In other words, the number of bit we read to get at the /// beginning of that chunck. - unsigned Base; + unsigned Base = 0; /// Adjust base index of CurrentChunk. /// Base index + how many bit we read within CurrentChunk. - unsigned Idx; + unsigned Idx = 0; /// Current register class ID. - unsigned ID; + unsigned ID = 0; /// Mask we are iterating over. const uint32_t *Mask; /// Current chunk of the Mask we are traversing. @@ -1088,8 +1095,7 @@ class BitMaskClassIterator { /// /// \pre \p Mask != nullptr BitMaskClassIterator(const uint32_t *Mask, const TargetRegisterInfo &TRI) - : NumRegClasses(TRI.getNumRegClasses()), Base(0), Idx(0), ID(0), - Mask(Mask), CurrentChunk(*Mask) { + : NumRegClasses(TRI.getNumRegClasses()), Mask(Mask), CurrentChunk(*Mask) { // Move to the first ID. moveToNextID(); } @@ -1141,6 +1147,6 @@ Printable PrintRegUnit(unsigned Unit, const TargetRegisterInfo *TRI); /// registers on a \ref raw_ostream. 
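SuperRegClassIterator and BitMaskClassIterator both walk masks stored as little-endian 32-bit chunks, one register class per bit. The core loop, isolated as a standalone helper (the names are illustrative):

#include "llvm/Support/MathExtras.h"
#include <cstdint>

// Call F(Index) for every set bit in a mask made of NumWords 32-bit chunks.
template <typename Fn>
void forEachSetBit(const uint32_t *Mask, unsigned NumWords, Fn F) {
  for (unsigned W = 0; W != NumWords; ++W) {
    uint32_t Chunk = Mask[W];
    while (Chunk) {
      unsigned Bit = llvm::countTrailingZeros(Chunk);
      F(W * 32 + Bit);    // global bit index = chunk base + offset
      Chunk &= Chunk - 1; // clear the lowest set bit
    }
  }
}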
Printable PrintVRegOrUnit(unsigned VRegOrUnit, const TargetRegisterInfo *TRI); -} // End llvm namespace +} // end namespace llvm -#endif +#endif // LLVM_TARGET_TARGETREGISTERINFO_H diff --git a/interpreter/llvm/src/include/llvm/Target/TargetSubtargetInfo.h b/interpreter/llvm/src/include/llvm/Target/TargetSubtargetInfo.h index 83950a9cd027a..9440c56dcf17e 100644 --- a/interpreter/llvm/src/include/llvm/Target/TargetSubtargetInfo.h +++ b/interpreter/llvm/src/include/llvm/Target/TargetSubtargetInfo.h @@ -1,4 +1,4 @@ -//==-- llvm/Target/TargetSubtargetInfo.h - Target Information ----*- C++ -*-==// +//===- llvm/Target/TargetSubtargetInfo.h - Target Information ---*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -18,23 +18,31 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/PBQPRAConstraint.h" -#include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/ScheduleDAGMutation.h" -#include "llvm/MC/MCInst.h" +#include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/CodeGen.h" #include #include + namespace llvm { class CallLowering; +class InstrItineraryData; +struct InstrStage; class InstructionSelector; class LegalizerInfo; class MachineInstr; +struct MachineSchedPolicy; +struct MCReadAdvanceEntry; +struct MCWriteLatencyEntry; +struct MCWriteProcResEntry; class RegisterBankInfo; class SDep; class SelectionDAGTargetInfo; +struct SubtargetFeatureKV; +struct SubtargetInfoKV; class SUnit; class TargetFrameLowering; class TargetInstrInfo; @@ -42,7 +50,7 @@ class TargetLowering; class TargetRegisterClass; class TargetRegisterInfo; class TargetSchedModel; -struct MachineSchedPolicy; +class Triple; //===----------------------------------------------------------------------===// /// @@ -64,13 +72,13 @@ class TargetSubtargetInfo : public MCSubtargetInfo { public: // AntiDepBreakMode - Type of anti-dependence breaking that should // be performed before post-RA scheduling. - typedef enum { ANTIDEP_NONE, ANTIDEP_CRITICAL, ANTIDEP_ALL } AntiDepBreakMode; - typedef SmallVectorImpl RegClassVector; + using AntiDepBreakMode = enum { ANTIDEP_NONE, ANTIDEP_CRITICAL, ANTIDEP_ALL }; + using RegClassVector = SmallVectorImpl; TargetSubtargetInfo() = delete; TargetSubtargetInfo(const TargetSubtargetInfo &) = delete; - void operator=(const TargetSubtargetInfo &) = delete; - virtual ~TargetSubtargetInfo(); + TargetSubtargetInfo &operator=(const TargetSubtargetInfo &) = delete; + ~TargetSubtargetInfo() override; virtual bool isXRaySupported() const { return false; } @@ -112,7 +120,6 @@ class TargetSubtargetInfo : public MCSubtargetInfo { /// getRegisterInfo - If register information is available, return it. If /// not, return null. - /// virtual const TargetRegisterInfo *getRegisterInfo() const { return nullptr; } /// If the information for the register banks is available, return it. @@ -121,7 +128,6 @@ class TargetSubtargetInfo : public MCSubtargetInfo { /// getInstrItineraryData - Returns instruction itinerary data for the target /// or specific subtarget. 
- /// virtual const InstrItineraryData *getInstrItineraryData() const { return nullptr; } diff --git a/interpreter/llvm/src/include/llvm/Testing/Support/Error.h b/interpreter/llvm/src/include/llvm/Testing/Support/Error.h new file mode 100644 index 0000000000000..f23d289266adc --- /dev/null +++ b/interpreter/llvm/src/include/llvm/Testing/Support/Error.h @@ -0,0 +1,69 @@ +//===- llvm/Testing/Support/Error.h ---------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TESTING_SUPPORT_ERROR_H +#define LLVM_TESTING_SUPPORT_ERROR_H + +#include "llvm/ADT/Optional.h" +#include "llvm/Support/Error.h" +#include "llvm/Testing/Support/SupportHelpers.h" + +#include "gmock/gmock.h" +#include + +namespace llvm { +namespace detail { +ErrorHolder TakeError(Error Err); + +template ExpectedHolder TakeExpected(Expected &Exp) { + llvm::detail::ExpectedHolder Result; + auto &EH = static_cast(Result); + EH = TakeError(Exp.takeError()); + if (Result.Success) + Result.Value = &(*Exp); + return Result; +} + +template ExpectedHolder TakeExpected(Expected &&Exp) { + return TakeExpected(Exp); +} +} // namespace detail + +#define EXPECT_THAT_ERROR(Err, Matcher) \ + EXPECT_THAT(llvm::detail::TakeError(Err), Matcher) +#define ASSERT_THAT_ERROR(Err, Matcher) \ + ASSERT_THAT(llvm::detail::TakeError(Err), Matcher) + +#define EXPECT_THAT_EXPECTED(Err, Matcher) \ + EXPECT_THAT(llvm::detail::TakeExpected(Err), Matcher) +#define ASSERT_THAT_EXPECTED(Err, Matcher) \ + ASSERT_THAT(llvm::detail::TakeExpected(Err), Matcher) + +MATCHER(Succeeded, "") { return arg.Success; } +MATCHER(Failed, "") { return !arg.Success; } + +MATCHER_P(HasValue, value, + "succeeded with value " + testing::PrintToString(value)) { + if (!arg.Success) { + *result_listener << "operation failed"; + return false; + } + + assert(arg.Value.hasValue()); + if (**arg.Value != value) { + *result_listener << "but \"" + testing::PrintToString(**arg.Value) + + "\" != " + testing::PrintToString(value); + return false; + } + + return true; +} +} // namespace llvm + +#endif diff --git a/interpreter/llvm/src/include/llvm/Testing/Support/SupportHelpers.h b/interpreter/llvm/src/include/llvm/Testing/Support/SupportHelpers.h new file mode 100644 index 0000000000000..c4dd414b80dbc --- /dev/null +++ b/interpreter/llvm/src/include/llvm/Testing/Support/SupportHelpers.h @@ -0,0 +1,47 @@ +//===- Testing/Support/SupportHelpers.h -----------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TESTING_SUPPORT_SUPPORTHELPERS_H +#define LLVM_TESTING_SUPPORT_SUPPORTHELPERS_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Error.h" +#include "gtest/gtest-printers.h" + +namespace llvm { +namespace detail { +struct ErrorHolder { + bool Success; + std::string Message; +}; + +template struct ExpectedHolder : public ErrorHolder { + Optional Value; +}; + +inline void PrintTo(const ErrorHolder &Err, std::ostream *Out) { + *Out << (Err.Success ? 
"succeeded" : "failed"); + if (!Err.Success) { + *Out << " (" << StringRef(Err.Message).trim().str() << ")"; + } +} + +template +void PrintTo(const ExpectedHolder &Item, std::ostream *Out) { + if (Item.Success) { + *Out << "succeeded with value \"" << ::testing::PrintToString(**Item.Value) + << "\""; + } else { + PrintTo(static_cast(Item), Out); + } +} +} // namespace detail +} // namespace llvm + +#endif diff --git a/interpreter/llvm/src/include/llvm/ToolDrivers/llvm-dlltool/DlltoolDriver.h b/interpreter/llvm/src/include/llvm/ToolDrivers/llvm-dlltool/DlltoolDriver.h new file mode 100644 index 0000000000000..964b0f7620a2b --- /dev/null +++ b/interpreter/llvm/src/include/llvm/ToolDrivers/llvm-dlltool/DlltoolDriver.h @@ -0,0 +1,24 @@ +//===- DlltoolDriver.h - dlltool.exe-compatible driver ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Defines an interface to a dlltool.exe-compatible driver. +// Used by llvm-dlltool. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLDRIVERS_LLVM_DLLTOOL_DLLTOOLDRIVER_H +#define LLVM_TOOLDRIVERS_LLVM_DLLTOOL_DLLTOOLDRIVER_H + +namespace llvm { +template class ArrayRef; + +int dlltoolDriverMain(ArrayRef ArgsArr); +} // namespace llvm + +#endif diff --git a/interpreter/llvm/src/include/llvm/ToolDrivers/llvm-lib/LibDriver.h b/interpreter/llvm/src/include/llvm/ToolDrivers/llvm-lib/LibDriver.h new file mode 100644 index 0000000000000..a4806ac4ad698 --- /dev/null +++ b/interpreter/llvm/src/include/llvm/ToolDrivers/llvm-lib/LibDriver.h @@ -0,0 +1,24 @@ +//===- llvm-lib/LibDriver.h - lib.exe-compatible driver ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Defines an interface to a lib.exe-compatible driver that also understands +// bitcode files. Used by llvm-lib and lld-link /lib. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLDRIVERS_LLVM_LIB_LIBDRIVER_H +#define LLVM_TOOLDRIVERS_LLVM_LIB_LIBDRIVER_H + +namespace llvm { +template class ArrayRef; + +int libDriverMain(ArrayRef ARgs); +} + +#endif diff --git a/interpreter/llvm/src/include/llvm/Transforms/IPO/FunctionAttrs.h b/interpreter/llvm/src/include/llvm/Transforms/IPO/FunctionAttrs.h index 85d6364c8bbc9..36dd06b85b417 100644 --- a/interpreter/llvm/src/include/llvm/Transforms/IPO/FunctionAttrs.h +++ b/interpreter/llvm/src/include/llvm/Transforms/IPO/FunctionAttrs.h @@ -14,8 +14,8 @@ #ifndef LLVM_TRANSFORMS_IPO_FUNCTIONATTRS_H #define LLVM_TRANSFORMS_IPO_FUNCTIONATTRS_H -#include "llvm/Analysis/LazyCallGraph.h" #include "llvm/Analysis/CGSCCPassManager.h" +#include "llvm/Analysis/LazyCallGraph.h" #include "llvm/IR/PassManager.h" namespace llvm { diff --git a/interpreter/llvm/src/include/llvm/Transforms/IPO/FunctionImport.h b/interpreter/llvm/src/include/llvm/Transforms/IPO/FunctionImport.h index ed5742ab8b564..de35cdf052e1f 100644 --- a/interpreter/llvm/src/include/llvm/Transforms/IPO/FunctionImport.h +++ b/interpreter/llvm/src/include/llvm/Transforms/IPO/FunctionImport.h @@ -53,8 +53,7 @@ class FunctionImporter { : Index(Index), ModuleLoader(std::move(ModuleLoader)) {} /// Import functions in Module \p M based on the supplied import list. - Expected - importFunctions(Module &M, const ImportMapTy &ImportList); + Expected importFunctions(Module &M, const ImportMapTy &ImportList); private: /// The summaries index used to trigger importing. @@ -82,15 +81,11 @@ class FunctionImportPass : public PassInfoMixin { /// \p ExportLists contains for each Module the set of globals (GUID) that will /// be imported by another module, or referenced by such a function. I.e. this /// is the set of globals that need to be promoted/renamed appropriately. -/// -/// \p DeadSymbols (optional) contains a list of GUID that are deemed "dead" and -/// will be ignored for the purpose of importing. void ComputeCrossModuleImport( const ModuleSummaryIndex &Index, const StringMap &ModuleToDefinedGVSummaries, StringMap &ImportLists, - StringMap &ExportLists, - const DenseSet *DeadSymbols = nullptr); + StringMap &ExportLists); /// Compute all the imports for the given module using the Index. /// @@ -103,9 +98,9 @@ void ComputeCrossModuleImportForModule( /// Compute all the symbols that are "dead": i.e these that can't be reached /// in the graph from any of the given symbols listed in /// \p GUIDPreservedSymbols. -DenseSet -computeDeadSymbols(const ModuleSummaryIndex &Index, - const DenseSet &GUIDPreservedSymbols); +void computeDeadSymbols( + ModuleSummaryIndex &Index, + const DenseSet &GUIDPreservedSymbols); /// Compute the set of summaries needed for a ThinLTO backend compilation of /// \p ModulePath. 
diff --git a/interpreter/llvm/src/include/llvm/Transforms/IPO/PassManagerBuilder.h b/interpreter/llvm/src/include/llvm/Transforms/IPO/PassManagerBuilder.h index 247382c35eebf..276306f686ffa 100644 --- a/interpreter/llvm/src/include/llvm/Transforms/IPO/PassManagerBuilder.h +++ b/interpreter/llvm/src/include/llvm/Transforms/IPO/PassManagerBuilder.h @@ -145,11 +145,9 @@ class PassManagerBuilder { bool DisableTailCalls; bool DisableUnitAtATime; bool DisableUnrollLoops; - bool BBVectorize; bool SLPVectorize; bool LoopVectorize; bool RerollLoops; - bool LoadCombine; bool NewGVN; bool DisableGVNLoadPRE; bool VerifyInput; diff --git a/interpreter/llvm/src/include/llvm/Transforms/IPO/ThinLTOBitcodeWriter.h b/interpreter/llvm/src/include/llvm/Transforms/IPO/ThinLTOBitcodeWriter.h new file mode 100644 index 0000000000000..bf04bbfe92d81 --- /dev/null +++ b/interpreter/llvm/src/include/llvm/Transforms/IPO/ThinLTOBitcodeWriter.h @@ -0,0 +1,41 @@ +//===- ThinLTOBitcodeWriter.h - Bitcode writing pass for ThinLTO ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass prepares a module containing type metadata for ThinLTO by splitting +// it into regular and thin LTO parts if possible, and writing both parts to +// a multi-module bitcode file. Modules that do not contain type metadata are +// written unmodified as a single module. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_IPO_THINLTOBITCODEWRITER_H +#define LLVM_TRANSFORMS_IPO_THINLTOBITCODEWRITER_H + +#include +#include + +namespace llvm { + +class ThinLTOBitcodeWriterPass + : public PassInfoMixin { + raw_ostream &OS; + raw_ostream *ThinLinkOS; + +public: + // Writes bitcode to OS. Also write thin link file to ThinLinkOS, if + // it's not nullptr. + ThinLTOBitcodeWriterPass(raw_ostream &OS, raw_ostream *ThinLinkOS) + : OS(OS), ThinLinkOS(ThinLinkOS) {} + + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); +}; + +} // namespace llvm + +#endif diff --git a/interpreter/llvm/src/include/llvm/Transforms/InstrProfiling.h b/interpreter/llvm/src/include/llvm/Transforms/InstrProfiling.h index 65e69761baddd..0fe6ad5eeac7d 100644 --- a/interpreter/llvm/src/include/llvm/Transforms/InstrProfiling.h +++ b/interpreter/llvm/src/include/llvm/Transforms/InstrProfiling.h @@ -28,6 +28,7 @@ namespace llvm { class TargetLibraryInfo; +using LoadStorePair = std::pair; /// Instrumentation based profiling lowering pass. This pass lowers /// the profile instrumented code generated by FE or the IR based @@ -60,11 +61,26 @@ class InstrProfiling : public PassInfoMixin { GlobalVariable *NamesVar; size_t NamesSize; + // vector of counter load/store pairs to be register promoted. + std::vector PromotionCandidates; + // The start value of precise value profile range for memory intrinsic sizes. int64_t MemOPSizeRangeStart; // The end value of precise value profile range for memory intrinsic sizes. int64_t MemOPSizeRangeLast; + int64_t TotalCountersPromoted = 0; + + /// Lower instrumentation intrinsics in the function. Returns true if there + /// any lowering. + bool lowerIntrinsics(Function *F); + + /// Register-promote counter loads and stores in loops. + void promoteCounterLoadStores(Function *F); + + /// Returns true if profile counter update register promotion is enabled. 
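ThinLTOBitcodeWriterPass is a new-pass-manager module pass, so it is scheduled like any other; its constructor takes the bitcode stream plus an optional thin-link stream, as the header above shows. A driving sketch (it assumes the analysis manager has already been populated, e.g. via PassBuilder):

#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO/ThinLTOBitcodeWriter.h"

void writeThinLTOBitcode(llvm::Module &M, llvm::ModuleAnalysisManager &MAM,
                         llvm::raw_ostream &OS) {
  llvm::ModulePassManager MPM;
  // No separate thin-link file requested, hence nullptr.
  MPM.addPass(llvm::ThinLTOBitcodeWriterPass(OS, /*ThinLinkOS=*/nullptr));
  MPM.run(M, MAM);
}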
+ bool isCounterPromotionEnabled() const; + /// Count the number of instrumented value sites for the function. void computeNumValueSiteCounts(InstrProfValueProfileInst *Ins); diff --git a/interpreter/llvm/src/include/llvm/Transforms/Instrumentation.h b/interpreter/llvm/src/include/llvm/Transforms/Instrumentation.h index 023d7af7f729d..f2fc6dc8dad5e 100644 --- a/interpreter/llvm/src/include/llvm/Transforms/Instrumentation.h +++ b/interpreter/llvm/src/include/llvm/Transforms/Instrumentation.h @@ -116,6 +116,9 @@ struct InstrProfOptions { // Add the 'noredzone' attribute to added runtime library calls. bool NoRedZone = false; + // Do counter register promotion + bool DoCounterPromotion = false; + // Name of the profile file to use as output std::string InstrProfileOutput; @@ -177,6 +180,7 @@ struct SanitizerCoverageOptions { bool Use8bitCounters = false; bool TracePC = false; bool TracePCGuard = false; + bool Inline8bitCounters = false; bool NoPrune = false; SanitizerCoverageOptions() = default; diff --git a/interpreter/llvm/src/include/llvm/Transforms/SampleProfile.h b/interpreter/llvm/src/include/llvm/Transforms/SampleProfile.h index 93fa9532cc3a7..c984fe74ba939 100644 --- a/interpreter/llvm/src/include/llvm/Transforms/SampleProfile.h +++ b/interpreter/llvm/src/include/llvm/Transforms/SampleProfile.h @@ -21,6 +21,10 @@ namespace llvm { class SampleProfileLoaderPass : public PassInfoMixin { public: PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); + SampleProfileLoaderPass(std::string File = "") : ProfileFileName(File) {} + +private: + std::string ProfileFileName; }; } // End llvm namespace diff --git a/interpreter/llvm/src/include/llvm/Transforms/Scalar.h b/interpreter/llvm/src/include/llvm/Transforms/Scalar.h index ba0a3ee1287a4..1913a9d5da027 100644 --- a/interpreter/llvm/src/include/llvm/Transforms/Scalar.h +++ b/interpreter/llvm/src/include/llvm/Transforms/Scalar.h @@ -354,6 +354,13 @@ FunctionPass *createEarlyCSEPass(bool UseMemorySSA = false); // FunctionPass *createGVNHoistPass(); +//===----------------------------------------------------------------------===// +// +// GVNSink - This pass uses an "inverted" value numbering to decide the +// similarity of expressions and sinks similar expressions into successors. +// +FunctionPass *createGVNSinkPass(); + //===----------------------------------------------------------------------===// // // MergedLoadStoreMotion - This pass merges loads and stores in diamonds. Loads @@ -478,12 +485,6 @@ FunctionPass *createSpeculativeExecutionPass(); // TargetTransformInfo::hasBranchDivergence() is true. FunctionPass *createSpeculativeExecutionIfHasBranchDivergencePass(); -//===----------------------------------------------------------------------===// -// -// LoadCombine - Combine loads into bigger loads. 
-// -BasicBlockPass *createLoadCombinePass(); - //===----------------------------------------------------------------------===// // // StraightLineStrengthReduce - This pass strength-reduces some certain diff --git a/interpreter/llvm/src/include/llvm/Transforms/Scalar/ConstantHoisting.h b/interpreter/llvm/src/include/llvm/Transforms/Scalar/ConstantHoisting.h index edc91add7a737..a2a9afc083a0b 100644 --- a/interpreter/llvm/src/include/llvm/Transforms/Scalar/ConstantHoisting.h +++ b/interpreter/llvm/src/include/llvm/Transforms/Scalar/ConstantHoisting.h @@ -131,6 +131,8 @@ class ConstantHoistingPass : public PassInfoMixin { void collectConstantCandidates(ConstCandMapType &ConstCandMap, Instruction *Inst, unsigned Idx, ConstantInt *ConstInt); + void collectConstantCandidates(ConstCandMapType &ConstCandMap, + Instruction *Inst, unsigned Idx); void collectConstantCandidates(ConstCandMapType &ConstCandMap, Instruction *Inst); void collectConstantCandidates(Function &Fn); diff --git a/interpreter/llvm/src/include/llvm/Transforms/Scalar/GVN.h b/interpreter/llvm/src/include/llvm/Transforms/Scalar/GVN.h index 8f05e8cdb2336..f25ab40640dfc 100644 --- a/interpreter/llvm/src/include/llvm/Transforms/Scalar/GVN.h +++ b/interpreter/llvm/src/include/llvm/Transforms/Scalar/GVN.h @@ -209,7 +209,7 @@ class GVN : public PassInfoMixin { // Other helper routines bool processInstruction(Instruction *I); bool processBlock(BasicBlock *BB); - void dump(DenseMap &d); + void dump(DenseMap &d) const; bool iterateOnFunction(Function &F); bool performPRE(Function &F); bool performScalarPRE(Instruction *I); @@ -238,7 +238,12 @@ struct GVNHoistPass : PassInfoMixin { /// \brief Run the pass over the function. PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); }; - +/// \brief Uses an "inverted" value numbering to decide the similarity of +/// expressions and sinks similar expressions into successors. +struct GVNSinkPass : PassInfoMixin { + /// \brief Run the pass over the function. + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; } #endif diff --git a/interpreter/llvm/src/include/llvm/Transforms/Scalar/GVNExpression.h b/interpreter/llvm/src/include/llvm/Transforms/Scalar/GVNExpression.h index 2670a0c1a5339..f603ebcbca7cc 100644 --- a/interpreter/llvm/src/include/llvm/Transforms/Scalar/GVNExpression.h +++ b/interpreter/llvm/src/include/llvm/Transforms/Scalar/GVNExpression.h @@ -40,6 +40,7 @@ enum ExpressionType { ET_Base, ET_Constant, ET_Variable, + ET_Dead, ET_Unknown, ET_BasicStart, ET_Basic, @@ -57,10 +58,11 @@ class Expression { private: ExpressionType EType; unsigned Opcode; + mutable hash_code HashVal; public: Expression(ExpressionType ET = ET_Base, unsigned O = ~2U) - : EType(ET), Opcode(O) {} + : EType(ET), Opcode(O), HashVal(0) {} Expression(const Expression &) = delete; Expression &operator=(const Expression &) = delete; virtual ~Expression(); @@ -81,8 +83,21 @@ class Expression { return equals(Other); } + hash_code getComputedHash() const { + // It's theoretically possible for a thing to hash to zero. In that case, + // we will just compute the hash a few extra times, which is no worse that + // we did before, which was to compute it always. + if (static_cast(HashVal) == 0) + HashVal = getHashValue(); + return HashVal; + } virtual bool equals(const Expression &Other) const { return true; } + // Return true if the two expressions are exactly the same, including the + // normally ignored fields. 
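getComputedHash() above is a memoize-with-sentinel idiom: the cache lives in a mutable member so const expressions can fill it lazily, and a stored value of 0 doubles as "not yet computed", at the cost of re-hashing the rare value whose real hash is 0. The same idiom in isolation:

#include <cstddef>
#include <functional>
#include <string>

class Key {
  std::string Text;
  mutable std::size_t CachedHash = 0; // 0 doubles as "not computed yet"

public:
  explicit Key(std::string T) : Text(std::move(T)) {}

  std::size_t hash() const {
    // If Text genuinely hashes to 0 we recompute on every call, which is
    // harmless: just no caching for that one value (same caveat as above).
    if (CachedHash == 0)
      CachedHash = std::hash<std::string>()(Text);
    return CachedHash;
  }
};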
+ virtual bool exactlyEquals(const Expression &Other) const { + return getExpressionType() == Other.getExpressionType() && equals(Other); + } unsigned getOpcode() const { return Opcode; } void setOpcode(unsigned opcode) { Opcode = opcode; } @@ -106,10 +121,7 @@ class Expression { OS << "}"; } - LLVM_DUMP_METHOD void dump() const { - print(dbgs()); - dbgs() << "\n"; - } + LLVM_DUMP_METHOD void dump() const; }; inline raw_ostream &operator<<(raw_ostream &OS, const Expression &E) { @@ -335,6 +347,10 @@ class LoadExpression final : public MemoryExpression { void setAlignment(unsigned Align) { Alignment = Align; } bool equals(const Expression &Other) const override; + bool exactlyEquals(const Expression &Other) const override { + return Expression::exactlyEquals(Other) && + cast(Other).getLoadInst() == getLoadInst(); + } // // Debugging support @@ -372,6 +388,10 @@ class StoreExpression final : public MemoryExpression { Value *getStoredValue() const { return StoredValue; } bool equals(const Expression &Other) const override; + bool exactlyEquals(const Expression &Other) const override { + return Expression::exactlyEquals(Other) && + cast(Other).getStoreInst() == getStoreInst(); + } // Debugging support // @@ -380,7 +400,9 @@ class StoreExpression final : public MemoryExpression { OS << "ExpressionTypeStore, "; this->BasicExpression::printInternal(OS, false); OS << " represents Store " << *Store; - OS << " with MemoryLeader " << *getMemoryLeader(); + OS << " with StoredValue "; + StoredValue->printAsOperand(OS); + OS << " and MemoryLeader " << *getMemoryLeader(); } }; @@ -513,6 +535,17 @@ class PHIExpression final : public BasicExpression { } }; +class DeadExpression final : public Expression { +public: + DeadExpression() : Expression(ET_Dead) {} + DeadExpression(const DeadExpression &) = delete; + DeadExpression &operator=(const DeadExpression &) = delete; + + static bool classof(const Expression *E) { + return E->getExpressionType() == ET_Dead; + } +}; + class VariableExpression final : public Expression { private: Value *VariableValue; diff --git a/interpreter/llvm/src/include/llvm/Transforms/Scalar/Reassociate.h b/interpreter/llvm/src/include/llvm/Transforms/Scalar/Reassociate.h index 7b68b44893063..a30a7176baa8b 100644 --- a/interpreter/llvm/src/include/llvm/Transforms/Scalar/Reassociate.h +++ b/interpreter/llvm/src/include/llvm/Transforms/Scalar/Reassociate.h @@ -82,8 +82,6 @@ class ReassociatePass : public PassInfoMixin { bool CombineXorOpnd(Instruction *I, reassociate::XorOpnd *Opnd1, reassociate::XorOpnd *Opnd2, APInt &ConstOpnd, Value *&Res); - bool collectMultiplyFactors(SmallVectorImpl &Ops, - SmallVectorImpl &Factors); Value *buildMinimalMultiplyDAG(IRBuilder<> &Builder, SmallVectorImpl &Factors); Value *OptimizeMul(BinaryOperator *I, diff --git a/interpreter/llvm/src/include/llvm/Transforms/Utils/Cloning.h b/interpreter/llvm/src/include/llvm/Transforms/Utils/Cloning.h index 91c9d255302ff..2a8b89d862821 100644 --- a/interpreter/llvm/src/include/llvm/Transforms/Utils/Cloning.h +++ b/interpreter/llvm/src/include/llvm/Transforms/Utils/Cloning.h @@ -36,6 +36,7 @@ class BasicBlock; class BlockFrequencyInfo; class CallInst; class CallGraph; +class DebugInfoFinder; class DominatorTree; class Function; class Instruction; @@ -110,7 +111,8 @@ struct ClonedCodeInfo { /// BasicBlock *CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix = "", Function *F = nullptr, - ClonedCodeInfo *CodeInfo = nullptr); + ClonedCodeInfo *CodeInfo = nullptr, + DebugInfoFinder 
*DIFinder = nullptr); /// CloneFunction - Return a copy of the specified function and add it to that /// function's module. Also, any references specified in the VMap are changed diff --git a/interpreter/llvm/src/include/llvm/Transforms/Utils/CodeExtractor.h b/interpreter/llvm/src/include/llvm/Transforms/Utils/CodeExtractor.h index a602498e5f221..682b353ab5ae8 100644 --- a/interpreter/llvm/src/include/llvm/Transforms/Utils/CodeExtractor.h +++ b/interpreter/llvm/src/include/llvm/Transforms/Utils/CodeExtractor.h @@ -25,6 +25,7 @@ template class ArrayRef; class BranchProbabilityInfo; class DominatorTree; class Function; + class Instruction; class Loop; class Module; class RegionNode; @@ -103,7 +104,34 @@ template class ArrayRef; /// a code sequence, that sequence is modified, including changing these /// sets, before extraction occurs. These modifications won't have any /// significant impact on the cost however. - void findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs) const; + void findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs, + const ValueSet &Allocas) const; + + /// Check if life time marker nodes can be hoisted/sunk into the outline + /// region. + /// + /// Returns true if it is safe to do the code motion. + bool isLegalToShrinkwrapLifetimeMarkers(Instruction *AllocaAddr) const; + /// Find the set of allocas whose life ranges are contained within the + /// outlined region. + /// + /// Allocas which have life_time markers contained in the outlined region + /// should be pushed to the outlined function. The address bitcasts that + /// are used by the lifetime markers are also candidates for shrink- + /// wrapping. The instructions that need to be sunk are collected in + /// 'Allocas'. + void findAllocas(ValueSet &SinkCands, ValueSet &HoistCands, + BasicBlock *&ExitBlock) const; + + /// Find or create a block within the outline region for placing hoisted + /// code. + /// + /// CommonExitBlock is block outside the outline region. It is the common + /// successor of blocks inside the region. If there exists a single block + /// inside the region that is the predecessor of CommonExitBlock, that block + /// will be returned. Otherwise CommonExitBlock will be split and the + /// original block will be added to the outline region. 
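The DeadExpression class added above plugs into LLVM's hand-rolled RTTI: its constructor stamps the ET_Dead tag and classof() checks it, which is all that isa<>, cast<> and dyn_cast<> need. A small usage sketch over the GVNExpression hierarchy (the predicate itself is illustrative):

#include "llvm/Support/Casting.h"
#include "llvm/Transforms/Scalar/GVNExpression.h"

using namespace llvm::GVNExpression;

bool isDeadOrVariable(const Expression *E) {
  // isa<> calls DeadExpression::classof(E) under the hood, which just
  // inspects the ExpressionType tag set in the constructor.
  return llvm::isa<DeadExpression>(E) || llvm::isa<VariableExpression>(E);
}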
+ BasicBlock *findOrCreateBlockForHoisting(BasicBlock *CommonExitBlock); private: void severSplitPHINodes(BasicBlock *&Header); diff --git a/interpreter/llvm/src/include/llvm/Transforms/Utils/EscapeEnumerator.h b/interpreter/llvm/src/include/llvm/Transforms/Utils/EscapeEnumerator.h index 80d16ed4cf5bf..1256dfdaca172 100644 --- a/interpreter/llvm/src/include/llvm/Transforms/Utils/EscapeEnumerator.h +++ b/interpreter/llvm/src/include/llvm/Transforms/Utils/EscapeEnumerator.h @@ -15,8 +15,8 @@ #ifndef LLVM_TRANSFORMS_UTILS_ESCAPEENUMERATOR_H #define LLVM_TRANSFORMS_UTILS_ESCAPEENUMERATOR_H -#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" namespace llvm { diff --git a/interpreter/llvm/src/include/llvm/Transforms/Utils/FunctionComparator.h b/interpreter/llvm/src/include/llvm/Transforms/Utils/FunctionComparator.h index ee58d1d138f74..b0f10eafaa95f 100644 --- a/interpreter/llvm/src/include/llvm/Transforms/Utils/FunctionComparator.h +++ b/interpreter/llvm/src/include/llvm/Transforms/Utils/FunctionComparator.h @@ -19,8 +19,8 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/IR/Function.h" -#include "llvm/IR/ValueMap.h" #include "llvm/IR/Operator.h" +#include "llvm/IR/ValueMap.h" #include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/Casting.h" #include diff --git a/interpreter/llvm/src/include/llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h b/interpreter/llvm/src/include/llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h index bb7fa523cb198..b7a3d130aa11e 100644 --- a/interpreter/llvm/src/include/llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h +++ b/interpreter/llvm/src/include/llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h @@ -14,8 +14,8 @@ #define LLVM_TRANSFORMS_UTILS_IMPORTEDFUNCTIONSINLININGSTATISTICS_H #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" #include #include diff --git a/interpreter/llvm/src/include/llvm/Transforms/Utils/Local.h b/interpreter/llvm/src/include/llvm/Transforms/Utils/Local.h index b5a5f4c2704c3..30b27616cd982 100644 --- a/interpreter/llvm/src/include/llvm/Transforms/Utils/Local.h +++ b/interpreter/llvm/src/include/llvm/Transforms/Utils/Local.h @@ -15,13 +15,13 @@ #ifndef LLVM_TRANSFORMS_UTILS_LOCAL_H #define LLVM_TRANSFORMS_UTILS_LOCAL_H +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Operator.h" -#include "llvm/ADT/SmallPtrSet.h" namespace llvm { @@ -356,6 +356,10 @@ void combineMetadata(Instruction *K, const Instruction *J, ArrayRef Kn /// Unknown metadata is removed. void combineMetadataForCSE(Instruction *K, const Instruction *J); +// Replace each use of 'From' with 'To', if that use does not belong to basic +// block where 'From' is defined. Returns the number of replacements made. +unsigned replaceNonLocalUsesWith(Instruction *From, Value *To); + /// Replace each use of 'From' with 'To' if that use is dominated by /// the given edge. Returns the number of replacements made. unsigned replaceDominatedUsesWith(Value *From, Value *To, DominatorTree &DT, @@ -376,6 +380,19 @@ unsigned replaceDominatedUsesWith(Value *From, Value *To, DominatorTree &DT, /// during lowering by the GC infrastructure. 
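replaceNonLocalUsesWith(), declared in the Local.h hunk above, rewrites only the uses that live outside the defining block. A sketch of those semantics, reconstructed from the comment rather than from the in-tree body:

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"

unsigned replaceNonLocalUsesWithSketch(llvm::Instruction *From,
                                       llvm::Value *To) {
  unsigned Count = 0;
  llvm::BasicBlock *BB = From->getParent();
  for (auto UI = From->use_begin(), UE = From->use_end(); UI != UE;) {
    llvm::Use &U = *UI++; // advance first: U.set() unlinks U from the list
    auto *UserI = llvm::dyn_cast<llvm::Instruction>(U.getUser());
    if (!UserI || UserI->getParent() == BB)
      continue; // local (same-block) uses are left alone
    U.set(To);
    ++Count;
  }
  return Count;
}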
bool callsGCLeafFunction(ImmutableCallSite CS); +/// Copy a nonnull metadata node to a new load instruction. +/// +/// This handles mapping it to range metadata if the new load is an integer +/// load instead of a pointer load. +void copyNonnullMetadata(const LoadInst &OldLI, MDNode *N, LoadInst &NewLI); + +/// Copy a range metadata node to a new load instruction. +/// +/// This handles mapping it to nonnull metadata if the new load is a pointer +/// load instead of an integer load and the range doesn't cover null. +void copyRangeMetadata(const DataLayout &DL, const LoadInst &OldLI, MDNode *N, + LoadInst &NewLI); + //===----------------------------------------------------------------------===// // Intrinsic pattern matching // @@ -406,6 +423,14 @@ bool recognizeBSwapOrBitReverseIdiom( void maybeMarkSanitizerLibraryCallNoBuiltin(CallInst *CI, const TargetLibraryInfo *TLI); +//===----------------------------------------------------------------------===// +// Transform predicates +// + +/// Given an instruction, is it legal to set operand OpIdx to a non-constant +/// value? +bool canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx); + } // End llvm namespace #endif diff --git a/interpreter/llvm/src/include/llvm/Transforms/Utils/LoopUtils.h b/interpreter/llvm/src/include/llvm/Transforms/Utils/LoopUtils.h index 561f948806240..94e20b83754e7 100644 --- a/interpreter/llvm/src/include/llvm/Transforms/Utils/LoopUtils.h +++ b/interpreter/llvm/src/include/llvm/Transforms/Utils/LoopUtils.h @@ -184,9 +184,14 @@ class RecurrenceDescriptor { /// Returns true if Phi is a first-order recurrence. A first-order recurrence /// is a non-reduction recurrence relation in which the value of the /// recurrence in the current loop iteration equals a value defined in the - /// previous iteration. - static bool isFirstOrderRecurrence(PHINode *Phi, Loop *TheLoop, - DominatorTree *DT); + /// previous iteration. \p SinkAfter includes pairs of instructions where the + /// first will be rescheduled to appear after the second if/when the loop is + /// vectorized. It may be augmented with additional pairs if needed in order + /// to handle Phi as a first-order recurrence. + static bool + isFirstOrderRecurrence(PHINode *Phi, Loop *TheLoop, + DenseMap &SinkAfter, + DominatorTree *DT); RecurrenceKind getRecurrenceKind() { return Kind; } @@ -362,6 +367,14 @@ class InductionDescriptor { BasicBlock *InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA); +/// Ensure that all exit blocks of the loop are dedicated exits. +/// +/// For any loop exit block with non-loop predecessors, we split the loop +/// predecessors to use a dedicated loop exit block. We update the dominator +/// tree and loop info if provided, and will preserve LCSSA if requested. +bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, + bool PreserveLCSSA); + /// Ensures LCSSA form for every instruction from the Worklist in the scope of /// innermost containing loop. /// @@ -518,8 +531,10 @@ Value *createTargetReduction(IRBuilder<> &B, const TargetTransformInfo *TTI, /// Get the intersection (logical and) of all of the potential IR flags /// of each scalar operation (VL) that will be converted into a vector (I). +/// If OpValue is non-null, we only consider operations similar to OpValue +/// when intersecting. /// Flag set: NSW, NUW, exact, and all of fast-math. 
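In source terms, a first-order recurrence is simply "last iteration's value of something", with no reduction involved; the new SinkAfter map lets the vectorizer reschedule a user after its producer when that is what makes the PHI handleable. For example:

// 'Prev' is a first-order recurrence: in iteration I it holds a value
// defined in iteration I-1. The vectorizer models it with a PHI and may
// need to sink users after the producer, which SinkAfter communicates.
void shiftedSum(const int *A, int *B, int N) {
  int Prev = 0;
  for (int I = 0; I < N; ++I) {
    B[I] = A[I] + Prev; // uses the previous iteration's A[I]
    Prev = A[I];
  }
}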
-void propagateIRFlags(Value *I, ArrayRef<Value *> VL);
+void propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue = nullptr);
 
 } // end namespace llvm
diff --git a/interpreter/llvm/src/include/llvm/Transforms/Utils/LoopVersioning.h b/interpreter/llvm/src/include/llvm/Transforms/Utils/LoopVersioning.h
index 0d345a972e103..fa5d7845d0808 100644
--- a/interpreter/llvm/src/include/llvm/Transforms/Utils/LoopVersioning.h
+++ b/interpreter/llvm/src/include/llvm/Transforms/Utils/LoopVersioning.h
@@ -18,8 +18,8 @@
 #include "llvm/Analysis/LoopAccessAnalysis.h"
 #include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Transforms/Utils/ValueMapper.h"
 #include "llvm/Transforms/Utils/LoopUtils.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
 
 namespace llvm {
diff --git a/interpreter/llvm/src/include/llvm/Transforms/Utils/LowerMemIntrinsics.h b/interpreter/llvm/src/include/llvm/Transforms/Utils/LowerMemIntrinsics.h
index e4906b709e4bf..4554b5cbc6440 100644
--- a/interpreter/llvm/src/include/llvm/Transforms/Utils/LowerMemIntrinsics.h
+++ b/interpreter/llvm/src/include/llvm/Transforms/Utils/LowerMemIntrinsics.h
@@ -17,21 +17,39 @@
 namespace llvm {
 
+class ConstantInt;
 class Instruction;
 class MemCpyInst;
 class MemMoveInst;
 class MemSetInst;
+class TargetTransformInfo;
 class Value;
 
 /// Emit a loop implementing the semantics of llvm.memcpy with the equivalent
 /// arguments at \p InsertBefore.
-void createMemCpyLoop(Instruction *InsertBefore,
-                      Value *SrcAddr, Value *DstAddr, Value *CopyLen,
-                      unsigned SrcAlign, unsigned DestAlign,
+void createMemCpyLoop(Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr,
+                      Value *CopyLen, unsigned SrcAlign, unsigned DestAlign,
                       bool SrcIsVolatile, bool DstIsVolatile);
 
+/// Emit a loop implementing the semantics of llvm.memcpy where the size is not
+/// a compile-time constant. Loop will be inserted at \p InsertBefore.
+void createMemCpyLoopUnknownSize(Instruction *InsertBefore, Value *SrcAddr,
+                                 Value *DstAddr, Value *CopyLen,
+                                 unsigned SrcAlign, unsigned DestAlign,
+                                 bool SrcIsVolatile, bool DstIsVolatile,
+                                 const TargetTransformInfo &TTI);
+
+/// Emit a loop implementing the semantics of an llvm.memcpy whose size is a
+/// compile-time constant. Loop is inserted at \p InsertBefore.
+void createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr,
+                               Value *DstAddr, ConstantInt *CopyLen,
+                               unsigned SrcAlign, unsigned DestAlign,
+                               bool SrcIsVolatile, bool DstIsVolatile,
+                               const TargetTransformInfo &TTI);
+
+
 /// Expand \p MemCpy as a loop. \p MemCpy is not deleted.
-void expandMemCpyAsLoop(MemCpyInst *MemCpy);
+void expandMemCpyAsLoop(MemCpyInst *MemCpy, const TargetTransformInfo &TTI);
 
 /// Expand \p MemMove as a loop. \p MemMove is not deleted.
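With the added TTI parameter, a caller of the expansion helper now looks like this; note the documented contract that the intrinsic itself is left in place, so callers erase it themselves (the wrapper function is mine):

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Transforms/Utils/LowerMemIntrinsics.h"

// Replace a memcpy intrinsic with an explicit copy loop, then drop the
// now-dead intrinsic (expandMemCpyAsLoop does not erase it itself).
void lowerMemCpy(llvm::MemCpyInst *MC, const llvm::TargetTransformInfo &TTI) {
  llvm::expandMemCpyAsLoop(MC, TTI);
  MC->eraseFromParent();
}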
void expandMemMoveAsLoop(MemMoveInst *MemMove); diff --git a/interpreter/llvm/src/include/llvm/Transforms/Utils/Mem2Reg.h b/interpreter/llvm/src/include/llvm/Transforms/Utils/Mem2Reg.h index 456876b520b0f..1fe186d6c3ad9 100644 --- a/interpreter/llvm/src/include/llvm/Transforms/Utils/Mem2Reg.h +++ b/interpreter/llvm/src/include/llvm/Transforms/Utils/Mem2Reg.h @@ -25,4 +25,4 @@ class PromotePass : public PassInfoMixin<PromotePass> { }; } -#endif // LLVM_TRANSFORMS_UTILS_MEM2REG_H \ No newline at end of file +#endif // LLVM_TRANSFORMS_UTILS_MEM2REG_H diff --git a/interpreter/llvm/src/include/llvm/Transforms/Utils/OrderedInstructions.h b/interpreter/llvm/src/include/llvm/Transforms/Utils/OrderedInstructions.h new file mode 100644 index 0000000000000..165d4bdaa6d41 --- /dev/null +++ b/interpreter/llvm/src/include/llvm/Transforms/Utils/OrderedInstructions.h @@ -0,0 +1,54 @@ +//===- llvm/Transforms/Utils/OrderedInstructions.h -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines an efficient way to check the dominance relation between 2 +// instructions. +// +// This interface dispatches to the appropriate dominance check given 2 +// instructions, i.e. in case the instructions are in the same basic block, +// OrderedBasicBlock (with instruction numbering and caching) is used. +// Otherwise, the dominator tree is used. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_UTILS_ORDEREDINSTRUCTIONS_H +#define LLVM_TRANSFORMS_UTILS_ORDEREDINSTRUCTIONS_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/Analysis/OrderedBasicBlock.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Operator.h" + +namespace llvm { + +class OrderedInstructions { + /// Used to check dominance for instructions in the same basic block. + mutable DenseMap<const BasicBlock *, std::unique_ptr<OrderedBasicBlock>> + OBBMap; + + /// The dominator tree of the parent function. + DominatorTree *DT; + +public: + /// Constructor. + OrderedInstructions(DominatorTree *DT) : DT(DT) {} + + /// Return true if the first instruction dominates the second. + bool dominates(const Instruction *, const Instruction *) const; + + /// Invalidate the OrderedBasicBlock cache when its basic block changes. + /// i.e., if an instruction is deleted or added to the basic block, the user + /// should call this function to invalidate the OrderedBasicBlock cache for + /// this basic block.
+ void invalidateBlock(const BasicBlock *BB) { OBBMap.erase(BB); } +}; + +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_UTILS_ORDEREDINSTRUCTIONS_H diff --git a/interpreter/llvm/src/include/llvm/Transforms/Utils/PredicateInfo.h b/interpreter/llvm/src/include/llvm/Transforms/Utils/PredicateInfo.h index 1322c686eb900..8150f1528397e 100644 --- a/interpreter/llvm/src/include/llvm/Transforms/Utils/PredicateInfo.h +++ b/interpreter/llvm/src/include/llvm/Transforms/Utils/PredicateInfo.h @@ -74,6 +74,7 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Transforms/Utils/OrderedInstructions.h" #include #include #include @@ -89,7 +90,6 @@ class Instruction; class MemoryAccess; class LLVMContext; class raw_ostream; -class OrderedBasicBlock; enum PredicateType { PT_Branch, PT_Assume, PT_Switch }; @@ -114,8 +114,9 @@ class PredicateBase : public ilist_node { class PredicateWithCondition : public PredicateBase { public: Value *Condition; - static inline bool classof(const PredicateBase *PB) { - return PB->Type == PT_Assume || PB->Type == PT_Branch || PB->Type == PT_Switch; + static bool classof(const PredicateBase *PB) { + return PB->Type == PT_Assume || PB->Type == PT_Branch || + PB->Type == PT_Switch; } protected: @@ -133,7 +134,7 @@ class PredicateAssume : public PredicateWithCondition { : PredicateWithCondition(PT_Assume, Op, Condition), AssumeInst(AssumeInst) {} PredicateAssume() = delete; - static inline bool classof(const PredicateBase *PB) { + static bool classof(const PredicateBase *PB) { return PB->Type == PT_Assume; } }; @@ -146,7 +147,7 @@ class PredicateWithEdge : public PredicateWithCondition { BasicBlock *From; BasicBlock *To; PredicateWithEdge() = delete; - static inline bool classof(const PredicateBase *PB) { + static bool classof(const PredicateBase *PB) { return PB->Type == PT_Branch || PB->Type == PT_Switch; } @@ -166,7 +167,7 @@ class PredicateBranch : public PredicateWithEdge { : PredicateWithEdge(PT_Branch, Op, BranchBB, SplitBB, Condition), TrueEdge(TakenEdge) {} PredicateBranch() = delete; - static inline bool classof(const PredicateBase *PB) { + static bool classof(const PredicateBase *PB) { return PB->Type == PT_Branch; } }; @@ -182,7 +183,7 @@ class PredicateSwitch : public PredicateWithEdge { SI->getCondition()), CaseValue(CaseValue), Switch(SI) {} PredicateSwitch() = delete; - static inline bool classof(const PredicateBase *PB) { + static bool classof(const PredicateBase *PB) { return PB->Type == PT_Switch; } }; @@ -244,6 +245,7 @@ class PredicateInfo { Function &F; DominatorTree &DT; AssumptionCache &AC; + OrderedInstructions OI; // This maps from copy operands to Predicate Info. Note that it does not own // the Predicate Info, they belong to the ValueInfo structs in the ValueInfos // vector. @@ -256,8 +258,6 @@ class PredicateInfo { // 0 is not a valid Value Info index, you can use DenseMap::lookup and tell // whether it returned a valid result. DenseMap ValueInfoNums; - // OrderedBasicBlocks used during sorting uses - DenseMap> OBBMap; // The set of edges along which we can only handle phi uses, due to critical // edges. 
DenseSet> EdgeUsesOnly; diff --git a/interpreter/llvm/src/include/llvm/Transforms/Utils/SSAUpdaterImpl.h b/interpreter/llvm/src/include/llvm/Transforms/Utils/SSAUpdaterImpl.h index b0448fed9f4d9..2dd205d8b2af2 100644 --- a/interpreter/llvm/src/include/llvm/Transforms/Utils/SSAUpdaterImpl.h +++ b/interpreter/llvm/src/include/llvm/Transforms/Utils/SSAUpdaterImpl.h @@ -22,10 +22,10 @@ #include "llvm/Support/Allocator.h" #include "llvm/Support/Debug.h" -namespace llvm { - #define DEBUG_TYPE "ssaupdater" +namespace llvm { + class CastInst; class PHINode; template class SSAUpdaterTraits; @@ -453,8 +453,8 @@ class SSAUpdaterImpl { } }; -#undef DEBUG_TYPE // "ssaupdater" +} // end llvm namespace -} // End llvm namespace +#undef DEBUG_TYPE // "ssaupdater" -#endif +#endif // LLVM_TRANSFORMS_UTILS_SSAUPDATERIMPL_H diff --git a/interpreter/llvm/src/include/llvm/Transforms/Utils/SimplifyLibCalls.h b/interpreter/llvm/src/include/llvm/Transforms/Utils/SimplifyLibCalls.h index 665dd6f4b2579..6aba9b2298b10 100644 --- a/interpreter/llvm/src/include/llvm/Transforms/Utils/SimplifyLibCalls.h +++ b/interpreter/llvm/src/include/llvm/Transforms/Utils/SimplifyLibCalls.h @@ -121,6 +121,7 @@ class LibCallSimplifier { Value *optimizeMemCpy(CallInst *CI, IRBuilder<> &B); Value *optimizeMemMove(CallInst *CI, IRBuilder<> &B); Value *optimizeMemSet(CallInst *CI, IRBuilder<> &B); + Value *optimizeWcslen(CallInst *CI, IRBuilder<> &B); // Wrapper for all String/Memory Library Call Optimizations Value *optimizeStringMemoryLibCall(CallInst *CI, IRBuilder<> &B); @@ -165,6 +166,9 @@ class LibCallSimplifier { /// hasFloatVersion - Checks if there is a float version of the specified /// function by checking for an existing function with name FuncName + f bool hasFloatVersion(StringRef FuncName); + + /// Shared code to optimize strlen+wcslen. + Value *optimizeStringLength(CallInst *CI, IRBuilder<> &B, unsigned CharSize); }; } // End llvm namespace diff --git a/interpreter/llvm/src/include/llvm/Transforms/Utils/ValueMapper.h b/interpreter/llvm/src/include/llvm/Transforms/Utils/ValueMapper.h index e44dc437342d7..45ef8246dcd16 100644 --- a/interpreter/llvm/src/include/llvm/Transforms/Utils/ValueMapper.h +++ b/interpreter/llvm/src/include/llvm/Transforms/Utils/ValueMapper.h @@ -16,8 +16,8 @@ #define LLVM_TRANSFORMS_UTILS_VALUEMAPPER_H #include "llvm/ADT/ArrayRef.h" -#include "llvm/IR/ValueMap.h" #include "llvm/IR/ValueHandle.h" +#include "llvm/IR/ValueMap.h" namespace llvm { @@ -116,7 +116,7 @@ static inline RemapFlags operator|(RemapFlags LHS, RemapFlags RHS) { /// - \a scheduleMapGlobalAliasee() /// - \a scheduleRemapFunction() /// -/// Sometimes a callback needs a diferent mapping context. Such a context can +/// Sometimes a callback needs a different mapping context. Such a context can /// be registered using \a registerAlternateMappingContext(), which takes an /// alternate \a ValueToValueMapTy and \a ValueMaterializer and returns a ID to /// pass into the schedule*() functions. diff --git a/interpreter/llvm/src/include/llvm/Transforms/Vectorize.h b/interpreter/llvm/src/include/llvm/Transforms/Vectorize.h index f734e299c6e9e..19845e471e487 100644 --- a/interpreter/llvm/src/include/llvm/Transforms/Vectorize.h +++ b/interpreter/llvm/src/include/llvm/Transforms/Vectorize.h @@ -106,13 +106,6 @@ struct VectorizeConfig { VectorizeConfig(); }; -//===----------------------------------------------------------------------===// -// -// BBVectorize - A basic-block vectorization pass. 
-// -BasicBlockPass * -createBBVectorizePass(const VectorizeConfig &C = VectorizeConfig()); - //===----------------------------------------------------------------------===// // // LoopVectorize - Create a loop vectorization pass. diff --git a/interpreter/llvm/src/include/llvm/Transforms/Vectorize/LoopVectorize.h b/interpreter/llvm/src/include/llvm/Transforms/Vectorize/LoopVectorize.h index 73d1f264c37b5..57d10c4c74734 100644 --- a/interpreter/llvm/src/include/llvm/Transforms/Vectorize/LoopVectorize.h +++ b/interpreter/llvm/src/include/llvm/Transforms/Vectorize/LoopVectorize.h @@ -87,8 +87,6 @@ struct LoopVectorizePass : public PassInfoMixin { std::function *GetLAA; OptimizationRemarkEmitter *ORE; - BlockFrequency ColdEntryFreq; - PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); // Shim for old PM. diff --git a/interpreter/llvm/src/include/llvm/Transforms/Vectorize/SLPVectorizer.h b/interpreter/llvm/src/include/llvm/Transforms/Vectorize/SLPVectorizer.h index c514db41623cb..6f258191e89eb 100644 --- a/interpreter/llvm/src/include/llvm/Transforms/Vectorize/SLPVectorizer.h +++ b/interpreter/llvm/src/include/llvm/Transforms/Vectorize/SLPVectorizer.h @@ -84,7 +84,7 @@ struct SLPVectorizerPass : public PassInfoMixin { ArrayRef BuildVector = None, bool AllowReorder = false); - /// \brief Try to vectorize a chain that may start at the operands of \V; + /// \brief Try to vectorize a chain that may start at the operands of \p V. bool tryToVectorize(BinaryOperator *V, slpvectorizer::BoUpSLP &R); /// \brief Vectorize the store instructions collected in Stores. diff --git a/interpreter/llvm/src/include/llvm/module.modulemap b/interpreter/llvm/src/include/llvm/module.modulemap index 5e15e8d498026..766198bbc5de9 100644 --- a/interpreter/llvm/src/include/llvm/module.modulemap +++ b/interpreter/llvm/src/include/llvm/module.modulemap @@ -23,6 +23,7 @@ module LLVM_Backend { exclude header "CodeGen/CommandFlags.h" exclude header "CodeGen/LinkAllAsmWriterComponents.h" exclude header "CodeGen/LinkAllCodegenComponents.h" + exclude header "CodeGen/GlobalISel/InstructionSelectorImpl.h" // These are intended for (repeated) textual inclusion. 
textual header "CodeGen/DIEValue.def" @@ -38,6 +39,31 @@ module LLVM_Backend { } module LLVM_Bitcode { requires cplusplus umbrella "Bitcode" module * { export * } } + +module LLVM_BinaryFormat { + requires cplusplus + umbrella "BinaryFormat" module * { export * } + textual header "BinaryFormat/Dwarf.def" + textual header "BinaryFormat/MachO.def" + textual header "BinaryFormat/ELFRelocs/AArch64.def" + textual header "BinaryFormat/ELFRelocs/AMDGPU.def" + textual header "BinaryFormat/ELFRelocs/ARM.def" + textual header "BinaryFormat/ELFRelocs/AVR.def" + textual header "BinaryFormat/ELFRelocs/BPF.def" + textual header "BinaryFormat/ELFRelocs/Hexagon.def" + textual header "BinaryFormat/ELFRelocs/i386.def" + textual header "BinaryFormat/ELFRelocs/Lanai.def" + textual header "BinaryFormat/ELFRelocs/Mips.def" + textual header "BinaryFormat/ELFRelocs/PowerPC64.def" + textual header "BinaryFormat/ELFRelocs/PowerPC.def" + textual header "BinaryFormat/ELFRelocs/RISCV.def" + textual header "BinaryFormat/ELFRelocs/Sparc.def" + textual header "BinaryFormat/ELFRelocs/SystemZ.def" + textual header "BinaryFormat/ELFRelocs/x86_64.def" + textual header "BinaryFormat/ELFRelocs/WebAssembly.def" + textual header "BinaryFormat/WasmRelocs/WebAssembly.def" +} + module LLVM_Config { requires cplusplus umbrella "Config" module * { export * } } module LLVM_DebugInfo { @@ -95,8 +121,8 @@ module LLVM_DebugInfo_CodeView { module * { export * } // These are intended for (repeated) textual inclusion. - textual header "DebugInfo/CodeView/TypeRecords.def" - textual header "DebugInfo/CodeView/CVSymbolTypes.def" + textual header "DebugInfo/CodeView/CodeViewTypes.def" + textual header "DebugInfo/CodeView/CodeViewSymbols.def" } module LLVM_ExecutionEngine { @@ -250,34 +276,13 @@ module LLVM_Utils { umbrella "Support" module * { export * } - // Exclude this; it's only included on Solaris. - exclude header "Support/Solaris.h" - - // Exclude this; it's fundamentally non-modular. + // Exclude these; they are fundamentally non-modular. exclude header "Support/PluginLoader.h" + exclude header "Support/Solaris/sys/regset.h" // These are intended for textual inclusion. textual header "Support/ARMTargetParser.def" textual header "Support/AArch64TargetParser.def" - textual header "Support/Dwarf.def" - textual header "Support/MachO.def" - textual header "Support/ELFRelocs/AArch64.def" - textual header "Support/ELFRelocs/AMDGPU.def" - textual header "Support/ELFRelocs/ARM.def" - textual header "Support/ELFRelocs/AVR.def" - textual header "Support/ELFRelocs/BPF.def" - textual header "Support/ELFRelocs/Hexagon.def" - textual header "Support/ELFRelocs/i386.def" - textual header "Support/ELFRelocs/Lanai.def" - textual header "Support/ELFRelocs/Mips.def" - textual header "Support/ELFRelocs/PowerPC64.def" - textual header "Support/ELFRelocs/PowerPC.def" - textual header "Support/ELFRelocs/RISCV.def" - textual header "Support/ELFRelocs/Sparc.def" - textual header "Support/ELFRelocs/SystemZ.def" - textual header "Support/ELFRelocs/x86_64.def" - textual header "Support/ELFRelocs/WebAssembly.def" - textual header "Support/WasmRelocs/WebAssembly.def" } // This part of the module is usable from both C and C++ code. 
diff --git a/interpreter/llvm/src/lib/Analysis/AliasAnalysisEvaluator.cpp b/interpreter/llvm/src/lib/Analysis/AliasAnalysisEvaluator.cpp index 4d6a6c9a30aa9..435c782d97a57 100644 --- a/interpreter/llvm/src/lib/Analysis/AliasAnalysisEvaluator.cpp +++ b/interpreter/llvm/src/lib/Analysis/AliasAnalysisEvaluator.cpp @@ -14,9 +14,9 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" -#include "llvm/IR/Module.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" diff --git a/interpreter/llvm/src/lib/Analysis/AliasSetTracker.cpp b/interpreter/llvm/src/lib/Analysis/AliasSetTracker.cpp index 16b711a69ec39..4dfa25490d00d 100644 --- a/interpreter/llvm/src/lib/Analysis/AliasSetTracker.cpp +++ b/interpreter/llvm/src/lib/Analysis/AliasSetTracker.cpp @@ -17,8 +17,8 @@ #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Module.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" @@ -218,8 +218,8 @@ bool AliasSet::aliasesUnknownInst(const Instruction *Inst, return false; for (unsigned i = 0, e = UnknownInsts.size(); i != e; ++i) { - if (auto *Inst = getUnknownInst(i)) { - ImmutableCallSite C1(Inst), C2(Inst); + if (auto *UnknownInst = getUnknownInst(i)) { + ImmutableCallSite C1(UnknownInst), C2(Inst); if (!C1 || !C2 || AA.getModRefInfo(C1, C2) != MRI_NoModRef || AA.getModRefInfo(C2, C1) != MRI_NoModRef) return true; diff --git a/interpreter/llvm/src/lib/Analysis/AssumptionCache.cpp b/interpreter/llvm/src/lib/Analysis/AssumptionCache.cpp index 0468c794e81dd..3ff27890dc385 100644 --- a/interpreter/llvm/src/lib/Analysis/AssumptionCache.cpp +++ b/interpreter/llvm/src/lib/Analysis/AssumptionCache.cpp @@ -84,18 +84,11 @@ void AssumptionCache::updateAffectedValues(CallInst *CI) { Value *B; ConstantInt *C; // (A & B) or (A | B) or (A ^ B). - if (match(V, - m_CombineOr(m_And(m_Value(A), m_Value(B)), - m_CombineOr(m_Or(m_Value(A), m_Value(B)), - m_Xor(m_Value(A), m_Value(B)))))) { + if (match(V, m_BitwiseLogic(m_Value(A), m_Value(B)))) { AddAffected(A); AddAffected(B); // (A << C) or (A >>_s C) or (A >>_u C) where C is some constant. 
- } else if (match(V, - m_CombineOr(m_Shl(m_Value(A), m_ConstantInt(C)), - m_CombineOr(m_LShr(m_Value(A), m_ConstantInt(C)), - m_AShr(m_Value(A), - m_ConstantInt(C)))))) { + } else if (match(V, m_Shift(m_Value(A), m_ConstantInt(C)))) { AddAffected(A); } }; diff --git a/interpreter/llvm/src/lib/Analysis/BasicAliasAnalysis.cpp b/interpreter/llvm/src/lib/Analysis/BasicAliasAnalysis.cpp index 537823020301a..e682a644ef2c1 100644 --- a/interpreter/llvm/src/lib/Analysis/BasicAliasAnalysis.cpp +++ b/interpreter/llvm/src/lib/Analysis/BasicAliasAnalysis.cpp @@ -17,13 +17,13 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/Analysis/AssumptionCache.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" @@ -36,6 +36,7 @@ #include "llvm/IR/Operator.h" #include "llvm/Pass.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/KnownBits.h" #include #define DEBUG_TYPE "basicaa" @@ -682,8 +683,11 @@ static bool isIntrinsicCall(ImmutableCallSite CS, Intrinsic::ID IID) { #ifndef NDEBUG static const Function *getParent(const Value *V) { - if (const Instruction *inst = dyn_cast(V)) + if (const Instruction *inst = dyn_cast(V)) { + if (!inst->getParent()) + return nullptr; return inst->getParent()->getParent(); + } if (const Argument *arg = dyn_cast(V)) return arg->getParent(); @@ -1002,15 +1006,32 @@ static AliasResult aliasSameBasePointerGEPs(const GEPOperator *GEP1, // Because they cannot partially overlap and because fields in an array // cannot overlap, if we can prove the final indices are different between // GEP1 and GEP2, we can conclude GEP1 and GEP2 don't alias. - + // If the last indices are constants, we've already checked they don't // equal each other so we can exit early. if (C1 && C2) return NoAlias; - if (isKnownNonEqual(GEP1->getOperand(GEP1->getNumOperands() - 1), - GEP2->getOperand(GEP2->getNumOperands() - 1), - DL)) - return NoAlias; + { + Value *GEP1LastIdx = GEP1->getOperand(GEP1->getNumOperands() - 1); + Value *GEP2LastIdx = GEP2->getOperand(GEP2->getNumOperands() - 1); + if (isa(GEP1LastIdx) || isa(GEP2LastIdx)) { + // If one of the indices is a PHI node, be safe and only use + // computeKnownBits so we don't make any assumptions about the + // relationships between the two indices. This is important if we're + // asking about values from different loop iterations. See PR32314. + // TODO: We may be able to change the check so we only do this when + // we definitely looked through a PHINode. 
+ if (GEP1LastIdx != GEP2LastIdx && + GEP1LastIdx->getType() == GEP2LastIdx->getType()) { + KnownBits Known1 = computeKnownBits(GEP1LastIdx, DL); + KnownBits Known2 = computeKnownBits(GEP2LastIdx, DL); + if (Known1.Zero.intersects(Known2.One) || + Known1.One.intersects(Known2.Zero)) + return NoAlias; + } + } else if (isKnownNonEqual(GEP1LastIdx, GEP2LastIdx, DL)) + return NoAlias; + } return MayAlias; } else if (!LastIndexedStruct || !C1 || !C2) { return MayAlias; @@ -1283,9 +1304,9 @@ AliasResult BasicAAResult::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, // give up if we can't determine conditions that hold for every cycle: const Value *V = DecompGEP1.VarIndices[i].V; - bool SignKnownZero, SignKnownOne; - ComputeSignBit(const_cast(V), SignKnownZero, SignKnownOne, DL, - 0, &AC, nullptr, DT); + KnownBits Known = computeKnownBits(V, DL, 0, &AC, nullptr, DT); + bool SignKnownZero = Known.isNonNegative(); + bool SignKnownOne = Known.isNegative(); // Zero-extension widens the variable, and so forces the sign // bit to zero. @@ -1327,11 +1348,7 @@ AliasResult BasicAAResult::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, // Statically, we can see that the base objects are the same, but the // pointers have dynamic offsets which we can't resolve. And none of our // little tricks above worked. - // - // TODO: Returning PartialAlias instead of MayAlias is a mild hack; the - // practical effect of this is protecting TBAA in the case of dynamic - // indices into arrays of unions or malloc'd memory. - return PartialAlias; + return MayAlias; } static AliasResult MergeAliasResults(AliasResult A, AliasResult B) { diff --git a/interpreter/llvm/src/lib/Analysis/BranchProbabilityInfo.cpp b/interpreter/llvm/src/lib/Analysis/BranchProbabilityInfo.cpp index db87b17c1567b..a329e5ad48c94 100644 --- a/interpreter/llvm/src/lib/Analysis/BranchProbabilityInfo.cpp +++ b/interpreter/llvm/src/lib/Analysis/BranchProbabilityInfo.cpp @@ -14,6 +14,7 @@ #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" @@ -30,6 +31,7 @@ using namespace llvm; INITIALIZE_PASS_BEGIN(BranchProbabilityInfoWrapperPass, "branch-prob", "Branch Probability Analysis", false, true) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END(BranchProbabilityInfoWrapperPass, "branch-prob", "Branch Probability Analysis", false, true) @@ -58,45 +60,12 @@ char BranchProbabilityInfoWrapperPass::ID = 0; static const uint32_t LBH_TAKEN_WEIGHT = 124; static const uint32_t LBH_NONTAKEN_WEIGHT = 4; -/// \brief Unreachable-terminating branch taken weight. +/// \brief Unreachable-terminating branch taken probability. /// -/// This is the weight for a branch being taken to a block that terminates +/// This is the probability for a branch being taken to a block that terminates /// (eventually) in unreachable. These are predicted as unlikely as possible. -static const uint32_t UR_TAKEN_WEIGHT = 1; - -/// \brief Unreachable-terminating branch not-taken weight. -/// -/// This is the weight for a branch not being taken toward a block that -/// terminates (eventually) in unreachable. Such a branch is essentially never -/// taken. Set the weight to an absurdly high value so that nested loops don't -/// easily subsume it. 
-static const uint32_t UR_NONTAKEN_WEIGHT = 1024*1024 - 1; - -/// \brief Returns the branch probability for unreachable edge according to -/// heuristic. -/// -/// This is the branch probability being taken to a block that terminates -/// (eventually) in unreachable. These are predicted as unlikely as possible. -static BranchProbability getUnreachableProbability(uint64_t UnreachableCount) { - assert(UnreachableCount > 0 && "UnreachableCount must be > 0"); - return BranchProbability::getBranchProbability( - UR_TAKEN_WEIGHT, - (UR_TAKEN_WEIGHT + UR_NONTAKEN_WEIGHT) * UnreachableCount); -} - -/// \brief Returns the branch probability for reachable edge according to -/// heuristic. -/// -/// This is the branch probability not being taken toward a block that -/// terminates (eventually) in unreachable. Such a branch is essentially never -/// taken. Set the weight to an absurdly high value so that nested loops don't -/// easily subsume it. -static BranchProbability getReachableProbability(uint64_t ReachableCount) { - assert(ReachableCount > 0 && "ReachableCount must be > 0"); - return BranchProbability::getBranchProbability( - UR_NONTAKEN_WEIGHT, - (UR_TAKEN_WEIGHT + UR_NONTAKEN_WEIGHT) * ReachableCount); -} +/// All reachable probabilities will equally share the remaining part. +static const BranchProbability UR_TAKEN_PROB = BranchProbability::getRaw(1); /// \brief Weight for a branch taken going into a cold block. /// @@ -232,8 +201,10 @@ bool BranchProbabilityInfo::calcUnreachableHeuristics(const BasicBlock *BB) { return true; } - auto UnreachableProb = getUnreachableProbability(UnreachableEdges.size()); - auto ReachableProb = getReachableProbability(ReachableEdges.size()); + auto UnreachableProb = UR_TAKEN_PROB; + auto ReachableProb = + (BranchProbability::getOne() - UR_TAKEN_PROB * UnreachableEdges.size()) / + ReachableEdges.size(); for (unsigned SuccIdx : UnreachableEdges) setEdgeProbability(BB, SuccIdx, UnreachableProb); @@ -319,7 +290,7 @@ bool BranchProbabilityInfo::calcMetadataWeights(const BasicBlock *BB) { // If the unreachable heuristic is stronger, we use it for this edge.
if (UnreachableIdxs.size() > 0 && ReachableIdxs.size() > 0) { auto ToDistribute = BranchProbability::getZero(); - auto UnreachableProb = getUnreachableProbability(UnreachableIdxs.size()); + auto UnreachableProb = UR_TAKEN_PROB; for (auto i : UnreachableIdxs) if (UnreachableProb < BP[i]) { ToDistribute += BP[i] - UnreachableProb; @@ -488,7 +459,8 @@ bool BranchProbabilityInfo::calcLoopBranchHeuristics(const BasicBlock *BB, return true; } -bool BranchProbabilityInfo::calcZeroHeuristics(const BasicBlock *BB) { +bool BranchProbabilityInfo::calcZeroHeuristics(const BasicBlock *BB, + const TargetLibraryInfo *TLI) { const BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()); if (!BI || !BI->isConditional()) return false; @@ -511,8 +483,37 @@ bool BranchProbabilityInfo::calcZeroHeuristics(const BasicBlock *BB) { if (AndRHS->getUniqueInteger().isPowerOf2()) return false; + // Check if the LHS is the return value of a library function. + LibFunc Func = NumLibFuncs; + if (TLI) + if (CallInst *Call = dyn_cast<CallInst>(CI->getOperand(0))) + if (Function *CalledFn = Call->getCalledFunction()) + TLI->getLibFunc(*CalledFn, Func); + bool isProb; - if (CV->isZero()) { + if (Func == LibFunc_strcasecmp || + Func == LibFunc_strcmp || + Func == LibFunc_strncasecmp || + Func == LibFunc_strncmp || + Func == LibFunc_memcmp) { + // strcmp and similar functions return zero, negative, or positive if the + // first string is equal to, less than, or greater than the second. We + // consider it likely that the strings are not equal, so a comparison with + // zero is probably false; a comparison with any other number is also + // probably false, given that exactly what is returned for nonzero values + // is not specified. About any kind of comparison other than equality we + // know nothing. + switch (CI->getPredicate()) { + case CmpInst::ICMP_EQ: + isProb = false; + break; + case CmpInst::ICMP_NE: + isProb = true; + break; + default: + return false; + } + } else if (CV->isZero()) { switch (CI->getPredicate()) { case CmpInst::ICMP_EQ: // X == 0 -> Unlikely @@ -537,7 +538,7 @@ bool BranchProbabilityInfo::calcZeroHeuristics(const BasicBlock *BB) { // InstCombine canonicalizes X <= 0 into X < 1. // X <= 0 -> Unlikely isProb = false; - } else if (CV->isAllOnesValue()) { + } else if (CV->isMinusOne()) { switch (CI->getPredicate()) { case CmpInst::ICMP_EQ: // X == -1 -> Unlikely @@ -738,7 +739,8 @@ void BranchProbabilityInfo::eraseBlock(const BasicBlock *BB) { } } -void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LI) { +void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LI, + const TargetLibraryInfo *TLI) { DEBUG(dbgs() << "---- Branch Probability Info : " << F.getName() << " ----\n\n"); LastF = &F; // Store the last function we ran on for printing.
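The decision table implemented by the new calcZeroHeuristics branch above can be modeled in isolation as follows. This is a sketch of the heuristic's logic only, in plain C++17 rather than the BranchProbabilityInfo API; the function name is hypothetical.

#include <optional>

enum class Predicate { EQ, NE, LT, LE, GT, GE };

// Returns true if a comparison of a strcmp/memcmp-style return value is
// predicted to hold, false if it is predicted to fail, and nullopt when no
// prediction is made (mirroring the 'return false' fall-through above).
std::optional<bool> predictStrcmpStyleCompare(Predicate P) {
  switch (P) {
  case Predicate::EQ:
    return false; // strings compare equal: considered unlikely
  case Predicate::NE:
    return true;  // strings differ: considered likely
  default:
    return std::nullopt; // the sign of a nonzero result is unspecified
  }
}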
@@ -764,7 +766,7 @@ void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LI) { continue; if (calcPointerHeuristics(BB)) continue; - if (calcZeroHeuristics(BB)) + if (calcZeroHeuristics(BB, TLI)) continue; if (calcFloatingPointHeuristics(BB)) continue; @@ -778,12 +780,14 @@ void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LI) { void BranchProbabilityInfoWrapperPass::getAnalysisUsage( AnalysisUsage &AU) const { AU.addRequired(); + AU.addRequired(); AU.setPreservesAll(); } bool BranchProbabilityInfoWrapperPass::runOnFunction(Function &F) { const LoopInfo &LI = getAnalysis().getLoopInfo(); - BPI.calculate(F, LI); + const TargetLibraryInfo &TLI = getAnalysis().getTLI(); + BPI.calculate(F, LI, &TLI); return false; } @@ -798,7 +802,7 @@ AnalysisKey BranchProbabilityAnalysis::Key; BranchProbabilityInfo BranchProbabilityAnalysis::run(Function &F, FunctionAnalysisManager &AM) { BranchProbabilityInfo BPI; - BPI.calculate(F, AM.getResult(F)); + BPI.calculate(F, AM.getResult(F), &AM.getResult(F)); return BPI; } diff --git a/interpreter/llvm/src/lib/Analysis/CFLAndersAliasAnalysis.cpp b/interpreter/llvm/src/lib/Analysis/CFLAndersAliasAnalysis.cpp index ddd5123d0eff7..0de7ad98af467 100644 --- a/interpreter/llvm/src/lib/Analysis/CFLAndersAliasAnalysis.cpp +++ b/interpreter/llvm/src/lib/Analysis/CFLAndersAliasAnalysis.cpp @@ -68,17 +68,6 @@ CFLAndersAAResult::CFLAndersAAResult(CFLAndersAAResult &&RHS) : AAResultBase(std::move(RHS)), TLI(RHS.TLI) {} CFLAndersAAResult::~CFLAndersAAResult() {} -static const Function *parentFunctionOfValue(const Value *Val) { - if (auto *Inst = dyn_cast(Val)) { - auto *Bb = Inst->getParent(); - return Bb->getParent(); - } - - if (auto *Arg = dyn_cast(Val)) - return Arg->getParent(); - return nullptr; -} - namespace { enum class MatchState : uint8_t { @@ -789,10 +778,10 @@ void CFLAndersAAResult::scan(const Function &Fn) { // resize and invalidating the reference returned by operator[] auto FunInfo = buildInfoFrom(Fn); Cache[&Fn] = std::move(FunInfo); - Handles.push_front(FunctionHandle(const_cast(&Fn), this)); + Handles.emplace_front(const_cast(&Fn), this); } -void CFLAndersAAResult::evict(const Function &Fn) { Cache.erase(&Fn); } +void CFLAndersAAResult::evict(const Function *Fn) { Cache.erase(Fn); } const Optional & CFLAndersAAResult::ensureCached(const Function &Fn) { diff --git a/interpreter/llvm/src/lib/Analysis/CFLGraph.h b/interpreter/llvm/src/lib/Analysis/CFLGraph.h index a8fb12b725686..95874b88244b1 100644 --- a/interpreter/llvm/src/lib/Analysis/CFLGraph.h +++ b/interpreter/llvm/src/lib/Analysis/CFLGraph.h @@ -16,7 +16,6 @@ #define LLVM_ANALYSIS_CFLGRAPH_H #include "AliasAnalysisSummary.h" -#include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/IR/InstVisitor.h" #include "llvm/IR/Instructions.h" @@ -210,6 +209,11 @@ template class CFLGraphBuilder { void addDerefEdge(Value *From, Value *To, bool IsRead) { assert(From != nullptr && To != nullptr); + // FIXME: This is subtly broken, due to how we model some instructions + // (e.g. extractvalue, extractelement) as loads. Since those take + // non-pointer operands, we'll entirely skip adding edges for those. + // + // addAssignEdge seems to have a similar issue with insertvalue, etc. 
if (!From->getType()->isPointerTy() || !To->getType()->isPointerTy()) return; addNode(From); @@ -540,6 +544,7 @@ template class CFLGraphBuilder { case Instruction::ExtractValue: { auto *Ptr = CE->getOperand(0); addLoadEdge(Ptr, CE); + break; } case Instruction::ShuffleVector: { auto *From1 = CE->getOperand(0); diff --git a/interpreter/llvm/src/lib/Analysis/CFLSteensAliasAnalysis.cpp b/interpreter/llvm/src/lib/Analysis/CFLSteensAliasAnalysis.cpp index dde24ef5fdd57..adbdd82012a33 100644 --- a/interpreter/llvm/src/lib/Analysis/CFLSteensAliasAnalysis.cpp +++ b/interpreter/llvm/src/lib/Analysis/CFLSteensAliasAnalysis.cpp @@ -80,9 +80,6 @@ class CFLSteensAAResult::FunctionInfo { const AliasSummary &getAliasSummary() const { return Summary; } }; -/// Try to go from a Value* to a Function*. Never returns nullptr. -static Optional parentFunctionOfValue(Value *); - const StratifiedIndex StratifiedLink::SetSentinel = std::numeric_limits::max(); @@ -91,19 +88,6 @@ const StratifiedIndex StratifiedLink::SetSentinel = //===----------------------------------------------------------------------===// /// Determines whether it would be pointless to add the given Value to our sets. -static bool canSkipAddingToSets(Value *Val); - -static Optional parentFunctionOfValue(Value *Val) { - if (auto *Inst = dyn_cast(Val)) { - auto *Bb = Inst->getParent(); - return Bb->getParent(); - } - - if (auto *Arg = dyn_cast(Val)) - return Arg->getParent(); - return None; -} - static bool canSkipAddingToSets(Value *Val) { // Constants can share instances, which may falsely unify multiple // sets, e.g. in @@ -248,7 +232,7 @@ void CFLSteensAAResult::scan(Function *Fn) { auto FunInfo = buildSetsFrom(Fn); Cache[Fn] = std::move(FunInfo); - Handles.push_front(FunctionHandle(Fn, this)); + Handles.emplace_front(Fn, this); } void CFLSteensAAResult::evict(Function *Fn) { Cache.erase(Fn); } @@ -284,9 +268,9 @@ AliasResult CFLSteensAAResult::query(const MemoryLocation &LocA, return NoAlias; Function *Fn = nullptr; - auto MaybeFnA = parentFunctionOfValue(ValA); - auto MaybeFnB = parentFunctionOfValue(ValB); - if (!MaybeFnA.hasValue() && !MaybeFnB.hasValue()) { + Function *MaybeFnA = const_cast(parentFunctionOfValue(ValA)); + Function *MaybeFnB = const_cast(parentFunctionOfValue(ValB)); + if (!MaybeFnA && !MaybeFnB) { // The only times this is known to happen are when globals + InlineAsm are // involved DEBUG(dbgs() @@ -294,12 +278,12 @@ AliasResult CFLSteensAAResult::query(const MemoryLocation &LocA, return MayAlias; } - if (MaybeFnA.hasValue()) { - Fn = *MaybeFnA; - assert((!MaybeFnB.hasValue() || *MaybeFnB == *MaybeFnA) && + if (MaybeFnA) { + Fn = MaybeFnA; + assert((!MaybeFnB || MaybeFnB == MaybeFnA) && "Interprocedural queries not supported"); } else { - Fn = *MaybeFnB; + Fn = MaybeFnB; } assert(Fn != nullptr); diff --git a/interpreter/llvm/src/lib/Analysis/CGSCCPassManager.cpp b/interpreter/llvm/src/lib/Analysis/CGSCCPassManager.cpp index 9d4521221f477..74b5d79ebac56 100644 --- a/interpreter/llvm/src/lib/Analysis/CGSCCPassManager.cpp +++ b/interpreter/llvm/src/lib/Analysis/CGSCCPassManager.cpp @@ -196,18 +196,117 @@ FunctionAnalysisManagerCGSCCProxy::run(LazyCallGraph::SCC &C, bool FunctionAnalysisManagerCGSCCProxy::Result::invalidate( LazyCallGraph::SCC &C, const PreservedAnalyses &PA, CGSCCAnalysisManager::Invalidator &Inv) { - for (LazyCallGraph::Node &N : C) - FAM->invalidate(N.getFunction(), PA); + // If literally everything is preserved, we're done. + if (PA.areAllPreserved()) + return false; // This is still a valid proxy. 
+ + // If this proxy isn't marked as preserved, then even if the result remains + // valid, the key itself may no longer be valid, so we clear everything. + // + // Note that in order to preserve this proxy, a module pass must ensure that + // the FAM has been completely updated to handle the deletion of functions. + // Specifically, any FAM-cached results for those functions need to have been + // forcibly cleared. When preserved, this proxy will only invalidate results + // cached on functions *still in the module* at the end of the module pass. + auto PAC = PA.getChecker(); + if (!PAC.preserved() && !PAC.preservedSet>()) { + for (LazyCallGraph::Node &N : C) + FAM->clear(N.getFunction()); + + return true; + } + + // Directly check if the relevant set is preserved. + bool AreFunctionAnalysesPreserved = + PA.allAnalysesInSetPreserved>(); + + // Now walk all the functions to see if any inner analysis invalidation is + // necessary. + for (LazyCallGraph::Node &N : C) { + Function &F = N.getFunction(); + Optional FunctionPA; + + // Check to see whether the preserved set needs to be pruned based on + // SCC-level analysis invalidation that triggers deferred invalidation + // registered with the outer analysis manager proxy for this function. + if (auto *OuterProxy = + FAM->getCachedResult(F)) + for (const auto &OuterInvalidationPair : + OuterProxy->getOuterInvalidations()) { + AnalysisKey *OuterAnalysisID = OuterInvalidationPair.first; + const auto &InnerAnalysisIDs = OuterInvalidationPair.second; + if (Inv.invalidate(OuterAnalysisID, C, PA)) { + if (!FunctionPA) + FunctionPA = PA; + for (AnalysisKey *InnerAnalysisID : InnerAnalysisIDs) + FunctionPA->abandon(InnerAnalysisID); + } + } + + // Check if we needed a custom PA set, and if so we'll need to run the + // inner invalidation. + if (FunctionPA) { + FAM->invalidate(F, *FunctionPA); + continue; + } - // This proxy doesn't need to handle invalidation itself. Instead, the - // module-level CGSCC proxy handles it above by ensuring that if the - // module-level FAM proxy becomes invalid the entire SCC layer, which - // includes this proxy, is cleared. + // Otherwise we only need to do invalidation if the original PA set didn't + // preserve all function analyses. + if (!AreFunctionAnalysesPreserved) + FAM->invalidate(F, PA); + } + + // Return false to indicate that this result is still a valid proxy. return false; } } // End llvm namespace +/// When a new SCC is created for the graph and there might be function +/// analysis results cached for the functions now in that SCC two forms of +/// updates are required. +/// +/// First, a proxy from the SCC to the FunctionAnalysisManager needs to be +/// created so that any subsequent invalidation events to the SCC are +/// propagated to the function analysis results cached for functions within it. +/// +/// Second, if any of the functions within the SCC have analysis results with +/// outer analysis dependencies, then those dependencies would point to the +/// *wrong* SCC's analysis result. We forcibly invalidate the necessary +/// function analyses so that they don't retain stale handles. +static void updateNewSCCFunctionAnalyses(LazyCallGraph::SCC &C, + LazyCallGraph &G, + CGSCCAnalysisManager &AM) { + // Get the relevant function analysis manager. + auto &FAM = + AM.getResult(C, G).getManager(); + + // Now walk the functions in this SCC and invalidate any function analysis + // results that might have outer dependencies on an SCC analysis. 
+ for (LazyCallGraph::Node &N : C) { + Function &F = N.getFunction(); + + auto *OuterProxy = + FAM.getCachedResult<CGSCCAnalysisManagerFunctionProxy>(F); + if (!OuterProxy) + // No outer analyses were queried, nothing to do. + continue; + + // Forcibly abandon all the inner analyses with dependencies, but + // invalidate nothing else. + auto PA = PreservedAnalyses::all(); + for (const auto &OuterInvalidationPair : + OuterProxy->getOuterInvalidations()) { + const auto &InnerAnalysisIDs = OuterInvalidationPair.second; + for (AnalysisKey *InnerAnalysisID : InnerAnalysisIDs) + PA.abandon(InnerAnalysisID); + } + + // Now invalidate anything we found. + FAM.invalidate(F, PA); + } +} + namespace { /// Helper function to update both the \c CGSCCAnalysisManager \p AM and the \c /// CGSCCPassManager's \c CGSCCUpdateResult \p UR based on a range of newly @@ -236,7 +335,6 @@ incorporateNewSCCRange(const SCCRangeT &NewSCCRange, LazyCallGraph &G, dbgs() << "Enqueuing the existing SCC in the worklist:" << *C << "\n"; SCC *OldC = C; - (void)OldC; // Update the current SCC. Note that if we have new SCCs, this must actually // change the SCC. @@ -245,6 +343,26 @@ incorporateNewSCCRange(const SCCRangeT &NewSCCRange, LazyCallGraph &G, C = &*NewSCCRange.begin(); assert(G.lookupSCC(N) == C && "Failed to update current SCC!"); + // If we had a cached FAM proxy originally, we will want to create more of + // them for each SCC that was split off. + bool NeedFAMProxy = + AM.getCachedResult<FunctionAnalysisManagerCGSCCProxy>(*OldC) != nullptr; + + // We need to propagate an invalidation call to all but the newly current SCC + // because the outer pass manager won't do that for us after splitting them. + // FIXME: We should accept a PreservedAnalysis from the CG updater so that if + // there are preserved analyses we can avoid invalidating them here for + // split-off SCCs. + // We know, however, that this will preserve any FAM proxy, so go ahead and + // mark that. + PreservedAnalyses PA; + PA.preserve<FunctionAnalysisManagerCGSCCProxy>(); + AM.invalidate(*OldC, PA); + + // Ensure the now-current SCC's function analyses are updated. + if (NeedFAMProxy) + updateNewSCCFunctionAnalyses(*C, G, AM); + for (SCC &NewC : reverse(make_range(std::next(NewSCCRange.begin()), NewSCCRange.end()))) { assert(C != &NewC && "No need to re-visit the current SCC!"); @@ -252,6 +370,14 @@ incorporateNewSCCRange(const SCCRangeT &NewSCCRange, LazyCallGraph &G, UR.CWorklist.insert(&NewC); if (DebugLogging) dbgs() << "Enqueuing a newly formed SCC:" << NewC << "\n"; + + // Ensure new SCCs' function analyses are updated. + if (NeedFAMProxy) + updateNewSCCFunctionAnalyses(NewC, G, AM); + + // Also propagate a normal invalidation to the new SCC as only the current + // will get one from the pass manager infrastructure. + AM.invalidate(NewC, PA); } return C; } @@ -307,7 +433,7 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass( if (Visited.insert(C).second) Worklist.push_back(C); - LazyCallGraph::visitReferences(Worklist, Visited, [&](Function &Referee) { + auto VisitRef = [&](Function &Referee) { Node &RefereeN = *G.lookup(Referee); Edge *E = N->lookup(RefereeN); // FIXME: Similarly to new calls, we also currently preclude @@ -318,7 +444,12 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass( RetainedEdges.insert(&RefereeN); if (E->isCall()) DemotedCallTargets.insert(&RefereeN); - }); + }; + LazyCallGraph::visitReferences(Worklist, Visited, VisitRef); + + // Include synthetic reference edges to known, defined lib functions.
+ for (auto *F : G.getLibFunctions()) + VisitRef(*F); // First remove all of the edges that are no longer present in this function. // We have to build a list of dead targets first and then remove them as the @@ -349,14 +480,6 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass( // For separate SCCs this is trivial. RC->switchTrivialInternalEdgeToRef(N, TargetN); } else { - // Otherwise we may end up re-structuring the call graph. First, - // invalidate any SCC analyses. We have to do this before we split - // functions into new SCCs and lose track of where their analyses are - // cached. - // FIXME: We should accept a more precise preserved set here. For - // example, it might be possible to preserve some function analyses - // even as the SCC structure is changed. - AM.invalidate(*C, PreservedAnalyses::none()); // Now update the call graph. C = incorporateNewSCCRange(RC->switchInternalEdgeToRef(N, TargetN), G, N, C, AM, UR, DebugLogging); @@ -424,13 +547,6 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass( continue; } - // Otherwise we may end up re-structuring the call graph. First, invalidate - // any SCC analyses. We have to do this before we split functions into new - // SCCs and lose track of where their analyses are cached. - // FIXME: We should accept a more precise preserved set here. For example, - // it might be possible to preserve some function analyses even as the SCC - // structure is changed. - AM.invalidate(*C, PreservedAnalyses::none()); // Now update the call graph. C = incorporateNewSCCRange(RC->switchInternalEdgeToRef(N, *RefTarget), G, N, C, AM, UR, DebugLogging); @@ -459,25 +575,48 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass( // Otherwise we are switching an internal ref edge to a call edge. This // may merge away some SCCs, and we add those to the UpdateResult. We also // need to make sure to update the worklist in the event SCCs have moved - // before the current one in the post-order sequence. + // before the current one in the post-order sequence. + bool HasFunctionAnalysisProxy = false; auto InitialSCCIndex = RC->find(*C) - RC->begin(); - auto InvalidatedSCCs = RC->switchInternalEdgeToCall(N, *CallTarget); - if (!InvalidatedSCCs.empty()) { + bool FormedCycle = RC->switchInternalEdgeToCall( + N, *CallTarget, [&](ArrayRef<SCC *> MergedSCCs) { + for (SCC *MergedC : MergedSCCs) { + assert(MergedC != &TargetC && "Cannot merge away the target SCC!"); + + HasFunctionAnalysisProxy |= + AM.getCachedResult<FunctionAnalysisManagerCGSCCProxy>( + *MergedC) != nullptr; + + // Mark that this SCC will no longer be valid. + UR.InvalidatedSCCs.insert(MergedC); + + // FIXME: We should really do a 'clear' here to forcibly release + // memory, but we don't have a good way of doing that and + // preserving the function analyses. + auto PA = PreservedAnalyses::allInSet<AllAnalysesOn<Function>>(); + PA.preserve<FunctionAnalysisManagerCGSCCProxy>(); + AM.invalidate(*MergedC, PA); + } + }); + + // If we formed a cycle by creating this call, we need to update more data + // structures. + if (FormedCycle) { C = &TargetC; assert(G.lookupSCC(N) == C && "Failed to update current SCC!"); - // Any analyses cached for this SCC are no longer precise as the shape - // has changed by introducing this cycle.
- AM.invalidate(*C, PreservedAnalyses::none()); - - for (SCC *InvalidatedC : InvalidatedSCCs) { - assert(InvalidatedC != C && "Cannot invalidate the current SCC!"); - UR.InvalidatedSCCs.insert(InvalidatedC); + // If one of the invalidated SCCs had a cached proxy to a function + // analysis manager, we need to create a proxy in the new current SCC as + // the invalidated SCCs had their functions moved. + if (HasFunctionAnalysisProxy) + AM.getResult<FunctionAnalysisManagerCGSCCProxy>(*C, G); - // Also clear any cached analyses for the SCCs that are dead. This - // isn't really necessary for correctness but can release memory. - AM.clear(*InvalidatedC); - } + // Any analyses cached for this SCC are no longer precise as the shape + // has changed by introducing this cycle. However, we have taken care to + // update the proxies so it remains valid. + auto PA = PreservedAnalyses::allInSet<AllAnalysesOn<Function>>(); + PA.preserve<FunctionAnalysisManagerCGSCCProxy>(); + AM.invalidate(*C, PA); } auto NewSCCIndex = RC->find(*C) - RC->begin(); if (InitialSCCIndex < NewSCCIndex) { diff --git a/interpreter/llvm/src/lib/Analysis/CallGraphSCCPass.cpp b/interpreter/llvm/src/lib/Analysis/CallGraphSCCPass.cpp index 8058e5b1935c1..facda246936dc 100644 --- a/interpreter/llvm/src/lib/Analysis/CallGraphSCCPass.cpp +++ b/interpreter/llvm/src/lib/Analysis/CallGraphSCCPass.cpp @@ -477,10 +477,8 @@ bool CGPassManager::runOnModule(Module &M) { if (DevirtualizedCall) DEBUG(dbgs() << " CGSCCPASSMGR: Stopped iteration after " << Iteration << " times, due to -max-cg-scc-iterations\n"); - - if (Iteration > MaxSCCIterations) - MaxSCCIterations = Iteration; - + + MaxSCCIterations.updateMax(Iteration); } Changed |= doFinalization(CG); return Changed; @@ -610,18 +608,18 @@ namespace { } bool runOnSCC(CallGraphSCC &SCC) override { + bool BannerPrinted = false; auto PrintBannerOnce = [&] () { - static bool BannerPrinted = false; if (BannerPrinted) return; Out << Banner; BannerPrinted = true; }; for (CallGraphNode *CGN : SCC) { - if (CGN->getFunction()) { - if (isFunctionInPrintList(CGN->getFunction()->getName())) { + if (Function *F = CGN->getFunction()) { + if (!F->isDeclaration() && isFunctionInPrintList(F->getName())) { PrintBannerOnce(); - CGN->getFunction()->print(Out); + F->print(Out); } } else if (llvm::isFunctionInPrintList("*")) { PrintBannerOnce(); diff --git a/interpreter/llvm/src/lib/Analysis/CallPrinter.cpp b/interpreter/llvm/src/lib/Analysis/CallPrinter.cpp index af942e9ed3e97..e7017e77652af 100644 --- a/interpreter/llvm/src/lib/Analysis/CallPrinter.cpp +++ b/interpreter/llvm/src/lib/Analysis/CallPrinter.cpp @@ -14,8 +14,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/CallPrinter.h" +#include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/DOTGraphTraitsPass.h" using namespace llvm; diff --git a/interpreter/llvm/src/lib/Analysis/CaptureTracking.cpp b/interpreter/llvm/src/lib/Analysis/CaptureTracking.cpp index 9862c3c9c270a..3b0026ba10e90 100644 --- a/interpreter/llvm/src/lib/Analysis/CaptureTracking.cpp +++ b/interpreter/llvm/src/lib/Analysis/CaptureTracking.cpp @@ -16,11 +16,11 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Analysis/CaptureTracking.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CFG.h" -#include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/OrderedBasicBlock.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" @@ -94,8
+94,8 @@ namespace { // guarantee that 'I' never reaches 'BeforeHere' through a back-edge or // by its successors, i.e., prune if: // - // (1) BB is an entry block or have no sucessors. - // (2) There's no path coming back through BB sucessors. + // (1) BB is an entry block or has no successors. + // (2) There's no path coming back through BB successors. if (BB == &BB->getParent()->getEntryBlock() || !BB->getTerminator()->getNumSuccessors()) return true; diff --git a/interpreter/llvm/src/lib/Analysis/CodeMetrics.cpp b/interpreter/llvm/src/lib/Analysis/CodeMetrics.cpp index bdffdd8eb270b..e4d9292db92d7 100644 --- a/interpreter/llvm/src/lib/Analysis/CodeMetrics.cpp +++ b/interpreter/llvm/src/lib/Analysis/CodeMetrics.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CodeMetrics.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" diff --git a/interpreter/llvm/src/lib/Analysis/ConstantFolding.cpp b/interpreter/llvm/src/lib/Analysis/ConstantFolding.cpp index 0ca712bbfe707..0f5ec3f5626ef 100644 --- a/interpreter/llvm/src/lib/Analysis/ConstantFolding.cpp +++ b/interpreter/llvm/src/lib/Analysis/ConstantFolding.cpp @@ -22,8 +22,8 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringRef.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Config/config.h" @@ -687,11 +687,8 @@ Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, Constant *Op1, // bits. if (Opc == Instruction::And) { - unsigned BitWidth = DL.getTypeSizeInBits(Op0->getType()->getScalarType()); - KnownBits Known0(BitWidth); - KnownBits Known1(BitWidth); - computeKnownBits(Op0, Known0, DL); - computeKnownBits(Op1, Known1, DL); + KnownBits Known0 = computeKnownBits(Op0, DL); + KnownBits Known1 = computeKnownBits(Op1, DL); if ((Known1.One | Known0.Zero).isAllOnesValue()) { // All the bits of Op0 that the 'and' could be masking are already zero. return Op0; @@ -1018,9 +1015,11 @@ Constant *ConstantFoldInstOperandsImpl(const Value *InstOrCE, unsigned Opcode, case Instruction::ICmp: case Instruction::FCmp: llvm_unreachable("Invalid for compares"); case Instruction::Call: - if (auto *F = dyn_cast<Function>(Ops.back())) - if (canConstantFoldCallTo(F)) - return ConstantFoldCall(F, Ops.slice(0, Ops.size() - 1), TLI); + if (auto *F = dyn_cast<Function>(Ops.back())) { + ImmutableCallSite CS(cast<CallInst>(InstOrCE)); + if (canConstantFoldCallTo(CS, F)) + return ConstantFoldCall(CS, F, Ops.slice(0, Ops.size() - 1), TLI); + } return nullptr; case Instruction::Select: return ConstantExpr::getSelect(Ops[0], Ops[1], Ops[2]); @@ -1173,7 +1172,9 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate, const DataLayout &DL, const TargetLibraryInfo *TLI) { // fold: icmp (inttoptr x), null -> icmp x, 0 + // fold: icmp null, (inttoptr x) -> icmp 0, x // fold: icmp (ptrtoint x), 0 -> icmp x, null + // fold: icmp 0, (ptrtoint x) -> icmp null, x // fold: icmp (inttoptr x), (inttoptr y) -> icmp trunc/zext x, trunc/zext y // fold: icmp (ptrtoint x), (ptrtoint y) -> icmp x, y // @@ -1243,6 +1244,11 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate, Predicate == ICmpInst::ICMP_EQ ?
Instruction::And : Instruction::Or; return ConstantFoldBinaryOpOperands(OpC, LHS, RHS, DL); } + } else if (isa(Ops1)) { + // If RHS is a constant expression, but the left side isn't, swap the + // operands and try again. + Predicate = ICmpInst::getSwappedPredicate((ICmpInst::Predicate)Predicate); + return ConstantFoldCompareInstOperands(Predicate, Ops1, Ops0, DL, TLI); } return ConstantExpr::getCompare(Predicate, Ops0, Ops1); @@ -1352,7 +1358,9 @@ llvm::ConstantFoldLoadThroughGEPIndices(Constant *C, // Constant Folding for Calls // -bool llvm::canConstantFoldCallTo(const Function *F) { +bool llvm::canConstantFoldCallTo(ImmutableCallSite CS, const Function *F) { + if (CS.isNoBuiltin()) + return false; switch (F->getIntrinsicID()) { case Intrinsic::fabs: case Intrinsic::minnum: @@ -1580,6 +1588,9 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty, // cosine(arg) is between -1 and 1. cosine(invalid arg) is NaN if (IntrinsicID == Intrinsic::cos) return Constant::getNullValue(Ty); + if (IntrinsicID == Intrinsic::bswap || + IntrinsicID == Intrinsic::bitreverse) + return Operands[0]; } if (auto *Op = dyn_cast(Operands[0])) { if (IntrinsicID == Intrinsic::convert_to_fp16) { @@ -1742,6 +1753,7 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty, if ((Name == "round" && TLI->has(LibFunc_round)) || (Name == "roundf" && TLI->has(LibFunc_roundf))) return ConstantFoldFP(round, V, Ty); + break; case 's': if ((Name == "sin" && TLI->has(LibFunc_sin)) || (Name == "sinf" && TLI->has(LibFunc_sinf))) @@ -1810,6 +1822,7 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty, dyn_cast_or_null(Op->getAggregateElement(0U))) return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(), /*roundTowardZero=*/false, Ty); + break; case Intrinsic::x86_sse_cvttss2si: case Intrinsic::x86_sse_cvttss2si64: case Intrinsic::x86_sse2_cvttsd2si: @@ -1818,16 +1831,10 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty, dyn_cast_or_null(Op->getAggregateElement(0U))) return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(), /*roundTowardZero=*/true, Ty); + break; } } - if (isa(Operands[0])) { - if (IntrinsicID == Intrinsic::bswap || - IntrinsicID == Intrinsic::bitreverse) - return Operands[0]; - return nullptr; - } - return nullptr; } @@ -2028,6 +2035,14 @@ Constant *ConstantFoldVectorCall(StringRef Name, unsigned IntrinsicID, for (unsigned I = 0, E = VTy->getNumElements(); I != E; ++I) { // Gather a column of constants. for (unsigned J = 0, JE = Operands.size(); J != JE; ++J) { + // These intrinsics use a scalar type for their second argument. 
+ if (J == 1 && + (IntrinsicID == Intrinsic::cttz || IntrinsicID == Intrinsic::ctlz || + IntrinsicID == Intrinsic::powi)) { + Lane[J] = Operands[J]; + continue; + } + Constant *Agg = Operands[J]->getAggregateElement(I); if (!Agg) return nullptr; @@ -2048,8 +2063,11 @@ Constant *ConstantFoldVectorCall(StringRef Name, unsigned IntrinsicID, } // end anonymous namespace Constant * -llvm::ConstantFoldCall(Function *F, ArrayRef Operands, +llvm::ConstantFoldCall(ImmutableCallSite CS, Function *F, + ArrayRef Operands, const TargetLibraryInfo *TLI) { + if (CS.isNoBuiltin()) + return nullptr; if (!F->hasName()) return nullptr; StringRef Name = F->getName(); @@ -2066,6 +2084,8 @@ llvm::ConstantFoldCall(Function *F, ArrayRef Operands, bool llvm::isMathLibCallNoop(CallSite CS, const TargetLibraryInfo *TLI) { // FIXME: Refactor this code; this duplicates logic in LibCallsShrinkWrap // (and to some extent ConstantFoldScalarCall). + if (CS.isNoBuiltin()) + return false; Function *F = CS.getCalledFunction(); if (!F) return false; diff --git a/interpreter/llvm/src/lib/Analysis/DemandedBits.cpp b/interpreter/llvm/src/lib/Analysis/DemandedBits.cpp index c2b35b99d7c14..9c53f9140ca33 100644 --- a/interpreter/llvm/src/lib/Analysis/DemandedBits.cpp +++ b/interpreter/llvm/src/lib/Analysis/DemandedBits.cpp @@ -86,13 +86,11 @@ void DemandedBits::determineLiveOperandBits( [&](unsigned BitWidth, const Value *V1, const Value *V2) { const DataLayout &DL = I->getModule()->getDataLayout(); Known = KnownBits(BitWidth); - computeKnownBits(const_cast(V1), Known, DL, 0, - &AC, UserI, &DT); + computeKnownBits(V1, Known, DL, 0, &AC, UserI, &DT); if (V2) { Known2 = KnownBits(BitWidth); - computeKnownBits(const_cast(V2), Known2, DL, - 0, &AC, UserI, &DT); + computeKnownBits(V2, Known2, DL, 0, &AC, UserI, &DT); } }; @@ -109,6 +107,8 @@ void DemandedBits::determineLiveOperandBits( AB = AOut.byteSwap(); break; case Intrinsic::bitreverse: + // The alive bits of the input are the reversed alive bits of + // the output. 
AB = AOut.reverseBits(); break; case Intrinsic::ctlz: @@ -143,9 +143,8 @@ void DemandedBits::determineLiveOperandBits( break; case Instruction::Shl: if (OperandNo == 0) - if (ConstantInt *CI = - dyn_cast(UserI->getOperand(1))) { - uint64_t ShiftAmt = CI->getLimitedValue(BitWidth-1); + if (auto *ShiftAmtC = dyn_cast(UserI->getOperand(1))) { + uint64_t ShiftAmt = ShiftAmtC->getLimitedValue(BitWidth - 1); AB = AOut.lshr(ShiftAmt); // If the shift is nuw/nsw, then the high bits are not dead @@ -159,9 +158,8 @@ void DemandedBits::determineLiveOperandBits( break; case Instruction::LShr: if (OperandNo == 0) - if (ConstantInt *CI = - dyn_cast(UserI->getOperand(1))) { - uint64_t ShiftAmt = CI->getLimitedValue(BitWidth-1); + if (auto *ShiftAmtC = dyn_cast(UserI->getOperand(1))) { + uint64_t ShiftAmt = ShiftAmtC->getLimitedValue(BitWidth - 1); AB = AOut.shl(ShiftAmt); // If the shift is exact, then the low bits are not dead @@ -172,9 +170,8 @@ void DemandedBits::determineLiveOperandBits( break; case Instruction::AShr: if (OperandNo == 0) - if (ConstantInt *CI = - dyn_cast(UserI->getOperand(1))) { - uint64_t ShiftAmt = CI->getLimitedValue(BitWidth-1); + if (auto *ShiftAmtC = dyn_cast(UserI->getOperand(1))) { + uint64_t ShiftAmt = ShiftAmtC->getLimitedValue(BitWidth - 1); AB = AOut.shl(ShiftAmt); // Because the high input bit is replicated into the // high-order bits of the result, if we need any of those diff --git a/interpreter/llvm/src/lib/Analysis/DependenceAnalysis.cpp b/interpreter/llvm/src/lib/Analysis/DependenceAnalysis.cpp index a4672efeedd69..34eccc07f2655 100644 --- a/interpreter/llvm/src/lib/Analysis/DependenceAnalysis.cpp +++ b/interpreter/llvm/src/lib/Analysis/DependenceAnalysis.cpp @@ -2984,7 +2984,7 @@ bool DependenceInfo::propagate(const SCEV *&Src, const SCEV *&Dst, SmallVectorImpl &Constraints, bool &Consistent) { bool Result = false; - for (int LI = Loops.find_first(); LI >= 0; LI = Loops.find_next(LI)) { + for (unsigned LI : Loops.set_bits()) { DEBUG(dbgs() << "\t Constraint[" << LI << "] is"); DEBUG(Constraints[LI].dump(dbgs())); if (Constraints[LI].isDistance()) @@ -3266,7 +3266,7 @@ bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst, // For debugging purposes, dump a small bit vector to dbgs(). static void dumpSmallBitVector(SmallBitVector &BV) { dbgs() << "{"; - for (int VI = BV.find_first(); VI >= 0; VI = BV.find_next(VI)) { + for (unsigned VI : BV.set_bits()) { dbgs() << VI; if (BV.find_next(VI) >= 0) dbgs() << ' '; @@ -3342,7 +3342,8 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst, UsefulGEP = isLoopInvariant(SrcPtrSCEV, LI->getLoopFor(Src->getParent())) && isLoopInvariant(DstPtrSCEV, LI->getLoopFor(Dst->getParent())) && - (SrcGEP->getNumOperands() == DstGEP->getNumOperands()); + (SrcGEP->getNumOperands() == DstGEP->getNumOperands()) && + isKnownPredicate(CmpInst::ICMP_EQ, SrcPtrSCEV, DstPtrSCEV); } unsigned Pairs = UsefulGEP ? 
SrcGEP->idx_end() - SrcGEP->idx_begin() : 1; SmallVector Pair(Pairs); @@ -3371,7 +3372,7 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst, if (Delinearize && CommonLevels > 1) { if (tryDelinearize(Src, Dst, Pair)) { - DEBUG(dbgs() << " delinerized GEP\n"); + DEBUG(dbgs() << " delinearized GEP\n"); Pairs = Pair.size(); } } @@ -3506,7 +3507,7 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst, NewConstraint.setAny(SE); // test separable subscripts - for (int SI = Separable.find_first(); SI >= 0; SI = Separable.find_next(SI)) { + for (unsigned SI : Separable.set_bits()) { DEBUG(dbgs() << "testing subscript " << SI); switch (Pair[SI].Classification) { case Subscript::ZIV: @@ -3545,14 +3546,14 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst, SmallVector Constraints(MaxLevels + 1); for (unsigned II = 0; II <= MaxLevels; ++II) Constraints[II].setAny(SE); - for (int SI = Coupled.find_first(); SI >= 0; SI = Coupled.find_next(SI)) { + for (unsigned SI : Coupled.set_bits()) { DEBUG(dbgs() << "testing subscript group " << SI << " { "); SmallBitVector Group(Pair[SI].Group); SmallBitVector Sivs(Pairs); SmallBitVector Mivs(Pairs); SmallBitVector ConstrainedLevels(MaxLevels + 1); SmallVector PairsInGroup; - for (int SJ = Group.find_first(); SJ >= 0; SJ = Group.find_next(SJ)) { + for (unsigned SJ : Group.set_bits()) { DEBUG(dbgs() << SJ << " "); if (Pair[SJ].Classification == Subscript::SIV) Sivs.set(SJ); @@ -3564,7 +3565,7 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst, DEBUG(dbgs() << "}\n"); while (Sivs.any()) { bool Changed = false; - for (int SJ = Sivs.find_first(); SJ >= 0; SJ = Sivs.find_next(SJ)) { + for (unsigned SJ : Sivs.set_bits()) { DEBUG(dbgs() << "testing subscript " << SJ << ", SIV\n"); // SJ is an SIV subscript that's part of the current coupled group unsigned Level; @@ -3588,7 +3589,7 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst, DEBUG(dbgs() << " propagating\n"); DEBUG(dbgs() << "\tMivs = "); DEBUG(dumpSmallBitVector(Mivs)); - for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) { + for (unsigned SJ : Mivs.set_bits()) { // SJ is an MIV subscript that's part of the current coupled group DEBUG(dbgs() << "\tSJ = " << SJ << "\n"); if (propagate(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, @@ -3622,7 +3623,7 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst, } // test & propagate remaining RDIVs - for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) { + for (unsigned SJ : Mivs.set_bits()) { if (Pair[SJ].Classification == Subscript::RDIV) { DEBUG(dbgs() << "RDIV test\n"); if (testRDIV(Pair[SJ].Src, Pair[SJ].Dst, Result)) @@ -3635,7 +3636,7 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst, // test remaining MIVs // This code is temporary. // Better to somehow test all remaining subscripts simultaneously. 
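// set_bits() yields the indices of the set bits in ascending order, so SJ
// visits exactly the subscripts still marked in Mivs.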
- for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) { + for (unsigned SJ : Mivs.set_bits()) { if (Pair[SJ].Classification == Subscript::MIV) { DEBUG(dbgs() << "MIV test\n"); if (testMIV(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, Result)) @@ -3647,9 +3648,8 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst, // update Result.DV from constraint vector DEBUG(dbgs() << " updating\n"); - for (int SJ = ConstrainedLevels.find_first(); SJ >= 0; - SJ = ConstrainedLevels.find_next(SJ)) { - if (SJ > (int)CommonLevels) + for (unsigned SJ : ConstrainedLevels.set_bits()) { + if (SJ > CommonLevels) break; updateDirection(Result.DV[SJ - 1], Constraints[SJ]); if (Result.DV[SJ - 1].Direction == Dependence::DVEntry::NONE) @@ -3797,7 +3797,7 @@ const SCEV *DependenceInfo::getSplitIteration(const Dependence &Dep, if (Delinearize && CommonLevels > 1) { if (tryDelinearize(Src, Dst, Pair)) { - DEBUG(dbgs() << " delinerized GEP\n"); + DEBUG(dbgs() << " delinearized GEP\n"); Pairs = Pair.size(); } } @@ -3859,7 +3859,7 @@ const SCEV *DependenceInfo::getSplitIteration(const Dependence &Dep, NewConstraint.setAny(SE); // test separable subscripts - for (int SI = Separable.find_first(); SI >= 0; SI = Separable.find_next(SI)) { + for (unsigned SI : Separable.set_bits()) { switch (Pair[SI].Classification) { case Subscript::SIV: { unsigned Level; @@ -3886,12 +3886,12 @@ const SCEV *DependenceInfo::getSplitIteration(const Dependence &Dep, SmallVector Constraints(MaxLevels + 1); for (unsigned II = 0; II <= MaxLevels; ++II) Constraints[II].setAny(SE); - for (int SI = Coupled.find_first(); SI >= 0; SI = Coupled.find_next(SI)) { + for (unsigned SI : Coupled.set_bits()) { SmallBitVector Group(Pair[SI].Group); SmallBitVector Sivs(Pairs); SmallBitVector Mivs(Pairs); SmallBitVector ConstrainedLevels(MaxLevels + 1); - for (int SJ = Group.find_first(); SJ >= 0; SJ = Group.find_next(SJ)) { + for (unsigned SJ : Group.set_bits()) { if (Pair[SJ].Classification == Subscript::SIV) Sivs.set(SJ); else @@ -3899,7 +3899,7 @@ const SCEV *DependenceInfo::getSplitIteration(const Dependence &Dep, } while (Sivs.any()) { bool Changed = false; - for (int SJ = Sivs.find_first(); SJ >= 0; SJ = Sivs.find_next(SJ)) { + for (unsigned SJ : Sivs.set_bits()) { // SJ is an SIV subscript that's part of the current coupled group unsigned Level; const SCEV *SplitIter = nullptr; @@ -3914,7 +3914,7 @@ const SCEV *DependenceInfo::getSplitIteration(const Dependence &Dep, } if (Changed) { // propagate, possibly creating new SIVs and ZIVs - for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) { + for (unsigned SJ : Mivs.set_bits()) { // SJ is an MIV subscript that's part of the current coupled group if (propagate(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, Constraints, Result.Consistent)) { diff --git a/interpreter/llvm/src/lib/Analysis/DivergenceAnalysis.cpp b/interpreter/llvm/src/lib/Analysis/DivergenceAnalysis.cpp index 1b36569f7a07c..2d39a0b021500 100644 --- a/interpreter/llvm/src/lib/Analysis/DivergenceAnalysis.cpp +++ b/interpreter/llvm/src/lib/Analysis/DivergenceAnalysis.cpp @@ -241,7 +241,7 @@ void DivergencePropagator::exploreDataDependency(Value *V) { // Follow def-use chains of V. 
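// A user of a divergent value is itself divergent, unless the target
// reports it as always uniform (e.g. a lane-broadcast style intrinsic on
// some GPU targets).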
for (User *U : V->users()) { Instruction *UserInst = cast<Instruction>(U); - if (DV.insert(UserInst).second) + if (!TTI.isAlwaysUniform(U) && DV.insert(UserInst).second) Worklist.push_back(UserInst); } } diff --git a/interpreter/llvm/src/lib/Analysis/DominanceFrontier.cpp b/interpreter/llvm/src/lib/Analysis/DominanceFrontier.cpp index 5b6e2d0476e45..c08c6cfe0c3bc 100644 --- a/interpreter/llvm/src/lib/Analysis/DominanceFrontier.cpp +++ b/interpreter/llvm/src/lib/Analysis/DominanceFrontier.cpp @@ -14,7 +14,8 @@ using namespace llvm; namespace llvm { -template class DominanceFrontierBase<BasicBlock>; +template class DominanceFrontierBase<BasicBlock, false>; +template class DominanceFrontierBase<BasicBlock, true>; template class ForwardDominanceFrontierBase<BasicBlock>; } diff --git a/interpreter/llvm/src/lib/Analysis/EHPersonalities.cpp b/interpreter/llvm/src/lib/Analysis/EHPersonalities.cpp index ebf0a370b0b05..b12ae9884e3d6 100644 --- a/interpreter/llvm/src/lib/Analysis/EHPersonalities.cpp +++ b/interpreter/llvm/src/lib/Analysis/EHPersonalities.cpp @@ -27,8 +27,10 @@ EHPersonality llvm::classifyEHPersonality(const Value *Pers) { return StringSwitch<EHPersonality>(F->getName()) .Case("__gnat_eh_personality", EHPersonality::GNU_Ada) .Case("__gxx_personality_v0", EHPersonality::GNU_CXX) + .Case("__gxx_personality_seh0",EHPersonality::GNU_CXX) .Case("__gxx_personality_sj0", EHPersonality::GNU_CXX_SjLj) .Case("__gcc_personality_v0", EHPersonality::GNU_C) + .Case("__gcc_personality_seh0",EHPersonality::GNU_C) .Case("__gcc_personality_sj0", EHPersonality::GNU_C_SjLj) .Case("__objc_personality_v0", EHPersonality::GNU_ObjC) .Case("_except_handler3", EHPersonality::MSVC_X86SEH) diff --git a/interpreter/llvm/src/lib/Analysis/GlobalsModRef.cpp b/interpreter/llvm/src/lib/Analysis/GlobalsModRef.cpp index 33f00cb19b269..4ef023379bb67 100644 --- a/interpreter/llvm/src/lib/Analysis/GlobalsModRef.cpp +++ b/interpreter/llvm/src/lib/Analysis/GlobalsModRef.cpp @@ -475,7 +475,9 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) { const std::vector<CallGraphNode *> &SCC = *I; assert(!SCC.empty() && "SCC with no functions?"); - if (!SCC[0]->getFunction() || !SCC[0]->getFunction()->isDefinitionExact()) { + Function *F = SCC[0]->getFunction(); + + if (!F || !F->isDefinitionExact()) { // Calls externally or not exact - can't say anything useful. Remove any // existing function records (may have been created when scanning // globals). @@ -484,19 +486,18 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) { continue; } - FunctionInfo &FI = FunctionInfos[SCC[0]->getFunction()]; + FunctionInfo &FI = FunctionInfos[F]; bool KnowNothing = false; // Collect the mod/ref properties due to called functions. We only compute // one mod-ref set. for (unsigned i = 0, e = SCC.size(); i != e && !KnowNothing; ++i) { - Function *F = SCC[i]->getFunction(); if (!F) { KnowNothing = true; break; } - if (F->isDeclaration()) { + if (F->isDeclaration() || F->hasFnAttribute(Attribute::OptimizeNone)) { // Try to get mod/ref behaviour from function attributes. if (F->doesNotAccessMemory()) { // Can't do better than that! @@ -545,6 +546,13 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) { for (auto *Node : SCC) { if (FI.getModRefInfo() == MRI_ModRef) break; // The mod/ref lattice saturates here. + + // Don't prove any properties based on the implementation of an optnone + // function. Function attributes were already used as a best approximation + // above.
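+ // In other words, an optnone body is treated as opaque here, much as a
+ // declaration with no body is treated above.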
+ if (Node->getFunction()->hasFnAttribute(Attribute::OptimizeNone)) + continue; + for (Instruction &I : instructions(Node->getFunction())) { if (FI.getModRefInfo() == MRI_ModRef) break; // The mod/ref lattice saturates here. diff --git a/interpreter/llvm/src/lib/Analysis/IndirectCallPromotionAnalysis.cpp b/interpreter/llvm/src/lib/Analysis/IndirectCallPromotionAnalysis.cpp index 3da33ac71421e..ed233d201537f 100644 --- a/interpreter/llvm/src/lib/Analysis/IndirectCallPromotionAnalysis.cpp +++ b/interpreter/llvm/src/lib/Analysis/IndirectCallPromotionAnalysis.cpp @@ -43,7 +43,7 @@ static cl::opt<unsigned> // The percent threshold for the direct-call target (this call site vs the // total call count) for it to be considered as the promotion target. static cl::opt<unsigned> - ICPPercentThreshold("icp-percent-threshold", cl::init(33), cl::Hidden, + ICPPercentThreshold("icp-percent-threshold", cl::init(30), cl::Hidden, cl::ZeroOrMore, cl::desc("The percentage threshold for the promotion")); diff --git a/interpreter/llvm/src/lib/Analysis/InlineCost.cpp b/interpreter/llvm/src/lib/Analysis/InlineCost.cpp index 44c14cb17c22c..35693666aa036 100644 --- a/interpreter/llvm/src/lib/Analysis/InlineCost.cpp +++ b/interpreter/llvm/src/lib/Analysis/InlineCost.cpp @@ -54,11 +54,6 @@ static cl::opt<int> cl::init(45), cl::desc("Threshold for inlining cold callsites")); -static cl::opt<bool> - EnableGenericSwitchCost("inline-generic-switch-cost", cl::Hidden, - cl::init(false), - cl::desc("Enable generic switch cost model")); - // We introduce this threshold to help performance of instrumentation based // PGO before we actually hook up inliner with analysis passes such as BPI and // BFI. @@ -71,6 +66,12 @@ static cl::opt<int> cl::ZeroOrMore, cl::desc("Threshold for hot callsites ")); +static cl::opt<int> ColdCallSiteRelFreq( + "cold-callsite-rel-freq", cl::Hidden, cl::init(2), cl::ZeroOrMore, + cl::desc("Maximum block frequency, expressed as a percentage of caller's " + "entry frequency, for a callsite to be cold in the absence of " + "profile information.")); + namespace { class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { @@ -177,6 +178,9 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { /// Return true if size growth is allowed when inlining the callee at CS. bool allowSizeGrowth(CallSite CS); + /// Return true if \p CS is a cold callsite. + bool isColdCallSite(CallSite CS, BlockFrequencyInfo *CallerBFI); + // Custom analysis routines. bool analyzeBlock(BasicBlock *BB, SmallPtrSetImpl<const Value *> &EphValues); @@ -636,6 +640,26 @@ bool CallAnalyzer::allowSizeGrowth(CallSite CS) { return true; } +bool CallAnalyzer::isColdCallSite(CallSite CS, BlockFrequencyInfo *CallerBFI) { + // If global profile summary is available, then callsite's coldness is + // determined based on that. + if (PSI->hasProfileSummary()) + return PSI->isColdCallSite(CS, CallerBFI); + if (!CallerBFI) + return false; + + // In the absence of global profile summary, determine if the callsite is cold + // relative to caller's entry. We could potentially cache the computation of + // scaled entry frequency, but the added complexity is not worth it unless + // this scaling shows up high in the profiles.
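+ //
+ // E.g. with the default cold-callsite-rel-freq of 2, a callsite whose
+ // block runs at less than 2% of the caller's entry frequency is
+ // considered cold.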
+ const BranchProbability ColdProb(ColdCallSiteRelFreq, 100); + auto CallSiteBB = CS.getInstruction()->getParent(); + auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB); + auto CallerEntryFreq = + CallerBFI->getBlockFreq(&(CS.getCaller()->getEntryBlock())); + return CallSiteFreq < CallerEntryFreq * ColdProb; +} + void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) { // If no size growth is allowed for this inlining, set Threshold to 0. if (!allowSizeGrowth(CS)) { @@ -669,21 +693,33 @@ void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) { Threshold = MaxIfValid(Threshold, Params.HintThreshold); if (PSI) { BlockFrequencyInfo *CallerBFI = GetBFI ? &((*GetBFI)(*Caller)) : nullptr; - if (PSI->isHotCallSite(CS, CallerBFI)) { - DEBUG(dbgs() << "Hot callsite.\n"); - Threshold = Params.HotCallSiteThreshold.getValue(); - } else if (PSI->isFunctionEntryHot(&Callee)) { - DEBUG(dbgs() << "Hot callee.\n"); - // If callsite hotness can not be determined, we may still know - // that the callee is hot and treat it as a weaker hint for threshold - // increase. - Threshold = MaxIfValid(Threshold, Params.HintThreshold); - } else if (PSI->isColdCallSite(CS, CallerBFI)) { - DEBUG(dbgs() << "Cold callsite.\n"); - Threshold = MinIfValid(Threshold, Params.ColdCallSiteThreshold); - } else if (PSI->isFunctionEntryCold(&Callee)) { - DEBUG(dbgs() << "Cold callee.\n"); - Threshold = MinIfValid(Threshold, Params.ColdThreshold); + // FIXME: After switching to the new passmanager, simplify the logic below + // by checking only the callsite hotness/coldness. The check for CallerBFI + // exists only because we do not have BFI available with the old PM. + // + // Use callee's hotness information only if we have no way of determining + // callsite's hotness information. Callsite hotness can be determined if + // sample profile is used (which adds hotness metadata to calls) or if + // caller's BlockFrequencyInfo is available. + if (CallerBFI || PSI->hasSampleProfile()) { + if (PSI->isHotCallSite(CS, CallerBFI)) { + DEBUG(dbgs() << "Hot callsite.\n"); + Threshold = Params.HotCallSiteThreshold.getValue(); + } else if (isColdCallSite(CS, CallerBFI)) { + DEBUG(dbgs() << "Cold callsite.\n"); + Threshold = MinIfValid(Threshold, Params.ColdCallSiteThreshold); + } + } else { + if (PSI->isFunctionEntryHot(&Callee)) { + DEBUG(dbgs() << "Hot callee.\n"); + // If callsite hotness can not be determined, we may still know + // that the callee is hot and treat it as a weaker hint for threshold + // increase. + Threshold = MaxIfValid(Threshold, Params.HintThreshold); + } else if (PSI->isFunctionEntryCold(&Callee)) { + DEBUG(dbgs() << "Cold callee.\n"); + Threshold = MinIfValid(Threshold, Params.ColdThreshold); + } } } } @@ -862,7 +898,7 @@ bool CallAnalyzer::simplifyCallSite(Function *F, CallSite CS) { // because we have to continually rebuild the argument list even when no // simplifications can be performed. Until that is fixed with remapping // inside of instsimplify, directly constant fold calls here. - if (!canConstantFoldCallTo(F)) + if (!canConstantFoldCallTo(CS, F)) return false; // Try to re-map the arguments to constants. 
@@ -878,7 +914,7 @@ bool CallAnalyzer::simplifyCallSite(Function *F, CallSite CS) { ConstantArgs.push_back(C); } - if (Constant *C = ConstantFoldCall(F, ConstantArgs)) { + if (Constant *C = ConstantFoldCall(CS, F, ConstantArgs)) { SimplifiedValues[CS.getInstruction()] = C; return true; } @@ -1003,83 +1039,74 @@ bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) { if (isa(V)) return true; - if (EnableGenericSwitchCost) { - // Assume the most general case where the swith is lowered into - // either a jump table, bit test, or a balanced binary tree consisting of - // case clusters without merging adjacent clusters with the same - // destination. We do not consider the switches that are lowered with a mix - // of jump table/bit test/binary search tree. The cost of the switch is - // proportional to the size of the tree or the size of jump table range. - - // Exit early for a large switch, assuming one case needs at least one - // instruction. - // FIXME: This is not true for a bit test, but ignore such case for now to - // save compile-time. - int64_t CostLowerBound = - std::min((int64_t)INT_MAX, - (int64_t)SI.getNumCases() * InlineConstants::InstrCost + Cost); - - if (CostLowerBound > Threshold) { - Cost = CostLowerBound; - return false; - } + // Assume the most general case where the switch is lowered into + // either a jump table, bit test, or a balanced binary tree consisting of + // case clusters without merging adjacent clusters with the same + // destination. We do not consider the switches that are lowered with a mix + // of jump table/bit test/binary search tree. The cost of the switch is + // proportional to the size of the tree or the size of jump table range. + // + // NB: We convert large switches which are just used to initialize large phi + // nodes to lookup tables instead in simplify-cfg, so this shouldn't prevent + // inlining those. It will prevent inlining in cases where the optimization + // does not (yet) fire. - unsigned JumpTableSize = 0; - unsigned NumCaseCluster = - TTI.getEstimatedNumberOfCaseClusters(SI, JumpTableSize); + // Maximum valid cost increased in this function. + int CostUpperBound = INT_MAX - InlineConstants::InstrCost - 1; - // If suitable for a jump table, consider the cost for the table size and - // branch to destination. - if (JumpTableSize) { - int64_t JTCost = (int64_t)JumpTableSize * InlineConstants::InstrCost + - 4 * InlineConstants::InstrCost; - Cost = std::min((int64_t)INT_MAX, JTCost + Cost); - return false; - } + // Exit early for a large switch, assuming one case needs at least one + // instruction. + // FIXME: This is not true for a bit test, but ignore such case for now to + // save compile-time. + int64_t CostLowerBound = + std::min((int64_t)CostUpperBound, + (int64_t)SI.getNumCases() * InlineConstants::InstrCost + Cost); - // Considering forming a binary search, we should find the number of nodes - // which is same as the number of comparisons when lowered. For a given - // number of clusters, n, we can define a recursive function, f(n), to find - // the number of nodes in the tree. The recursion is : - // f(n) = 1 + f(n/2) + f (n - n/2), when n > 3, - // and f(n) = n, when n <= 3. - // This will lead a binary tree where the leaf should be either f(2) or f(3) - // when n > 3. So, the number of comparisons from leaves should be n, while - // the number of non-leaf should be : - // 2^(log2(n) - 1) - 1 - // = 2^log2(n) * 2^-1 - 1 - // = n / 2 - 1. 
- // Considering comparisons from leaf and non-leaf nodes, we can estimate the - // number of comparisons in a simple closed form : - // n + n / 2 - 1 = n * 3 / 2 - 1 - if (NumCaseCluster <= 3) { - // Suppose a comparison includes one compare and one conditional branch. - Cost += NumCaseCluster * 2 * InlineConstants::InstrCost; - return false; - } - int64_t ExpectedNumberOfCompare = 3 * (uint64_t)NumCaseCluster / 2 - 1; - uint64_t SwitchCost = - ExpectedNumberOfCompare * 2 * InlineConstants::InstrCost; - Cost = std::min((uint64_t)INT_MAX, SwitchCost + Cost); + if (CostLowerBound > Threshold) { + Cost = CostLowerBound; return false; } - // Use a simple switch cost model where we accumulate a cost proportional to - // the number of distinct successor blocks. This fan-out in the CFG cannot - // be represented for free even if we can represent the core switch as a - // jumptable that takes a single instruction. - /// - // NB: We convert large switches which are just used to initialize large phi - // nodes to lookup tables instead in simplify-cfg, so this shouldn't prevent - // inlining those. It will prevent inlining in cases where the optimization - // does not (yet) fire. - SmallPtrSet SuccessorBlocks; - SuccessorBlocks.insert(SI.getDefaultDest()); - for (auto Case : SI.cases()) - SuccessorBlocks.insert(Case.getCaseSuccessor()); - // Add cost corresponding to the number of distinct destinations. The first - // we model as free because of fallthrough. - Cost += (SuccessorBlocks.size() - 1) * InlineConstants::InstrCost; + unsigned JumpTableSize = 0; + unsigned NumCaseCluster = + TTI.getEstimatedNumberOfCaseClusters(SI, JumpTableSize); + + // If suitable for a jump table, consider the cost for the table size and + // branch to destination. + if (JumpTableSize) { + int64_t JTCost = (int64_t)JumpTableSize * InlineConstants::InstrCost + + 4 * InlineConstants::InstrCost; + + Cost = std::min((int64_t)CostUpperBound, JTCost + Cost); + return false; + } + + // Considering forming a binary search, we should find the number of nodes + // which is same as the number of comparisons when lowered. For a given + // number of clusters, n, we can define a recursive function, f(n), to find + // the number of nodes in the tree. The recursion is : + // f(n) = 1 + f(n/2) + f (n - n/2), when n > 3, + // and f(n) = n, when n <= 3. + // This will lead a binary tree where the leaf should be either f(2) or f(3) + // when n > 3. So, the number of comparisons from leaves should be n, while + // the number of non-leaf should be : + // 2^(log2(n) - 1) - 1 + // = 2^log2(n) * 2^-1 - 1 + // = n / 2 - 1. + // Considering comparisons from leaf and non-leaf nodes, we can estimate the + // number of comparisons in a simple closed form : + // n + n / 2 - 1 = n * 3 / 2 - 1 + if (NumCaseCluster <= 3) { + // Suppose a comparison includes one compare and one conditional branch. 
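+ // E.g. NumCaseCluster == 3 adds 3 * 2 * InlineConstants::InstrCost to the
+ // running cost.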
+ Cost += NumCaseCluster * 2 * InlineConstants::InstrCost; + return false; + } + + int64_t ExpectedNumberOfCompare = 3 * (int64_t)NumCaseCluster / 2 - 1; + int64_t SwitchCost = + ExpectedNumberOfCompare * 2 * InlineConstants::InstrCost; + + Cost = std::min((int64_t)CostUpperBound, SwitchCost + Cost); return false; } diff --git a/interpreter/llvm/src/lib/Analysis/InstCount.cpp b/interpreter/llvm/src/lib/Analysis/InstCount.cpp index de2b9c0c56dba..95ab6ee3db5bd 100644 --- a/interpreter/llvm/src/lib/Analysis/InstCount.cpp +++ b/interpreter/llvm/src/lib/Analysis/InstCount.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Analysis/Passes.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/Passes.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstVisitor.h" #include "llvm/Pass.h" @@ -26,14 +26,12 @@ using namespace llvm; STATISTIC(TotalInsts , "Number of instructions (of all types)"); STATISTIC(TotalBlocks, "Number of basic blocks"); STATISTIC(TotalFuncs , "Number of non-external functions"); -STATISTIC(TotalMemInst, "Number of memory instructions"); #define HANDLE_INST(N, OPCODE, CLASS) \ STATISTIC(Num ## OPCODE ## Inst, "Number of " #OPCODE " insts"); #include "llvm/IR/Instruction.def" - namespace { class InstCount : public FunctionPass, public InstVisitor { friend class InstVisitor; @@ -76,13 +74,6 @@ FunctionPass *llvm::createInstCountPass() { return new InstCount(); } // function. // bool InstCount::runOnFunction(Function &F) { - unsigned StartMemInsts = - NumGetElementPtrInst + NumLoadInst + NumStoreInst + NumCallInst + - NumInvokeInst + NumAllocaInst; visit(F); - unsigned EndMemInsts = - NumGetElementPtrInst + NumLoadInst + NumStoreInst + NumCallInst + - NumInvokeInst + NumAllocaInst; - TotalMemInst += EndMemInsts-StartMemInsts; return false; } diff --git a/interpreter/llvm/src/lib/Analysis/InstructionSimplify.cpp b/interpreter/llvm/src/lib/Analysis/InstructionSimplify.cpp index 1457422b255d5..b4f3b87e18466 100644 --- a/interpreter/llvm/src/lib/Analysis/InstructionSimplify.cpp +++ b/interpreter/llvm/src/lib/Analysis/InstructionSimplify.cpp @@ -103,13 +103,8 @@ static bool ValueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) { return false; // If we have a DominatorTree then do a precise test. - if (DT) { - if (!DT->isReachableFromEntry(P->getParent())) - return true; - if (!DT->isReachableFromEntry(I->getParent())) - return false; + if (DT) return DT->dominates(I, P); - } // Otherwise, if the instruction is in the entry block and is not an invoke, // then it obviously dominates all phi nodes. @@ -126,8 +121,8 @@ static bool ValueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) { /// Also performs the transform "(A op' B) op C" -> "(A op C) op' (B op C)". /// Returns the simplified value, or null if no simplification was performed. static Value *ExpandBinOp(Instruction::BinaryOps Opcode, Value *LHS, Value *RHS, - Instruction::BinaryOps OpcodeToExpand, const SimplifyQuery &Q, - unsigned MaxRecurse) { + Instruction::BinaryOps OpcodeToExpand, + const SimplifyQuery &Q, unsigned MaxRecurse) { // Recursion is always used, so bail out at once if we already hit the limit. if (!MaxRecurse--) return nullptr; @@ -184,7 +179,8 @@ static Value *ExpandBinOp(Instruction::BinaryOps Opcode, Value *LHS, Value *RHS, /// Generic simplifications for associative binary operations. /// Returns the simpler value, or null if none was found. 
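/// E.g. "(A | B) | B" can be reassociated to "A | (B | B)"; the inner term
/// simplifies to B, so the whole expression simplifies to the existing
/// value "A | B".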
static Value *SimplifyAssociativeBinOp(Instruction::BinaryOps Opcode, - Value *LHS, Value *RHS, const SimplifyQuery &Q, + Value *LHS, Value *RHS, + const SimplifyQuery &Q, unsigned MaxRecurse) { assert(Instruction::isAssociative(Opcode) && "Not an associative operation!"); @@ -564,7 +560,7 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, return Y; /// i1 add -> xor. - if (MaxRecurse && Op0->getType()->getScalarType()->isIntegerTy(1)) + if (MaxRecurse && Op0->getType()->isIntOrIntVectorTy(1)) if (Value *V = SimplifyXorInst(Op0, Op1, Q, MaxRecurse-1)) return V; @@ -602,7 +598,7 @@ Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, /// folding. static Constant *stripAndComputeConstantOffsets(const DataLayout &DL, Value *&V, bool AllowNonInbounds = false) { - assert(V->getType()->getScalarType()->isPointerTy()); + assert(V->getType()->isPtrOrPtrVectorTy()); Type *IntPtrTy = DL.getIntPtrType(V->getType())->getScalarType(); APInt Offset = APInt::getNullValue(IntPtrTy->getIntegerBitWidth()); @@ -631,8 +627,7 @@ static Constant *stripAndComputeConstantOffsets(const DataLayout &DL, Value *&V, } break; } - assert(V->getType()->getScalarType()->isPointerTy() && - "Unexpected operand type!"); + assert(V->getType()->isPtrOrPtrVectorTy() && "Unexpected operand type!"); } while (Visited.insert(V).second); Constant *OffsetIntPtr = ConstantInt::get(IntPtrTy, Offset); @@ -687,9 +682,7 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, if (isNUW) return Op0; - unsigned BitWidth = Op1->getType()->getScalarSizeInBits(); - KnownBits Known(BitWidth); - computeKnownBits(Op1, Known, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + KnownBits Known = computeKnownBits(Op1, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); if (Known.Zero.isMaxSignedValue()) { // Op1 is either 0 or the minimum signed value. If the sub is NSW, then // Op1 must be 0 because negating the minimum signed value is undefined. @@ -777,7 +770,7 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, return ConstantExpr::getIntegerCast(Result, Op0->getType(), true); // i1 sub -> xor. - if (MaxRecurse && Op0->getType()->getScalarType()->isIntegerTy(1)) + if (MaxRecurse && Op0->getType()->isIntOrIntVectorTy(1)) if (Value *V = SimplifyXorInst(Op0, Op1, Q, MaxRecurse-1)) return V; @@ -908,7 +901,7 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, return X; // i1 mul -> and. - if (MaxRecurse && Op0->getType()->getScalarType()->isIntegerTy(1)) + if (MaxRecurse && Op0->getType()->isIntOrIntVectorTy(1)) if (Value *V = SimplifyAndInst(Op0, Op1, Q, MaxRecurse-1)) return V; @@ -1004,7 +997,7 @@ static Value *simplifyDivRem(Value *Op0, Value *Op1, bool IsDiv) { // X % 1 -> 0 // If this is a boolean op (single-bit element type), we can't have // division-by-zero or remainder-by-zero, so assume the divisor is 1. - if (match(Op1, m_One()) || Ty->getScalarType()->isIntegerTy(1)) + if (match(Op1, m_One()) || Ty->isIntOrIntVectorTy(1)) return IsDiv ? Op0 : Constant::getNullValue(Ty); return nullptr; @@ -1308,15 +1301,13 @@ static Value *SimplifyShift(Instruction::BinaryOps Opcode, Value *Op0, // If any bits in the shift amount make that value greater than or equal to // the number of bits in the type, the shift is undefined. 
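// E.g. 'shl i32 %x, 35' is undef: the shift amount's known one bits already
// give a value of at least 32.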
- unsigned BitWidth = Op1->getType()->getScalarSizeInBits(); - KnownBits Known(BitWidth); - computeKnownBits(Op1, Known, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); - if (Known.One.getLimitedValue() >= BitWidth) + KnownBits Known = computeKnownBits(Op1, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + if (Known.One.getLimitedValue() >= Known.getBitWidth()) return UndefValue::get(Op0->getType()); // If all valid bits in the shift amount are known zero, the first operand is // unchanged. - unsigned NumValidShiftBits = Log2_32_Ceil(BitWidth); + unsigned NumValidShiftBits = Log2_32_Ceil(Known.getBitWidth()); if (Known.countMinTrailingZeros() >= NumValidShiftBits) return Op0; @@ -1342,9 +1333,7 @@ static Value *SimplifyRightShift(Instruction::BinaryOps Opcode, Value *Op0, // The low bit cannot be shifted out of an exact shift if it is set. if (isExact) { - unsigned BitWidth = Op0->getType()->getScalarSizeInBits(); - KnownBits Op0Known(BitWidth); - computeKnownBits(Op0, Op0Known, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT); + KnownBits Op0Known = computeKnownBits(Op0, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT); if (Op0Known.One[0]) return Op0; } @@ -1427,6 +1416,8 @@ Value *llvm::SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact, return ::SimplifyAShrInst(Op0, Op1, isExact, Q, RecursionLimit); } +/// Commuted variants are assumed to be handled by calling this function again +/// with the parameters swapped. static Value *simplifyUnsignedRangeCheck(ICmpInst *ZeroICmp, ICmpInst *UnsignedICmp, bool IsAnd) { Value *X, *Y; @@ -1559,20 +1550,8 @@ static Value *simplifyAndOrOfICmpsWithConstants(ICmpInst *Cmp0, ICmpInst *Cmp1, return nullptr; } -/// Commuted variants are assumed to be handled by calling this function again -/// with the parameters swapped. -static Value *simplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1) { - if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/true)) - return X; - - if (Value *X = simplifyAndOfICmpsWithSameOperands(Op0, Op1)) - return X; - - if (Value *X = simplifyAndOrOfICmpsWithConstants(Op0, Op1, true)) - return X; - +static Value *simplifyAndOfICmpsWithAdd(ICmpInst *Op0, ICmpInst *Op1) { // (icmp (add V, C0), C1) & (icmp V, C0) - Type *ITy = Op0->getType(); ICmpInst::Predicate Pred0, Pred1; const APInt *C0, *C1; Value *V; @@ -1586,6 +1565,7 @@ static Value *simplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1) { if (AddInst->getOperand(1) != Op1->getOperand(1)) return nullptr; + Type *ITy = Op0->getType(); bool isNSW = AddInst->hasNoSignedWrap(); bool isNUW = AddInst->hasNoUnsignedWrap(); @@ -1616,18 +1596,29 @@ static Value *simplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1) { return nullptr; } -/// Commuted variants are assumed to be handled by calling this function again -/// with the parameters swapped. 
-static Value *simplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1) { - if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/false)) +static Value *simplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1) { + if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/true)) + return X; + if (Value *X = simplifyUnsignedRangeCheck(Op1, Op0, /*IsAnd=*/true)) return X; - if (Value *X = simplifyOrOfICmpsWithSameOperands(Op0, Op1)) + if (Value *X = simplifyAndOfICmpsWithSameOperands(Op0, Op1)) + return X; + if (Value *X = simplifyAndOfICmpsWithSameOperands(Op1, Op0)) return X; - if (Value *X = simplifyAndOrOfICmpsWithConstants(Op0, Op1, false)) + if (Value *X = simplifyAndOrOfICmpsWithConstants(Op0, Op1, true)) + return X; + + if (Value *X = simplifyAndOfICmpsWithAdd(Op0, Op1)) return X; + if (Value *X = simplifyAndOfICmpsWithAdd(Op1, Op0)) + return X; + + return nullptr; +} +static Value *simplifyOrOfICmpsWithAdd(ICmpInst *Op0, ICmpInst *Op1) { // (icmp (add V, C0), C1) | (icmp V, C0) ICmpInst::Predicate Pred0, Pred1; const APInt *C0, *C1; @@ -1673,19 +1664,24 @@ static Value *simplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1) { return nullptr; } -static Value *simplifyPossiblyCastedAndOrOfICmps(ICmpInst *Cmp0, ICmpInst *Cmp1, - bool IsAnd, CastInst *Cast) { - Value *V = - IsAnd ? simplifyAndOfICmps(Cmp0, Cmp1) : simplifyOrOfICmps(Cmp0, Cmp1); - if (!V) - return nullptr; - if (!Cast) - return V; +static Value *simplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1) { + if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/false)) + return X; + if (Value *X = simplifyUnsignedRangeCheck(Op1, Op0, /*IsAnd=*/false)) + return X; - // If we looked through casts, we can only handle a constant simplification - // because we are not allowed to create a cast instruction here. - if (auto *C = dyn_cast(V)) - return ConstantExpr::getCast(Cast->getOpcode(), C, Cast->getType()); + if (Value *X = simplifyOrOfICmpsWithSameOperands(Op0, Op1)) + return X; + if (Value *X = simplifyOrOfICmpsWithSameOperands(Op1, Op0)) + return X; + + if (Value *X = simplifyAndOrOfICmpsWithConstants(Op0, Op1, false)) + return X; + + if (Value *X = simplifyOrOfICmpsWithAdd(Op0, Op1)) + return X; + if (Value *X = simplifyOrOfICmpsWithAdd(Op1, Op0)) + return X; return nullptr; } @@ -1705,11 +1701,18 @@ static Value *simplifyAndOrOfICmps(Value *Op0, Value *Op1, bool IsAnd) { if (!Cmp0 || !Cmp1) return nullptr; - if (Value *V = simplifyPossiblyCastedAndOrOfICmps(Cmp0, Cmp1, IsAnd, Cast0)) - return V; - if (Value *V = simplifyPossiblyCastedAndOrOfICmps(Cmp1, Cmp0, IsAnd, Cast0)) + Value *V = + IsAnd ? simplifyAndOfICmps(Cmp0, Cmp1) : simplifyOrOfICmps(Cmp0, Cmp1); + if (!V) + return nullptr; + if (!Cast0) return V; + // If we looked through casts, we can only handle a constant simplification + // because we are not allowed to create a cast instruction here. + if (auto *C = dyn_cast(V)) + return ConstantExpr::getCast(Cast0->getOpcode(), C, Cast0->getType()); + return nullptr; } @@ -1742,16 +1745,31 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, return Constant::getNullValue(Op0->getType()); // (A | ?) & A = A - Value *A = nullptr, *B = nullptr; - if (match(Op0, m_Or(m_Value(A), m_Value(B))) && - (A == Op1 || B == Op1)) + if (match(Op0, m_c_Or(m_Specific(Op1), m_Value()))) return Op1; // A & (A | ?) 
= A - if (match(Op1, m_Or(m_Value(A), m_Value(B))) && - (A == Op0 || B == Op0)) + if (match(Op1, m_c_Or(m_Specific(Op0), m_Value()))) return Op0; + // A mask that only clears known zeros of a shifted value is a no-op. + Value *X; + const APInt *Mask; + const APInt *ShAmt; + if (match(Op1, m_APInt(Mask))) { + // If all bits in the inverted and shifted mask are clear: + // and (shl X, ShAmt), Mask --> shl X, ShAmt + if (match(Op0, m_Shl(m_Value(X), m_APInt(ShAmt))) && + (~(*Mask)).lshr(*ShAmt).isNullValue()) + return Op0; + + // If all bits in the inverted and shifted mask are clear: + // and (lshr X, ShAmt), Mask --> lshr X, ShAmt + if (match(Op0, m_LShr(m_Value(X), m_APInt(ShAmt))) && + (~(*Mask)).shl(*ShAmt).isNullValue()) + return Op0; + } + // A & (-A) = A if A is a power of two or zero. if (match(Op0, m_Neg(m_Specific(Op1))) || match(Op1, m_Neg(m_Specific(Op0)))) { @@ -1831,26 +1849,22 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, return Constant::getAllOnesValue(Op0->getType()); // (A & ?) | A = A - Value *A = nullptr, *B = nullptr; - if (match(Op0, m_And(m_Value(A), m_Value(B))) && - (A == Op1 || B == Op1)) + if (match(Op0, m_c_And(m_Specific(Op1), m_Value()))) return Op1; // A | (A & ?) = A - if (match(Op1, m_And(m_Value(A), m_Value(B))) && - (A == Op0 || B == Op0)) + if (match(Op1, m_c_And(m_Specific(Op0), m_Value()))) return Op0; // ~(A & ?) | A = -1 - if (match(Op0, m_Not(m_And(m_Value(A), m_Value(B)))) && - (A == Op1 || B == Op1)) + if (match(Op0, m_Not(m_c_And(m_Specific(Op1), m_Value())))) return Constant::getAllOnesValue(Op1->getType()); // A | ~(A & ?) = -1 - if (match(Op1, m_Not(m_And(m_Value(A), m_Value(B)))) && - (A == Op0 || B == Op0)) + if (match(Op1, m_Not(m_c_And(m_Specific(Op1), m_Value())))) return Constant::getAllOnesValue(Op0->getType()); + Value *A, *B; // (A & ~B) | (A ^ B) -> (A ^ B) // (~B & A) | (A ^ B) -> (A ^ B) // (A & ~B) | (B ^ A) -> (B ^ A) @@ -1870,6 +1884,24 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, match(Op1, m_c_And(m_Not(m_Specific(A)), m_Specific(B))))) return Op0; + // (A & B) | (~A ^ B) -> (~A ^ B) + // (B & A) | (~A ^ B) -> (~A ^ B) + // (A & B) | (B ^ ~A) -> (B ^ ~A) + // (B & A) | (B ^ ~A) -> (B ^ ~A) + if (match(Op0, m_And(m_Value(A), m_Value(B))) && + (match(Op1, m_c_Xor(m_Specific(A), m_Not(m_Specific(B)))) || + match(Op1, m_c_Xor(m_Not(m_Specific(A)), m_Specific(B))))) + return Op1; + + // (~A ^ B) | (A & B) -> (~A ^ B) + // (~A ^ B) | (B & A) -> (~A ^ B) + // (B ^ ~A) | (A & B) -> (B ^ ~A) + // (B ^ ~A) | (B & A) -> (B ^ ~A) + if (match(Op1, m_And(m_Value(A), m_Value(B))) && + (match(Op0, m_c_Xor(m_Specific(A), m_Not(m_Specific(B)))) || + match(Op0, m_c_Xor(m_Not(m_Specific(A)), m_Specific(B))))) + return Op0; + if (Value *V = simplifyAndOrOfICmps(Op0, Op1, false)) return V; @@ -1890,37 +1922,27 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, MaxRecurse)) return V; - // (A & C)|(B & D) - Value *C = nullptr, *D = nullptr; - if (match(Op0, m_And(m_Value(A), m_Value(C))) && - match(Op1, m_And(m_Value(B), m_Value(D)))) { - ConstantInt *C1 = dyn_cast(C); - ConstantInt *C2 = dyn_cast(D); - if (C1 && C2 && (C1->getValue() == ~C2->getValue())) { + // (A & C1)|(B & C2) + const APInt *C1, *C2; + if (match(Op0, m_And(m_Value(A), m_APInt(C1))) && + match(Op1, m_And(m_Value(B), m_APInt(C2)))) { + if (*C1 == ~*C2) { // (A & C1)|(B & C2) // If we have: ((V + N) & C1) | (V & C2) // .. and C2 = ~C1 and C2 is 0+1+ and (N & C2) == 0 // replace with V+N. 
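// E.g. with i8 masks C1 = 0xF0 and C2 = 0x0F: if N's low nibble is zero,
// adding N cannot change V's low nibble, so
// ((V + N) & 0xF0) | (V & 0x0F) == V + N.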
- Value *V1, *V2; - if ((C2->getValue() & (C2->getValue() + 1)) == 0 && // C2 == 0+1+ - match(A, m_Add(m_Value(V1), m_Value(V2)))) { + Value *N; + if (C2->isMask() && // C2 == 0+1+ + match(A, m_c_Add(m_Specific(B), m_Value(N)))) { // Add commutes, try both ways. - if (V1 == B && - MaskedValueIsZero(V2, C2->getValue(), Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) - return A; - if (V2 == B && - MaskedValueIsZero(V1, C2->getValue(), Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) + if (MaskedValueIsZero(N, *C2, Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) return A; } // Or commutes, try both ways. - if ((C1->getValue() & (C1->getValue() + 1)) == 0 && - match(B, m_Add(m_Value(V1), m_Value(V2)))) { + if (C1->isMask() && + match(B, m_c_Add(m_Specific(A), m_Value(N)))) { // Add commutes, try both ways. - if (V1 == A && - MaskedValueIsZero(V2, C1->getValue(), Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) - return B; - if (V2 == A && - MaskedValueIsZero(V1, C1->getValue(), Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) + if (MaskedValueIsZero(N, *C1, Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) return B; } } @@ -2221,31 +2243,52 @@ static Value *simplifyICmpOfBools(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const SimplifyQuery &Q) { Type *ITy = GetCompareTy(LHS); // The return type. Type *OpTy = LHS->getType(); // The operand type. - if (!OpTy->getScalarType()->isIntegerTy(1)) + if (!OpTy->isIntOrIntVectorTy(1)) return nullptr; - switch (Pred) { - default: - break; - case ICmpInst::ICMP_EQ: - // X == 1 -> X - if (match(RHS, m_One())) - return LHS; - break; - case ICmpInst::ICMP_NE: - // X != 0 -> X - if (match(RHS, m_Zero())) + // A boolean compared to true/false can be simplified in 14 out of the 20 + // (10 predicates * 2 constants) possible combinations. Cases not handled here + // require a 'not' of the LHS, so those must be transformed in InstCombine. 
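+ // E.g. 'icmp eq i1 %X, false' folds to 'xor i1 %X, true'; simplification
+ // may only return existing values, so creating that 'not' is left to
+ // InstCombine.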
+ if (match(RHS, m_Zero())) { + switch (Pred) { + case CmpInst::ICMP_NE: // X != 0 -> X + case CmpInst::ICMP_UGT: // X >u 0 -> X + case CmpInst::ICMP_SLT: // X <s 0 -> X return LHS; - break; - case ICmpInst::ICMP_UGT: - // X >u 0 -> X - if (match(RHS, m_Zero())) + + case CmpInst::ICMP_ULT: // X <u 0 -> false + case CmpInst::ICMP_SGT: // X >s 0 -> false + return getFalse(ITy); + + case CmpInst::ICMP_UGE: // X >=u 0 -> true + case CmpInst::ICMP_SLE: // X <=s 0 -> true + return getTrue(ITy); + + default: break; + } + } else if (match(RHS, m_One())) { + switch (Pred) { + case CmpInst::ICMP_EQ: // X == 1 -> X + case CmpInst::ICMP_UGE: // X >=u 1 -> X + case CmpInst::ICMP_SLE: // X <=s -1 -> X return LHS; + + case CmpInst::ICMP_UGT: // X >u 1 -> false + case CmpInst::ICMP_SLT: // X <s -1 -> false + return getFalse(ITy); + + case CmpInst::ICMP_ULE: // X <=u 1 -> true + case CmpInst::ICMP_SGE: // X >=s -1 -> true + return getTrue(ITy); + + default: break; + } + } + + switch (Pred) { + default: break; case ICmpInst::ICMP_UGE: - // X >=u 1 -> X - if (match(RHS, m_One())) - return LHS; if (isImpliedCondition(RHS, LHS, Q.DL).getValueOr(false)) return getTrue(ITy); break; @@ -2260,16 +2303,6 @@ static Value *simplifyICmpOfBools(CmpInst::Predicate Pred, Value *LHS, if (isImpliedCondition(LHS, RHS, Q.DL).getValueOr(false)) return getTrue(ITy); break; - case ICmpInst::ICMP_SLT: - // X <s 0 -> X - if (match(RHS, m_Zero())) - return LHS; - break; - case ICmpInst::ICMP_SLE: - // X <=s -1 -> X - if (match(RHS, m_One())) - return LHS; - break; case ICmpInst::ICMP_ULE: if (isImpliedCondition(LHS, RHS, Q.DL).getValueOr(false)) return getTrue(ITy); @@ -2286,7 +2319,6 @@ static Value *simplifyICmpWithZero(CmpInst::Predicate Pred, Value *LHS, return nullptr; Type *ITy = GetCompareTy(LHS); // The return type.
- bool LHSKnownNonNegative, LHSKnownNegative; switch (Pred) { default: llvm_unreachable("Unknown ICmp predicate!"); @@ -2304,39 +2336,41 @@ static Value *simplifyICmpWithZero(CmpInst::Predicate Pred, Value *LHS, if (isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) return getTrue(ITy); break; - case ICmpInst::ICMP_SLT: - ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, Q.AC, - Q.CxtI, Q.DT); - if (LHSKnownNegative) + case ICmpInst::ICMP_SLT: { + KnownBits LHSKnown = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + if (LHSKnown.isNegative()) return getTrue(ITy); - if (LHSKnownNonNegative) + if (LHSKnown.isNonNegative()) return getFalse(ITy); break; - case ICmpInst::ICMP_SLE: - ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, Q.AC, - Q.CxtI, Q.DT); - if (LHSKnownNegative) + } + case ICmpInst::ICMP_SLE: { + KnownBits LHSKnown = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + if (LHSKnown.isNegative()) return getTrue(ITy); - if (LHSKnownNonNegative && isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) + if (LHSKnown.isNonNegative() && + isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) return getFalse(ITy); break; - case ICmpInst::ICMP_SGE: - ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, Q.AC, - Q.CxtI, Q.DT); - if (LHSKnownNegative) + } + case ICmpInst::ICMP_SGE: { + KnownBits LHSKnown = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + if (LHSKnown.isNegative()) return getFalse(ITy); - if (LHSKnownNonNegative) + if (LHSKnown.isNonNegative()) return getTrue(ITy); break; - case ICmpInst::ICMP_SGT: - ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, Q.AC, - Q.CxtI, Q.DT); - if (LHSKnownNegative) + } + case ICmpInst::ICMP_SGT: { + KnownBits LHSKnown = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + if (LHSKnown.isNegative()) return getFalse(ITy); - if (LHSKnownNonNegative && isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) + if (LHSKnown.isNonNegative() && + isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) return getTrue(ITy); break; } + } return nullptr; } @@ -2349,7 +2383,7 @@ static void setLimitsForBinOp(BinaryOperator &BO, APInt &Lower, APInt &Upper) { const APInt *C; switch (BO.getOpcode()) { case Instruction::Add: - if (match(BO.getOperand(1), m_APInt(C)) && *C != 0) { + if (match(BO.getOperand(1), m_APInt(C)) && !C->isNullValue()) { // FIXME: If we have both nuw and nsw, we should reduce the range further. if (BO.hasNoUnsignedWrap()) { // 'add nuw x, C' produces [C, UINT_MAX]. @@ -2387,7 +2421,7 @@ static void setLimitsForBinOp(BinaryOperator &BO, APInt &Lower, APInt &Upper) { Upper = APInt::getSignedMaxValue(Width).ashr(*C) + 1; } else if (match(BO.getOperand(0), m_APInt(C))) { unsigned ShiftAmount = Width - 1; - if (*C != 0 && BO.isExact()) + if (!C->isNullValue() && BO.isExact()) ShiftAmount = C->countTrailingZeros(); if (C->isNegative()) { // 'ashr C, x' produces [C, C >> (Width-1)] @@ -2408,7 +2442,7 @@ static void setLimitsForBinOp(BinaryOperator &BO, APInt &Lower, APInt &Upper) { } else if (match(BO.getOperand(0), m_APInt(C))) { // 'lshr C, x' produces [C >> (Width-1), C]. 
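// E.g. 'lshr i8 12, %x' always yields a value in the inclusive range
// [0, 12].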
unsigned ShiftAmount = Width - 1; - if (*C != 0 && BO.isExact()) + if (!C->isNullValue() && BO.isExact()) ShiftAmount = C->countTrailingZeros(); Lower = C->lshr(ShiftAmount); Upper = *C + 1; @@ -2470,7 +2504,7 @@ static void setLimitsForBinOp(BinaryOperator &BO, APInt &Lower, APInt &Upper) { break; case Instruction::UDiv: - if (match(BO.getOperand(1), m_APInt(C)) && *C != 0) { + if (match(BO.getOperand(1), m_APInt(C)) && !C->isNullValue()) { // 'udiv x, C' produces [0, UINT_MAX / C]. Upper = APInt::getMaxValue(Width).udiv(*C) + 1; } else if (match(BO.getOperand(0), m_APInt(C))) { @@ -2619,15 +2653,11 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS, return getTrue(ITy); if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SGE) { - bool RHSKnownNonNegative, RHSKnownNegative; - bool YKnownNonNegative, YKnownNegative; - ComputeSignBit(RHS, RHSKnownNonNegative, RHSKnownNegative, Q.DL, 0, - Q.AC, Q.CxtI, Q.DT); - ComputeSignBit(Y, YKnownNonNegative, YKnownNegative, Q.DL, 0, Q.AC, - Q.CxtI, Q.DT); - if (RHSKnownNonNegative && YKnownNegative) + KnownBits RHSKnown = computeKnownBits(RHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + KnownBits YKnown = computeKnownBits(Y, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + if (RHSKnown.isNonNegative() && YKnown.isNegative()) return Pred == ICmpInst::ICMP_SLT ? getTrue(ITy) : getFalse(ITy); - if (RHSKnownNegative || YKnownNonNegative) + if (RHSKnown.isNegative() || YKnown.isNonNegative()) return Pred == ICmpInst::ICMP_SLT ? getFalse(ITy) : getTrue(ITy); } } @@ -2639,31 +2669,25 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS, return getFalse(ITy); if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SLE) { - bool LHSKnownNonNegative, LHSKnownNegative; - bool YKnownNonNegative, YKnownNegative; - ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, - Q.AC, Q.CxtI, Q.DT); - ComputeSignBit(Y, YKnownNonNegative, YKnownNegative, Q.DL, 0, Q.AC, - Q.CxtI, Q.DT); - if (LHSKnownNonNegative && YKnownNegative) + KnownBits LHSKnown = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + KnownBits YKnown = computeKnownBits(Y, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + if (LHSKnown.isNonNegative() && YKnown.isNegative()) return Pred == ICmpInst::ICMP_SGT ? getTrue(ITy) : getFalse(ITy); - if (LHSKnownNegative || YKnownNonNegative) + if (LHSKnown.isNegative() || YKnown.isNonNegative()) return Pred == ICmpInst::ICMP_SGT ? 
getFalse(ITy) : getTrue(ITy); } } } // icmp pred (and X, Y), X - if (LBO && match(LBO, m_CombineOr(m_And(m_Value(), m_Specific(RHS)), - m_And(m_Specific(RHS), m_Value())))) { + if (LBO && match(LBO, m_c_And(m_Value(), m_Specific(RHS)))) { if (Pred == ICmpInst::ICMP_UGT) return getFalse(ITy); if (Pred == ICmpInst::ICMP_ULE) return getTrue(ITy); } // icmp pred X, (and X, Y) - if (RBO && match(RBO, m_CombineOr(m_And(m_Value(), m_Specific(LHS)), - m_And(m_Specific(LHS), m_Value())))) { + if (RBO && match(RBO, m_c_And(m_Value(), m_Specific(LHS)))) { if (Pred == ICmpInst::ICMP_UGE) return getTrue(ITy); if (Pred == ICmpInst::ICMP_ULT) @@ -2694,28 +2718,27 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS, // icmp pred (urem X, Y), Y if (LBO && match(LBO, m_URem(m_Value(), m_Specific(RHS)))) { - bool KnownNonNegative, KnownNegative; switch (Pred) { default: break; case ICmpInst::ICMP_SGT: - case ICmpInst::ICMP_SGE: - ComputeSignBit(RHS, KnownNonNegative, KnownNegative, Q.DL, 0, Q.AC, - Q.CxtI, Q.DT); - if (!KnownNonNegative) + case ICmpInst::ICMP_SGE: { + KnownBits Known = computeKnownBits(RHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + if (!Known.isNonNegative()) break; LLVM_FALLTHROUGH; + } case ICmpInst::ICMP_EQ: case ICmpInst::ICMP_UGT: case ICmpInst::ICMP_UGE: return getFalse(ITy); case ICmpInst::ICMP_SLT: - case ICmpInst::ICMP_SLE: - ComputeSignBit(RHS, KnownNonNegative, KnownNegative, Q.DL, 0, Q.AC, - Q.CxtI, Q.DT); - if (!KnownNonNegative) + case ICmpInst::ICMP_SLE: { + KnownBits Known = computeKnownBits(RHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + if (!Known.isNonNegative()) break; LLVM_FALLTHROUGH; + } case ICmpInst::ICMP_NE: case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_ULE: @@ -2725,28 +2748,27 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS, // icmp pred X, (urem Y, X) if (RBO && match(RBO, m_URem(m_Value(), m_Specific(LHS)))) { - bool KnownNonNegative, KnownNegative; switch (Pred) { default: break; case ICmpInst::ICMP_SGT: - case ICmpInst::ICMP_SGE: - ComputeSignBit(LHS, KnownNonNegative, KnownNegative, Q.DL, 0, Q.AC, - Q.CxtI, Q.DT); - if (!KnownNonNegative) + case ICmpInst::ICMP_SGE: { + KnownBits Known = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + if (!Known.isNonNegative()) break; LLVM_FALLTHROUGH; + } case ICmpInst::ICMP_NE: case ICmpInst::ICMP_UGT: case ICmpInst::ICMP_UGE: return getTrue(ITy); case ICmpInst::ICMP_SLT: - case ICmpInst::ICMP_SLE: - ComputeSignBit(LHS, KnownNonNegative, KnownNegative, Q.DL, 0, Q.AC, - Q.CxtI, Q.DT); - if (!KnownNonNegative) + case ICmpInst::ICMP_SLE: { + KnownBits Known = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + if (!Known.isNonNegative()) break; LLVM_FALLTHROUGH; + } case ICmpInst::ICMP_EQ: case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_ULE: @@ -2795,14 +2817,14 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS, // - CI2 is one // - CI isn't zero if (LBO->hasNoSignedWrap() || LBO->hasNoUnsignedWrap() || - *CI2Val == 1 || !CI->isZero()) { + CI2Val->isOneValue() || !CI->isZero()) { if (Pred == ICmpInst::ICMP_EQ) return ConstantInt::getFalse(RHS->getContext()); if (Pred == ICmpInst::ICMP_NE) return ConstantInt::getTrue(RHS->getContext()); } } - if (CIVal->isSignMask() && *CI2Val == 1) { + if (CIVal->isSignMask() && CI2Val->isOneValue()) { if (Pred == ICmpInst::ICMP_UGT) return ConstantInt::getFalse(RHS->getContext()); if (Pred == ICmpInst::ICMP_ULE) @@ -2818,10 +2840,19 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS, break; case 
Instruction::UDiv: case Instruction::LShr: - if (ICmpInst::isSigned(Pred)) + if (ICmpInst::isSigned(Pred) || !LBO->isExact() || !RBO->isExact()) break; - LLVM_FALLTHROUGH; + if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0), + RBO->getOperand(0), Q, MaxRecurse - 1)) + return V; + break; case Instruction::SDiv: + if (!ICmpInst::isEquality(Pred) || !LBO->isExact() || !RBO->isExact()) + break; + if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0), + RBO->getOperand(0), Q, MaxRecurse - 1)) + return V; + break; case Instruction::AShr: if (!LBO->isExact() || !RBO->isExact()) break; @@ -3267,11 +3298,9 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, } // icmp eq|ne X, Y -> false|true if X != Y - if ((Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE) && + if (ICmpInst::isEquality(Pred) && isKnownNonEqual(LHS, RHS, Q.DL, Q.AC, Q.CxtI, Q.DT)) { - LLVMContext &Ctx = LHS->getType()->getContext(); - return Pred == ICmpInst::ICMP_NE ? - ConstantInt::getTrue(Ctx) : ConstantInt::getFalse(Ctx); + return Pred == ICmpInst::ICMP_NE ? getTrue(ITy) : getFalse(ITy); } if (Value *V = simplifyICmpWithBinOp(Pred, LHS, RHS, Q, MaxRecurse)) @@ -3319,21 +3348,6 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, } } - // If a bit is known to be zero for A and known to be one for B, - // then A and B cannot be equal. - if (ICmpInst::isEquality(Pred)) { - const APInt *RHSVal; - if (match(RHS, m_APInt(RHSVal))) { - unsigned BitWidth = RHSVal->getBitWidth(); - KnownBits LHSKnown(BitWidth); - computeKnownBits(LHS, LHSKnown, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT); - if (LHSKnown.Zero.intersects(*RHSVal) || - !LHSKnown.One.isSubsetOf(*RHSVal)) - return Pred == ICmpInst::ICMP_EQ ? ConstantInt::getFalse(ITy) - : ConstantInt::getTrue(ITy); - } - } - // If the comparison is with the result of a select instruction, check whether // comparing with either branch of the select always yields the same value. if (isa(LHS) || isa(RHS)) @@ -3491,6 +3505,10 @@ static const Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, if (V == Op) return RepOp; + // We cannot replace a constant, and shouldn't even try. + if (isa(Op)) + return nullptr; + auto *I = dyn_cast(V); if (!I) return nullptr; @@ -3853,12 +3871,14 @@ static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef Ops, } // Check to see if this is constant foldable. 
- for (unsigned i = 0, e = Ops.size(); i != e; ++i) - if (!isa<Constant>(Ops[i])) - return nullptr; + if (!all_of(Ops, [](Value *V) { return isa<Constant>(V); })) + return nullptr; - return ConstantExpr::getGetElementPtr(SrcTy, cast<Constant>(Ops[0]), - Ops.slice(1)); + auto *CE = ConstantExpr::getGetElementPtr(SrcTy, cast<Constant>(Ops[0]), + Ops.slice(1)); + if (auto *CEFolded = ConstantFoldConstant(CE, Q.DL)) + return CEFolded; + return CE; } Value *llvm::SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops, @@ -4396,19 +4416,21 @@ static Value *SimplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd, case Intrinsic::uadd_with_overflow: case Intrinsic::sadd_with_overflow: { // X + undef -> undef - if (isa<UndefValue>(RHS)) + if (isa<UndefValue>(LHS) || isa<UndefValue>(RHS)) return UndefValue::get(ReturnType); return nullptr; } case Intrinsic::umul_with_overflow: case Intrinsic::smul_with_overflow: { + // 0 * X -> { 0, false } // X * 0 -> { 0, false } - if (match(RHS, m_Zero())) + if (match(LHS, m_Zero()) || match(RHS, m_Zero())) return Constant::getNullValue(ReturnType); + // undef * X -> { 0, false } // X * undef -> { 0, false } - if (match(RHS, m_Undef())) + if (match(LHS, m_Undef()) || match(RHS, m_Undef())) return Constant::getNullValue(ReturnType); return nullptr; @@ -4441,8 +4463,9 @@ static Value *SimplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd, } template <typename IterTy> -static Value *SimplifyCall(Value *V, IterTy ArgBegin, IterTy ArgEnd, - const SimplifyQuery &Q, unsigned MaxRecurse) { +static Value *SimplifyCall(ImmutableCallSite CS, Value *V, IterTy ArgBegin, + IterTy ArgEnd, const SimplifyQuery &Q, + unsigned MaxRecurse) { Type *Ty = V->getType(); if (PointerType *PTy = dyn_cast<PointerType>(Ty)) Ty = PTy->getElementType(); @@ -4461,7 +4484,7 @@ static Value *SimplifyCall(Value *V, IterTy ArgBegin, IterTy ArgEnd, if (Value *Ret = SimplifyIntrinsic(F, ArgBegin, ArgEnd, Q, MaxRecurse)) return Ret; - if (!canConstantFoldCallTo(F)) + if (!canConstantFoldCallTo(CS, F)) return nullptr; SmallVector<Constant *, 4> ConstantArgs; @@ -4473,17 +4496,18 @@ static Value *SimplifyCall(Value *V, IterTy ArgBegin, IterTy ArgEnd, ConstantArgs.push_back(C); } - return ConstantFoldCall(F, ConstantArgs, Q.TLI); + return ConstantFoldCall(CS, F, ConstantArgs, Q.TLI); } -Value *llvm::SimplifyCall(Value *V, User::op_iterator ArgBegin, - User::op_iterator ArgEnd, const SimplifyQuery &Q) { - return ::SimplifyCall(V, ArgBegin, ArgEnd, Q, RecursionLimit); +Value *llvm::SimplifyCall(ImmutableCallSite CS, Value *V, + User::op_iterator ArgBegin, User::op_iterator ArgEnd, + const SimplifyQuery &Q) { + return ::SimplifyCall(CS, V, ArgBegin, ArgEnd, Q, RecursionLimit); } -Value *llvm::SimplifyCall(Value *V, ArrayRef<Value *> Args, - const SimplifyQuery &Q) { - return ::SimplifyCall(V, Args.begin(), Args.end(), Q, RecursionLimit); +Value *llvm::SimplifyCall(ImmutableCallSite CS, Value *V, + ArrayRef<Value *> Args, const SimplifyQuery &Q) { + return ::SimplifyCall(CS, V, Args.begin(), Args.end(), Q, RecursionLimit); } /// See if we can compute a simplified version of this instruction.
@@ -4614,7 +4638,8 @@ Value *llvm::SimplifyInstruction(Instruction *I, const SimplifyQuery &SQ, break; case Instruction::Call: { CallSite CS(cast(I)); - Result = SimplifyCall(CS.getCalledValue(), CS.arg_begin(), CS.arg_end(), Q); + Result = SimplifyCall(CS, CS.getCalledValue(), CS.arg_begin(), CS.arg_end(), + Q); break; } #define HANDLE_CAST_INST(num, opc, clas) case Instruction::opc: @@ -4632,9 +4657,7 @@ Value *llvm::SimplifyInstruction(Instruction *I, const SimplifyQuery &SQ, // In general, it is possible for computeKnownBits to determine all bits in a // value even when the operands are not all constants. if (!Result && I->getType()->isIntOrIntVectorTy()) { - unsigned BitWidth = I->getType()->getScalarSizeInBits(); - KnownBits Known(BitWidth); - computeKnownBits(I, Known, Q.DL, /*Depth*/ 0, Q.AC, I, Q.DT, ORE); + KnownBits Known = computeKnownBits(I, Q.DL, /*Depth*/ 0, Q.AC, I, Q.DT, ORE); if (Known.isConstant()) Result = ConstantInt::get(I->getType(), Known.getConstant()); } diff --git a/interpreter/llvm/src/lib/Analysis/IteratedDominanceFrontier.cpp b/interpreter/llvm/src/lib/Analysis/IteratedDominanceFrontier.cpp index 2a736ec0379ca..3992657417c5a 100644 --- a/interpreter/llvm/src/lib/Analysis/IteratedDominanceFrontier.cpp +++ b/interpreter/llvm/src/lib/Analysis/IteratedDominanceFrontier.cpp @@ -17,17 +17,9 @@ #include namespace llvm { -template -void IDFCalculator::calculate( +template +void IDFCalculator::calculate( SmallVectorImpl &PHIBlocks) { - // If we haven't computed dominator tree levels, do so now. - if (DomLevels.empty()) { - for (auto DFI = df_begin(DT.getRootNode()), DFE = df_end(DT.getRootNode()); - DFI != DFE; ++DFI) { - DomLevels[*DFI] = DFI.getPathLength() - 1; - } - } - // Use a priority queue keyed on dominator tree level so that inserted nodes // are handled from the bottom of the dominator tree upwards. 
typedef std::pair DomTreeNodePair; @@ -37,7 +29,7 @@ void IDFCalculator::calculate( for (BasicBlock *BB : *DefBlocks) { if (DomTreeNode *Node = DT.getNode(BB)) - PQ.push(std::make_pair(Node, DomLevels.lookup(Node))); + PQ.push({Node, Node->getLevel()}); } SmallVector Worklist; @@ -72,7 +64,7 @@ void IDFCalculator::calculate( if (SuccNode->getIDom() == Node) continue; - unsigned SuccLevel = DomLevels.lookup(SuccNode); + const unsigned SuccLevel = SuccNode->getLevel(); if (SuccLevel > RootLevel) continue; @@ -96,6 +88,6 @@ void IDFCalculator::calculate( } } -template class IDFCalculator; -template class IDFCalculator>; +template class IDFCalculator; +template class IDFCalculator, true>; } diff --git a/interpreter/llvm/src/lib/Analysis/LLVMBuild.txt b/interpreter/llvm/src/lib/Analysis/LLVMBuild.txt index 15c757b48f76c..8a87b980b0a89 100644 --- a/interpreter/llvm/src/lib/Analysis/LLVMBuild.txt +++ b/interpreter/llvm/src/lib/Analysis/LLVMBuild.txt @@ -19,4 +19,4 @@ type = Library name = Analysis parent = Libraries -required_libraries = Core Support ProfileData Object +required_libraries = BinaryFormat Core Object ProfileData Support diff --git a/interpreter/llvm/src/lib/Analysis/LazyBranchProbabilityInfo.cpp b/interpreter/llvm/src/lib/Analysis/LazyBranchProbabilityInfo.cpp index b51c6beb79592..e2884d0a45646 100644 --- a/interpreter/llvm/src/lib/Analysis/LazyBranchProbabilityInfo.cpp +++ b/interpreter/llvm/src/lib/Analysis/LazyBranchProbabilityInfo.cpp @@ -16,6 +16,7 @@ #include "llvm/Analysis/LazyBranchProbabilityInfo.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/TargetLibraryInfo.h" using namespace llvm; @@ -24,6 +25,7 @@ using namespace llvm; INITIALIZE_PASS_BEGIN(LazyBranchProbabilityInfoPass, DEBUG_TYPE, "Lazy Branch Probability Analysis", true, true) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END(LazyBranchProbabilityInfoPass, DEBUG_TYPE, "Lazy Branch Probability Analysis", true, true) @@ -41,6 +43,7 @@ void LazyBranchProbabilityInfoPass::print(raw_ostream &OS, void LazyBranchProbabilityInfoPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); + AU.addRequired(); AU.setPreservesAll(); } @@ -48,16 +51,19 @@ void LazyBranchProbabilityInfoPass::releaseMemory() { LBPI.reset(); } bool LazyBranchProbabilityInfoPass::runOnFunction(Function &F) { LoopInfo &LI = getAnalysis().getLoopInfo(); - LBPI = llvm::make_unique(&F, &LI); + TargetLibraryInfo &TLI = getAnalysis().getTLI(); + LBPI = llvm::make_unique(&F, &LI, &TLI); return false; } void LazyBranchProbabilityInfoPass::getLazyBPIAnalysisUsage(AnalysisUsage &AU) { AU.addRequired(); AU.addRequired(); + AU.addRequired(); } void llvm::initializeLazyBPIPassPass(PassRegistry &Registry) { INITIALIZE_PASS_DEPENDENCY(LazyBranchProbabilityInfoPass); INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass); + INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass); } diff --git a/interpreter/llvm/src/lib/Analysis/LazyCallGraph.cpp b/interpreter/llvm/src/lib/Analysis/LazyCallGraph.cpp index eef56815f2e07..d287f81985fd2 100644 --- a/interpreter/llvm/src/lib/Analysis/LazyCallGraph.cpp +++ b/interpreter/llvm/src/lib/Analysis/LazyCallGraph.cpp @@ -8,10 +8,9 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/LazyCallGraph.h" -#include "llvm/ADT/ScopeExit.h" -#include "llvm/ADT/Sequence.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/ScopeExit.h" +#include "llvm/ADT/Sequence.h" #include 
"llvm/IR/CallSite.h" #include "llvm/IR/InstVisitor.h" #include "llvm/IR/Instructions.h" @@ -107,6 +106,13 @@ LazyCallGraph::EdgeSequence &LazyCallGraph::Node::populateSlow() { LazyCallGraph::Edge::Ref); }); + // Add implicit reference edges to any defined libcall functions (if we + // haven't found an explicit edge). + for (auto *F : G->LibFunctions) + if (!Visited.count(F)) + addEdge(Edges->Edges, Edges->EdgeIndexMap, G->get(*F), + LazyCallGraph::Edge::Ref); + return *Edges; } @@ -121,15 +127,34 @@ LLVM_DUMP_METHOD void LazyCallGraph::Node::dump() const { } #endif -LazyCallGraph::LazyCallGraph(Module &M) { +static bool isKnownLibFunction(Function &F, TargetLibraryInfo &TLI) { + LibFunc LF; + + // Either this is a normal library function or a "vectorizable" function. + return TLI.getLibFunc(F, LF) || TLI.isFunctionVectorizable(F.getName()); +} + +LazyCallGraph::LazyCallGraph(Module &M, TargetLibraryInfo &TLI) { DEBUG(dbgs() << "Building CG for module: " << M.getModuleIdentifier() << "\n"); - for (Function &F : M) - if (!F.isDeclaration() && !F.hasLocalLinkage()) { - DEBUG(dbgs() << " Adding '" << F.getName() - << "' to entry set of the graph.\n"); - addEdge(EntryEdges.Edges, EntryEdges.EdgeIndexMap, get(F), Edge::Ref); - } + for (Function &F : M) { + if (F.isDeclaration()) + continue; + // If this function is a known lib function to LLVM then we want to + // synthesize reference edges to it to model the fact that LLVM can turn + // arbitrary code into a library function call. + if (isKnownLibFunction(F, TLI)) + LibFunctions.insert(&F); + + if (F.hasLocalLinkage()) + continue; + + // External linkage defined functions have edges to them from other + // modules. + DEBUG(dbgs() << " Adding '" << F.getName() + << "' to entry set of the graph.\n"); + addEdge(EntryEdges.Edges, EntryEdges.EdgeIndexMap, get(F), Edge::Ref); + } // Now add entry nodes for functions reachable via initializers to globals. SmallVector Worklist; @@ -150,7 +175,8 @@ LazyCallGraph::LazyCallGraph(Module &M) { LazyCallGraph::LazyCallGraph(LazyCallGraph &&G) : BPA(std::move(G.BPA)), NodeMap(std::move(G.NodeMap)), EntryEdges(std::move(G.EntryEdges)), SCCBPA(std::move(G.SCCBPA)), - SCCMap(std::move(G.SCCMap)), LeafRefSCCs(std::move(G.LeafRefSCCs)) { + SCCMap(std::move(G.SCCMap)), LeafRefSCCs(std::move(G.LeafRefSCCs)), + LibFunctions(std::move(G.LibFunctions)) { updateGraphPtrs(); } @@ -161,6 +187,7 @@ LazyCallGraph &LazyCallGraph::operator=(LazyCallGraph &&G) { SCCBPA = std::move(G.SCCBPA); SCCMap = std::move(G.SCCMap); LeafRefSCCs = std::move(G.LeafRefSCCs); + LibFunctions = std::move(G.LibFunctions); updateGraphPtrs(); return *this; } @@ -457,8 +484,10 @@ updatePostorderSequenceForEdgeInsertion( return make_range(SCCs.begin() + SourceIdx, SCCs.begin() + TargetIdx); } -SmallVector -LazyCallGraph::RefSCC::switchInternalEdgeToCall(Node &SourceN, Node &TargetN) { +bool +LazyCallGraph::RefSCC::switchInternalEdgeToCall( + Node &SourceN, Node &TargetN, + function_ref MergeSCCs)> MergeCB) { assert(!(*SourceN)[TargetN].isCall() && "Must start with a ref edge!"); SmallVector DeletedSCCs; @@ -476,7 +505,7 @@ LazyCallGraph::RefSCC::switchInternalEdgeToCall(Node &SourceN, Node &TargetN) { // we've just added more connectivity. if (&SourceSCC == &TargetSCC) { SourceN->setEdgeKind(TargetN, Edge::Call); - return DeletedSCCs; + return false; // No new cycle. 
} // At this point we leverage the postorder list of SCCs to detect when the @@ -489,7 +518,7 @@ LazyCallGraph::RefSCC::switchInternalEdgeToCall(Node &SourceN, Node &TargetN) { int TargetIdx = SCCIndices[&TargetSCC]; if (TargetIdx < SourceIdx) { SourceN->setEdgeKind(TargetN, Edge::Call); - return DeletedSCCs; + return false; // No new cycle. } // Compute the SCCs which (transitively) reach the source. @@ -556,12 +585,16 @@ LazyCallGraph::RefSCC::switchInternalEdgeToCall(Node &SourceN, Node &TargetN) { SourceSCC, TargetSCC, SCCs, SCCIndices, ComputeSourceConnectedSet, ComputeTargetConnectedSet); + // Run the user's callback on the merged SCCs before we actually merge them. + if (MergeCB) + MergeCB(makeArrayRef(MergeRange.begin(), MergeRange.end())); + // If the merge range is empty, then adding the edge didn't actually form any // new cycles. We're done. if (MergeRange.begin() == MergeRange.end()) { // Now that the SCC structure is finalized, flip the kind to call. SourceN->setEdgeKind(TargetN, Edge::Call); - return DeletedSCCs; + return false; // No new cycle. } #ifndef NDEBUG @@ -597,8 +630,8 @@ LazyCallGraph::RefSCC::switchInternalEdgeToCall(Node &SourceN, Node &TargetN) { // Now that the SCC structure is finalized, flip the kind to call. SourceN->setEdgeKind(TargetN, Edge::Call); - // And we're done! - return DeletedSCCs; + // And we're done, but we did form a new cycle. + return true; } void LazyCallGraph::RefSCC::switchTrivialInternalEdgeToRef(Node &SourceN, @@ -1575,6 +1608,11 @@ void LazyCallGraph::removeDeadFunction(Function &F) { assert(F.use_empty() && "This routine should only be called on trivially dead functions!"); + // We shouldn't remove library functions as they are never really dead while + // the call graph is in use -- every function definition refers to them. + assert(!isLibFunction(F) && + "Must not remove lib functions from the call graph!"); + auto NI = NodeMap.find(&F); if (NI == NodeMap.end()) // Not in the graph at all! diff --git a/interpreter/llvm/src/lib/Analysis/LazyValueInfo.cpp b/interpreter/llvm/src/lib/Analysis/LazyValueInfo.cpp index a2b9015a8a1d8..102081e721ac6 100644 --- a/interpreter/llvm/src/lib/Analysis/LazyValueInfo.cpp +++ b/interpreter/llvm/src/lib/Analysis/LazyValueInfo.cpp @@ -302,7 +302,7 @@ static bool hasSingleValue(const LVILatticeVal &Val) { /// contradictory. If this happens, we return some valid lattice value so as /// not confuse the rest of LVI. Ideally, we'd always return Undefined, but /// we do not make this guarantee. TODO: This would be a useful enhancement. -static LVILatticeVal intersect(LVILatticeVal A, LVILatticeVal B) { +static LVILatticeVal intersect(const LVILatticeVal &A, const LVILatticeVal &B) { // Undefined is the strongest state. It means the value is known to be along // an unreachable path. if (A.isUndefined()) @@ -364,7 +364,6 @@ namespace { /// This is the cache kept by LazyValueInfo which /// maintains information about queries across the clients' queries. class LazyValueInfoCache { - friend class LazyValueInfoAnnotatedWriter; /// This is all of the cached block information for exactly one Value*. /// The entries are sorted by the BasicBlock* of the /// entries, allowing us to do a lookup with a binary search. @@ -384,7 +383,6 @@ namespace { /// don't spend time removing unused blocks from our caches. DenseSet > SeenBlocks; - protected: /// This is all of the cached information for all values, /// mapped from Value* to key information. 
DenseMap> ValueCache; @@ -443,7 +441,6 @@ namespace { return BBI->second; } - void printCache(Function &F, raw_ostream &OS); /// clear - Empty the cache. void clear() { SeenBlocks.clear(); @@ -467,61 +464,6 @@ namespace { }; } - -namespace { - - /// An assembly annotator class to print LazyValueCache information in - /// comments. - class LazyValueInfoAnnotatedWriter : public AssemblyAnnotationWriter { - const LazyValueInfoCache* LVICache; - - public: - LazyValueInfoAnnotatedWriter(const LazyValueInfoCache *L) : LVICache(L) {} - - virtual void emitBasicBlockStartAnnot(const BasicBlock *BB, - formatted_raw_ostream &OS) { - auto ODI = LVICache->OverDefinedCache.find(const_cast(BB)); - if (ODI == LVICache->OverDefinedCache.end()) - return; - OS << "; OverDefined values for block are: \n"; - for (auto *V : ODI->second) - OS << ";" << *V << "\n"; - - // Find if there are latticevalues defined for arguments of the function. - auto *F = const_cast(BB->getParent()); - for (auto &Arg : F->args()) { - auto VI = LVICache->ValueCache.find_as(&Arg); - if (VI == LVICache->ValueCache.end()) - continue; - auto BBI = VI->second->BlockVals.find(const_cast(BB)); - if (BBI != VI->second->BlockVals.end()) - OS << "; CachedLatticeValue for: '" << *VI->first << "' is: '" - << BBI->second << "'\n"; - } - } - - virtual void emitInstructionAnnot(const Instruction *I, - formatted_raw_ostream &OS) { - - auto VI = LVICache->ValueCache.find_as(const_cast(I)); - if (VI == LVICache->ValueCache.end()) - return; - OS << "; CachedLatticeValues for: '" << *VI->first << "'\n"; - for (auto &BV : VI->second->BlockVals) { - OS << "; at beginning of BasicBlock: '"; - BV.first->printAsOperand(OS, false); - OS << "' LatticeVal: '" << BV.second << "' \n"; - } - } -}; -} - -void LazyValueInfoCache::printCache(Function &F, raw_ostream &OS) { - LazyValueInfoAnnotatedWriter Writer(this); - F.print(OS, &Writer); - -} - void LazyValueInfoCache::eraseValue(Value *V) { for (auto I = OverDefinedCache.begin(), E = OverDefinedCache.end(); I != E;) { // Copy and increment the iterator immediately so we can erase behind @@ -615,6 +557,30 @@ void LazyValueInfoCache::threadEdgeImpl(BasicBlock *OldSucc, } } + +namespace { +/// An assembly annotator class to print LazyValueCache information in +/// comments. +class LazyValueInfoImpl; +class LazyValueInfoAnnotatedWriter : public AssemblyAnnotationWriter { + LazyValueInfoImpl *LVIImpl; + // While analyzing which blocks we can solve values for, we need the dominator + // information. Since this is an optional parameter in LVI, we require this + // DomTreeAnalysis pass in the printer pass, and pass the dominator + // tree to the LazyValueInfoAnnotatedWriter. + DominatorTree &DT; + +public: + LazyValueInfoAnnotatedWriter(LazyValueInfoImpl *L, DominatorTree &DTree) + : LVIImpl(L), DT(DTree) {} + + virtual void emitBasicBlockStartAnnot(const BasicBlock *BB, + formatted_raw_ostream &OS); + + virtual void emitInstructionAnnot(const Instruction *I, + formatted_raw_ostream &OS); +}; +} namespace { // The actual implementation of the lazy analysis and update. 
Note that the // inheritance from LazyValueInfoCache is intended to be temporary while @@ -662,13 +628,13 @@ namespace { bool solveBlockValuePHINode(LVILatticeVal &BBLV, PHINode *PN, BasicBlock *BB); bool solveBlockValueSelect(LVILatticeVal &BBLV, SelectInst *S, BasicBlock *BB); - bool solveBlockValueBinaryOp(LVILatticeVal &BBLV, Instruction *BBI, + bool solveBlockValueBinaryOp(LVILatticeVal &BBLV, BinaryOperator *BBI, BasicBlock *BB); - bool solveBlockValueCast(LVILatticeVal &BBLV, Instruction *BBI, + bool solveBlockValueCast(LVILatticeVal &BBLV, CastInst *CI, BasicBlock *BB); void intersectAssumeOrGuardBlockValueConstantRange(Value *Val, LVILatticeVal &BBLV, - Instruction *BBI); + Instruction *BBI); void solve(); @@ -693,9 +659,10 @@ namespace { TheCache.clear(); } - /// Printing the LazyValueInfoCache. - void printCache(Function &F, raw_ostream &OS) { - TheCache.printCache(F, OS); + /// Printing the LazyValueInfo Analysis. + void printLVI(Function &F, DominatorTree &DTree, raw_ostream &OS) { + LazyValueInfoAnnotatedWriter Writer(this, DTree); + F.print(OS, &Writer); } /// This is part of the update interface to inform the cache @@ -714,6 +681,7 @@ namespace { }; } // end anonymous namespace + void LazyValueInfoImpl::solve() { SmallVector, 8> StartingStack( BlockValueStack.begin(), BlockValueStack.end()); @@ -838,7 +806,7 @@ bool LazyValueInfoImpl::solveBlockValueImpl(LVILatticeVal &Res, // that for all other pointer typed values, we terminate the search at the // definition. We could easily extend this to look through geps, bitcasts, // and the like to prove non-nullness, but it's not clear that's worth it - // compile time wise. The context-insensative value walk done inside + // compile time wise. The context-insensitive value walk done inside // isKnownNonNull gets most of the profitable cases at much less expense. // This does mean that we have a sensativity to where the defining // instruction is placed, even if it could legally be hoisted much higher. @@ -849,12 +817,12 @@ bool LazyValueInfoImpl::solveBlockValueImpl(LVILatticeVal &Res, return true; } if (BBI->getType()->isIntegerTy()) { - if (isa(BBI)) - return solveBlockValueCast(Res, BBI, BB); - + if (auto *CI = dyn_cast(BBI)) + return solveBlockValueCast(Res, CI, BB); + BinaryOperator *BO = dyn_cast(BBI); if (BO && isa(BO->getOperand(1))) - return solveBlockValueBinaryOp(Res, BBI, BB); + return solveBlockValueBinaryOp(Res, BO, BB); } DEBUG(dbgs() << " compute BB '" << BB->getName() @@ -1168,9 +1136,9 @@ bool LazyValueInfoImpl::solveBlockValueSelect(LVILatticeVal &BBLV, } bool LazyValueInfoImpl::solveBlockValueCast(LVILatticeVal &BBLV, - Instruction *BBI, - BasicBlock *BB) { - if (!BBI->getOperand(0)->getType()->isSized()) { + CastInst *CI, + BasicBlock *BB) { + if (!CI->getOperand(0)->getType()->isSized()) { // Without knowing how wide the input is, we can't analyze it in any useful // way. BBLV = LVILatticeVal::getOverdefined(); @@ -1180,7 +1148,7 @@ bool LazyValueInfoImpl::solveBlockValueCast(LVILatticeVal &BBLV, // Filter out casts we don't know how to reason about before attempting to // recurse on our operand. This can cut a long search short if we know we're // not going to be able to get any useful information anways. - switch (BBI->getOpcode()) { + switch (CI->getOpcode()) { case Instruction::Trunc: case Instruction::SExt: case Instruction::ZExt: @@ -1197,44 +1165,43 @@ bool LazyValueInfoImpl::solveBlockValueCast(LVILatticeVal &BBLV, // Figure out the range of the LHS. 
If that fails, we still apply the // transfer rule on the full set since we may be able to locally infer // interesting facts. - if (!hasBlockValue(BBI->getOperand(0), BB)) - if (pushBlockValue(std::make_pair(BB, BBI->getOperand(0)))) + if (!hasBlockValue(CI->getOperand(0), BB)) + if (pushBlockValue(std::make_pair(BB, CI->getOperand(0)))) // More work to do before applying this transfer rule. return false; const unsigned OperandBitWidth = - DL.getTypeSizeInBits(BBI->getOperand(0)->getType()); + DL.getTypeSizeInBits(CI->getOperand(0)->getType()); ConstantRange LHSRange = ConstantRange(OperandBitWidth); - if (hasBlockValue(BBI->getOperand(0), BB)) { - LVILatticeVal LHSVal = getBlockValue(BBI->getOperand(0), BB); - intersectAssumeOrGuardBlockValueConstantRange(BBI->getOperand(0), LHSVal, - BBI); + if (hasBlockValue(CI->getOperand(0), BB)) { + LVILatticeVal LHSVal = getBlockValue(CI->getOperand(0), BB); + intersectAssumeOrGuardBlockValueConstantRange(CI->getOperand(0), LHSVal, + CI); if (LHSVal.isConstantRange()) LHSRange = LHSVal.getConstantRange(); } - const unsigned ResultBitWidth = - cast(BBI->getType())->getBitWidth(); + const unsigned ResultBitWidth = CI->getType()->getIntegerBitWidth(); // NOTE: We're currently limited by the set of operations that ConstantRange // can evaluate symbolically. Enhancing that set will allows us to analyze // more definitions. - auto CastOp = (Instruction::CastOps) BBI->getOpcode(); - BBLV = LVILatticeVal::getRange(LHSRange.castOp(CastOp, ResultBitWidth)); + BBLV = LVILatticeVal::getRange(LHSRange.castOp(CI->getOpcode(), + ResultBitWidth)); return true; } bool LazyValueInfoImpl::solveBlockValueBinaryOp(LVILatticeVal &BBLV, - Instruction *BBI, + BinaryOperator *BO, BasicBlock *BB) { - assert(BBI->getOperand(0)->getType()->isSized() && + assert(BO->getOperand(0)->getType()->isSized() && "all operands to binary operators are sized"); // Filter out operators we don't know how to reason about before attempting to // recurse on our operand(s). This can cut a long search short if we know - // we're not going to be able to get any useful information anways. - switch (BBI->getOpcode()) { + // we're not going to be able to get any useful information anyways. + switch (BO->getOpcode()) { case Instruction::Add: case Instruction::Sub: case Instruction::Mul: @@ -1256,29 +1223,29 @@ bool LazyValueInfoImpl::solveBlockValueBinaryOp(LVILatticeVal &BBLV, // Figure out the range of the LHS. If that fails, use a conservative range, // but apply the transfer rule anyways. This lets us pick up facts from // expressions like "and i32 (call i32 @foo()), 32" - if (!hasBlockValue(BBI->getOperand(0), BB)) - if (pushBlockValue(std::make_pair(BB, BBI->getOperand(0)))) + if (!hasBlockValue(BO->getOperand(0), BB)) + if (pushBlockValue(std::make_pair(BB, BO->getOperand(0)))) // More work to do before applying this transfer rule. 
return false; const unsigned OperandBitWidth = - DL.getTypeSizeInBits(BBI->getOperand(0)->getType()); + DL.getTypeSizeInBits(BO->getOperand(0)->getType()); ConstantRange LHSRange = ConstantRange(OperandBitWidth); - if (hasBlockValue(BBI->getOperand(0), BB)) { - LVILatticeVal LHSVal = getBlockValue(BBI->getOperand(0), BB); - intersectAssumeOrGuardBlockValueConstantRange(BBI->getOperand(0), LHSVal, - BBI); + if (hasBlockValue(BO->getOperand(0), BB)) { + LVILatticeVal LHSVal = getBlockValue(BO->getOperand(0), BB); + intersectAssumeOrGuardBlockValueConstantRange(BO->getOperand(0), LHSVal, + BO); if (LHSVal.isConstantRange()) LHSRange = LHSVal.getConstantRange(); } - ConstantInt *RHS = cast(BBI->getOperand(1)); + ConstantInt *RHS = cast(BO->getOperand(1)); ConstantRange RHSRange = ConstantRange(RHS->getValue()); // NOTE: We're currently limited by the set of operations that ConstantRange // can evaluate symbolically. Enhancing that set will allows us to analyze // more definitions. - auto BinOp = (Instruction::BinaryOps) BBI->getOpcode(); + Instruction::BinaryOps BinOp = BO->getOpcode(); BBLV = LVILatticeVal::getRange(LHSRange.binaryOp(BinOp, RHSRange)); return true; } @@ -1357,12 +1324,12 @@ getValueFromConditionImpl(Value *Val, Value *Cond, bool isTrueDest, return getValueFromICmpCondition(Val, ICI, isTrueDest); // Handle conditions in the form of (cond1 && cond2), we know that on the - // true dest path both of the conditions hold. - if (!isTrueDest) - return LVILatticeVal::getOverdefined(); - + // true dest path both of the conditions hold. Similarly for conditions of + // the form (cond1 || cond2), we know that on the false dest path neither + // condition holds. BinaryOperator *BO = dyn_cast(Cond); - if (!BO || BO->getOpcode() != BinaryOperator::And) + if (!BO || (isTrueDest && BO->getOpcode() != BinaryOperator::And) || + (!isTrueDest && BO->getOpcode() != BinaryOperator::Or)) return LVILatticeVal::getOverdefined(); auto RHS = getValueFromCondition(Val, BO->getOperand(0), isTrueDest, Visited); @@ -1693,64 +1660,83 @@ Constant *LazyValueInfo::getConstantOnEdge(Value *V, BasicBlock *FromBB, return nullptr; } +ConstantRange LazyValueInfo::getConstantRangeOnEdge(Value *V, + BasicBlock *FromBB, + BasicBlock *ToBB, + Instruction *CxtI) { + unsigned Width = V->getType()->getIntegerBitWidth(); + const DataLayout &DL = FromBB->getModule()->getDataLayout(); + LVILatticeVal Result = + getImpl(PImpl, AC, &DL, DT).getValueOnEdge(V, FromBB, ToBB, CxtI); + + if (Result.isUndefined()) + return ConstantRange(Width, /*isFullSet=*/false); + if (Result.isConstantRange()) + return Result.getConstantRange(); + // We represent ConstantInt constants as constant ranges but other kinds + // of integer constants, i.e. ConstantExpr will be tagged as constants + assert(!(Result.isConstant() && isa(Result.getConstant())) && + "ConstantInt value must be represented as constantrange"); + return ConstantRange(Width, /*isFullSet=*/true); +} + static LazyValueInfo::Tristate getPredicateResult(unsigned Pred, Constant *C, - LVILatticeVal &Result, + const LVILatticeVal &Val, const DataLayout &DL, TargetLibraryInfo *TLI) { // If we know the value is a constant, evaluate the conditional. Constant *Res = nullptr; - if (Result.isConstant()) { - Res = ConstantFoldCompareInstOperands(Pred, Result.getConstant(), C, DL, - TLI); + if (Val.isConstant()) { + Res = ConstantFoldCompareInstOperands(Pred, Val.getConstant(), C, DL, TLI); if (ConstantInt *ResCI = dyn_cast(Res)) return ResCI->isZero() ? 
LazyValueInfo::False : LazyValueInfo::True; return LazyValueInfo::Unknown; } - if (Result.isConstantRange()) { + if (Val.isConstantRange()) { ConstantInt *CI = dyn_cast<ConstantInt>(C); if (!CI) return LazyValueInfo::Unknown; - const ConstantRange &CR = Result.getConstantRange(); + const ConstantRange &CR = Val.getConstantRange(); if (Pred == ICmpInst::ICMP_EQ) { if (!CR.contains(CI->getValue())) return LazyValueInfo::False; - if (CR.isSingleElement() && CR.contains(CI->getValue())) + if (CR.isSingleElement()) return LazyValueInfo::True; } else if (Pred == ICmpInst::ICMP_NE) { if (!CR.contains(CI->getValue())) return LazyValueInfo::True; - if (CR.isSingleElement() && CR.contains(CI->getValue())) + if (CR.isSingleElement()) + return LazyValueInfo::False; + } else { + // Handle more complex predicates. + ConstantRange TrueValues = ConstantRange::makeExactICmpRegion( + (ICmpInst::Predicate)Pred, CI->getValue()); + if (TrueValues.contains(CR)) + return LazyValueInfo::True; + if (TrueValues.inverse().contains(CR)) return LazyValueInfo::False; } - - // Handle more complex predicates. - ConstantRange TrueValues = ConstantRange::makeExactICmpRegion( - (ICmpInst::Predicate)Pred, CI->getValue()); - if (TrueValues.contains(CR)) - return LazyValueInfo::True; - if (TrueValues.inverse().contains(CR)) - return LazyValueInfo::False; return LazyValueInfo::Unknown; } - if (Result.isNotConstant()) { + if (Val.isNotConstant()) { // If this is an equality comparison, we can try to fold it knowing that // "V != C1". if (Pred == ICmpInst::ICMP_EQ) { // !C1 == C -> false iff C1 == C. Res = ConstantFoldCompareInstOperands(ICmpInst::ICMP_NE, - Result.getNotConstant(), C, DL, + Val.getNotConstant(), C, DL, TLI); if (Res->isNullValue()) return LazyValueInfo::False; } else if (Pred == ICmpInst::ICMP_NE) { // !C1 != C -> true iff C1 == C. Res = ConstantFoldCompareInstOperands(ICmpInst::ICMP_NE, - Result.getNotConstant(), C, DL, + Val.getNotConstant(), C, DL, TLI); if (Res->isNullValue()) return LazyValueInfo::True; @@ -1891,12 +1877,65 @@ void LazyValueInfo::eraseBlock(BasicBlock *BB) { } -void LazyValueInfo::printCache(Function &F, raw_ostream &OS) { +void LazyValueInfo::printLVI(Function &F, DominatorTree &DTree, raw_ostream &OS) { if (PImpl) { - getImpl(PImpl, AC, DL, DT).printCache(F, OS); + getImpl(PImpl, AC, DL, DT).printLVI(F, DTree, OS); + } +} + +// Print the LVI for the function arguments at the start of each basic block. +void LazyValueInfoAnnotatedWriter::emitBasicBlockStartAnnot( + const BasicBlock *BB, formatted_raw_ostream &OS) { + // Find if there are lattice values defined for arguments of the function. + auto *F = BB->getParent(); + for (auto &Arg : F->args()) { + LVILatticeVal Result = LVIImpl->getValueInBlock( + const_cast<Argument *>(&Arg), const_cast<BasicBlock *>(BB)); + if (Result.isUndefined()) + continue; + OS << "; LatticeVal for: '" << Arg << "' is: " << Result << "\n"; } } +// This function prints the LVI analysis for the instruction `I` at the beginning +// of various basic blocks. It relies on calculated values that are stored in +// the LazyValueInfoCache, and in the absence of cached values, recalculates the +// LazyValueInfo for `I` and prints that info. +void LazyValueInfoAnnotatedWriter::emitInstructionAnnot( + const Instruction *I, formatted_raw_ostream &OS) { + + auto *ParentBB = I->getParent(); + SmallPtrSet BlocksContainingLVI; + // We can generate (solve) LVI values only for blocks that are dominated by + // `I`'s parent. However, to avoid generating LVI for all dominating blocks, + // which would contain redundant/uninteresting information, we print LVI for + // blocks that may use this LVI information (such as immediate successor + // blocks, and blocks that contain uses of `I`). + auto printResult = [&](const BasicBlock *BB) { + if (!BlocksContainingLVI.insert(BB).second) + return; + LVILatticeVal Result = LVIImpl->getValueInBlock( + const_cast<Instruction *>(I), const_cast<BasicBlock *>(BB)); + OS << "; LatticeVal for: '" << *I << "' in BB: '"; + BB->printAsOperand(OS, false); + OS << "' is: " << Result << "\n"; + }; + + printResult(ParentBB); + // Print the LVI analysis results for the immediate successor blocks that + // are dominated by `ParentBB`. + for (auto *BBSucc : successors(ParentBB)) + if (DT.dominates(ParentBB, BBSucc)) + printResult(BBSucc); + + // Print LVI in blocks where `I` is used. + for (auto *U : I->users()) + if (auto *UseI = dyn_cast<Instruction>(U)) + if (!isa<PHINode>(UseI) || DT.dominates(ParentBB, UseI->getParent())) + printResult(UseI->getParent()); + +} + namespace { // Printer class for LazyValueInfo results. class LazyValueInfoPrinter : public FunctionPass { @@ -1909,12 +1948,16 @@ class LazyValueInfoPrinter : public FunctionPass { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); AU.addRequired<LazyValueInfoWrapperPass>(); + AU.addRequired<DominatorTreeWrapperPass>(); } + // Get the mandatory dominator tree analysis and pass this in to the + // LVIPrinter. We cannot rely on the LVI's DT, since it's optional. bool runOnFunction(Function &F) override { dbgs() << "LVI for function '" << F.getName() << "':\n"; auto &LVI = getAnalysis<LazyValueInfoWrapperPass>().getLVI(); - LVI.printCache(F, dbgs()); + auto &DTree = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + LVI.printLVI(F, DTree, dbgs()); return false; } }; diff --git a/interpreter/llvm/src/lib/Analysis/Lint.cpp b/interpreter/llvm/src/lib/Analysis/Lint.cpp index 471ccb62970d4..ada600a69b872 100644 --- a/interpreter/llvm/src/lib/Analysis/Lint.cpp +++ b/interpreter/llvm/src/lib/Analysis/Lint.cpp @@ -58,13 +58,13 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" -#include "llvm/IR/Module.h" #include "llvm/IR/InstVisitor.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" #include "llvm/Pass.h" @@ -405,7 +405,7 @@ void Lint::visitMemoryReference(Instruction &I, Assert(!isa<UndefValue>(UnderlyingObject), "Undefined behavior: Undef pointer dereference", &I); Assert(!isa<ConstantInt>(UnderlyingObject) || - !cast<ConstantInt>(UnderlyingObject)->isAllOnesValue(), + !cast<ConstantInt>(UnderlyingObject)->isMinusOne(), "Unusual: All-ones pointer dereference", &I); Assert(!isa<ConstantInt>(UnderlyingObject) || !cast<ConstantInt>(UnderlyingObject)->isOne(), @@ -534,9 +534,7 @@ static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, VectorType *VecTy = dyn_cast<VectorType>(V->getType()); if (!VecTy) { - unsigned BitWidth = V->getType()->getIntegerBitWidth(); - KnownBits Known(BitWidth); - computeKnownBits(V, Known, DL, 0, AC, dyn_cast<Instruction>(V), DT); + KnownBits Known = computeKnownBits(V, DL, 0, AC, dyn_cast<Instruction>(V), DT); return Known.isZero(); } @@ -550,14 +548,12 @@ static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, // For a vector, KnownZero will only be true if all values are zero, so check // this per component - unsigned BitWidth = VecTy->getElementType()->getIntegerBitWidth(); for (unsigned I = 0, N = VecTy->getNumElements(); I != N; ++I) { Constant *Elem =
C->getAggregateElement(I); if (isa(Elem)) return true; - KnownBits Known(BitWidth); - computeKnownBits(Elem, Known, DL); + KnownBits Known = computeKnownBits(Elem, DL); if (Known.isZero()) return true; } diff --git a/interpreter/llvm/src/lib/Analysis/Loads.cpp b/interpreter/llvm/src/lib/Analysis/Loads.cpp index 96799a459bfc4..591b0fc481d24 100644 --- a/interpreter/llvm/src/lib/Analysis/Loads.cpp +++ b/interpreter/llvm/src/lib/Analysis/Loads.cpp @@ -116,6 +116,16 @@ static bool isDereferenceableAndAlignedPointer( return false; } +bool llvm::isDereferenceableAndAlignedPointer(const Value *V, unsigned Align, + const APInt &Size, + const DataLayout &DL, + const Instruction *CtxI, + const DominatorTree *DT) { + SmallPtrSet Visited; + return ::isDereferenceableAndAlignedPointer(V, Align, Size, DL, CtxI, DT, + Visited); +} + bool llvm::isDereferenceableAndAlignedPointer(const Value *V, unsigned Align, const DataLayout &DL, const Instruction *CtxI, diff --git a/interpreter/llvm/src/lib/Analysis/LoopInfo.cpp b/interpreter/llvm/src/lib/Analysis/LoopInfo.cpp index ff68810abb827..697b58622bb4a 100644 --- a/interpreter/llvm/src/lib/Analysis/LoopInfo.cpp +++ b/interpreter/llvm/src/lib/Analysis/LoopInfo.cpp @@ -131,13 +131,13 @@ PHINode *Loop::getCanonicalInductionVariable() const { PHINode *PN = cast(I); if (ConstantInt *CI = dyn_cast(PN->getIncomingValueForBlock(Incoming))) - if (CI->isNullValue()) + if (CI->isZero()) if (Instruction *Inc = dyn_cast(PN->getIncomingValueForBlock(Backedge))) if (Inc->getOpcode() == Instruction::Add && Inc->getOperand(0) == PN) if (ConstantInt *CI = dyn_cast(Inc->getOperand(1))) - if (CI->equalsInt(1)) + if (CI->isOne()) return PN; } return nullptr; @@ -460,7 +460,7 @@ class UnloopUpdater { void UnloopUpdater::updateBlockParents() { if (Unloop.getNumBlocks()) { // Perform a post order CFG traversal of all blocks within this loop, - // propagating the nearest loop from sucessors to predecessors. + // propagating the nearest loop from successors to predecessors. LoopBlocksTraversal Traversal(DFS, LI); for (BasicBlock *POI : Traversal) { @@ -609,7 +609,7 @@ Loop *UnloopUpdater::getNearestLoop(BasicBlock *BB, Loop *BBLoop) { return NearLoop; } -LoopInfo::LoopInfo(const DominatorTreeBase &DomTree) { +LoopInfo::LoopInfo(const DomTreeBase &DomTree) { analyze(DomTree); } diff --git a/interpreter/llvm/src/lib/Analysis/LoopPass.cpp b/interpreter/llvm/src/lib/Analysis/LoopPass.cpp index 0b5f6266e3737..e988f6444a58d 100644 --- a/interpreter/llvm/src/lib/Analysis/LoopPass.cpp +++ b/interpreter/llvm/src/lib/Analysis/LoopPass.cpp @@ -73,30 +73,23 @@ LPPassManager::LPPassManager() CurrentLoop = nullptr; } -// Inset loop into loop nest (LoopInfo) and loop queue (LQ). -Loop &LPPassManager::addLoop(Loop *ParentLoop) { - // Create a new loop. LI will take ownership. - Loop *L = new Loop(); - - // Insert into the loop nest and the loop queue. - if (!ParentLoop) { +// Insert loop into loop nest (LoopInfo) and loop queue (LQ). +void LPPassManager::addLoop(Loop &L) { + if (!L.getParentLoop()) { // This is the top level loop. - LI->addTopLevelLoop(L); - LQ.push_front(L); - return *L; + LQ.push_front(&L); + return; } - ParentLoop->addChildLoop(L); // Insert L into the loop queue after the parent loop. for (auto I = LQ.begin(), E = LQ.end(); I != E; ++I) { - if (*I == L->getParentLoop()) { + if (*I == L.getParentLoop()) { // deque does not support insert after. 
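// Illustration, separate from the loop-queue code above: std::deque::insert
// places new elements *before* the given iterator, which is why the iterator
// is first advanced past the parent loop. In plain C++:
//
//   std::deque<int> Q = {1, 2, 3};
//   auto It = std::find(Q.begin(), Q.end(), 2); // the "parent"
//   ++It;                                       // step past it
//   Q.insert(It, 1, 99);                        // Q becomes {1, 2, 99, 3}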
++I; - LQ.insert(I, 1, L); - break; + LQ.insert(I, 1, &L); + return; } } - return *L; } /// cloneBasicBlockSimpleAnalysis - Invoke cloneBasicBlockAnalysis hook for diff --git a/interpreter/llvm/src/lib/Analysis/MemDepPrinter.cpp b/interpreter/llvm/src/lib/Analysis/MemDepPrinter.cpp index e7a85ae06e681..5c0cbb26484c1 100644 --- a/interpreter/llvm/src/lib/Analysis/MemDepPrinter.cpp +++ b/interpreter/llvm/src/lib/Analysis/MemDepPrinter.cpp @@ -10,9 +10,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Analysis/Passes.h" #include "llvm/ADT/SetVector.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" +#include "llvm/Analysis/Passes.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/LLVMContext.h" diff --git a/interpreter/llvm/src/lib/Analysis/MemDerefPrinter.cpp b/interpreter/llvm/src/lib/Analysis/MemDerefPrinter.cpp index fa0cc5a46c2b8..4231a78352ce5 100644 --- a/interpreter/llvm/src/lib/Analysis/MemDerefPrinter.cpp +++ b/interpreter/llvm/src/lib/Analysis/MemDerefPrinter.cpp @@ -7,10 +7,10 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Analysis/Passes.h" #include "llvm/ADT/SetVector.h" -#include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Analysis/Loads.h" +#include "llvm/Analysis/MemoryDependenceAnalysis.h" +#include "llvm/Analysis/Passes.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/InstIterator.h" diff --git a/interpreter/llvm/src/lib/Analysis/MemoryBuiltins.cpp b/interpreter/llvm/src/lib/Analysis/MemoryBuiltins.cpp index 7983d62c2f7aa..7327c07499bed 100644 --- a/interpreter/llvm/src/lib/Analysis/MemoryBuiltins.cpp +++ b/interpreter/llvm/src/lib/Analysis/MemoryBuiltins.cpp @@ -400,8 +400,8 @@ static APInt getSizeWithOverflow(const SizeOffsetType &Data) { /// \brief Compute the size of the object pointed by Ptr. Returns true and the /// object size in Size if successful, and false otherwise. -/// If RoundToAlign is true, then Size is rounded up to the aligment of allocas, -/// byval arguments, and global variables. +/// If RoundToAlign is true, then Size is rounded up to the alignment of +/// allocas, byval arguments, and global variables. bool llvm::getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout &DL, const TargetLibraryInfo *TLI, ObjectSizeOpts Opts) { ObjectSizeOffsetVisitor Visitor(DL, TLI, Ptr->getContext(), Opts); @@ -505,6 +505,22 @@ SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) { return unknown(); } +/// When we're compiling N-bit code, and the user uses parameters that are +/// greater than N bits (e.g. uint64_t on a 32-bit build), we can run into +/// trouble with APInt size issues. This function handles resizing + overflow +/// checks for us. Check and zext or trunc \p I depending on IntTyBits and +/// I's value. +bool ObjectSizeOffsetVisitor::CheckedZextOrTrunc(APInt &I) { + // More bits than we can handle. Checking the bit width isn't necessary, but + // it's faster than checking active bits, and should give `false` in the + // vast majority of cases. 
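// Worked example, assuming IntTyBits == 32 (a hypothetical 32-bit target,
// not stated in the patch):
//   - a 64-bit APInt holding (uint64_t)1 << 40 has 41 active bits, so the
//     check below rejects it (returns false);
//   - a 64-bit APInt holding 7 has 3 active bits, so it is truncated to a
//     32-bit APInt still holding 7;
//   - a 16-bit APInt passes the width check and is zero-extended to 32 bits.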
+ if (I.getBitWidth() > IntTyBits && I.getActiveBits() > IntTyBits) + return false; + if (I.getBitWidth() != IntTyBits) + I = I.zextOrTrunc(IntTyBits); + return true; +} + SizeOffsetType ObjectSizeOffsetVisitor::visitAllocaInst(AllocaInst &I) { if (!I.getAllocatedType()->isSized()) return unknown(); @@ -515,8 +531,14 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitAllocaInst(AllocaInst &I) { Value *ArraySize = I.getArraySize(); if (const ConstantInt *C = dyn_cast(ArraySize)) { - Size *= C->getValue().zextOrSelf(IntTyBits); - return std::make_pair(align(Size, I.getAlignment()), Zero); + APInt NumElems = C->getValue(); + if (!CheckedZextOrTrunc(NumElems)) + return unknown(); + + bool Overflow; + Size = Size.umul_ov(NumElems, Overflow); + return Overflow ? unknown() : std::make_pair(align(Size, I.getAlignment()), + Zero); } return unknown(); } @@ -561,21 +583,6 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitCallSite(CallSite CS) { if (!Arg) return unknown(); - // When we're compiling N-bit code, and the user uses parameters that are - // greater than N bits (e.g. uint64_t on a 32-bit build), we can run into - // trouble with APInt size issues. This function handles resizing + overflow - // checks for us. - auto CheckedZextOrTrunc = [&](APInt &I) { - // More bits than we can handle. Checking the bit width isn't necessary, but - // it's faster than checking active bits, and should give `false` in the - // vast majority of cases. - if (I.getBitWidth() > IntTyBits && I.getActiveBits() > IntTyBits) - return false; - if (I.getBitWidth() != IntTyBits) - I = I.zextOrTrunc(IntTyBits); - return true; - }; - APInt Size = Arg->getValue(); if (!CheckedZextOrTrunc(Size)) return unknown(); diff --git a/interpreter/llvm/src/lib/Analysis/MemoryDependenceAnalysis.cpp b/interpreter/llvm/src/lib/Analysis/MemoryDependenceAnalysis.cpp index 66a0d145dcd85..263cf42ebe271 100644 --- a/interpreter/llvm/src/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/interpreter/llvm/src/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -15,17 +15,17 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/MemoryDependenceAnalysis.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/MemoryBuiltins.h" -#include "llvm/Analysis/PHITransAddr.h" #include "llvm/Analysis/OrderedBasicBlock.h" -#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Analysis/PHITransAddr.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -310,11 +310,11 @@ unsigned MemoryDependenceResults::getLoadLoadClobberFullWidthSize( } static bool isVolatile(Instruction *Inst) { - if (LoadInst *LI = dyn_cast(Inst)) + if (auto *LI = dyn_cast(Inst)) return LI->isVolatile(); - else if (StoreInst *SI = dyn_cast(Inst)) + if (auto *SI = dyn_cast(Inst)) return SI->isVolatile(); - else if (AtomicCmpXchgInst *AI = dyn_cast(Inst)) + if (auto *AI = dyn_cast(Inst)) return AI->isVolatile(); return false; } @@ -691,6 +691,7 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom( // load query, we can safely ignore it (scan past it). if (isLoad) continue; + LLVM_FALLTHROUGH; default: // Otherwise, there is a potential dependence. Return a clobber. 
return MemDepResult::getClobber(Inst); diff --git a/interpreter/llvm/src/lib/Analysis/MemorySSA.cpp b/interpreter/llvm/src/lib/Analysis/MemorySSA.cpp index 2480fe44d5c0e..86de474c7aa95 100644 --- a/interpreter/llvm/src/lib/Analysis/MemorySSA.cpp +++ b/interpreter/llvm/src/lib/Analysis/MemorySSA.cpp @@ -39,7 +39,6 @@ #include "llvm/IR/PatternMatch.h" #include "llvm/Support/Debug.h" #include "llvm/Support/FormattedStream.h" -#include "llvm/Transforms/Scalar.h" #include #define DEBUG_TYPE "memoryssa" @@ -1799,6 +1798,15 @@ bool MemorySSA::dominates(const MemoryAccess *Dominator, const static char LiveOnEntryStr[] = "liveOnEntry"; +void MemoryAccess::print(raw_ostream &OS) const { + switch (getValueID()) { + case MemoryPhiVal: return static_cast(this)->print(OS); + case MemoryDefVal: return static_cast(this)->print(OS); + case MemoryUseVal: return static_cast(this)->print(OS); + } + llvm_unreachable("invalid value id"); +} + void MemoryDef::print(raw_ostream &OS) const { MemoryAccess *UO = getDefiningAccess(); @@ -1836,8 +1844,6 @@ void MemoryPhi::print(raw_ostream &OS) const { OS << ')'; } -MemoryAccess::~MemoryAccess() {} - void MemoryUse::print(raw_ostream &OS) const { MemoryAccess *UO = getDefiningAccess(); OS << "MemoryUse("; @@ -1865,7 +1871,6 @@ MemorySSAPrinterLegacyPass::MemorySSAPrinterLegacyPass() : FunctionPass(ID) { void MemorySSAPrinterLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); AU.addRequired(); - AU.addPreserved(); } bool MemorySSAPrinterLegacyPass::runOnFunction(Function &F) { @@ -1950,6 +1955,7 @@ MemoryAccess *MemorySSA::CachingWalker::getClobberingMemoryAccess( #ifdef EXPENSIVE_CHECKS MemoryAccess *NewNoCache = Walker.findClobber(StartingAccess, Q); assert(NewNoCache == New && "Cache made us hand back a different result?"); + (void)NewNoCache; #endif if (AutoResetWalker) resetClobberWalker(); @@ -2054,3 +2060,15 @@ MemoryAccess *DoNothingMemorySSAWalker::getClobberingMemoryAccess( return StartingAccess; } } // namespace llvm + +void MemoryPhi::deleteMe(DerivedUser *Self) { + delete static_cast(Self); +} + +void MemoryDef::deleteMe(DerivedUser *Self) { + delete static_cast(Self); +} + +void MemoryUse::deleteMe(DerivedUser *Self) { + delete static_cast(Self); +} diff --git a/interpreter/llvm/src/lib/Analysis/MemorySSAUpdater.cpp b/interpreter/llvm/src/lib/Analysis/MemorySSAUpdater.cpp index da5c79ab6c813..1ff84471c0946 100644 --- a/interpreter/llvm/src/lib/Analysis/MemorySSAUpdater.cpp +++ b/interpreter/llvm/src/lib/Analysis/MemorySSAUpdater.cpp @@ -14,6 +14,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" +#include "llvm/Analysis/MemorySSA.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/GlobalVariable.h" @@ -24,7 +25,6 @@ #include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" #include "llvm/Support/FormattedStream.h" -#include "llvm/Analysis/MemorySSA.h" #include #define DEBUG_TYPE "memoryssa" @@ -124,17 +124,12 @@ MemoryAccess *MemorySSAUpdater::getPreviousDefInBlock(MemoryAccess *MA) { return &*Iter; } else { // Otherwise, have to walk the all access iterator. 
- auto Iter = MA->getReverseIterator(); - ++Iter; - while (&*Iter != &*Defs->begin()) { - if (!isa(*Iter)) - return &*Iter; - --Iter; - } - // At this point it must be pointing at firstdef - assert(&*Iter == &*Defs->begin() && - "Should have hit first def walking backwards"); - return &*Iter; + auto End = MSSA->getWritableBlockAccesses(MA->getBlock())->rend(); + for (auto &U : make_range(++MA->getReverseIterator(), End)) + if (!isa(U)) + return cast(&U); + // Note that if MA comes before Defs->begin(), we won't hit a def. + return nullptr; } } return nullptr; diff --git a/interpreter/llvm/src/lib/Analysis/ModuleDebugInfoPrinter.cpp b/interpreter/llvm/src/lib/Analysis/ModuleDebugInfoPrinter.cpp index f675830aa67d9..e12cdf9182c74 100644 --- a/interpreter/llvm/src/lib/Analysis/ModuleDebugInfoPrinter.cpp +++ b/interpreter/llvm/src/lib/Analysis/ModuleDebugInfoPrinter.cpp @@ -15,8 +15,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Analysis/Passes.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/Passes.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/Function.h" #include "llvm/Pass.h" diff --git a/interpreter/llvm/src/lib/Analysis/ModuleSummaryAnalysis.cpp b/interpreter/llvm/src/lib/Analysis/ModuleSummaryAnalysis.cpp index 26706f5509bab..e9e354ebb88f9 100644 --- a/interpreter/llvm/src/lib/Analysis/ModuleSummaryAnalysis.cpp +++ b/interpreter/llvm/src/lib/Analysis/ModuleSummaryAnalysis.cpp @@ -266,7 +266,7 @@ computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M, // sample PGO, to enable the same inlines as the profiled optimized binary. for (auto &I : F.getImportGUIDs()) CallGraphEdges[Index.getOrInsertValueInfo(I)].updateHotness( - CalleeInfo::HotnessType::Hot); + CalleeInfo::HotnessType::Critical); bool NonRenamableLocal = isNonRenamableLocal(F); bool NotEligibleForImport = @@ -275,7 +275,7 @@ computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M, // FIXME: refactor this to use the same code that inliner is using. 
F.isVarArg(); GlobalValueSummary::GVFlags Flags(F.getLinkage(), NotEligibleForImport, - /* LiveRoot = */ false); + /* Live = */ false); auto FuncSummary = llvm::make_unique( Flags, NumInsts, RefEdges.takeVector(), CallGraphEdges.takeVector(), TypeTests.takeVector(), TypeTestAssumeVCalls.takeVector(), @@ -295,7 +295,7 @@ computeVariableSummary(ModuleSummaryIndex &Index, const GlobalVariable &V, findRefEdges(Index, &V, RefEdges, Visited); bool NonRenamableLocal = isNonRenamableLocal(V); GlobalValueSummary::GVFlags Flags(V.getLinkage(), NonRenamableLocal, - /* LiveRoot = */ false); + /* Live = */ false); auto GVarSummary = llvm::make_unique(Flags, RefEdges.takeVector()); if (NonRenamableLocal) @@ -308,7 +308,7 @@ computeAliasSummary(ModuleSummaryIndex &Index, const GlobalAlias &A, DenseSet &CantBePromoted) { bool NonRenamableLocal = isNonRenamableLocal(A); GlobalValueSummary::GVFlags Flags(A.getLinkage(), NonRenamableLocal, - /* LiveRoot = */ false); + /* Live = */ false); auto AS = llvm::make_unique(Flags, ArrayRef{}); auto *Aliasee = A.getBaseObject(); auto *AliaseeSummary = Index.getGlobalValueSummary(*Aliasee); @@ -323,7 +323,7 @@ computeAliasSummary(ModuleSummaryIndex &Index, const GlobalAlias &A, static void setLiveRoot(ModuleSummaryIndex &Index, StringRef Name) { if (ValueInfo VI = Index.getValueInfo(GlobalValue::getGUID(Name))) for (auto &Summary : VI.getSummaryList()) - Summary->setLiveRoot(); + Summary->setLive(true); } ModuleSummaryIndex llvm::buildModuleSummaryIndex( @@ -423,8 +423,8 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex( return; assert(GV->isDeclaration() && "Def in module asm already has definition"); GlobalValueSummary::GVFlags GVFlags(GlobalValue::InternalLinkage, - /* NotEligibleToImport */ true, - /* LiveRoot */ true); + /* NotEligibleToImport = */ true, + /* Live = */ true); CantBePromoted.insert(GlobalValue::getGUID(Name)); // Create the appropriate summary type. if (isa(GV)) { @@ -447,6 +447,11 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex( }); } + bool IsThinLTO = true; + if (auto *MD = + mdconst::extract_or_null(M.getModuleFlag("ThinLTO"))) + IsThinLTO = MD->getZExtValue(); + for (auto &GlobalList : Index) { // Ignore entries for references that are undefined in the current module. 
if (GlobalList.second.SummaryList.empty()) @@ -455,6 +460,11 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex( assert(GlobalList.second.SummaryList.size() == 1 && "Expected module's index to have one summary per GUID"); auto &Summary = GlobalList.second.SummaryList[0]; + if (!IsThinLTO) { + Summary->setNotEligibleToImport(); + continue; + } + bool AllRefsCanBeExternallyReferenced = llvm::all_of(Summary->refs(), [&](const ValueInfo &VI) { return !CantBePromoted.count(VI.getGUID()); diff --git a/interpreter/llvm/src/lib/Analysis/ObjCARCInstKind.cpp b/interpreter/llvm/src/lib/Analysis/ObjCARCInstKind.cpp index 1e75c0824d031..f374dd33f86f6 100644 --- a/interpreter/llvm/src/lib/Analysis/ObjCARCInstKind.cpp +++ b/interpreter/llvm/src/lib/Analysis/ObjCARCInstKind.cpp @@ -20,8 +20,8 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/ObjCARCInstKind.h" -#include "llvm/Analysis/ObjCARCAnalysisUtils.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/Analysis/ObjCARCAnalysisUtils.h" #include "llvm/IR/Intrinsics.h" using namespace llvm; diff --git a/interpreter/llvm/src/lib/Analysis/OptimizationDiagnosticInfo.cpp b/interpreter/llvm/src/lib/Analysis/OptimizationDiagnosticInfo.cpp index 73245981b0228..eb259fd7a3840 100644 --- a/interpreter/llvm/src/lib/Analysis/OptimizationDiagnosticInfo.cpp +++ b/interpreter/llvm/src/lib/Analysis/OptimizationDiagnosticInfo.cpp @@ -25,7 +25,7 @@ using namespace llvm; OptimizationRemarkEmitter::OptimizationRemarkEmitter(const Function *F) : F(F), BFI(nullptr) { - if (!F->getContext().getDiagnosticHotnessRequested()) + if (!F->getContext().getDiagnosticsHotnessRequested()) return; // First create a dominator tree. @@ -101,7 +101,7 @@ void MappingTraits::mapping( // These are read-only for now. DiagnosticLocation DL = OptDiag->getLocation(); StringRef FN = - GlobalValue::getRealLinkageName(OptDiag->getFunction().getName()); + GlobalValue::dropLLVMManglingEscape(OptDiag->getFunction().getName()); StringRef PassName(OptDiag->PassName); io.mapRequired("Pass", PassName); @@ -155,6 +155,13 @@ void OptimizationRemarkEmitter::emit( DiagnosticInfoOptimizationBase &OptDiagBase) { auto &OptDiag = cast(OptDiagBase); computeHotness(OptDiag); + // If a diagnostic has a hotness value, then only emit it if its hotness + // meets the threshold. 
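// Worked example (the threshold value is illustrative, not from the patch):
// with a diagnostics hotness threshold of 100 on the LLVMContext, a remark
// whose code region has hotness 42 is dropped by the check below, a remark
// with hotness 5000 is still emitted, and a remark carrying no hotness at
// all bypasses the filter entirely.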
+ if (OptDiag.getHotness() && + *OptDiag.getHotness() < + F->getContext().getDiagnosticsHotnessThreshold()) { + return; + } yaml::Output *Out = F->getContext().getDiagnosticsOutputFile(); if (Out) { @@ -176,7 +183,7 @@ OptimizationRemarkEmitterWrapperPass::OptimizationRemarkEmitterWrapperPass() bool OptimizationRemarkEmitterWrapperPass::runOnFunction(Function &Fn) { BlockFrequencyInfo *BFI; - if (Fn.getContext().getDiagnosticHotnessRequested()) + if (Fn.getContext().getDiagnosticsHotnessRequested()) BFI = &getAnalysis().getBFI(); else BFI = nullptr; @@ -198,7 +205,7 @@ OptimizationRemarkEmitterAnalysis::run(Function &F, FunctionAnalysisManager &AM) { BlockFrequencyInfo *BFI; - if (F.getContext().getDiagnosticHotnessRequested()) + if (F.getContext().getDiagnosticsHotnessRequested()) BFI = &AM.getResult(F); else BFI = nullptr; diff --git a/interpreter/llvm/src/lib/Analysis/OrderedBasicBlock.cpp b/interpreter/llvm/src/lib/Analysis/OrderedBasicBlock.cpp index 0f0016f22cc0a..a04c0aef04bea 100644 --- a/interpreter/llvm/src/lib/Analysis/OrderedBasicBlock.cpp +++ b/interpreter/llvm/src/lib/Analysis/OrderedBasicBlock.cpp @@ -55,7 +55,7 @@ bool OrderedBasicBlock::comesBefore(const Instruction *A, assert(II != IE && "Instruction not found?"); assert((Inst == A || Inst == B) && "Should find A or B"); LastInstFound = II; - return Inst == A; + return Inst != B; } /// \brief Find out whether \p A dominates \p B, meaning whether \p A diff --git a/interpreter/llvm/src/lib/Analysis/ProfileSummaryInfo.cpp b/interpreter/llvm/src/lib/Analysis/ProfileSummaryInfo.cpp index 502f4205b689a..12b86daa602be 100644 --- a/interpreter/llvm/src/lib/Analysis/ProfileSummaryInfo.cpp +++ b/interpreter/llvm/src/lib/Analysis/ProfileSummaryInfo.cpp @@ -75,7 +75,7 @@ ProfileSummaryInfo::getProfileCount(const Instruction *Inst, return None; assert((isa(Inst) || isa(Inst)) && "We can only get profile count for call/invoke instruction."); - if (computeSummary() && Summary->getKind() == ProfileSummary::PSK_Sample) { + if (hasSampleProfile()) { // In sample PGO mode, check if there is a profile metadata on the // instruction. If it is present, determine hotness solely based on that, // since the sampled entry count may not be accurate. 
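The OrderedBasicBlock fix above is easy to miss: comesBefore(A, B) scans the block for the first occurrence of either instruction, and previously answered `Inst == A`, which made comesBefore(A, A) return true. Returning `Inst != B` keeps the ordering strict. A standalone model of the scan (plain C++; instructions are mocked as ints, this is not the LLVM class):

    #include <cassert>
    #include <vector>

    // Simplified model of OrderedBasicBlock::comesBefore: walk the block in
    // order and stop at the first of A or B. With `Inst != B`, the case A == B
    // stops at an instruction that is both A and B and correctly yields false.
    bool comesBefore(const std::vector<int> &Block, int A, int B) {
      for (int Inst : Block)
        if (Inst == A || Inst == B)
          return Inst != B; // pre-patch: `return Inst == A;` (true when A == B)
      return false; // neither found; callers guarantee this is unreachable
    }

    int main() {
      const std::vector<int> Block = {10, 20, 30};
      assert(comesBefore(Block, 10, 30));  // 10 is scanned first
      assert(!comesBefore(Block, 30, 10)); // 10 is scanned first, and it is B
      assert(!comesBefore(Block, 20, 20)); // strict: A does not precede itself
      return 0;
    }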
diff --git a/interpreter/llvm/src/lib/Analysis/RegionInfo.cpp b/interpreter/llvm/src/lib/Analysis/RegionInfo.cpp index 63ef8d28d44ad..9004873230052 100644 --- a/interpreter/llvm/src/lib/Analysis/RegionInfo.cpp +++ b/interpreter/llvm/src/lib/Analysis/RegionInfo.cpp @@ -10,28 +10,29 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/RegionInfo.h" -#include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/RegionInfoImpl.h" -#include "llvm/Analysis/RegionIterator.h" -#include "llvm/IR/PassManager.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" #ifndef NDEBUG #include "llvm/Analysis/RegionPrinter.h" #endif +#include "llvm/Analysis/RegionInfoImpl.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; #define DEBUG_TYPE "region" namespace llvm { + template class RegionBase>; template class RegionNodeBase>; template class RegionInfoBase>; -} + +} // end namespace llvm STATISTIC(numRegions, "The # of regions"); STATISTIC(numSimpleRegions, "The # of simple regions"); @@ -44,7 +45,6 @@ VerifyRegionInfoX( cl::location(RegionInfoBase>::VerifyRegionInfo), cl::desc("Verify region info (time consuming)")); - static cl::opt printStyleX("print-region-style", cl::location(RegionInfo::printStyle), cl::Hidden, @@ -56,7 +56,6 @@ static cl::opt printStyleX("print-region-style", clEnumValN(Region::PrintRN, "rn", "print regions in detail with element_iterator"))); - //===----------------------------------------------------------------------===// // Region implementation // @@ -68,20 +67,15 @@ Region::Region(BasicBlock *Entry, BasicBlock *Exit, } -Region::~Region() { } +Region::~Region() = default; //===----------------------------------------------------------------------===// // RegionInfo implementation // -RegionInfo::RegionInfo() : - RegionInfoBase>() { - -} +RegionInfo::RegionInfo() = default; -RegionInfo::~RegionInfo() { - -} +RegionInfo::~RegionInfo() = default; bool RegionInfo::invalidate(Function &F, const PreservedAnalyses &PA, FunctionAnalysisManager::Invalidator &) { @@ -126,9 +120,7 @@ RegionInfoPass::RegionInfoPass() : FunctionPass(ID) { initializeRegionInfoPassPass(*PassRegistry::getPassRegistry()); } -RegionInfoPass::~RegionInfoPass() { - -} +RegionInfoPass::~RegionInfoPass() = default; bool RegionInfoPass::runOnFunction(Function &F) { releaseMemory(); @@ -181,10 +173,12 @@ INITIALIZE_PASS_END(RegionInfoPass, "regions", // the link time optimization. 
namespace llvm { + FunctionPass *createRegionInfoPass() { return new RegionInfoPass(); } -} + +} // end namespace llvm //===----------------------------------------------------------------------===// // RegionInfoAnalysis implementation diff --git a/interpreter/llvm/src/lib/Analysis/RegionPass.cpp b/interpreter/llvm/src/lib/Analysis/RegionPass.cpp index f2fa626406f7a..b172d42c97091 100644 --- a/interpreter/llvm/src/lib/Analysis/RegionPass.cpp +++ b/interpreter/llvm/src/lib/Analysis/RegionPass.cpp @@ -15,6 +15,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/RegionPass.h" #include "llvm/Analysis/RegionIterator.h" +#include "llvm/IR/OptBisect.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Timer.h" #include "llvm/Support/raw_ostream.h" @@ -280,3 +281,18 @@ Pass *RegionPass::createPrinterPass(raw_ostream &O, const std::string &Banner) const { return new PrintRegionPass(Banner, O); } + +bool RegionPass::skipRegion(Region &R) const { + Function &F = *R.getEntry()->getParent(); + if (!F.getContext().getOptBisect().shouldRunPass(this, R)) + return true; + + if (F.hasFnAttribute(Attribute::OptimizeNone)) { + // Report this only once per function. + if (R.getEntry() == &F.getEntryBlock()) + DEBUG(dbgs() << "Skipping pass '" << getPassName() + << "' on function " << F.getName() << "\n"); + return true; + } + return false; +} diff --git a/interpreter/llvm/src/lib/Analysis/RegionPrinter.cpp b/interpreter/llvm/src/lib/Analysis/RegionPrinter.cpp index 30a4e011060e9..5986b8c4e0c30 100644 --- a/interpreter/llvm/src/lib/Analysis/RegionPrinter.cpp +++ b/interpreter/llvm/src/lib/Analysis/RegionPrinter.cpp @@ -9,14 +9,14 @@ // Print out the region tree of a function using dotty/graphviz. //===----------------------------------------------------------------------===// -#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/RegionPrinter.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/DOTGraphTraitsPass.h" +#include "llvm/Analysis/Passes.h" #include "llvm/Analysis/RegionInfo.h" #include "llvm/Analysis/RegionIterator.h" -#include "llvm/Analysis/RegionPrinter.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" diff --git a/interpreter/llvm/src/lib/Analysis/ScalarEvolution.cpp b/interpreter/llvm/src/lib/Analysis/ScalarEvolution.cpp index d71206335a506..9539fd7c75596 100644 --- a/interpreter/llvm/src/lib/Analysis/ScalarEvolution.cpp +++ b/interpreter/llvm/src/lib/Analysis/ScalarEvolution.cpp @@ -91,8 +91,8 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/SaveAndRestore.h" +#include "llvm/Support/raw_ostream.h" #include using namespace llvm; @@ -126,11 +126,11 @@ static cl::opt static cl::opt MulOpsInlineThreshold( "scev-mulops-inline-threshold", cl::Hidden, cl::desc("Threshold for inlining multiplication operands into a SCEV"), - cl::init(1000)); + cl::init(32)); static cl::opt AddOpsInlineThreshold( "scev-addops-inline-threshold", cl::Hidden, - cl::desc("Threshold for inlining multiplication operands into a SCEV"), + cl::desc("Threshold for inlining addition operands into a SCEV"), cl::init(500)); static cl::opt MaxSCEVCompareDepth( @@ -149,14 +149,24 @@ static cl::opt MaxValueCompareDepth( cl::init(2)); static cl::opt - 
MaxAddExprDepth("scalar-evolution-max-addexpr-depth", cl::Hidden, - cl::desc("Maximum depth of recursive AddExpr"), - cl::init(32)); + MaxArithDepth("scalar-evolution-max-arith-depth", cl::Hidden, + cl::desc("Maximum depth of recursive arithmetics"), + cl::init(32)); static cl::opt MaxConstantEvolvingDepth( "scalar-evolution-max-constant-evolving-depth", cl::Hidden, cl::desc("Maximum depth of recursive constant evolving"), cl::init(32)); +static cl::opt + MaxExtDepth("scalar-evolution-max-ext-depth", cl::Hidden, + cl::desc("Maximum depth of recursive SExt/ZExt"), + cl::init(8)); + +static cl::opt + MaxAddRecSize("scalar-evolution-max-add-rec-size", cl::Hidden, + cl::desc("Max coefficients in AddRec during evolving"), + cl::init(16)); + //===----------------------------------------------------------------------===// // SCEV class definitions //===----------------------------------------------------------------------===// @@ -321,7 +331,7 @@ bool SCEV::isOne() const { bool SCEV::isAllOnesValue() const { if (const SCEVConstant *SC = dyn_cast(this)) - return SC->getValue()->isAllOnesValue(); + return SC->getValue()->isMinusOne(); return false; } @@ -584,7 +594,7 @@ CompareValueComplexity(SmallSet, 8> &EqCache, static int CompareSCEVComplexity( SmallSet, 8> &EqCacheSCEV, const LoopInfo *const LI, const SCEV *LHS, const SCEV *RHS, - unsigned Depth = 0) { + DominatorTree &DT, unsigned Depth = 0) { // Fast-path: SCEVs are uniqued so we can do a quick equality check. if (LHS == RHS) return 0; @@ -629,12 +639,19 @@ static int CompareSCEVComplexity( const SCEVAddRecExpr *LA = cast(LHS); const SCEVAddRecExpr *RA = cast(RHS); - // Compare addrec loop depths. + // There is always a dominance between two recs that are used by one SCEV, + // so we can safely sort recs by loop header dominance. We require such + // order in getAddExpr. const Loop *LLoop = LA->getLoop(), *RLoop = RA->getLoop(); if (LLoop != RLoop) { - unsigned LDepth = LLoop->getLoopDepth(), RDepth = RLoop->getLoopDepth(); - if (LDepth != RDepth) - return (int)LDepth - (int)RDepth; + const BasicBlock *LHead = LLoop->getHeader(), *RHead = RLoop->getHeader(); + assert(LHead != RHead && "Two loops share the same header?"); + if (DT.dominates(LHead, RHead)) + return 1; + else + assert(DT.dominates(RHead, LHead) && + "No dominance between recurrences used by one SCEV?"); + return -1; } // Addrec complexity grows with operand count. @@ -645,7 +662,7 @@ static int CompareSCEVComplexity( // Lexicographically compare. for (unsigned i = 0; i != LNumOps; ++i) { int X = CompareSCEVComplexity(EqCacheSCEV, LI, LA->getOperand(i), - RA->getOperand(i), Depth + 1); + RA->getOperand(i), DT, Depth + 1); if (X != 0) return X; } @@ -669,7 +686,7 @@ static int CompareSCEVComplexity( if (i >= RNumOps) return 1; int X = CompareSCEVComplexity(EqCacheSCEV, LI, LC->getOperand(i), - RC->getOperand(i), Depth + 1); + RC->getOperand(i), DT, Depth + 1); if (X != 0) return X; } @@ -683,10 +700,10 @@ static int CompareSCEVComplexity( // Lexicographically compare udiv expressions. int X = CompareSCEVComplexity(EqCacheSCEV, LI, LC->getLHS(), RC->getLHS(), - Depth + 1); + DT, Depth + 1); if (X != 0) return X; - X = CompareSCEVComplexity(EqCacheSCEV, LI, LC->getRHS(), RC->getRHS(), + X = CompareSCEVComplexity(EqCacheSCEV, LI, LC->getRHS(), RC->getRHS(), DT, Depth + 1); if (X == 0) EqCacheSCEV.insert({LHS, RHS}); @@ -701,7 +718,7 @@ static int CompareSCEVComplexity( // Compare cast expressions by operand. 
int X = CompareSCEVComplexity(EqCacheSCEV, LI, LC->getOperand(), - RC->getOperand(), Depth + 1); + RC->getOperand(), DT, Depth + 1); if (X == 0) EqCacheSCEV.insert({LHS, RHS}); return X; @@ -724,7 +741,7 @@ static int CompareSCEVComplexity( /// land in memory. /// static void GroupByComplexity(SmallVectorImpl &Ops, - LoopInfo *LI) { + LoopInfo *LI, DominatorTree &DT) { if (Ops.size() < 2) return; // Noop SmallSet, 8> EqCache; @@ -732,15 +749,16 @@ static void GroupByComplexity(SmallVectorImpl &Ops, // This is the common case, which also happens to be trivially simple. // Special case it. const SCEV *&LHS = Ops[0], *&RHS = Ops[1]; - if (CompareSCEVComplexity(EqCache, LI, RHS, LHS) < 0) + if (CompareSCEVComplexity(EqCache, LI, RHS, LHS, DT) < 0) std::swap(LHS, RHS); return; } // Do the rough sort by complexity. std::stable_sort(Ops.begin(), Ops.end(), - [&EqCache, LI](const SCEV *LHS, const SCEV *RHS) { - return CompareSCEVComplexity(EqCache, LI, LHS, RHS) < 0; + [&EqCache, LI, &DT](const SCEV *LHS, const SCEV *RHS) { + return + CompareSCEVComplexity(EqCache, LI, LHS, RHS, DT) < 0; }); // Now that we are sorted by complexity, group elements of the same @@ -1251,12 +1269,12 @@ static const SCEV *getSignedOverflowLimitForStep(const SCEV *Step, if (SE->isKnownPositive(Step)) { *Pred = ICmpInst::ICMP_SLT; return SE->getConstant(APInt::getSignedMinValue(BitWidth) - - SE->getSignedRange(Step).getSignedMax()); + SE->getSignedRangeMax(Step)); } if (SE->isKnownNegative(Step)) { *Pred = ICmpInst::ICMP_SGT; return SE->getConstant(APInt::getSignedMaxValue(BitWidth) - - SE->getSignedRange(Step).getSignedMin()); + SE->getSignedRangeMin(Step)); } return nullptr; } @@ -1271,14 +1289,14 @@ static const SCEV *getUnsignedOverflowLimitForStep(const SCEV *Step, *Pred = ICmpInst::ICMP_ULT; return SE->getConstant(APInt::getMinValue(BitWidth) - - SE->getUnsignedRange(Step).getUnsignedMax()); + SE->getUnsignedRangeMax(Step)); } namespace { struct ExtendOpTraitsBase { - typedef const SCEV *(ScalarEvolution::*GetExtendExprTy)( - const SCEV *, Type *, ScalarEvolution::ExtendCacheTy &Cache); + typedef const SCEV *(ScalarEvolution::*GetExtendExprTy)(const SCEV *, Type *, + unsigned); }; // Used to make code generic over signed and unsigned overflow. 
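The ScalarEvolution hunks around here share one theme: the per-query ExtendCacheTy that getZeroExtendExpr/getSignExtendExpr used to thread through every helper is replaced by an unsigned Depth parameter, and once Depth exceeds the new -scalar-evolution-max-ext-depth cutoff (default 8) the code stops folding and just builds a plain extend node, relying on UniqueSCEVs for sharing. A minimal sketch of that depth-limited shape, with hypothetical toy types (not the SCEV API):

    #include <memory>

    struct Expr { virtual ~Expr() = default; };
    struct ZExt : Expr {
      std::shared_ptr<Expr> Op;
      explicit ZExt(std::shared_ptr<Expr> O) : Op(std::move(O)) {}
    };

    static const unsigned MaxExtDepth = 8; // mirrors the new cl::opt default

    std::shared_ptr<Expr> getZExt(std::shared_ptr<Expr> Op, unsigned Depth = 0) {
      if (Depth > MaxExtDepth)            // cutoff: stop simplifying, just wrap
        return std::make_shared<ZExt>(Op);
      if (auto *Z = dynamic_cast<ZExt *>(Op.get()))
        return getZExt(Z->Op, Depth + 1); // zext(zext(x)) --> zext(x)
      // ... further folds would recurse with Depth + 1 ...
      return std::make_shared<ZExt>(Op);
    }

The trade-off versus the removed cache: a deep chain may end up with a coarser, unfolded node, but the recursion is bounded without carrying mutable cache state through every signature.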
@@ -1307,9 +1325,8 @@ struct ExtendOpTraits : public ExtendOpTraitsBase { } }; -const ExtendOpTraitsBase::GetExtendExprTy - ExtendOpTraits::GetExtendExpr = - &ScalarEvolution::getSignExtendExprCached; +const ExtendOpTraitsBase::GetExtendExprTy ExtendOpTraits< + SCEVSignExtendExpr>::GetExtendExpr = &ScalarEvolution::getSignExtendExpr; template <> struct ExtendOpTraits : public ExtendOpTraitsBase { @@ -1324,9 +1341,8 @@ struct ExtendOpTraits : public ExtendOpTraitsBase { } }; -const ExtendOpTraitsBase::GetExtendExprTy - ExtendOpTraits::GetExtendExpr = - &ScalarEvolution::getZeroExtendExprCached; +const ExtendOpTraitsBase::GetExtendExprTy ExtendOpTraits< + SCEVZeroExtendExpr>::GetExtendExpr = &ScalarEvolution::getZeroExtendExpr; } // The recurrence AR has been shown to have no signed/unsigned wrap or something @@ -1338,8 +1354,7 @@ const ExtendOpTraitsBase::GetExtendExprTy // "sext/zext(PostIncAR)" template static const SCEV *getPreStartForExtend(const SCEVAddRecExpr *AR, Type *Ty, - ScalarEvolution *SE, - ScalarEvolution::ExtendCacheTy &Cache) { + ScalarEvolution *SE, unsigned Depth) { auto WrapType = ExtendOpTraits::WrapType; auto GetExtendExpr = ExtendOpTraits::GetExtendExpr; @@ -1386,9 +1401,9 @@ static const SCEV *getPreStartForExtend(const SCEVAddRecExpr *AR, Type *Ty, unsigned BitWidth = SE->getTypeSizeInBits(AR->getType()); Type *WideTy = IntegerType::get(SE->getContext(), BitWidth * 2); const SCEV *OperandExtendedStart = - SE->getAddExpr((SE->*GetExtendExpr)(PreStart, WideTy, Cache), - (SE->*GetExtendExpr)(Step, WideTy, Cache)); - if ((SE->*GetExtendExpr)(Start, WideTy, Cache) == OperandExtendedStart) { + SE->getAddExpr((SE->*GetExtendExpr)(PreStart, WideTy, Depth), + (SE->*GetExtendExpr)(Step, WideTy, Depth)); + if ((SE->*GetExtendExpr)(Start, WideTy, Depth) == OperandExtendedStart) { if (PreAR && AR->getNoWrapFlags(WrapType)) { // If we know `AR` == {`PreStart`+`Step`,+,`Step`} is `WrapType` (FlagNSW // or FlagNUW) and that `PreStart` + `Step` is `WrapType` too, then @@ -1414,16 +1429,16 @@ static const SCEV *getPreStartForExtend(const SCEVAddRecExpr *AR, Type *Ty, template static const SCEV *getExtendAddRecStart(const SCEVAddRecExpr *AR, Type *Ty, ScalarEvolution *SE, - ScalarEvolution::ExtendCacheTy &Cache) { + unsigned Depth) { auto GetExtendExpr = ExtendOpTraits::GetExtendExpr; - const SCEV *PreStart = getPreStartForExtend(AR, Ty, SE, Cache); + const SCEV *PreStart = getPreStartForExtend(AR, Ty, SE, Depth); if (!PreStart) - return (SE->*GetExtendExpr)(AR->getStart(), Ty, Cache); + return (SE->*GetExtendExpr)(AR->getStart(), Ty, Depth); - return SE->getAddExpr( - (SE->*GetExtendExpr)(AR->getStepRecurrence(*SE), Ty, Cache), - (SE->*GetExtendExpr)(PreStart, Ty, Cache)); + return SE->getAddExpr((SE->*GetExtendExpr)(AR->getStepRecurrence(*SE), Ty, + Depth), + (SE->*GetExtendExpr)(PreStart, Ty, Depth)); } // Try to prove away overflow by looking at "nearby" add recurrences. A @@ -1503,31 +1518,8 @@ bool ScalarEvolution::proveNoWrapByVaryingStart(const SCEV *Start, return false; } -const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty) { - // Use the local cache to prevent exponential behavior of - // getZeroExtendExprImpl. - ExtendCacheTy Cache; - return getZeroExtendExprCached(Op, Ty, Cache); -} - -/// Query \p Cache before calling getZeroExtendExprImpl. If there is no -/// related entry in the \p Cache, call getZeroExtendExprImpl and save -/// the result in the \p Cache. 
-const SCEV *ScalarEvolution::getZeroExtendExprCached(const SCEV *Op, Type *Ty, - ExtendCacheTy &Cache) { - auto It = Cache.find({Op, Ty}); - if (It != Cache.end()) - return It->second; - const SCEV *ZExt = getZeroExtendExprImpl(Op, Ty, Cache); - auto InsertResult = Cache.insert({{Op, Ty}, ZExt}); - assert(InsertResult.second && "Expect the key was not in the cache"); - (void)InsertResult; - return ZExt; -} - -/// The real implementation of getZeroExtendExpr. -const SCEV *ScalarEvolution::getZeroExtendExprImpl(const SCEV *Op, Type *Ty, - ExtendCacheTy &Cache) { +const SCEV * +ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) && "This is not an extending conversion!"); assert(isSCEVable(Ty) && @@ -1537,11 +1529,11 @@ const SCEV *ScalarEvolution::getZeroExtendExprImpl(const SCEV *Op, Type *Ty, // Fold if the operand is constant. if (const SCEVConstant *SC = dyn_cast(Op)) return getConstant( - cast(ConstantExpr::getZExt(SC->getValue(), Ty))); + cast(ConstantExpr::getZExt(SC->getValue(), Ty))); // zext(zext(x)) --> zext(x) if (const SCEVZeroExtendExpr *SZ = dyn_cast(Op)) - return getZeroExtendExprCached(SZ->getOperand(), Ty, Cache); + return getZeroExtendExpr(SZ->getOperand(), Ty, Depth + 1); // Before doing any expensive analysis, check to see if we've already // computed a SCEV for this Op and Ty. @@ -1551,6 +1543,12 @@ const SCEV *ScalarEvolution::getZeroExtendExprImpl(const SCEV *Op, Type *Ty, ID.AddPointer(Ty); void *IP = nullptr; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; + if (Depth > MaxExtDepth) { + SCEV *S = new (SCEVAllocator) SCEVZeroExtendExpr(ID.Intern(SCEVAllocator), + Op, Ty); + UniqueSCEVs.InsertNode(S, IP); + return S; + } // zext(trunc(x)) --> zext(x) or x or trunc(x) if (const SCEVTruncateExpr *ST = dyn_cast(Op)) { @@ -1585,8 +1583,8 @@ const SCEV *ScalarEvolution::getZeroExtendExprImpl(const SCEV *Op, Type *Ty, // we don't need to do any further analysis. if (AR->hasNoUnsignedWrap()) return getAddRecExpr( - getExtendAddRecStart(AR, Ty, this, Cache), - getZeroExtendExprCached(Step, Ty, Cache), L, AR->getNoWrapFlags()); + getExtendAddRecStart(AR, Ty, this, Depth + 1), + getZeroExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags()); // Check whether the backedge-taken count is SCEVCouldNotCompute. // Note that this serves two purposes: It filters out loops that are @@ -1610,22 +1608,29 @@ const SCEV *ScalarEvolution::getZeroExtendExprImpl(const SCEV *Op, Type *Ty, if (MaxBECount == RecastedMaxBECount) { Type *WideTy = IntegerType::get(getContext(), BitWidth * 2); // Check whether Start+Step*MaxBECount has no unsigned overflow. 
- const SCEV *ZMul = getMulExpr(CastedMaxBECount, Step); - const SCEV *ZAdd = - getZeroExtendExprCached(getAddExpr(Start, ZMul), WideTy, Cache); - const SCEV *WideStart = getZeroExtendExprCached(Start, WideTy, Cache); + const SCEV *ZMul = getMulExpr(CastedMaxBECount, Step, + SCEV::FlagAnyWrap, Depth + 1); + const SCEV *ZAdd = getZeroExtendExpr(getAddExpr(Start, ZMul, + SCEV::FlagAnyWrap, + Depth + 1), + WideTy, Depth + 1); + const SCEV *WideStart = getZeroExtendExpr(Start, WideTy, Depth + 1); const SCEV *WideMaxBECount = - getZeroExtendExprCached(CastedMaxBECount, WideTy, Cache); - const SCEV *OperandExtendedAdd = getAddExpr( - WideStart, getMulExpr(WideMaxBECount, getZeroExtendExprCached( - Step, WideTy, Cache))); + getZeroExtendExpr(CastedMaxBECount, WideTy, Depth + 1); + const SCEV *OperandExtendedAdd = + getAddExpr(WideStart, + getMulExpr(WideMaxBECount, + getZeroExtendExpr(Step, WideTy, Depth + 1), + SCEV::FlagAnyWrap, Depth + 1), + SCEV::FlagAnyWrap, Depth + 1); if (ZAdd == OperandExtendedAdd) { // Cache knowledge of AR NUW, which is propagated to this AddRec. const_cast(AR)->setNoWrapFlags(SCEV::FlagNUW); // Return the expression with the addrec on the outside. return getAddRecExpr( - getExtendAddRecStart(AR, Ty, this, Cache), - getZeroExtendExprCached(Step, Ty, Cache), L, + getExtendAddRecStart(AR, Ty, this, + Depth + 1), + getZeroExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags()); } // Similar to above, only this time treat the step value as signed. @@ -1633,15 +1638,19 @@ const SCEV *ScalarEvolution::getZeroExtendExprImpl(const SCEV *Op, Type *Ty, OperandExtendedAdd = getAddExpr(WideStart, getMulExpr(WideMaxBECount, - getSignExtendExpr(Step, WideTy))); + getSignExtendExpr(Step, WideTy, Depth + 1), + SCEV::FlagAnyWrap, Depth + 1), + SCEV::FlagAnyWrap, Depth + 1); if (ZAdd == OperandExtendedAdd) { // Cache knowledge of AR NW, which is propagated to this AddRec. // Negative step causes unsigned wrap, but it still can't self-wrap. const_cast(AR)->setNoWrapFlags(SCEV::FlagNW); // Return the expression with the addrec on the outside. return getAddRecExpr( - getExtendAddRecStart(AR, Ty, this, Cache), - getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags()); + getExtendAddRecStart(AR, Ty, this, + Depth + 1), + getSignExtendExpr(Step, Ty, Depth + 1), L, + AR->getNoWrapFlags()); } } } @@ -1662,7 +1671,7 @@ const SCEV *ScalarEvolution::getZeroExtendExprImpl(const SCEV *Op, Type *Ty, // is safe. if (isKnownPositive(Step)) { const SCEV *N = getConstant(APInt::getMinValue(BitWidth) - - getUnsignedRange(Step).getUnsignedMax()); + getUnsignedRangeMax(Step)); if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT, AR, N) || (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_ULT, Start, N) && isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT, @@ -1672,13 +1681,14 @@ const SCEV *ScalarEvolution::getZeroExtendExprImpl(const SCEV *Op, Type *Ty, const_cast(AR)->setNoWrapFlags(SCEV::FlagNUW); // Return the expression with the addrec on the outside. 
return getAddRecExpr( - getExtendAddRecStart(AR, Ty, this, Cache), - getZeroExtendExprCached(Step, Ty, Cache), L, + getExtendAddRecStart(AR, Ty, this, + Depth + 1), + getZeroExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags()); } } else if (isKnownNegative(Step)) { const SCEV *N = getConstant(APInt::getMaxValue(BitWidth) - - getSignedRange(Step).getSignedMin()); + getSignedRangeMin(Step)); if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT, AR, N) || (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_UGT, Start, N) && isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT, @@ -1689,8 +1699,10 @@ const SCEV *ScalarEvolution::getZeroExtendExprImpl(const SCEV *Op, Type *Ty, const_cast(AR)->setNoWrapFlags(SCEV::FlagNW); // Return the expression with the addrec on the outside. return getAddRecExpr( - getExtendAddRecStart(AR, Ty, this, Cache), - getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags()); + getExtendAddRecStart(AR, Ty, this, + Depth + 1), + getSignExtendExpr(Step, Ty, Depth + 1), L, + AR->getNoWrapFlags()); } } } @@ -1698,8 +1710,8 @@ const SCEV *ScalarEvolution::getZeroExtendExprImpl(const SCEV *Op, Type *Ty, if (proveNoWrapByVaryingStart(Start, Step, L)) { const_cast(AR)->setNoWrapFlags(SCEV::FlagNUW); return getAddRecExpr( - getExtendAddRecStart(AR, Ty, this, Cache), - getZeroExtendExprCached(Step, Ty, Cache), L, AR->getNoWrapFlags()); + getExtendAddRecStart(AR, Ty, this, Depth + 1), + getZeroExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags()); } } @@ -1710,8 +1722,8 @@ const SCEV *ScalarEvolution::getZeroExtendExprImpl(const SCEV *Op, Type *Ty, // commute the zero extension with the addition operation. SmallVector Ops; for (const auto *Op : SA->operands()) - Ops.push_back(getZeroExtendExprCached(Op, Ty, Cache)); - return getAddExpr(Ops, SCEV::FlagNUW); + Ops.push_back(getZeroExtendExpr(Op, Ty, Depth + 1)); + return getAddExpr(Ops, SCEV::FlagNUW, Depth + 1); } } @@ -1724,31 +1736,8 @@ const SCEV *ScalarEvolution::getZeroExtendExprImpl(const SCEV *Op, Type *Ty, return S; } -const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty) { - // Use the local cache to prevent exponential behavior of - // getSignExtendExprImpl. - ExtendCacheTy Cache; - return getSignExtendExprCached(Op, Ty, Cache); -} - -/// Query \p Cache before calling getSignExtendExprImpl. If there is no -/// related entry in the \p Cache, call getSignExtendExprImpl and save -/// the result in the \p Cache. -const SCEV *ScalarEvolution::getSignExtendExprCached(const SCEV *Op, Type *Ty, - ExtendCacheTy &Cache) { - auto It = Cache.find({Op, Ty}); - if (It != Cache.end()) - return It->second; - const SCEV *SExt = getSignExtendExprImpl(Op, Ty, Cache); - auto InsertResult = Cache.insert({{Op, Ty}, SExt}); - assert(InsertResult.second && "Expect the key was not in the cache"); - (void)InsertResult; - return SExt; -} - -/// The real implementation of getSignExtendExpr. -const SCEV *ScalarEvolution::getSignExtendExprImpl(const SCEV *Op, Type *Ty, - ExtendCacheTy &Cache) { +const SCEV * +ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) && "This is not an extending conversion!"); assert(isSCEVable(Ty) && @@ -1758,15 +1747,15 @@ const SCEV *ScalarEvolution::getSignExtendExprImpl(const SCEV *Op, Type *Ty, // Fold if the operand is constant. 
if (const SCEVConstant *SC = dyn_cast(Op)) return getConstant( - cast(ConstantExpr::getSExt(SC->getValue(), Ty))); + cast(ConstantExpr::getSExt(SC->getValue(), Ty))); // sext(sext(x)) --> sext(x) if (const SCEVSignExtendExpr *SS = dyn_cast(Op)) - return getSignExtendExprCached(SS->getOperand(), Ty, Cache); + return getSignExtendExpr(SS->getOperand(), Ty, Depth + 1); // sext(zext(x)) --> zext(x) if (const SCEVZeroExtendExpr *SZ = dyn_cast(Op)) - return getZeroExtendExpr(SZ->getOperand(), Ty); + return getZeroExtendExpr(SZ->getOperand(), Ty, Depth + 1); // Before doing any expensive analysis, check to see if we've already // computed a SCEV for this Op and Ty. @@ -1776,6 +1765,13 @@ const SCEV *ScalarEvolution::getSignExtendExprImpl(const SCEV *Op, Type *Ty, ID.AddPointer(Ty); void *IP = nullptr; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; + // Limit recursion depth. + if (Depth > MaxExtDepth) { + SCEV *S = new (SCEVAllocator) SCEVSignExtendExpr(ID.Intern(SCEVAllocator), + Op, Ty); + UniqueSCEVs.InsertNode(S, IP); + return S; + } // sext(trunc(x)) --> sext(x) or x or trunc(x) if (const SCEVTruncateExpr *ST = dyn_cast(Op)) { @@ -1801,8 +1797,9 @@ const SCEV *ScalarEvolution::getSignExtendExprImpl(const SCEV *Op, Type *Ty, const APInt &C2 = SC2->getAPInt(); if (C1.isStrictlyPositive() && C2.isStrictlyPositive() && C2.ugt(C1) && C2.isPowerOf2()) - return getAddExpr(getSignExtendExprCached(SC1, Ty, Cache), - getSignExtendExprCached(SMul, Ty, Cache)); + return getAddExpr(getSignExtendExpr(SC1, Ty, Depth + 1), + getSignExtendExpr(SMul, Ty, Depth + 1), + SCEV::FlagAnyWrap, Depth + 1); } } } @@ -1813,8 +1810,8 @@ const SCEV *ScalarEvolution::getSignExtendExprImpl(const SCEV *Op, Type *Ty, // commute the sign extension with the addition operation. SmallVector Ops; for (const auto *Op : SA->operands()) - Ops.push_back(getSignExtendExprCached(Op, Ty, Cache)); - return getAddExpr(Ops, SCEV::FlagNSW); + Ops.push_back(getSignExtendExpr(Op, Ty, Depth + 1)); + return getAddExpr(Ops, SCEV::FlagNSW, Depth + 1); } } // If the input value is a chrec scev, and we can prove that the value @@ -1837,8 +1834,8 @@ const SCEV *ScalarEvolution::getSignExtendExprImpl(const SCEV *Op, Type *Ty, // we don't need to do any further analysis. if (AR->hasNoSignedWrap()) return getAddRecExpr( - getExtendAddRecStart(AR, Ty, this, Cache), - getSignExtendExprCached(Step, Ty, Cache), L, SCEV::FlagNSW); + getExtendAddRecStart(AR, Ty, this, Depth + 1), + getSignExtendExpr(Step, Ty, Depth + 1), L, SCEV::FlagNSW); // Check whether the backedge-taken count is SCEVCouldNotCompute. // Note that this serves two purposes: It filters out loops that are @@ -1862,22 +1859,29 @@ const SCEV *ScalarEvolution::getSignExtendExprImpl(const SCEV *Op, Type *Ty, if (MaxBECount == RecastedMaxBECount) { Type *WideTy = IntegerType::get(getContext(), BitWidth * 2); // Check whether Start+Step*MaxBECount has no signed overflow. 
- const SCEV *SMul = getMulExpr(CastedMaxBECount, Step); - const SCEV *SAdd = - getSignExtendExprCached(getAddExpr(Start, SMul), WideTy, Cache); - const SCEV *WideStart = getSignExtendExprCached(Start, WideTy, Cache); + const SCEV *SMul = getMulExpr(CastedMaxBECount, Step, + SCEV::FlagAnyWrap, Depth + 1); + const SCEV *SAdd = getSignExtendExpr(getAddExpr(Start, SMul, + SCEV::FlagAnyWrap, + Depth + 1), + WideTy, Depth + 1); + const SCEV *WideStart = getSignExtendExpr(Start, WideTy, Depth + 1); const SCEV *WideMaxBECount = - getZeroExtendExpr(CastedMaxBECount, WideTy); - const SCEV *OperandExtendedAdd = getAddExpr( - WideStart, getMulExpr(WideMaxBECount, getSignExtendExprCached( - Step, WideTy, Cache))); + getZeroExtendExpr(CastedMaxBECount, WideTy, Depth + 1); + const SCEV *OperandExtendedAdd = + getAddExpr(WideStart, + getMulExpr(WideMaxBECount, + getSignExtendExpr(Step, WideTy, Depth + 1), + SCEV::FlagAnyWrap, Depth + 1), + SCEV::FlagAnyWrap, Depth + 1); if (SAdd == OperandExtendedAdd) { // Cache knowledge of AR NSW, which is propagated to this AddRec. const_cast(AR)->setNoWrapFlags(SCEV::FlagNSW); // Return the expression with the addrec on the outside. return getAddRecExpr( - getExtendAddRecStart(AR, Ty, this, Cache), - getSignExtendExprCached(Step, Ty, Cache), L, + getExtendAddRecStart(AR, Ty, this, + Depth + 1), + getSignExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags()); } // Similar to above, only this time treat the step value as unsigned. @@ -1885,7 +1889,9 @@ const SCEV *ScalarEvolution::getSignExtendExprImpl(const SCEV *Op, Type *Ty, OperandExtendedAdd = getAddExpr(WideStart, getMulExpr(WideMaxBECount, - getZeroExtendExpr(Step, WideTy))); + getZeroExtendExpr(Step, WideTy, Depth + 1), + SCEV::FlagAnyWrap, Depth + 1), + SCEV::FlagAnyWrap, Depth + 1); if (SAdd == OperandExtendedAdd) { // If AR wraps around then // @@ -1899,8 +1905,10 @@ const SCEV *ScalarEvolution::getSignExtendExprImpl(const SCEV *Op, Type *Ty, // Return the expression with the addrec on the outside. return getAddRecExpr( - getExtendAddRecStart(AR, Ty, this, Cache), - getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags()); + getExtendAddRecStart(AR, Ty, this, + Depth + 1), + getZeroExtendExpr(Step, Ty, Depth + 1), L, + AR->getNoWrapFlags()); } } } @@ -1931,9 +1939,8 @@ const SCEV *ScalarEvolution::getSignExtendExprImpl(const SCEV *Op, Type *Ty, // Cache knowledge of AR NSW, then propagate NSW to the wide AddRec. 
const_cast(AR)->setNoWrapFlags(SCEV::FlagNSW); return getAddRecExpr( - getExtendAddRecStart(AR, Ty, this, Cache), - getSignExtendExprCached(Step, Ty, Cache), L, - AR->getNoWrapFlags()); + getExtendAddRecStart(AR, Ty, this, Depth + 1), + getSignExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags()); } } @@ -1947,25 +1954,26 @@ const SCEV *ScalarEvolution::getSignExtendExprImpl(const SCEV *Op, Type *Ty, const APInt &C2 = SC2->getAPInt(); if (C1.isStrictlyPositive() && C2.isStrictlyPositive() && C2.ugt(C1) && C2.isPowerOf2()) { - Start = getSignExtendExprCached(Start, Ty, Cache); + Start = getSignExtendExpr(Start, Ty, Depth + 1); const SCEV *NewAR = getAddRecExpr(getZero(AR->getType()), Step, L, AR->getNoWrapFlags()); - return getAddExpr(Start, getSignExtendExprCached(NewAR, Ty, Cache)); + return getAddExpr(Start, getSignExtendExpr(NewAR, Ty, Depth + 1), + SCEV::FlagAnyWrap, Depth + 1); } } if (proveNoWrapByVaryingStart(Start, Step, L)) { const_cast(AR)->setNoWrapFlags(SCEV::FlagNSW); return getAddRecExpr( - getExtendAddRecStart(AR, Ty, this, Cache), - getSignExtendExprCached(Step, Ty, Cache), L, AR->getNoWrapFlags()); + getExtendAddRecStart(AR, Ty, this, Depth + 1), + getSignExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags()); } } // If the input value is provably positive and we could not simplify // away the sext build a zext instead. if (isKnownNonNegative(Op)) - return getZeroExtendExpr(Op, Ty); + return getZeroExtendExpr(Op, Ty, Depth + 1); // The cast wasn't folded; create an explicit cast node. // Recompute the insert position, as it may have been invalidated. @@ -2170,6 +2178,62 @@ StrengthenNoWrapFlags(ScalarEvolution *SE, SCEVTypes Type, return Flags; } +bool ScalarEvolution::isAvailableAtLoopEntry(const SCEV *S, const Loop *L) { + if (!isLoopInvariant(S, L)) + return false; + // If a value depends on a SCEVUnknown which is defined after the loop, we + // conservatively assume that we cannot calculate it at the loop's entry. + struct FindDominatedSCEVUnknown { + bool Found = false; + const Loop *L; + DominatorTree &DT; + LoopInfo &LI; + + FindDominatedSCEVUnknown(const Loop *L, DominatorTree &DT, LoopInfo &LI) + : L(L), DT(DT), LI(LI) {} + + bool checkSCEVUnknown(const SCEVUnknown *SU) { + if (auto *I = dyn_cast(SU->getValue())) { + if (DT.dominates(L->getHeader(), I->getParent())) + Found = true; + else + assert(DT.dominates(I->getParent(), L->getHeader()) && + "No dominance relationship between SCEV and loop?"); + } + return false; + } + + bool follow(const SCEV *S) { + switch (static_cast(S->getSCEVType())) { + case scConstant: + return false; + case scAddRecExpr: + case scTruncate: + case scZeroExtend: + case scSignExtend: + case scAddExpr: + case scMulExpr: + case scUMaxExpr: + case scSMaxExpr: + case scUDivExpr: + return true; + case scUnknown: + return checkSCEVUnknown(cast(S)); + case scCouldNotCompute: + llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); + } + return false; + } + + bool isDone() { return Found; } + }; + + FindDominatedSCEVUnknown FSU(L, DT, LI); + SCEVTraversal ST(FSU); + ST.visitAll(S); + return !FSU.Found; +} + /// Get a canonical add expression, or something simpler if possible. const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl &Ops, SCEV::NoWrapFlags Flags, @@ -2186,7 +2250,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl &Ops, #endif // Sort by complexity, this groups all similar expression types together. 
- GroupByComplexity(Ops, &LI); + GroupByComplexity(Ops, &LI, DT); Flags = StrengthenNoWrapFlags(this, scAddExpr, Ops, Flags); @@ -2212,8 +2276,8 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl &Ops, if (Ops.size() == 1) return Ops[0]; } - // Limit recursion calls depth - if (Depth > MaxAddExprDepth) + // Limit recursion calls depth. + if (Depth > MaxArithDepth) return getOrCreateAddExpr(Ops, Flags); // Okay, check to see if the same value occurs in the operand list more than @@ -2229,7 +2293,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl &Ops, ++Count; // Merge the values into a multiply. const SCEV *Scale = getConstant(Ty, Count); - const SCEV *Mul = getMulExpr(Scale, Ops[i]); + const SCEV *Mul = getMulExpr(Scale, Ops[i], SCEV::FlagAnyWrap, Depth + 1); if (Ops.size() == Count) return Mul; Ops[i] = Mul; @@ -2279,7 +2343,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl &Ops, } } if (Ok) - LargeOps.push_back(getMulExpr(LargeMulOps)); + LargeOps.push_back(getMulExpr(LargeMulOps, SCEV::FlagAnyWrap, Depth + 1)); } else { Ok = false; break; @@ -2353,7 +2417,8 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl &Ops, if (MulOp.first != 0) Ops.push_back(getMulExpr( getConstant(MulOp.first), - getAddExpr(MulOp.second, SCEV::FlagAnyWrap, Depth + 1))); + getAddExpr(MulOp.second, SCEV::FlagAnyWrap, Depth + 1), + SCEV::FlagAnyWrap, Depth + 1)); if (Ops.empty()) return getZero(Ty); if (Ops.size() == 1) @@ -2381,11 +2446,12 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl &Ops, SmallVector MulOps(Mul->op_begin(), Mul->op_begin()+MulOp); MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end()); - InnerMul = getMulExpr(MulOps); + InnerMul = getMulExpr(MulOps, SCEV::FlagAnyWrap, Depth + 1); } SmallVector TwoOps = {getOne(Ty), InnerMul}; const SCEV *AddOne = getAddExpr(TwoOps, SCEV::FlagAnyWrap, Depth + 1); - const SCEV *OuterMul = getMulExpr(AddOne, MulOpSCEV); + const SCEV *OuterMul = getMulExpr(AddOne, MulOpSCEV, + SCEV::FlagAnyWrap, Depth + 1); if (Ops.size() == 2) return OuterMul; if (AddOp < Idx) { Ops.erase(Ops.begin()+AddOp); @@ -2414,19 +2480,20 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl &Ops, SmallVector MulOps(Mul->op_begin(), Mul->op_begin()+MulOp); MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end()); - InnerMul1 = getMulExpr(MulOps); + InnerMul1 = getMulExpr(MulOps, SCEV::FlagAnyWrap, Depth + 1); } const SCEV *InnerMul2 = OtherMul->getOperand(OMulOp == 0); if (OtherMul->getNumOperands() != 2) { SmallVector MulOps(OtherMul->op_begin(), OtherMul->op_begin()+OMulOp); MulOps.append(OtherMul->op_begin()+OMulOp+1, OtherMul->op_end()); - InnerMul2 = getMulExpr(MulOps); + InnerMul2 = getMulExpr(MulOps, SCEV::FlagAnyWrap, Depth + 1); } SmallVector TwoOps = {InnerMul1, InnerMul2}; const SCEV *InnerMulSum = getAddExpr(TwoOps, SCEV::FlagAnyWrap, Depth + 1); - const SCEV *OuterMul = getMulExpr(MulOpSCEV, InnerMulSum); + const SCEV *OuterMul = getMulExpr(MulOpSCEV, InnerMulSum, + SCEV::FlagAnyWrap, Depth + 1); if (Ops.size() == 2) return OuterMul; Ops.erase(Ops.begin()+Idx); Ops.erase(Ops.begin()+OtherMulIdx-1); @@ -2451,7 +2518,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl &Ops, const SCEVAddRecExpr *AddRec = cast(Ops[Idx]); const Loop *AddRecLoop = AddRec->getLoop(); for (unsigned i = 0, e = Ops.size(); i != e; ++i) - if (isLoopInvariant(Ops[i], AddRecLoop)) { + if (isAvailableAtLoopEntry(Ops[i], AddRecLoop)) { LIOps.push_back(Ops[i]); Ops.erase(Ops.begin()+i); --i; --e; @@ -2492,32 +2559,40 @@ const SCEV 
*ScalarEvolution::getAddExpr(SmallVectorImpl &Ops, // added together. If so, we can fold them. for (unsigned OtherIdx = Idx+1; OtherIdx < Ops.size() && isa(Ops[OtherIdx]); - ++OtherIdx) + ++OtherIdx) { + // We expect the AddRecExpr's to be sorted in reverse dominance order, + // so that the 1st found AddRecExpr is dominated by all others. + assert(DT.dominates( + cast(Ops[OtherIdx])->getLoop()->getHeader(), + AddRec->getLoop()->getHeader()) && + "AddRecExprs are not sorted in reverse dominance order?"); if (AddRecLoop == cast(Ops[OtherIdx])->getLoop()) { // Other + {A,+,B} + {C,+,D} --> Other + {A+C,+,B+D} SmallVector AddRecOps(AddRec->op_begin(), AddRec->op_end()); for (; OtherIdx != Ops.size() && isa(Ops[OtherIdx]); - ++OtherIdx) - if (const auto *OtherAddRec = dyn_cast(Ops[OtherIdx])) - if (OtherAddRec->getLoop() == AddRecLoop) { - for (unsigned i = 0, e = OtherAddRec->getNumOperands(); - i != e; ++i) { - if (i >= AddRecOps.size()) { - AddRecOps.append(OtherAddRec->op_begin()+i, - OtherAddRec->op_end()); - break; - } - SmallVector TwoOps = { - AddRecOps[i], OtherAddRec->getOperand(i)}; - AddRecOps[i] = getAddExpr(TwoOps, SCEV::FlagAnyWrap, Depth + 1); + ++OtherIdx) { + const auto *OtherAddRec = cast(Ops[OtherIdx]); + if (OtherAddRec->getLoop() == AddRecLoop) { + for (unsigned i = 0, e = OtherAddRec->getNumOperands(); + i != e; ++i) { + if (i >= AddRecOps.size()) { + AddRecOps.append(OtherAddRec->op_begin()+i, + OtherAddRec->op_end()); + break; } - Ops.erase(Ops.begin() + OtherIdx); --OtherIdx; + SmallVector TwoOps = { + AddRecOps[i], OtherAddRec->getOperand(i)}; + AddRecOps[i] = getAddExpr(TwoOps, SCEV::FlagAnyWrap, Depth + 1); } + Ops.erase(Ops.begin() + OtherIdx); --OtherIdx; + } + } // Step size has changed, so we cannot guarantee no self-wraparound. Ops[Idx] = getAddRecExpr(AddRecOps, AddRecLoop, SCEV::FlagAnyWrap); return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1); } + } // Otherwise couldn't fold anything into this recurrence. Move onto the // next one. @@ -2549,6 +2624,27 @@ ScalarEvolution::getOrCreateAddExpr(SmallVectorImpl &Ops, return S; } +const SCEV * +ScalarEvolution::getOrCreateMulExpr(SmallVectorImpl &Ops, + SCEV::NoWrapFlags Flags) { + FoldingSetNodeID ID; + ID.AddInteger(scMulExpr); + for (unsigned i = 0, e = Ops.size(); i != e; ++i) + ID.AddPointer(Ops[i]); + void *IP = nullptr; + SCEVMulExpr *S = + static_cast(UniqueSCEVs.FindNodeOrInsertPos(ID, IP)); + if (!S) { + const SCEV **O = SCEVAllocator.Allocate(Ops.size()); + std::uninitialized_copy(Ops.begin(), Ops.end(), O); + S = new (SCEVAllocator) SCEVMulExpr(ID.Intern(SCEVAllocator), + O, Ops.size()); + UniqueSCEVs.InsertNode(S, IP); + } + S->setNoWrapFlags(Flags); + return S; +} + static uint64_t umul_ov(uint64_t i, uint64_t j, bool &Overflow) { uint64_t k = i*j; if (j > 1 && k / j != i) Overflow = true; @@ -2601,7 +2697,8 @@ static bool containsConstantSomewhere(const SCEV *StartExpr) { /// Get a canonical multiply expression, or something simpler if possible. const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl &Ops, - SCEV::NoWrapFlags Flags) { + SCEV::NoWrapFlags Flags, + unsigned Depth) { assert(Flags == maskFlags(Flags, SCEV::FlagNUW | SCEV::FlagNSW) && "only nuw or nsw allowed"); assert(!Ops.empty() && "Cannot get empty mul!"); @@ -2614,10 +2711,14 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl &Ops, #endif // Sort by complexity, this groups all similar expression types together. 
- GroupByComplexity(Ops, &LI); + GroupByComplexity(Ops, &LI, DT); Flags = StrengthenNoWrapFlags(this, scMulExpr, Ops, Flags); + // Limit recursion calls depth. + if (Depth > MaxArithDepth) + return getOrCreateMulExpr(Ops, Flags); + // If there are any constants, fold them together. unsigned Idx = 0; if (const SCEVConstant *LHSC = dyn_cast(Ops[0])) { @@ -2629,8 +2730,11 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl &Ops, // apply this transformation as well. if (Add->getNumOperands() == 2) if (containsConstantSomewhere(Add)) - return getAddExpr(getMulExpr(LHSC, Add->getOperand(0)), - getMulExpr(LHSC, Add->getOperand(1))); + return getAddExpr(getMulExpr(LHSC, Add->getOperand(0), + SCEV::FlagAnyWrap, Depth + 1), + getMulExpr(LHSC, Add->getOperand(1), + SCEV::FlagAnyWrap, Depth + 1), + SCEV::FlagAnyWrap, Depth + 1); ++Idx; while (const SCEVConstant *RHSC = dyn_cast(Ops[Idx])) { @@ -2644,7 +2748,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl &Ops, } // If we are left with a constant one being multiplied, strip it off. - if (cast(Ops[0])->getValue()->equalsInt(1)) { + if (cast(Ops[0])->getValue()->isOne()) { Ops.erase(Ops.begin()); --Idx; } else if (cast(Ops[0])->getValue()->isZero()) { @@ -2658,17 +2762,19 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl &Ops, SmallVector NewOps; bool AnyFolded = false; for (const SCEV *AddOp : Add->operands()) { - const SCEV *Mul = getMulExpr(Ops[0], AddOp); + const SCEV *Mul = getMulExpr(Ops[0], AddOp, SCEV::FlagAnyWrap, + Depth + 1); if (!isa(Mul)) AnyFolded = true; NewOps.push_back(Mul); } if (AnyFolded) - return getAddExpr(NewOps); + return getAddExpr(NewOps, SCEV::FlagAnyWrap, Depth + 1); } else if (const auto *AddRec = dyn_cast(Ops[1])) { // Negation preserves a recurrence's no self-wrap property. SmallVector Operands; for (const SCEV *AddRecOp : AddRec->operands()) - Operands.push_back(getMulExpr(Ops[0], AddRecOp)); + Operands.push_back(getMulExpr(Ops[0], AddRecOp, SCEV::FlagAnyWrap, + Depth + 1)); return getAddRecExpr(Operands, AddRec->getLoop(), AddRec->getNoWrapFlags(SCEV::FlagNW)); @@ -2690,18 +2796,18 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl &Ops, while (const SCEVMulExpr *Mul = dyn_cast(Ops[Idx])) { if (Ops.size() > MulOpsInlineThreshold) break; - // If we have an mul, expand the mul operands onto the end of the operands - // list. + // If we have an mul, expand the mul operands onto the end of the + // operands list. Ops.erase(Ops.begin()+Idx); Ops.append(Mul->op_begin(), Mul->op_end()); DeletedMul = true; } - // If we deleted at least one mul, we added operands to the end of the list, - // and they are not necessarily sorted. Recurse to resort and resimplify - // any operands we just acquired. + // If we deleted at least one mul, we added operands to the end of the + // list, and they are not necessarily sorted. Recurse to resort and + // resimplify any operands we just acquired. if (DeletedMul) - return getMulExpr(Ops); + return getMulExpr(Ops, SCEV::FlagAnyWrap, Depth + 1); } // If there are any add recurrences in the operands list, see if any other @@ -2712,13 +2818,13 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl &Ops, // Scan over all recurrences, trying to fold loop invariants into them. for (; Idx < Ops.size() && isa(Ops[Idx]); ++Idx) { - // Scan all of the other operands to this mul and add them to the vector if - // they are loop invariant w.r.t. the recurrence. 
+ // Scan all of the other operands to this mul and add them to the vector + // if they are loop invariant w.r.t. the recurrence. SmallVector LIOps; const SCEVAddRecExpr *AddRec = cast(Ops[Idx]); const Loop *AddRecLoop = AddRec->getLoop(); for (unsigned i = 0, e = Ops.size(); i != e; ++i) - if (isLoopInvariant(Ops[i], AddRecLoop)) { + if (isAvailableAtLoopEntry(Ops[i], AddRecLoop)) { LIOps.push_back(Ops[i]); Ops.erase(Ops.begin()+i); --i; --e; @@ -2729,9 +2835,10 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl &Ops, // NLI * LI * {Start,+,Step} --> NLI * {LI*Start,+,LI*Step} SmallVector NewOps; NewOps.reserve(AddRec->getNumOperands()); - const SCEV *Scale = getMulExpr(LIOps); + const SCEV *Scale = getMulExpr(LIOps, SCEV::FlagAnyWrap, Depth + 1); for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) - NewOps.push_back(getMulExpr(Scale, AddRec->getOperand(i))); + NewOps.push_back(getMulExpr(Scale, AddRec->getOperand(i), + SCEV::FlagAnyWrap, Depth + 1)); // Build the new addrec. Propagate the NUW and NSW flags if both the // outer mul and the inner addrec are guaranteed to have no overflow. @@ -2750,12 +2857,12 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl &Ops, Ops[i] = NewRec; break; } - return getMulExpr(Ops); + return getMulExpr(Ops, SCEV::FlagAnyWrap, Depth + 1); } - // Okay, if there weren't any loop invariants to be folded, check to see if - // there are multiple AddRec's with the same loop induction variable being - // multiplied together. If so, we can fold them. + // Okay, if there weren't any loop invariants to be folded, check to see + // if there are multiple AddRec's with the same loop induction variable + // being multiplied together. If so, we can fold them. // {A1,+,A2,+,...,+,An} * {B1,+,B2,+,...,+,Bn} // = {x=1 in [ sum y=x..2x [ sum z=max(y-x, y-n)..min(x,n) [ @@ -2776,6 +2883,12 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl &Ops, if (!OtherAddRec || OtherAddRec->getLoop() != AddRecLoop) continue; + // Limit max number of arguments to avoid creation of unreasonably big + // SCEVAddRecs with very complex operands. + if (AddRec->getNumOperands() + OtherAddRec->getNumOperands() - 1 > + MaxAddRecSize) + continue; + bool Overflow = false; Type *Ty = AddRec->getType(); bool LargerThan64Bits = getTypeSizeInBits(Ty) > 64; @@ -2797,7 +2910,9 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl &Ops, const SCEV *CoeffTerm = getConstant(Ty, Coeff); const SCEV *Term1 = AddRec->getOperand(y-z); const SCEV *Term2 = OtherAddRec->getOperand(z); - Term = getAddExpr(Term, getMulExpr(CoeffTerm, Term1,Term2)); + Term = getAddExpr(Term, getMulExpr(CoeffTerm, Term1, Term2, + SCEV::FlagAnyWrap, Depth + 1), + SCEV::FlagAnyWrap, Depth + 1); } } AddRecOps.push_back(Term); @@ -2815,7 +2930,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl &Ops, } } if (OpsModified) - return getMulExpr(Ops); + return getMulExpr(Ops, SCEV::FlagAnyWrap, Depth + 1); // Otherwise couldn't fold anything into this recurrence. Move onto the // next one. @@ -2823,22 +2938,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl &Ops, // Okay, it looks like we really DO need an mul expr. Check to see if we // already have one, otherwise create a new one. 
- FoldingSetNodeID ID; - ID.AddInteger(scMulExpr); - for (unsigned i = 0, e = Ops.size(); i != e; ++i) - ID.AddPointer(Ops[i]); - void *IP = nullptr; - SCEVMulExpr *S = - static_cast(UniqueSCEVs.FindNodeOrInsertPos(ID, IP)); - if (!S) { - const SCEV **O = SCEVAllocator.Allocate(Ops.size()); - std::uninitialized_copy(Ops.begin(), Ops.end(), O); - S = new (SCEVAllocator) SCEVMulExpr(ID.Intern(SCEVAllocator), - O, Ops.size()); - UniqueSCEVs.InsertNode(S, IP); - } - S->setNoWrapFlags(Flags); - return S; + return getOrCreateMulExpr(Ops, Flags); } /// Get a canonical unsigned division expression, or something simpler if @@ -2850,7 +2950,7 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, "SCEVUDivExpr operand types don't match!"); if (const SCEVConstant *RHSC = dyn_cast(RHS)) { - if (RHSC->getValue()->equalsInt(1)) + if (RHSC->getValue()->isOne()) return LHS; // X udiv 1 --> x // If the denominator is zero, the result of the udiv is undefined. Don't // try to analyze it, because the resolution chosen here may differ from @@ -3211,7 +3311,7 @@ ScalarEvolution::getSMaxExpr(SmallVectorImpl &Ops) { #endif // Sort by complexity, this groups all similar expression types together. - GroupByComplexity(Ops, &LI); + GroupByComplexity(Ops, &LI, DT); // If there are any constants, fold them together. unsigned Idx = 0; @@ -3312,7 +3412,7 @@ ScalarEvolution::getUMaxExpr(SmallVectorImpl &Ops) { #endif // Sort by complexity, this groups all similar expression types together. - GroupByComplexity(Ops, &LI); + GroupByComplexity(Ops, &LI, DT); // If there are any constants, fold them together. unsigned Idx = 0; @@ -3641,7 +3741,8 @@ const SCEV *ScalarEvolution::getNotSCEV(const SCEV *V) { } const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS, - SCEV::NoWrapFlags Flags) { + SCEV::NoWrapFlags Flags, + unsigned Depth) { // Fast path: X - X --> 0. if (LHS == RHS) return getZero(LHS->getType()); @@ -3650,7 +3751,7 @@ const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS, // makes it so that we cannot make much use of NUW. auto AddFlags = SCEV::FlagAnyWrap; const bool RHSIsNotMinSigned = - !getSignedRange(RHS).getSignedMin().isMinSignedValue(); + !getSignedRangeMin(RHS).isMinSignedValue(); if (maskFlags(Flags, SCEV::FlagNSW) == SCEV::FlagNSW) { // Let M be the minimum representable signed value. Then (-1)*RHS // signed-wraps if and only if RHS is M. That can happen even for @@ -3675,7 +3776,7 @@ const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS, // larger scope than intended. auto NegFlags = RHSIsNotMinSigned ? SCEV::FlagNSW : SCEV::FlagAnyWrap; - return getAddExpr(LHS, getNegativeSCEV(RHS, NegFlags), AddFlags); + return getAddExpr(LHS, getNegativeSCEV(RHS, NegFlags), AddFlags, Depth); } const SCEV * @@ -3869,7 +3970,7 @@ class SCEVInitRewriter : public SCEVRewriteVisitor { : SCEVRewriteVisitor(SE), L(L), Valid(true) {} const SCEV *visitUnknown(const SCEVUnknown *Expr) { - if (!(SE.getLoopDisposition(Expr, L) == ScalarEvolution::LoopInvariant)) + if (!SE.isLoopInvariant(Expr, L)) Valid = false; return Expr; } @@ -3903,7 +4004,7 @@ class SCEVShiftRewriter : public SCEVRewriteVisitor { const SCEV *visitUnknown(const SCEVUnknown *Expr) { // Only allow AddRecExprs for this loop. 
- if (!(SE.getLoopDisposition(Expr, L) == ScalarEvolution::LoopInvariant)) + if (!SE.isLoopInvariant(Expr, L)) Valid = false; return Expr; } @@ -4083,6 +4184,319 @@ static Optional MatchBinaryOp(Value *V, DominatorTree &DT) { return None; } +/// Helper function to createAddRecFromPHIWithCasts. We have a phi +/// node whose symbolic (unknown) SCEV is \p SymbolicPHI, which is updated via +/// the loop backedge by a SCEVAddExpr, possibly also with a few casts on the +/// way. This function checks if \p Op, an operand of this SCEVAddExpr, +/// follows one of the following patterns: +/// Op == (SExt ix (Trunc iy (%SymbolicPHI) to ix) to iy) +/// Op == (ZExt ix (Trunc iy (%SymbolicPHI) to ix) to iy) +/// If the SCEV expression of \p Op conforms with one of the expected patterns +/// we return the type of the truncation operation, and indicate whether the +/// truncated type should be treated as signed/unsigned by setting +/// \p Signed to true/false, respectively. +static Type *isSimpleCastedPHI(const SCEV *Op, const SCEVUnknown *SymbolicPHI, + bool &Signed, ScalarEvolution &SE) { + + // The case where Op == SymbolicPHI (that is, with no type conversions on + // the way) is handled by the regular add recurrence creating logic and + // would have already been triggered in createAddRecForPHI. Reaching it here + // means that createAddRecFromPHI had failed for this PHI before (e.g., + // because one of the other operands of the SCEVAddExpr updating this PHI is + // not invariant). + // + // Here we look for the case where Op = (ext(trunc(SymbolicPHI))), and in + // this case predicates that allow us to prove that Op == SymbolicPHI will + // be added. + if (Op == SymbolicPHI) + return nullptr; + + unsigned SourceBits = SE.getTypeSizeInBits(SymbolicPHI->getType()); + unsigned NewBits = SE.getTypeSizeInBits(Op->getType()); + if (SourceBits != NewBits) + return nullptr; + + const SCEVSignExtendExpr *SExt = dyn_cast(Op); + const SCEVZeroExtendExpr *ZExt = dyn_cast(Op); + if (!SExt && !ZExt) + return nullptr; + const SCEVTruncateExpr *Trunc = + SExt ? dyn_cast(SExt->getOperand()) + : dyn_cast(ZExt->getOperand()); + if (!Trunc) + return nullptr; + const SCEV *X = Trunc->getOperand(); + if (X != SymbolicPHI) + return nullptr; + Signed = SExt ? true : false; + return Trunc->getType(); +} + +static const Loop *isIntegerLoopHeaderPHI(const PHINode *PN, LoopInfo &LI) { + if (!PN->getType()->isIntegerTy()) + return nullptr; + const Loop *L = LI.getLoopFor(PN->getParent()); + if (!L || L->getHeader() != PN->getParent()) + return nullptr; + return L; +} + +// Analyze \p SymbolicPHI, a SCEV expression of a phi node, and check if the +// computation that updates the phi follows the following pattern: +// (SExt/ZExt ix (Trunc iy (%SymbolicPHI) to ix) to iy) + InvariantAccum +// which correspond to a phi->trunc->sext/zext->add->phi update chain. +// If so, try to see if it can be rewritten as an AddRecExpr under some +// Predicates. If successful, return them as a pair. Also cache the results +// of the analysis. +// +// Example usage scenario: +// Say the Rewriter is called for the following SCEV: +// 8 * ((sext i32 (trunc i64 %X to i32) to i64) + %Step) +// where: +// %X = phi i64 (%Start, %BEValue) +// It will visitMul->visitAdd->visitSExt->visitTrunc->visitUnknown(%X), +// and call this function with %SymbolicPHI = %X. 
+// +// The analysis will find that the value coming around the backedge has +// the following SCEV: +// BEValue = ((sext i32 (trunc i64 %X to i32) to i64) + %Step) +// Upon concluding that this matches the desired pattern, the function +// will return the pair {NewAddRec, SmallPredsVec} where: +// NewAddRec = {%Start,+,%Step} +// SmallPredsVec = {P1, P2, P3} as follows: +// P1(WrapPred): AR: {trunc(%Start),+,(trunc %Step)} Flags: +// P2(EqualPred): %Start == (sext i32 (trunc i64 %Start to i32) to i64) +// P3(EqualPred): %Step == (sext i32 (trunc i64 %Step to i32) to i64) +// The returned pair means that SymbolicPHI can be rewritten into NewAddRec +// under the predicates {P1,P2,P3}. +// This predicated rewrite will be cached in PredicatedSCEVRewrites: +// PredicatedSCEVRewrites[{%X,L}] = {NewAddRec, {P1,P2,P3)} +// +// TODO's: +// +// 1) Extend the Induction descriptor to also support inductions that involve +// casts: When needed (namely, when we are called in the context of the +// vectorizer induction analysis), a Set of cast instructions will be +// populated by this method, and provided back to isInductionPHI. This is +// needed to allow the vectorizer to properly record them to be ignored by +// the cost model and to avoid vectorizing them (otherwise these casts, +// which are redundant under the runtime overflow checks, will be +// vectorized, which can be costly). +// +// 2) Support additional induction/PHISCEV patterns: We also want to support +// inductions where the sext-trunc / zext-trunc operations (partly) occur +// after the induction update operation (the induction increment): +// +// (Trunc iy (SExt/ZExt ix (%SymbolicPHI + InvariantAccum) to iy) to ix) +// which correspond to a phi->add->trunc->sext/zext->phi update chain. +// +// (Trunc iy ((SExt/ZExt ix (%SymbolicPhi) to iy) + InvariantAccum) to ix) +// which correspond to a phi->trunc->add->sext/zext->phi update chain. +// +// 3) Outline common code with createAddRecFromPHI to avoid duplication. +// +Optional>> +ScalarEvolution::createAddRecFromPHIWithCastsImpl(const SCEVUnknown *SymbolicPHI) { + SmallVector Predicates; + + // *** Part1: Analyze if we have a phi-with-cast pattern for which we can + // return an AddRec expression under some predicate. + + auto *PN = cast(SymbolicPHI->getValue()); + const Loop *L = isIntegerLoopHeaderPHI(PN, LI); + assert (L && "Expecting an integer loop header phi"); + + // The loop may have multiple entrances or multiple exits; we can analyze + // this phi as an addrec if it has a unique entry value and a unique + // backedge value. + Value *BEValueV = nullptr, *StartValueV = nullptr; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + Value *V = PN->getIncomingValue(i); + if (L->contains(PN->getIncomingBlock(i))) { + if (!BEValueV) { + BEValueV = V; + } else if (BEValueV != V) { + BEValueV = nullptr; + break; + } + } else if (!StartValueV) { + StartValueV = V; + } else if (StartValueV != V) { + StartValueV = nullptr; + break; + } + } + if (!BEValueV || !StartValueV) + return None; + + const SCEV *BEValue = getSCEV(BEValueV); + + // If the value coming around the backedge is an add with the symbolic + // value we just inserted, possibly with casts that we can ignore under + // an appropriate runtime guard, then we found a simple induction variable! + const auto *Add = dyn_cast(BEValue); + if (!Add) + return None; + + // If there is a single occurrence of the symbolic value, possibly + // casted, replace it with a recurrence. 
+ unsigned FoundIndex = Add->getNumOperands(); + Type *TruncTy = nullptr; + bool Signed; + for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i) + if ((TruncTy = + isSimpleCastedPHI(Add->getOperand(i), SymbolicPHI, Signed, *this))) + if (FoundIndex == e) { + FoundIndex = i; + break; + } + + if (FoundIndex == Add->getNumOperands()) + return None; + + // Create an add with everything but the specified operand. + SmallVector Ops; + for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i) + if (i != FoundIndex) + Ops.push_back(Add->getOperand(i)); + const SCEV *Accum = getAddExpr(Ops); + + // The runtime checks will not be valid if the step amount is + // varying inside the loop. + if (!isLoopInvariant(Accum, L)) + return None; + + + // *** Part2: Create the predicates + + // Analysis was successful: we have a phi-with-cast pattern for which we + // can return an AddRec expression under the following predicates: + // + // P1: A Wrap predicate that guarantees that Trunc(Start) + i*Trunc(Accum) + // fits within the truncated type (does not overflow) for i = 0 to n-1. + // P2: An Equal predicate that guarantees that + // Start = (Ext ix (Trunc iy (Start) to ix) to iy) + // P3: An Equal predicate that guarantees that + // Accum = (Ext ix (Trunc iy (Accum) to ix) to iy) + // + // As we next prove, the above predicates guarantee that: + // Start + i*Accum = (Ext ix (Trunc iy ( Start + i*Accum ) to ix) to iy) + // + // + // More formally, we want to prove that: + // Expr(i+1) = Start + (i+1) * Accum + // = (Ext ix (Trunc iy (Expr(i)) to ix) to iy) + Accum + // + // Given that: + // 1) Expr(0) = Start + // 2) Expr(1) = Start + Accum + // = (Ext ix (Trunc iy (Start) to ix) to iy) + Accum :: from P2 + // 3) Induction hypothesis (step i): + // Expr(i) = (Ext ix (Trunc iy (Expr(i-1)) to ix) to iy) + Accum + // + // Proof: + // Expr(i+1) = + // = Start + (i+1)*Accum + // = (Start + i*Accum) + Accum + // = Expr(i) + Accum + // = (Ext ix (Trunc iy (Expr(i-1)) to ix) to iy) + Accum + Accum + // :: from step i + // + // = (Ext ix (Trunc iy (Start + (i-1)*Accum) to ix) to iy) + Accum + Accum + // + // = (Ext ix (Trunc iy (Start + (i-1)*Accum) to ix) to iy) + // + (Ext ix (Trunc iy (Accum) to ix) to iy) + // + Accum :: from P3 + // + // = (Ext ix (Trunc iy ((Start + (i-1)*Accum) + Accum) to ix) to iy) + // + Accum :: from P1: Ext(x)+Ext(y)=>Ext(x+y) + // + // = (Ext ix (Trunc iy (Start + i*Accum) to ix) to iy) + Accum + // = (Ext ix (Trunc iy (Expr(i)) to ix) to iy) + Accum + // + // By induction, the same applies to all iterations 1<=i(PHISCEV); + + SCEVWrapPredicate::IncrementWrapFlags AddedFlags = + Signed ? SCEVWrapPredicate::IncrementNSSW + : SCEVWrapPredicate::IncrementNUSW; + const SCEVPredicate *AddRecPred = getWrapPredicate(AR, AddedFlags); + Predicates.push_back(AddRecPred); + + // Create the Equal Predicates P2,P3: + auto AppendPredicate = [&](const SCEV *Expr) -> void { + assert (isLoopInvariant(Expr, L) && "Expr is expected to be invariant"); + const SCEV *TruncatedExpr = getTruncateExpr(Expr, TruncTy); + const SCEV *ExtendedExpr = + Signed ? getSignExtendExpr(TruncatedExpr, Expr->getType()) + : getZeroExtendExpr(TruncatedExpr, Expr->getType()); + if (Expr != ExtendedExpr && + !isKnownPredicate(ICmpInst::ICMP_EQ, Expr, ExtendedExpr)) { + const SCEVPredicate *Pred = getEqualPredicate(Expr, ExtendedExpr); + DEBUG (dbgs() << "Added Predicate: " << *Pred); + Predicates.push_back(Pred); + } + }; + + AppendPredicate(StartVal); + AppendPredicate(Accum); + + // *** Part3: Predicates are ready. 
Now go ahead and create the new addrec in + // which the casts had been folded away. The caller can rewrite SymbolicPHI + // into NewAR if it will also add the runtime overflow checks specified in + // Predicates. + auto *NewAR = getAddRecExpr(StartVal, Accum, L, SCEV::FlagAnyWrap); + + std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>> PredRewrite = + std::make_pair(NewAR, Predicates); + // Remember the result of the analysis for this SCEV at this location. + PredicatedSCEVRewrites[{SymbolicPHI, L}] = PredRewrite; + return PredRewrite; +} + +Optional<std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>>> +ScalarEvolution::createAddRecFromPHIWithCasts(const SCEVUnknown *SymbolicPHI) { + + auto *PN = cast<PHINode>(SymbolicPHI->getValue()); + const Loop *L = isIntegerLoopHeaderPHI(PN, LI); + if (!L) + return None; + + // Check to see if we already analyzed this PHI. + auto I = PredicatedSCEVRewrites.find({SymbolicPHI, L}); + if (I != PredicatedSCEVRewrites.end()) { + std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>> Rewrite = + I->second; + // Analysis was done before and failed to create an AddRec: + if (Rewrite.first == SymbolicPHI) + return None; + // Analysis was done before and succeeded to create an AddRec under + // a predicate: + assert(isa<SCEVAddRecExpr>(Rewrite.first) && "Expected an AddRec"); + assert(!(Rewrite.second).empty() && "Expected to find Predicates"); + return Rewrite; + } + + Optional<std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>>> + Rewrite = createAddRecFromPHIWithCastsImpl(SymbolicPHI); + + // Record in the cache that the analysis failed + if (!Rewrite) { + SmallVector<const SCEVPredicate *, 3> Predicates; + PredicatedSCEVRewrites[{SymbolicPHI, L}] = {SymbolicPHI, Predicates}; + return None; + } + + return Rewrite; +} + /// A helper function for createAddRecFromPHI to handle simple cases. /// /// This function tries to find an AddRec expression for the simplest (yet most @@ -4632,10 +5046,7 @@ uint32_t ScalarEvolution::GetMinTrailingZerosImpl(const SCEV *S) { if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) { // For a SCEVUnknown, ask ValueTracking. - unsigned BitWidth = getTypeSizeInBits(U->getType()); - KnownBits Known(BitWidth); - computeKnownBits(U->getValue(), Known, getDataLayout(), 0, &AC, - nullptr, &DT); + KnownBits Known = computeKnownBits(U->getValue(), getDataLayout(), 0, &AC, nullptr, &DT); return Known.countMinTrailingZeros(); } @@ -4666,9 +5077,9 @@ static Optional<ConstantRange> GetRangeFromMetadata(Value *V) { /// Determine the range for a particular SCEV. If SignHint is /// HINT_RANGE_UNSIGNED (resp. HINT_RANGE_SIGNED) then getRange prefers ranges /// with a "cleaner" unsigned (resp. signed) representation. -ConstantRange -ScalarEvolution::getRange(const SCEV *S, - ScalarEvolution::RangeSignHint SignHint) { +const ConstantRange & +ScalarEvolution::getRangeRef(const SCEV *S, + ScalarEvolution::RangeSignHint SignHint) { DenseMap<const SCEV *, ConstantRange> &Cache = SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED ?
UnsignedRanges : SignedRanges; @@ -4699,54 +5110,54 @@ ScalarEvolution::getRange(const SCEV *S, } if (const SCEVAddExpr *Add = dyn_cast(S)) { - ConstantRange X = getRange(Add->getOperand(0), SignHint); + ConstantRange X = getRangeRef(Add->getOperand(0), SignHint); for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i) - X = X.add(getRange(Add->getOperand(i), SignHint)); + X = X.add(getRangeRef(Add->getOperand(i), SignHint)); return setRange(Add, SignHint, ConservativeResult.intersectWith(X)); } if (const SCEVMulExpr *Mul = dyn_cast(S)) { - ConstantRange X = getRange(Mul->getOperand(0), SignHint); + ConstantRange X = getRangeRef(Mul->getOperand(0), SignHint); for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i) - X = X.multiply(getRange(Mul->getOperand(i), SignHint)); + X = X.multiply(getRangeRef(Mul->getOperand(i), SignHint)); return setRange(Mul, SignHint, ConservativeResult.intersectWith(X)); } if (const SCEVSMaxExpr *SMax = dyn_cast(S)) { - ConstantRange X = getRange(SMax->getOperand(0), SignHint); + ConstantRange X = getRangeRef(SMax->getOperand(0), SignHint); for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i) - X = X.smax(getRange(SMax->getOperand(i), SignHint)); + X = X.smax(getRangeRef(SMax->getOperand(i), SignHint)); return setRange(SMax, SignHint, ConservativeResult.intersectWith(X)); } if (const SCEVUMaxExpr *UMax = dyn_cast(S)) { - ConstantRange X = getRange(UMax->getOperand(0), SignHint); + ConstantRange X = getRangeRef(UMax->getOperand(0), SignHint); for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i) - X = X.umax(getRange(UMax->getOperand(i), SignHint)); + X = X.umax(getRangeRef(UMax->getOperand(i), SignHint)); return setRange(UMax, SignHint, ConservativeResult.intersectWith(X)); } if (const SCEVUDivExpr *UDiv = dyn_cast(S)) { - ConstantRange X = getRange(UDiv->getLHS(), SignHint); - ConstantRange Y = getRange(UDiv->getRHS(), SignHint); + ConstantRange X = getRangeRef(UDiv->getLHS(), SignHint); + ConstantRange Y = getRangeRef(UDiv->getRHS(), SignHint); return setRange(UDiv, SignHint, ConservativeResult.intersectWith(X.udiv(Y))); } if (const SCEVZeroExtendExpr *ZExt = dyn_cast(S)) { - ConstantRange X = getRange(ZExt->getOperand(), SignHint); + ConstantRange X = getRangeRef(ZExt->getOperand(), SignHint); return setRange(ZExt, SignHint, ConservativeResult.intersectWith(X.zeroExtend(BitWidth))); } if (const SCEVSignExtendExpr *SExt = dyn_cast(S)) { - ConstantRange X = getRange(SExt->getOperand(), SignHint); + ConstantRange X = getRangeRef(SExt->getOperand(), SignHint); return setRange(SExt, SignHint, ConservativeResult.intersectWith(X.signExtend(BitWidth))); } if (const SCEVTruncateExpr *Trunc = dyn_cast(S)) { - ConstantRange X = getRange(Trunc->getOperand(), SignHint); + ConstantRange X = getRangeRef(Trunc->getOperand(), SignHint); return setRange(Trunc, SignHint, ConservativeResult.intersectWith(X.truncate(BitWidth))); } @@ -4815,8 +5226,7 @@ ScalarEvolution::getRange(const SCEV *S, const DataLayout &DL = getDataLayout(); if (SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED) { // For a SCEVUnknown, ask ValueTracking. 
- KnownBits Known(BitWidth); - computeKnownBits(U->getValue(), Known, DL, 0, &AC, nullptr, &DT); + KnownBits Known = computeKnownBits(U->getValue(), DL, 0, &AC, nullptr, &DT); if (Known.One != ~Known.Zero + 1) ConservativeResult = ConservativeResult.intersectWith(ConstantRange(Known.One, @@ -4914,8 +5324,7 @@ ConstantRange ScalarEvolution::getRangeForAffineAR(const SCEV *Start, "Precondition!"); MaxBECount = getNoopOrZeroExtend(MaxBECount, Start->getType()); - ConstantRange MaxBECountRange = getUnsignedRange(MaxBECount); - APInt MaxBECountValue = MaxBECountRange.getUnsignedMax(); + APInt MaxBECountValue = getUnsignedRangeMax(MaxBECount); // First, consider step signed. ConstantRange StartSRange = getSignedRange(Start); @@ -4932,7 +5341,7 @@ ConstantRange ScalarEvolution::getRangeForAffineAR(const SCEV *Start, // Next, consider step unsigned. ConstantRange UR = getRangeForAffineARHelper( - getUnsignedRange(Step).getUnsignedMax(), getUnsignedRange(Start), + getUnsignedRangeMax(Step), getUnsignedRange(Start), MaxBECountValue, BitWidth, /* Signed = */ false); // Finally, intersect signed and unsigned ranges. @@ -5336,9 +5745,9 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { // For an expression like x&255 that merely masks off the high bits, // use zext(trunc(x)) as the SCEV expression. if (ConstantInt *CI = dyn_cast(BO->RHS)) { - if (CI->isNullValue()) + if (CI->isZero()) return getSCEV(BO->RHS); - if (CI->isAllOnesValue()) + if (CI->isMinusOne()) return getSCEV(BO->LHS); const APInt &A = CI->getValue(); @@ -5413,7 +5822,7 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { case Instruction::Xor: if (ConstantInt *CI = dyn_cast(BO->RHS)) { // If the RHS of xor is -1, then this is a not operation. - if (CI->isAllOnesValue()) + if (CI->isMinusOne()) return getNotSCEV(getSCEV(BO->LHS)); // Model xor(and(x, C), C) as and(~x, C), if C is a low-bits mask. @@ -5492,7 +5901,7 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { if (CI->getValue().uge(BitWidth)) break; - if (CI->isNullValue()) + if (CI->isZero()) return getSCEV(BO->LHS); // shift by zero --> noop uint64_t AShrAmt = CI->getZExtValue(); @@ -5819,6 +6228,16 @@ void ScalarEvolution::forgetLoop(const Loop *L) { RemoveLoopFromBackedgeMap(BackedgeTakenCounts); RemoveLoopFromBackedgeMap(PredicatedBackedgeTakenCounts); + // Drop information about predicated SCEV rewrites for this loop. + for (auto I = PredicatedSCEVRewrites.begin(); + I != PredicatedSCEVRewrites.end();) { + std::pair Entry = I->first; + if (Entry.second == L) + PredicatedSCEVRewrites.erase(I++); + else + ++I; + } + // Drop information about expressions based on loop-header PHIs. 
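The invalidation loop just added walks PredicatedSCEVRewrites and erases every entry keyed to L; because LLVM's DenseMap::erase does not hand back the next iterator, the patch uses the erase(I++) form. A standalone sketch of the same idiom over std::map, whose erase does return the successor:

#include <cassert>
#include <map>
#include <utility>

int main() {
  // Keys model {SCEV*, Loop*} pairs; erase everything whose loop component
  // matches L, keeping the rest, exactly like the loop in forgetLoop.
  std::map<std::pair<int, int>, int> Rewrites{
      {{1, 7}, 10}, {{2, 7}, 20}, {{3, 8}, 30}};
  const int L = 7;
  for (auto I = Rewrites.begin(); I != Rewrites.end();) {
    if (I->first.second == L)
      I = Rewrites.erase(I); // std::map::erase returns the next iterator
    else
      ++I;
  }
  assert(Rewrites.size() == 1 && Rewrites.begin()->first.second == 8);
  return 0;
}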
SmallVector Worklist; PushLoopPHIs(L, Worklist); @@ -5931,6 +6350,8 @@ ScalarEvolution::BackedgeTakenInfo::getMax(ScalarEvolution *SE) const { if (any_of(ExitNotTaken, PredicateNotAlwaysTrue) || !getMax()) return SE->getCouldNotCompute(); + assert((isa(getMax()) || isa(getMax())) && + "No point in having a non-constant max backedge taken count!"); return getMax(); } @@ -5955,6 +6376,45 @@ bool ScalarEvolution::BackedgeTakenInfo::hasOperand(const SCEV *S, return false; } +ScalarEvolution::ExitLimit::ExitLimit(const SCEV *E) + : ExactNotTaken(E), MaxNotTaken(E), MaxOrZero(false) { + assert((isa(MaxNotTaken) || + isa(MaxNotTaken)) && + "No point in having a non-constant max backedge taken count!"); +} + +ScalarEvolution::ExitLimit::ExitLimit( + const SCEV *E, const SCEV *M, bool MaxOrZero, + ArrayRef *> PredSetList) + : ExactNotTaken(E), MaxNotTaken(M), MaxOrZero(MaxOrZero) { + assert((isa(ExactNotTaken) || + !isa(MaxNotTaken)) && + "Exact is not allowed to be less precise than Max"); + assert((isa(MaxNotTaken) || + isa(MaxNotTaken)) && + "No point in having a non-constant max backedge taken count!"); + for (auto *PredSet : PredSetList) + for (auto *P : *PredSet) + addPredicate(P); +} + +ScalarEvolution::ExitLimit::ExitLimit( + const SCEV *E, const SCEV *M, bool MaxOrZero, + const SmallPtrSetImpl &PredSet) + : ExitLimit(E, M, MaxOrZero, {&PredSet}) { + assert((isa(MaxNotTaken) || + isa(MaxNotTaken)) && + "No point in having a non-constant max backedge taken count!"); +} + +ScalarEvolution::ExitLimit::ExitLimit(const SCEV *E, const SCEV *M, + bool MaxOrZero) + : ExitLimit(E, M, MaxOrZero, None) { + assert((isa(MaxNotTaken) || + isa(MaxNotTaken)) && + "No point in having a non-constant max backedge taken count!"); +} + /// Allocate memory for BackedgeTakenInfo and copy the not-taken count of each /// computable exit into a persistent ExitNotTakenInfo array. ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo( @@ -5978,6 +6438,8 @@ ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo( return ExitNotTakenInfo(ExitBB, EL.ExactNotTaken, std::move(Predicate)); }); + assert((isa(MaxCount) || isa(MaxCount)) && + "No point in having a non-constant max backedge taken count!"); } /// Invalidate this result and free the ExitNotTakenInfo array. @@ -6239,7 +6701,7 @@ ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCondImpl( // to not. if (isa(MaxBECount) && !isa(BECount)) - MaxBECount = BECount; + MaxBECount = getConstant(getUnsignedRangeMax(BECount)); return ExitLimit(BECount, MaxBECount, false, {&EL0.Predicates, &EL1.Predicates}); @@ -6637,13 +7099,12 @@ ScalarEvolution::ExitLimit ScalarEvolution::computeShiftCompareExitLimit( // {K,ashr,} stabilizes to signum(K) in at most // bitwidth(K) iterations. 
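The stabilization claim in the comment above is easy to check in isolation: arithmetic right shifts drive any K to 0 (for K >= 0) or -1 (for K < 0) within bitwidth(K) steps. A standalone sketch, relying on the usual arithmetic behavior of >> on signed values, which mainstream compilers implement and C++20 guarantees:

#include <cassert>
#include <cstdint>
#include <vector>

int main() {
  for (int64_t K : std::vector<int64_t>{0, 1, 12345, -1, -98765,
                                        INT64_MIN, INT64_MAX}) {
    int64_t V = K;
    for (int i = 0; i < 64; ++i) // bitwidth(K) iterations are always enough
      V >>= 1;                   // one {K,ashr,1} step
    assert(V == (K < 0 ? -1 : 0));
  }
  return 0;
}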
Value *FirstValue = PN->getIncomingValueForBlock(Predecessor); - bool KnownZero, KnownOne; - ComputeSignBit(FirstValue, KnownZero, KnownOne, DL, 0, nullptr, - Predecessor->getTerminator(), &DT); + KnownBits Known = computeKnownBits(FirstValue, DL, 0, nullptr, + Predecessor->getTerminator(), &DT); auto *Ty = cast(RHS->getType()); - if (KnownZero) + if (Known.isNonNegative()) StableValue = ConstantInt::get(Ty, 0); - else if (KnownOne) + else if (Known.isNegative()) StableValue = ConstantInt::get(Ty, -1, true); else return getCouldNotCompute(); @@ -6683,7 +7144,7 @@ static bool CanConstantFold(const Instruction *I) { if (const CallInst *CI = dyn_cast(I)) if (const Function *F = CI->getCalledFunction()) - return canConstantFoldCallTo(F); + return canConstantFoldCallTo(CI, F); return false; } @@ -7132,6 +7593,25 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) { const SCEV *BackedgeTakenCount = getBackedgeTakenCount(LI); if (const SCEVConstant *BTCC = dyn_cast(BackedgeTakenCount)) { + + // This trivial case can show up in some degenerate cases where + // the incoming IR has not yet been fully simplified. + if (BTCC->getValue()->isZero()) { + Value *InitValue = nullptr; + bool MultipleInitValues = false; + for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) { + if (!LI->contains(PN->getIncomingBlock(i))) { + if (!InitValue) + InitValue = PN->getIncomingValue(i); + else if (InitValue != PN->getIncomingValue(i)) { + MultipleInitValues = true; + break; + } + } + if (!MultipleInitValues && InitValue) + return getSCEV(InitValue); + } + } // Okay, we know how many times the containing loop executes. If // this is a constant evolving PHI node, get the final value at // the specified iteration number. @@ -7380,17 +7860,17 @@ SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) { // Convert from chrec coefficients to polynomial coefficients AX^2+BX+C // The A coefficient is N/2 - APInt A(N.sdiv(Two)); + APInt A = N.sdiv(Two); // The B coefficient is M-N/2 - APInt B(M); + APInt B = M; B -= A; // A is the same as N/2. // The C coefficient is L. const APInt& C = L; // Compute the B^2-4ac term. - APInt SqrtTerm(B); + APInt SqrtTerm = B; SqrtTerm *= B; SqrtTerm -= 4 * (A * C); @@ -7401,12 +7881,12 @@ SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) { // Compute sqrt(B^2-4ac). This is guaranteed to be the nearest // integer value or else APInt::sqrt() will assert. - APInt SqrtVal(SqrtTerm.sqrt()); + APInt SqrtVal = SqrtTerm.sqrt(); // Compute the two solutions for the quadratic formula. // The divisions must be performed as signed divisions. - APInt NegB(-std::move(B)); - APInt TwoA(std::move(A)); + APInt NegB = -std::move(B); + APInt TwoA = std::move(A); TwoA <<= 1; if (TwoA.isNullValue()) return None; @@ -7499,7 +7979,7 @@ ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit, // to 0, it must be counting down to equal 0. Consequently, N = Start / -Step. // We have not yet seen any such cases. const SCEVConstant *StepC = dyn_cast(Step); - if (!StepC || StepC->getValue()->equalsInt(0)) + if (!StepC || StepC->getValue()->isZero()) return getCouldNotCompute(); // For positive steps (counting up until unsigned overflow): @@ -7513,8 +7993,8 @@ ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit, // Handle unitary steps, which cannot wraparound. 
// 1*N = -Start; -1*N = Start (mod 2^BW), so: // N = Distance (as unsigned) - if (StepC->getValue()->equalsInt(1) || StepC->getValue()->isAllOnesValue()) { - APInt MaxBECount = getUnsignedRange(Distance).getUnsignedMax(); + if (StepC->getValue()->isOne() || StepC->getValue()->isMinusOne()) { + APInt MaxBECount = getUnsignedRangeMax(Distance); // When a loop like "for (int i = 0; i != n; ++i) { /* body */ }" is rotated, // we end up with a loop whose backedge-taken count is n - 1. Detect this @@ -7544,13 +8024,20 @@ ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit, loopHasNoAbnormalExits(AddRec->getLoop())) { const SCEV *Exact = getUDivExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step); - return ExitLimit(Exact, Exact, false, Predicates); + const SCEV *Max = + Exact == getCouldNotCompute() + ? Exact + : getConstant(getUnsignedRangeMax(Exact)); + return ExitLimit(Exact, Max, false, Predicates); } // Solve the general equation. - const SCEV *E = SolveLinEquationWithOverflow( - StepC->getAPInt(), getNegativeSCEV(Start), *this); - return ExitLimit(E, E, false, Predicates); + const SCEV *E = SolveLinEquationWithOverflow(StepC->getAPInt(), + getNegativeSCEV(Start), *this); + const SCEV *M = E == getCouldNotCompute() + ? E + : getConstant(getUnsignedRangeMax(E)); + return ExitLimit(E, M, false, Predicates); } ScalarEvolution::ExitLimit @@ -7562,7 +8049,7 @@ ScalarEvolution::howFarToNonZero(const SCEV *V, const Loop *L) { // If the value is a constant, check to see if it is known to be non-zero // already. If so, the backedge will execute zero times. if (const SCEVConstant *C = dyn_cast(V)) { - if (!C->getValue()->isNullValue()) + if (!C->getValue()->isZero()) return getZero(C->getType()); return getCouldNotCompute(); // Otherwise it will loop infinitely. } @@ -7746,12 +8233,12 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred, // adding or subtracting 1 from one of the operands. 
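The guards added in the switch below are what keep these rewrites sound: for instance, x <=s y may be turned into x <s y+1 only when y cannot already be the maximal signed value, since y+1 would otherwise wrap. An exhaustive standalone check at 8 bits (illustrative only, not from the patch):

#include <cassert>
#include <cstdint>

int main() {
  for (int x = -128; x <= 127; ++x)
    for (int y = -128; y <= 127; ++y) {
      if (y == 127) // the isMaxSignedValue() guard: 127 + 1 would wrap in i8
        continue;
      bool Sle = static_cast<int8_t>(x) <= static_cast<int8_t>(y);
      bool Slt = static_cast<int8_t>(x) < static_cast<int8_t>(y + 1);
      assert(Sle == Slt);
    }
  return 0;
}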
switch (Pred) { case ICmpInst::ICMP_SLE: - if (!getSignedRange(RHS).getSignedMax().isMaxSignedValue()) { + if (!getSignedRangeMax(RHS).isMaxSignedValue()) { RHS = getAddExpr(getConstant(RHS->getType(), 1, true), RHS, SCEV::FlagNSW); Pred = ICmpInst::ICMP_SLT; Changed = true; - } else if (!getSignedRange(LHS).getSignedMin().isMinSignedValue()) { + } else if (!getSignedRangeMin(LHS).isMinSignedValue()) { LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS, SCEV::FlagNSW); Pred = ICmpInst::ICMP_SLT; @@ -7759,12 +8246,12 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred, } break; case ICmpInst::ICMP_SGE: - if (!getSignedRange(RHS).getSignedMin().isMinSignedValue()) { + if (!getSignedRangeMin(RHS).isMinSignedValue()) { RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS, SCEV::FlagNSW); Pred = ICmpInst::ICMP_SGT; Changed = true; - } else if (!getSignedRange(LHS).getSignedMax().isMaxSignedValue()) { + } else if (!getSignedRangeMax(LHS).isMaxSignedValue()) { LHS = getAddExpr(getConstant(RHS->getType(), 1, true), LHS, SCEV::FlagNSW); Pred = ICmpInst::ICMP_SGT; @@ -7772,23 +8259,23 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred, } break; case ICmpInst::ICMP_ULE: - if (!getUnsignedRange(RHS).getUnsignedMax().isMaxValue()) { + if (!getUnsignedRangeMax(RHS).isMaxValue()) { RHS = getAddExpr(getConstant(RHS->getType(), 1, true), RHS, SCEV::FlagNUW); Pred = ICmpInst::ICMP_ULT; Changed = true; - } else if (!getUnsignedRange(LHS).getUnsignedMin().isMinValue()) { + } else if (!getUnsignedRangeMin(LHS).isMinValue()) { LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS); Pred = ICmpInst::ICMP_ULT; Changed = true; } break; case ICmpInst::ICMP_UGE: - if (!getUnsignedRange(RHS).getUnsignedMin().isMinValue()) { + if (!getUnsignedRangeMin(RHS).isMinValue()) { RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS); Pred = ICmpInst::ICMP_UGT; Changed = true; - } else if (!getUnsignedRange(LHS).getUnsignedMax().isMaxValue()) { + } else if (!getUnsignedRangeMax(LHS).isMaxValue()) { LHS = getAddExpr(getConstant(RHS->getType(), 1, true), LHS, SCEV::FlagNUW); Pred = ICmpInst::ICMP_UGT; @@ -7822,19 +8309,19 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred, } bool ScalarEvolution::isKnownNegative(const SCEV *S) { - return getSignedRange(S).getSignedMax().isNegative(); + return getSignedRangeMax(S).isNegative(); } bool ScalarEvolution::isKnownPositive(const SCEV *S) { - return getSignedRange(S).getSignedMin().isStrictlyPositive(); + return getSignedRangeMin(S).isStrictlyPositive(); } bool ScalarEvolution::isKnownNonNegative(const SCEV *S) { - return !getSignedRange(S).getSignedMin().isNegative(); + return !getSignedRangeMin(S).isNegative(); } bool ScalarEvolution::isKnownNonPositive(const SCEV *S) { - return !getSignedRange(S).getSignedMax().isStrictlyPositive(); + return !getSignedRangeMax(S).isStrictlyPositive(); } bool ScalarEvolution::isKnownNonZero(const SCEV *S) { @@ -8065,6 +8552,7 @@ bool ScalarEvolution::isKnownPredicateViaNoOverflow(ICmpInst::Predicate Pred, case ICmpInst::ICMP_SGE: std::swap(LHS, RHS); + LLVM_FALLTHROUGH; case ICmpInst::ICMP_SLE: // X s<= (X + C) if C >= 0 if (MatchBinaryAddToConst(RHS, LHS, C, SCEV::FlagNSW) && C.isNonNegative()) @@ -8078,6 +8566,7 @@ bool ScalarEvolution::isKnownPredicateViaNoOverflow(ICmpInst::Predicate Pred, case ICmpInst::ICMP_SGT: std::swap(LHS, RHS); + LLVM_FALLTHROUGH; case ICmpInst::ICMP_SLT: // X s< (X + C) if C > 0 if 
(MatchBinaryAddToConst(RHS, LHS, C, SCEV::FlagNSW) && @@ -8418,7 +8907,7 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, // predicate we're interested in folding. APInt Min = ICmpInst::isSigned(Pred) ? - getSignedRange(V).getSignedMin() : getUnsignedRange(V).getUnsignedMin(); + getSignedRangeMin(V) : getUnsignedRangeMin(V); if (Min == C->getAPInt()) { // Given (V >= Min && V != Min) we conclude V >= (Min + 1). @@ -8435,6 +8924,7 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, if (isImpliedCondOperands(Pred, LHS, RHS, V, getConstant(SharperMin))) return true; + LLVM_FALLTHROUGH; case ICmpInst::ICMP_SGT: case ICmpInst::ICMP_UGT: @@ -8449,6 +8939,7 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, if (isImpliedCondOperands(Pred, LHS, RHS, V, getConstant(Min))) return true; + LLVM_FALLTHROUGH; default: // No change @@ -8971,19 +9462,17 @@ bool ScalarEvolution::doesIVOverflowOnLT(const SCEV *RHS, const SCEV *Stride, const SCEV *One = getOne(Stride->getType()); if (IsSigned) { - APInt MaxRHS = getSignedRange(RHS).getSignedMax(); + APInt MaxRHS = getSignedRangeMax(RHS); APInt MaxValue = APInt::getSignedMaxValue(BitWidth); - APInt MaxStrideMinusOne = getSignedRange(getMinusSCEV(Stride, One)) - .getSignedMax(); + APInt MaxStrideMinusOne = getSignedRangeMax(getMinusSCEV(Stride, One)); // SMaxRHS + SMaxStrideMinusOne > SMaxValue => overflow! return (std::move(MaxValue) - MaxStrideMinusOne).slt(MaxRHS); } - APInt MaxRHS = getUnsignedRange(RHS).getUnsignedMax(); + APInt MaxRHS = getUnsignedRangeMax(RHS); APInt MaxValue = APInt::getMaxValue(BitWidth); - APInt MaxStrideMinusOne = getUnsignedRange(getMinusSCEV(Stride, One)) - .getUnsignedMax(); + APInt MaxStrideMinusOne = getUnsignedRangeMax(getMinusSCEV(Stride, One)); // UMaxRHS + UMaxStrideMinusOne > UMaxValue => overflow! return (std::move(MaxValue) - MaxStrideMinusOne).ult(MaxRHS); @@ -8997,19 +9486,17 @@ bool ScalarEvolution::doesIVOverflowOnGT(const SCEV *RHS, const SCEV *Stride, const SCEV *One = getOne(Stride->getType()); if (IsSigned) { - APInt MinRHS = getSignedRange(RHS).getSignedMin(); + APInt MinRHS = getSignedRangeMin(RHS); APInt MinValue = APInt::getSignedMinValue(BitWidth); - APInt MaxStrideMinusOne = getSignedRange(getMinusSCEV(Stride, One)) - .getSignedMax(); + APInt MaxStrideMinusOne = getSignedRangeMax(getMinusSCEV(Stride, One)); // SMinRHS - SMaxStrideMinusOne < SMinValue => overflow! return (std::move(MinValue) + MaxStrideMinusOne).sgt(MinRHS); } - APInt MinRHS = getUnsignedRange(RHS).getUnsignedMin(); + APInt MinRHS = getUnsignedRangeMin(RHS); APInt MinValue = APInt::getMinValue(BitWidth); - APInt MaxStrideMinusOne = getUnsignedRange(getMinusSCEV(Stride, One)) - .getUnsignedMax(); + APInt MaxStrideMinusOne = getUnsignedRangeMax(getMinusSCEV(Stride, One)); // UMinRHS - UMaxStrideMinusOne < UMinValue => overflow! return (std::move(MinValue) + MaxStrideMinusOne).ugt(MinRHS); @@ -9148,8 +9635,8 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS, } else { // Calculate the maximum backedge count based on the range of values // permitted by Start, End, and Stride. - APInt MinStart = IsSigned ? getSignedRange(Start).getSignedMin() - : getUnsignedRange(Start).getUnsignedMin(); + APInt MinStart = IsSigned ? 
getSignedRangeMin(Start) + : getUnsignedRangeMin(Start); unsigned BitWidth = getTypeSizeInBits(LHS->getType()); @@ -9157,8 +9644,8 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS, if (PositiveStride) StrideForMaxBECount = - IsSigned ? getSignedRange(Stride).getSignedMin() - : getUnsignedRange(Stride).getUnsignedMin(); + IsSigned ? getSignedRangeMin(Stride) + : getUnsignedRangeMin(Stride); else // Using a stride of 1 is safe when computing max backedge taken count for // a loop with unknown stride. @@ -9172,15 +9659,16 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS, // the case End = RHS. This is safe because in the other case (End - Start) // is zero, leading to a zero maximum backedge taken count. APInt MaxEnd = - IsSigned ? APIntOps::smin(getSignedRange(RHS).getSignedMax(), Limit) - : APIntOps::umin(getUnsignedRange(RHS).getUnsignedMax(), Limit); + IsSigned ? APIntOps::smin(getSignedRangeMax(RHS), Limit) + : APIntOps::umin(getUnsignedRangeMax(RHS), Limit); MaxBECount = computeBECount(getConstant(MaxEnd - MinStart), getConstant(StrideForMaxBECount), false); } - if (isa(MaxBECount)) - MaxBECount = BECount; + if (isa(MaxBECount) && + !isa(BECount)) + MaxBECount = getConstant(getUnsignedRangeMax(BECount)); return ExitLimit(BECount, MaxBECount, MaxOrZero, Predicates); } @@ -9231,11 +9719,11 @@ ScalarEvolution::howManyGreaterThans(const SCEV *LHS, const SCEV *RHS, const SCEV *BECount = computeBECount(getMinusSCEV(Start, End), Stride, false); - APInt MaxStart = IsSigned ? getSignedRange(Start).getSignedMax() - : getUnsignedRange(Start).getUnsignedMax(); + APInt MaxStart = IsSigned ? getSignedRangeMax(Start) + : getUnsignedRangeMax(Start); - APInt MinStride = IsSigned ? getSignedRange(Stride).getSignedMin() - : getUnsignedRange(Stride).getUnsignedMin(); + APInt MinStride = IsSigned ? getSignedRangeMin(Stride) + : getUnsignedRangeMin(Stride); unsigned BitWidth = getTypeSizeInBits(LHS->getType()); APInt Limit = IsSigned ? APInt::getSignedMinValue(BitWidth) + (MinStride - 1) @@ -9245,8 +9733,8 @@ ScalarEvolution::howManyGreaterThans(const SCEV *LHS, const SCEV *RHS, // the case End = RHS. This is safe because in the other case (Start - End) // is zero, leading to a zero maximum backedge taken count. APInt MinEnd = - IsSigned ? APIntOps::smax(getSignedRange(RHS).getSignedMin(), Limit) - : APIntOps::umax(getUnsignedRange(RHS).getUnsignedMin(), Limit); + IsSigned ? 
APIntOps::smax(getSignedRangeMin(RHS), Limit) + : APIntOps::umax(getUnsignedRangeMin(RHS), Limit); const SCEV *MaxBECount = getCouldNotCompute(); @@ -9471,8 +9959,11 @@ struct SCEVCollectAddRecMultiplies { bool HasAddRec = false; SmallVector Operands; for (auto Op : Mul->operands()) { - if (isa(Op)) { + const SCEVUnknown *Unknown = dyn_cast(Op); + if (Unknown && !isa(Unknown->getValue())) { Operands.push_back(Op); + } else if (Unknown) { + HasAddRec = true; } else { bool ContainsAddRec; SCEVHasAddRec ContiansAddRec(ContainsAddRec); @@ -9924,6 +10415,7 @@ ScalarEvolution::ScalarEvolution(ScalarEvolution &&Arg) UniqueSCEVs(std::move(Arg.UniqueSCEVs)), UniquePreds(std::move(Arg.UniquePreds)), SCEVAllocator(std::move(Arg.SCEVAllocator)), + PredicatedSCEVRewrites(std::move(Arg.PredicatedSCEVRewrites)), FirstUnknown(Arg.FirstUnknown) { Arg.FirstUnknown = nullptr; } @@ -10324,6 +10816,15 @@ void ScalarEvolution::forgetMemoizedResults(const SCEV *S) { HasRecMap.erase(S); MinTrailingZerosCache.erase(S); + for (auto I = PredicatedSCEVRewrites.begin(); + I != PredicatedSCEVRewrites.end();) { + std::pair Entry = I->first; + if (Entry.first == S) + PredicatedSCEVRewrites.erase(I++); + else + ++I; + } + auto RemoveSCEVFromBackedgeMap = [S, this](DenseMap &Map) { for (auto I = Map.begin(), E = Map.end(); I != E;) { @@ -10483,10 +10984,11 @@ void ScalarEvolutionWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequiredTransitive(); } -const SCEVPredicate * -ScalarEvolution::getEqualPredicate(const SCEVUnknown *LHS, - const SCEVConstant *RHS) { +const SCEVPredicate *ScalarEvolution::getEqualPredicate(const SCEV *LHS, + const SCEV *RHS) { FoldingSetNodeID ID; + assert(LHS->getType() == RHS->getType() && + "Type mismatch between LHS and RHS"); // Unique this node based on the arguments ID.AddInteger(SCEVPredicate::P_Equal); ID.AddPointer(LHS); @@ -10549,8 +11051,7 @@ class SCEVPredicateRewriter : public SCEVRewriteVisitor { if (IPred->getLHS() == Expr) return IPred->getRHS(); } - - return Expr; + return convertToAddRecWithPreds(Expr); } const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) { @@ -10586,17 +11087,41 @@ class SCEVPredicateRewriter : public SCEVRewriteVisitor { } private: - bool addOverflowAssumption(const SCEVAddRecExpr *AR, - SCEVWrapPredicate::IncrementWrapFlags AddedFlags) { - auto *A = SE.getWrapPredicate(AR, AddedFlags); + bool addOverflowAssumption(const SCEVPredicate *P) { if (!NewPreds) { // Check if we've already made this assumption. - return Pred && Pred->implies(A); + return Pred && Pred->implies(P); } - NewPreds->insert(A); + NewPreds->insert(P); return true; } + bool addOverflowAssumption(const SCEVAddRecExpr *AR, + SCEVWrapPredicate::IncrementWrapFlags AddedFlags) { + auto *A = SE.getWrapPredicate(AR, AddedFlags); + return addOverflowAssumption(A); + } + + // If \p Expr represents a PHINode, we try to see if it can be represented + // as an AddRec, possibly under a predicate (PHISCEVPred). If it is possible + // to add this predicate as a runtime overflow check, we return the AddRec. + // If \p Expr does not meet these conditions (is not a PHI node, or we + // couldn't create an AddRec for it, or couldn't add the predicate), we just + // return \p Expr. 
+ const SCEV *convertToAddRecWithPreds(const SCEVUnknown *Expr) { + if (!isa(Expr->getValue())) + return Expr; + Optional>> + PredicatedRewrite = SE.createAddRecFromPHIWithCasts(Expr); + if (!PredicatedRewrite) + return Expr; + for (auto *P : PredicatedRewrite->second){ + if (!addOverflowAssumption(P)) + return Expr; + } + return PredicatedRewrite->first; + } + SmallPtrSetImpl *NewPreds; SCEVUnionPredicate *Pred; const Loop *L; @@ -10633,9 +11158,11 @@ SCEVPredicate::SCEVPredicate(const FoldingSetNodeIDRef ID, : FastID(ID), Kind(Kind) {} SCEVEqualPredicate::SCEVEqualPredicate(const FoldingSetNodeIDRef ID, - const SCEVUnknown *LHS, - const SCEVConstant *RHS) - : SCEVPredicate(ID, P_Equal), LHS(LHS), RHS(RHS) {} + const SCEV *LHS, const SCEV *RHS) + : SCEVPredicate(ID, P_Equal), LHS(LHS), RHS(RHS) { + assert(LHS->getType() == RHS->getType() && "LHS and RHS types don't match"); + assert(LHS != RHS && "LHS and RHS are the same SCEV"); +} bool SCEVEqualPredicate::implies(const SCEVPredicate *N) const { const auto *Op = dyn_cast(N); diff --git a/interpreter/llvm/src/lib/Analysis/ScalarEvolutionExpander.cpp b/interpreter/llvm/src/lib/Analysis/ScalarEvolutionExpander.cpp index 86cbd79aa84e8..47bdac00ae1f3 100644 --- a/interpreter/llvm/src/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/interpreter/llvm/src/lib/Analysis/ScalarEvolutionExpander.cpp @@ -748,18 +748,56 @@ Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) { // Emit instructions to mul all the operands. Hoist as much as possible // out of loops. Value *Prod = nullptr; - for (const auto &I : OpsAndLoops) { - const SCEV *Op = I.second; + auto I = OpsAndLoops.begin(); + + // Expand the calculation of X pow N in the following manner: + // Let N = P1 + P2 + ... + PK, where all P are powers of 2. Then: + // X pow N = (X pow P1) * (X pow P2) * ... * (X pow PK). + const auto ExpandOpBinPowN = [this, &I, &OpsAndLoops, &Ty]() { + auto E = I; + // Calculate how many times the same operand from the same loop is included + // into this power. + uint64_t Exponent = 0; + const uint64_t MaxExponent = UINT64_MAX >> 1; + // No one sane will ever try to calculate such huge exponents, but if we + // need this, we stop on UINT64_MAX / 2 because we need to exit the loop + // below when the power of 2 exceeds our Exponent, and we want it to be + // 1u << 31 at most to not deal with unsigned overflow. + while (E != OpsAndLoops.end() && *I == *E && Exponent != MaxExponent) { + ++Exponent; + ++E; + } + assert(Exponent > 0 && "Trying to calculate a zeroth exponent of operand?"); + + // Calculate powers with exponents 1, 2, 4, 8 etc. and include those of them + // that are needed into the result. + Value *P = expandCodeFor(I->second, Ty); + Value *Result = nullptr; + if (Exponent & 1) + Result = P; + for (uint64_t BinExp = 2; BinExp <= Exponent; BinExp <<= 1) { + P = InsertBinop(Instruction::Mul, P, P); + if (Exponent & BinExp) + Result = Result ? InsertBinop(Instruction::Mul, Result, P) : P; + } + + I = E; + assert(Result && "Nothing was expanded?"); + return Result; + }; + + while (I != OpsAndLoops.end()) { if (!Prod) { // This is the first operand. Just expand it. - Prod = expand(Op); - } else if (Op->isAllOnesValue()) { + Prod = ExpandOpBinPowN(); + } else if (I->second->isAllOnesValue()) { // Instead of doing a multiply by negative one, just do a negate. Prod = InsertNoopCastOfTo(Prod, Ty); Prod = InsertBinop(Instruction::Sub, Constant::getNullValue(Ty), Prod); + ++I; } else { // A simple mul. 
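ExpandOpBinPowN above is exponentiation by squaring: it folds N equal operands into X pow N using the squarings X, X^2, X^4, ... selected by the set bits of N. The same arithmetic skeleton as a standalone sketch over plain integers (binPow is a hypothetical helper, not LLVM code), before the simple-mul branch continues below:

#include <cassert>
#include <cstdint>

// Accumulate X pow Exponent from the squarings X, X^2, X^4, ..., multiplying
// in exactly those whose bit is set in Exponent, as ExpandOpBinPowN does with
// SCEV multiplies.
static uint64_t binPow(uint64_t X, uint64_t Exponent) {
  assert(Exponent > 0 && "mirrors the zeroth-exponent assert above");
  uint64_t P = X;
  uint64_t Result = (Exponent & 1) ? P : 0;
  bool HaveResult = (Exponent & 1) != 0;
  for (uint64_t BinExp = 2; BinExp <= Exponent; BinExp <<= 1) {
    P *= P; // InsertBinop(Instruction::Mul, P, P)
    if (Exponent & BinExp) {
      Result = HaveResult ? Result * P : P;
      HaveResult = true;
    }
  }
  return Result;
}

int main() {
  assert(binPow(3, 1) == 3);
  assert(binPow(3, 5) == 243);   // 3^5 = 3^1 * 3^4
  assert(binPow(2, 10) == 1024); // 2^10 = 2^2 * 2^8
  return 0;
}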
- Value *W = expandCodeFor(Op, Ty); + Value *W = ExpandOpBinPowN(); Prod = InsertNoopCastOfTo(Prod, Ty); // Canonicalize a constant to the RHS. if (isa(Prod)) std::swap(Prod, W); @@ -1305,12 +1343,17 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { // Expand the core addrec. If we need post-loop scaling, force it to // expand to an integer type to avoid the need for additional casting. Type *ExpandTy = PostLoopScale ? IntTy : STy; + // We can't use a pointer type for the addrec if the pointer type is + // non-integral. + Type *AddRecPHIExpandTy = + DL.isNonIntegralPointerType(STy) ? Normalized->getType() : ExpandTy; + // In some cases, we decide to reuse an existing phi node but need to truncate // it and/or invert the step. Type *TruncTy = nullptr; bool InvertStep = false; - PHINode *PN = getAddRecExprPHILiterally(Normalized, L, ExpandTy, IntTy, - TruncTy, InvertStep); + PHINode *PN = getAddRecExprPHILiterally(Normalized, L, AddRecPHIExpandTy, + IntTy, TruncTy, InvertStep); // Accommodate post-inc mode, if necessary. Value *Result; @@ -1383,8 +1426,15 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { // Re-apply any non-loop-dominating offset. if (PostLoopOffset) { if (PointerType *PTy = dyn_cast(ExpandTy)) { - const SCEV *const OffsetArray[1] = { PostLoopOffset }; - Result = expandAddToGEP(OffsetArray, OffsetArray+1, PTy, IntTy, Result); + if (Result->getType()->isIntegerTy()) { + Value *Base = expandCodeFor(PostLoopOffset, ExpandTy); + const SCEV *const OffsetArray[1] = {SE.getUnknown(Result)}; + Result = expandAddToGEP(OffsetArray, OffsetArray + 1, PTy, IntTy, Base); + } else { + const SCEV *const OffsetArray[1] = {PostLoopOffset}; + Result = + expandAddToGEP(OffsetArray, OffsetArray + 1, PTy, IntTy, Result); + } } else { Result = InsertNoopCastOfTo(Result, IntTy); Result = Builder.CreateAdd(Result, diff --git a/interpreter/llvm/src/lib/Analysis/ScalarEvolutionNormalization.cpp b/interpreter/llvm/src/lib/Analysis/ScalarEvolutionNormalization.cpp index 54c44c8e542d0..3740039b8f867 100644 --- a/interpreter/llvm/src/lib/Analysis/ScalarEvolutionNormalization.cpp +++ b/interpreter/llvm/src/lib/Analysis/ScalarEvolutionNormalization.cpp @@ -12,9 +12,9 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Analysis/ScalarEvolutionNormalization.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" -#include "llvm/Analysis/ScalarEvolutionNormalization.h" using namespace llvm; /// TransformKind - Different types of transformations that diff --git a/interpreter/llvm/src/lib/Analysis/TargetLibraryInfo.cpp b/interpreter/llvm/src/lib/Analysis/TargetLibraryInfo.cpp index 5f05e80bd479e..2be5d5caf7c21 100644 --- a/interpreter/llvm/src/lib/Analysis/TargetLibraryInfo.cpp +++ b/interpreter/llvm/src/lib/Analysis/TargetLibraryInfo.cpp @@ -13,6 +13,7 @@ #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/ADT/Triple.h" +#include "llvm/IR/Constants.h" #include "llvm/Support/CommandLine.h" using namespace llvm; @@ -540,7 +541,7 @@ static StringRef sanitizeFunctionName(StringRef funcName) { // Check for \01 prefix that is used to mangle __asm declarations and // strip it if present. 
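dropLLVMManglingEscape, the rename of getRealLinkageName applied just below, does exactly what this comment says: it strips a leading \01 byte, the escape that suppresses LLVM's usual name mangling for __asm declarations. In miniature (standalone, illustrative):

#include <cassert>
#include <string>

// Strip the '\01' escape that tells LLVM not to mangle the name further.
static std::string dropManglingEscape(std::string Name) {
  if (!Name.empty() && Name[0] == '\01')
    Name.erase(0, 1);
  return Name;
}

int main() {
  assert(dropManglingEscape("\01_write") == "_write");
  assert(dropManglingEscape("printf") == "printf"); // unescaped names pass through
  return 0;
}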
- return GlobalValue::getRealLinkageName(funcName); + return GlobalValue::dropLLVMManglingEscape(funcName); } bool TargetLibraryInfoImpl::getLibFunc(StringRef funcName, @@ -1518,6 +1519,21 @@ TargetLibraryInfoImpl &TargetLibraryAnalysis::lookupInfoImpl(const Triple &T) { return *Impl; } +unsigned TargetLibraryInfoImpl::getTargetWCharSize(const Triple &T) { + // See also clang/lib/Basic/Targets.cpp. + if (T.isPS4() || T.isOSWindows() || T.isArch16Bit()) + return 2; + if (T.getArch() == Triple::xcore) + return 1; + return 4; +} + +unsigned TargetLibraryInfoImpl::getWCharSize(const Module &M) const { + if (auto *ShortWChar = cast_or_null( + M.getModuleFlag("wchar_size"))) + return cast(ShortWChar->getValue())->getZExtValue(); + return getTargetWCharSize(Triple(M.getTargetTriple())); +} TargetLibraryInfoWrapperPass::TargetLibraryInfoWrapperPass() : ImmutablePass(ID), TLIImpl(), TLI(TLIImpl) { diff --git a/interpreter/llvm/src/lib/Analysis/TargetTransformInfo.cpp b/interpreter/llvm/src/lib/Analysis/TargetTransformInfo.cpp index 805b645eacaf0..25813c65037f2 100644 --- a/interpreter/llvm/src/lib/Analysis/TargetTransformInfo.cpp +++ b/interpreter/llvm/src/lib/Analysis/TargetTransformInfo.cpp @@ -16,6 +16,7 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include @@ -23,6 +24,11 @@ using namespace llvm; #define DEBUG_TYPE "tti" +static cl::opt UseWideMemcpyLoopLowering( + "use-wide-memcpy-loop-lowering", cl::init(false), + cl::desc("Enables the new wide memcpy loop lowering in Transforms/Utils."), + cl::Hidden); + namespace { /// \brief No-op implementation of the TTI interface using the utility base /// classes. @@ -76,6 +82,11 @@ int TargetTransformInfo::getGEPCost(Type *PointeeType, const Value *Ptr, return TTIImpl->getGEPCost(PointeeType, Ptr, Operands); } +int TargetTransformInfo::getExtCost(const Instruction *I, + const Value *Src) const { + return TTIImpl->getExtCost(I, Src); +} + int TargetTransformInfo::getIntrinsicCost( Intrinsic::ID IID, Type *RetTy, ArrayRef Arguments) const { int Cost = TTIImpl->getIntrinsicCost(IID, RetTy, Arguments); @@ -89,8 +100,9 @@ TargetTransformInfo::getEstimatedNumberOfCaseClusters(const SwitchInst &SI, return TTIImpl->getEstimatedNumberOfCaseClusters(SI, JTSize); } -int TargetTransformInfo::getUserCost(const User *U) const { - int Cost = TTIImpl->getUserCost(U); +int TargetTransformInfo::getUserCost(const User *U, + ArrayRef Operands) const { + int Cost = TTIImpl->getUserCost(U, Operands); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } @@ -103,6 +115,10 @@ bool TargetTransformInfo::isSourceOfDivergence(const Value *V) const { return TTIImpl->isSourceOfDivergence(V); } +bool llvm::TargetTransformInfo::isAlwaysUniform(const Value *V) const { + return TTIImpl->isAlwaysUniform(V); +} + unsigned TargetTransformInfo::getFlatAddressSpace() const { return TTIImpl->getFlatAddressSpace(); } @@ -112,8 +128,8 @@ bool TargetTransformInfo::isLoweredToCall(const Function *F) const { } void TargetTransformInfo::getUnrollingPreferences( - Loop *L, UnrollingPreferences &UP) const { - return TTIImpl->getUnrollingPreferences(L, UP); + Loop *L, ScalarEvolution &SE, UnrollingPreferences &UP) const { + return TTIImpl->getUnrollingPreferences(L, SE, UP); } bool TargetTransformInfo::isLegalAddImmediate(int64_t Imm) const { @@ -133,6 +149,10 @@ bool TargetTransformInfo::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, Scale, 
AddrSpace); } +bool TargetTransformInfo::isLSRCostLess(LSRCost &C1, LSRCost &C2) const { + return TTIImpl->isLSRCostLess(C1, C2); +} + bool TargetTransformInfo::isLegalMaskedStore(Type *DataType) const { return TTIImpl->isLegalMaskedStore(DataType); } @@ -149,6 +169,10 @@ bool TargetTransformInfo::isLegalMaskedScatter(Type *DataType) const { return TTIImpl->isLegalMaskedGather(DataType); } +bool TargetTransformInfo::prefersVectorizedAddressing() const { + return TTIImpl->prefersVectorizedAddressing(); +} + int TargetTransformInfo::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, @@ -211,6 +235,10 @@ bool TargetTransformInfo::enableAggressiveInterleaving(bool LoopHasReductions) c return TTIImpl->enableAggressiveInterleaving(LoopHasReductions); } +bool TargetTransformInfo::expandMemCmp(Instruction *I, unsigned &MaxLoadSize) const { + return TTIImpl->expandMemCmp(I, MaxLoadSize); +} + bool TargetTransformInfo::enableInterleavedAccessVectorization() const { return TTIImpl->enableInterleavedAccessVectorization(); } @@ -279,6 +307,10 @@ unsigned TargetTransformInfo::getRegisterBitWidth(bool Vector) const { return TTIImpl->getRegisterBitWidth(Vector); } +unsigned TargetTransformInfo::getMinVectorRegisterBitWidth() const { + return TTIImpl->getMinVectorRegisterBitWidth(); +} + bool TargetTransformInfo::shouldConsiderAddressTypePromotion( const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const { return TTIImpl->shouldConsiderAddressTypePromotion( @@ -452,11 +484,34 @@ bool TargetTransformInfo::getTgtMemIntrinsic(IntrinsicInst *Inst, return TTIImpl->getTgtMemIntrinsic(Inst, Info); } +unsigned TargetTransformInfo::getAtomicMemIntrinsicMaxElementSize() const { + return TTIImpl->getAtomicMemIntrinsicMaxElementSize(); +} + Value *TargetTransformInfo::getOrCreateResultFromMemIntrinsic( IntrinsicInst *Inst, Type *ExpectedType) const { return TTIImpl->getOrCreateResultFromMemIntrinsic(Inst, ExpectedType); } +Type *TargetTransformInfo::getMemcpyLoopLoweringType(LLVMContext &Context, + Value *Length, + unsigned SrcAlign, + unsigned DestAlign) const { + return TTIImpl->getMemcpyLoopLoweringType(Context, Length, SrcAlign, + DestAlign); +} + +void TargetTransformInfo::getMemcpyLoopResidualLoweringType( + SmallVectorImpl &OpsOut, LLVMContext &Context, + unsigned RemainingBytes, unsigned SrcAlign, unsigned DestAlign) const { + TTIImpl->getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes, + SrcAlign, DestAlign); +} + +bool TargetTransformInfo::useWideIRMemcpyLoopLowering() const { + return UseWideMemcpyLoopLowering; +} + bool TargetTransformInfo::areInlineCompatible(const Function *Caller, const Function *Callee) const { return TTIImpl->areInlineCompatible(Caller, Callee); diff --git a/interpreter/llvm/src/lib/Analysis/TypeBasedAliasAnalysis.cpp b/interpreter/llvm/src/lib/Analysis/TypeBasedAliasAnalysis.cpp index e920c4c4e6b2b..86c528de267a2 100644 --- a/interpreter/llvm/src/lib/Analysis/TypeBasedAliasAnalysis.cpp +++ b/interpreter/llvm/src/lib/Analysis/TypeBasedAliasAnalysis.cpp @@ -23,10 +23,10 @@ // // The scalar TBAA metadata format is very simple. 
TBAA MDNodes have up to // three fields, e.g.: -// !0 = metadata !{ metadata !"an example type tree" } -// !1 = metadata !{ metadata !"int", metadata !0 } -// !2 = metadata !{ metadata !"float", metadata !0 } -// !3 = metadata !{ metadata !"const float", metadata !2, i64 1 } +// !0 = !{ !"an example type tree" } +// !1 = !{ !"int", !0 } +// !2 = !{ !"float", !0 } +// !3 = !{ !"const float", !2, i64 1 } // // The first field is an identity field. It can be any value, usually // an MDString, which uniquely identifies the type. The most important @@ -58,7 +58,7 @@ // // The struct type node has a name and a list of pairs, one pair for each member // of the struct. The first element of each pair is a type node (a struct type -// node or a sclar type node), specifying the type of the member, the second +// node or a scalar type node), specifying the type of the member, the second // element of each pair is the offset of the member. // // Given an example @@ -74,13 +74,13 @@ // instruction. The base type is !4 (struct B), the access type is !2 (scalar // type short) and the offset is 4. // -// !0 = metadata !{metadata !"Simple C/C++ TBAA"} -// !1 = metadata !{metadata !"omnipotent char", metadata !0} // Scalar type node -// !2 = metadata !{metadata !"short", metadata !1} // Scalar type node -// !3 = metadata !{metadata !"A", metadata !2, i64 0} // Struct type node -// !4 = metadata !{metadata !"B", metadata !2, i64 0, metadata !3, i64 4} +// !0 = !{!"Simple C/C++ TBAA"} +// !1 = !{!"omnipotent char", !0} // Scalar type node +// !2 = !{!"short", !1} // Scalar type node +// !3 = !{!"A", !2, i64 0} // Struct type node +// !4 = !{!"B", !2, i64 0, !3, i64 4} // // Struct type node -// !5 = metadata !{metadata !4, metadata !2, i64 4} // Path tag node +// !5 = !{!4, !2, i64 4} // Path tag node // // The struct type nodes and the scalar type nodes form a type DAG. 
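The aliasing rule this DAG encodes can be modeled compactly: two scalar accesses may alias only if one access type is an ancestor of the other on the path toward the root (!0). A standalone sketch with illustrative type names, not the real TBAA implementation:

#include <cassert>
#include <map>
#include <string>

// Walk parent links from B toward the root; A is an ancestor of B if the walk
// passes through it.
static bool isAncestor(const std::map<std::string, std::string> &Parent,
                       const std::string &A, const std::string &B) {
  for (std::string T = B; !T.empty();
       T = Parent.count(T) ? Parent.at(T) : std::string())
    if (T == A)
      return true;
  return false;
}

int main() {
  // A toy tree in the spirit of !1..!3 above, rooted (implicitly) at "char".
  std::map<std::string, std::string> Parent{
      {"int", "char"}, {"float", "char"}, {"const float", "float"}};
  auto MayAlias = [&](const std::string &A, const std::string &B) {
    return isAncestor(Parent, A, B) || isAncestor(Parent, B, A);
  };
  assert(MayAlias("char", "int"));   // char-typed accesses alias anything
  assert(!MayAlias("int", "float")); // sibling scalar types never alias
  assert(MayAlias("float", "const float"));
  return 0;
}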
// Root (!0) diff --git a/interpreter/llvm/src/lib/Analysis/ValueTracking.cpp b/interpreter/llvm/src/lib/Analysis/ValueTracking.cpp index 38bcb0f6c71b1..cdfe74d158c95 100644 --- a/interpreter/llvm/src/lib/Analysis/ValueTracking.cpp +++ b/interpreter/llvm/src/lib/Analysis/ValueTracking.cpp @@ -17,15 +17,16 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/OptimizationDiagnosticInfo.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/GlobalAlias.h" @@ -148,8 +149,10 @@ static KnownBits computeKnownBits(const Value *V, unsigned Depth, KnownBits llvm::computeKnownBits(const Value *V, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, - const DominatorTree *DT) { - return ::computeKnownBits(V, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT)); + const DominatorTree *DT, + OptimizationRemarkEmitter *ORE) { + return ::computeKnownBits(V, Depth, + Query(DL, AC, safeCxtI(V, CxtI), DT, ORE)); } bool llvm::haveNoCommonBitsSet(const Value *LHS, const Value *RHS, @@ -169,13 +172,16 @@ bool llvm::haveNoCommonBitsSet(const Value *LHS, const Value *RHS, } -void llvm::ComputeSignBit(const Value *V, bool &KnownZero, bool &KnownOne, - const DataLayout &DL, unsigned Depth, - AssumptionCache *AC, const Instruction *CxtI, - const DominatorTree *DT) { - KnownBits Known = computeKnownBits(V, DL, Depth, AC, CxtI, DT); - KnownZero = Known.isNonNegative(); - KnownOne = Known.isNegative(); +bool llvm::isOnlyUsedInZeroEqualityComparison(const Instruction *CxtI) { + for (const User *U : CxtI->users()) { + if (const ICmpInst *IC = dyn_cast(U)) + if (IC->isEquality()) + if (Constant *C = dyn_cast(IC->getOperand(1))) + if (C->isNullValue()) + continue; + return false; + } + return true; } static bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth, @@ -680,8 +686,7 @@ static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known, Known.One |= RHSKnown.Zero; // assume(v >> c = a) } else if (match(Arg, - m_c_ICmp(Pred, m_CombineOr(m_LShr(m_V, m_ConstantInt(C)), - m_AShr(m_V, m_ConstantInt(C))), + m_c_ICmp(Pred, m_Shr(m_V, m_ConstantInt(C)), m_Value(A))) && Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { @@ -692,9 +697,7 @@ static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known, Known.Zero |= RHSKnown.Zero << C->getZExtValue(); Known.One |= RHSKnown.One << C->getZExtValue(); // assume(~(v >> c) = a) - } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_CombineOr( - m_LShr(m_V, m_ConstantInt(C)), - m_AShr(m_V, m_ConstantInt(C)))), + } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_Shr(m_V, m_ConstantInt(C))), m_Value(A))) && Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { @@ -846,7 +849,8 @@ static void computeKnownBitsFromShiftOperator( Optional ShifterOperandIsNonZero; // Early exit if we can't constrain any well-defined shift amount. 
- if (!(ShiftAmtKZ & (BitWidth - 1)) && !(ShiftAmtKO & (BitWidth - 1))) { + if (!(ShiftAmtKZ & (PowerOf2Ceil(BitWidth) - 1)) && + !(ShiftAmtKO & (PowerOf2Ceil(BitWidth) - 1))) { ShifterOperandIsNonZero = isKnownNonZero(I->getOperand(1), Depth + 1, Q); if (!*ShifterOperandIsNonZero) @@ -1496,12 +1500,10 @@ void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth, assert(Depth <= MaxDepth && "Limit Search Depth"); unsigned BitWidth = Known.getBitWidth(); - assert((V->getType()->isIntOrIntVectorTy() || - V->getType()->getScalarType()->isPointerTy()) && + assert((V->getType()->isIntOrIntVectorTy(BitWidth) || + V->getType()->isPtrOrPtrVectorTy()) && "Not integer or pointer type!"); - assert((Q.DL.getTypeSizeInBits(V->getType()->getScalarType()) == BitWidth) && - (!V->getType()->isIntOrIntVectorTy() || - V->getType()->getScalarSizeInBits() == BitWidth) && + assert(Q.DL.getTypeSizeInBits(V->getType()->getScalarType()) == BitWidth && "V and Known should have same BitWidth"); (void)BitWidth; @@ -1948,7 +1950,7 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) { } // Check if all incoming values are non-zero constant. bool AllNonZeroConstants = all_of(PN->operands(), [](Value *V) { - return isa(V) && !cast(V)->isZeroValue(); + return isa(V) && !cast(V)->isZero(); }); if (AllNonZeroConstants) return true; @@ -1976,7 +1978,7 @@ static bool isAddOfNonZero(const Value *V1, const Value *V2, const Query &Q) { /// Return true if it is known that V1 != V2. static bool isKnownNonEqual(const Value *V1, const Value *V2, const Query &Q) { - if (V1->getType()->isVectorTy() || V1 == V2) + if (V1 == V2) return false; if (V1->getType() != V2->getType()) // We can't look through casts yet. @@ -1984,18 +1986,14 @@ static bool isKnownNonEqual(const Value *V1, const Value *V2, const Query &Q) { if (isAddOfNonZero(V1, V2, Q) || isAddOfNonZero(V2, V1, Q)) return true; - if (IntegerType *Ty = dyn_cast(V1->getType())) { + if (V1->getType()->isIntOrIntVectorTy()) { // Are any known bits in V1 contradictory to known bits in V2? If V1 // has a known zero where V2 has a known one, they must not be equal. - auto BitWidth = Ty->getBitWidth(); - KnownBits Known1(BitWidth); - computeKnownBits(V1, Known1, 0, Q); - KnownBits Known2(BitWidth); - computeKnownBits(V2, Known2, 0, Q); - - APInt OppositeBits = (Known1.Zero & Known2.One) | - (Known2.Zero & Known1.One); - if (OppositeBits.getBoolValue()) + KnownBits Known1 = computeKnownBits(V1, 0, Q); + KnownBits Known2 = computeKnownBits(V2, 0, Q); + + if (Known1.Zero.intersects(Known2.One) || + Known2.Zero.intersects(Known1.One)) return true; } return false; @@ -2333,6 +2331,7 @@ bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple, case Instruction::SExt: if (!LookThroughSExt) return false; // otherwise fall through to ZExt + LLVM_FALLTHROUGH; case Instruction::ZExt: return ComputeMultiple(I->getOperand(0), Base, Multiple, LookThroughSExt, Depth+1); @@ -2962,14 +2961,16 @@ Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, return Ptr; } -bool llvm::isGEPBasedOnPointerToString(const GEPOperator *GEP) { +bool llvm::isGEPBasedOnPointerToString(const GEPOperator *GEP, + unsigned CharSize) { // Make sure the GEP has exactly three arguments. if (GEP->getNumOperands() != 3) return false; - // Make sure the index-ee is a pointer to array of i8. + // Make sure the index-ee is a pointer to array of \p CharSize integers.
ArrayType *AT = dyn_cast(GEP->getSourceElementType()); - if (!AT || !AT->getElementType()->isIntegerTy(8)) + if (!AT || !AT->getElementType()->isIntegerTy(CharSize)) return false; // Check to make sure that the first operand of the GEP is an integer and @@ -2981,11 +2982,9 @@ bool llvm::isGEPBasedOnPointerToString(const GEPOperator *GEP) { return true; } -/// This function computes the length of a null-terminated C string pointed to -/// by V. If successful, it returns true and returns the string in Str. -/// If unsuccessful, it returns false. -bool llvm::getConstantStringInfo(const Value *V, StringRef &Str, - uint64_t Offset, bool TrimAtNul) { +bool llvm::getConstantDataArrayInfo(const Value *V, + ConstantDataArraySlice &Slice, + unsigned ElementSize, uint64_t Offset) { assert(V); // Look through bitcast instructions and geps. @@ -2996,7 +2995,7 @@ bool llvm::getConstantStringInfo(const Value *V, StringRef &Str, if (const GEPOperator *GEP = dyn_cast(V)) { // The GEP operator should be based on a pointer to string constant, and is // indexing into the string constant. - if (!isGEPBasedOnPointerToString(GEP)) + if (!isGEPBasedOnPointerToString(GEP, ElementSize)) return false; // If the second index isn't a ConstantInt, then this is a variable index @@ -3007,8 +3006,8 @@ bool llvm::getConstantStringInfo(const Value *V, StringRef &Str, StartIdx = CI->getZExtValue(); else return false; - return getConstantStringInfo(GEP->getOperand(0), Str, StartIdx + Offset, - TrimAtNul); + return getConstantDataArrayInfo(GEP->getOperand(0), Slice, ElementSize, + StartIdx + Offset); } // The GEP instruction, constant or instruction, must reference a global @@ -3018,30 +3017,72 @@ bool llvm::getConstantStringInfo(const Value *V, StringRef &Str, if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer()) return false; - // Handle the all-zeros case. + const ConstantDataArray *Array; + ArrayType *ArrayTy; if (GV->getInitializer()->isNullValue()) { - // This is a degenerate case. The initializer is constant zero so the - // length of the string must be zero. - Str = ""; - return true; + Type *GVTy = GV->getValueType(); + if ( (ArrayTy = dyn_cast(GVTy)) ) { + // A zeroinitializer for the array; there is no ConstantDataArray. + Array = nullptr; + } else { + const DataLayout &DL = GV->getParent()->getDataLayout(); + uint64_t SizeInBytes = DL.getTypeStoreSize(GVTy); + uint64_t Length = SizeInBytes / (ElementSize / 8); + if (Length <= Offset) + return false; + + Slice.Array = nullptr; + Slice.Offset = 0; + Slice.Length = Length - Offset; + return true; + } + } else { + // This must be a ConstantDataArray. + Array = dyn_cast(GV->getInitializer()); + if (!Array) + return false; + ArrayTy = Array->getType(); } + if (!ArrayTy->getElementType()->isIntegerTy(ElementSize)) + return false; - // This must be a ConstantDataArray. - const auto *Array = dyn_cast(GV->getInitializer()); - if (!Array || !Array->isString()) + uint64_t NumElts = ArrayTy->getArrayNumElements(); + if (Offset > NumElts) return false; - // Get the number of elements in the array. - uint64_t NumElts = Array->getType()->getArrayNumElements(); + Slice.Array = Array; + Slice.Offset = Offset; + Slice.Length = NumElts - Offset; + return true; +} - // Start out with the entire array in the StringRef. - Str = Array->getAsString(); +/// This function computes the length of a null-terminated C string pointed to +/// by V. If successful, it returns true and returns the string in Str. +/// If unsuccessful, it returns false. 
+bool llvm::getConstantStringInfo(const Value *V, StringRef &Str, + uint64_t Offset, bool TrimAtNul) { + ConstantDataArraySlice Slice; + if (!getConstantDataArrayInfo(V, Slice, 8, Offset)) + return false; - if (Offset > NumElts) + if (Slice.Array == nullptr) { + if (TrimAtNul) { + Str = StringRef(); + return true; + } + if (Slice.Length == 1) { + Str = StringRef("", 1); + return true; + } + // We cannot instantiate a StringRef as we do not have an appropriate string + // of 0s at hand. return false; + } + // Start out with the entire array in the StringRef. + Str = Slice.Array->getAsString(); // Skip over 'offset' bytes. - Str = Str.substr(Offset); + Str = Str.substr(Slice.Offset); if (TrimAtNul) { // Trim off the \0 and anything after it. If the array is not nul @@ -3059,7 +3100,8 @@ bool llvm::getConstantStringInfo(const Value *V, StringRef &Str, /// If we can compute the length of the string pointed to by /// the specified pointer, return 'len+1'. If we can't, return 0. static uint64_t GetStringLengthH(const Value *V, - SmallPtrSetImpl &PHIs) { + SmallPtrSetImpl &PHIs, + unsigned CharSize) { // Look through noop bitcast instructions. V = V->stripPointerCasts(); @@ -3072,7 +3114,7 @@ static uint64_t GetStringLengthH(const Value *V, // If it was new, see if all the input strings are the same length. uint64_t LenSoFar = ~0ULL; for (Value *IncValue : PN->incoming_values()) { - uint64_t Len = GetStringLengthH(IncValue, PHIs); + uint64_t Len = GetStringLengthH(IncValue, PHIs, CharSize); if (Len == 0) return 0; // Unknown length -> unknown. if (Len == ~0ULL) continue; @@ -3088,9 +3130,9 @@ static uint64_t GetStringLengthH(const Value *V, // strlen(select(c,x,y)) -> strlen(x) ^ strlen(y) if (const SelectInst *SI = dyn_cast(V)) { - uint64_t Len1 = GetStringLengthH(SI->getTrueValue(), PHIs); + uint64_t Len1 = GetStringLengthH(SI->getTrueValue(), PHIs, CharSize); if (Len1 == 0) return 0; - uint64_t Len2 = GetStringLengthH(SI->getFalseValue(), PHIs); + uint64_t Len2 = GetStringLengthH(SI->getFalseValue(), PHIs, CharSize); if (Len2 == 0) return 0; if (Len1 == ~0ULL) return Len2; if (Len2 == ~0ULL) return Len1; @@ -3099,20 +3141,30 @@ static uint64_t GetStringLengthH(const Value *V, } // Otherwise, see if we can read the string. - StringRef StrData; - if (!getConstantStringInfo(V, StrData)) + ConstantDataArraySlice Slice; + if (!getConstantDataArrayInfo(V, Slice, CharSize)) return 0; - return StrData.size()+1; + if (Slice.Array == nullptr) + return 1; + + // Search for nul characters + unsigned NullIndex = 0; + for (unsigned E = Slice.Length; NullIndex < E; ++NullIndex) { + if (Slice.Array->getElementAsInteger(Slice.Offset + NullIndex) == 0) + break; + } + + return NullIndex + 1; } /// If we can compute the length of the string pointed to by /// the specified pointer, return 'len+1'. If we can't, return 0. -uint64_t llvm::GetStringLength(const Value *V) { +uint64_t llvm::GetStringLength(const Value *V, unsigned CharSize) { if (!V->getType()->isPointerTy()) return 0; SmallPtrSet PHIs; - uint64_t Len = GetStringLengthH(V, PHIs); + uint64_t Len = GetStringLengthH(V, PHIs, CharSize); // If Len is ~0ULL, we had an infinite phi cycle: this is dead code, so return // an empty string as a length. return Len == ~0ULL ? 1 : Len; @@ -3225,6 +3277,69 @@ void llvm::GetUnderlyingObjects(Value *V, SmallVectorImpl &Objects, } while (!Worklist.empty()); } +/// This is the function that does the work of looking through basic +/// ptrtoint+arithmetic+inttoptr sequences. 
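Returning to the nul scan added to GetStringLengthH above: with CharSize as a parameter, the terminator is found by scanning elements of the slice rather than bytes, so constant UTF-16 data works as well. The loop's logic as a standalone sketch (hypothetical 16-bit data, not LLVM types):

#include <cassert>
#include <cstdint>
#include <vector>

// Mirror of the scan: find the first zero element and return 'len+1'
// (length including the terminator), as GetStringLengthH does.
static uint64_t stringLength(const std::vector<uint16_t> &Slice) {
  uint64_t NullIndex = 0;
  while (NullIndex < Slice.size() && Slice[NullIndex] != 0)
    ++NullIndex;
  return NullIndex + 1;
}

int main() {
  std::vector<uint16_t> WStr{104, 105, 0, 33}; // "hi\0!" as 16-bit elements
  assert(stringLength(WStr) == 3);
  return 0;
}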
+static const Value *getUnderlyingObjectFromInt(const Value *V) { + do { + if (const Operator *U = dyn_cast<Operator>(V)) { + // If we find a ptrtoint, we can transfer control back to the + // regular getUnderlyingObjectFromInt. + if (U->getOpcode() == Instruction::PtrToInt) + return U->getOperand(0); + // If we find an add of a constant, a multiplied value, or a phi, it's + // likely that the other operand will lead us to the base + // object. We don't have to worry about the case where the + // object address is somehow being computed by the multiply, + // because our callers only care when the result is an + // identifiable object. + if (U->getOpcode() != Instruction::Add || + (!isa<ConstantInt>(U->getOperand(1)) && + Operator::getOpcode(U->getOperand(1)) != Instruction::Mul && + !isa<PHINode>(U->getOperand(1)))) + return V; + V = U->getOperand(0); + } else { + return V; + } + assert(V->getType()->isIntegerTy() && "Unexpected operand type!"); + } while (true); +} + +/// This is a wrapper around GetUnderlyingObjects and adds support for basic +/// ptrtoint+arithmetic+inttoptr sequences. +void llvm::getUnderlyingObjectsForCodeGen(const Value *V, + SmallVectorImpl<Value *> &Objects, + const DataLayout &DL) { + SmallPtrSet<const Value *, 4> Visited; + SmallVector<const Value *, 4> Working(1, V); + do { + V = Working.pop_back_val(); + + SmallVector<Value *, 4> Objs; + GetUnderlyingObjects(const_cast<Value *>(V), Objs, DL); + + for (Value *V : Objs) { + if (!Visited.insert(V).second) + continue; + if (Operator::getOpcode(V) == Instruction::IntToPtr) { + const Value *O = + getUnderlyingObjectFromInt(cast<Operator>(V)->getOperand(0)); + if (O->getType()->isPointerTy()) { + Working.push_back(O); + continue; + } + } + // If GetUnderlyingObjects fails to find an identifiable object, + // getUnderlyingObjectsForCodeGen also fails for safety. + if (!isIdentifiedObject(V)) { + Objects.clear(); + return; + } + Objects.push_back(const_cast<Value *>(V)); + } + } while (!Working.empty()); +} + /// Return true if the only users of this pointer are lifetime markers. bool llvm::onlyUsedByLifetimeMarkers(const Value *V) { for (const User *U : V->users()) { @@ -3495,6 +3610,51 @@ OverflowResult llvm::computeOverflowForUnsignedAdd(const Value *LHS, return OverflowResult::MayOverflow; } +/// \brief Return true if we can prove that adding the two KnownBits values +/// will not overflow. +/// Otherwise return false. +static bool checkRippleForSignedAdd(const KnownBits &LHSKnown, + const KnownBits &RHSKnown) { + // Addition of two 2's complement numbers having opposite signs will never + // overflow. + if ((LHSKnown.isNegative() && RHSKnown.isNonNegative()) || + (LHSKnown.isNonNegative() && RHSKnown.isNegative())) + return true; + + // If either of the values is known to be non-negative, adding them can only + // overflow if the second is also non-negative, so we can assume that. + // Two non-negative numbers will only overflow if there is a carry to the + // sign bit, so we can check that, even when the values are as big as + // possible, there is no carry into the sign bit. + if (LHSKnown.isNonNegative() || RHSKnown.isNonNegative()) { + APInt MaxLHS = ~LHSKnown.Zero; + MaxLHS.clearSignBit(); + APInt MaxRHS = ~RHSKnown.Zero; + MaxRHS.clearSignBit(); + APInt Result = std::move(MaxLHS) + std::move(MaxRHS); + return Result.isSignBitClear(); + } + + // If either of the values is known to be negative, adding them can only + // overflow if the second is also negative, so we can assume that.
+ // Two negative numbers will only overflow if there is no carry to the sign + // bit, so we can check that, even when the values are as small as possible, + // there is a carry into the sign bit. + if (LHSKnown.isNegative() || RHSKnown.isNegative()) { + APInt MinLHS = LHSKnown.One; + MinLHS.clearSignBit(); + APInt MinRHS = RHSKnown.One; + MinRHS.clearSignBit(); + APInt Result = std::move(MinLHS) + std::move(MinRHS); + return Result.isSignBitSet(); + } + + // If we reached here it means that we know nothing about the sign bits. + // In this case we can't know if there will be an overflow, since by + // changing the sign bits any two values can be made to overflow. + return false; +} + static OverflowResult computeOverflowForSignedAdd(const Value *LHS, const Value *RHS, const AddOperator *Add, @@ -3506,14 +3666,29 @@ static OverflowResult computeOverflowForSignedAdd(const Value *LHS, return OverflowResult::NeverOverflows; } + // If LHS and RHS each have at least two sign bits, the addition will look + // like + // + // XX..... + + // YY..... + // + // If the carry into the most significant position is 0, X and Y can't both + // be 1 and therefore the carry out of the addition is also 0. + // + // If the carry into the most significant position is 1, X and Y can't both + // be 0 and therefore the carry out of the addition is also 1. + // + // Since the carry into the most significant position is always equal to + // the carry out of the addition, there is no signed overflow. + if (ComputeNumSignBits(LHS, DL, 0, AC, CxtI, DT) > 1 && + ComputeNumSignBits(RHS, DL, 0, AC, CxtI, DT) > 1) + return OverflowResult::NeverOverflows; + KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT); KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT); - if ((LHSKnown.isNonNegative() && RHSKnown.isNegative()) || - (LHSKnown.isNegative() && RHSKnown.isNonNegative())) { - // The sign bits are opposite: this CANNOT overflow. + if (checkRippleForSignedAdd(LHSKnown, RHSKnown)) return OverflowResult::NeverOverflows; - } // The remaining code needs Add to be available. Return early if it is not. if (!Add) @@ -3525,7 +3700,8 @@ static OverflowResult computeOverflowForSignedAdd(const Value *LHS, // operands. bool LHSOrRHSKnownNonNegative = (LHSKnown.isNonNegative() || RHSKnown.isNonNegative()); - bool LHSOrRHSKnownNegative = (LHSKnown.isNegative() || RHSKnown.isNegative()); + bool LHSOrRHSKnownNegative = + (LHSKnown.isNegative() || RHSKnown.isNegative()); if (LHSOrRHSKnownNonNegative || LHSOrRHSKnownNegative) { KnownBits AddKnown = computeKnownBits(Add, DL, /*Depth=*/0, AC, CxtI, DT); if ((AddKnown.isNonNegative() && LHSOrRHSKnownNonNegative) || @@ -4278,35 +4454,64 @@ isImpliedCondMatchingImmOperands(CmpInst::Predicate APred, const Value *ALHS, } Optional<bool> llvm::isImpliedCondition(const Value *LHS, const Value *RHS, - const DataLayout &DL, bool InvertAPred, + const DataLayout &DL, bool LHSIsFalse, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { + // Bail out when we hit the limit. + if (Depth == MaxDepth) + return None; + // A mismatch occurs when we compare a scalar cmp to a vector cmp, for example.
if (LHS->getType() != RHS->getType()) return None; Type *OpTy = LHS->getType(); - assert(OpTy->getScalarType()->isIntegerTy(1)); + assert(OpTy->isIntOrIntVectorTy(1)); // LHS ==> RHS by definition - if (!InvertAPred && LHS == RHS) - return true; + if (LHS == RHS) + return !LHSIsFalse; if (OpTy->isVectorTy()) // TODO: extending the code below to handle vectors return None; assert(OpTy->isIntegerTy(1) && "implied by above"); - ICmpInst::Predicate APred, BPred; - Value *ALHS, *ARHS; Value *BLHS, *BRHS; + ICmpInst::Predicate BPred; + // We expect the RHS to be an icmp. + if (!match(RHS, m_ICmp(BPred, m_Value(BLHS), m_Value(BRHS)))) + return None; - if (!match(LHS, m_ICmp(APred, m_Value(ALHS), m_Value(ARHS))) || - !match(RHS, m_ICmp(BPred, m_Value(BLHS), m_Value(BRHS)))) + Value *ALHS, *ARHS; + ICmpInst::Predicate APred; + // The LHS can be an 'or', 'and', or 'icmp'. + if (!match(LHS, m_ICmp(APred, m_Value(ALHS), m_Value(ARHS)))) { + // The remaining tests are all recursive, so bail out if we hit the limit. + if (Depth == MaxDepth) + return None; + // If the result of an 'or' is false, then we know both legs of the 'or' are + // false. Similarly, if the result of an 'and' is true, then we know both + // legs of the 'and' are true. + if ((LHSIsFalse && match(LHS, m_Or(m_Value(ALHS), m_Value(ARHS)))) || + (!LHSIsFalse && match(LHS, m_And(m_Value(ALHS), m_Value(ARHS))))) { + if (Optional<bool> Implication = isImpliedCondition( + ALHS, RHS, DL, LHSIsFalse, Depth + 1, AC, CxtI, DT)) + return Implication; + if (Optional<bool> Implication = isImpliedCondition( + ARHS, RHS, DL, LHSIsFalse, Depth + 1, AC, CxtI, DT)) + return Implication; + return None; + } return None; + } + // All of the below logic assumes both LHS and RHS are icmps. + assert(isa<ICmpInst>(LHS) && isa<ICmpInst>(RHS) && "Expected icmps."); - if (InvertAPred) + // The rest of the logic assumes the LHS condition is true. If that's not the + // case, invert the predicate to make it so. + if (LHSIsFalse) APred = CmpInst::getInversePredicate(APred); // Can we infer anything when the two compares have matching operands?
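The ValueTracking hunks above bundle three independent improvements: constant-string inspection generalized over element width via ConstantDataArraySlice, the ptrtoint+arithmetic+inttoptr walker moved up from CodeGen, and two new ways to prove that a signed add cannot overflow. The following standalone sketch (plain C++, deliberately not the LLVM API; numSignBits is a hypothetical 8-bit stand-in for ComputeNumSignBits) illustrates the multi-sign-bit rule used by computeOverflowForSignedAdd:

// If both operands of a signed add carry more than one known sign bit, the
// carry into the sign position equals the carry out of it, so the addition
// can never overflow.
#include <cassert>
#include <cstdint>

static int numSignBits(int8_t v) {
  uint8_t u = static_cast<uint8_t>(v);
  int sign = (u >> 7) & 1;
  int n = 1; // the sign bit itself
  for (int bit = 6; bit >= 0 && ((u >> bit) & 1) == sign; --bit)
    ++n;
  return n;
}

int main() {
  // 31 = 0b00011111 has three sign bits and 32 = 0b00100000 has two, so the
  // rule proves 31 + 32 cannot overflow int8_t; indeed the sum is 63.
  assert(numSignBits(31) > 1 && numSignBits(32) > 1);
  assert(static_cast<int8_t>(31 + 32) == 63);

  // 127 = 0b01111111 has only one sign bit, and 127 + 1 does overflow, so
  // the rule's precondition really is necessary.
  assert(numSignBits(127) == 1);
  return 0;
}

checkRippleForSignedAdd applies the same carry argument when only partial sign information is known, using the largest or smallest values consistent with the KnownBits.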
diff --git a/interpreter/llvm/src/lib/Analysis/VectorUtils.cpp b/interpreter/llvm/src/lib/Analysis/VectorUtils.cpp index 2d2249da4e132..554d132c2ab77 100644 --- a/interpreter/llvm/src/lib/Analysis/VectorUtils.cpp +++ b/interpreter/llvm/src/lib/Analysis/VectorUtils.cpp @@ -11,19 +11,19 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Analysis/VectorUtils.h" #include "llvm/ADT/EquivalenceClasses.h" #include "llvm/Analysis/DemandedBits.h" #include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/Analysis/VectorUtils.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/GetElementPtrTypeIterator.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/Value.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/IRBuilder.h" using namespace llvm; using namespace llvm::PatternMatch; @@ -301,7 +301,7 @@ const llvm::Value *llvm::getSplatValue(const Value *V) { auto *InsertEltInst = dyn_cast<InsertElementInst>(ShuffleInst->getOperand(0)); if (!InsertEltInst || !isa<ConstantInt>(InsertEltInst->getOperand(2)) || - !cast<ConstantInt>(InsertEltInst->getOperand(2))->isNullValue()) + !cast<ConstantInt>(InsertEltInst->getOperand(2))->isZero()) return nullptr; return InsertEltInst->getOperand(1); diff --git a/interpreter/llvm/src/lib/AsmParser/LLLexer.cpp b/interpreter/llvm/src/lib/AsmParser/LLLexer.cpp index a49276099f194..90e0d6a216ee1 100644 --- a/interpreter/llvm/src/lib/AsmParser/LLLexer.cpp +++ b/interpreter/llvm/src/lib/AsmParser/LLLexer.cpp @@ -542,7 +542,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(release); KEYWORD(acq_rel); KEYWORD(seq_cst); - KEYWORD(singlethread); + KEYWORD(syncscope); KEYWORD(nnan); KEYWORD(ninf); @@ -588,7 +588,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(spir_func); KEYWORD(intel_ocl_bicc); KEYWORD(x86_64_sysvcc); - KEYWORD(x86_64_win64cc); + KEYWORD(win64cc); KEYWORD(x86_regcallcc); KEYWORD(webkit_jscc); KEYWORD(swiftcc); diff --git a/interpreter/llvm/src/lib/AsmParser/LLParser.cpp b/interpreter/llvm/src/lib/AsmParser/LLParser.cpp index d7602c83435cd..13679ce1d25c7 100644 --- a/interpreter/llvm/src/lib/AsmParser/LLParser.cpp +++ b/interpreter/llvm/src/lib/AsmParser/LLParser.cpp @@ -15,9 +15,10 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" -#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/AsmParser/SlotMapping.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/IR/Argument.h" #include "llvm/IR/AutoUpgrade.h" #include "llvm/IR/BasicBlock.h" @@ -41,7 +42,6 @@ #include "llvm/IR/Value.h" #include "llvm/IR/ValueSymbolTable.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/SaveAndRestore.h" @@ -1670,7 +1670,7 @@ void LLParser::ParseOptionalDLLStorageClass(unsigned &Res) { /// ::= 'spir_func' /// ::= 'spir_kernel' /// ::= 'x86_64_sysvcc' -/// ::= 'x86_64_win64cc' +/// ::= 'win64cc' /// ::= 'webkit_jscc' /// ::= 'anyregcc' /// ::= 'preserve_mostcc' @@ -1712,7 +1712,7 @@ bool LLParser::ParseOptionalCallingConv(unsigned &CC) { case lltok::kw_spir_func: CC = CallingConv::SPIR_FUNC; break; case lltok::kw_intel_ocl_bicc: CC = CallingConv::Intel_OCL_BI; break; case
lltok::kw_x86_64_sysvcc: CC = CallingConv::X86_64_SysV; break; - case lltok::kw_x86_64_win64cc: CC = CallingConv::X86_64_Win64; break; + case lltok::kw_win64cc: CC = CallingConv::Win64; break; case lltok::kw_webkit_jscc: CC = CallingConv::WebKit_JS; break; case lltok::kw_anyregcc: CC = CallingConv::AnyReg; break; case lltok::kw_preserve_mostcc:CC = CallingConv::PreserveMost; break; @@ -1919,20 +1919,42 @@ bool LLParser::parseAllocSizeArguments(unsigned &BaseSizeArg, } /// ParseScopeAndOrdering -/// if isAtomic: ::= 'singlethread'? AtomicOrdering +/// if isAtomic: ::= SyncScope? AtomicOrdering /// else: ::= /// /// This sets Scope and Ordering to the parsed values. -bool LLParser::ParseScopeAndOrdering(bool isAtomic, SynchronizationScope &Scope, +bool LLParser::ParseScopeAndOrdering(bool isAtomic, SyncScope::ID &SSID, AtomicOrdering &Ordering) { if (!isAtomic) return false; - Scope = CrossThread; - if (EatIfPresent(lltok::kw_singlethread)) - Scope = SingleThread; + return ParseScope(SSID) || ParseOrdering(Ordering); +} + +/// ParseScope +/// ::= syncscope("singlethread" | "")? +/// +/// This sets synchronization scope ID to the ID of the parsed value. +bool LLParser::ParseScope(SyncScope::ID &SSID) { + SSID = SyncScope::System; + if (EatIfPresent(lltok::kw_syncscope)) { + auto StartParenAt = Lex.getLoc(); + if (!EatIfPresent(lltok::lparen)) + return Error(StartParenAt, "Expected '(' in syncscope"); + + std::string SSN; + auto SSNAt = Lex.getLoc(); + if (ParseStringConstant(SSN)) + return Error(SSNAt, "Expected synchronization scope name"); - return ParseOrdering(Ordering); + auto EndParenAt = Lex.getLoc(); + if (!EatIfPresent(lltok::rparen)) + return Error(EndParenAt, "Expected ')' in syncscope"); + + SSID = Context.getOrInsertSyncScopeID(SSN); + } + + return false; } /// ParseOrdering @@ -2502,7 +2524,7 @@ LLParser::PerFunctionState::~PerFunctionState() { continue; P.second.first->replaceAllUsesWith( UndefValue::get(P.second.first->getType())); - delete P.second.first; + P.second.first->deleteValue(); } for (const auto &P : ForwardRefValIDs) { @@ -2510,7 +2532,7 @@ LLParser::PerFunctionState::~PerFunctionState() { continue; P.second.first->replaceAllUsesWith( UndefValue::get(P.second.first->getType())); - delete P.second.first; + P.second.first->deleteValue(); } } @@ -2642,7 +2664,7 @@ bool LLParser::PerFunctionState::SetInstName(int NameID, getTypeString(FI->second.first->getType()) + "'"); Sentinel->replaceAllUsesWith(Inst); - delete Sentinel; + Sentinel->deleteValue(); ForwardRefValIDs.erase(FI); } @@ -2659,7 +2681,7 @@ bool LLParser::PerFunctionState::SetInstName(int NameID, getTypeString(FI->second.first->getType()) + "'"); Sentinel->replaceAllUsesWith(Inst); - delete Sentinel; + Sentinel->deleteValue(); ForwardRefVals.erase(FI); } @@ -3061,7 +3083,7 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { } else { assert(Opc == Instruction::ICmp && "Unexpected opcode for CmpInst!"); if (!Val0->getType()->isIntOrIntVectorTy() && - !Val0->getType()->getScalarType()->isPointerTy()) + !Val0->getType()->isPtrOrPtrVectorTy()) return Error(ID.Loc, "icmp requires pointer or integer operands"); ID.ConstantVal = ConstantExpr::getICmp(Pred, Val0, Val1); } @@ -3210,7 +3232,7 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { if (Opc == Instruction::GetElementPtr) { if (Elts.size() == 0 || - !Elts[0]->getType()->getScalarType()->isPointerTy()) + !Elts[0]->getType()->isPtrOrPtrVectorTy()) return Error(ID.Loc, "base of getelementptr must be a pointer"); Type *BaseType = 
Elts[0]->getType(); @@ -3226,7 +3248,7 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { ArrayRef<Constant *> Indices(Elts.begin() + 1, Elts.end()); for (Constant *Val : Indices) { Type *ValTy = Val->getType(); - if (!ValTy->getScalarType()->isIntegerTy()) + if (!ValTy->isIntOrIntVectorTy()) return Error(ID.Loc, "getelementptr index must be an integer"); if (ValTy->isVectorTy()) { unsigned ValNumEl = ValTy->getVectorNumElements(); @@ -4389,13 +4411,15 @@ bool LLParser::ParseDIImportedEntity(MDNode *&Result, bool IsDistinct) { REQUIRED(tag, DwarfTagField, ); \ REQUIRED(scope, MDField, ); \ OPTIONAL(entity, MDField, ); \ + OPTIONAL(file, MDField, ); \ OPTIONAL(line, LineField, ); \ OPTIONAL(name, MDStringField, ); PARSE_MD_FIELDS(); #undef VISIT_MD_FIELDS - Result = GET_OR_DISTINCT(DIImportedEntity, (Context, tag.Val, scope.Val, - entity.Val, line.Val, name.Val)); + Result = GET_OR_DISTINCT( + DIImportedEntity, + (Context, tag.Val, scope.Val, entity.Val, file.Val, line.Val, name.Val)); return false; } @@ -5697,7 +5721,7 @@ bool LLParser::ParseCompare(Instruction *&Inst, PerFunctionState &PFS, } else { assert(Opc == Instruction::ICmp && "Unknown opcode for CmpInst!"); if (!LHS->getType()->isIntOrIntVectorTy() && - !LHS->getType()->getScalarType()->isPointerTy()) + !LHS->getType()->isPtrOrPtrVectorTy()) return Error(Loc, "icmp requires integer operands"); Inst = new ICmpInst(CmpInst::Predicate(Pred), LHS, RHS); } @@ -6100,7 +6124,7 @@ int LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS) { bool AteExtraComma = false; bool isAtomic = false; AtomicOrdering Ordering = AtomicOrdering::NotAtomic; - SynchronizationScope Scope = CrossThread; + SyncScope::ID SSID = SyncScope::System; if (Lex.getKind() == lltok::kw_atomic) { isAtomic = true; @@ -6118,7 +6142,7 @@ int LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS) { if (ParseType(Ty) || ParseToken(lltok::comma, "expected comma after load's type") || ParseTypeAndValue(Val, Loc, PFS) || - ParseScopeAndOrdering(isAtomic, Scope, Ordering) || + ParseScopeAndOrdering(isAtomic, SSID, Ordering) || ParseOptionalCommaAlign(Alignment, AteExtraComma)) return true; @@ -6134,7 +6158,7 @@ int LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS) { return Error(ExplicitTypeLoc, "explicit pointee type doesn't match operand's pointee type"); - Inst = new LoadInst(Ty, Val, "", isVolatile, Alignment, Ordering, Scope); + Inst = new LoadInst(Ty, Val, "", isVolatile, Alignment, Ordering, SSID); return AteExtraComma ?
InstExtraComma : InstNormal; } @@ -6149,7 +6173,7 @@ int LLParser::ParseStore(Instruction *&Inst, PerFunctionState &PFS) { bool AteExtraComma = false; bool isAtomic = false; AtomicOrdering Ordering = AtomicOrdering::NotAtomic; - SynchronizationScope Scope = CrossThread; + SyncScope::ID SSID = SyncScope::System; if (Lex.getKind() == lltok::kw_atomic) { isAtomic = true; @@ -6165,7 +6189,7 @@ int LLParser::ParseStore(Instruction *&Inst, PerFunctionState &PFS) { if (ParseTypeAndValue(Val, Loc, PFS) || ParseToken(lltok::comma, "expected ',' after store operand") || ParseTypeAndValue(Ptr, PtrLoc, PFS) || - ParseScopeAndOrdering(isAtomic, Scope, Ordering) || + ParseScopeAndOrdering(isAtomic, SSID, Ordering) || ParseOptionalCommaAlign(Alignment, AteExtraComma)) return true; @@ -6181,7 +6205,7 @@ int LLParser::ParseStore(Instruction *&Inst, PerFunctionState &PFS) { Ordering == AtomicOrdering::AcquireRelease) return Error(Loc, "atomic store cannot use Acquire ordering"); - Inst = new StoreInst(Val, Ptr, isVolatile, Alignment, Ordering, Scope); + Inst = new StoreInst(Val, Ptr, isVolatile, Alignment, Ordering, SSID); return AteExtraComma ? InstExtraComma : InstNormal; } @@ -6193,7 +6217,7 @@ int LLParser::ParseCmpXchg(Instruction *&Inst, PerFunctionState &PFS) { bool AteExtraComma = false; AtomicOrdering SuccessOrdering = AtomicOrdering::NotAtomic; AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic; - SynchronizationScope Scope = CrossThread; + SyncScope::ID SSID = SyncScope::System; bool isVolatile = false; bool isWeak = false; @@ -6208,7 +6232,7 @@ int LLParser::ParseCmpXchg(Instruction *&Inst, PerFunctionState &PFS) { ParseTypeAndValue(Cmp, CmpLoc, PFS) || ParseToken(lltok::comma, "expected ',' after cmpxchg cmp operand") || ParseTypeAndValue(New, NewLoc, PFS) || - ParseScopeAndOrdering(true /*Always atomic*/, Scope, SuccessOrdering) || + ParseScopeAndOrdering(true /*Always atomic*/, SSID, SuccessOrdering) || ParseOrdering(FailureOrdering)) return true; @@ -6231,7 +6255,7 @@ int LLParser::ParseCmpXchg(Instruction *&Inst, PerFunctionState &PFS) { if (!New->getType()->isFirstClassType()) return Error(NewLoc, "cmpxchg operand must be a first class value"); AtomicCmpXchgInst *CXI = new AtomicCmpXchgInst( - Ptr, Cmp, New, SuccessOrdering, FailureOrdering, Scope); + Ptr, Cmp, New, SuccessOrdering, FailureOrdering, SSID); CXI->setVolatile(isVolatile); CXI->setWeak(isWeak); Inst = CXI; @@ -6245,7 +6269,7 @@ int LLParser::ParseAtomicRMW(Instruction *&Inst, PerFunctionState &PFS) { Value *Ptr, *Val; LocTy PtrLoc, ValLoc; bool AteExtraComma = false; AtomicOrdering Ordering = AtomicOrdering::NotAtomic; - SynchronizationScope Scope = CrossThread; + SyncScope::ID SSID = SyncScope::System; bool isVolatile = false; AtomicRMWInst::BinOp Operation; @@ -6271,7 +6295,7 @@ int LLParser::ParseAtomicRMW(Instruction *&Inst, PerFunctionState &PFS) { if (ParseTypeAndValue(Ptr, PtrLoc, PFS) || ParseToken(lltok::comma, "expected ',' after atomicrmw address") || ParseTypeAndValue(Val, ValLoc, PFS) || - ParseScopeAndOrdering(true /*Always atomic*/, Scope, Ordering)) + ParseScopeAndOrdering(true /*Always atomic*/, SSID, Ordering)) return true; if (Ordering == AtomicOrdering::Unordered) @@ -6288,7 +6312,7 @@ int LLParser::ParseAtomicRMW(Instruction *&Inst, PerFunctionState &PFS) { " integer"); AtomicRMWInst *RMWI = - new AtomicRMWInst(Operation, Ptr, Val, Ordering, Scope); + new AtomicRMWInst(Operation, Ptr, Val, Ordering, SSID); RMWI->setVolatile(isVolatile); Inst = RMWI; return AteExtraComma ? 
InstExtraComma : InstNormal; @@ -6298,8 +6322,8 @@ int LLParser::ParseAtomicRMW(Instruction *&Inst, PerFunctionState &PFS) { /// ::= 'fence' 'singlethread'? AtomicOrdering int LLParser::ParseFence(Instruction *&Inst, PerFunctionState &PFS) { AtomicOrdering Ordering = AtomicOrdering::NotAtomic; - SynchronizationScope Scope = CrossThread; - if (ParseScopeAndOrdering(true /*Always atomic*/, Scope, Ordering)) + SyncScope::ID SSID = SyncScope::System; + if (ParseScopeAndOrdering(true /*Always atomic*/, SSID, Ordering)) return true; if (Ordering == AtomicOrdering::Unordered) @@ -6307,7 +6331,7 @@ int LLParser::ParseFence(Instruction *&Inst, PerFunctionState &PFS) { if (Ordering == AtomicOrdering::Monotonic) return TokError("fence cannot be monotonic"); - Inst = new FenceInst(Context, Ordering, Scope); + Inst = new FenceInst(Context, Ordering, SSID); return InstNormal; } @@ -6349,7 +6373,7 @@ int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) { break; } if (ParseTypeAndValue(Val, EltLoc, PFS)) return true; - if (!Val->getType()->getScalarType()->isIntegerTy()) + if (!Val->getType()->isIntOrIntVectorTy()) return Error(EltLoc, "getelementptr index must be an integer"); if (Val->getType()->isVectorTy()) { diff --git a/interpreter/llvm/src/lib/AsmParser/LLParser.h b/interpreter/llvm/src/lib/AsmParser/LLParser.h index 4616c2e86947c..d5b059355c423 100644 --- a/interpreter/llvm/src/lib/AsmParser/LLParser.h +++ b/interpreter/llvm/src/lib/AsmParser/LLParser.h @@ -241,8 +241,9 @@ namespace llvm { bool ParseOptionalCallingConv(unsigned &CC); bool ParseOptionalAlignment(unsigned &Alignment); bool ParseOptionalDerefAttrBytes(lltok::Kind AttrKind, uint64_t &Bytes); - bool ParseScopeAndOrdering(bool isAtomic, SynchronizationScope &Scope, + bool ParseScopeAndOrdering(bool isAtomic, SyncScope::ID &SSID, AtomicOrdering &Ordering); + bool ParseScope(SyncScope::ID &SSID); bool ParseOrdering(AtomicOrdering &Ordering); bool ParseOptionalStackAlignment(unsigned &Alignment); bool ParseOptionalCommaAlign(unsigned &Alignment, bool &AteExtraComma); diff --git a/interpreter/llvm/src/lib/AsmParser/LLToken.h b/interpreter/llvm/src/lib/AsmParser/LLToken.h index 6c8ed7da495d1..0f3707ba0d1ea 100644 --- a/interpreter/llvm/src/lib/AsmParser/LLToken.h +++ b/interpreter/llvm/src/lib/AsmParser/LLToken.h @@ -93,7 +93,7 @@ enum Kind { kw_release, kw_acq_rel, kw_seq_cst, - kw_singlethread, + kw_syncscope, kw_nnan, kw_ninf, kw_nsz, @@ -141,7 +141,7 @@ enum Kind { kw_spir_kernel, kw_spir_func, kw_x86_64_sysvcc, - kw_x86_64_win64cc, + kw_win64cc, kw_webkit_jscc, kw_anyregcc, kw_swiftcc, diff --git a/interpreter/llvm/src/lib/AsmParser/LLVMBuild.txt b/interpreter/llvm/src/lib/AsmParser/LLVMBuild.txt index 3bc31ed910a79..82dba8c15bb8d 100644 --- a/interpreter/llvm/src/lib/AsmParser/LLVMBuild.txt +++ b/interpreter/llvm/src/lib/AsmParser/LLVMBuild.txt @@ -19,4 +19,4 @@ type = Library name = AsmParser parent = Libraries -required_libraries = Core Support +required_libraries = BinaryFormat Core Support diff --git a/interpreter/llvm/src/lib/BinaryFormat/CMakeLists.txt b/interpreter/llvm/src/lib/BinaryFormat/CMakeLists.txt new file mode 100644 index 0000000000000..cb78ea6fdf927 --- /dev/null +++ b/interpreter/llvm/src/lib/BinaryFormat/CMakeLists.txt @@ -0,0 +1,8 @@ +add_llvm_library(LLVMBinaryFormat + Dwarf.cpp + Magic.cpp + + ADDITIONAL_HEADER_DIRS + ${LLVM_MAIN_INCLUDE_DIR}/llvm/BinaryFormat + ) + \ No newline at end of file diff --git a/interpreter/llvm/src/lib/Support/Dwarf.cpp 
b/interpreter/llvm/src/lib/BinaryFormat/Dwarf.cpp similarity index 77% rename from interpreter/llvm/src/lib/Support/Dwarf.cpp rename to interpreter/llvm/src/lib/BinaryFormat/Dwarf.cpp index 200546857de7f..37c4579ef0f89 100644 --- a/interpreter/llvm/src/lib/Support/Dwarf.cpp +++ b/interpreter/llvm/src/lib/BinaryFormat/Dwarf.cpp @@ -1,4 +1,4 @@ -//===-- llvm/Support/Dwarf.cpp - Dwarf Framework ----------------*- C++ -*-===// +//===-- llvm/BinaryFormat/Dwarf.cpp - Dwarf Framework ------------*- C++-*-===// // // The LLVM Compiler Infrastructure // @@ -11,7 +11,7 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Support/Dwarf.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Support/ErrorHandling.h" @@ -25,15 +25,15 @@ StringRef llvm::dwarf::TagString(unsigned Tag) { #define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR) \ case DW_TAG_##NAME: \ return "DW_TAG_" #NAME; -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" } } unsigned llvm::dwarf::getTag(StringRef TagString) { return StringSwitch<unsigned>(TagString) #define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR) \ - .Case("DW_TAG_" #NAME, DW_TAG_##NAME) -#include "llvm/Support/Dwarf.def" + .Case("DW_TAG_" #NAME, DW_TAG_##NAME) +#include "llvm/BinaryFormat/Dwarf.def" .Default(DW_TAG_invalid); } @@ -44,7 +44,7 @@ unsigned llvm::dwarf::TagVersion(dwarf::Tag Tag) { #define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR) \ case DW_TAG_##NAME: \ return VERSION; -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" } } @@ -55,14 +55,16 @@ unsigned llvm::dwarf::TagVendor(dwarf::Tag Tag) { #define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR) \ case DW_TAG_##NAME: \ return DWARF_VENDOR_##VENDOR; -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" } } StringRef llvm::dwarf::ChildrenString(unsigned Children) { switch (Children) { - case DW_CHILDREN_no: return "DW_CHILDREN_no"; - case DW_CHILDREN_yes: return "DW_CHILDREN_yes"; + case DW_CHILDREN_no: + return "DW_CHILDREN_no"; + case DW_CHILDREN_yes: + return "DW_CHILDREN_yes"; } return StringRef(); } @@ -74,7 +76,7 @@ StringRef llvm::dwarf::AttributeString(unsigned Attribute) { #define HANDLE_DW_AT(ID, NAME, VERSION, VENDOR) \ case DW_AT_##NAME: \ return "DW_AT_" #NAME; -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" } } @@ -85,7 +87,7 @@ unsigned llvm::dwarf::AttributeVersion(dwarf::Attribute Attribute) { #define HANDLE_DW_AT(ID, NAME, VERSION, VENDOR) \ case DW_AT_##NAME: \ return VERSION; -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" } } @@ -96,7 +98,7 @@ unsigned llvm::dwarf::AttributeVendor(dwarf::Attribute Attribute) { #define HANDLE_DW_AT(ID, NAME, VERSION, VENDOR) \ case DW_AT_##NAME: \ return DWARF_VENDOR_##VENDOR; -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" } } @@ -107,7 +109,7 @@ StringRef llvm::dwarf::FormEncodingString(unsigned Encoding) { #define HANDLE_DW_FORM(ID, NAME, VERSION, VENDOR) \ case DW_FORM_##NAME: \ return "DW_FORM_" #NAME; -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" } } @@ -118,7 +120,7 @@ unsigned llvm::dwarf::FormVersion(dwarf::Form Form) { #define HANDLE_DW_FORM(ID, NAME, VERSION, VENDOR) \ case DW_FORM_##NAME: \ return VERSION; -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" } } @@ -129,7 +131,7 @@ unsigned llvm::dwarf::FormVendor(dwarf::Form Form) { #define HANDLE_DW_FORM(ID, NAME, VERSION, VENDOR) \
case DW_FORM_##NAME: \ return DWARF_VENDOR_##VENDOR; -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" } } @@ -140,7 +142,7 @@ StringRef llvm::dwarf::OperationEncodingString(unsigned Encoding) { #define HANDLE_DW_OP(ID, NAME, VERSION, VENDOR) \ case DW_OP_##NAME: \ return "DW_OP_" #NAME; -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" case DW_OP_LLVM_fragment: return "DW_OP_LLVM_fragment"; } @@ -149,8 +151,8 @@ StringRef llvm::dwarf::OperationEncodingString(unsigned Encoding) { unsigned llvm::dwarf::getOperationEncoding(StringRef OperationEncodingString) { return StringSwitch<unsigned>(OperationEncodingString) #define HANDLE_DW_OP(ID, NAME, VERSION, VENDOR) \ - .Case("DW_OP_" #NAME, DW_OP_##NAME) -#include "llvm/Support/Dwarf.def" + .Case("DW_OP_" #NAME, DW_OP_##NAME) +#include "llvm/BinaryFormat/Dwarf.def" .Case("DW_OP_LLVM_fragment", DW_OP_LLVM_fragment) .Default(0); } @@ -162,7 +164,7 @@ unsigned llvm::dwarf::OperationVersion(dwarf::LocationAtom Op) { #define HANDLE_DW_OP(ID, NAME, VERSION, VENDOR) \ case DW_OP_##NAME: \ return VERSION; -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" } } @@ -173,7 +175,7 @@ unsigned llvm::dwarf::OperationVendor(dwarf::LocationAtom Op) { #define HANDLE_DW_OP(ID, NAME, VERSION, VENDOR) \ case DW_OP_##NAME: \ return DWARF_VENDOR_##VENDOR; -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" } } @@ -184,15 +186,15 @@ StringRef llvm::dwarf::AttributeEncodingString(unsigned Encoding) { #define HANDLE_DW_ATE(ID, NAME, VERSION, VENDOR) \ case DW_ATE_##NAME: \ return "DW_ATE_" #NAME; -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" } } unsigned llvm::dwarf::getAttributeEncoding(StringRef EncodingString) { return StringSwitch<unsigned>(EncodingString) #define HANDLE_DW_ATE(ID, NAME, VERSION, VENDOR) \ - .Case("DW_ATE_" #NAME, DW_ATE_##NAME) -#include "llvm/Support/Dwarf.def" + .Case("DW_ATE_" #NAME, DW_ATE_##NAME) +#include "llvm/BinaryFormat/Dwarf.def" .Default(0); } @@ -203,7 +205,7 @@ unsigned llvm::dwarf::AttributeEncodingVersion(dwarf::TypeKind ATE) { #define HANDLE_DW_ATE(ID, NAME, VERSION, VENDOR) \ case DW_ATE_##NAME: \ return VERSION; -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" } } @@ -214,28 +216,38 @@ unsigned llvm::dwarf::AttributeEncodingVendor(dwarf::TypeKind ATE) { #define HANDLE_DW_ATE(ID, NAME, VERSION, VENDOR) \ case DW_ATE_##NAME: \ return DWARF_VENDOR_##VENDOR; -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" } } StringRef llvm::dwarf::DecimalSignString(unsigned Sign) { switch (Sign) { - case DW_DS_unsigned: return "DW_DS_unsigned"; - case DW_DS_leading_overpunch: return "DW_DS_leading_overpunch"; - case DW_DS_trailing_overpunch: return "DW_DS_trailing_overpunch"; - case DW_DS_leading_separate: return "DW_DS_leading_separate"; - case DW_DS_trailing_separate: return "DW_DS_trailing_separate"; + case DW_DS_unsigned: + return "DW_DS_unsigned"; + case DW_DS_leading_overpunch: + return "DW_DS_leading_overpunch"; + case DW_DS_trailing_overpunch: + return "DW_DS_trailing_overpunch"; + case DW_DS_leading_separate: + return "DW_DS_leading_separate"; + case DW_DS_trailing_separate: + return "DW_DS_trailing_separate"; } return StringRef(); } StringRef llvm::dwarf::EndianityString(unsigned Endian) { switch (Endian) { - case DW_END_default: return "DW_END_default"; - case DW_END_big: return "DW_END_big"; - case DW_END_little: return "DW_END_little"; - case DW_END_lo_user: return
"DW_END_lo_user"; - case DW_END_hi_user: return "DW_END_hi_user"; + case DW_END_default: + return "DW_END_default"; + case DW_END_big: + return "DW_END_big"; + case DW_END_little: + return "DW_END_little"; + case DW_END_lo_user: + return "DW_END_lo_user"; + case DW_END_hi_user: + return "DW_END_hi_user"; } return StringRef(); } @@ -243,18 +255,24 @@ StringRef llvm::dwarf::EndianityString(unsigned Endian) { StringRef llvm::dwarf::AccessibilityString(unsigned Access) { switch (Access) { // Accessibility codes - case DW_ACCESS_public: return "DW_ACCESS_public"; - case DW_ACCESS_protected: return "DW_ACCESS_protected"; - case DW_ACCESS_private: return "DW_ACCESS_private"; + case DW_ACCESS_public: + return "DW_ACCESS_public"; + case DW_ACCESS_protected: + return "DW_ACCESS_protected"; + case DW_ACCESS_private: + return "DW_ACCESS_private"; } return StringRef(); } StringRef llvm::dwarf::VisibilityString(unsigned Visibility) { switch (Visibility) { - case DW_VIS_local: return "DW_VIS_local"; - case DW_VIS_exported: return "DW_VIS_exported"; - case DW_VIS_qualified: return "DW_VIS_qualified"; + case DW_VIS_local: + return "DW_VIS_local"; + case DW_VIS_exported: + return "DW_VIS_exported"; + case DW_VIS_qualified: + return "DW_VIS_qualified"; } return StringRef(); } @@ -266,7 +284,7 @@ StringRef llvm::dwarf::VirtualityString(unsigned Virtuality) { #define HANDLE_DW_VIRTUALITY(ID, NAME) \ case DW_VIRTUALITY_##NAME: \ return "DW_VIRTUALITY_" #NAME; -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" } } @@ -274,7 +292,7 @@ unsigned llvm::dwarf::getVirtuality(StringRef VirtualityString) { return StringSwitch(VirtualityString) #define HANDLE_DW_VIRTUALITY(ID, NAME) \ .Case("DW_VIRTUALITY_" #NAME, DW_VIRTUALITY_##NAME) -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" .Default(DW_VIRTUALITY_invalid); } @@ -285,7 +303,7 @@ StringRef llvm::dwarf::LanguageString(unsigned Language) { #define HANDLE_DW_LANG(ID, NAME, VERSION, VENDOR) \ case DW_LANG_##NAME: \ return "DW_LANG_" #NAME; -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" } } @@ -293,7 +311,7 @@ unsigned llvm::dwarf::getLanguage(StringRef LanguageString) { return StringSwitch(LanguageString) #define HANDLE_DW_LANG(ID, NAME, VERSION, VENDOR) \ .Case("DW_LANG_" #NAME, DW_LANG_##NAME) -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" .Default(0); } @@ -304,7 +322,7 @@ unsigned llvm::dwarf::LanguageVersion(dwarf::SourceLanguage Lang) { #define HANDLE_DW_LANG(ID, NAME, VERSION, VENDOR) \ case DW_LANG_##NAME: \ return VERSION; -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" } } @@ -315,16 +333,20 @@ unsigned llvm::dwarf::LanguageVendor(dwarf::SourceLanguage Lang) { #define HANDLE_DW_LANG(ID, NAME, VERSION, VENDOR) \ case DW_LANG_##NAME: \ return DWARF_VENDOR_##VENDOR; -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" } } StringRef llvm::dwarf::CaseString(unsigned Case) { switch (Case) { - case DW_ID_case_sensitive: return "DW_ID_case_sensitive"; - case DW_ID_up_case: return "DW_ID_up_case"; - case DW_ID_down_case: return "DW_ID_down_case"; - case DW_ID_case_insensitive: return "DW_ID_case_insensitive"; + case DW_ID_case_sensitive: + return "DW_ID_case_sensitive"; + case DW_ID_up_case: + return "DW_ID_up_case"; + case DW_ID_down_case: + return "DW_ID_down_case"; + case DW_ID_case_insensitive: + return "DW_ID_case_insensitive"; } return StringRef(); } @@ -333,42 +355,50 @@ StringRef 
llvm::dwarf::ConventionString(unsigned CC) { switch (CC) { default: return StringRef(); -#define HANDLE_DW_CC(ID, NAME) \ - case DW_CC_##NAME: \ +#define HANDLE_DW_CC(ID, NAME) \ + case DW_CC_##NAME: \ return "DW_CC_" #NAME; -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" } } unsigned llvm::dwarf::getCallingConvention(StringRef CCString) { return StringSwitch<unsigned>(CCString) #define HANDLE_DW_CC(ID, NAME) .Case("DW_CC_" #NAME, DW_CC_##NAME) -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" .Default(0); } StringRef llvm::dwarf::InlineCodeString(unsigned Code) { switch (Code) { - case DW_INL_not_inlined: return "DW_INL_not_inlined"; - case DW_INL_inlined: return "DW_INL_inlined"; - case DW_INL_declared_not_inlined: return "DW_INL_declared_not_inlined"; - case DW_INL_declared_inlined: return "DW_INL_declared_inlined"; + case DW_INL_not_inlined: + return "DW_INL_not_inlined"; + case DW_INL_inlined: + return "DW_INL_inlined"; + case DW_INL_declared_not_inlined: + return "DW_INL_declared_not_inlined"; + case DW_INL_declared_inlined: + return "DW_INL_declared_inlined"; } return StringRef(); } StringRef llvm::dwarf::ArrayOrderString(unsigned Order) { switch (Order) { - case DW_ORD_row_major: return "DW_ORD_row_major"; - case DW_ORD_col_major: return "DW_ORD_col_major"; + case DW_ORD_row_major: + return "DW_ORD_row_major"; + case DW_ORD_col_major: + return "DW_ORD_col_major"; } return StringRef(); } StringRef llvm::dwarf::DiscriminantString(unsigned Discriminant) { switch (Discriminant) { - case DW_DSC_label: return "DW_DSC_label"; - case DW_DSC_range: return "DW_DSC_range"; + case DW_DSC_label: + return "DW_DSC_label"; + case DW_DSC_range: + return "DW_DSC_range"; } return StringRef(); } @@ -377,10 +407,10 @@ StringRef llvm::dwarf::LNStandardString(unsigned Standard) { switch (Standard) { default: return StringRef(); -#define HANDLE_DW_LNS(ID, NAME) \ - case DW_LNS_##NAME: \ +#define HANDLE_DW_LNS(ID, NAME) \ + case DW_LNS_##NAME: \ return "DW_LNS_" #NAME; -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" } } @@ -388,22 +418,28 @@ StringRef llvm::dwarf::LNExtendedString(unsigned Encoding) { switch (Encoding) { default: return StringRef(); -#define HANDLE_DW_LNE(ID, NAME) \ - case DW_LNE_##NAME: \ +#define HANDLE_DW_LNE(ID, NAME) \ + case DW_LNE_##NAME: \ return "DW_LNE_" #NAME; -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" } } StringRef llvm::dwarf::MacinfoString(unsigned Encoding) { switch (Encoding) { // Macinfo Type Encodings - case DW_MACINFO_define: return "DW_MACINFO_define"; - case DW_MACINFO_undef: return "DW_MACINFO_undef"; - case DW_MACINFO_start_file: return "DW_MACINFO_start_file"; - case DW_MACINFO_end_file: return "DW_MACINFO_end_file"; - case DW_MACINFO_vendor_ext: return "DW_MACINFO_vendor_ext"; - case DW_MACINFO_invalid: return "DW_MACINFO_invalid"; + case DW_MACINFO_define: + return "DW_MACINFO_define"; + case DW_MACINFO_undef: + return "DW_MACINFO_undef"; + case DW_MACINFO_start_file: + return "DW_MACINFO_start_file"; + case DW_MACINFO_end_file: + return "DW_MACINFO_end_file"; + case DW_MACINFO_vendor_ext: + return "DW_MACINFO_vendor_ext"; + case DW_MACINFO_invalid: + return "DW_MACINFO_invalid"; } return StringRef(); } @@ -422,10 +458,10 @@ StringRef llvm::dwarf::CallFrameString(unsigned Encoding) { switch (Encoding) { default: return StringRef(); -#define HANDLE_DW_CFA(ID, NAME) \ - case DW_CFA_##NAME: \ +#define HANDLE_DW_CFA(ID, NAME) \ + case DW_CFA_##NAME: \ return "DW_CFA_"
#NAME; -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" } } @@ -433,10 +469,10 @@ StringRef llvm::dwarf::ApplePropertyString(unsigned Prop) { switch (Prop) { default: return StringRef(); -#define HANDLE_DW_APPLE_PROPERTY(ID, NAME) \ - case DW_APPLE_PROPERTY_##NAME: \ +#define HANDLE_DW_APPLE_PROPERTY(ID, NAME) \ + case DW_APPLE_PROPERTY_##NAME: \ return "DW_APPLE_PROPERTY_" #NAME; -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" } } @@ -447,7 +483,7 @@ StringRef llvm::dwarf::UnitTypeString(unsigned UT) { #define HANDLE_DW_UT(ID, NAME) \ case DW_UT_##NAME: \ return "DW_UT_" #NAME; -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" } } diff --git a/interpreter/llvm/src/lib/BinaryFormat/LLVMBuild.txt b/interpreter/llvm/src/lib/BinaryFormat/LLVMBuild.txt new file mode 100644 index 0000000000000..d7d4dcb5f23d3 --- /dev/null +++ b/interpreter/llvm/src/lib/BinaryFormat/LLVMBuild.txt @@ -0,0 +1,22 @@ +;===- ./lib/BinaryFormat/LLVMBuild.txt -------------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = BinaryFormat +parent = Libraries +required_libraries = Support diff --git a/interpreter/llvm/src/lib/BinaryFormat/Magic.cpp b/interpreter/llvm/src/lib/BinaryFormat/Magic.cpp new file mode 100644 index 0000000000000..b19a07a9066b0 --- /dev/null +++ b/interpreter/llvm/src/lib/BinaryFormat/Magic.cpp @@ -0,0 +1,217 @@ +//===- llvm/BinaryFormat/Magic.cpp - File magic identification --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/BinaryFormat/Magic.h" + +#include "llvm/BinaryFormat/COFF.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/BinaryFormat/MachO.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/FileSystem.h" + +#if !defined(_MSC_VER) && !defined(__MINGW32__) +#include <unistd.h> +#else +#include <io.h> +#endif + +using namespace llvm; +using namespace llvm::support::endian; +using namespace llvm::sys::fs; + +template <size_t N> +static bool startswith(StringRef Magic, const char (&S)[N]) { + return Magic.startswith(StringRef(S, N - 1)); } + +/// @brief Identify the magic in magic.
+file_magic llvm::identify_magic(StringRef Magic) { + if (Magic.size() < 4) + return file_magic::unknown; + switch ((unsigned char)Magic[0]) { + case 0x00: { + // COFF bigobj, CL.exe's LTO object file, or short import library file + if (startswith(Magic, "\0\0\xFF\xFF")) { + size_t MinSize = + offsetof(COFF::BigObjHeader, UUID) + sizeof(COFF::BigObjMagic); + if (Magic.size() < MinSize) + return file_magic::coff_import_library; + + const char *Start = Magic.data() + offsetof(COFF::BigObjHeader, UUID); + if (memcmp(Start, COFF::BigObjMagic, sizeof(COFF::BigObjMagic)) == 0) + return file_magic::coff_object; + if (memcmp(Start, COFF::ClGlObjMagic, sizeof(COFF::BigObjMagic)) == 0) + return file_magic::coff_cl_gl_object; + return file_magic::coff_import_library; + } + // Windows resource file + if (Magic.size() >= sizeof(COFF::WinResMagic) && + memcmp(Magic.data(), COFF::WinResMagic, sizeof(COFF::WinResMagic)) == 0) + return file_magic::windows_resource; + // 0x0000 = COFF unknown machine type + if (Magic[1] == 0) + return file_magic::coff_object; + if (startswith(Magic, "\0asm")) + return file_magic::wasm_object; + break; + } + case 0xDE: // 0x0B17C0DE = BC wrapper + if (startswith(Magic, "\xDE\xC0\x17\x0B")) + return file_magic::bitcode; + break; + case 'B': + if (startswith(Magic, "BC\xC0\xDE")) + return file_magic::bitcode; + break; + case '!': + if (startswith(Magic, "!<arch>\n") || startswith(Magic, "!<thin>\n")) + return file_magic::archive; + break; + + case '\177': + if (startswith(Magic, "\177ELF") && Magic.size() >= 18) { + bool Data2MSB = Magic[5] == 2; + unsigned high = Data2MSB ? 16 : 17; + unsigned low = Data2MSB ? 17 : 16; + if (Magic[high] == 0) { + switch (Magic[low]) { + default: + return file_magic::elf; + case 1: + return file_magic::elf_relocatable; + case 2: + return file_magic::elf_executable; + case 3: + return file_magic::elf_shared_object; + case 4: + return file_magic::elf_core; + } + } + // It's still some type of ELF file. + return file_magic::elf; + } + break; + + case 0xCA: + if (startswith(Magic, "\xCA\xFE\xBA\xBE") || + startswith(Magic, "\xCA\xFE\xBA\xBF")) { + // This is complicated by an overlap with Java class files. + // See the Mach-O section in /usr/share/file/magic for details.
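+ // A fat header stores nfat_arch (a tiny count) in bytes 4-7, while a Java + // class file stores its major version in bytes 6-7 (at least 45 in + // practice), which is why a small byte 7 is taken to mean Mach-O.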
+ if (Magic.size() >= 8 && Magic[7] < 43) + return file_magic::macho_universal_binary; + } + break; + + // The two magic numbers for mach-o are: + // 0xfeedface - 32-bit mach-o + // 0xfeedfacf - 64-bit mach-o + case 0xFE: + case 0xCE: + case 0xCF: { + uint16_t type = 0; + if (startswith(Magic, "\xFE\xED\xFA\xCE") || + startswith(Magic, "\xFE\xED\xFA\xCF")) { + /* Native endian */ + size_t MinSize; + if (Magic[3] == char(0xCE)) + MinSize = sizeof(MachO::mach_header); + else + MinSize = sizeof(MachO::mach_header_64); + if (Magic.size() >= MinSize) + type = Magic[12] << 24 | Magic[13] << 12 | Magic[14] << 8 | Magic[15]; + } else if (startswith(Magic, "\xCE\xFA\xED\xFE") || + startswith(Magic, "\xCF\xFA\xED\xFE")) { + /* Reverse endian */ + size_t MinSize; + if (Magic[0] == char(0xCE)) + MinSize = sizeof(MachO::mach_header); + else + MinSize = sizeof(MachO::mach_header_64); + if (Magic.size() >= MinSize) + type = Magic[15] << 24 | Magic[14] << 12 | Magic[13] << 8 | Magic[12]; + } + switch (type) { + default: + break; + case 1: + return file_magic::macho_object; + case 2: + return file_magic::macho_executable; + case 3: + return file_magic::macho_fixed_virtual_memory_shared_lib; + case 4: + return file_magic::macho_core; + case 5: + return file_magic::macho_preload_executable; + case 6: + return file_magic::macho_dynamically_linked_shared_lib; + case 7: + return file_magic::macho_dynamic_linker; + case 8: + return file_magic::macho_bundle; + case 9: + return file_magic::macho_dynamically_linked_shared_lib_stub; + case 10: + return file_magic::macho_dsym_companion; + case 11: + return file_magic::macho_kext_bundle; + } + break; + } + case 0xF0: // PowerPC Windows + case 0x83: // Alpha 32-bit + case 0x84: // Alpha 64-bit + case 0x66: // MIPS R4000 Windows + case 0x50: // mc68K + case 0x4c: // 80386 Windows + case 0xc4: // ARMNT Windows + if (Magic[1] == 0x01) + return file_magic::coff_object; + LLVM_FALLTHROUGH; + + case 0x90: // PA-RISC Windows + case 0x68: // mc68K Windows + if (Magic[1] == 0x02) + return file_magic::coff_object; + break; + + case 'M': // Possible MS-DOS stub on Windows PE file + if (startswith(Magic, "MZ")) { + uint32_t off = read32le(Magic.data() + 0x3c); + // PE/COFF file, either EXE or DLL. + if (off < Magic.size() && + memcmp(Magic.data() + off, COFF::PEMagic, sizeof(COFF::PEMagic)) == 0) + return file_magic::pecoff_executable; + } + break; + + case 0x64: // x86-64 or ARM64 Windows.
+ if (Magic[1] == char(0x86) || Magic[1] == char(0xaa)) + return file_magic::coff_object; + break; + + default: + break; + } + return file_magic::unknown; +} + +std::error_code llvm::identify_magic(const Twine &Path, file_magic &Result) { + int FD; + if (std::error_code EC = openFileForRead(Path, FD)) + return EC; + + char Buffer[32]; + int Length = read(FD, Buffer, sizeof(Buffer)); + if (close(FD) != 0 || Length < 0) + return std::error_code(errno, std::generic_category()); + + Result = identify_magic(StringRef(Buffer, Length)); + return std::error_code(); +} diff --git a/interpreter/llvm/src/lib/Bitcode/Reader/BitcodeReader.cpp b/interpreter/llvm/src/lib/Bitcode/Reader/BitcodeReader.cpp index 76298121566aa..2b4970a80cddb 100644 --- a/interpreter/llvm/src/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/interpreter/llvm/src/lib/Bitcode/Reader/BitcodeReader.cpp @@ -28,8 +28,8 @@ #include "llvm/IR/Attributes.h" #include "llvm/IR/AutoUpgrade.h" #include "llvm/IR/BasicBlock.h" -#include "llvm/IR/CallingConv.h" #include "llvm/IR/CallSite.h" +#include "llvm/IR/CallingConv.h" #include "llvm/IR/Comdat.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" @@ -40,13 +40,13 @@ #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/DiagnosticPrinter.h" #include "llvm/IR/Function.h" +#include "llvm/IR/GVMaterializer.h" #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalIFunc.h" #include "llvm/IR/GlobalIndirectSymbol.h" #include "llvm/IR/GlobalObject.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" -#include "llvm/IR/GVMaterializer.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/InstrTypes.h" @@ -513,6 +513,7 @@ class BitcodeReader : public BitcodeReaderBase, public GVMaterializer { TBAAVerifier TBAAVerifyHelper; std::vector BundleTags; + SmallVector SSIDs; public: BitcodeReader(BitstreamCursor Stream, StringRef Strtab, @@ -648,6 +649,7 @@ class BitcodeReader : public BitcodeReaderBase, public GVMaterializer { Error parseTypeTable(); Error parseTypeTableBody(); Error parseOperandBundleTags(); + Error parseSyncScopeNames(); Expected recordValue(SmallVectorImpl &Record, unsigned NameIndex, Triple &TT); @@ -668,6 +670,8 @@ class BitcodeReader : public BitcodeReaderBase, public GVMaterializer { Error findFunctionInStream( Function *F, DenseMap::iterator DeferredFunctionInfoIterator); + + SyncScope::ID getDecodedSyncScopeID(unsigned Val); }; /// Class to manage reading and parsing function summary index bitcode @@ -733,13 +737,13 @@ class ModuleSummaryIndexBitcodeReader : public BitcodeReaderBase { std::vector makeCallList(ArrayRef Record, bool IsOldProfileFormat, bool HasProfile); - Error parseEntireSummary(); + Error parseEntireSummary(unsigned ID); Error parseModuleStringTable(); std::pair getValueInfoFromValueId(unsigned ValueId); - ModulePathStringTableTy::iterator addThisModulePath(); + ModuleSummaryIndex::ModuleInfo *addThisModule(); }; } // end anonymous namespace @@ -865,11 +869,11 @@ static GlobalValueSummary::GVFlags getDecodedGVSummaryFlags(uint64_t RawFlags, auto Linkage = GlobalValue::LinkageTypes(RawFlags & 0xF); // 4 bits RawFlags = RawFlags >> 4; bool NotEligibleToImport = (RawFlags & 0x1) || Version < 3; - // The LiveRoot flag wasn't introduced until version 3. For dead stripping + // The Live flag wasn't introduced until version 3. For dead stripping // to work correctly on earlier versions, we must conservatively treat all // values as live. 
- bool LiveRoot = (RawFlags & 0x2) || Version < 3; - return GlobalValueSummary::GVFlags(Linkage, NotEligibleToImport, LiveRoot); + bool Live = (RawFlags & 0x2) || Version < 3; + return GlobalValueSummary::GVFlags(Linkage, NotEligibleToImport, Live); } static GlobalValue::VisibilityTypes getDecodedVisibility(unsigned Val) { @@ -998,14 +1002,6 @@ static AtomicOrdering getDecodedOrdering(unsigned Val) { } } -static SynchronizationScope getDecodedSynchScope(unsigned Val) { - switch (Val) { - case bitc::SYNCHSCOPE_SINGLETHREAD: return SingleThread; - default: // Map unknown scopes to cross-thread. - case bitc::SYNCHSCOPE_CROSSTHREAD: return CrossThread; - } -} - static Comdat::SelectionKind getDecodedComdatSelectionKind(unsigned Val) { switch (Val) { default: // Map unknown selection kinds to any. @@ -1745,6 +1741,44 @@ Error BitcodeReader::parseOperandBundleTags() { } } +Error BitcodeReader::parseSyncScopeNames() { + if (Stream.EnterSubBlock(bitc::SYNC_SCOPE_NAMES_BLOCK_ID)) + return error("Invalid record"); + + if (!SSIDs.empty()) + return error("Invalid multiple synchronization scope names blocks"); + + SmallVector<uint64_t, 64> Record; + while (true) { + BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); + switch (Entry.Kind) { + case BitstreamEntry::SubBlock: // Handled for us already. + case BitstreamEntry::Error: + return error("Malformed block"); + case BitstreamEntry::EndBlock: + if (SSIDs.empty()) + return error("Invalid empty synchronization scope names block"); + return Error::success(); + case BitstreamEntry::Record: + // The interesting case. + break; + } + + // Synchronization scope names are implicitly mapped to synchronization + // scope IDs by their order. + + if (Stream.readRecord(Entry.ID, Record) != bitc::SYNC_SCOPE_NAME) + return error("Invalid record"); + + SmallString<16> SSN; + if (convertToString(Record, 0, SSN)) + return error("Invalid record"); + + SSIDs.push_back(Context.getOrInsertSyncScopeID(SSN)); + Record.clear(); + } +} + /// Associate a value with its name from the given index in the provided record. Expected<Value *> BitcodeReader::recordValue(SmallVectorImpl<uint64_t> &Record, unsigned NameIndex, Triple &TT) { @@ -2608,6 +2642,16 @@ Error BitcodeReader::materializeMetadata() { if (Error Err = MDLoader->parseModuleMetadata()) return Err; } + + // Upgrade "Linker Options" module flag to "llvm.linker.options" module-level + // metadata.
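+ // Each operand of the flag is itself an MDNode carrying one option set, so + // the loop below transfers them one at a time onto the named metadata node.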
+ if (Metadata *Val = TheModule->getModuleFlag("Linker Options")) { + NamedMDNode *LinkerOpts = + TheModule->getOrInsertNamedMetadata("llvm.linker.options"); + for (const MDOperand &MDOptions : cast<MDNode>(Val)->operands()) + LinkerOpts->addOperand(cast<MDNode>(MDOptions)); + } + DeferredMetadataInfo.clear(); return Error::success(); } @@ -3122,6 +3166,10 @@ Error BitcodeReader::parseModule(uint64_t ResumeBit, if (Error Err = parseOperandBundleTags()) return Err; break; + case bitc::SYNC_SCOPE_NAMES_BLOCK_ID: + if (Error Err = parseSyncScopeNames()) + return Err; + break; } continue; @@ -4194,7 +4242,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) { break; } case bitc::FUNC_CODE_INST_LOADATOMIC: { - // LOADATOMIC: [opty, op, align, vol, ordering, synchscope] + // LOADATOMIC: [opty, op, align, vol, ordering, ssid] unsigned OpNum = 0; Value *Op; if (getValueTypePair(Record, OpNum, NextValueNo, Op) || @@ -4216,12 +4264,12 @@ Error BitcodeReader::parseFunctionBody(Function *F) { return error("Invalid record"); if (Ordering != AtomicOrdering::NotAtomic && Record[OpNum] == 0) return error("Invalid record"); - SynchronizationScope SynchScope = getDecodedSynchScope(Record[OpNum + 3]); + SyncScope::ID SSID = getDecodedSyncScopeID(Record[OpNum + 3]); unsigned Align; if (Error Err = parseAlignmentValue(Record[OpNum], Align)) return Err; - I = new LoadInst(Op, "", Record[OpNum+1], Align, Ordering, SynchScope); + I = new LoadInst(Op, "", Record[OpNum+1], Align, Ordering, SSID); InstructionList.push_back(I); break; @@ -4250,7 +4298,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) { } case bitc::FUNC_CODE_INST_STOREATOMIC: case bitc::FUNC_CODE_INST_STOREATOMIC_OLD: { - // STOREATOMIC: [ptrty, ptr, val, align, vol, ordering, synchscope] + // STOREATOMIC: [ptrty, ptr, val, align, vol, ordering, ssid] unsigned OpNum = 0; Value *Val, *Ptr; if (getValueTypePair(Record, OpNum, NextValueNo, Ptr) || @@ -4270,20 +4318,20 @@ Error BitcodeReader::parseFunctionBody(Function *F) { Ordering == AtomicOrdering::Acquire || Ordering == AtomicOrdering::AcquireRelease) return error("Invalid record"); - SynchronizationScope SynchScope = getDecodedSynchScope(Record[OpNum + 3]); + SyncScope::ID SSID = getDecodedSyncScopeID(Record[OpNum + 3]); if (Ordering != AtomicOrdering::NotAtomic && Record[OpNum] == 0) return error("Invalid record"); unsigned Align; if (Error Err = parseAlignmentValue(Record[OpNum], Align)) return Err; - I = new StoreInst(Val, Ptr, Record[OpNum+1], Align, Ordering, SynchScope); + I = new StoreInst(Val, Ptr, Record[OpNum+1], Align, Ordering, SSID); InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_CMPXCHG_OLD: case bitc::FUNC_CODE_INST_CMPXCHG: { - // CMPXCHG:[ptrty, ptr, cmp, new, vol, successordering, synchscope, + // CMPXCHG:[ptrty, ptr, cmp, new, vol, successordering, ssid, // failureordering?, isweak?]
unsigned OpNum = 0; Value *Ptr, *Cmp, *New; @@ -4300,7 +4348,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) { if (SuccessOrdering == AtomicOrdering::NotAtomic || SuccessOrdering == AtomicOrdering::Unordered) return error("Invalid record"); - SynchronizationScope SynchScope = getDecodedSynchScope(Record[OpNum + 2]); + SyncScope::ID SSID = getDecodedSyncScopeID(Record[OpNum + 2]); if (Error Err = typeCheckLoadStoreInst(Cmp->getType(), Ptr->getType())) return Err; @@ -4312,7 +4360,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) { FailureOrdering = getDecodedOrdering(Record[OpNum + 3]); I = new AtomicCmpXchgInst(Ptr, Cmp, New, SuccessOrdering, FailureOrdering, - SynchScope); + SSID); cast<AtomicCmpXchgInst>(I)->setVolatile(Record[OpNum]); if (Record.size() < 8) { @@ -4329,7 +4377,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) { break; } case bitc::FUNC_CODE_INST_ATOMICRMW: { - // ATOMICRMW:[ptrty, ptr, val, op, vol, ordering, synchscope] + // ATOMICRMW:[ptrty, ptr, val, op, vol, ordering, ssid] unsigned OpNum = 0; Value *Ptr, *Val; if (getValueTypePair(Record, OpNum, NextValueNo, Ptr) || @@ -4346,13 +4394,13 @@ Error BitcodeReader::parseFunctionBody(Function *F) { if (Ordering == AtomicOrdering::NotAtomic || Ordering == AtomicOrdering::Unordered) return error("Invalid record"); - SynchronizationScope SynchScope = getDecodedSynchScope(Record[OpNum + 3]); - I = new AtomicRMWInst(Operation, Ptr, Val, Ordering, SynchScope); + SyncScope::ID SSID = getDecodedSyncScopeID(Record[OpNum + 3]); + I = new AtomicRMWInst(Operation, Ptr, Val, Ordering, SSID); cast<AtomicRMWInst>(I)->setVolatile(Record[OpNum+1]); InstructionList.push_back(I); break; } - case bitc::FUNC_CODE_INST_FENCE: { // FENCE:[ordering, synchscope] + case bitc::FUNC_CODE_INST_FENCE: { // FENCE:[ordering, ssid] if (2 != Record.size()) return error("Invalid record"); AtomicOrdering Ordering = getDecodedOrdering(Record[0]); @@ -4360,8 +4408,8 @@ Error BitcodeReader::parseFunctionBody(Function *F) { Ordering == AtomicOrdering::Unordered || Ordering == AtomicOrdering::Monotonic) return error("Invalid record"); - SynchronizationScope SynchScope = getDecodedSynchScope(Record[1]); - I = new FenceInst(Context, Ordering, SynchScope); + SyncScope::ID SSID = getDecodedSyncScopeID(Record[1]); + I = new FenceInst(Context, Ordering, SSID); InstructionList.push_back(I); break; } @@ -4489,11 +4537,11 @@ Error BitcodeReader::parseFunctionBody(Function *F) { // Add instruction to end of current BB. If there is no current BB, reject // this file. if (!CurBB) { - delete I; + I->deleteValue(); return error("Invalid instruction with no BB"); } if (!OperandBundles.empty()) { - delete I; + I->deleteValue(); return error("Operand bundles found with no consumer"); } CurBB->getInstList().push_back(I); @@ -4557,6 +4605,14 @@ Error BitcodeReader::findFunctionInStream( return Error::success(); } +SyncScope::ID BitcodeReader::getDecodedSyncScopeID(unsigned Val) { + if (Val == SyncScope::SingleThread || Val == SyncScope::System) + return SyncScope::ID(Val); + if (Val >= SSIDs.size()) + return SyncScope::System; // Map unknown synchronization scopes to system.
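+ // Otherwise Val indexes the names read by parseSyncScopeNames(), which + // interned them on the LLVMContext in the order they appeared in the block.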
+ return SSIDs[Val]; +} + //===----------------------------------------------------------------------===// // GVMaterializer implementation //===----------------------------------------------------------------------===// @@ -4691,9 +4747,9 @@ ModuleSummaryIndexBitcodeReader::ModuleSummaryIndexBitcodeReader( : BitcodeReaderBase(std::move(Cursor), Strtab), TheIndex(TheIndex), ModulePath(ModulePath), ModuleId(ModuleId) {} -ModulePathStringTableTy::iterator -ModuleSummaryIndexBitcodeReader::addThisModulePath() { - return TheIndex.addModulePath(ModulePath, ModuleId); +ModuleSummaryIndex::ModuleInfo * +ModuleSummaryIndexBitcodeReader::addThisModule() { + return TheIndex.addModule(ModulePath, ModuleId); } std::pair @@ -4844,6 +4900,7 @@ Error ModuleSummaryIndexBitcodeReader::parseModule() { return error("Invalid record"); break; case bitc::GLOBALVAL_SUMMARY_BLOCK_ID: + case bitc::FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID: assert(!SeenValueSymbolTable && "Already read VST when parsing summary block?"); // We might not have a VST if there were no values in the @@ -4856,7 +4913,7 @@ Error ModuleSummaryIndexBitcodeReader::parseModule() { SeenValueSymbolTable = true; } SeenGlobalValSummary = true; - if (Error Err = parseEntireSummary()) + if (Error Err = parseEntireSummary(Entry.ID)) return Err; break; case bitc::MODULE_STRTAB_BLOCK_ID: @@ -4889,7 +4946,7 @@ Error ModuleSummaryIndexBitcodeReader::parseModule() { case bitc::MODULE_CODE_HASH: { if (Record.size() != 5) return error("Invalid hash length " + Twine(Record.size()).str()); - auto &Hash = addThisModulePath()->second.second; + auto &Hash = addThisModule()->second.second; int Pos = 0; for (auto &Val : Record) { assert(!(Val >> 32) && "Unexpected high bits set"); @@ -4964,8 +5021,8 @@ std::vector ModuleSummaryIndexBitcodeReader::makeCallLi // Eagerly parse the entire summary block. This populates the GlobalValueSummary // objects in the index. -Error ModuleSummaryIndexBitcodeReader::parseEntireSummary() { - if (Stream.EnterSubBlock(bitc::GLOBALVAL_SUMMARY_BLOCK_ID)) +Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) { + if (Stream.EnterSubBlock(ID)) return error("Invalid record"); SmallVector Record; @@ -5070,7 +5127,7 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary() { PendingTypeTestAssumeConstVCalls.clear(); PendingTypeCheckedLoadConstVCalls.clear(); auto VIAndOriginalGUID = getValueInfoFromValueId(ValueID); - FS->setModulePath(addThisModulePath()->first()); + FS->setModulePath(addThisModule()->first()); FS->setOriginalName(VIAndOriginalGUID.second); TheIndex.addGlobalValueSummary(VIAndOriginalGUID.first, std::move(FS)); break; @@ -5090,7 +5147,7 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary() { // string table section in the per-module index, we create a single // module path string table entry with an empty (0) ID to take // ownership. 
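// (Illustrative: in a per-module index addThisModule() is idempotent, so
// every summary's setModulePath() ends up pointing at the same StringMap
// key, which is what owns the string.)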
- AS->setModulePath(addThisModulePath()->first()); + AS->setModulePath(addThisModule()->first()); GlobalValue::GUID AliaseeGUID = getValueInfoFromValueId(AliaseeID).first.getGUID(); @@ -5113,7 +5170,7 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary() { std::vector Refs = makeRefList(ArrayRef(Record).slice(2)); auto FS = llvm::make_unique(Flags, std::move(Refs)); - FS->setModulePath(addThisModulePath()->first()); + FS->setModulePath(addThisModule()->first()); auto GUID = getValueInfoFromValueId(ValueID); FS->setOriginalName(GUID.second); TheIndex.addGlobalValueSummary(GUID.first, std::move(FS)); @@ -5241,6 +5298,20 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary() { {{Record[0], Record[1]}, {Record.begin() + 2, Record.end()}}); break; } + case bitc::FS_CFI_FUNCTION_DEFS: { + std::set &CfiFunctionDefs = TheIndex.cfiFunctionDefs(); + for (unsigned I = 0; I != Record.size(); I += 2) + CfiFunctionDefs.insert( + {Strtab.data() + Record[I], static_cast(Record[I + 1])}); + break; + } + case bitc::FS_CFI_FUNCTION_DECLS: { + std::set &CfiFunctionDecls = TheIndex.cfiFunctionDecls(); + for (unsigned I = 0; I != Record.size(); I += 2) + CfiFunctionDecls.insert( + {Strtab.data() + Record[I], static_cast(Record[I + 1])}); + break; + } } } llvm_unreachable("Exit infinite loop"); @@ -5255,7 +5326,7 @@ Error ModuleSummaryIndexBitcodeReader::parseModuleStringTable() { SmallVector Record; SmallString<128> ModulePath; - ModulePathStringTableTy::iterator LastSeenModulePath; + ModuleSummaryIndex::ModuleInfo *LastSeenModule = nullptr; while (true) { BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); @@ -5282,8 +5353,8 @@ Error ModuleSummaryIndexBitcodeReader::parseModuleStringTable() { if (convertToString(Record, 1, ModulePath)) return error("Invalid record"); - LastSeenModulePath = TheIndex.addModulePath(ModulePath, ModuleId); - ModuleIdMap[ModuleId] = LastSeenModulePath->first(); + LastSeenModule = TheIndex.addModule(ModulePath, ModuleId); + ModuleIdMap[ModuleId] = LastSeenModule->first(); ModulePath.clear(); break; @@ -5292,15 +5363,15 @@ Error ModuleSummaryIndexBitcodeReader::parseModuleStringTable() { case bitc::MST_CODE_HASH: { if (Record.size() != 5) return error("Invalid hash length " + Twine(Record.size()).str()); - if (LastSeenModulePath == TheIndex.modulePaths().end()) + if (!LastSeenModule) return error("Invalid hash that does not follow a module path"); int Pos = 0; for (auto &Val : Record) { assert(!(Val >> 32) && "Unexpected high bits set"); - LastSeenModulePath->second.second[Pos++] = Val; + LastSeenModule->second.second[Pos++] = Val; } - // Reset LastSeenModulePath to avoid overriding the hash unexpectedly. - LastSeenModulePath = TheIndex.modulePaths().end(); + // Reset LastSeenModule to avoid overriding the hash unexpectedly. 
+ LastSeenModule = nullptr; break; } } @@ -5335,8 +5406,9 @@ const std::error_category &llvm::BitcodeErrorCategory() { return *ErrorCategory; } -static Expected readStrtab(BitstreamCursor &Stream) { - if (Stream.EnterSubBlock(bitc::STRTAB_BLOCK_ID)) +static Expected readBlobInRecord(BitstreamCursor &Stream, + unsigned Block, unsigned RecordID) { + if (Stream.EnterSubBlock(Block)) return error("Invalid record"); StringRef Strtab; @@ -5357,7 +5429,7 @@ static Expected readStrtab(BitstreamCursor &Stream) { case BitstreamEntry::Record: StringRef Blob; SmallVector Record; - if (Stream.readRecord(Entry.ID, Record, &Blob) == bitc::STRTAB_BLOB) + if (Stream.readRecord(Entry.ID, Record, &Blob) == RecordID) Strtab = Blob; break; } @@ -5370,12 +5442,20 @@ static Expected readStrtab(BitstreamCursor &Stream) { Expected> llvm::getBitcodeModuleList(MemoryBufferRef Buffer) { + auto FOrErr = getBitcodeFileContents(Buffer); + if (!FOrErr) + return FOrErr.takeError(); + return std::move(FOrErr->Mods); +} + +Expected +llvm::getBitcodeFileContents(MemoryBufferRef Buffer) { Expected StreamOrErr = initStream(Buffer); if (!StreamOrErr) return StreamOrErr.takeError(); BitstreamCursor &Stream = *StreamOrErr; - std::vector Modules; + BitcodeFileContents F; while (true) { uint64_t BCBegin = Stream.getCurrentByteNo(); @@ -5383,7 +5463,7 @@ llvm::getBitcodeModuleList(MemoryBufferRef Buffer) { // of the bitcode stream (e.g. Apple's ar tool). If we are close enough to // the end that there cannot possibly be another module, stop looking. if (BCBegin + 8 >= Stream.getBitcodeBytes().size()) - return Modules; + return F; BitstreamEntry Entry = Stream.advance(); switch (Entry.Kind) { @@ -5409,26 +5489,49 @@ llvm::getBitcodeModuleList(MemoryBufferRef Buffer) { if (Stream.SkipBlock()) return error("Malformed block"); - Modules.push_back({Stream.getBitcodeBytes().slice( - BCBegin, Stream.getCurrentByteNo() - BCBegin), - Buffer.getBufferIdentifier(), IdentificationBit, - ModuleBit}); + F.Mods.push_back({Stream.getBitcodeBytes().slice( + BCBegin, Stream.getCurrentByteNo() - BCBegin), + Buffer.getBufferIdentifier(), IdentificationBit, + ModuleBit}); continue; } if (Entry.ID == bitc::STRTAB_BLOCK_ID) { - Expected Strtab = readStrtab(Stream); + Expected Strtab = + readBlobInRecord(Stream, bitc::STRTAB_BLOCK_ID, bitc::STRTAB_BLOB); if (!Strtab) return Strtab.takeError(); // This string table is used by every preceding bitcode module that does // not have its own string table. A bitcode file may have multiple // string tables if it was created by binary concatenation, for example // with "llvm-cat -b". - for (auto I = Modules.rbegin(), E = Modules.rend(); I != E; ++I) { + for (auto I = F.Mods.rbegin(), E = F.Mods.rend(); I != E; ++I) { if (!I->Strtab.empty()) break; I->Strtab = *Strtab; } + // Similarly, the string table is used by every preceding symbol table; + // normally there will be just one unless the bitcode file was created + // by binary concatenation. + if (!F.Symtab.empty() && F.StrtabForSymtab.empty()) + F.StrtabForSymtab = *Strtab; + continue; + } + + if (Entry.ID == bitc::SYMTAB_BLOCK_ID) { + Expected SymtabOrErr = + readBlobInRecord(Stream, bitc::SYMTAB_BLOCK_ID, bitc::SYMTAB_BLOB); + if (!SymtabOrErr) + return SymtabOrErr.takeError(); + + // We can expect the bitcode file to have multiple symbol tables if it + // was created by binary concatenation. In that case we silently + // ignore any subsequent symbol tables, which is fine because this is a + // low level function. 
The client is expected to notice that the number + // of modules in the symbol table does not match the number of modules + // in the input file and regenerate the symbol table. + if (F.Symtab.empty()) + F.Symtab = *SymtabOrErr; continue; } @@ -5499,13 +5602,16 @@ BitcodeModule::getLazyModule(LLVMContext &Context, bool ShouldLazyLoadMetadata, } // Parse the specified bitcode buffer and merge the index into CombinedIndex. +// We don't use ModuleIdentifier here because the client may need to control the +// module path used in the combined summary (e.g. when reading summaries for +// regular LTO modules). Error BitcodeModule::readSummary(ModuleSummaryIndex &CombinedIndex, - unsigned ModuleId) { + StringRef ModulePath, uint64_t ModuleId) { BitstreamCursor Stream(Buffer); Stream.JumpToBit(ModuleBit); ModuleSummaryIndexBitcodeReader R(std::move(Stream), Strtab, CombinedIndex, - ModuleIdentifier, ModuleId); + ModulePath, ModuleId); return R.parseModule(); } @@ -5525,7 +5631,7 @@ Expected> BitcodeModule::getSummary() { } // Check if the given bitcode buffer contains a global value summary block. -Expected BitcodeModule::hasSummary() { +Expected BitcodeModule::getLTOInfo() { BitstreamCursor Stream(Buffer); Stream.JumpToBit(ModuleBit); @@ -5539,11 +5645,14 @@ Expected BitcodeModule::hasSummary() { case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::EndBlock: - return false; + return BitcodeLTOInfo{/*IsThinLTO=*/false, /*HasSummary=*/false}; case BitstreamEntry::SubBlock: if (Entry.ID == bitc::GLOBALVAL_SUMMARY_BLOCK_ID) - return true; + return BitcodeLTOInfo{/*IsThinLTO=*/true, /*HasSummary=*/true}; + + if (Entry.ID == bitc::FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID) + return BitcodeLTOInfo{/*IsThinLTO=*/false, /*HasSummary=*/true}; // Ignore other sub-blocks. 
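// (Illustrative sketch, not from the patch: an LTO client can now dispatch
// on both bits instead of the old boolean hasGlobalValueSummary query, e.g.
//   Expected<BitcodeLTOInfo> Info = getBitcodeLTOInfo(Buf);
//   if (Info && Info->HasSummary)
//     runBackend(Info->IsThinLTO); // runBackend is a hypothetical handler
// where Buf is a MemoryBufferRef over the bitcode file.)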
if (Stream.SkipBlock()) @@ -5630,12 +5739,12 @@ Expected<std::string> llvm::getBitcodeProducerString(MemoryBufferRef Buffer) { Error llvm::readModuleSummaryIndex(MemoryBufferRef Buffer, ModuleSummaryIndex &CombinedIndex, - unsigned ModuleId) { + uint64_t ModuleId) { Expected<BitcodeModule> BM = getSingleModule(Buffer); if (!BM) return BM.takeError(); - return BM->readSummary(CombinedIndex, ModuleId); + return BM->readSummary(CombinedIndex, BM->getModuleIdentifier(), ModuleId); } Expected<std::unique_ptr<ModuleSummaryIndex>> @@ -5647,12 +5756,12 @@ llvm::getModuleSummaryIndex(MemoryBufferRef Buffer) { return BM->getSummary(); } -Expected<bool> llvm::hasGlobalValueSummary(MemoryBufferRef Buffer) { +Expected<BitcodeLTOInfo> llvm::getBitcodeLTOInfo(MemoryBufferRef Buffer) { Expected<BitcodeModule> BM = getSingleModule(Buffer); if (!BM) return BM.takeError(); - return BM->hasSummary(); + return BM->getLTOInfo(); } Expected<std::unique_ptr<ModuleSummaryIndex>> diff --git a/interpreter/llvm/src/lib/Bitcode/Reader/MetadataLoader.cpp b/interpreter/llvm/src/lib/Bitcode/Reader/MetadataLoader.cpp index 42135e5949ce1..10fbcdea784ff 100644 --- a/interpreter/llvm/src/lib/Bitcode/Reader/MetadataLoader.cpp +++ b/interpreter/llvm/src/lib/Bitcode/Reader/MetadataLoader.cpp @@ -53,8 +53,8 @@ #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IR/ModuleSummaryIndex.h" @@ -407,6 +407,11 @@ void PlaceholderQueue::flush(BitcodeReaderMetadataList &MetadataList) { } // anonymous namespace +static Error error(const Twine &Message) { + return make_error<StringError>( + Message, make_error_code(BitcodeError::CorruptedBitcode)); +} + class MetadataLoader::MetadataLoaderImpl { BitcodeReaderMetadataList MetadataList; BitcodeReaderValueList &ValueList; @@ -500,7 +505,7 @@ class MetadataLoader::MetadataLoaderImpl { // Upgrade variables attached to globals. for (auto &GV : TheModule.globals()) { - SmallVector<MDNode *, 1> MDs, NewMDs; + SmallVector<MDNode *, 1> MDs; GV.getMetadata(LLVMContext::MD_dbg, MDs); GV.eraseMetadata(LLVMContext::MD_dbg); for (auto *MD : MDs) @@ -533,6 +538,88 @@ class MetadataLoader::MetadataLoaderImpl { } } + /// Upgrade the expression from previous versions. + Error upgradeDIExpression(uint64_t FromVersion, + MutableArrayRef<uint64_t> &Expr, + SmallVectorImpl<uint64_t> &Buffer) { + auto N = Expr.size(); + switch (FromVersion) { + default: + return error("Invalid record"); + case 0: + if (N >= 3 && Expr[N - 3] == dwarf::DW_OP_bit_piece) + Expr[N - 3] = dwarf::DW_OP_LLVM_fragment; + LLVM_FALLTHROUGH; + case 1: + // Move DW_OP_deref to the end. + if (N && Expr[0] == dwarf::DW_OP_deref) { + auto End = Expr.end(); + if (Expr.size() >= 3 && + *std::prev(End, 3) == dwarf::DW_OP_LLVM_fragment) + End = std::prev(End, 3); + std::move(std::next(Expr.begin()), End, Expr.begin()); + *std::prev(End) = dwarf::DW_OP_deref; + } + NeedDeclareExpressionUpgrade = true; + LLVM_FALLTHROUGH; + case 2: { + // Change DW_OP_plus to DW_OP_plus_uconst. + // Change DW_OP_minus to DW_OP_constu, DW_OP_minus + auto SubExpr = ArrayRef<uint64_t>(Expr); + while (!SubExpr.empty()) { + // Skip past other operators with their operands + // for this version of the IR, obtained + // from historic DIExpression::ExprOperand::getSize().
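+ // For example (illustrative, not in the original patch): the version-2
+ // expression {DW_OP_plus, 8, DW_OP_minus, 4} is rewritten below into
+ // {DW_OP_plus_uconst, 8, DW_OP_constu, 4, DW_OP_minus}, since from
+ // version 3 on DW_OP_plus and DW_OP_minus take their operands from the
+ // DWARF expression stack rather than from an inline literal.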
+ size_t HistoricSize; + switch (SubExpr.front()) { + default: + HistoricSize = 1; + break; + case dwarf::DW_OP_constu: + case dwarf::DW_OP_minus: + case dwarf::DW_OP_plus: + HistoricSize = 2; + break; + case dwarf::DW_OP_LLVM_fragment: + HistoricSize = 3; + break; + } + + // If the expression is malformed, make sure we don't + // copy more elements than we should. + HistoricSize = std::min(SubExpr.size(), HistoricSize); + ArrayRef Args = SubExpr.slice(1, HistoricSize-1); + + switch (SubExpr.front()) { + case dwarf::DW_OP_plus: + Buffer.push_back(dwarf::DW_OP_plus_uconst); + Buffer.append(Args.begin(), Args.end()); + break; + case dwarf::DW_OP_minus: + Buffer.push_back(dwarf::DW_OP_constu); + Buffer.append(Args.begin(), Args.end()); + Buffer.push_back(dwarf::DW_OP_minus); + break; + default: + Buffer.push_back(*SubExpr.begin()); + Buffer.append(Args.begin(), Args.end()); + break; + } + + // Continue with remaining elements. + SubExpr = SubExpr.slice(HistoricSize); + } + Expr = MutableArrayRef(Buffer); + LLVM_FALLTHROUGH; + } + case 3: + // Up-to-date! + break; + } + + return Error::success(); + } + void upgradeDebugInfo() { upgradeCUSubprograms(); upgradeCUVariables(); @@ -590,11 +677,6 @@ class MetadataLoader::MetadataLoaderImpl { void upgradeDebugIntrinsics(Function &F) { upgradeDeclareExpressions(F); } }; -static Error error(const Twine &Message) { - return make_error( - Message, make_error_code(BitcodeError::CorruptedBitcode)); -} - Expected MetadataLoader::MetadataLoaderImpl::lazyLoadModuleMetadataBlock() { IndexCursor = Stream; @@ -1551,34 +1633,13 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( IsDistinct = Record[0] & 1; uint64_t Version = Record[0] >> 1; auto Elts = MutableArrayRef(Record).slice(1); - unsigned N = Elts.size(); - // Perform various upgrades. - switch (Version) { - case 0: - if (N >= 3 && Elts[N - 3] == dwarf::DW_OP_bit_piece) - Elts[N - 3] = dwarf::DW_OP_LLVM_fragment; - LLVM_FALLTHROUGH; - case 1: - // Move DW_OP_deref to the end. - if (N && Elts[0] == dwarf::DW_OP_deref) { - auto End = Elts.end(); - if (Elts.size() >= 3 && *std::prev(End, 3) == dwarf::DW_OP_LLVM_fragment) - End = std::prev(End, 3); - std::move(std::next(Elts.begin()), End, Elts.begin()); - *std::prev(End) = dwarf::DW_OP_deref; - } - NeedDeclareExpressionUpgrade = true; - LLVM_FALLTHROUGH; - case 2: - // Up-to-date! - break; - default: - return error("Invalid record"); - } + + SmallVector Buffer; + if (Error Err = upgradeDIExpression(Version, Elts, Buffer)) + return Err; MetadataList.assignValue( - GET_OR_DISTINCT(DIExpression, (Context, makeArrayRef(Record).slice(1))), - NextMetadataNo); + GET_OR_DISTINCT(DIExpression, (Context, Elts)), NextMetadataNo); NextMetadataNo++; break; } @@ -1610,15 +1671,17 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( break; } case bitc::METADATA_IMPORTED_ENTITY: { - if (Record.size() != 6) + if (Record.size() != 6 && Record.size() != 7) return error("Invalid record"); IsDistinct = Record[0]; + bool HasFile = (Record.size() == 7); MetadataList.assignValue( GET_OR_DISTINCT(DIImportedEntity, (Context, Record[1], getMDOrNull(Record[2]), - getDITypeRefOrNull(Record[3]), Record[4], - getMDString(Record[5]))), + getDITypeRefOrNull(Record[3]), + HasFile ? getMDOrNull(Record[6]) : nullptr, + HasFile ? 
Record[4] : 0, getMDString(Record[5]))), NextMetadataNo); NextMetadataNo++; break; diff --git a/interpreter/llvm/src/lib/Bitcode/Reader/ValueList.cpp b/interpreter/llvm/src/lib/Bitcode/Reader/ValueList.cpp index d1a2a11bbfadb..f2a3439a87be6 100644 --- a/interpreter/llvm/src/lib/Bitcode/Reader/ValueList.cpp +++ b/interpreter/llvm/src/lib/Bitcode/Reader/ValueList.cpp @@ -73,7 +73,7 @@ void BitcodeReaderValueList::assignValue(Value *V, unsigned Idx) { // If there was a forward reference to this value, replace it. Value *PrevVal = OldV; OldV->replaceAllUsesWith(V); - delete PrevVal; + PrevVal->deleteValue(); } } @@ -194,6 +194,6 @@ void BitcodeReaderValueList::resolveConstantForwardRefs() { // Update all ValueHandles, they should be the only users at this point. Placeholder->replaceAllUsesWith(RealVal); - delete Placeholder; + Placeholder->deleteValue(); } } diff --git a/interpreter/llvm/src/lib/Bitcode/Writer/BitcodeWriter.cpp b/interpreter/llvm/src/lib/Bitcode/Writer/BitcodeWriter.cpp index 1f8b50342c2d6..dcffde1742cd7 100644 --- a/interpreter/llvm/src/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/interpreter/llvm/src/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -29,10 +29,12 @@ #include "llvm/IR/UseListOrder.h" #include "llvm/IR/ValueSymbolTable.h" #include "llvm/MC/StringTableBuilder.h" +#include "llvm/Object/IRSymtab.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/Program.h" #include "llvm/Support/SHA1.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" #include #include @@ -77,10 +79,13 @@ class BitcodeWriterBase { /// The stream created and owned by the client. BitstreamWriter &Stream; + StringTableBuilder &StrtabBuilder; + public: /// Constructs a BitcodeWriterBase object that writes to the provided /// \p Stream. - BitcodeWriterBase(BitstreamWriter &Stream) : Stream(Stream) {} + BitcodeWriterBase(BitstreamWriter &Stream, StringTableBuilder &StrtabBuilder) + : Stream(Stream), StrtabBuilder(StrtabBuilder) {} protected: void writeBitcodeHeader(); @@ -97,8 +102,6 @@ class ModuleBitcodeWriter : public BitcodeWriterBase { /// Pointer to the buffer allocated by caller for bitcode writing. const SmallVectorImpl &Buffer; - StringTableBuilder &StrtabBuilder; - /// The Module to write to bitcode. const Module &M; @@ -111,6 +114,8 @@ class ModuleBitcodeWriter : public BitcodeWriterBase { /// True if a module hash record should be written. bool GenerateHash; + SHA1 Hasher; + /// If non-null, when GenerateHash is true, the resulting hash is written /// into ModHash. When GenerateHash is false, that specified value /// is used as the hash instead of computing from the generated bitcode. 
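A minimal sketch of the hash plumbing introduced here: the new SHA1 Hasher member accumulates strtab strings (see addToStrtab below) in addition to the module block, and the finished 160-bit digest is emitted as five 32-bit words. The helper below is assumed from the surrounding hunks, not code from the patch:

#include "llvm/Support/Endian.h"
#include "llvm/Support/SHA1.h"

// Split a finished 20-byte SHA1 digest into the 5 x i32 layout used by
// MODULE_CODE_HASH / MST_CODE_HASH records.
static void digestToHashRecord(llvm::SHA1 &Hasher, uint32_t (&Vals)[5]) {
  llvm::StringRef Hash = Hasher.result(); // 20 bytes
  for (int Pos = 0; Pos < 20; Pos += 4)
    Vals[Pos / 4] = llvm::support::endian::read32be(Hash.data() + Pos);
}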
@@ -142,8 +147,8 @@ class ModuleBitcodeWriter : public BitcodeWriterBase { BitstreamWriter &Stream, bool ShouldPreserveUseListOrder, const ModuleSummaryIndex *Index, bool GenerateHash, ModuleHash *ModHash = nullptr) - : BitcodeWriterBase(Stream), Buffer(Buffer), StrtabBuilder(StrtabBuilder), - M(*M), VE(*M, ShouldPreserveUseListOrder), Index(Index), + : BitcodeWriterBase(Stream, StrtabBuilder), Buffer(Buffer), M(*M), + VE(*M, ShouldPreserveUseListOrder), Index(Index), GenerateHash(GenerateHash), ModHash(ModHash), BitcodeStartBit(Stream.GetCurrentBitNo()) { // Assign ValueIds to any callee values in the index that came from @@ -173,6 +178,8 @@ class ModuleBitcodeWriter : public BitcodeWriterBase { private: uint64_t bitcodeStartBit() { return BitcodeStartBit; } + size_t addToStrtab(StringRef Str); + void writeAttributeGroupTable(); void writeAttributeTable(); void writeTypeTable(); @@ -259,6 +266,7 @@ class ModuleBitcodeWriter : public BitcodeWriterBase { const GlobalObject &GO); void writeModuleMetadataKinds(); void writeOperandBundleTags(); + void writeSyncScopeNames(); void writeConstants(unsigned FirstVal, unsigned LastVal, bool isGlobal); void writeModuleConstants(); bool pushValueAndType(const Value *V, unsigned InstID, @@ -309,6 +317,10 @@ class ModuleBitcodeWriter : public BitcodeWriterBase { return VE.getValueID(VI.getValue()); } std::map &valueIds() { return GUIDToValueIdMap; } + + unsigned getEncodedSyncScopeID(SyncScope::ID SSID) { + return unsigned(SSID); + } }; /// Class to manage the bitcode writing for a combined index. @@ -331,10 +343,11 @@ class IndexBitcodeWriter : public BitcodeWriterBase { /// Constructs a IndexBitcodeWriter object for the given combined index, /// writing to the provided \p Buffer. When writing a subset of the index /// for a distributed backend, provide a \p ModuleToSummariesForIndex map. - IndexBitcodeWriter(BitstreamWriter &Stream, const ModuleSummaryIndex &Index, + IndexBitcodeWriter(BitstreamWriter &Stream, StringTableBuilder &StrtabBuilder, + const ModuleSummaryIndex &Index, const std::map *ModuleToSummariesForIndex = nullptr) - : BitcodeWriterBase(Stream), Index(Index), + : BitcodeWriterBase(Stream, StrtabBuilder), Index(Index), ModuleToSummariesForIndex(ModuleToSummariesForIndex) { // Assign unique value ids to all summaries to be written, for use // in writing out the call graph edges. Save the mapping from GUID @@ -351,7 +364,8 @@ class IndexBitcodeWriter : public BitcodeWriterBase { /// Calls the callback for each value GUID and summary to be written to /// bitcode. This hides the details of whether they are being pulled from the /// entire index or just those in a provided ModuleToSummariesForIndex map. - void forEachSummary(std::function Callback) { + template + void forEachSummary(Functor Callback) { if (ModuleToSummariesForIndex) { for (auto &M : *ModuleToSummariesForIndex) for (auto &Summary : M.second) @@ -363,6 +377,29 @@ class IndexBitcodeWriter : public BitcodeWriterBase { } } + /// Calls the callback for each entry in the modulePaths StringMap that + /// should be written to the module path string table. This hides the details + /// of whether they are being pulled from the entire index or just those in a + /// provided ModuleToSummariesForIndex map. 
+ template void forEachModule(Functor Callback) { + if (ModuleToSummariesForIndex) { + for (const auto &M : *ModuleToSummariesForIndex) { + const auto &MPI = Index.modulePaths().find(M.first); + if (MPI == Index.modulePaths().end()) { + // This should only happen if the bitcode file was empty, in which + // case we shouldn't be importing (the ModuleToSummariesForIndex + // would only include the module we are writing and index for). + assert(ModuleToSummariesForIndex->size() == 1); + continue; + } + Callback(*MPI); + } + } else { + for (const auto &MPSE : Index.modulePaths()) + Callback(MPSE); + } + } + /// Main entry point for writing a combined index to bitcode. void write(); @@ -370,26 +407,10 @@ class IndexBitcodeWriter : public BitcodeWriterBase { void writeModStrings(); void writeCombinedGlobalValueSummary(); - /// Indicates whether the provided \p ModulePath should be written into - /// the module string table, e.g. if full index written or if it is in - /// the provided subset. - bool doIncludeModule(StringRef ModulePath) { - return !ModuleToSummariesForIndex || - ModuleToSummariesForIndex->count(ModulePath); - } - - bool hasValueId(GlobalValue::GUID ValGUID) { - const auto &VMI = GUIDToValueIdMap.find(ValGUID); - return VMI != GUIDToValueIdMap.end(); - } - void assignValueId(GlobalValue::GUID ValGUID) { - unsigned &ValueId = GUIDToValueIdMap[ValGUID]; - if (ValueId == 0) - ValueId = ++GlobalValueId; - } - unsigned getValueId(GlobalValue::GUID ValGUID) { + Optional getValueId(GlobalValue::GUID ValGUID) { auto VMI = GUIDToValueIdMap.find(ValGUID); - assert(VMI != GUIDToValueIdMap.end()); + if (VMI == GUIDToValueIdMap.end()) + return None; return VMI->second; } std::map &valueIds() { return GUIDToValueIdMap; } @@ -469,14 +490,6 @@ static unsigned getEncodedOrdering(AtomicOrdering Ordering) { llvm_unreachable("Invalid ordering"); } -static unsigned getEncodedSynchScope(SynchronizationScope SynchScope) { - switch (SynchScope) { - case SingleThread: return bitc::SYNCHSCOPE_SINGLETHREAD; - case CrossThread: return bitc::SYNCHSCOPE_CROSSTHREAD; - } - llvm_unreachable("Invalid synch scope"); -} - static void writeStringRecord(BitstreamWriter &Stream, unsigned Code, StringRef Str, unsigned AbbrevToUse) { SmallVector Vals; @@ -660,10 +673,12 @@ void ModuleBitcodeWriter::writeAttributeTable() { SmallVector Record; for (unsigned i = 0, e = Attrs.size(); i != e; ++i) { - const AttributeList &A = Attrs[i]; - for (unsigned i = 0, e = A.getNumSlots(); i != e; ++i) - Record.push_back( - VE.getAttributeGroupID({A.getSlotIndex(i), A.getSlotAttributes(i)})); + AttributeList AL = Attrs[i]; + for (unsigned i = AL.index_begin(), e = AL.index_end(); i != e; ++i) { + AttributeSet AS = AL.getAttributes(i); + if (AS.hasAttributes()) + Record.push_back(VE.getAttributeGroupID({i, AS})); + } Stream.EmitRecord(bitc::PARAMATTR_CODE_ENTRY, Record); Record.clear(); @@ -870,7 +885,7 @@ static uint64_t getEncodedGVSummaryFlags(GlobalValueSummary::GVFlags Flags) { uint64_t RawFlags = 0; RawFlags |= Flags.NotEligibleToImport; // bool - RawFlags |= (Flags.LiveRoot << 1); + RawFlags |= (Flags.Live << 1); // Linkage don't need to be remapped at that time for the summary. Any future // change to the getEncodedLinkage() function will need to be taken into // account here as well. 
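For reference, the renamed bit decodes as follows; a sketch of the inverse mapping, assuming the 4-bit linkage packing used by getEncodedLinkage (the final shift sits outside this hunk):

// Inverse of getEncodedGVSummaryFlags under the stated assumptions.
static void decodeGVSummaryFlags(uint64_t RawFlags, unsigned &Linkage,
                                 bool &NotEligibleToImport, bool &Live) {
  Linkage = RawFlags & 0xF; // low 4 bits: encoded linkage, unchanged
  RawFlags >>= 4;
  NotEligibleToImport = RawFlags & 0x1; // bit 0
  Live = (RawFlags >> 1) & 0x1;         // bit 1, formerly "LiveRoot"
}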
@@ -933,11 +948,17 @@ static unsigned getEncodedUnnamedAddr(const GlobalValue &GV) { llvm_unreachable("Invalid unnamed_addr"); } +size_t ModuleBitcodeWriter::addToStrtab(StringRef Str) { + if (GenerateHash) + Hasher.update(Str); + return StrtabBuilder.add(Str); +} + void ModuleBitcodeWriter::writeComdats() { SmallVector Vals; for (const Comdat *C : VE.getComdats()) { // COMDAT: [strtab offset, strtab size, selection_kind] - Vals.push_back(StrtabBuilder.add(C->getName())); + Vals.push_back(addToStrtab(C->getName())); Vals.push_back(C->getName().size()); Vals.push_back(getEncodedComdatSelectionKind(*C)); Stream.EmitRecord(bitc::MODULE_CODE_COMDAT, Vals, /*AbbrevToUse=*/0); @@ -974,19 +995,18 @@ void ModuleBitcodeWriter::writeValueSymbolTableForwardDecl() { enum StringEncoding { SE_Char6, SE_Fixed7, SE_Fixed8 }; /// Determine the encoding to use for the given string name and length. -static StringEncoding getStringEncoding(const char *Str, unsigned StrLen) { +static StringEncoding getStringEncoding(StringRef Str) { bool isChar6 = true; - for (const char *C = Str, *E = C + StrLen; C != E; ++C) { + for (char C : Str) { if (isChar6) - isChar6 = BitCodeAbbrevOp::isChar6(*C); - if ((unsigned char)*C & 128) + isChar6 = BitCodeAbbrevOp::isChar6(C); + if ((unsigned char)C & 128) // don't bother scanning the rest. return SE_Fixed8; } if (isChar6) return SE_Char6; - else - return SE_Fixed7; + return SE_Fixed7; } /// Emit top-level description of module, including target triple, inline asm, @@ -1079,8 +1099,7 @@ void ModuleBitcodeWriter::writeModuleInfo() { SmallVector Vals; // Emit the module's source file name. { - StringEncoding Bits = getStringEncoding(M.getSourceFileName().data(), - M.getSourceFileName().size()); + StringEncoding Bits = getStringEncoding(M.getSourceFileName()); BitCodeAbbrevOp AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8); if (Bits == SE_Char6) AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Char6); @@ -1110,7 +1129,7 @@ void ModuleBitcodeWriter::writeModuleInfo() { // linkage, alignment, section, visibility, threadlocal, // unnamed_addr, externally_initialized, dllstorageclass, // comdat, attributes] - Vals.push_back(StrtabBuilder.add(GV.getName())); + Vals.push_back(addToStrtab(GV.getName())); Vals.push_back(GV.getName().size()); Vals.push_back(VE.getTypeID(GV.getValueType())); Vals.push_back(GV.getType()->getAddressSpace() << 2 | 2 | GV.isConstant()); @@ -1149,7 +1168,7 @@ void ModuleBitcodeWriter::writeModuleInfo() { // linkage, paramattrs, alignment, section, visibility, gc, // unnamed_addr, prologuedata, dllstorageclass, comdat, // prefixdata, personalityfn] - Vals.push_back(StrtabBuilder.add(F.getName())); + Vals.push_back(addToStrtab(F.getName())); Vals.push_back(F.getName().size()); Vals.push_back(VE.getTypeID(F.getFunctionType())); Vals.push_back(F.getCallingConv()); @@ -1179,7 +1198,7 @@ void ModuleBitcodeWriter::writeModuleInfo() { for (const GlobalAlias &A : M.aliases()) { // ALIAS: [strtab offset, strtab size, alias type, aliasee val#, linkage, // visibility, dllstorageclass, threadlocal, unnamed_addr] - Vals.push_back(StrtabBuilder.add(A.getName())); + Vals.push_back(addToStrtab(A.getName())); Vals.push_back(A.getName().size()); Vals.push_back(VE.getTypeID(A.getValueType())); Vals.push_back(A.getType()->getAddressSpace()); @@ -1198,7 +1217,7 @@ void ModuleBitcodeWriter::writeModuleInfo() { for (const GlobalIFunc &I : M.ifuncs()) { // IFUNC: [strtab offset, strtab size, ifunc type, address space, resolver // val#, linkage, visibility] - 
Vals.push_back(StrtabBuilder.add(I.getName())); + Vals.push_back(addToStrtab(I.getName())); Vals.push_back(I.getName().size()); Vals.push_back(VE.getTypeID(I.getValueType())); Vals.push_back(I.getType()->getAddressSpace()); @@ -1655,7 +1674,7 @@ void ModuleBitcodeWriter::writeDIExpression(const DIExpression *N, SmallVectorImpl &Record, unsigned Abbrev) { Record.reserve(N->getElements().size() + 1); - const uint64_t Version = 2 << 1; + const uint64_t Version = 3 << 1; Record.push_back((uint64_t)N->isDistinct() | Version); Record.append(N->elements_begin(), N->elements_end()); @@ -1699,6 +1718,7 @@ void ModuleBitcodeWriter::writeDIImportedEntity( Record.push_back(VE.getMetadataOrNullID(N->getEntity())); Record.push_back(N->getLine()); Record.push_back(VE.getMetadataOrNullID(N->getRawName())); + Record.push_back(VE.getMetadataOrNullID(N->getRawFile())); Stream.EmitRecord(bitc::METADATA_IMPORTED_ENTITY, Record, Abbrev); Record.clear(); @@ -2020,6 +2040,24 @@ void ModuleBitcodeWriter::writeOperandBundleTags() { Stream.ExitBlock(); } +void ModuleBitcodeWriter::writeSyncScopeNames() { + SmallVector SSNs; + M.getContext().getSyncScopeNames(SSNs); + if (SSNs.empty()) + return; + + Stream.EnterSubblock(bitc::SYNC_SCOPE_NAMES_BLOCK_ID, 2); + + SmallVector Record; + for (auto SSN : SSNs) { + Record.append(SSN.begin(), SSN.end()); + Stream.EmitRecord(bitc::SYNC_SCOPE_NAME, Record, 0); + Record.clear(); + } + + Stream.ExitBlock(); +} + static void emitSignedInt64(SmallVectorImpl &Vals, uint64_t V) { if ((int64_t)V >= 0) Vals.push_back(V << 1); @@ -2636,7 +2674,7 @@ void ModuleBitcodeWriter::writeInstruction(const Instruction &I, Vals.push_back(cast(I).isVolatile()); if (cast(I).isAtomic()) { Vals.push_back(getEncodedOrdering(cast(I).getOrdering())); - Vals.push_back(getEncodedSynchScope(cast(I).getSynchScope())); + Vals.push_back(getEncodedSyncScopeID(cast(I).getSyncScopeID())); } break; case Instruction::Store: @@ -2650,7 +2688,8 @@ void ModuleBitcodeWriter::writeInstruction(const Instruction &I, Vals.push_back(cast(I).isVolatile()); if (cast(I).isAtomic()) { Vals.push_back(getEncodedOrdering(cast(I).getOrdering())); - Vals.push_back(getEncodedSynchScope(cast(I).getSynchScope())); + Vals.push_back( + getEncodedSyncScopeID(cast(I).getSyncScopeID())); } break; case Instruction::AtomicCmpXchg: @@ -2662,7 +2701,7 @@ void ModuleBitcodeWriter::writeInstruction(const Instruction &I, Vals.push_back( getEncodedOrdering(cast(I).getSuccessOrdering())); Vals.push_back( - getEncodedSynchScope(cast(I).getSynchScope())); + getEncodedSyncScopeID(cast(I).getSyncScopeID())); Vals.push_back( getEncodedOrdering(cast(I).getFailureOrdering())); Vals.push_back(cast(I).isWeak()); @@ -2676,12 +2715,12 @@ void ModuleBitcodeWriter::writeInstruction(const Instruction &I, Vals.push_back(cast(I).isVolatile()); Vals.push_back(getEncodedOrdering(cast(I).getOrdering())); Vals.push_back( - getEncodedSynchScope(cast(I).getSynchScope())); + getEncodedSyncScopeID(cast(I).getSyncScopeID())); break; case Instruction::Fence: Code = bitc::FUNC_CODE_INST_FENCE; Vals.push_back(getEncodedOrdering(cast(I).getOrdering())); - Vals.push_back(getEncodedSynchScope(cast(I).getSynchScope())); + Vals.push_back(getEncodedSyncScopeID(cast(I).getSyncScopeID())); break; case Instruction::Call: { const CallInst &CI = cast(I); @@ -2796,8 +2835,7 @@ void ModuleBitcodeWriter::writeFunctionLevelValueSymbolTable( for (const ValueName &Name : VST) { // Figure out the encoding to use for the name. 
- StringEncoding Bits = - getStringEncoding(Name.getKeyData(), Name.getKeyLength()); + StringEncoding Bits = getStringEncoding(Name.getKey()); unsigned AbbrevToUse = VST_ENTRY_8_ABBREV; NameVals.push_back(VE.getValueID(Name.getValue())); @@ -3155,41 +3193,33 @@ void IndexBitcodeWriter::writeModStrings() { unsigned AbbrevHash = Stream.EmitAbbrev(std::move(Abbv)); SmallVector Vals; - for (const auto &MPSE : Index.modulePaths()) { - if (!doIncludeModule(MPSE.getKey())) - continue; - StringEncoding Bits = - getStringEncoding(MPSE.getKey().data(), MPSE.getKey().size()); - unsigned AbbrevToUse = Abbrev8Bit; - if (Bits == SE_Char6) - AbbrevToUse = Abbrev6Bit; - else if (Bits == SE_Fixed7) - AbbrevToUse = Abbrev7Bit; - - Vals.push_back(MPSE.getValue().first); - - for (const auto P : MPSE.getKey()) - Vals.push_back((unsigned char)P); - - // Emit the finished record. - Stream.EmitRecord(bitc::MST_CODE_ENTRY, Vals, AbbrevToUse); - - Vals.clear(); - // Emit an optional hash for the module now - auto &Hash = MPSE.getValue().second; - bool AllZero = true; // Detect if the hash is empty, and do not generate it - for (auto Val : Hash) { - if (Val) - AllZero = false; - Vals.push_back(Val); - } - if (!AllZero) { - // Emit the hash record. - Stream.EmitRecord(bitc::MST_CODE_HASH, Vals, AbbrevHash); - } + forEachModule( + [&](const StringMapEntry> &MPSE) { + StringRef Key = MPSE.getKey(); + const auto &Value = MPSE.getValue(); + StringEncoding Bits = getStringEncoding(Key); + unsigned AbbrevToUse = Abbrev8Bit; + if (Bits == SE_Char6) + AbbrevToUse = Abbrev6Bit; + else if (Bits == SE_Fixed7) + AbbrevToUse = Abbrev7Bit; + + Vals.push_back(Value.first); + Vals.append(Key.begin(), Key.end()); + + // Emit the finished record. + Stream.EmitRecord(bitc::MST_CODE_ENTRY, Vals, AbbrevToUse); + + // Emit an optional hash for the module now + const auto &Hash = Value.second; + if (llvm::any_of(Hash, [](uint32_t H) { return H; })) { + Vals.assign(Hash.begin(), Hash.end()); + // Emit the hash record. + Stream.EmitRecord(bitc::MST_CODE_HASH, Vals, AbbrevHash); + } - Vals.clear(); - } + Vals.clear(); + }); Stream.ExitBlock(); } @@ -3306,7 +3336,15 @@ static const uint64_t INDEX_VERSION = 3; /// Emit the per-module summary section alongside the rest of /// the module's bitcode. void ModuleBitcodeWriter::writePerModuleGlobalValueSummary() { - Stream.EnterSubblock(bitc::GLOBALVAL_SUMMARY_BLOCK_ID, 4); + // By default we compile with ThinLTO if the module has a summary, but the + // client can request full LTO with a module flag. + bool IsThinLTO = true; + if (auto *MD = + mdconst::extract_or_null(M.getModuleFlag("ThinLTO"))) + IsThinLTO = MD->getZExtValue(); + Stream.EnterSubblock(IsThinLTO ? bitc::GLOBALVAL_SUMMARY_BLOCK_ID + : bitc::FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID, + 4); Stream.EmitRecord(bitc::FS_VERSION, ArrayRef{INDEX_VERSION}); @@ -3411,34 +3449,6 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() { Stream.EnterSubblock(bitc::GLOBALVAL_SUMMARY_BLOCK_ID, 3); Stream.EmitRecord(bitc::FS_VERSION, ArrayRef{INDEX_VERSION}); - // Create value IDs for undefined references. 
- forEachSummary([&](GVInfo I) { - if (auto *VS = dyn_cast(I.second)) { - for (auto &RI : VS->refs()) - assignValueId(RI.getGUID()); - return; - } - - auto *FS = dyn_cast(I.second); - if (!FS) - return; - for (auto &RI : FS->refs()) - assignValueId(RI.getGUID()); - - for (auto &EI : FS->calls()) { - GlobalValue::GUID GUID = EI.first.getGUID(); - if (!hasValueId(GUID)) { - // For SamplePGO, the indirect call targets for local functions will - // have its original name annotated in profile. We try to find the - // corresponding PGOFuncName as the GUID. - GUID = Index.getGUIDFromOriginalID(GUID); - if (GUID == 0 || !hasValueId(GUID)) - continue; - } - assignValueId(GUID); - } - }); - for (const auto &GVI : valueIds()) { Stream.EmitRecord(bitc::FS_VALUE_GUID, ArrayRef{GVI.second, GVI.first}); @@ -3512,9 +3522,9 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() { GlobalValueSummary *S = I.second; assert(S); - assert(hasValueId(I.first)); - unsigned ValueId = getValueId(I.first); - SummaryToValueIdMap[S] = ValueId; + auto ValueId = getValueId(I.first); + assert(ValueId); + SummaryToValueIdMap[S] = *ValueId; if (auto *AS = dyn_cast(S)) { // Will process aliases as a post-pass because the reader wants all @@ -3524,11 +3534,14 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() { } if (auto *VS = dyn_cast(S)) { - NameVals.push_back(ValueId); + NameVals.push_back(*ValueId); NameVals.push_back(Index.getModuleId(VS->modulePath())); NameVals.push_back(getEncodedGVSummaryFlags(VS->flags())); for (auto &RI : VS->refs()) { - NameVals.push_back(getValueId(RI.getGUID())); + auto RefValueId = getValueId(RI.getGUID()); + if (!RefValueId) + continue; + NameVals.push_back(*RefValueId); } // Emit the finished record. @@ -3542,15 +3555,22 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() { auto *FS = cast(S); writeFunctionTypeMetadataRecords(Stream, FS); - NameVals.push_back(ValueId); + NameVals.push_back(*ValueId); NameVals.push_back(Index.getModuleId(FS->modulePath())); NameVals.push_back(getEncodedGVSummaryFlags(FS->flags())); NameVals.push_back(FS->instCount()); - NameVals.push_back(FS->refs().size()); + // Fill in below + NameVals.push_back(0); + unsigned Count = 0; for (auto &RI : FS->refs()) { - NameVals.push_back(getValueId(RI.getGUID())); + auto RefValueId = getValueId(RI.getGUID()); + if (!RefValueId) + continue; + NameVals.push_back(*RefValueId); + Count++; } + NameVals[4] = Count; bool HasProfileData = false; for (auto &EI : FS->calls()) { @@ -3563,15 +3583,19 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() { // If this GUID doesn't have a value id, it doesn't have a function // summary and we don't need to record any calls to it. GlobalValue::GUID GUID = EI.first.getGUID(); - if (!hasValueId(GUID)) { + auto CallValueId = getValueId(GUID); + if (!CallValueId) { // For SamplePGO, the indirect call targets for local functions will // have its original name annotated in profile. We try to find the // corresponding PGOFuncName as the GUID. 
GUID = Index.getGUIDFromOriginalID(GUID); - if (GUID == 0 || !hasValueId(GUID)) + if (GUID == 0) + continue; + CallValueId = getValueId(GUID); + if (!CallValueId) continue; } - NameVals.push_back(getValueId(GUID)); + NameVals.push_back(*CallValueId); if (HasProfileData) NameVals.push_back(static_cast(EI.second.Hotness)); } @@ -3602,6 +3626,24 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() { MaybeEmitOriginalName(*AS); } + if (!Index.cfiFunctionDefs().empty()) { + for (auto &S : Index.cfiFunctionDefs()) { + NameVals.push_back(StrtabBuilder.add(S)); + NameVals.push_back(S.size()); + } + Stream.EmitRecord(bitc::FS_CFI_FUNCTION_DEFS, NameVals); + NameVals.clear(); + } + + if (!Index.cfiFunctionDecls().empty()) { + for (auto &S : Index.cfiFunctionDecls()) { + NameVals.push_back(StrtabBuilder.add(S)); + NameVals.push_back(S.size()); + } + Stream.EmitRecord(bitc::FS_CFI_FUNCTION_DECLS, NameVals); + NameVals.clear(); + } + Stream.ExitBlock(); } @@ -3633,7 +3675,6 @@ void ModuleBitcodeWriter::writeModuleHash(size_t BlockStartPos) { // Emit the module's hash. // MODULE_CODE_HASH: [5*i32] if (GenerateHash) { - SHA1 Hasher; uint32_t Vals[5]; Hasher.update(ArrayRef((const uint8_t *)&(Buffer)[BlockStartPos], Buffer.size() - BlockStartPos)); @@ -3692,6 +3733,7 @@ void ModuleBitcodeWriter::write() { writeUseListBlock(nullptr); writeOperandBundleTags(); + writeSyncScopeNames(); // Emit function bodies. DenseMap FunctionToBitcodeIndex; @@ -3807,6 +3849,38 @@ void BitcodeWriter::writeBlob(unsigned Block, unsigned Record, StringRef Blob) { Stream->ExitBlock(); } +void BitcodeWriter::writeSymtab() { + assert(!WroteStrtab && !WroteSymtab); + + // If any module has module-level inline asm, we will require a registered asm + // parser for the target so that we can create an accurate symbol table for + // the module. + for (Module *M : Mods) { + if (M->getModuleInlineAsm().empty()) + continue; + + std::string Err; + const Triple TT(M->getTargetTriple()); + const Target *T = TargetRegistry::lookupTarget(TT.str(), Err); + if (!T || !T->hasMCAsmParser()) + return; + } + + WroteSymtab = true; + SmallVector Symtab; + // The irsymtab::build function may be unable to create a symbol table if the + // module is malformed (e.g. it contains an invalid alias). Writing a symbol + // table is not required for correctness, but we still want to be able to + // write malformed modules to bitcode files, so swallow the error. + if (Error E = irsymtab::build(Mods, Symtab, StrtabBuilder, Alloc)) { + consumeError(std::move(E)); + return; + } + + writeBlob(bitc::SYMTAB_BLOCK_ID, bitc::SYMTAB_BLOB, + {Symtab.data(), Symtab.size()}); +} + void BitcodeWriter::writeStrtab() { assert(!WroteStrtab); @@ -3830,12 +3904,29 @@ void BitcodeWriter::writeModule(const Module *M, bool ShouldPreserveUseListOrder, const ModuleSummaryIndex *Index, bool GenerateHash, ModuleHash *ModHash) { + assert(!WroteStrtab); + + // The Mods vector is used by irsymtab::build, which requires non-const + // Modules in case it needs to materialize metadata. But the bitcode writer + // requires that the module is materialized, so we can cast to non-const here, + // after checking that it is in fact materialized. 
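+ // (Illustrative note: the intended call order is the one WriteBitcodeToFile
+ // uses below — writeModule for each module, then writeSymtab, then
+ // writeStrtab last, because both the module records and the symbol table
+ // refer into the string table by offset.)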
+ assert(M->isMaterialized()); + Mods.push_back(const_cast(M)); + ModuleBitcodeWriter ModuleWriter(M, Buffer, StrtabBuilder, *Stream, ShouldPreserveUseListOrder, Index, GenerateHash, ModHash); ModuleWriter.write(); } +void BitcodeWriter::writeIndex( + const ModuleSummaryIndex *Index, + const std::map *ModuleToSummariesForIndex) { + IndexBitcodeWriter IndexWriter(*Stream, StrtabBuilder, *Index, + ModuleToSummariesForIndex); + IndexWriter.write(); +} + /// WriteBitcodeToFile - Write the specified module to the specified output /// stream. void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out, @@ -3854,6 +3945,7 @@ void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out, BitcodeWriter Writer(Buffer); Writer.writeModule(M, ShouldPreserveUseListOrder, Index, GenerateHash, ModHash); + Writer.writeSymtab(); Writer.writeStrtab(); if (TT.isOSDarwin() || TT.isOSBinFormatMachO()) @@ -3887,11 +3979,9 @@ void llvm::WriteIndexToFile( SmallVector Buffer; Buffer.reserve(256 * 1024); - BitstreamWriter Stream(Buffer); - writeBitcodeHeader(Stream); - - IndexBitcodeWriter IndexWriter(Stream, Index, ModuleToSummariesForIndex); - IndexWriter.write(); + BitcodeWriter Writer(Buffer); + Writer.writeIndex(&Index, ModuleToSummariesForIndex); + Writer.writeStrtab(); Out.write((char *)&Buffer.front(), Buffer.size()); } diff --git a/interpreter/llvm/src/lib/Bitcode/Writer/LLVMBuild.txt b/interpreter/llvm/src/lib/Bitcode/Writer/LLVMBuild.txt index a07c280fa9e3f..ef6dc9f901e2c 100644 --- a/interpreter/llvm/src/lib/Bitcode/Writer/LLVMBuild.txt +++ b/interpreter/llvm/src/lib/Bitcode/Writer/LLVMBuild.txt @@ -19,4 +19,4 @@ type = Library name = BitWriter parent = Bitcode -required_libraries = Analysis Core MC Support +required_libraries = Analysis Core MC Object Support diff --git a/interpreter/llvm/src/lib/Bitcode/Writer/ValueEnumerator.cpp b/interpreter/llvm/src/lib/Bitcode/Writer/ValueEnumerator.cpp index fd76400331d95..bb626baabd129 100644 --- a/interpreter/llvm/src/lib/Bitcode/Writer/ValueEnumerator.cpp +++ b/interpreter/llvm/src/lib/Bitcode/Writer/ValueEnumerator.cpp @@ -902,8 +902,11 @@ void ValueEnumerator::EnumerateAttributes(AttributeList PAL) { } // Do lookups for all attribute groups. 
- for (unsigned i = 0, e = PAL.getNumSlots(); i != e; ++i) { - IndexAndAttrSet Pair = {PAL.getSlotIndex(i), PAL.getSlotAttributes(i)}; + for (unsigned i = PAL.index_begin(), e = PAL.index_end(); i != e; ++i) { + AttributeSet AS = PAL.getAttributes(i); + if (!AS.hasAttributes()) + continue; + IndexAndAttrSet Pair = {i, AS}; unsigned &Entry = AttributeGroupMap[Pair]; if (Entry == 0) { AttributeGroups.push_back(Pair); diff --git a/interpreter/llvm/src/lib/CMakeLists.txt b/interpreter/llvm/src/lib/CMakeLists.txt index 76549540ce0f7..946067e6358f3 100644 --- a/interpreter/llvm/src/lib/CMakeLists.txt +++ b/interpreter/llvm/src/lib/CMakeLists.txt @@ -4,6 +4,7 @@ add_subdirectory(IR) add_subdirectory(IRReader) add_subdirectory(CodeGen) +add_subdirectory(BinaryFormat) add_subdirectory(Bitcode) add_subdirectory(Transforms) add_subdirectory(Linker) @@ -21,5 +22,6 @@ add_subdirectory(LineEditor) add_subdirectory(ProfileData) add_subdirectory(Fuzzer) add_subdirectory(Passes) -add_subdirectory(LibDriver) +add_subdirectory(ToolDrivers) add_subdirectory(XRay) +add_subdirectory(Testing) diff --git a/interpreter/llvm/src/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/interpreter/llvm/src/lib/CodeGen/AggressiveAntiDepBreaker.cpp index 3a57772cc7f55..5abf50e5bd10c 100644 --- a/interpreter/llvm/src/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/interpreter/llvm/src/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -128,8 +128,7 @@ AggressiveAntiDepBreaker::AggressiveAntiDepBreaker( } DEBUG(dbgs() << "AntiDep Critical-Path Registers:"); - DEBUG(for (int r = CriticalPathSet.find_first(); r != -1; - r = CriticalPathSet.find_next(r)) + DEBUG(for (unsigned r : CriticalPathSet.set_bits()) dbgs() << " " << TRI->getName(r)); DEBUG(dbgs() << '\n'); } @@ -166,7 +165,7 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { for (const MCPhysReg *I = MF.getRegInfo().getCalleeSavedRegs(); *I; ++I) { unsigned Reg = *I; - if (!IsReturnBlock && !(Pristine.test(Reg) || BB->isLiveIn(Reg))) + if (!IsReturnBlock && !Pristine.test(Reg)) continue; for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { unsigned AliasReg = *AI; @@ -571,7 +570,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( DEBUG({ dbgs() << " ::"; - for (int r = BV.find_first(); r != -1; r = BV.find_next(r)) + for (unsigned r : BV.set_bits()) dbgs() << " " << TRI->getName(r); dbgs() << "\n"; }); diff --git a/interpreter/llvm/src/lib/CodeGen/Analysis.cpp b/interpreter/llvm/src/lib/CodeGen/Analysis.cpp index 09a37a77e9fbc..c2aecc651b792 100644 --- a/interpreter/llvm/src/lib/CodeGen/Analysis.cpp +++ b/interpreter/llvm/src/lib/CodeGen/Analysis.cpp @@ -24,8 +24,8 @@ #include "llvm/IR/Module.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Transforms/Utils/GlobalStatus.h" diff --git a/interpreter/llvm/src/lib/CodeGen/AsmPrinter/ARMException.cpp b/interpreter/llvm/src/lib/CodeGen/AsmPrinter/ARMException.cpp index 61149d9229b77..8b1376ab363d7 100644 --- a/interpreter/llvm/src/lib/CodeGen/AsmPrinter/ARMException.cpp +++ b/interpreter/llvm/src/lib/CodeGen/AsmPrinter/ARMException.cpp @@ -14,6 +14,7 @@ #include "DwarfException.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include 
"llvm/CodeGen/MachineFunction.h" @@ -27,7 +28,6 @@ #include "llvm/MC/MCSection.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetOptions.h" diff --git a/interpreter/llvm/src/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/interpreter/llvm/src/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 7ddb86d80bf07..ff427c9a0d756 100644 --- a/interpreter/llvm/src/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/interpreter/llvm/src/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/AsmPrinter.h" #include "AsmPrinterHandler.h" #include "CodeViewDebug.h" #include "DwarfDebug.h" @@ -19,18 +20,19 @@ #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/ObjectUtils.h" +#include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/CodeGen/Analysis.h" -#include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/GCMetadata.h" #include "llvm/CodeGen/GCMetadataPrinter.h" #include "llvm/CodeGen/GCStrategy.h" @@ -82,14 +84,12 @@ #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" -#include "llvm/Support/Dwarf.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/Timer.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" @@ -628,12 +628,17 @@ void AsmPrinter::EmitDebugThreadLocal(const MCExpr *Value, /// EmitFunctionHeader - This method emits the header for the current /// function. void AsmPrinter::EmitFunctionHeader() { + const Function *F = MF->getFunction(); + + if (isVerbose()) + OutStreamer->GetCommentOS() + << "-- Begin function " + << GlobalValue::dropLLVMManglingEscape(F->getName()) << '\n'; + // Print out constants referenced by the function EmitConstantPool(); // Print the 'header' of function. - const Function *F = MF->getFunction(); - OutStreamer->SwitchSection(getObjFileLowering().SectionForGlobal(F, TM)); EmitVisibility(CurrentFnSym, F->getVisibility()); @@ -946,6 +951,19 @@ void AsmPrinter::emitFrameAlloc(const MachineInstr &MI) { MCConstantExpr::create(FrameOffset, OutContext)); } +static bool needFuncLabelsForEHOrDebugInfo(const MachineFunction &MF, + MachineModuleInfo *MMI) { + if (!MF.getLandingPads().empty() || MF.hasEHFunclets() || MMI->hasDebugInfo()) + return true; + + // We might emit an EH table that uses function begin and end labels even if + // we don't have any landingpads. + if (!MF.getFunction()->hasPersonalityFn()) + return false; + return !isNoOpWithoutInvoke( + classifyEHPersonality(MF.getFunction()->getPersonalityFn())); +} + /// EmitFunctionBody - This method emits the body and trailer for a /// function. 
void AsmPrinter::EmitFunctionBody() { @@ -1073,8 +1091,8 @@ void AsmPrinter::EmitFunctionBody() { // Emit target-specific gunk after the function body. EmitFunctionBodyEnd(); - if (!MF->getLandingPads().empty() || MMI->hasDebugInfo() || - MF->hasEHFunclets() || MAI->hasDotTypeDotSizeDirective()) { + if (needFuncLabelsForEHOrDebugInfo(*MF, MMI) || + MAI->hasDotTypeDotSizeDirective()) { // Create a symbol for the end of function. CurrentFnEnd = createTempSymbol("func_end"); OutStreamer->EmitLabel(CurrentFnEnd); @@ -1107,6 +1125,9 @@ void AsmPrinter::EmitFunctionBody() { HI.Handler->endFunction(MF); } + if (isVerbose()) + OutStreamer->GetCommentOS() << "-- End function\n"; + OutStreamer->AddBlankLine(); } @@ -1267,11 +1288,7 @@ bool AsmPrinter::doFinalization(Module &M) { const TargetLoweringObjectFile &TLOF = getObjFileLowering(); - // Emit module flags. - SmallVector ModuleFlags; - M.getModuleFlagsMetadata(ModuleFlags); - if (!ModuleFlags.empty()) - TLOF.emitModuleFlags(*OutStreamer, ModuleFlags, TM); + TLOF.emitModuleMetadata(*OutStreamer, M, TM); if (TM.getTargetTriple().isOSBinFormatELF()) { MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo(); @@ -1396,8 +1413,7 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) { CurrentFnBegin = nullptr; CurExceptionSym = nullptr; bool NeedsLocalForSize = MAI->needsLocalForSize(); - if (!MF.getLandingPads().empty() || MMI->hasDebugInfo() || - MF.hasEHFunclets() || NeedsLocalForSize) { + if (needFuncLabelsForEHOrDebugInfo(MF, MMI) || NeedsLocalForSize) { CurrentFnBegin = createTempSymbol("func_begin"); if (NeedsLocalForSize) CurrentFnSymForSize = CurrentFnBegin; @@ -2787,26 +2803,24 @@ void AsmPrinter::emitXRayTable() { } // Before we switch over, we force a reference to a label inside the - // xray_instr_map and xray_fn_idx sections. Since this function is always - // called just before the function's end, we assume that this is happening - // after the last return instruction. We also use the synthetic label in the - // xray_inster_map as a delimeter for the range of sleds for this function in - // the index. + // xray_fn_idx sections. This makes sure that the xray_fn_idx section is kept + // live by the linker if the function is not garbage-collected. Since this + // function is always called just before the function's end, we assume that + // this is happening after the last return instruction. auto WordSizeBytes = MAI->getCodePointerSize(); - MCSymbol *SledsStart = OutContext.createTempSymbol("xray_synthetic_", true); MCSymbol *IdxRef = OutContext.createTempSymbol("xray_fn_idx_synth_", true); OutStreamer->EmitCodeAlignment(16); - OutStreamer->EmitSymbolValue(SledsStart, WordSizeBytes, false); OutStreamer->EmitSymbolValue(IdxRef, WordSizeBytes, false); // Now we switch to the instrumentation map section. Because this is done // per-function, we are able to create an index entry that will represent the // range of sleds associated with a function. + MCSymbol *SledsStart = OutContext.createTempSymbol("xray_sleds_start", true); OutStreamer->SwitchSection(InstMap); OutStreamer->EmitLabel(SledsStart); for (const auto &Sled : Sleds) Sled.emit(WordSizeBytes, OutStreamer.get(), CurrentFnSym); - MCSymbol *SledsEnd = OutContext.createTempSymbol("xray_synthetic_end", true); + MCSymbol *SledsEnd = OutContext.createTempSymbol("xray_sleds_end", true); OutStreamer->EmitLabel(SledsEnd); // We then emit a single entry in the index per function. 
We use the symbols diff --git a/interpreter/llvm/src/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/interpreter/llvm/src/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp index 0185c380cc394..0edf9051d342e 100644 --- a/interpreter/llvm/src/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp +++ b/interpreter/llvm/src/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -15,6 +15,7 @@ #include "DwarfDebug.h" #include "DwarfExpression.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/DIE.h" #include "llvm/CodeGen/MachineFunction.h" @@ -26,7 +27,6 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MachineLocation.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" diff --git a/interpreter/llvm/src/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/interpreter/llvm/src/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index a0bf1632dff39..eae79ad101d38 100644 --- a/interpreter/llvm/src/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/interpreter/llvm/src/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -11,9 +11,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/AsmPrinter.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Twine.h" +#include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" diff --git a/interpreter/llvm/src/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/interpreter/llvm/src/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp index 9d60fff3bccce..a81d56e9618bf 100644 --- a/interpreter/llvm/src/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp +++ b/interpreter/llvm/src/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp @@ -1,4 +1,4 @@ -//===-- llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp --*- C++ -*--===// +//===- llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp ----------------------===// // // The LLVM Compiler Infrastructure // @@ -12,37 +12,82 @@ //===----------------------------------------------------------------------===// #include "CodeViewDebug.h" +#include "llvm/ADT/APSInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" #include "llvm/ADT/TinyPtrVector.h" -#include "llvm/DebugInfo/CodeView/CVTypeDumper.h" +#include "llvm/ADT/Triple.h" +#include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/COFF.h" +#include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/LexicalScopes.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/Config/llvm-config.h" #include "llvm/DebugInfo/CodeView/CVTypeVisitor.h" #include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h" #include "llvm/DebugInfo/CodeView/Line.h" -#include "llvm/DebugInfo/CodeView/ModuleDebugInlineeLinesFragment.h" #include "llvm/DebugInfo/CodeView/SymbolRecord.h" -#include "llvm/DebugInfo/CodeView/TypeDatabase.h" #include "llvm/DebugInfo/CodeView/TypeDumpVisitor.h" #include 
"llvm/DebugInfo/CodeView/TypeIndex.h" #include "llvm/DebugInfo/CodeView/TypeRecord.h" -#include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h" +#include "llvm/DebugInfo/CodeView/TypeTableCollection.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" #include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionCOFF.h" +#include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/BinaryByteStream.h" #include "llvm/Support/BinaryStreamReader.h" -#include "llvm/Support/COFF.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ScopedPrinter.h" +#include "llvm/Support/SMLoc.h" #include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include using namespace llvm; using namespace llvm::codeview; CodeViewDebug::CodeViewDebug(AsmPrinter *AP) - : DebugHandlerBase(AP), OS(*Asm->OutStreamer), Allocator(), - TypeTable(Allocator), CurFn(nullptr) { + : DebugHandlerBase(AP), OS(*Asm->OutStreamer), TypeTable(Allocator) { // If module doesn't have named metadata anchors or COFF debug section // is not available, skip any debug info related stuff. if (!MMI->getModule()->getNamedMetadata("llvm.dbg.cu") || @@ -178,7 +223,8 @@ static const DISubprogram *getQualifiedNameComponents( static std::string getQualifiedName(ArrayRef QualifiedNameComponents, StringRef TypeName) { std::string FullyQualifiedName; - for (StringRef QualifiedNameComponent : reverse(QualifiedNameComponents)) { + for (StringRef QualifiedNameComponent : + llvm::reverse(QualifiedNameComponents)) { FullyQualifiedName.append(QualifiedNameComponent); FullyQualifiedName.append("::"); } @@ -319,7 +365,7 @@ static void addLocIfNotPresent(SmallVectorImpl &Locs, void CodeViewDebug::maybeRecordLocation(const DebugLoc &DL, const MachineFunction *MF) { // Skip this instruction if it has the same location as the previous one. - if (DL == CurFn->LastLoc) + if (!DL || DL == PrevInstLoc) return; const DIScope *Scope = DL.get()->getScope(); @@ -339,11 +385,11 @@ void CodeViewDebug::maybeRecordLocation(const DebugLoc &DL, if (!CurFn->HaveLineInfo) CurFn->HaveLineInfo = true; unsigned FileId = 0; - if (CurFn->LastLoc.get() && CurFn->LastLoc->getFile() == DL->getFile()) + if (PrevInstLoc.get() && PrevInstLoc->getFile() == DL->getFile()) FileId = CurFn->LastFileId; else FileId = CurFn->LastFileId = maybeRecordFile(DL->getFile()); - CurFn->LastLoc = DL; + PrevInstLoc = DL; unsigned FuncId = CurFn->FuncId; if (const DILocation *SiteLoc = DL->getInlinedAt()) { @@ -393,7 +439,7 @@ void CodeViewDebug::endModule() { // subprograms. 
switchToDebugSectionForSymbol(nullptr); - MCSymbol *CompilerInfo = beginCVSubsection(ModuleDebugFragmentKind::Symbols); + MCSymbol *CompilerInfo = beginCVSubsection(DebugSubsectionKind::Symbols); emitCompilerInformation(); endCVSubsection(CompilerInfo); @@ -417,7 +463,7 @@ void CodeViewDebug::endModule() { // Emit UDT records for any types used by global variables. if (!GlobalUDTs.empty()) { - MCSymbol *SymbolsEnd = beginCVSubsection(ModuleDebugFragmentKind::Symbols); + MCSymbol *SymbolsEnd = beginCVSubsection(DebugSubsectionKind::Symbols); emitDebugInfoForUDTs(GlobalUDTs); endCVSubsection(SymbolsEnd); } @@ -469,17 +515,21 @@ void CodeViewDebug::emitTypeInformation() { CommentPrefix += ' '; } - TypeDatabase TypeDB(TypeTable.records().size()); - CVTypeDumper CVTD(TypeDB); - TypeTable.ForEachRecord([&](TypeIndex Index, ArrayRef Record) { + TypeTableCollection Table(TypeTable.records()); + Optional B = Table.getFirst(); + while (B) { + // This will fail if the record data is invalid. + CVType Record = Table.getType(*B); + if (OS.isVerboseAsm()) { // Emit a block comment describing the type record for readability. SmallString<512> CommentBlock; raw_svector_ostream CommentOS(CommentBlock); ScopedPrinter SP(CommentOS); SP.setPrefix(CommentPrefix); - TypeDumpVisitor TDV(TypeDB, &SP, false); - Error E = CVTD.dump(Record, TDV); + TypeDumpVisitor TDV(Table, &SP, false); + + Error E = codeview::visitTypeRecord(Record, *B, TDV); if (E) { logAllUnhandledErrors(std::move(E), errs(), "error: "); llvm_unreachable("produced malformed type record"); @@ -489,29 +539,10 @@ void CodeViewDebug::emitTypeInformation() { // newline. OS.emitRawComment( CommentOS.str().drop_front(CommentPrefix.size() - 1).rtrim()); - } else { -#ifndef NDEBUG - // Assert that the type data is valid even if we aren't dumping - // comments. The MSVC linker doesn't do much type record validation, - // so the first link of an invalid type record can succeed while - // subsequent links will fail with LNK1285. - BinaryByteStream Stream(Record, llvm::support::little); - CVTypeArray Types; - BinaryStreamReader Reader(Stream); - Error E = Reader.readArray(Types, Reader.getLength()); - if (!E) { - TypeVisitorCallbacks C; - E = CVTypeVisitor(C).visitTypeStream(Types); - } - if (E) { - logAllUnhandledErrors(std::move(E), errs(), "error: "); - llvm_unreachable("produced malformed type record"); - } -#endif } - StringRef S(reinterpret_cast(Record.data()), Record.size()); - OS.EmitBinaryData(S); - }); + OS.EmitBinaryData(Record.str_data()); + B = Table.getNext(*B); + } } namespace { @@ -586,7 +617,7 @@ static CPUType mapArchToCVCPUType(Triple::ArchType Type) { } } -} // anonymous namespace +} // end anonymous namespace void CodeViewDebug::emitCompilerInformation() { MCContext &Context = MMI->getContext(); @@ -645,8 +676,7 @@ void CodeViewDebug::emitInlineeLinesSubsection() { return; OS.AddComment("Inlinee lines subsection"); - MCSymbol *InlineEnd = - beginCVSubsection(ModuleDebugFragmentKind::InlineeLines); + MCSymbol *InlineEnd = beginCVSubsection(DebugSubsectionKind::InlineeLines); // We don't provide any extra file info. // FIXME: Find out if debuggers use this info. @@ -767,11 +797,11 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV, // If our DISubprogram name is empty, use the mangled name. if (FuncName.empty()) - FuncName = GlobalValue::getRealLinkageName(GV->getName()); + FuncName = GlobalValue::dropLLVMManglingEscape(GV->getName()); // Emit a symbol subsection, required by VS2012+ to find function boundaries. 
OS.AddComment("Symbol subsection for " + Twine(FuncName)); - MCSymbol *SymbolsEnd = beginCVSubsection(ModuleDebugFragmentKind::Symbols); + MCSymbol *SymbolsEnd = beginCVSubsection(DebugSubsectionKind::Symbols); { MCSymbol *ProcRecordBegin = MMI->getContext().createTempSymbol(), *ProcRecordEnd = MMI->getContext().createTempSymbol(); @@ -1040,11 +1070,11 @@ void CodeViewDebug::beginFunctionImpl(const MachineFunction *MF) { bool EmptyPrologue = true; for (const auto &MBB : *MF) { for (const auto &MI : MBB) { - if (!MI.isDebugValue() && !MI.getFlag(MachineInstr::FrameSetup) && + if (!MI.isMetaInstruction() && !MI.getFlag(MachineInstr::FrameSetup) && MI.getDebugLoc()) { PrologEndLoc = MI.getDebugLoc(); break; - } else if (!MI.isDebugValue()) { + } else if (!MI.isMetaInstruction()) { EmptyPrologue = false; } } @@ -1577,7 +1607,7 @@ TypeIndex CodeViewDebug::lowerTypeEnum(const DICompositeType *Ty) { EnumeratorCount++; } } - FTI = FLRB.end(); + FTI = FLRB.end(true); } std::string FullName = getFullyQualifiedName(Ty); @@ -1597,11 +1627,11 @@ struct llvm::ClassInfo { uint64_t BaseOffset; }; // [MemberInfo] - typedef std::vector MemberList; + using MemberList = std::vector; - typedef TinyPtrVector MethodsList; + using MethodsList = TinyPtrVector; // MethodName -> MethodsList - typedef MapVector MethodsMap; + using MethodsMap = MapVector; /// Base classes. std::vector Inheritance; @@ -1866,7 +1896,7 @@ CodeViewDebug::lowerRecordFieldList(const DICompositeType *Ty) { translateMethodOptionFlags(SP), VFTableOffset, Name)); MemberCount++; } - assert(Methods.size() > 0 && "Empty methods map entry"); + assert(!Methods.empty() && "Empty methods map entry"); if (Methods.size() == 1) FLBR.writeMemberType(Methods[0]); else { @@ -1884,7 +1914,7 @@ CodeViewDebug::lowerRecordFieldList(const DICompositeType *Ty) { MemberCount++; } - TypeIndex FieldTI = FLBR.end(); + TypeIndex FieldTI = FLBR.end(true); return std::make_tuple(FieldTI, Info.VShapeTI, MemberCount, !Info.NestedClasses.empty()); } @@ -2120,13 +2150,27 @@ void CodeViewDebug::beginInstruction(const MachineInstr *MI) { if (!Asm || !CurFn || MI->isDebugValue() || MI->getFlag(MachineInstr::FrameSetup)) return; + + // If the first instruction of a new MBB has no location, find the first + // instruction with a location and use that. DebugLoc DL = MI->getDebugLoc(); - if (DL == PrevInstLoc || !DL) + if (!DL && MI->getParent() != PrevInstBB) { + for (const auto &NextMI : *MI->getParent()) { + DL = NextMI.getDebugLoc(); + if (DL) + break; + } + } + PrevInstBB = MI->getParent(); + + // If we still don't have a debug location, don't record a location. + if (!DL) return; + maybeRecordLocation(DL, Asm->MF); } -MCSymbol *CodeViewDebug::beginCVSubsection(ModuleDebugFragmentKind Kind) { +MCSymbol *CodeViewDebug::beginCVSubsection(DebugSubsectionKind Kind) { MCSymbol *BeginLabel = MMI->getContext().createTempSymbol(), *EndLabel = MMI->getContext().createTempSymbol(); OS.EmitIntValue(unsigned(Kind), 4); @@ -2186,7 +2230,7 @@ void CodeViewDebug::emitDebugInfoForGlobals() { if (!GV->hasComdat() && !GV->isDeclarationForLinker()) { if (!EndLabel) { OS.AddComment("Symbol subsection for globals"); - EndLabel = beginCVSubsection(ModuleDebugFragmentKind::Symbols); + EndLabel = beginCVSubsection(DebugSubsectionKind::Symbols); } // FIXME: emitDebugInfoForGlobal() doesn't handle DIExpressions. 
emitDebugInfoForGlobal(GVE->getVariable(), GV, Asm->getSymbol(GV)); @@ -2202,9 +2246,9 @@ void CodeViewDebug::emitDebugInfoForGlobals() { if (GV->hasComdat()) { MCSymbol *GVSym = Asm->getSymbol(GV); OS.AddComment("Symbol subsection for " + - Twine(GlobalValue::getRealLinkageName(GV->getName()))); + Twine(GlobalValue::dropLLVMManglingEscape(GV->getName()))); switchToDebugSectionForSymbol(GVSym); - EndLabel = beginCVSubsection(ModuleDebugFragmentKind::Symbols); + EndLabel = beginCVSubsection(DebugSubsectionKind::Symbols); // FIXME: emitDebugInfoForGlobal() doesn't handle DIExpressions. emitDebugInfoForGlobal(GVE->getVariable(), GV, GVSym); endCVSubsection(EndLabel); diff --git a/interpreter/llvm/src/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/interpreter/llvm/src/lib/CodeGen/AsmPrinter/CodeViewDebug.h index 46b2daa1e0071..fd8f60425c240 100644 --- a/interpreter/llvm/src/lib/CodeGen/AsmPrinter/CodeViewDebug.h +++ b/interpreter/llvm/src/lib/CodeGen/AsmPrinter/CodeViewDebug.h @@ -1,4 +1,4 @@ -//===-- llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h ----*- C++ -*--===// +//===- llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h --------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -14,29 +14,44 @@ #ifndef LLVM_LIB_CODEGEN_ASMPRINTER_CODEVIEWDEBUG_H #define LLVM_LIB_CODEGEN_ASMPRINTER_CODEVIEWDEBUG_H +#include "DbgValueHistoryCalculator.h" #include "DebugHandlerBase.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/StringMap.h" -#include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/DebugInfo/CodeView/CodeView.h" #include "llvm/DebugInfo/CodeView/TypeIndex.h" #include "llvm/DebugInfo/CodeView/TypeTableBuilder.h" -#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DebugLoc.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/Compiler.h" +#include +#include +#include +#include +#include +#include +#include namespace llvm { -class StringRef; -class LexicalScope; struct ClassInfo; +class StringRef; +class AsmPrinter; +class Function; +class GlobalVariable; +class MCSectionCOFF; +class MCStreamer; +class MCSymbol; +class MachineFunction; /// \brief Collects and handles line tables information in a CodeView format. class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { MCStreamer &OS; - llvm::BumpPtrAllocator Allocator; + BumpPtrAllocator Allocator; codeview::TypeTableBuilder TypeTable; /// Represents the most general definition range. @@ -103,14 +118,13 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { SmallVector Locals; - DebugLoc LastLoc; const MCSymbol *Begin = nullptr; const MCSymbol *End = nullptr; unsigned FuncId = 0; unsigned LastFileId = 0; bool HaveLineInfo = false; }; - FunctionInfo *CurFn; + FunctionInfo *CurFn = nullptr; /// The set of comdat .debug$S sections that we've seen so far. Each section /// must start with a magic version number that must only be emitted once. 
@@ -176,8 +190,9 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { std::vector> LocalUDTs, GlobalUDTs; - typedef std::map FileToFilepathMapTy; + using FileToFilepathMapTy = std::map; FileToFilepathMapTy FileToFilepathMap; + StringRef getFullFilepath(const DIFile *S); unsigned maybeRecordFile(const DIFile *F); @@ -216,14 +231,14 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { /// Opens a subsection of the given kind in a .debug$S codeview section. /// Returns an end label for use with endCVSubsection when the subsection is /// finished. - MCSymbol *beginCVSubsection(codeview::ModuleDebugFragmentKind Kind); + MCSymbol *beginCVSubsection(codeview::DebugSubsectionKind Kind); void endCVSubsection(MCSymbol *EndLabel); void emitInlinedCallSite(const FunctionInfo &FI, const DILocation *InlinedAt, const InlineSite &Site); - typedef DbgValueHistoryMap::InlinedVariable InlinedVariable; + using InlinedVariable = DbgValueHistoryMap::InlinedVariable; void collectVariableInfo(const DISubprogram *SP); @@ -309,7 +324,7 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { public: CodeViewDebug(AsmPrinter *Asm); - void setSymbolSize(const llvm::MCSymbol *, uint64_t) override {} + void setSymbolSize(const MCSymbol *, uint64_t) override {} /// \brief Emit the COFF section that holds the line table information. void endModule() override; @@ -317,6 +332,7 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { /// \brief Process beginning of an instruction. void beginInstruction(const MachineInstr *MI) override; }; -} // End of namespace llvm -#endif +} // end namespace llvm + +#endif // LLVM_LIB_CODEGEN_ASMPRINTER_CODEVIEWDEBUG_H diff --git a/interpreter/llvm/src/lib/CodeGen/AsmPrinter/DIE.cpp b/interpreter/llvm/src/lib/CodeGen/AsmPrinter/DIE.cpp index 30bfd7c94e68b..886e6e264b3ec 100644 --- a/interpreter/llvm/src/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/interpreter/llvm/src/lib/CodeGen/AsmPrinter/DIE.cpp @@ -105,7 +105,7 @@ void DIEAbbrev::Emit(const AsmPrinter *AP) const { } LLVM_DUMP_METHOD -void DIEAbbrev::print(raw_ostream &O) { +void DIEAbbrev::print(raw_ostream &O) const { O << "Abbreviation @" << format("0x%lx", (long)(intptr_t)this) << " " @@ -128,7 +128,7 @@ void DIEAbbrev::print(raw_ostream &O) { } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -LLVM_DUMP_METHOD void DIEAbbrev::dump() { +LLVM_DUMP_METHOD void DIEAbbrev::dump() const { print(dbgs()); } #endif @@ -268,7 +268,7 @@ void DIE::print(raw_ostream &O, unsigned IndentCount) const { } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -LLVM_DUMP_METHOD void DIE::dump() { +LLVM_DUMP_METHOD void DIE::dump() const { print(dbgs()); } #endif diff --git a/interpreter/llvm/src/lib/CodeGen/AsmPrinter/DIEHash.cpp b/interpreter/llvm/src/lib/CodeGen/AsmPrinter/DIEHash.cpp index 8e3b88d0af0e5..15ade3c96dfea 100644 --- a/interpreter/llvm/src/lib/CodeGen/AsmPrinter/DIEHash.cpp +++ b/interpreter/llvm/src/lib/CodeGen/AsmPrinter/DIEHash.cpp @@ -11,15 +11,15 @@ // //===----------------------------------------------------------------------===// -#include "ByteStreamer.h" #include "DIEHash.h" +#include "ByteStreamer.h" #include "DwarfDebug.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/DIE.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Support/Endian.h" #include "llvm/Support/MD5.h" #include "llvm/Support/raw_ostream.h" @@ 
-116,65 +116,17 @@ void DIEHash::addParentContext(const DIE &Parent) { // Collect all of the attributes for a particular DIE in single structure. void DIEHash::collectAttributes(const DIE &Die, DIEAttrs &Attrs) { -#define COLLECT_ATTR(NAME) \ - case dwarf::NAME: \ - Attrs.NAME = V; \ - break for (const auto &V : Die.values()) { DEBUG(dbgs() << "Attribute: " << dwarf::AttributeString(V.getAttribute()) << " added.\n"); switch (V.getAttribute()) { - COLLECT_ATTR(DW_AT_name); - COLLECT_ATTR(DW_AT_accessibility); - COLLECT_ATTR(DW_AT_address_class); - COLLECT_ATTR(DW_AT_allocated); - COLLECT_ATTR(DW_AT_artificial); - COLLECT_ATTR(DW_AT_associated); - COLLECT_ATTR(DW_AT_binary_scale); - COLLECT_ATTR(DW_AT_bit_offset); - COLLECT_ATTR(DW_AT_bit_size); - COLLECT_ATTR(DW_AT_bit_stride); - COLLECT_ATTR(DW_AT_byte_size); - COLLECT_ATTR(DW_AT_byte_stride); - COLLECT_ATTR(DW_AT_const_expr); - COLLECT_ATTR(DW_AT_const_value); - COLLECT_ATTR(DW_AT_containing_type); - COLLECT_ATTR(DW_AT_count); - COLLECT_ATTR(DW_AT_data_bit_offset); - COLLECT_ATTR(DW_AT_data_location); - COLLECT_ATTR(DW_AT_data_member_location); - COLLECT_ATTR(DW_AT_decimal_scale); - COLLECT_ATTR(DW_AT_decimal_sign); - COLLECT_ATTR(DW_AT_default_value); - COLLECT_ATTR(DW_AT_digit_count); - COLLECT_ATTR(DW_AT_discr); - COLLECT_ATTR(DW_AT_discr_list); - COLLECT_ATTR(DW_AT_discr_value); - COLLECT_ATTR(DW_AT_encoding); - COLLECT_ATTR(DW_AT_enum_class); - COLLECT_ATTR(DW_AT_endianity); - COLLECT_ATTR(DW_AT_explicit); - COLLECT_ATTR(DW_AT_is_optional); - COLLECT_ATTR(DW_AT_location); - COLLECT_ATTR(DW_AT_lower_bound); - COLLECT_ATTR(DW_AT_mutable); - COLLECT_ATTR(DW_AT_ordering); - COLLECT_ATTR(DW_AT_picture_string); - COLLECT_ATTR(DW_AT_prototyped); - COLLECT_ATTR(DW_AT_small); - COLLECT_ATTR(DW_AT_segment); - COLLECT_ATTR(DW_AT_string_length); - COLLECT_ATTR(DW_AT_threads_scaled); - COLLECT_ATTR(DW_AT_upper_bound); - COLLECT_ATTR(DW_AT_use_location); - COLLECT_ATTR(DW_AT_use_UTF8); - COLLECT_ATTR(DW_AT_variable_parameter); - COLLECT_ATTR(DW_AT_virtuality); - COLLECT_ATTR(DW_AT_visibility); - COLLECT_ATTR(DW_AT_vtable_elem_location); - COLLECT_ATTR(DW_AT_type); +#define HANDLE_DIE_HASH_ATTR(NAME) \ + case dwarf::NAME: \ + Attrs.NAME = V; \ + break; +#include "DIEHashAttributes.def" default: break; } @@ -366,62 +318,12 @@ void DIEHash::hashAttribute(const DIEValue &Value, dwarf::Tag Tag) { // Go through the attributes from \param Attrs in the order specified in 7.27.4 // and hash them. 
void DIEHash::hashAttributes(const DIEAttrs &Attrs, dwarf::Tag Tag) { -#define ADD_ATTR(ATTR) \ +#define HANDLE_DIE_HASH_ATTR(NAME) \ { \ - if (ATTR) \ - hashAttribute(ATTR, Tag); \ + if (Attrs.NAME) \ + hashAttribute(Attrs.NAME, Tag); \ } - - ADD_ATTR(Attrs.DW_AT_name); - ADD_ATTR(Attrs.DW_AT_accessibility); - ADD_ATTR(Attrs.DW_AT_address_class); - ADD_ATTR(Attrs.DW_AT_allocated); - ADD_ATTR(Attrs.DW_AT_artificial); - ADD_ATTR(Attrs.DW_AT_associated); - ADD_ATTR(Attrs.DW_AT_binary_scale); - ADD_ATTR(Attrs.DW_AT_bit_offset); - ADD_ATTR(Attrs.DW_AT_bit_size); - ADD_ATTR(Attrs.DW_AT_bit_stride); - ADD_ATTR(Attrs.DW_AT_byte_size); - ADD_ATTR(Attrs.DW_AT_byte_stride); - ADD_ATTR(Attrs.DW_AT_const_expr); - ADD_ATTR(Attrs.DW_AT_const_value); - ADD_ATTR(Attrs.DW_AT_containing_type); - ADD_ATTR(Attrs.DW_AT_count); - ADD_ATTR(Attrs.DW_AT_data_bit_offset); - ADD_ATTR(Attrs.DW_AT_data_location); - ADD_ATTR(Attrs.DW_AT_data_member_location); - ADD_ATTR(Attrs.DW_AT_decimal_scale); - ADD_ATTR(Attrs.DW_AT_decimal_sign); - ADD_ATTR(Attrs.DW_AT_default_value); - ADD_ATTR(Attrs.DW_AT_digit_count); - ADD_ATTR(Attrs.DW_AT_discr); - ADD_ATTR(Attrs.DW_AT_discr_list); - ADD_ATTR(Attrs.DW_AT_discr_value); - ADD_ATTR(Attrs.DW_AT_encoding); - ADD_ATTR(Attrs.DW_AT_enum_class); - ADD_ATTR(Attrs.DW_AT_endianity); - ADD_ATTR(Attrs.DW_AT_explicit); - ADD_ATTR(Attrs.DW_AT_is_optional); - ADD_ATTR(Attrs.DW_AT_location); - ADD_ATTR(Attrs.DW_AT_lower_bound); - ADD_ATTR(Attrs.DW_AT_mutable); - ADD_ATTR(Attrs.DW_AT_ordering); - ADD_ATTR(Attrs.DW_AT_picture_string); - ADD_ATTR(Attrs.DW_AT_prototyped); - ADD_ATTR(Attrs.DW_AT_small); - ADD_ATTR(Attrs.DW_AT_segment); - ADD_ATTR(Attrs.DW_AT_string_length); - ADD_ATTR(Attrs.DW_AT_threads_scaled); - ADD_ATTR(Attrs.DW_AT_upper_bound); - ADD_ATTR(Attrs.DW_AT_use_location); - ADD_ATTR(Attrs.DW_AT_use_UTF8); - ADD_ATTR(Attrs.DW_AT_variable_parameter); - ADD_ATTR(Attrs.DW_AT_virtuality); - ADD_ATTR(Attrs.DW_AT_visibility); - ADD_ATTR(Attrs.DW_AT_vtable_elem_location); - ADD_ATTR(Attrs.DW_AT_type); - +#include "DIEHashAttributes.def" // FIXME: Add the extended attributes. } @@ -478,10 +380,12 @@ void DIEHash::computeHash(const DIE &Die) { /// DWARF4 standard. It is an md5 hash of the flattened description of the DIE /// with the inclusion of the full CU and all top level CU entities. // TODO: Initialize the type chain at 0 instead of 1 for CU signatures. -uint64_t DIEHash::computeCUSignature(const DIE &Die) { +uint64_t DIEHash::computeCUSignature(StringRef DWOName, const DIE &Die) { Numbering.clear(); Numbering[&Die] = 1; + if (!DWOName.empty()) + Hash.update(DWOName); // Hash the DIE. computeHash(Die); diff --git a/interpreter/llvm/src/lib/CodeGen/AsmPrinter/DIEHash.h b/interpreter/llvm/src/lib/CodeGen/AsmPrinter/DIEHash.h index 996cd7ef3d2e5..29337ae38a996 100644 --- a/interpreter/llvm/src/lib/CodeGen/AsmPrinter/DIEHash.h +++ b/interpreter/llvm/src/lib/CodeGen/AsmPrinter/DIEHash.h @@ -28,64 +28,15 @@ class CompileUnit; class DIEHash { // Collection of all attributes used in hashing a particular DIE. 
struct DIEAttrs { - DIEValue DW_AT_name; - DIEValue DW_AT_accessibility; - DIEValue DW_AT_address_class; - DIEValue DW_AT_allocated; - DIEValue DW_AT_artificial; - DIEValue DW_AT_associated; - DIEValue DW_AT_binary_scale; - DIEValue DW_AT_bit_offset; - DIEValue DW_AT_bit_size; - DIEValue DW_AT_bit_stride; - DIEValue DW_AT_byte_size; - DIEValue DW_AT_byte_stride; - DIEValue DW_AT_const_expr; - DIEValue DW_AT_const_value; - DIEValue DW_AT_containing_type; - DIEValue DW_AT_count; - DIEValue DW_AT_data_bit_offset; - DIEValue DW_AT_data_location; - DIEValue DW_AT_data_member_location; - DIEValue DW_AT_decimal_scale; - DIEValue DW_AT_decimal_sign; - DIEValue DW_AT_default_value; - DIEValue DW_AT_digit_count; - DIEValue DW_AT_discr; - DIEValue DW_AT_discr_list; - DIEValue DW_AT_discr_value; - DIEValue DW_AT_encoding; - DIEValue DW_AT_enum_class; - DIEValue DW_AT_endianity; - DIEValue DW_AT_explicit; - DIEValue DW_AT_is_optional; - DIEValue DW_AT_location; - DIEValue DW_AT_lower_bound; - DIEValue DW_AT_mutable; - DIEValue DW_AT_ordering; - DIEValue DW_AT_picture_string; - DIEValue DW_AT_prototyped; - DIEValue DW_AT_small; - DIEValue DW_AT_segment; - DIEValue DW_AT_string_length; - DIEValue DW_AT_threads_scaled; - DIEValue DW_AT_upper_bound; - DIEValue DW_AT_use_location; - DIEValue DW_AT_use_UTF8; - DIEValue DW_AT_variable_parameter; - DIEValue DW_AT_virtuality; - DIEValue DW_AT_visibility; - DIEValue DW_AT_vtable_elem_location; - DIEValue DW_AT_type; - - // Insert any additional ones here... +#define HANDLE_DIE_HASH_ATTR(NAME) DIEValue NAME; +#include "DIEHashAttributes.def" }; public: DIEHash(AsmPrinter *A = nullptr) : AP(A) {} /// \brief Computes the CU signature. - uint64_t computeCUSignature(const DIE &Die); + uint64_t computeCUSignature(StringRef DWOName, const DIE &Die); /// \brief Computes the type signature. 
uint64_t computeTypeSignature(const DIE &Die); diff --git a/interpreter/llvm/src/lib/CodeGen/AsmPrinter/DIEHashAttributes.def b/interpreter/llvm/src/lib/CodeGen/AsmPrinter/DIEHashAttributes.def new file mode 100644 index 0000000000000..28a02390fccb6 --- /dev/null +++ b/interpreter/llvm/src/lib/CodeGen/AsmPrinter/DIEHashAttributes.def @@ -0,0 +1,55 @@ +#ifndef HANDLE_DIE_HASH_ATTR +#error "Missing macro definition of HANDLE_DIE_HASH_ATTR" +#endif + +HANDLE_DIE_HASH_ATTR(DW_AT_name) +HANDLE_DIE_HASH_ATTR(DW_AT_accessibility) +HANDLE_DIE_HASH_ATTR(DW_AT_address_class) +HANDLE_DIE_HASH_ATTR(DW_AT_allocated) +HANDLE_DIE_HASH_ATTR(DW_AT_artificial) +HANDLE_DIE_HASH_ATTR(DW_AT_associated) +HANDLE_DIE_HASH_ATTR(DW_AT_binary_scale) +HANDLE_DIE_HASH_ATTR(DW_AT_bit_offset) +HANDLE_DIE_HASH_ATTR(DW_AT_bit_size) +HANDLE_DIE_HASH_ATTR(DW_AT_bit_stride) +HANDLE_DIE_HASH_ATTR(DW_AT_byte_size) +HANDLE_DIE_HASH_ATTR(DW_AT_byte_stride) +HANDLE_DIE_HASH_ATTR(DW_AT_const_expr) +HANDLE_DIE_HASH_ATTR(DW_AT_const_value) +HANDLE_DIE_HASH_ATTR(DW_AT_containing_type) +HANDLE_DIE_HASH_ATTR(DW_AT_count) +HANDLE_DIE_HASH_ATTR(DW_AT_data_bit_offset) +HANDLE_DIE_HASH_ATTR(DW_AT_data_location) +HANDLE_DIE_HASH_ATTR(DW_AT_data_member_location) +HANDLE_DIE_HASH_ATTR(DW_AT_decimal_scale) +HANDLE_DIE_HASH_ATTR(DW_AT_decimal_sign) +HANDLE_DIE_HASH_ATTR(DW_AT_default_value) +HANDLE_DIE_HASH_ATTR(DW_AT_digit_count) +HANDLE_DIE_HASH_ATTR(DW_AT_discr) +HANDLE_DIE_HASH_ATTR(DW_AT_discr_list) +HANDLE_DIE_HASH_ATTR(DW_AT_discr_value) +HANDLE_DIE_HASH_ATTR(DW_AT_encoding) +HANDLE_DIE_HASH_ATTR(DW_AT_enum_class) +HANDLE_DIE_HASH_ATTR(DW_AT_endianity) +HANDLE_DIE_HASH_ATTR(DW_AT_explicit) +HANDLE_DIE_HASH_ATTR(DW_AT_is_optional) +HANDLE_DIE_HASH_ATTR(DW_AT_location) +HANDLE_DIE_HASH_ATTR(DW_AT_lower_bound) +HANDLE_DIE_HASH_ATTR(DW_AT_mutable) +HANDLE_DIE_HASH_ATTR(DW_AT_ordering) +HANDLE_DIE_HASH_ATTR(DW_AT_picture_string) +HANDLE_DIE_HASH_ATTR(DW_AT_prototyped) +HANDLE_DIE_HASH_ATTR(DW_AT_small) +HANDLE_DIE_HASH_ATTR(DW_AT_segment) +HANDLE_DIE_HASH_ATTR(DW_AT_string_length) +HANDLE_DIE_HASH_ATTR(DW_AT_threads_scaled) +HANDLE_DIE_HASH_ATTR(DW_AT_upper_bound) +HANDLE_DIE_HASH_ATTR(DW_AT_use_location) +HANDLE_DIE_HASH_ATTR(DW_AT_use_UTF8) +HANDLE_DIE_HASH_ATTR(DW_AT_variable_parameter) +HANDLE_DIE_HASH_ATTR(DW_AT_virtuality) +HANDLE_DIE_HASH_ATTR(DW_AT_visibility) +HANDLE_DIE_HASH_ATTR(DW_AT_vtable_elem_location) +HANDLE_DIE_HASH_ATTR(DW_AT_type) + +#undef HANDLE_DIE_HASH_ATTR diff --git a/interpreter/llvm/src/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp b/interpreter/llvm/src/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp index 22fd7bb460560..c2ad9db81cfd9 100644 --- a/interpreter/llvm/src/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp +++ b/interpreter/llvm/src/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp @@ -194,6 +194,10 @@ void llvm::calculateDbgValueHistory(const MachineFunction *MF, // some variables. for (const MachineOperand &MO : MI.operands()) { if (MO.isReg() && MO.isDef() && MO.getReg()) { + // Ignore call instructions that claim to clobber SP. The AArch64 + // backend does this for aggregate function arguments. + if (MI.isCall() && MO.getReg() == SP) + continue; // If this is a virtual register, only clobber it since it doesn't // have aliases. if (TRI->isVirtualRegister(MO.getReg())) @@ -209,8 +213,7 @@ void llvm::calculateDbgValueHistory(const MachineFunction *MF, } else if (MO.isRegMask()) { // If this is a register mask operand, clobber all debug values in // non-CSRs. 
- for (int I = ChangingRegs.find_first(); I != -1; - I = ChangingRegs.find_next(I)) { + for (unsigned I : ChangingRegs.set_bits()) { // Don't consider SP to be clobbered by register masks. if (unsigned(I) != SP && TRI->isPhysicalRegister(I) && MO.clobbersPhysReg(I)) { diff --git a/interpreter/llvm/src/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp b/interpreter/llvm/src/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp index 826162ad47c45..0971c5942203c 100644 --- a/interpreter/llvm/src/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp +++ b/interpreter/llvm/src/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp @@ -115,7 +115,8 @@ uint64_t DebugHandlerBase::getBaseTypeSize(const DITypeRef TyRef) { return getBaseTypeSize(BaseType); } -bool hasDebugInfo(const MachineModuleInfo *MMI, const MachineFunction *MF) { +static bool hasDebugInfo(const MachineModuleInfo *MMI, + const MachineFunction *MF) { if (!MMI->hasDebugInfo()) return false; auto *SP = MF->getFunction()->getSubprogram(); @@ -223,9 +224,9 @@ void DebugHandlerBase::endInstruction() { return; assert(CurMI != nullptr); - // Don't create a new label after DBG_VALUE instructions. - // They don't generate code. - if (!CurMI->isDebugValue()) { + // Don't create a new label after DBG_VALUE and other instructions that don't + // generate code. + if (!CurMI->isMetaInstruction()) { PrevLabel = nullptr; PrevInstBB = CurMI->getParent(); } diff --git a/interpreter/llvm/src/lib/CodeGen/AsmPrinter/DebugLocStream.h b/interpreter/llvm/src/lib/CodeGen/AsmPrinter/DebugLocStream.h index 3656e9d950992..0c551dfff9ccb 100644 --- a/interpreter/llvm/src/lib/CodeGen/AsmPrinter/DebugLocStream.h +++ b/interpreter/llvm/src/lib/CodeGen/AsmPrinter/DebugLocStream.h @@ -10,9 +10,9 @@ #ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCSTREAM_H #define LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCSTREAM_H +#include "ByteStreamer.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" -#include "ByteStreamer.h" namespace llvm { diff --git a/interpreter/llvm/src/lib/CodeGen/AsmPrinter/DwarfAccelTable.h b/interpreter/llvm/src/lib/CodeGen/AsmPrinter/DwarfAccelTable.h index 05ac1cb02f760..b1ef8cfe989d0 100644 --- a/interpreter/llvm/src/lib/CodeGen/AsmPrinter/DwarfAccelTable.h +++ b/interpreter/llvm/src/lib/CodeGen/AsmPrinter/DwarfAccelTable.h @@ -16,12 +16,12 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringMap.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/DIE.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/DataTypes.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/FormattedStream.h" diff --git a/interpreter/llvm/src/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/interpreter/llvm/src/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp index e08306b001fbf..dd7f7931b06b8 100644 --- a/interpreter/llvm/src/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp +++ b/interpreter/llvm/src/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp @@ -14,6 +14,7 @@ #include "DwarfException.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -28,7 +29,6 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MachineLocation.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" #include 
"llvm/Target/TargetFrameLowering.h" diff --git a/interpreter/llvm/src/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/interpreter/llvm/src/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index e172712cf8894..676c48fe5c678 100644 --- a/interpreter/llvm/src/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/interpreter/llvm/src/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -245,17 +245,6 @@ void DwarfCompileUnit::addRange(RangeSpan Range) { CURanges.back().setEnd(Range.getEnd()); } -DIE::value_iterator -DwarfCompileUnit::addSectionLabel(DIE &Die, dwarf::Attribute Attribute, - const MCSymbol *Label, const MCSymbol *Sec) { - if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) - return addLabel(Die, Attribute, - DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset - : dwarf::DW_FORM_data4, - Label); - return addSectionDelta(Die, Attribute, Label, Sec); -} - void DwarfCompileUnit::initStmtList() { // Define start line table label for each Compile Unit. MCSymbol *LineTableStartSym = @@ -380,15 +369,6 @@ void DwarfCompileUnit::constructScopeDIE( FinalChildren.push_back(std::move(ScopeDIE)); } -DIE::value_iterator -DwarfCompileUnit::addSectionDelta(DIE &Die, dwarf::Attribute Attribute, - const MCSymbol *Hi, const MCSymbol *Lo) { - return Die.addValue(DIEValueAllocator, Attribute, - DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset - : dwarf::DW_FORM_data4, - new (DIEValueAllocator) DIEDelta(Hi, Lo)); -} - void DwarfCompileUnit::addScopeRangeList(DIE &ScopeDIE, SmallVector Range) { const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); @@ -552,7 +532,7 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV, int Offset = TFI->getFrameIndexReference(*Asm->MF, Fragment.FI, FrameReg); DwarfExpr.addFragmentOffset(Expr); SmallVector Ops; - Ops.push_back(dwarf::DW_OP_plus); + Ops.push_back(dwarf::DW_OP_plus_uconst); Ops.push_back(Offset); Ops.append(Expr->elements_begin(), Expr->elements_end()); DIExpressionCursor Cursor(Ops); @@ -684,8 +664,9 @@ DIE *DwarfCompileUnit::constructImportedEntityDIE( else EntityDie = getDIE(Entity); assert(EntityDie); - addSourceLine(*IMDie, Module->getLine(), Module->getScope()->getFilename(), - Module->getScope()->getDirectory()); + auto *File = Module->getFile(); + addSourceLine(*IMDie, Module->getLine(), File ? File->getFilename() : "", + File ? File->getDirectory() : ""); addDIEEntry(*IMDie, dwarf::DW_AT_import, *EntityDie); StringRef Name = Module->getName(); if (!Name.empty()) @@ -760,7 +741,7 @@ void DwarfCompileUnit::emitHeader(bool UseOffsets) { /// addGlobalName - Add a new global name to the compile unit. void DwarfCompileUnit::addGlobalName(StringRef Name, const DIE &Die, const DIScope *Context) { - if (includeMinimalInlineScopes()) + if (!DD->hasDwarfPubSections(includeMinimalInlineScopes())) return; std::string FullName = getParentContextString(Context) + Name.str(); GlobalNames[FullName] = &Die; @@ -768,7 +749,7 @@ void DwarfCompileUnit::addGlobalName(StringRef Name, const DIE &Die, void DwarfCompileUnit::addGlobalNameForTypeUnit(StringRef Name, const DIScope *Context) { - if (includeMinimalInlineScopes()) + if (!DD->hasDwarfPubSections(includeMinimalInlineScopes())) return; std::string FullName = getParentContextString(Context) + Name.str(); // Insert, allowing the entry to remain as-is if it's already present @@ -781,7 +762,7 @@ void DwarfCompileUnit::addGlobalNameForTypeUnit(StringRef Name, /// Add a new global type to the unit. 
void DwarfCompileUnit::addGlobalType(const DIType *Ty, const DIE &Die, const DIScope *Context) { - if (includeMinimalInlineScopes()) + if (!DD->hasDwarfPubSections(includeMinimalInlineScopes())) return; std::string FullName = getParentContextString(Context) + Ty->getName().str(); GlobalTypes[FullName] = &Die; @@ -789,7 +770,7 @@ void DwarfCompileUnit::addGlobalType(const DIType *Ty, const DIE &Die, void DwarfCompileUnit::addGlobalTypeUnitType(const DIType *Ty, const DIScope *Context) { - if (includeMinimalInlineScopes()) + if (!DD->hasDwarfPubSections(includeMinimalInlineScopes())) return; std::string FullName = getParentContextString(Context) + Ty->getName().str(); // Insert, allowing the entry to remain as-is if it's already present @@ -821,7 +802,7 @@ void DwarfCompileUnit::addAddress(DIE &Die, dwarf::Attribute Attribute, SmallVector Ops; if (Location.isIndirect() && Location.getOffset()) { - Ops.push_back(dwarf::DW_OP_plus); + Ops.push_back(dwarf::DW_OP_plus_uconst); Ops.push_back(Location.getOffset()); } DIExpressionCursor Cursor(Ops); @@ -850,7 +831,7 @@ void DwarfCompileUnit::addComplexAddress(const DbgVariable &DV, DIE &Die, SmallVector Ops; if (Location.isIndirect() && Location.getOffset()) { - Ops.push_back(dwarf::DW_OP_plus); + Ops.push_back(dwarf::DW_OP_plus_uconst); Ops.push_back(Location.getOffset()); } Ops.append(DIExpr->elements_begin(), DIExpr->elements_end()); diff --git a/interpreter/llvm/src/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/interpreter/llvm/src/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index 77e9e671529f5..e386727928673 100644 --- a/interpreter/llvm/src/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/interpreter/llvm/src/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -15,8 +15,8 @@ #define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H #include "DwarfUnit.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/IR/DebugInfo.h" -#include "llvm/Support/Dwarf.h" namespace llvm { @@ -77,8 +77,6 @@ class DwarfCompileUnit final : public DwarfUnit { bool isDwoUnit() const override; - bool includeMinimalInlineScopes() const; - DenseMap &getAbstractSPDies() { if (isDwoUnit() && !DD->shareAcrossDWOCUs()) return AbstractSPDies; @@ -101,6 +99,8 @@ class DwarfCompileUnit final : public DwarfUnit { return Skeleton; } + bool includeMinimalInlineScopes() const; + void initStmtList(); /// Apply the DW_AT_stmt_list from this compile unit to the specified DIE. @@ -127,10 +127,6 @@ class DwarfCompileUnit final : public DwarfUnit { void addLocalLabelAddress(DIE &Die, dwarf::Attribute Attribute, const MCSymbol *Label); - /// addSectionDelta - Add a label delta attribute data and value. - DIE::value_iterator addSectionDelta(DIE &Die, dwarf::Attribute Attribute, - const MCSymbol *Hi, const MCSymbol *Lo); - DwarfCompileUnit &getCU() override { return *this; } unsigned getOrCreateSourceID(StringRef FileName, StringRef DirName) override; @@ -151,12 +147,6 @@ class DwarfCompileUnit final : public DwarfUnit { void attachLowHighPC(DIE &D, const MCSymbol *Begin, const MCSymbol *End); - /// addSectionLabel - Add a Dwarf section label attribute data and value. - /// - DIE::value_iterator addSectionLabel(DIE &Die, dwarf::Attribute Attribute, - const MCSymbol *Label, - const MCSymbol *Sec); - /// \brief Find DIE for the given subprogram and attach appropriate /// DW_AT_low_pc and DW_AT_high_pc attributes. 
If there are global /// variables in this scope then create and insert DIEs for these diff --git a/interpreter/llvm/src/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/interpreter/llvm/src/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 3410b98d77766..f1b4d9f20ca96 100644 --- a/interpreter/llvm/src/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/interpreter/llvm/src/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -22,6 +22,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Triple.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/DIE.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" @@ -38,7 +39,6 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/LEB128.h" @@ -252,12 +252,6 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) // Handle split DWARF. HasSplitDwarf = !Asm->TM.Options.MCOptions.SplitDwarfFile.empty(); - // Pubnames/pubtypes on by default for GDB. - if (DwarfPubSections == Default) - HasDwarfPubSections = tuneForGDB(); - else - HasDwarfPubSections = DwarfPubSections == Enable; - // SCE defaults to linkage names only for abstract subprograms. if (DwarfLinkageNames == DefaultLinkageNames) UseAllLinkageNames = !tuneForSCE(); @@ -380,19 +374,35 @@ void DwarfDebug::constructAbstractSubprogramScopeDIE(DwarfCompileUnit &SrcCU, // Find the subprogram's DwarfCompileUnit in the SPMap in case the subprogram // was inlined from another compile unit. - auto &CU = *CUMap.lookup(SP->getUnit()); - if (auto *SkelCU = CU.getSkeleton()) { - (shareAcrossDWOCUs() ? CU : SrcCU) - .constructAbstractSubprogramScopeDIE(Scope); - if (CU.getCUNode()->getSplitDebugInlining()) - SkelCU->constructAbstractSubprogramScopeDIE(Scope); - } else { - CU.constructAbstractSubprogramScopeDIE(Scope); + if (useSplitDwarf() && !shareAcrossDWOCUs() && !SP->getUnit()->getSplitDebugInlining()) + // Avoid building the original CU if it won't be used + SrcCU.constructAbstractSubprogramScopeDIE(Scope); + else { + auto &CU = getOrCreateDwarfCompileUnit(SP->getUnit()); + if (auto *SkelCU = CU.getSkeleton()) { + (shareAcrossDWOCUs() ? CU : SrcCU) + .constructAbstractSubprogramScopeDIE(Scope); + if (CU.getCUNode()->getSplitDebugInlining()) + SkelCU->constructAbstractSubprogramScopeDIE(Scope); + } else + CU.constructAbstractSubprogramScopeDIE(Scope); } } -void DwarfDebug::addGnuPubAttributes(DwarfUnit &U, DIE &D) const { - if (!GenerateGnuPubSections) +bool DwarfDebug::hasDwarfPubSections(bool includeMinimalInlineScopes) const { + // Opting in to GNU Pubnames/types overrides the default to ensure these are + // generated for things like Gold's gdb_index generation. + if (GenerateGnuPubSections) + return true; + + if (DwarfPubSections == Default) + return tuneForGDB() && !includeMinimalInlineScopes; + + return DwarfPubSections == Enable; +} + +void DwarfDebug::addGnuPubAttributes(DwarfCompileUnit &U, DIE &D) const { + if (!hasDwarfPubSections(U.includeMinimalInlineScopes())) return; U.addFlag(D, dwarf::DW_AT_GNU_pubnames); @@ -401,7 +411,9 @@ void DwarfDebug::addGnuPubAttributes(DwarfUnit &U, DIE &D) const { // Create new DwarfCompileUnit for the given metadata node with tag // DW_TAG_compile_unit. 
DwarfCompileUnit & -DwarfDebug::constructDwarfCompileUnit(const DICompileUnit *DIUnit) { +DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) { + if (auto *CU = CUMap.lookup(DIUnit)) + return *CU; StringRef FN = DIUnit->getFilename(); CompilationDir = DIUnit->getDirectory(); @@ -534,7 +546,12 @@ void DwarfDebug::beginModule() { } for (DICompileUnit *CUNode : M->debug_compile_units()) { - DwarfCompileUnit &CU = constructDwarfCompileUnit(CUNode); + if (CUNode->getEnumTypes().empty() && CUNode->getRetainedTypes().empty() && + CUNode->getGlobalVariables().empty() && + CUNode->getImportedEntities().empty() && CUNode->getMacros().empty()) + continue; + + DwarfCompileUnit &CU = getOrCreateDwarfCompileUnit(CUNode); for (auto *IE : CUNode->getImportedEntities()) CU.addImportedEntity(IE); @@ -581,11 +598,12 @@ void DwarfDebug::finishVariableDefinitions() { } void DwarfDebug::finishSubprogramDefinitions() { - for (const DISubprogram *SP : ProcessedSPNodes) - if (SP->getUnit()->getEmissionKind() != DICompileUnit::NoDebug) - forBothCUs(*CUMap.lookup(SP->getUnit()), [&](DwarfCompileUnit &CU) { - CU.finishSubprogramDefinition(SP); - }); + for (const DISubprogram *SP : ProcessedSPNodes) { + assert(SP->getUnit()->getEmissionKind() != DICompileUnit::NoDebug); + forBothCUs( + getOrCreateDwarfCompileUnit(SP->getUnit()), + [&](DwarfCompileUnit &CU) { CU.finishSubprogramDefinition(SP); }); + } } void DwarfDebug::finalizeModuleInfo() { @@ -595,6 +613,13 @@ void DwarfDebug::finalizeModuleInfo() { finishVariableDefinitions(); + // Include the DWO file name in the hash if there's more than one CU. + // This handles ThinLTO's situation where imported CUs may very easily be + // duplicate with the same CU partially imported into another ThinLTO unit. + StringRef DWOName; + if (CUMap.size() > 1) + DWOName = Asm->TM.Options.MCOptions.SplitDwarfFile; + // Handle anything that needs to be done on a per-unit basis after // all other generation. for (const auto &P : CUMap) { @@ -609,7 +634,8 @@ void DwarfDebug::finalizeModuleInfo() { auto *SkCU = TheCU.getSkeleton(); if (useSplitDwarf()) { // Emit a unique identifier for this CU. - uint64_t ID = DIEHash(Asm).computeCUSignature(TheCU.getUnitDie()); + uint64_t ID = + DIEHash(Asm).computeCUSignature(DWOName, TheCU.getUnitDie()); TheCU.addUInt(TheCU.getUnitDie(), dwarf::DW_AT_GNU_dwo_id, dwarf::DW_FORM_data8, ID); SkCU->addUInt(SkCU->getUnitDie(), dwarf::DW_AT_GNU_dwo_id, @@ -718,7 +744,9 @@ void DwarfDebug::endModule() { } // Emit the pubnames and pubtypes sections if requested. - if (HasDwarfPubSections) { + // The condition is optimistically correct - any CU not using GMLT (& + // implicit/default pubnames state) might still have pubnames. + if (hasDwarfPubSections(/* gmlt */ false)) { emitDebugPubNames(GenerateGnuPubSections); emitDebugPubTypes(GenerateGnuPubSections); } @@ -944,16 +972,62 @@ DbgVariable *DwarfDebug::createConcreteVariable(DwarfCompileUnit &TheCU, return ConcreteVariables.back().get(); } -// Determine whether this DBG_VALUE is valid at the beginning of the function. -static bool validAtEntry(const MachineInstr *MInsn) { - auto MBB = MInsn->getParent(); - // Is it in the entry basic block? - if (!MBB->pred_empty()) +/// Determine whether a *singular* DBG_VALUE is valid for the entirety of its +/// enclosing lexical scope. The check ensures there are no other instructions +/// in the same lexical scope preceding the DBG_VALUE and that its range is +/// either open or otherwise rolls off the end of the scope. 
+static bool validThroughout(LexicalScopes &LScopes, + const MachineInstr *DbgValue, + const MachineInstr *RangeEnd) { + assert(DbgValue->getDebugLoc() && "DBG_VALUE without a debug location"); + auto MBB = DbgValue->getParent(); + auto DL = DbgValue->getDebugLoc(); + auto *LScope = LScopes.findLexicalScope(DL); + // Scope doesn't exist; this is a dead DBG_VALUE. + if (!LScope) return false; - for (MachineBasicBlock::const_reverse_iterator I(MInsn); I != MBB->rend(); ++I) - if (!(I->isDebugValue() || I->getFlag(MachineInstr::FrameSetup))) + auto &LSRange = LScope->getRanges(); + if (LSRange.size() == 0) + return false; + + // Determine if the DBG_VALUE is valid at the beginning of its lexical block. + const MachineInstr *LScopeBegin = LSRange.front().first; + // Early exit if the lexical scope begins outside of the current block. + if (LScopeBegin->getParent() != MBB) + return false; + MachineBasicBlock::const_reverse_iterator Pred(DbgValue); + for (++Pred; Pred != MBB->rend(); ++Pred) { + if (Pred->getFlag(MachineInstr::FrameSetup)) + break; + auto PredDL = Pred->getDebugLoc(); + if (!PredDL || Pred->isMetaInstruction()) + continue; + // Check whether the instruction preceding the DBG_VALUE is in the same + // (sub)scope as the DBG_VALUE. + if (DL->getScope() == PredDL->getScope()) + return false; + auto *PredScope = LScopes.findLexicalScope(PredDL); + if (!PredScope || LScope->dominates(PredScope)) return false; - return true; + } + + // If the range of the DBG_VALUE is open-ended, report success. + if (!RangeEnd) + return true; + + // Fail if there are instructions belonging to our scope in another block. + const MachineInstr *LScopeEnd = LSRange.back().second; + if (LScopeEnd->getParent() != MBB) + return false; + + // Single, constant DBG_VALUEs in the prologue are promoted to be live + // throughout the function. This is a hack, presumably for DWARF v2 and not + // necessarily correct. It would be much better to use a dbg.declare instead + // if we know the constant is live throughout the scope. + if (DbgValue->getOperand(0).isImm() && MBB->pred_empty()) + return true; + + return false; } // Find variables for each lexical scope. @@ -988,11 +1062,9 @@ void DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU, const MachineInstr *MInsn = Ranges.front().first; assert(MInsn->isDebugValue() && "History must begin with debug value"); - // Check if there is a single DBG_VALUE, valid throughout the function. - // A single constant is also considered valid for the entire function. + // Check if there is a single DBG_VALUE, valid throughout the var's scope. if (Ranges.size() == 1 && - (MInsn->getOperand(0).isImm() || - (validAtEntry(MInsn) && Ranges.front().second == nullptr))) { + validThroughout(LScopes, MInsn, Ranges.front().second)) { RegVar->initializeDbgValue(MInsn); continue; } @@ -1028,8 +1100,12 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) { DebugHandlerBase::beginInstruction(MI); assert(CurMI); + const auto *SP = MI->getParent()->getParent()->getFunction()->getSubprogram(); + if (!SP || SP->getUnit()->getEmissionKind() == DICompileUnit::NoDebug) + return; + // Check if source location changes, but ignore DBG_VALUE and CFI locations. - if (MI->isDebugValue() || MI->isCFIInstruction()) + if (MI->isMetaInstruction()) return; const DebugLoc &DL = MI->getDebugLoc(); // When we emit a line-0 record, we don't update PrevInstLoc; so look at @@ -1111,7 +1187,7 @@ static DebugLoc findPrologueEndLoc(const MachineFunction *MF) { // the beginning of the function body. 
for (const auto &MBB : *MF) for (const auto &MI : MBB) - if (!MI.isDebugValue() && !MI.getFlag(MachineInstr::FrameSetup) && + if (!MI.isMetaInstruction() && !MI.getFlag(MachineInstr::FrameSetup) && MI.getDebugLoc()) return MI.getDebugLoc(); return DebugLoc(); @@ -1122,40 +1198,28 @@ static DebugLoc findPrologueEndLoc(const MachineFunction *MF) { void DwarfDebug::beginFunctionImpl(const MachineFunction *MF) { CurFn = MF; - if (LScopes.empty()) + auto *SP = MF->getFunction()->getSubprogram(); + assert(LScopes.empty() || SP == LScopes.getCurrentFunctionScope()->getScopeNode()); + if (SP->getUnit()->getEmissionKind() == DICompileUnit::NoDebug) return; + DwarfCompileUnit &CU = getOrCreateDwarfCompileUnit(SP->getUnit()); + // Set DwarfDwarfCompileUnitID in MCContext to the Compile Unit this function // belongs to so that we add to the correct per-cu line table in the // non-asm case. - LexicalScope *FnScope = LScopes.getCurrentFunctionScope(); - // FnScope->getScopeNode() and DI->second should represent the same function, - // though they may not be the same MDNode due to inline functions merged in - // LTO where the debug info metadata still differs (either due to distinct - // written differences - two versions of a linkonce_odr function - // written/copied into two separate files, or some sub-optimal metadata that - // isn't structurally identical (see: file path/name info from clang, which - // includes the directory of the cpp file being built, even when the file name - // is absolute (such as an <> lookup header))) - auto *SP = cast(FnScope->getScopeNode()); - DwarfCompileUnit *TheCU = CUMap.lookup(SP->getUnit()); - if (!TheCU) { - assert(SP->getUnit()->getEmissionKind() == DICompileUnit::NoDebug && - "DICompileUnit missing from llvm.dbg.cu?"); - return; - } if (Asm->OutStreamer->hasRawTextSupport()) // Use a single line table if we are generating assembly. Asm->OutStreamer->getContext().setDwarfCompileUnitID(0); else - Asm->OutStreamer->getContext().setDwarfCompileUnitID(TheCU->getUniqueID()); + Asm->OutStreamer->getContext().setDwarfCompileUnitID(CU.getUniqueID()); // Record beginning of function. PrologEndLoc = findPrologueEndLoc(MF); - if (DILocation *L = PrologEndLoc) { + if (PrologEndLoc) { // We'd like to list the prologue as "not statements" but GDB behaves // poorly if we do that. Revisit this with caution/GDB (7.5+) testing. - auto *SP = L->getInlinedAtScope()->getSubprogram(); + auto *SP = PrologEndLoc->getInlinedAtScope()->getSubprogram(); recordSourceLine(SP->getScopeLine(), 0, SP, DWARF2_FLAG_IS_STMT); } } @@ -1395,7 +1459,7 @@ void DwarfDebug::emitDebugPubSection( const auto &Globals = (TheU->*Accessor)(); - if (Globals.empty()) + if (!hasDwarfPubSections(TheU->includeMinimalInlineScopes())) continue; if (auto *Skeleton = TheU->getSkeleton()) @@ -1491,7 +1555,7 @@ static void emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT, DwarfExpr.setMemoryLocationKind(); SmallVector Ops; if (Location.isIndirect() && Location.getOffset()) { - Ops.push_back(dwarf::DW_OP_plus); + Ops.push_back(dwarf::DW_OP_plus_uconst); Ops.push_back(Location.getOffset()); } Ops.append(DIExpr->elements_begin(), DIExpr->elements_end()); @@ -1544,6 +1608,9 @@ void DwarfDebug::emitDebugLocEntryLocation(const DebugLocStream::Entry &Entry) { // Emit locations into the debug loc section. void DwarfDebug::emitDebugLoc() { + if (DebugLocs.getLists().empty()) + return; + // Start the dwarf loc section. 
Asm->OutStreamer->SwitchSection( Asm->getObjFileLowering().getDwarfLocSection()); @@ -1755,6 +1822,9 @@ void DwarfDebug::emitDebugARanges() { /// Emit address ranges into a debug ranges section. void DwarfDebug::emitDebugRanges() { + if (CUMap.empty()) + return; + // Start the dwarf ranges section. Asm->OutStreamer->SwitchSection( Asm->getObjFileLowering().getDwarfRangesSection()); @@ -1834,6 +1904,9 @@ void DwarfDebug::emitMacroFile(DIMacroFile &F, DwarfCompileUnit &U) { /// Emit macros into a debug macinfo section. void DwarfDebug::emitDebugMacinfo() { + if (CUMap.empty()) + return; + // Start the dwarf macinfo section. Asm->OutStreamer->SwitchSection( Asm->getObjFileLowering().getDwarfMacinfoSection()); diff --git a/interpreter/llvm/src/lib/CodeGen/AsmPrinter/DwarfDebug.h b/interpreter/llvm/src/lib/CodeGen/AsmPrinter/DwarfDebug.h index b9c5aa9ffb231..5dfe06c64ec22 100644 --- a/interpreter/llvm/src/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/interpreter/llvm/src/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -134,6 +134,13 @@ class DbgVariable { assert(!FrameIndexExprs.empty() && "Expected an MMI entry"); assert(!V.FrameIndexExprs.empty() && "Expected an MMI entry"); + if (FrameIndexExprs.size()) { + auto *Expr = FrameIndexExprs.back().Expr; + // Get rid of duplicate non-fragment entries. More than one non-fragment + // dbg.declare makes no sense so ignore all but the first. + if (!Expr || !Expr->isFragment()) + return; + } FrameIndexExprs.append(V.FrameIndexExprs.begin(), V.FrameIndexExprs.end()); assert(all_of(FrameIndexExprs, [](FrameIndexExpr &FIE) { @@ -246,9 +253,6 @@ class DwarfDebug : public DebugHandlerBase { std::pair, const DICompositeType *>, 1> TypeUnitsUnderConstruction; - /// Whether to emit the pubnames/pubtypes sections. - bool HasDwarfPubSections; - /// Whether to use the GNU TLS opcode (instead of the standard opcode). bool UseGNUTLSOpcode; @@ -415,11 +419,11 @@ class DwarfDebug : public DebugHandlerBase { /// Flags to let the linker know we have emitted new style pubnames. Only /// emit it here if we don't have a skeleton CU for split dwarf. - void addGnuPubAttributes(DwarfUnit &U, DIE &D) const; + void addGnuPubAttributes(DwarfCompileUnit &U, DIE &D) const; /// Create new DwarfCompileUnit for the given metadata node with tag /// DW_TAG_compile_unit. - DwarfCompileUnit &constructDwarfCompileUnit(const DICompileUnit *DIUnit); + DwarfCompileUnit &getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit); /// Construct imported_module or imported_declaration DIE. void constructAndAddImportedEntityDIE(DwarfCompileUnit &TheCU, @@ -556,6 +560,8 @@ class DwarfDebug : public DebugHandlerBase { /// A helper function to check whether the DIE for a given Scope is /// going to be null. 
bool isLexicalScopeDIENull(LexicalScope *Scope); + + bool hasDwarfPubSections(bool includeMinimalInlineScopes) const; }; } // End of namespace llvm diff --git a/interpreter/llvm/src/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/interpreter/llvm/src/lib/CodeGen/AsmPrinter/DwarfExpression.cpp index ccd326917bfd3..fe38ee805682c 100644 --- a/interpreter/llvm/src/lib/CodeGen/AsmPrinter/DwarfExpression.cpp +++ b/interpreter/llvm/src/lib/CodeGen/AsmPrinter/DwarfExpression.cpp @@ -14,8 +14,8 @@ #include "DwarfExpression.h" #include "DwarfDebug.h" #include "llvm/ADT/SmallBitVector.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" @@ -248,15 +248,25 @@ bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI, assert(Reg.Size == 0 && "subregister has same size as superregister"); // Pattern-match combinations for which more efficient representations exist. - // [Reg, Offset, DW_OP_plus] --> [DW_OP_breg, Offset]. - // [Reg, Offset, DW_OP_minus] --> [DW_OP_breg, -Offset]. - // If Reg is a subregister we need to mask it out before subtracting. - if (Op && ((Op->getOp() == dwarf::DW_OP_plus) || - (Op->getOp() == dwarf::DW_OP_minus && !SubRegisterSizeInBits))) { - int Offset = Op->getArg(0); - SignedOffset = (Op->getOp() == dwarf::DW_OP_plus) ? Offset : -Offset; + // [Reg, DW_OP_plus_uconst, Offset] --> [DW_OP_breg, Offset]. + if (Op && (Op->getOp() == dwarf::DW_OP_plus_uconst)) { + SignedOffset = Op->getArg(0); ExprCursor.take(); } + + // [Reg, DW_OP_constu, Offset, DW_OP_plus] --> [DW_OP_breg, Offset] + // [Reg, DW_OP_constu, Offset, DW_OP_minus] --> [DW_OP_breg,-Offset] + // If Reg is a subregister we need to mask it out before subtracting. + if (Op && Op->getOp() == dwarf::DW_OP_constu) { + auto N = ExprCursor.peekNext(); + if (N && (N->getOp() == dwarf::DW_OP_plus || + (N->getOp() == dwarf::DW_OP_minus && !SubRegisterSizeInBits))) { + int Offset = Op->getArg(0); + SignedOffset = (N->getOp() == dwarf::DW_OP_minus) ? -Offset : Offset; + ExprCursor.consume(2); + } + } + if (FBReg) addFBReg(SignedOffset); else @@ -320,17 +330,14 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor, LocationKind = Unknown; return; } - case dwarf::DW_OP_plus: + case dwarf::DW_OP_plus_uconst: assert(LocationKind != Register); emitOp(dwarf::DW_OP_plus_uconst); emitUnsigned(Op->getArg(0)); break; + case dwarf::DW_OP_plus: case dwarf::DW_OP_minus: - assert(LocationKind != Register); - // There is no DW_OP_minus_uconst. - emitOp(dwarf::DW_OP_constu); - emitUnsigned(Op->getArg(0)); - emitOp(dwarf::DW_OP_minus); + emitOp(Op->getOp()); break; case dwarf::DW_OP_deref: { assert(LocationKind != Register); diff --git a/interpreter/llvm/src/lib/CodeGen/AsmPrinter/DwarfExpression.h b/interpreter/llvm/src/lib/CodeGen/AsmPrinter/DwarfExpression.h index de86132000672..728f8ad9225bc 100644 --- a/interpreter/llvm/src/lib/CodeGen/AsmPrinter/DwarfExpression.h +++ b/interpreter/llvm/src/lib/CodeGen/AsmPrinter/DwarfExpression.h @@ -42,6 +42,9 @@ class DIExpressionCursor { DIExpressionCursor(ArrayRef Expr) : Start(Expr.begin()), End(Expr.end()) {} + DIExpressionCursor(const DIExpressionCursor &C) + : Start(C.Start), End(C.End) {} + /// Consume one operation. 
Optional take() { if (Start == End) diff --git a/interpreter/llvm/src/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/interpreter/llvm/src/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index bf8318cffe543..4f4ebfc562977 100644 --- a/interpreter/llvm/src/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/interpreter/llvm/src/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -18,19 +18,19 @@ #include "DwarfExpression.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/None.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Metadata.h" -#include "llvm/MC/MachineLocation.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MachineLocation.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetLoweringObjectFile.h" @@ -475,7 +475,7 @@ void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die, SmallVector Ops; if (Location.isIndirect() && Location.getOffset()) { - Ops.push_back(dwarf::DW_OP_plus); + Ops.push_back(dwarf::DW_OP_plus_uconst); Ops.push_back(Location.getOffset()); } // If we started with a pointer to the __Block_byref... struct, then @@ -487,7 +487,7 @@ void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die, // DW_OP_plus_uconst ForwardingFieldOffset. Note there's no point in // adding the offset if it's 0. if (forwardingFieldOffset > 0) { - Ops.push_back(dwarf::DW_OP_plus); + Ops.push_back(dwarf::DW_OP_plus_uconst); Ops.push_back(forwardingFieldOffset); } @@ -499,7 +499,7 @@ void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die, // for the variable's field to get to the location of the actual variable: // DW_OP_plus_uconst varFieldOffset. Again, don't add if it's 0. if (varFieldOffset > 0) { - Ops.push_back(dwarf::DW_OP_plus); + Ops.push_back(dwarf::DW_OP_plus_uconst); Ops.push_back(varFieldOffset); } @@ -647,7 +647,7 @@ void DwarfUnit::addLinkageName(DIE &Die, StringRef LinkageName) { addString(Die, DD->getDwarfVersion() >= 4 ? dwarf::DW_AT_linkage_name : dwarf::DW_AT_MIPS_linkage_name, - GlobalValue::getRealLinkageName(LinkageName)); + GlobalValue::dropLLVMManglingEscape(LinkageName)); } void DwarfUnit::addTemplateParams(DIE &Buffer, DINodeArray TParams) { @@ -1587,6 +1587,26 @@ void DwarfTypeUnit::emitHeader(bool UseOffsets) { sizeof(Ty->getOffset())); } +DIE::value_iterator +DwarfUnit::addSectionDelta(DIE &Die, dwarf::Attribute Attribute, + const MCSymbol *Hi, const MCSymbol *Lo) { + return Die.addValue(DIEValueAllocator, Attribute, + DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset + : dwarf::DW_FORM_data4, + new (DIEValueAllocator) DIEDelta(Hi, Lo)); +} + +DIE::value_iterator +DwarfUnit::addSectionLabel(DIE &Die, dwarf::Attribute Attribute, + const MCSymbol *Label, const MCSymbol *Sec) { + if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) + return addLabel(Die, Attribute, + DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset + : dwarf::DW_FORM_data4, + Label); + return addSectionDelta(Die, Attribute, Label, Sec); +} + bool DwarfTypeUnit::isDwoUnit() const { // Since there are no skeleton type units, all type units are dwo type units // when split DWARF is being used. 
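[Illustrative note] The DwarfExpression.cpp hunk above changes how constant offsets are folded into a base-register location: DW_OP_plus_uconst now carries the offset directly, while the two-operand forms are matched as a DW_OP_constu followed by DW_OP_plus or DW_OP_minus (the minus form only for full registers), and the DwarfUnit.cpp hunk switches the emitted opcodes to match. Below is a minimal standalone model of that prefix matching, assuming simplified stand-ins: the Op enum, Cursor, and foldBregOffset are invented for illustration and are not LLVM's DIExpressionCursor API.

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <optional>
#include <vector>

// Simplified stand-ins for the DWARF opcodes involved; the real encodings
// live in llvm/BinaryFormat/Dwarf.h.
enum Op : uint8_t { DW_OP_plus_uconst, DW_OP_constu, DW_OP_plus, DW_OP_minus };

struct ExprOp {
  Op Opcode;
  int64_t Arg; // operand value, if any
};

// A tiny cursor mimicking peek/peekNext/consume over expression operations.
class Cursor {
  const std::vector<ExprOp> *Ops;
  std::size_t Pos = 0;
public:
  explicit Cursor(const std::vector<ExprOp> &O) : Ops(&O) {}
  std::optional<ExprOp> peek() const {
    if (Pos < Ops->size()) return (*Ops)[Pos];
    return std::nullopt;
  }
  std::optional<ExprOp> peekNext() const {
    if (Pos + 1 < Ops->size()) return (*Ops)[Pos + 1];
    return std::nullopt;
  }
  void consume(std::size_t N) { Pos += N; }
};

// Fold a leading constant-offset pattern into a signed breg offset, the way
// the patched addMachineRegExpression matches it.
int64_t foldBregOffset(Cursor &C, bool IsSubRegister) {
  int64_t Offset = 0;
  if (auto Cur = C.peek()) {
    if (Cur->Opcode == DW_OP_plus_uconst) {
      // [Reg, DW_OP_plus_uconst, Off] --> breg Off
      Offset = Cur->Arg;
      C.consume(1);
    } else if (Cur->Opcode == DW_OP_constu) {
      // [Reg, DW_OP_constu, Off, DW_OP_plus]  --> breg Off
      // [Reg, DW_OP_constu, Off, DW_OP_minus] --> breg -Off (full regs only,
      // since a subregister must be masked out before subtracting)
      if (auto Next = C.peekNext()) {
        if (Next->Opcode == DW_OP_plus ||
            (Next->Opcode == DW_OP_minus && !IsSubRegister)) {
          Offset = Next->Opcode == DW_OP_minus ? -Cur->Arg : Cur->Arg;
          C.consume(2);
        }
      }
    }
  }
  return Offset;
}

int main() {
  std::vector<ExprOp> Expr = {{DW_OP_constu, 8}, {DW_OP_minus, 0}};
  Cursor C(Expr);
  std::cout << foldBregOffset(C, /*IsSubRegister=*/false) << "\n"; // prints -8
}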
diff --git a/interpreter/llvm/src/lib/CodeGen/AsmPrinter/DwarfUnit.h b/interpreter/llvm/src/lib/CodeGen/AsmPrinter/DwarfUnit.h index 7acad2cbd89fc..4cc01b3298d47 100644 --- a/interpreter/llvm/src/lib/CodeGen/AsmPrinter/DwarfUnit.h +++ b/interpreter/llvm/src/lib/CodeGen/AsmPrinter/DwarfUnit.h @@ -291,6 +291,15 @@ class DwarfUnit : public DIEUnit { void constructTypeDIE(DIE &Buffer, const DICompositeType *CTy); + /// addSectionDelta - Add a label delta attribute data and value. + DIE::value_iterator addSectionDelta(DIE &Die, dwarf::Attribute Attribute, + const MCSymbol *Hi, const MCSymbol *Lo); + + /// Add a Dwarf section label attribute data and value. + DIE::value_iterator addSectionLabel(DIE &Die, dwarf::Attribute Attribute, + const MCSymbol *Label, + const MCSymbol *Sec); + protected: ~DwarfUnit(); diff --git a/interpreter/llvm/src/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/interpreter/llvm/src/lib/CodeGen/AsmPrinter/EHStreamer.cpp index 0a4a7a06cb2e7..e14d5be1177a6 100644 --- a/interpreter/llvm/src/lib/CodeGen/AsmPrinter/EHStreamer.cpp +++ b/interpreter/llvm/src/lib/CodeGen/AsmPrinter/EHStreamer.cpp @@ -309,7 +309,7 @@ computeCallSiteTable(SmallVectorImpl &CallSites, // If some instruction between the previous try-range and the end of the // function may throw, create a call-site entry with no landing pad for the // region following the try-range. - if (SawPotentiallyThrowing && !IsSJLJ && LastLabel != nullptr) { + if (SawPotentiallyThrowing && !IsSJLJ) { CallSiteEntry Site = { LastLabel, nullptr, nullptr, 0 }; CallSites.push_back(Site); } diff --git a/interpreter/llvm/src/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp b/interpreter/llvm/src/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp index 342efc3611c78..c5795559fb7d6 100644 --- a/interpreter/llvm/src/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp +++ b/interpreter/llvm/src/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp @@ -13,6 +13,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/BinaryFormat/ELF.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/GCMetadata.h" #include "llvm/CodeGen/GCMetadataPrinter.h" @@ -25,8 +26,7 @@ #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/Support/ELF.h" +#include "llvm/Target/TargetLoweringObjectFile.h" using namespace llvm; diff --git a/interpreter/llvm/src/lib/CodeGen/AsmPrinter/LLVMBuild.txt b/interpreter/llvm/src/lib/CodeGen/AsmPrinter/LLVMBuild.txt index 2bb66d12f3761..bde8148d259bc 100644 --- a/interpreter/llvm/src/lib/CodeGen/AsmPrinter/LLVMBuild.txt +++ b/interpreter/llvm/src/lib/CodeGen/AsmPrinter/LLVMBuild.txt @@ -19,4 +19,4 @@ type = Library name = AsmPrinter parent = Libraries -required_libraries = Analysis CodeGen Core DebugInfoCodeView DebugInfoMSF MC MCParser Support Target +required_libraries = Analysis BinaryFormat CodeGen Core DebugInfoCodeView DebugInfoMSF MC MCParser Support Target diff --git a/interpreter/llvm/src/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/interpreter/llvm/src/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp index 8baee4db772e8..035f1a0063aae 100644 --- a/interpreter/llvm/src/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp +++ b/interpreter/llvm/src/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp @@ -1,4 +1,4 @@ -//===-- OcamlGCPrinter.cpp - Ocaml frametable emitter ---------------------===// +//===- OcamlGCPrinter.cpp - Ocaml frametable emitter ----------------------===// // // The LLVM Compiler Infrastructure // @@ 
-11,23 +11,27 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/GCs.h" #include "llvm/ADT/SmallString.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Twine.h" #include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/GCMetadata.h" #include "llvm/CodeGen/GCMetadataPrinter.h" +#include "llvm/CodeGen/GCs.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" #include "llvm/IR/Mangler.h" #include "llvm/IR/Module.h" -#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDirectives.h" #include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSymbol.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/FormattedStream.h" #include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include +#include +#include +#include + using namespace llvm; namespace { @@ -37,7 +41,8 @@ class OcamlGCMetadataPrinter : public GCMetadataPrinter { void beginAssembly(Module &M, GCModuleInfo &Info, AsmPrinter &AP) override; void finishAssembly(Module &M, GCModuleInfo &Info, AsmPrinter &AP) override; }; -} + +} // end anonymous namespace static GCMetadataPrinterRegistry::Add Y("ocaml", "ocaml 3.10-compatible collector"); @@ -50,7 +55,7 @@ static void EmitCamlGlobal(const Module &M, AsmPrinter &AP, const char *Id) { std::string SymName; SymName += "caml"; size_t Letter = SymName.size(); - SymName.append(MId.begin(), find(MId, '.')); + SymName.append(MId.begin(), llvm::find(MId, '.')); SymName += "__"; SymName += Id; diff --git a/interpreter/llvm/src/lib/CodeGen/AsmPrinter/WinException.cpp b/interpreter/llvm/src/lib/CodeGen/AsmPrinter/WinException.cpp index 704f0ac2f1919..5d485f213573d 100644 --- a/interpreter/llvm/src/lib/CodeGen/AsmPrinter/WinException.cpp +++ b/interpreter/llvm/src/lib/CodeGen/AsmPrinter/WinException.cpp @@ -14,6 +14,8 @@ #include "WinException.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/COFF.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -29,8 +31,6 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCWin64EH.h" -#include "llvm/Support/COFF.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Target/TargetFrameLowering.h" @@ -101,7 +101,7 @@ void WinException::beginFunction(const MachineFunction *MF) { // functions may still refer to it. const WinEHFuncInfo &FuncInfo = *MF->getWinEHFuncInfo(); StringRef FLinkageName = - GlobalValue::getRealLinkageName(MF->getFunction()->getName()); + GlobalValue::dropLLVMManglingEscape(MF->getFunction()->getName()); emitEHRegistrationOffsetLabel(FuncInfo, FLinkageName); } shouldEmitLSDA = hasEHFunclets; @@ -174,7 +174,7 @@ static MCSymbol *getMCSymbolForMBB(AsmPrinter *Asm, // their funclet entry block's number. const MachineFunction *MF = MBB->getParent(); const Function *F = MF->getFunction(); - StringRef FuncLinkageName = GlobalValue::getRealLinkageName(F->getName()); + StringRef FuncLinkageName = GlobalValue::dropLLVMManglingEscape(F->getName()); MCContext &Ctx = MF->getContext(); StringRef HandlerPrefix = MBB->isCleanupFuncletEntry() ? "dtor" : "catch"; return Ctx.getOrCreateSymbol("?" 
+ HandlerPrefix + "$" + @@ -252,7 +252,7 @@ void WinException::endFunclet() { !CurrentFuncletEntry->isCleanupFuncletEntry()) { // If this is a C++ catch funclet (or the parent function), // emit a reference to the LSDA for the parent function. - StringRef FuncLinkageName = GlobalValue::getRealLinkageName(F->getName()); + StringRef FuncLinkageName = GlobalValue::dropLLVMManglingEscape(F->getName()); MCSymbol *FuncInfoXData = Asm->OutContext.getOrCreateSymbol( Twine("$cppxdata$", FuncLinkageName)); Asm->OutStreamer->EmitValue(create32bitRef(FuncInfoXData), 4); @@ -536,7 +536,7 @@ void WinException::emitCSpecificHandlerTable(const MachineFunction *MF) { // Emit a label assignment with the SEH frame offset so we can use it for // llvm.x86.seh.recoverfp. StringRef FLinkageName = - GlobalValue::getRealLinkageName(MF->getFunction()->getName()); + GlobalValue::dropLLVMManglingEscape(MF->getFunction()->getName()); MCSymbol *ParentFrameOffset = Ctx.getOrCreateParentFrameOffsetSymbol(FLinkageName); const MCExpr *MCOffset = @@ -635,7 +635,7 @@ void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) { auto &OS = *Asm->OutStreamer; const WinEHFuncInfo &FuncInfo = *MF->getWinEHFuncInfo(); - StringRef FuncLinkageName = GlobalValue::getRealLinkageName(F->getName()); + StringRef FuncLinkageName = GlobalValue::dropLLVMManglingEscape(F->getName()); SmallVector, 4> IPToStateTable; MCSymbol *FuncInfoXData = nullptr; @@ -942,7 +942,7 @@ void WinException::emitEHRegistrationOffsetLabel(const WinEHFuncInfo &FuncInfo, void WinException::emitExceptHandlerTable(const MachineFunction *MF) { MCStreamer &OS = *Asm->OutStreamer; const Function *F = MF->getFunction(); - StringRef FLinkageName = GlobalValue::getRealLinkageName(F->getName()); + StringRef FLinkageName = GlobalValue::dropLLVMManglingEscape(F->getName()); bool VerboseAsm = OS.isVerboseAsm(); auto AddComment = [&](const Twine &Comment) { diff --git a/interpreter/llvm/src/lib/CodeGen/AtomicExpandPass.cpp b/interpreter/llvm/src/lib/CodeGen/AtomicExpandPass.cpp index 17e6be05eb42e..aa9c8e94d08a3 100644 --- a/interpreter/llvm/src/lib/CodeGen/AtomicExpandPass.cpp +++ b/interpreter/llvm/src/lib/CodeGen/AtomicExpandPass.cpp @@ -17,6 +17,7 @@ #include "llvm/CodeGen/AtomicExpandUtils.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstIterator.h" @@ -35,12 +36,10 @@ using namespace llvm; namespace { class AtomicExpand: public FunctionPass { - const TargetMachine *TM; const TargetLowering *TLI; public: static char ID; // Pass identification, replacement for typeid - explicit AtomicExpand(const TargetMachine *TM = nullptr) - : FunctionPass(ID), TM(TM), TLI(nullptr) { + AtomicExpand() : FunctionPass(ID), TLI(nullptr) { initializeAtomicExpandPass(*PassRegistry::getPassRegistry()); } @@ -97,12 +96,10 @@ namespace { char AtomicExpand::ID = 0; char &llvm::AtomicExpandID = AtomicExpand::ID; -INITIALIZE_TM_PASS(AtomicExpand, "atomic-expand", "Expand Atomic instructions", - false, false) +INITIALIZE_PASS(AtomicExpand, DEBUG_TYPE, "Expand Atomic instructions", + false, false) -FunctionPass *llvm::createAtomicExpandPass(const TargetMachine *TM) { - return new AtomicExpand(TM); -} +FunctionPass *llvm::createAtomicExpandPass() { return new AtomicExpand(); } namespace { // Helper functions to retrieve the size of atomic instructions. 
@@ -172,9 +169,14 @@ bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) { } // end anonymous namespace bool AtomicExpand::runOnFunction(Function &F) { - if (!TM || !TM->getSubtargetImpl(F)->enableAtomicExpand()) + auto *TPC = getAnalysisIfAvailable<TargetPassConfig>(); + if (!TPC) + return false; + + auto &TM = TPC->getTM<TargetMachine>(); + if (!TM.getSubtargetImpl(F)->enableAtomicExpand()) return false; - TLI = TM->getSubtargetImpl(F)->getTargetLowering(); + TLI = TM.getSubtargetImpl(F)->getTargetLowering(); SmallVector AtomicInsts; @@ -359,7 +361,7 @@ LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) { auto *NewLI = Builder.CreateLoad(NewAddr); NewLI->setAlignment(LI->getAlignment()); NewLI->setVolatile(LI->isVolatile()); - NewLI->setAtomic(LI->getOrdering(), LI->getSynchScope()); + NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID()); DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n"); Value *NewVal = Builder.CreateBitCast(NewLI, LI->getType()); @@ -442,7 +444,7 @@ StoreInst *AtomicExpand::convertAtomicStoreToIntegerType(StoreInst *SI) { StoreInst *NewSI = Builder.CreateStore(NewVal, NewAddr); NewSI->setAlignment(SI->getAlignment()); NewSI->setVolatile(SI->isVolatile()); - NewSI->setAtomic(SI->getOrdering(), SI->getSynchScope()); + NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID()); DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n"); SI->eraseFromParent(); return NewSI; @@ -799,7 +801,7 @@ void AtomicExpand::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) { Value *FullWord_Cmp = Builder.CreateOr(Loaded_MaskOut, Cmp_Shifted); AtomicCmpXchgInst *NewCI = Builder.CreateAtomicCmpXchg( PMV.AlignedAddr, FullWord_Cmp, FullWord_NewVal, CI->getSuccessOrdering(), - CI->getFailureOrdering(), CI->getSynchScope()); + CI->getFailureOrdering(), CI->getSyncScopeID()); NewCI->setVolatile(CI->isVolatile()); // When we're building a strong cmpxchg, we need a loop, so you // might think we could use a weak cmpxchg inside. But, using strong @@ -922,7 +924,7 @@ AtomicCmpXchgInst *AtomicExpand::convertCmpXchgToIntegerType(AtomicCmpXchgInst * auto *NewCI = Builder.CreateAtomicCmpXchg(NewAddr, NewCmp, NewNewVal, CI->getSuccessOrdering(), CI->getFailureOrdering(), - CI->getSynchScope()); + CI->getSyncScopeID()); NewCI->setVolatile(CI->isVolatile()); NewCI->setWeak(CI->isWeak()); DEBUG(dbgs() << "Replaced " << *CI << " with " << *NewCI << "\n"); diff --git a/interpreter/llvm/src/lib/CodeGen/BasicTargetTransformInfo.cpp b/interpreter/llvm/src/lib/CodeGen/BasicTargetTransformInfo.cpp index a67e194356d82..be93ff0dad29d 100644 --- a/interpreter/llvm/src/lib/CodeGen/BasicTargetTransformInfo.cpp +++ b/interpreter/llvm/src/lib/CodeGen/BasicTargetTransformInfo.cpp @@ -15,17 +15,15 @@ /// //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/BasicTTIImpl.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/TargetTransformInfoImpl.h" +#include "llvm/CodeGen/BasicTTIImpl.h" #include "llvm/CodeGen/Passes.h" #include "llvm/Support/CommandLine.h" #include using namespace llvm; -#define DEBUG_TYPE "basictti" - // This flag is used by the template base class for BasicTTIImpl, and here to // provide a definition.
cl::opt diff --git a/interpreter/llvm/src/lib/CodeGen/BranchCoalescing.cpp b/interpreter/llvm/src/lib/CodeGen/BranchCoalescing.cpp index efdf300df8506..2c41b597843c9 100644 --- a/interpreter/llvm/src/lib/CodeGen/BranchCoalescing.cpp +++ b/interpreter/llvm/src/lib/CodeGen/BranchCoalescing.cpp @@ -27,7 +27,7 @@ using namespace llvm; -#define DEBUG_TYPE "coal-branch" +#define DEBUG_TYPE "branch-coalescing" static cl::opt EnableBranchCoalescing("enable-branch-coalesce", cl::Hidden, @@ -193,11 +193,11 @@ class BranchCoalescing : public MachineFunctionPass { char BranchCoalescing::ID = 0; char &llvm::BranchCoalescingID = BranchCoalescing::ID; -INITIALIZE_PASS_BEGIN(BranchCoalescing, "branch-coalescing", +INITIALIZE_PASS_BEGIN(BranchCoalescing, DEBUG_TYPE, "Branch Coalescing", false, false) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree) -INITIALIZE_PASS_END(BranchCoalescing, "branch-coalescing", "Branch Coalescing", +INITIALIZE_PASS_END(BranchCoalescing, DEBUG_TYPE, "Branch Coalescing", false, false) BranchCoalescing::CoalescingCandidateInfo::CoalescingCandidateInfo() diff --git a/interpreter/llvm/src/lib/CodeGen/BranchFolding.cpp b/interpreter/llvm/src/lib/CodeGen/BranchFolding.cpp index b63d9f4a43519..3c439e66944b2 100644 --- a/interpreter/llvm/src/lib/CodeGen/BranchFolding.cpp +++ b/interpreter/llvm/src/lib/CodeGen/BranchFolding.cpp @@ -1,4 +1,4 @@ -//===-- BranchFolding.cpp - Fold machine code branch instructions ---------===// +//===- BranchFolding.cpp - Fold machine code branch instructions ----------===// // // The LLVM Compiler Infrastructure // @@ -18,33 +18,49 @@ //===----------------------------------------------------------------------===// #include "BranchFolding.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" -#include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Pass.h" +#include "llvm/Support/BlockFrequency.h" +#include "llvm/Support/BranchProbability.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" -#include +#include +#include +#include +#include +#include + using namespace llvm; -#define DEBUG_TYPE "branchfolding" +#define DEBUG_TYPE "branch-folder" STATISTIC(NumDeadBlocks, "Number of dead blocks removed"); STATISTIC(NumBranchOpts, "Number of branches optimized"); @@ -69,10 +85,12 @@ TailMergeSize("tail-merge-size", cl::init(3), 
cl::Hidden); namespace { + /// BranchFolderPass - Wrap branch folder in a machine function pass. class BranchFolderPass : public MachineFunctionPass { public: static char ID; + explicit BranchFolderPass(): MachineFunctionPass(ID) {} bool runOnMachineFunction(MachineFunction &MF) override; @@ -84,12 +102,13 @@ namespace { MachineFunctionPass::getAnalysisUsage(AU); } }; -} + +} // end anonymous namespace char BranchFolderPass::ID = 0; char &llvm::BranchFolderPassID = BranchFolderPass::ID; -INITIALIZE_PASS(BranchFolderPass, "branch-folder", +INITIALIZE_PASS(BranchFolderPass, DEBUG_TYPE, "Control Flow Optimizer", false, false) bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) { @@ -153,13 +172,14 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF, TriedMerging.clear(); + MachineRegisterInfo &MRI = MF.getRegInfo(); AfterBlockPlacement = AfterPlacement; TII = tii; TRI = tri; MMI = mmi; MLI = mli; + this->MRI = &MRI; - MachineRegisterInfo &MRI = MF.getRegInfo(); UpdateLiveIns = MRI.tracksLiveness() && TRI->trackLivenessAfterRegAlloc(MF); if (!UpdateLiveIns) MRI.invalidateLiveness(); @@ -351,7 +371,7 @@ void BranchFolder::ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst, if (UpdateLiveIns) { NewDest->clearLiveIns(); - computeLiveIns(LiveRegs, *TRI, *NewDest); + computeLiveIns(LiveRegs, *MRI, *NewDest); } ++NumTailMerge; @@ -367,7 +387,7 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB, // Create the fall-through block. MachineFunction::iterator MBBI = CurMBB.getIterator(); - MachineBasicBlock *NewMBB =MF.CreateMachineBasicBlock(BB); + MachineBasicBlock *NewMBB = MF.CreateMachineBasicBlock(BB); CurMBB.getParent()->insert(++MBBI, NewMBB); // Move all the successors of this block to the specified block. @@ -388,7 +408,7 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB, MBBFreqInfo.setBlockFreq(NewMBB, MBBFreqInfo.getBlockFreq(&CurMBB)); if (UpdateLiveIns) - computeLiveIns(LiveRegs, *TRI, *NewMBB); + computeLiveIns(LiveRegs, *MRI, *NewMBB); // Add the new block to the funclet. const auto &FuncletI = FuncletMembership.find(&CurMBB); @@ -505,7 +525,7 @@ static unsigned CountTerminators(MachineBasicBlock *MBB, MachineBasicBlock::iterator &I) { I = MBB->end(); unsigned NumTerms = 0; - for (;;) { + while (true) { if (I == MBB->begin()) { I = MBB->end(); break; @@ -1455,13 +1475,14 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { bool PredAnalyzable = !TII->analyzeBranch(*Pred, PredTBB, PredFBB, PredCond, true); - if (PredAnalyzable && !PredCond.empty() && PredTBB == MBB) { + if (PredAnalyzable && !PredCond.empty() && PredTBB == MBB && + PredTBB != PredFBB) { // The predecessor has a conditional branch to this block which consists // of only a tail call. Try to fold the tail call into the conditional // branch. if (TII->canMakeTailCallConditional(PredCond, TailCall)) { // TODO: It would be nice if analyzeBranch() could provide a pointer - // to the branch insturction so replaceBranchWithTailCall() doesn't + // to the branch instruction so replaceBranchWithTailCall() doesn't // have to search for it. TII->replaceBranchWithTailCall(*Pred, PredCond, TailCall); ++NumTailCalls; @@ -1600,7 +1621,6 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { // block doesn't fall through into some other block, see if we can find a // place to move this block where a fall-through will happen. 
if (!PrevBB.canFallThrough()) { - // Now we know that there was no fall-through into this block, check to // see if it has a fall-through into its successor. bool CurFallsThru = MBB->canFallThrough(); diff --git a/interpreter/llvm/src/lib/CodeGen/BranchFolding.h b/interpreter/llvm/src/lib/CodeGen/BranchFolding.h index 4852721eea102..92681137e4c63 100644 --- a/interpreter/llvm/src/lib/CodeGen/BranchFolding.h +++ b/interpreter/llvm/src/lib/CodeGen/BranchFolding.h @@ -108,6 +108,7 @@ namespace llvm { bool UpdateLiveIns; unsigned MinCommonTailLength; const TargetInstrInfo *TII; + const MachineRegisterInfo *MRI; const TargetRegisterInfo *TRI; MachineModuleInfo *MMI; MachineLoopInfo *MLI; diff --git a/interpreter/llvm/src/lib/CodeGen/BranchRelaxation.cpp b/interpreter/llvm/src/lib/CodeGen/BranchRelaxation.cpp index 7af1369416615..27ee12c4c5ff2 100644 --- a/interpreter/llvm/src/lib/CodeGen/BranchRelaxation.cpp +++ b/interpreter/llvm/src/lib/CodeGen/BranchRelaxation.cpp @@ -7,17 +7,17 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegisterScavenging.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; @@ -259,7 +259,7 @@ MachineBasicBlock *BranchRelaxation::splitBlockBeforeInstr(MachineInstr &MI, // Need to fix live-in lists if we track liveness. if (TRI->trackLivenessAfterRegAlloc(*MF)) - computeLiveIns(LiveRegs, *TRI, *NewBB); + computeLiveIns(LiveRegs, MF->getRegInfo(), *NewBB); ++NumSplit; @@ -345,6 +345,10 @@ bool BranchRelaxation::fixupConditionalBranch(MachineInstr &MI) { // Do it here since if there's no split, no update is needed. MBB->replaceSuccessor(FBB, &NewBB); NewBB.addSuccessor(FBB); + + // Need to fix live-in lists if we track liveness. 
+ if (TRI->trackLivenessAfterRegAlloc(*MF)) + computeLiveIns(LiveRegs, MF->getRegInfo(), NewBB); } // We now have an appropriate fall-through block in place (either naturally or diff --git a/interpreter/llvm/src/lib/CodeGen/BuiltinGCs.cpp b/interpreter/llvm/src/lib/CodeGen/BuiltinGCs.cpp index e4eab8c513d99..abac555d66025 100644 --- a/interpreter/llvm/src/lib/CodeGen/BuiltinGCs.cpp +++ b/interpreter/llvm/src/lib/CodeGen/BuiltinGCs.cpp @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/GCs.h" #include "llvm/CodeGen/GCStrategy.h" +#include "llvm/CodeGen/GCs.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/Support/Casting.h" diff --git a/interpreter/llvm/src/lib/CodeGen/CMakeLists.txt b/interpreter/llvm/src/lib/CodeGen/CMakeLists.txt index df933aebe3b19..7f3c6da912687 100644 --- a/interpreter/llvm/src/lib/CodeGen/CMakeLists.txt +++ b/interpreter/llvm/src/lib/CodeGen/CMakeLists.txt @@ -92,6 +92,7 @@ add_llvm_library(LLVMCodeGen PatchableFunction.cpp MIRPrinter.cpp MIRPrintingPass.cpp + MacroFusion.cpp OptimizePHIs.cpp ParallelCG.cpp PeepholeOptimizer.cpp @@ -120,6 +121,7 @@ add_llvm_library(LLVMCodeGen SafeStack.cpp SafeStackColoring.cpp SafeStackLayout.cpp + ScalarizeMaskedMemIntrin.cpp ScheduleDAG.cpp ScheduleDAGInstrs.cpp ScheduleDAGPrinter.cpp diff --git a/interpreter/llvm/src/lib/CodeGen/CalcSpillWeights.cpp b/interpreter/llvm/src/lib/CodeGen/CalcSpillWeights.cpp index dc2d38a95f998..c2ced19458ed6 100644 --- a/interpreter/llvm/src/lib/CodeGen/CalcSpillWeights.cpp +++ b/interpreter/llvm/src/lib/CodeGen/CalcSpillWeights.cpp @@ -7,13 +7,13 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/VirtRegMap.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/VirtRegMap.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" diff --git a/interpreter/llvm/src/lib/CodeGen/CodeGen.cpp b/interpreter/llvm/src/lib/CodeGen/CodeGen.cpp index 26a98d76c13a8..b7fd45a3f6a66 100644 --- a/interpreter/llvm/src/lib/CodeGen/CodeGen.cpp +++ b/interpreter/llvm/src/lib/CodeGen/CodeGen.cpp @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/InitializePasses.h" #include "llvm-c/Initialization.h" +#include "llvm/InitializePasses.h" #include "llvm/PassRegistry.h" using namespace llvm; @@ -77,10 +77,13 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializePostRASchedulerPass(Registry); initializePreISelIntrinsicLoweringLegacyPassPass(Registry); initializeProcessImplicitDefsPass(Registry); + initializeRABasicPass(Registry); + initializeRAFastPass(Registry); initializeRAGreedyPass(Registry); initializeRegisterCoalescerPass(Registry); initializeRenameIndependentSubregsPass(Registry); initializeSafeStackLegacyPassPass(Registry); + initializeScalarizeMaskedMemIntrinPass(Registry); initializeShrinkWrapPass(Registry); initializeSlotIndexesPass(Registry); initializeStackColoringPass(Registry); diff --git a/interpreter/llvm/src/lib/CodeGen/CodeGenPrepare.cpp b/interpreter/llvm/src/lib/CodeGen/CodeGenPrepare.cpp index 5651db8ed9571..dc02a00e0fcc8 100644 --- a/interpreter/llvm/src/lib/CodeGen/CodeGenPrepare.cpp +++ 
b/interpreter/llvm/src/lib/CodeGen/CodeGenPrepare.cpp @@ -13,7 +13,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallSet.h" @@ -23,12 +22,14 @@ #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -59,6 +60,7 @@ #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SimplifyLibCalls.h" #include "llvm/Transforms/Utils/ValueMapper.h" + using namespace llvm; using namespace llvm::PatternMatch; @@ -83,6 +85,12 @@ STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved"); STATISTIC(NumSelectsExpanded, "Number of selects turned into branches"); STATISTIC(NumStoreExtractExposed, "Number of store(extractelement) exposed"); +STATISTIC(NumMemCmpCalls, "Number of memcmp calls"); +STATISTIC(NumMemCmpNotConstant, "Number of memcmp calls without constant size"); +STATISTIC(NumMemCmpGreaterThanMax, + "Number of memcmp calls with size greater than max size"); +STATISTIC(NumMemCmpInlined, "Number of inlined memcmp calls"); + static cl::opt DisableBranchOpts( "disable-cgp-branch-opts", cl::Hidden, cl::init(false), cl::desc("Disable branch optimizations in CodeGenPrepare")); @@ -126,7 +134,7 @@ static cl::opt DisablePreheaderProtect( cl::desc("Disable protection against removing loop preheaders")); static cl::opt ProfileGuidedSectionPrefix( - "profile-guided-section-prefix", cl::Hidden, cl::init(true), + "profile-guided-section-prefix", cl::Hidden, cl::init(true), cl::ZeroOrMore, cl::desc("Use profile info to add section prefix for hot/cold functions")); static cl::opt FreqRatioToSkipMerge( @@ -143,6 +151,11 @@ EnableTypePromotionMerge("cgp-type-promotion-merge", cl::Hidden, cl::desc("Enable merging of redundant sexts when one is dominating" " the other."), cl::init(true)); +static cl::opt MemCmpNumLoadsPerBlock( + "memcmp-num-loads-per-block", cl::Hidden, cl::init(1), + cl::desc("The number of loads per basic block for inline expansion of " + "memcmp that is only being compared against zero.")); + namespace { typedef SmallPtrSet SetOfInstrs; typedef PointerIntPair TypeIsSExt; @@ -197,10 +210,11 @@ class TypePromotionTransaction; public: static char ID; // Pass identification, replacement for typeid - explicit CodeGenPrepare(const TargetMachine *TM = nullptr) - : FunctionPass(ID), TM(TM), TLI(nullptr), TTI(nullptr), DL(nullptr) { - initializeCodeGenPreparePass(*PassRegistry::getPassRegistry()); - } + CodeGenPrepare() + : FunctionPass(ID), TM(nullptr), TLI(nullptr), TTI(nullptr), + DL(nullptr) { + initializeCodeGenPreparePass(*PassRegistry::getPassRegistry()); + } bool runOnFunction(Function &F) override; StringRef getPassName() const override { return "CodeGen Prepare"; } @@ -221,12 +235,12 @@ class TypePromotionTransaction; void eliminateMostlyEmptyBlock(BasicBlock *BB); bool isMergingEmptyBlockProfitable(BasicBlock *BB, BasicBlock *DestBB, bool isPreheader); - bool optimizeBlock(BasicBlock 
&BB, bool& ModifiedDT); - bool optimizeInst(Instruction *I, bool& ModifiedDT); + bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT); + bool optimizeInst(Instruction *I, bool &ModifiedDT); bool optimizeMemoryInst(Instruction *I, Value *Addr, Type *AccessTy, unsigned AS); bool optimizeInlineAsmInst(CallInst *CS); - bool optimizeCallInst(CallInst *CI, bool& ModifiedDT); + bool optimizeCallInst(CallInst *CI, bool &ModifiedDT); bool optimizeExt(Instruction *&I); bool optimizeExtUses(Instruction *I); bool optimizeLoadExt(LoadInst *I); @@ -255,15 +269,13 @@ class TypePromotionTransaction; } char CodeGenPrepare::ID = 0; -INITIALIZE_TM_PASS_BEGIN(CodeGenPrepare, "codegenprepare", - "Optimize for code generation", false, false) +INITIALIZE_PASS_BEGIN(CodeGenPrepare, DEBUG_TYPE, + "Optimize for code generation", false, false) INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) -INITIALIZE_TM_PASS_END(CodeGenPrepare, "codegenprepare", - "Optimize for code generation", false, false) +INITIALIZE_PASS_END(CodeGenPrepare, DEBUG_TYPE, + "Optimize for code generation", false, false) -FunctionPass *llvm::createCodeGenPreparePass(const TargetMachine *TM) { - return new CodeGenPrepare(TM); -} +FunctionPass *llvm::createCodeGenPreparePass() { return new CodeGenPrepare(); } bool CodeGenPrepare::runOnFunction(Function &F) { if (skipFunction(F)) @@ -279,7 +291,8 @@ bool CodeGenPrepare::runOnFunction(Function &F) { BPI.reset(); ModifiedDT = false; - if (TM) { + if (auto *TPC = getAnalysisIfAvailable<TargetPassConfig>()) { + TM = &TPC->getTM<TargetMachine>(); SubtargetInfo = TM->getSubtargetImpl(F); TLI = SubtargetInfo->getTargetLowering(); TRI = SubtargetInfo->getRegisterInfo(); @@ -349,7 +362,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) { // Really free removed instructions during promotion.
for (Instruction *I : RemovedInsts) - delete I; + I->deleteValue(); EverMadeChange |= MadeChange; } @@ -1549,519 +1562,6 @@ static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI, return MadeChange; } -// Translate a masked load intrinsic like -// <16 x i32 > @llvm.masked.load( <16 x i32>* %addr, i32 align, -// <16 x i1> %mask, <16 x i32> %passthru) -// to a chain of basic blocks, with loading element one-by-one if -// the appropriate mask bit is set -// -// %1 = bitcast i8* %addr to i32* -// %2 = extractelement <16 x i1> %mask, i32 0 -// %3 = icmp eq i1 %2, true -// br i1 %3, label %cond.load, label %else -// -//cond.load: ; preds = %0 -// %4 = getelementptr i32* %1, i32 0 -// %5 = load i32* %4 -// %6 = insertelement <16 x i32> undef, i32 %5, i32 0 -// br label %else -// -//else: ; preds = %0, %cond.load -// %res.phi.else = phi <16 x i32> [ %6, %cond.load ], [ undef, %0 ] -// %7 = extractelement <16 x i1> %mask, i32 1 -// %8 = icmp eq i1 %7, true -// br i1 %8, label %cond.load1, label %else2 -// -//cond.load1: ; preds = %else -// %9 = getelementptr i32* %1, i32 1 -// %10 = load i32* %9 -// %11 = insertelement <16 x i32> %res.phi.else, i32 %10, i32 1 -// br label %else2 -// -//else2: ; preds = %else, %cond.load1 -// %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ] -// %12 = extractelement <16 x i1> %mask, i32 2 -// %13 = icmp eq i1 %12, true -// br i1 %13, label %cond.load4, label %else5 -// -static void scalarizeMaskedLoad(CallInst *CI) { - Value *Ptr = CI->getArgOperand(0); - Value *Alignment = CI->getArgOperand(1); - Value *Mask = CI->getArgOperand(2); - Value *Src0 = CI->getArgOperand(3); - - unsigned AlignVal = cast(Alignment)->getZExtValue(); - VectorType *VecType = dyn_cast(CI->getType()); - assert(VecType && "Unexpected return type of masked load intrinsic"); - - Type *EltTy = CI->getType()->getVectorElementType(); - - IRBuilder<> Builder(CI->getContext()); - Instruction *InsertPt = CI; - BasicBlock *IfBlock = CI->getParent(); - BasicBlock *CondBlock = nullptr; - BasicBlock *PrevIfBlock = CI->getParent(); - - Builder.SetInsertPoint(InsertPt); - Builder.SetCurrentDebugLocation(CI->getDebugLoc()); - - // Short-cut if the mask is all-true. - bool IsAllOnesMask = isa(Mask) && - cast(Mask)->isAllOnesValue(); - - if (IsAllOnesMask) { - Value *NewI = Builder.CreateAlignedLoad(Ptr, AlignVal); - CI->replaceAllUsesWith(NewI); - CI->eraseFromParent(); - return; - } - - // Adjust alignment for the scalar instruction. 
- AlignVal = std::min(AlignVal, VecType->getScalarSizeInBits()/8); - // Bitcast %addr fron i8* to EltTy* - Type *NewPtrType = - EltTy->getPointerTo(cast(Ptr->getType())->getAddressSpace()); - Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType); - unsigned VectorWidth = VecType->getNumElements(); - - Value *UndefVal = UndefValue::get(VecType); - - // The result vector - Value *VResult = UndefVal; - - if (isa(Mask)) { - for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { - if (cast(Mask)->getOperand(Idx)->isNullValue()) - continue; - Value *Gep = - Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx)); - LoadInst* Load = Builder.CreateAlignedLoad(Gep, AlignVal); - VResult = Builder.CreateInsertElement(VResult, Load, - Builder.getInt32(Idx)); - } - Value *NewI = Builder.CreateSelect(Mask, VResult, Src0); - CI->replaceAllUsesWith(NewI); - CI->eraseFromParent(); - return; - } - - PHINode *Phi = nullptr; - Value *PrevPhi = UndefVal; - - for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { - - // Fill the "else" block, created in the previous iteration - // - // %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ] - // %mask_1 = extractelement <16 x i1> %mask, i32 Idx - // %to_load = icmp eq i1 %mask_1, true - // br i1 %to_load, label %cond.load, label %else - // - if (Idx > 0) { - Phi = Builder.CreatePHI(VecType, 2, "res.phi.else"); - Phi->addIncoming(VResult, CondBlock); - Phi->addIncoming(PrevPhi, PrevIfBlock); - PrevPhi = Phi; - VResult = Phi; - } - - Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx)); - Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate, - ConstantInt::get(Predicate->getType(), 1)); - - // Create "cond" block - // - // %EltAddr = getelementptr i32* %1, i32 0 - // %Elt = load i32* %EltAddr - // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx - // - CondBlock = IfBlock->splitBasicBlock(InsertPt->getIterator(), "cond.load"); - Builder.SetInsertPoint(InsertPt); - - Value *Gep = - Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx)); - LoadInst *Load = Builder.CreateAlignedLoad(Gep, AlignVal); - VResult = Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx)); - - // Create "else" block, fill it in the next iteration - BasicBlock *NewIfBlock = - CondBlock->splitBasicBlock(InsertPt->getIterator(), "else"); - Builder.SetInsertPoint(InsertPt); - Instruction *OldBr = IfBlock->getTerminator(); - BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr); - OldBr->eraseFromParent(); - PrevIfBlock = IfBlock; - IfBlock = NewIfBlock; - } - - Phi = Builder.CreatePHI(VecType, 2, "res.phi.select"); - Phi->addIncoming(VResult, CondBlock); - Phi->addIncoming(PrevPhi, PrevIfBlock); - Value *NewI = Builder.CreateSelect(Mask, Phi, Src0); - CI->replaceAllUsesWith(NewI); - CI->eraseFromParent(); -} - -// Translate a masked store intrinsic, like -// void @llvm.masked.store(<16 x i32> %src, <16 x i32>* %addr, i32 align, -// <16 x i1> %mask) -// to a chain of basic blocks, that stores element one-by-one if -// the appropriate mask bit is set -// -// %1 = bitcast i8* %addr to i32* -// %2 = extractelement <16 x i1> %mask, i32 0 -// %3 = icmp eq i1 %2, true -// br i1 %3, label %cond.store, label %else -// -// cond.store: ; preds = %0 -// %4 = extractelement <16 x i32> %val, i32 0 -// %5 = getelementptr i32* %1, i32 0 -// store i32 %4, i32* %5 -// br label %else -// -// else: ; preds = %0, %cond.store -// %6 = extractelement <16 x i1> %mask, i32 1 -// %7 = icmp eq i1 %6, true -// br i1 %7, 
label %cond.store1, label %else2 -// -// cond.store1: ; preds = %else -// %8 = extractelement <16 x i32> %val, i32 1 -// %9 = getelementptr i32* %1, i32 1 -// store i32 %8, i32* %9 -// br label %else2 -// . . . -static void scalarizeMaskedStore(CallInst *CI) { - Value *Src = CI->getArgOperand(0); - Value *Ptr = CI->getArgOperand(1); - Value *Alignment = CI->getArgOperand(2); - Value *Mask = CI->getArgOperand(3); - - unsigned AlignVal = cast(Alignment)->getZExtValue(); - VectorType *VecType = dyn_cast(Src->getType()); - assert(VecType && "Unexpected data type in masked store intrinsic"); - - Type *EltTy = VecType->getElementType(); - - IRBuilder<> Builder(CI->getContext()); - Instruction *InsertPt = CI; - BasicBlock *IfBlock = CI->getParent(); - Builder.SetInsertPoint(InsertPt); - Builder.SetCurrentDebugLocation(CI->getDebugLoc()); - - // Short-cut if the mask is all-true. - bool IsAllOnesMask = isa(Mask) && - cast(Mask)->isAllOnesValue(); - - if (IsAllOnesMask) { - Builder.CreateAlignedStore(Src, Ptr, AlignVal); - CI->eraseFromParent(); - return; - } - - // Adjust alignment for the scalar instruction. - AlignVal = std::max(AlignVal, VecType->getScalarSizeInBits()/8); - // Bitcast %addr fron i8* to EltTy* - Type *NewPtrType = - EltTy->getPointerTo(cast(Ptr->getType())->getAddressSpace()); - Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType); - unsigned VectorWidth = VecType->getNumElements(); - - if (isa(Mask)) { - for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { - if (cast(Mask)->getOperand(Idx)->isNullValue()) - continue; - Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx)); - Value *Gep = - Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx)); - Builder.CreateAlignedStore(OneElt, Gep, AlignVal); - } - CI->eraseFromParent(); - return; - } - - for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { - - // Fill the "else" block, created in the previous iteration - // - // %mask_1 = extractelement <16 x i1> %mask, i32 Idx - // %to_store = icmp eq i1 %mask_1, true - // br i1 %to_store, label %cond.store, label %else - // - Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx)); - Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate, - ConstantInt::get(Predicate->getType(), 1)); - - // Create "cond" block - // - // %OneElt = extractelement <16 x i32> %Src, i32 Idx - // %EltAddr = getelementptr i32* %1, i32 0 - // %store i32 %OneElt, i32* %EltAddr - // - BasicBlock *CondBlock = - IfBlock->splitBasicBlock(InsertPt->getIterator(), "cond.store"); - Builder.SetInsertPoint(InsertPt); - - Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx)); - Value *Gep = - Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx)); - Builder.CreateAlignedStore(OneElt, Gep, AlignVal); - - // Create "else" block, fill it in the next iteration - BasicBlock *NewIfBlock = - CondBlock->splitBasicBlock(InsertPt->getIterator(), "else"); - Builder.SetInsertPoint(InsertPt); - Instruction *OldBr = IfBlock->getTerminator(); - BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr); - OldBr->eraseFromParent(); - IfBlock = NewIfBlock; - } - CI->eraseFromParent(); -} - -// Translate a masked gather intrinsic like -// <16 x i32 > @llvm.masked.gather.v16i32( <16 x i32*> %Ptrs, i32 4, -// <16 x i1> %Mask, <16 x i32> %Src) -// to a chain of basic blocks, with loading element one-by-one if -// the appropriate mask bit is set -// -// % Ptrs = getelementptr i32, i32* %base, <16 x i64> %ind -// % Mask0 = extractelement <16 x i1> %Mask, 
i32 0 -// % ToLoad0 = icmp eq i1 % Mask0, true -// br i1 % ToLoad0, label %cond.load, label %else -// -// cond.load: -// % Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0 -// % Load0 = load i32, i32* % Ptr0, align 4 -// % Res0 = insertelement <16 x i32> undef, i32 % Load0, i32 0 -// br label %else -// -// else: -// %res.phi.else = phi <16 x i32>[% Res0, %cond.load], [undef, % 0] -// % Mask1 = extractelement <16 x i1> %Mask, i32 1 -// % ToLoad1 = icmp eq i1 % Mask1, true -// br i1 % ToLoad1, label %cond.load1, label %else2 -// -// cond.load1: -// % Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1 -// % Load1 = load i32, i32* % Ptr1, align 4 -// % Res1 = insertelement <16 x i32> %res.phi.else, i32 % Load1, i32 1 -// br label %else2 -// . . . -// % Result = select <16 x i1> %Mask, <16 x i32> %res.phi.select, <16 x i32> %Src -// ret <16 x i32> %Result -static void scalarizeMaskedGather(CallInst *CI) { - Value *Ptrs = CI->getArgOperand(0); - Value *Alignment = CI->getArgOperand(1); - Value *Mask = CI->getArgOperand(2); - Value *Src0 = CI->getArgOperand(3); - - VectorType *VecType = dyn_cast(CI->getType()); - - assert(VecType && "Unexpected return type of masked load intrinsic"); - - IRBuilder<> Builder(CI->getContext()); - Instruction *InsertPt = CI; - BasicBlock *IfBlock = CI->getParent(); - BasicBlock *CondBlock = nullptr; - BasicBlock *PrevIfBlock = CI->getParent(); - Builder.SetInsertPoint(InsertPt); - unsigned AlignVal = cast(Alignment)->getZExtValue(); - - Builder.SetCurrentDebugLocation(CI->getDebugLoc()); - - Value *UndefVal = UndefValue::get(VecType); - - // The result vector - Value *VResult = UndefVal; - unsigned VectorWidth = VecType->getNumElements(); - - // Shorten the way if the mask is a vector of constants. - bool IsConstMask = isa(Mask); - - if (IsConstMask) { - for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { - if (cast(Mask)->getOperand(Idx)->isNullValue()) - continue; - Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx), - "Ptr" + Twine(Idx)); - LoadInst *Load = Builder.CreateAlignedLoad(Ptr, AlignVal, - "Load" + Twine(Idx)); - VResult = Builder.CreateInsertElement(VResult, Load, - Builder.getInt32(Idx), - "Res" + Twine(Idx)); - } - Value *NewI = Builder.CreateSelect(Mask, VResult, Src0); - CI->replaceAllUsesWith(NewI); - CI->eraseFromParent(); - return; - } - - PHINode *Phi = nullptr; - Value *PrevPhi = UndefVal; - - for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { - - // Fill the "else" block, created in the previous iteration - // - // %Mask1 = extractelement <16 x i1> %Mask, i32 1 - // %ToLoad1 = icmp eq i1 %Mask1, true - // br i1 %ToLoad1, label %cond.load, label %else - // - if (Idx > 0) { - Phi = Builder.CreatePHI(VecType, 2, "res.phi.else"); - Phi->addIncoming(VResult, CondBlock); - Phi->addIncoming(PrevPhi, PrevIfBlock); - PrevPhi = Phi; - VResult = Phi; - } - - Value *Predicate = Builder.CreateExtractElement(Mask, - Builder.getInt32(Idx), - "Mask" + Twine(Idx)); - Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate, - ConstantInt::get(Predicate->getType(), 1), - "ToLoad" + Twine(Idx)); - - // Create "cond" block - // - // %EltAddr = getelementptr i32* %1, i32 0 - // %Elt = load i32* %EltAddr - // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx - // - CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.load"); - Builder.SetInsertPoint(InsertPt); - - Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx), - "Ptr" + Twine(Idx)); - LoadInst *Load = Builder.CreateAlignedLoad(Ptr, AlignVal, - "Load" + 
Twine(Idx)); - VResult = Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx), - "Res" + Twine(Idx)); - - // Create "else" block, fill it in the next iteration - BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else"); - Builder.SetInsertPoint(InsertPt); - Instruction *OldBr = IfBlock->getTerminator(); - BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr); - OldBr->eraseFromParent(); - PrevIfBlock = IfBlock; - IfBlock = NewIfBlock; - } - - Phi = Builder.CreatePHI(VecType, 2, "res.phi.select"); - Phi->addIncoming(VResult, CondBlock); - Phi->addIncoming(PrevPhi, PrevIfBlock); - Value *NewI = Builder.CreateSelect(Mask, Phi, Src0); - CI->replaceAllUsesWith(NewI); - CI->eraseFromParent(); -} - -// Translate a masked scatter intrinsic, like -// void @llvm.masked.scatter.v16i32(<16 x i32> %Src, <16 x i32*>* %Ptrs, i32 4, -// <16 x i1> %Mask) -// to a chain of basic blocks, that stores element one-by-one if -// the appropriate mask bit is set. -// -// % Ptrs = getelementptr i32, i32* %ptr, <16 x i64> %ind -// % Mask0 = extractelement <16 x i1> % Mask, i32 0 -// % ToStore0 = icmp eq i1 % Mask0, true -// br i1 %ToStore0, label %cond.store, label %else -// -// cond.store: -// % Elt0 = extractelement <16 x i32> %Src, i32 0 -// % Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0 -// store i32 %Elt0, i32* % Ptr0, align 4 -// br label %else -// -// else: -// % Mask1 = extractelement <16 x i1> % Mask, i32 1 -// % ToStore1 = icmp eq i1 % Mask1, true -// br i1 % ToStore1, label %cond.store1, label %else2 -// -// cond.store1: -// % Elt1 = extractelement <16 x i32> %Src, i32 1 -// % Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1 -// store i32 % Elt1, i32* % Ptr1, align 4 -// br label %else2 -// . . . -static void scalarizeMaskedScatter(CallInst *CI) { - Value *Src = CI->getArgOperand(0); - Value *Ptrs = CI->getArgOperand(1); - Value *Alignment = CI->getArgOperand(2); - Value *Mask = CI->getArgOperand(3); - - assert(isa(Src->getType()) && - "Unexpected data type in masked scatter intrinsic"); - assert(isa(Ptrs->getType()) && - isa(Ptrs->getType()->getVectorElementType()) && - "Vector of pointers is expected in masked scatter intrinsic"); - - IRBuilder<> Builder(CI->getContext()); - Instruction *InsertPt = CI; - BasicBlock *IfBlock = CI->getParent(); - Builder.SetInsertPoint(InsertPt); - Builder.SetCurrentDebugLocation(CI->getDebugLoc()); - - unsigned AlignVal = cast(Alignment)->getZExtValue(); - unsigned VectorWidth = Src->getType()->getVectorNumElements(); - - // Shorten the way if the mask is a vector of constants. 
- bool IsConstMask = isa(Mask); - - if (IsConstMask) { - for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { - if (cast(Mask)->getOperand(Idx)->isNullValue()) - continue; - Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx), - "Elt" + Twine(Idx)); - Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx), - "Ptr" + Twine(Idx)); - Builder.CreateAlignedStore(OneElt, Ptr, AlignVal); - } - CI->eraseFromParent(); - return; - } - for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { - // Fill the "else" block, created in the previous iteration - // - // % Mask1 = extractelement <16 x i1> % Mask, i32 Idx - // % ToStore = icmp eq i1 % Mask1, true - // br i1 % ToStore, label %cond.store, label %else - // - Value *Predicate = Builder.CreateExtractElement(Mask, - Builder.getInt32(Idx), - "Mask" + Twine(Idx)); - Value *Cmp = - Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate, - ConstantInt::get(Predicate->getType(), 1), - "ToStore" + Twine(Idx)); - - // Create "cond" block - // - // % Elt1 = extractelement <16 x i32> %Src, i32 1 - // % Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1 - // %store i32 % Elt1, i32* % Ptr1 - // - BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.store"); - Builder.SetInsertPoint(InsertPt); - - Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx), - "Elt" + Twine(Idx)); - Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx), - "Ptr" + Twine(Idx)); - Builder.CreateAlignedStore(OneElt, Ptr, AlignVal); - - // Create "else" block, fill it in the next iteration - BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else"); - Builder.SetInsertPoint(InsertPt); - Instruction *OldBr = IfBlock->getTerminator(); - BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr); - OldBr->eraseFromParent(); - IfBlock = NewIfBlock; - } - CI->eraseFromParent(); -} - /// If counting leading or trailing zeros is an expensive operation and a zero /// input is defined, add a check for zero to avoid calling the intrinsic. /// @@ -2141,7 +1641,657 @@ static bool despeculateCountZeros(IntrinsicInst *CountZeros, return true; } -bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool& ModifiedDT) { +// This class provides helper functions to expand a memcmp library call into an +// inline expansion. 
+class MemCmpExpansion { + struct ResultBlock { + BasicBlock *BB; + PHINode *PhiSrc1; + PHINode *PhiSrc2; + ResultBlock(); + }; + + CallInst *CI; + ResultBlock ResBlock; + unsigned MaxLoadSize; + unsigned NumBlocks; + unsigned NumBlocksNonOneByte; + unsigned NumLoadsPerBlock; + std::vector<BasicBlock *> LoadCmpBlocks; + BasicBlock *EndBlock; + PHINode *PhiRes; + bool IsUsedForZeroCmp; + const DataLayout &DL; + IRBuilder<> Builder; + + unsigned calculateNumBlocks(unsigned Size); + void createLoadCmpBlocks(); + void createResultBlock(); + void setupResultBlockPHINodes(); + void setupEndBlockPHINodes(); + void emitLoadCompareBlock(unsigned Index, unsigned LoadSize, + unsigned GEPIndex); + Value *getCompareLoadPairs(unsigned Index, unsigned Size, + unsigned &NumBytesProcessed); + void emitLoadCompareBlockMultipleLoads(unsigned Index, unsigned Size, + unsigned &NumBytesProcessed); + void emitLoadCompareByteBlock(unsigned Index, unsigned GEPIndex); + void emitMemCmpResultBlock(); + Value *getMemCmpExpansionZeroCase(unsigned Size); + Value *getMemCmpEqZeroOneBlock(unsigned Size); + Value *getMemCmpOneBlock(unsigned Size); + unsigned getLoadSize(unsigned Size); + unsigned getNumLoads(unsigned Size); + +public: + MemCmpExpansion(CallInst *CI, uint64_t Size, unsigned MaxLoadSize, + unsigned NumLoadsPerBlock, const DataLayout &DL); + Value *getMemCmpExpansion(uint64_t Size); +}; + +MemCmpExpansion::ResultBlock::ResultBlock() + : BB(nullptr), PhiSrc1(nullptr), PhiSrc2(nullptr) {} + +// Initialize the basic block structure required for expansion of memcmp call +// with given maximum load size and memcmp size parameter. +// This structure includes: +// 1. A list of load compare blocks - LoadCmpBlocks. +// 2. An EndBlock, split from original instruction point, which is the block to +// return from. +// 3. ResultBlock, block to branch to for early exit when a +// LoadCmpBlock finds a difference. +MemCmpExpansion::MemCmpExpansion(CallInst *CI, uint64_t Size, + unsigned MaxLoadSize, unsigned LoadsPerBlock, + const DataLayout &TheDataLayout) + : CI(CI), MaxLoadSize(MaxLoadSize), NumLoadsPerBlock(LoadsPerBlock), + DL(TheDataLayout), Builder(CI) { + + // A memcmp with zero-comparison with only one block of load and compare does + // not need to set up any extra blocks. This case could be handled in the DAG, + // but since we have all of the machinery to flexibly expand any memcmp here, + // we choose to handle this case too to avoid fragmented lowering. + IsUsedForZeroCmp = isOnlyUsedInZeroEqualityComparison(CI); + NumBlocks = calculateNumBlocks(Size); + if ((!IsUsedForZeroCmp && NumLoadsPerBlock != 1) || NumBlocks != 1) { + BasicBlock *StartBlock = CI->getParent(); + EndBlock = StartBlock->splitBasicBlock(CI, "endblock"); + setupEndBlockPHINodes(); + createResultBlock(); + + // If return value of memcmp is not used in a zero equality, we need to + // calculate which source was larger. The calculation requires the + // two loaded source values of each load compare block. + // These will be saved in the phi nodes created by setupResultBlockPHINodes. + if (!IsUsedForZeroCmp) + setupResultBlockPHINodes(); + + // Create the number of required load compare basic blocks. + createLoadCmpBlocks(); + + // Update the terminator added by splitBasicBlock to branch to the first + // LoadCmpBlock.
+ StartBlock->getTerminator()->setSuccessor(0, LoadCmpBlocks[0]); + } + + Builder.SetCurrentDebugLocation(CI->getDebugLoc()); +} + +void MemCmpExpansion::createLoadCmpBlocks() { + for (unsigned i = 0; i < NumBlocks; i++) { + BasicBlock *BB = BasicBlock::Create(CI->getContext(), "loadbb", + EndBlock->getParent(), EndBlock); + LoadCmpBlocks.push_back(BB); + } +} + +void MemCmpExpansion::createResultBlock() { + ResBlock.BB = BasicBlock::Create(CI->getContext(), "res_block", + EndBlock->getParent(), EndBlock); +} + +// This function creates the IR instructions for loading and comparing 1 byte. +// It loads 1 byte from each source of the memcmp parameters with the given +// GEPIndex. It then subtracts the two loaded values and adds this result to the +// final phi node for selecting the memcmp result. +void MemCmpExpansion::emitLoadCompareByteBlock(unsigned Index, + unsigned GEPIndex) { + Value *Source1 = CI->getArgOperand(0); + Value *Source2 = CI->getArgOperand(1); + + Builder.SetInsertPoint(LoadCmpBlocks[Index]); + Type *LoadSizeType = Type::getInt8Ty(CI->getContext()); + // Cast source to LoadSizeType*. + if (Source1->getType() != LoadSizeType) + Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo()); + if (Source2->getType() != LoadSizeType) + Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo()); + + // Get the base address using the GEPIndex. + if (GEPIndex != 0) { + Source1 = Builder.CreateGEP(LoadSizeType, Source1, + ConstantInt::get(LoadSizeType, GEPIndex)); + Source2 = Builder.CreateGEP(LoadSizeType, Source2, + ConstantInt::get(LoadSizeType, GEPIndex)); + } + + Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1); + Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2); + + LoadSrc1 = Builder.CreateZExt(LoadSrc1, Type::getInt32Ty(CI->getContext())); + LoadSrc2 = Builder.CreateZExt(LoadSrc2, Type::getInt32Ty(CI->getContext())); + Value *Diff = Builder.CreateSub(LoadSrc1, LoadSrc2); + + PhiRes->addIncoming(Diff, LoadCmpBlocks[Index]); + + if (Index < (LoadCmpBlocks.size() - 1)) { + // Early exit branch if difference found to EndBlock. Otherwise, continue to + // next LoadCmpBlock. + Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_NE, Diff, + ConstantInt::get(Diff->getType(), 0)); + BranchInst *CmpBr = + BranchInst::Create(EndBlock, LoadCmpBlocks[Index + 1], Cmp); + Builder.Insert(CmpBr); + } else { + // The last block has an unconditional branch to EndBlock. + BranchInst *CmpBr = BranchInst::Create(EndBlock); + Builder.Insert(CmpBr); + } +} + +unsigned MemCmpExpansion::getNumLoads(unsigned Size) { + return (Size / MaxLoadSize) + countPopulation(Size % MaxLoadSize); +} + +unsigned MemCmpExpansion::getLoadSize(unsigned Size) { + return MinAlign(PowerOf2Floor(Size), MaxLoadSize); +} + +/// Generate an equality comparison for one or more pairs of loaded values. +/// This is used in the case where the memcmp() call is compared equal or not +/// equal to zero. +Value *MemCmpExpansion::getCompareLoadPairs(unsigned Index, unsigned Size, + unsigned &NumBytesProcessed) { + std::vector<Value *> XorList, OrList; + Value *Diff; + + unsigned RemainingBytes = Size - NumBytesProcessed; + unsigned NumLoadsRemaining = getNumLoads(RemainingBytes); + unsigned NumLoads = std::min(NumLoadsRemaining, NumLoadsPerBlock); + + // For a single-block expansion, start inserting before the memcmp call.
+ if (LoadCmpBlocks.empty()) + Builder.SetInsertPoint(CI); + else + Builder.SetInsertPoint(LoadCmpBlocks[Index]); + + Value *Cmp = nullptr; + for (unsigned i = 0; i < NumLoads; ++i) { + unsigned LoadSize = getLoadSize(RemainingBytes); + unsigned GEPIndex = NumBytesProcessed / LoadSize; + NumBytesProcessed += LoadSize; + RemainingBytes -= LoadSize; + + Type *LoadSizeType = IntegerType::get(CI->getContext(), LoadSize * 8); + Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8); + assert(LoadSize <= MaxLoadSize && "Unexpected load type"); + + Value *Source1 = CI->getArgOperand(0); + Value *Source2 = CI->getArgOperand(1); + + // Cast source to LoadSizeType*. + if (Source1->getType() != LoadSizeType) + Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo()); + if (Source2->getType() != LoadSizeType) + Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo()); + + // Get the base address using the GEPIndex. + if (GEPIndex != 0) { + Source1 = Builder.CreateGEP(LoadSizeType, Source1, + ConstantInt::get(LoadSizeType, GEPIndex)); + Source2 = Builder.CreateGEP(LoadSizeType, Source2, + ConstantInt::get(LoadSizeType, GEPIndex)); + } + + // Get a constant or load a value for each source address. + Value *LoadSrc1 = nullptr; + if (auto *Source1C = dyn_cast<Constant>(Source1)) + LoadSrc1 = ConstantFoldLoadFromConstPtr(Source1C, LoadSizeType, DL); + if (!LoadSrc1) + LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1); + + Value *LoadSrc2 = nullptr; + if (auto *Source2C = dyn_cast<Constant>(Source2)) + LoadSrc2 = ConstantFoldLoadFromConstPtr(Source2C, LoadSizeType, DL); + if (!LoadSrc2) + LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2); + + if (NumLoads != 1) { + if (LoadSizeType != MaxLoadType) { + LoadSrc1 = Builder.CreateZExt(LoadSrc1, MaxLoadType); + LoadSrc2 = Builder.CreateZExt(LoadSrc2, MaxLoadType); + } + // If we have multiple loads per block, we need to generate a composite + // comparison using xor+or. + Diff = Builder.CreateXor(LoadSrc1, LoadSrc2); + Diff = Builder.CreateZExt(Diff, MaxLoadType); + XorList.push_back(Diff); + } else { + // If there's only one load per block, we just compare the loaded values. + Cmp = Builder.CreateICmpNE(LoadSrc1, LoadSrc2); + } + } + + auto pairWiseOr = [&](std::vector<Value *> &InList) -> std::vector<Value *> { + std::vector<Value *> OutList; + for (unsigned i = 0; i < InList.size() - 1; i = i + 2) { + Value *Or = Builder.CreateOr(InList[i], InList[i + 1]); + OutList.push_back(Or); + } + if (InList.size() % 2 != 0) + OutList.push_back(InList.back()); + return OutList; + }; + + if (!Cmp) { + // Pairwise OR the XOR results. + OrList = pairWiseOr(XorList); + + // Pairwise OR the OR results until one result left. + while (OrList.size() != 1) { + OrList = pairWiseOr(OrList); + } + Cmp = Builder.CreateICmpNE(OrList[0], ConstantInt::get(Diff->getType(), 0)); + } + + return Cmp; +} + +void MemCmpExpansion::emitLoadCompareBlockMultipleLoads( + unsigned Index, unsigned Size, unsigned &NumBytesProcessed) { + Value *Cmp = getCompareLoadPairs(Index, Size, NumBytesProcessed); + + BasicBlock *NextBB = (Index == (LoadCmpBlocks.size() - 1)) + ? EndBlock + : LoadCmpBlocks[Index + 1]; + // Early exit branch to ResultBlock if a difference is found; otherwise, + // continue to the next LoadCmpBlock or to EndBlock. + BranchInst *CmpBr = BranchInst::Create(ResBlock.BB, NextBB, Cmp); + Builder.Insert(CmpBr); + + // Add a phi edge for the last LoadCmpBlock to EndBlock with a value of 0 + // since early exit to ResultBlock was not taken (no difference was found in + // any of the bytes).
+ if (Index == LoadCmpBlocks.size() - 1) { + Value *Zero = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 0); + PhiRes->addIncoming(Zero, LoadCmpBlocks[Index]); + } +} + +// This function creates the IR instructions for loading and comparing using the +// given LoadSize. It loads the number of bytes specified by LoadSize from each +// source of the memcmp parameters. It then does a subtract to see if there was +// a difference in the loaded values. If a difference is found, it branches +// with an early exit to the ResultBlock for calculating which source was +// larger. Otherwise, it falls through to either the next LoadCmpBlock or +// the EndBlock if this is the last LoadCmpBlock. Loading 1 byte is handled with +// a special case through emitLoadCompareByteBlock. The special handling can +// simply subtract the loaded values and add the difference to the result phi +// node. +void MemCmpExpansion::emitLoadCompareBlock(unsigned Index, unsigned LoadSize, + unsigned GEPIndex) { + if (LoadSize == 1) { + MemCmpExpansion::emitLoadCompareByteBlock(Index, GEPIndex); + return; + } + + Type *LoadSizeType = IntegerType::get(CI->getContext(), LoadSize * 8); + Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8); + assert(LoadSize <= MaxLoadSize && "Unexpected load type"); + + Value *Source1 = CI->getArgOperand(0); + Value *Source2 = CI->getArgOperand(1); + + Builder.SetInsertPoint(LoadCmpBlocks[Index]); + // Cast source to LoadSizeType*. + if (Source1->getType() != LoadSizeType) + Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo()); + if (Source2->getType() != LoadSizeType) + Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo()); + + // Get the base address using the GEPIndex. + if (GEPIndex != 0) { + Source1 = Builder.CreateGEP(LoadSizeType, Source1, + ConstantInt::get(LoadSizeType, GEPIndex)); + Source2 = Builder.CreateGEP(LoadSizeType, Source2, + ConstantInt::get(LoadSizeType, GEPIndex)); + } + + // Load LoadSizeType from the base address. + Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1); + Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2); + + if (DL.isLittleEndian()) { + Function *Bswap = Intrinsic::getDeclaration(CI->getModule(), + Intrinsic::bswap, LoadSizeType); + LoadSrc1 = Builder.CreateCall(Bswap, LoadSrc1); + LoadSrc2 = Builder.CreateCall(Bswap, LoadSrc2); + } + + if (LoadSizeType != MaxLoadType) { + LoadSrc1 = Builder.CreateZExt(LoadSrc1, MaxLoadType); + LoadSrc2 = Builder.CreateZExt(LoadSrc2, MaxLoadType); + } + + // Add the loaded values to the phi nodes for calculating memcmp result only + // if result is not used in a zero equality. + if (!IsUsedForZeroCmp) { + ResBlock.PhiSrc1->addIncoming(LoadSrc1, LoadCmpBlocks[Index]); + ResBlock.PhiSrc2->addIncoming(LoadSrc2, LoadCmpBlocks[Index]); + } + + Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, LoadSrc1, LoadSrc2); + BasicBlock *NextBB = (Index == (LoadCmpBlocks.size() - 1)) + ? EndBlock + : LoadCmpBlocks[Index + 1]; + // Early exit branch to ResultBlock if a difference is found; otherwise, + // continue to the next LoadCmpBlock or to EndBlock. + BranchInst *CmpBr = BranchInst::Create(NextBB, ResBlock.BB, Cmp); + Builder.Insert(CmpBr); + + // Add a phi edge for the last LoadCmpBlock to EndBlock with a value of 0 + // since early exit to ResultBlock was not taken (no difference was found in + // any of the bytes).
+ if (Index == LoadCmpBlocks.size() - 1) { + Value *Zero = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 0); + PhiRes->addIncoming(Zero, LoadCmpBlocks[Index]); + } +} + +// This function populates the ResultBlock with a sequence to calculate the +// memcmp result. It compares the two loaded source values and returns -1 if +// src1 < src2 and 1 if src1 > src2. +void MemCmpExpansion::emitMemCmpResultBlock() { + // Special case: if memcmp result is used in a zero equality, result does not + // need to be calculated and can simply return 1. + if (IsUsedForZeroCmp) { + BasicBlock::iterator InsertPt = ResBlock.BB->getFirstInsertionPt(); + Builder.SetInsertPoint(ResBlock.BB, InsertPt); + Value *Res = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 1); + PhiRes->addIncoming(Res, ResBlock.BB); + BranchInst *NewBr = BranchInst::Create(EndBlock); + Builder.Insert(NewBr); + return; + } + BasicBlock::iterator InsertPt = ResBlock.BB->getFirstInsertionPt(); + Builder.SetInsertPoint(ResBlock.BB, InsertPt); + + Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_ULT, ResBlock.PhiSrc1, + ResBlock.PhiSrc2); + + Value *Res = + Builder.CreateSelect(Cmp, ConstantInt::get(Builder.getInt32Ty(), -1), + ConstantInt::get(Builder.getInt32Ty(), 1)); + + BranchInst *NewBr = BranchInst::Create(EndBlock); + Builder.Insert(NewBr); + PhiRes->addIncoming(Res, ResBlock.BB); +} + +unsigned MemCmpExpansion::calculateNumBlocks(unsigned Size) { + unsigned NumBlocks = 0; + bool HaveOneByteLoad = false; + unsigned RemainingSize = Size; + unsigned LoadSize = MaxLoadSize; + while (RemainingSize) { + if (LoadSize == 1) + HaveOneByteLoad = true; + NumBlocks += RemainingSize / LoadSize; + RemainingSize = RemainingSize % LoadSize; + LoadSize = LoadSize / 2; + } + NumBlocksNonOneByte = HaveOneByteLoad ? (NumBlocks - 1) : NumBlocks; + + if (IsUsedForZeroCmp) + NumBlocks = NumBlocks / NumLoadsPerBlock + + (NumBlocks % NumLoadsPerBlock != 0 ? 1 : 0); + + return NumBlocks; +} + +void MemCmpExpansion::setupResultBlockPHINodes() { + Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8); + Builder.SetInsertPoint(ResBlock.BB); + ResBlock.PhiSrc1 = + Builder.CreatePHI(MaxLoadType, NumBlocksNonOneByte, "phi.src1"); + ResBlock.PhiSrc2 = + Builder.CreatePHI(MaxLoadType, NumBlocksNonOneByte, "phi.src2"); +} + +void MemCmpExpansion::setupEndBlockPHINodes() { + Builder.SetInsertPoint(&EndBlock->front()); + PhiRes = Builder.CreatePHI(Type::getInt32Ty(CI->getContext()), 2, "phi.res"); +} + +Value *MemCmpExpansion::getMemCmpExpansionZeroCase(unsigned Size) { + unsigned NumBytesProcessed = 0; + // This loop populates each of the LoadCmpBlocks with the IR sequence to + // handle multiple loads per block. + for (unsigned i = 0; i < NumBlocks; ++i) + emitLoadCompareBlockMultipleLoads(i, Size, NumBytesProcessed); + + emitMemCmpResultBlock(); + return PhiRes; +} + +/// A memcmp expansion that compares equality with 0 and only has one block of +/// load and compare can bypass the compare, branch, and phi IR that is required +/// in the general case. +Value *MemCmpExpansion::getMemCmpEqZeroOneBlock(unsigned Size) { + unsigned NumBytesProcessed = 0; + Value *Cmp = getCompareLoadPairs(0, Size, NumBytesProcessed); + return Builder.CreateZExt(Cmp, Type::getInt32Ty(CI->getContext())); +} + +/// A memcmp expansion that only has one block of load and compare can bypass +/// the compare, branch, and phi IR that is required in the general case. 
+Value *MemCmpExpansion::getMemCmpOneBlock(unsigned Size) { + assert(NumLoadsPerBlock == 1 && "Only handles one load pair per block"); + + Type *LoadSizeType = IntegerType::get(CI->getContext(), Size * 8); + Value *Source1 = CI->getArgOperand(0); + Value *Source2 = CI->getArgOperand(1); + + // Cast source to LoadSizeType*. + if (Source1->getType() != LoadSizeType) + Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo()); + if (Source2->getType() != LoadSizeType) + Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo()); + + // Load LoadSizeType from the base address. + Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1); + Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2); + + if (DL.isLittleEndian() && Size != 1) { + Function *Bswap = Intrinsic::getDeclaration(CI->getModule(), + Intrinsic::bswap, LoadSizeType); + LoadSrc1 = Builder.CreateCall(Bswap, LoadSrc1); + LoadSrc2 = Builder.CreateCall(Bswap, LoadSrc2); + } + + // TODO: Instead of comparing ULT, just subtract and return the difference? + Value *CmpNE = Builder.CreateICmpNE(LoadSrc1, LoadSrc2); + Value *CmpULT = Builder.CreateICmpULT(LoadSrc1, LoadSrc2); + Type *I32 = Builder.getInt32Ty(); + Value *Sel1 = Builder.CreateSelect(CmpULT, ConstantInt::get(I32, -1), + ConstantInt::get(I32, 1)); + return Builder.CreateSelect(CmpNE, Sel1, ConstantInt::get(I32, 0)); +} + +// This function expands the memcmp call into an inline expansion and returns +// the memcmp result. +Value *MemCmpExpansion::getMemCmpExpansion(uint64_t Size) { + if (IsUsedForZeroCmp) + return NumBlocks == 1 ? getMemCmpEqZeroOneBlock(Size) : + getMemCmpExpansionZeroCase(Size); + + // TODO: Handle more than one load pair per block in getMemCmpOneBlock(). + if (NumBlocks == 1 && NumLoadsPerBlock == 1) + return getMemCmpOneBlock(Size); + + // This loop calls emitLoadCompareBlock for comparing Size bytes of the two + // memcmp sources. It starts with loading using the maximum load size set by + // the target. It processes any remaining bytes using a load size which is the + // next smallest power of 2. + unsigned LoadSize = MaxLoadSize; + unsigned NumBytesToBeProcessed = Size; + unsigned Index = 0; + while (NumBytesToBeProcessed) { + // Calculate how many blocks we can create with the current load size. + unsigned NumBlocks = NumBytesToBeProcessed / LoadSize; + unsigned GEPIndex = (Size - NumBytesToBeProcessed) / LoadSize; + NumBytesToBeProcessed = NumBytesToBeProcessed % LoadSize; + + // For each NumBlocks, populate the instruction sequence for loading and + // comparing LoadSize bytes. + while (NumBlocks--) { + emitLoadCompareBlock(Index, LoadSize, GEPIndex); + Index++; + GEPIndex++; + } + // Get the next LoadSize to use. + LoadSize = LoadSize / 2; + } + + emitMemCmpResultBlock(); + return PhiRes; +} + +// This function checks to see if an expansion of memcmp can be generated. +// It checks for constant compare size that is less than the max inline size. +// If an expansion cannot occur, returns false to leave as a library call. +// Otherwise, the library call is replaced with a new IR instruction sequence. 
+/// We want to transform: +/// %call = call signext i32 @memcmp(i8* %0, i8* %1, i64 15) +/// To: +/// loadbb: +/// %0 = bitcast i32* %buffer2 to i8* +/// %1 = bitcast i32* %buffer1 to i8* +/// %2 = bitcast i8* %1 to i64* +/// %3 = bitcast i8* %0 to i64* +/// %4 = load i64, i64* %2 +/// %5 = load i64, i64* %3 +/// %6 = call i64 @llvm.bswap.i64(i64 %4) +/// %7 = call i64 @llvm.bswap.i64(i64 %5) +/// %8 = sub i64 %6, %7 +/// %9 = icmp ne i64 %8, 0 +/// br i1 %9, label %res_block, label %loadbb1 +/// res_block: ; preds = %loadbb2, +/// %loadbb1, %loadbb +/// %phi.src1 = phi i64 [ %6, %loadbb ], [ %22, %loadbb1 ], [ %36, %loadbb2 ] +/// %phi.src2 = phi i64 [ %7, %loadbb ], [ %23, %loadbb1 ], [ %37, %loadbb2 ] +/// %10 = icmp ult i64 %phi.src1, %phi.src2 +/// %11 = select i1 %10, i32 -1, i32 1 +/// br label %endblock +/// loadbb1: ; preds = %loadbb +/// %12 = bitcast i32* %buffer2 to i8* +/// %13 = bitcast i32* %buffer1 to i8* +/// %14 = bitcast i8* %13 to i32* +/// %15 = bitcast i8* %12 to i32* +/// %16 = getelementptr i32, i32* %14, i32 2 +/// %17 = getelementptr i32, i32* %15, i32 2 +/// %18 = load i32, i32* %16 +/// %19 = load i32, i32* %17 +/// %20 = call i32 @llvm.bswap.i32(i32 %18) +/// %21 = call i32 @llvm.bswap.i32(i32 %19) +/// %22 = zext i32 %20 to i64 +/// %23 = zext i32 %21 to i64 +/// %24 = sub i64 %22, %23 +/// %25 = icmp ne i64 %24, 0 +/// br i1 %25, label %res_block, label %loadbb2 +/// loadbb2: ; preds = %loadbb1 +/// %26 = bitcast i32* %buffer2 to i8* +/// %27 = bitcast i32* %buffer1 to i8* +/// %28 = bitcast i8* %27 to i16* +/// %29 = bitcast i8* %26 to i16* +/// %30 = getelementptr i16, i16* %28, i16 6 +/// %31 = getelementptr i16, i16* %29, i16 6 +/// %32 = load i16, i16* %30 +/// %33 = load i16, i16* %31 +/// %34 = call i16 @llvm.bswap.i16(i16 %32) +/// %35 = call i16 @llvm.bswap.i16(i16 %33) +/// %36 = zext i16 %34 to i64 +/// %37 = zext i16 %35 to i64 +/// %38 = sub i64 %36, %37 +/// %39 = icmp ne i64 %38, 0 +/// br i1 %39, label %res_block, label %loadbb3 +/// loadbb3: ; preds = %loadbb2 +/// %40 = bitcast i32* %buffer2 to i8* +/// %41 = bitcast i32* %buffer1 to i8* +/// %42 = getelementptr i8, i8* %41, i8 14 +/// %43 = getelementptr i8, i8* %40, i8 14 +/// %44 = load i8, i8* %42 +/// %45 = load i8, i8* %43 +/// %46 = zext i8 %44 to i32 +/// %47 = zext i8 %45 to i32 +/// %48 = sub i32 %46, %47 +/// br label %endblock +/// endblock: ; preds = %res_block, +/// %loadbb3 +/// %phi.res = phi i32 [ %48, %loadbb3 ], [ %11, %res_block ] +/// ret i32 %phi.res +static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI, + const TargetLowering *TLI, const DataLayout *DL) { + NumMemCmpCalls++; + + // TTI call to check if target would like to expand memcmp. Also, get the + // MaxLoadSize. + unsigned MaxLoadSize; + if (!TTI->expandMemCmp(CI, MaxLoadSize)) + return false; + + // Early exit from expansion if -Oz. + if (CI->getFunction()->optForMinSize()) + return false; + + // Early exit from expansion if size is not a constant. + ConstantInt *SizeCast = dyn_cast<ConstantInt>(CI->getArgOperand(2)); + if (!SizeCast) { + NumMemCmpNotConstant++; + return false; + } + + // Early exit from expansion if size is greater than max bytes to load.
+ uint64_t SizeVal = SizeCast->getZExtValue(); + unsigned NumLoads = 0; + unsigned RemainingSize = SizeVal; + unsigned LoadSize = MaxLoadSize; + while (RemainingSize) { + NumLoads += RemainingSize / LoadSize; + RemainingSize = RemainingSize % LoadSize; + LoadSize = LoadSize / 2; + } + + if (NumLoads > TLI->getMaxExpandSizeMemcmp(CI->getFunction()->optForSize())) { + NumMemCmpGreaterThanMax++; + return false; + } + + NumMemCmpInlined++; + + // MemCmpHelper object creates and sets up basic blocks required for + // expanding memcmp with size SizeVal. + unsigned NumLoadsPerBlock = MemCmpNumLoadsPerBlock; + MemCmpExpansion MemCmpHelper(CI, SizeVal, MaxLoadSize, NumLoadsPerBlock, *DL); + + Value *Res = MemCmpHelper.getMemCmpExpansion(SizeVal); + + // Replace call with result of expansion and erase call. + CI->replaceAllUsesWith(Res); + CI->eraseFromParent(); + + return true; +} + +bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) { BasicBlock *BB = CI->getParent(); // Lower inline assembly if we can. @@ -2242,39 +2392,6 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool& ModifiedDT) { } return true; } - case Intrinsic::masked_load: { - // Scalarize unsupported vector masked load - if (!TTI->isLegalMaskedLoad(CI->getType())) { - scalarizeMaskedLoad(CI); - ModifiedDT = true; - return true; - } - return false; - } - case Intrinsic::masked_store: { - if (!TTI->isLegalMaskedStore(CI->getArgOperand(0)->getType())) { - scalarizeMaskedStore(CI); - ModifiedDT = true; - return true; - } - return false; - } - case Intrinsic::masked_gather: { - if (!TTI->isLegalMaskedGather(CI->getType())) { - scalarizeMaskedGather(CI); - ModifiedDT = true; - return true; - } - return false; - } - case Intrinsic::masked_scatter: { - if (!TTI->isLegalMaskedScatter(CI->getArgOperand(0)->getType())) { - scalarizeMaskedScatter(CI); - ModifiedDT = true; - return true; - } - return false; - } case Intrinsic::aarch64_stlxr: case Intrinsic::aarch64_stxr: { ZExtInst *ExtVal = dyn_cast<ZExtInst>(CI->getArgOperand(0)); @@ -2325,6 +2442,13 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool& ModifiedDT) { CI->eraseFromParent(); return true; } + + LibFunc Func; + if (TLInfo->getLibFunc(ImmutableCallSite(CI), Func) && + Func == LibFunc_memcmp && expandMemCmp(CI, TTI, TLI, DL)) { + ModifiedDT = true; + return true; + } return false; } @@ -3870,7 +3994,7 @@ bool AddressingModeMatcher::matchAddr(Value *Addr, unsigned Depth) { static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal, const TargetLowering &TLI, const TargetRegisterInfo &TRI) { - const Function *F = CI->getParent()->getParent(); + const Function *F = CI->getFunction(); TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(F->getParent()->getDataLayout(), &TRI, ImmutableCallSite(CI)); @@ -3892,14 +4016,18 @@ static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal, return true; } +// Max number of memory uses to look at before aborting the search to conserve +// compile time. +static constexpr int MaxMemoryUsesToScan = 20; + /// Recursively walk all the uses of I until we find a memory use. /// If we find an obviously non-foldable instruction, return true. /// Add the ultimately found memory instructions to MemoryUses.
static bool FindAllMemoryUses( Instruction *I, SmallVectorImpl<std::pair<Instruction *, unsigned>> &MemoryUses, - SmallPtrSetImpl<Instruction *> &ConsideredInsts, - const TargetLowering &TLI, const TargetRegisterInfo &TRI) { + SmallPtrSetImpl<Instruction *> &ConsideredInsts, const TargetLowering &TLI, + const TargetRegisterInfo &TRI, int SeenInsts = 0) { // If we already considered this instruction, we're done. if (!ConsideredInsts.insert(I).second) return false; @@ -3912,8 +4040,12 @@ static bool FindAllMemoryUses( // Loop over all the uses, recursively processing them. for (Use &U : I->uses()) { - Instruction *UserI = cast<Instruction>(U.getUser()); + // Conservatively return true if we're seeing a large number or a deep chain + // of users. This avoids excessive compilation times in pathological cases. + if (SeenInsts++ >= MaxMemoryUsesToScan) + return true; + + Instruction *UserI = cast<Instruction>(U.getUser()); if (LoadInst *LI = dyn_cast<LoadInst>(UserI)) { MemoryUses.push_back(std::make_pair(LI, U.getOperandNo())); continue; } @@ -3958,7 +4090,8 @@ static bool FindAllMemoryUses( continue; } - if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI, TRI)) + if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI, TRI, + SeenInsts)) return true; } @@ -4143,9 +4276,8 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, // Use a worklist to iteratively look through PHI nodes, and ensure that // the addressing mode obtained from the non-PHI roots of the graph // are equivalent. - Value *Consensus = nullptr; - unsigned NumUsesConsensus = 0; - bool IsNumUsesConsensusValid = false; + bool AddrModeFound = false; + bool PhiSeen = false; SmallVector<Instruction *, 16> AddrModeInsts; ExtAddrMode AddrMode; TypePromotionTransaction TPT(RemovedInsts); @@ -4155,72 +4287,59 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, Value *V = worklist.back(); worklist.pop_back(); - // Break use-def graph loops. - if (!Visited.insert(V).second) { - Consensus = nullptr; - break; - } + // We allow traversing cyclic Phi nodes. + // In case of success after this loop we ensure that traversing through + // Phi nodes ends up with all cases to compute address of the form + // BaseGV + Base + Scale * Index + Offset + // where Scale and Offset are constants and BaseGV, Base and Index + // are exactly the same Values in all cases. + // It means that BaseGV, Scale and Offset dominate our memory instruction + // and have the same value as they had in address computation represented + // as Phi. So we can safely sink address computation to the memory instruction. + if (!Visited.insert(V).second) + continue; // For a PHI node, push all of its incoming values. + if (PHINode *P = dyn_cast<PHINode>(V)) { for (Value *IncValue : P->incoming_values()) worklist.push_back(IncValue); + PhiSeen = true; continue; } // For non-PHIs, determine the addressing mode being computed. Note that // the result may differ depending on what other uses our candidate // addressing instructions might have. - SmallVector<Instruction *, 16> NewAddrModeInsts; + AddrModeInsts.clear(); ExtAddrMode NewAddrMode = AddressingModeMatcher::Match( - V, AccessTy, AddrSpace, MemoryInst, NewAddrModeInsts, *TLI, *TRI, - InsertedInsts, PromotedInsts, TPT); - - // This check is broken into two cases with very similar code to avoid using - // getNumUses() as much as possible. Some values have a lot of uses, so - // calling getNumUses() unconditionally caused a significant compile-time - // regression.
- if (!Consensus) { - Consensus = V; - AddrMode = NewAddrMode; - AddrModeInsts = NewAddrModeInsts; - continue; - } else if (NewAddrMode == AddrMode) { - if (!IsNumUsesConsensusValid) { - NumUsesConsensus = Consensus->getNumUses(); - IsNumUsesConsensusValid = true; - } + V, AccessTy, AddrSpace, MemoryInst, AddrModeInsts, *TLI, *TRI, + InsertedInsts, PromotedInsts, TPT); - // Ensure that the obtained addressing mode is equivalent to that obtained - // for all other roots of the PHI traversal. Also, when choosing one - // such root as representative, select the one with the most uses in order - // to keep the cost modeling heuristics in AddressingModeMatcher - // applicable. - unsigned NumUses = V->getNumUses(); - if (NumUses > NumUsesConsensus) { - Consensus = V; - NumUsesConsensus = NumUses; - AddrModeInsts = NewAddrModeInsts; - } + if (!AddrModeFound) { + AddrModeFound = true; + AddrMode = NewAddrMode; continue; } + if (NewAddrMode == AddrMode) + continue; - Consensus = nullptr; + AddrModeFound = false; break; } // If the addressing mode couldn't be determined, or if multiple different // ones were determined, bail out now. - if (!Consensus) { + if (!AddrModeFound) { TPT.rollback(LastKnownGood); return false; } TPT.commit(); // If all the instructions matched are already in this BB, don't do anything. - if (none_of(AddrModeInsts, [&](Value *V) { + // If we saw a Phi node then the address is definitely not local. + if (!PhiSeen && none_of(AddrModeInsts, [&](Value *V) { return IsNonLocalValue(V, MemoryInst->getParent()); - })) { + })) { DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode << "\n"); return false; } @@ -4266,6 +4385,20 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, AddrMode.Scale = 0; } + // It is only safe to sign extend the BaseReg if we know that the math + // required to create it did not overflow before we extend it. Since + // the original IR value was tossed in favor of a constant back when + // the AddrMode was created we need to bail out gracefully if widths + // do not match instead of extending it. + // + // (See below for code to add the scale.) + if (AddrMode.Scale) { + Type *ScaledRegTy = AddrMode.ScaledReg->getType(); + if (cast<IntegerType>(IntPtrTy)->getBitWidth() > + cast<IntegerType>(ScaledRegTy)->getBitWidth()) + return false; + } + if (AddrMode.BaseGV) { if (ResultPtr) return false; @@ -4276,14 +4409,16 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, // If the real base value actually came from an inttoptr, then the matcher // will look through it and provide only the integer value. In that case, // use it here. - if (!ResultPtr && AddrMode.BaseReg) { - ResultPtr = - Builder.CreateIntToPtr(AddrMode.BaseReg, Addr->getType(), "sunkaddr"); - AddrMode.BaseReg = nullptr; - } else if (!ResultPtr && AddrMode.Scale == 1) { - ResultPtr = - Builder.CreateIntToPtr(AddrMode.ScaledReg, Addr->getType(), "sunkaddr"); - AddrMode.Scale = 0; + if (!DL->isNonIntegralPointerType(Addr->getType())) { + if (!ResultPtr && AddrMode.BaseReg) { + ResultPtr = Builder.CreateIntToPtr(AddrMode.BaseReg, Addr->getType(), + "sunkaddr"); + AddrMode.BaseReg = nullptr; + } else if (!ResultPtr && AddrMode.Scale == 1) { + ResultPtr = Builder.CreateIntToPtr(AddrMode.ScaledReg, Addr->getType(), + "sunkaddr"); + AddrMode.Scale = 0; + } } if (!ResultPtr && @@ -4314,19 +4449,11 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, Value *V = AddrMode.ScaledReg; if (V->getType() == IntPtrTy) { // done.
- } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() < - cast<IntegerType>(V->getType())->getBitWidth()) { - V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr"); } else { - // It is only safe to sign extend the BaseReg if we know that the math - // required to create it did not overflow before we extend it. Since - // the original IR value was tossed in favor of a constant back when - // the AddrMode was created we need to bail out gracefully if widths - // do not match instead of extending it. - Instruction *I = dyn_cast_or_null<Instruction>(ResultIndex); - if (I && (ResultIndex != AddrMode.BaseReg)) - I->eraseFromParent(); - return false; + assert(cast<IntegerType>(IntPtrTy)->getBitWidth() < + cast<IntegerType>(V->getType())->getBitWidth() && + "We can't transform if ScaledReg is too narrow"); + V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr"); } if (AddrMode.Scale != 1) @@ -4364,6 +4491,19 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType()); } } else { + // We'd require a ptrtoint/inttoptr down the line, which we can't do for + // non-integral pointers, so in that case bail out now. + Type *BaseTy = AddrMode.BaseReg ? AddrMode.BaseReg->getType() : nullptr; + Type *ScaleTy = AddrMode.Scale ? AddrMode.ScaledReg->getType() : nullptr; + PointerType *BasePtrTy = dyn_cast_or_null<PointerType>(BaseTy); + PointerType *ScalePtrTy = dyn_cast_or_null<PointerType>(ScaleTy); + if (DL->isNonIntegralPointerType(Addr->getType()) || + (BasePtrTy && DL->isNonIntegralPointerType(BasePtrTy)) || + (ScalePtrTy && DL->isNonIntegralPointerType(ScalePtrTy)) || + (AddrMode.BaseGV && + DL->isNonIntegralPointerType(AddrMode.BaseGV->getType()))) + return false; + DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for " << *MemoryInst << "\n"); Type *IntPtrTy = DL->getIntPtrType(Addr->getType()); @@ -4467,7 +4607,7 @@ bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) { bool MadeChange = false; const TargetRegisterInfo *TRI = - TM->getSubtargetImpl(*CS->getParent()->getParent())->getRegisterInfo(); + TM->getSubtargetImpl(*CS->getFunction())->getRegisterInfo(); TargetLowering::AsmOperandInfoVector TargetConstraints = TLI->ParseConstraints(*DL, TRI, CS); unsigned ArgNo = 0; @@ -4699,25 +4839,7 @@ bool CodeGenPrepare::canFormExtLd( if (!HasPromoted && LI->getParent() == Inst->getParent()) return false; - EVT VT = TLI->getValueType(*DL, Inst->getType()); - EVT LoadVT = TLI->getValueType(*DL, LI->getType()); - - // If the load has other users and the truncate is not free, this probably - // isn't worthwhile. - if (!LI->hasOneUse() && (TLI->isTypeLegal(LoadVT) || !TLI->isTypeLegal(VT)) && - !TLI->isTruncateFree(Inst->getType(), LI->getType())) - return false; - - // Check whether the target supports casts folded into loads. - unsigned LType; - if (isa<ZExtInst>(Inst)) - LType = ISD::ZEXTLOAD; - else { - assert(isa<SExtInst>(Inst) && "Unexpected ext type!"); - LType = ISD::SEXTLOAD; - } - - return TLI->isLoadExtLegal(LType, VT, LoadVT); + return TLI->isExtLoad(LI, Inst, *DL); } /// Move a zext or sext fed by a load into the same basic block as the load, @@ -5472,6 +5594,7 @@ bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) { return true; } + namespace { /// \brief Helper class to promote a scalar operation to a vector one. /// This class is used to move downward extractelement transition.
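// ---------------------------------------------------------------------------
// Editor's sketch: the rewritten Phi traversal above sinks an address only
// when every non-Phi root of the use-def graph computes the identical
// addressing mode, which is what lets it tolerate cycles without the old
// Consensus/NumUsesConsensus bookkeeping. A minimal standalone model of that
// consensus rule follows; ToyAddrMode is a hypothetical stand-in, not the
// LLVM ExtAddrMode/AddressingModeMatcher API. Assumes C++17.
#include <cassert>
#include <optional>
#include <vector>

struct ToyAddrMode {
  long BaseOffs = 0;
  long Scale = 0;
  bool operator==(const ToyAddrMode &O) const {
    return BaseOffs == O.BaseOffs && Scale == O.Scale;
  }
};

// Mirrors the AddrModeFound loop: adopt the first root's mode, keep going
// while later roots agree, and give up on the first disagreement.
std::optional<ToyAddrMode>
findConsensus(const std::vector<ToyAddrMode> &Roots) {
  std::optional<ToyAddrMode> Found;
  for (const ToyAddrMode &AM : Roots) {
    if (!Found) {
      Found = AM;          // First root seen: AddrModeFound = true.
      continue;
    }
    if (*Found == AM)      // Matches the consensus so far: keep going.
      continue;
    return std::nullopt;   // Disagreement: no consensus, caller rolls back.
  }
  return Found;
}

int main() {
  assert(findConsensus({{8, 4}, {8, 4}}).has_value());  // agreeing roots sink
  assert(!findConsensus({{8, 4}, {0, 4}}).has_value()); // disagreement bails
}
// ---------------------------------------------------------------------------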
@@ -5950,7 +6073,7 @@ static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL, return true; } -bool CodeGenPrepare::optimizeInst(Instruction *I, bool& ModifiedDT) { +bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) { // Bail out if we inserted the instruction to prevent optimizations from // stepping on each other's toes. if (InsertedInsts.count(I)) @@ -6105,7 +6228,7 @@ static bool makeBitReverse(Instruction &I, const DataLayout &DL, // In this pass we look for GEP and cast instructions that are used // across basic blocks and rewrite them to improve basic-block-at-a-time // selection. -bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, bool& ModifiedDT) { +bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, bool &ModifiedDT) { SunkAddrs.clear(); bool MadeChange = false; @@ -6264,7 +6387,7 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) { } // Update PHI nodes in both successors. The original BB needs to be - // replaced in one succesor's PHI nodes, because the branch comes now from + // replaced in one successor's PHI nodes, because the branch comes now from // the newly generated BB (NewBB). In the other successor we need to add one // incoming edge to the PHI nodes, because both branch instructions target // now the same successor. Depending on the original branch condition diff --git a/interpreter/llvm/src/lib/CodeGen/CriticalAntiDepBreaker.cpp b/interpreter/llvm/src/lib/CodeGen/CriticalAntiDepBreaker.cpp index b2d6652b075e7..a3cf2846d2f5d 100644 --- a/interpreter/llvm/src/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/interpreter/llvm/src/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -74,7 +74,7 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { for (const MCPhysReg *I = MF.getRegInfo().getCalleeSavedRegs(); *I; ++I) { unsigned Reg = *I; - if (!IsReturnBlock && !(Pristine.test(Reg) || BB->isLiveIn(Reg))) + if (!IsReturnBlock && !Pristine.test(Reg)) continue; for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) { unsigned Reg = *AI; diff --git a/interpreter/llvm/src/lib/CodeGen/DFAPacketizer.cpp b/interpreter/llvm/src/lib/CodeGen/DFAPacketizer.cpp index 65f58e5686e0d..853b9afa1026c 100644 --- a/interpreter/llvm/src/lib/CodeGen/DFAPacketizer.cpp +++ b/interpreter/llvm/src/lib/CodeGen/DFAPacketizer.cpp @@ -23,49 +23,59 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "packets" - #include "llvm/CodeGen/DFAPacketizer.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBundle.h" +#include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/ScheduleDAGInstrs.h" +#include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCInstrItineraries.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include +#include +#include +#include +#include using namespace llvm; +#define DEBUG_TYPE "packets" + static cl::opt InstrLimit("dfa-instr-limit", cl::Hidden, cl::init(0), cl::desc("If present, stops packetizing after N instructions")); + static unsigned InstrCount = 0; // -------------------------------------------------------------------- // Definitions shared between DFAPacketizer.cpp and DFAPacketizerEmitter.cpp -namespace { - DFAInput addDFAFuncUnits(DFAInput Inp, unsigned FuncUnits) { - return (Inp << DFA_MAX_RESOURCES) | FuncUnits; - 
} +static DFAInput addDFAFuncUnits(DFAInput Inp, unsigned FuncUnits) { + return (Inp << DFA_MAX_RESOURCES) | FuncUnits; +} - /// Return the DFAInput for an instruction class input vector. - /// This function is used in both DFAPacketizer.cpp and in - /// DFAPacketizerEmitter.cpp. - DFAInput getDFAInsnInput(const std::vector<unsigned> &InsnClass) { - DFAInput InsnInput = 0; - assert((InsnClass.size() <= DFA_MAX_RESTERMS) && - "Exceeded maximum number of DFA terms"); - for (auto U : InsnClass) - InsnInput = addDFAFuncUnits(InsnInput, U); - return InsnInput; - } +/// Return the DFAInput for an instruction class input vector. +/// This function is used in both DFAPacketizer.cpp and in +/// DFAPacketizerEmitter.cpp. +static DFAInput getDFAInsnInput(const std::vector<unsigned> &InsnClass) { + DFAInput InsnInput = 0; + assert((InsnClass.size() <= DFA_MAX_RESTERMS) && + "Exceeded maximum number of DFA terms"); + for (auto U : InsnClass) + InsnInput = addDFAFuncUnits(InsnInput, U); + return InsnInput; } + // -------------------------------------------------------------------- DFAPacketizer::DFAPacketizer(const InstrItineraryData *I, const DFAStateInput (*SIT)[2], const unsigned *SET): - InstrItins(I), CurrentState(0), DFAStateInputTable(SIT), - DFAStateEntryTable(SET) { + InstrItins(I), DFAStateInputTable(SIT), DFAStateEntryTable(SET) { // Make sure DFA types are large enough for the number of terms & resources. static_assert((DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <= (8 * sizeof(DFAInput)), @@ -75,7 +85,6 @@ DFAPacketizer::DFAPacketizer(const InstrItineraryData *I, "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAStateInput"); } - // Read the DFA transition table and update CachedTable. // // Format of the transition tables: @@ -97,7 +106,6 @@ void DFAPacketizer::ReadTable(unsigned int state) { DFAStateInputTable[i][1]; } - // Return the DFAInput for an instruction class. DFAInput DFAPacketizer::getInsnInput(unsigned InsnClass) { // Note: this logic must match that in DFAPacketizerDefs.h for input vectors. @@ -112,16 +120,14 @@ DFAInput DFAPacketizer::getInsnInput(unsigned InsnClass) { return InsnInput; } - // Return the DFAInput for an instruction class input vector. DFAInput DFAPacketizer::getInsnInput(const std::vector<unsigned> &InsnClass) { return getDFAInsnInput(InsnClass); } - // Check if the resources occupied by a MCInstrDesc are available in the // current state. -bool DFAPacketizer::canReserveResources(const llvm::MCInstrDesc *MID) { +bool DFAPacketizer::canReserveResources(const MCInstrDesc *MID) { unsigned InsnClass = MID->getSchedClass(); DFAInput InsnInput = getInsnInput(InsnClass); UnsignPair StateTrans = UnsignPair(CurrentState, InsnInput); @@ -129,10 +135,9 @@ bool DFAPacketizer::canReserveResources(const llvm::MCInstrDesc *MID) { return CachedTable.count(StateTrans) != 0; } - // Reserve the resources occupied by a MCInstrDesc and change the current // state to reflect that change. -void DFAPacketizer::reserveResources(const llvm::MCInstrDesc *MID) { +void DFAPacketizer::reserveResources(const MCInstrDesc *MID) { unsigned InsnClass = MID->getSchedClass(); DFAInput InsnInput = getInsnInput(InsnClass); UnsignPair StateTrans = UnsignPair(CurrentState, InsnInput); @@ -141,24 +146,22 @@ void DFAPacketizer::reserveResources(const llvm::MCInstrDesc *MID) { CurrentState = CachedTable[StateTrans]; } - // Check if the resources occupied by a machine instruction are available // in the current state.
-bool DFAPacketizer::canReserveResources(llvm::MachineInstr &MI) { - const llvm::MCInstrDesc &MID = MI.getDesc(); +bool DFAPacketizer::canReserveResources(MachineInstr &MI) { + const MCInstrDesc &MID = MI.getDesc(); return canReserveResources(&MID); } - // Reserve the resources occupied by a machine instruction and change the // current state to reflect that change. -void DFAPacketizer::reserveResources(llvm::MachineInstr &MI) { - const llvm::MCInstrDesc &MID = MI.getDesc(); +void DFAPacketizer::reserveResources(MachineInstr &MI) { + const MCInstrDesc &MID = MI.getDesc(); reserveResources(&MID); } - namespace llvm { + // This class extends ScheduleDAGInstrs and overrides the schedule method // to build the dependence graph. class DefaultVLIWScheduler : public ScheduleDAGInstrs { @@ -166,9 +169,11 @@ class DefaultVLIWScheduler : public ScheduleDAGInstrs { AliasAnalysis *AA; /// Ordered list of DAG postprocessing steps. std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations; + public: DefaultVLIWScheduler(MachineFunction &MF, MachineLoopInfo &MLI, AliasAnalysis *AA); + // Actual scheduling work. void schedule() override; @@ -176,11 +181,12 @@ class DefaultVLIWScheduler : public ScheduleDAGInstrs { void addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation) { Mutations.push_back(std::move(Mutation)); } + protected: void postprocessDAG(); }; -} +} // end namespace llvm DefaultVLIWScheduler::DefaultVLIWScheduler(MachineFunction &MF, MachineLoopInfo &MLI, @@ -189,21 +195,18 @@ DefaultVLIWScheduler::DefaultVLIWScheduler(MachineFunction &MF, CanHandleTerminators = true; } - /// Apply each ScheduleDAGMutation step in order. void DefaultVLIWScheduler::postprocessDAG() { for (auto &M : Mutations) M->apply(this); } - void DefaultVLIWScheduler::schedule() { // Build the scheduling graph. buildSchedGraph(AA); postprocessDAG(); } - VLIWPacketizerList::VLIWPacketizerList(MachineFunction &mf, MachineLoopInfo &mli, AliasAnalysis *aa) : MF(mf), TII(mf.getSubtarget().getInstrInfo()), AA(aa) { @@ -211,13 +214,11 @@ VLIWPacketizerList::VLIWPacketizerList(MachineFunction &mf, VLIWScheduler = new DefaultVLIWScheduler(MF, mli, AA); } - VLIWPacketizerList::~VLIWPacketizerList() { delete VLIWScheduler; delete ResourceTracker; } - // End the current packet, bundle packet instructions and reset DFA state. void VLIWPacketizerList::endPacket(MachineBasicBlock *MBB, MachineBasicBlock::iterator MI) { @@ -237,7 +238,6 @@ void VLIWPacketizerList::endPacket(MachineBasicBlock *MBB, DEBUG(dbgs() << "End packet\n"); } - // Bundle machine instructions into packets. void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB, MachineBasicBlock::iterator BeginItr, @@ -336,7 +336,6 @@ void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB, VLIWScheduler->finishBlock(); } - // Add a DAG mutation object to the ordered list.
void VLIWPacketizerList::addMutation( std::unique_ptr<ScheduleDAGMutation> Mutation) { diff --git a/interpreter/llvm/src/lib/CodeGen/DeadMachineInstructionElim.cpp b/interpreter/llvm/src/lib/CodeGen/DeadMachineInstructionElim.cpp index 7ac2e5445435d..91d18e2bcaa69 100644 --- a/interpreter/llvm/src/lib/CodeGen/DeadMachineInstructionElim.cpp +++ b/interpreter/llvm/src/lib/CodeGen/DeadMachineInstructionElim.cpp @@ -11,10 +11,10 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -23,7 +23,7 @@ using namespace llvm; -#define DEBUG_TYPE "codegen-dce" +#define DEBUG_TYPE "dead-mi-elimination" STATISTIC(NumDeletes, "Number of dead instructions deleted"); @@ -54,7 +54,7 @@ namespace { char DeadMachineInstructionElim::ID = 0; char &llvm::DeadMachineInstructionElimID = DeadMachineInstructionElim::ID; -INITIALIZE_PASS(DeadMachineInstructionElim, "dead-mi-elimination", +INITIALIZE_PASS(DeadMachineInstructionElim, DEBUG_TYPE, "Remove dead machine instructions", false, false) bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const { diff --git a/interpreter/llvm/src/lib/CodeGen/DetectDeadLanes.cpp b/interpreter/llvm/src/lib/CodeGen/DetectDeadLanes.cpp index 6f4ea1912cf4e..ab9a0592e0177 100644 --- a/interpreter/llvm/src/lib/CodeGen/DetectDeadLanes.cpp +++ b/interpreter/llvm/src/lib/CodeGen/DetectDeadLanes.cpp @@ -132,8 +132,7 @@ class DetectDeadLanes : public MachineFunctionPass { char DetectDeadLanes::ID = 0; char &llvm::DetectDeadLanesID = DetectDeadLanes::ID; -INITIALIZE_PASS(DetectDeadLanes, "detect-dead-lanes", "Detect Dead Lanes", - false, false) +INITIALIZE_PASS(DetectDeadLanes, DEBUG_TYPE, "Detect Dead Lanes", false, false) /// Returns true if \p MI will get lowered to a series of COPY instructions. /// We call this a COPY-like instruction. diff --git a/interpreter/llvm/src/lib/CodeGen/DwarfEHPrepare.cpp b/interpreter/llvm/src/lib/CodeGen/DwarfEHPrepare.cpp index 38af19a044485..2f833260bca20 100644 --- a/interpreter/llvm/src/lib/CodeGen/DwarfEHPrepare.cpp +++ b/interpreter/llvm/src/lib/CodeGen/DwarfEHPrepare.cpp @@ -12,12 +12,13 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" @@ -34,8 +35,6 @@ STATISTIC(NumResumesLowered, "Number of resume calls lowered"); namespace { class DwarfEHPrepare : public FunctionPass { - const TargetMachine *TM; - // RewindFunction - _Unwind_Resume or the target equivalent. Constant *RewindFunction; @@ -52,15 +51,9 @@ namespace { public: static char ID; // Pass identification, replacement for typeid. - // INITIALIZE_TM_PASS requires a default constructor, but it isn't used in - // practice.
DwarfEHPrepare() - : FunctionPass(ID), TM(nullptr), RewindFunction(nullptr), DT(nullptr), - TLI(nullptr) {} - - DwarfEHPrepare(const TargetMachine *TM) - : FunctionPass(ID), TM(TM), RewindFunction(nullptr), DT(nullptr), - TLI(nullptr) {} + : FunctionPass(ID), RewindFunction(nullptr), DT(nullptr), TLI(nullptr) { + } bool runOnFunction(Function &Fn) override; @@ -78,18 +71,18 @@ } // end anonymous namespace char DwarfEHPrepare::ID = 0; -INITIALIZE_TM_PASS_BEGIN(DwarfEHPrepare, "dwarfehprepare", - "Prepare DWARF exceptions", false, false) +INITIALIZE_PASS_BEGIN(DwarfEHPrepare, DEBUG_TYPE, + "Prepare DWARF exceptions", false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) -INITIALIZE_TM_PASS_END(DwarfEHPrepare, "dwarfehprepare", - "Prepare DWARF exceptions", false, false) +INITIALIZE_PASS_END(DwarfEHPrepare, DEBUG_TYPE, + "Prepare DWARF exceptions", false, false) -FunctionPass *llvm::createDwarfEHPass(const TargetMachine *TM) { - return new DwarfEHPrepare(TM); -} +FunctionPass *llvm::createDwarfEHPass() { return new DwarfEHPrepare(); } void DwarfEHPrepare::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<TargetPassConfig>(); AU.addRequired<TargetTransformInfoWrapperPass>(); AU.addRequired<DominatorTreeWrapperPass>(); } @@ -254,9 +247,10 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) { } bool DwarfEHPrepare::runOnFunction(Function &Fn) { - assert(TM && "DWARF EH preparation requires a target machine"); + const TargetMachine &TM = + getAnalysis<TargetPassConfig>().getTM<TargetMachine>(); DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); - TLI = TM->getSubtargetImpl(Fn)->getTargetLowering(); + TLI = TM.getSubtargetImpl(Fn)->getTargetLowering(); bool Changed = InsertUnwindResumeCalls(Fn); DT = nullptr; TLI = nullptr; diff --git a/interpreter/llvm/src/lib/CodeGen/EarlyIfConversion.cpp b/interpreter/llvm/src/lib/CodeGen/EarlyIfConversion.cpp index 7291727964530..402afe75b1414 100644 --- a/interpreter/llvm/src/lib/CodeGen/EarlyIfConversion.cpp +++ b/interpreter/llvm/src/lib/CodeGen/EarlyIfConversion.cpp @@ -616,13 +616,13 @@ class EarlyIfConverter : public MachineFunctionPass { char EarlyIfConverter::ID = 0; char &llvm::EarlyIfConverterID = EarlyIfConverter::ID; -INITIALIZE_PASS_BEGIN(EarlyIfConverter, - "early-ifcvt", "Early If Converter", false, false) +INITIALIZE_PASS_BEGIN(EarlyIfConverter, DEBUG_TYPE, + "Early If Converter", false, false) INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics) -INITIALIZE_PASS_END(EarlyIfConverter, - "early-ifcvt", "Early If Converter", false, false) +INITIALIZE_PASS_END(EarlyIfConverter, DEBUG_TYPE, + "Early If Converter", false, false) void EarlyIfConverter::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<MachineBranchProbabilityInfo>(); diff --git a/interpreter/llvm/src/lib/CodeGen/ExpandISelPseudos.cpp b/interpreter/llvm/src/lib/CodeGen/ExpandISelPseudos.cpp index 0ec79c2e69f94..324ea171293db 100644 --- a/interpreter/llvm/src/lib/CodeGen/ExpandISelPseudos.cpp +++ b/interpreter/llvm/src/lib/CodeGen/ExpandISelPseudos.cpp @@ -14,9 +14,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/Support/Debug.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetSubtargetInfo.h" @@ -41,7 +41,7 @@ namespace { char ExpandISelPseudos::ID = 0;
char &llvm::ExpandISelPseudosID = ExpandISelPseudos::ID; -INITIALIZE_PASS(ExpandISelPseudos, "expand-isel-pseudos", +INITIALIZE_PASS(ExpandISelPseudos, DEBUG_TYPE, "Expand ISel Pseudo-instructions", false, false) bool ExpandISelPseudos::runOnMachineFunction(MachineFunction &MF) { diff --git a/interpreter/llvm/src/lib/CodeGen/ExpandPostRAPseudos.cpp b/interpreter/llvm/src/lib/CodeGen/ExpandPostRAPseudos.cpp index e860906043dda..4ce86f27a7dd6 100644 --- a/interpreter/llvm/src/lib/CodeGen/ExpandPostRAPseudos.cpp +++ b/interpreter/llvm/src/lib/CodeGen/ExpandPostRAPseudos.cpp @@ -12,11 +12,11 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" @@ -58,7 +58,7 @@ struct ExpandPostRA : public MachineFunctionPass { char ExpandPostRA::ID = 0; char &llvm::ExpandPostRAPseudosID = ExpandPostRA::ID; -INITIALIZE_PASS(ExpandPostRA, "postrapseudos", +INITIALIZE_PASS(ExpandPostRA, DEBUG_TYPE, "Post-RA pseudo instruction expansion pass", false, false) /// TransferImplicitOperands - MI is a pseudo-instruction, and the lowered diff --git a/interpreter/llvm/src/lib/CodeGen/ExpandReductions.cpp b/interpreter/llvm/src/lib/CodeGen/ExpandReductions.cpp index a40ea28056ddd..70dca3b74b2f3 100644 --- a/interpreter/llvm/src/lib/CodeGen/ExpandReductions.cpp +++ b/interpreter/llvm/src/lib/CodeGen/ExpandReductions.cpp @@ -12,17 +12,17 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/ExpandReductions.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstIterator.h" -#include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/IR/Module.h" -#include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Pass.h" +#include "llvm/Transforms/Utils/LoopUtils.h" using namespace llvm; diff --git a/interpreter/llvm/src/lib/CodeGen/FaultMaps.cpp b/interpreter/llvm/src/lib/CodeGen/FaultMaps.cpp index 43f3641289787..2924b011e0c1d 100644 --- a/interpreter/llvm/src/lib/CodeGen/FaultMaps.cpp +++ b/interpreter/llvm/src/lib/CodeGen/FaultMaps.cpp @@ -7,9 +7,9 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/FaultMaps.h" #include "llvm/ADT/Twine.h" #include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/CodeGen/FaultMaps.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCObjectFileInfo.h" diff --git a/interpreter/llvm/src/lib/CodeGen/FuncletLayout.cpp b/interpreter/llvm/src/lib/CodeGen/FuncletLayout.cpp index d61afad4db577..9c71b18619a1e 100644 --- a/interpreter/llvm/src/lib/CodeGen/FuncletLayout.cpp +++ b/interpreter/llvm/src/lib/CodeGen/FuncletLayout.cpp @@ -11,10 +11,10 @@ // funclets being contiguous. 
// //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/Passes.h" using namespace llvm; #define DEBUG_TYPE "funclet-layout" @@ -37,7 +37,7 @@ class FuncletLayout : public MachineFunctionPass { char FuncletLayout::ID = 0; char &llvm::FuncletLayoutID = FuncletLayout::ID; -INITIALIZE_PASS(FuncletLayout, "funclet-layout", +INITIALIZE_PASS(FuncletLayout, DEBUG_TYPE, "Contiguously Lay Out Funclets", false, false) bool FuncletLayout::runOnMachineFunction(MachineFunction &F) { diff --git a/interpreter/llvm/src/lib/CodeGen/GCMetadata.cpp b/interpreter/llvm/src/lib/CodeGen/GCMetadata.cpp index be21c7306da16..456fa799e8e1a 100644 --- a/interpreter/llvm/src/lib/CodeGen/GCMetadata.cpp +++ b/interpreter/llvm/src/lib/CodeGen/GCMetadata.cpp @@ -11,22 +11,27 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/GCMetadata.h" #include "llvm/CodeGen/GCStrategy.h" -#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/IR/Function.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Pass.h" -#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include + using namespace llvm; namespace { class Printer : public FunctionPass { static char ID; + raw_ostream &OS; public: @@ -38,7 +43,8 @@ class Printer : public FunctionPass { bool runOnFunction(Function &F) override; bool doFinalization(Module &M) override; }; -} + +} // end anonymous namespace INITIALIZE_PASS(GCModuleInfo, "collector-metadata", "Create Garbage Collector Module Metadata", false, false) @@ -48,7 +54,7 @@ INITIALIZE_PASS(GCModuleInfo, "collector-metadata", GCFunctionInfo::GCFunctionInfo(const Function &F, GCStrategy &S) : F(F), S(S), FrameSize(~0LL) {} -GCFunctionInfo::~GCFunctionInfo() {} +GCFunctionInfo::~GCFunctionInfo() = default; // ----------------------------------------------------------------------------- @@ -67,7 +73,7 @@ GCFunctionInfo &GCModuleInfo::getFunctionInfo(const Function &F) { return *I->second; GCStrategy *S = getGCStrategy(F.getGC()); - Functions.push_back(make_unique<GCFunctionInfo>(F, *S)); + Functions.push_back(llvm::make_unique<GCFunctionInfo>(F, *S)); GCFunctionInfo *GFI = Functions.back().get(); FInfoMap[&F] = GFI; return *GFI; } diff --git a/interpreter/llvm/src/lib/CodeGen/GCMetadataPrinter.cpp b/interpreter/llvm/src/lib/CodeGen/GCMetadataPrinter.cpp index d183c7f2980b0..bc7beb6f6c2d3 100644 --- a/interpreter/llvm/src/lib/CodeGen/GCMetadataPrinter.cpp +++ b/interpreter/llvm/src/lib/CodeGen/GCMetadataPrinter.cpp @@ -1,4 +1,4 @@ -//===-- GCMetadataPrinter.cpp - Garbage collection infrastructure ---------===// +//===- GCMetadataPrinter.cpp - Garbage collection infrastructure ----------===// // // The LLVM Compiler Infrastructure // @@ -12,10 +12,11 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/GCMetadataPrinter.h" + using namespace llvm; LLVM_INSTANTIATE_REGISTRY(GCMetadataPrinterRegistry) -GCMetadataPrinter::GCMetadataPrinter() {} +GCMetadataPrinter::GCMetadataPrinter() = default; -GCMetadataPrinter::~GCMetadataPrinter() {} +GCMetadataPrinter::~GCMetadataPrinter() = default; diff --git a/interpreter/llvm/src/lib/CodeGen/GlobalISel/CMakeLists.txt
b/interpreter/llvm/src/lib/CodeGen/GlobalISel/CMakeLists.txt index 03a8c4f5f909a..eba7ea8132e3b 100644 --- a/interpreter/llvm/src/lib/CodeGen/GlobalISel/CMakeLists.txt +++ b/interpreter/llvm/src/lib/CodeGen/GlobalISel/CMakeLists.txt @@ -8,6 +8,7 @@ set(GLOBAL_ISEL_FILES LegalizerHelper.cpp Legalizer.cpp LegalizerInfo.cpp + Localizer.cpp RegBankSelect.cpp RegisterBank.cpp RegisterBankInfo.cpp @@ -24,11 +25,11 @@ endif() # In LLVMBuild.txt files, it is not possible to mark a dependency to a # library as optional. So instead, generate an empty library if we did -# not ask for it. +# not ask for it. add_llvm_library(LLVMGlobalISel ${GLOBAL_ISEL_BUILD_FILES} GlobalISel.cpp - + DEPENDS intrinsics_gen ) diff --git a/interpreter/llvm/src/lib/CodeGen/GlobalISel/GlobalISel.cpp b/interpreter/llvm/src/lib/CodeGen/GlobalISel/GlobalISel.cpp index fcd2722f1c2fa..29d1209bb02a5 100644 --- a/interpreter/llvm/src/lib/CodeGen/GlobalISel/GlobalISel.cpp +++ b/interpreter/llvm/src/lib/CodeGen/GlobalISel/GlobalISel.cpp @@ -26,6 +26,7 @@ void llvm::initializeGlobalISel(PassRegistry &Registry) { void llvm::initializeGlobalISel(PassRegistry &Registry) { initializeIRTranslatorPass(Registry); initializeLegalizerPass(Registry); + initializeLocalizerPass(Registry); initializeRegBankSelectPass(Registry); initializeInstructionSelectPass(Registry); } diff --git a/interpreter/llvm/src/lib/CodeGen/GlobalISel/IRTranslator.cpp b/interpreter/llvm/src/lib/CodeGen/GlobalISel/IRTranslator.cpp index 811858f136eb3..ed1bd995e60be 100644 --- a/interpreter/llvm/src/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/interpreter/llvm/src/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -1,4 +1,4 @@ -//===-- llvm/CodeGen/GlobalISel/IRTranslator.cpp - IRTranslator --*- C++ -*-==// +//===- llvm/CodeGen/GlobalISel/IRTranslator.cpp - IRTranslator ---*- C++ -*-==// // // The LLVM Compiler Infrastructure // @@ -11,34 +11,69 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/GlobalISel/IRTranslator.h" - +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/OptimizationDiagnosticInfo.h" -#include "llvm/CodeGen/GlobalISel/CallLowering.h" #include "llvm/CodeGen/Analysis.h" -#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/GlobalISel/CallLowering.h" +#include "llvm/CodeGen/LowLevelType.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/GetElementPtrTypeIterator.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Metadata.h" #include "llvm/IR/Type.h" +#include "llvm/IR/User.h" #include "llvm/IR/Value.h" +#include "llvm/MC/MCContext.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CodeGen.h" +#include 
"llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/LowLevelTypeImpl.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include +#include +#include +#include +#include +#include +#include #define DEBUG_TYPE "irtranslator" using namespace llvm; char IRTranslator::ID = 0; + INITIALIZE_PASS_BEGIN(IRTranslator, DEBUG_TYPE, "IRTranslator LLVM IR -> MI", false, false) INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) @@ -62,7 +97,7 @@ static void reportTranslationError(MachineFunction &MF, ORE.emit(R); } -IRTranslator::IRTranslator() : MachineFunctionPass(ID), MRI(nullptr) { +IRTranslator::IRTranslator() : MachineFunctionPass(ID) { initializeIRTranslatorPass(*PassRegistry::getPassRegistry()); } @@ -71,7 +106,6 @@ void IRTranslator::getAnalysisUsage(AnalysisUsage &AU) const { MachineFunctionPass::getAnalysisUsage(AU); } - unsigned IRTranslator::getOrCreateVReg(const Value &Val) { unsigned &ValReg = ValToVReg[&Val]; @@ -311,7 +345,7 @@ bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) { *MF->getMachineMemOperand(MachinePointerInfo(LI.getPointerOperand()), Flags, DL->getTypeStoreSize(LI.getType()), getMemOpAlignment(LI), AAMDNodes(), nullptr, - LI.getSynchScope(), LI.getOrdering())); + LI.getSyncScopeID(), LI.getOrdering())); return true; } @@ -329,7 +363,7 @@ bool IRTranslator::translateStore(const User &U, MachineIRBuilder &MIRBuilder) { *MF->getMachineMemOperand( MachinePointerInfo(SI.getPointerOperand()), Flags, DL->getTypeStoreSize(SI.getValueOperand()->getType()), - getMemOpAlignment(SI), AAMDNodes(), nullptr, SI.getSynchScope(), + getMemOpAlignment(SI), AAMDNodes(), nullptr, SI.getSyncScopeID(), SI.getOrdering())); return true; } @@ -340,6 +374,15 @@ bool IRTranslator::translateExtractValue(const User &U, Type *Int32Ty = Type::getInt32Ty(U.getContext()); SmallVector Indices; + // If Src is a single element ConstantStruct, translate extractvalue + // to that element to avoid inserting a cast instruction. + if (auto CS = dyn_cast(Src)) + if (CS->getNumOperands() == 1) { + unsigned Res = getOrCreateVReg(*CS->getOperand(0)); + ValToVReg[&U] = Res; + return true; + } + // getIndexedOffsetInType is designed for GEPs, so the first index is the // usual array element rather than looking into the actual aggregate. 
Indices.push_back(ConstantInt::get(Int32Ty, 0)); @@ -573,7 +616,7 @@ bool IRTranslator::translateOverflowIntrinsic(const CallInst &CI, unsigned Op, MIB.addUse(Zero); } - MIRBuilder.buildSequence(getOrCreateVReg(CI), Res, 0, Overflow, Width); + MIRBuilder.buildSequence(getOrCreateVReg(CI), {Res, Overflow}, {0, Width}); return true; } @@ -677,6 +720,33 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, .addUse(getOrCreateVReg(*CI.getArgOperand(0))) .addUse(getOrCreateVReg(*CI.getArgOperand(1))); return true; + case Intrinsic::exp: + MIRBuilder.buildInstr(TargetOpcode::G_FEXP) + .addDef(getOrCreateVReg(CI)) + .addUse(getOrCreateVReg(*CI.getArgOperand(0))); + return true; + case Intrinsic::exp2: + MIRBuilder.buildInstr(TargetOpcode::G_FEXP2) + .addDef(getOrCreateVReg(CI)) + .addUse(getOrCreateVReg(*CI.getArgOperand(0))); + return true; + case Intrinsic::log: + MIRBuilder.buildInstr(TargetOpcode::G_FLOG) + .addDef(getOrCreateVReg(CI)) + .addUse(getOrCreateVReg(*CI.getArgOperand(0))); + return true; + case Intrinsic::log2: + MIRBuilder.buildInstr(TargetOpcode::G_FLOG2) + .addDef(getOrCreateVReg(CI)) + .addUse(getOrCreateVReg(*CI.getArgOperand(0))); + return true; + case Intrinsic::fma: + MIRBuilder.buildInstr(TargetOpcode::G_FMA) + .addDef(getOrCreateVReg(CI)) + .addUse(getOrCreateVReg(*CI.getArgOperand(0))) + .addUse(getOrCreateVReg(*CI.getArgOperand(1))) + .addUse(getOrCreateVReg(*CI.getArgOperand(2))); + return true; case Intrinsic::memcpy: case Intrinsic::memmove: case Intrinsic::memset: @@ -775,6 +845,21 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) { return false; MIB.addUse(getOrCreateVReg(*Arg)); } + + // Add a MachineMemOperand if it is a target mem intrinsic. + const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering(); + TargetLowering::IntrinsicInfo Info; + // TODO: Add a GlobalISel version of getTgtMemIntrinsic. + if (TLI.getTgtMemIntrinsic(Info, CI, ID)) { + MachineMemOperand::Flags Flags = + Info.vol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone; + Flags |= + Info.readMem ? MachineMemOperand::MOLoad : MachineMemOperand::MOStore; + uint64_t Size = Info.memVT.getSizeInBits() >> 3; + MIB.addMemOperand(MF->getMachineMemOperand(MachinePointerInfo(Info.ptrVal), + Flags, Size, Info.align)); + } + return true; } @@ -803,7 +888,6 @@ bool IRTranslator::translateInvoke(const User &U, if (!isa(EHPadBB->front())) return false; - // Emit the actual call, bracketed by EH_LABELs so that the MF knows about // the region covered by the try. MCSymbol *BeginSymbol = Context.createTempSymbol(); @@ -1108,6 +1192,23 @@ bool IRTranslator::translate(const Constant &C, unsigned Reg) { default: return false; } + } else if (auto CS = dyn_cast(&C)) { + // Return the element if it is a single element ConstantStruct. 
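A sketch of the two shapes this new ConstantStruct case produces, written as the generic MIR it would emit (register names invented; the hunk resumes right after the block):

```cpp
// For {i32 1, i32 2}, each member gets its own vreg and the aggregate is
// assembled at bit offsets 0 and 32:
//   %lo(s32)  = G_CONSTANT i32 1
//   %hi(s32)  = G_CONSTANT i32 2
//   %agg(s64) = G_MERGE_VALUES %lo(s32), %hi(s32)
//
// For a single-member struct such as {i64 7}, a merge of one operand is
// meaningless, so the element register is simply re-cast to the result:
//   %elt(s64) = G_CONSTANT i64 7
//   %agg(s64) = COPY %elt(s64)   ; buildCast degenerates to a copy here
```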
+ if (CS->getNumOperands() == 1) { + unsigned EltReg = getOrCreateVReg(*CS->getOperand(0)); + EntryBuilder.buildCast(Reg, EltReg); + return true; + } + SmallVector Ops; + SmallVector Indices; + uint64_t Offset = 0; + for (unsigned i = 0; i < CS->getNumOperands(); ++i) { + unsigned OpReg = getOrCreateVReg(*CS->getOperand(i)); + Ops.push_back(OpReg); + Indices.push_back(Offset); + Offset += MRI->getType(OpReg).getSizeInBits(); + } + EntryBuilder.buildSequence(Reg, Ops, Indices); } else if (auto CV = dyn_cast(&C)) { if (CV->getNumOperands() == 1) return translate(*CV->getOperand(0), Reg); @@ -1129,6 +1230,11 @@ void IRTranslator::finalizeFunction() { ValToVReg.clear(); FrameIndices.clear(); MachinePreds.clear(); + // MachineIRBuilder::DebugLoc can outlive the DILocation it holds. Clear it + // to avoid accessing free’d memory (in runOnMachineFunction) and to avoid + // destroying it twice (in ~IRTranslator() and ~LLVMContext()) + EntryBuilder = MachineIRBuilder(); + CurBuilder = MachineIRBuilder(); } bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { @@ -1142,7 +1248,7 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { MRI = &MF->getRegInfo(); DL = &F.getParent()->getDataLayout(); TPC = &getAnalysis(); - ORE = make_unique(&F); + ORE = llvm::make_unique(&F); assert(PendingPHIs.empty() && "stale PHIs"); diff --git a/interpreter/llvm/src/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/interpreter/llvm/src/lib/CodeGen/GlobalISel/InstructionSelector.cpp index c67da8629a3ba..bf427225d6a96 100644 --- a/interpreter/llvm/src/lib/CodeGen/GlobalISel/InstructionSelector.cpp +++ b/interpreter/llvm/src/lib/CodeGen/GlobalISel/InstructionSelector.cpp @@ -1,4 +1,4 @@ -//===- llvm/CodeGen/GlobalISel/InstructionSelector.cpp -----------*- C++ -*-==// +//===- llvm/CodeGen/GlobalISel/InstructionSelector.cpp --------------------===// // // The LLVM Compiler Infrastructure // @@ -11,19 +11,41 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" -#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" #include "llvm/CodeGen/GlobalISel/Utils.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/MC/MCInstrDesc.h" #include "llvm/IR/Constants.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetRegisterInfo.h" +#include #define DEBUG_TYPE "instructionselector" using namespace llvm; -InstructionSelector::InstructionSelector() {} +InstructionSelector::MatcherState::MatcherState(unsigned MaxRenderers) + : Renderers(MaxRenderers, nullptr), MIs() {} + +InstructionSelector::InstructionSelector() = default; + +bool InstructionSelector::constrainOperandRegToRegClass( + MachineInstr &I, unsigned OpIdx, const TargetRegisterClass &RC, + const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, + const RegisterBankInfo &RBI) const { + MachineBasicBlock &MBB = *I.getParent(); + MachineFunction &MF = *MBB.getParent(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + + return + constrainRegToClass(MRI, TII, RBI, I, I.getOperand(OpIdx).getReg(), RC); +} bool InstructionSelector::constrainSelectedInstRegOperands( MachineInstr &I, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, @@ -72,8 +94,7 @@ 
bool InstructionSelector::constrainSelectedInstRegOperands( bool InstructionSelector::isOperandImmEqual( const MachineOperand &MO, int64_t Value, const MachineRegisterInfo &MRI) const { - - if (MO.getReg()) + if (MO.isReg() && MO.getReg()) if (auto VRegVal = getConstantVRegVal(MO.getReg(), MRI)) return *VRegVal == Value; return false; diff --git a/interpreter/llvm/src/lib/CodeGen/GlobalISel/Legalizer.cpp b/interpreter/llvm/src/lib/CodeGen/GlobalISel/Legalizer.cpp index aec379197dfb7..b699156c568b4 100644 --- a/interpreter/llvm/src/lib/CodeGen/GlobalISel/Legalizer.cpp +++ b/interpreter/llvm/src/lib/CodeGen/GlobalISel/Legalizer.cpp @@ -15,7 +15,6 @@ #include "llvm/CodeGen/GlobalISel/Legalizer.h" #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" -#include "llvm/CodeGen/GlobalISel/Legalizer.h" #include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -51,72 +50,9 @@ void Legalizer::getAnalysisUsage(AnalysisUsage &AU) const { void Legalizer::init(MachineFunction &MF) { } -bool Legalizer::combineExtracts(MachineInstr &MI, MachineRegisterInfo &MRI, - const TargetInstrInfo &TII) { - bool Changed = false; - if (MI.getOpcode() != TargetOpcode::G_EXTRACT) - return Changed; - - unsigned NumDefs = (MI.getNumOperands() - 1) / 2; - unsigned SrcReg = MI.getOperand(NumDefs).getReg(); - MachineInstr &SeqI = *MRI.def_instr_begin(SrcReg); - if (SeqI.getOpcode() != TargetOpcode::G_SEQUENCE) - return Changed; - - unsigned NumSeqSrcs = (SeqI.getNumOperands() - 1) / 2; - bool AllDefsReplaced = true; - - // Try to match each register extracted with a corresponding insertion formed - // by the G_SEQUENCE. - for (unsigned Idx = 0, SeqIdx = 0; Idx < NumDefs; ++Idx) { - MachineOperand &ExtractMO = MI.getOperand(Idx); - assert(ExtractMO.isReg() && ExtractMO.isDef() && - "unexpected extract operand"); - - unsigned ExtractReg = ExtractMO.getReg(); - unsigned ExtractPos = MI.getOperand(NumDefs + Idx + 1).getImm(); - - while (SeqIdx < NumSeqSrcs && - SeqI.getOperand(2 * SeqIdx + 2).getImm() < ExtractPos) - ++SeqIdx; - - if (SeqIdx == NumSeqSrcs) { - AllDefsReplaced = false; - continue; - } - - unsigned OrigReg = SeqI.getOperand(2 * SeqIdx + 1).getReg(); - if (SeqI.getOperand(2 * SeqIdx + 2).getImm() != ExtractPos || - MRI.getType(OrigReg) != MRI.getType(ExtractReg)) { - AllDefsReplaced = false; - continue; - } - - assert(!TargetRegisterInfo::isPhysicalRegister(OrigReg) && - "unexpected physical register in G_SEQUENCE"); - - // Finally we can replace the uses. - MRI.replaceRegWith(ExtractReg, OrigReg); - } - - if (AllDefsReplaced) { - // If SeqI was the next instruction in the BB and we removed it, we'd break - // the outer iteration. 
- assert(std::next(MachineBasicBlock::iterator(MI)) != SeqI && - "G_SEQUENCE does not dominate G_EXTRACT"); - - MI.eraseFromParent(); - - if (MRI.use_empty(SrcReg)) - SeqI.eraseFromParent(); - Changed = true; - } - - return Changed; -} - bool Legalizer::combineMerges(MachineInstr &MI, MachineRegisterInfo &MRI, - const TargetInstrInfo &TII) { + const TargetInstrInfo &TII, + MachineIRBuilder &MIRBuilder) { if (MI.getOpcode() != TargetOpcode::G_UNMERGE_VALUES) return false; @@ -126,18 +62,62 @@ bool Legalizer::combineMerges(MachineInstr &MI, MachineRegisterInfo &MRI, if (MergeI.getOpcode() != TargetOpcode::G_MERGE_VALUES) return false; - if (MergeI.getNumOperands() - 1 != NumDefs) - return false; + const unsigned NumMergeRegs = MergeI.getNumOperands() - 1; - // FIXME: is a COPY appropriate if the types mismatch? We know both registers - // are allocatable by now. - if (MRI.getType(MI.getOperand(0).getReg()) != - MRI.getType(MergeI.getOperand(1).getReg())) - return false; + if (NumMergeRegs < NumDefs) { + if (NumDefs % NumMergeRegs != 0) + return false; + + MIRBuilder.setInstr(MI); + // Transform to UNMERGEs, for example + // %1 = G_MERGE_VALUES %4, %5 + // %9, %10, %11, %12 = G_UNMERGE_VALUES %1 + // to + // %9, %10 = G_UNMERGE_VALUES %4 + // %11, %12 = G_UNMERGE_VALUES %5 - for (unsigned Idx = 0; Idx < NumDefs; ++Idx) - MRI.replaceRegWith(MI.getOperand(Idx).getReg(), - MergeI.getOperand(Idx + 1).getReg()); + const unsigned NewNumDefs = NumDefs / NumMergeRegs; + for (unsigned Idx = 0; Idx < NumMergeRegs; ++Idx) { + SmallVector DstRegs; + for (unsigned j = 0, DefIdx = Idx * NewNumDefs; j < NewNumDefs; + ++j, ++DefIdx) + DstRegs.push_back(MI.getOperand(DefIdx).getReg()); + + MIRBuilder.buildUnmerge(DstRegs, MergeI.getOperand(Idx + 1).getReg()); + } + + } else if (NumMergeRegs > NumDefs) { + if (NumMergeRegs % NumDefs != 0) + return false; + + MIRBuilder.setInstr(MI); + // Transform to MERGEs + // %6 = G_MERGE_VALUES %17, %18, %19, %20 + // %7, %8 = G_UNMERGE_VALUES %6 + // to + // %7 = G_MERGE_VALUES %17, %18 + // %8 = G_MERGE_VALUES %19, %20 + + const unsigned NumRegs = NumMergeRegs / NumDefs; + for (unsigned DefIdx = 0; DefIdx < NumDefs; ++DefIdx) { + SmallVector Regs; + for (unsigned j = 0, Idx = NumRegs * DefIdx + 1; j < NumRegs; ++j, ++Idx) + Regs.push_back(MergeI.getOperand(Idx).getReg()); + + MIRBuilder.buildMerge(MI.getOperand(DefIdx).getReg(), Regs); + } + + } else { + // FIXME: is a COPY appropriate if the types mismatch? We know both + // registers are allocatable by now. + if (MRI.getType(MI.getOperand(0).getReg()) != + MRI.getType(MergeI.getOperand(1).getReg())) + return false; + + for (unsigned Idx = 0; Idx < NumDefs; ++Idx) + MRI.replaceRegWith(MI.getOperand(Idx).getReg(), + MergeI.getOperand(Idx + 1).getReg()); + } MI.eraseFromParent(); if (MRI.use_empty(MergeI.getOperand(0).getReg())) @@ -227,13 +207,7 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) { // Get the next Instruction before we try to legalize, because there's a // good chance MI will be deleted. NextMI = std::next(MI); - - // combineExtracts erases MI. 
- if (combineExtracts(*MI, MRI, TII)) { - Changed = true; - continue; - } - Changed |= combineMerges(*MI, MRI, TII); + Changed |= combineMerges(*MI, MRI, TII, Helper.MIRBuilder); } } diff --git a/interpreter/llvm/src/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/interpreter/llvm/src/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index ef5818dabe232..5258370e6680b 100644 --- a/interpreter/llvm/src/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/interpreter/llvm/src/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -82,6 +82,12 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) { case TargetOpcode::G_UDIV: assert(Size == 32 && "Unsupported size"); return RTLIB::UDIV_I32; + case TargetOpcode::G_SREM: + assert(Size == 32 && "Unsupported size"); + return RTLIB::SREM_I32; + case TargetOpcode::G_UREM: + assert(Size == 32 && "Unsupported size"); + return RTLIB::UREM_I32; case TargetOpcode::G_FADD: assert((Size == 32 || Size == 64) && "Unsupported size"); return Size == 64 ? RTLIB::ADD_F64 : RTLIB::ADD_F32; @@ -93,52 +99,72 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) { llvm_unreachable("Unknown libcall function"); } -static LegalizerHelper::LegalizeResult -simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, - Type *OpType) { +LegalizerHelper::LegalizeResult +llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall, + const CallLowering::ArgInfo &Result, + ArrayRef Args) { auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering(); auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering(); - auto Libcall = getRTLibDesc(MI.getOpcode(), Size); const char *Name = TLI.getLibcallName(Libcall); + MIRBuilder.getMF().getFrameInfo().setHasCalls(true); - CLI.lowerCall(MIRBuilder, TLI.getLibcallCallingConv(Libcall), - MachineOperand::CreateES(Name), - {MI.getOperand(0).getReg(), OpType}, - {{MI.getOperand(1).getReg(), OpType}, - {MI.getOperand(2).getReg(), OpType}}); - MI.eraseFromParent(); + if (!CLI.lowerCall(MIRBuilder, TLI.getLibcallCallingConv(Libcall), + MachineOperand::CreateES(Name), Result, Args)) + return LegalizerHelper::UnableToLegalize; + return LegalizerHelper::Legalized; } +static LegalizerHelper::LegalizeResult +simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, + Type *OpType) { + auto Libcall = getRTLibDesc(MI.getOpcode(), Size); + return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), OpType}, + {{MI.getOperand(1).getReg(), OpType}, + {MI.getOperand(2).getReg(), OpType}}); +} + LegalizerHelper::LegalizeResult LegalizerHelper::libcall(MachineInstr &MI) { - LLT Ty = MRI.getType(MI.getOperand(0).getReg()); - unsigned Size = Ty.getSizeInBits(); + LLT LLTy = MRI.getType(MI.getOperand(0).getReg()); + unsigned Size = LLTy.getSizeInBits(); auto &Ctx = MIRBuilder.getMF().getFunction()->getContext(); + MIRBuilder.setInstr(MI); switch (MI.getOpcode()) { default: return UnableToLegalize; case TargetOpcode::G_SDIV: - case TargetOpcode::G_UDIV: { - Type *Ty = Type::getInt32Ty(Ctx); - return simpleLibcall(MI, MIRBuilder, Size, Ty); + case TargetOpcode::G_UDIV: + case TargetOpcode::G_SREM: + case TargetOpcode::G_UREM: { + Type *HLTy = Type::getInt32Ty(Ctx); + auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy); + if (Status != Legalized) + return Status; + break; } case TargetOpcode::G_FADD: case TargetOpcode::G_FPOW: case TargetOpcode::G_FREM: { - Type *Ty = Size == 64 ? 
Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx); - return simpleLibcall(MI, MIRBuilder, Size, Ty); + Type *HLTy = Size == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx); + auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy); + if (Status != Legalized) + return Status; + break; } } + + MI.eraseFromParent(); + return Legalized; } LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) { // FIXME: Don't know how to handle secondary types yet. - if (TypeIdx != 0) + if (TypeIdx != 0 && MI.getOpcode() != TargetOpcode::G_EXTRACT) return UnableToLegalize; MIRBuilder.setInstr(MI); @@ -146,6 +172,20 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, switch (MI.getOpcode()) { default: return UnableToLegalize; + case TargetOpcode::G_IMPLICIT_DEF: { + int NumParts = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() / + NarrowTy.getSizeInBits(); + + SmallVector DstRegs; + for (int i = 0; i < NumParts; ++i) { + unsigned Dst = MRI.createGenericVirtualRegister(NarrowTy); + MIRBuilder.buildUndef(Dst); + DstRegs.push_back(Dst); + } + MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs); + MI.eraseFromParent(); + return Legalized; + } case TargetOpcode::G_ADD: { // Expand in terms of carry-setting/consuming G_ADDE instructions. int NumParts = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() / @@ -173,6 +213,58 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, MI.eraseFromParent(); return Legalized; } + case TargetOpcode::G_EXTRACT: { + if (TypeIdx != 1) + return UnableToLegalize; + + int64_t NarrowSize = NarrowTy.getSizeInBits(); + int NumParts = + MRI.getType(MI.getOperand(1).getReg()).getSizeInBits() / NarrowSize; + + SmallVector SrcRegs, DstRegs; + SmallVector Indexes; + extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs); + + unsigned OpReg = MI.getOperand(0).getReg(); + int64_t OpStart = MI.getOperand(2).getImm(); + int64_t OpSize = MRI.getType(OpReg).getSizeInBits(); + for (int i = 0; i < NumParts; ++i) { + unsigned SrcStart = i * NarrowSize; + + if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) { + // No part of the extract uses this subregister, ignore it. + continue; + } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) { + // The entire subregister is extracted, forward the value. + DstRegs.push_back(SrcRegs[i]); + continue; + } + + // OpSegStart is where this destination segment would start in OpReg if it + // extended infinitely in both directions. + int64_t ExtractOffset, SegSize; + if (OpStart < SrcStart) { + ExtractOffset = 0; + SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart); + } else { + ExtractOffset = OpStart - SrcStart; + SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize); + } + + unsigned SegReg = SrcRegs[i]; + if (ExtractOffset != 0 || SegSize != NarrowSize) { + // A genuine extract is needed. 
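A worked instance of the ExtractOffset/SegSize arithmetic above, with numbers chosen for illustration (the hunk resumes right after the block):

```cpp
// Narrowing `%dst(s48) = G_EXTRACT %src(s64), 8` with NarrowTy = s32:
//   part 0 covers source bits [0,32):  OpStart(8) >= SrcStart(0), so
//     ExtractOffset = 8 - 0 = 8,  SegSize = min(0 + 32 - 8, 48)  = 24
//   part 1 covers source bits [32,64): OpStart(8) <  SrcStart(32), so
//     ExtractOffset = 0,          SegSize = min(32, 8 + 48 - 32) = 24
// Each part yields a 24-bit G_EXTRACT of its s32 piece, and the two
// segments are recombined into the s48 result by the final buildMerge.
```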
+ SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize)); + MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset); + } + + DstRegs.push_back(SegReg); + } + + MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs); + MI.eraseFromParent(); + return Legalized; + } case TargetOpcode::G_INSERT: { if (TypeIdx != 0) return UnableToLegalize; @@ -237,17 +329,18 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, unsigned NarrowSize = NarrowTy.getSizeInBits(); int NumParts = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() / NarrowSize; - LLT NarrowPtrTy = LLT::pointer( - MRI.getType(MI.getOperand(1).getReg()).getAddressSpace(), NarrowSize); + LLT OffsetTy = LLT::scalar( + MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits()); SmallVector DstRegs; for (int i = 0; i < NumParts; ++i) { unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy); - unsigned SrcReg = MRI.createGenericVirtualRegister(NarrowPtrTy); - unsigned Offset = MRI.createGenericVirtualRegister(LLT::scalar(64)); + unsigned SrcReg = 0; + unsigned Adjustment = i * NarrowSize / 8; + + MIRBuilder.materializeGEP(SrcReg, MI.getOperand(1).getReg(), OffsetTy, + Adjustment); - MIRBuilder.buildConstant(Offset, i * NarrowSize / 8); - MIRBuilder.buildGEP(SrcReg, MI.getOperand(1).getReg(), Offset); // TODO: This is conservatively correct, but we probably want to split the // memory operands in the future. MIRBuilder.buildLoad(DstReg, SrcReg, **MI.memoperands_begin()); @@ -263,17 +356,19 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, unsigned NarrowSize = NarrowTy.getSizeInBits(); int NumParts = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() / NarrowSize; - LLT NarrowPtrTy = LLT::pointer( - MRI.getType(MI.getOperand(1).getReg()).getAddressSpace(), NarrowSize); + LLT OffsetTy = LLT::scalar( + MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits()); SmallVector SrcRegs; extractParts(MI.getOperand(0).getReg(), NarrowTy, NumParts, SrcRegs); for (int i = 0; i < NumParts; ++i) { - unsigned DstReg = MRI.createGenericVirtualRegister(NarrowPtrTy); - unsigned Offset = MRI.createGenericVirtualRegister(LLT::scalar(64)); - MIRBuilder.buildConstant(Offset, i * NarrowSize / 8); - MIRBuilder.buildGEP(DstReg, MI.getOperand(1).getReg(), Offset); + unsigned DstReg = 0; + unsigned Adjustment = i * NarrowSize / 8; + + MIRBuilder.materializeGEP(DstReg, MI.getOperand(1).getReg(), OffsetTy, + Adjustment); + // TODO: This is conservatively correct, but we probably want to split the // memory operands in the future. MIRBuilder.buildStore(SrcRegs[i], DstReg, **MI.memoperands_begin()); @@ -338,9 +433,12 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { } case TargetOpcode::G_SDIV: case TargetOpcode::G_UDIV: + case TargetOpcode::G_SREM: + case TargetOpcode::G_UREM: case TargetOpcode::G_ASHR: case TargetOpcode::G_LSHR: { unsigned ExtOp = MI.getOpcode() == TargetOpcode::G_SDIV || + MI.getOpcode() == TargetOpcode::G_SREM || MI.getOpcode() == TargetOpcode::G_ASHR ? 
TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT; diff --git a/interpreter/llvm/src/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/interpreter/llvm/src/lib/CodeGen/GlobalISel/LegalizerInfo.cpp index 4d45910422967..76917aa9660d4 100644 --- a/interpreter/llvm/src/lib/CodeGen/GlobalISel/LegalizerInfo.cpp +++ b/interpreter/llvm/src/lib/CodeGen/GlobalISel/LegalizerInfo.cpp @@ -1,4 +1,4 @@ -//===---- lib/CodeGen/GlobalISel/LegalizerInfo.cpp - Legalizer -------==// +//===- lib/CodeGen/GlobalISel/LegalizerInfo.cpp - Legalizer ---------------===// // // The LLVM Compiler Infrastructure // @@ -18,16 +18,25 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" - #include "llvm/ADT/SmallBitVector.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/ValueTypes.h" -#include "llvm/IR/Type.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/LowLevelTypeImpl.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetOpcodes.h" +#include +#include +#include +#include + using namespace llvm; -LegalizerInfo::LegalizerInfo() : TablesInitialized(false) { +LegalizerInfo::LegalizerInfo() { + DefaultActions[TargetOpcode::G_IMPLICIT_DEF] = NarrowScalar; + // FIXME: these two can be legalized to the fundamental load/store Jakob // proposed. Once loads & stores are supported. DefaultActions[TargetOpcode::G_ANYEXT] = Legal; @@ -42,6 +51,7 @@ LegalizerInfo::LegalizerInfo() : TablesInitialized(false) { DefaultActions[TargetOpcode::G_BRCOND] = WidenScalar; DefaultActions[TargetOpcode::G_INSERT] = NarrowScalar; + DefaultActions[TargetOpcode::G_EXTRACT] = NarrowScalar; DefaultActions[TargetOpcode::G_FNEG] = Lower; } @@ -75,9 +85,7 @@ LegalizerInfo::getAction(const InstrAspect &Aspect) const { // FIXME: the long-term plan calls for expansion in terms of load/store (if // they're not legal). - if (Aspect.Opcode == TargetOpcode::G_SEQUENCE || - Aspect.Opcode == TargetOpcode::G_EXTRACT || - Aspect.Opcode == TargetOpcode::G_MERGE_VALUES || + if (Aspect.Opcode == TargetOpcode::G_MERGE_VALUES || Aspect.Opcode == TargetOpcode::G_UNMERGE_VALUES) return std::make_pair(Legal, Aspect.Type); @@ -173,21 +181,21 @@ Optional LegalizerInfo::findLegalType(const InstrAspect &Aspect, case Custom: return Aspect.Type; case NarrowScalar: { - return findLegalType(Aspect, - [](LLT Ty) -> LLT { return Ty.halfScalarSize(); }); + return findLegalizableSize( + Aspect, [&](LLT Ty) -> LLT { return Ty.halfScalarSize(); }); } case WidenScalar: { - return findLegalType(Aspect, [](LLT Ty) -> LLT { + return findLegalizableSize(Aspect, [&](LLT Ty) -> LLT { return Ty.getSizeInBits() < 8 ? 
LLT::scalar(8) : Ty.doubleScalarSize(); }); } case FewerElements: { - return findLegalType(Aspect, - [](LLT Ty) -> LLT { return Ty.halfElements(); }); + return findLegalizableSize( + Aspect, [&](LLT Ty) -> LLT { return Ty.halfElements(); }); } case MoreElements: { - return findLegalType(Aspect, - [](LLT Ty) -> LLT { return Ty.doubleElements(); }); + return findLegalizableSize( + Aspect, [&](LLT Ty) -> LLT { return Ty.doubleElements(); }); } } } diff --git a/interpreter/llvm/src/lib/CodeGen/GlobalISel/Localizer.cpp b/interpreter/llvm/src/lib/CodeGen/GlobalISel/Localizer.cpp new file mode 100644 index 0000000000000..c5d0999fe4388 --- /dev/null +++ b/interpreter/llvm/src/lib/CodeGen/GlobalISel/Localizer.cpp @@ -0,0 +1,123 @@ +//===- Localizer.cpp ---------------------- Localize some instrs -*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This file implements the Localizer class. +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GlobalISel/Localizer.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "localizer" + +using namespace llvm; + +char Localizer::ID = 0; +INITIALIZE_PASS(Localizer, DEBUG_TYPE, + "Move/duplicate certain instructions close to their use", false, + false) + +Localizer::Localizer() : MachineFunctionPass(ID) { + initializeLocalizerPass(*PassRegistry::getPassRegistry()); +} + +void Localizer::init(MachineFunction &MF) { MRI = &MF.getRegInfo(); } + +bool Localizer::shouldLocalize(const MachineInstr &MI) { + switch (MI.getOpcode()) { + default: + return false; + // Constant-like instructions should be close to their users. + // We don't want long live-ranges for them. + case TargetOpcode::G_CONSTANT: + case TargetOpcode::G_FCONSTANT: + case TargetOpcode::G_FRAME_INDEX: + return true; + } } + +bool Localizer::isLocalUse(MachineOperand &MOUse, const MachineInstr &Def, + MachineBasicBlock *&InsertMBB) { + MachineInstr &MIUse = *MOUse.getParent(); + InsertMBB = MIUse.getParent(); + if (MIUse.isPHI()) + InsertMBB = MIUse.getOperand(MIUse.getOperandNo(&MOUse) + 1).getMBB(); + return InsertMBB == Def.getParent(); } + +bool Localizer::runOnMachineFunction(MachineFunction &MF) { + // If the ISel pipeline failed, do not bother running that pass. + if (MF.getProperties().hasProperty( + MachineFunctionProperties::Property::FailedISel)) + return false; + + DEBUG(dbgs() << "Localize instructions for: " << MF.getName() << '\n'); + + init(MF); + + bool Changed = false; + // Keep track of the instructions we localized. + // We won't need to process them if we see them later in the CFG. + SmallPtrSet LocalizedInstrs; + DenseMap, unsigned> MBBWithLocalDef; + // TODO: Do bottom up traversal. + for (MachineBasicBlock &MBB : MF) { + for (MachineInstr &MI : MBB) { + if (LocalizedInstrs.count(&MI) || !shouldLocalize(MI)) + continue; + DEBUG(dbgs() << "Should localize: " << MI); + assert(MI.getDesc().getNumDefs() == 1 && + "More than one definition not supported yet"); + unsigned Reg = MI.getOperand(0).getReg(); + // Check if all the users of MI are local. + // We are going to invalidate the list of use operands, so we + // can't use a range iterator.
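To make the intent of the new pass concrete, a sketch of what it does to a constant with a distant user (block and register names invented; the hunk resumes right after the block):

```cpp
// Before localization, one definition feeds a use in another block, giving
// %c a live range that spans every block in between:
//   bb.0: %c(s32) = G_CONSTANT i32 42
//   ...
//   bb.3: %x(s32) = G_ADD %a(s32), %c(s32)
//
// After localization, the using block gets its own clone with a fresh vreg,
// so no long live range has to survive register allocation:
//   bb.3: %c1(s32) = G_CONSTANT i32 42
//         %x(s32)  = G_ADD %a(s32), %c1(s32)
// Clones are reused per (block, register) pair via MBBWithLocalDef, so two
// uses in the same block share one local definition.
```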
+ for (auto MOIt = MRI->use_begin(Reg), MOItEnd = MRI->use_end(); + MOIt != MOItEnd;) { + MachineOperand &MOUse = *MOIt++; + // Check if the use is already local. + MachineBasicBlock *InsertMBB; + DEBUG(MachineInstr &MIUse = *MOUse.getParent(); + dbgs() << "Checking use: " << MIUse + << " #Opd: " << MIUse.getOperandNo(&MOUse) << '\n'); + if (isLocalUse(MOUse, MI, InsertMBB)) + continue; + DEBUG(dbgs() << "Fixing non-local use\n"); + Changed = true; + auto MBBAndReg = std::make_pair(InsertMBB, Reg); + auto NewVRegIt = MBBWithLocalDef.find(MBBAndReg); + if (NewVRegIt == MBBWithLocalDef.end()) { + // Create the localized instruction. + MachineInstr *LocalizedMI = MF.CloneMachineInstr(&MI); + LocalizedInstrs.insert(LocalizedMI); + // Don't try to be smart for the insertion point. + // There is no guarantee that the first seen use is the first + // use in the block. + InsertMBB->insert(InsertMBB->getFirstNonPHI(), LocalizedMI); + + // Set a new register for the definition. + unsigned NewReg = + MRI->createGenericVirtualRegister(MRI->getType(Reg)); + MRI->setRegClassOrRegBank(NewReg, MRI->getRegClassOrRegBank(Reg)); + LocalizedMI->getOperand(0).setReg(NewReg); + NewVRegIt = + MBBWithLocalDef.insert(std::make_pair(MBBAndReg, NewReg)).first; + DEBUG(dbgs() << "Inserted: " << *LocalizedMI); + } + DEBUG(dbgs() << "Update use with: " << PrintReg(NewVRegIt->second) + << '\n'); + // Update the user reg. + MOUse.setReg(NewVRegIt->second); + } + } + } + return Changed; +} diff --git a/interpreter/llvm/src/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/interpreter/llvm/src/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index 54ef7e5c5a1b1..4636806c3f081 100644 --- a/interpreter/llvm/src/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/interpreter/llvm/src/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -166,19 +166,24 @@ MachineInstrBuilder MachineIRBuilder::buildGlobalValue(unsigned Res, .addGlobalAddress(GV); } -MachineInstrBuilder MachineIRBuilder::buildAdd(unsigned Res, unsigned Op0, +MachineInstrBuilder MachineIRBuilder::buildBinaryOp(unsigned Opcode, unsigned Res, unsigned Op0, unsigned Op1) { assert((MRI->getType(Res).isScalar() || MRI->getType(Res).isVector()) && "invalid operand type"); assert(MRI->getType(Res) == MRI->getType(Op0) && MRI->getType(Res) == MRI->getType(Op1) && "type mismatch"); - return buildInstr(TargetOpcode::G_ADD) + return buildInstr(Opcode) .addDef(Res) .addUse(Op0) .addUse(Op1); } +MachineInstrBuilder MachineIRBuilder::buildAdd(unsigned Res, unsigned Op0, + unsigned Op1) { + return buildBinaryOp(TargetOpcode::G_ADD, Res, Op0, Op1); +} + MachineInstrBuilder MachineIRBuilder::buildGEP(unsigned Res, unsigned Op0, unsigned Op1) { assert(MRI->getType(Res).isPointer() && @@ -191,6 +196,24 @@ MachineInstrBuilder MachineIRBuilder::buildGEP(unsigned Res, unsigned Op0, .addUse(Op1); } +Optional +MachineIRBuilder::materializeGEP(unsigned &Res, unsigned Op0, + const LLT &ValueTy, uint64_t Value) { + assert(Res == 0 && "Res is a result argument"); + assert(ValueTy.isScalar() && "invalid offset type"); + + if (Value == 0) { + Res = Op0; + return None; + } + + Res = MRI->createGenericVirtualRegister(MRI->getType(Op0)); + unsigned TmpReg = MRI->createGenericVirtualRegister(ValueTy); + + buildConstant(TmpReg, Value); + return buildGEP(Res, Op0, TmpReg); +} + MachineInstrBuilder MachineIRBuilder::buildPtrMask(unsigned Res, unsigned Op0, uint32_t NumBits) { assert(MRI->getType(Res).isPointer() && @@ -204,41 +227,22 @@ MachineInstrBuilder MachineIRBuilder::buildPtrMask(unsigned Res, unsigned Op0, 
MachineInstrBuilder MachineIRBuilder::buildSub(unsigned Res, unsigned Op0, unsigned Op1) { - assert((MRI->getType(Res).isScalar() || MRI->getType(Res).isVector()) && - "invalid operand type"); - assert(MRI->getType(Res) == MRI->getType(Op0) && - MRI->getType(Res) == MRI->getType(Op1) && "type mismatch"); - - return buildInstr(TargetOpcode::G_SUB) - .addDef(Res) - .addUse(Op0) - .addUse(Op1); + return buildBinaryOp(TargetOpcode::G_SUB, Res, Op0, Op1); } MachineInstrBuilder MachineIRBuilder::buildMul(unsigned Res, unsigned Op0, unsigned Op1) { - assert((MRI->getType(Res).isScalar() || MRI->getType(Res).isVector()) && - "invalid operand type"); - assert(MRI->getType(Res) == MRI->getType(Op0) && - MRI->getType(Res) == MRI->getType(Op1) && "type mismatch"); - - return buildInstr(TargetOpcode::G_MUL) - .addDef(Res) - .addUse(Op0) - .addUse(Op1); + return buildBinaryOp(TargetOpcode::G_MUL, Res, Op0, Op1); } MachineInstrBuilder MachineIRBuilder::buildAnd(unsigned Res, unsigned Op0, unsigned Op1) { - assert((MRI->getType(Res).isScalar() || MRI->getType(Res).isVector()) && - "invalid operand type"); - assert(MRI->getType(Res) == MRI->getType(Op0) && - MRI->getType(Res) == MRI->getType(Op1) && "type mismatch"); + return buildBinaryOp(TargetOpcode::G_AND, Res, Op0, Op1); +} - return buildInstr(TargetOpcode::G_AND) - .addDef(Res) - .addUse(Op0) - .addUse(Op1); +MachineInstrBuilder MachineIRBuilder::buildOr(unsigned Res, unsigned Op0, + unsigned Op1) { + return buildBinaryOp(TargetOpcode::G_OR, Res, Op0, Op1); } MachineInstrBuilder MachineIRBuilder::buildBr(MachineBasicBlock &Dest) { @@ -246,10 +250,13 @@ MachineInstrBuilder MachineIRBuilder::buildBr(MachineBasicBlock &Dest) { } MachineInstrBuilder MachineIRBuilder::buildBrIndirect(unsigned Tgt) { + assert(MRI->getType(Tgt).isPointer() && "invalid branch destination"); return buildInstr(TargetOpcode::G_BRINDIRECT).addUse(Tgt); } MachineInstrBuilder MachineIRBuilder::buildCopy(unsigned Res, unsigned Op) { + assert(MRI->getType(Res) == LLT() || MRI->getType(Op) == LLT() || + MRI->getType(Res) == MRI->getType(Op)); return buildInstr(TargetOpcode::COPY).addDef(Res).addUse(Op); } @@ -346,27 +353,36 @@ MachineInstrBuilder MachineIRBuilder::buildZExt(unsigned Res, unsigned Op) { MachineInstrBuilder MachineIRBuilder::buildSExtOrTrunc(unsigned Res, unsigned Op) { + assert(MRI->getType(Res).isScalar() || MRI->getType(Res).isVector()); + assert(MRI->getType(Res).isScalar() == MRI->getType(Op).isScalar()); + unsigned Opcode = TargetOpcode::COPY; if (MRI->getType(Res).getSizeInBits() > MRI->getType(Op).getSizeInBits()) Opcode = TargetOpcode::G_SEXT; else if (MRI->getType(Res).getSizeInBits() < MRI->getType(Op).getSizeInBits()) Opcode = TargetOpcode::G_TRUNC; + else + assert(MRI->getType(Res) == MRI->getType(Op)); return buildInstr(Opcode).addDef(Res).addUse(Op); } MachineInstrBuilder MachineIRBuilder::buildZExtOrTrunc(unsigned Res, unsigned Op) { + assert(MRI->getType(Res).isScalar() || MRI->getType(Res).isVector()); + assert(MRI->getType(Res).isScalar() == MRI->getType(Op).isScalar()); + unsigned Opcode = TargetOpcode::COPY; if (MRI->getType(Res).getSizeInBits() > MRI->getType(Op).getSizeInBits()) Opcode = TargetOpcode::G_ZEXT; else if (MRI->getType(Res).getSizeInBits() < MRI->getType(Op).getSizeInBits()) Opcode = TargetOpcode::G_TRUNC; + else + assert(MRI->getType(Res) == MRI->getType(Op)); return buildInstr(Opcode).addDef(Res).addUse(Op); } - MachineInstrBuilder MachineIRBuilder::buildCast(unsigned Dst, unsigned Src) { LLT SrcTy = MRI->getType(Src); LLT DstTy = 
MRI->getType(Dst); @@ -407,10 +423,8 @@ MachineInstrBuilder MachineIRBuilder::buildExtract(unsigned Res, unsigned Src, .addImm(Index); } -MachineInstrBuilder -MachineIRBuilder::buildSequence(unsigned Res, - ArrayRef Ops, - ArrayRef Indices) { +void MachineIRBuilder::buildSequence(unsigned Res, ArrayRef Ops, + ArrayRef Indices) { #ifndef NDEBUG assert(Ops.size() == Indices.size() && "incompatible args"); assert(!Ops.empty() && "invalid trivial sequence"); @@ -422,17 +436,35 @@ MachineIRBuilder::buildSequence(unsigned Res, assert(MRI->getType(Op).isValid() && "invalid operand type"); #endif - MachineInstrBuilder MIB = buildInstr(TargetOpcode::G_SEQUENCE); - MIB.addDef(Res); + LLT ResTy = MRI->getType(Res); + LLT OpTy = MRI->getType(Ops[0]); + unsigned OpSize = OpTy.getSizeInBits(); + bool MaybeMerge = true; for (unsigned i = 0; i < Ops.size(); ++i) { - MIB.addUse(Ops[i]); - MIB.addImm(Indices[i]); + if (MRI->getType(Ops[i]) != OpTy || Indices[i] != i * OpSize) { + MaybeMerge = false; + break; + } + } + + if (MaybeMerge && Ops.size() * OpSize == ResTy.getSizeInBits()) { + buildMerge(Res, Ops); + return; + } + + unsigned ResIn = MRI->createGenericVirtualRegister(ResTy); + buildUndef(ResIn); + + for (unsigned i = 0; i < Ops.size(); ++i) { + unsigned ResOut = + i + 1 == Ops.size() ? Res : MRI->createGenericVirtualRegister(ResTy); + buildInsert(ResOut, ResIn, Ops[i], Indices[i]); + ResIn = ResOut; } - return MIB; } MachineInstrBuilder MachineIRBuilder::buildUndef(unsigned Res) { - return buildInstr(TargetOpcode::IMPLICIT_DEF).addDef(Res); + return buildInstr(TargetOpcode::G_IMPLICIT_DEF).addDef(Res); } MachineInstrBuilder MachineIRBuilder::buildMerge(unsigned Res, @@ -448,6 +480,9 @@ MachineInstrBuilder MachineIRBuilder::buildMerge(unsigned Res, "input operands do not cover output register"); #endif + if (Ops.size() == 1) + return buildCast(Res, Ops[0]); + MachineInstrBuilder MIB = buildInstr(TargetOpcode::G_MERGE_VALUES); MIB.addDef(Res); for (unsigned i = 0; i < Ops.size(); ++i) @@ -477,8 +512,11 @@ MachineInstrBuilder MachineIRBuilder::buildUnmerge(ArrayRef Res, MachineInstrBuilder MachineIRBuilder::buildInsert(unsigned Res, unsigned Src, unsigned Op, unsigned Index) { + assert(Index + MRI->getType(Op).getSizeInBits() <= + MRI->getType(Res).getSizeInBits() && + "insertion past the end of a register"); + if (MRI->getType(Res).getSizeInBits() == MRI->getType(Op).getSizeInBits()) { - assert(Index == 0 && "insertion past the end of a register"); return buildCast(Res, Op); } diff --git a/interpreter/llvm/src/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/interpreter/llvm/src/lib/CodeGen/GlobalISel/RegBankSelect.cpp index 7248f50945d04..677941dbbf6da 100644 --- a/interpreter/llvm/src/lib/CodeGen/GlobalISel/RegBankSelect.cpp +++ b/interpreter/llvm/src/lib/CodeGen/GlobalISel/RegBankSelect.cpp @@ -1,4 +1,4 @@ -//===- llvm/CodeGen/GlobalISel/RegBankSelect.cpp - RegBankSelect -*- C++ -*-==// +//==- llvm/CodeGen/GlobalISel/RegBankSelect.cpp - RegBankSelect --*- C++ -*-==// // // The LLVM Compiler Infrastructure // @@ -12,18 +12,39 @@ #include "llvm/CodeGen/GlobalISel/RegBankSelect.h" #include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" #include "llvm/CodeGen/GlobalISel/RegisterBank.h" +#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" #include "llvm/CodeGen/GlobalISel/Utils.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include 
"llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/Function.h" +#include "llvm/IR/Attributes.h" +#include "llvm/Pass.h" #include "llvm/Support/BlockFrequency.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetOpcodes.h" +#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" +#include +#include +#include +#include +#include +#include #define DEBUG_TYPE "regbankselect" @@ -37,6 +58,7 @@ static cl::opt RegBankSelectMode( "Use the Greedy mode (best local mapping)"))); char RegBankSelect::ID = 0; + INITIALIZE_PASS_BEGIN(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false); @@ -48,8 +70,7 @@ INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, false) RegBankSelect::RegBankSelect(Mode RunningMode) - : MachineFunctionPass(ID), RBI(nullptr), MRI(nullptr), TRI(nullptr), - MBFI(nullptr), MBPI(nullptr), OptMode(RunningMode) { + : MachineFunctionPass(ID), OptMode(RunningMode) { initializeRegBankSelectPass(*PassRegistry::getPassRegistry()); if (RegBankSelectMode.getNumOccurrences() != 0) { OptMode = RegBankSelectMode; @@ -72,7 +93,7 @@ void RegBankSelect::init(MachineFunction &MF) { MBPI = nullptr; } MIRBuilder.setMF(MF); - MORE = make_unique(MF, MBFI); + MORE = llvm::make_unique(MF, MBFI); } void RegBankSelect::getAnalysisUsage(AnalysisUsage &AU) const { @@ -133,9 +154,11 @@ bool RegBankSelect::repairReg( TargetRegisterInfo::isPhysicalRegister(Dst)) && "We are about to create several defs for Dst"); - // Build the instruction used to repair, then clone it at the right places. - MachineInstr *MI = MIRBuilder.buildCopy(Dst, Src); - MI->removeFromParent(); + // Build the instruction used to repair, then clone it at the right + // places. Avoiding buildCopy bypasses the check that Src and Dst have the + // same types because the type is a placeholder when this function is called. + MachineInstr *MI = + MIRBuilder.buildInstrNoInsert(TargetOpcode::COPY).addDef(Dst).addUse(Src); DEBUG(dbgs() << "Copy: " << PrintReg(Src) << " to: " << PrintReg(Dst) << '\n'); // TODO: @@ -202,15 +225,11 @@ uint64_t RegBankSelect::getRepairCost( RBI->copyCost(*DesiredRegBrank, *CurRegBank, RegisterBankInfo::getSizeInBits(MO.getReg(), *MRI, *TRI)); // TODO: use a dedicated constant for ImpossibleCost. - if (Cost != UINT_MAX) + if (Cost != std::numeric_limits::max()) return Cost; - assert(!TPC->isGlobalISelAbortEnabled() && - "Legalization not available yet"); // Return the legalization cost of that repairing. } - assert(!TPC->isGlobalISelAbortEnabled() && - "Complex repairing not implemented yet"); - return UINT_MAX; + return std::numeric_limits::max(); } const RegisterBankInfo::InstructionMapping &RegBankSelect::findBestMapping( @@ -356,7 +375,7 @@ void RegBankSelect::tryAvoidingSplit( // the repairing cost because of the PHIs already proceeded // as already stated. // Though the code will be correct. - assert(0 && "Repairing cost may not be accurate"); + assert(false && "Repairing cost may not be accurate"); } else { // We need to do non-local repairing. 
Basically, patch all // the uses (i.e., phis) that we already proceeded. @@ -452,6 +471,11 @@ RegBankSelect::MappingCost RegBankSelect::computeMapping( // Sums up the repairing cost of MO at each insertion point. uint64_t RepairCost = getRepairCost(MO, ValMapping); + + // This is an impossible to repair cost. + if (RepairCost == std::numeric_limits::max()) + continue; + // Bias used for splitting: 5%. const uint64_t PercentageForBias = 5; uint64_t Bias = (RepairCost * PercentageForBias + 99) / 100; @@ -534,9 +558,11 @@ bool RegBankSelect::applyMapping( llvm_unreachable("Other kind should not happen"); } } + // Second, rewrite the instruction. DEBUG(dbgs() << "Actual mapping of the operands: " << OpdMapper << '\n'); RBI->applyMapping(OpdMapper); + return true; } @@ -637,11 +663,8 @@ RegBankSelect::RepairingPlacement::RepairingPlacement( MachineInstr &MI, unsigned OpIdx, const TargetRegisterInfo &TRI, Pass &P, RepairingPlacement::RepairingKind Kind) // Default is, we are going to insert code to repair OpIdx. - : Kind(Kind), - OpIdx(OpIdx), - CanMaterialize(Kind != RepairingKind::Impossible), - HasSplit(false), - P(P) { + : Kind(Kind), OpIdx(OpIdx), + CanMaterialize(Kind != RepairingKind::Impossible), P(P) { const MachineOperand &MO = MI.getOperand(OpIdx); assert(MO.isReg() && "Trying to repair a non-reg operand"); @@ -846,7 +869,7 @@ bool RegBankSelect::EdgeInsertPoint::canMaterialize() const { } RegBankSelect::MappingCost::MappingCost(const BlockFrequency &LocalFreq) - : LocalCost(0), NonLocalCost(0), LocalFreq(LocalFreq.getFrequency()) {} + : LocalFreq(LocalFreq.getFrequency()) {} bool RegBankSelect::MappingCost::addLocalCost(uint64_t Cost) { // Check if this overflows. @@ -919,7 +942,6 @@ bool RegBankSelect::MappingCost::operator<(const MappingCost &Cost) const { OtherLocalAdjust = Cost.LocalCost - LocalCost; else ThisLocalAdjust = LocalCost - Cost.LocalCost; - } else { ThisLocalAdjust = LocalCost; OtherLocalAdjust = Cost.LocalCost; diff --git a/interpreter/llvm/src/lib/CodeGen/GlobalISel/Utils.cpp b/interpreter/llvm/src/lib/CodeGen/GlobalISel/Utils.cpp index 254bdf10d804f..5ecaf5c563f82 100644 --- a/interpreter/llvm/src/lib/CodeGen/GlobalISel/Utils.cpp +++ b/interpreter/llvm/src/lib/CodeGen/GlobalISel/Utils.cpp @@ -26,6 +26,23 @@ using namespace llvm; +unsigned llvm::constrainRegToClass(MachineRegisterInfo &MRI, + const TargetInstrInfo &TII, + const RegisterBankInfo &RBI, + MachineInstr &InsertPt, unsigned Reg, + const TargetRegisterClass &RegClass) { + if (!RBI.constrainGenericRegister(Reg, RegClass, MRI)) { + unsigned NewReg = MRI.createVirtualRegister(&RegClass); + BuildMI(*InsertPt.getParent(), InsertPt, InsertPt.getDebugLoc(), + TII.get(TargetOpcode::COPY), NewReg) + .addReg(Reg); + return NewReg; + } + + return Reg; +} + + unsigned llvm::constrainOperandRegClass( const MachineFunction &MF, const TargetRegisterInfo &TRI, MachineRegisterInfo &MRI, const TargetInstrInfo &TII, @@ -36,16 +53,7 @@ unsigned llvm::constrainOperandRegClass( "PhysReg not implemented"); const TargetRegisterClass *RegClass = TII.getRegClass(II, OpIdx, &TRI, MF); - - if (!RBI.constrainGenericRegister(Reg, *RegClass, MRI)) { - unsigned NewReg = MRI.createVirtualRegister(RegClass); - BuildMI(*InsertPt.getParent(), InsertPt, InsertPt.getDebugLoc(), - TII.get(TargetOpcode::COPY), NewReg) - .addReg(Reg); - return NewReg; - } - - return Reg; + return constrainRegToClass(MRI, TII, RBI, InsertPt, Reg, *RegClass); } bool llvm::isTriviallyDead(const MachineInstr &MI, diff --git 
a/interpreter/llvm/src/lib/CodeGen/GlobalMerge.cpp b/interpreter/llvm/src/lib/CodeGen/GlobalMerge.cpp index 1ea5349399486..c6ca49ce24d73 100644 --- a/interpreter/llvm/src/lib/CodeGen/GlobalMerge.cpp +++ b/interpreter/llvm/src/lib/CodeGen/GlobalMerge.cpp @@ -192,10 +192,7 @@ namespace { } // end anonymous namespace char GlobalMerge::ID = 0; -INITIALIZE_PASS_BEGIN(GlobalMerge, "global-merge", "Merge global variables", - false, false) -INITIALIZE_PASS_END(GlobalMerge, "global-merge", "Merge global variables", - false, false) +INITIALIZE_PASS(GlobalMerge, DEBUG_TYPE, "Merge global variables", false, false) bool GlobalMerge::doMerge(SmallVectorImpl &Globals, Module &M, bool isConst, unsigned AddrSpace) const { @@ -556,7 +553,12 @@ bool GlobalMerge::doInitialization(Module &M) { // Grab all non-const globals. for (auto &GV : M.globals()) { // Merge is safe for "normal" internal or external globals only - if (GV.isDeclaration() || GV.isThreadLocal() || GV.hasSection()) + if (GV.isDeclaration() || GV.isThreadLocal() || + GV.hasSection() || GV.hasImplicitSection()) + continue; + + // It's not safe to merge globals that may be preempted + if (TM && !TM->shouldAssumeDSOLocal(M, &GV)) continue; if (!(MergeExternalGlobals && GV.hasExternalLinkage()) && diff --git a/interpreter/llvm/src/lib/CodeGen/IfConversion.cpp b/interpreter/llvm/src/lib/CodeGen/IfConversion.cpp index 628d599a3cc7f..ff8405366173e 100644 --- a/interpreter/llvm/src/lib/CodeGen/IfConversion.cpp +++ b/interpreter/llvm/src/lib/CodeGen/IfConversion.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" #include "BranchFolding.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/ScopeExit.h" @@ -25,6 +24,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetSchedule.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -39,7 +39,7 @@ using namespace llvm; -#define DEBUG_TYPE "ifcvt" +#define DEBUG_TYPE "if-converter" // Hidden options for help debugging. static cl::opt IfCvtFnStart("ifcvt-fn-start", cl::init(-1), cl::Hidden); @@ -316,9 +316,9 @@ namespace { char &llvm::IfConverterID = IfConverter::ID; -INITIALIZE_PASS_BEGIN(IfConverter, "if-converter", "If Converter", false, false) +INITIALIZE_PASS_BEGIN(IfConverter, DEBUG_TYPE, "If Converter", false, false) INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) -INITIALIZE_PASS_END(IfConverter, "if-converter", "If Converter", false, false) +INITIALIZE_PASS_END(IfConverter, DEBUG_TYPE, "If Converter", false, false) bool IfConverter::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(*MF.getFunction()) || (PredicateFtor && !PredicateFtor(MF))) @@ -1474,8 +1474,11 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) { DontKill.addLiveIns(NextMBB); } + // Remove the branches from the entry so we can add the contents of the true + // block to it. + BBI.NonPredSize -= TII->removeBranch(*BBI.BB); + if (CvtMBB.pred_size() > 1) { - BBI.NonPredSize -= TII->removeBranch(*BBI.BB); // Copy instructions in the true block, predicate them, and add them to // the entry block. CopyAndPredicateBlock(BBI, *CvtBBI, Cond); @@ -1484,11 +1487,11 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) { // explicitly remove CvtBBI as a successor. 
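For orientation, the CFG shape IfConvertSimple handles, sketched with invented block names (the hunk resumes right after the block):

```cpp
// Simple if-conversion takes
//   BB:     bCC CvtMBB          and folds the true block into the entry:
//           b   NextMBB           BB: predicated CvtMBB body
//   CvtMBB: ...                       b NextMBB
// Whether CvtMBB's body is copied (it has other predecessors) or merged (BB
// is its only predecessor), BB's own terminators are in the way, which is
// why the removeBranch call is now hoisted above the pred_size() check
// instead of being duplicated on both paths.
```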
BBI.BB->removeSuccessor(&CvtMBB, true); } else { + // Predicate the instructions in the true block. RemoveKills(CvtMBB.begin(), CvtMBB.end(), DontKill, *TRI); PredicateBlock(*CvtBBI, CvtMBB.end(), Cond); // Merge converted block into entry block. - BBI.NonPredSize -= TII->removeBranch(*BBI.BB); MergeBlocks(BBI, *CvtBBI); } @@ -1588,31 +1591,23 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { BBCvt = MBPI->getEdgeProbability(BBI.BB, &CvtMBB); } - // To be able to insert code freely at the end of BBI we sometimes remove - // the branch from BBI to NextMBB temporarily. Remember if this happened. - bool RemovedBranchToNextMBB = false; + // Remove the branches from the entry so we can add the contents of the true + // block to it. + BBI.NonPredSize -= TII->removeBranch(*BBI.BB); + if (CvtMBB.pred_size() > 1) { - BBI.NonPredSize -= TII->removeBranch(*BBI.BB); // Copy instructions in the true block, predicate them, and add them to // the entry block. CopyAndPredicateBlock(BBI, *CvtBBI, Cond, true); - // Keep the CFG updated. + // RemoveExtraEdges won't work if the block has an unanalyzable branch, so + // explicitly remove CvtBBI as a successor. BBI.BB->removeSuccessor(&CvtMBB, true); } else { // Predicate the 'true' block after removing its branch. CvtBBI->NonPredSize -= TII->removeBranch(CvtMBB); PredicateBlock(*CvtBBI, CvtMBB.end(), Cond); - // Remove the branch from the entry of the triangle to NextBB to be able to - // do the merge below. Keep the CFG updated, but remember we removed the - // branch since we do want to execute NextMBB, either by introducing a - // branch to it again, or merging it into the entry block. - // How it's handled is decided further down. - BBI.NonPredSize -= TII->removeBranch(*BBI.BB); - BBI.BB->removeSuccessor(&NextMBB, true); - RemovedBranchToNextMBB = true; - // Now merge the entry of the triangle with the true block. MergeBlocks(BBI, *CvtBBI, false); } @@ -1651,19 +1646,12 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { // block. By not merging them, we make it possible to iteratively // ifcvt the blocks. if (!HasEarlyExit && - // We might have removed BBI from NextMBB's predecessor list above but - // we want it to be there, so consider that too. - (NextMBB.pred_size() == (RemovedBranchToNextMBB ? 0 : 1)) && - !NextBBI->HasFallThrough && + NextMBB.pred_size() == 1 && !NextBBI->HasFallThrough && !NextMBB.hasAddressTaken()) { - // We will merge NextBBI into BBI, and thus remove the current - // fallthrough from BBI into CvtBBI. - BBI.BB->removeSuccessor(&CvtMBB, true); MergeBlocks(BBI, *NextBBI); FalseBBDead = true; } else { InsertUncondBranch(*BBI.BB, NextMBB, TII); - BBI.BB->addSuccessor(&NextMBB); BBI.HasFallThrough = false; } // Mixed predicated and unpredicated code. This cannot be iteratively @@ -1671,6 +1659,8 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { IterIfcvt = false; } + RemoveExtraEdges(BBI); + // Update block info. BB can be iteratively if-converted. 
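The triangle case reworked above can be pictured like this (invented names; the hunk resumes right after the block):

```cpp
// Triangle being converted:
//         +--> CvtMBB --+
//   BB ---+             +--> NextMBB
//         +-------------+
// Previously each path removed and re-added successor edges by hand, and had
// to remember (RemovedBranchToNextMBB) whether the BB->NextMBB branch was
// already gone. Now both paths leave the edges alone and a single
// RemoveExtraEdges(BBI) call at the end recomputes them from the final
// terminators.
```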
if (!IterIfcvt) BBI.IsDone = true; diff --git a/interpreter/llvm/src/lib/CodeGen/ImplicitNullChecks.cpp b/interpreter/llvm/src/lib/CodeGen/ImplicitNullChecks.cpp index 920c2a372a9b8..e308f49ec4e85 100644 --- a/interpreter/llvm/src/lib/CodeGen/ImplicitNullChecks.cpp +++ b/interpreter/llvm/src/lib/CodeGen/ImplicitNullChecks.cpp @@ -31,21 +31,21 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/FaultMaps.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/LLVMContext.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; @@ -359,30 +359,15 @@ ImplicitNullChecks::isSuitableMemoryOp(MachineInstr &MI, unsigned PointerReg, Offset < PageSize)) return SR_Unsuitable; - // Finally, we need to make sure that the access instruction actually is - // accessing from PointerReg, and there isn't some re-definition of PointerReg - // between the compare and the memory access. - // If PointerReg has been redefined before then there is no sense to continue - // lookup due to this condition will fail for any further instruction. - SuitabilityResult Suitable = SR_Suitable; - for (auto *PrevMI : PrevInsts) - for (auto &PrevMO : PrevMI->operands()) { - if (PrevMO.isReg() && PrevMO.getReg() && PrevMO.isDef() && - TRI->regsOverlap(PrevMO.getReg(), PointerReg)) - return SR_Impossible; - - // Check whether the current memory access aliases with previous one. - // If we already found that it aliases then no need to continue. - // But we continue base pointer check as it can result in SR_Impossible. - if (Suitable == SR_Suitable) { - AliasResult AR = areMemoryOpsAliased(MI, PrevMI); - if (AR == AR_WillAliasEverything) - return SR_Impossible; - if (AR == AR_MayAlias) - Suitable = SR_Unsuitable; - } - } - return Suitable; + // Finally, check whether the current memory access aliases with previous one. + for (auto *PrevMI : PrevInsts) { + AliasResult AR = areMemoryOpsAliased(MI, PrevMI); + if (AR == AR_WillAliasEverything) + return SR_Impossible; + if (AR == AR_MayAlias) + return SR_Unsuitable; + } + return SR_Suitable; } bool ImplicitNullChecks::canHoistInst(MachineInstr *FaultingMI, @@ -569,6 +554,12 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks( return true; } + // If MI re-defines the PointerReg then we cannot move further. 
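A concrete (invented, x86-flavored) instance of the case this early exit handles; the hunk continues right after the block:

```cpp
// Scanning forward from `test %rdi, %rdi; je throw` for a load to turn into
// the implicit null check:
//   %rbx = MOV64rm ...        ; scanned, remembered in InstsSeenSoFar
//   %rdi = ADD64ri8 %rdi, 8   ; re-defines the pointer register
//   %eax = MOV32rm (%rdi)     ; faulting here would test the *new* %rdi
// After the redefinition no later access can stand in for the null check, so
// the walk now gives up immediately instead of re-scanning every previously
// seen instruction for each memory-op candidate.
```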
+ if (any_of(MI.operands(), [&](MachineOperand &MO) { + return MO.isReg() && MO.getReg() && MO.isDef() && + TRI->regsOverlap(MO.getReg(), PointerReg); + })) + return false; InstsSeenSoFar.push_back(&MI); } @@ -607,8 +598,20 @@ MachineInstr *ImplicitNullChecks::insertFaultingInstr( .addMBB(HandlerMBB) .addImm(MI->getOpcode()); - for (auto &MO : MI->uses()) - MIB.add(MO); + for (auto &MO : MI->uses()) { + if (MO.isReg()) { + MachineOperand NewMO = MO; + if (MO.isUse()) { + NewMO.setIsKill(false); + } else { + assert(MO.isDef() && "Expected def or use"); + NewMO.setIsDead(false); + } + MIB.add(NewMO); + } else { + MIB.add(MO); + } + } MIB.setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); @@ -674,8 +677,8 @@ void ImplicitNullChecks::rewriteNullChecks( char ImplicitNullChecks::ID = 0; char &llvm::ImplicitNullChecksID = ImplicitNullChecks::ID; -INITIALIZE_PASS_BEGIN(ImplicitNullChecks, "implicit-null-checks", +INITIALIZE_PASS_BEGIN(ImplicitNullChecks, DEBUG_TYPE, "Implicit null checks", false, false) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) -INITIALIZE_PASS_END(ImplicitNullChecks, "implicit-null-checks", +INITIALIZE_PASS_END(ImplicitNullChecks, DEBUG_TYPE, "Implicit null checks", false, false) diff --git a/interpreter/llvm/src/lib/CodeGen/InlineSpiller.cpp b/interpreter/llvm/src/lib/CodeGen/InlineSpiller.cpp index b7ab404070b1a..eda4f74c78749 100644 --- a/interpreter/llvm/src/lib/CodeGen/InlineSpiller.cpp +++ b/interpreter/llvm/src/lib/CodeGen/InlineSpiller.cpp @@ -643,8 +643,11 @@ void InlineSpiller::reMaterializeAll() { Edit->eraseVirtReg(Reg); continue; } - assert((LIS.hasInterval(Reg) && !LIS.getInterval(Reg).empty()) && - "Reg with empty interval has reference"); + + assert(LIS.hasInterval(Reg) && + (!LIS.getInterval(Reg).empty() || !MRI.reg_nodbg_empty(Reg)) && + "Empty and not used live-range?!"); + RegsToSpill[ResultPos++] = Reg; } RegsToSpill.erase(RegsToSpill.begin() + ResultPos, RegsToSpill.end()); @@ -857,21 +860,46 @@ void InlineSpiller::insertReload(unsigned NewVReg, ++NumReloads; } +/// Check if \p Def fully defines a VReg with an undefined value. +/// If that's the case, that means the value of VReg is actually +/// not relevant. +static bool isFullUndefDef(const MachineInstr &Def) { + if (!Def.isImplicitDef()) + return false; + assert(Def.getNumOperands() == 1 && + "Implicit def with more than one definition"); + // We can say that the VReg defined by Def is undef, only if it is + // fully defined by Def. Otherwise, some of the lanes may not be + // undef and the value of the VReg matters. + return !Def.getOperand(0).getSubReg(); +} + /// insertSpill - Insert a spill of NewVReg after MI. void InlineSpiller::insertSpill(unsigned NewVReg, bool isKill, MachineBasicBlock::iterator MI) { MachineBasicBlock &MBB = *MI->getParent(); MachineInstrSpan MIS(MI); - TII.storeRegToStackSlot(MBB, std::next(MI), NewVReg, isKill, StackSlot, - MRI.getRegClass(NewVReg), &TRI); + bool IsRealSpill = true; + if (isFullUndefDef(*MI)) { + // Don't spill undef value. + // Anything works for undef, in particular keeping the memory + // uninitialized is a viable option and it saves code size and + // run time. 
+ BuildMI(MBB, std::next(MI), MI->getDebugLoc(), TII.get(TargetOpcode::KILL)) + .addReg(NewVReg, getKillRegState(isKill)); + IsRealSpill = false; + } else + TII.storeRegToStackSlot(MBB, std::next(MI), NewVReg, isKill, StackSlot, + MRI.getRegClass(NewVReg), &TRI); LIS.InsertMachineInstrRangeInMaps(std::next(MI), MIS.end()); DEBUG(dumpMachineInstrRangeWithSlotIndex(std::next(MI), MIS.end(), LIS, "spill")); ++NumSpills; - HSpiller.addToMergeableSpills(*std::next(MI), StackSlot, Original); + if (IsRealSpill) + HSpiller.addToMergeableSpills(*std::next(MI), StackSlot, Original); } /// spillAroundUses - insert spill code around each use of Reg. diff --git a/interpreter/llvm/src/lib/CodeGen/InterleavedAccessPass.cpp b/interpreter/llvm/src/lib/CodeGen/InterleavedAccessPass.cpp index ec35b3f6449e1..ee4929c91482c 100644 --- a/interpreter/llvm/src/lib/CodeGen/InterleavedAccessPass.cpp +++ b/interpreter/llvm/src/lib/CodeGen/InterleavedAccessPass.cpp @@ -45,6 +45,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/InstIterator.h" #include "llvm/Support/Debug.h" @@ -68,8 +69,7 @@ class InterleavedAccess : public FunctionPass { public: static char ID; - InterleavedAccess(const TargetMachine *TM = nullptr) - : FunctionPass(ID), DT(nullptr), TM(TM), TLI(nullptr) { + InterleavedAccess() : FunctionPass(ID), DT(nullptr), TLI(nullptr) { initializeInterleavedAccessPass(*PassRegistry::getPassRegistry()); } @@ -84,7 +84,6 @@ class InterleavedAccess : public FunctionPass { private: DominatorTree *DT; - const TargetMachine *TM; const TargetLowering *TLI; /// The maximum supported interleave factor. @@ -108,18 +107,16 @@ class InterleavedAccess : public FunctionPass { } // end anonymous namespace. char InterleavedAccess::ID = 0; -INITIALIZE_TM_PASS_BEGIN( - InterleavedAccess, "interleaved-access", +INITIALIZE_PASS_BEGIN(InterleavedAccess, DEBUG_TYPE, "Lower interleaved memory accesses to target specific intrinsics", false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_TM_PASS_END( - InterleavedAccess, "interleaved-access", +INITIALIZE_PASS_END(InterleavedAccess, DEBUG_TYPE, "Lower interleaved memory accesses to target specific intrinsics", false, false) -FunctionPass *llvm::createInterleavedAccessPass(const TargetMachine *TM) { - return new InterleavedAccess(TM); +FunctionPass *llvm::createInterleavedAccessPass() { + return new InterleavedAccess(); } /// \brief Check if the mask is a DE-interleave mask of the given factor @@ -426,13 +423,15 @@ bool InterleavedAccess::lowerInterleavedStore( } bool InterleavedAccess::runOnFunction(Function &F) { - if (!TM || !LowerInterleavedAccesses) + auto *TPC = getAnalysisIfAvailable<TargetPassConfig>(); + if (!TPC || !LowerInterleavedAccesses) return false; DEBUG(dbgs() << "*** " << getPassName() << ": " << F.getName() << "\n"); DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); - TLI = TM->getSubtargetImpl(F)->getTargetLowering(); + auto &TM = TPC->getTM<TargetMachine>(); + TLI = TM.getSubtargetImpl(F)->getTargetLowering(); MaxFactor = TLI->getMaxSupportedInterleaveFactor(); // Holds dead instructions that will be erased later.
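The InterleavedAccessPass hunk above, like the LowerEmuTLS hunk further below, migrates a pass from a TargetMachine pointer injected at construction time to querying TargetPassConfig from inside the pass. What follows is a minimal sketch of that pattern, assuming LLVM-5.0-era headers; ExamplePass and its boilerplate are hypothetical, and only the TargetPassConfig calls mirror the patch:

#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Function.h"
#include "llvm/Pass.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;

namespace {
struct ExamplePass : public FunctionPass {
  static char ID;

  // Default-constructible: no TargetMachine is threaded through the
  // constructor or the create*Pass() factory anymore.
  ExamplePass() : FunctionPass(ID) {}

  bool runOnFunction(Function &F) override {
    // Outside a codegen pipeline (e.g. under plain 'opt') there is no
    // TargetPassConfig, so bail out instead of dereferencing null.
    auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
    if (!TPC)
      return false;
    TargetMachine &TM = TPC->getTM<TargetMachine>();
    // Target-dependent work would go here, e.g. querying
    // TM.getSubtargetImpl(F)->getTargetLowering().
    (void)TM;
    return false;
  }
};
} // end anonymous namespace

char ExamplePass::ID = 0;

The payoff, visible in both hunks, is that the pass registry can instantiate the pass with no target hook-up at all, while the pass still reaches the TargetMachine whenever a codegen pipeline is in flight.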
diff --git a/interpreter/llvm/src/lib/CodeGen/LLVMTargetMachine.cpp b/interpreter/llvm/src/lib/CodeGen/LLVMTargetMachine.cpp index 7b1706f0f4ba9..f2defb4fd6234 100644 --- a/interpreter/llvm/src/lib/CodeGen/LLVMTargetMachine.cpp +++ b/interpreter/llvm/src/lib/CodeGen/LLVMTargetMachine.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Target/TargetMachine.h" #include "llvm/Analysis/Passes.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/BasicTTIImpl.h" @@ -31,21 +30,11 @@ #include "llvm/Support/FormattedStream.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Transforms/Scalar.h" using namespace llvm; -// Enable or disable FastISel. Both options are needed, because -// FastISel is enabled by default with -fast, and we wish to be -// able to enable or disable fast-isel independently from -O0. -static cl::opt -EnableFastISelOption("fast-isel", cl::Hidden, - cl::desc("Enable the \"fast\" instruction selector")); - -static cl::opt - EnableGlobalISel("global-isel", cl::Hidden, - cl::desc("Enable the \"global\" instruction selector")); - void LLVMTargetMachine::initAsmInfo() { MRI = TheTarget.createMCRegInfo(getTargetTriple().str()); MII = TheTarget.createMCInstrInfo(); @@ -71,8 +60,7 @@ void LLVMTargetMachine::initAsmInfo() { TmpAsmInfo->setPreserveAsmComments(Options.MCOptions.PreserveAsmComments); - if (Options.CompressDebugSections) - TmpAsmInfo->setCompressDebugSections(DebugCompressionType::DCT_ZlibGnu); + TmpAsmInfo->setCompressDebugSections(Options.CompressDebugSections); TmpAsmInfo->setRelaxELFRelocations(Options.RelaxELFRelocations); @@ -106,112 +94,31 @@ static MCContext * addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM, bool DisableVerify, AnalysisID StartBefore, AnalysisID StartAfter, AnalysisID StopBefore, - AnalysisID StopAfter, - MachineFunctionInitializer *MFInitializer = nullptr) { - - // When in emulated TLS mode, add the LowerEmuTLS pass. - if (TM->Options.EmulatedTLS) - PM.add(createLowerEmuTLSPass(TM)); - - PM.add(createPreISelIntrinsicLoweringPass()); - - // Add internal analysis passes from the target machine. - PM.add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis())); - + AnalysisID StopAfter) { // Targets may override createPassConfig to provide a target-specific // subclass. TargetPassConfig *PassConfig = TM->createPassConfig(PM); PassConfig->setStartStopPasses(StartBefore, StartAfter, StopBefore, StopAfter); - // Set PassConfig options provided by TargetMachine. PassConfig->setDisableVerify(DisableVerify); - PM.add(PassConfig); - - PassConfig->addIRPasses(); - - PassConfig->addCodeGenPrepare(); - - PassConfig->addPassesToHandleExceptions(); - - PassConfig->addISelPrepare(); - MachineModuleInfo *MMI = new MachineModuleInfo(TM); - MMI->setMachineFunctionInitializer(MFInitializer); PM.add(MMI); - // Enable FastISel with -fast, but allow that to be overridden. - TM->setO0WantsFastISel(EnableFastISelOption != cl::BOU_FALSE); - if (EnableFastISelOption == cl::BOU_TRUE || - (TM->getOptLevel() == CodeGenOpt::None && - TM->getO0WantsFastISel())) - TM->setFastISel(true); - - // Ask the target for an isel. - // Enable GlobalISel if the target wants to, but allow that to be overriden. 
- if (EnableGlobalISel == cl::BOU_TRUE || (EnableGlobalISel == cl::BOU_UNSET && - PassConfig->isGlobalISelEnabled())) { - if (PassConfig->addIRTranslator()) - return nullptr; - - PassConfig->addPreLegalizeMachineIR(); - - if (PassConfig->addLegalizeMachineIR()) - return nullptr; - - // Before running the register bank selector, ask the target if it - // wants to run some passes. - PassConfig->addPreRegBankSelect(); - - if (PassConfig->addRegBankSelect()) - return nullptr; - - PassConfig->addPreGlobalInstructionSelect(); - - if (PassConfig->addGlobalInstructionSelect()) - return nullptr; - - // Pass to reset the MachineFunction if the ISel failed. - PM.add(createResetMachineFunctionPass( - PassConfig->reportDiagnosticWhenGlobalISelFallback(), - PassConfig->isGlobalISelAbortEnabled())); - - // Provide a fallback path when we do not want to abort on - // not-yet-supported input. - if (!PassConfig->isGlobalISelAbortEnabled() && - PassConfig->addInstSelector()) - return nullptr; - - } else if (PassConfig->addInstSelector()) + if (PassConfig->addISelPasses()) return nullptr; - PassConfig->addMachinePasses(); - PassConfig->setInitialized(); return &MMI->getContext(); } -bool LLVMTargetMachine::addPassesToEmitFile( - PassManagerBase &PM, raw_pwrite_stream &Out, CodeGenFileType FileType, - bool DisableVerify, AnalysisID StartBefore, AnalysisID StartAfter, - AnalysisID StopBefore, AnalysisID StopAfter, - MachineFunctionInitializer *MFInitializer) { - // Add common CodeGen passes. - MCContext *Context = - addPassesToGenerateCode(this, PM, DisableVerify, StartBefore, StartAfter, - StopBefore, StopAfter, MFInitializer); - if (!Context) - return true; - - if (StopBefore || StopAfter) { - PM.add(createPrintMIRPass(Out)); - return false; - } - +bool LLVMTargetMachine::addAsmPrinter(PassManagerBase &PM, + raw_pwrite_stream &Out, CodeGenFileType FileType, + MCContext &Context) { if (Options.MCOptions.MCSaveTempLabels) - Context->setAllowTemporaryLabels(false); + Context.setAllowTemporaryLabels(false); const MCSubtargetInfo &STI = *getMCSubtargetInfo(); const MCAsmInfo &MAI = *getMCAsmInfo(); @@ -228,14 +135,14 @@ bool LLVMTargetMachine::addPassesToEmitFile( // Create a code emitter if asked to show the encoding. MCCodeEmitter *MCE = nullptr; if (Options.MCOptions.ShowMCEncoding) - MCE = getTarget().createMCCodeEmitter(MII, MRI, *Context); + MCE = getTarget().createMCCodeEmitter(MII, MRI, Context); MCAsmBackend *MAB = getTarget().createMCAsmBackend(MRI, getTargetTriple().str(), TargetCPU, Options.MCOptions); auto FOut = llvm::make_unique(Out); MCStreamer *S = getTarget().createAsmStreamer( - *Context, std::move(FOut), Options.MCOptions.AsmVerbose, + Context, std::move(FOut), Options.MCOptions.AsmVerbose, Options.MCOptions.MCUseDwarfDirectory, InstPrinter, MCE, MAB, Options.MCOptions.ShowMCInst); AsmStreamer.reset(S); @@ -244,7 +151,7 @@ bool LLVMTargetMachine::addPassesToEmitFile( case CGFT_ObjectFile: { // Create the code emitter for the target if it exists. If not, .o file // emission fails. - MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(MII, MRI, *Context); + MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(MII, MRI, Context); MCAsmBackend *MAB = getTarget().createMCAsmBackend(MRI, getTargetTriple().str(), TargetCPU, Options.MCOptions); @@ -252,11 +159,11 @@ bool LLVMTargetMachine::addPassesToEmitFile( return true; // Don't waste memory on names of temp labels. 
- Context->setUseNamesOnTempLabels(false); + Context.setUseNamesOnTempLabels(false); Triple T(getTargetTriple().str()); AsmStreamer.reset(getTarget().createMCObjectStreamer( - T, *Context, *MAB, Out, MCE, STI, Options.MCOptions.MCRelaxAll, + T, Context, *MAB, Out, MCE, STI, Options.MCOptions.MCRelaxAll, Options.MCOptions.MCIncrementalLinkerCompatible, /*DWARFMustBeAtTheEnd*/ true)); break; @@ -264,7 +171,7 @@ bool LLVMTargetMachine::addPassesToEmitFile( case CGFT_Null: // The Null output is intended for use for performance analysis and testing, // not real users. - AsmStreamer.reset(getTarget().createNullStreamer(*Context)); + AsmStreamer.reset(getTarget().createNullStreamer(Context)); break; } @@ -275,8 +182,28 @@ bool LLVMTargetMachine::addPassesToEmitFile( return true; PM.add(Printer); - PM.add(createFreeMachineFunctionPass()); + return false; +} +bool LLVMTargetMachine::addPassesToEmitFile( + PassManagerBase &PM, raw_pwrite_stream &Out, CodeGenFileType FileType, + bool DisableVerify, AnalysisID StartBefore, AnalysisID StartAfter, + AnalysisID StopBefore, AnalysisID StopAfter) { + // Add common CodeGen passes. + MCContext *Context = + addPassesToGenerateCode(this, PM, DisableVerify, StartBefore, StartAfter, + StopBefore, StopAfter); + if (!Context) + return true; + + if (StopBefore || StopAfter) { + PM.add(createPrintMIRPass(Out)); + } else { + if (addAsmPrinter(PM, Out, FileType, *Context)) + return true; + } + + PM.add(createFreeMachineFunctionPass()); return false; } diff --git a/interpreter/llvm/src/lib/CodeGen/LexicalScopes.cpp b/interpreter/llvm/src/lib/CodeGen/LexicalScopes.cpp index 275d84e2c185f..995c58a635649 100644 --- a/interpreter/llvm/src/lib/CodeGen/LexicalScopes.cpp +++ b/interpreter/llvm/src/lib/CodeGen/LexicalScopes.cpp @@ -14,9 +14,9 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/LexicalScopes.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/CodeGen/LexicalScopes.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" @@ -86,8 +86,9 @@ void LexicalScopes::extractLexicalScopes( continue; } - // Ignore DBG_VALUE. It does not contribute to any instruction in output. - if (MInsn.isDebugValue()) + // Ignore DBG_VALUE and similar instruction that do not contribute to any + // instruction in the output. + if (MInsn.isMetaInstruction()) continue; if (RangeBeginMI) { diff --git a/interpreter/llvm/src/lib/CodeGen/LiveDebugValues.cpp b/interpreter/llvm/src/lib/CodeGen/LiveDebugValues.cpp index f956974b1aafe..b5e705f6455df 100644 --- a/interpreter/llvm/src/lib/CodeGen/LiveDebugValues.cpp +++ b/interpreter/llvm/src/lib/CodeGen/LiveDebugValues.cpp @@ -43,7 +43,7 @@ using namespace llvm; -#define DEBUG_TYPE "live-debug-values" +#define DEBUG_TYPE "livedebugvalues" STATISTIC(NumInserted, "Number of DBG_VALUE instructions inserted"); @@ -283,7 +283,7 @@ class LiveDebugValues : public MachineFunctionPass { char LiveDebugValues::ID = 0; char &llvm::LiveDebugValuesID = LiveDebugValues::ID; -INITIALIZE_PASS(LiveDebugValues, "livedebugvalues", "Live DEBUG_VALUE analysis", +INITIALIZE_PASS(LiveDebugValues, DEBUG_TYPE, "Live DEBUG_VALUE analysis", false, false) /// Default construct and initialize the pass. 
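The LiveDebugValues hunk above and several hunks that follow (LiveDebugVariables, LiveStacks, LocalStackSlotAllocation, plus the INITIALIZE_PASS changes already seen in ImplicitNullChecks and InterleavedAccess) all converge on one convention: the pass's registered command-line name is the DEBUG_TYPE macro rather than a second, independently maintained string literal. Below is a self-contained sketch of that convention with hypothetical names (ExampleDCE, "exampledce"); nothing in it belongs to the patch except the DEBUG_TYPE-in-INITIALIZE_PASS usage:

#include "llvm/IR/Function.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// One string drives both -debug-only=exampledce and the registered pass
// name, so the two can no longer drift apart.
#define DEBUG_TYPE "exampledce"

namespace llvm {
void initializeExampleDCEPass(PassRegistry &);
} // end namespace llvm

namespace {
struct ExampleDCE : public FunctionPass {
  static char ID;
  ExampleDCE() : FunctionPass(ID) {}

  bool runOnFunction(Function &F) override {
    // DEBUG() is keyed on the same DEBUG_TYPE string.
    DEBUG(dbgs() << "ExampleDCE on " << F.getName() << "\n");
    return false;
  }
};
} // end anonymous namespace

char ExampleDCE::ID = 0;

INITIALIZE_PASS(ExampleDCE, DEBUG_TYPE, "Example dead-code elimination",
                false, false)

INITIALIZE_PASS_BEGIN/INITIALIZE_PASS_END pairs follow the same scheme when a pass declares dependencies, as the LiveDebugVariables and LiveStacks hunks below show.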
diff --git a/interpreter/llvm/src/lib/CodeGen/LiveDebugVariables.cpp b/interpreter/llvm/src/lib/CodeGen/LiveDebugVariables.cpp index bcf7c8e99c7ff..0c76478af551f 100644 --- a/interpreter/llvm/src/lib/CodeGen/LiveDebugVariables.cpp +++ b/interpreter/llvm/src/lib/CodeGen/LiveDebugVariables.cpp @@ -45,7 +45,7 @@ using namespace llvm; -#define DEBUG_TYPE "livedebug" +#define DEBUG_TYPE "livedebugvars" static cl::opt EnableLDV("live-debug-variables", cl::init(true), @@ -54,11 +54,11 @@ EnableLDV("live-debug-variables", cl::init(true), STATISTIC(NumInsertedDebugValues, "Number of DBG_VALUEs inserted"); char LiveDebugVariables::ID = 0; -INITIALIZE_PASS_BEGIN(LiveDebugVariables, "livedebugvars", +INITIALIZE_PASS_BEGIN(LiveDebugVariables, DEBUG_TYPE, "Debug Variable Analysis", false, false) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(LiveIntervals) -INITIALIZE_PASS_END(LiveDebugVariables, "livedebugvars", +INITIALIZE_PASS_END(LiveDebugVariables, DEBUG_TYPE, "Debug Variable Analysis", false, false) void LiveDebugVariables::getAnalysisUsage(AnalysisUsage &AU) const { @@ -1006,7 +1006,7 @@ bool LiveDebugVariables::doInitialization(Module &M) { } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -LLVM_DUMP_METHOD void LiveDebugVariables::dump() { +LLVM_DUMP_METHOD void LiveDebugVariables::dump() const { if (pImpl) static_cast(pImpl)->print(dbgs()); } diff --git a/interpreter/llvm/src/lib/CodeGen/LiveDebugVariables.h b/interpreter/llvm/src/lib/CodeGen/LiveDebugVariables.h index afe87a52544d8..1d7e3d4371a24 100644 --- a/interpreter/llvm/src/lib/CodeGen/LiveDebugVariables.h +++ b/interpreter/llvm/src/lib/CodeGen/LiveDebugVariables.h @@ -59,7 +59,7 @@ class LLVM_LIBRARY_VISIBILITY LiveDebugVariables : public MachineFunctionPass { void emitDebugValues(VirtRegMap *VRM); /// dump - Print data structures to dbgs(). 
- void dump(); + void dump() const; private: diff --git a/interpreter/llvm/src/lib/CodeGen/LiveIntervalAnalysis.cpp b/interpreter/llvm/src/lib/CodeGen/LiveIntervalAnalysis.cpp index 3f5b8e19d1f0c..471dcea4bb390 100644 --- a/interpreter/llvm/src/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/interpreter/llvm/src/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -1,4 +1,4 @@ -//===-- LiveIntervalAnalysis.cpp - Live Interval Analysis -----------------===// +//===- LiveIntervalAnalysis.cpp - Live Interval Analysis ------------------===// // // The LLVM Compiler Infrastructure // @@ -16,26 +16,43 @@ #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "LiveRangeCalc.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBundle.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/SlotIndexes.h" #include "llvm/CodeGen/VirtRegMap.h" -#include "llvm/IR/Value.h" +#include "llvm/MC/LaneBitmask.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Pass.h" #include "llvm/Support/BlockFrequency.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include -#include +#include +#include +#include +#include +#include + using namespace llvm; #define DEBUG_TYPE "regalloc" @@ -59,11 +76,13 @@ static bool EnablePrecomputePhysRegs = false; #endif // NDEBUG namespace llvm { + cl::opt UseSegmentSetForPhysRegs( "use-segment-set-for-physregs", cl::Hidden, cl::init(true), cl::desc( "Use segment set for the computation of the live ranges of physregs.")); -} + +} // end namespace llvm void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); @@ -78,8 +97,7 @@ void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const { MachineFunctionPass::getAnalysisUsage(AU); } -LiveIntervals::LiveIntervals() : MachineFunctionPass(ID), - DomTree(nullptr), LRCalc(nullptr) { +LiveIntervals::LiveIntervals() : MachineFunctionPass(ID) { initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); } @@ -168,12 +186,10 @@ LLVM_DUMP_METHOD void LiveIntervals::dumpInstrs() const { #endif LiveInterval* LiveIntervals::createInterval(unsigned reg) { - float Weight = TargetRegisterInfo::isPhysicalRegister(reg) ? - llvm::huge_valf : 0.0F; + float Weight = TargetRegisterInfo::isPhysicalRegister(reg) ? huge_valf : 0.0F; return new LiveInterval(reg, Weight); } - /// Compute the live interval of a virtual register, based on defs and uses. 
void LiveIntervals::computeVirtRegInterval(LiveInterval &LI) { assert(LRCalc && "LRCalc not initialized."); @@ -337,7 +353,7 @@ static void createSegmentsForValues(LiveRange &LR, } } -typedef SmallVector, 16> ShrinkToUsesWorkList; +using ShrinkToUsesWorkList = SmallVector, 16>; static void extendSegmentsToUses(LiveRange &LR, const SlotIndexes &Indexes, ShrinkToUsesWorkList &WorkList, @@ -593,7 +609,7 @@ void LiveIntervals::pruneValue(LiveRange &LR, SlotIndex Kill, // Find all blocks that are reachable from KillMBB without leaving VNI's live // range. It is possible that KillMBB itself is reachable, so start a DFS // from each successor. - typedef df_iterator_default_set VisitedTy; + using VisitedTy = df_iterator_default_set; VisitedTy Visited; for (MachineBasicBlock *Succ : KillMBB->successors()) { for (df_ext_iterator @@ -822,7 +838,6 @@ LiveIntervals::addSegmentToEndOfBlock(unsigned reg, MachineInstr &startInst) { return S; } - //===----------------------------------------------------------------------===// // Register mask functions //===----------------------------------------------------------------------===// @@ -855,7 +870,7 @@ bool LiveIntervals::checkRegMaskInterference(LiveInterval &LI, return false; bool Found = false; - for (;;) { + while (true) { assert(*SlotI >= LiveI->start); // Loop over all slots overlapping this segment. while (*SlotI < LiveI->end) { diff --git a/interpreter/llvm/src/lib/CodeGen/LiveIntervalUnion.cpp b/interpreter/llvm/src/lib/CodeGen/LiveIntervalUnion.cpp index b4aa0dc326a58..b3248e53d0a5a 100644 --- a/interpreter/llvm/src/lib/CodeGen/LiveIntervalUnion.cpp +++ b/interpreter/llvm/src/lib/CodeGen/LiveIntervalUnion.cpp @@ -13,10 +13,10 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/SparseBitVector.h" +#include "llvm/CodeGen/LiveIntervalUnion.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SparseBitVector.h" #include "llvm/CodeGen/LiveInterval.h" -#include "llvm/CodeGen/LiveIntervalUnion.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetRegisterInfo.h" #include diff --git a/interpreter/llvm/src/lib/CodeGen/LivePhysRegs.cpp b/interpreter/llvm/src/lib/CodeGen/LivePhysRegs.cpp index 9f7d7cf548480..cde6ccd29dfd8 100644 --- a/interpreter/llvm/src/lib/CodeGen/LivePhysRegs.cpp +++ b/interpreter/llvm/src/lib/CodeGen/LivePhysRegs.cpp @@ -53,7 +53,7 @@ void LivePhysRegs::stepBackward(const MachineInstr &MI) { continue; removeReg(Reg); } else if (O->isRegMask()) - removeRegsInMask(*O, nullptr); + removeRegsInMask(*O); } // Add uses to the set. @@ -142,66 +142,84 @@ bool LivePhysRegs::available(const MachineRegisterInfo &MRI, /// Add live-in registers of basic block \p MBB to \p LiveRegs. void LivePhysRegs::addBlockLiveIns(const MachineBasicBlock &MBB) { for (const auto &LI : MBB.liveins()) { - MCSubRegIndexIterator S(LI.PhysReg, TRI); - if (LI.LaneMask.all() || (LI.LaneMask.any() && !S.isValid())) { - addReg(LI.PhysReg); + unsigned Reg = LI.PhysReg; + LaneBitmask Mask = LI.LaneMask; + MCSubRegIndexIterator S(Reg, TRI); + assert(Mask.any() && "Invalid livein mask"); + if (Mask.all() || !S.isValid()) { + addReg(Reg); continue; } for (; S.isValid(); ++S) { unsigned SI = S.getSubRegIndex(); - if ((LI.LaneMask & TRI->getSubRegIndexLaneMask(SI)).any()) + if ((Mask & TRI->getSubRegIndexLaneMask(SI)).any()) addReg(S.getSubReg()); } } } -/// Add pristine registers to the given \p LiveRegs. This function removes -/// actually saved callee save registers when \p InPrologueEpilogue is false. 
-static void addPristines(LivePhysRegs &LiveRegs, const MachineFunction &MF, - const MachineFrameInfo &MFI, - const TargetRegisterInfo &TRI) { +/// Adds all callee saved registers to \p LiveRegs. +static void addCalleeSavedRegs(LivePhysRegs &LiveRegs, + const MachineFunction &MF) { const MachineRegisterInfo &MRI = MF.getRegInfo(); - for (const MCPhysReg *CSR = MRI.getCalleeSavedRegs(); CSR && *CSR; - ++CSR) + for (const MCPhysReg *CSR = MRI.getCalleeSavedRegs(); CSR && *CSR; ++CSR) LiveRegs.addReg(*CSR); +} + +/// Adds pristine registers to the given \p LiveRegs. Pristine registers are +/// callee saved registers that are unused in the function. +static void addPristines(LivePhysRegs &LiveRegs, const MachineFunction &MF) { + const MachineFrameInfo &MFI = MF.getFrameInfo(); + if (!MFI.isCalleeSavedInfoValid()) + return; + /// Add all callee saved regs, then remove the ones that are saved+restored. + addCalleeSavedRegs(LiveRegs, MF); + /// Remove the ones that are not saved/restored; they are pristine. for (const CalleeSavedInfo &Info : MFI.getCalleeSavedInfo()) LiveRegs.removeReg(Info.getReg()); } void LivePhysRegs::addLiveOutsNoPristines(const MachineBasicBlock &MBB) { - // To get the live-outs we simply merge the live-ins of all successors. - for (const MachineBasicBlock *Succ : MBB.successors()) - addBlockLiveIns(*Succ); + if (!MBB.succ_empty()) { + // To get the live-outs we simply merge the live-ins of all successors. + for (const MachineBasicBlock *Succ : MBB.successors()) + addBlockLiveIns(*Succ); + } else if (MBB.isReturnBlock()) { + // For the return block: Add all callee saved registers that are saved and + // restored (somewhere); This does not include callee saved registers that + // are unused and hence not saved and restored; they are called pristine. + const MachineFunction &MF = *MBB.getParent(); + const MachineFrameInfo &MFI = MF.getFrameInfo(); + if (MFI.isCalleeSavedInfoValid()) { + for (const CalleeSavedInfo &Info : MFI.getCalleeSavedInfo()) + addReg(Info.getReg()); + } + } } void LivePhysRegs::addLiveOuts(const MachineBasicBlock &MBB) { const MachineFunction &MF = *MBB.getParent(); - const MachineFrameInfo &MFI = MF.getFrameInfo(); - if (MFI.isCalleeSavedInfoValid()) { - if (MBB.isReturnBlock()) { - // The return block has no successors whose live-ins we could merge - // below. So instead we add the callee saved registers manually. - const MachineRegisterInfo &MRI = MF.getRegInfo(); - for (const MCPhysReg *I = MRI.getCalleeSavedRegs(); *I; ++I) - addReg(*I); - } else { - addPristines(*this, MF, MFI, *TRI); - } + if (!MBB.succ_empty()) { + addPristines(*this, MF); + addLiveOutsNoPristines(MBB); + } else if (MBB.isReturnBlock()) { + // For the return block: Add all callee saved registers. 
+ const MachineFrameInfo &MFI = MF.getFrameInfo(); + if (MFI.isCalleeSavedInfoValid()) + addCalleeSavedRegs(*this, MF); } - - addLiveOutsNoPristines(MBB); } void LivePhysRegs::addLiveIns(const MachineBasicBlock &MBB) { const MachineFunction &MF = *MBB.getParent(); - const MachineFrameInfo &MFI = MF.getFrameInfo(); - if (MFI.isCalleeSavedInfoValid()) - addPristines(*this, MF, MFI, *TRI); + addPristines(*this, MF); addBlockLiveIns(MBB); } -void llvm::computeLiveIns(LivePhysRegs &LiveRegs, const TargetRegisterInfo &TRI, +void llvm::computeLiveIns(LivePhysRegs &LiveRegs, + const MachineRegisterInfo &MRI, MachineBasicBlock &MBB) { + const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo(); assert(MBB.livein_empty()); LiveRegs.init(TRI); LiveRegs.addLiveOutsNoPristines(MBB); @@ -209,10 +227,12 @@ void llvm::computeLiveIns(LivePhysRegs &LiveRegs, const TargetRegisterInfo &TRI, LiveRegs.stepBackward(MI); for (unsigned Reg : LiveRegs) { + if (MRI.isReserved(Reg)) + continue; // Skip the register if we are about to add one of its super registers. bool ContainsSuperReg = false; for (MCSuperRegIterator SReg(Reg, &TRI); SReg.isValid(); ++SReg) { - if (LiveRegs.contains(*SReg)) { + if (LiveRegs.contains(*SReg) && !MRI.isReserved(*SReg)) { ContainsSuperReg = true; break; } diff --git a/interpreter/llvm/src/lib/CodeGen/LiveRangeCalc.cpp b/interpreter/llvm/src/lib/CodeGen/LiveRangeCalc.cpp index 398066bf8903e..8c43c9f3f8846 100644 --- a/interpreter/llvm/src/lib/CodeGen/LiveRangeCalc.cpp +++ b/interpreter/llvm/src/lib/CodeGen/LiveRangeCalc.cpp @@ -20,11 +20,14 @@ using namespace llvm; #define DEBUG_TYPE "regalloc" +// Reserve an address that indicates a value that is known to be "undef". +static VNInfo UndefVNI(0xbad, SlotIndex()); + void LiveRangeCalc::resetLiveOutMap() { unsigned NumBlocks = MF->getNumBlockIDs(); Seen.clear(); Seen.resize(NumBlocks); - EntryInfoMap.clear(); + EntryInfos.clear(); Map.resize(NumBlocks); } @@ -283,8 +286,11 @@ bool LiveRangeCalc::isDefOnEntry(LiveRange &LR, ArrayRef Undefs, // Determine if the exit from the block is reached by some def. unsigned N = WorkList[i]; MachineBasicBlock &B = *MF->getBlockNumbered(N); - if (Seen[N] && Map[&B].first != nullptr) - return MarkDefined(B); + if (Seen[N]) { + const LiveOutPair &LOB = Map[&B]; + if (LOB.first != nullptr && LOB.first != &UndefVNI) + return MarkDefined(B); + } SlotIndex Begin, End; std::tie(Begin, End) = Indexes->getMBBRange(&B); // Treat End as not belonging to B. @@ -365,10 +371,7 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB, #endif FoundUndef |= MBB->pred_empty(); - for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), - PE = MBB->pred_end(); PI != PE; ++PI) { - MachineBasicBlock *Pred = *PI; - + for (MachineBasicBlock *Pred : MBB->predecessors()) { // Is this a known live-out block? if (Seen.test(Pred->getNumber())) { if (VNInfo *VNI = Map[Pred].first) { @@ -387,7 +390,7 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB, auto EP = LR.extendInBlock(Undefs, Start, End); VNInfo *VNI = EP.first; FoundUndef |= EP.second; - setLiveOutValue(Pred, VNI); + setLiveOutValue(Pred, EP.second ? 
&UndefVNI : VNI); if (VNI) { if (TheVNI && TheVNI != VNI) UniqueVNI = false; @@ -406,7 +409,7 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB, } LiveIn.clear(); - FoundUndef |= (TheVNI == nullptr); + FoundUndef |= (TheVNI == nullptr || TheVNI == &UndefVNI); if (Undefs.size() > 0 && FoundUndef) UniqueVNI = false; @@ -417,7 +420,7 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB, // If a unique reaching def was found, blit in the live ranges immediately. if (UniqueVNI) { - assert(TheVNI != nullptr); + assert(TheVNI != nullptr && TheVNI != &UndefVNI); LiveRangeUpdater Updater(&LR); for (unsigned BN : WorkList) { SlotIndex Start, End; @@ -433,22 +436,26 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB, } // Prepare the defined/undefined bit vectors. - auto EF = EntryInfoMap.find(&LR); - if (EF == EntryInfoMap.end()) { + EntryInfoMap::iterator Entry; + bool DidInsert; + std::tie(Entry, DidInsert) = EntryInfos.insert( + std::make_pair(&LR, std::make_pair(BitVector(), BitVector()))); + if (DidInsert) { + // Initialize newly inserted entries. unsigned N = MF->getNumBlockIDs(); - EF = EntryInfoMap.insert({&LR, {BitVector(), BitVector()}}).first; - EF->second.first.resize(N); - EF->second.second.resize(N); + Entry->second.first.resize(N); + Entry->second.second.resize(N); } - BitVector &DefOnEntry = EF->second.first; - BitVector &UndefOnEntry = EF->second.second; + BitVector &DefOnEntry = Entry->second.first; + BitVector &UndefOnEntry = Entry->second.second; // Multiple values were found, so transfer the work list to the LiveIn array // where UpdateSSA will use it as a work list. LiveIn.reserve(WorkList.size()); for (unsigned BN : WorkList) { MachineBasicBlock *MBB = MF->getBlockNumbered(BN); - if (Undefs.size() > 0 && !isDefOnEntry(LR, Undefs, *MBB, DefOnEntry, UndefOnEntry)) + if (Undefs.size() > 0 && + !isDefOnEntry(LR, Undefs, *MBB, DefOnEntry, UndefOnEntry)) continue; addLiveInBlock(LR, DomTree->getNode(MBB)); if (MBB == &UseMBB) @@ -466,9 +473,9 @@ void LiveRangeCalc::updateSSA() { assert(DomTree && "Missing dominator tree"); // Interate until convergence. - unsigned Changes; + bool Changed; do { - Changes = 0; + Changed = false; // Propagate live-out values down the dominator tree, inserting phi-defs // when necessary. for (LiveInBlock &I : LiveIn) { @@ -491,15 +498,20 @@ void LiveRangeCalc::updateSSA() { IDomValue = Map[IDom->getBlock()]; // Cache the DomTree node that defined the value. - if (IDomValue.first && !IDomValue.second) + if (IDomValue.first && IDomValue.first != &UndefVNI && + !IDomValue.second) { Map[IDom->getBlock()].second = IDomValue.second = DomTree->getNode(Indexes->getMBBFromIndex(IDomValue.first->def)); + } - for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), - PE = MBB->pred_end(); PI != PE; ++PI) { - LiveOutPair &Value = Map[*PI]; + for (MachineBasicBlock *Pred : MBB->predecessors()) { + LiveOutPair &Value = Map[Pred]; if (!Value.first || Value.first == IDomValue.first) continue; + if (Value.first == &UndefVNI) { + needPHI = true; + break; + } // Cache the DomTree node that defined the value. if (!Value.second) @@ -523,7 +535,7 @@ void LiveRangeCalc::updateSSA() { // Create a phi-def if required. 
if (needPHI) { - ++Changes; + Changed = true; assert(Alloc && "Need VNInfo allocator to create PHI-defs"); SlotIndex Start, End; std::tie(Start, End) = Indexes->getMBBRange(MBB); @@ -542,7 +554,7 @@ void LiveRangeCalc::updateSSA() { LR.addSegment(LiveInterval::Segment(Start, End, VNI)); LOP = LiveOutPair(VNI, Node); } - } else if (IDomValue.first) { + } else if (IDomValue.first && IDomValue.first != &UndefVNI) { // No phi-def here. Remember incoming value. I.Value = IDomValue.first; @@ -554,9 +566,9 @@ void LiveRangeCalc::updateSSA() { // MBB is live-out and doesn't define its own value. if (LOP.first == IDomValue.first) continue; - ++Changes; + Changed = true; LOP = IDomValue; } } - } while (Changes); + } while (Changed); } diff --git a/interpreter/llvm/src/lib/CodeGen/LiveRangeCalc.h b/interpreter/llvm/src/lib/CodeGen/LiveRangeCalc.h index 1a7598f8044a5..d41b782d9bdf2 100644 --- a/interpreter/llvm/src/lib/CodeGen/LiveRangeCalc.h +++ b/interpreter/llvm/src/lib/CodeGen/LiveRangeCalc.h @@ -24,6 +24,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/IndexedMap.h" #include "llvm/CodeGen/LiveInterval.h" @@ -65,7 +66,8 @@ class LiveRangeCalc { /// registers do not overlap), but the defined/undefined information must /// be kept separate for each individual range. /// By convention, EntryInfoMap[&LR] = { Defined, Undefined }. - std::map> EntryInfoMap; + typedef DenseMap> EntryInfoMap; + EntryInfoMap EntryInfos; /// Map each basic block where a live range is live out to the live-out value /// and its defining block. diff --git a/interpreter/llvm/src/lib/CodeGen/LiveRangeShrink.cpp b/interpreter/llvm/src/lib/CodeGen/LiveRangeShrink.cpp index 00182e2c779f5..552f4b5393fef 100644 --- a/interpreter/llvm/src/lib/CodeGen/LiveRangeShrink.cpp +++ b/interpreter/llvm/src/lib/CodeGen/LiveRangeShrink.cpp @@ -14,10 +14,10 @@ /// uses, all of which are the only use of the def. /// ///===---------------------------------------------------------------------===// +#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/ADT/Statistic.h" #include "llvm/Support/Debug.h" #define DEBUG_TYPE "lrshrink" @@ -103,7 +103,7 @@ bool LiveRangeShrink::runOnMachineFunction(MachineFunction &MF) { // register is used last. When moving instructions up, we need to // make sure all its defs (including dead def) will not cross its // last use when moving up. - DenseMap UseMap; + DenseMap> UseMap; for (MachineBasicBlock &MBB : MF) { if (MBB.empty()) @@ -122,15 +122,19 @@ bool LiveRangeShrink::runOnMachineFunction(MachineFunction &MF) { unsigned CurrentOrder = IOM[&MI]; unsigned Barrier = 0; + MachineInstr *BarrierMI = nullptr; for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg() || MO.isDebug()) continue; if (MO.isUse()) - UseMap[MO.getReg()] = CurrentOrder; + UseMap[MO.getReg()] = std::make_pair(CurrentOrder, &MI); else if (MO.isDead() && UseMap.count(MO.getReg())) // Barrier is the last instruction where MO get used. MI should not // be moved above Barrier. 
- Barrier = std::max(Barrier, UseMap[MO.getReg()]); + if (Barrier < UseMap[MO.getReg()].first) { + Barrier = UseMap[MO.getReg()].first; + BarrierMI = UseMap[MO.getReg()].second; + } } if (!MI.isSafeToMove(nullptr, SawStore)) { @@ -156,9 +160,10 @@ bool LiveRangeShrink::runOnMachineFunction(MachineFunction &MF) { continue; unsigned Reg = MO.getReg(); // Do not move the instruction if it def/uses a physical register, - // unless it is a constant physical register. - if (TargetRegisterInfo::isPhysicalRegister(Reg) && - !MRI.isConstantPhysReg(Reg)) { + // unless it is a constant physical register or a noreg. + if (!TargetRegisterInfo::isVirtualRegister(Reg)) { + if (!Reg || MRI.isConstantPhysReg(Reg)) + continue; Insert = nullptr; break; } @@ -169,7 +174,13 @@ bool LiveRangeShrink::runOnMachineFunction(MachineFunction &MF) { break; } DefMO = &MO; - } else if (MRI.hasOneNonDBGUse(Reg) && MRI.hasOneDef(Reg)) { + } else if (MRI.hasOneNonDBGUse(Reg) && MRI.hasOneDef(Reg) && DefMO && + MRI.getRegClass(DefMO->getReg()) == + MRI.getRegClass(MO.getReg())) { + // The heuristic does not handle different register classes yet + // (registers of different sizes, looser/tighter constraints). This + // is because it needs more accurate model to handle register + // pressure correctly. MachineInstr &DefInstr = *MRI.def_instr_begin(Reg); if (!DefInstr.isCopy()) NumEligibleUse++; @@ -179,6 +190,15 @@ bool LiveRangeShrink::runOnMachineFunction(MachineFunction &MF) { break; } } + + // If Barrier equals IOM[I], traverse forward to find if BarrierMI is + // after Insert, if yes, then we should not hoist. + for (MachineInstr *I = Insert; I && IOM[I] == Barrier; + I = I->getNextNode()) + if (I == BarrierMI) { + Insert = nullptr; + break; + } // Move the instruction when # of shrunk live range > 1. 
if (DefMO && Insert && NumEligibleUse > 1 && Barrier <= IOM[Insert]) { MachineBasicBlock::iterator I = std::next(Insert->getIterator()); diff --git a/interpreter/llvm/src/lib/CodeGen/LiveRegMatrix.cpp b/interpreter/llvm/src/lib/CodeGen/LiveRegMatrix.cpp index 882de1a3fad96..60033db38ee44 100644 --- a/interpreter/llvm/src/lib/CodeGen/LiveRegMatrix.cpp +++ b/interpreter/llvm/src/lib/CodeGen/LiveRegMatrix.cpp @@ -11,17 +11,17 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/LiveRegMatrix.h" #include "RegisterCoalescer.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" -#include "llvm/CodeGen/LiveRegMatrix.h" -#include "llvm/CodeGen/VirtRegMap.h" #include "llvm/CodeGen/LiveIntervalUnion.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/Pass.h" +#include "llvm/CodeGen/VirtRegMap.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetRegisterInfo.h" diff --git a/interpreter/llvm/src/lib/CodeGen/LiveRegUnits.cpp b/interpreter/llvm/src/lib/CodeGen/LiveRegUnits.cpp index dff555f49565e..f9ba4ffa6527c 100644 --- a/interpreter/llvm/src/lib/CodeGen/LiveRegUnits.cpp +++ b/interpreter/llvm/src/lib/CodeGen/LiveRegUnits.cpp @@ -12,11 +12,13 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/LiveRegUnits.h" + #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -65,7 +67,7 @@ void LiveRegUnits::stepBackward(const MachineInstr &MI) { } } -void LiveRegUnits::accumulateBackward(const MachineInstr &MI) { +void LiveRegUnits::accumulate(const MachineInstr &MI) { // Add defs, uses and regmask clobbers to the set. for (ConstMIBundleOperands O(MI); O.isValid(); ++O) { if (O->isReg()) { @@ -81,46 +83,50 @@ void LiveRegUnits::accumulateBackward(const MachineInstr &MI) { } /// Add live-in registers of basic block \p MBB to \p LiveUnits. -static void addLiveIns(LiveRegUnits &LiveUnits, const MachineBasicBlock &MBB) { +static void addBlockLiveIns(LiveRegUnits &LiveUnits, + const MachineBasicBlock &MBB) { for (const auto &LI : MBB.liveins()) LiveUnits.addRegMasked(LI.PhysReg, LI.LaneMask); } -static void addLiveOuts(LiveRegUnits &LiveUnits, const MachineBasicBlock &MBB) { - // To get the live-outs we simply merge the live-ins of all successors. - for (const MachineBasicBlock *Succ : MBB.successors()) - addLiveIns(LiveUnits, *Succ); +/// Adds all callee saved registers to \p LiveUnits. +static void addCalleeSavedRegs(LiveRegUnits &LiveUnits, + const MachineFunction &MF) { + const MachineRegisterInfo &MRI = MF.getRegInfo(); + for (const MCPhysReg *CSR = MRI.getCalleeSavedRegs(); CSR && *CSR; ++CSR) + LiveUnits.addReg(*CSR); } -/// Add pristine registers to the given \p LiveUnits. This function removes -/// actually saved callee save registers when \p InPrologueEpilogue is false. -static void removeSavedRegs(LiveRegUnits &LiveUnits, const MachineFunction &MF, - const MachineFrameInfo &MFI, - const TargetRegisterInfo &TRI) { +/// Adds pristine registers to the given \p LiveUnits. 
Pristine registers are +/// callee saved registers that are unused in the function. +static void addPristines(LiveRegUnits &LiveUnits, const MachineFunction &MF) { + const MachineFrameInfo &MFI = MF.getFrameInfo(); + if (!MFI.isCalleeSavedInfoValid()) + return; + /// Add all callee saved regs, then remove the ones that are saved+restored. + addCalleeSavedRegs(LiveUnits, MF); + /// Remove the ones that are not saved/restored; they are pristine. for (const CalleeSavedInfo &Info : MFI.getCalleeSavedInfo()) LiveUnits.removeReg(Info.getReg()); } void LiveRegUnits::addLiveOuts(const MachineBasicBlock &MBB) { const MachineFunction &MF = *MBB.getParent(); - const MachineFrameInfo &MFI = MF.getFrameInfo(); - if (MFI.isCalleeSavedInfoValid()) { - for (const MCPhysReg *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) - addReg(*I); - if (!MBB.isReturnBlock()) - removeSavedRegs(*this, MF, MFI, *TRI); + if (!MBB.succ_empty()) { + addPristines(*this, MF); + // To get the live-outs we simply merge the live-ins of all successors. + for (const MachineBasicBlock *Succ : MBB.successors()) + addBlockLiveIns(*this, *Succ); + } else if (MBB.isReturnBlock()) { + // For the return block: Add all callee saved registers. + const MachineFrameInfo &MFI = MF.getFrameInfo(); + if (MFI.isCalleeSavedInfoValid()) + addCalleeSavedRegs(*this, MF); } - ::addLiveOuts(*this, MBB); } void LiveRegUnits::addLiveIns(const MachineBasicBlock &MBB) { const MachineFunction &MF = *MBB.getParent(); - const MachineFrameInfo &MFI = MF.getFrameInfo(); - if (MFI.isCalleeSavedInfoValid()) { - for (const MCPhysReg *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) - addReg(*I); - if (&MBB != &MF.front()) - removeSavedRegs(*this, MF, MFI, *TRI); - } - ::addLiveIns(*this, MBB); + addPristines(*this, MF); + addBlockLiveIns(*this, MBB); } diff --git a/interpreter/llvm/src/lib/CodeGen/LiveStackAnalysis.cpp b/interpreter/llvm/src/lib/CodeGen/LiveStackAnalysis.cpp index dbf1f96102d14..b51f8b0aa6bb7 100644 --- a/interpreter/llvm/src/lib/CodeGen/LiveStackAnalysis.cpp +++ b/interpreter/llvm/src/lib/CodeGen/LiveStackAnalysis.cpp @@ -25,10 +25,10 @@ using namespace llvm; #define DEBUG_TYPE "livestacks" char LiveStacks::ID = 0; -INITIALIZE_PASS_BEGIN(LiveStacks, "livestacks", +INITIALIZE_PASS_BEGIN(LiveStacks, DEBUG_TYPE, "Live Stack Slot Analysis", false, false) INITIALIZE_PASS_DEPENDENCY(SlotIndexes) -INITIALIZE_PASS_END(LiveStacks, "livestacks", +INITIALIZE_PASS_END(LiveStacks, DEBUG_TYPE, "Live Stack Slot Analysis", false, false) char &llvm::LiveStacksID = LiveStacks::ID; diff --git a/interpreter/llvm/src/lib/CodeGen/LocalStackSlotAllocation.cpp b/interpreter/llvm/src/lib/CodeGen/LocalStackSlotAllocation.cpp index e189fb0dd89d8..b109f1922a3ec 100644 --- a/interpreter/llvm/src/lib/CodeGen/LocalStackSlotAllocation.cpp +++ b/interpreter/llvm/src/lib/CodeGen/LocalStackSlotAllocation.cpp @@ -14,7 +14,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallSet.h" @@ -23,6 +22,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/StackProtector.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" @@ -103,10 +103,10 @@ namespace { char LocalStackSlotPass::ID = 0; char &llvm::LocalStackSlotAllocationID = LocalStackSlotPass::ID; 
-INITIALIZE_PASS_BEGIN(LocalStackSlotPass, "localstackalloc", +INITIALIZE_PASS_BEGIN(LocalStackSlotPass, DEBUG_TYPE, "Local Stack Slot Allocation", false, false) INITIALIZE_PASS_DEPENDENCY(StackProtector) -INITIALIZE_PASS_END(LocalStackSlotPass, "localstackalloc", +INITIALIZE_PASS_END(LocalStackSlotPass, DEBUG_TYPE, "Local Stack Slot Allocation", false, false) diff --git a/interpreter/llvm/src/lib/CodeGen/LowerEmuTLS.cpp b/interpreter/llvm/src/lib/CodeGen/LowerEmuTLS.cpp index 6966c8ca4a5f8..0fc48d4e0b6bc 100644 --- a/interpreter/llvm/src/lib/CodeGen/LowerEmuTLS.cpp +++ b/interpreter/llvm/src/lib/CodeGen/LowerEmuTLS.cpp @@ -16,6 +16,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" @@ -28,14 +29,12 @@ using namespace llvm; namespace { class LowerEmuTLS : public ModulePass { - const TargetMachine *TM; public: static char ID; // Pass identification, replacement for typeid - explicit LowerEmuTLS() : ModulePass(ID), TM(nullptr) { } - explicit LowerEmuTLS(const TargetMachine *TM) - : ModulePass(ID), TM(TM) { + LowerEmuTLS() : ModulePass(ID) { initializeLowerEmuTLSPass(*PassRegistry::getPassRegistry()); } + bool runOnModule(Module &M) override; private: bool addEmuTlsVar(Module &M, const GlobalVariable *GV); @@ -54,19 +53,22 @@ class LowerEmuTLS : public ModulePass { char LowerEmuTLS::ID = 0; -INITIALIZE_PASS(LowerEmuTLS, "loweremutls", - "Add __emutls_[vt]. variables for emultated TLS model", - false, false) +INITIALIZE_PASS(LowerEmuTLS, DEBUG_TYPE, + "Add __emutls_[vt]. variables for emultated TLS model", false, + false) -ModulePass *llvm::createLowerEmuTLSPass(const TargetMachine *TM) { - return new LowerEmuTLS(TM); -} +ModulePass *llvm::createLowerEmuTLSPass() { return new LowerEmuTLS(); } bool LowerEmuTLS::runOnModule(Module &M) { if (skipModule(M)) return false; - if (!TM || !TM->Options.EmulatedTLS) + auto *TPC = getAnalysisIfAvailable<TargetPassConfig>(); + if (!TPC) + return false; + + auto &TM = TPC->getTM<TargetMachine>(); + if (!TM.Options.EmulatedTLS) return false; bool Changed = false; diff --git a/interpreter/llvm/src/lib/CodeGen/MIRParser/MILexer.cpp b/interpreter/llvm/src/lib/CodeGen/MIRParser/MILexer.cpp index 1f1ce6e8d7250..58a655a4dee4f 100644 --- a/interpreter/llvm/src/lib/CodeGen/MIRParser/MILexer.cpp +++ b/interpreter/llvm/src/lib/CodeGen/MIRParser/MILexer.cpp @@ -365,6 +365,14 @@ static Cursor maybeLexIRValue(Cursor C, MIToken &Token, return lexName(C, Token, MIToken::NamedIRValue, Rule.size(), ErrorCallback); } +static Cursor maybeLexStringConstant(Cursor C, MIToken &Token, + ErrorCallbackType ErrorCallback) { + if (C.peek() != '"') + return None; + return lexName(C, Token, MIToken::StringConstant, /*PrefixLength=*/0, + ErrorCallback); +} + static Cursor lexVirtualRegister(Cursor C, MIToken &Token) { auto Range = C; C.advance(); // Skip '%' @@ -630,6 +638,8 @@ StringRef llvm::lexMIToken(StringRef Source, MIToken &Token, return R.remaining(); if (Cursor R = maybeLexEscapedIRValue(C, Token, ErrorCallback)) return R.remaining(); + if (Cursor R = maybeLexStringConstant(C, Token, ErrorCallback)) + return R.remaining(); Token.reset(MIToken::Error, C.remaining()); ErrorCallback(C.location(), diff --git a/interpreter/llvm/src/lib/CodeGen/MIRParser/MILexer.h b/interpreter/llvm/src/lib/CodeGen/MIRParser/MILexer.h index edba749b5fce4..08b82e59c4fc1 100644 --- a/interpreter/llvm/src/lib/CodeGen/MIRParser/MILexer.h +++ 
b/interpreter/llvm/src/lib/CodeGen/MIRParser/MILexer.h @@ -16,8 +16,8 @@ #define LLVM_LIB_CODEGEN_MIRPARSER_MILEXER_H #include "llvm/ADT/APSInt.h" -#include "llvm/ADT/StringRef.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" #include namespace llvm { @@ -127,7 +127,8 @@ struct MIToken { NamedIRValue, IRValue, QuotedIRValue, // `` - SubRegisterIndex + SubRegisterIndex, + StringConstant }; private: @@ -168,7 +169,8 @@ struct MIToken { bool isMemoryOperandFlag() const { return Kind == kw_volatile || Kind == kw_non_temporal || - Kind == kw_dereferenceable || Kind == kw_invariant; + Kind == kw_dereferenceable || Kind == kw_invariant || + Kind == StringConstant; } bool is(TokenKind K) const { return Kind == K; } diff --git a/interpreter/llvm/src/lib/CodeGen/MIRParser/MIParser.cpp b/interpreter/llvm/src/lib/CodeGen/MIRParser/MIParser.cpp index 1d36ff4e1458d..c68d87b15a317 100644 --- a/interpreter/llvm/src/lib/CodeGen/MIRParser/MIParser.cpp +++ b/interpreter/llvm/src/lib/CodeGen/MIRParser/MIParser.cpp @@ -11,11 +11,19 @@ // //===----------------------------------------------------------------------===// -#include "MIParser.h" - #include "MILexer.h" +#include "MIParser.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/APSInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" #include "llvm/AsmParser/Parser.h" #include "llvm/AsmParser/SlotMapping.h" #include "llvm/CodeGen/MIRPrinter.h" @@ -26,19 +34,48 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/ModuleSlotTracker.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" #include "llvm/IR/ValueSymbolTable.h" +#include "llvm/MC/LaneBitmask.h" +#include "llvm/MC/MCDwarf.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/AtomicOrdering.h" +#include "llvm/Support/BranchProbability.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/LowLevelTypeImpl.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/SMLoc.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetIntrinsicInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" +#include +#include #include +#include +#include +#include +#include +#include using namespace llvm; @@ -104,6 +141,8 @@ class MIParser { StringMap Names2DirectTargetFlags; /// Maps from direct target flag names to the bitmask target flag values. StringMap Names2BitmaskTargetFlags; + /// Maps from MMO target flag names to MMO target flag values. 
+ StringMap Names2MMOTargetFlags; public: MIParser(PerFunctionMIParsingState &PFS, SMDiagnostic &Error, @@ -192,6 +231,7 @@ class MIParser { bool parseMemoryOperandFlag(MachineMemOperand::Flags &Flags); bool parseMemoryPseudoSourceValue(const PseudoSourceValue *&PSV); bool parseMachinePointerInfo(MachinePointerInfo &Dest); + bool parseOptionalScope(LLVMContext &Context, SyncScope::ID &SSID); bool parseOptionalAtomicOrdering(AtomicOrdering &Order); bool parseMachineMemoryOperand(MachineMemOperand *&Dest); @@ -281,6 +321,18 @@ class MIParser { /// /// Return true if the name isn't a name of a bitmask target flag. bool getBitmaskTargetFlag(StringRef Name, unsigned &Flag); + + void initNames2MMOTargetFlags(); + + /// Try to convert a name of a MachineMemOperand target flag to the + /// corresponding target flag. + /// + /// Return true if the name isn't a name of a target MMO flag. + bool getMMOTargetFlag(StringRef Name, MachineMemOperand::Flags &Flag); + + /// parseStringConstant + /// ::= StringConstant + bool parseStringConstant(std::string &Result); }; } // end anonymous namespace @@ -542,12 +594,12 @@ bool MIParser::parseBasicBlock(MachineBasicBlock &MBB, // // is equivalent to // liveins: %edi, %esi - bool ExplicitSuccesors = false; + bool ExplicitSuccessors = false; while (true) { if (Token.is(MIToken::kw_successors)) { if (parseBasicBlockSuccessors(MBB)) return true; - ExplicitSuccesors = true; + ExplicitSuccessors = true; } else if (Token.is(MIToken::kw_liveins)) { if (parseBasicBlockLiveins(MBB)) return true; @@ -599,7 +651,7 @@ bool MIParser::parseBasicBlock(MachineBasicBlock &MBB, } // Construct successor list by searching for basic block machine operands. - if (!ExplicitSuccesors) { + if (!ExplicitSuccessors) { SmallVector Successors; bool IsFallthrough; guessSuccessors(MBB, Successors, IsFallthrough); @@ -1997,7 +2049,14 @@ bool MIParser::parseMemoryOperandFlag(MachineMemOperand::Flags &Flags) { case MIToken::kw_invariant: Flags |= MachineMemOperand::MOInvariant; break; - // TODO: parse the target specific memory operand flags. + case MIToken::StringConstant: { + MachineMemOperand::Flags TF; + if (getMMOTargetFlag(Token.stringValue(), TF)) + return error("use of undefined target MMO flag '" + Token.stringValue() + + "'"); + Flags |= TF; + break; + } default: llvm_unreachable("The current token should be a memory operand flag"); } @@ -2039,7 +2098,7 @@ bool MIParser::parseMemoryPseudoSourceValue(const PseudoSourceValue *&PSV) { // The token was already consumed, so use return here instead of break. 
return false; } - case MIToken::kw_call_entry: { + case MIToken::kw_call_entry: lex(); switch (Token.kind()) { case MIToken::GlobalValue: @@ -2059,7 +2118,6 @@ bool MIParser::parseMemoryPseudoSourceValue(const PseudoSourceValue *&PSV) { "expected a global value or an external symbol after 'call-entry'"); } break; - } default: llvm_unreachable("The current token should be pseudo source value"); } @@ -2099,6 +2157,26 @@ bool MIParser::parseMachinePointerInfo(MachinePointerInfo &Dest) { return false; } +bool MIParser::parseOptionalScope(LLVMContext &Context, + SyncScope::ID &SSID) { + SSID = SyncScope::System; + if (Token.is(MIToken::Identifier) && Token.stringValue() == "syncscope") { + lex(); + if (expectAndConsume(MIToken::lparen)) + return error("expected '(' in syncscope"); + + std::string SSN; + if (parseStringConstant(SSN)) + return true; + + SSID = Context.getOrInsertSyncScopeID(SSN); + if (expectAndConsume(MIToken::rparen)) + return error("expected ')' in syncscope"); + } + + return false; +} + bool MIParser::parseOptionalAtomicOrdering(AtomicOrdering &Order) { Order = AtomicOrdering::NotAtomic; if (Token.isNot(MIToken::Identifier)) @@ -2138,12 +2216,10 @@ bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) { Flags |= MachineMemOperand::MOStore; lex(); - // Optional "singlethread" scope. - SynchronizationScope Scope = SynchronizationScope::CrossThread; - if (Token.is(MIToken::Identifier) && Token.stringValue() == "singlethread") { - Scope = SynchronizationScope::SingleThread; - lex(); - } + // Optional synchronization scope. + SyncScope::ID SSID; + if (parseOptionalScope(MF.getFunction()->getContext(), SSID)) + return true; // Up to two atomic orderings (cmpxchg provides guarantees on failure). AtomicOrdering Order, FailureOrder; @@ -2208,7 +2284,7 @@ bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) { if (expectAndConsume(MIToken::rparen)) return true; Dest = MF.getMachineMemOperand(Ptr, Flags, Size, BaseAlignment, AAInfo, Range, - Scope, Order, FailureOrder); + SSID, Order, FailureOrder); return false; } @@ -2421,6 +2497,35 @@ bool MIParser::getBitmaskTargetFlag(StringRef Name, unsigned &Flag) { return false; } +void MIParser::initNames2MMOTargetFlags() { + if (!Names2MMOTargetFlags.empty()) + return; + const auto *TII = MF.getSubtarget().getInstrInfo(); + assert(TII && "Expected target instruction info"); + auto Flags = TII->getSerializableMachineMemOperandTargetFlags(); + for (const auto &I : Flags) + Names2MMOTargetFlags.insert( + std::make_pair(StringRef(I.second), I.first)); +} + +bool MIParser::getMMOTargetFlag(StringRef Name, + MachineMemOperand::Flags &Flag) { + initNames2MMOTargetFlags(); + auto FlagInfo = Names2MMOTargetFlags.find(Name); + if (FlagInfo == Names2MMOTargetFlags.end()) + return true; + Flag = FlagInfo->second; + return false; +} + +bool MIParser::parseStringConstant(std::string &Result) { + if (Token.isNot(MIToken::StringConstant)) + return error("expected string constant"); + Result = Token.stringValue(); + lex(); + return false; +} + bool llvm::parseMachineBasicBlockDefinitions(PerFunctionMIParsingState &PFS, StringRef Src, SMDiagnostic &Error) { diff --git a/interpreter/llvm/src/lib/CodeGen/MIRParser/MIParser.h b/interpreter/llvm/src/lib/CodeGen/MIRParser/MIParser.h index 9b3879cf83772..2307881068efb 100644 --- a/interpreter/llvm/src/lib/CodeGen/MIRParser/MIParser.h +++ b/interpreter/llvm/src/lib/CodeGen/MIRParser/MIParser.h @@ -1,4 +1,4 @@ -//===- MIParser.h - Machine Instructions Parser ---------------------------===// 
+//===- MIParser.h - Machine Instructions Parser -----------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -15,21 +15,19 @@ #define LLVM_LIB_CODEGEN_MIRPARSER_MIPARSER_H #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/Support/Allocator.h" namespace llvm { -class StringRef; -class BasicBlock; class MachineBasicBlock; class MachineFunction; -class MachineInstr; -class MachineRegisterInfo; class MDNode; class RegisterBank; struct SlotMapping; class SMDiagnostic; class SourceMgr; +class StringRef; class TargetRegisterClass; struct VRegInfo { @@ -45,8 +43,8 @@ struct VRegInfo { unsigned PreferredReg = 0; }; -typedef StringMap<const TargetRegisterClass *> Name2RegClassMap; -typedef StringMap<const RegisterBank *> Name2RegBankMap; +using Name2RegClassMap = StringMap<const TargetRegisterClass *>; +using Name2RegBankMap = StringMap<const RegisterBank *>; struct PerFunctionMIParsingState { BumpPtrAllocator Allocator; @@ -122,4 +120,4 @@ bool parseMDNode(PerFunctionMIParsingState &PFS, MDNode *&Node, StringRef Src, } // end namespace llvm -#endif +#endif // LLVM_LIB_CODEGEN_MIRPARSER_MIPARSER_H diff --git a/interpreter/llvm/src/lib/CodeGen/MIRParser/MIRParser.cpp b/interpreter/llvm/src/lib/CodeGen/MIRParser/MIRParser.cpp index bd04acd049dba..78b57f357781e 100644 --- a/interpreter/llvm/src/lib/CodeGen/MIRParser/MIRParser.cpp +++ b/interpreter/llvm/src/lib/CodeGen/MIRParser/MIRParser.cpp @@ -50,18 +50,24 @@ namespace llvm { /// file. class MIRParserImpl { SourceMgr SM; + yaml::Input In; StringRef Filename; LLVMContext &Context; - StringMap<std::unique_ptr<yaml::MachineFunction>> Functions; SlotMapping IRSlots; /// Maps from register class names to register classes. Name2RegClassMap Names2RegClasses; /// Maps from register bank names to register banks. Name2RegBankMap Names2RegBanks; + /// True when the MIR file doesn't have LLVM IR. Dummy IR functions are + /// created and inserted into the given module when this is true. + bool NoLLVMIR = false; + /// True when a well-formed MIR file does not contain any MIR/machine function + /// parts. + bool NoMIRDocuments = false; public: - MIRParserImpl(std::unique_ptr<MemoryBuffer> Contents, StringRef Filename, - LLVMContext &Context); + MIRParserImpl(std::unique_ptr<MemoryBuffer> Contents, + StringRef Filename, LLVMContext &Context); void reportDiagnostic(const SMDiagnostic &Diag); @@ -85,22 +91,22 @@ class MIRParserImpl { /// file. /// /// Return null if an error occurred. - std::unique_ptr<Module> parse(); + std::unique_ptr<Module> parseIRModule(); + + bool parseMachineFunctions(Module &M, MachineModuleInfo &MMI); /// Parse the machine function in the current YAML document. /// - /// \param NoLLVMIR - set to true when the MIR file doesn't have LLVM IR. - /// A dummy IR function is created and inserted into the given module when - /// this parameter is true. /// /// Return true if an error occurred. - bool parseMachineFunction(yaml::Input &In, Module &M, bool NoLLVMIR); + bool parseMachineFunction(Module &M, MachineModuleInfo &MMI); /// Initialize the machine function to the state that's described in the MIR /// file. /// /// Return true if error occurred. - bool initializeMachineFunction(MachineFunction &MF); + bool initializeMachineFunction(const yaml::MachineFunction &YamlMF, + MachineFunction &MF); bool parseRegisterInfo(PerFunctionMIParsingState &PFS, const yaml::MachineFunction &YamlMF); @@ -144,9 +150,6 @@ class MIRParserImpl { SMDiagnostic diagFromBlockStringDiag(const SMDiagnostic &Error, SMRange SourceRange); - /// Create an empty function with the given name.
- void createDummyFunction(StringRef Name, Module &M); - void initNames2RegClasses(const MachineFunction &MF); void initNames2RegBanks(const MachineFunction &MF); @@ -166,10 +169,19 @@ class MIRParserImpl { } // end namespace llvm +static void handleYAMLDiag(const SMDiagnostic &Diag, void *Context) { + reinterpret_cast<MIRParserImpl *>(Context)->reportDiagnostic(Diag); +} + MIRParserImpl::MIRParserImpl(std::unique_ptr<MemoryBuffer> Contents, StringRef Filename, LLVMContext &Context) - : SM(), Filename(Filename), Context(Context) { - SM.AddNewSourceBuffer(std::move(Contents), SMLoc()); + : SM(), + In(SM.getMemoryBuffer( + SM.AddNewSourceBuffer(std::move(Contents), SMLoc()))->getBuffer(), + nullptr, handleYAMLDiag, this), + Filename(Filename), + Context(Context) { + In.setContext(&In); } bool MIRParserImpl::error(const Twine &Message) { @@ -206,24 +218,16 @@ void MIRParserImpl::reportDiagnostic(const SMDiagnostic &Diag) { Context.diagnose(DiagnosticInfoMIRParser(Kind, Diag)); } -static void handleYAMLDiag(const SMDiagnostic &Diag, void *Context) { - reinterpret_cast<MIRParserImpl *>(Context)->reportDiagnostic(Diag); -} - -std::unique_ptr<Module> MIRParserImpl::parse() { - yaml::Input In(SM.getMemoryBuffer(SM.getMainFileID())->getBuffer(), - /*Ctxt=*/nullptr, handleYAMLDiag, this); - In.setContext(&In); - +std::unique_ptr<Module> MIRParserImpl::parseIRModule() { if (!In.setCurrentDocument()) { if (In.error()) return nullptr; // Create an empty module when the MIR file is empty. + NoMIRDocuments = true; return llvm::make_unique<Module>(Filename, Context); } std::unique_ptr<Module> M; - bool NoLLVMIR = false; // Parse the block scalar manually so that we can return unique pointer // without having to go through YAML traits. if (const auto *BSN = @@ -237,49 +241,68 @@ std::unique_ptr<Module> MIRParserImpl::parse() { } In.nextDocument(); if (!In.setCurrentDocument()) - return M; + NoMIRDocuments = true; } else { // Create a new, empty module. M = llvm::make_unique<Module>(Filename, Context); NoLLVMIR = true; } + return M; +} + +bool MIRParserImpl::parseMachineFunctions(Module &M, MachineModuleInfo &MMI) { + if (NoMIRDocuments) + return false; // Parse the machine functions. do { - if (parseMachineFunction(In, *M, NoLLVMIR)) - return nullptr; + if (parseMachineFunction(M, MMI)) + return true; In.nextDocument(); } while (In.setCurrentDocument()); - return M; -} - -bool MIRParserImpl::parseMachineFunction(yaml::Input &In, Module &M, - bool NoLLVMIR) { - auto MF = llvm::make_unique<yaml::MachineFunction>(); - yaml::EmptyContext Ctx; - yaml::yamlize(In, *MF, false, Ctx); - if (In.error()) - return true; - auto FunctionName = MF->Name; - if (Functions.find(FunctionName) != Functions.end()) - return error(Twine("redefinition of machine function '") + FunctionName + - "'"); - Functions.insert(std::make_pair(FunctionName, std::move(MF))); - if (NoLLVMIR) - createDummyFunction(FunctionName, M); - else if (!M.getFunction(FunctionName)) - return error(Twine("function '") + FunctionName + - "' isn't defined in the provided LLVM IR"); return false; } -void MIRParserImpl::createDummyFunction(StringRef Name, Module &M) { +/// Create an empty function with the given name. +static Function *createDummyFunction(StringRef Name, Module &M) { auto &Context = M.getContext(); Function *F = cast<Function>(M.getOrInsertFunction( Name, FunctionType::get(Type::getVoidTy(Context), false))); BasicBlock *BB = BasicBlock::Create(Context, "entry", F); new UnreachableInst(Context, BB); + return F; +} + +bool MIRParserImpl::parseMachineFunction(Module &M, MachineModuleInfo &MMI) { + // Parse the yaml.
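// --- Editorial sketch, not part of the patch: how a standalone tool might
// drive the split entry points defined above. A minimal sketch with error
// reporting elided; it assumes the caller constructed 'MMI' with its
// TargetMachine, and the file name is a placeholder.
static bool loadMIR(LLVMContext &Ctx, std::unique_ptr<Module> &M,
                    MachineModuleInfo &MMI) {
  SMDiagnostic Err;
  std::unique_ptr<MIRParser> Parser =
      createMIRParserFromFile("in.mir", Err, Ctx);
  if (!Parser)
    return true;
  M = Parser->parseIRModule();                   // phase 1: the LLVM IR module
  if (!M)
    return true;
  MMI.doInitialization(*M);
  return Parser->parseMachineFunctions(*M, MMI); // phase 2: machine functions
}
// --- end sketch ---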
+ yaml::MachineFunction YamlMF; + yaml::EmptyContext Ctx; + yaml::yamlize(In, YamlMF, false, Ctx); + if (In.error()) + return true; + + // Search for the corresponding IR function. + StringRef FunctionName = YamlMF.Name; + Function *F = M.getFunction(FunctionName); + if (!F) { + if (NoLLVMIR) { + F = createDummyFunction(FunctionName, M); + } else { + return error(Twine("function '") + FunctionName + + "' isn't defined in the provided LLVM IR"); + } + } + if (MMI.getMachineFunction(*F) != nullptr) + return error(Twine("redefinition of machine function '") + FunctionName + + "'"); + + // Create the MachineFunction. + MachineFunction &MF = MMI.getOrCreateMachineFunction(*F); + if (initializeMachineFunction(YamlMF, MF)) + return true; + + return false; } static bool isSSA(const MachineFunction &MF) { @@ -319,21 +342,16 @@ void MIRParserImpl::computeFunctionProperties(MachineFunction &MF) { Properties.set(MachineFunctionProperties::Property::NoVRegs); } -bool MIRParserImpl::initializeMachineFunction(MachineFunction &MF) { - auto It = Functions.find(MF.getName()); - if (It == Functions.end()) - return error(Twine("no machine function information for function '") + - MF.getName() + "' in the MIR file"); +bool +MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF, + MachineFunction &MF) { // TODO: Recreate the machine function. initNames2RegClasses(MF); initNames2RegBanks(MF); - const yaml::MachineFunction &YamlMF = *It->getValue(); if (YamlMF.Alignment) MF.setAlignment(YamlMF.Alignment); MF.setExposesReturnsTwice(YamlMF.ExposesReturnsTwice); - if (YamlMF.NoVRegs) - MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs); if (YamlMF.Legalized) MF.getProperties().set(MachineFunctionProperties::Property::Legalized); if (YamlMF.RegBankSelected) @@ -840,16 +858,18 @@ MIRParser::MIRParser(std::unique_ptr<MIRParserImpl> Impl) MIRParser::~MIRParser() {} -std::unique_ptr<Module> MIRParser::parseLLVMModule() { return Impl->parse(); } +std::unique_ptr<Module> MIRParser::parseIRModule() { + return Impl->parseIRModule(); +} -bool MIRParser::initializeMachineFunction(MachineFunction &MF) { - return Impl->initializeMachineFunction(MF); +bool MIRParser::parseMachineFunctions(Module &M, MachineModuleInfo &MMI) { + return Impl->parseMachineFunctions(M, MMI); } std::unique_ptr<MIRParser> llvm::createMIRParserFromFile(StringRef Filename, SMDiagnostic &Error, LLVMContext &Context) { - auto FileOrErr = MemoryBuffer::getFile(Filename); + auto FileOrErr = MemoryBuffer::getFileOrSTDIN(Filename); if (std::error_code EC = FileOrErr.getError()) { Error = SMDiagnostic(Filename, SourceMgr::DK_Error, "Could not open input file: " + EC.message()); diff --git a/interpreter/llvm/src/lib/CodeGen/MIRPrinter.cpp b/interpreter/llvm/src/lib/CodeGen/MIRPrinter.cpp index 6f6a67d81b0fe..ddeacf1d1bfb1 100644 --- a/interpreter/llvm/src/lib/CodeGen/MIRPrinter.cpp +++ b/interpreter/llvm/src/lib/CodeGen/MIRPrinter.cpp @@ -12,35 +12,66 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/MIRPrinter.h" - -#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/None.h" #include "llvm/ADT/SmallBitVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" #include "llvm/CodeGen/GlobalISel/RegisterBank.h" -#include "llvm/CodeGen/MIRYamlMapping.h" +#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MIRPrinter.h" +#include "llvm/CodeGen/MIRYamlMapping.h" +#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfo.h" -#include "llvm/IR/IRPrintingPasses.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IRPrintingPasses.h" #include "llvm/IR/Module.h" #include "llvm/IR/ModuleSlotTracker.h" +#include "llvm/IR/Value.h" +#include "llvm/MC/LaneBitmask.h" +#include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/Support/AtomicOrdering.h" +#include "llvm/Support/BranchProbability.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" -#include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/Options.h" -#include "llvm/Support/YAMLTraits.h" +#include "llvm/Support/LowLevelTypeImpl.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/YAMLTraits.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetIntrinsicInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" +#include <algorithm> +#include <cassert> +#include <cinttypes> +#include <cstdint> +#include <iterator> +#include <string> +#include <utility> +#include <vector> using namespace llvm; @@ -109,6 +140,8 @@ class MIPrinter { ModuleSlotTracker &MST; const DenseMap<const uint32_t *, unsigned> &RegisterMaskIds; const DenseMap<int, FrameIndexOperand> &StackObjectOperandMapping; + /// Synchronization scope names registered with LLVMContext.
+ SmallVector<StringRef, 8> SSNs; bool canPredictBranchProbabilities(const MachineBasicBlock &MBB) const; bool canPredictSuccessors(const MachineBasicBlock &MBB) const; @@ -132,7 +165,9 @@ class MIPrinter { void print(const MachineOperand &Op, const TargetRegisterInfo *TRI, unsigned I, bool ShouldPrintRegisterTies, LLT TypeToPrint, bool IsDef = false); - void print(const MachineMemOperand &Op); + void print(const LLVMContext &Context, const TargetInstrInfo &TII, + const MachineMemOperand &Op); + void printSyncScope(const LLVMContext &Context, SyncScope::ID SSID); void print(const MCCFIInstruction &CFI, const TargetRegisterInfo *TRI); }; @@ -147,6 +182,7 @@ template <> struct BlockScalarTraits<Module> { static void output(const Module &Mod, void *Ctxt, raw_ostream &OS) { Mod.print(OS, nullptr); } + static StringRef input(StringRef Str, void *Ctxt, Module &Mod) { llvm_unreachable("LLVM Module is supposed to be parsed separately"); return ""; @@ -183,8 +219,6 @@ void MIRPrinter::print(const MachineFunction &MF) { YamlMF.Alignment = MF.getAlignment(); YamlMF.ExposesReturnsTwice = MF.exposesReturnsTwice(); - YamlMF.NoVRegs = MF.getProperties().hasProperty( - MachineFunctionProperties::Property::NoVRegs); YamlMF.Legalized = MF.getProperties().hasProperty( MachineFunctionProperties::Property::Legalized); YamlMF.RegBankSelected = MF.getProperties().hasProperty( @@ -212,6 +246,8 @@ void MIRPrinter::print(const MachineFunction &MF) { } StrOS.flush(); yaml::Output Out(OS); + if (!SimplifyMIR) + Out.setWriteDefaultValues(true); Out << YamlMF; } @@ -518,7 +554,6 @@ bool MIPrinter::canPredictSuccessors(const MachineBasicBlock &MBB) const { return std::equal(MBB.succ_begin(), MBB.succ_end(), GuessedSuccs.begin()); } - void MIPrinter::print(const MachineBasicBlock &MBB) { assert(MBB.getNumber() >= 0 && "Invalid MBB number"); OS << "bb." << MBB.getNumber(); @@ -701,11 +736,12 @@ void MIPrinter::print(const MachineInstr &MI) { if (!MI.memoperands_empty()) { OS << " :: "; + const LLVMContext &Context = MF->getFunction()->getContext(); bool NeedComma = false; for (const auto *Op : MI.memoperands()) { if (NeedComma) OS << ", "; - print(*Op); + print(Context, *TII, *Op); NeedComma = true; } } @@ -910,7 +946,7 @@ void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI, OS << "%const." << Op.getIndex(); printOffset(Op.getOffset()); break; - case MachineOperand::MO_TargetIndex: { OS << "target-index("; if (const auto *Name = getTargetIndexName( *Op.getParent()->getParent()->getParent(), Op.getIndex())) OS << ')'; printOffset(Op.getOffset()); break; - } case MachineOperand::MO_JumpTableIndex: OS << "%jump-table.
<< Op.getIndex(); break; - case MachineOperand::MO_ExternalSymbol: + case MachineOperand::MO_ExternalSymbol: { + StringRef Name = Op.getSymbolName(); OS << '$'; - printLLVMNameWithoutPrefix(OS, Op.getSymbolName()); + if (Name.empty()) { + OS << "\"\""; + } else { + printLLVMNameWithoutPrefix(OS, Name); + } printOffset(Op.getOffset()); break; + } case MachineOperand::MO_GlobalAddress: Op.getGlobal()->printAsOperand(OS, /*PrintType=*/false, MST); printOffset(Op.getOffset()); @@ -996,9 +1037,20 @@ void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI, } } -void MIPrinter::print(const MachineMemOperand &Op) { +static const char *getTargetMMOFlagName(const TargetInstrInfo &TII, + unsigned TMMOFlag) { + auto Flags = TII.getSerializableMachineMemOperandTargetFlags(); + for (const auto &I : Flags) { + if (I.first == TMMOFlag) { + return I.second; + } + } + return nullptr; +} + +void MIPrinter::print(const LLVMContext &Context, const TargetInstrInfo &TII, + const MachineMemOperand &Op) { OS << '('; - // TODO: Print operand's target specific flags. if (Op.isVolatile()) OS << "volatile "; if (Op.isNonTemporal()) @@ -1007,6 +1059,15 @@ void MIPrinter::print(const MachineMemOperand &Op) { OS << "dereferenceable "; if (Op.isInvariant()) OS << "invariant "; + if (Op.getFlags() & MachineMemOperand::MOTargetFlag1) + OS << '"' << getTargetMMOFlagName(TII, MachineMemOperand::MOTargetFlag1) + << "\" "; + if (Op.getFlags() & MachineMemOperand::MOTargetFlag2) + OS << '"' << getTargetMMOFlagName(TII, MachineMemOperand::MOTargetFlag2) + << "\" "; + if (Op.getFlags() & MachineMemOperand::MOTargetFlag3) + OS << '"' << getTargetMMOFlagName(TII, MachineMemOperand::MOTargetFlag3) + << "\" "; if (Op.isLoad()) OS << "load "; else { @@ -1014,8 +1075,7 @@ void MIPrinter::print(const MachineMemOperand &Op) { OS << "store "; } - if (Op.getSynchScope() == SynchronizationScope::SingleThread) - OS << "singlethread "; + printSyncScope(Context, Op.getSyncScopeID()); if (Op.getOrdering() != AtomicOrdering::NotAtomic) OS << toIRString(Op.getOrdering()) << ' '; @@ -1084,6 +1144,23 @@ void MIPrinter::print(const MachineMemOperand &Op) { OS << ')'; } +void MIPrinter::printSyncScope(const LLVMContext &Context, SyncScope::ID SSID) { + switch (SSID) { + case SyncScope::System: { + break; + } + default: { + if (SSNs.empty()) + Context.getSyncScopeNames(SSNs); + + OS << "syncscope(\""; + PrintEscapedString(SSNs[SSID], OS); + OS << "\") "; + break; + } + } +} + static void printCFIRegister(unsigned DwarfReg, raw_ostream &OS, const TargetRegisterInfo *TRI) { int Reg = TRI->getLLVMRegNum(DwarfReg, true); diff --git a/interpreter/llvm/src/lib/CodeGen/MIRPrintingPass.cpp b/interpreter/llvm/src/lib/CodeGen/MIRPrintingPass.cpp index 671cf1eddc2dc..09354cf70c3c1 100644 --- a/interpreter/llvm/src/lib/CodeGen/MIRPrintingPass.cpp +++ b/interpreter/llvm/src/lib/CodeGen/MIRPrintingPass.cpp @@ -14,9 +14,9 @@ #include "llvm/CodeGen/MIRPrinter.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MIRYamlMapping.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" diff --git a/interpreter/llvm/src/lib/CodeGen/MachineBasicBlock.cpp b/interpreter/llvm/src/lib/CodeGen/MachineBasicBlock.cpp index 06112723497b0..81597afe6b02b 100644 --- a/interpreter/llvm/src/lib/CodeGen/MachineBasicBlock.cpp +++ b/interpreter/llvm/src/lib/CodeGen/MachineBasicBlock.cpp @@ -228,6 +228,12 @@ 
LLVM_DUMP_METHOD void MachineBasicBlock::dump() const { } #endif +bool MachineBasicBlock::isLegalToHoistInto() const { + if (isReturnBlock() || hasEHPadSuccessor()) + return false; + return true; +} + StringRef MachineBasicBlock::getName() const { if (const BasicBlock *LBB = getBasicBlock()) return LBB->getName(); @@ -350,6 +356,13 @@ void MachineBasicBlock::removeLiveIn(MCPhysReg Reg, LaneBitmask LaneMask) { LiveIns.erase(I); } +MachineBasicBlock::livein_iterator +MachineBasicBlock::removeLiveIn(MachineBasicBlock::livein_iterator I) { + // Get non-const version of iterator. + LiveInVector::iterator LI = LiveIns.begin() + (I - LiveIns.begin()); + return LiveIns.erase(LI); +} + bool MachineBasicBlock::isLiveIn(MCPhysReg Reg, LaneBitmask LaneMask) const { livein_iterator I = find_if( LiveIns, [Reg](const RegisterMaskPair &LI) { return LI.PhysReg == Reg; }); diff --git a/interpreter/llvm/src/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/interpreter/llvm/src/lib/CodeGen/MachineBlockFrequencyInfo.cpp index 9c7367b4c7802..4d1ec11df46c8 100644 --- a/interpreter/llvm/src/lib/CodeGen/MachineBlockFrequencyInfo.cpp +++ b/interpreter/llvm/src/lib/CodeGen/MachineBlockFrequencyInfo.cpp @@ -26,7 +26,7 @@ using namespace llvm; -#define DEBUG_TYPE "block-freq" +#define DEBUG_TYPE "machine-block-freq" static cl::opt<GVDAGType> ViewMachineBlockFreqPropagationDAG( @@ -149,11 +149,11 @@ struct DOTGraphTraits<MachineBlockFrequencyInfo *> } // end namespace llvm -INITIALIZE_PASS_BEGIN(MachineBlockFrequencyInfo, "machine-block-freq", +INITIALIZE_PASS_BEGIN(MachineBlockFrequencyInfo, DEBUG_TYPE, "Machine Block Frequency Analysis", true, true) INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) -INITIALIZE_PASS_END(MachineBlockFrequencyInfo, "machine-block-freq", +INITIALIZE_PASS_END(MachineBlockFrequencyInfo, DEBUG_TYPE, "Machine Block Frequency Analysis", true, true) char MachineBlockFrequencyInfo::ID = 0; diff --git a/interpreter/llvm/src/lib/CodeGen/MachineBlockPlacement.cpp b/interpreter/llvm/src/lib/CodeGen/MachineBlockPlacement.cpp index 4cfc128a8c1d6..447ad629885bf 100644 --- a/interpreter/llvm/src/lib/CodeGen/MachineBlockPlacement.cpp +++ b/interpreter/llvm/src/lib/CodeGen/MachineBlockPlacement.cpp @@ -25,8 +25,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/TargetPassConfig.h" #include "BranchFolding.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" @@ -41,7 +39,9 @@ #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachinePostDominators.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TailDuplicator.h" +#include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -133,6 +133,14 @@ static cl::opt<unsigned> TailDupPlacementThreshold( "that won't conflict."), cl::init(2), cl::Hidden); +// Heuristic for aggressive tail duplication. +static cl::opt<unsigned> TailDupPlacementAggressiveThreshold( + "tail-dup-placement-aggressive-threshold", + cl::desc("Instruction cutoff for aggressive tail duplication during " + "layout. Used at -O3. Tail merging during layout is forced to " + "have a threshold that won't conflict."), cl::init(3), + cl::Hidden); + // Heuristic for tail duplication. static cl::opt<unsigned> TailDupPlacementPenalty( "tail-dup-placement-penalty", @@ -237,25 +245,26 @@ class BlockChain { /// updating the block -> chain mapping.
It does not free or tear down the /// old chain, but the old chain's block list is no longer valid. void merge(MachineBasicBlock *BB, BlockChain *Chain) { - assert(BB); - assert(!Blocks.empty()); + assert(BB && "Can't merge a null block."); + assert(!Blocks.empty() && "Can't merge into an empty chain."); // Fast path in case we don't have a chain already. if (!Chain) { - assert(!BlockToChain[BB]); + assert(!BlockToChain[BB] && + "Passed chain is null, but BB has entry in BlockToChain."); Blocks.push_back(BB); BlockToChain[BB] = this; return; } - assert(BB == *Chain->begin()); + assert(BB == *Chain->begin() && "Passed BB is not head of Chain."); assert(Chain->begin() != Chain->end()); // Update the incoming blocks to point to this chain, and add them to the // chain structure. for (MachineBasicBlock *ChainBB : *Chain) { Blocks.push_back(ChainBB); - assert(BlockToChain[ChainBB] == Chain && "Incoming blocks not in chain"); + assert(BlockToChain[ChainBB] == Chain && "Incoming blocks not in chain."); BlockToChain[ChainBB] = this; } } @@ -490,13 +499,13 @@ class MachineBlockPlacement : public MachineFunctionPass { char MachineBlockPlacement::ID = 0; char &llvm::MachineBlockPlacementID = MachineBlockPlacement::ID; -INITIALIZE_PASS_BEGIN(MachineBlockPlacement, "block-placement", +INITIALIZE_PASS_BEGIN(MachineBlockPlacement, DEBUG_TYPE, "Branch Probability Basic Block Placement", false, false) INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo) INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) -INITIALIZE_PASS_END(MachineBlockPlacement, "block-placement", +INITIALIZE_PASS_END(MachineBlockPlacement, DEBUG_TYPE, "Branch Probability Basic Block Placement", false, false) #ifndef NDEBUG @@ -585,8 +594,8 @@ BranchProbability MachineBlockPlacement::collectViableSuccessors( // Assume A->C is very hot (>90%), and C->D has a 50% probability, then after // A->C is chosen as a fall-through, D won't be selected as a successor of C // due to CFG constraint (the probability of C->D is not greater than - // HotProb to break top-order). If we exclude E that is not in BlockFilter - // when calculating the probability of C->D, D will be selected and we + // HotProb to break topo-order). If we exclude E that is not in BlockFilter + // when calculating the probability of C->D, D will be selected and we // will get A C D B as the layout of this loop. auto AdjustedSumProb = BranchProbability::getOne(); for (MachineBasicBlock *Succ : BB->successors()) { @@ -1147,7 +1156,7 @@ void MachineBlockPlacement::precomputeTriangleChains() { continue; // Now we have an interesting triangle. Insert it if it's not part of an - // existing chain + // existing chain. // Note: This cannot be replaced with a call insert() or emplace() because // the find key is BB, but the insert/emplace key is PDom. 
auto Found = TriangleChainMap.find(&BB); @@ -1289,9 +1298,9 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor( // | | | | // ---BB | | BB // | | | | - // | pred-- | Succ-- + // | Pred-- | Succ-- // | | | | - // ---succ ---pred-- + // ---Succ ---Pred-- // // cost = freq(S->Pred) + freq(BB->Succ) cost = 2 * freq (S->Pred) // = freq(S->Pred) + freq(S->BB) @@ -1539,13 +1548,15 @@ MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock( MachineBasicBlock *BestBlock = nullptr; BlockFrequency BestFreq; for (MachineBasicBlock *MBB : WorkList) { - assert(MBB->isEHPad() == IsEHPad); + assert(MBB->isEHPad() == IsEHPad && + "EHPad mismatch between block and work list."); BlockChain &SuccChain = *BlockToChain[MBB]; if (&SuccChain == &Chain) continue; - assert(SuccChain.UnscheduledPredecessors == 0 && "Found CFG-violating block"); + assert(SuccChain.UnscheduledPredecessors == 0 && + "Found CFG-violating block"); BlockFrequency CandidateFreq = MBFI->getBlockFreq(MBB); DEBUG(dbgs() << " " << getBlockName(MBB) << " -> "; @@ -1613,9 +1624,12 @@ void MachineBlockPlacement::fillWorkLists( if (!UpdatedPreds.insert(&Chain).second) return; - assert(Chain.UnscheduledPredecessors == 0); + assert( + Chain.UnscheduledPredecessors == 0 && + "Attempting to place block with unscheduled predecessors in worklist."); for (MachineBasicBlock *ChainBB : Chain) { - assert(BlockToChain[ChainBB] == &Chain); + assert(BlockToChain[ChainBB] == &Chain && + "Block in chain doesn't match BlockToChain map."); for (MachineBasicBlock *Pred : ChainBB->predecessors()) { if (BlockFilter && !BlockFilter->count(Pred)) continue; @@ -1903,6 +1917,12 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain, return; MachineBasicBlock *Top = *LoopChain.begin(); + MachineBasicBlock *Bottom = *std::prev(LoopChain.end()); + + // If ExitingBB is already the last block in the chain, there is nothing to do. + if (Bottom == ExitingBB) + return; + bool ViableTopFallthrough = false; for (MachineBasicBlock *Pred : Top->predecessors()) { BlockChain *PredChain = BlockToChain[Pred]; @@ -1917,7 +1937,6 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain, // bottom is a viable exiting block. If so, bail out as rotating will // introduce an unnecessary branch. if (ViableTopFallthrough) { - MachineBasicBlock *Bottom = *std::prev(LoopChain.end()); for (MachineBasicBlock *Succ : Bottom->successors()) { BlockChain *SuccChain = BlockToChain[Succ]; if (!LoopBlockSet.count(Succ) && @@ -1930,6 +1949,36 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain, if (ExitIt == LoopChain.end()) return; + // Rotating a loop exit to the bottom when there is a fallthrough to top + // trades the entry fallthrough for an exit fallthrough. + // If there is no bottom->top edge, but the chosen exit block does have + // a fallthrough, we break that fallthrough for nothing in return. + + // Let's consider an example. We have a built chain of basic blocks + // B1, B2, ..., Bn, where Bk is ExitingBB, the chosen exit block. + // By doing a rotation we get + // Bk+1, ..., Bn, B1, ..., Bk + // The broken fallthrough into B1 is compensated by the new fallthrough out + // of Bk. + // If we had a fallthrough Bk -> Bk+1, it is broken now, + // and might only be compensated by the fallthrough Bn -> B1. + // This gives a condition for avoiding the creation of an extra branch by + // loop rotation.
+ // All of the following must hold for us to skip the rotation: + // there is a fallthrough to the top (B1), + // there was a fallthrough from the chosen exit block (Bk) to the next + // one (Bk+1), and + // there is no fallthrough from the bottom (Bn) to the top (B1). + // Note that there is no exit fallthrough from Bn, because we checked that + // above. if (ViableTopFallthrough) { + assert(std::next(ExitIt) != LoopChain.end() && + "Exit should not be last BB"); + MachineBasicBlock *NextBlockInChain = *std::next(ExitIt); + if (ExitingBB->isSuccessor(NextBlockInChain)) + if (!Bottom->isSuccessor(Top)) + return; + } + + DEBUG(dbgs() << "Rotating loop to put exit " << getBlockName(ExitingBB) + << " at bottom\n"); std::rotate(LoopChain.begin(), std::next(ExitIt), LoopChain.end()); } @@ -2128,8 +2177,10 @@ void MachineBlockPlacement::buildLoopChains(const MachineLoop &L) { for (const MachineLoop *InnerLoop : L) buildLoopChains(*InnerLoop); - assert(BlockWorkList.empty()); - assert(EHPadWorkList.empty()); + assert(BlockWorkList.empty() && + "BlockWorkList not empty when starting to build loop chains."); + assert(EHPadWorkList.empty() && + "EHPadWorkList not empty when starting to build loop chains."); BlockFilterSet LoopBlockSet = collectLoopBlockSet(L); // Check if we have profile data for this function. If yes, we will rotate @@ -2159,7 +2210,8 @@ void MachineBlockPlacement::buildLoopChains(const MachineLoop &L) { // walk the blocks, and use a set to prevent visiting a particular chain // twice. SmallPtrSet<BlockChain *, 4> UpdatedPreds; - assert(LoopChain.UnscheduledPredecessors == 0); + assert(LoopChain.UnscheduledPredecessors == 0 && + "LoopChain should not have unscheduled predecessors."); UpdatedPreds.insert(&LoopChain); for (const MachineBasicBlock *LoopBB : LoopBlockSet) @@ -2248,8 +2300,10 @@ void MachineBlockPlacement::buildCFGChains() { for (MachineLoop *L : *MLI) buildLoopChains(*L); - assert(BlockWorkList.empty()); - assert(EHPadWorkList.empty()); + assert(BlockWorkList.empty() && + "BlockWorkList should be empty before building final chain."); + assert(EHPadWorkList.empty() && + "EHPadWorkList should be empty before building final chain."); SmallPtrSet<BlockChain *, 4> UpdatedPreds; for (MachineBasicBlock &MBB : *F) @@ -2643,12 +2697,31 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { // there are no MachineLoops. PreferredLoopExit = nullptr; - assert(BlockToChain.empty()); - assert(ComputedEdges.empty()); + assert(BlockToChain.empty() && + "BlockToChain map should be empty before starting placement."); + assert(ComputedEdges.empty() && + "Computed Edge map should be empty before starting placement."); + + unsigned TailDupSize = TailDupPlacementThreshold; + // If only the aggressive threshold is explicitly set, use it. + if (TailDupPlacementAggressiveThreshold.getNumOccurrences() != 0 && + TailDupPlacementThreshold.getNumOccurrences() == 0) + TailDupSize = TailDupPlacementAggressiveThreshold; + + TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>(); + // For aggressive optimization, we can adjust some thresholds to be less + // conservative. + if (PassConfig->getOptLevel() >= CodeGenOpt::Aggressive) { + // At O3 we should be more willing to copy blocks for tail duplication. This + // increases size pressure, so we only do it at O3. + // Do this unless only the regular threshold is explicitly set.
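// --- Editorial sketch, not part of the patch: the threshold selection above
// and below, spelled out in isolation. getNumOccurrences() distinguishes
// "left at its default" from "explicitly set on the command line";
// pickTailDupThreshold is a hypothetical helper.
static unsigned pickTailDupThreshold(cl::opt<unsigned> &Regular,
                                     cl::opt<unsigned> &Aggressive, bool AtO3) {
  bool RegularSet = Regular.getNumOccurrences() != 0;
  bool AggressiveSet = Aggressive.getNumOccurrences() != 0;
  if (AggressiveSet && !RegularSet)
    return Aggressive; // an explicitly set aggressive threshold always wins
  if (AtO3 && (!RegularSet || AggressiveSet))
    return Aggressive; // at -O3 the aggressive threshold becomes the default
  return Regular;      // otherwise the regular threshold stands
}
// --- end sketch ---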
+ if (TailDupPlacementThreshold.getNumOccurrences() == 0 || + TailDupPlacementAggressiveThreshold.getNumOccurrences() != 0) + TailDupSize = TailDupPlacementAggressiveThreshold; + } if (TailDupPlacement) { MPDT = &getAnalysis<MachinePostDominatorTree>(); - unsigned TailDupSize = TailDupPlacementThreshold; if (MF.getFunction()->optForSize()) TailDupSize = 1; TailDup.initMF(MF, MBPI, /* LayoutMode */ true, TailDupSize); @@ -2658,7 +2731,6 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { buildCFGChains(); // Changing the layout can create new tail merging opportunities. - TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>(); // TailMerge can create jump into if branches that make CFG irreducible for // HW that requires structured CFG. bool EnableTailMerge = !MF.getTarget().requiresStructuredCFG() && @@ -2666,7 +2738,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { BranchFoldPlacement; // No tail merging opportunities if the block number is less than four. if (MF.size() > 3 && EnableTailMerge) { - unsigned TailMergeSize = TailDupPlacementThreshold + 1; + unsigned TailMergeSize = TailDupSize + 1; BranchFolder BF(/*EnableTailMerge=*/true, /*CommonHoist=*/false, *MBFI, *MBPI, TailMergeSize); diff --git a/interpreter/llvm/src/lib/CodeGen/MachineCSE.cpp b/interpreter/llvm/src/lib/CodeGen/MachineCSE.cpp index 0766f465456c9..582ff139f8860 100644 --- a/interpreter/llvm/src/lib/CodeGen/MachineCSE.cpp +++ b/interpreter/llvm/src/lib/CodeGen/MachineCSE.cpp @@ -13,7 +13,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/ScopedHashTable.h" #include "llvm/ADT/SmallSet.h" @@ -22,6 +21,7 @@ #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/Support/Debug.h" #include "llvm/Support/RecyclingAllocator.h" #include "llvm/Support/raw_ostream.h" @@ -108,12 +108,12 @@ namespace { char MachineCSE::ID = 0; char &llvm::MachineCSEID = MachineCSE::ID; -INITIALIZE_PASS_BEGIN(MachineCSE, "machine-cse", - "Machine Common Subexpression Elimination", false, false) +INITIALIZE_PASS_BEGIN(MachineCSE, DEBUG_TYPE, + "Machine Common Subexpression Elimination", false, false) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) -INITIALIZE_PASS_END(MachineCSE, "machine-cse", - "Machine Common Subexpression Elimination", false, false) +INITIALIZE_PASS_END(MachineCSE, DEBUG_TYPE, + "Machine Common Subexpression Elimination", false, false) /// The source register of a COPY machine instruction can be propagated to all /// its users, and this propagation could increase the probability of finding @@ -180,8 +180,8 @@ MachineCSE::isPhysDefTriviallyDead(unsigned Reg, I = skipDebugInstructionsForward(I, E); if (I == E) - // Reached end of block, register is obviously dead. - return true; + // Reached end of block, we don't know if register is dead or not. + return false; bool SeenDef = false; for (const MachineOperand &MO : I->operands()) { diff --git a/interpreter/llvm/src/lib/CodeGen/MachineCombiner.cpp b/interpreter/llvm/src/lib/CodeGen/MachineCombiner.cpp index 50e453e4067cc..e6f80dbb86302 100644 --- a/interpreter/llvm/src/lib/CodeGen/MachineCombiner.cpp +++ b/interpreter/llvm/src/lib/CodeGen/MachineCombiner.cpp @@ -11,8 +11,6 @@ // instructions do not lengthen the critical path or the resource depth.
//===----------------------------------------------------------------------===// -#define DEBUG_TYPE "machine-combiner" - #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineDominators.h" @@ -32,6 +30,8 @@ using namespace llvm; +#define DEBUG_TYPE "machine-combiner" + STATISTIC(NumInstCombined, "Number of machineinst combined"); namespace { @@ -86,11 +86,11 @@ class MachineCombiner : public MachineFunctionPass { char MachineCombiner::ID = 0; char &llvm::MachineCombinerID = MachineCombiner::ID; -INITIALIZE_PASS_BEGIN(MachineCombiner, "machine-combiner", +INITIALIZE_PASS_BEGIN(MachineCombiner, DEBUG_TYPE, "Machine InstCombiner", false, false) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics) -INITIALIZE_PASS_END(MachineCombiner, "machine-combiner", "Machine InstCombiner", +INITIALIZE_PASS_END(MachineCombiner, DEBUG_TYPE, "Machine InstCombiner", false, false) void MachineCombiner::getAnalysisUsage(AnalysisUsage &AU) const { diff --git a/interpreter/llvm/src/lib/CodeGen/MachineCopyPropagation.cpp b/interpreter/llvm/src/lib/CodeGen/MachineCopyPropagation.cpp index 7312dc5e94bdd..7d5a68192e6b2 100644 --- a/interpreter/llvm/src/lib/CodeGen/MachineCopyPropagation.cpp +++ b/interpreter/llvm/src/lib/CodeGen/MachineCopyPropagation.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" @@ -19,6 +18,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -27,7 +27,7 @@ #include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; -#define DEBUG_TYPE "codegen-cp" +#define DEBUG_TYPE "machine-cp" STATISTIC(NumDeletes, "Number of dead copies deleted"); @@ -79,7 +79,7 @@ namespace { char MachineCopyPropagation::ID = 0; char &llvm::MachineCopyPropagationID = MachineCopyPropagation::ID; -INITIALIZE_PASS(MachineCopyPropagation, "machine-cp", +INITIALIZE_PASS(MachineCopyPropagation, DEBUG_TYPE, "Machine Copy Propagation Pass", false, false) /// Remove any entry in \p Map where the register is a subregister or equal to diff --git a/interpreter/llvm/src/lib/CodeGen/MachineDominanceFrontier.cpp b/interpreter/llvm/src/lib/CodeGen/MachineDominanceFrontier.cpp index acb7c4810b16e..b559e4e513a6f 100644 --- a/interpreter/llvm/src/lib/CodeGen/MachineDominanceFrontier.cpp +++ b/interpreter/llvm/src/lib/CodeGen/MachineDominanceFrontier.cpp @@ -12,11 +12,11 @@ #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/Passes.h" - using namespace llvm; namespace llvm { -template class DominanceFrontierBase<MachineBasicBlock>; +template class DominanceFrontierBase<MachineBasicBlock, false>; +template class DominanceFrontierBase<MachineBasicBlock, true>; template class ForwardDominanceFrontierBase<MachineBasicBlock>; } diff --git a/interpreter/llvm/src/lib/CodeGen/MachineDominators.cpp b/interpreter/llvm/src/lib/CodeGen/MachineDominators.cpp index e3a6c51c47ad5..845e8232477c5 100644 --- a/interpreter/llvm/src/lib/CodeGen/MachineDominators.cpp +++ b/interpreter/llvm/src/lib/CodeGen/MachineDominators.cpp @@ -13,8 +13,8 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/ADT/SmallBitVector.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/Support/CommandLine.h" using namespace llvm; @@ -31,7 +31,7 @@ static cl::opt<bool, true> VerifyMachineDomInfoX( namespace llvm { template class DomTreeNodeBase<MachineBasicBlock>; -template class DominatorTreeBase<MachineBasicBlock>; +template class DominatorTreeBase<MachineBasicBlock, false>; // DomTreeBase } char MachineDominatorTree::ID = 0; @@ -49,7 +49,7 @@ void MachineDominatorTree::getAnalysisUsage(AnalysisUsage &AU) const { bool MachineDominatorTree::runOnMachineFunction(MachineFunction &F) { CriticalEdgesToSplit.clear(); NewBBs.clear(); - DT.reset(new DominatorTreeBase<MachineBasicBlock>(false)); + DT.reset(new DomTreeBase<MachineBasicBlock>()); DT->recalculate(F); return false; } @@ -144,7 +144,7 @@ void MachineDominatorTree::verifyDomTree() const { return; MachineFunction &F = *getRoot()->getParent(); - DominatorTreeBase<MachineBasicBlock> OtherDT(false); + DomTreeBase<MachineBasicBlock> OtherDT; OtherDT.recalculate(F); if (getRootNode()->getBlock() != OtherDT.getRootNode()->getBlock() || DT->compare(OtherDT)) { diff --git a/interpreter/llvm/src/lib/CodeGen/MachineFunction.cpp b/interpreter/llvm/src/lib/CodeGen/MachineFunction.cpp index ac4ccb81b884a..742b095d955e8 100644 --- a/interpreter/llvm/src/lib/CodeGen/MachineFunction.cpp +++ b/interpreter/llvm/src/lib/CodeGen/MachineFunction.cpp @@ -20,7 +20,6 @@ #include "llvm/Analysis/EHPersonalities.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineFunctionInitializer.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" @@ -52,8 +51,6 @@ static cl::opt<unsigned> cl::desc("Force the alignment of all functions."), cl::init(0), cl::Hidden); -void MachineFunctionInitializer::anchor() {} - static const char *getPropertyName(MachineFunctionProperties::Property Prop) { typedef MachineFunctionProperties::Property P; switch(Prop) { @@ -308,11 +305,11 @@ MachineFunction::DeleteMachineBasicBlock(MachineBasicBlock *MBB) { MachineMemOperand *MachineFunction::getMachineMemOperand( MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, unsigned base_alignment, const AAMDNodes &AAInfo, const MDNode *Ranges, - SynchronizationScope SynchScope, AtomicOrdering Ordering, + SyncScope::ID SSID, AtomicOrdering Ordering, AtomicOrdering FailureOrdering) { return new (Allocator) MachineMemOperand(PtrInfo, f, s, base_alignment, AAInfo, Ranges, - SynchScope, Ordering, FailureOrdering); + SSID, Ordering, FailureOrdering); } MachineMemOperand * @@ -323,13 +320,27 @@ MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO, MachineMemOperand(MachinePointerInfo(MMO->getValue(), MMO->getOffset()+Offset), MMO->getFlags(), Size, MMO->getBaseAlignment(), - AAMDNodes(), nullptr, MMO->getSynchScope(), + AAMDNodes(), nullptr, MMO->getSyncScopeID(), MMO->getOrdering(), MMO->getFailureOrdering()); return new (Allocator) MachineMemOperand(MachinePointerInfo(MMO->getPseudoValue(), MMO->getOffset()+Offset), MMO->getFlags(), Size, MMO->getBaseAlignment(), - AAMDNodes(), nullptr, MMO->getSynchScope(), + AAMDNodes(), nullptr, MMO->getSyncScopeID(), + MMO->getOrdering(), MMO->getFailureOrdering()); +} + +MachineMemOperand * +MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO, + const AAMDNodes &AAInfo) { + MachinePointerInfo MPI = MMO->getValue() ?
MachinePointerInfo(MMO->getValue(), MMO->getOffset()) : + MachinePointerInfo(MMO->getPseudoValue(), MMO->getOffset()); + + return new (Allocator) + MachineMemOperand(MPI, MMO->getFlags(), MMO->getSize(), + MMO->getBaseAlignment(), AAInfo, + MMO->getRanges(), MMO->getSyncScopeID(), MMO->getOrdering(), MMO->getFailureOrdering()); } @@ -362,7 +373,7 @@ MachineFunction::extractLoadMemRefs(MachineInstr::mmo_iterator Begin, (*I)->getFlags() & ~MachineMemOperand::MOStore, (*I)->getSize(), (*I)->getBaseAlignment(), (*I)->getAAInfo(), nullptr, - (*I)->getSynchScope(), (*I)->getOrdering(), + (*I)->getSyncScopeID(), (*I)->getOrdering(), (*I)->getFailureOrdering()); Result[Index] = JustLoad; } @@ -396,7 +407,7 @@ MachineFunction::extractStoreMemRefs(MachineInstr::mmo_iterator Begin, (*I)->getFlags() & ~MachineMemOperand::MOLoad, (*I)->getSize(), (*I)->getBaseAlignment(), (*I)->getAAInfo(), nullptr, - (*I)->getSynchScope(), (*I)->getOrdering(), + (*I)->getSyncScopeID(), (*I)->getOrdering(), (*I)->getFailureOrdering()); Result[Index] = JustStore; } diff --git a/interpreter/llvm/src/lib/CodeGen/MachineFunctionPass.cpp b/interpreter/llvm/src/lib/CodeGen/MachineFunctionPass.cpp index 2265676ff8b14..5ffe330061313 100644 --- a/interpreter/llvm/src/lib/CodeGen/MachineFunctionPass.cpp +++ b/interpreter/llvm/src/lib/CodeGen/MachineFunctionPass.cpp @@ -42,7 +42,7 @@ bool MachineFunctionPass::runOnFunction(Function &F) { return false; MachineModuleInfo &MMI = getAnalysis<MachineModuleInfo>(); - MachineFunction &MF = MMI.getMachineFunction(F); + MachineFunction &MF = MMI.getOrCreateMachineFunction(F); MachineFunctionProperties &MFProps = MF.getProperties(); diff --git a/interpreter/llvm/src/lib/CodeGen/MachineFunctionPrinterPass.cpp b/interpreter/llvm/src/lib/CodeGen/MachineFunctionPrinterPass.cpp index 0d533c3f4f23f..55d9defced3aa 100644 --- a/interpreter/llvm/src/lib/CodeGen/MachineFunctionPrinterPass.cpp +++ b/interpreter/llvm/src/lib/CodeGen/MachineFunctionPrinterPass.cpp @@ -11,9 +11,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" diff --git a/interpreter/llvm/src/lib/CodeGen/MachineInstr.cpp b/interpreter/llvm/src/lib/CodeGen/MachineInstr.cpp index d665201a5d17c..535757ed87c1a 100644 --- a/interpreter/llvm/src/lib/CodeGen/MachineInstr.cpp +++ b/interpreter/llvm/src/lib/CodeGen/MachineInstr.cpp @@ -1,4 +1,4 @@ -//===-- lib/CodeGen/MachineInstr.cpp --------------------------------------===// +//===- lib/CodeGen/MachineInstr.cpp ---------------------------------------===// // // The LLVM Compiler Infrastructure // @@ -12,20 +12,34 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/Hashing.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/Analysis/Loads.h" +#include "llvm/Analysis/MemoryLocation.h" +#include "llvm/CodeGen/GlobalISel/RegisterBank.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/InlineAsm.h" +#include "llvm/IR/InstrTypes.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" @@ -34,10 +48,14 @@ #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" #include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/LowLevelTypeImpl.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" @@ -45,6 +63,14 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" +#include <algorithm> +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <cstring> +#include <iterator> +#include <utility> + using namespace llvm; static cl::opt<bool> PrintWholeRegMask( @@ -256,7 +282,7 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const { case MachineOperand::MO_GlobalAddress: return getGlobal() == Other.getGlobal() && getOffset() == Other.getOffset(); case MachineOperand::MO_ExternalSymbol: - return !strcmp(getSymbolName(), Other.getSymbolName()) && + return strcmp(getSymbolName(), Other.getSymbolName()) == 0 && getOffset() == Other.getOffset(); case MachineOperand::MO_BlockAddress: return getBlockAddress() == Other.getBlockAddress() && @@ -421,6 +447,14 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, SmallString<16> Str; getFPImm()->getValueAPF().toString(Str); OS << "quad " << Str; + } else if (getFPImm()->getType()->isX86_FP80Ty()) { + APFloat APF = getFPImm()->getValueAPF(); + OS << "x86_fp80 0xK"; + APInt API = APF.bitcastToAPInt(); + OS << format_hex_no_prefix(API.getHiBits(16).getZExtValue(), 4, + /*Upper=*/true); + OS << format_hex_no_prefix(API.getLoBits(64).getZExtValue(), 16, + /*Upper=*/true); } else { OS << getFPImm()->getValueAPF().convertToDouble(); } @@ -533,6 +567,21 @@ unsigned MachinePointerInfo::getAddrSpace() const { return cast<PointerType>(V.get<const Value *>()->getType())->getAddressSpace(); } +/// isDereferenceable - Return true if V is always dereferenceable for +/// Offset + Size bytes. +bool MachinePointerInfo::isDereferenceable(unsigned Size, LLVMContext &C, + const DataLayout &DL) const { + if (!V.is<const Value *>()) + return false; + + const Value *BasePtr = V.get<const Value *>(); + if (BasePtr == nullptr) + return false; + + return isDereferenceableAndAlignedPointer( + BasePtr, 1, APInt(DL.getPointerSizeInBits(), Offset + Size), DL); +} + /// getConstantPool - Return a MachinePointerInfo record that refers to the /// constant pool.
MachinePointerInfo MachinePointerInfo::getConstantPool(MachineFunction &MF) { @@ -563,7 +612,7 @@ MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, Flags f, uint64_t s, unsigned int a, const AAMDNodes &AAInfo, const MDNode *Ranges, - SynchronizationScope SynchScope, + SyncScope::ID SSID, AtomicOrdering Ordering, AtomicOrdering FailureOrdering) : PtrInfo(ptrinfo), Size(s), FlagVals(f), BaseAlignLog2(Log2_32(a) + 1), @@ -574,8 +623,8 @@ MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, Flags f, assert(getBaseAlignment() == a && "Alignment is not a power of 2!"); assert((isLoad() || isStore()) && "Not a load/store!"); - AtomicInfo.SynchScope = static_cast<unsigned>(SynchScope); - assert(getSynchScope() == SynchScope && "Value truncated"); + AtomicInfo.SSID = static_cast<unsigned>(SSID); + assert(getSyncScopeID() == SSID && "Value truncated"); AtomicInfo.Ordering = static_cast<unsigned>(Ordering); assert(getOrdering() == Ordering && "Value truncated"); AtomicInfo.FailureOrdering = static_cast<unsigned>(FailureOrdering); @@ -701,6 +750,12 @@ void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST) const { OS << "(dereferenceable)"; if (isInvariant()) OS << "(invariant)"; + if (getFlags() & MOTargetFlag1) + OS << "(flag1)"; + if (getFlags() & MOTargetFlag2) + OS << "(flag2)"; + if (getFlags() & MOTargetFlag3) + OS << "(flag3)"; } //===----------------------------------------------------------------------===// @@ -723,9 +778,7 @@ void MachineInstr::addImplicitDefUseOperands(MachineFunction &MF) { /// the MCInstrDesc. MachineInstr::MachineInstr(MachineFunction &MF, const MCInstrDesc &tid, DebugLoc dl, bool NoImp) - : MCID(&tid), Parent(nullptr), Operands(nullptr), NumOperands(0), Flags(0), - AsmPrinterFlags(0), NumMemRefs(0), MemRefs(nullptr), - debugLoc(std::move(dl)) { + : MCID(&tid), debugLoc(std::move(dl)) { assert(debugLoc.hasTrivialDestructor() && "Expected trivial destructor"); // Reserve space for the expected number of operands. @@ -742,9 +795,8 @@ MachineInstr::MachineInstr(MachineFunction &MF, const MCInstrDesc &tid, /// MachineInstr ctor - Copies MachineInstr arg exactly /// MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI) - : MCID(&MI.getDesc()), Parent(nullptr), Operands(nullptr), NumOperands(0), - Flags(0), AsmPrinterFlags(0), NumMemRefs(MI.NumMemRefs), - MemRefs(MI.MemRefs), debugLoc(MI.getDebugLoc()) { + : MCID(&MI.getDesc()), NumMemRefs(MI.NumMemRefs), MemRefs(MI.MemRefs), + debugLoc(MI.getDebugLoc()) { assert(debugLoc.hasTrivialDestructor() && "Expected trivial destructor"); CapOperands = OperandCapacity::get(MI.getNumOperands()); @@ -1633,8 +1685,8 @@ bool MachineInstr::mayAlias(AliasAnalysis *AA, MachineInstr &Other, // memory objects. It can save compile time, and possibly catch some // corner cases not currently covered. - assert ((MMOa->getOffset() >= 0) && "Negative MachineMemOperand offset"); - assert ((MMOb->getOffset() >= 0) && "Negative MachineMemOperand offset"); + assert((MMOa->getOffset() >= 0) && "Negative MachineMemOperand offset"); + assert((MMOb->getOffset() >= 0) && "Negative MachineMemOperand offset"); int64_t MinOffset = std::min(MMOa->getOffset(), MMOb->getOffset()); int64_t Overlapa = MMOa->getSize() + MMOa->getOffset() - MinOffset; @@ -1667,7 +1719,7 @@ bool MachineInstr::hasOrderedMemoryRef() const { return true; // Check if any of our memory operands are ordered.
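// --- Editorial sketch, not part of the patch: building an atomic operand with
// a named scope via the constructor above; it round-trips through MIR as
// syncscope("agent"). "agent" is only an example scope string and the helper
// name is hypothetical.
static MachineMemOperand *makeScopedAtomicLoadMMO(MachineFunction &MF) {
  LLVMContext &Ctx = MF.getFunction()->getContext();
  SyncScope::ID SSID = Ctx.getOrInsertSyncScopeID("agent"); // named scope
  return MF.getMachineMemOperand(
      MachinePointerInfo(), MachineMemOperand::MOLoad, /*s=*/4,
      /*base_alignment=*/4, AAMDNodes(), /*Ranges=*/nullptr, SSID,
      AtomicOrdering::SequentiallyConsistent);
}
// --- end sketch ---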
- return any_of(memoperands(), [](const MachineMemOperand *MMO) { + return llvm::any_of(memoperands(), [](const MachineMemOperand *MMO) { return !MMO->isUnordered(); }); } @@ -1841,7 +1893,6 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, return; // Print the rest of the operands. - bool OmittedAnyCallClobbers = false; bool FirstOp = true; unsigned AsmDescOp = ~0u; unsigned AsmOpCount = 0; @@ -1878,31 +1929,6 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) VirtRegs.push_back(MO.getReg()); - // Omit call-clobbered registers which aren't used anywhere. This makes - // call instructions much less noisy on targets where calls clobber lots - // of registers. Don't rely on MO.isDead() because we may be called before - // LiveVariables is run, or we may be looking at a non-allocatable reg. - if (MRI && isCall() && - MO.isReg() && MO.isImplicit() && MO.isDef()) { - unsigned Reg = MO.getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { - if (MRI->use_empty(Reg)) { - bool HasAliasLive = false; - for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { - unsigned AliasReg = *AI; - if (!MRI->use_empty(AliasReg)) { - HasAliasLive = true; - break; - } - } - if (!HasAliasLive) { - OmittedAnyCallClobbers = true; - continue; - } - } - } - } - if (FirstOp) FirstOp = false; else OS << ","; OS << " "; if (i < getDesc().NumOperands) { @@ -1984,12 +2010,6 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, MO.print(OS, MST, TRI); } - // Briefly indicate whether any call clobbers were omitted. - if (OmittedAnyCallClobbers) { - if (!FirstOp) OS << ","; - OS << " ..."; - } - bool HaveSemi = false; const unsigned PrintableFlags = FrameSetup | FrameDestroy; if (Flags & PrintableFlags) { @@ -2255,8 +2275,8 @@ void MachineInstr::setPhysRegsDeadExcept(ArrayRef<unsigned> UsedRegs, unsigned Reg = MO.getReg(); if (!TargetRegisterInfo::isPhysicalRegister(Reg)) continue; // If there are no uses, including partial uses, the def is dead.
- if (none_of(UsedRegs, - [&](unsigned Use) { return TRI.regsOverlap(Use, Reg); })) + if (llvm::none_of(UsedRegs, + [&](unsigned Use) { return TRI.regsOverlap(Use, Reg); })) MO.setIsDead(); } diff --git a/interpreter/llvm/src/lib/CodeGen/MachineLICM.cpp b/interpreter/llvm/src/lib/CodeGen/MachineLICM.cpp index 7eb991744f01c..c7113f1fdc47a 100644 --- a/interpreter/llvm/src/lib/CodeGen/MachineLICM.cpp +++ b/interpreter/llvm/src/lib/CodeGen/MachineLICM.cpp @@ -16,7 +16,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" @@ -26,6 +25,7 @@ #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/TargetSchedule.h" #include "llvm/Support/CommandLine.h" @@ -38,7 +38,7 @@ #include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; -#define DEBUG_TYPE "machine-licm" +#define DEBUG_TYPE "machinelicm" static cl::opt<bool> AvoidSpeculation("avoid-speculation", @@ -237,13 +237,13 @@ namespace { char MachineLICM::ID = 0; char &llvm::MachineLICMID = MachineLICM::ID; -INITIALIZE_PASS_BEGIN(MachineLICM, "machinelicm", - "Machine Loop Invariant Code Motion", false, false) +INITIALIZE_PASS_BEGIN(MachineLICM, DEBUG_TYPE, + "Machine Loop Invariant Code Motion", false, false) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) -INITIALIZE_PASS_END(MachineLICM, "machinelicm", - "Machine Loop Invariant Code Motion", false, false) +INITIALIZE_PASS_END(MachineLICM, DEBUG_TYPE, + "Machine Loop Invariant Code Motion", false, false) /// Test if the given loop is the outer-most loop that has a unique predecessor. static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) { @@ -895,8 +895,11 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { // If the physreg has no defs anywhere, it's just an ambient register // and we can freely move its uses. Alternatively, if it's allocatable, // it could get allocated to something with a def during allocation. - if (!MRI->isConstantPhysReg(Reg)) - return false; + // However, if the physreg is known to always be caller saved/restored + // then this use is safe to hoist. + if (!MRI->isConstantPhysReg(Reg) && + !(TRI->isCallerPreservedPhysReg(Reg, *I.getParent()->getParent()))) + return false; // Otherwise it's safe to move.
continue; } else if (!MO.isDead()) { diff --git a/interpreter/llvm/src/lib/CodeGen/MachineModuleInfo.cpp b/interpreter/llvm/src/lib/CodeGen/MachineModuleInfo.cpp index 2f0f4297ef5c5..825290a438a6c 100644 --- a/interpreter/llvm/src/lib/CodeGen/MachineModuleInfo.cpp +++ b/interpreter/llvm/src/lib/CodeGen/MachineModuleInfo.cpp @@ -8,43 +8,51 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/ADT/PointerUnion.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/StringRef.h" #include "llvm/ADT/TinyPtrVector.h" -#include "llvm/Analysis/EHPersonalities.h" -#include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineFunctionInitializer.h" -#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/IR/Constants.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" -#include "llvm/MC/MCObjectFileInfo.h" +#include "llvm/IR/Value.h" +#include "llvm/IR/ValueHandle.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/Support/Dwarf.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" +#include +#include +#include +#include +#include + using namespace llvm; using namespace llvm::dwarf; // Handle the Pass registration stuff necessary to use DataLayout's. -INITIALIZE_TM_PASS(MachineModuleInfo, "machinemoduleinfo", - "Machine Module Information", false, false) +INITIALIZE_PASS(MachineModuleInfo, "machinemoduleinfo", + "Machine Module Information", false, false) char MachineModuleInfo::ID = 0; // Out of line virtual method. -MachineModuleInfoImpl::~MachineModuleInfoImpl() {} +MachineModuleInfoImpl::~MachineModuleInfoImpl() = default; namespace llvm { + class MMIAddrLabelMapCallbackPtr final : CallbackVH { - MMIAddrLabelMap *Map; + MMIAddrLabelMap *Map = nullptr; + public: - MMIAddrLabelMapCallbackPtr() : Map(nullptr) {} - MMIAddrLabelMapCallbackPtr(Value *V) : CallbackVH(V), Map(nullptr) {} + MMIAddrLabelMapCallbackPtr() = default; + MMIAddrLabelMapCallbackPtr(Value *V) : CallbackVH(V) {} void setPtr(BasicBlock *BB) { ValueHandleBase::operator=(BB); @@ -75,11 +83,12 @@ class MMIAddrLabelMap { /// This is a per-function list of symbols whose corresponding BasicBlock got /// deleted. These symbols need to be emitted at some point in the file, so /// AsmPrinter emits them after the function body. - DenseMap, std::vector > + DenseMap, std::vector> DeletedAddrLabelsNeedingEmission; -public: +public: MMIAddrLabelMap(MCContext &context) : Context(context) {} + ~MMIAddrLabelMap() { assert(DeletedAddrLabelsNeedingEmission.empty() && "Some labels for deleted blocks never got emitted"); @@ -93,7 +102,8 @@ class MMIAddrLabelMap { void UpdateForDeletedBlock(BasicBlock *BB); void UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New); }; -} + +} // end namespace llvm ArrayRef MMIAddrLabelMap::getAddrLabelSymbolToEmit(BasicBlock *BB) { assert(BB->hasAddressTaken() && @@ -119,7 +129,7 @@ ArrayRef MMIAddrLabelMap::getAddrLabelSymbolToEmit(BasicBlock *BB) { /// If we have any deleted symbols for F, return them. 
void MMIAddrLabelMap:: takeDeletedSymbolsForFunction(Function *F, std::vector &Result) { - DenseMap, std::vector >::iterator I = + DenseMap, std::vector>::iterator I = DeletedAddrLabelsNeedingEmission.find(F); // If there are no entries for the function, just return. @@ -130,7 +140,6 @@ takeDeletedSymbolsForFunction(Function *F, std::vector &Result) { DeletedAddrLabelsNeedingEmission.erase(I); } - void MMIAddrLabelMap::UpdateForDeletedBlock(BasicBlock *BB) { // If the block got deleted, there is no need for the symbol. If the symbol // was already emitted, we can just forget about it, otherwise we need to @@ -177,7 +186,6 @@ void MMIAddrLabelMap::UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New) { OldEntry.Symbols.end()); } - void MMIAddrLabelMapCallbackPtr::deleted() { Map->UpdateForDeletedBlock(cast(getValPtr())); } @@ -186,9 +194,6 @@ void MMIAddrLabelMapCallbackPtr::allUsesReplacedWith(Value *V2) { Map->UpdateForRAUWBlock(cast(getValPtr()), cast(V2)); } - -//===----------------------------------------------------------------------===// - MachineModuleInfo::MachineModuleInfo(const TargetMachine *TM) : ImmutablePass(ID), TM(*TM), Context(TM->getMCAsmInfo(), TM->getMCRegisterInfo(), @@ -196,11 +201,9 @@ MachineModuleInfo::MachineModuleInfo(const TargetMachine *TM) initializeMachineModuleInfoPass(*PassRegistry::getPassRegistry()); } -MachineModuleInfo::~MachineModuleInfo() { -} +MachineModuleInfo::~MachineModuleInfo() = default; bool MachineModuleInfo::doInitialization(Module &M) { - ObjFileMMI = nullptr; CurCallSite = 0; DbgInfoAvailable = UsesVAFloatArgument = UsesMorestackAddr = false; @@ -211,7 +214,6 @@ bool MachineModuleInfo::doInitialization(Module &M) { } bool MachineModuleInfo::doFinalization(Module &M) { - Personalities.clear(); delete AddrLabelSymbols; @@ -256,7 +258,14 @@ void MachineModuleInfo::addPersonality(const Function *Personality) { /// \} -MachineFunction &MachineModuleInfo::getMachineFunction(const Function &F) { +MachineFunction * +MachineModuleInfo::getMachineFunction(const Function &F) const { + auto I = MachineFunctions.find(&F); + return I != MachineFunctions.end() ? I->second.get() : nullptr; +} + +MachineFunction & +MachineModuleInfo::getOrCreateMachineFunction(const Function &F) { // Shortcut for the common case where a sequence of MachineFunctionPasses // all query for the same Function. if (LastRequest == &F) @@ -270,10 +279,6 @@ MachineFunction &MachineModuleInfo::getMachineFunction(const Function &F) { MF = new MachineFunction(&F, TM, NextFnNum++, *this); // Update the set entry. I.first->second.reset(MF); - - if (MFInitializer) - if (MFInitializer->initializeMachineFunction(*MF)) - report_fatal_error("Unable to initialize machine function"); } else { MF = I.first->second.get(); } @@ -290,10 +295,12 @@ void MachineModuleInfo::deleteMachineFunctionFor(Function &F) { } namespace { + /// This pass frees the MachineFunction object associated with a Function. 
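Stepping back to the getMachineFunction change above: lookup and creation are now separate entry points. A sketch of the resulting contract, with exampleQuery a hypothetical caller:

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/IR/Function.h"

static void exampleQuery(llvm::MachineModuleInfo &MMI, const llvm::Function &F) {
  // Pure query: returns null if F has not been lowered yet and never
  // allocates a MachineFunction as a side effect.
  if (llvm::MachineFunction *MF = MMI.getMachineFunction(F))
    MF->verify();

  // The old allocating behaviour survives under an explicit name.
  llvm::MachineFunction &NewMF = MMI.getOrCreateMachineFunction(F);
  (void)NewMF;
}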
class FreeMachineFunction : public FunctionPass { public: static char ID; + FreeMachineFunction() : FunctionPass(ID) {} void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -311,14 +318,14 @@ class FreeMachineFunction : public FunctionPass { return "Free MachineFunction"; } }; -char FreeMachineFunction::ID; + } // end anonymous namespace -namespace llvm { -FunctionPass *createFreeMachineFunctionPass() { +char FreeMachineFunction::ID; + +FunctionPass *llvm::createFreeMachineFunctionPass() { return new FreeMachineFunction(); } -} // end namespace llvm //===- MMI building helpers -----------------------------------------------===// diff --git a/interpreter/llvm/src/lib/CodeGen/MachineModuleInfoImpls.cpp b/interpreter/llvm/src/lib/CodeGen/MachineModuleInfoImpls.cpp index 4c81fd91cb829..22d519e5d88fa 100644 --- a/interpreter/llvm/src/lib/CodeGen/MachineModuleInfoImpls.cpp +++ b/interpreter/llvm/src/lib/CodeGen/MachineModuleInfoImpls.cpp @@ -23,7 +23,6 @@ using namespace llvm; // Out of line virtual method. void MachineModuleInfoMachO::anchor() {} void MachineModuleInfoELF::anchor() {} -void MachineModuleInfoWasm::anchor() {} static int SortSymbolPair(const void *LHS, const void *RHS) { typedef std::pair PairTy; diff --git a/interpreter/llvm/src/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp b/interpreter/llvm/src/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp index 6b6b5f2814a90..73c3428a6e535 100644 --- a/interpreter/llvm/src/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp +++ b/interpreter/llvm/src/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp @@ -52,6 +52,14 @@ void MachineOptimizationRemarkEmitter::emit( computeHotness(OptDiag); LLVMContext &Ctx = MF.getFunction()->getContext(); + + // If a diagnostic has a hotness value, then only emit it if its hotness + // meets the threshold. 
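The check that follows implements one small rule; stated as a standalone predicate (shouldEmitRemark is a name invented for illustration):

#include "llvm/ADT/Optional.h"
#include <cstdint>

// A remark is dropped only when it carries a hotness value that falls below
// the threshold; remarks without profile-derived hotness always pass.
static bool shouldEmitRemark(llvm::Optional<uint64_t> Hotness,
                             uint64_t Threshold) {
  return !Hotness || *Hotness >= Threshold;
}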
+ if (OptDiag.getHotness() && + *OptDiag.getHotness() < Ctx.getDiagnosticsHotnessThreshold()) { + return; + } + yaml::Output *Out = Ctx.getDiagnosticsOutputFile(); if (Out) { auto *P = &const_cast(OptDiagCommon); @@ -73,7 +81,7 @@ bool MachineOptimizationRemarkEmitterPass::runOnMachineFunction( MachineFunction &MF) { MachineBlockFrequencyInfo *MBFI; - if (MF.getFunction()->getContext().getDiagnosticHotnessRequested()) + if (MF.getFunction()->getContext().getDiagnosticsHotnessRequested()) MBFI = &getAnalysis().getBFI(); else MBFI = nullptr; diff --git a/interpreter/llvm/src/lib/CodeGen/MachineOutliner.cpp b/interpreter/llvm/src/lib/CodeGen/MachineOutliner.cpp index 581a8ad811497..fd6b2427891d1 100644 --- a/interpreter/llvm/src/lib/CodeGen/MachineOutliner.cpp +++ b/interpreter/llvm/src/lib/CodeGen/MachineOutliner.cpp @@ -901,7 +901,7 @@ namespace llvm { ModulePass *createMachineOutlinerPass() { return new MachineOutliner(); } } -INITIALIZE_PASS(MachineOutliner, "machine-outliner", +INITIALIZE_PASS(MachineOutliner, DEBUG_TYPE, "Machine Function Outliner", false, false) void MachineOutliner::pruneOverlaps(std::vector &CandidateList, @@ -1111,7 +1111,7 @@ MachineOutliner::createOutlinedFunction(Module &M, const OutlinedFunction &OF, Builder.CreateRetVoid(); MachineModuleInfo &MMI = getAnalysis(); - MachineFunction &MF = MMI.getMachineFunction(*F); + MachineFunction &MF = MMI.getOrCreateMachineFunction(*F); MachineBasicBlock &MBB = *MF.CreateMachineBasicBlock(); const TargetSubtargetInfo &STI = MF.getSubtarget(); const TargetInstrInfo &TII = *STI.getInstrInfo(); @@ -1207,7 +1207,7 @@ bool MachineOutliner::runOnModule(Module &M) { return false; MachineModuleInfo &MMI = getAnalysis(); - const TargetSubtargetInfo &STI = MMI.getMachineFunction(*M.begin()) + const TargetSubtargetInfo &STI = MMI.getOrCreateMachineFunction(*M.begin()) .getSubtarget(); const TargetRegisterInfo *TRI = STI.getRegisterInfo(); const TargetInstrInfo *TII = STI.getInstrInfo(); @@ -1216,7 +1216,7 @@ bool MachineOutliner::runOnModule(Module &M) { // Build instruction mappings for each function in the module. for (Function &F : M) { - MachineFunction &MF = MMI.getMachineFunction(F); + MachineFunction &MF = MMI.getOrCreateMachineFunction(F); // Is the function empty? Safe to outline from? 
if (F.empty() || !TII->isFunctionSafeToOutlineFrom(MF)) diff --git a/interpreter/llvm/src/lib/CodeGen/MachinePipeliner.cpp b/interpreter/llvm/src/lib/CodeGen/MachinePipeliner.cpp index d06c38cf4ed81..19e9a50e2c438 100644 --- a/interpreter/llvm/src/lib/CodeGen/MachinePipeliner.cpp +++ b/interpreter/llvm/src/lib/CodeGen/MachinePipeliner.cpp @@ -61,7 +61,6 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/PriorityQueue.h" #include "llvm/ADT/SetVector.h" @@ -69,6 +68,7 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/ValueTracking.h" @@ -715,13 +715,13 @@ char MachinePipeliner::ID = 0; int MachinePipeliner::NumTries = 0; #endif char &llvm::MachinePipelinerID = MachinePipeliner::ID; -INITIALIZE_PASS_BEGIN(MachinePipeliner, "pipeliner", +INITIALIZE_PASS_BEGIN(MachinePipeliner, DEBUG_TYPE, "Modulo Software Pipelining", false, false) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(LiveIntervals) -INITIALIZE_PASS_END(MachinePipeliner, "pipeliner", +INITIALIZE_PASS_END(MachinePipeliner, DEBUG_TYPE, "Modulo Software Pipelining", false, false) /// The "main" function for implementing Swing Modulo Scheduling. diff --git a/interpreter/llvm/src/lib/CodeGen/MachinePostDominators.cpp b/interpreter/llvm/src/lib/CodeGen/MachinePostDominators.cpp index c3f6e9249e7da..488377998cb31 100644 --- a/interpreter/llvm/src/lib/CodeGen/MachinePostDominators.cpp +++ b/interpreter/llvm/src/lib/CodeGen/MachinePostDominators.cpp @@ -16,6 +16,10 @@ using namespace llvm; +namespace llvm { +template class DominatorTreeBase; // PostDomTreeBase +} + char MachinePostDominatorTree::ID = 0; //declare initializeMachinePostDominatorTreePass @@ -24,8 +28,7 @@ INITIALIZE_PASS(MachinePostDominatorTree, "machinepostdomtree", MachinePostDominatorTree::MachinePostDominatorTree() : MachineFunctionPass(ID) { initializeMachinePostDominatorTreePass(*PassRegistry::getPassRegistry()); - DT = new DominatorTreeBase(true); //true indicate - // postdominator + DT = new PostDomTreeBase(); } FunctionPass * diff --git a/interpreter/llvm/src/lib/CodeGen/MachineRegionInfo.cpp b/interpreter/llvm/src/lib/CodeGen/MachineRegionInfo.cpp index 71ad4e6aa7f52..1e74104e89edd 100644 --- a/interpreter/llvm/src/lib/CodeGen/MachineRegionInfo.cpp +++ b/interpreter/llvm/src/lib/CodeGen/MachineRegionInfo.cpp @@ -1,7 +1,19 @@ +//===- lib/Codegen/MachineRegionInfo.cpp ----------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + #include "llvm/CodeGen/MachineRegionInfo.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/RegionInfoImpl.h" #include "llvm/CodeGen/MachinePostDominators.h" +#include "llvm/Pass.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" #define DEBUG_TYPE "machine-region-info" @@ -11,36 +23,29 @@ STATISTIC(numMachineRegions, "The # of machine regions"); STATISTIC(numMachineSimpleRegions, "The # of simple machine regions"); namespace llvm { + template class RegionBase>; template class RegionNodeBase>; template class RegionInfoBase>; -} + +} // end namespace llvm //===----------------------------------------------------------------------===// // MachineRegion implementation -// MachineRegion::MachineRegion(MachineBasicBlock *Entry, MachineBasicBlock *Exit, MachineRegionInfo* RI, MachineDominatorTree *DT, MachineRegion *Parent) : - RegionBase>(Entry, Exit, RI, DT, Parent) { + RegionBase>(Entry, Exit, RI, DT, Parent) {} -} - -MachineRegion::~MachineRegion() { } +MachineRegion::~MachineRegion() = default; //===----------------------------------------------------------------------===// // MachineRegionInfo implementation -// -MachineRegionInfo::MachineRegionInfo() : - RegionInfoBase>() { +MachineRegionInfo::MachineRegionInfo() = default; -} - -MachineRegionInfo::~MachineRegionInfo() { - -} +MachineRegionInfo::~MachineRegionInfo() = default; void MachineRegionInfo::updateStatistics(MachineRegion *R) { ++numMachineRegions; @@ -73,9 +78,7 @@ MachineRegionInfoPass::MachineRegionInfoPass() : MachineFunctionPass(ID) { initializeMachineRegionInfoPassPass(*PassRegistry::getPassRegistry()); } -MachineRegionInfoPass::~MachineRegionInfoPass() { - -} +MachineRegionInfoPass::~MachineRegionInfoPass() = default; bool MachineRegionInfoPass::runOnMachineFunction(MachineFunction &F) { releaseMemory(); @@ -137,8 +140,9 @@ INITIALIZE_PASS_END(MachineRegionInfoPass, DEBUG_TYPE, // the link time optimization. 
namespace llvm { - FunctionPass *createMachineRegionInfoPass() { - return new MachineRegionInfoPass(); - } + +FunctionPass *createMachineRegionInfoPass() { + return new MachineRegionInfoPass(); } +} // end namespace llvm diff --git a/interpreter/llvm/src/lib/CodeGen/MachineRegisterInfo.cpp b/interpreter/llvm/src/lib/CodeGen/MachineRegisterInfo.cpp index 128910f8eb2aa..9a92ee279cdc9 100644 --- a/interpreter/llvm/src/lib/CodeGen/MachineRegisterInfo.cpp +++ b/interpreter/llvm/src/lib/CodeGen/MachineRegisterInfo.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/ADT/iterator_range.h" #include "llvm/CodeGen/LowLevelType.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -18,7 +19,6 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineOperand.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" diff --git a/interpreter/llvm/src/lib/CodeGen/MachineScheduler.cpp b/interpreter/llvm/src/lib/CodeGen/MachineScheduler.cpp index 41e161f71e532..eaba9a58557c3 100644 --- a/interpreter/llvm/src/lib/CodeGen/MachineScheduler.cpp +++ b/interpreter/llvm/src/lib/CodeGen/MachineScheduler.cpp @@ -12,13 +12,14 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/MachineScheduler.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/PriorityQueue.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" @@ -30,12 +31,11 @@ #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachinePassRegistry.h" -#include "llvm/CodeGen/RegisterPressure.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/MachineScheduler.h" #include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegisterClassInfo.h" +#include "llvm/CodeGen/RegisterPressure.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/ScheduleDAGInstrs.h" #include "llvm/CodeGen/ScheduleDAGMutation.h" @@ -69,7 +69,7 @@ using namespace llvm; -#define DEBUG_TYPE "misched" +#define DEBUG_TYPE "machine-scheduler" namespace llvm { @@ -191,13 +191,13 @@ char MachineScheduler::ID = 0; char &llvm::MachineSchedulerID = MachineScheduler::ID; -INITIALIZE_PASS_BEGIN(MachineScheduler, "machine-scheduler", +INITIALIZE_PASS_BEGIN(MachineScheduler, DEBUG_TYPE, "Machine Instruction Scheduler", false, false) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) INITIALIZE_PASS_DEPENDENCY(SlotIndexes) INITIALIZE_PASS_DEPENDENCY(LiveIntervals) -INITIALIZE_PASS_END(MachineScheduler, "machine-scheduler", +INITIALIZE_PASS_END(MachineScheduler, DEBUG_TYPE, "Machine Instruction Scheduler", false, false) MachineScheduler::MachineScheduler() @@ -532,7 +532,7 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler, // thumb2 size reduction is currently an exception, so the PostMIScheduler // needs to do this. 
if (FixKillFlags) - Scheduler.fixupKills(&*MBB); + Scheduler.fixupKills(*MBB); } Scheduler.finalizeSchedule(); } @@ -542,10 +542,10 @@ void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const { } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -LLVM_DUMP_METHOD void ReadyQueue::dump() { +LLVM_DUMP_METHOD void ReadyQueue::dump() const { dbgs() << "Queue " << Name << ": "; - for (unsigned i = 0, e = Queue.size(); i < e; ++i) - dbgs() << Queue[i]->NodeNum << " "; + for (const SUnit *SU : Queue) + dbgs() << SU->NodeNum << " "; dbgs() << "\n"; } #endif @@ -609,10 +609,8 @@ void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) { /// releaseSuccessors - Call releaseSucc on each of SU's successors. void ScheduleDAGMI::releaseSuccessors(SUnit *SU) { - for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); - I != E; ++I) { - releaseSucc(SU, &*I); - } + for (SDep &Succ : SU->Succs) + releaseSucc(SU, &Succ); } /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When @@ -648,10 +646,8 @@ void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) { /// releasePredecessors - Call releasePred on each of SU's predecessors. void ScheduleDAGMI::releasePredecessors(SUnit *SU) { - for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); - I != E; ++I) { - releasePred(SU, &*I); - } + for (SDep &Pred : SU->Preds) + releasePred(SU, &Pred); } /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after @@ -724,8 +720,8 @@ void ScheduleDAGMI::schedule() { DEBUG( if (EntrySU.getInstr() != nullptr) EntrySU.dumpAll(this); - for (unsigned su = 0, e = SUnits.size(); su != e; ++su) - SUnits[su].dumpAll(this); + for (const SUnit &SU : SUnits) + SU.dumpAll(this); if (ExitSU.getInstr() != nullptr) ExitSU.dumpAll(this); ); @@ -786,28 +782,25 @@ void ScheduleDAGMI::schedule() { /// Apply each ScheduleDAGMutation step in order. void ScheduleDAGMI::postprocessDAG() { - for (unsigned i = 0, e = Mutations.size(); i < e; ++i) { - Mutations[i]->apply(this); - } + for (auto &m : Mutations) + m->apply(this); } void ScheduleDAGMI:: findRootsAndBiasEdges(SmallVectorImpl &TopRoots, SmallVectorImpl &BotRoots) { - for (std::vector::iterator - I = SUnits.begin(), E = SUnits.end(); I != E; ++I) { - SUnit *SU = &(*I); - assert(!SU->isBoundaryNode() && "Boundary node should not be in SUnits"); + for (SUnit &SU : SUnits) { + assert(!SU.isBoundaryNode() && "Boundary node should not be in SUnits"); // Order predecessors so DFSResult follows the critical path. - SU->biasCriticalPath(); + SU.biasCriticalPath(); // A SUnit is ready to top schedule if it has no predecessors. - if (!I->NumPredsLeft) - TopRoots.push_back(SU); + if (!SU.NumPredsLeft) + TopRoots.push_back(&SU); // A SUnit is ready to bottom schedule if it has no successors. - if (!I->NumSuccsLeft) - BotRoots.push_back(SU); + if (!SU.NumSuccsLeft) + BotRoots.push_back(&SU); } ExitSU.biasCriticalPath(); } @@ -822,10 +815,9 @@ void ScheduleDAGMI::initQueues(ArrayRef TopRoots, // // Nodes with unreleased weak edges can still be roots. // Release top roots in forward order. - for (SmallVectorImpl::const_iterator - I = TopRoots.begin(), E = TopRoots.end(); I != E; ++I) { - SchedImpl->releaseTopNode(*I); - } + for (SUnit *SU : TopRoots) + SchedImpl->releaseTopNode(SU); + // Release bottom roots in reverse order so the higher priority nodes appear // first. This is more natural and slightly more efficient. 
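Most of this MachineScheduler hunk is one mechanical, behavior-preserving transformation, schematically:

// Before: explicit iterator loop.
//   for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
//        I != E; ++I)
//     releaseSucc(SU, &*I);
// After: range-based for over the same container.
//   for (SDep &Succ : SU->Succs)
//     releaseSucc(SU, &Succ);

The reverse-order release loop just below is one of the few spots that keeps its explicit iterators.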
for (SmallVectorImpl::const_reverse_iterator @@ -1029,9 +1021,9 @@ void ScheduleDAGMILive::initRegPressure() { } } DEBUG(dbgs() << "Excess PSets: "; - for (unsigned i = 0, e = RegionCriticalPSets.size(); i != e; ++i) + for (const PressureChange &RCPS : RegionCriticalPSets) dbgs() << TRI->getRegPressureSetName( - RegionCriticalPSets[i].getPSet()) << " "; + RCPS.getPSet()) << " "; dbgs() << "\n"); } @@ -1040,11 +1032,10 @@ updateScheduledPressure(const SUnit *SU, const std::vector &NewMaxPressure) { const PressureDiff &PDiff = getPressureDiff(SU); unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size(); - for (PressureDiff::const_iterator I = PDiff.begin(), E = PDiff.end(); - I != E; ++I) { - if (!I->isValid()) + for (const PressureChange &PC : PDiff) { + if (!PC.isValid()) break; - unsigned ID = I->getPSet(); + unsigned ID = PC.getPSet(); while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID) ++CritIdx; if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) { @@ -1508,8 +1499,7 @@ createStoreClusterDAGMutation(const TargetInstrInfo *TII, void BaseMemOpClusterMutation::clusterNeighboringMemOps( ArrayRef MemOps, ScheduleDAGMI *DAG) { SmallVector MemOpRecords; - for (unsigned Idx = 0, End = MemOps.size(); Idx != End; ++Idx) { - SUnit *SU = MemOps[Idx]; + for (SUnit *SU : MemOps) { unsigned BaseReg; int64_t Offset; if (TII->getMemOpBaseRegImmOfs(*SU->getInstr(), BaseReg, Offset, TRI)) @@ -1537,12 +1527,11 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps( // dependent on SUa can prevent load combining due to register reuse. // Predecessor edges do not need to be copied from SUb to SUa since nearby // loads should have effectively the same inputs. - for (SUnit::const_succ_iterator - SI = SUa->Succs.begin(), SE = SUa->Succs.end(); SI != SE; ++SI) { - if (SI->getSUnit() == SUb) + for (const SDep &Succ : SUa->Succs) { + if (Succ.getSUnit() == SUb) continue; - DEBUG(dbgs() << " Copy Succ SU(" << SI->getSUnit()->NodeNum << ")\n"); - DAG->addEdge(SI->getSUnit(), SDep(SUb, SDep::Artificial)); + DEBUG(dbgs() << " Copy Succ SU(" << Succ.getSUnit()->NodeNum << ")\n"); + DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial)); } ++ClusterLength; } else @@ -1559,17 +1548,15 @@ void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAGInstrs) { DenseMap StoreChainIDs; // Map each store chain to a set of dependent MemOps. SmallVector, 32> StoreChainDependents; - for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) { - SUnit *SU = &DAG->SUnits[Idx]; - if ((IsLoad && !SU->getInstr()->mayLoad()) || - (!IsLoad && !SU->getInstr()->mayStore())) + for (SUnit &SU : DAG->SUnits) { + if ((IsLoad && !SU.getInstr()->mayLoad()) || + (!IsLoad && !SU.getInstr()->mayStore())) continue; unsigned ChainPredID = DAG->SUnits.size(); - for (SUnit::const_pred_iterator - PI = SU->Preds.begin(), PE = SU->Preds.end(); PI != PE; ++PI) { - if (PI->isCtrl()) { - ChainPredID = PI->getSUnit()->NodeNum; + for (const SDep &Pred : SU.Preds) { + if (Pred.isCtrl()) { + ChainPredID = Pred.getSUnit()->NodeNum; break; } } @@ -1580,12 +1567,12 @@ void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAGInstrs) { StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains)); if (Result.second) StoreChainDependents.resize(NumChains + 1); - StoreChainDependents[Result.first->second].push_back(SU); + StoreChainDependents[Result.first->second].push_back(&SU); } // Iterate over the store chains. 
- for (unsigned Idx = 0, End = StoreChainDependents.size(); Idx != End; ++Idx) - clusterNeighboringMemOps(StoreChainDependents[Idx], DAG); + for (auto &SCD : StoreChainDependents) + clusterNeighboringMemOps(SCD, DAG); } //===----------------------------------------------------------------------===// @@ -1728,16 +1715,14 @@ void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) { const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex()); MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def); SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef); - for (SUnit::const_succ_iterator - I = LastLocalSU->Succs.begin(), E = LastLocalSU->Succs.end(); - I != E; ++I) { - if (I->getKind() != SDep::Data || I->getReg() != LocalReg) + for (const SDep &Succ : LastLocalSU->Succs) { + if (Succ.getKind() != SDep::Data || Succ.getReg() != LocalReg) continue; - if (I->getSUnit() == GlobalSU) + if (Succ.getSUnit() == GlobalSU) continue; - if (!DAG->canAddEdge(GlobalSU, I->getSUnit())) + if (!DAG->canAddEdge(GlobalSU, Succ.getSUnit())) return; - LocalUses.push_back(I->getSUnit()); + LocalUses.push_back(Succ.getSUnit()); } // Open the top of the GlobalLI hole by constraining any earlier global uses // to precede the start of LocalLI. @@ -1745,15 +1730,14 @@ void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) { MachineInstr *FirstLocalDef = LIS->getInstructionFromIndex(LocalLI->beginIndex()); SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef); - for (SUnit::const_pred_iterator - I = GlobalSU->Preds.begin(), E = GlobalSU->Preds.end(); I != E; ++I) { - if (I->getKind() != SDep::Anti || I->getReg() != GlobalReg) + for (const SDep &Pred : GlobalSU->Preds) { + if (Pred.getKind() != SDep::Anti || Pred.getReg() != GlobalReg) continue; - if (I->getSUnit() == FirstLocalSU) + if (Pred.getSUnit() == FirstLocalSU) continue; - if (!DAG->canAddEdge(FirstLocalSU, I->getSUnit())) + if (!DAG->canAddEdge(FirstLocalSU, Pred.getSUnit())) return; - GlobalUses.push_back(I->getSUnit()); + GlobalUses.push_back(Pred.getSUnit()); } DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n"); // Add the weak edges. 
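For context on where the clustering mutation above plugs in: a scheduler instance registers it once, and postprocessDAG() (shown earlier in this hunk) applies it. A sketch along the lines of the in-tree generic scheduler factory, with createSchedLive a stand-in name:

#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineScheduler.h"

using namespace llvm;

static ScheduleDAGInstrs *createSchedLive(MachineSchedContext *C) {
  ScheduleDAGMILive *DAG =
      new ScheduleDAGMILive(C, llvm::make_unique<GenericScheduler>(C));
  // The memop clustering mutations add weak cluster edges between
  // neighboring loads/stores so the strategy keeps them adjacent.
  DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
  DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
  return DAG;
}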
@@ -1784,12 +1768,11 @@ void CopyConstrain::apply(ScheduleDAGInstrs *DAGInstrs) { RegionEndIdx = DAG->getLIS()->getInstructionIndex( *priorNonDebug(DAG->end(), DAG->begin())); - for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) { - SUnit *SU = &DAG->SUnits[Idx]; - if (!SU->getInstr()->isCopy()) + for (SUnit &SU : DAG->SUnits) { + if (!SU.getInstr()->isCopy()) continue; - constrainLocalCopy(SU, static_cast(DAG)); + constrainLocalCopy(&SU, static_cast(DAG)); } } @@ -1840,10 +1823,9 @@ init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) { if (!SchedModel->hasInstrSchedModel()) return; RemainingCounts.resize(SchedModel->getNumProcResourceKinds()); - for (std::vector::iterator - I = DAG->SUnits.begin(), E = DAG->SUnits.end(); I != E; ++I) { - const MCSchedClassDesc *SC = DAG->getSchedClass(&*I); - RemIssueCount += SchedModel->getNumMicroOps(I->getInstr(), SC) + for (SUnit &SU : DAG->SUnits) { + const MCSchedClassDesc *SC = DAG->getSchedClass(&SU); + RemIssueCount += SchedModel->getNumMicroOps(SU.getInstr(), SC) * SchedModel->getMicroOpFactor(); for (TargetSchedModel::ProcResIter PI = SchedModel->getWriteProcResBegin(SC), @@ -1957,12 +1939,11 @@ unsigned SchedBoundary:: findMaxLatency(ArrayRef ReadySUs) { SUnit *LateSU = nullptr; unsigned RemLatency = 0; - for (ArrayRef::iterator I = ReadySUs.begin(), E = ReadySUs.end(); - I != E; ++I) { - unsigned L = getUnscheduledLatency(*I); + for (SUnit *SU : ReadySUs) { + unsigned L = getUnscheduledLatency(SU); if (L > RemLatency) { RemLatency = L; - LateSU = *I; + LateSU = SU; } } if (LateSU) { @@ -2328,7 +2309,7 @@ SUnit *SchedBoundary::pickOnlyChoice() { #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) // This is useful information to dump after bumpNode. // Note that the Queue contents are more useful before pickNodeFromQueue. -LLVM_DUMP_METHOD void SchedBoundary::dumpScheduledState() { +LLVM_DUMP_METHOD void SchedBoundary::dumpScheduledState() const { unsigned ResFactor; unsigned ResCount; if (ZoneCritResIdx) { @@ -2667,7 +2648,7 @@ void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin, } } -void GenericScheduler::dumpPolicy() { +void GenericScheduler::dumpPolicy() const { // Cannot completely remove virtual function even in release mode. #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) dbgs() << "GenericScheduler RegionPolicy: " @@ -2719,10 +2700,9 @@ void GenericScheduler::registerRoots() { Rem.CriticalPath = DAG->ExitSU.getDepth(); // Some roots may not feed into ExitSU. Check all of them in case. - for (std::vector::const_iterator - I = Bot.Available.begin(), E = Bot.Available.end(); I != E; ++I) { - if ((*I)->getDepth() > Rem.CriticalPath) - Rem.CriticalPath = (*I)->getDepth(); + for (const SUnit *SU : Bot.Available) { + if (SU->getDepth() > Rem.CriticalPath) + Rem.CriticalPath = SU->getDepth(); } DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n'); if (DumpCriticalPathLength) { @@ -2969,10 +2949,10 @@ void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone, RegPressureTracker &TempTracker = const_cast(RPTracker); ReadyQueue &Q = Zone.Available; - for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) { + for (SUnit *SU : Q) { SchedCandidate TryCand(ZonePolicy); - initCandidate(TryCand, *I, Zone.isTop(), RPTracker, TempTracker); + initCandidate(TryCand, SU, Zone.isTop(), RPTracker, TempTracker); // Pass SchedBoundary only when comparing nodes from the same boundary. SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? 
&Zone : nullptr; tryCandidate(Cand, TryCand, ZoneArg); @@ -3118,18 +3098,17 @@ void GenericScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) { // Find already scheduled copies with a single physreg dependence and move // them just above the scheduled instruction. - for (SmallVectorImpl::iterator I = Deps.begin(), E = Deps.end(); - I != E; ++I) { - if (I->getKind() != SDep::Data || !TRI->isPhysicalRegister(I->getReg())) + for (SDep &Dep : Deps) { + if (Dep.getKind() != SDep::Data || !TRI->isPhysicalRegister(Dep.getReg())) continue; - SUnit *DepSU = I->getSUnit(); + SUnit *DepSU = Dep.getSUnit(); if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1) continue; MachineInstr *Copy = DepSU->getInstr(); if (!Copy->isCopy()) continue; DEBUG(dbgs() << " Rescheduling physreg copy "; - I->getSUnit()->dump(DAG)); + Dep.getSUnit()->dump(DAG)); DAG->moveInstruction(Copy, InsertPos); } } @@ -3204,10 +3183,9 @@ void PostGenericScheduler::registerRoots() { Rem.CriticalPath = DAG->ExitSU.getDepth(); // Some roots may not feed into ExitSU. Check all of them in case. - for (SmallVectorImpl::const_iterator - I = BotRoots.begin(), E = BotRoots.end(); I != E; ++I) { - if ((*I)->getDepth() > Rem.CriticalPath) - Rem.CriticalPath = (*I)->getDepth(); + for (const SUnit *SU : BotRoots) { + if (SU->getDepth() > Rem.CriticalPath) + Rem.CriticalPath = SU->getDepth(); } DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n'); if (DumpCriticalPathLength) { @@ -3233,6 +3211,12 @@ void PostGenericScheduler::tryCandidate(SchedCandidate &Cand, Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall)) return; + // Keep clustered nodes together. + if (tryGreater(TryCand.SU == DAG->getNextClusterSucc(), + Cand.SU == DAG->getNextClusterSucc(), + TryCand, Cand, Cluster)) + return; + // Avoid critical resource consumption and balance the schedule. 
if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources, TryCand, Cand, ResourceReduce)) @@ -3254,9 +3238,9 @@ void PostGenericScheduler::tryCandidate(SchedCandidate &Cand, void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) { ReadyQueue &Q = Top.Available; - for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) { + for (SUnit *SU : Q) { SchedCandidate TryCand(Cand.Policy); - TryCand.SU = *I; + TryCand.SU = SU; TryCand.AtTop = true; TryCand.initResourceDelta(DAG, SchedModel); tryCandidate(Cand, TryCand); diff --git a/interpreter/llvm/src/lib/CodeGen/MachineSink.cpp b/interpreter/llvm/src/lib/CodeGen/MachineSink.cpp index 5f87b68123f1c..79e3fea3f90c3 100644 --- a/interpreter/llvm/src/lib/CodeGen/MachineSink.cpp +++ b/interpreter/llvm/src/lib/CodeGen/MachineSink.cpp @@ -16,7 +16,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SparseBitVector.h" @@ -33,6 +32,7 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachinePostDominators.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/IR/LLVMContext.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -173,14 +173,14 @@ namespace { char MachineSinking::ID = 0; char &llvm::MachineSinkingID = MachineSinking::ID; -INITIALIZE_PASS_BEGIN(MachineSinking, "machine-sink", - "Machine code sinking", false, false) +INITIALIZE_PASS_BEGIN(MachineSinking, DEBUG_TYPE, + "Machine code sinking", false, false) INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) -INITIALIZE_PASS_END(MachineSinking, "machine-sink", - "Machine code sinking", false, false) +INITIALIZE_PASS_END(MachineSinking, DEBUG_TYPE, + "Machine code sinking", false, false) bool MachineSinking::PerformTrivialForwardCoalescing(MachineInstr &MI, MachineBasicBlock *MBB) { diff --git a/interpreter/llvm/src/lib/CodeGen/MachineTraceMetrics.cpp b/interpreter/llvm/src/lib/CodeGen/MachineTraceMetrics.cpp index 998a9645e68bf..6c5abc66fba15 100644 --- a/interpreter/llvm/src/lib/CodeGen/MachineTraceMetrics.cpp +++ b/interpreter/llvm/src/lib/CodeGen/MachineTraceMetrics.cpp @@ -7,6 +7,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/MachineTraceMetrics.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Optional.h" @@ -21,7 +22,6 @@ #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/MachineTraceMetrics.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" @@ -44,12 +44,12 @@ using namespace llvm; char MachineTraceMetrics::ID = 0; char &llvm::MachineTraceMetricsID = MachineTraceMetrics::ID; -INITIALIZE_PASS_BEGIN(MachineTraceMetrics, - "machine-trace-metrics", "Machine Trace Metrics", false, true) +INITIALIZE_PASS_BEGIN(MachineTraceMetrics, DEBUG_TYPE, + "Machine Trace Metrics", false, true) INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) -INITIALIZE_PASS_END(MachineTraceMetrics, - "machine-trace-metrics", "Machine Trace Metrics", false, true) +INITIALIZE_PASS_END(MachineTraceMetrics, DEBUG_TYPE, + 
"Machine Trace Metrics", false, true) MachineTraceMetrics::MachineTraceMetrics() : MachineFunctionPass(ID) { std::fill(std::begin(Ensembles), std::end(Ensembles), nullptr); diff --git a/interpreter/llvm/src/lib/CodeGen/MachineVerifier.cpp b/interpreter/llvm/src/lib/CodeGen/MachineVerifier.cpp index ab433273b1896..fcb544806dda0 100644 --- a/interpreter/llvm/src/lib/CodeGen/MachineVerifier.cpp +++ b/interpreter/llvm/src/lib/CodeGen/MachineVerifier.cpp @@ -23,7 +23,6 @@ // the verifier errors. //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SetOperations.h" @@ -36,6 +35,8 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/StackMaps.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Instructions.h" @@ -87,7 +88,6 @@ namespace { RegSet regsLive; RegVector regsDefined, regsDead, regsKilled; RegMaskVector regMasks; - RegSet regsLiveInButUnused; SlotIndex lastIndex; @@ -419,7 +419,6 @@ unsigned MachineVerifier::verify(MachineFunction &MF) { regsDead.clear(); regsKilled.clear(); regMasks.clear(); - regsLiveInButUnused.clear(); MBBInfoMap.clear(); return foundErrors; @@ -756,11 +755,10 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { regsLive.insert(*SubRegs); } } - regsLiveInButUnused = regsLive; const MachineFrameInfo &MFI = MF->getFrameInfo(); BitVector PR = MFI.getPristineRegs(*MF); - for (int I = PR.find_first(); I>0; I = PR.find_next(I)) { + for (unsigned I : PR.set_bits()) { for (MCSubRegIterator SubRegs(I, TRI, /*IncludeSelf=*/true); SubRegs.isValid(); ++SubRegs) regsLive.insert(*SubRegs); @@ -912,17 +910,42 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { } } - // Generic loads and stores must have a single MachineMemOperand - // describing that access. - if ((MI->getOpcode() == TargetOpcode::G_LOAD || - MI->getOpcode() == TargetOpcode::G_STORE) && - !MI->hasOneMemOperand()) - report("Generic instruction accessing memory must have one mem operand", - MI); - StringRef ErrorInfo; if (!TII->verifyInstruction(*MI, ErrorInfo)) report(ErrorInfo.data(), MI); + + // Verify properties of various specific instruction types + switch(MI->getOpcode()) { + default: + break; + case TargetOpcode::G_LOAD: + case TargetOpcode::G_STORE: + // Generic loads and stores must have a single MachineMemOperand + // describing that access. 
+ if (!MI->hasOneMemOperand()) + report("Generic instruction accessing memory must have one mem operand", + MI); + break; + case TargetOpcode::STATEPOINT: + if (!MI->getOperand(StatepointOpers::IDPos).isImm() || + !MI->getOperand(StatepointOpers::NBytesPos).isImm() || + !MI->getOperand(StatepointOpers::NCallArgsPos).isImm()) + report("meta operands to STATEPOINT not constant!", MI); + + auto VerifyStackMapConstant = [&](unsigned Offset) { + if (!MI->getOperand(Offset).isImm() || + MI->getOperand(Offset).getImm() != StackMaps::ConstantOp || + !MI->getOperand(Offset + 1).isImm()) + report("stack map constant to STATEPOINT not well formed!", MI); + }; + const unsigned VarStart = StatepointOpers(MI).getVarIdx(); + VerifyStackMapConstant(VarStart + StatepointOpers::CCOffset); + VerifyStackMapConstant(VarStart + StatepointOpers::FlagsOffset); + VerifyStackMapConstant(VarStart + StatepointOpers::NumDeoptOperandsOffset); + + // TODO: verify we have properly encoded deopt arguments + break; + } } void @@ -962,6 +985,14 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { report("Operand should be tied", MO, MONum); else if (unsigned(TiedTo) != MI->findTiedOperandIdx(MONum)) report("Tied def doesn't match MCInstrDesc", MO, MONum); + else if (TargetRegisterInfo::isPhysicalRegister(MO->getReg())) { + const MachineOperand &MOTied = MI->getOperand(TiedTo); + if (!MOTied.isReg()) + report("Tied counterpart must be a register", &MOTied, TiedTo); + else if (TargetRegisterInfo::isPhysicalRegister(MOTied.getReg()) && + MO->getReg() != MOTied.getReg()) + report("Tied physical registers must match.", &MOTied, TiedTo); + } } else if (MO->isReg() && MO->isTied()) report("Explicit operand should not be tied", MO, MONum); } else { @@ -1268,8 +1299,6 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { // Both use and def operands can read a register. if (MO->readsReg()) { - regsLiveInButUnused.erase(Reg); - if (MO->isKill()) addRegWithSubRegs(regsKilled, Reg); @@ -1925,9 +1954,11 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, SlotIndex PEnd = LiveInts->getMBBEndIdx(*PI); const VNInfo *PVNI = LR.getVNInfoBefore(PEnd); - // All predecessors must have a live-out value if this is not a - // subregister liverange. - if (!PVNI && LaneMask.none()) { + // All predecessors must have a live-out value. However, for a phi + // instruction with subregister intervals, + // only one of the subregisters (not necessarily the current one) needs to + // be defined. + if (!PVNI && (LaneMask.none() || !IsPHI)) { report("Register not marked live out of predecessor", *PI); report_context(LR, Reg, LaneMask); report_context(*VNI); diff --git a/interpreter/llvm/src/lib/CodeGen/MacroFusion.cpp b/interpreter/llvm/src/lib/CodeGen/MacroFusion.cpp new file mode 100644 index 0000000000000..633a853b2c748 --- /dev/null +++ b/interpreter/llvm/src/lib/CodeGen/MacroFusion.cpp @@ -0,0 +1,153 @@ +//===- MacroFusion.cpp - Macro Fusion -------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MacroFusion.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineScheduler.h" +#include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/CodeGen/ScheduleDAGMutation.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" + +#define DEBUG_TYPE "machine-scheduler" + +STATISTIC(NumFused, "Number of instr pairs fused"); + +using namespace llvm; + +static cl::opt<bool> EnableMacroFusion("misched-fusion", cl::Hidden, + cl::desc("Enable scheduling for macro fusion."), cl::init(true)); + +static void fuseInstructionPair(ScheduleDAGMI &DAG, SUnit &FirstSU, + SUnit &SecondSU) { + // Create a single weak edge between the adjacent instrs. The only effect is + // to cause bottom-up scheduling to heavily prioritize the clustered instrs. + DAG.addEdge(&SecondSU, SDep(&FirstSU, SDep::Cluster)); + + // Adjust the latency between the anchor instr and its + // predecessors. + for (SDep &IDep : SecondSU.Preds) + if (IDep.getSUnit() == &FirstSU) + IDep.setLatency(0); + + // Adjust the latency between the dependent instr and its + // predecessors. + for (SDep &IDep : FirstSU.Succs) + if (IDep.getSUnit() == &SecondSU) + IDep.setLatency(0); + + DEBUG(dbgs() << DAG.MF.getName() << "(): Macro fuse "; + FirstSU.print(dbgs(), &DAG); dbgs() << " - "; + SecondSU.print(dbgs(), &DAG); dbgs() << " / "; + dbgs() << DAG.TII->getName(FirstSU.getInstr()->getOpcode()) << " - " << + DAG.TII->getName(SecondSU.getInstr()->getOpcode()) << '\n'; ); + + if (&SecondSU != &DAG.ExitSU) + // Make instructions dependent on FirstSU also dependent on SecondSU to + // prevent them from being scheduled between FirstSU and SecondSU. + for (const SDep &SI : FirstSU.Succs) { + if (SI.getSUnit() == &SecondSU) + continue; + DEBUG(dbgs() << " Copy Succ "; + SI.getSUnit()->print(dbgs(), &DAG); dbgs() << '\n';); + DAG.addEdge(SI.getSUnit(), SDep(&SecondSU, SDep::Artificial)); + } + + ++NumFused; +} + +namespace { + +/// \brief Post-process the DAG to create cluster edges between instrs that may +/// be fused by the processor into a single operation. +class MacroFusion : public ScheduleDAGMutation { + ShouldSchedulePredTy shouldScheduleAdjacent; + bool FuseBlock; + bool scheduleAdjacentImpl(ScheduleDAGMI &DAG, SUnit &AnchorSU); + +public: + MacroFusion(ShouldSchedulePredTy shouldScheduleAdjacent, bool FuseBlock) + : shouldScheduleAdjacent(shouldScheduleAdjacent), FuseBlock(FuseBlock) {} + + void apply(ScheduleDAGInstrs *DAGInstrs) override; +}; + +} // end anonymous namespace + +void MacroFusion::apply(ScheduleDAGInstrs *DAGInstrs) { + ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI *>(DAGInstrs); + + if (FuseBlock) + // For each of the SUnits in the scheduling block, try to fuse the instr in + // it with one in its predecessors. + for (SUnit &ISU : DAG->SUnits) + scheduleAdjacentImpl(*DAG, ISU); + + if (DAG->ExitSU.getInstr()) + // Try to fuse the instr in the ExitSU with one in its predecessors. + scheduleAdjacentImpl(*DAG, DAG->ExitSU); +} + +/// \brief Implement the fusion of instr pairs in the scheduling DAG, +/// anchored at the instr in AnchorSU. 
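Before scheduleAdjacentImpl below, the shape of the callback a target supplies is worth spelling out. A sketch with a deliberately trivial predicate (real ones live in the targets, for example pairing a compare with a conditional branch):

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"

// FirstMI may be null, which asks "could anything fuse with SecondMI?";
// non-null asks about one concrete pair. This placeholder fuses nothing.
static bool trivialShouldScheduleAdjacent(const llvm::TargetInstrInfo &TII,
                                          const llvm::TargetSubtargetInfo &STI,
                                          const llvm::MachineInstr *FirstMI,
                                          const llvm::MachineInstr &SecondMI) {
  return false;
}

// A scheduler would then opt in with:
//   DAG->addMutation(createMacroFusionDAGMutation(trivialShouldScheduleAdjacent));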
+bool MacroFusion::scheduleAdjacentImpl(ScheduleDAGMI &DAG, SUnit &AnchorSU) { + const MachineInstr &AnchorMI = *AnchorSU.getInstr(); + const TargetInstrInfo &TII = *DAG.TII; + const TargetSubtargetInfo &ST = DAG.MF.getSubtarget(); + + // Check if the anchor instr may be fused. + if (!shouldScheduleAdjacent(TII, ST, nullptr, AnchorMI)) + return false; + + // Explore fusion candidates among the dependencies of the anchor instr. + for (SDep &Dep : AnchorSU.Preds) { + // Ignore dependencies that don't enforce ordering. + if (Dep.getKind() == SDep::Anti || Dep.getKind() == SDep::Output || + Dep.isWeak()) + continue; + + SUnit &DepSU = *Dep.getSUnit(); + if (DepSU.isBoundaryNode()) + continue; + + const MachineInstr *DepMI = DepSU.getInstr(); + if (!shouldScheduleAdjacent(TII, ST, DepMI, AnchorMI)) + continue; + + fuseInstructionPair(DAG, DepSU, AnchorSU); + return true; + } + + return false; +} + +std::unique_ptr<ScheduleDAGMutation> +llvm::createMacroFusionDAGMutation( + ShouldSchedulePredTy shouldScheduleAdjacent) { + if (EnableMacroFusion) + return llvm::make_unique<MacroFusion>(shouldScheduleAdjacent, true); + return nullptr; +} + +std::unique_ptr<ScheduleDAGMutation> +llvm::createBranchMacroFusionDAGMutation( + ShouldSchedulePredTy shouldScheduleAdjacent) { + if (EnableMacroFusion) + return llvm::make_unique<MacroFusion>(shouldScheduleAdjacent, false); + return nullptr; +} diff --git a/interpreter/llvm/src/lib/CodeGen/OptimizePHIs.cpp b/interpreter/llvm/src/lib/CodeGen/OptimizePHIs.cpp index 2a8531f337a0f..f7aeb4204c5bb 100644 --- a/interpreter/llvm/src/lib/CodeGen/OptimizePHIs.cpp +++ b/interpreter/llvm/src/lib/CodeGen/OptimizePHIs.cpp @@ -12,18 +12,18 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/IR/Function.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; -#define DEBUG_TYPE "phi-opt" +#define DEBUG_TYPE "opt-phis" STATISTIC(NumPHICycles, "Number of PHI cycles replaced"); STATISTIC(NumDeadPHICycles, "Number of dead PHI cycles"); @@ -59,7 +59,7 @@ namespace { char OptimizePHIs::ID = 0; char &llvm::OptimizePHIsID = OptimizePHIs::ID; -INITIALIZE_PASS(OptimizePHIs, "opt-phis", +INITIALIZE_PASS(OptimizePHIs, DEBUG_TYPE, "Optimize machine instruction PHIs", false, false) bool OptimizePHIs::runOnMachineFunction(MachineFunction &Fn) { diff --git a/interpreter/llvm/src/lib/CodeGen/PHIElimination.cpp b/interpreter/llvm/src/lib/CodeGen/PHIElimination.cpp index db2264b2439dc..9c898fa40d7e7 100644 --- a/interpreter/llvm/src/lib/CodeGen/PHIElimination.cpp +++ b/interpreter/llvm/src/lib/CodeGen/PHIElimination.cpp @@ -112,11 +112,11 @@ STATISTIC(NumReused, "Number of reused lowered phis"); char PHIElimination::ID = 0; char& llvm::PHIEliminationID = PHIElimination::ID; -INITIALIZE_PASS_BEGIN(PHIElimination, "phi-node-elimination", +INITIALIZE_PASS_BEGIN(PHIElimination, DEBUG_TYPE, "Eliminate PHI nodes for register allocation", false, false) INITIALIZE_PASS_DEPENDENCY(LiveVariables) -INITIALIZE_PASS_END(PHIElimination, "phi-node-elimination", +INITIALIZE_PASS_END(PHIElimination, DEBUG_TYPE, "Eliminate PHI nodes for register allocation", false, false) void PHIElimination::getAnalysisUsage(AnalysisUsage &AU) const { diff --git a/interpreter/llvm/src/lib/CodeGen/PatchableFunction.cpp 
b/interpreter/llvm/src/lib/CodeGen/PatchableFunction.cpp index 00e72971a01e8..513e82716564e 100644 --- a/interpreter/llvm/src/lib/CodeGen/PatchableFunction.cpp +++ b/interpreter/llvm/src/lib/CodeGen/PatchableFunction.cpp @@ -12,10 +12,10 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git a/interpreter/llvm/src/lib/CodeGen/PeepholeOptimizer.cpp b/interpreter/llvm/src/lib/CodeGen/PeepholeOptimizer.cpp index 6d643457e9a96..b13f6b68c420f 100644 --- a/interpreter/llvm/src/lib/CodeGen/PeepholeOptimizer.cpp +++ b/interpreter/llvm/src/lib/CodeGen/PeepholeOptimizer.cpp @@ -66,7 +66,6 @@ // C = copy A <-- same-bank copy //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" @@ -77,8 +76,10 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -119,6 +120,14 @@ static cl::opt RewritePHILimit( "rewrite-phi-limit", cl::Hidden, cl::init(10), cl::desc("Limit the length of PHI chains to lookup")); +// Limit the length of recurrence chain when evaluating the benefit of +// commuting operands. +static cl::opt MaxRecurrenceChain( + "recurrence-chain-limit", cl::Hidden, cl::init(3), + cl::desc("Maximum length of recurrence chain when evaluating the benefit " + "of commuting operands")); + + STATISTIC(NumReuse, "Number of extension results reused"); STATISTIC(NumCmps, "Number of compares eliminated"); STATISTIC(NumImmFold, "Number of move immediate folded"); @@ -131,12 +140,14 @@ STATISTIC(NumNAPhysCopies, "Number of non-allocatable physical copies removed"); namespace { class ValueTrackerResult; + class RecurrenceInstr; class PeepholeOptimizer : public MachineFunctionPass { const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; MachineRegisterInfo *MRI; MachineDominatorTree *DT; // Machine dominator tree + MachineLoopInfo *MLI; public: static char ID; // Pass identification @@ -150,6 +161,8 @@ namespace { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); MachineFunctionPass::getAnalysisUsage(AU); + AU.addRequired(); + AU.addPreserved(); if (Aggressive) { AU.addRequired(); AU.addPreserved(); @@ -160,6 +173,9 @@ namespace { typedef SmallDenseMap RewriteMapTy; + /// \brief Sequence of instructions that formulate recurrence cycle. 
+ typedef SmallVector<RecurrenceInstr, 4> RecurrenceCycle; + private: bool optimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB); bool optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, @@ -170,6 +186,7 @@ namespace { bool optimizeCoalescableCopy(MachineInstr *MI); bool optimizeUncoalescableCopy(MachineInstr *MI, SmallPtrSetImpl<MachineInstr *> &LocalMIs); + bool optimizeRecurrence(MachineInstr &PHI); bool findNextSource(unsigned Reg, unsigned SubReg, RewriteMapTy &RewriteMap); bool isMoveImmediate(MachineInstr *MI, @@ -178,6 +195,13 @@ namespace { bool foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB, SmallSet<unsigned, 4> &ImmDefRegs, DenseMap<unsigned, MachineInstr *> &ImmDefMIs); + /// \brief Finds recurrence cycles, but only ones that are formulated around + /// a def operand and a use operand that are tied. If there is a use + /// operand commutable with the tied use operand, find the recurrence cycle + /// along that operand as well. + bool findTargetRecurrence(unsigned Reg, + const SmallSet<unsigned, 2> &TargetRegs, + RecurrenceCycle &RC); /// \brief If copy instruction \p MI is a virtual register copy, track it in /// the set \p CopySrcRegs and \p CopyMIs. If this virtual register was @@ -222,6 +246,28 @@ namespace { } }; + /// \brief Helper class to hold instructions that are inside recurrence + /// cycles. The recurrence cycle is formulated around 1) a def operand and its + /// tied use operand, or 2) a def operand and a use operand that is commutable + /// with another use operand which is tied to the def operand. In the latter + /// case, the indices of the tied use operand and the commutable use operand are + /// maintained with CommutePair. + class RecurrenceInstr { + public: + typedef std::pair<unsigned, unsigned> IndexPair; + + RecurrenceInstr(MachineInstr *MI) : MI(MI) {} + RecurrenceInstr(MachineInstr *MI, unsigned Idx1, unsigned Idx2) + : MI(MI), CommutePair(std::make_pair(Idx1, Idx2)) {} + + MachineInstr *getMI() const { return MI; } + Optional<IndexPair> getCommutePair() const { return CommutePair; } + + private: + MachineInstr *MI; + Optional<IndexPair> CommutePair; + }; + /// \brief Helper class to hold a reply for ValueTracker queries. Contains the /// returned sources for a given search and the instructions where the sources /// were tracked from. @@ -412,6 +458,7 @@ char &llvm::PeepholeOptimizerID = PeepholeOptimizer::ID; INITIALIZE_PASS_BEGIN(PeepholeOptimizer, DEBUG_TYPE, "Peephole Optimizations", false, false) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) INITIALIZE_PASS_END(PeepholeOptimizer, DEBUG_TYPE, "Peephole Optimizations", false, false) @@ -1487,6 +1534,113 @@ bool PeepholeOptimizer::foldRedundantNAPhysCopy( return false; } +/// \brief Returns true if \p MO is a virtual register operand. +static bool isVirtualRegisterOperand(MachineOperand &MO) { + if (!MO.isReg()) + return false; + return TargetRegisterInfo::isVirtualRegister(MO.getReg()); +} + +bool PeepholeOptimizer::findTargetRecurrence( + unsigned Reg, const SmallSet<unsigned, 2> &TargetRegs, + RecurrenceCycle &RC) { + // Recurrence found if Reg is in TargetRegs. + if (TargetRegs.count(Reg)) + return true; + + // TODO: Currently, we only allow the last instruction of the recurrence + // cycle (the instruction that feeds the PHI instruction) to have more than + // one use to guarantee that commuting operands does not tie registers + // with overlapping live ranges. Once we have actual live range info of + // each register, this constraint can be relaxed. + if (!MRI->hasOneNonDBGUse(Reg)) + return false; + + // Give up if the recurrence chain length is longer than the limit. 
+ if (RC.size() >= MaxRecurrenceChain) + return false; + + MachineInstr &MI = *(MRI->use_instr_nodbg_begin(Reg)); + unsigned Idx = MI.findRegisterUseOperandIdx(Reg); + + // Only interested in recurrences whose instructions have only one def, which + // is a virtual register. + if (MI.getDesc().getNumDefs() != 1) + return false; + + MachineOperand &DefOp = MI.getOperand(0); + if (!isVirtualRegisterOperand(DefOp)) + return false; + + // Check if the def operand of MI is tied to any use operand. We are only + // interested in the case that all the instructions in the recurrence chain + // have their def operand tied to one of the use operands. + unsigned TiedUseIdx; + if (!MI.isRegTiedToUseOperand(0, &TiedUseIdx)) + return false; + + if (Idx == TiedUseIdx) { + RC.push_back(RecurrenceInstr(&MI)); + return findTargetRecurrence(DefOp.getReg(), TargetRegs, RC); + } else { + // If Idx is not TiedUseIdx, check if Idx is commutable with TiedUseIdx. + unsigned CommIdx = TargetInstrInfo::CommuteAnyOperandIndex; + if (TII->findCommutedOpIndices(MI, Idx, CommIdx) && CommIdx == TiedUseIdx) { + RC.push_back(RecurrenceInstr(&MI, Idx, CommIdx)); + return findTargetRecurrence(DefOp.getReg(), TargetRegs, RC); + } + } + + return false; +} + +/// \brief Phi instructions will eventually be lowered to copy instructions. If +/// the phi is in a loop header, a recurrence may be formulated around the source and +/// destination of the phi. In such cases, commuting operands of the instructions +/// in the recurrence may enable coalescing of the copy instruction generated +/// from the phi. For example, if there is a recurrence of +/// +/// LoopHeader: +/// %vreg1 = phi(%vreg0, %vreg100) +/// LoopLatch: +/// %vreg0 = ADD %vreg2, %vreg1 +/// +/// , the fact that vreg0 and vreg2 are in the same tied operands set makes +/// the coalescing of the copy instruction generated from the phi in +/// LoopHeader (i.e. %vreg1 = COPY %vreg0) impossible, because %vreg1 and +/// %vreg2 have overlapping live ranges. This introduces an additional move +/// instruction to the final assembly. However, if we commute %vreg2 and +/// %vreg1 of the ADD instruction, the redundant move instruction can be +/// avoided. +bool PeepholeOptimizer::optimizeRecurrence(MachineInstr &PHI) { + SmallSet<unsigned, 2> TargetRegs; + for (unsigned Idx = 1; Idx < PHI.getNumOperands(); Idx += 2) { + MachineOperand &MO = PHI.getOperand(Idx); + assert(isVirtualRegisterOperand(MO) && "Invalid PHI instruction"); + TargetRegs.insert(MO.getReg()); + } + + bool Changed = false; + RecurrenceCycle RC; + if (findTargetRecurrence(PHI.getOperand(0).getReg(), TargetRegs, RC)) { + // Commute operands of instructions in RC if necessary so that the copy to + // be generated from PHI can be coalesced. + DEBUG(dbgs() << "Optimize recurrence chain from " << PHI); + for (auto &RI : RC) { + DEBUG(dbgs() << "\tInst: " << *(RI.getMI())); + auto CP = RI.getCommutePair(); + if (CP) { + Changed = true; + TII->commuteInstruction(*(RI.getMI()), false, (*CP).first, + (*CP).second); + DEBUG(dbgs() << "\t\tCommuted: " << *(RI.getMI())); + } + } + } + + return Changed; +} + bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(*MF.getFunction())) return false; @@ -1501,6 +1655,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { TRI = MF.getSubtarget().getRegisterInfo(); MRI = &MF.getRegInfo(); DT = Aggressive ? 
&getAnalysis() : nullptr; + MLI = &getAnalysis(); bool Changed = false; @@ -1529,6 +1684,8 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { SmallSet CopySrcRegs; DenseMap CopySrcMIs; + bool IsLoopHeader = MLI->isLoopHeader(&MBB); + for (MachineBasicBlock::iterator MII = MBB.begin(), MIE = MBB.end(); MII != MIE; ) { MachineInstr *MI = &*MII; @@ -1540,9 +1697,16 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { if (MI->isDebugValue()) continue; - if (MI->isPosition() || MI->isPHI()) + if (MI->isPosition()) continue; + if (IsLoopHeader && MI->isPHI()) { + if (optimizeRecurrence(*MI)) { + Changed = true; + continue; + } + } + if (!MI->isCopy()) { for (const auto &Op : MI->operands()) { // Visit all operands: definitions can be implicit or explicit. @@ -1667,7 +1831,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { MRI->markUsesInDebugValueAsUndef(FoldedReg); FoldAsLoadDefCandidates.erase(FoldedReg); ++NumLoadFold; - + // MI is replaced with FoldMI so we can continue trying to fold Changed = true; MI = FoldMI; @@ -1675,7 +1839,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { } } } - + // If we run into an instruction we can't fold across, discard // the load candidates. Note: We might be able to fold *into* this // instruction, so this needs to be after the folding logic. diff --git a/interpreter/llvm/src/lib/CodeGen/PostRAHazardRecognizer.cpp b/interpreter/llvm/src/lib/CodeGen/PostRAHazardRecognizer.cpp index 5bc5f7524dbfb..4a50d895340a5 100644 --- a/interpreter/llvm/src/lib/CodeGen/PostRAHazardRecognizer.cpp +++ b/interpreter/llvm/src/lib/CodeGen/PostRAHazardRecognizer.cpp @@ -23,13 +23,13 @@ /// This pass traverses all the instructions in a program in top-down order. /// In contrast to the instruction scheduling passes, this pass never resets /// the hazard recognizer to ensure it can correctly handles noop hazards at -/// the begining of blocks. +/// the beginning of blocks. 
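The recurrence hunks above are easiest to appreciate at the source level. A minimal sketch, not part of the patch: the reduction below lowers to exactly the loop-header PHI plus tied ADD shape described in the optimizeRecurrence comment, and on a two-address target the operand order of the add decides whether the copy generated for the PHI can coalesce.

// A loop-carried reduction: 'sum' reaches the loop header through a PHI.
// After lowering, the latch holds roughly
//   %vreg0 = ADD %vreg2, %vreg1   ; %vreg2 = x[i], %vreg1 = PHI value
// so the def is tied to x[i]'s register; commuting %vreg2 and %vreg1 ties
// it to the PHI value instead, letting %vreg1 = COPY %vreg0 coalesce away.
long reduce(const long *x, unsigned n) {
  long sum = 0;
  for (unsigned i = 0; i != n; ++i)
    sum = x[i] + sum;
  return sum;
}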
// //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" diff --git a/interpreter/llvm/src/lib/CodeGen/PostRASchedulerList.cpp b/interpreter/llvm/src/lib/CodeGen/PostRASchedulerList.cpp index 61dccdde8f1dc..f2249f9e37e0f 100644 --- a/interpreter/llvm/src/lib/CodeGen/PostRASchedulerList.cpp +++ b/interpreter/llvm/src/lib/CodeGen/PostRASchedulerList.cpp @@ -200,7 +200,7 @@ namespace { char &llvm::PostRASchedulerID = PostRAScheduler::ID; -INITIALIZE_PASS(PostRAScheduler, "post-RA-sched", +INITIALIZE_PASS(PostRAScheduler, DEBUG_TYPE, "Post RA top-down list latency scheduler", false, false) SchedulePostRATDList::SchedulePostRATDList( @@ -367,7 +367,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { Scheduler.finishBlock(); // Update register kills - Scheduler.fixupKills(&MBB); + Scheduler.fixupKills(MBB); } return true; diff --git a/interpreter/llvm/src/lib/CodeGen/ProcessImplicitDefs.cpp b/interpreter/llvm/src/lib/CodeGen/ProcessImplicitDefs.cpp index d27ea2f51867a..0118580a626af 100644 --- a/interpreter/llvm/src/lib/CodeGen/ProcessImplicitDefs.cpp +++ b/interpreter/llvm/src/lib/CodeGen/ProcessImplicitDefs.cpp @@ -20,7 +20,7 @@ using namespace llvm; -#define DEBUG_TYPE "processimplicitdefs" +#define DEBUG_TYPE "processimpdefs" namespace { /// Process IMPLICIT_DEF instructions and make sure there is one implicit_def @@ -51,9 +51,7 @@ class ProcessImplicitDefs : public MachineFunctionPass { char ProcessImplicitDefs::ID = 0; char &llvm::ProcessImplicitDefsID = ProcessImplicitDefs::ID; -INITIALIZE_PASS_BEGIN(ProcessImplicitDefs, "processimpdefs", - "Process Implicit Definitions", false, false) -INITIALIZE_PASS_END(ProcessImplicitDefs, "processimpdefs", +INITIALIZE_PASS(ProcessImplicitDefs, DEBUG_TYPE, "Process Implicit Definitions", false, false) void ProcessImplicitDefs::getAnalysisUsage(AnalysisUsage &AU) const { diff --git a/interpreter/llvm/src/lib/CodeGen/PrologEpilogInserter.cpp b/interpreter/llvm/src/lib/CodeGen/PrologEpilogInserter.cpp index d2afeae9e70b1..e9f8d43fe6433 100644 --- a/interpreter/llvm/src/lib/CodeGen/PrologEpilogInserter.cpp +++ b/interpreter/llvm/src/lib/CodeGen/PrologEpilogInserter.cpp @@ -45,7 +45,7 @@ using namespace llvm; -#define DEBUG_TYPE "pei" +#define DEBUG_TYPE "prologepilog" typedef SmallVector MBBVector; static void doSpillCalleeSavedRegs(MachineFunction &MF, RegScavenger *RS, @@ -54,25 +54,12 @@ static void doSpillCalleeSavedRegs(MachineFunction &MF, RegScavenger *RS, const MBBVector &SaveBlocks, const MBBVector &RestoreBlocks); -static void doScavengeFrameVirtualRegs(MachineFunction &MF, RegScavenger *RS); - namespace { class PEI : public MachineFunctionPass { public: static char ID; - explicit PEI(const TargetMachine *TM = nullptr) : MachineFunctionPass(ID) { + PEI() : MachineFunctionPass(ID) { initializePEIPass(*PassRegistry::getPassRegistry()); - - if (TM && (!TM->usesPhysRegsForPEI())) { - SpillCalleeSavedRegisters = [](MachineFunction &, RegScavenger *, - unsigned &, unsigned &, const MBBVector &, - const MBBVector &) {}; - ScavengeFrameVirtualRegs = [](MachineFunction &, RegScavenger *) {}; - } else { - SpillCalleeSavedRegisters = doSpillCalleeSavedRegs; - ScavengeFrameVirtualRegs = doScavengeFrameVirtualRegs; - UsesCalleeSaves = true; - } } 
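The PEI hunks above and below move the TargetMachine-dependent setup out of the constructor and into runOnMachineFunction, since the pass is now default-constructed without a TargetMachine. A toy sketch of this lazy-initialization shape, with hypothetical types rather than the LLVM API:

#include <functional>

struct ToyFunction { bool UsesPhysRegsForPEI; };

struct ToyPEI {
  std::function<void()> SpillCalleeSaved; // deliberately left empty here

  bool runOn(ToyFunction &F) {
    if (!SpillCalleeSaved) { // first run: the "target" is finally known
      if (F.UsesPhysRegsForPEI)
        SpillCalleeSaved = [] { /* real spilling would go here */ };
      else
        SpillCalleeSaved = [] {}; // virtual-reg PEI targets: no-op
    }
    SpillCalleeSaved();
    return true;
  }
};

int main() {
  ToyFunction F{true};
  ToyPEI P;
  return P.runOn(F) ? 0 : 1;
}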
void getAnalysisUsage(AnalysisUsage &AU) const override; @@ -95,7 +82,7 @@ class PEI : public MachineFunctionPass { const MBBVector &SaveBlocks, const MBBVector &RestoreBlocks)> SpillCalleeSavedRegisters; - std::function + std::function ScavengeFrameVirtualRegs; bool UsesCalleeSaves = false; @@ -140,21 +127,19 @@ WarnStackSize("warn-stack-size", cl::Hidden, cl::init((unsigned)-1), cl::desc("Warn for stack size bigger than the given" " number")); -INITIALIZE_TM_PASS_BEGIN(PEI, "prologepilog", "Prologue/Epilogue Insertion", - false, false) +INITIALIZE_PASS_BEGIN(PEI, DEBUG_TYPE, "Prologue/Epilogue Insertion", false, + false) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(StackProtector) -INITIALIZE_TM_PASS_END(PEI, "prologepilog", - "Prologue/Epilogue Insertion & Frame Finalization", - false, false) +INITIALIZE_PASS_END(PEI, DEBUG_TYPE, + "Prologue/Epilogue Insertion & Frame Finalization", false, + false) -MachineFunctionPass * -llvm::createPrologEpilogInserterPass(const TargetMachine *TM) { - return new PEI(TM); +MachineFunctionPass *llvm::createPrologEpilogInserterPass() { + return new PEI(); } -STATISTIC(NumScavengedRegs, "Number of frame index regs scavenged"); STATISTIC(NumBytesStackSpace, "Number of bytes used for stack in all functions"); @@ -174,6 +159,20 @@ typedef SmallSetVector StackObjSet; /// frame indexes with appropriate references. /// bool PEI::runOnMachineFunction(MachineFunction &Fn) { + if (!SpillCalleeSavedRegisters) { + const TargetMachine &TM = Fn.getTarget(); + if (!TM.usesPhysRegsForPEI()) { + SpillCalleeSavedRegisters = [](MachineFunction &, RegScavenger *, + unsigned &, unsigned &, const MBBVector &, + const MBBVector &) {}; + ScavengeFrameVirtualRegs = [](MachineFunction &, RegScavenger &) {}; + } else { + SpillCalleeSavedRegisters = doSpillCalleeSavedRegs; + ScavengeFrameVirtualRegs = scavengeFrameVirtualRegs; + UsesCalleeSaves = true; + } + } + const Function* F = Fn.getFunction(); const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo(); const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering(); @@ -220,7 +219,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { // post-pass, scavenge the virtual registers that frame index elimination // inserted. if (TRI->requiresRegisterScavenging(Fn) && FrameIndexVirtualScavenging) { - ScavengeFrameVirtualRegs(Fn, RS); + ScavengeFrameVirtualRegs(Fn, *RS); // Clear any vregs created by virtual scavenging. Fn.getRegInfo().clearVirtRegs(); @@ -448,12 +447,13 @@ static void updateLiveness(MachineFunction &MF) { const std::vector &CSI = MFI.getCalleeSavedInfo(); + MachineRegisterInfo &MRI = MF.getRegInfo(); for (unsigned i = 0, e = CSI.size(); i != e; ++i) { for (MachineBasicBlock *MBB : Visited) { MCPhysReg Reg = CSI[i].getReg(); // Add the callee-saved register as live-in. // It's killed at the spill. - if (!MBB->isLiveIn(Reg)) + if (!MRI.isReserved(Reg) && !MBB->isLiveIn(Reg)) MBB->addLiveIn(Reg); } } @@ -1150,92 +1150,3 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn, RS->forward(MI); } } - -/// doScavengeFrameVirtualRegs - Replace all frame index virtual registers -/// with physical registers. Use the register scavenger to find an -/// appropriate register to use. -/// -/// FIXME: Iterating over the instruction stream is unnecessary. We can simply -/// iterate over the vreg use list, which at this point only contains machine -/// operands for which eliminateFrameIndex need a new scratch reg. 
-static void -doScavengeFrameVirtualRegs(MachineFunction &MF, RegScavenger *RS) { - // Run through the instructions and find any virtual registers. - MachineRegisterInfo &MRI = MF.getRegInfo(); - for (MachineBasicBlock &MBB : MF) { - RS->enterBasicBlock(MBB); - - int SPAdj = 0; - - // The instruction stream may change in the loop, so check MBB.end() - // directly. - for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ) { - // We might end up here again with a NULL iterator if we scavenged a - // register for which we inserted spill code for definition by what was - // originally the first instruction in MBB. - if (I == MachineBasicBlock::iterator(nullptr)) - I = MBB.begin(); - - const MachineInstr &MI = *I; - MachineBasicBlock::iterator J = std::next(I); - MachineBasicBlock::iterator P = - I == MBB.begin() ? MachineBasicBlock::iterator(nullptr) - : std::prev(I); - - // RS should process this instruction before we might scavenge at this - // location. This is because we might be replacing a virtual register - // defined by this instruction, and if so, registers killed by this - // instruction are available, and defined registers are not. - RS->forward(I); - - for (const MachineOperand &MO : MI.operands()) { - if (!MO.isReg()) - continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) - continue; - - // When we first encounter a new virtual register, it - // must be a definition. - assert(MO.isDef() && "frame index virtual missing def!"); - // Scavenge a new scratch register - const TargetRegisterClass *RC = MRI.getRegClass(Reg); - unsigned ScratchReg = RS->scavengeRegister(RC, J, SPAdj); - - ++NumScavengedRegs; - - // Replace this reference to the virtual register with the - // scratch register. - assert(ScratchReg && "Missing scratch register!"); - MRI.replaceRegWith(Reg, ScratchReg); - - // Because this instruction was processed by the RS before this - // register was allocated, make sure that the RS now records the - // register as being used. - RS->setRegUsed(ScratchReg); - } - - // If the scavenger needed to use one of its spill slots, the - // spill code will have been inserted in between I and J. This is a - // problem because we need the spill code before I: Move I to just - // prior to J. - if (I != std::prev(J)) { - MBB.splice(J, &MBB, I); - - // Before we move I, we need to prepare the RS to visit I again. - // Specifically, RS will assert if it sees uses of registers that - // it believes are undefined. Because we have already processed - // register kills in I, when it visits I again, it will believe that - // those registers are undefined. To avoid this situation, unprocess - // the instruction I. 
- assert(RS->getCurrentPosition() == I && - "The register scavenger has an unexpected position"); - I = P; - RS->unprocess(P); - } else - ++I; - } - } - - MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs); -} diff --git a/interpreter/llvm/src/lib/CodeGen/RegAllocBase.cpp b/interpreter/llvm/src/lib/CodeGen/RegAllocBase.cpp index fb49a934431c4..7b4fbace2c1c1 100644 --- a/interpreter/llvm/src/lib/CodeGen/RegAllocBase.cpp +++ b/interpreter/llvm/src/lib/CodeGen/RegAllocBase.cpp @@ -21,13 +21,12 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/VirtRegMap.h" -#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/Timer.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; @@ -134,18 +133,19 @@ void RegAllocBase::allocatePhysRegs() { if (AvailablePhysReg) Matrix->assign(*VirtReg, AvailablePhysReg); - for (VirtRegVec::iterator I = SplitVRegs.begin(), E = SplitVRegs.end(); - I != E; ++I) { - LiveInterval *SplitVirtReg = &LIS->getInterval(*I); + for (unsigned Reg : SplitVRegs) { + assert(LIS->hasInterval(Reg)); + + LiveInterval *SplitVirtReg = &LIS->getInterval(Reg); assert(!VRM->hasPhys(SplitVirtReg->reg) && "Register already assigned"); if (MRI->reg_nodbg_empty(SplitVirtReg->reg)) { + assert(SplitVirtReg->empty() && "Non-empty but used interval"); DEBUG(dbgs() << "not queueing unused " << *SplitVirtReg << '\n'); aboutToRemoveInterval(*SplitVirtReg); LIS->removeInterval(SplitVirtReg->reg); continue; } DEBUG(dbgs() << "queuing new interval: " << *SplitVirtReg << "\n"); - assert(!SplitVirtReg->empty() && "expecting non-empty interval"); assert(TargetRegisterInfo::isVirtualRegister(SplitVirtReg->reg) && "expect split value in virtual register"); enqueue(SplitVirtReg); diff --git a/interpreter/llvm/src/lib/CodeGen/RegAllocBasic.cpp b/interpreter/llvm/src/lib/CodeGen/RegAllocBasic.cpp index a87fed3a687e1..774306154a894 100644 --- a/interpreter/llvm/src/lib/CodeGen/RegAllocBasic.cpp +++ b/interpreter/llvm/src/lib/CodeGen/RegAllocBasic.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" #include "AllocationOrder.h" #include "LiveDebugVariables.h" #include "RegAllocBase.h" @@ -28,6 +27,7 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegAllocRegistry.h" #include "llvm/CodeGen/VirtRegMap.h" #include "llvm/PassAnalysisSupport.h" @@ -58,8 +58,9 @@ namespace { /// whenever a register is unavailable. This is not practical in production but /// provides a useful baseline both for measuring other allocators and comparing /// the speed of the basic algorithm against other styles of allocators. -class RABasic : public MachineFunctionPass, public RegAllocBase -{ +class RABasic : public MachineFunctionPass, + public RegAllocBase, + private LiveRangeEdit::Delegate { // context MachineFunction *MF; @@ -72,6 +73,9 @@ class RABasic : public MachineFunctionPass, public RegAllocBase // selectOrSplit(). 
BitVector UsableRegs; + bool LRE_CanEraseVirtReg(unsigned) override; + void LRE_WillShrinkVirtReg(unsigned) override; + public: RABasic(); @@ -121,17 +125,46 @@ char RABasic::ID = 0; } // end anonymous namespace +char &llvm::RABasicID = RABasic::ID; + +INITIALIZE_PASS_BEGIN(RABasic, "regallocbasic", "Basic Register Allocator", + false, false) +INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables) +INITIALIZE_PASS_DEPENDENCY(SlotIndexes) +INITIALIZE_PASS_DEPENDENCY(LiveIntervals) +INITIALIZE_PASS_DEPENDENCY(RegisterCoalescer) +INITIALIZE_PASS_DEPENDENCY(MachineScheduler) +INITIALIZE_PASS_DEPENDENCY(LiveStacks) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(VirtRegMap) +INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix) +INITIALIZE_PASS_END(RABasic, "regallocbasic", "Basic Register Allocator", false, + false) + +bool RABasic::LRE_CanEraseVirtReg(unsigned VirtReg) { + if (VRM->hasPhys(VirtReg)) { + LiveInterval &LI = LIS->getInterval(VirtReg); + Matrix->unassign(LI); + aboutToRemoveInterval(LI); + return true; + } + // Unassigned virtreg is probably in the priority queue. + // RegAllocBase will erase it after dequeueing. + return false; +} + +void RABasic::LRE_WillShrinkVirtReg(unsigned VirtReg) { + if (!VRM->hasPhys(VirtReg)) + return; + + // Register is assigned, put it back on the queue for reassignment. + LiveInterval &LI = LIS->getInterval(VirtReg); + Matrix->unassign(LI); + enqueue(&LI); +} + RABasic::RABasic(): MachineFunctionPass(ID) { - initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry()); - initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); - initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); - initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry()); - initializeMachineSchedulerPass(*PassRegistry::getPassRegistry()); - initializeLiveStacksPass(*PassRegistry::getPassRegistry()); - initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry()); - initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry()); - initializeVirtRegMapPass(*PassRegistry::getPassRegistry()); - initializeLiveRegMatrixPass(*PassRegistry::getPassRegistry()); } void RABasic::getAnalysisUsage(AnalysisUsage &AU) const { @@ -200,7 +233,7 @@ bool RABasic::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg, Matrix->unassign(Spill); // Spill the extracted interval. - LiveRangeEdit LRE(&Spill, SplitVRegs, *MF, *LIS, VRM, nullptr, &DeadRemats); + LiveRangeEdit LRE(&Spill, SplitVRegs, *MF, *LIS, VRM, this, &DeadRemats); spiller().spill(LRE); } return true; @@ -259,7 +292,7 @@ unsigned RABasic::selectOrSplit(LiveInterval &VirtReg, DEBUG(dbgs() << "spilling: " << VirtReg << '\n'); if (!VirtReg.isSpillable()) return ~0u; - LiveRangeEdit LRE(&VirtReg, SplitVRegs, *MF, *LIS, VRM, nullptr, &DeadRemats); + LiveRangeEdit LRE(&VirtReg, SplitVRegs, *MF, *LIS, VRM, this, &DeadRemats); spiller().spill(LRE); // The live virtual register requesting allocation was spilled, so tell diff --git a/interpreter/llvm/src/lib/CodeGen/RegAllocFast.cpp b/interpreter/llvm/src/lib/CodeGen/RegAllocFast.cpp index c606b7b833104..d5538be4bba25 100644 --- a/interpreter/llvm/src/lib/CodeGen/RegAllocFast.cpp +++ b/interpreter/llvm/src/lib/CodeGen/RegAllocFast.cpp @@ -203,6 +203,8 @@ namespace { char RAFast::ID = 0; } +INITIALIZE_PASS(RAFast, "regallocfast", "Fast Register Allocator", false, false) + /// getStackSpaceFor - This allocates space for the specified virtual register /// to be held on the stack. 
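RABasic above now inherits privately from LiveRangeEdit::Delegate and passes this to LiveRangeEdit, so the editor can consult the allocator before erasing or shrinking a live range. A self-contained toy analogue of that callback arrangement (hypothetical names, not the LLVM API):

#include <cstdio>

struct Delegate {
  virtual bool canEraseVirtReg(unsigned VReg) = 0;
  virtual ~Delegate() = default;
};

struct ToyLiveRangeEdit {
  Delegate *D;
  void eraseIfAllowed(unsigned VReg) {
    if (D && D->canEraseVirtReg(VReg)) // ask the allocator first
      std::printf("erasing vreg %u\n", VReg);
  }
};

struct ToyAllocator : private Delegate { // the delegate role stays hidden
  bool canEraseVirtReg(unsigned VReg) override { return VReg != 0; }
  void spill(unsigned VReg) {
    ToyLiveRangeEdit LRE{this}; // mirrors LiveRangeEdit(..., this, ...)
    LRE.eraseIfAllowed(VReg);
  }
};

int main() {
  ToyAllocator RA;
  RA.spill(1);
  return 0;
}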
int RAFast::getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC) { @@ -244,8 +246,15 @@ void RAFast::addKillFlag(const LiveReg &LR) { if (MO.isUse() && !LR.LastUse->isRegTiedToDefOperand(LR.LastOpNum)) { if (MO.getReg() == LR.PhysReg) MO.setIsKill(); - else - LR.LastUse->addRegisterKilled(LR.PhysReg, TRI, true); + // else, don't do anything: we are probably redefining a + // subreg of this register and, given we don't track which + // lanes are actually dead, we cannot insert a kill flag here. + // Otherwise we may end up in a situation like this: + // ... = (MO) physreg:sub1, physreg + // ... <== Here we would allow a later pass to reuse physreg:sub1 + // which is potentially wrong. + // LR:sub0 = ... + // ... = LR.sub1 <== This is going to use physreg:sub1 } } diff --git a/interpreter/llvm/src/lib/CodeGen/RegAllocGreedy.cpp b/interpreter/llvm/src/lib/CodeGen/RegAllocGreedy.cpp index 06500289c971a..020e81eca2dd2 100644 --- a/interpreter/llvm/src/lib/CodeGen/RegAllocGreedy.cpp +++ b/interpreter/llvm/src/lib/CodeGen/RegAllocGreedy.cpp @@ -1,4 +1,4 @@ -//===-- RegAllocGreedy.cpp - greedy register allocator --------------------===// +//===- RegAllocGreedy.cpp - greedy register allocator ---------------------===// // // The LLVM Compiler Infrastructure // @@ -19,36 +19,63 @@ #include "SpillPlacement.h" #include "Spiller.h" #include "SplitKit.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/IndexedMap.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/OptimizationDiagnosticInfo.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/EdgeBundles.h" +#include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveIntervalUnion.h" #include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/LiveRegMatrix.h" #include "llvm/CodeGen/LiveStackAnalysis.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegAllocRegistry.h" #include "llvm/CodeGen/RegisterClassInfo.h" +#include "llvm/CodeGen/SlotIndexes.h" #include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/IR/Function.h" #include "llvm/IR/LLVMContext.h" -#include "llvm/PassAnalysisSupport.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Pass.h" +#include "llvm/Support/BlockFrequency.h" #include "llvm/Support/BranchProbability.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/Timer.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" +#include +#include +#include +#include #include +#include +#include using
namespace llvm; @@ -106,13 +133,14 @@ static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator", createGreedyRegisterAllocator); namespace { + class RAGreedy : public MachineFunctionPass, public RegAllocBase, private LiveRangeEdit::Delegate { // Convenient shortcuts. - typedef std::priority_queue > PQueue; - typedef SmallPtrSet SmallLISet; - typedef SmallSet SmallVirtRegSet; + using PQueue = std::priority_queue>; + using SmallLISet = SmallPtrSet; + using SmallVirtRegSet = SmallSet; // context MachineFunction *MF; @@ -201,12 +229,12 @@ class RAGreedy : public MachineFunctionPass, // RegInfo - Keep additional information about each live range. struct RegInfo { - LiveRangeStage Stage; + LiveRangeStage Stage = RS_New; // Cascade - Eviction loop prevention. See canEvictInterference(). - unsigned Cascade; + unsigned Cascade = 0; - RegInfo() : Stage(RS_New), Cascade(0) {} + RegInfo() = default; }; IndexedMap ExtraRegInfo; @@ -232,10 +260,10 @@ class RAGreedy : public MachineFunctionPass, /// Cost of evicting interference. struct EvictionCost { - unsigned BrokenHints; ///< Total number of broken hints. - float MaxWeight; ///< Maximum spill weight evicted. + unsigned BrokenHints = 0; ///< Total number of broken hints. + float MaxWeight = 0; ///< Maximum spill weight evicted. - EvictionCost(): BrokenHints(0), MaxWeight(0) {} + EvictionCost() = default; bool isMax() const { return BrokenHints == ~0u; } @@ -285,8 +313,7 @@ class RAGreedy : public MachineFunctionPass, // Set B[i] = C for every live bundle where B[i] was NoCand. unsigned getBundles(SmallVectorImpl &B, unsigned C) { unsigned Count = 0; - for (int i = LiveBundles.find_first(); i >= 0; - i = LiveBundles.find_next(i)) + for (unsigned i : LiveBundles.set_bits()) if (B[i] == NoCand) { B[i] = C; Count++; @@ -414,10 +441,12 @@ class RAGreedy : public MachineFunctionPass, /// Its currently assigned register. /// In case of a physical register Reg == PhysReg. unsigned PhysReg; + HintInfo(BlockFrequency Freq, unsigned Reg, unsigned PhysReg) : Freq(Freq), Reg(Reg), PhysReg(PhysReg) {} }; - typedef SmallVector HintsInfo; + using HintsInfo = SmallVector; + BlockFrequency getBrokenHintFreq(const HintsInfo &, unsigned); void collectHintInfo(unsigned, HintsInfo &); @@ -437,6 +466,7 @@ class RAGreedy : public MachineFunctionPass, } } }; + } // end anonymous namespace char RAGreedy::ID = 0; @@ -476,7 +506,6 @@ const char *const RAGreedy::StageName[] = { // This helps stabilize decisions based on float comparisons. const float Hysteresis = (2007 / 2048.0f); // 0.97998046875 - FunctionPass* llvm::createGreedyRegisterAllocator() { return new RAGreedy(); } @@ -512,7 +541,6 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { MachineFunctionPass::getAnalysisUsage(AU); } - //===----------------------------------------------------------------------===// // LiveRangeEdit delegate methods //===----------------------------------------------------------------------===// @@ -635,7 +663,6 @@ LiveInterval *RAGreedy::dequeue(PQueue &CurQueue) { return LI; } - //===----------------------------------------------------------------------===// // Direct Assignment //===----------------------------------------------------------------------===// @@ -683,7 +710,6 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg, return CheapReg ? 
CheapReg : PhysReg; } - //===----------------------------------------------------------------------===// // Interference eviction //===----------------------------------------------------------------------===// @@ -955,7 +981,6 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, return BestPhys; } - //===----------------------------------------------------------------------===// // Region Splitting //===----------------------------------------------------------------------===// @@ -1026,7 +1051,6 @@ bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf, return SpillPlacer->scanActiveBundles(); } - /// addThroughConstraints - Add constraints and links to SpillPlacer from the /// live-through blocks in Blocks. void RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf, @@ -1084,7 +1108,7 @@ void RAGreedy::growRegion(GlobalSplitCandidate &Cand) { unsigned Visited = 0; #endif - for (;;) { + while (true) { ArrayRef NewBundles = SpillPlacer->getRecentPositive(); // Find new through blocks in the periphery of PrefRegBundles. for (int i = 0, e = NewBundles.size(); i != e; ++i) { @@ -1162,9 +1186,8 @@ bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) { } DEBUG({ - for (int i = Cand.LiveBundles.find_first(); i>=0; - i = Cand.LiveBundles.find_next(i)) - dbgs() << " EB#" << i; + for (int i : Cand.LiveBundles.set_bits()) + dbgs() << " EB#" << i; dbgs() << ".\n"; }); return true; @@ -1199,8 +1222,8 @@ BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand) { for (unsigned i = 0; i != UseBlocks.size(); ++i) { const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; SpillPlacement::BlockConstraint &BC = SplitConstraints[i]; - bool RegIn = LiveBundles[Bundles->getBundle(BC.Number, 0)]; - bool RegOut = LiveBundles[Bundles->getBundle(BC.Number, 1)]; + bool RegIn = LiveBundles[Bundles->getBundle(BC.Number, false)]; + bool RegOut = LiveBundles[Bundles->getBundle(BC.Number, true)]; unsigned Ins = 0; if (BI.LiveIn) @@ -1213,8 +1236,8 @@ BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand) { for (unsigned i = 0, e = Cand.ActiveBlocks.size(); i != e; ++i) { unsigned Number = Cand.ActiveBlocks[i]; - bool RegIn = LiveBundles[Bundles->getBundle(Number, 0)]; - bool RegOut = LiveBundles[Bundles->getBundle(Number, 1)]; + bool RegIn = LiveBundles[Bundles->getBundle(Number, false)]; + bool RegOut = LiveBundles[Bundles->getBundle(Number, true)]; if (!RegIn && !RegOut) continue; if (RegIn && RegOut) { @@ -1266,7 +1289,7 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit, unsigned IntvIn = 0, IntvOut = 0; SlotIndex IntfIn, IntfOut; if (BI.LiveIn) { - unsigned CandIn = BundleCand[Bundles->getBundle(Number, 0)]; + unsigned CandIn = BundleCand[Bundles->getBundle(Number, false)]; if (CandIn != NoCand) { GlobalSplitCandidate &Cand = GlobalCand[CandIn]; IntvIn = Cand.IntvIdx; @@ -1275,7 +1298,7 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit, } } if (BI.LiveOut) { - unsigned CandOut = BundleCand[Bundles->getBundle(Number, 1)]; + unsigned CandOut = BundleCand[Bundles->getBundle(Number, true)]; if (CandOut != NoCand) { GlobalSplitCandidate &Cand = GlobalCand[CandOut]; IntvOut = Cand.IntvIdx; @@ -1315,7 +1338,7 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit, unsigned IntvIn = 0, IntvOut = 0; SlotIndex IntfIn, IntfOut; - unsigned CandIn = BundleCand[Bundles->getBundle(Number, 0)]; + unsigned CandIn = BundleCand[Bundles->getBundle(Number, false)]; if (CandIn != NoCand) { GlobalSplitCandidate &Cand = GlobalCand[CandIn]; IntvIn = Cand.IntvIdx; @@ 
-1323,7 +1346,7 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit, IntfIn = Cand.Intf.first(); } - unsigned CandOut = BundleCand[Bundles->getBundle(Number, 1)]; + unsigned CandOut = BundleCand[Bundles->getBundle(Number, true)]; if (CandOut != NoCand) { GlobalSplitCandidate &Cand = GlobalCand[CandOut]; IntvOut = Cand.IntvIdx; @@ -1482,8 +1505,7 @@ unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg, DEBUG({ dbgs() << ", total = "; MBFI->printBlockFreq(dbgs(), Cost) << " with bundles"; - for (int i = Cand.LiveBundles.find_first(); i>=0; - i = Cand.LiveBundles.find_next(i)) + for (int i : Cand.LiveBundles.set_bits()) dbgs() << " EB#" << i; dbgs() << ".\n"; }); @@ -1536,7 +1558,6 @@ unsigned RAGreedy::doRegionSplit(LiveInterval &VirtReg, unsigned BestCand, return 0; } - //===----------------------------------------------------------------------===// // Per-Block Splitting //===----------------------------------------------------------------------===// @@ -1583,7 +1604,6 @@ unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order, return 0; } - //===----------------------------------------------------------------------===// // Per-Instruction Splitting //===----------------------------------------------------------------------===// @@ -1667,12 +1687,10 @@ RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order, return 0; } - //===----------------------------------------------------------------------===// // Local Splitting //===----------------------------------------------------------------------===// - /// calcGapWeights - Compute the maximum spill weight that needs to be evicted /// in order to use PhysReg between two entries in SA->UseSlots. /// @@ -1743,7 +1761,7 @@ void RAGreedy::calcGapWeights(unsigned PhysReg, break; for (; Gap != NumGaps; ++Gap) { - GapWeight[Gap] = llvm::huge_valf; + GapWeight[Gap] = huge_valf; if (Uses[Gap+1].getBaseIndex() >= I->end) break; } @@ -1849,7 +1867,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, // Remove any gaps with regmask clobbers. if (Matrix->checkRegMaskInterference(VirtReg, PhysReg)) for (unsigned i = 0, e = RegMaskGaps.size(); i != e; ++i) - GapWeight[RegMaskGaps[i]] = llvm::huge_valf; + GapWeight[RegMaskGaps[i]] = huge_valf; // Try to find the best sequence of gaps to close. // The new spill weight must be larger than any gap interference. @@ -1861,7 +1879,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, // It is the spill weight that needs to be evicted. float MaxGap = GapWeight[0]; - for (;;) { + while (true) { // Live before/after split? const bool LiveBefore = SplitBefore != 0 || BI.LiveIn; const bool LiveAfter = SplitAfter != NumGaps || BI.LiveOut; @@ -1884,7 +1902,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, // Legally, without causing looping? bool Legal = !ProgressRequired || NewGaps < NumGaps; - if (Legal && MaxGap < llvm::huge_valf) { + if (Legal && MaxGap < huge_valf) { // Estimate the new spill weight. Each instruction reads or writes the // register. Conservatively assume there are no read-modify-write // instructions. @@ -2440,7 +2458,7 @@ void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) { do { Reg = RecoloringCandidates.pop_back_val(); - // We cannot recolor physcal register. + // We cannot recolor physical register. 
if (TargetRegisterInfo::isPhysicalRegister(Reg)) continue; @@ -2604,7 +2622,7 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, } // If we couldn't allocate a register from spilling, there is probably some - // invalid inline assembly. The base class wil report it. + // invalid inline assembly. The base class will report it. if (Stage >= RS_Done || !VirtReg.isSpillable()) return tryLastChanceRecoloring(VirtReg, Order, NewVRegs, FixedRegisters, Depth); @@ -2683,6 +2701,7 @@ void RAGreedy::reportNumberOfSplillsReloads(MachineLoop *L, unsigned &Reloads, if (Reloads || FoldedReloads || Spills || FoldedSpills) { using namespace ore; + MachineOptimizationRemarkMissed R(DEBUG_TYPE, "LoopSpillReload", L->getStartLoc(), L->getHeader()); if (Spills) diff --git a/interpreter/llvm/src/lib/CodeGen/RegAllocPBQP.cpp b/interpreter/llvm/src/lib/CodeGen/RegAllocPBQP.cpp index 3b5964eef55e4..9778103575fab 100644 --- a/interpreter/llvm/src/lib/CodeGen/RegAllocPBQP.cpp +++ b/interpreter/llvm/src/lib/CodeGen/RegAllocPBQP.cpp @@ -29,15 +29,16 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/RegAllocPBQP.h" #include "RegisterCoalescer.h" #include "Spiller.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/CalcSpillWeights.h" @@ -49,12 +50,13 @@ #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PBQP/Graph.h" +#include "llvm/CodeGen/PBQP/Math.h" #include "llvm/CodeGen/PBQP/Solution.h" #include "llvm/CodeGen/PBQPRAConstraint.h" -#include "llvm/CodeGen/RegAllocPBQP.h" #include "llvm/CodeGen/RegAllocRegistry.h" #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/CodeGen/VirtRegMap.h" @@ -82,8 +84,8 @@ #include #include #include -#include #include +#include using namespace llvm; @@ -139,13 +141,13 @@ class RegAllocPBQP : public MachineFunctionPass { } private: - typedef std::map LI2NodeMap; - typedef std::vector Node2LIMap; - typedef std::vector AllowedSet; - typedef std::vector AllowedSetMap; - typedef std::pair RegPair; - typedef std::map CoalesceMap; - typedef std::set RegSet; + using LI2NodeMap = std::map; + using Node2LIMap = std::vector; + using AllowedSet = std::vector; + using AllowedSetMap = std::vector; + using RegPair = std::pair; + using CoalesceMap = std::map; + using RegSet = std::set; char *customPassID; @@ -212,12 +214,12 @@ class SpillCosts : public PBQPRAConstraint { /// @brief Add interference edges between overlapping vregs. 
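The typedef-to-using rewrites here and in RAGreedy above are mechanical modernization, but not purely cosmetic: the using spelling reads left-to-right and, unlike typedef, can be templated. A minimal illustration with placeholder element types (the patch's own aliases have their template arguments elided by the rendering):

#include <map>
#include <string>
#include <vector>

// The two spellings produce identical aliases...
typedef std::map<std::string, unsigned> NodeMapOld;
using NodeMapNew = std::map<std::string, unsigned>;

// ...but only 'using' admits template parameters.
template <typename T>
using Matrix = std::vector<std::vector<T>>;

int main() {
  NodeMapNew M;
  M["vreg0"] = 7;
  Matrix<unsigned> Rows(1);
  Rows[0].push_back(M["vreg0"]);
  return 0;
}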
class Interference : public PBQPRAConstraint { private: - typedef const PBQP::RegAlloc::AllowedRegVector* AllowedRegVecPtr; - typedef std::pair IKey; - typedef DenseMap IMatrixCache; - typedef DenseSet DisjointAllowedRegsCache; - typedef std::pair IEdgeKey; - typedef DenseSet IEdgeCache; + using AllowedRegVecPtr = const PBQP::RegAlloc::AllowedRegVector *; + using IKey = std::pair; + using IMatrixCache = DenseMap; + using DisjointAllowedRegsCache = DenseSet; + using IEdgeKey = std::pair; + using IEdgeCache = DenseSet; bool haveDisjointAllowedRegs(const PBQPRAGraph &G, PBQPRAGraph::NodeId NId, PBQPRAGraph::NodeId MId, @@ -252,8 +254,8 @@ class Interference : public PBQPRAConstraint { // for the fast interference graph construction algorithm. The last is there // to save us from looking up node ids via the VRegToNode map in the graph // metadata. - typedef std::tuple - IntervalInfo; + using IntervalInfo = + std::tuple; static SlotIndex getStartPoint(const IntervalInfo &I) { return std::get<0>(I)->segments[std::get<1>(I)].start; @@ -320,9 +322,10 @@ class Interference : public PBQPRAConstraint { // Cache known disjoint allowed registers pairs DisjointAllowedRegsCache D; - typedef std::set IntervalSet; - typedef std::priority_queue, - decltype(&lowestStartPoint)> IntervalQueue; + using IntervalSet = std::set; + using IntervalQueue = + std::priority_queue, + decltype(&lowestStartPoint)>; IntervalSet Active(lowestEndPoint); IntervalQueue Inactive(lowestStartPoint); @@ -658,7 +661,6 @@ void RegAllocPBQP::spillVReg(unsigned VReg, SmallVectorImpl &NewIntervals, MachineFunction &MF, LiveIntervals &LIS, VirtRegMap &VRM, Spiller &VRegSpiller) { - VRegsToAlloc.erase(VReg); LiveRangeEdit LRE(&LIS.getInterval(VReg), NewIntervals, MF, LIS, &VRM, nullptr, &DeadRemats); @@ -736,7 +738,15 @@ void RegAllocPBQP::finalizeAlloc(MachineFunction &MF, if (PReg == 0) { const TargetRegisterClass &RC = *MRI.getRegClass(LI.reg); - PReg = RC.getRawAllocationOrder(MF).front(); + const ArrayRef RawPRegOrder = RC.getRawAllocationOrder(MF); + for (unsigned CandidateReg : RawPRegOrder) { + if (!VRM.getRegInfo().isReserved(CandidateReg)) { + PReg = CandidateReg; + break; + } + } + assert(PReg && + "No un-reserved physical registers in this register class"); } VRM.assignVirt2Phys(LI.reg, PReg); @@ -914,5 +924,3 @@ FunctionPass *llvm::createPBQPRegisterAllocator(char *customPassID) { FunctionPass* llvm::createDefaultPBQPRegisterAllocator() { return createPBQPRegisterAllocator(); } - -#undef DEBUG_TYPE diff --git a/interpreter/llvm/src/lib/CodeGen/RegisterClassInfo.cpp b/interpreter/llvm/src/lib/CodeGen/RegisterClassInfo.cpp index 82a3bd9a0bd17..956dec39fc381 100644 --- a/interpreter/llvm/src/lib/CodeGen/RegisterClassInfo.cpp +++ b/interpreter/llvm/src/lib/CodeGen/RegisterClassInfo.cpp @@ -14,12 +14,12 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" diff --git a/interpreter/llvm/src/lib/CodeGen/RegisterCoalescer.cpp b/interpreter/llvm/src/lib/CodeGen/RegisterCoalescer.cpp index 1803ea2b92490..a67d07b36474a 100644 --- a/interpreter/llvm/src/lib/CodeGen/RegisterCoalescer.cpp +++ 
b/interpreter/llvm/src/lib/CodeGen/RegisterCoalescer.cpp @@ -979,6 +979,11 @@ bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP, IntB.createDeadDef(NewCopyIdx, LIS->getVNInfoAllocator()); for (LiveInterval::SubRange &SR : IntB.subranges()) SR.createDeadDef(NewCopyIdx, LIS->getVNInfoAllocator()); + + // If the newly created instruction has the address of an instruction that was + // deleted before (its memory was recycled by the allocator), it needs to be + // removed from the deleted list. + ErasedInstrs.erase(NewCopyMI); } else { DEBUG(dbgs() << "\tremovePartialRedundancy: Remove the copy from BB#" << MBB.getNumber() << '\t' << CopyMI); @@ -989,6 +994,8 @@ bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP, // While updating the live-ranges, we only look at slot indices and // never go back to the instruction. LIS->RemoveMachineInstrFromMaps(CopyMI); + // Mark instructions as deleted. + ErasedInstrs.insert(&CopyMI); CopyMI.eraseFromParent(); // Update the liveness. @@ -1220,6 +1227,34 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, SR->createDeadDef(DefIndex, Alloc); } } + + // Make sure that the subrange for the resultant undef is removed. + // For example: + // vreg1:sub1 = LOAD CONSTANT 1 + // vreg2 = COPY vreg1 + // ==> + // vreg2:sub1 = LOAD CONSTANT 1 + // ; Correct, but we need to remove the subrange for vreg2:sub0 + // ; as it is now undef + if (NewIdx != 0 && DstInt.hasSubRanges()) { + // The affected subregister segments can be removed. + SlotIndex CurrIdx = LIS->getInstructionIndex(NewMI); + LaneBitmask DstMask = TRI->getSubRegIndexLaneMask(NewIdx); + bool UpdatedSubRanges = false; + for (LiveInterval::SubRange &SR : DstInt.subranges()) { + if ((SR.LaneMask & DstMask).none()) { + DEBUG(dbgs() << "Removing undefined SubRange " + << PrintLaneMask(SR.LaneMask) << " : " << SR << "\n"); + // VNI is in ValNo - remove any segments in this SubRange that have this ValNo + if (VNInfo *RmValNo = SR.getVNInfoAt(CurrIdx.getRegSlot())) { + SR.removeValNo(RmValNo); + UpdatedSubRanges = true; + } + } + } + if (UpdatedSubRanges) + DstInt.removeEmptySubRanges(); + } } else if (NewMI.getOperand(0).getReg() != CopyDstReg) { // The New instruction may be defining a sub-register of what's actually // been asked for. If so it must implicitly define the whole thing. @@ -2666,11 +2701,17 @@ void JoinVals::pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask) { // Look for values being erased. bool DidPrune = false; for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) { - if (Vals[i].Resolution != CR_Erase) + // We should trigger in all cases in which eraseInstrs() does something, to + // match what eraseInstrs() is doing. + if (Vals[i].Resolution != CR_Erase && + (Vals[i].Resolution != CR_Keep || !Vals[i].ErasableImplicitDef || + !Vals[i].Pruned)) continue; // Check subranges at the point where the copy will be removed. SlotIndex Def = LR.getValNumInfo(i)->def; + // Print a message so mismatches with eraseInstrs() can be diagnosed. + DEBUG(dbgs() << "\t\tExpecting instruction removal at " << Def << '\n'); for (LiveInterval::SubRange &S : LI.subranges()) { LiveQueryResult Q = S.Query(Def); @@ -3089,7 +3130,7 @@ copyCoalesceWorkList(MutableArrayRef CurrList) { continue; // Skip instruction pointers that have already been erased, for example by // dead code elimination.
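The copyCoalesceWorkList change just below swaps ErasedInstrs.erase(CurrList[i]) for ErasedInstrs.count(CurrList[i]): erase() answers the membership question but also forgets the fact, so only the first worklist that mentions an erased instruction would skip it. A standalone sketch of the difference, with std::set standing in for LLVM's DenseSet (their erase/count semantics agree here):

#include <cassert>
#include <set>

int main() {
  std::set<const void *> ErasedInstrs;
  int A = 0, B = 0; // stand-ins for two erased MachineInstrs
  ErasedInstrs.insert(&A);
  ErasedInstrs.insert(&B);

  // erase() is a destructive test: afterwards &A is no longer remembered.
  assert(ErasedInstrs.erase(&A) == 1);
  assert(ErasedInstrs.count(&A) == 0); // the fact is lost

  // count() is a pure query: &B stays known for later worklists.
  assert(ErasedInstrs.count(&B) == 1);
  assert(ErasedInstrs.count(&B) == 1);
  return 0;
}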
- if (ErasedInstrs.erase(CurrList[i])) { + if (ErasedInstrs.count(CurrList[i])) { CurrList[i] = nullptr; continue; } diff --git a/interpreter/llvm/src/lib/CodeGen/RegisterPressure.cpp b/interpreter/llvm/src/lib/CodeGen/RegisterPressure.cpp index c726edc88b41c..88e0a3b58940e 100644 --- a/interpreter/llvm/src/lib/CodeGen/RegisterPressure.cpp +++ b/interpreter/llvm/src/lib/CodeGen/RegisterPressure.cpp @@ -12,9 +12,10 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/RegisterPressure.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -24,7 +25,6 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterClassInfo.h" -#include "llvm/CodeGen/RegisterPressure.h" #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/MC/MCRegisterInfo.h" diff --git a/interpreter/llvm/src/lib/CodeGen/RegisterScavenging.cpp b/interpreter/llvm/src/lib/CodeGen/RegisterScavenging.cpp index 0635e5c0a63c9..fc5105aadbffd 100644 --- a/interpreter/llvm/src/lib/CodeGen/RegisterScavenging.cpp +++ b/interpreter/llvm/src/lib/CodeGen/RegisterScavenging.cpp @@ -15,21 +15,27 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/RegisterScavenging.h" + #include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/PassSupport.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" +#include #include #include #include @@ -39,6 +45,8 @@ using namespace llvm; #define DEBUG_TYPE "reg-scavenging" +STATISTIC(NumScavengedRegs, "Number of frame index regs scavenged"); + void RegScavenger::setRegUsed(unsigned Reg, LaneBitmask LaneMask) { LiveUnits.addRegMasked(Reg, LaneMask); } @@ -253,6 +261,14 @@ void RegScavenger::backward() { const MachineInstr &MI = *MBBI; LiveUnits.stepBackward(MI); + // Expire scavenge spill frameindex uses. + for (ScavengedInfo &I : Scavenged) { + if (I.Restore == &MI) { + I.Reg = 0; + I.Restore = nullptr; + } + } + if (MBBI == MBB->begin()) { MBBI = MachineBasicBlock::iterator(nullptr); Tracking = false; @@ -349,6 +365,86 @@ unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI, return Survivor; } +/// Given the bitvector \p Available of free register units at position +/// \p From, search backwards to find a register that is part of \p +/// Candidates and is not used/clobbered until the point \p To. If there are +/// multiple candidates, continue searching and pick the one that is not used/ +/// clobbered for the longest time. +/// Returns the register and the earliest position we know it to be free, or +/// the position MBB.end() if no register is available.
+static std::pair +findSurvivorBackwards(const MachineRegisterInfo &MRI, + MachineBasicBlock::iterator From, MachineBasicBlock::iterator To, + const LiveRegUnits &LiveOut, ArrayRef AllocationOrder, + bool RestoreAfter) { + bool FoundTo = false; + MCPhysReg Survivor = 0; + MachineBasicBlock::iterator Pos; + MachineBasicBlock &MBB = *From->getParent(); + unsigned InstrLimit = 25; + unsigned InstrCountDown = InstrLimit; + const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo(); + LiveRegUnits Used(TRI); + + for (MachineBasicBlock::iterator I = From;; --I) { + const MachineInstr &MI = *I; + + Used.accumulate(MI); + + if (I == To) { + // See if one of the registers in RC wasn't used so far. + for (MCPhysReg Reg : AllocationOrder) { + if (!MRI.isReserved(Reg) && Used.available(Reg) && + LiveOut.available(Reg)) + return std::make_pair(Reg, MBB.end()); + } + // Otherwise we will continue up to InstrLimit instructions to find + // the register which is not defined/used for the longest time. + FoundTo = true; + Pos = To; + // Note: It was fine so far to start our search at From; however, now that + // we have to spill and can only place the restore after From, also + // add the regs used/defed by std::next(From) to the set. + if (RestoreAfter) + Used.accumulate(*std::next(From)); + } + if (FoundTo) { + if (Survivor == 0 || !Used.available(Survivor)) { + MCPhysReg AvailableReg = 0; + for (MCPhysReg Reg : AllocationOrder) { + if (!MRI.isReserved(Reg) && Used.available(Reg)) { + AvailableReg = Reg; + break; + } + } + if (AvailableReg == 0) + break; + Survivor = AvailableReg; + } + if (--InstrCountDown == 0) + break; + + // Keep searching when we find a vreg since the spilled register will + // be useful for this other vreg as well later. + bool FoundVReg = false; + for (const MachineOperand &MO : MI.operands()) { + if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) { + FoundVReg = true; + break; + } + } + if (FoundVReg) { + InstrCountDown = InstrLimit; + Pos = I; + } + if (I == MBB.begin()) + break; + } + } + + return std::make_pair(Survivor, Pos); +} + static unsigned getFrameIndexOperandNum(MachineInstr &MI) { unsigned i = 0; while (!MI.getOperand(i).isFI()) { @@ -358,44 +454,16 @@ static unsigned getFrameIndexOperandNum(MachineInstr &MI) return i; } -unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, - MachineBasicBlock::iterator I, - int SPAdj) { - MachineInstr &MI = *I; - const MachineFunction &MF = *MI.getParent()->getParent(); - // Consider all allocatable registers in the register class initially - BitVector Candidates = TRI->getAllocatableSet(MF, RC); - - // Exclude all the registers being used by the instruction. - for (const MachineOperand &MO : MI.operands()) { - if (MO.isReg() && MO.getReg() != 0 && !(MO.isUse() && MO.isUndef()) && - !TargetRegisterInfo::isVirtualRegister(MO.getReg())) - for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); ++AI) - Candidates.reset(*AI); - } - - // Try to find a register that's unused if there is one, as then we won't - // have to spill. - BitVector Available = getRegsAvailable(RC); - Available &= Candidates; - if (Available.any()) - Candidates = Available; - - // Find the register whose use is furthest away. - MachineBasicBlock::iterator UseMI; - unsigned SReg = findSurvivorReg(I, Candidates, 25, UseMI); - - // If we found an unused register there is no reason to spill it.
- if (!isRegUsed(SReg)) { - DEBUG(dbgs() << "Scavenged register: " << TRI->getName(SReg) << "\n"); - return SReg; - } - +RegScavenger::ScavengedInfo & +RegScavenger::spill(unsigned Reg, const TargetRegisterClass &RC, int SPAdj, + MachineBasicBlock::iterator Before, + MachineBasicBlock::iterator &UseMI) { // Find an available scavenging slot with size and alignment matching // the requirements of the class RC. + const MachineFunction &MF = *Before->getParent()->getParent(); const MachineFrameInfo &MFI = MF.getFrameInfo(); - unsigned NeedSize = TRI->getSpillSize(*RC); - unsigned NeedAlign = TRI->getSpillAlignment(*RC); + unsigned NeedSize = TRI->getSpillSize(RC); + unsigned NeedAlign = TRI->getSpillAlignment(RC); unsigned SI = Scavenged.size(), Diff = std::numeric_limits::max(); int FIB = MFI.getObjectIndexBegin(), FIE = MFI.getObjectIndexEnd(); @@ -430,42 +498,303 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, } // Avoid infinite regress - Scavenged[SI].Reg = SReg; + Scavenged[SI].Reg = Reg; // If the target knows how to save/restore the register, let it do so; // otherwise, use the emergency stack spill slot. - if (!TRI->saveScavengerRegister(*MBB, I, UseMI, RC, SReg)) { - // Spill the scavenged register before I. + if (!TRI->saveScavengerRegister(*MBB, Before, UseMI, &RC, Reg)) { + // Spill the scavenged register before \p Before. int FI = Scavenged[SI].FrameIndex; if (FI < FIB || FI >= FIE) { std::string Msg = std::string("Error while trying to spill ") + - TRI->getName(SReg) + " from class " + TRI->getRegClassName(RC) + + TRI->getName(Reg) + " from class " + TRI->getRegClassName(&RC) + ": Cannot scavenge register without an emergency spill slot!"; report_fatal_error(Msg.c_str()); } - TII->storeRegToStackSlot(*MBB, I, SReg, true, Scavenged[SI].FrameIndex, - RC, TRI); - MachineBasicBlock::iterator II = std::prev(I); + TII->storeRegToStackSlot(*MBB, Before, Reg, true, Scavenged[SI].FrameIndex, + &RC, TRI); + MachineBasicBlock::iterator II = std::prev(Before); unsigned FIOperandNum = getFrameIndexOperandNum(*II); TRI->eliminateFrameIndex(II, SPAdj, FIOperandNum, this); // Restore the scavenged register before its use (or first terminator). - TII->loadRegFromStackSlot(*MBB, UseMI, SReg, Scavenged[SI].FrameIndex, - RC, TRI); + TII->loadRegFromStackSlot(*MBB, UseMI, Reg, Scavenged[SI].FrameIndex, + &RC, TRI); II = std::prev(UseMI); FIOperandNum = getFrameIndexOperandNum(*II); TRI->eliminateFrameIndex(II, SPAdj, FIOperandNum, this); } + return Scavenged[SI]; +} - Scavenged[SI].Restore = &*std::prev(UseMI); +unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, + MachineBasicBlock::iterator I, + int SPAdj) { + MachineInstr &MI = *I; + const MachineFunction &MF = *MI.getParent()->getParent(); + // Consider all allocatable registers in the register class initially + BitVector Candidates = TRI->getAllocatableSet(MF, RC); - // Doing this here leads to infinite regress. - // Scavenged[SI].Reg = SReg; + // Exclude all the registers being used by the instruction. + for (const MachineOperand &MO : MI.operands()) { + if (MO.isReg() && MO.getReg() != 0 && !(MO.isUse() && MO.isUndef()) && + !TargetRegisterInfo::isVirtualRegister(MO.getReg())) + for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); ++AI) + Candidates.reset(*AI); + } + + // Try to find a register that's unused if there is one, as then we won't + // have to spill. 
+ BitVector Available = getRegsAvailable(RC); + Available &= Candidates; + if (Available.any()) + Candidates = Available; + + // Find the register whose use is furthest away. + MachineBasicBlock::iterator UseMI; + unsigned SReg = findSurvivorReg(I, Candidates, 25, UseMI); + + // If we found an unused register there is no reason to spill it. + if (!isRegUsed(SReg)) { + DEBUG(dbgs() << "Scavenged register: " << TRI->getName(SReg) << "\n"); + return SReg; + } + + ScavengedInfo &Scavenged = spill(SReg, *RC, SPAdj, I, UseMI); + Scavenged.Restore = &*std::prev(UseMI); DEBUG(dbgs() << "Scavenged register (with spill): " << TRI->getName(SReg) << "\n"); return SReg; } + +unsigned RegScavenger::scavengeRegisterBackwards(const TargetRegisterClass &RC, + MachineBasicBlock::iterator To, + bool RestoreAfter, int SPAdj) { + const MachineBasicBlock &MBB = *To->getParent(); + const MachineFunction &MF = *MBB.getParent(); + + // Find the register whose use is furthest away. + MachineBasicBlock::iterator UseMI; + ArrayRef AllocationOrder = RC.getRawAllocationOrder(MF); + std::pair P = + findSurvivorBackwards(*MRI, MBBI, To, LiveUnits, AllocationOrder, + RestoreAfter); + MCPhysReg Reg = P.first; + MachineBasicBlock::iterator SpillBefore = P.second; + assert(Reg != 0 && "No register left to scavenge!"); + // Found an available register? + if (SpillBefore != MBB.end()) { + MachineBasicBlock::iterator ReloadAfter = + RestoreAfter ? std::next(MBBI) : MBBI; + MachineBasicBlock::iterator ReloadBefore = std::next(ReloadAfter); + DEBUG(dbgs() << "Reload before: " << *ReloadBefore << '\n'); + ScavengedInfo &Scavenged = spill(Reg, RC, SPAdj, SpillBefore, ReloadBefore); + Scavenged.Restore = &*std::prev(SpillBefore); + LiveUnits.removeReg(Reg); + DEBUG(dbgs() << "Scavenged register with spill: " << PrintReg(Reg, TRI) + << " until " << *SpillBefore); + } else { + DEBUG(dbgs() << "Scavenged free register: " << PrintReg(Reg, TRI) << '\n'); + } + return Reg; +} + +/// Allocate a register for the virtual register \p VReg. The last use of +/// \p VReg is around the current position of the register scavenger \p RS. +/// \p ReserveAfter controls whether the scavenged register needs to be reserved +/// after the current instruction; otherwise, it will only be reserved before +/// the current instruction. +static unsigned scavengeVReg(MachineRegisterInfo &MRI, RegScavenger &RS, + unsigned VReg, bool ReserveAfter) { + const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo(); +#ifndef NDEBUG + // Verify that all definitions and uses are in the same basic block. + const MachineBasicBlock *CommonMBB = nullptr; + // Real definition for the reg, re-definitions are not considered. + const MachineInstr *RealDef = nullptr; + for (MachineOperand &MO : MRI.reg_nodbg_operands(VReg)) { + MachineBasicBlock *MBB = MO.getParent()->getParent(); + if (CommonMBB == nullptr) + CommonMBB = MBB; + assert(MBB == CommonMBB && "All defs+uses must be in the same basic block"); + if (MO.isDef()) { + const MachineInstr &MI = *MO.getParent(); + if (!MI.readsRegister(VReg, &TRI)) { + assert((!RealDef || RealDef == &MI) && + "Can have at most one definition which is not a redefinition"); + RealDef = &MI; + } + } + } + assert(RealDef != nullptr && "Must have at least 1 Def"); +#endif + + // We should only have one definition of the register. However, to accommodate + // the requirements of two-address code we also allow definitions in + // subsequent instructions provided they also read the register. That way + // we get a single contiguous lifetime.
+ // + // Definitions in MRI.def_begin() are unordered; search for the first. + MachineRegisterInfo::def_iterator FirstDef = + std::find_if(MRI.def_begin(VReg), MRI.def_end(), + [VReg, &TRI](const MachineOperand &MO) { + return !MO.getParent()->readsRegister(VReg, &TRI); + }); + assert(FirstDef != MRI.def_end() && + "Must have one definition that does not redefine vreg"); + MachineInstr &DefMI = *FirstDef->getParent(); + + // The register scavenger will report a free register, inserting an emergency + // spill/reload if necessary. + int SPAdj = 0; + const TargetRegisterClass &RC = *MRI.getRegClass(VReg); + unsigned SReg = RS.scavengeRegisterBackwards(RC, DefMI.getIterator(), + ReserveAfter, SPAdj); + MRI.replaceRegWith(VReg, SReg); + ++NumScavengedRegs; + return SReg; +} + +/// Allocate (scavenge) vregs inside a single basic block. +/// Returns true if the target spill callback created new vregs and a 2nd pass +/// is necessary. +static bool scavengeFrameVirtualRegsInBlock(MachineRegisterInfo &MRI, + RegScavenger &RS, + MachineBasicBlock &MBB) { + const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo(); + RS.enterBasicBlockEnd(MBB); + + unsigned InitialNumVirtRegs = MRI.getNumVirtRegs(); + bool NextInstructionReadsVReg = false; + for (MachineBasicBlock::iterator I = MBB.end(); I != MBB.begin(); ) { + --I; + // Move RegScavenger to the position between *I and *std::next(I). + RS.backward(I); + + // Look for unassigned vregs in the uses of *std::next(I). + if (NextInstructionReadsVReg) { + MachineBasicBlock::iterator N = std::next(I); + const MachineInstr &NMI = *N; + for (const MachineOperand &MO : NMI.operands()) { + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + // We only care about virtual registers and ignore virtual registers + // created by the target callbacks in the process (those will be handled + // in a scavenging round). + if (!TargetRegisterInfo::isVirtualRegister(Reg) || + TargetRegisterInfo::virtReg2Index(Reg) >= InitialNumVirtRegs) + continue; + if (!MO.readsReg()) + continue; + + unsigned SReg = scavengeVReg(MRI, RS, Reg, true); + N->addRegisterKilled(SReg, &TRI, false); + RS.setRegUsed(SReg); + } + } + + // Look for unassigned vregs in the defs of *I. + NextInstructionReadsVReg = false; + const MachineInstr &MI = *I; + for (const MachineOperand &MO : MI.operands()) { + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + // Only vregs, no newly created vregs (see above). + if (!TargetRegisterInfo::isVirtualRegister(Reg) || + TargetRegisterInfo::virtReg2Index(Reg) >= InitialNumVirtRegs) + continue; + // We have to look at all operands anyway so we can precalculate here + // whether there is a reading operand. This allows us to skip the use + // step in the next iteration if there was none.
+ assert(!MO.isInternalRead() && "Cannot assign inside bundles"); + assert((!MO.isUndef() || MO.isDef()) && "Cannot handle undef uses"); + if (MO.readsReg()) { + NextInstructionReadsVReg = true; + } + if (MO.isDef()) { + unsigned SReg = scavengeVReg(MRI, RS, Reg, false); + I->addRegisterDead(SReg, &TRI, false); + } + } + } +#ifndef NDEBUG + for (const MachineOperand &MO : MBB.front().operands()) { + if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg())) + continue; + assert(!MO.isInternalRead() && "Cannot assign inside bundles"); + assert((!MO.isUndef() || MO.isDef()) && "Cannot handle undef uses"); + assert(!MO.readsReg() && "Vreg use in first instruction not allowed"); + } +#endif + + return MRI.getNumVirtRegs() != InitialNumVirtRegs; +} + +void llvm::scavengeFrameVirtualRegs(MachineFunction &MF, RegScavenger &RS) { + // FIXME: Iterating over the instruction stream is unnecessary. We can simply + // iterate over the vreg use list, which at this point only contains machine + // operands for which eliminateFrameIndex needs a new scratch reg. + MachineRegisterInfo &MRI = MF.getRegInfo(); + // Shortcut. + if (MRI.getNumVirtRegs() == 0) { + MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs); + return; + } + + // Run through the instructions and find any virtual registers. + for (MachineBasicBlock &MBB : MF) { + if (MBB.empty()) + continue; + + bool Again = scavengeFrameVirtualRegsInBlock(MRI, RS, MBB); + if (Again) { + DEBUG(dbgs() << "Warning: Required two scavenging passes for block " + << MBB.getName() << '\n'); + Again = scavengeFrameVirtualRegsInBlock(MRI, RS, MBB); + // The target required a 2nd run (because it created new vregs while + // spilling). Refuse to do another pass to keep compile time in check. + if (Again) + report_fatal_error("Incomplete scavenging after 2nd pass"); + } + } + + MRI.clearVirtRegs(); + MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs); +} + +namespace { +/// This class runs register scavenging independently of the +/// PrologEpilogInserter. This is used for testing. +class ScavengerTest : public MachineFunctionPass { +public: + static char ID; + ScavengerTest() : MachineFunctionPass(ID) {} + bool runOnMachineFunction(MachineFunction &MF) { + const TargetSubtargetInfo &STI = MF.getSubtarget(); + const TargetFrameLowering &TFL = *STI.getFrameLowering(); + + RegScavenger RS; + // Let's hope that calling those outside of PrologEpilogInserter works + // well enough to initialize the scavenger with some emergency spillslots + // for the target.
+ BitVector SavedRegs; + TFL.determineCalleeSaves(MF, SavedRegs, &RS); + TFL.processFunctionBeforeFrameFinalized(MF, &RS); + + // Let's scavenge the current function. + scavengeFrameVirtualRegs(MF, RS); + return true; + } +}; +char ScavengerTest::ID; + +} // end anonymous namespace + +INITIALIZE_PASS(ScavengerTest, "scavenger-test", + "Scavenge virtual registers inside basic blocks", false, false) diff --git a/interpreter/llvm/src/lib/CodeGen/RegisterUsageInfo.cpp b/interpreter/llvm/src/lib/CodeGen/RegisterUsageInfo.cpp index 66f196678dea5..30757f070cadb 100644 --- a/interpreter/llvm/src/lib/CodeGen/RegisterUsageInfo.cpp +++ b/interpreter/llvm/src/lib/CodeGen/RegisterUsageInfo.cpp @@ -1,4 +1,4 @@ -//===- RegisterUsageInfo.cpp - Register Usage Informartion Storage --------===// +//===- RegisterUsageInfo.cpp - Register Usage Information Storage ---------===// // // The LLVM Compiler Infrastructure // @@ -12,11 +12,22 @@ /// //===----------------------------------------------------------------------===// +#include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/RegisterUsageInfo.h" #include "llvm/CodeGen/MachineOperand.h" +#include "llvm/IR/Function.h" #include "llvm/IR/Module.h" -#include "llvm/Support/Debug.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <utility> +#include <vector> using namespace llvm; @@ -27,7 +38,7 @@ static cl::opt<bool> DumpRegUsage( cl::desc("print register usage details collected for analysis.")); INITIALIZE_PASS(PhysicalRegisterUsageInfo, "reg-usage-info", - "Register Usage Informartion Stroage", false, true) + "Register Usage Information Storage", false, true) char PhysicalRegisterUsageInfo::ID = 0; @@ -63,7 +74,7 @@ PhysicalRegisterUsageInfo::getRegUsageInfo(const Function *FP) { void PhysicalRegisterUsageInfo::print(raw_ostream &OS, const Module *M) const { const TargetRegisterInfo *TRI; - typedef std::pair<const Function *, std::vector<uint32_t>> FuncPtrRegMaskPair; + using FuncPtrRegMaskPair = std::pair<const Function *, std::vector<uint32_t>>; SmallVector<const FuncPtrRegMaskPair *, 64> FPRMPairVector; diff --git a/interpreter/llvm/src/lib/CodeGen/RenameIndependentSubregs.cpp b/interpreter/llvm/src/lib/CodeGen/RenameIndependentSubregs.cpp index 2f7ee8bf414cc..bd5ecbd28f293 100644 --- a/interpreter/llvm/src/lib/CodeGen/RenameIndependentSubregs.cpp +++ b/interpreter/llvm/src/lib/CodeGen/RenameIndependentSubregs.cpp @@ -32,10 +32,10 @@ #include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" using namespace llvm; @@ -112,11 +112,11 @@ char RenameIndependentSubregs::ID; char &llvm::RenameIndependentSubregsID = RenameIndependentSubregs::ID; -INITIALIZE_PASS_BEGIN(RenameIndependentSubregs, "rename-independent-subregs", +INITIALIZE_PASS_BEGIN(RenameIndependentSubregs, DEBUG_TYPE, "Rename Independent Subregisters", false, false) INITIALIZE_PASS_DEPENDENCY(SlotIndexes) INITIALIZE_PASS_DEPENDENCY(LiveIntervals) -INITIALIZE_PASS_END(RenameIndependentSubregs, "rename-independent-subregs", +INITIALIZE_PASS_END(RenameIndependentSubregs, DEBUG_TYPE, "Rename Independent Subregisters", false, false) bool RenameIndependentSubregs::renameComponents(LiveInterval &LI) const { @@ -212,7 +212,7 @@ void
RenameIndependentSubregs::rewriteOperands(const IntEqClasses &Classes, const SmallVectorImpl<SubRangeInfo> &SubRangeInfos, const SmallVectorImpl<LiveInterval*> &Intervals) const { const TargetRegisterInfo &TRI = *MRI->getTargetRegisterInfo(); - unsigned Reg = Intervals[0]->reg;; + unsigned Reg = Intervals[0]->reg; for (MachineRegisterInfo::reg_nodbg_iterator I = MRI->reg_nodbg_begin(Reg), E = MRI->reg_nodbg_end(); I != E; ) { MachineOperand &MO = *I++; @@ -243,6 +243,15 @@ void RenameIndependentSubregs::rewriteOperands(const IntEqClasses &Classes, unsigned VReg = Intervals[ID]->reg; MO.setReg(VReg); + + if (MO.isTied() && Reg != VReg) { + /// Undef use operands are not tracked in the equivalence class but need + /// to be updated if they are tied. + MO.getParent()->substituteRegister(Reg, VReg, 0, TRI); + + // substituteRegister breaks the iterator, so restart. + I = MRI->reg_nodbg_begin(Reg); + } } // TODO: We could attempt to recompute new register classes while visiting // the operands: Some of the split register may be fine with less constraint diff --git a/interpreter/llvm/src/lib/CodeGen/ResetMachineFunctionPass.cpp b/interpreter/llvm/src/lib/CodeGen/ResetMachineFunctionPass.cpp index 3e259927ac5cb..01b3db43b2836 100644 --- a/interpreter/llvm/src/lib/CodeGen/ResetMachineFunctionPass.cpp +++ b/interpreter/llvm/src/lib/CodeGen/ResetMachineFunctionPass.cpp @@ -14,9 +14,9 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/Statistic.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/Support/Debug.h" using namespace llvm; diff --git a/interpreter/llvm/src/lib/CodeGen/SafeStack.cpp b/interpreter/llvm/src/lib/CodeGen/SafeStack.cpp index 08b3d345f6899..8584a9b7c8973 100644 --- a/interpreter/llvm/src/lib/CodeGen/SafeStack.cpp +++ b/interpreter/llvm/src/lib/CodeGen/SafeStack.cpp @@ -24,6 +24,7 @@ #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DIBuilder.h" #include "llvm/IR/DataLayout.h" @@ -51,7 +52,7 @@ using namespace llvm; using namespace llvm::safestack; -#define DEBUG_TYPE "safestack" +#define DEBUG_TYPE "safe-stack" namespace llvm { @@ -767,13 +768,12 @@ class SafeStackLegacyPass : public FunctionPass { public: static char ID; // Pass identification, replacement for typeid..
- SafeStackLegacyPass(const TargetMachine *TM) : FunctionPass(ID), TM(TM) { + SafeStackLegacyPass() : FunctionPass(ID), TM(nullptr) { initializeSafeStackLegacyPassPass(*PassRegistry::getPassRegistry()); } - SafeStackLegacyPass() : SafeStackLegacyPass(nullptr) {} - void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<TargetPassConfig>(); AU.addRequired<TargetLibraryInfoWrapperPass>(); AU.addRequired<AssumptionCacheTracker>(); } @@ -793,8 +793,7 @@ class SafeStackLegacyPass : public FunctionPass { return false; } - if (!TM) - report_fatal_error("Target machine is required"); + TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>(); auto *TL = TM->getSubtargetImpl(F)->getTargetLowering(); if (!TL) report_fatal_error("TargetLowering instance is required"); @@ -821,11 +820,10 @@ class SafeStackLegacyPass : public FunctionPass { } // anonymous namespace char SafeStackLegacyPass::ID = 0; -INITIALIZE_TM_PASS_BEGIN(SafeStackLegacyPass, "safe-stack", - "Safe Stack instrumentation pass", false, false) -INITIALIZE_TM_PASS_END(SafeStackLegacyPass, "safe-stack", - "Safe Stack instrumentation pass", false, false) +INITIALIZE_PASS_BEGIN(SafeStackLegacyPass, DEBUG_TYPE, + "Safe Stack instrumentation pass", false, false) +INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) +INITIALIZE_PASS_END(SafeStackLegacyPass, DEBUG_TYPE, + "Safe Stack instrumentation pass", false, false) -FunctionPass *llvm::createSafeStackPass(const llvm::TargetMachine *TM) { - return new SafeStackLegacyPass(TM); -} +FunctionPass *llvm::createSafeStackPass() { return new SafeStackLegacyPass(); } diff --git a/interpreter/llvm/src/lib/CodeGen/SafeStackColoring.cpp b/interpreter/llvm/src/lib/CodeGen/SafeStackColoring.cpp index 09289f947dc96..21f2fa497233a 100644 --- a/interpreter/llvm/src/lib/CodeGen/SafeStackColoring.cpp +++ b/interpreter/llvm/src/lib/CodeGen/SafeStackColoring.cpp @@ -20,9 +20,10 @@ using namespace llvm::safestack; #define DEBUG_TYPE "safestackcoloring" +// Disabled by default due to PR32143. static cl::opt<bool> ClColoring("safe-stack-coloring", cl::desc("enable safe stack coloring"), - cl::Hidden, cl::init(true)); + cl::Hidden, cl::init(false)); const StackColoring::LiveRange &StackColoring::getLiveRange(AllocaInst *AI) { const auto IT = AllocaNumbering.find(AI); diff --git a/interpreter/llvm/src/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp b/interpreter/llvm/src/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp new file mode 100644 index 0000000000000..07b43a82ca994 --- /dev/null +++ b/interpreter/llvm/src/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp @@ -0,0 +1,656 @@ +//=== ScalarizeMaskedMemIntrin.cpp - Scalarize unsupported masked mem ===// +//=== intrinsics ===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass replaces masked memory intrinsics - when unsupported by the target +// - with a chain of basic blocks that deal with the elements one-by-one if the +// appropriate mask bit is set.
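+// (Editor's sketch, not part of the original patch: conceptually, for a +// masked operation of vector width N the expansion behaves like +//   for (i = 0; i < N; ++i) +//     if (mask[i]) result[i] = load ptr[i];   // or store/gather/scatter +// but since there is no loop at this level, it is emitted as a chain of +// conditionally branched basic blocks, one cond/else pair per element.)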
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/Target/TargetSubtargetInfo.h" + +using namespace llvm; + +#define DEBUG_TYPE "scalarize-masked-mem-intrin" + +namespace { + +class ScalarizeMaskedMemIntrin : public FunctionPass { + const TargetTransformInfo *TTI; + +public: + static char ID; // Pass identification, replacement for typeid + explicit ScalarizeMaskedMemIntrin() : FunctionPass(ID), TTI(nullptr) { + initializeScalarizeMaskedMemIntrinPass(*PassRegistry::getPassRegistry()); + } + bool runOnFunction(Function &F) override; + + StringRef getPassName() const override { + return "Scalarize Masked Memory Intrinsics"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<TargetTransformInfoWrapperPass>(); + } + +private: + bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT); + bool optimizeCallInst(CallInst *CI, bool &ModifiedDT); +}; +} // namespace + +char ScalarizeMaskedMemIntrin::ID = 0; +INITIALIZE_PASS(ScalarizeMaskedMemIntrin, DEBUG_TYPE, + "Scalarize unsupported masked memory intrinsics", false, false) + +FunctionPass *llvm::createScalarizeMaskedMemIntrinPass() { + return new ScalarizeMaskedMemIntrin(); +} + +// Translate a masked load intrinsic like +// <16 x i32 > @llvm.masked.load( <16 x i32>* %addr, i32 align, +// <16 x i1> %mask, <16 x i32> %passthru) +// to a chain of basic blocks, with loading element one-by-one if +// the appropriate mask bit is set +// +// %1 = bitcast i8* %addr to i32* +// %2 = extractelement <16 x i1> %mask, i32 0 +// %3 = icmp eq i1 %2, true +// br i1 %3, label %cond.load, label %else +// +// cond.load: ; preds = %0 +// %4 = getelementptr i32* %1, i32 0 +// %5 = load i32* %4 +// %6 = insertelement <16 x i32> undef, i32 %5, i32 0 +// br label %else +// +// else: ; preds = %0, %cond.load +// %res.phi.else = phi <16 x i32> [ %6, %cond.load ], [ undef, %0 ] +// %7 = extractelement <16 x i1> %mask, i32 1 +// %8 = icmp eq i1 %7, true +// br i1 %8, label %cond.load1, label %else2 +// +// cond.load1: ; preds = %else +// %9 = getelementptr i32* %1, i32 1 +// %10 = load i32* %9 +// %11 = insertelement <16 x i32> %res.phi.else, i32 %10, i32 1 +// br label %else2 +// +// else2: ; preds = %else, %cond.load1 +// %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ] +// %12 = extractelement <16 x i1> %mask, i32 2 +// %13 = icmp eq i1 %12, true +// br i1 %13, label %cond.load4, label %else5 +// +static void scalarizeMaskedLoad(CallInst *CI) { + Value *Ptr = CI->getArgOperand(0); + Value *Alignment = CI->getArgOperand(1); + Value *Mask = CI->getArgOperand(2); + Value *Src0 = CI->getArgOperand(3); + + unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue(); + VectorType *VecType = dyn_cast<VectorType>(CI->getType()); + assert(VecType && "Unexpected return type of masked load intrinsic"); + + Type *EltTy = CI->getType()->getVectorElementType(); + + IRBuilder<> Builder(CI->getContext()); + Instruction *InsertPt = CI; + BasicBlock *IfBlock = CI->getParent(); + BasicBlock *CondBlock = nullptr; + BasicBlock *PrevIfBlock = CI->getParent(); + + Builder.SetInsertPoint(InsertPt); + Builder.SetCurrentDebugLocation(CI->getDebugLoc()); + + // Short-cut if the mask is all-true.
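+ // (Editor's note, not part of the original patch: a constant all-true mask, + //  e.g. <16 x i1> <i1 true, i1 true, ...>, selects every element, so the + //  masked load below degenerates to one ordinary aligned vector load.)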
+ bool IsAllOnesMask = + isa<ConstantVector>(Mask) && cast<ConstantVector>(Mask)->isAllOnesValue(); + + if (IsAllOnesMask) { + Value *NewI = Builder.CreateAlignedLoad(Ptr, AlignVal); + CI->replaceAllUsesWith(NewI); + CI->eraseFromParent(); + return; + } + + // Adjust alignment for the scalar instruction. + AlignVal = std::min(AlignVal, VecType->getScalarSizeInBits() / 8); + // Bitcast %addr from i8* to EltTy* + Type *NewPtrType = + EltTy->getPointerTo(cast<PointerType>(Ptr->getType())->getAddressSpace()); + Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType); + unsigned VectorWidth = VecType->getNumElements(); + + Value *UndefVal = UndefValue::get(VecType); + + // The result vector + Value *VResult = UndefVal; + + if (isa<ConstantVector>(Mask)) { + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { + if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue()) + continue; + Value *Gep = + Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx)); + LoadInst *Load = Builder.CreateAlignedLoad(Gep, AlignVal); + VResult = + Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx)); + } + Value *NewI = Builder.CreateSelect(Mask, VResult, Src0); + CI->replaceAllUsesWith(NewI); + CI->eraseFromParent(); + return; + } + + PHINode *Phi = nullptr; + Value *PrevPhi = UndefVal; + + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { + + // Fill the "else" block, created in the previous iteration + // + // %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ] + // %mask_1 = extractelement <16 x i1> %mask, i32 Idx + // %to_load = icmp eq i1 %mask_1, true + // br i1 %to_load, label %cond.load, label %else + // + if (Idx > 0) { + Phi = Builder.CreatePHI(VecType, 2, "res.phi.else"); + Phi->addIncoming(VResult, CondBlock); + Phi->addIncoming(PrevPhi, PrevIfBlock); + PrevPhi = Phi; + VResult = Phi; + } + + Value *Predicate = + Builder.CreateExtractElement(Mask, Builder.getInt32(Idx)); + Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate, + ConstantInt::get(Predicate->getType(), 1)); + + // Create "cond" block + // + // %EltAddr = getelementptr i32* %1, i32 0 + // %Elt = load i32* %EltAddr + // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx + // + CondBlock = IfBlock->splitBasicBlock(InsertPt->getIterator(), "cond.load"); + Builder.SetInsertPoint(InsertPt); + + Value *Gep = + Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx)); + LoadInst *Load = Builder.CreateAlignedLoad(Gep, AlignVal); + VResult = Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx)); + + // Create "else" block, fill it in the next iteration + BasicBlock *NewIfBlock = + CondBlock->splitBasicBlock(InsertPt->getIterator(), "else"); + Builder.SetInsertPoint(InsertPt); + Instruction *OldBr = IfBlock->getTerminator(); + BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr); + OldBr->eraseFromParent(); + PrevIfBlock = IfBlock; + IfBlock = NewIfBlock; + } + + Phi = Builder.CreatePHI(VecType, 2, "res.phi.select"); + Phi->addIncoming(VResult, CondBlock); + Phi->addIncoming(PrevPhi, PrevIfBlock); + Value *NewI = Builder.CreateSelect(Mask, Phi, Src0); + CI->replaceAllUsesWith(NewI); + CI->eraseFromParent(); +} + +// Translate a masked store intrinsic, like +// void @llvm.masked.store(<16 x i32> %src, <16 x i32>* %addr, i32 align, +// <16 x i1> %mask) +// to a chain of basic blocks, that stores element one-by-one if +// the appropriate mask bit is set +// +// %1 = bitcast i8* %addr to i32* +// %2 = extractelement <16 x i1> %mask, i32 0 +// %3 = icmp eq i1 %2, true +// br i1 %3, label %cond.store, label %else +// +//
cond.store: ; preds = %0 +// %4 = extractelement <16 x i32> %val, i32 0 +// %5 = getelementptr i32* %1, i32 0 +// store i32 %4, i32* %5 +// br label %else +// +// else: ; preds = %0, %cond.store +// %6 = extractelement <16 x i1> %mask, i32 1 +// %7 = icmp eq i1 %6, true +// br i1 %7, label %cond.store1, label %else2 +// +// cond.store1: ; preds = %else +// %8 = extractelement <16 x i32> %val, i32 1 +// %9 = getelementptr i32* %1, i32 1 +// store i32 %8, i32* %9 +// br label %else2 +// . . . +static void scalarizeMaskedStore(CallInst *CI) { + Value *Src = CI->getArgOperand(0); + Value *Ptr = CI->getArgOperand(1); + Value *Alignment = CI->getArgOperand(2); + Value *Mask = CI->getArgOperand(3); + + unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue(); + VectorType *VecType = dyn_cast<VectorType>(Src->getType()); + assert(VecType && "Unexpected data type in masked store intrinsic"); + + Type *EltTy = VecType->getElementType(); + + IRBuilder<> Builder(CI->getContext()); + Instruction *InsertPt = CI; + BasicBlock *IfBlock = CI->getParent(); + Builder.SetInsertPoint(InsertPt); + Builder.SetCurrentDebugLocation(CI->getDebugLoc()); + + // Short-cut if the mask is all-true. + bool IsAllOnesMask = + isa<ConstantVector>(Mask) && cast<ConstantVector>(Mask)->isAllOnesValue(); + + if (IsAllOnesMask) { + Builder.CreateAlignedStore(Src, Ptr, AlignVal); + CI->eraseFromParent(); + return; + } + + // Adjust alignment for the scalar instruction. + AlignVal = std::max(AlignVal, VecType->getScalarSizeInBits() / 8); + // Bitcast %addr from i8* to EltTy* + Type *NewPtrType = + EltTy->getPointerTo(cast<PointerType>(Ptr->getType())->getAddressSpace()); + Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType); + unsigned VectorWidth = VecType->getNumElements(); + + if (isa<ConstantVector>(Mask)) { + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { + if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue()) + continue; + Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx)); + Value *Gep = + Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx)); + Builder.CreateAlignedStore(OneElt, Gep, AlignVal); + } + CI->eraseFromParent(); + return; + } + + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { + + // Fill the "else" block, created in the previous iteration + // + // %mask_1 = extractelement <16 x i1> %mask, i32 Idx + // %to_store = icmp eq i1 %mask_1, true + // br i1 %to_store, label %cond.store, label %else + // + Value *Predicate = + Builder.CreateExtractElement(Mask, Builder.getInt32(Idx)); + Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate, + ConstantInt::get(Predicate->getType(), 1)); + + // Create "cond" block + // + // %OneElt = extractelement <16 x i32> %Src, i32 Idx + // %EltAddr = getelementptr i32* %1, i32 0 + // %store i32 %OneElt, i32* %EltAddr + // + BasicBlock *CondBlock = + IfBlock->splitBasicBlock(InsertPt->getIterator(), "cond.store"); + Builder.SetInsertPoint(InsertPt); + + Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx)); + Value *Gep = + Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx)); + Builder.CreateAlignedStore(OneElt, Gep, AlignVal); + + // Create "else" block, fill it in the next iteration + BasicBlock *NewIfBlock = + CondBlock->splitBasicBlock(InsertPt->getIterator(), "else"); + Builder.SetInsertPoint(InsertPt); + Instruction *OldBr = IfBlock->getTerminator(); + BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr); + OldBr->eraseFromParent(); + IfBlock = NewIfBlock; + } + CI->eraseFromParent(); +} + +// Translate a masked gather intrinsic like +// <16 x i32 >
@llvm.masked.gather.v16i32( <16 x i32*> %Ptrs, i32 4, +// <16 x i1> %Mask, <16 x i32> %Src) +// to a chain of basic blocks, with loading element one-by-one if +// the appropriate mask bit is set +// +// % Ptrs = getelementptr i32, i32* %base, <16 x i64> %ind +// % Mask0 = extractelement <16 x i1> %Mask, i32 0 +// % ToLoad0 = icmp eq i1 % Mask0, true +// br i1 % ToLoad0, label %cond.load, label %else +// +// cond.load: +// % Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0 +// % Load0 = load i32, i32* % Ptr0, align 4 +// % Res0 = insertelement <16 x i32> undef, i32 % Load0, i32 0 +// br label %else +// +// else: +// %res.phi.else = phi <16 x i32>[% Res0, %cond.load], [undef, % 0] +// % Mask1 = extractelement <16 x i1> %Mask, i32 1 +// % ToLoad1 = icmp eq i1 % Mask1, true +// br i1 % ToLoad1, label %cond.load1, label %else2 +// +// cond.load1: +// % Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1 +// % Load1 = load i32, i32* % Ptr1, align 4 +// % Res1 = insertelement <16 x i32> %res.phi.else, i32 % Load1, i32 1 +// br label %else2 +// . . . +// % Result = select <16 x i1> %Mask, <16 x i32> %res.phi.select, <16 x i32> %Src +// ret <16 x i32> %Result +static void scalarizeMaskedGather(CallInst *CI) { + Value *Ptrs = CI->getArgOperand(0); + Value *Alignment = CI->getArgOperand(1); + Value *Mask = CI->getArgOperand(2); + Value *Src0 = CI->getArgOperand(3); + + VectorType *VecType = dyn_cast<VectorType>(CI->getType()); + + assert(VecType && "Unexpected return type of masked load intrinsic"); + + IRBuilder<> Builder(CI->getContext()); + Instruction *InsertPt = CI; + BasicBlock *IfBlock = CI->getParent(); + BasicBlock *CondBlock = nullptr; + BasicBlock *PrevIfBlock = CI->getParent(); + Builder.SetInsertPoint(InsertPt); + unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue(); + + Builder.SetCurrentDebugLocation(CI->getDebugLoc()); + + Value *UndefVal = UndefValue::get(VecType); + + // The result vector + Value *VResult = UndefVal; + unsigned VectorWidth = VecType->getNumElements(); + + // Shorten the way if the mask is a vector of constants.
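+ // (Editor's note, not part of the original patch: with a compile-time mask + //  such as <4 x i1> <i1 1, i1 0, i1 0, i1 1>, only lanes 0 and 3 get an + //  extract/load/insert sequence below; no conditional branches are needed.)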
+ bool IsConstMask = isa<ConstantVector>(Mask); + + if (IsConstMask) { + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { + if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue()) + continue; + Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx), + "Ptr" + Twine(Idx)); + LoadInst *Load = + Builder.CreateAlignedLoad(Ptr, AlignVal, "Load" + Twine(Idx)); + VResult = Builder.CreateInsertElement( + VResult, Load, Builder.getInt32(Idx), "Res" + Twine(Idx)); + } + Value *NewI = Builder.CreateSelect(Mask, VResult, Src0); + CI->replaceAllUsesWith(NewI); + CI->eraseFromParent(); + return; + } + + PHINode *Phi = nullptr; + Value *PrevPhi = UndefVal; + + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { + + // Fill the "else" block, created in the previous iteration + // + // %Mask1 = extractelement <16 x i1> %Mask, i32 1 + // %ToLoad1 = icmp eq i1 %Mask1, true + // br i1 %ToLoad1, label %cond.load, label %else + // + if (Idx > 0) { + Phi = Builder.CreatePHI(VecType, 2, "res.phi.else"); + Phi->addIncoming(VResult, CondBlock); + Phi->addIncoming(PrevPhi, PrevIfBlock); + PrevPhi = Phi; + VResult = Phi; + } + + Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx), + "Mask" + Twine(Idx)); + Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate, + ConstantInt::get(Predicate->getType(), 1), + "ToLoad" + Twine(Idx)); + + // Create "cond" block + // + // %EltAddr = getelementptr i32* %1, i32 0 + // %Elt = load i32* %EltAddr + // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx + // + CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.load"); + Builder.SetInsertPoint(InsertPt); + + Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx), + "Ptr" + Twine(Idx)); + LoadInst *Load = + Builder.CreateAlignedLoad(Ptr, AlignVal, "Load" + Twine(Idx)); + VResult = Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx), + "Res" + Twine(Idx)); + + // Create "else" block, fill it in the next iteration + BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else"); + Builder.SetInsertPoint(InsertPt); + Instruction *OldBr = IfBlock->getTerminator(); + BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr); + OldBr->eraseFromParent(); + PrevIfBlock = IfBlock; + IfBlock = NewIfBlock; + } + + Phi = Builder.CreatePHI(VecType, 2, "res.phi.select"); + Phi->addIncoming(VResult, CondBlock); + Phi->addIncoming(PrevPhi, PrevIfBlock); + Value *NewI = Builder.CreateSelect(Mask, Phi, Src0); + CI->replaceAllUsesWith(NewI); + CI->eraseFromParent(); +} + +// Translate a masked scatter intrinsic, like +// void @llvm.masked.scatter.v16i32(<16 x i32> %Src, <16 x i32*>* %Ptrs, i32 4, +// <16 x i1> %Mask) +// to a chain of basic blocks, that stores element one-by-one if +// the appropriate mask bit is set. +// +// % Ptrs = getelementptr i32, i32* %ptr, <16 x i64> %ind +// % Mask0 = extractelement <16 x i1> % Mask, i32 0 +// % ToStore0 = icmp eq i1 % Mask0, true +// br i1 %ToStore0, label %cond.store, label %else +// +// cond.store: +// % Elt0 = extractelement <16 x i32> %Src, i32 0 +// % Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0 +// store i32 %Elt0, i32* % Ptr0, align 4 +// br label %else +// +// else: +// % Mask1 = extractelement <16 x i1> % Mask, i32 1 +// % ToStore1 = icmp eq i1 % Mask1, true +// br i1 % ToStore1, label %cond.store1, label %else2 +// +// cond.store1: +// % Elt1 = extractelement <16 x i32> %Src, i32 1 +// % Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1 +// store i32 % Elt1, i32* % Ptr1, align 4 +// br label %else2 +// . . .
+static void scalarizeMaskedScatter(CallInst *CI) { + Value *Src = CI->getArgOperand(0); + Value *Ptrs = CI->getArgOperand(1); + Value *Alignment = CI->getArgOperand(2); + Value *Mask = CI->getArgOperand(3); + + assert(isa<VectorType>(Src->getType()) && + "Unexpected data type in masked scatter intrinsic"); + assert(isa<VectorType>(Ptrs->getType()) && + isa<PointerType>(Ptrs->getType()->getVectorElementType()) && + "Vector of pointers is expected in masked scatter intrinsic"); + + IRBuilder<> Builder(CI->getContext()); + Instruction *InsertPt = CI; + BasicBlock *IfBlock = CI->getParent(); + Builder.SetInsertPoint(InsertPt); + Builder.SetCurrentDebugLocation(CI->getDebugLoc()); + + unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue(); + unsigned VectorWidth = Src->getType()->getVectorNumElements(); + + // Shorten the way if the mask is a vector of constants. + bool IsConstMask = isa<ConstantVector>(Mask); + + if (IsConstMask) { + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { + if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue()) + continue; + Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx), + "Elt" + Twine(Idx)); + Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx), + "Ptr" + Twine(Idx)); + Builder.CreateAlignedStore(OneElt, Ptr, AlignVal); + } + CI->eraseFromParent(); + return; + } + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { + // Fill the "else" block, created in the previous iteration + // + // % Mask1 = extractelement <16 x i1> % Mask, i32 Idx + // % ToStore = icmp eq i1 % Mask1, true + // br i1 % ToStore, label %cond.store, label %else + // + Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx), + "Mask" + Twine(Idx)); + Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate, + ConstantInt::get(Predicate->getType(), 1), + "ToStore" + Twine(Idx)); + + // Create "cond" block + // + // % Elt1 = extractelement <16 x i32> %Src, i32 1 + // % Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1 + // %store i32 % Elt1, i32* % Ptr1 + // + BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.store"); + Builder.SetInsertPoint(InsertPt); + + Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx), + "Elt" + Twine(Idx)); + Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx), + "Ptr" + Twine(Idx)); + Builder.CreateAlignedStore(OneElt, Ptr, AlignVal); + + // Create "else" block, fill it in the next iteration + BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else"); + Builder.SetInsertPoint(InsertPt); + Instruction *OldBr = IfBlock->getTerminator(); + BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr); + OldBr->eraseFromParent(); + IfBlock = NewIfBlock; + } + CI->eraseFromParent(); +} + +bool ScalarizeMaskedMemIntrin::runOnFunction(Function &F) { + if (skipFunction(F)) + return false; + + bool EverMadeChange = false; + + TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); + + bool MadeChange = true; + while (MadeChange) { + MadeChange = false; + for (Function::iterator I = F.begin(); I != F.end();) { + BasicBlock *BB = &*I++; + bool ModifiedDTOnIteration = false; + MadeChange |= optimizeBlock(*BB, ModifiedDTOnIteration); + + // Restart BB iteration if the dominator tree of the Function was changed + if (ModifiedDTOnIteration) + break; + } + + EverMadeChange |= MadeChange; + } + + return EverMadeChange; +} + +bool ScalarizeMaskedMemIntrin::optimizeBlock(BasicBlock &BB, bool &ModifiedDT) { + bool MadeChange = false; + + BasicBlock::iterator CurInstIterator = BB.begin(); + while (CurInstIterator != BB.end()) { + if (CallInst *CI =
dyn_cast<CallInst>(&*CurInstIterator++)) + MadeChange |= optimizeCallInst(CI, ModifiedDT); + if (ModifiedDT) + return true; + } + + return MadeChange; +} + +bool ScalarizeMaskedMemIntrin::optimizeCallInst(CallInst *CI, + bool &ModifiedDT) { + + IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI); + if (II) { + switch (II->getIntrinsicID()) { + default: + break; + case Intrinsic::masked_load: { + // Scalarize unsupported vector masked load + if (!TTI->isLegalMaskedLoad(CI->getType())) { + scalarizeMaskedLoad(CI); + ModifiedDT = true; + return true; + } + return false; + } + case Intrinsic::masked_store: { + if (!TTI->isLegalMaskedStore(CI->getArgOperand(0)->getType())) { + scalarizeMaskedStore(CI); + ModifiedDT = true; + return true; + } + return false; + } + case Intrinsic::masked_gather: { + if (!TTI->isLegalMaskedGather(CI->getType())) { + scalarizeMaskedGather(CI); + ModifiedDT = true; + return true; + } + return false; + } + case Intrinsic::masked_scatter: { + if (!TTI->isLegalMaskedScatter(CI->getArgOperand(0)->getType())) { + scalarizeMaskedScatter(CI); + ModifiedDT = true; + return true; + } + return false; + } + } + } + + return false; +} diff --git a/interpreter/llvm/src/lib/CodeGen/ScheduleDAG.cpp b/interpreter/llvm/src/lib/CodeGen/ScheduleDAG.cpp index dc72ac0732588..5e95f760aaa24 100644 --- a/interpreter/llvm/src/lib/CodeGen/ScheduleDAG.cpp +++ b/interpreter/llvm/src/lib/CodeGen/ScheduleDAG.cpp @@ -12,11 +12,11 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/iterator_range.h" -#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/Support/CommandLine.h" @@ -67,6 +67,41 @@ const MCInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const { return &TII->get(Node->getMachineOpcode()); } +LLVM_DUMP_METHOD +raw_ostream &SDep::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const { + switch (getKind()) { + case Data: OS << "Data"; break; + case Anti: OS << "Anti"; break; + case Output: OS << "Out "; break; + case Order: OS << "Ord "; break; + } + + switch (getKind()) { + case Data: + OS << " Latency=" << getLatency(); + if (TRI && isAssignedRegDep()) + OS << " Reg=" << PrintReg(getReg(), TRI); + break; + case Anti: + case Output: + OS << " Latency=" << getLatency(); + break; + case Order: + OS << " Latency=" << getLatency(); + switch(Contents.OrdKind) { + case Barrier: OS << " Barrier"; break; + case MayAliasMem: + case MustAliasMem: OS << " Memory"; break; + case Artificial: OS << " Artificial"; break; + case Weak: OS << " Weak"; break; + case Cluster: OS << " Cluster"; break; + } + break; + } + + return OS; +} + bool SUnit::addPred(const SDep &D, bool Required) { // If this node already has this dependence, don't add a redundant one.
for (SDep &PredDep : Preds) { @@ -302,16 +337,24 @@ void SUnit::biasCriticalPath() { #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD -void SUnit::print(raw_ostream &OS, const ScheduleDAG *DAG) const { - if (this == &DAG->ExitSU) - OS << "ExitSU"; - else if (this == &DAG->EntrySU) +raw_ostream &SUnit::print(raw_ostream &OS, + const SUnit *Entry, const SUnit *Exit) const { + if (this == Entry) OS << "EntrySU"; + else if (this == Exit) + OS << "ExitSU"; else OS << "SU(" << NodeNum << ")"; + return OS; } -LLVM_DUMP_METHOD void SUnit::dump(const ScheduleDAG *G) const { +LLVM_DUMP_METHOD +raw_ostream &SUnit::print(raw_ostream &OS, const ScheduleDAG *G) const { + return print(OS, &G->EntrySU, &G->ExitSU); +} + +LLVM_DUMP_METHOD +void SUnit::dump(const ScheduleDAG *G) const { print(dbgs(), G); dbgs() << ": "; G->dumpNode(this); @@ -333,40 +376,18 @@ LLVM_DUMP_METHOD void SUnit::dumpAll(const ScheduleDAG *G) const { if (Preds.size() != 0) { dbgs() << " Predecessors:\n"; - for (const SDep &SuccDep : Preds) { - dbgs() << " "; - switch (SuccDep.getKind()) { - case SDep::Data: dbgs() << "data "; break; - case SDep::Anti: dbgs() << "anti "; break; - case SDep::Output: dbgs() << "out "; break; - case SDep::Order: dbgs() << "ord "; break; - } - SuccDep.getSUnit()->print(dbgs(), G); - if (SuccDep.isArtificial()) - dbgs() << " *"; - dbgs() << ": Latency=" << SuccDep.getLatency(); - if (SuccDep.isAssignedRegDep()) - dbgs() << " Reg=" << PrintReg(SuccDep.getReg(), G->TRI); - dbgs() << "\n"; + for (const SDep &Dep : Preds) { + dbgs() << " "; + Dep.getSUnit()->print(dbgs(), G); dbgs() << ": "; + Dep.print(dbgs(), G->TRI); dbgs() << '\n'; } } if (Succs.size() != 0) { dbgs() << " Successors:\n"; - for (const SDep &SuccDep : Succs) { - dbgs() << " "; - switch (SuccDep.getKind()) { - case SDep::Data: dbgs() << "data "; break; - case SDep::Anti: dbgs() << "anti "; break; - case SDep::Output: dbgs() << "out "; break; - case SDep::Order: dbgs() << "ord "; break; - } - SuccDep.getSUnit()->print(dbgs(), G); - if (SuccDep.isArtificial()) - dbgs() << " *"; - dbgs() << ": Latency=" << SuccDep.getLatency(); - if (SuccDep.isAssignedRegDep()) - dbgs() << " Reg=" << PrintReg(SuccDep.getReg(), G->TRI); - dbgs() << "\n"; + for (const SDep &Dep : Succs) { + dbgs() << " "; + Dep.getSUnit()->print(dbgs(), G); dbgs() << ": "; + Dep.print(dbgs(), G->TRI); dbgs() << '\n'; } } } diff --git a/interpreter/llvm/src/lib/CodeGen/ScheduleDAGInstrs.cpp b/interpreter/llvm/src/lib/CodeGen/ScheduleDAGInstrs.cpp index 18823b74c47fe..99baa07390eb9 100644 --- a/interpreter/llvm/src/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/interpreter/llvm/src/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -14,34 +14,56 @@ #include "llvm/CodeGen/ScheduleDAGInstrs.h" #include "llvm/ADT/IntEqClasses.h" +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/SparseSet.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" -#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/LivePhysRegs.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineOperand.h" 
#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/RegisterPressure.h" +#include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/ScheduleDFS.h" +#include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" -#include "llvm/IR/Type.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/Operator.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" +#include "llvm/MC/LaneBitmask.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" +#include +#include +#include +#include +#include +#include using namespace llvm; -#define DEBUG_TYPE "misched" +#define DEBUG_TYPE "machine-scheduler" static cl::opt EnableAASchedMI("enable-aa-sched-mi", cl::Hidden, cl::ZeroOrMore, cl::init(false), @@ -90,74 +112,15 @@ ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf, const MachineLoopInfo *mli, bool RemoveKillFlags) : ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()), - RemoveKillFlags(RemoveKillFlags), CanHandleTerminators(false), - TrackLaneMasks(false), AAForDep(nullptr), BarrierChain(nullptr), + RemoveKillFlags(RemoveKillFlags), UnknownValue(UndefValue::get( - Type::getVoidTy(mf.getFunction()->getContext()))), - FirstDbgValue(nullptr) { + Type::getVoidTy(mf.getFunction()->getContext()))) { DbgValues.clear(); const TargetSubtargetInfo &ST = mf.getSubtarget(); SchedModel.init(ST.getSchedModel(), &ST, TII); } -/// This is the function that does the work of looking through basic -/// ptrtoint+arithmetic+inttoptr sequences. -static const Value *getUnderlyingObjectFromInt(const Value *V) { - do { - if (const Operator *U = dyn_cast(V)) { - // If we find a ptrtoint, we can transfer control back to the - // regular getUnderlyingObjectFromInt. - if (U->getOpcode() == Instruction::PtrToInt) - return U->getOperand(0); - // If we find an add of a constant, a multiplied value, or a phi, it's - // likely that the other operand will lead us to the base - // object. We don't have to worry about the case where the - // object address is somehow being computed by the multiply, - // because our callers only care when the result is an - // identifiable object. - if (U->getOpcode() != Instruction::Add || - (!isa(U->getOperand(1)) && - Operator::getOpcode(U->getOperand(1)) != Instruction::Mul && - !isa(U->getOperand(1)))) - return V; - V = U->getOperand(0); - } else { - return V; - } - assert(V->getType()->isIntegerTy() && "Unexpected operand type!"); - } while (1); -} - -/// This is a wrapper around GetUnderlyingObjects and adds support for basic -/// ptrtoint+arithmetic+inttoptr sequences. 
-static void getUnderlyingObjects(const Value *V, - SmallVectorImpl<Value *> &Objects, - const DataLayout &DL) { - SmallPtrSet<const Value *, 16> Visited; - SmallVector<const Value *, 4> Working(1, V); - do { - V = Working.pop_back_val(); - - SmallVector<Value *, 4> Objs; - GetUnderlyingObjects(const_cast<Value *>(V), Objs, DL); - - for (Value *V : Objs) { - if (!Visited.insert(V).second) - continue; - if (Operator::getOpcode(V) == Instruction::IntToPtr) { - const Value *O = - getUnderlyingObjectFromInt(cast<Operator>(V)->getOperand(0)); - if (O->getType()->isPointerTy()) { - Working.push_back(O); - continue; - } - } - Objects.push_back(const_cast<Value *>(V)); - } - } while (!Working.empty()); -} - /// If this machine instr has memory reference information and it can be tracked /// to a normal reference to a known object, return the Value for that object. static void getUnderlyingObjectsForInstr(const MachineInstr *MI, @@ -188,12 +151,10 @@ static void getUnderlyingObjectsForInstr(const MachineInstr *MI, Objects.push_back(UnderlyingObjectsVector::value_type(PSV, MayAlias)); } else if (const Value *V = MMO->getValue()) { SmallVector<Value *, 4> Objs; - getUnderlyingObjects(V, Objs, DL); + getUnderlyingObjectsForCodeGen(V, Objs, DL); for (Value *V : Objs) { - if (!isIdentifiedObject(V)) - return false; - + assert(isIdentifiedObject(V)); Objects.push_back(UnderlyingObjectsVector::value_type(V, true)); } } else @@ -563,7 +524,7 @@ void ScheduleDAGInstrs::initSUnits() { // which is contained within a basic block. SUnits.reserve(NumRegionInstrs); - for (MachineInstr &MI : llvm::make_range(RegionBegin, RegionEnd)) { + for (MachineInstr &MI : make_range(RegionBegin, RegionEnd)) { if (MI.isDebugValue()) continue; @@ -606,13 +567,13 @@ void ScheduleDAGInstrs::initSUnits() { class ScheduleDAGInstrs::Value2SUsMap : public MapVector<ValueType, SUList> { /// Current total number of SUs in map. - unsigned NumNodes; + unsigned NumNodes = 0; /// 1 for loads, 0 for stores. (see comment in SUList) unsigned TrueMemOrderLatency; public: - Value2SUsMap(unsigned lat = 0) : NumNodes(0), TrueMemOrderLatency(lat) {} + Value2SUsMap(unsigned lat = 0) : TrueMemOrderLatency(lat) {} /// To keep NumNodes up to date, insert() is used instead of /// this operator w/ push_back(). @@ -630,7 +591,7 @@ class ScheduleDAGInstrs::Value2SUsMap : public MapVector<ValueType, SUList> { void inline clearList(ValueType V) { iterator Itr = find(V); if (Itr != end()) { - assert (NumNodes >= Itr->second.size()); + assert(NumNodes >= Itr->second.size()); NumNodes -= Itr->second.size(); Itr->second.clear(); @@ -646,7 +607,7 @@ class ScheduleDAGInstrs::Value2SUsMap : public MapVector<ValueType, SUList> { unsigned inline size() const { return NumNodes; } /// Counts the number of SUs in this map after a reduction. - void reComputeSize(void) { + void reComputeSize() { NumNodes = 0; for (auto &I : *this) NumNodes += I.second.size(); @@ -676,7 +637,7 @@ void ScheduleDAGInstrs::addChainDependencies(SUnit *SU, } void ScheduleDAGInstrs::addBarrierChain(Value2SUsMap &map) { - assert (BarrierChain != nullptr); + assert(BarrierChain != nullptr); for (auto &I : map) { SUList &sus = I.second; @@ -687,7 +648,7 @@ void ScheduleDAGInstrs::addBarrierChain(Value2SUsMap &map) { } void ScheduleDAGInstrs::insertBarrierChain(Value2SUsMap &map) { - assert (BarrierChain != nullptr); + assert(BarrierChain != nullptr); // Go through all lists of SUs.
for (Value2SUsMap::iterator I = map.begin(), EE = map.end(); I != EE;) { @@ -1028,7 +989,7 @@ void ScheduleDAGInstrs::reduceHugeMemNodeMaps(Value2SUsMap &stores, // The N last elements in NodeNums will be removed, and the SU with // the lowest NodeNum of them will become the new BarrierChain to // let the not yet seen SUs have a dependency to the removed SUs. - assert (N <= NodeNums.size()); + assert(N <= NodeNums.size()); SUnit *newBarrierChain = &SUnits[*(NodeNums.end() - N)]; if (BarrierChain) { // The aliasing and non-aliasing maps reduce independently of each @@ -1057,179 +1018,71 @@ void ScheduleDAGInstrs::reduceHugeMemNodeMaps(Value2SUsMap &stores, loads.dump()); } -void ScheduleDAGInstrs::startBlockForKills(MachineBasicBlock *BB) { - // Start with no live registers. - LiveRegs.reset(); - - // Examine the live-in regs of all successors. - for (const MachineBasicBlock *Succ : BB->successors()) { - for (const auto &LI : Succ->liveins()) { - // Repeat, for reg and all subregs. - for (MCSubRegIterator SubRegs(LI.PhysReg, TRI, /*IncludeSelf=*/true); - SubRegs.isValid(); ++SubRegs) - LiveRegs.set(*SubRegs); - } - } -} - -/// \brief If we change a kill flag on the bundle instruction implicit register -/// operands, then we also need to propagate that to any instructions inside -/// the bundle which had the same kill state. -static void toggleBundleKillFlag(MachineInstr *MI, unsigned Reg, - bool NewKillState, - const TargetRegisterInfo *TRI) { - if (MI->getOpcode() != TargetOpcode::BUNDLE) - return; - - // Walk backwards from the last instruction in the bundle to the first. - // Once we set a kill flag on an instruction, we bail out, as otherwise we - // might set it on too many operands. We will clear as many flags as we - // can though. - MachineBasicBlock::instr_iterator Begin = MI->getIterator(); - MachineBasicBlock::instr_iterator End = getBundleEnd(Begin); - while (Begin != End) { - if (NewKillState) { - if ((--End)->addRegisterKilled(Reg, TRI, /* addIfNotFound= */ false)) - return; - } else - (--End)->clearRegisterKills(Reg, TRI); - } -} - -void ScheduleDAGInstrs::toggleKillFlag(MachineInstr &MI, MachineOperand &MO) { - if (MO.isDebug()) - return; - - // Setting kill flag... - if (!MO.isKill()) { - MO.setIsKill(true); - toggleBundleKillFlag(&MI, MO.getReg(), true, TRI); - return; - } - - // If MO itself is live, clear the kill flag... - if (LiveRegs.test(MO.getReg())) { - MO.setIsKill(false); - toggleBundleKillFlag(&MI, MO.getReg(), false, TRI); - return; - } - - // If any subreg of MO is live, then create an imp-def for that - // subreg and keep MO marked as killed. - MO.setIsKill(false); - toggleBundleKillFlag(&MI, MO.getReg(), false, TRI); - bool AllDead = true; - const unsigned SuperReg = MO.getReg(); - MachineInstrBuilder MIB(MF, &MI); - for (MCSubRegIterator SubRegs(SuperReg, TRI); SubRegs.isValid(); ++SubRegs) { - if (LiveRegs.test(*SubRegs)) { - MIB.addReg(*SubRegs, RegState::ImplicitDefine); - AllDead = false; - } - } +static void toggleKills(const MachineRegisterInfo &MRI, LivePhysRegs &LiveRegs, + MachineInstr &MI, bool addToLiveRegs) { + for (MachineOperand &MO : MI.operands()) { + if (!MO.isReg() || !MO.readsReg()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; - if(AllDead) { - MO.setIsKill(true); - toggleBundleKillFlag(&MI, MO.getReg(), true, TRI); + // Things that are available after the instruction are killed by it. 
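+    // (Editor's note, not part of the original patch: the block is walked
+    //  bottom-up, so "available" means no alias of Reg is live below this
+    //  point; a read here is therefore the last use and gets the kill flag.)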
+ bool IsKill = LiveRegs.available(MRI, Reg); + MO.setIsKill(IsKill); + if (addToLiveRegs) + LiveRegs.addReg(Reg); } } -void ScheduleDAGInstrs::fixupKills(MachineBasicBlock *MBB) { - // FIXME: Reuse the LivePhysRegs utility for this. - DEBUG(dbgs() << "Fixup kills for BB#" << MBB->getNumber() << '\n'); +void ScheduleDAGInstrs::fixupKills(MachineBasicBlock &MBB) { + DEBUG(dbgs() << "Fixup kills for BB#" << MBB.getNumber() << '\n'); - LiveRegs.resize(TRI->getNumRegs()); - BitVector killedRegs(TRI->getNumRegs()); - - startBlockForKills(MBB); + LiveRegs.init(*TRI); + LiveRegs.addLiveOuts(MBB); // Examine block from end to start... - unsigned Count = MBB->size(); - for (MachineBasicBlock::iterator I = MBB->end(), E = MBB->begin(); - I != E; --Count) { - MachineInstr &MI = *--I; + for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) { if (MI.isDebugValue()) continue; // Update liveness. Registers that are defed but not used in this // instruction are now dead. Mark register and all subregs as they // are completely defined. - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI.getOperand(i); - if (MO.isRegMask()) - LiveRegs.clearBitsNotInMask(MO.getRegMask()); - if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); - if (Reg == 0) continue; - if (!MO.isDef()) continue; - // Ignore two-addr defs. - if (MI.isRegTiedToUseOperand(i)) continue; - - // Repeat for reg and all subregs. - for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); - SubRegs.isValid(); ++SubRegs) - LiveRegs.reset(*SubRegs); - } - - // Examine all used registers and set/clear kill flag. When a - // register is used multiple times we only set the kill flag on - // the first use. Don't set kill flags on undef operands. - killedRegs.reset(); - - // toggleKillFlag can append new operands (implicit defs), so using - // a range-based loop is not safe. The new operands will be appended - // at the end of the operand list and they don't need to be visited, - // so iterating until the currently last operand is ok. - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI.getOperand(i); - if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue; - unsigned Reg = MO.getReg(); - if ((Reg == 0) || MRI.isReserved(Reg)) continue; - - bool kill = false; - if (!killedRegs.test(Reg)) { - kill = true; - // A register is not killed if any subregs are live... - for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { - if (LiveRegs.test(*SubRegs)) { - kill = false; - break; - } - } - - // If subreg is not live, then register is killed if it became - // live in this instruction - if (kill) - kill = !LiveRegs.test(Reg); - } - - if (MO.isKill() != kill) { - DEBUG(dbgs() << "Fixing " << MO << " in "); - toggleKillFlag(MI, MO); - DEBUG(MI.dump()); - DEBUG({ - if (MI.getOpcode() == TargetOpcode::BUNDLE) { - MachineBasicBlock::instr_iterator Begin = MI.getIterator(); - MachineBasicBlock::instr_iterator End = getBundleEnd(Begin); - while (++Begin != End) - DEBUG(Begin->dump()); - } - }); + for (ConstMIBundleOperands O(MI); O.isValid(); ++O) { + const MachineOperand &MO = *O; + if (MO.isReg()) { + if (!MO.isDef()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + LiveRegs.removeReg(Reg); + } else if (MO.isRegMask()) { + LiveRegs.removeRegsInMask(MO); } - - killedRegs.set(Reg); } - // Mark any used register (that is not using undef) and subregs as - // now live... 
- for (const MachineOperand &MO : MI.operands()) { - if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue; - unsigned Reg = MO.getReg(); - if ((Reg == 0) || MRI.isReserved(Reg)) continue; - - for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); - SubRegs.isValid(); ++SubRegs) - LiveRegs.set(*SubRegs); + // If there is a bundle header fix it up first. + if (!MI.isBundled()) { + toggleKills(MRI, LiveRegs, MI, true); + } else { + MachineBasicBlock::instr_iterator First = MI.getIterator(); + if (MI.isBundle()) { + toggleKills(MRI, LiveRegs, MI, false); + ++First; + } + // Some targets make the (questionable) assumption that the instructions + // inside the bundle are ordered and consequently only the last use of + // a register inside the bundle can kill it. + MachineBasicBlock::instr_iterator I = std::next(First); + while (I->isBundledWithSucc()) + ++I; + do { + if (!I->isDebugValue()) + toggleKills(MRI, LiveRegs, *I, true); + --I; + } while(I != First); } } } @@ -1264,6 +1117,7 @@ std::string ScheduleDAGInstrs::getDAGName() const { //===----------------------------------------------------------------------===// namespace llvm { + /// Internal state used to compute SchedDFSResult. class SchedDFSImpl { SchedDFSResult &R; @@ -1271,16 +1125,16 @@ class SchedDFSImpl { /// Join DAG nodes into equivalence classes by their subtree. IntEqClasses SubtreeClasses; /// List PredSU, SuccSU pairs that represent data edges between subtrees. - std::vector<std::pair<const SUnit*, const SUnit*> > ConnectionPairs; + std::vector<std::pair<const SUnit*, const SUnit*>> ConnectionPairs; struct RootData { unsigned NodeID; unsigned ParentNodeID; ///< Parent node (member of the parent subtree). - unsigned SubInstrCount; ///< Instr count in this tree only, not children. + unsigned SubInstrCount = 0; ///< Instr count in this tree only, not + /// children. RootData(unsigned id): NodeID(id), - ParentNodeID(SchedDFSResult::InvalidSubtreeID), - SubInstrCount(0) {} + ParentNodeID(SchedDFSResult::InvalidSubtreeID) {} unsigned getSparseSetIndex() const { return NodeID; } }; @@ -1448,12 +1302,15 @@ class SchedDFSImpl { } while (FromTree != SchedDFSResult::InvalidSubtreeID); } }; + } // end namespace llvm namespace { + /// Manage the stack used by a reverse depth-first search over the DAG. class SchedDAGReverseDFS { - std::vector<std::pair<const SUnit*, SDep::const_iterator> > DFSStack; + std::vector<std::pair<const SUnit*, SDep::const_iterator>> DFSStack; + public: bool isComplete() const { return DFSStack.empty(); } @@ -1475,7 +1332,8 @@ class SchedDAGReverseDFS { return getCurr()->Preds.end(); } }; -} // anonymous + +} // end anonymous namespace static bool hasDataSucc(const SUnit *SU) { for (const SDep &SuccDep : SU->Succs) { @@ -1500,7 +1358,7 @@ void SchedDFSResult::compute(ArrayRef<SUnit> SUnits) { SchedDAGReverseDFS DFS; Impl.visitPreorder(&SU); DFS.follow(&SU); - for (;;) { + while (true) { // Traverse the leftmost path as far as possible.
while (DFS.getPred() != DFS.getPredEnd()) { const SDep &PredDep = *DFS.getPred(); @@ -1565,4 +1423,5 @@ raw_ostream &operator<<(raw_ostream &OS, const ILPValue &Val) { } } // end namespace llvm + #endif diff --git a/interpreter/llvm/src/lib/CodeGen/ScheduleDAGPrinter.cpp b/interpreter/llvm/src/lib/CodeGen/ScheduleDAGPrinter.cpp index ca2881cb91e02..bb6a45996f632 100644 --- a/interpreter/llvm/src/lib/CodeGen/ScheduleDAGPrinter.cpp +++ b/interpreter/llvm/src/lib/CodeGen/ScheduleDAGPrinter.cpp @@ -11,11 +11,11 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/ADT/StringExtras.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/IR/Constants.h" #include "llvm/Support/Debug.h" #include "llvm/Support/GraphWriter.h" diff --git a/interpreter/llvm/src/lib/CodeGen/SelectionDAG/CMakeLists.txt b/interpreter/llvm/src/lib/CodeGen/SelectionDAG/CMakeLists.txt index a668ddb7389f1..ae9c5adb03979 100644 --- a/interpreter/llvm/src/lib/CodeGen/SelectionDAG/CMakeLists.txt +++ b/interpreter/llvm/src/lib/CodeGen/SelectionDAG/CMakeLists.txt @@ -17,6 +17,7 @@ add_llvm_library(LLVMSelectionDAG ScheduleDAGVLIW.cpp SelectionDAGBuilder.cpp SelectionDAG.cpp + SelectionDAGAddressAnalysis.cpp SelectionDAGDumper.cpp SelectionDAGISel.cpp SelectionDAGPrinter.cpp diff --git a/interpreter/llvm/src/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/interpreter/llvm/src/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index fb8c50a4d69dc..432c86dd6f1e1 100644 --- a/interpreter/llvm/src/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/interpreter/llvm/src/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -25,6 +25,7 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h" #include "llvm/CodeGen/SelectionDAGTargetInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" @@ -280,6 +281,7 @@ namespace { SDValue visitSELECT_CC(SDNode *N); SDValue visitSETCC(SDNode *N); SDValue visitSETCCE(SDNode *N); + SDValue visitSETCCCARRY(SDNode *N); SDValue visitSIGN_EXTEND(SDNode *N); SDValue visitZERO_EXTEND(SDNode *N); SDValue visitANY_EXTEND(SDNode *N); @@ -398,6 +400,7 @@ namespace { SDValue reduceBuildVecExtToExtBuildVec(SDNode *N); SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N); SDValue reduceBuildVecToShuffle(SDNode *N); + SDValue reduceBuildVecToTrunc(SDNode *N); SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N, ArrayRef<int> VectorMask, SDValue VecIn1, SDValue VecIn2, unsigned LeftIdx); @@ -468,7 +471,8 @@ namespace { /// \return True if a merged store was created. bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores, - bool IsConstantSrc, bool UseVector); + bool IsConstantSrc, bool UseVector, + bool UseTrunc); /// This is a helper function for MergeConsecutiveStores. /// Stores that may be merged are placed in StoreNodes.
@@ -1027,13 +1031,13 @@ SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) { switch (Opc) { default: break; case ISD::AssertSext: - return DAG.getNode(ISD::AssertSext, DL, PVT, - SExtPromoteOperand(Op.getOperand(0), PVT), - Op.getOperand(1)); + if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT)) + return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1)); + break; case ISD::AssertZext: - return DAG.getNode(ISD::AssertZext, DL, PVT, - ZExtPromoteOperand(Op.getOperand(0), PVT), - Op.getOperand(1)); + if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT)) + return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1)); + break; case ISD::Constant: { unsigned ExtOpc = Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; @@ -1114,22 +1118,30 @@ SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) { SDValue RV = DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1)); - // New replace instances of N0 and N1 - if (Replace0 && N0 && N0.getOpcode() != ISD::DELETED_NODE && NN0 && - NN0.getOpcode() != ISD::DELETED_NODE) { + // We are always replacing N0/N1's use in N and only need + // additional replacements if there are additional uses. + Replace0 &= !N0->hasOneUse(); + Replace1 &= (N0 != N1) && !N1->hasOneUse(); + + // Combine Op here so it is preserved past replacements. + CombineTo(Op.getNode(), RV); + + // If operands have a use ordering, make sure we deal with the + // predecessor first. + if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) { + std::swap(N0, N1); + std::swap(NN0, NN1); + } + + if (Replace0) { AddToWorklist(NN0.getNode()); ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode()); } - - if (Replace1 && N1 && N1.getOpcode() != ISD::DELETED_NODE && NN1 && - NN1.getOpcode() != ISD::DELETED_NODE) { + if (Replace1) { AddToWorklist(NN1.getNode()); ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode()); } - - // Deal with Op being deleted. - if (Op && Op.getOpcode() != ISD::DELETED_NODE) - return RV; + return Op; } return SDValue(); } @@ -1457,6 +1469,7 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::SELECT_CC: return visitSELECT_CC(N); case ISD::SETCC: return visitSETCC(N); case ISD::SETCCE: return visitSETCCE(N); + case ISD::SETCCCARRY: return visitSETCCCARRY(N); case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N); case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N); case ISD::ANY_EXTEND: return visitANY_EXTEND(N); @@ -1561,7 +1574,7 @@ SDValue DAGCombiner::combine(SDNode *N) { // If N is a commutative binary node, try commuting it to enable more // sdisel CSE. - if (!RV.getNode() && SelectionDAG::isCommutativeBinOp(N->getOpcode()) && + if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) && N->getNumValues() == 1) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -1729,10 +1742,9 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { NumLeftToConsider--; } - SDValue Result; - // If we've changed things around then replace token factor. if (Changed) { + SDValue Result; if (Ops.empty()) { // The entry token is the only possible outcome. Result = DAG.getEntryNode(); @@ -1749,13 +1761,9 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops); } } - - // Add users to worklist, since we may introduce a lot of new - // chained token factors while removing memory deps.
- return CombineTo(N, Result, true /*add to worklist*/); + return Result; } - - return Result; + return SDValue(); } /// MERGE_VALUES can always be eliminated. @@ -1963,7 +1971,7 @@ SDValue DAGCombiner::visitADD(SDNode *N) { // fold (a+b) -> (a|b) iff a and b share no bits. if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) && - VT.isInteger() && DAG.haveNoCommonBitsSet(N0, N1)) + DAG.haveNoCommonBitsSet(N0, N1)) return DAG.getNode(ISD::OR, DL, VT, N0, N1); if (SDValue Combined = visitADDLike(N0, N1, N)) @@ -1975,6 +1983,44 @@ SDValue DAGCombiner::visitADD(SDNode *N) { return SDValue(); } +static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) { + bool Masked = false; + + // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization. + while (true) { + if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) { + V = V.getOperand(0); + continue; + } + + if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) { + Masked = true; + V = V.getOperand(0); + continue; + } + + break; + } + + // If this is not a carry, return. + if (V.getResNo() != 1) + return SDValue(); + + if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY && + V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO) + return SDValue(); + + // If the result is masked, then no matter what kind of bool it is we can + // return. If it isn't, then we need to make sure the bool type is either 0 or + // 1 and not other values. + if (Masked || + TLI.getBooleanContents(V.getValueType()) == + TargetLoweringBase::ZeroOrOneBooleanContent) + return V; + + return SDValue(); +} + SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference) { EVT VT = N0.getValueType(); SDLoc DL(LocReference); @@ -2022,6 +2068,13 @@ SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference) return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(), N0, N1.getOperand(0), N1.getOperand(2)); + // (add X, Carry) -> (addcarry X, 0, Carry) + if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT)) + if (SDValue Carry = getAsCarry(TLI, N1)) + return DAG.getNode(ISD::ADDCARRY, DL, + DAG.getVTList(VT, Carry.getValueType()), N0, + DAG.getConstant(0, DL, VT), Carry); + return SDValue(); } @@ -2095,6 +2148,8 @@ SDValue DAGCombiner::visitUADDO(SDNode *N) { } SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) { + auto VT = N0.getValueType(); + // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry) // If Y + 1 cannot overflow. if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) { @@ -2105,6 +2160,12 @@ SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) { N1.getOperand(2)); } + // (uaddo X, Carry) -> (addcarry X, 0, Carry) + if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT)) + if (SDValue Carry = getAsCarry(TLI, N1)) + return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, + DAG.getConstant(0, SDLoc(N), VT), Carry); + return SDValue(); } @@ -2143,6 +2204,17 @@ SDValue DAGCombiner::visitADDCARRY(SDNode *N) { if (isNullConstant(CarryIn)) return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1); + // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry. 
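// Editor's aside — an illustrative sketch, not part of the patch: with both
// addends zero, addcarry reduces to widening the carry-in and masking it to
// one bit, with a guaranteed zero carry-out, which is what the fold below
// builds (<cstdint> assumed).
static inline uint32_t addcarry00(uint8_t CarryIn, uint8_t &CarryOut) {
  CarryOut = 0;                  // 0 + 0 + c can never produce a carry-out
  return uint32_t(CarryIn) & 1u; // (and (ext/trunc X), 1)
}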
+ if (isNullConstant(N0) && isNullConstant(N1)) { + EVT VT = N0.getValueType(); + EVT CarryVT = CarryIn.getValueType(); + SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT); + AddToWorklist(CarryExt.getNode()); + return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt, + DAG.getConstant(1, DL, VT)), + DAG.getConstant(0, DL, CarryVT)); + } + if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N)) return Combined; @@ -2156,11 +2228,47 @@ SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N) { // Iff the flag result is dead: // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry) - if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::UADDO) && + if ((N0.getOpcode() == ISD::ADD || + (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0)) && isNullConstant(N1) && !N->hasAnyUseOfValue(1)) return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0.getOperand(0), N0.getOperand(1), CarryIn); + /** + * When one of the addcarry arguments is itself a carry, we may be facing + * a diamond carry propagation. In that case we try to transform the DAG + * to ensure linear carry propagation if that is possible. + * + * We are trying to get: + * (addcarry X, 0, (addcarry A, B, Z):Carry) + */ + if (auto Y = getAsCarry(TLI, N1)) { + /** + * (uaddo A, B) + * / \ + * Carry Sum + * | \ + * | (addcarry *, 0, Z) + * | / + * \ Carry + * | / + * (addcarry X, *, *) + */ + if (Y.getOpcode() == ISD::UADDO && + CarryIn.getResNo() == 1 && + CarryIn.getOpcode() == ISD::ADDCARRY && + isNullConstant(CarryIn.getOperand(1)) && + CarryIn.getOperand(0) == Y.getValue(0)) { + auto NewY = DAG.getNode(ISD::ADDCARRY, SDLoc(N), Y->getVTList(), + Y.getOperand(0), Y.getOperand(1), + CarryIn.getOperand(2)); + AddToWorklist(NewY.getNode()); + return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, + DAG.getConstant(0, SDLoc(N), N0.getValueType()), + NewY.getValue(1)); + } + } + return SDValue(); } @@ -2452,14 +2560,14 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { !DAG.isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0); // fold (mul x, 0) -> 0 - if (N1IsConst && ConstValue1 == 0) + if (N1IsConst && ConstValue1.isNullValue()) return N1; // We require a splat of the entire scalar bit width for non-contiguous // bit patterns. bool IsFullSplat = ConstValue1.getBitWidth() == VT.getScalarSizeInBits(); // fold (mul x, 1) -> x - if (N1IsConst && ConstValue1 == 1 && IsFullSplat) + if (N1IsConst && ConstValue1.isOneValue() && IsFullSplat) return N0; if (SDValue NewSel = foldBinOpIntoSelect(N)) @@ -3588,7 +3696,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // fold (and (or x, C), D) -> D if (C & D) == D if (N1C && N0.getOpcode() == ISD::OR) if (ConstantSDNode *ORI = isConstOrConstSplat(N0.getOperand(1))) - if ((ORI->getAPIntValue() & N1C->getAPIntValue()) == N1C->getAPIntValue()) + if (N1C->getAPIntValue().isSubsetOf(ORI->getAPIntValue())) return N1; // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits. if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) { @@ -3789,9 +3897,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // Note: the SimplifyDemandedBits fold below can make an information-losing // transform, and then we have no way to find this better fold.
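// Editor's aside — a hedged standalone check of the identity behind the fold
// below, not patch code: for a boolean zero-extended to i32,
// (0 - zext(b)) & 1 == zext(b), because negating 0 or 1 leaves bit 0 intact
// (<cstdint> assumed).
static inline uint32_t negBoolAndOne(bool B) {
  uint32_t Z = B ? 1u : 0u; // zext i1 -> i32
  return (0u - Z) & 1u;     // yields Z: false -> 0, true -> 1
}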
if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) { - ConstantSDNode *SubLHS = isConstOrConstSplat(N0.getOperand(0)); - SDValue SubRHS = N0.getOperand(1); - if (SubLHS && SubLHS->isNullValue()) { + if (isNullConstantOrNullSplatConstant(N0.getOperand(0))) { + SDValue SubRHS = N0.getOperand(1); if (SubRHS.getOpcode() == ISD::ZERO_EXTEND && SubRHS.getOperand(0).getScalarValueSizeInBits() == 1) return SubRHS; @@ -4486,6 +4593,20 @@ SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos, return nullptr; } +// if Left + Right == Sum (constant or constant splat vector) +static bool sumMatchConstant(SDValue Left, SDValue Right, unsigned Sum, + SelectionDAG &DAG, const SDLoc &DL) { + EVT ShiftVT = Left.getValueType(); + if (ShiftVT != Right.getValueType()) return false; + + SDValue ShiftSum = DAG.FoldConstantArithmetic(ISD::ADD, DL, ShiftVT, + Left.getNode(), Right.getNode()); + if (!ShiftSum) return false; + + ConstantSDNode *CSum = isConstOrConstSplat(ShiftSum); + return CSum && CSum->getZExtValue() == Sum; +} + // MatchRotate - Handle an 'or' of two operands. If this is one of the many // idioms for rotate, and if the target supports rotation instructions, generate // a rot[lr]. @@ -4531,30 +4652,24 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1) // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2) - if (isConstOrConstSplat(LHSShiftAmt) && isConstOrConstSplat(RHSShiftAmt)) { - uint64_t LShVal = isConstOrConstSplat(LHSShiftAmt)->getZExtValue(); - uint64_t RShVal = isConstOrConstSplat(RHSShiftAmt)->getZExtValue(); - if ((LShVal + RShVal) != EltSizeInBits) - return nullptr; - + if (sumMatchConstant(LHSShiftAmt, RHSShiftAmt, EltSizeInBits, DAG, DL)) { SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt); // If there is an AND of either shifted operand, apply it to the result. if (LHSMask.getNode() || RHSMask.getNode()) { - SDValue Mask = DAG.getAllOnesConstant(DL, VT); + SDValue AllOnes = DAG.getAllOnesConstant(DL, VT); + SDValue Mask = AllOnes; if (LHSMask.getNode()) { - APInt RHSBits = APInt::getLowBitsSet(EltSizeInBits, LShVal); + SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt); Mask = DAG.getNode(ISD::AND, DL, VT, Mask, - DAG.getNode(ISD::OR, DL, VT, LHSMask, - DAG.getConstant(RHSBits, DL, VT))); + DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits)); } if (RHSMask.getNode()) { - APInt LHSBits = APInt::getHighBitsSet(EltSizeInBits, RShVal); + SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt); Mask = DAG.getNode(ISD::AND, DL, VT, Mask, - DAG.getNode(ISD::OR, DL, VT, RHSMask, - DAG.getConstant(LHSBits, DL, VT))); + DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits)); } Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask); @@ -4596,110 +4711,6 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { return nullptr; } -namespace { -/// Helper struct to parse and store a memory address as base + index + offset. -/// We ignore sign extensions when it is safe to do so. -/// The following two expressions are not equivalent. To differentiate we need -/// to store whether there was a sign extension involved in the index -/// computation. 
-/// (load (i64 add (i64 copyfromreg %c) -/// (i64 signextend (add (i8 load %index) -/// (i8 1)))) -/// vs -/// -/// (load (i64 add (i64 copyfromreg %c) -/// (i64 signextend (i32 add (i32 signextend (i8 load %index)) -/// (i32 1))))) -struct BaseIndexOffset { - SDValue Base; - SDValue Index; - int64_t Offset; - bool IsIndexSignExt; - - BaseIndexOffset() : Offset(0), IsIndexSignExt(false) {} - - BaseIndexOffset(SDValue Base, SDValue Index, int64_t Offset, - bool IsIndexSignExt) : - Base(Base), Index(Index), Offset(Offset), IsIndexSignExt(IsIndexSignExt) {} - - bool equalBaseIndex(const BaseIndexOffset &Other) { - return Other.Base == Base && Other.Index == Index && - Other.IsIndexSignExt == IsIndexSignExt; - } - - /// Parses tree in Ptr for base, index, offset addresses. - static BaseIndexOffset match(SDValue Ptr, SelectionDAG &DAG, - int64_t PartialOffset = 0) { - bool IsIndexSignExt = false; - - // Split up a folded GlobalAddress+Offset into its component parts. - if (GlobalAddressSDNode *GA = dyn_cast(Ptr)) - if (GA->getOpcode() == ISD::GlobalAddress && GA->getOffset() != 0) { - return BaseIndexOffset(DAG.getGlobalAddress(GA->getGlobal(), - SDLoc(GA), - GA->getValueType(0), - /*Offset=*/PartialOffset, - /*isTargetGA=*/false, - GA->getTargetFlags()), - SDValue(), - GA->getOffset(), - IsIndexSignExt); - } - - // We only can pattern match BASE + INDEX + OFFSET. If Ptr is not an ADD - // instruction, then it could be just the BASE or everything else we don't - // know how to handle. Just use Ptr as BASE and give up. - if (Ptr->getOpcode() != ISD::ADD) - return BaseIndexOffset(Ptr, SDValue(), PartialOffset, IsIndexSignExt); - - // We know that we have at least an ADD instruction. Try to pattern match - // the simple case of BASE + OFFSET. - if (isa(Ptr->getOperand(1))) { - int64_t Offset = cast(Ptr->getOperand(1))->getSExtValue(); - return match(Ptr->getOperand(0), DAG, Offset + PartialOffset); - } - - // Inside a loop the current BASE pointer is calculated using an ADD and a - // MUL instruction. In this case Ptr is the actual BASE pointer. - // (i64 add (i64 %array_ptr) - // (i64 mul (i64 %induction_var) - // (i64 %element_size))) - if (Ptr->getOperand(1)->getOpcode() == ISD::MUL) - return BaseIndexOffset(Ptr, SDValue(), PartialOffset, IsIndexSignExt); - - // Look at Base + Index + Offset cases. - SDValue Base = Ptr->getOperand(0); - SDValue IndexOffset = Ptr->getOperand(1); - - // Skip signextends. - if (IndexOffset->getOpcode() == ISD::SIGN_EXTEND) { - IndexOffset = IndexOffset->getOperand(0); - IsIndexSignExt = true; - } - - // Either the case of Base + Index (no offset) or something else. - if (IndexOffset->getOpcode() != ISD::ADD) - return BaseIndexOffset(Base, IndexOffset, PartialOffset, IsIndexSignExt); - - // Now we have the case of Base + Index + offset. - SDValue Index = IndexOffset->getOperand(0); - SDValue Offset = IndexOffset->getOperand(1); - - if (!isa(Offset)) - return BaseIndexOffset(Ptr, SDValue(), PartialOffset, IsIndexSignExt); - - // Ignore signextends. - if (Index->getOpcode() == ISD::SIGN_EXTEND) { - Index = Index->getOperand(0); - IsIndexSignExt = true; - } else IsIndexSignExt = false; - - int64_t Off = cast(Offset)->getSExtValue(); - return BaseIndexOffset(Base, Index, Off + PartialOffset, IsIndexSignExt); - } -}; -} // namespace - namespace { /// Represents known origin of an individual byte in load combine pattern. The /// value of the byte is either constant zero or comes from memory. 
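Editor's note: the BaseIndexOffset helper removed above now lives in the SelectionDAGAddressAnalysis files this patch adds to the build. Its core idea is to peel constant additions off an address so two pointers can be compared as a common base plus distinct offsets. A minimal sketch of that decomposition over a toy expression tree (every name here is invented for illustration):

#include <cstdint>
#include <utility>

struct ToyAddr {
  const ToyAddr *Inner = nullptr; // non-null when this node is Inner + Cst
  int64_t Cst = 0;
};

// Return {base, accumulated constant offset} for a chain of additions.
std::pair<const ToyAddr *, int64_t> matchBaseOffset(const ToyAddr *Ptr) {
  int64_t Offset = 0;
  while (Ptr->Inner) { // peel Ptr = Inner + Cst
    Offset += Ptr->Cst;
    Ptr = Ptr->Inner;
  }
  return {Ptr, Offset};
}

Two addresses that decompose to the same base are merge candidates whose distance is simply the difference of their offsets, which is how equalBaseIndex is used in MatchLoadCombine below.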
@@ -4921,13 +4932,14 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { // Loads must share the same base address BaseIndexOffset Ptr = BaseIndexOffset::match(L->getBasePtr(), DAG); + int64_t ByteOffsetFromBase = 0; if (!Base) Base = Ptr; - else if (!Base->equalBaseIndex(Ptr)) + else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase)) return SDValue(); // Calculate the offset of the current byte from the base address - int64_t ByteOffsetFromBase = Ptr.Offset + MemoryByteOffset(*P); + ByteOffsetFromBase += MemoryByteOffset(*P); ByteOffsets[i] = ByteOffsetFromBase; // Remember the first byte load @@ -5271,13 +5283,51 @@ SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) { } SDValue DAGCombiner::visitRotate(SDNode *N) { + SDLoc dl(N); + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N->getValueType(0); + unsigned Bitsize = VT.getScalarSizeInBits(); + + // fold (rot x, 0) -> x + if (isNullConstantOrNullSplatConstant(N1)) + return N0; + + // fold (rot x, c) -> (rot x, c % BitSize) + if (ConstantSDNode *Cst = isConstOrConstSplat(N1)) { + if (Cst->getAPIntValue().uge(Bitsize)) { + uint64_t RotAmt = Cst->getAPIntValue().urem(Bitsize); + return DAG.getNode(N->getOpcode(), dl, VT, N0, + DAG.getConstant(RotAmt, dl, N1.getValueType())); + } + } + // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))). - if (N->getOperand(1).getOpcode() == ISD::TRUNCATE && - N->getOperand(1).getOperand(0).getOpcode() == ISD::AND) { - if (SDValue NewOp1 = - distributeTruncateThroughAnd(N->getOperand(1).getNode())) - return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), - N->getOperand(0), NewOp1); + if (N1.getOpcode() == ISD::TRUNCATE && + N1.getOperand(0).getOpcode() == ISD::AND) { + if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode())) + return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1); + } + + unsigned NextOp = N0.getOpcode(); + // fold (rot* (rot* x, c2), c1) -> (rot* x, c1 +- c2 % bitsize) + if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) { + SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1); + SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)); + if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) { + EVT ShiftVT = C1->getValueType(0); + bool SameSide = (N->getOpcode() == NextOp); + unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB; + if (SDValue CombinedShift = + DAG.FoldConstantArithmetic(CombineOp, dl, ShiftVT, C1, C2)) { + SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT); + SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic( + ISD::SREM, dl, ShiftVT, CombinedShift.getNode(), + BitsizeC.getNode()); + return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0), + CombinedShiftNorm); + } + } } return SDValue(); } @@ -6095,19 +6145,22 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { SDValue N2 = N->getOperand(2); EVT VT = N->getValueType(0); EVT VT0 = N0.getValueType(); + SDLoc DL(N); // fold (select C, X, X) -> X if (N1 == N2) return N1; + if (const ConstantSDNode *N0C = dyn_cast(N0)) { // fold (select true, X, Y) -> X // fold (select false, X, Y) -> Y return !N0C->isNullValue() ? 
N1 : N2; } + // fold (select X, X, Y) -> (or X, Y) // fold (select X, 1, Y) -> (or C, Y) if (VT == VT0 && VT == MVT::i1 && (N0 == N1 || isOneConstant(N1))) - return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2); + return DAG.getNode(ISD::OR, DL, VT, N0, N2); if (SDValue V = foldSelectOfConstants(N)) return V; @@ -6116,22 +6169,22 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) { SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT); AddToWorklist(NOTNode.getNode()); - return DAG.getNode(ISD::AND, SDLoc(N), VT, NOTNode, N2); + return DAG.getNode(ISD::AND, DL, VT, NOTNode, N2); } // fold (select C, X, 1) -> (or (not C), X) if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) { SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT); AddToWorklist(NOTNode.getNode()); - return DAG.getNode(ISD::OR, SDLoc(N), VT, NOTNode, N1); + return DAG.getNode(ISD::OR, DL, VT, NOTNode, N1); } // fold (select X, Y, X) -> (and X, Y) // fold (select X, Y, 0) -> (and X, Y) if (VT == VT0 && VT == MVT::i1 && (N0 == N2 || isNullConstant(N2))) - return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1); + return DAG.getNode(ISD::AND, DL, VT, N0, N1); // If we can fold this based on the true/false value, do so. if (SimplifySelectOps(N, N1, N2)) - return SDValue(N, 0); // Don't revisit N. + return SDValue(N, 0); // Don't revisit N. if (VT0 == MVT::i1) { // The code in this block deals with the following 2 equivalences: @@ -6142,27 +6195,27 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { // to the right anyway if we find the inner select exists in the DAG anyway // and we always transform to the left side if we know that we can further // optimize the combination of the conditions. - bool normalizeToSequence - = TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT); + bool normalizeToSequence = + TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT); // select (and Cond0, Cond1), X, Y // -> select Cond0, (select Cond1, X, Y), Y if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) { SDValue Cond0 = N0->getOperand(0); SDValue Cond1 = N0->getOperand(1); - SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N), - N1.getValueType(), Cond1, N1, N2); + SDValue InnerSelect = + DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2); if (normalizeToSequence || !InnerSelect.use_empty()) - return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0, + return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, InnerSelect, N2); } // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y) if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) { SDValue Cond0 = N0->getOperand(0); SDValue Cond1 = N0->getOperand(1); - SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N), - N1.getValueType(), Cond1, N1, N2); + SDValue InnerSelect = + DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2); if (normalizeToSequence || !InnerSelect.use_empty()) - return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0, N1, + return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1, InnerSelect); } @@ -6174,15 +6227,13 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) { // Create the actual and node if we can generate good code for it. 
if (!normalizeToSequence) { - SDValue And = DAG.getNode(ISD::AND, SDLoc(N), N0.getValueType(), - N0, N1_0); - return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), And, - N1_1, N2); + SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0); + return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1, N2); } // Otherwise see if we can optimize the "and" to a better pattern. if (SDValue Combined = visitANDLike(N0, N1_0, N)) - return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined, - N1_1, N2); + return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1, + N2); } } // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y @@ -6193,15 +6244,13 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) { // Create the actual or node if we can generate good code for it. if (!normalizeToSequence) { - SDValue Or = DAG.getNode(ISD::OR, SDLoc(N), N0.getValueType(), - N0, N2_0); - return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Or, - N1, N2_2); + SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0); + return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1, N2_2); } // Otherwise see if we can optimize to a better pattern. if (SDValue Combined = visitORLike(N0, N2_0, N)) - return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined, - N1, N2_2); + return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1, + N2_2); } } } @@ -6212,8 +6261,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { if (auto *C = dyn_cast(N0->getOperand(1))) { SDValue Cond0 = N0->getOperand(0); if (C->isOne()) - return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), - Cond0, N2, N1); + return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N2, N1); } } } @@ -6230,24 +6278,21 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { // FIXME: Instead of testing for UnsafeFPMath, this should be checking for // no signed zeros as well as no nans. const TargetOptions &Options = DAG.getTarget().Options; - if (Options.UnsafeFPMath && - VT.isFloatingPoint() && N0.hasOneUse() && + if (Options.UnsafeFPMath && VT.isFloatingPoint() && N0.hasOneUse() && DAG.isKnownNeverNaN(N1) && DAG.isKnownNeverNaN(N2)) { ISD::CondCode CC = cast(N0.getOperand(2))->get(); - if (SDValue FMinMax = combineMinNumMaxNum(SDLoc(N), VT, N0.getOperand(0), - N0.getOperand(1), N1, N2, CC, - TLI, DAG)) + if (SDValue FMinMax = combineMinNumMaxNum( + DL, VT, N0.getOperand(0), N0.getOperand(1), N1, N2, CC, TLI, DAG)) return FMinMax; } if ((!LegalOperations && TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) || TLI.isOperationLegal(ISD::SELECT_CC, VT)) - return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, - N0.getOperand(0), N0.getOperand(1), - N1, N2, N0.getOperand(2)); - return SimplifySelect(SDLoc(N), N0, N1, N2); + return DAG.getNode(ISD::SELECT_CC, DL, VT, N0.getOperand(0), + N0.getOperand(1), N1, N2, N0.getOperand(2)); + return SimplifySelect(DL, N0, N1, N2); } return SDValue(); @@ -6748,6 +6793,19 @@ SDValue DAGCombiner::visitSETCCE(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) { + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + SDValue Carry = N->getOperand(2); + SDValue Cond = N->getOperand(3); + + // If Carry is false, fold to a regular SETCC. 
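// Editor's aside — hedged sketch, not patch code: SETCCCARRY is a compare
// that also consumes a borrow produced by an earlier wide operation; one
// plausible scalar model for the unsigned-less-than case is below. With a
// zero borrow it degenerates to the ordinary comparison, which is exactly
// the SETCC the fold underneath emits (<cstdint> assumed).
static inline bool ultWithBorrow(uint32_t L, uint32_t R, uint32_t Borrow) {
  return uint64_t(L) < uint64_t(R) + Borrow; // Borrow == 0 => plain L < R
}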
+ if (isNullConstant(Carry)) + return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond); + + return SDValue(); +} + /// Try to fold a sext/zext/aext dag node into a ConstantSDNode or /// a build_vector of constants. /// This function is called by the DAGCombiner when visiting sext/zext/aext @@ -7118,12 +7176,17 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(), LN0->getBasePtr(), N0.getValueType(), LN0->getMemOperand()); - CombineTo(N, ExtLoad); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad); - CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1)); ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::SIGN_EXTEND); - return SDValue(N, 0); // Return N so it doesn't get rechecked! + // If the load value is used only by N, replace it via CombineTo N. + bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse(); + CombineTo(N, ExtLoad); + if (NoReplaceTrunc) + DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1)); + else + CombineTo(LN0, Trunc, ExtLoad.getValue(1)); + return SDValue(N, 0); } } @@ -7179,10 +7242,14 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0.getOperand(0)), N0.getOperand(0).getValueType(), ExtLoad); - CombineTo(N, And); - CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1)); ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::SIGN_EXTEND); - return SDValue(N, 0); // Return N so it doesn't get rechecked! + bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse(); + CombineTo(N, And); + if (NoReplaceTrunc) + DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1)); + else + CombineTo(LN0, Trunc, ExtLoad.getValue(1)); + return SDValue(N,0); // Return N so it doesn't get rechecked! } } } @@ -7421,12 +7488,15 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad); - CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1)); - - ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N), - ISD::ZERO_EXTEND); + ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N), ISD::ZERO_EXTEND); + // If the load value is used only by N, replace it via CombineTo N. + bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse(); CombineTo(N, ExtLoad); - return SDValue(N, 0); // Return N so it doesn't get rechecked! + if (NoReplaceTrunc) + DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1)); + else + CombineTo(LN0, Trunc, ExtLoad.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! } } @@ -7476,11 +7546,14 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0.getOperand(0)), N0.getOperand(0).getValueType(), ExtLoad); + ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::ZERO_EXTEND); + bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse(); CombineTo(N, And); - CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1)); - ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, - ISD::ZERO_EXTEND); - return SDValue(N, 0); // Return N so it doesn't get rechecked! + if (NoReplaceTrunc) + DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1)); + else + CombineTo(LN0, Trunc, ExtLoad.getValue(1)); + return SDValue(N,0); // Return N so it doesn't get rechecked! 
} } } @@ -7652,13 +7725,18 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { LN0->getChain(), LN0->getBasePtr(), N0.getValueType(), LN0->getMemOperand()); - CombineTo(N, ExtLoad); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad); - CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1)); ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N), ISD::ANY_EXTEND); - return SDValue(N, 0); // Return N so it doesn't get rechecked! + // If the load value is used only by N, replace it via CombineTo N. + bool NoReplaceTrunc = N0.hasOneUse(); + CombineTo(N, ExtLoad); + if (NoReplaceTrunc) + DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1)); + else + CombineTo(LN0, Trunc, ExtLoad.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! } } @@ -8208,18 +8286,20 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() && (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) && TLI.isTypeDesirableForOp(ISD::SHL, VT)) { - if (const ConstantSDNode *CAmt = isConstOrConstSplat(N0.getOperand(1))) { - uint64_t Amt = CAmt->getZExtValue(); - unsigned Size = VT.getScalarSizeInBits(); - - if (Amt < Size) { - SDLoc SL(N); - EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); + SDValue Amt = N0.getOperand(1); + KnownBits Known; + DAG.computeKnownBits(Amt, Known); + unsigned Size = VT.getScalarSizeInBits(); + if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) { + SDLoc SL(N); + EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); - SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0)); - return DAG.getNode(ISD::SHL, SL, VT, Trunc, - DAG.getConstant(Amt, SL, AmtVT)); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0)); + if (AmtVT != Amt.getValueType()) { + Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT); + AddToWorklist(Amt.getNode()); } + return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt); } } @@ -9749,6 +9829,52 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { } } + // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X)) + // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X) + if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() && + (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) && + TLI.isOperationLegal(ISD::FABS, VT)) { + SDValue Select = N0, X = N1; + if (Select.getOpcode() != ISD::SELECT) + std::swap(Select, X); + + SDValue Cond = Select.getOperand(0); + auto TrueOpnd = dyn_cast(Select.getOperand(1)); + auto FalseOpnd = dyn_cast(Select.getOperand(2)); + + if (TrueOpnd && FalseOpnd && + Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X && + isa(Cond.getOperand(1)) && + cast(Cond.getOperand(1))->isExactlyValue(0.0)) { + ISD::CondCode CC = cast(Cond.getOperand(2))->get(); + switch (CC) { + default: break; + case ISD::SETOLT: + case ISD::SETULT: + case ISD::SETOLE: + case ISD::SETULE: + case ISD::SETLT: + case ISD::SETLE: + std::swap(TrueOpnd, FalseOpnd); + // Fall through + case ISD::SETOGT: + case ISD::SETUGT: + case ISD::SETOGE: + case ISD::SETUGE: + case ISD::SETGT: + case ISD::SETGE: + if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) && + TLI.isOperationLegal(ISD::FNEG, VT)) + return DAG.getNode(ISD::FNEG, DL, VT, + DAG.getNode(ISD::FABS, DL, VT, X)); + if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0)) + return DAG.getNode(ISD::FABS, DL, VT, X); + + break; + } + } + } + // FMUL -> FMA combines: if (SDValue Fused = 
visitFMULForFMADistributiveCombine(N)) { AddToWorklist(Fused.getNode()); @@ -10995,7 +11121,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { // x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store) // // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the - // indexed load/store and the expresion that needs to be re-written. + // indexed load/store and the expression that needs to be re-written. // // Therefore, we have: // t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1 @@ -11301,12 +11427,8 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Chain, ReplLoad.getValue(1)); - // Make sure the new and old chains are cleaned up. - AddToWorklist(Token.getNode()); - - // Replace uses with load result and token factor. Don't add users - // to work list. - return CombineTo(N, ReplLoad.getValue(0), Token, false); + // Replace uses with load result and token factor + return CombineTo(N, ReplLoad.getValue(0), Token); } } @@ -11329,7 +11451,7 @@ namespace { /// Shift = srl Ty1 Origin, CstTy Amount /// Inst = trunc Shift to Ty2 /// -/// Then, it will be rewriten into: +/// Then, it will be rewritten into: /// Slice = load SliceTy, Base + SliceOffset /// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2 /// @@ -12275,8 +12397,8 @@ SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl &StoreNodes, } bool DAGCombiner::MergeStoresOfConstantsOrVecElts( - SmallVectorImpl &StoreNodes, EVT MemVT, - unsigned NumStores, bool IsConstantSrc, bool UseVector) { + SmallVectorImpl &StoreNodes, EVT MemVT, unsigned NumStores, + bool IsConstantSrc, bool UseVector, bool UseTrunc) { // Make sure we have something to merge. if (NumStores < 2) return false; @@ -12343,9 +12465,9 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( SDValue Val = St->getValue(); StoreInt <<= ElementSizeBytes * 8; if (ConstantSDNode *C = dyn_cast(Val)) { - StoreInt |= C->getAPIntValue().zext(SizeInBits); + StoreInt |= C->getAPIntValue().zextOrTrunc(SizeInBits); } else if (ConstantFPSDNode *C = dyn_cast(Val)) { - StoreInt |= C->getValueAPF().bitcastToAPInt().zext(SizeInBits); + StoreInt |= C->getValueAPF().bitcastToAPInt().zextOrTrunc(SizeInBits); } else { llvm_unreachable("Invalid constant element type"); } @@ -12358,10 +12480,27 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores); - SDValue NewStore = DAG.getStore(NewChain, DL, StoredVal, - FirstInChain->getBasePtr(), - FirstInChain->getPointerInfo(), - FirstInChain->getAlignment()); + + // Make sure we use a trunc store if it's necessary to be legal.
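// Editor's aside — illustrative only, with assumed names and types: when the
// merged integer type is promoted (say i16 kept in an i32 register), the
// value is written back with a truncating store, i.e. truncate first, then
// store the narrow type, as the code below does at the DAG level
// (<cstdint>/<cstring> assumed).
static inline void truncStore16(uint8_t *P, uint32_t Promoted) {
  const uint16_t Narrow = uint16_t(Promoted); // truncate i32 -> i16
  std::memcpy(P, &Narrow, sizeof(Narrow));    // 16-bit trunc store
}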
+ SDValue NewStore; + if (UseVector || !UseTrunc) { + NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(), + FirstInChain->getPointerInfo(), + FirstInChain->getAlignment()); + } else { // Must be realized as a trunc store + EVT LegalizedStoredValueTy = + TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType()); + unsigned LegalizedStoreSize = LegalizedStoredValueTy.getSizeInBits(); + ConstantSDNode *C = cast(StoredVal); + SDValue ExtendedStoreVal = + DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL, + LegalizedStoredValueTy); + NewStore = DAG.getTruncStore( + NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(), + FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/, + FirstInChain->getAlignment(), + FirstInChain->getMemOperand()->getFlags()); + } // Replace all merged stores with the new store. for (unsigned i = 0; i < NumStores; ++i) @@ -12379,20 +12518,27 @@ void DAGCombiner::getStoreMergeCandidates( EVT MemVT = St->getMemoryVT(); // We must have a base and an offset. - if (!BasePtr.Base.getNode()) + if (!BasePtr.getBase().getNode()) return; // Do not handle stores to undef base pointers. - if (BasePtr.Base.isUndef()) + if (BasePtr.getBase().isUndef()) return; - bool IsLoadSrc = isa(St->getValue()); bool IsConstantSrc = isa(St->getValue()) || isa(St->getValue()); bool IsExtractVecSrc = (St->getValue().getOpcode() == ISD::EXTRACT_VECTOR_ELT || St->getValue().getOpcode() == ISD::EXTRACT_SUBVECTOR); - auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr) -> bool { + bool IsLoadSrc = isa(St->getValue()); + BaseIndexOffset LBasePtr; + // Match on the load's base pointer if relevant. + if (IsLoadSrc) + LBasePtr = BaseIndexOffset::match( + cast(St->getValue())->getBasePtr(), DAG); + + auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr, + int64_t &Offset) -> bool { if (Other->isVolatile() || Other->isIndexed()) return false; // We can merge constant floats to equivalent integers @@ -12400,9 +12546,15 @@ void DAGCombiner::getStoreMergeCandidates( if (!(MemVT.isInteger() && MemVT.bitsEq(Other->getMemoryVT()) && isa(Other->getValue()))) return false; - if (IsLoadSrc) - if (!isa(Other->getValue())) + if (IsLoadSrc) { + // The Load's Base Ptr must also match + if (LoadSDNode *OtherLd = dyn_cast(Other->getValue())) { + auto LPtr = BaseIndexOffset::match(OtherLd->getBasePtr(), DAG); + if (!(LBasePtr.equalBaseIndex(LPtr, DAG))) + return false; + } else return false; + } if (IsConstantSrc) if (!(isa(Other->getValue()) || isa(Other->getValue()))) @@ -12412,7 +12564,7 @@ void DAGCombiner::getStoreMergeCandidates( Other->getValue().getOpcode() == ISD::EXTRACT_SUBVECTOR)) return false; Ptr = BaseIndexOffset::match(Other->getBasePtr(), DAG); - return (Ptr.equalBaseIndex(BasePtr)); + return (BasePtr.equalBaseIndex(Ptr, DAG, Offset)); }; // We are looking for a root node which is an ancestor to all mergeable // stores.
We search up through a load, to our root and then down @@ -12440,38 +12592,52 @@ void DAGCombiner::getStoreMergeCandidates( if (I2.getOperandNo() == 0) if (StoreSDNode *OtherST = dyn_cast(*I2)) { BaseIndexOffset Ptr; - if (CandidateMatch(OtherST, Ptr)) - StoreNodes.push_back(MemOpLink(OtherST, Ptr.Offset)); + int64_t PtrDiff; + if (CandidateMatch(OtherST, Ptr, PtrDiff)) + StoreNodes.push_back(MemOpLink(OtherST, PtrDiff)); } } else for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I) if (I.getOperandNo() == 0) if (StoreSDNode *OtherST = dyn_cast(*I)) { BaseIndexOffset Ptr; - if (CandidateMatch(OtherST, Ptr)) - StoreNodes.push_back(MemOpLink(OtherST, Ptr.Offset)); + int64_t PtrDiff; + if (CandidateMatch(OtherST, Ptr, PtrDiff)) + StoreNodes.push_back(MemOpLink(OtherST, PtrDiff)); } } -// We need to check that merging these stores does not cause a loop -// in the DAG. Any store candidate may depend on another candidate +// We need to check that merging these stores does not cause a loop in +// the DAG. Any store candidate may depend on another candidate // indirectly through its operand (we already consider dependencies // through the chain). Check in parallel by searching up from // non-chain operands of candidates. + bool DAGCombiner::checkMergeStoreCandidatesForDependencies( SmallVectorImpl &StoreNodes, unsigned NumStores) { + + // FIXME: We should be able to truncate a full search of + // predecessors by doing a BFS and keeping tabs on the originating + // stores from which worklist nodes come, in a similar way to + // TokenFactor simplification. + SmallPtrSet Visited; SmallVector Worklist; - // search ops of store candidates + unsigned int Max = 8192; + // Search Ops of store candidates. for (unsigned i = 0; i < NumStores; ++i) { SDNode *n = StoreNodes[i].MemNode; // Potential loops may happen only through non-chain operands for (unsigned j = 1; j < n->getNumOperands(); ++j) Worklist.push_back(n->getOperand(j).getNode()); } - // search through DAG. We can stop early if we find a storenode + // Search through DAG. We can stop early if we find a store node. for (unsigned i = 0; i < NumStores; ++i) { - if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist)) + if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist, + Max)) + return false; + // Check if we ended early, failing conservatively if so. + if (Visited.size() >= Max) return false; } return true; @@ -12538,54 +12704,60 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { // mergeable cases. To prevent this, we prune such stores from the // front of StoreNodes here. - unsigned StartIdx = 0; - while ((StartIdx + 1 < StoreNodes.size()) && - StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes != - StoreNodes[StartIdx + 1].OffsetFromBase) - ++StartIdx; - - // Bail if we don't have enough candidates to merge. - if (StartIdx + 1 >= StoreNodes.size()) - return false; + bool RV = false; + while (StoreNodes.size() > 1) { + unsigned StartIdx = 0; + while ((StartIdx + 1 < StoreNodes.size()) && + StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes != + StoreNodes[StartIdx + 1].OffsetFromBase) + ++StartIdx; - if (StartIdx) - StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx); - - // Scan the memory operations on the chain and find the first non-consecutive - // store memory address.
- unsigned NumConsecutiveStores = 0; - int64_t StartAddress = StoreNodes[0].OffsetFromBase; - - // Check that the addresses are consecutive starting from the second - // element in the list of stores. - for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) { - int64_t CurrAddress = StoreNodes[i].OffsetFromBase; - if (CurrAddress - StartAddress != (ElementSizeBytes * i)) - break; - NumConsecutiveStores = i + 1; - } + // Bail if we don't have enough candidates to merge. + if (StartIdx + 1 >= StoreNodes.size()) + return RV; - if (NumConsecutiveStores < 2) - return false; + if (StartIdx) + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx); + + // Scan the memory operations on the chain and find the first + // non-consecutive store memory address. + unsigned NumConsecutiveStores = 1; + int64_t StartAddress = StoreNodes[0].OffsetFromBase; + // Check that the addresses are consecutive starting from the second + // element in the list of stores. + for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) { + int64_t CurrAddress = StoreNodes[i].OffsetFromBase; + if (CurrAddress - StartAddress != (ElementSizeBytes * i)) + break; + NumConsecutiveStores = i + 1; + } - // Check that we can merge these candidates without causing a cycle - if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumConsecutiveStores)) - return false; + if (NumConsecutiveStores < 2) { + StoreNodes.erase(StoreNodes.begin(), + StoreNodes.begin() + NumConsecutiveStores); + continue; + } + // Check that we can merge these candidates without causing a cycle + if (!checkMergeStoreCandidatesForDependencies(StoreNodes, + NumConsecutiveStores)) { + StoreNodes.erase(StoreNodes.begin(), + StoreNodes.begin() + NumConsecutiveStores); + continue; + } - // The node with the lowest store address. - LLVMContext &Context = *DAG.getContext(); - const DataLayout &DL = DAG.getDataLayout(); + // The node with the lowest store address. + LLVMContext &Context = *DAG.getContext(); + const DataLayout &DL = DAG.getDataLayout(); - // Store the constants into memory as one consecutive store. - if (IsConstantSrc) { - bool RV = false; - while (NumConsecutiveStores > 1) { + // Store the constants into memory as one consecutive store. + if (IsConstantSrc) { LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; unsigned FirstStoreAS = FirstInChain->getAddressSpace(); unsigned FirstStoreAlign = FirstInChain->getAlignment(); - unsigned LastLegalType = 0; - unsigned LastLegalVectorType = 0; + unsigned LastLegalType = 1; + unsigned LastLegalVectorType = 1; + bool LastIntegerTrunc = false; bool NonZero = false; for (unsigned i = 0; i < NumConsecutiveStores; ++i) { StoreSDNode *ST = cast(StoreNodes[i].MemNode); @@ -12606,9 +12778,11 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits); bool IsFast = false; if (TLI.isTypeLegal(StoreTy) && + TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, FirstStoreAlign, &IsFast) && IsFast) { + LastIntegerTrunc = false; LastLegalType = i + 1; // Or check whether a truncstore is legal. 
} else if (TLI.getTypeAction(Context, StoreTy) == @@ -12616,9 +12790,11 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { EVT LegalizedStoredValueTy = TLI.getTypeToTransformTo(Context, StoredVal.getValueType()); if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) && + TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValueTy, DAG) && TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy, FirstStoreAS, FirstStoreAlign, &IsFast) && IsFast) { + LastIntegerTrunc = true; LastLegalType = i + 1; } } @@ -12630,8 +12806,14 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) && !NoVectors) { // Find a legal type for the vector store. - EVT Ty = EVT::getVectorVT(Context, MemVT, i + 1); - if (TLI.isTypeLegal(Ty) && TLI.canMergeStoresTo(Ty) && + unsigned Elts = i + 1; + if (MemVT.isVector()) { + // When merging vector stores, get the total number of elements. + Elts *= MemVT.getVectorNumElements(); + } + EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts); + if (TLI.isTypeLegal(Ty) && + TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) && TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS, FirstStoreAlign, &IsFast) && IsFast) @@ -12640,33 +12822,33 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { } // Check if we found a legal integer type that creates a meaningful merge. - if (LastLegalType < 2 && LastLegalVectorType < 2) - break; + if (LastLegalType < 2 && LastLegalVectorType < 2) { + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1); + continue; + } bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors; unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType; - bool Merged = MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem, - true, UseVector); - if (!Merged) - break; + bool Merged = MergeStoresOfConstantsOrVecElts( + StoreNodes, MemVT, NumElem, true, UseVector, LastIntegerTrunc); + if (!Merged) { + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); + continue; + } // Remove merged stores for next iteration. - StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); RV = true; - NumConsecutiveStores -= NumElem; + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); + continue; } - return RV; - } - // When extracting multiple vector elements, try to store them - // in one vector store rather than a sequence of scalar stores. - if (IsExtractVecSrc) { - bool RV = false; - while (StoreNodes.size() >= 2) { + // When extracting multiple vector elements, try to store them + // in one vector store rather than a sequence of scalar stores. + if (IsExtractVecSrc) { LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; unsigned FirstStoreAS = FirstInChain->getAddressSpace(); unsigned FirstStoreAlign = FirstInChain->getAlignment(); - unsigned NumStoresToMerge = 0; + unsigned NumStoresToMerge = 1; bool IsVec = MemVT.isVector(); for (unsigned i = 0; i < NumConsecutiveStores; ++i) { StoreSDNode *St = cast(StoreNodes[i].MemNode); @@ -12678,7 +12860,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { // handles consecutive loads). if (StoreValOpcode != ISD::EXTRACT_VECTOR_ELT && StoreValOpcode != ISD::EXTRACT_SUBVECTOR) - return false; + return RV; // Find a legal type for the vector store. 
unsigned Elts = i + 1; @@ -12690,6 +12872,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts); bool IsFast; if (TLI.isTypeLegal(Ty) && + TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) && TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS, FirstStoreAlign, &IsFast) && IsFast) @@ -12697,188 +12880,238 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { } bool Merged = MergeStoresOfConstantsOrVecElts( - StoreNodes, MemVT, NumStoresToMerge, false, true); - if (!Merged) - break; + StoreNodes, MemVT, NumStoresToMerge, false, true, false); + if (!Merged) { + StoreNodes.erase(StoreNodes.begin(), + StoreNodes.begin() + NumStoresToMerge); + continue; + } // Remove merged stores for next iteration. StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumStoresToMerge); RV = true; - NumConsecutiveStores -= NumStoresToMerge; + continue; } - return RV; - } - - // Below we handle the case of multiple consecutive stores that - // come from multiple consecutive loads. We merge them into a single - // wide load and a single wide store. - // Look for load nodes which are used by the stored values. - SmallVector LoadNodes; + // Below we handle the case of multiple consecutive stores that + // come from multiple consecutive loads. We merge them into a single + // wide load and a single wide store. - // Find acceptable loads. Loads need to have the same chain (token factor), - // must not be zext, volatile, indexed, and they must be consecutive. - BaseIndexOffset LdBasePtr; - for (unsigned i = 0; i < NumConsecutiveStores; ++i) { - StoreSDNode *St = cast(StoreNodes[i].MemNode); - LoadSDNode *Ld = dyn_cast(St->getValue()); - if (!Ld) break; + // Look for load nodes which are used by the stored values. + SmallVector LoadNodes; - // Loads must only have one use. - if (!Ld->hasNUsesOfValue(1, 0)) - break; - - // The memory operands must not be volatile. - if (Ld->isVolatile() || Ld->isIndexed()) - break; - - // We do not accept ext loads. - if (Ld->getExtensionType() != ISD::NON_EXTLOAD) - break; + // Find acceptable loads. Loads need to have the same chain (token factor), + // must not be zext, volatile, indexed, and they must be consecutive. + BaseIndexOffset LdBasePtr; + for (unsigned i = 0; i < NumConsecutiveStores; ++i) { + StoreSDNode *St = cast(StoreNodes[i].MemNode); + LoadSDNode *Ld = dyn_cast(St->getValue()); + if (!Ld) + break; - // The stored memory type must be the same. - if (Ld->getMemoryVT() != MemVT) - break; + // Loads must only have one use. + if (!Ld->hasNUsesOfValue(1, 0)) + break; - BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr(), DAG); - // If this is not the first ptr that we check. - if (LdBasePtr.Base.getNode()) { - // The base ptr must be the same. - if (!LdPtr.equalBaseIndex(LdBasePtr)) + // The memory operands must not be volatile. + if (Ld->isVolatile() || Ld->isIndexed()) break; - } else { - // Check that all other base pointers are the same as this one. - LdBasePtr = LdPtr; - } - // We found a potential memory operand to merge. - LoadNodes.push_back(MemOpLink(Ld, LdPtr.Offset)); - } + // We do not accept ext loads. + if (Ld->getExtensionType() != ISD::NON_EXTLOAD) + break; - if (LoadNodes.size() < 2) - return false; + // The stored memory type must be the same. + if (Ld->getMemoryVT() != MemVT) + break; - // If we have load/store pair instructions and we only have two values, - // don't bother. 
- unsigned RequiredAlignment; - if (LoadNodes.size() == 2 && TLI.hasPairedLoad(MemVT, RequiredAlignment) && - St->getAlignment() >= RequiredAlignment) - return false; - LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; - unsigned FirstStoreAS = FirstInChain->getAddressSpace(); - unsigned FirstStoreAlign = FirstInChain->getAlignment(); - LoadSDNode *FirstLoad = cast(LoadNodes[0].MemNode); - unsigned FirstLoadAS = FirstLoad->getAddressSpace(); - unsigned FirstLoadAlign = FirstLoad->getAlignment(); - - // Scan the memory operations on the chain and find the first non-consecutive - // load memory address. These variables hold the index in the store node - // array. - unsigned LastConsecutiveLoad = 0; - // This variable refers to the size and not index in the array. - unsigned LastLegalVectorType = 0; - unsigned LastLegalIntegerType = 0; - StartAddress = LoadNodes[0].OffsetFromBase; - SDValue FirstChain = FirstLoad->getChain(); - for (unsigned i = 1; i < LoadNodes.size(); ++i) { - // All loads must share the same chain. - if (LoadNodes[i].MemNode->getChain() != FirstChain) - break; + BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr(), DAG); + // If this is not the first ptr that we check. + int64_t LdOffset = 0; + if (LdBasePtr.getBase().getNode()) { + // The base ptr must be the same. + if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset)) + break; + } else { + // Check that all other base pointers are the same as this one. + LdBasePtr = LdPtr; + } - int64_t CurrAddress = LoadNodes[i].OffsetFromBase; - if (CurrAddress - StartAddress != (ElementSizeBytes * i)) - break; - LastConsecutiveLoad = i; - // Find a legal type for the vector store. - EVT StoreTy = EVT::getVectorVT(Context, MemVT, i+1); - bool IsFastSt, IsFastLd; - if (TLI.isTypeLegal(StoreTy) && - TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, - FirstStoreAlign, &IsFastSt) && IsFastSt && - TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS, - FirstLoadAlign, &IsFastLd) && IsFastLd) { - LastLegalVectorType = i + 1; - } - - // Find a legal type for the integer store. - unsigned SizeInBits = (i+1) * ElementSizeBytes * 8; - StoreTy = EVT::getIntegerVT(Context, SizeInBits); - if (TLI.isTypeLegal(StoreTy) && - TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, - FirstStoreAlign, &IsFastSt) && IsFastSt && - TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS, - FirstLoadAlign, &IsFastLd) && IsFastLd) - LastLegalIntegerType = i + 1; - // Or check whether a truncstore and extload is legal. - else if (TLI.getTypeAction(Context, StoreTy) == - TargetLowering::TypePromoteInteger) { - EVT LegalizedStoredValueTy = - TLI.getTypeToTransformTo(Context, StoreTy); - if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) && - TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy, StoreTy) && - TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy, StoreTy) && - TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy) && - TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy, - FirstStoreAS, FirstStoreAlign, &IsFastSt) && - IsFastSt && - TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy, - FirstLoadAS, FirstLoadAlign, &IsFastLd) && - IsFastLd) - LastLegalIntegerType = i+1; + // We found a potential memory operand to merge. + LoadNodes.push_back(MemOpLink(Ld, LdOffset)); } - } - - // Only use vector types if the vector type is larger than the integer type. - // If they are the same, use integers. 
- bool UseVectorTy = LastLegalVectorType > LastLegalIntegerType && !NoVectors; - unsigned LastLegalType = std::max(LastLegalVectorType, LastLegalIntegerType); - - // We add +1 here because the LastXXX variables refer to location while - // the NumElem refers to array/index size. - unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1); - NumElem = std::min(LastLegalType, NumElem); - if (NumElem < 2) - return false; + if (LoadNodes.size() < 2) { + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1); + continue; + } - // Find if it is better to use vectors or integers to load and store - // to memory. - EVT JointMemOpVT; - if (UseVectorTy) { - JointMemOpVT = EVT::getVectorVT(Context, MemVT, NumElem); - } else { - unsigned SizeInBits = NumElem * ElementSizeBytes * 8; - JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits); - } + // If we have load/store pair instructions and we only have two values, + // don't bother merging. + unsigned RequiredAlignment; + if (LoadNodes.size() == 2 && TLI.hasPairedLoad(MemVT, RequiredAlignment) && + StoreNodes[0].MemNode->getAlignment() >= RequiredAlignment) { + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2); + continue; + } + LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; + unsigned FirstStoreAS = FirstInChain->getAddressSpace(); + unsigned FirstStoreAlign = FirstInChain->getAlignment(); + LoadSDNode *FirstLoad = cast(LoadNodes[0].MemNode); + unsigned FirstLoadAS = FirstLoad->getAddressSpace(); + unsigned FirstLoadAlign = FirstLoad->getAlignment(); + + // Scan the memory operations on the chain and find the first + // non-consecutive load memory address. These variables hold the index in + // the store node array. + unsigned LastConsecutiveLoad = 1; + // This variable refers to the size and not index in the array. + unsigned LastLegalVectorType = 1; + unsigned LastLegalIntegerType = 1; + bool isDereferenceable = true; + bool DoIntegerTruncate = false; + StartAddress = LoadNodes[0].OffsetFromBase; + SDValue FirstChain = FirstLoad->getChain(); + for (unsigned i = 1; i < LoadNodes.size(); ++i) { + // All loads must share the same chain. + if (LoadNodes[i].MemNode->getChain() != FirstChain) + break; - SDLoc LoadDL(LoadNodes[0].MemNode); - SDLoc StoreDL(StoreNodes[0].MemNode); + int64_t CurrAddress = LoadNodes[i].OffsetFromBase; + if (CurrAddress - StartAddress != (ElementSizeBytes * i)) + break; + LastConsecutiveLoad = i; + + if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable()) + isDereferenceable = false; + + // Find a legal type for the vector store. + EVT StoreTy = EVT::getVectorVT(Context, MemVT, i + 1); + bool IsFastSt, IsFastLd; + if (TLI.isTypeLegal(StoreTy) && + TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && + TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, + FirstStoreAlign, &IsFastSt) && + IsFastSt && + TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS, + FirstLoadAlign, &IsFastLd) && + IsFastLd) { + LastLegalVectorType = i + 1; + } - // The merged loads are required to have the same incoming chain, so - // using the first's chain is acceptable. - SDValue NewLoad = DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(), - FirstLoad->getBasePtr(), - FirstLoad->getPointerInfo(), FirstLoadAlign); + // Find a legal type for the integer store. 
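// Editor's aside — conceptual sketch only, not patch code: this scan sizes
// up how many consecutive load/store pairs can legally become one wide pair;
// the wide load and wide store themselves are built further below. In plain
// C++ terms the rewrite turns the first function into the second
// (<cstdint>/<cstring> assumed):
static void copyFourHalves(uint16_t *Dst, const uint16_t *Src) {
  for (int i = 0; i < 4; ++i)
    Dst[i] = Src[i]; // four 16-bit load/store pairs
}
static void copyFourHalvesMerged(uint16_t *Dst, const uint16_t *Src) {
  uint64_t Wide;
  std::memcpy(&Wide, Src, sizeof(Wide)); // one 64-bit load
  std::memcpy(Dst, &Wide, sizeof(Wide)); // one 64-bit store
}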
+ unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8; + StoreTy = EVT::getIntegerVT(Context, SizeInBits); + if (TLI.isTypeLegal(StoreTy) && + TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && + TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, + FirstStoreAlign, &IsFastSt) && + IsFastSt && + TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS, + FirstLoadAlign, &IsFastLd) && + IsFastLd) { + LastLegalIntegerType = i + 1; + DoIntegerTruncate = false; + // Or check whether a truncstore and extload is legal. + } else if (TLI.getTypeAction(Context, StoreTy) == + TargetLowering::TypePromoteInteger) { + EVT LegalizedStoredValueTy = TLI.getTypeToTransformTo(Context, StoreTy); + if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) && + TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValueTy, DAG) && + TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy, + StoreTy) && + TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy, + StoreTy) && + TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy) && + TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy, + FirstStoreAS, FirstStoreAlign, &IsFastSt) && + IsFastSt && + TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS, + FirstLoadAlign, &IsFastLd) && + IsFastLd) { + LastLegalIntegerType = i + 1; + DoIntegerTruncate = true; + } + } + } - SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem); + // Only use vector types if the vector type is larger than the integer type. + // If they are the same, use integers. + bool UseVectorTy = LastLegalVectorType > LastLegalIntegerType && !NoVectors; + unsigned LastLegalType = + std::max(LastLegalVectorType, LastLegalIntegerType); - AddToWorklist(NewStoreChain.getNode()); + // We add +1 here because the LastXXX variables refer to location while + // the NumElem refers to array/index size. + unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1); + NumElem = std::min(LastLegalType, NumElem); - SDValue NewStore = - DAG.getStore(NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(), - FirstInChain->getPointerInfo(), FirstStoreAlign); + if (NumElem < 2) { + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1); + continue; + } - // Transfer chain users from old loads to the new load. - for (unsigned i = 0; i < NumElem; ++i) { - LoadSDNode *Ld = cast(LoadNodes[i].MemNode); - DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), - SDValue(NewLoad.getNode(), 1)); + // Find if it is better to use vectors or integers to load and store + // to memory. + EVT JointMemOpVT; + if (UseVectorTy) { + JointMemOpVT = EVT::getVectorVT(Context, MemVT, NumElem); + } else { + unsigned SizeInBits = NumElem * ElementSizeBytes * 8; + JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits); + } + + SDLoc LoadDL(LoadNodes[0].MemNode); + SDLoc StoreDL(StoreNodes[0].MemNode); + + // The merged loads are required to have the same incoming chain, so + // using the first's chain is acceptable. + + SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem); + AddToWorklist(NewStoreChain.getNode()); + + MachineMemOperand::Flags MMOFlags = isDereferenceable ? 
+                           MachineMemOperand::MODereferenceable:
+                           MachineMemOperand::MONone;
+
+    SDValue NewLoad, NewStore;
+    if (UseVectorTy || !DoIntegerTruncate) {
+      NewLoad = DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(),
+                            FirstLoad->getBasePtr(),
+                            FirstLoad->getPointerInfo(), FirstLoadAlign,
+                            MMOFlags);
+      NewStore = DAG.getStore(NewStoreChain, StoreDL, NewLoad,
+                              FirstInChain->getBasePtr(),
+                              FirstInChain->getPointerInfo(), FirstStoreAlign);
+    } else { // This must be the truncstore/extload case
+      EVT ExtendedTy =
+          TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
+      NewLoad =
+          DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
+                         FirstLoad->getChain(), FirstLoad->getBasePtr(),
+                         FirstLoad->getPointerInfo(), JointMemOpVT,
+                         FirstLoadAlign, MMOFlags);
+      NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad,
+                                   FirstInChain->getBasePtr(),
+                                   FirstInChain->getPointerInfo(), JointMemOpVT,
+                                   FirstInChain->getAlignment(),
+                                   FirstInChain->getMemOperand()->getFlags());
+    }
+
+    // Transfer chain users from old loads to the new load.
+    for (unsigned i = 0; i < NumElem; ++i) {
+      LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
+      DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
+                                    SDValue(NewLoad.getNode(), 1));
+    }
+
+    // Replace all of the stores with the new store.
+    for (unsigned i = 0; i < NumElem; ++i)
+      CombineTo(StoreNodes[i].MemNode, NewStore);
+    RV = true;
+    StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
+    continue;
   }
-
-  // Replace the all stores with the new store.
-  for (unsigned i = 0; i < NumElem; ++i)
-    CombineTo(StoreNodes[i].MemNode, NewStore);
-  return true;
+  return RV;
 }
 
 SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
@@ -13045,10 +13278,6 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
     Chain = ST->getChain();
   }
 
-  // Try transforming N to an indexed store.
-  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
-    return SDValue(N, 0);
-
   // FIXME: is there such a thing as a truncating indexed store?
   if (ST->isTruncatingStore() && ST->isUnindexed() &&
       Value.getValueType().isInteger()) {
@@ -13092,14 +13321,28 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
     }
   }
 
-  // If this is a store followed by a store with the same value to the same
-  // location, then the store is dead/noop.
   if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
-    if (ST1->getBasePtr() == Ptr && ST->getMemoryVT() == ST1->getMemoryVT() &&
-        ST1->getValue() == Value && ST->isUnindexed() && !ST->isVolatile() &&
-        ST1->isUnindexed() && !ST1->isVolatile()) {
-      // The store is dead, remove it.
-      return Chain;
+    if (ST->isUnindexed() && !ST->isVolatile() && ST1->isUnindexed() &&
+        !ST1->isVolatile() && ST1->getBasePtr() == Ptr &&
+        ST->getMemoryVT() == ST1->getMemoryVT()) {
+      // If this is a store followed by a store with the same value to the same
+      // location, then the store is dead/noop.
+      if (ST1->getValue() == Value) {
+        // The store is dead, remove it.
+        return Chain;
+      }
+
+      // If this store fully overwrites a preceding store to the same location
+      // and no other node is chained to that store, we can effectively drop
+      // the preceding store. Do not remove stores to undef as they may be
+      // used as data sinks.
+      if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
+          !ST1->getBasePtr().isUndef()) {
+        // ST1 is fully overwritten and can be elided. Combine with its chain
+        // value.
+        CombineTo(ST1, ST1->getChain());
+        return SDValue();
+      }
     }
   }
 
@@ -13115,7 +13358,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
 
   // Only perform this optimization before the types are legal, because we
   // don't want to perform this optimization on every DAGCombine invocation.
-  if (!LegalTypes) {
+  if ((TLI.mergeStoresAfterLegalization()) ? Level == AfterLegalizeDAG
+                                           : !LegalTypes) {
     for (;;) {
       // There can be multiple store sequences on the same chain.
       // Keep trying to merge store sequences until we are unable to do so
@@ -13129,6 +13373,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
     }
   }
 
+  // Try transforming N to an indexed store.
+  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
+    return SDValue(N, 0);
+
   // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
   //
   // Make sure to do this only after attempting to merge stores in order to
@@ -13861,6 +14109,11 @@ SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
       // when we start sorting the vectors by type.
       return SDValue();
     }
+  } else if (InVT2.getSizeInBits() * 2 == VT.getSizeInBits() &&
+             InVT1.getSizeInBits() == VT.getSizeInBits()) {
+    SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
+    ConcatOps[0] = VecIn2;
+    VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
   } else {
     // TODO: Support cases where the length mismatch isn't exactly by a
     // factor of 2.
@@ -14066,6 +14319,73 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
   return Shuffles[0];
 }
 
+// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
+// operations which can be matched to a truncate.
+SDValue DAGCombiner::reduceBuildVecToTrunc(SDNode *N) {
+  // TODO: Add support for big-endian.
+  if (DAG.getDataLayout().isBigEndian())
+    return SDValue();
+  if (N->getNumOperands() < 2)
+    return SDValue();
+  SDLoc DL(N);
+  EVT VT = N->getValueType(0);
+  unsigned NumElems = N->getNumOperands();
+
+  if (!isTypeLegal(VT))
+    return SDValue();
+
+  // If the input is something other than an EXTRACT_VECTOR_ELT with a constant
+  // index, bail out.
+  // TODO: Allow undef elements in some cases?
+  if (any_of(N->ops(), [VT](SDValue Op) {
+        return Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+               !isa<ConstantSDNode>(Op.getOperand(1)) ||
+               Op.getValueType() != VT.getVectorElementType();
+      }))
+    return SDValue();
+
+  // Helper for obtaining an EXTRACT_VECTOR_ELT's constant index
+  auto GetExtractIdx = [](SDValue Extract) {
+    return cast<ConstantSDNode>(Extract.getOperand(1))->getSExtValue();
+  };
+
+  // The first BUILD_VECTOR operand must be an extract from index zero
+  // (assuming no undef and little-endian).
+  if (GetExtractIdx(N->getOperand(0)) != 0)
+    return SDValue();
+
+  // Compute the stride from the first index.
+  int Stride = GetExtractIdx(N->getOperand(1));
+  SDValue ExtractedFromVec = N->getOperand(0).getOperand(0);
+
+  // Proceed only if the stride and the types can be matched to a truncate.
+  if ((Stride == 1 || !isPowerOf2_32(Stride)) ||
+      (ExtractedFromVec.getValueType().getVectorNumElements() !=
+       Stride * NumElems) ||
+      (VT.getScalarSizeInBits() * Stride > 64))
+    return SDValue();
+
+  // Check remaining operands are consistent with the computed stride.
+  for (unsigned i = 1; i != NumElems; ++i) {
+    SDValue Op = N->getOperand(i);
+
+    if ((Op.getOperand(0) != ExtractedFromVec) ||
+        (GetExtractIdx(Op) != Stride * i))
+      return SDValue();
+  }
+
+  // All checks were ok, construct the truncate.
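// [Editor's aside -- runnable little-endian demo, not part of the patch.]
// reduceBuildVecToTrunc() matches exactly this identity: gathering lanes
// 0, S, 2S, ... of a vector is a bitcast to S-times-wider lanes followed by
// a truncate of each lane, which is why the code bails out on big-endian.
#include <cassert>
#include <cstdint>
#include <cstring>

inline void stridedGatherEqualsTruncate() {
  const uint16_t Src[8] = {1, 2, 3, 4, 5, 6, 7, 8}; // a v8i16
  uint32_t Wide[4];                                 // same bytes seen as v4i32
  std::memcpy(Wide, Src, sizeof(Wide));             // the "bitcast"
  for (int i = 0; i < 4; ++i)                       // Stride 2, NumElems 4
    assert(static_cast<uint16_t>(Wide[i]) == Src[2 * i]); // the "truncate"
}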
+  LLVMContext &Ctx = *DAG.getContext();
+  EVT NewVT = VT.getVectorVT(
+      Ctx, EVT::getIntegerVT(Ctx, VT.getScalarSizeInBits() * Stride), NumElems);
+  EVT TruncVT =
+      VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT;
+
+  SDValue Res = DAG.getBitcast(NewVT, ExtractedFromVec);
+  Res = DAG.getNode(ISD::TRUNCATE, SDLoc(N), TruncVT, Res);
+  return DAG.getBitcast(VT, Res);
+}
+
 SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
   EVT VT = N->getValueType(0);
@@ -14108,6 +14428,10 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
   if (SDValue V = reduceBuildVecConvertToConvertBuildVec(N))
     return V;
 
+  if (TLI.isDesirableToCombineBuildVectorToTruncate())
+    if (SDValue V = reduceBuildVecToTrunc(N))
+      return V;
+
   if (SDValue V = reduceBuildVecToShuffle(N))
     return V;
 
@@ -14412,6 +14736,132 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
   return SDValue();
 }
 
+/// If we are extracting a subvector produced by a wide binary operator with
+/// at least one operand that was the result of a vector concatenation, then
+/// try to use the narrow vector operands directly to avoid the concatenation
+/// and extraction.
+static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
+  // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
+  // some of these bailouts with other transforms.
+
+  // The extract index must be a constant, so we can map it to a concat operand.
+  auto *ExtractIndex = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
+  if (!ExtractIndex)
+    return SDValue();
+
+  // Only handle the case where we are doubling and then halving. A larger
+  // ratio may require more than two narrow binops to replace the wide binop.
+  EVT VT = Extract->getValueType(0);
+  unsigned NumElems = VT.getVectorNumElements();
+  assert((ExtractIndex->getZExtValue() % NumElems) == 0 &&
+         "Extract index is not a multiple of the vector length.");
+  if (Extract->getOperand(0).getValueSizeInBits() != VT.getSizeInBits() * 2)
+    return SDValue();
+
+  // We are looking for an optionally bitcasted wide vector binary operator
+  // feeding an extract subvector.
+  SDValue BinOp = Extract->getOperand(0);
+  if (BinOp.getOpcode() == ISD::BITCAST)
+    BinOp = BinOp.getOperand(0);
+
+  // TODO: The motivating case for this transform is an x86 AVX1 target. That
+  // target has temptingly almost legal versions of bitwise logic ops in 256-bit
+  // flavors, but no other 256-bit integer support. This could be extended to
+  // handle any binop, but that may require fixing/adding other folds to avoid
+  // codegen regressions.
+  unsigned BOpcode = BinOp.getOpcode();
+  if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
+    return SDValue();
+
+  // The binop must be a vector type, so we can chop it in half.
+  EVT WideBVT = BinOp.getValueType();
+  if (!WideBVT.isVector())
+    return SDValue();
+
+  // Bail out if the target does not support a narrower version of the binop.
+  EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
+                                   WideBVT.getVectorNumElements() / 2);
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
+    return SDValue();
+
+  // Peek through bitcasts of the binary operator operands if needed.
+  SDValue LHS = BinOp.getOperand(0);
+  if (LHS.getOpcode() == ISD::BITCAST)
+    LHS = LHS.getOperand(0);
+
+  SDValue RHS = BinOp.getOperand(1);
+  if (RHS.getOpcode() == ISD::BITCAST)
+    RHS = RHS.getOperand(0);
+
+  // We need at least one concatenation operation of a binop operand to make
+  // this transform worthwhile. The concat must double the input vector sizes.
+  // TODO: Should we also handle INSERT_SUBVECTOR patterns?
+  bool ConcatL =
+      LHS.getOpcode() == ISD::CONCAT_VECTORS && LHS.getNumOperands() == 2;
+  bool ConcatR =
+      RHS.getOpcode() == ISD::CONCAT_VECTORS && RHS.getNumOperands() == 2;
+  if (!ConcatL && !ConcatR)
+    return SDValue();
+
+  // If one of the binop operands was not the result of a concat, we must
+  // extract a half-sized operand for our new narrow binop. We can't just reuse
+  // the original extract index operand because we may have bitcasted.
+  unsigned ConcatOpNum = ExtractIndex->getZExtValue() / NumElems;
+  unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
+  EVT ExtBOIdxVT = Extract->getOperand(1).getValueType();
+  SDLoc DL(Extract);
+
+  // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
+  // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, N)
+  // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, N), YN
+  SDValue X = ConcatL ? DAG.getBitcast(NarrowBVT, LHS.getOperand(ConcatOpNum))
+                      : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
+                                    BinOp.getOperand(0),
+                                    DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT));
+
+  SDValue Y = ConcatR ? DAG.getBitcast(NarrowBVT, RHS.getOperand(ConcatOpNum))
+                      : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
+                                    BinOp.getOperand(1),
+                                    DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT));
+
+  SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
+  return DAG.getBitcast(VT, NarrowBinOp);
+}
+
+/// If we are extracting a subvector from a wide vector load, convert to a
+/// narrow load to eliminate the extraction:
+/// (extract_subvector (load wide vector)) --> (load narrow vector)
+static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
+  // TODO: Add support for big-endian. The offset calculation must be adjusted.
+  if (DAG.getDataLayout().isBigEndian())
+    return SDValue();
+
+  // TODO: The one-use check is overly conservative. Check the cost of the
+  // extract instead or remove that condition entirely.
+  auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
+  auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
+  if (!Ld || !Ld->hasOneUse() || Ld->getExtensionType() || Ld->isVolatile() ||
+      !ExtIdx)
+    return SDValue();
+
+  // The narrow load will be offset from the base address of the old load if
+  // we are extracting from something besides index 0 (little-endian).
+  EVT VT = Extract->getValueType(0);
+  SDLoc DL(Extract);
+  SDValue BaseAddr = Ld->getOperand(1);
+  unsigned Offset = ExtIdx->getZExtValue() * VT.getScalarType().getStoreSize();
+
+  // TODO: Use "BaseIndexOffset" to make this more effective.
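// [Editor's aside -- sketch, not part of the patch.] The offset computed
// below is plain element arithmetic: extracting a narrow subvector starting
// at lane ExtIdx of the loaded wide vector reads ExtIdx store-sized elements
// past the old base address, e.g. lane 4 of a loaded v8f32 starts at byte 16.
#include <cstdint>

inline uint64_t narrowLoadByteOffset(uint64_t ExtractLane,
                                     uint64_t EltStoreSizeInBytes) {
  // Mirrors: ExtIdx->getZExtValue() * VT.getScalarType().getStoreSize()
  return ExtractLane * EltStoreSizeInBytes; // e.g. 4 * 4 == 16 for v8f32
}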
+ SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL); + MachineFunction &MF = DAG.getMachineFunction(); + MachineMemOperand *MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset, + VT.getStoreSize()); + SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO); + DAG.makeEquivalentMemoryOrdering(Ld, NewLd); + return NewLd; +} + SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { EVT NVT = N->getValueType(0); SDValue V = N->getOperand(0); @@ -14420,6 +14870,10 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { if (V.isUndef()) return DAG.getUNDEF(NVT); + if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT)) + if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG)) + return NarrowLoad; + // Combine: // (extract_subvec (concat V1, V2, ...), i) // Into: @@ -14467,6 +14921,9 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { } } + if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG)) + return NarrowBOp; + return SDValue(); } @@ -14702,10 +15159,10 @@ static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN, // This is often generated during legalization. // e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src)) // TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case. -SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN, - SelectionDAG &DAG, - const TargetLowering &TLI, - bool LegalOperations) { +static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN, + SelectionDAG &DAG, + const TargetLowering &TLI, + bool LegalOperations) { EVT VT = SVN->getValueType(0); bool IsBigEndian = DAG.getDataLayout().isBigEndian(); @@ -14752,7 +15209,8 @@ SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN, // destination type. This is often generated during legalization. // If the source node itself was a '*_extend_vector_inreg' node then we should // then be able to remove it. -SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG) { +static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN, + SelectionDAG &DAG) { EVT VT = SVN->getValueType(0); bool IsBigEndian = DAG.getDataLayout().isBigEndian(); @@ -14775,6 +15233,11 @@ SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG) { unsigned NumElts = VT.getVectorNumElements(); unsigned EltSizeInBits = VT.getScalarSizeInBits(); unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits(); + unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits(); + + if (ExtDstSizeInBits % ExtSrcSizeInBits != 0) + return SDValue(); + unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits; // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2-1,-1> // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1> @@ -14796,11 +15259,10 @@ SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG) { if (EltSizeInBits != ExtSrcSizeInBits) return SDValue(); - // Attempt to match a 'truncate_vector_inreg' shuffle, we just search for - // power-of-2 truncations as they are the most likely. - for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) - if (isTruncate(Scale)) - return DAG.getBitcast(VT, N00); + // We can remove *extend_vector_inreg only if the truncation happens at + // the same scale as the extension. + if (isTruncate(ExtScale)) + return DAG.getBitcast(VT, N00); return SDValue(); } @@ -16304,9 +16766,23 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const { // Check for BaseIndexOffset matching. 
   BaseIndexOffset BasePtr0 = BaseIndexOffset::match(Op0->getBasePtr(), DAG);
   BaseIndexOffset BasePtr1 = BaseIndexOffset::match(Op1->getBasePtr(), DAG);
 
-  if (BasePtr0.equalBaseIndex(BasePtr1))
-    return !((BasePtr0.Offset + NumBytes0 <= BasePtr1.Offset) ||
-             (BasePtr1.Offset + NumBytes1 <= BasePtr0.Offset));
+  int64_t PtrDiff;
+  if (BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff))
+    return !((NumBytes0 <= PtrDiff) || (PtrDiff + NumBytes1 <= 0));
+
+  // If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be
+  // able to calculate their relative offset if at least one arises
+  // from an alloca. However, these allocas cannot overlap and we
+  // can infer there is no alias.
+  if (auto *A = dyn_cast<FrameIndexSDNode>(BasePtr0.getBase()))
+    if (auto *B = dyn_cast<FrameIndexSDNode>(BasePtr1.getBase())) {
+      MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+      // If the two bases are the same frame index but we could not compute a
+      // constant offset between them, be conservative. Two distinct frame
+      // indexes cannot overlap unless both refer to fixed stack objects.
+      if (A != B && (!MFI.isFixedObjectIndex(A->getIndex()) ||
+                     !MFI.isFixedObjectIndex(B->getIndex())))
+        return false;
+    }
 
   // FIXME: findBaseOffset and ConstantValue/GlobalValue/FrameIndex analysis
   // modified to use BaseIndexOffset.
@@ -16516,11 +16992,11 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
   BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG);
 
   // We must have a base and an offset.
-  if (!BasePtr.Base.getNode())
+  if (!BasePtr.getBase().getNode())
     return false;
 
   // Do not handle stores to undef base pointers.
-  if (BasePtr.Base.isUndef())
+  if (BasePtr.getBase().isUndef())
     return false;
 
   SmallVector<StoreSDNode *, 8> ChainedStores;
@@ -16542,7 +17018,7 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
     BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr(), DAG);
 
     // Check that the base pointer is the same as the original one.
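// [Editor's aside -- runnable sketch, not part of the patch.] The interval
// test in isAlias() above reads: with PtrDiff = offset1 - offset0, access 0
// covers [0, NumBytes0) and access 1 covers [PtrDiff, PtrDiff + NumBytes1),
// and the two may alias unless one ends before the other begins.
#include <cassert>
#include <cstdint>

inline bool mayOverlap(int64_t PtrDiff, int64_t NumBytes0, int64_t NumBytes1) {
  return !((NumBytes0 <= PtrDiff) || (PtrDiff + NumBytes1 <= 0));
}

inline void mayOverlapDemo() {
  assert(!mayOverlap(8, 8, 4));  // [0,8) vs [8,12): disjoint
  assert(mayOverlap(4, 8, 4));   // [0,8) vs [4,8):  overlapping
  assert(!mayOverlap(-4, 8, 4)); // [0,8) vs [-4,0): disjoint
}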
- if (!Ptr.equalBaseIndex(BasePtr)) + if (!BasePtr.equalBaseIndex(Ptr, DAG)) break; // Walk up the chain to find the next store node, ignoring any diff --git a/interpreter/llvm/src/lib/CodeGen/SelectionDAG/FastISel.cpp b/interpreter/llvm/src/lib/CodeGen/SelectionDAG/FastISel.cpp index 5003b79974eba..b2599b2e17f10 100644 --- a/interpreter/llvm/src/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/interpreter/llvm/src/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -39,6 +39,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/FastISel.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APSInt.h" #include "llvm/ADT/DenseMap.h" @@ -50,7 +51,6 @@ #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/CodeGen/Analysis.h" -#include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineBasicBlock.h" diff --git a/interpreter/llvm/src/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/interpreter/llvm/src/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 606b8952f3c17..b736037d71ddc 100644 --- a/interpreter/llvm/src/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/interpreter/llvm/src/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -523,3 +523,29 @@ void FunctionLoweringInfo::setCurrentSwiftErrorVReg( const MachineBasicBlock *MBB, const Value *Val, unsigned VReg) { SwiftErrorVRegDefMap[std::make_pair(MBB, Val)] = VReg; } + +std::pair +FunctionLoweringInfo::getOrCreateSwiftErrorVRegDefAt(const Instruction *I) { + auto Key = PointerIntPair(I, true); + auto It = SwiftErrorVRegDefUses.find(Key); + if (It == SwiftErrorVRegDefUses.end()) { + auto &DL = MF->getDataLayout(); + const TargetRegisterClass *RC = TLI->getRegClassFor(TLI->getPointerTy(DL)); + unsigned VReg = MF->getRegInfo().createVirtualRegister(RC); + SwiftErrorVRegDefUses[Key] = VReg; + return std::make_pair(VReg, true); + } + return std::make_pair(It->second, false); +} + +std::pair +FunctionLoweringInfo::getOrCreateSwiftErrorVRegUseAt(const Instruction *I, const MachineBasicBlock *MBB, const Value *Val) { + auto Key = PointerIntPair(I, false); + auto It = SwiftErrorVRegDefUses.find(Key); + if (It == SwiftErrorVRegDefUses.end()) { + unsigned VReg = getOrCreateSwiftErrorVReg(MBB, Val); + SwiftErrorVRegDefUses[Key] = VReg; + return std::make_pair(VReg, true); + } + return std::make_pair(It->second, false); +} diff --git a/interpreter/llvm/src/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/interpreter/llvm/src/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index b235e19aaab29..b96c96f0b4df4 100644 --- a/interpreter/llvm/src/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/interpreter/llvm/src/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -589,7 +589,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, } else AddOperand(MIB, N0, 0, nullptr, VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned); - // Add the subregster being inserted + // Add the subregister being inserted AddOperand(MIB, N1, 0, nullptr, VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned); MIB.addImm(SubIdx); diff --git a/interpreter/llvm/src/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/interpreter/llvm/src/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 9a47a914df91a..7e4bc3ccb5d39 100644 --- a/interpreter/llvm/src/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/interpreter/llvm/src/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -899,6 +899,35 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode 
*Node) { } } +static TargetLowering::LegalizeAction +getStrictFPOpcodeAction(const TargetLowering &TLI, unsigned Opcode, EVT VT) { + unsigned EqOpc; + switch (Opcode) { + default: llvm_unreachable("Unexpected FP pseudo-opcode"); + case ISD::STRICT_FSQRT: EqOpc = ISD::FSQRT; break; + case ISD::STRICT_FPOW: EqOpc = ISD::FPOW; break; + case ISD::STRICT_FPOWI: EqOpc = ISD::FPOWI; break; + case ISD::STRICT_FSIN: EqOpc = ISD::FSIN; break; + case ISD::STRICT_FCOS: EqOpc = ISD::FCOS; break; + case ISD::STRICT_FEXP: EqOpc = ISD::FEXP; break; + case ISD::STRICT_FEXP2: EqOpc = ISD::FEXP2; break; + case ISD::STRICT_FLOG: EqOpc = ISD::FLOG; break; + case ISD::STRICT_FLOG10: EqOpc = ISD::FLOG10; break; + case ISD::STRICT_FLOG2: EqOpc = ISD::FLOG2; break; + case ISD::STRICT_FRINT: EqOpc = ISD::FRINT; break; + case ISD::STRICT_FNEARBYINT: EqOpc = ISD::FNEARBYINT; break; + } + + auto Action = TLI.getOperationAction(EqOpc, VT); + + // We don't currently handle Custom or Promote for strict FP pseudo-ops. + // For now, we just expand for those cases. + if (Action != TargetLowering::Legal) + Action = TargetLowering::Expand; + + return Action; +} + /// Return a legal replacement for the given operation, with all legal operands. void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { DEBUG(dbgs() << "\nLegalizing: "; Node->dump(&DAG)); @@ -994,7 +1023,6 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { break; case ISD::EXTRACT_ELEMENT: case ISD::FLT_ROUNDS_: - case ISD::FPOWI: case ISD::MERGE_VALUES: case ISD::EH_RETURN: case ISD::FRAME_TO_ARGS_OFFSET: @@ -1043,6 +1071,25 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { return; } break; + case ISD::STRICT_FSQRT: + case ISD::STRICT_FPOW: + case ISD::STRICT_FPOWI: + case ISD::STRICT_FSIN: + case ISD::STRICT_FCOS: + case ISD::STRICT_FEXP: + case ISD::STRICT_FEXP2: + case ISD::STRICT_FLOG: + case ISD::STRICT_FLOG10: + case ISD::STRICT_FLOG2: + case ISD::STRICT_FRINT: + case ISD::STRICT_FNEARBYINT: + // These pseudo-ops get legalized as if they were their non-strict + // equivalent. For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT + // is also legal, but if ISD::FSQRT requires expansion then so does + // ISD::STRICT_FSQRT. 
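// [Editor's aside -- sketch, not part of the patch.] getStrictFPOpcodeAction()
// above collapses to a single rule: a STRICT_* pseudo-op is Legal exactly when
// its non-strict twin is Legal, and everything else (Expand, LibCall, Custom,
// Promote) is treated as Expand for now. With a hypothetical Action enum:
enum class Action { Legal, Custom, Promote, Expand, LibCall };

inline Action strictActionFor(Action NonStrictAction) {
  return NonStrictAction == Action::Legal ? Action::Legal : Action::Expand;
}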
+ Action = getStrictFPOpcodeAction(TLI, Node->getOpcode(), + Node->getValueType(0)); + break; default: if (Node->getOpcode() >= ISD::BUILTIN_OP_END) { @@ -1944,7 +1991,8 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, std::move(Args)) .setTailCall(isTailCall) .setSExtResult(isSigned) - .setZExtResult(!isSigned); + .setZExtResult(!isSigned) + .setIsPostTypeLegalization(true); std::pair CallInfo = TLI.LowerCallTo(CLI); @@ -1982,7 +2030,8 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, .setLibCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args)) .setSExtResult(isSigned) - .setZExtResult(!isSigned); + .setZExtResult(!isSigned) + .setIsPostTypeLegalization(true); std::pair CallInfo = TLI.LowerCallTo(CLI); @@ -2032,6 +2081,9 @@ SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node, RTLIB::Libcall Call_F80, RTLIB::Libcall Call_F128, RTLIB::Libcall Call_PPCF128) { + if (Node->isStrictFPOpcode()) + Node = DAG.mutateStrictFPToFP(Node); + RTLIB::Libcall LC; switch (Node->getSimpleValueType(0).SimpleTy) { default: llvm_unreachable("Unexpected request for libcall!"); @@ -2142,19 +2194,6 @@ static bool isSinCosLibcallAvailable(SDNode *Node, const TargetLowering &TLI) { return TLI.getLibcallName(LC) != nullptr; } -/// Return true if sincos libcall is available and can be used to combine sin -/// and cos. -static bool canCombineSinCosLibcall(SDNode *Node, const TargetLowering &TLI, - const TargetMachine &TM) { - if (!isSinCosLibcallAvailable(Node, TLI)) - return false; - // GNU sin/cos functions set errno while sincos does not. Therefore - // combining sin and cos is only safe if unsafe-fpmath is enabled. - if (TM.getTargetTriple().isGNUEnvironment() && !TM.Options.UnsafeFPMath) - return false; - return true; -} - /// Only issue sincos libcall if both sin and cos are needed. static bool useSinCos(SDNode *Node) { unsigned OtherOpcode = Node->getOpcode() == ISD::FSIN @@ -3197,7 +3236,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { // Turn fsin / fcos into ISD::FSINCOS node if there are a pair of fsin / // fcos which share the same operand and both are used. if ((TLI.isOperationLegalOrCustom(ISD::FSINCOS, VT) || - canCombineSinCosLibcall(Node, TLI, TM)) + isSinCosLibcallAvailable(Node, TLI)) && useSinCos(Node)) { SDVTList VTs = DAG.getVTList(VT, VT); Tmp1 = DAG.getNode(ISD::FSINCOS, dl, VTs, Node->getOperand(0)); @@ -3493,17 +3532,24 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { LC = RTLIB::MUL_I128; assert(LC != RTLIB::UNKNOWN_LIBCALL && "Cannot expand this operation!"); - // The high part is obtained by SRA'ing all but one of the bits of low - // part. - unsigned LoSize = VT.getSizeInBits(); - SDValue HiLHS = - DAG.getNode(ISD::SRA, dl, VT, LHS, - DAG.getConstant(LoSize - 1, dl, - TLI.getPointerTy(DAG.getDataLayout()))); - SDValue HiRHS = - DAG.getNode(ISD::SRA, dl, VT, RHS, - DAG.getConstant(LoSize - 1, dl, - TLI.getPointerTy(DAG.getDataLayout()))); + SDValue HiLHS; + SDValue HiRHS; + if (isSigned) { + // The high part is obtained by SRA'ing all but one of the bits of low + // part. 
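// [Editor's aside -- runnable sketch, not part of the patch.] The SRA trick
// the deleted comment describes, which the new code keeps only for the signed
// path: shifting right arithmetically by width-1 yields 0 or -1, i.e. a high
// half that makes (Hi,Lo) the sign-extension of Lo; the unsigned path can use
// a plain zero instead. (Arithmetic shift of negative values is guaranteed
// from C++20 onward; earlier standards leave it implementation-defined.)
#include <cassert>
#include <cstdint>

inline int32_t signedHighHalf(int32_t Lo) { return Lo >> 31; } // SRA by 31

inline void highHalfDemo() {
  assert(signedHighHalf(5) == 0 && signedHighHalf(-5) == -1);
  // (Hi,Lo) reassembles to the 64-bit sign-extension of Lo:
  uint64_t Bits =
      (uint64_t(uint32_t(signedHighHalf(-5))) << 32) | uint32_t(-5);
  assert(int64_t(Bits) == -5);
}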
+ unsigned LoSize = VT.getSizeInBits(); + HiLHS = + DAG.getNode(ISD::SRA, dl, VT, LHS, + DAG.getConstant(LoSize - 1, dl, + TLI.getPointerTy(DAG.getDataLayout()))); + HiRHS = + DAG.getNode(ISD::SRA, dl, VT, RHS, + DAG.getConstant(LoSize - 1, dl, + TLI.getPointerTy(DAG.getDataLayout()))); + } else { + HiLHS = DAG.getConstant(0, dl, VT); + HiRHS = DAG.getConstant(0, dl, VT); + } // Here we're passing the 2 arguments explicitly as 4 arguments that are // pre-lowered to the correct types. This all depends upon WideVT not @@ -3521,16 +3567,10 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { SDValue Args[] = { HiLHS, LHS, HiRHS, RHS }; Ret = ExpandLibCall(LC, WideVT, Args, 4, isSigned, dl); } - BottomHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret, - DAG.getIntPtrConstant(0, dl)); - TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret, - DAG.getIntPtrConstant(1, dl)); - // Ret is a node with an illegal type. Because such things are not - // generally permitted during this phase of legalization, make sure the - // node has no more uses. The above EXTRACT_ELEMENT nodes should have been - // folded. - assert(Ret->use_empty() && - "Unexpected uses of illegally type from expanded lib call."); + assert(Ret.getOpcode() == ISD::MERGE_VALUES && + "Ret value is a collection of constituent nodes holding result."); + BottomHalf = Ret.getOperand(0); + TopHalf = Ret.getOperand(1); } if (isSigned) { @@ -3907,16 +3947,19 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { RTLIB::FMAX_PPCF128)); break; case ISD::FSQRT: + case ISD::STRICT_FSQRT: Results.push_back(ExpandFPLibCall(Node, RTLIB::SQRT_F32, RTLIB::SQRT_F64, RTLIB::SQRT_F80, RTLIB::SQRT_F128, RTLIB::SQRT_PPCF128)); break; case ISD::FSIN: + case ISD::STRICT_FSIN: Results.push_back(ExpandFPLibCall(Node, RTLIB::SIN_F32, RTLIB::SIN_F64, RTLIB::SIN_F80, RTLIB::SIN_F128, RTLIB::SIN_PPCF128)); break; case ISD::FCOS: + case ISD::STRICT_FCOS: Results.push_back(ExpandFPLibCall(Node, RTLIB::COS_F32, RTLIB::COS_F64, RTLIB::COS_F80, RTLIB::COS_F128, RTLIB::COS_PPCF128)); @@ -3926,26 +3969,31 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { ExpandSinCosLibCall(Node, Results); break; case ISD::FLOG: + case ISD::STRICT_FLOG: Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64, RTLIB::LOG_F80, RTLIB::LOG_F128, RTLIB::LOG_PPCF128)); break; case ISD::FLOG2: + case ISD::STRICT_FLOG2: Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_F32, RTLIB::LOG2_F64, RTLIB::LOG2_F80, RTLIB::LOG2_F128, RTLIB::LOG2_PPCF128)); break; case ISD::FLOG10: + case ISD::STRICT_FLOG10: Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_F32, RTLIB::LOG10_F64, RTLIB::LOG10_F80, RTLIB::LOG10_F128, RTLIB::LOG10_PPCF128)); break; case ISD::FEXP: + case ISD::STRICT_FEXP: Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_F32, RTLIB::EXP_F64, RTLIB::EXP_F80, RTLIB::EXP_F128, RTLIB::EXP_PPCF128)); break; case ISD::FEXP2: + case ISD::STRICT_FEXP2: Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64, RTLIB::EXP2_F80, RTLIB::EXP2_F128, RTLIB::EXP2_PPCF128)); @@ -3966,11 +4014,13 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { RTLIB::CEIL_PPCF128)); break; case ISD::FRINT: + case ISD::STRICT_FRINT: Results.push_back(ExpandFPLibCall(Node, RTLIB::RINT_F32, RTLIB::RINT_F64, RTLIB::RINT_F80, RTLIB::RINT_F128, RTLIB::RINT_PPCF128)); break; case ISD::FNEARBYINT: + case ISD::STRICT_FNEARBYINT: Results.push_back(ExpandFPLibCall(Node, RTLIB::NEARBYINT_F32, RTLIB::NEARBYINT_F64, RTLIB::NEARBYINT_F80, @@ 
-3985,11 +4035,13 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { RTLIB::ROUND_PPCF128)); break; case ISD::FPOWI: + case ISD::STRICT_FPOWI: Results.push_back(ExpandFPLibCall(Node, RTLIB::POWI_F32, RTLIB::POWI_F64, RTLIB::POWI_F80, RTLIB::POWI_F128, RTLIB::POWI_PPCF128)); break; case ISD::FPOW: + case ISD::STRICT_FPOW: Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_F32, RTLIB::POW_F64, RTLIB::POW_F80, RTLIB::POW_F128, RTLIB::POW_PPCF128)); @@ -4536,6 +4588,14 @@ void SelectionDAG::Legalize() { AssignTopologicalOrder(); SmallPtrSet LegalizedNodes; + // Use a delete listener to remove nodes which were deleted during + // legalization from LegalizeNodes. This is needed to handle the situation + // where a new node is allocated by the object pool to the same address of a + // previously deleted node. + DAGNodeDeletedListener DeleteListener( + *this, + [&LegalizedNodes](SDNode *N, SDNode *E) { LegalizedNodes.erase(N); }); + SelectionDAGLegalize Legalizer(*this, LegalizedNodes); // Visit all the nodes. We start in topological order, so that we see diff --git a/interpreter/llvm/src/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/interpreter/llvm/src/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index c1cb5d9b5235e..eaf177d0661b3 100644 --- a/interpreter/llvm/src/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/interpreter/llvm/src/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -112,15 +112,15 @@ bool DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case ISD::VAARG: R = SoftenFloatRes_VAARG(N); break; } - // If R is null, the sub-method took care of registering the result. - if (R.getNode()) { + if (R.getNode() && R.getNode() != N) { SetSoftenedFloat(SDValue(N, ResNo), R); - ReplaceSoftenFloatResult(N, ResNo, R); + // Return true only if the node is changed, assuming that the operands + // are also converted when necessary. + return true; } - // Return true only if the node is changed, - // assuming that the operands are also converted when necessary. + // Otherwise, return false to tell caller to scan operands. 
- return R.getNode() && R.getNode() != N; + return false; } SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N, unsigned ResNo) { @@ -753,12 +753,17 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { llvm_unreachable("Do not know how to soften this operator's operand!"); case ISD::BITCAST: Res = SoftenFloatOp_BITCAST(N); break; + case ISD::CopyToReg: Res = SoftenFloatOp_COPY_TO_REG(N); break; case ISD::BR_CC: Res = SoftenFloatOp_BR_CC(N); break; + case ISD::FABS: Res = SoftenFloatOp_FABS(N); break; + case ISD::FCOPYSIGN: Res = SoftenFloatOp_FCOPYSIGN(N); break; + case ISD::FNEG: Res = SoftenFloatOp_FNEG(N); break; case ISD::FP_EXTEND: Res = SoftenFloatOp_FP_EXTEND(N); break; case ISD::FP_TO_FP16: // Same as FP_ROUND for softening purposes case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break; case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: Res = SoftenFloatOp_FP_TO_XINT(N); break; + case ISD::SELECT: Res = SoftenFloatOp_SELECT(N); break; case ISD::SELECT_CC: Res = SoftenFloatOp_SELECT_CC(N); break; case ISD::SETCC: Res = SoftenFloatOp_SETCC(N); break; case ISD::STORE: @@ -791,9 +796,9 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { bool DAGTypeLegalizer::CanSkipSoftenFloatOperand(SDNode *N, unsigned OpNo) { if (!isLegalInHWReg(N->getOperand(OpNo).getValueType())) return false; - // When the operand type can be kept in registers, SoftenFloatResult - // will call ReplaceValueWith to replace all references and we can - // skip softening this operand. + + // When the operand type can be kept in registers there is nothing to do for + // the following opcodes. switch (N->getOperand(OpNo).getOpcode()) { case ISD::BITCAST: case ISD::ConstantFP: @@ -807,18 +812,12 @@ bool DAGTypeLegalizer::CanSkipSoftenFloatOperand(SDNode *N, unsigned OpNo) { case ISD::SELECT_CC: return true; } - // For some opcodes, SoftenFloatResult handles all conversion of softening - // and replacing operands, so that there is no need to soften operands - // again, although such opcode could be scanned for other illegal operands. + switch (N->getOpcode()) { - case ISD::ConstantFP: - case ISD::CopyFromReg: - case ISD::CopyToReg: - case ISD::FABS: - case ISD::FCOPYSIGN: - case ISD::FNEG: - case ISD::Register: - case ISD::SELECT: + case ISD::ConstantFP: // Leaf node. + case ISD::CopyFromReg: // Operand is a register that we know to be left + // unchanged by SoftenFloatResult(). + case ISD::Register: // Leaf node. return true; } return false; @@ -829,6 +828,21 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BITCAST(SDNode *N) { GetSoftenedFloat(N->getOperand(0))); } +SDValue DAGTypeLegalizer::SoftenFloatOp_COPY_TO_REG(SDNode *N) { + SDValue Op1 = GetSoftenedFloat(N->getOperand(1)); + SDValue Op2 = GetSoftenedFloat(N->getOperand(2)); + + if (Op1 == N->getOperand(1) && Op2 == N->getOperand(2)) + return SDValue(); + + if (N->getNumOperands() == 3) + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op1, Op2), 0); + + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op1, Op2, + N->getOperand(3)), + 0); +} + SDValue DAGTypeLegalizer::SoftenFloatOp_FP_EXTEND(SDNode *N) { // If we get here, the result must be legal but the source illegal. 
EVT SVT = N->getOperand(0).getValueType(); @@ -884,6 +898,34 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) { 0); } +SDValue DAGTypeLegalizer::SoftenFloatOp_FABS(SDNode *N) { + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + + if (Op == N->getOperand(0)) + return SDValue(); + + return SDValue(DAG.UpdateNodeOperands(N, Op), 0); +} + +SDValue DAGTypeLegalizer::SoftenFloatOp_FCOPYSIGN(SDNode *N) { + SDValue Op0 = GetSoftenedFloat(N->getOperand(0)); + SDValue Op1 = GetSoftenedFloat(N->getOperand(1)); + + if (Op0 == N->getOperand(0) && Op1 == N->getOperand(1)) + return SDValue(); + + return SDValue(DAG.UpdateNodeOperands(N, Op0, Op1), 0); +} + +SDValue DAGTypeLegalizer::SoftenFloatOp_FNEG(SDNode *N) { + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + + if (Op == N->getOperand(0)) + return SDValue(); + + return SDValue(DAG.UpdateNodeOperands(N, Op), 0); +} + SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_XINT(SDNode *N) { bool Signed = N->getOpcode() == ISD::FP_TO_SINT; EVT SVT = N->getOperand(0).getValueType(); @@ -913,6 +955,17 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_XINT(SDNode *N) { return DAG.getNode(ISD::TRUNCATE, dl, RVT, Res); } +SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT(SDNode *N) { + SDValue Op1 = GetSoftenedFloat(N->getOperand(1)); + SDValue Op2 = GetSoftenedFloat(N->getOperand(2)); + + if (Op1 == N->getOperand(1) && Op2 == N->getOperand(2)) + return SDValue(); + + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op1, Op2), + 0); +} + SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) { SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1); ISD::CondCode CCCode = cast(N->getOperand(4))->get(); diff --git a/interpreter/llvm/src/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/interpreter/llvm/src/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 92b0d2ae4015c..75fec7bd1d485 100644 --- a/interpreter/llvm/src/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/interpreter/llvm/src/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -615,9 +615,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) { SDValue SetCC = DAG.getNode(N->getOpcode(), dl, SVT, LHS, RHS, N->getOperand(2)); - assert(NVT.bitsLE(SVT) && "Integer type overpromoted?"); // Convert to the expected type. - return DAG.getNode(ISD::TRUNCATE, dl, NVT, SetCC); + return DAG.getSExtOrTrunc(SetCC, dl, NVT); } SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N) { @@ -1828,10 +1827,11 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, TLI.isOperationLegalOrCustom(N->getOpcode() == ISD::ADD ? 
ISD::UADDO : ISD::USUBO, TLI.getTypeToExpandTo(*DAG.getContext(), NVT)); + TargetLoweringBase::BooleanContent BoolType = TLI.getBooleanContents(NVT); + if (hasOVF) { EVT OvfVT = getSetCCResultType(NVT); SDVTList VTList = DAG.getVTList(NVT, OvfVT); - TargetLoweringBase::BooleanContent BoolType = TLI.getBooleanContents(NVT); int RevOpc; if (N->getOpcode() == ISD::ADD) { RevOpc = ISD::SUB; @@ -1864,6 +1864,13 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, Hi = DAG.getNode(ISD::ADD, dl, NVT, makeArrayRef(HiOps, 2)); SDValue Cmp1 = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo, LoOps[0], ISD::SETULT); + + if (BoolType == TargetLoweringBase::ZeroOrOneBooleanContent) { + SDValue Carry = DAG.getZExtOrTrunc(Cmp1, dl, NVT); + Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry); + return; + } + SDValue Carry1 = DAG.getSelect(dl, NVT, Cmp1, DAG.getConstant(1, dl, NVT), DAG.getConstant(0, dl, NVT)); @@ -1878,9 +1885,14 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, SDValue Cmp = DAG.getSetCC(dl, getSetCCResultType(LoOps[0].getValueType()), LoOps[0], LoOps[1], ISD::SETULT); - SDValue Borrow = DAG.getSelect(dl, NVT, Cmp, - DAG.getConstant(1, dl, NVT), - DAG.getConstant(0, dl, NVT)); + + SDValue Borrow; + if (BoolType == TargetLoweringBase::ZeroOrOneBooleanContent) + Borrow = DAG.getZExtOrTrunc(Cmp, dl, NVT); + else + Borrow = DAG.getSelect(dl, NVT, Cmp, DAG.getConstant(1, dl, NVT), + DAG.getConstant(0, dl, NVT)); + Hi = DAG.getNode(ISD::SUB, dl, NVT, Hi, Borrow); } } @@ -2875,6 +2887,7 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::SELECT_CC: Res = ExpandIntOp_SELECT_CC(N); break; case ISD::SETCC: Res = ExpandIntOp_SETCC(N); break; case ISD::SETCCE: Res = ExpandIntOp_SETCCE(N); break; + case ISD::SETCCCARRY: Res = ExpandIntOp_SETCCCARRY(N); break; case ISD::SINT_TO_FP: Res = ExpandIntOp_SINT_TO_FP(N); break; case ISD::STORE: Res = ExpandIntOp_STORE(cast(N), OpNo); break; case ISD::TRUNCATE: Res = ExpandIntOp_TRUNCATE(N); break; @@ -3009,14 +3022,16 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS, return; } - // Lower with SETCCE if the target supports it. + // Lower with SETCCE or SETCCCARRY if the target supports it. + EVT HiVT = LHSHi.getValueType(); + EVT ExpandVT = TLI.getTypeToExpandTo(*DAG.getContext(), HiVT); + bool HasSETCCCARRY = TLI.isOperationLegalOrCustom(ISD::SETCCCARRY, ExpandVT); + // FIXME: Make all targets support this, then remove the other lowering. - if (TLI.getOperationAction( - ISD::SETCCE, - TLI.getTypeToExpandTo(*DAG.getContext(), LHSLo.getValueType())) == - TargetLowering::Custom) { - // SETCCE can detect < and >= directly. For > and <=, flip operands and - // condition code. + if (HasSETCCCARRY || + TLI.getOperationAction(ISD::SETCCE, ExpandVT) == TargetLowering::Custom) { + // SETCCE/SETCCCARRY can detect < and >= directly. For > and <=, flip + // operands and condition code. bool FlipOperands = false; switch (CCCode) { case ISD::SETGT: CCCode = ISD::SETLT; FlipOperands = true; break; @@ -3030,27 +3045,28 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS, std::swap(LHSHi, RHSHi); } // Perform a wide subtraction, feeding the carry from the low part into - // SETCCE. The SETCCE operation is essentially looking at the high part of - // the result of LHS - RHS. It is negative iff LHS < RHS. It is zero or - // positive iff LHS >= RHS. 
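// [Editor's aside -- runnable sketch, not part of the patch.] The expanded
// double-width arithmetic nearby materializes the carry/borrow as a 0-or-1
// value (matching ZeroOrOneBooleanContent): an unsigned compare of the low
// halves is exactly the carry out of the low addition, or the borrow into
// the low subtraction.
#include <cassert>
#include <cstdint>

inline void expandedAddSubDemo() {
  uint32_t LHSLo = 0xFFFFFFFFu, LHSHi = 0, RHSLo = 1, RHSHi = 0;

  uint32_t Lo = LHSLo + RHSLo;
  uint32_t Carry = Lo < LHSLo;              // SETULT result as 0 or 1
  uint32_t Hi = LHSHi + RHSHi + Carry;
  assert(Lo == 0 && Hi == 1);               // 0xFFFFFFFF + 1 == 2^32

  uint32_t Borrow = LHSLo < RHSLo;          // borrow out of the low subtract
  uint32_t HiSub = LHSHi - RHSHi - Borrow;
  assert(LHSLo - RHSLo == 0xFFFFFFFEu && HiSub == 0);
}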
- SDVTList VTList = DAG.getVTList(LHSLo.getValueType(), MVT::Glue); - SDValue LowCmp = DAG.getNode(ISD::SUBC, dl, VTList, LHSLo, RHSLo); - SDValue Res = - DAG.getNode(ISD::SETCCE, dl, getSetCCResultType(LHSLo.getValueType()), - LHSHi, RHSHi, LowCmp.getValue(1), DAG.getCondCode(CCCode)); + // SETCCE/SETCCCARRY. The SETCCE/SETCCCARRY operation is essentially + // looking at the high part of the result of LHS - RHS. It is negative + // iff LHS < RHS. It is zero or positive iff LHS >= RHS. + EVT LoVT = LHSLo.getValueType(); + SDVTList VTList = DAG.getVTList( + LoVT, HasSETCCCARRY ? getSetCCResultType(LoVT) : MVT::Glue); + SDValue LowCmp = DAG.getNode(HasSETCCCARRY ? ISD::USUBO : ISD::SUBC, dl, + VTList, LHSLo, RHSLo); + SDValue Res = DAG.getNode(HasSETCCCARRY ? ISD::SETCCCARRY : ISD::SETCCE, dl, + getSetCCResultType(HiVT), LHSHi, RHSHi, + LowCmp.getValue(1), DAG.getCondCode(CCCode)); NewLHS = Res; NewRHS = SDValue(); return; } - NewLHS = TLI.SimplifySetCC(getSetCCResultType(LHSHi.getValueType()), - LHSHi, RHSHi, ISD::SETEQ, false, - DagCombineInfo, dl); + NewLHS = TLI.SimplifySetCC(getSetCCResultType(HiVT), LHSHi, RHSHi, ISD::SETEQ, + false, DagCombineInfo, dl); if (!NewLHS.getNode()) - NewLHS = DAG.getSetCC(dl, getSetCCResultType(LHSHi.getValueType()), - LHSHi, RHSHi, ISD::SETEQ); - NewLHS = DAG.getSelect(dl, LoCmp.getValueType(), - NewLHS, LoCmp, HiCmp); + NewLHS = + DAG.getSetCC(dl, getSetCCResultType(HiVT), LHSHi, RHSHi, ISD::SETEQ); + NewLHS = DAG.getSelect(dl, LoCmp.getValueType(), NewLHS, LoCmp, HiCmp); NewRHS = SDValue(); } @@ -3103,8 +3119,8 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SETCC(SDNode *N) { } // Otherwise, update N to have the operands specified. - return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS, - DAG.getCondCode(CCCode)), 0); + return SDValue( + DAG.UpdateNodeOperands(N, NewLHS, NewRHS, DAG.getCondCode(CCCode)), 0); } SDValue DAGTypeLegalizer::ExpandIntOp_SETCCE(SDNode *N) { @@ -3125,6 +3141,24 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SETCCE(SDNode *N) { LowCmp.getValue(1), Cond); } +SDValue DAGTypeLegalizer::ExpandIntOp_SETCCCARRY(SDNode *N) { + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + SDValue Carry = N->getOperand(2); + SDValue Cond = N->getOperand(3); + SDLoc dl = SDLoc(N); + + SDValue LHSLo, LHSHi, RHSLo, RHSHi; + GetExpandedInteger(LHS, LHSLo, LHSHi); + GetExpandedInteger(RHS, RHSLo, RHSHi); + + // Expand to a SUBE for the low part and a smaller SETCCCARRY for the high. + SDVTList VTList = DAG.getVTList(LHSLo.getValueType(), Carry.getValueType()); + SDValue LowCmp = DAG.getNode(ISD::SUBCARRY, dl, VTList, LHSLo, RHSLo, Carry); + return DAG.getNode(ISD::SETCCCARRY, dl, N->getValueType(0), LHSHi, RHSHi, + LowCmp.getValue(1), Cond); +} + SDValue DAGTypeLegalizer::ExpandIntOp_Shift(SDNode *N) { // The value being shifted is legal, but the shift amount is too big. 
  // It follows that either the result of the shift is undefined, or the
diff --git a/interpreter/llvm/src/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/interpreter/llvm/src/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index 154af46c94464..001eed9fb8f62 100644
--- a/interpreter/llvm/src/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/interpreter/llvm/src/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -80,6 +80,7 @@ void DAGTypeLegalizer::PerformExpensiveChecks() {
 
   for (unsigned i = 0, e = Node.getNumValues(); i != e; ++i) {
     SDValue Res(&Node, i);
+    EVT VT = Res.getValueType();
     bool Failed = false;
 
     unsigned Mapped = 0;
@@ -129,13 +130,17 @@ void DAGTypeLegalizer::PerformExpensiveChecks() {
         dbgs() << "Unprocessed value in a map!";
         Failed = true;
       }
-    } else if (isTypeLegal(Res.getValueType()) || IgnoreNodeResults(&Node)) {
+    } else if (isTypeLegal(VT) || IgnoreNodeResults(&Node)) {
       if (Mapped > 1) {
         dbgs() << "Value with legal type was transformed!";
         Failed = true;
       }
     } else {
-      if (Mapped == 0) {
+      // If the value can be kept in HW registers, the softening machinery can
+      // leave it unchanged without putting it in any map.
+      if (Mapped == 0 &&
+          !(getTypeAction(VT) == TargetLowering::TypeSoftenFloat &&
+            isLegalInHWReg(VT))) {
         dbgs() << "Processed value not in any map!";
         Failed = true;
       } else if (Mapped & (Mapped - 1)) {
@@ -331,11 +336,6 @@ bool DAGTypeLegalizer::run() {
     if (NeedsReanalyzing) {
       assert(N->getNodeId() == ReadyToProcess && "Node ID recalculated?");
 
-      // Remove any result values from SoftenedFloats as N will be revisited
-      // again.
-      for (unsigned i = 0, NumResults = N->getNumValues(); i < NumResults; ++i)
-        SoftenedFloats.erase(SDValue(N, i));
-
       N->setNodeId(NewNode);
       // Recompute the NodeId and correct processed operands, adding the node to
       // the worklist if ready.
@@ -754,8 +754,6 @@ void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) {
     // new uses of From due to CSE. If this happens, replace the new uses of
     // From with To.
   } while (!From.use_empty());
-
-  SoftenedFloats.erase(From);
 }
 
 void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) {
diff --git a/interpreter/llvm/src/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/interpreter/llvm/src/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 4c3b514856b78..c46d1b04804c9 100644
--- a/interpreter/llvm/src/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/interpreter/llvm/src/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -381,6 +381,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   SDValue ExpandIntOp_SELECT_CC(SDNode *N);
   SDValue ExpandIntOp_SETCC(SDNode *N);
   SDValue ExpandIntOp_SETCCE(SDNode *N);
+  SDValue ExpandIntOp_SETCCCARRY(SDNode *N);
   SDValue ExpandIntOp_Shift(SDNode *N);
   SDValue ExpandIntOp_SINT_TO_FP(SDNode *N);
   SDValue ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo);
@@ -415,16 +416,6 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   }
   void SetSoftenedFloat(SDValue Op, SDValue Result);
 
-  // Call ReplaceValueWith(SDValue(N, ResNo), Res) if necessary.
-  void ReplaceSoftenFloatResult(SDNode *N, unsigned ResNo, SDValue &NewRes) {
-    // When the result type can be kept in HW registers, the converted
-    // NewRes node could have the same type. We can save the effort in
-    // cloning every user of N in SoftenFloatOperand or other legalization functions,
-    // by calling ReplaceValueWith here to update all users.
-    if (NewRes.getNode() != N && isLegalInHWReg(N->getValueType(ResNo)))
-      ReplaceValueWith(SDValue(N, ResNo), NewRes);
-  }
-
   // Convert Float Results to Integer for Non-HW-supported Operations.
   bool SoftenFloatResult(SDNode *N, unsigned ResNo);
 
   SDValue SoftenFloatRes_MERGE_VALUES(SDNode *N, unsigned ResNo);
@@ -470,17 +461,23 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   SDValue SoftenFloatRes_XINT_TO_FP(SDNode *N);
 
   // Return true if we can skip softening the given operand or SDNode because
-  // it was soften before by SoftenFloatResult and references to the operand
-  // were replaced by ReplaceValueWith.
+  // either it was softened before by SoftenFloatResult and references to the
+  // operand were replaced by ReplaceValueWith, or its value type is legal in
+  // HW registers and the operand can be left unchanged.
   bool CanSkipSoftenFloatOperand(SDNode *N, unsigned OpNo);
 
   // Convert Float Operand to Integer for Non-HW-supported Operations.
   bool SoftenFloatOperand(SDNode *N, unsigned OpNo);
   SDValue SoftenFloatOp_BITCAST(SDNode *N);
+  SDValue SoftenFloatOp_COPY_TO_REG(SDNode *N);
   SDValue SoftenFloatOp_BR_CC(SDNode *N);
+  SDValue SoftenFloatOp_FABS(SDNode *N);
+  SDValue SoftenFloatOp_FCOPYSIGN(SDNode *N);
+  SDValue SoftenFloatOp_FNEG(SDNode *N);
   SDValue SoftenFloatOp_FP_EXTEND(SDNode *N);
   SDValue SoftenFloatOp_FP_ROUND(SDNode *N);
   SDValue SoftenFloatOp_FP_TO_XINT(SDNode *N);
+  SDValue SoftenFloatOp_SELECT(SDNode *N);
   SDValue SoftenFloatOp_SELECT_CC(SDNode *N);
   SDValue SoftenFloatOp_SETCC(SDNode *N);
   SDValue SoftenFloatOp_STORE(SDNode *N, unsigned OpNo);
@@ -630,6 +627,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   SDValue ScalarizeVecOp_CONCAT_VECTORS(SDNode *N);
   SDValue ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
   SDValue ScalarizeVecOp_VSELECT(SDNode *N);
+  SDValue ScalarizeVecOp_VSETCC(SDNode *N);
   SDValue ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo);
   SDValue ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo);
 
diff --git a/interpreter/llvm/src/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/interpreter/llvm/src/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index aa69e0e2adfce..f3306151d864b 100644
--- a/interpreter/llvm/src/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/interpreter/llvm/src/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -57,7 +57,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
     // Expand the floating point operand only if it was converted to integers.
     // Otherwise, it is a legal type like f128 that can be saved in a register.
     auto SoftenedOp = GetSoftenedFloat(InOp);
-    if (SoftenedOp == InOp)
+    if (isLegalInHWReg(SoftenedOp.getValueType()))
       break;
     SplitInteger(SoftenedOp, Lo, Hi);
     Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
diff --git a/interpreter/llvm/src/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/interpreter/llvm/src/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 5f167f8de1cfc..9355dbe77f94e 100644
--- a/interpreter/llvm/src/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/interpreter/llvm/src/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -225,6 +225,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
       }
       return TranslateLegalizeResults(Op, Lowered);
     }
+      LLVM_FALLTHROUGH;
     case TargetLowering::Expand:
       Changed = true;
       return LegalizeOp(ExpandLoad(Op));
diff --git a/interpreter/llvm/src/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/interpreter/llvm/src/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index ff0e609803d8a..6aa3270883f08 100644
--- a/interpreter/llvm/src/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/interpreter/llvm/src/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -302,7 +302,21 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) {
 }
 
 SDValue DAGTypeLegalizer::ScalarizeVecRes_VSELECT(SDNode *N) {
-  SDValue Cond = GetScalarizedVector(N->getOperand(0));
+  SDValue Cond = N->getOperand(0);
+  EVT OpVT = Cond.getValueType();
+  SDLoc DL(N);
+  // The vselect result and true/false value operands need scalarizing, but
+  // it's not a given that the Cond does. For instance, in AVX512 v1i1 is
+  // legal. See the similar logic in ScalarizeVecRes_VSETCC.
+  if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) {
+    Cond = GetScalarizedVector(Cond);
+  } else {
+    EVT VT = OpVT.getVectorElementType();
+    Cond = DAG.getNode(
+        ISD::EXTRACT_VECTOR_ELT, DL, VT, Cond,
+        DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+  }
+
   SDValue LHS = GetScalarizedVector(N->getOperand(1));
   TargetLowering::BooleanContent ScalarBool =
       TLI.getBooleanContents(false, false);
@@ -470,6 +484,9 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
     case ISD::VSELECT:
       Res = ScalarizeVecOp_VSELECT(N);
       break;
+    case ISD::SETCC:
+      Res = ScalarizeVecOp_VSETCC(N);
+      break;
     case ISD::STORE:
      Res = ScalarizeVecOp_STORE(cast<StoreSDNode>(N), OpNo);
      break;
@@ -546,6 +563,36 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_VSELECT(SDNode *N) {
                      N->getOperand(2));
 }
 
+/// If the operand is a vector that needs to be scalarized then the
+/// result must be v1i1, so just convert to a scalar SETCC and wrap
+/// with a scalar_to_vector since the result type is legal if we got here.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_VSETCC(SDNode *N) {
+  assert(N->getValueType(0).isVector() &&
+         N->getOperand(0).getValueType().isVector() &&
+         "Operand types must be vectors");
+  assert(N->getValueType(0) == MVT::v1i1 && "Expected v1i1 type");
+
+  EVT VT = N->getValueType(0);
+  SDValue LHS = GetScalarizedVector(N->getOperand(0));
+  SDValue RHS = GetScalarizedVector(N->getOperand(1));
+
+  EVT OpVT = N->getOperand(0).getValueType();
+  EVT NVT = VT.getVectorElementType();
+  SDLoc DL(N);
+  // Turn it into a scalar SETCC.
+  SDValue Res = DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS,
+                            N->getOperand(2));
+
+  // Vectors may have different boolean contents from scalars. Promote the
+  // value appropriately.
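// [Editor's aside -- runnable sketch, not part of the patch.] "Promote the
// value appropriately" means widening the i1 compare result to whatever the
// target's vector boolean content is: a sign extension for 0/-1 lane masks,
// a zero extension for 0/1 booleans.
#include <cassert>
#include <cstdint>

inline int16_t extendBoolean(bool B, bool ContentIsZeroOrNegOne) {
  return ContentIsZeroOrNegOne ? int16_t(B ? -1 : 0)  // sign-extended mask
                               : int16_t(B ? 1 : 0);  // zero-extended bit
}

inline void extendBooleanDemo() {
  assert(extendBoolean(true, true) == -1);  // all-ones lane mask
  assert(extendBoolean(true, false) == 1);  // single-bit boolean
}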
+ ISD::NodeType ExtendCode = + TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT)); + + Res = DAG.getNode(ExtendCode, DL, NVT, Res); + + return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Res); +} + /// If the value to store is a vector that needs to be scalarized, it must be /// <1 x ty>. Just store the element. SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){ @@ -2965,7 +3012,12 @@ static inline bool isSETCCorConvertedSETCC(SDValue N) { else if (N.getOpcode() == ISD::SIGN_EXTEND) N = N.getOperand(0); - return (N.getOpcode() == ISD::SETCC); + if (isLogicalMaskOp(N.getOpcode())) + return isSETCCorConvertedSETCC(N.getOperand(0)) && + isSETCCorConvertedSETCC(N.getOperand(1)); + + return (N.getOpcode() == ISD::SETCC || + ISD::isBuildVectorOfConstantSDNodes(N.getNode())); } #endif @@ -2973,24 +3025,20 @@ static inline bool isSETCCorConvertedSETCC(SDValue N) { // to ToMaskVT if needed with vector extension or truncation. SDValue DAGTypeLegalizer::convertMask(SDValue InMask, EVT MaskVT, EVT ToMaskVT) { - LLVMContext &Ctx = *DAG.getContext(); - // Currently a SETCC or a AND/OR/XOR with two SETCCs are handled. - unsigned InMaskOpc = InMask->getOpcode(); - assert((InMaskOpc == ISD::SETCC || - (isLogicalMaskOp(InMaskOpc) && - isSETCCorConvertedSETCC(InMask->getOperand(0)) && - isSETCCorConvertedSETCC(InMask->getOperand(1)))) && - "Unexpected mask argument."); + // FIXME: This code seems to be too restrictive, we might consider + // generalizing it or dropping it. + assert(isSETCCorConvertedSETCC(InMask) && "Unexpected mask argument."); // Make a new Mask node, with a legal result VT. SmallVector Ops; for (unsigned i = 0; i < InMask->getNumOperands(); ++i) Ops.push_back(InMask->getOperand(i)); - SDValue Mask = DAG.getNode(InMaskOpc, SDLoc(InMask), MaskVT, Ops); + SDValue Mask = DAG.getNode(InMask->getOpcode(), SDLoc(InMask), MaskVT, Ops); // If MaskVT has smaller or bigger elements than ToMaskVT, a vector sign // extend or truncate is needed. 
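// ---------------------------------------------------------------------------
// The convertMask path continuing below sign-extends or truncates the mask
// per lane once it has been re-emitted with a legal VT. A standalone model of
// the widening step, assuming "true is all ones" boolean content
// (ZeroOrNegativeOneBooleanContent); simplified sketch, not the LLVM code:
#include <cstdint>
#include <vector>

inline std::vector<int32_t> widenMaskTo32(const std::vector<int8_t> &Mask8) {
  std::vector<int32_t> Mask32;
  Mask32.reserve(Mask8.size());
  for (int8_t Lane : Mask8)
    Mask32.push_back(static_cast<int32_t>(Lane)); // sign-extend: -1 -> -1, 0 -> 0
  return Mask32;
}
// ---------------------------------------------------------------------------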
+  LLVMContext &Ctx = *DAG.getContext();
   unsigned MaskScalarBits = MaskVT.getScalarSizeInBits();
   unsigned ToMaskScalBits = ToMaskVT.getScalarSizeInBits();

   if (MaskScalarBits < ToMaskScalBits) {
diff --git a/interpreter/llvm/src/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/interpreter/llvm/src/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index d80a281279b6b..1379940932772 100644
--- a/interpreter/llvm/src/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/interpreter/llvm/src/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -11,12 +11,12 @@
 //
 //===----------------------------------------------------------------------===//

-#include "llvm/CodeGen/SchedulerRegistry.h"
 #include "InstrEmitter.h"
 #include "ScheduleDAGSDNodes.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/InlineAsm.h"
diff --git a/interpreter/llvm/src/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/interpreter/llvm/src/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 4f4025d8ae6ad..70b1fa77a0991 100644
--- a/interpreter/llvm/src/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/interpreter/llvm/src/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -15,13 +15,13 @@
 //
 //===----------------------------------------------------------------------===//

-#include "llvm/CodeGen/SchedulerRegistry.h"
 #include "ScheduleDAGSDNodes.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/InlineAsm.h"
@@ -226,6 +226,7 @@ class ScheduleDAGRRList : public ScheduleDAGSDNodes {
     void UnscheduleNodeBottomUp(SUnit*);
     void RestoreHazardCheckerBottomUp();
     void BacktrackBottomUp(SUnit*, SUnit*);
+    SUnit *TryUnfoldSU(SUnit *);
     SUnit *CopyAndMoveSuccessors(SUnit*);
     void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
                                   const TargetRegisterClass*,
@@ -780,7 +781,7 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
 }

 /// CapturePred - This does the opposite of ReleasePred. Since SU is being
-/// unscheduled, incrcease the succ left count of its predecessors. Remove
+/// unscheduled, increase the succ left count of its predecessors. Remove
 /// them from AvailableQueue if necessary.
 void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
   SUnit *PredSU = PredEdge->getSUnit();
@@ -934,6 +935,146 @@ static bool isOperandOf(const SUnit *SU, SDNode *N) {
   return false;
 }

+/// TryUnfoldSU - Attempt to unfold SU's instruction into a separate load and
+/// operation; returns the new operation's SUnit on success.
+SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) {
+  SDNode *N = SU->getNode();
+  SmallVector NewNodes;
+  if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
+    return nullptr;
+
+  // Unfolding an x86 DEC64m operation results in store, dec, load, which
+  // can't be handled here, so quit.
+  if (NewNodes.size() == 3)
+    return nullptr;
+
+  assert(NewNodes.size() == 2 && "Expected a load folding node!");
+
+  N = NewNodes[1];
+  SDNode *LoadNode = NewNodes[0];
+  unsigned NumVals = N->getNumValues();
+  unsigned OldNumVals = SU->getNode()->getNumValues();
+
+  // LoadNode may already exist. This can happen when there is another
+  // load from the same location that produces the same type of value,
+  // but with different alignment or volatility.
+ bool isNewLoad = true; + SUnit *LoadSU; + if (LoadNode->getNodeId() != -1) { + LoadSU = &SUnits[LoadNode->getNodeId()]; + // If LoadSU has already been scheduled, we should clone it but + // this would negate the benefit to unfolding so just return SU. + if (LoadSU->isScheduled) + return SU; + isNewLoad = false; + } else { + LoadSU = CreateNewSUnit(LoadNode); + LoadNode->setNodeId(LoadSU->NodeNum); + + InitNumRegDefsLeft(LoadSU); + computeLatency(LoadSU); + } + + DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n"); + + // Now that we are committed to unfolding replace DAG Uses. + for (unsigned i = 0; i != NumVals; ++i) + DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i)); + DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals - 1), + SDValue(LoadNode, 1)); + + SUnit *NewSU = CreateNewSUnit(N); + assert(N->getNodeId() == -1 && "Node already inserted!"); + N->setNodeId(NewSU->NodeNum); + + const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); + for (unsigned i = 0; i != MCID.getNumOperands(); ++i) { + if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) { + NewSU->isTwoAddress = true; + break; + } + } + if (MCID.isCommutable()) + NewSU->isCommutable = true; + + InitNumRegDefsLeft(NewSU); + computeLatency(NewSU); + + // Record all the edges to and from the old SU, by category. + SmallVector ChainPreds; + SmallVector ChainSuccs; + SmallVector LoadPreds; + SmallVector NodePreds; + SmallVector NodeSuccs; + for (SDep &Pred : SU->Preds) { + if (Pred.isCtrl()) + ChainPreds.push_back(Pred); + else if (isOperandOf(Pred.getSUnit(), LoadNode)) + LoadPreds.push_back(Pred); + else + NodePreds.push_back(Pred); + } + for (SDep &Succ : SU->Succs) { + if (Succ.isCtrl()) + ChainSuccs.push_back(Succ); + else + NodeSuccs.push_back(Succ); + } + + // Now assign edges to the newly-created nodes. + for (const SDep &Pred : ChainPreds) { + RemovePred(SU, Pred); + if (isNewLoad) + AddPred(LoadSU, Pred); + } + for (const SDep &Pred : LoadPreds) { + RemovePred(SU, Pred); + if (isNewLoad) + AddPred(LoadSU, Pred); + } + for (const SDep &Pred : NodePreds) { + RemovePred(SU, Pred); + AddPred(NewSU, Pred); + } + for (SDep D : NodeSuccs) { + SUnit *SuccDep = D.getSUnit(); + D.setSUnit(SU); + RemovePred(SuccDep, D); + D.setSUnit(NewSU); + AddPred(SuccDep, D); + // Balance register pressure. + if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled && + !D.isCtrl() && NewSU->NumRegDefsLeft > 0) + --NewSU->NumRegDefsLeft; + } + for (SDep D : ChainSuccs) { + SUnit *SuccDep = D.getSUnit(); + D.setSUnit(SU); + RemovePred(SuccDep, D); + if (isNewLoad) { + D.setSUnit(LoadSU); + AddPred(SuccDep, D); + } + } + + // Add a data dependency to reflect that NewSU reads the value defined + // by LoadSU. + SDep D(LoadSU, SDep::Data, 0); + D.setLatency(LoadSU->Latency); + AddPred(NewSU, D); + + if (isNewLoad) + AvailableQueue->addNode(LoadSU); + AvailableQueue->addNode(NewSU); + + ++NumUnfolds; + + if (NewSU->NumSuccsLeft == 0) + NewSU->isAvailable = true; + + return NewSU; +} + /// CopyAndMoveSuccessors - Clone the specified node and move its scheduled /// successors to the newly created node. SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { @@ -959,135 +1100,16 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { return nullptr; } + // If possible unfold instruction. 
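// ---------------------------------------------------------------------------
// TryUnfoldSU above buckets the old SUnit's dependence edges before rewiring
// them: control edges, edges feeding the unfolded load, and everything else.
// A toy model of that partitioning over plain structs (names are illustrative,
// not LLVM types):
#include <vector>

struct Dep { int SrcNode; bool IsCtrl; bool FeedsLoad; };
struct DepBuckets { std::vector<Dep> Chain, Load, Node; };

inline DepBuckets partitionPreds(const std::vector<Dep> &Preds) {
  DepBuckets B;
  for (const Dep &D : Preds) {
    if (D.IsCtrl)
      B.Chain.push_back(D);  // ChainPreds: control/chain dependencies
    else if (D.FeedsLoad)
      B.Load.push_back(D);   // LoadPreds: operands of the new load
    else
      B.Node.push_back(D);   // NodePreds: operands of the remaining op
  }
  return B;
}
// ---------------------------------------------------------------------------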
if (TryUnfold) { - SmallVector NewNodes; - if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes)) + SUnit *UnfoldSU = TryUnfoldSU(SU); + if (!UnfoldSU) return nullptr; - - // unfolding an x86 DEC64m operation results in store, dec, load which - // can't be handled here so quit - if (NewNodes.size() == 3) - return nullptr; - - DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n"); - assert(NewNodes.size() == 2 && "Expected a load folding node!"); - - N = NewNodes[1]; - SDNode *LoadNode = NewNodes[0]; - unsigned NumVals = N->getNumValues(); - unsigned OldNumVals = SU->getNode()->getNumValues(); - for (unsigned i = 0; i != NumVals; ++i) - DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i)); - DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1), - SDValue(LoadNode, 1)); - - // LoadNode may already exist. This can happen when there is another - // load from the same location and producing the same type of value - // but it has different alignment or volatileness. - bool isNewLoad = true; - SUnit *LoadSU; - if (LoadNode->getNodeId() != -1) { - LoadSU = &SUnits[LoadNode->getNodeId()]; - isNewLoad = false; - } else { - LoadSU = CreateNewSUnit(LoadNode); - LoadNode->setNodeId(LoadSU->NodeNum); - - InitNumRegDefsLeft(LoadSU); - computeLatency(LoadSU); - } - - SUnit *NewSU = CreateNewSUnit(N); - assert(N->getNodeId() == -1 && "Node already inserted!"); - N->setNodeId(NewSU->NodeNum); - - const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); - for (unsigned i = 0; i != MCID.getNumOperands(); ++i) { - if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) { - NewSU->isTwoAddress = true; - break; - } - } - if (MCID.isCommutable()) - NewSU->isCommutable = true; - - InitNumRegDefsLeft(NewSU); - computeLatency(NewSU); - - // Record all the edges to and from the old SU, by category. - SmallVector ChainPreds; - SmallVector ChainSuccs; - SmallVector LoadPreds; - SmallVector NodePreds; - SmallVector NodeSuccs; - for (SDep &Pred : SU->Preds) { - if (Pred.isCtrl()) - ChainPreds.push_back(Pred); - else if (isOperandOf(Pred.getSUnit(), LoadNode)) - LoadPreds.push_back(Pred); - else - NodePreds.push_back(Pred); - } - for (SDep &Succ : SU->Succs) { - if (Succ.isCtrl()) - ChainSuccs.push_back(Succ); - else - NodeSuccs.push_back(Succ); - } - - // Now assign edges to the newly-created nodes. - for (const SDep &Pred : ChainPreds) { - RemovePred(SU, Pred); - if (isNewLoad) - AddPred(LoadSU, Pred); - } - for (const SDep &Pred : LoadPreds) { - RemovePred(SU, Pred); - if (isNewLoad) - AddPred(LoadSU, Pred); - } - for (const SDep &Pred : NodePreds) { - RemovePred(SU, Pred); - AddPred(NewSU, Pred); - } - for (SDep D : NodeSuccs) { - SUnit *SuccDep = D.getSUnit(); - D.setSUnit(SU); - RemovePred(SuccDep, D); - D.setSUnit(NewSU); - AddPred(SuccDep, D); - // Balance register pressure. - if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled - && !D.isCtrl() && NewSU->NumRegDefsLeft > 0) - --NewSU->NumRegDefsLeft; - } - for (SDep D : ChainSuccs) { - SUnit *SuccDep = D.getSUnit(); - D.setSUnit(SU); - RemovePred(SuccDep, D); - if (isNewLoad) { - D.setSUnit(LoadSU); - AddPred(SuccDep, D); - } - } - - // Add a data dependency to reflect that NewSU reads the value defined - // by LoadSU. 
- SDep D(LoadSU, SDep::Data, 0); - D.setLatency(LoadSU->Latency); - AddPred(NewSU, D); - - if (isNewLoad) - AvailableQueue->addNode(LoadSU); - AvailableQueue->addNode(NewSU); - - ++NumUnfolds; - - if (NewSU->NumSuccsLeft == 0) { - NewSU->isAvailable = true; - return NewSU; - } - SU = NewSU; + SU = UnfoldSU; + N = SU->getNode(); + // If this can be scheduled don't bother duplicating and just return + if (SU->NumSuccsLeft == 0) + return SU; } DEBUG(dbgs() << " Duplicating SU #" << SU->NodeNum << "\n"); @@ -1839,28 +1861,68 @@ static int checkSpecialNodes(const SUnit *left, const SUnit *right) { /// Smaller number is the higher priority. static unsigned CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector &SUNumbers) { - unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum]; - if (SethiUllmanNumber != 0) - return SethiUllmanNumber; - - unsigned Extra = 0; - for (const SDep &Pred : SU->Preds) { - if (Pred.isCtrl()) continue; // ignore chain preds - SUnit *PredSU = Pred.getSUnit(); - unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers); - if (PredSethiUllman > SethiUllmanNumber) { - SethiUllmanNumber = PredSethiUllman; - Extra = 0; - } else if (PredSethiUllman == SethiUllmanNumber) - ++Extra; - } + if (SUNumbers[SU->NodeNum] != 0) + return SUNumbers[SU->NodeNum]; + + // Use WorkList to avoid stack overflow on excessively large IRs. + struct WorkState { + WorkState(const SUnit *SU) : SU(SU) {} + const SUnit *SU; + unsigned PredsProcessed = 0; + }; - SethiUllmanNumber += Extra; + SmallVector WorkList; + WorkList.push_back(SU); + while (!WorkList.empty()) { + auto &Temp = WorkList.back(); + auto *TempSU = Temp.SU; + bool AllPredsKnown = true; + // Try to find a non-evaluated pred and push it into the processing stack. + for (unsigned P = Temp.PredsProcessed; P < TempSU->Preds.size(); ++P) { + auto &Pred = TempSU->Preds[P]; + if (Pred.isCtrl()) continue; // ignore chain preds + SUnit *PredSU = Pred.getSUnit(); + if (SUNumbers[PredSU->NodeNum] == 0) { +#ifndef NDEBUG + // In debug mode, check that we don't have such element in the stack. + for (auto It : WorkList) + assert(It.SU != PredSU && "Trying to push an element twice?"); +#endif + // Next time start processing this one starting from the next pred. + Temp.PredsProcessed = P + 1; + WorkList.push_back(PredSU); + AllPredsKnown = false; + break; + } + } - if (SethiUllmanNumber == 0) - SethiUllmanNumber = 1; + if (!AllPredsKnown) + continue; - return SethiUllmanNumber; + // Once all preds are known, we can calculate the answer for this one. 
+ unsigned SethiUllmanNumber = 0; + unsigned Extra = 0; + for (const SDep &Pred : TempSU->Preds) { + if (Pred.isCtrl()) continue; // ignore chain preds + SUnit *PredSU = Pred.getSUnit(); + unsigned PredSethiUllman = SUNumbers[PredSU->NodeNum]; + assert(PredSethiUllman > 0 && "We should have evaluated this pred!"); + if (PredSethiUllman > SethiUllmanNumber) { + SethiUllmanNumber = PredSethiUllman; + Extra = 0; + } else if (PredSethiUllman == SethiUllmanNumber) + ++Extra; + } + + SethiUllmanNumber += Extra; + if (SethiUllmanNumber == 0) + SethiUllmanNumber = 1; + SUNumbers[TempSU->NodeNum] = SethiUllmanNumber; + WorkList.pop_back(); + } + + assert(SUNumbers[SU->NodeNum] > 0 && "SethiUllman should never be zero!"); + return SUNumbers[SU->NodeNum]; } /// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all diff --git a/interpreter/llvm/src/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/interpreter/llvm/src/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp index eee4a4b067186..631cb34717c4f 100644 --- a/interpreter/llvm/src/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp +++ b/interpreter/llvm/src/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp @@ -18,12 +18,12 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/SchedulerRegistry.h" #include "ScheduleDAGSDNodes.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LatencyPriorityQueue.h" #include "llvm/CodeGen/ResourcePriorityQueue.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/IR/DataLayout.h" #include "llvm/Support/Debug.h" diff --git a/interpreter/llvm/src/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/interpreter/llvm/src/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 28de92c6c8817..16f425dc7969a 100644 --- a/interpreter/llvm/src/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/interpreter/llvm/src/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -1,4 +1,4 @@ -//===-- SelectionDAG.cpp - Implement the SelectionDAG data structures -----===// +//===- SelectionDAG.cpp - Implement the SelectionDAG data structures ------===// // // The LLVM Compiler Infrastructure // @@ -13,27 +13,45 @@ #include "llvm/CodeGen/SelectionDAG.h" #include "SDNodeDbgValue.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APInt.h" #include "llvm/ADT/APSInt.h" -#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/Triple.h" +#include "llvm/ADT/Twine.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineValueType.h" +#include "llvm/CodeGen/RuntimeLibcalls.h" +#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/SelectionDAGTargetInfo.h" -#include "llvm/IR/CallingConv.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" -#include 
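// ---------------------------------------------------------------------------
// The recursion-to-worklist rewrite above avoids stack overflow on very large
// DAGs: each frame remembers how many predecessors it has already dispatched,
// and a node is numbered only once all of its predecessors have numbers. A
// self-contained model over a toy DAG (simplified sketch, not the LLVM code):
#include <cstddef>
#include <vector>

inline unsigned sethiUllman(unsigned Root,
                            const std::vector<std::vector<unsigned>> &Preds,
                            std::vector<unsigned> &Numbers) {
  if (Numbers[Root] != 0)
    return Numbers[Root]; // already evaluated

  struct Frame { unsigned Node; std::size_t NextPred = 0; };
  std::vector<Frame> Stack;
  Stack.push_back({Root});
  while (!Stack.empty()) {
    Frame &F = Stack.back();
    // Dispatch the next unevaluated predecessor, if any. LIFO order means it
    // is fully numbered before this frame resumes.
    if (F.NextPred < Preds[F.Node].size()) {
      unsigned P = Preds[F.Node][F.NextPred++];
      if (Numbers[P] == 0)
        Stack.push_back({P});
      continue;
    }
    // All preds known: take the max, plus one for every pred that ties it.
    unsigned Num = 0, Extra = 0;
    for (unsigned P : Preds[F.Node]) {
      if (Numbers[P] > Num) {
        Num = Numbers[P];
        Extra = 0;
      } else if (Numbers[P] == Num) {
        ++Extra;
      }
    }
    Num += Extra;
    Numbers[F.Node] = Num ? Num : 1; // never leave a zero behind
    Stack.pop_back();
  }
  return Numbers[Root];
}
// ---------------------------------------------------------------------------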
"llvm/IR/DebugInfo.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DebugLoc.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" -#include "llvm/IR/GlobalAlias.h" -#include "llvm/IR/GlobalVariable.h" -#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CodeGen.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/KnownBits.h" @@ -41,16 +59,20 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/Mutex.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include -#include +#include +#include +#include +#include +#include +#include #include +#include using namespace llvm; @@ -94,7 +116,8 @@ bool ConstantFPSDNode::isValueValidForType(EVT VT, // ISD Namespace //===----------------------------------------------------------------------===// -bool ISD::isConstantSplatVector(const SDNode *N, APInt &SplatVal) { +bool ISD::isConstantSplatVector(const SDNode *N, APInt &SplatVal, + bool AllowShrink) { auto *BV = dyn_cast(N); if (!BV) return false; @@ -102,9 +125,11 @@ bool ISD::isConstantSplatVector(const SDNode *N, APInt &SplatVal) { APInt SplatUndef; unsigned SplatBitSize; bool HasUndefs; - EVT EltVT = N->getValueType(0).getVectorElementType(); - return BV->isConstantSplat(SplatVal, SplatUndef, SplatBitSize, HasUndefs) && - EltVT.getSizeInBits() >= SplatBitSize; + unsigned EltSize = N->getValueType(0).getVectorElementType().getSizeInBits(); + unsigned MinSplatBits = AllowShrink ? 0 : EltSize; + return BV->isConstantSplat(SplatVal, SplatUndef, SplatBitSize, HasUndefs, + MinSplatBits) && + EltSize >= SplatBitSize; } // FIXME: AllOnes and AllZeros duplicate a lot of code. Could these be @@ -269,7 +294,6 @@ ISD::CondCode ISD::getSetCCInverse(ISD::CondCode Op, bool isInteger) { return ISD::CondCode(Operation); } - /// For an integer comparison, return 1 if the comparison is a signed operation /// and 2 if the result is an unsigned comparison. Return zero if the operation /// does not depend on the sign of the input (setne and seteq). @@ -338,7 +362,6 @@ ISD::CondCode ISD::getSetCCAndOperation(ISD::CondCode Op1, ISD::CondCode Op2, //===----------------------------------------------------------------------===// /// AddNodeIDOpcode - Add the node opcode to the NodeID data. -/// static void AddNodeIDOpcode(FoldingSetNodeID &ID, unsigned OpC) { ID.AddInteger(OpC); } @@ -350,7 +373,6 @@ static void AddNodeIDValueTypes(FoldingSetNodeID &ID, SDVTList VTList) { } /// AddNodeIDOperands - Various routines for adding operands to the NodeID data. -/// static void AddNodeIDOperands(FoldingSetNodeID &ID, ArrayRef Ops) { for (auto& Op : Ops) { @@ -360,7 +382,6 @@ static void AddNodeIDOperands(FoldingSetNodeID &ID, } /// AddNodeIDOperands - Various routines for adding operands to the NodeID data. 
-/// static void AddNodeIDOperands(FoldingSetNodeID &ID, ArrayRef Ops) { for (auto& Op : Ops) { @@ -392,10 +413,9 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { break; } case ISD::TargetConstantFP: - case ISD::ConstantFP: { + case ISD::ConstantFP: ID.AddPointer(cast(N)->getConstantFPValue()); break; - } case ISD::TargetGlobalAddress: case ISD::GlobalAddress: case ISD::TargetGlobalTLSAddress: @@ -573,6 +593,11 @@ void SelectionDAG::RemoveDeadNodes(SmallVectorImpl &DeadNodes) { // worklist. while (!DeadNodes.empty()) { SDNode *N = DeadNodes.pop_back_val(); + // Skip to next node if we've already managed to delete the node. This could + // happen if replacing a node causes a node previously added to the node to + // be deleted. + if (N->getOpcode() == ISD::DELETED_NODE) + continue; for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next) DUL->NodeDeleted(N, nullptr); @@ -770,7 +795,6 @@ bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) { /// maps and modified in place. Add it back to the CSE maps, unless an identical /// node already exists, in which case transfer all its users to the existing /// node. This transfer can potentially trigger recursive merging. -/// void SelectionDAG::AddModifiedNodeToCSEMaps(SDNode *N) { // For node types that aren't CSE'd, just act as if no identical node @@ -835,7 +859,6 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, return Node; } - /// FindModifiedNodeSlot - Find a slot for the specified node if its operands /// were replaced with those specified. If this node is never memoized, /// return null, otherwise return a pointer to the slot it would take. If a @@ -864,10 +887,9 @@ unsigned SelectionDAG::getEVTAlignment(EVT VT) const { // EntryNode could meaningfully have debug info if we can find it... SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL) - : TM(tm), TSI(nullptr), TLI(nullptr), OptLevel(OL), + : TM(tm), OptLevel(OL), EntryNode(ISD::EntryToken, 0, DebugLoc(), getVTList(MVT::Other)), - Root(getEntryNode()), NewNodesMustHaveLegalTypes(false), - UpdateListeners(nullptr) { + Root(getEntryNode()) { InsertNode(&EntryNode); DbgInfo = new SDDbgInfo(); } @@ -1038,7 +1060,6 @@ SDValue SelectionDAG::getZeroExtendVectorInReg(SDValue Op, const SDLoc &DL, } /// getNOT - Create a bitwise NOT operation as (XOR Val, -1). -/// SDValue SelectionDAG::getNOT(const SDLoc &DL, SDValue Val, EVT VT) { EVT EltVT = VT.getScalarType(); SDValue NegOne = @@ -1317,7 +1338,6 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT, return SDValue(N, 0); } - SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT, unsigned Alignment, int Offset, bool isTarget, @@ -1451,7 +1471,7 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, // Validate that all indices in Mask are within the range of the elements // input to the shuffle. int NElts = Mask.size(); - assert(all_of(Mask, [&](int M) { return M < (NElts * 2); }) && + assert(llvm::all_of(Mask, [&](int M) { return M < (NElts * 2); }) && "Index out of range"); // Copy the mask so we can do any needed cleanup. 
@@ -2650,7 +2670,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, if (DemandedElts[EltIdx]) { computeKnownBits(InVal, Known2, Depth + 1); Known.One &= Known2.One.zextOrTrunc(Known.One.getBitWidth()); - Known.Zero &= Known2.Zero.zextOrTrunc(Known.Zero.getBitWidth());; + Known.Zero &= Known2.Zero.zextOrTrunc(Known.Zero.getBitWidth()); } // If we demand the source vector then add its common known bits, ensuring @@ -2666,7 +2686,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, computeKnownBits(InVec, Known, Depth + 1); computeKnownBits(InVal, Known2, Depth + 1); Known.One &= Known2.One.zextOrTrunc(Known.One.getBitWidth()); - Known.Zero &= Known2.Zero.zextOrTrunc(Known.Zero.getBitWidth());; + Known.Zero &= Known2.Zero.zextOrTrunc(Known.Zero.getBitWidth()); } break; } @@ -2855,6 +2875,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, EVT VT = Op.getValueType(); assert(VT.isInteger() && "Invalid VT!"); unsigned VTBits = VT.getScalarSizeInBits(); + unsigned NumElts = DemandedElts.getBitWidth(); unsigned Tmp, Tmp2; unsigned FirstAnswer = 1; @@ -2898,6 +2919,39 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, } return Tmp; + case ISD::VECTOR_SHUFFLE: { + // Collect the minimum number of sign bits that are shared by every vector + // element referenced by the shuffle. + APInt DemandedLHS(NumElts, 0), DemandedRHS(NumElts, 0); + const ShuffleVectorSDNode *SVN = cast(Op); + assert(NumElts == SVN->getMask().size() && "Unexpected vector size"); + for (unsigned i = 0; i != NumElts; ++i) { + int M = SVN->getMaskElt(i); + if (!DemandedElts[i]) + continue; + // For UNDEF elements, we don't know anything about the common state of + // the shuffle result. + if (M < 0) + return 1; + if ((unsigned)M < NumElts) + DemandedLHS.setBit((unsigned)M % NumElts); + else + DemandedRHS.setBit((unsigned)M % NumElts); + } + Tmp = std::numeric_limits::max(); + if (!!DemandedLHS) + Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedLHS, Depth + 1); + if (!!DemandedRHS) { + Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedRHS, Depth + 1); + Tmp = std::min(Tmp, Tmp2); + } + // If we don't know anything, early out and try computeKnownBits fall-back. + if (Tmp == 1) + break; + assert(Tmp <= VTBits && "Failed to determine minimum sign bits"); + return Tmp; + } + case ISD::SIGN_EXTEND: case ISD::SIGN_EXTEND_VECTOR_INREG: Tmp = VTBits - Op.getOperand(0).getScalarValueSizeInBits(); @@ -3088,7 +3142,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, unsigned EltIdx = CEltNo->getZExtValue(); // If we demand the inserted element then get its sign bits. - Tmp = UINT_MAX; + Tmp = std::numeric_limits::max(); if (DemandedElts[EltIdx]) { // TODO - handle implicit truncation of inserted elements. if (InVal.getScalarValueSizeInBits() != VTBits) @@ -3137,14 +3191,36 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, return ComputeNumSignBits(InVec, DemandedSrcElts, Depth + 1); } - case ISD::EXTRACT_SUBVECTOR: - return ComputeNumSignBits(Op.getOperand(0), Depth + 1); + case ISD::EXTRACT_SUBVECTOR: { + // If we know the element index, just demand that subvector elements, + // otherwise demand them all. 
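// ---------------------------------------------------------------------------
// The new VECTOR_SHUFFLE case above routes each demanded output lane back to
// the input it reads: mask entries in [0, N) demand an LHS element, entries in
// [N, 2N) demand an RHS element, and -1 is undef (the real code conservatively
// gives up on a demanded undef lane). Standalone sketch, not the LLVM code:
#include <cstddef>
#include <vector>

struct DemandedSplit {
  std::vector<bool> LHS, RHS;
  bool HitUndef = false;
};

inline DemandedSplit splitDemanded(const std::vector<int> &Mask,
                                   const std::vector<bool> &DemandedElts) {
  const std::size_t N = Mask.size();
  DemandedSplit S;
  S.LHS.assign(N, false);
  S.RHS.assign(N, false);
  for (std::size_t i = 0; i != N; ++i) {
    if (!DemandedElts[i])
      continue;
    const int M = Mask[i];
    if (M < 0) {
      S.HitUndef = true; // nothing is known about this lane
      continue;
    }
    if (static_cast<std::size_t>(M) < N)
      S.LHS[M] = true;
    else
      S.RHS[M - N] = true;
  }
  return S;
}
// ---------------------------------------------------------------------------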
+ SDValue Src = Op.getOperand(0); + ConstantSDNode *SubIdx = dyn_cast(Op.getOperand(1)); + unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); + if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) { + // Offset the demanded elts by the subvector index. + uint64_t Idx = SubIdx->getZExtValue(); + APInt DemandedSrc = DemandedElts.zext(NumSrcElts).shl(Idx); + return ComputeNumSignBits(Src, DemandedSrc, Depth + 1); + } + return ComputeNumSignBits(Src, Depth + 1); + } case ISD::CONCAT_VECTORS: - // Determine the minimum number of sign bits across all input vectors. - // Early out if the result is already 1. - Tmp = ComputeNumSignBits(Op.getOperand(0), Depth + 1); - for (unsigned i = 1, e = Op.getNumOperands(); (i < e) && (Tmp > 1); ++i) - Tmp = std::min(Tmp, ComputeNumSignBits(Op.getOperand(i), Depth + 1)); + // Determine the minimum number of sign bits across all demanded + // elts of the input vectors. Early out if the result is already 1. + Tmp = std::numeric_limits::max(); + EVT SubVectorVT = Op.getOperand(0).getValueType(); + unsigned NumSubVectorElts = SubVectorVT.getVectorNumElements(); + unsigned NumSubVectors = Op.getNumOperands(); + for (unsigned i = 0; (i < NumSubVectors) && (Tmp > 1); ++i) { + APInt DemandedSub = DemandedElts.lshr(i * NumSubVectorElts); + DemandedSub = DemandedSub.trunc(NumSubVectorElts); + if (!DemandedSub) + continue; + Tmp2 = ComputeNumSignBits(Op.getOperand(i), DemandedSub, Depth + 1); + Tmp = std::min(Tmp, Tmp2); + } + assert(Tmp <= VTBits && "Failed to determine minimum sign bits"); return Tmp; } @@ -3271,7 +3347,7 @@ bool SelectionDAG::haveNoCommonBitsSet(SDValue A, SDValue B) const { static SDValue FoldCONCAT_VECTORS(const SDLoc &DL, EVT VT, ArrayRef Ops, - llvm::SelectionDAG &DAG) { + SelectionDAG &DAG) { assert(!Ops.empty() && "Can't concatenate an empty list of vectors!"); assert(llvm::all_of(Ops, [Ops](SDValue Op) { @@ -3780,8 +3856,9 @@ bool SelectionDAG::isUndef(unsigned Opcode, ArrayRef Ops) { return true; return ISD::isBuildVectorOfConstantSDNodes(Divisor.getNode()) && - any_of(Divisor->op_values(), - [](SDValue V) { return V.isUndef() || isNullConstant(V); }); + llvm::any_of(Divisor->op_values(), + [](SDValue V) { return V.isUndef() || + isNullConstant(V); }); // TODO: Handle signed overflow. } // TODO: Handle oversized shifts. @@ -3815,7 +3892,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, // fold (add Sym, c) -> Sym+c if (GlobalAddressSDNode *GA = dyn_cast(Cst1)) return FoldSymbolOffset(Opcode, VT, GA, Cst2); - if (isCommutativeBinOp(Opcode)) + if (TLI->isCommutativeBinOp(Opcode)) if (GlobalAddressSDNode *GA = dyn_cast(Cst2)) return FoldSymbolOffset(Opcode, VT, GA, Cst1); @@ -3892,8 +3969,8 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode, // All operands must be vector types with the same number of elements as // the result type and must be either UNDEF or a build vector of constant // or UNDEF scalars. - if (!all_of(Ops, IsConstantBuildVectorOrUndef) || - !all_of(Ops, IsScalarOrSameVectorSize)) + if (!llvm::all_of(Ops, IsConstantBuildVectorOrUndef) || + !llvm::all_of(Ops, IsScalarOrSameVectorSize)) return SDValue(); // If we are comparing vectors, then the result needs to be a i1 boolean @@ -3961,7 +4038,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ConstantFPSDNode *N2CFP = dyn_cast(N2); // Canonicalize constant to RHS if commutative. 
- if (isCommutativeBinOp(Opcode)) { + if (TLI->isCommutativeBinOp(Opcode)) { if (N1C && !N2C) { std::swap(N1C, N2C); std::swap(N1, N2); @@ -4345,7 +4422,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, // Canonicalize an UNDEF to the RHS, even over a constant. if (N1.isUndef()) { - if (isCommutativeBinOp(Opcode)) { + if (TLI->isCommutativeBinOp(Opcode)) { std::swap(N1, N2); } else { switch (Opcode) { @@ -4629,9 +4706,10 @@ static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG, /// used when a memcpy is turned into a memset when the source is a constant /// string ptr. static SDValue getMemsetStringVal(EVT VT, const SDLoc &dl, SelectionDAG &DAG, - const TargetLowering &TLI, StringRef Str) { + const TargetLowering &TLI, + const ConstantDataArraySlice &Slice) { // Handle vector with all elements zero. - if (Str.empty()) { + if (Slice.Array == nullptr) { if (VT.isInteger()) return DAG.getConstant(0, dl, VT); else if (VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128) @@ -4650,15 +4728,15 @@ static SDValue getMemsetStringVal(EVT VT, const SDLoc &dl, SelectionDAG &DAG, assert(!VT.isVector() && "Can't handle vector type here!"); unsigned NumVTBits = VT.getSizeInBits(); unsigned NumVTBytes = NumVTBits / 8; - unsigned NumBytes = std::min(NumVTBytes, unsigned(Str.size())); + unsigned NumBytes = std::min(NumVTBytes, unsigned(Slice.Length)); APInt Val(NumVTBits, 0); if (DAG.getDataLayout().isLittleEndian()) { for (unsigned i = 0; i != NumBytes; ++i) - Val |= (uint64_t)(unsigned char)Str[i] << i*8; + Val |= (uint64_t)(unsigned char)Slice[i] << i*8; } else { for (unsigned i = 0; i != NumBytes; ++i) - Val |= (uint64_t)(unsigned char)Str[i] << (NumVTBytes-i-1)*8; + Val |= (uint64_t)(unsigned char)Slice[i] << (NumVTBytes-i-1)*8; } // If the "cost" of materializing the integer immediate is less than the cost @@ -4675,9 +4753,8 @@ SDValue SelectionDAG::getMemBasePlusOffset(SDValue Base, unsigned Offset, return getNode(ISD::ADD, DL, VT, Base, getConstant(Offset, DL, VT)); } -/// isMemSrcFromString - Returns true if memcpy source is a string constant. -/// -static bool isMemSrcFromString(SDValue Src, StringRef &Str) { +/// Returns true if memcpy source is constant data. +static bool isMemSrcFromConstant(SDValue Src, ConstantDataArraySlice &Slice) { uint64_t SrcDelta = 0; GlobalAddressSDNode *G = nullptr; if (Src.getOpcode() == ISD::GlobalAddress) @@ -4691,8 +4768,8 @@ static bool isMemSrcFromString(SDValue Src, StringRef &Str) { if (!G) return false; - return getConstantStringInfo(G->getGlobal(), Str, - SrcDelta + G->getOffset(), false); + return getConstantDataArrayInfo(G->getGlobal(), Slice, 8, + SrcDelta + G->getOffset()); } /// Determines the optimal series of memory ops to replace the memset / memcpy. @@ -4723,23 +4800,23 @@ static bool FindOptimalMemOpLowering(std::vector &MemOps, DAG.getMachineFunction()); if (VT == MVT::Other) { - if (DstAlign >= DAG.getDataLayout().getPointerPrefAlignment(DstAS) || - TLI.allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign)) { - VT = TLI.getPointerTy(DAG.getDataLayout(), DstAS); - } else { - switch (DstAlign & 7) { - case 0: VT = MVT::i64; break; - case 4: VT = MVT::i32; break; - case 2: VT = MVT::i16; break; - default: VT = MVT::i8; break; - } - } - + // Use the largest integer type whose alignment constraints are satisfied. + // We only need to check DstAlign here as SrcAlign is always greater or + // equal to DstAlign (or zero). 
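// ---------------------------------------------------------------------------
// The rewritten VT selection continuing below walks down from i64 until the
// destination alignment is sufficient or the target tolerates misaligned
// accesses. A standalone model returning the store width in bytes (simplified:
// the real allowsMisalignedMemoryAccesses query also depends on the type and
// address space):
inline unsigned pickMemOpWidth(unsigned DstAlign, bool AllowsMisaligned) {
  unsigned Width = 8; // start at i64
  // DstAlign == 0 means the alignment can still be chosen freely.
  while (DstAlign != 0 && DstAlign < Width && !AllowsMisaligned)
    Width /= 2; // i64 -> i32 -> i16 -> i8
  return Width;
}
// ---------------------------------------------------------------------------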
+ VT = MVT::i64; + while (DstAlign && DstAlign < VT.getSizeInBits() / 8 && + !TLI.allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign)) + VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1); + assert(VT.isInteger()); + + // Find the largest legal integer type. MVT LVT = MVT::i64; while (!TLI.isTypeLegal(LVT)) LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1); assert(LVT.isInteger()); + // If the type we've chosen is larger than the largest legal integer type + // then use that instead. if (VT.bitsGT(LVT)) VT = LVT; } @@ -4824,6 +4901,8 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, // TODO: In the AlwaysInline case, if the size is big then generate a loop // rather than maybe a humongous number of loads and stores. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + const DataLayout &DL = DAG.getDataLayout(); + LLVMContext &C = *DAG.getContext(); std::vector MemOps; bool DstAlignCanChange = false; MachineFunction &MF = DAG.getMachineFunction(); @@ -4835,30 +4914,30 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, unsigned SrcAlign = DAG.InferPtrAlignment(Src); if (Align > SrcAlign) SrcAlign = Align; - StringRef Str; - bool CopyFromStr = isMemSrcFromString(Src, Str); - bool isZeroStr = CopyFromStr && Str.empty(); + ConstantDataArraySlice Slice; + bool CopyFromConstant = isMemSrcFromConstant(Src, Slice); + bool isZeroConstant = CopyFromConstant && Slice.Array == nullptr; unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(OptSize); if (!FindOptimalMemOpLowering(MemOps, Limit, Size, (DstAlignCanChange ? 0 : Align), - (isZeroStr ? 0 : SrcAlign), - false, false, CopyFromStr, true, + (isZeroConstant ? 0 : SrcAlign), + false, false, CopyFromConstant, true, DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(), DAG, TLI)) return SDValue(); if (DstAlignCanChange) { - Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext()); - unsigned NewAlign = (unsigned)DAG.getDataLayout().getABITypeAlignment(Ty); + Type *Ty = MemOps[0].getTypeForEVT(C); + unsigned NewAlign = (unsigned)DL.getABITypeAlignment(Ty); // Don't promote to an alignment that would require dynamic stack // realignment. const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); if (!TRI->needsStackRealignment(MF)) while (NewAlign > Align && - DAG.getDataLayout().exceedsNaturalStackAlignment(NewAlign)) + DL.exceedsNaturalStackAlignment(NewAlign)) NewAlign /= 2; if (NewAlign > Align) { @@ -4887,18 +4966,29 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, DstOff -= VTSize - Size; } - if (CopyFromStr && - (isZeroStr || (VT.isInteger() && !VT.isVector()))) { + if (CopyFromConstant && + (isZeroConstant || (VT.isInteger() && !VT.isVector()))) { // It's unlikely a store of a vector immediate can be done in a single // instruction. It would require a load from a constantpool first. // We only handle zero vectors here. // FIXME: Handle other cases where store of vector immediate is done in // a single instruction. - Value = getMemsetStringVal(VT, dl, DAG, TLI, Str.substr(SrcOff)); + ConstantDataArraySlice SubSlice; + if (SrcOff < Slice.Length) { + SubSlice = Slice; + SubSlice.move(SrcOff); + } else { + // This is an out-of-bounds access and hence UB. Pretend we read zero. 
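// ---------------------------------------------------------------------------
// The SubSlice handling in the hunk continuing below reads zeros once SrcOff
// runs past the constant data, because that access would be undefined
// behavior in the original program. A toy slice with the same convention
// (null array == reads as zero); an illustrative stand-in for
// ConstantDataArraySlice, not its real layout:
#include <cstddef>
#include <cstdint>

struct ByteSlice {
  const uint8_t *Array = nullptr; // nullptr means "reads as zero"
  std::size_t Offset = 0;
  std::size_t Length = 0;

  void move(std::size_t Delta) { Offset += Delta; Length -= Delta; }
  uint8_t operator[](std::size_t I) const {
    return Array ? Array[Offset + I] : 0;
  }
};
// ---------------------------------------------------------------------------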
+ SubSlice.Array = nullptr; + SubSlice.Offset = 0; + SubSlice.Length = VTSize; + } + Value = getMemsetStringVal(VT, dl, DAG, TLI, SubSlice); if (Value.getNode()) Store = DAG.getStore(Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl), - DstPtrInfo.getWithOffset(DstOff), Align, MMOFlags); + DstPtrInfo.getWithOffset(DstOff), Align, + MMOFlags); } if (!Store.getNode()) { @@ -4907,12 +4997,19 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, // thing to do is generate a LoadExt/StoreTrunc pair. These simplify // to Load/Store if NVT==VT. // FIXME does the case above also need this? - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + EVT NVT = TLI.getTypeToTransformTo(C, VT); assert(NVT.bitsGE(VT)); + + bool isDereferenceable = + SrcPtrInfo.getWithOffset(SrcOff).isDereferenceable(VTSize, C, DL); + MachineMemOperand::Flags SrcMMOFlags = MMOFlags; + if (isDereferenceable) + SrcMMOFlags |= MachineMemOperand::MODereferenceable; + Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain, DAG.getMemBasePlusOffset(Src, SrcOff, dl), SrcPtrInfo.getWithOffset(SrcOff), VT, - MinAlign(SrcAlign, SrcOff), MMOFlags); + MinAlign(SrcAlign, SrcOff), SrcMMOFlags); OutChains.push_back(Value.getValue(1)); Store = DAG.getTruncStore( Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl), @@ -4940,6 +5037,8 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, // Expand memmove to a series of load and store ops if the size operand falls // below a certain threshold. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + const DataLayout &DL = DAG.getDataLayout(); + LLVMContext &C = *DAG.getContext(); std::vector MemOps; bool DstAlignCanChange = false; MachineFunction &MF = DAG.getMachineFunction(); @@ -4962,8 +5061,8 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, return SDValue(); if (DstAlignCanChange) { - Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext()); - unsigned NewAlign = (unsigned)DAG.getDataLayout().getABITypeAlignment(Ty); + Type *Ty = MemOps[0].getTypeForEVT(C); + unsigned NewAlign = (unsigned)DL.getABITypeAlignment(Ty); if (NewAlign > Align) { // Give the stack frame object a larger alignment if needed. 
if (MFI.getObjectAlignment(FI->getIndex()) < NewAlign) @@ -4984,9 +5083,15 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, unsigned VTSize = VT.getSizeInBits() / 8; SDValue Value; + bool isDereferenceable = + SrcPtrInfo.getWithOffset(SrcOff).isDereferenceable(VTSize, C, DL); + MachineMemOperand::Flags SrcMMOFlags = MMOFlags; + if (isDereferenceable) + SrcMMOFlags |= MachineMemOperand::MODereferenceable; + Value = DAG.getLoad(VT, dl, Chain, DAG.getMemBasePlusOffset(Src, SrcOff, dl), - SrcPtrInfo.getWithOffset(SrcOff), SrcAlign, MMOFlags); + SrcPtrInfo.getWithOffset(SrcOff), SrcAlign, SrcMMOFlags); LoadValues.push_back(Value); LoadChains.push_back(Value.getValue(1)); SrcOff += VTSize; @@ -5341,7 +5446,7 @@ SDValue SelectionDAG::getAtomicCmpSwap( unsigned Opcode, const SDLoc &dl, EVT MemVT, SDVTList VTs, SDValue Chain, SDValue Ptr, SDValue Cmp, SDValue Swp, MachinePointerInfo PtrInfo, unsigned Alignment, AtomicOrdering SuccessOrdering, - AtomicOrdering FailureOrdering, SynchronizationScope SynchScope) { + AtomicOrdering FailureOrdering, SyncScope::ID SSID) { assert(Opcode == ISD::ATOMIC_CMP_SWAP || Opcode == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS); assert(Cmp.getValueType() == Swp.getValueType() && "Invalid Atomic Op Types"); @@ -5357,7 +5462,7 @@ SDValue SelectionDAG::getAtomicCmpSwap( MachineMemOperand::MOStore; MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment, - AAMDNodes(), nullptr, SynchScope, SuccessOrdering, + AAMDNodes(), nullptr, SSID, SuccessOrdering, FailureOrdering); return getAtomicCmpSwap(Opcode, dl, MemVT, VTs, Chain, Ptr, Cmp, Swp, MMO); @@ -5379,7 +5484,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Val, const Value *PtrVal, unsigned Alignment, AtomicOrdering Ordering, - SynchronizationScope SynchScope) { + SyncScope::ID SSID) { if (Alignment == 0) // Ensure that codegen never sees alignment 0 Alignment = getEVTAlignment(MemVT); @@ -5399,7 +5504,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, MachineMemOperand *MMO = MF.getMachineMemOperand(MachinePointerInfo(PtrVal), Flags, MemVT.getStoreSize(), Alignment, AAMDNodes(), - nullptr, SynchScope, Ordering); + nullptr, SSID, Ordering); return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Val, MMO); } @@ -5483,7 +5588,7 @@ SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, Opcode == ISD::PREFETCH || Opcode == ISD::LIFETIME_START || Opcode == ISD::LIFETIME_END || - (Opcode <= INT_MAX && + ((int)Opcode <= std::numeric_limits::max() && (int)Opcode >= ISD::FIRST_TARGET_MEMORY_OPCODE)) && "Opcode is not a memory-accessing opcode!"); @@ -5817,7 +5922,6 @@ SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::LoadExtType ExtTy, bool isExpanding) { - SDVTList VTs = getVTList(VT, MVT::Other); SDValue Ops[] = { Chain, Ptr, Mask, Src0 }; FoldingSetNodeID ID; @@ -5971,13 +6075,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, switch (Opcode) { default: break; - case ISD::CONCAT_VECTORS: { + case ISD::CONCAT_VECTORS: // Attempt to fold CONCAT_VECTORS into BUILD_VECTOR or UNDEF. 
if (SDValue V = FoldCONCAT_VECTORS(DL, VT, Ops, *this)) return V; break; - } - case ISD::SELECT_CC: { + case ISD::SELECT_CC: assert(NumOps == 5 && "SELECT_CC takes 5 operands!"); assert(Ops[0].getValueType() == Ops[1].getValueType() && "LHS and RHS of condition must have same type!"); @@ -5986,14 +6089,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, assert(Ops[2].getValueType() == VT && "select_cc node must be of same type as true and false value!"); break; - } - case ISD::BR_CC: { + case ISD::BR_CC: assert(NumOps == 5 && "BR_CC takes 5 operands!"); assert(Ops[2].getValueType() == Ops[3].getValueType() && "LHS/RHS of comparison should match types!"); break; } - } // Memoize nodes. SDNode *N; @@ -6475,6 +6576,62 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, return N; } +SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) { + unsigned OrigOpc = Node->getOpcode(); + unsigned NewOpc; + bool IsUnary = false; + switch (OrigOpc) { + default: + llvm_unreachable("mutateStrictFPToFP called with unexpected opcode!"); + case ISD::STRICT_FADD: NewOpc = ISD::FADD; break; + case ISD::STRICT_FSUB: NewOpc = ISD::FSUB; break; + case ISD::STRICT_FMUL: NewOpc = ISD::FMUL; break; + case ISD::STRICT_FDIV: NewOpc = ISD::FDIV; break; + case ISD::STRICT_FREM: NewOpc = ISD::FREM; break; + case ISD::STRICT_FSQRT: NewOpc = ISD::FSQRT; IsUnary = true; break; + case ISD::STRICT_FPOW: NewOpc = ISD::FPOW; break; + case ISD::STRICT_FPOWI: NewOpc = ISD::FPOWI; break; + case ISD::STRICT_FSIN: NewOpc = ISD::FSIN; IsUnary = true; break; + case ISD::STRICT_FCOS: NewOpc = ISD::FCOS; IsUnary = true; break; + case ISD::STRICT_FEXP: NewOpc = ISD::FEXP; IsUnary = true; break; + case ISD::STRICT_FEXP2: NewOpc = ISD::FEXP2; IsUnary = true; break; + case ISD::STRICT_FLOG: NewOpc = ISD::FLOG; IsUnary = true; break; + case ISD::STRICT_FLOG10: NewOpc = ISD::FLOG10; IsUnary = true; break; + case ISD::STRICT_FLOG2: NewOpc = ISD::FLOG2; IsUnary = true; break; + case ISD::STRICT_FRINT: NewOpc = ISD::FRINT; IsUnary = true; break; + case ISD::STRICT_FNEARBYINT: + NewOpc = ISD::FNEARBYINT; + IsUnary = true; + break; + } + + // We're taking this node out of the chain, so we need to re-link things. + SDValue InputChain = Node->getOperand(0); + SDValue OutputChain = SDValue(Node, 1); + ReplaceAllUsesOfValueWith(OutputChain, InputChain); + + SDVTList VTs = getVTList(Node->getOperand(1).getValueType()); + SDNode *Res = nullptr; + if (IsUnary) + Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(1) }); + else + Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(1), + Node->getOperand(2) }); + + // MorphNodeTo can operate in two ways: if an existing node with the + // specified operands exists, it can just return it. Otherwise, it + // updates the node in place to have the requested operands. + if (Res == Node) { + // If we updated the node in place, reset the node ID. To the isel, + // this should be just like a newly allocated machine node. + Res->setNodeId(-1); + } else { + ReplaceAllUsesWith(Node, Res); + RemoveDeadNode(Node); + } + + return Res; +} /// getMachineNode - These are used for target selectors to create a new node /// with specified return type(s), MachineInstr opcode, and operands. @@ -6688,7 +6845,7 @@ class RAUWUpdateListener : public SelectionDAG::DAGUpdateListener { : SelectionDAG::DAGUpdateListener(d), UI(ui), UE(ue) {} }; -} +} // end anonymous namespace /// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead. 
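// ---------------------------------------------------------------------------
// mutateStrictFPToFP above maps each constrained opcode to its regular
// counterpart and notes its arity before re-linking the chain. A toy version
// of that mapping with made-up enum values (illustrative only, not the ISD
// opcode set):
#include <cassert>

enum ToyOpcode { STRICT_FADD, STRICT_FSQRT, FADD, FSQRT };

struct Mutation {
  ToyOpcode NewOpc;
  bool IsUnary;
};

inline Mutation mutateStrict(ToyOpcode Orig) {
  switch (Orig) {
  case STRICT_FADD:
    return {FADD, /*IsUnary=*/false};
  case STRICT_FSQRT:
    return {FSQRT, /*IsUnary=*/true};
  default:
    assert(false && "not a strict FP opcode");
    return {FADD, false};
  }
}
// ---------------------------------------------------------------------------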
/// This can cause recursive merging of nodes in the DAG. @@ -6734,7 +6891,6 @@ void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To) { AddModifiedNodeToCSEMaps(User); } - // If we just RAUW'd the root, take note. if (FromN == getRoot()) setRoot(To); @@ -6904,6 +7060,7 @@ void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To){ } namespace { + /// UseMemo - This class is used by SelectionDAG::ReplaceAllUsesOfValuesWith /// to record information about a use. struct UseMemo { @@ -6916,7 +7073,8 @@ namespace { bool operator<(const UseMemo &L, const UseMemo &R) { return (intptr_t)L.User < (intptr_t)R.User; } -} + +} // end anonymous namespace /// ReplaceAllUsesOfValuesWith - Replace any uses of From with To, leaving /// uses of other values produced by From.getNode() alone. The same value @@ -6982,7 +7140,6 @@ void SelectionDAG::ReplaceAllUsesOfValuesWith(const SDValue *From, /// based on their topological order. It returns the maximum id and a vector /// of the SDNodes* in assigned order by reference. unsigned SelectionDAG::AssignTopologicalOrder() { - unsigned DAGSize = 0; // SortedPos tracks the progress of the algorithm. Nodes before it are @@ -7108,6 +7265,25 @@ void SelectionDAG::TransferDbgValues(SDValue From, SDValue To) { AddDbgValue(I, ToNode, false); } +SDValue SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad, + SDValue NewMemOp) { + assert(isa(NewMemOp.getNode()) && "Expected a memop node"); + // The new memory operation must have the same position as the old load in + // terms of memory dependency. Create a TokenFactor for the old load and new + // memory operation and update uses of the old load's output chain to use that + // TokenFactor. + SDValue OldChain = SDValue(OldLoad, 1); + SDValue NewChain = SDValue(NewMemOp.getNode(), 1); + if (!OldLoad->hasAnyUseOfValue(1)) + return NewChain; + + SDValue TokenFactor = + getNode(ISD::TokenFactor, SDLoc(OldLoad), MVT::Other, OldChain, NewChain); + ReplaceAllUsesOfValueWith(OldChain, TokenFactor); + UpdateNodeOperands(TokenFactor.getNode(), OldChain, NewChain); + return TokenFactor; +} + //===----------------------------------------------------------------------===// // SDNode Class //===----------------------------------------------------------------------===// @@ -7209,6 +7385,7 @@ void SDNode::Profile(FoldingSetNodeID &ID) const { } namespace { + struct EVTArray { std::vector VTs; @@ -7218,11 +7395,12 @@ namespace { VTs.push_back(MVT((MVT::SimpleValueType)i)); } }; -} -static ManagedStatic > EVTs; +} // end anonymous namespace + +static ManagedStatic> EVTs; static ManagedStatic SimpleVTArray; -static ManagedStatic > VTMutex; +static ManagedStatic> VTMutex; /// getValueTypeList - Return a pointer to the specified value type. /// @@ -7256,7 +7434,6 @@ bool SDNode::hasNUsesOfValue(unsigned NUses, unsigned Value) const { return NUses == 0; } - /// hasAnyUseOfValue - Return true if there are any use of the indicated /// value. This method ignores uses of other values defined by this operation. bool SDNode::hasAnyUseOfValue(unsigned Value) const { @@ -7269,9 +7446,7 @@ bool SDNode::hasAnyUseOfValue(unsigned Value) const { return false; } - /// isOnlyUserOf - Return true if this node is the only use of N. -/// bool SDNode::isOnlyUserOf(const SDNode *N) const { bool Seen = false; for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) { @@ -7301,7 +7476,6 @@ bool SDNode::areOnlyUsersOf(ArrayRef Nodes, const SDNode *N) { } /// isOperand - Return true if this node is an operand of N. 
-/// bool SDValue::isOperandOf(const SDNode *N) const { for (const SDValue &Op : N->op_values()) if (*this == Op) @@ -7351,7 +7525,7 @@ bool SDValue::reachesChainWithoutSideEffects(SDValue Dest, } // Next, try a deep search: check whether every operand of the TokenFactor // reaches Dest. - return all_of((*this)->ops(), [=](SDValue Op) { + return llvm::all_of((*this)->ops(), [=](SDValue Op) { return Op.reachesChainWithoutSideEffects(Dest, Depth - 1); }); } @@ -7461,49 +7635,16 @@ bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD, SDValue Loc = LD->getOperand(1); SDValue BaseLoc = Base->getOperand(1); - if (Loc.getOpcode() == ISD::FrameIndex) { - if (BaseLoc.getOpcode() != ISD::FrameIndex) - return false; - const MachineFrameInfo &MFI = getMachineFunction().getFrameInfo(); - int FI = cast(Loc)->getIndex(); - int BFI = cast(BaseLoc)->getIndex(); - int FS = MFI.getObjectSize(FI); - int BFS = MFI.getObjectSize(BFI); - if (FS != BFS || FS != (int)Bytes) return false; - return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes); - } - - // Handle X + C. - if (isBaseWithConstantOffset(Loc)) { - int64_t LocOffset = cast(Loc.getOperand(1))->getSExtValue(); - if (Loc.getOperand(0) == BaseLoc) { - // If the base location is a simple address with no offset itself, then - // the second load's first add operand should be the base address. - if (LocOffset == Dist * (int)Bytes) - return true; - } else if (isBaseWithConstantOffset(BaseLoc)) { - // The base location itself has an offset, so subtract that value from the - // second load's offset before comparing to distance * size. - int64_t BOffset = - cast(BaseLoc.getOperand(1))->getSExtValue(); - if (Loc.getOperand(0) == BaseLoc.getOperand(0)) { - if ((LocOffset - BOffset) == Dist * (int)Bytes) - return true; - } - } - } - const GlobalValue *GV1 = nullptr; - const GlobalValue *GV2 = nullptr; - int64_t Offset1 = 0; - int64_t Offset2 = 0; - bool isGA1 = TLI->isGAPlusOffset(Loc.getNode(), GV1, Offset1); - bool isGA2 = TLI->isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2); - if (isGA1 && isGA2 && GV1 == GV2) - return Offset1 == (Offset2 + Dist*Bytes); + + auto BaseLocDecomp = BaseIndexOffset::match(BaseLoc, *this); + auto LocDecomp = BaseIndexOffset::match(Loc, *this); + + int64_t Offset = 0; + if (BaseLocDecomp.equalBaseIndex(LocDecomp, *this, Offset)) + return (Dist * Bytes == Offset); return false; } - /// InferPtrAlignment - Infer alignment of a load / store address. Return 0 if /// it cannot be inferred. unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { @@ -7513,8 +7654,7 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { if (TLI->isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) { unsigned PtrWidth = getDataLayout().getPointerTypeSizeInBits(GV->getType()); KnownBits Known(PtrWidth); - llvm::computeKnownBits(const_cast(GV), Known, - getDataLayout()); + llvm::computeKnownBits(GV, Known, getDataLayout()); unsigned AlignBits = Known.countMinTrailingZeros(); unsigned Align = AlignBits ? 
1 << std::min(31U, AlignBits) : 0; if (Align) @@ -7595,7 +7735,6 @@ unsigned GlobalAddressSDNode::getAddressSpace() const { return getGlobal()->getType()->getAddressSpace(); } - Type *ConstantPoolSDNode::getType() const { if (isMachineConstantPoolEntry()) return Val.MachineCPVal->getType(); diff --git a/interpreter/llvm/src/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/interpreter/llvm/src/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp new file mode 100644 index 0000000000000..0d69441ebb7f7 --- /dev/null +++ b/interpreter/llvm/src/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp @@ -0,0 +1,115 @@ +//===-- llvm/CodeGen/SelectionDAGAddressAnalysis.cpp ------- DAG Address +//Analysis ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// + +#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h" +#include "llvm/CodeGen/ISDOpcodes.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" + +namespace llvm { + +bool BaseIndexOffset::equalBaseIndex(BaseIndexOffset &Other, + const SelectionDAG &DAG, int64_t &Off) { + // Initial Offset difference. + Off = Other.Offset - Offset; + + if ((Other.Index == Index) && (Other.IsIndexSignExt == IsIndexSignExt)) { + // Trivial match. + if (Other.Base == Base) + return true; + + // Match GlobalAddresses + if (auto *A = dyn_cast(Base)) + if (auto *B = dyn_cast(Other.Base)) + if (A->getGlobal() == B->getGlobal()) { + Off += B->getOffset() - A->getOffset(); + return true; + } + + const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); + + // Match non-equal FrameIndexes - If both frame indices are fixed + // we know their relative offsets and can compare them. Otherwise + // we must be conservative. + if (auto *A = dyn_cast(Base)) + if (auto *B = dyn_cast(Other.Base)) + if (MFI.isFixedObjectIndex(A->getIndex()) && + MFI.isFixedObjectIndex(B->getIndex())) { + Off += MFI.getObjectOffset(B->getIndex()) - + MFI.getObjectOffset(A->getIndex()); + return true; + } + } + return false; +} + +/// Parses tree in Ptr for base, index, offset addresses. +BaseIndexOffset BaseIndexOffset::match(SDValue Ptr, const SelectionDAG &DAG) { + // (((B + I*M) + c)) + c ... + SDValue Base = Ptr; + SDValue Index = SDValue(); + int64_t Offset = 0; + bool IsIndexSignExt = false; + + // Consume constant adds & ors with appropriate masking. + while (Base->getOpcode() == ISD::ADD || Base->getOpcode() == ISD::OR) { + if (auto *C = dyn_cast(Base->getOperand(1))) { + // Only consider ORs which act as adds. + if (Base->getOpcode() == ISD::OR && + !DAG.MaskedValueIsZero(Base->getOperand(0), C->getAPIntValue())) + break; + Offset += C->getSExtValue(); + Base = Base->getOperand(0); + continue; + } + break; + } + + if (Base->getOpcode() == ISD::ADD) { + // TODO: The following code appears to be needless as it just + // bails on some Ptrs early, reducing the cases where we + // find equivalence. We should be able to remove this. + // Inside a loop the current BASE pointer is calculated using an ADD and a + // MUL instruction. In this case Base is the actual BASE pointer. 
+ // (i64 add (i64 %array_ptr) + // (i64 mul (i64 %induction_var) + // (i64 %element_size))) + if (Base->getOperand(1)->getOpcode() == ISD::MUL) + return BaseIndexOffset(Base, Index, Offset, IsIndexSignExt); + + // Look at Base + Index + Offset cases. + Index = Base->getOperand(1); + SDValue PotentialBase = Base->getOperand(0); + + // Skip signextends. + if (Index->getOpcode() == ISD::SIGN_EXTEND) { + Index = Index->getOperand(0); + IsIndexSignExt = true; + } + + // Check if Index Offset pattern + if (Index->getOpcode() != ISD::ADD || + !isa(Index->getOperand(1))) + return BaseIndexOffset(PotentialBase, Index, Offset, IsIndexSignExt); + + Offset += cast(Index->getOperand(1))->getSExtValue(); + Index = Index->getOperand(0); + if (Index->getOpcode() == ISD::SIGN_EXTEND) { + Index = Index->getOperand(0); + IsIndexSignExt = true; + } else + IsIndexSignExt = false; + Base = PotentialBase; + } + return BaseIndexOffset(Base, Index, Offset, IsIndexSignExt); +} +} // end namespace llvm diff --git a/interpreter/llvm/src/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/interpreter/llvm/src/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 1c32f7a0ce53b..127312076207c 100644 --- a/interpreter/llvm/src/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/interpreter/llvm/src/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -99,9 +99,31 @@ LimitFPPrecision("limit-float-precision", // store [4096 x i8] %data, [4096 x i8]* %buffer static const unsigned MaxParallelChains = 64; +// True if the Value passed requires ABI mangling as it is a parameter to a +// function or a return value from a function which is not an intrinsic. +static bool isABIRegCopy(const Value *V) { + const bool IsRetInst = V && isa(V); + const bool IsCallInst = V && isa(V); + const bool IsInLineAsm = + IsCallInst && static_cast(V)->isInlineAsm(); + const bool IsIndirectFunctionCall = + IsCallInst && !IsInLineAsm && + !static_cast(V)->getCalledFunction(); + // It is possible that the call instruction is an inline asm statement or an + // indirect function call in which case the return value of + // getCalledFunction() would be nullptr. + const bool IsIntrinsicCall = + IsCallInst && !IsInLineAsm && !IsIndirectFunctionCall && + static_cast(V)->getCalledFunction()->getIntrinsicID() != + Intrinsic::not_intrinsic; + + return IsRetInst || (IsCallInst && (!IsInLineAsm && !IsIntrinsicCall)); +} + static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, - MVT PartVT, EVT ValueVT, const Value *V); + MVT PartVT, EVT ValueVT, const Value *V, + bool IsABIRegCopy); /// getCopyFromParts - Create a value that contains the specified legal parts /// combined into the value they represent. If the parts combine to a type @@ -111,10 +133,11 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, const Value *V, - Optional AssertOp = None) { + Optional AssertOp = None, + bool IsABIRegCopy = false) { if (ValueVT.isVector()) return getCopyFromPartsVector(DAG, DL, Parts, NumParts, - PartVT, ValueVT, V); + PartVT, ValueVT, V, IsABIRegCopy); assert(NumParts > 0 && "No parts to assemble!"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -258,7 +281,8 @@ static void diagnosePossiblyInvalidConstraint(LLVMContext &Ctx, const Value *V, /// ValueVT (ISD::AssertSext).
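The BaseIndexOffset rewrite above collapses the old per-case logic in areNonVolatileConsecutiveLoads (frame indices, base-plus-constant adds, global addresses) into one decomposition: both addresses are matched into (Base, Index, Offset) form and the comparison reduces to equalBaseIndex plus a distance check. A minimal standalone sketch of that reduction, with a plain pointer-plus-offset pair standing in for the real SDValue-based class (the AddrDecomp type and the driver below are illustrative, not the LLVM API):

#include <cassert>
#include <cstdint>

// Simplified stand-in for BaseIndexOffset: an address is Base + Offset.
// (The real class additionally tracks an Index operand and whether that
// index was sign-extended.)
struct AddrDecomp {
  const void *Base; // identity of the base pointer node
  int64_t Offset;   // accumulated constant offset

  // If both addresses hang off the same base, report their distance in Off.
  bool equalBaseIndex(const AddrDecomp &Other, int64_t &Off) const {
    if (Base != Other.Base)
      return false;
    Off = Other.Offset - Offset;
    return true;
  }
};

int main() {
  char Buffer[64];
  AddrDecomp BaseLoc{Buffer, 8}; // first load:  Buffer + 8
  AddrDecomp Loc{Buffer, 24};    // second load: Buffer + 24
  int64_t Off = 0;
  // The new consecutive-load test is exactly Dist * Bytes == Off:
  // with 8-byte loads, Loc sits Dist = 2 elements after BaseLoc.
  bool SameBase = BaseLoc.equalBaseIndex(Loc, Off);
  assert(SameBase && Off == 2 * 8);
  (void)SameBase;
  return 0;
}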
static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, - MVT PartVT, EVT ValueVT, const Value *V) { + MVT PartVT, EVT ValueVT, const Value *V, + bool IsABIRegCopy) { assert(ValueVT.isVector() && "Not a vector value"); assert(NumParts > 0 && "No parts to assemble!"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -269,9 +293,18 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, EVT IntermediateVT; MVT RegisterVT; unsigned NumIntermediates; - unsigned NumRegs = - TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT, - NumIntermediates, RegisterVT); + unsigned NumRegs; + + if (IsABIRegCopy) { + NumRegs = TLI.getVectorTypeBreakdownForCallingConv( + *DAG.getContext(), ValueVT, IntermediateVT, NumIntermediates, + RegisterVT); + } else { + NumRegs = + TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT, + NumIntermediates, RegisterVT); + } + assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!"); NumParts = NumRegs; // Silence a compiler warning. assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!"); @@ -300,9 +333,14 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the // intermediate operands. + EVT BuiltVectorTy = + EVT::getVectorVT(*DAG.getContext(), IntermediateVT.getScalarType(), + (IntermediateVT.isVector() + ? IntermediateVT.getVectorNumElements() * NumParts + : NumIntermediates)); Val = DAG.getNode(IntermediateVT.isVector() ? ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR, - DL, ValueVT, Ops); + DL, BuiltVectorTy, Ops); } // There is now one part, held in Val. Correct it to match ValueVT. @@ -341,13 +379,29 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, TLI.isTypeLegal(ValueVT)) return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); - // Handle cases such as i8 -> <1 x i1> if (ValueVT.getVectorNumElements() != 1) { + // Certain ABIs require that vectors are passed as integers. For vectors + // that are the same size, this is an obvious bitcast. + if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits()) { + return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); + } else if (ValueVT.getSizeInBits() < PartEVT.getSizeInBits()) { + // Bitcast Val back to the original type and extract the corresponding + // vector we want. + unsigned Elts = PartEVT.getSizeInBits() / ValueVT.getScalarSizeInBits(); + EVT WiderVecType = EVT::getVectorVT(*DAG.getContext(), + ValueVT.getVectorElementType(), Elts); + Val = DAG.getBitcast(WiderVecType, Val); + return DAG.getNode( + ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val, + DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); + } + + diagnosePossiblyInvalidConstraint( + *DAG.getContext(), V, "non-trivial scalar-to-vector conversion"); + return DAG.getUNDEF(ValueVT); } - diagnosePossiblyInvalidConstraint(*DAG.getContext(), V, - "non-trivial scalar-to-vector conversion"); - return DAG.getUNDEF(ValueVT); + // Handle cases such as i8 -> <1 x i1> EVT ValueSVT = ValueVT.getVectorElementType(); if (ValueVT.getVectorNumElements() == 1 && ValueSVT != PartEVT) Val = ValueVT.isFloatingPoint() ?
DAG.getFPExtendOrRound(Val, DL, ValueSVT) @@ -358,7 +412,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &dl, SDValue Val, SDValue *Parts, unsigned NumParts, - MVT PartVT, const Value *V); + MVT PartVT, const Value *V, bool IsABIRegCopy); /// getCopyToParts - Create a series of nodes that contain the specified value /// split into legal parts. If the parts contain more bits than Val, then, for @@ -366,12 +420,14 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &dl, static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, const Value *V, - ISD::NodeType ExtendKind = ISD::ANY_EXTEND) { + ISD::NodeType ExtendKind = ISD::ANY_EXTEND, + bool IsABIRegCopy = false) { EVT ValueVT = Val.getValueType(); // Handle the vector case separately. if (ValueVT.isVector()) - return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V); + return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V, + IsABIRegCopy); unsigned PartBits = PartVT.getSizeInBits(); unsigned OrigNumParts = NumParts; @@ -496,7 +552,9 @@ static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, /// value split into legal parts. static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, - MVT PartVT, const Value *V) { + MVT PartVT, const Value *V, + bool IsABIRegCopy) { + EVT ValueVT = Val.getValueType(); assert(ValueVT.isVector() && "Not a vector"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -537,13 +595,20 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, // Promoted vector extract Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT); - } else{ - // Vector -> scalar conversion. - assert(ValueVT.getVectorNumElements() == 1 && - "Only trivial vector-to-scalar conversions should get here!"); - Val = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val, - DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); + } else { + if (ValueVT.getVectorNumElements() == 1) { + Val = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val, + DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); + + } else { + assert(PartVT.getSizeInBits() > ValueVT.getSizeInBits() && + "lossy conversion of vector to scalar type"); + EVT IntermediateType = + EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits()); + Val = DAG.getBitcast(IntermediateType, Val); + Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT); + } } assert(Val.getValueType() == PartVT && "Unexpected vector part value type"); @@ -555,15 +620,31 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, EVT IntermediateVT; MVT RegisterVT; unsigned NumIntermediates; - unsigned NumRegs = TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, - IntermediateVT, - NumIntermediates, RegisterVT); + unsigned NumRegs; + if (IsABIRegCopy) { + NumRegs = TLI.getVectorTypeBreakdownForCallingConv( + *DAG.getContext(), ValueVT, IntermediateVT, NumIntermediates, + RegisterVT); + } else { + NumRegs = + TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT, + NumIntermediates, RegisterVT); + } unsigned NumElements = ValueVT.getVectorNumElements(); assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!"); NumParts = NumRegs; // Silence a compiler warning. 
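In the non-trivial vector-to-scalar branch just above, a multi-element vector that has to travel in one wider scalar part is first bitcast to an integer of the vector's exact width and then any-extended to the part type. The same two steps in portable C++, with std::memcpy standing in for ISD::BITCAST and an ordinary widening conversion for the any-extend (a sketch of the data movement only, not of SelectionDAG):

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  // A <4 x i8> value that the ABI wants in a single i64 register part.
  uint8_t Vec[4] = {1, 2, 3, 4};

  // Step 1: "bitcast" <4 x i8> -> i32 (same width, bits unchanged).
  uint32_t AsInt32;
  static_assert(sizeof(AsInt32) == sizeof(Vec), "cast must be lossless");
  std::memcpy(&AsInt32, Vec, sizeof(Vec));

  // Step 2: any-extend i32 -> i64 (upper bits unspecified; zero here).
  uint64_t Part = AsInt32;

  // Round trip: truncating and bitcasting back recovers the lanes.
  uint32_t Back32 = static_cast<uint32_t>(Part);
  uint8_t Back[4];
  std::memcpy(Back, &Back32, sizeof(Back));
  assert(std::memcmp(Back, Vec, sizeof(Vec)) == 0);
  return 0;
}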
assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!"); + // Convert the vector to the appropriate type if necessary. + unsigned DestVectorNoElts = + NumIntermediates * + (IntermediateVT.isVector() ? IntermediateVT.getVectorNumElements() : 1); + EVT BuiltVectorTy = EVT::getVectorVT( + *DAG.getContext(), IntermediateVT.getScalarType(), DestVectorNoElts); + if (Val.getValueType() != BuiltVectorTy) + Val = DAG.getNode(ISD::BITCAST, DL, BuiltVectorTy, Val); + // Split the vector into intermediate operands. SmallVector Ops(NumIntermediates); for (unsigned i = 0; i != NumIntermediates; ++i) { @@ -596,22 +677,31 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, } } -RegsForValue::RegsForValue() {} +RegsForValue::RegsForValue() { IsABIMangled = false; } RegsForValue::RegsForValue(const SmallVector &regs, MVT regvt, - EVT valuevt) - : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {} + EVT valuevt, bool IsABIMangledValue) + : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs), + RegCount(1, regs.size()), IsABIMangled(IsABIMangledValue) {} RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI, - const DataLayout &DL, unsigned Reg, Type *Ty) { + const DataLayout &DL, unsigned Reg, Type *Ty, + bool IsABIMangledValue) { ComputeValueVTs(TLI, DL, Ty, ValueVTs); + IsABIMangled = IsABIMangledValue; + for (EVT ValueVT : ValueVTs) { - unsigned NumRegs = TLI.getNumRegisters(Context, ValueVT); - MVT RegisterVT = TLI.getRegisterType(Context, ValueVT); + unsigned NumRegs = IsABIMangledValue + ? TLI.getNumRegistersForCallingConv(Context, ValueVT) + : TLI.getNumRegisters(Context, ValueVT); + MVT RegisterVT = IsABIMangledValue + ? TLI.getRegisterTypeForCallingConv(Context, ValueVT) + : TLI.getRegisterType(Context, ValueVT); for (unsigned i = 0; i != NumRegs; ++i) Regs.push_back(Reg + i); RegVTs.push_back(RegisterVT); + RegCount.push_back(NumRegs); Reg += NumRegs; } } @@ -632,8 +722,10 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) { // Copy the legal parts from the registers. EVT ValueVT = ValueVTs[Value]; - unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVT); - MVT RegisterVT = RegVTs[Value]; + unsigned NumRegs = RegCount[Value]; + MVT RegisterVT = IsABIMangled + ? TLI.getRegisterTypeForCallingConv(RegVTs[Value]) + : RegVTs[Value]; Parts.resize(NumRegs); for (unsigned i = 0; i != NumRegs; ++i) { @@ -728,9 +820,11 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, unsigned NumRegs = Regs.size(); SmallVector Parts(NumRegs); for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) { - EVT ValueVT = ValueVTs[Value]; - unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), ValueVT); - MVT RegisterVT = RegVTs[Value]; + unsigned NumParts = RegCount[Value]; + + MVT RegisterVT = IsABIMangled + ?
TLI.getRegisterTypeForCallingConv(RegVTs[Value]) + : RegVTs[Value]; if (ExtendKind == ISD::ANY_EXTEND && TLI.isZExtFree(Val, RegisterVT)) ExtendKind = ISD::ZERO_EXTEND; @@ -953,10 +1047,12 @@ SDValue SelectionDAGBuilder::getCopyFromRegs(const Value *V, Type *Ty) { if (It != FuncInfo.ValueMap.end()) { unsigned InReg = It->second; + RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), - DAG.getDataLayout(), InReg, Ty); + DAG.getDataLayout(), InReg, Ty, isABIRegCopy(V)); SDValue Chain = DAG.getEntryNode(); - Result = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); + Result = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, + V); resolveDanglingDebugInfo(V, Result); } @@ -1142,8 +1238,9 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { // If this is an instruction which fast-isel has deferred, select it now. if (const Instruction *Inst = dyn_cast(V)) { unsigned InReg = FuncInfo.InitializeRegForValue(Inst); + RegsForValue RFV(*DAG.getContext(), TLI, DAG.getDataLayout(), InReg, - Inst->getType()); + Inst->getType(), isABIRegCopy(V)); SDValue Chain = DAG.getEntryNode(); return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); } @@ -1371,12 +1468,12 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) VT = TLI.getTypeForExtReturn(Context, VT, ExtendKind); - unsigned NumParts = TLI.getNumRegisters(Context, VT); - MVT PartVT = TLI.getRegisterType(Context, VT); + unsigned NumParts = TLI.getNumRegistersForCallingConv(Context, VT); + MVT PartVT = TLI.getRegisterTypeForCallingConv(Context, VT); SmallVector Parts(NumParts); getCopyToParts(DAG, getCurSDLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + j), - &Parts[0], NumParts, PartVT, &I, ExtendKind); + &Parts[0], NumParts, PartVT, &I, ExtendKind, true); // 'inreg' on function refers to return value ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); @@ -1412,9 +1509,10 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { true /*isfixed*/, 1 /*origidx*/, 0 /*partOffs*/)); // Create SDNode for the swifterror virtual register. 
- OutVals.push_back(DAG.getRegister(FuncInfo.getOrCreateSwiftErrorVReg( - FuncInfo.MBB, FuncInfo.SwiftErrorArg), - EVT(TLI.getPointerTy(DL)))); + OutVals.push_back( + DAG.getRegister(FuncInfo.getOrCreateSwiftErrorVRegUseAt( + &I, FuncInfo.MBB, FuncInfo.SwiftErrorArg).first, + EVT(TLI.getPointerTy(DL)))); } bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg(); @@ -3135,7 +3233,13 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { setValue(&I, DAG.getBuildVector(VT, DL, Ops)); } -void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { +void SelectionDAGBuilder::visitInsertValue(const User &I) { + ArrayRef Indices; + if (const InsertValueInst *IV = dyn_cast(&I)) + Indices = IV->getIndices(); + else + Indices = cast(&I)->getIndices(); + const Value *Op0 = I.getOperand(0); const Value *Op1 = I.getOperand(1); Type *AggTy = I.getType(); @@ -3143,7 +3247,7 @@ void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { bool IntoUndef = isa(Op0); bool FromUndef = isa(Op1); - unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices()); + unsigned LinearIndex = ComputeLinearIndex(AggTy, Indices); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SmallVector AggValueVTs; @@ -3183,13 +3287,19 @@ void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { DAG.getVTList(AggValueVTs), Values)); } -void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) { +void SelectionDAGBuilder::visitExtractValue(const User &I) { + ArrayRef Indices; + if (const ExtractValueInst *EV = dyn_cast(&I)) + Indices = EV->getIndices(); + else + Indices = cast(&I)->getIndices(); + const Value *Op0 = I.getOperand(0); Type *AggTy = Op0->getType(); Type *ValTy = I.getType(); bool OutOfUndef = isa(Op0); - unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices()); + unsigned LinearIndex = ComputeLinearIndex(AggTy, Indices); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SmallVector ValValueVTs; @@ -3290,7 +3400,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { SDValue IdxN = getValue(Idx); if (!IdxN.getValueType().isVector() && VectorWidth) { - MVT VT = MVT::getVectorVT(IdxN.getValueType().getSimpleVT(), VectorWidth); + EVT VT = EVT::getVectorVT(*Context, IdxN.getValueType(), VectorWidth); IdxN = DAG.getSplatBuildVector(VT, dl, IdxN); } @@ -3474,6 +3584,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { MMOFlags |= MachineMemOperand::MOInvariant; if (isDereferenceable) MMOFlags |= MachineMemOperand::MODereferenceable; + MMOFlags |= TLI.getMMOFlags(I); SDValue L = DAG.getLoad(ValueVTs[i], dl, Root, A, MachinePointerInfo(SV, Offsets[i]), Alignment, @@ -3497,8 +3608,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { } void SelectionDAGBuilder::visitStoreToSwiftError(const StoreInst &I) { - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - assert(TLI.supportSwiftError() && + assert(DAG.getTargetLoweringInfo().supportSwiftError() && "call visitStoreToSwiftError when backend supports swifterror"); SmallVector ValueVTs; @@ -3511,15 +3621,15 @@ void SelectionDAGBuilder::visitStoreToSwiftError(const StoreInst &I) { SDValue Src = getValue(SrcV); // Create a virtual register, then update the virtual register. 
- auto &DL = DAG.getDataLayout(); - const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy(DL)); - unsigned VReg = FuncInfo.MF->getRegInfo().createVirtualRegister(RC); + unsigned VReg; bool CreatedVReg; + std::tie(VReg, CreatedVReg) = FuncInfo.getOrCreateSwiftErrorVRegDefAt(&I); // Chain, DL, Reg, N or Chain, DL, Reg, N, Glue // Chain can be getRoot or getControlRoot. SDValue CopyNode = DAG.getCopyToReg(getRoot(), getCurSDLoc(), VReg, SDValue(Src.getNode(), Src.getResNo())); DAG.setRoot(CopyNode); - FuncInfo.setCurrentSwiftErrorVReg(FuncInfo.MBB, I.getOperand(1), VReg); + if (CreatedVReg) + FuncInfo.setCurrentSwiftErrorVReg(FuncInfo.MBB, I.getOperand(1), VReg); } void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) { @@ -3549,7 +3659,8 @@ void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) { // Chain, DL, Reg, VT, Glue or Chain, DL, Reg, VT SDValue L = DAG.getCopyFromReg( getRoot(), getCurSDLoc(), - FuncInfo.getOrCreateSwiftErrorVReg(FuncInfo.MBB, SV), ValueVTs[0]); + FuncInfo.getOrCreateSwiftErrorVRegUseAt(&I, FuncInfo.MBB, SV).first, + ValueVTs[0]); setValue(&I, L); } @@ -3603,6 +3714,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { MMOFlags |= MachineMemOperand::MOVolatile; if (I.getMetadata(LLVMContext::MD_nontemporal) != nullptr) MMOFlags |= MachineMemOperand::MONonTemporal; + MMOFlags |= TLI.getMMOFlags(I); // An aggregate load cannot wrap around the address space, so offsets to its // parts don't wrap either. @@ -3893,7 +4005,7 @@ void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) { SDLoc dl = getCurSDLoc(); AtomicOrdering SuccessOrder = I.getSuccessOrdering(); AtomicOrdering FailureOrder = I.getFailureOrdering(); - SynchronizationScope Scope = I.getSynchScope(); + SyncScope::ID SSID = I.getSyncScopeID(); SDValue InChain = getRoot(); @@ -3903,7 +4015,7 @@ void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) { ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl, MemVT, VTs, InChain, getValue(I.getPointerOperand()), getValue(I.getCompareOperand()), getValue(I.getNewValOperand()), MachinePointerInfo(I.getPointerOperand()), - /*Alignment=*/ 0, SuccessOrder, FailureOrder, Scope); + /*Alignment=*/ 0, SuccessOrder, FailureOrder, SSID); SDValue OutChain = L.getValue(2); @@ -3929,7 +4041,7 @@ void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) { case AtomicRMWInst::UMin: NT = ISD::ATOMIC_LOAD_UMIN; break; } AtomicOrdering Order = I.getOrdering(); - SynchronizationScope Scope = I.getSynchScope(); + SyncScope::ID SSID = I.getSyncScopeID(); SDValue InChain = getRoot(); @@ -3940,7 +4052,7 @@ void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) { getValue(I.getPointerOperand()), getValue(I.getValOperand()), I.getPointerOperand(), - /* Alignment=*/ 0, Order, Scope); + /* Alignment=*/ 0, Order, SSID); SDValue OutChain = L.getValue(1); @@ -3955,7 +4067,7 @@ void SelectionDAGBuilder::visitFence(const FenceInst &I) { Ops[0] = getRoot(); Ops[1] = DAG.getConstant((unsigned)I.getOrdering(), dl, TLI.getFenceOperandTy(DAG.getDataLayout())); - Ops[2] = DAG.getConstant(I.getSynchScope(), dl, + Ops[2] = DAG.getConstant(I.getSyncScopeID(), dl, TLI.getFenceOperandTy(DAG.getDataLayout())); DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops)); } @@ -3963,7 +4075,7 @@ void SelectionDAGBuilder::visitFence(const FenceInst &I) { void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { SDLoc dl = getCurSDLoc(); AtomicOrdering Order = I.getOrdering(); - SynchronizationScope Scope = 
I.getSynchScope(); + SyncScope::ID SSID = I.getSyncScopeID(); SDValue InChain = getRoot(); @@ -3981,7 +4093,7 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { VT.getStoreSize(), I.getAlignment() ? I.getAlignment() : DAG.getEVTAlignment(VT), - AAMDNodes(), nullptr, Scope, Order); + AAMDNodes(), nullptr, SSID, Order); InChain = TLI.prepareVolatileOrAtomicLoad(InChain, dl, DAG); SDValue L = @@ -3998,7 +4110,7 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) { SDLoc dl = getCurSDLoc(); AtomicOrdering Order = I.getOrdering(); - SynchronizationScope Scope = I.getSynchScope(); + SyncScope::ID SSID = I.getSyncScopeID(); SDValue InChain = getRoot(); @@ -4015,7 +4127,7 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) { getValue(I.getPointerOperand()), getValue(I.getValueOperand()), I.getPointerOperand(), I.getAlignment(), - Order, Scope); + Order, SSID); DAG.setRoot(OutChain); } @@ -4736,24 +4848,15 @@ SDDbgValue *SelectionDAGBuilder::getDbgValue(SDValue N, DIExpression *Expr, int64_t Offset, const DebugLoc &dl, unsigned DbgSDNodeOrder) { - SDDbgValue *SDV; - auto *FISDN = dyn_cast(N.getNode()); - if (FISDN && Expr->startsWithDeref()) { + if (auto *FISDN = dyn_cast(N.getNode())) { // Construct a FrameIndexDbgValue for FrameIndexSDNodes so we can describe // stack slot locations as such instead of as indirectly addressed // locations. - ArrayRef TrailingElements(Expr->elements_begin() + 1, - Expr->elements_end()); - DIExpression *DerefedDIExpr = - DIExpression::get(*DAG.getContext(), TrailingElements); - int FI = FISDN->getIndex(); - SDV = DAG.getFrameIndexDbgValue(Variable, DerefedDIExpr, FI, 0, dl, - DbgSDNodeOrder); - } else { - SDV = DAG.getDbgValue(Variable, Expr, N.getNode(), N.getResNo(), false, - Offset, dl, DbgSDNodeOrder); + return DAG.getFrameIndexDbgValue(Variable, Expr, FISDN->getIndex(), 0, dl, + DbgSDNodeOrder); } - return SDV; + return DAG.getDbgValue(Variable, Expr, N.getNode(), N.getResNo(), false, + Offset, dl, DbgSDNodeOrder); } // VisualStudio defines setjmp as _setjmp @@ -4867,11 +4970,50 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { updateDAGForMaybeTailCall(MM); return nullptr; } - case Intrinsic::memcpy_element_atomic: { - SDValue Dst = getValue(I.getArgOperand(0)); - SDValue Src = getValue(I.getArgOperand(1)); - SDValue NumElements = getValue(I.getArgOperand(2)); - SDValue ElementSize = getValue(I.getArgOperand(3)); + case Intrinsic::memcpy_element_unordered_atomic: { + const ElementUnorderedAtomicMemCpyInst &MI = + cast(I); + SDValue Dst = getValue(MI.getRawDest()); + SDValue Src = getValue(MI.getRawSource()); + SDValue Length = getValue(MI.getLength()); + + // Emit a library call. 
+ TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); + Entry.Node = Dst; + Args.push_back(Entry); + + Entry.Node = Src; + Args.push_back(Entry); + + Entry.Ty = MI.getLength()->getType(); + Entry.Node = Length; + Args.push_back(Entry); + + uint64_t ElementSizeConstant = MI.getElementSizeInBytes(); + RTLIB::Libcall LibraryCall = + RTLIB::getMEMCPY_ELEMENT_UNORDERED_ATOMIC(ElementSizeConstant); + if (LibraryCall == RTLIB::UNKNOWN_LIBCALL) + report_fatal_error("Unsupported element size"); + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(sdl).setChain(getRoot()).setLibCallee( + TLI.getLibcallCallingConv(LibraryCall), + Type::getVoidTy(*DAG.getContext()), + DAG.getExternalSymbol(TLI.getLibcallName(LibraryCall), + TLI.getPointerTy(DAG.getDataLayout())), + std::move(Args)); + + std::pair CallResult = TLI.LowerCallTo(CLI); + DAG.setRoot(CallResult.second); + return nullptr; + } + case Intrinsic::memmove_element_unordered_atomic: { + auto &MI = cast(I); + SDValue Dst = getValue(MI.getRawDest()); + SDValue Src = getValue(MI.getRawSource()); + SDValue Length = getValue(MI.getLength()); // Emit a library call. TargetLowering::ArgListTy Args; @@ -4883,18 +5025,52 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { Entry.Node = Src; Args.push_back(Entry); - Entry.Ty = I.getArgOperand(2)->getType(); - Entry.Node = NumElements; + Entry.Ty = MI.getLength()->getType(); + Entry.Node = Length; Args.push_back(Entry); - Entry.Ty = Type::getInt32Ty(*DAG.getContext()); - Entry.Node = ElementSize; + uint64_t ElementSizeConstant = MI.getElementSizeInBytes(); + RTLIB::Libcall LibraryCall = + RTLIB::getMEMMOVE_ELEMENT_UNORDERED_ATOMIC(ElementSizeConstant); + if (LibraryCall == RTLIB::UNKNOWN_LIBCALL) + report_fatal_error("Unsupported element size"); + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(sdl).setChain(getRoot()).setLibCallee( + TLI.getLibcallCallingConv(LibraryCall), + Type::getVoidTy(*DAG.getContext()), + DAG.getExternalSymbol(TLI.getLibcallName(LibraryCall), + TLI.getPointerTy(DAG.getDataLayout())), + std::move(Args)); + + std::pair CallResult = TLI.LowerCallTo(CLI); + DAG.setRoot(CallResult.second); + return nullptr; + } + case Intrinsic::memset_element_unordered_atomic: { + auto &MI = cast(I); + SDValue Dst = getValue(MI.getRawDest()); + SDValue Val = getValue(MI.getValue()); + SDValue Length = getValue(MI.getLength()); + + // Emit a library call. 
+ TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); + Entry.Node = Dst; Args.push_back(Entry); - uint64_t ElementSizeConstant = - cast(I.getArgOperand(3))->getZExtValue(); + Entry.Ty = Type::getInt8Ty(*DAG.getContext()); + Entry.Node = Val; + Args.push_back(Entry); + + Entry.Ty = MI.getLength()->getType(); + Entry.Node = Length; + Args.push_back(Entry); + + uint64_t ElementSizeConstant = MI.getElementSizeInBytes(); RTLIB::Libcall LibraryCall = - RTLIB::getMEMCPY_ELEMENT_ATOMIC(ElementSizeConstant); + RTLIB::getMEMSET_ELEMENT_UNORDERED_ATOMIC(ElementSizeConstant); if (LibraryCall == RTLIB::UNKNOWN_LIBCALL) report_fatal_error("Unsupported element size"); @@ -5254,7 +5430,19 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::experimental_constrained_fmul: case Intrinsic::experimental_constrained_fdiv: case Intrinsic::experimental_constrained_frem: - visitConstrainedFPIntrinsic(I, Intrinsic); + case Intrinsic::experimental_constrained_sqrt: + case Intrinsic::experimental_constrained_pow: + case Intrinsic::experimental_constrained_powi: + case Intrinsic::experimental_constrained_sin: + case Intrinsic::experimental_constrained_cos: + case Intrinsic::experimental_constrained_exp: + case Intrinsic::experimental_constrained_exp2: + case Intrinsic::experimental_constrained_log: + case Intrinsic::experimental_constrained_log10: + case Intrinsic::experimental_constrained_log2: + case Intrinsic::experimental_constrained_rint: + case Intrinsic::experimental_constrained_nearbyint: + visitConstrainedFPIntrinsic(cast(I)); return nullptr; case Intrinsic::fmuladd: { EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); @@ -5645,7 +5833,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { int FI = FuncInfo.StaticAllocaMap[Slot]; MCSymbol *FrameAllocSym = MF.getMMI().getContext().getOrCreateFrameAllocSymbol( - GlobalValue::getRealLinkageName(MF.getName()), Idx); + GlobalValue::dropLLVMManglingEscape(MF.getName()), Idx); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, dl, TII->get(TargetOpcode::LOCAL_ESCAPE)) .addSym(FrameAllocSym) @@ -5666,7 +5854,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { unsigned IdxVal = unsigned(Idx->getLimitedValue(INT_MAX)); MCSymbol *FrameAllocSym = MF.getMMI().getContext().getOrCreateFrameAllocSymbol( - GlobalValue::getRealLinkageName(Fn->getName()), IdxVal); + GlobalValue::dropLLVMManglingEscape(Fn->getName()), IdxVal); // Create a MCSymbol for the label to avoid any target lowering // that would make this PC relative. @@ -5752,11 +5940,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } } -void SelectionDAGBuilder::visitConstrainedFPIntrinsic(const CallInst &I, - unsigned Intrinsic) { +void SelectionDAGBuilder::visitConstrainedFPIntrinsic( + const ConstrainedFPIntrinsic &FPI) { SDLoc sdl = getCurSDLoc(); unsigned Opcode; - switch (Intrinsic) { + switch (FPI.getIntrinsicID()) { default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. 
case Intrinsic::experimental_constrained_fadd: Opcode = ISD::STRICT_FADD; @@ -5773,23 +5961,64 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(const CallInst &I, case Intrinsic::experimental_constrained_frem: Opcode = ISD::STRICT_FREM; break; + case Intrinsic::experimental_constrained_sqrt: + Opcode = ISD::STRICT_FSQRT; + break; + case Intrinsic::experimental_constrained_pow: + Opcode = ISD::STRICT_FPOW; + break; + case Intrinsic::experimental_constrained_powi: + Opcode = ISD::STRICT_FPOWI; + break; + case Intrinsic::experimental_constrained_sin: + Opcode = ISD::STRICT_FSIN; + break; + case Intrinsic::experimental_constrained_cos: + Opcode = ISD::STRICT_FCOS; + break; + case Intrinsic::experimental_constrained_exp: + Opcode = ISD::STRICT_FEXP; + break; + case Intrinsic::experimental_constrained_exp2: + Opcode = ISD::STRICT_FEXP2; + break; + case Intrinsic::experimental_constrained_log: + Opcode = ISD::STRICT_FLOG; + break; + case Intrinsic::experimental_constrained_log10: + Opcode = ISD::STRICT_FLOG10; + break; + case Intrinsic::experimental_constrained_log2: + Opcode = ISD::STRICT_FLOG2; + break; + case Intrinsic::experimental_constrained_rint: + Opcode = ISD::STRICT_FRINT; + break; + case Intrinsic::experimental_constrained_nearbyint: + Opcode = ISD::STRICT_FNEARBYINT; + break; } const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue Chain = getRoot(); - SDValue Ops[3] = { Chain, getValue(I.getArgOperand(0)), - getValue(I.getArgOperand(1)) }; SmallVector ValueVTs; - ComputeValueVTs(TLI, DAG.getDataLayout(), I.getType(), ValueVTs); + ComputeValueVTs(TLI, DAG.getDataLayout(), FPI.getType(), ValueVTs); ValueVTs.push_back(MVT::Other); // Out chain SDVTList VTs = DAG.getVTList(ValueVTs); - SDValue Result = DAG.getNode(Opcode, sdl, VTs, Ops); + SDValue Result; + if (FPI.isUnaryOp()) + Result = DAG.getNode(Opcode, sdl, VTs, + { Chain, getValue(FPI.getArgOperand(0)) }); + else + Result = DAG.getNode(Opcode, sdl, VTs, + { Chain, getValue(FPI.getArgOperand(0)), + getValue(FPI.getArgOperand(1)) }); assert(Result.getNode()->getNumValues() == 2); SDValue OutChain = Result.getValue(1); DAG.setRoot(OutChain); SDValue FPResult = Result.getValue(0); - setValue(&I, FPResult); + setValue(&FPI, FPResult); } std::pair @@ -5902,9 +6131,11 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, SwiftErrorVal = V; // We find the virtual register for the actual swifterror argument. // Instead of using the Value, we use the virtual register instead. - Entry.Node = - DAG.getRegister(FuncInfo.getOrCreateSwiftErrorVReg(FuncInfo.MBB, V), - EVT(TLI.getPointerTy(DL))); + Entry.Node = DAG.getRegister(FuncInfo + .getOrCreateSwiftErrorVRegUseAt( + CS.getInstruction(), FuncInfo.MBB, V) + .first, + EVT(TLI.getPointerTy(DL))); } Args.push_back(Entry); @@ -5945,29 +6176,17 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, if (SwiftErrorVal && TLI.supportSwiftError()) { // Get the last element of InVals. SDValue Src = CLI.InVals.back(); - const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy(DL)); - unsigned VReg = FuncInfo.MF->getRegInfo().createVirtualRegister(RC); + unsigned VReg; bool CreatedVReg; + std::tie(VReg, CreatedVReg) = + FuncInfo.getOrCreateSwiftErrorVRegDefAt(CS.getInstruction()); SDValue CopyNode = CLI.DAG.getCopyToReg(Result.second, CLI.DL, VReg, Src); // We update the virtual register for the actual swifterror argument. 
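The switch above is a one-to-one table from the experimental constrained intrinsics to their STRICT_ SelectionDAG opcodes; the structural change is that unary and binary intrinsics now share a single lowering path that differs only in how many value operands follow the chain. A reduced sketch of that shape (the enumerators and names below are illustrative stand-ins, not the real Intrinsic/ISD enumerations):

#include <cstdio>
#include <cstdlib>
#include <vector>

enum class ConstrainedOp { FAdd, FSub, Sqrt, Sin }; // stand-in intrinsic IDs
enum class StrictOpc { STRICT_FADD, STRICT_FSUB, STRICT_FSQRT, STRICT_FSIN };

static StrictOpc mapToStrict(ConstrainedOp Op) {
  switch (Op) {
  case ConstrainedOp::FAdd: return StrictOpc::STRICT_FADD;
  case ConstrainedOp::FSub: return StrictOpc::STRICT_FSUB;
  case ConstrainedOp::Sqrt: return StrictOpc::STRICT_FSQRT;
  case ConstrainedOp::Sin:  return StrictOpc::STRICT_FSIN;
  }
  std::abort(); // impossible intrinsic
}

static bool isUnaryOp(ConstrainedOp Op) {
  return Op == ConstrainedOp::Sqrt || Op == ConstrainedOp::Sin;
}

int main() {
  ConstrainedOp Op = ConstrainedOp::Sqrt;
  // The chain is always operand 0; unary ops append one value operand,
  // binary ops append two -- mirroring the FPI.isUnaryOp() split above.
  std::vector<const char *> Operands = {"chain", "arg0"};
  if (!isUnaryOp(Op))
    Operands.push_back("arg1");
  std::printf("opcode %d lowered with %zu operands\n",
              static_cast<int>(mapToStrict(Op)), Operands.size());
  return 0;
}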
- FuncInfo.setCurrentSwiftErrorVReg(FuncInfo.MBB, SwiftErrorVal, VReg); + if (CreatedVReg) + FuncInfo.setCurrentSwiftErrorVReg(FuncInfo.MBB, SwiftErrorVal, VReg); DAG.setRoot(CopyNode); } } -/// Return true if it only matters that the value is equal or not-equal to zero. -static bool IsOnlyUsedInZeroEqualityComparison(const Value *V) { - for (const User *U : V->users()) { - if (const ICmpInst *IC = dyn_cast(U)) - if (IC->isEquality()) - if (const Constant *C = dyn_cast(IC->getOperand(1))) - if (C->isNullValue()) - continue; - // Unknown instruction. - return false; - } - return true; -} - static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, SelectionDAGBuilder &Builder) { @@ -6054,7 +6273,7 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { // memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS) != 0 // memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS) != 0 - if (!CSize || !IsOnlyUsedInZeroEqualityComparison(&I)) + if (!CSize || !isOnlyUsedInZeroEqualityComparison(&I)) return false; // If the target has a fast compare for the given size, it will return a @@ -7082,8 +7301,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { SDLoc dl = getCurSDLoc(); // Use the produced MatchedRegs object to - MatchedRegs.getCopyToRegs(InOperandVal, DAG, dl, - Chain, &Flag, CS.getInstruction()); + MatchedRegs.getCopyToRegs(InOperandVal, DAG, dl, Chain, &Flag, + CS.getInstruction()); MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, true, OpInfo.getMatchedOperand(), dl, DAG, AsmNodeOperands); @@ -7727,6 +7946,22 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { auto &DL = CLI.DAG.getDataLayout(); ComputeValueVTs(*this, DL, CLI.RetTy, RetTys, &Offsets); + if (CLI.IsPostTypeLegalization) { + // If we are lowering a libcall after legalization, split the return type. 
+ SmallVector OldRetTys = std::move(RetTys); + SmallVector OldOffsets = std::move(Offsets); + for (size_t i = 0, e = OldRetTys.size(); i != e; ++i) { + EVT RetVT = OldRetTys[i]; + uint64_t Offset = OldOffsets[i]; + MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), RetVT); + unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), RetVT); + unsigned RegisterVTSize = RegisterVT.getSizeInBits(); + RetTys.append(NumRegs, RegisterVT); + for (unsigned j = 0; j != NumRegs; ++j) + Offsets.push_back(Offset + j * RegisterVTSize); + } + } + SmallVector Outs; GetReturnInfo(CLI.RetTy, getReturnAttrs(CLI), Outs, *this, DL); @@ -7769,8 +8004,10 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { } else { for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { EVT VT = RetTys[I]; - MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT); - unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT); + MVT RegisterVT = + getRegisterTypeForCallingConv(CLI.RetTy->getContext(), VT); + unsigned NumRegs = + getNumRegistersForCallingConv(CLI.RetTy->getContext(), VT); for (unsigned i = 0; i != NumRegs; ++i) { ISD::InputArg MyFlags; MyFlags.VT = RegisterVT; @@ -7807,6 +8044,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { for (unsigned i = 0, e = Args.size(); i != e; ++i) { SmallVector ValueVTs; ComputeValueVTs(*this, DL, Args[i].Ty, ValueVTs); + // FIXME: Split arguments if CLI.IsPostTypeLegalization Type *FinalType = Args[i].Ty; if (Args[i].IsByVal) FinalType = cast(Args[i].Ty)->getElementType(); @@ -7819,7 +8057,11 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { SDValue Op = SDValue(Args[i].Node.getNode(), Args[i].Node.getResNo() + Value); ISD::ArgFlagsTy Flags; - unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy); + + // Certain targets (such as MIPS) may have a different ABI alignment + // for a type depending on the context. Give the target a chance to + // specify the alignment it wants. + unsigned OriginalAlignment = getABIAlignmentForCallingConv(ArgTy, DL); if (Args[i].IsZExt) Flags.setZExt(); @@ -7874,8 +8116,9 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { Flags.setInConsecutiveRegs(); Flags.setOrigAlign(OriginalAlignment); - MVT PartVT = getRegisterType(CLI.RetTy->getContext(), VT); - unsigned NumParts = getNumRegisters(CLI.RetTy->getContext(), VT); + MVT PartVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(), VT); + unsigned NumParts = + getNumRegistersForCallingConv(CLI.RetTy->getContext(), VT); SmallVector Parts(NumParts); ISD::NodeType ExtendKind = ISD::ANY_EXTEND; @@ -7905,7 +8148,8 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { } getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts, PartVT, - CLI.CS ?
CLI.CS->getInstruction() : nullptr, ExtendKind, + true); for (unsigned j = 0; j != NumParts; ++j) { // if it isn't first piece, alignment must be 1 @@ -8005,12 +8249,14 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { unsigned CurReg = 0; for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { EVT VT = RetTys[I]; - MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT); - unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT); + MVT RegisterVT = + getRegisterTypeForCallingConv(CLI.RetTy->getContext(), VT); + unsigned NumRegs = + getNumRegistersForCallingConv(CLI.RetTy->getContext(), VT); ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg], NumRegs, RegisterVT, VT, nullptr, - AssertOp)); + AssertOp, true)); CurReg += NumRegs; } @@ -8046,8 +8292,11 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) { assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && "Is a physreg"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + // If this is an InlineAsm we have to match the registers required, not the + // notional registers required by the type. + RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg, - V->getType()); + V->getType(), isABIRegCopy(V)); SDValue Chain = DAG.getEntryNode(); ISD::NodeType ExtendType = (FuncInfo.PreferredExtendType.find(V) == @@ -8289,7 +8538,12 @@ void SelectionDAGISel::LowerArguments(const Function &F) { EVT VT = ValueVTs[Value]; Type *ArgTy = VT.getTypeForEVT(*DAG.getContext()); ISD::ArgFlagsTy Flags; - unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy); + + // Certain targets (such as MIPS) may have a different ABI alignment + // for a type depending on the context. Give the target a chance to + // specify the alignment it wants. + unsigned OriginalAlignment = + TLI->getABIAlignmentForCallingConv(ArgTy, DL); if (Arg.hasAttribute(Attribute::ZExt)) Flags.setZExt(); @@ -8351,8 +8605,10 @@ void SelectionDAGISel::LowerArguments(const Function &F) { if (ArgCopyElisionCandidates.count(&Arg)) Flags.setCopyElisionCandidate(); - MVT RegisterVT = TLI->getRegisterType(*CurDAG->getContext(), VT); - unsigned NumRegs = TLI->getNumRegisters(*CurDAG->getContext(), VT); + MVT RegisterVT = + TLI->getRegisterTypeForCallingConv(*CurDAG->getContext(), VT); + unsigned NumRegs = + TLI->getNumRegistersForCallingConv(*CurDAG->getContext(), VT); for (unsigned i = 0; i != NumRegs; ++i) { ISD::InputArg MyFlags(Flags, RegisterVT, VT, isArgValueUsed, ArgNo, PartBase+i*RegisterVT.getStoreSize()); @@ -8456,8 +8712,10 @@ void SelectionDAGISel::LowerArguments(const Function &F) { for (unsigned Val = 0; Val != NumValues; ++Val) { EVT VT = ValueVTs[Val]; - MVT PartVT = TLI->getRegisterType(*CurDAG->getContext(), VT); - unsigned NumParts = TLI->getNumRegisters(*CurDAG->getContext(), VT); + MVT PartVT = + TLI->getRegisterTypeForCallingConv(*CurDAG->getContext(), VT); + unsigned NumParts = + TLI->getNumRegistersForCallingConv(*CurDAG->getContext(), VT); // Even an apparent 'unused' swifterror argument needs to be returned.
So // we do generate a copy for it that can be used on return from the @@ -8470,7 +8728,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) { AssertOp = ISD::AssertZext; ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], NumParts, - PartVT, VT, nullptr, AssertOp)); + PartVT, VT, nullptr, AssertOp, + true)); } i += NumParts; diff --git a/interpreter/llvm/src/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/interpreter/llvm/src/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index bdaee858da615..ac1d6aae65a52 100644 --- a/interpreter/llvm/src/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/interpreter/llvm/src/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -38,7 +38,6 @@ class BranchInst; class CallInst; class DbgValueInst; class ExtractElementInst; -class ExtractValueInst; class FCmpInst; class FPExtInst; class FPToSIInst; @@ -53,7 +52,6 @@ class IntToPtrInst; class IndirectBrInst; class InvokeInst; class InsertElementInst; -class InsertValueInst; class Instruction; class LoadInst; class MachineBasicBlock; @@ -859,8 +857,8 @@ class SelectionDAGBuilder { void visitInsertElement(const User &I); void visitShuffleVector(const User &I); - void visitExtractValue(const ExtractValueInst &I); - void visitInsertValue(const InsertValueInst &I); + void visitExtractValue(const User &I); + void visitInsertValue(const User &I); void visitLandingPad(const LandingPadInst &I); void visitGetElementPtr(const User &I); @@ -895,7 +893,7 @@ class SelectionDAGBuilder { void visitInlineAsm(ImmutableCallSite CS); const char *visitIntrinsicCall(const CallInst &I, unsigned Intrinsic); void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic); - void visitConstrainedFPIntrinsic(const CallInst &I, unsigned Intrinsic); + void visitConstrainedFPIntrinsic(const ConstrainedFPIntrinsic &FPI); void visitVAStart(const CallInst &I); void visitVAArg(const VAArgInst &I); @@ -975,18 +973,28 @@ struct RegsForValue { /// expanded value requires multiple registers. SmallVector Regs; + /// This list holds the number of registers for each value. + SmallVector RegCount; + + /// Records if this value needs to be treated in an ABI-dependent manner, + /// different from normal type legalization. + bool IsABIMangled; + RegsForValue(); - RegsForValue(const SmallVector &regs, MVT regvt, EVT valuevt); + RegsForValue(const SmallVector &regs, MVT regvt, EVT valuevt, + bool IsABIMangledValue = false); RegsForValue(LLVMContext &Context, const TargetLowering &TLI, - const DataLayout &DL, unsigned Reg, Type *Ty); + const DataLayout &DL, unsigned Reg, Type *Ty, + bool IsABIMangledValue = false); /// Add the specified values to this one.
void append(const RegsForValue &RHS) { ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end()); RegVTs.append(RHS.RegVTs.begin(), RHS.RegVTs.end()); Regs.append(RHS.Regs.begin(), RHS.Regs.end()); + RegCount.push_back(RHS.Regs.size()); } /// Emit a series of CopyFromReg nodes that copies from this value and returns diff --git a/interpreter/llvm/src/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/interpreter/llvm/src/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index c37d7080f2c5a..3dd58975b1f10 100644 --- a/interpreter/llvm/src/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/interpreter/llvm/src/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -11,12 +11,12 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/SelectionDAG.h" #include "ScheduleDAGSDNodes.h" #include "llvm/ADT/StringExtras.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/SelectionDAG.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/Intrinsics.h" @@ -214,6 +214,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FPOWI: return "fpowi"; case ISD::SETCC: return "setcc"; case ISD::SETCCE: return "setcce"; + case ISD::SETCCCARRY: return "setcccarry"; case ISD::SELECT: return "select"; case ISD::VSELECT: return "vselect"; case ISD::SELECT_CC: return "select_cc"; diff --git a/interpreter/llvm/src/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/interpreter/llvm/src/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 5e0feccb6b4c6..bdf57e8058426 100644 --- a/interpreter/llvm/src/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/interpreter/llvm/src/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -1,4 +1,4 @@ -//===-- SelectionDAGISel.cpp - Implement the SelectionDAGISel class -------===// +//===- SelectionDAGISel.cpp - Implement the SelectionDAGISel class --------===// // // The LLVM Compiler Infrastructure // @@ -17,11 +17,11 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/None.h" #include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BranchProbabilityInfo.h" @@ -31,6 +31,7 @@ #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/GCMetadata.h" +#include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -38,7 +39,6 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachinePassRegistry.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -51,9 +51,11 @@ #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/InstrTypes.h" @@ -64,6 +66,7 @@ #include "llvm/IR/Metadata.h" #include 
"llvm/IR/Type.h" #include "llvm/IR/User.h" +#include "llvm/IR/Value.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" @@ -89,6 +92,7 @@ #include #include #include +#include #include #include #include @@ -333,11 +337,13 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { /// SplitCriticalSideEffectEdges - Look for critical edges with a PHI value that /// may trap on it. In this case we have to split the edge so that the path /// through the predecessor block that doesn't go to the phi block doesn't -/// execute the possibly trapping instruction. -/// +/// execute the possibly trapping instruction. If available, we pass domtree +/// and loop info to be updated when we split critical edges. This is because +/// SelectionDAGISel preserves these analyses. /// This is required for correctness, so it must be done at -O0. /// -static void SplitCriticalSideEffectEdges(Function &Fn) { +static void SplitCriticalSideEffectEdges(Function &Fn, DominatorTree *DT, + LoopInfo *LI) { // Loop for blocks with phi nodes. for (BasicBlock &BB : Fn) { PHINode *PN = dyn_cast(BB.begin()); @@ -363,7 +369,7 @@ static void SplitCriticalSideEffectEdges(Function &Fn) { // Okay, we have to split this edge. SplitCriticalEdge( Pred->getTerminator(), GetSuccessorNumber(Pred, &BB), - CriticalEdgeSplittingOptions().setMergeIdenticalEdges()); + CriticalEdgeSplittingOptions(DT, LI).setMergeIdenticalEdges()); goto ReprocessBlock; } } @@ -399,10 +405,14 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { LibInfo = &getAnalysis().getTLI(); GFI = Fn.hasGC() ? &getAnalysis().getFunctionInfo(Fn) : nullptr; ORE = make_unique(&Fn); + auto *DTWP = getAnalysisIfAvailable(); + DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr; + auto *LIWP = getAnalysisIfAvailable(); + LoopInfo *LI = LIWP ? &LIWP->getLoopInfo() : nullptr; DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n"); - SplitCriticalSideEffectEdges(const_cast(Fn)); + SplitCriticalSideEffectEdges(const_cast(Fn), DT, LI); CurDAG->init(*MF, *ORE); FuncInfo->set(Fn, *MF, CurDAG); @@ -763,7 +773,6 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { DEBUG(dbgs() << "Optimized type-legalized selection DAG: BB#" << BlockNumber << " '" << BlockName << "'\n"; CurDAG->dump()); - } { @@ -905,50 +914,6 @@ class ISelUpdater : public SelectionDAG::DAGUpdateListener { } // end anonymous namespace -static bool isStrictFPOp(SDNode *Node, unsigned &NewOpc) { - unsigned OrigOpc = Node->getOpcode(); - switch (OrigOpc) { - case ISD::STRICT_FADD: NewOpc = ISD::FADD; return true; - case ISD::STRICT_FSUB: NewOpc = ISD::FSUB; return true; - case ISD::STRICT_FMUL: NewOpc = ISD::FMUL; return true; - case ISD::STRICT_FDIV: NewOpc = ISD::FDIV; return true; - case ISD::STRICT_FREM: NewOpc = ISD::FREM; return true; - default: return false; - } -} - -SDNode* SelectionDAGISel::MutateStrictFPToFP(SDNode *Node, unsigned NewOpc) { - assert(((Node->getOpcode() == ISD::STRICT_FADD && NewOpc == ISD::FADD) || - (Node->getOpcode() == ISD::STRICT_FSUB && NewOpc == ISD::FSUB) || - (Node->getOpcode() == ISD::STRICT_FMUL && NewOpc == ISD::FMUL) || - (Node->getOpcode() == ISD::STRICT_FDIV && NewOpc == ISD::FDIV) || - (Node->getOpcode() == ISD::STRICT_FREM && NewOpc == ISD::FREM)) && - "Unexpected StrictFP opcode!"); - - // We're taking this node out of the chain, so we need to re-link things. 
- SDValue InputChain = Node->getOperand(0); - SDValue OutputChain = SDValue(Node, 1); - CurDAG->ReplaceAllUsesOfValueWith(OutputChain, InputChain); - - SDVTList VTs = CurDAG->getVTList(Node->getOperand(1).getValueType()); - SDValue Ops[2] = { Node->getOperand(1), Node->getOperand(2) }; - SDNode *Res = CurDAG->MorphNodeTo(Node, NewOpc, VTs, Ops); - - // MorphNodeTo can operate in two ways: if an existing node with the - // specified operands exists, it can just return it. Otherwise, it - // updates the node in place to have the requested operands. - if (Res == Node) { - // If we updated the node in place, reset the node ID. To the isel, - // this should be just like a newly allocated machine node. - Res->setNodeId(-1); - } else { - CurDAG->ReplaceAllUsesWith(Node, Res); - CurDAG->RemoveDeadNode(Node); - } - - return Res; -} - void SelectionDAGISel::DoInstructionSelection() { DEBUG(dbgs() << "===== Instruction selection begins: BB#" << FuncInfo->MBB->getNumber() @@ -992,15 +957,12 @@ void SelectionDAGISel::DoInstructionSelection() { // If the current node is a strict FP pseudo-op, the isStrictFPOp() // function will provide the corresponding normal FP opcode to which the // node should be mutated. - unsigned NormalFPOpc = ISD::UNDEF; - bool IsStrictFPOp = isStrictFPOp(Node, NormalFPOpc); - if (IsStrictFPOp) - Node = MutateStrictFPToFP(Node, NormalFPOpc); + // + // FIXME: The backends need a way to handle FP constraints. + if (Node->isStrictFPOpcode()) + Node = CurDAG->mutateStrictFPToFP(Node); Select(Node); - - // FIXME: Add code here to attach an implicit def and use of - // target-specific FP environment registers. } CurDAG->setRoot(Dummy.getValue()); @@ -1096,6 +1058,7 @@ static void setupSwiftErrorVals(const Function &Fn, const TargetLowering *TLI, FuncInfo->SwiftErrorVals.clear(); FuncInfo->SwiftErrorVRegDefMap.clear(); FuncInfo->SwiftErrorVRegUpwardsUse.clear(); + FuncInfo->SwiftErrorVRegDefUses.clear(); FuncInfo->SwiftErrorArg = nullptr; // Check if function has a swifterror argument. @@ -1181,7 +1144,7 @@ static void processDbgDeclares(FunctionLoweringInfo *FuncInfo) { // Check if the variable is a static alloca or a byval or inalloca // argument passed in memory. If it is not, then we will ignore this // intrinsic and handle this during isel like dbg.value. - int FI = INT_MAX; + int FI = std::numeric_limits::max(); if (const auto *AI = dyn_cast(Address)) { auto SI = FuncInfo->StaticAllocaMap.find(AI); if (SI != FuncInfo->StaticAllocaMap.end()) @@ -1189,7 +1152,7 @@ static void processDbgDeclares(FunctionLoweringInfo *FuncInfo) { } else if (const auto *Arg = dyn_cast(Address)) FI = FuncInfo->getArgumentFrameIndex(Arg); - if (FI == INT_MAX) + if (FI == std::numeric_limits::max()) continue; DIExpression *Expr = DI->getExpression(); @@ -1319,6 +1282,80 @@ static void propagateSwiftErrorVRegs(FunctionLoweringInfo *FuncInfo) { } } +void preassignSwiftErrorRegs(const TargetLowering *TLI, + FunctionLoweringInfo *FuncInfo, + BasicBlock::const_iterator Begin, + BasicBlock::const_iterator End) { + if (!TLI->supportSwiftError() || FuncInfo->SwiftErrorVals.empty()) + return; + + // Iterate over instructions and assign vregs to swifterror defs and uses. + for (auto It = Begin; It != End; ++It) { + ImmutableCallSite CS(&*It); + if (CS) { + // A call-site with a swifterror argument is both use and def. + const Value *SwiftErrorAddr = nullptr; + for (auto &Arg : CS.args()) { + if (!Arg->isSwiftError()) + continue; + // Use of swifterror.
+ assert(!SwiftErrorAddr && "Cannot have multiple swifterror arguments"); + SwiftErrorAddr = &*Arg; + assert(SwiftErrorAddr->isSwiftError() && + "Must have a swifterror value argument"); + unsigned VReg; bool CreatedReg; + std::tie(VReg, CreatedReg) = FuncInfo->getOrCreateSwiftErrorVRegUseAt( + &*It, FuncInfo->MBB, SwiftErrorAddr); + assert(CreatedReg); + } + if (!SwiftErrorAddr) + continue; + + // Def of swifterror. + unsigned VReg; bool CreatedReg; + std::tie(VReg, CreatedReg) = + FuncInfo->getOrCreateSwiftErrorVRegDefAt(&*It); + assert(CreatedReg); + FuncInfo->setCurrentSwiftErrorVReg(FuncInfo->MBB, SwiftErrorAddr, VReg); + + // A load is a use. + } else if (const LoadInst *LI = dyn_cast(&*It)) { + const Value *V = LI->getOperand(0); + if (!V->isSwiftError()) + continue; + + unsigned VReg; bool CreatedReg; + std::tie(VReg, CreatedReg) = + FuncInfo->getOrCreateSwiftErrorVRegUseAt(LI, FuncInfo->MBB, V); + assert(CreatedReg); + + // A store is a def. + } else if (const StoreInst *SI = dyn_cast(&*It)) { + const Value *SwiftErrorAddr = SI->getOperand(1); + if (!SwiftErrorAddr->isSwiftError()) + continue; + + // Def of swifterror. + unsigned VReg; bool CreatedReg; + std::tie(VReg, CreatedReg) = + FuncInfo->getOrCreateSwiftErrorVRegDefAt(&*It); + assert(CreatedReg); + FuncInfo->setCurrentSwiftErrorVReg(FuncInfo->MBB, SwiftErrorAddr, VReg); + + // A return in a swifterror returning function is a use. + } else if (const ReturnInst *R = dyn_cast(&*It)) { + const Function *F = R->getParent()->getParent(); + if (!F->getAttributes().hasAttrSomewhere(Attribute::SwiftError)) + continue; + + unsigned VReg; bool CreatedReg; + std::tie(VReg, CreatedReg) = FuncInfo->getOrCreateSwiftErrorVRegUseAt( + R, FuncInfo->MBB, FuncInfo->SwiftErrorArg); + assert(CreatedReg); + } + } +} + void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { FastISelFailed = false; // Initialize the Fast-ISel state, if needed. @@ -1425,6 +1462,10 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { FastIS->startNewBlock(); unsigned NumFastIselRemaining = std::distance(Begin, End); + + // Pre-assign swifterror vregs. + preassignSwiftErrorRegs(TLI, FuncInfo, Begin, End); + + // Do FastISel on as many instructions as possible. for (; BI != Begin; --BI) { const Instruction *Inst = &*std::prev(BI); @@ -1442,7 +1483,6 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Try to select the instruction with FastISel. if (FastIS->selectInstruction(Inst)) { - FastISelFailed = true; --NumFastIselRemaining; ++NumFastIselSuccess; // If fast isel succeeded, skip over all the folded instructions, and @@ -1465,8 +1505,14 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { continue; } + FastISelFailed = true; + + // Then handle certain instructions as single-LLVM-Instruction blocks. + // We cannot separate out GCrelocates to their own blocks since we need + // to keep track of gc-relocates for a particular gc-statepoint. This is + // done by SelectionDAGBuilder::LowerAsSTATEPOINT, called before + // visitGCRelocate. - if (isa(Inst)) { + if (isa(Inst) && !isStatepoint(Inst) && !isGCRelocate(Inst)) { OptimizationRemarkMissed R("sdagisel", "FastISelFailure", Inst->getDebugLoc(), LLVMBB); @@ -2069,7 +2115,7 @@ static SDNode *findGlueUse(SDNode *N) { } /// findNonImmUse - Return true if "Use" is a non-immediate use of "Def". -/// This function recursively traverses up the operand chain, ignoring +/// This function iteratively traverses up the operand chain, ignoring /// certain nodes.
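The preassignSwiftErrorRegs walk above runs once per block before FastISel: every def of the swifterror slot (a store, or a call taking the swifterror argument) gets a fresh virtual register, and every use (a load, a call, or a return) records whichever register is current at that point. A toy model of the bookkeeping, with strings and a map in place of IR instructions and the FunctionLoweringInfo tables (names are illustrative):

#include <cstdio>
#include <map>
#include <string>
#include <vector>

int main() {
  struct Inst { std::string Kind; };          // "store" = def, "load" = use
  std::vector<Inst> Block = {{"store"}, {"load"}, {"store"}, {"load"}};

  unsigned NextVReg = 1;
  unsigned CurrentVReg = 0;                   // 0 = not yet defined
  std::map<const Inst *, unsigned> UseVReg;

  for (const Inst &I : Block) {
    if (I.Kind == "store")
      CurrentVReg = NextVReg++;               // def: fresh vreg for new value
    else
      UseVReg[&I] = CurrentVReg;              // use: whatever is current here
  }
  // The first load sees the first def's vreg, the second load the second's.
  std::printf("load #1 reads vreg %u, load #2 reads vreg %u\n",
              UseVReg[&Block[1]], UseVReg[&Block[3]]);
  return 0;
}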
static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse, SDNode *Root, SmallPtrSetImpl<SDNode *> &Visited, @@ -2082,30 +2128,36 @@ static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse, // The Use may be -1 (unassigned) if it is a newly allocated node. This can // happen because we scan down to newly selected nodes in the case of glue // uses. - if ((Use->getNodeId() < Def->getNodeId() && Use->getNodeId() != -1)) - return false; + std::vector<SDNode *> WorkList; + WorkList.push_back(Use); - // Don't revisit nodes if we already scanned it and didn't fail, we know we - // won't fail if we scan it again. - if (!Visited.insert(Use).second) - return false; + while (!WorkList.empty()) { + Use = WorkList.back(); + WorkList.pop_back(); + if (Use->getNodeId() < Def->getNodeId() && Use->getNodeId() != -1) + continue; - for (const SDValue &Op : Use->op_values()) { - // Ignore chain uses, they are validated by HandleMergeInputChains. - if (Op.getValueType() == MVT::Other && IgnoreChains) + // Don't revisit nodes that we have already scanned without failing; we + // know we won't fail if we scan them again. + if (!Visited.insert(Use).second) continue; - SDNode *N = Op.getNode(); - if (N == Def) { - if (Use == ImmedUse || Use == Root) - continue; // We are not looking for immediate use. - assert(N != Root); - return true; - } + for (const SDValue &Op : Use->op_values()) { + // Ignore chain uses, they are validated by HandleMergeInputChains. + if (Op.getValueType() == MVT::Other && IgnoreChains) + continue; + + SDNode *N = Op.getNode(); + if (N == Def) { + if (Use == ImmedUse || Use == Root) + continue; // We are not looking for immediate use. + assert(N != Root); + return true; + } - // Traverse up the operand chain. - if (findNonImmUse(N, Def, ImmedUse, Root, Visited, IgnoreChains)) - return true; + // Traverse up the operand chain.
+ WorkList.push_back(N); + } } return false; } diff --git a/interpreter/llvm/src/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/interpreter/llvm/src/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp index 2764688518c2b..11561dfa59474 100644 --- a/interpreter/llvm/src/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp +++ b/interpreter/llvm/src/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp @@ -11,13 +11,13 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/SelectionDAG.h" #include "ScheduleDAGSDNodes.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/StringExtras.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/SelectionDAG.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfo.h" #include "llvm/Support/Debug.h" diff --git a/interpreter/llvm/src/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/interpreter/llvm/src/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index c0a5041b13952..5d78bba86d73b 100644 --- a/interpreter/llvm/src/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/interpreter/llvm/src/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -17,9 +17,9 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" -#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/GCMetadata.h" #include "llvm/CodeGen/GCStrategy.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/StackMaps.h" #include "llvm/IR/CallingConv.h" @@ -110,8 +110,8 @@ StatepointLoweringState::allocateStackSlot(EVT ValueType, Builder.FuncInfo.StatepointStackSlots.size() && "Broken invariant"); - StatepointMaxSlotsRequired = std::max( - StatepointMaxSlotsRequired, Builder.FuncInfo.StatepointStackSlots.size()); + StatepointMaxSlotsRequired.updateMax( + Builder.FuncInfo.StatepointStackSlots.size()); return SpillSlot; } @@ -818,7 +818,7 @@ SelectionDAGBuilder::LowerStatepoint(ImmutableStatepoint ISP, SI.GCTransitionArgs = ArrayRef<const Use>(ISP.gc_args_begin(), ISP.gc_args_end()); SI.ID = ISP.getID(); - SI.DeoptState = ArrayRef<const Use>(ISP.vm_state_begin(), ISP.vm_state_end()); + SI.DeoptState = ArrayRef<const Use>(ISP.deopt_begin(), ISP.deopt_end()); SI.StatepointFlags = ISP.getFlags(); SI.NumPatchBytes = ISP.getNumPatchBytes(); SI.EHPadBB = EHPadBB; @@ -840,7 +840,7 @@ SelectionDAGBuilder::LowerStatepoint(ImmutableStatepoint ISP, // completely and make statepoint call to return a tuple. unsigned Reg = FuncInfo.CreateRegs(RetTy); RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), - DAG.getDataLayout(), Reg, RetTy); + DAG.getDataLayout(), Reg, RetTy, true); SDValue Chain = DAG.getEntryNode(); RFV.getCopyToRegs(ReturnValue, DAG, getCurSDLoc(), Chain, nullptr); diff --git a/interpreter/llvm/src/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/interpreter/llvm/src/lib/CodeGen/SelectionDAG/TargetLowering.cpp index befbd80d7965b..8652df7bbd706 100644 --- a/interpreter/llvm/src/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/interpreter/llvm/src/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -365,10 +365,10 @@ bool TargetLowering::ShrinkDemandedConstant(SDValue Op, const APInt &Demanded, // If this is a 'not' op, don't touch it because that's a canonical form.
const APInt &C = Op1C->getAPIntValue(); - if (Opcode == ISD::XOR && (C | ~Demanded).isAllOnesValue()) + if (Opcode == ISD::XOR && Demanded.isSubsetOf(C)) return false; - if (C.intersects(~Demanded)) { + if (!C.isSubsetOf(Demanded)) { EVT VT = Op.getValueType(); SDValue NewC = DAG.getConstant(Demanded & C, DL, VT); SDValue NewOp = DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC); @@ -603,11 +603,11 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (SimplifyDemandedBits(Op.getOperand(1), NewMask, Known, TLO, Depth+1)) return true; - assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?"); + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); if (SimplifyDemandedBits(Op.getOperand(0), ~Known.Zero & NewMask, Known2, TLO, Depth+1)) return true; - assert((Known2.Zero & Known2.One) == 0 && "Bits known to be one AND zero?"); + assert(!Known2.hasConflict() && "Bits known to be one AND zero?"); // If all of the demanded bits are known one on one side, return the other. // These bits cannot contribute to the result of the 'and'. @@ -633,11 +633,11 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, case ISD::OR: if (SimplifyDemandedBits(Op.getOperand(1), NewMask, Known, TLO, Depth+1)) return true; - assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?"); + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); if (SimplifyDemandedBits(Op.getOperand(0), ~Known.One & NewMask, Known2, TLO, Depth+1)) return true; - assert((Known2.Zero & Known2.One) == 0 && "Bits known to be one AND zero?"); + assert(!Known2.hasConflict() && "Bits known to be one AND zero?"); // If all of the demanded bits are known zero on one side, return the other. // These bits cannot contribute to the result of the 'or'. @@ -660,10 +660,10 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, case ISD::XOR: { if (SimplifyDemandedBits(Op.getOperand(1), NewMask, Known, TLO, Depth+1)) return true; - assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?"); + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); if (SimplifyDemandedBits(Op.getOperand(0), NewMask, Known2, TLO, Depth+1)) return true; - assert((Known2.Zero & Known2.One) == 0 && "Bits known to be one AND zero?"); + assert(!Known2.hasConflict() && "Bits known to be one AND zero?"); // If all of the demanded bits are known zero on one side, return the other. // These bits cannot contribute to the result of the 'xor'. @@ -725,8 +725,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, return true; if (SimplifyDemandedBits(Op.getOperand(1), NewMask, Known2, TLO, Depth+1)) return true; - assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?"); - assert((Known2.Zero & Known2.One) == 0 && "Bits known to be one AND zero?"); + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); + assert(!Known2.hasConflict() && "Bits known to be one AND zero?"); // If the operands are constants, see if we can simplify them. 
if (ShrinkDemandedConstant(Op, NewMask, TLO)) @@ -741,8 +741,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, return true; if (SimplifyDemandedBits(Op.getOperand(2), NewMask, Known2, TLO, Depth+1)) return true; - assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?"); - assert((Known2.Zero & Known2.One) == 0 && "Bits known to be one AND zero?"); + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); + assert(!Known2.hasConflict() && "Bits known to be one AND zero?"); // If the operands are constants, see if we can simplify them. if (ShrinkDemandedConstant(Op, NewMask, TLO)) @@ -907,7 +907,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // Compute the new bits that are at the top now. if (SimplifyDemandedBits(InOp, InDemandedMask, Known, TLO, Depth+1)) return true; - assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?"); + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); Known.Zero.lshrInPlace(ShAmt); Known.One.lshrInPlace(ShAmt); @@ -919,7 +919,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // always convert this into a logical shr, even if the shift amount is // variable. The low bit of the shift cannot be an input sign bit unless // the shift amount is >= the size of the datatype, which is undefined. - if (NewMask == 1) + if (NewMask.isOneValue()) return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, Op.getValueType(), Op.getOperand(0), Op.getOperand(1))); @@ -947,7 +947,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (SimplifyDemandedBits(Op.getOperand(0), InDemandedMask, Known, TLO, Depth+1)) return true; - assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?"); + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); Known.Zero.lshrInPlace(ShAmt); Known.One.lshrInPlace(ShAmt); @@ -1029,7 +1029,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (SimplifyDemandedBits(Op.getOperand(0), InputDemandedBits, Known, TLO, Depth+1)) return true; - assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?"); + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); // If the sign bit of the input is known set or clear, then we know the // top bits of the result. 
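The ShrinkDemandedConstant hunks above trade explicit bit-twiddling for APInt::isSubsetOf. A short standalone C++ sketch (an editor's aside, not part of the patch) that checks the two identities being relied on, with arbitrary sample values:

#include "llvm/ADT/APInt.h"
#include <cassert>
using llvm::APInt;

int main() {
  APInt Demanded(8, 0x0F), C(8, 0x5F);
  // The XOR early-out: every demanded bit is already set in C exactly when
  // (C | ~Demanded) is all-ones, i.e. Demanded.isSubsetOf(C).
  assert(Demanded.isSubsetOf(C) == (C | ~Demanded).isAllOnesValue());
  // The constant-shrinking test: C has a bit outside Demanded exactly when
  // C.intersects(~Demanded), i.e. !C.isSubsetOf(Demanded).
  assert(!C.isSubsetOf(Demanded) == C.intersects(~Demanded));
  return 0;
}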
@@ -1084,7 +1084,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (SimplifyDemandedBits(Op.getOperand(0), InMask, Known, TLO, Depth+1)) return true; - assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?"); + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); Known = Known.zext(BitWidth); Known.Zero |= NewBits; break; @@ -1134,7 +1134,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, APInt InMask = NewMask.trunc(OperandBitWidth); if (SimplifyDemandedBits(Op.getOperand(0), InMask, Known, TLO, Depth+1)) return true; - assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?"); + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); Known = Known.zext(BitWidth); break; } @@ -1193,7 +1193,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, } } - assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?"); + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); break; } case ISD::AssertZext: { @@ -1205,7 +1205,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | NewMask, Known, TLO, Depth+1)) return true; - assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?"); + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); Known.Zero |= ~InMask; break; @@ -1349,7 +1349,7 @@ bool TargetLowering::isConstTrueVal(const SDNode *N) const { case UndefinedBooleanContent: return CVal[0]; case ZeroOrOneBooleanContent: - return CVal == 1; + return CVal.isOneValue(); case ZeroOrNegativeOneBooleanContent: return CVal.isAllOnesValue(); } @@ -1493,8 +1493,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } } - // Ensure that the constant occurs on the RHS, and fold constant - // comparisons. + // Ensure that the constant occurs on the RHS and fold constant comparisons. ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond); if (isa(N0.getNode()) && (DCI.isBeforeLegalizeOps() || @@ -1507,7 +1506,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an // equality comparison, then we're just comparing whether X itself is // zero. - if (N0.getOpcode() == ISD::SRL && (C1 == 0 || C1 == 1) && + if (N0.getOpcode() == ISD::SRL && (C1.isNullValue() || C1.isOneValue()) && N0.getOperand(0).getOpcode() == ISD::CTLZ && N0.getOperand(1).getOpcode() == ISD::Constant) { const APInt &ShAmt @@ -1638,14 +1637,13 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0), TopSetCC.getOperand(1), InvCond); - } } } - // If the LHS is '(and load, const)', the RHS is 0, - // the test is for equality or unsigned, and all 1 bits of the const are - // in the same partial word, see if we can shorten the load. + // If the LHS is '(and load, const)', the RHS is 0, the test is for + // equality or unsigned, and all 1 bits of the const are in the same + // partial word, see if we can shorten the load. 
if (DCI.isBeforeLegalize() && !ISD::isSignedIntSetCC(Cond) && N0.getOpcode() == ISD::AND && C1 == 0 && @@ -1668,11 +1666,11 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, for (unsigned width = origWidth / 2; width>=8; width /= 2) { APInt newMask = APInt::getLowBitsSet(maskWidth, width); for (unsigned offset=0; offsetisNullValue() || N1C->getAPIntValue() == 1) && + } else if ((N1C->isNullValue() || N1C->isOne()) && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { // SETCC (SETCC), [0|1], [EQ|NE] -> SETCC if (N0.getOpcode() == ISD::SETCC && isTypeLegal(VT) && VT.bitsLE(N0.getValueType())) { - bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (N1C->getAPIntValue() != 1); + bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne()); if (TrueWhenTrue) return DAG.getNode(ISD::TRUNCATE, dl, VT, N0); // Invert the condition. @@ -1807,7 +1807,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, N0.getOperand(0).getOpcode() == ISD::XOR && N0.getOperand(1) == N0.getOperand(0).getOperand(1))) && isa(N0.getOperand(1)) && - cast(N0.getOperand(1))->getAPIntValue() == 1) { + cast(N0.getOperand(1))->isOne()) { // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We // can only do this if the top bits are known zero. unsigned BitWidth = N0.getValueSizeInBits(); @@ -1816,9 +1816,9 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, BitWidth-1))) { // Okay, get the un-inverted input value. SDValue Val; - if (N0.getOpcode() == ISD::XOR) + if (N0.getOpcode() == ISD::XOR) { Val = N0.getOperand(0); - else { + } else { assert(N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::XOR); // ((X^1)&1)^1 -> X & 1 @@ -1830,7 +1830,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, return DAG.getSetCC(dl, VT, Val, N1, Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ); } - } else if (N1C->getAPIntValue() == 1 && + } else if (N1C->isOne() && (VT == MVT::i1 || getBooleanContents(N0->getValueType(0)) == ZeroOrOneBooleanContent)) { @@ -1848,7 +1848,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } if (Op0.getOpcode() == ISD::AND && isa(Op0.getOperand(1)) && - cast(Op0.getOperand(1))->getAPIntValue() == 1) { + cast(Op0.getOperand(1))->isOne()) { // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0. if (Op0.getValueType().bitsGT(VT)) Op0 = DAG.getNode(ISD::AND, dl, VT, @@ -1883,7 +1883,10 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // Canonicalize GE/LE comparisons to use GT/LT comparisons. if (Cond == ISD::SETGE || Cond == ISD::SETUGE) { - if (C1 == MinVal) return DAG.getConstant(1, dl, VT); // X >= MIN --> true + // X >= MIN --> true + if (C1 == MinVal) + return DAG.getConstant(1, dl, VT); + // X >= C0 --> X > (C0 - 1) APInt C = C1 - 1; ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT; @@ -1898,7 +1901,10 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } if (Cond == ISD::SETLE || Cond == ISD::SETULE) { - if (C1 == MaxVal) return DAG.getConstant(1, dl, VT); // X <= MAX --> true + // X <= MAX --> true + if (C1 == MaxVal) + return DAG.getConstant(1, dl, VT); + // X <= C0 --> X < (C0 + 1) APInt C = C1 + 1; ISD::CondCode NewCC = (Cond == ISD::SETLE) ? 
ISD::SETLT : ISD::SETULT; @@ -2160,7 +2166,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond); if (N0.getOperand(1) == N1.getOperand(1)) return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond); - if (DAG.isCommutativeBinOp(N0.getOpcode())) { + if (isCommutativeBinOp(N0.getOpcode())) { // If X op Y == Y op X, try other combinations. if (N0.getOperand(0) == N1.getOperand(1)) return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0), @@ -2224,7 +2230,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, return DAG.getSetCC(dl, VT, N0.getOperand(1), DAG.getConstant(0, dl, N0.getValueType()), Cond); if (N0.getOperand(1) == N1) { - if (DAG.isCommutativeBinOp(N0.getOpcode())) + if (isCommutativeBinOp(N0.getOpcode())) return DAG.getSetCC(dl, VT, N0.getOperand(0), DAG.getConstant(0, dl, N0.getValueType()), Cond); @@ -2251,7 +2257,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, return DAG.getSetCC(dl, VT, N1.getOperand(1), DAG.getConstant(0, dl, N1.getValueType()), Cond); if (N1.getOperand(1) == N0) { - if (DAG.isCommutativeBinOp(N1.getOpcode())) + if (isCommutativeBinOp(N1.getOpcode())) return DAG.getSetCC(dl, VT, N1.getOperand(0), DAG.getConstant(0, dl, N1.getValueType()), Cond); if (N1.getNode()->hasOneUse()) { @@ -2476,7 +2482,7 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op, // gcc prints these as sign extended. Sign extend value to 64 bits // now; without this it would get ZExt'd later in // ScheduleDAGSDNodes::EmitNode, which is very generic. - Ops.push_back(DAG.getTargetConstant(C->getAPIntValue().getSExtValue(), + Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(C), MVT::i64)); } return; diff --git a/interpreter/llvm/src/lib/CodeGen/ShadowStackGCLowering.cpp b/interpreter/llvm/src/lib/CodeGen/ShadowStackGCLowering.cpp index ff7d205c1f4ce..7b60d22c7ace6 100644 --- a/interpreter/llvm/src/lib/CodeGen/ShadowStackGCLowering.cpp +++ b/interpreter/llvm/src/lib/CodeGen/ShadowStackGCLowering.cpp @@ -16,9 +16,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" #include "llvm/ADT/StringExtras.h" #include "llvm/CodeGen/GCStrategy.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/IntrinsicInst.h" @@ -27,7 +27,7 @@ using namespace llvm; -#define DEBUG_TYPE "shadowstackgclowering" +#define DEBUG_TYPE "shadow-stack-gc-lowering" namespace { @@ -66,10 +66,10 @@ class ShadowStackGCLowering : public FunctionPass { }; } -INITIALIZE_PASS_BEGIN(ShadowStackGCLowering, "shadow-stack-gc-lowering", +INITIALIZE_PASS_BEGIN(ShadowStackGCLowering, DEBUG_TYPE, "Shadow Stack GC Lowering", false, false) INITIALIZE_PASS_DEPENDENCY(GCModuleInfo) -INITIALIZE_PASS_END(ShadowStackGCLowering, "shadow-stack-gc-lowering", +INITIALIZE_PASS_END(ShadowStackGCLowering, DEBUG_TYPE, "Shadow Stack GC Lowering", false, false) FunctionPass *llvm::createShadowStackGCLoweringPass() { return new ShadowStackGCLowering(); } diff --git a/interpreter/llvm/src/lib/CodeGen/ShrinkWrap.cpp b/interpreter/llvm/src/lib/CodeGen/ShrinkWrap.cpp index 4837495777da2..aa75f5e2caa23 100644 --- a/interpreter/llvm/src/lib/CodeGen/ShrinkWrap.cpp +++ b/interpreter/llvm/src/lib/CodeGen/ShrinkWrap.cpp @@ -210,13 +210,12 @@ class ShrinkWrap : public MachineFunctionPass { char ShrinkWrap::ID = 0; char &llvm::ShrinkWrapID = ShrinkWrap::ID; 
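A pattern that recurs throughout this patch (ShadowStackGCLowering above, ShrinkWrap just below, then SjLjEHPrepare, SlotIndexes, SpillPlacement, StackColoring, and others): the pass-name string literal in the INITIALIZE_PASS* macros is replaced with DEBUG_TYPE, so the command-line pass name and the debug type can no longer drift apart. A minimal sketch of the idiom using a hypothetical pass (MyExamplePass is illustrative, not from this patch):

#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/Pass.h"
using namespace llvm;

#define DEBUG_TYPE "my-example-pass" // single source of truth for the name

namespace {
struct MyExamplePass : public MachineFunctionPass {
  static char ID;
  MyExamplePass() : MachineFunctionPass(ID) {}
  bool runOnMachineFunction(MachineFunction &) override { return false; }
};
} // end anonymous namespace

char MyExamplePass::ID = 0;
INITIALIZE_PASS(MyExamplePass, DEBUG_TYPE, "My Example Pass", false, false)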
-INITIALIZE_PASS_BEGIN(ShrinkWrap, "shrink-wrap", "Shrink Wrap Pass", false, - false) +INITIALIZE_PASS_BEGIN(ShrinkWrap, DEBUG_TYPE, "Shrink Wrap Pass", false, false) INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) -INITIALIZE_PASS_END(ShrinkWrap, "shrink-wrap", "Shrink Wrap Pass", false, false) +INITIALIZE_PASS_END(ShrinkWrap, DEBUG_TYPE, "Shrink Wrap Pass", false, false) bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI, RegScavenger *RS) const { @@ -282,8 +281,14 @@ void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB, if (!Restore) Restore = &MBB; - else + else if (MPDT->getNode(&MBB)) // If the block is not in the post dom tree, it + // means the block never returns. If that's the + // case, we don't want to call + // `findNearestCommonDominator`, which will + // return `Restore`. Restore = MPDT->findNearestCommonDominator(Restore, &MBB); + else + Restore = nullptr; // Abort, we can't find a restore point in this case. // Make sure we would be able to insert the restore code before the // terminator. @@ -293,7 +298,7 @@ void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB, continue; // One of the terminator needs to happen before the restore point. if (MBB.succ_empty()) { - Restore = nullptr; + Restore = nullptr; // Abort, we can't find a restore point in this case. break; } // Look for a restore point that post-dominates all the successors. @@ -419,7 +424,7 @@ static bool isIrreducibleCFG(const MachineFunction &MF, } bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) { - if (MF.empty() || !isShrinkWrapEnabled(MF)) + if (skipFunction(*MF.getFunction()) || MF.empty() || !isShrinkWrapEnabled(MF)) return false; DEBUG(dbgs() << "**** Analysing " << MF.getName() << '\n'); diff --git a/interpreter/llvm/src/lib/CodeGen/SjLjEHPrepare.cpp b/interpreter/llvm/src/lib/CodeGen/SjLjEHPrepare.cpp index e9eff4d0acb21..17a3a84ecda57 100644 --- a/interpreter/llvm/src/lib/CodeGen/SjLjEHPrepare.cpp +++ b/interpreter/llvm/src/lib/CodeGen/SjLjEHPrepare.cpp @@ -12,11 +12,11 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" @@ -74,7 +74,7 @@ class SjLjEHPrepare : public FunctionPass { } // end anonymous namespace char SjLjEHPrepare::ID = 0; -INITIALIZE_PASS(SjLjEHPrepare, "sjljehprepare", "Prepare SjLj exceptions", +INITIALIZE_PASS(SjLjEHPrepare, DEBUG_TYPE, "Prepare SjLj exceptions", false, false) // Public Interface To the SjLjEHPrepare pass. @@ -125,8 +125,11 @@ static void MarkBlocksLiveIn(BasicBlock *BB, if (!LiveBBs.insert(BB).second) return; // already been here. 
- for (BasicBlock *PredBB : predecessors(BB)) - MarkBlocksLiveIn(PredBB, LiveBBs); + df_iterator_default_set<BasicBlock*> Visited; + + for (BasicBlock *B : inverse_depth_first_ext(BB, Visited)) + LiveBBs.insert(B); + } /// substituteLPadValues - Substitute the values returned by the landingpad diff --git a/interpreter/llvm/src/lib/CodeGen/SlotIndexes.cpp b/interpreter/llvm/src/lib/CodeGen/SlotIndexes.cpp index bc2a1d09056bd..3656832a7f1a8 100644 --- a/interpreter/llvm/src/lib/CodeGen/SlotIndexes.cpp +++ b/interpreter/llvm/src/lib/CodeGen/SlotIndexes.cpp @@ -19,7 +19,7 @@ using namespace llvm; #define DEBUG_TYPE "slotindexes" char SlotIndexes::ID = 0; -INITIALIZE_PASS(SlotIndexes, "slotindexes", +INITIALIZE_PASS(SlotIndexes, DEBUG_TYPE, "Slot index numbering", false, false) STATISTIC(NumLocalRenum, "Number of local renumberings"); diff --git a/interpreter/llvm/src/lib/CodeGen/SpillPlacement.cpp b/interpreter/llvm/src/lib/CodeGen/SpillPlacement.cpp index f10c98ef4e508..0abe1c47da55a 100644 --- a/interpreter/llvm/src/lib/CodeGen/SpillPlacement.cpp +++ b/interpreter/llvm/src/lib/CodeGen/SpillPlacement.cpp @@ -40,14 +40,14 @@ using namespace llvm; -#define DEBUG_TYPE "spillplacement" +#define DEBUG_TYPE "spill-code-placement" char SpillPlacement::ID = 0; -INITIALIZE_PASS_BEGIN(SpillPlacement, "spill-code-placement", +INITIALIZE_PASS_BEGIN(SpillPlacement, DEBUG_TYPE, "Spill Code Placement Analysis", true, true) INITIALIZE_PASS_DEPENDENCY(EdgeBundles) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) -INITIALIZE_PASS_END(SpillPlacement, "spill-code-placement", +INITIALIZE_PASS_END(SpillPlacement, DEBUG_TYPE, "Spill Code Placement Analysis", true, true) char &llvm::SpillPlacementID = SpillPlacement::ID; @@ -310,7 +310,7 @@ void SpillPlacement::addLinks(ArrayRef<unsigned> Links) { bool SpillPlacement::scanActiveBundles() { RecentPositive.clear(); - for (int n = ActiveNodes->find_first(); n>=0; n = ActiveNodes->find_next(n)) { + for (unsigned n : ActiveNodes->set_bits()) { update(n); // A node that must spill, or a node without any links is not going to // change its value ever again, so exclude it from iterations. @@ -365,7 +365,7 @@ SpillPlacement::finish() { // Write preferences back to ActiveNodes. bool Perfect = true; - for (int n = ActiveNodes->find_first(); n>=0; n = ActiveNodes->find_next(n)) + for (unsigned n : ActiveNodes->set_bits()) if (!nodes[n].preferReg()) { ActiveNodes->reset(n); Perfect = false; diff --git a/interpreter/llvm/src/lib/CodeGen/SplitKit.cpp b/interpreter/llvm/src/lib/CodeGen/SplitKit.cpp index 3a50aaa69985d..323045fd2aaae 100644 --- a/interpreter/llvm/src/lib/CodeGen/SplitKit.cpp +++ b/interpreter/llvm/src/lib/CodeGen/SplitKit.cpp @@ -53,10 +53,10 @@ InsertPointAnalysis::computeLastInsertPoint(const LiveInterval &CurLI, std::pair<SlotIndex, SlotIndex> &LIP = LastInsertPoint[Num]; SlotIndex MBBEnd = LIS.getMBBEndIdx(&MBB); - SmallVector<const MachineBasicBlock *, 4> EHPadSucessors; + SmallVector<const MachineBasicBlock *, 4> EHPadSuccessors; for (const MachineBasicBlock *SMBB : MBB.successors()) if (SMBB->isEHPad()) - EHPadSucessors.push_back(SMBB); + EHPadSuccessors.push_back(SMBB); // Compute insert points on the first call. The pair is independent of the // current live interval. @@ -68,7 +68,7 @@ InsertPointAnalysis::computeLastInsertPoint(const LiveInterval &CurLI, LIP.first = LIS.getInstructionIndex(*FirstTerm); // If there is a landing pad successor, also find the call instruction. - if (EHPadSucessors.empty()) + if (EHPadSuccessors.empty()) return LIP.first; // There may not be a call instruction (?) in which case we ignore LPad.
LIP.second = LIP.first; @@ -87,7 +87,7 @@ InsertPointAnalysis::computeLastInsertPoint(const LiveInterval &CurLI, if (!LIP.second) return LIP.first; - if (none_of(EHPadSucessors, [&](const MachineBasicBlock *EHPad) { + if (none_of(EHPadSuccessors, [&](const MachineBasicBlock *EHPad) { return LIS.isLiveInToMBB(CurLI, EHPad); })) return LIP.first; @@ -569,8 +569,7 @@ SlotIndex SplitEditor::buildCopy(unsigned FromReg, unsigned ToReg, // Greedy heuristic: Keep iterating keeping the best covering subreg index // each time. - LaneBitmask LanesLeft = - LaneMask & ~(TRI.getSubRegIndexLaneMask(BestCover)); + LaneBitmask LanesLeft = LaneMask & ~(TRI.getSubRegIndexLaneMask(BestIdx)); while (LanesLeft.any()) { unsigned BestIdx = 0; int BestCover = INT_MIN; diff --git a/interpreter/llvm/src/lib/CodeGen/StackColoring.cpp b/interpreter/llvm/src/lib/CodeGen/StackColoring.cpp index f51d959a089aa..e5fc5402cb41b 100644 --- a/interpreter/llvm/src/lib/CodeGen/StackColoring.cpp +++ b/interpreter/llvm/src/lib/CodeGen/StackColoring.cpp @@ -37,6 +37,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/CodeGen/StackProtector.h" #include "llvm/CodeGen/WinEHFuncInfo.h" @@ -53,7 +54,7 @@ using namespace llvm; -#define DEBUG_TYPE "stackcoloring" +#define DEBUG_TYPE "stack-coloring" static cl::opt<bool> DisableColoring("no-stack-coloring", @@ -86,10 +87,134 @@ STATISTIC(StackSpaceSaved, "Number of bytes saved due to merging slots."); STATISTIC(StackSlotMerged, "Number of stack slot merged."); STATISTIC(EscapedAllocas, "Number of allocas that escaped the lifetime region"); +//===----------------------------------------------------------------------===// +// StackColoring Pass +//===----------------------------------------------------------------------===// +// +// Stack Coloring reduces stack usage by merging stack slots when they +// can't be used together. For example, consider the following C program: +// +// void bar(char *, int); +// void foo(bool var) { +// A: { +// char z[4096]; +// bar(z, 0); +// } +// +// char *p; +// char x[4096]; +// char y[4096]; +// if (var) { +// p = x; +// } else { +// bar(y, 1); +// p = y + 1024; +// } +// B: +// bar(p, 2); +// } +// +// Naively-compiled, this program would use 12k of stack space. However, the +// stack slot corresponding to `z` is always destroyed before either of the +// stack slots for `x` or `y` is used, and then `x` is only used if `var` +// is true, while `y` is only used if `var` is false. So at no time are two +// of the stack slots used together, and therefore we can merge them, +// compiling the function using only a single 4k alloca: +// +// void foo(bool var) { // equivalent +// char x[4096]; +// char *p; +// bar(x, 0); +// if (var) { +// p = x; +// } else { +// bar(x, 1); +// p = x + 1024; +// } +// bar(p, 2); +// } +// +// This is an important optimization if we want stack space to be under +// control in large functions, both open-coded ones and ones created by +// inlining. // // Implementation Notes: // --------------------- // +// An important part of the above reasoning is that `z` can't be accessed +// while the latter 2 calls to `bar` are running. This is justified because +// `z`'s lifetime is over after we exit from block `A:`, so any further +// accesses to it would be UB.
The way we represent this information +// in LLVM is by having frontends delimit blocks with `lifetime.start` +// and `lifetime.end` intrinsics. +// +// The effect of these intrinsics seems to be as follows (maybe I should +// specify this in the reference?): +// +// L1) at start, each stack-slot is marked as *out-of-scope*, unless no +// lifetime intrinsic refers to that stack slot, in which case +// it is marked as *in-scope*. +// L2) on a `lifetime.start`, a stack slot is marked as *in-scope* and +// the stack slot is overwritten with `undef`. +// L3) on a `lifetime.end`, a stack slot is marked as *out-of-scope*. +// L4) on function exit, all stack slots are marked as *out-of-scope*. +// L5) `lifetime.end` is a no-op when called on a slot that is already +// *out-of-scope*. +// L6) memory accesses to *out-of-scope* stack slots are UB. +// L7) when a stack-slot is marked as *out-of-scope*, all pointers to it +// are invalidated, unless the slot is "degenerate". This is used to +// justify not marking slots as in-use until the pointer to them is +// used, but feels a bit hacky in the presence of things like LICM. See +// the "Degenerate Slots" section for more details. +// +// Now, let's ground stack coloring on these rules. We'll define a slot +// as *in-use* at a (dynamic) point in execution if it either can be +// written to at that point, or if it has a live and non-undef content +// at that point. +// +// Obviously, slots that are never *in-use* together can be merged, and +// in our example `foo`, the slots for `x`, `y` and `z` are never +// in-use together (of course, sometimes slots that *are* in-use together +// might still be mergable, but we don't care about that here). +// +// In this implementation, we successively merge pairs of slots that are +// not *in-use* together. We could be smarter - for example, we could merge +// a single large slot with 2 small slots, or we could construct the +// interference graph and run a "smart" graph coloring algorithm, but with +// that aside, how do we find out whether a pair of slots might be *in-use* +// together? +// +// From our rules, we see that *out-of-scope* slots are never *in-use*, +// and from (L7) we see that "non-degenerate" slots remain non-*in-use* +// until their address is taken. Therefore, we can approximate slot activity +// using dataflow. +// +// A subtle point: naively, we might try to figure out which pairs of +// stack-slots interfere by propagating `S in-use` through the CFG for every +// stack-slot `S`, and having `S` and `T` interfere if there is a CFG point in +// which they are both *in-use*. +// +// That is sound, but overly conservative in some cases: in our (artificial) +// example `foo`, either `x` or `y` might be in use at the label `B:`, but +// as `x` is only in use if we came in from the `var` edge and `y` only +// if we came from the `!var` edge, they still can't be in use together. +// See PR32488 for an important real-life case. +// +// If we wanted to find all points of interference precisely, we could +// propagate `S in-use` and `S&T in-use` predicates through the CFG. That +// would be precise, but requires propagating `O(n^2)` dataflow facts. +// +// However, we aren't interested in the *set* of points of interference +// between 2 stack slots, only *whether* there *is* such a point. So we +// can rely on a little trick: for `S` and `T` to be in-use together, +// one of them needs to become in-use while the other is in-use (or +// they might both become in use simultaneously). 
We can check this +// by also keeping track of the points at which a stack slot might *start* +// being in-use. +// +// Exact first use: +// ---------------- +// // Consider the following motivating example: // // int foo() { @@ -158,6 +283,9 @@ STATISTIC(EscapedAllocas, "Number of allocas that escaped the lifetime region"); // lifetime, we can additionally overlap b1 and b5, giving us a 3*1024 // byte stack (better). // +// Degenerate Slots: +// ----------------- +// // Relying entirely on first-use of stack slots is problematic, // however, due to the fact that optimizations can sometimes migrate // uses of a variable outside of its lifetime start/end region. Here @@ -237,10 +365,6 @@ STATISTIC(EscapedAllocas, "Number of allocas that escaped the lifetime region"); // for "b" then it will appear that 'b' has a degenerate lifetime. // -//===----------------------------------------------------------------------===// -// StackColoring Pass -//===----------------------------------------------------------------------===// - namespace { /// StackColoring - A machine pass for merging disjoint stack allocations, /// marked by the LIFETIME_START and LIFETIME_END pseudo instructions. @@ -271,8 +395,11 @@ class StackColoring : public MachineFunctionPass { /// Maps basic blocks to a serial number. SmallVector BasicBlockNumbering; - /// Maps liveness intervals for each slot. + /// Maps slots to their use interval. Outside of this interval, slots + /// values are either dead or `undef` and they will not be written to. SmallVector, 16> Intervals; + /// Maps slots to the points where they can become in-use. + SmallVector, 16> LiveStarts; /// VNInfo is used for the construction of LiveIntervals. VNInfo::Allocator VNInfoAllocator; /// SlotIndex analysis object. @@ -371,12 +498,12 @@ class StackColoring : public MachineFunctionPass { char StackColoring::ID = 0; char &llvm::StackColoringID = StackColoring::ID; -INITIALIZE_PASS_BEGIN(StackColoring, - "stack-coloring", "Merge disjoint stack slots", false, false) +INITIALIZE_PASS_BEGIN(StackColoring, DEBUG_TYPE, + "Merge disjoint stack slots", false, false) INITIALIZE_PASS_DEPENDENCY(SlotIndexes) INITIALIZE_PASS_DEPENDENCY(StackProtector) -INITIALIZE_PASS_END(StackColoring, - "stack-coloring", "Merge disjoint stack slots", false, false) +INITIALIZE_PASS_END(StackColoring, DEBUG_TYPE, + "Merge disjoint stack slots", false, false) void StackColoring::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); @@ -672,15 +799,22 @@ void StackColoring::calculateLocalLiveness() void StackColoring::calculateLiveIntervals(unsigned NumSlots) { SmallVector Starts; - SmallVector Finishes; + SmallVector DefinitelyInUse; // For each block, find which slots are active within this block // and update the live intervals. for (const MachineBasicBlock &MBB : *MF) { Starts.clear(); Starts.resize(NumSlots); - Finishes.clear(); - Finishes.resize(NumSlots); + DefinitelyInUse.clear(); + DefinitelyInUse.resize(NumSlots); + + // Start the interval of the slots that we previously found to be 'in-use'. + BlockLifetimeInfo &MBBLiveness = BlockLiveness[&MBB]; + for (int pos = MBBLiveness.LiveIn.find_first(); pos != -1; + pos = MBBLiveness.LiveIn.find_next(pos)) { + Starts[pos] = Indexes->getMBBStartIdx(&MBB); + } // Create the interval for the basic blocks containing lifetime begin/end. 
for (const MachineInstr &MI : MBB) { @@ -692,68 +826,35 @@ void StackColoring::calculateLiveIntervals(unsigned NumSlots) { SlotIndex ThisIndex = Indexes->getInstructionIndex(MI); for (auto Slot : slots) { if (IsStart) { - if (!Starts[Slot].isValid() || Starts[Slot] > ThisIndex) + // If a slot is already definitely in use, we don't have to emit + // a new start marker because there is already a pre-existing + // one. + if (!DefinitelyInUse[Slot]) { + LiveStarts[Slot].push_back(ThisIndex); + DefinitelyInUse[Slot] = true; + } + if (!Starts[Slot].isValid()) Starts[Slot] = ThisIndex; } else { - if (!Finishes[Slot].isValid() || Finishes[Slot] < ThisIndex) - Finishes[Slot] = ThisIndex; + if (Starts[Slot].isValid()) { + VNInfo *VNI = Intervals[Slot]->getValNumInfo(0); + Intervals[Slot]->addSegment( + LiveInterval::Segment(Starts[Slot], ThisIndex, VNI)); + Starts[Slot] = SlotIndex(); // Invalidate the start index + DefinitelyInUse[Slot] = false; + } } } } - // Create the interval of the blocks that we previously found to be 'alive'. - BlockLifetimeInfo &MBBLiveness = BlockLiveness[&MBB]; - for (int pos = MBBLiveness.LiveIn.find_first(); pos != -1; - pos = MBBLiveness.LiveIn.find_next(pos)) { - Starts[pos] = Indexes->getMBBStartIdx(&MBB); - } - for (int pos = MBBLiveness.LiveOut.find_first(); pos != -1; - pos = MBBLiveness.LiveOut.find_next(pos)) { - Finishes[pos] = Indexes->getMBBEndIdx(&MBB); - } - + // Finish up started segments. for (unsigned i = 0; i < NumSlots; ++i) { - // - // When LifetimeStartOnFirstUse is turned on, data flow analysis - // is forward (from starts to ends), not bidirectional. A - // consequence of this is that we can wind up in situations - // where Starts[i] is invalid but Finishes[i] is valid and vice - // versa. Example: - // - // LIFETIME_START x - // if (...) { - // - // throw ...; - // } - // LIFETIME_END x - // return 2; - // - // - // Here the slot for "x" will not be live into the block - // containing the "return 2" (since lifetimes start with first - // use, not at the dominating LIFETIME_START marker). - // - if (Starts[i].isValid() && !Finishes[i].isValid()) { - Finishes[i] = Indexes->getMBBEndIdx(&MBB); - } if (!Starts[i].isValid()) continue; - assert(Starts[i] && Finishes[i] && "Invalid interval"); - VNInfo *ValNum = Intervals[i]->getValNumInfo(0); - SlotIndex S = Starts[i]; - SlotIndex F = Finishes[i]; - if (S < F) { - // We have a single consecutive region. - Intervals[i]->addSegment(LiveInterval::Segment(S, F, ValNum)); - } else { - // We have two non-consecutive regions. This happens when - // LIFETIME_START appears after the LIFETIME_END marker. - SlotIndex NewStart = Indexes->getMBBStartIdx(&MBB); - SlotIndex NewFin = Indexes->getMBBEndIdx(&MBB); - Intervals[i]->addSegment(LiveInterval::Segment(NewStart, F, ValNum)); - Intervals[i]->addSegment(LiveInterval::Segment(S, NewFin, ValNum)); - } + SlotIndex EndIdx = Indexes->getMBBEndIdx(&MBB); + VNInfo *VNI = Intervals[i]->getValNumInfo(0); + Intervals[i]->addSegment(LiveInterval::Segment(Starts[i], EndIdx, VNI)); } } } @@ -789,6 +890,10 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) { // Keep a list of *allocas* which need to be remapped. DenseMap<const AllocaInst*, const AllocaInst*> Allocas; + + // Keep a list of allocas which have been affected by the remap.
+ SmallPtrSet<const AllocaInst *, 32> MergedAllocas; + for (const std::pair<int, int> &SI : SlotRemap) { const AllocaInst *From = MFI->getObjectAllocation(SI.first); const AllocaInst *To = MFI->getObjectAllocation(SI.second); @@ -808,6 +913,10 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) { Inst = Cast; } + // We keep both slots to maintain AliasAnalysis metadata later. + MergedAllocas.insert(From); + MergedAllocas.insert(To); + // Allow the stack protector to adjust its value map to account for the // upcoming replacement. SP->adjustForColoring(From, To); @@ -839,13 +948,6 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) { // Update the MachineMemOperand to use the new alloca. for (MachineMemOperand *MMO : I.memoperands()) { - // FIXME: In order to enable the use of TBAA when using AA in CodeGen, - // we'll also need to update the TBAA nodes in MMOs with values - // derived from the merged allocas. When doing this, we'll need to use - // the same variant of GetUnderlyingObjects that is used by the - // instruction scheduler (that can look through ptrtoint/inttoptr - // pairs). - // We've replaced IR-level uses of the remapped allocas, so we only // need to replace direct uses here. const AllocaInst *AI = dyn_cast_or_null<AllocaInst>(MMO->getValue()); @@ -897,6 +999,48 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) { MO.setIndex(ToSlot); FixedInstr++; } + + // We adjust AliasAnalysis information for merged stack slots. + MachineSDNode::mmo_iterator NewMemOps = + MF->allocateMemRefsArray(I.getNumMemOperands()); + unsigned MemOpIdx = 0; + bool ReplaceMemOps = false; + for (MachineMemOperand *MMO : I.memoperands()) { + // If this memory location may come from a slot remapped here, + // we remove its AA information. + bool MayHaveConflictingAAMD = false; + if (MMO->getAAInfo()) { + if (const Value *MMOV = MMO->getValue()) { + SmallVector<Value *, 4> Objs; + getUnderlyingObjectsForCodeGen(MMOV, Objs, MF->getDataLayout()); + + if (Objs.empty()) + MayHaveConflictingAAMD = true; + else + for (Value *V : Objs) { + // If this memory location comes from a known stack slot + // that is not remapped, we continue checking. + // Otherwise, we need to invalidate AA information. + const AllocaInst *AI = dyn_cast_or_null<AllocaInst>(V); + if (AI && MergedAllocas.count(AI)) { + MayHaveConflictingAAMD = true; + break; + } + } + } + } + if (MayHaveConflictingAAMD) { + NewMemOps[MemOpIdx++] = MF->getMachineMemOperand(MMO, AAMDNodes()); + ReplaceMemOps = true; + } + else + NewMemOps[MemOpIdx++] = MMO; + } + + // If any memory operand is updated, set the memory references of + // this instruction. + if (ReplaceMemOps) + I.setMemRefs(std::make_pair(NewMemOps, I.getNumMemOperands())); } // Update the location of C++ catch objects for the MSVC personality routine.
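For clarity, the predicate that the remapInstructions() hunk above computes inline can be restated in isolation. This is an editor's sketch under the same assumptions as the patch; the helper name is ours, not LLVM's:

#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// True when a memory operand's AA tags may now be wrong: either we cannot
// tell which objects it refers to, or one of them is an alloca whose slot
// this pass merged into another.
static bool mayHaveConflictingAAMD(const MachineMemOperand &MMO,
                                   const SmallPtrSetImpl<const AllocaInst *> &Merged,
                                   const DataLayout &DL) {
  if (!MMO.getAAInfo())
    return false; // Nothing to invalidate.
  const Value *V = MMO.getValue();
  if (!V)
    return false;
  SmallVector<Value *, 4> Objs;
  getUnderlyingObjectsForCodeGen(V, Objs, DL);
  if (Objs.empty())
    return true; // Unknown provenance: be conservative.
  for (Value *O : Objs)
    if (const AllocaInst *AI = dyn_cast<AllocaInst>(O))
      if (Merged.count(AI))
        return true; // Refers to a merged slot: tags may conflict.
  return false;
}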
@@ -983,6 +1127,7 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) { BasicBlockNumbering.clear(); Markers.clear(); Intervals.clear(); + LiveStarts.clear(); VNInfoAllocator.Reset(); unsigned NumSlots = MFI->getObjectIndexEnd(); @@ -994,6 +1139,7 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) { SmallVector SortedSlots; SortedSlots.reserve(NumSlots); Intervals.reserve(NumSlots); + LiveStarts.resize(NumSlots); unsigned NumMarkers = collectMarkers(NumSlots); @@ -1065,6 +1211,9 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) { return MFI->getObjectSize(LHS) > MFI->getObjectSize(RHS); }); + for (auto &s : LiveStarts) + std::sort(s.begin(), s.end()); + bool Changed = true; while (Changed) { Changed = false; @@ -1080,12 +1229,22 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) { int SecondSlot = SortedSlots[J]; LiveInterval *First = &*Intervals[FirstSlot]; LiveInterval *Second = &*Intervals[SecondSlot]; + auto &FirstS = LiveStarts[FirstSlot]; + auto &SecondS = LiveStarts[SecondSlot]; assert (!First->empty() && !Second->empty() && "Found an empty range"); - // Merge disjoint slots. - if (!First->overlaps(*Second)) { + // Merge disjoint slots. This is a little bit tricky - see the + // Implementation Notes section for an explanation. + if (!First->isLiveAtIndexes(SecondS) && + !Second->isLiveAtIndexes(FirstS)) { Changed = true; First->MergeSegmentsInAsValue(*Second, First->getValNumInfo(0)); + + int OldSize = FirstS.size(); + FirstS.append(SecondS.begin(), SecondS.end()); + auto Mid = FirstS.begin() + OldSize; + std::inplace_merge(FirstS.begin(), Mid, FirstS.end()); + SlotRemap[SecondSlot] = FirstSlot; SortedSlots[J] = -1; DEBUG(dbgs()<<"Merging #"< EnableSelectionDAGSP("enable-selectiondag-sp", cl::init(true), cl::Hidden); char StackProtector::ID = 0; -INITIALIZE_TM_PASS(StackProtector, "stack-protector", "Insert stack protectors", - false, true) -FunctionPass *llvm::createStackProtectorPass(const TargetMachine *TM) { - return new StackProtector(TM); -} +INITIALIZE_PASS_BEGIN(StackProtector, DEBUG_TYPE, + "Insert stack protectors", false, true) +INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) +INITIALIZE_PASS_END(StackProtector, DEBUG_TYPE, + "Insert stack protectors", false, true) + +FunctionPass *llvm::createStackProtectorPass() { return new StackProtector(); } StackProtector::SSPLayoutKind StackProtector::getSSPLayout(const AllocaInst *AI) const { @@ -91,12 +95,19 @@ void StackProtector::adjustForColoring(const AllocaInst *From, } } +void StackProtector::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); + AU.addPreserved(); +} + bool StackProtector::runOnFunction(Function &Fn) { F = &Fn; M = F->getParent(); DominatorTreeWrapperPass *DTWP = getAnalysisIfAvailable(); DT = DTWP ? 
&DTWP->getDomTree() : nullptr; + TM = &getAnalysis().getTM(); + Trip = TM->getTargetTriple(); TLI = TM->getSubtargetImpl(Fn)->getTargetLowering(); HasPrologue = false; HasIRCheck = false; diff --git a/interpreter/llvm/src/lib/CodeGen/StackSlotColoring.cpp b/interpreter/llvm/src/lib/CodeGen/StackSlotColoring.cpp index 234b2043a6a14..856bca19dee85 100644 --- a/interpreter/llvm/src/lib/CodeGen/StackSlotColoring.cpp +++ b/interpreter/llvm/src/lib/CodeGen/StackSlotColoring.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" @@ -22,6 +21,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/IR/Module.h" #include "llvm/Support/CommandLine.h" @@ -32,7 +32,7 @@ #include using namespace llvm; -#define DEBUG_TYPE "stackslotcoloring" +#define DEBUG_TYPE "stack-slot-coloring" static cl::opt DisableSharing("no-stack-slot-sharing", @@ -116,12 +116,12 @@ namespace { char StackSlotColoring::ID = 0; char &llvm::StackSlotColoringID = StackSlotColoring::ID; -INITIALIZE_PASS_BEGIN(StackSlotColoring, "stack-slot-coloring", +INITIALIZE_PASS_BEGIN(StackSlotColoring, DEBUG_TYPE, "Stack Slot Coloring", false, false) INITIALIZE_PASS_DEPENDENCY(SlotIndexes) INITIALIZE_PASS_DEPENDENCY(LiveStacks) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) -INITIALIZE_PASS_END(StackSlotColoring, "stack-slot-coloring", +INITIALIZE_PASS_END(StackSlotColoring, DEBUG_TYPE, "Stack Slot Coloring", false, false) namespace { diff --git a/interpreter/llvm/src/lib/CodeGen/TailDuplication.cpp b/interpreter/llvm/src/lib/CodeGen/TailDuplication.cpp index e2377d89497de..489a607eb1764 100644 --- a/interpreter/llvm/src/lib/CodeGen/TailDuplication.cpp +++ b/interpreter/llvm/src/lib/CodeGen/TailDuplication.cpp @@ -1,4 +1,4 @@ -//===-- TailDuplication.cpp - Duplicate blocks into predecessors' tails ---===// +//===- TailDuplication.cpp - Duplicate blocks into predecessors' tails ----===// // // The LLVM Compiler Infrastructure // @@ -12,22 +12,25 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TailDuplicator.h" -#include "llvm/IR/Function.h" -#include "llvm/Support/Debug.h" +#include "llvm/Pass.h" + using namespace llvm; #define DEBUG_TYPE "tailduplication" namespace { + /// Perform tail duplication. 
Delegates to TailDuplicator class TailDuplicatePass : public MachineFunctionPass { TailDuplicator Duplicator; public: static char ID; + explicit TailDuplicatePass() : MachineFunctionPass(ID) {} bool runOnMachineFunction(MachineFunction &MF) override; @@ -35,13 +38,13 @@ class TailDuplicatePass : public MachineFunctionPass { void getAnalysisUsage(AnalysisUsage &AU) const override; }; +} // end anonymous namespace + char TailDuplicatePass::ID = 0; -} char &llvm::TailDuplicateID = TailDuplicatePass::ID; -INITIALIZE_PASS(TailDuplicatePass, "tailduplication", "Tail Duplication", false, - false) +INITIALIZE_PASS(TailDuplicatePass, DEBUG_TYPE, "Tail Duplication", false, false) bool TailDuplicatePass::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(*MF.getFunction())) diff --git a/interpreter/llvm/src/lib/CodeGen/TailDuplicator.cpp b/interpreter/llvm/src/lib/CodeGen/TailDuplicator.cpp index d2414200e9d57..dc7265dcf6c24 100644 --- a/interpreter/llvm/src/lib/CodeGen/TailDuplicator.cpp +++ b/interpreter/llvm/src/lib/CodeGen/TailDuplicator.cpp @@ -1,4 +1,4 @@ -//===-- TailDuplicator.cpp - Duplicate blocks into predecessors' tails ---===// +//===- TailDuplicator.cpp - Duplicate blocks into predecessors' tails -----===// // // The LLVM Compiler Infrastructure // @@ -12,22 +12,36 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/TailDuplicator.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SetVector.h" -#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" -#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineSSAUpdater.h" +#include "llvm/CodeGen/TailDuplicator.h" +#include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include +#include +#include +#include + using namespace llvm; #define DEBUG_TYPE "tailduplication" @@ -41,15 +55,13 @@ STATISTIC(NumTailDupRemoved, STATISTIC(NumDeadBlocks, "Number of dead blocks removed"); STATISTIC(NumAddedPHIs, "Number of phis added"); -namespace llvm { - // Heuristic for tail duplication. 
static cl::opt TailDuplicateSize( "tail-dup-size", cl::desc("Maximum instructions to consider tail duplicating"), cl::init(2), cl::Hidden); -cl::opt TailDupIndirectBranchSize( +static cl::opt TailDupIndirectBranchSize( "tail-dup-indirect-size", cl::desc("Maximum instructions to consider tail duplicating blocks that " "end with indirect branches."), cl::init(20), @@ -138,7 +150,7 @@ bool TailDuplicator::tailDuplicateAndUpdate( bool IsSimple, MachineBasicBlock *MBB, MachineBasicBlock *ForcedLayoutPred, SmallVectorImpl *DuplicatedPreds, - llvm::function_ref *RemovalCallback) { + function_ref *RemovalCallback) { // Save the successors list. SmallSetVector Succs(MBB->succ_begin(), MBB->succ_end()); @@ -749,7 +761,7 @@ bool TailDuplicator::canTailDuplicate(MachineBasicBlock *TailBB, if (PredBB->succ_size() > 1) return false; - MachineBasicBlock *PredTBB, *PredFBB; + MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr; SmallVector PredCond; if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond)) return false; @@ -832,7 +844,7 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB, appendCopies(PredBB, CopyInfos, Copies); // Simplify - MachineBasicBlock *PredTBB, *PredFBB; + MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr; SmallVector PredCond; TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond); @@ -971,7 +983,7 @@ void TailDuplicator::appendCopies(MachineBasicBlock *MBB, /// the CFG. void TailDuplicator::removeDeadBlock( MachineBasicBlock *MBB, - llvm::function_ref *RemovalCallback) { + function_ref *RemovalCallback) { assert(MBB->pred_empty() && "MBB must be dead!"); DEBUG(dbgs() << "\nRemoving MBB: " << *MBB); @@ -985,5 +997,3 @@ void TailDuplicator::removeDeadBlock( // Remove the block. MBB->eraseFromParent(); } - -} // End llvm namespace diff --git a/interpreter/llvm/src/lib/CodeGen/TargetFrameLoweringImpl.cpp b/interpreter/llvm/src/lib/CodeGen/TargetFrameLoweringImpl.cpp index e5def6752e071..9dd98b4020d25 100644 --- a/interpreter/llvm/src/lib/CodeGen/TargetFrameLoweringImpl.cpp +++ b/interpreter/llvm/src/lib/CodeGen/TargetFrameLoweringImpl.cpp @@ -1,4 +1,4 @@ -//===----- TargetFrameLoweringImpl.cpp - Implement target frame interface --==// +//===- TargetFrameLoweringImpl.cpp - Implement target frame interface ------==// // // The LLVM Compiler Infrastructure // @@ -14,19 +14,21 @@ #include "llvm/ADT/BitVector.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/IR/Attributes.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Function.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/Compiler.h" #include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" -#include + using namespace llvm; -TargetFrameLowering::~TargetFrameLowering() { -} +TargetFrameLowering::~TargetFrameLowering() = default; /// The default implementation just looks at attribute "no-frame-pointer-elim". 
bool TargetFrameLowering::noFramePointerElim(const MachineFunction &MF) const { diff --git a/interpreter/llvm/src/lib/CodeGen/TargetLoweringBase.cpp b/interpreter/llvm/src/lib/CodeGen/TargetLoweringBase.cpp index 39aa946fa840d..3914ee5147122 100644 --- a/interpreter/llvm/src/lib/CodeGen/TargetLoweringBase.cpp +++ b/interpreter/llvm/src/lib/CodeGen/TargetLoweringBase.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Target/TargetLowering.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" @@ -34,6 +33,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -374,11 +374,36 @@ static void InitLibcallNames(const char **Names, const Triple &TT) { Names[RTLIB::MEMCPY] = "memcpy"; Names[RTLIB::MEMMOVE] = "memmove"; Names[RTLIB::MEMSET] = "memset"; - Names[RTLIB::MEMCPY_ELEMENT_ATOMIC_1] = "__llvm_memcpy_element_atomic_1"; - Names[RTLIB::MEMCPY_ELEMENT_ATOMIC_2] = "__llvm_memcpy_element_atomic_2"; - Names[RTLIB::MEMCPY_ELEMENT_ATOMIC_4] = "__llvm_memcpy_element_atomic_4"; - Names[RTLIB::MEMCPY_ELEMENT_ATOMIC_8] = "__llvm_memcpy_element_atomic_8"; - Names[RTLIB::MEMCPY_ELEMENT_ATOMIC_16] = "__llvm_memcpy_element_atomic_16"; + Names[RTLIB::MEMCPY_ELEMENT_UNORDERED_ATOMIC_1] = + "__llvm_memcpy_element_unordered_atomic_1"; + Names[RTLIB::MEMCPY_ELEMENT_UNORDERED_ATOMIC_2] = + "__llvm_memcpy_element_unordered_atomic_2"; + Names[RTLIB::MEMCPY_ELEMENT_UNORDERED_ATOMIC_4] = + "__llvm_memcpy_element_unordered_atomic_4"; + Names[RTLIB::MEMCPY_ELEMENT_UNORDERED_ATOMIC_8] = + "__llvm_memcpy_element_unordered_atomic_8"; + Names[RTLIB::MEMCPY_ELEMENT_UNORDERED_ATOMIC_16] = + "__llvm_memcpy_element_unordered_atomic_16"; + Names[RTLIB::MEMMOVE_ELEMENT_UNORDERED_ATOMIC_1] = + "__llvm_memmove_element_unordered_atomic_1"; + Names[RTLIB::MEMMOVE_ELEMENT_UNORDERED_ATOMIC_2] = + "__llvm_memmove_element_unordered_atomic_2"; + Names[RTLIB::MEMMOVE_ELEMENT_UNORDERED_ATOMIC_4] = + "__llvm_memmove_element_unordered_atomic_4"; + Names[RTLIB::MEMMOVE_ELEMENT_UNORDERED_ATOMIC_8] = + "__llvm_memmove_element_unordered_atomic_8"; + Names[RTLIB::MEMMOVE_ELEMENT_UNORDERED_ATOMIC_16] = + "__llvm_memmove_element_unordered_atomic_16"; + Names[RTLIB::MEMSET_ELEMENT_UNORDERED_ATOMIC_1] = + "__llvm_memset_element_unordered_atomic_1"; + Names[RTLIB::MEMSET_ELEMENT_UNORDERED_ATOMIC_2] = + "__llvm_memset_element_unordered_atomic_2"; + Names[RTLIB::MEMSET_ELEMENT_UNORDERED_ATOMIC_4] = + "__llvm_memset_element_unordered_atomic_4"; + Names[RTLIB::MEMSET_ELEMENT_UNORDERED_ATOMIC_8] = + "__llvm_memset_element_unordered_atomic_8"; + Names[RTLIB::MEMSET_ELEMENT_UNORDERED_ATOMIC_16] = + "__llvm_memset_element_unordered_atomic_16"; Names[RTLIB::UNWIND_RESUME] = "_Unwind_Resume"; Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1] = "__sync_val_compare_and_swap_1"; Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2] = "__sync_val_compare_and_swap_2"; @@ -781,22 +806,55 @@ RTLIB::Libcall RTLIB::getSYNC(unsigned Opc, MVT VT) { return UNKNOWN_LIBCALL; } -RTLIB::Libcall RTLIB::getMEMCPY_ELEMENT_ATOMIC(uint64_t ElementSize) { +RTLIB::Libcall RTLIB::getMEMCPY_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize) { switch (ElementSize) { case 1: - return MEMCPY_ELEMENT_ATOMIC_1; + return MEMCPY_ELEMENT_UNORDERED_ATOMIC_1; case 
2: - return MEMCPY_ELEMENT_ATOMIC_2; + return MEMCPY_ELEMENT_UNORDERED_ATOMIC_2; case 4: - return MEMCPY_ELEMENT_ATOMIC_4; + return MEMCPY_ELEMENT_UNORDERED_ATOMIC_4; case 8: - return MEMCPY_ELEMENT_ATOMIC_8; + return MEMCPY_ELEMENT_UNORDERED_ATOMIC_8; case 16: - return MEMCPY_ELEMENT_ATOMIC_16; + return MEMCPY_ELEMENT_UNORDERED_ATOMIC_16; default: return UNKNOWN_LIBCALL; } +} +RTLIB::Libcall RTLIB::getMEMMOVE_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize) { + switch (ElementSize) { + case 1: + return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_1; + case 2: + return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_2; + case 4: + return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_4; + case 8: + return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_8; + case 16: + return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_16; + default: + return UNKNOWN_LIBCALL; + } +} + +RTLIB::Libcall RTLIB::getMEMSET_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize) { + switch (ElementSize) { + case 1: + return MEMSET_ELEMENT_UNORDERED_ATOMIC_1; + case 2: + return MEMSET_ELEMENT_UNORDERED_ATOMIC_2; + case 4: + return MEMSET_ELEMENT_UNORDERED_ATOMIC_4; + case 8: + return MEMSET_ELEMENT_UNORDERED_ATOMIC_8; + case 16: + return MEMSET_ELEMENT_UNORDERED_ATOMIC_16; + default: + return UNKNOWN_LIBCALL; + } } /// InitCmpLibcallCCs - Set default comparison libcall CC. @@ -842,9 +900,10 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) { initActions(); // Perform these initializations only once. - MaxStoresPerMemset = MaxStoresPerMemcpy = MaxStoresPerMemmove = 8; - MaxStoresPerMemsetOptSize = MaxStoresPerMemcpyOptSize - = MaxStoresPerMemmoveOptSize = 4; + MaxStoresPerMemset = MaxStoresPerMemcpy = MaxStoresPerMemmove = + MaxLoadsPerMemcmp = 8; + MaxStoresPerMemsetOptSize = MaxStoresPerMemcpyOptSize = + MaxStoresPerMemmoveOptSize = MaxLoadsPerMemcmpOptSize = 4; UseUnderscoreSetJmp = false; UseUnderscoreLongJmp = false; HasMultipleConditionRegisters = false; @@ -926,6 +985,7 @@ void TargetLoweringBase::initActions() { // ADDCARRY operations default to expand setOperationAction(ISD::ADDCARRY, VT, Expand); setOperationAction(ISD::SUBCARRY, VT, Expand); + setOperationAction(ISD::SETCCCARRY, VT, Expand); // These default to Expand so they will be expanded to CTLZ/CTTZ by default. setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand); @@ -935,6 +995,7 @@ void TargetLoweringBase::initActions() { // These library functions default to expand. setOperationAction(ISD::FROUND, VT, Expand); + setOperationAction(ISD::FPOWI, VT, Expand); // These operations default to expand for vector types. if (VT.isVector()) { @@ -1312,7 +1373,7 @@ TargetLoweringBase::findRepresentativeClass(const TargetRegisterInfo *TRI, // Find the first legal register class with the largest spill size. const TargetRegisterClass *BestRC = RC; - for (int i = SuperRegRC.find_first(); i >= 0; i = SuperRegRC.find_next(i)) { + for (unsigned i : SuperRegRC.set_bits()) { const TargetRegisterClass *SuperRC = TRI->getRegClass(i); // We want the largest possible spill size. if (TRI->getSpillSize(*SuperRC) <= TRI->getSpillSize(*BestRC)) @@ -1453,6 +1514,7 @@ void TargetLoweringBase::computeRegisterProperties( } if (IsLegalWiderType) break; + LLVM_FALLTHROUGH; } case TypeWidenVector: { // Try to widen the vector. 
@@ -1470,6 +1532,7 @@ void TargetLoweringBase::computeRegisterProperties( } if (IsLegalWiderType) break; + LLVM_FALLTHROUGH; } case TypeSplitVector: case TypeScalarizeVector: { @@ -1632,8 +1695,10 @@ void llvm::GetReturnInfo(Type *ReturnType, AttributeList attr, VT = MinVT; } - unsigned NumParts = TLI.getNumRegisters(ReturnType->getContext(), VT); - MVT PartVT = TLI.getRegisterType(ReturnType->getContext(), VT); + unsigned NumParts = + TLI.getNumRegistersForCallingConv(ReturnType->getContext(), VT); + MVT PartVT = + TLI.getRegisterTypeForCallingConv(ReturnType->getContext(), VT); // 'inreg' on function refers to return value ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); diff --git a/interpreter/llvm/src/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/interpreter/llvm/src/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 1d232c71d824a..6922e33c8d6cb 100644 --- a/interpreter/llvm/src/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/interpreter/llvm/src/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -12,14 +12,18 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" +#include "llvm/BinaryFormat/COFF.h" +#include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/BinaryFormat/MachO.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" -#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/IR/Comdat.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -48,11 +52,7 @@ #include "llvm/ProfileData/InstrProf.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CodeGen.h" -#include "llvm/Support/COFF.h" -#include "llvm/Support/Dwarf.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MachO.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" #include @@ -61,10 +61,54 @@ using namespace llvm; using namespace dwarf; +static void GetObjCImageInfo(Module &M, unsigned &Version, unsigned &Flags, + StringRef &Section) { + SmallVector<Module::ModuleFlagEntry, 8> ModuleFlags; + M.getModuleFlagsMetadata(ModuleFlags); + + for (const auto &MFE: ModuleFlags) { + // Ignore flags with 'Require' behaviour.
+ if (MFE.Behavior == Module::Require) + continue; + + StringRef Key = MFE.Key->getString(); + if (Key == "Objective-C Image Info Version") { + Version = mdconst::extract<ConstantInt>(MFE.Val)->getZExtValue(); + } else if (Key == "Objective-C Garbage Collection" || + Key == "Objective-C GC Only" || + Key == "Objective-C Is Simulated" || + Key == "Objective-C Class Properties" || + Key == "Objective-C Image Swift Version") { + Flags |= mdconst::extract<ConstantInt>(MFE.Val)->getZExtValue(); + } else if (Key == "Objective-C Image Info Section") { + Section = cast<MDString>(MFE.Val)->getString(); + } + } +} + //===----------------------------------------------------------------------===// // ELF //===----------------------------------------------------------------------===// +void TargetLoweringObjectFileELF::emitModuleMetadata( + MCStreamer &Streamer, Module &M, const TargetMachine &TM) const { + unsigned Version = 0; + unsigned Flags = 0; + StringRef Section; + + GetObjCImageInfo(M, Version, Flags, Section); + if (Section.empty()) + return; + + auto &C = getContext(); + auto *S = C.getELFSection(Section, ELF::SHT_PROGBITS, ELF::SHF_ALLOC); + Streamer.SwitchSection(S); + Streamer.EmitLabel(C.getOrCreateSymbol(StringRef("OBJC_IMAGE_INFO"))); + Streamer.EmitIntValue(Version, 4); + Streamer.EmitIntValue(Flags, 4); + Streamer.AddBlankLine(); +} + MCSymbol *TargetLoweringObjectFileELF::getCFIPersonalitySymbol( const GlobalValue *GV, const TargetMachine &TM, MachineModuleInfo *MMI) const { @@ -248,6 +292,25 @@ MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal( const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { StringRef SectionName = GO->getSection(); + // Check if '#pragma clang section' name is applicable. + // Note that the pragma directive overrides -ffunction-sections, -fdata-sections, + // and so the section name is exactly as the user specified and not uniqued. + const GlobalVariable *GV = dyn_cast<GlobalVariable>(GO); + if (GV && GV->hasImplicitSection()) { + auto Attrs = GV->getAttributes(); + if (Attrs.hasAttribute("bss-section") && Kind.isBSS()) { + SectionName = Attrs.getAttribute("bss-section").getValueAsString(); + } else if (Attrs.hasAttribute("rodata-section") && Kind.isReadOnly()) { + SectionName = Attrs.getAttribute("rodata-section").getValueAsString(); + } else if (Attrs.hasAttribute("data-section") && Kind.isData()) { + SectionName = Attrs.getAttribute("data-section").getValueAsString(); + } + } + const Function *F = dyn_cast<Function>(GO); + if (F && F->hasFnAttribute("implicit-section-name")) { + SectionName = F->getFnAttribute("implicit-section-name").getValueAsString(); + } + // Infer section flags from the section name if we can. Kind = getELFKindForNamedSection(SectionName, Kind); @@ -556,40 +619,10 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx, } } -/// emitModuleFlags - Perform code emission for module flags. -void TargetLoweringObjectFileMachO::emitModuleFlags( - MCStreamer &Streamer, ArrayRef<Module::ModuleFlagEntry> ModuleFlags, - const TargetMachine &TM) const { - unsigned VersionVal = 0; - unsigned ImageInfoFlags = 0; - MDNode *LinkerOptions = nullptr; - StringRef SectionVal; - - for (const auto &MFE : ModuleFlags) { - // Ignore flags with 'Require' behavior.
- if (MFE.Behavior == Module::Require) - continue; - - StringRef Key = MFE.Key->getString(); - Metadata *Val = MFE.Val; - - if (Key == "Objective-C Image Info Version") { - VersionVal = mdconst::extract<ConstantInt>(Val)->getZExtValue(); - } else if (Key == "Objective-C Garbage Collection" || - Key == "Objective-C GC Only" || - Key == "Objective-C Is Simulated" || - Key == "Objective-C Class Properties" || - Key == "Objective-C Image Swift Version") { - ImageInfoFlags |= mdconst::extract<ConstantInt>(Val)->getZExtValue(); - } else if (Key == "Objective-C Image Info Section") { - SectionVal = cast<MDString>(Val)->getString(); - } else if (Key == "Linker Options") { - LinkerOptions = cast<MDNode>(Val); - } - } - +void TargetLoweringObjectFileMachO::emitModuleMetadata( + MCStreamer &Streamer, Module &M, const TargetMachine &TM) const { // Emit the linker options if present. - if (LinkerOptions) { + if (auto *LinkerOptions = M.getNamedMetadata("llvm.linker.options")) { for (const auto &Option : LinkerOptions->operands()) { SmallVector<std::string, 4> StrOptions; for (const auto &Piece : cast<MDNode>(Option)->operands()) @@ -598,8 +631,15 @@ void TargetLoweringObjectFileMachO::emitModuleFlags( } } + unsigned VersionVal = 0; + unsigned ImageInfoFlags = 0; + StringRef SectionVal; + + GetObjCImageInfo(M, VersionVal, ImageInfoFlags, SectionVal); + // The section is mandatory. If we don't have it, then we don't have GC info. - if (SectionVal.empty()) return; + if (SectionVal.empty()) + return; StringRef Segment, Section; unsigned TAA = 0, StubSize = 0; @@ -1111,18 +1151,9 @@ MCSection *TargetLoweringObjectFileCOFF::getSectionForJumpTable( COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE, UniqueID); } -void TargetLoweringObjectFileCOFF::emitModuleFlags( - MCStreamer &Streamer, ArrayRef<Module::ModuleFlagEntry> ModuleFlags, - const TargetMachine &TM) const { - MDNode *LinkerOptions = nullptr; - - for (const auto &MFE : ModuleFlags) { - StringRef Key = MFE.Key->getString(); - if (Key == "Linker Options") - LinkerOptions = cast<MDNode>(MFE.Val); - } - - if (LinkerOptions) { +void TargetLoweringObjectFileCOFF::emitModuleMetadata( + MCStreamer &Streamer, Module &M, const TargetMachine &TM) const { + if (NamedMDNode *LinkerOptions = M.getNamedMetadata("llvm.linker.options")) { // Emit the linker options to the linker .drectve section. According to the // spec, this section is a space-separated string containing flags for // linker.
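Note on the hunks above: the MachO and COFF emitModuleMetadata() overrides no longer scan module flags for "Linker Options" and instead read the llvm.linker.options named metadata directly off the Module. A minimal sketch of how such metadata is produced on the IR side, assuming only the public llvm::Module/Metadata C++ API; addLinkerOption is a hypothetical helper, not part of this patch:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"

using namespace llvm;

// Attach one group of linker flags to a module. The emitModuleMetadata()
// overrides above walk these operands and forward each group to the
// streamer (.drectve section on COFF, EmitLinkerOptions on MachO).
static void addLinkerOption(Module &M, ArrayRef<StringRef> Flags) {
  LLVMContext &Ctx = M.getContext();
  SmallVector<Metadata *, 4> Ops;
  for (StringRef F : Flags)
    Ops.push_back(MDString::get(Ctx, F));
  M.getOrInsertNamedMetadata("llvm.linker.options")
      ->addOperand(MDNode::get(Ctx, Ops));
}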
@@ -1137,6 +1168,24 @@ void TargetLoweringObjectFileCOFF::emitModuleFlags( } } } + + unsigned Version = 0; + unsigned Flags = 0; + StringRef Section; + + GetObjCImageInfo(M, Version, Flags, Section); + if (Section.empty()) + return; + + auto &C = getContext(); + auto *S = C.getCOFFSection( + Section, COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ, + SectionKind::getReadOnly()); + Streamer.SwitchSection(S); + Streamer.EmitLabel(C.getOrCreateSymbol(StringRef("OBJC_IMAGE_INFO"))); + Streamer.EmitIntValue(Version, 4); + Streamer.EmitIntValue(Flags, 4); + Streamer.AddBlankLine(); } void TargetLoweringObjectFileCOFF::Initialize(MCContext &Ctx, diff --git a/interpreter/llvm/src/lib/CodeGen/TargetOptionsImpl.cpp b/interpreter/llvm/src/lib/CodeGen/TargetOptionsImpl.cpp index c20d5ab814f82..ed845e1706f8c 100644 --- a/interpreter/llvm/src/lib/CodeGen/TargetOptionsImpl.cpp +++ b/interpreter/llvm/src/lib/CodeGen/TargetOptionsImpl.cpp @@ -11,10 +11,10 @@ // //===----------------------------------------------------------------------===// -#include "llvm/IR/Function.h" -#include "llvm/IR/Module.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Module.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git a/interpreter/llvm/src/lib/CodeGen/TargetPassConfig.cpp b/interpreter/llvm/src/lib/CodeGen/TargetPassConfig.cpp index 1863d177b2195..817e58ce59e10 100644 --- a/interpreter/llvm/src/lib/CodeGen/TargetPassConfig.cpp +++ b/interpreter/llvm/src/lib/CodeGen/TargetPassConfig.cpp @@ -1,4 +1,4 @@ -//===-- TargetPassConfig.cpp - Target independent code generation passes --===// +//===- TargetPassConfig.cpp - Target independent code generation passes ---===// // // The LLVM Compiler Infrastructure // @@ -13,28 +13,37 @@ //===---------------------------------------------------------------------===// #include "llvm/CodeGen/TargetPassConfig.h" - +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/CFLAndersAliasAnalysis.h" #include "llvm/Analysis/CFLSteensAliasAnalysis.h" #include "llvm/Analysis/CallGraphSCCPass.h" -#include "llvm/Analysis/Passes.h" #include "llvm/Analysis/ScopedNoAliasAA.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/TypeBasedAliasAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachinePassRegistry.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegAllocRegistry.h" -#include "llvm/CodeGen/RegisterUsageInfo.h" #include "llvm/IR/IRPrintingPasses.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Verifier.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCTargetOptions.h" +#include "llvm/Pass.h" +#include "llvm/Support/CodeGen.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Threading.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/SymbolRewriter.h" +#include +#include using namespace llvm; @@ -95,6 +104,16 @@ static cl::opt<cl::boolOrDefault> VerifyMachineCode("verify-machineinstrs", cl::Hidden, static cl::opt<bool> EnableMachineOutliner("enable-machine-outliner",
cl::Hidden, cl::desc("Enable machine outliner")); +// Enable or disable FastISel. Both options are needed, because +// FastISel is enabled by default with -fast, and we wish to be +// able to enable or disable fast-isel independently from -O0. +static cl::opt<cl::boolOrDefault> +EnableFastISelOption("fast-isel", cl::Hidden, + cl::desc("Enable the \"fast\" instruction selector")); + +static cl::opt<cl::boolOrDefault> + EnableGlobalISel("global-isel", cl::Hidden, + cl::desc("Enable the \"global\" instruction selector")); static cl::opt<std::string> PrintMachineInstrs("print-machineinstrs", cl::ValueOptional, @@ -214,6 +233,7 @@ char TargetPassConfig::EarlyTailDuplicateID = 0; char TargetPassConfig::PostRAMachineLICMID = 0; namespace { + struct InsertedPass { AnalysisID TargetPassID; IdentifyingPassPtr InsertedPassID; @@ -234,9 +254,11 @@ struct InsertedPass { return NP; } }; -} + +} // end anonymous namespace namespace llvm { + class PassConfigImpl { public: // List of passes explicitly substituted by this target. Normally this is @@ -252,7 +274,8 @@ class PassConfigImpl { /// is inserted after each instance of the first one. SmallVector<InsertedPass, 4> InsertedPasses; }; -} // namespace llvm + +} // end namespace llvm // Out of line virtual method. TargetPassConfig::~TargetPassConfig() { @@ -261,12 +284,8 @@ TargetPassConfig::~TargetPassConfig() { // Out of line constructor provides default values for pass options and // registers all common codegen passes. -TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm) - : ImmutablePass(ID), PM(&pm), Started(true), Stopped(false), - AddingMachinePasses(false), TM(tm), Impl(nullptr), Initialized(false), - DisableVerify(false), EnableTailMerge(true), - RequireCodeGenSCCOrder(false) { - +TargetPassConfig::TargetPassConfig(LLVMTargetMachine &TM, PassManagerBase &pm) + : ImmutablePass(ID), PM(&pm), TM(&TM) { Impl = new PassConfigImpl(); // Register all target independent codegen passes to activate their PassIDs, @@ -282,9 +301,9 @@ TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm) substitutePass(&PostRAMachineLICMID, &MachineLICMID); if (StringRef(PrintMachineInstrs.getValue()).equals("")) - TM->Options.PrintMachineCode = true; + TM.Options.PrintMachineCode = true; - if (TM->Options.EnableIPRA) + if (TM.Options.EnableIPRA) setRequiresCodeGenSCCOrder(); } @@ -310,12 +329,14 @@ void TargetPassConfig::insertPass(AnalysisID TargetPassID, /// /// Targets may override this to extend TargetPassConfig. TargetPassConfig *LLVMTargetMachine::createPassConfig(PassManagerBase &PM) { - return new TargetPassConfig(this, PM); + return new TargetPassConfig(*this, PM); } TargetPassConfig::TargetPassConfig() - : ImmutablePass(ID), PM(nullptr) { - llvm_unreachable("TargetPassConfig should not be constructed on-the-fly"); + : ImmutablePass(ID) { + report_fatal_error("Trying to construct TargetPassConfig without a target " + "machine. Scheduling a CodeGen pass without a target " + "triple set?"); } // Helper to verify the analysis is really immutable. @@ -428,7 +449,12 @@ void TargetPassConfig::addPrintPass(const std::string &Banner) { } void TargetPassConfig::addVerifyPass(const std::string &Banner) { - if (VerifyMachineCode) + bool Verify = VerifyMachineCode; +#ifdef EXPENSIVE_CHECKS + if (VerifyMachineCode == cl::BOU_UNSET) + Verify = TM->isMachineVerifierClean(); +#endif + if (Verify) PM->add(createMachineVerifierPass(Banner)); } @@ -488,6 +514,11 @@ void TargetPassConfig::addIRPasses() { // Insert calls to mcount-like functions.
addPass(createCountingFunctionInserterPass()); + // Add scalarization of target's unsupported masked memory intrinsics pass. + // The unsupported intrinsic will be replaced with a chain of basic blocks + // that stores/loads elements one-by-one if the appropriate mask bit is set. + addPass(createScalarizeMaskedMemIntrinPass()); + // Expand reduction intrinsics into shuffle sequences if the target wants to. addPass(createExpandReductionsPass()); } @@ -509,14 +540,14 @@ void TargetPassConfig::addPassesToHandleExceptions() { LLVM_FALLTHROUGH; case ExceptionHandling::DwarfCFI: case ExceptionHandling::ARM: - addPass(createDwarfEHPass(TM)); + addPass(createDwarfEHPass()); break; case ExceptionHandling::WinEH: // We support using both GCC-style and MSVC-style exceptions on Windows, so // add both preparation passes. Each pass will only actually run if it // recognizes the personality function. - addPass(createWinEHPass(TM)); - addPass(createDwarfEHPass(TM)); + addPass(createWinEHPass()); + addPass(createDwarfEHPass()); break; case ExceptionHandling::None: addPass(createLowerInvokePass()); @@ -531,7 +562,7 @@ void TargetPassConfig::addPassesToHandleExceptions() { /// before exception handling preparation passes. void TargetPassConfig::addCodeGenPrepare() { if (getOptLevel() != CodeGenOpt::None && !DisableCGP) - addPass(createCodeGenPreparePass(TM)); + addPass(createCodeGenPreparePass()); addPass(createRewriteSymbolsPass()); } @@ -546,8 +577,8 @@ void TargetPassConfig::addISelPrepare() { // Add both the safe stack and the stack protection passes: each of them will // only protect functions that have corresponding attributes. - addPass(createSafeStackPass(TM)); - addPass(createStackProtectorPass(TM)); + addPass(createSafeStackPass()); + addPass(createStackProtectorPass()); if (PrintISelInput) addPass(createPrintFunctionPass( @@ -559,6 +590,74 @@ void TargetPassConfig::addISelPrepare() { addPass(createVerifierPass()); } +bool TargetPassConfig::addCoreISelPasses() { + // Enable FastISel with -fast, but allow that to be overridden. + TM->setO0WantsFastISel(EnableFastISelOption != cl::BOU_FALSE); + if (EnableFastISelOption == cl::BOU_TRUE || + (TM->getOptLevel() == CodeGenOpt::None && TM->getO0WantsFastISel())) + TM->setFastISel(true); + + // Ask the target for an isel. + // Enable GlobalISel if the target wants to, but allow that to be overridden. + if (EnableGlobalISel == cl::BOU_TRUE || + (EnableGlobalISel == cl::BOU_UNSET && isGlobalISelEnabled())) { + if (addIRTranslator()) + return true; + + addPreLegalizeMachineIR(); + + if (addLegalizeMachineIR()) + return true; + + // Before running the register bank selector, ask the target if it + // wants to run some passes. + addPreRegBankSelect(); + + if (addRegBankSelect()) + return true; + + addPreGlobalInstructionSelect(); + + if (addGlobalInstructionSelect()) + return true; + + // Pass to reset the MachineFunction if the ISel failed. + addPass(createResetMachineFunctionPass( + reportDiagnosticWhenGlobalISelFallback(), isGlobalISelAbortEnabled())); + + // Provide a fallback path when we do not want to abort on + // not-yet-supported input.
+ if (!isGlobalISelAbortEnabled() && addInstSelector()) + return true; + + } else if (addInstSelector()) + return true; + + return false; +} + +bool TargetPassConfig::addISelPasses() { + if (TM->Options.EmulatedTLS) + addPass(createLowerEmuTLSPass()); + + addPass(createPreISelIntrinsicLoweringPass()); + addPass(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis())); + addIRPasses(); + addCodeGenPrepare(); + addPassesToHandleExceptions(); + addISelPrepare(); + + return addCoreISelPasses(); +} + +/// -regalloc=... command line option. +static FunctionPass *useDefaultRegisterAllocator() { return nullptr; } +static cl::opt<RegisterRegAlloc::FunctionPassCtor, false, RegisterPassParser<RegisterRegAlloc>> +RegAlloc("regalloc", + cl::init(&useDefaultRegisterAllocator), + cl::desc("Register allocator to use")); + /// Add the complete set of target-independent postISel code generator passes. /// /// This can be read as the standard order of major LLVM CodeGen stages. Stages @@ -610,9 +709,6 @@ void TargetPassConfig::addMachinePasses() { addPass(&LocalStackSlotAllocationID, false); } - if (getOptLevel() != CodeGenOpt::None) - addPass(&LiveRangeShrinkID); - // Run pre-ra passes. addPreRegAlloc(); @@ -620,8 +716,12 @@ void TargetPassConfig::addMachinePasses() { // including phi elimination and scheduling. if (getOptimizeRegAlloc()) addOptimizedRegAlloc(createRegAllocPass(true)); - else + else { + if (RegAlloc != &useDefaultRegisterAllocator && + RegAlloc != &createFastRegisterAllocator) + report_fatal_error("Must use fast (default) register allocator for unoptimized regalloc."); addFastRegAlloc(createRegAllocPass(false)); + } // Run post-ra passes. addPostRegAlloc(); @@ -633,7 +733,7 @@ void TargetPassConfig::addMachinePasses() { // Prolog/Epilog inserter needs a TargetMachine to instantiate. But only // do so if it hasn't been disabled, substituted, or overridden. if (!isPassSubstitutedOrOverridden(&PrologEpilogCodeInserterID)) - addPass(createPrologEpilogInserterPass(TM)); + addPass(createPrologEpilogInserterPass()); /// Add passes that optimize machine instructions after register allocation. if (getOptLevel() != CodeGenOpt::None) @@ -754,19 +854,12 @@ MachinePassRegistry RegisterRegAlloc::Registry; /// A dummy default pass factory indicates whether the register allocator is /// overridden on the command line. static llvm::once_flag InitializeDefaultRegisterAllocatorFlag; -static FunctionPass *useDefaultRegisterAllocator() { return nullptr; } + static RegisterRegAlloc defaultRegAlloc("default", "pick register allocator based on -O option", useDefaultRegisterAllocator); -/// -regalloc=... command line option. -static cl::opt<RegisterRegAlloc::FunctionPassCtor, false, RegisterPassParser<RegisterRegAlloc>> -RegAlloc("regalloc", - cl::init(&useDefaultRegisterAllocator), - cl::desc("Register allocator to use")); - static void initializeDefaultRegisterAllocatorOnce() { RegisterRegAlloc::FunctionPassCtor Ctor = RegisterRegAlloc::getDefault(); @@ -776,7 +869,6 @@ static void initializeDefaultRegisterAllocatorOnce() { } } - /// Instantiate the default register allocator pass for this target for either /// the optimized or unoptimized allocation path.
This will be added to the pass /// manager by addFastRegAlloc in the unoptimized case or addOptimizedRegAlloc diff --git a/interpreter/llvm/src/lib/CodeGen/TargetRegisterInfo.cpp b/interpreter/llvm/src/lib/CodeGen/TargetRegisterInfo.cpp index f6e4c17d514cd..eeb00a784b0d9 100644 --- a/interpreter/llvm/src/lib/CodeGen/TargetRegisterInfo.cpp +++ b/interpreter/llvm/src/lib/CodeGen/TargetRegisterInfo.cpp @@ -1,4 +1,4 @@ -//===- TargetRegisterInfo.cpp - Target Register Information Implementation ===// +//==- TargetRegisterInfo.cpp - Target Register Information Implementation --==// // // The LLVM Compiler Infrastructure // @@ -11,17 +11,27 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/IR/Attributes.h" #include "llvm/IR/Function.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/Format.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/Printable.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include +#include #define DEBUG_TYPE "target-reg-info" @@ -38,7 +48,7 @@ TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterInfoDesc *ID, CoveringLanes(SRICoveringLanes) { } -TargetRegisterInfo::~TargetRegisterInfo() {} +TargetRegisterInfo::~TargetRegisterInfo() = default; void TargetRegisterInfo::markSuperRegs(BitVector &RegisterSet, unsigned Reg) const { @@ -50,8 +60,7 @@ bool TargetRegisterInfo::checkAllSuperRegsMarked(const BitVector &RegisterSet, ArrayRef<MCPhysReg> Exceptions) const { // Check that all super registers of reserved regs are reserved as well. BitVector Checked(getNumRegs()); - for (int Reg = RegisterSet.find_first(); Reg>=0; - Reg = RegisterSet.find_next(Reg)) { + for (unsigned Reg : RegisterSet.set_bits()) { if (Checked[Reg]) continue; for (MCSuperRegIterator SR(Reg, this); SR.isValid(); ++SR) { @@ -127,7 +136,7 @@ Printable PrintVRegOrUnit(unsigned Unit, const TargetRegisterInfo *TRI) { }); } -} // End of llvm namespace +} // end namespace llvm /// getAllocatableClass - Return the maximal subclass of the given register /// class that is allocatable, or NULL.
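Both the findRepresentativeClass hunk (TargetLoweringBase.cpp, further up) and the checkAllSuperRegsMarked hunk above replace manual find_first()/find_next() loops with BitVector::set_bits(), which exposes the indices of the set bits as an iterable range. A small self-contained illustration of the new idiom, assuming only llvm::BitVector:

#include "llvm/ADT/BitVector.h"
#include <cstdio>

int main() {
  llvm::BitVector BV(8);
  BV.set(1);
  BV.set(5);
  // Equivalent to: for (int I = BV.find_first(); I >= 0; I = BV.find_next(I))
  for (unsigned I : BV.set_bits())
    std::printf("bit %u is set\n", I);
  return 0;
}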
diff --git a/interpreter/llvm/src/lib/CodeGen/TargetSchedule.cpp b/interpreter/llvm/src/lib/CodeGen/TargetSchedule.cpp index 0df34ce43112a..9210ea8a83f6b 100644 --- a/interpreter/llvm/src/lib/CodeGen/TargetSchedule.cpp +++ b/interpreter/llvm/src/lib/CodeGen/TargetSchedule.cpp @@ -12,10 +12,10 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/TargetSchedule.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" -#include "llvm/CodeGen/TargetSchedule.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/MC/MCSchedule.h" @@ -337,8 +337,8 @@ computeOutputLatency(const MachineInstr *DefMI, unsigned DefOperIdx, } static Optional<double> -getRTroughputFromItineraries(unsigned schedClass, - const InstrItineraryData *IID){ +getRThroughputFromItineraries(unsigned schedClass, + const InstrItineraryData *IID){ double Unknown = std::numeric_limits<double>::infinity(); double Throughput = Unknown; @@ -356,9 +356,9 @@ getRTroughputFromItineraries(unsigned schedClass, } static Optional<double> -getRTroughputFromInstrSchedModel(const MCSchedClassDesc *SCDesc, - const TargetSubtargetInfo *STI, - const MCSchedModel &SchedModel) { +getRThroughputFromInstrSchedModel(const MCSchedClassDesc *SCDesc, + const TargetSubtargetInfo *STI, + const MCSchedModel &SchedModel) { double Unknown = std::numeric_limits<double>::infinity(); double Throughput = Unknown; @@ -380,11 +380,11 @@ getRTroughputFromInstrSchedModel(const MCSchedClassDesc *SCDesc, Optional<double> TargetSchedModel::computeInstrRThroughput(const MachineInstr *MI) const { if (hasInstrItineraries()) - return getRTroughputFromItineraries(MI->getDesc().getSchedClass(), - getInstrItineraries()); + return getRThroughputFromItineraries(MI->getDesc().getSchedClass(), + getInstrItineraries()); if (hasInstrSchedModel()) - return getRTroughputFromInstrSchedModel(resolveSchedClass(MI), STI, - SchedModel); + return getRThroughputFromInstrSchedModel(resolveSchedClass(MI), STI, + SchedModel); return Optional<double>(); } @@ -392,11 +392,11 @@ Optional<double> TargetSchedModel::computeInstrRThroughput(unsigned Opcode) const { unsigned SchedClass = TII->get(Opcode).getSchedClass(); if (hasInstrItineraries()) - return getRTroughputFromItineraries(SchedClass, getInstrItineraries()); + return getRThroughputFromItineraries(SchedClass, getInstrItineraries()); if (hasInstrSchedModel()) { const MCSchedClassDesc *SCDesc = SchedModel.getSchedClassDesc(SchedClass); if (SCDesc->isValid() && !SCDesc->isVariant()) - return getRTroughputFromInstrSchedModel(SCDesc, STI, SchedModel); + return getRThroughputFromInstrSchedModel(SCDesc, STI, SchedModel); } return Optional<double>(); } diff --git a/interpreter/llvm/src/lib/CodeGen/TargetSubtargetInfo.cpp b/interpreter/llvm/src/lib/CodeGen/TargetSubtargetInfo.cpp index 0a444e0fff07e..f6d5bc80ddffb 100644 --- a/interpreter/llvm/src/lib/CodeGen/TargetSubtargetInfo.cpp +++ b/interpreter/llvm/src/lib/CodeGen/TargetSubtargetInfo.cpp @@ -1,4 +1,4 @@ -//===-- TargetSubtargetInfo.cpp - General Target Information ---------------==// +//===- TargetSubtargetInfo.cpp - General Target Information ----------------==// // // The LLVM Compiler Infrastructure // @@ -11,15 +11,17 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/Optional.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/TargetSchedule.h" -#include "llvm/Support/raw_ostream.h" +#include "llvm/MC/MCInst.h" #include 
"llvm/Target/TargetSubtargetInfo.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" +#include + using namespace llvm; -//--------------------------------------------------------------------------- -// TargetSubtargetInfo Class -// TargetSubtargetInfo::TargetSubtargetInfo( const Triple &TT, StringRef CPU, StringRef FS, ArrayRef PF, ArrayRef PD, @@ -29,7 +31,7 @@ TargetSubtargetInfo::TargetSubtargetInfo( : MCSubtargetInfo(TT, CPU, FS, PF, PD, ProcSched, WPR, WL, RA, IS, OC, FP) { } -TargetSubtargetInfo::~TargetSubtargetInfo() {} +TargetSubtargetInfo::~TargetSubtargetInfo() = default; bool TargetSubtargetInfo::enableAtomicExpand() const { return true; diff --git a/interpreter/llvm/src/lib/CodeGen/TwoAddressInstructionPass.cpp b/interpreter/llvm/src/lib/CodeGen/TwoAddressInstructionPass.cpp index 7392c83271487..83c00e24d14fc 100644 --- a/interpreter/llvm/src/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/interpreter/llvm/src/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -52,7 +52,7 @@ using namespace llvm; -#define DEBUG_TYPE "twoaddrinstr" +#define DEBUG_TYPE "twoaddressinstruction" STATISTIC(NumTwoAddressInstrs, "Number of two-address instructions"); STATISTIC(NumCommuted , "Number of instructions commuted to coalesce"); @@ -68,6 +68,13 @@ EnableRescheduling("twoaddr-reschedule", cl::desc("Coalesce copies by rescheduling (default=true)"), cl::init(true), cl::Hidden); +// Limit the number of dataflow edges to traverse when evaluating the benefit +// of commuting operands. +static cl::opt MaxDataFlowEdge( + "dataflow-edge-limit", cl::Hidden, cl::init(3), + cl::desc("Maximum number of dataflow edges to traverse when evaluating " + "the benefit of commuting operands")); + namespace { class TwoAddressInstructionPass : public MachineFunctionPass { MachineFunction *MF; @@ -171,10 +178,10 @@ class TwoAddressInstructionPass : public MachineFunctionPass { } // end anonymous namespace char TwoAddressInstructionPass::ID = 0; -INITIALIZE_PASS_BEGIN(TwoAddressInstructionPass, "twoaddressinstruction", +INITIALIZE_PASS_BEGIN(TwoAddressInstructionPass, DEBUG_TYPE, "Two-Address instruction pass", false, false) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) -INITIALIZE_PASS_END(TwoAddressInstructionPass, "twoaddressinstruction", +INITIALIZE_PASS_END(TwoAddressInstructionPass, DEBUG_TYPE, "Two-Address instruction pass", false, false) char &llvm::TwoAddressInstructionPassID = TwoAddressInstructionPass::ID; @@ -637,10 +644,10 @@ isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC, // To more generally minimize register copies, ideally the logic of two addr // instruction pass should be integrated with register allocation pass where // interference graph is available. 
- if (isRevCopyChain(regC, regA, 3)) + if (isRevCopyChain(regC, regA, MaxDataFlowEdge)) return true; - if (isRevCopyChain(regB, regA, 3)) + if (isRevCopyChain(regB, regA, MaxDataFlowEdge)) return false; // Since there are no intervening uses for both registers, then commute diff --git a/interpreter/llvm/src/lib/CodeGen/VirtRegMap.cpp b/interpreter/llvm/src/lib/CodeGen/VirtRegMap.cpp index d10ca1a7ff918..f8aacdb8649d4 100644 --- a/interpreter/llvm/src/lib/CodeGen/VirtRegMap.cpp +++ b/interpreter/llvm/src/lib/CodeGen/VirtRegMap.cpp @@ -72,6 +72,17 @@ void VirtRegMap::grow() { Virt2SplitMap.resize(NumRegs); } +void VirtRegMap::assignVirt2Phys(unsigned virtReg, MCPhysReg physReg) { + assert(TargetRegisterInfo::isVirtualRegister(virtReg) && + TargetRegisterInfo::isPhysicalRegister(physReg)); + assert(Virt2PhysMap[virtReg] == NO_PHYS_REG && + "attempt to assign physical register to already mapped " + "virtual register"); + assert(!getRegInfo().isReserved(physReg) && + "Attempt to map virtReg to a reserved physReg"); + Virt2PhysMap[virtReg] = physReg; +} + unsigned VirtRegMap::createSpillSlot(const TargetRegisterClass *RC) { unsigned Size = TRI->getSpillSize(*RC); unsigned Align = TRI->getSpillAlignment(*RC); @@ -169,6 +180,7 @@ class VirtRegRewriter : public MachineFunctionPass { void addLiveInsForSubRanges(const LiveInterval &LI, unsigned PhysReg) const; void handleIdentityCopy(MachineInstr &MI) const; void expandCopyBundle(MachineInstr &MI) const; + bool subRegLiveThrough(const MachineInstr &MI, unsigned SuperPhysReg) const; public: static char ID; @@ -404,6 +416,32 @@ void VirtRegRewriter::expandCopyBundle(MachineInstr &MI) const { } } +/// Check whether (part of) \p SuperPhysReg is live through \p MI. +/// \pre \p MI defines a subregister of a virtual register that +/// has been assigned to \p SuperPhysReg. +bool VirtRegRewriter::subRegLiveThrough(const MachineInstr &MI, + unsigned SuperPhysReg) const { + SlotIndex MIIndex = LIS->getInstructionIndex(MI); + SlotIndex BeforeMIUses = MIIndex.getBaseIndex(); + SlotIndex AfterMIDefs = MIIndex.getBoundaryIndex(); + for (MCRegUnitIterator Unit(SuperPhysReg, TRI); Unit.isValid(); ++Unit) { + const LiveRange &UnitRange = LIS->getRegUnit(*Unit); + // If the regunit is live both before and after MI, + // we assume it is live through. + // Generally speaking, this is not true, because something like + // "RU = op RU" would match that description. + // However, we know that we are trying to assess whether + // a def of a virtual reg, vreg, is live at the same time as RU. + // If we are in the "RU = op RU" situation, that means that vreg + // is defined at the same time as RU (i.e., "vreg, RU = op RU"). + // Thus, vreg and RU interfere and vreg cannot be assigned to + // SuperPhysReg. Therefore, this situation cannot happen. + if (UnitRange.liveAt(AfterMIDefs) && UnitRange.liveAt(BeforeMIUses)) + return true; + } + return false; +} + void VirtRegRewriter::rewrite() { bool NoSubRegLiveness = !MRI->subRegLivenessEnabled(); SmallVector<unsigned, 8> SuperDeads; @@ -441,7 +479,8 @@ void VirtRegRewriter::rewrite() { // A virtual register kill refers to the whole register, so we may // have to add operands for the super-register. A // partial redef always kills and redefines the super-register.
- if (MO.readsReg() && (MO.isDef() || MO.isKill())) + if ((MO.readsReg() && (MO.isDef() || MO.isKill())) || + (MO.isDef() && subRegLiveThrough(*MI, PhysReg))) SuperKills.push_back(PhysReg); if (MO.isDef()) { diff --git a/interpreter/llvm/src/lib/CodeGen/WinEHPrepare.cpp b/interpreter/llvm/src/lib/CodeGen/WinEHPrepare.cpp index ae07e8b2fa032..c63a0a9e60ea3 100644 --- a/interpreter/llvm/src/lib/CodeGen/WinEHPrepare.cpp +++ b/interpreter/llvm/src/lib/CodeGen/WinEHPrepare.cpp @@ -16,13 +16,13 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/EHPersonalities.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/Verifier.h" #include "llvm/MC/MCSymbol.h" @@ -54,7 +54,7 @@ namespace { class WinEHPrepare : public FunctionPass { public: static char ID; // Pass identification, replacement for typeid. - WinEHPrepare(const TargetMachine *TM = nullptr) : FunctionPass(ID) {} + WinEHPrepare() : FunctionPass(ID) {} bool runOnFunction(Function &Fn) override; @@ -94,12 +94,10 @@ class WinEHPrepare : public FunctionPass { } // end anonymous namespace char WinEHPrepare::ID = 0; -INITIALIZE_TM_PASS(WinEHPrepare, "winehprepare", "Prepare Windows exceptions", - false, false) +INITIALIZE_PASS(WinEHPrepare, DEBUG_TYPE, "Prepare Windows exceptions", + false, false) -FunctionPass *llvm::createWinEHPass(const TargetMachine *TM) { - return new WinEHPrepare(TM); -} +FunctionPass *llvm::createWinEHPass() { return new WinEHPrepare(); } bool WinEHPrepare::runOnFunction(Function &Fn) { if (!Fn.hasPersonalityFn()) diff --git a/interpreter/llvm/src/lib/CodeGen/XRayInstrumentation.cpp b/interpreter/llvm/src/lib/CodeGen/XRayInstrumentation.cpp index 2df3602733f3e..0b4c6e551667b 100644 --- a/interpreter/llvm/src/lib/CodeGen/XRayInstrumentation.cpp +++ b/interpreter/llvm/src/lib/CodeGen/XRayInstrumentation.cpp @@ -1,4 +1,4 @@ -//===-- XRayInstrumentation.cpp - Adds XRay instrumentation to functions. -===// +//===- XRayInstrumentation.cpp - Adds XRay instrumentation to functions. 
--===// // // The LLVM Compiler Infrastructure // @@ -14,20 +14,26 @@ // //===---------------------------------------------------------------------===// -#include "llvm/CodeGen/Analysis.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Triple.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/Support/TargetRegistry.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/Function.h" +#include "llvm/Pass.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; namespace { + struct XRayInstrumentation : public MachineFunctionPass { static char ID; @@ -66,7 +72,8 @@ struct XRayInstrumentation : public MachineFunctionPass { void prependRetWithPatchableExit(MachineFunction &MF, const TargetInstrInfo *TII); }; -} // anonymous namespace + +} // end anonymous namespace void XRayInstrumentation::replaceRetWithPatchableRet( MachineFunction &MF, const TargetInstrInfo *TII) { @@ -134,18 +141,23 @@ bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) { if (Attr.getValueAsString().getAsInteger(10, XRayThreshold)) return false; // Invalid value for threshold. + // Count the number of MachineInstrs in the MachineFunction. + int64_t MICount = 0; + for (const auto& MBB : MF) + MICount += MBB.size(); + // Check if we have a loop. // FIXME: Maybe make this smarter, and see whether the loops are dependent // on inputs or side-effects? MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>(); - if (MLI.empty() && F.size() < XRayThreshold) + if (MLI.empty() && MICount < XRayThreshold) return false; // Function is too small and has no loops. } // We look for the first non-empty MachineBasicBlock, so that we can insert // the function instrumentation in the appropriate place. - auto MBI = - find_if(MF, [&](const MachineBasicBlock &MBB) { return !MBB.empty(); }); + auto MBI = llvm::find_if( + MF, [&](const MachineBasicBlock &MBB) { return !MBB.empty(); }); if (MBI == MF.end()) return false; // The function is empty.
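The XRayInstrumentation hunk above changes the threshold test to count MachineInstrs across all basic blocks (MICount) rather than comparing against F.size(), which is the number of IR basic blocks, so the "function is too small" heuristic now genuinely measures instructions. A hedged sketch of how a function opts in on the IR side; the "xray-instruction-threshold" attribute spelling is an assumption based on XRay's conventions in this tree (the attribute name itself is not shown in the hunk), and markForXRay is a hypothetical helper:

#include "llvm/IR/Function.h"
#include <string>

// Tag a function so the XRay machine pass will consider patching it once
// its MachineInstr count reaches Threshold; the pass parses the value with
// getAsInteger(10, XRayThreshold) as seen above.
static void markForXRay(llvm::Function &F, unsigned Threshold) {
  F.addFnAttr("xray-instruction-threshold", std::to_string(Threshold));
}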
diff --git a/interpreter/llvm/src/lib/DebugInfo/CodeView/CMakeLists.txt b/interpreter/llvm/src/lib/DebugInfo/CodeView/CMakeLists.txt index 8d9353ae5f5e4..b94bb0c80c793 100644 --- a/interpreter/llvm/src/lib/DebugInfo/CodeView/CMakeLists.txt +++ b/interpreter/llvm/src/lib/DebugInfo/CodeView/CMakeLists.txt @@ -2,29 +2,36 @@ add_llvm_library(LLVMDebugInfoCodeView CodeViewError.cpp CodeViewRecordIO.cpp CVSymbolVisitor.cpp - CVTypeDumper.cpp CVTypeVisitor.cpp + DebugChecksumsSubsection.cpp + DebugCrossExSubsection.cpp + DebugCrossImpSubsection.cpp + DebugFrameDataSubsection.cpp + DebugInlineeLinesSubsection.cpp + DebugLinesSubsection.cpp + DebugStringTableSubsection.cpp + DebugSubsection.cpp + DebugSubsectionRecord.cpp + DebugSubsectionVisitor.cpp + DebugSymbolRVASubsection.cpp + DebugSymbolsSubsection.cpp EnumTables.cpp Formatters.cpp + LazyRandomTypeCollection.cpp Line.cpp - ModuleDebugFileChecksumFragment.cpp - ModuleDebugFragment.cpp - ModuleDebugFragmentRecord.cpp - ModuleDebugFragmentVisitor.cpp - ModuleDebugInlineeLinesFragment.cpp - ModuleDebugLineFragment.cpp - RandomAccessTypeVisitor.cpp RecordSerialization.cpp - StringTable.cpp + StringsAndChecksums.cpp SymbolRecordMapping.cpp SymbolDumper.cpp SymbolSerializer.cpp - TypeDatabase.cpp - TypeDatabaseVisitor.cpp TypeDumpVisitor.cpp + TypeIndex.cpp + TypeIndexDiscovery.cpp + TypeName.cpp TypeRecordMapping.cpp TypeSerializer.cpp TypeStreamMerger.cpp + TypeTableCollection.cpp ADDITIONAL_HEADER_DIRS ${LLVM_MAIN_INCLUDE_DIR}/llvm/DebugInfo/CodeView diff --git a/interpreter/llvm/src/lib/DebugInfo/CodeView/CVSymbolVisitor.cpp b/interpreter/llvm/src/lib/DebugInfo/CodeView/CVSymbolVisitor.cpp index 4c78caf034777..e0c7ef58c3041 100644 --- a/interpreter/llvm/src/lib/DebugInfo/CodeView/CVSymbolVisitor.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/CodeView/CVSymbolVisitor.cpp @@ -29,10 +29,8 @@ static Error visitKnownRecord(CVSymbol &Record, return Error::success(); } -Error CVSymbolVisitor::visitSymbolRecord(CVSymbol &Record) { - if (auto EC = Callbacks.visitSymbolBegin(Record)) - return EC; - +static Error finishVisitation(CVSymbol &Record, + SymbolVisitorCallbacks &Callbacks) { switch (Record.Type) { default: if (auto EC = Callbacks.visitUnknownSymbol(Record)) @@ -46,7 +44,7 @@ Error CVSymbolVisitor::visitSymbolRecord(CVSymbol &Record) { } #define SYMBOL_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) \ SYMBOL_RECORD(EnumVal, EnumVal, AliasName) -#include "llvm/DebugInfo/CodeView/CVSymbolTypes.def" +#include "llvm/DebugInfo/CodeView/CodeViewSymbols.def" } if (auto EC = Callbacks.visitSymbolEnd(Record)) @@ -55,6 +53,18 @@ Error CVSymbolVisitor::visitSymbolRecord(CVSymbol &Record) { return Error::success(); } +Error CVSymbolVisitor::visitSymbolRecord(CVSymbol &Record) { + if (auto EC = Callbacks.visitSymbolBegin(Record)) + return EC; + return finishVisitation(Record, Callbacks); +} + +Error CVSymbolVisitor::visitSymbolRecord(CVSymbol &Record, uint32_t Offset) { + if (auto EC = Callbacks.visitSymbolBegin(Record, Offset)) + return EC; + return finishVisitation(Record, Callbacks); +} + Error CVSymbolVisitor::visitSymbolStream(const CVSymbolArray &Symbols) { for (auto I : Symbols) { if (auto EC = visitSymbolRecord(I)) @@ -62,3 +72,13 @@ Error CVSymbolVisitor::visitSymbolStream(const CVSymbolArray &Symbols) { } return Error::success(); } + +Error CVSymbolVisitor::visitSymbolStream(const CVSymbolArray &Symbols, + uint32_t InitialOffset) { + for (auto I : Symbols) { + if (auto EC = visitSymbolRecord(I, InitialOffset)) + return EC; + InitialOffset += 
I.length(); + } + return Error::success(); +} diff --git a/interpreter/llvm/src/lib/DebugInfo/CodeView/CVTypeDumper.cpp b/interpreter/llvm/src/lib/DebugInfo/CodeView/CVTypeDumper.cpp deleted file mode 100644 index bcc8218d94460..0000000000000 --- a/interpreter/llvm/src/lib/DebugInfo/CodeView/CVTypeDumper.cpp +++ /dev/null @@ -1,77 +0,0 @@ -//===-- CVTypeDumper.cpp - CodeView type info dumper ------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "llvm/DebugInfo/CodeView/CVTypeDumper.h" -#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h" -#include "llvm/DebugInfo/CodeView/TypeDatabase.h" -#include "llvm/DebugInfo/CodeView/TypeDatabaseVisitor.h" -#include "llvm/DebugInfo/CodeView/TypeDeserializer.h" -#include "llvm/DebugInfo/CodeView/TypeRecord.h" -#include "llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h" -#include "llvm/Support/BinaryByteStream.h" - -using namespace llvm; -using namespace llvm::codeview; - -Error CVTypeDumper::dump(const CVType &Record, TypeVisitorCallbacks &Dumper) { - TypeDatabaseVisitor DBV(TypeDB); - TypeDeserializer Deserializer; - TypeVisitorCallbackPipeline Pipeline; - Pipeline.addCallbackToPipeline(Deserializer); - Pipeline.addCallbackToPipeline(DBV); - Pipeline.addCallbackToPipeline(Dumper); - - CVTypeVisitor Visitor(Pipeline); - if (Handler) - Visitor.addTypeServerHandler(*Handler); - - CVType RecordCopy = Record; - if (auto EC = Visitor.visitTypeRecord(RecordCopy)) - return EC; - return Error::success(); -} - -Error CVTypeDumper::dump(const CVTypeArray &Types, - TypeVisitorCallbacks &Dumper) { - TypeDatabaseVisitor DBV(TypeDB); - TypeDeserializer Deserializer; - TypeVisitorCallbackPipeline Pipeline; - Pipeline.addCallbackToPipeline(Deserializer); - Pipeline.addCallbackToPipeline(DBV); - Pipeline.addCallbackToPipeline(Dumper); - - CVTypeVisitor Visitor(Pipeline); - if (Handler) - Visitor.addTypeServerHandler(*Handler); - - if (auto EC = Visitor.visitTypeStream(Types)) - return EC; - return Error::success(); -} - -Error CVTypeDumper::dump(ArrayRef<uint8_t> Data, TypeVisitorCallbacks &Dumper) { - BinaryByteStream Stream(Data, llvm::support::little); - CVTypeArray Types; - BinaryStreamReader Reader(Stream); - if (auto EC = Reader.readArray(Types, Reader.getLength())) - return EC; - - return dump(Types, Dumper); -} - -void CVTypeDumper::printTypeIndex(ScopedPrinter &Printer, StringRef FieldName, - TypeIndex TI, TypeDatabase &DB) { - StringRef TypeName; - if (!TI.isNoneType()) - TypeName = DB.getTypeName(TI); - if (!TypeName.empty()) - Printer.printHex(FieldName, TypeName, TI.getIndex()); - else - Printer.printHex(FieldName, TI.getIndex()); -} diff --git a/interpreter/llvm/src/lib/DebugInfo/CodeView/CVTypeVisitor.cpp b/interpreter/llvm/src/lib/DebugInfo/CodeView/CVTypeVisitor.cpp index b6ed0453d9c49..79b9fdefd40e5 100644 --- a/interpreter/llvm/src/lib/DebugInfo/CodeView/CVTypeVisitor.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/CodeView/CVTypeVisitor.cpp @@ -9,12 +9,11 @@ #include "llvm/DebugInfo/CodeView/CVTypeVisitor.h" +#include "llvm/ADT/TinyPtrVector.h" #include "llvm/DebugInfo/CodeView/CodeViewError.h" -#include "llvm/DebugInfo/CodeView/TypeDatabase.h" -#include "llvm/DebugInfo/CodeView/TypeDatabaseVisitor.h" +#include "llvm/DebugInfo/CodeView/TypeCollection.h" #include "llvm/DebugInfo/CodeView/TypeDeserializer.h" #include 
"llvm/DebugInfo/CodeView/TypeRecordMapping.h" -#include "llvm/DebugInfo/CodeView/TypeServerHandler.h" #include "llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h" #include "llvm/Support/BinaryByteStream.h" #include "llvm/Support/BinaryStreamReader.h" @@ -22,8 +21,6 @@ using namespace llvm; using namespace llvm::codeview; -CVTypeVisitor::CVTypeVisitor(TypeVisitorCallbacks &Callbacks) - : Callbacks(Callbacks) {} template static Error visitKnownRecord(CVType &Record, TypeVisitorCallbacks &Callbacks) { @@ -44,59 +41,61 @@ static Error visitKnownMember(CVMemberRecord &Record, return Error::success(); } -static Expected deserializeTypeServerRecord(CVType &Record) { - class StealTypeServerVisitor : public TypeVisitorCallbacks { - public: - explicit StealTypeServerVisitor(TypeServer2Record &TR) : TR(TR) {} - - Error visitKnownRecord(CVType &CVR, TypeServer2Record &Record) override { - TR = Record; - return Error::success(); - } +static Error visitMemberRecord(CVMemberRecord &Record, + TypeVisitorCallbacks &Callbacks) { + if (auto EC = Callbacks.visitMemberBegin(Record)) + return EC; - private: - TypeServer2Record &TR; - }; + switch (Record.Kind) { + default: + if (auto EC = Callbacks.visitUnknownMember(Record)) + return EC; + break; +#define MEMBER_RECORD(EnumName, EnumVal, Name) \ + case EnumName: { \ + if (auto EC = visitKnownMember(Record, Callbacks)) \ + return EC; \ + break; \ + } +#define MEMBER_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) \ + MEMBER_RECORD(EnumVal, EnumVal, AliasName) +#define TYPE_RECORD(EnumName, EnumVal, Name) +#define TYPE_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) +#include "llvm/DebugInfo/CodeView/CodeViewTypes.def" + } - TypeServer2Record R(TypeRecordKind::TypeServer2); - TypeDeserializer Deserializer; - StealTypeServerVisitor Thief(R); - TypeVisitorCallbackPipeline Pipeline; - Pipeline.addCallbackToPipeline(Deserializer); - Pipeline.addCallbackToPipeline(Thief); - CVTypeVisitor Visitor(Pipeline); - if (auto EC = Visitor.visitTypeRecord(Record)) - return std::move(EC); + if (auto EC = Callbacks.visitMemberEnd(Record)) + return EC; - return R; + return Error::success(); } -void CVTypeVisitor::addTypeServerHandler(TypeServerHandler &Handler) { - Handlers.push_back(&Handler); -} +namespace { -Expected CVTypeVisitor::handleTypeServer(CVType &Record) { - if (Record.Type == TypeLeafKind::LF_TYPESERVER2 && !Handlers.empty()) { - auto TS = deserializeTypeServerRecord(Record); - if (!TS) - return TS.takeError(); +class CVTypeVisitor { +public: + explicit CVTypeVisitor(TypeVisitorCallbacks &Callbacks); - for (auto Handler : Handlers) { - auto ExpectedResult = Handler->handle(*TS, Callbacks); - // If there was an error, return the error. - if (!ExpectedResult) - return ExpectedResult.takeError(); + Error visitTypeRecord(CVType &Record, TypeIndex Index); + Error visitTypeRecord(CVType &Record); - // If the handler processed the record, return success. - if (*ExpectedResult) - return true; + /// Visits the type records in Data. Sets the error flag on parse failures. + Error visitTypeStream(const CVTypeArray &Types); + Error visitTypeStream(CVTypeRange Types); + Error visitTypeStream(TypeCollection &Types); - // Otherwise keep searching for a handler, eventually falling out and - // using the default record handler. 
- } - } - return false; -} + Error visitMemberRecord(CVMemberRecord Record); + Error visitFieldListMemberStream(BinaryStreamReader &Stream); + +private: + Error finishVisitation(CVType &Record); + + /// The interface to the class that gets notified of each visitation. + TypeVisitorCallbacks &Callbacks; +}; + +CVTypeVisitor::CVTypeVisitor(TypeVisitorCallbacks &Callbacks) + : Callbacks(Callbacks) {} Error CVTypeVisitor::finishVisitation(CVType &Record) { switch (Record.Type) { @@ -114,7 +113,7 @@ Error CVTypeVisitor::finishVisitation(CVType &Record) { TYPE_RECORD(EnumVal, EnumVal, AliasName) #define MEMBER_RECORD(EnumName, EnumVal, Name) #define MEMBER_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) -#include "llvm/DebugInfo/CodeView/TypeRecords.def" +#include "llvm/DebugInfo/CodeView/CodeViewTypes.def" } if (auto EC = Callbacks.visitTypeEnd(Record)) @@ -124,12 +123,6 @@ Error CVTypeVisitor::finishVisitation(CVType &Record) { } Error CVTypeVisitor::visitTypeRecord(CVType &Record, TypeIndex Index) { - auto ExpectedResult = handleTypeServer(Record); - if (!ExpectedResult) - return ExpectedResult.takeError(); - if (*ExpectedResult) - return Error::success(); - if (auto EC = Callbacks.visitTypeBegin(Record, Index)) return EC; @@ -137,48 +130,13 @@ Error CVTypeVisitor::visitTypeRecord(CVType &Record, TypeIndex Index) { } Error CVTypeVisitor::visitTypeRecord(CVType &Record) { - auto ExpectedResult = handleTypeServer(Record); - if (!ExpectedResult) - return ExpectedResult.takeError(); - if (*ExpectedResult) - return Error::success(); - if (auto EC = Callbacks.visitTypeBegin(Record)) return EC; return finishVisitation(Record); } -static Error visitMemberRecord(CVMemberRecord &Record, - TypeVisitorCallbacks &Callbacks) { - if (auto EC = Callbacks.visitMemberBegin(Record)) - return EC; - - switch (Record.Kind) { - default: - if (auto EC = Callbacks.visitUnknownMember(Record)) - return EC; - break; -#define MEMBER_RECORD(EnumName, EnumVal, Name) \ - case EnumName: { \ - if (auto EC = visitKnownMember<Name##Record>(Record, Callbacks)) \ - return EC; \ - break; \ - } -#define MEMBER_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) \ - MEMBER_RECORD(EnumVal, EnumVal, AliasName) -#define TYPE_RECORD(EnumName, EnumVal, Name) -#define TYPE_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) -#include "llvm/DebugInfo/CodeView/TypeRecords.def" - } - - if (auto EC = Callbacks.visitMemberEnd(Record)) - return EC; - - return Error::success(); -} - -Error CVTypeVisitor::visitMemberRecord(CVMemberRecord &Record) { +Error CVTypeVisitor::visitMemberRecord(CVMemberRecord Record) { return ::visitMemberRecord(Record, Callbacks); } @@ -199,12 +157,18 @@ Error CVTypeVisitor::visitTypeStream(CVTypeRange Types) { return Error::success(); } -Error CVTypeVisitor::visitFieldListMemberStream(BinaryStreamReader Reader) { - FieldListDeserializer Deserializer(Reader); - TypeVisitorCallbackPipeline Pipeline; - Pipeline.addCallbackToPipeline(Deserializer); - Pipeline.addCallbackToPipeline(Callbacks); +Error CVTypeVisitor::visitTypeStream(TypeCollection &Types) { + Optional<TypeIndex> I = Types.getFirst(); + while (I) { + CVType Type = Types.getType(*I); + if (auto EC = visitTypeRecord(Type, *I)) + return EC; + I = Types.getNext(*I); + } + return Error::success(); +} +Error CVTypeVisitor::visitFieldListMemberStream(BinaryStreamReader &Reader) { TypeLeafKind Leaf; while (!Reader.empty()) { if (auto EC = Reader.readEnum(Leaf)) @@ -212,15 +176,101 @@ Error CVTypeVisitor::visitFieldListMemberStream(BinaryStreamReader Reader) { CVMemberRecord Record; Record.Kind = 
Leaf; - if (auto EC = ::visitMemberRecord(Record, Pipeline)) + if (auto EC = ::visitMemberRecord(Record, Callbacks)) return EC; } return Error::success(); } -Error CVTypeVisitor::visitFieldListMemberStream(ArrayRef<uint8_t> Data) { - BinaryByteStream S(Data, llvm::support::little); - BinaryStreamReader SR(S); - return visitFieldListMemberStream(SR); +struct FieldListVisitHelper { + FieldListVisitHelper(TypeVisitorCallbacks &Callbacks, ArrayRef<uint8_t> Data, + VisitorDataSource Source) + : Stream(Data, llvm::support::little), Reader(Stream), + Deserializer(Reader), + Visitor((Source == VDS_BytesPresent) ? Pipeline : Callbacks) { + if (Source == VDS_BytesPresent) { + Pipeline.addCallbackToPipeline(Deserializer); + Pipeline.addCallbackToPipeline(Callbacks); + } + } + + BinaryByteStream Stream; + BinaryStreamReader Reader; + FieldListDeserializer Deserializer; + TypeVisitorCallbackPipeline Pipeline; + CVTypeVisitor Visitor; +}; + +struct VisitHelper { + VisitHelper(TypeVisitorCallbacks &Callbacks, VisitorDataSource Source) + : Visitor((Source == VDS_BytesPresent) ? Pipeline : Callbacks) { + if (Source == VDS_BytesPresent) { + Pipeline.addCallbackToPipeline(Deserializer); + Pipeline.addCallbackToPipeline(Callbacks); + } + } + + TypeDeserializer Deserializer; + TypeVisitorCallbackPipeline Pipeline; + CVTypeVisitor Visitor; +}; +} + +Error llvm::codeview::visitTypeRecord(CVType &Record, TypeIndex Index, + TypeVisitorCallbacks &Callbacks, + VisitorDataSource Source) { + VisitHelper V(Callbacks, Source); + return V.Visitor.visitTypeRecord(Record, Index); +} + +Error llvm::codeview::visitTypeRecord(CVType &Record, + TypeVisitorCallbacks &Callbacks, + VisitorDataSource Source) { + VisitHelper V(Callbacks, Source); + return V.Visitor.visitTypeRecord(Record); +} + +Error llvm::codeview::visitTypeStream(const CVTypeArray &Types, + TypeVisitorCallbacks &Callbacks, + VisitorDataSource Source) { + VisitHelper V(Callbacks, Source); + return V.Visitor.visitTypeStream(Types); +} + +Error llvm::codeview::visitTypeStream(CVTypeRange Types, + TypeVisitorCallbacks &Callbacks) { + VisitHelper V(Callbacks, VDS_BytesPresent); + return V.Visitor.visitTypeStream(Types); +} + +Error llvm::codeview::visitTypeStream(TypeCollection &Types, + TypeVisitorCallbacks &Callbacks) { + // When the internal visitor calls Types.getType(Index) the interface is + // required to return a CVType with the bytes filled out. So we can assume + // that the bytes will be present when individual records are visited.
+ VisitHelper V(Callbacks, VDS_BytesPresent); + return V.Visitor.visitTypeStream(Types); +} + +Error llvm::codeview::visitMemberRecord(CVMemberRecord Record, + TypeVisitorCallbacks &Callbacks, + VisitorDataSource Source) { + FieldListVisitHelper V(Callbacks, Record.Data, Source); + return V.Visitor.visitMemberRecord(Record); +} + +Error llvm::codeview::visitMemberRecord(TypeLeafKind Kind, + ArrayRef Record, + TypeVisitorCallbacks &Callbacks) { + CVMemberRecord R; + R.Data = Record; + R.Kind = Kind; + return visitMemberRecord(R, Callbacks, VDS_BytesPresent); +} + +Error llvm::codeview::visitMemberRecordStream(ArrayRef FieldList, + TypeVisitorCallbacks &Callbacks) { + FieldListVisitHelper V(Callbacks, FieldList, VDS_BytesPresent); + return V.Visitor.visitFieldListMemberStream(V.Reader); } diff --git a/interpreter/llvm/src/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp b/interpreter/llvm/src/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp index 282e3103adc93..4fc14480578e6 100644 --- a/interpreter/llvm/src/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp @@ -27,6 +27,14 @@ Error CodeViewRecordIO::beginRecord(Optional MaxLength) { Error CodeViewRecordIO::endRecord() { assert(!Limits.empty() && "Not in a record!"); Limits.pop_back(); + // We would like to assert that we actually read / wrote all the bytes that we + // expected to for this record, but unfortunately we can't do this. Some + // producers such as MASM over-allocate for certain types of records and + // commit the extraneous data, so when reading we can't be sure every byte + // will have been read. And when writing we over-allocate temporarily since + // we don't know how big the record is until we're finished writing it, so + // even though we don't commit the extraneous data, we still can't guarantee + // we're at the end of the allocated data. 
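
// Illustrative sketch (not part of this patch): using the free-function
// visitation entry points introduced above. Assumes the LLVM 5.0 CodeView
// headers; the CountingCallbacks class is hypothetical.
#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
#include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h"

namespace {
// Counts the records it sees; every other hook keeps the no-op default
// from the TypeVisitorCallbacks base class.
class CountingCallbacks : public llvm::codeview::TypeVisitorCallbacks {
public:
  llvm::Error visitTypeBegin(llvm::codeview::CVType &Record) override {
    ++Count;
    return llvm::Error::success();
  }
  unsigned Count = 0;
};
} // namespace

llvm::Error countOne(llvm::codeview::CVType &Record) {
  CountingCallbacks CC;
  // VDS_BytesPresent makes the helper run a TypeDeserializer in front of our
  // callbacks, since Record carries its raw bytes.
  return llvm::codeview::visitTypeRecord(Record, CC,
                                         llvm::codeview::VDS_BytesPresent);
}
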
return Error::success(); } @@ -49,6 +57,12 @@ uint32_t CodeViewRecordIO::maxFieldLength() const { return *Min; } +Error CodeViewRecordIO::padToAlignment(uint32_t Align) { + if (isReading()) + return Reader->padToAlignment(Align); + return Writer->padToAlignment(Align); +} + Error CodeViewRecordIO::skipPadding() { assert(!isWriting() && "Cannot skip padding while writing!"); @@ -154,18 +168,19 @@ Error CodeViewRecordIO::mapStringZ(StringRef &Value) { return Error::success(); } -Error CodeViewRecordIO::mapGuid(StringRef &Guid) { +Error CodeViewRecordIO::mapGuid(GUID &Guid) { constexpr uint32_t GuidSize = 16; if (maxFieldLength() < GuidSize) return make_error(cv_error_code::insufficient_buffer); if (isWriting()) { - assert(Guid.size() == 16 && "Invalid Guid Size!"); - if (auto EC = Writer->writeFixedString(Guid)) + if (auto EC = Writer->writeBytes(Guid.Guid)) return EC; } else { - if (auto EC = Reader->readFixedString(Guid, 16)) + ArrayRef GuidBytes; + if (auto EC = Reader->readBytes(GuidBytes, GuidSize)) return EC; + memcpy(Guid.Guid, GuidBytes.data(), GuidSize); } return Error::success(); } diff --git a/interpreter/llvm/src/lib/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.cpp b/interpreter/llvm/src/lib/DebugInfo/CodeView/DebugChecksumsSubsection.cpp similarity index 63% rename from interpreter/llvm/src/lib/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.cpp rename to interpreter/llvm/src/lib/DebugInfo/CodeView/DebugChecksumsSubsection.cpp index 42f0afc3e2d74..ccc20eb748874 100644 --- a/interpreter/llvm/src/lib/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/CodeView/DebugChecksumsSubsection.cpp @@ -1,4 +1,4 @@ -//===- ModuleDebugFileChecksumFragment.cpp ----------------------*- C++ -*-===// +//===- DebugChecksumsSubsection.cpp ---------------------------------------===// // // The LLVM Compiler Infrastructure // @@ -7,11 +7,18 @@ // //===----------------------------------------------------------------------===// -#include "llvm/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.h" - -#include "llvm/DebugInfo/CodeView/CodeViewError.h" -#include "llvm/DebugInfo/CodeView/StringTable.h" +#include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/DebugStringTableSubsection.h" #include "llvm/Support/BinaryStreamReader.h" +#include "llvm/Support/BinaryStreamWriter.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/MathExtras.h" +#include +#include +#include using namespace llvm; using namespace llvm::codeview; @@ -25,8 +32,8 @@ struct FileChecksumEntryHeader { // Checksum bytes follow. 
 };
 
-Error llvm::VarStreamArrayExtractor<FileChecksumEntry>::extract(
-    BinaryStreamRef Stream, uint32_t &Len, FileChecksumEntry &Item) {
+Error VarStreamArrayExtractor<FileChecksumEntry>::
+operator()(BinaryStreamRef Stream, uint32_t &Len, FileChecksumEntry &Item) {
   BinaryStreamReader Reader(Stream);
 
   const FileChecksumEntryHeader *Header;
@@ -42,22 +49,25 @@ Error llvm::VarStreamArrayExtractor<FileChecksumEntry>::extract(
   return Error::success();
 }
 
-Error ModuleDebugFileChecksumFragmentRef::initialize(
-    BinaryStreamReader Reader) {
+Error DebugChecksumsSubsectionRef::initialize(BinaryStreamReader Reader) {
   if (auto EC = Reader.readArray(Checksums, Reader.bytesRemaining()))
     return EC;
   return Error::success();
 }
 
-ModuleDebugFileChecksumFragment::ModuleDebugFileChecksumFragment(
-    StringTable &Strings)
-    : ModuleDebugFragment(ModuleDebugFragmentKind::FileChecksums),
-      Strings(Strings) {}
+Error DebugChecksumsSubsectionRef::initialize(BinaryStreamRef Section) {
+  BinaryStreamReader Reader(Section);
+  return initialize(Reader);
+}
+
+DebugChecksumsSubsection::DebugChecksumsSubsection(
+    DebugStringTableSubsection &Strings)
+    : DebugSubsection(DebugSubsectionKind::FileChecksums), Strings(Strings) {}
 
-void ModuleDebugFileChecksumFragment::addChecksum(StringRef FileName,
-                                                  FileChecksumKind Kind,
-                                                  ArrayRef<uint8_t> Bytes) {
+void DebugChecksumsSubsection::addChecksum(StringRef FileName,
+                                           FileChecksumKind Kind,
+                                           ArrayRef<uint8_t> Bytes) {
   FileChecksumEntry Entry;
   if (!Bytes.empty()) {
     uint8_t *Copy = Storage.Allocate<uint8_t>(Bytes.size());
@@ -78,11 +88,11 @@ void ModuleDebugFileChecksumFragment::addChecksum(StringRef FileName,
   SerializedSize += Len;
 }
 
-uint32_t ModuleDebugFileChecksumFragment::calculateSerializedLength() {
+uint32_t DebugChecksumsSubsection::calculateSerializedSize() const {
   return SerializedSize;
 }
 
-Error ModuleDebugFileChecksumFragment::commit(BinaryStreamWriter &Writer) {
+Error DebugChecksumsSubsection::commit(BinaryStreamWriter &Writer) const {
   for (const auto &FC : Checksums) {
     FileChecksumEntryHeader Header;
     Header.ChecksumKind = uint8_t(FC.Kind);
@@ -98,8 +108,7 @@ Error ModuleDebugFileChecksumFragment::commit(BinaryStreamWriter &Writer) {
   return Error::success();
 }
 
-uint32_t
-ModuleDebugFileChecksumFragment::mapChecksumOffset(StringRef FileName) const {
+uint32_t DebugChecksumsSubsection::mapChecksumOffset(StringRef FileName) const {
   uint32_t Offset = Strings.getStringId(FileName);
   auto Iter = OffsetMap.find(Offset);
   assert(Iter != OffsetMap.end());
diff --git a/interpreter/llvm/src/lib/DebugInfo/CodeView/DebugCrossExSubsection.cpp b/interpreter/llvm/src/lib/DebugInfo/CodeView/DebugCrossExSubsection.cpp
new file mode 100644
index 0000000000000..cef27787cfd10
--- /dev/null
+++ b/interpreter/llvm/src/lib/DebugInfo/CodeView/DebugCrossExSubsection.cpp
@@ -0,0 +1,53 @@
+//===- DebugCrossExSubsection.cpp -----------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
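
// Illustrative sketch (not part of this patch): populating the checksums
// subsection defined above. Assumes the LLVM 5.0 headers; the file name and
// MD5 bytes are made up.
#include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h"
#include "llvm/DebugInfo/CodeView/DebugStringTableSubsection.h"
#include <cstdint>

void buildChecksums(llvm::codeview::DebugStringTableSubsection &Strings) {
  using namespace llvm::codeview;
  DebugChecksumsSubsection Checksums(Strings);
  const uint8_t FakeMD5[16] = {0};
  // Each entry serializes as a FileChecksumEntryHeader followed by the
  // checksum bytes, padded to 4 bytes; addChecksum keeps SerializedSize
  // up to date, so calculateSerializedSize() is just a field read.
  Checksums.addChecksum("a.cpp", FileChecksumKind::MD5, FakeMD5);
  (void)Checksums.calculateSerializedSize(); // the size commit() will write
}
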
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/CodeView/DebugCrossExSubsection.h"
+#include "llvm/DebugInfo/CodeView/CodeViewError.h"
+#include "llvm/Support/BinaryStreamWriter.h"
+#include "llvm/Support/Error.h"
+#include <cstdint>
+
+using namespace llvm;
+using namespace llvm::codeview;
+
+Error DebugCrossModuleExportsSubsectionRef::initialize(
+    BinaryStreamReader Reader) {
+  if (Reader.bytesRemaining() % sizeof(CrossModuleExport) != 0)
+    return make_error<CodeViewError>(
+        cv_error_code::corrupt_record,
+        "Cross Scope Exports section is an invalid size!");
+
+  uint32_t Size = Reader.bytesRemaining() / sizeof(CrossModuleExport);
+  return Reader.readArray(References, Size);
+}
+
+Error DebugCrossModuleExportsSubsectionRef::initialize(BinaryStreamRef Stream) {
+  BinaryStreamReader Reader(Stream);
+  return initialize(Reader);
+}
+
+void DebugCrossModuleExportsSubsection::addMapping(uint32_t Local,
+                                                   uint32_t Global) {
+  Mappings[Local] = Global;
+}
+
+uint32_t DebugCrossModuleExportsSubsection::calculateSerializedSize() const {
+  return Mappings.size() * sizeof(CrossModuleExport);
+}
+
+Error DebugCrossModuleExportsSubsection::commit(
+    BinaryStreamWriter &Writer) const {
+  for (const auto &M : Mappings) {
+    if (auto EC = Writer.writeInteger(M.first))
+      return EC;
+    if (auto EC = Writer.writeInteger(M.second))
+      return EC;
+  }
+  return Error::success();
+}
diff --git a/interpreter/llvm/src/lib/DebugInfo/CodeView/DebugCrossImpSubsection.cpp b/interpreter/llvm/src/lib/DebugInfo/CodeView/DebugCrossImpSubsection.cpp
new file mode 100644
index 0000000000000..88c0076915b58
--- /dev/null
+++ b/interpreter/llvm/src/lib/DebugInfo/CodeView/DebugCrossImpSubsection.cpp
@@ -0,0 +1,97 @@
+//===- DebugCrossImpSubsection.cpp ----------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
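
// Illustrative sketch (not part of this patch): the exports subsection above
// is a plain map of local ids to global ids, so its serialized size is
// exactly Mappings.size() * sizeof(CrossModuleExport) (8 bytes per entry).
#include "llvm/DebugInfo/CodeView/DebugCrossExSubsection.h"
#include <cassert>

void buildExports() {
  llvm::codeview::DebugCrossModuleExportsSubsection Exports;
  Exports.addMapping(/*Local=*/0x1000, /*Global=*/0x2000);
  Exports.addMapping(/*Local=*/0x1001, /*Global=*/0x2001);
  assert(Exports.calculateSerializedSize() ==
         2 * sizeof(llvm::codeview::CrossModuleExport));
}
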
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/CodeView/DebugCrossImpSubsection.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/DebugInfo/CodeView/CodeViewError.h"
+#include "llvm/DebugInfo/CodeView/DebugStringTableSubsection.h"
+#include "llvm/Support/BinaryStreamReader.h"
+#include "llvm/Support/BinaryStreamWriter.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/Error.h"
+#include <algorithm>
+#include <cstdint>
+#include <utility>
+#include <vector>
+
+using namespace llvm;
+using namespace llvm::codeview;
+
+Error VarStreamArrayExtractor<CrossModuleImportItem>::
+operator()(BinaryStreamRef Stream, uint32_t &Len,
+           codeview::CrossModuleImportItem &Item) {
+  BinaryStreamReader Reader(Stream);
+  if (Reader.bytesRemaining() < sizeof(CrossModuleImport))
+    return make_error<CodeViewError>(
+        cv_error_code::insufficient_buffer,
+        "Not enough bytes for a Cross Module Import Header!");
+  if (auto EC = Reader.readObject(Item.Header))
+    return EC;
+  if (Reader.bytesRemaining() < Item.Header->Count * sizeof(uint32_t))
+    return make_error<CodeViewError>(
+        cv_error_code::insufficient_buffer,
+        "Not enough to read specified number of Cross Module References!");
+  if (auto EC = Reader.readArray(Item.Imports, Item.Header->Count))
+    return EC;
+  return Error::success();
+}
+
+Error DebugCrossModuleImportsSubsectionRef::initialize(
+    BinaryStreamReader Reader) {
+  return Reader.readArray(References, Reader.bytesRemaining());
+}
+
+Error DebugCrossModuleImportsSubsectionRef::initialize(BinaryStreamRef Stream) {
+  BinaryStreamReader Reader(Stream);
+  return initialize(Reader);
+}
+
+void DebugCrossModuleImportsSubsection::addImport(StringRef Module,
+                                                  uint32_t ImportId) {
+  Strings.insert(Module);
+  std::vector<support::ulittle32_t> Targets = {support::ulittle32_t(ImportId)};
+  auto Result = Mappings.insert(std::make_pair(Module, Targets));
+  if (!Result.second)
+    Result.first->getValue().push_back(Targets[0]);
+}
+
+uint32_t DebugCrossModuleImportsSubsection::calculateSerializedSize() const {
+  uint32_t Size = 0;
+  for (const auto &Item : Mappings) {
+    Size += sizeof(CrossModuleImport);
+    Size += sizeof(support::ulittle32_t) * Item.second.size();
+  }
+  return Size;
+}
+
+Error DebugCrossModuleImportsSubsection::commit(
+    BinaryStreamWriter &Writer) const {
+  using T = decltype(&*Mappings.begin());
+  std::vector<T> Ids;
+  Ids.reserve(Mappings.size());
+
+  for (const auto &M : Mappings)
+    Ids.push_back(&M);
+
+  std::sort(Ids.begin(), Ids.end(), [this](const T &L1, const T &L2) {
+    return Strings.getStringId(L1->getKey()) <
+           Strings.getStringId(L2->getKey());
+  });
+
+  for (const auto &Item : Ids) {
+    CrossModuleImport Imp;
+    Imp.ModuleNameOffset = Strings.getStringId(Item->getKey());
+    Imp.Count = Item->getValue().size();
+    if (auto EC = Writer.writeObject(Imp))
+      return EC;
+    if (auto EC = Writer.writeArray(makeArrayRef(Item->getValue())))
+      return EC;
+  }
+  return Error::success();
+}
diff --git a/interpreter/llvm/src/lib/DebugInfo/CodeView/DebugFrameDataSubsection.cpp b/interpreter/llvm/src/lib/DebugInfo/CodeView/DebugFrameDataSubsection.cpp
new file mode 100644
index 0000000000000..fd558aa9cc8a5
--- /dev/null
+++ b/interpreter/llvm/src/lib/DebugInfo/CodeView/DebugFrameDataSubsection.cpp
@@ -0,0 +1,44 @@
+//===- DebugFrameDataSubsection.cpp -----------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
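
// Illustrative sketch (not part of this patch): imports are grouped per
// module name, and commit() above emits the groups ordered by each module's
// string-table offset rather than by insertion order.
#include "llvm/DebugInfo/CodeView/DebugCrossImpSubsection.h"
#include "llvm/DebugInfo/CodeView/DebugStringTableSubsection.h"

void buildImports(llvm::codeview::DebugStringTableSubsection &Strings) {
  llvm::codeview::DebugCrossModuleImportsSubsection Imports(Strings);
  // Both ids for "b.obj" coalesce into one CrossModuleImport with Count == 2.
  Imports.addImport("b.obj", 0x1001);
  Imports.addImport("b.obj", 0x1002);
  Imports.addImport("a.obj", 0x2001);
}
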
+// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/CodeView/DebugFrameDataSubsection.h" +#include "llvm/DebugInfo/CodeView/CodeViewError.h" + +using namespace llvm; +using namespace llvm::codeview; + +Error DebugFrameDataSubsectionRef::initialize(BinaryStreamReader Reader) { + if (auto EC = Reader.readObject(RelocPtr)) + return EC; + if (Reader.bytesRemaining() % sizeof(FrameData) != 0) + return make_error(cv_error_code::corrupt_record, + "Invalid frame data record format!"); + + uint32_t Count = Reader.bytesRemaining() / sizeof(FrameData); + if (auto EC = Reader.readArray(Frames, Count)) + return EC; + return Error::success(); +} + +uint32_t DebugFrameDataSubsection::calculateSerializedSize() const { + return 4 + sizeof(FrameData) * Frames.size(); +} + +Error DebugFrameDataSubsection::commit(BinaryStreamWriter &Writer) const { + if (auto EC = Writer.writeInteger(0)) + return EC; + + if (auto EC = Writer.writeArray(makeArrayRef(Frames))) + return EC; + return Error::success(); +} + +void DebugFrameDataSubsection::addFrameData(const FrameData &Frame) { + Frames.push_back(Frame); +} diff --git a/interpreter/llvm/src/lib/DebugInfo/CodeView/ModuleDebugInlineeLinesFragment.cpp b/interpreter/llvm/src/lib/DebugInfo/CodeView/DebugInlineeLinesSubsection.cpp similarity index 57% rename from interpreter/llvm/src/lib/DebugInfo/CodeView/ModuleDebugInlineeLinesFragment.cpp rename to interpreter/llvm/src/lib/DebugInfo/CodeView/DebugInlineeLinesSubsection.cpp index cb6a8478797f9..077c103a615b8 100644 --- a/interpreter/llvm/src/lib/DebugInfo/CodeView/ModuleDebugInlineeLinesFragment.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/CodeView/DebugInlineeLinesSubsection.cpp @@ -1,4 +1,4 @@ -//===- ModuleDebugInlineeLineFragment.cpp ------------------------*- C++-*-===// +//===- DebugInlineeLinesSubsection.cpp ------------------------------------===// // // The LLVM Compiler Infrastructure // @@ -7,19 +7,22 @@ // //===----------------------------------------------------------------------===// -#include "llvm/DebugInfo/CodeView/ModuleDebugInlineeLinesFragment.h" - -#include "llvm/DebugInfo/CodeView/CodeViewError.h" -#include "llvm/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.h" -#include "llvm/DebugInfo/CodeView/ModuleDebugFragmentRecord.h" -#include "llvm/DebugInfo/CodeView/StringTable.h" +#include "llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h" +#include "llvm/Support/BinaryStreamReader.h" +#include "llvm/Support/BinaryStreamWriter.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/Error.h" +#include +#include using namespace llvm; using namespace llvm::codeview; -Error VarStreamArrayExtractor::extract( - BinaryStreamRef Stream, uint32_t &Len, InlineeSourceLine &Item, - bool HasExtraFiles) { +Error VarStreamArrayExtractor:: +operator()(BinaryStreamRef Stream, uint32_t &Len, InlineeSourceLine &Item) { BinaryStreamReader Reader(Stream); if (auto EC = Reader.readObject(Item.Header)) @@ -37,31 +40,31 @@ Error VarStreamArrayExtractor::extract( return Error::success(); } -ModuleDebugInlineeLineFragmentRef::ModuleDebugInlineeLineFragmentRef() - : ModuleDebugFragmentRef(ModuleDebugFragmentKind::InlineeLines) {} +DebugInlineeLinesSubsectionRef::DebugInlineeLinesSubsectionRef() + : DebugSubsectionRef(DebugSubsectionKind::InlineeLines) {} -Error 
ModuleDebugInlineeLineFragmentRef::initialize(BinaryStreamReader Reader) { +Error DebugInlineeLinesSubsectionRef::initialize(BinaryStreamReader Reader) { if (auto EC = Reader.readEnum(Signature)) return EC; - if (auto EC = - Reader.readArray(Lines, Reader.bytesRemaining(), hasExtraFiles())) + Lines.getExtractor().HasExtraFiles = hasExtraFiles(); + if (auto EC = Reader.readArray(Lines, Reader.bytesRemaining())) return EC; assert(Reader.bytesRemaining() == 0); return Error::success(); } -bool ModuleDebugInlineeLineFragmentRef::hasExtraFiles() const { +bool DebugInlineeLinesSubsectionRef::hasExtraFiles() const { return Signature == InlineeLinesSignature::ExtraFiles; } -ModuleDebugInlineeLineFragment::ModuleDebugInlineeLineFragment( - ModuleDebugFileChecksumFragment &Checksums, bool HasExtraFiles) - : ModuleDebugFragment(ModuleDebugFragmentKind::InlineeLines), - Checksums(Checksums), HasExtraFiles(HasExtraFiles) {} +DebugInlineeLinesSubsection::DebugInlineeLinesSubsection( + DebugChecksumsSubsection &Checksums, bool HasExtraFiles) + : DebugSubsection(DebugSubsectionKind::InlineeLines), Checksums(Checksums), + HasExtraFiles(HasExtraFiles) {} -uint32_t ModuleDebugInlineeLineFragment::calculateSerializedLength() { +uint32_t DebugInlineeLinesSubsection::calculateSerializedSize() const { // 4 bytes for the signature uint32_t Size = sizeof(InlineeLinesSignature); @@ -78,7 +81,7 @@ uint32_t ModuleDebugInlineeLineFragment::calculateSerializedLength() { return Size; } -Error ModuleDebugInlineeLineFragment::commit(BinaryStreamWriter &Writer) { +Error DebugInlineeLinesSubsection::commit(BinaryStreamWriter &Writer) const { InlineeLinesSignature Sig = InlineeLinesSignature::Normal; if (HasExtraFiles) Sig = InlineeLinesSignature::ExtraFiles; @@ -102,7 +105,7 @@ Error ModuleDebugInlineeLineFragment::commit(BinaryStreamWriter &Writer) { return Error::success(); } -void ModuleDebugInlineeLineFragment::addExtraFile(StringRef FileName) { +void DebugInlineeLinesSubsection::addExtraFile(StringRef FileName) { uint32_t Offset = Checksums.mapChecksumOffset(FileName); auto &Entry = Entries.back(); @@ -110,9 +113,9 @@ void ModuleDebugInlineeLineFragment::addExtraFile(StringRef FileName) { ++ExtraFileCount; } -void ModuleDebugInlineeLineFragment::addInlineSite(TypeIndex FuncId, - StringRef FileName, - uint32_t SourceLine) { +void DebugInlineeLinesSubsection::addInlineSite(TypeIndex FuncId, + StringRef FileName, + uint32_t SourceLine) { uint32_t Offset = Checksums.mapChecksumOffset(FileName); Entries.emplace_back(); diff --git a/interpreter/llvm/src/lib/DebugInfo/CodeView/ModuleDebugLineFragment.cpp b/interpreter/llvm/src/lib/DebugInfo/CodeView/DebugLinesSubsection.cpp similarity index 63% rename from interpreter/llvm/src/lib/DebugInfo/CodeView/ModuleDebugLineFragment.cpp rename to interpreter/llvm/src/lib/DebugInfo/CodeView/DebugLinesSubsection.cpp index e0ee934709ba5..57ad40819fbc4 100644 --- a/interpreter/llvm/src/lib/DebugInfo/CodeView/ModuleDebugLineFragment.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/CodeView/DebugLinesSubsection.cpp @@ -1,4 +1,4 @@ -//===- ModuleDebugLineFragment.cpp -------------------------------*- C++-*-===// +//===- DebugLinesSubsection.cpp -------------------------------------------===// // // The LLVM Compiler Infrastructure // @@ -7,20 +7,22 @@ // //===----------------------------------------------------------------------===// -#include "llvm/DebugInfo/CodeView/ModuleDebugLineFragment.h" - +#include "llvm/DebugInfo/CodeView/DebugLinesSubsection.h" +#include "llvm/ADT/ArrayRef.h" +#include 
"llvm/DebugInfo/CodeView/CodeView.h" #include "llvm/DebugInfo/CodeView/CodeViewError.h" -#include "llvm/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.h" -#include "llvm/DebugInfo/CodeView/ModuleDebugFragmentRecord.h" -#include "llvm/DebugInfo/CodeView/StringTable.h" +#include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h" +#include "llvm/Support/BinaryStreamReader.h" +#include "llvm/Support/BinaryStreamWriter.h" +#include "llvm/Support/Error.h" +#include +#include using namespace llvm; using namespace llvm::codeview; -Error LineColumnExtractor::extract(BinaryStreamRef Stream, uint32_t &Len, - LineColumnEntry &Item, - const LineFragmentHeader *Header) { - using namespace codeview; +Error LineColumnExtractor::operator()(BinaryStreamRef Stream, uint32_t &Len, + LineColumnEntry &Item) { const LineBlockFragmentHeader *BlockHeader; BinaryStreamReader Reader(Stream); if (auto EC = Reader.readObject(BlockHeader)) @@ -49,37 +51,35 @@ Error LineColumnExtractor::extract(BinaryStreamRef Stream, uint32_t &Len, return Error::success(); } -ModuleDebugLineFragmentRef::ModuleDebugLineFragmentRef() - : ModuleDebugFragmentRef(ModuleDebugFragmentKind::Lines) {} +DebugLinesSubsectionRef::DebugLinesSubsectionRef() + : DebugSubsectionRef(DebugSubsectionKind::Lines) {} -Error ModuleDebugLineFragmentRef::initialize(BinaryStreamReader Reader) { +Error DebugLinesSubsectionRef::initialize(BinaryStreamReader Reader) { if (auto EC = Reader.readObject(Header)) return EC; - if (auto EC = - Reader.readArray(LinesAndColumns, Reader.bytesRemaining(), Header)) + LinesAndColumns.getExtractor().Header = Header; + if (auto EC = Reader.readArray(LinesAndColumns, Reader.bytesRemaining())) return EC; return Error::success(); } -bool ModuleDebugLineFragmentRef::hasColumnInfo() const { +bool DebugLinesSubsectionRef::hasColumnInfo() const { return !!(Header->Flags & LF_HaveColumns); } -ModuleDebugLineFragment::ModuleDebugLineFragment( - ModuleDebugFileChecksumFragment &Checksums, StringTable &Strings) - : ModuleDebugFragment(ModuleDebugFragmentKind::Lines), - Checksums(Checksums) {} +DebugLinesSubsection::DebugLinesSubsection(DebugChecksumsSubsection &Checksums, + DebugStringTableSubsection &Strings) + : DebugSubsection(DebugSubsectionKind::Lines), Checksums(Checksums) {} -void ModuleDebugLineFragment::createBlock(StringRef FileName) { +void DebugLinesSubsection::createBlock(StringRef FileName) { uint32_t Offset = Checksums.mapChecksumOffset(FileName); Blocks.emplace_back(Offset); } -void ModuleDebugLineFragment::addLineInfo(uint32_t Offset, - const LineInfo &Line) { +void DebugLinesSubsection::addLineInfo(uint32_t Offset, const LineInfo &Line) { Block &B = Blocks.back(); LineNumberEntry LNE; LNE.Flags = Line.getRawData(); @@ -87,10 +87,10 @@ void ModuleDebugLineFragment::addLineInfo(uint32_t Offset, B.Lines.push_back(LNE); } -void ModuleDebugLineFragment::addLineAndColumnInfo(uint32_t Offset, - const LineInfo &Line, - uint32_t ColStart, - uint32_t ColEnd) { +void DebugLinesSubsection::addLineAndColumnInfo(uint32_t Offset, + const LineInfo &Line, + uint32_t ColStart, + uint32_t ColEnd) { Block &B = Blocks.back(); assert(B.Lines.size() == B.Columns.size()); @@ -101,7 +101,7 @@ void ModuleDebugLineFragment::addLineAndColumnInfo(uint32_t Offset, B.Columns.push_back(CNE); } -Error ModuleDebugLineFragment::commit(BinaryStreamWriter &Writer) { +Error DebugLinesSubsection::commit(BinaryStreamWriter &Writer) const { LineFragmentHeader Header; Header.CodeSize = CodeSize; Header.Flags = hasColumnInfo() ? 
LF_HaveColumns : 0; @@ -135,7 +135,7 @@ Error ModuleDebugLineFragment::commit(BinaryStreamWriter &Writer) { return Error::success(); } -uint32_t ModuleDebugLineFragment::calculateSerializedLength() { +uint32_t DebugLinesSubsection::calculateSerializedSize() const { uint32_t Size = sizeof(LineFragmentHeader); for (const auto &B : Blocks) { Size += sizeof(LineBlockFragmentHeader); @@ -146,16 +146,16 @@ uint32_t ModuleDebugLineFragment::calculateSerializedLength() { return Size; } -void ModuleDebugLineFragment::setRelocationAddress(uint16_t Segment, - uint16_t Offset) { +void DebugLinesSubsection::setRelocationAddress(uint16_t Segment, + uint32_t Offset) { RelocOffset = Offset; RelocSegment = Segment; } -void ModuleDebugLineFragment::setCodeSize(uint32_t Size) { CodeSize = Size; } +void DebugLinesSubsection::setCodeSize(uint32_t Size) { CodeSize = Size; } -void ModuleDebugLineFragment::setFlags(LineFlags Flags) { this->Flags = Flags; } +void DebugLinesSubsection::setFlags(LineFlags Flags) { this->Flags = Flags; } -bool ModuleDebugLineFragment::hasColumnInfo() const { +bool DebugLinesSubsection::hasColumnInfo() const { return Flags & LF_HaveColumns; } diff --git a/interpreter/llvm/src/lib/DebugInfo/CodeView/DebugStringTableSubsection.cpp b/interpreter/llvm/src/lib/DebugInfo/CodeView/DebugStringTableSubsection.cpp new file mode 100644 index 0000000000000..d723282eb7158 --- /dev/null +++ b/interpreter/llvm/src/lib/DebugInfo/CodeView/DebugStringTableSubsection.cpp @@ -0,0 +1,90 @@ +//===- DebugStringTableSubsection.cpp - CodeView String Table -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/CodeView/DebugStringTableSubsection.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/Support/BinaryStreamReader.h" +#include "llvm/Support/BinaryStreamWriter.h" +#include "llvm/Support/Error.h" +#include +#include +#include + +using namespace llvm; +using namespace llvm::codeview; + +DebugStringTableSubsectionRef::DebugStringTableSubsectionRef() + : DebugSubsectionRef(DebugSubsectionKind::StringTable) {} + +Error DebugStringTableSubsectionRef::initialize(BinaryStreamRef Contents) { + Stream = Contents; + return Error::success(); +} + +Error DebugStringTableSubsectionRef::initialize(BinaryStreamReader &Reader) { + return Reader.readStreamRef(Stream); +} + +Expected +DebugStringTableSubsectionRef::getString(uint32_t Offset) const { + BinaryStreamReader Reader(Stream); + Reader.setOffset(Offset); + StringRef Result; + if (auto EC = Reader.readCString(Result)) + return std::move(EC); + return Result; +} + +DebugStringTableSubsection::DebugStringTableSubsection() + : DebugSubsection(DebugSubsectionKind::StringTable) {} + +uint32_t DebugStringTableSubsection::insert(StringRef S) { + auto P = Strings.insert({S, StringSize}); + + // If a given string didn't exist in the string table, we want to increment + // the string table size. + if (P.second) + StringSize += S.size() + 1; // +1 for '\0' + return P.first->second; +} + +uint32_t DebugStringTableSubsection::calculateSerializedSize() const { + return StringSize; +} + +Error DebugStringTableSubsection::commit(BinaryStreamWriter &Writer) const { + uint32_t Begin = Writer.getOffset(); + uint32_t End = Begin + StringSize; + + // Write a null string at the beginning. 
+ if (auto EC = Writer.writeCString(StringRef())) + return EC; + + for (auto &Pair : Strings) { + StringRef S = Pair.getKey(); + uint32_t Offset = Begin + Pair.getValue(); + Writer.setOffset(Offset); + if (auto EC = Writer.writeCString(S)) + return EC; + assert(Writer.getOffset() <= End); + } + + Writer.setOffset(End); + assert((End - Begin) == StringSize); + return Error::success(); +} + +uint32_t DebugStringTableSubsection::size() const { return Strings.size(); } + +uint32_t DebugStringTableSubsection::getStringId(StringRef S) const { + auto Iter = Strings.find(S); + assert(Iter != Strings.end()); + return Iter->second; +} diff --git a/interpreter/llvm/src/lib/DebugInfo/CodeView/ModuleDebugFragment.cpp b/interpreter/llvm/src/lib/DebugInfo/CodeView/DebugSubsection.cpp similarity index 55% rename from interpreter/llvm/src/lib/DebugInfo/CodeView/ModuleDebugFragment.cpp rename to interpreter/llvm/src/lib/DebugInfo/CodeView/DebugSubsection.cpp index 2af1917413daf..67b428bfa7133 100644 --- a/interpreter/llvm/src/lib/DebugInfo/CodeView/ModuleDebugFragment.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/CodeView/DebugSubsection.cpp @@ -1,4 +1,4 @@ -//===- ModuleDebugFragment.cpp -----------------------------------*- C++-*-===// +//===- DebugSubsection.cpp -----------------------------------*- C++-*-===// // // The LLVM Compiler Infrastructure // @@ -7,10 +7,10 @@ // //===----------------------------------------------------------------------===// -#include "llvm/DebugInfo/CodeView/ModuleDebugFragment.h" +#include "llvm/DebugInfo/CodeView/DebugSubsection.h" using namespace llvm::codeview; -ModuleDebugFragmentRef::~ModuleDebugFragmentRef() {} +DebugSubsectionRef::~DebugSubsectionRef() {} -ModuleDebugFragment::~ModuleDebugFragment() {} +DebugSubsection::~DebugSubsection() {} diff --git a/interpreter/llvm/src/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp b/interpreter/llvm/src/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp new file mode 100644 index 0000000000000..55f343c11e7fe --- /dev/null +++ b/interpreter/llvm/src/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp @@ -0,0 +1,97 @@ +//===- DebugSubsectionRecord.cpp ------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
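
// Illustrative sketch (not part of this patch): insert() returns the offset
// a string will occupy after commit(). Offset 0 is reserved for the leading
// empty string written above, so the first real string lands at offset 1
// (assuming the header initializes StringSize to 1, which is not shown here).
#include "llvm/DebugInfo/CodeView/DebugStringTableSubsection.h"
#include <cstdint>

void stringTableOffsets() {
  llvm::codeview::DebugStringTableSubsection Table;
  uint32_t A = Table.insert("a.cpp"); // expected: 1, right after the null byte
  uint32_t B = Table.insert("b.cpp"); // expected: 1 + 5 + 1 == 7
  uint32_t C = Table.insert("a.cpp"); // duplicate: same offset as A
  (void)A; (void)B; (void)C;
}
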
+// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h" +#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/DebugSubsection.h" +#include "llvm/Support/BinaryStreamReader.h" +#include "llvm/Support/BinaryStreamWriter.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/MathExtras.h" +#include +#include +#include + +using namespace llvm; +using namespace llvm::codeview; + +DebugSubsectionRecord::DebugSubsectionRecord() = default; + +DebugSubsectionRecord::DebugSubsectionRecord(DebugSubsectionKind Kind, + BinaryStreamRef Data, + CodeViewContainer Container) + : Container(Container), Kind(Kind), Data(Data) {} + +Error DebugSubsectionRecord::initialize(BinaryStreamRef Stream, + DebugSubsectionRecord &Info, + CodeViewContainer Container) { + const DebugSubsectionHeader *Header; + BinaryStreamReader Reader(Stream); + if (auto EC = Reader.readObject(Header)) + return EC; + + DebugSubsectionKind Kind = + static_cast(uint32_t(Header->Kind)); + if (auto EC = Reader.readStreamRef(Info.Data, Header->Length)) + return EC; + Info.Container = Container; + Info.Kind = Kind; + return Error::success(); +} + +uint32_t DebugSubsectionRecord::getRecordLength() const { + return sizeof(DebugSubsectionHeader) + Data.getLength(); +} + +DebugSubsectionKind DebugSubsectionRecord::kind() const { return Kind; } + +BinaryStreamRef DebugSubsectionRecord::getRecordData() const { return Data; } + +DebugSubsectionRecordBuilder::DebugSubsectionRecordBuilder( + std::shared_ptr Subsection, CodeViewContainer Container) + : Subsection(std::move(Subsection)), Container(Container) {} + +DebugSubsectionRecordBuilder::DebugSubsectionRecordBuilder( + const DebugSubsectionRecord &Contents, CodeViewContainer Container) + : Contents(Contents), Container(Container) {} + +uint32_t DebugSubsectionRecordBuilder::calculateSerializedLength() { + uint32_t DataSize = Subsection ? Subsection->calculateSerializedSize() + : Contents.getRecordData().getLength(); + // The length of the entire subsection is always padded to 4 bytes, + // regardless of the container kind. + return sizeof(DebugSubsectionHeader) + alignTo(DataSize, 4); +} + +Error DebugSubsectionRecordBuilder::commit(BinaryStreamWriter &Writer) const { + assert(Writer.getOffset() % alignOf(Container) == 0 && + "Debug Subsection not properly aligned"); + + DebugSubsectionHeader Header; + Header.Kind = uint32_t(Subsection ? Subsection->kind() : Contents.kind()); + // The value written into the Header's Length field is only padded to the + // container's alignment + uint32_t DataSize = Subsection ? 
Subsection->calculateSerializedSize() + : Contents.getRecordData().getLength(); + Header.Length = alignTo(DataSize, alignOf(Container)); + + if (auto EC = Writer.writeObject(Header)) + return EC; + if (Subsection) { + if (auto EC = Subsection->commit(Writer)) + return EC; + } else { + if (auto EC = Writer.writeStreamRef(Contents.getRecordData())) + return EC; + } + if (auto EC = Writer.padToAlignment(4)) + return EC; + + return Error::success(); +} diff --git a/interpreter/llvm/src/lib/DebugInfo/CodeView/DebugSubsectionVisitor.cpp b/interpreter/llvm/src/lib/DebugInfo/CodeView/DebugSubsectionVisitor.cpp new file mode 100644 index 0000000000000..9b824333369be --- /dev/null +++ b/interpreter/llvm/src/lib/DebugInfo/CodeView/DebugSubsectionVisitor.cpp @@ -0,0 +1,95 @@ +//===- DebugSubsectionVisitor.cpp -------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/CodeView/DebugSubsectionVisitor.h" + +#include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h" +#include "llvm/DebugInfo/CodeView/DebugCrossExSubsection.h" +#include "llvm/DebugInfo/CodeView/DebugCrossImpSubsection.h" +#include "llvm/DebugInfo/CodeView/DebugFrameDataSubsection.h" +#include "llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h" +#include "llvm/DebugInfo/CodeView/DebugLinesSubsection.h" +#include "llvm/DebugInfo/CodeView/DebugStringTableSubsection.h" +#include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h" +#include "llvm/DebugInfo/CodeView/DebugSymbolRVASubsection.h" +#include "llvm/DebugInfo/CodeView/DebugSymbolsSubsection.h" +#include "llvm/DebugInfo/CodeView/DebugUnknownSubsection.h" +#include "llvm/Support/BinaryStreamReader.h" +#include "llvm/Support/BinaryStreamRef.h" + +using namespace llvm; +using namespace llvm::codeview; + +Error llvm::codeview::visitDebugSubsection( + const DebugSubsectionRecord &R, DebugSubsectionVisitor &V, + const StringsAndChecksumsRef &State) { + BinaryStreamReader Reader(R.getRecordData()); + switch (R.kind()) { + case DebugSubsectionKind::Lines: { + DebugLinesSubsectionRef Fragment; + if (auto EC = Fragment.initialize(Reader)) + return EC; + + return V.visitLines(Fragment, State); + } + case DebugSubsectionKind::FileChecksums: { + DebugChecksumsSubsectionRef Fragment; + if (auto EC = Fragment.initialize(Reader)) + return EC; + + return V.visitFileChecksums(Fragment, State); + } + case DebugSubsectionKind::InlineeLines: { + DebugInlineeLinesSubsectionRef Fragment; + if (auto EC = Fragment.initialize(Reader)) + return EC; + return V.visitInlineeLines(Fragment, State); + } + case DebugSubsectionKind::CrossScopeExports: { + DebugCrossModuleExportsSubsectionRef Section; + if (auto EC = Section.initialize(Reader)) + return EC; + return V.visitCrossModuleExports(Section, State); + } + case DebugSubsectionKind::CrossScopeImports: { + DebugCrossModuleImportsSubsectionRef Section; + if (auto EC = Section.initialize(Reader)) + return EC; + return V.visitCrossModuleImports(Section, State); + } + case DebugSubsectionKind::Symbols: { + DebugSymbolsSubsectionRef Section; + if (auto EC = Section.initialize(Reader)) + return EC; + return V.visitSymbols(Section, State); + } + case DebugSubsectionKind::StringTable: { + DebugStringTableSubsectionRef Section; + if (auto EC = Section.initialize(Reader)) + return EC; + return 
V.visitStringTable(Section, State); + } + case DebugSubsectionKind::FrameData: { + DebugFrameDataSubsectionRef Section; + if (auto EC = Section.initialize(Reader)) + return EC; + return V.visitFrameData(Section, State); + } + case DebugSubsectionKind::CoffSymbolRVA: { + DebugSymbolRVASubsectionRef Section; + if (auto EC = Section.initialize(Reader)) + return EC; + return V.visitCOFFSymbolRVAs(Section, State); + } + default: { + DebugUnknownSubsectionRef Fragment(R.kind(), R.getRecordData()); + return V.visitUnknown(Fragment); + } + } +} diff --git a/interpreter/llvm/src/lib/DebugInfo/CodeView/DebugSymbolRVASubsection.cpp b/interpreter/llvm/src/lib/DebugInfo/CodeView/DebugSymbolRVASubsection.cpp new file mode 100644 index 0000000000000..60fbf9d747b28 --- /dev/null +++ b/interpreter/llvm/src/lib/DebugInfo/CodeView/DebugSymbolRVASubsection.cpp @@ -0,0 +1,36 @@ +//===- DebugSymbolRVASubsection.cpp ---------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/CodeView/DebugSymbolRVASubsection.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/Support/BinaryStreamReader.h" +#include "llvm/Support/BinaryStreamWriter.h" +#include + +using namespace llvm; +using namespace llvm::codeview; + +DebugSymbolRVASubsectionRef::DebugSymbolRVASubsectionRef() + : DebugSubsectionRef(DebugSubsectionKind::CoffSymbolRVA) {} + +Error DebugSymbolRVASubsectionRef::initialize(BinaryStreamReader &Reader) { + return Reader.readArray(RVAs, Reader.bytesRemaining() / sizeof(uint32_t)); +} + +DebugSymbolRVASubsection::DebugSymbolRVASubsection() + : DebugSubsection(DebugSubsectionKind::CoffSymbolRVA) {} + +Error DebugSymbolRVASubsection::commit(BinaryStreamWriter &Writer) const { + return Writer.writeArray(makeArrayRef(RVAs)); +} + +uint32_t DebugSymbolRVASubsection::calculateSerializedSize() const { + return RVAs.size() * sizeof(uint32_t); +} diff --git a/interpreter/llvm/src/lib/DebugInfo/CodeView/DebugSymbolsSubsection.cpp b/interpreter/llvm/src/lib/DebugInfo/CodeView/DebugSymbolsSubsection.cpp new file mode 100644 index 0000000000000..dc8ba8c929aed --- /dev/null +++ b/interpreter/llvm/src/lib/DebugInfo/CodeView/DebugSymbolsSubsection.cpp @@ -0,0 +1,34 @@ +//===- DebugSymbolsSubsection.cpp -------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
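
// Illustrative sketch (not part of this patch): driving the dispatcher above
// over a sequence of records. StringsAndChecksumsRef bundles the string table
// and checksum subsections the visit* hooks need to map file-name offsets
// back to names; V is any DebugSubsectionVisitor implementation.
#include "llvm/ADT/ArrayRef.h"
#include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h"
#include "llvm/DebugInfo/CodeView/DebugSubsectionVisitor.h"

llvm::Error
dispatchAll(llvm::ArrayRef<llvm::codeview::DebugSubsectionRecord> Records,
            llvm::codeview::DebugSubsectionVisitor &V,
            const llvm::codeview::StringsAndChecksumsRef &State) {
  for (const auto &R : Records) {
    // Each record is parsed into the matching *SubsectionRef and handed to
    // the corresponding visit* hook, exactly as in the switch above.
    if (auto EC = llvm::codeview::visitDebugSubsection(R, V, State))
      return EC;
  }
  return llvm::Error::success();
}
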
+// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/CodeView/DebugSymbolsSubsection.h" + +using namespace llvm; +using namespace llvm::codeview; + +Error DebugSymbolsSubsectionRef::initialize(BinaryStreamReader Reader) { + return Reader.readArray(Records, Reader.getLength()); +} + +uint32_t DebugSymbolsSubsection::calculateSerializedSize() const { + return Length; +} + +Error DebugSymbolsSubsection::commit(BinaryStreamWriter &Writer) const { + for (const auto &Record : Records) { + if (auto EC = Writer.writeBytes(Record.RecordData)) + return EC; + } + return Error::success(); +} + +void DebugSymbolsSubsection::addSymbol(CVSymbol Symbol) { + Records.push_back(Symbol); + Length += Symbol.length(); +} \ No newline at end of file diff --git a/interpreter/llvm/src/lib/DebugInfo/CodeView/EnumTables.cpp b/interpreter/llvm/src/lib/DebugInfo/CodeView/EnumTables.cpp index fc6008ba66de3..4cfb55a31b356 100644 --- a/interpreter/llvm/src/lib/DebugInfo/CodeView/EnumTables.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/CodeView/EnumTables.cpp @@ -1,4 +1,4 @@ -//===- EnumTables.cpp - Enum to string conversion tables --------*- C++ -*-===// +//===- EnumTables.cpp - Enum to string conversion tables ------------------===// // // The LLVM Compiler Infrastructure // @@ -8,6 +8,8 @@ //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/CodeView/EnumTables.h" +#include "llvm/Support/ScopedPrinter.h" +#include using namespace llvm; using namespace codeview; @@ -20,13 +22,13 @@ using namespace codeview; static const EnumEntry SymbolTypeNames[] = { #define CV_SYMBOL(enum, val) {#enum, enum}, -#include "llvm/DebugInfo/CodeView/CVSymbolTypes.def" +#include "llvm/DebugInfo/CodeView/CodeViewSymbols.def" #undef CV_SYMBOL }; static const EnumEntry TypeLeafNames[] = { #define CV_TYPE(name, val) {#name, name}, -#include "llvm/DebugInfo/CodeView/TypeRecords.def" +#include "llvm/DebugInfo/CodeView/CodeViewTypes.def" #undef CV_TYPE }; @@ -82,6 +84,13 @@ static const EnumEntry RegisterNames[] = { CV_ENUM_CLASS_ENT(RegisterId, R15), }; +static const EnumEntry PublicSymFlagNames[] = { + CV_ENUM_CLASS_ENT(PublicSymFlags, Code), + CV_ENUM_CLASS_ENT(PublicSymFlags, Function), + CV_ENUM_CLASS_ENT(PublicSymFlags, Managed), + CV_ENUM_CLASS_ENT(PublicSymFlags, MSIL), +}; + static const EnumEntry ProcSymFlagNames[] = { CV_ENUM_CLASS_ENT(ProcSymFlags, HasFP), CV_ENUM_CLASS_ENT(ProcSymFlags, HasIRET), @@ -245,20 +254,20 @@ static const EnumEntry FrameProcSymFlagNames[] = { }; static const EnumEntry ModuleSubstreamKindNames[] = { - CV_ENUM_CLASS_ENT(ModuleDebugFragmentKind, None), - CV_ENUM_CLASS_ENT(ModuleDebugFragmentKind, Symbols), - CV_ENUM_CLASS_ENT(ModuleDebugFragmentKind, Lines), - CV_ENUM_CLASS_ENT(ModuleDebugFragmentKind, StringTable), - CV_ENUM_CLASS_ENT(ModuleDebugFragmentKind, FileChecksums), - CV_ENUM_CLASS_ENT(ModuleDebugFragmentKind, FrameData), - CV_ENUM_CLASS_ENT(ModuleDebugFragmentKind, InlineeLines), - CV_ENUM_CLASS_ENT(ModuleDebugFragmentKind, CrossScopeImports), - CV_ENUM_CLASS_ENT(ModuleDebugFragmentKind, CrossScopeExports), - CV_ENUM_CLASS_ENT(ModuleDebugFragmentKind, ILLines), - CV_ENUM_CLASS_ENT(ModuleDebugFragmentKind, FuncMDTokenMap), - CV_ENUM_CLASS_ENT(ModuleDebugFragmentKind, TypeMDTokenMap), - CV_ENUM_CLASS_ENT(ModuleDebugFragmentKind, MergedAssemblyInput), - CV_ENUM_CLASS_ENT(ModuleDebugFragmentKind, CoffSymbolRVA), + CV_ENUM_CLASS_ENT(DebugSubsectionKind, None), + 
CV_ENUM_CLASS_ENT(DebugSubsectionKind, Symbols), + CV_ENUM_CLASS_ENT(DebugSubsectionKind, Lines), + CV_ENUM_CLASS_ENT(DebugSubsectionKind, StringTable), + CV_ENUM_CLASS_ENT(DebugSubsectionKind, FileChecksums), + CV_ENUM_CLASS_ENT(DebugSubsectionKind, FrameData), + CV_ENUM_CLASS_ENT(DebugSubsectionKind, InlineeLines), + CV_ENUM_CLASS_ENT(DebugSubsectionKind, CrossScopeImports), + CV_ENUM_CLASS_ENT(DebugSubsectionKind, CrossScopeExports), + CV_ENUM_CLASS_ENT(DebugSubsectionKind, ILLines), + CV_ENUM_CLASS_ENT(DebugSubsectionKind, FuncMDTokenMap), + CV_ENUM_CLASS_ENT(DebugSubsectionKind, TypeMDTokenMap), + CV_ENUM_CLASS_ENT(DebugSubsectionKind, MergedAssemblyInput), + CV_ENUM_CLASS_ENT(DebugSubsectionKind, CoffSymbolRVA), }; static const EnumEntry ExportSymFlagNames[] = { @@ -326,6 +335,7 @@ static const EnumEntry namespace llvm { namespace codeview { + ArrayRef> getSymbolTypeNames() { return makeArrayRef(SymbolTypeNames); } @@ -338,48 +348,66 @@ ArrayRef> getRegisterNames() { return makeArrayRef(RegisterNames); } +ArrayRef> getPublicSymFlagNames() { + return makeArrayRef(PublicSymFlagNames); +} + ArrayRef> getProcSymFlagNames() { return makeArrayRef(ProcSymFlagNames); } + ArrayRef> getLocalFlagNames() { return makeArrayRef(LocalFlags); } + ArrayRef> getFrameCookieKindNames() { return makeArrayRef(FrameCookieKinds); } + ArrayRef> getSourceLanguageNames() { return makeArrayRef(SourceLanguages); } + ArrayRef> getCompileSym2FlagNames() { return makeArrayRef(CompileSym2FlagNames); } + ArrayRef> getCompileSym3FlagNames() { return makeArrayRef(CompileSym3FlagNames); } + ArrayRef> getFileChecksumNames() { return makeArrayRef(FileChecksumNames); } + ArrayRef> getCPUTypeNames() { return makeArrayRef(CPUTypeNames); } + ArrayRef> getFrameProcSymFlagNames() { return makeArrayRef(FrameProcSymFlagNames); } + ArrayRef> getExportSymFlagNames() { return makeArrayRef(ExportSymFlagNames); } + ArrayRef> getModuleSubstreamKindNames() { return makeArrayRef(ModuleSubstreamKindNames); } + ArrayRef> getThunkOrdinalNames() { return makeArrayRef(ThunkOrdinalNames); } + ArrayRef> getTrampolineNames() { return makeArrayRef(TrampolineNames); } + ArrayRef> getImageSectionCharacteristicNames() { return makeArrayRef(ImageSectionCharacteristicNames); } -} -} + +} // end namespace codeview +} // end namespace llvm diff --git a/interpreter/llvm/src/lib/DebugInfo/CodeView/Formatters.cpp b/interpreter/llvm/src/lib/DebugInfo/CodeView/Formatters.cpp index ef00bd8570fa9..b8d89c76da3b6 100644 --- a/interpreter/llvm/src/lib/DebugInfo/CodeView/Formatters.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/CodeView/Formatters.cpp @@ -1,4 +1,4 @@ -//===- Formatters.cpp -------------------------------------------*- C++ -*-===// +//===- Formatters.cpp -----------------------------------------------------===// // // The LLVM Compiler Infrastructure // @@ -8,6 +8,11 @@ //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/CodeView/Formatters.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/DebugInfo/CodeView/GUID.h" +#include "llvm/Support/raw_ostream.h" +#include +#include using namespace llvm; using namespace llvm::codeview; @@ -19,7 +24,7 @@ GuidAdapter::GuidAdapter(StringRef Guid) GuidAdapter::GuidAdapter(ArrayRef Guid) : FormatAdapter(std::move(Guid)) {} -void GuidAdapter::format(llvm::raw_ostream &Stream, StringRef Style) { +void GuidAdapter::format(raw_ostream &Stream, StringRef Style) { static const char *Lookup = "0123456789ABCDEF"; assert(Item.size() == 16 && "Expected 16-byte GUID"); 
@@ -35,3 +40,9 @@ void GuidAdapter::format(llvm::raw_ostream &Stream, StringRef Style) { } Stream << "}"; } + +raw_ostream &llvm::codeview::operator<<(raw_ostream &OS, const GUID &Guid) { + codeview::detail::GuidAdapter A(Guid.Guid); + A.format(OS, ""); + return OS; +} diff --git a/interpreter/llvm/src/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp b/interpreter/llvm/src/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp new file mode 100644 index 0000000000000..5aaf3f1453a8f --- /dev/null +++ b/interpreter/llvm/src/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp @@ -0,0 +1,252 @@ +//===- LazyRandomTypeCollection.cpp ---------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/DebugInfo/CodeView/CodeViewError.h" +#include "llvm/DebugInfo/CodeView/TypeName.h" +#include "llvm/DebugInfo/CodeView/TypeRecord.h" +#include "llvm/Support/BinaryStreamReader.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/Error.h" +#include +#include +#include +#include + +using namespace llvm; +using namespace llvm::codeview; + +static void error(Error &&EC) { + assert(!static_cast(EC)); + if (EC) + consumeError(std::move(EC)); +} + +LazyRandomTypeCollection::LazyRandomTypeCollection(uint32_t RecordCountHint) + : LazyRandomTypeCollection(CVTypeArray(), RecordCountHint, + PartialOffsetArray()) {} + +LazyRandomTypeCollection::LazyRandomTypeCollection( + const CVTypeArray &Types, uint32_t RecordCountHint, + PartialOffsetArray PartialOffsets) + : NameStorage(Allocator), Types(Types), PartialOffsets(PartialOffsets) { + Records.resize(RecordCountHint); +} + +LazyRandomTypeCollection::LazyRandomTypeCollection(ArrayRef Data, + uint32_t RecordCountHint) + : LazyRandomTypeCollection(RecordCountHint) { +} + +LazyRandomTypeCollection::LazyRandomTypeCollection(StringRef Data, + uint32_t RecordCountHint) + : LazyRandomTypeCollection( + makeArrayRef(Data.bytes_begin(), Data.bytes_end()), RecordCountHint) { +} + +LazyRandomTypeCollection::LazyRandomTypeCollection(const CVTypeArray &Types, + uint32_t NumRecords) + : LazyRandomTypeCollection(Types, NumRecords, PartialOffsetArray()) {} + +void LazyRandomTypeCollection::reset(StringRef Data, uint32_t RecordCountHint) { + Count = 0; + PartialOffsets = PartialOffsetArray(); + + BinaryStreamReader Reader(Data, support::little); + error(Reader.readArray(Types, Reader.getLength())); + + // Clear and then resize, to make sure existing data gets destroyed. 
+  Records.clear();
+  Records.resize(RecordCountHint);
+}
+
+void LazyRandomTypeCollection::reset(ArrayRef<uint8_t> Data,
+                                     uint32_t RecordCountHint) {
+  reset(toStringRef(Data), RecordCountHint);
+}
+
+uint32_t LazyRandomTypeCollection::getOffsetOfType(TypeIndex Index) {
+  error(ensureTypeExists(Index));
+  assert(contains(Index));
+
+  return Records[Index.toArrayIndex()].Offset;
+}
+
+CVType LazyRandomTypeCollection::getType(TypeIndex Index) {
+  error(ensureTypeExists(Index));
+  assert(contains(Index));
+
+  return Records[Index.toArrayIndex()].Type;
+}
+
+StringRef LazyRandomTypeCollection::getTypeName(TypeIndex Index) {
+  if (Index.isNoneType() || Index.isSimple())
+    return TypeIndex::simpleTypeName(Index);
+
+  // Try to make sure the type exists. Even if it doesn't though, it may be
+  // because we're dumping a symbol stream with no corresponding type stream
+  // present, in which case we still want to be able to print <unknown UDT>
+  // for the type names.
+  if (auto EC = ensureTypeExists(Index)) {
+    consumeError(std::move(EC));
+    return "<unknown UDT>";
+  }
+
+  uint32_t I = Index.toArrayIndex();
+  ensureCapacityFor(Index);
+  if (Records[I].Name.data() == nullptr) {
+    StringRef Result = NameStorage.save(computeTypeName(*this, Index));
+    Records[I].Name = Result;
+  }
+  return Records[I].Name;
+}
+
+bool LazyRandomTypeCollection::contains(TypeIndex Index) {
+  if (Records.size() <= Index.toArrayIndex())
+    return false;
+  if (!Records[Index.toArrayIndex()].Type.valid())
+    return false;
+  return true;
+}
+
+uint32_t LazyRandomTypeCollection::size() { return Count; }
+
+uint32_t LazyRandomTypeCollection::capacity() { return Records.size(); }
+
+Error LazyRandomTypeCollection::ensureTypeExists(TypeIndex TI) {
+  if (contains(TI))
+    return Error::success();
+
+  return visitRangeForType(TI);
+}
+
+void LazyRandomTypeCollection::ensureCapacityFor(TypeIndex Index) {
+  uint32_t MinSize = Index.toArrayIndex() + 1;
+
+  if (MinSize <= capacity())
+    return;
+
+  uint32_t NewCapacity = MinSize * 3 / 2;
+
+  assert(NewCapacity > capacity());
+  Records.resize(NewCapacity);
+}
+
+Error LazyRandomTypeCollection::visitRangeForType(TypeIndex TI) {
+  if (PartialOffsets.empty())
+    return fullScanForType(TI);
+
+  auto Next = std::upper_bound(PartialOffsets.begin(), PartialOffsets.end(), TI,
+                               [](TypeIndex Value, const TypeIndexOffset &IO) {
+                                 return Value < IO.Type;
+                               });
+
+  assert(Next != PartialOffsets.begin());
+  auto Prev = std::prev(Next);
+
+  TypeIndex TIB = Prev->Type;
+  if (contains(TIB)) {
+    // They've asked us to fetch a type index, but the entry we found in the
+    // partial offsets array has already been visited. Since we visit an entire
+    // block every time, that means this record should have been previously
+    // discovered. Ultimately, this means this is a request for a non-existant
+    // type index.
+    return make_error<CodeViewError>("Invalid type index");
+  }
+
+  TypeIndex TIE;
+  if (Next == PartialOffsets.end()) {
+    TIE = TypeIndex::fromArrayIndex(capacity());
+  } else {
+    TIE = Next->Type;
+  }
+
+  visitRange(TIB, Prev->Offset, TIE);
+  return Error::success();
+}
+
+Optional<TypeIndex> LazyRandomTypeCollection::getFirst() {
+  TypeIndex TI = TypeIndex::fromArrayIndex(0);
+  if (auto EC = ensureTypeExists(TI)) {
+    consumeError(std::move(EC));
+    return None;
+  }
+  return TI;
+}
+
+Optional<TypeIndex> LazyRandomTypeCollection::getNext(TypeIndex Prev) {
+  // We can't be sure how long this type stream is, given that the initial count
+  // given to the constructor is just a hint.
So just try to make sure the next + // record exists, and if anything goes wrong, we must be at the end. + if (auto EC = ensureTypeExists(Prev + 1)) { + consumeError(std::move(EC)); + return None; + } + + return Prev + 1; +} + +Error LazyRandomTypeCollection::fullScanForType(TypeIndex TI) { + assert(PartialOffsets.empty()); + + TypeIndex CurrentTI = TypeIndex::fromArrayIndex(0); + auto Begin = Types.begin(); + + if (Count > 0) { + // In the case of type streams which we don't know the number of records of, + // it's possible to search for a type index triggering a full scan, but then + // later additional records are added since we didn't know how many there + // would be until we did a full visitation, then you try to access the new + // type triggering another full scan. To avoid this, we assume that if the + // database has some records, this must be what's going on. We can also + // assume that this index must be larger than the largest type index we've + // visited, so we start from there and scan forward. + uint32_t Offset = Records[LargestTypeIndex.toArrayIndex()].Offset; + CurrentTI = LargestTypeIndex + 1; + Begin = Types.at(Offset); + ++Begin; + } + + auto End = Types.end(); + while (Begin != End) { + ensureCapacityFor(CurrentTI); + LargestTypeIndex = std::max(LargestTypeIndex, CurrentTI); + auto Idx = CurrentTI.toArrayIndex(); + Records[Idx].Type = *Begin; + Records[Idx].Offset = Begin.offset(); + ++Count; + ++Begin; + ++CurrentTI; + } + if (CurrentTI <= TI) { + return make_error("Type Index does not exist!"); + } + return Error::success(); +} + +void LazyRandomTypeCollection::visitRange(TypeIndex Begin, uint32_t BeginOffset, + TypeIndex End) { + auto RI = Types.at(BeginOffset); + assert(RI != Types.end()); + + ensureCapacityFor(End); + while (Begin != End) { + LargestTypeIndex = std::max(LargestTypeIndex, Begin); + auto Idx = Begin.toArrayIndex(); + Records[Idx].Type = *RI; + Records[Idx].Offset = RI.offset(); + ++Count; + ++Begin; + ++RI; + } +} diff --git a/interpreter/llvm/src/lib/DebugInfo/CodeView/ModuleDebugFragmentRecord.cpp b/interpreter/llvm/src/lib/DebugInfo/CodeView/ModuleDebugFragmentRecord.cpp deleted file mode 100644 index b2543de780699..0000000000000 --- a/interpreter/llvm/src/lib/DebugInfo/CodeView/ModuleDebugFragmentRecord.cpp +++ /dev/null @@ -1,84 +0,0 @@ -//===- ModuleDebugFragmentRecord.cpp -----------------------------*- C++-*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
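
// Illustrative sketch (not part of this patch): forward iteration over the
// LazyRandomTypeCollection above. getFirst()/getNext() return None once the
// underlying stream is exhausted, which is how iteration works even when the
// record count passed to the constructor was only a hint.
#include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h"

unsigned countTypes(llvm::codeview::LazyRandomTypeCollection &Types) {
  unsigned N = 0;
  auto I = Types.getFirst();
  while (I) {
    ++N;                   // Types.getType(*I) would fetch the record itself
    I = Types.getNext(*I); // lazily scans forward as needed
  }
  return N;
}
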
-// -//===----------------------------------------------------------------------===// - -#include "llvm/DebugInfo/CodeView/ModuleDebugFragmentRecord.h" -#include "llvm/DebugInfo/CodeView/ModuleDebugFragment.h" - -#include "llvm/Support/BinaryStreamReader.h" - -using namespace llvm; -using namespace llvm::codeview; - -ModuleDebugFragmentRecord::ModuleDebugFragmentRecord() - : Kind(ModuleDebugFragmentKind::None) {} - -ModuleDebugFragmentRecord::ModuleDebugFragmentRecord( - ModuleDebugFragmentKind Kind, BinaryStreamRef Data) - : Kind(Kind), Data(Data) {} - -Error ModuleDebugFragmentRecord::initialize(BinaryStreamRef Stream, - ModuleDebugFragmentRecord &Info) { - const ModuleDebugFragmentHeader *Header; - BinaryStreamReader Reader(Stream); - if (auto EC = Reader.readObject(Header)) - return EC; - - ModuleDebugFragmentKind Kind = - static_cast(uint32_t(Header->Kind)); - switch (Kind) { - case ModuleDebugFragmentKind::FileChecksums: - case ModuleDebugFragmentKind::Lines: - case ModuleDebugFragmentKind::InlineeLines: - break; - default: - llvm_unreachable("Unexpected debug fragment kind!"); - } - if (auto EC = Reader.readStreamRef(Info.Data, Header->Length)) - return EC; - Info.Kind = Kind; - return Error::success(); -} - -uint32_t ModuleDebugFragmentRecord::getRecordLength() const { - uint32_t Result = sizeof(ModuleDebugFragmentHeader) + Data.getLength(); - assert(Result % 4 == 0); - return Result; -} - -ModuleDebugFragmentKind ModuleDebugFragmentRecord::kind() const { return Kind; } - -BinaryStreamRef ModuleDebugFragmentRecord::getRecordData() const { - return Data; -} - -ModuleDebugFragmentRecordBuilder::ModuleDebugFragmentRecordBuilder( - ModuleDebugFragmentKind Kind, ModuleDebugFragment &Frag) - : Kind(Kind), Frag(Frag) {} - -uint32_t ModuleDebugFragmentRecordBuilder::calculateSerializedLength() { - uint32_t Size = sizeof(ModuleDebugFragmentHeader) + - alignTo(Frag.calculateSerializedLength(), 4); - return Size; -} - -Error ModuleDebugFragmentRecordBuilder::commit(BinaryStreamWriter &Writer) { - ModuleDebugFragmentHeader Header; - Header.Kind = uint32_t(Kind); - Header.Length = - calculateSerializedLength() - sizeof(ModuleDebugFragmentHeader); - - if (auto EC = Writer.writeObject(Header)) - return EC; - if (auto EC = Frag.commit(Writer)) - return EC; - if (auto EC = Writer.padToAlignment(4)) - return EC; - - return Error::success(); -} diff --git a/interpreter/llvm/src/lib/DebugInfo/CodeView/ModuleDebugFragmentVisitor.cpp b/interpreter/llvm/src/lib/DebugInfo/CodeView/ModuleDebugFragmentVisitor.cpp deleted file mode 100644 index dc591f3990e27..0000000000000 --- a/interpreter/llvm/src/lib/DebugInfo/CodeView/ModuleDebugFragmentVisitor.cpp +++ /dev/null @@ -1,52 +0,0 @@ -//===- ModuleDebugFragmentVisitor.cpp ---------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// - -#include "llvm/DebugInfo/CodeView/ModuleDebugFragmentVisitor.h" - -#include "llvm/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.h" -#include "llvm/DebugInfo/CodeView/ModuleDebugFragmentRecord.h" -#include "llvm/DebugInfo/CodeView/ModuleDebugInlineeLinesFragment.h" -#include "llvm/DebugInfo/CodeView/ModuleDebugLineFragment.h" -#include "llvm/DebugInfo/CodeView/ModuleDebugUnknownFragment.h" -#include "llvm/Support/BinaryStreamReader.h" -#include "llvm/Support/BinaryStreamRef.h" - -using namespace llvm; -using namespace llvm::codeview; - -Error llvm::codeview::visitModuleDebugFragment( - const ModuleDebugFragmentRecord &R, ModuleDebugFragmentVisitor &V) { - BinaryStreamReader Reader(R.getRecordData()); - switch (R.kind()) { - case ModuleDebugFragmentKind::Lines: { - ModuleDebugLineFragmentRef Fragment; - if (auto EC = Fragment.initialize(Reader)) - return EC; - - return V.visitLines(Fragment); - } - case ModuleDebugFragmentKind::FileChecksums: { - ModuleDebugFileChecksumFragmentRef Fragment; - if (auto EC = Fragment.initialize(Reader)) - return EC; - - return V.visitFileChecksums(Fragment); - } - case ModuleDebugFragmentKind::InlineeLines: { - ModuleDebugInlineeLineFragmentRef Fragment; - if (auto EC = Fragment.initialize(Reader)) - return EC; - return V.visitInlineeLines(Fragment); - } - default: { - ModuleDebugUnknownFragmentRef Fragment(R.kind(), R.getRecordData()); - return V.visitUnknown(Fragment); - } - } -} diff --git a/interpreter/llvm/src/lib/DebugInfo/CodeView/RandomAccessTypeVisitor.cpp b/interpreter/llvm/src/lib/DebugInfo/CodeView/RandomAccessTypeVisitor.cpp deleted file mode 100644 index 4cb9acbe07d9d..0000000000000 --- a/interpreter/llvm/src/lib/DebugInfo/CodeView/RandomAccessTypeVisitor.cpp +++ /dev/null @@ -1,91 +0,0 @@ -//===- RandomAccessTypeVisitor.cpp ---------------------------- *- C++ --*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
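visitModuleDebugFragment above is a deserialize-then-dispatch switch: the record's kind tag selects a typed *Ref view, which is initialized from the raw bytes and handed to the matching visitor hook, with unknown kinds routed to a catch-all instead of being rejected. The shape of the pattern, reduced to a sketch with hypothetical types:

    #include <cstdint>
    #include <vector>

    enum class Kind { Lines, FileChecksums, InlineeLines, Unknown };

    struct Visitor {                     // hypothetical visitor interface
      virtual ~Visitor() = default;
      virtual bool visitLines(const std::vector<uint8_t> &) { return true; }
      virtual bool visitChecksums(const std::vector<uint8_t> &) { return true; }
      virtual bool visitInlinees(const std::vector<uint8_t> &) { return true; }
      virtual bool visitUnknown(const std::vector<uint8_t> &) { return true; }
    };

    static bool dispatch(Kind K, const std::vector<uint8_t> &Data, Visitor &V) {
      switch (K) {
      case Kind::Lines:         return V.visitLines(Data);
      case Kind::FileChecksums: return V.visitChecksums(Data);
      case Kind::InlineeLines:  return V.visitInlinees(Data);
      default:                  return V.visitUnknown(Data);  // never reject
      }
    }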
-// -//===----------------------------------------------------------------------===// - -#include "llvm/DebugInfo/CodeView/RandomAccessTypeVisitor.h" - -#include "llvm/DebugInfo/CodeView/TypeDatabase.h" -#include "llvm/DebugInfo/CodeView/TypeServerHandler.h" -#include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h" - -using namespace llvm; -using namespace llvm::codeview; - -RandomAccessTypeVisitor::RandomAccessTypeVisitor( - const CVTypeArray &Types, uint32_t NumRecords, - PartialOffsetArray PartialOffsets) - : Database(NumRecords), Types(Types), DatabaseVisitor(Database), - InternalVisitor(Pipeline), PartialOffsets(PartialOffsets) { - Pipeline.addCallbackToPipeline(Deserializer); - Pipeline.addCallbackToPipeline(DatabaseVisitor); - - KnownOffsets.resize(Database.capacity()); -} - -Error RandomAccessTypeVisitor::visitTypeIndex(TypeIndex TI, - TypeVisitorCallbacks &Callbacks) { - assert(TI.toArrayIndex() < Database.capacity()); - - if (!Database.contains(TI)) { - if (auto EC = visitRangeForType(TI)) - return EC; - } - - assert(Database.contains(TI)); - auto &Record = Database.getTypeRecord(TI); - CVTypeVisitor V(Callbacks); - return V.visitTypeRecord(Record, TI); -} - -Error RandomAccessTypeVisitor::visitRangeForType(TypeIndex TI) { - if (PartialOffsets.empty()) { - TypeIndex TIB(TypeIndex::FirstNonSimpleIndex); - TypeIndex TIE = TIB + Database.capacity(); - return visitRange(TIB, 0, TIE); - } - - auto Next = std::upper_bound(PartialOffsets.begin(), PartialOffsets.end(), TI, - [](TypeIndex Value, const TypeIndexOffset &IO) { - return Value < IO.Type; - }); - - assert(Next != PartialOffsets.begin()); - auto Prev = std::prev(Next); - - TypeIndex TIB = Prev->Type; - TypeIndex TIE; - if (Next == PartialOffsets.end()) { - TIE = TypeIndex::fromArrayIndex(Database.capacity()); - } else { - TIE = Next->Type; - } - - if (auto EC = visitRange(TIB, Prev->Offset, TIE)) - return EC; - return Error::success(); -} - -Error RandomAccessTypeVisitor::visitRange(TypeIndex Begin, uint32_t BeginOffset, - TypeIndex End) { - - auto RI = Types.at(BeginOffset); - assert(RI != Types.end()); - - while (Begin != End) { - assert(!Database.contains(Begin)); - if (auto EC = InternalVisitor.visitTypeRecord(*RI, Begin)) - return EC; - KnownOffsets[Begin.toArrayIndex()] = BeginOffset; - - BeginOffset += RI.getRecordLength(); - ++Begin; - ++RI; - } - - return Error::success(); -} diff --git a/interpreter/llvm/src/lib/DebugInfo/CodeView/StringTable.cpp b/interpreter/llvm/src/lib/DebugInfo/CodeView/StringTable.cpp deleted file mode 100644 index 21f11204686b4..0000000000000 --- a/interpreter/llvm/src/lib/DebugInfo/CodeView/StringTable.cpp +++ /dev/null @@ -1,71 +0,0 @@ -//===- StringTable.cpp - CodeView String Table Reader/Writer ----*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
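visitRangeForType above is the core of the random-access scheme: PartialOffsets holds sparse, sorted (TypeIndex, Offset) anchors, and a binary search locates the last anchor at or before the requested index, so only the records between two anchors ever need deserializing. The search itself, extracted into a sketch:

    #include <algorithm>
    #include <cstdint>
    #include <iterator>
    #include <vector>

    struct IndexOffset { uint32_t Index; uint32_t Offset; };  // sorted by Index

    // Find the closest anchor at or before TI. Mirrors the upper_bound /
    // std::prev pairing above; assumes Anchors is non-empty and its first
    // entry is <= TI, as the assert in the original implies.
    static IndexOffset findAnchor(const std::vector<IndexOffset> &Anchors,
                                  uint32_t TI) {
      auto Next = std::upper_bound(
          Anchors.begin(), Anchors.end(), TI,
          [](uint32_t Value, const IndexOffset &IO) { return Value < IO.Index; });
      return *std::prev(Next);          // last anchor with Index <= TI
    }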
-// -//===----------------------------------------------------------------------===// - -#include "llvm/DebugInfo/CodeView/StringTable.h" - -#include "llvm/Support/BinaryStream.h" -#include "llvm/Support/BinaryStreamReader.h" -#include "llvm/Support/BinaryStreamWriter.h" - -using namespace llvm; -using namespace llvm::codeview; - -StringTableRef::StringTableRef() {} - -Error StringTableRef::initialize(BinaryStreamRef Contents) { - Stream = Contents; - return Error::success(); -} - -Expected<StringRef> StringTableRef::getString(uint32_t Offset) const { - BinaryStreamReader Reader(Stream); - Reader.setOffset(Offset); - StringRef Result; - if (auto EC = Reader.readCString(Result)) - return std::move(EC); - return Result; -} - -uint32_t StringTable::insert(StringRef S) { - auto P = Strings.insert({S, StringSize}); - - // If a given string didn't exist in the string table, we want to increment - // the string table size. - if (P.second) - StringSize += S.size() + 1; // +1 for '\0' - return P.first->second; -} - -uint32_t StringTable::calculateSerializedSize() const { return StringSize; } - -Error StringTable::commit(BinaryStreamWriter &Writer) const { - assert(Writer.bytesRemaining() == StringSize); - uint32_t MaxOffset = 1; - - for (auto &Pair : Strings) { - StringRef S = Pair.getKey(); - uint32_t Offset = Pair.getValue(); - Writer.setOffset(Offset); - if (auto EC = Writer.writeCString(S)) - return EC; - MaxOffset = std::max(MaxOffset, Offset + S.size() + 1); - } - - Writer.setOffset(MaxOffset); - assert(Writer.bytesRemaining() == 0); - return Error::success(); -} - -uint32_t StringTable::size() const { return Strings.size(); } - -uint32_t StringTable::getStringId(StringRef S) const { - auto P = Strings.find(S); - assert(P != Strings.end()); - return P->second; -} diff --git a/interpreter/llvm/src/lib/DebugInfo/CodeView/StringsAndChecksums.cpp b/interpreter/llvm/src/lib/DebugInfo/CodeView/StringsAndChecksums.cpp new file mode 100644 index 0000000000000..306af1d1ef6bc --- /dev/null +++ b/interpreter/llvm/src/lib/DebugInfo/CodeView/StringsAndChecksums.cpp @@ -0,0 +1,59 @@ +//===- StringsAndChecksums.cpp --------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details.
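The deleted StringTable above hands out byte offsets as string IDs: a newly inserted string is assigned the current serialized size, which then grows by the string's length plus one for the NUL terminator, so an ID doubles as a seek position into the committed buffer. Minimal sketch of the same bookkeeping, with std::map standing in for llvm::StringMap (the initial size of 1 mirrors the MaxOffset = 1 starting point in commit above):

    #include <cstdint>
    #include <map>
    #include <string>

    class OffsetStringTable {            // hypothetical name
      std::map<std::string, uint32_t> Strings;
      uint32_t Size = 1;                 // byte 0 holds an initial NUL
    public:
      uint32_t insert(const std::string &S) {
        auto P = Strings.insert({S, Size});
        if (P.second)                    // new string: reserve its bytes
          Size += uint32_t(S.size()) + 1;  // +1 for '\0'
        return P.first->second;          // duplicates keep their old offset
      }
      uint32_t serializedSize() const { return Size; }
    };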
+// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/CodeView/StringsAndChecksums.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h" +#include "llvm/DebugInfo/CodeView/DebugStringTableSubsection.h" +#include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h" +#include "llvm/Support/Error.h" +#include <cassert> + +using namespace llvm; +using namespace llvm::codeview; + +StringsAndChecksumsRef::StringsAndChecksumsRef() = default; + +StringsAndChecksumsRef::StringsAndChecksumsRef( + const DebugStringTableSubsectionRef &Strings) + : Strings(&Strings) {} + +StringsAndChecksumsRef::StringsAndChecksumsRef( + const DebugStringTableSubsectionRef &Strings, + const DebugChecksumsSubsectionRef &Checksums) + : Strings(&Strings), Checksums(&Checksums) {} + +void StringsAndChecksumsRef::initializeStrings( + const DebugSubsectionRecord &SR) { + assert(SR.kind() == DebugSubsectionKind::StringTable); + assert(!Strings && "Found a string table even though we already have one!"); + + OwnedStrings = llvm::make_unique<DebugStringTableSubsectionRef>(); + consumeError(OwnedStrings->initialize(SR.getRecordData())); + Strings = OwnedStrings.get(); +} + +void StringsAndChecksumsRef::setChecksums( + const DebugChecksumsSubsectionRef &CS) { + OwnedChecksums = llvm::make_unique<DebugChecksumsSubsectionRef>(); + *OwnedChecksums = CS; + Checksums = OwnedChecksums.get(); +} + +void StringsAndChecksumsRef::initializeChecksums( + const DebugSubsectionRecord &FCR) { + assert(FCR.kind() == DebugSubsectionKind::FileChecksums); + if (Checksums) + return; + + OwnedChecksums = llvm::make_unique<DebugChecksumsSubsectionRef>(); + consumeError(OwnedChecksums->initialize(FCR.getRecordData())); + Checksums = OwnedChecksums.get(); +} diff --git a/interpreter/llvm/src/lib/DebugInfo/CodeView/SymbolDumper.cpp b/interpreter/llvm/src/lib/DebugInfo/CodeView/SymbolDumper.cpp index 5395e4349b28d..62e73acc72d6d 100644 --- a/interpreter/llvm/src/lib/DebugInfo/CodeView/SymbolDumper.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/CodeView/SymbolDumper.cpp @@ -11,9 +11,8 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallString.h" #include "llvm/DebugInfo/CodeView/CVSymbolVisitor.h" -#include "llvm/DebugInfo/CodeView/CVTypeDumper.h" +#include "llvm/DebugInfo/CodeView/DebugStringTableSubsection.h" #include "llvm/DebugInfo/CodeView/EnumTables.h" -#include "llvm/DebugInfo/CodeView/StringTable.h" #include "llvm/DebugInfo/CodeView/SymbolDeserializer.h" #include "llvm/DebugInfo/CodeView/SymbolDumpDelegate.h" #include "llvm/DebugInfo/CodeView/SymbolRecord.h" @@ -33,16 +32,16 @@ namespace { /// the visitor out of SymbolDumper.h. class CVSymbolDumperImpl : public SymbolVisitorCallbacks { public: - CVSymbolDumperImpl(TypeDatabase &TypeDB, SymbolDumpDelegate *ObjDelegate, + CVSymbolDumperImpl(TypeCollection &Types, SymbolDumpDelegate *ObjDelegate, ScopedPrinter &W, bool PrintRecordBytes) - : TypeDB(TypeDB), ObjDelegate(ObjDelegate), W(W), + : Types(Types), ObjDelegate(ObjDelegate), W(W), PrintRecordBytes(PrintRecordBytes), InFunctionScope(false) {} /// CVSymbolVisitor overrides.
#define SYMBOL_RECORD(EnumName, EnumVal, Name) \ Error visitKnownRecord(CVSymbol &CVR, Name &Record) override; #define SYMBOL_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) -#include "llvm/DebugInfo/CodeView/CVSymbolTypes.def" +#include "llvm/DebugInfo/CodeView/CodeViewSymbols.def" Error visitSymbolBegin(CVSymbol &Record) override; Error visitSymbolEnd(CVSymbol &Record) override; @@ -54,7 +53,7 @@ class CVSymbolDumperImpl : public SymbolVisitorCallbacks { void printLocalVariableAddrGap(ArrayRef Gaps); void printTypeIndex(StringRef FieldName, TypeIndex TI); - TypeDatabase &TypeDB; + TypeCollection &Types; SymbolDumpDelegate *ObjDelegate; ScopedPrinter &W; @@ -63,6 +62,18 @@ class CVSymbolDumperImpl : public SymbolVisitorCallbacks { }; } +static StringRef getSymbolKindName(SymbolKind Kind) { + switch (Kind) { +#define SYMBOL_RECORD(EnumName, EnumVal, Name) \ + case EnumName: \ + return #Name; +#include "llvm/DebugInfo/CodeView/CodeViewSymbols.def" + default: + break; + } + return "UnknownSym"; +} + void CVSymbolDumperImpl::printLocalVariableAddrRange( const LocalVariableAddrRange &Range, uint32_t RelocationOffset) { DictScope S(W, "LocalVariableAddrRange"); @@ -83,22 +94,27 @@ void CVSymbolDumperImpl::printLocalVariableAddrGap( } void CVSymbolDumperImpl::printTypeIndex(StringRef FieldName, TypeIndex TI) { - CVTypeDumper::printTypeIndex(W, FieldName, TI, TypeDB); + codeview::printTypeIndex(W, FieldName, TI, Types); } Error CVSymbolDumperImpl::visitSymbolBegin(CVSymbol &CVR) { + W.startLine() << getSymbolKindName(CVR.Type); + W.getOStream() << " {\n"; + W.indent(); + W.printEnum("Kind", unsigned(CVR.Type), getSymbolTypeNames()); return Error::success(); } Error CVSymbolDumperImpl::visitSymbolEnd(CVSymbol &CVR) { if (PrintRecordBytes && ObjDelegate) ObjDelegate->printBinaryBlockWithRelocs("SymData", CVR.content()); + + W.unindent(); + W.startLine() << "}\n"; return Error::success(); } Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, BlockSym &Block) { - DictScope S(W, "BlockStart"); - StringRef LinkageName; W.printHex("PtrParent", Block.Parent); W.printHex("PtrEnd", Block.End); @@ -114,7 +130,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, BlockSym &Block) { } Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, Thunk32Sym &Thunk) { - DictScope S(W, "Thunk32"); W.printNumber("Parent", Thunk.Parent); W.printNumber("End", Thunk.End); W.printNumber("Next", Thunk.Next); @@ -127,7 +142,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, Thunk32Sym &Thunk) { Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, TrampolineSym &Tramp) { - DictScope S(W, "Trampoline"); W.printEnum("Type", uint16_t(Tramp.Type), getTrampolineNames()); W.printNumber("Size", Tramp.Size); W.printNumber("ThunkOff", Tramp.ThunkOffset); @@ -138,7 +152,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, } Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, SectionSym &Section) { - DictScope S(W, "Section"); W.printNumber("SectionNumber", Section.SectionNumber); W.printNumber("Alignment", Section.Alignment); W.printNumber("Rva", Section.Rva); @@ -153,7 +166,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, SectionSym &Section) { Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, CoffGroupSym &CoffGroup) { - DictScope S(W, "COFF Group"); W.printNumber("Size", CoffGroup.Size); W.printFlags("Characteristics", CoffGroup.Characteristics, getImageSectionCharacteristicNames(), @@ -166,8 +178,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, 
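getSymbolKindName above is generated with an X-macro: CodeViewSymbols.def expands SYMBOL_RECORD once per record type, so one list drives the enum values, the visitKnownRecord declarations, and this name switch. A self-contained illustration of the technique, with a three-entry inline list standing in for the .def file (the numeric values follow the CodeView definitions):

    #include <cstdio>

    #define MY_SYMBOLS(X)              \
      X(S_THUNK32, 0x1102, Thunk32Sym) \
      X(S_BLOCK32, 0x1103, BlockSym)   \
      X(S_LPROC32, 0x110f, ProcSym)

    enum SymKind {
    #define ENUM_ENTRY(EnumName, EnumVal, Name) EnumName = EnumVal,
      MY_SYMBOLS(ENUM_ENTRY)
    #undef ENUM_ENTRY
    };

    static const char *symKindName(int Kind) {
      switch (Kind) {
    #define NAME_ENTRY(EnumName, EnumVal, Name) case EnumName: return #Name;
      MY_SYMBOLS(NAME_ENTRY)
    #undef NAME_ENTRY
      default: return "UnknownSym";
      }
    }

    int main() { std::printf("%s\n", symKindName(0x1103)); } // prints BlockSym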
Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, BPRelativeSym &BPRel) { - DictScope S(W, "BPRelativeSym"); - W.printNumber("Offset", BPRel.Offset); printTypeIndex("Type", BPRel.Type); W.printString("VarName", BPRel.Name); @@ -176,16 +186,12 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, BuildInfoSym &BuildInfo) { - DictScope S(W, "BuildInfo"); - - W.printNumber("BuildId", BuildInfo.BuildId); + printTypeIndex("BuildId", BuildInfo.BuildId); return Error::success(); } Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, CallSiteInfoSym &CallSiteInfo) { - DictScope S(W, "CallSiteInfo"); - StringRef LinkageName; if (ObjDelegate) { ObjDelegate->printRelocatedField("CodeOffset", @@ -201,8 +207,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, EnvBlockSym &EnvBlock) { - DictScope S(W, "EnvBlock"); - ListScope L(W, "Entries"); for (auto Entry : EnvBlock.Fields) { W.printString(Entry); @@ -212,8 +216,7 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, FileStaticSym &FileStatic) { - DictScope S(W, "FileStatic"); - W.printNumber("Index", FileStatic.Index); + printTypeIndex("Index", FileStatic.Index); W.printNumber("ModFilenameOffset", FileStatic.ModFilenameOffset); W.printFlags("Flags", uint16_t(FileStatic.Flags), getLocalFlagNames()); W.printString("Name", FileStatic.Name); @@ -221,7 +224,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, } Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, ExportSym &Export) { - DictScope S(W, "Export"); W.printNumber("Ordinal", Export.Ordinal); W.printFlags("Flags", uint16_t(Export.Flags), getExportSymFlagNames()); W.printString("Name", Export.Name); @@ -230,8 +232,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, ExportSym &Export) { Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, Compile2Sym &Compile2) { - DictScope S(W, "CompilerFlags2"); - W.printEnum("Language", Compile2.getLanguage(), getSourceLanguageNames()); W.printFlags("Flags", Compile2.getFlags(), getCompileSym2FlagNames()); W.printEnum("Machine", unsigned(Compile2.Machine), getCPUTypeNames()); @@ -255,8 +255,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, Compile3Sym &Compile3) { - DictScope S(W, "CompilerFlags3"); - W.printEnum("Language", Compile3.getLanguage(), getSourceLanguageNames()); W.printFlags("Flags", Compile3.getFlags(), getCompileSym3FlagNames()); W.printEnum("Machine", unsigned(Compile3.Machine), getCPUTypeNames()); @@ -282,8 +280,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, ConstantSym &Constant) { - DictScope S(W, "Constant"); - printTypeIndex("Type", Constant.Type); W.printNumber("Value", Constant.Value); W.printString("Name", Constant.Name); @@ -291,9 +287,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, } Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, DataSym &Data) { - DictScope S(W, "DataSym"); - - W.printEnum("Kind", uint16_t(CVR.kind()), getSymbolTypeNames()); StringRef LinkageName; if (ObjDelegate) { ObjDelegate->printRelocatedField("DataOffset", Data.getRelocationOffset(), @@ -309,15 +302,12 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, DataSym &Data) { Error CVSymbolDumperImpl::visitKnownRecord( CVSymbol &CVR, 
DefRangeFramePointerRelFullScopeSym &DefRangeFramePointerRelFullScope) { - DictScope S(W, "DefRangeFramePointerRelFullScope"); W.printNumber("Offset", DefRangeFramePointerRelFullScope.Offset); return Error::success(); } Error CVSymbolDumperImpl::visitKnownRecord( CVSymbol &CVR, DefRangeFramePointerRelSym &DefRangeFramePointerRel) { - DictScope S(W, "DefRangeFramePointerRel"); - W.printNumber("Offset", DefRangeFramePointerRel.Offset); printLocalVariableAddrRange(DefRangeFramePointerRel.Range, DefRangeFramePointerRel.getRelocationOffset()); @@ -327,8 +317,6 @@ Error CVSymbolDumperImpl::visitKnownRecord( Error CVSymbolDumperImpl::visitKnownRecord( CVSymbol &CVR, DefRangeRegisterRelSym &DefRangeRegisterRel) { - DictScope S(W, "DefRangeRegisterRel"); - W.printNumber("BaseRegister", DefRangeRegisterRel.Hdr.Register); W.printBoolean("HasSpilledUDTMember", DefRangeRegisterRel.hasSpilledUDTMember()); @@ -342,8 +330,6 @@ Error CVSymbolDumperImpl::visitKnownRecord( Error CVSymbolDumperImpl::visitKnownRecord( CVSymbol &CVR, DefRangeRegisterSym &DefRangeRegister) { - DictScope S(W, "DefRangeRegister"); - W.printNumber("Register", DefRangeRegister.Hdr.Register); W.printNumber("MayHaveNoName", DefRangeRegister.Hdr.MayHaveNoName); printLocalVariableAddrRange(DefRangeRegister.Range, @@ -354,8 +340,6 @@ Error CVSymbolDumperImpl::visitKnownRecord( Error CVSymbolDumperImpl::visitKnownRecord( CVSymbol &CVR, DefRangeSubfieldRegisterSym &DefRangeSubfieldRegister) { - DictScope S(W, "DefRangeSubfieldRegister"); - W.printNumber("Register", DefRangeSubfieldRegister.Hdr.Register); W.printNumber("MayHaveNoName", DefRangeSubfieldRegister.Hdr.MayHaveNoName); W.printNumber("OffsetInParent", DefRangeSubfieldRegister.Hdr.OffsetInParent); @@ -367,10 +351,8 @@ Error CVSymbolDumperImpl::visitKnownRecord( Error CVSymbolDumperImpl::visitKnownRecord( CVSymbol &CVR, DefRangeSubfieldSym &DefRangeSubfield) { - DictScope S(W, "DefRangeSubfield"); - if (ObjDelegate) { - StringTableRef Strings = ObjDelegate->getStringTable(); + DebugStringTableSubsectionRef Strings = ObjDelegate->getStringTable(); auto ExpectedProgram = Strings.getString(DefRangeSubfield.Program); if (!ExpectedProgram) { consumeError(ExpectedProgram.takeError()); @@ -388,10 +370,8 @@ Error CVSymbolDumperImpl::visitKnownRecord( Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, DefRangeSym &DefRange) { - DictScope S(W, "DefRange"); - if (ObjDelegate) { - StringTableRef Strings = ObjDelegate->getStringTable(); + DebugStringTableSubsectionRef Strings = ObjDelegate->getStringTable(); auto ExpectedProgram = Strings.getString(DefRange.Program); if (!ExpectedProgram) { consumeError(ExpectedProgram.takeError()); @@ -407,8 +387,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, FrameCookieSym &FrameCookie) { - DictScope S(W, "FrameCookie"); - StringRef LinkageName; if (ObjDelegate) { ObjDelegate->printRelocatedField("CodeOffset", @@ -424,8 +402,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, FrameProcSym &FrameProc) { - DictScope S(W, "FrameProc"); - W.printHex("TotalFrameBytes", FrameProc.TotalFrameBytes); W.printHex("PaddingFrameBytes", FrameProc.PaddingFrameBytes); W.printHex("OffsetToPadding", FrameProc.OffsetToPadding); @@ -441,8 +417,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, Error CVSymbolDumperImpl::visitKnownRecord( CVSymbol &CVR, HeapAllocationSiteSym &HeapAllocSite) { - DictScope S(W, 
"HeapAllocationSite"); - StringRef LinkageName; if (ObjDelegate) { ObjDelegate->printRelocatedField("CodeOffset", @@ -459,8 +433,6 @@ Error CVSymbolDumperImpl::visitKnownRecord( Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, InlineSiteSym &InlineSite) { - DictScope S(W, "InlineSite"); - W.printHex("PtrParent", InlineSite.Parent); W.printHex("PtrEnd", InlineSite.End); printTypeIndex("Inlinee", InlineSite.Inlinee); @@ -516,16 +488,14 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, RegisterSym &Register) { - DictScope S(W, "RegisterSym"); - W.printNumber("Type", Register.Index); + printTypeIndex("Type", Register.Index); W.printEnum("Seg", uint16_t(Register.Register), getRegisterNames()); W.printString("Name", Register.Name); return Error::success(); } Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, PublicSym32 &Public) { - DictScope S(W, "PublicSym"); - W.printNumber("Type", Public.Index); + W.printFlags("Flags", uint32_t(Public.Flags), getPublicSymFlagNames()); W.printNumber("Seg", Public.Segment); W.printNumber("Off", Public.Offset); W.printString("Name", Public.Name); @@ -533,7 +503,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, PublicSym32 &Public) { } Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, ProcRefSym &ProcRef) { - DictScope S(W, "ProcRef"); W.printNumber("SumName", ProcRef.SumName); W.printNumber("SymOffset", ProcRef.SymOffset); W.printNumber("Mod", ProcRef.Module); @@ -542,8 +511,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, ProcRefSym &ProcRef) { } Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, LabelSym &Label) { - DictScope S(W, "Label"); - StringRef LinkageName; if (ObjDelegate) { ObjDelegate->printRelocatedField("CodeOffset", Label.getRelocationOffset(), @@ -559,8 +526,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, LabelSym &Label) { } Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, LocalSym &Local) { - DictScope S(W, "Local"); - printTypeIndex("Type", Local.Type); W.printFlags("Flags", uint16_t(Local.Flags), getLocalFlagNames()); W.printString("VarName", Local.Name); @@ -568,16 +533,12 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, LocalSym &Local) { } Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, ObjNameSym &ObjName) { - DictScope S(W, "ObjectName"); - W.printHex("Signature", ObjName.Signature); W.printString("ObjectName", ObjName.Name); return Error::success(); } Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, ProcSym &Proc) { - DictScope S(W, "ProcStart"); - if (InFunctionScope) return llvm::make_error( "Visiting a ProcSym while inside function scope!"); @@ -585,7 +546,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, ProcSym &Proc) { InFunctionScope = true; StringRef LinkageName; - W.printEnum("Kind", uint16_t(CVR.kind()), getSymbolTypeNames()); W.printHex("PtrParent", Proc.Parent); W.printHex("PtrEnd", Proc.End); W.printHex("PtrNext", Proc.Next); @@ -608,13 +568,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, ProcSym &Proc) { Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, ScopeEndSym &ScopeEnd) { - if (CVR.kind() == SymbolKind::S_END) - DictScope S(W, "BlockEnd"); - else if (CVR.kind() == SymbolKind::S_PROC_ID_END) - DictScope S(W, "ProcEnd"); - else if (CVR.kind() == SymbolKind::S_INLINESITE_END) - DictScope S(W, "InlineSiteEnd"); - InFunctionScope = false; return Error::success(); } @@ -628,19 +581,15 @@ Error 
CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, CallerSym &Caller) { Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, RegRelativeSym &RegRel) { - DictScope S(W, "RegRelativeSym"); - W.printHex("Offset", RegRel.Offset); printTypeIndex("Type", RegRel.Type); - W.printHex("Register", RegRel.Register); + W.printEnum("Register", uint16_t(RegRel.Register), getRegisterNames()); W.printString("VarName", RegRel.Name); return Error::success(); } Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, ThreadLocalDataSym &Data) { - DictScope S(W, "ThreadLocalDataSym"); - StringRef LinkageName; if (ObjDelegate) { ObjDelegate->printRelocatedField("DataOffset", Data.getRelocationOffset(), @@ -654,23 +603,20 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, } Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, UDTSym &UDT) { - DictScope S(W, "UDT"); printTypeIndex("Type", UDT.Type); W.printString("UDTName", UDT.Name); return Error::success(); } Error CVSymbolDumperImpl::visitUnknownSymbol(CVSymbol &CVR) { - DictScope S(W, "UnknownSym"); - W.printEnum("Kind", uint16_t(CVR.kind()), getSymbolTypeNames()); W.printNumber("Length", CVR.length()); return Error::success(); } Error CVSymbolDumper::dump(CVRecord &Record) { SymbolVisitorCallbackPipeline Pipeline; - SymbolDeserializer Deserializer(ObjDelegate.get()); - CVSymbolDumperImpl Dumper(TypeDB, ObjDelegate.get(), W, PrintRecordBytes); + SymbolDeserializer Deserializer(ObjDelegate.get(), Container); + CVSymbolDumperImpl Dumper(Types, ObjDelegate.get(), W, PrintRecordBytes); Pipeline.addCallbackToPipeline(Deserializer); Pipeline.addCallbackToPipeline(Dumper); @@ -680,8 +626,8 @@ Error CVSymbolDumper::dump(CVRecord &Record) { Error CVSymbolDumper::dump(const CVSymbolArray &Symbols) { SymbolVisitorCallbackPipeline Pipeline; - SymbolDeserializer Deserializer(ObjDelegate.get()); - CVSymbolDumperImpl Dumper(TypeDB, ObjDelegate.get(), W, PrintRecordBytes); + SymbolDeserializer Deserializer(ObjDelegate.get(), Container); + CVSymbolDumperImpl Dumper(Types, ObjDelegate.get(), W, PrintRecordBytes); Pipeline.addCallbackToPipeline(Deserializer); Pipeline.addCallbackToPipeline(Dumper); diff --git a/interpreter/llvm/src/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp b/interpreter/llvm/src/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp index bb17314654951..923837a45d9fc 100644 --- a/interpreter/llvm/src/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp @@ -40,6 +40,7 @@ Error SymbolRecordMapping::visitSymbolBegin(CVSymbol &Record) { } Error SymbolRecordMapping::visitSymbolEnd(CVSymbol &Record) { + error(IO.padToAlignment(alignOf(Container))); error(IO.endRecord()); return Error::success(); } @@ -306,7 +307,7 @@ Error SymbolRecordMapping::visitKnownRecord(CVSymbol &CVR, error(IO.mapInteger(FrameCookie.CodeOffset)); error(IO.mapInteger(FrameCookie.Register)); - error(IO.mapInteger(FrameCookie.CookieKind)); + error(IO.mapEnum(FrameCookie.CookieKind)); error(IO.mapInteger(FrameCookie.Flags)); return Error::success(); @@ -360,7 +361,7 @@ Error SymbolRecordMapping::visitKnownRecord(CVSymbol &CVR, Error SymbolRecordMapping::visitKnownRecord(CVSymbol &CVR, PublicSym32 &Public) { - error(IO.mapInteger(Public.Index)); + error(IO.mapEnum(Public.Flags)); error(IO.mapInteger(Public.Offset)); error(IO.mapInteger(Public.Segment)); error(IO.mapStringZ(Public.Name)); @@ -438,7 +439,7 @@ Error SymbolRecordMapping::visitKnownRecord(CVSymbol &CVR, error(IO.mapInteger(RegRel.Offset)); 
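The padToAlignment call added to visitSymbolEnd above pads every serialized symbol record out to the container-specific alignment returned by alignOf(Container), so consumers can assume aligned record starts regardless of payload size. The round-up itself is the usual power-of-two arithmetic:

    #include <cassert>
    #include <cstdint>

    // Round Value up to the next multiple of Align (Align a power of two).
    static uint64_t alignTo(uint64_t Value, uint64_t Align) {
      assert(Align && (Align & (Align - 1)) == 0 && "Align must be 2^k");
      return (Value + Align - 1) & ~(Align - 1);
    }

    int main() {
      assert(alignTo(13, 4) == 16);
      assert(alignTo(16, 4) == 16);  // already aligned: unchanged
    }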
error(IO.mapInteger(RegRel.Type)); - error(IO.mapInteger(RegRel.Register)); + error(IO.mapEnum(RegRel.Register)); error(IO.mapStringZ(RegRel.Name)); return Error::success(); diff --git a/interpreter/llvm/src/lib/DebugInfo/CodeView/SymbolSerializer.cpp b/interpreter/llvm/src/lib/DebugInfo/CodeView/SymbolSerializer.cpp index 251cc431f52b3..9a2e776feb756 100644 --- a/interpreter/llvm/src/lib/DebugInfo/CodeView/SymbolSerializer.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/CodeView/SymbolSerializer.cpp @@ -1,4 +1,4 @@ -//===- SymbolSerializer.cpp -------------------------------------*- C++ -*-===// +//===- SymbolSerializer.cpp -----------------------------------------------===// // // The LLVM Compiler Infrastructure // @@ -8,13 +8,22 @@ //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/CodeView/SymbolSerializer.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/DebugInfo/CodeView/SymbolRecord.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/Error.h" +#include +#include +#include using namespace llvm; using namespace llvm::codeview; -SymbolSerializer::SymbolSerializer(BumpPtrAllocator &Allocator) - : Storage(Allocator), RecordBuffer(MaxRecordLength), Stream(RecordBuffer, llvm::support::little), - Writer(Stream), Mapping(Writer) { } +SymbolSerializer::SymbolSerializer(BumpPtrAllocator &Allocator, + CodeViewContainer Container) + : Storage(Allocator), RecordBuffer(MaxRecordLength), + Stream(RecordBuffer, support::little), Writer(Stream), + Mapping(Writer, Container) {} Error SymbolSerializer::visitSymbolBegin(CVSymbol &Record) { assert(!CurrentSymbol.hasValue() && "Already in a symbol mapping!"); diff --git a/interpreter/llvm/src/lib/DebugInfo/CodeView/TypeDatabase.cpp b/interpreter/llvm/src/lib/DebugInfo/CodeView/TypeDatabase.cpp deleted file mode 100644 index 7924440e5e29b..0000000000000 --- a/interpreter/llvm/src/lib/DebugInfo/CodeView/TypeDatabase.cpp +++ /dev/null @@ -1,162 +0,0 @@ -//===- TypeDatabase.cpp --------------------------------------- *- C++ --*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "llvm/DebugInfo/CodeView/TypeDatabase.h" - -using namespace llvm; -using namespace llvm::codeview; - -namespace { -struct SimpleTypeEntry { - StringRef Name; - SimpleTypeKind Kind; -}; -} - -/// The names here all end in "*". If the simple type is a pointer type, we -/// return the whole name. Otherwise we lop off the last character in our -/// StringRef. 
-static const SimpleTypeEntry SimpleTypeNames[] = { - {"void*", SimpleTypeKind::Void}, - {"*", SimpleTypeKind::NotTranslated}, - {"HRESULT*", SimpleTypeKind::HResult}, - {"signed char*", SimpleTypeKind::SignedCharacter}, - {"unsigned char*", SimpleTypeKind::UnsignedCharacter}, - {"char*", SimpleTypeKind::NarrowCharacter}, - {"wchar_t*", SimpleTypeKind::WideCharacter}, - {"char16_t*", SimpleTypeKind::Character16}, - {"char32_t*", SimpleTypeKind::Character32}, - {"__int8*", SimpleTypeKind::SByte}, - {"unsigned __int8*", SimpleTypeKind::Byte}, - {"short*", SimpleTypeKind::Int16Short}, - {"unsigned short*", SimpleTypeKind::UInt16Short}, - {"__int16*", SimpleTypeKind::Int16}, - {"unsigned __int16*", SimpleTypeKind::UInt16}, - {"long*", SimpleTypeKind::Int32Long}, - {"unsigned long*", SimpleTypeKind::UInt32Long}, - {"int*", SimpleTypeKind::Int32}, - {"unsigned*", SimpleTypeKind::UInt32}, - {"__int64*", SimpleTypeKind::Int64Quad}, - {"unsigned __int64*", SimpleTypeKind::UInt64Quad}, - {"__int64*", SimpleTypeKind::Int64}, - {"unsigned __int64*", SimpleTypeKind::UInt64}, - {"__int128*", SimpleTypeKind::Int128}, - {"unsigned __int128*", SimpleTypeKind::UInt128}, - {"__half*", SimpleTypeKind::Float16}, - {"float*", SimpleTypeKind::Float32}, - {"float*", SimpleTypeKind::Float32PartialPrecision}, - {"__float48*", SimpleTypeKind::Float48}, - {"double*", SimpleTypeKind::Float64}, - {"long double*", SimpleTypeKind::Float80}, - {"__float128*", SimpleTypeKind::Float128}, - {"_Complex float*", SimpleTypeKind::Complex32}, - {"_Complex double*", SimpleTypeKind::Complex64}, - {"_Complex long double*", SimpleTypeKind::Complex80}, - {"_Complex __float128*", SimpleTypeKind::Complex128}, - {"bool*", SimpleTypeKind::Boolean8}, - {"__bool16*", SimpleTypeKind::Boolean16}, - {"__bool32*", SimpleTypeKind::Boolean32}, - {"__bool64*", SimpleTypeKind::Boolean64}, -}; - -TypeDatabase::TypeDatabase(uint32_t Capacity) : TypeNameStorage(Allocator) { - CVUDTNames.resize(Capacity); - TypeRecords.resize(Capacity); - ValidRecords.resize(Capacity); -} - -TypeIndex TypeDatabase::appendType(StringRef Name, const CVType &Data) { - TypeIndex TI; - TI = getAppendIndex(); - if (TI.toArrayIndex() >= capacity()) - grow(); - recordType(Name, TI, Data); - return TI; -} - -void TypeDatabase::recordType(StringRef Name, TypeIndex Index, - const CVType &Data) { - uint32_t AI = Index.toArrayIndex(); - - assert(!contains(Index)); - assert(AI < capacity()); - - CVUDTNames[AI] = Name; - TypeRecords[AI] = Data; - ValidRecords.set(AI); - ++Count; -} - -/// Saves the name in a StringSet and creates a stable StringRef. -StringRef TypeDatabase::saveTypeName(StringRef TypeName) { - return TypeNameStorage.save(TypeName); -} - -StringRef TypeDatabase::getTypeName(TypeIndex Index) const { - if (Index.isNoneType()) - return ""; - - if (Index.isSimple()) { - // This is a simple type. - for (const auto &SimpleTypeName : SimpleTypeNames) { - if (SimpleTypeName.Kind == Index.getSimpleKind()) { - if (Index.getSimpleMode() == SimpleTypeMode::Direct) - return SimpleTypeName.Name.drop_back(1); - // Otherwise, this is a pointer type. We gloss over the distinction - // between near, far, 64, 32, etc, and just give a pointer type. 
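A note on the SimpleTypeNames table above (deleted here and reintroduced in TypeIndex.cpp further down): every entry is stored in its pointer spelling, e.g. "int*", so the direct (non-pointer) mode is produced by dropping the final character rather than storing two strings per type. Reduced to a sketch:

    #include <cassert>
    #include <string_view>

    enum class Mode { Direct, Pointer };

    // Stored names all end in '*'; Direct mode trims it off.
    static std::string_view simpleName(std::string_view Stored, Mode M) {
      return M == Mode::Direct ? Stored.substr(0, Stored.size() - 1) : Stored;
    }

    int main() {
      assert(simpleName("int*", Mode::Direct) == "int");
      assert(simpleName("int*", Mode::Pointer) == "int*");
    }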
- return SimpleTypeName.Name; - } - } - return ""; - } - - if (contains(Index)) - return CVUDTNames[Index.toArrayIndex()]; - - return ""; -} - -const CVType &TypeDatabase::getTypeRecord(TypeIndex Index) const { - assert(contains(Index)); - return TypeRecords[Index.toArrayIndex()]; -} - -CVType &TypeDatabase::getTypeRecord(TypeIndex Index) { - assert(contains(Index)); - return TypeRecords[Index.toArrayIndex()]; -} - -bool TypeDatabase::contains(TypeIndex Index) const { - uint32_t AI = Index.toArrayIndex(); - if (AI >= capacity()) - return false; - - return ValidRecords.test(AI); -} - -uint32_t TypeDatabase::size() const { return Count; } - -uint32_t TypeDatabase::capacity() const { return TypeRecords.size(); } - -void TypeDatabase::grow() { - TypeRecords.emplace_back(); - CVUDTNames.emplace_back(); - ValidRecords.resize(ValidRecords.size() + 1); -} - -bool TypeDatabase::empty() const { return size() == 0; } - -TypeIndex TypeDatabase::getAppendIndex() const { - if (empty()) - return TypeIndex::fromArrayIndex(0); - - int Index = ValidRecords.find_last(); - assert(Index != -1); - return TypeIndex::fromArrayIndex(Index) + 1; -} diff --git a/interpreter/llvm/src/lib/DebugInfo/CodeView/TypeDatabaseVisitor.cpp b/interpreter/llvm/src/lib/DebugInfo/CodeView/TypeDatabaseVisitor.cpp deleted file mode 100644 index 8d97f8b1cb401..0000000000000 --- a/interpreter/llvm/src/lib/DebugInfo/CodeView/TypeDatabaseVisitor.cpp +++ /dev/null @@ -1,330 +0,0 @@ -//===- TypeDatabaseVisitor.cpp -------------------------------- *- C++ --*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "llvm/DebugInfo/CodeView/TypeDatabaseVisitor.h" - -#include "llvm/ADT/SmallString.h" - -using namespace llvm; - -using namespace llvm::codeview; - -Error TypeDatabaseVisitor::visitTypeBegin(CVType &Record) { - assert(!IsInFieldList); - // Reset Name to the empty string. If the visitor sets it, we know it. - Name = ""; - - if (Record.Type == LF_FIELDLIST) { - // Record that we're in a field list so that members do not get assigned - // type indices. - IsInFieldList = true; - } - return Error::success(); -} - -Error TypeDatabaseVisitor::visitTypeBegin(CVType &Record, TypeIndex Index) { - if (auto EC = visitTypeBegin(Record)) - return EC; - - CurrentTypeIndex = Index; - return Error::success(); -} - -StringRef TypeDatabaseVisitor::getTypeName(TypeIndex Index) const { - return TypeDB->getTypeName(Index); -} - -StringRef TypeDatabaseVisitor::saveTypeName(StringRef Name) { - return TypeDB->saveTypeName(Name); -} - -Error TypeDatabaseVisitor::visitTypeEnd(CVType &CVR) { - if (CVR.Type == LF_FIELDLIST) { - assert(IsInFieldList); - IsInFieldList = false; - } - assert(!IsInFieldList); - - // Record every type that is not a field list member, even if Name is empty. - // CVUDTNames is indexed by type index, and must have one entry for every - // type. Field list members are not recorded, and are only referenced by - // their containing field list record. - if (CurrentTypeIndex) - TypeDB->recordType(Name, *CurrentTypeIndex, CVR); - else - TypeDB->appendType(Name, CVR); - - CurrentTypeIndex.reset(); - return Error::success(); -} - -Error TypeDatabaseVisitor::visitMemberBegin(CVMemberRecord &Record) { - assert(IsInFieldList); - // Reset Name to the empty string. If the visitor sets it, we know it. 
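The IsInFieldList flag threaded through the deleted TypeDatabaseVisitor above enforces a CodeView invariant worth making explicit: members of an LF_FIELDLIST are visited between the list's begin/end callbacks but consume no type indices; only the enclosing field list record gets one. A minimal model of that guard, under hypothetical names:

    #include <cassert>
    #include <string>
    #include <vector>

    class NameTable {                    // hypothetical
      std::vector<std::string> Names;    // position == type index
      bool InFieldList = false;
    public:
      void beginType(bool IsFieldList) {
        assert(!InFieldList && "field lists cannot nest");
        InFieldList = IsFieldList;
      }
      void member(const std::string &) {
        assert(InFieldList);             // members get no index of their own
      }
      void endType(bool IsFieldList, const std::string &Name) {
        if (IsFieldList) InFieldList = false;
        Names.push_back(Name);           // one index per top-level record
      }
    };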
- Name = ""; - return Error::success(); -} - -Error TypeDatabaseVisitor::visitMemberEnd(CVMemberRecord &Record) { - assert(IsInFieldList); - return Error::success(); -} - -Error TypeDatabaseVisitor::visitKnownRecord(CVType &CVR, - FieldListRecord &FieldList) { - Name = ""; - return Error::success(); -} - -Error TypeDatabaseVisitor::visitKnownRecord(CVRecord &CVR, - StringIdRecord &String) { - // Put this in the database so it gets printed with LF_UDT_SRC_LINE. - Name = String.getString(); - return Error::success(); -} - -Error TypeDatabaseVisitor::visitKnownRecord(CVType &CVR, ArgListRecord &Args) { - auto Indices = Args.getIndices(); - uint32_t Size = Indices.size(); - SmallString<256> TypeName("("); - for (uint32_t I = 0; I < Size; ++I) { - StringRef ArgTypeName = getTypeName(Indices[I]); - TypeName.append(ArgTypeName); - if (I + 1 != Size) - TypeName.append(", "); - } - TypeName.push_back(')'); - Name = saveTypeName(TypeName); - return Error::success(); -} - -Error TypeDatabaseVisitor::visitKnownRecord(CVType &CVR, - StringListRecord &Strings) { - auto Indices = Strings.getIndices(); - uint32_t Size = Indices.size(); - SmallString<256> TypeName("\""); - for (uint32_t I = 0; I < Size; ++I) { - StringRef ArgTypeName = getTypeName(Indices[I]); - TypeName.append(ArgTypeName); - if (I + 1 != Size) - TypeName.append("\" \""); - } - TypeName.push_back('\"'); - Name = saveTypeName(TypeName); - return Error::success(); -} - -Error TypeDatabaseVisitor::visitKnownRecord(CVType &CVR, ClassRecord &Class) { - Name = Class.getName(); - return Error::success(); -} - -Error TypeDatabaseVisitor::visitKnownRecord(CVType &CVR, UnionRecord &Union) { - Name = Union.getName(); - return Error::success(); -} - -Error TypeDatabaseVisitor::visitKnownRecord(CVType &CVR, EnumRecord &Enum) { - Name = Enum.getName(); - return Error::success(); -} - -Error TypeDatabaseVisitor::visitKnownRecord(CVType &CVR, ArrayRecord &AT) { - Name = AT.getName(); - return Error::success(); -} - -Error TypeDatabaseVisitor::visitKnownRecord(CVType &CVR, VFTableRecord &VFT) { - Name = VFT.getName(); - return Error::success(); -} - -Error TypeDatabaseVisitor::visitKnownRecord(CVType &CVR, - MemberFuncIdRecord &Id) { - Name = Id.getName(); - return Error::success(); -} - -Error TypeDatabaseVisitor::visitKnownRecord(CVType &CVR, - ProcedureRecord &Proc) { - StringRef ReturnTypeName = getTypeName(Proc.getReturnType()); - StringRef ArgListTypeName = getTypeName(Proc.getArgumentList()); - SmallString<256> TypeName(ReturnTypeName); - TypeName.push_back(' '); - TypeName.append(ArgListTypeName); - Name = saveTypeName(TypeName); - return Error::success(); -} - -Error TypeDatabaseVisitor::visitKnownRecord(CVType &CVR, - MemberFunctionRecord &MF) { - StringRef ReturnTypeName = getTypeName(MF.getReturnType()); - StringRef ClassTypeName = getTypeName(MF.getClassType()); - StringRef ArgListTypeName = getTypeName(MF.getArgumentList()); - SmallString<256> TypeName(ReturnTypeName); - TypeName.push_back(' '); - TypeName.append(ClassTypeName); - TypeName.append("::"); - TypeName.append(ArgListTypeName); - Name = saveTypeName(TypeName); - return Error::success(); -} - -Error TypeDatabaseVisitor::visitKnownRecord(CVType &CVR, FuncIdRecord &Func) { - Name = Func.getName(); - return Error::success(); -} - -Error TypeDatabaseVisitor::visitKnownRecord(CVType &CVR, - TypeServer2Record &TS) { - Name = TS.getName(); - return Error::success(); -} - -Error TypeDatabaseVisitor::visitKnownRecord(CVType &CVR, PointerRecord &Ptr) { - - if (Ptr.isPointerToMember()) { - 
const MemberPointerInfo &MI = Ptr.getMemberInfo(); - - StringRef PointeeName = getTypeName(Ptr.getReferentType()); - StringRef ClassName = getTypeName(MI.getContainingType()); - SmallString<256> TypeName(PointeeName); - TypeName.push_back(' '); - TypeName.append(ClassName); - TypeName.append("::*"); - Name = saveTypeName(TypeName); - } else { - SmallString<256> TypeName; - if (Ptr.isConst()) - TypeName.append("const "); - if (Ptr.isVolatile()) - TypeName.append("volatile "); - if (Ptr.isUnaligned()) - TypeName.append("__unaligned "); - - TypeName.append(getTypeName(Ptr.getReferentType())); - - if (Ptr.getMode() == PointerMode::LValueReference) - TypeName.append("&"); - else if (Ptr.getMode() == PointerMode::RValueReference) - TypeName.append("&&"); - else if (Ptr.getMode() == PointerMode::Pointer) - TypeName.append("*"); - - if (!TypeName.empty()) - Name = saveTypeName(TypeName); - } - return Error::success(); -} - -Error TypeDatabaseVisitor::visitKnownRecord(CVType &CVR, ModifierRecord &Mod) { - uint16_t Mods = static_cast(Mod.getModifiers()); - - StringRef ModifiedName = getTypeName(Mod.getModifiedType()); - SmallString<256> TypeName; - if (Mods & uint16_t(ModifierOptions::Const)) - TypeName.append("const "); - if (Mods & uint16_t(ModifierOptions::Volatile)) - TypeName.append("volatile "); - if (Mods & uint16_t(ModifierOptions::Unaligned)) - TypeName.append("__unaligned "); - TypeName.append(ModifiedName); - Name = saveTypeName(TypeName); - return Error::success(); -} - -Error TypeDatabaseVisitor::visitKnownRecord(CVType &CVR, - VFTableShapeRecord &Shape) { - Name = - saveTypeName(""); - return Error::success(); -} - -Error TypeDatabaseVisitor::visitKnownMember(CVMemberRecord &CVR, - NestedTypeRecord &Nested) { - Name = Nested.getName(); - return Error::success(); -} - -Error TypeDatabaseVisitor::visitKnownMember(CVMemberRecord &CVR, - OneMethodRecord &Method) { - Name = Method.getName(); - return Error::success(); -} - -Error TypeDatabaseVisitor::visitKnownMember(CVMemberRecord &CVR, - OverloadedMethodRecord &Method) { - Name = Method.getName(); - return Error::success(); -} - -Error TypeDatabaseVisitor::visitKnownMember(CVMemberRecord &CVR, - DataMemberRecord &Field) { - Name = Field.getName(); - return Error::success(); -} - -Error TypeDatabaseVisitor::visitKnownMember(CVMemberRecord &CVR, - StaticDataMemberRecord &Field) { - Name = Field.getName(); - return Error::success(); -} - -Error TypeDatabaseVisitor::visitKnownMember(CVMemberRecord &CVR, - EnumeratorRecord &Enum) { - Name = Enum.getName(); - return Error::success(); -} - -Error TypeDatabaseVisitor::visitKnownMember(CVMemberRecord &CVR, - BaseClassRecord &Base) { - return Error::success(); -} - -Error TypeDatabaseVisitor::visitKnownMember(CVMemberRecord &CVR, - VirtualBaseClassRecord &VBase) { - return Error::success(); -} - -Error TypeDatabaseVisitor::visitKnownMember(CVMemberRecord &CVR, - ListContinuationRecord &Cont) { - return Error::success(); -} - -Error TypeDatabaseVisitor::visitKnownRecord( - CVType &CVR, UdtModSourceLineRecord &ModSourceLine) { - return Error::success(); -} - -Error TypeDatabaseVisitor::visitKnownRecord(CVType &CVR, - UdtSourceLineRecord &SourceLine) { - return Error::success(); -} - -Error TypeDatabaseVisitor::visitKnownRecord(CVType &CVR, BitFieldRecord &BF) { - return Error::success(); -} - -Error TypeDatabaseVisitor::visitKnownRecord( - CVType &CVR, MethodOverloadListRecord &Overloads) { - return Error::success(); -} - -Error TypeDatabaseVisitor::visitKnownRecord(CVType &CVR, BuildInfoRecord &BI) 
{ - return Error::success(); -} - -Error TypeDatabaseVisitor::visitKnownRecord(CVType &CVR, LabelRecord &R) { - return Error::success(); -} - -Error TypeDatabaseVisitor::visitKnownMember(CVMemberRecord &CVR, - VFPtrRecord &VFP) { - return Error::success(); -} diff --git a/interpreter/llvm/src/lib/DebugInfo/CodeView/TypeDumpVisitor.cpp b/interpreter/llvm/src/lib/DebugInfo/CodeView/TypeDumpVisitor.cpp index 27a6e09878861..e18a35ca1f389 100644 --- a/interpreter/llvm/src/lib/DebugInfo/CodeView/TypeDumpVisitor.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/CodeView/TypeDumpVisitor.cpp @@ -10,15 +10,11 @@ #include "llvm/DebugInfo/CodeView/TypeDumpVisitor.h" #include "llvm/ADT/SmallString.h" -#include "llvm/DebugInfo/CodeView/CVTypeDumper.h" #include "llvm/DebugInfo/CodeView/CVTypeVisitor.h" #include "llvm/DebugInfo/CodeView/Formatters.h" -#include "llvm/DebugInfo/CodeView/TypeDatabase.h" -#include "llvm/DebugInfo/CodeView/TypeDatabaseVisitor.h" -#include "llvm/DebugInfo/CodeView/TypeDeserializer.h" +#include "llvm/DebugInfo/CodeView/TypeCollection.h" #include "llvm/DebugInfo/CodeView/TypeIndex.h" #include "llvm/DebugInfo/CodeView/TypeRecord.h" -#include "llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h" #include "llvm/Support/BinaryByteStream.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/ScopedPrinter.h" @@ -28,7 +24,7 @@ using namespace llvm::codeview; static const EnumEntry LeafTypeNames[] = { #define CV_TYPE(enum, val) {#enum, enum}, -#include "llvm/DebugInfo/CodeView/TypeRecords.def" +#include "llvm/DebugInfo/CodeView/CodeViewTypes.def" }; #define ENUM_ENTRY(enum_class, enum) \ @@ -157,7 +153,7 @@ static StringRef getLeafTypeName(TypeLeafKind LT) { #define TYPE_RECORD(ename, value, name) \ case ename: \ return #name; -#include "llvm/DebugInfo/CodeView/TypeRecords.def" +#include "llvm/DebugInfo/CodeView/CodeViewTypes.def" default: break; } @@ -165,16 +161,15 @@ static StringRef getLeafTypeName(TypeLeafKind LT) { } void TypeDumpVisitor::printTypeIndex(StringRef FieldName, TypeIndex TI) const { - CVTypeDumper::printTypeIndex(*W, FieldName, TI, TypeDB); + codeview::printTypeIndex(*W, FieldName, TI, TpiTypes); } void TypeDumpVisitor::printItemIndex(StringRef FieldName, TypeIndex TI) const { - CVTypeDumper::printTypeIndex(*W, FieldName, TI, getSourceDB()); + codeview::printTypeIndex(*W, FieldName, TI, getSourceTypes()); } Error TypeDumpVisitor::visitTypeBegin(CVType &Record) { - TypeIndex TI = getSourceDB().getAppendIndex(); - return visitTypeBegin(Record, TI); + return visitTypeBegin(Record, TypeIndex::fromArrayIndex(TpiTypes.size())); } Error TypeDumpVisitor::visitTypeBegin(CVType &Record, TypeIndex Index) { @@ -216,8 +211,7 @@ Error TypeDumpVisitor::visitMemberEnd(CVMemberRecord &Record) { Error TypeDumpVisitor::visitKnownRecord(CVType &CVR, FieldListRecord &FieldList) { - CVTypeVisitor Visitor(*this); - if (auto EC = Visitor.visitFieldListMemberStream(FieldList.Data)) + if (auto EC = codeview::visitMemberRecordStream(FieldList.Data, *this)) return EC; return Error::success(); @@ -246,7 +240,7 @@ Error TypeDumpVisitor::visitKnownRecord(CVType &CVR, StringListRecord &Strs) { W->printNumber("NumStrings", Size); ListScope Arguments(*W, "Strings"); for (uint32_t I = 0; I < Size; ++I) { - printTypeIndex("String", Indices[I]); + printItemIndex("String", Indices[I]); } return Error::success(); } @@ -360,7 +354,7 @@ Error TypeDumpVisitor::visitKnownRecord(CVType &CVR, FuncIdRecord &Func) { } Error TypeDumpVisitor::visitKnownRecord(CVType &CVR, TypeServer2Record &TS) { - 
W->printString("Guid", formatv("{0}", fmt_guid(TS.getGuid())).str()); + W->printString("Guid", formatv("{0}", TS.getGuid()).str()); W->printNumber("Age", TS.getAge()); W->printString("Name", TS.getName()); return Error::success(); diff --git a/interpreter/llvm/src/lib/DebugInfo/CodeView/TypeIndex.cpp b/interpreter/llvm/src/lib/DebugInfo/CodeView/TypeIndex.cpp new file mode 100644 index 0000000000000..24fe5fcb28d4f --- /dev/null +++ b/interpreter/llvm/src/lib/DebugInfo/CodeView/TypeIndex.cpp @@ -0,0 +1,104 @@ +//===-- TypeIndex.cpp - CodeView type index ---------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/CodeView/TypeIndex.h" + +#include "llvm/DebugInfo/CodeView/TypeCollection.h" +#include "llvm/Support/ScopedPrinter.h" + +using namespace llvm; +using namespace llvm::codeview; + +namespace { +struct SimpleTypeEntry { + StringRef Name; + SimpleTypeKind Kind; +}; + +/// The names here all end in "*". If the simple type is a pointer type, we +/// return the whole name. Otherwise we lop off the last character in our +/// StringRef. +static const SimpleTypeEntry SimpleTypeNames[] = { + {"void*", SimpleTypeKind::Void}, + {"*", SimpleTypeKind::NotTranslated}, + {"HRESULT*", SimpleTypeKind::HResult}, + {"signed char*", SimpleTypeKind::SignedCharacter}, + {"unsigned char*", SimpleTypeKind::UnsignedCharacter}, + {"char*", SimpleTypeKind::NarrowCharacter}, + {"wchar_t*", SimpleTypeKind::WideCharacter}, + {"char16_t*", SimpleTypeKind::Character16}, + {"char32_t*", SimpleTypeKind::Character32}, + {"__int8*", SimpleTypeKind::SByte}, + {"unsigned __int8*", SimpleTypeKind::Byte}, + {"short*", SimpleTypeKind::Int16Short}, + {"unsigned short*", SimpleTypeKind::UInt16Short}, + {"__int16*", SimpleTypeKind::Int16}, + {"unsigned __int16*", SimpleTypeKind::UInt16}, + {"long*", SimpleTypeKind::Int32Long}, + {"unsigned long*", SimpleTypeKind::UInt32Long}, + {"int*", SimpleTypeKind::Int32}, + {"unsigned*", SimpleTypeKind::UInt32}, + {"__int64*", SimpleTypeKind::Int64Quad}, + {"unsigned __int64*", SimpleTypeKind::UInt64Quad}, + {"__int64*", SimpleTypeKind::Int64}, + {"unsigned __int64*", SimpleTypeKind::UInt64}, + {"__int128*", SimpleTypeKind::Int128}, + {"unsigned __int128*", SimpleTypeKind::UInt128}, + {"__half*", SimpleTypeKind::Float16}, + {"float*", SimpleTypeKind::Float32}, + {"float*", SimpleTypeKind::Float32PartialPrecision}, + {"__float48*", SimpleTypeKind::Float48}, + {"double*", SimpleTypeKind::Float64}, + {"long double*", SimpleTypeKind::Float80}, + {"__float128*", SimpleTypeKind::Float128}, + {"_Complex float*", SimpleTypeKind::Complex32}, + {"_Complex double*", SimpleTypeKind::Complex64}, + {"_Complex long double*", SimpleTypeKind::Complex80}, + {"_Complex __float128*", SimpleTypeKind::Complex128}, + {"bool*", SimpleTypeKind::Boolean8}, + {"__bool16*", SimpleTypeKind::Boolean16}, + {"__bool32*", SimpleTypeKind::Boolean32}, + {"__bool64*", SimpleTypeKind::Boolean64}, +}; +} // namespace + +StringRef TypeIndex::simpleTypeName(TypeIndex TI) { + assert(TI.isNoneType() || TI.isSimple()); + + if (TI.isNoneType()) + return ""; + + // This is a simple type. 
+ for (const auto &SimpleTypeName : SimpleTypeNames) { + if (SimpleTypeName.Kind == TI.getSimpleKind()) { + if (TI.getSimpleMode() == SimpleTypeMode::Direct) + return SimpleTypeName.Name.drop_back(1); + // Otherwise, this is a pointer type. We gloss over the distinction + // between near, far, 64, 32, etc, and just give a pointer type. + return SimpleTypeName.Name; + } + } + return ""; +} + +void llvm::codeview::printTypeIndex(ScopedPrinter &Printer, StringRef FieldName, + TypeIndex TI, TypeCollection &Types) { + StringRef TypeName; + if (!TI.isNoneType()) { + if (TI.isSimple()) + TypeName = TypeIndex::simpleTypeName(TI); + else + TypeName = Types.getTypeName(TI); + } + + if (!TypeName.empty()) + Printer.printHex(FieldName, TypeName, TI.getIndex()); + else + Printer.printHex(FieldName, TI.getIndex()); +} diff --git a/interpreter/llvm/src/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp b/interpreter/llvm/src/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp new file mode 100644 index 0000000000000..0d935c4472aef --- /dev/null +++ b/interpreter/llvm/src/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp @@ -0,0 +1,484 @@ +//===- TypeIndexDiscovery.cpp -----------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +#include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h" + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/Support/Endian.h" + +using namespace llvm; +using namespace llvm::codeview; + +static inline MethodKind getMethodKind(uint16_t Attrs) { + Attrs &= uint16_t(MethodOptions::MethodKindMask); + Attrs >>= 2; + return MethodKind(Attrs); +} + +static inline bool isIntroVirtual(uint16_t Attrs) { + MethodKind MK = getMethodKind(Attrs); + return MK == MethodKind::IntroducingVirtual || + MK == MethodKind::PureIntroducingVirtual; +} + +static inline PointerMode getPointerMode(uint32_t Attrs) { + return static_cast((Attrs >> PointerRecord::PointerModeShift) & + PointerRecord::PointerModeMask); +} + +static inline bool isMemberPointer(uint32_t Attrs) { + PointerMode Mode = getPointerMode(Attrs); + return Mode == PointerMode::PointerToDataMember || + Mode == PointerMode::PointerToMemberFunction; +} + +static inline uint32_t getEncodedIntegerLength(ArrayRef Data) { + uint16_t N = support::endian::read16le(Data.data()); + if (N < LF_NUMERIC) + return 2; + + assert(N <= LF_UQUADWORD); + + constexpr uint32_t Sizes[] = { + 1, // LF_CHAR + 2, // LF_SHORT + 2, // LF_USHORT + 4, // LF_LONG + 4, // LF_ULONG + 4, // LF_REAL32 + 8, // LF_REAL64 + 10, // LF_REAL80 + 16, // LF_REAL128 + 8, // LF_QUADWORD + 8, // LF_UQUADWORD + }; + + return Sizes[N - LF_NUMERIC]; +} + +static inline uint32_t getCStringLength(ArrayRef Data) { + const char *S = reinterpret_cast(Data.data()); + return strlen(S) + 1; +} + +static void handleMethodOverloadList(ArrayRef Content, + SmallVectorImpl &Refs) { + uint32_t Offset = 0; + + while (!Content.empty()) { + // Array of: + // 0: Attrs + // 2: Padding + // 4: TypeIndex + // if (isIntroVirtual()) + // 8: VFTableOffset + + // At least 8 bytes are guaranteed. 4 extra bytes come iff function is an + // intro virtual. 
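getEncodedIntegerLength above decodes CodeView's variable-length numeric leaf: a leading 16-bit value below LF_NUMERIC (0x8000) is the number itself, while anything at or above it is a kind tag (LF_CHAR, LF_SHORT, ..., LF_UQUADWORD) selecting the size of the payload that follows. A sketch of the rule that counts the two tag bytes explicitly:

    #include <cassert>
    #include <cstdint>

    constexpr uint16_t LF_NUMERIC = 0x8000;  // == LF_CHAR, the first tag

    static uint32_t encodedLength(uint16_t First) {
      if (First < LF_NUMERIC)
        return 2;                            // the two bytes ARE the value
      assert(First <= LF_NUMERIC + 10);      // LF_UQUADWORD
      static const uint32_t Sizes[] = {1, 2, 2, 4, 4, 4, 8, 10, 16, 8, 8};
      return 2 + Sizes[First - LF_NUMERIC];  // tag bytes + payload
    }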
+ uint32_t Len = 8; + + uint16_t Attrs = support::endian::read16le(Content.data()); + Refs.push_back({TiRefKind::TypeRef, Offset + 4, 1}); + + if (LLVM_UNLIKELY(isIntroVirtual(Attrs))) + Len += 4; + Offset += Len; + Content = Content.drop_front(Len); + } +} + +static uint32_t handleBaseClass(ArrayRef Data, uint32_t Offset, + SmallVectorImpl &Refs) { + // 0: Kind + // 2: Padding + // 4: TypeIndex + // 8: Encoded Integer + Refs.push_back({TiRefKind::TypeRef, Offset + 4, 1}); + return 8 + getEncodedIntegerLength(Data.drop_front(8)); +} + +static uint32_t handleEnumerator(ArrayRef Data, uint32_t Offset, + SmallVectorImpl &Refs) { + // 0: Kind + // 2: Padding + // 4: Encoded Integer + // : Name + uint32_t Size = 4 + getEncodedIntegerLength(Data.drop_front(4)); + return Size + getCStringLength(Data.drop_front(Size)); +} + +static uint32_t handleDataMember(ArrayRef Data, uint32_t Offset, + SmallVectorImpl &Refs) { + // 0: Kind + // 2: Padding + // 4: TypeIndex + // 8: Encoded Integer + // : Name + Refs.push_back({TiRefKind::TypeRef, Offset + 4, 1}); + uint32_t Size = 8 + getEncodedIntegerLength(Data.drop_front(8)); + return Size + getCStringLength(Data.drop_front(Size)); +} + +static uint32_t handleOverloadedMethod(ArrayRef Data, uint32_t Offset, + SmallVectorImpl &Refs) { + // 0: Kind + // 2: Padding + // 4: TypeIndex + // 8: Name + Refs.push_back({TiRefKind::TypeRef, Offset + 4, 1}); + return 8 + getCStringLength(Data.drop_front(8)); +} + +static uint32_t handleOneMethod(ArrayRef Data, uint32_t Offset, + SmallVectorImpl &Refs) { + // 0: Kind + // 2: Attributes + // 4: Type + // if (isIntroVirtual) + // 8: VFTableOffset + // : Name + uint32_t Size = 8; + Refs.push_back({TiRefKind::TypeRef, Offset + 4, 1}); + + uint16_t Attrs = support::endian::read16le(Data.drop_front(2).data()); + if (LLVM_UNLIKELY(isIntroVirtual(Attrs))) + Size += 4; + + return Size + getCStringLength(Data.drop_front(Size)); +} + +static uint32_t handleNestedType(ArrayRef Data, uint32_t Offset, + SmallVectorImpl &Refs) { + // 0: Kind + // 2: Padding + // 4: TypeIndex + // 8: Name + Refs.push_back({TiRefKind::TypeRef, Offset + 4, 1}); + return 8 + getCStringLength(Data.drop_front(8)); +} + +static uint32_t handleStaticDataMember(ArrayRef Data, uint32_t Offset, + SmallVectorImpl &Refs) { + // 0: Kind + // 2: Padding + // 4: TypeIndex + // 8: Name + Refs.push_back({TiRefKind::TypeRef, Offset + 4, 1}); + return 8 + getCStringLength(Data.drop_front(8)); +} + +static uint32_t handleVirtualBaseClass(ArrayRef Data, uint32_t Offset, + bool IsIndirect, + SmallVectorImpl &Refs) { + // 0: Kind + // 2: Attrs + // 4: TypeIndex + // 8: TypeIndex + // 12: Encoded Integer + // : Encoded Integer + uint32_t Size = 12; + Refs.push_back({TiRefKind::TypeRef, Offset + 4, 2}); + Size += getEncodedIntegerLength(Data.drop_front(Size)); + Size += getEncodedIntegerLength(Data.drop_front(Size)); + return Size; +} + +static uint32_t handleVFPtr(ArrayRef Data, uint32_t Offset, + SmallVectorImpl &Refs) { + // 0: Kind + // 2: Padding + // 4: TypeIndex + Refs.push_back({TiRefKind::TypeRef, Offset + 4, 1}); + return 8; +} + +static uint32_t handleListContinuation(ArrayRef Data, uint32_t Offset, + SmallVectorImpl &Refs) { + // 0: Kind + // 2: Padding + // 4: TypeIndex + Refs.push_back({TiRefKind::TypeRef, Offset + 4, 1}); + return 8; +} + +static void handleFieldList(ArrayRef Content, + SmallVectorImpl &Refs) { + uint32_t Offset = 0; + uint32_t ThisLen = 0; + while (!Content.empty()) { + TypeLeafKind Kind = + 
static_cast(support::endian::read16le(Content.data())); + switch (Kind) { + case LF_BCLASS: + ThisLen = handleBaseClass(Content, Offset, Refs); + break; + case LF_ENUMERATE: + ThisLen = handleEnumerator(Content, Offset, Refs); + break; + case LF_MEMBER: + ThisLen = handleDataMember(Content, Offset, Refs); + break; + case LF_METHOD: + ThisLen = handleOverloadedMethod(Content, Offset, Refs); + break; + case LF_ONEMETHOD: + ThisLen = handleOneMethod(Content, Offset, Refs); + break; + case LF_NESTTYPE: + ThisLen = handleNestedType(Content, Offset, Refs); + break; + case LF_STMEMBER: + ThisLen = handleStaticDataMember(Content, Offset, Refs); + break; + case LF_VBCLASS: + case LF_IVBCLASS: + ThisLen = + handleVirtualBaseClass(Content, Offset, Kind == LF_VBCLASS, Refs); + break; + case LF_VFUNCTAB: + ThisLen = handleVFPtr(Content, Offset, Refs); + break; + case LF_INDEX: + ThisLen = handleListContinuation(Content, Offset, Refs); + break; + default: + return; + } + Content = Content.drop_front(ThisLen); + Offset += ThisLen; + if (!Content.empty()) { + uint8_t Pad = Content.front(); + if (Pad >= LF_PAD0) { + uint32_t Skip = Pad & 0x0F; + Content = Content.drop_front(Skip); + Offset += Skip; + } + } + } +} + +static void handlePointer(ArrayRef Content, + SmallVectorImpl &Refs) { + Refs.push_back({TiRefKind::TypeRef, 0, 1}); + + uint32_t Attrs = support::endian::read32le(Content.drop_front(4).data()); + if (isMemberPointer(Attrs)) + Refs.push_back({TiRefKind::TypeRef, 8, 1}); +} + +static void discoverTypeIndices(ArrayRef Content, TypeLeafKind Kind, + SmallVectorImpl &Refs) { + uint32_t Count; + // FIXME: In the future it would be nice if we could avoid hardcoding these + // values. One idea is to define some structures representing these types + // that would allow the use of offsetof(). 
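+  // A hedged sketch of that idea (hypothetical, not part of this patch):
+  // per-leaf layout structs would let the hardcoded offsets below be spelled
+  // as offsetof() expressions, e.g.
+  //
+  //   struct FuncIdLayout {                 // assumed LF_FUNC_ID layout
+  //     support::ulittle32_t ParentScope;   // the IndexRef at offset 0
+  //     support::ulittle32_t FunctionType;  // the TypeRef at offset 4
+  //   };
+  //   static_assert(offsetof(FuncIdLayout, FunctionType) == 4, "layout");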
+ switch (Kind) { + case TypeLeafKind::LF_FUNC_ID: + Refs.push_back({TiRefKind::IndexRef, 0, 1}); + Refs.push_back({TiRefKind::TypeRef, 4, 1}); + break; + case TypeLeafKind::LF_MFUNC_ID: + Refs.push_back({TiRefKind::TypeRef, 0, 2}); + break; + case TypeLeafKind::LF_STRING_ID: + Refs.push_back({TiRefKind::IndexRef, 0, 1}); + break; + case TypeLeafKind::LF_SUBSTR_LIST: + Count = support::endian::read32le(Content.data()); + if (Count > 0) + Refs.push_back({TiRefKind::IndexRef, 4, Count}); + break; + case TypeLeafKind::LF_BUILDINFO: + Count = support::endian::read16le(Content.data()); + if (Count > 0) + Refs.push_back({TiRefKind::IndexRef, 2, Count}); + break; + case TypeLeafKind::LF_UDT_SRC_LINE: + Refs.push_back({TiRefKind::TypeRef, 0, 1}); + Refs.push_back({TiRefKind::IndexRef, 4, 1}); + break; + case TypeLeafKind::LF_UDT_MOD_SRC_LINE: + Refs.push_back({TiRefKind::TypeRef, 0, 1}); + break; + case TypeLeafKind::LF_MODIFIER: + Refs.push_back({TiRefKind::TypeRef, 0, 1}); + break; + case TypeLeafKind::LF_PROCEDURE: + Refs.push_back({TiRefKind::TypeRef, 0, 1}); + Refs.push_back({TiRefKind::TypeRef, 8, 1}); + break; + case TypeLeafKind::LF_MFUNCTION: + Refs.push_back({TiRefKind::TypeRef, 0, 3}); + Refs.push_back({TiRefKind::TypeRef, 16, 1}); + break; + case TypeLeafKind::LF_ARGLIST: + Count = support::endian::read32le(Content.data()); + if (Count > 0) + Refs.push_back({TiRefKind::TypeRef, 4, Count}); + break; + case TypeLeafKind::LF_ARRAY: + Refs.push_back({TiRefKind::TypeRef, 0, 2}); + break; + case TypeLeafKind::LF_CLASS: + case TypeLeafKind::LF_STRUCTURE: + case TypeLeafKind::LF_INTERFACE: + Refs.push_back({TiRefKind::TypeRef, 4, 3}); + break; + case TypeLeafKind::LF_UNION: + Refs.push_back({TiRefKind::TypeRef, 4, 1}); + break; + case TypeLeafKind::LF_ENUM: + Refs.push_back({TiRefKind::TypeRef, 4, 2}); + break; + case TypeLeafKind::LF_BITFIELD: + Refs.push_back({TiRefKind::TypeRef, 0, 1}); + break; + case TypeLeafKind::LF_VFTABLE: + Refs.push_back({TiRefKind::TypeRef, 0, 2}); + break; + case TypeLeafKind::LF_VTSHAPE: + break; + case TypeLeafKind::LF_METHODLIST: + handleMethodOverloadList(Content, Refs); + break; + case TypeLeafKind::LF_FIELDLIST: + handleFieldList(Content, Refs); + break; + case TypeLeafKind::LF_POINTER: + handlePointer(Content, Refs); + break; + default: + break; + } +} + +static bool discoverTypeIndices(ArrayRef Content, SymbolKind Kind, + SmallVectorImpl &Refs) { + uint32_t Count; + // FIXME: In the future it would be nice if we could avoid hardcoding these + // values. One idea is to define some structures representing these types + // that would allow the use of offsetof(). 
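+  // Complementing the sketch above (hedged, from the CodeView PROCSYM32
+  // layout): the 24 used below for S_GPROC32 and friends is the byte offset
+  // of the LF_FUNC_ID index past six leading 4-byte fields (PtrParent,
+  // PtrEnd, PtrNext, CodeSize, DbgStart, DbgEnd); a layout struct would make
+  // that an offsetof() as well.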
+  switch (Kind) {
+  case SymbolKind::S_GPROC32:
+  case SymbolKind::S_LPROC32:
+  case SymbolKind::S_GPROC32_ID:
+  case SymbolKind::S_LPROC32_ID:
+  case SymbolKind::S_LPROC32_DPC:
+  case SymbolKind::S_LPROC32_DPC_ID:
+    Refs.push_back({TiRefKind::IndexRef, 24, 1}); // LF_FUNC_ID
+    break;
+  case SymbolKind::S_UDT:
+    Refs.push_back({TiRefKind::TypeRef, 0, 1}); // UDT
+    break;
+  case SymbolKind::S_GDATA32:
+  case SymbolKind::S_LDATA32:
+    Refs.push_back({TiRefKind::TypeRef, 0, 1}); // Type
+    break;
+  case SymbolKind::S_BUILDINFO:
+    Refs.push_back({TiRefKind::IndexRef, 0, 1}); // Compile flags
+    break;
+  case SymbolKind::S_LTHREAD32:
+  case SymbolKind::S_GTHREAD32:
+    Refs.push_back({TiRefKind::TypeRef, 0, 1}); // Type
+    break;
+  case SymbolKind::S_FILESTATIC:
+    Refs.push_back({TiRefKind::TypeRef, 0, 1}); // Type
+    break;
+  case SymbolKind::S_LOCAL:
+    Refs.push_back({TiRefKind::TypeRef, 0, 1}); // Type
+    break;
+  case SymbolKind::S_CONSTANT:
+    Refs.push_back({TiRefKind::TypeRef, 0, 1}); // Type
+    break;
+  case SymbolKind::S_REGREL32:
+    Refs.push_back({TiRefKind::TypeRef, 4, 1}); // Type
+    break;
+  case SymbolKind::S_CALLSITEINFO:
+    Refs.push_back({TiRefKind::TypeRef, 8, 1}); // Call signature
+    break;
+  case SymbolKind::S_CALLERS:
+  case SymbolKind::S_CALLEES:
+    // The record is a count followed by an array of type indices.
+    Count = *reinterpret_cast<const support::ulittle32_t *>(Content.data());
+    Refs.push_back({TiRefKind::IndexRef, 4, Count}); // Callees
+    break;
+  case SymbolKind::S_INLINESITE:
+    Refs.push_back({TiRefKind::IndexRef, 8, 1}); // ID of inlinee
+    break;
+  case SymbolKind::S_HEAPALLOCSITE:
+    // FIXME: It's not clear if this is a type or item reference.
+    Refs.push_back({TiRefKind::IndexRef, 8, 1}); // signature
+    break;
+
+  // Defranges don't have types, just registers and code offsets.
+  case SymbolKind::S_DEFRANGE_REGISTER:
+  case SymbolKind::S_DEFRANGE_REGISTER_REL:
+  case SymbolKind::S_DEFRANGE_FRAMEPOINTER_REL:
+  case SymbolKind::S_DEFRANGE_FRAMEPOINTER_REL_FULL_SCOPE:
+  case SymbolKind::S_DEFRANGE_SUBFIELD_REGISTER:
+  case SymbolKind::S_DEFRANGE_SUBFIELD:
+    break;
+
+  // No type references.
+  case SymbolKind::S_LABEL32:
+  case SymbolKind::S_OBJNAME:
+  case SymbolKind::S_COMPILE:
+  case SymbolKind::S_COMPILE2:
+  case SymbolKind::S_COMPILE3:
+  case SymbolKind::S_ENVBLOCK:
+  case SymbolKind::S_BLOCK32:
+  case SymbolKind::S_FRAMEPROC:
+    break;
+  // Scope ending symbols.
+  case SymbolKind::S_END:
+  case SymbolKind::S_INLINESITE_END:
+  case SymbolKind::S_PROC_ID_END:
+    break;
+  default:
+    return false; // Unknown symbol.
+ } + return true; +} + +void llvm::codeview::discoverTypeIndices(const CVType &Type, + SmallVectorImpl &Refs) { + ::discoverTypeIndices(Type.content(), Type.kind(), Refs); +} + +void llvm::codeview::discoverTypeIndices(const CVType &Type, + SmallVectorImpl &Indices) { + + Indices.clear(); + + SmallVector Refs; + discoverTypeIndices(Type, Refs); + if (Refs.empty()) + return; + + BinaryStreamReader Reader(Type.content(), support::little); + for (const auto &Ref : Refs) { + Reader.setOffset(Ref.Offset); + FixedStreamArray Run; + cantFail(Reader.readArray(Run, Ref.Count)); + Indices.append(Run.begin(), Run.end()); + } +} + +void llvm::codeview::discoverTypeIndices(ArrayRef RecordData, + SmallVectorImpl &Refs) { + const RecordPrefix *P = + reinterpret_cast(RecordData.data()); + TypeLeafKind K = static_cast(uint16_t(P->RecordKind)); + ::discoverTypeIndices(RecordData.drop_front(sizeof(RecordPrefix)), K, Refs); +} + +bool llvm::codeview::discoverTypeIndices(const CVSymbol &Sym, + SmallVectorImpl &Refs) { + SymbolKind K = Sym.kind(); + return ::discoverTypeIndices(Sym.content(), K, Refs); +} diff --git a/interpreter/llvm/src/lib/DebugInfo/CodeView/TypeName.cpp b/interpreter/llvm/src/lib/DebugInfo/CodeView/TypeName.cpp new file mode 100644 index 0000000000000..2eb8b81862f3c --- /dev/null +++ b/interpreter/llvm/src/lib/DebugInfo/CodeView/TypeName.cpp @@ -0,0 +1,243 @@ +//===- TypeName.cpp ------------------------------------------- *- C++ --*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/CodeView/TypeName.h" + +#include "llvm/ADT/SmallString.h" +#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h" +#include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h" +#include "llvm/Support/FormatVariadic.h" + +using namespace llvm; +using namespace llvm::codeview; + +namespace { +class TypeNameComputer : public TypeVisitorCallbacks { + /// The type collection. Used to calculate names of nested types. + TypeCollection &Types; + TypeIndex CurrentTypeIndex = TypeIndex::None(); + + /// Name of the current type. Only valid before visitTypeEnd. + SmallString<256> Name; + +public: + explicit TypeNameComputer(TypeCollection &Types) : Types(Types) {} + + StringRef name() const { return Name; } + + /// Paired begin/end actions for all types. Receives all record data, + /// including the fixed-length record prefix. + Error visitTypeBegin(CVType &Record) override; + Error visitTypeBegin(CVType &Record, TypeIndex Index) override; + Error visitTypeEnd(CVType &Record) override; + +#define TYPE_RECORD(EnumName, EnumVal, Name) \ + Error visitKnownRecord(CVType &CVR, Name##Record &Record) override; +#define TYPE_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) +#define MEMBER_RECORD(EnumName, EnumVal, Name) +#include "llvm/DebugInfo/CodeView/CodeViewTypes.def" +}; +} // namespace + +Error TypeNameComputer::visitTypeBegin(CVType &Record) { + llvm_unreachable("Must call visitTypeBegin with a TypeIndex!"); + return Error::success(); +} + +Error TypeNameComputer::visitTypeBegin(CVType &Record, TypeIndex Index) { + // Reset Name to the empty string. If the visitor sets it, we know it. 
+ Name = ""; + CurrentTypeIndex = Index; + return Error::success(); +} + +Error TypeNameComputer::visitTypeEnd(CVType &CVR) { return Error::success(); } + +Error TypeNameComputer::visitKnownRecord(CVType &CVR, + FieldListRecord &FieldList) { + Name = ""; + return Error::success(); +} + +Error TypeNameComputer::visitKnownRecord(CVRecord &CVR, + StringIdRecord &String) { + Name = String.getString(); + return Error::success(); +} + +Error TypeNameComputer::visitKnownRecord(CVType &CVR, ArgListRecord &Args) { + auto Indices = Args.getIndices(); + uint32_t Size = Indices.size(); + Name = "("; + for (uint32_t I = 0; I < Size; ++I) { + assert(Indices[I] < CurrentTypeIndex); + + Name.append(Types.getTypeName(Indices[I])); + if (I + 1 != Size) + Name.append(", "); + } + Name.push_back(')'); + return Error::success(); +} + +Error TypeNameComputer::visitKnownRecord(CVType &CVR, + StringListRecord &Strings) { + auto Indices = Strings.getIndices(); + uint32_t Size = Indices.size(); + Name = "\""; + for (uint32_t I = 0; I < Size; ++I) { + Name.append(Types.getTypeName(Indices[I])); + if (I + 1 != Size) + Name.append("\" \""); + } + Name.push_back('\"'); + return Error::success(); +} + +Error TypeNameComputer::visitKnownRecord(CVType &CVR, ClassRecord &Class) { + Name = Class.getName(); + return Error::success(); +} + +Error TypeNameComputer::visitKnownRecord(CVType &CVR, UnionRecord &Union) { + Name = Union.getName(); + return Error::success(); +} + +Error TypeNameComputer::visitKnownRecord(CVType &CVR, EnumRecord &Enum) { + Name = Enum.getName(); + return Error::success(); +} + +Error TypeNameComputer::visitKnownRecord(CVType &CVR, ArrayRecord &AT) { + Name = AT.getName(); + return Error::success(); +} + +Error TypeNameComputer::visitKnownRecord(CVType &CVR, VFTableRecord &VFT) { + Name = VFT.getName(); + return Error::success(); +} + +Error TypeNameComputer::visitKnownRecord(CVType &CVR, MemberFuncIdRecord &Id) { + Name = Id.getName(); + return Error::success(); +} + +Error TypeNameComputer::visitKnownRecord(CVType &CVR, ProcedureRecord &Proc) { + StringRef Ret = Types.getTypeName(Proc.getReturnType()); + StringRef Params = Types.getTypeName(Proc.getArgumentList()); + Name = formatv("{0} {1}", Ret, Params).sstr<256>(); + return Error::success(); +} + +Error TypeNameComputer::visitKnownRecord(CVType &CVR, + MemberFunctionRecord &MF) { + StringRef Ret = Types.getTypeName(MF.getReturnType()); + StringRef Class = Types.getTypeName(MF.getClassType()); + StringRef Params = Types.getTypeName(MF.getArgumentList()); + Name = formatv("{0} {1}::{2}", Ret, Class, Params).sstr<256>(); + return Error::success(); +} + +Error TypeNameComputer::visitKnownRecord(CVType &CVR, FuncIdRecord &Func) { + Name = Func.getName(); + return Error::success(); +} + +Error TypeNameComputer::visitKnownRecord(CVType &CVR, TypeServer2Record &TS) { + Name = TS.getName(); + return Error::success(); +} + +Error TypeNameComputer::visitKnownRecord(CVType &CVR, PointerRecord &Ptr) { + + if (Ptr.isPointerToMember()) { + const MemberPointerInfo &MI = Ptr.getMemberInfo(); + + StringRef Pointee = Types.getTypeName(Ptr.getReferentType()); + StringRef Class = Types.getTypeName(MI.getContainingType()); + Name = formatv("{0} {1}::*", Pointee, Class); + } else { + if (Ptr.isConst()) + Name.append("const "); + if (Ptr.isVolatile()) + Name.append("volatile "); + if (Ptr.isUnaligned()) + Name.append("__unaligned "); + + Name.append(Types.getTypeName(Ptr.getReferentType())); + + if (Ptr.getMode() == PointerMode::LValueReference) + Name.append("&"); + 
else if (Ptr.getMode() == PointerMode::RValueReference)
+      Name.append("&&");
+    else if (Ptr.getMode() == PointerMode::Pointer)
+      Name.append("*");
+  }
+  return Error::success();
+}
+
+Error TypeNameComputer::visitKnownRecord(CVType &CVR, ModifierRecord &Mod) {
+  uint16_t Mods = static_cast<uint16_t>(Mod.getModifiers());
+
+  SmallString<256> TypeName;
+  if (Mods & uint16_t(ModifierOptions::Const))
+    Name.append("const ");
+  if (Mods & uint16_t(ModifierOptions::Volatile))
+    Name.append("volatile ");
+  if (Mods & uint16_t(ModifierOptions::Unaligned))
+    Name.append("__unaligned ");
+  Name.append(Types.getTypeName(Mod.getModifiedType()));
+  return Error::success();
+}
+
+Error TypeNameComputer::visitKnownRecord(CVType &CVR,
+                                         VFTableShapeRecord &Shape) {
+  Name = formatv("<vftable {0} methods>", Shape.getEntryCount());
+  return Error::success();
+}
+
+Error TypeNameComputer::visitKnownRecord(
+    CVType &CVR, UdtModSourceLineRecord &ModSourceLine) {
+  return Error::success();
+}
+
+Error TypeNameComputer::visitKnownRecord(CVType &CVR,
+                                         UdtSourceLineRecord &SourceLine) {
+  return Error::success();
+}
+
+Error TypeNameComputer::visitKnownRecord(CVType &CVR, BitFieldRecord &BF) {
+  return Error::success();
+}
+
+Error TypeNameComputer::visitKnownRecord(CVType &CVR,
+                                         MethodOverloadListRecord &Overloads) {
+  return Error::success();
+}
+
+Error TypeNameComputer::visitKnownRecord(CVType &CVR, BuildInfoRecord &BI) {
+  return Error::success();
+}
+
+Error TypeNameComputer::visitKnownRecord(CVType &CVR, LabelRecord &R) {
+  return Error::success();
+}
+
+std::string llvm::codeview::computeTypeName(TypeCollection &Types,
+                                            TypeIndex Index) {
+  TypeNameComputer Computer(Types);
+  CVType Record = Types.getType(Index);
+  if (auto EC = visitTypeRecord(Record, Index, Computer)) {
+    consumeError(std::move(EC));
+    return "";
+  }
+  return Computer.name();
+}
diff --git a/interpreter/llvm/src/lib/DebugInfo/CodeView/TypeSerializer.cpp b/interpreter/llvm/src/lib/DebugInfo/CodeView/TypeSerializer.cpp
index fd4d1853fa544..003c13b4a20d0 100644
--- a/interpreter/llvm/src/lib/DebugInfo/CodeView/TypeSerializer.cpp
+++ b/interpreter/llvm/src/lib/DebugInfo/CodeView/TypeSerializer.cpp
@@ -1,4 +1,4 @@
-//===- TypeSerialzier.cpp ---------------------------------------*- C++ -*-===//
+//===- TypeSerializer.cpp -------------------------------------------------===//
 //
 // The LLVM Compiler Infrastructure
 //
@@ -8,29 +8,136 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/DebugInfo/CodeView/TypeSerializer.h"
-
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/CodeView/RecordSerialization.h"
+#include "llvm/DebugInfo/CodeView/TypeIndex.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/BinaryByteStream.h"
 #include "llvm/Support/BinaryStreamWriter.h"
-
-#include <string.h>
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/Error.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <cstring>
 
 using namespace llvm;
 using namespace llvm::codeview;
 
-bool TypeSerializer::isInFieldList() const {
-  return TypeKind.hasValue() && *TypeKind == TypeLeafKind::LF_FIELDLIST;
+namespace {
+
+struct HashedType {
+  uint64_t Hash;
+  const uint8_t *Data;
+  unsigned Size; // FIXME: Go to uint16_t?
+  TypeIndex Index;
+};
+
+/// Wrapper around a pointer to a HashedType. Hash and equality operations are
+/// based on data in the pointee.
+struct HashedTypePtr {
+  HashedTypePtr() = default;
+  HashedTypePtr(HashedType *Ptr) : Ptr(Ptr) {}
+
+  HashedType *Ptr = nullptr;
+};
+
+} // end anonymous namespace
+
+namespace llvm {
+
+template <> struct DenseMapInfo<HashedTypePtr> {
+  static inline HashedTypePtr getEmptyKey() { return HashedTypePtr(nullptr); }
+
+  static inline HashedTypePtr getTombstoneKey() {
+    return HashedTypePtr(reinterpret_cast<HashedType *>(1));
+  }
+
+  static unsigned getHashValue(HashedTypePtr Val) {
+    assert(Val.Ptr != getEmptyKey().Ptr && Val.Ptr != getTombstoneKey().Ptr);
+    return Val.Ptr->Hash;
+  }
+
+  static bool isEqual(HashedTypePtr LHSP, HashedTypePtr RHSP) {
+    HashedType *LHS = LHSP.Ptr;
+    HashedType *RHS = RHSP.Ptr;
+    if (RHS == getEmptyKey().Ptr || RHS == getTombstoneKey().Ptr)
+      return LHS == RHS;
+    if (LHS->Hash != RHS->Hash || LHS->Size != RHS->Size)
+      return false;
+    return ::memcmp(LHS->Data, RHS->Data, LHS->Size) == 0;
+  }
+};
+
+} // end namespace llvm
+
+/// Private implementation so that we don't leak our DenseMap instantiations to
+/// users.
+class llvm::codeview::TypeHasher {
+private:
+  /// Storage for type record provided by the caller. Records will outlive the
+  /// hasher object, so they should be allocated here.
+  BumpPtrAllocator &RecordStorage;
+
+  /// Storage for hash keys. These only need to live as long as the hashing
+  /// operation.
+  BumpPtrAllocator KeyStorage;
+
+  /// Hash table. We really want a DenseMap<ArrayRef<uint8_t>, TypeIndex> here,
+  /// but DenseMap is inefficient when the keys are long (like type records)
+  /// because it recomputes the hash value of every key when it grows. This
+  /// value type stores the hash out of line in KeyStorage, so that table
+  /// entries are small and easy to rehash.
+  DenseSet<HashedTypePtr> HashedRecords;
+
+public:
+  TypeHasher(BumpPtrAllocator &RecordStorage) : RecordStorage(RecordStorage) {}
+
+  void reset() { HashedRecords.clear(); }
+
+  /// Takes the bytes of type record, inserts them into the hash table, saves
+  /// them, and returns a pointer to an identical stable type record along with
+  /// its type index in the destination stream.
+  TypeIndex getOrCreateRecord(ArrayRef<uint8_t> &Record, TypeIndex TI);
+};
+
+TypeIndex TypeHasher::getOrCreateRecord(ArrayRef<uint8_t> &Record,
+                                        TypeIndex TI) {
+  assert(Record.size() < UINT32_MAX && "Record too big");
+  assert(Record.size() % 4 == 0 && "Record is not aligned to 4 bytes!");
+
+  // Compute the hash up front so we can store it in the key.
+  HashedType TempHashedType = {hash_value(Record), Record.data(),
+                               unsigned(Record.size()), TI};
+  auto Result = HashedRecords.insert(HashedTypePtr(&TempHashedType));
+  HashedType *&Hashed = Result.first->Ptr;
+
+  if (Result.second) {
+    // This was a new type record. We need stable storage for both the key and
+    // the record. The record should outlive the hashing operation.
+    Hashed = KeyStorage.Allocate<HashedType>();
+    *Hashed = TempHashedType;
+
+    uint8_t *Stable = RecordStorage.Allocate<uint8_t>(Record.size());
+    memcpy(Stable, Record.data(), Record.size());
+    Hashed->Data = Stable;
+    assert(Hashed->Size == Record.size());
+  }
+
+  // Update the caller's copy of Record to point to a stable copy.
+ Record = ArrayRef(Hashed->Data, Hashed->Size); + return Hashed->Index; } -TypeIndex TypeSerializer::calcNextTypeIndex() const { - if (LastTypeIndex.isNoneType()) - return TypeIndex(TypeIndex::FirstNonSimpleIndex); - else - return TypeIndex(LastTypeIndex.getIndex() + 1); +TypeIndex TypeSerializer::nextTypeIndex() const { + return TypeIndex::fromArrayIndex(SeenRecords.size()); } -TypeIndex TypeSerializer::incrementTypeIndex() { - TypeIndex Previous = LastTypeIndex; - LastTypeIndex = calcNextTypeIndex(); - return Previous; +bool TypeSerializer::isInFieldList() const { + return TypeKind.hasValue() && *TypeKind == TypeLeafKind::LF_FIELDLIST; } MutableArrayRef TypeSerializer::getCurrentSubRecordData() { @@ -51,21 +158,6 @@ Error TypeSerializer::writeRecordPrefix(TypeLeafKind Kind) { return Error::success(); } -TypeIndex -TypeSerializer::insertRecordBytesPrivate(MutableArrayRef Record) { - assert(Record.size() % 4 == 0 && "Record is not aligned to 4 bytes!"); - - StringRef S(reinterpret_cast(Record.data()), Record.size()); - - TypeIndex NextTypeIndex = calcNextTypeIndex(); - auto Result = HashedRecords.try_emplace(S, NextTypeIndex); - if (Result.second) { - LastTypeIndex = NextTypeIndex; - SeenRecords.push_back(Record); - } - return Result.first->getValue(); -} - Expected> TypeSerializer::addPadding(MutableArrayRef Record) { uint32_t Align = Record.size() % 4; @@ -83,27 +175,79 @@ TypeSerializer::addPadding(MutableArrayRef Record) { return MutableArrayRef(Record.data(), Record.size() + N); } -TypeSerializer::TypeSerializer(BumpPtrAllocator &Storage) - : RecordStorage(Storage), LastTypeIndex(), - RecordBuffer(MaxRecordLength * 2), - Stream(RecordBuffer, llvm::support::little), Writer(Stream), +TypeSerializer::TypeSerializer(BumpPtrAllocator &Storage, bool Hash) + : RecordStorage(Storage), RecordBuffer(MaxRecordLength * 2), + Stream(RecordBuffer, support::little), Writer(Stream), Mapping(Writer) { // RecordBuffer needs to be able to hold enough data so that if we are 1 // byte short of MaxRecordLen, and then we try to write MaxRecordLen bytes, // we won't overflow. 
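+  // Illustrative worst case: the writer sits at offset MaxRecordLength - 1
+  // and another MaxRecordLength bytes arrive; the total, 2 * MaxRecordLength
+  // - 1, still fits the buffer, so the oversize record is caught by a length
+  // check rather than by writing out of bounds.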
+ if (Hash) + Hasher = llvm::make_unique(Storage); } -ArrayRef> TypeSerializer::records() const { +TypeSerializer::~TypeSerializer() = default; + +ArrayRef> TypeSerializer::records() const { return SeenRecords; } -TypeIndex TypeSerializer::getLastTypeIndex() const { return LastTypeIndex; } +void TypeSerializer::reset() { + if (Hasher) + Hasher->reset(); + Writer.setOffset(0); + CurrentSegment = RecordSegment(); + FieldListSegments.clear(); + TypeKind.reset(); + MemberKind.reset(); + SeenRecords.clear(); +} + +TypeIndex TypeSerializer::insertRecordBytes(ArrayRef &Record) { + assert(!TypeKind.hasValue() && "Already in a type mapping!"); + assert(Writer.getOffset() == 0 && "Stream has data already!"); + + if (Hasher) { + TypeIndex ActualTI = Hasher->getOrCreateRecord(Record, nextTypeIndex()); + if (nextTypeIndex() == ActualTI) + SeenRecords.push_back(Record); + return ActualTI; + } + + TypeIndex NewTI = nextTypeIndex(); + uint8_t *Stable = RecordStorage.Allocate(Record.size()); + memcpy(Stable, Record.data(), Record.size()); + Record = ArrayRef(Stable, Record.size()); + SeenRecords.push_back(Record); + return NewTI; +} -TypeIndex TypeSerializer::insertRecordBytes(MutableArrayRef Record) { +TypeIndex TypeSerializer::insertRecord(const RemappedType &Record) { assert(!TypeKind.hasValue() && "Already in a type mapping!"); assert(Writer.getOffset() == 0 && "Stream has data already!"); - return insertRecordBytesPrivate(Record); + TypeIndex TI; + ArrayRef OriginalData = Record.OriginalRecord.RecordData; + if (Record.Mappings.empty()) { + // This record did not remap any type indices. Just write it. + return insertRecordBytes(OriginalData); + } + + // At least one type index was remapped. Before we can hash it we have to + // copy the full record bytes, re-write each type index, then hash the copy. + // We do this in temporary storage since only the DenseMap can decide whether + // this record already exists, and if it does we don't want the memory to + // stick around. + RemapStorage.resize(OriginalData.size()); + ::memcpy(&RemapStorage[0], OriginalData.data(), OriginalData.size()); + uint8_t *ContentBegin = RemapStorage.data() + sizeof(RecordPrefix); + for (const auto &M : Record.Mappings) { + // First 4 bytes of every record are the record prefix, but the mapping + // offset is relative to the content which starts after. + *(TypeIndex *)(ContentBegin + M.first) = M.second; + } + auto RemapRef = makeArrayRef(RemapStorage); + return insertRecordBytes(RemapRef); } Error TypeSerializer::visitTypeBegin(CVType &Record) { @@ -137,11 +281,14 @@ Expected TypeSerializer::visitTypeEndGetIndex(CVType &Record) { reinterpret_cast(ThisRecordData.data()); Prefix->RecordLen = ThisRecordData.size() - sizeof(uint16_t); - uint8_t *Copy = RecordStorage.Allocate(ThisRecordData.size()); - ::memcpy(Copy, ThisRecordData.data(), ThisRecordData.size()); - ThisRecordData = MutableArrayRef(Copy, ThisRecordData.size()); - Record = CVType(*TypeKind, ThisRecordData); - TypeIndex InsertedTypeIndex = insertRecordBytesPrivate(ThisRecordData); + Record.Type = *TypeKind; + Record.RecordData = ThisRecordData; + + // insertRecordBytes assumes we're not in a mapping, so do this first. + TypeKind.reset(); + Writer.setOffset(0); + + TypeIndex InsertedTypeIndex = insertRecordBytes(Record.RecordData); // Write out each additional segment in reverse order, and update each // record's continuation index to point to the previous one. 
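+  // Hedged illustration: for a field list split into segments S0 -> S1 -> S2,
+  // the tail S2 was inserted above; S1 goes in next with its LF_INDEX
+  // placeholder patched to S2's TypeIndex, then S0 pointing at S1, and S0's
+  // index is what the caller ultimately receives.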
@@ -151,11 +298,9 @@ Expected TypeSerializer::visitTypeEndGetIndex(CVType &Record) { reinterpret_cast(CIBytes.data()); assert(*CI == 0xB0C0B0C0 && "Invalid TypeIndex placeholder"); *CI = InsertedTypeIndex.getIndex(); - InsertedTypeIndex = insertRecordBytesPrivate(X); + InsertedTypeIndex = insertRecordBytes(X); } - TypeKind.reset(); - Writer.setOffset(0); FieldListSegments.clear(); CurrentSegment.SubRecords.clear(); @@ -204,7 +349,7 @@ Error TypeSerializer::visitMemberEnd(CVMemberRecord &Record) { uint8_t *SegmentBytes = RecordStorage.Allocate(LengthWithSize); auto SavedSegment = MutableArrayRef(SegmentBytes, LengthWithSize); - MutableBinaryByteStream CS(SavedSegment, llvm::support::little); + MutableBinaryByteStream CS(SavedSegment, support::little); BinaryStreamWriter CW(CS); if (auto EC = CW.writeBytes(CopyData)) return EC; diff --git a/interpreter/llvm/src/lib/DebugInfo/CodeView/TypeStreamMerger.cpp b/interpreter/llvm/src/lib/DebugInfo/CodeView/TypeStreamMerger.cpp index aad20ae6dda16..bff3516203a08 100644 --- a/interpreter/llvm/src/lib/DebugInfo/CodeView/TypeStreamMerger.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/CodeView/TypeStreamMerger.cpp @@ -10,13 +10,11 @@ #include "llvm/DebugInfo/CodeView/TypeStreamMerger.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h" #include "llvm/DebugInfo/CodeView/TypeDeserializer.h" #include "llvm/DebugInfo/CodeView/TypeIndex.h" +#include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h" #include "llvm/DebugInfo/CodeView/TypeRecord.h" #include "llvm/DebugInfo/CodeView/TypeTableBuilder.h" -#include "llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h" -#include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h" #include "llvm/Support/Error.h" #include "llvm/Support/ScopedPrinter.h" @@ -57,36 +55,37 @@ namespace { /// streams: an item (or IPI) stream and a type stream, as this is what is /// actually stored in the final PDB. We choose which records go where by /// looking at the record kind. -class TypeStreamMerger : public TypeVisitorCallbacks { +class TypeStreamMerger { public: - TypeStreamMerger(TypeTableBuilder &DestIdStream, - TypeTableBuilder &DestTypeStream, TypeServerHandler *Handler) - : DestIdStream(DestIdStream), DestTypeStream(DestTypeStream), - FieldListBuilder(DestTypeStream), Handler(Handler) {} + explicit TypeStreamMerger(SmallVectorImpl &SourceToDest) + : IndexMap(SourceToDest) { + SourceToDest.clear(); + } static const TypeIndex Untranslated; -/// TypeVisitorCallbacks overrides. 
-#define TYPE_RECORD(EnumName, EnumVal, Name) \ - Error visitKnownRecord(CVType &CVR, Name##Record &Record) override; -#define MEMBER_RECORD(EnumName, EnumVal, Name) \ - Error visitKnownMember(CVMemberRecord &CVR, Name##Record &Record) override; -#define TYPE_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) -#define MEMBER_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) -#include "llvm/DebugInfo/CodeView/TypeRecords.def" + Error mergeTypesAndIds(TypeTableBuilder &DestIds, TypeTableBuilder &DestTypes, + const CVTypeArray &IdsAndTypes); + Error mergeIdRecords(TypeTableBuilder &Dest, + ArrayRef TypeSourceToDest, + const CVTypeArray &Ids); + Error mergeTypeRecords(TypeTableBuilder &Dest, const CVTypeArray &Types); - Error visitUnknownType(CVType &Record) override; +private: + Error doit(const CVTypeArray &Types); - Error visitTypeBegin(CVType &Record) override; - Error visitTypeEnd(CVType &Record) override; - Error visitMemberEnd(CVMemberRecord &Record) override; + Error remapAllTypes(const CVTypeArray &Types); - Error mergeStream(const CVTypeArray &Types); + Error remapType(const CVType &Type); -private: void addMapping(TypeIndex Idx); - bool remapIndex(TypeIndex &Idx); + bool remapTypeIndex(TypeIndex &Idx); + bool remapItemIndex(TypeIndex &Idx); + + bool remapIndices(RemappedType &Record, ArrayRef Refs); + + bool remapIndex(TypeIndex &Idx, ArrayRef Map); size_t slotForIndex(TypeIndex Idx) const { assert(!Idx.isSimple() && "simple type indices have no slots"); @@ -97,73 +96,52 @@ class TypeStreamMerger : public TypeVisitorCallbacks { return llvm::make_error(cv_error_code::corrupt_record); } - template - Error writeRecord(RecordType &R, bool RemapSuccess) { + Error writeRecord(TypeTableBuilder &Dest, const RemappedType &Record, + bool RemapSuccess) { TypeIndex DestIdx = Untranslated; if (RemapSuccess) - DestIdx = DestTypeStream.writeKnownType(R); + DestIdx = Dest.writeSerializedRecord(Record); addMapping(DestIdx); return Error::success(); } - template - Error writeIdRecord(RecordType &R, bool RemapSuccess) { - TypeIndex DestIdx = Untranslated; - if (RemapSuccess) - DestIdx = DestIdStream.writeKnownType(R); - addMapping(DestIdx); - return Error::success(); - } - - template - Error writeMember(RecordType &R, bool RemapSuccess) { - if (RemapSuccess) - FieldListBuilder.writeMemberType(R); - else - HadUntranslatedMember = true; - return Error::success(); - } - Optional LastError; bool IsSecondPass = false; - bool HadUntranslatedMember = false; - unsigned NumBadIndices = 0; - BumpPtrAllocator Allocator; + TypeIndex CurIndex{TypeIndex::FirstNonSimpleIndex}; - TypeTableBuilder &DestIdStream; - TypeTableBuilder &DestTypeStream; - FieldListRecordBuilder FieldListBuilder; - TypeServerHandler *Handler; + TypeTableBuilder *DestIdStream = nullptr; + TypeTableBuilder *DestTypeStream = nullptr; - TypeIndex CurIndex{TypeIndex::FirstNonSimpleIndex}; + // If we're only mapping id records, this array contains the mapping for + // type records. + ArrayRef TypeLookup; /// Map from source type index to destination type index. Indexed by source /// type index minus 0x1000. 
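+  /// For example: source index 0x1003 occupies slot 3, so once that slot has
+  /// been filled a lookup is simply IndexMap[slotForIndex(Idx)].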
- SmallVector IndexMap; + SmallVectorImpl &IndexMap; }; } // end anonymous namespace const TypeIndex TypeStreamMerger::Untranslated(SimpleTypeKind::NotTranslated); -Error TypeStreamMerger::visitTypeBegin(CVRecord &Rec) { - return Error::success(); -} - -Error TypeStreamMerger::visitTypeEnd(CVRecord &Rec) { - CurIndex = TypeIndex(CurIndex.getIndex() + 1); - if (!IsSecondPass) - assert(IndexMap.size() == slotForIndex(CurIndex) && - "visitKnownRecord should add one index map entry"); - return Error::success(); -} - -Error TypeStreamMerger::visitMemberEnd(CVMemberRecord &Rec) { - return Error::success(); +static bool isIdRecord(TypeLeafKind K) { + switch (K) { + case TypeLeafKind::LF_FUNC_ID: + case TypeLeafKind::LF_MFUNC_ID: + case TypeLeafKind::LF_STRING_ID: + case TypeLeafKind::LF_SUBSTR_LIST: + case TypeLeafKind::LF_BUILDINFO: + case TypeLeafKind::LF_UDT_SRC_LINE: + case TypeLeafKind::LF_UDT_MOD_SRC_LINE: + return true; + default: + return false; + } } void TypeStreamMerger::addMapping(TypeIndex Idx) { @@ -177,7 +155,7 @@ void TypeStreamMerger::addMapping(TypeIndex Idx) { } } -bool TypeStreamMerger::remapIndex(TypeIndex &Idx) { +bool TypeStreamMerger::remapIndex(TypeIndex &Idx, ArrayRef Map) { // Simple types are unchanged. if (Idx.isSimple()) return true; @@ -186,14 +164,14 @@ bool TypeStreamMerger::remapIndex(TypeIndex &Idx) { // successfully. If it refers to a type later in the stream or a record we // had to defer, defer it until later pass. unsigned MapPos = slotForIndex(Idx); - if (MapPos < IndexMap.size() && IndexMap[MapPos] != Untranslated) { - Idx = IndexMap[MapPos]; + if (MapPos < Map.size() && Map[MapPos] != Untranslated) { + Idx = Map[MapPos]; return true; } // If this is the second pass and this index isn't in the map, then it points // outside the current type stream, and this is a corrupt record. - if (IsSecondPass && MapPos >= IndexMap.size()) { + if (IsSecondPass && MapPos >= Map.size()) { // FIXME: Print a more useful error. We can give the current record and the // index that we think its pointing to. 
LastError = joinErrors(std::move(*LastError), errorCorruptRecord()); @@ -207,251 +185,50 @@ bool TypeStreamMerger::remapIndex(TypeIndex &Idx) { return false; } -//----------------------------------------------------------------------------// -// Item records -//----------------------------------------------------------------------------// - -Error TypeStreamMerger::visitKnownRecord(CVType &, FuncIdRecord &R) { - bool Success = true; - Success &= remapIndex(R.ParentScope); - Success &= remapIndex(R.FunctionType); - return writeIdRecord(R, Success); -} - -Error TypeStreamMerger::visitKnownRecord(CVType &, MemberFuncIdRecord &R) { - bool Success = true; - Success &= remapIndex(R.ClassType); - Success &= remapIndex(R.FunctionType); - return writeIdRecord(R, Success); -} - -Error TypeStreamMerger::visitKnownRecord(CVType &, StringIdRecord &R) { - return writeIdRecord(R, remapIndex(R.Id)); -} - -Error TypeStreamMerger::visitKnownRecord(CVType &, StringListRecord &R) { - bool Success = true; - for (TypeIndex &Str : R.StringIndices) - Success &= remapIndex(Str); - return writeIdRecord(R, Success); -} - -Error TypeStreamMerger::visitKnownRecord(CVType &, BuildInfoRecord &R) { - bool Success = true; - for (TypeIndex &Arg : R.ArgIndices) - Success &= remapIndex(Arg); - return writeIdRecord(R, Success); -} - -Error TypeStreamMerger::visitKnownRecord(CVType &, UdtSourceLineRecord &R) { - bool Success = true; - Success &= remapIndex(R.UDT); - Success &= remapIndex(R.SourceFile); - // FIXME: Translate UdtSourceLineRecord into UdtModSourceLineRecords in the - // IPI stream. - return writeIdRecord(R, Success); -} - -Error TypeStreamMerger::visitKnownRecord(CVType &, UdtModSourceLineRecord &R) { - bool Success = true; - Success &= remapIndex(R.UDT); - Success &= remapIndex(R.SourceFile); - return writeIdRecord(R, Success); -} - -//----------------------------------------------------------------------------// -// Type records -//----------------------------------------------------------------------------// - -Error TypeStreamMerger::visitKnownRecord(CVType &, ModifierRecord &R) { - return writeRecord(R, remapIndex(R.ModifiedType)); -} - -Error TypeStreamMerger::visitKnownRecord(CVType &, ProcedureRecord &R) { - bool Success = true; - Success &= remapIndex(R.ReturnType); - Success &= remapIndex(R.ArgumentList); - return writeRecord(R, Success); -} - -Error TypeStreamMerger::visitKnownRecord(CVType &, MemberFunctionRecord &R) { - bool Success = true; - Success &= remapIndex(R.ReturnType); - Success &= remapIndex(R.ClassType); - Success &= remapIndex(R.ThisType); - Success &= remapIndex(R.ArgumentList); - return writeRecord(R, Success); -} - -Error TypeStreamMerger::visitKnownRecord(CVType &Type, ArgListRecord &R) { - bool Success = true; - for (TypeIndex &Arg : R.ArgIndices) - Success &= remapIndex(Arg); - if (auto EC = writeRecord(R, Success)) - return EC; - return Error::success(); -} - -Error TypeStreamMerger::visitKnownRecord(CVType &, PointerRecord &R) { - bool Success = true; - Success &= remapIndex(R.ReferentType); - if (R.isPointerToMember()) - Success &= remapIndex(R.MemberInfo->ContainingType); - return writeRecord(R, Success); -} - -Error TypeStreamMerger::visitKnownRecord(CVType &, ArrayRecord &R) { - bool Success = true; - Success &= remapIndex(R.ElementType); - Success &= remapIndex(R.IndexType); - return writeRecord(R, Success); -} - -Error TypeStreamMerger::visitKnownRecord(CVType &, ClassRecord &R) { - bool Success = true; - Success &= remapIndex(R.FieldList); - Success &= 
remapIndex(R.DerivationList); - Success &= remapIndex(R.VTableShape); - return writeRecord(R, Success); -} - -Error TypeStreamMerger::visitKnownRecord(CVType &, UnionRecord &R) { - return writeRecord(R, remapIndex(R.FieldList)); -} - -Error TypeStreamMerger::visitKnownRecord(CVType &, EnumRecord &R) { - bool Success = true; - Success &= remapIndex(R.FieldList); - Success &= remapIndex(R.UnderlyingType); - return writeRecord(R, Success); -} - -Error TypeStreamMerger::visitKnownRecord(CVType &, BitFieldRecord &R) { - return writeRecord(R, remapIndex(R.Type)); -} - -Error TypeStreamMerger::visitKnownRecord(CVType &, VFTableShapeRecord &R) { - return writeRecord(R, true); -} - -Error TypeStreamMerger::visitKnownRecord(CVType &, TypeServer2Record &R) { - return writeRecord(R, true); -} - -Error TypeStreamMerger::visitKnownRecord(CVType &, LabelRecord &R) { - return writeRecord(R, true); -} - -Error TypeStreamMerger::visitKnownRecord(CVType &, VFTableRecord &R) { - bool Success = true; - Success &= remapIndex(R.CompleteClass); - Success &= remapIndex(R.OverriddenVFTable); - return writeRecord(R, Success); -} - -Error TypeStreamMerger::visitKnownRecord(CVType &, - MethodOverloadListRecord &R) { - bool Success = true; - for (OneMethodRecord &Meth : R.Methods) - Success &= remapIndex(Meth.Type); - return writeRecord(R, Success); -} - -Error TypeStreamMerger::visitKnownRecord(CVType &, FieldListRecord &R) { - // Visit the members inside the field list. - HadUntranslatedMember = false; - FieldListBuilder.begin(); - CVTypeVisitor Visitor(*this); - if (auto EC = Visitor.visitFieldListMemberStream(R.Data)) - return EC; - - // Write the record if we translated all field list members. - TypeIndex DestIdx = Untranslated; - if (!HadUntranslatedMember) - DestIdx = FieldListBuilder.end(); - else - FieldListBuilder.reset(); - addMapping(DestIdx); - - return Error::success(); -} - -//----------------------------------------------------------------------------// -// Member records -//----------------------------------------------------------------------------// - -Error TypeStreamMerger::visitKnownMember(CVMemberRecord &, - NestedTypeRecord &R) { - return writeMember(R, remapIndex(R.Type)); -} - -Error TypeStreamMerger::visitKnownMember(CVMemberRecord &, OneMethodRecord &R) { - bool Success = true; - Success &= remapIndex(R.Type); - return writeMember(R, Success); -} +bool TypeStreamMerger::remapTypeIndex(TypeIndex &Idx) { + // If we're mapping a pure index stream, then IndexMap only contains mappings + // from OldIdStream -> NewIdStream, in which case we will need to use the + // special mapping from OldTypeStream -> NewTypeStream which was computed + // externally. Regardless, we use this special map if and only if we are + // doing an id-only mapping. 
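+  // Concretely (a summary of the merge entry points, not new behaviour):
+  // mergeIdRecords() leaves DestTypeStream null and supplies TypeLookup,
+  // while mergeTypesAndIds() sets both streams so type and id records share
+  // IndexMap.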
+ if (DestTypeStream == nullptr) + return remapIndex(Idx, TypeLookup); -Error TypeStreamMerger::visitKnownMember(CVMemberRecord &, - OverloadedMethodRecord &R) { - return writeMember(R, remapIndex(R.MethodList)); + assert(TypeLookup.empty()); + return remapIndex(Idx, IndexMap); } -Error TypeStreamMerger::visitKnownMember(CVMemberRecord &, - DataMemberRecord &R) { - return writeMember(R, remapIndex(R.Type)); +bool TypeStreamMerger::remapItemIndex(TypeIndex &Idx) { + assert(DestIdStream); + return remapIndex(Idx, IndexMap); } -Error TypeStreamMerger::visitKnownMember(CVMemberRecord &, - StaticDataMemberRecord &R) { - return writeMember(R, remapIndex(R.Type)); -} +Error TypeStreamMerger::mergeTypeRecords(TypeTableBuilder &Dest, + const CVTypeArray &Types) { + DestTypeStream = &Dest; -Error TypeStreamMerger::visitKnownMember(CVMemberRecord &, - EnumeratorRecord &R) { - return writeMember(R, true); + return doit(Types); } -Error TypeStreamMerger::visitKnownMember(CVMemberRecord &, VFPtrRecord &R) { - return writeMember(R, remapIndex(R.Type)); -} - -Error TypeStreamMerger::visitKnownMember(CVMemberRecord &, BaseClassRecord &R) { - return writeMember(R, remapIndex(R.Type)); -} +Error TypeStreamMerger::mergeIdRecords(TypeTableBuilder &Dest, + ArrayRef TypeSourceToDest, + const CVTypeArray &Ids) { + DestIdStream = &Dest; + TypeLookup = TypeSourceToDest; -Error TypeStreamMerger::visitKnownMember(CVMemberRecord &, - VirtualBaseClassRecord &R) { - bool Success = true; - Success &= remapIndex(R.BaseType); - Success &= remapIndex(R.VBPtrType); - return writeMember(R, Success); + return doit(Ids); } -Error TypeStreamMerger::visitKnownMember(CVMemberRecord &, - ListContinuationRecord &R) { - return writeMember(R, remapIndex(R.ContinuationIndex)); +Error TypeStreamMerger::mergeTypesAndIds(TypeTableBuilder &DestIds, + TypeTableBuilder &DestTypes, + const CVTypeArray &IdsAndTypes) { + DestIdStream = &DestIds; + DestTypeStream = &DestTypes; + return doit(IdsAndTypes); } -Error TypeStreamMerger::visitUnknownType(CVType &Rec) { - // We failed to translate a type. Translate this index as "not translated". - addMapping(TypeIndex(SimpleTypeKind::NotTranslated)); - return errorCorruptRecord(); -} - -Error TypeStreamMerger::mergeStream(const CVTypeArray &Types) { - assert(IndexMap.empty()); - TypeVisitorCallbackPipeline Pipeline; - LastError = Error::success(); - - TypeDeserializer Deserializer; - Pipeline.addCallbackToPipeline(Deserializer); - Pipeline.addCallbackToPipeline(*this); - - CVTypeVisitor Visitor(Pipeline); - if (Handler) - Visitor.addTypeServerHandler(*Handler); - - if (auto EC = Visitor.visitTypeStream(Types)) +Error TypeStreamMerger::doit(const CVTypeArray &Types) { + if (auto EC = remapAllTypes(Types)) return EC; // If we found bad indices but no other errors, try doing another pass and see @@ -461,33 +238,92 @@ Error TypeStreamMerger::mergeStream(const CVTypeArray &Types) { // topologically sorted. The standard library contains MASM-produced objects, // so this is important to handle correctly, but we don't have to be too // efficient. MASM type streams are usually very small. 
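+  // Hedged walk-through: if record 0x1000 refers forward to 0x1001, the first
+  // pass maps 0x1001 but marks 0x1000 Untranslated (NumBadIndices == 1); the
+  // second pass resolves it through the now-populated IndexMap. A pass that
+  // fixes nothing means the input graph is cyclic, reported below as a
+  // corrupt_record error.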
- while (!*LastError && NumBadIndices > 0) { + while (!LastError && NumBadIndices > 0) { unsigned BadIndicesRemaining = NumBadIndices; IsSecondPass = true; NumBadIndices = 0; CurIndex = TypeIndex(TypeIndex::FirstNonSimpleIndex); - if (auto EC = Visitor.visitTypeStream(Types)) + + if (auto EC = remapAllTypes(Types)) return EC; assert(NumBadIndices <= BadIndicesRemaining && "second pass found more bad indices"); - if (!*LastError && NumBadIndices == BadIndicesRemaining) { + if (!LastError && NumBadIndices == BadIndicesRemaining) { return llvm::make_error( cv_error_code::corrupt_record, "input type graph contains cycles"); } } - IndexMap.clear(); + if (LastError) + return std::move(*LastError); + return Error::success(); +} - Error Ret = std::move(*LastError); - LastError.reset(); - return Ret; +Error TypeStreamMerger::remapAllTypes(const CVTypeArray &Types) { + for (const CVType &Type : Types) + if (auto EC = remapType(Type)) + return EC; + return Error::success(); +} + +Error TypeStreamMerger::remapType(const CVType &Type) { + RemappedType R(Type); + SmallVector Refs; + discoverTypeIndices(Type.RecordData, Refs); + bool MappedAllIndices = remapIndices(R, Refs); + TypeTableBuilder &Dest = + isIdRecord(Type.kind()) ? *DestIdStream : *DestTypeStream; + if (auto EC = writeRecord(Dest, R, MappedAllIndices)) + return EC; + + ++CurIndex; + assert((IsSecondPass || IndexMap.size() == slotForIndex(CurIndex)) && + "visitKnownRecord should add one index map entry"); + return Error::success(); +} + +bool TypeStreamMerger::remapIndices(RemappedType &Record, + ArrayRef Refs) { + ArrayRef OriginalData = Record.OriginalRecord.content(); + bool Success = true; + for (auto &Ref : Refs) { + uint32_t Offset = Ref.Offset; + ArrayRef Bytes = OriginalData.slice(Ref.Offset, sizeof(TypeIndex)); + ArrayRef TIs(reinterpret_cast(Bytes.data()), + Ref.Count); + for (auto TI : TIs) { + TypeIndex NewTI = TI; + bool ThisSuccess = (Ref.Kind == TiRefKind::IndexRef) + ? 
remapItemIndex(NewTI) + : remapTypeIndex(NewTI); + if (ThisSuccess && NewTI != TI) + Record.Mappings.emplace_back(Offset, NewTI); + Offset += sizeof(TypeIndex); + Success &= ThisSuccess; + } + } + return Success; } -Error llvm::codeview::mergeTypeStreams(TypeTableBuilder &DestIdStream, - TypeTableBuilder &DestTypeStream, - TypeServerHandler *Handler, +Error llvm::codeview::mergeTypeRecords(TypeTableBuilder &Dest, + SmallVectorImpl &SourceToDest, const CVTypeArray &Types) { - return TypeStreamMerger(DestIdStream, DestTypeStream, Handler) - .mergeStream(Types); + TypeStreamMerger M(SourceToDest); + return M.mergeTypeRecords(Dest, Types); +} + +Error llvm::codeview::mergeIdRecords(TypeTableBuilder &Dest, + ArrayRef TypeSourceToDest, + SmallVectorImpl &SourceToDest, + const CVTypeArray &Ids) { + TypeStreamMerger M(SourceToDest); + return M.mergeIdRecords(Dest, TypeSourceToDest, Ids); +} + +Error llvm::codeview::mergeTypeAndIdRecords( + TypeTableBuilder &DestIds, TypeTableBuilder &DestTypes, + SmallVectorImpl &SourceToDest, const CVTypeArray &IdsAndTypes) { + TypeStreamMerger M(SourceToDest); + return M.mergeTypesAndIds(DestIds, DestTypes, IdsAndTypes); } diff --git a/interpreter/llvm/src/lib/DebugInfo/CodeView/TypeTableCollection.cpp b/interpreter/llvm/src/lib/DebugInfo/CodeView/TypeTableCollection.cpp new file mode 100644 index 0000000000000..4eca5aeaa0ae3 --- /dev/null +++ b/interpreter/llvm/src/lib/DebugInfo/CodeView/TypeTableCollection.cpp @@ -0,0 +1,67 @@ +//===- TypeTableCollection.cpp -------------------------------- *- C++ --*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/CodeView/TypeTableCollection.h" + +#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h" +#include "llvm/DebugInfo/CodeView/TypeName.h" +#include "llvm/DebugInfo/CodeView/TypeTableBuilder.h" +#include "llvm/Support/BinaryByteStream.h" +#include "llvm/Support/BinaryStreamReader.h" + +using namespace llvm; +using namespace llvm::codeview; + +TypeTableCollection::TypeTableCollection(ArrayRef> Records) + : NameStorage(Allocator), Records(Records) { + Names.resize(Records.size()); +} + +Optional TypeTableCollection::getFirst() { + if (empty()) + return None; + return TypeIndex::fromArrayIndex(0); +} + +Optional TypeTableCollection::getNext(TypeIndex Prev) { + assert(contains(Prev)); + ++Prev; + if (Prev.toArrayIndex() == size()) + return None; + return Prev; +} + +CVType TypeTableCollection::getType(TypeIndex Index) { + assert(Index.toArrayIndex() < Records.size()); + ArrayRef Bytes = Records[Index.toArrayIndex()]; + const RecordPrefix *Prefix = + reinterpret_cast(Bytes.data()); + TypeLeafKind Kind = static_cast(uint16_t(Prefix->RecordKind)); + return CVType(Kind, Bytes); +} + +StringRef TypeTableCollection::getTypeName(TypeIndex Index) { + if (Index.isNoneType() || Index.isSimple()) + return TypeIndex::simpleTypeName(Index); + + uint32_t I = Index.toArrayIndex(); + if (Names[I].data() == nullptr) { + StringRef Result = NameStorage.save(computeTypeName(*this, Index)); + Names[I] = Result; + } + return Names[I]; +} + +bool TypeTableCollection::contains(TypeIndex Index) { + return Index.toArrayIndex() <= size(); +} + +uint32_t TypeTableCollection::size() { return Records.size(); } + +uint32_t TypeTableCollection::capacity() { return Records.size(); } diff --git 
a/interpreter/llvm/src/lib/DebugInfo/DWARF/CMakeLists.txt b/interpreter/llvm/src/lib/DebugInfo/DWARF/CMakeLists.txt index 6ca6e64bd8e6f..11f94509e8fae 100644 --- a/interpreter/llvm/src/lib/DebugInfo/DWARF/CMakeLists.txt +++ b/interpreter/llvm/src/lib/DebugInfo/DWARF/CMakeLists.txt @@ -3,6 +3,7 @@ add_llvm_library(LLVMDebugInfoDWARF DWARFAcceleratorTable.cpp DWARFCompileUnit.cpp DWARFContext.cpp + DWARFDataExtractor.cpp DWARFDebugAbbrev.cpp DWARFDebugArangeSet.cpp DWARFDebugAranges.cpp diff --git a/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp b/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp index e7b4b777b43fa..bb475a669efb2 100644 --- a/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp @@ -7,13 +7,14 @@ // //===----------------------------------------------------------------------===// +#include "llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h" + #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" -#include "llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" #include "llvm/DebugInfo/DWARF/DWARFUnit.h" #include "llvm/Support/DataExtractor.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include @@ -64,46 +65,52 @@ DWARFAbbreviationDeclaration::extract(DataExtractor Data, if (A && F) { Optional V; bool IsImplicitConst = (F == DW_FORM_implicit_const); - if (IsImplicitConst) + if (IsImplicitConst) { V = Data.getSLEB128(OffsetPtr); - else if (auto Size = DWARFFormValue::getFixedByteSize(F)) - V = *Size; - AttributeSpecs.push_back(AttributeSpec(A, F, V)); - if (IsImplicitConst) + AttributeSpecs.push_back(AttributeSpec(A, F, V)); continue; + } // If this abbrevation still has a fixed byte size, then update the // FixedAttributeSize as needed. - if (FixedAttributeSize) { - if (V) - FixedAttributeSize->NumBytes += *V; - else { - switch (F) { - case DW_FORM_addr: - ++FixedAttributeSize->NumAddrs; - break; - - case DW_FORM_ref_addr: - ++FixedAttributeSize->NumRefAddrs; - break; - - case DW_FORM_strp: - case DW_FORM_GNU_ref_alt: - case DW_FORM_GNU_strp_alt: - case DW_FORM_line_strp: - case DW_FORM_sec_offset: - case DW_FORM_strp_sup: - ++FixedAttributeSize->NumDwarfOffsets; - break; - - default: - // Indicate we no longer have a fixed byte size for this - // abbreviation by clearing the FixedAttributeSize optional value - // so it doesn't have a value. - FixedAttributeSize.reset(); - break; - } + switch (F) { + case DW_FORM_addr: + if (FixedAttributeSize) + ++FixedAttributeSize->NumAddrs; + break; + + case DW_FORM_ref_addr: + if (FixedAttributeSize) + ++FixedAttributeSize->NumRefAddrs; + break; + + case DW_FORM_strp: + case DW_FORM_GNU_ref_alt: + case DW_FORM_GNU_strp_alt: + case DW_FORM_line_strp: + case DW_FORM_sec_offset: + case DW_FORM_strp_sup: + if (FixedAttributeSize) + ++FixedAttributeSize->NumDwarfOffsets; + break; + + default: + // The form has a byte size that doesn't depend on Params. + // If it's a fixed size, keep track of it. + if (auto Size = + DWARFFormValue::getFixedByteSize(F, DWARFFormParams())) { + V = *Size; + if (FixedAttributeSize) + FixedAttributeSize->NumBytes += *V; + break; } + // Indicate we no longer have a fixed byte size for this + // abbreviation by clearing the FixedAttributeSize optional value + // so it doesn't have a value. 
+ FixedAttributeSize.reset(); + break; } + // Record this attribute and its fixed size if it has one. + AttributeSpecs.push_back(AttributeSpec(A, F, V)); } else if (A == 0 && F == 0) { // We successfully reached the end of this abbreviation declaration // since both attribute and form are zero. @@ -185,7 +192,8 @@ Optional DWARFAbbreviationDeclaration::getAttributeValue( if (auto FixedSize = Spec.getByteSize(U)) Offset += *FixedSize; else - DWARFFormValue::skipValue(Spec.Form, DebugInfoData, &Offset, &U); + DWARFFormValue::skipValue(Spec.Form, DebugInfoData, &Offset, + U.getFormParams()); ++AttrIndex; } return None; @@ -210,7 +218,8 @@ Optional DWARFAbbreviationDeclaration::AttributeSpec::getByteSize( if (ByteSizeOrValue) return ByteSizeOrValue; Optional S; - auto FixedByteSize = DWARFFormValue::getFixedByteSize(Form, &U); + auto FixedByteSize = + DWARFFormValue::getFixedByteSize(Form, U.getFormParams()); if (FixedByteSize) S = *FixedByteSize; return S; diff --git a/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp b/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp index a12f8adfafe51..9ae7c9a07f76d 100644 --- a/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp @@ -7,12 +7,13 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/SmallVector.h" #include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h" + +#include "llvm/ADT/SmallVector.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" #include "llvm/DebugInfo/DWARF/DWARFRelocMap.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" @@ -54,6 +55,52 @@ bool DWARFAcceleratorTable::extract() { return true; } +uint32_t DWARFAcceleratorTable::getNumBuckets() { return Hdr.NumBuckets; } +uint32_t DWARFAcceleratorTable::getNumHashes() { return Hdr.NumHashes; } +uint32_t DWARFAcceleratorTable::getSizeHdr() { return sizeof(Hdr); } +uint32_t DWARFAcceleratorTable::getHeaderDataLength() { + return Hdr.HeaderDataLength; +} + +ArrayRef> +DWARFAcceleratorTable::getAtomsDesc() { + return HdrData.Atoms; +} + +bool DWARFAcceleratorTable::validateForms() { + for (auto Atom : getAtomsDesc()) { + DWARFFormValue FormValue(Atom.second); + switch (Atom.first) { + case dwarf::DW_ATOM_die_offset: + if ((!FormValue.isFormClass(DWARFFormValue::FC_Constant) && + !FormValue.isFormClass(DWARFFormValue::FC_Flag)) || + FormValue.getForm() == dwarf::DW_FORM_sdata) + return false; + default: + break; + } + } + return true; +} + +uint32_t DWARFAcceleratorTable::readAtoms(uint32_t &HashDataOffset) { + uint32_t DieOffset = dwarf::DW_INVALID_OFFSET; + + for (auto Atom : getAtomsDesc()) { + DWARFFormValue FormValue(Atom.second); + FormValue.extractValue(AccelSection, &HashDataOffset, NULL); + switch (Atom.first) { + case dwarf::DW_ATOM_die_offset: + DieOffset = *FormValue.getAsUnsignedConstant(); + break; + default: + break; + } + } + return DieOffset; +} + LLVM_DUMP_METHOD void DWARFAcceleratorTable::dump(raw_ostream &OS) const { // Dump the header. 
OS << "Magic = " << format("0x%08x", Hdr.Magic) << '\n' @@ -113,8 +160,7 @@ LLVM_DUMP_METHOD void DWARFAcceleratorTable::dump(raw_ostream &OS) const { continue; } while (AccelSection.isValidOffsetForDataOfSize(DataOffset, 4)) { - unsigned StringOffset = - getRelocatedValue(AccelSection, 4, &DataOffset, &Relocs); + unsigned StringOffset = AccelSection.getRelocatedValue(4, &DataOffset); if (!StringOffset) break; OS << format(" Name: %08x \"%s\"\n", StringOffset, diff --git a/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFCompileUnit.cpp b/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFCompileUnit.cpp index 6e550f2e9ec95..358e9bf43d003 100644 --- a/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFCompileUnit.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFCompileUnit.cpp @@ -15,7 +15,7 @@ using namespace llvm; -void DWARFCompileUnit::dump(raw_ostream &OS) { +void DWARFCompileUnit::dump(raw_ostream &OS, DIDumpOptions DumpOpts) { OS << format("0x%08x", getOffset()) << ": Compile Unit:" << " length = " << format("0x%08x", getLength()) << " version = " << format("0x%04x", getVersion()); @@ -27,7 +27,7 @@ void DWARFCompileUnit::dump(raw_ostream &OS) { << ")\n"; if (DWARFDie CUDie = getUnitDIE(false)) - CUDie.dump(OS, -1U); + CUDie.dump(OS, -1U, 0, DumpOpts); else OS << "\n\n"; } diff --git a/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFContext.cpp b/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFContext.cpp index 246899ac12b97..dd3235244e243 100644 --- a/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFContext.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFContext.cpp @@ -13,6 +13,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h" #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h" #include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h" @@ -36,7 +37,6 @@ #include "llvm/Object/RelocVisitor.h" #include "llvm/Support/Casting.h" #include "llvm/Support/DataExtractor.h" -#include "llvm/Support/Debug.h" #include "llvm/Support/Error.h" #include "llvm/Support/Format.h" #include "llvm/Support/MemoryBuffer.h" @@ -44,8 +44,8 @@ #include #include #include -#include #include +#include #include #include @@ -55,34 +55,144 @@ using namespace object; #define DEBUG_TYPE "dwarf" -typedef DWARFDebugLine::LineTable DWARFLineTable; -typedef DILineInfoSpecifier::FileLineInfoKind FileLineInfoKind; -typedef DILineInfoSpecifier::FunctionNameKind FunctionNameKind; - -uint64_t llvm::getRelocatedValue(const DataExtractor &Data, uint32_t Size, - uint32_t *Off, const RelocAddrMap *Relocs) { - if (!Relocs) - return Data.getUnsigned(Off, Size); - RelocAddrMap::const_iterator AI = Relocs->find(*Off); - if (AI == Relocs->end()) - return Data.getUnsigned(Off, Size); - return Data.getUnsigned(Off, Size) + AI->second.second; -} +using DWARFLineTable = DWARFDebugLine::LineTable; +using FileLineInfoKind = DILineInfoSpecifier::FileLineInfoKind; +using FunctionNameKind = DILineInfoSpecifier::FunctionNameKind; static void dumpAccelSection(raw_ostream &OS, StringRef Name, const DWARFSection& Section, StringRef StringSection, bool LittleEndian) { - DataExtractor AccelSection(Section.Data, LittleEndian, 0); + DWARFDataExtractor AccelSection(Section, LittleEndian, 0); DataExtractor StrData(StringSection, LittleEndian, 0); OS << "\n." 
<< Name << " contents:\n"; - DWARFAcceleratorTable Accel(AccelSection, StrData, Section.Relocs); + DWARFAcceleratorTable Accel(AccelSection, StrData); if (!Accel.extract()) return; Accel.dump(OS); } -void DWARFContext::dump(raw_ostream &OS, DIDumpType DumpType, bool DumpEH, - bool SummarizeTypes) { +static void +dumpDWARFv5StringOffsetsSection(raw_ostream &OS, StringRef SectionName, + const DWARFSection &StringOffsetsSection, + StringRef StringSection, bool LittleEndian) { + DWARFDataExtractor StrOffsetExt(StringOffsetsSection, LittleEndian, 0); + uint32_t Offset = 0; + uint64_t SectionSize = StringOffsetsSection.Data.size(); + + while (Offset < SectionSize) { + unsigned Version = 0; + DwarfFormat Format = DWARF32; + unsigned EntrySize = 4; + // Perform validation and extract the segment size from the header. + if (!StrOffsetExt.isValidOffsetForDataOfSize(Offset, 4)) { + OS << "error: invalid contribution to string offsets table in section ." + << SectionName << ".\n"; + return; + } + uint32_t ContributionStart = Offset; + uint64_t ContributionSize = StrOffsetExt.getU32(&Offset); + // A contribution size of 0xffffffff indicates DWARF64, with the actual size + // in the following 8 bytes. Otherwise, the DWARF standard mandates that + // the contribution size must be at most 0xfffffff0. + if (ContributionSize == 0xffffffff) { + if (!StrOffsetExt.isValidOffsetForDataOfSize(Offset, 8)) { + OS << "error: invalid contribution to string offsets table in section ." + << SectionName << ".\n"; + return; + } + Format = DWARF64; + EntrySize = 8; + ContributionSize = StrOffsetExt.getU64(&Offset); + } else if (ContributionSize > 0xfffffff0) { + OS << "error: invalid contribution to string offsets table in section ." + << SectionName << ".\n"; + return; + } + + // We must ensure that we don't read a partial record at the end, so we + // validate for a multiple of EntrySize. Also, we're expecting a version + // number and padding, which adds an additional 4 bytes. + uint64_t ValidationSize = + 4 + ((ContributionSize + EntrySize - 1) & (-(uint64_t)EntrySize)); + if (!StrOffsetExt.isValidOffsetForDataOfSize(Offset, ValidationSize)) { + OS << "error: contribution to string offsets table in section ." + << SectionName << " has invalid length.\n"; + return; + } + + Version = StrOffsetExt.getU16(&Offset); + Offset += 2; + OS << format("0x%8.8x: ", ContributionStart); + OS << "Contribution size = " << ContributionSize + << ", Version = " << Version << "\n"; + + uint32_t ContributionBase = Offset; + DataExtractor StrData(StringSection, LittleEndian, 0); + while (Offset - ContributionBase < ContributionSize) { + OS << format("0x%8.8x: ", Offset); + // FIXME: We can only extract strings in DWARF32 format at the moment. + uint64_t StringOffset = + StrOffsetExt.getRelocatedValue(EntrySize, &Offset); + if (Format == DWARF32) { + uint32_t StringOffset32 = (uint32_t)StringOffset; + OS << format("%8.8x ", StringOffset32); + const char *S = StrData.getCStr(&StringOffset32); + if (S) + OS << format("\"%s\"", S); + } else + OS << format("%16.16" PRIx64 " ", StringOffset); + OS << "\n"; + } + } +} + +// Dump a DWARF string offsets section. This may be a DWARF v5 formatted +// string offsets section, where each compile or type unit contributes a +// number of entries (string offsets), with each contribution preceded by +// a header containing size and version number. Alternatively, it may be a +// monolithic series of string offsets, as generated by the pre-DWARF v5 +// implementation of split DWARF. 
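The new dumpDWARFv5StringOffsetsSection above hinges on the DWARF initial-length convention: a 32-bit length below the reserved range means a DWARF32 contribution, the escape value 0xffffffff announces DWARF64 with the real 64-bit length following, and anything else above 0xfffffff0 is reserved and rejected. A minimal standalone sketch of that decision, assuming little-endian input and a caller-maintained cursor:

```cpp
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <optional>

struct Contribution {
  uint64_t Size;
  bool IsDWARF64;
};

static std::optional<Contribution> parseLength(const uint8_t *Data,
                                               size_t Avail, size_t &Off) {
  if (Off + 4 > Avail)
    return std::nullopt;
  uint32_t L32;
  std::memcpy(&L32, Data + Off, 4);
  Off += 4;
  if (L32 == 0xffffffffu) { // DWARF64 escape: 8-byte length follows
    if (Off + 8 > Avail)
      return std::nullopt;
    uint64_t L64;
    std::memcpy(&L64, Data + Off, 8);
    Off += 8;
    return Contribution{L64, true};
  }
  if (L32 > 0xfffffff0u) // reserved values: invalid contribution
    return std::nullopt;
  return Contribution{L32, false};
}
```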
+static void dumpStringOffsetsSection(raw_ostream &OS, StringRef SectionName, + const DWARFSection &StringOffsetsSection, + StringRef StringSection, bool LittleEndian, + unsigned MaxVersion) { + if (StringOffsetsSection.Data.empty()) + return; + OS << "\n." << SectionName << " contents:\n"; + // If we have at least one (compile or type) unit with DWARF v5 or greater, + // we assume that the section is formatted like a DWARF v5 string offsets + // section. + if (MaxVersion >= 5) + dumpDWARFv5StringOffsetsSection(OS, SectionName, StringOffsetsSection, + StringSection, LittleEndian); + else { + DataExtractor strOffsetExt(StringOffsetsSection.Data, LittleEndian, 0); + uint32_t offset = 0; + uint64_t size = StringOffsetsSection.Data.size(); + // Ensure that size is a multiple of the size of an entry. + if (size & ((uint64_t)(sizeof(uint32_t) - 1))) { + OS << "error: size of ." << SectionName << " is not a multiple of " + << sizeof(uint32_t) << ".\n"; + size &= -(uint64_t)sizeof(uint32_t); + } + DataExtractor StrData(StringSection, LittleEndian, 0); + while (offset < size) { + OS << format("0x%8.8x: ", offset); + uint32_t StringOffset = strOffsetExt.getU32(&offset); + OS << format("%8.8x ", StringOffset); + const char *S = StrData.getCStr(&StringOffset); + if (S) + OS << format("\"%s\"", S); + OS << "\n"; + } + } +} + +void DWARFContext::dump(raw_ostream &OS, DIDumpOptions DumpOpts) { + DIDumpType DumpType = DumpOpts.DumpType; + bool DumpEH = DumpOpts.DumpEH; + bool SummarizeTypes = DumpOpts.SummarizeTypes; + if (DumpType == DIDT_All || DumpType == DIDT_Abbrev) { OS << ".debug_abbrev contents:\n"; getDebugAbbrev()->dump(OS); @@ -97,14 +207,14 @@ void DWARFContext::dump(raw_ostream &OS, DIDumpType DumpType, bool DumpEH, if (DumpType == DIDT_All || DumpType == DIDT_Info) { OS << "\n.debug_info contents:\n"; for (const auto &CU : compile_units()) - CU->dump(OS); + CU->dump(OS, DumpOpts); } if ((DumpType == DIDT_All || DumpType == DIDT_InfoDwo) && getNumDWOCompileUnits()) { OS << "\n.debug_info.dwo contents:\n"; for (const auto &DWOCU : dwo_compile_units()) - DWOCU->dump(OS); + DWOCU->dump(OS, DumpOpts); } if ((DumpType == DIDT_All || DumpType == DIDT_Types) && getNumTypeUnits()) { @@ -164,11 +274,11 @@ void DWARFContext::dump(raw_ostream &OS, DIDumpType DumpType, bool DumpEH, if (!CUDIE) continue; if (auto StmtOffset = toSectionOffset(CUDIE.find(DW_AT_stmt_list))) { - DataExtractor lineData(getLineSection().Data, isLittleEndian(), - savedAddressByteSize); + DWARFDataExtractor lineData(getLineSection(), isLittleEndian(), + savedAddressByteSize); DWARFDebugLine::LineTable LineTable; uint32_t Offset = *StmtOffset; - LineTable.parse(lineData, &getLineSection().Relocs, &Offset); + LineTable.parse(lineData, &Offset); LineTable.dump(OS); } } @@ -187,8 +297,8 @@ void DWARFContext::dump(raw_ostream &OS, DIDumpType DumpType, bool DumpEH, if (DumpType == DIDT_All || DumpType == DIDT_LineDwo) { OS << "\n.debug_line.dwo contents:\n"; unsigned stmtOffset = 0; - DataExtractor lineData(getLineDWOSection().Data, isLittleEndian(), - savedAddressByteSize); + DWARFDataExtractor lineData(getLineDWOSection(), isLittleEndian(), + savedAddressByteSize); DWARFDebugLine::LineTable LineTable; while (LineTable.Prologue.parse(lineData, &stmtOffset)) { LineTable.dump(OS); @@ -225,11 +335,11 @@ void DWARFContext::dump(raw_ostream &OS, DIDumpType DumpType, bool DumpEH, // sizes, but for simplicity we just use the address byte size of the last // compile unit (there is no easy and fast way to associate address range // list and 
the compile unit it describes). - DataExtractor rangesData(getRangeSection().Data, isLittleEndian(), - savedAddressByteSize); + DWARFDataExtractor rangesData(getRangeSection(), isLittleEndian(), + savedAddressByteSize); offset = 0; DWARFDebugRangeList rangeList; - while (rangeList.extract(rangesData, &offset, getRangeSection().Relocs)) + while (rangeList.extract(rangesData, &offset)) rangeList.dump(OS); } @@ -251,17 +361,15 @@ void DWARFContext::dump(raw_ostream &OS, DIDumpType DumpType, bool DumpEH, true /* GnuStyle */) .dump("debug_gnu_pubtypes", OS); - if ((DumpType == DIDT_All || DumpType == DIDT_StrOffsetsDwo) && - !getStringOffsetDWOSection().empty()) { - OS << "\n.debug_str_offsets.dwo contents:\n"; - DataExtractor strOffsetExt(getStringOffsetDWOSection(), isLittleEndian(), - 0); - offset = 0; - uint64_t size = getStringOffsetDWOSection().size(); - while (offset < size) { - OS << format("0x%8.8x: ", offset); - OS << format("%8.8x\n", strOffsetExt.getU32(&offset)); - } + if (DumpType == DIDT_All || DumpType == DIDT_StrOffsets) + dumpStringOffsetsSection(OS, "debug_str_offsets", getStringOffsetSection(), + getStringSection(), isLittleEndian(), + getMaxVersion()); + + if (DumpType == DIDT_All || DumpType == DIDT_StrOffsetsDwo) { + dumpStringOffsetsSection(OS, "debug_str_offsets.dwo", + getStringOffsetDWOSection(), getStringDWOSection(), + isLittleEndian(), getMaxVersion()); } if ((DumpType == DIDT_All || DumpType == DIDT_GdbIndex) && @@ -287,6 +395,15 @@ void DWARFContext::dump(raw_ostream &OS, DIDumpType DumpType, bool DumpEH, getStringSection(), isLittleEndian()); } +DWARFCompileUnit *DWARFContext::getDWOCompileUnitForHash(uint64_t Hash) { + // FIXME: Improve this for the case where this DWO file is really a DWP file + // with an index - use the index for lookup instead of a linear search. + for (const auto &DWOCU : dwo_compile_units()) + if (DWOCU->getDWOId() == Hash) + return DWOCU.get(); + return nullptr; +} + DWARFDie DWARFContext::getDIEForOffset(uint32_t Offset) { parseCompileUnits(); if (auto *CU = CUs.getUnitForOffset(Offset)) @@ -294,248 +411,6 @@ DWARFDie DWARFContext::getDIEForOffset(uint32_t Offset) { return DWARFDie(); } -namespace { - -class Verifier { - raw_ostream &OS; - DWARFContext &DCtx; -public: - Verifier(raw_ostream &S, DWARFContext &D) : OS(S), DCtx(D) {} - - bool HandleDebugInfo() { - bool Success = true; - // A map that tracks all references (converted absolute references) so we - // can verify each reference points to a valid DIE and not an offset that - // lies between to valid DIEs. - std::map> ReferenceToDIEOffsets; - - OS << "Verifying .debug_info...\n"; - for (const auto &CU : DCtx.compile_units()) { - unsigned NumDies = CU->getNumDIEs(); - for (unsigned I = 0; I < NumDies; ++I) { - auto Die = CU->getDIEAtIndex(I); - const auto Tag = Die.getTag(); - if (Tag == DW_TAG_null) - continue; - for (auto AttrValue : Die.attributes()) { - const auto Attr = AttrValue.Attr; - const auto Form = AttrValue.Value.getForm(); - switch (Attr) { - case DW_AT_ranges: - // Make sure the offset in the DW_AT_ranges attribute is valid. 
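The getDWOCompileUnitForHash helper added above settles for a linear scan over the parsed .dwo units, and its FIXME points at the obvious upgrade for DWP files: build a DWOId-to-unit index once and answer lookups in constant time. A sketch of that idea, using hypothetical stand-in types rather than LLVM's unit classes:

```cpp
#include <cstdint>
#include <memory>
#include <unordered_map>
#include <vector>

struct DwoUnit {
  uint64_t DwoId;
};

class DwoIndex {
  std::unordered_map<uint64_t, DwoUnit *> Map;

public:
  explicit DwoIndex(const std::vector<std::unique_ptr<DwoUnit>> &Units) {
    for (const auto &U : Units)
      Map.emplace(U->DwoId, U.get()); // first unit with a given id wins
  }
  DwoUnit *lookup(uint64_t Hash) const {
    auto It = Map.find(Hash);
    return It == Map.end() ? nullptr : It->second;
  }
};
```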
- if (auto SectionOffset = AttrValue.Value.getAsSectionOffset()) { - if (*SectionOffset >= DCtx.getRangeSection().Data.size()) { - Success = false; - OS << "error: DW_AT_ranges offset is beyond .debug_ranges " - "bounds:\n"; - Die.dump(OS, 0); - OS << "\n"; - } - } else { - Success = false; - OS << "error: DIE has invalid DW_AT_ranges encoding:\n"; - Die.dump(OS, 0); - OS << "\n"; - } - break; - case DW_AT_stmt_list: - // Make sure the offset in the DW_AT_stmt_list attribute is valid. - if (auto SectionOffset = AttrValue.Value.getAsSectionOffset()) { - if (*SectionOffset >= DCtx.getLineSection().Data.size()) { - Success = false; - OS << "error: DW_AT_stmt_list offset is beyond .debug_line " - "bounds: " - << format("0x%08" PRIx32, *SectionOffset) << "\n"; - CU->getUnitDIE().dump(OS, 0); - OS << "\n"; - } - } else { - Success = false; - OS << "error: DIE has invalid DW_AT_stmt_list encoding:\n"; - Die.dump(OS, 0); - OS << "\n"; - } - break; - - default: - break; - } - switch (Form) { - case DW_FORM_ref1: - case DW_FORM_ref2: - case DW_FORM_ref4: - case DW_FORM_ref8: - case DW_FORM_ref_udata: { - // Verify all CU relative references are valid CU offsets. - Optional RefVal = AttrValue.Value.getAsReference(); - assert(RefVal); - if (RefVal) { - auto DieCU = Die.getDwarfUnit(); - auto CUSize = DieCU->getNextUnitOffset() - DieCU->getOffset(); - auto CUOffset = AttrValue.Value.getRawUValue(); - if (CUOffset >= CUSize) { - Success = false; - OS << "error: " << FormEncodingString(Form) << " CU offset " - << format("0x%08" PRIx32, CUOffset) - << " is invalid (must be less than CU size of " - << format("0x%08" PRIx32, CUSize) << "):\n"; - Die.dump(OS, 0); - OS << "\n"; - } else { - // Valid reference, but we will verify it points to an actual - // DIE later. - ReferenceToDIEOffsets[*RefVal].insert(Die.getOffset()); - } - } - break; - } - case DW_FORM_ref_addr: { - // Verify all absolute DIE references have valid offsets in the - // .debug_info section. - Optional RefVal = AttrValue.Value.getAsReference(); - assert(RefVal); - if (RefVal) { - if(*RefVal >= DCtx.getInfoSection().Data.size()) { - Success = false; - OS << "error: DW_FORM_ref_addr offset beyond .debug_info " - "bounds:\n"; - Die.dump(OS, 0); - OS << "\n"; - } else { - // Valid reference, but we will verify it points to an actual - // DIE later. - ReferenceToDIEOffsets[*RefVal].insert(Die.getOffset()); - } - } - break; - } - case DW_FORM_strp: { - auto SecOffset = AttrValue.Value.getAsSectionOffset(); - assert(SecOffset); // DW_FORM_strp is a section offset. - if (SecOffset && *SecOffset >= DCtx.getStringSection().size()) { - Success = false; - OS << "error: DW_FORM_strp offset beyond .debug_str bounds:\n"; - Die.dump(OS, 0); - OS << "\n"; - } - break; - } - default: - break; - } - } - } - } - - // Take all references and make sure they point to an actual DIE by - // getting the DIE by offset and emitting an error - OS << "Verifying .debug_info references...\n"; - for (auto Pair: ReferenceToDIEOffsets) { - auto Die = DCtx.getDIEForOffset(Pair.first); - if (Die) - continue; - Success = false; - OS << "error: invalid DIE reference " << format("0x%08" PRIx64, Pair.first) - << ". 
Offset is in between DIEs:\n"; - for (auto Offset: Pair.second) { - auto ReferencingDie = DCtx.getDIEForOffset(Offset); - ReferencingDie.dump(OS, 0); - OS << "\n"; - } - OS << "\n"; - } - return Success; - } - - bool HandleDebugLine() { - std::map StmtListToDie; - bool Success = true; - OS << "Verifying .debug_line...\n"; - for (const auto &CU : DCtx.compile_units()) { - uint32_t LineTableOffset = 0; - auto CUDie = CU->getUnitDIE(); - auto StmtFormValue = CUDie.find(DW_AT_stmt_list); - if (!StmtFormValue) { - // No line table for this compile unit. - continue; - } - // Get the attribute value as a section offset. No need to produce an - // error here if the encoding isn't correct because we validate this in - // the .debug_info verifier. - if (auto StmtSectionOffset = toSectionOffset(StmtFormValue)) { - LineTableOffset = *StmtSectionOffset; - if (LineTableOffset >= DCtx.getLineSection().Data.size()) { - // Make sure we don't get a valid line table back if the offset - // is wrong. - assert(DCtx.getLineTableForUnit(CU.get()) == nullptr); - // Skip this line table as it isn't valid. No need to create an error - // here because we validate this in the .debug_info verifier. - continue; - } else { - auto Iter = StmtListToDie.find(LineTableOffset); - if (Iter != StmtListToDie.end()) { - Success = false; - OS << "error: two compile unit DIEs, " - << format("0x%08" PRIx32, Iter->second.getOffset()) << " and " - << format("0x%08" PRIx32, CUDie.getOffset()) - << ", have the same DW_AT_stmt_list section offset:\n"; - Iter->second.dump(OS, 0); - CUDie.dump(OS, 0); - OS << '\n'; - // Already verified this line table before, no need to do it again. - continue; - } - StmtListToDie[LineTableOffset] = CUDie; - } - } - auto LineTable = DCtx.getLineTableForUnit(CU.get()); - if (!LineTable) { - Success = false; - OS << "error: .debug_line[" << format("0x%08" PRIx32, LineTableOffset) - << "] was not able to be parsed for CU:\n"; - CUDie.dump(OS, 0); - OS << '\n'; - continue; - } - uint32_t MaxFileIndex = LineTable->Prologue.FileNames.size(); - uint64_t PrevAddress = 0; - uint32_t RowIndex = 0; - for (const auto &Row : LineTable->Rows) { - if (Row.Address < PrevAddress) { - Success = false; - OS << "error: .debug_line[" << format("0x%08" PRIx32, LineTableOffset) - << "] row[" << RowIndex - << "] decreases in address from previous row:\n"; - - DWARFDebugLine::Row::dumpTableHeader(OS); - if (RowIndex > 0) - LineTable->Rows[RowIndex - 1].dump(OS); - Row.dump(OS); - OS << '\n'; - } - - if (Row.File > MaxFileIndex) { - Success = false; - OS << "error: .debug_line[" << format("0x%08" PRIx32, LineTableOffset) - << "][" << RowIndex << "] has invalid file index " << Row.File - << " (valid values are [1," << MaxFileIndex << "]):\n"; - DWARFDebugLine::Row::dumpTableHeader(OS); - Row.dump(OS); - OS << '\n'; - } - if (Row.EndSequence) - PrevAddress = 0; - else - PrevAddress = Row.Address; - ++RowIndex; - } - } - return Success; - } -}; - -} // anonymous namespace - bool DWARFContext::verify(raw_ostream &OS, DIDumpType DumpType) { bool Success = true; DWARFVerifier verifier(OS, *this); @@ -547,8 +422,13 @@ bool DWARFContext::verify(raw_ostream &OS, DIDumpType DumpType) { if (!verifier.handleDebugLine()) Success = false; } + if (DumpType == DIDT_All || DumpType == DIDT_AppleNames) { + if (!verifier.handleAppleNames()) + Success = false; + } return Success; } + const DWARFUnitIndex &DWARFContext::getCUIndex() { if (CUIndex) return *CUIndex; @@ -606,11 +486,13 @@ const DWARFDebugLoc *DWARFContext::getDebugLoc() { if (Loc) 
return Loc.get(); - DataExtractor LocData(getLocSection().Data, isLittleEndian(), 0); - Loc.reset(new DWARFDebugLoc(getLocSection().Relocs)); + Loc.reset(new DWARFDebugLoc); // assume all compile units have the same address byte size - if (getNumCompileUnits()) - Loc->parse(LocData, getCompileUnitAtIndex(0)->getAddressByteSize()); + if (getNumCompileUnits()) { + DWARFDataExtractor LocData(getLocSection(), isLittleEndian(), + getCompileUnitAtIndex(0)->getAddressByteSize()); + Loc->parse(LocData); + } return Loc.get(); } @@ -677,7 +559,7 @@ const DWARFDebugMacro *DWARFContext::getDebugMacro() { const DWARFLineTable * DWARFContext::getLineTableForUnit(DWARFUnit *U) { if (!Line) - Line.reset(new DWARFDebugLine(&getLineSection().Relocs)); + Line.reset(new DWARFDebugLine); auto UnitDIE = U->getUnitDIE(); if (!UnitDIE) @@ -693,12 +575,12 @@ DWARFContext::getLineTableForUnit(DWARFUnit *U) { return lt; // Make sure the offset is good before we try to parse. - if (stmtOffset >= U->getLineSection().size()) + if (stmtOffset >= U->getLineSection().Data.size()) return nullptr; // We have to parse it first. - DataExtractor lineData(U->getLineSection(), isLittleEndian(), - U->getAddressByteSize()); + DWARFDataExtractor lineData(U->getLineSection(), isLittleEndian(), + U->getAddressByteSize()); return Line->getOrParseLineTable(lineData, stmtOffset); } @@ -709,10 +591,10 @@ void DWARFContext::parseCompileUnits() { void DWARFContext::parseTypeUnits() { if (!TUs.empty()) return; - for (const auto &I : getTypesSections()) { + forEachTypesSections([&](const DWARFSection &S) { TUs.emplace_back(); - TUs.back().parse(*this, I.second); - } + TUs.back().parse(*this, S); + }); } void DWARFContext::parseDWOCompileUnits() { @@ -722,10 +604,10 @@ void DWARFContext::parseDWOCompileUnits() { void DWARFContext::parseDWOTypeUnits() { if (!DWOTUs.empty()) return; - for (const auto &I : getTypesDWOSections()) { + forEachTypesDWOSections([&](const DWARFSection &S) { DWOTUs.emplace_back(); - DWOTUs.back().parseDWO(*this, I.second); - } + DWOTUs.back().parseDWO(*this, S); + }); } DWARFCompileUnit *DWARFContext::getCompileUnitForOffset(uint32_t Offset) { @@ -897,42 +779,105 @@ DWARFContext::getInliningInfoForAddress(uint64_t Address, return InliningInfo; } +std::shared_ptr +DWARFContext::getDWOContext(StringRef AbsolutePath) { + if (auto S = DWP.lock()) { + DWARFContext *Ctxt = S->Context.get(); + return std::shared_ptr(std::move(S), Ctxt); + } + + std::weak_ptr *Entry = &DWOFiles[AbsolutePath]; + + if (auto S = Entry->lock()) { + DWARFContext *Ctxt = S->Context.get(); + return std::shared_ptr(std::move(S), Ctxt); + } + + SmallString<128> DWPName; + Expected> Obj = [&] { + if (!CheckedForDWP) { + (getFileName() + ".dwp").toVector(DWPName); + auto Obj = object::ObjectFile::createObjectFile(DWPName); + if (Obj) { + Entry = &DWP; + return Obj; + } else { + CheckedForDWP = true; + // TODO: Should this error be handled (maybe in a high verbosity mode) + // before falling back to .dwo files? + consumeError(Obj.takeError()); + } + } + + return object::ObjectFile::createObjectFile(AbsolutePath); + }(); + + if (!Obj) { + // TODO: Actually report errors helpfully. 
+ consumeError(Obj.takeError()); + return nullptr; + } + + auto S = std::make_shared(); + S->File = std::move(Obj.get()); + S->Context = llvm::make_unique(*S->File.getBinary()); + *Entry = S; + auto *Ctxt = S->Context.get(); + return std::shared_ptr(std::move(S), Ctxt); +} + static Error createError(const Twine &Reason, llvm::Error E) { return make_error(Reason + toString(std::move(E)), inconvertibleErrorCode()); } -/// Returns the address of symbol relocation used against. Used for futher -/// relocations computation. Symbol's section load address is taken in account if -/// LoadedObjectInfo interface is provided. -static Expected getSymbolAddress(const object::ObjectFile &Obj, - const RelocationRef &Reloc, - const LoadedObjectInfo *L) { - uint64_t Ret = 0; +/// SymInfo contains information about a symbol: its address +/// and section index which is -1LL for absolute symbols. +struct SymInfo { + uint64_t Address; + uint64_t SectionIndex; +}; + +/// Returns the address of symbol relocation used against and a section index. +/// Used for further relocations computation. Symbol's section load address is +/// taken into account if the LoadedObjectInfo interface is provided. +static Expected getSymbolInfo(const object::ObjectFile &Obj, + const RelocationRef &Reloc, + const LoadedObjectInfo *L, + std::map &Cache) { + SymInfo Ret = {0, (uint64_t)-1LL}; object::section_iterator RSec = Obj.section_end(); object::symbol_iterator Sym = Reloc.getSymbol(); + std::map::iterator CacheIt = Cache.end(); // First calculate the address of the symbol or section as it appears // in the object file if (Sym != Obj.symbol_end()) { + bool New; + std::tie(CacheIt, New) = Cache.insert({*Sym, {0, 0}}); + if (!New) + return CacheIt->second; + Expected SymAddrOrErr = Sym->getAddress(); if (!SymAddrOrErr) - return createError("error: failed to compute symbol address: ", + return createError("failed to compute symbol address: ", SymAddrOrErr.takeError()); // Also remember what section this symbol is in for later auto SectOrErr = Sym->getSection(); if (!SectOrErr) - return createError("error: failed to get symbol section: ", + return createError("failed to get symbol section: ", SectOrErr.takeError()); RSec = *SectOrErr; - Ret = *SymAddrOrErr; + Ret.Address = *SymAddrOrErr; } else if (auto *MObj = dyn_cast(&Obj)) { RSec = MObj->getRelocationSection(Reloc.getRawDataRefImpl()); - Ret = RSec->getAddress(); + Ret.Address = RSec->getAddress(); } + if (RSec != Obj.section_end()) + Ret.SectionIndex = RSec->getIndex(); + // If we are given load addresses for the sections, we need to adjust: // SymAddr = (Address of Symbol Or Section in File) - // (Address of Section in File) + @@ -942,7 +887,11 @@ static Expected getSymbolAddress(const object::ObjectFile &Obj, // we need to perform the same computation.
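The return statements in getDWOContext above lean on the aliasing constructor of std::shared_ptr: the caller receives a pointer to the DWARFContext sub-object while the reference count keeps the whole owning file alive, and the weak_ptr cache entry expires once the last client lets go. A self-contained sketch of the pattern, with plain stand-in types instead of the LLVM classes:

```cpp
#include <cassert>
#include <memory>

struct Context { int Id = 42; };
struct DwoFile {
  std::unique_ptr<Context> Ctx = std::make_unique<Context>();
};

static std::shared_ptr<Context> getContext(std::weak_ptr<DwoFile> &CacheEntry) {
  if (auto S = CacheEntry.lock()) {
    // Cache hit: share ownership of the existing file, point at its context.
    Context *C = S->Ctx.get();
    return std::shared_ptr<Context>(std::move(S), C);
  }
  auto S = std::make_shared<DwoFile>();
  CacheEntry = S; // weak entry: expires when the last client releases S
  Context *C = S->Ctx.get();
  // Aliasing constructor: the control block owns the DwoFile, but the
  // stored pointer is the Context inside it.
  return std::shared_ptr<Context>(std::move(S), C);
}

int main() {
  std::weak_ptr<DwoFile> Cache;
  auto C1 = getContext(Cache);
  auto C2 = getContext(Cache); // second lookup reuses the cached file
  assert(C1.get() == C2.get());
  return 0;
}
```

Extracting the raw Context pointer before the std::move matters: constructor arguments have no guaranteed evaluation order, so dereferencing S in the same call that moves from it would be a bug.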
if (L && RSec != Obj.section_end()) if (uint64_t SectionLoadAddress = L->getSectionLoadAddress(*RSec)) - Ret += SectionLoadAddress - RSec->getAddress(); + Ret.Address += SectionLoadAddress - RSec->getAddress(); + + if (CacheIt != Cache.end()) + CacheIt->second = Ret; + return Ret; } @@ -968,7 +917,7 @@ Error DWARFContextInMemory::maybeDecompress(const SectionRef &Sec, return Decompressor.takeError(); SmallString<32> Out; - if (auto Err = Decompressor->decompress(Out)) + if (auto Err = Decompressor->resizeAndDecompress(Out)) return Err; UncompressedSections.emplace_back(std::move(Out)); @@ -977,53 +926,60 @@ Error DWARFContextInMemory::maybeDecompress(const SectionRef &Sec, return Error::success(); } -DWARFContextInMemory::DWARFContextInMemory(const object::ObjectFile &Obj, - const LoadedObjectInfo *L) - : IsLittleEndian(Obj.isLittleEndian()), +ErrorPolicy DWARFContextInMemory::defaultErrorHandler(Error E) { + errs() << "error: " + toString(std::move(E)) << '\n'; + return ErrorPolicy::Continue; +} + +DWARFContextInMemory::DWARFContextInMemory( + const object::ObjectFile &Obj, const LoadedObjectInfo *L, + function_ref HandleError) + : FileName(Obj.getFileName()), IsLittleEndian(Obj.isLittleEndian()), AddressSize(Obj.getBytesInAddress()) { for (const SectionRef &Section : Obj.sections()) { - StringRef name; - Section.getName(name); + StringRef Name; + Section.getName(Name); // Skip BSS and Virtual sections, they aren't interesting. - bool IsBSS = Section.isBSS(); - if (IsBSS) + if (Section.isBSS() || Section.isVirtual()) continue; - bool IsVirtual = Section.isVirtual(); - if (IsVirtual) - continue; - StringRef data; + StringRef Data; section_iterator RelocatedSection = Section.getRelocatedSection(); // Try to obtain an already relocated version of this section. // Else use the unrelocated section from the object file. We'll have to // apply relocations ourselves later. - if (!L || !L->getLoadedSectionContents(*RelocatedSection,data)) - Section.getContents(data); - - if (auto Err = maybeDecompress(Section, name, data)) { - errs() << "error: failed to decompress '" + name + "', " + - toString(std::move(Err)) - << '\n'; + if (!L || !L->getLoadedSectionContents(*RelocatedSection, Data)) + Section.getContents(Data); + + if (auto Err = maybeDecompress(Section, Name, Data)) { + ErrorPolicy EP = HandleError( + createError("failed to decompress '" + Name + "', ", std::move(Err))); + if (EP == ErrorPolicy::Halt) + return; continue; } // Compressed sections names in GNU style starts from ".z", // at this point section is decompressed and we drop compression prefix. - name = name.substr( - name.find_first_not_of("._z")); // Skip ".", "z" and "_" prefixes. + Name = Name.substr( + Name.find_first_not_of("._z")); // Skip ".", "z" and "_" prefixes. + + // Map platform specific debug section names to DWARF standard section + // names. + Name = Obj.mapDebugSectionName(Name); - if (StringRef *SectionData = MapSectionToMember(name)) { - *SectionData = data; - if (name == "debug_ranges") { + if (StringRef *SectionData = mapSectionToMember(Name)) { + *SectionData = Data; + if (Name == "debug_ranges") { // FIXME: Use the other dwo range section when we emit it. - RangeDWOSection.Data = data; + RangeDWOSection.Data = Data; } - } else if (name == "debug_types") { + } else if (Name == "debug_types") { // Find debug_types data by section rather than name as there are // multiple, comdat grouped, debug_types sections. 
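The SymAddr adjustment spelled out in getSymbolInfo's comment above is easier to see with numbers plugged in: when the loader reports where a section actually landed, the symbol's file address is rebased by the difference between the section's load address and its file address. A small worked sketch, with plain structs standing in for the object-file API:

```cpp
#include <cstdint>
#include <iostream>

struct Section {
  uint64_t FileAddr; // address recorded in the object file
  uint64_t LoadAddr; // where the JIT/loader actually placed it (0 = unknown)
};

static uint64_t relocatedSymbolAddr(uint64_t SymFileAddr, const Section &S) {
  // SymAddr = (address of symbol in file) - (address of section in file)
  //           + (load address of section)
  if (S.LoadAddr == 0)
    return SymFileAddr; // no load info: keep the file address
  return SymFileAddr - S.FileAddr + S.LoadAddr;
}

int main() {
  Section Text{0x1000, 0x7f0000401000};
  std::cout << std::hex << relocatedSymbolAddr(0x1234, Text) << '\n';
  // prints: 7f0000401234
}
```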
- TypesSections[Section].Data = data; - } else if (name == "debug_types.dwo") { - TypesDWOSections[Section].Data = data; + TypesSections[Section].Data = Data; + } else if (Name == "debug_types.dwo") { + TypesDWOSections[Section].Data = Data; } if (RelocatedSection == Obj.section_end()) @@ -1036,7 +992,7 @@ DWARFContextInMemory::DWARFContextInMemory(const object::ObjectFile &Obj, // If the section we're relocating was relocated already by the JIT, // then we used the relocated version above, so we do not need to process // relocations for it now. - if (L && L->getLoadedSectionContents(*RelocatedSection,RelSecData)) + if (L && L->getLoadedSectionContents(*RelocatedSection, RelSecData)) continue; // In Mach-o files, the relocations do not need to be applied if @@ -1048,22 +1004,12 @@ DWARFContextInMemory::DWARFContextInMemory(const object::ObjectFile &Obj, continue; RelSecName = RelSecName.substr( - RelSecName.find_first_not_of("._")); // Skip . and _ prefixes. + RelSecName.find_first_not_of("._z")); // Skip . and _ prefixes. // TODO: Add support for relocations in other sections as needed. // Record relocations for the debug_info and debug_line sections. - RelocAddrMap *Map = StringSwitch(RelSecName) - .Case("debug_info", &InfoSection.Relocs) - .Case("debug_loc", &LocSection.Relocs) - .Case("debug_info.dwo", &InfoDWOSection.Relocs) - .Case("debug_line", &LineSection.Relocs) - .Case("debug_ranges", &RangeSection.Relocs) - .Case("apple_names", &AppleNamesSection.Relocs) - .Case("apple_types", &AppleTypesSection.Relocs) - .Case("apple_namespaces", &AppleNamespacesSection.Relocs) - .Case("apple_namespac", &AppleNamespacesSection.Relocs) - .Case("apple_objc", &AppleObjCSection.Relocs) - .Default(nullptr); + DWARFSection *Sec = mapNameToDWARFSection(RelSecName); + RelocAddrMap *Map = Sec ? &Sec->Relocs : nullptr; if (!Map) { // Find debug_types relocs by section rather than name as there are // multiple, comdat grouped, debug_types sections. @@ -1075,47 +1021,38 @@ DWARFContextInMemory::DWARFContextInMemory(const object::ObjectFile &Obj, continue; } - if (Section.relocation_begin() != Section.relocation_end()) { - uint64_t SectionSize = RelocatedSection->getSize(); - for (const RelocationRef &Reloc : Section.relocations()) { - // FIXME: it's not clear how to correctly handle scattered - // relocations. - if (isRelocScattered(Obj, Reloc)) - continue; - - Expected SymAddrOrErr = getSymbolAddress(Obj, Reloc, L); - if (!SymAddrOrErr) { - errs() << toString(SymAddrOrErr.takeError()) << '\n'; - continue; - } - - object::RelocVisitor V(Obj); - object::RelocToApply R(V.visit(Reloc.getType(), Reloc, *SymAddrOrErr)); - if (V.error()) { - SmallString<32> Name; - Reloc.getTypeName(Name); - errs() << "error: failed to compute relocation: " - << Name << "\n"; - continue; - } - uint64_t Address = Reloc.getOffset(); - if (Address + R.Width > SectionSize) { - errs() << "error: " << R.Width << "-byte relocation starting " - << Address << " bytes into section " << name << " which is " - << SectionSize << " bytes long.\n"; - continue; - } - if (R.Width > 8) { - errs() << "error: can't handle a relocation of more than 8 bytes at " - "a time.\n"; - continue; - } - DEBUG(dbgs() << "Writing " << format("%p", R.Value) - << " at " << format("%p", Address) - << " with width " << format("%d", R.Width) - << "\n"); - Map->insert(std::make_pair(Address, std::make_pair(R.Width, R.Value))); + if (Section.relocation_begin() == Section.relocation_end()) + continue; + + // Symbol to [address, section index] cache mapping. 
+ std::map AddrCache; + for (const RelocationRef &Reloc : Section.relocations()) { + // FIXME: it's not clear how to correctly handle scattered + // relocations. + if (isRelocScattered(Obj, Reloc)) + continue; + + Expected SymInfoOrErr = getSymbolInfo(Obj, Reloc, L, AddrCache); + if (!SymInfoOrErr) { + if (HandleError(SymInfoOrErr.takeError()) == ErrorPolicy::Halt) + return; + continue; + } + + object::RelocVisitor V(Obj); + uint64_t Val = V.visit(Reloc.getType(), Reloc, SymInfoOrErr->Address); + if (V.error()) { + SmallString<32> Type; + Reloc.getTypeName(Type); + ErrorPolicy EP = HandleError( + createError("failed to compute relocation: " + Type + ", ", + errorCodeToError(object_error::parse_failed))); + if (EP == ErrorPolicy::Halt) + return; + continue; } + RelocAddrEntry Rel = {SymInfoOrErr->SectionIndex, Val}; + Map->insert({Reloc.getOffset(), Rel}); } } } @@ -1125,39 +1062,47 @@ DWARFContextInMemory::DWARFContextInMemory( bool isLittleEndian) : IsLittleEndian(isLittleEndian), AddressSize(AddrSize) { for (const auto &SecIt : Sections) { - if (StringRef *SectionData = MapSectionToMember(SecIt.first())) + if (StringRef *SectionData = mapSectionToMember(SecIt.first())) *SectionData = SecIt.second->getBuffer(); } } -StringRef *DWARFContextInMemory::MapSectionToMember(StringRef Name) { +DWARFSection *DWARFContextInMemory::mapNameToDWARFSection(StringRef Name) { + return StringSwitch(Name) + .Case("debug_info", &InfoSection) + .Case("debug_loc", &LocSection) + .Case("debug_line", &LineSection) + .Case("debug_str_offsets", &StringOffsetSection) + .Case("debug_ranges", &RangeSection) + .Case("debug_info.dwo", &InfoDWOSection) + .Case("debug_loc.dwo", &LocDWOSection) + .Case("debug_line.dwo", &LineDWOSection) + .Case("debug_str_offsets.dwo", &StringOffsetDWOSection) + .Case("debug_addr", &AddrSection) + .Case("apple_names", &AppleNamesSection) + .Case("apple_types", &AppleTypesSection) + .Case("apple_namespaces", &AppleNamespacesSection) + .Case("apple_namespac", &AppleNamespacesSection) + .Case("apple_objc", &AppleObjCSection) + .Default(nullptr); +} + +StringRef *DWARFContextInMemory::mapSectionToMember(StringRef Name) { + if (DWARFSection *Sec = mapNameToDWARFSection(Name)) + return &Sec->Data; return StringSwitch(Name) - .Case("debug_info", &InfoSection.Data) .Case("debug_abbrev", &AbbrevSection) - .Case("debug_loc", &LocSection.Data) - .Case("debug_line", &LineSection.Data) .Case("debug_aranges", &ARangeSection) .Case("debug_frame", &DebugFrameSection) .Case("eh_frame", &EHFrameSection) .Case("debug_str", &StringSection) - .Case("debug_ranges", &RangeSection.Data) .Case("debug_macinfo", &MacinfoSection) .Case("debug_pubnames", &PubNamesSection) .Case("debug_pubtypes", &PubTypesSection) .Case("debug_gnu_pubnames", &GnuPubNamesSection) .Case("debug_gnu_pubtypes", &GnuPubTypesSection) - .Case("debug_info.dwo", &InfoDWOSection.Data) .Case("debug_abbrev.dwo", &AbbrevDWOSection) - .Case("debug_loc.dwo", &LocDWOSection.Data) - .Case("debug_line.dwo", &LineDWOSection.Data) .Case("debug_str.dwo", &StringDWOSection) - .Case("debug_str_offsets.dwo", &StringOffsetDWOSection) - .Case("debug_addr", &AddrSection) - .Case("apple_names", &AppleNamesSection.Data) - .Case("apple_types", &AppleTypesSection.Data) - .Case("apple_namespaces", &AppleNamespacesSection.Data) - .Case("apple_namespac", &AppleNamespacesSection.Data) - .Case("apple_objc", &AppleObjCSection.Data) .Case("debug_cu_index", &CUIndexSection) .Case("debug_tu_index", &TUIndexSection) .Case("gdb_index", &GdbIndexSection) diff --git 
a/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFDataExtractor.cpp b/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFDataExtractor.cpp new file mode 100644 index 0000000000000..001097e56c716 --- /dev/null +++ b/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFDataExtractor.cpp @@ -0,0 +1,24 @@ +//===- DWARFDataExtractor.cpp ---------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h" + +using namespace llvm; + +uint64_t DWARFDataExtractor::getRelocatedValue(uint32_t Size, uint32_t *Off, + uint64_t *SecNdx) const { + if (!RelocMap) + return getUnsigned(Off, Size); + RelocAddrMap::const_iterator AI = RelocMap->find(*Off); + if (AI == RelocMap->end()) + return getUnsigned(Off, Size); + if (SecNdx) + *SecNdx = AI->second.SectionIndex; + return getUnsigned(Off, Size) + AI->second.Value; +} diff --git a/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFDebugAranges.cpp b/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFDebugAranges.cpp index 0cf71f530446b..6601393d7459b 100644 --- a/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFDebugAranges.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFDebugAranges.cpp @@ -54,9 +54,8 @@ void DWARFDebugAranges::generate(DWARFContext *CTX) { if (ParsedCUOffsets.insert(CUOffset).second) { DWARFAddressRangesVector CURanges; CU->collectAddressRanges(CURanges); - for (const auto &R : CURanges) { - appendRange(CUOffset, R.first, R.second); - } + for (const auto &R : CURanges) + appendRange(CUOffset, R.LowPC, R.HighPC); } } diff --git a/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp b/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp index b55ed6a468496..475cf25b781b4 100644 --- a/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp @@ -7,18 +7,18 @@ // //===----------------------------------------------------------------------===// +#include "llvm/DebugInfo/DWARF/DWARFDebugFrame.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Optional.h" -#include "llvm/ADT/SmallString.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" -#include "llvm/DebugInfo/DWARF/DWARFDebugFrame.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/DataExtractor.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" @@ -70,7 +70,7 @@ class llvm::FrameEntry { /// An entry may contain CFI instructions. An instruction consists of an /// opcode and an optional sequence of operands. - typedef std::vector Operands; + using Operands = std::vector; struct Instruction { Instruction(uint8_t Opcode) : Opcode(Opcode) @@ -513,6 +513,19 @@ static uint64_t readPointer(const DataExtractor &Data, uint32_t &Offset, } } +// This is a workaround for old compilers which do not allow +// noreturn attribute usage in lambdas. Once the support for those +// compilers is phased out, we can remove this and return to +// a ReportError lambda: [StartOffset](const char *ErrorMsg).
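The workaround comment above is about attribute placement rather than behavior: some older toolchains reject a noreturn attribute on a lambda, while a file-level function takes it without complaint. A minimal illustration of the shape of the fix, using the standard [[noreturn]] spelling instead of LLVM's LLVM_ATTRIBUTE_NORETURN macro:

```cpp
#include <cstdint>
#include <cstdio>
#include <cstdlib>

// The attribute sits happily on a free function on every compiler of
// interest; report_fatal_error is replaced by exit() to stay standalone.
[[noreturn]] static void reportError(uint32_t StartOffset,
                                     const char *ErrorMsg) {
  std::fprintf(stderr, ErrorMsg, static_cast<unsigned long>(StartOffset));
  std::fputc('\n', stderr);
  std::exit(1);
}

int main() {
  // The equivalent capture-based lambda, roughly
  //   auto ReportError = [StartOffset](const char *Msg) { ... };
  // is what older compilers refused to mark noreturn.
  reportError(0x18, "Parsing entry instructions at %lx failed");
}
```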
+static void LLVM_ATTRIBUTE_NORETURN ReportError(uint32_t StartOffset, + const char *ErrorMsg) { + std::string Str; + raw_string_ostream OS(Str); + OS << format(ErrorMsg, StartOffset); + OS.flush(); + report_fatal_error(Str); +} + void DWARFDebugFrame::parse(DataExtractor Data) { uint32_t Offset = 0; DenseMap CIEs; @@ -520,14 +533,6 @@ void DWARFDebugFrame::parse(DataExtractor Data) { while (Data.isValidOffset(Offset)) { uint32_t StartOffset = Offset; - auto ReportError = [StartOffset](const char *ErrorMsg) { - std::string Str; - raw_string_ostream OS(Str); - OS << format(ErrorMsg, StartOffset); - OS.flush(); - report_fatal_error(Str); - }; - bool IsDWARF64 = false; uint64_t Length = Data.getU32(&Offset); uint64_t Id; @@ -583,13 +588,15 @@ void DWARFDebugFrame::parse(DataExtractor Data) { for (unsigned i = 0, e = AugmentationString.size(); i != e; ++i) { switch (AugmentationString[i]) { default: - ReportError("Unknown augmentation character in entry at %lx"); + ReportError(StartOffset, + "Unknown augmentation character in entry at %lx"); case 'L': LSDAPointerEncoding = Data.getU8(&Offset); break; case 'P': { if (Personality) - ReportError("Duplicate personality in entry at %lx"); + ReportError(StartOffset, + "Duplicate personality in entry at %lx"); PersonalityEncoding = Data.getU8(&Offset); Personality = readPointer(Data, Offset, *PersonalityEncoding); break; @@ -599,7 +606,8 @@ void DWARFDebugFrame::parse(DataExtractor Data) { break; case 'z': if (i) - ReportError("'z' must be the first character at %lx"); + ReportError(StartOffset, + "'z' must be the first character at %lx"); // Parse the augmentation length first. We only parse it if // the string contains a 'z'. AugmentationLength = Data.getULEB128(&Offset); @@ -611,7 +619,7 @@ void DWARFDebugFrame::parse(DataExtractor Data) { if (AugmentationLength.hasValue()) { if (Offset != EndAugmentationOffset) - ReportError("Parsing augmentation data at %lx failed"); + ReportError(StartOffset, "Parsing augmentation data at %lx failed"); AugmentationData = Data.getData().slice(StartAugmentationOffset, EndAugmentationOffset); @@ -638,7 +646,8 @@ void DWARFDebugFrame::parse(DataExtractor Data) { if (IsEH) { // The address size is encoded in the CIE we reference. 
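The CIE augmentation handling rewritten in the hunks above follows a few positional rules: 'z' is only meaningful as the first character (it announces an augmentation data length), 'P' must not repeat, and an unknown character aborts the entry. A condensed standalone sketch of those rules, with simplified encodings and exceptions in place of report_fatal_error:

```cpp
#include <cstddef>
#include <cstdint>
#include <optional>
#include <stdexcept>
#include <string>

struct Augmentation {
  bool HasLength = false;
  std::optional<uint8_t> PersonalityEnc, LSDAEnc, FDEEnc;
};

Augmentation parseAugmentation(const std::string &Aug, const uint8_t *Data,
                               size_t &Off) {
  Augmentation A;
  for (size_t I = 0; I != Aug.size(); ++I) {
    switch (Aug[I]) {
    case 'z':
      if (I != 0)
        throw std::runtime_error("'z' must be the first character");
      A.HasLength = true; // a ULEB128 length would be read here
      break;
    case 'P':
      if (A.PersonalityEnc)
        throw std::runtime_error("duplicate personality");
      A.PersonalityEnc = Data[Off++]; // encoding byte; pointer follows
      break;
    case 'L':
      A.LSDAEnc = Data[Off++];
      break;
    case 'R':
      A.FDEEnc = Data[Off++];
      break;
    default:
      throw std::runtime_error("unknown augmentation character");
    }
  }
  return A;
}
```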
if (!Cie) - ReportError("Parsing FDE data at %lx failed due to missing CIE"); + ReportError(StartOffset, + "Parsing FDE data at %lx failed due to missing CIE"); InitialLocation = readPointer(Data, Offset, Cie->getFDEPointerEncoding()); @@ -658,7 +667,7 @@ void DWARFDebugFrame::parse(DataExtractor Data) { readPointer(Data, Offset, Cie->getLSDAPointerEncoding()); if (Offset != EndAugmentationOffset) - ReportError("Parsing augmentation data at %lx failed"); + ReportError(StartOffset, "Parsing augmentation data at %lx failed"); } } else { InitialLocation = Data.getAddress(&Offset); @@ -673,7 +682,7 @@ void DWARFDebugFrame::parse(DataExtractor Data) { Entries.back()->parseInstructions(Data, &Offset, EndStructureOffset); if (Offset != EndStructureOffset) - ReportError("Parsing entry instructions at %lx failed"); + ReportError(StartOffset, "Parsing entry instructions at %lx failed"); } } diff --git a/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp b/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp index 35f673c7acc69..976bc4651ae69 100644 --- a/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp @@ -7,9 +7,9 @@ // //===----------------------------------------------------------------------===// +#include "llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h" #include "llvm/ADT/Optional.h" #include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h" -#include "llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h" #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" #include "llvm/DebugInfo/DWARF/DWARFUnit.h" #include "llvm/Support/DataExtractor.h" @@ -21,13 +21,13 @@ using namespace dwarf; bool DWARFDebugInfoEntry::extractFast(const DWARFUnit &U, uint32_t *OffsetPtr) { - DataExtractor DebugInfoData = U.getDebugInfoExtractor(); + DWARFDataExtractor DebugInfoData = U.getDebugInfoExtractor(); const uint32_t UEndOffset = U.getNextUnitOffset(); return extractFast(U, OffsetPtr, DebugInfoData, UEndOffset, 0); } bool DWARFDebugInfoEntry::extractFast(const DWARFUnit &U, uint32_t *OffsetPtr, - const DataExtractor &DebugInfoData, + const DWARFDataExtractor &DebugInfoData, uint32_t UEndOffset, uint32_t D) { Offset = *OffsetPtr; Depth = D; @@ -59,7 +59,7 @@ bool DWARFDebugInfoEntry::extractFast(const DWARFUnit &U, uint32_t *OffsetPtr, // Attribute byte size if fixed, just add the size to the offset. *OffsetPtr += *FixedSize; } else if (!DWARFFormValue::skipValue(AttrSpec.Form, DebugInfoData, - OffsetPtr, &U)) { + OffsetPtr, U.getFormParams())) { // We failed to skip this attribute's value, restore the original offset // and return the failure status. 
*OffsetPtr = Offset; diff --git a/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFDebugLine.cpp index f32e8fe763579..7d180564e9f7a 100644 --- a/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFDebugLine.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFDebugLine.cpp @@ -9,10 +9,12 @@ #include "llvm/DebugInfo/DWARF/DWARFDebugLine.h" #include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" #include "llvm/DebugInfo/DWARF/DWARFRelocMap.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Support/Format.h" #include "llvm/Support/Path.h" #include "llvm/Support/raw_ostream.h" @@ -26,23 +28,27 @@ using namespace llvm; using namespace dwarf; -typedef DILineInfoSpecifier::FileLineInfoKind FileLineInfoKind; +using FileLineInfoKind = DILineInfoSpecifier::FileLineInfoKind; + namespace { + struct ContentDescriptor { dwarf::LineNumberEntryFormat Type; dwarf::Form Form; }; -typedef SmallVector ContentDescriptors; + +using ContentDescriptors = SmallVector; + } // end anonymous namespace DWARFDebugLine::Prologue::Prologue() { clear(); } void DWARFDebugLine::Prologue::clear() { - TotalLength = Version = PrologueLength = 0; - AddressSize = SegSelectorSize = 0; + TotalLength = PrologueLength = 0; + SegSelectorSize = 0; MinInstLength = MaxOpsPerInst = DefaultIsStmt = LineBase = LineRange = 0; OpcodeBase = 0; - IsDWARF64 = false; + FormParams = DWARFFormParams({0, 0, DWARF32}); StandardOpcodeLengths.clear(); IncludeDirectories.clear(); FileNames.clear(); @@ -51,12 +57,13 @@ void DWARFDebugLine::Prologue::clear() { void DWARFDebugLine::Prologue::dump(raw_ostream &OS) const { OS << "Line table prologue:\n" << format(" total_length: 0x%8.8" PRIx64 "\n", TotalLength) - << format(" version: %u\n", Version) - << format(Version >= 5 ? " address_size: %u\n" : "", AddressSize) - << format(Version >= 5 ? " seg_select_size: %u\n" : "", SegSelectorSize) - << format(" prologue_length: 0x%8.8" PRIx64 "\n", PrologueLength) + << format(" version: %u\n", getVersion()); + if (getVersion() >= 5) + OS << format(" address_size: %u\n", getAddressSize()) + << format(" seg_select_size: %u\n", SegSelectorSize); + OS << format(" prologue_length: 0x%8.8" PRIx64 "\n", PrologueLength) << format(" min_inst_length: %u\n", MinInstLength) - << format(Version >= 4 ? "max_ops_per_inst: %u\n" : "", MaxOpsPerInst) + << format(getVersion() >= 4 ? "max_ops_per_inst: %u\n" : "", MaxOpsPerInst) << format(" default_is_stmt: %u\n", DefaultIsStmt) << format(" line_base: %i\n", LineBase) << format(" line_range: %u\n", LineRange) @@ -87,8 +94,8 @@ void DWARFDebugLine::Prologue::dump(raw_ostream &OS) const { // Parse v2-v4 directory and file tables. static void -parseV2DirFileTables(DataExtractor DebugLineData, uint32_t *OffsetPtr, - uint64_t EndPrologueOffset, +parseV2DirFileTables(const DWARFDataExtractor &DebugLineData, + uint32_t *OffsetPtr, uint64_t EndPrologueOffset, std::vector &IncludeDirectories, std::vector &FileNames) { while (*OffsetPtr < EndPrologueOffset) { @@ -115,7 +122,7 @@ parseV2DirFileTables(DataExtractor DebugLineData, uint32_t *OffsetPtr, // Returns the descriptors, or an empty vector if we did not find a path or // ran off the end of the prologue.
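The Prologue changes above fold Version, AddressSize and the DWARF32/64 flag into a single FormParams value, and both dump and parse now gate fields on the version: max_ops_per_inst exists only from v4 onward, and v5 headers carry their own address and segment-selector sizes. A compressed sketch of that gating, with the length fields before the version elided and no error handling:

```cpp
#include <cstddef>
#include <cstdint>
#include <vector>

struct LineHeader {
  uint16_t Version = 0;
  uint8_t AddrSize = 0, SegSelSize = 0, MinInstLen = 0, MaxOpsPerInst = 1;
};

LineHeader parseHeader(const std::vector<uint8_t> &D) {
  size_t Off = 0;
  LineHeader H;
  H.Version = D[Off] | (D[Off + 1] << 8); // u16, little-endian
  Off += 2;
  if (H.Version >= 5) { // v5 headers are self-describing
    H.AddrSize = D[Off++];
    H.SegSelSize = D[Off++];
  }
  H.MinInstLen = D[Off++];
  if (H.Version >= 4) // VLIW opcode support arrived in v4
    H.MaxOpsPerInst = D[Off++];
  return H;
}
```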
static ContentDescriptors -parseV5EntryFormat(DataExtractor DebugLineData, uint32_t *OffsetPtr, +parseV5EntryFormat(const DWARFDataExtractor &DebugLineData, uint32_t *OffsetPtr, uint64_t EndPrologueOffset) { ContentDescriptors Descriptors; int FormatCount = DebugLineData.getU8(OffsetPtr); @@ -135,8 +142,9 @@ parseV5EntryFormat(DataExtractor DebugLineData, uint32_t *OffsetPtr, } static bool -parseV5DirFileTables(DataExtractor DebugLineData, uint32_t *OffsetPtr, - uint64_t EndPrologueOffset, +parseV5DirFileTables(const DWARFDataExtractor &DebugLineData, + uint32_t *OffsetPtr, uint64_t EndPrologueOffset, + const DWARFFormParams &FormParams, std::vector &IncludeDirectories, std::vector &FileNames) { // Get the directory entry description. @@ -159,7 +167,7 @@ parseV5DirFileTables(DataExtractor DebugLineData, uint32_t *OffsetPtr, IncludeDirectories.push_back(Value.getAsCString().getValue()); break; default: - if (!Value.skipValue(DebugLineData, OffsetPtr, nullptr)) + if (!Value.skipValue(DebugLineData, OffsetPtr, FormParams)) return false; } } @@ -204,31 +212,33 @@ parseV5DirFileTables(DataExtractor DebugLineData, uint32_t *OffsetPtr, return true; } -bool DWARFDebugLine::Prologue::parse(DataExtractor DebugLineData, +bool DWARFDebugLine::Prologue::parse(const DWARFDataExtractor &DebugLineData, uint32_t *OffsetPtr) { const uint64_t PrologueOffset = *OffsetPtr; clear(); TotalLength = DebugLineData.getU32(OffsetPtr); if (TotalLength == UINT32_MAX) { - IsDWARF64 = true; + FormParams.Format = dwarf::DWARF64; TotalLength = DebugLineData.getU64(OffsetPtr); - } else if (TotalLength > 0xffffff00) { + } else if (TotalLength >= 0xffffff00) { return false; } - Version = DebugLineData.getU16(OffsetPtr); - if (Version < 2) + FormParams.Version = DebugLineData.getU16(OffsetPtr); + if (getVersion() < 2) return false; - if (Version >= 5) { - AddressSize = DebugLineData.getU8(OffsetPtr); + if (getVersion() >= 5) { + FormParams.AddrSize = DebugLineData.getU8(OffsetPtr); + assert(getAddressSize() == DebugLineData.getAddressSize() && + "Line table header and data extractor disagree"); SegSelectorSize = DebugLineData.getU8(OffsetPtr); } PrologueLength = DebugLineData.getUnsigned(OffsetPtr, sizeofPrologueLength()); const uint64_t EndPrologueOffset = PrologueLength + *OffsetPtr; MinInstLength = DebugLineData.getU8(OffsetPtr); - if (Version >= 4) + if (getVersion() >= 4) MaxOpsPerInst = DebugLineData.getU8(OffsetPtr); DefaultIsStmt = DebugLineData.getU8(OffsetPtr); LineBase = DebugLineData.getU8(OffsetPtr); @@ -241,9 +251,9 @@ bool DWARFDebugLine::Prologue::parse(DataExtractor DebugLineData, StandardOpcodeLengths.push_back(OpLen); } - if (Version >= 5) { + if (getVersion() >= 5) { if (!parseV5DirFileTables(DebugLineData, OffsetPtr, EndPrologueOffset, - IncludeDirectories, FileNames)) { + getFormParams(), IncludeDirectories, FileNames)) { fprintf(stderr, "warning: parsing line table prologue at 0x%8.8" PRIx64 " found an invalid directory or file table description at" @@ -333,7 +343,7 @@ void DWARFDebugLine::LineTable::clear() { } DWARFDebugLine::ParsingState::ParsingState(struct LineTable *LT) - : LineTable(LT), RowNumber(0) { + : LineTable(LT) { resetRowAndSequence(); } @@ -371,20 +381,19 @@ DWARFDebugLine::getLineTable(uint32_t Offset) const { } const DWARFDebugLine::LineTable * -DWARFDebugLine::getOrParseLineTable(DataExtractor DebugLineData, +DWARFDebugLine::getOrParseLineTable(const DWARFDataExtractor &DebugLineData, uint32_t Offset) { std::pair Pos = LineTableMap.insert(LineTableMapTy::value_type(Offset, 
LineTable())); LineTable *LT = &Pos.first->second; if (Pos.second) { - if (!LT->parse(DebugLineData, RelocMap, &Offset)) + if (!LT->parse(DebugLineData, &Offset)) return nullptr; } return LT; } -bool DWARFDebugLine::LineTable::parse(DataExtractor DebugLineData, - const RelocAddrMap *RMap, +bool DWARFDebugLine::LineTable::parse(const DWARFDataExtractor &DebugLineData, uint32_t *OffsetPtr) { const uint32_t DebugLineOffset = *OffsetPtr; @@ -433,8 +442,7 @@ bool DWARFDebugLine::LineTable::parse(DataExtractor DebugLineData, // relocatable address. All of the other statement program opcodes // that affect the address register add a delta to it. This instruction // stores a relocatable value into it instead. - State.Row.Address = getRelocatedValue( - DebugLineData, DebugLineData.getAddressSize(), OffsetPtr, RMap); + State.Row.Address = DebugLineData.getRelocatedAddress(OffsetPtr); break; case DW_LNE_define_file: diff --git a/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp b/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp index d5c34216ed53e..c240dd7406d9f 100644 --- a/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp @@ -7,11 +7,11 @@ // //===----------------------------------------------------------------------===// +#include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h" #include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" -#include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h" #include "llvm/DebugInfo/DWARF/DWARFRelocMap.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include @@ -40,9 +40,9 @@ void DWARFDebugLoc::dump(raw_ostream &OS) const { } } -void DWARFDebugLoc::parse(DataExtractor data, unsigned AddressSize) { +void DWARFDebugLoc::parse(const DWARFDataExtractor &data) { uint32_t Offset = 0; - while (data.isValidOffset(Offset+AddressSize-1)) { + while (data.isValidOffset(Offset+data.getAddressSize()-1)) { Locations.resize(Locations.size() + 1); LocationList &Loc = Locations.back(); Loc.Offset = Offset; @@ -51,8 +51,8 @@ void DWARFDebugLoc::parse(DataExtractor data, unsigned AddressSize) { while (true) { // A beginning and ending address offsets. 
Entry E; - E.Begin = getRelocatedValue(data, AddressSize, &Offset, &RelocMap); - E.End = getRelocatedValue(data, AddressSize, &Offset, &RelocMap); + E.Begin = data.getRelocatedAddress(&Offset); + E.End = data.getRelocatedAddress(&Offset); // The end of any given location list is marked by an end of list entry, // which consists of a 0 for the beginning address offset and a 0 for the diff --git a/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp b/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp index e0a9adde8e58d..1b77be6192ddc 100644 --- a/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp @@ -7,9 +7,9 @@ // //===----------------------------------------------------------------------===// -#include "SyntaxHighlighting.h" #include "llvm/DebugInfo/DWARF/DWARFDebugMacro.h" -#include "llvm/Support/Dwarf.h" +#include "SyntaxHighlighting.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/Support/raw_ostream.h" #include diff --git a/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFDebugPubTable.cpp b/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFDebugPubTable.cpp index daded255f8c76..5a4e39f3c2af8 100644 --- a/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFDebugPubTable.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFDebugPubTable.cpp @@ -7,10 +7,10 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/StringRef.h" #include "llvm/DebugInfo/DWARF/DWARFDebugPubTable.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/Support/DataExtractor.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include diff --git a/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp b/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp index 9380fe8fe85d8..0b6ae86fd94b2 100644 --- a/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/DebugInfo/DWARF/DWARFDebugRangeList.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include @@ -23,8 +23,8 @@ void DWARFDebugRangeList::clear() { Entries.clear(); } -bool DWARFDebugRangeList::extract(DataExtractor data, uint32_t *offset_ptr, - const RelocAddrMap &Relocs) { +bool DWARFDebugRangeList::extract(const DWARFDataExtractor &data, + uint32_t *offset_ptr) { clear(); if (!data.isValidOffset(*offset_ptr)) return false; @@ -36,9 +36,8 @@ bool DWARFDebugRangeList::extract(DataExtractor data, uint32_t *offset_ptr, RangeListEntry entry; uint32_t prev_offset = *offset_ptr; entry.StartAddress = - getRelocatedValue(data, AddressSize, offset_ptr, &Relocs); - entry.EndAddress = - getRelocatedValue(data, AddressSize, offset_ptr, &Relocs); + data.getRelocatedAddress(offset_ptr, &entry.SectionIndex); + entry.EndAddress = data.getRelocatedAddress(offset_ptr); // Check that both values were extracted correctly. 
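DWARFDebugLoc::parse above now reads each begin/end pair through the relocation-aware extractor and stops at the (0, 0) end-of-list entry. A standalone sketch combining both pieces, with the relocation map reduced to an offset-to-addend table (LLVM's entries also carry a section index) and 8-byte little-endian addresses assumed:

```cpp
#include <cstdint>
#include <cstring>
#include <map>
#include <vector>

using RelocMap = std::map<uint32_t, uint64_t>; // offset -> addend

static uint64_t readRelocated(const std::vector<uint8_t> &D, uint32_t &Off,
                              const RelocMap &Relocs) {
  uint64_t V = 0;
  std::memcpy(&V, D.data() + Off, 8); // 8-byte address, little-endian host
  auto It = Relocs.find(Off);         // relocation keyed by read offset
  Off += 8;
  return It == Relocs.end() ? V : V + It->second;
}

struct Entry { uint64_t Begin, End; };

std::vector<Entry> parseLocationList(const std::vector<uint8_t> &D,
                                     const RelocMap &Relocs) {
  std::vector<Entry> Entries;
  uint32_t Off = 0;
  while (Off + 16 <= D.size()) {
    // Braced init guarantees Begin is read before End.
    Entry E{readRelocated(D, Off, Relocs), readRelocated(D, Off, Relocs)};
    if (E.Begin == 0 && E.End == 0) // end-of-list marker
      break;
    Entries.push_back(E);
  }
  return Entries;
}
```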
if (*offset_ptr != prev_offset + 2 * AddressSize) { @@ -69,8 +68,8 @@ DWARFDebugRangeList::getAbsoluteRanges(uint64_t BaseAddress) const { if (RLE.isBaseAddressSelectionEntry(AddressSize)) { BaseAddress = RLE.EndAddress; } else { - Res.push_back(std::make_pair(BaseAddress + RLE.StartAddress, - BaseAddress + RLE.EndAddress)); + Res.push_back({BaseAddress + RLE.StartAddress, + BaseAddress + RLE.EndAddress, RLE.SectionIndex}); } } return Res; diff --git a/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFDie.cpp b/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFDie.cpp index 24039eb35209a..111f0bbd44448 100644 --- a/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFDie.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFDie.cpp @@ -7,18 +7,18 @@ // //===----------------------------------------------------------------------===// +#include "llvm/DebugInfo/DWARF/DWARFDie.h" #include "SyntaxHighlighting.h" #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/DebugInfo/DWARF/DWARFDebugRangeList.h" -#include "llvm/DebugInfo/DWARF/DWARFDie.h" #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" #include "llvm/DebugInfo/DWARF/DWARFUnit.h" #include "llvm/Support/DataExtractor.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Support/Format.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" @@ -60,14 +60,15 @@ static void dumpRanges(raw_ostream &OS, const DWARFAddressRangesVector& Ranges, OS << '\n'; OS.indent(Indent); OS << format("[0x%0*" PRIx64 " - 0x%0*" PRIx64 ")", - AddressSize*2, Range.first, - AddressSize*2, Range.second); + AddressSize*2, Range.LowPC, + AddressSize*2, Range.HighPC); } } static void dumpAttribute(raw_ostream &OS, const DWARFDie &Die, uint32_t *OffsetPtr, dwarf::Attribute Attr, - dwarf::Form Form, unsigned Indent) { + dwarf::Form Form, unsigned Indent, + DIDumpOptions DumpOpts) { if (!Die.isValid()) return; const char BaseIndent[] = " "; @@ -78,13 +79,15 @@ static void dumpAttribute(raw_ostream &OS, const DWARFDie &Die, WithColor(OS, syntax::Attribute) << attrString; else WithColor(OS, syntax::Attribute).get() << format("DW_AT_Unknown_%x", Attr); - - auto formString = FormEncodingString(Form); - if (!formString.empty()) - OS << " [" << formString << ']'; - else - OS << format(" [DW_FORM_Unknown_%x]", Form); - + + if (!DumpOpts.Brief) { + auto formString = FormEncodingString(Form); + if (!formString.empty()) + OS << " [" << formString << ']'; + else + OS << format(" [DW_FORM_Unknown_%x]", Form); + } + DWARFUnit *U = Die.getDwarfUnit(); DWARFFormValue formValue(Form); @@ -211,13 +214,16 @@ Optional DWARFDie::getHighPC(uint64_t LowPC) const { return None; } -bool DWARFDie::getLowAndHighPC(uint64_t &LowPC, uint64_t &HighPC) const { - auto LowPcAddr = toAddress(find(DW_AT_low_pc)); +bool DWARFDie::getLowAndHighPC(uint64_t &LowPC, uint64_t &HighPC, + uint64_t &SectionIndex) const { + auto F = find(DW_AT_low_pc); + auto LowPcAddr = toAddress(F); if (!LowPcAddr) return false; if (auto HighPcAddr = getHighPC(*LowPcAddr)) { LowPC = *LowPcAddr; HighPC = *HighPcAddr; + SectionIndex = F->getSectionIndex(); return true; } return false; @@ -228,10 +234,10 @@ DWARFDie::getAddressRanges() const { if (isNULL()) return DWARFAddressRangesVector(); // Single range specified by low/high PC. 
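The getAbsoluteRanges hunk above (now building LowPC/HighPC/SectionIndex triples) also has to honor base-address selection entries: a start address of all ones does not describe a range but resets the base that later entries are relative to. A small sketch of that rule, assuming the all-ones marker for the given address size:

```cpp
#include <cstdint>
#include <vector>

struct RangeListEntry { uint64_t Start, End; };
struct AddressRange { uint64_t LowPC, HighPC; };

std::vector<AddressRange>
absoluteRanges(const std::vector<RangeListEntry> &Entries, uint64_t Base,
               uint8_t AddrSize) {
  // All-ones start address for this address size marks a base selection.
  const uint64_t BaseSelector =
      AddrSize == 8 ? UINT64_MAX : (uint64_t(1) << (8 * AddrSize)) - 1;
  std::vector<AddressRange> Res;
  for (const auto &E : Entries) {
    if (E.Start == BaseSelector)
      Base = E.End; // new base for the entries that follow
    else
      Res.push_back({Base + E.Start, Base + E.End});
  }
  return Res;
}
```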
- uint64_t LowPC, HighPC; - if (getLowAndHighPC(LowPC, HighPC)) { - return DWARFAddressRangesVector(1, std::make_pair(LowPC, HighPC)); - } + uint64_t LowPC, HighPC, Index; + if (getLowAndHighPC(LowPC, HighPC, Index)) + return {{LowPC, HighPC, Index}}; + // Multiple ranges from .debug_ranges section. auto RangesOffset = toSectionOffset(find(DW_AT_ranges)); if (RangesOffset) { @@ -257,7 +263,7 @@ DWARFDie::collectChildrenAddressRanges(DWARFAddressRangesVector& Ranges) const { bool DWARFDie::addressRangeContainsAddress(const uint64_t Address) const { for (const auto& R : getAddressRanges()) { - if (R.first <= Address && Address < R.second) + if (R.LowPC <= Address && Address < R.HighPC) return true; } return false; @@ -298,11 +304,11 @@ void DWARFDie::getCallerFrame(uint32_t &CallFile, uint32_t &CallLine, CallDiscriminator = toUnsigned(find(DW_AT_GNU_discriminator), 0); } -void DWARFDie::dump(raw_ostream &OS, unsigned RecurseDepth, - unsigned Indent) const { +void DWARFDie::dump(raw_ostream &OS, unsigned RecurseDepth, unsigned Indent, + DIDumpOptions DumpOpts) const { if (!isValid()) return; - DataExtractor debug_info_data = U->getDebugInfoExtractor(); + DWARFDataExtractor debug_info_data = U->getDebugInfoExtractor(); const uint32_t Offset = getOffset(); uint32_t offset = Offset; @@ -319,10 +325,12 @@ void DWARFDie::dump(raw_ostream &OS, unsigned RecurseDepth, else WithColor(OS, syntax::Tag).get().indent(Indent) << format("DW_TAG_Unknown_%x", getTag()); - - OS << format(" [%u] %c\n", abbrCode, - AbbrevDecl->hasChildren() ? '*' : ' '); - + + if (!DumpOpts.Brief) + OS << format(" [%u] %c", abbrCode, + AbbrevDecl->hasChildren() ? '*' : ' '); + OS << '\n'; + // Dump all data in the DIE for the attributes. for (const auto &AttrSpec : AbbrevDecl->attributes()) { if (AttrSpec.Form == DW_FORM_implicit_const) { @@ -332,13 +340,13 @@ void DWARFDie::dump(raw_ostream &OS, unsigned RecurseDepth, continue; } dumpAttribute(OS, *this, &offset, AttrSpec.Attr, AttrSpec.Form, - Indent); + Indent, DumpOpts); } DWARFDie child = getFirstChild(); if (RecurseDepth > 0 && child) { while (child) { - child.dump(OS, RecurseDepth-1, Indent+2); + child.dump(OS, RecurseDepth-1, Indent+2, DumpOpts); child = child.getSibling(); } } @@ -387,7 +395,7 @@ DWARFDie::attribute_iterator::attribute_iterator(DWARFDie D, bool End) : void DWARFDie::attribute_iterator::updateForIndex( const DWARFAbbreviationDeclaration &AbbrDecl, uint32_t I) { Index = I; - // AbbrDecl must be valid befor calling this function. + // AbbrDecl must be valid before calling this function. 
auto NumAttrs = AbbrDecl.getNumAttributes(); if (Index < NumAttrs) { AttrValue.Attr = AbbrDecl.getAttrByIndex(Index); diff --git a/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFFormValue.cpp b/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFFormValue.cpp index 1cbd3ea2c869c..83a7792e12447 100644 --- a/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFFormValue.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFFormValue.cpp @@ -13,10 +13,10 @@ #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/DebugInfo/DWARF/DWARFRelocMap.h" #include "llvm/DebugInfo/DWARF/DWARFUnit.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" @@ -59,48 +59,13 @@ static const DWARFFormValue::FormClass DWARF4FormClasses[] = { DWARFFormValue::FC_Flag, // 0x19 DW_FORM_flag_present }; -namespace { - -/// A helper class that can be used in DWARFFormValue.cpp functions that need -/// to know the byte size of DW_FORM values that vary in size depending on the -/// DWARF version, address byte size, or DWARF32 or DWARF64. -class FormSizeHelper { - uint16_t Version; - uint8_t AddrSize; - llvm::dwarf::DwarfFormat Format; - -public: - FormSizeHelper(uint16_t V, uint8_t A, llvm::dwarf::DwarfFormat F) - : Version(V), AddrSize(A), Format(F) {} - - uint8_t getAddressByteSize() const { return AddrSize; } - - uint8_t getRefAddrByteSize() const { - if (Version == 2) - return AddrSize; - return getDwarfOffsetByteSize(); - } - - uint8_t getDwarfOffsetByteSize() const { - switch (Format) { - case dwarf::DwarfFormat::DWARF32: - return 4; - case dwarf::DwarfFormat::DWARF64: - return 8; - } - llvm_unreachable("Invalid Format value"); - } -}; - -} // end anonymous namespace - -template -static Optional getFixedByteSize(dwarf::Form Form, const T *U) { +Optional +DWARFFormValue::getFixedByteSize(dwarf::Form Form, + const DWARFFormParams Params) { switch (Form) { case DW_FORM_addr: - if (U) - return U->getAddressByteSize(); - return None; + assert(Params.Version && Params.AddrSize && "Invalid Params for form"); + return Params.AddrSize; case DW_FORM_block: // ULEB128 length L followed by L bytes. case DW_FORM_block1: // 1 byte length L followed by L bytes. 
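For orientation, a minimal sketch of calling the refactored entry point; the struct and function names follow the hunk above, while the chosen Version/AddrSize/Format values and the wrapper function are illustrative only:

  #include "llvm/DebugInfo/DWARF/DWARFFormValue.h"

  // Size of DW_FORM_ref_addr for a DWARF v4 unit with 8-byte addresses in
  // 32-bit DWARF format: for version >= 3 the form uses the DWARF offset
  // size, so this returns 4.
  uint8_t sizeOfRefAddr() {
    llvm::DWARFFormParams Params = {/*Version=*/4, /*AddrSize=*/8,
                                    llvm::dwarf::DWARF32};
    llvm::Optional<uint8_t> Size = llvm::DWARFFormValue::getFixedByteSize(
        llvm::dwarf::DW_FORM_ref_addr, Params);
    return Size ? *Size : 0;
  }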
@@ -121,9 +86,8 @@ static Optional getFixedByteSize(dwarf::Form Form, const T *U) { return None; case DW_FORM_ref_addr: - if (U) - return U->getRefAddrByteSize(); - return None; + assert(Params.Version && Params.AddrSize && "Invalid Params for form"); + return Params.getRefAddrByteSize(); case DW_FORM_flag: case DW_FORM_data1: @@ -138,6 +102,9 @@ static Optional getFixedByteSize(dwarf::Form Form, const T *U) { case DW_FORM_addrx2: return 2; + case DW_FORM_strx3: + return 3; + case DW_FORM_data4: case DW_FORM_ref4: case DW_FORM_ref_sup4: @@ -151,9 +118,8 @@ static Optional getFixedByteSize(dwarf::Form Form, const T *U) { case DW_FORM_line_strp: case DW_FORM_sec_offset: case DW_FORM_strp_sup: - if (U) - return U->getDwarfOffsetByteSize(); - return None; + assert(Params.Version && Params.AddrSize && "Invalid Params for form"); + return Params.getDwarfOffsetByteSize(); case DW_FORM_data8: case DW_FORM_ref8: @@ -178,9 +144,9 @@ static Optional getFixedByteSize(dwarf::Form Form, const T *U) { return None; } -template -static bool skipFormValue(dwarf::Form Form, const DataExtractor &DebugInfoData, - uint32_t *OffsetPtr, const T *U) { +bool DWARFFormValue::skipValue(dwarf::Form Form, DataExtractor DebugInfoData, + uint32_t *OffsetPtr, + const DWARFFormParams Params) { bool Indirect = false; do { switch (Form) { @@ -240,7 +206,8 @@ static bool skipFormValue(dwarf::Form Form, const DataExtractor &DebugInfoData, case DW_FORM_line_strp: case DW_FORM_GNU_ref_alt: case DW_FORM_GNU_strp_alt: - if (Optional FixedSize = ::getFixedByteSize(Form, U)) { + if (Optional FixedSize = + DWARFFormValue::getFixedByteSize(Form, Params)) { *OffsetPtr += *FixedSize; return true; } @@ -274,19 +241,6 @@ static bool skipFormValue(dwarf::Form Form, const DataExtractor &DebugInfoData, return true; } -Optional DWARFFormValue::getFixedByteSize(dwarf::Form Form, - const DWARFUnit *U) { - return ::getFixedByteSize(Form, U); -} - -Optional -DWARFFormValue::getFixedByteSize(dwarf::Form Form, uint16_t Version, - uint8_t AddrSize, - llvm::dwarf::DwarfFormat Format) { - FormSizeHelper FSH(Version, AddrSize, Format); - return ::getFixedByteSize(Form, &FSH); -} - bool DWARFFormValue::isFormClass(DWARFFormValue::FormClass FC) const { // First, check DWARF4 form classes. if (Form < makeArrayRef(DWARF4FormClasses).size() && @@ -301,6 +255,11 @@ bool DWARFFormValue::isFormClass(DWARFFormValue::FormClass FC) const { return (FC == FC_Address); case DW_FORM_GNU_str_index: case DW_FORM_GNU_strp_alt: + case DW_FORM_strx: + case DW_FORM_strx1: + case DW_FORM_strx2: + case DW_FORM_strx3: + case DW_FORM_strx4: return (FC == FC_String); case DW_FORM_implicit_const: return (FC == FC_Constant); @@ -316,7 +275,7 @@ bool DWARFFormValue::isFormClass(DWARFFormValue::FormClass FC) const { FC == FC_SectionOffset; } -bool DWARFFormValue::extractValue(const DataExtractor &Data, +bool DWARFFormValue::extractValue(const DWARFDataExtractor &Data, uint32_t *OffsetPtr, const DWARFUnit *CU) { U = CU; bool Indirect = false; @@ -331,10 +290,9 @@ bool DWARFFormValue::extractValue(const DataExtractor &Data, case DW_FORM_ref_addr: { if (!U) return false; - uint16_t AddrSize = (Form == DW_FORM_addr) ? U->getAddressByteSize() - : U->getRefAddrByteSize(); - Value.uval = - getRelocatedValue(Data, AddrSize, OffsetPtr, U->getRelocMap()); + uint16_t Size = (Form == DW_FORM_addr) ? 
U->getAddressByteSize() + : U->getRefAddrByteSize(); + Value.uval = Data.getRelocatedValue(Size, OffsetPtr, &Value.SectionIndex); break; } case DW_FORM_exprloc: @@ -367,15 +325,16 @@ bool DWARFFormValue::extractValue(const DataExtractor &Data, case DW_FORM_addrx2: Value.uval = Data.getU16(OffsetPtr); break; + case DW_FORM_strx3: + Value.uval = Data.getU24(OffsetPtr); + break; case DW_FORM_data4: case DW_FORM_ref4: case DW_FORM_ref_sup4: case DW_FORM_strx4: - case DW_FORM_addrx4: { - const RelocAddrMap *RelocMap = U ? U->getRelocMap() : nullptr; - Value.uval = getRelocatedValue(Data, 4, OffsetPtr, RelocMap); + case DW_FORM_addrx4: + Value.uval = Data.getRelocatedValue(4, OffsetPtr); break; - } case DW_FORM_data8: case DW_FORM_ref8: case DW_FORM_ref_sup8: @@ -403,8 +362,8 @@ bool DWARFFormValue::extractValue(const DataExtractor &Data, case DW_FORM_strp_sup: { if (!U) return false; - Value.uval = getRelocatedValue(Data, U->getDwarfOffsetByteSize(), - OffsetPtr, U->getRelocMap()); + Value.uval = + Data.getRelocatedValue(U->getDwarfOffsetByteSize(), OffsetPtr); break; } case DW_FORM_flag_present: @@ -415,6 +374,7 @@ bool DWARFFormValue::extractValue(const DataExtractor &Data, break; case DW_FORM_GNU_addr_index: case DW_FORM_GNU_str_index: + case DW_FORM_strx: Value.uval = Data.getULEB128(OffsetPtr); break; default: @@ -436,24 +396,6 @@ bool DWARFFormValue::extractValue(const DataExtractor &Data, return true; } -bool DWARFFormValue::skipValue(DataExtractor DebugInfoData, uint32_t *OffsetPtr, - const DWARFUnit *U) const { - return DWARFFormValue::skipValue(Form, DebugInfoData, OffsetPtr, U); -} - -bool DWARFFormValue::skipValue(dwarf::Form Form, DataExtractor DebugInfoData, - uint32_t *OffsetPtr, const DWARFUnit *U) { - return skipFormValue(Form, DebugInfoData, OffsetPtr, U); -} - -bool DWARFFormValue::skipValue(dwarf::Form Form, DataExtractor DebugInfoData, - uint32_t *OffsetPtr, uint16_t Version, - uint8_t AddrSize, - llvm::dwarf::DwarfFormat Format) { - FormSizeHelper FSH(Version, AddrSize, Format); - return skipFormValue(Form, DebugInfoData, OffsetPtr, &FSH); -} - void DWARFFormValue::dump(raw_ostream &OS) const { uint64_t UValue = Value.uval; bool CURelativeOffset = false; @@ -542,6 +484,11 @@ void DWARFFormValue::dump(raw_ostream &OS) const { OS << format(" .debug_str[0x%8.8x] = ", (uint32_t)UValue); dumpString(OS); break; + case DW_FORM_strx: + case DW_FORM_strx1: + case DW_FORM_strx2: + case DW_FORM_strx3: + case DW_FORM_strx4: case DW_FORM_GNU_str_index: OS << format(" indexed (%8.8x) string = ", (uint32_t)UValue); dumpString(OS); @@ -620,8 +567,10 @@ Optional DWARFFormValue::getAsCString() const { if (Form == DW_FORM_GNU_strp_alt || U == nullptr) return None; uint32_t Offset = Value.uval; - if (Form == DW_FORM_GNU_str_index) { - uint32_t StrOffset; + if (Form == DW_FORM_GNU_str_index || Form == DW_FORM_strx || + Form == DW_FORM_strx1 || Form == DW_FORM_strx2 || Form == DW_FORM_strx3 || + Form == DW_FORM_strx4) { + uint64_t StrOffset; if (!U->getStringOffsetSectionItem(Offset, StrOffset)) return None; Offset = StrOffset; diff --git a/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp b/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp index 0625d01097c9a..ebd6104ab8785 100644 --- a/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp @@ -7,9 +7,9 @@ // //===----------------------------------------------------------------------===// +#include "llvm/DebugInfo/DWARF/DWARFGdbIndex.h" #include 
"llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" -#include "llvm/DebugInfo/DWARF/DWARFGdbIndex.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include diff --git a/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFTypeUnit.cpp b/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFTypeUnit.cpp index 25824f6eb83bf..fd1684d33a16b 100644 --- a/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFTypeUnit.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFTypeUnit.cpp @@ -7,10 +7,10 @@ // //===----------------------------------------------------------------------===// +#include "llvm/DebugInfo/DWARF/DWARFTypeUnit.h" #include "llvm/DebugInfo/DIContext.h" #include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h" #include "llvm/DebugInfo/DWARF/DWARFDie.h" -#include "llvm/DebugInfo/DWARF/DWARFTypeUnit.h" #include "llvm/DebugInfo/DWARF/DWARFUnit.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" diff --git a/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFUnit.cpp b/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFUnit.cpp index f50487fc3ba3f..043bdb874f431 100644 --- a/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFUnit.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFUnit.cpp @@ -1,4 +1,4 @@ -//===-- DWARFUnit.cpp -----------------------------------------------------===// +//===- DWARFUnit.cpp ------------------------------------------------------===// // // The LLVM Compiler Infrastructure // @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// +#include "llvm/DebugInfo/DWARF/DWARFUnit.h" #include "llvm/ADT/SmallString.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" @@ -16,9 +16,6 @@ #include "llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h" #include "llvm/DebugInfo/DWARF/DWARFDie.h" #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" -#include "llvm/DebugInfo/DWARF/DWARFUnit.h" -#include "llvm/Object/ObjectFile.h" -#include "llvm/Support/Casting.h" #include "llvm/Support/DataExtractor.h" #include "llvm/Support/Path.h" #include @@ -26,6 +23,7 @@ #include #include #include +#include #include using namespace llvm; @@ -33,8 +31,8 @@ using namespace dwarf; void DWARFUnitSectionBase::parse(DWARFContext &C, const DWARFSection &Section) { parseImpl(C, Section, C.getDebugAbbrev(), &C.getRangeSection(), - C.getStringSection(), StringRef(), C.getAddrSection(), - C.getLineSection().Data, C.isLittleEndian(), false); + C.getStringSection(), C.getStringOffsetSection(), + &C.getAddrSection(), C.getLineSection(), C.isLittleEndian(), false); } void DWARFUnitSectionBase::parseDWO(DWARFContext &C, @@ -42,23 +40,18 @@ void DWARFUnitSectionBase::parseDWO(DWARFContext &C, DWARFUnitIndex *Index) { parseImpl(C, DWOSection, C.getDebugAbbrevDWO(), &C.getRangeDWOSection(), C.getStringDWOSection(), C.getStringOffsetDWOSection(), - C.getAddrSection(), C.getLineDWOSection().Data, C.isLittleEndian(), + &C.getAddrSection(), C.getLineDWOSection(), C.isLittleEndian(), true); } DWARFUnit::DWARFUnit(DWARFContext &DC, const DWARFSection &Section, const DWARFDebugAbbrev *DA, const DWARFSection *RS, - StringRef SS, StringRef SOS, StringRef AOS, StringRef LS, - bool LE, bool IsDWO, - const DWARFUnitSectionBase &UnitSection, + StringRef SS, const DWARFSection &SOS, + const DWARFSection *AOS, const DWARFSection &LS, bool LE, + bool IsDWO, const DWARFUnitSectionBase &UnitSection, const DWARFUnitIndex::Entry *IndexEntry) : 
Context(DC), InfoSection(Section), Abbrev(DA), RangeSection(RS), - LineSection(LS), StringSection(SS), StringOffsetSection([&]() { - if (IndexEntry) - if (const auto *C = IndexEntry->getOffset(DW_SECT_STR_OFFSETS)) - return SOS.slice(C->Offset, C->Offset + C->Length); - return SOS; - }()), + LineSection(LS), StringSection(SS), StringOffsetSection(SOS), AddrOffsetSection(AOS), isLittleEndian(LE), isDWO(IsDWO), UnitSection(UnitSection), IndexEntry(IndexEntry) { clear(); @@ -68,37 +61,39 @@ DWARFUnit::~DWARFUnit() = default; bool DWARFUnit::getAddrOffsetSectionItem(uint32_t Index, uint64_t &Result) const { - uint32_t Offset = AddrOffsetSectionBase + Index * AddrSize; - if (AddrOffsetSection.size() < Offset + AddrSize) + uint32_t Offset = AddrOffsetSectionBase + Index * getAddressByteSize(); + if (AddrOffsetSection->Data.size() < Offset + getAddressByteSize()) return false; - DataExtractor DA(AddrOffsetSection, isLittleEndian, AddrSize); - Result = DA.getAddress(&Offset); + DWARFDataExtractor DA(*AddrOffsetSection, isLittleEndian, + getAddressByteSize()); + Result = DA.getRelocatedAddress(&Offset); return true; } bool DWARFUnit::getStringOffsetSectionItem(uint32_t Index, - uint32_t &Result) const { - // FIXME: string offset section entries are 8-byte for DWARF64. - const uint32_t ItemSize = 4; - uint32_t Offset = Index * ItemSize; - if (StringOffsetSection.size() < Offset + ItemSize) + uint64_t &Result) const { + unsigned ItemSize = getDwarfOffsetByteSize(); + uint32_t Offset = StringOffsetSectionBase + Index * ItemSize; + if (StringOffsetSection.Data.size() < Offset + ItemSize) return false; - DataExtractor DA(StringOffsetSection, isLittleEndian, 0); - Result = DA.getU32(&Offset); + DWARFDataExtractor DA(StringOffsetSection, isLittleEndian, 0); + Result = DA.getRelocatedValue(ItemSize, &Offset); return true; } bool DWARFUnit::extractImpl(DataExtractor debug_info, uint32_t *offset_ptr) { Length = debug_info.getU32(offset_ptr); - Version = debug_info.getU16(offset_ptr); + // FIXME: Support DWARF64. + FormParams.Format = DWARF32; + FormParams.Version = debug_info.getU16(offset_ptr); uint64_t AbbrOffset; - if (Version >= 5) { + if (FormParams.Version >= 5) { UnitType = debug_info.getU8(offset_ptr); - AddrSize = debug_info.getU8(offset_ptr); + FormParams.AddrSize = debug_info.getU8(offset_ptr); AbbrOffset = debug_info.getU32(offset_ptr); } else { AbbrOffset = debug_info.getU32(offset_ptr); - AddrSize = debug_info.getU8(offset_ptr); + FormParams.AddrSize = debug_info.getU8(offset_ptr); } if (IndexEntry) { if (AbbrOffset) @@ -113,12 +108,15 @@ bool DWARFUnit::extractImpl(DataExtractor debug_info, uint32_t *offset_ptr) { } bool LengthOK = debug_info.isValidOffset(getNextUnitOffset() - 1); - bool VersionOK = DWARFContext::isSupportedVersion(Version); - bool AddrSizeOK = AddrSize == 4 || AddrSize == 8; + bool VersionOK = DWARFContext::isSupportedVersion(getVersion()); + bool AddrSizeOK = getAddressByteSize() == 4 || getAddressByteSize() == 8; if (!LengthOK || !VersionOK || !AddrSizeOK) return false; + // Keep track of the highest DWARF version we encounter across all units. + Context.setMaxVersionIfGreater(getVersion()); + Abbrevs = Abbrev->getAbbreviationDeclarationSet(AbbrOffset); return Abbrevs != nullptr; } @@ -140,21 +138,20 @@ bool DWARFUnit::extract(DataExtractor debug_info, uint32_t *offset_ptr) { } bool DWARFUnit::extractRangeList(uint32_t RangeListOffset, - DWARFDebugRangeList &RangeList) const { + DWARFDebugRangeList &RangeList) const { // Require that compile unit is extracted. 
assert(!DieArray.empty()); - DataExtractor RangesData(RangeSection->Data, isLittleEndian, AddrSize); + DWARFDataExtractor RangesData(*RangeSection, isLittleEndian, + getAddressByteSize()); uint32_t ActualRangeListOffset = RangeSectionBase + RangeListOffset; - return RangeList.extract(RangesData, &ActualRangeListOffset, - RangeSection->Relocs); + return RangeList.extract(RangesData, &ActualRangeListOffset); } void DWARFUnit::clear() { Offset = 0; Length = 0; - Version = 0; Abbrevs = nullptr; - AddrSize = 0; + FormParams = DWARFFormParams({0, 0, DWARF32}); BaseAddr = 0; RangeSectionBase = 0; AddrOffsetSectionBase = 0; @@ -181,7 +178,7 @@ void DWARFUnit::extractDIEsToVector( uint32_t DIEOffset = Offset + getHeaderSize(); uint32_t NextCUOffset = getNextUnitOffset(); DWARFDebugInfoEntry DIE; - DataExtractor DebugInfoData = getDebugInfoExtractor(); + DWARFDataExtractor DebugInfoData = getDebugInfoExtractor(); uint32_t Depth = 0; bool IsCUDie = true; @@ -242,6 +239,17 @@ size_t DWARFUnit::extractDIEsIfNeeded(bool CUDieOnly) { setBaseAddress(*BaseAddr); AddrOffsetSectionBase = toSectionOffset(UnitDie.find(DW_AT_GNU_addr_base), 0); RangeSectionBase = toSectionOffset(UnitDie.find(DW_AT_rnglists_base), 0); + + // In general, we derive the offset of the unit's contribution to the + debug_str_offsets{.dwo} section from the unit DIE's + DW_AT_str_offsets_base attribute. In dwp files we add to it the offset + we get from the index table. + StringOffsetSectionBase = + toSectionOffset(UnitDie.find(DW_AT_str_offsets_base), 0); + if (IndexEntry) + if (const auto *C = IndexEntry->getOffset(DW_SECT_STR_OFFSETS)) + StringOffsetSectionBase += C->Offset; + // Don't fall back to DW_AT_GNU_ranges_base: it should be ignored for // skeleton CU DIE, so that DWARF users not aware of it are not broken. } @@ -249,20 +257,6 @@ size_t DWARFUnit::extractDIEsIfNeeded(bool CUDieOnly) { return DieArray.size(); } -DWARFUnit::DWOHolder::DWOHolder(StringRef DWOPath) { - auto Obj = object::ObjectFile::createObjectFile(DWOPath); - if (!Obj) { - // TODO: Actually report errors helpfully. - consumeError(Obj.takeError()); - return; - } - DWOFile = std::move(Obj.get()); - DWOContext.reset( - cast(new DWARFContextInMemory(*DWOFile.getBinary()))); - if (DWOContext->getNumDWOCompileUnits() > 0) - DWOU = DWOContext->getDWOCompileUnitAtIndex(0); -} - bool DWARFUnit::parseDWO() { if (isDWO) return false; @@ -281,17 +275,21 @@ bool DWARFUnit::parseDWO() { sys::path::append(AbsolutePath, *CompilationDir); } sys::path::append(AbsolutePath, *DWOFileName); - DWO = llvm::make_unique(AbsolutePath); - DWARFUnit *DWOCU = DWO->getUnit(); - // Verify that compile unit in .dwo file is valid. - if (!DWOCU || DWOCU->getDWOId() != getDWOId()) { - DWO.reset(); + auto DWOId = getDWOId(); + if (!DWOId) return false; - } + auto DWOContext = Context.getDWOContext(AbsolutePath); + if (!DWOContext) + return false; + + DWARFCompileUnit *DWOCU = DWOContext->getDWOCompileUnitForHash(*DWOId); + if (!DWOCU) + return false; + DWO = std::shared_ptr(std::move(DWOContext), DWOCU); // Share .debug_addr and .debug_ranges section with compile unit in .dwo - DWOCU->setAddrOffsetSection(AddrOffsetSection, AddrOffsetSectionBase); + DWO->setAddrOffsetSection(AddrOffsetSection, AddrOffsetSectionBase); auto DWORangesBase = UnitDie.getRangesBaseAttribute(); - DWOCU->setRangesSection(RangeSection, DWORangesBase ?
*DWORangesBase : 0); return true; } @@ -334,8 +332,8 @@ void DWARFUnit::collectAddressRanges(DWARFAddressRangesVector &CURanges) { // Collect address ranges from DIEs in .dwo if necessary. bool DWOCreated = parseDWO(); - if (DWO.get()) - DWO->getUnit()->collectAddressRanges(CURanges); + if (DWO) + DWO->collectAddressRanges(CURanges); if (DWOCreated) DWO.reset(); @@ -349,18 +347,18 @@ void DWARFUnit::updateAddressDieMap(DWARFDie Die) { if (Die.isSubroutineDIE()) { for (const auto &R : Die.getAddressRanges()) { // Ignore 0-sized ranges. - if (R.first == R.second) + if (R.LowPC == R.HighPC) continue; - auto B = AddrDieMap.upper_bound(R.first); - if (B != AddrDieMap.begin() && R.first < (--B)->second.first) { + auto B = AddrDieMap.upper_bound(R.LowPC); + if (B != AddrDieMap.begin() && R.LowPC < (--B)->second.first) { // The range is a sub-range of existing ranges, we need to split the // existing range. - if (R.second < B->second.first) - AddrDieMap[R.second] = B->second; - if (R.first > B->first) - AddrDieMap[B->first].first = R.first; + if (R.HighPC < B->second.first) + AddrDieMap[R.HighPC] = B->second; + if (R.LowPC > B->first) + AddrDieMap[B->first].first = R.LowPC; } - AddrDieMap[R.first] = std::make_pair(R.second, Die); + AddrDieMap[R.LowPC] = std::make_pair(R.HighPC, Die); } } // Parent DIEs are added to the AddrDieMap prior to the Children DIEs to @@ -395,7 +393,7 @@ DWARFUnit::getInlinedChainForAddress(uint64_t Address, // First, find the subroutine that contains the given address (the leaf // of inlined chain). DWARFDie SubroutineDIE = - (DWO ? DWO->getUnit() : this)->getSubroutineForAddress(Address); + (DWO ? DWO.get() : this)->getSubroutineForAddress(Address); while (SubroutineDIE) { if (SubroutineDIE.isSubroutineDIE()) diff --git a/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp b/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp index 0981a4dfdfa57..59b3d0ca55a63 100644 --- a/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp @@ -7,9 +7,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/StringRef.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/DebugInfo/DWARF/DWARFUnitIndex.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" diff --git a/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFVerifier.cpp b/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFVerifier.cpp index 8a544296f65cb..4de46bea301e9 100644 --- a/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFVerifier.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/DWARF/DWARFVerifier.cpp @@ -14,6 +14,7 @@ #include "llvm/DebugInfo/DWARF/DWARFDie.h" #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" #include "llvm/DebugInfo/DWARF/DWARFSection.h" +#include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h" #include "llvm/Support/raw_ostream.h" #include #include @@ -23,22 +24,166 @@ using namespace llvm; using namespace dwarf; using namespace object; -void DWARFVerifier::verifyDebugInfoAttribute(const DWARFDie &Die, - DWARFAttribute &AttrValue) { +bool DWARFVerifier::verifyUnitHeader(const DWARFDataExtractor DebugInfoData, + uint32_t *Offset, unsigned UnitIndex, + uint8_t &UnitType, bool &isUnitDWARF64) { + uint32_t AbbrOffset, Length; + uint8_t AddrSize = 0; + uint16_t Version; + bool Success = true; + + bool ValidLength = false; + bool ValidVersion = false; + bool 
ValidAddrSize = false; + bool ValidType = true; + bool ValidAbbrevOffset = true; + + uint32_t OffsetStart = *Offset; + Length = DebugInfoData.getU32(Offset); + if (Length == UINT32_MAX) { + isUnitDWARF64 = true; + OS << format( + "Unit[%d] is in 64-bit DWARF format; cannot verify from this point.\n", + UnitIndex); + return false; + } + Version = DebugInfoData.getU16(Offset); + + if (Version >= 5) { + UnitType = DebugInfoData.getU8(Offset); + AddrSize = DebugInfoData.getU8(Offset); + AbbrOffset = DebugInfoData.getU32(Offset); + ValidType = DWARFUnit::isValidUnitType(UnitType); + } else { + UnitType = 0; + AbbrOffset = DebugInfoData.getU32(Offset); + AddrSize = DebugInfoData.getU8(Offset); + } + + if (!DCtx.getDebugAbbrev()->getAbbreviationDeclarationSet(AbbrOffset)) + ValidAbbrevOffset = false; + + ValidLength = DebugInfoData.isValidOffset(OffsetStart + Length + 3); + ValidVersion = DWARFContext::isSupportedVersion(Version); + ValidAddrSize = AddrSize == 4 || AddrSize == 8; + if (!ValidLength || !ValidVersion || !ValidAddrSize || !ValidAbbrevOffset || + !ValidType) { + Success = false; + OS << format("Units[%d] - start offset: 0x%08x \n", UnitIndex, OffsetStart); + if (!ValidLength) + OS << "\tError: The length for this unit is too " + "large for the .debug_info provided.\n"; + if (!ValidVersion) + OS << "\tError: The 16 bit unit header version is not valid.\n"; + if (!ValidType) + OS << "\tError: The unit type encoding is not valid.\n"; + if (!ValidAbbrevOffset) + OS << "\tError: The offset into the .debug_abbrev section is " + "not valid.\n"; + if (!ValidAddrSize) + OS << "\tError: The address size is unsupported.\n"; + } + *Offset = OffsetStart + Length + 4; + return Success; +} + +bool DWARFVerifier::verifyUnitContents(DWARFUnit Unit) { + uint32_t NumUnitErrors = 0; + unsigned NumDies = Unit.getNumDIEs(); + for (unsigned I = 0; I < NumDies; ++I) { + auto Die = Unit.getDIEAtIndex(I); + if (Die.getTag() == DW_TAG_null) + continue; + for (auto AttrValue : Die.attributes()) { + NumUnitErrors += verifyDebugInfoAttribute(Die, AttrValue); + NumUnitErrors += verifyDebugInfoForm(Die, AttrValue); + } + } + return NumUnitErrors == 0; +} + +bool DWARFVerifier::handleDebugInfo() { + OS << "Verifying .debug_info Unit Header Chain...\n"; + + DWARFDataExtractor DebugInfoData(DCtx.getInfoSection(), DCtx.isLittleEndian(), + 0); + uint32_t NumDebugInfoErrors = 0; + uint32_t OffsetStart = 0, Offset = 0, UnitIdx = 0; + uint8_t UnitType = 0; + bool isUnitDWARF64 = false; + bool isHeaderChainValid = true; + bool hasDIE = DebugInfoData.isValidOffset(Offset); + while (hasDIE) { + OffsetStart = Offset; + if (!verifyUnitHeader(DebugInfoData, &Offset, UnitIdx, UnitType, + isUnitDWARF64)) { + isHeaderChainValid = false; + if (isUnitDWARF64) + break; + } else { + std::unique_ptr Unit; + switch (UnitType) { + case dwarf::DW_UT_type: + case dwarf::DW_UT_split_type: { + DWARFUnitSection TUSection{}; + Unit.reset(new DWARFTypeUnit( + DCtx, DCtx.getInfoSection(), DCtx.getDebugAbbrev(), + &DCtx.getRangeSection(), DCtx.getStringSection(), + DCtx.getStringOffsetSection(), &DCtx.getAppleObjCSection(), + DCtx.getLineSection(), DCtx.isLittleEndian(), false, TUSection, + nullptr)); + break; + } + case dwarf::DW_UT_skeleton: + case dwarf::DW_UT_split_compile: + case dwarf::DW_UT_compile: + case dwarf::DW_UT_partial: + // UnitType = 0 means that we are + // verifying a compile unit in DWARF v4. 
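// For reference, the two unit-header layouts distinguished above and below:
//   DWARF v4: unit_length(4) version(2) debug_abbrev_offset(4)
//             address_size(1), 11 bytes, with no unit_type field;
//   DWARF v5: unit_length(4) version(2) unit_type(1) address_size(1)
//             debug_abbrev_offset(4), 12 bytes.
// Hence the version-dependent read order in verifyUnitHeader and the
// UnitType == 0 case here, which stands in for pre-v5 compile units.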
+ case 0: { + DWARFUnitSection CUSection{}; + Unit.reset(new DWARFCompileUnit( + DCtx, DCtx.getInfoSection(), DCtx.getDebugAbbrev(), + &DCtx.getRangeSection(), DCtx.getStringSection(), + DCtx.getStringOffsetSection(), &DCtx.getAppleObjCSection(), + DCtx.getLineSection(), DCtx.isLittleEndian(), false, CUSection, + nullptr)); + break; + } + default: { llvm_unreachable("Invalid UnitType."); } + } + Unit->extract(DebugInfoData, &OffsetStart); + if (!verifyUnitContents(*Unit)) + ++NumDebugInfoErrors; + } + hasDIE = DebugInfoData.isValidOffset(Offset); + ++UnitIdx; + } + if (UnitIdx == 0 && !hasDIE) { + OS << "Warning: .debug_info is empty.\n"; + isHeaderChainValid = true; + } + NumDebugInfoErrors += verifyDebugInfoReferences(); + return (isHeaderChainValid && NumDebugInfoErrors == 0); +} + +unsigned DWARFVerifier::verifyDebugInfoAttribute(const DWARFDie &Die, + DWARFAttribute &AttrValue) { + unsigned NumErrors = 0; const auto Attr = AttrValue.Attr; switch (Attr) { case DW_AT_ranges: // Make sure the offset in the DW_AT_ranges attribute is valid. if (auto SectionOffset = AttrValue.Value.getAsSectionOffset()) { if (*SectionOffset >= DCtx.getRangeSection().Data.size()) { - ++NumDebugInfoErrors; + ++NumErrors; OS << "error: DW_AT_ranges offset is beyond .debug_ranges " "bounds:\n"; Die.dump(OS, 0); OS << "\n"; } } else { - ++NumDebugInfoErrors; + ++NumErrors; OS << "error: DIE has invalid DW_AT_ranges encoding:\n"; Die.dump(OS, 0); OS << "\n"; @@ -48,15 +193,15 @@ void DWARFVerifier::verifyDebugInfoAttribute(const DWARFDie &Die, // Make sure the offset in the DW_AT_stmt_list attribute is valid. if (auto SectionOffset = AttrValue.Value.getAsSectionOffset()) { if (*SectionOffset >= DCtx.getLineSection().Data.size()) { - ++NumDebugInfoErrors; + ++NumErrors; OS << "error: DW_AT_stmt_list offset is beyond .debug_line " "bounds: " - << format("0x%08" PRIx32, *SectionOffset) << "\n"; + << format("0x%08" PRIx64, *SectionOffset) << "\n"; Die.dump(OS, 0); OS << "\n"; } } else { - ++NumDebugInfoErrors; + ++NumErrors; OS << "error: DIE has invalid DW_AT_stmt_list encoding:\n"; Die.dump(OS, 0); OS << "\n"; @@ -66,10 +211,12 @@ void DWARFVerifier::verifyDebugInfoAttribute(const DWARFDie &Die, default: break; } + return NumErrors; } -void DWARFVerifier::verifyDebugInfoForm(const DWARFDie &Die, - DWARFAttribute &AttrValue) { +unsigned DWARFVerifier::verifyDebugInfoForm(const DWARFDie &Die, + DWARFAttribute &AttrValue) { + unsigned NumErrors = 0; const auto Form = AttrValue.Value.getForm(); switch (Form) { case DW_FORM_ref1: @@ -85,9 +232,9 @@ void DWARFVerifier::verifyDebugInfoForm(const DWARFDie &Die, auto CUSize = DieCU->getNextUnitOffset() - DieCU->getOffset(); auto CUOffset = AttrValue.Value.getRawUValue(); if (CUOffset >= CUSize) { - ++NumDebugInfoErrors; + ++NumErrors; OS << "error: " << FormEncodingString(Form) << " CU offset " - << format("0x%08" PRIx32, CUOffset) + << format("0x%08" PRIx64, CUOffset) << " is invalid (must be less than CU size of " << format("0x%08" PRIx32, CUSize) << "):\n"; Die.dump(OS, 0); @@ -107,7 +254,7 @@ void DWARFVerifier::verifyDebugInfoForm(const DWARFDie &Die, assert(RefVal); if (RefVal) { if (*RefVal >= DCtx.getInfoSection().Data.size()) { - ++NumDebugInfoErrors; + ++NumErrors; OS << "error: DW_FORM_ref_addr offset beyond .debug_info " "bounds:\n"; Die.dump(OS, 0); @@ -124,7 +271,7 @@ void DWARFVerifier::verifyDebugInfoForm(const DWARFDie &Die, auto SecOffset = AttrValue.Value.getAsSectionOffset(); assert(SecOffset); // DW_FORM_strp is a section offset. 
if (SecOffset && *SecOffset >= DCtx.getStringSection().size()) { - ++NumDebugInfoErrors; + ++NumErrors; OS << "error: DW_FORM_strp offset beyond .debug_str bounds:\n"; Die.dump(OS, 0); OS << "\n"; @@ -134,17 +281,19 @@ void DWARFVerifier::verifyDebugInfoForm(const DWARFDie &Die, default: break; } + return NumErrors; } -void DWARFVerifier::verifyDebugInfoReferences() { +unsigned DWARFVerifier::verifyDebugInfoReferences() { // Take all references and make sure they point to an actual DIE by // getting the DIE by offset and emitting an error OS << "Verifying .debug_info references...\n"; + unsigned NumErrors = 0; for (auto Pair : ReferenceToDIEOffsets) { auto Die = DCtx.getDIEForOffset(Pair.first); if (Die) continue; - ++NumDebugInfoErrors; + ++NumErrors; OS << "error: invalid DIE reference " << format("0x%08" PRIx64, Pair.first) << ". Offset is in between DIEs:\n"; for (auto Offset : Pair.second) { @@ -154,26 +303,7 @@ void DWARFVerifier::verifyDebugInfoReferences() { } OS << "\n"; } -} - -bool DWARFVerifier::handleDebugInfo() { - NumDebugInfoErrors = 0; - OS << "Verifying .debug_info...\n"; - for (const auto &CU : DCtx.compile_units()) { - unsigned NumDies = CU->getNumDIEs(); - for (unsigned I = 0; I < NumDies; ++I) { - auto Die = CU->getDIEAtIndex(I); - const auto Tag = Die.getTag(); - if (Tag == DW_TAG_null) - continue; - for (auto AttrValue : Die.attributes()) { - verifyDebugInfoAttribute(Die, AttrValue); - verifyDebugInfoForm(Die, AttrValue); - } - } - } - verifyDebugInfoReferences(); - return NumDebugInfoErrors == 0; + return NumErrors; } void DWARFVerifier::verifyDebugLineStmtOffsets() { @@ -236,7 +366,7 @@ void DWARFVerifier::verifyDebugLineRows() { if (Row.Address < PrevAddress) { ++NumDebugLineErrors; OS << "error: .debug_line[" - << format("0x%08" PRIx32, + << format("0x%08" PRIx64, *toSectionOffset(Die.find(DW_AT_stmt_list))) << "] row[" << RowIndex << "] decreases in address from previous row:\n"; @@ -251,7 +381,7 @@ void DWARFVerifier::verifyDebugLineRows() { if (Row.File > MaxFileIndex) { ++NumDebugLineErrors; OS << "error: .debug_line[" - << format("0x%08" PRIx32, + << format("0x%08" PRIx64, *toSectionOffset(Die.find(DW_AT_stmt_list))) << "][" << RowIndex << "] has invalid file index " << Row.File << " (valid values are [1," << MaxFileIndex << "]):\n"; @@ -275,3 +405,95 @@ bool DWARFVerifier::handleDebugLine() { verifyDebugLineRows(); return NumDebugLineErrors == 0; } + +bool DWARFVerifier::handleAppleNames() { + NumAppleNamesErrors = 0; + + DWARFDataExtractor AppleNamesSection(DCtx.getAppleNamesSection(), + DCtx.isLittleEndian(), 0); + DataExtractor StrData(DCtx.getStringSection(), DCtx.isLittleEndian(), 0); + DWARFAcceleratorTable AppleNames(AppleNamesSection, StrData); + + if (!AppleNames.extract()) { + return true; + } + + OS << "Verifying .apple_names...\n"; + + // Verify that all buckets have a valid hash index or are empty. 
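// The loop below walks the .apple_names accelerator table, whose layout
// after the fixed-size header and header data is:
//   buckets:  NumBuckets x u32 (index of the first hash in each bucket)
//   hashes:   NumHashes  x u32 (the hash values)
//   offsets:  NumHashes  x u32 (section offset of each name's HashData)
//   HashData: per name, a u32 string offset and a u32 count followed by
//             that many atom tuples (resolved to DIE offsets by readAtoms).
// BucketsOffset, HashesBase and OffsetsBase are computed straight from
// this layout.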
+ uint32_t NumBuckets = AppleNames.getNumBuckets(); + uint32_t NumHashes = AppleNames.getNumHashes(); + + uint32_t BucketsOffset = + AppleNames.getSizeHdr() + AppleNames.getHeaderDataLength(); + uint32_t HashesBase = BucketsOffset + NumBuckets * 4; + uint32_t OffsetsBase = HashesBase + NumHashes * 4; + + for (uint32_t BucketIdx = 0; BucketIdx < NumBuckets; ++BucketIdx) { + uint32_t HashIdx = AppleNamesSection.getU32(&BucketsOffset); + if (HashIdx >= NumHashes && HashIdx != UINT32_MAX) { + OS << format("error: Bucket[%d] has invalid hash index: %u\n", BucketIdx, + HashIdx); + ++NumAppleNamesErrors; + } + } + + uint32_t NumAtoms = AppleNames.getAtomsDesc().size(); + if (NumAtoms == 0) { + OS << "error: no atoms; failed to read HashData\n"; + ++NumAppleNamesErrors; + return false; + } + + if (!AppleNames.validateForms()) { + OS << "error: unsupported form; failed to read HashData\n"; + ++NumAppleNamesErrors; + return false; + } + + for (uint32_t HashIdx = 0; HashIdx < NumHashes; ++HashIdx) { + uint32_t HashOffset = HashesBase + 4 * HashIdx; + uint32_t DataOffset = OffsetsBase + 4 * HashIdx; + uint32_t Hash = AppleNamesSection.getU32(&HashOffset); + uint32_t HashDataOffset = AppleNamesSection.getU32(&DataOffset); + if (!AppleNamesSection.isValidOffsetForDataOfSize(HashDataOffset, + sizeof(uint64_t))) { + OS << format("error: Hash[%d] has invalid HashData offset: 0x%08x\n", + HashIdx, HashDataOffset); + ++NumAppleNamesErrors; + } + + uint32_t StrpOffset; + uint32_t StringOffset; + uint32_t StringCount = 0; + uint32_t DieOffset = dwarf::DW_INVALID_OFFSET; + + while ((StrpOffset = AppleNamesSection.getU32(&HashDataOffset)) != 0) { + const uint32_t NumHashDataObjects = + AppleNamesSection.getU32(&HashDataOffset); + for (uint32_t HashDataIdx = 0; HashDataIdx < NumHashDataObjects; + ++HashDataIdx) { + DieOffset = AppleNames.readAtoms(HashDataOffset); + if (!DCtx.getDIEForOffset(DieOffset)) { + const uint32_t BucketIdx = + NumBuckets ? 
(Hash % NumBuckets) : UINT32_MAX; + StringOffset = StrpOffset; + const char *Name = StrData.getCStr(&StringOffset); + if (!Name) + Name = ""; + + OS << format( + "error: .apple_names Bucket[%d] Hash[%d] = 0x%08x " + "Str[%u] = 0x%08x " + "DIE[%d] = 0x%08x is not a valid DIE offset for \"%s\".\n", + BucketIdx, HashIdx, Hash, StringCount, StrpOffset, HashDataIdx, + DieOffset, Name); + + ++NumAppleNamesErrors; + } + } + ++StringCount; + } + } + return NumAppleNamesErrors == 0; +} diff --git a/interpreter/llvm/src/lib/DebugInfo/DWARF/LLVMBuild.txt b/interpreter/llvm/src/lib/DebugInfo/DWARF/LLVMBuild.txt index 9f8b1047ef6bf..8242a7f2e7f77 100644 --- a/interpreter/llvm/src/lib/DebugInfo/DWARF/LLVMBuild.txt +++ b/interpreter/llvm/src/lib/DebugInfo/DWARF/LLVMBuild.txt @@ -19,4 +19,4 @@ type = Library name = DebugInfoDWARF parent = DebugInfo -required_libraries = Object Support +required_libraries = BinaryFormat Object Support diff --git a/interpreter/llvm/src/lib/DebugInfo/MSF/MSFBuilder.cpp b/interpreter/llvm/src/lib/DebugInfo/MSF/MSFBuilder.cpp index 5b1b5d8dc4d55..0f4f785abf55a 100644 --- a/interpreter/llvm/src/lib/DebugInfo/MSF/MSFBuilder.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/MSF/MSFBuilder.cpp @@ -1,3 +1,4 @@ +//===- MSFBuilder.cpp -----------------------------------------------------===// // // The LLVM Compiler Infrastructure // @@ -6,22 +7,30 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/ArrayRef.h" #include "llvm/DebugInfo/MSF/MSFBuilder.h" #include "llvm/DebugInfo/MSF/MSFError.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/Error.h" +#include +#include +#include +#include +#include +#include +#include using namespace llvm; using namespace llvm::msf; using namespace llvm::support; -namespace { -const uint32_t kSuperBlockBlock = 0; -const uint32_t kFreePageMap0Block = 1; -const uint32_t kFreePageMap1Block = 2; -const uint32_t kNumReservedPages = 3; +static const uint32_t kSuperBlockBlock = 0; +static const uint32_t kFreePageMap0Block = 1; +static const uint32_t kFreePageMap1Block = 2; +static const uint32_t kNumReservedPages = 3; -const uint32_t kDefaultFreePageMap = kFreePageMap0Block; -const uint32_t kDefaultBlockMapAddr = kNumReservedPages; -} +static const uint32_t kDefaultFreePageMap = kFreePageMap0Block; +static const uint32_t kDefaultBlockMapAddr = kNumReservedPages; MSFBuilder::MSFBuilder(uint32_t BlockSize, uint32_t MinBlockCount, bool CanGrow, BumpPtrAllocator &Allocator) @@ -263,7 +272,7 @@ Expected MSFBuilder::build() { // The stream sizes should be re-allocated as a stable pointer and the stream // map should have each of its entries allocated as a separate stable pointer. 
- if (StreamData.size() > 0) { + if (!StreamData.empty()) { ulittle32_t *Sizes = Allocator.Allocate(StreamData.size()); L.StreamSizes = ArrayRef(Sizes, StreamData.size()); L.StreamMap.resize(StreamData.size()); diff --git a/interpreter/llvm/src/lib/DebugInfo/MSF/MSFCommon.cpp b/interpreter/llvm/src/lib/DebugInfo/MSF/MSFCommon.cpp index fdab7884646ec..1facf5efb4bbb 100644 --- a/interpreter/llvm/src/lib/DebugInfo/MSF/MSFCommon.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/MSF/MSFCommon.cpp @@ -1,4 +1,4 @@ -//===- MSFCommon.cpp - Common types and functions for MSF files -*- C++ -*-===// +//===- MSFCommon.cpp - Common types and functions for MSF files -----------===// // // The LLVM Compiler Infrastructure // @@ -9,6 +9,10 @@ #include "llvm/DebugInfo/MSF/MSFCommon.h" #include "llvm/DebugInfo/MSF/MSFError.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/Error.h" +#include +#include using namespace llvm; using namespace llvm::msf; diff --git a/interpreter/llvm/src/lib/DebugInfo/MSF/MappedBlockStream.cpp b/interpreter/llvm/src/lib/DebugInfo/MSF/MappedBlockStream.cpp index 57953cfa338ef..e45f4ae0ed940 100644 --- a/interpreter/llvm/src/lib/DebugInfo/MSF/MappedBlockStream.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/MSF/MappedBlockStream.cpp @@ -8,23 +8,33 @@ //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/MSF/MappedBlockStream.h" - -#include "llvm/DebugInfo/MSF/IMSFFile.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/DebugInfo/MSF/MSFCommon.h" #include "llvm/DebugInfo/MSF/MSFStreamLayout.h" -#include "llvm/Support/BinaryStreamError.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/MathExtras.h" +#include +#include +#include +#include +#include +#include using namespace llvm; using namespace llvm::msf; namespace { + template class MappedBlockStreamImpl : public Base { public: template MappedBlockStreamImpl(Args &&... Params) : Base(std::forward(Params)...) 
{} }; -} + +} // end anonymous namespace static void initializeFpmStreamLayout(const MSFLayout &Layout, MSFStreamLayout &FpmLayout) { @@ -39,51 +49,55 @@ static void initializeFpmStreamLayout(const MSFLayout &Layout, FpmLayout.Length = msf::getFullFpmByteSize(Layout); } -typedef std::pair Interval; +using Interval = std::pair; + static Interval intersect(const Interval &I1, const Interval &I2) { return std::make_pair(std::max(I1.first, I2.first), std::min(I1.second, I2.second)); } -MappedBlockStream::MappedBlockStream(uint32_t BlockSize, uint32_t NumBlocks, +MappedBlockStream::MappedBlockStream(uint32_t BlockSize, const MSFStreamLayout &Layout, - BinaryStreamRef MsfData) - : BlockSize(BlockSize), NumBlocks(NumBlocks), StreamLayout(Layout), - MsfData(MsfData) {} - -std::unique_ptr -MappedBlockStream::createStream(uint32_t BlockSize, uint32_t NumBlocks, - const MSFStreamLayout &Layout, - BinaryStreamRef MsfData) { + BinaryStreamRef MsfData, + BumpPtrAllocator &Allocator) + : BlockSize(BlockSize), StreamLayout(Layout), MsfData(MsfData), + Allocator(Allocator) {} + +std::unique_ptr MappedBlockStream::createStream( + uint32_t BlockSize, const MSFStreamLayout &Layout, BinaryStreamRef MsfData, + BumpPtrAllocator &Allocator) { return llvm::make_unique>( - BlockSize, NumBlocks, Layout, MsfData); + BlockSize, Layout, MsfData, Allocator); } std::unique_ptr MappedBlockStream::createIndexedStream( - const MSFLayout &Layout, BinaryStreamRef MsfData, uint32_t StreamIndex) { + const MSFLayout &Layout, BinaryStreamRef MsfData, uint32_t StreamIndex, + BumpPtrAllocator &Allocator) { assert(StreamIndex < Layout.StreamMap.size() && "Invalid stream index"); MSFStreamLayout SL; SL.Blocks = Layout.StreamMap[StreamIndex]; SL.Length = Layout.StreamSizes[StreamIndex]; return llvm::make_unique>( - Layout.SB->BlockSize, Layout.SB->NumBlocks, SL, MsfData); + Layout.SB->BlockSize, SL, MsfData, Allocator); } std::unique_ptr MappedBlockStream::createDirectoryStream(const MSFLayout &Layout, - BinaryStreamRef MsfData) { + BinaryStreamRef MsfData, + BumpPtrAllocator &Allocator) { MSFStreamLayout SL; SL.Blocks = Layout.DirectoryBlocks; SL.Length = Layout.SB->NumDirectoryBytes; - return createStream(Layout.SB->BlockSize, Layout.SB->NumBlocks, SL, MsfData); + return createStream(Layout.SB->BlockSize, SL, MsfData, Allocator); } std::unique_ptr MappedBlockStream::createFpmStream(const MSFLayout &Layout, - BinaryStreamRef MsfData) { + BinaryStreamRef MsfData, + BumpPtrAllocator &Allocator) { MSFStreamLayout SL; initializeFpmStreamLayout(Layout, SL); - return createStream(Layout.SB->BlockSize, Layout.SB->NumBlocks, SL, MsfData); + return createStream(Layout.SB->BlockSize, SL, MsfData, Allocator); } Error MappedBlockStream::readBytes(uint32_t Offset, uint32_t Size, @@ -149,7 +163,7 @@ Error MappedBlockStream::readBytes(uint32_t Offset, uint32_t Size, // into it, and return an ArrayRef to that. Do not touch existing pool // allocations, as existing clients may be holding a pointer which must // not be invalidated. 
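// Note on the following change: the scratch arena is now the
// BumpPtrAllocator passed into MappedBlockStream's constructor rather than
// a Pool member owned by the stream, so cached buffers can be shared
// between streams and stay valid after any single stream is destroyed.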
- uint8_t *WriteBuffer = static_cast(Pool.Allocate(Size, 8)); + uint8_t *WriteBuffer = static_cast(Allocator.Allocate(Size, 8)); if (auto EC = readBytes(Offset, MutableArrayRef(WriteBuffer, Size))) return EC; @@ -173,7 +187,7 @@ Error MappedBlockStream::readLongestContiguousChunk(uint32_t Offset, uint32_t First = Offset / BlockSize; uint32_t Last = First; - while (Last < NumBlocks - 1) { + while (Last < getNumBlocks() - 1) { if (StreamLayout.Blocks[Last] != StreamLayout.Blocks[Last + 1] - 1) break; ++Last; @@ -211,7 +225,7 @@ bool MappedBlockStream::tryReadContiguously(uint32_t Offset, uint32_t Size, uint32_t OffsetInBlock = Offset % BlockSize; uint32_t BytesFromFirstBlock = std::min(Size, BlockSize - OffsetInBlock); uint32_t NumAdditionalBlocks = - llvm::alignTo(Size - BytesFromFirstBlock, BlockSize) / BlockSize; + alignTo(Size - BytesFromFirstBlock, BlockSize) / BlockSize; uint32_t RequiredContiguousBlocks = NumAdditionalBlocks + 1; uint32_t E = StreamLayout.Blocks[BlockNum]; @@ -270,10 +284,6 @@ Error MappedBlockStream::readBytes(uint32_t Offset, return Error::success(); } -uint32_t MappedBlockStream::getNumBytesCopied() const { - return static_cast(Pool.getBytesAllocated()); -} - void MappedBlockStream::invalidateCache() { CacheMap.shrink_and_clear(); } void MappedBlockStream::fixCacheAfterWrite(uint32_t Offset, @@ -313,45 +323,49 @@ void MappedBlockStream::fixCacheAfterWrite(uint32_t Offset, } WritableMappedBlockStream::WritableMappedBlockStream( - uint32_t BlockSize, uint32_t NumBlocks, const MSFStreamLayout &Layout, - WritableBinaryStreamRef MsfData) - : ReadInterface(BlockSize, NumBlocks, Layout, MsfData), + uint32_t BlockSize, const MSFStreamLayout &Layout, + WritableBinaryStreamRef MsfData, BumpPtrAllocator &Allocator) + : ReadInterface(BlockSize, Layout, MsfData, Allocator), WriteInterface(MsfData) {} std::unique_ptr -WritableMappedBlockStream::createStream(uint32_t BlockSize, uint32_t NumBlocks, +WritableMappedBlockStream::createStream(uint32_t BlockSize, const MSFStreamLayout &Layout, - WritableBinaryStreamRef MsfData) { + WritableBinaryStreamRef MsfData, + BumpPtrAllocator &Allocator) { return llvm::make_unique>( - BlockSize, NumBlocks, Layout, MsfData); + BlockSize, Layout, MsfData, Allocator); } std::unique_ptr WritableMappedBlockStream::createIndexedStream(const MSFLayout &Layout, WritableBinaryStreamRef MsfData, - uint32_t StreamIndex) { + uint32_t StreamIndex, + BumpPtrAllocator &Allocator) { assert(StreamIndex < Layout.StreamMap.size() && "Invalid stream index"); MSFStreamLayout SL; SL.Blocks = Layout.StreamMap[StreamIndex]; SL.Length = Layout.StreamSizes[StreamIndex]; - return createStream(Layout.SB->BlockSize, Layout.SB->NumBlocks, SL, MsfData); + return createStream(Layout.SB->BlockSize, SL, MsfData, Allocator); } std::unique_ptr WritableMappedBlockStream::createDirectoryStream( - const MSFLayout &Layout, WritableBinaryStreamRef MsfData) { + const MSFLayout &Layout, WritableBinaryStreamRef MsfData, + BumpPtrAllocator &Allocator) { MSFStreamLayout SL; SL.Blocks = Layout.DirectoryBlocks; SL.Length = Layout.SB->NumDirectoryBytes; - return createStream(Layout.SB->BlockSize, Layout.SB->NumBlocks, SL, MsfData); + return createStream(Layout.SB->BlockSize, SL, MsfData, Allocator); } std::unique_ptr WritableMappedBlockStream::createFpmStream(const MSFLayout &Layout, - WritableBinaryStreamRef MsfData) { + WritableBinaryStreamRef MsfData, + BumpPtrAllocator &Allocator) { MSFStreamLayout SL; initializeFpmStreamLayout(Layout, SL); - return createStream(Layout.SB->BlockSize, 
Layout.SB->NumBlocks, SL, MsfData); + return createStream(Layout.SB->BlockSize, SL, MsfData, Allocator); } Error WritableMappedBlockStream::readBytes(uint32_t Offset, uint32_t Size, diff --git a/interpreter/llvm/src/lib/DebugInfo/PDB/CMakeLists.txt b/interpreter/llvm/src/lib/DebugInfo/PDB/CMakeLists.txt index e9fd29ccc4caf..9b1f37943e678 100644 --- a/interpreter/llvm/src/lib/DebugInfo/PDB/CMakeLists.txt +++ b/interpreter/llvm/src/lib/DebugInfo/PDB/CMakeLists.txt @@ -41,6 +41,7 @@ add_pdb_impl_folder(Native Native/InfoStream.cpp Native/InfoStreamBuilder.cpp Native/ModuleDebugStream.cpp + Native/NativeBuiltinSymbol.cpp Native/NativeCompilandSymbol.cpp Native/NativeEnumModules.cpp Native/NativeExeSymbol.cpp @@ -51,8 +52,8 @@ add_pdb_impl_folder(Native Native/PDBFileBuilder.cpp Native/PDBStringTable.cpp Native/PDBStringTableBuilder.cpp - Native/PDBTypeServerHandler.cpp Native/PublicsStream.cpp + Native/PublicsStreamBuilder.cpp Native/RawError.cpp Native/SymbolStream.cpp Native/TpiHashing.cpp diff --git a/interpreter/llvm/src/lib/DebugInfo/PDB/DIA/DIAEnumDebugStreams.cpp b/interpreter/llvm/src/lib/DebugInfo/PDB/DIA/DIAEnumDebugStreams.cpp index cae817c1b367d..f62c4991fe33d 100644 --- a/interpreter/llvm/src/lib/DebugInfo/PDB/DIA/DIAEnumDebugStreams.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/PDB/DIA/DIAEnumDebugStreams.cpp @@ -7,9 +7,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/DebugInfo/PDB/PDBSymbol.h" -#include "llvm/DebugInfo/PDB/DIA/DIADataStream.h" #include "llvm/DebugInfo/PDB/DIA/DIAEnumDebugStreams.h" +#include "llvm/DebugInfo/PDB/DIA/DIADataStream.h" +#include "llvm/DebugInfo/PDB/PDBSymbol.h" using namespace llvm; using namespace llvm::pdb; diff --git a/interpreter/llvm/src/lib/DebugInfo/PDB/DIA/DIAEnumLineNumbers.cpp b/interpreter/llvm/src/lib/DebugInfo/PDB/DIA/DIAEnumLineNumbers.cpp index 4741d9c9a8499..796ce214b3838 100644 --- a/interpreter/llvm/src/lib/DebugInfo/PDB/DIA/DIAEnumLineNumbers.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/PDB/DIA/DIAEnumLineNumbers.cpp @@ -7,9 +7,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include "llvm/DebugInfo/PDB/DIA/DIAEnumLineNumbers.h" #include "llvm/DebugInfo/PDB/DIA/DIALineNumber.h" +#include "llvm/DebugInfo/PDB/PDBSymbol.h" using namespace llvm; using namespace llvm::pdb; diff --git a/interpreter/llvm/src/lib/DebugInfo/PDB/DIA/DIAEnumSourceFiles.cpp b/interpreter/llvm/src/lib/DebugInfo/PDB/DIA/DIAEnumSourceFiles.cpp index ccf8c4e622cca..b9311d0601287 100644 --- a/interpreter/llvm/src/lib/DebugInfo/PDB/DIA/DIAEnumSourceFiles.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/PDB/DIA/DIAEnumSourceFiles.cpp @@ -7,9 +7,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include "llvm/DebugInfo/PDB/DIA/DIAEnumSourceFiles.h" #include "llvm/DebugInfo/PDB/DIA/DIASourceFile.h" +#include "llvm/DebugInfo/PDB/PDBSymbol.h" using namespace llvm; using namespace llvm::pdb; diff --git a/interpreter/llvm/src/lib/DebugInfo/PDB/DIA/DIAEnumSymbols.cpp b/interpreter/llvm/src/lib/DebugInfo/PDB/DIA/DIAEnumSymbols.cpp index 3c211b5690449..266638530c2f7 100644 --- a/interpreter/llvm/src/lib/DebugInfo/PDB/DIA/DIAEnumSymbols.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/PDB/DIA/DIAEnumSymbols.cpp @@ -7,10 +7,10 @@ // //===----------------------------------------------------------------------===// -#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include 
"llvm/DebugInfo/PDB/DIA/DIAEnumSymbols.h" #include "llvm/DebugInfo/PDB/DIA/DIARawSymbol.h" #include "llvm/DebugInfo/PDB/DIA/DIASession.h" +#include "llvm/DebugInfo/PDB/PDBSymbol.h" using namespace llvm; using namespace llvm::pdb; diff --git a/interpreter/llvm/src/lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp b/interpreter/llvm/src/lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp index 4e2474c51cb13..4c59d2f2a9d95 100644 --- a/interpreter/llvm/src/lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp @@ -125,16 +125,16 @@ PrivateGetDIAValue(IDiaSymbol *Symbol, return Result8; } -PDB_UniqueId +codeview::GUID PrivateGetDIAValue(IDiaSymbol *Symbol, HRESULT (__stdcall IDiaSymbol::*Method)(GUID *)) { GUID Result; if (S_OK != (Symbol->*Method)(&Result)) - return PDB_UniqueId(); + return codeview::GUID(); - static_assert(sizeof(PDB_UniqueId) == sizeof(GUID), - "PDB_UniqueId is the wrong size!"); - PDB_UniqueId IdResult; + static_assert(sizeof(codeview::GUID) == sizeof(GUID), + "GUID is the wrong size!"); + codeview::GUID IdResult; ::memcpy(&IdResult, &Result, sizeof(GUID)); return IdResult; } @@ -372,8 +372,11 @@ DIARawSymbol::findChildren(PDB_SymType Type) const { enum SymTagEnum EnumVal = static_cast(Type); CComPtr DiaEnumerator; - if (S_OK != Symbol->findChildrenEx(EnumVal, nullptr, nsNone, &DiaEnumerator)) - return nullptr; + if (S_OK != + Symbol->findChildrenEx(EnumVal, nullptr, nsNone, &DiaEnumerator)) { + if (S_OK != Symbol->findChildren(EnumVal, nullptr, nsNone, &DiaEnumerator)) + return nullptr; + } return llvm::make_unique(Session, DiaEnumerator); } @@ -743,7 +746,7 @@ PDB_SymType DIARawSymbol::getSymTag() const { &IDiaSymbol::get_symTag); } -PDB_UniqueId DIARawSymbol::getGuid() const { +codeview::GUID DIARawSymbol::getGuid() const { return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_guid); } diff --git a/interpreter/llvm/src/lib/DebugInfo/PDB/DIA/DIASession.cpp b/interpreter/llvm/src/lib/DebugInfo/PDB/DIA/DIASession.cpp index ef47b92b4f2f3..ef9390cda3127 100644 --- a/interpreter/llvm/src/lib/DebugInfo/PDB/DIA/DIASession.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/PDB/DIA/DIASession.cpp @@ -151,7 +151,7 @@ void DIASession::setLoadAddress(uint64_t Address) { Session->put_loadAddress(Address); } -std::unique_ptr DIASession::getGlobalScope() const { +std::unique_ptr DIASession::getGlobalScope() { CComPtr GlobalScope; if (S_OK != Session->get_globalScope(&GlobalScope)) return nullptr; diff --git a/interpreter/llvm/src/lib/DebugInfo/PDB/GenericError.cpp b/interpreter/llvm/src/lib/DebugInfo/PDB/GenericError.cpp index 789f3b813170d..4fcecb92fd154 100644 --- a/interpreter/llvm/src/lib/DebugInfo/PDB/GenericError.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/PDB/GenericError.cpp @@ -26,6 +26,8 @@ class GenericErrorCategory : public std::error_category { switch (static_cast(Condition)) { case generic_error_code::unspecified: return "An unknown error has occurred."; + case generic_error_code::type_server_not_found: + return "Type server PDB was not found."; case generic_error_code::dia_sdk_not_present: return "LLVM was not compiled with support for DIA. 
This usually means " "that you are not using MSVC, or your Visual Studio " diff --git a/interpreter/llvm/src/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp b/interpreter/llvm/src/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp index 867864e47dce5..897f78c510322 100644 --- a/interpreter/llvm/src/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp @@ -10,7 +10,8 @@ #include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/DebugInfo/CodeView/ModuleDebugFragmentRecord.h" +#include "llvm/BinaryFormat/COFF.h" +#include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h" #include "llvm/DebugInfo/MSF/MSFBuilder.h" #include "llvm/DebugInfo/MSF/MSFCommon.h" #include "llvm/DebugInfo/MSF/MappedBlockStream.h" @@ -19,7 +20,6 @@ #include "llvm/DebugInfo/PDB/Native/RawError.h" #include "llvm/Support/BinaryItemStream.h" #include "llvm/Support/BinaryStreamWriter.h" -#include "llvm/Support/COFF.h" using namespace llvm; using namespace llvm::codeview; @@ -38,12 +38,12 @@ template <> struct BinaryItemTraits { static uint32_t calculateDiSymbolStreamSize(uint32_t SymbolByteSize, uint32_t C13Size) { - uint32_t Size = sizeof(uint32_t); // Signature - Size += SymbolByteSize; // Symbol Data - Size += 0; // TODO: Layout.C11Bytes - Size += C13Size; // C13 Debug Info Size - Size += sizeof(uint32_t); // GlobalRefs substream size (always 0) - Size += 0; // GlobalRefs substream bytes + uint32_t Size = sizeof(uint32_t); // Signature + Size += alignTo(SymbolByteSize, 4); // Symbol Data + Size += 0; // TODO: Layout.C11Bytes + Size += C13Size; // C13 Debug Info Size + Size += sizeof(uint32_t); // GlobalRefs substream size (always 0) + Size += 0; // GlobalRefs substream bytes return Size; } @@ -51,6 +51,7 @@ DbiModuleDescriptorBuilder::DbiModuleDescriptorBuilder(StringRef ModuleName, uint32_t ModIndex, msf::MSFBuilder &Msf) : MSF(Msf), ModuleName(ModuleName) { + ::memset(&Layout, 0, sizeof(Layout)); Layout.Mod = ModIndex; } @@ -64,9 +65,17 @@ void DbiModuleDescriptorBuilder::setObjFileName(StringRef Name) { ObjFileName = Name; } +void DbiModuleDescriptorBuilder::setPdbFilePathNI(uint32_t NI) { + PdbFilePathNI = NI; +} + void DbiModuleDescriptorBuilder::addSymbol(CVSymbol Symbol) { Symbols.push_back(Symbol); - SymbolByteSize += Symbol.data().size(); + // Symbols written to a PDB file are required to be 4 byte aligned. The same + is not true of object files.
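// (alignOf(CodeViewContainer::Pdb) evaluates to 4, which is also why
// calculateDiSymbolStreamSize above now pads the symbol substream with
// alignTo(SymbolByteSize, 4).)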
+ assert(Symbol.length() % alignOf(CodeViewContainer::Pdb) == 0 && + "Invalid Symbol alignment!"); + SymbolByteSize += Symbol.length(); } void DbiModuleDescriptorBuilder::addSourceFile(StringRef Path) { @@ -98,6 +107,7 @@ template struct Foo { template Foo makeFoo(T &&t) { return Foo(std::move(t)); } void DbiModuleDescriptorBuilder::finalize() { + Layout.SC.ModuleIndex = Layout.Mod; Layout.FileNameOffs = 0; // TODO: Fix this Layout.Flags = 0; // TODO: Fix this Layout.C11Bytes = 0; @@ -105,7 +115,7 @@ void DbiModuleDescriptorBuilder::finalize() { (void)Layout.Mod; // Set in constructor (void)Layout.ModDiStream; // Set in finalizeMsfLayout Layout.NumFiles = SourceFiles.size(); - Layout.PdbFilePathNI = 0; + Layout.PdbFilePathNI = PdbFilePathNI; Layout.SrcFileNameNI = 0; // This value includes both the signature field as well as the record bytes @@ -140,7 +150,7 @@ Error DbiModuleDescriptorBuilder::commit(BinaryStreamWriter &ModiWriter, if (Layout.ModDiStream != kInvalidStreamIndex) { auto NS = WritableMappedBlockStream::createIndexedStream( - MsfLayout, MsfBuffer, Layout.ModDiStream); + MsfLayout, MsfBuffer, Layout.ModDiStream, MSF.getAllocator()); WritableBinaryStreamRef Ref(*NS); BinaryStreamWriter SymbolWriter(Ref); // Write the symbols. @@ -152,8 +162,11 @@ Error DbiModuleDescriptorBuilder::commit(BinaryStreamWriter &ModiWriter, BinaryStreamRef RecordsRef(Records); if (auto EC = SymbolWriter.writeStreamRef(RecordsRef)) return EC; + if (auto EC = SymbolWriter.padToAlignment(4)) + return EC; // TODO: Write C11 Line data - + assert(SymbolWriter.getOffset() % alignOf(CodeViewContainer::Pdb) == 0 && + "Invalid debug section alignment!"); for (const auto &Builder : C13Builders) { assert(Builder && "Empty C13 Fragment Builder!"); if (auto EC = Builder->commit(SymbolWriter)) @@ -169,42 +182,15 @@ Error DbiModuleDescriptorBuilder::commit(BinaryStreamWriter &ModiWriter, return Error::success(); } -void DbiModuleDescriptorBuilder::addC13Fragment( - std::unique_ptr Lines) { - ModuleDebugLineFragment &Frag = *Lines; - - // File Checksums have to come first, so push an empty entry on if this - // is the first. - if (C13Builders.empty()) - C13Builders.push_back(nullptr); - - this->LineInfo.push_back(std::move(Lines)); - C13Builders.push_back( - llvm::make_unique(Frag.kind(), Frag)); -} - -void DbiModuleDescriptorBuilder::addC13Fragment( - std::unique_ptr Inlinees) { - ModuleDebugInlineeLineFragment &Frag = *Inlinees; - - // File Checksums have to come first, so push an empty entry on if this - // is the first. 
- if (C13Builders.empty()) - C13Builders.push_back(nullptr); - - this->Inlinees.push_back(std::move(Inlinees)); - C13Builders.push_back( - llvm::make_unique(Frag.kind(), Frag)); +void DbiModuleDescriptorBuilder::addDebugSubsection( + std::shared_ptr Subsection) { + assert(Subsection); + C13Builders.push_back(llvm::make_unique( + std::move(Subsection), CodeViewContainer::Pdb)); } -void DbiModuleDescriptorBuilder::setC13FileChecksums( - std::unique_ptr Checksums) { - assert(!ChecksumInfo && "Can't have more than one checksum info!"); - - if (C13Builders.empty()) - C13Builders.push_back(nullptr); - - ChecksumInfo = std::move(Checksums); - C13Builders[0] = llvm::make_unique( - ChecksumInfo->kind(), *ChecksumInfo); +void DbiModuleDescriptorBuilder::addDebugSubsection( + const DebugSubsectionRecord &SubsectionContents) { + C13Builders.push_back(llvm::make_unique( + SubsectionContents, CodeViewContainer::Pdb)); } diff --git a/interpreter/llvm/src/lib/DebugInfo/PDB/Native/DbiModuleList.cpp b/interpreter/llvm/src/lib/DebugInfo/PDB/Native/DbiModuleList.cpp index 434f775097e04..eea70b229c676 100644 --- a/interpreter/llvm/src/lib/DebugInfo/PDB/Native/DbiModuleList.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/PDB/Native/DbiModuleList.cpp @@ -1,4 +1,4 @@ -//===- DbiModuleList.cpp - PDB module information list ----------*- C++ -*-===// +//===- DbiModuleList.cpp - PDB module information list --------------------===// // // The LLVM Compiler Infrastructure // @@ -6,10 +6,17 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -#include "llvm/DebugInfo/PDB/Native/DbiModuleList.h" +#include "llvm/DebugInfo/PDB/Native/DbiModuleList.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/DebugInfo/PDB/Native/RawError.h" +#include "llvm/Support/BinaryStreamReader.h" #include "llvm/Support/Error.h" +#include +#include +#include +#include using namespace llvm; using namespace llvm::pdb; diff --git a/interpreter/llvm/src/lib/DebugInfo/PDB/Native/DbiStream.cpp b/interpreter/llvm/src/lib/DebugInfo/PDB/Native/DbiStream.cpp index f7538c580ba45..0eeac7e4c0847 100644 --- a/interpreter/llvm/src/lib/DebugInfo/PDB/Native/DbiStream.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/PDB/Native/DbiStream.cpp @@ -72,14 +72,6 @@ Error DbiStream::reload() { return make_error(raw_error_code::feature_unsupported, "Unsupported DBI version."); - auto IS = Pdb.getPDBInfoStream(); - if (!IS) - return IS.takeError(); - - if (Header->Age != IS->getAge()) - return make_error(raw_error_code::corrupt_file, - "DBI Age does not match PDB Age."); - if (Stream->getLength() != sizeof(DbiStreamHeader) + Header->ModiSubstreamSize + Header->SecContrSubstreamSize + Header->SectionMapSize + @@ -107,29 +99,27 @@ Error DbiStream::reload() { return make_error(raw_error_code::corrupt_file, "DBI type server substream not aligned."); - BinaryStreamRef ModInfoSubstream; - BinaryStreamRef FileInfoSubstream; - if (auto EC = - Reader.readStreamRef(ModInfoSubstream, Header->ModiSubstreamSize)) + if (auto EC = Reader.readSubstream(ModiSubstream, Header->ModiSubstreamSize)) return EC; - if (auto EC = Reader.readStreamRef(SecContrSubstream, + if (auto EC = Reader.readSubstream(SecContrSubstream, Header->SecContrSubstreamSize)) return EC; - if (auto EC = Reader.readStreamRef(SecMapSubstream, Header->SectionMapSize)) + if (auto EC = Reader.readSubstream(SecMapSubstream, Header->SectionMapSize)) return EC; - if (auto EC = Reader.readStreamRef(FileInfoSubstream, 
Header->FileInfoSize)) + if (auto EC = Reader.readSubstream(FileInfoSubstream, Header->FileInfoSize)) return EC; if (auto EC = - Reader.readStreamRef(TypeServerMapSubstream, Header->TypeServerSize)) + Reader.readSubstream(TypeServerMapSubstream, Header->TypeServerSize)) return EC; - if (auto EC = Reader.readStreamRef(ECSubstream, Header->ECSubstreamSize)) + if (auto EC = Reader.readSubstream(ECSubstream, Header->ECSubstreamSize)) return EC; if (auto EC = Reader.readArray( DbgStreams, Header->OptionalDbgHdrSize / sizeof(ulittle16_t))) return EC; - if (auto EC = Modules.initialize(ModInfoSubstream, FileInfoSubstream)) + if (auto EC = Modules.initialize(ModiSubstream.StreamData, + FileInfoSubstream.StreamData)) return EC; if (auto EC = initializeSectionContributionData()) @@ -145,8 +135,8 @@ Error DbiStream::reload() { return make_error(raw_error_code::corrupt_file, "Found unexpected bytes in DBI Stream."); - if (ECSubstream.getLength() > 0) { - BinaryStreamReader ECReader(ECSubstream); + if (!ECSubstream.empty()) { + BinaryStreamReader ECReader(ECSubstream.StreamData); if (auto EC = ECNames.reload(ECReader)) return EC; } @@ -224,20 +214,26 @@ FixedStreamArray DbiStream::getSectionMap() const { void DbiStream::visitSectionContributions( ISectionContribVisitor &Visitor) const { - if (SectionContribVersion == DbiSecContribVer60) { + if (!SectionContribs.empty()) { + assert(SectionContribVersion == DbiSecContribVer60); for (auto &SC : SectionContribs) Visitor.visit(SC); - } else if (SectionContribVersion == DbiSecContribV2) { + } else if (!SectionContribs2.empty()) { + assert(SectionContribVersion == DbiSecContribV2); for (auto &SC : SectionContribs2) Visitor.visit(SC); } } +Expected DbiStream::getECName(uint32_t NI) const { + return ECNames.getStringForID(NI); +} + Error DbiStream::initializeSectionContributionData() { - if (SecContrSubstream.getLength() == 0) + if (SecContrSubstream.empty()) return Error::success(); - BinaryStreamReader SCReader(SecContrSubstream); + BinaryStreamReader SCReader(SecContrSubstream.StreamData); if (auto EC = SCReader.readEnum(SectionContribVersion)) return EC; @@ -256,11 +252,14 @@ Error DbiStream::initializeSectionHeadersData() { return Error::success(); uint32_t StreamNum = getDebugStreamIndex(DbgHeaderType::SectionHdr); + if (StreamNum == kInvalidStreamIndex) + return Error::success(); + if (StreamNum >= Pdb.getNumStreams()) return make_error(raw_error_code::no_stream); auto SHS = MappedBlockStream::createIndexedStream( - Pdb.getMsfLayout(), Pdb.getMsfBuffer(), StreamNum); + Pdb.getMsfLayout(), Pdb.getMsfBuffer(), StreamNum, Pdb.getAllocator()); size_t StreamLen = SHS->getLength(); if (StreamLen % sizeof(object::coff_section)) @@ -292,7 +291,7 @@ Error DbiStream::initializeFpoRecords() { return make_error(raw_error_code::no_stream); auto FS = MappedBlockStream::createIndexedStream( - Pdb.getMsfLayout(), Pdb.getMsfBuffer(), StreamNum); + Pdb.getMsfLayout(), Pdb.getMsfBuffer(), StreamNum, Pdb.getAllocator()); size_t StreamLen = FS->getLength(); if (StreamLen % sizeof(object::FpoData)) @@ -308,11 +307,33 @@ Error DbiStream::initializeFpoRecords() { return Error::success(); } +BinarySubstreamRef DbiStream::getSectionContributionData() const { + return SecContrSubstream; +} + +BinarySubstreamRef DbiStream::getSecMapSubstreamData() const { + return SecMapSubstream; +} + +BinarySubstreamRef DbiStream::getModiSubstreamData() const { + return ModiSubstream; +} + +BinarySubstreamRef DbiStream::getFileInfoSubstreamData() const { + return FileInfoSubstream; +} + 
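[Editor's sketch, not part of the patch: the accessors above hand out BinarySubstreamRef values. Conceptually such a ref is "a view of the bytes plus the offset where the view starts"; the stand-in types below are illustrative, not the LLVM definitions.]

#include <cstddef>
#include <cstdint>

struct ByteView {                 // stand-in for llvm::BinaryStreamRef
  const uint8_t *Data = nullptr;
  size_t Length = 0;
};

struct SubstreamRef {             // stand-in for llvm::BinarySubstreamRef
  uint32_t Offset = 0;            // base offset within the parent stream
  ByteView StreamData;            // the viewed bytes themselves
  bool empty() const { return StreamData.Length == 0; }
};

// This shape is why call sites throughout the patch change from
// Substream.getLength() to Substream.empty() and Substream.StreamData.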
+BinarySubstreamRef DbiStream::getTypeServerMapSubstreamData() const { + return TypeServerMapSubstream; +} + +BinarySubstreamRef DbiStream::getECSubstreamData() const { return ECSubstream; } + Error DbiStream::initializeSectionMapData() { - if (SecMapSubstream.getLength() == 0) + if (SecMapSubstream.empty()) return Error::success(); - BinaryStreamReader SMReader(SecMapSubstream); + BinaryStreamReader SMReader(SecMapSubstream.StreamData); const SecMapHeader *Header; if (auto EC = SMReader.readObject(Header)) return EC; diff --git a/interpreter/llvm/src/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp b/interpreter/llvm/src/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp index c19a2f0d31101..25076e40fc98c 100644 --- a/interpreter/llvm/src/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp @@ -10,6 +10,7 @@ #include "llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/BinaryFormat/COFF.h" #include "llvm/DebugInfo/MSF/MSFBuilder.h" #include "llvm/DebugInfo/MSF/MappedBlockStream.h" #include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h" @@ -17,7 +18,6 @@ #include "llvm/DebugInfo/PDB/Native/RawError.h" #include "llvm/Object/COFF.h" #include "llvm/Support/BinaryStreamWriter.h" -#include "llvm/Support/COFF.h" using namespace llvm; using namespace llvm::codeview; @@ -45,17 +45,21 @@ void DbiStreamBuilder::setFlags(uint16_t F) { Flags = F; } void DbiStreamBuilder::setMachineType(PDB_Machine M) { MachineType = M; } -void DbiStreamBuilder::setSectionContribs(ArrayRef Arr) { - SectionContribs = Arr; -} - void DbiStreamBuilder::setSectionMap(ArrayRef SecMap) { SectionMap = SecMap; } +void DbiStreamBuilder::setSymbolRecordStreamIndex(uint32_t Index) { + SymRecordStreamIndex = Index; +} + +void DbiStreamBuilder::setPublicsStreamIndex(uint32_t Index) { + PublicsStreamIndex = Index; +} + Error DbiStreamBuilder::addDbgStream(pdb::DbgHeaderType Type, ArrayRef Data) { - if (DbgStreams[(int)Type].StreamNumber) + if (DbgStreams[(int)Type].StreamNumber != kInvalidStreamIndex) return make_error(raw_error_code::duplicate_entry, "The specified stream type already exists"); auto ExpectedIndex = Msf.addStream(Data.size()); @@ -67,11 +71,16 @@ Error DbiStreamBuilder::addDbgStream(pdb::DbgHeaderType Type, return Error::success(); } +uint32_t DbiStreamBuilder::addECName(StringRef Name) { + return ECNamesBuilder.insert(Name); +} + uint32_t DbiStreamBuilder::calculateSerializedLength() const { // For now we only support serializing the header. 
return sizeof(DbiStreamHeader) + calculateFileInfoSubstreamSize() + calculateModiSubstreamSize() + calculateSectionContribsStreamSize() + - calculateSectionMapStreamSize() + calculateDbgStreamsSize(); + calculateSectionMapStreamSize() + calculateDbgStreamsSize() + + ECNamesBuilder.calculateSerializedSize(); } Expected @@ -94,10 +103,14 @@ Error DbiStreamBuilder::addModuleSourceFile(StringRef Module, StringRef File) { if (ModIter == ModiMap.end()) return make_error(raw_error_code::no_entry, "The specified module was not found"); + return addModuleSourceFile(*ModIter->second, File); +} + +Error DbiStreamBuilder::addModuleSourceFile(DbiModuleDescriptorBuilder &Module, + StringRef File) { uint32_t Index = SourceFileNames.size(); SourceFileNames.insert(std::make_pair(File, Index)); - auto &ModEntry = *ModIter; - ModEntry.second->addSourceFile(File); + Module.addSourceFile(File); return Error::success(); } @@ -129,16 +142,21 @@ uint32_t DbiStreamBuilder::calculateSectionMapStreamSize() const { return sizeof(SecMapHeader) + sizeof(SecMapEntry) * SectionMap.size(); } -uint32_t DbiStreamBuilder::calculateFileInfoSubstreamSize() const { - uint32_t Size = 0; - Size += sizeof(ulittle16_t); // NumModules - Size += sizeof(ulittle16_t); // NumSourceFiles - Size += ModiList.size() * sizeof(ulittle16_t); // ModIndices - Size += ModiList.size() * sizeof(ulittle16_t); // ModFileCounts +uint32_t DbiStreamBuilder::calculateNamesOffset() const { + uint32_t Offset = 0; + Offset += sizeof(ulittle16_t); // NumModules + Offset += sizeof(ulittle16_t); // NumSourceFiles + Offset += ModiList.size() * sizeof(ulittle16_t); // ModIndices + Offset += ModiList.size() * sizeof(ulittle16_t); // ModFileCounts uint32_t NumFileInfos = 0; for (const auto &M : ModiList) NumFileInfos += M->source_files().size(); - Size += NumFileInfos * sizeof(ulittle32_t); // FileNameOffsets + Offset += NumFileInfos * sizeof(ulittle32_t); // FileNameOffsets + return Offset; +} + +uint32_t DbiStreamBuilder::calculateFileInfoSubstreamSize() const { + uint32_t Size = calculateNamesOffset(); Size += calculateNamesBufferSize(); return alignTo(Size, sizeof(uint32_t)); } @@ -157,9 +175,8 @@ uint32_t DbiStreamBuilder::calculateDbgStreamsSize() const { Error DbiStreamBuilder::generateFileInfoSubstream() { uint32_t Size = calculateFileInfoSubstreamSize(); - uint32_t NameSize = calculateNamesBufferSize(); auto Data = Allocator.Allocate(Size); - uint32_t NamesOffset = Size - NameSize; + uint32_t NamesOffset = calculateNamesOffset(); FileInfoBuffer = MutableBinaryByteStream(MutableArrayRef(Data, Size), llvm::support::little); @@ -207,6 +224,9 @@ Error DbiStreamBuilder::generateFileInfoSubstream() { } } + if (auto EC = NameBufferWriter.padToAlignment(sizeof(uint32_t))) + return EC; + if (NameBufferWriter.bytesRemaining() > 0) return make_error(raw_error_code::invalid_format, "The names buffer contained unexpected data."); @@ -230,6 +250,7 @@ Error DbiStreamBuilder::finalize() { return EC; DbiStreamHeader *H = Allocator.Allocate(); + ::memset(H, 0, sizeof(DbiStreamHeader)); H->VersionHeader = *VerHeader; H->VersionSignature = -1; H->Age = Age; @@ -239,15 +260,15 @@ Error DbiStreamBuilder::finalize() { H->PdbDllVersion = PdbDllVersion; H->MachineType = static_cast(MachineType); - H->ECSubstreamSize = 0; + H->ECSubstreamSize = ECNamesBuilder.calculateSerializedSize(); H->FileInfoSize = FileInfoBuffer.getLength(); H->ModiSubstreamSize = calculateModiSubstreamSize(); H->OptionalDbgHdrSize = DbgStreams.size() * sizeof(uint16_t); H->SecContrSubstreamSize = 
calculateSectionContribsStreamSize(); H->SectionMapSize = calculateSectionMapStreamSize(); H->TypeServerSize = 0; - H->SymRecordStreamIndex = kInvalidStreamIndex; - H->PublicSymbolStreamIndex = kInvalidStreamIndex; + H->SymRecordStreamIndex = SymRecordStreamIndex; + H->PublicSymbolStreamIndex = PublicsStreamIndex; H->MFCTypeServerIndex = kInvalidStreamIndex; H->GlobalSymbolStreamIndex = kInvalidStreamIndex; @@ -286,23 +307,17 @@ static uint16_t toSecMapFlags(uint32_t Flags) { return Ret; } -// A utility function to create Section Contributions -// for a given input sections. -std::vector DbiStreamBuilder::createSectionContribs( - ArrayRef SecHdrs) { - std::vector Ret; - - // Create a SectionContrib for each input section. - for (auto &Sec : SecHdrs) { - Ret.emplace_back(); - auto &Entry = Ret.back(); - memset(&Entry, 0, sizeof(Entry)); - - Entry.Off = Sec.PointerToRawData; - Entry.Size = Sec.SizeOfRawData; - Entry.Characteristics = Sec.Characteristics; - } - return Ret; +void DbiStreamBuilder::addSectionContrib(DbiModuleDescriptorBuilder *ModuleDbi, + const object::coff_section *SecHdr) { + SectionContrib SC; + memset(&SC, 0, sizeof(SC)); + SC.ISect = (uint16_t)~0U; // This represents nil. + SC.Off = SecHdr->PointerToRawData; + SC.Size = SecHdr->SizeOfRawData; + SC.Characteristics = SecHdr->Characteristics; + // Use the module index in the module dbi stream or nil (-1). + SC.Imod = ModuleDbi ? ModuleDbi->getModuleIndex() : (uint16_t)~0U; + SectionContribs.emplace_back(SC); } // A utility function to create a Section Map for a given list of COFF sections. @@ -350,8 +365,8 @@ Error DbiStreamBuilder::commit(const msf::MSFLayout &Layout, if (auto EC = finalize()) return EC; - auto DbiS = WritableMappedBlockStream::createIndexedStream(Layout, MsfBuffer, - StreamDBI); + auto DbiS = WritableMappedBlockStream::createIndexedStream( + Layout, MsfBuffer, StreamDBI, Allocator); BinaryStreamWriter Writer(*DbiS); if (auto EC = Writer.writeObject(*Header)) @@ -365,7 +380,7 @@ Error DbiStreamBuilder::commit(const msf::MSFLayout &Layout, if (!SectionContribs.empty()) { if (auto EC = Writer.writeEnum(DbiSecContribVer60)) return EC; - if (auto EC = Writer.writeArray(SectionContribs)) + if (auto EC = Writer.writeArray(makeArrayRef(SectionContribs))) return EC; } @@ -381,6 +396,9 @@ Error DbiStreamBuilder::commit(const msf::MSFLayout &Layout, if (auto EC = Writer.writeStreamRef(FileInfoBuffer)) return EC; + if (auto EC = ECNamesBuilder.commit(Writer)) + return EC; + for (auto &Stream : DbgStreams) if (auto EC = Writer.writeInteger(Stream.StreamNumber)) return EC; @@ -389,7 +407,7 @@ Error DbiStreamBuilder::commit(const msf::MSFLayout &Layout, if (Stream.StreamNumber == kInvalidStreamIndex) continue; auto WritableStream = WritableMappedBlockStream::createIndexedStream( - Layout, MsfBuffer, Stream.StreamNumber); + Layout, MsfBuffer, Stream.StreamNumber, Allocator); BinaryStreamWriter DbgStreamWriter(*WritableStream); if (auto EC = DbgStreamWriter.writeArray(Stream.Data)) return EC; diff --git a/interpreter/llvm/src/lib/DebugInfo/PDB/Native/Hash.cpp b/interpreter/llvm/src/lib/DebugInfo/PDB/Native/Hash.cpp index 2ad3f55dc5c37..61188ece2dcb8 100644 --- a/interpreter/llvm/src/lib/DebugInfo/PDB/Native/Hash.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/PDB/Native/Hash.cpp @@ -8,10 +8,10 @@ //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/PDB/Native/Hash.h" - #include "llvm/ADT/ArrayRef.h" #include "llvm/Support/Endian.h" #include "llvm/Support/JamCRC.h" +#include 
using namespace llvm; using namespace llvm::support; diff --git a/interpreter/llvm/src/lib/DebugInfo/PDB/Native/HashTable.cpp b/interpreter/llvm/src/lib/DebugInfo/PDB/Native/HashTable.cpp index ebf8c9c04db16..439217f91d047 100644 --- a/interpreter/llvm/src/lib/DebugInfo/PDB/Native/HashTable.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/PDB/Native/HashTable.cpp @@ -1,4 +1,4 @@ -//===- HashTable.cpp - PDB Hash Table ---------------------------*- C++ -*-===// +//===- HashTable.cpp - PDB Hash Table -------------------------------------===// // // The LLVM Compiler Infrastructure // @@ -8,12 +8,16 @@ //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/PDB/Native/HashTable.h" - #include "llvm/ADT/Optional.h" -#include "llvm/ADT/SparseBitVector.h" #include "llvm/DebugInfo/PDB/Native/RawError.h" - -#include +#include "llvm/Support/BinaryStreamReader.h" +#include "llvm/Support/BinaryStreamWriter.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/MathExtras.h" +#include +#include +#include +#include using namespace llvm; using namespace llvm::pdb; @@ -106,9 +110,11 @@ void HashTable::clear() { } uint32_t HashTable::capacity() const { return Buckets.size(); } + uint32_t HashTable::size() const { return Present.count(); } HashTableIterator HashTable::begin() const { return HashTableIterator(*this); } + HashTableIterator HashTable::end() const { return HashTableIterator(*this, 0, true); } diff --git a/interpreter/llvm/src/lib/DebugInfo/PDB/Native/InfoStream.cpp b/interpreter/llvm/src/lib/DebugInfo/PDB/Native/InfoStream.cpp index 2a1d12e823902..829879060c33c 100644 --- a/interpreter/llvm/src/lib/DebugInfo/PDB/Native/InfoStream.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/PDB/Native/InfoStream.cpp @@ -57,6 +57,10 @@ Error InfoStream::reload() { uint32_t NewOffset = Reader.getOffset(); NamedStreamMapByteSize = NewOffset - Offset; + Reader.setOffset(Offset); + if (auto EC = Reader.readSubstream(SubNamedStreams, NamedStreamMapByteSize)) + return EC; + bool Stop = false; while (!Stop && !Reader.empty()) { PdbRaw_FeatureSig Sig; @@ -79,6 +83,7 @@ Error InfoStream::reload() { break; case uint32_t(PdbRaw_FeatureSig::MinimalDebugInfo): Features |= PdbFeatureMinimalDebugInfo; + break; default: continue; } @@ -101,6 +106,10 @@ InfoStream::named_streams() const { return NamedStreams.entries(); } +bool InfoStream::containsIdStream() const { + return !!(Features & PdbFeatureContainsIdStream); +} + PdbRaw_ImplVer InfoStream::getVersion() const { return static_cast(Version); } @@ -109,7 +118,7 @@ uint32_t InfoStream::getSignature() const { return Signature; } uint32_t InfoStream::getAge() const { return Age; } -PDB_UniqueId InfoStream::getGuid() const { return Guid; } +GUID InfoStream::getGuid() const { return Guid; } uint32_t InfoStream::getNamedStreamMapByteSize() const { return NamedStreamMapByteSize; @@ -124,3 +133,7 @@ ArrayRef InfoStream::getFeatureSignatures() const { const NamedStreamMap &InfoStream::getNamedStreams() const { return NamedStreams; } + +BinarySubstreamRef InfoStream::getNamedStreamsBuffer() const { + return SubNamedStreams; +} diff --git a/interpreter/llvm/src/lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp b/interpreter/llvm/src/lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp index f019d410328a8..6450ae752f965 100644 --- a/interpreter/llvm/src/lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp @@ -34,7 +34,7 @@ void InfoStreamBuilder::setSignature(uint32_t S) { Sig = 
S; } void InfoStreamBuilder::setAge(uint32_t A) { Age = A; } -void InfoStreamBuilder::setGuid(PDB_UniqueId G) { Guid = G; } +void InfoStreamBuilder::setGuid(GUID G) { Guid = G; } void InfoStreamBuilder::addFeature(PdbRaw_FeatureSig Sig) { Features.push_back(Sig); @@ -50,8 +50,8 @@ Error InfoStreamBuilder::finalizeMsfLayout() { Error InfoStreamBuilder::commit(const msf::MSFLayout &Layout, WritableBinaryStreamRef Buffer) const { - auto InfoS = - WritableMappedBlockStream::createIndexedStream(Layout, Buffer, StreamPDB); + auto InfoS = WritableMappedBlockStream::createIndexedStream( + Layout, Buffer, StreamPDB, Msf.getAllocator()); BinaryStreamWriter Writer(*InfoS); InfoStreamHeader H; diff --git a/interpreter/llvm/src/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp b/interpreter/llvm/src/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp index d7a203746a0d5..2e1f61c7a25dc 100644 --- a/interpreter/llvm/src/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp @@ -9,11 +9,11 @@ #include "llvm/DebugInfo/PDB/Native/ModuleDebugStream.h" #include "llvm/ADT/iterator_range.h" +#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h" #include "llvm/DebugInfo/CodeView/SymbolRecord.h" #include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h" -#include "llvm/DebugInfo/PDB/Native/PDBFile.h" #include "llvm/DebugInfo/PDB/Native/RawError.h" -#include "llvm/DebugInfo/PDB/Native/RawTypes.h" #include "llvm/Support/BinaryStreamReader.h" #include "llvm/Support/BinaryStreamRef.h" #include "llvm/Support/Error.h" @@ -47,23 +47,27 @@ Error ModuleDebugStreamRef::reload() { if (auto EC = Reader.readInteger(Signature)) return EC; - if (auto EC = Reader.readArray(SymbolsSubstream, SymbolSize - 4)) + if (auto EC = Reader.readSubstream(SymbolsSubstream, SymbolSize - 4)) return EC; - - if (auto EC = Reader.readStreamRef(C11LinesSubstream, C11Size)) + if (auto EC = Reader.readSubstream(C11LinesSubstream, C11Size)) return EC; - if (auto EC = Reader.readStreamRef(C13LinesSubstream, C13Size)) + if (auto EC = Reader.readSubstream(C13LinesSubstream, C13Size)) return EC; - BinaryStreamReader LineReader(C13LinesSubstream); + BinaryStreamReader SymbolReader(SymbolsSubstream.StreamData); if (auto EC = - LineReader.readArray(LinesAndChecksums, LineReader.bytesRemaining())) + SymbolReader.readArray(SymbolArray, SymbolReader.bytesRemaining())) + return EC; + + BinaryStreamReader SubsectionsReader(C13LinesSubstream.StreamData); + if (auto EC = SubsectionsReader.readArray(Subsections, + SubsectionsReader.bytesRemaining())) return EC; uint32_t GlobalRefsSize; if (auto EC = Reader.readInteger(GlobalRefsSize)) return EC; - if (auto EC = Reader.readStreamRef(GlobalRefsSubstream, GlobalRefsSize)) + if (auto EC = Reader.readSubstream(GlobalRefsSubstream, GlobalRefsSize)) return EC; if (Reader.bytesRemaining() > 0) return make_error(raw_error_code::corrupt_file, @@ -72,18 +76,48 @@ Error ModuleDebugStreamRef::reload() { return Error::success(); } +BinarySubstreamRef ModuleDebugStreamRef::getSymbolsSubstream() const { + return SymbolsSubstream; +} + +BinarySubstreamRef ModuleDebugStreamRef::getC11LinesSubstream() const { + return C11LinesSubstream; +} + +BinarySubstreamRef ModuleDebugStreamRef::getC13LinesSubstream() const { + return C13LinesSubstream; +} + +BinarySubstreamRef ModuleDebugStreamRef::getGlobalRefsSubstream() const { + return GlobalRefsSubstream; +} + iterator_range ModuleDebugStreamRef::symbols(bool *HadError) const { 
- return make_range(SymbolsSubstream.begin(HadError), SymbolsSubstream.end()); + return make_range(SymbolArray.begin(HadError), SymbolArray.end()); } -llvm::iterator_range -ModuleDebugStreamRef::linesAndChecksums() const { - return make_range(LinesAndChecksums.begin(), LinesAndChecksums.end()); +iterator_range +ModuleDebugStreamRef::subsections() const { + return make_range(Subsections.begin(), Subsections.end()); } -bool ModuleDebugStreamRef::hasLineInfo() const { - return C13LinesSubstream.getLength() > 0; +bool ModuleDebugStreamRef::hasDebugSubsections() const { + return !C13LinesSubstream.empty(); } Error ModuleDebugStreamRef::commit() { return Error::success(); } + +Expected +ModuleDebugStreamRef::findChecksumsSubsection() const { + codeview::DebugChecksumsSubsectionRef Result; + for (const auto &SS : subsections()) { + if (SS.kind() != DebugSubsectionKind::FileChecksums) + continue; + + if (auto EC = Result.initialize(SS.getRecordData())) + return std::move(EC); + return Result; + } + return Result; +} diff --git a/interpreter/llvm/src/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp b/interpreter/llvm/src/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp index c7ba32b82bc6b..6cdf6dde04d9f 100644 --- a/interpreter/llvm/src/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp @@ -1,4 +1,4 @@ -//===- NamedStreamMap.cpp - PDB Named Stream Map ----------------*- C++ -*-===// +//===- NamedStreamMap.cpp - PDB Named Stream Map --------------------------===// // // The LLVM Compiler Infrastructure // @@ -8,21 +8,32 @@ //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/PDB/Native/NamedStreamMap.h" - -#include "llvm/ADT/SparseBitVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/iterator_range.h" #include "llvm/DebugInfo/PDB/Native/HashTable.h" #include "llvm/DebugInfo/PDB/Native/RawError.h" #include "llvm/Support/BinaryStreamReader.h" +#include "llvm/Support/BinaryStreamRef.h" +#include "llvm/Support/BinaryStreamWriter.h" +#include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" #include +#include #include +#include using namespace llvm; using namespace llvm::pdb; +// FIXME: This shouldn't be necessary, but if we insert the strings in any +// other order, cvdump cannot read the generated name map. This suggests that +// we may be using the wrong hash function. A closer inspection of the cvdump +// source code may reveal something, but for now this at least makes us work, +// even if only by accident. +static constexpr const char *OrderedStreamNames[] = {"/LinkInfo", "/names", + "/src/headerblock"}; + NamedStreamMap::NamedStreamMap() = default; Error NamedStreamMap::load(BinaryStreamReader &Stream) { @@ -73,9 +84,11 @@ Error NamedStreamMap::commit(BinaryStreamWriter &Writer) const { if (auto EC = Writer.writeInteger(FinalizedInfo->StringDataBytes)) return EC; - // Now all of the string data itself. - for (const auto &Item : Mapping) { - if (auto EC = Writer.writeCString(Item.getKey())) + for (const auto &Name : OrderedStreamNames) { + auto Item = Mapping.find(Name); + if (Item == Mapping.end()) + continue; + if (auto EC = Writer.writeCString(Item->getKey())) return EC; } @@ -93,9 +106,13 @@ uint32_t NamedStreamMap::finalize() { // Build the finalized hash table. 
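[Editor's sketch, not part of the patch: the fixed OrderedStreamNames walk used in commit() above and in finalize() below exists because the serialized hash table stores offsets into the names buffer, so the offsets must be assigned and the strings emitted in one agreed order. A self-contained illustration; the stream indices are made up.]

#include <cstdint>
#include <cstring>
#include <map>
#include <string>
#include <utility>
#include <vector>

// Returns (offset-of-name-in-buffer, stream-index) pairs in the fixed order.
inline std::vector<std::pair<uint32_t, uint32_t>>
layoutNames(const std::map<std::string, uint32_t> &Mapping) {
  static const char *Ordered[] = {"/LinkInfo", "/names", "/src/headerblock"};
  std::vector<std::pair<uint32_t, uint32_t>> OffsetToStream;
  uint32_t StringDataBytes = 0;
  for (const char *Name : Ordered) {
    auto It = Mapping.find(Name);
    if (It == Mapping.end())
      continue;
    OffsetToStream.push_back({StringDataBytes, It->second});
    StringDataBytes += uint32_t(std::strlen(Name)) + 1; // "name" plus NUL
  }
  return OffsetToStream;
}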
FinalizedHashTable.clear(); FinalizedInfo.emplace(); - for (const auto &Item : Mapping) { - FinalizedHashTable.set(FinalizedInfo->StringDataBytes, Item.getValue()); - FinalizedInfo->StringDataBytes += Item.getKeyLength() + 1; + + for (const auto &Name : OrderedStreamNames) { + auto Item = Mapping.find(Name); + if (Item == Mapping.end()) + continue; + FinalizedHashTable.set(FinalizedInfo->StringDataBytes, Item->getValue()); + FinalizedInfo->StringDataBytes += Item->getKeyLength() + 1; } // Number of bytes of string data. diff --git a/interpreter/llvm/src/lib/DebugInfo/PDB/Native/NativeBuiltinSymbol.cpp b/interpreter/llvm/src/lib/DebugInfo/PDB/Native/NativeBuiltinSymbol.cpp new file mode 100644 index 0000000000000..60416f69e137c --- /dev/null +++ b/interpreter/llvm/src/lib/DebugInfo/PDB/Native/NativeBuiltinSymbol.cpp @@ -0,0 +1,48 @@ +//===- NativeBuiltinSymbol.cpp ------------------------------------ C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Native/NativeBuiltinSymbol.h" + +#include "llvm/DebugInfo/PDB/Native/NativeSession.h" + +namespace llvm { +namespace pdb { + +NativeBuiltinSymbol::NativeBuiltinSymbol(NativeSession &PDBSession, + SymIndexId Id, PDB_BuiltinType T, + uint64_t L) + : NativeRawSymbol(PDBSession, Id), Session(PDBSession), Type(T), Length(L) { +} + +NativeBuiltinSymbol::~NativeBuiltinSymbol() {} + +std::unique_ptr NativeBuiltinSymbol::clone() const { + return llvm::make_unique(Session, SymbolId, Type, Length); +} + +void NativeBuiltinSymbol::dump(raw_ostream &OS, int Indent) const { + // TODO: Apparently nothing needs this yet. 
+} + +PDB_SymType NativeBuiltinSymbol::getSymTag() const { + return PDB_SymType::BuiltinType; +} + +PDB_BuiltinType NativeBuiltinSymbol::getBuiltinType() const { return Type; } + +bool NativeBuiltinSymbol::isConstType() const { return false; } + +uint64_t NativeBuiltinSymbol::getLength() const { return Length; } + +bool NativeBuiltinSymbol::isUnalignedType() const { return false; } + +bool NativeBuiltinSymbol::isVolatileType() const { return false; } + +} // namespace pdb +} // namespace llvm diff --git a/interpreter/llvm/src/lib/DebugInfo/PDB/Native/NativeCompilandSymbol.cpp b/interpreter/llvm/src/lib/DebugInfo/PDB/Native/NativeCompilandSymbol.cpp index 77f832582f824..7132a99a9f160 100644 --- a/interpreter/llvm/src/lib/DebugInfo/PDB/Native/NativeCompilandSymbol.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/PDB/Native/NativeCompilandSymbol.cpp @@ -9,17 +9,24 @@ #include "llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h" +#include "llvm/ADT/STLExtras.h" + namespace llvm { namespace pdb { NativeCompilandSymbol::NativeCompilandSymbol(NativeSession &Session, + SymIndexId SymbolId, DbiModuleDescriptor MI) - : NativeRawSymbol(Session), Module(MI) {} + : NativeRawSymbol(Session, SymbolId), Module(MI) {} PDB_SymType NativeCompilandSymbol::getSymTag() const { return PDB_SymType::Compiland; } +std::unique_ptr NativeCompilandSymbol::clone() const { + return llvm::make_unique(Session, SymbolId, Module); +} + bool NativeCompilandSymbol::isEditAndContinueEnabled() const { return Module.hasECInfo(); } diff --git a/interpreter/llvm/src/lib/DebugInfo/PDB/Native/NativeEnumModules.cpp b/interpreter/llvm/src/lib/DebugInfo/PDB/Native/NativeEnumModules.cpp index 97319fd77d117..a65782e2d4fc6 100644 --- a/interpreter/llvm/src/lib/DebugInfo/PDB/Native/NativeEnumModules.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/PDB/Native/NativeEnumModules.cpp @@ -32,9 +32,7 @@ std::unique_ptr NativeEnumModules::getChildAtIndex(uint32_t Index) const { if (Index >= Modules.getModuleCount()) return nullptr; - return std::unique_ptr(new PDBSymbolCompiland( - Session, std::unique_ptr(new NativeCompilandSymbol( - Session, Modules.getModuleDescriptor(Index))))); + return Session.createCompilandSymbol(Modules.getModuleDescriptor(Index)); } std::unique_ptr NativeEnumModules::getNext() { diff --git a/interpreter/llvm/src/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp b/interpreter/llvm/src/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp index bb52560be167a..3241000b06db0 100644 --- a/interpreter/llvm/src/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp @@ -9,6 +9,7 @@ #include "llvm/DebugInfo/PDB/Native/NativeExeSymbol.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/DebugInfo/PDB/Native/DbiStream.h" #include "llvm/DebugInfo/PDB/Native/InfoStream.h" #include "llvm/DebugInfo/PDB/Native/NativeEnumModules.h" @@ -17,8 +18,12 @@ namespace llvm { namespace pdb { -NativeExeSymbol::NativeExeSymbol(NativeSession &Session) - : NativeRawSymbol(Session), File(Session.getPDBFile()) {} +NativeExeSymbol::NativeExeSymbol(NativeSession &Session, SymIndexId SymbolId) + : NativeRawSymbol(Session, SymbolId), File(Session.getPDBFile()) {} + +std::unique_ptr NativeExeSymbol::clone() const { + return llvm::make_unique(Session, SymbolId); +} std::unique_ptr NativeExeSymbol::findChildren(PDB_SymType Type) const { @@ -51,12 +56,12 @@ std::string NativeExeSymbol::getSymbolsFileName() const { return File.getFilePath(); } -PDB_UniqueId NativeExeSymbol::getGuid() const { +codeview::GUID 
NativeExeSymbol::getGuid() const { auto IS = File.getPDBInfoStream(); if (IS) return IS->getGuid(); consumeError(IS.takeError()); - return PDB_UniqueId{{0}}; + return codeview::GUID{{0}}; } bool NativeExeSymbol::hasCTypes() const { diff --git a/interpreter/llvm/src/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp b/interpreter/llvm/src/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp index 70968d4330b07..df3f418052a9d 100644 --- a/interpreter/llvm/src/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp @@ -1,4 +1,4 @@ -//===- NativeRawSymbol.cpp - Native implementation of IPDBRawSymbol -*- C++ -*-===// +//===- NativeRawSymbol.cpp - Native implementation of IPDBRawSymbol -------===// // // The LLVM Compiler Infrastructure // @@ -8,22 +8,13 @@ //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h" -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h" -#include "llvm/DebugInfo/PDB/Native/NativeSession.h" -#include "llvm/DebugInfo/PDB/PDBExtras.h" #include "llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h" -#include "llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h" -#include "llvm/DebugInfo/PDB/PDBSymbolTypeVTableShape.h" -#include "llvm/Support/ConvertUTF.h" -#include "llvm/Support/raw_ostream.h" using namespace llvm; using namespace llvm::pdb; -NativeRawSymbol::NativeRawSymbol(NativeSession &PDBSession) - : Session(PDBSession) {} +NativeRawSymbol::NativeRawSymbol(NativeSession &PDBSession, SymIndexId SymbolId) + : Session(PDBSession), SymbolId(SymbolId) {} void NativeRawSymbol::dump(raw_ostream &OS, int Indent) const {} @@ -49,7 +40,7 @@ NativeRawSymbol::findInlineFramesByRVA(uint32_t RVA) const { return nullptr; } -void NativeRawSymbol::getDataBytes(llvm::SmallVector &bytes) const { +void NativeRawSymbol::getDataBytes(SmallVector &bytes) const { bytes.clear(); } @@ -109,7 +100,7 @@ uint32_t NativeRawSymbol::getClassParentId() const { } std::string NativeRawSymbol::getCompilerName() const { - return 0; + return {}; } uint32_t NativeRawSymbol::getCount() const { @@ -136,7 +127,7 @@ uint32_t NativeRawSymbol::getLexicalParentId() const { } std::string NativeRawSymbol::getLibraryName() const { - return ""; + return {}; } uint32_t NativeRawSymbol::getLiveRangeStartAddressOffset() const { @@ -164,7 +155,7 @@ uint32_t NativeRawSymbol::getMemorySpaceKind() const { } std::string NativeRawSymbol::getName() const { - return 0; + return {}; } uint32_t NativeRawSymbol::getNumberOfAcceleratorPointerTags() const { @@ -188,7 +179,7 @@ uint32_t NativeRawSymbol::getNumberOfRows() const { } std::string NativeRawSymbol::getObjectFileName() const { - return ""; + return {}; } uint32_t NativeRawSymbol::getOemId() const { @@ -240,7 +231,7 @@ uint32_t NativeRawSymbol::getSlot() const { } std::string NativeRawSymbol::getSourceFileName() const { - return 0; + return {}; } uint32_t NativeRawSymbol::getStride() const { @@ -251,11 +242,9 @@ uint32_t NativeRawSymbol::getSubTypeId() const { return 0; } -std::string NativeRawSymbol::getSymbolsFileName() const { return ""; } +std::string NativeRawSymbol::getSymbolsFileName() const { return {}; } -uint32_t NativeRawSymbol::getSymIndexId() const { - return 0; -} +uint32_t NativeRawSymbol::getSymIndexId() const { return SymbolId; } uint32_t NativeRawSymbol::getTargetOffset() const { return 0; @@ -294,7 +283,7 @@ uint32_t NativeRawSymbol::getUavSlot() const { } std::string 
NativeRawSymbol::getUndecoratedName() const { - return 0; + return {}; } uint32_t NativeRawSymbol::getUnmodifiedTypeId() const { @@ -334,9 +323,7 @@ PDB_SymType NativeRawSymbol::getSymTag() const { return PDB_SymType::None; } -PDB_UniqueId NativeRawSymbol::getGuid() const { - return PDB_UniqueId{{0}}; -} +codeview::GUID NativeRawSymbol::getGuid() const { return codeview::GUID{{0}}; } int32_t NativeRawSymbol::getOffset() const { return 0; @@ -703,5 +690,5 @@ bool NativeRawSymbol::wasInlined() const { } std::string NativeRawSymbol::getUnused() const { - return ""; + return {}; } diff --git a/interpreter/llvm/src/lib/DebugInfo/PDB/Native/NativeSession.cpp b/interpreter/llvm/src/lib/DebugInfo/PDB/Native/NativeSession.cpp index 7e6843bceb7db..76de0d8f9e7ef 100644 --- a/interpreter/llvm/src/lib/DebugInfo/PDB/Native/NativeSession.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/PDB/Native/NativeSession.cpp @@ -10,9 +10,12 @@ #include "llvm/DebugInfo/PDB/Native/NativeSession.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/DebugInfo/CodeView/TypeIndex.h" #include "llvm/DebugInfo/PDB/GenericError.h" #include "llvm/DebugInfo/PDB/IPDBEnumChildren.h" #include "llvm/DebugInfo/PDB/IPDBSourceFile.h" +#include "llvm/DebugInfo/PDB/Native/NativeBuiltinSymbol.h" +#include "llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h" #include "llvm/DebugInfo/PDB/Native/NativeExeSymbol.h" #include "llvm/DebugInfo/PDB/Native/PDBFile.h" #include "llvm/DebugInfo/PDB/Native/RawError.h" @@ -23,13 +26,37 @@ #include "llvm/Support/Error.h" #include "llvm/Support/ErrorOr.h" #include "llvm/Support/MemoryBuffer.h" + #include #include +#include using namespace llvm; using namespace llvm::msf; using namespace llvm::pdb; +namespace { +// Maps codeview::SimpleTypeKind of a built-in type to the parameters necessary +// to instantiate a NativeBuiltinSymbol for that type. +static const struct BuiltinTypeEntry { + codeview::SimpleTypeKind Kind; + PDB_BuiltinType Type; + uint32_t Size; +} BuiltinTypes[] = { + {codeview::SimpleTypeKind::Int32, PDB_BuiltinType::Int, 4}, + {codeview::SimpleTypeKind::UInt32, PDB_BuiltinType::UInt, 4}, + {codeview::SimpleTypeKind::UInt32Long, PDB_BuiltinType::UInt, 4}, + {codeview::SimpleTypeKind::UInt64Quad, PDB_BuiltinType::UInt, 8}, + {codeview::SimpleTypeKind::NarrowCharacter, PDB_BuiltinType::Char, 1}, + {codeview::SimpleTypeKind::SignedCharacter, PDB_BuiltinType::Char, 1}, + {codeview::SimpleTypeKind::UnsignedCharacter, PDB_BuiltinType::UInt, 1}, + {codeview::SimpleTypeKind::UInt16Short, PDB_BuiltinType::UInt, 2}, + {codeview::SimpleTypeKind::Boolean8, PDB_BuiltinType::Bool, 1} + // This table can be grown as necessary, but these are the only types we've + // needed so far. +}; +} // namespace + NativeSession::NativeSession(std::unique_ptr PdbFile, std::unique_ptr Allocator) : Pdb(std::move(PdbFile)), Allocator(std::move(Allocator)) {} @@ -66,22 +93,67 @@ Error NativeSession::createFromExe(StringRef Path, return make_error(raw_error_code::feature_unsupported); } +std::unique_ptr +NativeSession::createCompilandSymbol(DbiModuleDescriptor MI) { + const auto Id = static_cast(SymbolCache.size()); + SymbolCache.push_back( + llvm::make_unique(*this, Id, MI)); + return llvm::make_unique( + *this, std::unique_ptr(SymbolCache[Id]->clone())); +} + +SymIndexId NativeSession::findSymbolByTypeIndex(codeview::TypeIndex Index) { + // First see if it's already in our cache. 
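[Editor's sketch, not part of the patch: createCompilandSymbol() above and findSymbolByTypeIndex() below share one memoization pattern, in which the vector slot in the session-wide cache doubles as the SymIndexId. A stripped-down version with hypothetical names.]

#include <cstdint>
#include <map>
#include <memory>
#include <vector>

struct CachedSymbol { uint32_t Id; };

struct SymbolCacheSketch {
  std::vector<std::unique_ptr<CachedSymbol>> Cache;
  std::map<uint32_t, uint32_t> KeyToId; // e.g. TypeIndex -> SymIndexId

  uint32_t getOrCreate(uint32_t Key) {
    auto It = KeyToId.find(Key);
    if (It != KeyToId.end())
      return It->second;                  // hit: reuse the existing id
    uint32_t Id = uint32_t(Cache.size()); // miss: the id is the next slot
    Cache.push_back(std::make_unique<CachedSymbol>(CachedSymbol{Id}));
    KeyToId[Key] = Id;
    return Id;
  }
};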
+ const auto Entry = TypeIndexToSymbolId.find(Index); + if (Entry != TypeIndexToSymbolId.end()) + return Entry->second; + + // Symbols for built-in types are created on the fly. + if (Index.isSimple()) { + // FIXME: We will eventually need to handle pointers to other simple types, + // which are still simple types in the world of CodeView TypeIndexes. + if (Index.getSimpleMode() != codeview::SimpleTypeMode::Direct) + return 0; + const auto Kind = Index.getSimpleKind(); + const auto It = + std::find_if(std::begin(BuiltinTypes), std::end(BuiltinTypes), + [Kind](const BuiltinTypeEntry &Builtin) { + return Builtin.Kind == Kind; + }); + if (It == std::end(BuiltinTypes)) + return 0; + SymIndexId Id = SymbolCache.size(); + SymbolCache.emplace_back( + llvm::make_unique(*this, Id, It->Type, It->Size)); + TypeIndexToSymbolId[Index] = Id; + return Id; + } + + // TODO: Look up PDB type by type index + + return 0; +} + uint64_t NativeSession::getLoadAddress() const { return 0; } void NativeSession::setLoadAddress(uint64_t Address) {} -std::unique_ptr NativeSession::getGlobalScope() const { - auto RawSymbol = - llvm::make_unique(const_cast(*this)); +std::unique_ptr NativeSession::getGlobalScope() { + const auto Id = static_cast(SymbolCache.size()); + SymbolCache.push_back(llvm::make_unique(*this, Id)); + auto RawSymbol = SymbolCache[Id]->clone(); auto PdbSymbol(PDBSymbol::create(*this, std::move(RawSymbol))); std::unique_ptr ExeSymbol( - static_cast(PdbSymbol.release())); + static_cast(PdbSymbol.release())); return ExeSymbol; } std::unique_ptr NativeSession::getSymbolById(uint32_t SymbolId) const { - return nullptr; + // If the caller has a SymbolId, it'd better be in our SymbolCache. + return SymbolId < SymbolCache.size() + ? PDBSymbol::create(*this, SymbolCache[SymbolId]->clone()) + : nullptr; } std::unique_ptr diff --git a/interpreter/llvm/src/lib/DebugInfo/PDB/Native/PDBFile.cpp b/interpreter/llvm/src/lib/DebugInfo/PDB/Native/PDBFile.cpp index 859295d2c7d33..0b6492efc70f3 100644 --- a/interpreter/llvm/src/lib/DebugInfo/PDB/Native/PDBFile.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/PDB/Native/PDBFile.cpp @@ -146,7 +146,8 @@ Error PDBFile::parseFileHeaders() { // at getBlockSize() intervals, so we have to be compatible. // See the function fpmPn() for more information: // https://github.com/Microsoft/microsoft-pdb/blob/master/PDB/msf/msf.cpp#L489 - auto FpmStream = MappedBlockStream::createFpmStream(ContainerLayout, *Buffer); + auto FpmStream = + MappedBlockStream::createFpmStream(ContainerLayout, *Buffer, Allocator); BinaryStreamReader FpmReader(*FpmStream); ArrayRef FpmBytes; if (auto EC = FpmReader.readBytes(FpmBytes, @@ -184,7 +185,8 @@ Error PDBFile::parseStreamData() { // is exactly what we are attempting to parse. By specifying a custom // subclass of IPDBStreamData which only accesses the fields that have already // been parsed, we can avoid this and reuse MappedBlockStream. 
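[Editor's sketch, not part of the patch: the comment above describes a bootstrap problem, in that the stream directory is itself stored in MSF blocks whose addresses come from the already-parsed superblock. Reassembling any MSF stream from its block list looks roughly like this; all names are ours.]

#include <cstddef>
#include <cstdint>
#include <vector>

// Concatenate the blocks that make up one stream, truncating the final
// block to the stream's byte length.
static std::vector<uint8_t>
readStreamSketch(const std::vector<std::vector<uint8_t>> &Blocks,
                 const std::vector<uint32_t> &BlockList, uint64_t Length) {
  std::vector<uint8_t> Out;
  for (uint32_t Idx : BlockList) {
    const std::vector<uint8_t> &B = Blocks[Idx];
    uint64_t Want = Length - Out.size();
    uint64_t Take = Want < B.size() ? Want : B.size();
    Out.insert(Out.end(), B.begin(), B.begin() + ptrdiff_t(Take));
  }
  return Out;
}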
- auto DS = MappedBlockStream::createDirectoryStream(ContainerLayout, *Buffer); + auto DS = MappedBlockStream::createDirectoryStream(ContainerLayout, *Buffer, + Allocator); BinaryStreamReader Reader(*DS); if (auto EC = Reader.readInteger(NumStreams)) return EC; @@ -228,6 +230,14 @@ ArrayRef PDBFile::getDirectoryBlockArray() const { return ContainerLayout.DirectoryBlocks; } +MSFStreamLayout PDBFile::getStreamLayout(uint32_t StreamIdx) const { + MSFStreamLayout Result; + auto Blocks = getStreamBlockList(StreamIdx); + Result.Blocks.assign(Blocks.begin(), Blocks.end()); + Result.Length = getStreamByteSize(StreamIdx); + return Result; +} + Expected PDBFile::getPDBGlobalsStream() { if (!Globals) { auto DbiS = getPDBDbiStream(); @@ -361,12 +371,25 @@ Expected PDBFile::getStringTable() { return *Strings; } +uint32_t PDBFile::getPointerSize() { + auto DbiS = getPDBDbiStream(); + if (!DbiS) + return 0; + PDB_Machine Machine = DbiS->getMachineType(); + if (Machine == PDB_Machine::Amd64) + return 8; + return 4; +} + bool PDBFile::hasPDBDbiStream() const { return StreamDBI < getNumStreams(); } bool PDBFile::hasPDBGlobalsStream() { auto DbiS = getPDBDbiStream(); - if (!DbiS) + if (!DbiS) { + consumeError(DbiS.takeError()); return false; + } + return DbiS->getGlobalSymbolStreamIndex() < getNumStreams(); } @@ -376,8 +399,10 @@ bool PDBFile::hasPDBIpiStream() const { return StreamIPI < getNumStreams(); } bool PDBFile::hasPDBPublicsStream() { auto DbiS = getPDBDbiStream(); - if (!DbiS) + if (!DbiS) { + consumeError(DbiS.takeError()); return false; + } return DbiS->getPublicSymbolStreamIndex() < getNumStreams(); } @@ -407,5 +432,6 @@ PDBFile::safelyCreateIndexedStream(const MSFLayout &Layout, uint32_t StreamIndex) const { if (StreamIndex >= getNumStreams()) return make_error(raw_error_code::no_stream); - return MappedBlockStream::createIndexedStream(Layout, MsfData, StreamIndex); + return MappedBlockStream::createIndexedStream(Layout, MsfData, StreamIndex, + Allocator); } diff --git a/interpreter/llvm/src/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp b/interpreter/llvm/src/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp index 4dd965c69071e..9f35fd73629cd 100644 --- a/interpreter/llvm/src/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp @@ -18,6 +18,7 @@ #include "llvm/DebugInfo/PDB/Native/InfoStream.h" #include "llvm/DebugInfo/PDB/Native/InfoStreamBuilder.h" #include "llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h" +#include "llvm/DebugInfo/PDB/Native/PublicsStreamBuilder.h" #include "llvm/DebugInfo/PDB/Native/RawError.h" #include "llvm/DebugInfo/PDB/Native/TpiStream.h" #include "llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h" @@ -33,6 +34,8 @@ using namespace llvm::support; PDBFileBuilder::PDBFileBuilder(BumpPtrAllocator &Allocator) : Allocator(Allocator) {} +PDBFileBuilder::~PDBFileBuilder() {} + Error PDBFileBuilder::initialize(uint32_t BlockSize) { auto ExpectedMsf = MSFBuilder::create(Allocator, BlockSize); if (!ExpectedMsf) @@ -71,6 +74,12 @@ PDBStringTableBuilder &PDBFileBuilder::getStringTableBuilder() { return Strings; } +PublicsStreamBuilder &PDBFileBuilder::getPublicsBuilder() { + if (!Publics) + Publics = llvm::make_unique(*Msf); + return *Publics; +} + Error PDBFileBuilder::addNamedStream(StringRef Name, uint32_t Size) { auto ExpectedStream = Msf->addStream(Size); if (!ExpectedStream) @@ -80,14 +89,22 @@ Error PDBFileBuilder::addNamedStream(StringRef Name, uint32_t Size) { } Expected PDBFileBuilder::finalizeMsfLayout() { + + if (Ipi && 
Ipi->getRecordCount() > 0) { + // In theory newer PDBs always have an ID stream, but by saying that we're + // only going to *really* have an ID stream if there is at least one ID + // record, we leave open the opportunity to test older PDBs such as those + // that don't have an ID stream. + auto &Info = getInfoBuilder(); + Info.addFeature(PdbRaw_FeatureSig::VC140); + } + uint32_t StringsLen = Strings.calculateSerializedSize(); if (auto EC = addNamedStream("/names", StringsLen)) return std::move(EC); if (auto EC = addNamedStream("/LinkInfo", 0)) return std::move(EC); - if (auto EC = addNamedStream("/src/headerblock", 0)) - return std::move(EC); if (Info) { if (auto EC = Info->finalizeMsfLayout()) @@ -105,6 +122,14 @@ Expected PDBFileBuilder::finalizeMsfLayout() { if (auto EC = Ipi->finalizeMsfLayout()) return std::move(EC); } + if (Publics) { + if (auto EC = Publics->finalizeMsfLayout()) + return std::move(EC); + if (Dbi) { + Dbi->setPublicsStreamIndex(Publics->getStreamIndex()); + Dbi->setSymbolRecordStreamIndex(Publics->getRecordStreamIdx()); + } + } return Msf->build(); } @@ -117,6 +142,7 @@ Expected PDBFileBuilder::getNamedStreamIndex(StringRef Name) const { } Error PDBFileBuilder::commit(StringRef Filename) { + assert(!Filename.empty()); auto ExpectedLayout = finalizeMsfLayout(); if (!ExpectedLayout) return ExpectedLayout.takeError(); @@ -139,8 +165,8 @@ Error PDBFileBuilder::commit(StringRef Filename) { if (auto EC = Writer.writeArray(Layout.DirectoryBlocks)) return EC; - auto DirStream = - WritableMappedBlockStream::createDirectoryStream(Layout, Buffer); + auto DirStream = WritableMappedBlockStream::createDirectoryStream( + Layout, Buffer, Allocator); BinaryStreamWriter DW(*DirStream); if (auto EC = DW.writeInteger(Layout.StreamSizes.size())) return EC; @@ -157,8 +183,8 @@ Error PDBFileBuilder::commit(StringRef Filename) { if (!ExpectedSN) return ExpectedSN.takeError(); - auto NS = WritableMappedBlockStream::createIndexedStream(Layout, Buffer, - *ExpectedSN); + auto NS = WritableMappedBlockStream::createIndexedStream( + Layout, Buffer, *ExpectedSN, Allocator); BinaryStreamWriter NSWriter(*NS); if (auto EC = Strings.commit(NSWriter)) return EC; @@ -183,5 +209,13 @@ Error PDBFileBuilder::commit(StringRef Filename) { return EC; } + if (Publics) { + auto PS = WritableMappedBlockStream::createIndexedStream( + Layout, Buffer, Publics->getStreamIndex(), Allocator); + BinaryStreamWriter PSWriter(*PS); + if (auto EC = Publics->commit(PSWriter)) + return EC; + } + return Buffer.commit(); } diff --git a/interpreter/llvm/src/lib/DebugInfo/PDB/Native/PDBStringTable.cpp b/interpreter/llvm/src/lib/DebugInfo/PDB/Native/PDBStringTable.cpp index e84573fe07b8e..acd45f7a62192 100644 --- a/interpreter/llvm/src/lib/DebugInfo/PDB/Native/PDBStringTable.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/PDB/Native/PDBStringTable.cpp @@ -21,7 +21,7 @@ using namespace llvm; using namespace llvm::support; using namespace llvm::pdb; -uint32_t PDBStringTable::getByteSize() const { return ByteSize; } +uint32_t PDBStringTable::getByteSize() const { return Header->ByteSize; } uint32_t PDBStringTable::getNameCount() const { return NameCount; } uint32_t PDBStringTable::getHashVersion() const { return Header->HashVersion; } uint32_t PDBStringTable::getSignature() const { return Header->Signature; } @@ -56,6 +56,11 @@ Error PDBStringTable::readStrings(BinaryStreamReader &Reader) { return Error::success(); } +const codeview::DebugStringTableSubsectionRef & +PDBStringTable::getStringTable() const { + return Strings; +} + Error 
PDBStringTable::readHashTable(BinaryStreamReader &Reader) { const support::ulittle32_t *HashCount; if (auto EC = Reader.readObject(HashCount)) diff --git a/interpreter/llvm/src/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp b/interpreter/llvm/src/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp index a472181a4895c..90acfadd311ff 100644 --- a/interpreter/llvm/src/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp @@ -52,6 +52,11 @@ uint32_t PDBStringTableBuilder::calculateSerializedSize() const { return Size; } +void PDBStringTableBuilder::setStrings( + const codeview::DebugStringTableSubsection &Strings) { + this->Strings = Strings; +} + Error PDBStringTableBuilder::writeHeader(BinaryStreamWriter &Writer) const { // Write a header PDBStringTableHeader H; diff --git a/interpreter/llvm/src/lib/DebugInfo/PDB/Native/PublicsStream.cpp b/interpreter/llvm/src/lib/DebugInfo/PDB/Native/PublicsStream.cpp index 58202577672a3..9c3e654f808ba 100644 --- a/interpreter/llvm/src/lib/DebugInfo/PDB/Native/PublicsStream.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/PDB/Native/PublicsStream.cpp @@ -41,19 +41,6 @@ using namespace llvm::msf; using namespace llvm::support; using namespace llvm::pdb; -// This is PSGSIHDR struct defined in -// https://github.com/Microsoft/microsoft-pdb/blob/master/PDB/dbi/gsi.h -struct PublicsStream::HeaderInfo { - ulittle32_t SymHash; - ulittle32_t AddrMap; - ulittle32_t NumThunks; - ulittle32_t SizeOfThunk; - ulittle16_t ISectThunkTable; - char Padding[2]; - ulittle32_t OffThunkTable; - ulittle32_t NumSections; -}; - PublicsStream::PublicsStream(PDBFile &File, std::unique_ptr Stream) : Pdb(File), Stream(std::move(Stream)) {} @@ -72,7 +59,8 @@ Error PublicsStream::reload() { BinaryStreamReader Reader(*Stream); // Check stream size. - if (Reader.bytesRemaining() < sizeof(HeaderInfo) + sizeof(GSIHashHeader)) + if (Reader.bytesRemaining() < + sizeof(PublicsStreamHeader) + sizeof(GSIHashHeader)) return make_error(raw_error_code::corrupt_file, "Publics Stream does not contain a header."); @@ -105,10 +93,12 @@ Error PublicsStream::reload() { "Could not read a thunk map.")); // Something called "section map" follows. 
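[Editor's sketch, not part of the patch: the fix below makes the trailing "section map" optional, reading it only when bytes remain rather than failing on older, shorter publics streams. The generic pattern, with stand-in names.]

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <vector>

struct ReaderSketch {
  const uint8_t *Pos = nullptr, *End = nullptr;
  size_t bytesRemaining() const { return size_t(End - Pos); }
  bool read(void *Out, size_t N) {
    if (bytesRemaining() < N)
      return false;               // present but truncated: a real error
    std::memcpy(Out, Pos, N);
    Pos += N;
    return true;
  }
};

static bool readOptionalTrailer(ReaderSketch &R, std::vector<uint8_t> &Out,
                                size_t N) {
  if (R.bytesRemaining() == 0)
    return true;                  // a wholly absent trailer is fine
  Out.resize(N);
  return R.read(Out.data(), N);
}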
- if (auto EC = Reader.readArray(SectionOffsets, Header->NumSections)) - return joinErrors(std::move(EC), - make_error<RawError>(raw_error_code::corrupt_file, - "Could not read a section map.")); + if (Reader.bytesRemaining() > 0) { + if (auto EC = Reader.readArray(SectionOffsets, Header->NumSections)) + return joinErrors(std::move(EC), + make_error<RawError>(raw_error_code::corrupt_file, + "Could not read a section map.")); + } if (Reader.bytesRemaining() > 0) return make_error<RawError>(raw_error_code::corrupt_file, @@ -128,4 +118,13 @@ PublicsStream::getSymbols(bool *HadError) const { return SS.getSymbols(HadError); } +Expected<const codeview::CVSymbolArray &> +PublicsStream::getSymbolArray() const { + auto SymbolS = Pdb.getPDBSymbolStream(); + if (!SymbolS) + return SymbolS.takeError(); + + return SymbolS->getSymbolArray(); +} + Error PublicsStream::commit() { return Error::success(); } diff --git a/interpreter/llvm/src/lib/DebugInfo/PDB/Native/PublicsStreamBuilder.cpp b/interpreter/llvm/src/lib/DebugInfo/PDB/Native/PublicsStreamBuilder.cpp new file mode 100644 index 0000000000000..28c4a8fc35d92 --- /dev/null +++ b/interpreter/llvm/src/lib/DebugInfo/PDB/Native/PublicsStreamBuilder.cpp @@ -0,0 +1,89 @@ +//===- PublicsStreamBuilder.cpp - PDB Publics Stream Creation ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Native/PublicsStreamBuilder.h" + +#include "llvm/DebugInfo/MSF/MSFBuilder.h" +#include "llvm/DebugInfo/MSF/MSFCommon.h" +#include "llvm/DebugInfo/MSF/MappedBlockStream.h" + +#include "GSI.h" + +using namespace llvm; +using namespace llvm::msf; +using namespace llvm::pdb; + +PublicsStreamBuilder::PublicsStreamBuilder(msf::MSFBuilder &Msf) : Msf(Msf) {} + +PublicsStreamBuilder::~PublicsStreamBuilder() {} + +uint32_t PublicsStreamBuilder::calculateSerializedLength() const { + uint32_t Size = 0; + Size += sizeof(PublicsStreamHeader); + Size += sizeof(GSIHashHeader); + Size += HashRecords.size() * sizeof(PSHashRecord); + size_t BitmapSizeInBits = alignTo(IPHR_HASH + 1, 32); + uint32_t NumBitmapEntries = BitmapSizeInBits / 8; + Size += NumBitmapEntries; + + // FIXME: Account for hash buckets. For now, since we write a zero-bitmap + // indicating that no hash buckets are valid, we also write zero bytes of + // hash bucket data. + Size += 0; + return Size; +} + +Error PublicsStreamBuilder::finalizeMsfLayout() { + Expected<uint32_t> Idx = Msf.addStream(calculateSerializedLength()); + if (!Idx) + return Idx.takeError(); + StreamIdx = *Idx; + + Expected<uint32_t> RecordIdx = Msf.addStream(0); + if (!RecordIdx) + return RecordIdx.takeError(); + RecordStreamIdx = *RecordIdx; + return Error::success(); +} + +Error PublicsStreamBuilder::commit(BinaryStreamWriter &PublicsWriter) { + PublicsStreamHeader PSH; + GSIHashHeader GSH; + + // FIXME: Figure out what to put for these values.
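[Editor's note, not part of the patch: a worked example of the bitmap sizing in calculateSerializedLength() above, under the assumption that IPHR_HASH == 4096, the value used by the reference PDB implementation. alignTo(4096 + 1, 32) is 4128 bits, and 4128 / 8 is 516 bytes of bitmap.]

#include <cassert>
#include <cstdint>

static uint64_t alignToSketch(uint64_t Value, uint64_t Align) {
  return (Value + Align - 1) / Align * Align; // same contract as llvm::alignTo
}

static void checkBitmapSize() {
  const unsigned IPHR_HASH = 4096;                  // assumption, see note
  uint64_t Bits = alignToSketch(IPHR_HASH + 1, 32); // 4128 bits
  assert(Bits / 8 == 516);                          // bitmap bytes written
}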
+ PSH.AddrMap = 0; + PSH.ISectThunkTable = 0; + PSH.NumSections = 0; + PSH.NumThunks = 0; + PSH.OffThunkTable = 0; + PSH.SizeOfThunk = 0; + PSH.SymHash = 0; + + GSH.VerSignature = GSIHashHeader::HdrSignature; + GSH.VerHdr = GSIHashHeader::HdrVersion; + GSH.HrSize = 0; + GSH.NumBuckets = 0; + + if (auto EC = PublicsWriter.writeObject(PSH)) + return EC; + if (auto EC = PublicsWriter.writeObject(GSH)) + return EC; + if (auto EC = PublicsWriter.writeArray(makeArrayRef(HashRecords))) + return EC; + + size_t BitmapSizeInBits = alignTo(IPHR_HASH + 1, 32); + uint32_t NumBitmapEntries = BitmapSizeInBits / 8; + std::vector BitmapData(NumBitmapEntries); + // FIXME: Build an actual bitmap + if (auto EC = PublicsWriter.writeBytes(makeArrayRef(BitmapData))) + return EC; + + // FIXME: Write actual hash buckets. + return Error::success(); +} diff --git a/interpreter/llvm/src/lib/DebugInfo/PDB/Native/TpiHashing.cpp b/interpreter/llvm/src/lib/DebugInfo/PDB/Native/TpiHashing.cpp index 16904a5a27ed3..77a2d57a83698 100644 --- a/interpreter/llvm/src/lib/DebugInfo/PDB/Native/TpiHashing.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/PDB/Native/TpiHashing.cpp @@ -9,102 +9,81 @@ #include "llvm/DebugInfo/PDB/Native/TpiHashing.h" +#include "llvm/DebugInfo/CodeView/TypeDeserializer.h" #include "llvm/DebugInfo/PDB/Native/Hash.h" -#include "llvm/DebugInfo/PDB/Native/RawError.h" +#include "llvm/Support/JamCRC.h" using namespace llvm; using namespace llvm::codeview; using namespace llvm::pdb; // Corresponds to `fUDTAnon`. -template static bool isAnonymous(T &Rec) { - StringRef Name = Rec.getName(); +static bool isAnonymous(StringRef Name) { return Name == "" || Name == "__unnamed" || Name.endswith("::") || Name.endswith("::__unnamed"); } -// Computes a hash for a given TPI record. -template -static uint32_t getTpiHash(T &Rec, ArrayRef FullRecord) { - auto Opts = static_cast(Rec.getOptions()); - - bool ForwardRef = - Opts & static_cast(ClassOptions::ForwardReference); - bool Scoped = Opts & static_cast(ClassOptions::Scoped); - bool UniqueName = Opts & static_cast(ClassOptions::HasUniqueName); - bool IsAnon = UniqueName && isAnonymous(Rec); +// Computes the hash for a user-defined type record. This could be a struct, +// class, union, or enum. 
+static uint32_t getHashForUdt(const TagRecord &Rec, + ArrayRef FullRecord) { + ClassOptions Opts = Rec.getOptions(); + bool ForwardRef = bool(Opts & ClassOptions::ForwardReference); + bool Scoped = bool(Opts & ClassOptions::Scoped); + bool HasUniqueName = bool(Opts & ClassOptions::HasUniqueName); + bool IsAnon = HasUniqueName && isAnonymous(Rec.getName()); if (!ForwardRef && !Scoped && !IsAnon) return hashStringV1(Rec.getName()); - if (!ForwardRef && UniqueName && !IsAnon) + if (!ForwardRef && HasUniqueName && !IsAnon) return hashStringV1(Rec.getUniqueName()); return hashBufferV8(FullRecord); } -template static uint32_t getSourceLineHash(T &Rec) { - char Buf[4]; - support::endian::write32le(Buf, Rec.getUDT().getIndex()); - return hashStringV1(StringRef(Buf, 4)); -} - -void TpiHashUpdater::visitKnownRecordImpl(CVType &CVR, - UdtSourceLineRecord &Rec) { - CVR.Hash = getSourceLineHash(Rec); -} - -void TpiHashUpdater::visitKnownRecordImpl(CVType &CVR, - UdtModSourceLineRecord &Rec) { - CVR.Hash = getSourceLineHash(Rec); -} - -void TpiHashUpdater::visitKnownRecordImpl(CVType &CVR, ClassRecord &Rec) { - CVR.Hash = getTpiHash(Rec, CVR.data()); -} - -void TpiHashUpdater::visitKnownRecordImpl(CVType &CVR, EnumRecord &Rec) { - CVR.Hash = getTpiHash(Rec, CVR.data()); -} - -void TpiHashUpdater::visitKnownRecordImpl(CVType &CVR, UnionRecord &Rec) { - CVR.Hash = getTpiHash(Rec, CVR.data()); -} - -Error TpiHashVerifier::visitKnownRecord(CVType &CVR, UdtSourceLineRecord &Rec) { - return verifySourceLine(Rec.getUDT()); -} - -Error TpiHashVerifier::visitKnownRecord(CVType &CVR, - UdtModSourceLineRecord &Rec) { - return verifySourceLine(Rec.getUDT()); -} - -Error TpiHashVerifier::visitKnownRecord(CVType &CVR, ClassRecord &Rec) { - if (getTpiHash(Rec, CVR.data()) % NumHashBuckets != HashValues[Index]) - return errorInvalidHash(); - return Error::success(); -} -Error TpiHashVerifier::visitKnownRecord(CVType &CVR, EnumRecord &Rec) { - if (getTpiHash(Rec, CVR.data()) % NumHashBuckets != HashValues[Index]) - return errorInvalidHash(); - return Error::success(); -} -Error TpiHashVerifier::visitKnownRecord(CVType &CVR, UnionRecord &Rec) { - if (getTpiHash(Rec, CVR.data()) % NumHashBuckets != HashValues[Index]) - return errorInvalidHash(); - return Error::success(); +template +static Expected getHashForUdt(const CVType &Rec) { + T Deserialized; + if (auto E = TypeDeserializer::deserializeAs(const_cast(Rec), + Deserialized)) + return std::move(E); + return getHashForUdt(Deserialized, Rec.data()); } -Error TpiHashVerifier::verifySourceLine(codeview::TypeIndex TI) { +template +static Expected getSourceLineHash(const CVType &Rec) { + T Deserialized; + if (auto E = TypeDeserializer::deserializeAs(const_cast(Rec), + Deserialized)) + return std::move(E); char Buf[4]; - support::endian::write32le(Buf, TI.getIndex()); - uint32_t Hash = hashStringV1(StringRef(Buf, 4)); - if (Hash % NumHashBuckets != HashValues[Index]) - return errorInvalidHash(); - return Error::success(); + support::endian::write32le(Buf, Deserialized.getUDT().getIndex()); + return hashStringV1(StringRef(Buf, 4)); } -Error TpiHashVerifier::visitTypeBegin(CVType &Rec) { - ++Index; - RawRecord = Rec; - return Error::success(); +Expected llvm::pdb::hashTypeRecord(const CVType &Rec) { + switch (Rec.kind()) { + case LF_CLASS: + case LF_STRUCTURE: + case LF_INTERFACE: + return getHashForUdt(Rec); + case LF_UNION: + return getHashForUdt(Rec); + case LF_ENUM: + return getHashForUdt(Rec); + + case LF_UDT_SRC_LINE: + return getSourceLineHash(Rec); + case 
LF_UDT_MOD_SRC_LINE: + return getSourceLineHash(Rec); + + default: + break; + } + + // Run CRC32 over the bytes. This corresponds to `hashBufv8`. + JamCRC JC(/*Init=*/0U); + ArrayRef Bytes(reinterpret_cast(Rec.data().data()), + Rec.data().size()); + JC.update(Bytes); + return JC.getCRC(); } diff --git a/interpreter/llvm/src/lib/DebugInfo/PDB/Native/TpiStream.cpp b/interpreter/llvm/src/lib/DebugInfo/PDB/Native/TpiStream.cpp index c0999d93dbb98..d3ef87d9009de 100644 --- a/interpreter/llvm/src/lib/DebugInfo/PDB/Native/TpiStream.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/PDB/Native/TpiStream.cpp @@ -8,14 +8,12 @@ //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/PDB/Native/TpiStream.h" + #include "llvm/ADT/iterator_range.h" -#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h" -#include "llvm/DebugInfo/CodeView/TypeDeserializer.h" +#include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h" #include "llvm/DebugInfo/CodeView/TypeRecord.h" -#include "llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h" #include "llvm/DebugInfo/MSF/MappedBlockStream.h" #include "llvm/DebugInfo/PDB/Native/PDBFile.h" -#include "llvm/DebugInfo/PDB/Native/PDBTypeServerHandler.h" #include "llvm/DebugInfo/PDB/Native/RawConstants.h" #include "llvm/DebugInfo/PDB/Native/RawError.h" #include "llvm/DebugInfo/PDB/Native/RawTypes.h" @@ -33,8 +31,7 @@ using namespace llvm::support; using namespace llvm::msf; using namespace llvm::pdb; -TpiStream::TpiStream(const PDBFile &File, - std::unique_ptr Stream) +TpiStream::TpiStream(PDBFile &File, std::unique_ptr Stream) : Pdb(File), Stream(std::move(Stream)) {} TpiStream::~TpiStream() = default; @@ -68,7 +65,13 @@ Error TpiStream::reload() { "TPI Stream Invalid number of hash buckets."); // The actual type records themselves come from this stream - if (auto EC = Reader.readArray(TypeRecords, Header->TypeRecordBytes)) + if (auto EC = + Reader.readSubstream(TypeRecordsSubstream, Header->TypeRecordBytes)) + return EC; + + BinaryStreamReader RecordReader(TypeRecordsSubstream.StreamData); + if (auto EC = + RecordReader.readArray(TypeRecords, TypeRecordsSubstream.size())) return EC; // Hash indices, hash values, etc come from the hash stream. @@ -78,7 +81,8 @@ Error TpiStream::reload() { "Invalid TPI hash stream index."); auto HS = MappedBlockStream::createIndexedStream( - Pdb.getMsfLayout(), Pdb.getMsfBuffer(), Header->HashStreamIndex); + Pdb.getMsfLayout(), Pdb.getMsfBuffer(), Header->HashStreamIndex, + Pdb.getAllocator()); BinaryStreamReader HSR(*HS); // There should be a hash value for every type record, or no hashes at all. 
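The TpiHashing.cpp rewrite above collapses the old visitor-based hash updater into a single hashTypeRecord entry point: UDT records hash their name, their unique name, or the full record bytes depending on the forward-reference, scoped, and anonymous flags; source-line records hash the little-endian bytes of the referenced type index; everything else falls back to a CRC32 over the raw record. The snippet below is a hedged, self-contained sketch of the source-line case only; toyHashStringV1 is a placeholder so it compiles without LLVM, and the real hashStringV1 in llvm/DebugInfo/PDB/Native/Hash.h uses a different algorithm.

#include <cstdint>
#include <iostream>
#include <string>

// Placeholder hash, standing in for LLVM's hashStringV1 (not the same math).
static uint32_t toyHashStringV1(const std::string &S) {
  uint32_t H = 0;
  for (unsigned char C : S)
    H = H * 31 + C;
  return H;
}

uint32_t sourceLineHash(uint32_t TypeIndex) {
  char Buf[4];
  // Equivalent of support::endian::write32le: serialize the 32-bit type
  // index in little-endian byte order, then hash it as a 4-byte string.
  for (int I = 0; I < 4; ++I)
    Buf[I] = static_cast<char>((TypeIndex >> (8 * I)) & 0xFF);
  return toyHashStringV1(std::string(Buf, 4));
}

int main() { std::cout << sourceLineHash(0x1000) << '\n'; }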
@@ -91,9 +95,6 @@ Error TpiStream::reload() { HSR.setOffset(Header->HashValueBuffer.Off); if (auto EC = HSR.readArray(HashValues, NumHashValues)) return EC; - std::vector HashValueList; - for (auto I : HashValues) - HashValueList.push_back(I); HSR.setOffset(Header->IndexOffsetBuffer.Off); uint32_t NumTypeIndexOffsets = @@ -110,6 +111,8 @@ Error TpiStream::reload() { HashStream = std::move(HS); } + Types = llvm::make_unique( + TypeRecords, getNumTypeRecords(), getTypeIndexOffsets()); return Error::success(); } @@ -137,6 +140,10 @@ uint16_t TpiStream::getTypeHashStreamAuxIndex() const { uint32_t TpiStream::getNumHashBuckets() const { return Header->NumHashBuckets; } uint32_t TpiStream::getHashKeySize() const { return Header->HashKeySize; } +BinarySubstreamRef TpiStream::getTypeRecordsSubstream() const { + return TypeRecordsSubstream; +} + FixedStreamArray TpiStream::getHashValues() const { return HashValues; } diff --git a/interpreter/llvm/src/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp b/interpreter/llvm/src/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp index 701a318511b8f..9e943c7f114d5 100644 --- a/interpreter/llvm/src/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp @@ -69,7 +69,7 @@ Error TpiStreamBuilder::finalize() { uint32_t Count = TypeRecords.size(); - H->Version = *VerHeader; + H->Version = VerHeader; H->HeaderSize = sizeof(TpiStreamHeader); H->TypeIndexBegin = codeview::TypeIndex::FirstNonSimpleIndex; H->TypeIndexEnd = H->TypeIndexBegin + Count; @@ -147,8 +147,8 @@ Error TpiStreamBuilder::commit(const msf::MSFLayout &Layout, if (auto EC = finalize()) return EC; - auto InfoS = - WritableMappedBlockStream::createIndexedStream(Layout, Buffer, Idx); + auto InfoS = WritableMappedBlockStream::createIndexedStream(Layout, Buffer, + Idx, Allocator); BinaryStreamWriter Writer(*InfoS); if (auto EC = Writer.writeObject(*Header)) @@ -159,8 +159,8 @@ Error TpiStreamBuilder::commit(const msf::MSFLayout &Layout, return EC; if (HashStreamIndex != kInvalidStreamIndex) { - auto HVS = WritableMappedBlockStream::createIndexedStream(Layout, Buffer, - HashStreamIndex); + auto HVS = WritableMappedBlockStream::createIndexedStream( + Layout, Buffer, HashStreamIndex, Allocator); BinaryStreamWriter HW(*HVS); if (HashValueStream) { if (auto EC = HW.writeStreamRef(*HashValueStream)) diff --git a/interpreter/llvm/src/lib/DebugInfo/PDB/PDB.cpp b/interpreter/llvm/src/lib/DebugInfo/PDB/PDB.cpp index 7e3acc1165f33..501d4f5985b7d 100644 --- a/interpreter/llvm/src/lib/DebugInfo/PDB/PDB.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/PDB/PDB.cpp @@ -1,4 +1,4 @@ -//===- PDB.cpp - base header file for creating a PDB reader -----*- C++ -*-===// +//===- PDB.cpp - base header file for creating a PDB reader ---------------===// // // The LLVM Compiler Infrastructure // @@ -8,18 +8,14 @@ //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/PDB/PDB.h" - #include "llvm/ADT/StringRef.h" #include "llvm/Config/config.h" #include "llvm/DebugInfo/PDB/GenericError.h" -#include "llvm/DebugInfo/PDB/IPDBSession.h" -#include "llvm/DebugInfo/PDB/PDB.h" #if LLVM_ENABLE_DIA_SDK #include "llvm/DebugInfo/PDB/DIA/DIASession.h" #endif #include "llvm/DebugInfo/PDB/Native/NativeSession.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/Error.h" using namespace llvm; using namespace llvm::pdb; @@ -33,7 +29,7 @@ Error llvm::pdb::loadDataForPDB(PDB_ReaderType Type, 
StringRef Path, #if LLVM_ENABLE_DIA_SDK return DIASession::createFromPdb(Path, Session); #else - return llvm::make_error("DIA is not installed on the system"); + return make_error("DIA is not installed on the system"); #endif } @@ -46,6 +42,6 @@ Error llvm::pdb::loadDataForEXE(PDB_ReaderType Type, StringRef Path, #if LLVM_ENABLE_DIA_SDK return DIASession::createFromExe(Path, Session); #else - return llvm::make_error("DIA is not installed on the system"); + return make_error("DIA is not installed on the system"); #endif } diff --git a/interpreter/llvm/src/lib/DebugInfo/PDB/PDBContext.cpp b/interpreter/llvm/src/lib/DebugInfo/PDB/PDBContext.cpp index 94b81ecf561e2..df0feac2bc40a 100644 --- a/interpreter/llvm/src/lib/DebugInfo/PDB/PDBContext.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/PDB/PDBContext.cpp @@ -12,8 +12,8 @@ #include "llvm/DebugInfo/PDB/IPDBLineNumber.h" #include "llvm/DebugInfo/PDB/IPDBSourceFile.h" #include "llvm/DebugInfo/PDB/PDBSymbol.h" -#include "llvm/DebugInfo/PDB/PDBSymbolFunc.h" #include "llvm/DebugInfo/PDB/PDBSymbolData.h" +#include "llvm/DebugInfo/PDB/PDBSymbolFunc.h" #include "llvm/DebugInfo/PDB/PDBSymbolPublicSymbol.h" #include "llvm/Object/COFF.h" @@ -29,8 +29,7 @@ PDBContext::PDBContext(const COFFObjectFile &Object, Session->setLoadAddress(ImageBase.get()); } -void PDBContext::dump(raw_ostream &OS, DIDumpType DumpType, bool DumpEH, - bool SummarizeTypes) {} +void PDBContext::dump(raw_ostream &OS, DIDumpOptions DumpOpts){} DILineInfo PDBContext::getLineInfoForAddress(uint64_t Address, DILineInfoSpecifier Specifier) { diff --git a/interpreter/llvm/src/lib/DebugInfo/PDB/PDBExtras.cpp b/interpreter/llvm/src/lib/DebugInfo/PDB/PDBExtras.cpp index dc22a30facab3..c291185bc67a6 100644 --- a/interpreter/llvm/src/lib/DebugInfo/PDB/PDBExtras.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/PDB/PDBExtras.cpp @@ -1,4 +1,4 @@ -//===- PDBExtras.cpp - helper functions and classes for PDBs -----*- C++-*-===// +//===- PDBExtras.cpp - helper functions and classes for PDBs --------------===// // // The LLVM Compiler Infrastructure // @@ -8,9 +8,9 @@ //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/PDB/PDBExtras.h" - #include "llvm/ADT/ArrayRef.h" #include "llvm/DebugInfo/CodeView/Formatters.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; using namespace llvm::pdb; @@ -260,12 +260,6 @@ raw_ostream &llvm::pdb::operator<<(raw_ostream &OS, return OS; } -raw_ostream &llvm::pdb::operator<<(raw_ostream &OS, const PDB_UniqueId &Guid) { - codeview::detail::GuidAdapter A(Guid.Guid); - A.format(OS, ""); - return OS; -} - raw_ostream &llvm::pdb::operator<<(raw_ostream &OS, const PDB_UdtType &Type) { switch (Type) { CASE_OUTPUT_ENUM_CLASS_STR(PDB_UdtType, Class, "class", OS) diff --git a/interpreter/llvm/src/lib/DebugInfo/PDB/PDBSymbolBlock.cpp b/interpreter/llvm/src/lib/DebugInfo/PDB/PDBSymbolBlock.cpp index 7385d3ba14898..7076b4aec3478 100644 --- a/interpreter/llvm/src/lib/DebugInfo/PDB/PDBSymbolBlock.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/PDB/PDBSymbolBlock.cpp @@ -9,8 +9,8 @@ #include "llvm/DebugInfo/PDB/PDBSymbolBlock.h" -#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include "llvm/DebugInfo/PDB/PDBSymDumper.h" +#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include diff --git a/interpreter/llvm/src/lib/DebugInfo/PDB/PDBSymbolCompilandDetails.cpp b/interpreter/llvm/src/lib/DebugInfo/PDB/PDBSymbolCompilandDetails.cpp index e08450e0ad0c4..f73cd36d057a0 100644 --- 
a/interpreter/llvm/src/lib/DebugInfo/PDB/PDBSymbolCompilandDetails.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/PDB/PDBSymbolCompilandDetails.cpp @@ -9,8 +9,8 @@ #include "llvm/DebugInfo/PDB/PDBSymbolCompilandDetails.h" -#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include "llvm/DebugInfo/PDB/PDBSymDumper.h" +#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include diff --git a/interpreter/llvm/src/lib/DebugInfo/PDB/PDBSymbolCompilandEnv.cpp b/interpreter/llvm/src/lib/DebugInfo/PDB/PDBSymbolCompilandEnv.cpp index 2f1c43666ae54..df696fa8c5f25 100644 --- a/interpreter/llvm/src/lib/DebugInfo/PDB/PDBSymbolCompilandEnv.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/PDB/PDBSymbolCompilandEnv.cpp @@ -10,8 +10,8 @@ #include "llvm/DebugInfo/PDB/PDBSymbolCompilandEnv.h" #include "llvm/DebugInfo/PDB/IPDBRawSymbol.h" -#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include "llvm/DebugInfo/PDB/PDBSymDumper.h" +#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include diff --git a/interpreter/llvm/src/lib/DebugInfo/PDB/PDBSymbolCustom.cpp b/interpreter/llvm/src/lib/DebugInfo/PDB/PDBSymbolCustom.cpp index 9ec20bb62d758..a7b69a755941e 100644 --- a/interpreter/llvm/src/lib/DebugInfo/PDB/PDBSymbolCustom.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/PDB/PDBSymbolCustom.cpp @@ -10,8 +10,8 @@ #include "llvm/DebugInfo/PDB/PDBSymbolCustom.h" #include "llvm/DebugInfo/PDB/IPDBRawSymbol.h" -#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include "llvm/DebugInfo/PDB/PDBSymDumper.h" +#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include diff --git a/interpreter/llvm/src/lib/DebugInfo/PDB/PDBSymbolFunc.cpp b/interpreter/llvm/src/lib/DebugInfo/PDB/PDBSymbolFunc.cpp index 0734a1f8314ac..5a5cb4c1b5cac 100644 --- a/interpreter/llvm/src/lib/DebugInfo/PDB/PDBSymbolFunc.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/PDB/PDBSymbolFunc.cpp @@ -12,10 +12,10 @@ #include "llvm/DebugInfo/PDB/ConcreteSymbolEnumerator.h" #include "llvm/DebugInfo/PDB/IPDBEnumChildren.h" #include "llvm/DebugInfo/PDB/IPDBSession.h" +#include "llvm/DebugInfo/PDB/PDBSymDumper.h" #include "llvm/DebugInfo/PDB/PDBSymbolData.h" #include "llvm/DebugInfo/PDB/PDBSymbolTypeFunctionSig.h" #include "llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h" -#include "llvm/DebugInfo/PDB/PDBSymDumper.h" #include "llvm/DebugInfo/PDB/PDBTypes.h" #include diff --git a/interpreter/llvm/src/lib/DebugInfo/PDB/PDBSymbolFuncDebugEnd.cpp b/interpreter/llvm/src/lib/DebugInfo/PDB/PDBSymbolFuncDebugEnd.cpp index 482c95e3a8509..4a4195beb4ea5 100644 --- a/interpreter/llvm/src/lib/DebugInfo/PDB/PDBSymbolFuncDebugEnd.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/PDB/PDBSymbolFuncDebugEnd.cpp @@ -9,8 +9,8 @@ #include "llvm/DebugInfo/PDB/PDBSymbolFuncDebugEnd.h" -#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include "llvm/DebugInfo/PDB/PDBSymDumper.h" +#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include diff --git a/interpreter/llvm/src/lib/DebugInfo/PDB/PDBSymbolFuncDebugStart.cpp b/interpreter/llvm/src/lib/DebugInfo/PDB/PDBSymbolFuncDebugStart.cpp index ae23c7619e2aa..a448a404dc4a0 100644 --- a/interpreter/llvm/src/lib/DebugInfo/PDB/PDBSymbolFuncDebugStart.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/PDB/PDBSymbolFuncDebugStart.cpp @@ -9,8 +9,8 @@ #include "llvm/DebugInfo/PDB/PDBSymbolFuncDebugStart.h" -#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include "llvm/DebugInfo/PDB/PDBSymDumper.h" +#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include diff --git a/interpreter/llvm/src/lib/DebugInfo/PDB/PDBSymbolPublicSymbol.cpp b/interpreter/llvm/src/lib/DebugInfo/PDB/PDBSymbolPublicSymbol.cpp index 87bb4044216b0..dbec16fcbaac2 100644 --- 
a/interpreter/llvm/src/lib/DebugInfo/PDB/PDBSymbolPublicSymbol.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/PDB/PDBSymbolPublicSymbol.cpp @@ -9,8 +9,8 @@ #include "llvm/DebugInfo/PDB/PDBSymbolPublicSymbol.h" -#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include "llvm/DebugInfo/PDB/PDBSymDumper.h" +#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include diff --git a/interpreter/llvm/src/lib/DebugInfo/PDB/PDBSymbolTypeBaseClass.cpp b/interpreter/llvm/src/lib/DebugInfo/PDB/PDBSymbolTypeBaseClass.cpp index 0ee18d4716249..0fdf8b6d0f774 100644 --- a/interpreter/llvm/src/lib/DebugInfo/PDB/PDBSymbolTypeBaseClass.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/PDB/PDBSymbolTypeBaseClass.cpp @@ -9,8 +9,8 @@ #include "llvm/DebugInfo/PDB/PDBSymbolTypeBaseClass.h" -#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include "llvm/DebugInfo/PDB/PDBSymDumper.h" +#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include diff --git a/interpreter/llvm/src/lib/DebugInfo/PDB/PDBSymbolTypeCustom.cpp b/interpreter/llvm/src/lib/DebugInfo/PDB/PDBSymbolTypeCustom.cpp index f617d8d0c2df5..726e7e1cdbb40 100644 --- a/interpreter/llvm/src/lib/DebugInfo/PDB/PDBSymbolTypeCustom.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/PDB/PDBSymbolTypeCustom.cpp @@ -9,8 +9,8 @@ #include "llvm/DebugInfo/PDB/PDBSymbolTypeCustom.h" -#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include "llvm/DebugInfo/PDB/PDBSymDumper.h" +#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include diff --git a/interpreter/llvm/src/lib/DebugInfo/PDB/PDBSymbolTypeDimension.cpp b/interpreter/llvm/src/lib/DebugInfo/PDB/PDBSymbolTypeDimension.cpp index 68ba87c1cdf8d..6c84b984d210b 100644 --- a/interpreter/llvm/src/lib/DebugInfo/PDB/PDBSymbolTypeDimension.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/PDB/PDBSymbolTypeDimension.cpp @@ -10,8 +10,8 @@ #include "llvm/DebugInfo/PDB/PDBSymbolTypeDimension.h" -#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include "llvm/DebugInfo/PDB/PDBSymDumper.h" +#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include diff --git a/interpreter/llvm/src/lib/DebugInfo/PDB/PDBSymbolTypeFriend.cpp b/interpreter/llvm/src/lib/DebugInfo/PDB/PDBSymbolTypeFriend.cpp index ec27985e91d17..c018772878886 100644 --- a/interpreter/llvm/src/lib/DebugInfo/PDB/PDBSymbolTypeFriend.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/PDB/PDBSymbolTypeFriend.cpp @@ -9,8 +9,8 @@ #include "llvm/DebugInfo/PDB/PDBSymbolTypeFriend.h" -#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include "llvm/DebugInfo/PDB/PDBSymDumper.h" +#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include diff --git a/interpreter/llvm/src/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp b/interpreter/llvm/src/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp index 473529d1b0432..0304c6286c8f1 100644 --- a/interpreter/llvm/src/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp @@ -12,9 +12,9 @@ #include "llvm/DebugInfo/PDB/ConcreteSymbolEnumerator.h" #include "llvm/DebugInfo/PDB/IPDBEnumChildren.h" #include "llvm/DebugInfo/PDB/IPDBSession.h" +#include "llvm/DebugInfo/PDB/PDBSymDumper.h" #include "llvm/DebugInfo/PDB/PDBSymbol.h" #include "llvm/DebugInfo/PDB/PDBSymbolTypeFunctionArg.h" -#include "llvm/DebugInfo/PDB/PDBSymDumper.h" #include diff --git a/interpreter/llvm/src/lib/DebugInfo/PDB/PDBSymbolTypeManaged.cpp b/interpreter/llvm/src/lib/DebugInfo/PDB/PDBSymbolTypeManaged.cpp index 86e0ec4f85657..7cfba823b4fa5 100644 --- a/interpreter/llvm/src/lib/DebugInfo/PDB/PDBSymbolTypeManaged.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/PDB/PDBSymbolTypeManaged.cpp @@ -9,8 +9,8 @@ #include 
"llvm/DebugInfo/PDB/PDBSymbolTypeManaged.h" -#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include "llvm/DebugInfo/PDB/PDBSymDumper.h" +#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include diff --git a/interpreter/llvm/src/lib/DebugInfo/PDB/PDBSymbolTypeVTableShape.cpp b/interpreter/llvm/src/lib/DebugInfo/PDB/PDBSymbolTypeVTableShape.cpp index a516a4d2c4299..ddc0574617c5a 100644 --- a/interpreter/llvm/src/lib/DebugInfo/PDB/PDBSymbolTypeVTableShape.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/PDB/PDBSymbolTypeVTableShape.cpp @@ -9,8 +9,8 @@ #include "llvm/DebugInfo/PDB/PDBSymbolTypeVTableShape.h" -#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include "llvm/DebugInfo/PDB/PDBSymDumper.h" +#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include diff --git a/interpreter/llvm/src/lib/DebugInfo/PDB/PDBSymbolUnknown.cpp b/interpreter/llvm/src/lib/DebugInfo/PDB/PDBSymbolUnknown.cpp index dbbea9c93e206..fdbe845f455a5 100644 --- a/interpreter/llvm/src/lib/DebugInfo/PDB/PDBSymbolUnknown.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/PDB/PDBSymbolUnknown.cpp @@ -9,8 +9,8 @@ #include "llvm/DebugInfo/PDB/PDBSymbolUnknown.h" -#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include "llvm/DebugInfo/PDB/PDBSymDumper.h" +#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include diff --git a/interpreter/llvm/src/lib/DebugInfo/PDB/PDBSymbolUsingNamespace.cpp b/interpreter/llvm/src/lib/DebugInfo/PDB/PDBSymbolUsingNamespace.cpp index 020aec9e98a88..f40578f4372a6 100644 --- a/interpreter/llvm/src/lib/DebugInfo/PDB/PDBSymbolUsingNamespace.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/PDB/PDBSymbolUsingNamespace.cpp @@ -9,8 +9,8 @@ #include "llvm/DebugInfo/PDB/PDBSymbolUsingNamespace.h" -#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include "llvm/DebugInfo/PDB/PDBSymDumper.h" +#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include diff --git a/interpreter/llvm/src/lib/DebugInfo/PDB/UDTLayout.cpp b/interpreter/llvm/src/lib/DebugInfo/PDB/UDTLayout.cpp index aacefae80c3a2..5f4390bbaf12b 100644 --- a/interpreter/llvm/src/lib/DebugInfo/PDB/UDTLayout.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/PDB/UDTLayout.cpp @@ -1,4 +1,4 @@ -//===- UDTLayout.cpp --------------------------------------------*- C++ -*-===// +//===- UDTLayout.cpp ------------------------------------------------------===// // // The LLVM Compiler Infrastructure // @@ -8,20 +8,25 @@ //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/PDB/UDTLayout.h" - +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/DebugInfo/PDB/IPDBRawSymbol.h" #include "llvm/DebugInfo/PDB/IPDBSession.h" #include "llvm/DebugInfo/PDB/PDBSymbol.h" #include "llvm/DebugInfo/PDB/PDBSymbolData.h" -#include "llvm/DebugInfo/PDB/PDBSymbolExe.h" #include "llvm/DebugInfo/PDB/PDBSymbolFunc.h" #include "llvm/DebugInfo/PDB/PDBSymbolTypeBaseClass.h" #include "llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h" #include "llvm/DebugInfo/PDB/PDBSymbolTypePointer.h" #include "llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h" #include "llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h" - -#include +#include "llvm/DebugInfo/PDB/PDBTypes.h" +#include "llvm/Support/Casting.h" +#include +#include +#include +#include using namespace llvm; using namespace llvm::pdb; @@ -176,18 +181,18 @@ void UDTLayoutBase::initializeChildren(const PDBSymbol &Sym) { else Bases.push_back(std::move(Base)); } - else if (auto Data = unique_dyn_cast(Child)) { if (Data->getDataKind() == PDB_DataKind::Member) Members.push_back(std::move(Data)); else - 
Other.push_back(std::move(Child)); + Other.push_back(std::move(Data)); } else if (auto VT = unique_dyn_cast(Child)) VTables.push_back(std::move(VT)); else if (auto Func = unique_dyn_cast(Child)) Funcs.push_back(std::move(Func)); - else + else { Other.push_back(std::move(Child)); + } } // We don't want to have any re-allocations in the list of bases, so make @@ -295,4 +300,4 @@ void UDTLayoutBase::addChildToLayout(std::unique_ptr Child) { } ChildStorage.push_back(std::move(Child)); -} \ No newline at end of file +} diff --git a/interpreter/llvm/src/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp b/interpreter/llvm/src/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp index f672680cb9ea7..2a89faff96470 100644 --- a/interpreter/llvm/src/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp @@ -15,12 +15,12 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" +#include "llvm/BinaryFormat/COFF.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h" #include "llvm/Object/COFF.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Object/SymbolSize.h" -#include "llvm/Support/COFF.h" #include "llvm/Support/Casting.h" #include "llvm/Support/DataExtractor.h" #include "llvm/Support/Error.h" diff --git a/interpreter/llvm/src/lib/DebugInfo/Symbolize/Symbolize.cpp b/interpreter/llvm/src/lib/DebugInfo/Symbolize/Symbolize.cpp index 9de3ddc039d69..19711ca58c6f0 100644 --- a/interpreter/llvm/src/lib/DebugInfo/Symbolize/Symbolize.cpp +++ b/interpreter/llvm/src/lib/DebugInfo/Symbolize/Symbolize.cpp @@ -16,6 +16,7 @@ #include "SymbolizableObjectFile.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/BinaryFormat/COFF.h" #include "llvm/Config/config.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/DebugInfo/PDB/PDB.h" @@ -24,7 +25,6 @@ #include "llvm/Object/ELFObjectFile.h" #include "llvm/Object/MachO.h" #include "llvm/Object/MachOUniversal.h" -#include "llvm/Support/COFF.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Compression.h" #include "llvm/Support/DataExtractor.h" @@ -39,6 +39,8 @@ #if defined(_MSC_VER) #include + +// This must be included after windows.h. 
#include #pragma comment(lib, "dbghelp.lib") diff --git a/interpreter/llvm/src/lib/Demangle/ItaniumDemangle.cpp b/interpreter/llvm/src/lib/Demangle/ItaniumDemangle.cpp index 49dbe74d25df8..34f4017d98283 100644 --- a/interpreter/llvm/src/lib/Demangle/ItaniumDemangle.cpp +++ b/interpreter/llvm/src/lib/Demangle/ItaniumDemangle.cpp @@ -1947,7 +1947,7 @@ static const char *parse_type(const char *first, const char *last, C &db) { break; } } - // drop through + // falls through default: // must check for builtin-types before class-enum-types to avoid // ambiguities with operator-names @@ -2525,6 +2525,9 @@ static std::string base_name(std::string &s) { ++p0; break; } + if (!isalpha(*p0) && !isdigit(*p0) && *p0 != '_') { + return std::string(); + } } return std::string(p0, pe); } @@ -2612,39 +2615,45 @@ static const char *parse_unnamed_type_name(const char *first, const char *last, first = t0 + 1; } break; case 'l': { + size_t lambda_pos = db.names.size(); db.names.push_back(std::string("'lambda'(")); const char *t0 = first + 2; if (first[2] == 'v') { db.names.back().first += ')'; ++t0; } else { - const char *t1 = parse_type(t0, last, db); - if (t1 == t0) { - if (!db.names.empty()) - db.names.pop_back(); - return first; - } - if (db.names.size() < 2) - return first; - auto tmp = db.names.back().move_full(); - db.names.pop_back(); - db.names.back().first.append(tmp); - t0 = t1; + bool is_first_it = true; while (true) { - t1 = parse_type(t0, last, db); + long k0 = static_cast(db.names.size()); + const char *t1 = parse_type(t0, last, db); + long k1 = static_cast(db.names.size()); if (t1 == t0) break; - if (db.names.size() < 2) + if (k0 >= k1) return first; - tmp = db.names.back().move_full(); - db.names.pop_back(); - if (!tmp.empty()) { - db.names.back().first.append(", "); - db.names.back().first.append(tmp); - } + // If the call to parse_type above found a pack expansion + // substitution, then multiple names could have been + // inserted into the name table. Walk through the names, + // appending each onto the lambda's parameter list. 
+ std::for_each(db.names.begin() + k0, db.names.begin() + k1, + [&](typename C::sub_type::value_type &pair) { + if (pair.empty()) + return; + auto &lambda = db.names[lambda_pos].first; + if (!is_first_it) + lambda.append(", "); + is_first_it = false; + lambda.append(pair.move_full()); + }); + db.names.erase(db.names.begin() + k0, db.names.end()); t0 = t1; } - if (db.names.empty()) + if (is_first_it) { + if (!db.names.empty()) + db.names.pop_back(); + return first; + } + if (db.names.empty() || db.names.size() - 1 != lambda_pos) return first; db.names.back().first.append(")"); } @@ -4030,6 +4039,8 @@ static const char *parse_encoding(const char *first, const char *last, C &db) { save_value sb(db.tag_templates); if (db.encoding_depth > 1) db.tag_templates = true; + save_value sp(db.parsed_ctor_dtor_cv); + db.parsed_ctor_dtor_cv = false; switch (*first) { case 'G': case 'T': @@ -4229,6 +4240,7 @@ template struct string_pair { template string_pair(const char (&s)[N]) : first(s, N - 1) {} size_t size() const { return first.size() + second.size(); } + bool empty() const { return first.empty() && second.empty(); } StrT full() const { return first + second; } StrT move_full() { return std::move(first) + std::move(second); } }; diff --git a/interpreter/llvm/src/lib/ExecutionEngine/CMakeLists.txt b/interpreter/llvm/src/lib/ExecutionEngine/CMakeLists.txt index 2d9337bbefd2d..84b34919e442b 100644 --- a/interpreter/llvm/src/lib/ExecutionEngine/CMakeLists.txt +++ b/interpreter/llvm/src/lib/ExecutionEngine/CMakeLists.txt @@ -14,6 +14,10 @@ add_llvm_library(LLVMExecutionEngine intrinsics_gen ) +if(BUILD_SHARED_LIBS) + target_link_libraries(LLVMExecutionEngine PUBLIC LLVMRuntimeDyld) +endif() + add_subdirectory(Interpreter) add_subdirectory(MCJIT) add_subdirectory(Orc) diff --git a/interpreter/llvm/src/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp b/interpreter/llvm/src/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp index 0051c69efb7d7..a7b1fe206f108 100644 --- a/interpreter/llvm/src/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp +++ b/interpreter/llvm/src/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp @@ -12,10 +12,10 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Config/config.h" #include "IntelJITEventsWrapper.h" #include "llvm/ADT/DenseMap.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/Config/config.h" #include "llvm/DebugInfo/DIContext.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/ExecutionEngine/JITEventListener.h" diff --git a/interpreter/llvm/src/lib/ExecutionEngine/IntelJITEvents/jitprofiling.c b/interpreter/llvm/src/lib/ExecutionEngine/IntelJITEvents/jitprofiling.c index e9668892c05bf..f2d36a76a3154 100644 --- a/interpreter/llvm/src/lib/ExecutionEngine/IntelJITEvents/jitprofiling.c +++ b/interpreter/llvm/src/lib/ExecutionEngine/IntelJITEvents/jitprofiling.c @@ -22,8 +22,8 @@ #include #pragma optimize("", off) #else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#include #include +#include #include #endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ #include diff --git a/interpreter/llvm/src/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp b/interpreter/llvm/src/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp index ee75bee9c533c..64dca930722e6 100644 --- a/interpreter/llvm/src/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp +++ b/interpreter/llvm/src/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp @@ -22,7 +22,7 @@ #include "Interpreter.h" #include 
"llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/Config/config.h" // Detect libffi +#include "llvm/Config/config.h" // Detect libffi #include "llvm/ExecutionEngine/GenericValue.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" @@ -33,8 +33,8 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/Mutex.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/UniqueLock.h" +#include "llvm/Support/raw_ostream.h" #include #include #include diff --git a/interpreter/llvm/src/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/interpreter/llvm/src/lib/ExecutionEngine/MCJIT/MCJIT.cpp index ff8749fbfed48..1164d60ffc104 100644 --- a/interpreter/llvm/src/lib/ExecutionEngine/MCJIT/MCJIT.cpp +++ b/interpreter/llvm/src/lib/ExecutionEngine/MCJIT/MCJIT.cpp @@ -317,7 +317,13 @@ uint64_t MCJIT::getSymbolAddress(const std::string &Name, raw_string_ostream MangledNameStream(MangledName); Mangler::getNameWithPrefix(MangledNameStream, Name, getDataLayout()); } - return findSymbol(MangledName, CheckFunctionsOnly).getAddress(); + if (auto Sym = findSymbol(MangledName, CheckFunctionsOnly)) { + if (auto AddrOrErr = Sym.getAddress()) + return *AddrOrErr; + else + report_fatal_error(AddrOrErr.takeError()); + } else + report_fatal_error(Sym.takeError()); } JITSymbol MCJIT::findSymbol(const std::string &Name, @@ -599,11 +605,12 @@ GenericValue MCJIT::runFunction(Function *F, ArrayRef ArgValues) { void *MCJIT::getPointerToNamedFunction(StringRef Name, bool AbortOnFailure) { if (!isSymbolSearchingDisabled()) { - void *ptr = - reinterpret_cast( - static_cast(Resolver.findSymbol(Name).getAddress())); - if (ptr) - return ptr; + if (auto Sym = Resolver.findSymbol(Name)) { + if (auto AddrOrErr = Sym.getAddress()) + return reinterpret_cast( + static_cast(*AddrOrErr)); + } else if (auto Err = Sym.takeError()) + report_fatal_error(std::move(Err)); } /// If a LazyFunctionCreator is installed, use it to get/create the function. 
diff --git a/interpreter/llvm/src/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp b/interpreter/llvm/src/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp index 57b5d85bb5500..3581d64583957 100644 --- a/interpreter/llvm/src/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp +++ b/interpreter/llvm/src/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Config/config.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/Config/config.h" #include "llvm/ExecutionEngine/JITEventListener.h" #include "llvm/ExecutionEngine/OProfileWrapper.h" #include "llvm/ExecutionEngine/RuntimeDyld.h" diff --git a/interpreter/llvm/src/lib/ExecutionEngine/Orc/IndirectionUtils.cpp b/interpreter/llvm/src/lib/ExecutionEngine/Orc/IndirectionUtils.cpp index 711b887da6ef1..e3a456849f903 100644 --- a/interpreter/llvm/src/lib/ExecutionEngine/Orc/IndirectionUtils.cpp +++ b/interpreter/llvm/src/lib/ExecutionEngine/Orc/IndirectionUtils.cpp @@ -7,9 +7,9 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ExecutionEngine/Orc/IndirectionUtils.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Triple.h" -#include "llvm/ExecutionEngine/Orc/IndirectionUtils.h" #include "llvm/ExecutionEngine/Orc/OrcABISupport.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/IRBuilder.h" diff --git a/interpreter/llvm/src/lib/ExecutionEngine/Orc/OrcCBindings.cpp b/interpreter/llvm/src/lib/ExecutionEngine/Orc/OrcCBindings.cpp index 8dcd49aaab5bb..de80cb1d0dd4c 100644 --- a/interpreter/llvm/src/lib/ExecutionEngine/Orc/OrcCBindings.cpp +++ b/interpreter/llvm/src/lib/ExecutionEngine/Orc/OrcCBindings.cpp @@ -12,6 +12,24 @@ using namespace llvm; +LLVMSharedModuleRef LLVMOrcMakeSharedModule(LLVMModuleRef Mod) { + return wrap(new std::shared_ptr(unwrap(Mod))); +} + +void LLVMOrcDisposeSharedModuleRef(LLVMSharedModuleRef SharedMod) { + delete unwrap(SharedMod); +} + +LLVMSharedObjectBufferRef +LLVMOrcMakeSharedObjectBuffer(LLVMMemoryBufferRef ObjBuffer) { + return wrap(new std::shared_ptr(unwrap(ObjBuffer))); +} + +void +LLVMOrcDisposeSharedObjectBufferRef(LLVMSharedObjectBufferRef SharedObjBuffer) { + delete unwrap(SharedObjBuffer); +} + LLVMOrcJITStackRef LLVMOrcCreateInstance(LLVMTargetMachineRef TM) { TargetMachine *TM2(unwrap(TM)); @@ -42,12 +60,13 @@ void LLVMOrcGetMangledSymbol(LLVMOrcJITStackRef JITStack, char **MangledName, void LLVMOrcDisposeMangledSymbol(char *MangledName) { delete[] MangledName; } -LLVMOrcTargetAddress +LLVMOrcErrorCode LLVMOrcCreateLazyCompileCallback(LLVMOrcJITStackRef JITStack, + LLVMOrcTargetAddress *RetAddr, LLVMOrcLazyCompileCallbackFn Callback, void *CallbackCtx) { OrcCBindingsStack &J = *unwrap(JITStack); - return J.createLazyCompileCallback(Callback, CallbackCtx); + return J.createLazyCompileCallback(*RetAddr, Callback, CallbackCtx); } LLVMOrcErrorCode LLVMOrcCreateIndirectStub(LLVMOrcJITStackRef JITStack, @@ -64,36 +83,44 @@ LLVMOrcErrorCode LLVMOrcSetIndirectStubPointer(LLVMOrcJITStackRef JITStack, return J.setIndirectStubPointer(StubName, NewAddr); } -LLVMOrcModuleHandle -LLVMOrcAddEagerlyCompiledIR(LLVMOrcJITStackRef JITStack, LLVMModuleRef Mod, +LLVMOrcErrorCode +LLVMOrcAddEagerlyCompiledIR(LLVMOrcJITStackRef JITStack, + LLVMOrcModuleHandle *RetHandle, + LLVMSharedModuleRef Mod, LLVMOrcSymbolResolverFn SymbolResolver, void *SymbolResolverCtx) { OrcCBindingsStack &J = *unwrap(JITStack); - Module 
*M(unwrap(Mod)); - return J.addIRModuleEager(M, SymbolResolver, SymbolResolverCtx); + std::shared_ptr *M(unwrap(Mod)); + return J.addIRModuleEager(*RetHandle, *M, SymbolResolver, SymbolResolverCtx); } -LLVMOrcModuleHandle -LLVMOrcAddLazilyCompiledIR(LLVMOrcJITStackRef JITStack, LLVMModuleRef Mod, +LLVMOrcErrorCode +LLVMOrcAddLazilyCompiledIR(LLVMOrcJITStackRef JITStack, + LLVMOrcModuleHandle *RetHandle, + LLVMSharedModuleRef Mod, LLVMOrcSymbolResolverFn SymbolResolver, void *SymbolResolverCtx) { OrcCBindingsStack &J = *unwrap(JITStack); - Module *M(unwrap(Mod)); - return J.addIRModuleLazy(M, SymbolResolver, SymbolResolverCtx); + std::shared_ptr *M(unwrap(Mod)); + return J.addIRModuleLazy(*RetHandle, *M, SymbolResolver, SymbolResolverCtx); } -void LLVMOrcRemoveModule(LLVMOrcJITStackRef JITStack, LLVMOrcModuleHandle H) { +LLVMOrcErrorCode LLVMOrcRemoveModule(LLVMOrcJITStackRef JITStack, + LLVMOrcModuleHandle H) { OrcCBindingsStack &J = *unwrap(JITStack); - J.removeModule(H); + return J.removeModule(H); } -LLVMOrcTargetAddress LLVMOrcGetSymbolAddress(LLVMOrcJITStackRef JITStack, - const char *SymbolName) { +LLVMOrcErrorCode LLVMOrcGetSymbolAddress(LLVMOrcJITStackRef JITStack, + LLVMOrcTargetAddress *RetAddr, + const char *SymbolName) { OrcCBindingsStack &J = *unwrap(JITStack); - auto Sym = J.findSymbol(SymbolName, true); - return Sym.getAddress(); + return J.findSymbolAddress(*RetAddr, SymbolName, true); } -void LLVMOrcDisposeInstance(LLVMOrcJITStackRef JITStack) { - delete unwrap(JITStack); +LLVMOrcErrorCode LLVMOrcDisposeInstance(LLVMOrcJITStackRef JITStack) { + auto *J = unwrap(JITStack); + auto Err = J->shutdown(); + delete J; + return Err; } diff --git a/interpreter/llvm/src/lib/ExecutionEngine/Orc/OrcCBindingsStack.h b/interpreter/llvm/src/lib/ExecutionEngine/Orc/OrcCBindingsStack.h index a79dd844bf4f7..e38decf94f3e9 100644 --- a/interpreter/llvm/src/lib/ExecutionEngine/Orc/OrcCBindingsStack.h +++ b/interpreter/llvm/src/lib/ExecutionEngine/Orc/OrcCBindingsStack.h @@ -1,4 +1,4 @@ -//===--- OrcCBindingsStack.h - Orc JIT stack for C bindings ---*- C++ -*---===// +//===- OrcCBindingsStack.h - Orc JIT stack for C bindings -----*- C++ -*---===// // // The LLVM Compiler Infrastructure // @@ -11,47 +11,71 @@ #define LLVM_LIB_EXECUTIONENGINE_ORC_ORCCBINDINGSSTACK_H #include "llvm-c/OrcBindings.h" -#include "llvm/ADT/Triple.h" +#include "llvm-c/TargetMachine.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ExecutionEngine/JITSymbol.h" #include "llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h" #include "llvm/ExecutionEngine/Orc/CompileUtils.h" #include "llvm/ExecutionEngine/Orc/ExecutionUtils.h" #include "llvm/ExecutionEngine/Orc/IRCompileLayer.h" +#include "llvm/ExecutionEngine/Orc/LambdaResolver.h" #include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h" -#include "llvm/IR/LLVMContext.h" +#include "llvm/ExecutionEngine/RuntimeDyld.h" +#include "llvm/ExecutionEngine/SectionMemoryManager.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Mangler.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/CBindingWrapping.h" #include "llvm/Support/Error.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" +#include +#include +#include +#include +#include +#include +#include namespace llvm { class OrcCBindingsStack; +DEFINE_SIMPLE_CONVERSION_FUNCTIONS(std::shared_ptr, + LLVMSharedModuleRef) +DEFINE_SIMPLE_CONVERSION_FUNCTIONS(std::shared_ptr, + LLVMSharedObjectBufferRef) DEFINE_SIMPLE_CONVERSION_FUNCTIONS(OrcCBindingsStack, 
LLVMOrcJITStackRef) DEFINE_SIMPLE_CONVERSION_FUNCTIONS(TargetMachine, LLVMTargetMachineRef) class OrcCBindingsStack { public: - typedef orc::JITCompileCallbackManager CompileCallbackMgr; - typedef orc::RTDyldObjectLinkingLayer<> ObjLayerT; - typedef orc::IRCompileLayer CompileLayerT; - typedef orc::CompileOnDemandLayer - CODLayerT; - typedef std::function()> - CallbackManagerBuilder; + using CompileCallbackMgr = orc::JITCompileCallbackManager; + using ObjLayerT = orc::RTDyldObjectLinkingLayer; + using CompileLayerT = orc::IRCompileLayer; + using CODLayerT = + orc::CompileOnDemandLayer; - typedef CODLayerT::IndirectStubsManagerBuilderT IndirectStubsManagerBuilder; + using CallbackManagerBuilder = + std::function()>; + + using IndirectStubsManagerBuilder = CODLayerT::IndirectStubsManagerBuilderT; private: class GenericHandle { public: - virtual ~GenericHandle() {} + virtual ~GenericHandle() = default; + virtual JITSymbol findSymbolIn(const std::string &Name, bool ExportedSymbolsOnly) = 0; - virtual void removeModule() = 0; + virtual Error removeModule() = 0; }; template class GenericHandleImpl : public GenericHandle { public: - GenericHandleImpl(LayerT &Layer, typename LayerT::ModuleSetHandleT Handle) + GenericHandleImpl(LayerT &Layer, typename LayerT::ModuleHandleT Handle) : Layer(Layer), Handle(std::move(Handle)) {} JITSymbol findSymbolIn(const std::string &Name, @@ -59,31 +83,32 @@ class OrcCBindingsStack { return Layer.findSymbolIn(Handle, Name, ExportedSymbolsOnly); } - void removeModule() override { return Layer.removeModuleSet(Handle); } + Error removeModule() override { return Layer.removeModule(Handle); } private: LayerT &Layer; - typename LayerT::ModuleSetHandleT Handle; + typename LayerT::ModuleHandleT Handle; }; template std::unique_ptr> - createGenericHandle(LayerT &Layer, typename LayerT::ModuleSetHandleT Handle) { + createGenericHandle(LayerT &Layer, typename LayerT::ModuleHandleT Handle) { return llvm::make_unique>(Layer, std::move(Handle)); } public: - // We need a 'ModuleSetHandleT' to conform to the layer concept. - typedef unsigned ModuleSetHandleT; - - typedef unsigned ModuleHandleT; + using ModuleHandleT = unsigned; OrcCBindingsStack(TargetMachine &TM, std::unique_ptr CCMgr, IndirectStubsManagerBuilder IndirectStubsMgrBuilder) : DL(TM.createDataLayout()), IndirectStubsMgr(IndirectStubsMgrBuilder()), - CCMgr(std::move(CCMgr)), ObjectLayer(), + CCMgr(std::move(CCMgr)), + ObjectLayer( + []() { + return std::make_shared(); + }), CompileLayer(ObjectLayer, orc::SimpleCompiler(TM)), CODLayer(CompileLayer, [](Function &F) { return std::set({&F}); }, @@ -91,12 +116,14 @@ class OrcCBindingsStack { CXXRuntimeOverrides( [this](const std::string &S) { return mangle(S); }) {} - ~OrcCBindingsStack() { + LLVMOrcErrorCode shutdown() { // Run any destructors registered with __cxa_atexit. CXXRuntimeOverrides.runDestructors(); // Run any IR destructors. 
for (auto &DtorRunner : IRStaticDestructorRunners) - DtorRunner.runViaLayer(*this); + if (auto Err = DtorRunner.runViaLayer(*this)) + return mapError(std::move(Err)); + return LLVMOrcErrSuccess; } std::string mangle(StringRef Name) { @@ -113,14 +140,17 @@ class OrcCBindingsStack { return reinterpret_cast(static_cast(Addr)); } - JITTargetAddress - createLazyCompileCallback(LLVMOrcLazyCompileCallbackFn Callback, + + LLVMOrcErrorCode + createLazyCompileCallback(JITTargetAddress &RetAddr, + LLVMOrcLazyCompileCallbackFn Callback, void *CallbackCtx) { auto CCInfo = CCMgr->getCompileCallback(); CCInfo.setCompileAction([=]() -> JITTargetAddress { return Callback(wrap(this), CallbackCtx); }); - return CCInfo.getAddress(); + RetAddr = CCInfo.getAddress(); + return LLVMOrcErrSuccess; } LLVMOrcErrorCode createIndirectStub(StringRef StubName, @@ -134,12 +164,12 @@ class OrcCBindingsStack { return mapError(IndirectStubsMgr->updatePointer(Name, Addr)); } - std::unique_ptr + std::shared_ptr createResolver(LLVMOrcSymbolResolverFn ExternalResolver, void *ExternalResolverCtx) { return orc::createLambdaResolver( [this, ExternalResolver, ExternalResolverCtx](const std::string &Name) - -> JITSymbol { + -> JITSymbol { // Search order: // 1. JIT'd symbols. // 2. Runtime overrides. @@ -147,26 +177,31 @@ class OrcCBindingsStack { if (auto Sym = CODLayer.findSymbol(Name, true)) return Sym; + else if (auto Err = Sym.takeError()) + return Sym.takeError(); + if (auto Sym = CXXRuntimeOverrides.searchOverrides(Name)) return Sym; if (ExternalResolver) return JITSymbol( ExternalResolver(Name.c_str(), ExternalResolverCtx), - llvm::JITSymbolFlags::Exported); + JITSymbolFlags::Exported); return JITSymbol(nullptr); }, - [](const std::string &Name) { + [](const std::string &Name) -> JITSymbol { return JITSymbol(nullptr); }); } template - ModuleHandleT addIRModule(LayerT &Layer, Module *M, - std::unique_ptr MemMgr, - LLVMOrcSymbolResolverFn ExternalResolver, - void *ExternalResolverCtx) { + LLVMOrcErrorCode + addIRModule(ModuleHandleT &RetHandle, LayerT &Layer, + std::shared_ptr M, + std::unique_ptr MemMgr, + LLVMOrcSymbolResolverFn ExternalResolver, + void *ExternalResolverCtx) { // Attach a data-layout if one isn't already present. if (M->getDataLayout().isDefault()) @@ -184,46 +219,52 @@ class OrcCBindingsStack { auto Resolver = createResolver(ExternalResolver, ExternalResolverCtx); // Add the module to the JIT. - std::vector S; - S.push_back(std::move(M)); - - auto LH = Layer.addModuleSet(std::move(S), std::move(MemMgr), - std::move(Resolver)); - ModuleHandleT H = createHandle(Layer, LH); + ModuleHandleT H; + if (auto LHOrErr = Layer.addModule(std::move(M), std::move(Resolver))) + H = createHandle(Layer, *LHOrErr); + else + return mapError(LHOrErr.takeError()); // Run the static constructors, and save the static destructor runner for // execution when the JIT is torn down. 
orc::CtorDtorRunner CtorRunner(std::move(CtorNames), H); - CtorRunner.runViaLayer(*this); + if (auto Err = CtorRunner.runViaLayer(*this)) + return mapError(std::move(Err)); IRStaticDestructorRunners.emplace_back(std::move(DtorNames), H); - return H; + RetHandle = H; + return LLVMOrcErrSuccess; } - ModuleHandleT addIRModuleEager(Module *M, - LLVMOrcSymbolResolverFn ExternalResolver, - void *ExternalResolverCtx) { - return addIRModule(CompileLayer, std::move(M), + LLVMOrcErrorCode addIRModuleEager(ModuleHandleT &RetHandle, + std::shared_ptr M, + LLVMOrcSymbolResolverFn ExternalResolver, + void *ExternalResolverCtx) { + return addIRModule(RetHandle, CompileLayer, std::move(M), llvm::make_unique(), std::move(ExternalResolver), ExternalResolverCtx); } - ModuleHandleT addIRModuleLazy(Module *M, - LLVMOrcSymbolResolverFn ExternalResolver, - void *ExternalResolverCtx) { - return addIRModule(CODLayer, std::move(M), + LLVMOrcErrorCode addIRModuleLazy(ModuleHandleT &RetHandle, + std::shared_ptr M, + LLVMOrcSymbolResolverFn ExternalResolver, + void *ExternalResolverCtx) { + return addIRModule(RetHandle, CODLayer, std::move(M), llvm::make_unique(), std::move(ExternalResolver), ExternalResolverCtx); } - void removeModule(ModuleHandleT H) { - GenericHandles[H]->removeModule(); + LLVMOrcErrorCode removeModule(ModuleHandleT H) { + if (auto Err = GenericHandles[H]->removeModule()) + return mapError(std::move(Err)); GenericHandles[H] = nullptr; FreeHandleIndexes.push_back(H); + return LLVMOrcErrSuccess; } - JITSymbol findSymbol(const std::string &Name, bool ExportedSymbolsOnly) { + JITSymbol findSymbol(const std::string &Name, + bool ExportedSymbolsOnly) { if (auto Sym = IndirectStubsMgr->findStub(Name, ExportedSymbolsOnly)) return Sym; return CODLayer.findSymbol(mangle(Name), ExportedSymbolsOnly); @@ -234,12 +275,31 @@ class OrcCBindingsStack { return GenericHandles[H]->findSymbolIn(Name, ExportedSymbolsOnly); } + LLVMOrcErrorCode findSymbolAddress(JITTargetAddress &RetAddr, + const std::string &Name, + bool ExportedSymbolsOnly) { + RetAddr = 0; + if (auto Sym = findSymbol(Name, ExportedSymbolsOnly)) { + // Successful lookup, non-null symbol: + if (auto AddrOrErr = Sym.getAddress()) { + RetAddr = *AddrOrErr; + return LLVMOrcErrSuccess; + } else + return mapError(AddrOrErr.takeError()); + } else if (auto Err = Sym.takeError()) { + // Lookup failure - report error. + return mapError(std::move(Err)); + } + // Otherwise we had a successful lookup but got a null result. We already + // set RetAddr to '0' above, so just return success. 
+ return LLVMOrcErrSuccess; + } + const std::string &getErrorMessage() const { return ErrMsg; } private: template - unsigned createHandle(LayerT &Layer, - typename LayerT::ModuleSetHandleT Handle) { + unsigned createHandle(LayerT &Layer, typename LayerT::ModuleHandleT Handle) { unsigned NewHandle; if (!FreeHandleIndexes.empty()) { NewHandle = FreeHandleIndexes.back(); diff --git a/interpreter/llvm/src/lib/ExecutionEngine/Orc/OrcError.cpp b/interpreter/llvm/src/lib/ExecutionEngine/Orc/OrcError.cpp index 9e70c4ac1dbff..df2d320e0f7aa 100644 --- a/interpreter/llvm/src/lib/ExecutionEngine/Orc/OrcError.cpp +++ b/interpreter/llvm/src/lib/ExecutionEngine/Orc/OrcError.cpp @@ -45,6 +45,8 @@ class OrcErrorCategory : public std::error_category { return "Could not negotiate RPC function"; case OrcErrorCode::RPCResponseAbandoned: return "RPC response abandoned"; + case OrcErrorCode::JITSymbolNotFound: + return "JIT symbol not found"; case OrcErrorCode::UnexpectedRPCCall: return "Unexpected RPC call"; case OrcErrorCode::UnexpectedRPCResponse: @@ -63,10 +65,29 @@ static ManagedStatic OrcErrCat; namespace llvm { namespace orc { +char JITSymbolNotFound::ID = 0; + std::error_code orcError(OrcErrorCode ErrCode) { typedef std::underlying_type::type UT; return std::error_code(static_cast(ErrCode), *OrcErrCat); } +JITSymbolNotFound::JITSymbolNotFound(std::string SymbolName) + : SymbolName(std::move(SymbolName)) {} + +std::error_code JITSymbolNotFound::convertToErrorCode() const { + typedef std::underlying_type::type UT; + return std::error_code(static_cast(OrcErrorCode::JITSymbolNotFound), + *OrcErrCat); +} + +void JITSymbolNotFound::log(raw_ostream &OS) const { + OS << "Could not find symbol '" << SymbolName << "'"; +} + +const std::string &JITSymbolNotFound::getSymbolName() const { + return SymbolName; +} + } } diff --git a/interpreter/llvm/src/lib/ExecutionEngine/Orc/OrcMCJITReplacement.cpp b/interpreter/llvm/src/lib/ExecutionEngine/Orc/OrcMCJITReplacement.cpp index b7a68e041c128..f89f21adff417 100644 --- a/interpreter/llvm/src/lib/ExecutionEngine/Orc/OrcMCJITReplacement.cpp +++ b/interpreter/llvm/src/lib/ExecutionEngine/Orc/OrcMCJITReplacement.cpp @@ -124,5 +124,10 @@ OrcMCJITReplacement::runFunction(Function *F, llvm_unreachable("Full-featured argument passing not supported yet!"); } +void OrcMCJITReplacement::runStaticConstructorsDestructors(bool isDtors) { + for (auto &M : LocalModules) + ExecutionEngine::runStaticConstructorsDestructors(*M, isDtors); +} + } // End namespace orc. } // End namespace llvm. 
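The OrcError.cpp hunk above adds JITSymbolNotFound following LLVM's ErrorInfo CRTP pattern: a static ID whose address identifies the type, a log() override for printing, and convertToErrorCode() for std::error_code interop. A hedged sketch of the same pattern follows; the class name here is invented for illustration, and it maps to inconvertibleErrorCode() where the real class maps to OrcErrorCode::JITSymbolNotFound.

#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"
#include <string>
#include <system_error>

// Illustrative error type following the same ErrorInfo pattern as
// JITSymbolNotFound; the name and error-code mapping are ours.
class SymbolLookupError : public llvm::ErrorInfo<SymbolLookupError> {
public:
  static char ID; // address is used as the type's identity by llvm::Error

  explicit SymbolLookupError(std::string Name) : Name(std::move(Name)) {}

  void log(llvm::raw_ostream &OS) const override {
    OS << "could not find symbol '" << Name << "'";
  }

  std::error_code convertToErrorCode() const override {
    return llvm::inconvertibleErrorCode();
  }

private:
  std::string Name;
};

char SymbolLookupError::ID = 0;

llvm::Error failLookup(std::string Name) {
  return llvm::make_error<SymbolLookupError>(std::move(Name));
}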
diff --git a/interpreter/llvm/src/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h b/interpreter/llvm/src/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h index a27573f93b97c..346a40405ff18 100644 --- a/interpreter/llvm/src/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h +++ b/interpreter/llvm/src/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h @@ -1,4 +1,4 @@ -//===---- OrcMCJITReplacement.h - Orc based MCJIT replacement ---*- C++ -*-===// +//===- OrcMCJITReplacement.h - Orc based MCJIT replacement ------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -20,13 +20,16 @@ #include "llvm/ExecutionEngine/ExecutionEngine.h" #include "llvm/ExecutionEngine/GenericValue.h" #include "llvm/ExecutionEngine/JITSymbol.h" -#include "llvm/ExecutionEngine/RuntimeDyld.h" #include "llvm/ExecutionEngine/Orc/CompileUtils.h" #include "llvm/ExecutionEngine/Orc/IRCompileLayer.h" #include "llvm/ExecutionEngine/Orc/LazyEmittingLayer.h" #include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h" +#include "llvm/ExecutionEngine/RTDyldMemoryManager.h" +#include "llvm/ExecutionEngine/RuntimeDyld.h" +#include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" #include "llvm/IR/Mangler.h" +#include "llvm/IR/Module.h" #include "llvm/Object/Archive.h" #include "llvm/Object/Binary.h" #include "llvm/Object/ObjectFile.h" @@ -34,10 +37,10 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" +#include #include #include #include -#include #include #include #include @@ -45,6 +48,9 @@ #include namespace llvm { + +class ObjectCache; + namespace orc { class OrcMCJITReplacement : public ExecutionEngine { @@ -151,7 +157,6 @@ class OrcMCJITReplacement : public ExecutionEngine { }; private: - static ExecutionEngine * createOrcMCJITReplacement(std::string *ErrorMsg, std::shared_ptr MemMgr, @@ -162,24 +167,26 @@ class OrcMCJITReplacement : public ExecutionEngine { } public: - static void Register() { - OrcMCJITReplacementCtor = createOrcMCJITReplacement; - } - OrcMCJITReplacement( std::shared_ptr MemMgr, std::shared_ptr ClientResolver, std::unique_ptr TM) : ExecutionEngine(TM->createDataLayout()), TM(std::move(TM)), - MemMgr(*this, std::move(MemMgr)), Resolver(*this), + MemMgr(std::make_shared(*this, + std::move(MemMgr))), + Resolver(std::make_shared(*this)), ClientResolver(std::move(ClientResolver)), NotifyObjectLoaded(*this), NotifyFinalized(*this), - ObjectLayer(NotifyObjectLoaded, NotifyFinalized), + ObjectLayer([this]() { return this->MemMgr; }, NotifyObjectLoaded, + NotifyFinalized), CompileLayer(ObjectLayer, SimpleCompiler(*this->TM)), LazyEmitLayer(CompileLayer) {} - void addModule(std::unique_ptr M) override { + static void Register() { + OrcMCJITReplacementCtor = createOrcMCJITReplacement; + } + void addModule(std::unique_ptr M) override { // If this module doesn't have a DataLayout attached then attach the // default. 
if (M->getDataLayout().isDefault()) { @@ -187,32 +194,47 @@ class OrcMCJITReplacement : public ExecutionEngine { } else { assert(M->getDataLayout() == getDataLayout() && "DataLayout Mismatch"); } - Modules.push_back(std::move(M)); - std::vector Ms; - Ms.push_back(&*Modules.back()); - LazyEmitLayer.addModuleSet(std::move(Ms), &MemMgr, &Resolver); + auto *MPtr = M.release(); + ShouldDelete[MPtr] = true; + auto Deleter = [this](Module *Mod) { + auto I = ShouldDelete.find(Mod); + if (I != ShouldDelete.end() && I->second) + delete Mod; + }; + LocalModules.push_back(std::shared_ptr(MPtr, std::move(Deleter))); + cantFail(LazyEmitLayer.addModule(LocalModules.back(), Resolver)); } void addObjectFile(std::unique_ptr O) override { - std::vector> Objs; - Objs.push_back(std::move(O)); - ObjectLayer.addObjectSet(std::move(Objs), &MemMgr, &Resolver); + auto Obj = + std::make_shared>(std::move(O), + nullptr); + cantFail(ObjectLayer.addObject(std::move(Obj), Resolver)); } void addObjectFile(object::OwningBinary O) override { - std::vector>> Objs; - Objs.push_back( - llvm::make_unique>( - std::move(O))); - ObjectLayer.addObjectSet(std::move(Objs), &MemMgr, &Resolver); + auto Obj = + std::make_shared>(std::move(O)); + cantFail(ObjectLayer.addObject(std::move(Obj), Resolver)); } void addArchive(object::OwningBinary A) override { Archives.push_back(std::move(A)); } + + bool removeModule(Module *M) override { + for (auto I = LocalModules.begin(), E = LocalModules.end(); I != E; ++I) { + if (I->get() == M) { + ShouldDelete[M] = false; + LocalModules.erase(I); + return true; + } + } + return false; + } uint64_t getSymbolAddress(StringRef Name) { - return findSymbol(Name).getAddress(); + return cantFail(findSymbol(Name).getAddress()); } JITSymbol findSymbol(StringRef Name) { @@ -256,13 +278,15 @@ class OrcMCJITReplacement : public ExecutionEngine { ArrayRef ArgValues) override; void setObjectCache(ObjectCache *NewCache) override { - CompileLayer.setObjectCache(NewCache); + CompileLayer.getCompiler().setObjectCache(NewCache); } void setProcessAllSections(bool ProcessAllSections) override { ObjectLayer.setProcessAllSections(ProcessAllSections); } + void runStaticConstructorsDestructors(bool isDtors) override; + private: JITSymbol findMangledSymbol(StringRef Name) { if (auto Sym = LazyEmitLayer.findSymbol(Name, false)) @@ -294,10 +318,12 @@ class OrcMCJITReplacement : public ExecutionEngine { } std::unique_ptr &ChildBin = ChildBinOrErr.get(); if (ChildBin->isObject()) { - std::vector> ObjSet; - ObjSet.push_back(std::unique_ptr( - static_cast(ChildBin.release()))); - ObjectLayer.addObjectSet(std::move(ObjSet), &MemMgr, &Resolver); + std::unique_ptr ChildObj( + static_cast(ChildBinOrErr->release())); + auto Obj = + std::make_shared>( + std::move(ChildObj), nullptr); + cantFail(ObjectLayer.addObject(std::move(Obj), Resolver)); if (auto Sym = ObjectLayer.findSymbol(Name, true)) return Sym; } @@ -308,34 +334,19 @@ class OrcMCJITReplacement : public ExecutionEngine { class NotifyObjectLoadedT { public: - typedef std::vector> - LoadedObjInfoListT; + using LoadedObjInfoListT = + std::vector>; NotifyObjectLoadedT(OrcMCJITReplacement &M) : M(M) {} - template - void operator()(RTDyldObjectLinkingLayerBase::ObjSetHandleT H, - const ObjListT &Objects, - const LoadedObjInfoListT &Infos) const { + void operator()(RTDyldObjectLinkingLayerBase::ObjHandleT H, + const RTDyldObjectLinkingLayer::ObjectPtr &Obj, + const LoadedObjectInfo &Info) const { M.UnfinalizedSections[H] = std::move(M.SectionsAllocatedSinceLastLoad); 
M.SectionsAllocatedSinceLastLoad = SectionAddrSet(); - assert(Objects.size() == Infos.size() && - "Incorrect number of Infos for Objects."); - for (unsigned I = 0; I < Objects.size(); ++I) - M.MemMgr.notifyObjectLoaded(&M, getObject(*Objects[I])); + M.MemMgr->notifyObjectLoaded(&M, *Obj->getBinary()); } - private: - static const object::ObjectFile& getObject(const object::ObjectFile &Obj) { - return Obj; - } - - template - static const object::ObjectFile& - getObject(const object::OwningBinary &Obj) { - return *Obj.getBinary(); - } - OrcMCJITReplacement &M; }; @@ -343,7 +354,7 @@ class OrcMCJITReplacement : public ExecutionEngine { public: NotifyFinalizedT(OrcMCJITReplacement &M) : M(M) {} - void operator()(RTDyldObjectLinkingLayerBase::ObjSetHandleT H) { + void operator()(RTDyldObjectLinkingLayerBase::ObjHandleT H) { M.UnfinalizedSections.erase(H); } @@ -360,13 +371,13 @@ class OrcMCJITReplacement : public ExecutionEngine { return MangledName; } - typedef RTDyldObjectLinkingLayer ObjectLayerT; - typedef IRCompileLayer CompileLayerT; - typedef LazyEmittingLayer LazyEmitLayerT; + using ObjectLayerT = RTDyldObjectLinkingLayer; + using CompileLayerT = IRCompileLayer; + using LazyEmitLayerT = LazyEmittingLayer; std::unique_ptr TM; - MCJITReplacementMemMgr MemMgr; - LinkingResolver Resolver; + std::shared_ptr MemMgr; + std::shared_ptr Resolver; std::shared_ptr ClientResolver; Mangler Mang; @@ -380,21 +391,24 @@ class OrcMCJITReplacement : public ExecutionEngine { // We need to store ObjLayerT::ObjSetHandles for each of the object sets // that have been emitted but not yet finalized so that we can forward the // mapSectionAddress calls appropriately. - typedef std::set SectionAddrSet; - struct ObjSetHandleCompare { - bool operator()(ObjectLayerT::ObjSetHandleT H1, - ObjectLayerT::ObjSetHandleT H2) const { + using SectionAddrSet = std::set; + struct ObjHandleCompare { + bool operator()(ObjectLayerT::ObjHandleT H1, + ObjectLayerT::ObjHandleT H2) const { return &*H1 < &*H2; } }; SectionAddrSet SectionsAllocatedSinceLastLoad; - std::map + std::map UnfinalizedSections; + std::map ShouldDelete; + std::vector> LocalModules; std::vector> Archives; }; } // end namespace orc + } // end namespace llvm #endif // LLVM_LIB_EXECUTIONENGINE_ORC_MCJITREPLACEMENT_H diff --git a/interpreter/llvm/src/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/interpreter/llvm/src/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp index e9a4b71c903d3..8198836f7a0c9 100644 --- a/interpreter/llvm/src/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp +++ b/interpreter/llvm/src/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp @@ -12,13 +12,13 @@ //===----------------------------------------------------------------------===// #include "llvm/ExecutionEngine/RuntimeDyld.h" -#include "RuntimeDyldCheckerImpl.h" #include "RuntimeDyldCOFF.h" +#include "RuntimeDyldCheckerImpl.h" #include "RuntimeDyldELF.h" #include "RuntimeDyldImpl.h" #include "RuntimeDyldMachO.h" -#include "llvm/Object/ELFObjectFile.h" #include "llvm/Object/COFF.h" +#include "llvm/Object/ELFObjectFile.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/MutexGuard.h" @@ -128,7 +128,10 @@ void RuntimeDyldImpl::resolveRelocations() { ); // First, resolve relocations associated with external symbols. 
- resolveExternalSymbols(); + if (auto Err = resolveExternalSymbols()) { + HasError = true; + ErrorStr = toString(std::move(Err)); + } // Iterate over all outstanding relocations for (auto it = Relocations.begin(), e = Relocations.end(); it != e; ++it) { @@ -243,9 +246,11 @@ RuntimeDyldImpl::loadObjectImpl(const object::ObjectFile &Obj) { continue; // Then check the symbol resolver to see if there's a definition // elsewhere in this logical dylib. - if (auto Sym = Resolver.findSymbolInLogicalDylib(Name)) + if (auto Sym = Resolver.findSymbolInLogicalDylib(Name)) { if (Sym.getFlags().isStrongDefinition()) continue; + } else if (auto Err = Sym.takeError()) + return std::move(Err); // else JITSymFlags &= ~JITSymbolFlags::Weak; } @@ -705,7 +710,7 @@ RuntimeDyldImpl::emitSection(const ObjectFile &Obj, unsigned Alignment = (unsigned)Alignment64 & 0xffffffffL; unsigned PaddingSize = 0; unsigned StubBufSize = 0; - bool IsRequired = isRequiredForExecution(Section) || ProcessAllSections; + bool IsRequired = isRequiredForExecution(Section); bool IsVirtual = Section.isVirtual(); bool IsZeroInit = isZeroInit(Section); bool IsReadOnly = isReadOnlyData(Section); @@ -745,8 +750,8 @@ RuntimeDyldImpl::emitSection(const ObjectFile &Obj, Alignment = std::max(Alignment, getStubAlignment()); // Some sections, such as debug info, don't need to be loaded for execution. - // Leave those where they are. - if (IsRequired) { + // Process those only if explicitly requested. + if (IsRequired || ProcessAllSections) { Allocate = DataSize + PaddingSize + StubBufSize; if (!Allocate) Allocate = 1; @@ -790,6 +795,10 @@ RuntimeDyldImpl::emitSection(const ObjectFile &Obj, Sections.push_back( SectionEntry(Name, Addr, DataSize, Allocate, (uintptr_t)pData)); + // Debug info sections are linked as if their load address was zero + if (!IsRequired) + Sections.back().setLoadAddress(0); + if (Checker) Checker->registerSection(Obj.getFileName(), SectionID); @@ -949,7 +958,7 @@ void RuntimeDyldImpl::resolveRelocationList(const RelocationList &Relocs, } } -void RuntimeDyldImpl::resolveExternalSymbols() { +Error RuntimeDyldImpl::resolveExternalSymbols() { while (!ExternalSymbolRelocations.empty()) { StringMap::iterator i = ExternalSymbolRelocations.begin(); @@ -967,10 +976,24 @@ void RuntimeDyldImpl::resolveExternalSymbols() { // This is an external symbol, try to get its address from the symbol // resolver. // First search for the symbol in this logical dylib. - Addr = Resolver.findSymbolInLogicalDylib(Name.data()).getAddress(); + if (auto Sym = Resolver.findSymbolInLogicalDylib(Name.data())) { + if (auto AddrOrErr = Sym.getAddress()) + Addr = *AddrOrErr; + else + return AddrOrErr.takeError(); + } else if (auto Err = Sym.takeError()) + return Err; + // If that fails, try searching for an external symbol. - if (!Addr) - Addr = Resolver.findSymbol(Name.data()).getAddress(); + if (!Addr) { + if (auto Sym = Resolver.findSymbol(Name.data())) { + if (auto AddrOrErr = Sym.getAddress()) + Addr = *AddrOrErr; + else + return AddrOrErr.takeError(); + } else if (auto Err = Sym.takeError()) + return Err; + } // The call to getSymbolAddress may have caused additional modules to // be loaded, which may have added new entries to the // ExternalSymbolRelocations map. 
Consequently, we need to update our @@ -1005,6 +1028,8 @@ void RuntimeDyldImpl::resolveExternalSymbols() { ExternalSymbolRelocations.erase(i); } + + return Error::success(); } //===----------------------------------------------------------------------===// diff --git a/interpreter/llvm/src/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp b/interpreter/llvm/src/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp index 1bd28ef37ed1c..1c54ad6fb03f8 100644 --- a/interpreter/llvm/src/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp +++ b/interpreter/llvm/src/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp @@ -27,9 +27,12 @@ using namespace llvm::object; namespace { class LoadedCOFFObjectInfo final - : public RuntimeDyld::LoadedObjectInfoHelper<LoadedCOFFObjectInfo> { + : public LoadedObjectInfoHelper<LoadedCOFFObjectInfo, RuntimeDyld::LoadedObjectInfo> { public: - LoadedCOFFObjectInfo(RuntimeDyldImpl &RTDyld, ObjSectionToIDMap ObjSecToIDMap) + LoadedCOFFObjectInfo( + RuntimeDyldImpl &RTDyld, + RuntimeDyld::LoadedObjectInfo::ObjSectionToIDMap ObjSecToIDMap) : LoadedObjectInfoHelper(RTDyld, std::move(ObjSecToIDMap)) {} OwningBinary<ObjectFile> diff --git a/interpreter/llvm/src/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp b/interpreter/llvm/src/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp index e45fdc7aee18a..5bc7434e703f6 100644 --- a/interpreter/llvm/src/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp +++ b/interpreter/llvm/src/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp @@ -742,7 +742,7 @@ uint64_t RuntimeDyldCheckerImpl::getSymbolLocalAddr(StringRef Symbol) const { uint64_t RuntimeDyldCheckerImpl::getSymbolRemoteAddr(StringRef Symbol) const { if (auto InternalSymbol = getRTDyld().getSymbol(Symbol)) return InternalSymbol.getAddress(); - return getRTDyld().Resolver.findSymbol(Symbol).getAddress(); + return cantFail(getRTDyld().Resolver.findSymbol(Symbol).getAddress()); } uint64_t RuntimeDyldCheckerImpl::readMemoryAtAddr(uint64_t SrcAddr, diff --git a/interpreter/llvm/src/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/interpreter/llvm/src/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp index 9ce3974529bbd..77c968401c160 100644 --- a/interpreter/llvm/src/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp +++ b/interpreter/llvm/src/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp @@ -18,10 +18,10 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCStreamer.h" #include "llvm/Object/ELFObjectFile.h" #include "llvm/Object/ObjectFile.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/Endian.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/TargetRegistry.h" @@ -78,11 +78,11 @@ template <class ELFT> class DyldELFObject : public ELFObjectFile<ELFT> { void updateSymbolAddress(const SymbolRef &SymRef, uint64_t Addr); // Methods for type inquiry through isa, cast and dyn_cast - static inline bool classof(const Binary *v) { + static bool classof(const Binary *v) { return (isa<ELFObjectFile<ELFT>>(v) && classof(cast<ELFObjectFile<ELFT>>(v))); } - static inline bool classof(const ELFObjectFile<ELFT> *v) { + static bool classof(const ELFObjectFile<ELFT> *v) { return v->isDyldType(); } }; @@ -123,7 +123,8 @@ void DyldELFObject<ELFT>::updateSymbolAddress(const SymbolRef &SymRef, } class LoadedELFObjectInfo final - : public RuntimeDyld::LoadedObjectInfoHelper<LoadedELFObjectInfo> { + : public LoadedObjectInfoHelper<LoadedELFObjectInfo, RuntimeDyld::LoadedObjectInfo> { public: LoadedELFObjectInfo(RuntimeDyldImpl &RTDyld, ObjSectionToIDMap ObjSecToIDMap) : LoadedObjectInfoHelper(RTDyld, std::move(ObjSecToIDMap)) {} diff --git
a/interpreter/llvm/src/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/interpreter/llvm/src/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h index 18c23c5a2a5d9..95b04fd932511 100644 --- a/interpreter/llvm/src/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h +++ b/interpreter/llvm/src/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h @@ -28,8 +28,8 @@ #include "llvm/Support/Mutex.h" #include "llvm/Support/SwapByteOrder.h" #include -#include #include +#include using namespace llvm; using namespace llvm::object; @@ -417,7 +417,7 @@ class RuntimeDyldImpl { StubMap &Stubs) = 0; /// \brief Resolve relocations to external symbols. - void resolveExternalSymbols(); + Error resolveExternalSymbols(); // \brief Compute an upper bound of the memory that is required to load all // sections diff --git a/interpreter/llvm/src/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp b/interpreter/llvm/src/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp index 00541e8c06fea..80e9c7ac18aac 100644 --- a/interpreter/llvm/src/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp +++ b/interpreter/llvm/src/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp @@ -27,7 +27,8 @@ using namespace llvm::object; namespace { class LoadedMachOObjectInfo final - : public RuntimeDyld::LoadedObjectInfoHelper { + : public LoadedObjectInfoHelper { public: LoadedMachOObjectInfo(RuntimeDyldImpl &RTDyld, ObjSectionToIDMap ObjSecToIDMap) diff --git a/interpreter/llvm/src/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFI386.h b/interpreter/llvm/src/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFI386.h index 6aa1a2bdb9265..901f77865ba18 100644 --- a/interpreter/llvm/src/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFI386.h +++ b/interpreter/llvm/src/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFI386.h @@ -14,9 +14,9 @@ #ifndef LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_TARGETS_RUNTIMEDYLDCOFFI386_H #define LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_TARGETS_RUNTIMEDYLDCOFFI386_H -#include "llvm/Object/COFF.h" -#include "llvm/Support/COFF.h" #include "../RuntimeDyldCOFF.h" +#include "llvm/BinaryFormat/COFF.h" +#include "llvm/Object/COFF.h" #define DEBUG_TYPE "dyld" diff --git a/interpreter/llvm/src/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFThumb.h b/interpreter/llvm/src/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFThumb.h index 318afa21a88b5..3e4b0c8f75bb4 100644 --- a/interpreter/llvm/src/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFThumb.h +++ b/interpreter/llvm/src/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFThumb.h @@ -14,9 +14,9 @@ #ifndef LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_TARGETS_RUNTIMEDYLDCOFFTHUMB_H #define LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_TARGETS_RUNTIMEDYLDCOFFTHUMB_H -#include "llvm/Object/COFF.h" -#include "llvm/Support/COFF.h" #include "../RuntimeDyldCOFF.h" +#include "llvm/BinaryFormat/COFF.h" +#include "llvm/Object/COFF.h" #define DEBUG_TYPE "dyld" diff --git a/interpreter/llvm/src/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h b/interpreter/llvm/src/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h index 26e73989d7edd..7cbb438541519 100644 --- a/interpreter/llvm/src/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h +++ b/interpreter/llvm/src/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h @@ -14,9 +14,9 @@ #ifndef LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_TARGETS_RUNTIMEDYLDCOFF86_64_H #define LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_TARGETS_RUNTIMEDYLDCOFF86_64_H -#include 
"llvm/Object/COFF.h" -#include "llvm/Support/COFF.h" #include "../RuntimeDyldCOFF.h" +#include "llvm/BinaryFormat/COFF.h" +#include "llvm/Object/COFF.h" #define DEBUG_TYPE "dyld" diff --git a/interpreter/llvm/src/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFMips.cpp b/interpreter/llvm/src/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFMips.cpp index cae4d69789a2b..926996d6f7b3c 100644 --- a/interpreter/llvm/src/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFMips.cpp +++ b/interpreter/llvm/src/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFMips.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// #include "RuntimeDyldELFMips.h" -#include "llvm/Support/ELF.h" +#include "llvm/BinaryFormat/ELF.h" #define DEBUG_TYPE "dyld" diff --git a/interpreter/llvm/src/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h b/interpreter/llvm/src/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h index adca0eeb08b4e..43461de4c491e 100644 --- a/interpreter/llvm/src/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h +++ b/interpreter/llvm/src/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h @@ -288,7 +288,6 @@ class RuntimeDyldMachOARM HalfDiffKindBits); addRelocationForSection(R, SectionAID); - addRelocationForSection(R, SectionBID); return ++RelI; } diff --git a/interpreter/llvm/src/lib/ExecutionEngine/SectionMemoryManager.cpp b/interpreter/llvm/src/lib/ExecutionEngine/SectionMemoryManager.cpp index 50478eac6827c..8904475f084f1 100644 --- a/interpreter/llvm/src/lib/ExecutionEngine/SectionMemoryManager.cpp +++ b/interpreter/llvm/src/lib/ExecutionEngine/SectionMemoryManager.cpp @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Config/config.h" #include "llvm/ExecutionEngine/SectionMemoryManager.h" +#include "llvm/Config/config.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/Process.h" diff --git a/interpreter/llvm/src/lib/Fuzzer/CMakeLists.txt b/interpreter/llvm/src/lib/Fuzzer/CMakeLists.txt index b886021aee3fd..bc744890b997d 100644 --- a/interpreter/llvm/src/lib/Fuzzer/CMakeLists.txt +++ b/interpreter/llvm/src/lib/Fuzzer/CMakeLists.txt @@ -13,6 +13,7 @@ if( APPLE ) endif() endif() +set(LIBFUZZER_FLAGS_BASE "${CMAKE_CXX_FLAGS}") if( LLVM_USE_SANITIZE_COVERAGE ) if(NOT "${LLVM_USE_SANITIZER}" STREQUAL "Address") message(FATAL_ERROR @@ -20,7 +21,6 @@ if( LLVM_USE_SANITIZE_COVERAGE ) "LLVM_USE_SANITIZE_COVERAGE=YES to be set." ) endif() - set(LIBFUZZER_FLAGS_BASE "${CMAKE_CXX_FLAGS}") # Disable the coverage and sanitizer instrumentation for the fuzzer itself. 
set(CMAKE_CXX_FLAGS "${LIBFUZZER_FLAGS_BASE} -fno-sanitize-coverage=trace-pc-guard,edge,trace-cmp,indirect-calls,8bit-counters -Werror") @@ -46,7 +46,6 @@ if ( LLVM_USE_SANITIZE_COVERAGE OR CMAKE_SYSTEM_NAME MATCHES "Darwin|Linux" ) FuzzerShmemPosix.cpp FuzzerShmemWindows.cpp FuzzerTracePC.cpp - FuzzerTraceState.cpp FuzzerUtil.cpp FuzzerUtilDarwin.cpp FuzzerUtilLinux.cpp diff --git a/interpreter/llvm/src/lib/Fuzzer/FuzzerCorpus.h b/interpreter/llvm/src/lib/Fuzzer/FuzzerCorpus.h index 0f0573994a035..bae0aea78f13a 100644 --- a/interpreter/llvm/src/lib/Fuzzer/FuzzerCorpus.h +++ b/interpreter/llvm/src/lib/Fuzzer/FuzzerCorpus.h @@ -34,6 +34,8 @@ struct InputInfo { size_t NumExecutedMutations = 0; size_t NumSuccessfullMutations = 0; bool MayDeleteFile = false; + bool Reduced = false; + std::vector UniqFeatureSet; }; class InputCorpus { @@ -68,24 +70,72 @@ class InputCorpus { } bool empty() const { return Inputs.empty(); } const Unit &operator[] (size_t Idx) const { return Inputs[Idx]->U; } - void AddToCorpus(const Unit &U, size_t NumFeatures, - bool MayDeleteFile = false) { + void AddToCorpus(const Unit &U, size_t NumFeatures, bool MayDeleteFile, + const std::vector &FeatureSet) { assert(!U.empty()); - uint8_t Hash[kSHA1NumBytes]; if (FeatureDebug) Printf("ADD_TO_CORPUS %zd NF %zd\n", Inputs.size(), NumFeatures); - ComputeSHA1(U.data(), U.size(), Hash); - Hashes.insert(Sha1ToString(Hash)); Inputs.push_back(new InputInfo()); InputInfo &II = *Inputs.back(); II.U = U; II.NumFeatures = NumFeatures; II.MayDeleteFile = MayDeleteFile; - memcpy(II.Sha1, Hash, kSHA1NumBytes); + II.UniqFeatureSet = FeatureSet; + std::sort(II.UniqFeatureSet.begin(), II.UniqFeatureSet.end()); + ComputeSHA1(U.data(), U.size(), II.Sha1); + Hashes.insert(Sha1ToString(II.Sha1)); UpdateCorpusDistribution(); + PrintCorpus(); // ValidateFeatureSet(); } + // Debug-only + void PrintUnit(const Unit &U) { + if (!FeatureDebug) return; + for (uint8_t C : U) { + if (C != 'F' && C != 'U' && C != 'Z') + C = '.'; + Printf("%c", C); + } + } + + // Debug-only + void PrintFeatureSet(const std::vector &FeatureSet) { + if (!FeatureDebug) return; + Printf("{"); + for (uint32_t Feature: FeatureSet) + Printf("%u,", Feature); + Printf("}"); + } + + // Debug-only + void PrintCorpus() { + if (!FeatureDebug) return; + Printf("======= CORPUS:\n"); + int i = 0; + for (auto II : Inputs) { + if (std::find(II->U.begin(), II->U.end(), 'F') != II->U.end()) { + Printf("[%2d] ", i); + Printf("%s sz=%zd ", Sha1ToString(II->Sha1).c_str(), II->U.size()); + PrintUnit(II->U); + Printf(" "); + PrintFeatureSet(II->UniqFeatureSet); + Printf("\n"); + } + i++; + } + } + + void Replace(InputInfo *II, const Unit &U) { + assert(II->U.size() > U.size()); + Hashes.erase(Sha1ToString(II->Sha1)); + DeleteFile(*II); + ComputeSHA1(U.data(), U.size(), II->Sha1); + Hashes.insert(Sha1ToString(II->Sha1)); + II->U = U; + II->Reduced = true; + } + bool HasUnit(const Unit &U) { return Hashes.count(Hash(U)); } bool HasUnit(const std::string &H) { return Hashes.count(H); } InputInfo &ChooseUnitToMutate(Random &Rand) { @@ -124,16 +174,20 @@ class InputCorpus { Printf("\n"); } - void DeleteInput(size_t Idx) { - InputInfo &II = *Inputs[Idx]; + void DeleteFile(const InputInfo &II) { if (!OutputCorpus.empty() && II.MayDeleteFile) RemoveFile(DirPlusFile(OutputCorpus, Sha1ToString(II.Sha1))); + } + + void DeleteInput(size_t Idx) { + InputInfo &II = *Inputs[Idx]; + DeleteFile(II); Unit().swap(II.U); if (FeatureDebug) Printf("EVICTED %zd\n", Idx); } - void AddFeature(size_t Idx, uint32_t NewSize, 
bool Shrink) { + bool AddFeature(size_t Idx, uint32_t NewSize, bool Shrink) { assert(NewSize); Idx = Idx % kFeatureSetSize; uint32_t OldSize = GetFeature(Idx); @@ -153,8 +207,9 @@ class InputCorpus { Printf("ADD FEATURE %zd sz %d\n", Idx, NewSize); SmallestElementPerFeature[Idx] = Inputs.size(); InputSizesPerFeature[Idx] = NewSize; - CountingFeatures = true; + return true; } + return false; } size_t NumFeatures() const { return NumAddedFeatures; } @@ -173,7 +228,6 @@ class InputCorpus { size_t GetFeature(size_t Idx) const { return InputSizesPerFeature[Idx]; } void ValidateFeatureSet() { - if (!CountingFeatures) return; if (FeatureDebug) PrintFeatureSet(); for (size_t Idx = 0; Idx < kFeatureSetSize; Idx++) @@ -191,14 +245,12 @@ class InputCorpus { // Must be called whenever the corpus or unit weights are changed. void UpdateCorpusDistribution() { size_t N = Inputs.size(); + assert(N); Intervals.resize(N + 1); Weights.resize(N); std::iota(Intervals.begin(), Intervals.end(), 0); - if (CountingFeatures) - for (size_t i = 0; i < N; i++) - Weights[i] = Inputs[i]->NumFeatures * (i + 1); - else - std::iota(Weights.begin(), Weights.end(), 1); + for (size_t i = 0; i < N; i++) + Weights[i] = Inputs[i]->NumFeatures * (i + 1); CorpusDistribution = std::piecewise_constant_distribution( Intervals.begin(), Intervals.end(), Weights.begin()); } @@ -210,7 +262,6 @@ class InputCorpus { std::unordered_set Hashes; std::vector Inputs; - bool CountingFeatures = false; size_t NumAddedFeatures = 0; size_t NumUpdatedFeatures = 0; uint32_t InputSizesPerFeature[kFeatureSetSize]; diff --git a/interpreter/llvm/src/lib/Fuzzer/FuzzerDriver.cpp b/interpreter/llvm/src/lib/Fuzzer/FuzzerDriver.cpp index e93c79cfcec6c..fd8cab38a7bb4 100644 --- a/interpreter/llvm/src/lib/Fuzzer/FuzzerDriver.cpp +++ b/interpreter/llvm/src/lib/Fuzzer/FuzzerDriver.cpp @@ -10,9 +10,9 @@ //===----------------------------------------------------------------------===// #include "FuzzerCorpus.h" +#include "FuzzerIO.h" #include "FuzzerInterface.h" #include "FuzzerInternal.h" -#include "FuzzerIO.h" #include "FuzzerMutate.h" #include "FuzzerRandom.h" #include "FuzzerShmem.h" @@ -149,7 +149,7 @@ static bool ParseOneFlag(const char *Param) { int Val = MyStol(Str); *FlagDescriptions[F].IntFlag = Val; if (Flags.verbosity >= 2) - Printf("Flag: %s %d\n", Name, Val);; + Printf("Flag: %s %d\n", Name, Val); return true; } else if (FlagDescriptions[F].UIntFlag) { unsigned int Val = std::stoul(Str); @@ -186,7 +186,11 @@ static void ParseFlags(const std::vector &Args) { } Inputs = new std::vector; for (size_t A = 1; A < Args.size(); A++) { - if (ParseOneFlag(Args[A].c_str())) continue; + if (ParseOneFlag(Args[A].c_str())) { + if (Flags.ignore_remaining_args) + break; + continue; + } Inputs->push_back(Args[A]); } } @@ -265,7 +269,7 @@ int RunOneTest(Fuzzer *F, const char *InputFilePath, size_t MaxLen) { Unit U = FileToVector(InputFilePath); if (MaxLen && MaxLen < U.size()) U.resize(MaxLen); - F->RunOne(U.data(), U.size()); + F->ExecuteCallback(U.data(), U.size()); F->TryDetectingAMemoryLeak(U.data(), U.size(), true); return 0; } @@ -356,16 +360,17 @@ int MinimizeCrashInput(const std::vector &Args, exit(1); } std::string InputFilePath = Inputs->at(0); - std::string BaseCmd = - CloneArgsWithoutX(Args, "minimize_crash", "exact_artifact_path"); - auto InputPos = BaseCmd.find(" " + InputFilePath + " "); + auto BaseCmd = SplitBefore( + "-ignore_remaining_args=1", + CloneArgsWithoutX(Args, "minimize_crash", "exact_artifact_path")); + auto InputPos = BaseCmd.first.find(" " + 
InputFilePath + " "); assert(InputPos != std::string::npos); - BaseCmd.erase(InputPos, InputFilePath.size() + 1); + BaseCmd.first.erase(InputPos, InputFilePath.size() + 1); if (Flags.runs <= 0 && Flags.max_total_time == 0) { Printf("INFO: you need to specify -runs=N or " "-max_total_time=N with -minimize_crash=1\n" "INFO: defaulting to -max_total_time=600\n"); - BaseCmd += " -max_total_time=600"; + BaseCmd.first += " -max_total_time=600"; } auto LogFilePath = DirPlusFile( @@ -378,7 +383,8 @@ int MinimizeCrashInput(const std::vector &Args, Printf("CRASH_MIN: minimizing crash input: '%s' (%zd bytes)\n", CurrentFilePath.c_str(), U.size()); - auto Cmd = BaseCmd + " " + CurrentFilePath + LogFileRedirect; + auto Cmd = BaseCmd.first + " " + CurrentFilePath + LogFileRedirect + " " + + BaseCmd.second; Printf("CRASH_MIN: executing: %s\n", Cmd.c_str()); int ExitCode = ExecuteCommand(Cmd); @@ -441,7 +447,6 @@ int MinimizeCrashInputInternalStep(Fuzzer *F, InputCorpus *Corpus) { Printf("INFO: The input is small enough, exiting\n"); exit(0); } - Corpus->AddToCorpus(U, 0); F->SetMaxInputLen(U.size()); F->SetMaxMutationLen(U.size() - 1); F->MinimizeCrashLoop(U); @@ -553,12 +558,12 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) { return RunInMultipleProcesses(Args, Flags.workers, Flags.jobs); const size_t kMaxSaneLen = 1 << 20; - const size_t kMinDefaultLen = 64; + const size_t kMinDefaultLen = 4096; FuzzingOptions Options; Options.Verbosity = Flags.verbosity; Options.MaxLen = Flags.max_len; Options.ExperimentalLenControl = Flags.experimental_len_control; - if (Flags.experimental_len_control && Flags.max_len == 64) + if (Flags.experimental_len_control && Flags.max_len == kMinDefaultLen) Options.MaxLen = 1 << 20; Options.UnitTimeoutSec = Flags.timeout; Options.ErrorExitCode = Flags.error_exitcode; @@ -572,6 +577,7 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) { Options.UseCmp = Flags.use_cmp; Options.UseValueProfile = Flags.use_value_profile; Options.Shrink = Flags.shrink; + Options.ReduceInputs = Flags.reduce_inputs; Options.ShuffleAtStartUp = Flags.shuffle; Options.PreferSmall = Flags.prefer_small; Options.ReloadIntervalSec = Flags.reload; @@ -657,7 +663,7 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) { size_t Size = SMR.ReadByteArraySize(); SMR.WriteByteArray(nullptr, 0); const Unit tmp(SMR.GetByteArray(), SMR.GetByteArray() + Size); - F->RunOne(tmp.data(), tmp.size()); + F->ExecuteCallback(tmp.data(), tmp.size()); SMR.PostServer(); } return 0; diff --git a/interpreter/llvm/src/lib/Fuzzer/FuzzerExtFunctionsDlsymWin.cpp b/interpreter/llvm/src/lib/Fuzzer/FuzzerExtFunctionsDlsymWin.cpp index 77521698c80a4..321b3ec5d4140 100644 --- a/interpreter/llvm/src/lib/Fuzzer/FuzzerExtFunctionsDlsymWin.cpp +++ b/interpreter/llvm/src/lib/Fuzzer/FuzzerExtFunctionsDlsymWin.cpp @@ -14,6 +14,8 @@ #include "FuzzerExtFunctions.h" #include "FuzzerIO.h" #include "Windows.h" + +// This must be included after Windows.h. 
#include "Psapi.h" namespace fuzzer { diff --git a/interpreter/llvm/src/lib/Fuzzer/FuzzerExtFunctionsWeak.cpp b/interpreter/llvm/src/lib/Fuzzer/FuzzerExtFunctionsWeak.cpp index 7b02b6f0b701b..503f0395cf8f8 100644 --- a/interpreter/llvm/src/lib/Fuzzer/FuzzerExtFunctionsWeak.cpp +++ b/interpreter/llvm/src/lib/Fuzzer/FuzzerExtFunctionsWeak.cpp @@ -41,7 +41,8 @@ namespace fuzzer { ExternalFunctions::ExternalFunctions() { #define EXT_FUNC(NAME, RETURN_TYPE, FUNC_SIG, WARN) \ this->NAME = ::NAME; \ - CheckFnPtr((void *)::NAME, #NAME, WARN); + CheckFnPtr(reinterpret_cast(reinterpret_cast(::NAME)), \ + #NAME, WARN); #include "FuzzerExtFunctions.def" diff --git a/interpreter/llvm/src/lib/Fuzzer/FuzzerFlags.def b/interpreter/llvm/src/lib/Fuzzer/FuzzerFlags.def index 7ff196c8fa960..526805705b201 100644 --- a/interpreter/llvm/src/lib/Fuzzer/FuzzerFlags.def +++ b/interpreter/llvm/src/lib/Fuzzer/FuzzerFlags.def @@ -65,7 +65,9 @@ FUZZER_FLAG_INT(use_memmem, 1, FUZZER_FLAG_INT(use_value_profile, 0, "Experimental. Use value profile to guide fuzzing.") FUZZER_FLAG_INT(use_cmp, 1, "Use CMP traces to guide mutations") -FUZZER_FLAG_INT(shrink, 0, "Experimental. Try to shrink corpus elements.") +FUZZER_FLAG_INT(shrink, 0, "Experimental. Try to shrink corpus inputs.") +FUZZER_FLAG_INT(reduce_inputs, 0, "Experimental. " + "Try to reduce the size of inputs wile preserving their full feature sets") FUZZER_FLAG_UNSIGNED(jobs, 0, "Number of jobs to run. If jobs >= 1 we spawn" " this number of jobs in separate worker processes" " with stdout/stderr redirected to fuzz-JOB.log.") @@ -119,6 +121,9 @@ FUZZER_FLAG_STRING(exit_on_src_pos, "Exit if a newly found PC originates" FUZZER_FLAG_STRING(exit_on_item, "Exit if an item with a given sha1 sum" " was added to the corpus. " "Used primarily for testing libFuzzer itself.") +FUZZER_FLAG_INT(ignore_remaining_args, 0, "If 1, ignore all arguments passed " + "after this one. Useful for fuzzers that need to do their own " + "argument parsing.") FUZZER_FLAG_STRING(run_equivalence_server, "Experimental") FUZZER_FLAG_STRING(use_equivalence_server, "Experimental") diff --git a/interpreter/llvm/src/lib/Fuzzer/FuzzerIOWindows.cpp b/interpreter/llvm/src/lib/Fuzzer/FuzzerIOWindows.cpp index 75d4e3a06071e..742520267b73f 100644 --- a/interpreter/llvm/src/lib/Fuzzer/FuzzerIOWindows.cpp +++ b/interpreter/llvm/src/lib/Fuzzer/FuzzerIOWindows.cpp @@ -182,7 +182,7 @@ static size_t ParseFileName(const std::string &FileName, const size_t Offset) { return Pos - Offset; } -// Parse a directory ending in separator, like: SomeDir\ +// Parse a directory ending in separator, like: `SomeDir\` // Returns number of characters considered if successful. static size_t ParseDir(const std::string &FileName, const size_t Offset) { size_t Pos = Offset; @@ -197,7 +197,7 @@ static size_t ParseDir(const std::string &FileName, const size_t Offset) { return Pos - Offset; } -// Parse a servername and share, like: SomeServer\SomeShare\ +// Parse a servername and share, like: `SomeServer\SomeShare\` // Returns number of characters considered if successful. 
static size_t ParseServerAndShare(const std::string &FileName, const size_t Offset) { diff --git a/interpreter/llvm/src/lib/Fuzzer/FuzzerInternal.h b/interpreter/llvm/src/lib/Fuzzer/FuzzerInternal.h index 5f184c2316e2a..3fc3fe004cef9 100644 --- a/interpreter/llvm/src/lib/Fuzzer/FuzzerInternal.h +++ b/interpreter/llvm/src/lib/Fuzzer/FuzzerInternal.h @@ -38,7 +38,6 @@ class Fuzzer { void Loop(); void MinimizeCrashLoop(const Unit &U); void ShuffleAndMinimize(UnitVector *V); - void InitializeTraceState(); void RereadOutputCorpus(size_t MaxSize); size_t secondsSinceProcessStartUp() { @@ -65,7 +64,8 @@ class Fuzzer { static void StaticFileSizeExceedCallback(); void ExecuteCallback(const uint8_t *Data, size_t Size); - size_t RunOne(const uint8_t *Data, size_t Size); + bool RunOne(const uint8_t *Data, size_t Size, bool MayDeleteFile = false, + InputInfo *II = nullptr); // Merge Corpora[1:] into Corpora[0]. void Merge(const std::vector &Corpora); @@ -95,24 +95,14 @@ class Fuzzer { void InterruptCallback(); void MutateAndTestOne(); void ReportNewCoverage(InputInfo *II, const Unit &U); - size_t RunOne(const Unit &U) { return RunOne(U.data(), U.size()); } + void PrintPulseAndReportSlowInput(const uint8_t *Data, size_t Size); void WriteToOutputCorpus(const Unit &U); void WriteUnitToFileWithPrefix(const Unit &U, const char *Prefix); void PrintStats(const char *Where, const char *End = "\n", size_t Units = 0); - void PrintStatusForNewUnit(const Unit &U); + void PrintStatusForNewUnit(const Unit &U, const char *Text); void ShuffleCorpus(UnitVector *V); - void AddToCorpus(const Unit &U); void CheckExitOnSrcPosOrItem(); - // Trace-based fuzzing: we run a unit with some kind of tracing - // enabled and record potentially useful mutations. Then - // We apply these mutations one by one to the unit and run it again. - - // Start tracing; forget all previously proposed mutations. - void StartTraceRecording(); - // Stop tracing. - void StopTraceRecording(); - static void StaticDeathCallback(); void DumpCurrentUnit(const char *Prefix); void DeathCallback(); @@ -142,6 +132,8 @@ class Fuzzer { size_t MaxInputLen = 0; size_t MaxMutationLen = 0; + std::vector UniqFeatureSetTmp; + // Need to know our own thread. 
static thread_local bool IsMyThread; }; diff --git a/interpreter/llvm/src/lib/Fuzzer/FuzzerLoop.cpp b/interpreter/llvm/src/lib/Fuzzer/FuzzerLoop.cpp index 14caa203c5ef6..8ac7a847aef75 100644 --- a/interpreter/llvm/src/lib/Fuzzer/FuzzerLoop.cpp +++ b/interpreter/llvm/src/lib/Fuzzer/FuzzerLoop.cpp @@ -10,8 +10,8 @@ //===----------------------------------------------------------------------===// #include "FuzzerCorpus.h" -#include "FuzzerInternal.h" #include "FuzzerIO.h" +#include "FuzzerInternal.h" #include "FuzzerMutate.h" #include "FuzzerRandom.h" #include "FuzzerShmem.h" @@ -22,9 +22,6 @@ #include #if defined(__has_include) -#if __has_include() -#include -#endif #if __has_include() #include #endif @@ -117,7 +114,6 @@ Fuzzer::Fuzzer(UserCallback CB, InputCorpus &Corpus, MutationDispatcher &MD, : CB(CB), Corpus(Corpus), MD(MD), Options(Options) { if (EF->__sanitizer_set_death_callback) EF->__sanitizer_set_death_callback(StaticDeathCallback); - InitializeTraceState(); assert(!F); F = this; TPC.ResetMaps(); @@ -301,7 +297,9 @@ void Fuzzer::SetMaxInputLen(size_t MaxInputLen) { this->MaxInputLen = MaxInputLen; this->MaxMutationLen = MaxInputLen; AllocateCurrentUnitData(); - Printf("INFO: -max_len is not provided, using %zd\n", MaxInputLen); + Printf("INFO: -max_len is not provided; " + "libFuzzer will not generate inputs larger than %zd bytes\n", + MaxInputLen); } void Fuzzer::SetMaxMutationLen(size_t MaxMutationLen) { @@ -346,11 +344,8 @@ void Fuzzer::RereadOutputCorpus(size_t MaxSize) { if (U.size() > MaxSize) U.resize(MaxSize); if (!Corpus.HasUnit(U)) { - if (size_t NumFeatures = RunOne(U)) { - CheckExitOnSrcPosOrItem(); - Corpus.AddToCorpus(U, NumFeatures); + if (RunOne(U.data(), U.size())) Reloaded = true; - } } } if (Reloaded) @@ -375,10 +370,7 @@ void Fuzzer::ShuffleAndMinimize(UnitVector *InitialCorpus) { ExecuteCallback(&dummy, 0); for (const auto &U : *InitialCorpus) { - if (size_t NumFeatures = RunOne(U)) { - CheckExitOnSrcPosOrItem(); - Corpus.AddToCorpus(U, NumFeatures); - } + RunOne(U.data(), U.size()); TryDetectingAMemoryLeak(U.data(), U.size(), /*DuringInitialCorpusExecution*/ true); } @@ -390,18 +382,7 @@ void Fuzzer::ShuffleAndMinimize(UnitVector *InitialCorpus) { } } -size_t Fuzzer::RunOne(const uint8_t *Data, size_t Size) { - if (!Size) return 0; - TotalNumberOfRuns++; - - ExecuteCallback(Data, Size); - - size_t NumUpdatesBefore = Corpus.NumFeatureUpdates(); - TPC.CollectFeatures([&](size_t Feature) { - Corpus.AddFeature(Feature, Size, Options.Shrink); - }); - size_t NumUpdatesAfter = Corpus.NumFeatureUpdates(); - +void Fuzzer::PrintPulseAndReportSlowInput(const uint8_t *Data, size_t Size) { auto TimeOfUnit = duration_cast(UnitStopTime - UnitStartTime).count(); if (!(TotalNumberOfRuns & (TotalNumberOfRuns - 1)) && @@ -413,7 +394,41 @@ size_t Fuzzer::RunOne(const uint8_t *Data, size_t Size) { Printf("Slowest unit: %zd s:\n", TimeOfLongestUnitInSeconds); WriteUnitToFileWithPrefix({Data, Data + Size}, "slow-unit-"); } - return NumUpdatesAfter - NumUpdatesBefore; +} + +bool Fuzzer::RunOne(const uint8_t *Data, size_t Size, bool MayDeleteFile, + InputInfo *II) { + if (!Size) return false; + + ExecuteCallback(Data, Size); + + UniqFeatureSetTmp.clear(); + size_t FoundUniqFeaturesOfII = 0; + size_t NumUpdatesBefore = Corpus.NumFeatureUpdates(); + TPC.CollectFeatures([&](size_t Feature) { + if (Corpus.AddFeature(Feature, Size, Options.Shrink)) + UniqFeatureSetTmp.push_back(Feature); + if (Options.ReduceInputs && II) + if (std::binary_search(II->UniqFeatureSet.begin(), + 
II->UniqFeatureSet.end(), Feature)) + FoundUniqFeaturesOfII++; + }); + PrintPulseAndReportSlowInput(Data, Size); + size_t NumNewFeatures = Corpus.NumFeatureUpdates() - NumUpdatesBefore; + if (NumNewFeatures) { + Corpus.AddToCorpus({Data, Data + Size}, NumNewFeatures, MayDeleteFile, + UniqFeatureSetTmp); + CheckExitOnSrcPosOrItem(); + return true; + } + if (II && FoundUniqFeaturesOfII && + FoundUniqFeaturesOfII == II->UniqFeatureSet.size() && + II->U.size() > Size) { + Corpus.Replace(II, {Data, Data + Size}); + CheckExitOnSrcPosOrItem(); + return true; + } + return false; } size_t Fuzzer::GetCurrentUnitInFuzzingThead(const uint8_t **Data) const { @@ -441,6 +456,7 @@ static bool LooseMemeq(const uint8_t *A, const uint8_t *B, size_t Size) { } void Fuzzer::ExecuteCallback(const uint8_t *Data, size_t Size) { + TotalNumberOfRuns++; assert(InFuzzingThread()); if (SMR.IsClient()) SMR.WriteByteArray(Data, Size); @@ -491,10 +507,10 @@ void Fuzzer::WriteUnitToFileWithPrefix(const Unit &U, const char *Prefix) { Printf("Base64: %s\n", Base64(U).c_str()); } -void Fuzzer::PrintStatusForNewUnit(const Unit &U) { +void Fuzzer::PrintStatusForNewUnit(const Unit &U, const char *Text) { if (!Options.PrintNEW) return; - PrintStats("NEW ", ""); + PrintStats(Text, ""); if (Options.Verbosity) { Printf(" L: %zd ", U.size()); MD.PrintMutationSequence(); @@ -505,7 +521,8 @@ void Fuzzer::PrintStatusForNewUnit(const Unit &U) { void Fuzzer::ReportNewCoverage(InputInfo *II, const Unit &U) { II->NumSuccessfullMutations++; MD.RecordSuccessfulMutationSequence(); - PrintStatusForNewUnit(U); + PrintStatusForNewUnit(U, II->Reduced ? "REDUCE" : + "NEW "); WriteToOutputCorpus(U); NumberOfNewUnitsAdded++; TPC.PrintNewPCs(); @@ -590,16 +607,10 @@ void Fuzzer::MutateAndTestOne() { assert(NewSize > 0 && "Mutator returned empty unit"); assert(NewSize <= CurrentMaxMutationLen && "Mutator return overisized unit"); Size = NewSize; - if (i == 0) - StartTraceRecording(); II.NumExecutedMutations++; - if (size_t NumFeatures = RunOne(CurrentUnitData, Size)) { - Corpus.AddToCorpus({CurrentUnitData, CurrentUnitData + Size}, NumFeatures, - /*MayDeleteFile=*/true); + if (RunOne(CurrentUnitData, Size, /*MayDeleteFile=*/true, &II)) ReportNewCoverage(&II, {CurrentUnitData, CurrentUnitData + Size}); - CheckExitOnSrcPosOrItem(); - } - StopTraceRecording(); + TryDetectingAMemoryLeak(CurrentUnitData, Size, /*DuringInitialCorpusExecution*/ false); } @@ -636,7 +647,8 @@ void Fuzzer::MinimizeCrashLoop(const Unit &U) { for (int i = 0; i < Options.MutateDepth; i++) { size_t NewSize = MD.Mutate(CurrentUnitData, U.size(), MaxMutationLen); assert(NewSize > 0 && NewSize <= MaxMutationLen); - RunOne(CurrentUnitData, NewSize); + ExecuteCallback(CurrentUnitData, NewSize); + PrintPulseAndReportSlowInput(CurrentUnitData, NewSize); TryDetectingAMemoryLeak(CurrentUnitData, NewSize, /*DuringInitialCorpusExecution*/ false); } diff --git a/interpreter/llvm/src/lib/Fuzzer/FuzzerMerge.cpp b/interpreter/llvm/src/lib/Fuzzer/FuzzerMerge.cpp index e66460c29e2f8..616c0999aa39d 100644 --- a/interpreter/llvm/src/lib/Fuzzer/FuzzerMerge.cpp +++ b/interpreter/llvm/src/lib/Fuzzer/FuzzerMerge.cpp @@ -9,9 +9,9 @@ // Merging corpora. 
//===----------------------------------------------------------------------===// -#include "FuzzerInternal.h" -#include "FuzzerIO.h" +#include "FuzzerMerge.h" +#include "FuzzerIO.h" +#include "FuzzerInternal.h" #include "FuzzerTracePC.h" #include "FuzzerUtil.h" @@ -241,7 +241,6 @@ void Fuzzer::CrashResistantMergeInternalStep(const std::string &CFPath) { return true; }); // Show stats. - TotalNumberOfRuns++; if (!(TotalNumberOfRuns & (TotalNumberOfRuns - 1))) PrintStats("pulse "); // Write the post-run marker and the coverage. @@ -286,12 +285,13 @@ void Fuzzer::CrashResistantMerge(const std::vector<std::string> &Args, // Execute the inner process until it passes. // Every inner process should execute at least one input. - std::string BaseCmd = CloneArgsWithoutX(Args, "keep-all-flags"); + auto BaseCmd = SplitBefore("-ignore_remaining_args=1", + CloneArgsWithoutX(Args, "keep-all-flags")); bool Success = false; for (size_t i = 1; i <= AllFiles.size(); i++) { Printf("MERGE-OUTER: attempt %zd\n", i); - auto ExitCode = - ExecuteCommand(BaseCmd + " -merge_control_file=" + CFPath); + auto ExitCode = ExecuteCommand(BaseCmd.first + " -merge_control_file=" + + CFPath + " " + BaseCmd.second); if (!ExitCode) { Printf("MERGE-OUTER: successful in %zd attempt(s)\n", i); Success = true; diff --git a/interpreter/llvm/src/lib/Fuzzer/FuzzerMutate.cpp b/interpreter/llvm/src/lib/Fuzzer/FuzzerMutate.cpp index a84e6eeb42a48..5998ef9d3193d 100644 --- a/interpreter/llvm/src/lib/Fuzzer/FuzzerMutate.cpp +++ b/interpreter/llvm/src/lib/Fuzzer/FuzzerMutate.cpp @@ -9,11 +9,11 @@ // Mutate a test input. //===----------------------------------------------------------------------===// +#include "FuzzerMutate.h" #include "FuzzerCorpus.h" #include "FuzzerDefs.h" #include "FuzzerExtFunctions.h" #include "FuzzerIO.h" -#include "FuzzerMutate.h" #include "FuzzerOptions.h" namespace fuzzer { @@ -43,8 +43,6 @@ MutationDispatcher::MutationDispatcher(Random &Rand, {&MutationDispatcher::Mutate_CrossOver, "CrossOver"}, {&MutationDispatcher::Mutate_AddWordFromManualDictionary, "ManualDict"}, - {&MutationDispatcher::Mutate_AddWordFromTemporaryAutoDictionary, "TempAutoDict"}, {&MutationDispatcher::Mutate_AddWordFromPersistentAutoDictionary, "PersAutoDict"}, }); @@ -165,11 +163,6 @@ size_t MutationDispatcher::Mutate_AddWordFromManualDictionary(uint8_t *Data, return AddWordFromDictionary(ManualDictionary, Data, Size, MaxSize); } -size_t MutationDispatcher::Mutate_AddWordFromTemporaryAutoDictionary( - uint8_t *Data, size_t Size, size_t MaxSize) { - return AddWordFromDictionary(TempAutoDictionary, Data, Size, MaxSize); } - size_t MutationDispatcher::ApplyDictionaryEntry(uint8_t *Data, size_t Size, size_t MaxSize, DictionaryEntry &DE) { @@ -222,7 +215,7 @@ DictionaryEntry MutationDispatcher::MakeDictionaryEntryFromCMP( if (!Cur) break; Positions[NumPositions++] = Cur - Data; } - if (!NumPositions) break; + if (!NumPositions) continue; return DictionaryEntry(W, Positions[Rand(NumPositions)]); } DictionaryEntry DE(W); @@ -251,7 +244,7 @@ size_t MutationDispatcher::Mutate_AddWordFromTORC( uint8_t *Data, size_t Size, size_t MaxSize) { Word W; DictionaryEntry DE; - switch (Rand(3)) { + switch (Rand(4)) { case 0: { auto X = TPC.TORC8.Get(Rand.Rand()); DE = MakeDictionaryEntryFromCMP(X.A, X.B, Data, Size); } break; @@ -267,6 +260,10 @@ size_t MutationDispatcher::Mutate_AddWordFromTORC( auto X = TPC.TORCW.Get(Rand.Rand()); DE = MakeDictionaryEntryFromCMP(X.A, X.B, Data, Size); } break; + case 3: if (Options.UseMemmem) { + auto X = TPC.MMT.Get(Rand.Rand()); + DE =
DictionaryEntry(X); + } break; default: assert(0); } @@ -533,14 +530,4 @@ void MutationDispatcher::AddWordToManualDictionary(const Word &W) { {W, std::numeric_limits::max()}); } -void MutationDispatcher::AddWordToAutoDictionary(DictionaryEntry DE) { - static const size_t kMaxAutoDictSize = 1 << 14; - if (TempAutoDictionary.size() >= kMaxAutoDictSize) return; - TempAutoDictionary.push_back(DE); -} - -void MutationDispatcher::ClearAutoDictionary() { - TempAutoDictionary.clear(); -} - } // namespace fuzzer diff --git a/interpreter/llvm/src/lib/Fuzzer/FuzzerMutate.h b/interpreter/llvm/src/lib/Fuzzer/FuzzerMutate.h index 8c8fb3fd74c7b..84b04c0dbf3ea 100644 --- a/interpreter/llvm/src/lib/Fuzzer/FuzzerMutate.h +++ b/interpreter/llvm/src/lib/Fuzzer/FuzzerMutate.h @@ -52,10 +52,6 @@ class MutationDispatcher { size_t Mutate_AddWordFromManualDictionary(uint8_t *Data, size_t Size, size_t MaxSize); - /// Mutates data by adding a word from the temporary automatic dictionary. - size_t Mutate_AddWordFromTemporaryAutoDictionary(uint8_t *Data, size_t Size, - size_t MaxSize); - /// Mutates data by adding a word from the TORC. size_t Mutate_AddWordFromTORC(uint8_t *Data, size_t Size, size_t MaxSize); @@ -84,8 +80,6 @@ class MutationDispatcher { void AddWordToManualDictionary(const Word &W); - void AddWordToAutoDictionary(DictionaryEntry DE); - void ClearAutoDictionary(); void PrintRecommendedDictionary(); void SetCorpus(const InputCorpus *Corpus) { this->Corpus = Corpus; } diff --git a/interpreter/llvm/src/lib/Fuzzer/FuzzerOptions.h b/interpreter/llvm/src/lib/Fuzzer/FuzzerOptions.h index b1366789be007..9500235e2b1f3 100644 --- a/interpreter/llvm/src/lib/Fuzzer/FuzzerOptions.h +++ b/interpreter/llvm/src/lib/Fuzzer/FuzzerOptions.h @@ -32,6 +32,7 @@ struct FuzzingOptions { bool UseCmp = false; bool UseValueProfile = false; bool Shrink = false; + bool ReduceInputs = false; int ReloadIntervalSec = 1; bool ShuffleAtStartUp = true; bool PreferSmall = true; diff --git a/interpreter/llvm/src/lib/Fuzzer/FuzzerShmemPosix.cpp b/interpreter/llvm/src/lib/Fuzzer/FuzzerShmemPosix.cpp index 2723bdd86f487..50cdcfb509dc2 100644 --- a/interpreter/llvm/src/lib/Fuzzer/FuzzerShmemPosix.cpp +++ b/interpreter/llvm/src/lib/Fuzzer/FuzzerShmemPosix.cpp @@ -14,14 +14,14 @@ #include "FuzzerIO.h" #include "FuzzerShmem.h" -#include -#include #include #include -#include #include #include #include +#include +#include +#include #include namespace fuzzer { diff --git a/interpreter/llvm/src/lib/Fuzzer/FuzzerShmemWindows.cpp b/interpreter/llvm/src/lib/Fuzzer/FuzzerShmemWindows.cpp index 6325b4b8e5b41..d330ebf4fd07a 100644 --- a/interpreter/llvm/src/lib/Fuzzer/FuzzerShmemWindows.cpp +++ b/interpreter/llvm/src/lib/Fuzzer/FuzzerShmemWindows.cpp @@ -14,10 +14,10 @@ #include "FuzzerIO.h" #include "FuzzerShmem.h" -#include -#include #include #include +#include +#include namespace fuzzer { diff --git a/interpreter/llvm/src/lib/Fuzzer/FuzzerTracePC.cpp b/interpreter/llvm/src/lib/Fuzzer/FuzzerTracePC.cpp index ce0f7a47eee64..ced0a21333408 100644 --- a/interpreter/llvm/src/lib/Fuzzer/FuzzerTracePC.cpp +++ b/interpreter/llvm/src/lib/Fuzzer/FuzzerTracePC.cpp @@ -12,12 +12,12 @@ // //===----------------------------------------------------------------------===// +#include "FuzzerTracePC.h" #include "FuzzerCorpus.h" #include "FuzzerDefs.h" #include "FuzzerDictionary.h" #include "FuzzerExtFunctions.h" #include "FuzzerIO.h" -#include "FuzzerTracePC.h" #include "FuzzerUtil.h" #include "FuzzerValueBitMap.h" #include @@ -37,6 +37,8 @@ namespace fuzzer { TracePC TPC; 
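+// Out-of-line storage for the guard flag: ScopedDoingMyOwnMemOrStr raises it
+// while libFuzzer itself calls mem*/str* functions, so the weak sanitizer
+// hooks further down ignore libFuzzer's own comparisons.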
+int ScopedDoingMyOwnMemOrStr::DoingMyOwnMemOrStr; + uint8_t *TracePC::Counters() const { return __sancov_trace_pc_guard_8bit_counters; } @@ -53,6 +55,17 @@ size_t TracePC::GetTotalPCCoverage() { return Res; } + +void TracePC::HandleInline8bitCountersInit(uint8_t *Start, uint8_t *Stop) { + if (Start == Stop) return; + if (NumModulesWithInline8bitCounters && + ModuleCounters[NumModulesWithInline8bitCounters-1].Start == Start) return; + assert(NumModulesWithInline8bitCounters < + sizeof(ModuleCounters) / sizeof(ModuleCounters[0])); + ModuleCounters[NumModulesWithInline8bitCounters++] = {Start, Stop}; + NumInline8bitCounters += Stop - Start; +} + void TracePC::HandleInit(uint32_t *Start, uint32_t *Stop) { if (Start == Stop || *Start) return; assert(NumModules < sizeof(Modules) / sizeof(Modules[0])); @@ -76,6 +89,13 @@ void TracePC::PrintModuleInfo() { for (size_t i = 0; i < NumModules; i++) Printf("[%p, %p), ", Modules[i].Start, Modules[i].Stop); Printf("\n"); + if (NumModulesWithInline8bitCounters) { + Printf("INFO: Loaded %zd modules with %zd inline 8-bit counters\n", + NumModulesWithInline8bitCounters, NumInline8bitCounters); + for (size_t i = 0; i < NumModulesWithInline8bitCounters; i++) + Printf("[%p, %p), ", ModuleCounters[i].Start, ModuleCounters[i].Stop); + Printf("\n"); + } } ATTRIBUTE_NO_SANITIZE_ALL @@ -275,6 +295,20 @@ void TracePC::HandleCmp(uintptr_t PC, T Arg1, T Arg2) { ValueProfileMap.AddValue(Idx); } +static size_t InternalStrnlen(const char *S, size_t MaxLen) { + size_t Len = 0; + for (; Len < MaxLen && S[Len]; Len++) {} + return Len; +} + +// Finds min of (strlen(S1), strlen(S2)). +// Needed because one of these strings may actually be non-zero-terminated. +static size_t InternalStrnlen2(const char *S1, const char *S2) { + size_t Len = 0; + for (; S1[Len] && S2[Len]; Len++) {} + return Len; +} + } // namespace fuzzer extern "C" { @@ -303,6 +337,11 @@ void __sanitizer_cov_trace_pc_guard_init(uint32_t *Start, uint32_t *Stop) { fuzzer::TPC.HandleInit(Start, Stop); } +ATTRIBUTE_INTERFACE +void __sanitizer_cov_8bit_counters_init(uint8_t *Start, uint8_t *Stop) { + fuzzer::TPC.HandleInline8bitCountersInit(Start, Stop); +} + ATTRIBUTE_INTERFACE ATTRIBUTE_NO_SANITIZE_ALL void __sanitizer_cov_trace_pc_indir(uintptr_t Callee) { @@ -392,4 +431,71 @@ void __sanitizer_cov_trace_gep(uintptr_t Idx) { uintptr_t PC = reinterpret_cast<uintptr_t>(__builtin_return_address(0)); fuzzer::TPC.HandleCmp(PC, Idx, (uintptr_t)0); } + +ATTRIBUTE_INTERFACE ATTRIBUTE_NO_SANITIZE_MEMORY +void __sanitizer_weak_hook_memcmp(void *caller_pc, const void *s1, + const void *s2, size_t n, int result) { + if (fuzzer::ScopedDoingMyOwnMemOrStr::DoingMyOwnMemOrStr) return; + if (result == 0) return; // No reason to mutate. + if (n <= 1) return; // Not interesting. + fuzzer::TPC.AddValueForMemcmp(caller_pc, s1, s2, n, /*StopAtZero*/false); +} + +ATTRIBUTE_INTERFACE ATTRIBUTE_NO_SANITIZE_MEMORY +void __sanitizer_weak_hook_strncmp(void *caller_pc, const char *s1, + const char *s2, size_t n, int result) { + if (fuzzer::ScopedDoingMyOwnMemOrStr::DoingMyOwnMemOrStr) return; + if (result == 0) return; // No reason to mutate. + size_t Len1 = fuzzer::InternalStrnlen(s1, n); + size_t Len2 = fuzzer::InternalStrnlen(s2, n); + n = std::min(n, Len1); + n = std::min(n, Len2); + if (n <= 1) return; // Not interesting.
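+  // Hand the clamped operands to TracePC; StopAtZero is true here because
+  // the arguments are C strings and the comparison stops at the first NUL.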
+ fuzzer::TPC.AddValueForMemcmp(caller_pc, s1, s2, n, /*StopAtZero*/true); +} + +ATTRIBUTE_INTERFACE ATTRIBUTE_NO_SANITIZE_MEMORY +void __sanitizer_weak_hook_strcmp(void *caller_pc, const char *s1, + const char *s2, int result) { + if (fuzzer::ScopedDoingMyOwnMemOrStr::DoingMyOwnMemOrStr) return; + if (result == 0) return; // No reason to mutate. + size_t N = fuzzer::InternalStrnlen2(s1, s2); + if (N <= 1) return; // Not interesting. + fuzzer::TPC.AddValueForMemcmp(caller_pc, s1, s2, N, /*StopAtZero*/true); +} + +ATTRIBUTE_INTERFACE ATTRIBUTE_NO_SANITIZE_MEMORY +void __sanitizer_weak_hook_strncasecmp(void *called_pc, const char *s1, + const char *s2, size_t n, int result) { + if (fuzzer::ScopedDoingMyOwnMemOrStr::DoingMyOwnMemOrStr) return; + return __sanitizer_weak_hook_strncmp(called_pc, s1, s2, n, result); +} + +ATTRIBUTE_INTERFACE ATTRIBUTE_NO_SANITIZE_MEMORY +void __sanitizer_weak_hook_strcasecmp(void *called_pc, const char *s1, + const char *s2, int result) { + if (fuzzer::ScopedDoingMyOwnMemOrStr::DoingMyOwnMemOrStr) return; + return __sanitizer_weak_hook_strcmp(called_pc, s1, s2, result); +} + +ATTRIBUTE_INTERFACE ATTRIBUTE_NO_SANITIZE_MEMORY +void __sanitizer_weak_hook_strstr(void *called_pc, const char *s1, + const char *s2, char *result) { + if (fuzzer::ScopedDoingMyOwnMemOrStr::DoingMyOwnMemOrStr) return; + fuzzer::TPC.MMT.Add(reinterpret_cast(s2), strlen(s2)); +} + +ATTRIBUTE_INTERFACE ATTRIBUTE_NO_SANITIZE_MEMORY +void __sanitizer_weak_hook_strcasestr(void *called_pc, const char *s1, + const char *s2, char *result) { + if (fuzzer::ScopedDoingMyOwnMemOrStr::DoingMyOwnMemOrStr) return; + fuzzer::TPC.MMT.Add(reinterpret_cast(s2), strlen(s2)); +} + +ATTRIBUTE_INTERFACE ATTRIBUTE_NO_SANITIZE_MEMORY +void __sanitizer_weak_hook_memmem(void *called_pc, const void *s1, size_t len1, + const void *s2, size_t len2, void *result) { + if (fuzzer::ScopedDoingMyOwnMemOrStr::DoingMyOwnMemOrStr) return; + fuzzer::TPC.MMT.Add(reinterpret_cast(s2), len2); +} } // extern "C" diff --git a/interpreter/llvm/src/lib/Fuzzer/FuzzerTracePC.h b/interpreter/llvm/src/lib/Fuzzer/FuzzerTracePC.h index 6523fa06005c4..b36c4f54306cb 100644 --- a/interpreter/llvm/src/lib/Fuzzer/FuzzerTracePC.h +++ b/interpreter/llvm/src/lib/Fuzzer/FuzzerTracePC.h @@ -45,13 +45,36 @@ struct TableOfRecentCompares { Pair Table[kSize]; }; +template +struct MemMemTable { + static const size_t kSize = kSizeT; + Word MemMemWords[kSize]; + Word EmptyWord; + + void Add(const uint8_t *Data, size_t Size) { + if (Size <= 2) return; + Size = std::min(Size, Word::GetMaxSize()); + size_t Idx = SimpleFastHash(Data, Size) % kSize; + MemMemWords[Idx].Set(Data, Size); + } + const Word &Get(size_t Idx) { + for (size_t i = 0; i < kSize; i++) { + const Word &W = MemMemWords[(Idx + i) % kSize]; + if (W.size()) return W; + } + EmptyWord.Set(nullptr, 0); + return EmptyWord; + } +}; + class TracePC { public: static const size_t kNumPCs = 1 << 21; // How many bits of PC are used from __sanitizer_cov_trace_pc. 
static const size_t kTracePcBits = 18; - void HandleInit(uint32_t *start, uint32_t *stop); + void HandleInit(uint32_t *Start, uint32_t *Stop); + void HandleInline8bitCountersInit(uint8_t *Start, uint8_t *Stop); void HandleCallerCallee(uintptr_t Caller, uintptr_t Callee); template void HandleCmp(uintptr_t PC, T Arg1, T Arg2); size_t GetTotalPCCoverage(); @@ -80,6 +103,7 @@ class TracePC { TableOfRecentCompares TORC4; TableOfRecentCompares TORC8; TableOfRecentCompares TORCW; + MemMemTable<1024> MMT; void PrintNewPCs(); void InitializePrintNewPCs(); @@ -104,6 +128,10 @@ class TracePC { size_t NumModules; // linker-initialized. size_t NumGuards; // linker-initialized. + struct { uint8_t *Start, *Stop; } ModuleCounters[4096]; + size_t NumModulesWithInline8bitCounters; // linker-initialized. + size_t NumInline8bitCounters; + uint8_t *Counters() const; uintptr_t *PCs() const; @@ -118,12 +146,24 @@ void ForEachNonZeroByte(const uint8_t *Begin, const uint8_t *End, size_t FirstFeature, Callback Handle8bitCounter) { typedef uintptr_t LargeType; const size_t Step = sizeof(LargeType) / sizeof(uint8_t); - assert(!(reinterpret_cast(Begin) % 64)); - for (auto P = Begin; P < End; P += Step) + const size_t StepMask = Step - 1; + auto P = Begin; + // Iterate by 1 byte until either the alignment boundary or the end. + for (; reinterpret_cast(P) & StepMask && P < End; P++) + if (uint8_t V = *P) + Handle8bitCounter(FirstFeature + P - Begin, V); + + // Iterate by Step bytes at a time. + for (; P < End; P += Step) if (LargeType Bundle = *reinterpret_cast(P)) for (size_t I = 0; I < Step; I++, Bundle >>= 8) if (uint8_t V = Bundle & 0xff) Handle8bitCounter(FirstFeature + P - Begin + I, V); + + // Iterate by 1 byte until the end. + for (; P < End; P++) + if (uint8_t V = *P) + Handle8bitCounter(FirstFeature + P - Begin, V); } template // bool Callback(size_t Feature) @@ -145,8 +185,16 @@ void TracePC::CollectFeatures(Callback HandleFeature) const { HandleFeature(Idx * 8 + Bit); }; - ForEachNonZeroByte(Counters, Counters + N, 0, Handle8bitCounter); - ForEachNonZeroByte(ExtraCountersBegin(), ExtraCountersEnd(), N * 8, + size_t FirstFeature = 0; + ForEachNonZeroByte(Counters, Counters + N, FirstFeature, Handle8bitCounter); + FirstFeature += N * 8; + for (size_t i = 0; i < NumModulesWithInline8bitCounters; i++) { + ForEachNonZeroByte(ModuleCounters[i].Start, ModuleCounters[i].Stop, + FirstFeature, Handle8bitCounter); + FirstFeature += 8 * (ModuleCounters[i].Stop - ModuleCounters[i].Start); + } + + ForEachNonZeroByte(ExtraCountersBegin(), ExtraCountersEnd(), FirstFeature, Handle8bitCounter); if (UseValueProfile) diff --git a/interpreter/llvm/src/lib/Fuzzer/FuzzerUtil.cpp b/interpreter/llvm/src/lib/Fuzzer/FuzzerUtil.cpp index f5fd3a85187ce..2d95f40e46a14 100644 --- a/interpreter/llvm/src/lib/Fuzzer/FuzzerUtil.cpp +++ b/interpreter/llvm/src/lib/Fuzzer/FuzzerUtil.cpp @@ -215,4 +215,11 @@ bool ExecuteCommandAndReadOutput(const std::string &Command, std::string *Out) { return true; } +size_t SimpleFastHash(const uint8_t *Data, size_t Size) { + size_t Res = 0; + for (size_t i = 0; i < Size; i++) + Res = Res * 11 + Data[i]; + return Res; +} + } // namespace fuzzer diff --git a/interpreter/llvm/src/lib/Fuzzer/FuzzerUtil.h b/interpreter/llvm/src/lib/Fuzzer/FuzzerUtil.h index f84fd9ef0fcea..62d6e61dcf171 100644 --- a/interpreter/llvm/src/lib/Fuzzer/FuzzerUtil.h +++ b/interpreter/llvm/src/lib/Fuzzer/FuzzerUtil.h @@ -67,10 +67,20 @@ inline std::string CloneArgsWithoutX(const std::vector &Args, return CloneArgsWithoutX(Args, X, X); } 
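+// Splits S at the first occurrence of X, returning a
+// std::pair<std::string, std::string>: .first is everything before X and
+// .second is X plus everything after it; returns (S, "") when X is absent.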
+inline std::pair SplitBefore(std::string X, + std::string S) { + auto Pos = S.find(X); + if (Pos == std::string::npos) + return std::make_pair(S, ""); + return std::make_pair(S.substr(0, Pos), S.substr(Pos)); +} + std::string DisassembleCmd(const std::string &FileName); std::string SearchRegexCmd(const std::string &Regex); +size_t SimpleFastHash(const uint8_t *Data, size_t Size); + } // namespace fuzzer #endif // LLVM_FUZZER_UTIL_H diff --git a/interpreter/llvm/src/lib/Fuzzer/FuzzerUtilDarwin.cpp b/interpreter/llvm/src/lib/Fuzzer/FuzzerUtilDarwin.cpp index 9674368c355ee..2df4872a92069 100644 --- a/interpreter/llvm/src/lib/Fuzzer/FuzzerUtilDarwin.cpp +++ b/interpreter/llvm/src/lib/Fuzzer/FuzzerUtilDarwin.cpp @@ -15,6 +15,8 @@ #include #include #include +#include +#include #include // There is no header for this on macOS so declare here @@ -97,11 +99,16 @@ int ExecuteCommand(const std::string &Command) { pid_t Pid; char **Environ = environ; // Read from global const char *CommandCStr = Command.c_str(); - const char *Argv[] = {"sh", "-c", CommandCStr, NULL}; + char *const Argv[] = { + strdup("sh"), + strdup("-c"), + strdup(CommandCStr), + NULL + }; int ErrorCode = 0, ProcessStatus = 0; // FIXME: We probably shouldn't hardcode the shell path. ErrorCode = posix_spawn(&Pid, "/bin/sh", NULL, &SpawnAttributes, - (char *const *)Argv, Environ); + Argv, Environ); (void)posix_spawnattr_destroy(&SpawnAttributes); if (!ErrorCode) { pid_t SavedPid = Pid; @@ -120,6 +127,8 @@ int ExecuteCommand(const std::string &Command) { // Shell execution failure. ProcessStatus = W_EXITCODE(127, 0); } + for (unsigned i = 0, n = sizeof(Argv) / sizeof(Argv[0]); i < n; ++i) + free(Argv[i]); // Restore the signal handlers of the current process when the last thread // using this function finishes. diff --git a/interpreter/llvm/src/lib/Fuzzer/FuzzerUtilPosix.cpp b/interpreter/llvm/src/lib/Fuzzer/FuzzerUtilPosix.cpp index 0161309fbf86b..bc85264ac187d 100644 --- a/interpreter/llvm/src/lib/Fuzzer/FuzzerUtilPosix.cpp +++ b/interpreter/llvm/src/lib/Fuzzer/FuzzerUtilPosix.cpp @@ -47,8 +47,21 @@ static void FileSizeExceedHandler(int, siginfo_t *, void *) { static void SetSigaction(int signum, void (*callback)(int, siginfo_t *, void *)) { - struct sigaction sigact; - memset(&sigact, 0, sizeof(sigact)); + struct sigaction sigact = {}; + if (sigaction(signum, nullptr, &sigact)) { + Printf("libFuzzer: sigaction failed with %d\n", errno); + exit(1); + } + if (sigact.sa_flags & SA_SIGINFO) { + if (sigact.sa_sigaction) + return; + } else { + if (sigact.sa_handler != SIG_DFL && sigact.sa_handler != SIG_IGN && + sigact.sa_handler != SIG_ERR) + return; + } + + sigact = {}; sigact.sa_sigaction = callback; if (sigaction(signum, &sigact, 0)) { Printf("libFuzzer: sigaction failed with %d\n", errno); diff --git a/interpreter/llvm/src/lib/Fuzzer/FuzzerUtilWindows.cpp b/interpreter/llvm/src/lib/Fuzzer/FuzzerUtilWindows.cpp index 08bb3cf3be157..25ac976fc2dbb 100644 --- a/interpreter/llvm/src/lib/Fuzzer/FuzzerUtilWindows.cpp +++ b/interpreter/llvm/src/lib/Fuzzer/FuzzerUtilWindows.cpp @@ -22,6 +22,8 @@ #include #include #include + +// This must be included after windows.h. 
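The FuzzerUtilPosix.cpp change above makes SetSigaction query the current disposition first and back off when the target process already installed its own handler, instead of unconditionally clobbering it. Below is a standalone POSIX sketch of that query-then-install pattern; unlike the patch, it sets SA_SIGINFO explicitly and omits the SIG_ERR comparison.

#include <csignal>
#include <cstdio>
#include <cstdlib>

static void Callback(int Sig, siginfo_t *, void *) {
  // Async-signal-safety is ignored here; this is only a demonstration.
  std::fprintf(stderr, "caught signal %d\n", Sig);
  std::_Exit(1);
}

// Install CB for Signum unless the program already has a user handler.
static void SetSigactionSketch(int Signum,
                               void (*CB)(int, siginfo_t *, void *)) {
  struct sigaction Current = {};
  if (sigaction(Signum, nullptr, &Current)) {   // query, don't modify
    std::perror("sigaction(query)");
    std::exit(1);
  }
  bool UserHandler = (Current.sa_flags & SA_SIGINFO)
                         ? Current.sa_sigaction != nullptr
                         : Current.sa_handler != SIG_DFL &&
                           Current.sa_handler != SIG_IGN;
  if (UserHandler)
    return;                                     // respect the existing handler
  struct sigaction New = {};
  New.sa_sigaction = CB;
  New.sa_flags = SA_SIGINFO;
  if (sigaction(Signum, &New, nullptr)) {
    std::perror("sigaction(install)");
    std::exit(1);
  }
}

int main() {
  SetSigactionSketch(SIGSEGV, Callback);
  std::puts("handler installed (or an existing one was kept)");
}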
 #include

 namespace fuzzer {

diff --git a/interpreter/llvm/src/lib/Fuzzer/afl/afl_driver.cpp b/interpreter/llvm/src/lib/Fuzzer/afl/afl_driver.cpp
index b3a54e57fcebd..15bceb896e175 100644
--- a/interpreter/llvm/src/lib/Fuzzer/afl/afl_driver.cpp
+++ b/interpreter/llvm/src/lib/Fuzzer/afl/afl_driver.cpp
@@ -12,8 +12,8 @@ Usage:
 ################################################################################
 cat << EOF > test_fuzzer.cc
-#include
 #include
+#include
 extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
   if (size > 0 && data[0] == 'H')
     if (size > 1 && data[1] == 'I')
@@ -22,8 +22,8 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
   return 0;
 }
 EOF
-# Build your target with -fsanitize-coverage=trace-pc using fresh clang.
-clang -g -fsanitize-coverage=trace-pc test_fuzzer.cc -c
+# Build your target with -fsanitize-coverage=trace-pc-guard using fresh clang.
+clang -g -fsanitize-coverage=trace-pc-guard test_fuzzer.cc -c
 # Build afl-llvm-rt.o.c from the AFL distribution.
 clang -c -w $AFL_HOME/llvm_mode/afl-llvm-rt.o.c
 # Build this file, link it with afl-llvm-rt.o.o and the target code.
@@ -50,15 +50,20 @@ statistics from the file. If that fails then the process will quit.
 */
 #include
-#include
+#include
+#include
 #include
+#include
 #include
 #include
-#include
-#include
-#include
 #include
 #include
+#include
+
+#include
+#include
+#include
+
 // Platform detection. Copied from FuzzerInternal.h
 #ifdef __linux__
 #define LIBFUZZER_LINUX 1
@@ -245,17 +250,39 @@ extern "C" size_t LLVMFuzzerMutate(uint8_t *Data, size_t Size, size_t MaxSize) {
   return 0;
 }

+// Execute any files provided as parameters.
+int ExecuteFilesOnyByOne(int argc, char **argv) {
+  for (int i = 1; i < argc; i++) {
+    std::ifstream in(argv[i]);
+    in.seekg(0, in.end);
+    size_t length = in.tellg();
+    in.seekg(0, in.beg);
+    std::cout << "Reading " << length << " bytes from " << argv[i] << std::endl;
+    // Allocate exactly length bytes so that we reliably catch buffer overflows.
+    std::vector<char> bytes(length);
+    in.read(bytes.data(), bytes.size());
+    assert(in);
+    LLVMFuzzerTestOneInput(reinterpret_cast<const uint8_t *>(bytes.data()),
+                           bytes.size());
+    std::cout << "Execution successful" << std::endl;
+  }
+  return 0;
+}
+
 int main(int argc, char **argv) {
-  fprintf(stderr, "======================= INFO =========================\n"
-                  "This binary is built for AFL-fuzz.\n"
-                  "To run the target function on a single input execute this:\n"
-                  "  %s < INPUT_FILE\n"
-                  "To run the fuzzing execute this:\n"
-                  "  afl-fuzz [afl-flags] %s [N] "
-                  "-- run N fuzzing iterations before "
-                  "re-spawning the process (default: 1000)\n"
-                  "======================================================\n",
-          argv[0], argv[0]);
+  fprintf(stderr,
+          "======================= INFO =========================\n"
+          "This binary is built for AFL-fuzz.\n"
+          "To run the target function on individual input(s) execute this:\n"
+          "  %s < INPUT_FILE\n"
+          "or\n"
+          "  %s INPUT_FILE1 [INPUT_FILE2 ... ]\n"
+          "To fuzz with afl-fuzz execute this:\n"
+          "  afl-fuzz [afl-flags] %s [-N]\n"
+          "afl-fuzz will run N iterations before "
+          "re-spawning the process (default: 1000)\n"
+          "======================================================\n",
+          argv[0], argv[0], argv[0]);
   if (LLVMFuzzerInitialize)
     LLVMFuzzerInitialize(&argc, &argv);
   // Do any other expensive one-time initialization here.
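The driver logic above and in the hunk that follows supports three call styles: -N sets the iteration count, a bare positive number is the deprecated old style (warned about), and anything else is treated as a list of input files run one at a time through the target. A self-contained mirror of that flow; TestOneInput and ReadFileExact are stand-ins, and unlike the patch the reader checks for a failed open, where tellg() returns -1 and would convert to a huge size_t.

#include <cassert>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <vector>

// Stand-in for the real fuzz target.
static int TestOneInput(const uint8_t *Data, size_t Size) {
  std::printf("ran target on %zu bytes\n", Size);
  return 0;
}

// Reads a whole file into an exactly-sized buffer, so a read past the end of
// the input inside the target is a genuine heap-buffer-overflow for ASan.
static bool ReadFileExact(const char *Path, std::vector<char> &Out) {
  std::ifstream In(Path, std::ios::binary);
  if (!In)
    return false;                     // avoids the tellg() == -1 pitfall
  In.seekg(0, In.end);
  std::streamoff Length = In.tellg();
  In.seekg(0, In.beg);
  if (Length < 0)
    return false;
  Out.resize(static_cast<size_t>(Length));
  In.read(Out.data(), Out.size());
  return static_cast<bool>(In);
}

int main(int argc, char **argv) {
  int N = 1000;                                  // default iteration count
  if (argc == 2 && argv[1][0] == '-') {
    N = std::atoi(argv[1] + 1);                  // new style: "-5000"
  } else if (argc == 2 && (N = std::atoi(argv[1])) > 0) {
    std::fprintf(stderr, "WARNING: deprecated call style `%s %d`\n",
                 argv[0], N);
  } else if (argc > 1) {                         // otherwise: input files
    std::vector<char> Bytes;
    for (int i = 1; i < argc; i++) {
      if (!ReadFileExact(argv[i], Bytes)) {
        std::fprintf(stderr, "failed to read %s\n", argv[i]);
        continue;
      }
      TestOneInput(reinterpret_cast<const uint8_t *>(Bytes.data()),
                   Bytes.size());
    }
    return 0;
  }
  assert(N > 0);
  std::printf("fuzzing for %d iterations\n", N);
}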
@@ -266,8 +293,14 @@ int main(int argc, char **argv) { __afl_manual_init(); int N = 1000; - if (argc >= 2) - N = atoi(argv[1]); + if (argc == 2 && argv[1][0] == '-') + N = atoi(argv[1] + 1); + else if(argc == 2 && (N = atoi(argv[1])) > 0) + fprintf(stderr, "WARNING: using the deprecated call style `%s %d`\n", + argv[0], N); + else if (argc > 1) + return ExecuteFilesOnyByOne(argc, argv); + assert(N > 0); time_t unit_time_secs; int num_runs = 0; diff --git a/interpreter/llvm/src/lib/IR/AsmWriter.cpp b/interpreter/llvm/src/lib/IR/AsmWriter.cpp index ec4663018bd4b..170bc544d53f8 100644 --- a/interpreter/llvm/src/lib/IR/AsmWriter.cpp +++ b/interpreter/llvm/src/lib/IR/AsmWriter.cpp @@ -1,5 +1,4 @@ - -//===-- AsmWriter.cpp - Printing LLVM as an assembly file -----------------===// +//===- AsmWriter.cpp - Printing LLVM as an assembly file ------------------===// // // The LLVM Compiler Infrastructure // @@ -15,63 +14,105 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/IR/Argument.h" #include "llvm/IR/AssemblyAnnotationWriter.h" #include "llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" +#include "llvm/IR/Comdat.h" +#include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalIFunc.h" +#include "llvm/IR/GlobalIndirectSymbol.h" +#include "llvm/IR/GlobalObject.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IRPrintingPasses.h" #include "llvm/IR/InlineAsm.h" -#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/ModuleSlotTracker.h" #include "llvm/IR/Operator.h" #include "llvm/IR/Statepoint.h" +#include "llvm/IR/Type.h" #include "llvm/IR/TypeFinder.h" +#include "llvm/IR/Use.h" #include "llvm/IR/UseListOrder.h" -#include "llvm/IR/ValueSymbolTable.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/AtomicOrdering.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/FormattedStream.h" -#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include +#include #include +#include +#include +#include +#include +#include +#include +#include +#include + using namespace llvm; // Make virtual table appear in this compilation unit. 
-AssemblyAnnotationWriter::~AssemblyAnnotationWriter() {} +AssemblyAnnotationWriter::~AssemblyAnnotationWriter() = default; //===----------------------------------------------------------------------===// // Helper Functions //===----------------------------------------------------------------------===// namespace { + struct OrderMap { DenseMap> IDs; unsigned size() const { return IDs.size(); } std::pair &operator[](const Value *V) { return IDs[V]; } + std::pair lookup(const Value *V) const { return IDs.lookup(V); } + void index(const Value *V) { // Explicitly sequence get-size and insert-value operations to avoid UB. unsigned ID = IDs.size() + 1; IDs[V].first = ID; } }; -} + +} // end anonymous namespace static void orderValue(const Value *V, OrderMap &OM) { if (OM.lookup(V).first) @@ -139,7 +180,7 @@ static void predictValueUseListOrderImpl(const Value *V, const Function *F, unsigned ID, const OrderMap &OM, UseListOrderStack &Stack) { // Predict use-list order for this one. - typedef std::pair Entry; + using Entry = std::pair; SmallVector List; for (const Use &U : V->uses()) // Check if this user will be serialized. @@ -324,7 +365,7 @@ static void PrintCallingConv(unsigned cc, raw_ostream &Out) { case CallingConv::PTX_Kernel: Out << "ptx_kernel"; break; case CallingConv::PTX_Device: Out << "ptx_device"; break; case CallingConv::X86_64_SysV: Out << "x86_64_sysvcc"; break; - case CallingConv::X86_64_Win64: Out << "x86_64_win64cc"; break; + case CallingConv::Win64: Out << "win64cc"; break; case CallingConv::SPIR_FUNC: Out << "spir_func"; break; case CallingConv::SPIR_KERNEL: Out << "spir_kernel"; break; case CallingConv::Swift: Out << "swiftcc"; break; @@ -421,13 +462,10 @@ static void PrintLLVMName(raw_ostream &OS, const Value *V) { isa(V) ? GlobalPrefix : LocalPrefix); } - namespace { + class TypePrinting { - TypePrinting(const TypePrinting &) = delete; - void operator=(const TypePrinting&) = delete; public: - /// NamedTypes - The named types that are used by the current module. TypeFinder NamedTypes; @@ -435,6 +473,8 @@ class TypePrinting { DenseMap NumberedTypes; TypePrinting() = default; + TypePrinting(const TypePrinting &) = delete; + TypePrinting &operator=(const TypePrinting &) = delete; void incorporateTypes(const Module &M); @@ -442,7 +482,8 @@ class TypePrinting { void printStructBody(StructType *Ty, raw_ostream &OS); }; -} // namespace + +} // end anonymous namespace void TypePrinting::incorporateTypes(const Module &M) { NamedTypes.run(M, false); @@ -574,6 +615,7 @@ void TypePrinting::printStructBody(StructType *STy, raw_ostream &OS) { } namespace llvm { + //===----------------------------------------------------------------------===// // SlotTracker Class: Enumerate slot numbers for unnamed values //===----------------------------------------------------------------------===// @@ -582,32 +624,33 @@ namespace llvm { class SlotTracker { public: /// ValueMap - A mapping of Values to slot numbers. - typedef DenseMap ValueMap; + using ValueMap = DenseMap; private: /// TheModule - The module for which we are holding slot numbers. const Module* TheModule; /// TheFunction - The function for which we are holding slot numbers. - const Function* TheFunction; - bool FunctionProcessed; + const Function* TheFunction = nullptr; + bool FunctionProcessed = false; bool ShouldInitializeAllMetadata; /// mMap - The slot map for the module level data. ValueMap mMap; - unsigned mNext; + unsigned mNext = 0; /// fMap - The slot map for the function level data. 
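OrderMap::index() above splits the size computation and the map insertion into two statements on purpose: written as IDs[V].first = IDs.size() + 1, the compiler may evaluate IDs[V], which inserts and grows the map, before IDs.size(), so the assigned ID would differ between compilers (the evaluation order is unspecified before C++17). A small demonstration of the safe form with std::map:

#include <cassert>
#include <map>
#include <utility>

int main() {
  std::map<const void *, std::pair<unsigned, bool>> IDs;
  int A = 0, B = 0;

  // Risky form: the order of IDs[&A] vs. IDs.size() is unspecified before
  // C++17, so the stored ID may be computed before OR after the insertion.
  //   IDs[&A].first = IDs.size() + 1;

  // Safe form used by the patch: read the size first, then insert.
  unsigned ID = IDs.size() + 1;
  IDs[&A].first = ID;
  assert(IDs[&A].first == 1);

  ID = IDs.size() + 1;
  IDs[&B].first = ID;
  assert(IDs[&B].first == 2);
}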
ValueMap fMap; - unsigned fNext; + unsigned fNext = 0; /// mdnMap - Map for MDNodes. DenseMap mdnMap; - unsigned mdnNext; + unsigned mdnNext = 0; /// asMap - The slot map for attribute sets. DenseMap asMap; - unsigned asNext; + unsigned asNext = 0; + public: /// Construct from a module. /// @@ -616,6 +659,7 @@ class SlotTracker { /// within a function (even if no functions have been initialized). explicit SlotTracker(const Module *M, bool ShouldInitializeAllMetadata = false); + /// Construct from a function, starting out in incorp state. /// /// If \c ShouldInitializeAllMetadata, initializes all metadata in all @@ -624,6 +668,9 @@ class SlotTracker { explicit SlotTracker(const Function *F, bool ShouldInitializeAllMetadata = false); + SlotTracker(const SlotTracker &) = delete; + SlotTracker &operator=(const SlotTracker &) = delete; + /// Return the slot number of the specified value in it's type /// plane. If something is not in the SlotTracker, return -1. int getLocalSlot(const Value *V); @@ -646,14 +693,16 @@ class SlotTracker { void purgeFunction(); /// MDNode map iterators. - typedef DenseMap::iterator mdn_iterator; + using mdn_iterator = DenseMap::iterator; + mdn_iterator mdn_begin() { return mdnMap.begin(); } mdn_iterator mdn_end() { return mdnMap.end(); } unsigned mdn_size() const { return mdnMap.size(); } bool mdn_empty() const { return mdnMap.empty(); } /// AttributeSet map iterators. - typedef DenseMap::iterator as_iterator; + using as_iterator = DenseMap::iterator; + as_iterator as_begin() { return asMap.begin(); } as_iterator as_end() { return asMap.end(); } unsigned as_size() const { return asMap.size(); } @@ -691,11 +740,9 @@ class SlotTracker { /// Add all of the metadata from an instruction. void processInstructionMetadata(const Instruction &I); - - SlotTracker(const SlotTracker &) = delete; - void operator=(const SlotTracker &) = delete; }; -} // namespace llvm + +} // end namespace llvm ModuleSlotTracker::ModuleSlotTracker(SlotTracker &Machine, const Module *M, const Function *F) @@ -706,7 +753,7 @@ ModuleSlotTracker::ModuleSlotTracker(const Module *M, : ShouldCreateStorage(M), ShouldInitializeAllMetadata(ShouldInitializeAllMetadata), M(M) {} -ModuleSlotTracker::~ModuleSlotTracker() {} +ModuleSlotTracker::~ModuleSlotTracker() = default; SlotTracker *ModuleSlotTracker::getMachine() { if (!ShouldCreateStorage) @@ -773,17 +820,13 @@ static SlotTracker *createSlotTracker(const Value *V) { // Module level constructor. Causes the contents of the Module (sans functions) // to be added to the slot table. SlotTracker::SlotTracker(const Module *M, bool ShouldInitializeAllMetadata) - : TheModule(M), TheFunction(nullptr), FunctionProcessed(false), - ShouldInitializeAllMetadata(ShouldInitializeAllMetadata), mNext(0), - fNext(0), mdnNext(0), asNext(0) {} + : TheModule(M), ShouldInitializeAllMetadata(ShouldInitializeAllMetadata) {} // Function level constructor. Causes the contents of the Module and the one // function provided to be added to the slot table. SlotTracker::SlotTracker(const Function *F, bool ShouldInitializeAllMetadata) : TheModule(F ? F->getParent() : nullptr), TheFunction(F), - FunctionProcessed(false), - ShouldInitializeAllMetadata(ShouldInitializeAllMetadata), mNext(0), - fNext(0), mdnNext(0), asNext(0) {} + ShouldInitializeAllMetadata(ShouldInitializeAllMetadata) {} inline void SlotTracker::initialize() { if (TheModule) { @@ -949,7 +992,6 @@ int SlotTracker::getMetadataSlot(const MDNode *N) { return MI == mdnMap.end() ? 
-1 : (int)MI->second; } - /// getLocalSlot - Get the slot number for a value that is local to a function. int SlotTracker::getLocalSlot(const Value *V) { assert(!isa(V) && "Can't get a constant or global slot with this!"); @@ -1248,7 +1290,6 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV, return; } - if (const ConstantStruct *CS = dyn_cast(CV)) { if (CS->getType()->isPacked()) Out << '<'; @@ -1381,11 +1422,14 @@ static void writeMDTuple(raw_ostream &Out, const MDTuple *Node, } namespace { + struct FieldSeparator { - bool Skip; + bool Skip = true; const char *Sep; - FieldSeparator(const char *Sep = ", ") : Skip(true), Sep(Sep) {} + + FieldSeparator(const char *Sep = ", ") : Sep(Sep) {} }; + raw_ostream &operator<<(raw_ostream &OS, FieldSeparator &FS) { if (FS.Skip) { FS.Skip = false; @@ -1393,19 +1437,20 @@ raw_ostream &operator<<(raw_ostream &OS, FieldSeparator &FS) { } return OS << FS.Sep; } + struct MDFieldPrinter { raw_ostream &Out; FieldSeparator FS; - TypePrinting *TypePrinter; - SlotTracker *Machine; - const Module *Context; + TypePrinting *TypePrinter = nullptr; + SlotTracker *Machine = nullptr; + const Module *Context = nullptr; - explicit MDFieldPrinter(raw_ostream &Out) - : Out(Out), TypePrinter(nullptr), Machine(nullptr), Context(nullptr) {} + explicit MDFieldPrinter(raw_ostream &Out) : Out(Out) {} MDFieldPrinter(raw_ostream &Out, TypePrinting *TypePrinter, SlotTracker *Machine, const Module *Context) : Out(Out), TypePrinter(TypePrinter), Machine(Machine), Context(Context) { } + void printTag(const DINode *N); void printMacinfoType(const DIMacroNode *N); void printChecksumKind(const DIFile *N); @@ -1422,7 +1467,8 @@ struct MDFieldPrinter { bool ShouldSkipZero = true); void printEmissionKind(StringRef Name, DICompileUnit::DebugEmissionKind EK); }; -} // end namespace + +} // end anonymous namespace void MDFieldPrinter::printTag(const DINode *N) { Out << FS << "tag: "; @@ -1518,7 +1564,6 @@ void MDFieldPrinter::printEmissionKind(StringRef Name, Out << FS << Name << ": " << DICompileUnit::EmissionKindString(EK); } - template void MDFieldPrinter::printDwarfEnum(StringRef Name, IntTy Value, Stringifier toString, bool ShouldSkipZero) { @@ -1919,11 +1964,11 @@ static void writeDIImportedEntity(raw_ostream &Out, const DIImportedEntity *N, Printer.printString("name", N->getName()); Printer.printMetadata("scope", N->getRawScope(), /* ShouldSkipNull */ false); Printer.printMetadata("entity", N->getRawEntity()); + Printer.printMetadata("file", N->getRawFile()); Printer.printInt("line", N->getLine()); Out << ")"; } - static void WriteMDNodeBodyInternal(raw_ostream &Out, const MDNode *Node, TypePrinting *TypePrinter, SlotTracker *Machine, @@ -2062,6 +2107,7 @@ static void WriteAsOperandInternal(raw_ostream &Out, const Metadata *MD, } namespace { + class AssemblyWriter { formatted_raw_ostream &Out; const Module *TheModule; @@ -2074,6 +2120,8 @@ class AssemblyWriter { bool ShouldPreserveUseListOrder; UseListOrderStack UseListOrders; SmallVector MDNames; + /// Synchronization scope names registered with LLVMContext. 
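FieldSeparator above is a tiny stateful manipulator: streaming it prints nothing on first use and the separator on every later use, which lets the metadata printers emit comma-separated field lists without special-casing the first field. The same idea against std::ostream:

#include <iostream>

struct FieldSeparator {
  bool Skip = true;
  const char *Sep;
  FieldSeparator(const char *Sep = ", ") : Sep(Sep) {}
};

std::ostream &operator<<(std::ostream &OS, FieldSeparator &FS) {
  if (FS.Skip) {
    FS.Skip = false;   // swallow the separator once, before the first field
    return OS;
  }
  return OS << FS.Sep;
}

int main() {
  FieldSeparator FS;
  std::cout << "!DILocation(";
  std::cout << FS << "line: 42";
  std::cout << FS << "column: 7";
  std::cout << FS << "scope: !1";
  std::cout << ")\n";  // prints: !DILocation(line: 42, column: 7, scope: !1)
}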
+ SmallVector SSNs; public: /// Construct an AssemblyWriter with an external SlotTracker @@ -2089,10 +2137,15 @@ class AssemblyWriter { void writeOperand(const Value *Op, bool PrintType); void writeParamOperand(const Value *Operand, AttributeSet Attrs); void writeOperandBundles(ImmutableCallSite CS); - void writeAtomic(AtomicOrdering Ordering, SynchronizationScope SynchScope); - void writeAtomicCmpXchg(AtomicOrdering SuccessOrdering, + void writeSyncScope(const LLVMContext &Context, + SyncScope::ID SSID); + void writeAtomic(const LLVMContext &Context, + AtomicOrdering Ordering, + SyncScope::ID SSID); + void writeAtomicCmpXchg(const LLVMContext &Context, + AtomicOrdering SuccessOrdering, AtomicOrdering FailureOrdering, - SynchronizationScope SynchScope); + SyncScope::ID SSID); void writeAllMDNodes(); void writeMDNode(unsigned Slot, const MDNode *Node); @@ -2125,7 +2178,8 @@ class AssemblyWriter { // intrinsic indicating base and derived pointer names. void printGCRelocateComment(const GCRelocateInst &Relocate); }; -} // namespace + +} // end anonymous namespace AssemblyWriter::AssemblyWriter(formatted_raw_ostream &o, SlotTracker &Mac, const Module *M, AssemblyAnnotationWriter *AAW, @@ -2153,30 +2207,42 @@ void AssemblyWriter::writeOperand(const Value *Operand, bool PrintType) { WriteAsOperandInternal(Out, Operand, &TypePrinter, &Machine, TheModule); } -void AssemblyWriter::writeAtomic(AtomicOrdering Ordering, - SynchronizationScope SynchScope) { - if (Ordering == AtomicOrdering::NotAtomic) - return; +void AssemblyWriter::writeSyncScope(const LLVMContext &Context, + SyncScope::ID SSID) { + switch (SSID) { + case SyncScope::System: { + break; + } + default: { + if (SSNs.empty()) + Context.getSyncScopeNames(SSNs); - switch (SynchScope) { - case SingleThread: Out << " singlethread"; break; - case CrossThread: break; + Out << " syncscope(\""; + PrintEscapedString(SSNs[SSID], Out); + Out << "\")"; + break; + } } +} + +void AssemblyWriter::writeAtomic(const LLVMContext &Context, + AtomicOrdering Ordering, + SyncScope::ID SSID) { + if (Ordering == AtomicOrdering::NotAtomic) + return; + writeSyncScope(Context, SSID); Out << " " << toIRString(Ordering); } -void AssemblyWriter::writeAtomicCmpXchg(AtomicOrdering SuccessOrdering, +void AssemblyWriter::writeAtomicCmpXchg(const LLVMContext &Context, + AtomicOrdering SuccessOrdering, AtomicOrdering FailureOrdering, - SynchronizationScope SynchScope) { + SyncScope::ID SSID) { assert(SuccessOrdering != AtomicOrdering::NotAtomic && FailureOrdering != AtomicOrdering::NotAtomic); - switch (SynchScope) { - case SingleThread: Out << " singlethread"; break; - case CrossThread: break; - } - + writeSyncScope(Context, SSID); Out << " " << toIRString(SuccessOrdering); Out << " " << toIRString(FailureOrdering); } @@ -2594,7 +2660,6 @@ void AssemblyWriter::printTypeIdentities() { } /// printFunction - Print all aspects of a function. -/// void AssemblyWriter::printFunction(const Function *F) { // Print out the return type and name. Out << '\n'; @@ -2730,7 +2795,6 @@ void AssemblyWriter::printFunction(const Function *F) { /// printArgument - This member is called for every argument that is passed into /// the function. Simply print it out -/// void AssemblyWriter::printArgument(const Argument *Arg, AttributeSet Attrs) { // Output type... TypePrinter.print(Arg->getType(), Out); @@ -2747,7 +2811,6 @@ void AssemblyWriter::printArgument(const Argument *Arg, AttributeSet Attrs) { } /// printBasicBlock - This member is called for each basic block in a method. 
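writeSyncScope above replaces the old singlethread/crossthread switch: the default SyncScope::System prints nothing, and any other scope ID is looked up in the names registered with the LLVMContext and printed as syncscope("name"). A sketch with a plain vector standing in for that registry; the ID values are assumptions that mirror SyncScope::SingleThread == 0 and SyncScope::System == 1.

#include <iostream>
#include <string>
#include <vector>

using SyncScopeID = unsigned;
static const SyncScopeID SystemScope = 1;  // assumed to mirror SyncScope::System

// Toy registry; LLVMContext::getSyncScopeNames() fills such a list lazily.
static std::vector<std::string> SSNs = {"singlethread", "<system>", "agent"};

static void writeSyncScope(std::ostream &Out, SyncScopeID SSID) {
  if (SSID == SystemScope)
    return;  // the system scope is the unmarked default in textual IR
  Out << " syncscope(\"" << SSNs[SSID] << "\")";
}

int main() {
  std::cout << "fence";
  writeSyncScope(std::cout, SystemScope);  // prints nothing extra
  std::cout << " seq_cst\n";               // -> fence seq_cst

  std::cout << "fence";
  writeSyncScope(std::cout, 0);            // a named, non-system scope
  std::cout << " seq_cst\n";               // -> fence syncscope("singlethread") seq_cst
}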
-/// void AssemblyWriter::printBasicBlock(const BasicBlock *BB) { if (BB->hasName()) { // Print out the label if it exists... Out << "\n"; @@ -2813,7 +2876,6 @@ void AssemblyWriter::printGCRelocateComment(const GCRelocateInst &Relocate) { /// printInfoComment - Print a little comment after the instruction indicating /// which slot it occupies. -/// void AssemblyWriter::printInfoComment(const Value &V) { if (const auto *Relocate = dyn_cast(&V)) printGCRelocateComment(*Relocate); @@ -3046,7 +3108,6 @@ void AssemblyWriter::printInstruction(const Instruction &I) { Out << " #" << Machine.getAttributeGroupSlot(PAL.getFnAttributes()); writeOperandBundles(CI); - } else if (const InvokeInst *II = dyn_cast(&I)) { Operand = II->getCalledValue(); FunctionType *FTy = II->getFunctionType(); @@ -3087,7 +3148,6 @@ void AssemblyWriter::printInstruction(const Instruction &I) { writeOperand(II->getNormalDest(), true); Out << " unwind "; writeOperand(II->getUnwindDest(), true); - } else if (const AllocaInst *AI = dyn_cast(&I)) { Out << ' '; if (AI->isUsedWithInAlloca()) @@ -3113,7 +3173,6 @@ void AssemblyWriter::printInstruction(const Instruction &I) { if (AddrSpace != 0) { Out << ", addrspace(" << AddrSpace << ')'; } - } else if (isa(I)) { if (Operand) { Out << ' '; @@ -3176,21 +3235,22 @@ void AssemblyWriter::printInstruction(const Instruction &I) { // Print atomic ordering/alignment for memory operations if (const LoadInst *LI = dyn_cast(&I)) { if (LI->isAtomic()) - writeAtomic(LI->getOrdering(), LI->getSynchScope()); + writeAtomic(LI->getContext(), LI->getOrdering(), LI->getSyncScopeID()); if (LI->getAlignment()) Out << ", align " << LI->getAlignment(); } else if (const StoreInst *SI = dyn_cast(&I)) { if (SI->isAtomic()) - writeAtomic(SI->getOrdering(), SI->getSynchScope()); + writeAtomic(SI->getContext(), SI->getOrdering(), SI->getSyncScopeID()); if (SI->getAlignment()) Out << ", align " << SI->getAlignment(); } else if (const AtomicCmpXchgInst *CXI = dyn_cast(&I)) { - writeAtomicCmpXchg(CXI->getSuccessOrdering(), CXI->getFailureOrdering(), - CXI->getSynchScope()); + writeAtomicCmpXchg(CXI->getContext(), CXI->getSuccessOrdering(), + CXI->getFailureOrdering(), CXI->getSyncScopeID()); } else if (const AtomicRMWInst *RMWI = dyn_cast(&I)) { - writeAtomic(RMWI->getOrdering(), RMWI->getSynchScope()); + writeAtomic(RMWI->getContext(), RMWI->getOrdering(), + RMWI->getSyncScopeID()); } else if (const FenceInst *FI = dyn_cast(&I)) { - writeAtomic(FI->getOrdering(), FI->getSynchScope()); + writeAtomic(FI->getContext(), FI->getOrdering(), FI->getSyncScopeID()); } // Print Metadata info. 
diff --git a/interpreter/llvm/src/lib/IR/AttributeImpl.h b/interpreter/llvm/src/lib/IR/AttributeImpl.h index cf29252546952..9c7b61f679236 100644 --- a/interpreter/llvm/src/lib/IR/AttributeImpl.h +++ b/interpreter/llvm/src/lib/IR/AttributeImpl.h @@ -1,4 +1,4 @@ -//===-- AttributeImpl.h - Attribute Internals -------------------*- C++ -*-===// +//===- AttributeImpl.h - Attribute Internals --------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -21,9 +21,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/IR/Attributes.h" #include "llvm/Support/TrailingObjects.h" -#include #include -#include #include #include #include @@ -80,11 +78,13 @@ class AttributeImpl : public FoldingSetNode { else Profile(ID, getKindAsString(), getValueAsString()); } + static void Profile(FoldingSetNodeID &ID, Attribute::AttrKind Kind, uint64_t Val) { ID.AddInteger(Kind); if (Val) ID.AddInteger(Val); } + static void Profile(FoldingSetNodeID &ID, StringRef Kind, StringRef Values) { ID.AddString(Kind); if (!Values.empty()) ID.AddString(Values); @@ -100,6 +100,7 @@ class AttributeImpl : public FoldingSetNode { class EnumAttributeImpl : public AttributeImpl { virtual void anchor(); + Attribute::AttrKind Kind; protected: @@ -114,9 +115,10 @@ class EnumAttributeImpl : public AttributeImpl { }; class IntAttributeImpl : public EnumAttributeImpl { - void anchor() override; uint64_t Val; + void anchor() override; + public: IntAttributeImpl(Attribute::AttrKind Kind, uint64_t Val) : EnumAttributeImpl(IntAttrEntry, Kind), Val(Val) { @@ -132,6 +134,7 @@ class IntAttributeImpl : public EnumAttributeImpl { class StringAttributeImpl : public AttributeImpl { virtual void anchor(); + std::string Kind; std::string Val; @@ -188,20 +191,22 @@ class AttributeSetNode final std::pair> getAllocSizeArgs() const; std::string getAsString(bool InAttrGrp) const; - typedef const Attribute *iterator; + using iterator = const Attribute *; + iterator begin() const { return getTrailingObjects(); } iterator end() const { return begin() + NumAttrs; } void Profile(FoldingSetNodeID &ID) const { Profile(ID, makeArrayRef(begin(), end())); } + static void Profile(FoldingSetNodeID &ID, ArrayRef AttrList) { for (const auto &Attr : AttrList) Attr.Profile(ID); } }; -typedef std::pair IndexAttrPair; +using IndexAttrPair = std::pair; //===----------------------------------------------------------------------===// /// \class @@ -209,27 +214,21 @@ typedef std::pair IndexAttrPair; /// return type, and parameters. class AttributeListImpl final : public FoldingSetNode, - private TrailingObjects { + private TrailingObjects { friend class AttributeList; friend TrailingObjects; private: - LLVMContext &Context; - unsigned NumSlots; ///< Number of entries in this set. /// Bitset with a bit for each available attribute Attribute::AttrKind. uint64_t AvailableFunctionAttrs; + LLVMContext &Context; + unsigned NumAttrSets; ///< Number of entries in this set. // Helper fn for TrailingObjects class. - size_t numTrailingObjects(OverloadToken) { return NumSlots; } - - /// \brief Return a pointer to the IndexAttrPair for the specified slot. - const IndexAttrPair *getSlotPair(unsigned Slot) const { - return getTrailingObjects() + Slot; - } + size_t numTrailingObjects(OverloadToken) { return NumAttrSets; } public: - AttributeListImpl(LLVMContext &C, - ArrayRef> Slots); + AttributeListImpl(LLVMContext &C, ArrayRef Sets); // AttributesSetImpt is uniqued, these should not be available. 
AttributeListImpl(const AttributeListImpl &) = delete; @@ -240,40 +239,19 @@ class AttributeListImpl final /// \brief Get the context that created this AttributeListImpl. LLVMContext &getContext() { return Context; } - /// \brief Return the number of slots used in this attribute list. This is - /// the number of arguments that have an attribute set on them (including the - /// function itself). - unsigned getNumSlots() const { return NumSlots; } - - /// \brief Get the index of the given "slot" in the AttrNodes list. This index - /// is the index of the return, parameter, or function object that the - /// attributes are applied to, not the index into the AttrNodes list where the - /// attributes reside. - unsigned getSlotIndex(unsigned Slot) const { - return getSlotPair(Slot)->first; - } - - /// \brief Retrieve the attribute set node for the given "slot" in the - /// AttrNode list. - AttributeSet getSlotAttributes(unsigned Slot) const { - return getSlotPair(Slot)->second; - } - /// \brief Return true if the AttributeSet or the FunctionIndex has an /// enum attribute of the given kind. bool hasFnAttribute(Attribute::AttrKind Kind) const { return AvailableFunctionAttrs & ((uint64_t)1) << Kind; } - typedef AttributeSet::iterator iterator; - iterator begin(unsigned Slot) const { - return getSlotAttributes(Slot).begin(); - } - iterator end(unsigned Slot) const { return getSlotAttributes(Slot).end(); } + using iterator = const AttributeSet *; + + iterator begin() const { return getTrailingObjects(); } + iterator end() const { return begin() + NumAttrSets; } void Profile(FoldingSetNodeID &ID) const; - static void Profile(FoldingSetNodeID &ID, - ArrayRef> Nodes); + static void Profile(FoldingSetNodeID &ID, ArrayRef Nodes); void dump() const; }; diff --git a/interpreter/llvm/src/lib/IR/Attributes.cpp b/interpreter/llvm/src/lib/IR/Attributes.cpp index b97afb6f4a68f..8f2e641d64b92 100644 --- a/interpreter/llvm/src/lib/IR/Attributes.cpp +++ b/interpreter/llvm/src/lib/IR/Attributes.cpp @@ -13,17 +13,17 @@ // //===----------------------------------------------------------------------===// +#include "llvm/IR/Attributes.h" #include "AttributeImpl.h" #include "LLVMContextImpl.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/Optional.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" -#include "llvm/IR/Attributes.h" #include "llvm/IR/Function.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Type.h" @@ -34,6 +34,8 @@ #include "llvm/Support/raw_ostream.h" #include #include +#include +#include #include #include #include @@ -505,7 +507,7 @@ AttributeSet AttributeSet::get(LLVMContext &C, ArrayRef Attrs) { } AttributeSet AttributeSet::addAttribute(LLVMContext &C, - Attribute::AttrKind Kind) const { + Attribute::AttrKind Kind) const { if (hasAttribute(Kind)) return *this; AttrBuilder B; B.addAttribute(Kind); @@ -513,7 +515,7 @@ AttributeSet AttributeSet::addAttribute(LLVMContext &C, } AttributeSet AttributeSet::addAttribute(LLVMContext &C, StringRef Kind, - StringRef Value) const { + StringRef Value) const { AttrBuilder B; B.addAttribute(Kind, Value); return addAttributes(C, AttributeSet::get(C, B)); @@ -567,11 +569,11 @@ unsigned AttributeSet::getNumAttributes() const { } bool AttributeSet::hasAttribute(Attribute::AttrKind Kind) const { - return SetNode ? SetNode->hasAttribute(Kind) : 0; + return SetNode ? 
SetNode->hasAttribute(Kind) : false; } bool AttributeSet::hasAttribute(StringRef Kind) const { - return SetNode ? SetNode->hasAttribute(Kind) : 0; + return SetNode ? SetNode->hasAttribute(Kind) : false; } Attribute AttributeSet::getAttribute(Attribute::AttrKind Kind) const { @@ -786,48 +788,44 @@ std::string AttributeSetNode::getAsString(bool InAttrGrp) const { // AttributeListImpl Definition //===----------------------------------------------------------------------===// -AttributeListImpl::AttributeListImpl( - LLVMContext &C, ArrayRef> Slots) - : Context(C), NumSlots(Slots.size()), AvailableFunctionAttrs(0) { -#ifndef NDEBUG - assert(!Slots.empty() && "pointless AttributeListImpl"); - if (Slots.size() >= 2) { - auto &PrevPair = Slots.front(); - for (auto &CurPair : Slots.drop_front()) { - assert(PrevPair.first <= CurPair.first && "Attribute set not ordered!"); - } - } -#endif +/// Map from AttributeList index to the internal array index. Adding one works: +/// FunctionIndex: ~0U -> 0 +/// ReturnIndex: 0 -> 1 +/// FirstArgIndex: 1.. -> 2.. +static constexpr unsigned attrIdxToArrayIdx(unsigned Index) { + // MSVC warns about '~0U + 1' wrapping around when this is called on + // FunctionIndex, so cast to int first. + return static_cast(Index) + 1; +} + +AttributeListImpl::AttributeListImpl(LLVMContext &C, + ArrayRef Sets) + : AvailableFunctionAttrs(0), Context(C), NumAttrSets(Sets.size()) { + assert(!Sets.empty() && "pointless AttributeListImpl"); // There's memory after the node where we can store the entries in. - std::copy(Slots.begin(), Slots.end(), getTrailingObjects()); + std::copy(Sets.begin(), Sets.end(), getTrailingObjects()); // Initialize AvailableFunctionAttrs summary bitset. static_assert(Attribute::EndAttrKinds <= sizeof(AvailableFunctionAttrs) * CHAR_BIT, "Too many attributes"); - static_assert(AttributeList::FunctionIndex == ~0u, - "FunctionIndex should be biggest possible index"); - const auto &Last = Slots.back(); - if (Last.first == AttributeList::FunctionIndex) { - AttributeSet Node = Last.second; - for (Attribute I : Node) { - if (!I.isStringAttribute()) - AvailableFunctionAttrs |= ((uint64_t)1) << I.getKindAsEnum(); - } + static_assert(attrIdxToArrayIdx(AttributeList::FunctionIndex) == 0U, + "function should be stored in slot 0"); + for (Attribute I : Sets[0]) { + if (!I.isStringAttribute()) + AvailableFunctionAttrs |= 1ULL << I.getKindAsEnum(); } } void AttributeListImpl::Profile(FoldingSetNodeID &ID) const { - Profile(ID, makeArrayRef(getSlotPair(0), getNumSlots())); + Profile(ID, makeArrayRef(begin(), end())); } -void AttributeListImpl::Profile( - FoldingSetNodeID &ID, ArrayRef> Nodes) { - for (const auto &Node : Nodes) { - ID.AddInteger(Node.first); - ID.AddPointer(Node.second.SetNode); - } +void AttributeListImpl::Profile(FoldingSetNodeID &ID, + ArrayRef Sets) { + for (const auto &Set : Sets) + ID.AddPointer(Set.SetNode); } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -840,24 +838,13 @@ LLVM_DUMP_METHOD void AttributeListImpl::dump() const { // AttributeList Construction and Mutation Methods //===----------------------------------------------------------------------===// -AttributeList AttributeList::getImpl( - LLVMContext &C, ArrayRef> Attrs) { - assert(!Attrs.empty() && "creating pointless AttributeList"); -#ifndef NDEBUG - unsigned LastIndex = 0; - bool IsFirst = true; - for (auto &&AttrPair : Attrs) { - assert((IsFirst || LastIndex < AttrPair.first) && - "unsorted or duplicate AttributeList indices"); - assert(AttrPair.second.hasAttributes() && 
"pointless AttributeList slot"); - LastIndex = AttrPair.first; - IsFirst = false; - } -#endif +AttributeList AttributeList::getImpl(LLVMContext &C, + ArrayRef AttrSets) { + assert(!AttrSets.empty() && "pointless AttributeListImpl"); LLVMContextImpl *pImpl = C.pImpl; FoldingSetNodeID ID; - AttributeListImpl::Profile(ID, Attrs); + AttributeListImpl::Profile(ID, AttrSets); void *InsertPoint; AttributeListImpl *PA = @@ -868,8 +855,8 @@ AttributeList AttributeList::getImpl( if (!PA) { // Coallocate entries after the AttributeListImpl itself. void *Mem = ::operator new( - AttributeListImpl::totalSizeToAlloc(Attrs.size())); - PA = new (Mem) AttributeListImpl(C, Attrs); + AttributeListImpl::totalSizeToAlloc(AttrSets.size())); + PA = new (Mem) AttributeListImpl(C, AttrSets); pImpl->AttrsLists.InsertNode(PA, InsertPoint); } @@ -910,7 +897,7 @@ AttributeList::get(LLVMContext &C, AttrPairVec.emplace_back(Index, AttributeSet::get(C, AttrVec)); } - return getImpl(C, AttrPairVec); + return get(C, AttrPairVec); } AttributeList @@ -920,35 +907,76 @@ AttributeList::get(LLVMContext &C, if (Attrs.empty()) return AttributeList(); - return getImpl(C, Attrs); + assert(std::is_sorted(Attrs.begin(), Attrs.end(), + [](const std::pair &LHS, + const std::pair &RHS) { + return LHS.first < RHS.first; + }) && + "Misordered Attributes list!"); + assert(none_of(Attrs, + [](const std::pair &Pair) { + return !Pair.second.hasAttributes(); + }) && + "Pointless attribute!"); + + unsigned MaxIndex = Attrs.back().first; + + SmallVector AttrVec(attrIdxToArrayIdx(MaxIndex) + 1); + for (auto Pair : Attrs) + AttrVec[attrIdxToArrayIdx(Pair.first)] = Pair.second; + + return getImpl(C, AttrVec); } AttributeList AttributeList::get(LLVMContext &C, AttributeSet FnAttrs, AttributeSet RetAttrs, ArrayRef ArgAttrs) { - SmallVector, 8> AttrPairs; - if (RetAttrs.hasAttributes()) - AttrPairs.emplace_back(ReturnIndex, RetAttrs); - size_t Index = 1; - for (AttributeSet AS : ArgAttrs) { - if (AS.hasAttributes()) - AttrPairs.emplace_back(Index, AS); - ++Index; + // Scan from the end to find the last argument with attributes. Most + // arguments don't have attributes, so it's nice if we can have fewer unique + // AttributeListImpls by dropping empty attribute sets at the end of the list. + unsigned NumSets = 0; + for (size_t I = ArgAttrs.size(); I != 0; --I) { + if (ArgAttrs[I - 1].hasAttributes()) { + NumSets = I + 2; + break; + } + } + if (NumSets == 0) { + // Check function and return attributes if we didn't have argument + // attributes. + if (RetAttrs.hasAttributes()) + NumSets = 2; + else if (FnAttrs.hasAttributes()) + NumSets = 1; } - if (FnAttrs.hasAttributes()) - AttrPairs.emplace_back(FunctionIndex, FnAttrs); - if (AttrPairs.empty()) + + // If all attribute sets were empty, we can use the empty attribute list. + if (NumSets == 0) return AttributeList(); - return getImpl(C, AttrPairs); + + SmallVector AttrSets; + AttrSets.reserve(NumSets); + // If we have any attributes, we always have function attributes. + AttrSets.push_back(FnAttrs); + if (NumSets > 1) + AttrSets.push_back(RetAttrs); + if (NumSets > 2) { + // Drop the empty argument attribute sets at the end. 
+ ArgAttrs = ArgAttrs.take_front(NumSets - 2); + AttrSets.insert(AttrSets.end(), ArgAttrs.begin(), ArgAttrs.end()); + } + + return getImpl(C, AttrSets); } AttributeList AttributeList::get(LLVMContext &C, unsigned Index, const AttrBuilder &B) { if (!B.hasAttributes()) return AttributeList(); - AttributeSet AS = AttributeSet::get(C, B); - std::pair Arr[1] = {{Index, AS}}; - return getImpl(C, Arr); + Index = attrIdxToArrayIdx(Index); + SmallVector AttrSets(Index + 1); + AttrSets[Index] = AttributeSet::get(C, B); + return getImpl(C, AttrSets); } AttributeList AttributeList::get(LLVMContext &C, unsigned Index, @@ -971,32 +999,26 @@ AttributeList AttributeList::get(LLVMContext &C, ArrayRef Attrs) { if (Attrs.empty()) return AttributeList(); - if (Attrs.size() == 1) return Attrs[0]; - - SmallVector, 8> AttrNodeVec; - AttributeListImpl *A0 = Attrs[0].pImpl; - if (A0) - AttrNodeVec.append(A0->getSlotPair(0), A0->getSlotPair(A0->getNumSlots())); - // Copy all attributes from Attrs into AttrNodeVec while keeping AttrNodeVec - // ordered by index. Because we know that each list in Attrs is ordered by - // index we only need to merge each successive list in rather than doing a - // full sort. - for (unsigned I = 1, E = Attrs.size(); I != E; ++I) { - AttributeListImpl *ALI = Attrs[I].pImpl; - if (!ALI) continue; - SmallVector, 8>::iterator - ANVI = AttrNodeVec.begin(), ANVE; - for (const IndexAttrPair *AI = ALI->getSlotPair(0), - *AE = ALI->getSlotPair(ALI->getNumSlots()); - AI != AE; ++AI) { - ANVE = AttrNodeVec.end(); - while (ANVI != ANVE && ANVI->first <= AI->first) - ++ANVI; - ANVI = AttrNodeVec.insert(ANVI, *AI) + 1; - } + if (Attrs.size() == 1) + return Attrs[0]; + + unsigned MaxSize = 0; + for (AttributeList List : Attrs) + MaxSize = std::max(MaxSize, List.getNumAttrSets()); + + // If every list was empty, there is no point in merging the lists. + if (MaxSize == 0) + return AttributeList(); + + SmallVector NewAttrSets(MaxSize); + for (unsigned I = 0; I < MaxSize; ++I) { + AttrBuilder CurBuilder; + for (AttributeList List : Attrs) + CurBuilder.merge(List.getAttributes(I - 1)); + NewAttrSets[I] = AttributeSet::get(C, CurBuilder); } - return getImpl(C, AttrNodeVec); + return getImpl(C, NewAttrSets); } AttributeList AttributeList::addAttribute(LLVMContext &C, unsigned Index, @@ -1015,34 +1037,11 @@ AttributeList AttributeList::addAttribute(LLVMContext &C, unsigned Index, return addAttributes(C, Index, B); } -AttributeList AttributeList::addAttribute(LLVMContext &C, - ArrayRef Indices, +AttributeList AttributeList::addAttribute(LLVMContext &C, unsigned Index, Attribute A) const { - assert(std::is_sorted(Indices.begin(), Indices.end())); - - unsigned I = 0, E = pImpl ? pImpl->getNumSlots() : 0; - SmallVector AttrVec; - for (unsigned Index : Indices) { - // Add all attribute slots before the current index. - for (; I < E && getSlotIndex(I) < Index; ++I) - AttrVec.emplace_back(getSlotIndex(I), pImpl->getSlotAttributes(I)); - - // Add the attribute at this index. If we already have attributes at this - // index, merge them into a new set. - AttrBuilder B; - if (I < E && getSlotIndex(I) == Index) { - B.merge(AttrBuilder(pImpl->getSlotAttributes(I))); - ++I; - } - B.addAttribute(A); - AttrVec.emplace_back(Index, AttributeSet::get(C, B)); - } - - // Add remaining attributes. 
- for (; I < E; ++I) - AttrVec.emplace_back(getSlotIndex(I), pImpl->getSlotAttributes(I)); - - return get(C, AttrVec); + AttrBuilder B; + B.addAttribute(A); + return addAttributes(C, Index, B); } AttributeList AttributeList::addAttributes(LLVMContext &C, unsigned Index, @@ -1056,39 +1055,42 @@ AttributeList AttributeList::addAttributes(LLVMContext &C, unsigned Index, #ifndef NDEBUG // FIXME it is not obvious how this should work for alignment. For now, say // we can't change a known alignment. - unsigned OldAlign = getParamAlignment(Index); + unsigned OldAlign = getAttributes(Index).getAlignment(); unsigned NewAlign = B.getAlignment(); assert((!OldAlign || !NewAlign || OldAlign == NewAlign) && "Attempt to change alignment!"); #endif - SmallVector AttrVec; - uint64_t NumAttrs = pImpl->getNumSlots(); - unsigned I; + Index = attrIdxToArrayIdx(Index); + SmallVector AttrSets(this->begin(), this->end()); + if (Index >= AttrSets.size()) + AttrSets.resize(Index + 1); - // Add all the attribute slots before the one we need to merge. - for (I = 0; I < NumAttrs; ++I) { - if (getSlotIndex(I) >= Index) - break; - AttrVec.emplace_back(getSlotIndex(I), pImpl->getSlotAttributes(I)); - } + AttrBuilder Merged(AttrSets[Index]); + Merged.merge(B); + AttrSets[Index] = AttributeSet::get(C, Merged); - AttrBuilder NewAttrs; - if (I < NumAttrs && getSlotIndex(I) == Index) { - // We need to merge the attribute sets. - NewAttrs.merge(pImpl->getSlotAttributes(I)); - ++I; - } - NewAttrs.merge(B); + return getImpl(C, AttrSets); +} + +AttributeList AttributeList::addParamAttribute(LLVMContext &C, + ArrayRef ArgNos, + Attribute A) const { + assert(std::is_sorted(ArgNos.begin(), ArgNos.end())); - // Add the new or merged attribute set at this index. - AttrVec.emplace_back(Index, AttributeSet::get(C, NewAttrs)); + SmallVector AttrSets(this->begin(), this->end()); + unsigned MaxIndex = attrIdxToArrayIdx(ArgNos.back() + FirstArgIndex); + if (MaxIndex >= AttrSets.size()) + AttrSets.resize(MaxIndex + 1); - // Add the remaining entries. - for (; I < NumAttrs; ++I) - AttrVec.emplace_back(getSlotIndex(I), pImpl->getSlotAttributes(I)); + for (unsigned ArgNo : ArgNos) { + unsigned Index = attrIdxToArrayIdx(ArgNo + FirstArgIndex); + AttrBuilder B(AttrSets[Index]); + B.addAttribute(A); + AttrSets[Index] = AttributeSet::get(C, B); + } - return get(C, AttrVec); + return getImpl(C, AttrSets); } AttributeList AttributeList::removeAttribute(LLVMContext &C, unsigned Index, @@ -1107,54 +1109,38 @@ AttributeList AttributeList::removeAttribute(LLVMContext &C, unsigned Index, return removeAttributes(C, Index, B); } -AttributeList AttributeList::removeAttributes(LLVMContext &C, unsigned Index, - const AttrBuilder &Attrs) const { +AttributeList +AttributeList::removeAttributes(LLVMContext &C, unsigned Index, + const AttrBuilder &AttrsToRemove) const { if (!pImpl) return AttributeList(); // FIXME it is not obvious how this should work for alignment. // For now, say we can't pass in alignment, which no current use does. - assert(!Attrs.hasAlignmentAttr() && "Attempt to change alignment!"); + assert(!AttrsToRemove.hasAlignmentAttr() && "Attempt to change alignment!"); - // Add the attribute slots before the one we're trying to add. 
- SmallVector AttrSets; - uint64_t NumAttrs = pImpl->getNumSlots(); - AttrBuilder B; - uint64_t LastIndex = 0; - for (unsigned I = 0, E = NumAttrs; I != E; ++I) { - if (getSlotIndex(I) >= Index) { - if (getSlotIndex(I) == Index) - B = AttrBuilder(getSlotAttributes(LastIndex++)); - break; - } - LastIndex = I + 1; - AttrSets.push_back({getSlotIndex(I), getSlotAttributes(I)}); - } + Index = attrIdxToArrayIdx(Index); + SmallVector AttrSets(this->begin(), this->end()); + if (Index >= AttrSets.size()) + AttrSets.resize(Index + 1); - // Remove the attributes from the existing set and add them. - B.remove(Attrs); - if (B.hasAttributes()) - AttrSets.push_back({Index, AttributeSet::get(C, B)}); + AttrBuilder B(AttrSets[Index]); + B.remove(AttrsToRemove); + AttrSets[Index] = AttributeSet::get(C, B); - // Add the remaining attribute slots. - for (unsigned I = LastIndex, E = NumAttrs; I < E; ++I) - AttrSets.push_back({getSlotIndex(I), getSlotAttributes(I)}); - - return get(C, AttrSets); + return getImpl(C, AttrSets); } AttributeList AttributeList::removeAttributes(LLVMContext &C, unsigned WithoutIndex) const { if (!pImpl) return AttributeList(); - - SmallVector, 4> AttrSet; - for (unsigned I = 0, E = pImpl->getNumSlots(); I != E; ++I) { - unsigned Index = getSlotIndex(I); - if (Index != WithoutIndex) - AttrSet.push_back({Index, pImpl->getSlotAttributes(I)}); - } - return get(C, AttrSet); + WithoutIndex = attrIdxToArrayIdx(WithoutIndex); + if (WithoutIndex >= getNumAttrSets()) + return *this; + SmallVector AttrSets(this->begin(), this->end()); + AttrSets[WithoutIndex] = AttributeSet(); + return getImpl(C, AttrSets); } AttributeList AttributeList::addDereferenceableAttr(LLVMContext &C, @@ -1223,20 +1209,20 @@ bool AttributeList::hasFnAttribute(StringRef Kind) const { bool AttributeList::hasParamAttribute(unsigned ArgNo, Attribute::AttrKind Kind) const { - return hasAttribute(ArgNo + 1, Kind); + return hasAttribute(ArgNo + FirstArgIndex, Kind); } bool AttributeList::hasAttrSomewhere(Attribute::AttrKind Attr, unsigned *Index) const { if (!pImpl) return false; - for (unsigned I = 0, E = pImpl->getNumSlots(); I != E; ++I) - for (AttributeListImpl::iterator II = pImpl->begin(I), IE = pImpl->end(I); - II != IE; ++II) - if (II->hasAttribute(Attr)) { - if (Index) *Index = pImpl->getSlotIndex(I); - return true; - } + for (unsigned I = index_begin(), E = index_end(); I != E; ++I) { + if (hasAttribute(I, Attr)) { + if (Index) + *Index = I; + return true; + } + } return false; } @@ -1280,60 +1266,35 @@ std::string AttributeList::getAsString(unsigned Index, bool InAttrGrp) const { } AttributeSet AttributeList::getAttributes(unsigned Index) const { - if (!pImpl) return AttributeSet(); - - // Loop through to find the attribute node we want. - for (unsigned I = 0, E = pImpl->getNumSlots(); I != E; ++I) - if (pImpl->getSlotIndex(I) == Index) - return pImpl->getSlotAttributes(I); - - return AttributeSet(); + Index = attrIdxToArrayIdx(Index); + if (!pImpl || Index >= getNumAttrSets()) + return AttributeSet(); + return pImpl->begin()[Index]; } -AttributeList::iterator AttributeList::begin(unsigned Slot) const { - if (!pImpl) - return ArrayRef().begin(); - return pImpl->begin(Slot); +AttributeList::iterator AttributeList::begin() const { + return pImpl ? pImpl->begin() : nullptr; } -AttributeList::iterator AttributeList::end(unsigned Slot) const { - if (!pImpl) - return ArrayRef().end(); - return pImpl->end(Slot); +AttributeList::iterator AttributeList::end() const { + return pImpl ? 
pImpl->end() : nullptr; } //===----------------------------------------------------------------------===// // AttributeList Introspection Methods //===----------------------------------------------------------------------===// -unsigned AttributeList::getNumSlots() const { - return pImpl ? pImpl->getNumSlots() : 0; -} - -unsigned AttributeList::getSlotIndex(unsigned Slot) const { - assert(pImpl && Slot < pImpl->getNumSlots() && - "Slot # out of range!"); - return pImpl->getSlotIndex(Slot); -} - -AttributeSet AttributeList::getSlotAttributes(unsigned Slot) const { - assert(pImpl && Slot < pImpl->getNumSlots() && - "Slot # out of range!"); - return pImpl->getSlotAttributes(Slot); +unsigned AttributeList::getNumAttrSets() const { + return pImpl ? pImpl->NumAttrSets : 0; } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void AttributeList::dump() const { dbgs() << "PAL[\n"; - for (unsigned i = 0, e = getNumSlots(); i < e; ++i) { - uint64_t Index = getSlotIndex(i); - dbgs() << " { "; - if (Index == ~0U) - dbgs() << "~0U"; - else - dbgs() << Index; - dbgs() << " => " << getAsString(Index) << " }\n"; + for (unsigned i = index_begin(), e = index_end(); i != e; ++i) { + if (getAttributes(i).hasAttributes()) + dbgs() << " { " << i << " => " << getAsString(i) << " }\n"; } dbgs() << "]\n"; @@ -1344,26 +1305,16 @@ LLVM_DUMP_METHOD void AttributeList::dump() const { // AttrBuilder Method Implementations //===----------------------------------------------------------------------===// +// FIXME: Remove this ctor, use AttributeSet. AttrBuilder::AttrBuilder(AttributeList AL, unsigned Index) { - AttributeListImpl *pImpl = AL.pImpl; - if (!pImpl) return; - - for (unsigned I = 0, E = pImpl->getNumSlots(); I != E; ++I) { - if (pImpl->getSlotIndex(I) != Index) continue; - - for (AttributeListImpl::iterator II = pImpl->begin(I), IE = pImpl->end(I); - II != IE; ++II) - addAttribute(*II); - - break; - } + AttributeSet AS = AL.getAttributes(Index); + for (const Attribute &A : AS) + addAttribute(A); } AttrBuilder::AttrBuilder(AttributeSet AS) { - if (AS.hasAttributes()) { - for (const Attribute &A : AS) - addAttribute(A); - } + for (const Attribute &A : AS) + addAttribute(A); } void AttrBuilder::clear() { @@ -1687,6 +1638,39 @@ static void adjustCallerSSPLevel(Function &Caller, const Function &Callee) { Caller.addFnAttr(Attribute::StackProtect); } +/// \brief If the inlined function required stack probes, then ensure that +/// the calling function has those too. +static void adjustCallerStackProbes(Function &Caller, const Function &Callee) { + if (!Caller.hasFnAttribute("probe-stack") && + Callee.hasFnAttribute("probe-stack")) { + Caller.addFnAttr(Callee.getFnAttribute("probe-stack")); + } +} + +/// \brief If the inlined function defines the size of guard region +/// on the stack, then ensure that the calling function defines a guard region +/// that is no larger. 
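The helper defined next, adjustCallerStackProbeSize, implements a simple merge rule for inlining: a caller without "stack-probe-size" inherits the callee's value, and a caller with a larger value is clamped down to the callee's, since a stack guard region may only shrink. The rule in isolation, with a plain map standing in for LLVM function attributes:

#include <cassert>
#include <cstdint>
#include <map>
#include <string>

using FnAttrs = std::map<std::string, uint64_t>;

static void adjustCallerStackProbeSize(FnAttrs &Caller, const FnAttrs &Callee) {
  auto CalleeIt = Callee.find("stack-probe-size");
  if (CalleeIt == Callee.end())
    return;                                  // callee imposes no constraint
  auto CallerIt = Caller.find("stack-probe-size");
  if (CallerIt == Caller.end() || CallerIt->second > CalleeIt->second)
    Caller["stack-probe-size"] = CalleeIt->second;  // shrink to callee's size
}

int main() {
  FnAttrs Caller{{"stack-probe-size", 8192}};
  FnAttrs Callee{{"stack-probe-size", 4096}};
  adjustCallerStackProbeSize(Caller, Callee);
  assert(Caller["stack-probe-size"] == 4096);  // the smaller guard size wins

  FnAttrs Fresh;                               // caller without the attribute
  adjustCallerStackProbeSize(Fresh, Callee);
  assert(Fresh["stack-probe-size"] == 4096);   // inherited from the callee
}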
+static void +adjustCallerStackProbeSize(Function &Caller, const Function &Callee) { + if (Callee.hasFnAttribute("stack-probe-size")) { + uint64_t CalleeStackProbeSize; + Callee.getFnAttribute("stack-probe-size") + .getValueAsString() + .getAsInteger(0, CalleeStackProbeSize); + if (Caller.hasFnAttribute("stack-probe-size")) { + uint64_t CallerStackProbeSize; + Caller.getFnAttribute("stack-probe-size") + .getValueAsString() + .getAsInteger(0, CallerStackProbeSize); + if (CallerStackProbeSize > CalleeStackProbeSize) { + Caller.addFnAttr(Callee.getFnAttribute("stack-probe-size")); + } + } else { + Caller.addFnAttr(Callee.getFnAttribute("stack-probe-size")); + } + } +} + #define GET_ATTR_COMPAT_FUNC #include "AttributesCompatFunc.inc" diff --git a/interpreter/llvm/src/lib/IR/AutoUpgrade.cpp b/interpreter/llvm/src/lib/IR/AutoUpgrade.cpp index 8bcba76723157..a501799b4799b 100644 --- a/interpreter/llvm/src/lib/IR/AutoUpgrade.cpp +++ b/interpreter/llvm/src/lib/IR/AutoUpgrade.cpp @@ -142,6 +142,11 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) { Name.startswith("avx512.mask.packssdw.") || // Added in 5.0 Name.startswith("avx512.mask.packuswb.") || // Added in 5.0 Name.startswith("avx512.mask.packusdw.") || // Added in 5.0 + Name.startswith("avx512.mask.cmp.b") || // Added in 5.0 + Name.startswith("avx512.mask.cmp.d") || // Added in 5.0 + Name.startswith("avx512.mask.cmp.q") || // Added in 5.0 + Name.startswith("avx512.mask.cmp.w") || // Added in 5.0 + Name.startswith("avx512.mask.ucmp.") || // Added in 5.0 Name == "avx512.mask.add.pd.128" || // Added in 4.0 Name == "avx512.mask.add.pd.256" || // Added in 4.0 Name == "avx512.mask.add.ps.128" || // Added in 4.0 @@ -521,6 +526,7 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { return true; } } + break; } case 'o': // We only need to change the name to match the mangling including the @@ -782,12 +788,30 @@ static Value *upgradeIntMinMax(IRBuilder<> &Builder, CallInst &CI, } static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI, - ICmpInst::Predicate Pred) { + unsigned CC, bool Signed) { Value *Op0 = CI.getArgOperand(0); unsigned NumElts = Op0->getType()->getVectorNumElements(); - Value *Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1)); - Value *Mask = CI.getArgOperand(2); + Value *Cmp; + if (CC == 3) { + Cmp = Constant::getNullValue(llvm::VectorType::get(Builder.getInt1Ty(), NumElts)); + } else if (CC == 7) { + Cmp = Constant::getAllOnesValue(llvm::VectorType::get(Builder.getInt1Ty(), NumElts)); + } else { + ICmpInst::Predicate Pred; + switch (CC) { + default: llvm_unreachable("Unknown condition code"); + case 0: Pred = ICmpInst::ICMP_EQ; break; + case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break; + case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break; + case 4: Pred = ICmpInst::ICMP_NE; break; + case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break; + case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break; + } + Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1)); + } + + Value *Mask = CI.getArgOperand(CI.getNumArgOperands() - 1); const auto *C = dyn_cast(Mask); if (!C || !C->isAllOnesValue()) Cmp = Builder.CreateAnd(Cmp, getX86MaskVec(Builder, Mask, NumElts)); @@ -1006,9 +1030,13 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { } else if (IsX86 && Name.startswith("avx512.mask.pcmp")) { // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt." 
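upgradeMaskedCompare above decodes the avx512.mask.cmp/ucmp immediate: values 0 through 7 select a predicate, with 3 and 7 short-circuited to constant all-zero and all-one masks, and the Signed flag picking signed or unsigned orderings. The mapping as a standalone, testable function:

#include <cassert>
#include <string>

// Returns the IR predicate the upgrade selects for an AVX-512 cmp/ucmp
// immediate (0..7); "false"/"true" stand for the constant-mask shortcuts.
static std::string decodeMaskCmp(unsigned CC, bool Signed) {
  switch (CC) {
  case 0: return "eq";
  case 1: return Signed ? "slt" : "ult";
  case 2: return Signed ? "sle" : "ule";
  case 3: return "false";  // upgraded to an all-zero mask, no compare emitted
  case 4: return "ne";
  case 5: return Signed ? "sge" : "uge";
  case 6: return Signed ? "sgt" : "ugt";
  case 7: return "true";   // upgraded to an all-one mask, no compare emitted
  default: return "invalid";
  }
}

int main() {
  assert(decodeMaskCmp(6, true) == "sgt");  // the pcmpgt upgrade path above
  assert(decodeMaskCmp(0, true) == "eq");   // the pcmpeq upgrade path above
  assert(decodeMaskCmp(1, false) == "ult"); // ucmp: unsigned orderings
}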
    bool CmpEq = Name[16] == 'e';
-    Rep = upgradeMaskedCompare(Builder, *CI,
-                               CmpEq ? ICmpInst::ICMP_EQ
-                                     : ICmpInst::ICMP_SGT);
+    Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
+  } else if (IsX86 && Name.startswith("avx512.mask.cmp")) {
+    unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
+    Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
+  } else if (IsX86 && Name.startswith("avx512.mask.ucmp")) {
+    unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
+    Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
   } else if (IsX86 && (Name == "sse41.pmaxsb" ||
                        Name == "sse2.pmaxs.w" ||
                        Name == "sse41.pmaxsd" ||
@@ -2211,14 +2239,14 @@ bool llvm::UpgradeDebugInfo(Module &M) {
 }

 bool llvm::UpgradeModuleFlags(Module &M) {
-  const NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
+  NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
   if (!ModFlags)
     return false;

-  bool HasObjCFlag = false, HasClassProperties = false;
+  bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
   for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
     MDNode *Op = ModFlags->getOperand(I);
-    if (Op->getNumOperands() < 2)
+    if (Op->getNumOperands() != 3)
       continue;
     MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
     if (!ID)
@@ -2227,7 +2255,24 @@ bool llvm::UpgradeModuleFlags(Module &M) {
       HasObjCFlag = true;
     if (ID->getString() == "Objective-C Class Properties")
       HasClassProperties = true;
+    // Upgrade PIC/PIE Module Flags. The module flag behavior for these two
+    // flags was Error and is now Max.
+    if (ID->getString() == "PIC Level" || ID->getString() == "PIE Level") {
+      if (auto *Behavior =
+              mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
+        if (Behavior->getLimitedValue() == Module::Error) {
+          Type *Int32Ty = Type::getInt32Ty(M.getContext());
+          Metadata *Ops[3] = {
+              ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Max)),
+              MDString::get(M.getContext(), ID->getString()),
+              Op->getOperand(2)};
+          ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
+          Changed = true;
+        }
+      }
+    }
   }
+
   // "Objective-C Class Properties" was recently added for Objective-C. We
   // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
   // flag of value 0, so we can correctly downgrade this flag when trying to
@@ -2236,9 +2281,10 @@ bool llvm::UpgradeModuleFlags(Module &M) {
   if (HasObjCFlag && !HasClassProperties) {
     M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
                     (uint32_t)0);
-    return true;
+    Changed = true;
   }
-  return false;
+
+  return Changed;
 }

 static bool isOldLoopArgument(Metadata *MD) {
diff --git a/interpreter/llvm/src/lib/IR/BasicBlock.cpp b/interpreter/llvm/src/lib/IR/BasicBlock.cpp
index 90ca21ab91f8f..2b780adf6c69c 100644
--- a/interpreter/llvm/src/lib/IR/BasicBlock.cpp
+++ b/interpreter/llvm/src/lib/IR/BasicBlock.cpp
@@ -263,6 +263,10 @@ const BasicBlock *BasicBlock::getUniqueSuccessor() const {
   return SuccBB;
 }

+iterator_range<BasicBlock::phi_iterator> BasicBlock::phis() {
+  return make_range<phi_iterator>(dyn_cast<PHINode>(&front()), nullptr);
+}
+
 /// This method is used to notify a BasicBlock that the
 /// specified Predecessor of the block is no longer able to reach it. This is
 /// actually not used to update the Predecessor list, but is actually used to
@@ -351,6 +355,19 @@ bool BasicBlock::canSplitPredecessors() const {
   return true;
 }

+bool BasicBlock::isLegalToHoistInto() const {
+  auto *Term = getTerminator();
+  // No terminator means the block is under construction.
@@ -351,6 +355,19 @@ bool BasicBlock::canSplitPredecessors() const {
   return true;
 }
 
+bool BasicBlock::isLegalToHoistInto() const {
+  auto *Term = getTerminator();
+  // No terminator means the block is under construction.
+  if (!Term)
+    return true;
+
+  // If the block has no successors, there can be no instructions to hoist.
+  assert(Term->getNumSuccessors() > 0);
+
+  // Instructions should not be hoisted across exception handling boundaries.
+  return !Term->isExceptional();
+}
+
 /// This splits a basic block into two at the specified
 /// instruction. Note that all instructions BEFORE the specified iterator stay
 /// as part of the original basic block, an unconditional branch is added to
@@ -389,13 +406,11 @@ BasicBlock *BasicBlock::splitBasicBlock(iterator I, const Twine &BBName) {
     // Loop over any phi nodes in the basic block, updating the BB field of
     // incoming values...
     BasicBlock *Successor = *I;
-    PHINode *PN;
-    for (BasicBlock::iterator II = Successor->begin();
-         (PN = dyn_cast<PHINode>(II)); ++II) {
-      int IDX = PN->getBasicBlockIndex(this);
-      while (IDX != -1) {
-        PN->setIncomingBlock((unsigned)IDX, New);
-        IDX = PN->getBasicBlockIndex(this);
+    for (auto &PN : Successor->phis()) {
+      int Idx = PN.getBasicBlockIndex(this);
+      while (Idx != -1) {
+        PN.setIncomingBlock((unsigned)Idx, New);
+        Idx = PN.getBasicBlockIndex(this);
       }
     }
   }
diff --git a/interpreter/llvm/src/lib/IR/CMakeLists.txt b/interpreter/llvm/src/lib/IR/CMakeLists.txt
index 11259cbe18152..1cc229d68bfce 100644
--- a/interpreter/llvm/src/lib/IR/CMakeLists.txt
+++ b/interpreter/llvm/src/lib/IR/CMakeLists.txt
@@ -43,6 +43,7 @@ add_llvm_library(LLVMCore
   Pass.cpp
   PassManager.cpp
   PassRegistry.cpp
+  SafepointIRVerifier.cpp
   ProfileSummary.cpp
   Statepoint.cpp
   Type.cpp
diff --git a/interpreter/llvm/src/lib/IR/Comdat.cpp b/interpreter/llvm/src/lib/IR/Comdat.cpp
index e27ecad0a8841..c735f9b2eb1eb 100644
--- a/interpreter/llvm/src/lib/IR/Comdat.cpp
+++ b/interpreter/llvm/src/lib/IR/Comdat.cpp
@@ -11,9 +11,9 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/IR/Comdat.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
-#include "llvm/IR/Comdat.h"
 
 using namespace llvm;
diff --git a/interpreter/llvm/src/lib/IR/ConstantFold.cpp b/interpreter/llvm/src/lib/IR/ConstantFold.cpp
index a20f3f811c8df..311b0a76ce8ab 100644
--- a/interpreter/llvm/src/lib/IR/ConstantFold.cpp
+++ b/interpreter/llvm/src/lib/IR/ConstantFold.cpp
@@ -242,7 +242,7 @@ static Constant *ExtractConstantBytes(Constant *C, unsigned ByteStart,
 
     // X | -1 -> -1.
     if (ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS))
-      if (RHSC->isAllOnesValue())
+      if (RHSC->isMinusOne())
         return RHSC;
 
     Constant *LHS = ExtractConstantBytes(CE->getOperand(0), ByteStart,ByteSize);
@@ -348,8 +348,7 @@ static Constant *ExtractConstantBytes(Constant *C, unsigned ByteStart,
 /// factors factored out. If Folded is false, return null if no factoring was
 /// possible, to avoid endlessly bouncing an unfoldable expression back into the
 /// top-level folder.
-static Constant *getFoldedSizeOf(Type *Ty, Type *DestTy,
-                                 bool Folded) {
+static Constant *getFoldedSizeOf(Type *Ty, Type *DestTy, bool Folded) {
   if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
     Constant *N = ConstantInt::get(DestTy, ATy->getNumElements());
     Constant *E = getFoldedSizeOf(ATy->getElementType(), DestTy, true);
@@ -404,8 +403,7 @@ static Constant *getFoldedSizeOf(Type *Ty, Type *DestTy,
 /// factors factored out. If Folded is false, return null if no factoring was
 /// possible, to avoid endlessly bouncing an unfoldable expression back into the
 /// top-level folder.
-static Constant *getFoldedAlignOf(Type *Ty, Type *DestTy, - bool Folded) { +static Constant *getFoldedAlignOf(Type *Ty, Type *DestTy, bool Folded) { // The alignment of an array is equal to the alignment of the // array element. Note that this is not always true for vectors. if (ArrayType *ATy = dyn_cast(Ty)) { @@ -469,8 +467,7 @@ static Constant *getFoldedAlignOf(Type *Ty, Type *DestTy, /// any known factors factored out. If Folded is false, return null if no /// factoring was possible, to avoid endlessly bouncing an unfoldable expression /// back into the top-level folder. -static Constant *getFoldedOffsetOf(Type *Ty, Constant *FieldNo, - Type *DestTy, +static Constant *getFoldedOffsetOf(Type *Ty, Constant *FieldNo, Type *DestTy, bool Folded) { if (ArrayType *ATy = dyn_cast(Ty)) { Constant *N = ConstantExpr::getCast(CastInst::getCastOpcode(FieldNo, false, @@ -1018,33 +1015,33 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, if (ConstantInt *CI2 = dyn_cast(C2)) { switch (Opcode) { case Instruction::Add: - if (CI2->equalsInt(0)) return C1; // X + 0 == X + if (CI2->isZero()) return C1; // X + 0 == X break; case Instruction::Sub: - if (CI2->equalsInt(0)) return C1; // X - 0 == X + if (CI2->isZero()) return C1; // X - 0 == X break; case Instruction::Mul: - if (CI2->equalsInt(0)) return C2; // X * 0 == 0 - if (CI2->equalsInt(1)) + if (CI2->isZero()) return C2; // X * 0 == 0 + if (CI2->isOne()) return C1; // X * 1 == X break; case Instruction::UDiv: case Instruction::SDiv: - if (CI2->equalsInt(1)) + if (CI2->isOne()) return C1; // X / 1 == X - if (CI2->equalsInt(0)) + if (CI2->isZero()) return UndefValue::get(CI2->getType()); // X / 0 == undef break; case Instruction::URem: case Instruction::SRem: - if (CI2->equalsInt(1)) + if (CI2->isOne()) return Constant::getNullValue(CI2->getType()); // X % 1 == 0 - if (CI2->equalsInt(0)) + if (CI2->isZero()) return UndefValue::get(CI2->getType()); // X % 0 == undef break; case Instruction::And: if (CI2->isZero()) return C2; // X & 0 == 0 - if (CI2->isAllOnesValue()) + if (CI2->isMinusOne()) return C1; // X & -1 == X if (ConstantExpr *CE1 = dyn_cast(C1)) { @@ -1081,12 +1078,12 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, } break; case Instruction::Or: - if (CI2->equalsInt(0)) return C1; // X | 0 == X - if (CI2->isAllOnesValue()) + if (CI2->isZero()) return C1; // X | 0 == X + if (CI2->isMinusOne()) return C2; // X | -1 == -1 break; case Instruction::Xor: - if (CI2->equalsInt(0)) return C1; // X ^ 0 == X + if (CI2->isZero()) return C1; // X ^ 0 == X if (ConstantExpr *CE1 = dyn_cast(C1)) { switch (CE1->getOpcode()) { @@ -1094,7 +1091,7 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, case Instruction::ICmp: case Instruction::FCmp: // cmp pred ^ true -> cmp !pred - assert(CI2->equalsInt(1)); + assert(CI2->isOne()); CmpInst::Predicate pred = (CmpInst::Predicate)CE1->getPredicate(); pred = CmpInst::getInversePredicate(pred); return ConstantExpr::getCompare(pred, CE1->getOperand(0), @@ -1129,18 +1126,18 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, case Instruction::Mul: return ConstantInt::get(CI1->getContext(), C1V * C2V); case Instruction::UDiv: - assert(!CI2->isNullValue() && "Div by zero handled above"); + assert(!CI2->isZero() && "Div by zero handled above"); return ConstantInt::get(CI1->getContext(), C1V.udiv(C2V)); case Instruction::SDiv: - assert(!CI2->isNullValue() && "Div by zero handled above"); + assert(!CI2->isZero() && "Div by zero handled above"); if (C2V.isAllOnesValue() 
&& C1V.isMinSignedValue()) return UndefValue::get(CI1->getType()); // MIN_INT / -1 -> undef return ConstantInt::get(CI1->getContext(), C1V.sdiv(C2V)); case Instruction::URem: - assert(!CI2->isNullValue() && "Div by zero handled above"); + assert(!CI2->isZero() && "Div by zero handled above"); return ConstantInt::get(CI1->getContext(), C1V.urem(C2V)); case Instruction::SRem: - assert(!CI2->isNullValue() && "Div by zero handled above"); + assert(!CI2->isZero() && "Div by zero handled above"); if (C2V.isAllOnesValue() && C1V.isMinSignedValue()) return UndefValue::get(CI1->getType()); // MIN_INT % -1 -> undef return ConstantInt::get(CI1->getContext(), C1V.srem(C2V)); @@ -1173,7 +1170,7 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, case Instruction::LShr: case Instruction::AShr: case Instruction::Shl: - if (CI1->equalsInt(0)) return C1; + if (CI1->isZero()) return C1; break; default: break; @@ -2100,15 +2097,19 @@ Constant *llvm::ConstantFoldGetElementPtr(Type *PointeeTy, Constant *C, // Subsequent evaluation would get confused and produce erroneous results. // // The following prohibits such a GEP from being formed by checking to see - // if the index is in-range with respect to an array or vector. + // if the index is in-range with respect to an array. + // TODO: This code may be extended to handle vectors as well. bool PerformFold = false; if (Idx0->isNullValue()) PerformFold = true; else if (LastI.isSequential()) if (ConstantInt *CI = dyn_cast(Idx0)) - PerformFold = - !LastI.isBoundedSequential() || - isIndexInRangeOfArrayType(LastI.getSequentialNumElements(), CI); + PerformFold = (!LastI.isBoundedSequential() || + isIndexInRangeOfArrayType( + LastI.getSequentialNumElements(), CI)) && + !CE->getOperand(CE->getNumOperands() - 1) + ->getType() + ->isVectorTy(); if (PerformFold) { SmallVector NewIndices; diff --git a/interpreter/llvm/src/lib/IR/ConstantRange.cpp b/interpreter/llvm/src/lib/IR/ConstantRange.cpp index 509caba3acd49..4bd17257016d7 100644 --- a/interpreter/llvm/src/lib/IR/ConstantRange.cpp +++ b/interpreter/llvm/src/lib/IR/ConstantRange.cpp @@ -1,4 +1,4 @@ -//===-- ConstantRange.cpp - ConstantRange implementation ------------------===// +//===- ConstantRange.cpp - ConstantRange implementation -------------------===// // // The LLVM Compiler Infrastructure // @@ -21,12 +21,21 @@ // //===----------------------------------------------------------------------===// -#include "llvm/IR/Instruction.h" +#include "llvm/ADT/APInt.h" +#include "llvm/IR/ConstantRange.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Metadata.h" #include "llvm/IR/Operator.h" -#include "llvm/IR/ConstantRange.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include +#include +#include + using namespace llvm; ConstantRange::ConstantRange(uint32_t BitWidth, bool Full) @@ -170,7 +179,7 @@ ConstantRange ConstantRange::makeGuaranteedNoWrapRegion(Instruction::BinaryOps BinOp, const ConstantRange &Other, unsigned NoWrapKind) { - typedef OverflowingBinaryOperator OBO; + using OBO = OverflowingBinaryOperator; // Computes the intersection of CR0 and CR1. 
It is different from
// intersectWith in that the ConstantRange returned will only contain elements
@@ -284,27 +293,14 @@ APInt ConstantRange::getUnsignedMin() const {
 }
 
 APInt ConstantRange::getSignedMax() const {
-  if (!isWrappedSet()) {
-    APInt UpperMinusOne = getUpper() - 1;
-    if (getLower().sle(UpperMinusOne))
-      return UpperMinusOne;
-    return APInt::getSignedMaxValue(getBitWidth());
-  }
-  if (getLower().isNegative() == getUpper().isNegative())
+  if (isFullSet() || Lower.sgt(Upper))
     return APInt::getSignedMaxValue(getBitWidth());
   return getUpper() - 1;
 }
 
 APInt ConstantRange::getSignedMin() const {
-  if (!isWrappedSet()) {
-    if (getLower().sle(getUpper() - 1))
-      return getLower();
+  if (isFullSet() || (Lower.sgt(Upper) && !getUpper().isMinSignedValue()))
     return APInt::getSignedMinValue(getBitWidth());
-  }
-  if ((getUpper() - 1).slt(getLower())) {
-    if (!getUpper().isMinSignedValue())
-      return APInt::getSignedMinValue(getBitWidth());
-  }
   return getLower();
 }
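The simplified getSignedMax/getSignedMin hinge on a single test: Lower.sgt(Upper) is exactly the "set crosses the signed boundary" case. Two worked i8 examples of the new logic (a sketch; the asserts are illustrative, not part of the patch):

    #include "llvm/IR/ConstantRange.h"
    #include <cassert>
    using namespace llvm;

    void signedMaxExamples() {
      // [10, 20): no signed wrap, so the signed max is Upper - 1 = 19.
      ConstantRange A(APInt(8, 10), APInt(8, 20));
      assert(A.getSignedMax() == APInt(8, 19));

      // [100, 200): read as signed i8 this contains both 127 and -128, so
      // the signed max saturates to SignedMaxValue = 127.
      ConstantRange B(APInt(8, 100), APInt(8, 200));
      assert(B.getSignedMax() == APInt::getSignedMaxValue(8));
    }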
@@ -577,9 +573,6 @@ ConstantRange ConstantRange::truncate(uint32_t DstTySize) const {
   if (isFullSet())
     return ConstantRange(DstTySize, /*isFullSet=*/true);
 
-  APInt MaxValue = APInt::getLowBitsSet(getBitWidth(), DstTySize);
-  APInt MaxBitValue = APInt::getOneBitSet(getBitWidth(), DstTySize);
-
   APInt LowerDiv(Lower), UpperDiv(Upper);
   ConstantRange Union(DstTySize, /*isFullSet=*/false);
 
@@ -587,35 +580,42 @@ ConstantRange ConstantRange::truncate(uint32_t DstTySize) const {
   // We use the non-wrapped set code to analyze the [Lower, MaxValue) part, and
   // then we do the union with [MaxValue, Upper)
   if (isWrappedSet()) {
-    // If Upper is greater than Max Value, it covers the whole truncated range.
-    if (Upper.uge(MaxValue))
+    // If Upper is greater than or equal to MaxValue(DstTy), it covers the whole
+    // truncated range.
+    if (Upper.getActiveBits() > DstTySize ||
+        Upper.countTrailingOnes() == DstTySize)
       return ConstantRange(DstTySize, /*isFullSet=*/true);
 
     Union = ConstantRange(APInt::getMaxValue(DstTySize),Upper.trunc(DstTySize));
     UpperDiv.setAllBits();
 
     // Union covers the MaxValue case, so return if the remaining range is just
-    // MaxValue.
+    // MaxValue(DstTy).
     if (LowerDiv == UpperDiv)
       return Union;
   }
 
   // Chop off the most significant bits that are past the destination bitwidth.
-  if (LowerDiv.uge(MaxValue)) {
-    APInt Div(getBitWidth(), 0);
-    APInt::udivrem(LowerDiv, MaxBitValue, Div, LowerDiv);
-    UpperDiv -= MaxBitValue * Div;
+  if (LowerDiv.getActiveBits() > DstTySize) {
+    // Mask to just the significant bits and subtract from LowerDiv/UpperDiv.
+    APInt Adjust = LowerDiv & APInt::getBitsSetFrom(getBitWidth(), DstTySize);
+    LowerDiv -= Adjust;
+    UpperDiv -= Adjust;
   }
 
-  if (UpperDiv.ule(MaxValue))
+  unsigned UpperDivWidth = UpperDiv.getActiveBits();
+  if (UpperDivWidth <= DstTySize)
     return ConstantRange(LowerDiv.trunc(DstTySize),
                          UpperDiv.trunc(DstTySize)).unionWith(Union);
 
   // The truncated value wraps around. Check if we can do better than fullset.
-  UpperDiv -= MaxBitValue;
-  if (UpperDiv.ult(LowerDiv))
-    return ConstantRange(LowerDiv.trunc(DstTySize),
-                         UpperDiv.trunc(DstTySize)).unionWith(Union);
+  if (UpperDivWidth == DstTySize + 1) {
+    // Clear the MSB so that UpperDiv wraps around.
+    UpperDiv.clearBit(DstTySize);
+    if (UpperDiv.ult(LowerDiv))
+      return ConstantRange(LowerDiv.trunc(DstTySize),
+                           UpperDiv.trunc(DstTySize)).unionWith(Union);
+  }
 
   return ConstantRange(DstTySize, /*isFullSet=*/true);
 }
diff --git a/interpreter/llvm/src/lib/IR/Constants.cpp b/interpreter/llvm/src/lib/IR/Constants.cpp
index 4b9d89cda539d..f56fe7089807b 100644
--- a/interpreter/llvm/src/lib/IR/Constants.cpp
+++ b/interpreter/llvm/src/lib/IR/Constants.cpp
@@ -37,10 +37,6 @@ using namespace llvm;
 //                              Constant Class
 //===----------------------------------------------------------------------===//
 
-void Constant::anchor() { }
-
-void ConstantData::anchor() {}
-
 bool Constant::isNegativeZeroValue() const {
   // Floating point values have an explicit -0.0 value.
   if (const ConstantFP *CFP = dyn_cast<ConstantFP>(this))
@@ -48,8 +44,8 @@ bool Constant::isNegativeZeroValue() const {
 
   // Equivalent for a vector of -0.0's.
   if (const ConstantDataVector *CV = dyn_cast<ConstantDataVector>(this))
-    if (ConstantFP *SplatCFP = dyn_cast_or_null<ConstantFP>(CV->getSplatValue()))
-      if (SplatCFP && SplatCFP->isZero() && SplatCFP->isNegative())
+    if (CV->getElementType()->isFloatingPointTy() && CV->isSplat())
+      if (CV->getElementAsAPFloat(0).isNegZero())
         return true;
 
   if (const ConstantVector *CV = dyn_cast<ConstantVector>(this))
@@ -74,8 +70,8 @@ bool Constant::isZeroValue() const {
 
   // Equivalent for a vector of -0.0's.
   if (const ConstantDataVector *CV = dyn_cast<ConstantDataVector>(this))
-    if (ConstantFP *SplatCFP = dyn_cast_or_null<ConstantFP>(CV->getSplatValue()))
-      if (SplatCFP && SplatCFP->isZero())
+    if (CV->getElementType()->isFloatingPointTy() && CV->isSplat())
+      if (CV->getElementAsAPFloat(0).isZero())
        return true;
 
   if (const ConstantVector *CV = dyn_cast<ConstantVector>(this))
@@ -117,9 +113,13 @@ bool Constant::isAllOnesValue() const {
     return Splat->isAllOnesValue();
 
   // Check for constant vectors which are splats of -1 values.
-  if (const ConstantDataVector *CV = dyn_cast<ConstantDataVector>(this))
-    if (Constant *Splat = CV->getSplatValue())
-      return Splat->isAllOnesValue();
+  if (const ConstantDataVector *CV = dyn_cast<ConstantDataVector>(this)) {
+    if (CV->isSplat()) {
+      if (CV->getElementType()->isFloatingPointTy())
+        return CV->getElementAsAPFloat(0).bitcastToAPInt().isAllOnesValue();
+      return CV->getElementAsAPInt(0).isAllOnesValue();
+    }
+  }
 
   return false;
 }
@@ -131,7 +131,7 @@ bool Constant::isOneValue() const {
 
   // Check for FP which are bitcasted from 1 integers
   if (const ConstantFP *CFP = dyn_cast<ConstantFP>(this))
-    return CFP->getValueAPF().bitcastToAPInt() == 1;
+    return CFP->getValueAPF().bitcastToAPInt().isOneValue();
 
   // Check for constant vectors which are splats of 1 values.
   if (const ConstantVector *CV = dyn_cast<ConstantVector>(this))
@@ -139,9 +139,13 @@ bool Constant::isOneValue() const {
     return Splat->isOneValue();
 
   // Check for constant vectors which are splats of 1 values.
-  if (const ConstantDataVector *CV = dyn_cast<ConstantDataVector>(this))
-    if (Constant *Splat = CV->getSplatValue())
-      return Splat->isOneValue();
+  if (const ConstantDataVector *CV = dyn_cast<ConstantDataVector>(this)) {
+    if (CV->isSplat()) {
+      if (CV->getElementType()->isFloatingPointTy())
+        return CV->getElementAsAPFloat(0).bitcastToAPInt().isOneValue();
+      return CV->getElementAsAPInt(0).isOneValue();
+    }
+  }
 
   return false;
 }
@@ -161,9 +165,13 @@ bool Constant::isMinSignedValue() const {
     return Splat->isMinSignedValue();
 
   // Check for constant vectors which are splats of INT_MIN values.
- if (const ConstantDataVector *CV = dyn_cast(this)) - if (Constant *Splat = CV->getSplatValue()) - return Splat->isMinSignedValue(); + if (const ConstantDataVector *CV = dyn_cast(this)) { + if (CV->isSplat()) { + if (CV->getElementType()->isFloatingPointTy()) + return CV->getElementAsAPFloat(0).bitcastToAPInt().isMinSignedValue(); + return CV->getElementAsAPInt(0).isMinSignedValue(); + } + } return false; } @@ -183,9 +191,13 @@ bool Constant::isNotMinSignedValue() const { return Splat->isNotMinSignedValue(); // Check for constant vectors which are splats of INT_MIN values. - if (const ConstantDataVector *CV = dyn_cast(this)) - if (Constant *Splat = CV->getSplatValue()) - return Splat->isNotMinSignedValue(); + if (const ConstantDataVector *CV = dyn_cast(this)) { + if (CV->isSplat()) { + if (CV->getElementType()->isFloatingPointTy()) + return !CV->getElementAsAPFloat(0).bitcastToAPInt().isMinSignedValue(); + return !CV->getElementAsAPInt(0).isMinSignedValue(); + } + } // It *may* contain INT_MIN, we can't tell. return false; @@ -496,8 +508,6 @@ void Constant::removeDeadConstantUsers() const { // ConstantInt //===----------------------------------------------------------------------===// -void ConstantInt::anchor() { } - ConstantInt::ConstantInt(IntegerType *Ty, const APInt &V) : ConstantData(Ty, ConstantIntVal), Val(V) { assert(V.getBitWidth() == Ty->getBitWidth() && "Invalid constant for type"); @@ -518,7 +528,7 @@ ConstantInt *ConstantInt::getFalse(LLVMContext &Context) { } Constant *ConstantInt::getTrue(Type *Ty) { - assert(Ty->getScalarType()->isIntegerTy(1) && "Type not i1 or vector of i1."); + assert(Ty->isIntOrIntVectorTy(1) && "Type not i1 or vector of i1."); ConstantInt *TrueC = ConstantInt::getTrue(Ty->getContext()); if (auto *VTy = dyn_cast(Ty)) return ConstantVector::getSplat(VTy->getNumElements(), TrueC); @@ -526,7 +536,7 @@ Constant *ConstantInt::getTrue(Type *Ty) { } Constant *ConstantInt::getFalse(Type *Ty) { - assert(Ty->getScalarType()->isIntegerTy(1) && "Type not i1 or vector of i1."); + assert(Ty->isIntOrIntVectorTy(1) && "Type not i1 or vector of i1."); ConstantInt *FalseC = ConstantInt::getFalse(Ty->getContext()); if (auto *VTy = dyn_cast(Ty)) return ConstantVector::getSplat(VTy->getNumElements(), FalseC); @@ -610,8 +620,6 @@ static const fltSemantics *TypeToFloatSemantics(Type *Ty) { return &APFloat::PPCDoubleDouble(); } -void ConstantFP::anchor() { } - Constant *ConstantFP::get(Type *Ty, double V) { LLVMContext &Context = Ty->getContext(); @@ -724,7 +732,7 @@ bool ConstantFP::isExactlyValue(const APFloat &V) const { /// Remove the constant from the constant table. 
void ConstantFP::destroyConstantImpl() { - llvm_unreachable("You can't ConstantInt->destroyConstantImpl()!"); + llvm_unreachable("You can't ConstantFP->destroyConstantImpl()!"); } //===----------------------------------------------------------------------===// @@ -1165,21 +1173,14 @@ bool ConstantInt::isValueValidForType(Type *Ty, uint64_t Val) { unsigned NumBits = Ty->getIntegerBitWidth(); // assert okay if (Ty->isIntegerTy(1)) return Val == 0 || Val == 1; - if (NumBits >= 64) - return true; // always true, has to fit in largest type - uint64_t Max = (1ll << NumBits) - 1; - return Val <= Max; + return isUIntN(NumBits, Val); } bool ConstantInt::isValueValidForType(Type *Ty, int64_t Val) { unsigned NumBits = Ty->getIntegerBitWidth(); if (Ty->isIntegerTy(1)) return Val == 0 || Val == 1 || Val == -1; - if (NumBits >= 64) - return true; // always true, has to fit in largest type - int64_t Min = -(1ll << (NumBits-1)); - int64_t Max = (1ll << (NumBits-1)) - 1; - return (Val >= Min && Val <= Max); + return isIntN(NumBits, Val); } bool ConstantFP::isValueValidForType(Type *Ty, const APFloat& Val) { @@ -1650,9 +1651,9 @@ Constant *ConstantExpr::getFPToSI(Constant *C, Type *Ty, bool OnlyIfReduced) { Constant *ConstantExpr::getPtrToInt(Constant *C, Type *DstTy, bool OnlyIfReduced) { - assert(C->getType()->getScalarType()->isPointerTy() && + assert(C->getType()->isPtrOrPtrVectorTy() && "PtrToInt source must be pointer or pointer vector"); - assert(DstTy->getScalarType()->isIntegerTy() && + assert(DstTy->isIntOrIntVectorTy() && "PtrToInt destination must be integer or integer vector"); assert(isa(C->getType()) == isa(DstTy)); if (isa(C->getType())) @@ -1663,9 +1664,9 @@ Constant *ConstantExpr::getPtrToInt(Constant *C, Type *DstTy, Constant *ConstantExpr::getIntToPtr(Constant *C, Type *DstTy, bool OnlyIfReduced) { - assert(C->getType()->getScalarType()->isIntegerTy() && + assert(C->getType()->isIntOrIntVectorTy() && "IntToPtr source must be integer or integer vector"); - assert(DstTy->getScalarType()->isPointerTy() && + assert(DstTy->isPtrOrPtrVectorTy() && "IntToPtr destination must be a pointer or pointer vector"); assert(isa(C->getType()) == isa(DstTy)); if (isa(C->getType())) @@ -1929,8 +1930,8 @@ Constant *ConstantExpr::getGetElementPtr(Type *Ty, Constant *C, Constant *ConstantExpr::getICmp(unsigned short pred, Constant *LHS, Constant *RHS, bool OnlyIfReduced) { assert(LHS->getType() == RHS->getType()); - assert(pred >= ICmpInst::FIRST_ICMP_PREDICATE && - pred <= ICmpInst::LAST_ICMP_PREDICATE && "Invalid ICmp Predicate"); + assert(CmpInst::isIntPredicate((CmpInst::Predicate)pred) && + "Invalid ICmp Predicate"); if (Constant *FC = ConstantFoldCompareInstruction(pred, LHS, RHS)) return FC; // Fold a few common cases... @@ -1954,7 +1955,8 @@ Constant *ConstantExpr::getICmp(unsigned short pred, Constant *LHS, Constant *ConstantExpr::getFCmp(unsigned short pred, Constant *LHS, Constant *RHS, bool OnlyIfReduced) { assert(LHS->getType() == RHS->getType()); - assert(pred <= FCmpInst::LAST_FCMP_PREDICATE && "Invalid FCmp Predicate"); + assert(CmpInst::isFPPredicate((CmpInst::Predicate)pred) && + "Invalid FCmp Predicate"); if (Constant *FC = ConstantFoldCompareInstruction(pred, LHS, RHS)) return FC; // Fold a few common cases... 
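The rewritten isValueValidForType checks above delegate the bounds test to the MathExtras helpers instead of hand-rolled shifts, which also removes the old NumBits >= 64 special case (the helpers are well defined at N == 64). Their semantics in assert form (illustrative values, not from the patch):

    #include "llvm/Support/MathExtras.h"
    #include <cassert>

    void fitsExamples() {
      // isUIntN(N, x): x representable as an unsigned N-bit value,
      // i.e. x <= 2^N - 1.
      assert(llvm::isUIntN(8, 255) && !llvm::isUIntN(8, 256));
      // isIntN(N, x): x representable as a signed N-bit two's-complement
      // value, i.e. -2^(N-1) <= x <= 2^(N-1) - 1.
      assert(llvm::isIntN(8, -128) && !llvm::isIntN(8, 128));
    }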
@@ -2266,9 +2268,6 @@ Type *GetElementPtrConstantExpr::getResultElementType() const {
 //===----------------------------------------------------------------------===//
 //                       ConstantData* implementations
 
-void ConstantDataArray::anchor() {}
-
-void ConstantDataVector::anchor() {}
-
 Type *ConstantDataSequential::getElementType() const {
   return getType()->getElementType();
 }
@@ -2397,32 +2396,32 @@ void ConstantDataSequential::destroyConstantImpl() {
 Constant *ConstantDataArray::get(LLVMContext &Context, ArrayRef<uint8_t> Elts) {
   Type *Ty = ArrayType::get(Type::getInt8Ty(Context), Elts.size());
   const char *Data = reinterpret_cast<const char *>(Elts.data());
-  return getImpl(StringRef(const_cast<char *>(Data), Elts.size()*1), Ty);
+  return getImpl(StringRef(Data, Elts.size() * 1), Ty);
 }
 Constant *ConstantDataArray::get(LLVMContext &Context, ArrayRef<uint16_t> Elts){
   Type *Ty = ArrayType::get(Type::getInt16Ty(Context), Elts.size());
   const char *Data = reinterpret_cast<const char *>(Elts.data());
-  return getImpl(StringRef(const_cast<char *>(Data), Elts.size()*2), Ty);
+  return getImpl(StringRef(Data, Elts.size() * 2), Ty);
 }
 Constant *ConstantDataArray::get(LLVMContext &Context, ArrayRef<uint32_t> Elts){
   Type *Ty = ArrayType::get(Type::getInt32Ty(Context), Elts.size());
   const char *Data = reinterpret_cast<const char *>(Elts.data());
-  return getImpl(StringRef(const_cast<char *>(Data), Elts.size()*4), Ty);
+  return getImpl(StringRef(Data, Elts.size() * 4), Ty);
 }
 Constant *ConstantDataArray::get(LLVMContext &Context, ArrayRef<uint64_t> Elts){
   Type *Ty = ArrayType::get(Type::getInt64Ty(Context), Elts.size());
   const char *Data = reinterpret_cast<const char *>(Elts.data());
-  return getImpl(StringRef(const_cast<char *>(Data), Elts.size()*8), Ty);
+  return getImpl(StringRef(Data, Elts.size() * 8), Ty);
 }
 Constant *ConstantDataArray::get(LLVMContext &Context, ArrayRef<float> Elts) {
   Type *Ty = ArrayType::get(Type::getFloatTy(Context), Elts.size());
   const char *Data = reinterpret_cast<const char *>(Elts.data());
-  return getImpl(StringRef(const_cast<char *>(Data), Elts.size()*4), Ty);
+  return getImpl(StringRef(Data, Elts.size() * 4), Ty);
 }
 Constant *ConstantDataArray::get(LLVMContext &Context, ArrayRef<double> Elts) {
   Type *Ty = ArrayType::get(Type::getDoubleTy(Context), Elts.size());
   const char *Data = reinterpret_cast<const char *>(Elts.data());
-  return getImpl(StringRef(const_cast<char *>(Data), Elts.size() * 8), Ty);
+  return getImpl(StringRef(Data, Elts.size() * 8), Ty);
 }
 
 /// getFP() constructors - Return a constant with array type with an element
@@ -2434,27 +2433,26 @@ Constant *ConstantDataArray::getFP(LLVMContext &Context,
                                    ArrayRef<uint16_t> Elts) {
   Type *Ty = ArrayType::get(Type::getHalfTy(Context), Elts.size());
   const char *Data = reinterpret_cast<const char *>(Elts.data());
-  return getImpl(StringRef(const_cast<char *>(Data), Elts.size() * 2), Ty);
+  return getImpl(StringRef(Data, Elts.size() * 2), Ty);
 }
 Constant *ConstantDataArray::getFP(LLVMContext &Context,
                                    ArrayRef<uint32_t> Elts) {
   Type *Ty = ArrayType::get(Type::getFloatTy(Context), Elts.size());
   const char *Data = reinterpret_cast<const char *>(Elts.data());
-  return getImpl(StringRef(const_cast<char *>(Data), Elts.size() * 4), Ty);
+  return getImpl(StringRef(Data, Elts.size() * 4), Ty);
 }
 Constant *ConstantDataArray::getFP(LLVMContext &Context,
                                    ArrayRef<uint64_t> Elts) {
   Type *Ty = ArrayType::get(Type::getDoubleTy(Context), Elts.size());
   const char *Data = reinterpret_cast<const char *>(Elts.data());
-  return getImpl(StringRef(const_cast<char *>(Data), Elts.size() * 8), Ty);
+  return getImpl(StringRef(Data, Elts.size() * 8), Ty);
 }
 
 Constant *ConstantDataArray::getString(LLVMContext &Context,
                                        StringRef Str, bool AddNull) {
   if (!AddNull) {
     const uint8_t *Data = reinterpret_cast<const uint8_t *>(Str.data());
-    return get(Context, makeArrayRef(const_cast<uint8_t *>(Data),
-                                     Str.size()));
+    return get(Context, makeArrayRef(Data, Str.size()));
   }
 
   SmallVector<uint8_t, 64> ElementVals;
@@ -2469,32 +2467,32 @@ Constant *ConstantDataArray::getString(LLVMContext &Context,
 Constant *ConstantDataVector::get(LLVMContext &Context, ArrayRef<uint8_t> Elts){
   Type *Ty = VectorType::get(Type::getInt8Ty(Context), Elts.size());
   const char *Data = reinterpret_cast<const char *>(Elts.data());
-  return getImpl(StringRef(const_cast<char *>(Data), Elts.size()*1), Ty);
+  return getImpl(StringRef(Data, Elts.size() * 1), Ty);
 }
 Constant *ConstantDataVector::get(LLVMContext &Context, ArrayRef<uint16_t> Elts){
   Type *Ty = VectorType::get(Type::getInt16Ty(Context), Elts.size());
   const char *Data = reinterpret_cast<const char *>(Elts.data());
-  return getImpl(StringRef(const_cast<char *>(Data), Elts.size()*2), Ty);
+  return getImpl(StringRef(Data, Elts.size() * 2), Ty);
 }
 Constant *ConstantDataVector::get(LLVMContext &Context, ArrayRef<uint32_t> Elts){
   Type *Ty = VectorType::get(Type::getInt32Ty(Context), Elts.size());
   const char *Data = reinterpret_cast<const char *>(Elts.data());
-  return getImpl(StringRef(const_cast<char *>(Data), Elts.size()*4), Ty);
+  return getImpl(StringRef(Data, Elts.size() * 4), Ty);
 }
 Constant *ConstantDataVector::get(LLVMContext &Context, ArrayRef<uint64_t> Elts){
   Type *Ty = VectorType::get(Type::getInt64Ty(Context), Elts.size());
   const char *Data = reinterpret_cast<const char *>(Elts.data());
-  return getImpl(StringRef(const_cast<char *>(Data), Elts.size()*8), Ty);
+  return getImpl(StringRef(Data, Elts.size() * 8), Ty);
 }
 Constant *ConstantDataVector::get(LLVMContext &Context, ArrayRef<float> Elts) {
   Type *Ty = VectorType::get(Type::getFloatTy(Context), Elts.size());
   const char *Data = reinterpret_cast<const char *>(Elts.data());
-  return getImpl(StringRef(const_cast<char *>(Data), Elts.size()*4), Ty);
+  return getImpl(StringRef(Data, Elts.size() * 4), Ty);
 }
 Constant *ConstantDataVector::get(LLVMContext &Context, ArrayRef<double> Elts) {
   Type *Ty = VectorType::get(Type::getDoubleTy(Context), Elts.size());
   const char *Data = reinterpret_cast<const char *>(Elts.data());
-  return getImpl(StringRef(const_cast<char *>(Data), Elts.size() * 8), Ty);
+  return getImpl(StringRef(Data, Elts.size() * 8), Ty);
 }
 
 /// getFP() constructors - Return a constant with vector type with an element
@@ -2506,19 +2504,19 @@ Constant *ConstantDataVector::getFP(LLVMContext &Context,
                                     ArrayRef<uint16_t> Elts) {
   Type *Ty = VectorType::get(Type::getHalfTy(Context), Elts.size());
   const char *Data = reinterpret_cast<const char *>(Elts.data());
-  return getImpl(StringRef(const_cast<char *>(Data), Elts.size() * 2), Ty);
+  return getImpl(StringRef(Data, Elts.size() * 2), Ty);
 }
 Constant *ConstantDataVector::getFP(LLVMContext &Context,
                                     ArrayRef<uint32_t> Elts) {
   Type *Ty = VectorType::get(Type::getFloatTy(Context), Elts.size());
   const char *Data = reinterpret_cast<const char *>(Elts.data());
-  return getImpl(StringRef(const_cast<char *>(Data), Elts.size() * 4), Ty);
+  return getImpl(StringRef(Data, Elts.size() * 4), Ty);
 }
 Constant *ConstantDataVector::getFP(LLVMContext &Context,
                                     ArrayRef<uint64_t> Elts) {
   Type *Ty = VectorType::get(Type::getDoubleTy(Context), Elts.size());
   const char *Data = reinterpret_cast<const char *>(Elts.data());
-  return getImpl(StringRef(const_cast<char *>(Data), Elts.size() * 8), Ty);
+  return getImpl(StringRef(Data, Elts.size() * 8), Ty);
 }
 
 Constant *ConstantDataVector::getSplat(unsigned NumElts, Constant *V) {
@@ -2573,13 +2571,41 @@ uint64_t ConstantDataSequential::getElementAsInteger(unsigned Elt) const {
   switch (getElementType()->getIntegerBitWidth()) {
   default: llvm_unreachable("Invalid bitwidth for CDS");
   case 8:
-    return *const_cast<uint8_t *>(reinterpret_cast<const uint8_t *>(EltPtr));
+    return *reinterpret_cast<const uint8_t *>(EltPtr);
   case 16:
-
return *const_cast(reinterpret_cast(EltPtr)); + return *reinterpret_cast(EltPtr); case 32: - return *const_cast(reinterpret_cast(EltPtr)); + return *reinterpret_cast(EltPtr); case 64: - return *const_cast(reinterpret_cast(EltPtr)); + return *reinterpret_cast(EltPtr); + } +} + +APInt ConstantDataSequential::getElementAsAPInt(unsigned Elt) const { + assert(isa(getElementType()) && + "Accessor can only be used when element is an integer"); + const char *EltPtr = getElementPointer(Elt); + + // The data is stored in host byte order, make sure to cast back to the right + // type to load with the right endianness. + switch (getElementType()->getIntegerBitWidth()) { + default: llvm_unreachable("Invalid bitwidth for CDS"); + case 8: { + auto EltVal = *reinterpret_cast(EltPtr); + return APInt(8, EltVal); + } + case 16: { + auto EltVal = *reinterpret_cast(EltPtr); + return APInt(16, EltVal); + } + case 32: { + auto EltVal = *reinterpret_cast(EltPtr); + return APInt(32, EltVal); + } + case 64: { + auto EltVal = *reinterpret_cast(EltPtr); + return APInt(64, EltVal); + } } } @@ -2607,16 +2633,13 @@ APFloat ConstantDataSequential::getElementAsAPFloat(unsigned Elt) const { float ConstantDataSequential::getElementAsFloat(unsigned Elt) const { assert(getElementType()->isFloatTy() && "Accessor can only be used when element is a 'float'"); - const float *EltPtr = reinterpret_cast(getElementPointer(Elt)); - return *const_cast(EltPtr); + return *reinterpret_cast(getElementPointer(Elt)); } double ConstantDataSequential::getElementAsDouble(unsigned Elt) const { assert(getElementType()->isDoubleTy() && "Accessor can only be used when element is a 'float'"); - const double *EltPtr = - reinterpret_cast(getElementPointer(Elt)); - return *const_cast(EltPtr); + return *reinterpret_cast(getElementPointer(Elt)); } Constant *ConstantDataSequential::getElementAsConstant(unsigned Elt) const { @@ -2627,8 +2650,8 @@ Constant *ConstantDataSequential::getElementAsConstant(unsigned Elt) const { return ConstantInt::get(getElementType(), getElementAsInteger(Elt)); } -bool ConstantDataSequential::isString() const { - return isa(getType()) && getElementType()->isIntegerTy(8); +bool ConstantDataSequential::isString(unsigned CharSize) const { + return isa(getType()) && getElementType()->isIntegerTy(CharSize); } bool ConstantDataSequential::isCString() const { @@ -2644,17 +2667,21 @@ bool ConstantDataSequential::isCString() const { return Str.drop_back().find(0) == StringRef::npos; } -Constant *ConstantDataVector::getSplatValue() const { +bool ConstantDataVector::isSplat() const { const char *Base = getRawDataValues().data(); // Compare elements 1+ to the 0'th element. unsigned EltSize = getElementByteSize(); for (unsigned i = 1, e = getNumElements(); i != e; ++i) if (memcmp(Base, Base+i*EltSize, EltSize)) - return nullptr; + return false; + return true; +} + +Constant *ConstantDataVector::getSplatValue() const { // If they're all the same, return the 0th one as a representative. - return getElementAsConstant(0); + return isSplat() ? 
getElementAsConstant(0) : nullptr; } //===----------------------------------------------------------------------===// diff --git a/interpreter/llvm/src/lib/IR/ConstantsContext.h b/interpreter/llvm/src/lib/IR/ConstantsContext.h index eda751d8af4ab..6585304e7674b 100644 --- a/interpreter/llvm/src/lib/IR/ConstantsContext.h +++ b/interpreter/llvm/src/lib/IR/ConstantsContext.h @@ -22,6 +22,7 @@ #include "llvm/ADT/None.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/InlineAsm.h" @@ -43,8 +44,6 @@ namespace llvm { /// UnaryConstantExpr - This class is private to Constants.cpp, and is used /// behind the scenes to implement unary constant exprs. class UnaryConstantExpr : public ConstantExpr { - void anchor() override; - public: UnaryConstantExpr(unsigned Opcode, Constant *C, Type *Ty) : ConstantExpr(Ty, Opcode, &Op<0>(), 1) { @@ -56,16 +55,12 @@ class UnaryConstantExpr : public ConstantExpr { return User::operator new(s, 1); } - void *operator new(size_t, unsigned) = delete; - DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); }; /// BinaryConstantExpr - This class is private to Constants.cpp, and is used /// behind the scenes to implement binary constant exprs. class BinaryConstantExpr : public ConstantExpr { - void anchor() override; - public: BinaryConstantExpr(unsigned Opcode, Constant *C1, Constant *C2, unsigned Flags) @@ -80,8 +75,6 @@ class BinaryConstantExpr : public ConstantExpr { return User::operator new(s, 2); } - void *operator new(size_t, unsigned) = delete; - /// Transparently provide more efficient getOperand methods. DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); }; @@ -89,8 +82,6 @@ class BinaryConstantExpr : public ConstantExpr { /// SelectConstantExpr - This class is private to Constants.cpp, and is used /// behind the scenes to implement select constant exprs. class SelectConstantExpr : public ConstantExpr { - void anchor() override; - public: SelectConstantExpr(Constant *C1, Constant *C2, Constant *C3) : ConstantExpr(C2->getType(), Instruction::Select, &Op<0>(), 3) { @@ -104,8 +95,6 @@ class SelectConstantExpr : public ConstantExpr { return User::operator new(s, 3); } - void *operator new(size_t, unsigned) = delete; - /// Transparently provide more efficient getOperand methods. DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); }; @@ -114,8 +103,6 @@ class SelectConstantExpr : public ConstantExpr { /// Constants.cpp, and is used behind the scenes to implement /// extractelement constant exprs. class ExtractElementConstantExpr : public ConstantExpr { - void anchor() override; - public: ExtractElementConstantExpr(Constant *C1, Constant *C2) : ConstantExpr(cast(C1->getType())->getElementType(), @@ -129,8 +116,6 @@ class ExtractElementConstantExpr : public ConstantExpr { return User::operator new(s, 2); } - void *operator new(size_t, unsigned) = delete; - /// Transparently provide more efficient getOperand methods. DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); }; @@ -139,8 +124,6 @@ class ExtractElementConstantExpr : public ConstantExpr { /// Constants.cpp, and is used behind the scenes to implement /// insertelement constant exprs. 
class InsertElementConstantExpr : public ConstantExpr { - void anchor() override; - public: InsertElementConstantExpr(Constant *C1, Constant *C2, Constant *C3) : ConstantExpr(C1->getType(), Instruction::InsertElement, @@ -155,8 +138,6 @@ class InsertElementConstantExpr : public ConstantExpr { return User::operator new(s, 3); } - void *operator new(size_t, unsigned) = delete; - /// Transparently provide more efficient getOperand methods. DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); }; @@ -165,8 +146,6 @@ class InsertElementConstantExpr : public ConstantExpr { /// Constants.cpp, and is used behind the scenes to implement /// shufflevector constant exprs. class ShuffleVectorConstantExpr : public ConstantExpr { - void anchor() override; - public: ShuffleVectorConstantExpr(Constant *C1, Constant *C2, Constant *C3) : ConstantExpr(VectorType::get( @@ -184,8 +163,6 @@ class ShuffleVectorConstantExpr : public ConstantExpr { return User::operator new(s, 3); } - void *operator new(size_t, unsigned) = delete; - /// Transparently provide more efficient getOperand methods. DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); }; @@ -194,8 +171,6 @@ class ShuffleVectorConstantExpr : public ConstantExpr { /// Constants.cpp, and is used behind the scenes to implement /// extractvalue constant exprs. class ExtractValueConstantExpr : public ConstantExpr { - void anchor() override; - public: ExtractValueConstantExpr(Constant *Agg, ArrayRef IdxList, Type *DestTy) @@ -209,8 +184,6 @@ class ExtractValueConstantExpr : public ConstantExpr { return User::operator new(s, 1); } - void *operator new(size_t, unsigned) = delete; - /// Indices - These identify which value to extract. const SmallVector Indices; @@ -229,8 +202,6 @@ class ExtractValueConstantExpr : public ConstantExpr { /// Constants.cpp, and is used behind the scenes to implement /// insertvalue constant exprs. class InsertValueConstantExpr : public ConstantExpr { - void anchor() override; - public: InsertValueConstantExpr(Constant *Agg, Constant *Val, ArrayRef IdxList, Type *DestTy) @@ -245,8 +216,6 @@ class InsertValueConstantExpr : public ConstantExpr { return User::operator new(s, 2); } - void *operator new(size_t, unsigned) = delete; - /// Indices - These identify the position for the insertion. const SmallVector Indices; @@ -270,8 +239,6 @@ class GetElementPtrConstantExpr : public ConstantExpr { GetElementPtrConstantExpr(Type *SrcElementTy, Constant *C, ArrayRef IdxList, Type *DestTy); - void anchor() override; - public: static GetElementPtrConstantExpr *Create(Type *SrcElementTy, Constant *C, ArrayRef IdxList, @@ -300,8 +267,6 @@ class GetElementPtrConstantExpr : public ConstantExpr { // behind the scenes to implement ICmp and FCmp constant expressions. This is // needed in order to store the predicate value for these instructions. class CompareConstantExpr : public ConstantExpr { - void anchor() override; - public: unsigned short predicate; CompareConstantExpr(Type *ty, Instruction::OtherOps opc, @@ -316,8 +281,6 @@ class CompareConstantExpr : public ConstantExpr { return User::operator new(s, 2); } - void *operator new(size_t, unsigned) = delete; - /// Transparently provide more efficient getOperand methods. 
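For context on the anchor() deletions in this header: an out-of-line anchor() is the usual trick to pin a polymorphic class's vtable (and RTTI) to a single translation unit; these constant-expression classes no longer need their own, so the declarations, together with the now-redundant deleted placement operator new, can go. The idiom in general form (MyNode is a made-up name):

    // A class whose virtual functions are all inline may get its vtable
    // emitted in every TU that uses it; one out-of-line virtual avoids that.
    struct MyNode {
      virtual ~MyNode() = default;
      virtual void anchor(); // declared in the header...
    };
    void MyNode::anchor() {} // ...defined once, in exactly one .cpp file.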
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); @@ -387,31 +350,34 @@ struct ConstantExprKeyType; template struct ConstantInfo; template <> struct ConstantInfo { - typedef ConstantExprKeyType ValType; - typedef Type TypeClass; + using ValType = ConstantExprKeyType; + using TypeClass = Type; }; template <> struct ConstantInfo { - typedef InlineAsmKeyType ValType; - typedef PointerType TypeClass; + using ValType = InlineAsmKeyType; + using TypeClass = PointerType; }; template <> struct ConstantInfo { - typedef ConstantAggrKeyType ValType; - typedef ArrayType TypeClass; + using ValType = ConstantAggrKeyType; + using TypeClass = ArrayType; }; template <> struct ConstantInfo { - typedef ConstantAggrKeyType ValType; - typedef StructType TypeClass; + using ValType = ConstantAggrKeyType; + using TypeClass = StructType; }; template <> struct ConstantInfo { - typedef ConstantAggrKeyType ValType; - typedef VectorType TypeClass; + using ValType = ConstantAggrKeyType; + using TypeClass = VectorType; }; template struct ConstantAggrKeyType { ArrayRef Operands; + ConstantAggrKeyType(ArrayRef Operands) : Operands(Operands) {} + ConstantAggrKeyType(ArrayRef Operands, const ConstantClass *) : Operands(Operands) {} + ConstantAggrKeyType(const ConstantClass *C, SmallVectorImpl &Storage) { assert(Storage.empty() && "Expected empty storage"); @@ -437,7 +403,8 @@ template struct ConstantAggrKeyType { return hash_combine_range(Operands.begin(), Operands.end()); } - typedef typename ConstantInfo::TypeClass TypeClass; + using TypeClass = typename ConstantInfo::TypeClass; + ConstantClass *create(TypeClass *Ty) const { return new (Operands.size()) ConstantClass(Ty, Operands); } @@ -457,6 +424,7 @@ struct InlineAsmKeyType { : AsmString(AsmString), Constraints(Constraints), FTy(FTy), HasSideEffects(HasSideEffects), IsAlignStack(IsAlignStack), AsmDialect(AsmDialect) {} + InlineAsmKeyType(const InlineAsm *Asm, SmallVectorImpl &) : AsmString(Asm->getAsmString()), Constraints(Asm->getConstraintString()), FTy(Asm->getFunctionType()), HasSideEffects(Asm->hasSideEffects()), @@ -483,7 +451,8 @@ struct InlineAsmKeyType { AsmDialect, FTy); } - typedef ConstantInfo::TypeClass TypeClass; + using TypeClass = ConstantInfo::TypeClass; + InlineAsm *create(TypeClass *Ty) const { assert(PointerType::getUnqual(FTy) == Ty); return new InlineAsm(FTy, AsmString, Constraints, HasSideEffects, @@ -507,11 +476,13 @@ struct ConstantExprKeyType { : Opcode(Opcode), SubclassOptionalData(SubclassOptionalData), SubclassData(SubclassData), Ops(Ops), Indexes(Indexes), ExplicitTy(ExplicitTy) {} + ConstantExprKeyType(ArrayRef Operands, const ConstantExpr *CE) : Opcode(CE->getOpcode()), SubclassOptionalData(CE->getRawSubclassOptionalData()), SubclassData(CE->isCompare() ? CE->getPredicate() : 0), Ops(Operands), Indexes(CE->hasIndices() ? 
CE->getIndices() : ArrayRef()) {} + ConstantExprKeyType(const ConstantExpr *CE, SmallVectorImpl &Storage) : Opcode(CE->getOpcode()), @@ -553,7 +524,8 @@ struct ConstantExprKeyType { hash_combine_range(Indexes.begin(), Indexes.end())); } - typedef ConstantInfo::TypeClass TypeClass; + using TypeClass = ConstantInfo::TypeClass; + ConstantExpr *create(TypeClass *Ty) const { switch (Opcode) { default: @@ -594,16 +566,17 @@ struct ConstantExprKeyType { template class ConstantUniqueMap { public: - typedef typename ConstantInfo::ValType ValType; - typedef typename ConstantInfo::TypeClass TypeClass; - typedef std::pair LookupKey; + using ValType = typename ConstantInfo::ValType; + using TypeClass = typename ConstantInfo::TypeClass; + using LookupKey = std::pair; /// Key and hash together, so that we compute the hash only once and reuse it. - typedef std::pair LookupKeyHashed; + using LookupKeyHashed = std::pair; private: struct MapInfo { - typedef DenseMapInfo ConstantClassInfo; + using ConstantClassInfo = DenseMapInfo; + static inline ConstantClass *getEmptyKey() { return ConstantClassInfo::getEmptyKey(); } @@ -643,7 +616,7 @@ template class ConstantUniqueMap { }; public: - typedef DenseSet MapTy; + using MapTy = DenseSet; private: MapTy Map; diff --git a/interpreter/llvm/src/lib/IR/Core.cpp b/interpreter/llvm/src/lib/IR/Core.cpp index 50292b6e20bf1..aba770457e2f0 100644 --- a/interpreter/llvm/src/lib/IR/Core.cpp +++ b/interpreter/llvm/src/lib/IR/Core.cpp @@ -14,7 +14,6 @@ #include "llvm-c/Core.h" #include "llvm/ADT/StringSwitch.h" -#include "llvm/Bitcode/BitcodeReader.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" @@ -50,6 +49,7 @@ void llvm::initializeCore(PassRegistry &Registry) { initializePrintModulePassWrapperPass(Registry); initializePrintFunctionPassWrapperPass(Registry); initializePrintBasicBlockPassPass(Registry); + initializeSafepointIRVerifierPass(Registry); initializeVerifierLegacyPassPass(Registry); } @@ -568,6 +568,14 @@ LLVMTypeRef LLVMGetTypeByName(LLVMModuleRef M, const char *Name) { /*--.. Operations on array, pointer, and vector types (sequence types) .....--*/ +void LLVMGetSubtypes(LLVMTypeRef Tp, LLVMTypeRef *Arr) { + int i = 0; + for (auto *T : unwrap(Tp)->subtypes()) { + Arr[i] = wrap(T); + i++; + } +} + LLVMTypeRef LLVMArrayType(LLVMTypeRef ElementType, unsigned ElementCount) { return wrap(ArrayType::get(unwrap(ElementType), ElementCount)); } @@ -587,6 +595,10 @@ LLVMTypeRef LLVMGetElementType(LLVMTypeRef WrappedTy) { return wrap(cast(Ty)->getElementType()); } +unsigned LLVMGetNumContainedTypes(LLVMTypeRef Tp) { + return unwrap(Tp)->getNumContainedTypes(); +} + unsigned LLVMGetArrayLength(LLVMTypeRef ArrayTy) { return unwrap(ArrayTy)->getNumElements(); } @@ -2743,11 +2755,14 @@ static LLVMAtomicOrdering mapToLLVMOrdering(AtomicOrdering Ordering) { llvm_unreachable("Invalid AtomicOrdering value!"); } +// TODO: Should this and other atomic instructions support building with +// "syncscope"? LLVMValueRef LLVMBuildFence(LLVMBuilderRef B, LLVMAtomicOrdering Ordering, LLVMBool isSingleThread, const char *Name) { return wrap( unwrap(B)->CreateFence(mapFromLLVMOrdering(Ordering), - isSingleThread ? SingleThread : CrossThread, + isSingleThread ? 
SyncScope::SingleThread + : SyncScope::System, Name)); } @@ -3029,7 +3044,8 @@ LLVMValueRef LLVMBuildAtomicRMW(LLVMBuilderRef B,LLVMAtomicRMWBinOp op, case LLVMAtomicRMWBinOpUMin: intop = AtomicRMWInst::UMin; break; } return wrap(unwrap(B)->CreateAtomicRMW(intop, unwrap(PTR), unwrap(Val), - mapFromLLVMOrdering(ordering), singleThread ? SingleThread : CrossThread)); + mapFromLLVMOrdering(ordering), singleThread ? SyncScope::SingleThread + : SyncScope::System)); } LLVMValueRef LLVMBuildAtomicCmpXchg(LLVMBuilderRef B, LLVMValueRef Ptr, @@ -3041,7 +3057,7 @@ LLVMValueRef LLVMBuildAtomicCmpXchg(LLVMBuilderRef B, LLVMValueRef Ptr, return wrap(unwrap(B)->CreateAtomicCmpXchg(unwrap(Ptr), unwrap(Cmp), unwrap(New), mapFromLLVMOrdering(SuccessOrdering), mapFromLLVMOrdering(FailureOrdering), - singleThread ? SingleThread : CrossThread)); + singleThread ? SyncScope::SingleThread : SyncScope::System)); } @@ -3049,17 +3065,18 @@ LLVMBool LLVMIsAtomicSingleThread(LLVMValueRef AtomicInst) { Value *P = unwrap(AtomicInst); if (AtomicRMWInst *I = dyn_cast(P)) - return I->getSynchScope() == SingleThread; - return cast(P)->getSynchScope() == SingleThread; + return I->getSyncScopeID() == SyncScope::SingleThread; + return cast(P)->getSyncScopeID() == + SyncScope::SingleThread; } void LLVMSetAtomicSingleThread(LLVMValueRef AtomicInst, LLVMBool NewValue) { Value *P = unwrap(AtomicInst); - SynchronizationScope Sync = NewValue ? SingleThread : CrossThread; + SyncScope::ID SSID = NewValue ? SyncScope::SingleThread : SyncScope::System; if (AtomicRMWInst *I = dyn_cast(P)) - return I->setSynchScope(Sync); - return cast(P)->setSynchScope(Sync); + return I->setSyncScopeID(SSID); + return cast(P)->setSyncScopeID(SSID); } LLVMAtomicOrdering LLVMGetCmpXchgSuccessOrdering(LLVMValueRef CmpXchgInst) { diff --git a/interpreter/llvm/src/lib/IR/DIBuilder.cpp b/interpreter/llvm/src/lib/IR/DIBuilder.cpp index 7e6f9a7804b9d..bce28ba3b9506 100644 --- a/interpreter/llvm/src/lib/IR/DIBuilder.cpp +++ b/interpreter/llvm/src/lib/IR/DIBuilder.cpp @@ -12,14 +12,14 @@ //===----------------------------------------------------------------------===// #include "llvm/IR/DIBuilder.h" +#include "LLVMContextImpl.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/Dwarf.h" -#include "LLVMContextImpl.h" using namespace llvm; using namespace llvm::dwarf; @@ -39,6 +39,21 @@ void DIBuilder::trackIfUnresolved(MDNode *N) { UnresolvedNodes.emplace_back(N); } +void DIBuilder::finalizeSubprogram(DISubprogram *SP) { + MDTuple *Temp = SP->getVariables().get(); + if (!Temp || !Temp->isTemporary()) + return; + + SmallVector Variables; + + auto PV = PreservedVariables.find(SP); + if (PV != PreservedVariables.end()) + Variables.append(PV->second.begin(), PV->second.end()); + + DINodeArray AV = getOrCreateArray(Variables); + TempMDTuple(Temp)->replaceAllUsesWith(AV.get()); +} + void DIBuilder::finalize() { if (!CUNode) { assert(!AllowUnresolvedNodes && @@ -62,25 +77,11 @@ void DIBuilder::finalize() { CUNode->replaceRetainedTypes(MDTuple::get(VMContext, RetainValues)); DISubprogramArray SPs = MDTuple::get(VMContext, AllSubprograms); - auto resolveVariables = [&](DISubprogram *SP) { - MDTuple *Temp = SP->getVariables().get(); - if (!Temp) - return; - - SmallVector Variables; - - auto PV = PreservedVariables.find(SP); - if (PV != PreservedVariables.end()) - 
Variables.append(PV->second.begin(), PV->second.end()); - - DINodeArray AV = getOrCreateArray(Variables); - TempMDTuple(Temp)->replaceAllUsesWith(AV.get()); - }; for (auto *SP : SPs) - resolveVariables(SP); + finalizeSubprogram(SP); for (auto *N : RetainValues) if (auto *SP = dyn_cast(N)) - resolveVariables(SP); + finalizeSubprogram(SP); if (!AllGVs.empty()) CUNode->replaceGlobalVariables(MDTuple::get(VMContext, AllGVs)); @@ -147,10 +148,13 @@ DICompileUnit *DIBuilder::createCompileUnit( static DIImportedEntity * createImportedModule(LLVMContext &C, dwarf::Tag Tag, DIScope *Context, - Metadata *NS, unsigned Line, StringRef Name, + Metadata *NS, DIFile *File, unsigned Line, StringRef Name, SmallVectorImpl &AllImportedModules) { + if (Line) + assert(File && "Source location has line number but no file"); unsigned EntitiesCount = C.pImpl->DIImportedEntitys.size(); - auto *M = DIImportedEntity::get(C, Tag, Context, DINodeRef(NS), Line, Name); + auto *M = + DIImportedEntity::get(C, Tag, Context, DINodeRef(NS), File, Line, Name); if (EntitiesCount < C.pImpl->DIImportedEntitys.size()) // A new Imported Entity was just added to the context. // Add it to the Imported Modules list. @@ -159,33 +163,38 @@ createImportedModule(LLVMContext &C, dwarf::Tag Tag, DIScope *Context, } DIImportedEntity *DIBuilder::createImportedModule(DIScope *Context, - DINamespace *NS, + DINamespace *NS, DIFile *File, unsigned Line) { return ::createImportedModule(VMContext, dwarf::DW_TAG_imported_module, - Context, NS, Line, StringRef(), AllImportedModules); + Context, NS, File, Line, StringRef(), + AllImportedModules); } DIImportedEntity *DIBuilder::createImportedModule(DIScope *Context, DIImportedEntity *NS, - unsigned Line) { + DIFile *File, unsigned Line) { return ::createImportedModule(VMContext, dwarf::DW_TAG_imported_module, - Context, NS, Line, StringRef(), AllImportedModules); + Context, NS, File, Line, StringRef(), + AllImportedModules); } DIImportedEntity *DIBuilder::createImportedModule(DIScope *Context, DIModule *M, - unsigned Line) { + DIFile *File, unsigned Line) { return ::createImportedModule(VMContext, dwarf::DW_TAG_imported_module, - Context, M, Line, StringRef(), AllImportedModules); + Context, M, File, Line, StringRef(), + AllImportedModules); } DIImportedEntity *DIBuilder::createImportedDeclaration(DIScope *Context, DINode *Decl, + DIFile *File, unsigned Line, StringRef Name) { // Make sure to use the unique identifier based metadata reference for // types that have one. 
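Each imported-entity creator above now threads through the DIFile of the import site, and the new assertion requires a file whenever the line number is nonzero. A hedged usage sketch (DIB, Scope, NS, and F are assumed to already exist in the caller):

    // Record the equivalent of `using namespace NS;` at F:42.
    llvm::DIImportedEntity *emitUsingDirective(llvm::DIBuilder &DIB,
                                               llvm::DIScope *Scope,
                                               llvm::DINamespace *NS,
                                               llvm::DIFile *F) {
      // The DIFile parameter is new in this patch; older callers passed
      // only the scope, the namespace, and the line.
      return DIB.createImportedModule(Scope, NS, F, /*Line=*/42);
    }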
return ::createImportedModule(VMContext, dwarf::DW_TAG_imported_declaration, - Context, Decl, Line, Name, AllImportedModules); + Context, Decl, File, Line, Name, + AllImportedModules); } DIFile *DIBuilder::createFile(StringRef Filename, StringRef Directory, diff --git a/interpreter/llvm/src/lib/IR/DataLayout.cpp b/interpreter/llvm/src/lib/IR/DataLayout.cpp index c117d29b7f694..5de281a952376 100644 --- a/interpreter/llvm/src/lib/IR/DataLayout.cpp +++ b/interpreter/llvm/src/lib/IR/DataLayout.cpp @@ -16,11 +16,11 @@ // //===----------------------------------------------------------------------===// +#include "llvm/IR/DataLayout.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/GlobalVariable.h" @@ -307,7 +307,7 @@ void DataLayout::parseSpecifier(StringRef Desc) { case 'a': { AlignTypeEnum AlignType; switch (Specifier) { - default: + default: llvm_unreachable("Unexpected specifier!"); case 'i': AlignType = INTEGER_ALIGN; break; case 'v': AlignType = VECTOR_ALIGN; break; case 'f': AlignType = FLOAT_ALIGN; break; diff --git a/interpreter/llvm/src/lib/IR/DebugInfo.cpp b/interpreter/llvm/src/lib/IR/DebugInfo.cpp index ca3828420a72f..56cec57a4d070 100644 --- a/interpreter/llvm/src/lib/IR/DebugInfo.cpp +++ b/interpreter/llvm/src/lib/IR/DebugInfo.cpp @@ -12,6 +12,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/IR/DebugInfo.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/None.h" @@ -20,7 +21,6 @@ #include "llvm/ADT/StringRef.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" diff --git a/interpreter/llvm/src/lib/IR/DebugInfoMetadata.cpp b/interpreter/llvm/src/lib/IR/DebugInfoMetadata.cpp index e6c49cad0722f..c14940bad45db 100644 --- a/interpreter/llvm/src/lib/IR/DebugInfoMetadata.cpp +++ b/interpreter/llvm/src/lib/IR/DebugInfoMetadata.cpp @@ -598,8 +598,7 @@ unsigned DIExpression::ExprOperand::getSize() const { case dwarf::DW_OP_LLVM_fragment: return 3; case dwarf::DW_OP_constu: - case dwarf::DW_OP_plus: - case dwarf::DW_OP_minus: + case dwarf::DW_OP_plus_uconst: return 2; default: return 1; @@ -641,6 +640,7 @@ bool DIExpression::isValid() const { break; } case dwarf::DW_OP_constu: + case dwarf::DW_OP_plus_uconst: case dwarf::DW_OP_plus: case dwarf::DW_OP_minus: case dwarf::DW_OP_deref: @@ -664,11 +664,12 @@ DIExpression::getFragmentInfo(expr_op_iterator Start, expr_op_iterator End) { void DIExpression::appendOffset(SmallVectorImpl &Ops, int64_t Offset) { if (Offset > 0) { - Ops.push_back(dwarf::DW_OP_plus); + Ops.push_back(dwarf::DW_OP_plus_uconst); Ops.push_back(Offset); } else if (Offset < 0) { - Ops.push_back(dwarf::DW_OP_minus); + Ops.push_back(dwarf::DW_OP_constu); Ops.push_back(-Offset); + Ops.push_back(dwarf::DW_OP_minus); } } @@ -677,16 +678,23 @@ bool DIExpression::extractIfOffset(int64_t &Offset) const { Offset = 0; return true; } - if (getNumElements() != 2) - return false; - if (Elements[0] == dwarf::DW_OP_plus) { + + if (getNumElements() == 2 && Elements[0] == dwarf::DW_OP_plus_uconst) { Offset = Elements[1]; return true; } - if (Elements[0] == dwarf::DW_OP_minus) { - Offset = -Elements[1]; - return true; + + if (getNumElements() == 3 && Elements[0] 
== dwarf::DW_OP_constu) { + if (Elements[2] == dwarf::DW_OP_plus) { + Offset = Elements[1]; + return true; + } + if (Elements[2] == dwarf::DW_OP_minus) { + Offset = -Elements[1]; + return true; + } } + return false; } @@ -752,12 +760,13 @@ DIObjCProperty *DIObjCProperty::getImpl( DIImportedEntity *DIImportedEntity::getImpl(LLVMContext &Context, unsigned Tag, Metadata *Scope, Metadata *Entity, - unsigned Line, MDString *Name, - StorageType Storage, + Metadata *File, unsigned Line, + MDString *Name, StorageType Storage, bool ShouldCreate) { assert(isCanonical(Name) && "Expected canonical MDString"); - DEFINE_GETIMPL_LOOKUP(DIImportedEntity, (Tag, Scope, Entity, Line, Name)); - Metadata *Ops[] = {Scope, Entity, Name}; + DEFINE_GETIMPL_LOOKUP(DIImportedEntity, + (Tag, Scope, Entity, File, Line, Name)); + Metadata *Ops[] = {Scope, Entity, Name, File}; DEFINE_GETIMPL_STORE(DIImportedEntity, (Tag, Line), Ops); } diff --git a/interpreter/llvm/src/lib/IR/DebugLoc.cpp b/interpreter/llvm/src/lib/IR/DebugLoc.cpp index 3168ec6944a3a..6297395b4c009 100644 --- a/interpreter/llvm/src/lib/IR/DebugLoc.cpp +++ b/interpreter/llvm/src/lib/IR/DebugLoc.cpp @@ -8,9 +8,9 @@ //===----------------------------------------------------------------------===// #include "llvm/IR/DebugLoc.h" -#include "llvm/IR/IntrinsicInst.h" #include "LLVMContextImpl.h" #include "llvm/IR/DebugInfo.h" +#include "llvm/IR/IntrinsicInst.h" using namespace llvm; //===----------------------------------------------------------------------===// @@ -99,87 +99,6 @@ DebugLoc DebugLoc::appendInlinedAt(DebugLoc DL, DILocation *InlinedAt, return Last; } -/// Reparent \c Scope from \c OrigSP to \c NewSP. -static DIScope *reparentScope(LLVMContext &Ctx, DIScope *Scope, - DISubprogram *OrigSP, DISubprogram *NewSP, - DenseMap &Cache) { - SmallVector ScopeChain; - DIScope *Last = NewSP; - DIScope *CurScope = Scope; - do { - if (auto *SP = dyn_cast(CurScope)) { - // Don't rewrite this scope chain if it doesn't lead to the replaced SP. - if (SP != OrigSP) - return Scope; - Cache.insert({OrigSP, NewSP}); - break; - } - if (auto *Found = Cache[CurScope]) { - Last = cast(Found); - break; - } - ScopeChain.push_back(CurScope); - } while ((CurScope = CurScope->getScope().resolve())); - - // Starting from the top, rebuild the nodes to point to the new inlined-at - // location (then rebuilding the rest of the chain behind it) and update the - // map of already-constructed inlined-at nodes. - for (const DIScope *MD : reverse(ScopeChain)) { - if (auto *LB = dyn_cast(MD)) - Cache[MD] = Last = DILexicalBlock::getDistinct( - Ctx, Last, LB->getFile(), LB->getLine(), LB->getColumn()); - else if (auto *LB = dyn_cast(MD)) - Cache[MD] = Last = DILexicalBlockFile::getDistinct( - Ctx, Last, LB->getFile(), LB->getDiscriminator()); - else - llvm_unreachable("illegal parent scope"); - } - return Last; -} - -void DebugLoc::reparentDebugInfo(Instruction &I, DISubprogram *OrigSP, - DISubprogram *NewSP, - DenseMap &Cache) { - auto DL = I.getDebugLoc(); - if (!OrigSP || !NewSP || OrigSP == NewSP || !DL) - return; - - // Reparent the debug location. 
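Returning to the DIExpression change a few hunks up: positive offsets are now encoded with the single DW_OP_plus_uconst operator, and negative ones as an unsigned constant push followed by DW_OP_minus, since plain DW_OP_plus takes no inline operand in standard DWARF. A sketch mirroring what appendOffset emits (illustrative only):

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/BinaryFormat/Dwarf.h"
    #include <cstdint>
    using namespace llvm;

    // appendOffsetSketch(Ops, 8)  -> {DW_OP_plus_uconst, 8}
    // appendOffsetSketch(Ops, -8) -> {DW_OP_constu, 8, DW_OP_minus}
    void appendOffsetSketch(SmallVectorImpl<uint64_t> &Ops, int64_t Offset) {
      if (Offset > 0) {
        Ops.push_back(dwarf::DW_OP_plus_uconst);
        Ops.push_back(Offset);
      } else if (Offset < 0) {
        Ops.push_back(dwarf::DW_OP_constu);
        Ops.push_back(-Offset);
        Ops.push_back(dwarf::DW_OP_minus);
      } // Offset == 0 adds nothing; extractIfOffset() accepts both forms.
    }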
-  auto &Ctx = I.getContext();
-  DILocation *InlinedAt = DL->getInlinedAt();
-  if (InlinedAt) {
-    while (auto *IA = InlinedAt->getInlinedAt())
-      InlinedAt = IA;
-    auto NewScope =
-        reparentScope(Ctx, InlinedAt->getScope(), OrigSP, NewSP, Cache);
-    InlinedAt =
-        DebugLoc::get(InlinedAt->getLine(), InlinedAt->getColumn(), NewScope);
-  }
-  I.setDebugLoc(
-      DebugLoc::get(DL.getLine(), DL.getCol(),
-                    reparentScope(Ctx, DL->getScope(), OrigSP, NewSP, Cache),
-                    DebugLoc::appendInlinedAt(DL, InlinedAt, Ctx, Cache,
-                                              ReplaceLastInlinedAt)));
-
-  // Fix up debug variables to point to NewSP.
-  auto reparentVar = [&](DILocalVariable *Var) {
-    return DILocalVariable::getDistinct(
-        Ctx,
-        cast<DILocalScope>(
-            reparentScope(Ctx, Var->getScope(), OrigSP, NewSP, Cache)),
-        Var->getName(), Var->getFile(), Var->getLine(), Var->getType(),
-        Var->getArg(), Var->getFlags(), Var->getAlignInBits());
-  };
-  if (auto *DbgValue = dyn_cast<DbgValueInst>(&I)) {
-    auto *Var = DbgValue->getVariable();
-    I.setOperand(2, MetadataAsValue::get(Ctx, reparentVar(Var)));
-  } else if (auto *DbgDeclare = dyn_cast<DbgDeclareInst>(&I)) {
-    auto *Var = DbgDeclare->getVariable();
-    I.setOperand(1, MetadataAsValue::get(Ctx, reparentVar(Var)));
-  }
-}
-
-
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 LLVM_DUMP_METHOD void DebugLoc::dump() const {
   if (!Loc)
diff --git a/interpreter/llvm/src/lib/IR/DiagnosticInfo.cpp b/interpreter/llvm/src/lib/IR/DiagnosticInfo.cpp
index 395b6158e0c86..5129d6b9b008e 100644
--- a/interpreter/llvm/src/lib/IR/DiagnosticInfo.cpp
+++ b/interpreter/llvm/src/lib/IR/DiagnosticInfo.cpp
@@ -13,19 +13,30 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/IR/DiagnosticInfo.h"
-#include "LLVMContextImpl.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Constants.h"
-#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/DiagnosticPrinter.h"
 #include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
 #include "llvm/IR/Instruction.h"
+#include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/Casting.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Regex.h"
+#include "llvm/Support/raw_ostream.h"
 #include <atomic>
+#include <cassert>
+#include <memory>
 #include <string>
 
 using namespace llvm;
 
@@ -53,6 +64,8 @@ struct PassRemarksOpt {
   }
 };
 
+} // end anonymous namespace
+
 static PassRemarksOpt PassRemarksOptLoc;
 static PassRemarksOpt PassRemarksMissedOptLoc;
 static PassRemarksOpt PassRemarksAnalysisOptLoc;
@@ -85,7 +98,6 @@ PassRemarksAnalysis(
         "the given regular expression"),
     cl::Hidden, cl::location(PassRemarksAnalysisOptLoc), cl::ValueRequired,
     cl::ZeroOrMore);
-}
 
 int llvm::getNextAvailablePluginDiagnosticKind() {
   static std::atomic<int> PluginKindID(DK_FirstPluginKind);
@@ -97,8 +109,7 @@ const char *OptimizationRemarkAnalysis::AlwaysPrint = "";
 
 DiagnosticInfoInlineAsm::DiagnosticInfoInlineAsm(const Instruction &I,
                                                  const Twine &MsgStr,
                                                  DiagnosticSeverity Severity)
-    : DiagnosticInfo(DK_InlineAsm, Severity), LocCookie(0), MsgStr(MsgStr),
-      Instr(&I) {
+    : DiagnosticInfo(DK_InlineAsm, Severity), MsgStr(MsgStr), Instr(&I) {
   if (const MDNode *SrcLoc = I.getMetadata("srcloc")) {
     if (SrcLoc->getNumOperands() != 0)
       if (const auto *CI =
@@ -193,7 +204,7 @@ DiagnosticInfoOptimizationBase::Argument::Argument(StringRef Key, const Value *V
   // Only include names that correspond to user variables. FIXME: we should use
   // debug info if available to get the name of the user variable.
   if (isa<llvm::Argument>(V) || isa<GlobalValue>(V))
-    Val = GlobalValue::getRealLinkageName(V->getName());
+    Val = GlobalValue::dropLLVMManglingEscape(V->getName());
   else if (isa<Constant>(V)) {
     raw_string_ostream OS(Val);
     V->printAsOperand(OS, /*PrintType=*/false);
diff --git a/interpreter/llvm/src/lib/IR/DiagnosticPrinter.cpp b/interpreter/llvm/src/lib/IR/DiagnosticPrinter.cpp
index 659ff49d623f8..ee2df9e24f939 100644
--- a/interpreter/llvm/src/lib/IR/DiagnosticPrinter.cpp
+++ b/interpreter/llvm/src/lib/IR/DiagnosticPrinter.cpp
@@ -11,12 +11,12 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/ADT/Twine.h"
 #include "llvm/IR/DiagnosticPrinter.h"
+#include "llvm/ADT/Twine.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Value.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
diff --git a/interpreter/llvm/src/lib/IR/Dominators.cpp b/interpreter/llvm/src/lib/IR/Dominators.cpp
index 44948cc5831d7..4d7e3040ecd7f 100644
--- a/interpreter/llvm/src/lib/IR/Dominators.cpp
+++ b/interpreter/llvm/src/lib/IR/Dominators.cpp
@@ -61,17 +61,30 @@ bool BasicBlockEdge::isSingleEdge() const {
 //===----------------------------------------------------------------------===//
 
 template class llvm::DomTreeNodeBase<BasicBlock>;
-template class llvm::DominatorTreeBase<BasicBlock>;
-
-template void llvm::Calculate<Function, BasicBlock *>(
-    DominatorTreeBase<
-        typename std::remove_pointer<GraphTraits<BasicBlock *>::NodeRef>::type>
-        &DT,
-    Function &F);
-template void llvm::Calculate<Function, Inverse<BasicBlock *>>(
-    DominatorTreeBase<typename std::remove_pointer<
-        GraphTraits<Inverse<BasicBlock *>>::NodeRef>::type> &DT,
-    Function &F);
+template class llvm::DominatorTreeBase<BasicBlock, false>; // DomTreeBase
+template class llvm::DominatorTreeBase<BasicBlock, true>;  // PostDomTreeBase
+
+template void
+llvm::DomTreeBuilder::Calculate<DomTreeBuilder::BBDomTree, Function>(
+    DomTreeBuilder::BBDomTree &DT, Function &F);
+template void
+llvm::DomTreeBuilder::Calculate<DomTreeBuilder::BBPostDomTree, Function>(
+    DomTreeBuilder::BBPostDomTree &DT, Function &F);
+
+template void llvm::DomTreeBuilder::InsertEdge<DomTreeBuilder::BBDomTree>(
+    DomTreeBuilder::BBDomTree &DT, BasicBlock *From, BasicBlock *To);
+template void llvm::DomTreeBuilder::InsertEdge<DomTreeBuilder::BBPostDomTree>(
+    DomTreeBuilder::BBPostDomTree &DT, BasicBlock *From, BasicBlock *To);
+
+template void llvm::DomTreeBuilder::DeleteEdge<DomTreeBuilder::BBDomTree>(
+    DomTreeBuilder::BBDomTree &DT, BasicBlock *From, BasicBlock *To);
+template void llvm::DomTreeBuilder::DeleteEdge<DomTreeBuilder::BBPostDomTree>(
+    DomTreeBuilder::BBPostDomTree &DT, BasicBlock *From, BasicBlock *To);
+
+template bool llvm::DomTreeBuilder::Verify<DomTreeBuilder::BBDomTree>(
+    const DomTreeBuilder::BBDomTree &DT);
+template bool llvm::DomTreeBuilder::Verify<DomTreeBuilder::BBPostDomTree>(
+    const DomTreeBuilder::BBPostDomTree &DT);
 
 bool DominatorTree::invalidate(Function &F, const PreservedAnalyses &PA,
                                FunctionAnalysisManager::Invalidator &) {
@@ -150,12 +163,6 @@ bool DominatorTree::dominates(const Instruction *Def,
 
 bool DominatorTree::dominates(const BasicBlockEdge &BBE,
                               const BasicBlock *UseBB) const {
-  // Assert that we have a single edge. We could handle them by simply
-  // returning false, but since isSingleEdge is linear on the number of
-  // edges, the callers can normally handle them more efficiently.
-  assert(BBE.isSingleEdge() &&
-         "This function is not efficient in handling multiple edges");
-
   // If the BB the edge ends in doesn't dominate the use BB, then the
   // edge also doesn't.
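
The Dominators.cpp hunk above is part of the split of DominatorTreeBase into forward (IsPostDom = false) and post-dominator (IsPostDom = true) instantiations, with construction, incremental update, and checking routed through llvm::DomTreeBuilder. A sketch of what the new InsertEdge/Verify entry points enable for a pass that has just added a CFG edge (a hypothetical helper, assuming the release_50-era headers; not code from this patch):

    #include "llvm/IR/Dominators.h"

    // Repair the dominator tree in place after wiring From -> To,
    // instead of recomputing it from scratch.
    void updateDomTreeForNewEdge(llvm::DominatorTree &DT,
                                 llvm::BasicBlock *From,
                                 llvm::BasicBlock *To) {
      llvm::DomTreeBuilder::InsertEdge(DT, From, To);
      assert(DT.verify() && "dominator tree out of sync with the CFG");
    }
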
const BasicBlock *Start = BBE.getStart(); @@ -188,11 +195,17 @@ bool DominatorTree::dominates(const BasicBlockEdge &BBE, // trivially dominates itself, so we only have to find if it dominates the // other predecessors. Since the only way out of X is via NormalDest, X can // only properly dominate a node if NormalDest dominates that node too. + int IsDuplicateEdge = 0; for (const_pred_iterator PI = pred_begin(End), E = pred_end(End); PI != E; ++PI) { const BasicBlock *BB = *PI; - if (BB == Start) + if (BB == Start) { + // If there are multiple edges between Start and End, by definition they + // can't dominate anything. + if (IsDuplicateEdge++) + return false; continue; + } if (!dominates(End, BB)) return false; @@ -201,12 +214,6 @@ bool DominatorTree::dominates(const BasicBlockEdge &BBE, } bool DominatorTree::dominates(const BasicBlockEdge &BBE, const Use &U) const { - // Assert that we have a single edge. We could handle them by simply - // returning false, but since isSingleEdge is linear on the number of - // edges, the callers can normally handle them more efficiently. - assert(BBE.isSingleEdge() && - "This function is not efficient in handling multiple edges"); - Instruction *UserInst = cast(U.getUser()); // A PHI in the end of the edge is dominated by it. PHINode *PN = dyn_cast(UserInst); @@ -291,6 +298,13 @@ bool DominatorTree::isReachableFromEntry(const Use &U) const { } void DominatorTree::verifyDomTree() const { + // Perform the expensive checks only when VerifyDomInfo is set. + if (VerifyDomInfo && !verify()) { + errs() << "\n~~~~~~~~~~~\n\t\tDomTree verification failed!\n~~~~~~~~~~~\n"; + print(errs()); + abort(); + } + Function &F = *getRoot()->getParent(); DominatorTree OtherDT; diff --git a/interpreter/llvm/src/lib/IR/Function.cpp b/interpreter/llvm/src/lib/IR/Function.cpp index 16a9e51b83069..85a019856c017 100644 --- a/interpreter/llvm/src/lib/IR/Function.cpp +++ b/interpreter/llvm/src/lib/IR/Function.cpp @@ -11,14 +11,15 @@ // //===----------------------------------------------------------------------===// +#include "llvm/IR/Function.h" #include "LLVMContextImpl.h" #include "SymbolTableListTraitsImpl.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/None.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/ValueTypes.h" @@ -29,7 +30,6 @@ #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instruction.h" @@ -66,8 +66,6 @@ template class llvm::SymbolTableListTraits; // Argument Implementation //===----------------------------------------------------------------------===// -void Argument::anchor() {} - Argument::Argument(Type *Ty, const Twine &Name, Function *Par, unsigned ArgNo) : Value(Ty, Value::ArgumentVal), Parent(Par), ArgNo(ArgNo) { setName(Name); @@ -120,15 +118,13 @@ unsigned Argument::getParamAlignment() const { uint64_t Argument::getDereferenceableBytes() const { assert(getType()->isPointerTy() && "Only pointers have dereferenceable bytes"); - return getParent()->getDereferenceableBytes(getArgNo() + - AttributeList::FirstArgIndex); + return getParent()->getParamDereferenceableBytes(getArgNo()); } uint64_t Argument::getDereferenceableOrNullBytes() const { assert(getType()->isPointerTy() && "Only pointers 
have dereferenceable bytes"); - return getParent()->getDereferenceableOrNullBytes( - getArgNo() + AttributeList::FirstArgIndex); + return getParent()->getParamDereferenceableOrNullBytes(getArgNo()); } bool Argument::hasNestAttr() const { @@ -171,21 +167,20 @@ bool Argument::onlyReadsMemory() const { void Argument::addAttrs(AttrBuilder &B) { AttributeList AL = getParent()->getAttributes(); - AL = AL.addAttributes(Parent->getContext(), - getArgNo() + AttributeList::FirstArgIndex, B); + AL = AL.addParamAttributes(Parent->getContext(), getArgNo(), B); getParent()->setAttributes(AL); } void Argument::addAttr(Attribute::AttrKind Kind) { - getParent()->addAttribute(getArgNo() + AttributeList::FirstArgIndex, Kind); + getParent()->addParamAttr(getArgNo(), Kind); } void Argument::addAttr(Attribute Attr) { - getParent()->addAttribute(getArgNo() + AttributeList::FirstArgIndex, Attr); + getParent()->addParamAttr(getArgNo(), Attr); } void Argument::removeAttr(Attribute::AttrKind Kind) { - getParent()->removeAttribute(getArgNo() + AttributeList::FirstArgIndex, Kind); + getParent()->removeParamAttr(getArgNo(), Kind); } bool Argument::hasAttribute(Attribute::AttrKind Kind) const { @@ -367,6 +362,24 @@ void Function::addAttributes(unsigned i, const AttrBuilder &Attrs) { setAttributes(PAL); } +void Function::addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) { + AttributeList PAL = getAttributes(); + PAL = PAL.addParamAttribute(getContext(), ArgNo, Kind); + setAttributes(PAL); +} + +void Function::addParamAttr(unsigned ArgNo, Attribute Attr) { + AttributeList PAL = getAttributes(); + PAL = PAL.addParamAttribute(getContext(), ArgNo, Attr); + setAttributes(PAL); +} + +void Function::addParamAttrs(unsigned ArgNo, const AttrBuilder &Attrs) { + AttributeList PAL = getAttributes(); + PAL = PAL.addParamAttributes(getContext(), ArgNo, Attrs); + setAttributes(PAL); +} + void Function::removeAttribute(unsigned i, Attribute::AttrKind Kind) { AttributeList PAL = getAttributes(); PAL = PAL.removeAttribute(getContext(), i, Kind); @@ -385,18 +398,49 @@ void Function::removeAttributes(unsigned i, const AttrBuilder &Attrs) { setAttributes(PAL); } +void Function::removeParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) { + AttributeList PAL = getAttributes(); + PAL = PAL.removeParamAttribute(getContext(), ArgNo, Kind); + setAttributes(PAL); +} + +void Function::removeParamAttr(unsigned ArgNo, StringRef Kind) { + AttributeList PAL = getAttributes(); + PAL = PAL.removeParamAttribute(getContext(), ArgNo, Kind); + setAttributes(PAL); +} + +void Function::removeParamAttrs(unsigned ArgNo, const AttrBuilder &Attrs) { + AttributeList PAL = getAttributes(); + PAL = PAL.removeParamAttributes(getContext(), ArgNo, Attrs); + setAttributes(PAL); +} + void Function::addDereferenceableAttr(unsigned i, uint64_t Bytes) { AttributeList PAL = getAttributes(); PAL = PAL.addDereferenceableAttr(getContext(), i, Bytes); setAttributes(PAL); } +void Function::addDereferenceableParamAttr(unsigned ArgNo, uint64_t Bytes) { + AttributeList PAL = getAttributes(); + PAL = PAL.addDereferenceableParamAttr(getContext(), ArgNo, Bytes); + setAttributes(PAL); +} + void Function::addDereferenceableOrNullAttr(unsigned i, uint64_t Bytes) { AttributeList PAL = getAttributes(); PAL = PAL.addDereferenceableOrNullAttr(getContext(), i, Bytes); setAttributes(PAL); } +void Function::addDereferenceableOrNullParamAttr(unsigned ArgNo, + uint64_t Bytes) { + AttributeList PAL = getAttributes(); + PAL = PAL.addDereferenceableOrNullParamAttr(getContext(), ArgNo, Bytes); + 
setAttributes(PAL); +} + const std::string &Function::getGC() const { assert(hasGC() && "Function has no collector"); return getContext().getGC(*this); diff --git a/interpreter/llvm/src/lib/IR/Globals.cpp b/interpreter/llvm/src/lib/IR/Globals.cpp index 10ba1a64ad0c6..afd4a36270a87 100644 --- a/interpreter/llvm/src/lib/IR/Globals.cpp +++ b/interpreter/llvm/src/lib/IR/Globals.cpp @@ -12,10 +12,11 @@ // //===----------------------------------------------------------------------===// +#include "LLVMContextImpl.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Triple.h" -#include "llvm/IR/Constants.h" #include "llvm/IR/ConstantRange.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalValue.h" @@ -24,7 +25,6 @@ #include "llvm/IR/Operator.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" -#include "LLVMContextImpl.h" using namespace llvm; //===----------------------------------------------------------------------===// @@ -255,7 +255,7 @@ bool GlobalValue::canIncreaseAlignment() const { const GlobalObject *GlobalValue::getBaseObject() const { if (auto *GO = dyn_cast(this)) return GO; - if (auto *GA = dyn_cast(this)) + if (auto *GA = dyn_cast(this)) return GA->getBaseObject(); return nullptr; } @@ -293,6 +293,8 @@ GlobalVariable::GlobalVariable(Type *Ty, bool constant, LinkageTypes Link, InitVal != nullptr, Link, Name, AddressSpace), isConstantGlobal(constant), isExternallyInitializedConstant(isExternallyInitialized) { + assert(!Ty->isFunctionTy() && PointerType::isValidElementType(Ty) && + "invalid type for global variable"); setThreadLocalMode(TLMode); if (InitVal) { assert(InitVal->getType() == Ty && @@ -311,6 +313,8 @@ GlobalVariable::GlobalVariable(Module &M, Type *Ty, bool constant, InitVal != nullptr, Link, Name, AddressSpace), isConstantGlobal(constant), isExternallyInitializedConstant(isExternallyInitialized) { + assert(!Ty->isFunctionTy() && PointerType::isValidElementType(Ty) && + "invalid type for global variable"); setThreadLocalMode(TLMode); if (InitVal) { assert(InitVal->getType() == Ty && diff --git a/interpreter/llvm/src/lib/IR/IRBuilder.cpp b/interpreter/llvm/src/lib/IR/IRBuilder.cpp index 3477c087967f0..b7fa07c6ffac7 100644 --- a/interpreter/llvm/src/lib/IR/IRBuilder.cpp +++ b/interpreter/llvm/src/lib/IR/IRBuilder.cpp @@ -12,9 +12,9 @@ // //===----------------------------------------------------------------------===// +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" -#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Statepoint.h" @@ -134,6 +134,37 @@ CreateMemCpy(Value *Dst, Value *Src, Value *Size, unsigned Align, return CI; } +CallInst *IRBuilderBase::CreateElementUnorderedAtomicMemCpy( + Value *Dst, Value *Src, Value *Size, uint32_t ElementSize, MDNode *TBAATag, + MDNode *TBAAStructTag, MDNode *ScopeTag, MDNode *NoAliasTag) { + Dst = getCastedInt8PtrValue(Dst); + Src = getCastedInt8PtrValue(Src); + + Value *Ops[] = {Dst, Src, Size, getInt32(ElementSize)}; + Type *Tys[] = {Dst->getType(), Src->getType(), Size->getType()}; + Module *M = BB->getParent()->getParent(); + Value *TheFn = Intrinsic::getDeclaration( + M, Intrinsic::memcpy_element_unordered_atomic, Tys); + + CallInst *CI = createCallHelper(TheFn, Ops, this); + + // Set the TBAA info if present. + if (TBAATag) + CI->setMetadata(LLVMContext::MD_tbaa, TBAATag); + + // Set the TBAA Struct info if present. 
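
The Function and Argument hunks above complete the move away from hand-offset attribute indices: instead of addAttribute(ArgNo + AttributeList::FirstArgIndex, ...), parameter attributes now go through dedicated zero-based helpers. A short usage sketch against the API added here (F is assumed to be a function whose first parameter is a pointer):

    using namespace llvm;

    void annotateFirstParam(Function &F) {
      F.addParamAttr(0, Attribute::NonNull);          // zero-based index
      F.addDereferenceableParamAttr(0, /*Bytes=*/8);
      if (F.arg_begin()->hasAttribute(Attribute::NonNull))
        F.removeParamAttr(0, Attribute::NonNull);     // symmetric removal
    }
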
+ if (TBAAStructTag) + CI->setMetadata(LLVMContext::MD_tbaa_struct, TBAAStructTag); + + if (ScopeTag) + CI->setMetadata(LLVMContext::MD_alias_scope, ScopeTag); + + if (NoAliasTag) + CI->setMetadata(LLVMContext::MD_noalias, NoAliasTag); + + return CI; +} + CallInst *IRBuilderBase:: CreateMemMove(Value *Dst, Value *Src, Value *Size, unsigned Align, bool isVolatile, MDNode *TBAATag, MDNode *ScopeTag, @@ -381,8 +412,11 @@ CallInst *IRBuilderBase::CreateMaskedGather(Value *Ptrs, unsigned Align, Mask = Constant::getAllOnesValue(VectorType::get(Type::getInt1Ty(Context), NumElts)); + if (!PassThru) + PassThru = UndefValue::get(DataTy); + Type *OverloadedTypes[] = {DataTy, PtrsTy}; - Value * Ops[] = {Ptrs, getInt32(Align), Mask, UndefValue::get(DataTy)}; + Value * Ops[] = {Ptrs, getInt32(Align), Mask, PassThru}; // We specify only one type when we create this intrinsic. Types of other // arguments are derived from this type. diff --git a/interpreter/llvm/src/lib/IR/InlineAsm.cpp b/interpreter/llvm/src/lib/IR/InlineAsm.cpp index 8feeeb65d445e..ad22efdf0effb 100644 --- a/interpreter/llvm/src/lib/IR/InlineAsm.cpp +++ b/interpreter/llvm/src/lib/IR/InlineAsm.cpp @@ -11,11 +11,11 @@ // //===----------------------------------------------------------------------===// +#include "llvm/IR/InlineAsm.h" #include "ConstantsContext.h" #include "LLVMContextImpl.h" #include "llvm/ADT/StringRef.h" #include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/InlineAsm.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Value.h" #include "llvm/Support/Casting.h" @@ -40,10 +40,6 @@ InlineAsm::InlineAsm(FunctionType *FTy, const std::string &asmString, "Function type not legal for constraints!"); } -// Implement the first virtual method in this class in this file so the -// InlineAsm vtable is emitted here. -InlineAsm::~InlineAsm() = default; - InlineAsm *InlineAsm::get(FunctionType *FTy, StringRef AsmString, StringRef Constraints, bool hasSideEffects, bool isAlignStack, AsmDialect asmDialect) { diff --git a/interpreter/llvm/src/lib/IR/Instruction.cpp b/interpreter/llvm/src/lib/IR/Instruction.cpp index 91b9d9232b547..365cb019aec43 100644 --- a/interpreter/llvm/src/lib/IR/Instruction.cpp +++ b/interpreter/llvm/src/lib/IR/Instruction.cpp @@ -11,13 +11,13 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/DenseSet.h" #include "llvm/IR/Instruction.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/Module.h" #include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" #include "llvm/IR/Type.h" using namespace llvm; @@ -43,8 +43,6 @@ Instruction::Instruction(Type *ty, unsigned it, Use *Ops, unsigned NumOps, InsertAtEnd->getInstList().push_back(this); } - -// Out of line virtual method, so the vtable, etc has a home. Instruction::~Instruction() { assert(!Parent && "Instruction still linked in the program!"); if (hasMetadataHashEntry()) @@ -218,10 +216,10 @@ void Instruction::copyFastMathFlags(const Instruction *I) { copyFastMathFlags(I->getFastMathFlags()); } -void Instruction::copyIRFlags(const Value *V) { +void Instruction::copyIRFlags(const Value *V, bool IncludeWrapFlags) { // Copy the wrapping flags. 
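
Two IRBuilder changes land above: the new CreateElementUnorderedAtomicMemCpy, which emits the llvm.memcpy.element.unordered.atomic intrinsic with its per-element size operand, and a fix to CreateMaskedGather so that a null PassThru is replaced with undef (a null Mask already defaulted to all-ones). A call-site sketch for the gather (Builder is assumed to be an IRBuilder<> positioned in a block and Ptrs a vector-of-pointers value):

    // nullptr Mask     -> gather every lane (all-ones mask)
    // nullptr PassThru -> undef of the gathered vector type
    llvm::Value *V = Builder.CreateMaskedGather(Ptrs, /*Align=*/4,
                                                /*Mask=*/nullptr,
                                                /*PassThru=*/nullptr, "gather");
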
-  if (auto *OB = dyn_cast<OverflowingBinaryOperator>(V)) {
-    if (isa<OverflowingBinaryOperator>(this)) {
+  if (IncludeWrapFlags && isa<OverflowingBinaryOperator>(this)) {
+    if (auto *OB = dyn_cast<OverflowingBinaryOperator>(V)) {
       setHasNoSignedWrap(OB->hasNoSignedWrap());
       setHasNoUnsignedWrap(OB->hasNoUnsignedWrap());
     }
@@ -364,13 +362,13 @@ static bool haveSameSpecialState(const Instruction *I1, const Instruction *I2,
            (LI->getAlignment() == cast<LoadInst>(I2)->getAlignment() ||
             IgnoreAlignment) &&
            LI->getOrdering() == cast<LoadInst>(I2)->getOrdering() &&
-           LI->getSynchScope() == cast<LoadInst>(I2)->getSynchScope();
+           LI->getSyncScopeID() == cast<LoadInst>(I2)->getSyncScopeID();
   if (const StoreInst *SI = dyn_cast<StoreInst>(I1))
     return SI->isVolatile() == cast<StoreInst>(I2)->isVolatile() &&
            (SI->getAlignment() == cast<StoreInst>(I2)->getAlignment() ||
             IgnoreAlignment) &&
            SI->getOrdering() == cast<StoreInst>(I2)->getOrdering() &&
-           SI->getSynchScope() == cast<StoreInst>(I2)->getSynchScope();
+           SI->getSyncScopeID() == cast<StoreInst>(I2)->getSyncScopeID();
   if (const CmpInst *CI = dyn_cast<CmpInst>(I1))
     return CI->getPredicate() == cast<CmpInst>(I2)->getPredicate();
   if (const CallInst *CI = dyn_cast<CallInst>(I1))
@@ -388,7 +386,7 @@ static bool haveSameSpecialState(const Instruction *I1, const Instruction *I2,
     return EVI->getIndices() == cast<ExtractValueInst>(I2)->getIndices();
   if (const FenceInst *FI = dyn_cast<FenceInst>(I1))
     return FI->getOrdering() == cast<FenceInst>(I2)->getOrdering() &&
-           FI->getSynchScope() == cast<FenceInst>(I2)->getSynchScope();
+           FI->getSyncScopeID() == cast<FenceInst>(I2)->getSyncScopeID();
   if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(I1))
     return CXI->isVolatile() == cast<AtomicCmpXchgInst>(I2)->isVolatile() &&
            CXI->isWeak() == cast<AtomicCmpXchgInst>(I2)->isWeak() &&
@@ -396,12 +394,13 @@ static bool haveSameSpecialState(const Instruction *I1, const Instruction *I2,
                cast<AtomicCmpXchgInst>(I2)->getSuccessOrdering() &&
            CXI->getFailureOrdering() ==
                cast<AtomicCmpXchgInst>(I2)->getFailureOrdering() &&
-           CXI->getSynchScope() == cast<AtomicCmpXchgInst>(I2)->getSynchScope();
+           CXI->getSyncScopeID() ==
+               cast<AtomicCmpXchgInst>(I2)->getSyncScopeID();
   if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(I1))
     return RMWI->getOperation() == cast<AtomicRMWInst>(I2)->getOperation() &&
            RMWI->isVolatile() == cast<AtomicRMWInst>(I2)->isVolatile() &&
            RMWI->getOrdering() == cast<AtomicRMWInst>(I2)->getOrdering() &&
-           RMWI->getSynchScope() == cast<AtomicRMWInst>(I2)->getSynchScope();
+           RMWI->getSyncScopeID() == cast<AtomicRMWInst>(I2)->getSyncScopeID();
 
   return true;
 }
diff --git a/interpreter/llvm/src/lib/IR/Instructions.cpp b/interpreter/llvm/src/lib/IR/Instructions.cpp
index ed3ca5738ac95..2c49564e328bd 100644
--- a/interpreter/llvm/src/lib/IR/Instructions.cpp
+++ b/interpreter/llvm/src/lib/IR/Instructions.cpp
@@ -1,4 +1,4 @@
-//===-- Instructions.cpp - Implement the LLVM instructions ----------------===//
+//===- Instructions.cpp - Implement the LLVM instructions -----------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -14,16 +14,34 @@
 
 #include "llvm/IR/Instructions.h"
 #include "LLVMContextImpl.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/CallSite.h"
-#include "llvm/IR/ConstantRange.h"
+#include "llvm/IR/Constant.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Function.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Operator.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/AtomicOrdering.h"
+#include "llvm/Support/Casting.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <vector>
+
 using namespace llvm;
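
The haveSameSpecialState changes above are one instance of a rename that runs through the rest of this file: the closed SynchronizationScope enum (CrossThread/SingleThread) becomes an open SyncScope::ID, with SyncScope::System standing in for the old CrossThread. A sketch of the new spelling when building fences (a hedged example, not from this patch; Ctx and BB assumed in scope):

    using namespace llvm;

    FenceInst *makeFence(LLVMContext &Ctx, BasicBlock *BB, bool SingleThread) {
      // SyncScope::System synchronizes with all threads (old CrossThread);
      // SyncScope::SingleThread only orders against the current thread.
      return new FenceInst(Ctx, AtomicOrdering::SequentiallyConsistent,
                           SingleThread ? SyncScope::SingleThread
                                        : SyncScope::System,
                           BB);
    }
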
//===----------------------------------------------------------------------===// @@ -41,15 +59,11 @@ User::op_iterator CallSite::getCallee() const { // TerminatorInst Class //===----------------------------------------------------------------------===// -// Out of line virtual method, so the vtable, etc has a home. -TerminatorInst::~TerminatorInst() { -} - unsigned TerminatorInst::getNumSuccessors() const { switch (getOpcode()) { #define HANDLE_TERM_INST(N, OPC, CLASS) \ case Instruction::OPC: \ - return static_cast(this)->getNumSuccessorsV(); + return static_cast(this)->getNumSuccessors(); #include "llvm/IR/Instruction.def" default: break; @@ -61,7 +75,7 @@ BasicBlock *TerminatorInst::getSuccessor(unsigned idx) const { switch (getOpcode()) { #define HANDLE_TERM_INST(N, OPC, CLASS) \ case Instruction::OPC: \ - return static_cast(this)->getSuccessorV(idx); + return static_cast(this)->getSuccessor(idx); #include "llvm/IR/Instruction.def" default: break; @@ -73,7 +87,7 @@ void TerminatorInst::setSuccessor(unsigned idx, BasicBlock *B) { switch (getOpcode()) { #define HANDLE_TERM_INST(N, OPC, CLASS) \ case Instruction::OPC: \ - return static_cast(this)->setSuccessorV(idx, B); + return static_cast(this)->setSuccessor(idx, B); #include "llvm/IR/Instruction.def" default: break; @@ -81,14 +95,6 @@ void TerminatorInst::setSuccessor(unsigned idx, BasicBlock *B) { llvm_unreachable("not a terminator"); } -//===----------------------------------------------------------------------===// -// UnaryInstruction Class -//===----------------------------------------------------------------------===// - -// Out of line virtual method, so the vtable, etc has a home. -UnaryInstruction::~UnaryInstruction() { -} - //===----------------------------------------------------------------------===// // SelectInst Class //===----------------------------------------------------------------------===// @@ -118,13 +124,10 @@ const char *SelectInst::areInvalidOperands(Value *Op0, Value *Op1, Value *Op2) { return nullptr; } - //===----------------------------------------------------------------------===// // PHINode Class //===----------------------------------------------------------------------===// -void PHINode::anchor() {} - PHINode::PHINode(const PHINode &PN) : Instruction(PN.getType(), Instruction::PHI, nullptr, PN.getNumOperands()), ReservedSpace(PN.getNumOperands()) { @@ -278,9 +281,6 @@ void LandingPadInst::addClause(Constant *Val) { // CallInst Implementation //===----------------------------------------------------------------------===// -CallInst::~CallInst() { -} - void CallInst::init(FunctionType *FTy, Value *Func, ArrayRef Args, ArrayRef Bundles, const Twine &NameStr) { this->FTy = FTy; @@ -393,7 +393,17 @@ void CallInst::addAttribute(unsigned i, Attribute Attr) { } void CallInst::addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) { - addAttribute(ArgNo + AttributeList::FirstArgIndex, Kind); + assert(ArgNo < getNumArgOperands() && "Out of bounds"); + AttributeList PAL = getAttributes(); + PAL = PAL.addParamAttribute(getContext(), ArgNo, Kind); + setAttributes(PAL); +} + +void CallInst::addParamAttr(unsigned ArgNo, Attribute Attr) { + assert(ArgNo < getNumArgOperands() && "Out of bounds"); + AttributeList PAL = getAttributes(); + PAL = PAL.addParamAttribute(getContext(), ArgNo, Attr); + setAttributes(PAL); } void CallInst::removeAttribute(unsigned i, Attribute::AttrKind Kind) { @@ -409,7 +419,17 @@ void CallInst::removeAttribute(unsigned i, StringRef Kind) { } void CallInst::removeParamAttr(unsigned ArgNo, 
Attribute::AttrKind Kind) { - removeAttribute(ArgNo + AttributeList::FirstArgIndex, Kind); + assert(ArgNo < getNumArgOperands() && "Out of bounds"); + AttributeList PAL = getAttributes(); + PAL = PAL.removeParamAttribute(getContext(), ArgNo, Kind); + setAttributes(PAL); +} + +void CallInst::removeParamAttr(unsigned ArgNo, StringRef Kind) { + assert(ArgNo < getNumArgOperands() && "Out of bounds"); + AttributeList PAL = getAttributes(); + PAL = PAL.removeParamAttribute(getContext(), ArgNo, Kind); + setAttributes(PAL); } void CallInst::addDereferenceableAttr(unsigned i, uint64_t Bytes) { @@ -454,6 +474,9 @@ bool CallInst::dataOperandHasImpliedAttr(unsigned i, // question is a call argument; or be indirectly implied by the kind of its // containing operand bundle, if the operand is a bundle operand. + if (i == AttributeList::ReturnIndex) + return hasRetAttr(Kind); + // FIXME: Avoid these i - 1 calculations and update the API to use zero-based // indices. if (i < (getNumArgOperands() + 1)) @@ -577,7 +600,6 @@ Instruction *CallInst::CreateMalloc(Instruction *InsertBefore, ArraySize, OpB, MallocF, Name); } - /// CreateMalloc - Generate the IR for a call to malloc: /// 1. Compute the malloc call's argument as the specified type's size, /// possibly multiplied by the array size if the array size is not @@ -725,16 +747,6 @@ InvokeInst *InvokeInst::Create(InvokeInst *II, ArrayRef OpB, return NewII; } -BasicBlock *InvokeInst::getSuccessorV(unsigned idx) const { - return getSuccessor(idx); -} -unsigned InvokeInst::getNumSuccessorsV() const { - return getNumSuccessors(); -} -void InvokeInst::setSuccessorV(unsigned idx, BasicBlock *B) { - return setSuccessor(idx, B); -} - Value *InvokeInst::getReturnedArgOperand() const { unsigned Index; @@ -778,6 +790,9 @@ bool InvokeInst::dataOperandHasImpliedAttr(unsigned i, // question is an invoke argument; or be indirectly implied by the kind of its // containing operand bundle, if the operand is a bundle operand. + if (i == AttributeList::ReturnIndex) + return hasRetAttr(Kind); + // FIXME: Avoid these i - 1 calculations and update the API to use zero-based // indices. 
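
The deleted getSuccessorV/getNumSuccessorsV/setSuccessorV bodies here (and for the other terminators below) reflect TerminatorInst dropping virtual successor dispatch: the base class now switches over the opcode and static_casts to the concrete terminator, as generated by HANDLE_TERM_INST from Instruction.def. A hand-expanded sketch of that pattern with just two cases (the real switch covers every terminator):

    #include "llvm/IR/Instructions.h"
    #include "llvm/Support/ErrorHandling.h"

    llvm::BasicBlock *successorSketch(const llvm::TerminatorInst *TI,
                                      unsigned Idx) {
      using namespace llvm;
      switch (TI->getOpcode()) {
      case Instruction::Br:     // each case casts to the concrete class and
        return static_cast<const BranchInst *>(TI)->getSuccessor(Idx);
      case Instruction::Switch: // calls its non-virtual accessor
        return static_cast<const SwitchInst *>(TI)->getSuccessor(Idx);
      default:
        llvm_unreachable("sketch handles only br and switch");
      }
    }
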
if (i < (getNumArgOperands() + 1)) @@ -801,7 +816,9 @@ void InvokeInst::addAttribute(unsigned i, Attribute Attr) { } void InvokeInst::addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) { - addAttribute(ArgNo + AttributeList::FirstArgIndex, Kind); + AttributeList PAL = getAttributes(); + PAL = PAL.addParamAttribute(getContext(), ArgNo, Kind); + setAttributes(PAL); } void InvokeInst::removeAttribute(unsigned i, Attribute::AttrKind Kind) { @@ -817,7 +834,9 @@ void InvokeInst::removeAttribute(unsigned i, StringRef Kind) { } void InvokeInst::removeParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) { - removeAttribute(ArgNo + AttributeList::FirstArgIndex, Kind); + AttributeList PAL = getAttributes(); + PAL = PAL.removeParamAttribute(getContext(), ArgNo, Kind); + setAttributes(PAL); } void InvokeInst::addDereferenceableAttr(unsigned i, uint64_t Bytes) { @@ -857,6 +876,7 @@ ReturnInst::ReturnInst(LLVMContext &C, Value *retVal, Instruction *InsertBefore) if (retVal) Op<0>() = retVal; } + ReturnInst::ReturnInst(LLVMContext &C, Value *retVal, BasicBlock *InsertAtEnd) : TerminatorInst(Type::getVoidTy(C), Instruction::Ret, OperandTraits::op_end(this) - !!retVal, !!retVal, @@ -864,28 +884,12 @@ ReturnInst::ReturnInst(LLVMContext &C, Value *retVal, BasicBlock *InsertAtEnd) if (retVal) Op<0>() = retVal; } + ReturnInst::ReturnInst(LLVMContext &Context, BasicBlock *InsertAtEnd) : TerminatorInst(Type::getVoidTy(Context), Instruction::Ret, OperandTraits::op_end(this), 0, InsertAtEnd) { } -unsigned ReturnInst::getNumSuccessorsV() const { - return getNumSuccessors(); -} - -/// Out-of-line ReturnInst method, put here so the C++ compiler can choose to -/// emit the vtable for the class in this translation unit. -void ReturnInst::setSuccessorV(unsigned idx, BasicBlock *NewSucc) { - llvm_unreachable("ReturnInst has no successors!"); -} - -BasicBlock *ReturnInst::getSuccessorV(unsigned idx) const { - llvm_unreachable("ReturnInst has no successors!"); -} - -ReturnInst::~ReturnInst() { -} - //===----------------------------------------------------------------------===// // ResumeInst Implementation //===----------------------------------------------------------------------===// @@ -908,18 +912,6 @@ ResumeInst::ResumeInst(Value *Exn, BasicBlock *InsertAtEnd) Op<0>() = Exn; } -unsigned ResumeInst::getNumSuccessorsV() const { - return getNumSuccessors(); -} - -void ResumeInst::setSuccessorV(unsigned idx, BasicBlock *NewSucc) { - llvm_unreachable("ResumeInst has no successors!"); -} - -BasicBlock *ResumeInst::getSuccessorV(unsigned idx) const { - llvm_unreachable("ResumeInst has no successors!"); -} - //===----------------------------------------------------------------------===// // CleanupReturnInst Implementation //===----------------------------------------------------------------------===// @@ -962,18 +954,6 @@ CleanupReturnInst::CleanupReturnInst(Value *CleanupPad, BasicBlock *UnwindBB, init(CleanupPad, UnwindBB); } -BasicBlock *CleanupReturnInst::getSuccessorV(unsigned Idx) const { - assert(Idx == 0); - return getUnwindDest(); -} -unsigned CleanupReturnInst::getNumSuccessorsV() const { - return getNumSuccessors(); -} -void CleanupReturnInst::setSuccessorV(unsigned Idx, BasicBlock *B) { - assert(Idx == 0); - setUnwindDest(B); -} - //===----------------------------------------------------------------------===// // CatchReturnInst Implementation //===----------------------------------------------------------------------===// @@ -1005,18 +985,6 @@ CatchReturnInst::CatchReturnInst(Value *CatchPad, BasicBlock *BB, 
init(CatchPad, BB); } -BasicBlock *CatchReturnInst::getSuccessorV(unsigned Idx) const { - assert(Idx < getNumSuccessors() && "Successor # out of range for catchret!"); - return getSuccessor(); -} -unsigned CatchReturnInst::getNumSuccessorsV() const { - return getNumSuccessors(); -} -void CatchReturnInst::setSuccessorV(unsigned Idx, BasicBlock *B) { - assert(Idx < getNumSuccessors() && "Successor # out of range for catchret!"); - setSuccessor(B); -} - //===----------------------------------------------------------------------===// // CatchSwitchInst Implementation //===----------------------------------------------------------------------===// @@ -1100,16 +1068,6 @@ void CatchSwitchInst::removeHandler(handler_iterator HI) { setNumHungOffUseOperands(getNumOperands() - 1); } -BasicBlock *CatchSwitchInst::getSuccessorV(unsigned idx) const { - return getSuccessor(idx); -} -unsigned CatchSwitchInst::getNumSuccessorsV() const { - return getNumSuccessors(); -} -void CatchSwitchInst::setSuccessorV(unsigned idx, BasicBlock *B) { - setSuccessor(idx, B); -} - //===----------------------------------------------------------------------===// // FuncletPadInst Implementation //===----------------------------------------------------------------------===// @@ -1162,18 +1120,6 @@ UnreachableInst::UnreachableInst(LLVMContext &Context, BasicBlock *InsertAtEnd) nullptr, 0, InsertAtEnd) { } -unsigned UnreachableInst::getNumSuccessorsV() const { - return getNumSuccessors(); -} - -void UnreachableInst::setSuccessorV(unsigned idx, BasicBlock *NewSucc) { - llvm_unreachable("UnreachableInst has no successors!"); -} - -BasicBlock *UnreachableInst::getSuccessorV(unsigned idx) const { - llvm_unreachable("UnreachableInst has no successors!"); -} - //===----------------------------------------------------------------------===// // BranchInst Implementation //===----------------------------------------------------------------------===// @@ -1191,6 +1137,7 @@ BranchInst::BranchInst(BasicBlock *IfTrue, Instruction *InsertBefore) assert(IfTrue && "Branch destination may not be null!"); Op<-1>() = IfTrue; } + BranchInst::BranchInst(BasicBlock *IfTrue, BasicBlock *IfFalse, Value *Cond, Instruction *InsertBefore) : TerminatorInst(Type::getVoidTy(IfTrue->getContext()), Instruction::Br, @@ -1225,7 +1172,6 @@ BranchInst::BranchInst(BasicBlock *IfTrue, BasicBlock *IfFalse, Value *Cond, #endif } - BranchInst::BranchInst(const BranchInst &BI) : TerminatorInst(Type::getVoidTy(BI.getContext()), Instruction::Br, OperandTraits::op_end(this) - BI.getNumOperands(), @@ -1249,17 +1195,6 @@ void BranchInst::swapSuccessors() { swapProfMetadata(); } -BasicBlock *BranchInst::getSuccessorV(unsigned idx) const { - return getSuccessor(idx); -} -unsigned BranchInst::getNumSuccessorsV() const { - return getNumSuccessors(); -} -void BranchInst::setSuccessorV(unsigned idx, BasicBlock *B) { - setSuccessor(idx, B); -} - - //===----------------------------------------------------------------------===// // AllocaInst Implementation //===----------------------------------------------------------------------===// @@ -1314,10 +1249,6 @@ AllocaInst::AllocaInst(Type *Ty, unsigned AddrSpace, Value *ArraySize, setName(Name); } -// Out of line virtual method, so the vtable, etc has a home. 
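
Several hunks in this file also drop empty out-of-line destructors, such as the AllocaInst one whose removal continues directly below. Those destructors were "key functions": defining one virtual member out of line pins the class's vtable to a single translation unit. Since each of these classes already has plenty of out-of-line definitions in this file, the empty anchors were redundant. The idiom itself, as a generic sketch (not LLVM code):

    struct Shape {
      virtual double area() const = 0;
      virtual ~Shape();          // declared, but deliberately not inline
    };
    // In exactly one .cpp file; the vtable for Shape is emitted here only.
    Shape::~Shape() = default;
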
-AllocaInst::~AllocaInst() { -} - void AllocaInst::setAlignment(unsigned Align) { assert((Align & (Align-1)) == 0 && "Alignment is not a power of 2!"); assert(Align <= MaximumAlignment && @@ -1373,34 +1304,34 @@ LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile, LoadInst::LoadInst(Type *Ty, Value *Ptr, const Twine &Name, bool isVolatile, unsigned Align, Instruction *InsertBef) : LoadInst(Ty, Ptr, Name, isVolatile, Align, AtomicOrdering::NotAtomic, - CrossThread, InsertBef) {} + SyncScope::System, InsertBef) {} LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile, unsigned Align, BasicBlock *InsertAE) : LoadInst(Ptr, Name, isVolatile, Align, AtomicOrdering::NotAtomic, - CrossThread, InsertAE) {} + SyncScope::System, InsertAE) {} LoadInst::LoadInst(Type *Ty, Value *Ptr, const Twine &Name, bool isVolatile, unsigned Align, AtomicOrdering Order, - SynchronizationScope SynchScope, Instruction *InsertBef) + SyncScope::ID SSID, Instruction *InsertBef) : UnaryInstruction(Ty, Load, Ptr, InsertBef) { assert(Ty == cast(Ptr->getType())->getElementType()); setVolatile(isVolatile); setAlignment(Align); - setAtomic(Order, SynchScope); + setAtomic(Order, SSID); AssertOK(); setName(Name); } LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile, unsigned Align, AtomicOrdering Order, - SynchronizationScope SynchScope, + SyncScope::ID SSID, BasicBlock *InsertAE) : UnaryInstruction(cast(Ptr->getType())->getElementType(), Load, Ptr, InsertAE) { setVolatile(isVolatile); setAlignment(Align); - setAtomic(Order, SynchScope); + setAtomic(Order, SSID); AssertOK(); setName(Name); } @@ -1488,16 +1419,16 @@ StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, unsigned Align, Instruction *InsertBefore) : StoreInst(val, addr, isVolatile, Align, AtomicOrdering::NotAtomic, - CrossThread, InsertBefore) {} + SyncScope::System, InsertBefore) {} StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, unsigned Align, BasicBlock *InsertAtEnd) : StoreInst(val, addr, isVolatile, Align, AtomicOrdering::NotAtomic, - CrossThread, InsertAtEnd) {} + SyncScope::System, InsertAtEnd) {} StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, unsigned Align, AtomicOrdering Order, - SynchronizationScope SynchScope, + SyncScope::ID SSID, Instruction *InsertBefore) : Instruction(Type::getVoidTy(val->getContext()), Store, OperandTraits::op_begin(this), @@ -1507,13 +1438,13 @@ StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, Op<1>() = addr; setVolatile(isVolatile); setAlignment(Align); - setAtomic(Order, SynchScope); + setAtomic(Order, SSID); AssertOK(); } StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, unsigned Align, AtomicOrdering Order, - SynchronizationScope SynchScope, + SyncScope::ID SSID, BasicBlock *InsertAtEnd) : Instruction(Type::getVoidTy(val->getContext()), Store, OperandTraits::op_begin(this), @@ -1523,7 +1454,7 @@ StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, Op<1>() = addr; setVolatile(isVolatile); setAlignment(Align); - setAtomic(Order, SynchScope); + setAtomic(Order, SSID); AssertOK(); } @@ -1543,13 +1474,13 @@ void StoreInst::setAlignment(unsigned Align) { void AtomicCmpXchgInst::Init(Value *Ptr, Value *Cmp, Value *NewVal, AtomicOrdering SuccessOrdering, AtomicOrdering FailureOrdering, - SynchronizationScope SynchScope) { + SyncScope::ID SSID) { Op<0>() = Ptr; Op<1>() = Cmp; Op<2>() = NewVal; setSuccessOrdering(SuccessOrdering); 
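
The LoadInst constructor hunks above (and the StoreInst/AtomicCmpXchgInst ones that follow) thread the new SyncScope::ID parameter through in place of SynchronizationScope, with SyncScope::System as the non-atomic default. A sketch of creating an acquire load under the signatures shown in these hunks (Ptr and BB assumed in scope):

    using namespace llvm;

    LoadInst *acquireLoadSketch(Value *Ptr, BasicBlock *BB) {
      return new LoadInst(Ptr, "val", /*isVolatile=*/false, /*Align=*/8,
                          AtomicOrdering::Acquire, SyncScope::System, BB);
    }
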
setFailureOrdering(FailureOrdering); - setSynchScope(SynchScope); + setSyncScopeID(SSID); assert(getOperand(0) && getOperand(1) && getOperand(2) && "All operands must be non-null!"); @@ -1576,25 +1507,25 @@ void AtomicCmpXchgInst::Init(Value *Ptr, Value *Cmp, Value *NewVal, AtomicCmpXchgInst::AtomicCmpXchgInst(Value *Ptr, Value *Cmp, Value *NewVal, AtomicOrdering SuccessOrdering, AtomicOrdering FailureOrdering, - SynchronizationScope SynchScope, + SyncScope::ID SSID, Instruction *InsertBefore) : Instruction( StructType::get(Cmp->getType(), Type::getInt1Ty(Cmp->getContext())), AtomicCmpXchg, OperandTraits::op_begin(this), OperandTraits::operands(this), InsertBefore) { - Init(Ptr, Cmp, NewVal, SuccessOrdering, FailureOrdering, SynchScope); + Init(Ptr, Cmp, NewVal, SuccessOrdering, FailureOrdering, SSID); } AtomicCmpXchgInst::AtomicCmpXchgInst(Value *Ptr, Value *Cmp, Value *NewVal, AtomicOrdering SuccessOrdering, AtomicOrdering FailureOrdering, - SynchronizationScope SynchScope, + SyncScope::ID SSID, BasicBlock *InsertAtEnd) : Instruction( StructType::get(Cmp->getType(), Type::getInt1Ty(Cmp->getContext())), AtomicCmpXchg, OperandTraits::op_begin(this), OperandTraits::operands(this), InsertAtEnd) { - Init(Ptr, Cmp, NewVal, SuccessOrdering, FailureOrdering, SynchScope); + Init(Ptr, Cmp, NewVal, SuccessOrdering, FailureOrdering, SSID); } //===----------------------------------------------------------------------===// @@ -1603,12 +1534,12 @@ AtomicCmpXchgInst::AtomicCmpXchgInst(Value *Ptr, Value *Cmp, Value *NewVal, void AtomicRMWInst::Init(BinOp Operation, Value *Ptr, Value *Val, AtomicOrdering Ordering, - SynchronizationScope SynchScope) { + SyncScope::ID SSID) { Op<0>() = Ptr; Op<1>() = Val; setOperation(Operation); setOrdering(Ordering); - setSynchScope(SynchScope); + setSyncScopeID(SSID); assert(getOperand(0) && getOperand(1) && "All operands must be non-null!"); @@ -1623,24 +1554,24 @@ void AtomicRMWInst::Init(BinOp Operation, Value *Ptr, Value *Val, AtomicRMWInst::AtomicRMWInst(BinOp Operation, Value *Ptr, Value *Val, AtomicOrdering Ordering, - SynchronizationScope SynchScope, + SyncScope::ID SSID, Instruction *InsertBefore) : Instruction(Val->getType(), AtomicRMW, OperandTraits::op_begin(this), OperandTraits::operands(this), InsertBefore) { - Init(Operation, Ptr, Val, Ordering, SynchScope); + Init(Operation, Ptr, Val, Ordering, SSID); } AtomicRMWInst::AtomicRMWInst(BinOp Operation, Value *Ptr, Value *Val, AtomicOrdering Ordering, - SynchronizationScope SynchScope, + SyncScope::ID SSID, BasicBlock *InsertAtEnd) : Instruction(Val->getType(), AtomicRMW, OperandTraits::op_begin(this), OperandTraits::operands(this), InsertAtEnd) { - Init(Operation, Ptr, Val, Ordering, SynchScope); + Init(Operation, Ptr, Val, Ordering, SSID); } //===----------------------------------------------------------------------===// @@ -1648,27 +1579,25 @@ AtomicRMWInst::AtomicRMWInst(BinOp Operation, Value *Ptr, Value *Val, //===----------------------------------------------------------------------===// FenceInst::FenceInst(LLVMContext &C, AtomicOrdering Ordering, - SynchronizationScope SynchScope, + SyncScope::ID SSID, Instruction *InsertBefore) : Instruction(Type::getVoidTy(C), Fence, nullptr, 0, InsertBefore) { setOrdering(Ordering); - setSynchScope(SynchScope); + setSyncScopeID(SSID); } FenceInst::FenceInst(LLVMContext &C, AtomicOrdering Ordering, - SynchronizationScope SynchScope, + SyncScope::ID SSID, BasicBlock *InsertAtEnd) : Instruction(Type::getVoidTy(C), Fence, nullptr, 0, InsertAtEnd) { 
setOrdering(Ordering); - setSynchScope(SynchScope); + setSyncScopeID(SSID); } //===----------------------------------------------------------------------===// // GetElementPtrInst Implementation //===----------------------------------------------------------------------===// -void GetElementPtrInst::anchor() {} - void GetElementPtrInst::init(Value *Ptr, ArrayRef IdxList, const Twine &Name) { assert(getNumOperands() == 1 + IdxList.size() && @@ -1805,14 +1734,12 @@ ExtractElementInst::ExtractElementInst(Value *Val, Value *Index, setName(Name); } - bool ExtractElementInst::isValidOperands(const Value *Val, const Value *Index) { if (!Val->getType()->isVectorTy() || !Index->getType()->isIntegerTy()) return false; return true; } - //===----------------------------------------------------------------------===// // InsertElementInst Implementation //===----------------------------------------------------------------------===// @@ -1859,7 +1786,6 @@ bool InsertElementInst::isValidOperands(const Value *Vec, const Value *Elt, return true; } - //===----------------------------------------------------------------------===// // ShuffleVectorInst Implementation //===----------------------------------------------------------------------===// @@ -1972,7 +1898,6 @@ void ShuffleVectorInst::getShuffleMask(Constant *Mask, } } - //===----------------------------------------------------------------------===// // InsertValueInst Class //===----------------------------------------------------------------------===// @@ -1985,7 +1910,7 @@ void InsertValueInst::init(Value *Agg, Value *Val, ArrayRef Idxs, // (other than weirdness with &*IdxBegin being invalid; see // getelementptr's init routine for example). But there's no // present need to support it. - assert(Idxs.size() > 0 && "InsertValueInst must have at least one index"); + assert(!Idxs.empty() && "InsertValueInst must have at least one index"); assert(ExtractValueInst::getIndexedType(Agg->getType(), Idxs) == Val->getType() && "Inserted value must match indexed type!"); @@ -2014,7 +1939,7 @@ void ExtractValueInst::init(ArrayRef Idxs, const Twine &Name) { // There's no fundamental reason why we require at least one index. // But there's no present need to support it. - assert(Idxs.size() > 0 && "ExtractValueInst must have at least one index"); + assert(!Idxs.empty() && "ExtractValueInst must have at least one index"); Indices.append(Idxs.begin(), Idxs.end()); setName(Name); @@ -2070,8 +1995,8 @@ BinaryOperator::BinaryOperator(BinaryOps iType, Value *S1, Value *S2, InsertBefore) { Op<0>() = S1; Op<1>() = S2; - init(iType); setName(Name); + AssertOK(); } BinaryOperator::BinaryOperator(BinaryOps iType, Value *S1, Value *S2, @@ -2083,18 +2008,17 @@ BinaryOperator::BinaryOperator(BinaryOps iType, Value *S1, Value *S2, InsertAtEnd) { Op<0>() = S1; Op<1>() = S2; - init(iType); setName(Name); + AssertOK(); } - -void BinaryOperator::init(BinaryOps iType) { +void BinaryOperator::AssertOK() { Value *LHS = getOperand(0), *RHS = getOperand(1); (void)LHS; (void)RHS; // Silence warnings. 
assert(LHS->getType() == RHS->getType() && "Binary operator operand types must match!"); #ifndef NDEBUG - switch (iType) { + switch (getOpcode()) { case Add: case Sub: case Mul: assert(getType() == LHS->getType() && @@ -2114,8 +2038,7 @@ void BinaryOperator::init(BinaryOps iType) { case SDiv: assert(getType() == LHS->getType() && "Arithmetic operation should return same type as operands!"); - assert((getType()->isIntegerTy() || (getType()->isVectorTy() && - cast(getType())->getElementType()->isIntegerTy())) && + assert(getType()->isIntOrIntVectorTy() && "Incorrect operand type (not integer) for S/UDIV"); break; case FDiv: @@ -2128,8 +2051,7 @@ void BinaryOperator::init(BinaryOps iType) { case SRem: assert(getType() == LHS->getType() && "Arithmetic operation should return same type as operands!"); - assert((getType()->isIntegerTy() || (getType()->isVectorTy() && - cast(getType())->getElementType()->isIntegerTy())) && + assert(getType()->isIntOrIntVectorTy() && "Incorrect operand type (not integer) for S/UREM"); break; case FRem: @@ -2143,22 +2065,17 @@ void BinaryOperator::init(BinaryOps iType) { case AShr: assert(getType() == LHS->getType() && "Shift operation should return same type as operands!"); - assert((getType()->isIntegerTy() || - (getType()->isVectorTy() && - cast(getType())->getElementType()->isIntegerTy())) && + assert(getType()->isIntOrIntVectorTy() && "Tried to create a shift operation on a non-integral type!"); break; case And: case Or: case Xor: assert(getType() == LHS->getType() && "Logical operation should return same type as operands!"); - assert((getType()->isIntegerTy() || - (getType()->isVectorTy() && - cast(getType())->getElementType()->isIntegerTy())) && + assert(getType()->isIntOrIntVectorTy() && "Tried to create a logical operation on a non-integral type!"); break; - default: - break; + default: llvm_unreachable("Invalid opcode provided"); } #endif } @@ -2247,7 +2164,6 @@ BinaryOperator *BinaryOperator::CreateNot(Value *Op, const Twine &Name, Op->getType(), Name, InsertAtEnd); } - // isConstantAllOnes - Helper function for several functions below static inline bool isConstantAllOnes(const Value *V) { if (const Constant *C = dyn_cast(V)) @@ -2313,7 +2229,6 @@ const Value *BinaryOperator::getNotArgument(const Value *BinOp) { return getNotArgument(const_cast(BinOp)); } - // Exchange the two operands to this instruction. This instruction is safe to // use on any binary instruction and does not modify the semantics of the // instruction. If the instruction is order-dependent (SetLT f.e.), the opcode @@ -2325,7 +2240,6 @@ bool BinaryOperator::swapOperands() { return false; } - //===----------------------------------------------------------------------===// // FPMathOperator Class //===----------------------------------------------------------------------===// @@ -2339,13 +2253,10 @@ float FPMathOperator::getFPAccuracy() const { return Accuracy->getValueAPF().convertToFloat(); } - //===----------------------------------------------------------------------===// // CastInst Class //===----------------------------------------------------------------------===// -void CastInst::anchor() {} - // Just determine if this cast only deals with integral->integral conversion. 
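
The AssertOK hunks above (and the castIsValid ones below) replace every hand-rolled "integer, or vector of integers" test with Type::isIntOrIntVectorTy(), and the analogous pointer checks with isPtrOrPtrVectorTy(). The predicate folds the scalar and vector cases into one query; this small sketch spells out the equivalence:

    #include "llvm/IR/DerivedTypes.h"

    static bool intOrIntVectorLonghand(llvm::Type *Ty) {
      // The spelling these hunks delete:
      return Ty->isIntegerTy() ||
             (Ty->isVectorTy() &&
              llvm::cast<llvm::VectorType>(Ty)->getElementType()->isIntegerTy());
      // ...now written simply as: Ty->isIntOrIntVectorTy()
    }
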
bool CastInst::isIntegerCast() const { switch (getOpcode()) { @@ -2601,13 +2512,12 @@ unsigned CastInst::isEliminableCastPair( return Instruction::BitCast; return 0; } - case 12: { + case 12: // addrspacecast, addrspacecast -> bitcast, if SrcAS == DstAS // addrspacecast, addrspacecast -> addrspacecast, if SrcAS != DstAS if (SrcTy->getPointerAddressSpace() != DstTy->getPointerAddressSpace()) return Instruction::AddrSpaceCast; return Instruction::BitCast; - } case 13: // FIXME: this state can be merged with (1), but the following assert // is useful to check the correcteness of the sequence due to semantic @@ -2628,7 +2538,6 @@ unsigned CastInst::isEliminableCastPair( DstTy->getScalarType()->getPointerElementType()) return Instruction::AddrSpaceCast; return 0; - case 15: // FIXME: this state can be merged with (1), but the following assert // is useful to check the correcteness of the sequence due to semantic @@ -3104,7 +3013,6 @@ CastInst::getCastOpcode( /// of the types involved. bool CastInst::castIsValid(Instruction::CastOps op, Value *S, Type *DstTy) { - // Check for type sanity on the arguments Type *SrcTy = S->getType(); @@ -3156,16 +3064,14 @@ CastInst::castIsValid(Instruction::CastOps op, Value *S, Type *DstTy) { if (VectorType *VT = dyn_cast(SrcTy)) if (VT->getNumElements() != cast(DstTy)->getNumElements()) return false; - return SrcTy->getScalarType()->isPointerTy() && - DstTy->getScalarType()->isIntegerTy(); + return SrcTy->isPtrOrPtrVectorTy() && DstTy->isIntOrIntVectorTy(); case Instruction::IntToPtr: if (isa(SrcTy) != isa(DstTy)) return false; if (VectorType *VT = dyn_cast(SrcTy)) if (VT->getNumElements() != cast(DstTy)->getNumElements()) return false; - return SrcTy->getScalarType()->isIntegerTy() && - DstTy->getScalarType()->isPointerTy(); + return SrcTy->isIntOrIntVectorTy() && DstTy->isPtrOrPtrVectorTy(); case Instruction::BitCast: { PointerType *SrcPtrTy = dyn_cast(SrcTy->getScalarType()); PointerType *DstPtrTy = dyn_cast(DstTy->getScalarType()); @@ -3377,8 +3283,6 @@ AddrSpaceCastInst::AddrSpaceCastInst( // CmpInst Classes //===----------------------------------------------------------------------===// -void CmpInst::anchor() {} - CmpInst::CmpInst(Type *ty, OtherOps op, Predicate predicate, Value *LHS, Value *RHS, const Twine &Name, Instruction *InsertBefore) : Instruction(ty, op, @@ -3453,7 +3357,6 @@ bool CmpInst::isEquality() const { return cast(this)->isEquality(); } - CmpInst::Predicate CmpInst::getInversePredicate(Predicate pred) { switch (pred) { default: llvm_unreachable("Unknown cmp predicate!"); @@ -3519,8 +3422,6 @@ StringRef CmpInst::getPredicateName(Predicate Pred) { } } -void ICmpInst::anchor() {} - ICmpInst::Predicate ICmpInst::getSignedPredicate(Predicate pred) { switch (pred) { default: llvm_unreachable("Unknown icmp predicate!"); @@ -3773,17 +3674,6 @@ void SwitchInst::growOperands() { growHungoffUses(ReservedSpace); } - -BasicBlock *SwitchInst::getSuccessorV(unsigned idx) const { - return getSuccessor(idx); -} -unsigned SwitchInst::getNumSuccessorsV() const { - return getNumSuccessors(); -} -void SwitchInst::setSuccessorV(unsigned idx, BasicBlock *B) { - setSuccessor(idx, B); -} - //===----------------------------------------------------------------------===// // IndirectBrInst Implementation //===----------------------------------------------------------------------===// @@ -3863,16 +3753,6 @@ void IndirectBrInst::removeDestination(unsigned idx) { setNumHungOffUseOperands(NumOps-1); } -BasicBlock *IndirectBrInst::getSuccessorV(unsigned idx) const { - 
return getSuccessor(idx); -} -unsigned IndirectBrInst::getNumSuccessorsV() const { - return getNumSuccessors(); -} -void IndirectBrInst::setSuccessorV(unsigned idx, BasicBlock *B) { - setSuccessor(idx, B); -} - //===----------------------------------------------------------------------===// // cloneImpl() implementations //===----------------------------------------------------------------------===// @@ -3915,12 +3795,12 @@ AllocaInst *AllocaInst::cloneImpl() const { LoadInst *LoadInst::cloneImpl() const { return new LoadInst(getOperand(0), Twine(), isVolatile(), - getAlignment(), getOrdering(), getSynchScope()); + getAlignment(), getOrdering(), getSyncScopeID()); } StoreInst *StoreInst::cloneImpl() const { return new StoreInst(getOperand(0), getOperand(1), isVolatile(), - getAlignment(), getOrdering(), getSynchScope()); + getAlignment(), getOrdering(), getSyncScopeID()); } @@ -3928,7 +3808,7 @@ AtomicCmpXchgInst *AtomicCmpXchgInst::cloneImpl() const { AtomicCmpXchgInst *Result = new AtomicCmpXchgInst(getOperand(0), getOperand(1), getOperand(2), getSuccessOrdering(), getFailureOrdering(), - getSynchScope()); + getSyncScopeID()); Result->setVolatile(isVolatile()); Result->setWeak(isWeak()); return Result; @@ -3936,14 +3816,14 @@ AtomicCmpXchgInst *AtomicCmpXchgInst::cloneImpl() const { AtomicRMWInst *AtomicRMWInst::cloneImpl() const { AtomicRMWInst *Result = - new AtomicRMWInst(getOperation(),getOperand(0), getOperand(1), - getOrdering(), getSynchScope()); + new AtomicRMWInst(getOperation(), getOperand(0), getOperand(1), + getOrdering(), getSyncScopeID()); Result->setVolatile(isVolatile()); return Result; } FenceInst *FenceInst::cloneImpl() const { - return new FenceInst(getContext(), getOrdering(), getSynchScope()); + return new FenceInst(getContext(), getOrdering(), getSyncScopeID()); } TruncInst *TruncInst::cloneImpl() const { diff --git a/interpreter/llvm/src/lib/IR/IntrinsicInst.cpp b/interpreter/llvm/src/lib/IR/IntrinsicInst.cpp index c9814a96bea69..8b12c55937f54 100644 --- a/interpreter/llvm/src/lib/IR/IntrinsicInst.cpp +++ b/interpreter/llvm/src/lib/IR/IntrinsicInst.cpp @@ -21,8 +21,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/StringSwitch.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/IR/Constants.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Metadata.h" @@ -97,7 +97,9 @@ Value *InstrProfIncrementInst::getStep() const { ConstrainedFPIntrinsic::RoundingMode ConstrainedFPIntrinsic::getRoundingMode() const { - Metadata *MD = dyn_cast(getOperand(2))->getMetadata(); + unsigned NumOperands = getNumArgOperands(); + Metadata *MD = + dyn_cast(getArgOperand(NumOperands - 2))->getMetadata(); if (!MD || !isa(MD)) return rmInvalid; StringRef RoundingArg = cast(MD)->getString(); @@ -115,7 +117,9 @@ ConstrainedFPIntrinsic::getRoundingMode() const { ConstrainedFPIntrinsic::ExceptionBehavior ConstrainedFPIntrinsic::getExceptionBehavior() const { - Metadata *MD = dyn_cast(getOperand(3))->getMetadata(); + unsigned NumOperands = getNumArgOperands(); + Metadata *MD = + dyn_cast(getArgOperand(NumOperands - 1))->getMetadata(); if (!MD || !isa(MD)) return ebInvalid; StringRef ExceptionArg = cast(MD)->getString(); @@ -125,3 +129,21 @@ ConstrainedFPIntrinsic::getExceptionBehavior() const { .Case("fpexcept.strict", ebStrict) .Default(ebInvalid); } + +bool ConstrainedFPIntrinsic::isUnaryOp() const { + switch (getIntrinsicID()) { + default: + return false; + case 
Intrinsic::experimental_constrained_sqrt: + case Intrinsic::experimental_constrained_sin: + case Intrinsic::experimental_constrained_cos: + case Intrinsic::experimental_constrained_exp: + case Intrinsic::experimental_constrained_exp2: + case Intrinsic::experimental_constrained_log: + case Intrinsic::experimental_constrained_log10: + case Intrinsic::experimental_constrained_log2: + case Intrinsic::experimental_constrained_rint: + case Intrinsic::experimental_constrained_nearbyint: + return true; + } +} diff --git a/interpreter/llvm/src/lib/IR/LLVMBuild.txt b/interpreter/llvm/src/lib/IR/LLVMBuild.txt index cd90ef5b16b65..71368abfd8748 100644 --- a/interpreter/llvm/src/lib/IR/LLVMBuild.txt +++ b/interpreter/llvm/src/lib/IR/LLVMBuild.txt @@ -19,4 +19,4 @@ type = Library name = Core parent = Libraries -required_libraries = Support +required_libraries = BinaryFormat Support diff --git a/interpreter/llvm/src/lib/IR/LLVMContext.cpp b/interpreter/llvm/src/lib/IR/LLVMContext.cpp index 6c6383c22255d..c58459d6d5f5e 100644 --- a/interpreter/llvm/src/lib/IR/LLVMContext.cpp +++ b/interpreter/llvm/src/lib/IR/LLVMContext.cpp @@ -13,11 +13,11 @@ //===----------------------------------------------------------------------===// #include "llvm/IR/LLVMContext.h" +#include "LLVMContextImpl.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" -#include "LLVMContextImpl.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/DiagnosticPrinter.h" #include "llvm/IR/Metadata.h" @@ -81,6 +81,18 @@ LLVMContext::LLVMContext() : pImpl(new LLVMContextImpl(*this)) { assert(GCTransitionEntry->second == LLVMContext::OB_gc_transition && "gc-transition operand bundle id drifted!"); (void)GCTransitionEntry; + + SyncScope::ID SingleThreadSSID = + pImpl->getOrInsertSyncScopeID("singlethread"); + assert(SingleThreadSSID == SyncScope::SingleThread && + "singlethread synchronization scope ID drifted!"); + (void)SingleThreadSSID; + + SyncScope::ID SystemSSID = + pImpl->getOrInsertSyncScopeID(""); + assert(SystemSSID == SyncScope::System && + "system synchronization scope ID drifted!"); + (void)SystemSSID; } LLVMContext::~LLVMContext() { delete pImpl; } @@ -125,11 +137,18 @@ void LLVMContext::setDiagnosticHandler(DiagnosticHandlerTy DiagnosticHandler, pImpl->RespectDiagnosticFilters = RespectFilters; } -void LLVMContext::setDiagnosticHotnessRequested(bool Requested) { - pImpl->DiagnosticHotnessRequested = Requested; +void LLVMContext::setDiagnosticsHotnessRequested(bool Requested) { + pImpl->DiagnosticsHotnessRequested = Requested; } -bool LLVMContext::getDiagnosticHotnessRequested() const { - return pImpl->DiagnosticHotnessRequested; +bool LLVMContext::getDiagnosticsHotnessRequested() const { + return pImpl->DiagnosticsHotnessRequested; +} + +void LLVMContext::setDiagnosticsHotnessThreshold(uint64_t Threshold) { + pImpl->DiagnosticsHotnessThreshold = Threshold; +} +uint64_t LLVMContext::getDiagnosticsHotnessThreshold() const { + return pImpl->DiagnosticsHotnessThreshold; } yaml::Output *LLVMContext::getDiagnosticsOutputFile() { @@ -248,6 +267,14 @@ uint32_t LLVMContext::getOperandBundleTagID(StringRef Tag) const { return pImpl->getOperandBundleTagID(Tag); } +SyncScope::ID LLVMContext::getOrInsertSyncScopeID(StringRef SSN) { + return pImpl->getOrInsertSyncScopeID(SSN); +} + +void LLVMContext::getSyncScopeNames(SmallVectorImpl &SSNs) const { + pImpl->getSyncScopeNames(SSNs); +} + void LLVMContext::setGC(const Function &Fn, std::string GCName) { auto It = 
pImpl->GCNames.find(&Fn); diff --git a/interpreter/llvm/src/lib/IR/LLVMContextImpl.cpp b/interpreter/llvm/src/lib/IR/LLVMContextImpl.cpp index 343722463e5fa..57dd08b36fe70 100644 --- a/interpreter/llvm/src/lib/IR/LLVMContextImpl.cpp +++ b/interpreter/llvm/src/lib/IR/LLVMContextImpl.cpp @@ -1,4 +1,4 @@ -//===-- LLVMContextImpl.cpp - Implement LLVMContextImpl -------------------===// +//===- LLVMContextImpl.cpp - Implement LLVMContextImpl --------------------===// // // The LLVM Compiler Infrastructure // @@ -12,18 +12,17 @@ //===----------------------------------------------------------------------===// #include "LLVMContextImpl.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/IR/Attributes.h" -#include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Module.h" #include "llvm/IR/OptBisect.h" +#include "llvm/IR/Type.h" #include "llvm/Support/ManagedStatic.h" -#include +#include +#include + using namespace llvm; LLVMContextImpl::LLVMContextImpl(LLVMContext &C) - : TheTrueVal(nullptr), TheFalseVal(nullptr), - VoidTy(C, Type::VoidTyID), + : VoidTy(C, Type::VoidTyID), LabelTy(C, Type::LabelTyID), HalfTy(C, Type::HalfTyID), FloatTy(C, Type::FloatTyID), @@ -39,17 +38,7 @@ LLVMContextImpl::LLVMContextImpl(LLVMContext &C) Int16Ty(C, 16), Int32Ty(C, 32), Int64Ty(C, 64), - Int128Ty(C, 128) { - InlineAsmDiagHandler = nullptr; - InlineAsmDiagContext = nullptr; - DiagnosticHandler = nullptr; - DiagnosticContext = nullptr; - RespectDiagnosticFilters = false; - DiagnosticHotnessRequested = false; - YieldCallback = nullptr; - YieldOpaqueHandle = nullptr; - NamedStructTypesUniqueID = 0; -} + Int128Ty(C, 128) {} LLVMContextImpl::~LLVMContextImpl() { // NOTE: We need to delete the contents of OwnedModules, but Module's dtor @@ -156,7 +145,6 @@ void LLVMContextImpl::dropTriviallyDeadConstantArrays() { C->destroyConstant(); } } - } while (Changed); } @@ -165,6 +153,7 @@ void Module::dropTriviallyDeadConstantArrays() { } namespace llvm { + /// \brief Make MDOperand transparent for hashing. /// /// This overload of an implementation detail of the hashing library makes @@ -179,7 +168,8 @@ namespace llvm { /// does not cause MDOperand to be transparent. In particular, a bare pointer /// doesn't get hashed before it's combined, whereas \a MDOperand would. 
static const Metadata *get_hashable_data(const MDOperand &X) { return X.get(); } -} + +} // end namespace llvm unsigned MDNodeOpsKey::calculateHash(MDNode *N, unsigned Offset) { unsigned Hash = hash_combine_range(N->op_begin() + Offset, N->op_end()); @@ -215,26 +205,19 @@ uint32_t LLVMContextImpl::getOperandBundleTagID(StringRef Tag) const { return I->second; } -// ConstantsContext anchors -void UnaryConstantExpr::anchor() { } - -void BinaryConstantExpr::anchor() { } - -void SelectConstantExpr::anchor() { } - -void ExtractElementConstantExpr::anchor() { } - -void InsertElementConstantExpr::anchor() { } - -void ShuffleVectorConstantExpr::anchor() { } - -void ExtractValueConstantExpr::anchor() { } - -void InsertValueConstantExpr::anchor() { } - -void GetElementPtrConstantExpr::anchor() { } +SyncScope::ID LLVMContextImpl::getOrInsertSyncScopeID(StringRef SSN) { + auto NewSSID = SSC.size(); + assert(NewSSID < std::numeric_limits::max() && + "Hit the maximum number of synchronization scopes allowed!"); + return SSC.insert(std::make_pair(SSN, SyncScope::ID(NewSSID))).first->second; +} -void CompareConstantExpr::anchor() { } +void LLVMContextImpl::getSyncScopeNames( + SmallVectorImpl &SSNs) const { + SSNs.resize(SSC.size()); + for (const auto &SSE : SSC) + SSNs[SSE.second] = SSE.first(); +} /// Singleton instance of the OptBisect class. /// diff --git a/interpreter/llvm/src/lib/IR/LLVMContextImpl.h b/interpreter/llvm/src/lib/IR/LLVMContextImpl.h index 9db30da89ed08..bea2c7ae8ff21 100644 --- a/interpreter/llvm/src/lib/IR/LLVMContextImpl.h +++ b/interpreter/llvm/src/lib/IR/LLVMContextImpl.h @@ -1,4 +1,4 @@ -//===-- LLVMContextImpl.h - The LLVMContextImpl opaque class ----*- C++ -*-===// +//===- LLVMContextImpl.h - The LLVMContextImpl opaque class -----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -21,33 +21,43 @@ #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/Hashing.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSet.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" -#include "llvm/IR/ValueHandle.h" -#include "llvm/Support/Dwarf.h" +#include "llvm/IR/TrackingMDRef.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/YAMLTraits.h" +#include +#include +#include +#include +#include +#include +#include #include namespace llvm { -class ConstantInt; class ConstantFP; -class DiagnosticInfoOptimizationRemark; -class DiagnosticInfoOptimizationRemarkMissed; -class DiagnosticInfoOptimizationRemarkAnalysis; -class GCStrategy; -class LLVMContext; +class ConstantInt; class Type; class Value; +class ValueHandleBase; struct DenseMapAPIntKeyInfo { static inline APInt getEmptyKey() { @@ -55,14 +65,17 @@ struct DenseMapAPIntKeyInfo { V.U.VAL = 0; return V; } + static inline APInt getTombstoneKey() { APInt V(nullptr, 0); V.U.VAL = 1; return V; } + static unsigned getHashValue(const APInt &Key) { return static_cast(hash_value(Key)); } + static bool isEqual(const APInt &LHS, const APInt &RHS) { return LHS.getBitWidth() == RHS.getBitWidth() && LHS == RHS; } @@ -71,9 
+84,11 @@ struct DenseMapAPIntKeyInfo { struct DenseMapAPFloatKeyInfo { static inline APFloat getEmptyKey() { return APFloat(APFloat::Bogus(), 1); } static inline APFloat getTombstoneKey() { return APFloat(APFloat::Bogus(), 2); } + static unsigned getHashValue(const APFloat &Key) { return static_cast(hash_value(Key)); } + static bool isEqual(const APFloat &LHS, const APFloat &RHS) { return LHS.bitwiseIsEqual(RHS); } @@ -83,10 +98,13 @@ struct AnonStructTypeKeyInfo { struct KeyTy { ArrayRef ETypes; bool isPacked; + KeyTy(const ArrayRef& E, bool P) : ETypes(E), isPacked(P) {} + KeyTy(const StructType *ST) : ETypes(ST->elements()), isPacked(ST->isPacked()) {} + bool operator==(const KeyTy& that) const { if (isPacked != that.isPacked) return false; @@ -98,25 +116,31 @@ struct AnonStructTypeKeyInfo { return !this->operator==(that); } }; + static inline StructType* getEmptyKey() { return DenseMapInfo::getEmptyKey(); } + static inline StructType* getTombstoneKey() { return DenseMapInfo::getTombstoneKey(); } + static unsigned getHashValue(const KeyTy& Key) { return hash_combine(hash_combine_range(Key.ETypes.begin(), Key.ETypes.end()), Key.isPacked); } + static unsigned getHashValue(const StructType *ST) { return getHashValue(KeyTy(ST)); } + static bool isEqual(const KeyTy& LHS, const StructType *RHS) { if (RHS == getEmptyKey() || RHS == getTombstoneKey()) return false; return LHS == KeyTy(RHS); } + static bool isEqual(const StructType *LHS, const StructType *RHS) { return LHS == RHS; } @@ -127,11 +151,13 @@ struct FunctionTypeKeyInfo { const Type *ReturnType; ArrayRef Params; bool isVarArg; + KeyTy(const Type* R, const ArrayRef& P, bool V) : ReturnType(R), Params(P), isVarArg(V) {} KeyTy(const FunctionType *FT) : ReturnType(FT->getReturnType()), Params(FT->params()), isVarArg(FT->isVarArg()) {} + bool operator==(const KeyTy& that) const { if (ReturnType != that.ReturnType) return false; @@ -145,26 +171,32 @@ struct FunctionTypeKeyInfo { return !this->operator==(that); } }; + static inline FunctionType* getEmptyKey() { return DenseMapInfo::getEmptyKey(); } + static inline FunctionType* getTombstoneKey() { return DenseMapInfo::getTombstoneKey(); } + static unsigned getHashValue(const KeyTy& Key) { return hash_combine(Key.ReturnType, hash_combine_range(Key.Params.begin(), Key.Params.end()), Key.isVarArg); } + static unsigned getHashValue(const FunctionType *FT) { return getHashValue(KeyTy(FT)); } + static bool isEqual(const KeyTy& LHS, const FunctionType *RHS) { if (RHS == getEmptyKey() || RHS == getTombstoneKey()) return false; return LHS == KeyTy(RHS); } + static bool isEqual(const FunctionType *LHS, const FunctionType *RHS) { return LHS == RHS; } @@ -174,7 +206,6 @@ struct FunctionTypeKeyInfo { class MDNodeOpsKey { ArrayRef RawOps; ArrayRef Ops; - unsigned Hash; protected: @@ -212,14 +243,15 @@ class MDNodeOpsKey { }; template struct MDNodeKeyImpl; -template struct MDNodeInfo; /// Configuration point for MDNodeInfo::isEqual(). 
template struct MDNodeSubsetEqualImpl { - typedef MDNodeKeyImpl KeyTy; + using KeyTy = MDNodeKeyImpl; + static bool isSubsetEqual(const KeyTy &LHS, const NodeTy *RHS) { return false; } + static bool isSubsetEqual(const NodeTy *LHS, const NodeTy *RHS) { return false; } @@ -252,7 +284,6 @@ template <> struct MDNodeKeyImpl { MDNodeKeyImpl(unsigned Line, unsigned Column, Metadata *Scope, Metadata *InlinedAt) : Line(Line), Column(Column), Scope(Scope), InlinedAt(InlinedAt) {} - MDNodeKeyImpl(const DILocation *L) : Line(L->getLine()), Column(L->getColumn()), Scope(L->getRawScope()), InlinedAt(L->getRawInlinedAt()) {} @@ -261,6 +292,7 @@ template <> struct MDNodeKeyImpl { return Line == RHS->getLine() && Column == RHS->getColumn() && Scope == RHS->getRawScope() && InlinedAt == RHS->getRawInlinedAt(); } + unsigned getHashValue() const { return hash_combine(Line, Column, Scope, InlinedAt); } @@ -270,6 +302,7 @@ template <> struct MDNodeKeyImpl { template <> struct MDNodeKeyImpl : MDNodeOpsKey { unsigned Tag; MDString *Header; + MDNodeKeyImpl(unsigned Tag, MDString *Header, ArrayRef DwarfOps) : MDNodeOpsKey(DwarfOps), Tag(Tag), Header(Header) {} MDNodeKeyImpl(const GenericDINode *N) @@ -299,6 +332,7 @@ template <> struct MDNodeKeyImpl { bool isKeyOf(const DISubrange *RHS) const { return Count == RHS->getCount() && LowerBound == RHS->getLowerBound(); } + unsigned getHashValue() const { return hash_combine(Count, LowerBound); } }; @@ -313,6 +347,7 @@ template <> struct MDNodeKeyImpl { bool isKeyOf(const DIEnumerator *RHS) const { return Value == RHS->getValue() && Name == RHS->getRawName(); } + unsigned getHashValue() const { return hash_combine(Value, Name); } }; @@ -337,6 +372,7 @@ template <> struct MDNodeKeyImpl { AlignInBits == RHS->getAlignInBits() && Encoding == RHS->getEncoding(); } + unsigned getHashValue() const { return hash_combine(Tag, Name, SizeInBits, AlignInBits, Encoding); } @@ -384,6 +420,7 @@ template <> struct MDNodeKeyImpl { Flags == RHS->getFlags() && ExtraData == RHS->getRawExtraData(); } + unsigned getHashValue() const { // If this is a member inside an ODR type, only hash the type and the name. // Otherwise the hash will be stronger than @@ -402,10 +439,12 @@ template <> struct MDNodeKeyImpl { }; template <> struct MDNodeSubsetEqualImpl { - typedef MDNodeKeyImpl KeyTy; + using KeyTy = MDNodeKeyImpl; + static bool isSubsetEqual(const KeyTy &LHS, const DIDerivedType *RHS) { return isODRMember(LHS.Tag, LHS.Scope, LHS.Name, RHS); } + static bool isSubsetEqual(const DIDerivedType *LHS, const DIDerivedType *RHS) { return isODRMember(LHS->getTag(), LHS->getRawScope(), LHS->getRawName(), RHS); @@ -480,6 +519,7 @@ template <> struct MDNodeKeyImpl { TemplateParams == RHS->getRawTemplateParams() && Identifier == RHS->getRawIdentifier(); } + unsigned getHashValue() const { // Intentionally computes the hash on a subset of the operands for // performance reason. 
The subset has to be significant enough to avoid @@ -504,6 +544,7 @@ template <> struct MDNodeKeyImpl { return Flags == RHS->getFlags() && CC == RHS->getCC() && TypeArray == RHS->getRawTypeArray(); } + unsigned getHashValue() const { return hash_combine(Flags, CC, TypeArray); } }; @@ -527,6 +568,7 @@ template <> struct MDNodeKeyImpl { CSKind == RHS->getChecksumKind() && Checksum == RHS->getRawChecksum(); } + unsigned getHashValue() const { return hash_combine(Filename, Directory, CSKind, Checksum); } @@ -601,6 +643,7 @@ template <> struct MDNodeKeyImpl { Variables == RHS->getRawVariables() && ThrownTypes == RHS->getRawThrownTypes(); } + unsigned getHashValue() const { // If this is a declaration inside an ODR type, only hash the type and the // name. Otherwise the hash will be stronger than @@ -619,11 +662,13 @@ template <> struct MDNodeKeyImpl { }; template <> struct MDNodeSubsetEqualImpl { - typedef MDNodeKeyImpl KeyTy; + using KeyTy = MDNodeKeyImpl; + static bool isSubsetEqual(const KeyTy &LHS, const DISubprogram *RHS) { return isDeclarationOfODRMember(LHS.IsDefinition, LHS.Scope, LHS.LinkageName, LHS.TemplateParams, RHS); } + static bool isSubsetEqual(const DISubprogram *LHS, const DISubprogram *RHS) { return isDeclarationOfODRMember(LHS->isDefinition(), LHS->getRawScope(), LHS->getRawLinkageName(), @@ -672,6 +717,7 @@ template <> struct MDNodeKeyImpl { return Scope == RHS->getRawScope() && File == RHS->getRawFile() && Line == RHS->getLine() && Column == RHS->getColumn(); } + unsigned getHashValue() const { return hash_combine(Scope, File, Line, Column); } @@ -692,6 +738,7 @@ template <> struct MDNodeKeyImpl { return Scope == RHS->getRawScope() && File == RHS->getRawFile() && Discriminator == RHS->getDiscriminator(); } + unsigned getHashValue() const { return hash_combine(Scope, File, Discriminator); } @@ -712,6 +759,7 @@ template <> struct MDNodeKeyImpl { return Scope == RHS->getRawScope() && Name == RHS->getRawName() && ExportSymbols == RHS->getExportSymbols(); } + unsigned getHashValue() const { return hash_combine(Scope, Name); } @@ -723,6 +771,7 @@ template <> struct MDNodeKeyImpl { MDString *ConfigurationMacros; MDString *IncludePath; MDString *ISysRoot; + MDNodeKeyImpl(Metadata *Scope, MDString *Name, MDString *ConfigurationMacros, MDString *IncludePath, MDString *ISysRoot) : Scope(Scope), Name(Name), ConfigurationMacros(ConfigurationMacros), @@ -738,6 +787,7 @@ template <> struct MDNodeKeyImpl { IncludePath == RHS->getRawIncludePath() && ISysRoot == RHS->getRawISysRoot(); } + unsigned getHashValue() const { return hash_combine(Scope, Name, ConfigurationMacros, IncludePath, ISysRoot); @@ -755,6 +805,7 @@ template <> struct MDNodeKeyImpl { bool isKeyOf(const DITemplateTypeParameter *RHS) const { return Name == RHS->getRawName() && Type == RHS->getRawType(); } + unsigned getHashValue() const { return hash_combine(Name, Type); } }; @@ -774,6 +825,7 @@ template <> struct MDNodeKeyImpl { return Tag == RHS->getTag() && Name == RHS->getRawName() && Type == RHS->getRawType() && Value == RHS->getValue(); } + unsigned getHashValue() const { return hash_combine(Tag, Name, Type, Value); } }; @@ -816,6 +868,7 @@ template <> struct MDNodeKeyImpl { RHS->getRawStaticDataMemberDeclaration() && AlignInBits == RHS->getAlignInBits(); } + unsigned getHashValue() const { // We do not use AlignInBits in hashing function here on purpose: // in most cases this param for local variable is zero (for function param @@ -856,6 +909,7 @@ template <> struct MDNodeKeyImpl { Type == RHS->getRawType() && Arg == 
RHS->getArg() && Flags == RHS->getFlags() && AlignInBits == RHS->getAlignInBits(); } + unsigned getHashValue() const { // We do not use AlignInBits in hashing function here on purpose: // in most cases this param for local variable is zero (for function param @@ -877,6 +931,7 @@ template <> struct MDNodeKeyImpl { bool isKeyOf(const DIExpression *RHS) const { return Elements == RHS->getElements(); } + unsigned getHashValue() const { return hash_combine_range(Elements.begin(), Elements.end()); } @@ -895,6 +950,7 @@ template <> struct MDNodeKeyImpl { return Variable == RHS->getRawVariable() && Expression == RHS->getRawExpression(); } + unsigned getHashValue() const { return hash_combine(Variable, Expression); } }; @@ -923,6 +979,7 @@ template <> struct MDNodeKeyImpl { SetterName == RHS->getRawSetterName() && Attributes == RHS->getAttributes() && Type == RHS->getRawType(); } + unsigned getHashValue() const { return hash_combine(Name, File, Line, GetterName, SetterName, Attributes, Type); @@ -933,23 +990,26 @@ template <> struct MDNodeKeyImpl { unsigned Tag; Metadata *Scope; Metadata *Entity; + Metadata *File; unsigned Line; MDString *Name; - MDNodeKeyImpl(unsigned Tag, Metadata *Scope, Metadata *Entity, unsigned Line, - MDString *Name) - : Tag(Tag), Scope(Scope), Entity(Entity), Line(Line), Name(Name) {} + MDNodeKeyImpl(unsigned Tag, Metadata *Scope, Metadata *Entity, Metadata *File, + unsigned Line, MDString *Name) + : Tag(Tag), Scope(Scope), Entity(Entity), File(File), Line(Line), + Name(Name) {} MDNodeKeyImpl(const DIImportedEntity *N) : Tag(N->getTag()), Scope(N->getRawScope()), Entity(N->getRawEntity()), - Line(N->getLine()), Name(N->getRawName()) {} + File(N->getRawFile()), Line(N->getLine()), Name(N->getRawName()) {} bool isKeyOf(const DIImportedEntity *RHS) const { return Tag == RHS->getTag() && Scope == RHS->getRawScope() && - Entity == RHS->getRawEntity() && Line == RHS->getLine() && - Name == RHS->getRawName(); + Entity == RHS->getRawEntity() && File == RHS->getFile() && + Line == RHS->getLine() && Name == RHS->getRawName(); } + unsigned getHashValue() const { - return hash_combine(Tag, Scope, Entity, Line, Name); + return hash_combine(Tag, Scope, Entity, File, Line, Name); } }; @@ -969,6 +1029,7 @@ template <> struct MDNodeKeyImpl { return MIType == RHS->getMacinfoType() && Line == RHS->getLine() && Name == RHS->getRawName() && Value == RHS->getRawValue(); } + unsigned getHashValue() const { return hash_combine(MIType, Line, Name, Value); } @@ -991,6 +1052,7 @@ template <> struct MDNodeKeyImpl { return MIType == RHS->getMacinfoType() && Line == RHS->getLine() && File == RHS->getRawFile() && Elements == RHS->getRawElements(); } + unsigned getHashValue() const { return hash_combine(MIType, Line, File, Elements); } @@ -998,23 +1060,29 @@ template <> struct MDNodeKeyImpl { /// \brief DenseMapInfo for MDNode subclasses. 
template struct MDNodeInfo { - typedef MDNodeKeyImpl KeyTy; - typedef MDNodeSubsetEqualImpl SubsetEqualTy; + using KeyTy = MDNodeKeyImpl; + using SubsetEqualTy = MDNodeSubsetEqualImpl; + static inline NodeTy *getEmptyKey() { return DenseMapInfo::getEmptyKey(); } + static inline NodeTy *getTombstoneKey() { return DenseMapInfo::getTombstoneKey(); } + static unsigned getHashValue(const KeyTy &Key) { return Key.getHashValue(); } + static unsigned getHashValue(const NodeTy *N) { return KeyTy(N).getHashValue(); } + static bool isEqual(const KeyTy &LHS, const NodeTy *RHS) { if (RHS == getEmptyKey() || RHS == getTombstoneKey()) return false; return SubsetEqualTy::isSubsetEqual(LHS, RHS) || LHS.isKeyOf(RHS); } + static bool isEqual(const NodeTy *LHS, const NodeTy *RHS) { if (LHS == RHS) return true; @@ -1024,7 +1092,7 @@ template struct MDNodeInfo { } }; -#define HANDLE_MDNODE_LEAF(CLASS) typedef MDNodeInfo CLASS##Info; +#define HANDLE_MDNODE_LEAF(CLASS) using CLASS##Info = MDNodeInfo; #include "llvm/IR/Metadata.def" /// \brief Map-like storage for metadata attachments. @@ -1097,24 +1165,25 @@ class LLVMContextImpl { /// will be automatically deleted if this context is deleted. SmallPtrSet OwnedModules; - LLVMContext::InlineAsmDiagHandlerTy InlineAsmDiagHandler; - void *InlineAsmDiagContext; - - LLVMContext::DiagnosticHandlerTy DiagnosticHandler; - void *DiagnosticContext; - bool RespectDiagnosticFilters; - bool DiagnosticHotnessRequested; + LLVMContext::InlineAsmDiagHandlerTy InlineAsmDiagHandler = nullptr; + void *InlineAsmDiagContext = nullptr; + + LLVMContext::DiagnosticHandlerTy DiagnosticHandler = nullptr; + void *DiagnosticContext = nullptr; + bool RespectDiagnosticFilters = false; + bool DiagnosticsHotnessRequested = false; + uint64_t DiagnosticsHotnessThreshold = 0; std::unique_ptr DiagnosticsOutputFile; - LLVMContext::YieldCallbackTy YieldCallback; - void *YieldOpaqueHandle; + LLVMContext::YieldCallbackTy YieldCallback = nullptr; + void *YieldOpaqueHandle = nullptr; - typedef DenseMap, DenseMapAPIntKeyInfo> - IntMapTy; + using IntMapTy = + DenseMap, DenseMapAPIntKeyInfo>; IntMapTy IntConstants; - typedef DenseMap, DenseMapAPFloatKeyInfo> - FPMapTy; + using FPMapTy = + DenseMap, DenseMapAPFloatKeyInfo>; FPMapTy FPConstants; FoldingSet AttrsSet; @@ -1142,13 +1211,13 @@ class LLVMContextImpl { DenseMap> CAZConstants; - typedef ConstantUniqueMap ArrayConstantsTy; + using ArrayConstantsTy = ConstantUniqueMap; ArrayConstantsTy ArrayConstants; - typedef ConstantUniqueMap StructConstantsTy; + using StructConstantsTy = ConstantUniqueMap; StructConstantsTy StructConstants; - typedef ConstantUniqueMap VectorConstantsTy; + using VectorConstantsTy = ConstantUniqueMap; VectorConstantsTy VectorConstants; DenseMap> CPNConstants; @@ -1163,8 +1232,8 @@ class LLVMContextImpl { ConstantUniqueMap InlineAsms; - ConstantInt *TheTrueVal; - ConstantInt *TheFalseVal; + ConstantInt *TheTrueVal = nullptr; + ConstantInt *TheFalseVal = nullptr; std::unique_ptr TheNoneToken; @@ -1172,7 +1241,6 @@ class LLVMContextImpl { Type VoidTy, LabelTy, HalfTy, FloatTy, DoubleTy, MetadataTy, TokenTy; Type X86_FP80Ty, FP128Ty, PPC_FP128Ty, X86_MMXTy; IntegerType Int1Ty, Int8Ty, Int16Ty, Int32Ty, Int64Ty, Int128Ty; - /// TypeAllocator - All dynamically allocated types are allocated from this. /// They live forever until the context is torn down. 
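(Editorial aside, not part of the patch: much of the churn in the surrounding LLVMContextImpl.h hunks is a mechanical typedef-to-using migration. Both spellings declare the same alias; a minimal sketch with placeholder names rather than the patch's exact types:

    typedef DenseMap<unsigned, IntegerType *> OldMapTy;  // pre-C++11 spelling
    using NewMapTy = DenseMap<unsigned, IntegerType *>;  // alias-declaration

The using form reads left-to-right and, unlike typedef, also extends to alias templates such as template <typename T> using PtrVec = SmallVector<T *, 4>;, which is why LLVM's style has moved toward it.)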
@@ -1180,23 +1248,22 @@ class LLVMContextImpl { DenseMap IntegerTypes; - typedef DenseSet FunctionTypeSet; + using FunctionTypeSet = DenseSet; FunctionTypeSet FunctionTypes; - typedef DenseSet StructTypeSet; + using StructTypeSet = DenseSet; StructTypeSet AnonStructTypes; StringMap NamedStructTypes; - unsigned NamedStructTypesUniqueID; + unsigned NamedStructTypesUniqueID = 0; DenseMap, ArrayType*> ArrayTypes; DenseMap, VectorType*> VectorTypes; DenseMap PointerTypes; // Pointers in AddrSpace = 0 DenseMap, PointerType*> ASPointerTypes; - /// ValueHandles - This map keeps track of all of the value handles that are /// watching a Value*. The Value::HasValueHandle bit is used to know /// whether or not a value has an entry in this map. - typedef DenseMap ValueHandlesTy; + using ValueHandlesTy = DenseMap; ValueHandlesTy ValueHandles; /// CustomMDKindNames - Map to hold the metadata string to ID mapping. @@ -1232,6 +1299,20 @@ class LLVMContextImpl { void getOperandBundleTags(SmallVectorImpl &Tags) const; uint32_t getOperandBundleTagID(StringRef Tag) const; + /// A set of interned synchronization scopes. The StringMap maps + /// synchronization scope names to their respective synchronization scope IDs. + StringMap SSC; + + /// getOrInsertSyncScopeID - Maps a synchronization scope name to a + /// synchronization scope ID. Every synchronization scope registered with + /// LLVMContext has a unique ID, except the pre-defined ones. + SyncScope::ID getOrInsertSyncScopeID(StringRef SSN); + + /// getSyncScopeNames - Populates the client-supplied SmallVector with the + /// synchronization scope names registered with LLVMContext. Synchronization + /// scope names are ordered by increasing synchronization scope IDs. + void getSyncScopeNames(SmallVectorImpl &SSNs) const; + /// Maintain the GC name for each function. /// /// This saves allocating an additional word in Function for programs which @@ -1254,6 +1335,6 @@ OptBisect &getOptBisect(); }; -} +} // end namespace llvm -#endif +#endif // LLVM_LIB_IR_LLVMCONTEXTIMPL_H diff --git a/interpreter/llvm/src/lib/IR/LegacyPassManager.cpp b/interpreter/llvm/src/lib/IR/LegacyPassManager.cpp index 628a67bd639ce..995e1e5703404 100644 --- a/interpreter/llvm/src/lib/IR/LegacyPassManager.cpp +++ b/interpreter/llvm/src/lib/IR/LegacyPassManager.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/IR/LegacyPassManager.h" +#include "llvm/ADT/Statistic.h" #include "llvm/IR/IRPrintingPasses.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/LegacyPassManagers.h" @@ -465,6 +466,11 @@ class TimingInfo { // null. It may be called multiple times. static void createTheTimeInfo(); + // print - Prints out timing information and then resets the timers. + void print() { + TG.print(*CreateInfoOutputFile()); + } + /// getPassTimer - Return the timer for the specified pass if it exists.
Timer *getPassTimer(Pass *P) { if (P->getAsPMDataManager()) @@ -587,7 +593,7 @@ AnalysisUsage *PMTopLevelManager::findAnalysisUsage(Pass *P) { assert(Node && "cached analysis usage must be non null"); AnUsageMap[P] = &Node->AU; - AnUsage = &Node->AU;; + AnUsage = &Node->AU; } return AnUsage; } @@ -619,21 +625,21 @@ void PMTopLevelManager::schedulePass(Pass *P) { checkAnalysis = false; const AnalysisUsage::VectorType &RequiredSet = AnUsage->getRequiredSet(); - for (AnalysisUsage::VectorType::const_iterator I = RequiredSet.begin(), - E = RequiredSet.end(); I != E; ++I) { + for (const AnalysisID ID : RequiredSet) { - Pass *AnalysisPass = findAnalysisPass(*I); + Pass *AnalysisPass = findAnalysisPass(ID); if (!AnalysisPass) { - const PassInfo *PI = findAnalysisPassInfo(*I); + const PassInfo *PI = findAnalysisPassInfo(ID); if (!PI) { // Pass P is not in the global PassRegistry dbgs() << "Pass '" << P->getPassName() << "' is not initialized." << "\n"; dbgs() << "Verify if there is a pass dependency cycle." << "\n"; dbgs() << "Required Passes:" << "\n"; - for (AnalysisUsage::VectorType::const_iterator I2 = RequiredSet.begin(), - E = RequiredSet.end(); I2 != E && I2 != I; ++I2) { - Pass *AnalysisPass2 = findAnalysisPass(*I2); + for (const AnalysisID ID2 : RequiredSet) { + if (ID == ID2) + break; + Pass *AnalysisPass2 = findAnalysisPass(ID2); if (AnalysisPass2) { dbgs() << "\t" << AnalysisPass2->getPassName() << "\n"; } else { @@ -1064,17 +1070,15 @@ void PMDataManager::collectRequiredAndUsedAnalyses( void PMDataManager::initializeAnalysisImpl(Pass *P) { AnalysisUsage *AnUsage = TPM->findAnalysisUsage(P); - for (AnalysisUsage::VectorType::const_iterator - I = AnUsage->getRequiredSet().begin(), - E = AnUsage->getRequiredSet().end(); I != E; ++I) { - Pass *Impl = findAnalysisPass(*I, true); + for (const AnalysisID ID : AnUsage->getRequiredSet()) { + Pass *Impl = findAnalysisPass(ID, true); if (!Impl) // This may be analysis pass that is initialized on the fly. // If that is not the case then it will raise an assert when it is used. 
continue; AnalysisResolver *AR = P->getResolver(); assert(AR && "Analysis Resolver is not set"); - AR->addAnalysisImplsPair(*I, Impl); + AR->addAnalysisImplsPair(ID, Impl); } } @@ -1106,21 +1110,19 @@ void PMDataManager::dumpLastUses(Pass *P, unsigned Offset) const{ TPM->collectLastUses(LUses, P); - for (SmallVectorImpl::iterator I = LUses.begin(), - E = LUses.end(); I != E; ++I) { + for (Pass *P : LUses) { dbgs() << "--" << std::string(Offset*2, ' '); - (*I)->dumpPassStructure(0); + P->dumpPassStructure(0); } } void PMDataManager::dumpPassArguments() const { - for (SmallVectorImpl::const_iterator I = PassVector.begin(), - E = PassVector.end(); I != E; ++I) { - if (PMDataManager *PMD = (*I)->getAsPMDataManager()) + for (Pass *P : PassVector) { + if (PMDataManager *PMD = P->getAsPMDataManager()) PMD->dumpPassArguments(); else if (const PassInfo *PI = - TPM->findAnalysisPassInfo((*I)->getPassID())) + TPM->findAnalysisPassInfo(P->getPassID())) if (!PI->isAnalysisGroup()) dbgs() << " -" << PI->getPassArgument(); } @@ -1249,9 +1251,8 @@ Pass *PMDataManager::getOnTheFlyPass(Pass *P, AnalysisID PI, Function &F) { // Destructor PMDataManager::~PMDataManager() { - for (SmallVectorImpl::iterator I = PassVector.begin(), - E = PassVector.end(); I != E; ++I) - delete *I; + for (Pass *P : PassVector) + delete P; } //===----------------------------------------------------------------------===// @@ -1278,35 +1279,35 @@ bool BBPassManager::runOnFunction(Function &F) { bool Changed = doInitialization(F); - for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) + for (BasicBlock &BB : F) for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { BasicBlockPass *BP = getContainedPass(Index); bool LocalChanged = false; - dumpPassInfo(BP, EXECUTION_MSG, ON_BASICBLOCK_MSG, I->getName()); + dumpPassInfo(BP, EXECUTION_MSG, ON_BASICBLOCK_MSG, BB.getName()); dumpRequiredSet(BP); initializeAnalysisImpl(BP); { // If the pass crashes, remember this. - PassManagerPrettyStackEntry X(BP, *I); + PassManagerPrettyStackEntry X(BP, BB); TimeRegion PassTimer(getPassTimer(BP)); - LocalChanged |= BP->runOnBasicBlock(*I); + LocalChanged |= BP->runOnBasicBlock(BB); } Changed |= LocalChanged; if (LocalChanged) dumpPassInfo(BP, MODIFICATION_MSG, ON_BASICBLOCK_MSG, - I->getName()); + BB.getName()); dumpPreservedSet(BP); dumpUsedSet(BP); verifyPreservedAnalysis(BP); removeNotPreservedAnalysis(BP); recordAvailableAnalysis(BP); - removeDeadPasses(BP, I->getName(), ON_BASICBLOCK_MSG); + removeDeadPasses(BP, BB.getName(), ON_BASICBLOCK_MSG); } return doFinalization(F) || Changed; @@ -1752,6 +1753,13 @@ Timer *llvm::getPassTimer(Pass *P) { return nullptr; } +/// If timing is enabled, report the times collected up to now and then reset +/// them. 
+void llvm::reportAndResetTimings() { + if (TheTimeInfo) + TheTimeInfo->print(); +} + //===----------------------------------------------------------------------===// // PMStack implementation // diff --git a/interpreter/llvm/src/lib/IR/Metadata.cpp b/interpreter/llvm/src/lib/IR/Metadata.cpp index 2411dc5ce7dc2..ac02ff76c8436 100644 --- a/interpreter/llvm/src/lib/IR/Metadata.cpp +++ b/interpreter/llvm/src/lib/IR/Metadata.cpp @@ -19,13 +19,14 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/None.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" #include "llvm/IR/Argument.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constant.h" @@ -53,6 +54,7 @@ #include #include #include +#include #include #include @@ -233,7 +235,7 @@ void ReplaceableMetadataImpl::replaceAllUsesWith(Metadata *MD) { return; // Copy out uses since UseMap will get touched below. - typedef std::pair> UseTy; + using UseTy = std::pair>; SmallVector Uses(UseMap.begin(), UseMap.end()); std::sort(Uses.begin(), Uses.end(), [](const UseTy &L, const UseTy &R) { return L.second.second < R.second.second; @@ -286,7 +288,7 @@ void ReplaceableMetadataImpl::resolveAllUses(bool ResolveUsers) { } // Copy out uses since UseMap could get touched below. - typedef std::pair> UseTy; + using UseTy = std::pair>; SmallVector Uses(UseMap.begin(), UseMap.end()); std::sort(Uses.begin(), Uses.end(), [](const UseTy &L, const UseTy &R) { return L.second.second < R.second.second; @@ -758,8 +760,8 @@ static T *uniquifyImpl(T *N, DenseSet &Store) { } template struct MDNode::HasCachedHash { - typedef char Yes[1]; - typedef char No[2]; + using Yes = char[1]; + using No = char[2]; template struct SFINAE {}; template @@ -1470,7 +1472,7 @@ void GlobalObject::copyMetadata(const GlobalObject *Other, unsigned Offset) { if (E) OrigElements = E->getElements(); std::vector Elements(OrigElements.size() + 2); - Elements[0] = dwarf::DW_OP_plus; + Elements[0] = dwarf::DW_OP_plus_uconst; Elements[1] = Offset; std::copy(OrigElements.begin(), OrigElements.end(), Elements.begin() + 2); E = DIExpression::get(getContext(), Elements); @@ -1484,7 +1486,7 @@ void GlobalObject::addTypeMetadata(unsigned Offset, Metadata *TypeID) { addMetadata( LLVMContext::MD_type, *MDTuple::get(getContext(), - {ConstantAsMetadata::get(llvm::ConstantInt::get( + {ConstantAsMetadata::get(ConstantInt::get( Type::getInt64Ty(getContext()), Offset)), TypeID})); } diff --git a/interpreter/llvm/src/lib/IR/Module.cpp b/interpreter/llvm/src/lib/IR/Module.cpp index 12c258d95f523..c230a50044c74 100644 --- a/interpreter/llvm/src/lib/IR/Module.cpp +++ b/interpreter/llvm/src/lib/IR/Module.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/IR/Module.h" #include "SymbolTableListTraitsImpl.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallString.h" @@ -22,17 +23,16 @@ #include "llvm/IR/Comdat.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" +#include "llvm/IR/GVMaterializer.h" #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalIFunc.h" #include "llvm/IR/GlobalValue.h" #include 
"llvm/IR/GlobalVariable.h" -#include "llvm/IR/GVMaterializer.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" -#include "llvm/IR/Module.h" #include "llvm/IR/SymbolTableListTraits.h" #include "llvm/IR/Type.h" #include "llvm/IR/TypeFinder.h" @@ -88,7 +88,7 @@ Module::~Module() { delete static_cast *>(NamedMDSymTab); } -RandomNumberGenerator *Module::createRNG(const Pass* P) const { +std::unique_ptr Module::createRNG(const Pass* P) const { SmallString<32> Salt(P->getPassName()); // This RNG is guaranteed to produce the same random stream only @@ -103,7 +103,7 @@ RandomNumberGenerator *Module::createRNG(const Pass* P) const { // store salt metadata from the Module constructor. Salt += sys::path::filename(getModuleIdentifier()); - return new RandomNumberGenerator(Salt); + return std::unique_ptr(new RandomNumberGenerator(Salt)); } /// getNamedValue - Return the first global value in the module with @@ -481,7 +481,7 @@ PICLevel::Level Module::getPICLevel() const { } void Module::setPICLevel(PICLevel::Level PL) { - addModuleFlag(ModFlagBehavior::Error, "PIC Level", PL); + addModuleFlag(ModFlagBehavior::Max, "PIC Level", PL); } PIELevel::Level Module::getPIELevel() const { @@ -495,7 +495,7 @@ PIELevel::Level Module::getPIELevel() const { } void Module::setPIELevel(PIELevel::Level PL) { - addModuleFlag(ModFlagBehavior::Error, "PIE Level", PL); + addModuleFlag(ModFlagBehavior::Max, "PIE Level", PL); } void Module::setProfileSummary(Metadata *M) { diff --git a/interpreter/llvm/src/lib/IR/ModuleSummaryIndex.cpp b/interpreter/llvm/src/lib/IR/ModuleSummaryIndex.cpp index 9dd712f9ca13e..51c4bae3332e6 100644 --- a/interpreter/llvm/src/lib/IR/ModuleSummaryIndex.cpp +++ b/interpreter/llvm/src/lib/IR/ModuleSummaryIndex.cpp @@ -56,3 +56,16 @@ ModuleSummaryIndex::getGlobalValueSummary(uint64_t ValueGUID, auto &Summary = VI.getSummaryList()[0]; return Summary.get(); } + +bool ModuleSummaryIndex::isGUIDLive(GlobalValue::GUID GUID) const { + auto VI = getValueInfo(GUID); + if (!VI) + return true; + const auto &SummaryList = VI.getSummaryList(); + if (SummaryList.empty()) + return true; + for (auto &I : SummaryList) + if (isGlobalValueLive(I.get())) + return true; + return false; +} diff --git a/interpreter/llvm/src/lib/IR/OptBisect.cpp b/interpreter/llvm/src/lib/IR/OptBisect.cpp index b670c817569a9..f1c70058fac2c 100644 --- a/interpreter/llvm/src/lib/IR/OptBisect.cpp +++ b/interpreter/llvm/src/lib/IR/OptBisect.cpp @@ -13,11 +13,12 @@ /// //===----------------------------------------------------------------------===// +#include "llvm/IR/OptBisect.h" #include "llvm/Analysis/CallGraphSCCPass.h" #include "llvm/Analysis/LazyCallGraph.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/RegionInfo.h" #include "llvm/IR/Module.h" -#include "llvm/IR/OptBisect.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" @@ -53,13 +54,20 @@ static std::string getDescription(const BasicBlock &BB) { } static std::string getDescription(const Loop &L) { - // FIXME: I'd like to be able to provide a better description here, but - // calling L->getHeader() would introduce a new dependency on the - // LLVMCore library. + // FIXME: Move into LoopInfo so we can get a better description + // (and avoid a circular dependency between IR and Analysis). return "loop"; } +static std::string getDescription(const Region &R) { + // FIXME: Move into RegionInfo so we can get a better description + // (and avoid a circular dependency between IR and Analysis). 
+ return "region"; +} + static std::string getDescription(const CallGraphSCC &SCC) { + // FIXME: Move into CallGraphSCCPass to avoid circular dependency between + // IR and Analysis. std::string Desc = "SCC ("; bool First = true; for (CallGraphNode *CGN : SCC) { @@ -83,6 +91,7 @@ template bool OptBisect::shouldRunPass(const Pass *, const Function &); template bool OptBisect::shouldRunPass(const Pass *, const BasicBlock &); template bool OptBisect::shouldRunPass(const Pass *, const Loop &); template bool OptBisect::shouldRunPass(const Pass *, const CallGraphSCC &); +template bool OptBisect::shouldRunPass(const Pass *, const Region &); template bool OptBisect::shouldRunPass(const Pass *P, const UnitT &U) { diff --git a/interpreter/llvm/src/lib/IR/PassRegistry.cpp b/interpreter/llvm/src/lib/IR/PassRegistry.cpp index 584dee2869c17..c0f6f07169ffb 100644 --- a/interpreter/llvm/src/lib/IR/PassRegistry.cpp +++ b/interpreter/llvm/src/lib/IR/PassRegistry.cpp @@ -105,8 +105,6 @@ void PassRegistry::registerAnalysisGroup(const void *InterfaceID, ImplementationInfo->getNormalCtor() && "Cannot specify pass as default if it does not have a default ctor"); InterfaceInfo->setNormalCtor(ImplementationInfo->getNormalCtor()); - InterfaceInfo->setTargetMachineCtor( - ImplementationInfo->getTargetMachineCtor()); } } diff --git a/interpreter/llvm/src/lib/IR/SafepointIRVerifier.cpp b/interpreter/llvm/src/lib/IR/SafepointIRVerifier.cpp new file mode 100644 index 0000000000000..8b328c221da32 --- /dev/null +++ b/interpreter/llvm/src/lib/IR/SafepointIRVerifier.cpp @@ -0,0 +1,437 @@ +//===-- SafepointIRVerifier.cpp - Verify gc.statepoint invariants ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Run a sanity check on the IR to ensure that Safepoints - if they've been +// inserted - were inserted correctly. In particular, look for use of +// non-relocated values after a safepoint. Its primary use is to check the +// correctness of safepoint insertion immediately after insertion, but it can +// also be used to verify that later transforms have not found a way to break +// safepoint semantics. +// +// In its current form, this verifier checks a property which is sufficient, but +// not necessary for correctness. There are some cases where an unrelocated +// pointer can be used after the safepoint. Consider this example: +// +// a = ... +// b = ... +// (a',b') = safepoint(a,b) +// c = cmp eq a b +// br c, ..., .... +// +// Because it is valid to reorder 'c' above the safepoint, this is legal. In +// practice, this is a somewhat uncommon transform, but CodeGenPrep does create +// idioms like this. The verifier knows about these cases and avoids reporting +// false positives.
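+//
+// (Editorial addition, not part of the original file: a minimal, hypothetical
+// sketch of driving this verifier from a tool. It assumes only the two entry
+// points this patch introduces, verifySafepointIR(Function &) and the
+// -safepoint-ir-verifier-print-only flag.
+//
+//   #include "llvm/IR/Module.h"
+//   #include "llvm/IR/SafepointIRVerifier.h"
+//
+//   static void verifyModuleSafepoints(llvm::Module &M) {
+//     for (llvm::Function &F : M)
+//       if (!F.isDeclaration())
+//         llvm::verifySafepointIR(F); // aborts on an illegal unrelocated use
+//   }                                 // unless print-only is set
+//
+// Note that verifySafepointIR runs the verifier directly, without a pass
+// manager; createSafepointIRVerifierPass() is the pass-pipeline entry point.)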
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SetOperations.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Value.h" +#include "llvm/IR/SafepointIRVerifier.h" +#include "llvm/IR/Statepoint.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/raw_ostream.h" + +#define DEBUG_TYPE "safepoint-ir-verifier" + +using namespace llvm; + +/// This option is used for writing test cases. Instead of crashing the program +/// when verification fails, report a message to the console (for FileCheck +/// usage) and continue execution as if nothing happened. +static cl::opt<bool> PrintOnly("safepoint-ir-verifier-print-only", + cl::init(false)); + +static void Verify(const Function &F, const DominatorTree &DT); + +struct SafepointIRVerifier : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + DominatorTree DT; + SafepointIRVerifier() : FunctionPass(ID) { + initializeSafepointIRVerifierPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override { + DT.recalculate(F); + Verify(F, DT); + return false; // no modifications + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + } + + StringRef getPassName() const override { return "safepoint verifier"; } +}; + +void llvm::verifySafepointIR(Function &F) { + SafepointIRVerifier pass; + pass.runOnFunction(F); +} + +char SafepointIRVerifier::ID = 0; + +FunctionPass *llvm::createSafepointIRVerifierPass() { + return new SafepointIRVerifier(); +} + +INITIALIZE_PASS_BEGIN(SafepointIRVerifier, "verify-safepoint-ir", + "Safepoint IR Verifier", false, true) +INITIALIZE_PASS_END(SafepointIRVerifier, "verify-safepoint-ir", + "Safepoint IR Verifier", false, true) + +static bool isGCPointerType(Type *T) { + if (auto *PT = dyn_cast<PointerType>(T)) + // For the sake of this example GC, we arbitrarily pick addrspace(1) as our + // GC managed heap. We know that a pointer into this heap needs to be + // updated and that no other pointer does. + return (1 == PT->getAddressSpace()); + return false; +} + +static bool containsGCPtrType(Type *Ty) { + if (isGCPointerType(Ty)) + return true; + if (VectorType *VT = dyn_cast<VectorType>(Ty)) + return isGCPointerType(VT->getScalarType()); + if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) + return containsGCPtrType(AT->getElementType()); + if (StructType *ST = dyn_cast<StructType>(Ty)) + return std::any_of(ST->subtypes().begin(), ST->subtypes().end(), + containsGCPtrType); + return false; +} + +// Debugging aid -- prints a [Begin, End) range of values. +template <typename IteratorTy> +static void PrintValueSet(raw_ostream &OS, IteratorTy Begin, IteratorTy End) { + OS << "[ "; + while (Begin != End) { + OS << **Begin << " "; + ++Begin; + } + OS << "]"; +} + +/// The verifier algorithm is phrased in terms of availability. The set of +/// values "available" at a given point in the control flow graph is the set of +/// correctly relocated values at that point, and is a subset of the set of +/// definitions dominating that point. + +/// State we compute and track per basic block.
+struct BasicBlockState { + // Set of values available coming in, before the phi nodes + DenseSet<const Value *> AvailableIn; + + // Set of values available going out + DenseSet<const Value *> AvailableOut; + + // AvailableOut minus AvailableIn. + // All elements are Instructions + DenseSet<const Value *> Contribution; + + // True if this block contains a safepoint and thus AvailableIn does not + // contribute to AvailableOut. + bool Cleared = false; +}; + + +/// Gather all the definitions dominating the start of BB into Result. This is +/// simply the Defs introduced by every dominating basic block and the function +/// arguments. +static void GatherDominatingDefs(const BasicBlock *BB, + DenseSet<const Value *> &Result, + const DominatorTree &DT, + DenseMap<const BasicBlock *, BasicBlockState *> &BlockMap) { + DomTreeNode *DTN = DT[const_cast<BasicBlock *>(BB)]; + + while (DTN->getIDom()) { + DTN = DTN->getIDom(); + const auto &Defs = BlockMap[DTN->getBlock()]->Contribution; + Result.insert(Defs.begin(), Defs.end()); + // If this block is 'Cleared', then nothing LiveIn to this block can be + // available after this block completes. Note: This turns out to be + // really important for reducing memory consumption of the initial available + // sets and thus peak memory usage by this verifier. + if (BlockMap[DTN->getBlock()]->Cleared) + return; + } + + for (const Argument &A : BB->getParent()->args()) + if (containsGCPtrType(A.getType())) + Result.insert(&A); +} + +/// Model the effect of an instruction on the set of available values. +static void TransferInstruction(const Instruction &I, bool &Cleared, + DenseSet<const Value *> &Available) { + if (isStatepoint(I)) { + Cleared = true; + Available.clear(); + } else if (containsGCPtrType(I.getType())) + Available.insert(&I); +} + +/// Compute the AvailableOut set for BB, based on its +/// BasicBlockState BBS. FirstPass is set +/// when the verifier runs for the first time computing the AvailableOut set +/// for BB. +static void TransferBlock(const BasicBlock *BB, + BasicBlockState &BBS, bool FirstPass) { + + const DenseSet<const Value *> &AvailableIn = BBS.AvailableIn; + DenseSet<const Value *> &AvailableOut = BBS.AvailableOut; + + if (BBS.Cleared) { + // AvailableOut does not change no matter how the input changes, just + // leave it be. We need to force this calculation the first time so that + // we have an AvailableOut at all. + if (FirstPass) { + AvailableOut = BBS.Contribution; + } + } else { + // Otherwise, we need to reduce the AvailableOut set by things which are no + // longer in our AvailableIn + DenseSet<const Value *> Temp = BBS.Contribution; + set_union(Temp, AvailableIn); + AvailableOut = std::move(Temp); + } + + DEBUG(dbgs() << "Transferred block " << BB->getName() << " from "; + PrintValueSet(dbgs(), AvailableIn.begin(), AvailableIn.end()); + dbgs() << " to "; + PrintValueSet(dbgs(), AvailableOut.begin(), AvailableOut.end()); + dbgs() << "\n";); +} + +/// A given derived pointer can have multiple base pointers through phi/selects. +/// This type indicates when the base pointer is exclusively constant +/// (ExclusivelySomeConstant), and if that constant is proven to be exclusively +/// null, we record that as ExclusivelyNull. In all other cases, the BaseType is +/// NonConstant. +enum BaseType { + NonConstant = 1, // Base pointer is not exclusively constant. + ExclusivelyNull, + ExclusivelySomeConstant // Base pointers for a given derived pointer are from a + // set of constants, but they are not exclusively + // null.
+}; + +/// Return the baseType for Val which states whether Val is exclusively +/// derived from constant/null, or not exclusively derived from a constant. +/// Val is exclusively derived off a constant base when all operands of phi and +/// selects are derived off a constant base. +static enum BaseType getBaseType(const Value *Val) { + + SmallVector<const Value *, 4> Worklist; + DenseSet<const Value *> Visited; + bool isExclusivelyDerivedFromNull = true; + Worklist.push_back(Val); + // Strip through all the bitcasts and geps to get the base pointer. Also check + // for the exclusive value when there can be multiple base pointers (through + // phis or selects). + while (!Worklist.empty()) { + const Value *V = Worklist.pop_back_val(); + if (!Visited.insert(V).second) + continue; + + if (const auto *CI = dyn_cast<CastInst>(V)) { + Worklist.push_back(CI->stripPointerCasts()); + continue; + } + if (const auto *GEP = dyn_cast<GetElementPtrInst>(V)) { + Worklist.push_back(GEP->getPointerOperand()); + continue; + } + // Push all the incoming values of phi node into the worklist for + // processing. + if (const auto *PN = dyn_cast<PHINode>(V)) { + for (Value *InV : PN->incoming_values()) + Worklist.push_back(InV); + continue; + } + if (const auto *SI = dyn_cast<SelectInst>(V)) { + // Push in the true and false values + Worklist.push_back(SI->getTrueValue()); + Worklist.push_back(SI->getFalseValue()); + continue; + } + if (isa<Constant>(V)) { + // We found at least one base pointer which is non-null, so this derived + // pointer is not exclusively derived from null. + if (V != Constant::getNullValue(V->getType())) + isExclusivelyDerivedFromNull = false; + // Continue processing the remaining values to make sure it's exclusively + // constant. + continue; + } + // At this point, we know that the base pointer is not exclusively + // constant. + return BaseType::NonConstant; + } + // Now, we know that the base pointer is exclusively constant, but we need to + // differentiate between exclusive null constant and non-null constant. + return isExclusivelyDerivedFromNull ? BaseType::ExclusivelyNull + : BaseType::ExclusivelySomeConstant; +} + +static void Verify(const Function &F, const DominatorTree &DT) { + SpecificBumpPtrAllocator<BasicBlockState> BSAllocator; + DenseMap<const BasicBlock *, BasicBlockState *> BlockMap; + + DEBUG(dbgs() << "Verifying gc pointers in function: " << F.getName() << "\n"); + if (PrintOnly) + dbgs() << "Verifying gc pointers in function: " << F.getName() << "\n"; + + + for (const BasicBlock &BB : F) { + BasicBlockState *BBS = new (BSAllocator.Allocate()) BasicBlockState; + for (const auto &I : BB) + TransferInstruction(I, BBS->Cleared, BBS->Contribution); + BlockMap[&BB] = BBS; + } + + for (auto &BBI : BlockMap) { + GatherDominatingDefs(BBI.first, BBI.second->AvailableIn, DT, BlockMap); + TransferBlock(BBI.first, *BBI.second, true); + } + + SetVector<const BasicBlock *> Worklist; + for (auto &BBI : BlockMap) + Worklist.insert(BBI.first); + + // This loop iterates the AvailableIn and AvailableOut sets to a fixed point. + // The AvailableIn and AvailableOut sets decrease as we iterate.
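+ // (Editorial note, not part of the original patch: in dataflow terms the
+ // loop below computes the greatest fixed point of
+ //   AvailableIn(B)  = intersection of AvailableOut(P) over predecessors P
+ //   AvailableOut(B) = Contribution(B)                        if B is Cleared
+ //                   = Contribution(B) union AvailableIn(B)   otherwise
+ // Both sets can only shrink on each update, which guarantees termination.)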
+ while (!Worklist.empty()) { + const BasicBlock *BB = Worklist.pop_back_val(); + BasicBlockState *BBS = BlockMap[BB]; + + size_t OldInCount = BBS->AvailableIn.size(); + for (const BasicBlock *PBB : predecessors(BB)) + set_intersect(BBS->AvailableIn, BlockMap[PBB]->AvailableOut); + + if (OldInCount == BBS->AvailableIn.size()) + continue; + + assert(OldInCount > BBS->AvailableIn.size() && "invariant!"); + + size_t OldOutCount = BBS->AvailableOut.size(); + TransferBlock(BB, *BBS, false); + if (OldOutCount != BBS->AvailableOut.size()) { + assert(OldOutCount > BBS->AvailableOut.size() && "invariant!"); + Worklist.insert(succ_begin(BB), succ_end(BB)); + } + } + + // We now have all the information we need to decide if the use of a heap + // reference is legal or not, given our safepoint semantics. + + bool AnyInvalidUses = false; + + auto ReportInvalidUse = [&AnyInvalidUses](const Value &V, + const Instruction &I) { + errs() << "Illegal use of unrelocated value found!\n"; + errs() << "Def: " << V << "\n"; + errs() << "Use: " << I << "\n"; + if (!PrintOnly) + abort(); + AnyInvalidUses = true; + }; + + auto isNotExclusivelyConstantDerived = [](const Value *V) { + return getBaseType(V) == BaseType::NonConstant; + }; + + for (const BasicBlock &BB : F) { + // We destructively modify AvailableIn as we traverse the block instruction + // by instruction. + DenseSet<const Value *> &AvailableSet = BlockMap[&BB]->AvailableIn; + for (const Instruction &I : BB) { + if (const PHINode *PN = dyn_cast<PHINode>(&I)) { + if (containsGCPtrType(PN->getType())) + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + const BasicBlock *InBB = PN->getIncomingBlock(i); + const Value *InValue = PN->getIncomingValue(i); + + if (isNotExclusivelyConstantDerived(InValue) && + !BlockMap[InBB]->AvailableOut.count(InValue)) + ReportInvalidUse(*InValue, *PN); + } + } else if (isa<CmpInst>(I) && + containsGCPtrType(I.getOperand(0)->getType())) { + Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); + enum BaseType baseTyLHS = getBaseType(LHS), + baseTyRHS = getBaseType(RHS); + + // Returns true if LHS and RHS are unrelocated pointers and they are + // valid unrelocated uses. + auto hasValidUnrelocatedUse = [&AvailableSet, baseTyLHS, baseTyRHS, &LHS, &RHS] () { + // A cmp instruction has valid unrelocated pointer operands only if + // both operands are unrelocated pointers. + // In the comparison between two pointers, if one is an unrelocated + // use, the other *should be* an unrelocated use, for this + // instruction to contain valid unrelocated uses. This unrelocated + // use can be a null constant as well, or another unrelocated + // pointer. + if (AvailableSet.count(LHS) || AvailableSet.count(RHS)) + return false; + // Constant pointers (that are not exclusively null) may have + // meaning in different VMs, so we cannot reorder the compare + // against constant pointers before the safepoint. In other words, + // comparison of an unrelocated use against a non-null constant + // may be invalid. + if ((baseTyLHS == BaseType::ExclusivelySomeConstant && + baseTyRHS == BaseType::NonConstant) || + (baseTyLHS == BaseType::NonConstant && + baseTyRHS == BaseType::ExclusivelySomeConstant)) + return false; + // All other cases are valid cases enumerated below: + // 1. Comparison between an exclusively derived null pointer and a + // constant base pointer. + // 2. Comparison between an exclusively derived null pointer and a + // non-constant unrelocated base pointer. + // 3. Comparison between two unrelocated pointers.
+ return true; + }; + if (!hasValidUnrelocatedUse()) { + // Print out all non-constant derived pointers that are unrelocated + // uses, which are invalid. + if (baseTyLHS == BaseType::NonConstant && !AvailableSet.count(LHS)) + ReportInvalidUse(*LHS, I); + if (baseTyRHS == BaseType::NonConstant && !AvailableSet.count(RHS)) + ReportInvalidUse(*RHS, I); + } + } else { + for (const Value *V : I.operands()) + if (containsGCPtrType(V->getType()) && + isNotExclusivelyConstantDerived(V) && !AvailableSet.count(V)) + ReportInvalidUse(*V, I); + } + + bool Cleared = false; + TransferInstruction(I, Cleared, AvailableSet); + (void)Cleared; + } + } + + if (PrintOnly && !AnyInvalidUses) { + dbgs() << "No illegal uses found by SafepointIRVerifier in: " << F.getName() + << "\n"; + } +} diff --git a/interpreter/llvm/src/lib/IR/Statepoint.cpp b/interpreter/llvm/src/lib/IR/Statepoint.cpp index 8c3f0f208cc67..18efee2177c34 100644 --- a/interpreter/llvm/src/lib/IR/Statepoint.cpp +++ b/interpreter/llvm/src/lib/IR/Statepoint.cpp @@ -44,10 +44,22 @@ bool llvm::isGCRelocate(ImmutableCallSite CS) { return CS.getInstruction() && isa(CS.getInstruction()); } +bool llvm::isGCRelocate(const Value *V) { + if (auto CS = ImmutableCallSite(V)) + return isGCRelocate(CS); + return false; +} + bool llvm::isGCResult(ImmutableCallSite CS) { return CS.getInstruction() && isa(CS.getInstruction()); } +bool llvm::isGCResult(const Value *V) { + if (auto CS = ImmutableCallSite(V)) + return isGCResult(CS); + return false; +} + bool llvm::isStatepointDirectiveAttr(Attribute Attr) { return Attr.hasAttribute("statepoint-id") || Attr.hasAttribute("statepoint-num-patch-bytes"); diff --git a/interpreter/llvm/src/lib/IR/Type.cpp b/interpreter/llvm/src/lib/IR/Type.cpp index c9f957c244f8e..20e9c2b5fff25 100644 --- a/interpreter/llvm/src/lib/IR/Type.cpp +++ b/interpreter/llvm/src/lib/IR/Type.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/IR/Type.h" #include "LLVMContextImpl.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/None.h" @@ -22,7 +23,6 @@ #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" -#include "llvm/IR/Type.h" #include "llvm/IR/Value.h" #include "llvm/Support/Casting.h" #include "llvm/Support/MathExtras.h" @@ -538,7 +538,7 @@ bool CompositeType::indexValid(const Value *V) const { if (auto *STy = dyn_cast(this)) { // Structure indexes require (vectors of) 32-bit integer constants. In the // vector case all of the indices must be equal. 
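// (Editorial note, not part of the original file: the change below replaces
// getScalarType()->isIntegerTy(32) with the equivalent single call
// isIntOrIntVectorTy(32); the same simplification appears in the
// CastInst::castIsValid hunks earlier in this patch.)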
- if (!V->getType()->getScalarType()->isIntegerTy(32)) + if (!V->getType()->isIntOrIntVectorTy(32)) return false; const Constant *C = dyn_cast(V); if (C && V->getType()->isVectorTy()) diff --git a/interpreter/llvm/src/lib/IR/TypeFinder.cpp b/interpreter/llvm/src/lib/IR/TypeFinder.cpp index a178b9ec0f09b..b39678a013fb2 100644 --- a/interpreter/llvm/src/lib/IR/TypeFinder.cpp +++ b/interpreter/llvm/src/lib/IR/TypeFinder.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/IR/TypeFinder.h" #include "llvm/ADT/SmallVector.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constant.h" @@ -20,7 +21,6 @@ #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" -#include "llvm/IR/TypeFinder.h" #include "llvm/IR/Use.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" diff --git a/interpreter/llvm/src/lib/IR/User.cpp b/interpreter/llvm/src/lib/IR/User.cpp index 497b4aa176434..d46039107f331 100644 --- a/interpreter/llvm/src/lib/IR/User.cpp +++ b/interpreter/llvm/src/lib/IR/User.cpp @@ -19,8 +19,6 @@ class BasicBlock; // User Class //===----------------------------------------------------------------------===// -void User::anchor() {} - void User::replaceUsesOfWith(Value *From, Value *To) { if (From == To) return; // Duh what? @@ -193,12 +191,4 @@ void User::operator delete(void *Usr) { } } -//===----------------------------------------------------------------------===// -// Operator Class -//===----------------------------------------------------------------------===// - -Operator::~Operator() { - llvm_unreachable("should never destroy an Operator"); -} - } // End llvm namespace diff --git a/interpreter/llvm/src/lib/IR/Value.cpp b/interpreter/llvm/src/lib/IR/Value.cpp index 02b40c93b5d8b..51a7d424c1f33 100644 --- a/interpreter/llvm/src/lib/IR/Value.cpp +++ b/interpreter/llvm/src/lib/IR/Value.cpp @@ -20,6 +20,7 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/DerivedUser.h" #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instructions.h" @@ -59,7 +60,7 @@ Value::Value(Type *ty, unsigned scid) (SubclassID < ConstantFirstVal || SubclassID > ConstantLastVal)) assert((VTy->isFirstClassType() || VTy->isVoidTy()) && "Cannot create non-first-class values except for constants!"); - static_assert(sizeof(Value) == 3 * sizeof(void *) + 2 * sizeof(unsigned), + static_assert(sizeof(Value) == 2 * sizeof(void *) + 2 * sizeof(unsigned), "Value too big"); } @@ -89,6 +90,32 @@ Value::~Value() { destroyValueName(); } +void Value::deleteValue() { + switch (getValueID()) { +#define HANDLE_VALUE(Name) \ + case Value::Name##Val: \ + delete static_cast(this); \ + break; +#define HANDLE_MEMORY_VALUE(Name) \ + case Value::Name##Val: \ + static_cast(this)->DeleteValue( \ + static_cast(this)); \ + break; +#define HANDLE_INSTRUCTION(Name) /* nothing */ +#include "llvm/IR/Value.def" + +#define HANDLE_INST(N, OPC, CLASS) \ + case Value::InstructionVal + Instruction::OPC: \ + delete static_cast(this); \ + break; +#define HANDLE_USER_INST(N, OPC, CLASS) +#include "llvm/IR/Instruction.def" + + default: + llvm_unreachable("attempting to delete unknown value kind"); + } +} + void Value::destroyValueName() { ValueName *Name = getValueName(); if (Name) diff --git a/interpreter/llvm/src/lib/IR/ValueSymbolTable.cpp b/interpreter/llvm/src/lib/IR/ValueSymbolTable.cpp index 0c3946c8661eb..ccdabe0817b4f 100644 --- 
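The User.cpp and Value.cpp edits above belong together: Value no longer has a vtable (hence the static_assert shrinking from three pointers plus two unsigneds to two), and destruction now dispatches on getValueID() through Value::deleteValue(). A reduced model of the pattern with invented IDs; Base, A, and B are stand-ins, not LLVM classes:

#include <cstdlib>

// Non-virtual base: deletion dispatches on an ID instead of a vtable entry,
// saving one pointer per object (cf. the shrunken sizeof(Value) assert).
struct Base {
  explicit Base(int ID) : ID(ID) {}
  void deleteThis(); // replaces `virtual ~Base()`
  int ID;
protected:
  ~Base() = default; // `delete basePtr;` no longer compiles
};

struct A : Base { A() : Base(0) {} };
struct B : Base { B() : Base(1) {} };

void Base::deleteThis() {
  switch (ID) {
  case 0: delete static_cast<A *>(this); break;
  case 1: delete static_cast<B *>(this); break;
  default: std::abort(); // unknown value kind
  }
}

Callers write V->deleteThis() where they previously wrote delete V; the protected non-virtual destructor turns direct deletion through the base into a compile error.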
a/interpreter/llvm/src/lib/IR/ValueSymbolTable.cpp +++ b/interpreter/llvm/src/lib/IR/ValueSymbolTable.cpp @@ -11,11 +11,11 @@ // //===----------------------------------------------------------------------===// +#include "llvm/IR/ValueSymbolTable.h" #include "llvm/ADT/SmallString.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" -#include "llvm/IR/ValueSymbolTable.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" diff --git a/interpreter/llvm/src/lib/IR/ValueTypes.cpp b/interpreter/llvm/src/lib/IR/ValueTypes.cpp index 2132e1659225d..cf6ee063c2d5b 100644 --- a/interpreter/llvm/src/lib/IR/ValueTypes.cpp +++ b/interpreter/llvm/src/lib/IR/ValueTypes.cpp @@ -142,6 +142,7 @@ std::string EVT::getEVTString() const { case MVT::Other: return "ch"; case MVT::Glue: return "glue"; case MVT::x86mmx: return "x86mmx"; + case MVT::v1i1: return "v1i1"; case MVT::v2i1: return "v2i1"; case MVT::v4i1: return "v4i1"; case MVT::v8i1: return "v8i1"; @@ -220,6 +221,7 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const { case MVT::f128: return Type::getFP128Ty(Context); case MVT::ppcf128: return Type::getPPC_FP128Ty(Context); case MVT::x86mmx: return Type::getX86_MMXTy(Context); + case MVT::v1i1: return VectorType::get(Type::getInt1Ty(Context), 1); case MVT::v2i1: return VectorType::get(Type::getInt1Ty(Context), 2); case MVT::v4i1: return VectorType::get(Type::getInt1Ty(Context), 4); case MVT::v8i1: return VectorType::get(Type::getInt1Ty(Context), 8); diff --git a/interpreter/llvm/src/lib/IR/Verifier.cpp b/interpreter/llvm/src/lib/IR/Verifier.cpp index 3b68d6365872e..454a56a769230 100644 --- a/interpreter/llvm/src/lib/IR/Verifier.cpp +++ b/interpreter/llvm/src/lib/IR/Verifier.cpp @@ -49,7 +49,6 @@ #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/ilist.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" @@ -59,6 +58,8 @@ #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" +#include "llvm/ADT/ilist.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/IR/Argument.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" @@ -81,10 +82,10 @@ #include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/InlineAsm.h" +#include "llvm/IR/InstVisitor.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/InstVisitor.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" @@ -102,7 +103,6 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" @@ -1282,6 +1282,13 @@ Verifier::visitModuleFlag(const MDNode *Op, // These behavior types accept any value. break; + case Module::Max: { + Assert(mdconst::dyn_extract_or_null(Op->getOperand(2)), + "invalid value for 'max' module flag (expected constant integer)", + Op->getOperand(2)); + break; + } + case Module::Require: { // The value should itself be an MDNode with two operands, a flag ID (an // MDString), and a value. 
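The new Module::Max flag behavior verified above takes a constant integer and, when two modules are linked, keeps the larger value (see the IRMover hunk later in this patch). A small usage sketch; wchar_size is grounded in the verifier check in the next hunk:

#include "llvm/IR/Module.h"

// On linking, Module::Max keeps the larger of the two constant values.
void markWCharSize(llvm::Module &M) {
  M.addModuleFlag(llvm::Module::Max, "wchar_size", 4);
}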
@@ -1317,6 +1324,20 @@ Verifier::visitModuleFlag(const MDNode *Op, Assert(Inserted, "module flag identifiers must be unique (or of 'require' type)", ID); } + + if (ID->getString() == "wchar_size") { + ConstantInt *Value + = mdconst::dyn_extract_or_null(Op->getOperand(2)); + Assert(Value, "wchar_size metadata requires constant integer argument"); + } + + if (ID->getString() == "Linker Options") { + // If the llvm.linker.options named metadata exists, we assume that the + // bitcode reader has upgraded the module flag. Otherwise the flag might + // have been created by a client directly. + Assert(M.getNamedMetadata("llvm.linker.options"), + "'Linker Options' named metadata no longer supported"); + } } /// Return true if this attribute kind only applies to functions. @@ -1723,17 +1744,9 @@ void Verifier::visitConstantExpr(const ConstantExpr *CE) { } bool Verifier::verifyAttributeCount(AttributeList Attrs, unsigned Params) { - if (Attrs.getNumSlots() == 0) - return true; - - unsigned LastSlot = Attrs.getNumSlots() - 1; - unsigned LastIndex = Attrs.getSlotIndex(LastSlot); - if (LastIndex <= Params || - (LastIndex == AttributeList::FunctionIndex && - (LastSlot == 0 || Attrs.getSlotIndex(LastSlot - 1) <= Params))) - return true; - - return false; + // There shouldn't be more attribute sets than there are parameters plus the + // function and return value. + return Attrs.getNumAttrSets() <= Params + 2; } /// Verify that statepoint intrinsic is well formed. @@ -2491,15 +2504,13 @@ void Verifier::visitPtrToIntInst(PtrToIntInst &I) { Type *SrcTy = I.getOperand(0)->getType(); Type *DestTy = I.getType(); - Assert(SrcTy->getScalarType()->isPointerTy(), - "PtrToInt source must be pointer", &I); + Assert(SrcTy->isPtrOrPtrVectorTy(), "PtrToInt source must be pointer", &I); if (auto *PTy = dyn_cast(SrcTy->getScalarType())) Assert(!DL.isNonIntegralPointerType(PTy), "ptrtoint not supported for non-integral pointers"); - Assert(DestTy->getScalarType()->isIntegerTy(), - "PtrToInt result must be integral", &I); + Assert(DestTy->isIntOrIntVectorTy(), "PtrToInt result must be integral", &I); Assert(SrcTy->isVectorTy() == DestTy->isVectorTy(), "PtrToInt type mismatch", &I); @@ -2518,10 +2529,9 @@ void Verifier::visitIntToPtrInst(IntToPtrInst &I) { Type *SrcTy = I.getOperand(0)->getType(); Type *DestTy = I.getType(); - Assert(SrcTy->getScalarType()->isIntegerTy(), + Assert(SrcTy->isIntOrIntVectorTy(), "IntToPtr source must be an integral", &I); - Assert(DestTy->getScalarType()->isPointerTy(), - "IntToPtr result must be a pointer", &I); + Assert(DestTy->isPtrOrPtrVectorTy(), "IntToPtr result must be a pointer", &I); if (auto *PTy = dyn_cast(DestTy->getScalarType())) Assert(!DL.isNonIntegralPointerType(PTy), @@ -2939,11 +2949,10 @@ void Verifier::visitICmpInst(ICmpInst &IC) { Assert(Op0Ty == Op1Ty, "Both operands to ICmp instruction are not of the same type!", &IC); // Check that the operands are the right type - Assert(Op0Ty->isIntOrIntVectorTy() || Op0Ty->getScalarType()->isPointerTy(), + Assert(Op0Ty->isIntOrIntVectorTy() || Op0Ty->isPtrOrPtrVectorTy(), "Invalid operand types for ICmp instruction", &IC); // Check that the predicate is valid. 
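The rewritten verifyAttributeCount above leans on the new AttributeList layout, where attribute sets exist for the return value (index 0), each of the N parameters, and the function itself. Restated as a stand-alone predicate (hypothetical helper, not from the patch):

// N parameters -> attribute sets for the return value (index 0), the
// parameters (1..N), and the function itself: at most N + 2 sets total.
bool attributeCountIsSane(unsigned NumAttrSets, unsigned NumParams) {
  return NumAttrSets <= NumParams + 2;
}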
-  Assert(IC.getPredicate() >= CmpInst::FIRST_ICMP_PREDICATE &&
-             IC.getPredicate() <= CmpInst::LAST_ICMP_PREDICATE,
+  Assert(IC.isIntPredicate(),
          "Invalid predicate in ICmp instruction!", &IC);
 
   visitInstruction(IC);
@@ -2959,8 +2968,7 @@
   Assert(Op0Ty->isFPOrFPVectorTy(),
          "Invalid operand types for FCmp instruction", &FC);
   // Check that the predicate is valid.
-  Assert(FC.getPredicate() >= CmpInst::FIRST_FCMP_PREDICATE &&
-             FC.getPredicate() <= CmpInst::LAST_FCMP_PREDICATE,
+  Assert(FC.isFPPredicate(),
          "Invalid predicate in FCmp instruction!", &FC);
 
   visitInstruction(FC);
@@ -2998,7 +3006,7 @@ void Verifier::visitGetElementPtrInst(GetElementPtrInst &GEP) {
       GetElementPtrInst::getIndexedType(GEP.getSourceElementType(), Idxs);
   Assert(ElTy, "Invalid indices for GEP pointer type!", &GEP);
 
-  Assert(GEP.getType()->getScalarType()->isPointerTy() &&
+  Assert(GEP.getType()->isPtrOrPtrVectorTy() &&
              GEP.getResultElementType() == ElTy,
          "GEP is not of right type for indices!", &GEP, ElTy);
 
@@ -3014,7 +3022,7 @@ void Verifier::visitGetElementPtrInst(GetElementPtrInst &GEP) {
       unsigned IndexWidth = IndexTy->getVectorNumElements();
       Assert(IndexWidth == GEPWidth, "Invalid GEP index vector width", &GEP);
     }
-    Assert(IndexTy->getScalarType()->isIntegerTy(),
+    Assert(IndexTy->isIntOrIntVectorTy(),
            "All GEP indices should be of integer type");
   }
 }
@@ -3100,7 +3108,7 @@ void Verifier::visitLoadInst(LoadInst &LI) {
            ElTy, &LI);
     checkAtomicMemAccessSize(ElTy, &LI);
   } else {
-    Assert(LI.getSynchScope() == CrossThread,
+    Assert(LI.getSyncScopeID() == SyncScope::System,
            "Non-atomic load cannot have SynchronizationScope specified", &LI);
   }
 
@@ -3129,7 +3137,7 @@ void Verifier::visitStoreInst(StoreInst &SI) {
            ElTy, &SI);
     checkAtomicMemAccessSize(ElTy, &SI);
   } else {
-    Assert(SI.getSynchScope() == CrossThread,
+    Assert(SI.getSyncScopeID() == SyncScope::System,
            "Non-atomic store cannot have SynchronizationScope specified", &SI);
   }
   visitInstruction(SI);
@@ -3961,6 +3969,18 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) {
   case Intrinsic::experimental_constrained_fmul:
   case Intrinsic::experimental_constrained_fdiv:
   case Intrinsic::experimental_constrained_frem:
+  case Intrinsic::experimental_constrained_sqrt:
+  case Intrinsic::experimental_constrained_pow:
+  case Intrinsic::experimental_constrained_powi:
+  case Intrinsic::experimental_constrained_sin:
+  case Intrinsic::experimental_constrained_cos:
+  case Intrinsic::experimental_constrained_exp:
+  case Intrinsic::experimental_constrained_exp2:
+  case Intrinsic::experimental_constrained_log:
+  case Intrinsic::experimental_constrained_log10:
+  case Intrinsic::experimental_constrained_log2:
+  case Intrinsic::experimental_constrained_rint:
+  case Intrinsic::experimental_constrained_nearbyint:
     visitConstrainedFPIntrinsic(
         cast<ConstrainedFPIntrinsic>(*CS.getInstruction()));
     break;
@@ -3987,10 +4007,16 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) {
            CS);
     break;
   }
-  case Intrinsic::memcpy_element_atomic: {
-    ConstantInt *ElementSizeCI = dyn_cast<ConstantInt>(CS.getArgOperand(3));
-    Assert(ElementSizeCI, "element size of the element-wise atomic memory "
-                          "intrinsic must be a constant int",
+  case Intrinsic::memcpy_element_unordered_atomic: {
+    const ElementUnorderedAtomicMemCpyInst *MI =
+        cast<ElementUnorderedAtomicMemCpyInst>(CS.getInstruction());
+
+    ConstantInt *ElementSizeCI =
+        dyn_cast<ConstantInt>(MI->getRawElementSizeInBytes());
+    Assert(ElementSizeCI,
+           "element size of the element-wise unordered atomic memory "
+           "intrinsic must be a constant int",
            CS);
     const
APInt &ElementSizeVal = ElementSizeCI->getValue(); Assert(ElementSizeVal.isPowerOf2(), @@ -3998,19 +4024,91 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) { "must be a power of 2", CS); + if (auto *LengthCI = dyn_cast(MI->getLength())) { + uint64_t Length = LengthCI->getZExtValue(); + uint64_t ElementSize = MI->getElementSizeInBytes(); + Assert((Length % ElementSize) == 0, + "constant length must be a multiple of the element size in the " + "element-wise atomic memory intrinsic", + CS); + } + auto IsValidAlignment = [&](uint64_t Alignment) { return isPowerOf2_64(Alignment) && ElementSizeVal.ule(Alignment); }; - uint64_t DstAlignment = CS.getParamAlignment(0), SrcAlignment = CS.getParamAlignment(1); - Assert(IsValidAlignment(DstAlignment), - "incorrect alignment of the destination argument", + "incorrect alignment of the destination argument", CS); + Assert(IsValidAlignment(SrcAlignment), + "incorrect alignment of the source argument", CS); + break; + } + case Intrinsic::memmove_element_unordered_atomic: { + auto *MI = cast(CS.getInstruction()); + + ConstantInt *ElementSizeCI = + dyn_cast(MI->getRawElementSizeInBytes()); + Assert(ElementSizeCI, + "element size of the element-wise unordered atomic memory " + "intrinsic must be a constant int", CS); + const APInt &ElementSizeVal = ElementSizeCI->getValue(); + Assert(ElementSizeVal.isPowerOf2(), + "element size of the element-wise atomic memory intrinsic " + "must be a power of 2", + CS); + + if (auto *LengthCI = dyn_cast(MI->getLength())) { + uint64_t Length = LengthCI->getZExtValue(); + uint64_t ElementSize = MI->getElementSizeInBytes(); + Assert((Length % ElementSize) == 0, + "constant length must be a multiple of the element size in the " + "element-wise atomic memory intrinsic", + CS); + } + + auto IsValidAlignment = [&](uint64_t Alignment) { + return isPowerOf2_64(Alignment) && ElementSizeVal.ule(Alignment); + }; + uint64_t DstAlignment = CS.getParamAlignment(0), + SrcAlignment = CS.getParamAlignment(1); + Assert(IsValidAlignment(DstAlignment), + "incorrect alignment of the destination argument", CS); Assert(IsValidAlignment(SrcAlignment), - "incorrect alignment of the source argument", + "incorrect alignment of the source argument", CS); + break; + } + case Intrinsic::memset_element_unordered_atomic: { + auto *MI = cast(CS.getInstruction()); + + ConstantInt *ElementSizeCI = + dyn_cast(MI->getRawElementSizeInBytes()); + Assert(ElementSizeCI, + "element size of the element-wise unordered atomic memory " + "intrinsic must be a constant int", CS); + const APInt &ElementSizeVal = ElementSizeCI->getValue(); + Assert(ElementSizeVal.isPowerOf2(), + "element size of the element-wise atomic memory intrinsic " + "must be a power of 2", + CS); + + if (auto *LengthCI = dyn_cast(MI->getLength())) { + uint64_t Length = LengthCI->getZExtValue(); + uint64_t ElementSize = MI->getElementSizeInBytes(); + Assert((Length % ElementSize) == 0, + "constant length must be a multiple of the element size in the " + "element-wise atomic memory intrinsic", + CS); + } + + auto IsValidAlignment = [&](uint64_t Alignment) { + return isPowerOf2_64(Alignment) && ElementSizeVal.ule(Alignment); + }; + uint64_t DstAlignment = CS.getParamAlignment(0); + Assert(IsValidAlignment(DstAlignment), + "incorrect alignment of the destination argument", CS); break; } case Intrinsic::gcroot: @@ -4217,7 +4315,7 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) { // relocated pointer. 
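The three element-wise unordered-atomic cases above enforce the same operand contract; gathered into one stand-alone restatement (hypothetical helper, not part of the patch):

#include <cstdint>

// Mirrors the verifier's rules: the element size must be a power of two,
// a constant length must be a multiple of it, and each pointer argument's
// alignment must be a power of two no smaller than the element size.
bool elementWiseAtomicArgsOK(uint64_t ElementSize, uint64_t Length,
                             uint64_t DstAlign, uint64_t SrcAlign) {
  auto isPow2 = [](uint64_t V) { return V && (V & (V - 1)) == 0; };
  return isPow2(ElementSize) && Length % ElementSize == 0 &&
         isPow2(DstAlign) && DstAlign >= ElementSize &&
         isPow2(SrcAlign) && SrcAlign >= ElementSize;
}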
It can be casted to the correct type later if it's // desired. However, they must have the same address space and 'vectorness' GCRelocateInst &Relocate = cast(*CS.getInstruction()); - Assert(Relocate.getDerivedPtr()->getType()->getScalarType()->isPointerTy(), + Assert(Relocate.getDerivedPtr()->getType()->isPtrOrPtrVectorTy(), "gc.relocate: relocated value must be a gc pointer", CS); auto ResultType = CS.getType(); @@ -4330,7 +4428,12 @@ static DISubprogram *getSubprogram(Metadata *LocalScope) { } void Verifier::visitConstrainedFPIntrinsic(ConstrainedFPIntrinsic &FPI) { - Assert(isa(FPI.getOperand(2)), + unsigned NumOperands = FPI.getNumArgOperands(); + Assert(((NumOperands == 3 && FPI.isUnaryOp()) || (NumOperands == 4)), + "invalid arguments for constrained FP intrinsic", &FPI); + Assert(isa(FPI.getArgOperand(NumOperands-1)), + "invalid exception behavior argument", &FPI); + Assert(isa(FPI.getArgOperand(NumOperands-2)), "invalid rounding mode argument", &FPI); Assert(FPI.getRoundingMode() != ConstrainedFPIntrinsic::rmInvalid, "invalid rounding mode argument", &FPI); diff --git a/interpreter/llvm/src/lib/LLVMBuild.txt b/interpreter/llvm/src/lib/LLVMBuild.txt index 684b378c93e5a..1d22c2a11f131 100644 --- a/interpreter/llvm/src/lib/LLVMBuild.txt +++ b/interpreter/llvm/src/lib/LLVMBuild.txt @@ -24,7 +24,6 @@ subdirectories = DebugInfo Demangle ExecutionEngine - LibDriver LineEditor Linker IR @@ -32,6 +31,7 @@ subdirectories = LTO MC Object + BinaryFormat ObjectYAML Option Passes @@ -39,6 +39,8 @@ subdirectories = Support TableGen Target + Testing + ToolDrivers Transforms [component_0] diff --git a/interpreter/llvm/src/lib/LTO/LTO.cpp b/interpreter/llvm/src/lib/LTO/LTO.cpp index 2d2dcdec05fb9..19973946ac5a6 100644 --- a/interpreter/llvm/src/lib/LTO/LTO.cpp +++ b/interpreter/llvm/src/lib/LTO/LTO.cpp @@ -114,11 +114,15 @@ static void computeCacheKey( AddUnsigned((unsigned)Conf.Options.DebuggerTuning); for (auto &A : Conf.MAttrs) AddString(A); - AddUnsigned(Conf.RelocModel); + if (Conf.RelocModel) + AddUnsigned(*Conf.RelocModel); + else + AddUnsigned(-1); AddUnsigned(Conf.CodeModel); AddUnsigned(Conf.CGOptLevel); AddUnsigned(Conf.CGFileType); AddUnsigned(Conf.OptLevel); + AddUnsigned(Conf.UseNewPM); AddString(Conf.OptPipeline); AddString(Conf.AAPipeline); AddString(Conf.OverrideTriple); @@ -311,54 +315,19 @@ InputFile::~InputFile() = default; Expected> InputFile::create(MemoryBufferRef Object) { std::unique_ptr File(new InputFile); - ErrorOr BCOrErr = - IRObjectFile::findBitcodeInMemBuffer(Object); - if (!BCOrErr) - return errorCodeToError(BCOrErr.getError()); - - Expected> BMsOrErr = - getBitcodeModuleList(*BCOrErr); - if (!BMsOrErr) - return BMsOrErr.takeError(); - - if (BMsOrErr->empty()) - return make_error("Bitcode file does not contain any modules", - inconvertibleErrorCode()); - - File->Mods = *BMsOrErr; - - LLVMContext Ctx; - std::vector Mods; - std::vector> OwnedMods; - for (auto BM : *BMsOrErr) { - Expected> MOrErr = - BM.getLazyModule(Ctx, /*ShouldLazyLoadMetadata*/ true, - /*IsImporting*/ false); - if (!MOrErr) - return MOrErr.takeError(); - - if ((*MOrErr)->getDataLayoutStr().empty()) - return make_error("input module has no datalayout", - inconvertibleErrorCode()); - - Mods.push_back(MOrErr->get()); - OwnedMods.push_back(std::move(*MOrErr)); - } - - SmallVector Symtab; - if (Error E = irsymtab::build(Mods, Symtab, File->Strtab)) - return std::move(E); + Expected FOrErr = readIRSymtab(Object); + if (!FOrErr) + return FOrErr.takeError(); - irsymtab::Reader R({Symtab.data(), 
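The updated visitConstrainedFPIntrinsic above encodes the operand layout for the newly listed intrinsics: three operands for unary operations, four for binary ones, with the rounding-mode and exception-behavior metadata always last. A sketch of peeling those trailing operands (the helper name is invented):

#include "llvm/IR/IntrinsicInst.h"

// The metadata operands sit at the end regardless of arity:
//   op[N-2] = rounding mode, op[N-1] = exception behavior.
void trailingConstraints(llvm::ConstrainedFPIntrinsic &FPI,
                         llvm::Value *&RoundingMD, llvm::Value *&ExceptMD) {
  unsigned N = FPI.getNumArgOperands();
  RoundingMD = FPI.getArgOperand(N - 2);
  ExceptMD = FPI.getArgOperand(N - 1);
}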
Symtab.size()}, - {File->Strtab.data(), File->Strtab.size()}); - File->TargetTriple = R.getTargetTriple(); - File->SourceFileName = R.getSourceFileName(); - File->COFFLinkerOpts = R.getCOFFLinkerOpts(); - File->ComdatTable = R.getComdatTable(); + File->TargetTriple = FOrErr->TheReader.getTargetTriple(); + File->SourceFileName = FOrErr->TheReader.getSourceFileName(); + File->COFFLinkerOpts = FOrErr->TheReader.getCOFFLinkerOpts(); + File->ComdatTable = FOrErr->TheReader.getComdatTable(); - for (unsigned I = 0; I != Mods.size(); ++I) { + for (unsigned I = 0; I != FOrErr->Mods.size(); ++I) { size_t Begin = File->Symbols.size(); - for (const irsymtab::Reader::SymbolRef &Sym : R.module_symbols(I)) + for (const irsymtab::Reader::SymbolRef &Sym : + FOrErr->TheReader.module_symbols(I)) // Skip symbols that are irrelevant to LTO. Note that this condition needs // to match the one in Skip() in LTO::addRegularLTO(). if (Sym.isGlobal() && !Sym.isFormatSpecific()) @@ -366,6 +335,8 @@ Expected> InputFile::create(MemoryBufferRef Object) { File->ModuleSymIndices.push_back({Begin, File->Symbols.size()}); } + File->Mods = FOrErr->Mods; + File->Strtab = std::move(FOrErr->Strtab); return std::move(File); } @@ -393,30 +364,40 @@ LTO::LTO(Config Conf, ThinBackend Backend, // Requires a destructor for MapVector. LTO::~LTO() = default; -// Add the given symbol to the GlobalResolutions map, and resolve its partition. -void LTO::addSymbolToGlobalRes(const InputFile::Symbol &Sym, - SymbolResolution Res, unsigned Partition) { - auto &GlobalRes = GlobalResolutions[Sym.getName()]; - GlobalRes.UnnamedAddr &= Sym.isUnnamedAddr(); - if (Res.Prevailing) - GlobalRes.IRName = Sym.getIRName(); - - // Set the partition to external if we know it is used elsewhere, e.g. - // it is visible to a regular object, is referenced from llvm.compiler_used, - // or was already recorded as being referenced from a different partition. - if (Res.VisibleToRegularObj || Sym.isUsed() || - (GlobalRes.Partition != GlobalResolution::Unknown && - GlobalRes.Partition != Partition)) { - GlobalRes.Partition = GlobalResolution::External; - } else - // First recorded reference, save the current partition. - GlobalRes.Partition = Partition; - - // Flag as visible outside of ThinLTO if visible from a regular object or - // if this is a reference in the regular LTO partition. - GlobalRes.VisibleOutsideThinLTO |= - (Res.VisibleToRegularObj || Sym.isUsed() || - Partition == GlobalResolution::RegularLTO); +// Add the symbols in the given module to the GlobalResolutions map, and resolve +// their partitions. +void LTO::addModuleToGlobalRes(ArrayRef Syms, + ArrayRef Res, + unsigned Partition, bool InSummary) { + auto *ResI = Res.begin(); + auto *ResE = Res.end(); + (void)ResE; + for (const InputFile::Symbol &Sym : Syms) { + assert(ResI != ResE); + SymbolResolution Res = *ResI++; + + auto &GlobalRes = GlobalResolutions[Sym.getName()]; + GlobalRes.UnnamedAddr &= Sym.isUnnamedAddr(); + if (Res.Prevailing) + GlobalRes.IRName = Sym.getIRName(); + + // Set the partition to external if we know it is re-defined by the linker + // with -defsym or -wrap options, used elsewhere, e.g. it is visible to a + // regular object, is referenced from llvm.compiler_used, or was already + // recorded as being referenced from a different partition. 
+ if (Res.LinkerRedefined || Res.VisibleToRegularObj || Sym.isUsed() || + (GlobalRes.Partition != GlobalResolution::Unknown && + GlobalRes.Partition != Partition)) { + GlobalRes.Partition = GlobalResolution::External; + } else + // First recorded reference, save the current partition. + GlobalRes.Partition = Partition; + + // Flag as visible outside of summary if visible from a regular object or + // from a module that does not have a summary. + GlobalRes.VisibleOutsideSummary |= + (Res.VisibleToRegularObj || Sym.isUsed() || !InSummary); + } } static void writeToResolutionFile(raw_ostream &OS, InputFile *Input, @@ -435,6 +416,8 @@ static void writeToResolutionFile(raw_ostream &OS, InputFile *Input, OS << 'l'; if (Res.VisibleToRegularObj) OS << 'x'; + if (Res.LinkerRedefined) + OS << 'r'; OS << '\n'; } OS.flush(); @@ -460,46 +443,91 @@ Error LTO::add(std::unique_ptr Input, Error LTO::addModule(InputFile &Input, unsigned ModI, const SymbolResolution *&ResI, const SymbolResolution *ResE) { - Expected HasThinLTOSummary = Input.Mods[ModI].hasSummary(); - if (!HasThinLTOSummary) - return HasThinLTOSummary.takeError(); + Expected LTOInfo = Input.Mods[ModI].getLTOInfo(); + if (!LTOInfo) + return LTOInfo.takeError(); + BitcodeModule BM = Input.Mods[ModI]; auto ModSyms = Input.module_symbols(ModI); - if (*HasThinLTOSummary) - return addThinLTO(Input.Mods[ModI], ModSyms, ResI, ResE); - else - return addRegularLTO(Input.Mods[ModI], ModSyms, ResI, ResE); + addModuleToGlobalRes(ModSyms, {ResI, ResE}, + LTOInfo->IsThinLTO ? ThinLTO.ModuleMap.size() + 1 : 0, + LTOInfo->HasSummary); + + if (LTOInfo->IsThinLTO) + return addThinLTO(BM, ModSyms, ResI, ResE); + + Expected ModOrErr = + addRegularLTO(BM, ModSyms, ResI, ResE); + if (!ModOrErr) + return ModOrErr.takeError(); + + if (!LTOInfo->HasSummary) + return linkRegularLTO(std::move(*ModOrErr), /*LivenessFromIndex=*/false); + + // Regular LTO module summaries are added to a dummy module that represents + // the combined regular LTO module. + if (Error Err = BM.readSummary(ThinLTO.CombinedIndex, "", -1ull)) + return Err; + RegularLTO.ModsWithSummaries.push_back(std::move(*ModOrErr)); + return Error::success(); +} + +// Checks whether the given global value is in a non-prevailing comdat +// (comdat containing values the linker indicated were not prevailing, +// which we then dropped to available_externally), and if so, removes +// it from the comdat. This is called for all global values to ensure the +// comdat is empty rather than leaving an incomplete comdat. It is needed for +// regular LTO modules, in case we are in a mixed-LTO mode (both regular +// and thin LTO modules) compilation. Since the regular LTO module will be +// linked first in the final native link, we want to make sure the linker +// doesn't select any of these incomplete comdats that would be left +// in the regular LTO module without this cleanup. +static void +handleNonPrevailingComdat(GlobalValue &GV, + std::set &NonPrevailingComdats) { + Comdat *C = GV.getComdat(); + if (!C) + return; + + if (!NonPrevailingComdats.count(C)) + return; + + // Additionally need to drop externally visible global values from the comdat + // to available_externally, so that there aren't multiply defined linker + // errors. + if (!GV.hasLocalLinkage()) + GV.setLinkage(GlobalValue::AvailableExternallyLinkage); + + if (auto GO = dyn_cast(&GV)) + GO->setComdat(nullptr); } // Add a regular LTO object to the link. 
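The resolution file gains an 'r' marker for linker-redefined symbols (--wrap/--defsym). A sketch of the per-symbol letter encoding as it stands after this change; the meanings of the earlier letters are inferred from the lto::SymbolResolution fields rather than fully visible in this hunk:

#include <string>

// One letter per set bit, in the order the resolution writer emits them.
std::string resolutionLetters(bool FinalDefInLinkageUnit,
                              bool VisibleToRegularObj,
                              bool LinkerRedefined) {
  std::string S;
  if (FinalDefInLinkageUnit) S += 'l';
  if (VisibleToRegularObj) S += 'x';
  if (LinkerRedefined) S += 'r'; // new in this patch
  return S;
}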
-Error LTO::addRegularLTO(BitcodeModule BM, - ArrayRef Syms, - const SymbolResolution *&ResI, - const SymbolResolution *ResE) { - if (!RegularLTO.CombinedModule) { - RegularLTO.CombinedModule = - llvm::make_unique("ld-temp.o", RegularLTO.Ctx); - RegularLTO.Mover = llvm::make_unique(*RegularLTO.CombinedModule); - } +// The resulting module needs to be linked into the combined LTO module with +// linkRegularLTO. +Expected +LTO::addRegularLTO(BitcodeModule BM, ArrayRef Syms, + const SymbolResolution *&ResI, + const SymbolResolution *ResE) { + RegularLTOState::AddedModule Mod; Expected> MOrErr = BM.getLazyModule(RegularLTO.Ctx, /*ShouldLazyLoadMetadata*/ true, /*IsImporting*/ false); if (!MOrErr) return MOrErr.takeError(); - Module &M = **MOrErr; + Mod.M = std::move(*MOrErr); + if (Error Err = M.materializeMetadata()) - return Err; + return std::move(Err); UpgradeDebugInfo(M); ModuleSymbolTable SymTab; SymTab.addModule(&M); - std::vector Keep; - for (GlobalVariable &GV : M.globals()) if (GV.hasAppendingLinkage()) - Keep.push_back(&GV); + Mod.Keep.push_back(&GV); DenseSet AliasedGlobals; for (auto &GA : M.aliases()) @@ -525,10 +553,10 @@ Error LTO::addRegularLTO(BitcodeModule BM, }; Skip(); + std::set NonPrevailingComdats; for (const InputFile::Symbol &Sym : Syms) { assert(ResI != ResE); SymbolResolution Res = *ResI++; - addSymbolToGlobalRes(Sym, Res, 0); assert(MsymI != MsymE); ModuleSymbolTable::Symbol Msym = *MsymI++; @@ -538,32 +566,31 @@ Error LTO::addRegularLTO(BitcodeModule BM, if (Res.Prevailing) { if (Sym.isUndefined()) continue; - Keep.push_back(GV); - switch (GV->getLinkage()) { - default: - break; - case GlobalValue::LinkOnceAnyLinkage: + Mod.Keep.push_back(GV); + // For symbols re-defined with linker -wrap and -defsym options, + // set the linkage to weak to inhibit IPO. The linkage will be + // restored by the linker. + if (Res.LinkerRedefined) GV->setLinkage(GlobalValue::WeakAnyLinkage); - break; - case GlobalValue::LinkOnceODRLinkage: - GV->setLinkage(GlobalValue::WeakODRLinkage); - break; - } + + GlobalValue::LinkageTypes OriginalLinkage = GV->getLinkage(); + if (GlobalValue::isLinkOnceLinkage(OriginalLinkage)) + GV->setLinkage(GlobalValue::getWeakLinkage( + GlobalValue::isLinkOnceODRLinkage(OriginalLinkage))); } else if (isa(GV) && (GV->hasLinkOnceODRLinkage() || GV->hasWeakODRLinkage() || GV->hasAvailableExternallyLinkage()) && !AliasedGlobals.count(cast(GV))) { - // Either of the above three types of linkage indicates that the + // Any of the above three types of linkage indicates that the // chosen prevailing symbol will have the same semantics as this copy of - // the symbol, so we can link it with available_externally linkage. We - // only need to do this if the symbol is undefined. - GlobalValue *CombinedGV = - RegularLTO.CombinedModule->getNamedValue(GV->getName()); - if (!CombinedGV || CombinedGV->isDeclaration()) { - Keep.push_back(GV); - GV->setLinkage(GlobalValue::AvailableExternallyLinkage); - cast(GV)->setComdat(nullptr); - } + // the symbol, so we may be able to link it with available_externally + // linkage. We will decide later whether to do that when we link this + // module (in linkRegularLTO), based on whether it is undefined. + Mod.Keep.push_back(GV); + GV->setLinkage(GlobalValue::AvailableExternallyLinkage); + if (GV->hasComdat()) + NonPrevailingComdats.insert(GV->getComdat()); + cast(GV)->setComdat(nullptr); } } // Common resolution: collect the maximum size/alignment over all commons. 
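The prevailing-symbol handling above replaces the explicit linkage switch with GlobalValue helpers; the effect is linkonce to weak and linkonce_odr to weak_odr, which keeps the chosen definition from being discarded. A compact restatement (hypothetical wrapper):

#include "llvm/IR/GlobalValue.h"

using llvm::GlobalValue;

// linkonce -> weak, linkonce_odr -> weak_odr; everything else unchanged.
GlobalValue::LinkageTypes lowerPrevailingLinkage(GlobalValue::LinkageTypes L) {
  if (GlobalValue::isLinkOnceLinkage(L))
    return GlobalValue::getWeakLinkage(GlobalValue::isLinkOnceODRLinkage(L));
  return L;
}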
@@ -580,32 +607,73 @@ Error LTO::addRegularLTO(BitcodeModule BM, // FIXME: use proposed local attribute for FinalDefinitionInLinkageUnit. } + if (!M.getComdatSymbolTable().empty()) + for (GlobalValue &GV : M.global_values()) + handleNonPrevailingComdat(GV, NonPrevailingComdats); assert(MsymI == MsymE); + return std::move(Mod); +} - return RegularLTO.Mover->move(std::move(*MOrErr), Keep, +Error LTO::linkRegularLTO(RegularLTOState::AddedModule Mod, + bool LivenessFromIndex) { + if (!RegularLTO.CombinedModule) { + RegularLTO.CombinedModule = + llvm::make_unique("ld-temp.o", RegularLTO.Ctx); + RegularLTO.Mover = llvm::make_unique(*RegularLTO.CombinedModule); + } + + std::vector Keep; + for (GlobalValue *GV : Mod.Keep) { + if (LivenessFromIndex && !ThinLTO.CombinedIndex.isGUIDLive(GV->getGUID())) + continue; + + if (!GV->hasAvailableExternallyLinkage()) { + Keep.push_back(GV); + continue; + } + + // Only link available_externally definitions if we don't already have a + // definition. + GlobalValue *CombinedGV = + RegularLTO.CombinedModule->getNamedValue(GV->getName()); + if (CombinedGV && !CombinedGV->isDeclaration()) + continue; + + Keep.push_back(GV); + } + + return RegularLTO.Mover->move(std::move(Mod.M), Keep, [](GlobalValue &, IRMover::ValueAdder) {}, /* IsPerformingImport */ false); } -// Add a ThinLTO object to the link. -Error LTO::addThinLTO(BitcodeModule BM, - ArrayRef Syms, +// Add a ThinLTO module to the link. +Error LTO::addThinLTO(BitcodeModule BM, ArrayRef Syms, const SymbolResolution *&ResI, const SymbolResolution *ResE) { if (Error Err = - BM.readSummary(ThinLTO.CombinedIndex, ThinLTO.ModuleMap.size())) + BM.readSummary(ThinLTO.CombinedIndex, BM.getModuleIdentifier(), + ThinLTO.ModuleMap.size())) return Err; for (const InputFile::Symbol &Sym : Syms) { assert(ResI != ResE); SymbolResolution Res = *ResI++; - addSymbolToGlobalRes(Sym, Res, ThinLTO.ModuleMap.size() + 1); if (Res.Prevailing) { if (!Sym.getIRName().empty()) { auto GUID = GlobalValue::getGUID(GlobalValue::getGlobalIdentifier( Sym.getIRName(), GlobalValue::ExternalLinkage, "")); ThinLTO.PrevailingModuleForGUID[GUID] = BM.getModuleIdentifier(); + + // For linker redefined symbols (via --wrap or --defsym) we want to + // switch the linkage to `weak` to prevent IPOs from happening. + // Find the summary in the module for this very GV and record the new + // linkage so that we can switch it when we import the GV. + if (Res.LinkerRedefined) + if (auto S = ThinLTO.CombinedIndex.findSummaryInModule( + GUID, BM.getModuleIdentifier())) + S->setLinkage(GlobalValue::WeakAnyLinkage); } } } @@ -624,10 +692,24 @@ unsigned LTO::getMaxTasks() const { } Error LTO::run(AddStreamFn AddStream, NativeObjectCache Cache) { + // Compute "dead" symbols, we don't want to import/export these! + DenseSet GUIDPreservedSymbols; + for (auto &Res : GlobalResolutions) { + if (Res.second.VisibleOutsideSummary && + // IRName will be defined if we have seen the prevailing copy of + // this value. If not, no need to preserve any ThinLTO copies. + !Res.second.IRName.empty()) + GUIDPreservedSymbols.insert(GlobalValue::getGUID( + GlobalValue::dropLLVMManglingEscape(Res.second.IRName))); + } + + computeDeadSymbols(ThinLTO.CombinedIndex, GUIDPreservedSymbols); + // Save the status of having a regularLTO combined module, as // this is needed for generating the ThinLTO Task ID, and // the CombinedModule will be moved at the end of runRegularLTO. 
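Both the ThinLTO prevailing map above and the dead-symbol preservation set in LTO::run key their lookups by a GUID derived from the symbol's IR name, dropping the "\01" mangling escape first. The derivation, isolated (the wrapper name is invented; the calls are the ones the patch uses):

#include "llvm/ADT/StringRef.h"
#include "llvm/IR/GlobalValue.h"

// GUIDs are hashes of the global identifier; strip the mangling escape
// before hashing, as the patch does via dropLLVMManglingEscape.
llvm::GlobalValue::GUID guidForIRName(llvm::StringRef IRName) {
  return llvm::GlobalValue::getGUID(
      llvm::GlobalValue::dropLLVMManglingEscape(IRName));
}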
- bool HasRegularLTO = RegularLTO.CombinedModule != nullptr; + bool HasRegularLTO = RegularLTO.CombinedModule != nullptr || + !RegularLTO.ModsWithSummaries.empty(); // Invoke regular LTO if there was a regular LTO module to start with. if (HasRegularLTO) if (auto E = runRegularLTO(AddStream)) @@ -636,6 +718,11 @@ Error LTO::run(AddStreamFn AddStream, NativeObjectCache Cache) { } Error LTO::runRegularLTO(AddStreamFn AddStream) { + for (auto &M : RegularLTO.ModsWithSummaries) + if (Error Err = linkRegularLTO(std::move(M), + /*LivenessFromIndex=*/true)) + return Err; + // Make sure commons have the right size/alignment: we kept the largest from // all the prevailing when adding the inputs, and we apply it here. const DataLayout &DL = RegularLTO.CombinedModule->getDataLayout(); @@ -943,7 +1030,7 @@ Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache, // Collect for each module the list of function it defines (GUID -> // Summary). - StringMap> + StringMap ModuleToDefinedGVSummaries(ThinLTO.ModuleMap.size()); ThinLTO.CombinedIndex.collectDefinedGVSummariesPerModule( ModuleToDefinedGVSummaries); @@ -965,22 +1052,8 @@ Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache, StringMap> ResolvedODR; if (Conf.OptLevel > 0) { - // Compute "dead" symbols, we don't want to import/export these! - DenseSet GUIDPreservedSymbols; - for (auto &Res : GlobalResolutions) { - if (Res.second.VisibleOutsideThinLTO && - // IRName will be defined if we have seen the prevailing copy of - // this value. If not, no need to preserve any ThinLTO copies. - !Res.second.IRName.empty()) - GUIDPreservedSymbols.insert(GlobalValue::getGUID( - GlobalValue::getRealLinkageName(Res.second.IRName))); - } - - auto DeadSymbols = - computeDeadSymbols(ThinLTO.CombinedIndex, GUIDPreservedSymbols); - ComputeCrossModuleImport(ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries, - ImportLists, ExportLists, &DeadSymbols); + ImportLists, ExportLists); std::set ExportedGUIDs; for (auto &Res : GlobalResolutions) { @@ -993,16 +1066,12 @@ Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache, if (Res.second.IRName.empty()) continue; auto GUID = GlobalValue::getGUID( - GlobalValue::getRealLinkageName(Res.second.IRName)); + GlobalValue::dropLLVMManglingEscape(Res.second.IRName)); // Mark exported unless index-based analysis determined it to be dead. 
- if (!DeadSymbols.count(GUID)) + if (ThinLTO.CombinedIndex.isGUIDLive(GUID)) ExportedGUIDs.insert(GUID); } - auto isPrevailing = [&](GlobalValue::GUID GUID, - const GlobalValueSummary *S) { - return ThinLTO.PrevailingModuleForGUID[GUID] == S->modulePath(); - }; auto isExported = [&](StringRef ModuleIdentifier, GlobalValue::GUID GUID) { const auto &ExportList = ExportLists.find(ModuleIdentifier); return (ExportList != ExportLists.end() && @@ -1010,17 +1079,20 @@ Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache, ExportedGUIDs.count(GUID); }; thinLTOInternalizeAndPromoteInIndex(ThinLTO.CombinedIndex, isExported); - - auto recordNewLinkage = [&](StringRef ModuleIdentifier, - GlobalValue::GUID GUID, - GlobalValue::LinkageTypes NewLinkage) { - ResolvedODR[ModuleIdentifier][GUID] = NewLinkage; - }; - - thinLTOResolveWeakForLinkerInIndex(ThinLTO.CombinedIndex, isPrevailing, - recordNewLinkage); } + auto isPrevailing = [&](GlobalValue::GUID GUID, + const GlobalValueSummary *S) { + return ThinLTO.PrevailingModuleForGUID[GUID] == S->modulePath(); + }; + auto recordNewLinkage = [&](StringRef ModuleIdentifier, + GlobalValue::GUID GUID, + GlobalValue::LinkageTypes NewLinkage) { + ResolvedODR[ModuleIdentifier][GUID] = NewLinkage; + }; + thinLTOResolveWeakForLinkerInIndex(ThinLTO.CombinedIndex, isPrevailing, + recordNewLinkage); + std::unique_ptr BackendProc = ThinLTO.Backend(Conf, ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries, AddStream, Cache); @@ -1060,7 +1132,7 @@ lto::setupOptimizationRemarks(LLVMContext &Context, Context.setDiagnosticsOutputFile( llvm::make_unique(DiagnosticFile->os())); if (LTOPassRemarksWithHotness) - Context.setDiagnosticHotnessRequested(true); + Context.setDiagnosticsHotnessRequested(true); DiagnosticFile->keep(); return std::move(DiagnosticFile); } diff --git a/interpreter/llvm/src/lib/LTO/LTOBackend.cpp b/interpreter/llvm/src/lib/LTO/LTOBackend.cpp index 30447c528af1a..3f72e446cdf2e 100644 --- a/interpreter/llvm/src/lib/LTO/LTOBackend.cpp +++ b/interpreter/llvm/src/lib/LTO/LTOBackend.cpp @@ -42,11 +42,6 @@ using namespace llvm; using namespace lto; -static cl::opt - LTOUseNewPM("lto-use-new-pm", - cl::desc("Run LTO passes using the new pass manager"), - cl::init(false), cl::Hidden); - LLVM_ATTRIBUTE_NORETURN static void reportOpenError(StringRef Path, Twine Msg) { errs() << "failed to open " << Path << ": " << Msg << '\n'; errs().flush(); @@ -117,19 +112,27 @@ Error Config::addSaveTemps(std::string OutputFileName, namespace { std::unique_ptr -createTargetMachine(Config &Conf, StringRef TheTriple, - const Target *TheTarget) { +createTargetMachine(Config &Conf, const Target *TheTarget, Module &M) { + StringRef TheTriple = M.getTargetTriple(); SubtargetFeatures Features; Features.getDefaultSubtargetFeatures(Triple(TheTriple)); for (const std::string &A : Conf.MAttrs) Features.AddFeature(A); + Reloc::Model RelocModel; + if (Conf.RelocModel) + RelocModel = *Conf.RelocModel; + else + RelocModel = + M.getPICLevel() == PICLevel::NotPIC ? 
Reloc::Static : Reloc::PIC_; + return std::unique_ptr(TheTarget->createTargetMachine( - TheTriple, Conf.CPU, Features.getString(), Conf.Options, Conf.RelocModel, + TheTriple, Conf.CPU, Features.getString(), Conf.Options, RelocModel, Conf.CodeModel, Conf.CGOptLevel)); } -static void runNewPMPasses(Module &Mod, TargetMachine *TM, unsigned OptLevel) { +static void runNewPMPasses(Module &Mod, TargetMachine *TM, unsigned OptLevel, + bool IsThinLTO) { PassBuilder PB(TM); AAManager AA; @@ -173,7 +176,10 @@ static void runNewPMPasses(Module &Mod, TargetMachine *TM, unsigned OptLevel) { break; } - MPM = PB.buildLTODefaultPipeline(OL, false /* DebugLogging */); + if (IsThinLTO) + MPM = PB.buildThinLTODefaultPipeline(OL, false /* DebugLogging */); + else + MPM = PB.buildLTODefaultPipeline(OL, false /* DebugLogging */); MPM.run(Mod, MAM); // FIXME (davide): verify the output. @@ -251,17 +257,12 @@ static void runOldPMPasses(Config &Conf, Module &Mod, TargetMachine *TM, bool opt(Config &Conf, TargetMachine *TM, unsigned Task, Module &Mod, bool IsThinLTO, ModuleSummaryIndex *ExportSummary, const ModuleSummaryIndex *ImportSummary) { - // There's still no ThinLTO pipeline hooked up in the new pass manager, - // once there is one, we can just remove this. - if (LTOUseNewPM && IsThinLTO) - report_fatal_error("ThinLTO not supported with the new PM yet!"); - // FIXME: Plumb the combined index into the new pass manager. if (!Conf.OptPipeline.empty()) runNewPMCustomPasses(Mod, TM, Conf.OptPipeline, Conf.AAPipeline, Conf.DisableVerify); - else if (LTOUseNewPM) - runNewPMPasses(Mod, TM, Conf.OptLevel); + else if (Conf.UseNewPM) + runNewPMPasses(Mod, TM, Conf.OptLevel, IsThinLTO); else runOldPMPasses(Conf, Mod, TM, IsThinLTO, ExportSummary, ImportSummary); return !Conf.PostOptModuleHook || Conf.PostOptModuleHook(Task, Mod); @@ -311,7 +312,7 @@ void splitCodeGen(Config &C, TargetMachine *TM, AddStreamFn AddStream, std::unique_ptr MPartInCtx = std::move(MOrErr.get()); std::unique_ptr TM = - createTargetMachine(C, MPartInCtx->getTargetTriple(), T); + createTargetMachine(C, T, *MPartInCtx); codegen(C, TM.get(), AddStream, ThreadId, *MPartInCtx); }, @@ -360,8 +361,7 @@ Error lto::backend(Config &C, AddStreamFn AddStream, if (!TOrErr) return TOrErr.takeError(); - std::unique_ptr TM = - createTargetMachine(C, Mod->getTargetTriple(), *TOrErr); + std::unique_ptr TM = createTargetMachine(C, *TOrErr, *Mod); // Setup optimization remarks. auto DiagFileOrErr = lto::setupOptimizationRemarks( @@ -397,8 +397,7 @@ Error lto::thinBackend(Config &Conf, unsigned Task, AddStreamFn AddStream, if (!TOrErr) return TOrErr.takeError(); - std::unique_ptr TM = - createTargetMachine(Conf, Mod.getTargetTriple(), *TOrErr); + std::unique_ptr TM = createTargetMachine(Conf, *TOrErr, Mod); if (Conf.CodeGenOnly) { codegen(Conf, TM.get(), AddStream, Task, Mod); diff --git a/interpreter/llvm/src/lib/LTO/LTOCodeGenerator.cpp b/interpreter/llvm/src/lib/LTO/LTOCodeGenerator.cpp index 2fbacd7091ef1..6a275560dc92b 100644 --- a/interpreter/llvm/src/lib/LTO/LTOCodeGenerator.cpp +++ b/interpreter/llvm/src/lib/LTO/LTOCodeGenerator.cpp @@ -597,6 +597,7 @@ bool LTOCodeGenerator::compileOptimized(ArrayRef Out) { // If statistics were requested, print them out after codegen. 
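createTargetMachine now falls back to the module's PIC level when the config leaves the relocation model unset (Conf.RelocModel became an Optional, as the cache-key hunk earlier shows). The selection logic, isolated as a sketch:

#include "llvm/ADT/Optional.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/CodeGen.h"

// If the config does not pin a relocation model, follow the module's PIC
// level, as the patched createTargetMachine does.
llvm::Reloc::Model pickRelocModel(llvm::Optional<llvm::Reloc::Model> Configured,
                                  const llvm::Module &M) {
  if (Configured)
    return *Configured;
  return M.getPICLevel() == llvm::PICLevel::NotPIC ? llvm::Reloc::Static
                                                   : llvm::Reloc::PIC_;
}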
if (llvm::AreStatisticsEnabled()) llvm::PrintStatistics(); + reportAndResetTimings(); finishOptimizationRemarks(); diff --git a/interpreter/llvm/src/lib/LTO/LTOModule.cpp b/interpreter/llvm/src/lib/LTO/LTOModule.cpp index 11f0982c6a602..3cc8b7d0e7706 100644 --- a/interpreter/llvm/src/lib/LTO/LTOModule.cpp +++ b/interpreter/llvm/src/lib/LTO/LTOModule.cpp @@ -77,14 +77,12 @@ bool LTOModule::isBitcodeFile(StringRef Path) { } bool LTOModule::isThinLTO() { - // Right now the detection is only based on the summary presence. We may want - // to add a dedicated flag at some point. - Expected Result = hasGlobalValueSummary(MBRef); + Expected Result = getBitcodeLTOInfo(MBRef); if (!Result) { logAllUnhandledErrors(Result.takeError(), errs(), ""); return false; } - return *Result; + return Result->IsThinLTO; } bool LTOModule::isBitcodeForTarget(MemoryBuffer *Buffer, @@ -637,10 +635,10 @@ void LTOModule::parseMetadata() { raw_string_ostream OS(LinkerOpts); // Linker Options - if (Metadata *Val = getModule().getModuleFlag("Linker Options")) { - MDNode *LinkerOptions = cast(Val); + if (NamedMDNode *LinkerOptions = + getModule().getNamedMetadata("llvm.linker.options")) { for (unsigned i = 0, e = LinkerOptions->getNumOperands(); i != e; ++i) { - MDNode *MDOptions = cast(LinkerOptions->getOperand(i)); + MDNode *MDOptions = LinkerOptions->getOperand(i); for (unsigned ii = 0, ie = MDOptions->getNumOperands(); ii != ie; ++ii) { MDString *MDOption = cast(MDOptions->getOperand(ii)); OS << " " << MDOption->getString(); diff --git a/interpreter/llvm/src/lib/LTO/ThinLTOCodeGenerator.cpp b/interpreter/llvm/src/lib/LTO/ThinLTOCodeGenerator.cpp index f3d441db98cbb..1efd481b246c8 100644 --- a/interpreter/llvm/src/lib/LTO/ThinLTOCodeGenerator.cpp +++ b/interpreter/llvm/src/lib/LTO/ThinLTOCodeGenerator.cpp @@ -24,13 +24,14 @@ #include "llvm/Bitcode/BitcodeWriter.h" #include "llvm/Bitcode/BitcodeWriterPass.h" #include "llvm/ExecutionEngine/ObjectMemoryBuffer.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DiagnosticPrinter.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Mangler.h" +#include "llvm/IR/Verifier.h" #include "llvm/IRReader/IRReader.h" #include "llvm/LTO/LTO.h" -#include "llvm/Linker/Linker.h" #include "llvm/MC/SubtargetFeature.h" #include "llvm/Object/IRObjectFile.h" #include "llvm/Support/CachePruning.h" @@ -62,6 +63,7 @@ namespace llvm { extern cl::opt LTODiscardValueNames; extern cl::opt LTORemarksFilename; extern cl::opt LTOPassRemarksWithHotness; +extern cl::opt LTOStripInvalidDebugInfo; } namespace { @@ -142,6 +144,30 @@ static void promoteModule(Module &TheModule, const ModuleSummaryIndex &Index) { report_fatal_error("renameModuleForThinLTO failed"); } +namespace { +class ThinLTODiagnosticInfo : public DiagnosticInfo { + const Twine &Msg; +public: + ThinLTODiagnosticInfo(const Twine &DiagMsg, + DiagnosticSeverity Severity = DS_Error) + : DiagnosticInfo(DK_Linker, Severity), Msg(DiagMsg) {} + void print(DiagnosticPrinter &DP) const override { DP << Msg; } +}; +} + +/// Verify the module and strip broken debug info. +static void verifyLoadedModule(Module &TheModule) { + bool BrokenDebugInfo = false; + if (verifyModule(TheModule, &dbgs(), + LTOStripInvalidDebugInfo ? 
&BrokenDebugInfo : nullptr)) + report_fatal_error("Broken module found, compilation aborted!"); + if (BrokenDebugInfo) { + TheModule.getContext().diagnose(ThinLTODiagnosticInfo( + "Invalid debug info found, debug info will be stripped", DS_Warning)); + StripDebugInfo(TheModule); + } +} + static std::unique_ptr loadModuleFromBuffer(const MemoryBufferRef &Buffer, LLVMContext &Context, bool Lazy, bool IsImporting) { @@ -159,6 +185,8 @@ loadModuleFromBuffer(const MemoryBufferRef &Buffer, LLVMContext &Context, }); report_fatal_error("Can't load module, abort."); } + if (!Lazy) + verifyLoadedModule(*ModuleOrErr.get()); return std::move(ModuleOrErr.get()); } @@ -181,6 +209,8 @@ crossImportIntoModule(Module &TheModule, const ModuleSummaryIndex &Index, }); report_fatal_error("importFunctions failed"); } + // Verify again after cross-importing. + verifyLoadedModule(TheModule); } static void optimizeModule(Module &TheModule, TargetMachine &TM, @@ -195,7 +225,8 @@ static void optimizeModule(Module &TheModule, TargetMachine &TM, PMB.OptLevel = OptLevel; PMB.LoopVectorize = true; PMB.SLPVectorize = true; - PMB.VerifyInput = true; + // Already did this in verifyLoadedModule(). + PMB.VerifyInput = false; PMB.VerifyOutput = false; legacy::PassManager PM; @@ -505,29 +536,25 @@ static void initTMBuilder(TargetMachineBuilder &TMBuilder, void ThinLTOCodeGenerator::addModule(StringRef Identifier, StringRef Data) { ThinLTOBuffer Buffer(Data, Identifier); - if (Modules.empty()) { - // First module added, so initialize the triple and some options - LLVMContext Context; - StringRef TripleStr; - ErrorOr TripleOrErr = expectedToErrorOrAndEmitErrors( - Context, getBitcodeTargetTriple(Buffer.getMemBuffer())); - if (TripleOrErr) - TripleStr = *TripleOrErr; - Triple TheTriple(TripleStr); + LLVMContext Context; + StringRef TripleStr; + ErrorOr TripleOrErr = expectedToErrorOrAndEmitErrors( + Context, getBitcodeTargetTriple(Buffer.getMemBuffer())); + + if (TripleOrErr) + TripleStr = *TripleOrErr; + + Triple TheTriple(TripleStr); + + if (Modules.empty()) initTMBuilder(TMBuilder, Triple(TheTriple)); + else if (TMBuilder.TheTriple != TheTriple) { + if (!TMBuilder.TheTriple.isCompatibleWith(TheTriple)) + report_fatal_error("ThinLTO modules with incompatible triples not " + "supported"); + initTMBuilder(TMBuilder, Triple(TMBuilder.TheTriple.merge(TheTriple))); } -#ifndef NDEBUG - else { - LLVMContext Context; - StringRef TripleStr; - ErrorOr TripleOrErr = expectedToErrorOrAndEmitErrors( - Context, getBitcodeTargetTriple(Buffer.getMemBuffer())); - if (TripleOrErr) - TripleStr = *TripleOrErr; - assert(TMBuilder.TheTriple.str() == TripleStr && - "ThinLTO modules with different triple not supported"); - } -#endif + Modules.push_back(Buffer); } @@ -600,13 +627,13 @@ void ThinLTOCodeGenerator::promote(Module &TheModule, PreservedSymbols, Triple(TheModule.getTargetTriple())); // Compute "dead" symbols, we don't want to import/export these! - auto DeadSymbols = computeDeadSymbols(Index, GUIDPreservedSymbols); + computeDeadSymbols(Index, GUIDPreservedSymbols); // Generate import/export list StringMap ImportLists(ModuleCount); StringMap ExportLists(ModuleCount); ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists, - ExportLists, &DeadSymbols); + ExportLists); // Resolve LinkOnce/Weak symbols. StringMap> ResolvedODR; @@ -645,13 +672,13 @@ void ThinLTOCodeGenerator::crossModuleImport(Module &TheModule, PreservedSymbols, Triple(TheModule.getTargetTriple())); // Compute "dead" symbols, we don't want to import/export these! 
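ThinLTOCodeGenerator::addModule now tolerates any mutually compatible triples and merges them instead of asserting string equality. An illustrative use of Triple::isCompatibleWith and Triple::merge, the two calls this patch relies on; the concrete triples are only examples:

#include "llvm/ADT/Triple.h"
#include "llvm/Support/raw_ostream.h"

void mergeExample() {
  llvm::Triple A("x86_64-apple-macosx10.11.0");
  llvm::Triple B("x86_64-apple-macosx10.12.0");
  if (A.isCompatibleWith(B))
    llvm::outs() << A.merge(B) << "\n"; // prints the merged triple
}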
- auto DeadSymbols = computeDeadSymbols(Index, GUIDPreservedSymbols); + computeDeadSymbols(Index, GUIDPreservedSymbols); // Generate import/export list StringMap ImportLists(ModuleCount); StringMap ExportLists(ModuleCount); ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists, - ExportLists, &DeadSymbols); + ExportLists); auto &ImportList = ImportLists[TheModule.getModuleIdentifier()]; crossImportIntoModule(TheModule, Index, ModuleMap, ImportList); @@ -722,13 +749,13 @@ void ThinLTOCodeGenerator::internalize(Module &TheModule, Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries); // Compute "dead" symbols, we don't want to import/export these! - auto DeadSymbols = computeDeadSymbols(Index, GUIDPreservedSymbols); + computeDeadSymbols(Index, GUIDPreservedSymbols); // Generate import/export list StringMap ImportLists(ModuleCount); StringMap ExportLists(ModuleCount); ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists, - ExportLists, &DeadSymbols); + ExportLists); auto &ExportList = ExportLists[ModuleIdentifier]; // Be friendly and don't nuke totally the module when the client didn't @@ -874,14 +901,14 @@ void ThinLTOCodeGenerator::run() { computeGUIDPreservedSymbols(PreservedSymbols, TMBuilder.TheTriple); // Compute "dead" symbols, we don't want to import/export these! - auto DeadSymbols = computeDeadSymbols(*Index, GUIDPreservedSymbols); + computeDeadSymbols(*Index, GUIDPreservedSymbols); // Collect the import/export lists for all modules from the call-graph in the // combined index. StringMap ImportLists(ModuleCount); StringMap ExportLists(ModuleCount); ComputeCrossModuleImport(*Index, ModuleToDefinedGVSummaries, ImportLists, - ExportLists, &DeadSymbols); + ExportLists); // We use a std::map here to be able to have a defined ordering when // producing a hash for the cache entry. @@ -1024,4 +1051,5 @@ void ThinLTOCodeGenerator::run() { // If statistics were requested, print them out now. if (llvm::AreStatisticsEnabled()) llvm::PrintStatistics(); + reportAndResetTimings(); } diff --git a/interpreter/llvm/src/lib/Linker/IRMover.cpp b/interpreter/llvm/src/lib/Linker/IRMover.cpp index ecef1efda1a2c..f486e525b5e76 100644 --- a/interpreter/llvm/src/lib/Linker/IRMover.cpp +++ b/interpreter/llvm/src/lib/Linker/IRMover.cpp @@ -1157,6 +1157,11 @@ Error IRLinker::linkModuleFlagsMetadata() { mdconst::extract(DstOp->getOperand(0)); unsigned DstBehaviorValue = DstBehavior->getZExtValue(); + auto overrideDstValue = [&]() { + DstModFlags->setOperand(DstIndex, SrcOp); + Flags[ID].first = SrcOp; + }; + // If either flag has override behavior, handle it first. if (DstBehaviorValue == Module::Override) { // Diagnose inconsistent flags which both have override behavior. @@ -1167,8 +1172,7 @@ Error IRLinker::linkModuleFlagsMetadata() { continue; } else if (SrcBehaviorValue == Module::Override) { // Update the destination flag to that of the source. 
- DstModFlags->setOperand(DstIndex, SrcOp); - Flags[ID].first = SrcOp; + overrideDstValue(); continue; } @@ -1204,6 +1208,15 @@ Error IRLinker::linkModuleFlagsMetadata() { } continue; } + case Module::Max: { + ConstantInt *DstValue = + mdconst::extract(DstOp->getOperand(2)); + ConstantInt *SrcValue = + mdconst::extract(SrcOp->getOperand(2)); + if (SrcValue->getZExtValue() > DstValue->getZExtValue()) + overrideDstValue(); + break; + } case Module::Append: { MDNode *DstValue = cast(DstOp->getOperand(2)); MDNode *SrcValue = cast(SrcOp->getOperand(2)); @@ -1243,25 +1256,16 @@ Error IRLinker::linkModuleFlagsMetadata() { return Error::success(); } -// This function returns true if the triples match. -static bool triplesMatch(const Triple &T0, const Triple &T1) { - // If vendor is apple, ignore the version number. - if (T0.getVendor() == Triple::Apple) - return T0.getArch() == T1.getArch() && T0.getSubArch() == T1.getSubArch() && - T0.getVendor() == T1.getVendor() && T0.getOS() == T1.getOS(); - - return T0 == T1; -} - -// This function returns the merged triple. -static std::string mergeTriples(const Triple &SrcTriple, - const Triple &DstTriple) { - // If vendor is apple, pick the triple with the larger version number. - if (SrcTriple.getVendor() == Triple::Apple) - if (DstTriple.isOSVersionLT(SrcTriple)) - return SrcTriple.str(); - - return DstTriple.str(); +/// Return InlineAsm adjusted with target-specific directives if required. +/// For ARM and Thumb, we have to add directives to select the appropriate ISA +/// to support mixing module-level inline assembly from ARM and Thumb modules. +static std::string adjustInlineAsm(const std::string &InlineAsm, + const Triple &Triple) { + if (Triple.getArch() == Triple::thumb || Triple.getArch() == Triple::thumbeb) + return ".text\n.balign 2\n.thumb\n" + InlineAsm; + if (Triple.getArch() == Triple::arm || Triple.getArch() == Triple::armeb) + return ".text\n.balign 4\n.arm\n" + InlineAsm; + return InlineAsm; } Error IRLinker::run() { @@ -1289,22 +1293,25 @@ Error IRLinker::run() { Triple SrcTriple(SrcM->getTargetTriple()), DstTriple(DstM.getTargetTriple()); - if (!SrcM->getTargetTriple().empty() && !triplesMatch(SrcTriple, DstTriple)) + if (!SrcM->getTargetTriple().empty()&& + !SrcTriple.isCompatibleWith(DstTriple)) emitWarning("Linking two modules of different target triples: " + SrcM->getModuleIdentifier() + "' is '" + SrcM->getTargetTriple() + "' whereas '" + DstM.getModuleIdentifier() + "' is '" + DstM.getTargetTriple() + "'\n"); - DstM.setTargetTriple(mergeTriples(SrcTriple, DstTriple)); + DstM.setTargetTriple(SrcTriple.merge(DstTriple)); // Append the module inline asm string. if (!IsPerformingImport && !SrcM->getModuleInlineAsm().empty()) { + std::string SrcModuleInlineAsm = adjustInlineAsm(SrcM->getModuleInlineAsm(), + SrcTriple); if (DstM.getModuleInlineAsm().empty()) - DstM.setModuleInlineAsm(SrcM->getModuleInlineAsm()); + DstM.setModuleInlineAsm(SrcModuleInlineAsm); else DstM.setModuleInlineAsm(DstM.getModuleInlineAsm() + "\n" + - SrcM->getModuleInlineAsm()); + SrcModuleInlineAsm); } // Loop over all of the linked values to compute type mappings. 
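adjustInlineAsm above prepends ISA-selection directives so module-level assembly from ARM and Thumb objects can be concatenated safely. The same shape reduced to plain strings, with booleans standing in for the Triple architecture checks:

#include <string>

// Thumb text must be 2-byte aligned, ARM text 4-byte aligned; the .thumb
// and .arm directives re-select the ISA before each module's asm block.
std::string prependISADirectives(const std::string &Asm, bool IsThumb,
                                 bool IsARM) {
  if (IsThumb)
    return ".text\n.balign 2\n.thumb\n" + Asm;
  if (IsARM)
    return ".text\n.balign 4\n.arm\n" + Asm;
  return Asm;
}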
diff --git a/interpreter/llvm/src/lib/MC/CMakeLists.txt b/interpreter/llvm/src/lib/MC/CMakeLists.txt index a86fd383003da..562f136a3ce2b 100644 --- a/interpreter/llvm/src/lib/MC/CMakeLists.txt +++ b/interpreter/llvm/src/lib/MC/CMakeLists.txt @@ -45,13 +45,13 @@ add_llvm_library(LLVMMC MCWasmObjectTargetWriter.cpp MCWasmStreamer.cpp MCWin64EH.cpp + MCWinCOFFStreamer.cpp MCWinEH.cpp MachObjectWriter.cpp StringTableBuilder.cpp SubtargetFeature.cpp WasmObjectWriter.cpp WinCOFFObjectWriter.cpp - WinCOFFStreamer.cpp ADDITIONAL_HEADER_DIRS ${LLVM_MAIN_INCLUDE_DIR}/llvm/MC diff --git a/interpreter/llvm/src/lib/MC/ELFObjectWriter.cpp b/interpreter/llvm/src/lib/MC/ELFObjectWriter.cpp index e86db933af3c7..c8dd630119439 100644 --- a/interpreter/llvm/src/lib/MC/ELFObjectWriter.cpp +++ b/interpreter/llvm/src/lib/MC/ELFObjectWriter.cpp @@ -13,11 +13,13 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCAssembler.h" @@ -25,6 +27,7 @@ #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixup.h" +#include "llvm/MC/MCFixupKindInfo.h" #include "llvm/MC/MCFragment.h" #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSection.h" @@ -36,7 +39,6 @@ #include "llvm/Support/Allocator.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Compression.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" @@ -204,8 +206,7 @@ class ELFObjectWriter : public MCObjectWriter { void recordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, const MCFixup &Fixup, - MCValue Target, bool &IsPCRel, - uint64_t &FixedValue) override; + MCValue Target, uint64_t &FixedValue) override; // Map from a signature symbol to the group section index using RevGroupMapTy = DenseMap; @@ -626,16 +627,16 @@ void ELFObjectWriter::recordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target, - bool &IsPCRel, uint64_t &FixedValue) { + uint64_t &FixedValue) { + MCAsmBackend &Backend = Asm.getBackend(); + bool IsPCRel = Backend.getFixupKindInfo(Fixup.getKind()).Flags & + MCFixupKindInfo::FKF_IsPCRel; const MCSectionELF &FixupSection = cast(*Fragment->getParent()); uint64_t C = Target.getConstant(); uint64_t FixupOffset = Layout.getFragmentOffset(Fragment) + Fixup.getOffset(); MCContext &Ctx = Asm.getContext(); if (const MCSymbolRefExpr *RefB = Target.getSymB()) { - assert(RefB->getKind() == MCSymbolRefExpr::VK_None && - "Should not have constructed this"); - // Let A, B and C being the components of Target and R be the location of // the fixup. If the fixup is not pcrel, we want to compute (A - B + C). // If it is pcrel, we want to compute (A - B + C - R). 
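recordRelocation loses its IsPCRel out-parameter in the hunk above; object writers now derive PC-relative-ness from the fixup kind's flag word instead of having it threaded through the API. The query, isolated (the wrapper name is invented):

#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCFixupKindInfo.h"

// PC-relative-ness is a property of the fixup kind, recoverable locally.
bool fixupIsPCRel(const llvm::MCAsmBackend &Backend,
                  const llvm::MCFixup &Fixup) {
  return Backend.getFixupKindInfo(Fixup.getKind()).Flags &
         llvm::MCFixupKindInfo::FKF_IsPCRel;
}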
@@ -1020,18 +1021,24 @@ void ELFObjectWriter::writeSectionData(const MCAssembler &Asm, MCSection &Sec, MCSectionELF &Section = static_cast(Sec); StringRef SectionName = Section.getSectionName(); + auto &MC = Asm.getContext(); + const auto &MAI = MC.getAsmInfo(); + // Compressing debug_frame requires handling alignment fragments which is // more work (possibly generalizing MCAssembler.cpp:writeFragment to allow // for writing to arbitrary buffers) for little benefit. bool CompressionEnabled = - Asm.getContext().getAsmInfo()->compressDebugSections() != - DebugCompressionType::DCT_None; + MAI->compressDebugSections() != DebugCompressionType::None; if (!CompressionEnabled || !SectionName.startswith(".debug_") || SectionName == ".debug_frame") { Asm.writeSectionData(&Section, Layout); return; } + assert((MAI->compressDebugSections() == DebugCompressionType::Z || + MAI->compressDebugSections() == DebugCompressionType::GNU) && + "expected zlib or zlib-gnu style compression"); + SmallVector UncompressedData; raw_svector_ostream VecOS(UncompressedData); raw_pwrite_stream &OldStream = getStream(); @@ -1048,8 +1055,7 @@ void ELFObjectWriter::writeSectionData(const MCAssembler &Asm, MCSection &Sec, return; } - bool ZlibStyle = Asm.getContext().getAsmInfo()->compressDebugSections() == - DebugCompressionType::DCT_Zlib; + bool ZlibStyle = MAI->compressDebugSections() == DebugCompressionType::Z; if (!maybeWriteCompression(UncompressedData.size(), CompressedContents, ZlibStyle, Sec.getAlignment())) { getStream() << UncompressedData; @@ -1061,8 +1067,7 @@ void ELFObjectWriter::writeSectionData(const MCAssembler &Asm, MCSection &Sec, Section.setFlags(Section.getFlags() | ELF::SHF_COMPRESSED); else // Add "z" prefix to section name. This is zlib-gnu style. - Asm.getContext().renameELFSection(&Section, - (".z" + SectionName.drop_front(1)).str()); + MC.renameELFSection(&Section, (".z" + SectionName.drop_front(1)).str()); getStream() << CompressedContents; } diff --git a/interpreter/llvm/src/lib/MC/MCAsmBackend.cpp b/interpreter/llvm/src/lib/MC/MCAsmBackend.cpp index fc0aa788f6d3a..3642f37aa855c 100644 --- a/interpreter/llvm/src/lib/MC/MCAsmBackend.cpp +++ b/interpreter/llvm/src/lib/MC/MCAsmBackend.cpp @@ -7,9 +7,9 @@ // //===----------------------------------------------------------------------===// +#include "llvm/MC/MCAsmBackend.h" #include "llvm/ADT/None.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCFixupKindInfo.h" #include #include diff --git a/interpreter/llvm/src/lib/MC/MCAsmInfo.cpp b/interpreter/llvm/src/lib/MC/MCAsmInfo.cpp index b9be685cedc41..f05904048e0b7 100644 --- a/interpreter/llvm/src/lib/MC/MCAsmInfo.cpp +++ b/interpreter/llvm/src/lib/MC/MCAsmInfo.cpp @@ -13,10 +13,10 @@ //===----------------------------------------------------------------------===// #include "llvm/MC/MCAsmInfo.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCStreamer.h" -#include "llvm/Support/Dwarf.h" using namespace llvm; diff --git a/interpreter/llvm/src/lib/MC/MCAsmInfoDarwin.cpp b/interpreter/llvm/src/lib/MC/MCAsmInfoDarwin.cpp index 4b2001764e972..c74840982fb75 100644 --- a/interpreter/llvm/src/lib/MC/MCAsmInfoDarwin.cpp +++ b/interpreter/llvm/src/lib/MC/MCAsmInfoDarwin.cpp @@ -13,9 +13,9 @@ //===----------------------------------------------------------------------===// #include "llvm/MC/MCAsmInfoDarwin.h" +#include "llvm/BinaryFormat/MachO.h" #include "llvm/MC/MCDirectives.h" #include 
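The compression hunks above distinguish two on-disk styles: DebugCompressionType::Z sets the ELF SHF_COMPRESSED section flag, while the zlib-gnu style instead renames the section from .debug_* to .zdebug_*. A small sketch of that renaming convention (std::string stands in for StringRef/Twine):

    #include <cassert>
    #include <string>

    // zlib-gnu style: leave the section flags untouched and rename instead,
    // e.g. ".debug_info" -> ".zdebug_info" (drop the '.', prefix ".z").
    std::string zlibGnuName(const std::string &SectionName) {
      assert(SectionName.rfind(".debug_", 0) == 0 && "expected .debug_ section");
      return ".z" + SectionName.substr(1);
    }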
"llvm/MC/MCSectionMachO.h" -#include "llvm/Support/MachO.h" using namespace llvm; diff --git a/interpreter/llvm/src/lib/MC/MCAsmInfoELF.cpp b/interpreter/llvm/src/lib/MC/MCAsmInfoELF.cpp index e44c08b50d766..b0dc43c6c868f 100644 --- a/interpreter/llvm/src/lib/MC/MCAsmInfoELF.cpp +++ b/interpreter/llvm/src/lib/MC/MCAsmInfoELF.cpp @@ -13,9 +13,9 @@ //===----------------------------------------------------------------------===// #include "llvm/MC/MCAsmInfoELF.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionELF.h" -#include "llvm/Support/ELF.h" using namespace llvm; diff --git a/interpreter/llvm/src/lib/MC/MCAssembler.cpp b/interpreter/llvm/src/lib/MC/MCAssembler.cpp index c2bb7b2771814..eaf6f19326eb4 100644 --- a/interpreter/llvm/src/lib/MC/MCAssembler.cpp +++ b/interpreter/llvm/src/lib/MC/MCAssembler.cpp @@ -7,6 +7,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/MC/MCAssembler.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" @@ -16,7 +17,6 @@ #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAsmLayout.h" -#include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCCodeView.h" #include "llvm/MC/MCContext.h" @@ -37,9 +37,9 @@ #include "llvm/Support/LEB128.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include #include #include +#include #include #include @@ -193,14 +193,23 @@ bool MCAssembler::evaluateFixup(const MCAsmLayout &Layout, // FIXME: This code has some duplication with recordRelocation. We should // probably merge the two into a single callback that tries to evaluate a // fixup and records a relocation if one is needed. + + // On error claim to have completely evaluated the fixup, to prevent any + // further processing from being done. const MCExpr *Expr = Fixup.getValue(); + MCContext &Ctx = getContext(); + Value = 0; if (!Expr->evaluateAsRelocatable(Target, &Layout, &Fixup)) { - getContext().reportError(Fixup.getLoc(), "expected relocatable expression"); - // Claim to have completely evaluated the fixup, to prevent any further - // processing from being done. - Value = 0; + Ctx.reportError(Fixup.getLoc(), "expected relocatable expression"); return true; } + if (const MCSymbolRefExpr *RefB = Target.getSymB()) { + if (RefB->getKind() != MCSymbolRefExpr::VK_None) { + Ctx.reportError(Fixup.getLoc(), + "unsupported subtraction of qualified symbol"); + return true; + } + } bool IsPCRel = Backend.getFixupKindInfo( Fixup.getKind()).Flags & MCFixupKindInfo::FKF_IsPCRel; @@ -252,10 +261,9 @@ bool MCAssembler::evaluateFixup(const MCAsmLayout &Layout, Value -= Offset; } - // Let the backend adjust the fixup value if necessary, including whether - // we need a relocation. - Backend.processFixupValue(*this, Layout, Fixup, DF, Target, Value, - IsResolved); + // Let the backend force a relocation if needed. + if (IsResolved && Backend.shouldForceRelocation(*this, Fixup, Target)) + IsResolved = false; return IsResolved; } @@ -639,22 +647,20 @@ void MCAssembler::writeSectionData(const MCSection *Sec, Layout.getSectionAddressSize(Sec)); } -std::pair MCAssembler::handleFixup(const MCAsmLayout &Layout, - MCFragment &F, - const MCFixup &Fixup) { +std::tuple +MCAssembler::handleFixup(const MCAsmLayout &Layout, MCFragment &F, + const MCFixup &Fixup) { // Evaluate the fixup. 
MCValue Target; uint64_t FixedValue; - bool IsPCRel = Backend.getFixupKindInfo(Fixup.getKind()).Flags & - MCFixupKindInfo::FKF_IsPCRel; - if (!evaluateFixup(Layout, Fixup, &F, Target, FixedValue)) { + bool IsResolved = evaluateFixup(Layout, Fixup, &F, Target, FixedValue); + if (!IsResolved) { // The fixup was unresolved, we need a relocation. Inform the object // writer of the relocation, and give it an opportunity to adjust the // fixup value if need be. - getWriter().recordRelocation(*this, Layout, &F, Fixup, Target, IsPCRel, - FixedValue); + getWriter().recordRelocation(*this, Layout, &F, Fixup, Target, FixedValue); } - return std::make_pair(FixedValue, IsPCRel); + return std::make_tuple(Target, FixedValue, IsResolved); } void MCAssembler::layout(MCAsmLayout &Layout) { @@ -730,10 +736,12 @@ void MCAssembler::layout(MCAsmLayout &Layout) { llvm_unreachable("Unknown fragment with fixups!"); for (const MCFixup &Fixup : Fixups) { uint64_t FixedValue; - bool IsPCRel; - std::tie(FixedValue, IsPCRel) = handleFixup(Layout, Frag, Fixup); - getBackend().applyFixup(Fixup, Contents.data(), Contents.size(), - FixedValue, IsPCRel, getContext()); + bool IsResolved; + MCValue Target; + std::tie(Target, FixedValue, IsResolved) = + handleFixup(Layout, Frag, Fixup); + getBackend().applyFixup(*this, Fixup, Target, Contents, FixedValue, + IsResolved); } } } diff --git a/interpreter/llvm/src/lib/MC/MCCodeView.cpp b/interpreter/llvm/src/lib/MC/MCCodeView.cpp index 2b97ecc0fd2c1..92b1e12da5525 100644 --- a/interpreter/llvm/src/lib/MC/MCCodeView.cpp +++ b/interpreter/llvm/src/lib/MC/MCCodeView.cpp @@ -12,15 +12,15 @@ //===----------------------------------------------------------------------===// #include "llvm/MC/MCCodeView.h" -#include "llvm/MC/MCAsmLayout.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/BinaryFormat/COFF.h" #include "llvm/DebugInfo/CodeView/CodeView.h" #include "llvm/DebugInfo/CodeView/Line.h" #include "llvm/DebugInfo/CodeView/SymbolRecord.h" +#include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCObjectStreamer.h" #include "llvm/MC/MCValue.h" -#include "llvm/Support/COFF.h" #include "llvm/Support/EndianStream.h" using namespace llvm; @@ -145,7 +145,7 @@ void CodeViewContext::emitStringTable(MCObjectStreamer &OS) { MCSymbol *StringBegin = Ctx.createTempSymbol("strtab_begin", false), *StringEnd = Ctx.createTempSymbol("strtab_end", false); - OS.EmitIntValue(unsigned(ModuleDebugFragmentKind::StringTable), 4); + OS.EmitIntValue(unsigned(DebugSubsectionKind::StringTable), 4); OS.emitAbsoluteSymbolDiff(StringEnd, StringBegin, 4); OS.EmitLabel(StringBegin); @@ -172,7 +172,7 @@ void CodeViewContext::emitFileChecksums(MCObjectStreamer &OS) { MCSymbol *FileBegin = Ctx.createTempSymbol("filechecksums_begin", false), *FileEnd = Ctx.createTempSymbol("filechecksums_end", false); - OS.EmitIntValue(unsigned(ModuleDebugFragmentKind::FileChecksums), 4); + OS.EmitIntValue(unsigned(DebugSubsectionKind::FileChecksums), 4); OS.emitAbsoluteSymbolDiff(FileEnd, FileBegin, 4); OS.EmitLabel(FileBegin); @@ -197,7 +197,7 @@ void CodeViewContext::emitLineTableForFunction(MCObjectStreamer &OS, MCSymbol *LineBegin = Ctx.createTempSymbol("linetable_begin", false), *LineEnd = Ctx.createTempSymbol("linetable_end", false); - OS.EmitIntValue(unsigned(ModuleDebugFragmentKind::Lines), 4); + OS.EmitIntValue(unsigned(DebugSubsectionKind::Lines), 4); OS.emitAbsoluteSymbolDiff(LineEnd, LineBegin, 4); OS.EmitLabel(LineBegin); OS.EmitCOFFSecRel32(FuncBegin, /*Offset=*/0); diff --git 
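handleFixup above widens its return from std::pair to std::tuple so the caller also receives the evaluated MCValue, and the layout loop unpacks all three results with std::tie. The mechanics in isolation (names here are illustrative, not the real signatures):

    #include <cstdint>
    #include <tuple>

    // Illustrative stand-ins: an int plays the role of the MCValue target.
    std::tuple<int, uint64_t, bool> handleFixupLike() {
      int Target = 7;               // evaluated target
      uint64_t FixedValue = 0x1000; // value to patch into the fragment
      bool IsResolved = true;       // false would mean "emit a relocation"
      return std::make_tuple(Target, FixedValue, IsResolved);
    }

    void layoutLoopBody() {
      int Target;
      uint64_t FixedValue;
      bool IsResolved;
      std::tie(Target, FixedValue, IsResolved) = handleFixupLike();
      (void)Target; (void)FixedValue; (void)IsResolved; // applyFixup(...) here
    }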
a/interpreter/llvm/src/lib/MC/MCContext.cpp b/interpreter/llvm/src/lib/MC/MCContext.cpp index 4628d0ab88f30..48ee84edb096b 100644 --- a/interpreter/llvm/src/lib/MC/MCContext.cpp +++ b/interpreter/llvm/src/lib/MC/MCContext.cpp @@ -7,14 +7,16 @@ // //===----------------------------------------------------------------------===// +#include "llvm/MC/MCContext.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/COFF.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCCodeView.h" -#include "llvm/MC/MCContext.h" #include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFragment.h" @@ -32,14 +34,12 @@ #include "llvm/MC/MCSymbolWasm.h" #include "llvm/MC/SectionKind.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/COFF.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/Signals.h" #include "llvm/Support/SourceMgr.h" +#include "llvm/Support/raw_ostream.h" #include #include #include diff --git a/interpreter/llvm/src/lib/MC/MCDisassembler/Disassembler.cpp b/interpreter/llvm/src/lib/MC/MCDisassembler/Disassembler.cpp index aa5072743bdfe..ef1d8335e1bd7 100644 --- a/interpreter/llvm/src/lib/MC/MCDisassembler/Disassembler.cpp +++ b/interpreter/llvm/src/lib/MC/MCDisassembler/Disassembler.cpp @@ -27,8 +27,8 @@ #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" #include #include #include diff --git a/interpreter/llvm/src/lib/MC/MCDisassembler/MCRelocationInfo.cpp b/interpreter/llvm/src/lib/MC/MCDisassembler/MCRelocationInfo.cpp index 5805fd7007d2c..8f932a3f0d487 100644 --- a/interpreter/llvm/src/lib/MC/MCDisassembler/MCRelocationInfo.cpp +++ b/interpreter/llvm/src/lib/MC/MCDisassembler/MCRelocationInfo.cpp @@ -8,8 +8,8 @@ //===----------------------------------------------------------------------===// #include "llvm/MC/MCDisassembler/MCRelocationInfo.h" -#include "llvm/Support/TargetRegistry.h" #include "llvm-c/Disassembler.h" +#include "llvm/Support/TargetRegistry.h" using namespace llvm; diff --git a/interpreter/llvm/src/lib/MC/MCDwarf.cpp b/interpreter/llvm/src/lib/MC/MCDwarf.cpp index 1a320b0165faf..a2beee32f2cb1 100644 --- a/interpreter/llvm/src/lib/MC/MCDwarf.cpp +++ b/interpreter/llvm/src/lib/MC/MCDwarf.cpp @@ -7,19 +7,20 @@ // //===----------------------------------------------------------------------===// +#include "llvm/MC/MCDwarf.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Hashing.h" #include "llvm/ADT/None.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/Config/config.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCObjectStreamer.h" @@ -28,7 +29,6 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/Dwarf.h" #include 
"llvm/Support/Endian.h" #include "llvm/Support/EndianStream.h" #include "llvm/Support/ErrorHandling.h" diff --git a/interpreter/llvm/src/lib/MC/MCELFStreamer.cpp b/interpreter/llvm/src/lib/MC/MCELFStreamer.cpp index c8e0223c0573b..50c1f6e79f8a2 100644 --- a/interpreter/llvm/src/lib/MC/MCELFStreamer.cpp +++ b/interpreter/llvm/src/lib/MC/MCELFStreamer.cpp @@ -11,14 +11,15 @@ // //===----------------------------------------------------------------------===// +#include "llvm/MC/MCELFStreamer.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCELFStreamer.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixup.h" #include "llvm/MC/MCFragment.h" @@ -27,10 +28,9 @@ #include "llvm/MC/MCSection.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSymbolELF.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCSymbolELF.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" diff --git a/interpreter/llvm/src/lib/MC/MCExpr.cpp b/interpreter/llvm/src/lib/MC/MCExpr.cpp index 8149aa27327ca..38a8af49c1949 100644 --- a/interpreter/llvm/src/lib/MC/MCExpr.cpp +++ b/interpreter/llvm/src/lib/MC/MCExpr.cpp @@ -7,13 +7,13 @@ // //===----------------------------------------------------------------------===// +#include "llvm/MC/MCExpr.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCExpr.h" #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCValue.h" @@ -655,8 +655,12 @@ bool MCExpr::evaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm, // the OS X assembler will completely drop the 4. We should probably // include it in the relocation or produce an error if that is not // possible. + // Allow constant expressions. if (!A && !B) return true; + // Allows aliases with zero offset. 
+ if (Res.getConstant() == 0 && (!A || !B)) + return true; } } diff --git a/interpreter/llvm/src/lib/MC/MCFragment.cpp b/interpreter/llvm/src/lib/MC/MCFragment.cpp index 90b44177cf5e8..6e0249377a899 100644 --- a/interpreter/llvm/src/lib/MC/MCFragment.cpp +++ b/interpreter/llvm/src/lib/MC/MCFragment.cpp @@ -7,15 +7,15 @@ // //===----------------------------------------------------------------------===// +#include "llvm/MC/MCFragment.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" -#include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCAsmLayout.h" +#include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixup.h" -#include "llvm/MC/MCFragment.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCValue.h" @@ -307,7 +307,7 @@ raw_ostream &operator<<(raw_ostream &OS, const MCFixup &AF) { } // end namespace llvm #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -LLVM_DUMP_METHOD void MCFragment::dump() { +LLVM_DUMP_METHOD void MCFragment::dump() const { raw_ostream &OS = errs(); OS << "<"; @@ -328,9 +328,9 @@ LLVM_DUMP_METHOD void MCFragment::dump() { case MCFragment::FT_Dummy: OS << "MCDummyFragment"; break; } - OS << "(getBundlePadding()) << ">"; switch (getKind()) { @@ -382,7 +382,8 @@ LLVM_DUMP_METHOD void MCFragment::dump() { } case MCFragment::FT_Fill: { const MCFillFragment *FF = cast(this); - OS << " Value:" << FF->getValue() << " Size:" << FF->getSize(); + OS << " Value:" << static_cast(FF->getValue()) + << " Size:" << FF->getSize(); break; } case MCFragment::FT_Relaxable: { @@ -395,7 +396,8 @@ LLVM_DUMP_METHOD void MCFragment::dump() { case MCFragment::FT_Org: { const MCOrgFragment *OF = cast(this); OS << "\n "; - OS << " Offset:" << OF->getOffset() << " Value:" << OF->getValue(); + OS << " Offset:" << OF->getOffset() + << " Value:" << static_cast(OF->getValue()); break; } case MCFragment::FT_Dwarf: { @@ -445,19 +447,19 @@ LLVM_DUMP_METHOD void MCFragment::dump() { OS << ">"; } -LLVM_DUMP_METHOD void MCAssembler::dump() { +LLVM_DUMP_METHOD void MCAssembler::dump() const{ raw_ostream &OS = errs(); OS << "dump(); } OS << "],\n"; OS << " Symbols:["; - for (symbol_iterator it = symbol_begin(), ie = symbol_end(); it != ie; ++it) { + for (const_symbol_iterator it = symbol_begin(), ie = symbol_end(); it != ie; ++it) { if (it != symbol_begin()) OS << ",\n "; OS << "("; it->dump(); diff --git a/interpreter/llvm/src/lib/MC/MCInstPrinter.cpp b/interpreter/llvm/src/lib/MC/MCInstPrinter.cpp index 9121790959749..9296fcedb72b5 100644 --- a/interpreter/llvm/src/lib/MC/MCInstPrinter.cpp +++ b/interpreter/llvm/src/lib/MC/MCInstPrinter.cpp @@ -7,10 +7,10 @@ // //===----------------------------------------------------------------------===// +#include "llvm/MC/MCInstPrinter.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" #include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCInstPrinter.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" diff --git a/interpreter/llvm/src/lib/MC/MCInstrAnalysis.cpp b/interpreter/llvm/src/lib/MC/MCInstrAnalysis.cpp index 566944c53548a..280b5cf68c985 100644 --- a/interpreter/llvm/src/lib/MC/MCInstrAnalysis.cpp +++ b/interpreter/llvm/src/lib/MC/MCInstrAnalysis.cpp @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrAnalysis.h" +#include "llvm/MC/MCInst.h" #include 
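Two small quality fixes sit in the MCFragment hunks above: dump() becomes const-qualified so it can be called on const fragments and assemblers, and fill/org values are cast to an integer type before streaming (the template argument of the static_cast has been lost in this copy of the diff; upstream casts to unsigned). The cast matters because streaming a byte-sized integer picks the character overload:

    #include <cstdint>
    #include <iostream>

    // Streaming a uint8_t prints a character, not a number, so a fill value
    // of 42 comes out as '*'; the cast restores numeric output.
    void printFillValue(uint8_t Value) {
      std::cout << Value << "\n";                        // e.g. '*'
      std::cout << static_cast<unsigned>(Value) << "\n"; // 42
    }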
"llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCInstrInfo.h" #include diff --git a/interpreter/llvm/src/lib/MC/MCMachOStreamer.cpp b/interpreter/llvm/src/lib/MC/MCMachOStreamer.cpp index 1e9ef4163256a..674c7b9bf6197 100644 --- a/interpreter/llvm/src/lib/MC/MCMachOStreamer.cpp +++ b/interpreter/llvm/src/lib/MC/MCMachOStreamer.cpp @@ -32,8 +32,8 @@ #include "llvm/MC/MCValue.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" #include #include diff --git a/interpreter/llvm/src/lib/MC/MCNullStreamer.cpp b/interpreter/llvm/src/lib/MC/MCNullStreamer.cpp index d156f5d05a316..4db9a2c8d8de9 100644 --- a/interpreter/llvm/src/lib/MC/MCNullStreamer.cpp +++ b/interpreter/llvm/src/lib/MC/MCNullStreamer.cpp @@ -7,10 +7,10 @@ // //===----------------------------------------------------------------------===// -#include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" using namespace llvm; diff --git a/interpreter/llvm/src/lib/MC/MCObjectFileInfo.cpp b/interpreter/llvm/src/lib/MC/MCObjectFileInfo.cpp index b685790910d08..21c5516785efd 100644 --- a/interpreter/llvm/src/lib/MC/MCObjectFileInfo.cpp +++ b/interpreter/llvm/src/lib/MC/MCObjectFileInfo.cpp @@ -10,6 +10,8 @@ #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Triple.h" +#include "llvm/BinaryFormat/COFF.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCSection.h" @@ -17,8 +19,6 @@ #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCSectionWasm.h" -#include "llvm/Support/COFF.h" -#include "llvm/Support/ELF.h" using namespace llvm; @@ -241,6 +241,9 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(const Triple &T) { DwarfStrSection = Ctx->getMachOSection("__DWARF", "__debug_str", MachO::S_ATTR_DEBUG, SectionKind::getMetadata(), "info_string"); + DwarfStrOffSection = + Ctx->getMachOSection("__DWARF", "__debug_str_offs", MachO::S_ATTR_DEBUG, + SectionKind::getMetadata(), "section_str_off"); DwarfLocSection = Ctx->getMachOSection("__DWARF", "__debug_loc", MachO::S_ATTR_DEBUG, SectionKind::getMetadata(), "section_debug_loc"); @@ -557,6 +560,11 @@ void MCObjectFileInfo::initELFMCObjectFileInfo(const Triple &T) { DwarfAccelTypesSection = Ctx->getELFSection(".apple_types", ELF::SHT_PROGBITS, 0); + // String Offset and Address Sections + DwarfStrOffSection = + Ctx->getELFSection(".debug_str_offsets", DebugSecType, 0); + DwarfAddrSection = Ctx->getELFSection(".debug_addr", DebugSecType, 0); + // Fission Sections DwarfInfoDWOSection = Ctx->getELFSection(".debug_info.dwo", DebugSecType, 0); @@ -573,7 +581,6 @@ void MCObjectFileInfo::initELFMCObjectFileInfo(const Triple &T) { Ctx->getELFSection(".debug_loc.dwo", DebugSecType, 0); DwarfStrOffDWOSection = Ctx->getELFSection(".debug_str_offsets.dwo", DebugSecType, 0); - DwarfAddrSection = Ctx->getELFSection(".debug_addr", DebugSecType, 0); // DWP Sections DwarfCUIndexSection = @@ -695,6 +702,11 @@ void MCObjectFileInfo::initCOFFMCObjectFileInfo(const Triple &T) { COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ, SectionKind::getMetadata(), "info_string"); + DwarfStrOffSection = Ctx->getCOFFSection( + ".debug_str_offsets", + 
COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata(), "section_str_off"); DwarfLocSection = Ctx->getCOFFSection( ".debug_loc", COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | @@ -749,7 +761,7 @@ void MCObjectFileInfo::initCOFFMCObjectFileInfo(const Triple &T) { ".debug_str_offsets.dwo", COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ, - SectionKind::getMetadata()); + SectionKind::getMetadata(), "section_str_off_dwo"); DwarfAddrSection = Ctx->getCOFFSection( ".debug_addr", COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | diff --git a/interpreter/llvm/src/lib/MC/MCObjectStreamer.cpp b/interpreter/llvm/src/lib/MC/MCObjectStreamer.cpp index f7f2253256ebf..174397e273960 100644 --- a/interpreter/llvm/src/lib/MC/MCObjectStreamer.cpp +++ b/interpreter/llvm/src/lib/MC/MCObjectStreamer.cpp @@ -133,6 +133,11 @@ void MCObjectStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size, // Avoid fixups when possible. int64_t AbsValue; if (Value->evaluateAsAbsolute(AbsValue, getAssembler())) { + if (!isUIntN(8 * Size, AbsValue) && !isIntN(8 * Size, AbsValue)) { + getContext().reportError( + Loc, "value evaluated as " + Twine(AbsValue) + " is out of range."); + return; + } EmitIntValue(AbsValue, Size); return; } diff --git a/interpreter/llvm/src/lib/MC/MCObjectWriter.cpp b/interpreter/llvm/src/lib/MC/MCObjectWriter.cpp index 478b4e84e74ac..98ac48a23f91c 100644 --- a/interpreter/llvm/src/lib/MC/MCObjectWriter.cpp +++ b/interpreter/llvm/src/lib/MC/MCObjectWriter.cpp @@ -7,10 +7,10 @@ // //===----------------------------------------------------------------------===// +#include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCAssembler.h" -#include "llvm/MC/MCFragment.h" #include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCFragment.h" #include "llvm/MC/MCSymbol.h" using namespace llvm; diff --git a/interpreter/llvm/src/lib/MC/MCParser/AsmLexer.cpp b/interpreter/llvm/src/lib/MC/MCParser/AsmLexer.cpp index 38dadfe621355..2b963607b8374 100644 --- a/interpreter/llvm/src/lib/MC/MCParser/AsmLexer.cpp +++ b/interpreter/llvm/src/lib/MC/MCParser/AsmLexer.cpp @@ -11,12 +11,12 @@ // //===----------------------------------------------------------------------===// +#include "llvm/MC/MCParser/AsmLexer.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCParser/AsmLexer.h" #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/Support/SMLoc.h" #include "llvm/Support/SaveAndRestore.h" diff --git a/interpreter/llvm/src/lib/MC/MCParser/AsmParser.cpp b/interpreter/llvm/src/lib/MC/MCParser/AsmParser.cpp index 3b213ef4ce090..dad47e49e2c20 100644 --- a/interpreter/llvm/src/lib/MC/MCParser/AsmParser.cpp +++ b/interpreter/llvm/src/lib/MC/MCParser/AsmParser.cpp @@ -15,12 +15,13 @@ #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/None.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCCodeView.h" #include "llvm/MC/MCContext.h" @@ -47,7 +48,6 @@ #include "llvm/MC/MCValue.h" #include "llvm/Support/Casting.h" #include 
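The EmitValueImpl hunk above adds a range check before folding an absolute value into the stream: the value must be representable in 8*Size bits as either a signed or an unsigned integer, otherwise the context reports an error instead of silently truncating. A standalone reimplementation of that predicate (LLVM's isIntN/isUIntN do the same via MathExtras helpers):

    #include <cstdint>

    // True if Value fits in Size bytes, interpreted as signed or unsigned.
    bool fitsIn(int64_t Value, unsigned Size) {
      unsigned N = 8 * Size;
      if (N >= 64)
        return true;
      int64_t SMin = -(int64_t(1) << (N - 1));
      int64_t SMax = (int64_t(1) << (N - 1)) - 1;
      uint64_t UMax = (uint64_t(1) << N) - 1;
      bool IsInt  = Value >= SMin && Value <= SMax;
      bool IsUInt = Value >= 0 && uint64_t(Value) <= UMax;
      return IsInt || IsUInt;
    }
    // fitsIn(255, 1) == true, fitsIn(-128, 1) == true, fitsIn(256, 1) == false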
"llvm/Support/CommandLine.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/MemoryBuffer.h" @@ -703,7 +703,7 @@ const AsmToken &AsmParser::Lex() { // if it's a end of statement with a comment in it if (getTok().is(AsmToken::EndOfStatement)) { // if this is a line comment output it. - if (getTok().getString().front() != '\n' && + if (!getTok().getString().empty() && getTok().getString().front() != '\n' && getTok().getString().front() != '\r' && MAI.preserveAsmComments()) Out.addExplicitComment(Twine(getTok().getString())); } @@ -1523,7 +1523,7 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info, Lex(); if (Lexer.is(AsmToken::EndOfStatement)) { // if this is a line comment we can drop it safely - if (getTok().getString().front() == '\r' || + if (getTok().getString().empty() || getTok().getString().front() == '\r' || getTok().getString().front() == '\n') Out.AddBlankLine(); Lex(); diff --git a/interpreter/llvm/src/lib/MC/MCParser/COFFAsmParser.cpp b/interpreter/llvm/src/lib/MC/MCParser/COFFAsmParser.cpp index bec62ccb2f7f7..b83d68d4fe206 100644 --- a/interpreter/llvm/src/lib/MC/MCParser/COFFAsmParser.cpp +++ b/interpreter/llvm/src/lib/MC/MCParser/COFFAsmParser.cpp @@ -7,10 +7,11 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/COFF.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDirectives.h" #include "llvm/MC/MCObjectFileInfo.h" @@ -21,7 +22,6 @@ #include "llvm/MC/MCSectionCOFF.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/SectionKind.h" -#include "llvm/Support/COFF.h" #include "llvm/Support/SMLoc.h" #include #include diff --git a/interpreter/llvm/src/lib/MC/MCParser/DarwinAsmParser.cpp b/interpreter/llvm/src/lib/MC/MCParser/DarwinAsmParser.cpp index 73a7ad0500c37..f4152a9067a06 100644 --- a/interpreter/llvm/src/lib/MC/MCParser/DarwinAsmParser.cpp +++ b/interpreter/llvm/src/lib/MC/MCParser/DarwinAsmParser.cpp @@ -7,12 +7,13 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/MachO.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDirectives.h" #include "llvm/MC/MCObjectFileInfo.h" @@ -25,10 +26,9 @@ #include "llvm/MC/SectionKind.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/MachO.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/SMLoc.h" #include "llvm/Support/SourceMgr.h" +#include "llvm/Support/raw_ostream.h" #include #include #include diff --git a/interpreter/llvm/src/lib/MC/MCParser/ELFAsmParser.cpp b/interpreter/llvm/src/lib/MC/MCParser/ELFAsmParser.cpp index 401011a027f42..a407691b0bd17 100644 --- a/interpreter/llvm/src/lib/MC/MCParser/ELFAsmParser.cpp +++ b/interpreter/llvm/src/lib/MC/MCParser/ELFAsmParser.cpp @@ -7,8 +7,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAsmInfo.h" #include 
"llvm/MC/MCContext.h" #include "llvm/MC/MCDirectives.h" @@ -23,7 +24,6 @@ #include "llvm/MC/MCSymbolELF.h" #include "llvm/MC/SectionKind.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/SMLoc.h" #include @@ -603,6 +603,8 @@ bool ELFAsmParser::ParseSectionArguments(bool IsPush, SMLoc loc) { Type = ELF::SHT_NOTE; else if (TypeName == "unwind") Type = ELF::SHT_X86_64_UNWIND; + else if (TypeName == "llvm_odrtab") + Type = ELF::SHT_LLVM_ODRTAB; else if (TypeName.getAsInteger(0, Type)) return TokError("unknown section type"); } diff --git a/interpreter/llvm/src/lib/MC/MCParser/MCAsmLexer.cpp b/interpreter/llvm/src/lib/MC/MCParser/MCAsmLexer.cpp index 1d12ab8582841..8f845ee1d76fa 100644 --- a/interpreter/llvm/src/lib/MC/MCParser/MCAsmLexer.cpp +++ b/interpreter/llvm/src/lib/MC/MCParser/MCAsmLexer.cpp @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/StringRef.h" #include "llvm/MC/MCParser/MCAsmLexer.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Support/SMLoc.h" using namespace llvm; diff --git a/interpreter/llvm/src/lib/MC/MCParser/MCAsmParser.cpp b/interpreter/llvm/src/lib/MC/MCParser/MCAsmParser.cpp index 27b37f3e2dfbc..ea36b3b9b3b2a 100644 --- a/interpreter/llvm/src/lib/MC/MCParser/MCAsmParser.cpp +++ b/interpreter/llvm/src/lib/MC/MCParser/MCAsmParser.cpp @@ -7,10 +7,10 @@ // //===----------------------------------------------------------------------===// +#include "llvm/MC/MCParser/MCAsmParser.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/MC/MCParser/MCAsmLexer.h" -#include "llvm/MC/MCParser/MCAsmParser.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" #include "llvm/MC/MCParser/MCTargetAsmParser.h" #include "llvm/Support/Debug.h" diff --git a/interpreter/llvm/src/lib/MC/MCParser/MCTargetAsmParser.cpp b/interpreter/llvm/src/lib/MC/MCParser/MCTargetAsmParser.cpp index 5f821443bb964..64ac82a6c66f3 100644 --- a/interpreter/llvm/src/lib/MC/MCParser/MCTargetAsmParser.cpp +++ b/interpreter/llvm/src/lib/MC/MCParser/MCTargetAsmParser.cpp @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/MC/MCContext.h" #include "llvm/MC/MCParser/MCTargetAsmParser.h" +#include "llvm/MC/MCContext.h" using namespace llvm; diff --git a/interpreter/llvm/src/lib/MC/MCRegisterInfo.cpp b/interpreter/llvm/src/lib/MC/MCRegisterInfo.cpp index a75100a4876b7..0f76c1838b518 100644 --- a/interpreter/llvm/src/lib/MC/MCRegisterInfo.cpp +++ b/interpreter/llvm/src/lib/MC/MCRegisterInfo.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/DenseMap.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/Support/ErrorHandling.h" #include #include diff --git a/interpreter/llvm/src/lib/MC/MCSection.cpp b/interpreter/llvm/src/lib/MC/MCSection.cpp index 7986c01220434..d141dd6627c46 100644 --- a/interpreter/llvm/src/lib/MC/MCSection.cpp +++ b/interpreter/llvm/src/lib/MC/MCSection.cpp @@ -7,10 +7,10 @@ // //===----------------------------------------------------------------------===// +#include "llvm/MC/MCSection.h" #include "llvm/ADT/SmallVector.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCFragment.h" -#include "llvm/MC/MCSection.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" @@ -86,7 +86,7 @@ 
MCSection::getSubsectionInsertionPoint(unsigned Subsection) { } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -LLVM_DUMP_METHOD void MCSection::dump() { +LLVM_DUMP_METHOD void MCSection::dump() const { raw_ostream &OS = errs(); OS << " diff --git a/interpreter/llvm/src/lib/MC/MCSectionELF.cpp b/interpreter/llvm/src/lib/MC/MCSectionELF.cpp index 78fe01cca24a3..2f4f61aa4d504 100644 --- a/interpreter/llvm/src/lib/MC/MCSectionELF.cpp +++ b/interpreter/llvm/src/lib/MC/MCSectionELF.cpp @@ -7,11 +7,11 @@ // //===----------------------------------------------------------------------===// +#include "llvm/MC/MCSectionELF.h" #include "llvm/ADT/Triple.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCSectionELF.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include @@ -147,6 +147,8 @@ void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T, // Print hex value of the flag while we do not have // any standard symbolic representation of the flag. OS << "0x7000001e"; + else if (Type == ELF::SHT_LLVM_ODRTAB) + OS << "llvm_odrtab"; else report_fatal_error("unsupported type 0x" + Twine::utohexstr(Type) + " for section " + getSectionName()); diff --git a/interpreter/llvm/src/lib/MC/MCStreamer.cpp b/interpreter/llvm/src/lib/MC/MCStreamer.cpp index c9a6f12b6a58d..2bfb9a63eedbd 100644 --- a/interpreter/llvm/src/lib/MC/MCStreamer.cpp +++ b/interpreter/llvm/src/lib/MC/MCStreamer.cpp @@ -7,9 +7,11 @@ // //===----------------------------------------------------------------------===// +#include "llvm/MC/MCStreamer.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/COFF.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCCodeView.h" @@ -21,19 +23,17 @@ #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCSectionCOFF.h" -#include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCWin64EH.h" #include "llvm/MC/MCWinEH.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/COFF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/LEB128.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include #include #include +#include #include using namespace llvm; diff --git a/interpreter/llvm/src/lib/MC/MCSubtargetInfo.cpp b/interpreter/llvm/src/lib/MC/MCSubtargetInfo.cpp index 777b4e3d6b676..385cdcc623202 100644 --- a/interpreter/llvm/src/lib/MC/MCSubtargetInfo.cpp +++ b/interpreter/llvm/src/lib/MC/MCSubtargetInfo.cpp @@ -7,11 +7,11 @@ // //===----------------------------------------------------------------------===// +#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/MC/MCSchedule.h" -#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/SubtargetFeature.h" #include "llvm/Support/raw_ostream.h" #include diff --git a/interpreter/llvm/src/lib/MC/MCSymbol.cpp b/interpreter/llvm/src/lib/MC/MCSymbol.cpp index cb262542b89f8..9abaaef2fe848 100644 --- a/interpreter/llvm/src/lib/MC/MCSymbol.cpp +++ b/interpreter/llvm/src/lib/MC/MCSymbol.cpp @@ -7,12 +7,12 @@ // //===----------------------------------------------------------------------===// +#include "llvm/MC/MCSymbol.h" #include "llvm/ADT/StringRef.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" 
#include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFragment.h" -#include "llvm/MC/MCSymbol.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" diff --git a/interpreter/llvm/src/lib/MC/MCSymbolELF.cpp b/interpreter/llvm/src/lib/MC/MCSymbolELF.cpp index ffa8260d43420..67449eb6dcf93 100644 --- a/interpreter/llvm/src/lib/MC/MCSymbolELF.cpp +++ b/interpreter/llvm/src/lib/MC/MCSymbolELF.cpp @@ -7,10 +7,10 @@ // //===----------------------------------------------------------------------===// -#include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCSymbolELF.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCFixupKindInfo.h" -#include "llvm/Support/ELF.h" namespace llvm { diff --git a/interpreter/llvm/src/lib/MC/MCTargetOptions.cpp b/interpreter/llvm/src/lib/MC/MCTargetOptions.cpp index 5d666b67fddbe..b85e53db5d616 100644 --- a/interpreter/llvm/src/lib/MC/MCTargetOptions.cpp +++ b/interpreter/llvm/src/lib/MC/MCTargetOptions.cpp @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/StringRef.h" #include "llvm/MC/MCTargetOptions.h" +#include "llvm/ADT/StringRef.h" using namespace llvm; diff --git a/interpreter/llvm/src/lib/MC/MCWasmObjectTargetWriter.cpp b/interpreter/llvm/src/lib/MC/MCWasmObjectTargetWriter.cpp index a09a17d7a124f..301f30d4f6ecf 100644 --- a/interpreter/llvm/src/lib/MC/MCWasmObjectTargetWriter.cpp +++ b/interpreter/llvm/src/lib/MC/MCWasmObjectTargetWriter.cpp @@ -17,11 +17,5 @@ using namespace llvm; MCWasmObjectTargetWriter::MCWasmObjectTargetWriter(bool Is64Bit_) : Is64Bit(Is64Bit_) {} -bool MCWasmObjectTargetWriter::needsRelocateWithSymbol(const MCSymbol &Sym, - unsigned Type) const { - return false; -} - -void MCWasmObjectTargetWriter::sortRelocs( - const MCAssembler &Asm, std::vector &Relocs) { -} +// Pin the vtable to this object file +MCWasmObjectTargetWriter::~MCWasmObjectTargetWriter() = default; diff --git a/interpreter/llvm/src/lib/MC/MCWasmStreamer.cpp b/interpreter/llvm/src/lib/MC/MCWasmStreamer.cpp index 59b62b8d37c30..02fa070f0c57d 100644 --- a/interpreter/llvm/src/lib/MC/MCWasmStreamer.cpp +++ b/interpreter/llvm/src/lib/MC/MCWasmStreamer.cpp @@ -98,18 +98,30 @@ bool MCWasmStreamer::EmitSymbolAttribute(MCSymbol *S, MCSymbolAttr Attribute) { case MCSA_WeakDefAutoPrivate: case MCSA_Invalid: case MCSA_IndirectSymbol: + case MCSA_Hidden: return false; + + case MCSA_Weak: + case MCSA_WeakReference: + Symbol->setWeak(true); + Symbol->setExternal(true); + break; + case MCSA_Global: Symbol->setExternal(true); break; + case MCSA_ELF_TypeFunction: Symbol->setIsFunction(true); break; + case MCSA_ELF_TypeObject: Symbol->setIsFunction(false); break; + default: // unrecognized directive + llvm_unreachable("unexpected MCSymbolAttr"); return false; } diff --git a/interpreter/llvm/src/lib/MC/WinCOFFStreamer.cpp b/interpreter/llvm/src/lib/MC/MCWinCOFFStreamer.cpp similarity index 97% rename from interpreter/llvm/src/lib/MC/WinCOFFStreamer.cpp rename to interpreter/llvm/src/lib/MC/MCWinCOFFStreamer.cpp index c26d87f36f83d..bf341bb1f4511 100644 --- a/interpreter/llvm/src/lib/MC/WinCOFFStreamer.cpp +++ b/interpreter/llvm/src/lib/MC/MCWinCOFFStreamer.cpp @@ -1,4 +1,4 @@ -//===- llvm/MC/WinCOFFStreamer.cpp ----------------------------------------===// +//===- llvm/MC/MCWinCOFFStreamer.cpp --------------------------------------===// // // The LLVM Compiler Infrastructure // @@ -15,6 +15,7 @@ #include "llvm/ADT/SmallVector.h" #include 
"llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/COFF.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCCodeEmitter.h" @@ -28,11 +29,10 @@ #include "llvm/MC/MCSymbolCOFF.h" #include "llvm/MC/MCWinCOFFStreamer.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/COFF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/SMLoc.h" +#include "llvm/Support/raw_ostream.h" #include #include #include @@ -190,7 +190,8 @@ void MCWinCOFFStreamer::EmitCOFFSafeSEH(MCSymbol const *Symbol) { << COFF::SCT_COMPLEX_TYPE_SHIFT); } -void MCWinCOFFStreamer::EmitCOFFSectionIndex(MCSymbol const *Symbol) { +void MCWinCOFFStreamer::EmitCOFFSectionIndex(const MCSymbol *Symbol) { + visitUsedSymbol(*Symbol); MCDataFragment *DF = getOrCreateDataFragment(); const MCSymbolRefExpr *SRE = MCSymbolRefExpr::create(Symbol, getContext()); MCFixup Fixup = MCFixup::create(DF->getContents().size(), SRE, FK_SecRel_2); @@ -198,8 +199,9 @@ void MCWinCOFFStreamer::EmitCOFFSectionIndex(MCSymbol const *Symbol) { DF->getContents().resize(DF->getContents().size() + 2, 0); } -void MCWinCOFFStreamer::EmitCOFFSecRel32(MCSymbol const *Symbol, +void MCWinCOFFStreamer::EmitCOFFSecRel32(const MCSymbol *Symbol, uint64_t Offset) { + visitUsedSymbol(*Symbol); MCDataFragment *DF = getOrCreateDataFragment(); // Create Symbol A for the relocation relative reference. const MCExpr *MCE = MCSymbolRefExpr::create(Symbol, getContext()); diff --git a/interpreter/llvm/src/lib/MC/MCWinEH.cpp b/interpreter/llvm/src/lib/MC/MCWinEH.cpp index 21a913999f64e..a5d0f5a2cb750 100644 --- a/interpreter/llvm/src/lib/MC/MCWinEH.cpp +++ b/interpreter/llvm/src/lib/MC/MCWinEH.cpp @@ -7,14 +7,14 @@ // //===----------------------------------------------------------------------===// +#include "llvm/MC/MCWinEH.h" #include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/COFF.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCSectionCOFF.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/MC/MCWinEH.h" -#include "llvm/Support/COFF.h" namespace llvm { namespace WinEH { diff --git a/interpreter/llvm/src/lib/MC/MachObjectWriter.cpp b/interpreter/llvm/src/lib/MC/MachObjectWriter.cpp index d9ccf0dd661f1..62bf0a58fdfa9 100644 --- a/interpreter/llvm/src/lib/MC/MachObjectWriter.cpp +++ b/interpreter/llvm/src/lib/MC/MachObjectWriter.cpp @@ -8,8 +8,9 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/Twine.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/BinaryFormat/MachO.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCAssembler.h" @@ -27,7 +28,6 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MachO.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include @@ -449,7 +449,7 @@ void MachObjectWriter::recordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target, - bool &IsPCRel, uint64_t &FixedValue) { + uint64_t &FixedValue) { TargetObjectWriter->recordRelocation(this, Asm, Layout, Fragment, Fixup, Target, FixedValue); } diff --git a/interpreter/llvm/src/lib/MC/StringTableBuilder.cpp 
b/interpreter/llvm/src/lib/MC/StringTableBuilder.cpp index a0fb33846fcf7..6025a20a9c193 100644 --- a/interpreter/llvm/src/lib/MC/StringTableBuilder.cpp +++ b/interpreter/llvm/src/lib/MC/StringTableBuilder.cpp @@ -7,11 +7,11 @@ // //===----------------------------------------------------------------------===// +#include "llvm/MC/StringTableBuilder.h" #include "llvm/ADT/CachedHashString.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringRef.h" -#include "llvm/MC/StringTableBuilder.h" -#include "llvm/Support/COFF.h" +#include "llvm/BinaryFormat/COFF.h" #include "llvm/Support/Endian.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" diff --git a/interpreter/llvm/src/lib/MC/SubtargetFeature.cpp b/interpreter/llvm/src/lib/MC/SubtargetFeature.cpp index 51aaa4b0aa259..b68e88ca5725a 100644 --- a/interpreter/llvm/src/lib/MC/SubtargetFeature.cpp +++ b/interpreter/llvm/src/lib/MC/SubtargetFeature.cpp @@ -11,12 +11,12 @@ // //===----------------------------------------------------------------------===// +#include "llvm/MC/SubtargetFeature.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" -#include "llvm/MC/SubtargetFeature.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Format.h" diff --git a/interpreter/llvm/src/lib/MC/WasmObjectWriter.cpp b/interpreter/llvm/src/lib/MC/WasmObjectWriter.cpp index 0540c4c47a3f0..0d31f65c49d9f 100644 --- a/interpreter/llvm/src/lib/MC/WasmObjectWriter.cpp +++ b/interpreter/llvm/src/lib/MC/WasmObjectWriter.cpp @@ -13,6 +13,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/BinaryFormat/Wasm.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAsmLayout.h" @@ -31,15 +32,14 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/LEB128.h" #include "llvm/Support/StringSaver.h" -#include "llvm/Support/Wasm.h" #include using namespace llvm; -#undef DEBUG_TYPE -#define DEBUG_TYPE "reloc-info" +#define DEBUG_TYPE "mc" namespace { + // For patching purposes, we need to remember where each section starts, both // for patching up the section size field, and for patching up references to // locations within the section. @@ -50,6 +50,124 @@ struct SectionBookkeeping { uint64_t ContentsOffset; }; +// The signature of a wasm function, in a struct capable of being used as a +// DenseMap key. +struct WasmFunctionType { + // Support empty and tombstone instances, needed by DenseMap. + enum { Plain, Empty, Tombstone } State; + + // The return types of the function. + SmallVector Returns; + + // The parameter types of the function. + SmallVector Params; + + WasmFunctionType() : State(Plain) {} + + bool operator==(const WasmFunctionType &Other) const { + return State == Other.State && Returns == Other.Returns && + Params == Other.Params; + } +}; + +// Traits for using WasmFunctionType in a DenseMap. 
+struct WasmFunctionTypeDenseMapInfo { + static WasmFunctionType getEmptyKey() { + WasmFunctionType FuncTy; + FuncTy.State = WasmFunctionType::Empty; + return FuncTy; + } + static WasmFunctionType getTombstoneKey() { + WasmFunctionType FuncTy; + FuncTy.State = WasmFunctionType::Tombstone; + return FuncTy; + } + static unsigned getHashValue(const WasmFunctionType &FuncTy) { + uintptr_t Value = FuncTy.State; + for (wasm::ValType Ret : FuncTy.Returns) + Value += DenseMapInfo::getHashValue(int32_t(Ret)); + for (wasm::ValType Param : FuncTy.Params) + Value += DenseMapInfo::getHashValue(int32_t(Param)); + return Value; + } + static bool isEqual(const WasmFunctionType &LHS, + const WasmFunctionType &RHS) { + return LHS == RHS; + } +}; + +// A wasm import to be written into the import section. +struct WasmImport { + StringRef ModuleName; + StringRef FieldName; + unsigned Kind; + int32_t Type; +}; + +// A wasm function to be written into the function section. +struct WasmFunction { + int32_t Type; + const MCSymbolWasm *Sym; +}; + +// A wasm export to be written into the export section. +struct WasmExport { + StringRef FieldName; + unsigned Kind; + uint32_t Index; +}; + +// A wasm global to be written into the global section. +struct WasmGlobal { + wasm::ValType Type; + bool IsMutable; + bool HasImport; + uint64_t InitialValue; + uint32_t ImportIndex; +}; + +// Information about a single relocation. +struct WasmRelocationEntry { + uint64_t Offset; // Where is the relocation. + const MCSymbolWasm *Symbol; // The symbol to relocate with. + int64_t Addend; // A value to add to the symbol. + unsigned Type; // The type of the relocation. + const MCSectionWasm *FixupSection;// The section the relocation is targeting. + + WasmRelocationEntry(uint64_t Offset, const MCSymbolWasm *Symbol, + int64_t Addend, unsigned Type, + const MCSectionWasm *FixupSection) + : Offset(Offset), Symbol(Symbol), Addend(Addend), Type(Type), + FixupSection(FixupSection) {} + + bool hasAddend() const { + switch (Type) { + case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_LEB: + case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_SLEB: + case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_I32: + return true; + default: + return false; + } + } + + void print(raw_ostream &Out) const { + Out << "Off=" << Offset << ", Sym=" << *Symbol << ", Addend=" << Addend + << ", Type=" << Type << ", FixupSection=" << FixupSection; + } + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + LLVM_DUMP_METHOD void dump() const { print(dbgs()); } +#endif +}; + +#if !defined(NDEBUG) +raw_ostream &operator<<(raw_ostream &OS, const WasmRelocationEntry &Rel) { + Rel.print(OS); + return OS; +} +#endif + class WasmObjectWriter : public MCObjectWriter { /// Helper struct for containing some precomputed information on symbols. struct WasmSymbolData { @@ -69,17 +187,23 @@ class WasmObjectWriter : public MCObjectWriter { // Relocations for fixing up references in the data section. std::vector DataRelocations; - // Fixups for call_indirect type indices. - std::vector TypeIndexFixups; - // Index values to use for fixing up call_indirect type indices. - std::vector TypeIndexFixupTypes; + // Maps function symbols to the index of the type of the function + DenseMap TypeIndices; + // Maps function symbols to the table element index space. Used + // for TABLE_INDEX relocation types (i.e. address taken functions). + DenseMap IndirectSymbolIndices; + // Maps function/global symbols to the function/global index space. 
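WasmFunctionType above carries a Plain/Empty/Tombstone state precisely so it can serve as a DenseMap key: DenseMap reserves two sentinel keys and requires a DenseMapInfo-style traits class to manufacture, hash, and compare them. A minimal key/traits pair in the same shape, assuming LLVM's DenseMap header is available:

    #include "llvm/ADT/DenseMap.h"

    // The State enum exists only so the traits class can produce the two
    // sentinel keys DenseMap needs; Plain keys carry the real payload.
    struct Key {
      enum StateKind { Plain, Empty, Tombstone };
      StateKind State;
      int Value;
      bool operator==(const Key &O) const {
        return State == O.State && Value == O.Value;
      }
    };

    struct KeyInfo {
      static Key getEmptyKey() { return {Key::Empty, 0}; }
      static Key getTombstoneKey() { return {Key::Tombstone, 0}; }
      static unsigned getHashValue(const Key &K) {
        return unsigned(K.State) * 37u + unsigned(K.Value);
      }
      static bool isEqual(const Key &L, const Key &R) { return L == R; }
    };

    llvm::DenseMap<Key, unsigned, KeyInfo> Indices; // as FunctionTypeIndices is keyed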
+ DenseMap SymbolIndices; + + DenseMap + FunctionTypeIndices; + SmallVector FunctionTypes; // TargetObjectWriter wrappers. bool is64Bit() const { return TargetObjectWriter->is64Bit(); } - unsigned getRelocType(MCContext &Ctx, const MCValue &Target, - const MCFixup &Fixup, bool IsPCRel) const { - return TargetObjectWriter->getRelocType(Ctx, Target, Fixup, IsPCRel); + unsigned getRelocType(const MCValue &Target, const MCFixup &Fixup) const { + return TargetObjectWriter->getRelocType(Target, Fixup); } void startSection(SectionBookkeeping &Section, unsigned SectionId, @@ -91,28 +215,71 @@ class WasmObjectWriter : public MCObjectWriter { : MCObjectWriter(OS, /*IsLittleEndian=*/true), TargetObjectWriter(MOTW) {} private: + ~WasmObjectWriter() override; + void reset() override { + CodeRelocations.clear(); + DataRelocations.clear(); + TypeIndices.clear(); + SymbolIndices.clear(); + IndirectSymbolIndices.clear(); + FunctionTypeIndices.clear(); + FunctionTypes.clear(); MCObjectWriter::reset(); } - ~WasmObjectWriter() override; - void writeHeader(const MCAssembler &Asm); - void writeValueType(wasm::ValType Ty) { - encodeSLEB128(int32_t(Ty), getStream()); - } - void recordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, const MCFixup &Fixup, - MCValue Target, bool &IsPCRel, - uint64_t &FixedValue) override; + MCValue Target, uint64_t &FixedValue) override; void executePostLayoutBinding(MCAssembler &Asm, const MCAsmLayout &Layout) override; void writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) override; + + void writeString(const StringRef Str) { + encodeULEB128(Str.size(), getStream()); + writeBytes(Str); + } + + void writeValueType(wasm::ValType Ty) { + encodeSLEB128(int32_t(Ty), getStream()); + } + + void writeTypeSection(const SmallVector &FunctionTypes); + void writeImportSection(const SmallVector &Imports); + void writeFunctionSection(const SmallVector &Functions); + void writeTableSection(uint32_t NumElements); + void writeMemorySection(const SmallVector &DataBytes); + void writeGlobalSection(const SmallVector &Globals); + void writeExportSection(const SmallVector &Exports); + void writeElemSection(const SmallVector &TableElems); + void writeCodeSection(const MCAssembler &Asm, const MCAsmLayout &Layout, + const SmallVector &Functions); + uint64_t + writeDataSection(const SmallVector &DataBytes); + void writeNameSection(const SmallVector &Functions, + const SmallVector &Imports, + uint32_t NumFuncImports); + void writeCodeRelocSection(); + void writeDataRelocSection(uint64_t DataSectionHeaderSize); + void writeLinkingMetaDataSection(uint32_t DataSize, uint32_t DataAlignment, + ArrayRef WeakSymbols, + bool HasStackPointer, + uint32_t StackPointerGlobal); + + void applyRelocations(ArrayRef Relocations, + uint64_t ContentsOffset); + + void writeRelocations(ArrayRef Relocations, + uint64_t HeaderSize); + uint32_t getRelocationIndexValue(const WasmRelocationEntry &RelEntry); + uint32_t getFunctionType(const MCSymbolWasm& Symbol); + uint32_t registerFunctionType(const MCSymbolWasm& Symbol); }; + } // end anonymous namespace WasmObjectWriter::~WasmObjectWriter() {} @@ -134,6 +301,7 @@ void WasmObjectWriter::startSection(SectionBookkeeping &Section, assert((Name != nullptr) == (SectionId == wasm::WASM_SEC_CUSTOM) && "Only custom sections can have names"); + DEBUG(dbgs() << "startSection " << SectionId << ": " << Name << "\n"); encodeULEB128(SectionId, getStream()); Section.SizeOffset = getStream().tell(); @@ -147,8 +315,8 @@ void 
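The writeString helper above captures wasm's string encoding: a ULEB128 byte count followed by the raw bytes. A self-contained version of both pieces, writing into a byte vector instead of the writer's stream:

    #include <cstdint>
    #include <string>
    #include <vector>

    // Minimal ULEB128 encoder: 7 payload bits per byte, high bit set while
    // more bytes follow.
    void encodeULEB128(uint64_t Value, std::vector<uint8_t> &Out) {
      do {
        uint8_t Byte = Value & 0x7f;
        Value >>= 7;
        if (Value != 0)
          Byte |= 0x80; // continuation bit
        Out.push_back(Byte);
      } while (Value != 0);
    }

    // Length-prefixed string, as writeString does above.
    void writeWasmString(const std::string &Str, std::vector<uint8_t> &Out) {
      encodeULEB128(Str.size(), Out);
      Out.insert(Out.end(), Str.begin(), Str.end());
    }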
WasmObjectWriter::startSection(SectionBookkeeping &Section, // Custom sections in wasm also have a string identifier. if (SectionId == wasm::WASM_SEC_CUSTOM) { - encodeULEB128(strlen(Name), getStream()); - writeBytes(Name); + assert(Name); + writeString(StringRef(Name)); } } @@ -159,6 +327,7 @@ void WasmObjectWriter::endSection(SectionBookkeeping &Section) { if (uint32_t(Size) != Size) report_fatal_error("section size does not fit in a uint32_t"); + DEBUG(dbgs() << "endSection size=" << Size << "\n"); unsigned Padding = PaddingFor5ByteULEB128(Size); // Write the final section size to the payload_len field, which follows @@ -183,8 +352,11 @@ void WasmObjectWriter::recordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target, - bool &IsPCRel, uint64_t &FixedValue) { - MCSectionWasm &FixupSection = cast(*Fragment->getParent()); + uint64_t &FixedValue) { + MCAsmBackend &Backend = Asm.getBackend(); + bool IsPCRel = Backend.getFixupKindInfo(Fixup.getKind()).Flags & + MCFixupKindInfo::FKF_IsPCRel; + const auto &FixupSection = cast(*Fragment->getParent()); uint64_t C = Target.getConstant(); uint64_t FixupOffset = Layout.getFragmentOffset(Fragment) + Fixup.getOffset(); MCContext &Ctx = Asm.getContext(); @@ -234,15 +406,11 @@ void WasmObjectWriter::recordRelocation(MCAssembler &Asm, const MCSymbolRefExpr *RefA = Target.getSymA(); const auto *SymA = RefA ? cast(&RefA->getSymbol()) : nullptr; - bool ViaWeakRef = false; if (SymA && SymA->isVariable()) { const MCExpr *Expr = SymA->getVariableValue(); - if (const auto *Inner = dyn_cast(Expr)) { - if (Inner->getKind() == MCSymbolRefExpr::VK_WEAKREF) { - SymA = cast(&Inner->getSymbol()); - ViaWeakRef = true; - } - } + const auto *Inner = cast(Expr); + if (Inner->getKind() == MCSymbolRefExpr::VK_WEAKREF) + llvm_unreachable("weakref used in reloc not yet implemented"); } // Put any constant offset in an addend. Offsets can be negative, and @@ -250,27 +418,16 @@ void WasmObjectWriter::recordRelocation(MCAssembler &Asm, // be negative and don't wrap. FixedValue = 0; - if (SymA) { - if (ViaWeakRef) - llvm_unreachable("weakref used in reloc not yet implemented"); - else - SymA->setUsedInReloc(); - } + if (SymA) + SymA->setUsedInReloc(); - if (RefA) { - if (RefA->getKind() == MCSymbolRefExpr::VK_WebAssembly_TYPEINDEX) { - assert(C == 0); - WasmRelocationEntry Rec(FixupOffset, SymA, C, - wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB, - &FixupSection); - TypeIndexFixups.push_back(Rec); - return; - } - } + assert(!IsPCRel); + assert(SymA); - unsigned Type = getRelocType(Ctx, Target, Fixup, IsPCRel); + unsigned Type = getRelocType(Target, Fixup); WasmRelocationEntry Rec(FixupOffset, SymA, C, Type, &FixupSection); + DEBUG(dbgs() << "WasmReloc: " << Rec << "\n"); if (FixupSection.hasInstructions()) CodeRelocations.push_back(Rec); @@ -278,86 +435,6 @@ void WasmObjectWriter::recordRelocation(MCAssembler &Asm, DataRelocations.push_back(Rec); } -namespace { - -// The signature of a wasm function, in a struct capable of being used as a -// DenseMap key. -struct WasmFunctionType { - // Support empty and tombstone instances, needed by DenseMap. - enum { Plain, Empty, Tombstone } State; - - // The return types of the function. - SmallVector Returns; - - // The parameter types of the function. 
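startSection/endSection above cooperate through a fixed-width size field: startSection records the stream offset of the payload_len field and emits a placeholder, and endSection later patches the real payload length in as a 5-byte padded ULEB128 (continuation bits forced on the first four bytes) so no bytes after the field have to move. The padding trick in miniature:

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Encode Value as exactly 5 ULEB128 bytes so it can overwrite a
    // same-width placeholder in place.
    void writePaddedULEB128(uint32_t Value, uint8_t *Out) {
      for (int I = 0; I < 5; ++I) {
        uint8_t Byte = Value & 0x7f;
        Value >>= 7;
        if (I != 4)
          Byte |= 0x80; // keep the continuation bit to pin the width
        Out[I] = Byte;
      }
    }

    // Usage sketch: after the payload is written, patch the recorded field.
    void patchSectionSize(std::vector<uint8_t> &Stream, size_t SizeOffset) {
      uint32_t PayloadSize = uint32_t(Stream.size() - (SizeOffset + 5));
      writePaddedULEB128(PayloadSize, &Stream[SizeOffset]);
    }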
- SmallVector<wasm::ValType, 4> Params; - - WasmFunctionType() : State(Plain) {} - - bool operator==(const WasmFunctionType &Other) const { - return State == Other.State && Returns == Other.Returns && - Params == Other.Params; - } -}; - -// Traits for using WasmFunctionType in a DenseMap. -struct WasmFunctionTypeDenseMapInfo { - static WasmFunctionType getEmptyKey() { - WasmFunctionType FuncTy; - FuncTy.State = WasmFunctionType::Empty; - return FuncTy; - } - static WasmFunctionType getTombstoneKey() { - WasmFunctionType FuncTy; - FuncTy.State = WasmFunctionType::Tombstone; - return FuncTy; - } - static unsigned getHashValue(const WasmFunctionType &FuncTy) { - uintptr_t Value = FuncTy.State; - for (wasm::ValType Ret : FuncTy.Returns) - Value += DenseMapInfo<int32_t>::getHashValue(int32_t(Ret)); - for (wasm::ValType Param : FuncTy.Params) - Value += DenseMapInfo<int32_t>::getHashValue(int32_t(Param)); - return Value; - } - static bool isEqual(const WasmFunctionType &LHS, - const WasmFunctionType &RHS) { - return LHS == RHS; - } -}; - -// A wasm import to be written into the import section. -struct WasmImport { - StringRef ModuleName; - StringRef FieldName; - unsigned Kind; - int32_t Type; -}; - -// A wasm function to be written into the function section. -struct WasmFunction { - int32_t Type; - const MCSymbolWasm *Sym; -}; - -// A wasm export to be written into the export section. -struct WasmExport { - StringRef FieldName; - unsigned Kind; - uint32_t Index; -}; - -// A wasm global to be written into the global section. -struct WasmGlobal { - wasm::ValType Type; - bool IsMutable; - bool HasImport; - uint64_t InitialValue; - uint32_t ImportIndex; -}; - -} // end anonymous namespace - // Write X as an (unsigned) LEB value at offset Offset in Stream, padded // to allow patching. static void @@ -395,11 +472,10 @@ static uint32_t ProvisionalValue(const WasmRelocationEntry &RelEntry) { const MCSymbolWasm *Sym = RelEntry.Symbol; // For undefined symbols, use a hopefully invalid value. - if (!Sym->isDefined(false)) + if (!Sym->isDefined(/*SetUsed=*/false)) return UINT32_MAX; - MCSectionWasm &Section = - cast<MCSectionWasm>(RelEntry.Symbol->getSection(false)); + const auto &Section = cast<MCSectionWasm>(RelEntry.Symbol->getSection(false)); uint64_t Address = Section.getSectionOffset() + RelEntry.Addend; // Ignore overflow. LLVM allows address arithmetic to silently wrap.
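A note on the WritePatchableLEB/WritePatchableSLEB helpers referenced above: the scheme only works because every relocatable LEB128 field is emitted padded to a fixed five bytes, so the writer can later overwrite the value in place without shifting the rest of the section. A minimal standalone sketch of that encoding (plain C++, no LLVM dependencies; writePatchableULEB is an illustrative name, not LLVM's actual helper):

#include <cassert>
#include <cstdint>
#include <cstdio>
#include <vector>

// Encode Value as exactly 5 ULEB128 bytes: every byte but the last keeps the
// continuation bit set, even when its payload bits are zero. Any uint32_t
// fits in 5 groups of 7 bits, so the same slot can hold any later value.
static void writePatchableULEB(std::vector<uint8_t> &Buf, size_t Offset,
                               uint32_t Value) {
  assert(Offset + 5 <= Buf.size());
  for (int I = 0; I < 5; ++I) {
    uint8_t Byte = Value & 0x7f;
    Value >>= 7;
    if (I != 4)
      Byte |= 0x80; // continuation bit keeps the field 5 bytes wide
    Buf[Offset + I] = Byte;
  }
}

int main() {
  std::vector<uint8_t> Section(5, 0);
  writePatchableULEB(Section, 0, 0);      // provisional value
  writePatchableULEB(Section, 0, 624485); // patched later, same width
  for (uint8_t B : Section)
    std::printf("%02x ", B); // e5 8e a6 80 00
  std::printf("\n");
}

The canonical (minimal-length) encoding of 624485 is only three bytes; the two trailing pad bytes are the price paid for in-place patchability.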
@@ -408,147 +484,487 @@ static uint32_t ProvisionalValue(const WasmRelocationEntry &RelEntry) { return Value; } +uint32_t WasmObjectWriter::getRelocationIndexValue( + const WasmRelocationEntry &RelEntry) { + switch (RelEntry.Type) { + case wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB: + case wasm::R_WEBASSEMBLY_TABLE_INDEX_I32: + if (!IndirectSymbolIndices.count(RelEntry.Symbol)) + report_fatal_error("symbol not found table index space: " + + RelEntry.Symbol->getName()); + return IndirectSymbolIndices[RelEntry.Symbol]; + case wasm::R_WEBASSEMBLY_FUNCTION_INDEX_LEB: + case wasm::R_WEBASSEMBLY_GLOBAL_INDEX_LEB: + case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_LEB: + case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_SLEB: + case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_I32: + if (!SymbolIndices.count(RelEntry.Symbol)) + report_fatal_error("symbol not found function/global index space: " + + RelEntry.Symbol->getName()); + return SymbolIndices[RelEntry.Symbol]; + case wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB: + if (!TypeIndices.count(RelEntry.Symbol)) + report_fatal_error("symbol not found in type index space: " + + RelEntry.Symbol->getName()); + return TypeIndices[RelEntry.Symbol]; + default: + llvm_unreachable("invalid relocation type"); + } +} + // Apply the portions of the relocation records that we can handle ourselves // directly. -static void ApplyRelocations( - ArrayRef<WasmRelocationEntry> Relocations, - raw_pwrite_stream &Stream, - DenseMap<const MCSymbolWasm *, uint32_t> &SymbolIndices, - uint64_t ContentsOffset) -{ +void WasmObjectWriter::applyRelocations( + ArrayRef<WasmRelocationEntry> Relocations, uint64_t ContentsOffset) { + raw_pwrite_stream &Stream = getStream(); for (const WasmRelocationEntry &RelEntry : Relocations) { uint64_t Offset = ContentsOffset + RelEntry.FixupSection->getSectionOffset() + RelEntry.Offset; - switch (RelEntry.Type) { - case wasm::R_WEBASSEMBLY_FUNCTION_INDEX_LEB: { - uint32_t Index = SymbolIndices[RelEntry.Symbol]; - assert(RelEntry.Addend == 0); - WritePatchableLEB(Stream, Index, Offset); + DEBUG(dbgs() << "applyRelocation: " << RelEntry << "\n"); + switch (RelEntry.Type) { + case wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB: + case wasm::R_WEBASSEMBLY_FUNCTION_INDEX_LEB: + case wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB: + case wasm::R_WEBASSEMBLY_GLOBAL_INDEX_LEB: { + uint32_t Index = getRelocationIndexValue(RelEntry); + WritePatchableSLEB(Stream, Index, Offset); break; } - case wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB: { - uint32_t Index = SymbolIndices[RelEntry.Symbol]; - assert(RelEntry.Addend == 0); - - WritePatchableSLEB(Stream, Index, Offset); + case wasm::R_WEBASSEMBLY_TABLE_INDEX_I32: { + uint32_t Index = getRelocationIndexValue(RelEntry); + WriteI32(Stream, Index, Offset); break; } case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_SLEB: { uint32_t Value = ProvisionalValue(RelEntry); - WritePatchableSLEB(Stream, Value, Offset); break; } case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_LEB: { uint32_t Value = ProvisionalValue(RelEntry); - WritePatchableLEB(Stream, Value, Offset); break; } - case wasm::R_WEBASSEMBLY_TABLE_INDEX_I32: { - uint32_t Index = SymbolIndices[RelEntry.Symbol]; - assert(RelEntry.Addend == 0); - - WriteI32(Stream, Index, Offset); - break; - } case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_I32: { uint32_t Value = ProvisionalValue(RelEntry); - WriteI32(Stream, Value, Offset); break; } default: - break; + llvm_unreachable("invalid relocation type"); } } } // Write out the portions of the relocation records that the linker will // need to handle.
-static void -WriteRelocations(ArrayRef<WasmRelocationEntry> Relocations, - raw_pwrite_stream &Stream, - DenseMap<const MCSymbolWasm *, uint32_t> &SymbolIndices, - uint64_t HeaderSize) { - for (const WasmRelocationEntry RelEntry : Relocations) { - encodeULEB128(RelEntry.Type, Stream); +void WasmObjectWriter::writeRelocations( + ArrayRef<WasmRelocationEntry> Relocations, uint64_t HeaderSize) { + raw_pwrite_stream &Stream = getStream(); + for (const WasmRelocationEntry& RelEntry : Relocations) { uint64_t Offset = RelEntry.Offset + RelEntry.FixupSection->getSectionOffset() + HeaderSize; - uint32_t Index = SymbolIndices[RelEntry.Symbol]; - int64_t Addend = RelEntry.Addend; + uint32_t Index = getRelocationIndexValue(RelEntry); - switch (RelEntry.Type) { - case wasm::R_WEBASSEMBLY_FUNCTION_INDEX_LEB: - case wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB: - case wasm::R_WEBASSEMBLY_TABLE_INDEX_I32: - encodeULEB128(Offset, Stream); - encodeULEB128(Index, Stream); - assert(Addend == 0 && "addends not supported for functions"); + encodeULEB128(RelEntry.Type, Stream); + encodeULEB128(Offset, Stream); + encodeULEB128(Index, Stream); + if (RelEntry.hasAddend()) + encodeSLEB128(RelEntry.Addend, Stream); + } +} + +void WasmObjectWriter::writeTypeSection( + const SmallVector<WasmFunctionType, 4> &FunctionTypes) { + if (FunctionTypes.empty()) + return; + + SectionBookkeeping Section; + startSection(Section, wasm::WASM_SEC_TYPE); + + encodeULEB128(FunctionTypes.size(), getStream()); + + for (const WasmFunctionType &FuncTy : FunctionTypes) { + encodeSLEB128(wasm::WASM_TYPE_FUNC, getStream()); + encodeULEB128(FuncTy.Params.size(), getStream()); + for (wasm::ValType Ty : FuncTy.Params) + writeValueType(Ty); + encodeULEB128(FuncTy.Returns.size(), getStream()); + for (wasm::ValType Ty : FuncTy.Returns) + writeValueType(Ty); + } + + endSection(Section); +} + + +void WasmObjectWriter::writeImportSection( + const SmallVector<WasmImport, 4> &Imports) { + if (Imports.empty()) + return; + + SectionBookkeeping Section; + startSection(Section, wasm::WASM_SEC_IMPORT); + + encodeULEB128(Imports.size(), getStream()); + for (const WasmImport &Import : Imports) { + writeString(Import.ModuleName); + writeString(Import.FieldName); + + encodeULEB128(Import.Kind, getStream()); + + switch (Import.Kind) { + case wasm::WASM_EXTERNAL_FUNCTION: + encodeULEB128(Import.Type, getStream()); break; - case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_LEB: - case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_SLEB: - case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_I32: - encodeULEB128(Offset, Stream); - encodeULEB128(Index, Stream); - encodeSLEB128(Addend, Stream); + case wasm::WASM_EXTERNAL_GLOBAL: + encodeSLEB128(int32_t(Import.Type), getStream()); + encodeULEB128(0, getStream()); // mutability break; default: - llvm_unreachable("unsupported relocation type"); + llvm_unreachable("unsupported import kind"); } } -} -// Write out the the type relocation records that the linker will -// need to handle.
-static void WriteTypeRelocations( - ArrayRef<WasmRelocationEntry> TypeIndexFixups, - ArrayRef<uint32_t> TypeIndexFixupTypes, - raw_pwrite_stream &Stream) -{ - for (size_t i = 0, e = TypeIndexFixups.size(); i < e; ++i) { - const WasmRelocationEntry &Fixup = TypeIndexFixups[i]; - uint32_t Type = TypeIndexFixupTypes[i]; - - assert(Fixup.Type == wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB); - assert(Fixup.Addend == 0); - - uint64_t Offset = Fixup.Offset + - Fixup.FixupSection->getSectionOffset(); - - encodeULEB128(Fixup.Type, Stream); - encodeULEB128(Offset, Stream); - encodeULEB128(Type, Stream); - } + endSection(Section); +} + +void WasmObjectWriter::writeFunctionSection( + const SmallVector<WasmFunction, 4> &Functions) { + if (Functions.empty()) + return; + + SectionBookkeeping Section; + startSection(Section, wasm::WASM_SEC_FUNCTION); + + encodeULEB128(Functions.size(), getStream()); + for (const WasmFunction &Func : Functions) + encodeULEB128(Func.Type, getStream()); + + endSection(Section); +} + +void WasmObjectWriter::writeTableSection(uint32_t NumElements) { + // For now, always emit the table section, since indirect calls are not + // valid without it. In the future, we could perhaps be more clever and omit + // it if there are no indirect calls. + + SectionBookkeeping Section; + startSection(Section, wasm::WASM_SEC_TABLE); + + encodeULEB128(1, getStream()); // The number of tables. + // Fixed to 1 for now. + encodeSLEB128(wasm::WASM_TYPE_ANYFUNC, getStream()); // Type of table + encodeULEB128(0, getStream()); // flags + encodeULEB128(NumElements, getStream()); // initial + + endSection(Section); +} + +void WasmObjectWriter::writeMemorySection( + const SmallVector<char, 0> &DataBytes) { + // For now, always emit the memory section, since loads and stores are not + // valid without it. In the future, we could perhaps be more clever and omit + // it if there are no loads or stores.
+ SectionBookkeeping Section; + uint32_t NumPages = + (DataBytes.size() + wasm::WasmPageSize - 1) / wasm::WasmPageSize; + + startSection(Section, wasm::WASM_SEC_MEMORY); + encodeULEB128(1, getStream()); // number of memory spaces + + encodeULEB128(0, getStream()); // flags + encodeULEB128(NumPages, getStream()); // initial + + endSection(Section); +} + +void WasmObjectWriter::writeGlobalSection( + const SmallVector<WasmGlobal, 4> &Globals) { + if (Globals.empty()) + return; + + SectionBookkeeping Section; + startSection(Section, wasm::WASM_SEC_GLOBAL); + + encodeULEB128(Globals.size(), getStream()); + for (const WasmGlobal &Global : Globals) { + writeValueType(Global.Type); + write8(Global.IsMutable); + + if (Global.HasImport) { + assert(Global.InitialValue == 0); + write8(wasm::WASM_OPCODE_GET_GLOBAL); + encodeULEB128(Global.ImportIndex, getStream()); + } else { + assert(Global.ImportIndex == 0); + write8(wasm::WASM_OPCODE_I32_CONST); + encodeSLEB128(Global.InitialValue, getStream()); // offset + } + write8(wasm::WASM_OPCODE_END); + } + + endSection(Section); +} + +void WasmObjectWriter::writeExportSection( + const SmallVector<WasmExport, 4> &Exports) { + if (Exports.empty()) + return; + + SectionBookkeeping Section; + startSection(Section, wasm::WASM_SEC_EXPORT); + + encodeULEB128(Exports.size(), getStream()); + for (const WasmExport &Export : Exports) { + writeString(Export.FieldName); + encodeSLEB128(Export.Kind, getStream()); + encodeULEB128(Export.Index, getStream()); + } + + endSection(Section); +} + +void WasmObjectWriter::writeElemSection( + const SmallVector<uint32_t, 4> &TableElems) { + if (TableElems.empty()) + return; + + SectionBookkeeping Section; + startSection(Section, wasm::WASM_SEC_ELEM); + + encodeULEB128(1, getStream()); // number of "segments" + encodeULEB128(0, getStream()); // the table index + + // init expr for starting offset + write8(wasm::WASM_OPCODE_I32_CONST); + encodeSLEB128(0, getStream()); + write8(wasm::WASM_OPCODE_END); + + encodeULEB128(TableElems.size(), getStream()); + for (uint32_t Elem : TableElems) + encodeULEB128(Elem, getStream()); + + endSection(Section); +} + +void WasmObjectWriter::writeCodeSection( + const MCAssembler &Asm, const MCAsmLayout &Layout, + const SmallVector<WasmFunction, 4> &Functions) { + if (Functions.empty()) + return; + + SectionBookkeeping Section; + startSection(Section, wasm::WASM_SEC_CODE); + + encodeULEB128(Functions.size(), getStream()); + + for (const WasmFunction &Func : Functions) { + auto &FuncSection = static_cast<MCSectionWasm &>(Func.Sym->getSection()); + + int64_t Size = 0; + if (!Func.Sym->getSize()->evaluateAsAbsolute(Size, Layout)) + report_fatal_error(".size expression must be evaluatable"); + + encodeULEB128(Size, getStream()); + + FuncSection.setSectionOffset(getStream().tell() - Section.ContentsOffset); + + Asm.writeSectionData(&FuncSection, Layout); + } + + // Apply fixups. + applyRelocations(CodeRelocations, Section.ContentsOffset); + + endSection(Section); +} + +uint64_t WasmObjectWriter::writeDataSection( + const SmallVector<char, 0> &DataBytes) { + if (DataBytes.empty()) + return 0; + + SectionBookkeeping Section; + startSection(Section, wasm::WASM_SEC_DATA); + + encodeULEB128(1, getStream()); // count + encodeULEB128(0, getStream()); // memory index + write8(wasm::WASM_OPCODE_I32_CONST); + encodeSLEB128(0, getStream()); // offset + write8(wasm::WASM_OPCODE_END); + encodeULEB128(DataBytes.size(), getStream()); // size + uint32_t HeaderSize = getStream().tell() - Section.ContentsOffset; + writeBytes(DataBytes); // data + + // Apply fixups.
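A note on the offsets emitted just above: the global, elem, and data segments all introduce their offsets with the same two-opcode init expression, i32.const N followed by end. A standalone sketch of the bytes involved (plain C++; the opcode values 0x41 and 0x0b are the WebAssembly MVP encodings of i32.const and end):

#include <cstdint>
#include <cstdio>
#include <vector>

// Emit the canonical "i32.const N; end" init expression used for segment
// offsets and non-imported global initializers.
static void writeI32InitExpr(std::vector<uint8_t> &Out, int32_t N) {
  Out.push_back(0x41); // i32.const
  bool More = true;
  while (More) { // SLEB128-encode N
    uint8_t Byte = N & 0x7f;
    N >>= 7; // arithmetic shift preserves the sign on common platforms
    More = !((N == 0 && !(Byte & 0x40)) || (N == -1 && (Byte & 0x40)));
    if (More)
      Byte |= 0x80;
    Out.push_back(Byte);
  }
  Out.push_back(0x0b); // end
}

int main() {
  std::vector<uint8_t> Expr;
  writeI32InitExpr(Expr, 0);
  for (uint8_t B : Expr)
    std::printf("%02x ", B); // 41 00 0b
  std::printf("\n");
}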
+ applyRelocations(DataRelocations, Section.ContentsOffset + HeaderSize); + + endSection(Section); + return HeaderSize; +} + +void WasmObjectWriter::writeNameSection( + const SmallVector<WasmFunction, 4> &Functions, + const SmallVector<WasmImport, 4> &Imports, + unsigned NumFuncImports) { + uint32_t TotalFunctions = NumFuncImports + Functions.size(); + if (TotalFunctions == 0) + return; + + SectionBookkeeping Section; + startSection(Section, wasm::WASM_SEC_CUSTOM, "name"); + SectionBookkeeping SubSection; + startSection(SubSection, wasm::WASM_NAMES_FUNCTION); + + encodeULEB128(TotalFunctions, getStream()); + uint32_t Index = 0; + for (const WasmImport &Import : Imports) { + if (Import.Kind == wasm::WASM_EXTERNAL_FUNCTION) { + encodeULEB128(Index, getStream()); + writeString(Import.FieldName); + ++Index; + } + } + for (const WasmFunction &Func : Functions) { + encodeULEB128(Index, getStream()); + writeString(Func.Sym->getName()); + ++Index; + } + + endSection(SubSection); + endSection(Section); +} + +void WasmObjectWriter::writeCodeRelocSection() { + // See: https://github.com/WebAssembly/tool-conventions/blob/master/Linking.md + // for descriptions of the reloc sections. + + if (CodeRelocations.empty()) + return; + + SectionBookkeeping Section; + startSection(Section, wasm::WASM_SEC_CUSTOM, "reloc.CODE"); + + encodeULEB128(wasm::WASM_SEC_CODE, getStream()); + encodeULEB128(CodeRelocations.size(), getStream()); + + writeRelocations(CodeRelocations, 0); + + endSection(Section); +} + +void WasmObjectWriter::writeDataRelocSection(uint64_t DataSectionHeaderSize) { + // See: https://github.com/WebAssembly/tool-conventions/blob/master/Linking.md + // for descriptions of the reloc sections. + + if (DataRelocations.empty()) + return; + + SectionBookkeeping Section; + startSection(Section, wasm::WASM_SEC_CUSTOM, "reloc.DATA"); + + encodeULEB128(wasm::WASM_SEC_DATA, getStream()); + encodeULEB128(DataRelocations.size(), getStream()); + + writeRelocations(DataRelocations, DataSectionHeaderSize); + + endSection(Section); +} + +void WasmObjectWriter::writeLinkingMetaDataSection( + uint32_t DataSize, uint32_t DataAlignment, ArrayRef<StringRef> WeakSymbols, + bool HasStackPointer, uint32_t StackPointerGlobal) { + SectionBookkeeping Section; + startSection(Section, wasm::WASM_SEC_CUSTOM, "linking"); + SectionBookkeeping SubSection; + + if (HasStackPointer) { + startSection(SubSection, wasm::WASM_STACK_POINTER); + encodeULEB128(StackPointerGlobal, getStream()); // id + endSection(SubSection); + } + + if (WeakSymbols.size() != 0) { + startSection(SubSection, wasm::WASM_SYMBOL_INFO); + encodeULEB128(WeakSymbols.size(), getStream()); + for (const StringRef Export: WeakSymbols) { + writeString(Export); + encodeULEB128(wasm::WASM_SYMBOL_FLAG_WEAK, getStream()); + } + endSection(SubSection); + } + + if (DataSize > 0) { + startSection(SubSection, wasm::WASM_DATA_SIZE); + encodeULEB128(DataSize, getStream()); + endSection(SubSection); + + startSection(SubSection, wasm::WASM_DATA_ALIGNMENT); + encodeULEB128(DataAlignment, getStream()); + endSection(SubSection); + } + + endSection(Section); +} + +uint32_t WasmObjectWriter::getFunctionType(const MCSymbolWasm& Symbol) { + assert(Symbol.isFunction()); + assert(TypeIndices.count(&Symbol)); + return TypeIndices[&Symbol]; +} + +uint32_t WasmObjectWriter::registerFunctionType(const MCSymbolWasm& Symbol) { + assert(Symbol.isFunction()); + + WasmFunctionType F; + if (Symbol.isVariable()) { + const MCExpr *Expr = Symbol.getVariableValue(); + auto *Inner = cast<MCSymbolRefExpr>(Expr); + const auto *ResolvedSym
= cast<MCSymbolWasm>(&Inner->getSymbol()); + F.Returns = ResolvedSym->getReturns(); + F.Params = ResolvedSym->getParams(); + } else { + F.Returns = Symbol.getReturns(); + F.Params = Symbol.getParams(); + } + + auto Pair = + FunctionTypeIndices.insert(std::make_pair(F, FunctionTypes.size())); + if (Pair.second) + FunctionTypes.push_back(F); + TypeIndices[&Symbol] = Pair.first->second; + + DEBUG(dbgs() << "registerFunctionType: " << Symbol << " new:" << Pair.second << "\n"); + DEBUG(dbgs() << " -> type index: " << Pair.first->second << "\n"); + return Pair.first->second; } void WasmObjectWriter::writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) { + DEBUG(dbgs() << "WasmObjectWriter::writeObject\n"); MCContext &Ctx = Asm.getContext(); wasm::ValType PtrType = is64Bit() ? wasm::ValType::I64 : wasm::ValType::I32; // Collect information from the available symbols. - DenseMap<WasmFunctionType, int32_t, WasmFunctionTypeDenseMapInfo> - FunctionTypeIndices; - SmallVector<WasmFunctionType, 4> FunctionTypes; SmallVector<WasmFunction, 4> Functions; SmallVector<uint32_t, 4> TableElems; SmallVector<WasmGlobal, 4> Globals; SmallVector<WasmImport, 4> Imports; SmallVector<WasmExport, 4> Exports; - DenseMap<const MCSymbolWasm *, uint32_t> SymbolIndices; + SmallVector<StringRef, 4> WeakSymbols; SmallPtrSet<const MCSymbolWasm *, 4> IsAddressTaken; unsigned NumFuncImports = 0; unsigned NumGlobalImports = 0; SmallVector<char, 0> DataBytes; + uint32_t DataAlignment = 1; uint32_t StackPointerGlobal = 0; bool HasStackPointer = false; // Populate the IsAddressTaken set. - for (WasmRelocationEntry RelEntry : CodeRelocations) { + for (const WasmRelocationEntry &RelEntry : CodeRelocations) { switch (RelEntry.Type) { case wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB: case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_SLEB: @@ -558,7 +974,7 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm, break; } } - for (WasmRelocationEntry RelEntry : DataRelocations) { + for (const WasmRelocationEntry &RelEntry : DataRelocations) { switch (RelEntry.Type) { case wasm::R_WEBASSEMBLY_TABLE_INDEX_I32: case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_I32: @@ -572,37 +988,27 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm, // Populate the Imports set. for (const MCSymbol &S : Asm.symbols()) { const auto &WS = static_cast<const MCSymbolWasm &>(S); - int32_t Type; - if (WS.isFunction()) { - // Prepare the function's type, if we haven't seen it yet. - WasmFunctionType F; - F.Returns = WS.getReturns(); - F.Params = WS.getParams(); - auto Pair = - FunctionTypeIndices.insert(std::make_pair(F, FunctionTypes.size())); - if (Pair.second) - FunctionTypes.push_back(F); - - Type = Pair.first->second; - } else { - Type = int32_t(PtrType); - } + if (WS.isTemporary()) + continue; + + if (WS.isFunction()) + registerFunctionType(WS); // If the symbol is not defined in this translation unit, import it.
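An aside on registerFunctionType above: it is a standard interning pattern, inserting "signature -> next free index" into a map and letting a failed insert hand back the index assigned earlier. A minimal standalone equivalent (plain C++; std::map and std::string stand in for llvm::DenseMap and WasmFunctionType):

#include <cstdio>
#include <map>
#include <string>
#include <vector>

struct TypeTable {
  std::map<std::string, unsigned> Indices;
  std::vector<std::string> Types;

  // Insert is a no-op when the signature is already known; either way the
  // iterator in Pair.first points at the canonical index for it.
  unsigned intern(const std::string &Signature) {
    auto Pair = Indices.insert({Signature, (unsigned)Types.size()});
    if (Pair.second) // newly inserted: record the signature itself
      Types.push_back(Signature);
    return Pair.first->second;
  }
};

int main() {
  TypeTable T;
  std::printf("%u\n", T.intern("(i32,i32)->i32")); // 0
  std::printf("%u\n", T.intern("()->void"));       // 1
  std::printf("%u\n", T.intern("(i32,i32)->i32")); // 0 again: deduplicated
}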
- if (!WS.isTemporary() && !WS.isDefined(/*SetUsed=*/false)) { + if (!WS.isDefined(/*SetUsed=*/false) || WS.isVariable()) { WasmImport Import; Import.ModuleName = WS.getModuleName(); Import.FieldName = WS.getName(); if (WS.isFunction()) { Import.Kind = wasm::WASM_EXTERNAL_FUNCTION; - Import.Type = Type; + Import.Type = getFunctionType(WS); SymbolIndices[&WS] = NumFuncImports; ++NumFuncImports; } else { Import.Kind = wasm::WASM_EXTERNAL_GLOBAL; - Import.Type = Type; + Import.Type = int32_t(PtrType); SymbolIndices[&WS] = NumGlobalImports; ++NumGlobalImports; } @@ -621,7 +1027,7 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm, const MCFragment &Frag = *GlobalVars->begin(); if (Frag.hasInstructions() || Frag.getKind() != MCFragment::FT_Data) report_fatal_error("only data supported in .global_variables"); - const MCDataFragment &DataFrag = cast<MCDataFragment>(Frag); + const auto &DataFrag = cast<MCDataFragment>(Frag); if (!DataFrag.getFixups().empty()) report_fatal_error("fixups not supported in .global_variables"); const SmallVectorImpl<char> &Contents = DataFrag.getContents(); @@ -677,7 +1083,7 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm, const MCFragment &Frag = *StackPtr->begin(); if (Frag.hasInstructions() || Frag.getKind() != MCFragment::FT_Data) report_fatal_error("only data supported in .stack_pointer"); - const MCDataFragment &DataFrag = cast<MCDataFragment>(Frag); + const auto &DataFrag = cast<MCDataFragment>(Frag); if (!DataFrag.getFixups().empty()) report_fatal_error("fixups not supported in .stack_pointer"); const SmallVectorImpl<char> &Contents = DataFrag.getContents(); @@ -687,33 +1093,45 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm, StackPointerGlobal = NumGlobalImports + *(const int32_t *)Contents.data(); } - // Handle defined symbols. + // Handle regular defined and undefined symbols. for (const MCSymbol &S : Asm.symbols()) { // Ignore unnamed temporary symbols, which aren't ever exported, imported, // or used in relocations. if (S.isTemporary() && S.getName().empty()) continue; + const auto &WS = static_cast<const MCSymbolWasm &>(S); + DEBUG(dbgs() << "MCSymbol: '" << S << "'" + << " isDefined=" << S.isDefined() << " isExternal=" + << S.isExternal() << " isTemporary=" << S.isTemporary() + << " isFunction=" << WS.isFunction() + << " isWeak=" << WS.isWeak() + << " isVariable=" << WS.isVariable() << "\n"); + + if (WS.isWeak()) + WeakSymbols.push_back(WS.getName()); + + if (WS.isVariable()) + continue; + unsigned Index; + if (WS.isFunction()) { - // Prepare the function's type, if we haven't seen it yet. - WasmFunctionType F; - F.Returns = WS.getReturns(); - F.Params = WS.getParams(); - auto Pair = - FunctionTypeIndices.insert(std::make_pair(F, FunctionTypes.size())); - if (Pair.second) - FunctionTypes.push_back(F); + if (WS.isDefined(/*SetUsed=*/false)) { + if (WS.getOffset() != 0) + report_fatal_error( + "function sections must contain one function each"); - int32_t Type = Pair.first->second; + if (WS.getSize() == 0) + report_fatal_error( + "function symbols must have a size set with .size"); - if (WS.isDefined(/*SetUsed=*/false)) { // A definition. Take the next available index. Index = NumFuncImports + Functions.size(); // Prepare the function. WasmFunction Func; - Func.Type = Type; + Func.Type = getFunctionType(WS); Func.Sym = &WS; SymbolIndices[&WS] = Index; Functions.push_back(Func); @@ -722,429 +1140,152 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm, Index = SymbolIndices.find(&WS)->second; } + DEBUG(dbgs() << " -> function index: " << Index << "\n"); + // If needed, prepare the function to be called indirectly.
- if (IsAddressTaken.count(&WS)) + if (IsAddressTaken.count(&WS) != 0) { + IndirectSymbolIndices[&WS] = TableElems.size(); + DEBUG(dbgs() << " -> adding to table: " << TableElems.size() << "\n"); TableElems.push_back(Index); + } } else { - // For now, ignore temporary non-function symbols. - if (S.isTemporary()) + if (WS.isTemporary() && !WS.getSize()) + continue; + + if (!WS.isDefined(/*SetUsed=*/false)) continue; if (WS.getOffset() != 0) - report_fatal_error("data sections must contain one variable each"); + report_fatal_error("data sections must contain one variable each: " + + WS.getName()); if (!WS.getSize()) - report_fatal_error("data symbols must have a size set with .size"); + report_fatal_error("data symbols must have a size set with .size: " + + WS.getName()); int64_t Size = 0; if (!WS.getSize()->evaluateAsAbsolute(Size, Layout)) report_fatal_error(".size expression must be evaluatable"); - if (WS.isDefined(false)) { - MCSectionWasm &DataSection = - static_cast<MCSectionWasm &>(WS.getSection()); - - if (uint64_t(Size) != Layout.getSectionFileSize(&DataSection)) - report_fatal_error("data sections must contain at most one variable"); - - DataBytes.resize(alignTo(DataBytes.size(), DataSection.getAlignment())); - - DataSection.setSectionOffset(DataBytes.size()); - - for (MCSection::iterator I = DataSection.begin(), E = DataSection.end(); - I != E; ++I) { - const MCFragment &Frag = *I; - if (Frag.hasInstructions()) - report_fatal_error("only data supported in data sections"); - - if (const MCAlignFragment *Align = dyn_cast<MCAlignFragment>(&Frag)) { - if (Align->getValueSize() != 1) - report_fatal_error("only byte values supported for alignment"); - // If nops are requested, use zeros, as this is the data section. - uint8_t Value = Align->hasEmitNops() ? 0 : Align->getValue(); - uint64_t Size = std::min(alignTo(DataBytes.size(), - Align->getAlignment()), - DataBytes.size() + - Align->getMaxBytesToEmit()); - DataBytes.resize(Size, Value); - } else if (const MCFillFragment *Fill = - dyn_cast<MCFillFragment>(&Frag)) { - DataBytes.insert(DataBytes.end(), Size, Fill->getValue()); - } else { - const MCDataFragment &DataFrag = cast<MCDataFragment>(Frag); - const SmallVectorImpl<char> &Contents = DataFrag.getContents(); - - DataBytes.insert(DataBytes.end(), Contents.begin(), Contents.end()); - } - } - - // For each external global, prepare a corresponding wasm global - // holding its address. - if (WS.isExternal()) { - Index = NumGlobalImports + Globals.size(); - - WasmGlobal Global; - Global.Type = PtrType; - Global.IsMutable = false; - Global.HasImport = false; - Global.InitialValue = DataSection.getSectionOffset(); - Global.ImportIndex = 0; - SymbolIndices[&WS] = Index; - Globals.push_back(Global); + auto &DataSection = static_cast<MCSectionWasm &>(WS.getSection()); + + if (uint64_t(Size) != Layout.getSectionFileSize(&DataSection)) + report_fatal_error("data sections must contain at most one variable"); + + DataBytes.resize(alignTo(DataBytes.size(), DataSection.getAlignment())); + DataAlignment = std::max(DataAlignment, DataSection.getAlignment()); + + DataSection.setSectionOffset(DataBytes.size()); + + for (const MCFragment &Frag : DataSection) { + if (Frag.hasInstructions()) + report_fatal_error("only data supported in data sections"); + + if (auto *Align = dyn_cast<MCAlignFragment>(&Frag)) { + if (Align->getValueSize() != 1) + report_fatal_error("only byte values supported for alignment"); + // If nops are requested, use zeros, as this is the data section. + uint8_t Value = Align->hasEmitNops() ?
0 : Align->getValue(); + uint64_t Size = std::min(alignTo(DataBytes.size(), + Align->getAlignment()), + DataBytes.size() + + Align->getMaxBytesToEmit()); + DataBytes.resize(Size, Value); + } else if (auto *Fill = dyn_cast<MCFillFragment>(&Frag)) { + DataBytes.insert(DataBytes.end(), Fill->getSize(), Fill->getValue()); + } else { + const auto &DataFrag = cast<MCDataFragment>(Frag); + const SmallVectorImpl<char> &Contents = DataFrag.getContents(); + + DataBytes.insert(DataBytes.end(), Contents.begin(), Contents.end()); } } + + // For each global, prepare a corresponding wasm global holding its + // address. For externals these will also be named exports. + Index = NumGlobalImports + Globals.size(); + + WasmGlobal Global; + Global.Type = PtrType; + Global.IsMutable = false; + Global.HasImport = false; + Global.InitialValue = DataSection.getSectionOffset(); + Global.ImportIndex = 0; + SymbolIndices[&WS] = Index; + DEBUG(dbgs() << " -> global index: " << Index << "\n"); + Globals.push_back(Global); } // If the symbol is visible outside this translation unit, export it. - if (WS.isExternal()) { - assert(WS.isDefined(false)); + if ((WS.isExternal() && WS.isDefined(/*SetUsed=*/false))) { WasmExport Export; Export.FieldName = WS.getName(); Export.Index = Index; - if (WS.isFunction()) Export.Kind = wasm::WASM_EXTERNAL_FUNCTION; else Export.Kind = wasm::WASM_EXTERNAL_GLOBAL; - + DEBUG(dbgs() << " -> export " << Exports.size() << "\n"); Exports.push_back(Export); } } - // Add types for indirect function calls. - for (const WasmRelocationEntry &Fixup : TypeIndexFixups) { - assert(Fixup.Addend == 0); - assert(Fixup.Type == wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB); - - WasmFunctionType F; - F.Returns = Fixup.Symbol->getReturns(); - F.Params = Fixup.Symbol->getParams(); - auto Pair = - FunctionTypeIndices.insert(std::make_pair(F, FunctionTypes.size())); - if (Pair.second) - FunctionTypes.push_back(F); - - TypeIndexFixupTypes.push_back(Pair.first->second); - } - - // Write out the Wasm header.
- writeHeader(Asm); - - SectionBookkeeping Section; - - // === Type Section ========================================================= - if (!FunctionTypes.empty()) { - startSection(Section, wasm::WASM_SEC_TYPE); - - encodeULEB128(FunctionTypes.size(), getStream()); - - for (WasmFunctionType &FuncTy : FunctionTypes) { - encodeSLEB128(wasm::WASM_TYPE_FUNC, getStream()); - encodeULEB128(FuncTy.Params.size(), getStream()); - for (wasm::ValType Ty : FuncTy.Params) - writeValueType(Ty); - encodeULEB128(FuncTy.Returns.size(), getStream()); - for (wasm::ValType Ty : FuncTy.Returns) - writeValueType(Ty); - } - - endSection(Section); - } - - // === Import Section ======================================================== - if (!Imports.empty()) { - startSection(Section, wasm::WASM_SEC_IMPORT); - - encodeULEB128(Imports.size(), getStream()); - for (const WasmImport &Import : Imports) { - StringRef ModuleName = Import.ModuleName; - encodeULEB128(ModuleName.size(), getStream()); - writeBytes(ModuleName); - - StringRef FieldName = Import.FieldName; - encodeULEB128(FieldName.size(), getStream()); - writeBytes(FieldName); - - encodeULEB128(Import.Kind, getStream()); - - switch (Import.Kind) { - case wasm::WASM_EXTERNAL_FUNCTION: - encodeULEB128(Import.Type, getStream()); - break; - case wasm::WASM_EXTERNAL_GLOBAL: - encodeSLEB128(int32_t(Import.Type), getStream()); - encodeULEB128(0, getStream()); // mutability - break; - default: - llvm_unreachable("unsupported import kind"); - } - } - - endSection(Section); - } - - // === Function Section ====================================================== - if (!Functions.empty()) { - startSection(Section, wasm::WASM_SEC_FUNCTION); - - encodeULEB128(Functions.size(), getStream()); - for (const WasmFunction &Func : Functions) - encodeULEB128(Func.Type, getStream()); - - endSection(Section); - } - - // === Table Section ========================================================= - // For now, always emit the table section, since indirect calls are not - // valid without it. In the future, we could perhaps be more clever and omit - // it if there are no indirect calls. - startSection(Section, wasm::WASM_SEC_TABLE); - - // The number of tables, fixed to 1 for now. - encodeULEB128(1, getStream()); - - encodeSLEB128(wasm::WASM_TYPE_ANYFUNC, getStream()); - - encodeULEB128(0, getStream()); // flags - encodeULEB128(TableElems.size(), getStream()); // initial - - endSection(Section); - - // === Memory Section ======================================================== - // For now, always emit the memory section, since loads and stores are not - // valid without it. In the future, we could perhaps be more clever and omit - // it if there are no loads or stores. 
- uint32_t NumPages = - (DataBytes.size() + wasm::WasmPageSize - 1) / wasm::WasmPageSize; - - startSection(Section, wasm::WASM_SEC_MEMORY); - encodeULEB128(1, getStream()); // number of memory spaces - - encodeULEB128(0, getStream()); // flags - encodeULEB128(NumPages, getStream()); // initial - - endSection(Section); - - // === Global Section ======================================================== - if (!Globals.empty()) { - startSection(Section, wasm::WASM_SEC_GLOBAL); - - encodeULEB128(Globals.size(), getStream()); - for (const WasmGlobal &Global : Globals) { - writeValueType(Global.Type); - write8(Global.IsMutable); - - if (Global.HasImport) { - assert(Global.InitialValue == 0); - write8(wasm::WASM_OPCODE_GET_GLOBAL); - encodeULEB128(Global.ImportIndex, getStream()); - } else { - assert(Global.ImportIndex == 0); - write8(wasm::WASM_OPCODE_I32_CONST); - encodeSLEB128(Global.InitialValue, getStream()); // offset - } - write8(wasm::WASM_OPCODE_END); - } - - endSection(Section); - } - - // === Export Section ======================================================== - if (!Exports.empty()) { - startSection(Section, wasm::WASM_SEC_EXPORT); - - encodeULEB128(Exports.size(), getStream()); - for (const WasmExport &Export : Exports) { - encodeULEB128(Export.FieldName.size(), getStream()); - writeBytes(Export.FieldName); - - encodeSLEB128(Export.Kind, getStream()); - - encodeULEB128(Export.Index, getStream()); - } - - endSection(Section); - } - -#if 0 // TODO: Start Section - if (HaveStartFunction) { - // === Start Section ========================================================= - startSection(Section, wasm::WASM_SEC_START); - - encodeSLEB128(StartFunction, getStream()); - - endSection(Section); - } -#endif - - // === Elem Section ========================================================== - if (!TableElems.empty()) { - startSection(Section, wasm::WASM_SEC_ELEM); - - encodeULEB128(1, getStream()); // number of "segments" - encodeULEB128(0, getStream()); // the table index - - // init expr for starting offset - write8(wasm::WASM_OPCODE_I32_CONST); - encodeSLEB128(0, getStream()); - write8(wasm::WASM_OPCODE_END); - - encodeULEB128(TableElems.size(), getStream()); - for (uint32_t Elem : TableElems) - encodeULEB128(Elem, getStream()); - - endSection(Section); - } - - // === Code Section ========================================================== - if (!Functions.empty()) { - startSection(Section, wasm::WASM_SEC_CODE); - - encodeULEB128(Functions.size(), getStream()); - - for (const WasmFunction &Func : Functions) { - MCSectionWasm &FuncSection = - static_cast<MCSectionWasm &>(Func.Sym->getSection()); - - if (Func.Sym->isVariable()) - report_fatal_error("weak symbols not supported yet"); - - if (Func.Sym->getOffset() != 0) - report_fatal_error("function sections must contain one function each"); - - if (!Func.Sym->getSize()) - report_fatal_error("function symbols must have a size set with .size"); - - int64_t Size = 0; - if (!Func.Sym->getSize()->evaluateAsAbsolute(Size, Layout)) - report_fatal_error(".size expression must be evaluatable"); - - encodeULEB128(Size, getStream()); - - FuncSection.setSectionOffset(getStream().tell() - - Section.ContentsOffset); - - Asm.writeSectionData(&FuncSection, Layout); - } - - // Apply the type index fixups for call_indirect etc. instructions.
- for (size_t i = 0, e = TypeIndexFixups.size(); i < e; ++i) { - uint32_t Type = TypeIndexFixupTypes[i]; - unsigned Padding = PaddingFor5ByteULEB128(Type); - - const WasmRelocationEntry &Fixup = TypeIndexFixups[i]; - assert(Fixup.Addend == 0); - assert(Fixup.Type == wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB); - uint64_t Offset = Fixup.Offset + - Fixup.FixupSection->getSectionOffset(); - - uint8_t Buffer[16]; - unsigned SizeLen = encodeULEB128(Type, Buffer, Padding); - assert(SizeLen == 5); - getStream().pwrite((char *)Buffer, SizeLen, - Section.ContentsOffset + Offset); - } - - // Apply fixups. - ApplyRelocations(CodeRelocations, getStream(), SymbolIndices, - Section.ContentsOffset); - - endSection(Section); - } - - // === Data Section ========================================================== - uint32_t DataSectionHeaderSize = 0; - if (!DataBytes.empty()) { - startSection(Section, wasm::WASM_SEC_DATA); - - encodeULEB128(1, getStream()); // count - encodeULEB128(0, getStream()); // memory index - write8(wasm::WASM_OPCODE_I32_CONST); - encodeSLEB128(0, getStream()); // offset - write8(wasm::WASM_OPCODE_END); - encodeULEB128(DataBytes.size(), getStream()); // size - DataSectionHeaderSize = getStream().tell() - Section.ContentsOffset; - writeBytes(DataBytes); // data - - // Apply fixups. - ApplyRelocations(DataRelocations, getStream(), SymbolIndices, - Section.ContentsOffset + DataSectionHeaderSize); - - endSection(Section); - } - - // === Name Section ========================================================== - uint32_t TotalFunctions = NumFuncImports + Functions.size(); - if (TotalFunctions != 0) { - startSection(Section, wasm::WASM_SEC_CUSTOM, "name"); - SectionBookkeeping SubSection; - startSection(SubSection, wasm::WASM_NAMES_FUNCTION); - - encodeULEB128(TotalFunctions, getStream()); - uint32_t Index = 0; - for (const WasmImport &Import : Imports) { - if (Import.Kind == wasm::WASM_EXTERNAL_FUNCTION) { - encodeULEB128(Index, getStream()); - encodeULEB128(Import.FieldName.size(), getStream()); - writeBytes(Import.FieldName); - ++Index; - } - } - for (const WasmFunction &Func : Functions) { - encodeULEB128(Index, getStream()); - encodeULEB128(Func.Sym->getName().size(), getStream()); - writeBytes(Func.Sym->getName()); - ++Index; - } - - endSection(SubSection); - endSection(Section); - } - - // See: https://github.com/WebAssembly/tool-conventions/blob/master/Linking.md - // for descriptions of the reloc sections. - - // === Code Reloc Section ==================================================== - if (!CodeRelocations.empty()) { - startSection(Section, wasm::WASM_SEC_CUSTOM, "reloc.CODE"); - - encodeULEB128(wasm::WASM_SEC_CODE, getStream()); - - encodeULEB128(CodeRelocations.size() + TypeIndexFixups.size(), getStream()); - - WriteRelocations(CodeRelocations, getStream(), SymbolIndices, 0); - WriteTypeRelocations(TypeIndexFixups, TypeIndexFixupTypes, getStream()); + // Handle weak aliases. We need to process these in a separate pass because + // we need to have processed the target of the alias before the alias itself + // and the symbols are not necessarily ordered in this way. 
+ for (const MCSymbol &S : Asm.symbols()) { + if (!S.isVariable()) + continue; + assert(S.isDefined(/*SetUsed=*/false)); - endSection(Section); + const auto &WS = static_cast<const MCSymbolWasm &>(S); + // Find the target symbol of this weak alias and export that index + const MCExpr *Expr = WS.getVariableValue(); + auto *Inner = cast<MCSymbolRefExpr>(Expr); + const auto *ResolvedSym = cast<MCSymbolWasm>(&Inner->getSymbol()); + DEBUG(dbgs() << WS.getName() << ": weak alias of '" << *ResolvedSym << "'\n"); + assert(SymbolIndices.count(ResolvedSym) > 0); + uint32_t Index = SymbolIndices.find(ResolvedSym)->second; + DEBUG(dbgs() << " -> index:" << Index << "\n"); + + WasmExport Export; + Export.FieldName = WS.getName(); + Export.Index = Index; + if (WS.isFunction()) + Export.Kind = wasm::WASM_EXTERNAL_FUNCTION; + else + Export.Kind = wasm::WASM_EXTERNAL_GLOBAL; + DEBUG(dbgs() << " -> export " << Exports.size() << "\n"); + Exports.push_back(Export); } - // === Data Reloc Section ==================================================== - if (!DataRelocations.empty()) { - startSection(Section, wasm::WASM_SEC_CUSTOM, "reloc.DATA"); - - encodeULEB128(wasm::WASM_SEC_DATA, getStream()); - - encodeULEB128(DataRelocations.size(), getStream()); - - WriteRelocations(DataRelocations, getStream(), SymbolIndices, - DataSectionHeaderSize); + // Add types for indirect function calls. + for (const WasmRelocationEntry &Fixup : CodeRelocations) { + if (Fixup.Type != wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB) + continue; - endSection(Section); + registerFunctionType(*Fixup.Symbol); } - // === Linking Metadata Section ============================================== - if (HasStackPointer) { - startSection(Section, wasm::WASM_SEC_CUSTOM, "linking"); - - encodeULEB128(1, getStream()); // count - - encodeULEB128(wasm::WASM_STACK_POINTER, getStream()); // type - encodeULEB128(StackPointerGlobal, getStream()); // id + // Write out the Wasm header. + writeHeader(Asm); - endSection(Section); - } + writeTypeSection(FunctionTypes); + writeImportSection(Imports); + writeFunctionSection(Functions); + writeTableSection(TableElems.size()); + writeMemorySection(DataBytes); + writeGlobalSection(Globals); + writeExportSection(Exports); + // TODO: Start Section + writeElemSection(TableElems); + writeCodeSection(Asm, Layout, Functions); + uint64_t DataSectionHeaderSize = writeDataSection(DataBytes); + writeNameSection(Functions, Imports, NumFuncImports); + writeCodeRelocSection(); + writeDataRelocSection(DataSectionHeaderSize); + writeLinkingMetaDataSection(DataBytes.size(), DataAlignment, WeakSymbols, HasStackPointer, StackPointerGlobal); // TODO: Translate the .comment section to the output. - // TODO: Translate debug sections to the output.
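For reference, the write*Section calls above now emit sections strictly in the order the wasm binary format requires (type, import, function, table, memory, global, export, elem, code, data, then custom sections such as name and the reloc/linking metadata). A standalone sketch of the outermost framing, the 8-byte header plus one custom section (plain C++, not LLVM's writer; the "note" section name is illustrative):

#include <cstdint>
#include <cstdio>
#include <vector>

// Minimal-length ULEB128, as used for all counts and sizes in the format.
static void writeULEB(std::vector<uint8_t> &Out, uint32_t V) {
  do {
    uint8_t Byte = V & 0x7f;
    V >>= 7;
    if (V)
      Byte |= 0x80;
    Out.push_back(Byte);
  } while (V);
}

int main() {
  std::vector<uint8_t> Mod = {0x00, 'a', 's', 'm',     // magic "\0asm"
                              0x01, 0x00, 0x00, 0x00}; // version 1
  const char Name[] = "note";
  Mod.push_back(0);                      // section id 0 = custom
  writeULEB(Mod, 1 + sizeof(Name) - 1);  // payload_len: 1-byte name length
                                         // LEB (true for short names) + name
  writeULEB(Mod, sizeof(Name) - 1);      // name length
  Mod.insert(Mod.end(), Name, Name + sizeof(Name) - 1);
  std::printf("%zu bytes\n", Mod.size()); // 15 bytes
}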
} diff --git a/interpreter/llvm/src/lib/MC/WinCOFFObjectWriter.cpp b/interpreter/llvm/src/lib/MC/WinCOFFObjectWriter.cpp index e99a548ac0019..956ae70b38d19 100644 --- a/interpreter/llvm/src/lib/MC/WinCOFFObjectWriter.cpp +++ b/interpreter/llvm/src/lib/MC/WinCOFFObjectWriter.cpp @@ -12,11 +12,12 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/COFF.h" #include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" @@ -32,13 +33,12 @@ #include "llvm/MC/MCWinCOFFObjectWriter.h" #include "llvm/MC/StringTableBuilder.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/COFF.h" #include "llvm/Support/Endian.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/JamCRC.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include +#include #include #include #include @@ -197,8 +197,7 @@ class WinCOFFObjectWriter : public MCObjectWriter { void recordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, const MCFixup &Fixup, - MCValue Target, bool &IsPCRel, - uint64_t &FixedValue) override; + MCValue Target, uint64_t &FixedValue) override; void createFileSymbols(MCAssembler &Asm); void assignSectionNumbers(); @@ -708,9 +707,11 @@ bool WinCOFFObjectWriter::isSymbolRefDifferenceFullyResolvedImpl( InSet, IsPCRel); } -void WinCOFFObjectWriter::recordRelocation( - MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, - const MCFixup &Fixup, MCValue Target, bool &IsPCRel, uint64_t &FixedValue) { +void WinCOFFObjectWriter::recordRelocation(MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, MCValue Target, + uint64_t &FixedValue) { assert(Target.getSymA() && "Relocation must reference a symbol!"); const MCSymbol &A = Target.getSymA()->getSymbol(); @@ -735,7 +736,6 @@ void WinCOFFObjectWriter::recordRelocation( COFFSection *Sec = SectionMap[MCSec]; const MCSymbolRefExpr *SymB = Target.getSymB(); - bool CrossSection = false; if (SymB) { const MCSymbol *B = &SymB->getSymbol(); @@ -747,28 +747,9 @@ void WinCOFFObjectWriter::recordRelocation( return; } - if (!A.getFragment()) { - Asm.getContext().reportError( - Fixup.getLoc(), - Twine("symbol '") + A.getName() + - "' can not be undefined in a subtraction expression"); - return; - } - - CrossSection = &A.getSection() != &B->getSection(); - // Offset of the symbol in the section int64_t OffsetOfB = Layout.getSymbolOffset(*B); - // In the case where we have SymbA and SymB, we just need to store the delta - // between the two symbols. Update FixedValue to account for the delta, and - // skip recording the relocation. - if (!CrossSection) { - int64_t OffsetOfA = Layout.getSymbolOffset(A); - FixedValue = (OffsetOfA - OffsetOfB) + Target.getConstant(); - return; - } - // Offset of the relocation in the section int64_t OffsetOfRelocation = Layout.getFragmentOffset(Fragment) + Fixup.getOffset(); @@ -784,7 +765,7 @@ void WinCOFFObjectWriter::recordRelocation( Reloc.Data.VirtualAddress = Layout.getFragmentOffset(Fragment); // Turn relocations for temporary symbols into section relocations. 
- if (A.isTemporary() || CrossSection) { + if (A.isTemporary()) { MCSection *TargetSection = &A.getSection(); assert( SectionMap.find(TargetSection) != SectionMap.end() && @@ -802,7 +783,7 @@ void WinCOFFObjectWriter::recordRelocation( Reloc.Data.VirtualAddress += Fixup.getOffset(); Reloc.Data.Type = TargetObjectWriter->getRelocType( - Target, Fixup, CrossSection, Asm.getBackend()); + Asm.getContext(), Target, Fixup, SymB, Asm.getBackend()); // FIXME: Can anyone explain what this does other than adjust for the size // of the offset? diff --git a/interpreter/llvm/src/lib/Object/Archive.cpp b/interpreter/llvm/src/lib/Object/Archive.cpp index c4924f85a907a..977cccc11dcda 100644 --- a/interpreter/llvm/src/lib/Object/Archive.cpp +++ b/interpreter/llvm/src/lib/Object/Archive.cpp @@ -11,11 +11,11 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Object/Archive.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" -#include "llvm/Object/Archive.h" #include "llvm/Object/Binary.h" #include "llvm/Object/Error.h" #include "llvm/Support/Chrono.h" diff --git a/interpreter/llvm/src/lib/Object/ArchiveWriter.cpp b/interpreter/llvm/src/lib/Object/ArchiveWriter.cpp index 5b233aab2018a..b052c76d1fed4 100644 --- a/interpreter/llvm/src/lib/Object/ArchiveWriter.cpp +++ b/interpreter/llvm/src/lib/Object/ArchiveWriter.cpp @@ -14,6 +14,7 @@ #include "llvm/Object/ArchiveWriter.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/Magic.h" #include "llvm/IR/LLVMContext.h" #include "llvm/Object/Archive.h" #include "llvm/Object/ObjectFile.h" @@ -35,7 +36,8 @@ using namespace llvm; NewArchiveMember::NewArchiveMember(MemoryBufferRef BufRef) - : Buf(MemoryBuffer::getMemBuffer(BufRef, false)) {} + : Buf(MemoryBuffer::getMemBuffer(BufRef, false)), + MemberName(BufRef.getBufferIdentifier()) {} Expected<NewArchiveMember> NewArchiveMember::getOldMember(const object::Archive::Child &OldMember, @@ -47,6 +49,7 @@ NewArchiveMember::getOldMember(const object::Archive::Child &OldMember, NewArchiveMember M; assert(M.IsNew == false); M.Buf = MemoryBuffer::getMemBuffer(*BufOrErr, false); + M.MemberName = M.Buf->getBufferIdentifier(); if (!Deterministic) { auto ModTimeOrErr = OldMember.getLastModified(); if (!ModTimeOrErr) @@ -96,6 +99,7 @@ Expected<NewArchiveMember> NewArchiveMember::getFile(StringRef FileName, NewArchiveMember M; M.IsNew = true; M.Buf = std::move(*MemberBufferOrErr); + M.MemberName = M.Buf->getBufferIdentifier(); if (!Deterministic) { M.ModTime = std::chrono::time_point_cast<std::chrono::seconds>( Status.getLastModificationTime()); @@ -184,7 +188,7 @@ printBSDMemberHeader(raw_fd_ostream &Out, StringRef Name, } static bool useStringTable(bool Thin, StringRef Name) { - return Thin || Name.size() >= 16; + return Thin || Name.size() >= 16 || Name.contains('/'); } static void @@ -238,7 +242,7 @@ static void writeStringTable(raw_fd_ostream &Out, StringRef ArcName, unsigned StartOffset = 0; for (const NewArchiveMember &M : Members) { StringRef Path = M.Buf->getBufferIdentifier(); - StringRef Name = sys::path::filename(Path); + StringRef Name = M.MemberName; if (!useStringTable(Thin, Name)) continue; if (StartOffset == 0) { @@ -290,7 +294,7 @@ writeSymbolTable(raw_fd_ostream &Out, object::Archive::Kind Kind, MemoryBufferRef MemberBuffer = Members[MemberNum].Buf->getMemBufferRef(); Expected<std::unique_ptr<object::SymbolicFile>> ObjOrErr = object::SymbolicFile::createSymbolicFile( - MemberBuffer, sys::fs::file_magic::unknown, &Context); + MemberBuffer,
llvm::file_magic::unknown, &Context); if (!ObjOrErr) { // FIXME: check only for "not an object file" errors. consumeError(ObjOrErr.takeError()); @@ -314,7 +318,8 @@ writeSymbolTable(raw_fd_ostream &Out, object::Archive::Kind Kind, continue; if (!(Symflags & object::SymbolRef::SF_Global)) continue; - if (Symflags & object::SymbolRef::SF_Undefined) + if (Symflags & object::SymbolRef::SF_Undefined && + !(Symflags & object::SymbolRef::SF_Indirect)) continue; unsigned NameOffset = NameOS.tell(); @@ -422,9 +427,8 @@ llvm::writeArchive(StringRef ArcName, if (Kind == object::Archive::K_DARWIN) Padding = OffsetToAlignment(M.Buf->getBufferSize(), 8); - printMemberHeader(Out, Kind, Thin, - sys::path::filename(M.Buf->getBufferIdentifier()), - StringMapIndexIter, M.ModTime, M.UID, M.GID, M.Perms, + printMemberHeader(Out, Kind, Thin, M.MemberName, StringMapIndexIter, + M.ModTime, M.UID, M.GID, M.Perms, M.Buf->getBufferSize() + Padding); if (!Thin) diff --git a/interpreter/llvm/src/lib/Object/Binary.cpp b/interpreter/llvm/src/lib/Object/Binary.cpp index 2b44c4a82d2ca..c4565db459e64 100644 --- a/interpreter/llvm/src/lib/Object/Binary.cpp +++ b/interpreter/llvm/src/lib/Object/Binary.cpp @@ -11,12 +11,14 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Object/Binary.h" #include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/Magic.h" #include "llvm/Object/Archive.h" -#include "llvm/Object/Binary.h" #include "llvm/Object/Error.h" #include "llvm/Object/MachOUniversal.h" #include "llvm/Object/ObjectFile.h" +#include "llvm/Object/WindowsResource.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ErrorOr.h" @@ -42,40 +44,41 @@ MemoryBufferRef Binary::getMemoryBufferRef() const { return Data; } Expected<std::unique_ptr<Binary>> object::createBinary(MemoryBufferRef Buffer, LLVMContext *Context) { - sys::fs::file_magic Type = sys::fs::identify_magic(Buffer.getBuffer()); + file_magic Type = identify_magic(Buffer.getBuffer()); switch (Type) { - case sys::fs::file_magic::archive: - return Archive::create(Buffer); - case sys::fs::file_magic::elf: - case sys::fs::file_magic::elf_relocatable: - case sys::fs::file_magic::elf_executable: - case sys::fs::file_magic::elf_shared_object: - case sys::fs::file_magic::elf_core: - case sys::fs::file_magic::macho_object: - case sys::fs::file_magic::macho_executable: - case sys::fs::file_magic::macho_fixed_virtual_memory_shared_lib: - case sys::fs::file_magic::macho_core: - case sys::fs::file_magic::macho_preload_executable: - case sys::fs::file_magic::macho_dynamically_linked_shared_lib: - case sys::fs::file_magic::macho_dynamic_linker: - case sys::fs::file_magic::macho_bundle: - case sys::fs::file_magic::macho_dynamically_linked_shared_lib_stub: - case sys::fs::file_magic::macho_dsym_companion: - case sys::fs::file_magic::macho_kext_bundle: - case sys::fs::file_magic::coff_object: - case sys::fs::file_magic::coff_import_library: - case sys::fs::file_magic::pecoff_executable: - case sys::fs::file_magic::bitcode: - case sys::fs::file_magic::wasm_object: - return ObjectFile::createSymbolicFile(Buffer, Type, Context); - case sys::fs::file_magic::macho_universal_binary: - return MachOUniversalBinary::create(Buffer); - case sys::fs::file_magic::unknown: - case sys::fs::file_magic::coff_cl_gl_object: - case sys::fs::file_magic::windows_resource: - // Unrecognized object file format.
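A note on the createBinary rewrite in this hunk: it dispatches on llvm::file_magic, which is sniffed from the first bytes of the buffer. A toy standalone version of that detection step (plain C++; identifyMagic is illustrative and covers far fewer formats than llvm::identify_magic):

#include <cstring>
#include <string>

enum class FileMagic { Unknown, Archive, Elf, Wasm, PECOFF };

// Dispatch on leading bytes, the same idea that drives the switch in
// createBinary. Real detection also distinguishes ELF subtypes, Mach-O
// variants, bitcode, and so on.
static FileMagic identifyMagic(const std::string &Buf) {
  if (Buf.rfind("!<arch>\n", 0) == 0)
    return FileMagic::Archive;
  if (Buf.rfind("\x7f" "ELF", 0) == 0)
    return FileMagic::Elf;
  if (Buf.size() >= 4 && std::memcmp(Buf.data(), "\0asm", 4) == 0)
    return FileMagic::Wasm;
  if (Buf.rfind("MZ", 0) == 0)
    return FileMagic::PECOFF;
  return FileMagic::Unknown;
}

int main() {
  std::string Wasm("\0asm\x01\x00\x00\x00", 8); // wasm header from above
  return identifyMagic(Wasm) == FileMagic::Wasm ? 0 : 1;
}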
- return errorCodeToError(object_error::invalid_file_type); + case file_magic::archive: + return Archive::create(Buffer); + case file_magic::elf: + case file_magic::elf_relocatable: + case file_magic::elf_executable: + case file_magic::elf_shared_object: + case file_magic::elf_core: + case file_magic::macho_object: + case file_magic::macho_executable: + case file_magic::macho_fixed_virtual_memory_shared_lib: + case file_magic::macho_core: + case file_magic::macho_preload_executable: + case file_magic::macho_dynamically_linked_shared_lib: + case file_magic::macho_dynamic_linker: + case file_magic::macho_bundle: + case file_magic::macho_dynamically_linked_shared_lib_stub: + case file_magic::macho_dsym_companion: + case file_magic::macho_kext_bundle: + case file_magic::coff_object: + case file_magic::coff_import_library: + case file_magic::pecoff_executable: + case file_magic::bitcode: + case file_magic::wasm_object: + return ObjectFile::createSymbolicFile(Buffer, Type, Context); + case file_magic::macho_universal_binary: + return MachOUniversalBinary::create(Buffer); + case file_magic::windows_resource: + return WindowsResource::createWindowsResource(Buffer); + case file_magic::unknown: + case file_magic::coff_cl_gl_object: + // Unrecognized object file format. + return errorCodeToError(object_error::invalid_file_type); } llvm_unreachable("Unexpected Binary File Type"); } diff --git a/interpreter/llvm/src/lib/Object/CMakeLists.txt b/interpreter/llvm/src/lib/Object/CMakeLists.txt index 08365e71c2f6a..fd5e7707c5411 100644 --- a/interpreter/llvm/src/lib/Object/CMakeLists.txt +++ b/interpreter/llvm/src/lib/Object/CMakeLists.txt @@ -2,6 +2,8 @@ add_llvm_library(LLVMObject Archive.cpp ArchiveWriter.cpp Binary.cpp + COFFImportFile.cpp + COFFModuleDefinition.cpp COFFObjectFile.cpp Decompressor.cpp ELF.cpp @@ -18,10 +20,12 @@ add_llvm_library(LLVMObject SymbolicFile.cpp SymbolSize.cpp WasmObjectFile.cpp + WindowsResource.cpp ADDITIONAL_HEADER_DIRS ${LLVM_MAIN_INCLUDE_DIR}/llvm/Object DEPENDS intrinsics_gen + llvm_vcsrevision_h ) diff --git a/interpreter/llvm/src/lib/Object/COFFImportFile.cpp b/interpreter/llvm/src/lib/Object/COFFImportFile.cpp new file mode 100644 index 0000000000000..ff039463d08c8 --- /dev/null +++ b/interpreter/llvm/src/lib/Object/COFFImportFile.cpp @@ -0,0 +1,612 @@ +//===- COFFImportFile.cpp - COFF short import file implementation ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the writeImportLibrary function. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/COFFImportFile.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/Object/Archive.h" +#include "llvm/Object/ArchiveWriter.h" +#include "llvm/Object/COFF.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/Path.h" + +#include +#include +#include +#include +#include + +using namespace llvm::COFF; +using namespace llvm::object; +using namespace llvm; + +namespace llvm { +namespace object { + +static bool is32bit(MachineTypes Machine) { + switch (Machine) { + default: + llvm_unreachable("unsupported machine"); + case IMAGE_FILE_MACHINE_AMD64: + return false; + case IMAGE_FILE_MACHINE_ARMNT: + case IMAGE_FILE_MACHINE_I386: + return true; + } +} + +static uint16_t getImgRelRelocation(MachineTypes Machine) { + switch (Machine) { + default: + llvm_unreachable("unsupported machine"); + case IMAGE_FILE_MACHINE_AMD64: + return IMAGE_REL_AMD64_ADDR32NB; + case IMAGE_FILE_MACHINE_ARMNT: + return IMAGE_REL_ARM_ADDR32NB; + case IMAGE_FILE_MACHINE_I386: + return IMAGE_REL_I386_DIR32NB; + } +} + +template <class T> static void append(std::vector<uint8_t> &B, const T &Data) { + size_t S = B.size(); + B.resize(S + sizeof(T)); + memcpy(&B[S], &Data, sizeof(T)); +} + +static void writeStringTable(std::vector<uint8_t> &B, + ArrayRef<const std::string> Strings) { + // The COFF string table consists of a 4-byte value which is the size of the + // table, including the length field itself. This value is followed by the + // string content itself, which is an array of null-terminated C-style + // strings. The termination is important as they are referenced to by offset + // by the symbol entity in the file format. + + size_t Pos = B.size(); + size_t Offset = B.size(); + + // Skip over the length field, we will fill it in later as we will have + // computed the length while emitting the string content itself. + Pos += sizeof(uint32_t); + + for (const auto &S : Strings) { + B.resize(Pos + S.length() + 1); + strcpy(reinterpret_cast<char *>(&B[Pos]), S.c_str()); + Pos += S.length() + 1; + } + + // Backfill the length of the table now that it has been computed. + support::ulittle32_t Length(B.size() - Offset); + support::endian::write32le(&B[Offset], Length); +} + +static ImportNameType getNameType(StringRef Sym, StringRef ExtName, + MachineTypes Machine) { + if (Sym != ExtName) + return IMPORT_NAME_UNDECORATE; + if (Machine == IMAGE_FILE_MACHINE_I386 && Sym.startswith("_")) + return IMPORT_NAME_NOPREFIX; + return IMPORT_NAME; +} + +static Expected<std::string> replace(StringRef S, StringRef From, + StringRef To) { + size_t Pos = S.find(From); + + // From and To may be mangled, but substrings in S may not. + if (Pos == StringRef::npos && From.startswith("_") && To.startswith("_")) { + From = From.substr(1); + To = To.substr(1); + Pos = S.find(From); + } + + if (Pos == StringRef::npos) { + return make_error<StringError>( + StringRef(Twine(S + ": replacing '" + From + + "' with '" + To + "' failed").str()), object_error::parse_failed); + } + + return (Twine(S.substr(0, Pos)) + To + S.substr(Pos + From.size())).str(); +} + +static const std::string NullImportDescriptorSymbolName = + "__NULL_IMPORT_DESCRIPTOR"; + +namespace { +// This class constructs various small object files necessary to support linking +// symbols imported from a DLL. The contents are pretty strictly defined and +// nearly entirely static. The details of the structures files are defined in +// WINNT.h and the PE/COFF specification.
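The writeStringTable helper above uses a reserve-then-backfill pattern: skip the 4-byte length field, append the NUL-terminated strings, then write the total size (which, per the COFF spec, includes the length field itself) back at the start. A standalone simplification of the same idea (plain C++, without the file's support::endian helpers):

#include <cstdint>
#include <string>
#include <vector>

static void writeStringTable(std::vector<uint8_t> &B,
                             const std::vector<std::string> &Strings) {
  size_t Offset = B.size();
  B.resize(B.size() + 4); // placeholder for the 4-byte length field
  for (const std::string &S : Strings) {
    B.insert(B.end(), S.begin(), S.end());
    B.push_back('\0'); // symbols reference strings by offset, so terminate
  }
  uint32_t Length = (uint32_t)(B.size() - Offset);
  for (int I = 0; I < 4; ++I) // backfill, little-endian (write32le)
    B[Offset + I] = (Length >> (8 * I)) & 0xff;
}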
class ObjectFactory {
+  using u16 = support::ulittle16_t;
+  using u32 = support::ulittle32_t;
+  MachineTypes Machine;
+  BumpPtrAllocator Alloc;
+  StringRef ImportName;
+  StringRef Library;
+  std::string ImportDescriptorSymbolName;
+  std::string NullThunkSymbolName;
+
+public:
+  ObjectFactory(StringRef S, MachineTypes M)
+      : Machine(M), ImportName(S), Library(S.drop_back(4)),
+        ImportDescriptorSymbolName(("__IMPORT_DESCRIPTOR_" + Library).str()),
+        NullThunkSymbolName(("\x7f" + Library + "_NULL_THUNK_DATA").str()) {}
+
+  // Creates an Import Descriptor. This is a small object file which contains a
+  // reference to the terminators and contains the library name (entry) for the
+  // import name table. It will force the linker to construct the necessary
+  // structure to import symbols from the DLL.
+  NewArchiveMember createImportDescriptor(std::vector<uint8_t> &Buffer);
+
+  // Creates a NULL import descriptor. This is a small object file which
+  // contains a NULL import descriptor. It is used to terminate the imports
+  // from a specific DLL.
+  NewArchiveMember createNullImportDescriptor(std::vector<uint8_t> &Buffer);
+
+  // Create a NULL Thunk Entry. This is a small object file which contains a
+  // NULL Import Address Table entry and a NULL Import Lookup Table Entry. It
+  // is used to terminate the IAT and ILT.
+  NewArchiveMember createNullThunk(std::vector<uint8_t> &Buffer);
+
+  // Create a short import file which is described in PE/COFF spec 7. Import
+  // Library Format.
+  NewArchiveMember createShortImport(StringRef Sym, uint16_t Ordinal,
+                                     ImportType Type, ImportNameType NameType);
+
+  // Create a weak external file which is described in PE/COFF Aux Format 3.
+  NewArchiveMember createWeakExternal(StringRef Sym, StringRef Weak, bool Imp);
+};
+} // namespace
+
+NewArchiveMember
+ObjectFactory::createImportDescriptor(std::vector<uint8_t> &Buffer) {
+  const uint32_t NumberOfSections = 2;
+  const uint32_t NumberOfSymbols = 7;
+  const uint32_t NumberOfRelocations = 3;
+
+  // COFF Header
+  coff_file_header Header{
+      u16(Machine),
+      u16(NumberOfSections),
+      u32(0),
+      u32(sizeof(Header) + (NumberOfSections * sizeof(coff_section)) +
+          // .idata$2
+          sizeof(coff_import_directory_table_entry) +
+          NumberOfRelocations * sizeof(coff_relocation) +
+          // .idata$4
+          (ImportName.size() + 1)),
+      u32(NumberOfSymbols),
+      u16(0),
+      u16(is32bit(Machine) ?
IMAGE_FILE_32BIT_MACHINE : 0), + }; + append(Buffer, Header); + + // Section Header Table + const coff_section SectionTable[NumberOfSections] = { + {{'.', 'i', 'd', 'a', 't', 'a', '$', '2'}, + u32(0), + u32(0), + u32(sizeof(coff_import_directory_table_entry)), + u32(sizeof(coff_file_header) + NumberOfSections * sizeof(coff_section)), + u32(sizeof(coff_file_header) + NumberOfSections * sizeof(coff_section) + + sizeof(coff_import_directory_table_entry)), + u32(0), + u16(NumberOfRelocations), + u16(0), + u32(IMAGE_SCN_ALIGN_4BYTES | IMAGE_SCN_CNT_INITIALIZED_DATA | + IMAGE_SCN_MEM_READ | IMAGE_SCN_MEM_WRITE)}, + {{'.', 'i', 'd', 'a', 't', 'a', '$', '6'}, + u32(0), + u32(0), + u32(ImportName.size() + 1), + u32(sizeof(coff_file_header) + NumberOfSections * sizeof(coff_section) + + sizeof(coff_import_directory_table_entry) + + NumberOfRelocations * sizeof(coff_relocation)), + u32(0), + u32(0), + u16(0), + u16(0), + u32(IMAGE_SCN_ALIGN_2BYTES | IMAGE_SCN_CNT_INITIALIZED_DATA | + IMAGE_SCN_MEM_READ | IMAGE_SCN_MEM_WRITE)}, + }; + append(Buffer, SectionTable); + + // .idata$2 + const coff_import_directory_table_entry ImportDescriptor{ + u32(0), u32(0), u32(0), u32(0), u32(0), + }; + append(Buffer, ImportDescriptor); + + const coff_relocation RelocationTable[NumberOfRelocations] = { + {u32(offsetof(coff_import_directory_table_entry, NameRVA)), u32(2), + u16(getImgRelRelocation(Machine))}, + {u32(offsetof(coff_import_directory_table_entry, ImportLookupTableRVA)), + u32(3), u16(getImgRelRelocation(Machine))}, + {u32(offsetof(coff_import_directory_table_entry, ImportAddressTableRVA)), + u32(4), u16(getImgRelRelocation(Machine))}, + }; + append(Buffer, RelocationTable); + + // .idata$6 + auto S = Buffer.size(); + Buffer.resize(S + ImportName.size() + 1); + memcpy(&Buffer[S], ImportName.data(), ImportName.size()); + Buffer[S + ImportName.size()] = '\0'; + + // Symbol Table + coff_symbol16 SymbolTable[NumberOfSymbols] = { + {{{0, 0, 0, 0, 0, 0, 0, 0}}, + u32(0), + u16(1), + u16(0), + IMAGE_SYM_CLASS_EXTERNAL, + 0}, + {{{'.', 'i', 'd', 'a', 't', 'a', '$', '2'}}, + u32(0), + u16(1), + u16(0), + IMAGE_SYM_CLASS_SECTION, + 0}, + {{{'.', 'i', 'd', 'a', 't', 'a', '$', '6'}}, + u32(0), + u16(2), + u16(0), + IMAGE_SYM_CLASS_STATIC, + 0}, + {{{'.', 'i', 'd', 'a', 't', 'a', '$', '4'}}, + u32(0), + u16(0), + u16(0), + IMAGE_SYM_CLASS_SECTION, + 0}, + {{{'.', 'i', 'd', 'a', 't', 'a', '$', '5'}}, + u32(0), + u16(0), + u16(0), + IMAGE_SYM_CLASS_SECTION, + 0}, + {{{0, 0, 0, 0, 0, 0, 0, 0}}, + u32(0), + u16(0), + u16(0), + IMAGE_SYM_CLASS_EXTERNAL, + 0}, + {{{0, 0, 0, 0, 0, 0, 0, 0}}, + u32(0), + u16(0), + u16(0), + IMAGE_SYM_CLASS_EXTERNAL, + 0}, + }; + // TODO: Name.Offset.Offset here and in the all similar places below + // suggests a names refactoring. Maybe StringTableOffset.Value? 
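+  // (Illustrative note: these offsets point into the string table that
+  // writeStringTable appends below. Offsets are measured from the start of
+  // the table, whose first 4 bytes hold its total size, so the first name
+  // begins at sizeof(uint32_t) and each following name begins one past the
+  // previous name's NUL. For example, a table holding just "foo" and "ab"
+  // would be the 11 bytes
+  //   0b 00 00 00  66 6f 6f 00  61 62 00
+  // with "foo" at offset 4 and "ab" at offset 8.)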
+ SymbolTable[0].Name.Offset.Offset = + sizeof(uint32_t); + SymbolTable[5].Name.Offset.Offset = + sizeof(uint32_t) + ImportDescriptorSymbolName.length() + 1; + SymbolTable[6].Name.Offset.Offset = + sizeof(uint32_t) + ImportDescriptorSymbolName.length() + 1 + + NullImportDescriptorSymbolName.length() + 1; + append(Buffer, SymbolTable); + + // String Table + writeStringTable(Buffer, + {ImportDescriptorSymbolName, NullImportDescriptorSymbolName, + NullThunkSymbolName}); + + StringRef F{reinterpret_cast(Buffer.data()), Buffer.size()}; + return {MemoryBufferRef(F, ImportName)}; +} + +NewArchiveMember +ObjectFactory::createNullImportDescriptor(std::vector &Buffer) { + const uint32_t NumberOfSections = 1; + const uint32_t NumberOfSymbols = 1; + + // COFF Header + coff_file_header Header{ + u16(Machine), + u16(NumberOfSections), + u32(0), + u32(sizeof(Header) + (NumberOfSections * sizeof(coff_section)) + + // .idata$3 + sizeof(coff_import_directory_table_entry)), + u32(NumberOfSymbols), + u16(0), + u16(is32bit(Machine) ? IMAGE_FILE_32BIT_MACHINE : 0), + }; + append(Buffer, Header); + + // Section Header Table + const coff_section SectionTable[NumberOfSections] = { + {{'.', 'i', 'd', 'a', 't', 'a', '$', '3'}, + u32(0), + u32(0), + u32(sizeof(coff_import_directory_table_entry)), + u32(sizeof(coff_file_header) + + (NumberOfSections * sizeof(coff_section))), + u32(0), + u32(0), + u16(0), + u16(0), + u32(IMAGE_SCN_ALIGN_4BYTES | IMAGE_SCN_CNT_INITIALIZED_DATA | + IMAGE_SCN_MEM_READ | IMAGE_SCN_MEM_WRITE)}, + }; + append(Buffer, SectionTable); + + // .idata$3 + const coff_import_directory_table_entry ImportDescriptor{ + u32(0), u32(0), u32(0), u32(0), u32(0), + }; + append(Buffer, ImportDescriptor); + + // Symbol Table + coff_symbol16 SymbolTable[NumberOfSymbols] = { + {{{0, 0, 0, 0, 0, 0, 0, 0}}, + u32(0), + u16(1), + u16(0), + IMAGE_SYM_CLASS_EXTERNAL, + 0}, + }; + SymbolTable[0].Name.Offset.Offset = sizeof(uint32_t); + append(Buffer, SymbolTable); + + // String Table + writeStringTable(Buffer, {NullImportDescriptorSymbolName}); + + StringRef F{reinterpret_cast(Buffer.data()), Buffer.size()}; + return {MemoryBufferRef(F, ImportName)}; +} + +NewArchiveMember ObjectFactory::createNullThunk(std::vector &Buffer) { + const uint32_t NumberOfSections = 2; + const uint32_t NumberOfSymbols = 1; + uint32_t VASize = is32bit(Machine) ? 4 : 8; + + // COFF Header + coff_file_header Header{ + u16(Machine), + u16(NumberOfSections), + u32(0), + u32(sizeof(Header) + (NumberOfSections * sizeof(coff_section)) + + // .idata$5 + VASize + + // .idata$4 + VASize), + u32(NumberOfSymbols), + u16(0), + u16(is32bit(Machine) ? IMAGE_FILE_32BIT_MACHINE : 0), + }; + append(Buffer, Header); + + // Section Header Table + const coff_section SectionTable[NumberOfSections] = { + {{'.', 'i', 'd', 'a', 't', 'a', '$', '5'}, + u32(0), + u32(0), + u32(VASize), + u32(sizeof(coff_file_header) + NumberOfSections * sizeof(coff_section)), + u32(0), + u32(0), + u16(0), + u16(0), + u32((is32bit(Machine) ? IMAGE_SCN_ALIGN_4BYTES + : IMAGE_SCN_ALIGN_8BYTES) | + IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ | + IMAGE_SCN_MEM_WRITE)}, + {{'.', 'i', 'd', 'a', 't', 'a', '$', '4'}, + u32(0), + u32(0), + u32(VASize), + u32(sizeof(coff_file_header) + NumberOfSections * sizeof(coff_section) + + VASize), + u32(0), + u32(0), + u16(0), + u16(0), + u32((is32bit(Machine) ? 
IMAGE_SCN_ALIGN_4BYTES + : IMAGE_SCN_ALIGN_8BYTES) | + IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ | + IMAGE_SCN_MEM_WRITE)}, + }; + append(Buffer, SectionTable); + + // .idata$5, ILT + append(Buffer, u32(0)); + if (!is32bit(Machine)) + append(Buffer, u32(0)); + + // .idata$4, IAT + append(Buffer, u32(0)); + if (!is32bit(Machine)) + append(Buffer, u32(0)); + + // Symbol Table + coff_symbol16 SymbolTable[NumberOfSymbols] = { + {{{0, 0, 0, 0, 0, 0, 0, 0}}, + u32(0), + u16(1), + u16(0), + IMAGE_SYM_CLASS_EXTERNAL, + 0}, + }; + SymbolTable[0].Name.Offset.Offset = sizeof(uint32_t); + append(Buffer, SymbolTable); + + // String Table + writeStringTable(Buffer, {NullThunkSymbolName}); + + StringRef F{reinterpret_cast(Buffer.data()), Buffer.size()}; + return {MemoryBufferRef{F, ImportName}}; +} + +NewArchiveMember ObjectFactory::createShortImport(StringRef Sym, + uint16_t Ordinal, + ImportType ImportType, + ImportNameType NameType) { + size_t ImpSize = ImportName.size() + Sym.size() + 2; // +2 for NULs + size_t Size = sizeof(coff_import_header) + ImpSize; + char *Buf = Alloc.Allocate(Size); + memset(Buf, 0, Size); + char *P = Buf; + + // Write short import library. + auto *Imp = reinterpret_cast(P); + P += sizeof(*Imp); + Imp->Sig2 = 0xFFFF; + Imp->Machine = Machine; + Imp->SizeOfData = ImpSize; + if (Ordinal > 0) + Imp->OrdinalHint = Ordinal; + Imp->TypeInfo = (NameType << 2) | ImportType; + + // Write symbol name and DLL name. + memcpy(P, Sym.data(), Sym.size()); + P += Sym.size() + 1; + memcpy(P, ImportName.data(), ImportName.size()); + + return {MemoryBufferRef(StringRef(Buf, Size), ImportName)}; +} + +NewArchiveMember ObjectFactory::createWeakExternal(StringRef Sym, + StringRef Weak, bool Imp) { + std::vector Buffer; + const uint32_t NumberOfSections = 1; + const uint32_t NumberOfSymbols = 5; + + // COFF Header + coff_file_header Header{ + u16(0), + u16(NumberOfSections), + u32(0), + u32(sizeof(Header) + (NumberOfSections * sizeof(coff_section))), + u32(NumberOfSymbols), + u16(0), + u16(0), + }; + append(Buffer, Header); + + // Section Header Table + const coff_section SectionTable[NumberOfSections] = { + {{'.', 'd', 'r', 'e', 'c', 't', 'v', 'e'}, + u32(0), + u32(0), + u32(0), + u32(0), + u32(0), + u32(0), + u16(0), + u16(0), + u32(IMAGE_SCN_LNK_INFO | IMAGE_SCN_LNK_REMOVE)}}; + append(Buffer, SectionTable); + + // Symbol Table + coff_symbol16 SymbolTable[NumberOfSymbols] = { + {{{'@', 'c', 'o', 'm', 'p', '.', 'i', 'd'}}, + u32(0), + u16(0xFFFF), + u16(0), + IMAGE_SYM_CLASS_STATIC, + 0}, + {{{'@', 'f', 'e', 'a', 't', '.', '0', '0'}}, + u32(0), + u16(0xFFFF), + u16(0), + IMAGE_SYM_CLASS_STATIC, + 0}, + {{{0, 0, 0, 0, 0, 0, 0, 0}}, + u32(0), + u16(0), + u16(0), + IMAGE_SYM_CLASS_EXTERNAL, + 0}, + {{{0, 0, 0, 0, 0, 0, 0, 0}}, + u32(0), + u16(0), + u16(0), + IMAGE_SYM_CLASS_WEAK_EXTERNAL, + 1}, + {{{2, 0, 0, 0, 3, 0, 0, 0}}, u32(0), u16(0), u16(0), uint8_t(0), 0}, + }; + SymbolTable[2].Name.Offset.Offset = sizeof(uint32_t); + + //__imp_ String Table + StringRef Prefix = Imp ? 
"__imp_" : ""; + SymbolTable[3].Name.Offset.Offset = + sizeof(uint32_t) + Sym.size() + Prefix.size() + 1; + append(Buffer, SymbolTable); + writeStringTable(Buffer, {(Prefix + Sym).str(), + (Prefix + Weak).str()}); + + // Copied here so we can still use writeStringTable + char *Buf = Alloc.Allocate(Buffer.size()); + memcpy(Buf, Buffer.data(), Buffer.size()); + return {MemoryBufferRef(StringRef(Buf, Buffer.size()), ImportName)}; +} + +std::error_code writeImportLibrary(StringRef ImportName, StringRef Path, + ArrayRef Exports, + MachineTypes Machine, bool MakeWeakAliases) { + + std::vector Members; + ObjectFactory OF(llvm::sys::path::filename(ImportName), Machine); + + std::vector ImportDescriptor; + Members.push_back(OF.createImportDescriptor(ImportDescriptor)); + + std::vector NullImportDescriptor; + Members.push_back(OF.createNullImportDescriptor(NullImportDescriptor)); + + std::vector NullThunk; + Members.push_back(OF.createNullThunk(NullThunk)); + + for (COFFShortExport E : Exports) { + if (E.Private) + continue; + + if (E.isWeak() && MakeWeakAliases) { + Members.push_back(OF.createWeakExternal(E.Name, E.ExtName, false)); + Members.push_back(OF.createWeakExternal(E.Name, E.ExtName, true)); + continue; + } + + ImportType ImportType = IMPORT_CODE; + if (E.Data) + ImportType = IMPORT_DATA; + if (E.Constant) + ImportType = IMPORT_CONST; + + StringRef SymbolName = E.SymbolName.empty() ? E.Name : E.SymbolName; + ImportNameType NameType = getNameType(SymbolName, E.Name, Machine); + Expected Name = E.ExtName.empty() + ? SymbolName + : replace(SymbolName, E.Name, E.ExtName); + + if (!Name) { + return errorToErrorCode(Name.takeError()); + } + + Members.push_back( + OF.createShortImport(*Name, E.Ordinal, ImportType, NameType)); + } + + std::pair Result = + writeArchive(Path, Members, /*WriteSymtab*/ true, object::Archive::K_GNU, + /*Deterministic*/ true, /*Thin*/ false); + + return Result.second; +} + +} // namespace object +} // namespace llvm diff --git a/interpreter/llvm/src/lib/Object/COFFModuleDefinition.cpp b/interpreter/llvm/src/lib/Object/COFFModuleDefinition.cpp new file mode 100644 index 0000000000000..510eac8b239ba --- /dev/null +++ b/interpreter/llvm/src/lib/Object/COFFModuleDefinition.cpp @@ -0,0 +1,337 @@ +//===--- COFFModuleDefinition.cpp - Simple DEF parser ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Windows-specific. +// A parser for the module-definition file (.def file). 
+//
+// The format of module-definition files is described in this document:
+// https://msdn.microsoft.com/en-us/library/28d6s79h.aspx
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Object/COFFModuleDefinition.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Object/COFF.h"
+#include "llvm/Object/COFFImportFile.h"
+#include "llvm/Object/Error.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm::COFF;
+using namespace llvm;
+
+namespace llvm {
+namespace object {
+
+enum Kind {
+  Unknown,
+  Eof,
+  Identifier,
+  Comma,
+  Equal,
+  KwBase,
+  KwConstant,
+  KwData,
+  KwExports,
+  KwHeapsize,
+  KwLibrary,
+  KwName,
+  KwNoname,
+  KwPrivate,
+  KwStacksize,
+  KwVersion,
+};
+
+struct Token {
+  explicit Token(Kind T = Unknown, StringRef S = "") : K(T), Value(S) {}
+  Kind K;
+  StringRef Value;
+};
+
+static bool isDecorated(StringRef Sym, bool MingwDef) {
+  // mingw does not prepend "_".
+  return (!MingwDef && Sym.startswith("_")) || Sym.startswith("@") ||
+         Sym.startswith("?");
+}
+
+static Error createError(const Twine &Err) {
+  return make_error<StringError>(StringRef(Err.str()),
+                                 object_error::parse_failed);
+}
+
+class Lexer {
+public:
+  Lexer(StringRef S) : Buf(S) {}
+
+  Token lex() {
+    Buf = Buf.trim();
+    if (Buf.empty())
+      return Token(Eof);
+
+    switch (Buf[0]) {
+    case '\0':
+      return Token(Eof);
+    case ';': {
+      size_t End = Buf.find('\n');
+      Buf = (End == Buf.npos) ? "" : Buf.drop_front(End);
+      return lex();
+    }
+    case '=':
+      Buf = Buf.drop_front();
+      // GNU dlltool accepts both = and ==.
+      if (Buf.startswith("="))
+        Buf = Buf.drop_front();
+      return Token(Equal, "=");
+    case ',':
+      Buf = Buf.drop_front();
+      return Token(Comma, ",");
+    case '"': {
+      StringRef S;
+      std::tie(S, Buf) = Buf.substr(1).split('"');
+      return Token(Identifier, S);
+    }
+    default: {
+      size_t End = Buf.find_first_of("=,\r\n \t\v");
+      StringRef Word = Buf.substr(0, End);
+      Kind K = llvm::StringSwitch<Kind>(Word)
+                   .Case("BASE", KwBase)
+                   .Case("CONSTANT", KwConstant)
+                   .Case("DATA", KwData)
+                   .Case("EXPORTS", KwExports)
+                   .Case("HEAPSIZE", KwHeapsize)
+                   .Case("LIBRARY", KwLibrary)
+                   .Case("NAME", KwName)
+                   .Case("NONAME", KwNoname)
+                   .Case("PRIVATE", KwPrivate)
+                   .Case("STACKSIZE", KwStacksize)
+                   .Case("VERSION", KwVersion)
+                   .Default(Identifier);
+      Buf = (End == Buf.npos) ?
"" : Buf.drop_front(End); + return Token(K, Word); + } + } + } + +private: + StringRef Buf; +}; + +class Parser { +public: + explicit Parser(StringRef S, MachineTypes M, bool B) + : Lex(S), Machine(M), MingwDef(B) {} + + Expected parse() { + do { + if (Error Err = parseOne()) + return std::move(Err); + } while (Tok.K != Eof); + return Info; + } + +private: + void read() { + if (Stack.empty()) { + Tok = Lex.lex(); + return; + } + Tok = Stack.back(); + Stack.pop_back(); + } + + Error readAsInt(uint64_t *I) { + read(); + if (Tok.K != Identifier || Tok.Value.getAsInteger(10, *I)) + return createError("integer expected"); + return Error::success(); + } + + Error expect(Kind Expected, StringRef Msg) { + read(); + if (Tok.K != Expected) + return createError(Msg); + return Error::success(); + } + + void unget() { Stack.push_back(Tok); } + + Error parseOne() { + read(); + switch (Tok.K) { + case Eof: + return Error::success(); + case KwExports: + for (;;) { + read(); + if (Tok.K != Identifier) { + unget(); + return Error::success(); + } + if (Error Err = parseExport()) + return Err; + } + case KwHeapsize: + return parseNumbers(&Info.HeapReserve, &Info.HeapCommit); + case KwStacksize: + return parseNumbers(&Info.StackReserve, &Info.StackCommit); + case KwLibrary: + case KwName: { + bool IsDll = Tok.K == KwLibrary; // Check before parseName. + std::string Name; + if (Error Err = parseName(&Name, &Info.ImageBase)) + return Err; + + Info.ImportName = Name; + + // Set the output file, but don't override /out if it was already passed. + if (Info.OutputFile.empty()) { + Info.OutputFile = Name; + // Append the appropriate file extension if not already present. + if (!sys::path::has_extension(Name)) + Info.OutputFile += IsDll ? ".dll" : ".exe"; + } + + return Error::success(); + } + case KwVersion: + return parseVersion(&Info.MajorImageVersion, &Info.MinorImageVersion); + default: + return createError("unknown directive: " + Tok.Value); + } + } + + Error parseExport() { + COFFShortExport E; + E.Name = Tok.Value; + read(); + if (Tok.K == Equal) { + read(); + if (Tok.K != Identifier) + return createError("identifier expected, but got " + Tok.Value); + E.ExtName = E.Name; + E.Name = Tok.Value; + } else { + unget(); + } + + if (Machine == IMAGE_FILE_MACHINE_I386) { + if (!isDecorated(E.Name, MingwDef)) + E.Name = (std::string("_").append(E.Name)); + if (!E.ExtName.empty() && !isDecorated(E.ExtName, MingwDef)) + E.ExtName = (std::string("_").append(E.ExtName)); + } + + for (;;) { + read(); + if (Tok.K == Identifier && Tok.Value[0] == '@') { + if (Tok.Value.drop_front().getAsInteger(10, E.Ordinal)) { + // Not an ordinal modifier at all, but the next export (fastcall + // decorated) - complete the current one. 
+ unget(); + Info.Exports.push_back(E); + return Error::success(); + } + read(); + if (Tok.K == KwNoname) { + E.Noname = true; + } else { + unget(); + } + continue; + } + if (Tok.K == KwData) { + E.Data = true; + continue; + } + if (Tok.K == KwConstant) { + E.Constant = true; + continue; + } + if (Tok.K == KwPrivate) { + E.Private = true; + continue; + } + unget(); + Info.Exports.push_back(E); + return Error::success(); + } + } + + // HEAPSIZE/STACKSIZE reserve[,commit] + Error parseNumbers(uint64_t *Reserve, uint64_t *Commit) { + if (Error Err = readAsInt(Reserve)) + return Err; + read(); + if (Tok.K != Comma) { + unget(); + Commit = nullptr; + return Error::success(); + } + if (Error Err = readAsInt(Commit)) + return Err; + return Error::success(); + } + + // NAME outputPath [BASE=address] + Error parseName(std::string *Out, uint64_t *Baseaddr) { + read(); + if (Tok.K == Identifier) { + *Out = Tok.Value; + } else { + *Out = ""; + unget(); + return Error::success(); + } + read(); + if (Tok.K == KwBase) { + if (Error Err = expect(Equal, "'=' expected")) + return Err; + if (Error Err = readAsInt(Baseaddr)) + return Err; + } else { + unget(); + *Baseaddr = 0; + } + return Error::success(); + } + + // VERSION major[.minor] + Error parseVersion(uint32_t *Major, uint32_t *Minor) { + read(); + if (Tok.K != Identifier) + return createError("identifier expected, but got " + Tok.Value); + StringRef V1, V2; + std::tie(V1, V2) = Tok.Value.split('.'); + if (V1.getAsInteger(10, *Major)) + return createError("integer expected, but got " + Tok.Value); + if (V2.empty()) + *Minor = 0; + else if (V2.getAsInteger(10, *Minor)) + return createError("integer expected, but got " + Tok.Value); + return Error::success(); + } + + Lexer Lex; + Token Tok; + std::vector Stack; + MachineTypes Machine; + COFFModuleDefinition Info; + bool MingwDef; +}; + +Expected parseCOFFModuleDefinition(MemoryBufferRef MB, + MachineTypes Machine, + bool MingwDef) { + return Parser(MB.getBuffer(), Machine, MingwDef).parse(); +} + +} // namespace object +} // namespace llvm diff --git a/interpreter/llvm/src/lib/Object/COFFObjectFile.cpp b/interpreter/llvm/src/lib/Object/COFFObjectFile.cpp index b7e4479bcadcb..0a2053477caf1 100644 --- a/interpreter/llvm/src/lib/Object/COFFObjectFile.cpp +++ b/interpreter/llvm/src/lib/Object/COFFObjectFile.cpp @@ -15,12 +15,12 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/iterator_range.h" +#include "llvm/BinaryFormat/COFF.h" #include "llvm/Object/Binary.h" #include "llvm/Object/COFF.h" #include "llvm/Object/Error.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Support/BinaryStreamReader.h" -#include "llvm/Support/COFF.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" @@ -227,8 +227,11 @@ uint32_t COFFObjectFile::getSymbolFlags(DataRefImpl Ref) const { if (Symb.isExternal() || Symb.isWeakExternal()) Result |= SymbolRef::SF_Global; - if (Symb.isWeakExternal()) + if (Symb.isWeakExternal()) { Result |= SymbolRef::SF_Weak; + // We use indirect to allow the archiver to write weak externs + Result |= SymbolRef::SF_Indirect; + } if (Symb.getSectionNumber() == COFF::IMAGE_SYM_ABSOLUTE) Result |= SymbolRef::SF_Absolute; @@ -293,6 +296,10 @@ uint64_t COFFObjectFile::getSectionAddress(DataRefImpl Ref) const { return Result; } +uint64_t COFFObjectFile::getSectionIndex(DataRefImpl Sec) const { + return toSec(Sec) - SectionTable; +} + uint64_t COFFObjectFile::getSectionSize(DataRefImpl Ref) const { return 
getSectionSize(toSec(Ref)); } @@ -646,6 +653,23 @@ std::error_code COFFObjectFile::initDebugDirectoryPtr() { return std::error_code(); } +std::error_code COFFObjectFile::initLoadConfigPtr() { + // Get the RVA of the debug directory. Do nothing if it does not exist. + const data_directory *DataEntry; + if (getDataDirectory(COFF::LOAD_CONFIG_TABLE, DataEntry)) + return std::error_code(); + + // Do nothing if the RVA is NULL. + if (DataEntry->RelativeVirtualAddress == 0) + return std::error_code(); + uintptr_t IntPtr = 0; + if (std::error_code EC = getRvaPtr(DataEntry->RelativeVirtualAddress, IntPtr)) + return EC; + + LoadConfig = (const void *)IntPtr; + return std::error_code(); +} + COFFObjectFile::COFFObjectFile(MemoryBufferRef Object, std::error_code &EC) : ObjectFile(Binary::ID_COFF, Object), COFFHeader(nullptr), COFFBigObjHeader(nullptr), PE32Header(nullptr), PE32PlusHeader(nullptr), @@ -780,6 +804,9 @@ COFFObjectFile::COFFObjectFile(MemoryBufferRef Object, std::error_code &EC) if ((EC = initDebugDirectoryPtr())) return; + if ((EC = initLoadConfigPtr())) + return; + EC = std::error_code(); } @@ -859,7 +886,7 @@ base_reloc_iterator COFFObjectFile::base_reloc_end() const { } uint8_t COFFObjectFile::getBytesInAddress() const { - return getArch() == Triple::x86_64 ? 8 : 4; + return getArch() == Triple::x86_64 || getArch() == Triple::aarch64 ? 8 : 4; } StringRef COFFObjectFile::getFileFormatName() const { @@ -1062,7 +1089,7 @@ COFFObjectFile::getSectionContents(const coff_section *Sec, // In COFF, a virtual section won't have any in-file // content, so the file pointer to the content will be zero. if (Sec->PointerToRawData == 0) - return object_error::parse_failed; + return std::error_code(); // The only thing that we need to verify is that the contents is contained // within the file bounds. We don't need to make sure it doesn't cover other // data, as there's nothing that says that is not allowed. 
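Aside on the getSectionContents change above: a minimal sketch of hypothetical caller code, invented for illustration and not part of this patch. A virtual section, e.g. uninitialized data with PointerToRawData == 0, now yields success with empty contents instead of failing with object_error::parse_failed:

  // Obj is a COFFObjectFile and Sec a const coff_section *, assumed in scope.
  ArrayRef<uint8_t> Contents;
  if (std::error_code EC = Obj.getSectionContents(Sec, Contents))
    return EC;                 // No longer taken for virtual sections.
  if (Contents.empty()) {
    // Virtual section: occupies memory at runtime but has no file-backed bytes.
  }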
@@ -1192,6 +1219,29 @@ void COFFObjectFile::getRelocationTypeName( Res = "Unknown"; } break; + case COFF::IMAGE_FILE_MACHINE_ARM64: + switch (Reloc->Type) { + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_ABSOLUTE); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_ADDR32); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_ADDR32NB); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_BRANCH26); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_PAGEBASE_REL21); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_REL21); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_PAGEOFFSET_12A); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_PAGEOFFSET_12L); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_SECREL); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_SECREL_LOW12A); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_SECREL_HIGH12A); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_SECREL_LOW12L); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_TOKEN); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_SECTION); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_ADDR64); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_BRANCH19); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_BRANCH14); + default: + Res = "Unknown"; + } + break; case COFF::IMAGE_FILE_MACHINE_I386: switch (Reloc->Type) { LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_ABSOLUTE); diff --git a/interpreter/llvm/src/lib/Object/Decompressor.cpp b/interpreter/llvm/src/lib/Object/Decompressor.cpp index 0be602b1fc1ab..53f084d7620e7 100644 --- a/interpreter/llvm/src/lib/Object/Decompressor.cpp +++ b/interpreter/llvm/src/lib/Object/Decompressor.cpp @@ -8,11 +8,11 @@ //===----------------------------------------------------------------------===// #include "llvm/Object/Decompressor.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/Object/ELFObjectFile.h" #include "llvm/Support/Compression.h" #include "llvm/Support/DataExtractor.h" #include "llvm/Support/Endian.h" -#include "llvm/Support/ELF.h" using namespace llvm; using namespace llvm::support::endian; @@ -88,11 +88,6 @@ bool Decompressor::isCompressedELFSection(uint64_t Flags, StringRef Name) { return (Flags & ELF::SHF_COMPRESSED) || isGnuStyle(Name); } -Error Decompressor::decompress(SmallString<32> &Out) { - Out.resize(DecompressedSize); - return decompress({Out.data(), (size_t)DecompressedSize}); -} - Error Decompressor::decompress(MutableArrayRef Buffer) { size_t Size = Buffer.size(); return zlib::uncompress(SectionData, Buffer.data(), Size); diff --git a/interpreter/llvm/src/lib/Object/ELF.cpp b/interpreter/llvm/src/lib/Object/ELF.cpp index 5798a3540f537..448fb1bd6b561 100644 --- a/interpreter/llvm/src/lib/Object/ELF.cpp +++ b/interpreter/llvm/src/lib/Object/ELF.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Object/ELF.h" -#include "llvm/Support/ELF.h" +#include "llvm/BinaryFormat/ELF.h" using namespace llvm; using namespace object; @@ -24,7 +24,7 @@ StringRef llvm::object::getELFRelocationTypeName(uint32_t Machine, switch (Machine) { case ELF::EM_X86_64: switch (Type) { -#include "llvm/Support/ELFRelocs/x86_64.def" +#include "llvm/BinaryFormat/ELFRelocs/x86_64.def" default: break; } @@ -32,77 +32,77 @@ StringRef llvm::object::getELFRelocationTypeName(uint32_t Machine, case ELF::EM_386: case ELF::EM_IAMCU: switch (Type) { -#include "llvm/Support/ELFRelocs/i386.def" +#include "llvm/BinaryFormat/ELFRelocs/i386.def" default: break; } break; case ELF::EM_MIPS: switch (Type) { -#include 
"llvm/Support/ELFRelocs/Mips.def" +#include "llvm/BinaryFormat/ELFRelocs/Mips.def" default: break; } break; case ELF::EM_AARCH64: switch (Type) { -#include "llvm/Support/ELFRelocs/AArch64.def" +#include "llvm/BinaryFormat/ELFRelocs/AArch64.def" default: break; } break; case ELF::EM_ARM: switch (Type) { -#include "llvm/Support/ELFRelocs/ARM.def" +#include "llvm/BinaryFormat/ELFRelocs/ARM.def" default: break; } break; case ELF::EM_AVR: switch (Type) { -#include "llvm/Support/ELFRelocs/AVR.def" +#include "llvm/BinaryFormat/ELFRelocs/AVR.def" default: break; } break; case ELF::EM_HEXAGON: switch (Type) { -#include "llvm/Support/ELFRelocs/Hexagon.def" +#include "llvm/BinaryFormat/ELFRelocs/Hexagon.def" default: break; } break; case ELF::EM_LANAI: switch (Type) { -#include "llvm/Support/ELFRelocs/Lanai.def" +#include "llvm/BinaryFormat/ELFRelocs/Lanai.def" default: break; } break; case ELF::EM_PPC: switch (Type) { -#include "llvm/Support/ELFRelocs/PowerPC.def" +#include "llvm/BinaryFormat/ELFRelocs/PowerPC.def" default: break; } break; case ELF::EM_PPC64: switch (Type) { -#include "llvm/Support/ELFRelocs/PowerPC64.def" +#include "llvm/BinaryFormat/ELFRelocs/PowerPC64.def" default: break; } break; case ELF::EM_RISCV: switch (Type) { -#include "llvm/Support/ELFRelocs/RISCV.def" +#include "llvm/BinaryFormat/ELFRelocs/RISCV.def" default: break; } break; case ELF::EM_S390: switch (Type) { -#include "llvm/Support/ELFRelocs/SystemZ.def" +#include "llvm/BinaryFormat/ELFRelocs/SystemZ.def" default: break; } @@ -111,27 +111,27 @@ StringRef llvm::object::getELFRelocationTypeName(uint32_t Machine, case ELF::EM_SPARC32PLUS: case ELF::EM_SPARCV9: switch (Type) { -#include "llvm/Support/ELFRelocs/Sparc.def" +#include "llvm/BinaryFormat/ELFRelocs/Sparc.def" default: break; } break; case ELF::EM_WEBASSEMBLY: switch (Type) { -#include "llvm/Support/ELFRelocs/WebAssembly.def" +#include "llvm/BinaryFormat/ELFRelocs/WebAssembly.def" default: break; } break; case ELF::EM_AMDGPU: switch (Type) { -#include "llvm/Support/ELFRelocs/AMDGPU.def" +#include "llvm/BinaryFormat/ELFRelocs/AMDGPU.def" default: break; } case ELF::EM_BPF: switch (Type) { -#include "llvm/Support/ELFRelocs/BPF.def" +#include "llvm/BinaryFormat/ELFRelocs/BPF.def" default: break; } @@ -192,6 +192,7 @@ StringRef llvm::object::getELFSectionTypeName(uint32_t Machine, unsigned Type) { STRINGIFY_ENUM_CASE(ELF, SHT_PREINIT_ARRAY); STRINGIFY_ENUM_CASE(ELF, SHT_GROUP); STRINGIFY_ENUM_CASE(ELF, SHT_SYMTAB_SHNDX); + STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_ODRTAB); STRINGIFY_ENUM_CASE(ELF, SHT_GNU_ATTRIBUTES); STRINGIFY_ENUM_CASE(ELF, SHT_GNU_HASH); STRINGIFY_ENUM_CASE(ELF, SHT_GNU_verdef); diff --git a/interpreter/llvm/src/lib/Object/ELFObjectFile.cpp b/interpreter/llvm/src/lib/Object/ELFObjectFile.cpp index 86f033bb6cbf4..fa136d782b5aa 100644 --- a/interpreter/llvm/src/lib/Object/ELFObjectFile.cpp +++ b/interpreter/llvm/src/lib/Object/ELFObjectFile.cpp @@ -11,15 +11,15 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Object/ELFObjectFile.h" #include "llvm/ADT/Triple.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/SubtargetFeature.h" #include "llvm/Object/ELF.h" -#include "llvm/Object/ELFObjectFile.h" #include "llvm/Object/ELFTypes.h" #include "llvm/Object/Error.h" -#include "llvm/Support/ARMBuildAttributes.h" #include "llvm/Support/ARMAttributeParser.h" -#include "llvm/Support/ELF.h" +#include "llvm/Support/ARMBuildAttributes.h" #include "llvm/Support/Endian.h" #include "llvm/Support/ErrorHandling.h" 
#include "llvm/Support/MathExtras.h" diff --git a/interpreter/llvm/src/lib/Object/IRObjectFile.cpp b/interpreter/llvm/src/lib/Object/IRObjectFile.cpp index adbf0de6d1bc4..e7807b0383351 100644 --- a/interpreter/llvm/src/lib/Object/IRObjectFile.cpp +++ b/interpreter/llvm/src/lib/Object/IRObjectFile.cpp @@ -14,6 +14,7 @@ #include "llvm/Object/IRObjectFile.h" #include "RecordStreamer.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/BinaryFormat/Magic.h" #include "llvm/Bitcode/BitcodeReader.h" #include "llvm/IR/GVMaterializer.h" #include "llvm/IR/LLVMContext.h" @@ -95,13 +96,13 @@ ErrorOr IRObjectFile::findBitcodeInObject(const ObjectFile &Obj } ErrorOr IRObjectFile::findBitcodeInMemBuffer(MemoryBufferRef Object) { - sys::fs::file_magic Type = sys::fs::identify_magic(Object.getBuffer()); + file_magic Type = identify_magic(Object.getBuffer()); switch (Type) { - case sys::fs::file_magic::bitcode: + case file_magic::bitcode: return Object; - case sys::fs::file_magic::elf_relocatable: - case sys::fs::file_magic::macho_object: - case sys::fs::file_magic::coff_object: { + case file_magic::elf_relocatable: + case file_magic::macho_object: + case file_magic::coff_object: { Expected> ObjFile = ObjectFile::createObjectFile(Object, Type); if (!ObjFile) @@ -138,3 +139,25 @@ IRObjectFile::create(MemoryBufferRef Object, LLVMContext &Context) { return std::unique_ptr( new IRObjectFile(*BCOrErr, std::move(Mods))); } + +Expected object::readIRSymtab(MemoryBufferRef MBRef) { + IRSymtabFile F; + ErrorOr BCOrErr = + IRObjectFile::findBitcodeInMemBuffer(MBRef); + if (!BCOrErr) + return errorCodeToError(BCOrErr.getError()); + + Expected BFCOrErr = getBitcodeFileContents(*BCOrErr); + if (!BFCOrErr) + return BFCOrErr.takeError(); + + Expected FCOrErr = irsymtab::readBitcode(*BFCOrErr); + if (!FCOrErr) + return FCOrErr.takeError(); + + F.Mods = std::move(BFCOrErr->Mods); + F.Symtab = std::move(FCOrErr->Symtab); + F.Strtab = std::move(FCOrErr->Strtab); + F.TheReader = std::move(FCOrErr->TheReader); + return std::move(F); +} diff --git a/interpreter/llvm/src/lib/Object/IRSymtab.cpp b/interpreter/llvm/src/lib/Object/IRSymtab.cpp index 5f0837882d600..7a6424a76a981 100644 --- a/interpreter/llvm/src/lib/Object/IRSymtab.cpp +++ b/interpreter/llvm/src/lib/Object/IRSymtab.cpp @@ -7,6 +7,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Object/IRSymtab.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" @@ -22,15 +23,17 @@ #include "llvm/IR/Mangler.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" +#include "llvm/Bitcode/BitcodeReader.h" #include "llvm/MC/StringTableBuilder.h" -#include "llvm/Object/IRSymtab.h" +#include "llvm/Object/IRObjectFile.h" #include "llvm/Object/ModuleSymbolTable.h" #include "llvm/Object/SymbolicFile.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Error.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/StringSaver.h" +#include "llvm/Support/VCSRevision.h" +#include "llvm/Support/raw_ostream.h" #include #include #include @@ -41,18 +44,33 @@ using namespace irsymtab; namespace { +const char *getExpectedProducerName() { + static char DefaultName[] = LLVM_VERSION_STRING +#ifdef LLVM_REVISION + " " LLVM_REVISION +#endif + ; + // Allows for testing of the irsymtab writer and upgrade mechanism. This + // environment variable should not be set by users. 
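+  // (Illustrative, hypothetical invocation: running any bitcode-reading tool
+  // under
+  //   LLVM_OVERRIDE_PRODUCER="some other producer" <tool> foo.bc
+  // makes the producer recorded here differ from the one stored in existing
+  // symbol tables, exercising the upgrade path in irsymtab::readBitcode.)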
+ if (char *OverrideName = getenv("LLVM_OVERRIDE_PRODUCER")) + return OverrideName; + return DefaultName; +} + +const char *kExpectedProducerName = getExpectedProducerName(); + /// Stores the temporary state that is required to build an IR symbol table. struct Builder { SmallVector &Symtab; - SmallVector &Strtab; + StringTableBuilder &StrtabBuilder; + StringSaver Saver; - Builder(SmallVector &Symtab, SmallVector &Strtab) - : Symtab(Symtab), Strtab(Strtab) {} - - StringTableBuilder StrtabBuilder{StringTableBuilder::RAW}; - - BumpPtrAllocator Alloc; - StringSaver Saver{Alloc}; + // This ctor initializes a StringSaver using the passed in BumpPtrAllocator. + // The StringTableBuilder does not create a copy of any strings added to it, + // so this provides somewhere to store any strings that we create. + Builder(SmallVector &Symtab, StringTableBuilder &StrtabBuilder, + BumpPtrAllocator &Alloc) + : Symtab(Symtab), StrtabBuilder(StrtabBuilder), Saver(Alloc) {} DenseMap ComdatMap; Mangler Mang; @@ -88,6 +106,10 @@ struct Builder { }; Error Builder::addModule(Module *M) { + if (M->getDataLayoutStr().empty()) + return make_error("input module has no datalayout", + inconvertibleErrorCode()); + SmallPtrSet Used; collectUsedGlobalVariables(*M, Used, /*CompilerUsed*/ false); @@ -103,9 +125,9 @@ Error Builder::addModule(Module *M) { if (TT.isOSBinFormatCOFF()) { if (auto E = M->materializeMetadata()) return E; - if (Metadata *Val = M->getModuleFlag("Linker Options")) { - MDNode *LinkerOptions = cast(Val); - for (const MDOperand &MDOptions : LinkerOptions->operands()) + if (NamedMDNode *LinkerOptions = + M->getNamedMetadata("llvm.linker.options")) { + for (MDNode *MDOptions : LinkerOptions->operands()) for (const MDOperand &MDOption : cast(MDOptions)->operands()) COFFLinkerOptsOS << " " << cast(MDOption)->getString(); } @@ -225,6 +247,8 @@ Error Builder::build(ArrayRef IRMods) { storage::Header Hdr; assert(!IRMods.empty()); + Hdr.Version = storage::Header::kCurrentVersion; + setStr(Hdr.Producer, kExpectedProducerName); setStr(Hdr.TargetTriple, IRMods[0]->getTargetTriple()); setStr(Hdr.SourceFileName, IRMods[0]->getSourceFileName()); TT = Triple(IRMods[0]->getTargetTriple()); @@ -234,7 +258,7 @@ Error Builder::build(ArrayRef IRMods) { return Err; COFFLinkerOptsOS.flush(); - setStr(Hdr.COFFLinkerOpts, COFFLinkerOpts); + setStr(Hdr.COFFLinkerOpts, Saver.save(COFFLinkerOpts)); // We are about to fill in the header's range fields, so reserve space for it // and copy it in afterwards. @@ -245,17 +269,80 @@ Error Builder::build(ArrayRef IRMods) { writeRange(Hdr.Uncommons, Uncommons); *reinterpret_cast(Symtab.data()) = Hdr; - - raw_svector_ostream OS(Strtab); - StrtabBuilder.finalizeInOrder(); - StrtabBuilder.write(OS); - return Error::success(); } } // end anonymous namespace Error irsymtab::build(ArrayRef Mods, SmallVector &Symtab, - SmallVector &Strtab) { - return Builder(Symtab, Strtab).build(Mods); + StringTableBuilder &StrtabBuilder, + BumpPtrAllocator &Alloc) { + return Builder(Symtab, StrtabBuilder, Alloc).build(Mods); +} + +// Upgrade a vector of bitcode modules created by an old version of LLVM by +// creating an irsymtab for them in the current format. 
+static Expected upgrade(ArrayRef BMs) { + FileContents FC; + + LLVMContext Ctx; + std::vector Mods; + std::vector> OwnedMods; + for (auto BM : BMs) { + Expected> MOrErr = + BM.getLazyModule(Ctx, /*ShouldLazyLoadMetadata*/ true, + /*IsImporting*/ false); + if (!MOrErr) + return MOrErr.takeError(); + + Mods.push_back(MOrErr->get()); + OwnedMods.push_back(std::move(*MOrErr)); + } + + StringTableBuilder StrtabBuilder(StringTableBuilder::RAW); + BumpPtrAllocator Alloc; + if (Error E = build(Mods, FC.Symtab, StrtabBuilder, Alloc)) + return std::move(E); + + StrtabBuilder.finalizeInOrder(); + FC.Strtab.resize(StrtabBuilder.getSize()); + StrtabBuilder.write((uint8_t *)FC.Strtab.data()); + + FC.TheReader = {{FC.Symtab.data(), FC.Symtab.size()}, + {FC.Strtab.data(), FC.Strtab.size()}}; + return std::move(FC); +} + +Expected irsymtab::readBitcode(const BitcodeFileContents &BFC) { + if (BFC.Mods.empty()) + return make_error("Bitcode file does not contain any modules", + inconvertibleErrorCode()); + + if (BFC.StrtabForSymtab.empty() || + BFC.Symtab.size() < sizeof(storage::Header)) + return upgrade(BFC.Mods); + + // We cannot use the regular reader to read the version and producer, because + // it will expect the header to be in the current format. The only thing we + // can rely on is that the version and producer will be present as the first + // struct elements. + auto *Hdr = reinterpret_cast(BFC.Symtab.data()); + unsigned Version = Hdr->Version; + StringRef Producer = Hdr->Producer.get(BFC.StrtabForSymtab); + if (Version != storage::Header::kCurrentVersion || + Producer != kExpectedProducerName) + return upgrade(BFC.Mods); + + FileContents FC; + FC.TheReader = {{BFC.Symtab.data(), BFC.Symtab.size()}, + {BFC.StrtabForSymtab.data(), BFC.StrtabForSymtab.size()}}; + + // Finally, make sure that the number of modules in the symbol table matches + // the number of modules in the bitcode file. If they differ, it may mean that + // the bitcode file was created by binary concatenation, so we need to create + // a new symbol table from scratch. 
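+  // (Illustrative, following the comment above: a file produced by, say,
+  // "cat a.bc b.bc > ab.bc" may contain two modules while the embedded symbol
+  // table describes only one; the counts then disagree and we rebuild via
+  // upgrade().)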
+ if (FC.TheReader.getNumModules() != BFC.Mods.size()) + return upgrade(std::move(BFC.Mods)); + + return std::move(FC); } diff --git a/interpreter/llvm/src/lib/Object/LLVMBuild.txt b/interpreter/llvm/src/lib/Object/LLVMBuild.txt index bae578c76f7e8..687713bab6a29 100644 --- a/interpreter/llvm/src/lib/Object/LLVMBuild.txt +++ b/interpreter/llvm/src/lib/Object/LLVMBuild.txt @@ -19,4 +19,4 @@ type = Library name = Object parent = Libraries -required_libraries = BitReader Core MC MCParser Support +required_libraries = BitReader Core MC BinaryFormat MCParser Support diff --git a/interpreter/llvm/src/lib/Object/MachOObjectFile.cpp b/interpreter/llvm/src/lib/Object/MachOObjectFile.cpp index 3d3fa07db3f48..2e4da9f15aa13 100644 --- a/interpreter/llvm/src/lib/Object/MachOObjectFile.cpp +++ b/interpreter/llvm/src/lib/Object/MachOObjectFile.cpp @@ -14,13 +14,14 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/None.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/MachO.h" #include "llvm/Object/Error.h" #include "llvm/Object/MachO.h" #include "llvm/Object/ObjectFile.h" @@ -32,10 +33,9 @@ #include "llvm/Support/Format.h" #include "llvm/Support/Host.h" #include "llvm/Support/LEB128.h" -#include "llvm/Support/MachO.h" #include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/SwapByteOrder.h" +#include "llvm/Support/raw_ostream.h" #include #include #include @@ -1820,6 +1820,10 @@ uint64_t MachOObjectFile::getSectionAddress(DataRefImpl Sec) const { return getSection(Sec).addr; } +uint64_t MachOObjectFile::getSectionIndex(DataRefImpl Sec) const { + return Sec.d.a; +} + uint64_t MachOObjectFile::getSectionSize(DataRefImpl Sec) const { // In the case if a malformed Mach-O file where the section offset is past // the end of the file or some part of the section size is past the end of @@ -1947,13 +1951,29 @@ MachOObjectFile::section_rel_end(DataRefImpl Sec) const { return relocation_iterator(RelocationRef(Ret, this)); } +relocation_iterator MachOObjectFile::extrel_begin() const { + DataRefImpl Ret; + Ret.d.a = 0; // Would normally be a section index. + Ret.d.b = 0; // Index into the external relocations + return relocation_iterator(RelocationRef(Ret, this)); +} + +relocation_iterator MachOObjectFile::extrel_end() const { + MachO::dysymtab_command DysymtabLoadCmd = getDysymtabLoadCommand(); + DataRefImpl Ret; + Ret.d.a = 0; // Would normally be a section index. 
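+  // (Illustrative usage, analogous to section relocations; O is a
+  // MachOObjectFile:
+  //   for (const RelocationRef &R :
+  //        make_range(O.extrel_begin(), O.extrel_end()))
+  //     ...;
+  // )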
+ Ret.d.b = DysymtabLoadCmd.nextrel; // Index into the external relocations + return relocation_iterator(RelocationRef(Ret, this)); +} + void MachOObjectFile::moveRelocationNext(DataRefImpl &Rel) const { ++Rel.d.b; } uint64_t MachOObjectFile::getRelocationOffset(DataRefImpl Rel) const { - assert(getHeader().filetype == MachO::MH_OBJECT && - "Only implemented for MH_OBJECT"); + assert((getHeader().filetype == MachO::MH_OBJECT || + getHeader().filetype == MachO::MH_KEXT_BUNDLE) && + "Only implemented for MH_OBJECT && MH_KEXT_BUNDLE"); MachO::any_relocation_info RE = getRelocation(Rel); return getAnyRelocationAddress(RE); } @@ -3265,7 +3285,6 @@ void MachOBindEntry::moveNext() { if (ImmValue) { SignExtended = MachO::BIND_OPCODE_MASK | ImmValue; Ordinal = SignExtended; - LibraryOrdinalSet = true; if (Ordinal < MachO::BIND_SPECIAL_DYLIB_FLAT_LOOKUP) { *E = malformedError("for BIND_OPCODE_SET_DYLIB_SPECIAL_IMM unknown " "special ordinal: " + Twine((int)Ordinal) + " for opcode at: " @@ -3275,6 +3294,7 @@ void MachOBindEntry::moveNext() { } } else Ordinal = 0; + LibraryOrdinalSet = true; DEBUG_WITH_TYPE( "mach-o-bind", dbgs() << "BIND_OPCODE_SET_DYLIB_SPECIAL_IMM: " @@ -4082,15 +4102,20 @@ MachOObjectFile::getThreadCommand(const LoadCommandInfo &L) const { MachO::any_relocation_info MachOObjectFile::getRelocation(DataRefImpl Rel) const { - DataRefImpl Sec; - Sec.d.a = Rel.d.a; uint32_t Offset; - if (is64Bit()) { - MachO::section_64 Sect = getSection64(Sec); - Offset = Sect.reloff; + if (getHeader().filetype == MachO::MH_OBJECT) { + DataRefImpl Sec; + Sec.d.a = Rel.d.a; + if (is64Bit()) { + MachO::section_64 Sect = getSection64(Sec); + Offset = Sect.reloff; + } else { + MachO::section Sect = getSection(Sec); + Offset = Sect.reloff; + } } else { - MachO::section Sect = getSection(Sec); - Offset = Sect.reloff; + MachO::dysymtab_command DysymtabLoadCmd = getDysymtabLoadCommand(); + Offset = DysymtabLoadCmd.extreloff; // Offset to the external relocations } auto P = reinterpret_cast( @@ -4310,3 +4335,9 @@ ObjectFile::createMachOObjectFile(MemoryBufferRef Buffer, return make_error("Unrecognized MachO magic number", object_error::invalid_file_type); } + +StringRef MachOObjectFile::mapDebugSectionName(StringRef Name) const { + return StringSwitch(Name) + .Case("debug_str_offs", "debug_str_offsets") + .Default(Name); +} diff --git a/interpreter/llvm/src/lib/Object/ModuleSymbolTable.cpp b/interpreter/llvm/src/lib/Object/ModuleSymbolTable.cpp index a5b42725d8179..f2e7a218c13a1 100644 --- a/interpreter/llvm/src/lib/Object/ModuleSymbolTable.cpp +++ b/interpreter/llvm/src/lib/Object/ModuleSymbolTable.cpp @@ -13,9 +13,10 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Object/ModuleSymbolTable.h" #include "RecordStreamer.h" -#include "llvm/ADT/SmallString.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" @@ -36,16 +37,15 @@ #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCTargetOptions.h" -#include "llvm/Object/ModuleSymbolTable.h" #include "llvm/Object/SymbolicFile.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/SMLoc.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" #include 
#include #include diff --git a/interpreter/llvm/src/lib/Object/Object.cpp b/interpreter/llvm/src/lib/Object/Object.cpp index 6df481b060e10..1d2859cfbe9d8 100644 --- a/interpreter/llvm/src/lib/Object/Object.cpp +++ b/interpreter/llvm/src/lib/Object/Object.cpp @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/SmallVector.h" #include "llvm-c/Object.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Object/ObjectFile.h" using namespace llvm; diff --git a/interpreter/llvm/src/lib/Object/ObjectFile.cpp b/interpreter/llvm/src/lib/Object/ObjectFile.cpp index 1f60e7157bd9c..8377dd0d73fa4 100644 --- a/interpreter/llvm/src/lib/Object/ObjectFile.cpp +++ b/interpreter/llvm/src/lib/Object/ObjectFile.cpp @@ -11,12 +11,13 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Object/ObjectFile.h" #include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/Magic.h" #include "llvm/Object/Binary.h" #include "llvm/Object/COFF.h" #include "llvm/Object/Error.h" #include "llvm/Object/MachO.h" -#include "llvm/Object/ObjectFile.h" #include "llvm/Object/Wasm.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" @@ -79,42 +80,42 @@ section_iterator ObjectFile::getRelocatedSection(DataRefImpl Sec) const { } Expected> -ObjectFile::createObjectFile(MemoryBufferRef Object, sys::fs::file_magic Type) { +ObjectFile::createObjectFile(MemoryBufferRef Object, file_magic Type) { StringRef Data = Object.getBuffer(); - if (Type == sys::fs::file_magic::unknown) - Type = sys::fs::identify_magic(Data); + if (Type == file_magic::unknown) + Type = identify_magic(Data); switch (Type) { - case sys::fs::file_magic::unknown: - case sys::fs::file_magic::bitcode: - case sys::fs::file_magic::coff_cl_gl_object: - case sys::fs::file_magic::archive: - case sys::fs::file_magic::macho_universal_binary: - case sys::fs::file_magic::windows_resource: + case file_magic::unknown: + case file_magic::bitcode: + case file_magic::coff_cl_gl_object: + case file_magic::archive: + case file_magic::macho_universal_binary: + case file_magic::windows_resource: return errorCodeToError(object_error::invalid_file_type); - case sys::fs::file_magic::elf: - case sys::fs::file_magic::elf_relocatable: - case sys::fs::file_magic::elf_executable: - case sys::fs::file_magic::elf_shared_object: - case sys::fs::file_magic::elf_core: + case file_magic::elf: + case file_magic::elf_relocatable: + case file_magic::elf_executable: + case file_magic::elf_shared_object: + case file_magic::elf_core: return errorOrToExpected(createELFObjectFile(Object)); - case sys::fs::file_magic::macho_object: - case sys::fs::file_magic::macho_executable: - case sys::fs::file_magic::macho_fixed_virtual_memory_shared_lib: - case sys::fs::file_magic::macho_core: - case sys::fs::file_magic::macho_preload_executable: - case sys::fs::file_magic::macho_dynamically_linked_shared_lib: - case sys::fs::file_magic::macho_dynamic_linker: - case sys::fs::file_magic::macho_bundle: - case sys::fs::file_magic::macho_dynamically_linked_shared_lib_stub: - case sys::fs::file_magic::macho_dsym_companion: - case sys::fs::file_magic::macho_kext_bundle: + case file_magic::macho_object: + case file_magic::macho_executable: + case file_magic::macho_fixed_virtual_memory_shared_lib: + case file_magic::macho_core: + case file_magic::macho_preload_executable: + case file_magic::macho_dynamically_linked_shared_lib: + case file_magic::macho_dynamic_linker: + case 
file_magic::macho_bundle: + case file_magic::macho_dynamically_linked_shared_lib_stub: + case file_magic::macho_dsym_companion: + case file_magic::macho_kext_bundle: return createMachOObjectFile(Object); - case sys::fs::file_magic::coff_object: - case sys::fs::file_magic::coff_import_library: - case sys::fs::file_magic::pecoff_executable: + case file_magic::coff_object: + case file_magic::coff_import_library: + case file_magic::pecoff_executable: return errorOrToExpected(createCOFFObjectFile(Object)); - case sys::fs::file_magic::wasm_object: + case file_magic::wasm_object: return createWasmObjectFile(Object); } llvm_unreachable("Unexpected Object File Type"); diff --git a/interpreter/llvm/src/lib/Object/SymbolicFile.cpp b/interpreter/llvm/src/lib/Object/SymbolicFile.cpp index 16cff5c228bdd..1042d29d2350c 100644 --- a/interpreter/llvm/src/lib/Object/SymbolicFile.cpp +++ b/interpreter/llvm/src/lib/Object/SymbolicFile.cpp @@ -11,12 +11,13 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Object/SymbolicFile.h" #include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/Magic.h" #include "llvm/Object/COFFImportFile.h" #include "llvm/Object/Error.h" #include "llvm/Object/IRObjectFile.h" #include "llvm/Object/ObjectFile.h" -#include "llvm/Object/SymbolicFile.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" @@ -34,45 +35,46 @@ SymbolicFile::SymbolicFile(unsigned int Type, MemoryBufferRef Source) SymbolicFile::~SymbolicFile() = default; -Expected> SymbolicFile::createSymbolicFile( - MemoryBufferRef Object, sys::fs::file_magic Type, LLVMContext *Context) { +Expected> +SymbolicFile::createSymbolicFile(MemoryBufferRef Object, file_magic Type, + LLVMContext *Context) { StringRef Data = Object.getBuffer(); - if (Type == sys::fs::file_magic::unknown) - Type = sys::fs::identify_magic(Data); + if (Type == file_magic::unknown) + Type = identify_magic(Data); switch (Type) { - case sys::fs::file_magic::bitcode: + case file_magic::bitcode: if (Context) return IRObjectFile::create(Object, *Context); LLVM_FALLTHROUGH; - case sys::fs::file_magic::unknown: - case sys::fs::file_magic::archive: - case sys::fs::file_magic::coff_cl_gl_object: - case sys::fs::file_magic::macho_universal_binary: - case sys::fs::file_magic::windows_resource: + case file_magic::unknown: + case file_magic::archive: + case file_magic::coff_cl_gl_object: + case file_magic::macho_universal_binary: + case file_magic::windows_resource: return errorCodeToError(object_error::invalid_file_type); - case sys::fs::file_magic::elf: - case sys::fs::file_magic::elf_executable: - case sys::fs::file_magic::elf_shared_object: - case sys::fs::file_magic::elf_core: - case sys::fs::file_magic::macho_executable: - case sys::fs::file_magic::macho_fixed_virtual_memory_shared_lib: - case sys::fs::file_magic::macho_core: - case sys::fs::file_magic::macho_preload_executable: - case sys::fs::file_magic::macho_dynamically_linked_shared_lib: - case sys::fs::file_magic::macho_dynamic_linker: - case sys::fs::file_magic::macho_bundle: - case sys::fs::file_magic::macho_dynamically_linked_shared_lib_stub: - case sys::fs::file_magic::macho_dsym_companion: - case sys::fs::file_magic::macho_kext_bundle: - case sys::fs::file_magic::pecoff_executable: - case sys::fs::file_magic::wasm_object: + case file_magic::elf: + case file_magic::elf_executable: + case file_magic::elf_shared_object: + case file_magic::elf_core: + case file_magic::macho_executable: + case 
file_magic::macho_fixed_virtual_memory_shared_lib: + case file_magic::macho_core: + case file_magic::macho_preload_executable: + case file_magic::macho_dynamically_linked_shared_lib: + case file_magic::macho_dynamic_linker: + case file_magic::macho_bundle: + case file_magic::macho_dynamically_linked_shared_lib_stub: + case file_magic::macho_dsym_companion: + case file_magic::macho_kext_bundle: + case file_magic::pecoff_executable: + case file_magic::wasm_object: return ObjectFile::createObjectFile(Object, Type); - case sys::fs::file_magic::coff_import_library: + case file_magic::coff_import_library: return std::unique_ptr(new COFFImportFile(Object)); - case sys::fs::file_magic::elf_relocatable: - case sys::fs::file_magic::macho_object: - case sys::fs::file_magic::coff_object: { + case file_magic::elf_relocatable: + case file_magic::macho_object: + case file_magic::coff_object: { Expected> Obj = ObjectFile::createObjectFile(Object, Type); if (!Obj || !Context) diff --git a/interpreter/llvm/src/lib/Object/WasmObjectFile.cpp b/interpreter/llvm/src/lib/Object/WasmObjectFile.cpp index 058686e4db9e9..7f80bf0b83a0a 100644 --- a/interpreter/llvm/src/lib/Object/WasmObjectFile.cpp +++ b/interpreter/llvm/src/lib/Object/WasmObjectFile.cpp @@ -11,6 +11,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" +#include "llvm/BinaryFormat/Wasm.h" #include "llvm/MC/SubtargetFeature.h" #include "llvm/Object/Binary.h" #include "llvm/Object/Error.h" @@ -21,13 +22,14 @@ #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/LEB128.h" -#include "llvm/Support/Wasm.h" #include #include #include #include #include +#define DEBUG_TYPE "wasm-object" + using namespace llvm; using namespace object; @@ -191,6 +193,9 @@ static Error readSection(WasmSection &Section, const uint8_t *&Ptr, WasmObjectFile::WasmObjectFile(MemoryBufferRef Buffer, Error &Err) : ObjectFile(Binary::ID_Wasm, Buffer) { + LinkingData.DataAlignment = 0; + LinkingData.DataSize = 0; + ErrorAsOutParameter ErrAsOutParam(&Err); Header.Magic = getData().substr(0, 4); if (Header.Magic != StringRef("\0asm", 4)) { @@ -256,6 +261,7 @@ Error WasmObjectFile::parseNameSection(const uint8_t *Ptr, const uint8_t *End) { while (Ptr < End) { uint8_t Type = readVarint7(Ptr); uint32_t Size = readVaruint32(Ptr); + const uint8_t *SubSectionEnd = Ptr + Size; switch (Type) { case wasm::WASM_NAMES_FUNCTION: { uint32_t Count = readVaruint32(Ptr); @@ -275,6 +281,9 @@ Error WasmObjectFile::parseNameSection(const uint8_t *Ptr, const uint8_t *End) { Ptr += Size; break; } + if (Ptr != SubSectionEnd) + return make_error("Name sub-section ended prematurely", + object_error::parse_failed); } if (Ptr != End) @@ -283,6 +292,57 @@ Error WasmObjectFile::parseNameSection(const uint8_t *Ptr, const uint8_t *End) { return Error::success(); } +Error WasmObjectFile::parseLinkingSection(const uint8_t *Ptr, + const uint8_t *End) { + HasLinkingSection = true; + while (Ptr < End) { + uint8_t Type = readVarint7(Ptr); + uint32_t Size = readVaruint32(Ptr); + const uint8_t *SubSectionEnd = Ptr + Size; + switch (Type) { + case wasm::WASM_SYMBOL_INFO: { + uint32_t Count = readVaruint32(Ptr); + while (Count--) { + StringRef Symbol = readString(Ptr); + DEBUG(dbgs() << "reading syminfo: " << Symbol << "\n"); + uint32_t Flags = readVaruint32(Ptr); + auto iter = SymbolMap.find(Symbol); + if (iter == SymbolMap.end()) { + return make_error( + "Invalid symbol name in linking section: " + Symbol, + object_error::parse_failed); + } 
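+        // The symbol was registered while parsing the import/export
+        // sections; record the linking flags from this sub-section on it.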
+ uint32_t SymIndex = iter->second; + assert(SymIndex < Symbols.size()); + Symbols[SymIndex].Flags = Flags; + DEBUG(dbgs() << "Set symbol flags index:" + << SymIndex << " name:" + << Symbols[SymIndex].Name << " expected:" + << Symbol << " flags: " << Flags << "\n"); + } + break; + } + case wasm::WASM_DATA_SIZE: + LinkingData.DataSize = readVaruint32(Ptr); + break; + case wasm::WASM_DATA_ALIGNMENT: + LinkingData.DataAlignment = readVaruint32(Ptr); + break; + case wasm::WASM_STACK_POINTER: + default: + Ptr += Size; + break; + } + if (Ptr != SubSectionEnd) + return make_error<GenericBinaryError>( + "Linking sub-section ended prematurely", object_error::parse_failed); + } + if (Ptr != End) + return make_error<GenericBinaryError>("Linking section ended prematurely", + object_error::parse_failed); + return Error::success(); +} + WasmSection* WasmObjectFile::findCustomSectionByName(StringRef Name) { for (WasmSection& Section : Sections) { if (Section.Type == wasm::WASM_SEC_CUSTOM && Section.Name == Name) @@ -325,6 +385,7 @@ Error WasmObjectFile::parseRelocSection(StringRef Name, const uint8_t *Ptr, case wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB: case wasm::R_WEBASSEMBLY_TABLE_INDEX_I32: case wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB: + case wasm::R_WEBASSEMBLY_GLOBAL_INDEX_LEB: break; case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_LEB: case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_SLEB: @@ -332,7 +393,8 @@ Error WasmObjectFile::parseRelocSection(StringRef Name, const uint8_t *Ptr, Reloc.Addend = readVarint32(Ptr); break; default: - return make_error<GenericBinaryError>("Bad relocation type", + return make_error<GenericBinaryError>("Bad relocation type: " + + Twine(Reloc.Type), object_error::parse_failed); } Section->Relocations.push_back(Reloc); @@ -349,6 +411,9 @@ Error WasmObjectFile::parseCustomSection(WasmSection &Sec, if (Sec.Name == "name") { if (Error Err = parseNameSection(Ptr, End)) return Err; + } else if (Sec.Name == "linking") { + if (Error Err = parseLinkingSection(Ptr, End)) + return Err; } else if (Sec.Name.startswith("reloc.")) { if (Error Err = parseRelocSection(Sec.Name, Ptr, End)) return Err; @@ -400,14 +465,20 @@ Error WasmObjectFile::parseImportSection(const uint8_t *Ptr, const uint8_t *End) switch (Im.Kind) { case wasm::WASM_EXTERNAL_FUNCTION: Im.SigIndex = readVaruint32(Ptr); + SymbolMap.try_emplace(Im.Field, Symbols.size()); Symbols.emplace_back(Im.Field, WasmSymbol::SymbolType::FUNCTION_IMPORT, Sections.size(), i); + DEBUG(dbgs() << "Adding import: " << Symbols.back() + << " sym index:" << Symbols.size() << "\n"); break; case wasm::WASM_EXTERNAL_GLOBAL: Im.Global.Type = readVarint7(Ptr); Im.Global.Mutable = readVaruint1(Ptr); + SymbolMap.try_emplace(Im.Field, Symbols.size()); Symbols.emplace_back(Im.Field, WasmSymbol::SymbolType::GLOBAL_IMPORT, Sections.size(), i); + DEBUG(dbgs() << "Adding import: " << Symbols.back() + << " sym index:" << Symbols.size() << "\n"); break; case wasm::WASM_EXTERNAL_MEMORY: Im.Memory = readLimits(Ptr); @@ -496,15 +567,16 @@ Error WasmObjectFile::parseExportSection(const uint8_t *Ptr, const uint8_t *End) Ex.Name = readString(Ptr); Ex.Kind = readUint8(Ptr); Ex.Index = readVaruint32(Ptr); - Exports.push_back(Ex); + WasmSymbol::SymbolType ExportType; + bool MakeSymbol = false; switch (Ex.Kind) { case wasm::WASM_EXTERNAL_FUNCTION: - Symbols.emplace_back(Ex.Name, WasmSymbol::SymbolType::FUNCTION_EXPORT, - Sections.size(), i); + ExportType = WasmSymbol::SymbolType::FUNCTION_EXPORT; + MakeSymbol = true; break; case wasm::WASM_EXTERNAL_GLOBAL: - Symbols.emplace_back(Ex.Name, WasmSymbol::SymbolType::GLOBAL_EXPORT, - Sections.size(), i); + ExportType =
WasmSymbol::SymbolType::GLOBAL_EXPORT; + MakeSymbol = true; break; case wasm::WASM_EXTERNAL_MEMORY: case wasm::WASM_EXTERNAL_TABLE: @@ -513,6 +585,21 @@ Error WasmObjectFile::parseExportSection(const uint8_t *Ptr, const uint8_t *End) return make_error( "Unexpected export kind", object_error::parse_failed); } + if (MakeSymbol) { + auto Pair = SymbolMap.try_emplace(Ex.Name, Symbols.size()); + if (Pair.second) { + Symbols.emplace_back(Ex.Name, ExportType, + Sections.size(), i); + DEBUG(dbgs() << "Adding export: " << Symbols.back() + << " sym index:" << Symbols.size() << "\n"); + } else { + uint32_t SymIndex = Pair.first->second; + Symbols[SymIndex] = WasmSymbol(Ex.Name, ExportType, Sections.size(), i); + DEBUG(dbgs() << "Replacing existing symbol: " << Symbols[SymIndex] + << " sym index:" << SymIndex << "\n"); + } + } + Exports.push_back(Ex); } if (Ptr != End) return make_error("Export section ended prematurely", @@ -588,15 +675,17 @@ Error WasmObjectFile::parseElemSection(const uint8_t *Ptr, const uint8_t *End) { } Error WasmObjectFile::parseDataSection(const uint8_t *Ptr, const uint8_t *End) { + const uint8_t *Start = Ptr; uint32_t Count = readVaruint32(Ptr); DataSegments.reserve(Count); while (Count--) { - wasm::WasmDataSegment Segment; - Segment.Index = readVaruint32(Ptr); - if (Error Err = readInitExpr(Segment.Offset, Ptr)) + WasmSegment Segment; + Segment.Data.MemoryIndex = readVaruint32(Ptr); + if (Error Err = readInitExpr(Segment.Data.Offset, Ptr)) return Err; uint32_t Size = readVaruint32(Ptr); - Segment.Content = ArrayRef(Ptr, Size); + Segment.Data.Content = ArrayRef(Ptr, Size); + Segment.SectionOffset = Ptr - Start; Ptr += Size; DataSegments.push_back(Segment); } @@ -620,6 +709,10 @@ uint32_t WasmObjectFile::getSymbolFlags(DataRefImpl Symb) const { uint32_t Result = SymbolRef::SF_None; const WasmSymbol &Sym = getWasmSymbol(Symb); + DEBUG(dbgs() << "getSymbolFlags: ptr=" << &Sym << " " << Sym << "\n"); + if (Sym.Flags & wasm::WASM_SYMBOL_FLAG_WEAK) + Result |= SymbolRef::SF_Weak; + switch (Sym.Type) { case WasmSymbol::SymbolType::FUNCTION_IMPORT: Result |= SymbolRef::SF_Undefined | SymbolRef::SF_Executable; @@ -629,6 +722,7 @@ uint32_t WasmObjectFile::getSymbolFlags(DataRefImpl Symb) const { break; case WasmSymbol::SymbolType::DEBUG_FUNCTION_NAME: Result |= SymbolRef::SF_Executable; + Result |= SymbolRef::SF_FormatSpecific; break; case WasmSymbol::SymbolType::GLOBAL_IMPORT: Result |= SymbolRef::SF_Undefined; @@ -662,8 +756,7 @@ const WasmSymbol &WasmObjectFile::getWasmSymbol(const SymbolRef &Symb) const { } Expected WasmObjectFile::getSymbolName(DataRefImpl Symb) const { - const WasmSymbol &Sym = getWasmSymbol(Symb); - return Sym.Name; + return getWasmSymbol(Symb).Name; } Expected WasmObjectFile::getSymbolAddress(DataRefImpl Symb) const { @@ -671,8 +764,18 @@ Expected WasmObjectFile::getSymbolAddress(DataRefImpl Symb) const { } uint64_t WasmObjectFile::getSymbolValueImpl(DataRefImpl Symb) const { - const WasmSymbol &Sym = getWasmSymbol(Symb); - return Sym.ElementIndex; + const WasmSymbol& Sym = getWasmSymbol(Symb); + switch (Sym.Type) { + case WasmSymbol::SymbolType::FUNCTION_IMPORT: + case WasmSymbol::SymbolType::GLOBAL_IMPORT: + return 0; + case WasmSymbol::SymbolType::FUNCTION_EXPORT: + case WasmSymbol::SymbolType::GLOBAL_EXPORT: + return Exports[Sym.ElementIndex].Index; + case WasmSymbol::SymbolType::DEBUG_FUNCTION_NAME: + return Sym.ElementIndex; + } + llvm_unreachable("invalid symbol type"); } uint32_t WasmObjectFile::getSymbolAlignment(DataRefImpl Symb) const { @@ -743,6 
+846,10 @@ std::error_code WasmObjectFile::getSectionName(DataRefImpl Sec, uint64_t WasmObjectFile::getSectionAddress(DataRefImpl Sec) const { return 0; } +uint64_t WasmObjectFile::getSectionIndex(DataRefImpl Sec) const { + return Sec.d.a; +} + uint64_t WasmObjectFile::getSectionSize(DataRefImpl Sec) const { const WasmSection &S = Sections[Sec.d.a]; return S.Content.size(); @@ -826,7 +933,7 @@ void WasmObjectFile::getRelocationTypeName( break; switch (Rel.Type) { -#include "llvm/Support/WasmRelocs/WebAssembly.def" +#include "llvm/BinaryFormat/WasmRelocs/WebAssembly.def" } #undef WASM_RELOC @@ -856,7 +963,9 @@ SubtargetFeatures WasmObjectFile::getFeatures() const { return SubtargetFeatures(); } -bool WasmObjectFile::isRelocatableObject() const { return false; } +bool WasmObjectFile::isRelocatableObject() const { + return HasLinkingSection; +} const WasmSection &WasmObjectFile::getWasmSection(DataRefImpl Ref) const { assert(Ref.d.a < Sections.size()); diff --git a/interpreter/llvm/src/lib/Object/WindowsResource.cpp b/interpreter/llvm/src/lib/Object/WindowsResource.cpp new file mode 100644 index 0000000000000..246eee5ddb311 --- /dev/null +++ b/interpreter/llvm/src/lib/Object/WindowsResource.cpp @@ -0,0 +1,720 @@ +//===-- WindowsResource.cpp -------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the .res file class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/WindowsResource.h" +#include "llvm/Object/COFF.h" +#include "llvm/Support/FileOutputBuffer.h" +#include "llvm/Support/MathExtras.h" +#include +#include +#include +#include + +using namespace llvm; +using namespace object; + +namespace llvm { +namespace object { + +#define RETURN_IF_ERROR(X) \ + if (auto EC = X) \ + return EC; + +const uint32_t MIN_HEADER_SIZE = 7 * sizeof(uint32_t) + 2 * sizeof(uint16_t); + +// COFF files seem to be inconsistent with alignment between sections, just use +// 8-byte because it makes everyone happy. +const uint32_t SECTION_ALIGNMENT = sizeof(uint64_t); + +uint32_t WindowsResourceParser::TreeNode::StringCount = 0; +uint32_t WindowsResourceParser::TreeNode::DataCount = 0; + +WindowsResource::WindowsResource(MemoryBufferRef Source) + : Binary(Binary::ID_WinRes, Source) { + size_t LeadingSize = WIN_RES_MAGIC_SIZE + WIN_RES_NULL_ENTRY_SIZE; + BBS = BinaryByteStream(Data.getBuffer().drop_front(LeadingSize), + support::little); +} + +Expected> +WindowsResource::createWindowsResource(MemoryBufferRef Source) { + if (Source.getBufferSize() < WIN_RES_MAGIC_SIZE + WIN_RES_NULL_ENTRY_SIZE) + return make_error( + "File too small to be a resource file", + object_error::invalid_file_type); + std::unique_ptr Ret(new WindowsResource(Source)); + return std::move(Ret); +} + +Expected WindowsResource::getHeadEntry() { + Error Err = Error::success(); + auto Ref = ResourceEntryRef(BinaryStreamRef(BBS), this, Err); + if (Err) + return std::move(Err); + return Ref; +} + +ResourceEntryRef::ResourceEntryRef(BinaryStreamRef Ref, + const WindowsResource *Owner, Error &Err) + : Reader(Ref), OwningRes(Owner) { + if (loadNext()) + Err = make_error("Could not read first entry.\n", + object_error::unexpected_eof); +} + +Error ResourceEntryRef::moveNext(bool &End) { + // Reached end of all the entries. 
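// ---- Editor's aside (illustrative usage, not part of the patch) -------------
// Typical use of the entry API defined here, mirroring
// WindowsResourceParser::parse() later in this file; MB is assumed to be a
// MemoryBufferRef holding a .res file:
//
//   Expected<std::unique_ptr<WindowsResource>> ResOrErr =
//       WindowsResource::createWindowsResource(MB);
//   if (!ResOrErr)
//     return ResOrErr.takeError();
//   Expected<ResourceEntryRef> EntryOrErr = (*ResOrErr)->getHeadEntry();
//   if (!EntryOrErr)
//     return EntryOrErr.takeError();
//   ResourceEntryRef Entry = EntryOrErr.get();
//   bool End = false;
//   while (!End) {
//     // ... inspect Entry.getData(), Entry.getTypeString(), etc. ...
//     if (Error Err = Entry.moveNext(End))  // sets End once no bytes remain
//       return Err;
//   }
// ------------------------------------------------------------------------------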
+ if (Reader.bytesRemaining() == 0) { + End = true; + return Error::success(); + } + RETURN_IF_ERROR(loadNext()); + + return Error::success(); +} + +static Error readStringOrId(BinaryStreamReader &Reader, uint16_t &ID, + ArrayRef &Str, bool &IsString) { + uint16_t IDFlag; + RETURN_IF_ERROR(Reader.readInteger(IDFlag)); + IsString = IDFlag != 0xffff; + + if (IsString) { + Reader.setOffset( + Reader.getOffset() - + sizeof(uint16_t)); // Re-read the bytes which we used to check the flag. + RETURN_IF_ERROR(Reader.readWideString(Str)); + } else + RETURN_IF_ERROR(Reader.readInteger(ID)); + + return Error::success(); +} + +Error ResourceEntryRef::loadNext() { + const WinResHeaderPrefix *Prefix; + RETURN_IF_ERROR(Reader.readObject(Prefix)); + + if (Prefix->HeaderSize < MIN_HEADER_SIZE) + return make_error("Header size is too small.", + object_error::parse_failed); + + RETURN_IF_ERROR(readStringOrId(Reader, TypeID, Type, IsStringType)); + + RETURN_IF_ERROR(readStringOrId(Reader, NameID, Name, IsStringName)); + + RETURN_IF_ERROR(Reader.padToAlignment(WIN_RES_HEADER_ALIGNMENT)); + + RETURN_IF_ERROR(Reader.readObject(Suffix)); + + RETURN_IF_ERROR(Reader.readArray(Data, Prefix->DataSize)); + + RETURN_IF_ERROR(Reader.padToAlignment(WIN_RES_DATA_ALIGNMENT)); + + return Error::success(); +} + +WindowsResourceParser::WindowsResourceParser() : Root(false) {} + +Error WindowsResourceParser::parse(WindowsResource *WR) { + auto EntryOrErr = WR->getHeadEntry(); + if (!EntryOrErr) + return EntryOrErr.takeError(); + + ResourceEntryRef Entry = EntryOrErr.get(); + bool End = false; + while (!End) { + Data.push_back(Entry.getData()); + + bool IsNewTypeString = false; + bool IsNewNameString = false; + + Root.addEntry(Entry, IsNewTypeString, IsNewNameString); + + if (IsNewTypeString) + StringTable.push_back(Entry.getTypeString()); + + if (IsNewNameString) + StringTable.push_back(Entry.getNameString()); + + RETURN_IF_ERROR(Entry.moveNext(End)); + } + + return Error::success(); +} + +void WindowsResourceParser::printTree(raw_ostream &OS) const { + ScopedPrinter Writer(OS); + Root.print(Writer, "Resource Tree"); +} + +void WindowsResourceParser::TreeNode::addEntry(const ResourceEntryRef &Entry, + bool &IsNewTypeString, + bool &IsNewNameString) { + TreeNode &TypeNode = addTypeNode(Entry, IsNewTypeString); + TreeNode &NameNode = TypeNode.addNameNode(Entry, IsNewNameString); + NameNode.addLanguageNode(Entry); +} + +WindowsResourceParser::TreeNode::TreeNode(bool IsStringNode) { + if (IsStringNode) + StringIndex = StringCount++; +} + +WindowsResourceParser::TreeNode::TreeNode(uint16_t MajorVersion, + uint16_t MinorVersion, + uint32_t Characteristics) + : IsDataNode(true), MajorVersion(MajorVersion), MinorVersion(MinorVersion), + Characteristics(Characteristics) { + DataIndex = DataCount++; +} + +std::unique_ptr +WindowsResourceParser::TreeNode::createStringNode() { + return std::unique_ptr(new TreeNode(true)); +} + +std::unique_ptr +WindowsResourceParser::TreeNode::createIDNode() { + return std::unique_ptr(new TreeNode(false)); +} + +std::unique_ptr +WindowsResourceParser::TreeNode::createDataNode(uint16_t MajorVersion, + uint16_t MinorVersion, + uint32_t Characteristics) { + return std::unique_ptr( + new TreeNode(MajorVersion, MinorVersion, Characteristics)); +} + +WindowsResourceParser::TreeNode & +WindowsResourceParser::TreeNode::addTypeNode(const ResourceEntryRef &Entry, + bool &IsNewTypeString) { + if (Entry.checkTypeString()) + return addChild(Entry.getTypeString(), IsNewTypeString); + else + return 
addChild(Entry.getTypeID()); +} + +WindowsResourceParser::TreeNode & +WindowsResourceParser::TreeNode::addNameNode(const ResourceEntryRef &Entry, + bool &IsNewNameString) { + if (Entry.checkNameString()) + return addChild(Entry.getNameString(), IsNewNameString); + else + return addChild(Entry.getNameID()); +} + +WindowsResourceParser::TreeNode & +WindowsResourceParser::TreeNode::addLanguageNode( + const ResourceEntryRef &Entry) { + return addChild(Entry.getLanguage(), true, Entry.getMajorVersion(), + Entry.getMinorVersion(), Entry.getCharacteristics()); +} + +WindowsResourceParser::TreeNode &WindowsResourceParser::TreeNode::addChild( + uint32_t ID, bool IsDataNode, uint16_t MajorVersion, uint16_t MinorVersion, + uint32_t Characteristics) { + auto Child = IDChildren.find(ID); + if (Child == IDChildren.end()) { + auto NewChild = + IsDataNode ? createDataNode(MajorVersion, MinorVersion, Characteristics) + : createIDNode(); + WindowsResourceParser::TreeNode &Node = *NewChild; + IDChildren.emplace(ID, std::move(NewChild)); + return Node; + } else + return *(Child->second); +} + +WindowsResourceParser::TreeNode & +WindowsResourceParser::TreeNode::addChild(ArrayRef NameRef, + bool &IsNewString) { + std::string NameString; + ArrayRef CorrectedName; + std::vector EndianCorrectedName; + if (sys::IsBigEndianHost) { + EndianCorrectedName.resize(NameRef.size() + 1); + std::copy(NameRef.begin(), NameRef.end(), EndianCorrectedName.begin() + 1); + EndianCorrectedName[0] = UNI_UTF16_BYTE_ORDER_MARK_SWAPPED; + CorrectedName = makeArrayRef(EndianCorrectedName); + } else + CorrectedName = NameRef; + convertUTF16ToUTF8String(CorrectedName, NameString); + + auto Child = StringChildren.find(NameString); + if (Child == StringChildren.end()) { + auto NewChild = createStringNode(); + IsNewString = true; + WindowsResourceParser::TreeNode &Node = *NewChild; + StringChildren.emplace(NameString, std::move(NewChild)); + return Node; + } else + return *(Child->second); +} + +void WindowsResourceParser::TreeNode::print(ScopedPrinter &Writer, + StringRef Name) const { + ListScope NodeScope(Writer, Name); + for (auto const &Child : StringChildren) { + Child.second->print(Writer, Child.first); + } + for (auto const &Child : IDChildren) { + Child.second->print(Writer, to_string(Child.first)); + } +} + +// This function returns the size of the entire resource tree, including +// directory tables, directory entries, and data entries. It does not include +// the directory strings or the relocations of the .rsrc section. +uint32_t WindowsResourceParser::TreeNode::getTreeSize() const { + uint32_t Size = (IDChildren.size() + StringChildren.size()) * + sizeof(coff_resource_dir_entry); + + // Reached a node pointing to a data entry. + if (IsDataNode) { + Size += sizeof(coff_resource_data_entry); + return Size; + } + + // If the node does not point to data, it must have a directory table pointing + // to other nodes. 
+ Size += sizeof(coff_resource_dir_table); + + for (auto const &Child : StringChildren) { + Size += Child.second->getTreeSize(); + } + for (auto const &Child : IDChildren) { + Size += Child.second->getTreeSize(); + } + return Size; +} + +class WindowsResourceCOFFWriter { +public: + WindowsResourceCOFFWriter(COFF::MachineTypes MachineType, + const WindowsResourceParser &Parser, Error &E); + std::unique_ptr write(); + +private: + void performFileLayout(); + void performSectionOneLayout(); + void performSectionTwoLayout(); + void writeCOFFHeader(); + void writeFirstSectionHeader(); + void writeSecondSectionHeader(); + void writeFirstSection(); + void writeSecondSection(); + void writeSymbolTable(); + void writeStringTable(); + void writeDirectoryTree(); + void writeDirectoryStringTable(); + void writeFirstSectionRelocations(); + std::unique_ptr OutputBuffer; + char *BufferStart; + uint64_t CurrentOffset = 0; + COFF::MachineTypes MachineType; + const WindowsResourceParser::TreeNode &Resources; + const ArrayRef> Data; + uint64_t FileSize; + uint32_t SymbolTableOffset; + uint32_t SectionOneSize; + uint32_t SectionOneOffset; + uint32_t SectionOneRelocations; + uint32_t SectionTwoSize; + uint32_t SectionTwoOffset; + const ArrayRef> StringTable; + std::vector StringTableOffsets; + std::vector DataOffsets; + std::vector RelocationAddresses; +}; + +WindowsResourceCOFFWriter::WindowsResourceCOFFWriter( + COFF::MachineTypes MachineType, const WindowsResourceParser &Parser, + Error &E) + : MachineType(MachineType), Resources(Parser.getTree()), + Data(Parser.getData()), StringTable(Parser.getStringTable()) { + performFileLayout(); + + OutputBuffer = MemoryBuffer::getNewMemBuffer(FileSize); +} + +void WindowsResourceCOFFWriter::performFileLayout() { + // Add size of COFF header. + FileSize = COFF::Header16Size; + + // one .rsrc section header for directory tree, another for resource data. + FileSize += 2 * COFF::SectionSize; + + performSectionOneLayout(); + performSectionTwoLayout(); + + // We have reached the address of the symbol table. + SymbolTableOffset = FileSize; + + FileSize += COFF::Symbol16Size; // size of the @feat.00 symbol. + FileSize += 4 * COFF::Symbol16Size; // symbol + aux for each section. + FileSize += Data.size() * COFF::Symbol16Size; // 1 symbol per resource. + FileSize += 4; // four null bytes for the string table. +} + +void WindowsResourceCOFFWriter::performSectionOneLayout() { + SectionOneOffset = FileSize; + + SectionOneSize = Resources.getTreeSize(); + uint32_t CurrentStringOffset = SectionOneSize; + uint32_t TotalStringTableSize = 0; + for (auto const &String : StringTable) { + StringTableOffsets.push_back(CurrentStringOffset); + uint32_t StringSize = String.size() * sizeof(UTF16) + sizeof(uint16_t); + CurrentStringOffset += StringSize; + TotalStringTableSize += StringSize; + } + SectionOneSize += alignTo(TotalStringTableSize, sizeof(uint32_t)); + + // account for the relocations of section one. + SectionOneRelocations = FileSize + SectionOneSize; + FileSize += SectionOneSize; + FileSize += + Data.size() * COFF::RelocationSize; // one relocation for each resource. + FileSize = alignTo(FileSize, SECTION_ALIGNMENT); +} + +void WindowsResourceCOFFWriter::performSectionTwoLayout() { + // add size of .rsrc$2 section, which contains all resource data on 8-byte + // alignment. 
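// ---- Editor's aside (illustrative sketch, not part of the patch) ------------
// The layout passes above and below round sizes up with llvm::alignTo. For the
// power-of-two alignments used in this file (sizeof(uint32_t),
// sizeof(uint64_t), SECTION_ALIGNMENT) that is equivalent to the usual bit
// trick; this helper name is hypothetical:
static uint64_t alignToSketch(uint64_t Value, uint64_t PowerOfTwoAlign) {
  // Round Value up to the next multiple of PowerOfTwoAlign.
  return (Value + PowerOfTwoAlign - 1) & ~(PowerOfTwoAlign - 1);
}
// e.g. alignToSketch(13, 8) == 16: a 13-byte resource blob occupies 16 bytes
// in .rsrc$02 before the next blob begins.
// ------------------------------------------------------------------------------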
+ SectionTwoOffset = FileSize; + SectionTwoSize = 0; + for (auto const &Entry : Data) { + DataOffsets.push_back(SectionTwoSize); + SectionTwoSize += alignTo(Entry.size(), sizeof(uint64_t)); + } + FileSize += SectionTwoSize; + FileSize = alignTo(FileSize, SECTION_ALIGNMENT); +} + +static std::time_t getTime() { + std::time_t Now = time(nullptr); + if (Now < 0 || !isUInt<32>(Now)) + return UINT32_MAX; + return Now; +} + +std::unique_ptr WindowsResourceCOFFWriter::write() { + BufferStart = const_cast(OutputBuffer->getBufferStart()); + + writeCOFFHeader(); + writeFirstSectionHeader(); + writeSecondSectionHeader(); + writeFirstSection(); + writeSecondSection(); + writeSymbolTable(); + writeStringTable(); + + return std::move(OutputBuffer); +} + +void WindowsResourceCOFFWriter::writeCOFFHeader() { + // Write the COFF header. + auto *Header = reinterpret_cast(BufferStart); + switch (MachineType) { + case COFF::IMAGE_FILE_MACHINE_ARMNT: + Header->Machine = COFF::IMAGE_FILE_MACHINE_ARMNT; + break; + case COFF::IMAGE_FILE_MACHINE_AMD64: + Header->Machine = COFF::IMAGE_FILE_MACHINE_AMD64; + break; + case COFF::IMAGE_FILE_MACHINE_I386: + Header->Machine = COFF::IMAGE_FILE_MACHINE_I386; + break; + default: + Header->Machine = COFF::IMAGE_FILE_MACHINE_UNKNOWN; + } + Header->NumberOfSections = 2; + Header->TimeDateStamp = getTime(); + Header->PointerToSymbolTable = SymbolTableOffset; + // One symbol for every resource plus 2 for each section and @feat.00 + Header->NumberOfSymbols = Data.size() + 5; + Header->SizeOfOptionalHeader = 0; + Header->Characteristics = COFF::IMAGE_FILE_32BIT_MACHINE; +} + +void WindowsResourceCOFFWriter::writeFirstSectionHeader() { + // Write the first section header. + CurrentOffset += sizeof(coff_file_header); + auto *SectionOneHeader = + reinterpret_cast(BufferStart + CurrentOffset); + strncpy(SectionOneHeader->Name, ".rsrc$01", (size_t)COFF::NameSize); + SectionOneHeader->VirtualSize = 0; + SectionOneHeader->VirtualAddress = 0; + SectionOneHeader->SizeOfRawData = SectionOneSize; + SectionOneHeader->PointerToRawData = SectionOneOffset; + SectionOneHeader->PointerToRelocations = SectionOneRelocations; + SectionOneHeader->PointerToLinenumbers = 0; + SectionOneHeader->NumberOfRelocations = Data.size(); + SectionOneHeader->NumberOfLinenumbers = 0; + SectionOneHeader->Characteristics += COFF::IMAGE_SCN_CNT_INITIALIZED_DATA; + SectionOneHeader->Characteristics += COFF::IMAGE_SCN_MEM_READ; +} + +void WindowsResourceCOFFWriter::writeSecondSectionHeader() { + // Write the second section header. + CurrentOffset += sizeof(coff_section); + auto *SectionTwoHeader = + reinterpret_cast(BufferStart + CurrentOffset); + strncpy(SectionTwoHeader->Name, ".rsrc$02", (size_t)COFF::NameSize); + SectionTwoHeader->VirtualSize = 0; + SectionTwoHeader->VirtualAddress = 0; + SectionTwoHeader->SizeOfRawData = SectionTwoSize; + SectionTwoHeader->PointerToRawData = SectionTwoOffset; + SectionTwoHeader->PointerToRelocations = 0; + SectionTwoHeader->PointerToLinenumbers = 0; + SectionTwoHeader->NumberOfRelocations = 0; + SectionTwoHeader->NumberOfLinenumbers = 0; + SectionTwoHeader->Characteristics = COFF::IMAGE_SCN_CNT_INITIALIZED_DATA; + SectionTwoHeader->Characteristics += COFF::IMAGE_SCN_MEM_READ; +} + +void WindowsResourceCOFFWriter::writeFirstSection() { + // Write section one. 
+ CurrentOffset += sizeof(coff_section); + + writeDirectoryTree(); + writeDirectoryStringTable(); + writeFirstSectionRelocations(); + + CurrentOffset = alignTo(CurrentOffset, SECTION_ALIGNMENT); +} + +void WindowsResourceCOFFWriter::writeSecondSection() { + // Now write the .rsrc$02 section. + for (auto const &RawDataEntry : Data) { + std::copy(RawDataEntry.begin(), RawDataEntry.end(), + BufferStart + CurrentOffset); + CurrentOffset += alignTo(RawDataEntry.size(), sizeof(uint64_t)); + } + + CurrentOffset = alignTo(CurrentOffset, SECTION_ALIGNMENT); +} + +void WindowsResourceCOFFWriter::writeSymbolTable() { + // Now write the symbol table. + // First, the feat symbol. + auto *Symbol = reinterpret_cast(BufferStart + CurrentOffset); + strncpy(Symbol->Name.ShortName, "@feat.00", (size_t)COFF::NameSize); + Symbol->Value = 0x11; + Symbol->SectionNumber = 0xffff; + Symbol->Type = COFF::IMAGE_SYM_DTYPE_NULL; + Symbol->StorageClass = COFF::IMAGE_SYM_CLASS_STATIC; + Symbol->NumberOfAuxSymbols = 0; + CurrentOffset += sizeof(coff_symbol16); + + // Now write the .rsrc1 symbol + aux. + Symbol = reinterpret_cast(BufferStart + CurrentOffset); + strncpy(Symbol->Name.ShortName, ".rsrc$01", (size_t)COFF::NameSize); + Symbol->Value = 0; + Symbol->SectionNumber = 1; + Symbol->Type = COFF::IMAGE_SYM_DTYPE_NULL; + Symbol->StorageClass = COFF::IMAGE_SYM_CLASS_STATIC; + Symbol->NumberOfAuxSymbols = 1; + CurrentOffset += sizeof(coff_symbol16); + auto *Aux = reinterpret_cast(BufferStart + + CurrentOffset); + Aux->Length = SectionOneSize; + Aux->NumberOfRelocations = Data.size(); + Aux->NumberOfLinenumbers = 0; + Aux->CheckSum = 0; + Aux->NumberLowPart = 0; + Aux->Selection = 0; + CurrentOffset += sizeof(coff_aux_section_definition); + + // Now write the .rsrc2 symbol + aux. + Symbol = reinterpret_cast(BufferStart + CurrentOffset); + strncpy(Symbol->Name.ShortName, ".rsrc$02", (size_t)COFF::NameSize); + Symbol->Value = 0; + Symbol->SectionNumber = 2; + Symbol->Type = COFF::IMAGE_SYM_DTYPE_NULL; + Symbol->StorageClass = COFF::IMAGE_SYM_CLASS_STATIC; + Symbol->NumberOfAuxSymbols = 1; + CurrentOffset += sizeof(coff_symbol16); + Aux = reinterpret_cast(BufferStart + + CurrentOffset); + Aux->Length = SectionTwoSize; + Aux->NumberOfRelocations = 0; + Aux->NumberOfLinenumbers = 0; + Aux->CheckSum = 0; + Aux->NumberLowPart = 0; + Aux->Selection = 0; + CurrentOffset += sizeof(coff_aux_section_definition); + + // Now write a symbol for each relocation. + for (unsigned i = 0; i < Data.size(); i++) { + char RelocationName[9]; + sprintf(RelocationName, "$R%06X", DataOffsets[i]); + Symbol = reinterpret_cast(BufferStart + CurrentOffset); + strncpy(Symbol->Name.ShortName, RelocationName, (size_t)COFF::NameSize); + Symbol->Value = DataOffsets[i]; + Symbol->SectionNumber = 2; + Symbol->Type = COFF::IMAGE_SYM_DTYPE_NULL; + Symbol->StorageClass = COFF::IMAGE_SYM_CLASS_STATIC; + Symbol->NumberOfAuxSymbols = 0; + CurrentOffset += sizeof(coff_symbol16); + } +} + +void WindowsResourceCOFFWriter::writeStringTable() { + // Just 4 null bytes for the string table. + auto COFFStringTable = reinterpret_cast(BufferStart + CurrentOffset); + memset(COFFStringTable, 0, 4); +} + +void WindowsResourceCOFFWriter::writeDirectoryTree() { + // Traverse parsed resource tree breadth-first and write the corresponding + // COFF objects. 
+ std::queue Queue; + Queue.push(&Resources); + uint32_t NextLevelOffset = + sizeof(coff_resource_dir_table) + (Resources.getStringChildren().size() + + Resources.getIDChildren().size()) * + sizeof(coff_resource_dir_entry); + std::vector DataEntriesTreeOrder; + uint32_t CurrentRelativeOffset = 0; + + while (!Queue.empty()) { + auto CurrentNode = Queue.front(); + Queue.pop(); + auto *Table = reinterpret_cast(BufferStart + + CurrentOffset); + Table->Characteristics = CurrentNode->getCharacteristics(); + Table->TimeDateStamp = 0; + Table->MajorVersion = CurrentNode->getMajorVersion(); + Table->MinorVersion = CurrentNode->getMinorVersion(); + auto &IDChildren = CurrentNode->getIDChildren(); + auto &StringChildren = CurrentNode->getStringChildren(); + Table->NumberOfNameEntries = StringChildren.size(); + Table->NumberOfIDEntries = IDChildren.size(); + CurrentOffset += sizeof(coff_resource_dir_table); + CurrentRelativeOffset += sizeof(coff_resource_dir_table); + + // Write the directory entries immediately following each directory table. + for (auto const &Child : StringChildren) { + auto *Entry = reinterpret_cast(BufferStart + + CurrentOffset); + Entry->Identifier.setNameOffset( + StringTableOffsets[Child.second->getStringIndex()]); + if (Child.second->checkIsDataNode()) { + Entry->Offset.DataEntryOffset = NextLevelOffset; + NextLevelOffset += sizeof(coff_resource_data_entry); + DataEntriesTreeOrder.push_back(Child.second.get()); + } else { + Entry->Offset.SubdirOffset = NextLevelOffset + (1 << 31); + NextLevelOffset += sizeof(coff_resource_dir_table) + + (Child.second->getStringChildren().size() + + Child.second->getIDChildren().size()) * + sizeof(coff_resource_dir_entry); + Queue.push(Child.second.get()); + } + CurrentOffset += sizeof(coff_resource_dir_entry); + CurrentRelativeOffset += sizeof(coff_resource_dir_entry); + } + for (auto const &Child : IDChildren) { + auto *Entry = reinterpret_cast(BufferStart + + CurrentOffset); + Entry->Identifier.ID = Child.first; + if (Child.second->checkIsDataNode()) { + Entry->Offset.DataEntryOffset = NextLevelOffset; + NextLevelOffset += sizeof(coff_resource_data_entry); + DataEntriesTreeOrder.push_back(Child.second.get()); + } else { + Entry->Offset.SubdirOffset = NextLevelOffset + (1 << 31); + NextLevelOffset += sizeof(coff_resource_dir_table) + + (Child.second->getStringChildren().size() + + Child.second->getIDChildren().size()) * + sizeof(coff_resource_dir_entry); + Queue.push(Child.second.get()); + } + CurrentOffset += sizeof(coff_resource_dir_entry); + CurrentRelativeOffset += sizeof(coff_resource_dir_entry); + } + } + + RelocationAddresses.resize(Data.size()); + // Now write all the resource data entries. + for (auto DataNodes : DataEntriesTreeOrder) { + auto *Entry = reinterpret_cast(BufferStart + + CurrentOffset); + RelocationAddresses[DataNodes->getDataIndex()] = CurrentRelativeOffset; + Entry->DataRVA = 0; // Set to zero because it is a relocation. 
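// ---- Editor's aside (illustrative sketch, not part of the patch) ------------
// The (1 << 31) added to SubdirOffset above follows the PE/COFF resource
// format: when the high bit of a directory entry's offset field is set, the
// entry points at another directory table; when clear, it points at a
// coff_resource_data_entry. A reader would decode the field roughly as
// follows (helper names are hypothetical):
static bool isSubdirectory(uint32_t OffsetField) {
  return (OffsetField & 0x80000000u) != 0;  // high bit = subdirectory
}
static uint32_t entryOffset(uint32_t OffsetField) {
  return OffsetField & 0x7fffffffu;         // remaining 31 bits = offset
}
// ------------------------------------------------------------------------------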
+ Entry->DataSize = Data[DataNodes->getDataIndex()].size(); + Entry->Codepage = 0; + Entry->Reserved = 0; + CurrentOffset += sizeof(coff_resource_data_entry); + CurrentRelativeOffset += sizeof(coff_resource_data_entry); + } +} + +void WindowsResourceCOFFWriter::writeDirectoryStringTable() { + // Now write the directory string table for .rsrc$01 + uint32_t TotalStringTableSize = 0; + for (auto &String : StringTable) { + uint16_t Length = String.size(); + support::endian::write16le(BufferStart + CurrentOffset, Length); + CurrentOffset += sizeof(uint16_t); + auto *Start = reinterpret_cast(BufferStart + CurrentOffset); + std::copy(String.begin(), String.end(), Start); + CurrentOffset += Length * sizeof(UTF16); + TotalStringTableSize += Length * sizeof(UTF16) + sizeof(uint16_t); + } + CurrentOffset += + alignTo(TotalStringTableSize, sizeof(uint32_t)) - TotalStringTableSize; +} + +void WindowsResourceCOFFWriter::writeFirstSectionRelocations() { + + // Now write the relocations for .rsrc$01 + // Five symbols already in table before we start, @feat.00 and 2 for each + // .rsrc section. + uint32_t NextSymbolIndex = 5; + for (unsigned i = 0; i < Data.size(); i++) { + auto *Reloc = + reinterpret_cast(BufferStart + CurrentOffset); + Reloc->VirtualAddress = RelocationAddresses[i]; + Reloc->SymbolTableIndex = NextSymbolIndex++; + switch (MachineType) { + case COFF::IMAGE_FILE_MACHINE_ARMNT: + Reloc->Type = COFF::IMAGE_REL_ARM_ADDR32NB; + break; + case COFF::IMAGE_FILE_MACHINE_AMD64: + Reloc->Type = COFF::IMAGE_REL_AMD64_ADDR32NB; + break; + case COFF::IMAGE_FILE_MACHINE_I386: + Reloc->Type = COFF::IMAGE_REL_I386_DIR32NB; + break; + default: + Reloc->Type = 0; + } + CurrentOffset += sizeof(coff_relocation); + } +} + +Expected> +writeWindowsResourceCOFF(COFF::MachineTypes MachineType, + const WindowsResourceParser &Parser) { + Error E = Error::success(); + WindowsResourceCOFFWriter Writer(MachineType, Parser, E); + if (E) + return std::move(E); + return Writer.write(); +} + +} // namespace object +} // namespace llvm diff --git a/interpreter/llvm/src/lib/ObjectYAML/CMakeLists.txt b/interpreter/llvm/src/lib/ObjectYAML/CMakeLists.txt index 37f8fd7bce1a6..7af0b9c194e64 100644 --- a/interpreter/llvm/src/lib/ObjectYAML/CMakeLists.txt +++ b/interpreter/llvm/src/lib/ObjectYAML/CMakeLists.txt @@ -1,4 +1,7 @@ add_llvm_library(LLVMObjectYAML + CodeViewYAMLTypes.cpp + CodeViewYAMLSymbols.cpp + CodeViewYAMLDebugSections.cpp COFFYAML.cpp DWARFEmitter.cpp DWARFVisitor.cpp diff --git a/interpreter/llvm/src/lib/ObjectYAML/COFFYAML.cpp b/interpreter/llvm/src/lib/ObjectYAML/COFFYAML.cpp index 7f9f4c1f8c2cb..1103159fc98df 100644 --- a/interpreter/llvm/src/lib/ObjectYAML/COFFYAML.cpp +++ b/interpreter/llvm/src/lib/ObjectYAML/COFFYAML.cpp @@ -12,17 +12,25 @@ //===----------------------------------------------------------------------===// #include "llvm/ObjectYAML/COFFYAML.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/YAMLTraits.h" +#include +#include #define ECase(X) IO.enumCase(Value, #X, COFF::X); + namespace llvm { namespace COFFYAML { + Section::Section() { memset(&Header, 0, sizeof(COFF::section)); } Symbol::Symbol() { memset(&Header, 0, sizeof(COFF::symbol)); } Object::Object() { memset(&Header, 0, sizeof(COFF::header)); } -} + +} // end namespace COFFYAML namespace yaml { + void ScalarEnumerationTraits::enumeration( IO &IO, COFFYAML::COMDATType &Value) { IO.enumCase(Value, "0", 0); @@ -172,20 +180,20 @@ void ScalarEnumerationTraits::enumeration( void ScalarEnumerationTraits::enumeration( IO &IO, 
COFF::WindowsSubsystem &Value) { - ECase(IMAGE_SUBSYSTEM_UNKNOWN); - ECase(IMAGE_SUBSYSTEM_NATIVE); - ECase(IMAGE_SUBSYSTEM_WINDOWS_GUI); - ECase(IMAGE_SUBSYSTEM_WINDOWS_CUI); - ECase(IMAGE_SUBSYSTEM_OS2_CUI); - ECase(IMAGE_SUBSYSTEM_POSIX_CUI); - ECase(IMAGE_SUBSYSTEM_NATIVE_WINDOWS); - ECase(IMAGE_SUBSYSTEM_WINDOWS_CE_GUI); - ECase(IMAGE_SUBSYSTEM_EFI_APPLICATION); - ECase(IMAGE_SUBSYSTEM_EFI_BOOT_SERVICE_DRIVER); - ECase(IMAGE_SUBSYSTEM_EFI_RUNTIME_DRIVER); - ECase(IMAGE_SUBSYSTEM_EFI_ROM); - ECase(IMAGE_SUBSYSTEM_XBOX); - ECase(IMAGE_SUBSYSTEM_WINDOWS_BOOT_APPLICATION); + ECase(IMAGE_SUBSYSTEM_UNKNOWN); + ECase(IMAGE_SUBSYSTEM_NATIVE); + ECase(IMAGE_SUBSYSTEM_WINDOWS_GUI); + ECase(IMAGE_SUBSYSTEM_WINDOWS_CUI); + ECase(IMAGE_SUBSYSTEM_OS2_CUI); + ECase(IMAGE_SUBSYSTEM_POSIX_CUI); + ECase(IMAGE_SUBSYSTEM_NATIVE_WINDOWS); + ECase(IMAGE_SUBSYSTEM_WINDOWS_CE_GUI); + ECase(IMAGE_SUBSYSTEM_EFI_APPLICATION); + ECase(IMAGE_SUBSYSTEM_EFI_BOOT_SERVICE_DRIVER); + ECase(IMAGE_SUBSYSTEM_EFI_RUNTIME_DRIVER); + ECase(IMAGE_SUBSYSTEM_EFI_ROM); + ECase(IMAGE_SUBSYSTEM_XBOX); + ECase(IMAGE_SUBSYSTEM_WINDOWS_BOOT_APPLICATION); } #undef ECase @@ -252,12 +260,15 @@ void ScalarBitSetTraits::bitset( #undef BCase namespace { + struct NSectionSelectionType { NSectionSelectionType(IO &) : SelectionType(COFFYAML::COMDATType(0)) {} NSectionSelectionType(IO &, uint8_t C) : SelectionType(COFFYAML::COMDATType(C)) {} + uint8_t denormalize(IO &) { return SelectionType; } + COFFYAML::COMDATType SelectionType; }; @@ -266,7 +277,9 @@ struct NWeakExternalCharacteristics { : Characteristics(COFFYAML::WeakExternalCharacteristics(0)) {} NWeakExternalCharacteristics(IO &, uint32_t C) : Characteristics(COFFYAML::WeakExternalCharacteristics(C)) {} + uint32_t denormalize(IO &) { return Characteristics; } + COFFYAML::WeakExternalCharacteristics Characteristics; }; @@ -275,7 +288,9 @@ struct NSectionCharacteristics { : Characteristics(COFF::SectionCharacteristics(0)) {} NSectionCharacteristics(IO &, uint32_t C) : Characteristics(COFF::SectionCharacteristics(C)) {} + uint32_t denormalize(IO &) { return Characteristics; } + COFF::SectionCharacteristics Characteristics; }; @@ -284,13 +299,16 @@ struct NAuxTokenType { : AuxType(COFFYAML::AuxSymbolType(0)) {} NAuxTokenType(IO &, uint8_t C) : AuxType(COFFYAML::AuxSymbolType(C)) {} + uint32_t denormalize(IO &) { return AuxType; } + COFFYAML::AuxSymbolType AuxType; }; struct NStorageClass { NStorageClass(IO &) : StorageClass(COFF::SymbolStorageClass(0)) {} NStorageClass(IO &, uint8_t S) : StorageClass(COFF::SymbolStorageClass(S)) {} + uint8_t denormalize(IO &) { return StorageClass; } COFF::SymbolStorageClass StorageClass; @@ -299,7 +317,9 @@ struct NStorageClass { struct NMachine { NMachine(IO &) : Machine(COFF::MachineTypes(0)) {} NMachine(IO &, uint16_t M) : Machine(COFF::MachineTypes(M)) {} + uint16_t denormalize(IO &) { return Machine; } + COFF::MachineTypes Machine; }; @@ -307,6 +327,7 @@ struct NHeaderCharacteristics { NHeaderCharacteristics(IO &) : Characteristics(COFF::Characteristics(0)) {} NHeaderCharacteristics(IO &, uint16_t C) : Characteristics(COFF::Characteristics(C)) {} + uint16_t denormalize(IO &) { return Characteristics; } COFF::Characteristics Characteristics; @@ -316,13 +337,16 @@ template struct NType { NType(IO &) : Type(RelocType(0)) {} NType(IO &, uint16_t T) : Type(RelocType(T)) {} + uint16_t denormalize(IO &) { return Type; } + RelocType Type; }; struct NWindowsSubsystem { NWindowsSubsystem(IO &) : Subsystem(COFF::WindowsSubsystem(0)) {} NWindowsSubsystem(IO &, 
uint16_t C) : Subsystem(COFF::WindowsSubsystem(C)) {} + uint16_t denormalize(IO &) { return Subsystem; } COFF::WindowsSubsystem Subsystem; @@ -332,12 +356,13 @@ struct NDLLCharacteristics { NDLLCharacteristics(IO &) : Characteristics(COFF::DLLCharacteristics(0)) {} NDLLCharacteristics(IO &, uint16_t C) : Characteristics(COFF::DLLCharacteristics(C)) {} + uint16_t denormalize(IO &) { return Characteristics; } COFF::DLLCharacteristics Characteristics; }; -} +} // end anonymous namespace void MappingTraits::mapping(IO &IO, COFFYAML::Relocation &Rel) { @@ -488,7 +513,16 @@ void MappingTraits::mapping(IO &IO, COFFYAML::Section &Sec) { IO.mapOptional("VirtualAddress", Sec.Header.VirtualAddress, 0U); IO.mapOptional("VirtualSize", Sec.Header.VirtualSize, 0U); IO.mapOptional("Alignment", Sec.Alignment, 0U); - IO.mapRequired("SectionData", Sec.SectionData); + + // If this is a .debug$S or .debug$T section parse the semantic representation + // of the symbols/types. If it is any other kind of section, just deal in raw + // bytes. + IO.mapOptional("SectionData", Sec.SectionData); + if (Sec.Name == ".debug$S") + IO.mapOptional("Subsections", Sec.DebugS); + else if (Sec.Name == ".debug$T") + IO.mapOptional("Types", Sec.DebugT); + IO.mapOptional("Relocations", Sec.Relocations); } @@ -500,5 +534,6 @@ void MappingTraits::mapping(IO &IO, COFFYAML::Object &Obj) { IO.mapRequired("symbols", Obj.Symbols); } -} -} +} // end namespace yaml + +} // end namespace llvm diff --git a/interpreter/llvm/src/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp b/interpreter/llvm/src/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp new file mode 100644 index 0000000000000..60b0ea28030a2 --- /dev/null +++ b/interpreter/llvm/src/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp @@ -0,0 +1,958 @@ +//===- CodeViewYAMLDebugSections.cpp - CodeView YAMLIO debug sections -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines classes for handling the YAML representation of CodeView +// Debug Info. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ObjectYAML/CodeViewYAMLDebugSections.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/COFF.h" +#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/CodeViewError.h" +#include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h" +#include "llvm/DebugInfo/CodeView/DebugCrossExSubsection.h" +#include "llvm/DebugInfo/CodeView/DebugCrossImpSubsection.h" +#include "llvm/DebugInfo/CodeView/DebugFrameDataSubsection.h" +#include "llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h" +#include "llvm/DebugInfo/CodeView/DebugLinesSubsection.h" +#include "llvm/DebugInfo/CodeView/DebugStringTableSubsection.h" +#include "llvm/DebugInfo/CodeView/DebugSubsection.h" +#include "llvm/DebugInfo/CodeView/DebugSubsectionVisitor.h" +#include "llvm/DebugInfo/CodeView/DebugSymbolRVASubsection.h" +#include "llvm/DebugInfo/CodeView/DebugSymbolsSubsection.h" +#include "llvm/DebugInfo/CodeView/Line.h" +#include "llvm/DebugInfo/CodeView/StringsAndChecksums.h" +#include "llvm/DebugInfo/CodeView/TypeIndex.h" +#include "llvm/ObjectYAML/CodeViewYAMLSymbols.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/BinaryStreamReader.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/YAMLTraits.h" +#include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include +#include +#include +#include + +using namespace llvm; +using namespace llvm::codeview; +using namespace llvm::CodeViewYAML; +using namespace llvm::CodeViewYAML::detail; +using namespace llvm::yaml; + +LLVM_YAML_IS_SEQUENCE_VECTOR(SourceFileChecksumEntry) +LLVM_YAML_IS_SEQUENCE_VECTOR(SourceLineEntry) +LLVM_YAML_IS_SEQUENCE_VECTOR(SourceColumnEntry) +LLVM_YAML_IS_SEQUENCE_VECTOR(SourceLineBlock) +LLVM_YAML_IS_SEQUENCE_VECTOR(SourceLineInfo) +LLVM_YAML_IS_SEQUENCE_VECTOR(InlineeSite) +LLVM_YAML_IS_SEQUENCE_VECTOR(InlineeInfo) +LLVM_YAML_IS_SEQUENCE_VECTOR(CrossModuleExport) +LLVM_YAML_IS_SEQUENCE_VECTOR(YAMLCrossModuleImport) +LLVM_YAML_IS_SEQUENCE_VECTOR(YAMLFrameData) + +LLVM_YAML_DECLARE_SCALAR_TRAITS(HexFormattedString, false) +LLVM_YAML_DECLARE_ENUM_TRAITS(DebugSubsectionKind) +LLVM_YAML_DECLARE_ENUM_TRAITS(FileChecksumKind) +LLVM_YAML_DECLARE_BITSET_TRAITS(LineFlags) + +LLVM_YAML_DECLARE_MAPPING_TRAITS(CrossModuleExport) +LLVM_YAML_DECLARE_MAPPING_TRAITS(YAMLFrameData) +LLVM_YAML_DECLARE_MAPPING_TRAITS(YAMLCrossModuleImport) +LLVM_YAML_DECLARE_MAPPING_TRAITS(CrossModuleImportItem) +LLVM_YAML_DECLARE_MAPPING_TRAITS(SourceLineEntry) +LLVM_YAML_DECLARE_MAPPING_TRAITS(SourceColumnEntry) +LLVM_YAML_DECLARE_MAPPING_TRAITS(SourceFileChecksumEntry) +LLVM_YAML_DECLARE_MAPPING_TRAITS(SourceLineBlock) +LLVM_YAML_DECLARE_MAPPING_TRAITS(InlineeSite) + +namespace llvm { +namespace CodeViewYAML { +namespace detail { + +struct YAMLSubsectionBase { + explicit YAMLSubsectionBase(DebugSubsectionKind Kind) : Kind(Kind) {} + virtual ~YAMLSubsectionBase() = default; + + virtual void map(IO &IO) = 0; + virtual std::shared_ptr + toCodeViewSubsection(BumpPtrAllocator &Allocator, + const codeview::StringsAndChecksums &SC) const = 0; + + DebugSubsectionKind Kind; +}; + +} // end namespace detail +} // end namespace CodeViewYAML +} // end namespace llvm + +namespace { + +struct YAMLChecksumsSubsection : public YAMLSubsectionBase { + YAMLChecksumsSubsection() + : 
YAMLSubsectionBase(DebugSubsectionKind::FileChecksums) {} + + void map(IO &IO) override; + std::shared_ptr + toCodeViewSubsection(BumpPtrAllocator &Allocator, + const codeview::StringsAndChecksums &SC) const override; + static Expected> + fromCodeViewSubsection(const DebugStringTableSubsectionRef &Strings, + const DebugChecksumsSubsectionRef &FC); + + std::vector Checksums; +}; + +struct YAMLLinesSubsection : public YAMLSubsectionBase { + YAMLLinesSubsection() : YAMLSubsectionBase(DebugSubsectionKind::Lines) {} + + void map(IO &IO) override; + std::shared_ptr + toCodeViewSubsection(BumpPtrAllocator &Allocator, + const codeview::StringsAndChecksums &SC) const override; + static Expected> + fromCodeViewSubsection(const DebugStringTableSubsectionRef &Strings, + const DebugChecksumsSubsectionRef &Checksums, + const DebugLinesSubsectionRef &Lines); + + SourceLineInfo Lines; +}; + +struct YAMLInlineeLinesSubsection : public YAMLSubsectionBase { + YAMLInlineeLinesSubsection() + : YAMLSubsectionBase(DebugSubsectionKind::InlineeLines) {} + + void map(IO &IO) override; + std::shared_ptr + toCodeViewSubsection(BumpPtrAllocator &Allocator, + const codeview::StringsAndChecksums &SC) const override; + static Expected> + fromCodeViewSubsection(const DebugStringTableSubsectionRef &Strings, + const DebugChecksumsSubsectionRef &Checksums, + const DebugInlineeLinesSubsectionRef &Lines); + + InlineeInfo InlineeLines; +}; + +struct YAMLCrossModuleExportsSubsection : public YAMLSubsectionBase { + YAMLCrossModuleExportsSubsection() + : YAMLSubsectionBase(DebugSubsectionKind::CrossScopeExports) {} + + void map(IO &IO) override; + std::shared_ptr + toCodeViewSubsection(BumpPtrAllocator &Allocator, + const codeview::StringsAndChecksums &SC) const override; + static Expected> + fromCodeViewSubsection(const DebugCrossModuleExportsSubsectionRef &Exports); + + std::vector Exports; +}; + +struct YAMLCrossModuleImportsSubsection : public YAMLSubsectionBase { + YAMLCrossModuleImportsSubsection() + : YAMLSubsectionBase(DebugSubsectionKind::CrossScopeImports) {} + + void map(IO &IO) override; + std::shared_ptr + toCodeViewSubsection(BumpPtrAllocator &Allocator, + const codeview::StringsAndChecksums &SC) const override; + static Expected> + fromCodeViewSubsection(const DebugStringTableSubsectionRef &Strings, + const DebugCrossModuleImportsSubsectionRef &Imports); + + std::vector Imports; +}; + +struct YAMLSymbolsSubsection : public YAMLSubsectionBase { + YAMLSymbolsSubsection() : YAMLSubsectionBase(DebugSubsectionKind::Symbols) {} + + void map(IO &IO) override; + std::shared_ptr + toCodeViewSubsection(BumpPtrAllocator &Allocator, + const codeview::StringsAndChecksums &SC) const override; + static Expected> + fromCodeViewSubsection(const DebugSymbolsSubsectionRef &Symbols); + + std::vector Symbols; +}; + +struct YAMLStringTableSubsection : public YAMLSubsectionBase { + YAMLStringTableSubsection() + : YAMLSubsectionBase(DebugSubsectionKind::StringTable) {} + + void map(IO &IO) override; + std::shared_ptr + toCodeViewSubsection(BumpPtrAllocator &Allocator, + const codeview::StringsAndChecksums &SC) const override; + static Expected> + fromCodeViewSubsection(const DebugStringTableSubsectionRef &Strings); + + std::vector Strings; +}; + +struct YAMLFrameDataSubsection : public YAMLSubsectionBase { + YAMLFrameDataSubsection() + : YAMLSubsectionBase(DebugSubsectionKind::FrameData) {} + + void map(IO &IO) override; + std::shared_ptr + toCodeViewSubsection(BumpPtrAllocator &Allocator, + const codeview::StringsAndChecksums &SC) const 
override; + static Expected> + fromCodeViewSubsection(const DebugStringTableSubsectionRef &Strings, + const DebugFrameDataSubsectionRef &Frames); + + std::vector Frames; +}; + +struct YAMLCoffSymbolRVASubsection : public YAMLSubsectionBase { + YAMLCoffSymbolRVASubsection() + : YAMLSubsectionBase(DebugSubsectionKind::CoffSymbolRVA) {} + + void map(IO &IO) override; + std::shared_ptr + toCodeViewSubsection(BumpPtrAllocator &Allocator, + const codeview::StringsAndChecksums &SC) const override; + static Expected> + fromCodeViewSubsection(const DebugSymbolRVASubsectionRef &RVAs); + + std::vector RVAs; +}; + +} // end anonymous namespace + +void ScalarBitSetTraits::bitset(IO &io, LineFlags &Flags) { + io.bitSetCase(Flags, "HasColumnInfo", LF_HaveColumns); + io.enumFallback(Flags); +} + +void ScalarEnumerationTraits::enumeration( + IO &io, FileChecksumKind &Kind) { + io.enumCase(Kind, "None", FileChecksumKind::None); + io.enumCase(Kind, "MD5", FileChecksumKind::MD5); + io.enumCase(Kind, "SHA1", FileChecksumKind::SHA1); + io.enumCase(Kind, "SHA256", FileChecksumKind::SHA256); +} + +void ScalarTraits::output(const HexFormattedString &Value, + void *ctx, raw_ostream &Out) { + StringRef Bytes(reinterpret_cast(Value.Bytes.data()), + Value.Bytes.size()); + Out << toHex(Bytes); +} + +StringRef ScalarTraits::input(StringRef Scalar, void *ctxt, + HexFormattedString &Value) { + std::string H = fromHex(Scalar); + Value.Bytes.assign(H.begin(), H.end()); + return StringRef(); +} + +void MappingTraits::mapping(IO &IO, SourceLineEntry &Obj) { + IO.mapRequired("Offset", Obj.Offset); + IO.mapRequired("LineStart", Obj.LineStart); + IO.mapRequired("IsStatement", Obj.IsStatement); + IO.mapRequired("EndDelta", Obj.EndDelta); +} + +void MappingTraits::mapping(IO &IO, SourceColumnEntry &Obj) { + IO.mapRequired("StartColumn", Obj.StartColumn); + IO.mapRequired("EndColumn", Obj.EndColumn); +} + +void MappingTraits::mapping(IO &IO, SourceLineBlock &Obj) { + IO.mapRequired("FileName", Obj.FileName); + IO.mapRequired("Lines", Obj.Lines); + IO.mapRequired("Columns", Obj.Columns); +} + +void MappingTraits::mapping(IO &IO, CrossModuleExport &Obj) { + IO.mapRequired("LocalId", Obj.Local); + IO.mapRequired("GlobalId", Obj.Global); +} + +void MappingTraits::mapping(IO &IO, + YAMLCrossModuleImport &Obj) { + IO.mapRequired("Module", Obj.ModuleName); + IO.mapRequired("Imports", Obj.ImportIds); +} + +void MappingTraits::mapping( + IO &IO, SourceFileChecksumEntry &Obj) { + IO.mapRequired("FileName", Obj.FileName); + IO.mapRequired("Kind", Obj.Kind); + IO.mapRequired("Checksum", Obj.ChecksumBytes); +} + +void MappingTraits::mapping(IO &IO, InlineeSite &Obj) { + IO.mapRequired("FileName", Obj.FileName); + IO.mapRequired("LineNum", Obj.SourceLineNum); + IO.mapRequired("Inlinee", Obj.Inlinee); + IO.mapOptional("ExtraFiles", Obj.ExtraFiles); +} + +void MappingTraits::mapping(IO &IO, YAMLFrameData &Obj) { + IO.mapRequired("CodeSize", Obj.CodeSize); + IO.mapRequired("FrameFunc", Obj.FrameFunc); + IO.mapRequired("LocalSize", Obj.LocalSize); + IO.mapOptional("MaxStackSize", Obj.MaxStackSize); + IO.mapOptional("ParamsSize", Obj.ParamsSize); + IO.mapOptional("PrologSize", Obj.PrologSize); + IO.mapOptional("RvaStart", Obj.RvaStart); + IO.mapOptional("SavedRegsSize", Obj.SavedRegsSize); +} + +void YAMLChecksumsSubsection::map(IO &IO) { + IO.mapTag("!FileChecksums", true); + IO.mapRequired("Checksums", Checksums); +} + +void YAMLLinesSubsection::map(IO &IO) { + IO.mapTag("!Lines", true); + IO.mapRequired("CodeSize", Lines.CodeSize); + + 
IO.mapRequired("Flags", Lines.Flags); + IO.mapRequired("RelocOffset", Lines.RelocOffset); + IO.mapRequired("RelocSegment", Lines.RelocSegment); + IO.mapRequired("Blocks", Lines.Blocks); +} + +void YAMLInlineeLinesSubsection::map(IO &IO) { + IO.mapTag("!InlineeLines", true); + IO.mapRequired("HasExtraFiles", InlineeLines.HasExtraFiles); + IO.mapRequired("Sites", InlineeLines.Sites); +} + +void YAMLCrossModuleExportsSubsection::map(IO &IO) { + IO.mapTag("!CrossModuleExports", true); + IO.mapOptional("Exports", Exports); +} + +void YAMLCrossModuleImportsSubsection::map(IO &IO) { + IO.mapTag("!CrossModuleImports", true); + IO.mapOptional("Imports", Imports); +} + +void YAMLSymbolsSubsection::map(IO &IO) { + IO.mapTag("!Symbols", true); + IO.mapRequired("Records", Symbols); +} + +void YAMLStringTableSubsection::map(IO &IO) { + IO.mapTag("!StringTable", true); + IO.mapRequired("Strings", Strings); +} + +void YAMLFrameDataSubsection::map(IO &IO) { + IO.mapTag("!FrameData", true); + IO.mapRequired("Frames", Frames); +} + +void YAMLCoffSymbolRVASubsection::map(IO &IO) { + IO.mapTag("!COFFSymbolRVAs", true); + IO.mapRequired("RVAs", RVAs); +} + +void MappingTraits::mapping( + IO &IO, YAMLDebugSubsection &Subsection) { + if (!IO.outputting()) { + if (IO.mapTag("!FileChecksums")) { + auto SS = std::make_shared(); + Subsection.Subsection = SS; + } else if (IO.mapTag("!Lines")) { + Subsection.Subsection = std::make_shared(); + } else if (IO.mapTag("!InlineeLines")) { + Subsection.Subsection = std::make_shared(); + } else if (IO.mapTag("!CrossModuleExports")) { + Subsection.Subsection = + std::make_shared(); + } else if (IO.mapTag("!CrossModuleImports")) { + Subsection.Subsection = + std::make_shared(); + } else if (IO.mapTag("!Symbols")) { + Subsection.Subsection = std::make_shared(); + } else if (IO.mapTag("!StringTable")) { + Subsection.Subsection = std::make_shared(); + } else if (IO.mapTag("!FrameData")) { + Subsection.Subsection = std::make_shared(); + } else if (IO.mapTag("!COFFSymbolRVAs")) { + Subsection.Subsection = std::make_shared(); + } else { + llvm_unreachable("Unexpected subsection tag!"); + } + } + Subsection.Subsection->map(IO); +} + +std::shared_ptr YAMLChecksumsSubsection::toCodeViewSubsection( + BumpPtrAllocator &Allocator, + const codeview::StringsAndChecksums &SC) const { + assert(SC.hasStrings()); + auto Result = std::make_shared(*SC.strings()); + for (const auto &CS : Checksums) { + Result->addChecksum(CS.FileName, CS.Kind, CS.ChecksumBytes.Bytes); + } + return Result; +} + +std::shared_ptr YAMLLinesSubsection::toCodeViewSubsection( + BumpPtrAllocator &Allocator, + const codeview::StringsAndChecksums &SC) const { + assert(SC.hasStrings() && SC.hasChecksums()); + auto Result = + std::make_shared(*SC.checksums(), *SC.strings()); + Result->setCodeSize(Lines.CodeSize); + Result->setRelocationAddress(Lines.RelocSegment, Lines.RelocOffset); + Result->setFlags(Lines.Flags); + for (const auto &LC : Lines.Blocks) { + Result->createBlock(LC.FileName); + if (Result->hasColumnInfo()) { + for (const auto &Item : zip(LC.Lines, LC.Columns)) { + auto &L = std::get<0>(Item); + auto &C = std::get<1>(Item); + uint32_t LE = L.LineStart + L.EndDelta; + Result->addLineAndColumnInfo(L.Offset, + LineInfo(L.LineStart, LE, L.IsStatement), + C.StartColumn, C.EndColumn); + } + } else { + for (const auto &L : LC.Lines) { + uint32_t LE = L.LineStart + L.EndDelta; + Result->addLineInfo(L.Offset, LineInfo(L.LineStart, LE, L.IsStatement)); + } + } + } + return Result; +} + +std::shared_ptr 
+YAMLInlineeLinesSubsection::toCodeViewSubsection( + BumpPtrAllocator &Allocator, + const codeview::StringsAndChecksums &SC) const { + assert(SC.hasChecksums()); + auto Result = std::make_shared( + *SC.checksums(), InlineeLines.HasExtraFiles); + + for (const auto &Site : InlineeLines.Sites) { + Result->addInlineSite(TypeIndex(Site.Inlinee), Site.FileName, + Site.SourceLineNum); + if (!InlineeLines.HasExtraFiles) + continue; + + for (auto EF : Site.ExtraFiles) { + Result->addExtraFile(EF); + } + } + return Result; +} + +std::shared_ptr +YAMLCrossModuleExportsSubsection::toCodeViewSubsection( + BumpPtrAllocator &Allocator, + const codeview::StringsAndChecksums &SC) const { + auto Result = std::make_shared(); + for (const auto &M : Exports) + Result->addMapping(M.Local, M.Global); + return Result; +} + +std::shared_ptr +YAMLCrossModuleImportsSubsection::toCodeViewSubsection( + BumpPtrAllocator &Allocator, + const codeview::StringsAndChecksums &SC) const { + assert(SC.hasStrings()); + + auto Result = + std::make_shared(*SC.strings()); + for (const auto &M : Imports) { + for (const auto Id : M.ImportIds) + Result->addImport(M.ModuleName, Id); + } + return Result; +} + +std::shared_ptr YAMLSymbolsSubsection::toCodeViewSubsection( + BumpPtrAllocator &Allocator, + const codeview::StringsAndChecksums &SC) const { + auto Result = std::make_shared(); + for (const auto &Sym : Symbols) + Result->addSymbol( + Sym.toCodeViewSymbol(Allocator, CodeViewContainer::ObjectFile)); + return Result; +} + +std::shared_ptr +YAMLStringTableSubsection::toCodeViewSubsection( + BumpPtrAllocator &Allocator, + const codeview::StringsAndChecksums &SC) const { + auto Result = std::make_shared(); + for (const auto &Str : this->Strings) + Result->insert(Str); + return Result; +} + +std::shared_ptr YAMLFrameDataSubsection::toCodeViewSubsection( + BumpPtrAllocator &Allocator, + const codeview::StringsAndChecksums &SC) const { + assert(SC.hasStrings()); + + auto Result = std::make_shared(); + for (const auto &YF : Frames) { + codeview::FrameData F; + F.CodeSize = YF.CodeSize; + F.Flags = YF.Flags; + F.LocalSize = YF.LocalSize; + F.MaxStackSize = YF.MaxStackSize; + F.ParamsSize = YF.ParamsSize; + F.PrologSize = YF.PrologSize; + F.RvaStart = YF.RvaStart; + F.SavedRegsSize = YF.SavedRegsSize; + F.FrameFunc = SC.strings()->insert(YF.FrameFunc); + Result->addFrameData(F); + } + return Result; +} + +std::shared_ptr +YAMLCoffSymbolRVASubsection::toCodeViewSubsection( + BumpPtrAllocator &Allocator, + const codeview::StringsAndChecksums &SC) const { + auto Result = std::make_shared(); + for (const auto &RVA : RVAs) + Result->addRVA(RVA); + return Result; +} + +static Expected +convertOneChecksum(const DebugStringTableSubsectionRef &Strings, + const FileChecksumEntry &CS) { + auto ExpectedString = Strings.getString(CS.FileNameOffset); + if (!ExpectedString) + return ExpectedString.takeError(); + + SourceFileChecksumEntry Result; + Result.ChecksumBytes.Bytes = CS.Checksum; + Result.Kind = CS.Kind; + Result.FileName = *ExpectedString; + return Result; +} + +static Expected +getFileName(const DebugStringTableSubsectionRef &Strings, + const DebugChecksumsSubsectionRef &Checksums, uint32_t FileID) { + auto Iter = Checksums.getArray().at(FileID); + if (Iter == Checksums.getArray().end()) + return make_error(cv_error_code::no_records); + uint32_t Offset = Iter->FileNameOffset; + return Strings.getString(Offset); +} + +Expected> +YAMLChecksumsSubsection::fromCodeViewSubsection( + const DebugStringTableSubsectionRef &Strings, + const 
DebugChecksumsSubsectionRef &FC) { + auto Result = std::make_shared(); + + for (const auto &CS : FC) { + auto ConvertedCS = convertOneChecksum(Strings, CS); + if (!ConvertedCS) + return ConvertedCS.takeError(); + Result->Checksums.push_back(*ConvertedCS); + } + return Result; +} + +Expected> +YAMLLinesSubsection::fromCodeViewSubsection( + const DebugStringTableSubsectionRef &Strings, + const DebugChecksumsSubsectionRef &Checksums, + const DebugLinesSubsectionRef &Lines) { + auto Result = std::make_shared(); + Result->Lines.CodeSize = Lines.header()->CodeSize; + Result->Lines.RelocOffset = Lines.header()->RelocOffset; + Result->Lines.RelocSegment = Lines.header()->RelocSegment; + Result->Lines.Flags = static_cast(uint16_t(Lines.header()->Flags)); + for (const auto &L : Lines) { + SourceLineBlock Block; + auto EF = getFileName(Strings, Checksums, L.NameIndex); + if (!EF) + return EF.takeError(); + Block.FileName = *EF; + if (Lines.hasColumnInfo()) { + for (const auto &C : L.Columns) { + SourceColumnEntry SCE; + SCE.EndColumn = C.EndColumn; + SCE.StartColumn = C.StartColumn; + Block.Columns.push_back(SCE); + } + } + for (const auto &LN : L.LineNumbers) { + SourceLineEntry SLE; + LineInfo LI(LN.Flags); + SLE.Offset = LN.Offset; + SLE.LineStart = LI.getStartLine(); + SLE.EndDelta = LI.getLineDelta(); + SLE.IsStatement = LI.isStatement(); + Block.Lines.push_back(SLE); + } + Result->Lines.Blocks.push_back(Block); + } + return Result; +} + +Expected> +YAMLInlineeLinesSubsection::fromCodeViewSubsection( + const DebugStringTableSubsectionRef &Strings, + const DebugChecksumsSubsectionRef &Checksums, + const DebugInlineeLinesSubsectionRef &Lines) { + auto Result = std::make_shared(); + + Result->InlineeLines.HasExtraFiles = Lines.hasExtraFiles(); + for (const auto &IL : Lines) { + InlineeSite Site; + auto ExpF = getFileName(Strings, Checksums, IL.Header->FileID); + if (!ExpF) + return ExpF.takeError(); + Site.FileName = *ExpF; + Site.Inlinee = IL.Header->Inlinee.getIndex(); + Site.SourceLineNum = IL.Header->SourceLineNum; + if (Lines.hasExtraFiles()) { + for (const auto EF : IL.ExtraFiles) { + auto ExpF2 = getFileName(Strings, Checksums, EF); + if (!ExpF2) + return ExpF2.takeError(); + Site.ExtraFiles.push_back(*ExpF2); + } + } + Result->InlineeLines.Sites.push_back(Site); + } + return Result; +} + +Expected> +YAMLCrossModuleExportsSubsection::fromCodeViewSubsection( + const DebugCrossModuleExportsSubsectionRef &Exports) { + auto Result = std::make_shared(); + Result->Exports.assign(Exports.begin(), Exports.end()); + return Result; +} + +Expected> +YAMLCrossModuleImportsSubsection::fromCodeViewSubsection( + const DebugStringTableSubsectionRef &Strings, + const DebugCrossModuleImportsSubsectionRef &Imports) { + auto Result = std::make_shared(); + for (const auto &CMI : Imports) { + YAMLCrossModuleImport YCMI; + auto ExpectedStr = Strings.getString(CMI.Header->ModuleNameOffset); + if (!ExpectedStr) + return ExpectedStr.takeError(); + YCMI.ModuleName = *ExpectedStr; + YCMI.ImportIds.assign(CMI.Imports.begin(), CMI.Imports.end()); + Result->Imports.push_back(YCMI); + } + return Result; +} + +Expected> +YAMLSymbolsSubsection::fromCodeViewSubsection( + const DebugSymbolsSubsectionRef &Symbols) { + auto Result = std::make_shared(); + for (const auto &Sym : Symbols) { + auto S = CodeViewYAML::SymbolRecord::fromCodeViewSymbol(Sym); + if (!S) + return joinErrors(make_error( + cv_error_code::corrupt_record, + "Invalid CodeView Symbol Record in SymbolRecord " + "subsection of .debug$S while converting to YAML!"), 
+ S.takeError()); + + Result->Symbols.push_back(*S); + } + return Result; +} + +Expected> +YAMLStringTableSubsection::fromCodeViewSubsection( + const DebugStringTableSubsectionRef &Strings) { + auto Result = std::make_shared(); + BinaryStreamReader Reader(Strings.getBuffer()); + StringRef S; + // First item is a single null string, skip it. + if (auto EC = Reader.readCString(S)) + return std::move(EC); + assert(S.empty()); + while (Reader.bytesRemaining() > 0) { + if (auto EC = Reader.readCString(S)) + return std::move(EC); + Result->Strings.push_back(S); + } + return Result; +} + +Expected> +YAMLFrameDataSubsection::fromCodeViewSubsection( + const DebugStringTableSubsectionRef &Strings, + const DebugFrameDataSubsectionRef &Frames) { + auto Result = std::make_shared(); + for (const auto &F : Frames) { + YAMLFrameData YF; + YF.CodeSize = F.CodeSize; + YF.Flags = F.Flags; + YF.LocalSize = F.LocalSize; + YF.MaxStackSize = F.MaxStackSize; + YF.ParamsSize = F.ParamsSize; + YF.PrologSize = F.PrologSize; + YF.RvaStart = F.RvaStart; + YF.SavedRegsSize = F.SavedRegsSize; + + auto ES = Strings.getString(F.FrameFunc); + if (!ES) + return joinErrors( + make_error( + cv_error_code::no_records, + "Could not find string for string id while mapping FrameData!"), + ES.takeError()); + YF.FrameFunc = *ES; + Result->Frames.push_back(YF); + } + return Result; +} + +Expected> +YAMLCoffSymbolRVASubsection::fromCodeViewSubsection( + const DebugSymbolRVASubsectionRef &Section) { + auto Result = std::make_shared(); + for (const auto &RVA : Section) { + Result->RVAs.push_back(RVA); + } + return Result; +} + +Expected>> +llvm::CodeViewYAML::toCodeViewSubsectionList( + BumpPtrAllocator &Allocator, ArrayRef Subsections, + const codeview::StringsAndChecksums &SC) { + std::vector> Result; + if (Subsections.empty()) + return std::move(Result); + + for (const auto &SS : Subsections) { + std::shared_ptr CVS; + CVS = SS.Subsection->toCodeViewSubsection(Allocator, SC); + assert(CVS != nullptr); + Result.push_back(std::move(CVS)); + } + return std::move(Result); +} + +namespace { + +struct SubsectionConversionVisitor : public DebugSubsectionVisitor { + SubsectionConversionVisitor() = default; + + Error visitUnknown(DebugUnknownSubsectionRef &Unknown) override; + Error visitLines(DebugLinesSubsectionRef &Lines, + const StringsAndChecksumsRef &State) override; + Error visitFileChecksums(DebugChecksumsSubsectionRef &Checksums, + const StringsAndChecksumsRef &State) override; + Error visitInlineeLines(DebugInlineeLinesSubsectionRef &Inlinees, + const StringsAndChecksumsRef &State) override; + Error visitCrossModuleExports(DebugCrossModuleExportsSubsectionRef &Checksums, + const StringsAndChecksumsRef &State) override; + Error visitCrossModuleImports(DebugCrossModuleImportsSubsectionRef &Inlinees, + const StringsAndChecksumsRef &State) override; + Error visitStringTable(DebugStringTableSubsectionRef &ST, + const StringsAndChecksumsRef &State) override; + Error visitSymbols(DebugSymbolsSubsectionRef &Symbols, + const StringsAndChecksumsRef &State) override; + Error visitFrameData(DebugFrameDataSubsectionRef &Symbols, + const StringsAndChecksumsRef &State) override; + Error visitCOFFSymbolRVAs(DebugSymbolRVASubsectionRef &Symbols, + const StringsAndChecksumsRef &State) override; + + YAMLDebugSubsection Subsection; +}; + +} // end anonymous namespace + +Error SubsectionConversionVisitor::visitUnknown( + DebugUnknownSubsectionRef &Unknown) { + return make_error(cv_error_code::operation_unsupported); +} + +Error 
SubsectionConversionVisitor::visitLines( + DebugLinesSubsectionRef &Lines, const StringsAndChecksumsRef &State) { + auto Result = YAMLLinesSubsection::fromCodeViewSubsection( + State.strings(), State.checksums(), Lines); + if (!Result) + return Result.takeError(); + Subsection.Subsection = *Result; + return Error::success(); +} + +Error SubsectionConversionVisitor::visitFileChecksums( + DebugChecksumsSubsectionRef &Checksums, + const StringsAndChecksumsRef &State) { + auto Result = YAMLChecksumsSubsection::fromCodeViewSubsection(State.strings(), + Checksums); + if (!Result) + return Result.takeError(); + Subsection.Subsection = *Result; + return Error::success(); +} + +Error SubsectionConversionVisitor::visitInlineeLines( + DebugInlineeLinesSubsectionRef &Inlinees, + const StringsAndChecksumsRef &State) { + auto Result = YAMLInlineeLinesSubsection::fromCodeViewSubsection( + State.strings(), State.checksums(), Inlinees); + if (!Result) + return Result.takeError(); + Subsection.Subsection = *Result; + return Error::success(); +} + +Error SubsectionConversionVisitor::visitCrossModuleExports( + DebugCrossModuleExportsSubsectionRef &Exports, + const StringsAndChecksumsRef &State) { + auto Result = + YAMLCrossModuleExportsSubsection::fromCodeViewSubsection(Exports); + if (!Result) + return Result.takeError(); + Subsection.Subsection = *Result; + return Error::success(); +} + +Error SubsectionConversionVisitor::visitCrossModuleImports( + DebugCrossModuleImportsSubsectionRef &Imports, + const StringsAndChecksumsRef &State) { + auto Result = YAMLCrossModuleImportsSubsection::fromCodeViewSubsection( + State.strings(), Imports); + if (!Result) + return Result.takeError(); + Subsection.Subsection = *Result; + return Error::success(); +} + +Error SubsectionConversionVisitor::visitStringTable( + DebugStringTableSubsectionRef &Strings, + const StringsAndChecksumsRef &State) { + auto Result = YAMLStringTableSubsection::fromCodeViewSubsection(Strings); + if (!Result) + return Result.takeError(); + Subsection.Subsection = *Result; + return Error::success(); +} + +Error SubsectionConversionVisitor::visitSymbols( + DebugSymbolsSubsectionRef &Symbols, const StringsAndChecksumsRef &State) { + auto Result = YAMLSymbolsSubsection::fromCodeViewSubsection(Symbols); + if (!Result) + return Result.takeError(); + Subsection.Subsection = *Result; + return Error::success(); +} + +Error SubsectionConversionVisitor::visitFrameData( + DebugFrameDataSubsectionRef &Frames, const StringsAndChecksumsRef &State) { + auto Result = + YAMLFrameDataSubsection::fromCodeViewSubsection(State.strings(), Frames); + if (!Result) + return Result.takeError(); + Subsection.Subsection = *Result; + return Error::success(); +} + +Error SubsectionConversionVisitor::visitCOFFSymbolRVAs( + DebugSymbolRVASubsectionRef &RVAs, const StringsAndChecksumsRef &State) { + auto Result = YAMLCoffSymbolRVASubsection::fromCodeViewSubsection(RVAs); + if (!Result) + return Result.takeError(); + Subsection.Subsection = *Result; + return Error::success(); +} + +Expected +YAMLDebugSubsection::fromCodeViewSubection(const StringsAndChecksumsRef &SC, + const DebugSubsectionRecord &SS) { + SubsectionConversionVisitor V; + if (auto EC = visitDebugSubsection(SS, V, SC)) + return std::move(EC); + + return V.Subsection; +} + +std::vector +llvm::CodeViewYAML::fromDebugS(ArrayRef Data, + const StringsAndChecksumsRef &SC) { + BinaryStreamReader Reader(Data, support::little); + uint32_t Magic; + + ExitOnError Err("Invalid .debug$S section!"); + 
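+  // Layout note (summarizing the format this reader assumes): a .debug$S
+  // payload begins with a 4-byte magic, COFF::DEBUG_SECTION_MAGIC (the C13
+  // signature, value 4), followed by a stream of DebugSubsectionRecords, each
+  // a (kind, length) header plus payload; readArray() below walks that stream
+  // to the end of the buffer.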
Err(Reader.readInteger(Magic)); + assert(Magic == COFF::DEBUG_SECTION_MAGIC && "Invalid .debug$S section!"); + + DebugSubsectionArray Subsections; + Err(Reader.readArray(Subsections, Reader.bytesRemaining())); + + std::vector<YAMLDebugSubsection> Result; + + for (const auto &SS : Subsections) { + auto YamlSS = Err(YAMLDebugSubsection::fromCodeViewSubection(SC, SS)); + Result.push_back(YamlSS); + } + return Result; +} + +void llvm::CodeViewYAML::initializeStringsAndChecksums( + ArrayRef<YAMLDebugSubsection> Sections, codeview::StringsAndChecksums &SC) { + // String Table and Checksums subsections don't use the allocator. + BumpPtrAllocator Allocator; + + // It's possible for checksums and strings to appear in different debug$S + // sections, so we have to make this a stateful function that can build up + // the strings and checksums fields over multiple iterations. + + // File Checksums require the string table, but may come before it, so we + // have to scan for strings first, then scan for checksums again from the + // beginning. + if (!SC.hasStrings()) { + for (const auto &SS : Sections) { + if (SS.Subsection->Kind != DebugSubsectionKind::StringTable) + continue; + + auto Result = SS.Subsection->toCodeViewSubsection(Allocator, SC); + SC.setStrings( + std::static_pointer_cast<DebugStringTableSubsection>(Result)); + break; + } + } + + if (SC.hasStrings() && !SC.hasChecksums()) { + for (const auto &SS : Sections) { + if (SS.Subsection->Kind != DebugSubsectionKind::FileChecksums) + continue; + + auto Result = SS.Subsection->toCodeViewSubsection(Allocator, SC); + SC.setChecksums( + std::static_pointer_cast<DebugChecksumsSubsection>(Result)); + break; + } + } +} diff --git a/interpreter/llvm/src/lib/ObjectYAML/CodeViewYAMLSymbols.cpp b/interpreter/llvm/src/lib/ObjectYAML/CodeViewYAMLSymbols.cpp new file mode 100644 index 0000000000000..dbe4e2a6d6fd7 --- /dev/null +++ b/interpreter/llvm/src/lib/ObjectYAML/CodeViewYAMLSymbols.cpp @@ -0,0 +1,590 @@ +//===- CodeViewYAMLSymbols.cpp - CodeView YAMLIO Symbol implementation ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines classes for handling the YAML representation of CodeView +// Debug Info.
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ObjectYAML/CodeViewYAMLSymbols.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/CodeViewError.h" +#include "llvm/DebugInfo/CodeView/EnumTables.h" +#include "llvm/DebugInfo/CodeView/RecordSerialization.h" +#include "llvm/DebugInfo/CodeView/SymbolDeserializer.h" +#include "llvm/DebugInfo/CodeView/SymbolRecord.h" +#include "llvm/DebugInfo/CodeView/SymbolSerializer.h" +#include "llvm/DebugInfo/CodeView/TypeIndex.h" +#include "llvm/ObjectYAML/YAML.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/YAMLTraits.h" +#include +#include +#include +#include +#include + +using namespace llvm; +using namespace llvm::codeview; +using namespace llvm::CodeViewYAML; +using namespace llvm::CodeViewYAML::detail; +using namespace llvm::yaml; + +LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(TypeIndex) + +// We only need to declare these, the definitions are in CodeViewYAMLTypes.cpp +LLVM_YAML_DECLARE_SCALAR_TRAITS(APSInt, false) +LLVM_YAML_DECLARE_SCALAR_TRAITS(TypeIndex, false) + +LLVM_YAML_DECLARE_ENUM_TRAITS(SymbolKind) +LLVM_YAML_DECLARE_ENUM_TRAITS(FrameCookieKind) + +LLVM_YAML_DECLARE_BITSET_TRAITS(CompileSym2Flags) +LLVM_YAML_DECLARE_BITSET_TRAITS(CompileSym3Flags) +LLVM_YAML_DECLARE_BITSET_TRAITS(ExportFlags) +LLVM_YAML_DECLARE_BITSET_TRAITS(PublicSymFlags) +LLVM_YAML_DECLARE_BITSET_TRAITS(LocalSymFlags) +LLVM_YAML_DECLARE_BITSET_TRAITS(ProcSymFlags) +LLVM_YAML_DECLARE_BITSET_TRAITS(FrameProcedureOptions) +LLVM_YAML_DECLARE_ENUM_TRAITS(CPUType) +LLVM_YAML_DECLARE_ENUM_TRAITS(RegisterId) +LLVM_YAML_DECLARE_ENUM_TRAITS(TrampolineType) +LLVM_YAML_DECLARE_ENUM_TRAITS(ThunkOrdinal) + +LLVM_YAML_STRONG_TYPEDEF(StringRef, TypeName) + +LLVM_YAML_DECLARE_SCALAR_TRAITS(TypeName, true) + +StringRef ScalarTraits::input(StringRef S, void *V, TypeName &T) { + return ScalarTraits::input(S, V, T.value); +} + +void ScalarTraits::output(const TypeName &T, void *V, + raw_ostream &R) { + ScalarTraits::output(T.value, V, R); +} + +void ScalarEnumerationTraits::enumeration(IO &io, + SymbolKind &Value) { + auto SymbolNames = getSymbolTypeNames(); + for (const auto &E : SymbolNames) + io.enumCase(Value, E.Name.str().c_str(), E.Value); +} + +void ScalarBitSetTraits::bitset(IO &io, + CompileSym2Flags &Flags) { + auto FlagNames = getCompileSym2FlagNames(); + for (const auto &E : FlagNames) { + io.bitSetCase(Flags, E.Name.str().c_str(), + static_cast(E.Value)); + } +} + +void ScalarBitSetTraits::bitset(IO &io, + CompileSym3Flags &Flags) { + auto FlagNames = getCompileSym3FlagNames(); + for (const auto &E : FlagNames) { + io.bitSetCase(Flags, E.Name.str().c_str(), + static_cast(E.Value)); + } +} + +void ScalarBitSetTraits::bitset(IO &io, ExportFlags &Flags) { + auto FlagNames = getExportSymFlagNames(); + for (const auto &E : FlagNames) { + io.bitSetCase(Flags, E.Name.str().c_str(), + static_cast(E.Value)); + } +} + +void ScalarBitSetTraits::bitset(IO &io, PublicSymFlags &Flags) { + auto FlagNames = getProcSymFlagNames(); + for (const auto &E : FlagNames) { + io.bitSetCase(Flags, E.Name.str().c_str(), + static_cast(E.Value)); + } +} + +void ScalarBitSetTraits::bitset(IO &io, LocalSymFlags &Flags) { + auto FlagNames = getLocalFlagNames(); + for (const auto &E : FlagNames) { + io.bitSetCase(Flags, E.Name.str().c_str(), + static_cast(E.Value)); + } +} + +void ScalarBitSetTraits::bitset(IO &io, 
ProcSymFlags &Flags) { + auto FlagNames = getProcSymFlagNames(); + for (const auto &E : FlagNames) { + io.bitSetCase(Flags, E.Name.str().c_str(), + static_cast(E.Value)); + } +} + +void ScalarBitSetTraits::bitset( + IO &io, FrameProcedureOptions &Flags) { + auto FlagNames = getFrameProcSymFlagNames(); + for (const auto &E : FlagNames) { + io.bitSetCase(Flags, E.Name.str().c_str(), + static_cast(E.Value)); + } +} + +void ScalarEnumerationTraits::enumeration(IO &io, CPUType &Cpu) { + auto CpuNames = getCPUTypeNames(); + for (const auto &E : CpuNames) { + io.enumCase(Cpu, E.Name.str().c_str(), static_cast(E.Value)); + } +} + +void ScalarEnumerationTraits::enumeration(IO &io, RegisterId &Reg) { + auto RegNames = getRegisterNames(); + for (const auto &E : RegNames) { + io.enumCase(Reg, E.Name.str().c_str(), static_cast(E.Value)); + } + io.enumFallback(Reg); +} + +void ScalarEnumerationTraits::enumeration( + IO &io, TrampolineType &Tramp) { + auto TrampNames = getTrampolineNames(); + for (const auto &E : TrampNames) { + io.enumCase(Tramp, E.Name.str().c_str(), + static_cast(E.Value)); + } +} + +void ScalarEnumerationTraits::enumeration(IO &io, + ThunkOrdinal &Ord) { + auto ThunkNames = getThunkOrdinalNames(); + for (const auto &E : ThunkNames) { + io.enumCase(Ord, E.Name.str().c_str(), static_cast(E.Value)); + } +} + +void ScalarEnumerationTraits::enumeration( + IO &io, FrameCookieKind &FC) { + auto ThunkNames = getFrameCookieKindNames(); + for (const auto &E : ThunkNames) { + io.enumCase(FC, E.Name.str().c_str(), + static_cast(E.Value)); + } +} + +namespace llvm { +namespace CodeViewYAML { +namespace detail { + +struct SymbolRecordBase { + codeview::SymbolKind Kind; + + explicit SymbolRecordBase(codeview::SymbolKind K) : Kind(K) {} + virtual ~SymbolRecordBase() = default; + + virtual void map(yaml::IO &io) = 0; + virtual codeview::CVSymbol + toCodeViewSymbol(BumpPtrAllocator &Allocator, + CodeViewContainer Container) const = 0; + virtual Error fromCodeViewSymbol(codeview::CVSymbol Type) = 0; +}; + +template struct SymbolRecordImpl : public SymbolRecordBase { + explicit SymbolRecordImpl(codeview::SymbolKind K) + : SymbolRecordBase(K), Symbol(static_cast(K)) {} + + void map(yaml::IO &io) override; + + codeview::CVSymbol + toCodeViewSymbol(BumpPtrAllocator &Allocator, + CodeViewContainer Container) const override { + return SymbolSerializer::writeOneSymbol(Symbol, Allocator, Container); + } + + Error fromCodeViewSymbol(codeview::CVSymbol CVS) override { + return SymbolDeserializer::deserializeAs(CVS, Symbol); + } + + mutable T Symbol; +}; + +struct UnknownSymbolRecord : public SymbolRecordBase { + explicit UnknownSymbolRecord(codeview::SymbolKind K) : SymbolRecordBase(K) {} + + void map(yaml::IO &io) override; + + CVSymbol toCodeViewSymbol(BumpPtrAllocator &Allocator, + CodeViewContainer Container) const override { + RecordPrefix Prefix; + uint32_t TotalLen = sizeof(RecordPrefix) + Data.size(); + Prefix.RecordKind = Kind; + Prefix.RecordLen = TotalLen - 2; + uint8_t *Buffer = Allocator.Allocate(TotalLen); + ::memcpy(Buffer, &Prefix, sizeof(RecordPrefix)); + ::memcpy(Buffer + sizeof(RecordPrefix), Data.data(), Data.size()); + return CVSymbol(Kind, ArrayRef(Buffer, TotalLen)); + } + + Error fromCodeViewSymbol(CVSymbol CVS) override { + this->Kind = CVS.kind(); + Data = CVS.RecordData.drop_front(sizeof(RecordPrefix)); + return Error::success(); + } + + std::vector Data; +}; + +template <> void SymbolRecordImpl::map(IO &IO) {} + +void UnknownSymbolRecord::map(yaml::IO &io) { + yaml::BinaryRef Binary; 
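+  // Unknown symbol kinds round-trip opaquely: when outputting, the record
+  // payload (without its RecordPrefix) is emitted as a binary "Data" blob;
+  // when parsing, the blob is decoded back into Data via writeAsBinary()
+  // below.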
+ if (io.outputting()) + Binary = yaml::BinaryRef(Data); + io.mapRequired("Data", Binary); + if (!io.outputting()) { + std::string Str; + raw_string_ostream OS(Str); + Binary.writeAsBinary(OS); + OS.flush(); + Data.assign(Str.begin(), Str.end()); + } +} + +template <> void SymbolRecordImpl::map(IO &IO) { + IO.mapRequired("Parent", Symbol.Parent); + IO.mapRequired("End", Symbol.End); + IO.mapRequired("Next", Symbol.Next); + IO.mapRequired("Off", Symbol.Offset); + IO.mapRequired("Seg", Symbol.Segment); + IO.mapRequired("Len", Symbol.Length); + IO.mapRequired("Ordinal", Symbol.Thunk); +} + +template <> void SymbolRecordImpl::map(IO &IO) { + IO.mapRequired("Type", Symbol.Type); + IO.mapRequired("Size", Symbol.Size); + IO.mapRequired("ThunkOff", Symbol.ThunkOffset); + IO.mapRequired("TargetOff", Symbol.TargetOffset); + IO.mapRequired("ThunkSection", Symbol.ThunkSection); + IO.mapRequired("TargetSection", Symbol.TargetSection); +} + +template <> void SymbolRecordImpl::map(IO &IO) { + IO.mapRequired("SectionNumber", Symbol.SectionNumber); + IO.mapRequired("Alignment", Symbol.Alignment); + IO.mapRequired("Rva", Symbol.Rva); + IO.mapRequired("Length", Symbol.Length); + IO.mapRequired("Characteristics", Symbol.Characteristics); + IO.mapRequired("Name", Symbol.Name); +} + +template <> void SymbolRecordImpl::map(IO &IO) { + IO.mapRequired("Size", Symbol.Size); + IO.mapRequired("Characteristics", Symbol.Characteristics); + IO.mapRequired("Offset", Symbol.Offset); + IO.mapRequired("Segment", Symbol.Segment); + IO.mapRequired("Name", Symbol.Name); +} + +template <> void SymbolRecordImpl::map(IO &IO) { + IO.mapRequired("Ordinal", Symbol.Ordinal); + IO.mapRequired("Flags", Symbol.Flags); + IO.mapRequired("Name", Symbol.Name); +} + +template <> void SymbolRecordImpl::map(IO &IO) { + IO.mapOptional("PtrParent", Symbol.Parent, 0U); + IO.mapOptional("PtrEnd", Symbol.End, 0U); + IO.mapOptional("PtrNext", Symbol.Next, 0U); + IO.mapRequired("CodeSize", Symbol.CodeSize); + IO.mapRequired("DbgStart", Symbol.DbgStart); + IO.mapRequired("DbgEnd", Symbol.DbgEnd); + IO.mapRequired("FunctionType", Symbol.FunctionType); + IO.mapOptional("Offset", Symbol.CodeOffset, 0U); + IO.mapOptional("Segment", Symbol.Segment, uint16_t(0)); + IO.mapRequired("Flags", Symbol.Flags); + IO.mapRequired("DisplayName", Symbol.Name); +} + +template <> void SymbolRecordImpl::map(IO &IO) { + IO.mapRequired("Type", Symbol.Index); + IO.mapRequired("Seg", Symbol.Register); + IO.mapRequired("Name", Symbol.Name); +} + +template <> void SymbolRecordImpl::map(IO &IO) { + IO.mapRequired("Flags", Symbol.Flags); + IO.mapOptional("Offset", Symbol.Offset, 0U); + IO.mapOptional("Segment", Symbol.Segment, uint16_t(0)); + IO.mapRequired("Name", Symbol.Name); +} + +template <> void SymbolRecordImpl::map(IO &IO) { + IO.mapRequired("SumName", Symbol.SumName); + IO.mapRequired("SymOffset", Symbol.SymOffset); + IO.mapRequired("Mod", Symbol.Module); + IO.mapRequired("Name", Symbol.Name); +} + +template <> void SymbolRecordImpl::map(IO &IO) { + IO.mapRequired("Entries", Symbol.Fields); +} + +template <> void SymbolRecordImpl::map(IO &IO) { + IO.mapOptional("PtrParent", Symbol.Parent, 0U); + IO.mapOptional("PtrEnd", Symbol.End, 0U); + IO.mapRequired("Inlinee", Symbol.Inlinee); + // TODO: The binary annotations +} + +template <> void SymbolRecordImpl::map(IO &IO) { + IO.mapRequired("Type", Symbol.Type); + IO.mapRequired("Flags", Symbol.Flags); + + IO.mapRequired("VarName", Symbol.Name); +} + +template <> void SymbolRecordImpl::map(IO &IO) { + // TODO: Print the 
subfields +} + +template <> void SymbolRecordImpl<DefRangeSubfieldSym>::map(IO &IO) { + // TODO: Print the subfields +} + +template <> void SymbolRecordImpl<DefRangeRegisterSym>::map(IO &IO) { + // TODO: Print the subfields +} + +template <> void SymbolRecordImpl<DefRangeFramePointerRelSym>::map(IO &IO) { + // TODO: Print the subfields +} + +template <> void SymbolRecordImpl<DefRangeSubfieldRegisterSym>::map(IO &IO) { + // TODO: Print the subfields +} + +template <> +void SymbolRecordImpl<DefRangeFramePointerRelFullScopeSym>::map(IO &IO) { + // TODO: Print the subfields +} + +template <> void SymbolRecordImpl<DefRangeRegisterRelSym>::map(IO &IO) { + // TODO: Print the subfields +} + +template <> void SymbolRecordImpl<BlockSym>::map(IO &IO) { + IO.mapOptional("PtrParent", Symbol.Parent, 0U); + IO.mapOptional("PtrEnd", Symbol.End, 0U); + IO.mapRequired("CodeSize", Symbol.CodeSize); + IO.mapOptional("Offset", Symbol.CodeOffset, 0U); + IO.mapOptional("Segment", Symbol.Segment, uint16_t(0)); + IO.mapRequired("BlockName", Symbol.Name); +} + +template <> void SymbolRecordImpl<LabelSym>::map(IO &IO) { + IO.mapOptional("Offset", Symbol.CodeOffset, 0U); + IO.mapOptional("Segment", Symbol.Segment, uint16_t(0)); + IO.mapRequired("Flags", Symbol.Flags); + IO.mapRequired("DisplayName", Symbol.Name); +} + +template <> void SymbolRecordImpl<ObjNameSym>::map(IO &IO) { + IO.mapRequired("Signature", Symbol.Signature); + IO.mapRequired("ObjectName", Symbol.Name); +} + +template <> void SymbolRecordImpl<Compile2Sym>::map(IO &IO) { + IO.mapRequired("Flags", Symbol.Flags); + IO.mapRequired("Machine", Symbol.Machine); + IO.mapRequired("FrontendMajor", Symbol.VersionFrontendMajor); + IO.mapRequired("FrontendMinor", Symbol.VersionFrontendMinor); + IO.mapRequired("FrontendBuild", Symbol.VersionFrontendBuild); + IO.mapRequired("BackendMajor", Symbol.VersionBackendMajor); + IO.mapRequired("BackendMinor", Symbol.VersionBackendMinor); + IO.mapRequired("BackendBuild", Symbol.VersionBackendBuild); + IO.mapRequired("Version", Symbol.Version); +} + +template <> void SymbolRecordImpl<Compile3Sym>::map(IO &IO) { + IO.mapRequired("Flags", Symbol.Flags); + IO.mapRequired("Machine", Symbol.Machine); + IO.mapRequired("FrontendMajor", Symbol.VersionFrontendMajor); + IO.mapRequired("FrontendMinor", Symbol.VersionFrontendMinor); + IO.mapRequired("FrontendBuild", Symbol.VersionFrontendBuild); + IO.mapRequired("FrontendQFE", Symbol.VersionFrontendQFE); + IO.mapRequired("BackendMajor", Symbol.VersionBackendMajor); + IO.mapRequired("BackendMinor", Symbol.VersionBackendMinor); + IO.mapRequired("BackendBuild", Symbol.VersionBackendBuild); + IO.mapRequired("BackendQFE", Symbol.VersionBackendQFE); + IO.mapRequired("Version", Symbol.Version); +} + +template <> void SymbolRecordImpl<FrameProcSym>::map(IO &IO) { + IO.mapRequired("TotalFrameBytes", Symbol.TotalFrameBytes); + IO.mapRequired("PaddingFrameBytes", Symbol.PaddingFrameBytes); + IO.mapRequired("OffsetToPadding", Symbol.OffsetToPadding); + IO.mapRequired("BytesOfCalleeSavedRegisters", + Symbol.BytesOfCalleeSavedRegisters); + IO.mapRequired("OffsetOfExceptionHandler", Symbol.OffsetOfExceptionHandler); + IO.mapRequired("SectionIdOfExceptionHandler", + Symbol.SectionIdOfExceptionHandler); + IO.mapRequired("Flags", Symbol.Flags); +} + +template <> void SymbolRecordImpl<CallSiteInfoSym>::map(IO &IO) { + IO.mapOptional("Offset", Symbol.CodeOffset, 0U); + IO.mapOptional("Segment", Symbol.Segment, uint16_t(0)); + IO.mapRequired("Type", Symbol.Type); +} + +template <> void SymbolRecordImpl<FileStaticSym>::map(IO &IO) { + IO.mapRequired("Index", Symbol.Index); + IO.mapRequired("ModFilenameOffset", Symbol.ModFilenameOffset); + IO.mapRequired("Flags", Symbol.Flags); + IO.mapRequired("Name", Symbol.Name); +} +
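+// Illustrative sketch (hypothetical values, not part of the upstream file):
+// with the specializations above, a symbol record round-trips through YAML in
+// the shape
+//
+//   - Kind:            S_OBJNAME
+//     ObjNameSym:
+//       Signature:       0
+//       ObjectName:      'a.obj'
+//
+// where "Kind" selects the SymbolRecordImpl<T> specialization and that
+// record's map() supplies the nested keys (see
+// MappingTraits<CodeViewYAML::SymbolRecord>::mapping further down).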
+template <> void SymbolRecordImpl::map(IO &IO) { + IO.mapOptional("Offset", Symbol.CodeOffset, 0U); + IO.mapOptional("Segment", Symbol.Segment, uint16_t(0)); + IO.mapRequired("CallInstructionSize", Symbol.CallInstructionSize); + IO.mapRequired("Type", Symbol.Type); +} + +template <> void SymbolRecordImpl::map(IO &IO) { + IO.mapRequired("Register", Symbol.Register); + IO.mapRequired("CookieKind", Symbol.CookieKind); + IO.mapRequired("Flags", Symbol.Flags); +} + +template <> void SymbolRecordImpl::map(IO &IO) { + IO.mapRequired("FuncID", Symbol.Indices); +} + +template <> void SymbolRecordImpl::map(IO &IO) { + IO.mapRequired("Type", Symbol.Type); + IO.mapRequired("UDTName", Symbol.Name); +} + +template <> void SymbolRecordImpl::map(IO &IO) { + IO.mapRequired("BuildId", Symbol.BuildId); +} + +template <> void SymbolRecordImpl::map(IO &IO) { + IO.mapRequired("Offset", Symbol.Offset); + IO.mapRequired("Type", Symbol.Type); + IO.mapRequired("VarName", Symbol.Name); +} + +template <> void SymbolRecordImpl::map(IO &IO) { + IO.mapRequired("Offset", Symbol.Offset); + IO.mapRequired("Type", Symbol.Type); + IO.mapRequired("Register", Symbol.Register); + IO.mapRequired("VarName", Symbol.Name); +} + +template <> void SymbolRecordImpl::map(IO &IO) { + IO.mapRequired("Type", Symbol.Type); + IO.mapRequired("Value", Symbol.Value); + IO.mapRequired("Name", Symbol.Name); +} + +template <> void SymbolRecordImpl::map(IO &IO) { + IO.mapRequired("Type", Symbol.Type); + IO.mapOptional("Offset", Symbol.DataOffset, 0U); + IO.mapOptional("Segment", Symbol.Segment, uint16_t(0)); + IO.mapRequired("DisplayName", Symbol.Name); +} + +template <> void SymbolRecordImpl::map(IO &IO) { + IO.mapRequired("Type", Symbol.Type); + IO.mapOptional("Offset", Symbol.DataOffset, 0U); + IO.mapOptional("Segment", Symbol.Segment, uint16_t(0)); + IO.mapRequired("DisplayName", Symbol.Name); +} + +} // end namespace detail +} // end namespace CodeViewYAML +} // end namespace llvm + +CVSymbol CodeViewYAML::SymbolRecord::toCodeViewSymbol( + BumpPtrAllocator &Allocator, CodeViewContainer Container) const { + return Symbol->toCodeViewSymbol(Allocator, Container); +} + +namespace llvm { +namespace yaml { + +template <> struct MappingTraits { + static void mapping(IO &io, SymbolRecordBase &Record) { Record.map(io); } +}; + +} // end namespace yaml +} // end namespace llvm + +template +static inline Expected +fromCodeViewSymbolImpl(CVSymbol Symbol) { + CodeViewYAML::SymbolRecord Result; + + auto Impl = std::make_shared(Symbol.kind()); + if (auto EC = Impl->fromCodeViewSymbol(Symbol)) + return std::move(EC); + Result.Symbol = Impl; + return Result; +} + +Expected +CodeViewYAML::SymbolRecord::fromCodeViewSymbol(CVSymbol Symbol) { +#define SYMBOL_RECORD(EnumName, EnumVal, ClassName) \ + case EnumName: \ + return fromCodeViewSymbolImpl>(Symbol); +#define SYMBOL_RECORD_ALIAS(EnumName, EnumVal, AliasName, ClassName) \ + SYMBOL_RECORD(EnumName, EnumVal, ClassName) + switch (Symbol.kind()) { +#include "llvm/DebugInfo/CodeView/CodeViewSymbols.def" + default: + return fromCodeViewSymbolImpl(Symbol); + } + return make_error(cv_error_code::corrupt_record); +} + +template +static void mapSymbolRecordImpl(IO &IO, const char *Class, SymbolKind Kind, + CodeViewYAML::SymbolRecord &Obj) { + if (!IO.outputting()) + Obj.Symbol = std::make_shared(Kind); + + IO.mapRequired(Class, *Obj.Symbol); +} + +void MappingTraits::mapping( + IO &IO, CodeViewYAML::SymbolRecord &Obj) { + SymbolKind Kind; + if (IO.outputting()) + Kind = Obj.Symbol->Kind; + IO.mapRequired("Kind", 
Kind); + +#define SYMBOL_RECORD(EnumName, EnumVal, ClassName) \ + case EnumName: \ + mapSymbolRecordImpl>(IO, #ClassName, Kind, \ + Obj); \ + break; +#define SYMBOL_RECORD_ALIAS(EnumName, EnumVal, AliasName, ClassName) \ + SYMBOL_RECORD(EnumName, EnumVal, ClassName) + switch (Kind) { +#include "llvm/DebugInfo/CodeView/CodeViewSymbols.def" + default: + mapSymbolRecordImpl(IO, "UnknownSym", Kind, Obj); + } +} diff --git a/interpreter/llvm/src/lib/ObjectYAML/CodeViewYAMLTypes.cpp b/interpreter/llvm/src/lib/ObjectYAML/CodeViewYAMLTypes.cpp new file mode 100644 index 0000000000000..81046b217862c --- /dev/null +++ b/interpreter/llvm/src/lib/ObjectYAML/CodeViewYAMLTypes.cpp @@ -0,0 +1,806 @@ +//===- CodeViewYAMLTypes.cpp - CodeView YAMLIO types implementation -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines classes for handling the YAML representation of CodeView +// Debug Info. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ObjectYAML/CodeViewYAMLTypes.h" +#include "llvm/ADT/APSInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/COFF.h" +#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h" +#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/CodeViewError.h" +#include "llvm/DebugInfo/CodeView/TypeDeserializer.h" +#include "llvm/DebugInfo/CodeView/TypeIndex.h" +#include "llvm/DebugInfo/CodeView/TypeTableBuilder.h" +#include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/BinaryStreamReader.h" +#include "llvm/Support/BinaryStreamWriter.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/YAMLTraits.h" +#include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include + +using namespace llvm; +using namespace llvm::codeview; +using namespace llvm::CodeViewYAML; +using namespace llvm::CodeViewYAML::detail; +using namespace llvm::yaml; + +LLVM_YAML_IS_SEQUENCE_VECTOR(OneMethodRecord) +LLVM_YAML_IS_SEQUENCE_VECTOR(VFTableSlotKind) +LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(TypeIndex) + +LLVM_YAML_DECLARE_SCALAR_TRAITS(TypeIndex, false) +LLVM_YAML_DECLARE_SCALAR_TRAITS(APSInt, false) + +LLVM_YAML_DECLARE_ENUM_TRAITS(TypeLeafKind) +LLVM_YAML_DECLARE_ENUM_TRAITS(PointerToMemberRepresentation) +LLVM_YAML_DECLARE_ENUM_TRAITS(VFTableSlotKind) +LLVM_YAML_DECLARE_ENUM_TRAITS(CallingConvention) +LLVM_YAML_DECLARE_ENUM_TRAITS(PointerKind) +LLVM_YAML_DECLARE_ENUM_TRAITS(PointerMode) +LLVM_YAML_DECLARE_ENUM_TRAITS(HfaKind) +LLVM_YAML_DECLARE_ENUM_TRAITS(MemberAccess) +LLVM_YAML_DECLARE_ENUM_TRAITS(MethodKind) +LLVM_YAML_DECLARE_ENUM_TRAITS(WindowsRTClassKind) +LLVM_YAML_DECLARE_ENUM_TRAITS(LabelType) + +LLVM_YAML_DECLARE_BITSET_TRAITS(PointerOptions) +LLVM_YAML_DECLARE_BITSET_TRAITS(ModifierOptions) +LLVM_YAML_DECLARE_BITSET_TRAITS(FunctionOptions) +LLVM_YAML_DECLARE_BITSET_TRAITS(ClassOptions) +LLVM_YAML_DECLARE_BITSET_TRAITS(MethodOptions) + +LLVM_YAML_DECLARE_MAPPING_TRAITS(OneMethodRecord) +LLVM_YAML_DECLARE_MAPPING_TRAITS(MemberPointerInfo) + +namespace llvm { +namespace CodeViewYAML { +namespace detail { + +struct LeafRecordBase { + TypeLeafKind Kind; + + explicit LeafRecordBase(TypeLeafKind K) : 
Kind(K) {} + virtual ~LeafRecordBase() = default; + + virtual void map(yaml::IO &io) = 0; + virtual CVType toCodeViewRecord(TypeTableBuilder &TTB) const = 0; + virtual Error fromCodeViewRecord(CVType Type) = 0; +}; + +template struct LeafRecordImpl : public LeafRecordBase { + explicit LeafRecordImpl(TypeLeafKind K) + : LeafRecordBase(K), Record(static_cast(K)) {} + + void map(yaml::IO &io) override; + + Error fromCodeViewRecord(CVType Type) override { + return TypeDeserializer::deserializeAs(Type, Record); + } + + CVType toCodeViewRecord(TypeTableBuilder &TTB) const override { + TTB.writeKnownType(Record); + return CVType(Kind, TTB.records().back()); + } + + mutable T Record; +}; + +template <> struct LeafRecordImpl : public LeafRecordBase { + explicit LeafRecordImpl(TypeLeafKind K) : LeafRecordBase(K) {} + + void map(yaml::IO &io) override; + CVType toCodeViewRecord(TypeTableBuilder &TTB) const override; + Error fromCodeViewRecord(CVType Type) override; + + std::vector Members; +}; + +struct MemberRecordBase { + TypeLeafKind Kind; + + explicit MemberRecordBase(TypeLeafKind K) : Kind(K) {} + virtual ~MemberRecordBase() = default; + + virtual void map(yaml::IO &io) = 0; + virtual void writeTo(FieldListRecordBuilder &FLRB) = 0; +}; + +template struct MemberRecordImpl : public MemberRecordBase { + explicit MemberRecordImpl(TypeLeafKind K) + : MemberRecordBase(K), Record(static_cast(K)) {} + + void map(yaml::IO &io) override; + + void writeTo(FieldListRecordBuilder &FLRB) override { + FLRB.writeMemberType(Record); + } + + mutable T Record; +}; + +} // end namespace detail +} // end namespace CodeViewYAML +} // end namespace llvm + +void ScalarTraits::output(const GUID &G, void *, llvm::raw_ostream &OS) { + OS << G; +} + +StringRef ScalarTraits::input(StringRef Scalar, void *Ctx, GUID &S) { + if (Scalar.size() != 38) + return "GUID strings are 38 characters long"; + if (Scalar[0] != '{' || Scalar[37] != '}') + return "GUID is not enclosed in {}"; + if (Scalar[9] != '-' || Scalar[14] != '-' || Scalar[19] != '-' || + Scalar[24] != '-') + return "GUID sections are not properly delineated with dashes"; + + uint8_t *OutBuffer = S.Guid; + for (auto Iter = Scalar.begin(); Iter != Scalar.end();) { + if (*Iter == '-' || *Iter == '{' || *Iter == '}') { + ++Iter; + continue; + } + uint8_t Value = (llvm::hexDigitValue(*Iter++) << 4); + Value |= llvm::hexDigitValue(*Iter++); + *OutBuffer++ = Value; + } + + return ""; +} + +void ScalarTraits::output(const TypeIndex &S, void *, + raw_ostream &OS) { + OS << S.getIndex(); +} + +StringRef ScalarTraits::input(StringRef Scalar, void *Ctx, + TypeIndex &S) { + uint32_t I; + StringRef Result = ScalarTraits::input(Scalar, Ctx, I); + S.setIndex(I); + return Result; +} + +void ScalarTraits::output(const APSInt &S, void *, raw_ostream &OS) { + S.print(OS, S.isSigned()); +} + +StringRef ScalarTraits::input(StringRef Scalar, void *Ctx, APSInt &S) { + S = APSInt(Scalar); + return ""; +} + +void ScalarEnumerationTraits::enumeration(IO &io, + TypeLeafKind &Value) { +#define CV_TYPE(name, val) io.enumCase(Value, #name, name); +#include "llvm/DebugInfo/CodeView/CodeViewTypes.def" +#undef CV_TYPE +} + +void ScalarEnumerationTraits::enumeration( + IO &IO, PointerToMemberRepresentation &Value) { + IO.enumCase(Value, "Unknown", PointerToMemberRepresentation::Unknown); + IO.enumCase(Value, "SingleInheritanceData", + PointerToMemberRepresentation::SingleInheritanceData); + IO.enumCase(Value, "MultipleInheritanceData", + PointerToMemberRepresentation::MultipleInheritanceData); + 
IO.enumCase(Value, "VirtualInheritanceData", + PointerToMemberRepresentation::VirtualInheritanceData); + IO.enumCase(Value, "GeneralData", PointerToMemberRepresentation::GeneralData); + IO.enumCase(Value, "SingleInheritanceFunction", + PointerToMemberRepresentation::SingleInheritanceFunction); + IO.enumCase(Value, "MultipleInheritanceFunction", + PointerToMemberRepresentation::MultipleInheritanceFunction); + IO.enumCase(Value, "VirtualInheritanceFunction", + PointerToMemberRepresentation::VirtualInheritanceFunction); + IO.enumCase(Value, "GeneralFunction", + PointerToMemberRepresentation::GeneralFunction); +} + +void ScalarEnumerationTraits::enumeration( + IO &IO, VFTableSlotKind &Kind) { + IO.enumCase(Kind, "Near16", VFTableSlotKind::Near16); + IO.enumCase(Kind, "Far16", VFTableSlotKind::Far16); + IO.enumCase(Kind, "This", VFTableSlotKind::This); + IO.enumCase(Kind, "Outer", VFTableSlotKind::Outer); + IO.enumCase(Kind, "Meta", VFTableSlotKind::Meta); + IO.enumCase(Kind, "Near", VFTableSlotKind::Near); + IO.enumCase(Kind, "Far", VFTableSlotKind::Far); +} + +void ScalarEnumerationTraits::enumeration( + IO &IO, CallingConvention &Value) { + IO.enumCase(Value, "NearC", CallingConvention::NearC); + IO.enumCase(Value, "FarC", CallingConvention::FarC); + IO.enumCase(Value, "NearPascal", CallingConvention::NearPascal); + IO.enumCase(Value, "FarPascal", CallingConvention::FarPascal); + IO.enumCase(Value, "NearFast", CallingConvention::NearFast); + IO.enumCase(Value, "FarFast", CallingConvention::FarFast); + IO.enumCase(Value, "NearStdCall", CallingConvention::NearStdCall); + IO.enumCase(Value, "FarStdCall", CallingConvention::FarStdCall); + IO.enumCase(Value, "NearSysCall", CallingConvention::NearSysCall); + IO.enumCase(Value, "FarSysCall", CallingConvention::FarSysCall); + IO.enumCase(Value, "ThisCall", CallingConvention::ThisCall); + IO.enumCase(Value, "MipsCall", CallingConvention::MipsCall); + IO.enumCase(Value, "Generic", CallingConvention::Generic); + IO.enumCase(Value, "AlphaCall", CallingConvention::AlphaCall); + IO.enumCase(Value, "PpcCall", CallingConvention::PpcCall); + IO.enumCase(Value, "SHCall", CallingConvention::SHCall); + IO.enumCase(Value, "ArmCall", CallingConvention::ArmCall); + IO.enumCase(Value, "AM33Call", CallingConvention::AM33Call); + IO.enumCase(Value, "TriCall", CallingConvention::TriCall); + IO.enumCase(Value, "SH5Call", CallingConvention::SH5Call); + IO.enumCase(Value, "M32RCall", CallingConvention::M32RCall); + IO.enumCase(Value, "ClrCall", CallingConvention::ClrCall); + IO.enumCase(Value, "Inline", CallingConvention::Inline); + IO.enumCase(Value, "NearVector", CallingConvention::NearVector); +} + +void ScalarEnumerationTraits::enumeration(IO &IO, + PointerKind &Kind) { + IO.enumCase(Kind, "Near16", PointerKind::Near16); + IO.enumCase(Kind, "Far16", PointerKind::Far16); + IO.enumCase(Kind, "Huge16", PointerKind::Huge16); + IO.enumCase(Kind, "BasedOnSegment", PointerKind::BasedOnSegment); + IO.enumCase(Kind, "BasedOnValue", PointerKind::BasedOnValue); + IO.enumCase(Kind, "BasedOnSegmentValue", PointerKind::BasedOnSegmentValue); + IO.enumCase(Kind, "BasedOnAddress", PointerKind::BasedOnAddress); + IO.enumCase(Kind, "BasedOnSegmentAddress", + PointerKind::BasedOnSegmentAddress); + IO.enumCase(Kind, "BasedOnType", PointerKind::BasedOnType); + IO.enumCase(Kind, "BasedOnSelf", PointerKind::BasedOnSelf); + IO.enumCase(Kind, "Near32", PointerKind::Near32); + IO.enumCase(Kind, "Far32", PointerKind::Far32); + IO.enumCase(Kind, "Near64", PointerKind::Near64); +} + +void 
ScalarEnumerationTraits::enumeration(IO &IO, + PointerMode &Mode) { + IO.enumCase(Mode, "Pointer", PointerMode::Pointer); + IO.enumCase(Mode, "LValueReference", PointerMode::LValueReference); + IO.enumCase(Mode, "PointerToDataMember", PointerMode::PointerToDataMember); + IO.enumCase(Mode, "PointerToMemberFunction", + PointerMode::PointerToMemberFunction); + IO.enumCase(Mode, "RValueReference", PointerMode::RValueReference); +} + +void ScalarEnumerationTraits::enumeration(IO &IO, HfaKind &Value) { + IO.enumCase(Value, "None", HfaKind::None); + IO.enumCase(Value, "Float", HfaKind::Float); + IO.enumCase(Value, "Double", HfaKind::Double); + IO.enumCase(Value, "Other", HfaKind::Other); +} + +void ScalarEnumerationTraits::enumeration(IO &IO, + MemberAccess &Access) { + IO.enumCase(Access, "None", MemberAccess::None); + IO.enumCase(Access, "Private", MemberAccess::Private); + IO.enumCase(Access, "Protected", MemberAccess::Protected); + IO.enumCase(Access, "Public", MemberAccess::Public); +} + +void ScalarEnumerationTraits::enumeration(IO &IO, + MethodKind &Kind) { + IO.enumCase(Kind, "Vanilla", MethodKind::Vanilla); + IO.enumCase(Kind, "Virtual", MethodKind::Virtual); + IO.enumCase(Kind, "Static", MethodKind::Static); + IO.enumCase(Kind, "Friend", MethodKind::Friend); + IO.enumCase(Kind, "IntroducingVirtual", MethodKind::IntroducingVirtual); + IO.enumCase(Kind, "PureVirtual", MethodKind::PureVirtual); + IO.enumCase(Kind, "PureIntroducingVirtual", + MethodKind::PureIntroducingVirtual); +} + +void ScalarEnumerationTraits::enumeration( + IO &IO, WindowsRTClassKind &Value) { + IO.enumCase(Value, "None", WindowsRTClassKind::None); + IO.enumCase(Value, "Ref", WindowsRTClassKind::RefClass); + IO.enumCase(Value, "Value", WindowsRTClassKind::ValueClass); + IO.enumCase(Value, "Interface", WindowsRTClassKind::Interface); +} + +void ScalarEnumerationTraits::enumeration(IO &IO, LabelType &Value) { + IO.enumCase(Value, "Near", LabelType::Near); + IO.enumCase(Value, "Far", LabelType::Far); +} + +void ScalarBitSetTraits::bitset(IO &IO, + PointerOptions &Options) { + IO.bitSetCase(Options, "None", PointerOptions::None); + IO.bitSetCase(Options, "Flat32", PointerOptions::Flat32); + IO.bitSetCase(Options, "Volatile", PointerOptions::Volatile); + IO.bitSetCase(Options, "Const", PointerOptions::Const); + IO.bitSetCase(Options, "Unaligned", PointerOptions::Unaligned); + IO.bitSetCase(Options, "Restrict", PointerOptions::Restrict); + IO.bitSetCase(Options, "WinRTSmartPointer", + PointerOptions::WinRTSmartPointer); +} + +void ScalarBitSetTraits::bitset(IO &IO, + ModifierOptions &Options) { + IO.bitSetCase(Options, "None", ModifierOptions::None); + IO.bitSetCase(Options, "Const", ModifierOptions::Const); + IO.bitSetCase(Options, "Volatile", ModifierOptions::Volatile); + IO.bitSetCase(Options, "Unaligned", ModifierOptions::Unaligned); +} + +void ScalarBitSetTraits::bitset(IO &IO, + FunctionOptions &Options) { + IO.bitSetCase(Options, "None", FunctionOptions::None); + IO.bitSetCase(Options, "CxxReturnUdt", FunctionOptions::CxxReturnUdt); + IO.bitSetCase(Options, "Constructor", FunctionOptions::Constructor); + IO.bitSetCase(Options, "ConstructorWithVirtualBases", + FunctionOptions::ConstructorWithVirtualBases); +} + +void ScalarBitSetTraits::bitset(IO &IO, ClassOptions &Options) { + IO.bitSetCase(Options, "None", ClassOptions::None); + IO.bitSetCase(Options, "HasConstructorOrDestructor", + ClassOptions::HasConstructorOrDestructor); + IO.bitSetCase(Options, "HasOverloadedOperator", + ClassOptions::HasOverloadedOperator); + 
IO.bitSetCase(Options, "Nested", ClassOptions::Nested); + IO.bitSetCase(Options, "ContainsNestedClass", + ClassOptions::ContainsNestedClass); + IO.bitSetCase(Options, "HasOverloadedAssignmentOperator", + ClassOptions::HasOverloadedAssignmentOperator); + IO.bitSetCase(Options, "HasConversionOperator", + ClassOptions::HasConversionOperator); + IO.bitSetCase(Options, "ForwardReference", ClassOptions::ForwardReference); + IO.bitSetCase(Options, "Scoped", ClassOptions::Scoped); + IO.bitSetCase(Options, "HasUniqueName", ClassOptions::HasUniqueName); + IO.bitSetCase(Options, "Sealed", ClassOptions::Sealed); + IO.bitSetCase(Options, "Intrinsic", ClassOptions::Intrinsic); +} + +void ScalarBitSetTraits::bitset(IO &IO, MethodOptions &Options) { + IO.bitSetCase(Options, "None", MethodOptions::None); + IO.bitSetCase(Options, "Pseudo", MethodOptions::Pseudo); + IO.bitSetCase(Options, "NoInherit", MethodOptions::NoInherit); + IO.bitSetCase(Options, "NoConstruct", MethodOptions::NoConstruct); + IO.bitSetCase(Options, "CompilerGenerated", MethodOptions::CompilerGenerated); + IO.bitSetCase(Options, "Sealed", MethodOptions::Sealed); +} + +void MappingTraits::mapping(IO &IO, MemberPointerInfo &MPI) { + IO.mapRequired("ContainingType", MPI.ContainingType); + IO.mapRequired("Representation", MPI.Representation); +} + +namespace llvm { +namespace CodeViewYAML { +namespace detail { + +template <> void LeafRecordImpl::map(IO &IO) { + IO.mapRequired("ModifiedType", Record.ModifiedType); + IO.mapRequired("Modifiers", Record.Modifiers); +} + +template <> void LeafRecordImpl::map(IO &IO) { + IO.mapRequired("ReturnType", Record.ReturnType); + IO.mapRequired("CallConv", Record.CallConv); + IO.mapRequired("Options", Record.Options); + IO.mapRequired("ParameterCount", Record.ParameterCount); + IO.mapRequired("ArgumentList", Record.ArgumentList); +} + +template <> void LeafRecordImpl::map(IO &IO) { + IO.mapRequired("ReturnType", Record.ReturnType); + IO.mapRequired("ClassType", Record.ClassType); + IO.mapRequired("ThisType", Record.ThisType); + IO.mapRequired("CallConv", Record.CallConv); + IO.mapRequired("Options", Record.Options); + IO.mapRequired("ParameterCount", Record.ParameterCount); + IO.mapRequired("ArgumentList", Record.ArgumentList); + IO.mapRequired("ThisPointerAdjustment", Record.ThisPointerAdjustment); +} + +template <> void LeafRecordImpl::map(IO &IO) { + IO.mapRequired("Mode", Record.Mode); +} + +template <> void LeafRecordImpl::map(IO &IO) { + IO.mapRequired("ClassType", Record.ClassType); + IO.mapRequired("FunctionType", Record.FunctionType); + IO.mapRequired("Name", Record.Name); +} + +template <> void LeafRecordImpl::map(IO &IO) { + IO.mapRequired("ArgIndices", Record.ArgIndices); +} + +template <> void LeafRecordImpl::map(IO &IO) { + IO.mapRequired("StringIndices", Record.StringIndices); +} + +template <> void LeafRecordImpl::map(IO &IO) { + IO.mapRequired("ReferentType", Record.ReferentType); + IO.mapRequired("Attrs", Record.Attrs); + IO.mapOptional("MemberInfo", Record.MemberInfo); +} + +template <> void LeafRecordImpl::map(IO &IO) { + IO.mapRequired("ElementType", Record.ElementType); + IO.mapRequired("IndexType", Record.IndexType); + IO.mapRequired("Size", Record.Size); + IO.mapRequired("Name", Record.Name); +} + +void LeafRecordImpl::map(IO &IO) { + IO.mapRequired("FieldList", Members); +} + +} // end namespace detail +} // end namespace CodeViewYAML +} // end namespace llvm + +namespace { + +class MemberRecordConversionVisitor : public TypeVisitorCallbacks { +public: + explicit 
MemberRecordConversionVisitor(std::vector &Records) + : Records(Records) {} + +#define TYPE_RECORD(EnumName, EnumVal, Name) +#define MEMBER_RECORD(EnumName, EnumVal, Name) \ + Error visitKnownMember(CVMemberRecord &CVR, Name##Record &Record) override { \ + return visitKnownMemberImpl(Record); \ + } +#define TYPE_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) +#define MEMBER_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) +#include "llvm/DebugInfo/CodeView/CodeViewTypes.def" +private: + template Error visitKnownMemberImpl(T &Record) { + TypeLeafKind K = static_cast(Record.getKind()); + auto Impl = std::make_shared>(K); + Impl->Record = Record; + Records.push_back(MemberRecord{Impl}); + return Error::success(); + } + + std::vector &Records; +}; + +} // end anonymous namespace + +Error LeafRecordImpl::fromCodeViewRecord(CVType Type) { + MemberRecordConversionVisitor V(Members); + return visitMemberRecordStream(Type.content(), V); +} + +CVType +LeafRecordImpl::toCodeViewRecord(TypeTableBuilder &TTB) const { + FieldListRecordBuilder FLRB(TTB); + FLRB.begin(); + for (const auto &Member : Members) { + Member.Member->writeTo(FLRB); + } + FLRB.end(true); + return CVType(Kind, TTB.records().back()); +} + +void MappingTraits::mapping(IO &io, OneMethodRecord &Record) { + io.mapRequired("Type", Record.Type); + io.mapRequired("Attrs", Record.Attrs.Attrs); + io.mapRequired("VFTableOffset", Record.VFTableOffset); + io.mapRequired("Name", Record.Name); +} + +namespace llvm { +namespace CodeViewYAML { +namespace detail { + +template <> void LeafRecordImpl::map(IO &IO) { + IO.mapRequired("MemberCount", Record.MemberCount); + IO.mapRequired("Options", Record.Options); + IO.mapRequired("FieldList", Record.FieldList); + IO.mapRequired("Name", Record.Name); + IO.mapRequired("UniqueName", Record.UniqueName); + IO.mapRequired("DerivationList", Record.DerivationList); + IO.mapRequired("VTableShape", Record.VTableShape); + IO.mapRequired("Size", Record.Size); +} + +template <> void LeafRecordImpl::map(IO &IO) { + IO.mapRequired("MemberCount", Record.MemberCount); + IO.mapRequired("Options", Record.Options); + IO.mapRequired("FieldList", Record.FieldList); + IO.mapRequired("Name", Record.Name); + IO.mapRequired("UniqueName", Record.UniqueName); + IO.mapRequired("Size", Record.Size); +} + +template <> void LeafRecordImpl::map(IO &IO) { + IO.mapRequired("NumEnumerators", Record.MemberCount); + IO.mapRequired("Options", Record.Options); + IO.mapRequired("FieldList", Record.FieldList); + IO.mapRequired("Name", Record.Name); + IO.mapRequired("UniqueName", Record.UniqueName); + IO.mapRequired("UnderlyingType", Record.UnderlyingType); +} + +template <> void LeafRecordImpl::map(IO &IO) { + IO.mapRequired("Type", Record.Type); + IO.mapRequired("BitSize", Record.BitSize); + IO.mapRequired("BitOffset", Record.BitOffset); +} + +template <> void LeafRecordImpl::map(IO &IO) { + IO.mapRequired("Slots", Record.Slots); +} + +template <> void LeafRecordImpl::map(IO &IO) { + IO.mapRequired("Guid", Record.Guid); + IO.mapRequired("Age", Record.Age); + IO.mapRequired("Name", Record.Name); +} + +template <> void LeafRecordImpl::map(IO &IO) { + IO.mapRequired("Id", Record.Id); + IO.mapRequired("String", Record.String); +} + +template <> void LeafRecordImpl::map(IO &IO) { + IO.mapRequired("ParentScope", Record.ParentScope); + IO.mapRequired("FunctionType", Record.FunctionType); + IO.mapRequired("Name", Record.Name); +} + +template <> void LeafRecordImpl::map(IO &IO) { + IO.mapRequired("UDT", Record.UDT); + IO.mapRequired("SourceFile", 
Record.SourceFile); + IO.mapRequired("LineNumber", Record.LineNumber); +} + +template <> void LeafRecordImpl::map(IO &IO) { + IO.mapRequired("UDT", Record.UDT); + IO.mapRequired("SourceFile", Record.SourceFile); + IO.mapRequired("LineNumber", Record.LineNumber); + IO.mapRequired("Module", Record.Module); +} + +template <> void LeafRecordImpl::map(IO &IO) { + IO.mapRequired("ArgIndices", Record.ArgIndices); +} + +template <> void LeafRecordImpl::map(IO &IO) { + IO.mapRequired("CompleteClass", Record.CompleteClass); + IO.mapRequired("OverriddenVFTable", Record.OverriddenVFTable); + IO.mapRequired("VFPtrOffset", Record.VFPtrOffset); + IO.mapRequired("MethodNames", Record.MethodNames); +} + +template <> void LeafRecordImpl::map(IO &IO) { + IO.mapRequired("Methods", Record.Methods); +} + +template <> void MemberRecordImpl::map(IO &IO) { + MappingTraits::mapping(IO, Record); +} + +template <> void MemberRecordImpl::map(IO &IO) { + IO.mapRequired("NumOverloads", Record.NumOverloads); + IO.mapRequired("MethodList", Record.MethodList); + IO.mapRequired("Name", Record.Name); +} + +template <> void MemberRecordImpl::map(IO &IO) { + IO.mapRequired("Type", Record.Type); + IO.mapRequired("Name", Record.Name); +} + +template <> void MemberRecordImpl::map(IO &IO) { + IO.mapRequired("Attrs", Record.Attrs.Attrs); + IO.mapRequired("Type", Record.Type); + IO.mapRequired("FieldOffset", Record.FieldOffset); + IO.mapRequired("Name", Record.Name); +} + +template <> void MemberRecordImpl::map(IO &IO) { + IO.mapRequired("Attrs", Record.Attrs.Attrs); + IO.mapRequired("Type", Record.Type); + IO.mapRequired("Name", Record.Name); +} + +template <> void MemberRecordImpl::map(IO &IO) { + IO.mapRequired("Attrs", Record.Attrs.Attrs); + IO.mapRequired("Value", Record.Value); + IO.mapRequired("Name", Record.Name); +} + +template <> void MemberRecordImpl::map(IO &IO) { + IO.mapRequired("Type", Record.Type); +} + +template <> void MemberRecordImpl::map(IO &IO) { + IO.mapRequired("Attrs", Record.Attrs.Attrs); + IO.mapRequired("Type", Record.Type); + IO.mapRequired("Offset", Record.Offset); +} + +template <> void MemberRecordImpl::map(IO &IO) { + IO.mapRequired("Attrs", Record.Attrs.Attrs); + IO.mapRequired("BaseType", Record.BaseType); + IO.mapRequired("VBPtrType", Record.VBPtrType); + IO.mapRequired("VBPtrOffset", Record.VBPtrOffset); + IO.mapRequired("VTableIndex", Record.VTableIndex); +} + +template <> void MemberRecordImpl::map(IO &IO) { + IO.mapRequired("ContinuationIndex", Record.ContinuationIndex); +} + +} // end namespace detail +} // end namespace CodeViewYAML +} // end namespace llvm + +template +static inline Expected fromCodeViewRecordImpl(CVType Type) { + LeafRecord Result; + + auto Impl = std::make_shared>(Type.kind()); + if (auto EC = Impl->fromCodeViewRecord(Type)) + return std::move(EC); + Result.Leaf = Impl; + return Result; +} + +Expected LeafRecord::fromCodeViewRecord(CVType Type) { +#define TYPE_RECORD(EnumName, EnumVal, ClassName) \ + case EnumName: \ + return fromCodeViewRecordImpl(Type); +#define TYPE_RECORD_ALIAS(EnumName, EnumVal, AliasName, ClassName) \ + TYPE_RECORD(EnumName, EnumVal, ClassName) +#define MEMBER_RECORD(EnumName, EnumVal, ClassName) +#define MEMBER_RECORD_ALIAS(EnumName, EnumVal, AliasName, ClassName) + switch (Type.kind()) { +#include "llvm/DebugInfo/CodeView/CodeViewTypes.def" + default: + llvm_unreachable("Unknown leaf kind!"); + } + return make_error(cv_error_code::corrupt_record); +} + +CVType LeafRecord::toCodeViewRecord(BumpPtrAllocator &Alloc) const { + TypeTableBuilder 
TTB(Alloc); + return Leaf->toCodeViewRecord(TTB); +} + +CVType LeafRecord::toCodeViewRecord(TypeTableBuilder &TTB) const { + return Leaf->toCodeViewRecord(TTB); +} + +namespace llvm { +namespace yaml { + +template <> struct MappingTraits { + static void mapping(IO &io, LeafRecordBase &Record) { Record.map(io); } +}; + +template <> struct MappingTraits { + static void mapping(IO &io, MemberRecordBase &Record) { Record.map(io); } +}; + +} // end namespace yaml +} // end namespace llvm + +template +static void mapLeafRecordImpl(IO &IO, const char *Class, TypeLeafKind Kind, + LeafRecord &Obj) { + if (!IO.outputting()) + Obj.Leaf = std::make_shared>(Kind); + + if (Kind == LF_FIELDLIST) + Obj.Leaf->map(IO); + else + IO.mapRequired(Class, *Obj.Leaf); +} + +void MappingTraits::mapping(IO &IO, LeafRecord &Obj) { + TypeLeafKind Kind; + if (IO.outputting()) + Kind = Obj.Leaf->Kind; + IO.mapRequired("Kind", Kind); + +#define TYPE_RECORD(EnumName, EnumVal, ClassName) \ + case EnumName: \ + mapLeafRecordImpl(IO, #ClassName, Kind, Obj); \ + break; +#define TYPE_RECORD_ALIAS(EnumName, EnumVal, AliasName, ClassName) \ + TYPE_RECORD(EnumName, EnumVal, ClassName) +#define MEMBER_RECORD(EnumName, EnumVal, ClassName) +#define MEMBER_RECORD_ALIAS(EnumName, EnumVal, AliasName, ClassName) + switch (Kind) { +#include "llvm/DebugInfo/CodeView/CodeViewTypes.def" + default: { llvm_unreachable("Unknown leaf kind!"); } + } +} + +template +static void mapMemberRecordImpl(IO &IO, const char *Class, TypeLeafKind Kind, + MemberRecord &Obj) { + if (!IO.outputting()) + Obj.Member = std::make_shared>(Kind); + + IO.mapRequired(Class, *Obj.Member); +} + +void MappingTraits::mapping(IO &IO, MemberRecord &Obj) { + TypeLeafKind Kind; + if (IO.outputting()) + Kind = Obj.Member->Kind; + IO.mapRequired("Kind", Kind); + +#define MEMBER_RECORD(EnumName, EnumVal, ClassName) \ + case EnumName: \ + mapMemberRecordImpl(IO, #ClassName, Kind, Obj); \ + break; +#define MEMBER_RECORD_ALIAS(EnumName, EnumVal, AliasName, ClassName) \ + MEMBER_RECORD(EnumName, EnumVal, ClassName) +#define TYPE_RECORD(EnumName, EnumVal, ClassName) +#define TYPE_RECORD_ALIAS(EnumName, EnumVal, AliasName, ClassName) + switch (Kind) { +#include "llvm/DebugInfo/CodeView/CodeViewTypes.def" + default: { llvm_unreachable("Unknown member kind!"); } + } +} + +std::vector +llvm::CodeViewYAML::fromDebugT(ArrayRef DebugT) { + ExitOnError Err("Invalid .debug$T section!"); + BinaryStreamReader Reader(DebugT, support::little); + CVTypeArray Types; + uint32_t Magic; + + Err(Reader.readInteger(Magic)); + assert(Magic == COFF::DEBUG_SECTION_MAGIC && "Invalid .debug$T section!"); + + std::vector Result; + Err(Reader.readArray(Types, Reader.bytesRemaining())); + for (const auto &T : Types) { + auto CVT = Err(LeafRecord::fromCodeViewRecord(T)); + Result.push_back(CVT); + } + return Result; +} + +ArrayRef llvm::CodeViewYAML::toDebugT(ArrayRef Leafs, + BumpPtrAllocator &Alloc) { + TypeTableBuilder TTB(Alloc, false); + uint32_t Size = sizeof(uint32_t); + for (const auto &Leaf : Leafs) { + CVType T = Leaf.toCodeViewRecord(TTB); + Size += T.length(); + assert(T.length() % 4 == 0 && "Improper type record alignment!"); + } + uint8_t *ResultBuffer = Alloc.Allocate(Size); + MutableArrayRef Output(ResultBuffer, Size); + BinaryStreamWriter Writer(Output, support::little); + ExitOnError Err("Error writing type record to .debug$T section"); + Err(Writer.writeInteger(COFF::DEBUG_SECTION_MAGIC)); + for (const auto &R : TTB.records()) { + Err(Writer.writeBytes(R)); + } + 
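+  // Consistency note: Size was pre-computed above as the 4-byte magic plus
+  // the length of every serialized record, and each record is asserted to be
+  // 4-byte aligned, so after the loop the writer must sit exactly at the end
+  // of the allocation; the assert below checks that.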
assert(Writer.bytesRemaining() == 0 && "Didn't write all type record bytes!"); + return Output; +} diff --git a/interpreter/llvm/src/lib/ObjectYAML/DWARFEmitter.cpp b/interpreter/llvm/src/lib/ObjectYAML/DWARFEmitter.cpp index 1aa1519b708ba..89fc652035ca9 100644 --- a/interpreter/llvm/src/lib/ObjectYAML/DWARFEmitter.cpp +++ b/interpreter/llvm/src/lib/ObjectYAML/DWARFEmitter.cpp @@ -13,15 +13,25 @@ //===----------------------------------------------------------------------===// #include "llvm/ObjectYAML/DWARFEmitter.h" +#include "DWARFVisitor.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" #include "llvm/ObjectYAML/DWARFYAML.h" #include "llvm/Support/Error.h" +#include "llvm/Support/Host.h" #include "llvm/Support/LEB128.h" -#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/SwapByteOrder.h" - -#include "DWARFVisitor.h" - +#include "llvm/Support/YAMLTraits.h" +#include "llvm/Support/raw_ostream.h" #include +#include +#include +#include +#include +#include +#include using namespace llvm; @@ -127,7 +137,7 @@ class DumpVisitor : public DWARFYAML::ConstVisitor { raw_ostream &OS; protected: - virtual void onStartCompileUnit(const DWARFYAML::Unit &CU) { + void onStartCompileUnit(const DWARFYAML::Unit &CU) override { writeInitialLength(CU.Length, OS, DebugInfo.IsLittleEndian); writeInteger((uint16_t)CU.Version, OS, DebugInfo.IsLittleEndian); if(CU.Version >= 5) { @@ -141,41 +151,43 @@ class DumpVisitor : public DWARFYAML::ConstVisitor { } - virtual void onStartDIE(const DWARFYAML::Unit &CU, - const DWARFYAML::Entry &DIE) { + void onStartDIE(const DWARFYAML::Unit &CU, + const DWARFYAML::Entry &DIE) override { encodeULEB128(DIE.AbbrCode, OS); } - virtual void onValue(const uint8_t U) { + void onValue(const uint8_t U) override { writeInteger(U, OS, DebugInfo.IsLittleEndian); } - virtual void onValue(const uint16_t U) { + void onValue(const uint16_t U) override { writeInteger(U, OS, DebugInfo.IsLittleEndian); } - virtual void onValue(const uint32_t U) { + + void onValue(const uint32_t U) override { writeInteger(U, OS, DebugInfo.IsLittleEndian); } - virtual void onValue(const uint64_t U, const bool LEB = false) { + + void onValue(const uint64_t U, const bool LEB = false) override { if (LEB) encodeULEB128(U, OS); else writeInteger(U, OS, DebugInfo.IsLittleEndian); } - virtual void onValue(const int64_t S, const bool LEB = false) { + void onValue(const int64_t S, const bool LEB = false) override { if (LEB) encodeSLEB128(S, OS); else writeInteger(S, OS, DebugInfo.IsLittleEndian); } - virtual void onValue(const StringRef String) { + void onValue(const StringRef String) override { OS.write(String.data(), String.size()); OS.write('\0'); } - virtual void onValue(const MemoryBufferRef MBR) { + void onValue(const MemoryBufferRef MBR) override { OS.write(MBR.getBufferStart(), MBR.getBufferSize()); } @@ -280,7 +292,7 @@ void DWARFYAML::EmitDebugLine(raw_ostream &OS, const DWARFYAML::Data &DI) { } } -typedef void (*EmitFuncType)(raw_ostream &, const DWARFYAML::Data &); +using EmitFuncType = void (*)(raw_ostream &, const DWARFYAML::Data &); static void EmitDebugSectionImpl(const DWARFYAML::Data &DI, EmitFuncType EmitFunc, diff --git a/interpreter/llvm/src/lib/ObjectYAML/DWARFVisitor.h b/interpreter/llvm/src/lib/ObjectYAML/DWARFVisitor.h index 263e36220a05b..81ef412eb7e69 100644 --- a/interpreter/llvm/src/lib/ObjectYAML/DWARFVisitor.h +++ b/interpreter/llvm/src/lib/ObjectYAML/DWARFVisitor.h @@ -13,7 +13,7 @@ 
#define LLVM_OBJECTYAML_DWARFVISITOR_H #include "llvm/ADT/StringRef.h" -#include "llvm/Support/Dwarf.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/Support/MemoryBuffer.h" namespace llvm { diff --git a/interpreter/llvm/src/lib/ObjectYAML/DWARFYAML.cpp b/interpreter/llvm/src/lib/ObjectYAML/DWARFYAML.cpp index edb9545f14b13..d6c09e1a35d73 100644 --- a/interpreter/llvm/src/lib/ObjectYAML/DWARFYAML.cpp +++ b/interpreter/llvm/src/lib/ObjectYAML/DWARFYAML.cpp @@ -171,6 +171,6 @@ void MappingTraits::mapping( IO.mapRequired("TotalLength64", InitialLength.TotalLength64); } -} // namespace llvm::yaml +} // end namespace yaml -} // namespace llvm +} // end namespace llvm diff --git a/interpreter/llvm/src/lib/ObjectYAML/ELFYAML.cpp b/interpreter/llvm/src/lib/ObjectYAML/ELFYAML.cpp index 3052901da45ca..39741dab327a5 100644 --- a/interpreter/llvm/src/lib/ObjectYAML/ELFYAML.cpp +++ b/interpreter/llvm/src/lib/ObjectYAML/ELFYAML.cpp @@ -12,12 +12,18 @@ //===----------------------------------------------------------------------===// #include "llvm/ObjectYAML/ELFYAML.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MipsABIFlags.h" +#include "llvm/Support/YAMLTraits.h" +#include +#include namespace llvm { -ELFYAML::Section::~Section() {} +ELFYAML::Section::~Section() = default; namespace yaml { @@ -372,6 +378,7 @@ void ScalarEnumerationTraits::enumeration( ECase(SHT_GROUP); ECase(SHT_SYMTAB_SHNDX); ECase(SHT_LOOS); + ECase(SHT_LLVM_ODRTAB); ECase(SHT_GNU_ATTRIBUTES); ECase(SHT_GNU_HASH); ECase(SHT_GNU_verdef); @@ -424,12 +431,6 @@ void ScalarBitSetTraits::bitset(IO &IO, case ELF::EM_ARM: BCase(SHF_ARM_PURECODE); break; - case ELF::EM_AMDGPU: - BCase(SHF_AMDGPU_HSA_GLOBAL); - BCase(SHF_AMDGPU_HSA_READONLY); - BCase(SHF_AMDGPU_HSA_CODE); - BCase(SHF_AMDGPU_HSA_AGENT); - break; case ELF::EM_HEXAGON: BCase(SHF_HEX_GPREL); break; @@ -513,40 +514,41 @@ void ScalarEnumerationTraits::enumeration( #define ELF_RELOC(X, Y) IO.enumCase(Value, #X, ELF::X); switch (Object->Header.Machine) { case ELF::EM_X86_64: -#include "llvm/Support/ELFRelocs/x86_64.def" +#include "llvm/BinaryFormat/ELFRelocs/x86_64.def" break; case ELF::EM_MIPS: -#include "llvm/Support/ELFRelocs/Mips.def" +#include "llvm/BinaryFormat/ELFRelocs/Mips.def" break; case ELF::EM_HEXAGON: -#include "llvm/Support/ELFRelocs/Hexagon.def" +#include "llvm/BinaryFormat/ELFRelocs/Hexagon.def" break; case ELF::EM_386: case ELF::EM_IAMCU: -#include "llvm/Support/ELFRelocs/i386.def" +#include "llvm/BinaryFormat/ELFRelocs/i386.def" break; case ELF::EM_AARCH64: -#include "llvm/Support/ELFRelocs/AArch64.def" +#include "llvm/BinaryFormat/ELFRelocs/AArch64.def" break; case ELF::EM_ARM: -#include "llvm/Support/ELFRelocs/ARM.def" +#include "llvm/BinaryFormat/ELFRelocs/ARM.def" break; case ELF::EM_RISCV: -#include "llvm/Support/ELFRelocs/RISCV.def" +#include "llvm/BinaryFormat/ELFRelocs/RISCV.def" break; case ELF::EM_LANAI: -#include "llvm/Support/ELFRelocs/Lanai.def" +#include "llvm/BinaryFormat/ELFRelocs/Lanai.def" break; case ELF::EM_AMDGPU: -#include "llvm/Support/ELFRelocs/AMDGPU.def" +#include "llvm/BinaryFormat/ELFRelocs/AMDGPU.def" break; case ELF::EM_BPF: -#include "llvm/Support/ELFRelocs/BPF.def" +#include "llvm/BinaryFormat/ELFRelocs/BPF.def" break; default: llvm_unreachable("Unsupported architecture"); } #undef ELF_RELOC + IO.enumFallback(Value); } void ScalarEnumerationTraits::enumeration( @@ -648,6 +650,7 @@ void 
MappingTraits::mapping(IO &IO, } namespace { + struct NormalizedOther { NormalizedOther(IO &) : Visibility(ELFYAML::ELF_STV(0)), Other(ELFYAML::ELF_STO(0)) {} @@ -659,7 +662,8 @@ struct NormalizedOther { ELFYAML::ELF_STV Visibility; ELFYAML::ELF_STO Other; }; -} + +} // end anonymous namespace void MappingTraits::mapping(IO &IO, ELFYAML::Symbol &Symbol) { IO.mapOptional("Name", Symbol.Name, StringRef()); @@ -782,6 +786,7 @@ StringRef MappingTraits>::validate( } namespace { + struct NormalizedMips64RelType { NormalizedMips64RelType(IO &) : Type(ELFYAML::ELF_REL(ELF::R_MIPS_NONE)), @@ -802,7 +807,8 @@ struct NormalizedMips64RelType { ELFYAML::ELF_REL Type3; ELFYAML::ELF_RSS SpecSym; }; -} + +} // end anonymous namespace void MappingTraits::mapping(IO &IO, ELFYAML::Relocation &Rel) { @@ -843,4 +849,5 @@ LLVM_YAML_STRONG_TYPEDEF(uint32_t, MIPS_AFL_ASE) LLVM_YAML_STRONG_TYPEDEF(uint32_t, MIPS_AFL_FLAGS1) } // end namespace yaml + } // end namespace llvm diff --git a/interpreter/llvm/src/lib/ObjectYAML/LLVMBuild.txt b/interpreter/llvm/src/lib/ObjectYAML/LLVMBuild.txt index b8d1d2f1779e3..44657e916a915 100644 --- a/interpreter/llvm/src/lib/ObjectYAML/LLVMBuild.txt +++ b/interpreter/llvm/src/lib/ObjectYAML/LLVMBuild.txt @@ -11,4 +11,4 @@ type = Library name = ObjectYAML parent = Libraries -required_libraries = Support +required_libraries = Support DebugInfoCodeView diff --git a/interpreter/llvm/src/lib/ObjectYAML/MachOYAML.cpp b/interpreter/llvm/src/lib/ObjectYAML/MachOYAML.cpp index 6b0e4e3762d05..ab452a7bf6ef4 100644 --- a/interpreter/llvm/src/lib/ObjectYAML/MachOYAML.cpp +++ b/interpreter/llvm/src/lib/ObjectYAML/MachOYAML.cpp @@ -12,16 +12,19 @@ //===----------------------------------------------------------------------===// #include "llvm/ObjectYAML/MachOYAML.h" -#include "llvm/Support/Casting.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/MachO.h" #include "llvm/Support/Format.h" #include "llvm/Support/Host.h" -#include "llvm/Support/MachO.h" - -#include // For memcpy, memset and strnlen. 
+#include "llvm/Support/YAMLTraits.h" +#include "llvm/Support/raw_ostream.h" +#include +#include +#include namespace llvm { -MachOYAML::LoadCommand::~LoadCommand() {} +MachOYAML::LoadCommand::~LoadCommand() = default; bool MachOYAML::LinkEditData::isEmpty() const { return 0 == @@ -33,7 +36,7 @@ bool MachOYAML::LinkEditData::isEmpty() const { namespace yaml { void ScalarTraits::output(const char_16 &Val, void *, - llvm::raw_ostream &Out) { + raw_ostream &Out) { auto Len = strnlen(&Val[0], 16); Out << StringRef(&Val[0], Len); } @@ -51,8 +54,7 @@ StringRef ScalarTraits::input(StringRef Scalar, void *, char_16 &Val) { bool ScalarTraits::mustQuote(StringRef S) { return needsQuotes(S); } -void ScalarTraits::output(const uuid_t &Val, void *, - llvm::raw_ostream &Out) { +void ScalarTraits::output(const uuid_t &Val, void *, raw_ostream &Out) { for (int Idx = 0; Idx < 16; ++Idx) { Out << format("%02" PRIX32, Val[Idx]); if (Idx == 3 || Idx == 5 || Idx == 7 || Idx == 9) @@ -154,7 +156,7 @@ void MappingTraits::mapping( IO.mapOptional("BindOpcodes", LinkEditData.BindOpcodes); IO.mapOptional("WeakBindOpcodes", LinkEditData.WeakBindOpcodes); IO.mapOptional("LazyBindOpcodes", LinkEditData.LazyBindOpcodes); - if(LinkEditData.ExportTrie.Children.size() > 0 || !IO.outputting()) + if (!LinkEditData.ExportTrie.Children.empty() || !IO.outputting()) IO.mapOptional("ExportTrie", LinkEditData.ExportTrie); IO.mapOptional("NameList", LinkEditData.NameList); IO.mapOptional("StringTable", LinkEditData.StringTable); @@ -252,7 +254,7 @@ void MappingTraits::mapping( break; switch (LoadCommand.Data.load_command_data.cmd) { -#include "llvm/Support/MachO.def" +#include "llvm/BinaryFormat/MachO.def" } IO.mapOptional("PayloadBytes", LoadCommand.PayloadBytes); IO.mapOptional("ZeroPadBytes", LoadCommand.ZeroPadBytes, (uint64_t)0ull); @@ -308,13 +310,11 @@ void MappingTraits::mapping( void MappingTraits::mapping( IO &IO, MachO::dylinker_command &LoadCommand) { - IO.mapRequired("name", LoadCommand.name); } void MappingTraits::mapping( IO &IO, MachO::dysymtab_command &LoadCommand) { - IO.mapRequired("ilocalsym", LoadCommand.ilocalsym); IO.mapRequired("nlocalsym", LoadCommand.nlocalsym); IO.mapRequired("iextdefsym", LoadCommand.iextdefsym); @@ -337,7 +337,6 @@ void MappingTraits::mapping( void MappingTraits::mapping( IO &IO, MachO::encryption_info_command &LoadCommand) { - IO.mapRequired("cryptoff", LoadCommand.cryptoff); IO.mapRequired("cryptsize", LoadCommand.cryptsize); IO.mapRequired("cryptid", LoadCommand.cryptid); @@ -345,7 +344,6 @@ void MappingTraits::mapping( void MappingTraits::mapping( IO &IO, MachO::encryption_info_command_64 &LoadCommand) { - IO.mapRequired("cryptoff", LoadCommand.cryptoff); IO.mapRequired("cryptsize", LoadCommand.cryptsize); IO.mapRequired("cryptid", LoadCommand.cryptid); @@ -354,14 +352,12 @@ void MappingTraits::mapping( void MappingTraits::mapping( IO &IO, MachO::entry_point_command &LoadCommand) { - IO.mapRequired("entryoff", LoadCommand.entryoff); IO.mapRequired("stacksize", LoadCommand.stacksize); } void MappingTraits::mapping( IO &IO, MachO::fvmfile_command &LoadCommand) { - IO.mapRequired("name", LoadCommand.name); IO.mapRequired("header_addr", LoadCommand.header_addr); } @@ -374,7 +370,6 @@ void MappingTraits::mapping(IO &IO, MachO::fvmlib &FVMLib) { void MappingTraits::mapping( IO &IO, MachO::fvmlib_command &LoadCommand) { - IO.mapRequired("fvmlib", LoadCommand.fvmlib); } @@ -383,20 +378,17 @@ void MappingTraits::mapping( void MappingTraits::mapping( IO &IO, MachO::linkedit_data_command 
&LoadCommand) { - IO.mapRequired("dataoff", LoadCommand.dataoff); IO.mapRequired("datasize", LoadCommand.datasize); } void MappingTraits::mapping( IO &IO, MachO::linker_option_command &LoadCommand) { - IO.mapRequired("count", LoadCommand.count); } void MappingTraits::mapping( IO &IO, MachO::prebind_cksum_command &LoadCommand) { - IO.mapRequired("cksum", LoadCommand.cksum); } @@ -405,7 +397,6 @@ void MappingTraits::mapping( void MappingTraits::mapping( IO &IO, MachO::prebound_dylib_command &LoadCommand) { - IO.mapRequired("name", LoadCommand.name); IO.mapRequired("nmodules", LoadCommand.nmodules); IO.mapRequired("linked_modules", LoadCommand.linked_modules); @@ -413,7 +404,6 @@ void MappingTraits::mapping( void MappingTraits::mapping( IO &IO, MachO::routines_command &LoadCommand) { - IO.mapRequired("init_address", LoadCommand.init_address); IO.mapRequired("init_module", LoadCommand.init_module); IO.mapRequired("reserved1", LoadCommand.reserved1); @@ -426,7 +416,6 @@ void MappingTraits::mapping( void MappingTraits::mapping( IO &IO, MachO::routines_command_64 &LoadCommand) { - IO.mapRequired("init_address", LoadCommand.init_address); IO.mapRequired("init_module", LoadCommand.init_module); IO.mapRequired("reserved1", LoadCommand.reserved1); @@ -439,7 +428,6 @@ void MappingTraits::mapping( void MappingTraits::mapping( IO &IO, MachO::rpath_command &LoadCommand) { - IO.mapRequired("path", LoadCommand.path); } @@ -475,7 +463,6 @@ void MappingTraits::mapping(IO &IO, void MappingTraits::mapping( IO &IO, MachO::segment_command &LoadCommand) { - IO.mapRequired("segname", LoadCommand.segname); IO.mapRequired("vmaddr", LoadCommand.vmaddr); IO.mapRequired("vmsize", LoadCommand.vmsize); @@ -489,7 +476,6 @@ void MappingTraits::mapping( void MappingTraits::mapping( IO &IO, MachO::segment_command_64 &LoadCommand) { - IO.mapRequired("segname", LoadCommand.segname); IO.mapRequired("vmaddr", LoadCommand.vmaddr); IO.mapRequired("vmsize", LoadCommand.vmsize); @@ -503,44 +489,37 @@ void MappingTraits::mapping( void MappingTraits::mapping( IO &IO, MachO::source_version_command &LoadCommand) { - IO.mapRequired("version", LoadCommand.version); } void MappingTraits::mapping( IO &IO, MachO::sub_client_command &LoadCommand) { - IO.mapRequired("client", LoadCommand.client); } void MappingTraits::mapping( IO &IO, MachO::sub_framework_command &LoadCommand) { - IO.mapRequired("umbrella", LoadCommand.umbrella); } void MappingTraits::mapping( IO &IO, MachO::sub_library_command &LoadCommand) { - IO.mapRequired("sub_library", LoadCommand.sub_library); } void MappingTraits::mapping( IO &IO, MachO::sub_umbrella_command &LoadCommand) { - IO.mapRequired("sub_umbrella", LoadCommand.sub_umbrella); } void MappingTraits::mapping( IO &IO, MachO::symseg_command &LoadCommand) { - IO.mapRequired("offset", LoadCommand.offset); IO.mapRequired("size", LoadCommand.size); } void MappingTraits::mapping( IO &IO, MachO::symtab_command &LoadCommand) { - IO.mapRequired("symoff", LoadCommand.symoff); IO.mapRequired("nsyms", LoadCommand.nsyms); IO.mapRequired("stroff", LoadCommand.stroff); @@ -552,27 +531,23 @@ void MappingTraits::mapping( void MappingTraits::mapping( IO &IO, MachO::twolevel_hints_command &LoadCommand) { - IO.mapRequired("offset", LoadCommand.offset); IO.mapRequired("nhints", LoadCommand.nhints); } void MappingTraits::mapping( IO &IO, MachO::uuid_command &LoadCommand) { - IO.mapRequired("uuid", LoadCommand.uuid); } void MappingTraits::mapping( IO &IO, MachO::version_min_command &LoadCommand) { - IO.mapRequired("version", 
LoadCommand.version); IO.mapRequired("sdk", LoadCommand.sdk); } void MappingTraits::mapping( IO &IO, MachO::note_command &LoadCommand) { - IO.mapRequired("data_owner", LoadCommand.data_owner); IO.mapRequired("offset", LoadCommand.offset); IO.mapRequired("size", LoadCommand.size); @@ -580,13 +555,12 @@ void MappingTraits::mapping( void MappingTraits::mapping( IO &IO, MachO::build_version_command &LoadCommand) { - IO.mapRequired("platform", LoadCommand.platform); IO.mapRequired("minos", LoadCommand.minos); IO.mapRequired("sdk", LoadCommand.sdk); IO.mapRequired("ntools", LoadCommand.ntools); } -} // namespace llvm::yaml +} // end namespace yaml -} // namespace llvm +} // end namespace llvm diff --git a/interpreter/llvm/src/lib/ObjectYAML/ObjectYAML.cpp b/interpreter/llvm/src/lib/ObjectYAML/ObjectYAML.cpp index 74581c1ecaacc..850c1a5a06c02 100644 --- a/interpreter/llvm/src/lib/ObjectYAML/ObjectYAML.cpp +++ b/interpreter/llvm/src/lib/ObjectYAML/ObjectYAML.cpp @@ -11,8 +11,11 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ObjectYAML/YAML.h" #include "llvm/ObjectYAML/ObjectYAML.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/YAMLParser.h" +#include "llvm/Support/YAMLTraits.h" +#include using namespace llvm; using namespace yaml; @@ -53,8 +56,8 @@ void MappingTraits::mapping(IO &IO, IO.setError("YAML Object File missing document type tag!"); else IO.setError( - llvm::Twine("YAML Object File unsupported document type tag '") + - llvm::Twine(Tag) + llvm::Twine("'!")); + Twine("YAML Object File unsupported document type tag '") + + Twine(Tag) + Twine("'!")); } } } diff --git a/interpreter/llvm/src/lib/ObjectYAML/WasmYAML.cpp b/interpreter/llvm/src/lib/ObjectYAML/WasmYAML.cpp index 910d32f16af97..6a68cd265ad84 100644 --- a/interpreter/llvm/src/lib/ObjectYAML/WasmYAML.cpp +++ b/interpreter/llvm/src/lib/ObjectYAML/WasmYAML.cpp @@ -12,9 +12,10 @@ //===----------------------------------------------------------------------===// #include "llvm/ObjectYAML/WasmYAML.h" -#include "llvm/Object/Wasm.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/MipsABIFlags.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/YAMLTraits.h" namespace llvm { @@ -22,7 +23,7 @@ namespace WasmYAML { // Declared here rather than in the header to comply with: // http://llvm.org/docs/CodingStandards.html#provide-a-virtual-method-anchor-for-classes-in-headers -Section::~Section() {} +Section::~Section() = default; } // end namespace WasmYAML @@ -47,14 +48,24 @@ static void commonSectionMapping(IO &IO, WasmYAML::Section &Section) { IO.mapOptional("Relocations", Section.Relocations); } +static void sectionMapping(IO &IO, WasmYAML::NameSection &Section) { + commonSectionMapping(IO, Section); + IO.mapRequired("Name", Section.Name); + IO.mapOptional("FunctionNames", Section.FunctionNames); +} + +static void sectionMapping(IO &IO, WasmYAML::LinkingSection &Section) { + commonSectionMapping(IO, Section); + IO.mapRequired("Name", Section.Name); + IO.mapRequired("DataSize", Section.DataSize); + IO.mapRequired("DataAlignment", Section.DataAlignment); + IO.mapRequired("SymbolInfo", Section.SymbolInfos); +} + static void sectionMapping(IO &IO, WasmYAML::CustomSection &Section) { commonSectionMapping(IO, Section); IO.mapRequired("Name", Section.Name); - if (Section.Name == "name") { - IO.mapOptional("FunctionNames", Section.FunctionNames); - } else { - IO.mapRequired("Payload", Section.Payload); - } + IO.mapRequired("Payload", 
Section.Payload); } static void sectionMapping(IO &IO, WasmYAML::TypeSection &Section) { @@ -121,11 +132,29 @@ void MappingTraits>::mapping( IO.mapRequired("Type", SectionType); switch (SectionType) { - case wasm::WASM_SEC_CUSTOM: - if (!IO.outputting()) - Section.reset(new WasmYAML::CustomSection()); - sectionMapping(IO, *cast(Section.get())); + case wasm::WASM_SEC_CUSTOM: { + StringRef SectionName; + if (IO.outputting()) { + auto CustomSection = cast(Section.get()); + SectionName = CustomSection->Name; + } else { + IO.mapRequired("Name", SectionName); + } + if (SectionName == "linking") { + if (!IO.outputting()) + Section.reset(new WasmYAML::LinkingSection()); + sectionMapping(IO, *cast(Section.get())); + } else if (SectionName == "name") { + if (!IO.outputting()) + Section.reset(new WasmYAML::NameSection()); + sectionMapping(IO, *cast(Section.get())); + } else { + if (!IO.outputting()) + Section.reset(new WasmYAML::CustomSection(SectionName)); + sectionMapping(IO, *cast(Section.get())); + } break; + } case wasm::WASM_SEC_TYPE: if (!IO.outputting()) Section.reset(new WasmYAML::TypeSection()); @@ -316,11 +345,18 @@ void MappingTraits::mapping(IO &IO, void MappingTraits::mapping( IO &IO, WasmYAML::DataSegment &Segment) { - IO.mapRequired("Index", Segment.Index); + IO.mapOptional("SectionOffset", Segment.SectionOffset); + IO.mapRequired("MemoryIndex", Segment.MemoryIndex); IO.mapRequired("Offset", Segment.Offset); IO.mapRequired("Content", Segment.Content); } +void MappingTraits::mapping(IO &IO, + WasmYAML::SymbolInfo &Info) { + IO.mapRequired("Name", Info.Name); + IO.mapRequired("Flags", Info.Flags); +} + void ScalarEnumerationTraits::enumeration( IO &IO, WasmYAML::ValueType &Type) { #define ECase(X) IO.enumCase(Type, #X, wasm::WASM_TYPE_##X); @@ -366,9 +402,10 @@ void ScalarEnumerationTraits::enumeration( void ScalarEnumerationTraits::enumeration( IO &IO, WasmYAML::RelocType &Type) { #define WASM_RELOC(name, value) IO.enumCase(Type, #name, wasm::name); -#include "llvm/Support/WasmRelocs/WebAssembly.def" +#include "llvm/BinaryFormat/WasmRelocs/WebAssembly.def" #undef WASM_RELOC } } // end namespace yaml + } // end namespace llvm diff --git a/interpreter/llvm/src/lib/ObjectYAML/YAML.cpp b/interpreter/llvm/src/lib/ObjectYAML/YAML.cpp index 75cf1fbccc800..67b5764eadaa4 100644 --- a/interpreter/llvm/src/lib/ObjectYAML/YAML.cpp +++ b/interpreter/llvm/src/lib/ObjectYAML/YAML.cpp @@ -16,11 +16,12 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/Support/raw_ostream.h" #include +#include using namespace llvm; void yaml::ScalarTraits::output( - const yaml::BinaryRef &Val, void *, llvm::raw_ostream &Out) { + const yaml::BinaryRef &Val, void *, raw_ostream &Out) { Val.writeAsHex(Out); } @@ -34,7 +35,7 @@ StringRef yaml::ScalarTraits::input(StringRef Scalar, void *, if (!isxdigit(Scalar[I])) return "BinaryRef hex string must contain only hex digits."; Val = yaml::BinaryRef(Scalar); - return StringRef(); + return {}; } void yaml::BinaryRef::writeAsBinary(raw_ostream &OS) const { diff --git a/interpreter/llvm/src/lib/Option/Arg.cpp b/interpreter/llvm/src/lib/Option/Arg.cpp index 3e8a1d802314b..e581fee8bf381 100644 --- a/interpreter/llvm/src/lib/Option/Arg.cpp +++ b/interpreter/llvm/src/lib/Option/Arg.cpp @@ -1,4 +1,4 @@ -//===--- Arg.cpp - Argument Implementations -------------------------------===// +//===- Arg.cpp - Argument Implementations ---------------------------------===// // // The LLVM Compiler Infrastructure // @@ -7,13 +7,13 @@ // 
//===----------------------------------------------------------------------===// -#include "llvm/Option/Arg.h" #include "llvm/ADT/SmallString.h" -#include "llvm/ADT/Twine.h" +#include "llvm/Option/Arg.h" #include "llvm/Option/ArgList.h" #include "llvm/Option/Option.h" -#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; using namespace llvm::opt; @@ -67,7 +67,7 @@ LLVM_DUMP_METHOD void Arg::dump() const { print(dbgs()); } std::string Arg::getAsString(const ArgList &Args) const { SmallString<256> Res; - llvm::raw_svector_ostream OS(Res); + raw_svector_ostream OS(Res); ArgStringList ASL; render(Args, ASL); @@ -98,7 +98,7 @@ void Arg::render(const ArgList &Args, ArgStringList &Output) const { case Option::RenderCommaJoinedStyle: { SmallString<256> Res; - llvm::raw_svector_ostream OS(Res); + raw_svector_ostream OS(Res); OS << getSpelling(); for (unsigned i = 0, e = getNumValues(); i != e; ++i) { if (i) OS << ','; diff --git a/interpreter/llvm/src/lib/Option/ArgList.cpp b/interpreter/llvm/src/lib/Option/ArgList.cpp index 39dbce87f9ae0..cbccc1935d3c2 100644 --- a/interpreter/llvm/src/lib/Option/ArgList.cpp +++ b/interpreter/llvm/src/lib/Option/ArgList.cpp @@ -1,4 +1,4 @@ -//===--- ArgList.cpp - Argument List Management ---------------------------===// +//===- ArgList.cpp - Argument List Management -----------------------------===// // // The LLVM Compiler Infrastructure // @@ -7,14 +7,25 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Option/ArgList.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/Option/Arg.h" +#include "llvm/Option/ArgList.h" #include "llvm/Option/Option.h" +#include "llvm/Option/OptSpecifier.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include +#include +#include using namespace llvm; using namespace llvm::opt; @@ -197,8 +208,6 @@ void ArgList::print(raw_ostream &O) const { LLVM_DUMP_METHOD void ArgList::dump() const { print(dbgs()); } #endif -// - void InputArgList::releaseMemory() { // An InputArgList always owns its arguments. 
for (Arg *A : *this) @@ -234,8 +243,6 @@ const char *InputArgList::MakeArgStringRef(StringRef Str) const { return getArgString(MakeIndex(Str)); } -// - DerivedArgList::DerivedArgList(const InputArgList &BaseArgs) : BaseArgs(BaseArgs) {} diff --git a/interpreter/llvm/src/lib/Option/OptTable.cpp b/interpreter/llvm/src/lib/Option/OptTable.cpp index 7eafb00855d77..51c62d33f8e16 100644 --- a/interpreter/llvm/src/lib/Option/OptTable.cpp +++ b/interpreter/llvm/src/lib/Option/OptTable.cpp @@ -1,4 +1,4 @@ -//===--- OptTable.cpp - Option Table Implementation -----------------------===// +//===- OptTable.cpp - Option Table Implementation -------------------------===// // // The LLVM Compiler Infrastructure // @@ -7,16 +7,25 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Option/OptTable.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSet.h" #include "llvm/Option/Arg.h" #include "llvm/Option/ArgList.h" #include "llvm/Option/Option.h" +#include "llvm/Option/OptSpecifier.h" +#include "llvm/Option/OptTable.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include +#include #include +#include #include +#include +#include +#include using namespace llvm; using namespace llvm::opt; @@ -80,14 +89,14 @@ static inline bool operator<(const OptTable::Info &A, const OptTable::Info &B) { static inline bool operator<(const OptTable::Info &I, const char *Name) { return StrCmpOptionNameIgnoreCase(I.Name, Name) < 0; } -} -} + +} // end namespace opt +} // end namespace llvm OptSpecifier::OptSpecifier(const Option *Opt) : ID(Opt->getID()) {} OptTable::OptTable(ArrayRef OptionInfos, bool IgnoreCase) - : OptionInfos(OptionInfos), IgnoreCase(IgnoreCase), TheInputOptionID(0), - TheUnknownOptionID(0), FirstSearchableIndex(0) { + : OptionInfos(OptionInfos), IgnoreCase(IgnoreCase) { // Explicitly zero initialize the error to work around a bug in array // value-initialization on MinGW with gcc 4.3.5. @@ -138,8 +147,8 @@ OptTable::OptTable(ArrayRef OptionInfos, bool IgnoreCase) } // Build prefix chars. 
- for (llvm::StringSet<>::const_iterator I = PrefixesUnion.begin(), - E = PrefixesUnion.end(); I != E; ++I) { + for (StringSet<>::const_iterator I = PrefixesUnion.begin(), + E = PrefixesUnion.end(); I != E; ++I) { StringRef Prefix = I->getKey(); for (StringRef::const_iterator C = Prefix.begin(), CE = Prefix.end(); C != CE; ++C) @@ -148,8 +157,7 @@ OptTable::OptTable(ArrayRef OptionInfos, bool IgnoreCase) } } -OptTable::~OptTable() { -} +OptTable::~OptTable() = default; const Option OptTable::getOption(OptSpecifier Opt) const { unsigned id = Opt.getID(); @@ -159,11 +167,11 @@ const Option OptTable::getOption(OptSpecifier Opt) const { return Option(&getInfo(id), this); } -static bool isInput(const llvm::StringSet<> &Prefixes, StringRef Arg) { +static bool isInput(const StringSet<> &Prefixes, StringRef Arg) { if (Arg == "-") return true; - for (llvm::StringSet<>::const_iterator I = Prefixes.begin(), - E = Prefixes.end(); I != E; ++I) + for (StringSet<>::const_iterator I = Prefixes.begin(), + E = Prefixes.end(); I != E; ++I) if (Arg.startswith(I->getKey())) return false; return true; @@ -186,6 +194,57 @@ static unsigned matchOption(const OptTable::Info *I, StringRef Str, return 0; } +// Returns true if one of the Prefixes + In.Name combinations matches Option +static bool optionMatches(const OptTable::Info &In, StringRef Option) { + if (In.Values && In.Prefixes) + for (size_t I = 0; In.Prefixes[I]; I++) + if (Option == std::string(In.Prefixes[I]) + In.Name) + return true; + return false; +} + +// This function is for flag value completion. +// E.g. when "-stdlib=" and "l" are passed to this function, it will return +// appropriate values for stdlib that start with "l". +std::vector +OptTable::suggestValueCompletions(StringRef Option, StringRef Arg) const { + // Search all options and return possible values. + for (const Info &In : OptionInfos.slice(FirstSearchableIndex)) { + if (!optionMatches(In, Option)) + continue; + + SmallVector Candidates; + StringRef(In.Values).split(Candidates, ",", -1, false); + + std::vector Result; + for (StringRef Val : Candidates) + if (Val.startswith(Arg)) + Result.push_back(Val); + return Result; + } + return {}; +} + +std::vector +OptTable::findByPrefix(StringRef Cur, unsigned short DisableFlags) const { + std::vector Ret; + for (const Info &In : OptionInfos.slice(FirstSearchableIndex)) { + if (!In.Prefixes || (!In.HelpText && !In.GroupID)) + continue; + if (In.Flags & DisableFlags) + continue; + + for (int I = 0; In.Prefixes[I]; I++) { + std::string S = std::string(In.Prefixes[I]) + std::string(In.Name) + "\t"; + if (In.HelpText) + S += In.HelpText; + if (StringRef(S).startswith(Cur)) + Ret.push_back(S); + } + } + return Ret; +} + Arg *OptTable::ParseOneArg(const ArgList &Args, unsigned &Index, unsigned FlagsToInclude, unsigned FlagsToExclude) const { @@ -314,6 +373,9 @@ static std::string getOptionHelpName(const OptTable &Opts, OptSpecifier Id) { case Option::FlagClass: break; + case Option::ValuesClass: + break; + case Option::SeparateClass: case Option::JoinedOrSeparateClass: case Option::RemainingArgsClass: case Option::RemainingArgsJoinedClass: Name += ' '; @@ -330,27 +392,29 @@ static std::string getOptionHelpName(const OptTable &Opts, OptSpecifier Id) { return Name; } +namespace { +struct OptionInfo { + std::string Name; + StringRef HelpText; +}; +} // namespace + static void PrintHelpOptionList(raw_ostream &OS, StringRef Title, - std::vector > &OptionHelp) { + std::vector &OptionHelp) { OS << Title << ":\n"; // Find the maximum option length.
unsigned OptionFieldWidth = 0; for (unsigned i = 0, e = OptionHelp.size(); i != e; ++i) { - // Skip titles. - if (!OptionHelp[i].second) - continue; - // Limit the amount of padding we are willing to give up for alignment. - unsigned Length = OptionHelp[i].first.size(); + unsigned Length = OptionHelp[i].Name.size(); if (Length <= 23) OptionFieldWidth = std::max(OptionFieldWidth, Length); } const unsigned InitialPad = 2; for (unsigned i = 0, e = OptionHelp.size(); i != e; ++i) { - const std::string &Option = OptionHelp[i].first; + const std::string &Option = OptionHelp[i].Name; int Pad = OptionFieldWidth - int(Option.size()); OS.indent(InitialPad) << Option; @@ -359,7 +423,7 @@ static void PrintHelpOptionList(raw_ostream &OS, StringRef Title, OS << "\n"; Pad = OptionFieldWidth + InitialPad; } - OS.indent(Pad + 1) << OptionHelp[i].second << '\n'; + OS.indent(Pad + 1) << OptionHelp[i].HelpText << '\n'; } } @@ -398,8 +462,7 @@ void OptTable::PrintHelp(raw_ostream &OS, const char *Name, const char *Title, // Render help text into a map of group-name to a list of (option, help) // pairs. - typedef std::map > > helpmap_ty; + using helpmap_ty = std::map>; helpmap_ty GroupedOptionHelp; for (unsigned i = 0, e = getNumOptions(); i != e; ++i) { @@ -418,7 +481,7 @@ void OptTable::PrintHelp(raw_ostream &OS, const char *Name, const char *Title, if (const char *Text = getOptionHelpText(Id)) { const char *HelpGroup = getOptionHelpGroup(*this, Id); const std::string &OptName = getOptionHelpName(*this, Id); - GroupedOptionHelp[HelpGroup].push_back(std::make_pair(OptName, Text)); + GroupedOptionHelp[HelpGroup].push_back({OptName, Text}); } } diff --git a/interpreter/llvm/src/lib/Option/Option.cpp b/interpreter/llvm/src/lib/Option/Option.cpp index 736b939fe80b3..bf9f040bde525 100644 --- a/interpreter/llvm/src/lib/Option/Option.cpp +++ b/interpreter/llvm/src/lib/Option/Option.cpp @@ -1,4 +1,4 @@ -//===--- Option.cpp - Abstract Driver Options -----------------------------===// +//===- Option.cpp - Abstract Driver Options -------------------------------===// // // The LLVM Compiler Infrastructure // @@ -7,22 +7,24 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Option/Option.h" +#include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/Option/Arg.h" #include "llvm/Option/ArgList.h" +#include "llvm/Option/Option.h" +#include "llvm/Option/OptTable.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include #include +#include using namespace llvm; using namespace llvm::opt; Option::Option(const OptTable::Info *info, const OptTable *owner) : Info(info), Owner(owner) { - // Multi-level aliases are not supported. This just simplifies option // tracking, it is not an inherent limitation. 
assert((!Info || !getAlias().isValid() || !getAlias().getAlias().isValid()) && @@ -45,6 +47,7 @@ void Option::print(raw_ostream &O) const { P(UnknownClass); P(FlagClass); P(JoinedClass); + P(ValuesClass); P(SeparateClass); P(CommaJoinedClass); P(MultiArgClass); diff --git a/interpreter/llvm/src/lib/Passes/PassBuilder.cpp b/interpreter/llvm/src/lib/Passes/PassBuilder.cpp index 7076e751071dc..9e0cf27aa17b5 100644 --- a/interpreter/llvm/src/lib/Passes/PassBuilder.cpp +++ b/interpreter/llvm/src/lib/Passes/PassBuilder.cpp @@ -150,12 +150,30 @@ using namespace llvm; static cl::opt MaxDevirtIterations("pm-max-devirt-iterations", cl::ReallyHidden, cl::init(4)); +static cl::opt + RunPartialInlining("enable-npm-partial-inlining", cl::init(false), + cl::Hidden, cl::ZeroOrMore, + cl::desc("Run Partial inlining pass")); + +static cl::opt + RunNewGVN("enable-npm-newgvn", cl::init(false), + cl::Hidden, cl::ZeroOrMore, + cl::desc("Run NewGVN instead of GVN")); + +static cl::opt EnableEarlyCSEMemSSA( + "enable-npm-earlycse-memssa", cl::init(true), cl::Hidden, + cl::desc("Enable the EarlyCSE w/ MemorySSA pass for the new PM (default = on)")); static cl::opt EnableGVNHoist( "enable-npm-gvn-hoist", cl::init(false), cl::Hidden, cl::desc("Enable the GVN hoisting pass for the new PM (default = off)")); -static Regex DefaultAliasRegex("^(default|lto-pre-link|lto)<(O[0123sz])>$"); +static cl::opt EnableGVNSink( + "enable-npm-gvn-sink", cl::init(false), cl::Hidden, + cl::desc("Enable the GVN sinking pass for the new PM (default = off)")); + +static Regex DefaultAliasRegex( + "^(default|thinlto-pre-link|thinlto|lto-pre-link|lto)<(O[0123sz])>$"); static bool isOptimizingForSize(PassBuilder::OptimizationLevel Level) { switch (Level) { @@ -263,33 +281,52 @@ AnalysisKey NoOpLoopAnalysis::Key; } // End anonymous namespace.
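A note on the callback machinery the hunks below introduce: the PeepholeEPCallbacks, *AnalysisRegistrationCallbacks, and *PipelineParsingCallbacks vectors are populated by clients through matching register* methods on PassBuilder, declared in PassBuilder.h (which this patch does not show). A minimal sketch of hooking the peephole extension point, assuming such a registerPeepholeEPCallback declaration exists and using DCEPass purely as a stand-in for a real cleanup pass:

    #include "llvm/Passes/PassBuilder.h"
    #include "llvm/Transforms/Scalar/DCE.h"

    using namespace llvm;

    // Queue an extra function pass to run at every peephole extension point
    // except at -O0; invokePeepholeEPCallbacks() below fires it each time a
    // pipeline being built reaches one of those points.
    static void registerMyExtensions(PassBuilder &PB) {
      PB.registerPeepholeEPCallback(
          [](FunctionPassManager &FPM, PassBuilder::OptimizationLevel Level) {
            if (Level != PassBuilder::O0)
              FPM.addPass(DCEPass());
          });
    }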
+void PassBuilder::invokePeepholeEPCallbacks( + FunctionPassManager &FPM, PassBuilder::OptimizationLevel Level) { + for (auto &C : PeepholeEPCallbacks) + C(FPM, Level); +} + void PassBuilder::registerModuleAnalyses(ModuleAnalysisManager &MAM) { #define MODULE_ANALYSIS(NAME, CREATE_PASS) \ MAM.registerPass([&] { return CREATE_PASS; }); #include "PassRegistry.def" + + for (auto &C : ModuleAnalysisRegistrationCallbacks) + C(MAM); } void PassBuilder::registerCGSCCAnalyses(CGSCCAnalysisManager &CGAM) { #define CGSCC_ANALYSIS(NAME, CREATE_PASS) \ CGAM.registerPass([&] { return CREATE_PASS; }); #include "PassRegistry.def" + + for (auto &C : CGSCCAnalysisRegistrationCallbacks) + C(CGAM); } void PassBuilder::registerFunctionAnalyses(FunctionAnalysisManager &FAM) { #define FUNCTION_ANALYSIS(NAME, CREATE_PASS) \ FAM.registerPass([&] { return CREATE_PASS; }); #include "PassRegistry.def" + + for (auto &C : FunctionAnalysisRegistrationCallbacks) + C(FAM); } void PassBuilder::registerLoopAnalyses(LoopAnalysisManager &LAM) { #define LOOP_ANALYSIS(NAME, CREATE_PASS) \ LAM.registerPass([&] { return CREATE_PASS; }); #include "PassRegistry.def" + + for (auto &C : LoopAnalysisRegistrationCallbacks) + C(LAM); } FunctionPassManager PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, - bool DebugLogging) { + bool DebugLogging, + bool PrepareForThinLTO) { assert(Level != O0 && "Must request optimizations!"); FunctionPassManager FPM(DebugLogging); @@ -298,7 +335,17 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, FPM.addPass(SROA()); // Catch trivial redundancies - FPM.addPass(EarlyCSEPass()); + FPM.addPass(EarlyCSEPass(EnableEarlyCSEMemSSA)); + + // Hoisting of scalars and load expressions. + if (EnableGVNHoist) + FPM.addPass(GVNHoistPass()); + + // Global value numbering based sinking. + if (EnableGVNSink) { + FPM.addPass(GVNSinkPass()); + FPM.addPass(SimplifyCFGPass()); + } // Speculative execution if the target has divergent branches; otherwise nop. FPM.addPass(SpeculativeExecutionPass()); @@ -312,6 +359,8 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, if (!isOptimizingForSize(Level)) FPM.addPass(LibCallsShrinkWrapPass()); + invokePeepholeEPCallbacks(FPM, Level); + FPM.addPass(TailCallElimPass()); FPM.addPass(SimplifyCFGPass()); @@ -332,14 +381,22 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, // Rotate Loop - disable header duplication at -Oz LPM1.addPass(LoopRotatePass(Level != Oz)); LPM1.addPass(LICMPass()); -#if 0 - // The LoopUnswitch pass isn't yet ported to the new pass manager. - LPM1.addPass(LoopUnswitchPass(/* OptimizeForSize */ Level != O3)); -#endif + LPM1.addPass(SimpleLoopUnswitchPass()); LPM2.addPass(IndVarSimplifyPass()); LPM2.addPass(LoopIdiomRecognizePass()); + + for (auto &C : LateLoopOptimizationsEPCallbacks) + C(LPM2, Level); + LPM2.addPass(LoopDeletionPass()); - LPM2.addPass(LoopUnrollPass::createFull(Level)); + // Do not enable unrolling in the PrepareForThinLTO phase during sample PGO + // because it changes the IR and makes profile annotation in the backend + // compile inaccurate. + if (!PrepareForThinLTO || !PGOOpt || PGOOpt->SampleProfileFile.empty()) + LPM2.addPass(LoopUnrollPass::createFull(Level)); + + for (auto &C : LoopOptimizerEndEPCallbacks) + C(LPM2, Level); // We provide the opt remark emitter pass for LICM to use. We only need to do // this once as it is immutable.
@@ -353,7 +410,10 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, if (Level != O1) { // These passes add substantial compile time so skip them at O1. FPM.addPass(MergedLoadStoreMotionPass()); - FPM.addPass(GVN()); + if (RunNewGVN) + FPM.addPass(NewGVNPass()); + else + FPM.addPass(GVN()); } // Specially optimize memory movement as it doesn't look like dataflow in SSA. @@ -372,6 +432,7 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, // Run instcombine after redundancy and dead bit elimination to exploit // opportunities opened up by them. FPM.addPass(InstCombinePass()); + invokePeepholeEPCallbacks(FPM, Level); // Re-consider control flow based optimizations after redundancy elimination, // redo DCE, etc. @@ -380,19 +441,24 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, FPM.addPass(DSEPass()); FPM.addPass(createFunctionToLoopPassAdaptor(LICMPass())); + for (auto &C : ScalarOptimizerLateEPCallbacks) + C(FPM, Level); + // Finally, do an expensive DCE pass to catch all the dead code exposed by // the simplifications and basic cleanup after all the simplifications. FPM.addPass(ADCEPass()); FPM.addPass(SimplifyCFGPass()); FPM.addPass(InstCombinePass()); + invokePeepholeEPCallbacks(FPM, Level); return FPM; } -static void addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging, - PassBuilder::OptimizationLevel Level, - bool RunProfileGen, std::string ProfileGenFile, - std::string ProfileUseFile) { +void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging, + PassBuilder::OptimizationLevel Level, + bool RunProfileGen, + std::string ProfileGenFile, + std::string ProfileUseFile) { // Generally running simplification passes and the inliner with a high // threshold results in smaller executables, but there may be cases where // the size grows, so let's be conservative here and skip this simplification @@ -417,21 +483,30 @@ static void addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging, FPM.addPass(EarlyCSEPass()); // Catch trivial redundancies. FPM.addPass(SimplifyCFGPass()); // Merge & remove basic blocks. FPM.addPass(InstCombinePass()); // Combine silly sequences. + invokePeepholeEPCallbacks(FPM, Level); - // FIXME: Here the old pass manager inserts peephole extensions. - // Add them when they're supported. CGPipeline.addPass(createCGSCCToFunctionPassAdaptor(std::move(FPM))); MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPipeline))); } + // Delete anything that is now dead to make sure that we don't instrument + // dead code. Instrumentation can end up keeping dead code around and + // dramatically increase code size. + MPM.addPass(GlobalDCEPass()); + if (RunProfileGen) { + MPM.addPass(PGOInstrumentationGen()); + FunctionPassManager FPM; + FPM.addPass(createFunctionToLoopPassAdaptor(LoopRotatePass())); + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); + // Add the profile lowering pass. InstrProfOptions Options; if (!ProfileGenFile.empty()) Options.InstrProfileOutput = ProfileGenFile; + Options.DoCounterPromotion = true; MPM.addPass(InstrProfiling(Options)); } @@ -439,15 +514,20 @@ static void addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging, MPM.addPass(PGOInstrumentationUse(ProfileUseFile)); } +static InlineParams +getInlineParamsFromOptLevel(PassBuilder::OptimizationLevel Level) { + auto O3 = PassBuilder::O3; + unsigned OptLevel = Level > O3 ? 2 : Level; + unsigned SizeLevel = Level > O3 ?
Level - O3 : 0; + return getInlineParams(OptLevel, SizeLevel); +} + ModulePassManager -PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level, - bool DebugLogging) { - assert(Level != O0 && "Must request optimizations for the default pipeline!"); +PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, + bool DebugLogging, + bool PrepareForThinLTO) { ModulePassManager MPM(DebugLogging); - // Force any function attributes we want the rest of the pipeline te observe. - MPM.addPass(ForceFunctionAttrsPass()); - // Do basic inference of function attributes from known properties of system // libraries and other oracles. MPM.addPass(InferFunctionAttrsPass()); @@ -459,8 +539,6 @@ PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level, EarlyFPM.addPass(SROA()); EarlyFPM.addPass(EarlyCSEPass()); EarlyFPM.addPass(LowerExpectIntrinsicPass()); - if (EnableGVNHoist) - EarlyFPM.addPass(GVNHoistPass()); MPM.addPass(createModuleToFunctionPassAdaptor(std::move(EarlyFPM))); // Interprocedural constant propagation now that basic cleanup has occurred @@ -487,20 +565,29 @@ PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level, // optimizations. FunctionPassManager GlobalCleanupPM(DebugLogging); GlobalCleanupPM.addPass(InstCombinePass()); + invokePeepholeEPCallbacks(GlobalCleanupPM, Level); + GlobalCleanupPM.addPass(SimplifyCFGPass()); MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM))); - // Add all the requested passes for PGO Instrumentation, if requested. + // Add all the requested passes for PGO, if requested. if (PGOOpt) { - assert(PGOOpt->RunProfileGen || PGOOpt->SamplePGO || + assert(PGOOpt->RunProfileGen || !PGOOpt->SampleProfileFile.empty() || !PGOOpt->ProfileUseFile.empty()); - addPGOInstrPasses(MPM, DebugLogging, Level, PGOOpt->RunProfileGen, - PGOOpt->ProfileGenFile, PGOOpt->ProfileUseFile); + if (PGOOpt->SampleProfileFile.empty()) + addPGOInstrPasses(MPM, DebugLogging, Level, PGOOpt->RunProfileGen, + PGOOpt->ProfileGenFile, PGOOpt->ProfileUseFile); + else + MPM.addPass(SampleProfileLoaderPass(PGOOpt->SampleProfileFile)); + + // Indirect call promotion that promotes intra-module targets only. + // Do not enable it in the PrepareForThinLTO phase during sample PGO because + // it changes the IR and makes profile annotation in the backend inaccurate. + if (!PrepareForThinLTO || PGOOpt->SampleProfileFile.empty()) + MPM.addPass(PGOIndirectCallPromotion( + false, PGOOpt && !PGOOpt->SampleProfileFile.empty())); } - // Indirect call promotion that promotes intra-module targes only. - MPM.addPass(PGOIndirectCallPromotion(false, PGOOpt && PGOOpt->SamplePGO)); - // Require the GlobalsAA analysis for the module so we can query it within // the CGSCC pipeline. MPM.addPass(RequireAnalysisPass()); @@ -523,8 +610,12 @@ PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level, // Run the inliner first. The theory is that we are walking bottom-up and so // the callees have already been fully optimized, and we want to inline them // into the callers so that our optimizations can reflect that. - // FIXME; Customize the threshold based on optimization level. - MainCGPipeline.addPass(InlinerPass()); + // For the PrepareForThinLTO pass, we disable the hot-caller heuristic for + // sample PGO because it makes profile annotation in the backend inaccurate.
+ InlineParams IP = getInlineParamsFromOptLevel(Level); + if (PrepareForThinLTO && PGOOpt && !PGOOpt->SampleProfileFile.empty()) + IP.HotCallSiteThreshold = 0; + MainCGPipeline.addPass(InlinerPass(IP)); // Now deduce any function attributes based on the current code. MainCGPipeline.addPass(PostOrderFunctionAttrsPass()); @@ -537,7 +628,11 @@ PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level, // Lastly, add the core function simplification pipeline nested inside the // CGSCC walk. MainCGPipeline.addPass(createCGSCCToFunctionPassAdaptor( - buildFunctionSimplificationPipeline(Level, DebugLogging))); + buildFunctionSimplificationPipeline(Level, DebugLogging, + PrepareForThinLTO))); + + for (auto &C : CGSCCOptimizerLateEPCallbacks) + C(MainCGPipeline, Level); // We wrap the CGSCC pipeline in a devirtualization repeater. This will try // to detect when we devirtualize indirect calls and iterate the SCC passes @@ -548,12 +643,30 @@ PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level, createModuleToPostOrderCGSCCPassAdaptor(createDevirtSCCRepeatedPass( std::move(MainCGPipeline), MaxDevirtIterations, DebugLogging))); - // This ends the canonicalization and simplification phase of the pipeline. - // At this point, we expect to have canonical and simple IR which we begin - // *optimizing* for efficient execution going forward. + return MPM; +} + +ModulePassManager +PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, + bool DebugLogging) { + ModulePassManager MPM(DebugLogging); - // Eliminate externally available functions now that inlining is over -- we - // won't emit these anyways. + // Optimize globals now that the module is fully simplified. + MPM.addPass(GlobalOptPass()); + + // Run partial inlining pass to partially inline functions that have + // large bodies. + if (RunPartialInlining) + MPM.addPass(PartialInlinerPass()); + + // Remove avail extern fns and global definitions since we aren't compiling + // an object file for later LTO. For LTO we want to preserve these so they + // are eligible for inlining at link-time. Note if they are unreferenced they + // will be removed by GlobalDCE later, so this only impacts referenced + // available externally globals. Eventually they will be suppressed during + // codegen, but eliminating here enables more opportunity for GlobalDCE as it + // may make globals referenced by available external functions dead and saves + // running remaining passes on the eliminated functions. MPM.addPass(EliminateAvailableExternallyPass()); // Do RPO function attribute inference across the module to forward-propagate @@ -579,6 +692,9 @@ PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level, // rather than on each loop in an inside-out manner, and so they are actually // function passes. + for (auto &C : VectorizerStartEPCallbacks) + C(OptimizePM, Level); + // First rotate loops that may have been un-rotated by prior passes. OptimizePM.addPass(createFunctionToLoopPassAdaptor(LoopRotatePass())); @@ -651,6 +767,87 @@ PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level, return MPM; }
+ MPM.addPass(buildModuleSimplificationPipeline(Level, DebugLogging, + /*PrepareForThinLTO=*/false)); + + // Now add the optimization pipeline. + MPM.addPass(buildModuleOptimizationPipeline(Level, DebugLogging)); + + return MPM; +} + +ModulePassManager +PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level, + bool DebugLogging) { + assert(Level != O0 && "Must request optimizations for the default pipeline!"); + + ModulePassManager MPM(DebugLogging); + + // Force any function attributes we want the rest of the pipeline to observe. + MPM.addPass(ForceFunctionAttrsPass()); + + // If we are planning to perform ThinLTO later, we don't bloat the code with + // unrolling/vectorization/... now. Just simplify the module as much as we + // can. + MPM.addPass(buildModuleSimplificationPipeline(Level, DebugLogging, + /*PrepareForThinLTO=*/true)); + + // Run partial inlining pass to partially inline functions that have + // large bodies. + // FIXME: It isn't clear whether this is really the right place to run this + // in ThinLTO. Because there is another canonicalization and simplification + // phase that will run after the thin link, running this here ends up with + // less information than will be available later and it may grow functions in + // ways that aren't beneficial. + if (RunPartialInlining) + MPM.addPass(PartialInlinerPass()); + + // Reduce the size of the IR as much as possible. + MPM.addPass(GlobalOptPass()); + + return MPM; +} + +ModulePassManager +PassBuilder::buildThinLTODefaultPipeline(OptimizationLevel Level, + bool DebugLogging) { + // FIXME: The summary index is not hooked in the new pass manager yet. + // When it's going to be hooked, enable WholeProgramDevirt and LowerTypeTest + // here. + + ModulePassManager MPM(DebugLogging); + + // Force any function attributes we want the rest of the pipeline to observe. + MPM.addPass(ForceFunctionAttrsPass()); + + // During the ThinLTO backend phase we perform early indirect call promotion + // here, before globalopt. Otherwise imported available_externally functions + // look unreferenced and are removed. + MPM.addPass(PGOIndirectCallPromotion( + true /* InLTO */, PGOOpt && !PGOOpt->SampleProfileFile.empty() && + !PGOOpt->ProfileUseFile.empty())); + + // Add the core simplification pipeline. + MPM.addPass(buildModuleSimplificationPipeline(Level, DebugLogging, + /*PrepareForThinLTO=*/false)); + + // Now add the optimization pipeline. + MPM.addPass(buildModuleOptimizationPipeline(Level, DebugLogging)); + + return MPM; +} + ModulePassManager PassBuilder::buildLTOPreLinkDefaultPipeline(OptimizationLevel Level, bool DebugLogging) { @@ -680,8 +877,8 @@ ModulePassManager PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, // left by the earlier promotion pass that promotes intra-module targets. // This two-step promotion is to save the compile time. For LTO, it should // produce the same result as if we only do promotion here. - MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */, - PGOOpt && PGOOpt->SamplePGO)); + MPM.addPass(PGOIndirectCallPromotion( + true /* InLTO */, PGOOpt && !PGOOpt->SampleProfileFile.empty())); // Propagate constants at call sites into the functions they call. This // opens opportunities for globalopt (and inlining) by substituting function @@ -726,15 +923,19 @@ ModulePassManager PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, // simplification opportunities, and both can propagate functions through // function pointers. 
When this happens, we often have to resolve varargs // calls, etc, so let instcombine do this. - // FIXME: add peephole extensions here as the legacy PM does. - MPM.addPass(createModuleToFunctionPassAdaptor(InstCombinePass())); + FunctionPassManager PeepholeFPM(DebugLogging); + PeepholeFPM.addPass(InstCombinePass()); + invokePeepholeEPCallbacks(PeepholeFPM, Level); + + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(PeepholeFPM))); // Note: historically, the PruneEH pass was run first to deduce nounwind and // generally clean up exception handling overhead. It isn't clear this is // valuable as the inliner doesn't currently care whether it is inlining an // invoke or a call. // Run the inliner now. - MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(InlinerPass())); + MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor( + InlinerPass(getInlineParamsFromOptLevel(Level)))); // Optimize globals again after we ran the inliner. MPM.addPass(GlobalOptPass()); @@ -744,10 +945,10 @@ ModulePassManager PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, MPM.addPass(GlobalDCEPass()); FunctionPassManager FPM(DebugLogging); - // The IPO Passes may leave cruft around. Clean up after them. - // FIXME: add peephole extensions here as the legacy PM does. FPM.addPass(InstCombinePass()); + invokePeepholeEPCallbacks(FPM, Level); + FPM.addPass(JumpThreadingPass()); // Break up allocas @@ -765,7 +966,10 @@ ModulePassManager PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, // FIXME: once we fix LoopPass Manager, add LICM here. // FIXME: once we provide support for enabling MLSM, add it here. // FIXME: once we provide support for enabling NewGVN, add it here. - MainFPM.addPass(GVN()); + if (RunNewGVN) + MainFPM.addPass(NewGVNPass()); + else + MainFPM.addPass(GVN()); // Remove dead memcpy()'s. MainFPM.addPass(MemCpyOptPass()); @@ -794,8 +998,8 @@ ModulePassManager PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, // FIXME: Conditionally run LoadCombine here, after it's ported // (in case we still have this pass, given its questionable usefulness). - // FIXME: add peephole extensions to the PM here. MainFPM.addPass(InstCombinePass()); + invokePeepholeEPCallbacks(MainFPM, Level); MainFPM.addPass(JumpThreadingPass()); MPM.addPass(createModuleToFunctionPassAdaptor(std::move(MainFPM))); @@ -871,9 +1075,36 @@ static Optional parseDevirtPassName(StringRef Name) { return Count; } -static bool isModulePassName(StringRef Name) { +/// Tests whether a pass name starts with a valid prefix for a default pipeline +/// alias. +static bool startsWithDefaultPipelineAliasPrefix(StringRef Name) { + return Name.startswith("default") || Name.startswith("thinlto") || + Name.startswith("lto"); +} + +/// Tests whether registered callbacks will accept a given pass name. +/// +/// When parsing a pipeline text, the type of the outermost pipeline may be +/// omitted, in which case the type is automatically determined from the first +/// pass name in the text. This may be a name that is handled through one of the +/// callbacks. We check this through the ordinary parsing callbacks by setting +/// up a dummy PassManager in order to not force the client to also handle this +/// type of query.
+template +static bool callbacksAcceptPassName(StringRef Name, CallbacksT &Callbacks) { + if (!Callbacks.empty()) { + PassManagerT DummyPM; + for (auto &CB : Callbacks) + if (CB(Name, DummyPM, {})) + return true; + } + return false; +} + +template +static bool isModulePassName(StringRef Name, CallbacksT &Callbacks) { // Manually handle aliases for pre-configured pipeline fragments. - if (Name.startswith("default") || Name.startswith("lto")) + if (startsWithDefaultPipelineAliasPrefix(Name)) return DefaultAliasRegex.match(Name); // Explicitly handle pass manager names. @@ -896,10 +1127,11 @@ static bool isModulePassName(StringRef Name) { return true; #include "PassRegistry.def" - return false; + return callbacksAcceptPassName(Name, Callbacks); } -static bool isCGSCCPassName(StringRef Name) { +template +static bool isCGSCCPassName(StringRef Name, CallbacksT &Callbacks) { // Explicitly handle pass manager names. if (Name == "cgscc") return true; @@ -920,10 +1152,11 @@ static bool isCGSCCPassName(StringRef Name) { return true; #include "PassRegistry.def" - return false; + return callbacksAcceptPassName(Name, Callbacks); } -static bool isFunctionPassName(StringRef Name) { +template +static bool isFunctionPassName(StringRef Name, CallbacksT &Callbacks) { // Explicitly handle pass manager names. if (Name == "function") return true; @@ -942,10 +1175,11 @@ static bool isFunctionPassName(StringRef Name) { return true; #include "PassRegistry.def" - return false; + return callbacksAcceptPassName(Name, Callbacks); } -static bool isLoopPassName(StringRef Name) { +template +static bool isLoopPassName(StringRef Name, CallbacksT &Callbacks) { // Explicitly handle pass manager names. if (Name == "loop") return true; @@ -962,7 +1196,7 @@ static bool isLoopPassName(StringRef Name) { return true; #include "PassRegistry.def" - return false; + return callbacksAcceptPassName(Name, Callbacks); } Optional> @@ -1063,30 +1297,39 @@ bool PassBuilder::parseModulePass(ModulePassManager &MPM, MPM.addPass(createRepeatedPass(*Count, std::move(NestedMPM))); return true; } + + for (auto &C : ModulePipelineParsingCallbacks) + if (C(Name, MPM, InnerPipeline)) + return true; + // Normal passes can't have pipelines. return false; } // Manually handle aliases for pre-configured pipeline fragments. - if (Name.startswith("default") || Name.startswith("lto")) { + if (startsWithDefaultPipelineAliasPrefix(Name)) { SmallVector Matches; if (!DefaultAliasRegex.match(Name, &Matches)) return false; assert(Matches.size() == 3 && "Must capture two matched strings!"); OptimizationLevel L = StringSwitch(Matches[2]) - .Case("O0", O0) - .Case("O1", O1) - .Case("O2", O2) - .Case("O3", O3) - .Case("Os", Os) - .Case("Oz", Oz); + .Case("O0", O0) + .Case("O1", O1) + .Case("O2", O2) + .Case("O3", O3) + .Case("Os", Os) + .Case("Oz", Oz); if (L == O0) // At O0 we do nothing at all! 
return true; if (Matches[1] == "default") { MPM.addPass(buildPerModuleDefaultPipeline(L, DebugLogging)); + } else if (Matches[1] == "thinlto-pre-link") { + MPM.addPass(buildThinLTOPreLinkDefaultPipeline(L, DebugLogging)); + } else if (Matches[1] == "thinlto") { + MPM.addPass(buildThinLTODefaultPipeline(L, DebugLogging)); } else if (Matches[1] == "lto-pre-link") { MPM.addPass(buildLTOPreLinkDefaultPipeline(L, DebugLogging)); } else { @@ -1116,6 +1359,9 @@ bool PassBuilder::parseModulePass(ModulePassManager &MPM, } #include "PassRegistry.def" + for (auto &C : ModulePipelineParsingCallbacks) + if (C(Name, MPM, InnerPipeline)) + return true; return false; } @@ -1163,11 +1409,16 @@ bool PassBuilder::parseCGSCCPass(CGSCCPassManager &CGPM, *MaxRepetitions, DebugLogging)); return true; } + + for (auto &C : CGSCCPipelineParsingCallbacks) + if (C(Name, CGPM, InnerPipeline)) + return true; + // Normal passes can't have pipelines. return false; } - // Now expand the basic registered passes from the .inc file. +// Now expand the basic registered passes from the .inc file. #define CGSCC_PASS(NAME, CREATE_PASS) \ if (Name == NAME) { \ CGPM.addPass(CREATE_PASS); \ @@ -1188,6 +1439,9 @@ bool PassBuilder::parseCGSCCPass(CGSCCPassManager &CGPM, } #include "PassRegistry.def" + for (auto &C : CGSCCPipelineParsingCallbacks) + if (C(Name, CGPM, InnerPipeline)) + return true; return false; } @@ -1225,11 +1479,16 @@ bool PassBuilder::parseFunctionPass(FunctionPassManager &FPM, FPM.addPass(createRepeatedPass(*Count, std::move(NestedFPM))); return true; } + + for (auto &C : FunctionPipelineParsingCallbacks) + if (C(Name, FPM, InnerPipeline)) + return true; + // Normal passes can't have pipelines. return false; } - // Now expand the basic registered passes from the .inc file. +// Now expand the basic registered passes from the .inc file. #define FUNCTION_PASS(NAME, CREATE_PASS) \ if (Name == NAME) { \ FPM.addPass(CREATE_PASS); \ @@ -1249,6 +1508,9 @@ bool PassBuilder::parseFunctionPass(FunctionPassManager &FPM, } #include "PassRegistry.def" + for (auto &C : FunctionPipelineParsingCallbacks) + if (C(Name, FPM, InnerPipeline)) + return true; return false; } @@ -1276,11 +1538,16 @@ bool PassBuilder::parseLoopPass(LoopPassManager &LPM, const PipelineElement &E, LPM.addPass(createRepeatedPass(*Count, std::move(NestedLPM))); return true; } + + for (auto &C : LoopPipelineParsingCallbacks) + if (C(Name, LPM, InnerPipeline)) + return true; + // Normal passes can't have pipelines. return false; } - // Now expand the basic registered passes from the .inc file. +// Now expand the basic registered passes from the .inc file. #define LOOP_PASS(NAME, CREATE_PASS) \ if (Name == NAME) { \ LPM.addPass(CREATE_PASS); \ @@ -1301,6 +1568,9 @@ bool PassBuilder::parseLoopPass(LoopPassManager &LPM, const PipelineElement &E, } #include "PassRegistry.def" + for (auto &C : LoopPipelineParsingCallbacks) + if (C(Name, LPM, InnerPipeline)) + return true; return false; } @@ -1319,6 +1589,9 @@ bool PassBuilder::parseAAPassName(AAManager &AA, StringRef Name) { } #include "PassRegistry.def" + for (auto &C : AAParsingCallbacks) + if (C(Name, AA)) + return true; return false; } @@ -1385,7 +1658,7 @@ bool PassBuilder::parseModulePassPipeline(ModulePassManager &MPM, return true; } -// Primary pass pipeline description parsing routine. +// Primary pass pipeline description parsing routine for a \c ModulePassManager // FIXME: Should this routine accept a TargetMachine or require the caller to // pre-populate the analysis managers with target-specific stuff? 
bool PassBuilder::parsePassPipeline(ModulePassManager &MPM, @@ -1399,21 +1672,70 @@ bool PassBuilder::parsePassPipeline(ModulePassManager &MPM, // automatically. StringRef FirstName = Pipeline->front().Name; - if (!isModulePassName(FirstName)) { - if (isCGSCCPassName(FirstName)) + if (!isModulePassName(FirstName, ModulePipelineParsingCallbacks)) { + if (isCGSCCPassName(FirstName, CGSCCPipelineParsingCallbacks)) { Pipeline = {{"cgscc", std::move(*Pipeline)}}; - else if (isFunctionPassName(FirstName)) + } else if (isFunctionPassName(FirstName, + FunctionPipelineParsingCallbacks)) { Pipeline = {{"function", std::move(*Pipeline)}}; - else if (isLoopPassName(FirstName)) + } else if (isLoopPassName(FirstName, LoopPipelineParsingCallbacks)) { Pipeline = {{"function", {{"loop", std::move(*Pipeline)}}}}; - else + } else { + for (auto &C : TopLevelPipelineParsingCallbacks) + if (C(MPM, *Pipeline, VerifyEachPass, DebugLogging)) + return true; + // Unknown pass name! return false; + } } return parseModulePassPipeline(MPM, *Pipeline, VerifyEachPass, DebugLogging); } +// Primary pass pipeline description parsing routine for a \c CGSCCPassManager +bool PassBuilder::parsePassPipeline(CGSCCPassManager &CGPM, + StringRef PipelineText, bool VerifyEachPass, + bool DebugLogging) { + auto Pipeline = parsePipelineText(PipelineText); + if (!Pipeline || Pipeline->empty()) + return false; + + StringRef FirstName = Pipeline->front().Name; + if (!isCGSCCPassName(FirstName, CGSCCPipelineParsingCallbacks)) + return false; + + return parseCGSCCPassPipeline(CGPM, *Pipeline, VerifyEachPass, DebugLogging); +} + +// Primary pass pipeline description parsing routine for a \c +// FunctionPassManager +bool PassBuilder::parsePassPipeline(FunctionPassManager &FPM, + StringRef PipelineText, bool VerifyEachPass, + bool DebugLogging) { + auto Pipeline = parsePipelineText(PipelineText); + if (!Pipeline || Pipeline->empty()) + return false; + + StringRef FirstName = Pipeline->front().Name; + if (!isFunctionPassName(FirstName, FunctionPipelineParsingCallbacks)) + return false; + + return parseFunctionPassPipeline(FPM, *Pipeline, VerifyEachPass, + DebugLogging); +} + +// Primary pass pipeline description parsing routine for a \c LoopPassManager +bool PassBuilder::parsePassPipeline(LoopPassManager &CGPM, + StringRef PipelineText, bool VerifyEachPass, + bool DebugLogging) { + auto Pipeline = parsePipelineText(PipelineText); + if (!Pipeline || Pipeline->empty()) + return false; + + return parseLoopPassPipeline(CGPM, *Pipeline, VerifyEachPass, DebugLogging); +} + bool PassBuilder::parseAAPipeline(AAManager &AA, StringRef PipelineText) { // If the pipeline just consists of the word 'default' just replace the AA // manager with our default one. 
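The parsing callbacks threaded through the routines above are what let an embedding client (such as cling) extend the textual pipeline syntax without patching the parser itself. A minimal sketch of the intended usage, assuming the registerPipelineParsingCallback() hook in PassBuilder.h that populates the ModulePipelineParsingCallbacks vector consulted above; the pass name "my-noop" and the MyNoOpPass struct are hypothetical, used only for illustration:

  #include "llvm/IR/PassManager.h"
  #include "llvm/Passes/PassBuilder.h"
  using namespace llvm;

  // Hypothetical do-nothing module pass used to demonstrate name registration.
  struct MyNoOpPass : PassInfoMixin<MyNoOpPass> {
    PreservedAnalyses run(Module &, ModuleAnalysisManager &) {
      return PreservedAnalyses::all();
    }
  };

  bool buildFromText(PassBuilder &PB, ModulePassManager &MPM) {
    PB.registerPipelineParsingCallback(
        [](StringRef Name, ModulePassManager &PM,
           ArrayRef<PassBuilder::PipelineElement>) {
          if (Name != "my-noop")
            return false; // not ours; let the other parsers try this name
          PM.addPass(MyNoOpPass());
          return true;    // name claimed by this callback
        });
    // The outermost manager type may be omitted from the text:
    // isModulePassName() now consults the callbacks, so plain "my-noop"
    // parses as a module pipeline.
    return PB.parsePassPipeline(MPM, "my-noop", /*VerifyEachPass=*/false,
                                /*DebugLogging=*/false);
  }

Note that callbacksAcceptPassName() probes the callbacks with a dummy pass manager, so a callback should tolerate being invoked purely as a name query and must not assume the passes it adds are always kept.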
diff --git a/interpreter/llvm/src/lib/ProfileData/Coverage/CoverageMapping.cpp b/interpreter/llvm/src/lib/ProfileData/Coverage/CoverageMapping.cpp index 23999a5312c73..8c5f136ea2704 100644 --- a/interpreter/llvm/src/lib/ProfileData/Coverage/CoverageMapping.cpp +++ b/interpreter/llvm/src/lib/ProfileData/Coverage/CoverageMapping.cpp @@ -1,4 +1,4 @@ -//===- CoverageMapping.cpp - Code coverage mapping support ------*- C++ -*-===// +//===- CoverageMapping.cpp - Code coverage mapping support ----------------===// // // The LLVM Compiler Infrastructure // @@ -12,6 +12,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ProfileData/Coverage/CoverageMapping.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/None.h" @@ -19,7 +20,6 @@ #include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" -#include "llvm/ProfileData/Coverage/CoverageMapping.h" #include "llvm/ProfileData/Coverage/CoverageMappingReader.h" #include "llvm/ProfileData/InstrProfReader.h" #include "llvm/Support/Debug.h" @@ -54,26 +54,26 @@ Counter CounterExpressionBuilder::get(const CounterExpression &E) { return Counter::getExpression(I); } -void CounterExpressionBuilder::extractTerms( - Counter C, int Sign, SmallVectorImpl> &Terms) { +void CounterExpressionBuilder::extractTerms(Counter C, int Factor, + SmallVectorImpl &Terms) { switch (C.getKind()) { case Counter::Zero: break; case Counter::CounterValueReference: - Terms.push_back(std::make_pair(C.getCounterID(), Sign)); + Terms.emplace_back(C.getCounterID(), Factor); break; case Counter::Expression: const auto &E = Expressions[C.getExpressionID()]; - extractTerms(E.LHS, Sign, Terms); - extractTerms(E.RHS, E.Kind == CounterExpression::Subtract ? -Sign : Sign, - Terms); + extractTerms(E.LHS, Factor, Terms); + extractTerms( + E.RHS, E.Kind == CounterExpression::Subtract ? -Factor : Factor, Terms); break; } } Counter CounterExpressionBuilder::simplify(Counter ExpressionTree) { // Gather constant terms. - SmallVector, 32> Terms; + SmallVector Terms; extractTerms(ExpressionTree, +1, Terms); // If there are no terms, this is just a zero. The algorithm below assumes at @@ -82,17 +82,15 @@ Counter CounterExpressionBuilder::simplify(Counter ExpressionTree) { return Counter::getZero(); // Group the terms by counter ID. - std::sort(Terms.begin(), Terms.end(), - [](const std::pair &LHS, - const std::pair &RHS) { - return LHS.first < RHS.first; + std::sort(Terms.begin(), Terms.end(), [](const Term &LHS, const Term &RHS) { + return LHS.CounterID < RHS.CounterID; }); // Combine terms by counter ID to eliminate counters that sum to zero. auto Prev = Terms.begin(); for (auto I = Prev + 1, E = Terms.end(); I != E; ++I) { - if (I->first == Prev->first) { - Prev->second += I->second; + if (I->CounterID == Prev->CounterID) { + Prev->Factor += I->Factor; continue; } ++Prev; @@ -103,24 +101,24 @@ Counter CounterExpressionBuilder::simplify(Counter ExpressionTree) { Counter C; // Create additions. We do this before subtractions to avoid constructs like // ((0 - X) + Y), as opposed to (Y - X). 
- for (auto Term : Terms) { - if (Term.second <= 0) + for (auto T : Terms) { + if (T.Factor <= 0) continue; - for (int I = 0; I < Term.second; ++I) + for (int I = 0; I < T.Factor; ++I) if (C.isZero()) - C = Counter::getCounter(Term.first); + C = Counter::getCounter(T.CounterID); else C = get(CounterExpression(CounterExpression::Add, C, - Counter::getCounter(Term.first))); + Counter::getCounter(T.CounterID))); } // Create subtractions. - for (auto Term : Terms) { - if (Term.second >= 0) + for (auto T : Terms) { + if (T.Factor >= 0) continue; - for (int I = 0; I < -Term.second; ++I) + for (int I = 0; I < -T.Factor; ++I) C = get(CounterExpression(CounterExpression::Subtract, C, - Counter::getCounter(Term.first))); + Counter::getCounter(T.CounterID))); } return C; } @@ -200,6 +198,9 @@ Error CoverageMapping::loadFunctionRecord( const CoverageMappingRecord &Record, IndexedInstrProfReader &ProfileReader) { StringRef OrigFuncName = Record.FunctionName; + if (OrigFuncName.empty()) + return make_error(coveragemap_error::malformed); + if (Record.Filenames.empty()) OrigFuncName = getFuncNameWithoutPrefix(OrigFuncName); else @@ -244,18 +245,6 @@ Error CoverageMapping::loadFunctionRecord( return Error::success(); } -Expected> -CoverageMapping::load(CoverageMappingReader &CoverageReader, - IndexedInstrProfReader &ProfileReader) { - auto Coverage = std::unique_ptr(new CoverageMapping()); - - for (const auto &Record : CoverageReader) - if (Error E = Coverage->loadFunctionRecord(Record, ProfileReader)) - return std::move(E); - - return std::move(Coverage); -} - Expected> CoverageMapping::load( ArrayRef> CoverageReaders, IndexedInstrProfReader &ProfileReader) { @@ -300,8 +289,8 @@ namespace { /// An instantiation set is a collection of functions that have the same source /// code, ie, template functions specializations. class FunctionInstantiationSetCollector { - typedef DenseMap, - std::vector> MapT; + using MapT = DenseMap, + std::vector>; MapT InstantiatedFunctions; public: @@ -315,7 +304,6 @@ class FunctionInstantiationSetCollector { } MapT::iterator begin() { return InstantiatedFunctions.begin(); } - MapT::iterator end() { return InstantiatedFunctions.end(); } }; diff --git a/interpreter/llvm/src/lib/ProfileData/Coverage/CoverageMappingReader.cpp b/interpreter/llvm/src/lib/ProfileData/Coverage/CoverageMappingReader.cpp index a34f359cd5427..fff0a03ccbe01 100644 --- a/interpreter/llvm/src/lib/ProfileData/Coverage/CoverageMappingReader.cpp +++ b/interpreter/llvm/src/lib/ProfileData/Coverage/CoverageMappingReader.cpp @@ -1,4 +1,4 @@ -//===- CoverageMappingReader.cpp - Code coverage mapping reader -*- C++ -*-===// +//===- CoverageMappingReader.cpp - Code coverage mapping reader -----------===// // // The LLVM Compiler Infrastructure // @@ -62,7 +62,7 @@ void CoverageMappingIterator::increment() { } Error RawCoverageReader::readULEB128(uint64_t &Result) { - if (Data.size() < 1) + if (Data.empty()) return make_error(coveragemap_error::truncated); unsigned N = 0; Result = decodeULEB128(reinterpret_cast(Data.data()), &N); @@ -392,9 +392,9 @@ struct CovMapFuncRecordReader { // A class for reading coverage mapping function records for a module. 
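The simplify() rewrite above trades the old std::pair<unsigned, int> bookkeeping for the named Term {CounterID, Factor} struct; the algorithm itself is unchanged: flatten the expression tree into signed terms, sort by counter ID, fold terms with the same counter, then materialize additions before subtractions. A standalone sketch of the folding step, using a local Term struct that mirrors (but is not) the one the patch introduces:

  #include <algorithm>
  #include <vector>

  // Illustrative stand-in for CounterExpressionBuilder's Term.
  struct Term { unsigned CounterID; int Factor; };

  // Sort by counter ID and sum the factors of equal counters; a term whose
  // factors cancel to zero contributes nothing to the rebuilt expression.
  std::vector<Term> foldTerms(std::vector<Term> Terms) {
    std::sort(Terms.begin(), Terms.end(),
              [](const Term &L, const Term &R) {
                return L.CounterID < R.CounterID;
              });
    std::vector<Term> Folded;
    for (const Term &T : Terms) {
      if (!Folded.empty() && Folded.back().CounterID == T.CounterID)
        Folded.back().Factor += T.Factor; // same counter: accumulate
      else
        Folded.push_back(T);
    }
    return Folded; // e.g. {0,+1},{1,+1},{0,-1} folds to {0,0},{1,+1}
  }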
template class VersionedCovMapFuncRecordReader : public CovMapFuncRecordReader { - typedef typename CovMapTraits< - Version, IntPtrT>::CovMapFuncRecordType FuncRecordType; - typedef typename CovMapTraits::NameRefType NameRefType; + using FuncRecordType = + typename CovMapTraits::CovMapFuncRecordType; + using NameRefType = typename CovMapTraits::NameRefType; // Maps function's name references to the indexes of their records // in \c Records. @@ -419,6 +419,8 @@ class VersionedCovMapFuncRecordReader : public CovMapFuncRecordReader { StringRef FuncName; if (Error Err = CFR->template getFuncName(ProfileNames, FuncName)) return Err; + if (FuncName.empty()) + return make_error(instrprof_error::malformed); Records.emplace_back(Version, FuncName, FuncHash, Mapping, FilenamesBegin, Filenames.size() - FilenamesBegin); return Error::success(); @@ -574,7 +576,7 @@ static Error loadTestingFormat(StringRef Data, InstrProfSymtab &ProfileNames, Endian = support::endianness::little; Data = Data.substr(StringRef(TestingFormatMagic).size()); - if (Data.size() < 1) + if (Data.empty()) return make_error(coveragemap_error::truncated); unsigned N = 0; auto ProfileNamesSize = @@ -582,7 +584,7 @@ static Error loadTestingFormat(StringRef Data, InstrProfSymtab &ProfileNames, if (N > Data.size()) return make_error(coveragemap_error::malformed); Data = Data.substr(N); - if (Data.size() < 1) + if (Data.empty()) return make_error(coveragemap_error::truncated); N = 0; uint64_t Address = @@ -596,7 +598,7 @@ static Error loadTestingFormat(StringRef Data, InstrProfSymtab &ProfileNames, return E; CoverageMapping = Data.substr(ProfileNamesSize); // Skip the padding bytes because coverage map data has an alignment of 8. - if (CoverageMapping.size() < 1) + if (CoverageMapping.empty()) return make_error(coveragemap_error::truncated); size_t Pad = alignmentAdjustment(CoverageMapping.data(), 8); if (CoverageMapping.size() < Pad) diff --git a/interpreter/llvm/src/lib/ProfileData/Coverage/CoverageMappingWriter.cpp b/interpreter/llvm/src/lib/ProfileData/Coverage/CoverageMappingWriter.cpp index f131be2cba492..6fe93530da21b 100644 --- a/interpreter/llvm/src/lib/ProfileData/Coverage/CoverageMappingWriter.cpp +++ b/interpreter/llvm/src/lib/ProfileData/Coverage/CoverageMappingWriter.cpp @@ -12,9 +12,9 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ProfileData/Coverage/CoverageMappingWriter.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ProfileData/Coverage/CoverageMappingWriter.h" #include "llvm/Support/LEB128.h" #include "llvm/Support/raw_ostream.h" #include diff --git a/interpreter/llvm/src/lib/ProfileData/InstrProf.cpp b/interpreter/llvm/src/lib/ProfileData/InstrProf.cpp index 64a65ccc11a19..48c1643cb13c8 100644 --- a/interpreter/llvm/src/lib/ProfileData/InstrProf.cpp +++ b/interpreter/llvm/src/lib/ProfileData/InstrProf.cpp @@ -12,6 +12,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ProfileData/InstrProf.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" @@ -29,7 +30,6 @@ #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" -#include "llvm/ProfileData/InstrProf.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" @@ -45,8 +45,8 @@ #include #include #include -#include #include +#include #include #include #include @@ -330,14 +330,15 @@ GlobalVariable 
*createPGOFuncNameVar(Function &F, StringRef PGOFuncName) { return createPGOFuncNameVar(*F.getParent(), F.getLinkage(), PGOFuncName); } -void InstrProfSymtab::create(Module &M, bool InLTO) { +Error InstrProfSymtab::create(Module &M, bool InLTO) { for (Function &F : M) { // Function may not have a name: like using asm("") to overwrite the name. // Ignore in this case. if (!F.hasName()) continue; const std::string &PGOFuncName = getPGOFuncName(F, InLTO); - addFuncName(PGOFuncName); + if (Error E = addFuncName(PGOFuncName)) + return E; MD5FuncMap.emplace_back(Function::getGUID(PGOFuncName), &F); // In ThinLTO, local function may have been promoted to global and have // suffix added to the function name. We need to add the stripped function @@ -346,16 +347,18 @@ void InstrProfSymtab::create(Module &M, bool InLTO) { auto pos = PGOFuncName.find('.'); if (pos != std::string::npos) { const std::string &OtherFuncName = PGOFuncName.substr(0, pos); - addFuncName(OtherFuncName); + if (Error E = addFuncName(OtherFuncName)) + return E; MD5FuncMap.emplace_back(Function::getGUID(OtherFuncName), &F); } } } finalizeSymtab(); + return Error::success(); } -Error collectPGOFuncNameStrings(const std::vector &NameStrs, +Error collectPGOFuncNameStrings(ArrayRef NameStrs, bool doCompression, std::string &Result) { assert(!NameStrs.empty() && "No name data to emit"); @@ -403,7 +406,7 @@ StringRef getPGOFuncNameVarInitializer(GlobalVariable *NameVar) { return NameStr; } -Error collectPGOFuncNameStrings(const std::vector &NameVars, +Error collectPGOFuncNameStrings(ArrayRef NameVars, std::string &Result, bool doCompression) { std::vector NameStrs; for (auto *NameVar : NameVars) { @@ -447,7 +450,8 @@ Error readPGOFuncNameStrings(StringRef NameStrings, InstrProfSymtab &Symtab) { SmallVector Names; NameStrings.split(Names, getInstrProfNameSeparator()); for (StringRef &Name : Names) - Symtab.addFuncName(Name); + if (Error E = Symtab.addFuncName(Name)) + return E; while (P < EndP && *P == 0) P++; @@ -456,9 +460,9 @@ Error readPGOFuncNameStrings(StringRef NameStrings, InstrProfSymtab &Symtab) { return Error::success(); } -void InstrProfValueSiteRecord::merge(SoftInstrProfErrors &SIPE, - InstrProfValueSiteRecord &Input, - uint64_t Weight) { +void InstrProfValueSiteRecord::merge(InstrProfValueSiteRecord &Input, + uint64_t Weight, + function_ref Warn) { this->sortByTargetValues(); Input.sortByTargetValues(); auto I = ValueData.begin(); @@ -471,7 +475,7 @@ void InstrProfValueSiteRecord::merge(SoftInstrProfErrors &SIPE, bool Overflowed; I->Count = SaturatingMultiplyAdd(J->Count, Weight, I->Count, &Overflowed); if (Overflowed) - SIPE.addError(instrprof_error::counter_overflow); + Warn(instrprof_error::counter_overflow); ++I; continue; } @@ -479,40 +483,43 @@ void InstrProfValueSiteRecord::merge(SoftInstrProfErrors &SIPE, } } -void InstrProfValueSiteRecord::scale(SoftInstrProfErrors &SIPE, - uint64_t Weight) { +void InstrProfValueSiteRecord::scale(uint64_t Weight, + function_ref Warn) { for (auto I = ValueData.begin(), IE = ValueData.end(); I != IE; ++I) { bool Overflowed; I->Count = SaturatingMultiply(I->Count, Weight, &Overflowed); if (Overflowed) - SIPE.addError(instrprof_error::counter_overflow); + Warn(instrprof_error::counter_overflow); } } // Merge Value Profile data from Src record to this record for ValueKind. // Scale merged value counts by \p Weight. 
-void InstrProfRecord::mergeValueProfData(uint32_t ValueKind, - InstrProfRecord &Src, - uint64_t Weight) { +void InstrProfRecord::mergeValueProfData( + uint32_t ValueKind, InstrProfRecord &Src, uint64_t Weight, + function_ref Warn) { uint32_t ThisNumValueSites = getNumValueSites(ValueKind); uint32_t OtherNumValueSites = Src.getNumValueSites(ValueKind); if (ThisNumValueSites != OtherNumValueSites) { - SIPE.addError(instrprof_error::value_site_count_mismatch); + Warn(instrprof_error::value_site_count_mismatch); return; } + if (!ThisNumValueSites) + return; std::vector &ThisSiteRecords = - getValueSitesForKind(ValueKind); - std::vector &OtherSiteRecords = + getOrCreateValueSitesForKind(ValueKind); + MutableArrayRef OtherSiteRecords = Src.getValueSitesForKind(ValueKind); for (uint32_t I = 0; I < ThisNumValueSites; I++) - ThisSiteRecords[I].merge(SIPE, OtherSiteRecords[I], Weight); + ThisSiteRecords[I].merge(OtherSiteRecords[I], Weight, Warn); } -void InstrProfRecord::merge(InstrProfRecord &Other, uint64_t Weight) { +void InstrProfRecord::merge(InstrProfRecord &Other, uint64_t Weight, + function_ref Warn) { // If the number of counters doesn't match we either have bad data // or a hash collision. if (Counts.size() != Other.Counts.size()) { - SIPE.addError(instrprof_error::count_mismatch); + Warn(instrprof_error::count_mismatch); return; } @@ -521,30 +528,30 @@ void InstrProfRecord::merge(InstrProfRecord &Other, uint64_t Weight) { Counts[I] = SaturatingMultiplyAdd(Other.Counts[I], Weight, Counts[I], &Overflowed); if (Overflowed) - SIPE.addError(instrprof_error::counter_overflow); + Warn(instrprof_error::counter_overflow); } for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) - mergeValueProfData(Kind, Other, Weight); + mergeValueProfData(Kind, Other, Weight, Warn); } -void InstrProfRecord::scaleValueProfData(uint32_t ValueKind, uint64_t Weight) { - uint32_t ThisNumValueSites = getNumValueSites(ValueKind); - std::vector &ThisSiteRecords = - getValueSitesForKind(ValueKind); - for (uint32_t I = 0; I < ThisNumValueSites; I++) - ThisSiteRecords[I].scale(SIPE, Weight); +void InstrProfRecord::scaleValueProfData( + uint32_t ValueKind, uint64_t Weight, + function_ref Warn) { + for (auto &R : getValueSitesForKind(ValueKind)) + R.scale(Weight, Warn); } -void InstrProfRecord::scale(uint64_t Weight) { +void InstrProfRecord::scale(uint64_t Weight, + function_ref Warn) { for (auto &Count : this->Counts) { bool Overflowed; Count = SaturatingMultiply(Count, Weight, &Overflowed); if (Overflowed) - SIPE.addError(instrprof_error::counter_overflow); + Warn(instrprof_error::counter_overflow); } for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) - scaleValueProfData(Kind, Weight); + scaleValueProfData(Kind, Weight, Warn); } // Map indirect call target name hash to name string. @@ -579,7 +586,7 @@ void InstrProfRecord::addValueData(uint32_t ValueKind, uint32_t Site, VData[I].Value = remapValue(VData[I].Value, ValueKind, ValueMap); } std::vector &ValueSites = - getValueSitesForKind(ValueKind); + getOrCreateValueSitesForKind(ValueKind); if (N == 0) ValueSites.emplace_back(); else @@ -638,8 +645,9 @@ static ValueProfRecordClosure InstrProfRecordClosure = { // Wrapper implementation using the closure mechanism. 
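With SoftInstrProfErrors removed, merge() and scale() now report soft failures through a function_ref<void(instrprof_error)> parameter, so each caller decides locally whether a counter overflow or a site-count mismatch is fatal. A hedged sketch of the resulting call-site shape (Dst and Src are placeholder records):

  #include "llvm/ProfileData/InstrProf.h"
  using namespace llvm;

  // Merge Src into Dst, remembering only the first soft error, which roughly
  // reproduces what the removed SoftInstrProfErrors helper used to do.
  instrprof_error mergeRecording(InstrProfRecord &Dst, InstrProfRecord &Src) {
    instrprof_error First = instrprof_error::success;
    Dst.merge(Src, /*Weight=*/1, [&](instrprof_error E) {
      if (First == instrprof_error::success)
        First = E; // keep the first warning, drop the rest
    });
    return First; // success, or e.g. counter_overflow / count_mismatch
  }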
uint32_t ValueProfData::getSize(const InstrProfRecord &Record) { - InstrProfRecordClosure.Record = &Record; - return getValueProfDataSize(&InstrProfRecordClosure); + auto Closure = InstrProfRecordClosure; + Closure.Record = &Record; + return getValueProfDataSize(&Closure); } // Wrapper implementation using the closure mechanism. @@ -978,22 +986,22 @@ bool canRenameComdatFunc(const Function &F, bool CheckAddressTaken) { } // Parse the value profile options. -void getMemOPSizeRangeFromOption(std::string MemOPSizeRange, - int64_t &RangeStart, int64_t &RangeLast) { +void getMemOPSizeRangeFromOption(StringRef MemOPSizeRange, int64_t &RangeStart, + int64_t &RangeLast) { static const int64_t DefaultMemOPSizeRangeStart = 0; static const int64_t DefaultMemOPSizeRangeLast = 8; RangeStart = DefaultMemOPSizeRangeStart; RangeLast = DefaultMemOPSizeRangeLast; if (!MemOPSizeRange.empty()) { - auto Pos = MemOPSizeRange.find(":"); + auto Pos = MemOPSizeRange.find(':'); if (Pos != std::string::npos) { if (Pos > 0) - RangeStart = atoi(MemOPSizeRange.substr(0, Pos).c_str()); + MemOPSizeRange.substr(0, Pos).getAsInteger(10, RangeStart); if (Pos < MemOPSizeRange.size() - 1) - RangeLast = atoi(MemOPSizeRange.substr(Pos + 1).c_str()); + MemOPSizeRange.substr(Pos + 1).getAsInteger(10, RangeLast); } else - RangeLast = atoi(MemOPSizeRange.c_str()); + MemOPSizeRange.getAsInteger(10, RangeLast); } assert(RangeLast >= RangeStart); } diff --git a/interpreter/llvm/src/lib/ProfileData/InstrProfReader.cpp b/interpreter/llvm/src/lib/ProfileData/InstrProfReader.cpp index 856f793363f77..1b39a0695aac6 100644 --- a/interpreter/llvm/src/lib/ProfileData/InstrProfReader.cpp +++ b/interpreter/llvm/src/lib/ProfileData/InstrProfReader.cpp @@ -12,12 +12,12 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ProfileData/InstrProfReader.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/IR/ProfileSummary.h" #include "llvm/ProfileData/InstrProf.h" -#include "llvm/ProfileData/InstrProfReader.h" #include "llvm/ProfileData/ProfileCommon.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" @@ -200,7 +200,8 @@ TextInstrProfReader::readValueProfileData(InstrProfRecord &Record) { std::pair VD = Line->rsplit(':'); uint64_t TakenCount, Value; if (ValueKind == IPVK_IndirectCallTarget) { - Symtab->addFuncName(VD.first); + if (Error E = Symtab->addFuncName(VD.first)) + return E; Value = IndexedInstrProf::ComputeHash(VD.first); } else { READ_NUM(VD.first, Value); @@ -220,7 +221,7 @@ TextInstrProfReader::readValueProfileData(InstrProfRecord &Record) { #undef VP_READ_ADVANCE } -Error TextInstrProfReader::readNextRecord(InstrProfRecord &Record) { +Error TextInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) { // Skip empty lines and comments. while (!Line.is_at_end() && (Line->empty() || Line->startswith("#"))) ++Line; @@ -232,7 +233,8 @@ Error TextInstrProfReader::readNextRecord(InstrProfRecord &Record) { // Read the function name. Record.Name = *Line++; - Symtab->addFuncName(Record.Name); + if (Error E = Symtab->addFuncName(Record.Name)) + return E; // Read the function hash. 
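InstrProfSymtab::addFuncName() (and with it InstrProfSymtab::create()) now returns llvm::Error instead of silently accepting malformed input, which is why every reader above gained an `if (Error E = ...) return E;` wrapper. A small sketch of the resulting pattern, with the name list being an arbitrary example:

  #include "llvm/ADT/ArrayRef.h"
  #include "llvm/ProfileData/InstrProf.h"
  using namespace llvm;

  // Populate a symbol table, propagating the first failure (e.g. an empty
  // name) exactly as the text and raw profile readers now do.
  Error populate(InstrProfSymtab &Symtab, ArrayRef<StringRef> Names) {
    for (StringRef N : Names)
      if (Error E = Symtab.addFuncName(N))
        return E;
    Symtab.finalizeSymtab(); // sort/uniquify once all names are in
    return Error::success();
  }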
if (Line.is_at_end()) @@ -375,13 +377,13 @@ Error RawInstrProfReader::readHeader( } template -Error RawInstrProfReader::readName(InstrProfRecord &Record) { +Error RawInstrProfReader::readName(NamedInstrProfRecord &Record) { Record.Name = getName(Data->NameRef); return success(); } template -Error RawInstrProfReader::readFuncHash(InstrProfRecord &Record) { +Error RawInstrProfReader::readFuncHash(NamedInstrProfRecord &Record) { Record.Hash = swap(Data->FuncHash); return success(); } @@ -443,7 +445,7 @@ Error RawInstrProfReader::readValueProfilingData( } template -Error RawInstrProfReader::readNextRecord(InstrProfRecord &Record) { +Error RawInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) { if (atEnd()) // At this point, ValueDataStart field points to the next header. if (Error E = readNextHeader(getNextHeaderPos())) @@ -482,8 +484,8 @@ InstrProfLookupTrait::ComputeHash(StringRef K) { return IndexedInstrProf::ComputeHash(HashType, K); } -typedef InstrProfLookupTrait::data_type data_type; -typedef InstrProfLookupTrait::offset_type offset_type; +using data_type = InstrProfLookupTrait::data_type; +using offset_type = InstrProfLookupTrait::offset_type; bool InstrProfLookupTrait::readValueProfilingData( const unsigned char *&D, const unsigned char *const End) { @@ -548,7 +550,7 @@ data_type InstrProfLookupTrait::ReadData(StringRef K, const unsigned char *D, template Error InstrProfReaderIndex::getRecords( - StringRef FuncName, ArrayRef &Data) { + StringRef FuncName, ArrayRef &Data) { auto Iter = HashTable->find(FuncName); if (Iter == HashTable->end()) return make_error(instrprof_error::unknown_function); @@ -562,7 +564,7 @@ Error InstrProfReaderIndex::getRecords( template Error InstrProfReaderIndex::getRecords( - ArrayRef &Data) { + ArrayRef &Data) { if (atEnd()) return make_error(instrprof_error::eof); @@ -620,7 +622,7 @@ IndexedInstrProfReader::readSummary(IndexedInstrProf::ProfVersion Version, for (unsigned I = 0; I < SummarySize / sizeof(uint64_t); I++) Dst[I] = endian::byte_swap(Src[I]); - llvm::SummaryEntryVector DetailedSummary; + SummaryEntryVector DetailedSummary; for (unsigned I = 0; I < SummaryData->NumCutoffEntries; I++) { const IndexedInstrProf::Summary::Entry &Ent = SummaryData->getEntry(I); DetailedSummary.emplace_back((uint32_t)Ent.Cutoff, Ent.MinBlockCount, @@ -642,7 +644,7 @@ IndexedInstrProfReader::readSummary(IndexedInstrProf::ProfVersion Version, InstrProfSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs); // FIXME: This only computes an empty summary. Need to call addRecord for - // all InstrProfRecords to get the correct summary. + // all NamedInstrProfRecords to get the correct summary. 
this->Summary = Builder.getSummary(); return Cur; } @@ -694,7 +696,9 @@ InstrProfSymtab &IndexedInstrProfReader::getSymtab() { return *Symtab.get(); std::unique_ptr NewSymtab = make_unique(); - Index->populateSymtab(*NewSymtab.get()); + if (Error E = Index->populateSymtab(*NewSymtab.get())) { + consumeError(error(InstrProfError::take(std::move(E)))); + } Symtab = std::move(NewSymtab); return *Symtab.get(); @@ -703,7 +707,7 @@ InstrProfSymtab &IndexedInstrProfReader::getSymtab() { Expected IndexedInstrProfReader::getInstrProfRecord(StringRef FuncName, uint64_t FuncHash) { - ArrayRef Data; + ArrayRef Data; Error Err = Index->getRecords(FuncName, Data); if (Err) return std::move(Err); @@ -728,10 +732,10 @@ Error IndexedInstrProfReader::getFunctionCounts(StringRef FuncName, return success(); } -Error IndexedInstrProfReader::readNextRecord(InstrProfRecord &Record) { +Error IndexedInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) { static unsigned RecordIndex = 0; - ArrayRef Data; + ArrayRef Data; Error E = Index->getRecords(Data); if (E) diff --git a/interpreter/llvm/src/lib/ProfileData/InstrProfWriter.cpp b/interpreter/llvm/src/lib/ProfileData/InstrProfWriter.cpp index 6b7bd3b2fc0a0..ce3f8806e12e7 100644 --- a/interpreter/llvm/src/lib/ProfileData/InstrProfWriter.cpp +++ b/interpreter/llvm/src/lib/ProfileData/InstrProfWriter.cpp @@ -12,11 +12,11 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ProfileData/InstrProfWriter.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/IR/ProfileSummary.h" #include "llvm/ProfileData/InstrProf.h" -#include "llvm/ProfileData/InstrProfWriter.h" #include "llvm/ProfileData/ProfileCommon.h" #include "llvm/Support/Endian.h" #include "llvm/Support/EndianStream.h" @@ -69,8 +69,7 @@ class ProfOStream { write(P[K].D[I]); } } else { - raw_string_ostream &SOStream = - static_cast(OS); + raw_string_ostream &SOStream = static_cast(OS); std::string &Data = SOStream.str(); // with flush for (int K = 0; K < NItems; K++) { for (int I = 0; I < P[K].N; I++) { @@ -91,14 +90,14 @@ class ProfOStream { class InstrProfRecordWriterTrait { public: - typedef StringRef key_type; - typedef StringRef key_type_ref; + using key_type = StringRef; + using key_type_ref = StringRef; - typedef const InstrProfWriter::ProfilingData *const data_type; - typedef const InstrProfWriter::ProfilingData *const data_type_ref; + using data_type = const InstrProfWriter::ProfilingData *const; + using data_type_ref = const InstrProfWriter::ProfilingData *const; - typedef uint64_t hash_value_type; - typedef uint64_t offset_type; + using hash_value_type = uint64_t; + using offset_type = uint64_t; support::endianness ValueProfDataEndianness = support::little; InstrProfSummaryBuilder *SummaryBuilder; @@ -177,38 +176,46 @@ void InstrProfWriter::setOutputSparse(bool Sparse) { this->Sparse = Sparse; } -Error InstrProfWriter::addRecord(InstrProfRecord &&I, uint64_t Weight) { - auto &ProfileDataMap = FunctionData[I.Name]; +void InstrProfWriter::addRecord(NamedInstrProfRecord &&I, uint64_t Weight, + function_ref Warn) { + auto Name = I.Name; + auto Hash = I.Hash; + addRecord(Name, Hash, std::move(I), Weight, Warn); +} + +void InstrProfWriter::addRecord(StringRef Name, uint64_t Hash, + InstrProfRecord &&I, uint64_t Weight, + function_ref Warn) { + auto &ProfileDataMap = FunctionData[Name]; bool NewFunc; ProfilingData::iterator Where; std::tie(Where, NewFunc) = - ProfileDataMap.insert(std::make_pair(I.Hash, InstrProfRecord())); 
+ ProfileDataMap.insert(std::make_pair(Hash, InstrProfRecord())); InstrProfRecord &Dest = Where->second; + auto MapWarn = [&](instrprof_error E) { + Warn(make_error(E)); + }; + if (NewFunc) { // We've never seen a function with this name and hash, add it. Dest = std::move(I); - // Fix up the name to avoid dangling reference. - Dest.Name = FunctionData.find(Dest.Name)->getKey(); if (Weight > 1) - Dest.scale(Weight); + Dest.scale(Weight, MapWarn); } else { // We're updating a function we've seen before. - Dest.merge(I, Weight); + Dest.merge(I, Weight, MapWarn); } Dest.sortValueData(); - - return Dest.takeError(); } -Error InstrProfWriter::mergeRecordsFromWriter(InstrProfWriter &&IPW) { +void InstrProfWriter::mergeRecordsFromWriter(InstrProfWriter &&IPW, + function_ref Warn) { for (auto &I : IPW.FunctionData) for (auto &Func : I.getValue()) - if (Error E = addRecord(std::move(Func.second), 1)) - return E; - return Error::success(); + addRecord(I.getKey(), Func.first, std::move(Func.second), 1, Warn); } bool InstrProfWriter::shouldEncodeData(const ProfilingData &PD) { @@ -324,11 +331,12 @@ static const char *ValueProfKindStr[] = { #include "llvm/ProfileData/InstrProfData.inc" }; -void InstrProfWriter::writeRecordInText(const InstrProfRecord &Func, +void InstrProfWriter::writeRecordInText(StringRef Name, uint64_t Hash, + const InstrProfRecord &Func, InstrProfSymtab &Symtab, raw_fd_ostream &OS) { - OS << Func.Name << "\n"; - OS << "# Func Hash:\n" << Func.Hash << "\n"; + OS << Name << "\n"; + OS << "# Func Hash:\n" << Hash << "\n"; OS << "# Num Counters:\n" << Func.Counts.size() << "\n"; OS << "# Counter Values:\n"; for (uint64_t Count : Func.Counts) @@ -363,17 +371,19 @@ void InstrProfWriter::writeRecordInText(const InstrProfRecord &Func, OS << "\n"; } -void InstrProfWriter::writeText(raw_fd_ostream &OS) { +Error InstrProfWriter::writeText(raw_fd_ostream &OS) { if (ProfileKind == PF_IRLevel) OS << "# IR level Instrumentation Flag\n:ir\n"; InstrProfSymtab Symtab; for (const auto &I : FunctionData) if (shouldEncodeData(I.getValue())) - Symtab.addFuncName(I.getKey()); + if (Error E = Symtab.addFuncName(I.getKey())) + return E; Symtab.finalizeSymtab(); for (const auto &I : FunctionData) if (shouldEncodeData(I.getValue())) for (const auto &Func : I.getValue()) - writeRecordInText(Func.second, Symtab, OS); + writeRecordInText(I.getKey(), Func.first, Func.second, Symtab, OS); + return Error::success(); } diff --git a/interpreter/llvm/src/lib/ProfileData/SampleProfWriter.cpp b/interpreter/llvm/src/lib/ProfileData/SampleProfWriter.cpp index b05efa7417b94..b45026140c99a 100644 --- a/interpreter/llvm/src/lib/ProfileData/SampleProfWriter.cpp +++ b/interpreter/llvm/src/lib/ProfileData/SampleProfWriter.cpp @@ -18,10 +18,10 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ProfileData/SampleProfWriter.h" #include "llvm/ADT/StringRef.h" #include "llvm/ProfileData/ProfileCommon.h" #include "llvm/ProfileData/SampleProf.h" -#include "llvm/ProfileData/SampleProfWriter.h" #include "llvm/Support/ErrorOr.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/LEB128.h" diff --git a/interpreter/llvm/src/lib/Support/AMDGPUCodeObjectMetadata.cpp b/interpreter/llvm/src/lib/Support/AMDGPUCodeObjectMetadata.cpp new file mode 100644 index 0000000000000..863093ab7def7 --- /dev/null +++ b/interpreter/llvm/src/lib/Support/AMDGPUCodeObjectMetadata.cpp @@ -0,0 +1,216 @@ +//===--- AMDGPUCodeObjectMetadata.cpp ---------------------------*- C++ -*-===// +// +// The LLVM 
Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// \brief AMDGPU Code Object Metadata definitions and in-memory +/// representations. +/// +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/AMDGPUCodeObjectMetadata.h" +#include "llvm/Support/YAMLTraits.h" + +using namespace llvm::AMDGPU; +using namespace llvm::AMDGPU::CodeObject; + +LLVM_YAML_IS_SEQUENCE_VECTOR(Kernel::Arg::Metadata) +LLVM_YAML_IS_SEQUENCE_VECTOR(Kernel::Metadata) + +namespace llvm { +namespace yaml { + +template <> +struct ScalarEnumerationTraits { + static void enumeration(IO &YIO, AccessQualifier &EN) { + YIO.enumCase(EN, "Default", AccessQualifier::Default); + YIO.enumCase(EN, "ReadOnly", AccessQualifier::ReadOnly); + YIO.enumCase(EN, "WriteOnly", AccessQualifier::WriteOnly); + YIO.enumCase(EN, "ReadWrite", AccessQualifier::ReadWrite); + } +}; + +template <> +struct ScalarEnumerationTraits { + static void enumeration(IO &YIO, AddressSpaceQualifier &EN) { + YIO.enumCase(EN, "Private", AddressSpaceQualifier::Private); + YIO.enumCase(EN, "Global", AddressSpaceQualifier::Global); + YIO.enumCase(EN, "Constant", AddressSpaceQualifier::Constant); + YIO.enumCase(EN, "Local", AddressSpaceQualifier::Local); + YIO.enumCase(EN, "Generic", AddressSpaceQualifier::Generic); + YIO.enumCase(EN, "Region", AddressSpaceQualifier::Region); + } +}; + +template <> +struct ScalarEnumerationTraits { + static void enumeration(IO &YIO, ValueKind &EN) { + YIO.enumCase(EN, "ByValue", ValueKind::ByValue); + YIO.enumCase(EN, "GlobalBuffer", ValueKind::GlobalBuffer); + YIO.enumCase(EN, "DynamicSharedPointer", ValueKind::DynamicSharedPointer); + YIO.enumCase(EN, "Sampler", ValueKind::Sampler); + YIO.enumCase(EN, "Image", ValueKind::Image); + YIO.enumCase(EN, "Pipe", ValueKind::Pipe); + YIO.enumCase(EN, "Queue", ValueKind::Queue); + YIO.enumCase(EN, "HiddenGlobalOffsetX", ValueKind::HiddenGlobalOffsetX); + YIO.enumCase(EN, "HiddenGlobalOffsetY", ValueKind::HiddenGlobalOffsetY); + YIO.enumCase(EN, "HiddenGlobalOffsetZ", ValueKind::HiddenGlobalOffsetZ); + YIO.enumCase(EN, "HiddenNone", ValueKind::HiddenNone); + YIO.enumCase(EN, "HiddenPrintfBuffer", ValueKind::HiddenPrintfBuffer); + YIO.enumCase(EN, "HiddenDefaultQueue", ValueKind::HiddenDefaultQueue); + YIO.enumCase(EN, "HiddenCompletionAction", + ValueKind::HiddenCompletionAction); + } +}; + +template <> +struct ScalarEnumerationTraits { + static void enumeration(IO &YIO, ValueType &EN) { + YIO.enumCase(EN, "Struct", ValueType::Struct); + YIO.enumCase(EN, "I8", ValueType::I8); + YIO.enumCase(EN, "U8", ValueType::U8); + YIO.enumCase(EN, "I16", ValueType::I16); + YIO.enumCase(EN, "U16", ValueType::U16); + YIO.enumCase(EN, "F16", ValueType::F16); + YIO.enumCase(EN, "I32", ValueType::I32); + YIO.enumCase(EN, "U32", ValueType::U32); + YIO.enumCase(EN, "F32", ValueType::F32); + YIO.enumCase(EN, "I64", ValueType::I64); + YIO.enumCase(EN, "U64", ValueType::U64); + YIO.enumCase(EN, "F64", ValueType::F64); + } +}; + +template <> +struct MappingTraits { + static void mapping(IO &YIO, Kernel::Attrs::Metadata &MD) { + YIO.mapOptional(Kernel::Attrs::Key::ReqdWorkGroupSize, + MD.mReqdWorkGroupSize, std::vector()); + YIO.mapOptional(Kernel::Attrs::Key::WorkGroupSizeHint, + MD.mWorkGroupSizeHint, std::vector()); + YIO.mapOptional(Kernel::Attrs::Key::VecTypeHint, + 
MD.mVecTypeHint, std::string()); + } +}; + +template <> +struct MappingTraits { + static void mapping(IO &YIO, Kernel::Arg::Metadata &MD) { + YIO.mapRequired(Kernel::Arg::Key::Size, MD.mSize); + YIO.mapRequired(Kernel::Arg::Key::Align, MD.mAlign); + YIO.mapRequired(Kernel::Arg::Key::ValueKind, MD.mValueKind); + YIO.mapRequired(Kernel::Arg::Key::ValueType, MD.mValueType); + YIO.mapOptional(Kernel::Arg::Key::PointeeAlign, MD.mPointeeAlign, + uint32_t(0)); + YIO.mapOptional(Kernel::Arg::Key::AccQual, MD.mAccQual, + AccessQualifier::Unknown); + YIO.mapOptional(Kernel::Arg::Key::AddrSpaceQual, MD.mAddrSpaceQual, + AddressSpaceQualifier::Unknown); + YIO.mapOptional(Kernel::Arg::Key::IsConst, MD.mIsConst, false); + YIO.mapOptional(Kernel::Arg::Key::IsPipe, MD.mIsPipe, false); + YIO.mapOptional(Kernel::Arg::Key::IsRestrict, MD.mIsRestrict, false); + YIO.mapOptional(Kernel::Arg::Key::IsVolatile, MD.mIsVolatile, false); + YIO.mapOptional(Kernel::Arg::Key::Name, MD.mName, std::string()); + YIO.mapOptional(Kernel::Arg::Key::TypeName, MD.mTypeName, std::string()); + } +}; + +template <> +struct MappingTraits { + static void mapping(IO &YIO, Kernel::CodeProps::Metadata &MD) { + YIO.mapOptional(Kernel::CodeProps::Key::KernargSegmentSize, + MD.mKernargSegmentSize, uint64_t(0)); + YIO.mapOptional(Kernel::CodeProps::Key::WorkgroupGroupSegmentSize, + MD.mWorkgroupGroupSegmentSize, uint32_t(0)); + YIO.mapOptional(Kernel::CodeProps::Key::WorkitemPrivateSegmentSize, + MD.mWorkitemPrivateSegmentSize, uint32_t(0)); + YIO.mapOptional(Kernel::CodeProps::Key::WavefrontNumSGPRs, + MD.mWavefrontNumSGPRs, uint16_t(0)); + YIO.mapOptional(Kernel::CodeProps::Key::WorkitemNumVGPRs, + MD.mWorkitemNumVGPRs, uint16_t(0)); + YIO.mapOptional(Kernel::CodeProps::Key::KernargSegmentAlign, + MD.mKernargSegmentAlign, uint8_t(0)); + YIO.mapOptional(Kernel::CodeProps::Key::GroupSegmentAlign, + MD.mGroupSegmentAlign, uint8_t(0)); + YIO.mapOptional(Kernel::CodeProps::Key::PrivateSegmentAlign, + MD.mPrivateSegmentAlign, uint8_t(0)); + YIO.mapOptional(Kernel::CodeProps::Key::WavefrontSize, + MD.mWavefrontSize, uint8_t(0)); + } +}; + +template <> +struct MappingTraits { + static void mapping(IO &YIO, Kernel::DebugProps::Metadata &MD) { + YIO.mapOptional(Kernel::DebugProps::Key::DebuggerABIVersion, + MD.mDebuggerABIVersion, std::vector()); + YIO.mapOptional(Kernel::DebugProps::Key::ReservedNumVGPRs, + MD.mReservedNumVGPRs, uint16_t(0)); + YIO.mapOptional(Kernel::DebugProps::Key::ReservedFirstVGPR, + MD.mReservedFirstVGPR, uint16_t(-1)); + YIO.mapOptional(Kernel::DebugProps::Key::PrivateSegmentBufferSGPR, + MD.mPrivateSegmentBufferSGPR, uint16_t(-1)); + YIO.mapOptional(Kernel::DebugProps::Key::WavefrontPrivateSegmentOffsetSGPR, + MD.mWavefrontPrivateSegmentOffsetSGPR, uint16_t(-1)); + } +}; + +template <> +struct MappingTraits { + static void mapping(IO &YIO, Kernel::Metadata &MD) { + YIO.mapRequired(Kernel::Key::Name, MD.mName); + YIO.mapOptional(Kernel::Key::Language, MD.mLanguage, std::string()); + YIO.mapOptional(Kernel::Key::LanguageVersion, MD.mLanguageVersion, + std::vector()); + if (!MD.mAttrs.empty() || !YIO.outputting()) + YIO.mapOptional(Kernel::Key::Attrs, MD.mAttrs); + if (!MD.mArgs.empty() || !YIO.outputting()) + YIO.mapOptional(Kernel::Key::Args, MD.mArgs); + if (!MD.mCodeProps.empty() || !YIO.outputting()) + YIO.mapOptional(Kernel::Key::CodeProps, MD.mCodeProps); + if (!MD.mDebugProps.empty() || !YIO.outputting()) + YIO.mapOptional(Kernel::Key::DebugProps, MD.mDebugProps); + } +}; + +template <> +struct MappingTraits { + 
static void mapping(IO &YIO, CodeObject::Metadata &MD) { + YIO.mapRequired(Key::Version, MD.mVersion); + YIO.mapOptional(Key::Printf, MD.mPrintf, std::vector()); + if (!MD.mKernels.empty() || !YIO.outputting()) + YIO.mapOptional(Key::Kernels, MD.mKernels); + } +}; + +} // end namespace yaml + +namespace AMDGPU { +namespace CodeObject { + +/* static */ +std::error_code Metadata::fromYamlString( + std::string YamlString, Metadata &CodeObjectMetadata) { + yaml::Input YamlInput(YamlString); + YamlInput >> CodeObjectMetadata; + return YamlInput.error(); +} + +/* static */ +std::error_code Metadata::toYamlString( + Metadata CodeObjectMetadata, std::string &YamlString) { + raw_string_ostream YamlStream(YamlString); + yaml::Output YamlOutput(YamlStream, nullptr, std::numeric_limits::max()); + YamlOutput << CodeObjectMetadata; + return std::error_code(); +} + +} // end namespace CodeObject +} // end namespace AMDGPU +} // end namespace llvm diff --git a/interpreter/llvm/src/lib/Support/APFloat.cpp b/interpreter/llvm/src/lib/Support/APFloat.cpp index e1e2c22e1df1a..deb76cb565d1e 100644 --- a/interpreter/llvm/src/lib/Support/APFloat.cpp +++ b/interpreter/llvm/src/lib/Support/APFloat.cpp @@ -37,10 +37,6 @@ using namespace llvm; -// TODO: Remove these and use APInt qualified types directly. -typedef APInt::WordType integerPart; -const unsigned int integerPartWidth = APInt::APINT_BITS_PER_WORD; - /// A macro used to combine two fcCategory enums into one key which can be used /// in a switch statement to classify how the interaction of two APFloat's /// categories affects an operation. @@ -51,7 +47,7 @@ const unsigned int integerPartWidth = APInt::APINT_BITS_PER_WORD; /* Assumed in hexadecimal significand parsing, and conversion to hexadecimal strings. */ -static_assert(integerPartWidth % 4 == 0, "Part width must be divisible by 4!"); +static_assert(APFloatBase::integerPartWidth % 4 == 0, "Part width must be divisible by 4!"); namespace llvm { /* Represents floating point arithmetic semantics. */ @@ -153,8 +149,7 @@ namespace llvm { const unsigned int maxExponent = 16383; const unsigned int maxPrecision = 113; const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1; - const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815) - / (351 * integerPartWidth)); + const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815) / (351 * APFloatBase::integerPartWidth)); unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) { return semantics.precision; @@ -180,7 +175,7 @@ namespace llvm { static inline unsigned int partCountForBits(unsigned int bits) { - return ((bits) + integerPartWidth - 1) / integerPartWidth; + return ((bits) + APFloatBase::integerPartWidth - 1) / APFloatBase::integerPartWidth; } /* Returns 0U-9U. Return values >= 10U are not digits. */ @@ -420,7 +415,7 @@ trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end, /* Return the fraction lost were a bignum truncated losing the least significant BITS bits. 
*/ static lostFraction -lostFractionThroughTruncation(const integerPart *parts, +lostFractionThroughTruncation(const APFloatBase::integerPart *parts, unsigned int partCount, unsigned int bits) { @@ -433,7 +428,7 @@ lostFractionThroughTruncation(const integerPart *parts, return lfExactlyZero; if (bits == lsb + 1) return lfExactlyHalf; - if (bits <= partCount * integerPartWidth && + if (bits <= partCount * APFloatBase::integerPartWidth && APInt::tcExtractBit(parts, bits - 1)) return lfMoreThanHalf; @@ -442,7 +437,7 @@ lostFractionThroughTruncation(const integerPart *parts, /* Shift DST right BITS bits noting lost fraction. */ static lostFraction -shiftRight(integerPart *dst, unsigned int parts, unsigned int bits) +shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits) { lostFraction lost_fraction; @@ -489,22 +484,22 @@ HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2) /* The number of ulps from the boundary (zero, or half if ISNEAREST) when the least significant BITS are truncated. BITS cannot be zero. */ -static integerPart -ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest) -{ +static APFloatBase::integerPart +ulpsFromBoundary(const APFloatBase::integerPart *parts, unsigned int bits, + bool isNearest) { unsigned int count, partBits; - integerPart part, boundary; + APFloatBase::integerPart part, boundary; assert(bits != 0); bits--; - count = bits / integerPartWidth; - partBits = bits % integerPartWidth + 1; + count = bits / APFloatBase::integerPartWidth; + partBits = bits % APFloatBase::integerPartWidth + 1; - part = parts[count] & (~(integerPart) 0 >> (integerPartWidth - partBits)); + part = parts[count] & (~(APFloatBase::integerPart) 0 >> (APFloatBase::integerPartWidth - partBits)); if (isNearest) - boundary = (integerPart) 1 << (partBits - 1); + boundary = (APFloatBase::integerPart) 1 << (partBits - 1); else boundary = 0; @@ -518,32 +513,30 @@ ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest) if (part == boundary) { while (--count) if (parts[count]) - return ~(integerPart) 0; /* A lot. */ + return ~(APFloatBase::integerPart) 0; /* A lot. */ return parts[0]; } else if (part == boundary - 1) { while (--count) if (~parts[count]) - return ~(integerPart) 0; /* A lot. */ + return ~(APFloatBase::integerPart) 0; /* A lot. */ return -parts[0]; } - return ~(integerPart) 0; /* A lot. */ + return ~(APFloatBase::integerPart) 0; /* A lot. */ } /* Place pow(5, power) in DST, and return the number of parts used. DST must be at least one part larger than size of the answer. 
*/ static unsigned int -powerOf5(integerPart *dst, unsigned int power) -{ - static const integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, - 15625, 78125 }; - integerPart pow5s[maxPowerOfFiveParts * 2 + 5]; +powerOf5(APFloatBase::integerPart *dst, unsigned int power) { + static const APFloatBase::integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 15625, 78125 }; + APFloatBase::integerPart pow5s[maxPowerOfFiveParts * 2 + 5]; pow5s[0] = 78125 * 5; unsigned int partsCount[16] = { 1 }; - integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5; + APFloatBase::integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5; unsigned int result; assert(power <= maxExponent); @@ -572,7 +565,7 @@ powerOf5(integerPart *dst, unsigned int power) } if (power & 1) { - integerPart *tmp; + APFloatBase::integerPart *tmp; APInt::tcFullMultiply(p2, p1, pow5, result, pc); result += pc; @@ -608,14 +601,14 @@ static const char NaNU[] = "NAN"; significant nibble. Write out exactly COUNT hexdigits, return COUNT. */ static unsigned int -partAsHex (char *dst, integerPart part, unsigned int count, +partAsHex (char *dst, APFloatBase::integerPart part, unsigned int count, const char *hexDigitChars) { unsigned int result = count; - assert(count != 0 && count <= integerPartWidth / 4); + assert(count != 0 && count <= APFloatBase::integerPartWidth / 4); - part >>= (integerPartWidth - 4 * count); + part >>= (APFloatBase::integerPartWidth - 4 * count); while (count--) { dst[count] = hexDigitChars[part & 0xf]; part >>= 4; @@ -889,11 +882,11 @@ unsigned int IEEEFloat::partCount() const { return partCountForBits(semantics->precision + 1); } -const integerPart *IEEEFloat::significandParts() const { +const IEEEFloat::integerPart *IEEEFloat::significandParts() const { return const_cast(this)->significandParts(); } -integerPart *IEEEFloat::significandParts() { +IEEEFloat::integerPart *IEEEFloat::significandParts() { if (partCount() > 1) return significand.parts; else @@ -916,7 +909,7 @@ void IEEEFloat::incrementSignificand() { } /* Add the significand of the RHS. Returns the carry flag. */ -integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) { +IEEEFloat::integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) { integerPart *parts; parts = significandParts(); @@ -929,8 +922,8 @@ integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) { /* Subtract the significand of the RHS with a borrow flag. Returns the borrow flag. 
*/ -integerPart IEEEFloat::subtractSignificand(const IEEEFloat &rhs, - integerPart borrow) { +IEEEFloat::integerPart IEEEFloat::subtractSignificand(const IEEEFloat &rhs, + integerPart borrow) { integerPart *parts; parts = significandParts(); @@ -1559,11 +1552,13 @@ IEEEFloat::opStatus IEEEFloat::divideSpecials(const IEEEFloat &rhs) { case PackCategoriesIntoKey(fcInfinity, fcNaN): category = fcNaN; copySignificand(rhs); + LLVM_FALLTHROUGH; case PackCategoriesIntoKey(fcNaN, fcZero): case PackCategoriesIntoKey(fcNaN, fcNormal): case PackCategoriesIntoKey(fcNaN, fcInfinity): case PackCategoriesIntoKey(fcNaN, fcNaN): sign = false; + LLVM_FALLTHROUGH; case PackCategoriesIntoKey(fcInfinity, fcZero): case PackCategoriesIntoKey(fcInfinity, fcNormal): case PackCategoriesIntoKey(fcZero, fcInfinity): diff --git a/interpreter/llvm/src/lib/Support/APInt.cpp b/interpreter/llvm/src/lib/Support/APInt.cpp index ed6756f6ef3ea..c558ddd82161d 100644 --- a/interpreter/llvm/src/lib/Support/APInt.cpp +++ b/interpreter/llvm/src/lib/Support/APInt.cpp @@ -546,10 +546,7 @@ unsigned APInt::countLeadingZerosSlowCase() const { return Count; } -unsigned APInt::countLeadingOnes() const { - if (isSingleWord()) - return llvm::countLeadingOnes(U.VAL << (APINT_BITS_PER_WORD - BitWidth)); - +unsigned APInt::countLeadingOnesSlowCase() const { unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD; unsigned shift; if (!highWordBits) { @@ -573,9 +570,7 @@ unsigned APInt::countLeadingOnes() const { return Count; } -unsigned APInt::countTrailingZeros() const { - if (isSingleWord()) - return std::min(unsigned(llvm::countTrailingZeros(U.VAL)), BitWidth); +unsigned APInt::countTrailingZerosSlowCase() const { unsigned Count = 0; unsigned i = 0; for (; i < getNumWords() && U.pVal[i] == 0; ++i) @@ -1398,8 +1393,8 @@ static void KnuthDiv(uint32_t *u, uint32_t *v, uint32_t *q, uint32_t* r, DEBUG(dbgs() << '\n'); } -void APInt::divide(const APInt &LHS, unsigned lhsWords, const APInt &RHS, - unsigned rhsWords, APInt *Quotient, APInt *Remainder) { +void APInt::divide(const WordType *LHS, unsigned lhsWords, const WordType *RHS, + unsigned rhsWords, WordType *Quotient, WordType *Remainder) { assert(lhsWords >= rhsWords && "Fractional result"); // First, compose the values into an array of 32-bit words instead of @@ -1436,7 +1431,7 @@ void APInt::divide(const APInt &LHS, unsigned lhsWords, const APInt &RHS, // Initialize the dividend memset(U, 0, (m+n+1)*sizeof(uint32_t)); for (unsigned i = 0; i < lhsWords; ++i) { - uint64_t tmp = LHS.getRawData()[i]; + uint64_t tmp = LHS[i]; U[i * 2] = Lo_32(tmp); U[i * 2 + 1] = Hi_32(tmp); } @@ -1445,7 +1440,7 @@ void APInt::divide(const APInt &LHS, unsigned lhsWords, const APInt &RHS, // Initialize the divisor memset(V, 0, (n)*sizeof(uint32_t)); for (unsigned i = 0; i < rhsWords; ++i) { - uint64_t tmp = RHS.getRawData()[i]; + uint64_t tmp = RHS[i]; V[i * 2] = Lo_32(tmp); V[i * 2 + 1] = Hi_32(tmp); } @@ -1476,7 +1471,7 @@ void APInt::divide(const APInt &LHS, unsigned lhsWords, const APInt &RHS, if (n == 1) { uint32_t divisor = V[0]; uint32_t remainder = 0; - for (int i = m+n-1; i >= 0; i--) { + for (int i = m; i >= 0; i--) { uint64_t partial_dividend = Make_64(remainder, U[i]); if (partial_dividend == 0) { Q[i] = 0; @@ -1502,48 +1497,14 @@ void APInt::divide(const APInt &LHS, unsigned lhsWords, const APInt &RHS, // If the caller wants the quotient if (Quotient) { - // Set up the Quotient value's memory. - Quotient->reallocate(LHS.BitWidth); - // Clear out any previous bits. 
- Quotient->clearAllBits(); - - // The quotient is in Q. Reconstitute the quotient into Quotient's low - // order words. - // This case is currently dead as all users of divide() handle trivial cases - // earlier. - if (lhsWords == 1) { - uint64_t tmp = Make_64(Q[1], Q[0]); - if (Quotient->isSingleWord()) - Quotient->U.VAL = tmp; - else - Quotient->U.pVal[0] = tmp; - } else { - assert(!Quotient->isSingleWord() && "Quotient APInt not large enough"); - for (unsigned i = 0; i < lhsWords; ++i) - Quotient->U.pVal[i] = Make_64(Q[i*2+1], Q[i*2]); - } + for (unsigned i = 0; i < lhsWords; ++i) + Quotient[i] = Make_64(Q[i*2+1], Q[i*2]); } // If the caller wants the remainder if (Remainder) { - // Set up the Remainder value's memory. - Remainder->reallocate(RHS.BitWidth); - // Clear out any previous bits. - Remainder->clearAllBits(); - - // The remainder is in R. Reconstitute the remainder into Remainder's low - // order words. - if (rhsWords == 1) { - uint64_t tmp = Make_64(R[1], R[0]); - if (Remainder->isSingleWord()) - Remainder->U.VAL = tmp; - else - Remainder->U.pVal[0] = tmp; - } else { - assert(!Remainder->isSingleWord() && "Remainder APInt not large enough"); - for (unsigned i = 0; i < rhsWords; ++i) - Remainder->U.pVal[i] = Make_64(R[i*2+1], R[i*2]); - } + for (unsigned i = 0; i < rhsWords; ++i) + Remainder[i] = Make_64(R[i*2+1], R[i*2]); } // Clean up the memory we allocated. @@ -1555,7 +1516,7 @@ void APInt::divide(const APInt &LHS, unsigned lhsWords, const APInt &RHS, } } -APInt APInt::udiv(const APInt& RHS) const { +APInt APInt::udiv(const APInt &RHS) const { assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); // First, deal with the easy case @@ -1588,8 +1549,41 @@ APInt APInt::udiv(const APInt& RHS) const { return APInt(BitWidth, this->U.pVal[0] / RHS.U.pVal[0]); // We have to compute it the hard way. Invoke the Knuth divide algorithm. - APInt Quotient; // to hold result. - divide(*this, lhsWords, RHS, rhsWords, &Quotient, nullptr); + APInt Quotient(BitWidth, 0); // to hold result. + divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal, nullptr); + return Quotient; +} + +APInt APInt::udiv(uint64_t RHS) const { + assert(RHS != 0 && "Divide by zero?"); + + // First, deal with the easy case + if (isSingleWord()) + return APInt(BitWidth, U.VAL / RHS); + + // Get some facts about the LHS words. + unsigned lhsWords = getNumWords(getActiveBits()); + + // Deal with some degenerate cases + if (!lhsWords) + // 0 / X ===> 0 + return APInt(BitWidth, 0); + if (RHS == 1) + // X / 1 ===> X + return *this; + if (this->ult(RHS)) + // X / Y ===> 0, iff X < Y + return APInt(BitWidth, 0); + if (*this == RHS) + // X / X ===> 1 + return APInt(BitWidth, 1); + if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1. + // All high words are zero, just use native divide + return APInt(BitWidth, this->U.pVal[0] / RHS); + + // We have to compute it the hard way. Invoke the Knuth divide algorithm. + APInt Quotient(BitWidth, 0); // to hold result. 
+ divide(U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, nullptr); return Quotient; } @@ -1604,7 +1598,18 @@ APInt APInt::sdiv(const APInt &RHS) const { return this->udiv(RHS); } -APInt APInt::urem(const APInt& RHS) const { +APInt APInt::sdiv(int64_t RHS) const { + if (isNegative()) { + if (RHS < 0) + return (-(*this)).udiv(-RHS); + return -((-(*this)).udiv(RHS)); + } + if (RHS < 0) + return -(this->udiv(-RHS)); + return this->udiv(RHS); +} + +APInt APInt::urem(const APInt &RHS) const { assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); if (isSingleWord()) { assert(RHS.U.VAL != 0 && "Remainder by zero?"); @@ -1637,8 +1642,40 @@ APInt APInt::urem(const APInt& RHS) const { return APInt(BitWidth, U.pVal[0] % RHS.U.pVal[0]); // We have to compute it the hard way. Invoke the Knuth divide algorithm. - APInt Remainder; - divide(*this, lhsWords, RHS, rhsWords, nullptr, &Remainder); + APInt Remainder(BitWidth, 0); + divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, nullptr, Remainder.U.pVal); + return Remainder; +} + +uint64_t APInt::urem(uint64_t RHS) const { + assert(RHS != 0 && "Remainder by zero?"); + + if (isSingleWord()) + return U.VAL % RHS; + + // Get some facts about the LHS + unsigned lhsWords = getNumWords(getActiveBits()); + + // Check the degenerate cases + if (lhsWords == 0) + // 0 % Y ===> 0 + return 0; + if (RHS == 1) + // X % 1 ===> 0 + return 0; + if (this->ult(RHS)) + // X % Y ===> X, iff X < Y + return getZExtValue(); + if (*this == RHS) + // X % X == 0; + return 0; + if (lhsWords == 1) + // All high words are zero, just use native remainder + return U.pVal[0] % RHS; + + // We have to compute it the hard way. Invoke the Knuth divide algorithm. + uint64_t Remainder; + divide(U.pVal, lhsWords, &RHS, 1, nullptr, &Remainder); return Remainder; } @@ -1653,6 +1690,17 @@ APInt APInt::srem(const APInt &RHS) const { return this->urem(RHS); } +int64_t APInt::srem(int64_t RHS) const { + if (isNegative()) { + if (RHS < 0) + return -((-(*this)).urem(-RHS)); + return -((-(*this)).urem(RHS)); + } + if (RHS < 0) + return this->urem(-RHS); + return this->urem(RHS); +} + void APInt::udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder) { assert(LHS.BitWidth == RHS.BitWidth && "Bit widths must be the same"); @@ -1698,20 +1746,90 @@ void APInt::udivrem(const APInt &LHS, const APInt &RHS, return; } + // Make sure there is enough space to hold the results. + // NOTE: This assumes that reallocate won't affect any bits if it doesn't + // change the size. This is necessary if Quotient or Remainder is aliased + // with LHS or RHS. + Quotient.reallocate(BitWidth); + Remainder.reallocate(BitWidth); + if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1. // There is only one word to consider so use the native versions. uint64_t lhsValue = LHS.U.pVal[0]; uint64_t rhsValue = RHS.U.pVal[0]; - // Make sure there is enough space to hold the results. - Quotient.reallocate(BitWidth); - Remainder.reallocate(BitWidth); Quotient = lhsValue / rhsValue; Remainder = lhsValue % rhsValue; return; } // Okay, lets do it the long way - divide(LHS, lhsWords, RHS, rhsWords, &Quotient, &Remainder); + divide(LHS.U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal, + Remainder.U.pVal); + // Clear the rest of the Quotient and Remainder. 
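// sdiv(int64_t) and srem(int64_t) above reduce signed division to the
// unsigned core: strip the signs, divide, then negate the quotient when
// exactly one operand was negative and give the remainder the dividend's
// sign. The same identity in plain 64-bit arithmetic (a sketch; INT64_MIN
// overflow is ignored for brevity):
#include <cstdint>

static void sdivrem64(int64_t A, int64_t B, int64_t &Q, int64_t &R) {
  uint64_t UA = A < 0 ? 0 - static_cast<uint64_t>(A) : A;
  uint64_t UB = B < 0 ? 0 - static_cast<uint64_t>(B) : B;
  uint64_t UQ = UA / UB;
  uint64_t UR = UA % UB;
  Q = (A < 0) != (B < 0) ? -static_cast<int64_t>(UQ) : static_cast<int64_t>(UQ);
  R = A < 0 ? -static_cast<int64_t>(UR) : static_cast<int64_t>(UR);
}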
+  std::memset(Quotient.U.pVal + lhsWords, 0,
+              (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE);
+  std::memset(Remainder.U.pVal + rhsWords, 0,
+              (getNumWords(BitWidth) - rhsWords) * APINT_WORD_SIZE);
+}
+
+void APInt::udivrem(const APInt &LHS, uint64_t RHS, APInt &Quotient,
+                    uint64_t &Remainder) {
+  assert(RHS != 0 && "Divide by zero?");
+  unsigned BitWidth = LHS.BitWidth;
+
+  // First, deal with the easy case
+  if (LHS.isSingleWord()) {
+    uint64_t QuotVal = LHS.U.VAL / RHS;
+    Remainder = LHS.U.VAL % RHS;
+    Quotient = APInt(BitWidth, QuotVal);
+    return;
+  }
+
+  // Get some size facts about the dividend and divisor
+  unsigned lhsWords = getNumWords(LHS.getActiveBits());
+
+  // Check the degenerate cases
+  if (lhsWords == 0) {
+    Quotient = 0;                   // 0 / Y ===> 0
+    Remainder = 0;                  // 0 % Y ===> 0
+    return;
+  }
+
+  if (RHS == 1) {
+    Quotient = LHS;                 // X / 1 ===> X
+    Remainder = 0;                  // X % 1 ===> 0
+    return;
+  }
+
+  if (LHS.ult(RHS)) {
+    Remainder = LHS.getZExtValue(); // X % Y ===> X, iff X < Y
+    Quotient = 0;                   // X / Y ===> 0, iff X < Y
+    return;
+  }
+
+  if (LHS == RHS) {
+    Quotient = 1;                   // X / X ===> 1
+    Remainder = 0;                  // X % X ===> 0;
+    return;
+  }
+
+  // Make sure there is enough space to hold the results.
+  // NOTE: This assumes that reallocate won't affect any bits if it doesn't
+  // change the size. This is necessary if Quotient is aliased with LHS.
+  Quotient.reallocate(BitWidth);
+
+  if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1.
+    // There is only one word to consider so use the native versions.
+    uint64_t lhsValue = LHS.U.pVal[0];
+    Quotient = lhsValue / RHS;
+    Remainder = lhsValue % RHS;
+    return;
+  }
+
+  // Okay, lets do it the long way
+  divide(LHS.U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, &Remainder);
+  // Clear the rest of the Quotient.
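// udivrem above produces quotient and remainder in a single Knuth pass
// instead of two, which is what the base-conversion loop in toString below
// relies on (one udivrem by the radix per output digit). A usage sketch
// with invented values:
#include "llvm/ADT/APInt.h"

void udivremOnce() {
  llvm::APInt N(256, 1000003);
  llvm::APInt Q(256, 0);
  uint64_t Rem = 0;
  // One pass computes N / 10 and N % 10 together.
  llvm::APInt::udivrem(N, 10, Q, Rem);
  (void)Q; (void)Rem;
}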
+ std::memset(Quotient.U.pVal + lhsWords, 0, + (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE); } void APInt::sdivrem(const APInt &LHS, const APInt &RHS, @@ -1732,6 +1850,26 @@ void APInt::sdivrem(const APInt &LHS, const APInt &RHS, } } +void APInt::sdivrem(const APInt &LHS, int64_t RHS, + APInt &Quotient, int64_t &Remainder) { + uint64_t R = Remainder; + if (LHS.isNegative()) { + if (RHS < 0) + APInt::udivrem(-LHS, -RHS, Quotient, R); + else { + APInt::udivrem(-LHS, RHS, Quotient, R); + Quotient.negate(); + } + R = -R; + } else if (RHS < 0) { + APInt::udivrem(LHS, -RHS, Quotient, R); + Quotient.negate(); + } else { + APInt::udivrem(LHS, RHS, Quotient, R); + } + Remainder = R; +} + APInt APInt::sadd_ov(const APInt &RHS, bool &Overflow) const { APInt Res = *this+RHS; Overflow = isNonNegative() == RHS.isNonNegative() && @@ -1902,7 +2040,7 @@ void APInt::toString(SmallVectorImpl &Str, unsigned Radix, if (isSingleWord()) { char Buffer[65]; - char *BufPtr = Buffer+65; + char *BufPtr = std::end(Buffer); uint64_t N; if (!Signed) { @@ -1926,7 +2064,7 @@ void APInt::toString(SmallVectorImpl &Str, unsigned Radix, *--BufPtr = Digits[N % Radix]; N /= Radix; } - Str.append(BufPtr, Buffer+65); + Str.append(BufPtr, std::end(Buffer)); return; } @@ -1962,11 +2100,9 @@ void APInt::toString(SmallVectorImpl &Str, unsigned Radix, Tmp.lshrInPlace(ShiftAmt); } } else { - APInt divisor(Tmp.getBitWidth(), Radix); - APInt APdigit; while (Tmp.getBoolValue()) { - udivrem(Tmp, divisor, Tmp, APdigit); - unsigned Digit = (unsigned)APdigit.getZExtValue(); + uint64_t Digit; + udivrem(Tmp, Radix, Tmp, Digit); assert(Digit < Radix && "divide failed"); Str.push_back(Digits[Digit]); } diff --git a/interpreter/llvm/src/lib/Support/ARMAttributeParser.cpp b/interpreter/llvm/src/lib/Support/ARMAttributeParser.cpp index 63e800a5b78b0..a9a0c1d1a4d3d 100644 --- a/interpreter/llvm/src/lib/Support/ARMAttributeParser.cpp +++ b/interpreter/llvm/src/lib/Support/ARMAttributeParser.cpp @@ -7,9 +7,9 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Support/ARMAttributeParser.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/Support/ARMAttributeParser.h" #include "llvm/Support/LEB128.h" #include "llvm/Support/ScopedPrinter.h" diff --git a/interpreter/llvm/src/lib/Support/ARMBuildAttrs.cpp b/interpreter/llvm/src/lib/Support/ARMBuildAttrs.cpp index 134ef8b587b72..8f18e9eb24eda 100644 --- a/interpreter/llvm/src/lib/Support/ARMBuildAttrs.cpp +++ b/interpreter/llvm/src/lib/Support/ARMBuildAttrs.cpp @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Support/ARMBuildAttributes.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/ARMBuildAttributes.h" using namespace llvm; diff --git a/interpreter/llvm/src/lib/Support/Atomic.cpp b/interpreter/llvm/src/lib/Support/Atomic.cpp index 80550e2b46a7c..55910c489faf5 100644 --- a/interpreter/llvm/src/lib/Support/Atomic.cpp +++ b/interpreter/llvm/src/lib/Support/Atomic.cpp @@ -18,6 +18,8 @@ using namespace llvm; #if defined(_MSC_VER) #include + +// We must include windows.h after Intrin.h. 
 #include <windows.h>
 #undef MemoryFence
 #endif
diff --git a/interpreter/llvm/src/lib/Support/BinaryStreamReader.cpp b/interpreter/llvm/src/lib/Support/BinaryStreamReader.cpp
index 702d98770e052..e00527f2519e1 100644
--- a/interpreter/llvm/src/lib/Support/BinaryStreamReader.cpp
+++ b/interpreter/llvm/src/lib/Support/BinaryStreamReader.cpp
@@ -13,9 +13,18 @@
 #include "llvm/Support/BinaryStreamRef.h"
 
 using namespace llvm;
+using endianness = llvm::support::endianness;
 
-BinaryStreamReader::BinaryStreamReader(BinaryStreamRef S)
-    : Stream(S), Offset(0) {}
+BinaryStreamReader::BinaryStreamReader(BinaryStreamRef Ref) : Stream(Ref) {}
+
+BinaryStreamReader::BinaryStreamReader(BinaryStream &Stream) : Stream(Stream) {}
+
+BinaryStreamReader::BinaryStreamReader(ArrayRef<uint8_t> Data,
+                                       endianness Endian)
+    : Stream(Data, Endian) {}
+
+BinaryStreamReader::BinaryStreamReader(StringRef Data, endianness Endian)
+    : Stream(Data, Endian) {}
 
 Error BinaryStreamReader::readLongestContiguousChunk(
     ArrayRef<uint8_t> &Buffer) {
@@ -33,28 +42,49 @@ Error BinaryStreamReader::readBytes(ArrayRef<uint8_t> &Buffer, uint32_t Size) {
 }
 
 Error BinaryStreamReader::readCString(StringRef &Dest) {
-  // TODO: This could be made more efficient by using readLongestContiguousChunk
-  // and searching for null terminators in the resulting buffer.
+  uint32_t OriginalOffset = getOffset();
+  uint32_t FoundOffset = 0;
+  while (true) {
+    uint32_t ThisOffset = getOffset();
+    ArrayRef<uint8_t> Buffer;
+    if (auto EC = readLongestContiguousChunk(Buffer))
+      return EC;
+    StringRef S(reinterpret_cast<const char *>(Buffer.begin()), Buffer.size());
+    size_t Pos = S.find_first_of('\0');
+    if (LLVM_LIKELY(Pos != StringRef::npos)) {
+      FoundOffset = Pos + ThisOffset;
+      break;
+    }
+  }
+  assert(FoundOffset >= OriginalOffset);
+
+  setOffset(OriginalOffset);
+  size_t Length = FoundOffset - OriginalOffset;
+
+  if (auto EC = readFixedString(Dest, Length))
+    return EC;
+
+  // Now set the offset back to after the null terminator.
+  setOffset(FoundOffset + 1);
+  return Error::success();
+}
 
+Error BinaryStreamReader::readWideString(ArrayRef<UTF16> &Dest) {
   uint32_t Length = 0;
-  // First compute the length of the string by reading 1 byte at a time.
   uint32_t OriginalOffset = getOffset();
-  const char *C;
+  const UTF16 *C;
   while (true) {
     if (auto EC = readObject(C))
       return EC;
-    if (*C == '\0')
+    if (*C == 0x0000)
       break;
     ++Length;
   }
 
-  // Now go back and request a reference for that many bytes.
   uint32_t NewOffset = getOffset();
   setOffset(OriginalOffset);
 
-  if (auto EC = readFixedString(Dest, Length))
+  if (auto EC = readArray(Dest, Length))
     return EC;
-
-  // Now set the offset back to where it was after we calculated the length.
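// readCString above now scans whole contiguous chunks for the terminator
// instead of reading one byte at a time. The search idea in isolation, as
// a sketch over an already-chunked buffer (the Chunk type is invented
// here):
#include <cstring>
#include <utility>
#include <vector>

using Chunk = std::pair<const char *, size_t>;

// Returns the global offset of the first '\0', or -1 if none is found.
static long long findNul(const std::vector<Chunk> &Chunks) {
  size_t Base = 0;
  for (const Chunk &C : Chunks) {
    if (const void *P = std::memchr(C.first, '\0', C.second))
      return Base + (static_cast<const char *>(P) - C.first);
    Base += C.second;
  }
  return -1;
}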
setOffset(NewOffset); return Error::success(); } @@ -79,6 +109,12 @@ Error BinaryStreamReader::readStreamRef(BinaryStreamRef &Ref, uint32_t Length) { return Error::success(); } +Error BinaryStreamReader::readSubstream(BinarySubstreamRef &Stream, + uint32_t Size) { + Stream.Offset = getOffset(); + return readStreamRef(Stream.StreamData, Size); +} + Error BinaryStreamReader::skip(uint32_t Amount) { if (Amount > bytesRemaining()) return make_error(stream_error_code::stream_too_short); @@ -86,6 +122,11 @@ Error BinaryStreamReader::skip(uint32_t Amount) { return Error::success(); } +Error BinaryStreamReader::padToAlignment(uint32_t Align) { + uint32_t NewOffset = alignTo(Offset, Align); + return skip(NewOffset - Offset); +} + uint8_t BinaryStreamReader::peek() const { ArrayRef Buffer; auto EC = Stream.readBytes(Offset, 1, Buffer); diff --git a/interpreter/llvm/src/lib/Support/BinaryStreamRef.cpp b/interpreter/llvm/src/lib/Support/BinaryStreamRef.cpp new file mode 100644 index 0000000000000..fe9a8171e1460 --- /dev/null +++ b/interpreter/llvm/src/lib/Support/BinaryStreamRef.cpp @@ -0,0 +1,137 @@ +//===- BinaryStreamRef.cpp - ----------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/BinaryStreamRef.h" +#include "llvm/Support/BinaryByteStream.h" + +using namespace llvm; +using namespace llvm::support; + +namespace { + +class ArrayRefImpl : public BinaryStream { +public: + ArrayRefImpl(ArrayRef Data, endianness Endian) : BBS(Data, Endian) {} + + llvm::support::endianness getEndian() const override { + return BBS.getEndian(); + } + Error readBytes(uint32_t Offset, uint32_t Size, + ArrayRef &Buffer) override { + return BBS.readBytes(Offset, Size, Buffer); + } + Error readLongestContiguousChunk(uint32_t Offset, + ArrayRef &Buffer) override { + return BBS.readLongestContiguousChunk(Offset, Buffer); + } + uint32_t getLength() override { return BBS.getLength(); } + +private: + BinaryByteStream BBS; +}; + +class MutableArrayRefImpl : public WritableBinaryStream { +public: + MutableArrayRefImpl(MutableArrayRef Data, endianness Endian) + : BBS(Data, Endian) {} + + // Inherited via WritableBinaryStream + llvm::support::endianness getEndian() const override { + return BBS.getEndian(); + } + Error readBytes(uint32_t Offset, uint32_t Size, + ArrayRef &Buffer) override { + return BBS.readBytes(Offset, Size, Buffer); + } + Error readLongestContiguousChunk(uint32_t Offset, + ArrayRef &Buffer) override { + return BBS.readLongestContiguousChunk(Offset, Buffer); + } + uint32_t getLength() override { return BBS.getLength(); } + + Error writeBytes(uint32_t Offset, ArrayRef Data) override { + return BBS.writeBytes(Offset, Data); + } + Error commit() override { return BBS.commit(); } + +private: + MutableBinaryByteStream BBS; +}; +} + +BinaryStreamRef::BinaryStreamRef(BinaryStream &Stream) + : BinaryStreamRef(Stream, 0, Stream.getLength()) {} +BinaryStreamRef::BinaryStreamRef(BinaryStream &Stream, uint32_t Offset, + uint32_t Length) + : BinaryStreamRefBase(Stream, Offset, Length) {} +BinaryStreamRef::BinaryStreamRef(ArrayRef Data, endianness Endian) + : BinaryStreamRefBase(std::make_shared(Data, Endian), 0, + Data.size()) {} +BinaryStreamRef::BinaryStreamRef(StringRef Data, endianness Endian) + : BinaryStreamRef(makeArrayRef(Data.bytes_begin(), 
Data.bytes_end()), + Endian) {} + +BinaryStreamRef::BinaryStreamRef(const BinaryStreamRef &Other) + : BinaryStreamRefBase(Other) {} + +Error BinaryStreamRef::readBytes(uint32_t Offset, uint32_t Size, + ArrayRef &Buffer) const { + if (auto EC = checkOffset(Offset, Size)) + return EC; + return BorrowedImpl->readBytes(ViewOffset + Offset, Size, Buffer); +} + +Error BinaryStreamRef::readLongestContiguousChunk( + uint32_t Offset, ArrayRef &Buffer) const { + if (auto EC = checkOffset(Offset, 1)) + return EC; + + if (auto EC = + BorrowedImpl->readLongestContiguousChunk(ViewOffset + Offset, Buffer)) + return EC; + // This StreamRef might refer to a smaller window over a larger stream. In + // that case we will have read out more bytes than we should return, because + // we should not read past the end of the current view. + uint32_t MaxLength = Length - Offset; + if (Buffer.size() > MaxLength) + Buffer = Buffer.slice(0, MaxLength); + return Error::success(); +} + +WritableBinaryStreamRef::WritableBinaryStreamRef(WritableBinaryStream &Stream) + : WritableBinaryStreamRef(Stream, 0, Stream.getLength()) {} + +WritableBinaryStreamRef::WritableBinaryStreamRef(WritableBinaryStream &Stream, + uint32_t Offset, + uint32_t Length) + : BinaryStreamRefBase(Stream, Offset, Length) {} + +WritableBinaryStreamRef::WritableBinaryStreamRef(MutableArrayRef Data, + endianness Endian) + : BinaryStreamRefBase(std::make_shared(Data, Endian), + 0, Data.size()) {} + +WritableBinaryStreamRef::WritableBinaryStreamRef( + const WritableBinaryStreamRef &Other) + : BinaryStreamRefBase(Other) {} + +Error WritableBinaryStreamRef::writeBytes(uint32_t Offset, + ArrayRef Data) const { + if (auto EC = checkOffset(Offset, Data.size())) + return EC; + + return BorrowedImpl->writeBytes(ViewOffset + Offset, Data); +} + +WritableBinaryStreamRef::operator BinaryStreamRef() const { + return BinaryStreamRef(*BorrowedImpl, ViewOffset, Length); +} + +/// \brief For buffered streams, commits changes to the backing store. 
+Error WritableBinaryStreamRef::commit() { return BorrowedImpl->commit(); } diff --git a/interpreter/llvm/src/lib/Support/BinaryStreamWriter.cpp b/interpreter/llvm/src/lib/Support/BinaryStreamWriter.cpp index d78dbc68f5936..c4276518b1919 100644 --- a/interpreter/llvm/src/lib/Support/BinaryStreamWriter.cpp +++ b/interpreter/llvm/src/lib/Support/BinaryStreamWriter.cpp @@ -15,8 +15,15 @@ using namespace llvm; -BinaryStreamWriter::BinaryStreamWriter(WritableBinaryStreamRef S) - : Stream(S), Offset(0) {} +BinaryStreamWriter::BinaryStreamWriter(WritableBinaryStreamRef Ref) + : Stream(Ref) {} + +BinaryStreamWriter::BinaryStreamWriter(WritableBinaryStream &Stream) + : Stream(Stream) {} + +BinaryStreamWriter::BinaryStreamWriter(MutableArrayRef Data, + llvm::support::endianness Endian) + : Stream(Data, Endian) {} Error BinaryStreamWriter::writeBytes(ArrayRef Buffer) { if (auto EC = Stream.writeBytes(Offset, Buffer)) @@ -76,6 +83,8 @@ Error BinaryStreamWriter::padToAlignment(uint32_t Align) { uint32_t NewOffset = alignTo(Offset, Align); if (NewOffset > getLength()) return make_error(stream_error_code::stream_too_short); - Offset = NewOffset; + while (Offset < NewOffset) + if (auto EC = writeInteger('\0')) + return EC; return Error::success(); } diff --git a/interpreter/llvm/src/lib/Support/CMakeLists.txt b/interpreter/llvm/src/lib/Support/CMakeLists.txt index f77d273ac9bff..0a8e3897cce92 100644 --- a/interpreter/llvm/src/lib/Support/CMakeLists.txt +++ b/interpreter/llvm/src/lib/Support/CMakeLists.txt @@ -30,6 +30,7 @@ elseif( CMAKE_HOST_UNIX ) endif( MSVC OR MINGW ) add_llvm_library(LLVMSupport + AMDGPUCodeObjectMetadata.cpp APFloat.cpp APInt.cpp APSInt.cpp @@ -39,6 +40,7 @@ add_llvm_library(LLVMSupport Allocator.cpp BinaryStreamError.cpp BinaryStreamReader.cpp + BinaryStreamRef.cpp BinaryStreamWriter.cpp BlockFrequency.cpp BranchProbability.cpp @@ -56,7 +58,6 @@ add_llvm_library(LLVMSupport DebugCounter.cpp DeltaAlgorithm.cpp DAGDeltaAlgorithm.cpp - Dwarf.cpp Error.cpp ErrorHandling.cpp FileUtilities.cpp @@ -131,7 +132,6 @@ add_llvm_library(LLVMSupport Process.cpp Program.cpp RWMutex.cpp - SearchForAddressOfSpecialSymbol.cpp Signals.cpp TargetRegistry.cpp ThreadLocal.cpp diff --git a/interpreter/llvm/src/lib/Support/CachePruning.cpp b/interpreter/llvm/src/lib/Support/CachePruning.cpp index aca1236395655..60d0964f27646 100644 --- a/interpreter/llvm/src/lib/Support/CachePruning.cpp +++ b/interpreter/llvm/src/lib/Support/CachePruning.cpp @@ -82,7 +82,7 @@ llvm::parseCachePruningPolicy(StringRef PolicyStr) { if (Value.back() != '%') return make_error("'" + Value + "' must be a percentage", inconvertibleErrorCode()); - StringRef SizeStr = Value.slice(0, Value.size() - 1); + StringRef SizeStr = Value.drop_back(); uint64_t Size; if (SizeStr.getAsInteger(0, Size)) return make_error("'" + SizeStr + "' not an integer", @@ -91,7 +91,28 @@ llvm::parseCachePruningPolicy(StringRef PolicyStr) { return make_error("'" + SizeStr + "' must be between 0 and 100", inconvertibleErrorCode()); - Policy.PercentageOfAvailableSpace = Size; + Policy.MaxSizePercentageOfAvailableSpace = Size; + } else if (Key == "cache_size_bytes") { + uint64_t Mult = 1; + switch (tolower(Value.back())) { + case 'k': + Mult = 1024; + Value = Value.drop_back(); + break; + case 'm': + Mult = 1024 * 1024; + Value = Value.drop_back(); + break; + case 'g': + Mult = 1024 * 1024 * 1024; + Value = Value.drop_back(); + break; + } + uint64_t Size; + if (Value.getAsInteger(0, Size)) + return make_error("'" + Value + "' not an integer", + 
inconvertibleErrorCode()); + Policy.MaxSizeBytes = Size * Mult; } else { return make_error("Unknown key: '" + Key + "'", inconvertibleErrorCode()); @@ -115,11 +136,12 @@ bool llvm::pruneCache(StringRef Path, CachePruningPolicy Policy) { if (!isPathDir) return false; - Policy.PercentageOfAvailableSpace = - std::min(Policy.PercentageOfAvailableSpace, 100u); + Policy.MaxSizePercentageOfAvailableSpace = + std::min(Policy.MaxSizePercentageOfAvailableSpace, 100u); if (Policy.Expiration == seconds(0) && - Policy.PercentageOfAvailableSpace == 0) { + Policy.MaxSizePercentageOfAvailableSpace == 0 && + Policy.MaxSizeBytes == 0) { DEBUG(dbgs() << "No pruning settings set, exit early\n"); // Nothing will be pruned, early exit return false; @@ -157,7 +179,8 @@ bool llvm::pruneCache(StringRef Path, CachePruningPolicy Policy) { writeTimestampFile(TimestampFile); } - bool ShouldComputeSize = (Policy.PercentageOfAvailableSpace > 0); + bool ShouldComputeSize = + (Policy.MaxSizePercentageOfAvailableSpace > 0 || Policy.MaxSizeBytes > 0); // Keep track of space std::set> FileSizes; @@ -216,14 +239,22 @@ bool llvm::pruneCache(StringRef Path, CachePruningPolicy Policy) { } sys::fs::space_info SpaceInfo = ErrOrSpaceInfo.get(); auto AvailableSpace = TotalSize + SpaceInfo.free; - auto FileAndSize = FileSizes.rbegin(); + + if (Policy.MaxSizePercentageOfAvailableSpace == 0) + Policy.MaxSizePercentageOfAvailableSpace = 100; + if (Policy.MaxSizeBytes == 0) + Policy.MaxSizeBytes = AvailableSpace; + auto TotalSizeTarget = std::min( + AvailableSpace * Policy.MaxSizePercentageOfAvailableSpace / 100ull, + Policy.MaxSizeBytes); + DEBUG(dbgs() << "Occupancy: " << ((100 * TotalSize) / AvailableSpace) - << "% target is: " << Policy.PercentageOfAvailableSpace - << "\n"); + << "% target is: " << Policy.MaxSizePercentageOfAvailableSpace + << "%, " << Policy.MaxSizeBytes << " bytes\n"); + + auto FileAndSize = FileSizes.rbegin(); // Remove the oldest accessed files first, till we get below the threshold - while (((100 * TotalSize) / AvailableSpace) > - Policy.PercentageOfAvailableSpace && - FileAndSize != FileSizes.rend()) { + while (TotalSize > TotalSizeTarget && FileAndSize != FileSizes.rend()) { // Remove the file. sys::fs::remove(FileAndSize->second); // Update size diff --git a/interpreter/llvm/src/lib/Support/CommandLine.cpp b/interpreter/llvm/src/lib/Support/CommandLine.cpp index 34345901eab1f..8eeb685a18a9a 100644 --- a/interpreter/llvm/src/lib/Support/CommandLine.cpp +++ b/interpreter/llvm/src/lib/Support/CommandLine.cpp @@ -24,6 +24,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/Twine.h" #include "llvm/Config/config.h" @@ -1235,7 +1236,7 @@ bool CommandLineParser::ParseCommandLineOptions(int argc, << ": Not enough positional command line arguments specified!\n" << "Must specify at least " << NumPositionalRequired << " positional argument" << (NumPositionalRequired > 1 ? 
"s" : "") - << ": See: " << argv[0] << " - help\n"; + << ": See: " << argv[0] << " -help\n"; ErrorParsing = true; } else if (!HasUnlimitedPositionals && @@ -1522,13 +1523,9 @@ bool parser::parse(Option &O, StringRef ArgName, // parser/parser implementation // static bool parseDouble(Option &O, StringRef Arg, double &Value) { - SmallString<32> TmpStr(Arg.begin(), Arg.end()); - const char *ArgStart = TmpStr.c_str(); - char *End; - Value = strtod(ArgStart, &End); - if (*End != 0) - return O.error("'" + Arg + "' value invalid for floating point argument!"); - return false; + if (to_float(Arg, Value)) + return false; + return O.error("'" + Arg + "' value invalid for floating point argument!"); } bool parser::parse(Option &O, StringRef ArgName, StringRef Arg, diff --git a/interpreter/llvm/src/lib/Support/ConvertUTF.cpp b/interpreter/llvm/src/lib/Support/ConvertUTF.cpp index 39fd218d3f071..e56854a3ae428 100644 --- a/interpreter/llvm/src/lib/Support/ConvertUTF.cpp +++ b/interpreter/llvm/src/lib/Support/ConvertUTF.cpp @@ -46,13 +46,40 @@ ------------------------------------------------------------------------ */ - #include "llvm/Support/ConvertUTF.h" #ifdef CVTUTF_DEBUG #include #endif #include +/* + * This code extensively uses fall-through switches. + * Keep the compiler from warning about that. + */ +#if defined(__clang__) && defined(__has_warning) +# if __has_warning("-Wimplicit-fallthrough") +# define ConvertUTF_DISABLE_WARNINGS \ + _Pragma("clang diagnostic push") \ + _Pragma("clang diagnostic ignored \"-Wimplicit-fallthrough\"") +# define ConvertUTF_RESTORE_WARNINGS \ + _Pragma("clang diagnostic pop") +# endif +#elif defined(__GNUC__) && __GNUC__ > 6 +# define ConvertUTF_DISABLE_WARNINGS \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wimplicit-fallthrough\"") +# define ConvertUTF_RESTORE_WARNINGS \ + _Pragma("GCC diagnostic pop") +#endif +#ifndef ConvertUTF_DISABLE_WARNINGS +# define ConvertUTF_DISABLE_WARNINGS +#endif +#ifndef ConvertUTF_RESTORE_WARNINGS +# define ConvertUTF_RESTORE_WARNINGS +#endif + +ConvertUTF_DISABLE_WARNINGS + namespace llvm { static const int halfShift = 10; /* used for shifting by 10 bits */ @@ -708,3 +735,5 @@ ConversionResult ConvertUTF8toUTF32(const UTF8 **sourceStart, --------------------------------------------------------------------- */ } // namespace llvm + +ConvertUTF_RESTORE_WARNINGS diff --git a/interpreter/llvm/src/lib/Support/ConvertUTFWrapper.cpp b/interpreter/llvm/src/lib/Support/ConvertUTFWrapper.cpp index 217cedb24df69..6cb4f63762500 100644 --- a/interpreter/llvm/src/lib/Support/ConvertUTFWrapper.cpp +++ b/interpreter/llvm/src/lib/Support/ConvertUTFWrapper.cpp @@ -7,9 +7,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Support/ConvertUTF.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/ConvertUTF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/SwapByteOrder.h" #include diff --git a/interpreter/llvm/src/lib/Support/CrashRecoveryContext.cpp b/interpreter/llvm/src/lib/Support/CrashRecoveryContext.cpp index 98865f5e065e7..bd38dd88201fd 100644 --- a/interpreter/llvm/src/lib/Support/CrashRecoveryContext.cpp +++ b/interpreter/llvm/src/lib/Support/CrashRecoveryContext.cpp @@ -78,6 +78,9 @@ static bool gCrashRecoveryEnabled = false; static ManagedStatic> tlIsRecoveringFromCrash; +static void installExceptionOrSignalHandlers(); +static void uninstallExceptionOrSignalHandlers(); + 
CrashRecoveryContextCleanup::~CrashRecoveryContextCleanup() {} CrashRecoveryContext::~CrashRecoveryContext() { @@ -113,6 +116,23 @@ CrashRecoveryContext *CrashRecoveryContext::GetCurrent() { return CRCI->CRC; } +void CrashRecoveryContext::Enable() { + sys::ScopedLock L(*gCrashRecoveryContextMutex); + // FIXME: Shouldn't this be a refcount or something? + if (gCrashRecoveryEnabled) + return; + gCrashRecoveryEnabled = true; + installExceptionOrSignalHandlers(); +} + +void CrashRecoveryContext::Disable() { + sys::ScopedLock L(*gCrashRecoveryContextMutex); + if (!gCrashRecoveryEnabled) + return; + gCrashRecoveryEnabled = false; + uninstallExceptionOrSignalHandlers(); +} + void CrashRecoveryContext::registerCleanup(CrashRecoveryContextCleanup *cleanup) { if (!cleanup) @@ -140,30 +160,70 @@ CrashRecoveryContext::unregisterCleanup(CrashRecoveryContextCleanup *cleanup) { delete cleanup; } -#ifdef LLVM_ON_WIN32 +#if defined(_MSC_VER) +// If _MSC_VER is defined, we must have SEH. Use it if it's available. It's way +// better than VEH. Vectored exception handling catches all exceptions happening +// on the thread with installed exception handlers, so it can interfere with +// internal exception handling of other libraries on that thread. SEH works +// exactly as you would expect normal exception handling to work: it only +// catches exceptions if they would bubble out from the stack frame with __try / +// __except. -#include "Windows/WindowsSupport.h" +static void installExceptionOrSignalHandlers() {} +static void uninstallExceptionOrSignalHandlers() {} -// On Windows, we can make use of vectored exception handling to -// catch most crashing situations. Note that this does mean -// we will be alerted of exceptions *before* structured exception -// handling has the opportunity to catch it. But that isn't likely -// to cause problems because nowhere in the project is SEH being -// used. +bool CrashRecoveryContext::RunSafely(function_ref Fn) { + if (!gCrashRecoveryEnabled) { + Fn(); + return true; + } + + bool Result = true; + __try { + Fn(); + } __except (1) { // Catch any exception. + Result = false; + } + return Result; +} + +#else // !_MSC_VER + +#if defined(LLVM_ON_WIN32) +// This is a non-MSVC compiler, probably mingw gcc or clang without +// -fms-extensions. Use vectored exception handling (VEH). +// +// On Windows, we can make use of vectored exception handling to catch most +// crashing situations. Note that this does mean we will be alerted of +// exceptions *before* structured exception handling has the opportunity to +// catch it. Unfortunately, this causes problems in practice with other code +// running on threads with LLVM crash recovery contexts, so we would like to +// eventually move away from VEH. // -// Vectored exception handling is built on top of SEH, and so it -// works on a per-thread basis. +// Vectored works on a per-thread basis, which is an advantage over +// SetUnhandledExceptionFilter. SetUnhandledExceptionFilter also doesn't have +// any native support for chaining exception handlers, but VEH allows more than +// one. // // The vectored exception handler functionality was added in Windows // XP, so if support for older versions of Windows is required, // it will have to be added. -// -// If we want to support as far back as Win2k, we could use the -// SetUnhandledExceptionFilter API, but there's a risk of that -// being entirely overwritten (it's not a chain). 
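// The FIXME in Enable() above asks whether enabling should be refcounted so
// that paired Enable/Disable calls nest. A hypothetical refcounted variant
// (sketch only; the counter and function names are invented, and it assumes
// this file's gCrashRecoveryContextMutex and handler helpers):
#include <cassert>

static int gCrashRecoveryEnableCount = 0;

static void enableNested() {
  sys::ScopedLock L(*gCrashRecoveryContextMutex);
  if (gCrashRecoveryEnableCount++ == 0)
    installExceptionOrSignalHandlers(); // first enabler installs
}

static void disableNested() {
  sys::ScopedLock L(*gCrashRecoveryContextMutex);
  assert(gCrashRecoveryEnableCount > 0 && "unbalanced Disable");
  if (--gCrashRecoveryEnableCount == 0)
    uninstallExceptionOrSignalHandlers(); // last disabler uninstalls
}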
+ +#include "Windows/WindowsSupport.h" static LONG CALLBACK ExceptionHandler(PEXCEPTION_POINTERS ExceptionInfo) { + // DBG_PRINTEXCEPTION_WIDE_C is not properly defined on all supported + // compilers and platforms, so we define it manually. + constexpr ULONG DbgPrintExceptionWideC = 0x4001000AL; + switch (ExceptionInfo->ExceptionRecord->ExceptionCode) + { + case DBG_PRINTEXCEPTION_C: + case DbgPrintExceptionWideC: + case 0x406D1388: // set debugger thread name + return EXCEPTION_CONTINUE_EXECUTION; + } + // Lookup the current thread local recovery object. const CrashRecoveryContextImpl *CRCI = CurrentContext->get(); @@ -192,14 +252,7 @@ static LONG CALLBACK ExceptionHandler(PEXCEPTION_POINTERS ExceptionInfo) // non-NULL, valid VEH handles, or NULL. static sys::ThreadLocal sCurrentExceptionHandle; -void CrashRecoveryContext::Enable() { - sys::ScopedLock L(*gCrashRecoveryContextMutex); - - if (gCrashRecoveryEnabled) - return; - - gCrashRecoveryEnabled = true; - +static void installExceptionOrSignalHandlers() { // We can set up vectored exception handling now. We will install our // handler as the front of the list, though there's no assurances that // it will remain at the front (another call could install itself before @@ -208,14 +261,7 @@ void CrashRecoveryContext::Enable() { sCurrentExceptionHandle.set(handle); } -void CrashRecoveryContext::Disable() { - sys::ScopedLock L(*gCrashRecoveryContextMutex); - - if (!gCrashRecoveryEnabled) - return; - - gCrashRecoveryEnabled = false; - +static void uninstallExceptionOrSignalHandlers() { PVOID currentHandle = const_cast(sCurrentExceptionHandle.get()); if (currentHandle) { // Now we can remove the vectored exception handler from the chain @@ -226,7 +272,7 @@ void CrashRecoveryContext::Disable() { } } -#else +#else // !LLVM_ON_WIN32 // Generic POSIX implementation. // @@ -278,14 +324,7 @@ static void CrashRecoverySignalHandler(int Signal) { const_cast(CRCI)->HandleCrash(); } -void CrashRecoveryContext::Enable() { - sys::ScopedLock L(*gCrashRecoveryContextMutex); - - if (gCrashRecoveryEnabled) - return; - - gCrashRecoveryEnabled = true; - +static void installExceptionOrSignalHandlers() { // Setup the signal handler. struct sigaction Handler; Handler.sa_handler = CrashRecoverySignalHandler; @@ -297,20 +336,13 @@ void CrashRecoveryContext::Enable() { } } -void CrashRecoveryContext::Disable() { - sys::ScopedLock L(*gCrashRecoveryContextMutex); - - if (!gCrashRecoveryEnabled) - return; - - gCrashRecoveryEnabled = false; - +static void uninstallExceptionOrSignalHandlers() { // Restore the previous signal handlers. for (unsigned i = 0; i != NumSignals; ++i) sigaction(Signals[i], &PrevActions[i], nullptr); } -#endif +#endif // !LLVM_ON_WIN32 bool CrashRecoveryContext::RunSafely(function_ref Fn) { // If crash recovery is disabled, do nothing. 
@@ -328,6 +360,8 @@ bool CrashRecoveryContext::RunSafely(function_ref Fn) { return true; } +#endif // !_MSC_VER + void CrashRecoveryContext::HandleCrash() { CrashRecoveryContextImpl *CRCI = (CrashRecoveryContextImpl *) Impl; assert(CRCI && "Crash recovery context never initialized!"); diff --git a/interpreter/llvm/src/lib/Support/DataExtractor.cpp b/interpreter/llvm/src/lib/Support/DataExtractor.cpp index 53c10bcc562e3..0199b300ba72d 100644 --- a/interpreter/llvm/src/lib/Support/DataExtractor.cpp +++ b/interpreter/llvm/src/lib/Support/DataExtractor.cpp @@ -68,6 +68,13 @@ uint16_t *DataExtractor::getU16(uint32_t *offset_ptr, uint16_t *dst, Data.data()); } +uint32_t DataExtractor::getU24(uint32_t *offset_ptr) const { + uint24_t ExtractedVal = + getU(offset_ptr, this, IsLittleEndian, Data.data()); + // The 3 bytes are in the correct byte order for the host. + return ExtractedVal.getAsUint32(sys::IsLittleEndianHost); +} + uint32_t DataExtractor::getU32(uint32_t *offset_ptr) const { return getU(offset_ptr, this, IsLittleEndian, Data.data()); } diff --git a/interpreter/llvm/src/lib/Support/DebugCounter.cpp b/interpreter/llvm/src/lib/Support/DebugCounter.cpp index 29dae8a20f00f..1d46de04ee6af 100644 --- a/interpreter/llvm/src/lib/Support/DebugCounter.cpp +++ b/interpreter/llvm/src/lib/Support/DebugCounter.cpp @@ -6,6 +6,7 @@ using namespace llvm; +namespace { // This class overrides the default list implementation of printing so we // can pretty print the list of debug counter options. This type of // dynamic option is pretty rare (basically this and pass lists). @@ -40,6 +41,7 @@ class DebugCounterList : public cl::list { } } }; +} // namespace // Create our command line option. static DebugCounterList DebugCounterOption( @@ -100,9 +102,13 @@ void DebugCounter::push_back(const std::string &Val) { } } -void DebugCounter::print(raw_ostream &OS) { +void DebugCounter::print(raw_ostream &OS) const { OS << "Counters and values:\n"; for (const auto &KV : Counters) OS << left_justify(RegisteredCounters[KV.first], 32) << ": {" << KV.second.first << "," << KV.second.second << "}\n"; } + +LLVM_DUMP_METHOD void DebugCounter::dump() const { + print(dbgs()); +} diff --git a/interpreter/llvm/src/lib/Support/DynamicLibrary.cpp b/interpreter/llvm/src/lib/Support/DynamicLibrary.cpp index 22fb3f2cb9c93..d8422115eae81 100644 --- a/interpreter/llvm/src/lib/Support/DynamicLibrary.cpp +++ b/interpreter/llvm/src/lib/Support/DynamicLibrary.cpp @@ -14,175 +14,196 @@ #include "llvm/Support/DynamicLibrary.h" #include "llvm-c/Support.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringMap.h" #include "llvm/Config/config.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/Mutex.h" #include #include +#include -// Collection of symbol name/value pairs to be searched prior to any libraries. -static llvm::ManagedStatic > ExplicitSymbols; -static llvm::ManagedStatic > SymbolsMutex; - -void llvm::sys::DynamicLibrary::AddSymbol(StringRef symbolName, - void *symbolValue) { - SmartScopedLock lock(*SymbolsMutex); - (*ExplicitSymbols)[symbolName] = symbolValue; -} - -char llvm::sys::DynamicLibrary::Invalid = 0; - -#ifdef LLVM_ON_WIN32 - -#include "Windows/DynamicLibrary.inc" - -#else - -#if defined(HAVE_DLFCN_H) && defined(HAVE_DLOPEN) -#include using namespace llvm; using namespace llvm::sys; -//===----------------------------------------------------------------------===// -//=== WARNING: Implementation here must contain only TRULY operating system -//=== independent code. 
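// DataExtractor::getU24 above reads three bytes and normalizes them to the
// host byte order via uint24_t. The underlying arithmetic for both byte
// orders, as a standalone sketch:
#include <cstdint>

static uint32_t read24le(const uint8_t *P) { // little-endian input
  return uint32_t(P[0]) | (uint32_t(P[1]) << 8) | (uint32_t(P[2]) << 16);
}

static uint32_t read24be(const uint8_t *P) { // big-endian input
  return (uint32_t(P[0]) << 16) | (uint32_t(P[1]) << 8) | uint32_t(P[2]);
}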
-//===----------------------------------------------------------------------===// +// All methods for HandleSet should be used holding SymbolsMutex. +class DynamicLibrary::HandleSet { + typedef std::vector HandleList; + HandleList Handles; + void *Process; -static llvm::ManagedStatic > OpenedHandles; +public: + static void *DLOpen(const char *Filename, std::string *Err); + static void DLClose(void *Handle); + static void *DLSym(void *Handle, const char *Symbol); -DynamicLibrary DynamicLibrary::getPermanentLibrary(const char *filename, - std::string *errMsg) { - SmartScopedLock lock(*SymbolsMutex); + HandleSet() : Process(nullptr) {} + ~HandleSet(); - void *handle = dlopen(filename, RTLD_LAZY|RTLD_GLOBAL); - if (!handle) { - if (errMsg) *errMsg = dlerror(); - return DynamicLibrary(); + HandleList::iterator Find(void *Handle) { + return std::find(Handles.begin(), Handles.end(), Handle); } -#ifdef __CYGWIN__ - // Cygwin searches symbols only in the main - // with the handle of dlopen(NULL, RTLD_GLOBAL). - if (!filename) - handle = RTLD_DEFAULT; -#endif + bool Contains(void *Handle) { + return Handle == Process || Find(Handle) != Handles.end(); + } - // If we've already loaded this library, dlclose() the handle in order to - // keep the internal refcount at +1. - if (!OpenedHandles->insert(handle).second) - dlclose(handle); + bool AddLibrary(void *Handle, bool IsProcess = false, bool CanClose = true) { +#ifdef LLVM_ON_WIN32 + assert((Handle == this ? IsProcess : !IsProcess) && "Bad Handle."); +#endif - return DynamicLibrary(handle); -} + if (LLVM_LIKELY(!IsProcess)) { + if (Find(Handle) != Handles.end()) { + if (CanClose) + DLClose(Handle); + return false; + } + Handles.push_back(Handle); + } else { +#ifndef LLVM_ON_WIN32 + if (Process) { + if (CanClose) + DLClose(Process); + if (Process == Handle) + return false; + } +#endif + Process = Handle; + } + return true; + } -DynamicLibrary DynamicLibrary::addPermanentLibrary(void *handle, - std::string *errMsg) { - SmartScopedLock lock(*SymbolsMutex); - // If we've already loaded this library, tell the caller. - if (!OpenedHandles->insert(handle).second) { - if (errMsg) *errMsg = "Library already loaded"; - return DynamicLibrary(); + void *LibLookup(const char *Symbol, DynamicLibrary::SearchOrdering Order) { + if (Order & SO_LoadOrder) { + for (void *Handle : Handles) { + if (void *Ptr = DLSym(Handle, Symbol)) + return Ptr; + } + } else { + for (void *Handle : llvm::reverse(Handles)) { + if (void *Ptr = DLSym(Handle, Symbol)) + return Ptr; + } + } + return nullptr; } - return DynamicLibrary(handle); -} + void *Lookup(const char *Symbol, DynamicLibrary::SearchOrdering Order) { + assert(!((Order & SO_LoadedFirst) && (Order & SO_LoadedLast)) && + "Invalid Ordering"); -void *DynamicLibrary::getAddressOfSymbol(const char *symbolName) { - if (!isValid()) + if (!Process || (Order & SO_LoadedFirst)) { + if (void *Ptr = LibLookup(Symbol, Order)) + return Ptr; + } + if (Process) { + // Use OS facilities to search the current binary and all loaded libs. + if (void *Ptr = DLSym(Process, Symbol)) + return Ptr; + + // Search any libs that might have been skipped because of RTLD_LOCAL. + if (Order & SO_LoadedLast) { + if (void *Ptr = LibLookup(Symbol, Order)) + return Ptr; + } + } return nullptr; - return dlsym(Data, symbolName); + } +}; + +namespace { +// Collection of symbol name/value pairs to be searched prior to any libraries. +static llvm::ManagedStatic> ExplicitSymbols; +// Collection of known library handles. 
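// HandleSet::LibLookup above honors DynamicLibrary's new SearchOrdering by
// walking the handle list forward (load order) or backward. The pattern
// reduced to a sketch over a plain vector, with POSIX dlsym standing in for
// the platform DLSym wrapper:
#include <dlfcn.h>
#include <vector>

static void *lookupOrdered(const std::vector<void *> &Handles,
                           const char *Symbol, bool LoadOrder) {
  if (LoadOrder) {
    for (void *H : Handles)
      if (void *P = dlsym(H, Symbol))
        return P;
  } else {
    for (auto It = Handles.rbegin(), E = Handles.rend(); It != E; ++It)
      if (void *P = dlsym(*It, Symbol))
        return P;
  }
  return nullptr;
}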
+static llvm::ManagedStatic OpenedHandles; +// Lock for ExplicitSymbols and OpenedHandles. +static llvm::ManagedStatic> SymbolsMutex; } -#else +#ifdef LLVM_ON_WIN32 -using namespace llvm; -using namespace llvm::sys; +#include "Windows/DynamicLibrary.inc" -DynamicLibrary DynamicLibrary::getPermanentLibrary(const char *filename, - std::string *errMsg) { - if (errMsg) *errMsg = "dlopen() not supported on this platform"; - return DynamicLibrary(); -} +#else -void *DynamicLibrary::getAddressOfSymbol(const char *symbolName) { - return NULL; -} +#include "Unix/DynamicLibrary.inc" #endif +char DynamicLibrary::Invalid; +DynamicLibrary::SearchOrdering DynamicLibrary::SearchOrder = + DynamicLibrary::SO_Linker; + namespace llvm { -void *SearchForAddressOfSpecialSymbol(const char* symbolName); +void *SearchForAddressOfSpecialSymbol(const char *SymbolName) { + return DoSearch(SymbolName); // DynamicLibrary.inc +} } -void* DynamicLibrary::SearchForAddressOfSymbol(const char *symbolName) { +void DynamicLibrary::AddSymbol(StringRef SymbolName, void *SymbolValue) { SmartScopedLock Lock(*SymbolsMutex); + (*ExplicitSymbols)[SymbolName] = SymbolValue; +} - // First check symbols added via AddSymbol(). - if (ExplicitSymbols.isConstructed()) { - StringMap::iterator i = ExplicitSymbols->find(symbolName); +DynamicLibrary DynamicLibrary::getPermanentLibrary(const char *FileName, + std::string *Err) { + // Force OpenedHandles to be added into the ManagedStatic list before any + // ManagedStatic can be added from static constructors in HandleSet::DLOpen. + HandleSet& HS = *OpenedHandles; - if (i != ExplicitSymbols->end()) - return i->second; + void *Handle = HandleSet::DLOpen(FileName, Err); + if (Handle != &Invalid) { + SmartScopedLock Lock(*SymbolsMutex); + HS.AddLibrary(Handle, /*IsProcess*/ FileName == nullptr); } -#if defined(HAVE_DLFCN_H) && defined(HAVE_DLOPEN) - // Now search the libraries. - if (OpenedHandles.isConstructed()) { - for (DenseSet::iterator I = OpenedHandles->begin(), - E = OpenedHandles->end(); I != E; ++I) { - //lt_ptr ptr = lt_dlsym(*I, symbolName); - void *ptr = dlsym(*I, symbolName); - if (ptr) { - return ptr; - } - } - } -#endif + return DynamicLibrary(Handle); +} + +DynamicLibrary DynamicLibrary::addPermanentLibrary(void *Handle, + std::string *Err) { + SmartScopedLock Lock(*SymbolsMutex); + // If we've already loaded this library, tell the caller. + if (!OpenedHandles->AddLibrary(Handle, /*IsProcess*/false, /*CanClose*/false)) + *Err = "Library already loaded"; - if (void *Result = llvm::SearchForAddressOfSpecialSymbol(symbolName)) - return Result; + return DynamicLibrary(Handle); +} -// This macro returns the address of a well-known, explicit symbol -#define EXPLICIT_SYMBOL(SYM) \ - if (!strcmp(symbolName, #SYM)) return &SYM +void *DynamicLibrary::getAddressOfSymbol(const char *SymbolName) { + if (!isValid()) + return nullptr; + return HandleSet::DLSym(Data, SymbolName); +} -// On linux we have a weird situation. The stderr/out/in symbols are both -// macros and global variables because of standards requirements. So, we -// boldly use the EXPLICIT_SYMBOL macro without checking for a #define first. -#if defined(__linux__) and !defined(__ANDROID__) +void *DynamicLibrary::SearchForAddressOfSymbol(const char *SymbolName) { { - EXPLICIT_SYMBOL(stderr); - EXPLICIT_SYMBOL(stdout); - EXPLICIT_SYMBOL(stdin); - } -#else - // For everything else, we want to check to make sure the symbol isn't defined - // as a macro before using EXPLICIT_SYMBOL. 
- { -#ifndef stdin - EXPLICIT_SYMBOL(stdin); -#endif -#ifndef stdout - EXPLICIT_SYMBOL(stdout); -#endif -#ifndef stderr - EXPLICIT_SYMBOL(stderr); -#endif + SmartScopedLock Lock(*SymbolsMutex); + + // First check symbols added via AddSymbol(). + if (ExplicitSymbols.isConstructed()) { + StringMap::iterator i = ExplicitSymbols->find(SymbolName); + + if (i != ExplicitSymbols->end()) + return i->second; + } + + // Now search the libraries. + if (OpenedHandles.isConstructed()) { + if (void *Ptr = OpenedHandles->Lookup(SymbolName, SearchOrder)) + return Ptr; + } } -#endif -#undef EXPLICIT_SYMBOL - return nullptr; + return llvm::SearchForAddressOfSpecialSymbol(SymbolName); } -#endif // LLVM_ON_WIN32 - //===----------------------------------------------------------------------===// // C API. //===----------------------------------------------------------------------===// -LLVMBool LLVMLoadLibraryPermanently(const char* Filename) { +LLVMBool LLVMLoadLibraryPermanently(const char *Filename) { return llvm::sys::DynamicLibrary::LoadLibraryPermanently(Filename); } diff --git a/interpreter/llvm/src/lib/Support/Errno.cpp b/interpreter/llvm/src/lib/Support/Errno.cpp index 3ba2a1277d05f..10be9b391b490 100644 --- a/interpreter/llvm/src/lib/Support/Errno.cpp +++ b/interpreter/llvm/src/lib/Support/Errno.cpp @@ -12,7 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/Errno.h" -#include "llvm/Config/config.h" // Get autoconf configuration settings +#include "llvm/Config/config.h" // Get autoconf configuration settings #include "llvm/Support/raw_ostream.h" #include diff --git a/interpreter/llvm/src/lib/Support/Error.cpp b/interpreter/llvm/src/lib/Support/Error.cpp index 4730c0b26ba06..bb02c03ff2b6b 100644 --- a/interpreter/llvm/src/lib/Support/Error.cpp +++ b/interpreter/llvm/src/lib/Support/Error.cpp @@ -13,7 +13,6 @@ #include "llvm/Support/ManagedStatic.h" #include - using namespace llvm; namespace { diff --git a/interpreter/llvm/src/lib/Support/ErrorHandling.cpp b/interpreter/llvm/src/lib/Support/ErrorHandling.cpp index a7d3a18003eee..fb8ae4c1cd5ef 100644 --- a/interpreter/llvm/src/lib/Support/ErrorHandling.cpp +++ b/interpreter/llvm/src/lib/Support/ErrorHandling.cpp @@ -20,15 +20,14 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/Errc.h" #include "llvm/Support/Error.h" -#include "llvm/Support/ManagedStatic.h" -#include "llvm/Support/Mutex.h" -#include "llvm/Support/MutexGuard.h" #include "llvm/Support/Signals.h" #include "llvm/Support/Threading.h" #include "llvm/Support/WindowsError.h" #include "llvm/Support/raw_ostream.h" #include #include +#include +#include #if defined(HAVE_UNISTD_H) # include @@ -43,18 +42,39 @@ using namespace llvm; static fatal_error_handler_t ErrorHandler = nullptr; static void *ErrorHandlerUserData = nullptr; -static ManagedStatic ErrorHandlerMutex; +static fatal_error_handler_t BadAllocErrorHandler = nullptr; +static void *BadAllocErrorHandlerUserData = nullptr; + +#if LLVM_ENABLE_THREADS == 1 +// Mutexes to synchronize installing error handlers and calling error handlers. +// Do not use ManagedStatic, or that may allocate memory while attempting to +// report an OOM. +// +// This usage of std::mutex has to be conditionalized behind ifdefs because +// of this script: +// compiler-rt/lib/sanitizer_common/symbolizer/scripts/build_symbolizer.sh +// That script attempts to statically link the LLVM symbolizer library with the +// STL and hide all of its symbols with 'opt -internalize'. 
To reduce size, it +// cuts out the threading portions of the hermetic copy of libc++ that it +// builds. We can remove these ifdefs if that script goes away. +static std::mutex ErrorHandlerMutex; +static std::mutex BadAllocErrorHandlerMutex; +#endif void llvm::install_fatal_error_handler(fatal_error_handler_t handler, void *user_data) { - llvm::MutexGuard Lock(*ErrorHandlerMutex); +#if LLVM_ENABLE_THREADS == 1 + std::lock_guard Lock(ErrorHandlerMutex); +#endif assert(!ErrorHandler && "Error handler already registered!\n"); ErrorHandler = handler; ErrorHandlerUserData = user_data; } void llvm::remove_fatal_error_handler() { - llvm::MutexGuard Lock(*ErrorHandlerMutex); +#if LLVM_ENABLE_THREADS == 1 + std::lock_guard Lock(ErrorHandlerMutex); +#endif ErrorHandler = nullptr; ErrorHandlerUserData = nullptr; } @@ -77,7 +97,9 @@ void llvm::report_fatal_error(const Twine &Reason, bool GenCrashDiag) { { // Only acquire the mutex while reading the handler, so as not to invoke a // user-supplied callback under a lock. - llvm::MutexGuard Lock(*ErrorHandlerMutex); +#if LLVM_ENABLE_THREADS == 1 + std::lock_guard Lock(ErrorHandlerMutex); +#endif handler = ErrorHandler; handlerData = ErrorHandlerUserData; } @@ -104,6 +126,55 @@ void llvm::report_fatal_error(const Twine &Reason, bool GenCrashDiag) { exit(1); } +void llvm::install_bad_alloc_error_handler(fatal_error_handler_t handler, + void *user_data) { +#if LLVM_ENABLE_THREADS == 1 + std::lock_guard Lock(BadAllocErrorHandlerMutex); +#endif + assert(!ErrorHandler && "Bad alloc error handler already registered!\n"); + BadAllocErrorHandler = handler; + BadAllocErrorHandlerUserData = user_data; +} + +void llvm::remove_bad_alloc_error_handler() { +#if LLVM_ENABLE_THREADS == 1 + std::lock_guard Lock(BadAllocErrorHandlerMutex); +#endif + BadAllocErrorHandler = nullptr; + BadAllocErrorHandlerUserData = nullptr; +} + +void llvm::report_bad_alloc_error(const char *Reason, bool GenCrashDiag) { + fatal_error_handler_t Handler = nullptr; + void *HandlerData = nullptr; + { + // Only acquire the mutex while reading the handler, so as not to invoke a + // user-supplied callback under a lock. +#if LLVM_ENABLE_THREADS == 1 + std::lock_guard Lock(BadAllocErrorHandlerMutex); +#endif + Handler = BadAllocErrorHandler; + HandlerData = BadAllocErrorHandlerUserData; + } + + if (Handler) { + Handler(HandlerData, Reason, GenCrashDiag); + llvm_unreachable("bad alloc handler should not return"); + } + +#ifdef LLVM_ENABLE_EXCEPTIONS + // If exceptions are enabled, make OOM in malloc look like OOM in new. + throw std::bad_alloc(); +#else + // Don't call the normal error handler. It may allocate memory. Directly write + // an OOM to stderr and abort. + char OOMMessage[] = "LLVM ERROR: out of memory\n"; + ssize_t written = ::write(2, OOMMessage, strlen(OOMMessage)); + (void)written; + abort(); +#endif +} + void llvm::llvm_unreachable_internal(const char *msg, const char *file, unsigned line) { // This code intentionally doesn't call the ErrorHandler callback, because diff --git a/interpreter/llvm/src/lib/Support/FoldingSet.cpp b/interpreter/llvm/src/lib/Support/FoldingSet.cpp index c9bca7f4c1ab7..4496d06a15f3f 100644 --- a/interpreter/llvm/src/lib/Support/FoldingSet.cpp +++ b/interpreter/llvm/src/lib/Support/FoldingSet.cpp @@ -26,7 +26,7 @@ using namespace llvm; // FoldingSetNodeIDRef Implementation /// ComputeHash - Compute a strong hash value for this FoldingSetNodeIDRef, -/// used to lookup the node in the FoldingSetImpl. +/// used to lookup the node in the FoldingSetBase. 
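// Usage sketch for the bad-alloc hooks added to ErrorHandling.cpp above. A
// handler must not allocate, so it writes a fixed message and aborts; the
// handler name and message here are illustrative:
#include "llvm/Support/ErrorHandling.h"
#include <cstdlib>
#include <string>
#include <unistd.h>

static void myOOMHandler(void *UserData, const std::string &Reason,
                         bool GenCrashDiag) {
  static const char Msg[] = "tool: out of memory\n";
  ssize_t W = ::write(2, Msg, sizeof(Msg) - 1);
  (void)W;
  std::abort();
}
// Typically registered early in main():
//   llvm::install_bad_alloc_error_handler(myOOMHandler, nullptr);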
unsigned FoldingSetNodeIDRef::ComputeHash() const { return static_cast(hash_combine_range(Data, Data+Size)); } @@ -142,7 +142,7 @@ void FoldingSetNodeID::AddNodeID(const FoldingSetNodeID &ID) { } /// ComputeHash - Compute a strong hash value for this FoldingSetNodeID, used to -/// lookup the node in the FoldingSetImpl. +/// lookup the node in the FoldingSetBase. unsigned FoldingSetNodeID::ComputeHash() const { return FoldingSetNodeIDRef(Bits.data(), Bits.size()).ComputeHash(); } @@ -180,7 +180,7 @@ FoldingSetNodeID::Intern(BumpPtrAllocator &Allocator) const { } //===----------------------------------------------------------------------===// -/// Helper functions for FoldingSetImpl. +/// Helper functions for FoldingSetBase. /// GetNextPtr - In order to save space, each bucket is a /// singly-linked-list. In order to make deletion more efficient, we make @@ -188,12 +188,12 @@ FoldingSetNodeID::Intern(BumpPtrAllocator &Allocator) const { /// The problem with this is that the start of the hash buckets are not /// Nodes. If NextInBucketPtr is a bucket pointer, this method returns null: /// use GetBucketPtr when this happens. -static FoldingSetImpl::Node *GetNextPtr(void *NextInBucketPtr) { +static FoldingSetBase::Node *GetNextPtr(void *NextInBucketPtr) { // The low bit is set if this is the pointer back to the bucket. if (reinterpret_cast(NextInBucketPtr) & 1) return nullptr; - return static_cast(NextInBucketPtr); + return static_cast(NextInBucketPtr); } @@ -221,11 +221,11 @@ static void **AllocateBuckets(unsigned NumBuckets) { } //===----------------------------------------------------------------------===// -// FoldingSetImpl Implementation +// FoldingSetBase Implementation -void FoldingSetImpl::anchor() {} +void FoldingSetBase::anchor() {} -FoldingSetImpl::FoldingSetImpl(unsigned Log2InitSize) { +FoldingSetBase::FoldingSetBase(unsigned Log2InitSize) { assert(5 < Log2InitSize && Log2InitSize < 32 && "Initial hash table size out of range"); NumBuckets = 1 << Log2InitSize; @@ -233,14 +233,14 @@ FoldingSetImpl::FoldingSetImpl(unsigned Log2InitSize) { NumNodes = 0; } -FoldingSetImpl::FoldingSetImpl(FoldingSetImpl &&Arg) +FoldingSetBase::FoldingSetBase(FoldingSetBase &&Arg) : Buckets(Arg.Buckets), NumBuckets(Arg.NumBuckets), NumNodes(Arg.NumNodes) { Arg.Buckets = nullptr; Arg.NumBuckets = 0; Arg.NumNodes = 0; } -FoldingSetImpl &FoldingSetImpl::operator=(FoldingSetImpl &&RHS) { +FoldingSetBase &FoldingSetBase::operator=(FoldingSetBase &&RHS) { free(Buckets); // This may be null if the set is in a moved-from state. Buckets = RHS.Buckets; NumBuckets = RHS.NumBuckets; @@ -251,11 +251,11 @@ FoldingSetImpl &FoldingSetImpl::operator=(FoldingSetImpl &&RHS) { return *this; } -FoldingSetImpl::~FoldingSetImpl() { +FoldingSetBase::~FoldingSetBase() { free(Buckets); } -void FoldingSetImpl::clear() { +void FoldingSetBase::clear() { // Set all but the last bucket to null pointers. memset(Buckets, 0, NumBuckets*sizeof(void*)); @@ -266,7 +266,7 @@ void FoldingSetImpl::clear() { NumNodes = 0; } -void FoldingSetImpl::GrowBucketCount(unsigned NewBucketCount) { +void FoldingSetBase::GrowBucketCount(unsigned NewBucketCount) { assert((NewBucketCount > NumBuckets) && "Can't shrink a folding set with GrowBucketCount"); assert(isPowerOf2_32(NewBucketCount) && "Bad bucket count!"); void **OldBuckets = Buckets; @@ -300,11 +300,11 @@ void FoldingSetImpl::GrowBucketCount(unsigned NewBucketCount) { /// GrowHashTable - Double the size of the hash table and rehash everything. 
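// GetNextPtr above relies on pointer tagging: bucket back-pointers carry a
// set low bit so traversal can tell "next node" from "end of bucket"
// without extra storage. The trick in isolation, as a sketch:
#include <cstdint>

static void *tagBucket(void **Bucket) { // mark a bucket back-pointer
  return reinterpret_cast<void *>(reinterpret_cast<intptr_t>(Bucket) | 1);
}

static bool isBucketPtr(void *P) { // low bit set => bucket, not node
  return reinterpret_cast<intptr_t>(P) & 1;
}

static void **getBucketPtr(void *P) { // strip the tag to recover the bucket
  return reinterpret_cast<void **>(reinterpret_cast<intptr_t>(P) &
                                   ~intptr_t(1));
}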
/// -void FoldingSetImpl::GrowHashTable() { +void FoldingSetBase::GrowHashTable() { GrowBucketCount(NumBuckets * 2); } -void FoldingSetImpl::reserve(unsigned EltCount) { +void FoldingSetBase::reserve(unsigned EltCount) { // This will give us somewhere between EltCount / 2 and // EltCount buckets. This puts us in the load factor // range of 1.0 - 2.0. @@ -316,9 +316,9 @@ void FoldingSetImpl::reserve(unsigned EltCount) { /// FindNodeOrInsertPos - Look up the node specified by ID. If it exists, /// return it. If not, return the insertion token that will make insertion /// faster. -FoldingSetImpl::Node -*FoldingSetImpl::FindNodeOrInsertPos(const FoldingSetNodeID &ID, - void *&InsertPos) { +FoldingSetBase::Node * +FoldingSetBase::FindNodeOrInsertPos(const FoldingSetNodeID &ID, + void *&InsertPos) { unsigned IDHash = ID.ComputeHash(); void **Bucket = GetBucketFor(IDHash, Buckets, NumBuckets); void *Probe = *Bucket; @@ -342,7 +342,7 @@ FoldingSetImpl::Node /// InsertNode - Insert the specified node into the folding set, knowing that it /// is not already in the map. InsertPos must be obtained from /// FindNodeOrInsertPos. -void FoldingSetImpl::InsertNode(Node *N, void *InsertPos) { +void FoldingSetBase::InsertNode(Node *N, void *InsertPos) { assert(!N->getNextInBucket()); // Do we need to grow the hashtable? if (NumNodes+1 > capacity()) { @@ -371,7 +371,7 @@ void FoldingSetImpl::InsertNode(Node *N, void *InsertPos) { /// RemoveNode - Remove a node from the folding set, returning true if one was /// removed or false if the node was not in the folding set. -bool FoldingSetImpl::RemoveNode(Node *N) { +bool FoldingSetBase::RemoveNode(Node *N) { // Because each bucket is a circular list, we don't need to compute N's hash // to remove it. void *Ptr = N->getNextInBucket(); @@ -412,7 +412,7 @@ bool FoldingSetImpl::RemoveNode(Node *N) { /// GetOrInsertNode - If there is an existing simple Node exactly /// equal to the specified node, return it. Otherwise, insert 'N' and it /// instead. 
-FoldingSetImpl::Node *FoldingSetImpl::GetOrInsertNode(FoldingSetImpl::Node *N) { +FoldingSetBase::Node *FoldingSetBase::GetOrInsertNode(FoldingSetBase::Node *N) { FoldingSetNodeID ID; GetNodeProfile(N, ID); void *IP; diff --git a/interpreter/llvm/src/lib/Support/FormattedStream.cpp b/interpreter/llvm/src/lib/Support/FormattedStream.cpp index 2ed71c7e43119..a9f4409f5ddeb 100644 --- a/interpreter/llvm/src/lib/Support/FormattedStream.cpp +++ b/interpreter/llvm/src/lib/Support/FormattedStream.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Support/Debug.h" #include "llvm/Support/FormattedStream.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include @@ -32,6 +32,7 @@ static void UpdatePosition(std::pair &Position, const char * switch (*Ptr) { case '\n': Line += 1; + LLVM_FALLTHROUGH; case '\r': Column = 0; break; diff --git a/interpreter/llvm/src/lib/Support/GraphWriter.cpp b/interpreter/llvm/src/lib/Support/GraphWriter.cpp index d0e1d50e8ccbc..e04bd8bb3b9a1 100644 --- a/interpreter/llvm/src/lib/Support/GraphWriter.cpp +++ b/interpreter/llvm/src/lib/Support/GraphWriter.cpp @@ -1,4 +1,4 @@ -//===-- GraphWriter.cpp - Implements GraphWriter support routines ---------===// +//===- GraphWriter.cpp - Implements GraphWriter support routines ----------===// // // The LLVM Compiler Infrastructure // @@ -12,10 +12,22 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/GraphWriter.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Config/config.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/ErrorOr.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Program.h" +#include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include + using namespace llvm; static cl::opt ViewBackground("view-background", cl::Hidden, @@ -43,6 +55,7 @@ std::string llvm::DOT::EscapeString(const std::string &Label) { Str.erase(Str.begin()+i); continue; default: break; } + LLVM_FALLTHROUGH; case '{': case '}': case '<': case '>': case '|': case '"': @@ -98,8 +111,10 @@ static bool ExecGraphViewer(StringRef ExecPath, std::vector &args, } namespace { + struct GraphSession { std::string LogBuffer; + bool TryFindProgram(StringRef Names, std::string &ProgramPath) { raw_string_ostream Log(LogBuffer); SmallVector parts; @@ -114,7 +129,8 @@ struct GraphSession { return false; } }; -} // namespace + +} // end anonymous namespace static const char *getProgramName(GraphProgram::Name program) { switch (program) { diff --git a/interpreter/llvm/src/lib/Support/Host.cpp b/interpreter/llvm/src/lib/Support/Host.cpp index 6a0b64fb884df..5cf0316d4d718 100644 --- a/interpreter/llvm/src/lib/Support/Host.cpp +++ b/interpreter/llvm/src/lib/Support/Host.cpp @@ -250,6 +250,8 @@ StringRef sys::detail::getHostCPUNameForS390x( Pos += sizeof("machine = ") - 1; unsigned int Id; if (!Lines[I].drop_front(Pos).getAsInteger(10, Id)) { + if (Id >= 3906 && HaveVectorSupport) + return "z14"; if (Id >= 2964 && HaveVectorSupport) return "z13"; if (Id >= 2827) @@ -281,11 +283,17 @@ enum ProcessorVendors { }; enum ProcessorTypes { - INTEL_ATOM = 1, + INTEL_BONNELL = 1, INTEL_CORE2, INTEL_COREI7, AMDFAM10H, AMDFAM15H, + INTEL_SILVERMONT, + INTEL_KNL, + AMD_BTVER1, + AMD_BTVER2, + AMDFAM17H, + // Entries below this are not in 
libgcc/compiler-rt. INTEL_i386, INTEL_i486, INTEL_PENTIUM, @@ -295,16 +303,13 @@ enum ProcessorTypes { INTEL_PENTIUM_IV, INTEL_PENTIUM_M, INTEL_CORE_DUO, - INTEL_XEONPHI, INTEL_X86_64, INTEL_NOCONA, INTEL_PRESCOTT, AMD_i486, AMDPENTIUM, AMDATHLON, - AMDFAM14H, - AMDFAM16H, - AMDFAM17H, + INTEL_GOLDMONT, CPU_TYPE_MAX }; @@ -317,33 +322,26 @@ enum ProcessorSubtypes { AMDFAM10H_ISTANBUL, AMDFAM15H_BDVER1, AMDFAM15H_BDVER2, - INTEL_PENTIUM_MMX, - INTEL_CORE2_65, - INTEL_CORE2_45, + AMDFAM15H_BDVER3, + AMDFAM15H_BDVER4, + AMDFAM17H_ZNVER1, INTEL_COREI7_IVYBRIDGE, INTEL_COREI7_HASWELL, INTEL_COREI7_BROADWELL, INTEL_COREI7_SKYLAKE, INTEL_COREI7_SKYLAKE_AVX512, - INTEL_ATOM_BONNELL, - INTEL_ATOM_SILVERMONT, - INTEL_KNIGHTS_LANDING, + // Entries below this are not in libgcc/compiler-rt. + INTEL_PENTIUM_MMX, + INTEL_CORE2_65, + INTEL_CORE2_45, AMDPENTIUM_K6, AMDPENTIUM_K62, AMDPENTIUM_K63, AMDPENTIUM_GEODE, - AMDATHLON_TBIRD, - AMDATHLON_MP, + AMDATHLON_CLASSIC, AMDATHLON_XP, + AMDATHLON_K8, AMDATHLON_K8SSE3, - AMDATHLON_OPTERON, - AMDATHLON_FX, - AMDATHLON_64, - AMD_BTVER1, - AMD_BTVER2, - AMDFAM15H_BDVER3, - AMDFAM15H_BDVER4, - AMDFAM17H_ZNVER1, CPU_SUBTYPE_MAX }; @@ -359,9 +357,28 @@ enum ProcessorFeatures { FEATURE_SSE4_2, FEATURE_AVX, FEATURE_AVX2, - FEATURE_AVX512, - FEATURE_AVX512SAVE, - FEATURE_MOVBE, + FEATURE_SSE4_A, + FEATURE_FMA4, + FEATURE_XOP, + FEATURE_FMA, + FEATURE_AVX512F, + FEATURE_BMI, + FEATURE_BMI2, + FEATURE_AES, + FEATURE_PCLMUL, + FEATURE_AVX512VL, + FEATURE_AVX512BW, + FEATURE_AVX512DQ, + FEATURE_AVX512CD, + FEATURE_AVX512ER, + FEATURE_AVX512PF, + FEATURE_AVX512VBMI, + FEATURE_AVX512IFMA, + FEATURE_AVX5124VNNIW, + FEATURE_AVX5124FMAPS, + FEATURE_AVX512VPOPCNTDQ, + // Only one bit free left in the first 32 features. + FEATURE_MOVBE = 32, FEATURE_ADX, FEATURE_EM64T }; @@ -405,7 +422,6 @@ static bool isCpuIdSupported() { /// the specified arguments. If we can't run cpuid on the host, return true. static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX, unsigned *rECX, unsigned *rEDX) { -#if defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER) #if defined(__GNUC__) || defined(__clang__) #if defined(__x86_64__) // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually. @@ -415,14 +431,16 @@ static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX, "xchgq\t%%rbx, %%rsi\n\t" : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) : "a"(value)); + return false; #elif defined(__i386__) __asm__("movl\t%%ebx, %%esi\n\t" "cpuid\n\t" "xchgl\t%%ebx, %%esi\n\t" : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) : "a"(value)); + return false; #else - assert(0 && "This method is defined only for x86."); + return true; #endif #elif defined(_MSC_VER) // The MSVC intrinsic is portable across x86 and x64. @@ -432,7 +450,6 @@ static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX, *rEBX = registers[1]; *rECX = registers[2]; *rEDX = registers[3]; -#endif return false; #else return true; @@ -445,55 +462,40 @@ static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX, static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf, unsigned *rEAX, unsigned *rEBX, unsigned *rECX, unsigned *rEDX) { -#if defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER) -#if defined(__x86_64__) || defined(_M_X64) #if defined(__GNUC__) || defined(__clang__) - // gcc doesn't know cpuid would clobber ebx/rbx. Preseve it manually. +#if defined(__x86_64__) + // gcc doesn't know cpuid would clobber ebx/rbx. 
Preserve it manually. // FIXME: should we save this for Clang? __asm__("movq\t%%rbx, %%rsi\n\t" "cpuid\n\t" "xchgq\t%%rbx, %%rsi\n\t" : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) : "a"(value), "c"(subleaf)); -#elif defined(_MSC_VER) - int registers[4]; - __cpuidex(registers, value, subleaf); - *rEAX = registers[0]; - *rEBX = registers[1]; - *rECX = registers[2]; - *rEDX = registers[3]; -#endif -#elif defined(__i386__) || defined(_M_IX86) -#if defined(__GNUC__) || defined(__clang__) + return false; +#elif defined(__i386__) __asm__("movl\t%%ebx, %%esi\n\t" "cpuid\n\t" "xchgl\t%%ebx, %%esi\n\t" : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) : "a"(value), "c"(subleaf)); -#elif defined(_MSC_VER) - __asm { - mov eax,value - mov ecx,subleaf - cpuid - mov esi,rEAX - mov dword ptr [esi],eax - mov esi,rEBX - mov dword ptr [esi],ebx - mov esi,rECX - mov dword ptr [esi],ecx - mov esi,rEDX - mov dword ptr [esi],edx - } -#endif + return false; #else - assert(0 && "This method is defined only for x86."); + return true; #endif +#elif defined(_MSC_VER) + int registers[4]; + __cpuidex(registers, value, subleaf); + *rEAX = registers[0]; + *rEBX = registers[1]; + *rECX = registers[2]; + *rEDX = registers[3]; return false; #else return true; #endif } +// Read control register 0 (XCR0). Used to detect features such as AVX. static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) { #if defined(__GNUC__) || defined(__clang__) // Check xgetbv; this uses a .byte sequence instead of the instruction @@ -525,9 +527,10 @@ static void detectX86FamilyModel(unsigned EAX, unsigned *Family, } static void -getIntelProcessorTypeAndSubtype(unsigned int Family, unsigned int Model, - unsigned int Brand_id, unsigned int Features, - unsigned *Type, unsigned *Subtype) { +getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, + unsigned Brand_id, unsigned Features, + unsigned Features2, unsigned *Type, + unsigned *Subtype) { if (Brand_id != 0) return; switch (Family) { @@ -680,12 +683,7 @@ getIntelProcessorTypeAndSubtype(unsigned int Family, unsigned int Model, // Skylake Xeon: case 0x55: *Type = INTEL_COREI7; - // Check that we really have AVX512 - if (Features & (1 << FEATURE_AVX512)) { - *Subtype = INTEL_COREI7_SKYLAKE_AVX512; // "skylake-avx512" - } else { - *Subtype = INTEL_COREI7_SKYLAKE; // "skylake" - } + *Subtype = INTEL_COREI7_SKYLAKE_AVX512; // "skylake-avx512" break; case 0x1c: // Most 45 nm Intel Atom processors @@ -693,8 +691,7 @@ getIntelProcessorTypeAndSubtype(unsigned int Family, unsigned int Model, case 0x27: // 32 nm Atom Medfield case 0x35: // 32 nm Atom Midview case 0x36: // 32 nm Atom Midview - *Type = INTEL_ATOM; - *Subtype = INTEL_ATOM_BONNELL; + *Type = INTEL_BONNELL; break; // "bonnell" // Atom Silvermont codes from the Intel software optimization guide. @@ -704,22 +701,23 @@ getIntelProcessorTypeAndSubtype(unsigned int Family, unsigned int Model, case 0x5a: case 0x5d: case 0x4c: // really airmont - *Type = INTEL_ATOM; - *Subtype = INTEL_ATOM_SILVERMONT; + *Type = INTEL_SILVERMONT; break; // "silvermont" - + // Goldmont: + case 0x5c: + case 0x5f: + *Type = INTEL_GOLDMONT; + break; // "goldmont" case 0x57: - *Type = INTEL_XEONPHI; // knl - *Subtype = INTEL_KNIGHTS_LANDING; + *Type = INTEL_KNL; // knl break; default: // Unknown family 6 CPU, try to guess. 
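The guess code just below, like the rest of the rewritten detection logic, tests feature bits spread across two 32-bit words: the new ProcessorFeatures enum ran out of room in one word, so enumerators from FEATURE_MOVBE = 32 upward land in a second word (Features2) and are tested with a 1 << (FEATURE_X - 32) shift. A self-contained sketch of that scheme, with illustrative enumerator values rather than the full list:

#include <cassert>

// Values are illustrative; the real enum in Host.cpp is much longer.
enum ProcessorFeatures {
  FEATURE_SSE2 = 0,   // stored in the first word, Features
  FEATURE_MOVBE = 32, // first enumerator stored in the second word, Features2
  FEATURE_ADX         // 33
};

static bool hasFeature(unsigned Features, unsigned Features2, unsigned F) {
  return F < 32 ? (Features >> F) & 1 : (Features2 >> (F - 32)) & 1;
}

int main() {
  unsigned Features = 1u << FEATURE_SSE2;
  unsigned Features2 = 1u << (FEATURE_ADX - 32);
  assert(hasFeature(Features, Features2, FEATURE_SSE2));
  assert(hasFeature(Features, Features2, FEATURE_ADX));
  assert(!hasFeature(Features, Features2, FEATURE_MOVBE));
  return 0;
}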
- if (Features & (1 << FEATURE_AVX512)) { - *Type = INTEL_XEONPHI; // knl - *Subtype = INTEL_KNIGHTS_LANDING; + if (Features & (1 << FEATURE_AVX512F)) { + *Type = INTEL_KNL; // knl break; } - if (Features & (1 << FEATURE_ADX)) { + if (Features2 & (1 << (FEATURE_ADX - 32))) { *Type = INTEL_COREI7; *Subtype = INTEL_COREI7_BROADWELL; break; @@ -735,9 +733,8 @@ getIntelProcessorTypeAndSubtype(unsigned int Family, unsigned int Model, break; } if (Features & (1 << FEATURE_SSE4_2)) { - if (Features & (1 << FEATURE_MOVBE)) { - *Type = INTEL_ATOM; - *Subtype = INTEL_ATOM_SILVERMONT; + if (Features2 & (1 << (FEATURE_MOVBE - 32))) { + *Type = INTEL_SILVERMONT; } else { *Type = INTEL_COREI7; *Subtype = INTEL_COREI7_NEHALEM; @@ -750,16 +747,15 @@ getIntelProcessorTypeAndSubtype(unsigned int Family, unsigned int Model, break; } if (Features & (1 << FEATURE_SSSE3)) { - if (Features & (1 << FEATURE_MOVBE)) { - *Type = INTEL_ATOM; - *Subtype = INTEL_ATOM_BONNELL; // "bonnell" + if (Features2 & (1 << (FEATURE_MOVBE - 32))) { + *Type = INTEL_BONNELL; // "bonnell" } else { *Type = INTEL_CORE2; // "core2" *Subtype = INTEL_CORE2_65; } break; } - if (Features & (1 << FEATURE_EM64T)) { + if (Features2 & (1 << (FEATURE_EM64T - 32))) { *Type = INTEL_X86_64; break; // x86-64 } @@ -790,8 +786,8 @@ getIntelProcessorTypeAndSubtype(unsigned int Family, unsigned int Model, // Intel Xeon processor, Intel Xeon processor MP, Intel Celeron // processor, and Mobile Intel Celeron processor. All processors // are model 02h and manufactured using the 0.13 micron process. - *Type = - ((Features & (1 << FEATURE_EM64T)) ? INTEL_X86_64 : INTEL_PENTIUM_IV); + *Type = ((Features2 & (1 << (FEATURE_EM64T - 32))) ? INTEL_X86_64 + : INTEL_PENTIUM_IV); break; case 3: // Pentium 4 processor, Intel Xeon processor, Intel Celeron D @@ -805,13 +801,13 @@ getIntelProcessorTypeAndSubtype(unsigned int Family, unsigned int Model, // Extreme Edition, Intel Xeon processor, Intel Xeon processor // MP, Intel Celeron D processor. All processors are model 06h // and manufactured using the 65 nm process. - *Type = - ((Features & (1 << FEATURE_EM64T)) ? INTEL_NOCONA : INTEL_PRESCOTT); + *Type = ((Features2 & (1 << (FEATURE_EM64T - 32))) ? INTEL_NOCONA + : INTEL_PRESCOTT); break; default: - *Type = - ((Features & (1 << FEATURE_EM64T)) ? INTEL_X86_64 : INTEL_PENTIUM_IV); + *Type = ((Features2 & (1 << (FEATURE_EM64T - 32))) ? INTEL_X86_64 + : INTEL_PENTIUM_IV); break; } break; @@ -821,10 +817,8 @@ getIntelProcessorTypeAndSubtype(unsigned int Family, unsigned int Model, } } -static void getAMDProcessorTypeAndSubtype(unsigned int Family, - unsigned int Model, - unsigned int Features, - unsigned *Type, +static void getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model, + unsigned Features, unsigned *Type, unsigned *Subtype) { // FIXME: this poorly matches the generated SubtargetFeatureKV table. 
There // appears to be no way to generate the wide variety of AMD-specific targets @@ -854,38 +848,20 @@ static void getAMDProcessorTypeAndSubtype(unsigned int Family, break; case 6: *Type = AMDATHLON; - switch (Model) { - case 4: - *Subtype = AMDATHLON_TBIRD; - break; // "athlon-tbird" - case 6: - case 7: - case 8: - *Subtype = AMDATHLON_MP; - break; // "athlon-mp" - case 10: + if (Features & (1 << FEATURE_SSE)) { *Subtype = AMDATHLON_XP; break; // "athlon-xp" } - break; + *Subtype = AMDATHLON_CLASSIC; + break; // "athlon" case 15: *Type = AMDATHLON; if (Features & (1 << FEATURE_SSE3)) { *Subtype = AMDATHLON_K8SSE3; break; // "k8-sse3" } - switch (Model) { - case 1: - *Subtype = AMDATHLON_OPTERON; - break; // "opteron" - case 5: - *Subtype = AMDATHLON_FX; - break; // "athlon-fx"; also opteron - default: - *Subtype = AMDATHLON_64; - break; // "athlon64" - } - break; + *Subtype = AMDATHLON_K8; + break; // "k8" case 16: *Type = AMDFAM10H; // "amdfam10" switch (Model) { @@ -901,19 +877,13 @@ static void getAMDProcessorTypeAndSubtype(unsigned int Family, } break; case 20: - *Type = AMDFAM14H; - *Subtype = AMD_BTVER1; + *Type = AMD_BTVER1; break; // "btver1"; case 21: *Type = AMDFAM15H; - if (!(Features & - (1 << FEATURE_AVX))) { // If no AVX support, provide a sane fallback. - *Subtype = AMD_BTVER1; - break; // "btver1" - } - if (Model >= 0x50 && Model <= 0x6f) { + if (Model >= 0x60 && Model <= 0x7f) { *Subtype = AMDFAM15H_BDVER4; - break; // "bdver4"; 50h-6Fh: Excavator + break; // "bdver4"; 60h-7Fh: Excavator } if (Model >= 0x30 && Model <= 0x3f) { *Subtype = AMDFAM15H_BDVER3; @@ -929,39 +899,52 @@ static void getAMDProcessorTypeAndSubtype(unsigned int Family, } break; case 22: - *Type = AMDFAM16H; - if (!(Features & - (1 << FEATURE_AVX))) { // If no AVX support provide a sane fallback. 
- *Subtype = AMD_BTVER1; - break; // "btver1"; - } - *Subtype = AMD_BTVER2; + *Type = AMD_BTVER2; break; // "btver2" case 23: *Type = AMDFAM17H; - if (Features & (1 << FEATURE_ADX)) { - *Subtype = AMDFAM17H_ZNVER1; - break; // "znver1" - } - *Subtype = AMD_BTVER1; + *Subtype = AMDFAM17H_ZNVER1; break; default: break; // "generic" } } -static unsigned getAvailableFeatures(unsigned int ECX, unsigned int EDX, - unsigned MaxLeaf) { +static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf, + unsigned *FeaturesOut, + unsigned *Features2Out) { unsigned Features = 0; - unsigned int EAX, EBX; - Features |= (((EDX >> 23) & 1) << FEATURE_MMX); - Features |= (((EDX >> 25) & 1) << FEATURE_SSE); - Features |= (((EDX >> 26) & 1) << FEATURE_SSE2); - Features |= (((ECX >> 0) & 1) << FEATURE_SSE3); - Features |= (((ECX >> 9) & 1) << FEATURE_SSSE3); - Features |= (((ECX >> 19) & 1) << FEATURE_SSE4_1); - Features |= (((ECX >> 20) & 1) << FEATURE_SSE4_2); - Features |= (((ECX >> 22) & 1) << FEATURE_MOVBE); + unsigned Features2 = 0; + unsigned EAX, EBX; + + if ((EDX >> 15) & 1) + Features |= 1 << FEATURE_CMOV; + if ((EDX >> 23) & 1) + Features |= 1 << FEATURE_MMX; + if ((EDX >> 25) & 1) + Features |= 1 << FEATURE_SSE; + if ((EDX >> 26) & 1) + Features |= 1 << FEATURE_SSE2; + + if ((ECX >> 0) & 1) + Features |= 1 << FEATURE_SSE3; + if ((ECX >> 1) & 1) + Features |= 1 << FEATURE_PCLMUL; + if ((ECX >> 9) & 1) + Features |= 1 << FEATURE_SSSE3; + if ((ECX >> 12) & 1) + Features |= 1 << FEATURE_FMA; + if ((ECX >> 19) & 1) + Features |= 1 << FEATURE_SSE4_1; + if ((ECX >> 20) & 1) + Features |= 1 << FEATURE_SSE4_2; + if ((ECX >> 23) & 1) + Features |= 1 << FEATURE_POPCNT; + if ((ECX >> 25) & 1) + Features |= 1 << FEATURE_AES; + + if ((ECX >> 22) & 1) + Features2 |= 1 << (FEATURE_MOVBE - 32); // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV // indicates that the AVX registers will be saved and restored on context @@ -970,20 +953,65 @@ static unsigned getAvailableFeatures(unsigned int ECX, unsigned int EDX, bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) && ((EAX & 0x6) == 0x6); bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0); + + if (HasAVX) + Features |= 1 << FEATURE_AVX; + bool HasLeaf7 = MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX); - bool HasADX = HasLeaf7 && ((EBX >> 19) & 1); - bool HasAVX2 = HasAVX && HasLeaf7 && (EBX & 0x20); - bool HasAVX512 = HasLeaf7 && HasAVX512Save && ((EBX >> 16) & 1); - Features |= (HasAVX << FEATURE_AVX); - Features |= (HasAVX2 << FEATURE_AVX2); - Features |= (HasAVX512 << FEATURE_AVX512); - Features |= (HasAVX512Save << FEATURE_AVX512SAVE); - Features |= (HasADX << FEATURE_ADX); - - getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); - Features |= (((EDX >> 29) & 0x1) << FEATURE_EM64T); - return Features; + + if (HasLeaf7 && ((EBX >> 3) & 1)) + Features |= 1 << FEATURE_BMI; + if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX) + Features |= 1 << FEATURE_AVX2; + if (HasLeaf7 && ((EBX >> 9) & 1)) + Features |= 1 << FEATURE_BMI2; + if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save) + Features |= 1 << FEATURE_AVX512F; + if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save) + Features |= 1 << FEATURE_AVX512DQ; + if (HasLeaf7 && ((EBX >> 19) & 1)) + Features2 |= 1 << (FEATURE_ADX - 32); + if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save) + Features |= 1 << FEATURE_AVX512IFMA; + if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save) + Features |= 1 << FEATURE_AVX512PF; + if (HasLeaf7 && ((EBX >> 27) & 
1) && HasAVX512Save) + Features |= 1 << FEATURE_AVX512ER; + if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save) + Features |= 1 << FEATURE_AVX512CD; + if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save) + Features |= 1 << FEATURE_AVX512BW; + if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save) + Features |= 1 << FEATURE_AVX512VL; + + if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save) + Features |= 1 << FEATURE_AVX512VBMI; + if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save) + Features |= 1 << FEATURE_AVX512VPOPCNTDQ; + + if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save) + Features |= 1 << FEATURE_AVX5124VNNIW; + if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save) + Features |= 1 << FEATURE_AVX5124FMAPS; + + unsigned MaxExtLevel; + getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX); + + bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 && + !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); + if (HasExtLeaf1 && ((ECX >> 6) & 1)) + Features |= 1 << FEATURE_SSE4_A; + if (HasExtLeaf1 && ((ECX >> 11) & 1)) + Features |= 1 << FEATURE_XOP; + if (HasExtLeaf1 && ((ECX >> 16) & 1)) + Features |= 1 << FEATURE_FMA4; + + if (HasExtLeaf1 && ((EDX >> 29) & 1)) + Features2 |= 1 << (FEATURE_EM64T - 32); + + *FeaturesOut = Features; + *Features2Out = Features2; } StringRef sys::getHostCPUName() { @@ -998,23 +1026,22 @@ StringRef sys::getHostCPUName() { if(!isCpuIdSupported()) return "generic"; #endif - if (getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX)) - return "generic"; - if (getX86CpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX)) + if (getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX) || MaxLeaf < 1) return "generic"; + getX86CpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX); unsigned Brand_id = EBX & 0xff; unsigned Family = 0, Model = 0; - unsigned Features = 0; + unsigned Features = 0, Features2 = 0; detectX86FamilyModel(EAX, &Family, &Model); - Features = getAvailableFeatures(ECX, EDX, MaxLeaf); + getAvailableFeatures(ECX, EDX, MaxLeaf, &Features, &Features2); unsigned Type; unsigned Subtype; if (Vendor == SIG_INTEL) { - getIntelProcessorTypeAndSubtype(Family, Model, Brand_id, Features, &Type, - &Subtype); + getIntelProcessorTypeAndSubtype(Family, Model, Brand_id, Features, + Features2, &Type, &Subtype); switch (Type) { case INTEL_i386: return "i386"; @@ -1043,7 +1070,7 @@ StringRef sys::getHostCPUName() { case INTEL_CORE2_45: return "penryn"; default: - return "core2"; + llvm_unreachable("Unexpected subtype!"); } case INTEL_COREI7: switch (Subtype) { @@ -1064,19 +1091,16 @@ StringRef sys::getHostCPUName() { case INTEL_COREI7_SKYLAKE_AVX512: return "skylake-avx512"; default: - return "corei7"; - } - case INTEL_ATOM: - switch (Subtype) { - case INTEL_ATOM_BONNELL: - return "bonnell"; - case INTEL_ATOM_SILVERMONT: - return "silvermont"; - default: - return "atom"; + llvm_unreachable("Unexpected subtype!"); } - case INTEL_XEONPHI: - return "knl"; /*update for more variants added*/ + case INTEL_BONNELL: + return "bonnell"; + case INTEL_SILVERMONT: + return "silvermont"; + case INTEL_GOLDMONT: + return "goldmont"; + case INTEL_KNL: + return "knl"; case INTEL_X86_64: return "x86-64"; case INTEL_NOCONA: @@ -1084,7 +1108,7 @@ StringRef sys::getHostCPUName() { case INTEL_PRESCOTT: return "prescott"; default: - return "generic"; + break; } } else if (Vendor == SIG_AMD) { getAMDProcessorTypeAndSubtype(Family, Model, Features, &Type, &Subtype); @@ -1106,31 +1130,24 @@ StringRef sys::getHostCPUName() { } case AMDATHLON: switch (Subtype) { - case AMDATHLON_TBIRD: - return "athlon-tbird"; - case 
AMDATHLON_MP: - return "athlon-mp"; + case AMDATHLON_CLASSIC: + return "athlon"; case AMDATHLON_XP: return "athlon-xp"; + case AMDATHLON_K8: + return "k8"; case AMDATHLON_K8SSE3: return "k8-sse3"; - case AMDATHLON_OPTERON: - return "opteron"; - case AMDATHLON_FX: - return "athlon-fx"; - case AMDATHLON_64: - return "athlon64"; default: - return "athlon"; + llvm_unreachable("Unexpected subtype!"); } case AMDFAM10H: - if(Subtype == AMDFAM10H_BARCELONA) - return "barcelona"; return "amdfam10"; - case AMDFAM14H: + case AMD_BTVER1: return "btver1"; case AMDFAM15H: switch (Subtype) { + default: // There are gaps in the subtype detection. case AMDFAM15H_BDVER1: return "bdver1"; case AMDFAM15H_BDVER2: @@ -1139,31 +1156,13 @@ StringRef sys::getHostCPUName() { return "bdver3"; case AMDFAM15H_BDVER4: return "bdver4"; - case AMD_BTVER1: - return "btver1"; - default: - return "amdfam15"; - } - case AMDFAM16H: - switch (Subtype) { - case AMD_BTVER1: - return "btver1"; - case AMD_BTVER2: - return "btver2"; - default: - return "amdfam16"; } + case AMD_BTVER2: + return "btver2"; case AMDFAM17H: - switch (Subtype) { - case AMD_BTVER1: - return "btver1"; - case AMDFAM17H_ZNVER1: - return "znver1"; - default: - return "amdfam17"; - } + return "znver1"; default: - return "generic"; + break; } } return "generic"; @@ -1401,6 +1400,7 @@ bool sys::getHostCPUFeatures(StringMap &Features) { Features["prefetchwt1"] = HasLeaf7 && (ECX & 1); Features["avx512vbmi"] = HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save; + Features["avx512vpopcntdq"] = HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save; // Enable protection keys Features["pku"] = HasLeaf7 && ((ECX >> 4) & 1); @@ -1485,7 +1485,8 @@ bool sys::getHostCPUFeatures(StringMap &Features) { return false; } #endif std::string sys::getProcessTriple() { - Triple PT(Triple::normalize(LLVM_HOST_TRIPLE)); + std::string TargetTripleString = updateTripleOSVersion(LLVM_HOST_TRIPLE); + Triple PT(Triple::normalize(TargetTripleString)); if (sizeof(void *) == 8 && PT.isArch32Bit()) PT = PT.get64BitArchVariant(); diff --git a/interpreter/llvm/src/lib/Support/LockFileManager.cpp b/interpreter/llvm/src/lib/Support/LockFileManager.cpp index 8be9879fbc243..3ee3af7731e6b 100644 --- a/interpreter/llvm/src/lib/Support/LockFileManager.cpp +++ b/interpreter/llvm/src/lib/Support/LockFileManager.cpp @@ -15,15 +15,15 @@ #include "llvm/Support/ErrorOr.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/Signals.h" +#include "llvm/Support/raw_ostream.h" #include #include #include -#include -#include #include #include +#include +#include #if LLVM_ON_WIN32 #include #endif diff --git a/interpreter/llvm/src/lib/Support/MD5.cpp b/interpreter/llvm/src/lib/Support/MD5.cpp index bdbf1d6779383..545a64cfc7679 100644 --- a/interpreter/llvm/src/lib/Support/MD5.cpp +++ b/interpreter/llvm/src/lib/Support/MD5.cpp @@ -37,11 +37,11 @@ * compile-time configuration. 
*/ +#include "llvm/Support/MD5.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Format.h" -#include "llvm/Support/MD5.h" #include "llvm/Support/raw_ostream.h" #include #include diff --git a/interpreter/llvm/src/lib/Support/MemoryBuffer.cpp b/interpreter/llvm/src/lib/Support/MemoryBuffer.cpp index 227e792d83dc4..85e782b2c048a 100644 --- a/interpreter/llvm/src/lib/Support/MemoryBuffer.cpp +++ b/interpreter/llvm/src/lib/Support/MemoryBuffer.cpp @@ -240,11 +240,9 @@ getMemoryBufferForStream(int FD, const Twine &BufferName) { // Read into Buffer until we hit EOF. do { Buffer.reserve(Buffer.size() + ChunkSize); - ReadBytes = read(FD, Buffer.end(), ChunkSize); - if (ReadBytes == -1) { - if (errno == EINTR) continue; + ReadBytes = sys::RetryAfterSignal(-1, read, FD, Buffer.end(), ChunkSize); + if (ReadBytes == -1) return std::error_code(errno, std::generic_category()); - } Buffer.set_size(Buffer.size() + ReadBytes); } while (ReadBytes != 0); @@ -391,13 +389,12 @@ getOpenFileImpl(int FD, const Twine &Filename, uint64_t FileSize, while (BytesLeft) { #ifdef HAVE_PREAD - ssize_t NumRead = ::pread(FD, BufPtr, BytesLeft, MapSize-BytesLeft+Offset); + ssize_t NumRead = sys::RetryAfterSignal(-1, ::pread, FD, BufPtr, BytesLeft, + MapSize - BytesLeft + Offset); #else - ssize_t NumRead = ::read(FD, BufPtr, BytesLeft); + ssize_t NumRead = sys::RetryAfterSignal(-1, ::read, FD, BufPtr, BytesLeft); #endif if (NumRead == -1) { - if (errno == EINTR) - continue; // Error while reading. return std::error_code(errno, std::generic_category()); } diff --git a/interpreter/llvm/src/lib/Support/Mutex.cpp b/interpreter/llvm/src/lib/Support/Mutex.cpp index c8d3844d0c961..b1d5e7c0d9912 100644 --- a/interpreter/llvm/src/lib/Support/Mutex.cpp +++ b/interpreter/llvm/src/lib/Support/Mutex.cpp @@ -11,8 +11,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Config/config.h" #include "llvm/Support/Mutex.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Config/config.h" //===----------------------------------------------------------------------===// //=== WARNING: Implementation here must contain only TRULY operating system @@ -47,6 +48,10 @@ MutexImpl::MutexImpl( bool recursive) // Declare the pthread_mutex data structures pthread_mutex_t* mutex = static_cast<pthread_mutex_t*>(malloc(sizeof(pthread_mutex_t))); + + if (mutex == nullptr) + report_bad_alloc_error("Mutex allocation failed"); + pthread_mutexattr_t attr; // Initialize the mutex attributes diff --git a/interpreter/llvm/src/lib/Support/Path.cpp b/interpreter/llvm/src/lib/Support/Path.cpp index 9fd6652ce4b8c..ea59ba62d7bdf 100644 --- a/interpreter/llvm/src/lib/Support/Path.cpp +++ b/interpreter/llvm/src/lib/Support/Path.cpp @@ -13,12 +13,10 @@ #include "llvm/Support/Path.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/Support/COFF.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Errc.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FileSystem.h" -#include "llvm/Support/MachO.h" #include "llvm/Support/Process.h" #include #include @@ -1027,177 +1025,6 @@ void directory_entry::replace_filename(const Twine &filename, file_status st) { Status = st; } -template <size_t N> -static bool startswith(StringRef Magic, const char (&S)[N]) { - return Magic.startswith(StringRef(S, N - 1)); -} - -/// @brief Identify the magic in magic.
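The MemoryBuffer hunks above (and the Unix/Path.inc and Unix/Process.inc hunks later in this patch) fold hand-written EINTR retry loops into sys::RetryAfterSignal. A POSIX-only sketch of a helper with that shape, a simplified stand-in rather than the exact llvm/Support/Errno.h definition:

#include <cerrno>
#include <fcntl.h>
#include <unistd.h>

// Retry F(As...) for as long as it reports failure caused by an
// interrupting signal: result == Fail with errno == EINTR.
template <typename FailT, typename Fun, typename... Args>
auto retryAfterSignal(const FailT &Fail, const Fun &F, const Args &... As)
    -> decltype(F(As...)) {
  decltype(F(As...)) Res;
  do {
    errno = 0;
    Res = F(As...);
  } while (Res == Fail && errno == EINTR);
  return Res;
}

int main() {
  int FD = retryAfterSignal(-1, open, "/dev/null", O_RDONLY);
  if (FD >= 0)
    close(FD);
  return FD >= 0 ? 0 : 1;
}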
-file_magic identify_magic(StringRef Magic) { - if (Magic.size() < 4) - return file_magic::unknown; - switch ((unsigned char)Magic[0]) { - case 0x00: { - // COFF bigobj, CL.exe's LTO object file, or short import library file - if (startswith(Magic, "\0\0\xFF\xFF")) { - size_t MinSize = offsetof(COFF::BigObjHeader, UUID) + sizeof(COFF::BigObjMagic); - if (Magic.size() < MinSize) - return file_magic::coff_import_library; - - const char *Start = Magic.data() + offsetof(COFF::BigObjHeader, UUID); - if (memcmp(Start, COFF::BigObjMagic, sizeof(COFF::BigObjMagic)) == 0) - return file_magic::coff_object; - if (memcmp(Start, COFF::ClGlObjMagic, sizeof(COFF::BigObjMagic)) == 0) - return file_magic::coff_cl_gl_object; - return file_magic::coff_import_library; - } - // Windows resource file - if (startswith(Magic, "\0\0\0\0\x20\0\0\0\xFF")) - return file_magic::windows_resource; - // 0x0000 = COFF unknown machine type - if (Magic[1] == 0) - return file_magic::coff_object; - if (startswith(Magic, "\0asm")) - return file_magic::wasm_object; - break; - } - case 0xDE: // 0x0B17C0DE = BC wraper - if (startswith(Magic, "\xDE\xC0\x17\x0B")) - return file_magic::bitcode; - break; - case 'B': - if (startswith(Magic, "BC\xC0\xDE")) - return file_magic::bitcode; - break; - case '!': - if (startswith(Magic, "!<arch>\n") || startswith(Magic, "!<thin>\n")) - return file_magic::archive; - break; - - case '\177': - if (startswith(Magic, "\177ELF") && Magic.size() >= 18) { - bool Data2MSB = Magic[5] == 2; - unsigned high = Data2MSB ? 16 : 17; - unsigned low = Data2MSB ? 17 : 16; - if (Magic[high] == 0) { - switch (Magic[low]) { - default: return file_magic::elf; - case 1: return file_magic::elf_relocatable; - case 2: return file_magic::elf_executable; - case 3: return file_magic::elf_shared_object; - case 4: return file_magic::elf_core; - } - } - // It's still some type of ELF file. - return file_magic::elf; - } - break; - - case 0xCA: - if (startswith(Magic, "\xCA\xFE\xBA\xBE") || - startswith(Magic, "\xCA\xFE\xBA\xBF")) { - // This is complicated by an overlap with Java class files. - // See the Mach-O section in /usr/share/file/magic for details.
- if (Magic.size() >= 8 && Magic[7] < 43) - return file_magic::macho_universal_binary; - } - break; - - // The two magic numbers for mach-o are: - // 0xfeedface - 32-bit mach-o - // 0xfeedfacf - 64-bit mach-o - case 0xFE: - case 0xCE: - case 0xCF: { - uint16_t type = 0; - if (startswith(Magic, "\xFE\xED\xFA\xCE") || - startswith(Magic, "\xFE\xED\xFA\xCF")) { - /* Native endian */ - size_t MinSize; - if (Magic[3] == char(0xCE)) - MinSize = sizeof(MachO::mach_header); - else - MinSize = sizeof(MachO::mach_header_64); - if (Magic.size() >= MinSize) - type = Magic[12] << 24 | Magic[13] << 12 | Magic[14] << 8 | Magic[15]; - } else if (startswith(Magic, "\xCE\xFA\xED\xFE") || - startswith(Magic, "\xCF\xFA\xED\xFE")) { - /* Reverse endian */ - size_t MinSize; - if (Magic[0] == char(0xCE)) - MinSize = sizeof(MachO::mach_header); - else - MinSize = sizeof(MachO::mach_header_64); - if (Magic.size() >= MinSize) - type = Magic[15] << 24 | Magic[14] << 12 |Magic[13] << 8 | Magic[12]; - } - switch (type) { - default: break; - case 1: return file_magic::macho_object; - case 2: return file_magic::macho_executable; - case 3: return file_magic::macho_fixed_virtual_memory_shared_lib; - case 4: return file_magic::macho_core; - case 5: return file_magic::macho_preload_executable; - case 6: return file_magic::macho_dynamically_linked_shared_lib; - case 7: return file_magic::macho_dynamic_linker; - case 8: return file_magic::macho_bundle; - case 9: return file_magic::macho_dynamically_linked_shared_lib_stub; - case 10: return file_magic::macho_dsym_companion; - case 11: return file_magic::macho_kext_bundle; - } - break; - } - case 0xF0: // PowerPC Windows - case 0x83: // Alpha 32-bit - case 0x84: // Alpha 64-bit - case 0x66: // MPS R4000 Windows - case 0x50: // mc68K - case 0x4c: // 80386 Windows - case 0xc4: // ARMNT Windows - if (Magic[1] == 0x01) - return file_magic::coff_object; - - case 0x90: // PA-RISC Windows - case 0x68: // mc68K Windows - if (Magic[1] == 0x02) - return file_magic::coff_object; - break; - - case 'M': // Possible MS-DOS stub on Windows PE file - if (startswith(Magic, "MZ")) { - uint32_t off = read32le(Magic.data() + 0x3c); - // PE/COFF file, either EXE or DLL. - if (off < Magic.size() && - memcmp(Magic.data()+off, COFF::PEMagic, sizeof(COFF::PEMagic)) == 0) - return file_magic::pecoff_executable; - } - break; - - case 0x64: // x86-64 Windows. 
- if (Magic[1] == char(0x86)) - return file_magic::coff_object; - break; - - default: - break; - } - return file_magic::unknown; -} - -std::error_code identify_magic(const Twine &Path, file_magic &Result) { - int FD; - if (std::error_code EC = openFileForRead(Path, FD)) - return EC; - - char Buffer[32]; - int Length = read(FD, Buffer, sizeof(Buffer)); - if (close(FD) != 0 || Length < 0) - return std::error_code(errno, std::generic_category()); - - Result = identify_magic(StringRef(Buffer, Length)); - return std::error_code(); -} - std::error_code directory_entry::status(file_status &result) const { return fs::status(Path, result, FollowSymlinks); } diff --git a/interpreter/llvm/src/lib/Support/PrettyStackTrace.cpp b/interpreter/llvm/src/lib/Support/PrettyStackTrace.cpp index abf61b73a70d5..a18e9cc50040f 100644 --- a/interpreter/llvm/src/lib/Support/PrettyStackTrace.cpp +++ b/interpreter/llvm/src/lib/Support/PrettyStackTrace.cpp @@ -15,7 +15,7 @@ #include "llvm/Support/PrettyStackTrace.h" #include "llvm-c/ErrorHandling.h" #include "llvm/ADT/SmallString.h" -#include "llvm/Config/config.h" // Get autoconf configuration settings +#include "llvm/Config/config.h" // Get autoconf configuration settings #include "llvm/Support/Compiler.h" #include "llvm/Support/Signals.h" #include "llvm/Support/Watchdog.h" diff --git a/interpreter/llvm/src/lib/Support/Process.cpp b/interpreter/llvm/src/lib/Support/Process.cpp index 290c30f4968f0..caec993ee1653 100644 --- a/interpreter/llvm/src/lib/Support/Process.cpp +++ b/interpreter/llvm/src/lib/Support/Process.cpp @@ -11,11 +11,11 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Support/Process.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Config/config.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" -#include "llvm/Support/Process.h" #include "llvm/Support/Program.h" using namespace llvm; diff --git a/interpreter/llvm/src/lib/Support/RWMutex.cpp b/interpreter/llvm/src/lib/Support/RWMutex.cpp index 6c9781c4e2d6d..83c6d1d52b4c8 100644 --- a/interpreter/llvm/src/lib/Support/RWMutex.cpp +++ b/interpreter/llvm/src/lib/Support/RWMutex.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Config/config.h" #include "llvm/Support/RWMutex.h" +#include "llvm/Config/config.h" //===----------------------------------------------------------------------===// //=== WARNING: Implementation here must contain only TRULY operating system diff --git a/interpreter/llvm/src/lib/Support/SHA1.cpp b/interpreter/llvm/src/lib/Support/SHA1.cpp index 0eefd998cd755..20f41c5ff4472 100644 --- a/interpreter/llvm/src/lib/Support/SHA1.cpp +++ b/interpreter/llvm/src/lib/Support/SHA1.cpp @@ -15,9 +15,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Support/Host.h" #include "llvm/Support/SHA1.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/Support/Host.h" using namespace llvm; #include diff --git a/interpreter/llvm/src/lib/Support/SearchForAddressOfSpecialSymbol.cpp b/interpreter/llvm/src/lib/Support/SearchForAddressOfSpecialSymbol.cpp deleted file mode 100644 index 55f3320f640fd..0000000000000 --- a/interpreter/llvm/src/lib/Support/SearchForAddressOfSpecialSymbol.cpp +++ /dev/null @@ -1,58 +0,0 @@ -//===- SearchForAddressOfSpecialSymbol.cpp - Function addresses -*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open 
Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file pulls the addresses of certain symbols out of the linker. It must -// include as few header files as possible because it declares the symbols as -// void*, which would conflict with the actual symbol type if any header -// declared it. -// -//===----------------------------------------------------------------------===// - -#include - -// Must declare the symbols in the global namespace. -static void *DoSearch(const char* symbolName) { -#define EXPLICIT_SYMBOL(SYM) \ - extern void *SYM; if (!strcmp(symbolName, #SYM)) return &SYM - - // If this is darwin, it has some funky issues, try to solve them here. Some - // important symbols are marked 'private external' which doesn't allow - // SearchForAddressOfSymbol to find them. As such, we special case them here, - // there is only a small handful of them. - -#ifdef __APPLE__ - { - // __eprintf is sometimes used for assert() handling on x86. - // - // FIXME: Currently disabled when using Clang, as we don't always have our - // runtime support libraries available. -#ifndef __clang__ -#ifdef __i386__ - EXPLICIT_SYMBOL(__eprintf); -#endif -#endif - } -#endif - -#ifdef __CYGWIN__ - { - EXPLICIT_SYMBOL(_alloca); - EXPLICIT_SYMBOL(__main); - } -#endif - -#undef EXPLICIT_SYMBOL - return nullptr; -} - -namespace llvm { -void *SearchForAddressOfSpecialSymbol(const char* symbolName) { - return DoSearch(symbolName); -} -} // namespace llvm diff --git a/interpreter/llvm/src/lib/Support/Signals.cpp b/interpreter/llvm/src/lib/Support/Signals.cpp index 57f36bf175b3a..256a22dee87b9 100644 --- a/interpreter/llvm/src/lib/Support/Signals.cpp +++ b/interpreter/llvm/src/lib/Support/Signals.cpp @@ -12,6 +12,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Support/Signals.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/Config/config.h" @@ -23,18 +24,23 @@ #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Mutex.h" #include "llvm/Support/Program.h" -#include "llvm/Support/Signals.h" #include "llvm/Support/StringSaver.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Options.h" #include -namespace llvm { - //===----------------------------------------------------------------------===// //=== WARNING: Implementation here must contain only TRULY operating system //=== independent code. //===----------------------------------------------------------------------===// +using namespace llvm; + +static cl::opt + DisableSymbolication("disable-symbolication", + cl::desc("Disable symbolizing crash backtraces."), + cl::init(false), cl::Hidden); + static ManagedStatic>> CallBacksToRun; void sys::RunSignalHandlers() { @@ -44,9 +50,6 @@ void sys::RunSignalHandlers() { I.first(I.second); CallBacksToRun->clear(); } -} - -using namespace llvm; static bool findModulesAndOffsets(void **StackTrace, int Depth, const char **Modules, intptr_t *Offsets, @@ -70,6 +73,9 @@ static bool printSymbolizedStackTrace(StringRef Argv0, static bool printSymbolizedStackTrace(StringRef Argv0, void **StackTrace, int Depth, llvm::raw_ostream &OS) { + if (DisableSymbolication) + return false; + // Don't recursively invoke the llvm-symbolizer binary. 
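The Signals.cpp hunk above adds a hidden cl::opt<bool>, -disable-symbolication, to gate crash-trace symbolication. For readers less familiar with LLVM's command-line machinery, a minimal sketch of declaring and reading such a flag (the flag name here is invented for the example):

#include "llvm/Support/CommandLine.h"

using namespace llvm;

// A hidden boolean flag, default off: the same shape as the
// -disable-symbolication option added above.
static cl::opt<bool>
    DisableThing("disable-thing",
                 cl::desc("Disable the thing (example flag)."),
                 cl::init(false), cl::Hidden);

int main(int argc, char **argv) {
  cl::ParseCommandLineOptions(argc, argv);
  return DisableThing ? 1 : 0;
}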
if (Argv0.find("llvm-symbolizer") != std::string::npos) return false; diff --git a/interpreter/llvm/src/lib/Support/SourceMgr.cpp b/interpreter/llvm/src/lib/Support/SourceMgr.cpp index 5199fad7d9e93..b0609d4fe047c 100644 --- a/interpreter/llvm/src/lib/Support/SourceMgr.cpp +++ b/interpreter/llvm/src/lib/Support/SourceMgr.cpp @@ -13,18 +13,18 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Support/SourceMgr.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/ErrorOr.h" #include "llvm/Support/Locale.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/SMLoc.h" -#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/raw_ostream.h" #include #include #include diff --git a/interpreter/llvm/src/lib/Support/SpecialCaseList.cpp b/interpreter/llvm/src/lib/Support/SpecialCaseList.cpp index df524b352351e..05886eaa8aee7 100644 --- a/interpreter/llvm/src/lib/Support/SpecialCaseList.cpp +++ b/interpreter/llvm/src/lib/Support/SpecialCaseList.cpp @@ -15,12 +15,12 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/SpecialCaseList.h" -#include "llvm/Support/TrigramIndex.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSet.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Regex.h" +#include "llvm/Support/TrigramIndex.h" #include #include #include diff --git a/interpreter/llvm/src/lib/Support/Statistic.cpp b/interpreter/llvm/src/lib/Support/Statistic.cpp index 0c50dfd27d615..72ca22806c43c 100644 --- a/interpreter/llvm/src/lib/Support/Statistic.cpp +++ b/interpreter/llvm/src/lib/Support/Statistic.cpp @@ -30,8 +30,8 @@ #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/Mutex.h" #include "llvm/Support/Timer.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/YAMLTraits.h" +#include "llvm/Support/raw_ostream.h" #include #include using namespace llvm; diff --git a/interpreter/llvm/src/lib/Support/StringExtras.cpp b/interpreter/llvm/src/lib/Support/StringExtras.cpp index 3e2420f677605..b2f42dfcc04d9 100644 --- a/interpreter/llvm/src/lib/Support/StringExtras.cpp +++ b/interpreter/llvm/src/lib/Support/StringExtras.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/SmallVector.h" using namespace llvm; /// StrInStrNoCase - Portable version of strcasestr. Locates the first diff --git a/interpreter/llvm/src/lib/Support/TargetParser.cpp b/interpreter/llvm/src/lib/Support/TargetParser.cpp index b16351906a4c4..e8ef1d2fd8b9b 100644 --- a/interpreter/llvm/src/lib/Support/TargetParser.cpp +++ b/interpreter/llvm/src/lib/Support/TargetParser.cpp @@ -452,6 +452,8 @@ bool llvm::AArch64::getExtensionFeatures(unsigned Extensions, Features.push_back("+ras"); if (Extensions & AArch64::AEK_LSE) Features.push_back("+lse"); + if (Extensions & AArch64::AEK_SVE) + Features.push_back("+sve"); return true; } @@ -784,6 +786,42 @@ unsigned llvm::ARM::parseArchVersion(StringRef Arch) { return 0; } +StringRef llvm::ARM::computeDefaultTargetABI(const Triple &TT, StringRef CPU) { + StringRef ArchName = + CPU.empty() ? 
TT.getArchName() : ARM::getArchName(ARM::parseCPUArch(CPU)); + + if (TT.isOSBinFormatMachO()) { + if (TT.getEnvironment() == Triple::EABI || + TT.getOS() == Triple::UnknownOS || + llvm::ARM::parseArchProfile(ArchName) == ARM::PK_M) + return "aapcs"; + if (TT.isWatchABI()) + return "aapcs16"; + return "apcs-gnu"; + } else if (TT.isOSWindows()) + // FIXME: this is invalid for WindowsCE. + return "aapcs"; + + // Select the default based on the platform. + switch (TT.getEnvironment()) { + case Triple::Android: + case Triple::GNUEABI: + case Triple::GNUEABIHF: + case Triple::MuslEABI: + case Triple::MuslEABIHF: + return "aapcs-linux"; + case Triple::EABIHF: + case Triple::EABI: + return "aapcs"; + default: + if (TT.isOSNetBSD()) + return "apcs-gnu"; + if (TT.isOSOpenBSD()) + return "aapcs-linux"; + return "aapcs"; + } +} + StringRef llvm::AArch64::getCanonicalArchName(StringRef Arch) { return ARM::getCanonicalArchName(Arch); } diff --git a/interpreter/llvm/src/lib/Support/ThreadLocal.cpp b/interpreter/llvm/src/lib/Support/ThreadLocal.cpp index 9da1603080a2a..9a75c02b351f8 100644 --- a/interpreter/llvm/src/lib/Support/ThreadLocal.cpp +++ b/interpreter/llvm/src/lib/Support/ThreadLocal.cpp @@ -11,9 +11,9 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Support/ThreadLocal.h" #include "llvm/Config/config.h" #include "llvm/Support/Compiler.h" -#include "llvm/Support/ThreadLocal.h" //===----------------------------------------------------------------------===// //=== WARNING: Implementation here must contain only TRULY operating system diff --git a/interpreter/llvm/src/lib/Support/ThreadPool.cpp b/interpreter/llvm/src/lib/Support/ThreadPool.cpp index db03a4d6240d2..22b7550d49714 100644 --- a/interpreter/llvm/src/lib/Support/ThreadPool.cpp +++ b/interpreter/llvm/src/lib/Support/ThreadPool.cpp @@ -53,11 +53,7 @@ ThreadPool::ThreadPool(unsigned ThreadCount) Tasks.pop(); } // Run the task we just grabbed -#ifndef _MSC_VER Task(); -#else - Task(/* unused */ false); -#endif { // Adjust `ActiveThreads`, in case someone waits on ThreadPool::wait() @@ -82,7 +78,7 @@ void ThreadPool::wait() { [&] { return !ActiveThreads && Tasks.empty(); }); } -std::shared_future ThreadPool::asyncImpl(TaskTy Task) { +std::shared_future ThreadPool::asyncImpl(TaskTy Task) { /// Wrap the Task in a packaged_task to return a future object. PackagedTaskTy PackagedTask(std::move(Task)); auto Future = PackagedTask.get_future(); @@ -128,25 +124,16 @@ void ThreadPool::wait() { while (!Tasks.empty()) { auto Task = std::move(Tasks.front()); Tasks.pop(); -#ifndef _MSC_VER - Task(); -#else - Task(/* unused */ false); -#endif + Task(); } } -std::shared_future ThreadPool::asyncImpl(TaskTy Task) { -#ifndef _MSC_VER +std::shared_future ThreadPool::asyncImpl(TaskTy Task) { // Get a Future with launch::deferred execution using std::async auto Future = std::async(std::launch::deferred, std::move(Task)).share(); // Wrap the future so that both ThreadPool::wait() can operate and the // returned future can be sync'ed on. 
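The single-threaded ThreadPool fallback shown here defers every task with std::async and then wraps the shared future in a packaged_task (the PackagedTask line just below), so the wait() loop can force execution while callers still block on the returned future. The std-only core of that trick:

#include <future>
#include <queue>

int main() {
  // Deferred: the lambda does not run until someone calls get()/wait().
  std::shared_future<void> Future =
      std::async(std::launch::deferred, [] {}).share();

  // Wrap the future so a generic task queue can force it.
  std::packaged_task<void()> PackagedTask([Future] { Future.get(); });
  std::queue<std::packaged_task<void()>> Tasks;
  Tasks.push(std::move(PackagedTask));

  // The wait() loop drains the queue, running each deferred task.
  while (!Tasks.empty()) {
    Tasks.front()();
    Tasks.pop();
  }

  Future.get(); // Already satisfied; returns immediately.
  return 0;
}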
PackagedTaskTy PackagedTask([Future]() { Future.get(); }); -#else - auto Future = std::async(std::launch::deferred, std::move(Task), false).share(); - PackagedTaskTy PackagedTask([Future](bool) -> bool { Future.get(); return false; }); -#endif Tasks.push(std::move(PackagedTask)); return Future; } diff --git a/interpreter/llvm/src/lib/Support/Timer.cpp b/interpreter/llvm/src/lib/Support/Timer.cpp index 8d68c6ae9682a..3386f2660f31a 100644 --- a/interpreter/llvm/src/lib/Support/Timer.cpp +++ b/interpreter/llvm/src/lib/Support/Timer.cpp @@ -20,8 +20,8 @@ #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/Mutex.h" #include "llvm/Support/Process.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/YAMLTraits.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; // This ugly hack is brought to you courtesy of constructor/destructor ordering @@ -72,10 +72,15 @@ std::unique_ptr llvm::CreateInfoOutputFile() { return llvm::make_unique(2, false); // stderr. } -static TimerGroup *getDefaultTimerGroup() { - static TimerGroup DefaultTimerGroup("misc", "Miscellaneous Ungrouped Timers"); - return &DefaultTimerGroup; -} +namespace { +struct CreateDefaultTimerGroup { + static void *call() { + return new TimerGroup("misc", "Miscellaneous Ungrouped Timers"); + } +}; +} // namespace +static ManagedStatic DefaultTimerGroup; +static TimerGroup *getDefaultTimerGroup() { return &*DefaultTimerGroup; } //===----------------------------------------------------------------------===// // Timer Implementation diff --git a/interpreter/llvm/src/lib/Support/TrigramIndex.cpp b/interpreter/llvm/src/lib/Support/TrigramIndex.cpp index 85ab5287566b3..721763c885252 100644 --- a/interpreter/llvm/src/lib/Support/TrigramIndex.cpp +++ b/interpreter/llvm/src/lib/Support/TrigramIndex.cpp @@ -18,9 +18,9 @@ #include "llvm/Support/TrigramIndex.h" #include "llvm/ADT/SmallVector.h" -#include #include #include +#include using namespace llvm; diff --git a/interpreter/llvm/src/lib/Support/Triple.cpp b/interpreter/llvm/src/lib/Support/Triple.cpp index eb8108908ac5f..2687a67556d3e 100644 --- a/interpreter/llvm/src/lib/Support/Triple.cpp +++ b/interpreter/llvm/src/lib/Support/Triple.cpp @@ -12,8 +12,8 @@ #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/TargetParser.h" #include "llvm/Support/Host.h" +#include "llvm/Support/TargetParser.h" #include using namespace llvm; @@ -34,6 +34,7 @@ StringRef Triple::getArchTypeName(ArchType Kind) { case mips64: return "mips64"; case mips64el: return "mips64el"; case msp430: return "msp430"; + case nios2: return "nios2"; case ppc64: return "powerpc64"; case ppc64le: return "powerpc64le"; case ppc: return "powerpc"; @@ -98,6 +99,8 @@ StringRef Triple::getArchTypePrefix(ArchType Kind) { case mips64: case mips64el: return "mips"; + case nios2: return "nios2"; + case hexagon: return "hexagon"; case amdgcn: return "amdgcn"; @@ -171,6 +174,7 @@ StringRef Triple::getOSTypeName(OSType Kind) { switch (Kind) { case UnknownOS: return "unknown"; + case Ananas: return "ananas"; case CloudABI: return "cloudabi"; case Darwin: return "darwin"; case DragonFly: return "dragonfly"; @@ -262,6 +266,7 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) { .Case("mips64", mips64) .Case("mips64el", mips64el) .Case("msp430", msp430) + .Case("nios2", nios2) .Case("ppc64", ppc64) .Case("ppc32", ppc) .Case("ppc", ppc) @@ -384,6 +389,7 @@ static Triple::ArchType parseArch(StringRef ArchName) { 
.Cases("mipsel", "mipsallegrexel", Triple::mipsel) .Cases("mips64", "mips64eb", Triple::mips64) .Case("mips64el", Triple::mips64el) + .Case("nios2", Triple::nios2) .Case("r600", Triple::r600) .Case("amdgcn", Triple::amdgcn) .Case("riscv32", Triple::riscv32) @@ -450,6 +456,7 @@ static Triple::VendorType parseVendor(StringRef VendorName) { static Triple::OSType parseOS(StringRef OSName) { return StringSwitch(OSName) + .StartsWith("ananas", Triple::Ananas) .StartsWith("cloudabi", Triple::CloudABI) .StartsWith("darwin", Triple::Darwin) .StartsWith("dragonfly", Triple::DragonFly) @@ -625,6 +632,7 @@ static Triple::ObjectFormatType getDefaultFormat(const Triple &T) { case Triple::mips64el: case Triple::mipsel: case Triple::msp430: + case Triple::nios2: case Triple::nvptx: case Triple::nvptx64: case Triple::ppc64le: @@ -871,6 +879,10 @@ std::string Triple::normalize(StringRef Str) { } } + // SUSE uses "gnueabi" to mean "gnueabihf" + if (Vendor == Triple::SUSE && Environment == llvm::Triple::GNUEABI) + Components[3] = "gnueabihf"; + if (OS == Triple::Win32) { Components.resize(4); Components[2] = "windows"; @@ -1160,6 +1172,7 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) { case llvm::Triple::le32: case llvm::Triple::mips: case llvm::Triple::mipsel: + case llvm::Triple::nios2: case llvm::Triple::nvptx: case llvm::Triple::ppc: case llvm::Triple::r600: @@ -1243,6 +1256,7 @@ Triple Triple::get32BitArchVariant() const { case Triple::le32: case Triple::mips: case Triple::mipsel: + case Triple::nios2: case Triple::nvptx: case Triple::ppc: case Triple::r600: @@ -1290,6 +1304,7 @@ Triple Triple::get64BitArchVariant() const { case Triple::kalimba: case Triple::lanai: case Triple::msp430: + case Triple::nios2: case Triple::r600: case Triple::tce: case Triple::tcele: @@ -1361,6 +1376,7 @@ Triple Triple::getBigEndianArchVariant() const { case Triple::le32: case Triple::le64: case Triple::msp430: + case Triple::nios2: case Triple::nvptx64: case Triple::nvptx: case Triple::r600: @@ -1447,6 +1463,7 @@ bool Triple::isLittleEndian() const { case Triple::mips64el: case Triple::mipsel: case Triple::msp430: + case Triple::nios2: case Triple::nvptx64: case Triple::nvptx: case Triple::ppc64le: @@ -1472,6 +1489,39 @@ bool Triple::isLittleEndian() const { } } +bool Triple::isCompatibleWith(const Triple &Other) const { + // ARM and Thumb triples are compatible, if subarch, vendor and OS match. + if ((getArch() == Triple::thumb && Other.getArch() == Triple::arm) || + (getArch() == Triple::arm && Other.getArch() == Triple::thumb) || + (getArch() == Triple::thumbeb && Other.getArch() == Triple::armeb) || + (getArch() == Triple::armeb && Other.getArch() == Triple::thumbeb)) { + if (getVendor() == Triple::Apple) + return getSubArch() == Other.getSubArch() && + getVendor() == Other.getVendor() && getOS() == Other.getOS(); + else + return getSubArch() == Other.getSubArch() && + getVendor() == Other.getVendor() && getOS() == Other.getOS() && + getEnvironment() == Other.getEnvironment() && + getObjectFormat() == Other.getObjectFormat(); + } + + // If vendor is apple, ignore the version number. + if (getVendor() == Triple::Apple) + return getArch() == Other.getArch() && getSubArch() == Other.getSubArch() && + getVendor() == Other.getVendor() && getOS() == Other.getOS(); + + return *this == Other; +} + +std::string Triple::merge(const Triple &Other) const { + // If vendor is apple, pick the triple with the larger version number. 
+ if (getVendor() == Triple::Apple) + if (Other.isOSVersionLT(*this)) + return str(); + + return Other.str(); +} + StringRef Triple::getARMCPUForArch(StringRef MArch) const { if (MArch.empty()) MArch = getArchName(); diff --git a/interpreter/llvm/src/lib/Support/Unix/DynamicLibrary.inc b/interpreter/llvm/src/lib/Support/Unix/DynamicLibrary.inc index a0526fa2c1b80..f05103ccd1ebe 100644 --- a/interpreter/llvm/src/lib/Support/Unix/DynamicLibrary.inc +++ b/interpreter/llvm/src/lib/Support/Unix/DynamicLibrary.inc @@ -15,10 +15,14 @@ #include DynamicLibrary::HandleSet::~HandleSet() { - for (void *Handle : Handles) + // Close the libraries in reverse order. + for (void *Handle : llvm::reverse(Handles)) ::dlclose(Handle); if (Process) ::dlclose(Process); + + // llvm_shutdown called, Return to default + DynamicLibrary::SearchOrder = DynamicLibrary::SO_Linker; } void *DynamicLibrary::HandleSet::DLOpen(const char *File, std::string *Err) { @@ -101,10 +105,10 @@ static void *DoSearch(const char* SymbolName) { #define EXPLICIT_SYMBOL(SYM) \ if (!strcmp(SymbolName, #SYM)) return &SYM -// On linux we have a weird situation. The stderr/out/in symbols are both +// Under glibc we have a weird situation. The stderr/out/in symbols are both // macros and global variables because of standards requirements. So, we // boldly use the EXPLICIT_SYMBOL macro without checking for a #define first. -#if defined(__linux__) and !defined(__ANDROID__) +#if defined(__GLIBC__) { EXPLICIT_SYMBOL(stderr); EXPLICIT_SYMBOL(stdout); diff --git a/interpreter/llvm/src/lib/Support/Unix/Host.inc b/interpreter/llvm/src/lib/Support/Unix/Host.inc index 457217125a222..5580e63893c6d 100644 --- a/interpreter/llvm/src/lib/Support/Unix/Host.inc +++ b/interpreter/llvm/src/lib/Support/Unix/Host.inc @@ -34,16 +34,35 @@ static std::string getOSVersion() { return info.release; } -std::string sys::getDefaultTargetTriple() { - std::string TargetTripleString(LLVM_DEFAULT_TARGET_TRIPLE); - - // On darwin, we want to update the version to match that of the - // target. +static std::string updateTripleOSVersion(std::string TargetTripleString) { + // On darwin, we want to update the version to match that of the target. std::string::size_type DarwinDashIdx = TargetTripleString.find("-darwin"); if (DarwinDashIdx != std::string::npos) { TargetTripleString.resize(DarwinDashIdx + strlen("-darwin")); TargetTripleString += getOSVersion(); + return TargetTripleString; + } + std::string::size_type MacOSDashIdx = TargetTripleString.find("-macos"); + if (MacOSDashIdx != std::string::npos) { + TargetTripleString.resize(MacOSDashIdx); + // Reset the OS to darwin as the OS version from `uname` doesn't use the + // macOS version scheme. + TargetTripleString += "-darwin"; + TargetTripleString += getOSVersion(); } + return TargetTripleString; +} + +std::string sys::getDefaultTargetTriple() { + std::string TargetTripleString = + updateTripleOSVersion(LLVM_DEFAULT_TARGET_TRIPLE); + + // Override the default target with an environment variable named by + // LLVM_TARGET_TRIPLE_ENV. 
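getDefaultTargetTriple() above now lets an environment variable, whose name is fixed at configure time through LLVM_TARGET_TRIPLE_ENV, override the compiled-in default triple (the #if block just below). A sketch of the pattern, with an invented variable name standing in for the configure-time value:

#include <cstdlib>
#include <string>

// "EXAMPLE_TARGET_TRIPLE" is a placeholder; the real variable name is
// whatever LLVM_TARGET_TRIPLE_ENV was defined to when LLVM was configured.
static std::string defaultTargetTriple(std::string CompiledIn) {
  if (const char *EnvTriple = std::getenv("EXAMPLE_TARGET_TRIPLE"))
    return EnvTriple;
  return CompiledIn;
}

int main() {
  return defaultTargetTriple("x86_64-unknown-linux-gnu").empty() ? 1 : 0;
}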
+#if defined(LLVM_TARGET_TRIPLE_ENV) + if (const char *EnvTriple = std::getenv(LLVM_TARGET_TRIPLE_ENV)) + TargetTripleString = EnvTriple; +#endif return Triple::normalize(TargetTripleString); } diff --git a/interpreter/llvm/src/lib/Support/Unix/Memory.inc b/interpreter/llvm/src/lib/Support/Unix/Memory.inc index edbc7938f0cbf..dd39ef935bf92 100644 --- a/interpreter/llvm/src/lib/Support/Unix/Memory.inc +++ b/interpreter/llvm/src/lib/Support/Unix/Memory.inc @@ -195,6 +195,10 @@ Memory::AllocateRWX(size_t NumBytes, const MemoryBlock* NearBlock, #if defined(__APPLE__) && (defined(__arm__) || defined(__arm64__)) void *pa = ::mmap(start, PageSize*NumPages, PROT_READ|PROT_EXEC, flags, fd, 0); +#elif defined(__NetBSD__) && defined(PROT_MPROTECT) + void *pa = + ::mmap(start, PageSize * NumPages, + PROT_READ | PROT_WRITE | PROT_MPROTECT(PROT_EXEC), flags, fd, 0); #else void *pa = ::mmap(start, PageSize*NumPages, PROT_READ|PROT_WRITE|PROT_EXEC, flags, fd, 0); diff --git a/interpreter/llvm/src/lib/Support/Unix/Path.inc b/interpreter/llvm/src/lib/Support/Unix/Path.inc index fa28ba1b6ab6a..45097eb918b7c 100644 --- a/interpreter/llvm/src/lib/Support/Unix/Path.inc +++ b/interpreter/llvm/src/lib/Support/Unix/Path.inc @@ -75,8 +75,8 @@ #define STATVFS_F_FRSIZE(vfs) vfs.f_frsize #else #if defined(__OpenBSD__) || defined(__FreeBSD__) -#include #include +#include #elif defined(__linux__) #if defined(HAVE_LINUX_MAGIC_H) #include @@ -381,6 +381,11 @@ static bool is_local_impl(struct STATVFS &Vfs) { #elif defined(__CYGWIN__) // Cygwin doesn't expose this information; would need to use Win32 API. return false; +#elif defined(__sun) + // statvfs::f_basetype contains a null-terminated FSType name of the mounted target + StringRef fstype(Vfs.f_basetype); + // NFS is the only non-local fstype?? + return !fstype.equals("nfs"); #else return !!(STATVFS_F_FLAG(Vfs) & MNT_LOCAL); #endif @@ -732,10 +737,8 @@ std::error_code openFileForRead(const Twine &Name, int &ResultFD, #ifdef O_CLOEXEC OpenFlags |= O_CLOEXEC; #endif - while ((ResultFD = open(P.begin(), OpenFlags)) < 0) { - if (errno != EINTR) - return std::error_code(errno, std::generic_category()); - } + if ((ResultFD = sys::RetryAfterSignal(-1, open, P.begin(), OpenFlags)) < 0) + return std::error_code(errno, std::generic_category()); #ifndef O_CLOEXEC int r = fcntl(ResultFD, F_SETFD, FD_CLOEXEC); (void)r; @@ -795,10 +798,8 @@ std::error_code openFileForWrite(const Twine &Name, int &ResultFD, SmallString<128> Storage; StringRef P = Name.toNullTerminatedStringRef(Storage); - while ((ResultFD = open(P.begin(), OpenFlags, Mode)) < 0) { - if (errno != EINTR) - return std::error_code(errno, std::generic_category()); - } + if ((ResultFD = sys::RetryAfterSignal(-1, open, P.begin(), OpenFlags, Mode)) < 0) + return std::error_code(errno, std::generic_category()); #ifndef O_CLOEXEC int r = fcntl(ResultFD, F_SETFD, FD_CLOEXEC); (void)r; diff --git a/interpreter/llvm/src/lib/Support/Unix/Process.inc b/interpreter/llvm/src/lib/Support/Unix/Process.inc index 1d0143c6716e0..2d4662094682f 100644 --- a/interpreter/llvm/src/lib/Support/Unix/Process.inc +++ b/interpreter/llvm/src/lib/Support/Unix/Process.inc @@ -207,13 +207,10 @@ std::error_code Process::FixupStandardFileDescriptors() { for (int StandardFD : StandardFDs) { struct stat st; errno = 0; - while (fstat(StandardFD, &st) < 0) { + if (RetryAfterSignal(-1, fstat, StandardFD, &st) < 0) { assert(errno && "expected errno to be set if fstat failed!"); // fstat should return EBADF if the file descriptor is closed. 
- if (errno == EBADF) - break; - // retry fstat if we got EINTR, otherwise bubble up the failure. - if (errno != EINTR) + if (errno != EBADF) return std::error_code(errno, std::generic_category()); } // if fstat succeeds, move on to the next FD. @@ -222,11 +219,8 @@ std::error_code Process::FixupStandardFileDescriptors() { assert(errno == EBADF && "expected errno to have EBADF at this point!"); if (NullFD < 0) { - while ((NullFD = open("/dev/null", O_RDWR)) < 0) { - if (errno == EINTR) - continue; + if ((NullFD = RetryAfterSignal(-1, open, "/dev/null", O_RDWR)) < 0) return std::error_code(errno, std::generic_category()); - } } if (NullFD == StandardFD) diff --git a/interpreter/llvm/src/lib/Support/Unix/Program.inc b/interpreter/llvm/src/lib/Support/Unix/Program.inc index 7d3537e20727e..c866d5b5a84ef 100644 --- a/interpreter/llvm/src/lib/Support/Unix/Program.inc +++ b/interpreter/llvm/src/lib/Support/Unix/Program.inc @@ -40,9 +40,6 @@ #include #endif #ifdef HAVE_POSIX_SPAWN -#ifdef __sun__ -#define _RESTRICT_KYWD -#endif #include #if defined(__APPLE__) @@ -163,16 +160,6 @@ static void SetMemoryLimits (unsigned size) r.rlim_cur = limit; setrlimit (RLIMIT_RSS, &r); #endif -#ifdef RLIMIT_AS // e.g. NetBSD doesn't have it. - // Don't set virtual memory limit if built with any Sanitizer. They need 80Tb - // of virtual memory for shadow memory mapping. -#if !LLVM_MEMORY_SANITIZER_BUILD && !LLVM_ADDRESS_SANITIZER_BUILD - // Virtual memory. - getrlimit (RLIMIT_AS, &r); - r.rlim_cur = limit; - setrlimit (RLIMIT_AS, &r); -#endif -#endif #endif } @@ -459,11 +446,22 @@ bool llvm::sys::commandLineFitsWithinSystemLimits(StringRef Program, ArrayRef::iterator I = Args.begin(), E = Args.end(); I != E; ++I) { - ArgLength += strlen(*I) + 1; + size_t length = strlen(*I); + + // Ensure that we do not exceed the MAX_ARG_STRLEN constant on Linux, which + // does not have a constant unlike what the man pages would have you + // believe. Since this limit is pretty high, perform the check + // unconditionally rather than trying to be aggressive and limiting it to + // Linux only. 
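The per-argument check just below enforces the limit this comment describes: MAX_ARG_STRLEN on Linux is 32 pages, i.e. 32 * 4096 = 131072 bytes with 4 KiB pages, and the patch applies it on every platform for simplicity. As a freestanding predicate:

#include <cstring>

// MAX_ARG_STRLEN on Linux is 32 pages; with the usual 4 KiB page size that
// allows strings of up to 131071 characters plus the terminating NUL.
static bool argWithinKernelLimit(const char *Arg) {
  return std::strlen(Arg) < 32u * 4096u;
}

int main() { return argWithinKernelLimit("-O2") ? 0 : 1; }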
+ if (length >= (32 * 4096)) + return false; + + ArgLength += length + 1; if (ArgLength > size_t(HalfArgMax)) { return false; } } + return true; } } diff --git a/interpreter/llvm/src/lib/Support/Unix/Signals.inc b/interpreter/llvm/src/lib/Support/Unix/Signals.inc index 88ad21e9806ed..aaf760c5b6166 100644 --- a/interpreter/llvm/src/lib/Support/Unix/Signals.inc +++ b/interpreter/llvm/src/lib/Support/Unix/Signals.inc @@ -15,9 +15,9 @@ #include "Unix.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Demangle/Demangle.h" -#include "llvm/Support/Format.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/FileUtilities.h" +#include "llvm/Support/Format.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Mutex.h" #include "llvm/Support/Program.h" diff --git a/interpreter/llvm/src/lib/Support/Unix/Threading.inc b/interpreter/llvm/src/lib/Support/Unix/Threading.inc index 407b194e1b6ae..267af388ecdbc 100644 --- a/interpreter/llvm/src/lib/Support/Unix/Threading.inc +++ b/interpreter/llvm/src/lib/Support/Unix/Threading.inc @@ -26,19 +26,19 @@ #endif #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) +#include #include #include -#include #include #endif #if defined(__NetBSD__) -#include // For _lwp_self() +#include // For _lwp_self() #endif #if defined(__linux__) -#include // For syscall() -#include // For syscall codes +#include // For syscall codes +#include // For syscall() #endif namespace { diff --git a/interpreter/llvm/src/lib/Support/Windows/DynamicLibrary.inc b/interpreter/llvm/src/lib/Support/Windows/DynamicLibrary.inc index 709499deeafa9..083ea902eeb29 100644 --- a/interpreter/llvm/src/lib/Support/Windows/DynamicLibrary.inc +++ b/interpreter/llvm/src/lib/Support/Windows/DynamicLibrary.inc @@ -12,98 +12,142 @@ //===----------------------------------------------------------------------===// #include "WindowsSupport.h" +#include "llvm/Support/raw_ostream.h" -#ifdef __MINGW32__ - #include -#else - #include -#endif - -#ifdef _MSC_VER - #include -#endif - -namespace llvm { +#include //===----------------------------------------------------------------------===// //=== WARNING: Implementation here must contain only Win32 specific code //=== and must not be UNIX code. //===----------------------------------------------------------------------===// -typedef BOOL (WINAPI *fpEnumerateLoadedModules)(HANDLE,PENUMLOADED_MODULES_CALLBACK64,PVOID); -static fpEnumerateLoadedModules fEnumerateLoadedModules; -static llvm::ManagedStatic > OpenedHandles; -static bool loadDebugHelp(void) { - HMODULE hLib = ::LoadLibraryW(L"Dbghelp.dll"); - if (hLib) { - fEnumerateLoadedModules = (fpEnumerateLoadedModules) - ::GetProcAddress(hLib, "EnumerateLoadedModules64"); - } - return fEnumerateLoadedModules != 0; -} +DynamicLibrary::HandleSet::~HandleSet() { + for (void *Handle : llvm::reverse(Handles)) + FreeLibrary(HMODULE(Handle)); -static BOOL CALLBACK -ELM_Callback(PCSTR ModuleName, DWORD64 ModuleBase, - ULONG ModuleSize, PVOID UserContext) { - OpenedHandles->insert((HMODULE)ModuleBase); - return TRUE; + // 'Process' should not be released on Windows. + assert((!Process || Process==this) && "Bad Handle"); + // llvm_shutdown called, Return to default + DynamicLibrary::SearchOrder = DynamicLibrary::SO_Linker; } -sys::DynamicLibrary -sys::DynamicLibrary::getPermanentLibrary(const char *filename, - std::string *errMsg) { - SmartScopedLock lock(*SymbolsMutex); - - if (!filename) { - // When no file is specified, enumerate all DLLs and EXEs in the process. 
- if (!fEnumerateLoadedModules) { - if (!loadDebugHelp()) { - assert(false && "These APIs should always be available"); - return DynamicLibrary(); - } - } +void *DynamicLibrary::HandleSet::DLOpen(const char *File, std::string *Err) { + // Create the instance and return it to be the *Process* handle + // similar to dlopen(NULL, RTLD_LAZY|RTLD_GLOBAL) + if (!File) + return &(*OpenedHandles); - fEnumerateLoadedModules(GetCurrentProcess(), ELM_Callback, 0); - // Dummy library that represents "search all handles". - // This is mostly to ensure that the return value still shows up as "valid". - return DynamicLibrary(&OpenedHandles); - } - - SmallVector filenameUnicode; - if (std::error_code ec = windows::UTF8ToUTF16(filename, filenameUnicode)) { + SmallVector FileUnicode; + if (std::error_code ec = windows::UTF8ToUTF16(File, FileUnicode)) { SetLastError(ec.value()); - MakeErrMsg(errMsg, std::string(filename) + ": Can't convert to UTF-16"); - return DynamicLibrary(); + MakeErrMsg(Err, std::string(File) + ": Can't convert to UTF-16"); + return &DynamicLibrary::Invalid; } - HMODULE a_handle = LoadLibraryW(filenameUnicode.data()); - - if (a_handle == 0) { - MakeErrMsg(errMsg, std::string(filename) + ": Can't open"); - return DynamicLibrary(); + HMODULE Handle = LoadLibraryW(FileUnicode.data()); + if (Handle == NULL) { + MakeErrMsg(Err, std::string(File) + ": Can't open"); + return &DynamicLibrary::Invalid; } - // If we've already loaded this library, FreeLibrary() the handle in order to - // keep the internal refcount at +1. - if (!OpenedHandles->insert(a_handle).second) - FreeLibrary(a_handle); + return reinterpret_cast(Handle); +} - return DynamicLibrary(a_handle); +static DynamicLibrary::HandleSet *IsOpenedHandlesInstance(void *Handle) { + if (!OpenedHandles.isConstructed()) + return nullptr; + DynamicLibrary::HandleSet &Inst = *OpenedHandles; + return Handle == &Inst ? &Inst : nullptr; } -sys::DynamicLibrary -sys::DynamicLibrary::addPermanentLibrary(void *handle, std::string *errMsg) { - SmartScopedLock lock(*SymbolsMutex); - // If we've already loaded this library, tell the caller. - if (!OpenedHandles->insert((HMODULE)handle).second) { - MakeErrMsg(errMsg, "Library already loaded"); - return DynamicLibrary(); +void DynamicLibrary::HandleSet::DLClose(void *Handle) { + if (HandleSet* HS = IsOpenedHandlesInstance(Handle)) + HS->Process = nullptr; // Just drop the *Process* handle. + else + FreeLibrary((HMODULE)Handle); +} + +static bool GetProcessModules(HANDLE H, DWORD &Bytes, HMODULE *Data = nullptr) { + // EnumProcessModules will fail on Windows 64 while some versions of + // MingW-32 don't have EnumProcessModulesEx. + if ( +#ifdef _WIN64 + !EnumProcessModulesEx(H, Data, Bytes, &Bytes, LIST_MODULES_64BIT) +#else + !EnumProcessModules(H, Data, Bytes, &Bytes) +#endif + ) { + std::string Err; + if (MakeErrMsg(&Err, "EnumProcessModules failure")) + llvm::errs() << Err << "\n"; + return false; } + return true; +} - return DynamicLibrary(handle); +void *DynamicLibrary::HandleSet::DLSym(void *Handle, const char *Symbol) { + HandleSet* HS = IsOpenedHandlesInstance(Handle); + if (!HS) + return (void *)uintptr_t(GetProcAddress((HMODULE)Handle, Symbol)); + + // Could have done a dlclose on the *Process* handle + if (!HS->Process) + return nullptr; + + // Trials indicate EnumProcessModulesEx is consistently faster than using + // EnumerateLoadedModules64 or CreateToolhelp32Snapshot.
+ // + // | Handles | DbgHelp.dll | CreateSnapshot | EnumProcessModulesEx + // |=========|=============|======================================== + // | 37 | 0.0000585 * | 0.0003031 | 0.0000152 + // | 1020 | 0.0026310 * | 0.0121598 | 0.0002683 + // | 2084 | 0.0149418 * | 0.0369936 | 0.0005610 + // + // * Not including the load time of Dbghelp.dll (~.005 sec) + // + // There's still a case to somehow cache the result of EnumProcessModulesEx + // across invocations, but the complication of doing that properly... + // Possibly using LdrRegisterDllNotification to invalidate the cache? + + DWORD Bytes = 0; + HMODULE Self = HMODULE(GetCurrentProcess()); + if (!GetProcessModules(Self, Bytes)) + return nullptr; + + // Get the most recent list in case any modules added/removed between calls + // to EnumProcessModulesEx that gets the amount of, then copies the HMODULES. + // MSDN is pretty clear that if the module list changes during the call to + // EnumProcessModulesEx the results should not be used. + std::vector Handles; + do { + assert(Bytes && ((Bytes % sizeof(HMODULE)) == 0) && + "Should have at least one module and be aligned"); + Handles.resize(Bytes / sizeof(HMODULE)); + if (!GetProcessModules(Self, Bytes, Handles.data())) + return nullptr; + } while (Bytes != (Handles.size() * sizeof(HMODULE))); + + // Try EXE first, mirroring what dlsym(dlopen(NULL)) does. + if (FARPROC Ptr = GetProcAddress(HMODULE(Handles.front()), Symbol)) + return (void *) uintptr_t(Ptr); + + if (Handles.size() > 1) { + // This is different behaviour than what Posix dlsym(dlopen(NULL)) does. + // Doing that here is causing real problems for the JIT where msvc.dll + // and ucrt.dll can define the same symbols. The runtime linker will choose + // symbols from ucrt.dll first, but iterating NOT in reverse here would + // mean that the msvc.dll versions would be returned. + + for (auto I = Handles.rbegin(), E = Handles.rend()-1; I != E; ++I) { + if (FARPROC Ptr = GetProcAddress(HMODULE(*I), Symbol)) + return (void *) uintptr_t(Ptr); + } + } + return nullptr; } + // Stack probing routines are in the support library (e.g. libgcc), but we don't // have dynamic linking on windows. Provide a hook. #define EXPLICIT_SYMBOL(SYM) \ @@ -129,38 +173,18 @@ sys::DynamicLibrary::addPermanentLibrary(void *handle, std::string *errMsg) { #undef INLINE_DEF_SYMBOL1 #undef INLINE_DEF_SYMBOL2 -void *sys::DynamicLibrary::SearchForAddressOfSymbol(const char *symbolName) { - SmartScopedLock Lock(*SymbolsMutex); - - // First check symbols added via AddSymbol(). - if (ExplicitSymbols.isConstructed()) { - StringMap::iterator i = ExplicitSymbols->find(symbolName); - - if (i != ExplicitSymbols->end()) - return i->second; - } - - // Now search the libraries. 
- if (OpenedHandles.isConstructed()) { - for (DenseSet::iterator I = OpenedHandles->begin(), - E = OpenedHandles->end(); I != E; ++I) { - FARPROC ptr = GetProcAddress((HMODULE)*I, symbolName); - if (ptr) { - return (void *)(intptr_t)ptr; - } - } - } +static void *DoSearch(const char *SymbolName) { #define EXPLICIT_SYMBOL(SYM) \ - if (!strcmp(symbolName, #SYM)) \ + if (!strcmp(SymbolName, #SYM)) \ return (void *)&SYM; #define EXPLICIT_SYMBOL2(SYMFROM, SYMTO) \ - if (!strcmp(symbolName, #SYMFROM)) \ + if (!strcmp(SymbolName, #SYMFROM)) \ return (void *)&SYMTO; #ifdef _M_IX86 #define INLINE_DEF_SYMBOL1(TYP, SYM) \ - if (!strcmp(symbolName, #SYM)) \ + if (!strcmp(SymbolName, #SYM)) \ return (void *)&inline_##SYM; #define INLINE_DEF_SYMBOL2(TYP, SYM) INLINE_DEF_SYMBOL1(TYP, SYM) #endif @@ -174,15 +198,5 @@ void *sys::DynamicLibrary::SearchForAddressOfSymbol(const char *symbolName) { #undef INLINE_DEF_SYMBOL1 #undef INLINE_DEF_SYMBOL2 - return 0; -} - -void *sys::DynamicLibrary::getAddressOfSymbol(const char *symbolName) { - if (!isValid()) - return NULL; - if (Data == &OpenedHandles) - return SearchForAddressOfSymbol(symbolName); - return (void *)(intptr_t)GetProcAddress((HMODULE)Data, symbolName); -} - + return nullptr; } diff --git a/interpreter/llvm/src/lib/Support/Windows/Host.inc b/interpreter/llvm/src/lib/Support/Windows/Host.inc index fe89fe0aad8c4..90a6fb316703f 100644 --- a/interpreter/llvm/src/lib/Support/Windows/Host.inc +++ b/interpreter/llvm/src/lib/Support/Windows/Host.inc @@ -17,6 +17,18 @@ using namespace llvm; +static std::string updateTripleOSVersion(std::string Triple) { + return Triple; +} + std::string sys::getDefaultTargetTriple() { - return Triple::normalize(LLVM_DEFAULT_TARGET_TRIPLE); + const char *Triple = LLVM_DEFAULT_TARGET_TRIPLE; + + // Override the default target with an environment variable named by LLVM_TARGET_TRIPLE_ENV. +#if defined(LLVM_TARGET_TRIPLE_ENV) + if (const char *EnvTriple = std::getenv(LLVM_TARGET_TRIPLE_ENV)) + Triple = EnvTriple; +#endif + + return Triple::normalize(Triple); } diff --git a/interpreter/llvm/src/lib/Support/Windows/WindowsSupport.h b/interpreter/llvm/src/lib/Support/Windows/WindowsSupport.h index c358b99ab96aa..d4599dca044e9 100644 --- a/interpreter/llvm/src/lib/Support/Windows/WindowsSupport.h +++ b/interpreter/llvm/src/lib/Support/Windows/WindowsSupport.h @@ -45,7 +45,9 @@ #include #include #include -#include // Must be included after windows.h + +// Must be included after windows.h +#include /// Determines if the program is running on Windows 8 or newer. 
This /// reimplements one of the helpers in the Windows 8.1 SDK, which are intended diff --git a/interpreter/llvm/src/lib/Support/YAMLParser.cpp b/interpreter/llvm/src/lib/Support/YAMLParser.cpp index c17a6f6e1ea63..e2f21a56a810a 100644 --- a/interpreter/llvm/src/lib/Support/YAMLParser.cpp +++ b/interpreter/llvm/src/lib/Support/YAMLParser.cpp @@ -1,4 +1,4 @@ -//===--- YAMLParser.cpp - Simple YAML parser ------------------------------===// +//===- YAMLParser.cpp - Simple YAML parser --------------------------------===// // // The LLVM Compiler Infrastructure // @@ -12,16 +12,30 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/YAMLParser.h" +#include "llvm/ADT/AllocatorList.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/None.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" -#include "llvm/ADT/AllocatorList.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/SMLoc.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include using namespace llvm; using namespace yaml; @@ -37,7 +51,7 @@ enum UnicodeEncodingForm { /// EncodingInfo - Holds the encoding type and length of the byte order mark if /// it exists. Length is in {0, 2, 3, 4}. -typedef std::pair EncodingInfo; +using EncodingInfo = std::pair; /// getUnicodeEncoding - Reads up to the first 4 bytes to determine the Unicode /// encoding form of \a Input. @@ -46,7 +60,7 @@ typedef std::pair EncodingInfo; /// @returns An EncodingInfo indicating the Unicode encoding form of the input /// and how long the byte order mark is if one exists. static EncodingInfo getUnicodeEncoding(StringRef Input) { - if (Input.size() == 0) + if (Input.empty()) return std::make_pair(UEF_Unknown, 0); switch (uint8_t(Input[0])) { @@ -95,8 +109,6 @@ static EncodingInfo getUnicodeEncoding(StringRef Input) { return std::make_pair(UEF_UTF8, 0); } -namespace llvm { -namespace yaml { /// Pin the vtables to this file. void Node::anchor() {} void NullNode::anchor() {} @@ -107,6 +119,9 @@ void MappingNode::anchor() {} void SequenceNode::anchor() {} void AliasNode::anchor() {} +namespace llvm { +namespace yaml { + /// Token - A single YAML token. struct Token { enum TokenKind { @@ -133,7 +148,7 @@ struct Token { TK_Alias, TK_Anchor, TK_Tag - } Kind; + } Kind = TK_Error; /// A string of length 0 or more whose begin() points to the logical location /// of the token in the input. @@ -142,14 +157,16 @@ struct Token { /// The value of a block scalar node. std::string Value; - Token() : Kind(TK_Error) {} + Token() = default; }; -} -} -typedef llvm::BumpPtrList TokenQueueT; +} // end namespace yaml +} // end namespace llvm + +using TokenQueueT = BumpPtrList; namespace { + /// @brief This struct is used to track simple keys. /// /// Simple keys are handled by creating an entry in SimpleKeys for each Token @@ -170,12 +187,13 @@ struct SimpleKey { return Tok == Other.Tok; } }; -} + +} // end anonymous namespace /// @brief The Unicode scalar value of a UTF-8 minimal well-formed code unit /// subsequence and the subsequence's length in code units (uint8_t). /// A length of 0 represents an error. 
-typedef std::pair UTF8Decoded; +using UTF8Decoded = std::pair; static UTF8Decoded decodeUTF8(StringRef Range) { StringRef::iterator Position= Range.begin(); @@ -229,6 +247,7 @@ static UTF8Decoded decodeUTF8(StringRef Range) { namespace llvm { namespace yaml { + /// @brief Scans YAML tokens from a MemoryBuffer. class Scanner { public: @@ -350,7 +369,8 @@ class Scanner { /// ns-char. StringRef::iterator skip_ns_char(StringRef::iterator Position); - typedef StringRef::iterator (Scanner::*SkipWhileFunc)(StringRef::iterator); + using SkipWhileFunc = StringRef::iterator (Scanner::*)(StringRef::iterator); + /// @brief Skip minimal well-formed code unit subsequences until Func /// returns its input. /// @@ -655,10 +675,10 @@ bool yaml::dumpTokens(StringRef Input, raw_ostream &OS) { } bool yaml::scanTokens(StringRef Input) { - llvm::SourceMgr SM; - llvm::yaml::Scanner scanner(Input, SM); - for (;;) { - llvm::yaml::Token T = scanner.getNext(); + SourceMgr SM; + Scanner scanner(Input, SM); + while (true) { + Token T = scanner.getNext(); if (T.Kind == Token::TK_StreamEnd) break; else if (T.Kind == Token::TK_Error) @@ -1744,7 +1764,7 @@ Stream::Stream(MemoryBufferRef InputBuffer, SourceMgr &SM, bool ShowColors, std::error_code *EC) : scanner(new Scanner(InputBuffer, SM, ShowColors, EC)), CurrentDoc() {} -Stream::~Stream() {} +Stream::~Stream() = default; bool Stream::failed() { return scanner->failed(); } @@ -1851,8 +1871,6 @@ bool Node::failed() const { return Doc->failed(); } - - StringRef ScalarNode::getValue(SmallVectorImpl &Storage) const { // TODO: Handle newlines properly. We need to remove leading whitespace. if (Value[0] == '"') { // Double quoted. @@ -2116,6 +2134,7 @@ void MappingNode::increment() { break; default: setError("Unexpected token. Expected Key or Block End", T); + LLVM_FALLTHROUGH; case Token::TK_Error: IsAtEnd = true; CurrentEntry = nullptr; @@ -2128,6 +2147,7 @@ void MappingNode::increment() { return increment(); case Token::TK_FlowMappingEnd: getNext(); + LLVM_FALLTHROUGH; case Token::TK_Error: // Set this to end iterator. IsAtEnd = true; @@ -2170,6 +2190,7 @@ void SequenceNode::increment() { default: setError( "Unexpected token. Expected Block Entry or Block End." , T); + LLVM_FALLTHROUGH; case Token::TK_Error: IsAtEnd = true; CurrentEntry = nullptr; @@ -2198,6 +2219,7 @@ void SequenceNode::increment() { return increment(); case Token::TK_FlowSequenceEnd: getNext(); + LLVM_FALLTHROUGH; case Token::TK_Error: // Set this to end iterator. 
IsAtEnd = true; diff --git a/interpreter/llvm/src/lib/Support/YAMLTraits.cpp b/interpreter/llvm/src/lib/Support/YAMLTraits.cpp index c410b1d560860..65eda246a7fea 100644 --- a/interpreter/llvm/src/lib/Support/YAMLTraits.cpp +++ b/interpreter/llvm/src/lib/Support/YAMLTraits.cpp @@ -8,17 +8,27 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/YAMLTraits.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Errc.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/LineIterator.h" +#include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/YAMLParser.h" #include "llvm/Support/raw_ostream.h" -#include +#include +#include +#include +#include #include +#include +#include + using namespace llvm; using namespace yaml; @@ -26,11 +36,9 @@ using namespace yaml; // IO //===----------------------------------------------------------------------===// -IO::IO(void *Context) : Ctxt(Context) { -} +IO::IO(void *Context) : Ctxt(Context) {} -IO::~IO() { -} +IO::~IO() = default; void *IO::getContext() { return Ctxt; @@ -46,16 +54,22 @@ void IO::setContext(void *Context) { Input::Input(StringRef InputContent, void *Ctxt, SourceMgr::DiagHandlerTy DiagHandler, void *DiagHandlerCtxt) - : IO(Ctxt), Strm(new Stream(InputContent, SrcMgr, false, &EC)), - CurrentNode(nullptr) { + : IO(Ctxt), Strm(new Stream(InputContent, SrcMgr, false, &EC)) { if (DiagHandler) SrcMgr.setDiagHandler(DiagHandler, DiagHandlerCtxt); DocIterator = Strm->begin(); } -Input::~Input() { +Input::Input(MemoryBufferRef Input, void *Ctxt, + SourceMgr::DiagHandlerTy DiagHandler, void *DiagHandlerCtxt) + : IO(Ctxt), Strm(new Stream(Input, SrcMgr, false, &EC)) { + if (DiagHandler) + SrcMgr.setDiagHandler(DiagHandler, DiagHandlerCtxt); + DocIterator = Strm->begin(); } +Input::~Input() = default; + std::error_code Input::error() { return EC; } // Pin the vtables to this file. 
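About the LLVM_FALLTHROUGH markers added to the switch statements in the YAMLParser.cpp hunks above: the macro documents intentional fall-through between case labels so -Wimplicit-fallthrough style warnings stay quiet. A plausible definition, sketched for illustration (the real one lives in llvm/Support/Compiler.h and dispatches on attribute support):

#if __cplusplus >= 201703L
#define LLVM_FALLTHROUGH [[fallthrough]]          // standard C++17 attribute
#elif defined(__clang__)
#define LLVM_FALLTHROUGH [[clang::fallthrough]]   // Clang extension
#elif defined(__GNUC__) && __GNUC__ >= 7
#define LLVM_FALLTHROUGH __attribute__((fallthrough))
#else
#define LLVM_FALLTHROUGH                          // no-op elsewhere
#endif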
@@ -398,13 +412,9 @@ bool Input::canElideEmptySequence() { //===----------------------------------------------------------------------===// Output::Output(raw_ostream &yout, void *context, int WrapColumn) - : IO(context), Out(yout), WrapColumn(WrapColumn), Column(0), - ColumnAtFlowStart(0), ColumnAtMapFlowStart(0), NeedBitValueComma(false), - NeedFlowSequenceComma(false), EnumerationMatchFound(false), - NeedsNewLine(false), WriteDefaultValues(false) {} + : IO(context), Out(yout), WrapColumn(WrapColumn) {} -Output::~Output() { -} +Output::~Output() = default; bool Output::outputting() { return true; @@ -911,12 +921,9 @@ void ScalarTraits::output(const double &Val, void *, raw_ostream &Out) { } StringRef ScalarTraits::input(StringRef Scalar, void *, double &Val) { - SmallString<32> buff(Scalar.begin(), Scalar.end()); - char *end; - Val = strtod(buff.c_str(), &end); - if (*end != '\0') - return "invalid floating point number"; - return StringRef(); + if (to_float(Scalar, Val)) + return StringRef(); + return "invalid floating point number"; } void ScalarTraits::output(const float &Val, void *, raw_ostream &Out) { @@ -924,12 +931,9 @@ void ScalarTraits::output(const float &Val, void *, raw_ostream &Out) { } StringRef ScalarTraits::input(StringRef Scalar, void *, float &Val) { - SmallString<32> buff(Scalar.begin(), Scalar.end()); - char *end; - Val = strtod(buff.c_str(), &end); - if (*end != '\0') - return "invalid floating point number"; - return StringRef(); + if (to_float(Scalar, Val)) + return StringRef(); + return "invalid floating point number"; } void ScalarTraits::output(const Hex8 &Val, void *, raw_ostream &Out) { diff --git a/interpreter/llvm/src/lib/Support/raw_ostream.cpp b/interpreter/llvm/src/lib/Support/raw_ostream.cpp index 1abc8ed8683d5..dd58eccee9579 100644 --- a/interpreter/llvm/src/lib/Support/raw_ostream.cpp +++ b/interpreter/llvm/src/lib/Support/raw_ostream.cpp @@ -326,13 +326,30 @@ raw_ostream &raw_ostream::operator<<(const formatv_object_base &Obj) { } raw_ostream &raw_ostream::operator<<(const FormattedString &FS) { - unsigned Len = FS.Str.size(); - int PadAmount = FS.Width - Len; - if (FS.RightJustify && (PadAmount > 0)) - this->indent(PadAmount); - this->operator<<(FS.Str); - if (!FS.RightJustify && (PadAmount > 0)) + if (FS.Str.size() >= FS.Width || FS.Justify == FormattedString::JustifyNone) { + this->operator<<(FS.Str); + return *this; + } + const size_t Difference = FS.Width - FS.Str.size(); + switch (FS.Justify) { + case FormattedString::JustifyLeft: + this->operator<<(FS.Str); + this->indent(Difference); + break; + case FormattedString::JustifyRight: + this->indent(Difference); + this->operator<<(FS.Str); + break; + case FormattedString::JustifyCenter: { + int PadAmount = Difference / 2; this->indent(PadAmount); + this->operator<<(FS.Str); + this->indent(Difference - PadAmount); + break; + } + default: + llvm_unreachable("Bad Justification"); + } return *this; } @@ -548,7 +565,11 @@ void raw_fd_ostream::write_impl(const char *Ptr, size_t Size) { pos += Size; #ifndef LLVM_ON_WIN32 +#if defined(__linux__) + bool ShouldWriteInChunks = true; +#else bool ShouldWriteInChunks = false; +#endif #else // Writing a large size of output to Windows console returns ENOMEM. 
It seems // that, prior to Windows 8, WriteFile() is redirecting to WriteConsole(), and diff --git a/interpreter/llvm/src/lib/TableGen/Record.cpp b/interpreter/llvm/src/lib/TableGen/Record.cpp index 33d3de5daf33f..b2636e1e6cb4d 100644 --- a/interpreter/llvm/src/lib/TableGen/Record.cpp +++ b/interpreter/llvm/src/lib/TableGen/Record.cpp @@ -11,20 +11,28 @@ // //===----------------------------------------------------------------------===// -#include "llvm/TableGen/Record.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/FoldingSet.h" -#include "llvm/ADT/Hashing.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/SMLoc.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/TableGen/Error.h" +#include "llvm/TableGen/Record.h" #include #include -#include +#include +#include +#include +#include using namespace llvm; @@ -162,7 +170,8 @@ RecTy *llvm::resolveTypes(RecTy *T1, RecTy *T2) { // Initializer implementations //===----------------------------------------------------------------------===// -void Init::anchor() { } +void Init::anchor() {} + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void Init::dump() const { return print(errs()); } #endif @@ -219,7 +228,6 @@ ProfileBitsInit(FoldingSetNodeID &ID, ArrayRef Range) { BitsInit *BitsInit::get(ArrayRef Range) { static FoldingSet ThePool; - static std::vector TheActualPool; FoldingSetNodeID ID; ProfileBitsInit(ID, Range); @@ -234,7 +242,6 @@ BitsInit *BitsInit::get(ArrayRef Range) { std::uninitialized_copy(Range.begin(), Range.end(), I->getTrailingObjects()); ThePool.InsertNode(I, IP); - TheActualPool.push_back(I); return I; } @@ -303,7 +310,6 @@ static Init *fixBitInit(const RecordVal *RV, Init *Before, Init *After) { // resolveReferences - If there are any field references that refer to fields // that have been filled in, we can propagate the values now. 
-// Init *BitsInit::resolveReferences(Record &R, const RecordVal *RV) const { bool Changed = false; SmallVector NewBits(getNumBits()); @@ -407,27 +413,21 @@ IntInit::convertInitializerBitRange(ArrayRef Bits) const { } CodeInit *CodeInit::get(StringRef V) { - static DenseMap ThePool; + static StringMap ThePool(Allocator); - auto I = ThePool.insert(std::make_pair(V, nullptr)); - if (I.second) { - StringRef VCopy = V.copy(Allocator); - I.first->first = VCopy; - I.first->second = new(Allocator) CodeInit(VCopy); - } - return I.first->second; + auto &Entry = *ThePool.insert(std::make_pair(V, nullptr)).first; + if (!Entry.second) + Entry.second = new(Allocator) CodeInit(Entry.getKey()); + return Entry.second; } StringInit *StringInit::get(StringRef V) { - static DenseMap ThePool; + static StringMap ThePool(Allocator); - auto I = ThePool.insert(std::make_pair(V, nullptr)); - if (I.second) { - StringRef VCopy = V.copy(Allocator); - I.first->first = VCopy; - I.first->second = new(Allocator) StringInit(VCopy); - } - return I.first->second; + auto &Entry = *ThePool.insert(std::make_pair(V, nullptr)).first; + if (!Entry.second) + Entry.second = new(Allocator) StringInit(Entry.getKey()); + return Entry.second; } Init *StringInit::convertInitializerTo(RecTy *Ty) const { @@ -456,7 +456,6 @@ static void ProfileListInit(FoldingSetNodeID &ID, ListInit *ListInit::get(ArrayRef Range, RecTy *EltTy) { static FoldingSet ThePool; - static std::vector TheActualPool; FoldingSetNodeID ID; ProfileListInit(ID, Range, EltTy); @@ -471,7 +470,6 @@ ListInit *ListInit::get(ArrayRef Range, RecTy *EltTy) { std::uninitialized_copy(Range.begin(), Range.end(), I->getTrailingObjects()); ThePool.InsertNode(I, IP); - TheActualPool.push_back(I); return I; } @@ -606,7 +604,6 @@ ProfileUnOpInit(FoldingSetNodeID &ID, unsigned Opcode, Init *Op, RecTy *Type) { UnOpInit *UnOpInit::get(UnaryOp Opc, Init *LHS, RecTy *Type) { static FoldingSet ThePool; - static std::vector TheActualPool; FoldingSetNodeID ID; ProfileUnOpInit(ID, Opc, LHS, Type); @@ -617,7 +614,6 @@ UnOpInit *UnOpInit::get(UnaryOp Opc, Init *LHS, RecTy *Type) { UnOpInit *I = new(Allocator) UnOpInit(Opc, LHS, Type); ThePool.InsertNode(I, IP); - TheActualPool.push_back(I); return I; } @@ -627,7 +623,7 @@ void UnOpInit::Profile(FoldingSetNodeID &ID) const { Init *UnOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const { switch (getOpcode()) { - case CAST: { + case CAST: if (isa(getType())) { if (StringInit *LHSs = dyn_cast(LHS)) return LHSs; @@ -692,15 +688,15 @@ Init *UnOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const { } } break; - } - case HEAD: { + + case HEAD: if (ListInit *LHSl = dyn_cast(LHS)) { assert(!LHSl->empty() && "Empty list in head"); return LHSl->getElement(0); } break; - } - case TAIL: { + + case TAIL: if (ListInit *LHSl = dyn_cast(LHS)) { assert(!LHSl->empty() && "Empty list in tail"); // Note the +1. 
We can't just pass the result of getValues() @@ -708,16 +704,14 @@ Init *UnOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const { return ListInit::get(LHSl->getValues().slice(1), LHSl->getType()); } break; - } - case EMPTY: { + + case EMPTY: if (ListInit *LHSl = dyn_cast(LHS)) return IntInit::get(LHSl->empty()); if (StringInit *LHSs = dyn_cast(LHS)) return IntInit::get(LHSs->getValue().empty()); - break; } - } return const_cast(this); } @@ -752,7 +746,6 @@ ProfileBinOpInit(FoldingSetNodeID &ID, unsigned Opcode, Init *LHS, Init *RHS, BinOpInit *BinOpInit::get(BinaryOp Opc, Init *LHS, Init *RHS, RecTy *Type) { static FoldingSet ThePool; - static std::vector TheActualPool; FoldingSetNodeID ID; ProfileBinOpInit(ID, Opc, LHS, RHS, Type); @@ -763,7 +756,6 @@ BinOpInit *BinOpInit::get(BinaryOp Opc, Init *LHS, BinOpInit *I = new(Allocator) BinOpInit(Opc, LHS, RHS, Type); ThePool.InsertNode(I, IP); - TheActualPool.push_back(I); return I; } @@ -910,7 +902,6 @@ ProfileTernOpInit(FoldingSetNodeID &ID, unsigned Opcode, Init *LHS, Init *MHS, TernOpInit *TernOpInit::get(TernaryOp Opc, Init *LHS, Init *MHS, Init *RHS, RecTy *Type) { static FoldingSet ThePool; - static std::vector TheActualPool; FoldingSetNodeID ID; ProfileTernOpInit(ID, Opc, LHS, MHS, RHS, Type); @@ -921,7 +912,6 @@ TernOpInit *TernOpInit::get(TernaryOp Opc, Init *LHS, Init *MHS, Init *RHS, TernOpInit *I = new(Allocator) TernOpInit(Opc, LHS, MHS, RHS, Type); ThePool.InsertNode(I, IP); - TheActualPool.push_back(I); return I; } @@ -964,7 +954,6 @@ static Init *EvaluateOperation(OpInit *RHSo, Init *LHS, Init *Arg, static Init *ForeachHelper(Init *LHS, Init *MHS, Init *RHS, RecTy *Type, Record *CurRec, MultiClass *CurMultiClass) { - OpInit *RHSo = dyn_cast(RHS); if (!RHSo) @@ -1261,7 +1250,7 @@ VarInit *VarInit::get(StringRef VN, RecTy *T) { } VarInit *VarInit::get(Init *VN, RecTy *T) { - typedef std::pair Key; + using Key = std::pair; static DenseMap ThePool; Key TheKey(std::make_pair(T, VN)); @@ -1336,7 +1325,7 @@ Init *VarInit::resolveReferences(Record &R, const RecordVal *RV) const { } VarBitInit *VarBitInit::get(TypedInit *T, unsigned B) { - typedef std::pair Key; + using Key = std::pair; static DenseMap ThePool; Key TheKey(std::make_pair(T, B)); @@ -1368,7 +1357,7 @@ Init *VarBitInit::resolveReferences(Record &R, const RecordVal *RV) const { VarListElementInit *VarListElementInit::get(TypedInit *T, unsigned E) { - typedef std::pair Key; + using Key = std::pair; static DenseMap ThePool; Key TheKey(std::make_pair(T, E)); @@ -1438,7 +1427,7 @@ std::string DefInit::getAsString() const { } FieldInit *FieldInit::get(Init *R, StringInit *FN) { - typedef std::pair Key; + using Key = std::pair; static DenseMap ThePool; Key TheKey(std::make_pair(R, FN)); @@ -1503,7 +1492,6 @@ DagInit * DagInit::get(Init *V, StringInit *VN, ArrayRef ArgRange, ArrayRef NameRange) { static FoldingSet ThePool; - static std::vector TheActualPool; FoldingSetNodeID ID; ProfileDagInit(ID, V, VN, ArgRange, NameRange); @@ -1512,9 +1500,13 @@ DagInit::get(Init *V, StringInit *VN, ArrayRef ArgRange, if (DagInit *I = ThePool.FindNodeOrInsertPos(ID, IP)) return I; - DagInit *I = new(Allocator) DagInit(V, VN, ArgRange, NameRange); + void *Mem = Allocator.Allocate(totalSizeToAlloc(ArgRange.size(), NameRange.size()), alignof(BitsInit)); + DagInit *I = new(Mem) DagInit(V, VN, ArgRange.size(), NameRange.size()); + std::uninitialized_copy(ArgRange.begin(), ArgRange.end(), + I->getTrailingObjects()); + std::uninitialized_copy(NameRange.begin(), NameRange.end(), + 
I->getTrailingObjects()); ThePool.InsertNode(I, IP); - TheActualPool.push_back(I); return I; } @@ -1533,7 +1525,7 @@ DagInit::get(Init *V, StringInit *VN, } void DagInit::Profile(FoldingSetNodeID &ID) const { - ProfileDagInit(ID, Val, ValName, Args, ArgNames); + ProfileDagInit(ID, Val, ValName, makeArrayRef(getTrailingObjects(), NumArgs), makeArrayRef(getTrailingObjects(), NumArgNames)); } Init *DagInit::convertInitializerTo(RecTy *Ty) const { @@ -1545,9 +1537,9 @@ Init *DagInit::convertInitializerTo(RecTy *Ty) const { Init *DagInit::resolveReferences(Record &R, const RecordVal *RV) const { SmallVector NewArgs; - NewArgs.reserve(Args.size()); + NewArgs.reserve(arg_size()); bool ArgsChanged = false; - for (const Init *Arg : Args) { + for (const Init *Arg : getArgs()) { Init *NewArg = Arg->resolveReferences(R, RV); NewArgs.push_back(NewArg); ArgsChanged |= NewArg != Arg; @@ -1555,7 +1547,7 @@ Init *DagInit::resolveReferences(Record &R, const RecordVal *RV) const { Init *Op = Val->resolveReferences(R, RV); if (Op != Val || ArgsChanged) - return DagInit::get(Op, ValName, NewArgs, ArgNames); + return DagInit::get(Op, ValName, NewArgs, getArgNames()); return const_cast(this); } @@ -1564,12 +1556,12 @@ std::string DagInit::getAsString() const { std::string Result = "(" + Val->getAsString(); if (ValName) Result += ":" + ValName->getAsUnquotedString(); - if (!Args.empty()) { - Result += " " + Args[0]->getAsString(); - if (ArgNames[0]) Result += ":$" + ArgNames[0]->getAsUnquotedString(); - for (unsigned i = 1, e = Args.size(); i != e; ++i) { - Result += ", " + Args[i]->getAsString(); - if (ArgNames[i]) Result += ":$" + ArgNames[i]->getAsUnquotedString(); + if (!arg_empty()) { + Result += " " + getArg(0)->getAsString(); + if (getArgName(0)) Result += ":$" + getArgName(0)->getAsUnquotedString(); + for (unsigned i = 1, e = getNumArgs(); i != e; ++i) { + Result += ", " + getArg(i)->getAsString(); + if (getArgName(i)) Result += ":$" + getArgName(i)->getAsUnquotedString(); } } return Result + ")"; @@ -1585,12 +1577,6 @@ RecordVal::RecordVal(Init *N, RecTy *T, bool P) assert(Value && "Cannot create unset value for current type!"); } -RecordVal::RecordVal(StringRef N, RecTy *T, bool P) - : Name(StringInit::get(N)), TyAndPrefix(T, P) { - Value = UnsetInit::get()->convertInitializerTo(T); - assert(Value && "Cannot create unset value for current type!"); -} - StringRef RecordVal::getName() const { return cast(getNameInit())->getValue(); } @@ -1616,8 +1602,7 @@ void Record::init() { // Every record potentially has a def at the top. This value is // replaced with the top-level def name at instantiation time. - RecordVal DN("NAME", StringRecTy::get(), false); - addValue(DN); + addValue(RecordVal(StringInit::get("NAME"), StringRecTy::get(), false)); } void Record::checkName() { @@ -1653,10 +1638,6 @@ void Record::setName(Init *NewName) { // this. See TGParser::ParseDef and TGParser::ParseDefm. 
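The DagInit::get rewrite above switches the node to the llvm::TrailingObjects idiom: the argument and name arrays are laid out inline after the object in one bump-allocator allocation instead of living in per-node vectors. A self-contained sketch of the pattern with a hypothetical single-array node (NodeSketch and its members are illustrative, not the actual Record.h declarations):

#include "llvm/ADT/ArrayRef.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/TrailingObjects.h"
#include <memory>

class Init; // stand-in for the TableGen Init hierarchy

class NodeSketch final : private llvm::TrailingObjects<NodeSketch, Init *> {
  friend class llvm::TrailingObjects<NodeSketch, Init *>;

  Init *Op;         // the operator, like DagInit's Val
  unsigned NumArgs; // how many Init* follow this object in memory

  NodeSketch(Init *Op, unsigned N) : Op(Op), NumArgs(N) {}

public:
  static NodeSketch *get(Init *Op, llvm::ArrayRef<Init *> Args,
                         llvm::BumpPtrAllocator &Alloc) {
    // One allocation covers the node plus its trailing Init* array.
    void *Mem = Alloc.Allocate(totalSizeToAlloc<Init *>(Args.size()),
                               alignof(NodeSketch));
    NodeSketch *N = new (Mem) NodeSketch(Op, Args.size());
    std::uninitialized_copy(Args.begin(), Args.end(),
                            N->getTrailingObjects<Init *>());
    return N;
  }

  llvm::ArrayRef<Init *> args() const {
    return llvm::makeArrayRef(getTrailingObjects<Init *>(), NumArgs);
  }
};

The payoff mirrors the patch: a single allocation per node, with lifetime owned by the allocator, which is also why the redundant TheActualPool keep-alive vectors could be deleted elsewhere in this file.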
} -void Record::setName(StringRef Name) { - setName(StringInit::get(Name)); -} - void Record::resolveReferencesTo(const RecordVal *RV) { for (RecordVal &Value : Values) { if (RV == &Value) // Skip resolve the same field as the given one @@ -1727,7 +1708,7 @@ Init *Record::getValueInit(StringRef FieldName) const { return R->getValue(); } -std::string Record::getValueAsString(StringRef FieldName) const { +StringRef Record::getValueAsString(StringRef FieldName) const { const RecordVal *R = getValue(FieldName); if (!R || !R->getValue()) PrintFatalError(getLoc(), "Record `" + getName() + @@ -1806,10 +1787,10 @@ Record::getValueAsListOfInts(StringRef FieldName) const { return Ints; } -std::vector +std::vector Record::getValueAsListOfStrings(StringRef FieldName) const { ListInit *List = getValueAsListInit(FieldName); - std::vector Strings; + std::vector Strings; for (Init *I : List->getValues()) { if (StringInit *SI = dyn_cast(I)) Strings.push_back(SI->getValue()); diff --git a/interpreter/llvm/src/lib/TableGen/SetTheory.cpp b/interpreter/llvm/src/lib/TableGen/SetTheory.cpp index a4d33051b4f70..733e0aeef6234 100644 --- a/interpreter/llvm/src/lib/TableGen/SetTheory.cpp +++ b/interpreter/llvm/src/lib/TableGen/SetTheory.cpp @@ -12,18 +12,29 @@ // //===----------------------------------------------------------------------===// -#include "llvm/TableGen/SetTheory.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/Format.h" +#include "llvm/Support/SMLoc.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/TableGen/Error.h" #include "llvm/TableGen/Record.h" +#include "llvm/TableGen/SetTheory.h" +#include +#include +#include +#include using namespace llvm; // Define the standard operators. namespace { -typedef SetTheory::RecSet RecSet; -typedef SetTheory::RecVec RecVec; +using RecSet = SetTheory::RecSet; +using RecVec = SetTheory::RecVec; // (add a, b, ...) Evaluate and union all arguments. struct AddOp : public SetTheory::Operator { @@ -237,13 +248,13 @@ struct FieldExpander : public SetTheory::Expander { ST.evaluate(Def->getValueInit(FieldName), Elts, Def->getLoc()); } }; + } // end anonymous namespace // Pin the vtables to this file. void SetTheory::Operator::anchor() {} void SetTheory::Expander::anchor() {} - SetTheory::SetTheory() { addOperator("add", llvm::make_unique()); addOperator("sub", llvm::make_unique()); @@ -321,4 +332,3 @@ const RecVec *SetTheory::expand(Record *Set) { // Set is not expandable. 
return nullptr; } - diff --git a/interpreter/llvm/src/lib/TableGen/StringMatcher.cpp b/interpreter/llvm/src/lib/TableGen/StringMatcher.cpp index 0c83da65e19ed..7e510f0c2fdc0 100644 --- a/interpreter/llvm/src/lib/TableGen/StringMatcher.cpp +++ b/interpreter/llvm/src/lib/TableGen/StringMatcher.cpp @@ -11,9 +11,9 @@ // //===----------------------------------------------------------------------===// +#include "llvm/TableGen/StringMatcher.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/TableGen/StringMatcher.h" #include #include #include diff --git a/interpreter/llvm/src/lib/TableGen/TGParser.cpp b/interpreter/llvm/src/lib/TableGen/TGParser.cpp index 96015b06d798f..b492cf9495c02 100644 --- a/interpreter/llvm/src/lib/TableGen/TGParser.cpp +++ b/interpreter/llvm/src/lib/TableGen/TGParser.cpp @@ -339,7 +339,7 @@ bool TGParser::ProcessForeachDefs(Record *CurRec, SMLoc Loc, IterSet &IterVals){ if (!IVal) return Error(Loc, "foreach iterator value is untyped"); - IterRec->addValue(RecordVal(IterVar->getName(), IVal->getType(), false)); + IterRec->addValue(RecordVal(IterVar->getNameInit(), IVal->getType(), false)); if (SetValue(IterRec.get(), Loc, IterVar->getNameInit(), None, IVal)) return Error(Loc, "when instantiating this def"); @@ -378,8 +378,8 @@ static bool isObjectStart(tgtok::TokKind K) { /// GetNewAnonymousName - Generate a unique anonymous name that can be used as /// an identifier. -std::string TGParser::GetNewAnonymousName() { - return "anonymous_" + utostr(AnonCounter++); +Init *TGParser::GetNewAnonymousName() { + return StringInit::get("anonymous_" + utostr(AnonCounter++)); } /// ParseObjectName - If an object name is specified, return it. Otherwise, @@ -2350,7 +2350,7 @@ Record *TGParser::InstantiateMulticlassDef(MultiClass &MC, Record *DefProto, bool IsAnonymous = false; if (!DefmPrefix) { - DefmPrefix = StringInit::get(GetNewAnonymousName()); + DefmPrefix = GetNewAnonymousName(); IsAnonymous = true; } diff --git a/interpreter/llvm/src/lib/TableGen/TGParser.h b/interpreter/llvm/src/lib/TableGen/TGParser.h index 76f7d8fe5026a..1b2966c9f6c9c 100644 --- a/interpreter/llvm/src/lib/TableGen/TGParser.h +++ b/interpreter/llvm/src/lib/TableGen/TGParser.h @@ -110,7 +110,7 @@ class TGParser { bool AddSubMultiClass(MultiClass *CurMC, SubMultiClassReference &SubMultiClass); - std::string GetNewAnonymousName(); + Init *GetNewAnonymousName(); // IterRecord: Map an iterator name to a value. 
struct IterRecord { diff --git a/interpreter/llvm/src/lib/Target/AArch64/AArch64.h b/interpreter/llvm/src/lib/Target/AArch64/AArch64.h index 3e0e3978b90b5..1dda746a6be1e 100644 --- a/interpreter/llvm/src/lib/Target/AArch64/AArch64.h +++ b/interpreter/llvm/src/lib/Target/AArch64/AArch64.h @@ -31,6 +31,7 @@ class MachineFunctionPass; FunctionPass *createAArch64DeadRegisterDefinitions(); FunctionPass *createAArch64RedundantCopyEliminationPass(); +FunctionPass *createAArch64CondBrTuning(); FunctionPass *createAArch64ConditionalCompares(); FunctionPass *createAArch64AdvSIMDScalar(); FunctionPass *createAArch64ISelDag(AArch64TargetMachine &TM, @@ -43,6 +44,8 @@ ModulePass *createAArch64PromoteConstantPass(); FunctionPass *createAArch64ConditionOptimizerPass(); FunctionPass *createAArch64A57FPLoadBalancing(); FunctionPass *createAArch64A53Fix835769(); +FunctionPass *createFalkorHWPFFixPass(); +FunctionPass *createFalkorMarkStridedAccessesPass(); FunctionPass *createAArch64CleanupLocalDynamicTLSPass(); @@ -55,6 +58,7 @@ void initializeAArch64A53Fix835769Pass(PassRegistry&); void initializeAArch64A57FPLoadBalancingPass(PassRegistry&); void initializeAArch64AdvSIMDScalarPass(PassRegistry&); void initializeAArch64CollectLOHPass(PassRegistry&); +void initializeAArch64CondBrTuningPass(PassRegistry &); void initializeAArch64ConditionalComparesPass(PassRegistry&); void initializeAArch64ConditionOptimizerPass(PassRegistry&); void initializeAArch64DeadRegisterDefinitionsPass(PassRegistry&); @@ -64,6 +68,8 @@ void initializeAArch64VectorByElementOptPass(PassRegistry&); void initializeAArch64PromoteConstantPass(PassRegistry&); void initializeAArch64RedundantCopyEliminationPass(PassRegistry&); void initializeAArch64StorePairSuppressPass(PassRegistry&); +void initializeFalkorHWPFFixPass(PassRegistry&); +void initializeFalkorMarkStridedAccessesLegacyPass(PassRegistry&); void initializeLDTLSCleanupPass(PassRegistry&); } // end namespace llvm diff --git a/interpreter/llvm/src/lib/Target/AArch64/AArch64.td b/interpreter/llvm/src/lib/Target/AArch64/AArch64.td index 73f2b6a25f660..436bf1193304c 100644 --- a/interpreter/llvm/src/lib/Target/AArch64/AArch64.td +++ b/interpreter/llvm/src/lib/Target/AArch64/AArch64.td @@ -50,6 +50,9 @@ def FeatureFullFP16 : SubtargetFeature<"fullfp16", "HasFullFP16", "true", def FeatureSPE : SubtargetFeature<"spe", "HasSPE", "true", "Enable Statistical Profiling extension">; +def FeatureSVE : SubtargetFeature<"sve", "HasSVE", "true", + "Enable Scalable Vector Extension (SVE) instructions">; + /// Cyclone has register move instructions which are "free". 
def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true", "Has zero-cycle register moves">; @@ -190,6 +193,7 @@ def ProcA53 : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53", FeatureCrypto, FeatureCustomCheapAsMoveHandling, FeatureFPARMv8, + FeatureFuseAES, FeatureNEON, FeaturePerfMon, FeaturePostRAScheduler, @@ -216,6 +220,7 @@ def ProcA72 : SubtargetFeature<"a72", "ARMProcFamily", "CortexA72", FeatureCRC, FeatureCrypto, FeatureFPARMv8, + FeatureFuseAES, FeatureNEON, FeaturePerfMon ]>; @@ -225,6 +230,7 @@ def ProcA73 : SubtargetFeature<"a73", "ARMProcFamily", "CortexA73", FeatureCRC, FeatureCrypto, FeatureFPARMv8, + FeatureFuseAES, FeatureNEON, FeaturePerfMon ]>; @@ -266,6 +272,7 @@ def ProcExynosM2 : SubtargetFeature<"exynosm2", "ARMProcFamily", "ExynosM1", FeatureCrypto, FeatureCustomCheapAsMoveHandling, FeatureFPARMv8, + FeatureFuseAES, FeatureNEON, FeaturePerfMon, FeaturePostRAScheduler, @@ -359,6 +366,7 @@ def ProcThunderXT83 : SubtargetFeature<"thunderxt83", "ARMProcFamily", def : ProcessorModel<"generic", NoSchedModel, [ FeatureFPARMv8, + FeatureFuseAES, FeatureNEON, FeaturePerfMon, FeaturePostRAScheduler diff --git a/interpreter/llvm/src/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp b/interpreter/llvm/src/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp index 4a7e0b2b803ee..db1fbe069f4d2 100644 --- a/interpreter/llvm/src/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp +++ b/interpreter/llvm/src/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp @@ -509,7 +509,7 @@ int AArch64A57FPLoadBalancing::scavengeRegister(Chain *G, Color C, assert(ChainBegin != ChainEnd && "Chain should contain instructions"); do { --I; - Units.accumulateBackward(*I); + Units.accumulate(*I); } while (I != ChainBegin); // Make sure we allocate in-order, to get the cheapest registers first. diff --git a/interpreter/llvm/src/lib/Target/AArch64/AArch64AsmPrinter.cpp b/interpreter/llvm/src/lib/Target/AArch64/AArch64AsmPrinter.cpp index 056ffd58b5218..5ce57926cc036 100644 --- a/interpreter/llvm/src/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/interpreter/llvm/src/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -12,13 +12,13 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/AArch64AddressingModes.h" #include "AArch64.h" #include "AArch64MCInstLower.h" #include "AArch64MachineFunctionInfo.h" #include "AArch64RegisterInfo.h" #include "AArch64Subtarget.h" #include "InstPrinter/AArch64InstPrinter.h" +#include "MCTargetDesc/AArch64AddressingModes.h" #include "MCTargetDesc/AArch64MCExpr.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringSwitch.h" @@ -35,11 +35,11 @@ #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstBuilder.h" #include "llvm/MC/MCLinkerOptimizationHint.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCSymbolELF.h" -#include "llvm/MC/MCSectionELF.h" -#include "llvm/MC/MCSectionMachO.h" #include "llvm/Support/Debug.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" @@ -320,6 +320,9 @@ bool AArch64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, switch (ExtraCode[0]) { default: return true; // Unknown modifier. 
+ case 'a': // Print 'a' modifier + PrintAsmMemoryOperand(MI, OpNum, AsmVariant, ExtraCode, O); + return false; case 'w': // Print W register case 'x': // Print X register if (MO.isReg()) @@ -388,7 +391,7 @@ bool AArch64AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned AsmVariant, const char *ExtraCode, raw_ostream &O) { - if (ExtraCode && ExtraCode[0]) + if (ExtraCode && ExtraCode[0] && ExtraCode[0] != 'a') return true; // Unknown modifier. const MachineOperand &MO = MI->getOperand(OpNum); diff --git a/interpreter/llvm/src/lib/Target/AArch64/AArch64CallingConvention.td b/interpreter/llvm/src/lib/Target/AArch64/AArch64CallingConvention.td index 938779d23690d..291bc5ea858e3 100644 --- a/interpreter/llvm/src/lib/Target/AArch64/AArch64CallingConvention.td +++ b/interpreter/llvm/src/lib/Target/AArch64/AArch64CallingConvention.td @@ -118,6 +118,13 @@ def RetCC_AArch64_AAPCS : CallingConv<[ CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>> ]>; +// Vararg functions on windows pass floats in integer registers +def CC_AArch64_Win64_VarArg : CallingConv<[ + CCIfType<[f16, f32], CCPromoteToType>, + CCIfType<[f64], CCBitConvertToType>, + CCDelegateTo +]>; + // Darwin uses a calling convention which differs in only two ways // from the standard one at this level: diff --git a/interpreter/llvm/src/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp b/interpreter/llvm/src/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp index 6f8dd3e3ac0ca..b3b738584b409 100644 --- a/interpreter/llvm/src/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp +++ b/interpreter/llvm/src/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp @@ -113,7 +113,7 @@ struct LDTLSCleanup : public MachineFunctionPass { return Copy; } - // Create a virtal register in *TLSBaseAddrReg, and populate it by + // Create a virtual register in *TLSBaseAddrReg, and populate it by // inserting a copy instruction after I. Returns the new instruction. MachineInstr *setRegister(MachineInstr &I, unsigned *TLSBaseAddrReg) { MachineFunction *MF = I.getParent()->getParent(); diff --git a/interpreter/llvm/src/lib/Target/AArch64/AArch64CondBrTuning.cpp b/interpreter/llvm/src/lib/Target/AArch64/AArch64CondBrTuning.cpp new file mode 100644 index 0000000000000..51700f9059799 --- /dev/null +++ b/interpreter/llvm/src/lib/Target/AArch64/AArch64CondBrTuning.cpp @@ -0,0 +1,339 @@ +//===-- AArch64CondBrTuning.cpp --- Conditional branch tuning for AArch64 -===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This file contains a pass that transforms CBZ/CBNZ/TBZ/TBNZ instructions +/// into a conditional branch (B.cond), when the NZCV flags can be set for +/// "free". This is preferred on targets that have more flexibility when +/// scheduling B.cond instructions as compared to CBZ/CBNZ/TBZ/TBNZ (assuming +/// all other variables are equal). This can also reduce register pressure. +/// +/// A few examples: +/// +/// 1) add w8, w0, w1 -> cmn w0, w1 ; CMN is an alias of ADDS. +/// cbz w8, .LBB_2 -> b.eq .LBB0_2 +/// +/// 2) add w8, w0, w1 -> adds w8, w0, w1 ; w8 has multiple uses. +/// cbz w8, .LBB1_2 -> b.eq .LBB1_2 +/// +/// 3) sub w8, w0, w1 -> subs w8, w0, w1 ; w8 has multiple uses. 
+/// tbz w8, #31, .LBB6_2 -> b.pl .LBB6_2 +/// +//===----------------------------------------------------------------------===// + +#include "AArch64.h" +#include "AArch64Subtarget.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineTraceMetrics.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" + +using namespace llvm; + +#define DEBUG_TYPE "aarch64-cond-br-tuning" +#define AARCH64_CONDBR_TUNING_NAME "AArch64 Conditional Branch Tuning" + +namespace { +class AArch64CondBrTuning : public MachineFunctionPass { + const AArch64InstrInfo *TII; + const TargetRegisterInfo *TRI; + + MachineRegisterInfo *MRI; + +public: + static char ID; + AArch64CondBrTuning() : MachineFunctionPass(ID) { + initializeAArch64CondBrTuningPass(*PassRegistry::getPassRegistry()); + } + void getAnalysisUsage(AnalysisUsage &AU) const override; + bool runOnMachineFunction(MachineFunction &MF) override; + StringRef getPassName() const override { return AARCH64_CONDBR_TUNING_NAME; } + +private: + MachineInstr *getOperandDef(const MachineOperand &MO); + MachineInstr *convertToFlagSetting(MachineInstr &MI, bool IsFlagSetting); + MachineInstr *convertToCondBr(MachineInstr &MI); + bool tryToTuneBranch(MachineInstr &MI, MachineInstr &DefMI); +}; +} // end anonymous namespace + +char AArch64CondBrTuning::ID = 0; + +INITIALIZE_PASS(AArch64CondBrTuning, "aarch64-cond-br-tuning", + AARCH64_CONDBR_TUNING_NAME, false, false) + +void AArch64CondBrTuning::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +MachineInstr *AArch64CondBrTuning::getOperandDef(const MachineOperand &MO) { + if (!TargetRegisterInfo::isVirtualRegister(MO.getReg())) + return nullptr; + return MRI->getUniqueVRegDef(MO.getReg()); +} + +MachineInstr *AArch64CondBrTuning::convertToFlagSetting(MachineInstr &MI, + bool IsFlagSetting) { + // If this is already the flag setting version of the instruction (e.g., SUBS) + // just make sure the implicit-def of NZCV isn't marked dead. + if (IsFlagSetting) { + for (unsigned I = MI.getNumExplicitOperands(), E = MI.getNumOperands(); + I != E; ++I) { + MachineOperand &MO = MI.getOperand(I); + if (MO.isReg() && MO.isDead() && MO.getReg() == AArch64::NZCV) + MO.setIsDead(false); + } + return &MI; + } + bool Is64Bit; + unsigned NewOpc = TII->convertToFlagSettingOpc(MI.getOpcode(), Is64Bit); + unsigned NewDestReg = MI.getOperand(0).getReg(); + if (MRI->hasOneNonDBGUse(MI.getOperand(0).getReg())) + NewDestReg = Is64Bit ? 
AArch64::XZR : AArch64::WZR; + + MachineInstrBuilder MIB = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), + TII->get(NewOpc), NewDestReg); + for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) + MIB.add(MI.getOperand(I)); + + return MIB; +} + +MachineInstr *AArch64CondBrTuning::convertToCondBr(MachineInstr &MI) { + AArch64CC::CondCode CC; + MachineBasicBlock *TargetMBB = TII->getBranchDestBlock(MI); + switch (MI.getOpcode()) { + default: + llvm_unreachable("Unexpected opcode!"); + + case AArch64::CBZW: + case AArch64::CBZX: + CC = AArch64CC::EQ; + break; + case AArch64::CBNZW: + case AArch64::CBNZX: + CC = AArch64CC::NE; + break; + case AArch64::TBZW: + case AArch64::TBZX: + CC = AArch64CC::PL; + break; + case AArch64::TBNZW: + case AArch64::TBNZX: + CC = AArch64CC::MI; + break; + } + return BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(AArch64::Bcc)) + .addImm(CC) + .addMBB(TargetMBB); +} + +bool AArch64CondBrTuning::tryToTuneBranch(MachineInstr &MI, + MachineInstr &DefMI) { + // We don't want NZCV bits live across blocks. + if (MI.getParent() != DefMI.getParent()) + return false; + + bool IsFlagSetting = true; + unsigned MIOpc = MI.getOpcode(); + MachineInstr *NewCmp = nullptr, *NewBr = nullptr; + switch (DefMI.getOpcode()) { + default: + return false; + case AArch64::ADDWri: + case AArch64::ADDWrr: + case AArch64::ADDWrs: + case AArch64::ADDWrx: + case AArch64::ANDWri: + case AArch64::ANDWrr: + case AArch64::ANDWrs: + case AArch64::BICWrr: + case AArch64::BICWrs: + case AArch64::SUBWri: + case AArch64::SUBWrr: + case AArch64::SUBWrs: + case AArch64::SUBWrx: + IsFlagSetting = false; + LLVM_FALLTHROUGH; + case AArch64::ADDSWri: + case AArch64::ADDSWrr: + case AArch64::ADDSWrs: + case AArch64::ADDSWrx: + case AArch64::ANDSWri: + case AArch64::ANDSWrr: + case AArch64::ANDSWrs: + case AArch64::BICSWrr: + case AArch64::BICSWrs: + case AArch64::SUBSWri: + case AArch64::SUBSWrr: + case AArch64::SUBSWrs: + case AArch64::SUBSWrx: + switch (MIOpc) { + default: + llvm_unreachable("Unexpected opcode!"); + + case AArch64::CBZW: + case AArch64::CBNZW: + case AArch64::TBZW: + case AArch64::TBNZW: + // Check to see if the TBZ/TBNZ is checking the sign bit. + if ((MIOpc == AArch64::TBZW || MIOpc == AArch64::TBNZW) && + MI.getOperand(1).getImm() != 31) + return false; + + // There must not be any instruction between DefMI and MI that clobbers or + // reads NZCV. 
+ MachineBasicBlock::iterator I(DefMI), E(MI); + for (I = std::next(I); I != E; ++I) { + if (I->modifiesRegister(AArch64::NZCV, TRI) || + I->readsRegister(AArch64::NZCV, TRI)) + return false; + } + DEBUG(dbgs() << " Replacing instructions:\n "); + DEBUG(DefMI.print(dbgs())); + DEBUG(dbgs() << " "); + DEBUG(MI.print(dbgs())); + + NewCmp = convertToFlagSetting(DefMI, IsFlagSetting); + NewBr = convertToCondBr(MI); + break; + } + break; + + case AArch64::ADDXri: + case AArch64::ADDXrr: + case AArch64::ADDXrs: + case AArch64::ADDXrx: + case AArch64::ANDXri: + case AArch64::ANDXrr: + case AArch64::ANDXrs: + case AArch64::BICXrr: + case AArch64::BICXrs: + case AArch64::SUBXri: + case AArch64::SUBXrr: + case AArch64::SUBXrs: + case AArch64::SUBXrx: + IsFlagSetting = false; + LLVM_FALLTHROUGH; + case AArch64::ADDSXri: + case AArch64::ADDSXrr: + case AArch64::ADDSXrs: + case AArch64::ADDSXrx: + case AArch64::ANDSXri: + case AArch64::ANDSXrr: + case AArch64::ANDSXrs: + case AArch64::BICSXrr: + case AArch64::BICSXrs: + case AArch64::SUBSXri: + case AArch64::SUBSXrr: + case AArch64::SUBSXrs: + case AArch64::SUBSXrx: + switch (MIOpc) { + default: + llvm_unreachable("Unexpected opcode!"); + + case AArch64::CBZX: + case AArch64::CBNZX: + case AArch64::TBZX: + case AArch64::TBNZX: { + // Check to see if the TBZ/TBNZ is checking the sign bit. + if ((MIOpc == AArch64::TBZX || MIOpc == AArch64::TBNZX) && + MI.getOperand(1).getImm() != 63) + return false; + // There must not be any instruction between DefMI and MI that clobbers or + // reads NZCV. + MachineBasicBlock::iterator I(DefMI), E(MI); + for (I = std::next(I); I != E; ++I) { + if (I->modifiesRegister(AArch64::NZCV, TRI) || + I->readsRegister(AArch64::NZCV, TRI)) + return false; + } + DEBUG(dbgs() << " Replacing instructions:\n "); + DEBUG(DefMI.print(dbgs())); + DEBUG(dbgs() << " "); + DEBUG(MI.print(dbgs())); + + NewCmp = convertToFlagSetting(DefMI, IsFlagSetting); + NewBr = convertToCondBr(MI); + break; + } + } + break; + } + (void)NewCmp; (void)NewBr; + assert(NewCmp && NewBr && "Expected new instructions."); + + DEBUG(dbgs() << " with instruction:\n "); + DEBUG(NewCmp->print(dbgs())); + DEBUG(dbgs() << " "); + DEBUG(NewBr->print(dbgs())); + + // If this was a flag setting version of the instruction, we use the original + // instruction by just clearing the dead flag on the implicit-def of NZCV. + // Therefore, we should not erase this instruction.
+ if (!IsFlagSetting) + DefMI.eraseFromParent(); + MI.eraseFromParent(); + return true; +} + +bool AArch64CondBrTuning::runOnMachineFunction(MachineFunction &MF) { + if (skipFunction(*MF.getFunction())) + return false; + + DEBUG(dbgs() << "********** AArch64 Conditional Branch Tuning **********\n" + << "********** Function: " << MF.getName() << '\n'); + + TII = static_cast(MF.getSubtarget().getInstrInfo()); + TRI = MF.getSubtarget().getRegisterInfo(); + MRI = &MF.getRegInfo(); + + bool Changed = false; + for (MachineBasicBlock &MBB : MF) { + bool LocalChange = false; + for (MachineBasicBlock::iterator I = MBB.getFirstTerminator(), + E = MBB.end(); + I != E; ++I) { + MachineInstr &MI = *I; + switch (MI.getOpcode()) { + default: + break; + case AArch64::CBZW: + case AArch64::CBZX: + case AArch64::CBNZW: + case AArch64::CBNZX: + case AArch64::TBZW: + case AArch64::TBZX: + case AArch64::TBNZW: + case AArch64::TBNZX: + MachineInstr *DefMI = getOperandDef(MI.getOperand(0)); + LocalChange = (DefMI && tryToTuneBranch(MI, *DefMI)); + break; + } + // If the optimization was successful, we can't optimize any other + // branches because doing so would clobber the NZCV flags. + if (LocalChange) { + Changed = true; + break; + } + } + } + return Changed; +} + +FunctionPass *llvm::createAArch64CondBrTuning() { + return new AArch64CondBrTuning(); +} diff --git a/interpreter/llvm/src/lib/Target/AArch64/AArch64ConditionalCompares.cpp b/interpreter/llvm/src/lib/Target/AArch64/AArch64ConditionalCompares.cpp index 00a0111f2bd2d..9eda56c825a9c 100644 --- a/interpreter/llvm/src/lib/Target/AArch64/AArch64ConditionalCompares.cpp +++ b/interpreter/llvm/src/lib/Target/AArch64/AArch64ConditionalCompares.cpp @@ -22,6 +22,7 @@ #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -139,6 +140,7 @@ class SSACCmpConv { const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; MachineRegisterInfo *MRI; + const MachineBranchProbabilityInfo *MBPI; public: /// The first block containing a conditional branch, dominating everything @@ -186,8 +188,10 @@ class SSACCmpConv { public: /// runOnMachineFunction - Initialize per-function data structures. - void runOnMachineFunction(MachineFunction &MF) { + void runOnMachineFunction(MachineFunction &MF, + const MachineBranchProbabilityInfo *MBPI) { this->MF = &MF; + this->MBPI = MBPI; TII = MF.getSubtarget().getInstrInfo(); TRI = MF.getSubtarget().getRegisterInfo(); MRI = &MF.getRegInfo(); @@ -564,8 +568,40 @@ void SSACCmpConv::convert(SmallVectorImpl &RemovedBlocks) { // All CmpBB instructions are moved into Head, and CmpBB is deleted. // Update the CFG first. updateTailPHIs(); - Head->removeSuccessor(CmpBB, true); - CmpBB->removeSuccessor(Tail, true); + + // Save successor probabilities before removing CmpBB and Tail from their + // parents. + BranchProbability Head2CmpBB = MBPI->getEdgeProbability(Head, CmpBB); + BranchProbability CmpBB2Tail = MBPI->getEdgeProbability(CmpBB, Tail); + + Head->removeSuccessor(CmpBB); + CmpBB->removeSuccessor(Tail); + + // If Head and CmpBB had successor probabilities, update the probabilities to + // reflect the ccmp-conversion. + if (Head->hasSuccessorProbabilities() && CmpBB->hasSuccessorProbabilities()) { + + // Head is allowed two successors. We've removed CmpBB, so the remaining + // successor is Tail.
We need to increase the successor probability for + Tail to account for the CmpBB path we removed. + // + // Pr(Tail|Head) += Pr(CmpBB|Head) * Pr(Tail|CmpBB). + assert(*Head->succ_begin() == Tail && "Head successor is not Tail"); + BranchProbability Head2Tail = MBPI->getEdgeProbability(Head, Tail); + Head->setSuccProbability(Head->succ_begin(), + Head2Tail + Head2CmpBB * CmpBB2Tail); + + // We will transfer successors of CmpBB to Head in a moment without + // normalizing the successor probabilities. Set the successor probabilities + // before doing so. + // + // Pr(I|Head) = Pr(CmpBB|Head) * Pr(I|CmpBB). + for (auto I = CmpBB->succ_begin(), E = CmpBB->succ_end(); I != E; ++I) { + BranchProbability CmpBB2I = MBPI->getEdgeProbability(CmpBB, *I); + CmpBB->setSuccProbability(I, Head2CmpBB * CmpBB2I); + } + } + Head->transferSuccessorsAndUpdatePHIs(CmpBB); DebugLoc TermDL = Head->getFirstTerminator()->getDebugLoc(); TII->removeBranch(*Head); @@ -717,6 +753,7 @@ int SSACCmpConv::expectedCodeSizeDelta() const { namespace { class AArch64ConditionalCompares : public MachineFunctionPass { + const MachineBranchProbabilityInfo *MBPI; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; MCSchedModel SchedModel; @@ -753,6 +790,7 @@ char AArch64ConditionalCompares::ID = 0; INITIALIZE_PASS_BEGIN(AArch64ConditionalCompares, "aarch64-ccmp", "AArch64 CCMP Pass", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics) INITIALIZE_PASS_END(AArch64ConditionalCompares, "aarch64-ccmp", @@ -763,6 +801,7 @@ FunctionPass *llvm::createAArch64ConditionalCompares() { } void AArch64ConditionalCompares::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); AU.addRequired(); AU.addPreserved(); AU.addRequired(); @@ -892,12 +931,13 @@ bool AArch64ConditionalCompares::runOnMachineFunction(MachineFunction &MF) { MRI = &MF.getRegInfo(); DomTree = &getAnalysis(); Loops = getAnalysisIfAvailable(); + MBPI = &getAnalysis(); Traces = &getAnalysis(); MinInstr = nullptr; MinSize = MF.getFunction()->optForMinSize(); bool Changed = false; - CmpConv.runOnMachineFunction(MF); + CmpConv.runOnMachineFunction(MF, MBPI); // Visit blocks in dominator tree pre-order. The pre-order enables multiple // cmp-conversions from the same head block.
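A quick numeric check of the probability update in the hunk above (an illustrative sketch, not part of the patch; it reuses LLVM's BranchProbability arithmetic exactly as the diff does): suppose Head reaches CmpBB with probability 1/2 and CmpBB reaches Tail with probability 1/2.

    #include "llvm/Support/BranchProbability.h"
    using llvm::BranchProbability;

    // Hypothetical edge probabilities, for illustration only.
    BranchProbability Head2CmpBB = BranchProbability::getBranchProbability(1, 2);
    BranchProbability CmpBB2Tail = BranchProbability::getBranchProbability(1, 2);
    BranchProbability Head2Tail  = BranchProbability::getBranchProbability(1, 2);
    // Pr(Tail|Head) += Pr(CmpBB|Head) * Pr(Tail|CmpBB) = 1/2 + 1/4 = 3/4.
    BranchProbability NewHead2Tail = Head2Tail + Head2CmpBB * CmpBB2Tail;

The edge probabilities out of Head still sum to 1 afterwards, because every path that previously went through CmpBB is reassigned either to Tail or to one of CmpBB's other successors that Head inherits.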
diff --git a/interpreter/llvm/src/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp b/interpreter/llvm/src/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp index 30e2b2310456b..b72f23b109d94 100644 --- a/interpreter/llvm/src/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp +++ b/interpreter/llvm/src/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp @@ -13,15 +13,17 @@ #include "AArch64.h" #include "AArch64RegisterInfo.h" +#include "AArch64Subtarget.h" #include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; #define DEBUG_TYPE "aarch64-dead-defs" @@ -84,6 +86,55 @@ void AArch64DeadRegisterDefinitions::processMachineBasicBlock( DEBUG(dbgs() << " Ignoring, XZR or WZR already used by the instruction\n"); continue; } + if (MF.getSubtarget().hasLSE()) { + // XZ/WZ for LSE can only be used when acquire semantics are not used, + // LDOPAL WZ is an invalid opcode. + switch (MI.getOpcode()) { + case AArch64::CASALb: + case AArch64::CASALh: + case AArch64::CASALs: + case AArch64::CASALd: + case AArch64::SWPALb: + case AArch64::SWPALh: + case AArch64::SWPALs: + case AArch64::SWPALd: + case AArch64::LDADDALb: + case AArch64::LDADDALh: + case AArch64::LDADDALs: + case AArch64::LDADDALd: + case AArch64::LDCLRALb: + case AArch64::LDCLRALh: + case AArch64::LDCLRALs: + case AArch64::LDCLRALd: + case AArch64::LDEORALb: + case AArch64::LDEORALh: + case AArch64::LDEORALs: + case AArch64::LDEORALd: + case AArch64::LDSETALb: + case AArch64::LDSETALh: + case AArch64::LDSETALs: + case AArch64::LDSETALd: + case AArch64::LDSMINALb: + case AArch64::LDSMINALh: + case AArch64::LDSMINALs: + case AArch64::LDSMINALd: + case AArch64::LDSMAXALb: + case AArch64::LDSMAXALh: + case AArch64::LDSMAXALs: + case AArch64::LDSMAXALd: + case AArch64::LDUMINALb: + case AArch64::LDUMINALh: + case AArch64::LDUMINALs: + case AArch64::LDUMINALd: + case AArch64::LDUMAXALb: + case AArch64::LDUMAXALh: + case AArch64::LDUMAXALs: + case AArch64::LDUMAXALd: + continue; + default: + break; + } + } const MCInstrDesc &Desc = MI.getDesc(); for (int I = 0, E = Desc.getNumDefs(); I != E; ++I) { MachineOperand &MO = MI.getOperand(I); diff --git a/interpreter/llvm/src/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/interpreter/llvm/src/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp index 629ad5c61b78a..d52cd84246a18 100644 --- a/interpreter/llvm/src/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp +++ b/interpreter/llvm/src/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp @@ -14,9 +14,9 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/AArch64AddressingModes.h" #include "AArch64InstrInfo.h" #include "AArch64Subtarget.h" +#include "MCTargetDesc/AArch64AddressingModes.h" #include "Utils/AArch64BaseInfo.h" #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -584,27 +584,21 @@ bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB, return true; } -static void addPostLoopLiveIns(MachineBasicBlock *MBB, LivePhysRegs &LiveRegs) { - for (auto I = LiveRegs.begin(); I != LiveRegs.end(); ++I) - MBB->addLiveIn(*I); -} - bool 
AArch64ExpandPseudo::expandCMP_SWAP( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned LdarOp, unsigned StlrOp, unsigned CmpOp, unsigned ExtendImm, unsigned ZeroReg, MachineBasicBlock::iterator &NextMBBI) { MachineInstr &MI = *MBBI; DebugLoc DL = MI.getDebugLoc(); - MachineOperand &Dest = MI.getOperand(0); + const MachineOperand &Dest = MI.getOperand(0); unsigned StatusReg = MI.getOperand(1).getReg(); - MachineOperand &Addr = MI.getOperand(2); - MachineOperand &Desired = MI.getOperand(3); - MachineOperand &New = MI.getOperand(4); - - LivePhysRegs LiveRegs(&TII->getRegisterInfo()); - LiveRegs.addLiveOuts(MBB); - for (auto I = std::prev(MBB.end()); I != MBBI; --I) - LiveRegs.stepBackward(*I); + bool StatusDead = MI.getOperand(1).isDead(); + // Duplicating undef operands into 2 instructions does not guarantee the same + // value on both; however, undef should be replaced by xzr anyway. + assert(!MI.getOperand(2).isUndef() && "cannot handle undef"); + unsigned AddrReg = MI.getOperand(2).getReg(); + unsigned DesiredReg = MI.getOperand(3).getReg(); + unsigned NewReg = MI.getOperand(4).getReg(); MachineFunction *MF = MBB.getParent(); auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); @@ -616,19 +610,18 @@ bool AArch64ExpandPseudo::expandCMP_SWAP( MF->insert(++StoreBB->getIterator(), DoneBB); // .Lloadcmp: + // mov wStatus, 0 // ldaxr xDest, [xAddr] // cmp xDest, xDesired // b.ne .Ldone - LoadCmpBB->addLiveIn(Addr.getReg()); - LoadCmpBB->addLiveIn(Dest.getReg()); - LoadCmpBB->addLiveIn(Desired.getReg()); - addPostLoopLiveIns(LoadCmpBB, LiveRegs); - + if (!StatusDead) + BuildMI(LoadCmpBB, DL, TII->get(AArch64::MOVZWi), StatusReg) + .addImm(0).addImm(0); BuildMI(LoadCmpBB, DL, TII->get(LdarOp), Dest.getReg()) - .addReg(Addr.getReg()); + .addReg(AddrReg); BuildMI(LoadCmpBB, DL, TII->get(CmpOp), ZeroReg) .addReg(Dest.getReg(), getKillRegState(Dest.isDead())) - .add(Desired) + .addReg(DesiredReg) .addImm(ExtendImm); BuildMI(LoadCmpBB, DL, TII->get(AArch64::Bcc)) .addImm(AArch64CC::NE) @@ -640,25 +633,35 @@ bool AArch64ExpandPseudo::expandCMP_SWAP( // .Lstore: // stlxr wStatus, xNew, [xAddr] // cbnz wStatus, .Lloadcmp - StoreBB->addLiveIn(Addr.getReg()); - StoreBB->addLiveIn(New.getReg()); - addPostLoopLiveIns(StoreBB, LiveRegs); - - BuildMI(StoreBB, DL, TII->get(StlrOp), StatusReg).add(New).add(Addr); + BuildMI(StoreBB, DL, TII->get(StlrOp), StatusReg) + .addReg(NewReg) + .addReg(AddrReg); BuildMI(StoreBB, DL, TII->get(AArch64::CBNZW)) - .addReg(StatusReg, RegState::Kill) + .addReg(StatusReg, getKillRegState(StatusDead)) .addMBB(LoadCmpBB); StoreBB->addSuccessor(LoadCmpBB); StoreBB->addSuccessor(DoneBB); DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end()); DoneBB->transferSuccessors(&MBB); - addPostLoopLiveIns(DoneBB, LiveRegs); MBB.addSuccessor(LoadCmpBB); NextMBBI = MBB.end(); MI.eraseFromParent(); + + // Recompute livein lists. + const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + LivePhysRegs LiveRegs; + computeLiveIns(LiveRegs, MRI, *DoneBB); + computeLiveIns(LiveRegs, MRI, *StoreBB); + computeLiveIns(LiveRegs, MRI, *LoadCmpBB); + // Do an extra pass around the loop to get loop carried registers right.
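+ // (StoreBB and LoadCmpBB form a cycle: the CBNZ in StoreBB branches back to + // LoadCmpBB, so the first bottom-up sweep computes StoreBB's live-ins before + // LoadCmpBB's are known; the second pass picks up registers that are only + // live around that back-edge.)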
+ StoreBB->clearLiveIns(); + computeLiveIns(LiveRegs, MRI, *StoreBB); + LoadCmpBB->clearLiveIns(); + computeLiveIns(LiveRegs, MRI, *LoadCmpBB); + return true; } @@ -671,16 +674,15 @@ bool AArch64ExpandPseudo::expandCMP_SWAP_128( MachineOperand &DestLo = MI.getOperand(0); MachineOperand &DestHi = MI.getOperand(1); unsigned StatusReg = MI.getOperand(2).getReg(); - MachineOperand &Addr = MI.getOperand(3); - MachineOperand &DesiredLo = MI.getOperand(4); - MachineOperand &DesiredHi = MI.getOperand(5); - MachineOperand &NewLo = MI.getOperand(6); - MachineOperand &NewHi = MI.getOperand(7); - - LivePhysRegs LiveRegs(&TII->getRegisterInfo()); - LiveRegs.addLiveOuts(MBB); - for (auto I = std::prev(MBB.end()); I != MBBI; --I) - LiveRegs.stepBackward(*I); + bool StatusDead = MI.getOperand(2).isDead(); + // Duplicating undef operands into 2 instructions does not guarantee the same + // value on both; however, undef should be replaced by xzr anyway. + assert(!MI.getOperand(3).isUndef() && "cannot handle undef"); + unsigned AddrReg = MI.getOperand(3).getReg(); + unsigned DesiredLoReg = MI.getOperand(4).getReg(); + unsigned DesiredHiReg = MI.getOperand(5).getReg(); + unsigned NewLoReg = MI.getOperand(6).getReg(); + unsigned NewHiReg = MI.getOperand(7).getReg(); MachineFunction *MF = MBB.getParent(); auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); @@ -696,20 +698,13 @@ bool AArch64ExpandPseudo::expandCMP_SWAP_128( // cmp xDestLo, xDesiredLo // sbcs xDestHi, xDesiredHi // b.ne .Ldone - LoadCmpBB->addLiveIn(Addr.getReg()); - LoadCmpBB->addLiveIn(DestLo.getReg()); - LoadCmpBB->addLiveIn(DestHi.getReg()); - LoadCmpBB->addLiveIn(DesiredLo.getReg()); - LoadCmpBB->addLiveIn(DesiredHi.getReg()); - addPostLoopLiveIns(LoadCmpBB, LiveRegs); - BuildMI(LoadCmpBB, DL, TII->get(AArch64::LDAXPX)) .addReg(DestLo.getReg(), RegState::Define) .addReg(DestHi.getReg(), RegState::Define) - .addReg(Addr.getReg()); + .addReg(AddrReg); BuildMI(LoadCmpBB, DL, TII->get(AArch64::SUBSXrs), AArch64::XZR) .addReg(DestLo.getReg(), getKillRegState(DestLo.isDead())) - .add(DesiredLo) + .addReg(DesiredLoReg) + .addImm(0); BuildMI(LoadCmpBB, DL, TII->get(AArch64::CSINCWr), StatusReg) .addUse(AArch64::WZR) @@ -717,14 +712,14 @@ .addImm(AArch64CC::EQ); BuildMI(LoadCmpBB, DL, TII->get(AArch64::SUBSXrs), AArch64::XZR) .addReg(DestHi.getReg(), getKillRegState(DestHi.isDead())) - .add(DesiredHi) + .addReg(DesiredHiReg) .addImm(0); BuildMI(LoadCmpBB, DL, TII->get(AArch64::CSINCWr), StatusReg) .addUse(StatusReg, RegState::Kill) .addUse(StatusReg, RegState::Kill) .addImm(AArch64CC::EQ); BuildMI(LoadCmpBB, DL, TII->get(AArch64::CBNZW)) - .addUse(StatusReg, RegState::Kill) + .addUse(StatusReg, getKillRegState(StatusDead)) .addMBB(DoneBB); LoadCmpBB->addSuccessor(DoneBB); LoadCmpBB->addSuccessor(StoreBB); @@ -732,28 +727,36 @@ bool AArch64ExpandPseudo::expandCMP_SWAP_128( // .Lstore: // stlxp wStatus, xNewLo, xNewHi, [xAddr] // cbnz wStatus, .Lloadcmp - StoreBB->addLiveIn(Addr.getReg()); - StoreBB->addLiveIn(NewLo.getReg()); - StoreBB->addLiveIn(NewHi.getReg()); - addPostLoopLiveIns(StoreBB, LiveRegs); BuildMI(StoreBB, DL, TII->get(AArch64::STLXPX), StatusReg) - .add(NewLo) - .add(NewHi) - .add(Addr); + .addReg(NewLoReg) + .addReg(NewHiReg) + .addReg(AddrReg); BuildMI(StoreBB, DL, TII->get(AArch64::CBNZW)) - .addReg(StatusReg, RegState::Kill) + .addReg(StatusReg, getKillRegState(StatusDead)) .addMBB(LoadCmpBB); StoreBB->addSuccessor(LoadCmpBB); StoreBB->addSuccessor(DoneBB);
DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end()); DoneBB->transferSuccessors(&MBB); - addPostLoopLiveIns(DoneBB, LiveRegs); MBB.addSuccessor(LoadCmpBB); NextMBBI = MBB.end(); MI.eraseFromParent(); + + // Recompute liveness bottom up. + const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + LivePhysRegs LiveRegs; + computeLiveIns(LiveRegs, MRI, *DoneBB); + computeLiveIns(LiveRegs, MRI, *StoreBB); + computeLiveIns(LiveRegs, MRI, *LoadCmpBB); + // Do an extra pass in the loop to get the loop carried dependencies right. + StoreBB->clearLiveIns(); + computeLiveIns(LiveRegs, MRI, *StoreBB); + LoadCmpBB->clearLiveIns(); + computeLiveIns(LiveRegs, MRI, *LoadCmpBB); + return true; } @@ -943,6 +946,18 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB, case AArch64::CMP_SWAP_128: return expandCMP_SWAP_128(MBB, MBBI, NextMBBI); + case AArch64::AESMCrrTied: + case AArch64::AESIMCrrTied: { + MachineInstrBuilder MIB = + BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(Opcode == AArch64::AESMCrrTied ? AArch64::AESMCrr : + AArch64::AESIMCrr)) + .add(MI.getOperand(0)) + .add(MI.getOperand(1)); + transferImpOps(MI, MIB, MIB); + MI.eraseFromParent(); + return true; + } } return false; } diff --git a/interpreter/llvm/src/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp b/interpreter/llvm/src/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp new file mode 100644 index 0000000000000..c0e22355a9ff6 --- /dev/null +++ b/interpreter/llvm/src/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp @@ -0,0 +1,790 @@ +//===-- AArch64FalkorHWPFFix.cpp - Avoid HW prefetcher pitfalls on Falkor--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file For Falkor, we want to avoid HW prefetcher instruction tag collisions +/// that may inhibit the HW prefetching. This is done in two steps. Before +/// ISel, we mark strided loads (i.e. those that will likely benefit from +/// prefetching) with metadata. Then, after opcodes have been finalized, we +/// insert MOVs and rewrite loads to prevent unintentional tag collisions.
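+/// Collisions are possible because only the low bits of each register +/// encoding feed the tag (see makeTag() below): e.g. loads whose destination +/// registers are x1 and x17 contribute identical destination bits, since +/// 17 & 0xf == 1.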
+// ===---------------------------------------------------------------------===// + +#include "AArch64.h" +#include "AArch64InstrInfo.h" +#include "AArch64TargetMachine.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/CodeGen/LiveRegUnits.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +#define DEBUG_TYPE "falkor-hwpf-fix" + +STATISTIC(NumStridedLoadsMarked, "Number of strided loads marked"); +STATISTIC(NumCollisionsAvoided, + "Number of HW prefetch tag collisions avoided"); +STATISTIC(NumCollisionsNotAvoided, + "Number of HW prefetch tag collisions not avoided due to lack of registers"); + +namespace { + +class FalkorMarkStridedAccesses { +public: + FalkorMarkStridedAccesses(LoopInfo &LI, ScalarEvolution &SE) + : LI(LI), SE(SE) {} + + bool run(); + +private: + bool runOnLoop(Loop &L); + + LoopInfo &LI; + ScalarEvolution &SE; +}; + +class FalkorMarkStridedAccessesLegacy : public FunctionPass { +public: + static char ID; // Pass ID, replacement for typeid + FalkorMarkStridedAccessesLegacy() : FunctionPass(ID) { + initializeFalkorMarkStridedAccessesLegacyPass( + *PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); + AU.addRequired(); + // FIXME: For some reason, preserving SE here breaks LSR (even if + // this pass changes nothing).
+ // AU.addPreserved(); + } + + bool runOnFunction(Function &F) override; +}; +} // namespace + +char FalkorMarkStridedAccessesLegacy::ID = 0; +INITIALIZE_PASS_BEGIN(FalkorMarkStridedAccessesLegacy, DEBUG_TYPE, + "Falkor HW Prefetch Fix", false, false) +INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) +INITIALIZE_PASS_END(FalkorMarkStridedAccessesLegacy, DEBUG_TYPE, + "Falkor HW Prefetch Fix", false, false) + +FunctionPass *llvm::createFalkorMarkStridedAccessesPass() { + return new FalkorMarkStridedAccessesLegacy(); +} + +bool FalkorMarkStridedAccessesLegacy::runOnFunction(Function &F) { + TargetPassConfig &TPC = getAnalysis(); + const AArch64Subtarget *ST = + TPC.getTM().getSubtargetImpl(F); + if (ST->getProcFamily() != AArch64Subtarget::Falkor) + return false; + + if (skipFunction(F)) + return false; + + LoopInfo &LI = getAnalysis().getLoopInfo(); + ScalarEvolution &SE = getAnalysis().getSE(); + + FalkorMarkStridedAccesses LDP(LI, SE); + return LDP.run(); +} + +bool FalkorMarkStridedAccesses::run() { + bool MadeChange = false; + + for (Loop *L : LI) + for (auto LIt = df_begin(L), LE = df_end(L); LIt != LE; ++LIt) + MadeChange |= runOnLoop(**LIt); + + return MadeChange; +} + +bool FalkorMarkStridedAccesses::runOnLoop(Loop &L) { + // Only mark strided loads in the inner-most loop + if (!L.empty()) + return false; + + bool MadeChange = false; + + for (BasicBlock *BB : L.blocks()) { + for (Instruction &I : *BB) { + LoadInst *LoadI = dyn_cast(&I); + if (!LoadI) + continue; + + Value *PtrValue = LoadI->getPointerOperand(); + if (L.isLoopInvariant(PtrValue)) + continue; + + const SCEV *LSCEV = SE.getSCEV(PtrValue); + const SCEVAddRecExpr *LSCEVAddRec = dyn_cast(LSCEV); + if (!LSCEVAddRec || !LSCEVAddRec->isAffine()) + continue; + + LoadI->setMetadata(FALKOR_STRIDED_ACCESS_MD, + MDNode::get(LoadI->getContext(), {})); + ++NumStridedLoadsMarked; + DEBUG(dbgs() << "Load: " << I << " marked as strided\n"); + MadeChange = true; + } + } + + return MadeChange; +} + +namespace { + +class FalkorHWPFFix : public MachineFunctionPass { +public: + static char ID; + + FalkorHWPFFix() : MachineFunctionPass(ID) { + initializeFalkorHWPFFixPass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &Fn) override; + + virtual void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::NoVRegs); + } + +private: + void runOnLoop(MachineLoop &L, MachineFunction &Fn); + + const AArch64InstrInfo *TII; + const TargetRegisterInfo *TRI; + DenseMap> TagMap; + bool Modified; +}; + +/// Bits from load opcodes used to compute HW prefetcher instruction tags. 
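+/// The tag packs the low bits of those fields: bits [3:0] destination +/// register, bits [7:4] base register, bits [13:8] offset; e.g. +/// makeTag(1, 2, 4) == 0x421. See makeTag() and getTag() below.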
+struct LoadInfo { + LoadInfo() + : DestReg(0), BaseReg(0), BaseRegIdx(-1), OffsetOpnd(nullptr), + IsPrePost(false) {} + unsigned DestReg; + unsigned BaseReg; + int BaseRegIdx; + const MachineOperand *OffsetOpnd; + bool IsPrePost; +}; + +} // namespace + +char FalkorHWPFFix::ID = 0; + +INITIALIZE_PASS_BEGIN(FalkorHWPFFix, "falkor-hwpf-fix-late", + "Falkor HW Prefetch Fix Late Phase", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_END(FalkorHWPFFix, "falkor-hwpf-fix-late", + "Falkor HW Prefetch Fix Late Phase", false, false) + +static unsigned makeTag(unsigned Dest, unsigned Base, unsigned Offset) { + return (Dest & 0xf) | ((Base & 0xf) << 4) | ((Offset & 0x3f) << 8); +} + +static Optional getLoadInfo(const MachineInstr &MI) { + int DestRegIdx; + int BaseRegIdx; + int OffsetIdx; + bool IsPrePost; + + switch (MI.getOpcode()) { + default: + return None; + + case AArch64::LD1i8: + case AArch64::LD1i16: + case AArch64::LD1i32: + case AArch64::LD1i64: + case AArch64::LD2i8: + case AArch64::LD2i16: + case AArch64::LD2i32: + case AArch64::LD2i64: + case AArch64::LD3i8: + case AArch64::LD3i16: + case AArch64::LD3i32: + case AArch64::LD4i8: + case AArch64::LD4i16: + case AArch64::LD4i32: + DestRegIdx = 0; + BaseRegIdx = 3; + OffsetIdx = -1; + IsPrePost = false; + break; + + case AArch64::LD3i64: + case AArch64::LD4i64: + DestRegIdx = -1; + BaseRegIdx = 3; + OffsetIdx = -1; + IsPrePost = false; + break; + + case AArch64::LD1Onev1d: + case AArch64::LD1Onev2s: + case AArch64::LD1Onev4h: + case AArch64::LD1Onev8b: + case AArch64::LD1Onev2d: + case AArch64::LD1Onev4s: + case AArch64::LD1Onev8h: + case AArch64::LD1Onev16b: + case AArch64::LD1Rv1d: + case AArch64::LD1Rv2s: + case AArch64::LD1Rv4h: + case AArch64::LD1Rv8b: + case AArch64::LD1Rv2d: + case AArch64::LD1Rv4s: + case AArch64::LD1Rv8h: + case AArch64::LD1Rv16b: + case AArch64::LD1Twov1d: + case AArch64::LD1Twov2s: + case AArch64::LD1Twov4h: + case AArch64::LD1Twov8b: + case AArch64::LD2Twov2s: + case AArch64::LD2Twov4s: + case AArch64::LD2Twov8b: + case AArch64::LD2Rv1d: + case AArch64::LD2Rv2s: + case AArch64::LD2Rv4s: + case AArch64::LD2Rv8b: + DestRegIdx = 0; + BaseRegIdx = 1; + OffsetIdx = -1; + IsPrePost = false; + break; + + case AArch64::LD1Twov2d: + case AArch64::LD1Twov4s: + case AArch64::LD1Twov8h: + case AArch64::LD1Twov16b: + case AArch64::LD1Threev1d: + case AArch64::LD1Threev2s: + case AArch64::LD1Threev4h: + case AArch64::LD1Threev8b: + case AArch64::LD1Threev2d: + case AArch64::LD1Threev4s: + case AArch64::LD1Threev8h: + case AArch64::LD1Threev16b: + case AArch64::LD1Fourv1d: + case AArch64::LD1Fourv2s: + case AArch64::LD1Fourv4h: + case AArch64::LD1Fourv8b: + case AArch64::LD1Fourv2d: + case AArch64::LD1Fourv4s: + case AArch64::LD1Fourv8h: + case AArch64::LD1Fourv16b: + case AArch64::LD2Twov2d: + case AArch64::LD2Twov4h: + case AArch64::LD2Twov8h: + case AArch64::LD2Twov16b: + case AArch64::LD2Rv2d: + case AArch64::LD2Rv4h: + case AArch64::LD2Rv8h: + case AArch64::LD2Rv16b: + case AArch64::LD3Threev2s: + case AArch64::LD3Threev4h: + case AArch64::LD3Threev8b: + case AArch64::LD3Threev2d: + case AArch64::LD3Threev4s: + case AArch64::LD3Threev8h: + case AArch64::LD3Threev16b: + case AArch64::LD3Rv1d: + case AArch64::LD3Rv2s: + case AArch64::LD3Rv4h: + case AArch64::LD3Rv8b: + case AArch64::LD3Rv2d: + case AArch64::LD3Rv4s: + case AArch64::LD3Rv8h: + case AArch64::LD3Rv16b: + case AArch64::LD4Fourv2s: + case AArch64::LD4Fourv4h: + case AArch64::LD4Fourv8b: + case AArch64::LD4Fourv2d: + case 
AArch64::LD4Fourv4s: + case AArch64::LD4Fourv8h: + case AArch64::LD4Fourv16b: + case AArch64::LD4Rv1d: + case AArch64::LD4Rv2s: + case AArch64::LD4Rv4h: + case AArch64::LD4Rv8b: + case AArch64::LD4Rv2d: + case AArch64::LD4Rv4s: + case AArch64::LD4Rv8h: + case AArch64::LD4Rv16b: + DestRegIdx = -1; + BaseRegIdx = 1; + OffsetIdx = -1; + IsPrePost = false; + break; + + case AArch64::LD1i8_POST: + case AArch64::LD1i16_POST: + case AArch64::LD1i32_POST: + case AArch64::LD1i64_POST: + case AArch64::LD2i8_POST: + case AArch64::LD2i16_POST: + case AArch64::LD2i32_POST: + case AArch64::LD2i64_POST: + case AArch64::LD3i8_POST: + case AArch64::LD3i16_POST: + case AArch64::LD3i32_POST: + case AArch64::LD4i8_POST: + case AArch64::LD4i16_POST: + case AArch64::LD4i32_POST: + DestRegIdx = 1; + BaseRegIdx = 4; + OffsetIdx = 5; + IsPrePost = false; + break; + + case AArch64::LD3i64_POST: + case AArch64::LD4i64_POST: + DestRegIdx = -1; + BaseRegIdx = 4; + OffsetIdx = 5; + IsPrePost = false; + break; + + case AArch64::LD1Onev1d_POST: + case AArch64::LD1Onev2s_POST: + case AArch64::LD1Onev4h_POST: + case AArch64::LD1Onev8b_POST: + case AArch64::LD1Onev2d_POST: + case AArch64::LD1Onev4s_POST: + case AArch64::LD1Onev8h_POST: + case AArch64::LD1Onev16b_POST: + case AArch64::LD1Rv1d_POST: + case AArch64::LD1Rv2s_POST: + case AArch64::LD1Rv4h_POST: + case AArch64::LD1Rv8b_POST: + case AArch64::LD1Rv2d_POST: + case AArch64::LD1Rv4s_POST: + case AArch64::LD1Rv8h_POST: + case AArch64::LD1Rv16b_POST: + case AArch64::LD1Twov1d_POST: + case AArch64::LD1Twov2s_POST: + case AArch64::LD1Twov4h_POST: + case AArch64::LD1Twov8b_POST: + case AArch64::LD2Twov2s_POST: + case AArch64::LD2Twov4s_POST: + case AArch64::LD2Twov8b_POST: + case AArch64::LD2Rv1d_POST: + case AArch64::LD2Rv2s_POST: + case AArch64::LD2Rv4s_POST: + case AArch64::LD2Rv8b_POST: + DestRegIdx = 1; + BaseRegIdx = 2; + OffsetIdx = 3; + IsPrePost = false; + break; + + case AArch64::LD1Twov2d_POST: + case AArch64::LD1Twov4s_POST: + case AArch64::LD1Twov8h_POST: + case AArch64::LD1Twov16b_POST: + case AArch64::LD1Threev1d_POST: + case AArch64::LD1Threev2s_POST: + case AArch64::LD1Threev4h_POST: + case AArch64::LD1Threev8b_POST: + case AArch64::LD1Threev2d_POST: + case AArch64::LD1Threev4s_POST: + case AArch64::LD1Threev8h_POST: + case AArch64::LD1Threev16b_POST: + case AArch64::LD1Fourv1d_POST: + case AArch64::LD1Fourv2s_POST: + case AArch64::LD1Fourv4h_POST: + case AArch64::LD1Fourv8b_POST: + case AArch64::LD1Fourv2d_POST: + case AArch64::LD1Fourv4s_POST: + case AArch64::LD1Fourv8h_POST: + case AArch64::LD1Fourv16b_POST: + case AArch64::LD2Twov2d_POST: + case AArch64::LD2Twov4h_POST: + case AArch64::LD2Twov8h_POST: + case AArch64::LD2Twov16b_POST: + case AArch64::LD2Rv2d_POST: + case AArch64::LD2Rv4h_POST: + case AArch64::LD2Rv8h_POST: + case AArch64::LD2Rv16b_POST: + case AArch64::LD3Threev2s_POST: + case AArch64::LD3Threev4h_POST: + case AArch64::LD3Threev8b_POST: + case AArch64::LD3Threev2d_POST: + case AArch64::LD3Threev4s_POST: + case AArch64::LD3Threev8h_POST: + case AArch64::LD3Threev16b_POST: + case AArch64::LD3Rv1d_POST: + case AArch64::LD3Rv2s_POST: + case AArch64::LD3Rv4h_POST: + case AArch64::LD3Rv8b_POST: + case AArch64::LD3Rv2d_POST: + case AArch64::LD3Rv4s_POST: + case AArch64::LD3Rv8h_POST: + case AArch64::LD3Rv16b_POST: + case AArch64::LD4Fourv2s_POST: + case AArch64::LD4Fourv4h_POST: + case AArch64::LD4Fourv8b_POST: + case AArch64::LD4Fourv2d_POST: + case AArch64::LD4Fourv4s_POST: + case AArch64::LD4Fourv8h_POST: + case AArch64::LD4Fourv16b_POST: + 
case AArch64::LD4Rv1d_POST: + case AArch64::LD4Rv2s_POST: + case AArch64::LD4Rv4h_POST: + case AArch64::LD4Rv8b_POST: + case AArch64::LD4Rv2d_POST: + case AArch64::LD4Rv4s_POST: + case AArch64::LD4Rv8h_POST: + case AArch64::LD4Rv16b_POST: + DestRegIdx = -1; + BaseRegIdx = 2; + OffsetIdx = 3; + IsPrePost = false; + break; + + case AArch64::LDRBBroW: + case AArch64::LDRBBroX: + case AArch64::LDRBBui: + case AArch64::LDRBroW: + case AArch64::LDRBroX: + case AArch64::LDRBui: + case AArch64::LDRDl: + case AArch64::LDRDroW: + case AArch64::LDRDroX: + case AArch64::LDRDui: + case AArch64::LDRHHroW: + case AArch64::LDRHHroX: + case AArch64::LDRHHui: + case AArch64::LDRHroW: + case AArch64::LDRHroX: + case AArch64::LDRHui: + case AArch64::LDRQl: + case AArch64::LDRQroW: + case AArch64::LDRQroX: + case AArch64::LDRQui: + case AArch64::LDRSBWroW: + case AArch64::LDRSBWroX: + case AArch64::LDRSBWui: + case AArch64::LDRSBXroW: + case AArch64::LDRSBXroX: + case AArch64::LDRSBXui: + case AArch64::LDRSHWroW: + case AArch64::LDRSHWroX: + case AArch64::LDRSHWui: + case AArch64::LDRSHXroW: + case AArch64::LDRSHXroX: + case AArch64::LDRSHXui: + case AArch64::LDRSWl: + case AArch64::LDRSWroW: + case AArch64::LDRSWroX: + case AArch64::LDRSWui: + case AArch64::LDRSl: + case AArch64::LDRSroW: + case AArch64::LDRSroX: + case AArch64::LDRSui: + case AArch64::LDRWl: + case AArch64::LDRWroW: + case AArch64::LDRWroX: + case AArch64::LDRWui: + case AArch64::LDRXl: + case AArch64::LDRXroW: + case AArch64::LDRXroX: + case AArch64::LDRXui: + case AArch64::LDURBBi: + case AArch64::LDURBi: + case AArch64::LDURDi: + case AArch64::LDURHHi: + case AArch64::LDURHi: + case AArch64::LDURQi: + case AArch64::LDURSBWi: + case AArch64::LDURSBXi: + case AArch64::LDURSHWi: + case AArch64::LDURSHXi: + case AArch64::LDURSWi: + case AArch64::LDURSi: + case AArch64::LDURWi: + case AArch64::LDURXi: + DestRegIdx = 0; + BaseRegIdx = 1; + OffsetIdx = 2; + IsPrePost = false; + break; + + case AArch64::LDRBBpost: + case AArch64::LDRBBpre: + case AArch64::LDRBpost: + case AArch64::LDRBpre: + case AArch64::LDRDpost: + case AArch64::LDRDpre: + case AArch64::LDRHHpost: + case AArch64::LDRHHpre: + case AArch64::LDRHpost: + case AArch64::LDRHpre: + case AArch64::LDRQpost: + case AArch64::LDRQpre: + case AArch64::LDRSBWpost: + case AArch64::LDRSBWpre: + case AArch64::LDRSBXpost: + case AArch64::LDRSBXpre: + case AArch64::LDRSHWpost: + case AArch64::LDRSHWpre: + case AArch64::LDRSHXpost: + case AArch64::LDRSHXpre: + case AArch64::LDRSWpost: + case AArch64::LDRSWpre: + case AArch64::LDRSpost: + case AArch64::LDRSpre: + case AArch64::LDRWpost: + case AArch64::LDRWpre: + case AArch64::LDRXpost: + case AArch64::LDRXpre: + DestRegIdx = 1; + BaseRegIdx = 2; + OffsetIdx = 3; + IsPrePost = true; + break; + + case AArch64::LDPDi: + case AArch64::LDPQi: + DestRegIdx = -1; + BaseRegIdx = 2; + OffsetIdx = 3; + IsPrePost = false; + break; + + case AArch64::LDPSWi: + case AArch64::LDPSi: + case AArch64::LDPWi: + case AArch64::LDPXi: + DestRegIdx = 0; + BaseRegIdx = 2; + OffsetIdx = 3; + IsPrePost = false; + break; + + case AArch64::LDPQpost: + case AArch64::LDPQpre: + DestRegIdx = -1; + BaseRegIdx = 3; + OffsetIdx = 4; + IsPrePost = true; + break; + + case AArch64::LDPDpost: + case AArch64::LDPDpre: + case AArch64::LDPSWpost: + case AArch64::LDPSWpre: + case AArch64::LDPSpost: + case AArch64::LDPSpre: + case AArch64::LDPWpost: + case AArch64::LDPWpre: + case AArch64::LDPXpost: + case AArch64::LDPXpre: + DestRegIdx = 1; + BaseRegIdx = 3; + OffsetIdx = 4; + IsPrePost = 
true; + break; + } + + LoadInfo LI; + LI.DestReg = DestRegIdx == -1 ? 0 : MI.getOperand(DestRegIdx).getReg(); + LI.BaseReg = MI.getOperand(BaseRegIdx).getReg(); + LI.BaseRegIdx = BaseRegIdx; + LI.OffsetOpnd = OffsetIdx == -1 ? nullptr : &MI.getOperand(OffsetIdx); + LI.IsPrePost = IsPrePost; + return LI; +} + +static Optional getTag(const TargetRegisterInfo *TRI, + const MachineInstr &MI, const LoadInfo &LI) { + unsigned Dest = LI.DestReg ? TRI->getEncodingValue(LI.DestReg) : 0; + unsigned Base = TRI->getEncodingValue(LI.BaseReg); + unsigned Off; + if (LI.OffsetOpnd == nullptr) + Off = 0; + else if (LI.OffsetOpnd->isGlobal() || LI.OffsetOpnd->isSymbol() || + LI.OffsetOpnd->isCPI()) + return None; + else if (LI.OffsetOpnd->isReg()) + Off = (1 << 5) | TRI->getEncodingValue(LI.OffsetOpnd->getReg()); + else + Off = LI.OffsetOpnd->getImm() >> 2; + + return makeTag(Dest, Base, Off); +} + +void FalkorHWPFFix::runOnLoop(MachineLoop &L, MachineFunction &Fn) { + // Build the initial tag map for the whole loop. + TagMap.clear(); + for (MachineBasicBlock *MBB : L.getBlocks()) + for (MachineInstr &MI : *MBB) { + Optional LInfo = getLoadInfo(MI); + if (!LInfo) + continue; + Optional Tag = getTag(TRI, MI, *LInfo); + if (!Tag) + continue; + TagMap[*Tag].push_back(&MI); + } + + bool AnyCollisions = false; + for (auto &P : TagMap) { + auto Size = P.second.size(); + if (Size > 1) { + for (auto *MI : P.second) { + if (TII->isStridedAccess(*MI)) { + AnyCollisions = true; + break; + } + } + } + if (AnyCollisions) + break; + } + // Nothing to fix. + if (!AnyCollisions) + return; + + MachineRegisterInfo &MRI = Fn.getRegInfo(); + + // Go through all the basic blocks in the current loop and fix any streaming + // loads to avoid collisions with any other loads. + LiveRegUnits LR(*TRI); + for (MachineBasicBlock *MBB : L.getBlocks()) { + LR.clear(); + LR.addLiveOuts(*MBB); + for (auto I = MBB->rbegin(); I != MBB->rend(); LR.stepBackward(*I), ++I) { + MachineInstr &MI = *I; + if (!TII->isStridedAccess(MI)) + continue; + + LoadInfo LdI = *getLoadInfo(MI); + unsigned OldTag = *getTag(TRI, MI, LdI); + auto &OldCollisions = TagMap[OldTag]; + if (OldCollisions.size() <= 1) + continue; + + bool Fixed = false; + DEBUG(dbgs() << "Attempting to fix tag collision: " << MI); + + for (unsigned ScratchReg : AArch64::GPR64RegClass) { + if (!LR.available(ScratchReg) || MRI.isReserved(ScratchReg)) + continue; + + LoadInfo NewLdI(LdI); + NewLdI.BaseReg = ScratchReg; + unsigned NewTag = *getTag(TRI, MI, NewLdI); + // Scratch reg tag would collide too, so don't use it. + if (TagMap.count(NewTag)) + continue; + + DEBUG(dbgs() << "Changing base reg to: " << PrintReg(ScratchReg, TRI) + << '\n'); + + // Rewrite: + // Xd = LOAD Xb, off + // to: + // Xc = MOV Xb + // Xd = LOAD Xc, off + DebugLoc DL = MI.getDebugLoc(); + BuildMI(*MBB, &MI, DL, TII->get(AArch64::ORRXrs), ScratchReg) + .addReg(AArch64::XZR) + .addReg(LdI.BaseReg) + .addImm(0); + MachineOperand &BaseOpnd = MI.getOperand(LdI.BaseRegIdx); + BaseOpnd.setReg(ScratchReg); + + // If the load does a pre/post increment, then insert a MOV after as + // well to update the real base register. + if (LdI.IsPrePost) { + DEBUG(dbgs() << "Doing post MOV of incremented reg: " + << PrintReg(ScratchReg, TRI) << '\n'); + MI.getOperand(0).setReg( + ScratchReg); // Change tied operand pre/post update dest. 
+ BuildMI(*MBB, std::next(MachineBasicBlock::iterator(MI)), DL, + TII->get(AArch64::ORRXrs), LdI.BaseReg) + .addReg(AArch64::XZR) + .addReg(ScratchReg) + .addImm(0); + } + + for (int I = 0, E = OldCollisions.size(); I != E; ++I) + if (OldCollisions[I] == &MI) { + std::swap(OldCollisions[I], OldCollisions[E - 1]); + OldCollisions.pop_back(); + break; + } + + // Update TagMap to reflect instruction changes to reduce the number + // of later MOVs to be inserted. This needs to be done after + // OldCollisions is updated since it may be relocated by this + // insertion. + TagMap[NewTag].push_back(&MI); + ++NumCollisionsAvoided; + Fixed = true; + Modified = true; + break; + } + if (!Fixed) + ++NumCollisionsNotAvoided; + } + } +} + +bool FalkorHWPFFix::runOnMachineFunction(MachineFunction &Fn) { + auto &ST = static_cast(Fn.getSubtarget()); + if (ST.getProcFamily() != AArch64Subtarget::Falkor) + return false; + + if (skipFunction(*Fn.getFunction())) + return false; + + TII = static_cast(ST.getInstrInfo()); + TRI = ST.getRegisterInfo(); + + assert(TRI->trackLivenessAfterRegAlloc(Fn) && + "Register liveness not available!"); + + MachineLoopInfo &LI = getAnalysis(); + + Modified = false; + + for (MachineLoop *I : LI) + for (auto L = df_begin(I), LE = df_end(I); L != LE; ++L) + // Only process inner-loops + if (L->empty()) + runOnLoop(**L, Fn); + + return Modified; +} + +FunctionPass *llvm::createFalkorHWPFFixPass() { return new FalkorHWPFFix(); } diff --git a/interpreter/llvm/src/lib/Target/AArch64/AArch64FastISel.cpp b/interpreter/llvm/src/lib/Target/AArch64/AArch64FastISel.cpp index 9ac7ecb9cdb46..97396057dce07 100644 --- a/interpreter/llvm/src/lib/Target/AArch64/AArch64FastISel.cpp +++ b/interpreter/llvm/src/lib/Target/AArch64/AArch64FastISel.cpp @@ -1282,6 +1282,10 @@ unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg, bool WantResult) { assert(LHSReg && RHSReg && "Invalid register number."); + if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP || + RHSReg == AArch64::SP || RHSReg == AArch64::WSP) + return 0; + if (RetVT != MVT::i32 && RetVT != MVT::i64) return 0; @@ -1362,6 +1366,8 @@ unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg, uint64_t ShiftImm, bool SetFlags, bool WantResult) { assert(LHSReg && RHSReg && "Invalid register number."); + assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP && + RHSReg != AArch64::SP && RHSReg != AArch64::WSP); if (RetVT != MVT::i32 && RetVT != MVT::i64) return 0; @@ -1403,6 +1409,8 @@ unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg, uint64_t ShiftImm, bool SetFlags, bool WantResult) { assert(LHSReg && RHSReg && "Invalid register number."); + assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR && + RHSReg != AArch64::XZR && RHSReg != AArch64::WZR); if (RetVT != MVT::i32 && RetVT != MVT::i64) return 0; @@ -2106,7 +2114,7 @@ bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr, switch (VT.SimpleTy) { default: llvm_unreachable("Unexpected value type."); - case MVT::i1: VTIsi1 = true; + case MVT::i1: VTIsi1 = true; LLVM_FALLTHROUGH; case MVT::i8: Opc = OpcTable[Idx][0]; break; case MVT::i16: Opc = OpcTable[Idx][1]; break; case MVT::i32: Opc = OpcTable[Idx][2]; break; @@ -2827,7 +2835,7 @@ bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) { return false; EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true); - if (SrcVT == MVT::f128) + if (SrcVT == MVT::f128 || SrcVT == MVT::f16) return false; unsigned Opc; @@ 
-2854,6 +2862,10 @@ bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) { MVT DestVT; if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector()) return false; + // Let regular ISEL handle FP16 + if (DestVT == MVT::f16) + return false; + assert((DestVT == MVT::f32 || DestVT == MVT::f64) && "Unexpected value type."); @@ -5126,6 +5138,7 @@ bool AArch64FastISel::fastSelectInstruction(const Instruction *I) { return selectOperator(I, I->getOpcode()); // Silence warnings. (void)&CC_AArch64_DarwinPCS_VarArg; + (void)&CC_AArch64_Win64_VarArg; } namespace llvm { diff --git a/interpreter/llvm/src/lib/Target/AArch64/AArch64FrameLowering.cpp b/interpreter/llvm/src/lib/Target/AArch64/AArch64FrameLowering.cpp index dc916c0346613..7c6a99990406c 100644 --- a/interpreter/llvm/src/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/interpreter/llvm/src/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -41,6 +41,10 @@ // | | // |-----------------------------------| // | | +// | (Win64 only) varargs from reg | +// | | +// |-----------------------------------| +// | | // | prev_fp, prev_lr | // | (a.k.a. "frame record") | // |-----------------------------------| <- fp(=x29) @@ -137,6 +141,34 @@ static cl::opt EnableRedZone("aarch64-redzone", STATISTIC(NumRedZoneFunctions, "Number of functions using red zone"); +/// Look at each instruction that references stack frames and return the stack +/// size limit beyond which some of these instructions will require a scratch +/// register during their expansion later. +static unsigned estimateRSStackSizeLimit(MachineFunction &MF) { + // FIXME: For now, just conservatively guestimate based on unscaled indexing + // range. We'll end up allocating an unnecessary spill slot a lot, but + // realistically that's not a big deal at this stage of the game. + for (MachineBasicBlock &MBB : MF) { + for (MachineInstr &MI : MBB) { + if (MI.isDebugValue() || MI.isPseudo() || + MI.getOpcode() == AArch64::ADDXri || + MI.getOpcode() == AArch64::ADDSXri) + continue; + + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + if (!MI.getOperand(i).isFI()) + continue; + + int Offset = 0; + if (isAArch64FrameOffsetLegal(MI, Offset, nullptr, nullptr, nullptr) == + AArch64FrameOffsetCannotUpdate) + return 0; + } + } + } + return 255; +} + bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const { if (!EnableRedZone) return false; @@ -267,12 +299,12 @@ static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB) { return AArch64::X9; const AArch64Subtarget &Subtarget = MF->getSubtarget(); - const AArch64RegisterInfo *TRI = Subtarget.getRegisterInfo(); + const AArch64RegisterInfo &TRI = *Subtarget.getRegisterInfo(); LivePhysRegs LiveRegs(TRI); LiveRegs.addLiveIns(*MBB); // Mark callee saved registers as used so we will not choose them. - const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(MF); + const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(MF); for (unsigned i = 0; CSRegs[i]; ++i) LiveRegs.addReg(CSRegs[i]); @@ -474,19 +506,23 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, return; } - auto CSStackSize = AFI->getCalleeSavedStackSize(); + bool IsWin64 = + Subtarget.isCallingConvWin64(MF.getFunction()->getCallingConv()); + unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0; + + auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject; // All of the remaining stack allocations are for locals. 
- AFI->setLocalStackSize(NumBytes - CSStackSize); + AFI->setLocalStackSize(NumBytes - PrologueSaveSize); bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes); if (CombineSPBump) { emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII, MachineInstr::FrameSetup); NumBytes = 0; - } else if (CSStackSize != 0) { + } else if (PrologueSaveSize != 0) { MBBI = convertCalleeSaveRestoreToSPPrePostIncDec(MBB, MBBI, DL, TII, - -CSStackSize); - NumBytes -= CSStackSize; + -PrologueSaveSize); + NumBytes -= PrologueSaveSize; } assert(NumBytes >= 0 && "Negative stack allocation size!?"); @@ -500,8 +536,9 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, ++MBBI; } if (HasFP) { - // Only set up FP if we actually need to. Frame pointer is fp = sp - 16. - int FPOffset = CSStackSize - 16; + // Only set up FP if we actually need to. Frame pointer is fp = + // sp - fixedobject - 16. + int FPOffset = AFI->getCalleeSavedStackSize() - 16; if (CombineSPBump) FPOffset += AFI->getLocalStackSize(); @@ -640,8 +677,8 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, if (HasFP) { // Define the current CFA rule to use the provided FP. unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true); - unsigned CFIIndex = MF.addFrameInst( - MCCFIInstruction::createDefCfa(nullptr, Reg, 2 * StackGrowth)); + unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfa( + nullptr, Reg, 2 * StackGrowth - FixedObject)); BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlags(MachineInstr::FrameSetup); @@ -727,12 +764,16 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, // AArch64TargetLowering::LowerCall figures out ArgumentPopSize and keeps // it as the 2nd argument of AArch64ISD::TC_RETURN. - auto CSStackSize = AFI->getCalleeSavedStackSize(); + bool IsWin64 = + Subtarget.isCallingConvWin64(MF.getFunction()->getCallingConv()); + unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0; + + auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject; bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes); - if (!CombineSPBump && CSStackSize != 0) + if (!CombineSPBump && PrologueSaveSize != 0) convertCalleeSaveRestoreToSPPrePostIncDec( - MBB, std::prev(MBB.getFirstTerminator()), DL, TII, CSStackSize); + MBB, std::prev(MBB.getFirstTerminator()), DL, TII, PrologueSaveSize); // Move past the restores of the callee-saved registers. MachineBasicBlock::iterator LastPopI = MBB.getFirstTerminator(); @@ -754,7 +795,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, return; } - NumBytes -= CSStackSize; + NumBytes -= PrologueSaveSize; assert(NumBytes >= 0 && "Negative stack allocation size!?"); if (!hasFP(MF)) { @@ -764,7 +805,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, if (RedZone && ArgumentPopSize == 0) return; - bool NoCalleeSaveRestore = CSStackSize == 0; + bool NoCalleeSaveRestore = PrologueSaveSize == 0; int StackRestoreBytes = RedZone ? 0 : NumBytes; if (NoCalleeSaveRestore) StackRestoreBytes += ArgumentPopSize; @@ -783,7 +824,8 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, // be able to save any instructions. 
if (MFI.hasVarSizedObjects() || AFI->isStackRealigned()) emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP, - -CSStackSize + 16, TII, MachineInstr::FrameDestroy); + -AFI->getCalleeSavedStackSize() + 16, TII, + MachineInstr::FrameDestroy); else if (NumBytes) emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, NumBytes, TII, MachineInstr::FrameDestroy); @@ -813,7 +855,11 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF, const AArch64RegisterInfo *RegInfo = static_cast( MF.getSubtarget().getRegisterInfo()); const AArch64FunctionInfo *AFI = MF.getInfo(); - int FPOffset = MFI.getObjectOffset(FI) + 16; + const AArch64Subtarget &Subtarget = MF.getSubtarget(); + bool IsWin64 = + Subtarget.isCallingConvWin64(MF.getFunction()->getCallingConv()); + unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0; + int FPOffset = MFI.getObjectOffset(FI) + FixedObject + 16; int Offset = MFI.getObjectOffset(FI) + MFI.getStackSize(); bool isFixed = MFI.isFixedObjectIndex(FI); @@ -922,7 +968,7 @@ static void computeCalleeSaveRegisterPairs( CC == CallingConv::PreserveMost || (Count & 1) == 0) && "Odd number of callee-saved regs to spill!"); - unsigned Offset = AFI->getCalleeSavedStackSize(); + int Offset = AFI->getCalleeSavedStackSize(); for (unsigned i = 0; i < Count; ++i) { RegPairInfo RPI; @@ -991,6 +1037,7 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters( SmallVector RegPairs; computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs); + const MachineRegisterInfo &MRI = MF.getRegInfo(); for (auto RPII = RegPairs.rbegin(), RPIE = RegPairs.rend(); RPII != RPIE; ++RPII) { @@ -1022,9 +1069,11 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters( dbgs() << ")\n"); MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc)); - MBB.addLiveIn(Reg1); + if (!MRI.isReserved(Reg1)) + MBB.addLiveIn(Reg1); if (RPI.isPaired()) { - MBB.addLiveIn(Reg2); + if (!MRI.isReserved(Reg2)) + MBB.addLiveIn(Reg2); MIB.addReg(Reg2, getPrologueDeath(MF, Reg2)); MIB.addMemOperand(MF.getMachineMemOperand( MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx + 1), @@ -1158,8 +1207,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, } DEBUG(dbgs() << "*** determineCalleeSaves\nUsed CSRs:"; - for (int Reg = SavedRegs.find_first(); Reg != -1; - Reg = SavedRegs.find_next(Reg)) + for (unsigned Reg : SavedRegs.set_bits()) dbgs() << ' ' << PrintReg(Reg, RegInfo); dbgs() << "\n";); @@ -1167,16 +1215,13 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, unsigned NumRegsSpilled = SavedRegs.count(); bool CanEliminateFrame = NumRegsSpilled == 0; - // FIXME: Set BigStack if any stack slot references may be out of range. - // For now, just conservatively guestimate based on unscaled indexing - // range. We'll end up allocating an unnecessary spill slot a lot, but - // realistically that's not a big deal at this stage of the game. // The CSR spill slots have not been allocated yet, so estimateStackSize // won't include them. 
MachineFrameInfo &MFI = MF.getFrameInfo(); unsigned CFSize = MFI.estimateStackSize(MF) + 8 * NumRegsSpilled; DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n"); - bool BigStack = (CFSize >= 256); + unsigned EstimatedStackSizeLimit = estimateRSStackSizeLimit(MF); + bool BigStack = (CFSize > EstimatedStackSizeLimit); if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) AFI->setHasStackFrame(true); diff --git a/interpreter/llvm/src/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/interpreter/llvm/src/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index b18fb30eb2d48..06005f6b68861 100644 --- a/interpreter/llvm/src/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/interpreter/llvm/src/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -201,7 +201,7 @@ class AArch64DAGToDAGISel : public SelectionDAGISel { bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width); - void SelectCMP_SWAP(SDNode *N); + bool SelectCMP_SWAP(SDNode *N); }; } // end anonymous namespace @@ -239,10 +239,17 @@ bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand( case InlineAsm::Constraint_i: case InlineAsm::Constraint_m: case InlineAsm::Constraint_Q: - // Require the address to be in a register. That is safe for all AArch64 - // variants and it is hard to do anything much smarter without knowing - // how the operand is used. - OutOps.push_back(Op); + // We need to make sure that this one operand does not end up in XZR, thus + // require the address to be in a PointerRegClass register. + const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo(); + const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF); + SDLoc dl(Op); + SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64); + SDValue NewOp = + SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, + dl, Op.getValueType(), + Op, RC), 0); + OutOps.push_back(NewOp); return false; } return true; @@ -2566,7 +2573,7 @@ bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) { // pstatefield for the MSR (immediate) instruction, we also require that an // immediate value has been provided as an argument, we know that this is // the case as it has been ensured by semantic checking. - auto PMapper = AArch64PState::lookupPStateByName(RegString->getString());; + auto PMapper = AArch64PState::lookupPStateByName(RegString->getString()); if (PMapper) { assert (isa(N->getOperand(2)) && "Expected a constant integer expression."); @@ -2609,9 +2616,13 @@ bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) { } /// We've got special pseudo-instructions for these -void AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) { +bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) { unsigned Opcode; EVT MemTy = cast(N)->getMemoryVT(); + + // Leave IR for LSE if subtarget supports it. 
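+ // (With LSE, ATOMIC_CMP_SWAP can be selected to a native CAS instruction, + // so the LL/SC CMP_SWAP_* pseudos built below are not needed.)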
+ if (Subtarget->hasLSE()) return false; + if (MemTy == MVT::i8) Opcode = AArch64::CMP_SWAP_8; else if (MemTy == MVT::i16) @@ -2637,6 +2648,8 @@ void AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) { ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0)); ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2)); CurDAG->RemoveDeadNode(N); + + return true; } void AArch64DAGToDAGISel::Select(SDNode *Node) { @@ -2660,8 +2673,9 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) { break; case ISD::ATOMIC_CMP_SWAP: - SelectCMP_SWAP(Node); - return; + if (SelectCMP_SWAP(Node)) + return; + break; case ISD::READ_REGISTER: if (tryReadRegister(Node)) diff --git a/interpreter/llvm/src/lib/Target/AArch64/AArch64ISelLowering.cpp b/interpreter/llvm/src/lib/Target/AArch64/AArch64ISelLowering.cpp index 1af36086ad903..9d879886d39dd 100644 --- a/interpreter/llvm/src/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/interpreter/llvm/src/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -11,9 +11,9 @@ // //===----------------------------------------------------------------------===// +#include "AArch64ISelLowering.h" #include "AArch64CallingConvention.h" #include "AArch64MachineFunctionInfo.h" -#include "AArch64ISelLowering.h" #include "AArch64PerfectShuffle.h" #include "AArch64RegisterInfo.h" #include "AArch64Subtarget.h" @@ -22,9 +22,9 @@ #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Triple.h" @@ -51,10 +51,10 @@ #include "llvm/IR/Function.h" #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/GlobalValue.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" -#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Module.h" #include "llvm/IR/OperandTraits.h" #include "llvm/IR/Type.h" @@ -381,7 +381,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::FNEARBYINT, MVT::v4f16, Expand); setOperationAction(ISD::FNEG, MVT::v4f16, Expand); setOperationAction(ISD::FPOW, MVT::v4f16, Expand); - setOperationAction(ISD::FPOWI, MVT::v4f16, Expand); setOperationAction(ISD::FREM, MVT::v4f16, Expand); setOperationAction(ISD::FROUND, MVT::v4f16, Expand); setOperationAction(ISD::FRINT, MVT::v4f16, Expand); @@ -413,7 +412,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::FNEARBYINT, MVT::v8f16, Expand); setOperationAction(ISD::FNEG, MVT::v8f16, Expand); setOperationAction(ISD::FPOW, MVT::v8f16, Expand); - setOperationAction(ISD::FPOWI, MVT::v8f16, Expand); setOperationAction(ISD::FREM, MVT::v8f16, Expand); setOperationAction(ISD::FROUND, MVT::v8f16, Expand); setOperationAction(ISD::FRINT, MVT::v8f16, Expand); @@ -726,7 +724,6 @@ void AArch64TargetLowering::addTypeForNEON(MVT VT, MVT PromotedBitwiseVT) { if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64) { setOperationAction(ISD::FSIN, VT, Expand); setOperationAction(ISD::FCOS, VT, Expand); - setOperationAction(ISD::FPOWI, VT, Expand); setOperationAction(ISD::FPOW, VT, Expand); setOperationAction(ISD::FLOG, VT, Expand); setOperationAction(ISD::FLOG2, VT, Expand); @@ -886,18 +883,21 @@ static bool optimizeLogicalImm(SDValue Op, unsigned Size, uint64_t Imm, // Create the new constant immediate node. 
EVT VT = Op.getValueType(); SDLoc DL(Op); + SDValue New; // If the new constant immediate is all-zeros or all-ones, let the target // independent DAG combine optimize this node. - if (NewImm == 0 || NewImm == OrigMask) - return TLO.CombineTo(Op.getOperand(1), TLO.DAG.getConstant(NewImm, DL, VT)); - + if (NewImm == 0 || NewImm == OrigMask) { + New = TLO.DAG.getNode(Op.getOpcode(), DL, VT, Op.getOperand(0), + TLO.DAG.getConstant(NewImm, DL, VT)); // Otherwise, create a machine node so that target independent DAG combine // doesn't undo this optimization. - Enc = AArch64_AM::encodeLogicalImmediate(NewImm, Size); - SDValue EncConst = TLO.DAG.getTargetConstant(Enc, DL, VT); - SDValue New( - TLO.DAG.getMachineNode(NewOpc, DL, VT, Op.getOperand(0), EncConst), 0); + } else { + Enc = AArch64_AM::encodeLogicalImmediate(NewImm, Size); + SDValue EncConst = TLO.DAG.getTargetConstant(Enc, DL, VT); + New = SDValue( + TLO.DAG.getMachineNode(NewOpc, DL, VT, Op.getOperand(0), EncConst), 0); + } return TLO.CombineTo(Op, New); } @@ -2650,9 +2650,13 @@ CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC, case CallingConv::PreserveMost: case CallingConv::CXX_FAST_TLS: case CallingConv::Swift: + if (Subtarget->isTargetWindows() && IsVarArg) + return CC_AArch64_Win64_VarArg; if (!Subtarget->isTargetDarwin()) return CC_AArch64_AAPCS; return IsVarArg ? CC_AArch64_DarwinPCS_VarArg : CC_AArch64_DarwinPCS; + case CallingConv::Win64: + return IsVarArg ? CC_AArch64_Win64_VarArg : CC_AArch64_AAPCS; } } @@ -2668,6 +2672,7 @@ SDValue AArch64TargetLowering::LowerFormalArguments( SelectionDAG &DAG, SmallVectorImpl &InVals) const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); + bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction()->getCallingConv()); // Assign locations to all of the incoming arguments. SmallVector ArgLocs; @@ -2824,10 +2829,12 @@ SDValue AArch64TargetLowering::LowerFormalArguments( // varargs AArch64FunctionInfo *FuncInfo = MF.getInfo(); if (isVarArg) { - if (!Subtarget->isTargetDarwin()) { + if (!Subtarget->isTargetDarwin() || IsWin64) { // The AAPCS variadic function ABI is identical to the non-variadic // one. As a result there may be more arguments in registers and we should // save them for future reference. + // Win64 variadic functions also pass arguments in registers, but all float + // arguments are passed in integer registers. saveVarArgRegisters(CCInfo, DAG, DL, Chain); } @@ -2869,6 +2876,7 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo, MachineFrameInfo &MFI = MF.getFrameInfo(); AArch64FunctionInfo *FuncInfo = MF.getInfo(); auto PtrVT = getPointerTy(DAG.getDataLayout()); + bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction()->getCallingConv()); SmallVector MemOps; @@ -2881,7 +2889,13 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo, unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR); int GPRIdx = 0; if (GPRSaveSize != 0) { - GPRIdx = MFI.CreateStackObject(GPRSaveSize, 8, false); + if (IsWin64) { + GPRIdx = MFI.CreateFixedObject(GPRSaveSize, -(int)GPRSaveSize, false); + if (GPRSaveSize & 15) + // The extra size here, if triggered, will always be 8. 
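+ // (GPRSaveSize is a multiple of 8, so GPRSaveSize & 15 is either 0 or 8.)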
+ MFI.CreateFixedObject(16 - (GPRSaveSize & 15), -(int)alignTo(GPRSaveSize, 16), false); + } else + GPRIdx = MFI.CreateStackObject(GPRSaveSize, 8, false); SDValue FIN = DAG.getFrameIndex(GPRIdx, PtrVT); @@ -2890,7 +2904,11 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo, SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64); SDValue Store = DAG.getStore( Val.getValue(1), DL, Val, FIN, - MachinePointerInfo::getStack(DAG.getMachineFunction(), i * 8)); + IsWin64 + ? MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), + GPRIdx, + (i - FirstVariadicGPR) * 8) + : MachinePointerInfo::getStack(DAG.getMachineFunction(), i * 8)); MemOps.push_back(Store); FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getConstant(8, DL, PtrVT)); @@ -2899,7 +2917,7 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo, FuncInfo->setVarArgsGPRIndex(GPRIdx); FuncInfo->setVarArgsGPRSize(GPRSaveSize); - if (Subtarget->hasFPARMv8()) { + if (Subtarget->hasFPARMv8() && !IsWin64) { static const MCPhysReg FPRArgRegs[] = { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7}; @@ -4491,6 +4509,21 @@ SDValue AArch64TargetLowering::LowerDarwin_VASTART(SDValue Op, MachinePointerInfo(SV)); } +SDValue AArch64TargetLowering::LowerWin64_VASTART(SDValue Op, + SelectionDAG &DAG) const { + AArch64FunctionInfo *FuncInfo = + DAG.getMachineFunction().getInfo(); + + SDLoc DL(Op); + SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsGPRSize() > 0 + ? FuncInfo->getVarArgsGPRIndex() + : FuncInfo->getVarArgsStackIndex(), + getPointerTy(DAG.getDataLayout())); + const Value *SV = cast(Op.getOperand(2))->getValue(); + return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1), + MachinePointerInfo(SV)); +} + SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op, SelectionDAG &DAG) const { // The layout of the va_list struct is specified in the AArch64 Procedure Call @@ -4562,8 +4595,14 @@ SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op, SDValue AArch64TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { - return Subtarget->isTargetDarwin() ? LowerDarwin_VASTART(Op, DAG) - : LowerAAPCS_VASTART(Op, DAG); + MachineFunction &MF = DAG.getMachineFunction(); + + if (Subtarget->isCallingConvWin64(MF.getFunction()->getCallingConv())) + return LowerWin64_VASTART(Op, DAG); + else if (Subtarget->isTargetDarwin()) + return LowerDarwin_VASTART(Op, DAG); + else + return LowerAAPCS_VASTART(Op, DAG); } SDValue AArch64TargetLowering::LowerVACOPY(SDValue Op, @@ -4571,7 +4610,8 @@ SDValue AArch64TargetLowering::LowerVACOPY(SDValue Op, // AAPCS has three pointers and two ints (= 32 bytes), Darwin has single // pointer. SDLoc DL(Op); - unsigned VaListSize = Subtarget->isTargetDarwin() ? 8 : 32; + unsigned VaListSize = + Subtarget->isTargetDarwin() || Subtarget->isTargetWindows() ? 
8 : 32; const Value *DestSV = cast(Op.getOperand(3))->getValue(); const Value *SrcSV = cast(Op.getOperand(4))->getValue(); @@ -7451,6 +7491,14 @@ AArch64TargetLowering::getNumInterleavedAccesses(VectorType *VecTy, return (DL.getTypeSizeInBits(VecTy) + 127) / 128; } +MachineMemOperand::Flags +AArch64TargetLowering::getMMOFlags(const Instruction &I) const { + if (Subtarget->getProcFamily() == AArch64Subtarget::Falkor && + I.getMetadata(FALKOR_STRIDED_ACCESS_MD) != nullptr) + return MOStridedAccess; + return MachineMemOperand::MONone; +} + bool AArch64TargetLowering::isLegalInterleavedAccessType( VectorType *VecTy, const DataLayout &DL) const { @@ -7561,8 +7609,9 @@ bool AArch64TargetLowering::lowerInterleavedLoad( // Convert the integer vector to pointer vector if the element is pointer. if (EltTy->isPointerTy()) - SubVec = Builder.CreateIntToPtr(SubVec, SVI->getType()); - + SubVec = Builder.CreateIntToPtr( + SubVec, VectorType::get(SVI->getType()->getVectorElementType(), + VecTy->getVectorNumElements())); SubVecs[SVI].push_back(SubVec); } } @@ -8363,9 +8412,9 @@ static bool findEXTRHalf(SDValue N, SDValue &Src, uint32_t &ShiftAmount, /// EXTR instruction extracts a contiguous chunk of bits from two existing /// registers viewed as a high/low pair. This function looks for the pattern: -/// (or (shl VAL1, #N), (srl VAL2, #RegWidth-N)) and replaces it with an -/// EXTR. Can't quite be done in TableGen because the two immediates aren't -/// independent. +/// (or (shl VAL1, \#N), (srl VAL2, \#RegWidth-N)) and replaces it +/// with an EXTR. Can't quite be done in TableGen because the two immediates +/// aren't independent. static SDValue tryCombineToEXTR(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { SelectionDAG &DAG = DCI.DAG; @@ -9219,16 +9268,26 @@ static SDValue splitStoreSplat(SelectionDAG &DAG, StoreSDNode &St, // instructions (stp). SDLoc DL(&St); SDValue BasePtr = St.getBasePtr(); + uint64_t BaseOffset = 0; + const MachinePointerInfo &PtrInfo = St.getPointerInfo(); SDValue NewST1 = DAG.getStore(St.getChain(), DL, SplatVal, BasePtr, PtrInfo, OrigAlignment, St.getMemOperand()->getFlags()); + // As this is in ISel, we will not merge this add, which may degrade results. + if (BasePtr->getOpcode() == ISD::ADD && + isa(BasePtr->getOperand(1))) { + BaseOffset = cast(BasePtr->getOperand(1))->getSExtValue(); + BasePtr = BasePtr->getOperand(0); + } + unsigned Offset = EltOffset; while (--NumVecElts) { unsigned Alignment = MinAlign(OrigAlignment, Offset); - SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr, - DAG.getConstant(Offset, DL, MVT::i64)); + SDValue OffsetPtr = + DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr, + DAG.getConstant(BaseOffset + Offset, DL, MVT::i64)); NewST1 = DAG.getStore(NewST1.getValue(0), DL, SplatVal, OffsetPtr, PtrInfo.getWithOffset(Offset), Alignment, St.getMemOperand()->getFlags()); @@ -9356,7 +9415,7 @@ static SDValue splitStores(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, return SDValue(); StoreSDNode *S = cast(N); - if (S->isVolatile()) + if (S->isVolatile() || S->isIndexed()) return SDValue(); SDValue StVal = S->getValue(); @@ -9520,15 +9579,15 @@ static SDValue performPostLD1Combine(SDNode *N, return SDValue(); } -/// Simplify \Addr given that the top byte of it is ignored by HW during +/// Simplify ``Addr`` given that the top byte of it is ignored by HW during /// address translation.
static bool performTBISimplification(SDValue Addr, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG) { APInt DemandedMask = APInt::getLowBitsSet(64, 56); KnownBits Known; - TargetLowering::TargetLoweringOpt TLO(DAG, DCI.isBeforeLegalize(), - DCI.isBeforeLegalizeOps()); + TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), + !DCI.isBeforeLegalizeOps()); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (TLI.SimplifyDemandedBits(Addr, DemandedMask, Known, TLO)) { DCI.CommitTargetLoweringOpt(TLO); @@ -10553,11 +10612,17 @@ AArch64TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const { TargetLowering::AtomicExpansionKind AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { unsigned Size = AI->getType()->getPrimitiveSizeInBits(); - return Size <= 128 ? AtomicExpansionKind::LLSC : AtomicExpansionKind::None; + if (Size > 128) return AtomicExpansionKind::None; + // Nand not supported in LSE. + if (AI->getOperation() == AtomicRMWInst::Nand) return AtomicExpansionKind::LLSC; + // Leave 128 bits to LLSC. + return (Subtarget->hasLSE() && Size < 128) ? AtomicExpansionKind::None : AtomicExpansionKind::LLSC; } bool AArch64TargetLowering::shouldExpandAtomicCmpXchgInIR( AtomicCmpXchgInst *AI) const { + // If subtarget has LSE, leave cmpxchg intact for codegen. + if (Subtarget->hasLSE()) return false; // At -O0, fast-regalloc cannot cope with the live vregs necessary to // implement cmpxchg without spilling. If the address being exchanged is also // on the stack and close enough to the spill slot, this can lead to a @@ -10763,7 +10828,7 @@ bool AArch64TargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const { unsigned AArch64TargetLowering::getVaListSizeInBits(const DataLayout &DL) const { - if (Subtarget->isTargetDarwin()) + if (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows()) return getPointerTy(DL).getSizeInBits(); return 3 * getPointerTy(DL).getSizeInBits() + 2 * 32; diff --git a/interpreter/llvm/src/lib/Target/AArch64/AArch64ISelLowering.h b/interpreter/llvm/src/lib/Target/AArch64/AArch64ISelLowering.h index ecc2517fb288d..3b0e0f1de8946 100644 --- a/interpreter/llvm/src/lib/Target/AArch64/AArch64ISelLowering.h +++ b/interpreter/llvm/src/lib/Target/AArch64/AArch64ISelLowering.h @@ -408,6 +408,19 @@ class AArch64TargetLowering : public TargetLowering { bool isIntDivCheap(EVT VT, AttributeList Attr) const override; + bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT, + const SelectionDAG &DAG) const override { + // Do not merge to float value size (128 bits) if no implicit + // float attribute is set.
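Stepping back to the shouldExpandAtomicRMWInIR hunk above (the canMergeStoresTo body continues just below): it encodes a small decision table. Widths over 128 bits take the default path, NAND is always expanded to a load/store-exclusive loop since v8.1 LSE has no NAND operation, 128-bit operations stay on LL/SC, and everything else is left intact when LSE is available. A standalone sketch of that policy, with a local enum standing in for LLVM's AtomicExpansionKind:

  enum class ExpansionKind { None, LLSC };

  // Illustrative restatement of the new policy; not the patch's code.
  ExpansionKind expandAtomicRMW(bool HasLSE, bool IsNand, unsigned SizeInBits) {
    if (SizeInBits > 128)
      return ExpansionKind::None; // too wide: leave to the generic lowering
    if (IsNand)
      return ExpansionKind::LLSC; // LSE has no NAND, so build an LL/SC loop
    // 128-bit RMW still goes through LL/SC even when LSE is present.
    return (HasLSE && SizeInBits < 128) ? ExpansionKind::None
                                        : ExpansionKind::LLSC;
  }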
+ + bool NoFloat = DAG.getMachineFunction().getFunction()->hasFnAttribute( + Attribute::NoImplicitFloat); + + if (NoFloat) + return (MemVT.getSizeInBits() <= 64); + return true; + } + bool isCheapToSpeculateCttz() const override { return true; } @@ -455,6 +468,8 @@ class AArch64TargetLowering : public TargetLowering { unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL) const; + MachineMemOperand::Flags getMMOFlags(const Instruction &I) const override; + private: bool isExtFreeImpl(const Instruction *Ext) const override; @@ -541,6 +556,7 @@ class AArch64TargetLowering : public TargetLowering { SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerAAPCS_VASTART(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDarwin_VASTART(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerWin64_VASTART(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const; diff --git a/interpreter/llvm/src/lib/Target/AArch64/AArch64InstrAtomics.td b/interpreter/llvm/src/lib/Target/AArch64/AArch64InstrAtomics.td index 71826bec6b11f..eec41ddbc159f 100644 --- a/interpreter/llvm/src/lib/Target/AArch64/AArch64InstrAtomics.td +++ b/interpreter/llvm/src/lib/Target/AArch64/AArch64InstrAtomics.td @@ -405,3 +405,59 @@ def CMP_SWAP_128 : Pseudo<(outs GPR64:$RdLo, GPR64:$RdHi, GPR32:$scratch), (ins GPR64:$addr, GPR64:$desiredLo, GPR64:$desiredHi, GPR64:$newLo, GPR64:$newHi), []>, Sched<[WriteAtomic]>; + +// v8.1 Atomic instructions: +def : Pat<(atomic_load_add_8 GPR64:$Rn, GPR32:$Rs), (LDADDALb GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_add_16 GPR64:$Rn, GPR32:$Rs), (LDADDALh GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_add_32 GPR64:$Rn, GPR32:$Rs), (LDADDALs GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_add_64 GPR64:$Rn, GPR64:$Rs), (LDADDALd GPR64:$Rs, GPR64sp:$Rn)>; + +def : Pat<(atomic_load_or_8 GPR64:$Rn, GPR32:$Rs), (LDSETALb GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_or_16 GPR64:$Rn, GPR32:$Rs), (LDSETALh GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_or_32 GPR64:$Rn, GPR32:$Rs), (LDSETALs GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_or_64 GPR64:$Rn, GPR64:$Rs), (LDSETALd GPR64:$Rs, GPR64sp:$Rn)>; + +def : Pat<(atomic_load_xor_8 GPR64:$Rn, GPR32:$Rs), (LDEORALb GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_xor_16 GPR64:$Rn, GPR32:$Rs), (LDEORALh GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_xor_32 GPR64:$Rn, GPR32:$Rs), (LDEORALs GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_xor_64 GPR64:$Rn, GPR64:$Rs), (LDEORALd GPR64:$Rs, GPR64sp:$Rn)>; + +def : Pat<(atomic_load_max_8 GPR64:$Rn, GPR32:$Rs), (LDSMAXALb GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_max_16 GPR64:$Rn, GPR32:$Rs), (LDSMAXALh GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_max_32 GPR64:$Rn, GPR32:$Rs), (LDSMAXALs GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_max_64 GPR64:$Rn, GPR64:$Rs), (LDSMAXALd GPR64:$Rs, GPR64sp:$Rn)>; + +def : Pat<(atomic_load_umax_8 GPR64:$Rn, GPR32:$Rs), (LDUMAXALb GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_umax_16 GPR64:$Rn, GPR32:$Rs), (LDUMAXALh GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_umax_32 GPR64:$Rn, GPR32:$Rs), (LDUMAXALs GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_umax_64 GPR64:$Rn, GPR64:$Rs), (LDUMAXALd GPR64:$Rs, GPR64sp:$Rn)>; + +def : Pat<(atomic_load_min_8 GPR64:$Rn, GPR32:$Rs), (LDSMINALb GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_min_16 
GPR64:$Rn, GPR32:$Rs), (LDSMINALh GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_min_32 GPR64:$Rn, GPR32:$Rs), (LDSMINALs GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_min_64 GPR64:$Rn, GPR64:$Rs), (LDSMINALd GPR64:$Rs, GPR64sp:$Rn)>; + +def : Pat<(atomic_load_umin_8 GPR64:$Rn, GPR32:$Rs), (LDUMINALb GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_umin_16 GPR64:$Rn, GPR32:$Rs), (LDUMINALh GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_umin_32 GPR64:$Rn, GPR32:$Rs), (LDUMINALs GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_umin_64 GPR64:$Rn, GPR64:$Rs), (LDUMINALd GPR64:$Rs, GPR64sp:$Rn)>; + +def : Pat<(atomic_cmp_swap_8 GPR64:$Rn, GPR32:$Rold, GPR32:$Rnew), (CASALb GPR32:$Rold, GPR32:$Rnew, GPR64sp:$Rn)>; +def : Pat<(atomic_cmp_swap_16 GPR64:$Rn, GPR32:$Rold, GPR32:$Rnew), (CASALh GPR32:$Rold, GPR32:$Rnew, GPR64sp:$Rn)>; +def : Pat<(atomic_cmp_swap_32 GPR64:$Rn, GPR32:$Rold, GPR32:$Rnew), (CASALs GPR32:$Rold, GPR32:$Rnew, GPR64sp:$Rn)>; +def : Pat<(atomic_cmp_swap_64 GPR64:$Rn, GPR64:$Rold, GPR64:$Rnew), (CASALd GPR64:$Rold, GPR64:$Rnew, GPR64sp:$Rn)>; + +def : Pat<(atomic_swap_8 GPR64:$Rn, GPR32:$Rs), (SWPALb GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_swap_16 GPR64:$Rn, GPR32:$Rs), (SWPALh GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_swap_32 GPR64:$Rn, GPR32:$Rs), (SWPALs GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_swap_64 GPR64:$Rn, GPR64:$Rs), (SWPALd GPR64:$Rs, GPR64sp:$Rn)>; + +def : Pat<(atomic_load_sub_8 GPR64:$Rn, GPR32:$Rs), (LDADDALb (SUBWrr WZR, GPR32:$Rs), GPR64sp:$Rn)>; +def : Pat<(atomic_load_sub_16 GPR64:$Rn, GPR32:$Rs), (LDADDALh (SUBWrr WZR, GPR32:$Rs), GPR64sp:$Rn)>; +def : Pat<(atomic_load_sub_32 GPR64:$Rn, GPR32:$Rs), (LDADDALs (SUBWrr WZR, GPR32:$Rs), GPR64sp:$Rn)>; +def : Pat<(atomic_load_sub_64 GPR64:$Rn, GPR64:$Rs), (LDADDALd (SUBXrr XZR, GPR64:$Rs), GPR64sp:$Rn)>; + +def : Pat<(atomic_load_and_8 GPR64:$Rn, GPR32:$Rs), (LDCLRALb (ORNWrr WZR, GPR32:$Rs), GPR64sp:$Rn)>; +def : Pat<(atomic_load_and_16 GPR64:$Rn, GPR32:$Rs), (LDCLRALh (ORNWrr WZR, GPR32:$Rs), GPR64sp:$Rn)>; +def : Pat<(atomic_load_and_32 GPR64:$Rn, GPR32:$Rs), (LDCLRALs (ORNWrr WZR, GPR32:$Rs), GPR64sp:$Rn)>; +def : Pat<(atomic_load_and_64 GPR64:$Rn, GPR64:$Rs), (LDCLRALd (ORNXrr XZR, GPR64:$Rs), GPR64sp:$Rn)>; diff --git a/interpreter/llvm/src/lib/Target/AArch64/AArch64InstrInfo.cpp b/interpreter/llvm/src/lib/Target/AArch64/AArch64InstrInfo.cpp index d382a40f8c2c3..c0c6055c358f7 100644 --- a/interpreter/llvm/src/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/interpreter/llvm/src/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -17,8 +17,8 @@ #include "MCTargetDesc/AArch64AddressingModes.h" #include "Utils/AArch64BaseInfo.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -52,9 +52,6 @@ using namespace llvm; #define GET_INSTRINFO_CTOR_DTOR #include "AArch64GenInstrInfo.inc" -static const MachineMemOperand::Flags MOSuppressPair = - MachineMemOperand::MOTargetFlag1; - static cl::opt TBZDisplacementBits("aarch64-tbz-offset-bits", cl::Hidden, cl::init(14), cl::desc("Restrict range of TB[N]Z instructions (DEBUG)")); @@ -763,15 +760,126 @@ bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const { llvm_unreachable("Unknown opcode to check as cheap as a move!"); } -bool AArch64InstrInfo::isFalkorLSLFast(const MachineInstr &MI) const { - if (MI.getNumOperands() < 4) +bool 
AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) const { + switch (MI.getOpcode()) { + default: return false; - unsigned ShOpVal = MI.getOperand(3).getImm(); - unsigned ShImm = AArch64_AM::getShiftValue(ShOpVal); - if (AArch64_AM::getShiftType(ShOpVal) == AArch64_AM::LSL && - ShImm < 4) - return true; - return false; + + case AArch64::ADDWrs: + case AArch64::ADDXrs: + case AArch64::ADDSWrs: + case AArch64::ADDSXrs: { + unsigned Imm = MI.getOperand(3).getImm(); + unsigned ShiftVal = AArch64_AM::getShiftValue(Imm); + if (ShiftVal == 0) + return true; + return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5; + } + + case AArch64::ADDWrx: + case AArch64::ADDXrx: + case AArch64::ADDXrx64: + case AArch64::ADDSWrx: + case AArch64::ADDSXrx: + case AArch64::ADDSXrx64: { + unsigned Imm = MI.getOperand(3).getImm(); + switch (AArch64_AM::getArithExtendType(Imm)) { + default: + return false; + case AArch64_AM::UXTB: + case AArch64_AM::UXTH: + case AArch64_AM::UXTW: + case AArch64_AM::UXTX: + return AArch64_AM::getArithShiftValue(Imm) <= 4; + } + } + + case AArch64::SUBWrs: + case AArch64::SUBSWrs: { + unsigned Imm = MI.getOperand(3).getImm(); + unsigned ShiftVal = AArch64_AM::getShiftValue(Imm); + return ShiftVal == 0 || + (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31); + } + + case AArch64::SUBXrs: + case AArch64::SUBSXrs: { + unsigned Imm = MI.getOperand(3).getImm(); + unsigned ShiftVal = AArch64_AM::getShiftValue(Imm); + return ShiftVal == 0 || + (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63); + } + + case AArch64::SUBWrx: + case AArch64::SUBXrx: + case AArch64::SUBXrx64: + case AArch64::SUBSWrx: + case AArch64::SUBSXrx: + case AArch64::SUBSXrx64: { + unsigned Imm = MI.getOperand(3).getImm(); + switch (AArch64_AM::getArithExtendType(Imm)) { + default: + return false; + case AArch64_AM::UXTB: + case AArch64_AM::UXTH: + case AArch64_AM::UXTW: + case AArch64_AM::UXTX: + return AArch64_AM::getArithShiftValue(Imm) == 0; + } + } + + case AArch64::LDRBBroW: + case AArch64::LDRBBroX: + case AArch64::LDRBroW: + case AArch64::LDRBroX: + case AArch64::LDRDroW: + case AArch64::LDRDroX: + case AArch64::LDRHHroW: + case AArch64::LDRHHroX: + case AArch64::LDRHroW: + case AArch64::LDRHroX: + case AArch64::LDRQroW: + case AArch64::LDRQroX: + case AArch64::LDRSBWroW: + case AArch64::LDRSBWroX: + case AArch64::LDRSBXroW: + case AArch64::LDRSBXroX: + case AArch64::LDRSHWroW: + case AArch64::LDRSHWroX: + case AArch64::LDRSHXroW: + case AArch64::LDRSHXroX: + case AArch64::LDRSWroW: + case AArch64::LDRSWroX: + case AArch64::LDRSroW: + case AArch64::LDRSroX: + case AArch64::LDRWroW: + case AArch64::LDRWroX: + case AArch64::LDRXroW: + case AArch64::LDRXroX: + case AArch64::PRFMroW: + case AArch64::PRFMroX: + case AArch64::STRBBroW: + case AArch64::STRBBroX: + case AArch64::STRBroW: + case AArch64::STRBroX: + case AArch64::STRDroW: + case AArch64::STRDroX: + case AArch64::STRHHroW: + case AArch64::STRHHroX: + case AArch64::STRHroW: + case AArch64::STRHroX: + case AArch64::STRQroW: + case AArch64::STRQroX: + case AArch64::STRSroW: + case AArch64::STRSroX: + case AArch64::STRWroW: + case AArch64::STRWroX: + case AArch64::STRXroW: + case AArch64::STRXroX: { + unsigned IsSigned = MI.getOperand(3).getImm(); + return !IsSigned; + } + } } bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI, @@ -925,7 +1033,7 @@ static bool UpdateOperandRegClass(MachineInstr &Instr) { /// \brief Return the opcode that does not set flags when possible - 
otherwise /// return the original opcode. The caller is responsible to do the actual /// substitution and legality checking. -static unsigned convertFlagSettingOpcode(const MachineInstr &MI) { +static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI) { // Don't convert all compare instructions, because for some the zero register // encoding becomes the sp register. bool MIDefinesZeroReg = false; @@ -1034,7 +1142,7 @@ bool AArch64InstrInfo::optimizeCompareInstr( return true; } unsigned Opc = CmpInstr.getOpcode(); - unsigned NewOpc = convertFlagSettingOpcode(CmpInstr); + unsigned NewOpc = convertToNonFlagSettingOpc(CmpInstr); if (NewOpc == Opc) return false; const MCInstrDesc &MCID = get(NewOpc); @@ -1171,6 +1279,7 @@ static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) { case AArch64CC::HI: // Z clear and C set case AArch64CC::LS: // Z set or C clear UsedFlags.Z = true; + LLVM_FALLTHROUGH; case AArch64CC::HS: // C set case AArch64CC::LO: // C clear UsedFlags.C = true; @@ -1189,6 +1298,7 @@ static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) { case AArch64CC::GT: // Z clear, N and V the same case AArch64CC::LE: // Z set, N and V differ UsedFlags.Z = true; + LLVM_FALLTHROUGH; case AArch64CC::GE: // N and V the same case AArch64CC::LT: // N and V differ UsedFlags.N = true; @@ -1602,6 +1712,13 @@ void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) const { (*MI.memoperands_begin())->setFlags(MOSuppressPair); } +/// Check all MachineMemOperands for a hint that the load/store is strided. +bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) const { + return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) { + return MMO->getFlags() & MOStridedAccess; + }); +} + bool AArch64InstrInfo::isUnscaledLdSt(unsigned Opc) const { switch (Opc) { default: @@ -3207,7 +3324,7 @@ static bool getMaddPatterns(MachineInstr &Root, // When NZCV is live bail out. if (Cmp_NZCV == -1) return false; - unsigned NewOpc = convertFlagSettingOpcode(Root); + unsigned NewOpc = convertToNonFlagSettingOpc(Root); // When opcode can't change bail out. // CHECKME: do we miss any cases for opcode conversion? if (NewOpc == Opc) @@ -3558,12 +3675,17 @@ enum class FMAInstKind { Default, Indexed, Accumulator }; /// F|MUL I=A,B,0 /// F|ADD R,I,C /// ==> F|MADD R,A,B,C +/// \param MF Containing MachineFunction +/// \param MRI Register information +/// \param TII Target information /// \param Root is the F|ADD instruction /// \param [out] InsInstrs is a vector of machine instructions and will /// contain the generated madd instruction /// \param IdxMulOpd is index of operand in Root that is the result of /// the F|MUL. In the example above IdxMulOpd is 1. 
/// \param MaddOpc the opcode of the f|madd instruction +/// \param RC Register class of operands +/// \param kind Kind of fma instruction (addressing mode) to be generated static MachineInstr * genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, @@ -3622,6 +3744,9 @@ genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI, /// ADD R,I,Imm /// ==> ORR V, ZR, Imm /// ==> MADD R,A,B,V +/// \param MF Containing MachineFunction +/// \param MRI Register information +/// \param TII Target information /// \param Root is the ADD instruction /// \param [out] InsInstrs is a vector of machine instructions and will /// contain the generated madd instruction @@ -3630,6 +3755,7 @@ genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI, /// \param MaddOpc the opcode of the madd instruction /// \param VR is a virtual register that holds the value of an ADD operand /// (V in the example above). +/// \param RC Register class of operands static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl &InsInstrs, @@ -4039,6 +4165,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence( } MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); break; + } case MachineCombinerPattern::FMLSv1i32_indexed_OP2: Opc = AArch64::FMLSv1i32_indexed; @@ -4095,7 +4222,6 @@ void AArch64InstrInfo::genAlternativeCodeSequence( FMAInstKind::Accumulator); } break; - } } // end switch (Pattern) // Record MUL and ADD/SUB for deletion DelInstrs.push_back(MUL); @@ -4105,26 +4231,36 @@ void AArch64InstrInfo::genAlternativeCodeSequence( /// \brief Replace csincr-branch sequence by simple conditional branch /// /// Examples: -/// 1. +/// 1. \code /// csinc w9, wzr, wzr, /// tbnz w9, #0, 0x44 +/// \endcode /// to +/// \code /// b. +/// \endcode /// -/// 2. +/// 2. \code /// csinc w9, wzr, wzr, /// tbz w9, #0, 0x44 +/// \endcode /// to +/// \code /// b. +/// \endcode /// /// Replace compare and branch sequence by TBZ/TBNZ instruction when the /// compare's constant operand is a power of 2.
/// /// Examples: +/// \code /// and w8, w8, #0x400 /// cbnz w8, L1 +/// \endcode /// to +/// \code /// tbnz w8, #10, L1 +/// \endcode /// /// \param MI Conditional Branch /// \return True when the simple conditional branch is generated @@ -4298,6 +4434,14 @@ AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const { return makeArrayRef(TargetFlags); } +ArrayRef> +AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const { + static const std::pair TargetFlags[] = {{MOSuppressPair, "aarch64-suppress-pair"}, {MOStridedAccess, "aarch64-strided-access"}}; + return makeArrayRef(TargetFlags); +} + unsigned AArch64InstrInfo::getOutliningBenefit(size_t SequenceSize, size_t Occurrences, bool CanBeTailCall) const { diff --git a/interpreter/llvm/src/lib/Target/AArch64/AArch64InstrInfo.h b/interpreter/llvm/src/lib/Target/AArch64/AArch64InstrInfo.h index 4cd14db633b97..1765a0263ea44 100644 --- a/interpreter/llvm/src/lib/Target/AArch64/AArch64InstrInfo.h +++ b/interpreter/llvm/src/lib/Target/AArch64/AArch64InstrInfo.h @@ -27,6 +27,13 @@ namespace llvm { class AArch64Subtarget; class AArch64TargetMachine; +static const MachineMemOperand::Flags MOSuppressPair = + MachineMemOperand::MOTargetFlag1; +static const MachineMemOperand::Flags MOStridedAccess = + MachineMemOperand::MOTargetFlag2; + +#define FALKOR_STRIDED_ACCESS_MD "falkor.strided.access" + class AArch64InstrInfo final : public AArch64GenInstrInfo { const AArch64RegisterInfo RI; const AArch64Subtarget &Subtarget; @@ -81,6 +88,9 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo { /// unprofitable. bool isLdStPairSuppressed(const MachineInstr &MI) const; + /// Return true if the given load or store is a strided memory access. + bool isStridedAccess(const MachineInstr &MI) const; + /// Return true if this is an unscaled load/store. bool isUnscaledLdSt(unsigned Opc) const; @@ -119,6 +129,44 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo { } } + /// \brief Return the opcode that sets flags when possible. The caller is + /// responsible for ensuring the opc has a flag setting equivalent.
+ static unsigned convertToFlagSettingOpc(unsigned Opc, bool &Is64Bit) { + switch (Opc) { + default: + llvm_unreachable("Opcode has no flag setting equivalent!"); + // 32-bit cases: + case AArch64::ADDWri: Is64Bit = false; return AArch64::ADDSWri; + case AArch64::ADDWrr: Is64Bit = false; return AArch64::ADDSWrr; + case AArch64::ADDWrs: Is64Bit = false; return AArch64::ADDSWrs; + case AArch64::ADDWrx: Is64Bit = false; return AArch64::ADDSWrx; + case AArch64::ANDWri: Is64Bit = false; return AArch64::ANDSWri; + case AArch64::ANDWrr: Is64Bit = false; return AArch64::ANDSWrr; + case AArch64::ANDWrs: Is64Bit = false; return AArch64::ANDSWrs; + case AArch64::BICWrr: Is64Bit = false; return AArch64::BICSWrr; + case AArch64::BICWrs: Is64Bit = false; return AArch64::BICSWrs; + case AArch64::SUBWri: Is64Bit = false; return AArch64::SUBSWri; + case AArch64::SUBWrr: Is64Bit = false; return AArch64::SUBSWrr; + case AArch64::SUBWrs: Is64Bit = false; return AArch64::SUBSWrs; + case AArch64::SUBWrx: Is64Bit = false; return AArch64::SUBSWrx; + // 64-bit cases: + case AArch64::ADDXri: Is64Bit = true; return AArch64::ADDSXri; + case AArch64::ADDXrr: Is64Bit = true; return AArch64::ADDSXrr; + case AArch64::ADDXrs: Is64Bit = true; return AArch64::ADDSXrs; + case AArch64::ADDXrx: Is64Bit = true; return AArch64::ADDSXrx; + case AArch64::ANDXri: Is64Bit = true; return AArch64::ANDSXri; + case AArch64::ANDXrr: Is64Bit = true; return AArch64::ANDSXrr; + case AArch64::ANDXrs: Is64Bit = true; return AArch64::ANDSXrs; + case AArch64::BICXrr: Is64Bit = true; return AArch64::BICSXrr; + case AArch64::BICXrs: Is64Bit = true; return AArch64::BICSXrs; + case AArch64::SUBXri: Is64Bit = true; return AArch64::SUBSXri; + case AArch64::SUBXrr: Is64Bit = true; return AArch64::SUBSXrr; + case AArch64::SUBXrs: Is64Bit = true; return AArch64::SUBSXrs; + case AArch64::SUBXrx: Is64Bit = true; return AArch64::SUBSXrx; + } + } + + /// Return true if this is a load/store that can be potentially paired/merged. bool isCandidateToMergeOrPair(MachineInstr &MI) const; @@ -225,8 +273,8 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo { /// \param Pattern - combiner pattern bool isThroughputPattern(MachineCombinerPattern Pattern) const override; /// Return true when there is potentially a faster code sequence - /// for an instruction chain ending in . All potential patterns are - /// listed in the array. + /// for an instruction chain ending in ``Root``. All potential patterns are + /// listed in the ``Patterns`` array. bool getMachineCombinerPatterns(MachineInstr &Root, SmallVectorImpl &Patterns) const override; @@ -251,6 +299,8 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo { getSerializableDirectMachineOperandTargetFlags() const override; ArrayRef> getSerializableBitmaskMachineOperandTargetFlags() const override; + ArrayRef> + getSerializableMachineMemOperandTargetFlags() const override; bool isFunctionSafeToOutlineFrom(MachineFunction &MF) const override; unsigned getOutliningBenefit(size_t SequenceSize, size_t Occurrences, @@ -270,7 +320,7 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo { bool IsTailCall) const override; /// Returns true if the instruction has a shift by immediate that can be /// executed in one cycle less. 
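A usage sketch for the convertToFlagSettingOpc helper added earlier in this hunk. The caller here is hypothetical, and the snippet assumes the AArch64 headers from this patch are on the include path:

  // Map ADDWrr to its flag-setting twin; Is64Bit is set as a by-product.
  unsigned toFlagSetting() {
    bool Is64Bit;
    unsigned NewOpc =
        AArch64InstrInfo::convertToFlagSettingOpc(AArch64::ADDWrr, Is64Bit);
    // Now NewOpc == AArch64::ADDSWrr and Is64Bit == false. Opcodes without a
    // flag-setting form hit llvm_unreachable, so callers must check first.
    return NewOpc;
  }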
- bool isFalkorLSLFast(const MachineInstr &MI) const; + bool isFalkorShiftExtFast(const MachineInstr &MI) const; private: /// \brief Sets the offsets on outlined instructions in \p MBB which use SP @@ -316,7 +366,7 @@ enum AArch64FrameOffsetStatus { /// If result == AArch64FrameOffsetCannotUpdate, @p MI cannot be updated to /// use an offset.eq /// If result & AArch64FrameOffsetIsLegal, @p Offset can completely be -/// rewriten in @p MI. +/// rewritten in @p MI. /// If result & AArch64FrameOffsetCanUpdate, @p Offset contains the /// amount that is off the limit of the legal offset. /// If set, @p OutUseUnscaledOp will contain the whether @p MI should be diff --git a/interpreter/llvm/src/lib/Target/AArch64/AArch64InstrInfo.td b/interpreter/llvm/src/lib/Target/AArch64/AArch64InstrInfo.td index 5ddf66654a675..5049a39814f12 100644 --- a/interpreter/llvm/src/lib/Target/AArch64/AArch64InstrInfo.td +++ b/interpreter/llvm/src/lib/Target/AArch64/AArch64InstrInfo.td @@ -37,6 +37,11 @@ def HasFullFP16 : Predicate<"Subtarget->hasFullFP16()">, AssemblerPredicate<"FeatureFullFP16", "fullfp16">; def HasSPE : Predicate<"Subtarget->hasSPE()">, AssemblerPredicate<"FeatureSPE", "spe">; +def HasFuseAES : Predicate<"Subtarget->hasFuseAES()">, + AssemblerPredicate<"FeatureFuseAES", + "fuse-aes">; +def HasSVE : Predicate<"Subtarget->hasSVE()">, + AssemblerPredicate<"FeatureSVE", "sve">; def IsLE : Predicate<"Subtarget->isLittleEndian()">; def IsBE : Predicate<"!Subtarget->isLittleEndian()">; @@ -313,10 +318,13 @@ def AArch64umaxv : SDNode<"AArch64ISD::UMAXV", SDT_AArch64UnaryVec>; //===----------------------------------------------------------------------===// // AArch64 Instruction Predicate Definitions. -def IsDarwin : Predicate<"Subtarget->isTargetDarwin()">; -def IsNotDarwin: Predicate<"!Subtarget->isTargetDarwin()">; -def ForCodeSize : Predicate<"Subtarget->getForCodeSize()">; -def NotForCodeSize : Predicate<"!Subtarget->getForCodeSize()">; +// We could compute these on a per-module basis but doing so requires accessing +// the Function object through the Subtarget and objections were raised +// to that (see post-commit review comments for r301750). +let RecomputePerFunction = 1 in { + def ForCodeSize : Predicate<"MF->getFunction()->optForSize()">; + def NotForCodeSize : Predicate<"!MF->getFunction()->optForSize()">; +} include "AArch64InstrFormats.td" @@ -436,7 +444,7 @@ def MSRpstateImm4 : MSRpstateImm0_15; // TPIDR_EL0. Add pseudo op so we can mark it as not having any side effects. let hasSideEffects = 0 in def MOVbaseTLS : Pseudo<(outs GPR64:$dst), (ins), - [(set GPR64:$dst, AArch64threadpointer)]>, Sched<[]>; + [(set GPR64:$dst, AArch64threadpointer)]>, Sched<[WriteSys]>; // The cycle counter PMC register is PMCCNTR_EL0. 
let Predicates = [HasPerfMon] in @@ -708,10 +716,10 @@ def : InstAlias<"negs $dst, $src$shift", defm UDIV : Div<0, "udiv", udiv>; defm SDIV : Div<1, "sdiv", sdiv>; -def : Pat<(int_aarch64_udiv GPR32:$Rn, GPR32:$Rm), (UDIVWr $Rn, $Rm)>; -def : Pat<(int_aarch64_udiv GPR64:$Rn, GPR64:$Rm), (UDIVXr $Rn, $Rm)>; -def : Pat<(int_aarch64_sdiv GPR32:$Rn, GPR32:$Rm), (SDIVWr $Rn, $Rm)>; -def : Pat<(int_aarch64_sdiv GPR64:$Rn, GPR64:$Rm), (SDIVXr $Rn, $Rm)>; +def : Pat<(int_aarch64_udiv GPR32:$Rn, GPR32:$Rm), (UDIVWr GPR32:$Rn, GPR32:$Rm)>; +def : Pat<(int_aarch64_udiv GPR64:$Rn, GPR64:$Rm), (UDIVXr GPR64:$Rn, GPR64:$Rm)>; +def : Pat<(int_aarch64_sdiv GPR32:$Rn, GPR32:$Rm), (SDIVWr GPR32:$Rn, GPR32:$Rm)>; +def : Pat<(int_aarch64_sdiv GPR64:$Rn, GPR64:$Rm), (SDIVXr GPR64:$Rn, GPR64:$Rm)>; // Variable shift defm ASRV : Shift<0b10, "asr", sra>; @@ -729,7 +737,7 @@ def : ShiftAlias<"rorv", RORVWr, GPR32>; def : ShiftAlias<"rorv", RORVXr, GPR64>; // Multiply-add -let AddedComplexity = 7 in { +let AddedComplexity = 5 in { defm MADD : MulAccum<0, "madd", add>; defm MSUB : MulAccum<1, "msub", sub>; @@ -746,7 +754,7 @@ def : Pat<(i32 (mul (ineg GPR32:$Rn), GPR32:$Rm)), (MSUBWrrr GPR32:$Rn, GPR32:$Rm, WZR)>; def : Pat<(i64 (mul (ineg GPR64:$Rn), GPR64:$Rm)), (MSUBXrrr GPR64:$Rn, GPR64:$Rm, XZR)>; -} // AddedComplexity = 7 +} // AddedComplexity = 5 let AddedComplexity = 5 in { def SMADDLrrr : WideMulAccum<0, 0b001, "smaddl", add, sext>; @@ -5299,6 +5307,31 @@ def AESDrr : AESTiedInst<0b0101, "aesd", int_aarch64_crypto_aesd>; def AESMCrr : AESInst< 0b0110, "aesmc", int_aarch64_crypto_aesmc>; def AESIMCrr : AESInst< 0b0111, "aesimc", int_aarch64_crypto_aesimc>; +// Pseudo instructions for AESMCrr/AESIMCrr with a register constraint required +// for AES fusion on some CPUs. +let hasSideEffects = 0, mayStore = 0, mayLoad = 0 in { +def AESMCrrTied: Pseudo<(outs V128:$Rd), (ins V128:$Rn), [], "$Rn = $Rd">, + Sched<[WriteV]>; +def AESIMCrrTied: Pseudo<(outs V128:$Rd), (ins V128:$Rn), [], "$Rn = $Rd">, + Sched<[WriteV]>; +} + +// Only use constrained versions of AES(I)MC instructions if they are paired with +// AESE/AESD. 
+def : Pat<(v16i8 (int_aarch64_crypto_aesmc + (v16i8 (int_aarch64_crypto_aese (v16i8 V128:$src1), + (v16i8 V128:$src2))))), + (v16i8 (AESMCrrTied (v16i8 (AESErr (v16i8 V128:$src1), + (v16i8 V128:$src2)))))>, + Requires<[HasFuseAES]>; + +def : Pat<(v16i8 (int_aarch64_crypto_aesimc + (v16i8 (int_aarch64_crypto_aesd (v16i8 V128:$src1), + (v16i8 V128:$src2))))), + (v16i8 (AESIMCrrTied (v16i8 (AESDrr (v16i8 V128:$src1), + (v16i8 V128:$src2)))))>, + Requires<[HasFuseAES]>; + def SHA1Crrr : SHATiedInstQSV<0b000, "sha1c", int_aarch64_crypto_sha1c>; def SHA1Prrr : SHATiedInstQSV<0b001, "sha1p", int_aarch64_crypto_sha1p>; def SHA1Mrrr : SHATiedInstQSV<0b010, "sha1m", int_aarch64_crypto_sha1m>; diff --git a/interpreter/llvm/src/lib/Target/AArch64/AArch64InstructionSelector.cpp b/interpreter/llvm/src/lib/Target/AArch64/AArch64InstructionSelector.cpp index 9bfd570e9a827..7e275e4d2f463 100644 --- a/interpreter/llvm/src/lib/Target/AArch64/AArch64InstructionSelector.cpp +++ b/interpreter/llvm/src/lib/Target/AArch64/AArch64InstructionSelector.cpp @@ -33,6 +33,8 @@ #define DEBUG_TYPE "aarch64-isel" +#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h" + using namespace llvm; #ifndef LLVM_BUILD_GLOBAL_ISEL @@ -212,6 +214,7 @@ static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID, return GenericOpc; } } + break; case AArch64::FPRRegBankID: switch (OpSize) { case 32: @@ -243,7 +246,8 @@ static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID, return GenericOpc; } } - }; + break; + } return GenericOpc; } @@ -267,6 +271,7 @@ static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID, case 64: return isStore ? AArch64::STRXui : AArch64::LDRXui; } + break; case AArch64::FPRRegBankID: switch (OpSize) { case 8: @@ -278,7 +283,8 @@ static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID, case 64: return isStore ? AArch64::STRDui : AArch64::LDRDui; } - }; + break; + } return GenericOpc; } @@ -947,7 +953,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) const { const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI); if (DstRB.getID() != SrcRB.getID()) { - DEBUG(dbgs() << "G_TRUNC input/output on different banks\n"); + DEBUG(dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n"); return false; } @@ -964,16 +970,21 @@ bool AArch64InstructionSelector::select(MachineInstr &I) const { if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) || !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) { - DEBUG(dbgs() << "Failed to constrain G_TRUNC\n"); + DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n"); return false; } if (DstRC == SrcRC) { // Nothing to be done + } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) && + SrcTy == LLT::scalar(64)) { + llvm_unreachable("TableGen can import this case"); + return false; } else if (DstRC == &AArch64::GPR32RegClass && SrcRC == &AArch64::GPR64RegClass) { I.getOperand(1).setSubReg(AArch64::sub_32); } else { + DEBUG(dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n"); return false; } @@ -1314,6 +1325,9 @@ bool AArch64InstructionSelector::select(MachineInstr &I) const { case TargetOpcode::G_VASTART: return STI.isTargetDarwin() ? 
selectVaStartDarwin(I, MF, MRI) : selectVaStartAAPCS(I, MF, MRI); + case TargetOpcode::G_IMPLICIT_DEF: + I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF)); + return true; } return false; diff --git a/interpreter/llvm/src/lib/Target/AArch64/AArch64LegalizerInfo.cpp b/interpreter/llvm/src/lib/Target/AArch64/AArch64LegalizerInfo.cpp index 6e6daf8122951..ffb27834c31c6 100644 --- a/interpreter/llvm/src/lib/Target/AArch64/AArch64LegalizerInfo.cpp +++ b/interpreter/llvm/src/lib/Target/AArch64/AArch64LegalizerInfo.cpp @@ -13,12 +13,12 @@ //===----------------------------------------------------------------------===// #include "AArch64LegalizerInfo.h" +#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/ValueTypes.h" -#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" -#include "llvm/IR/Type.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Type.h" #include "llvm/Target/TargetOpcodes.h" using namespace llvm; @@ -39,6 +39,9 @@ AArch64LegalizerInfo::AArch64LegalizerInfo() { const LLT v4s32 = LLT::vector(4, 32); const LLT v2s64 = LLT::vector(2, 64); + for (auto Ty : {p0, s1, s8, s16, s32, s64}) + setAction({G_IMPLICIT_DEF, Ty}, Legal); + for (unsigned BinOp : {G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR, G_SHL}) { // These operations naturally get the right answer when used on // GPR32, even if the actual type is narrower. @@ -79,7 +82,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo() { setAction({Op, 1, s1}, Legal); } - for (unsigned BinOp : {G_FADD, G_FSUB, G_FMUL, G_FDIV}) + for (unsigned BinOp : {G_FADD, G_FSUB, G_FMA, G_FMUL, G_FDIV}) for (auto Ty : {s32, s64}) setAction({BinOp, Ty}, Legal); @@ -99,6 +102,12 @@ AArch64LegalizerInfo::AArch64LegalizerInfo() { // G_INSERT (It seems entirely reasonable that inputs shouldn't overlap). } + for (auto Ty : {s1, s8, s16, s32, s64, p0}) + setAction({G_EXTRACT, Ty}, Legal); + + for (auto Ty : {s32, s64}) + setAction({G_EXTRACT, 1, Ty}, Legal); + for (unsigned MemOp : {G_LOAD, G_STORE}) { for (auto Ty : {s8, s16, s32, s64, p0, v2s32}) setAction({MemOp, Ty}, Legal); @@ -282,11 +291,10 @@ bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI, unsigned DstPtr; if (Align > PtrSize) { // Realign the list to the actual required alignment. 
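The realignment done by the buildConstant/buildGEP/buildPtrMask sequence diffed just below is the usual round-up-and-mask idiom. In plain integer terms, a sketch assuming Align is a power of two, as the Log2_64 call requires:

  #include <cstdint>

  // Round Ptr up to a multiple of Align: add Align-1, then clear the low
  // bits; the clearing is what buildPtrMask(Log2_64(Align)) performs.
  uint64_t alignUp(uint64_t Ptr, uint64_t Align) {
    return (Ptr + Align - 1) & ~(Align - 1);
  }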
- unsigned AlignMinus1 = MRI.createGenericVirtualRegister(IntPtrTy); - MIRBuilder.buildConstant(AlignMinus1, Align - 1); + auto AlignMinus1 = MIRBuilder.buildConstant(IntPtrTy, Align - 1); unsigned ListTmp = MRI.createGenericVirtualRegister(PtrTy); - MIRBuilder.buildGEP(ListTmp, List, AlignMinus1); + MIRBuilder.buildGEP(ListTmp, List, AlignMinus1->getOperand(0).getReg()); DstPtr = MRI.createGenericVirtualRegister(PtrTy); MIRBuilder.buildPtrMask(DstPtr, ListTmp, Log2_64(Align)); diff --git a/interpreter/llvm/src/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/interpreter/llvm/src/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp index 976498aa70d6d..9a7f45bde6c99 100644 --- a/interpreter/llvm/src/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp +++ b/interpreter/llvm/src/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp @@ -16,10 +16,10 @@ #include "AArch64Subtarget.h" #include "MCTargetDesc/AArch64AddressingModes.h" #include "llvm/ADT/BitVector.h" -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -388,6 +388,10 @@ static unsigned isMatchingStore(MachineInstr &LoadInst, } static unsigned getPreIndexedOpcode(unsigned Opc) { + // FIXME: We don't currently support creating pre-indexed loads/stores when + // the load or store is the unscaled version. If we decide to perform such an + // optimization in the future the cases for the unscaled loads/stores will + // need to be added here. switch (Opc) { default: llvm_unreachable("Opcode has no pre-indexed equivalent!"); @@ -451,32 +455,42 @@ static unsigned getPostIndexedOpcode(unsigned Opc) { default: llvm_unreachable("Opcode has no post-indexed wise equivalent!"); case AArch64::STRSui: + case AArch64::STURSi: return AArch64::STRSpost; case AArch64::STRDui: + case AArch64::STURDi: return AArch64::STRDpost; case AArch64::STRQui: + case AArch64::STURQi: return AArch64::STRQpost; case AArch64::STRBBui: return AArch64::STRBBpost; case AArch64::STRHHui: return AArch64::STRHHpost; case AArch64::STRWui: + case AArch64::STURWi: return AArch64::STRWpost; case AArch64::STRXui: + case AArch64::STURXi: return AArch64::STRXpost; case AArch64::LDRSui: + case AArch64::LDURSi: return AArch64::LDRSpost; case AArch64::LDRDui: + case AArch64::LDURDi: return AArch64::LDRDpost; case AArch64::LDRQui: + case AArch64::LDURQi: return AArch64::LDRQpost; case AArch64::LDRBBui: return AArch64::LDRBBpost; case AArch64::LDRHHui: return AArch64::LDRHHpost; case AArch64::LDRWui: + case AArch64::LDURWi: return AArch64::LDRWpost; case AArch64::LDRXui: + case AArch64::LDURXi: return AArch64::LDRXpost; case AArch64::LDRSWui: return AArch64::LDRSWpost; @@ -795,6 +809,7 @@ AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI, int LoadSize = getMemScale(*LoadI); int StoreSize = getMemScale(*StoreI); unsigned LdRt = getLdStRegOp(*LoadI).getReg(); + const MachineOperand &StMO = getLdStRegOp(*StoreI); unsigned StRt = getLdStRegOp(*StoreI).getReg(); bool IsStoreXReg = TRI->getRegClass(AArch64::GPR64RegClassID)->contains(StRt); @@ -807,7 +822,13 @@ AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI, // Remove the load, if the destination register of the loads is the same // register for stored value. 
if (StRt == LdRt && LoadSize == 8) { - StoreI->clearRegisterKills(StRt, TRI); + for (MachineInstr &MI : make_range(StoreI->getIterator(), + LoadI->getIterator())) { + if (MI.killsRegister(StRt, TRI)) { + MI.clearRegisterKills(StRt, TRI); + break; + } + } DEBUG(dbgs() << "Remove load instruction:\n "); DEBUG(LoadI->print(dbgs())); DEBUG(dbgs() << "\n"); @@ -819,7 +840,7 @@ AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI, BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(), TII->get(IsStoreXReg ? AArch64::ORRXrs : AArch64::ORRWrs), LdRt) .addReg(IsStoreXReg ? AArch64::XZR : AArch64::WZR) - .addReg(StRt) + .add(StMO) .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); } else { // FIXME: Currently we disable this transformation in big-endian targets as @@ -860,14 +881,14 @@ AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI, BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(), TII->get(IsStoreXReg ? AArch64::ANDXri : AArch64::ANDWri), DestReg) - .addReg(StRt) + .add(StMO) .addImm(AndMaskEncoded); } else { BitExtMI = BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(), TII->get(IsStoreXReg ? AArch64::UBFMXri : AArch64::UBFMWri), DestReg) - .addReg(StRt) + .add(StMO) .addImm(Immr) .addImm(Imms); } @@ -876,7 +897,10 @@ AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI, // Clear kill flags between store and load. for (MachineInstr &MI : make_range(StoreI->getIterator(), BitExtMI->getIterator())) - MI.clearRegisterKills(StRt, TRI); + if (MI.killsRegister(StRt, TRI)) { + MI.clearRegisterKills(StRt, TRI); + break; + } DEBUG(dbgs() << "Promoting load by replacing :\n "); DEBUG(StoreI->print(dbgs())); @@ -1684,8 +1708,9 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB, ++NumPostFolded; break; } - // Don't know how to handle pre/post-index versions, so move to the next - // instruction. + + // Don't know how to handle unscaled pre/post-index versions below, so + // move to the next instruction. 
if (TII->isUnscaledLdSt(Opc)) { ++MBBI; break; diff --git a/interpreter/llvm/src/lib/Target/AArch64/AArch64MCInstLower.cpp b/interpreter/llvm/src/lib/Target/AArch64/AArch64MCInstLower.cpp index 45083df7ab457..f82b9dbc2c9f7 100644 --- a/interpreter/llvm/src/lib/Target/AArch64/AArch64MCInstLower.cpp +++ b/interpreter/llvm/src/lib/Target/AArch64/AArch64MCInstLower.cpp @@ -151,13 +151,24 @@ MCOperand AArch64MCInstLower::lowerSymbolOperandELF(const MachineOperand &MO, return MCOperand::createExpr(Expr); } +MCOperand AArch64MCInstLower::lowerSymbolOperandCOFF(const MachineOperand &MO, + MCSymbol *Sym) const { + MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None; + const MCExpr *Expr = MCSymbolRefExpr::create(Sym, RefKind, Ctx); + if (!MO.isJTI() && MO.getOffset()) + Expr = MCBinaryExpr::createAdd( + Expr, MCConstantExpr::create(MO.getOffset(), Ctx), Ctx); + return MCOperand::createExpr(Expr); +} + MCOperand AArch64MCInstLower::LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const { if (Printer.TM.getTargetTriple().isOSDarwin()) return lowerSymbolOperandDarwin(MO, Sym); + if (Printer.TM.getTargetTriple().isOSBinFormatCOFF()) + return lowerSymbolOperandCOFF(MO, Sym); - assert(Printer.TM.getTargetTriple().isOSBinFormatELF() && - "Expect Darwin or ELF target"); + assert(Printer.TM.getTargetTriple().isOSBinFormatELF() && "Invalid target"); return lowerSymbolOperandELF(MO, Sym); } diff --git a/interpreter/llvm/src/lib/Target/AArch64/AArch64MCInstLower.h b/interpreter/llvm/src/lib/Target/AArch64/AArch64MCInstLower.h index 1e29b80c2d626..aa30fe1fa7078 100644 --- a/interpreter/llvm/src/lib/Target/AArch64/AArch64MCInstLower.h +++ b/interpreter/llvm/src/lib/Target/AArch64/AArch64MCInstLower.h @@ -42,6 +42,8 @@ class LLVM_LIBRARY_VISIBILITY AArch64MCInstLower { MCSymbol *Sym) const; MCOperand lowerSymbolOperandELF(const MachineOperand &MO, MCSymbol *Sym) const; + MCOperand lowerSymbolOperandCOFF(const MachineOperand &MO, + MCSymbol *Sym) const; MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const; MCSymbol *GetGlobalAddressSymbol(const MachineOperand &MO) const; diff --git a/interpreter/llvm/src/lib/Target/AArch64/AArch64MacroFusion.cpp b/interpreter/llvm/src/lib/Target/AArch64/AArch64MacroFusion.cpp index a6926a6700e18..963cfadc54fd7 100644 --- a/interpreter/llvm/src/lib/Target/AArch64/AArch64MacroFusion.cpp +++ b/interpreter/llvm/src/lib/Target/AArch64/AArch64MacroFusion.cpp @@ -7,37 +7,27 @@ // //===----------------------------------------------------------------------===// // -// \file This file contains the AArch64 implementation of the DAG scheduling mutation -// to pair instructions back to back. +/// \file This file contains the AArch64 implementation of the DAG scheduling +/// mutation to pair instructions back to back. // //===----------------------------------------------------------------------===// #include "AArch64MacroFusion.h" #include "AArch64Subtarget.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Support/CommandLine.h" +#include "llvm/CodeGen/MacroFusion.h" #include "llvm/Target/TargetInstrInfo.h" -#define DEBUG_TYPE "misched" - -STATISTIC(NumFused, "Number of instr pairs fused"); - using namespace llvm; -static cl::opt EnableMacroFusion("aarch64-misched-fusion", cl::Hidden, - cl::desc("Enable scheduling for macro fusion."), cl::init(true)); - namespace { -/// \brief Verify that the instr pair, FirstMI and SecondMI, should be fused -/// together. 
Given an anchor instr, when the other instr is unspecified, then -/// check if the anchor instr may be part of a fused pair at all. +/// \brief Check if the instr pair, FirstMI and SecondMI, should be fused +/// together. Given SecondMI, when FirstMI is unspecified, then check if +/// SecondMI may be part of a fused pair at all. static bool shouldScheduleAdjacent(const TargetInstrInfo &TII, const TargetSubtargetInfo &TSI, const MachineInstr *FirstMI, - const MachineInstr *SecondMI) { - assert((FirstMI || SecondMI) && "At least one instr must be specified"); - + const MachineInstr &SecondMI) { const AArch64InstrInfo &II = static_cast(TII); const AArch64Subtarget &ST = static_cast(TSI); @@ -45,9 +35,7 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII, unsigned FirstOpcode = FirstMI ? FirstMI->getOpcode() : static_cast(AArch64::INSTRUCTION_LIST_END); - unsigned SecondOpcode = - SecondMI ? SecondMI->getOpcode() - : static_cast(AArch64::INSTRUCTION_LIST_END); + unsigned SecondOpcode = SecondMI.getOpcode(); if (ST.hasArithmeticBccFusion()) // Fuse CMN, CMP, TST followed by Bcc. @@ -128,145 +116,51 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII, if (ST.hasFuseAES()) // Fuse AES crypto operations. - switch(FirstOpcode) { + switch(SecondOpcode) { // AES encode. - case AArch64::AESErr: - return SecondOpcode == AArch64::AESMCrr || - SecondOpcode == AArch64::INSTRUCTION_LIST_END; + case AArch64::AESMCrr: + case AArch64::AESMCrrTied: + return FirstOpcode == AArch64::AESErr || + FirstOpcode == AArch64::INSTRUCTION_LIST_END; // AES decode. - case AArch64::AESDrr: - return SecondOpcode == AArch64::AESIMCrr || - SecondOpcode == AArch64::INSTRUCTION_LIST_END; + case AArch64::AESIMCrr: + case AArch64::AESIMCrrTied: + return FirstOpcode == AArch64::AESDrr || + FirstOpcode == AArch64::INSTRUCTION_LIST_END; } if (ST.hasFuseLiterals()) // Fuse literal generation operations. - switch (FirstOpcode) { + switch (SecondOpcode) { // PC relative address. - case AArch64::ADRP: - return SecondOpcode == AArch64::ADDXri || - SecondOpcode == AArch64::INSTRUCTION_LIST_END; + case AArch64::ADDXri: + return FirstOpcode == AArch64::ADRP || + FirstOpcode == AArch64::INSTRUCTION_LIST_END; // 32 bit immediate. - case AArch64::MOVZWi: - return (SecondOpcode == AArch64::MOVKWi && - SecondMI->getOperand(3).getImm() == 16) || - SecondOpcode == AArch64::INSTRUCTION_LIST_END; - // Lower half of 64 bit immediate. - case AArch64::MOVZXi: - return (SecondOpcode == AArch64::MOVKXi && - SecondMI->getOperand(3).getImm() == 16) || - SecondOpcode == AArch64::INSTRUCTION_LIST_END; - // Upper half of 64 bit immediate. + case AArch64::MOVKWi: + return (FirstOpcode == AArch64::MOVZWi && + SecondMI.getOperand(3).getImm() == 16) || + FirstOpcode == AArch64::INSTRUCTION_LIST_END; + // Lower and upper half of 64 bit immediate. case AArch64::MOVKXi: - return FirstMI->getOperand(3).getImm() == 32 && - ((SecondOpcode == AArch64::MOVKXi && - SecondMI->getOperand(3).getImm() == 48) || - SecondOpcode == AArch64::INSTRUCTION_LIST_END); + return FirstOpcode == AArch64::INSTRUCTION_LIST_END || + (FirstOpcode == AArch64::MOVZXi && + SecondMI.getOperand(3).getImm() == 16) || + (FirstOpcode == AArch64::MOVKXi && + FirstMI->getOperand(3).getImm() == 32 && + SecondMI.getOperand(3).getImm() == 48); } return false; } -/// \brief Implement the fusion of instr pairs in the scheduling DAG, -/// anchored at the instr in AnchorSU.. 
-static bool scheduleAdjacentImpl(ScheduleDAGMI *DAG, SUnit &AnchorSU) { - const MachineInstr *AnchorMI = AnchorSU.getInstr(); - if (!AnchorMI || AnchorMI->isPseudo() || AnchorMI->isTransient()) - return false; - - // If the anchor instr is the ExitSU, then consider its predecessors; - // otherwise, its successors. - bool Preds = (&AnchorSU == &DAG->ExitSU); - SmallVectorImpl &AnchorDeps = Preds ? AnchorSU.Preds : AnchorSU.Succs; - - const MachineInstr *FirstMI = Preds ? nullptr : AnchorMI; - const MachineInstr *SecondMI = Preds ? AnchorMI : nullptr; - - // Check if the anchor instr may be fused. - if (!shouldScheduleAdjacent(*DAG->TII, DAG->MF.getSubtarget(), - FirstMI, SecondMI)) - return false; - - // Explorer for fusion candidates among the dependencies of the anchor instr. - for (SDep &Dep : AnchorDeps) { - // Ignore dependencies that don't enforce ordering. - if (Dep.isWeak()) - continue; - - SUnit &DepSU = *Dep.getSUnit(); - // Ignore the ExitSU if the dependents are successors. - if (!Preds && &DepSU == &DAG->ExitSU) - continue; - - const MachineInstr *DepMI = DepSU.getInstr(); - if (!DepMI || DepMI->isPseudo() || DepMI->isTransient()) - continue; - - FirstMI = Preds ? DepMI : AnchorMI; - SecondMI = Preds ? AnchorMI : DepMI; - if (!shouldScheduleAdjacent(*DAG->TII, DAG->MF.getSubtarget(), - FirstMI, SecondMI)) - continue; - - // Create a single weak edge between the adjacent instrs. The only effect is - // to cause bottom-up scheduling to heavily prioritize the clustered instrs. - SUnit &FirstSU = Preds ? DepSU : AnchorSU; - SUnit &SecondSU = Preds ? AnchorSU : DepSU; - DAG->addEdge(&SecondSU, SDep(&FirstSU, SDep::Cluster)); - - // Adjust the latency between the anchor instr and its - // predecessors/successors. - for (SDep &IDep : AnchorDeps) - if (IDep.getSUnit() == &DepSU) - IDep.setLatency(0); - - // Adjust the latency between the dependent instr and its - // successors/predecessors. - for (SDep &IDep : Preds ? DepSU.Succs : DepSU.Preds) - if (IDep.getSUnit() == &AnchorSU) - IDep.setLatency(0); - - DEBUG(dbgs() << DAG->MF.getName() << "(): Macro fuse "; - FirstSU.print(dbgs(), DAG); dbgs() << " - "; - SecondSU.print(dbgs(), DAG); dbgs() << " / "; - dbgs() << DAG->TII->getName(FirstMI->getOpcode()) << " - " << - DAG->TII->getName(SecondMI->getOpcode()) << '\n'; ); - - ++NumFused; - return true; - } - - return false; -} - -/// \brief Post-process the DAG to create cluster edges between instrs that may -/// be fused by the processor into a single operation. -class AArch64MacroFusion : public ScheduleDAGMutation { -public: - AArch64MacroFusion() {} - - void apply(ScheduleDAGInstrs *DAGInstrs) override; -}; - -void AArch64MacroFusion::apply(ScheduleDAGInstrs *DAGInstrs) { - ScheduleDAGMI *DAG = static_cast(DAGInstrs); - - // For each of the SUnits in the scheduling block, try to fuse the instr in it - // with one in its successors. - for (SUnit &ISU : DAG->SUnits) - scheduleAdjacentImpl(DAG, ISU); - - // Try to fuse the instr in the ExitSU with one in its predecessors. - scheduleAdjacentImpl(DAG, DAG->ExitSU); -} - } // end namespace namespace llvm { std::unique_ptr createAArch64MacroFusionDAGMutation () { - return EnableMacroFusion ? 
make_unique() : nullptr; + return createMacroFusionDAGMutation(shouldScheduleAdjacent); } } // end namespace llvm diff --git a/interpreter/llvm/src/lib/Target/AArch64/AArch64MacroFusion.h b/interpreter/llvm/src/lib/Target/AArch64/AArch64MacroFusion.h index e5efedd9fbfd9..32d90d4c40d6f 100644 --- a/interpreter/llvm/src/lib/Target/AArch64/AArch64MacroFusion.h +++ b/interpreter/llvm/src/lib/Target/AArch64/AArch64MacroFusion.h @@ -2,23 +2,18 @@ // // The LLVM Compiler Infrastructure // -// \fileThis file is distributed under the University of Illinois Open Source +// This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // -// This file contains the AArch64 definition of the DAG scheduling mutation -// to pair instructions back to back. +/// \file This file contains the AArch64 definition of the DAG scheduling +/// mutation to pair instructions back to back. // //===----------------------------------------------------------------------===// -#include "AArch64InstrInfo.h" #include "llvm/CodeGen/MachineScheduler.h" -//===----------------------------------------------------------------------===// -// AArch64MacroFusion - DAG post-processing to encourage fusion of macro ops. -//===----------------------------------------------------------------------===// - namespace llvm { /// Note that you have to add: diff --git a/interpreter/llvm/src/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp b/interpreter/llvm/src/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp index 038162c6f54a9..fe4ef4b40ece7 100644 --- a/interpreter/llvm/src/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp +++ b/interpreter/llvm/src/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp @@ -17,8 +17,8 @@ #define DEBUG_TYPE "aarch64-pbqp" -#include "AArch64.h" #include "AArch64PBQPRegAlloc.h" +#include "AArch64.h" #include "AArch64RegisterInfo.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineBasicBlock.h" diff --git a/interpreter/llvm/src/lib/Target/AArch64/AArch64PBQPRegAlloc.h b/interpreter/llvm/src/lib/Target/AArch64/AArch64PBQPRegAlloc.h index 4f656f94ea12f..b99c1d1d6b3e2 100644 --- a/interpreter/llvm/src/lib/Target/AArch64/AArch64PBQPRegAlloc.h +++ b/interpreter/llvm/src/lib/Target/AArch64/AArch64PBQPRegAlloc.h @@ -1,4 +1,4 @@ -//===-- AArch64PBQPRegAlloc.h - AArch64 specific PBQP constraints -------===// +//==- AArch64PBQPRegAlloc.h - AArch64 specific PBQP constraints --*- C++ -*-==// // // The LLVM Compiler Infrastructure // @@ -15,6 +15,8 @@ namespace llvm { +class TargetRegisterInfo; + /// Add the accumulator chaining constraint to a PBQP graph class A57ChainingConstraint : public PBQPRAConstraint { public: @@ -33,6 +35,7 @@ class A57ChainingConstraint : public PBQPRAConstraint { // Add constraints between existing chains void addInterChainConstraint(PBQPRAGraph &G, unsigned Rd, unsigned Ra); }; -} + +} // end namespace llvm #endif // LLVM_LIB_TARGET_AARCH64_AARCH64PBQPREGALOC_H diff --git a/interpreter/llvm/src/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp b/interpreter/llvm/src/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp index f3c8e7e9bdc2b..4e65c0ab6011b 100644 --- a/interpreter/llvm/src/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp +++ b/interpreter/llvm/src/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp @@ -163,6 +163,7 @@ AArch64RedundantCopyElimination::knownRegValInBlock( case AArch64::ADDSWri: case AArch64::ADDSXri: IsCMN = true; + 
LLVM_FALLTHROUGH; // CMP is an alias for SUBS with a dead destination register. case AArch64::SUBSWri: case AArch64::SUBSXri: { diff --git a/interpreter/llvm/src/lib/Target/AArch64/AArch64RegisterBankInfo.cpp b/interpreter/llvm/src/lib/Target/AArch64/AArch64RegisterBankInfo.cpp index 789270c2a34b7..69124dbd0f838 100644 --- a/interpreter/llvm/src/lib/Target/AArch64/AArch64RegisterBankInfo.cpp +++ b/interpreter/llvm/src/lib/Target/AArch64/AArch64RegisterBankInfo.cpp @@ -15,13 +15,13 @@ #include "AArch64RegisterBankInfo.h" #include "AArch64InstrInfo.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/CodeGen/LowLevelType.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/GlobalISel/RegisterBank.h" #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" +#include "llvm/CodeGen/LowLevelType.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetOpcodes.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -469,10 +469,6 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { getCopyMapping(DstRB.getID(), SrcRB.getID(), Size), /*NumOperands*/ 2); } - case TargetOpcode::G_SEQUENCE: - // FIXME: support this, but the generic code is really not going to do - // anything sane. - return getInvalidInstructionMapping(); default: break; } diff --git a/interpreter/llvm/src/lib/Target/AArch64/AArch64RegisterInfo.cpp b/interpreter/llvm/src/lib/Target/AArch64/AArch64RegisterInfo.cpp index baf15ac540cfb..9f7dcb3fe1c3f 100644 --- a/interpreter/llvm/src/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ b/interpreter/llvm/src/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -74,7 +74,7 @@ const uint32_t * AArch64RegisterInfo::getCallPreservedMask(const MachineFunction &MF, CallingConv::ID CC) const { if (CC == CallingConv::GHC) - // This is academic becase all GHC calls are (supposed to be) tail calls + // This is academic because all GHC calls are (supposed to be) tail calls return CSR_AArch64_NoRegs_RegMask; if (CC == CallingConv::AnyReg) return CSR_AArch64_AllRegs_RegMask; @@ -94,7 +94,7 @@ const uint32_t *AArch64RegisterInfo::getTLSCallPreservedMask() const { if (TT.isOSDarwin()) return CSR_AArch64_TLS_Darwin_RegMask; - assert(TT.isOSBinFormatELF() && "only expect Darwin or ELF TLS"); + assert(TT.isOSBinFormatELF() && "Invalid target"); return CSR_AArch64_TLS_ELF_RegMask; } @@ -167,7 +167,7 @@ bool AArch64RegisterInfo::isConstantPhysReg(unsigned PhysReg) const { const TargetRegisterClass * AArch64RegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind) const { - return &AArch64::GPR64RegClass; + return &AArch64::GPR64spRegClass; } const TargetRegisterClass * diff --git a/interpreter/llvm/src/lib/Target/AArch64/AArch64SchedA57.td b/interpreter/llvm/src/lib/Target/AArch64/AArch64SchedA57.td index 303398ea0b7f3..5d1608ef04afa 100644 --- a/interpreter/llvm/src/lib/Target/AArch64/AArch64SchedA57.td +++ b/interpreter/llvm/src/lib/Target/AArch64/AArch64SchedA57.td @@ -13,7 +13,7 @@ //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// -// The Cortex-A57 is a traditional superscaler microprocessor with a +// The Cortex-A57 is a traditional superscalar microprocessor with a // conservative 3-wide in-order stage for decode and dispatch. 
Combined with the // much wider out-of-order issue stage, this produced a need to carefully // schedule micro-ops so that all three decoded each cycle are successfully diff --git a/interpreter/llvm/src/lib/Target/AArch64/AArch64SchedFalkor.td b/interpreter/llvm/src/lib/Target/AArch64/AArch64SchedFalkor.td index cf1c0b66db583..44fd94fc3d485 100644 --- a/interpreter/llvm/src/lib/Target/AArch64/AArch64SchedFalkor.td +++ b/interpreter/llvm/src/lib/Target/AArch64/AArch64SchedFalkor.td @@ -61,56 +61,42 @@ let SchedModel = FalkorModel in { let SchedModel = FalkorModel in { -def : WriteRes { let Latency = 1; } -def : WriteRes { let Latency = 1; } -def : WriteRes - { let Latency = 1; let NumMicroOps = 2; } -def : WriteRes - { let Latency = 2; let NumMicroOps = 2; } -def : WriteRes - { let Latency = 2; let NumMicroOps = 2; } -def : WriteRes { let Latency = 1; } -def : WriteRes - { let Latency = 8; let NumMicroOps = 2; } -def : WriteRes - { let Latency = 16; let NumMicroOps = 2; } -def : WriteRes { let Latency = 4; } -def : WriteRes { let Latency = 5; } -def : WriteRes { let Latency = 1; } -def : WriteRes { let Latency = 1; } -def : WriteRes { let Latency = 3; } -def : WriteRes - { let Latency = 0; let NumMicroOps = 2; } -def : WriteRes - { let Latency = 0; let NumMicroOps = 2; } -def : WriteRes { let Latency = 1; } -def : WriteRes { let Latency = 5; } -def : WriteRes - { let Latency = 0; let NumMicroOps = 2; } -def : WriteRes - { let Latency = 3; let NumMicroOps = 2; } -def : WriteRes { let Latency = 2; } -def : WriteRes { let Latency = 4; } -def : WriteRes { let Latency = 4; } -def : WriteRes { let Latency = 4; } -def : WriteRes - { let Latency = 6; let NumMicroOps = 2; } -def : WriteRes - { let Latency = 12; let NumMicroOps = 2; } // Fragent -1 / NoRSV +1 -def : WriteRes { let Latency = 6; } -def : WriteRes { let Latency = 3; } -def : WriteRes - { let Latency = 0; let NumMicroOps = 2; } - -def : WriteRes { let Latency = 1; } -def : WriteRes { let Latency = 1; } -def : WriteRes { let Latency = 1; } - -def : WriteRes { let Latency = 3; } - -def : WriteRes { let Unsupported = 1; } - -// No forwarding logic is modelled yet. +// These WriteRes entries are not used in the Falkor sched model. 
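For reference, each override below pairs one of the generic AArch64 SchedWrite types with an empty processor-resource list; a minimal sketch of this shape, assuming WriteSys purely as an illustrative SchedWrite name (the actual names are not shown here):

def : WriteRes<WriteSys, []> { let Unsupported = 1; }  // hypothetical example of the pattern

Setting Unsupported this way declares that the model supplies no scheduling data for instructions mapped to that write, rather than giving them real latencies.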
+def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } + +// These ReadAdvance entries are not used in the Falkor sched model. def : ReadAdvance; def : ReadAdvance; def : ReadAdvance; diff --git a/interpreter/llvm/src/lib/Target/AArch64/AArch64SchedFalkorDetails.td b/interpreter/llvm/src/lib/Target/AArch64/AArch64SchedFalkorDetails.td index f5015416e4d23..0aeb1f3e30584 100644 --- a/interpreter/llvm/src/lib/Target/AArch64/AArch64SchedFalkorDetails.td +++ b/interpreter/llvm/src/lib/Target/AArch64/AArch64SchedFalkorDetails.td @@ -12,7 +12,560 @@ // //===----------------------------------------------------------------------===// -include "AArch64SchedFalkorWriteRes.td" +// Contains all of the Falkor specific SchedWriteRes types. The approach +// below is to define a generic SchedWriteRes for every combination of +// latency and microOps. The naming convention is to use a prefix, one field +// for latency, and one or more microOp count/type designators. +// Prefix: FalkorWr +// MicroOp Count/Types: #(B|X|Y|Z|LD|ST|SD|VX|VY|VSD) +// Latency: #cyc +// +// e.g. FalkorWr_1Z_6SD_4VX_6cyc means there are 11 micro-ops to be issued +// down one Z pipe, six SD pipes, four VX pipes and the total latency is +// six cycles. +// +// Contains all of the Falkor specific ReadAdvance types for forwarding logic. +// +// Contains all of the Falkor specific WriteVariant types for immediate zero +// and LSLFast.
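To make the naming convention concrete, the sketch below spells out the example named in the comment above; FalkorWr_1Z_6SD_4VX_6cyc is only the comment's illustrative name, not one of the types this patch actually defines:

def FalkorWr_1Z_6SD_4VX_6cyc : SchedWriteRes<[FalkorUnitZ,
                                              FalkorUnitSD, FalkorUnitSD,
                                              FalkorUnitSD, FalkorUnitSD,
                                              FalkorUnitSD, FalkorUnitSD,
                                              FalkorUnitVX, FalkorUnitVX,
                                              FalkorUnitVX, FalkorUnitVX]> {
  let Latency = 6;       // the "_6cyc" field
  let NumMicroOps = 11;  // 1 Z + 6 SD + 4 VX
}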
+//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Define 0 micro-op types +def FalkorWr_LdStInc_none_3cyc : SchedWriteRes<[]> { + let Latency = 3; + let NumMicroOps = 0; +} +def FalkorWr_none_3cyc : SchedWriteRes<[]> { + let Latency = 3; + let NumMicroOps = 0; +} +def FalkorWr_none_4cyc : SchedWriteRes<[]> { + let Latency = 4; + let NumMicroOps = 0; +} + +//===----------------------------------------------------------------------===// +// Define 1 micro-op types + +def FalkorWr_1X_2cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 2; } +def FalkorWr_IMUL32_1X_2cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 4; } +def FalkorWr_IMUL64_1X_4cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 4; } +def FalkorWr_IMUL64_1X_5cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 5; } +def FalkorWr_1Z_0cyc : SchedWriteRes<[FalkorUnitZ]> { let Latency = 0; } +def FalkorWr_1ZB_0cyc : SchedWriteRes<[FalkorUnitZB]> { let Latency = 0; } +def FalkorWr_1LD_3cyc : SchedWriteRes<[FalkorUnitLD]> { let Latency = 3; } +def FalkorWr_1LD_4cyc : SchedWriteRes<[FalkorUnitLD]> { let Latency = 4; } +def FalkorWr_1XYZ_0cyc : SchedWriteRes<[FalkorUnitXYZ]> { let Latency = 0; } +def FalkorWr_1XYZ_1cyc : SchedWriteRes<[FalkorUnitXYZ]> { let Latency = 1; } +def FalkorWr_1XYZ_2cyc : SchedWriteRes<[FalkorUnitXYZ]> { let Latency = 2; } +def FalkorWr_1XYZB_0cyc : SchedWriteRes<[FalkorUnitXYZB]>{ let Latency = 0; } +def FalkorWr_1XYZB_1cyc : SchedWriteRes<[FalkorUnitXYZB]>{ let Latency = 1; } +def FalkorWr_1none_0cyc : SchedWriteRes<[]> { let Latency = 0; } + +def FalkorWr_1VXVY_0cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 0; } +def FalkorWr_1VXVY_1cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 1; } +def FalkorWr_1VXVY_2cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 2; } +def FalkorWr_1VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 3; } +def FalkorWr_1VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 4; } +def FalkorWr_VMUL32_1VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 4; } +def FalkorWr_1VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 5; } +def FalkorWr_FMUL32_1VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 5; } +def FalkorWr_1VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 6; } +def FalkorWr_FMUL64_1VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 6; } + +def FalkorWr_1LD_0cyc : SchedWriteRes<[FalkorUnitLD]> { let Latency = 0; } +def FalkorWr_1ST_0cyc : SchedWriteRes<[FalkorUnitST]> { let Latency = 0; } +def FalkorWr_1ST_3cyc : SchedWriteRes<[FalkorUnitST]> { let Latency = 3; } + +def FalkorWr_1GTOV_0cyc : SchedWriteRes<[FalkorUnitGTOV]>{ let Latency = 0; } +def FalkorWr_1GTOV_1cyc : SchedWriteRes<[FalkorUnitGTOV]>{ let Latency = 1; } +def FalkorWr_1GTOV_4cyc : SchedWriteRes<[FalkorUnitGTOV]>{ let Latency = 4; } +def FalkorWr_1VTOG_1cyc : SchedWriteRes<[FalkorUnitVTOG]>{ let Latency = 1; } + +//===----------------------------------------------------------------------===// +// Define 2 micro-op types + +def FalkorWr_2VXVY_0cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 0; + let NumMicroOps = 2; +} +def FalkorWr_2VXVY_1cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 1; + let NumMicroOps = 2; +} +def FalkorWr_2VXVY_2cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 2; + let NumMicroOps = 2; +} +def FalkorWr_2VXVY_3cyc : 
SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 3; + let NumMicroOps = 2; +} +def FalkorWr_2VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 4; + let NumMicroOps = 2; +} +def FalkorWr_VMUL32_2VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 4; + let NumMicroOps = 2; +} +def FalkorWr_2VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 5; + let NumMicroOps = 2; +} +def FalkorWr_FMUL32_2VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 5; + let NumMicroOps = 2; +} +def FalkorWr_2VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 6; + let NumMicroOps = 2; +} +def FalkorWr_FMUL64_2VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 6; + let NumMicroOps = 2; +} + +def FalkorWr_1LD_1VXVY_4cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY]> { + let Latency = 4; + let NumMicroOps = 2; +} +def FalkorWr_1XYZ_1LD_4cyc : SchedWriteRes<[FalkorUnitXYZ, FalkorUnitLD]> { + let Latency = 4; + let NumMicroOps = 2; +} +def FalkorWr_2LD_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD]> { + let Latency = 3; + let NumMicroOps = 2; +} + +def FalkorWr_1VX_1VY_5cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> { + let Latency = 5; + let NumMicroOps = 2; +} + +def FalkorWr_1VX_1VY_2cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> { + let Latency = 2; + let NumMicroOps = 2; +} + +def FalkorWr_1VX_1VY_4cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> { + let Latency = 4; + let NumMicroOps = 2; +} + +def FalkorWr_1VX_1VY_10cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> { + let Latency = 10; + let NumMicroOps = 2; +} + +def FalkorWr_1VX_1VY_12cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> { + let Latency = 12; + let NumMicroOps = 2; +} + +def FalkorWr_1VX_1VY_14cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> { + let Latency = 14; + let NumMicroOps = 2; +} + +def FalkorWr_1VX_1VY_21cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> { + let Latency = 21; + let NumMicroOps = 2; +} + +def FalkorWr_1GTOV_1VXVY_2cyc : SchedWriteRes<[FalkorUnitGTOV, FalkorUnitVXVY]> { + let Latency = 2; + let NumMicroOps = 2; +} + +def FalkorWr_2GTOV_1cyc : SchedWriteRes<[FalkorUnitGTOV, FalkorUnitGTOV]> { + let Latency = 1; + let NumMicroOps = 2; +} + +def FalkorWr_1XYZ_1ST_4cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitST]> { + let Latency = 4; + let NumMicroOps = 2; +} +def FalkorWr_1XYZ_1LD_5cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitLD]> { + let Latency = 5; + let NumMicroOps = 2; +} + +def FalkorWr_2XYZ_2cyc : SchedWriteRes<[FalkorUnitXYZ, FalkorUnitXYZ]> { + let Latency = 2; + let NumMicroOps = 2; +} + +def FalkorWr_1Z_1XY_0cyc : SchedWriteRes<[FalkorUnitZ, FalkorUnitXY]> { + let Latency = 0; + let NumMicroOps = 2; +} + +def FalkorWr_1X_1Z_8cyc : SchedWriteRes<[FalkorUnitX, FalkorUnitZ]> { + let Latency = 8; + let NumMicroOps = 2; + let ResourceCycles = [2, 8]; +} + +def FalkorWr_1X_1Z_11cyc : SchedWriteRes<[FalkorUnitX, FalkorUnitZ]> { + let Latency = 11; + let NumMicroOps = 2; + let ResourceCycles = [2, 11]; +} + +def FalkorWr_1LD_1Z_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitZ]> { + let Latency = 3; + let NumMicroOps = 2; +} + +def FalkorWr_1LD_1none_3cyc : SchedWriteRes<[FalkorUnitLD]> { + let Latency = 3; + let NumMicroOps = 2; +} + +def FalkorWr_1SD_1ST_0cyc: SchedWriteRes<[FalkorUnitSD, FalkorUnitST]> { + let Latency = 0; + let NumMicroOps = 2; +} + +def FalkorWr_1VSD_1ST_0cyc: SchedWriteRes<[FalkorUnitVSD, FalkorUnitST]> { + let 
Latency = 0; + let NumMicroOps = 2; +} + +//===----------------------------------------------------------------------===// +// Define 3 micro-op types + +def FalkorWr_1ST_1SD_1LD_0cyc : SchedWriteRes<[FalkorUnitST, FalkorUnitSD, + FalkorUnitLD]> { + let Latency = 0; + let NumMicroOps = 3; +} + +def FalkorWr_1ST_1SD_1LD_3cyc : SchedWriteRes<[FalkorUnitST, FalkorUnitSD, + FalkorUnitLD]> { + let Latency = 3; + let NumMicroOps = 3; +} + +def FalkorWr_3VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 3; + let NumMicroOps = 3; +} + +def FalkorWr_3VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 4; + let NumMicroOps = 3; +} + +def FalkorWr_3VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 5; + let NumMicroOps = 3; +} + +def FalkorWr_3VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 6; + let NumMicroOps = 3; +} + +def FalkorWr_1LD_2VXVY_4cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY]> { + let Latency = 4; + let NumMicroOps = 3; +} + +def FalkorWr_2LD_1none_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD]> { + let Latency = 3; + let NumMicroOps = 3; +} + +def FalkorWr_3LD_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD, + FalkorUnitLD]> { + let Latency = 3; + let NumMicroOps = 3; +} + +def FalkorWr_2LD_1Z_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD, + FalkorUnitZ]> { + let Latency = 3; + let NumMicroOps = 3; +} + +def FalkorWr_1XYZ_1SD_1ST_0cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitSD, FalkorUnitST]> { + let Latency = 0; + let NumMicroOps = 3; +} +def FalkorWr_1XYZ_1VSD_1ST_0cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitVSD, FalkorUnitST]> { + let Latency = 0; + let NumMicroOps = 3; +} +//===----------------------------------------------------------------------===// +// Define 4 micro-op types + +def FalkorWr_2VX_2VY_14cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY, + FalkorUnitVX, FalkorUnitVY]> { + let Latency = 14; + let NumMicroOps = 4; +} + +def FalkorWr_2VX_2VY_20cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY, + FalkorUnitVX, FalkorUnitVY]> { + let Latency = 20; + let NumMicroOps = 4; +} + +def FalkorWr_2VX_2VY_21cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY, + FalkorUnitVX, FalkorUnitVY]> { + let Latency = 21; + let NumMicroOps = 4; +} + +def FalkorWr_2VX_2VY_24cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY, + FalkorUnitVX, FalkorUnitVY]> { + let Latency = 24; + let NumMicroOps = 4; +} + +def FalkorWr_4VXVY_2cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY, + FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 2; + let NumMicroOps = 4; +} +def FalkorWr_4VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY, + FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 3; + let NumMicroOps = 4; +} +def FalkorWr_4VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY, + FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 4; + let NumMicroOps = 4; +} +def FalkorWr_4VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY, + FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 6; + let NumMicroOps = 4; +} + +def FalkorWr_4LD_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD, + FalkorUnitLD, FalkorUnitLD]> { + let Latency = 3; + let NumMicroOps = 4; +} + +def FalkorWr_1LD_3VXVY_4cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY, + FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 4; + let NumMicroOps = 4; +} + +def FalkorWr_2LD_2none_3cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitLD]> { + let Latency = 3; + let NumMicroOps = 4; +} + +def FalkorWr_2LD_1ST_1SD_3cyc: 
SchedWriteRes<[FalkorUnitLD, FalkorUnitST, + FalkorUnitSD, FalkorUnitLD]> { + let Latency = 3; + let NumMicroOps = 4; +} + +def FalkorWr_2VSD_2ST_0cyc: SchedWriteRes<[FalkorUnitST, FalkorUnitVSD, + FalkorUnitST, FalkorUnitVSD]> { + let Latency = 0; + let NumMicroOps = 4; +} + +//===----------------------------------------------------------------------===// +// Define 5 micro-op types + +def FalkorWr_1LD_4VXVY_4cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY, + FalkorUnitVXVY, FalkorUnitVXVY, + FalkorUnitVXVY]> { + let Latency = 4; + let NumMicroOps = 5; +} +def FalkorWr_2LD_2VXVY_1none_4cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitLD, + FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 4; + let NumMicroOps = 5; +} +def FalkorWr_5VXVY_7cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY, + FalkorUnitVXVY, FalkorUnitVXVY, + FalkorUnitVXVY]> { + let Latency = 7; + let NumMicroOps = 5; +} +def FalkorWr_1XYZ_2ST_2VSD_0cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitST, + FalkorUnitVSD, FalkorUnitST, + FalkorUnitVSD]> { + let Latency = 0; + let NumMicroOps = 5; +} +def FalkorWr_1VXVY_2ST_2VSD_0cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitST, + FalkorUnitVSD, FalkorUnitST, + FalkorUnitVSD]> { + let Latency = 0; + let NumMicroOps = 5; +} +//===----------------------------------------------------------------------===// +// Define 6 micro-op types + +def FalkorWr_2LD_2VXVY_2none_4cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitLD, + FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 4; + let NumMicroOps = 6; +} + +def FalkorWr_2XYZ_2ST_2VSD_0cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitST, + FalkorUnitVSD, FalkorUnitXYZ, + FalkorUnitST, FalkorUnitVSD]> { + let Latency = 0; + let NumMicroOps = 6; +} + +def FalkorWr_2VXVY_2ST_2VSD_0cyc: SchedWriteRes<[FalkorUnitVXVY, FalkorUnitST, + FalkorUnitVSD, FalkorUnitVXVY, + FalkorUnitST, FalkorUnitVSD]> { + let Latency = 0; + let NumMicroOps = 6; +} + +def FalkorWr_3VSD_3ST_0cyc: SchedWriteRes<[FalkorUnitST, FalkorUnitVSD, + FalkorUnitST, FalkorUnitVSD, + FalkorUnitST, FalkorUnitVSD]> { + let Latency = 0; + let NumMicroOps = 6; +} + +//===----------------------------------------------------------------------===// +// Define 8 micro-op types + +def FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc:SchedWriteRes<[FalkorUnitLD, FalkorUnitLD, + FalkorUnitVXVY, FalkorUnitVXVY, + FalkorUnitLD, FalkorUnitLD, + FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 4; + let NumMicroOps = 8; +} + +def FalkorWr_4VSD_4ST_0cyc: SchedWriteRes<[FalkorUnitST, FalkorUnitVSD, + FalkorUnitST, FalkorUnitVSD, + FalkorUnitST, FalkorUnitVSD, + FalkorUnitST, FalkorUnitVSD]> { + let Latency = 0; + let NumMicroOps = 8; +} + +//===----------------------------------------------------------------------===// +// Define 9 micro-op types + +def FalkorWr_2LD_2VXVY_2LD_1XYZ_2VXVY_4cyc:SchedWriteRes<[FalkorUnitLD, + FalkorUnitLD, FalkorUnitVXVY, + FalkorUnitVXVY, FalkorUnitLD, + FalkorUnitLD, FalkorUnitXYZ, + FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 4; + let NumMicroOps = 9; +} + +def FalkorWr_2LD_2VXVY_1XYZ_2LD_2VXVY_4cyc:SchedWriteRes<[FalkorUnitLD, + FalkorUnitLD, FalkorUnitVXVY, + FalkorUnitVXVY, FalkorUnitXYZ, + FalkorUnitLD, FalkorUnitLD, + FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 4; + let NumMicroOps = 9; +} + +//===----------------------------------------------------------------------===// +// Define 10 micro-op types + +def FalkorWr_2VXVY_4ST_4VSD_0cyc: SchedWriteRes<[FalkorUnitVXVY, FalkorUnitST, + FalkorUnitVSD, FalkorUnitVXVY, + FalkorUnitST, FalkorUnitVSD, + FalkorUnitST, 
FalkorUnitVSD, + FalkorUnitST, FalkorUnitVSD]> { + let Latency = 0; + let NumMicroOps = 10; +} + +//===----------------------------------------------------------------------===// +// Define 12 micro-op types + +def FalkorWr_4VXVY_4ST_4VSD_0cyc: SchedWriteRes<[FalkorUnitVXVY, FalkorUnitST, + FalkorUnitVSD, FalkorUnitVXVY, + FalkorUnitST, FalkorUnitVSD, + FalkorUnitVXVY, FalkorUnitST, + FalkorUnitVSD, FalkorUnitVXVY, + FalkorUnitST, FalkorUnitVSD]> { + let Latency = 0; + let NumMicroOps = 12; +} + +// Forwarding logic is modeled for multiply add/accumulate and +// load/store base register increment. +// ----------------------------------------------------------------------------- +def FalkorReadIMA32 : SchedReadAdvance<3, [FalkorWr_IMUL32_1X_2cyc]>; +def FalkorReadIMA64 : SchedReadAdvance<4, [FalkorWr_IMUL64_1X_4cyc, FalkorWr_IMUL64_1X_5cyc]>; +def FalkorReadVMA : SchedReadAdvance<3, [FalkorWr_VMUL32_1VXVY_4cyc, FalkorWr_VMUL32_2VXVY_4cyc]>; +def FalkorReadFMA32 : SchedReadAdvance<1, [FalkorWr_FMUL32_1VXVY_5cyc, FalkorWr_FMUL32_2VXVY_5cyc]>; +def FalkorReadFMA64 : SchedReadAdvance<2, [FalkorWr_FMUL64_1VXVY_6cyc, FalkorWr_FMUL64_2VXVY_6cyc]>; + +def FalkorReadIncLd : SchedReadAdvance<2, [FalkorWr_LdStInc_none_3cyc]>; +def FalkorReadIncSt : SchedReadAdvance<1, [FalkorWr_LdStInc_none_3cyc]>; + +// SchedPredicates and WriteVariants for Immediate Zero and LSLFast/ASRFast +// ----------------------------------------------------------------------------- +def FalkorImmZPred : SchedPredicate<[{MI->getOperand(1).isImm() && + MI->getOperand(1).getImm() == 0}]>; +def FalkorOp1ZrReg : SchedPredicate<[{MI->getOperand(1).getReg() == AArch64::WZR || + + MI->getOperand(1).getReg() == AArch64::XZR}]>; +def FalkorShiftExtFastPred : SchedPredicate<[{TII->isFalkorShiftExtFast(*MI)}]>; + +def FalkorWr_FMOV : SchedWriteVariant<[ + SchedVar, + SchedVar]>; + +def FalkorWr_MOVZ : SchedWriteVariant<[ + SchedVar, + SchedVar]>; // imm fwd + + +def FalkorWr_ADDSUBsx : SchedWriteVariant<[ + SchedVar, + SchedVar]>; + +def FalkorWr_LDRro : SchedWriteVariant<[ + SchedVar, + SchedVar]>; + +def FalkorWr_LDRSro : SchedWriteVariant<[ + SchedVar, + SchedVar]>; + +def FalkorWr_ORRi : SchedWriteVariant<[ + SchedVar, // imm fwd + SchedVar]>; + +def FalkorWr_PRFMro : SchedWriteVariant<[ + SchedVar, + SchedVar]>; + +def FalkorWr_STRVro : SchedWriteVariant<[ + SchedVar, + SchedVar]>; + +def FalkorWr_STRQro : SchedWriteVariant<[ + SchedVar, + SchedVar]>; + +def FalkorWr_STRro : SchedWriteVariant<[ + SchedVar, + SchedVar]>; //===----------------------------------------------------------------------===// // Specialize the coarse model by associating instruction groups with the @@ -22,63 +575,80 @@ include "AArch64SchedFalkorWriteRes.td" // Miscellaneous // ----------------------------------------------------------------------------- -def : InstRW<[WriteI], (instrs COPY)>; +// FIXME: This could be better modeled by looking at the regclasses of the operands. 
+def : InstRW<[FalkorWr_1XYZ_1cyc], (instrs COPY)>; // SIMD Floating-point Instructions // ----------------------------------------------------------------------------- -def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(FABS|FNEG)(v2f32|v4f16)$")>; +def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(FABS|FNEG)v2f32$")>; -def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(F(MAX|MIN)(NM)?P?|FAC(GE|GT))(v2f32|v4f16|v2i16p|v2i32p)$")>; -def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FAC(GE|GT)(16|32|64)$")>; -def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FCM(EQ|GE|GT)(16|32|64|v2f32|v4f16|v2i32|v4i16)$")>; -def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FCM(EQ|LE|GE|GT|LT)(v1i16|v1i32|v1i64|v2i32|v4i16)rz$")>; -def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FRINT(A|I|M|N|P|X|Z)(v2f32|v4f16)$")>; +def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(F(MAX|MIN)(NM)?P?|FAC(GE|GT))(v2f32|v2i32p)$")>; +def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FAC(GE|GT)(32|64)$")>; +def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FCM(EQ|GE|GT)(32|64|v2f32|v2i32)$")>; +def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FCM(EQ|LE|GE|GT|LT)(v1i32|v1i64|v2i32)rz$")>; +def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FRINT(A|I|M|N|P|X|Z)v2f32$")>; -def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^F(MAX|MIN)(NM)?V(v4i16|v4i32|v8i16)v$")>; -def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(FABD|FADD|FSUB)(v2f32|v4f16)$")>; -def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^FADDP(v2i16p|v2i32p|v2i64p|v2f32|v4f16)$")>; +def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^F(MAX|MIN)(NM)?Vv4i32v$")>; +def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(FABD|FADD|FSUB)v2f32$")>; +def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^FADDP(v2i32p|v2i64p|v2f32)$")>; -def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^FCVT(N|M|P|Z|A)(S|U)(v1i32|v1i64|v1f16|v2f32|v4f16)$")>; +def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^FCVT(N|M|P|Z|A)(S|U)(v1i32|v1i64|v2f32)$")>; def : InstRW<[FalkorWr_1VXVY_4cyc], (instrs FCVTXNv1i64)>; -def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^FCVTZ(S|U)(v2i32|v4i16)(_shift)?$")>; +def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^FCVTZ(S|U)v2i32(_shift)?$")>; -def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc], (instregex "^(FMUL|FMULX)(v2f32|v4f16|(v1i16_indexed|v4i16_indexed|v1i32_indexed|v2i32_indexed))$")>; -def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc], (instrs FMULX16, FMULX32)>; +def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc], + (instregex "^(FMUL|FMULX)(v2f32|(v1i32_indexed|v2i32_indexed))$")>; +def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc], + (instrs FMULX32)>; -def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc], (instregex "^(FMUL|FMULX)v1i64_indexed$")>; -def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc], (instrs FMULX64)>; +def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc], + (instregex "^(FMUL|FMULX)v1i64_indexed$")>; +def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc], + (instrs FMULX64)>; -def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "^(FABS|FNEG)(v2f64|v4f32|v8f16)$")>; +def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "^(FABS|FNEG)(v2f64|v4f32)$")>; -def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(F(MAX|MIN)(NM)?P?|FAC(GE|GT)|FCM(EQ|GE|GT))(v2f64|v4f32|v8f16|v2i64p)$")>; -def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^FCM(EQ|LE|GE|GT|LT)(v2i64|v4i32|v8i16)rz$")>; -def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^FRINT(A|I|M|N|P|X|Z)(v2f64|v4f32|v8f16)$")>; +def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(F(MAX|MIN)(NM)?P?|FAC(GE|GT)|FCM(EQ|GE|GT))(v2f64|v4f32|v2i64p)$")>; +def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex 
"^FCM(EQ|LE|GE|GT|LT)(v2i64|v4i32)rz$")>; +def : InstRW<[FalkorWr_2VXVY_2cyc], (instrs FCVTLv4i16, FCVTLv2i32)>; +def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^FRINT(A|I|M|N|P|X|Z)(v2f64|v4f32)$")>; -def : InstRW<[FalkorWr_1VX_1VY_10cyc],(instregex "^(FDIV|FSQRT)(v2f32|v4f16)$")>; +def : InstRW<[FalkorWr_1VX_1VY_10cyc],(instrs FDIVv2f32)>; +def : InstRW<[FalkorWr_1VX_1VY_12cyc],(instrs FSQRTv2f32)>; -def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(FABD|FADD(P)?|FSUB)(v2f64|v4f32|v8f16)$")>; +def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(FABD|FADD(P)?|FSUB)(v2f64|v4f32)$")>; -def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^FCVT(N|M|P|Z|A)(S|U)(v2f64|v4f32|v8f16)$")>; -def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^(FCVTL|FCVTL2)(v2i32|v4i16|v4i32|v8i16)$")>; -def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^FCVTZ(S|U)(v2i64|v4i32|v8i16)(_shift)?$")>; +def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^FCVT(N|M|P|Z|A)(S|U)(v2f64|v4f32)$")>; +def : InstRW<[FalkorWr_2VXVY_4cyc], (instrs FCVTLv8i16, FCVTLv4i32)>; +def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^FCVTZ(S|U)(v2i64|v4i32)(_shift)?$")>; -def : InstRW<[FalkorWr_FMUL32_2VXVY_5cyc], (instregex "^(FMUL|FMULX)(v2f64|v4f32|v8f16|v8i16_indexed|v4i32_indexed)$")>; +def : InstRW<[FalkorWr_FMUL32_2VXVY_5cyc], + (instregex "^(FMUL|FMULX)(v2f64|v4f32|v4i32_indexed)$")>; -def : InstRW<[FalkorWr_FMUL64_2VXVY_6cyc], (instregex "^(FMUL|FMULX)v2i64_indexed$")>; +def : InstRW<[FalkorWr_FMUL64_2VXVY_6cyc], + (instregex "^(FMUL|FMULX)v2i64_indexed$")>; -def : InstRW<[FalkorWr_3VXVY_4cyc], (instregex "^(FCVTX?N|FCVTX?N2)(v1i32|v1i64|v1f16|v2f32|v4f16)$")>; +def : InstRW<[FalkorWr_3VXVY_4cyc], (instrs FCVTNv4i16, FCVTNv2i32, FCVTXNv2f32)>; +def : InstRW<[FalkorWr_3VXVY_5cyc], (instrs FCVTNv8i16, FCVTNv4i32, FCVTXNv4f32)>; -def : InstRW<[FalkorWr_3VXVY_5cyc], (instregex "^(FCVTX?N|FCVTX?N2)(v2i32|v4i16|v4i32|v8i16|v4f32)$")>; +def : InstRW<[FalkorWr_2VX_2VY_14cyc],(instrs FDIVv2f64)>; +def : InstRW<[FalkorWr_2VX_2VY_20cyc],(instrs FDIVv4f32)>; +def : InstRW<[FalkorWr_2VX_2VY_21cyc],(instrs FSQRTv2f64)>; +def : InstRW<[FalkorWr_2VX_2VY_24cyc],(instrs FSQRTv4f32)>; -def : InstRW<[FalkorWr_2VX_2VY_2cyc], (instregex "^(FDIV|FSQRT)(v2f64|v4f32|v8f16)$")>; +def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc, FalkorReadVMA], + (instregex "^ML(A|S)(v8i8|v4i16|v2i32)(_indexed)?$")>; +def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA], + (instregex "^ML(A|S)(v16i8|v8i16|v4i32|v2i64)(_indexed)?$")>; -def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc, FalkorReadVMA], (instregex "^ML(A|S)(v8i8|v4i16|v2i32)(_indexed)?$")>; -def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA], (instregex "^ML(A|S)(v16i8|v8i16|v4i32|v2i64)(_indexed)?$")>; - -def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc, FalkorReadFMA32], (instregex "^FML(A|S)(v2f32|v4f16|(v1i16_indexed|v4i16_indexed|v1i32_indexed|v2i32_indexed))$")>; -def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc, FalkorReadFMA64], (instregex "^FML(A|S)v1i64_indexed$")>; -def : InstRW<[FalkorWr_FMUL32_2VXVY_5cyc, FalkorReadFMA32], (instregex "^FML(A|S)(v4f32|v8f16|v8i16_indexed|v4i32_indexed)$")>; -def : InstRW<[FalkorWr_FMUL64_2VXVY_6cyc, FalkorReadFMA64], (instregex "^FML(A|S)(v2f64|v2i64_indexed)$")>; +def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc, FalkorReadFMA32], + (instregex "^FML(A|S)(v2f32|(v1i32_indexed|v2i32_indexed))$")>; +def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc, FalkorReadFMA64], + (instregex "^FML(A|S)v1i64_indexed$")>; +def : InstRW<[FalkorWr_FMUL32_2VXVY_5cyc, FalkorReadFMA32], + (instregex "^FML(A|S)(v4f32|v4i32_indexed)$")>; +def 
: InstRW<[FalkorWr_FMUL64_2VXVY_6cyc, FalkorReadFMA64], + (instregex "^FML(A|S)(v2f64|v2i64_indexed)$")>; // SIMD Integer Instructions // ----------------------------------------------------------------------------- @@ -92,12 +662,14 @@ def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^SUB(v1i64|v2i32|v4i16|v8i8)$" def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(S|U)(ADDLP|HADD|HSUB|SHL)(v2i32|v4i16|v8i8)(_v.*)?$")>; def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(S|U)SHLv1i64$")>; def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(S|U)SHR(v2i32|v4i16|v8i8)_shift$")>; +def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(S|U)SHRd$")>; def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^((S|U)?(MAX|MIN)P?|ABS|ADDP|CM(EQ|GE|HS|GT|HI))(v1i64|v2i32|v4i16|v8i8)$")>; def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^CM(EQ|GE|HS|GT|HI)(v1i64|v2i32|v4i16|v8i8)$")>; def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^CM(EQ|LE|GE|GT|LT)(v1i64|v2i32|v4i16|v8i8)rz$")>; def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^CMTST(v1i64|v2i32|v4i16|v8i8)$")>; def : InstRW<[FalkorWr_1VXVY_2cyc], (instrs PMULv8i8)>; def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^SHL(v2i32|v4i16|v8i8)_shift$")>; +def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^SHLd$")>; def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^SQNEG(v2i32|v4i16|v8i8)$")>; def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)R?SRA(d|(v2i32|v4i16|v8i8)_shift)$")>; @@ -110,6 +682,8 @@ def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(SQR?SHRN|UQR?SHRN|SQR?SHRUN) def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)QSUB(v1i8|v1i16|v2i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>; def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)RHADD(v2i32|v4i16|v8i8)$")>; def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)RSHR(v2i32|v4i16|v8i8)_shift$")>; +def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)RSHRd$")>; +def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^R?SHRN(v2i32|v4i16|v8i8)_shift$")>; def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(SU|US)QADD(v1i8|v1i16|v2i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>; def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)?(MAX|MIN)V(v4i16v|v4i32v)$")>; def : InstRW<[FalkorWr_1VXVY_3cyc], (instrs ADDVv4i16v)>; @@ -120,10 +694,14 @@ def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^SQNEG(v1i8|v1i16|v1i32|v1i64) def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^(S|U)ADDLVv8i8v$")>; def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^(S|U)?(MAX|MIN)V(v8i8v|v8i16v)$")>; def : InstRW<[FalkorWr_1VXVY_4cyc], (instrs ADDVv8i8v)>; -def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc], (instregex "^MUL(v2i32|v4i16|v8i8)(_indexed)?$")>; -def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc], (instregex "^SQR?DMULH(v8i8|v4i16|v1i32|v2i32|v1i16)(_indexed)?$")>; -def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc], (instregex "^SQDMULL(i16|i32)$")>; -def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc, FalkorReadVMA], (instregex "^SQRDML(A|S)H(i16|i32|v8i8|v4i16|v1i32|v2i32|v1i16)(_indexed)?$")>; +def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc], + (instregex "^MUL(v2i32|v4i16|v8i8)(_indexed)?$")>; +def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc], + (instregex "^SQR?DMULH(v8i8|v4i16|v1i32|v2i32|v1i16)(_indexed)?$")>; +def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc], + (instregex "^SQDMULL(i16|i32)$")>; +def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc, FalkorReadVMA], + (instregex "^SQRDML(A|S)H(i16|i32|v8i8|v4i16|v1i32|v2i32|v1i16)(_indexed)?$")>; def : InstRW<[FalkorWr_1VXVY_5cyc], (instregex "^(S|U)?(MAX|MIN)Vv16i8v$")>; @@ -154,7 +732,7 @@ def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^ADDP(v4i32|v8i16|v16i8)$")>; 
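The multiply-accumulate mappings above combine a write type with a ReadAdvance operand, and the effective forwarding latency follows standard SchedReadAdvance arithmetic; a worked reading, using only defs that appear earlier in this patch:

// FalkorWr_VMUL32_1VXVY_4cyc has Latency = 4, and FalkorReadVMA is defined
// above as SchedReadAdvance<3, [FalkorWr_VMUL32_1VXVY_4cyc,
//                               FalkorWr_VMUL32_2VXVY_4cyc]>.
// An ML(A|S) whose accumulator operand is produced by one of those multiplies
// reads that operand three cycles early, so the accumulate chain sees an
// effective latency of 4 - 3 = 1 cycle, while any other consumer of the
// multiply result still observes the full 4 cycles.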
def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^CM(EQ|GE|HS|GT|HI)(v16i8|v2i64|v4i32|v8i16)$")>; def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^CM(EQ|LE|GE|GT|LT)(v16i8|v2i64|v4i32|v8i16)rz$")>; def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(CMTST|PMUL)(v16i8|v2i64|v4i32|v8i16)$")>; -def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^PMULL2?(v8i8|v16i8)$")>; +def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^PMULL(v8i8|v16i8)$")>; def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^SHL(v16i8|v8i16|v4i32|v2i64)_shift$")>; def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^SHLL(v16i8|v8i16|v4i32|v8i8|v4i16|v2i32)(_shift)?$")>; @@ -165,14 +743,18 @@ def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)(ADALP|QADD)(v16i8|v8i16 def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)QSHLU?(v2i64|v4i32|v8i16|v16i8)_shift$")>; def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)(QSHL|RSHL|QRSHL|QSUB|RHADD)(v16i8|v8i16|v4i32|v2i64)$")>; def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)RSHR(v2i64|v4i32|v8i16|v16i8)_shift$")>; +def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^R?SHRN(v2i64|v4i32|v8i16|v16i8)_shift$")>; def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(SU|US)QADD(v16i8|v8i16|v4i32|v2i64)$")>; -def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^PMULL2?(v1i64|v2i64)$")>; +def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^PMULL(v1i64|v2i64)$")>; def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^S(L|R)I(v16i8|v8i16|v4i32|v2i64)_shift$")>; def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^SQ(ABS|NEG)(v16i8|v8i16|v4i32|v2i64)$")>; -def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc], (instregex "^(MUL|SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?$")>; -def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc], (instregex "^SQDMULLv.*$")>; -def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA], (instregex "^SQRDML(A|S)H(v16i8|v8i16|v4i32)(_indexed)?$")>; +def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc], + (instregex "^(MUL|SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?$")>; +def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc], + (instregex "^SQDMULLv.*$")>; +def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA], + (instregex "^SQRDML(A|S)H(v16i8|v8i16|v4i32)(_indexed)?$")>; def : InstRW<[FalkorWr_3VXVY_3cyc], (instregex "^(S|U)ADDLVv4i32v$")>; @@ -186,136 +768,188 @@ def : InstRW<[FalkorWr_4VXVY_3cyc], (instregex "^(S|U)ABALv.*$")>; def : InstRW<[FalkorWr_4VXVY_4cyc], (instregex "^(S|U)ABA(v16i8|v8i16|v4i32)$")>; -def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc, FalkorReadVMA], (instregex "^SQD(MLAL|MLSL)(i16|i32|v1i32_indexed|v1i64_indexed)$")>; -def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA], (instregex "^SQD(MLAL|MLSL)v[248].*$")>; +def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc, FalkorReadVMA], + (instregex "^SQD(MLAL|MLSL)(i16|i32|v1i32_indexed|v1i64_indexed)$")>; +def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA], + (instregex "^SQD(MLAL|MLSL)v[248].*$")>; // SIMD Load Instructions // ----------------------------------------------------------------------------- -def : InstRW<[WriteVLD], (instregex "^LD1(i64|Onev(8b|4h|2s|1d|16b|8h|4s|2d))$")>; -def : InstRW<[WriteVLD], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[WriteVLD], (instrs LD2i64)>; -def : InstRW<[WriteVLD, WriteAdr], (instregex "^LD1(i64|Onev(8b|4h|2s|1d|16b|8h|4s|2d))_POST$")>; -def : InstRW<[WriteVLD, WriteAdr], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; -def : InstRW<[WriteVLD, WriteAdr], (instrs LD2i64_POST)>; - -def : InstRW<[FalkorWr_1LD_1VXVY_4cyc], (instregex "LD1i(8|16|32)$")>; -def : InstRW<[FalkorWr_1LD_1VXVY_4cyc, WriteAdr], 
(instregex "LD1i(8|16|32)_POST$")>; - -def : InstRW<[FalkorWr_1LD_1none_3cyc], (instregex "^LD1Twov(8b|4h|2s|1d)$")>; -def : InstRW<[FalkorWr_1LD_1none_3cyc], (instregex "^LD2Twov(8b|4h|2s|1d)$")>; -def : InstRW<[FalkorWr_1LD_1none_3cyc], (instregex "^LD2Rv(8b|4h|2s|1d)$")>; -def : InstRW<[FalkorWr_1LD_1none_3cyc, WriteAdr], (instregex "^LD1Twov(8b|4h|2s|1d)_POST$")>; -def : InstRW<[FalkorWr_1LD_1none_3cyc, WriteAdr], (instregex "^LD2Twov(8b|4h|2s|1d)_POST$")>; -def : InstRW<[FalkorWr_1LD_1none_3cyc, WriteAdr], (instregex "^LD2Rv(8b|4h|2s|1d)_POST$")>; - -def : InstRW<[FalkorWr_2LD_3cyc], (instregex "^LD1Twov(16b|8h|4s|2d)$")>; -def : InstRW<[FalkorWr_2LD_3cyc], (instregex "^LD2Twov(16b|8h|4s|2d)$")>; -def : InstRW<[FalkorWr_2LD_3cyc], (instregex "^LD2Rv(16b|8h|4s|2d)$")>; -def : InstRW<[FalkorWr_2LD_3cyc], (instrs LD3i64)>; -def : InstRW<[FalkorWr_2LD_3cyc], (instrs LD4i64)>; -def : InstRW<[FalkorWr_2LD_3cyc, WriteAdr], (instregex "^LD1Twov(16b|8h|4s|2d)_POST$")>; -def : InstRW<[FalkorWr_2LD_3cyc, WriteAdr], (instregex "^LD2Twov(16b|8h|4s|2d)_POST$")>; -def : InstRW<[FalkorWr_2LD_3cyc, WriteAdr], (instregex "^LD2Rv(16b|8h|4s|2d)_POST$")>; -def : InstRW<[FalkorWr_2LD_3cyc, WriteAdr], (instrs LD3i64_POST)>; -def : InstRW<[FalkorWr_2LD_3cyc, WriteAdr], (instrs LD4i64_POST)>; - -def : InstRW<[FalkorWr_1LD_2VXVY_4cyc], (instregex "^LD2i(8|16|32)$")>; -def : InstRW<[FalkorWr_1LD_2VXVY_4cyc, WriteAdr], (instregex "^LD2i(8|16|32)_POST$")>; - -def : InstRW<[FalkorWr_2LD_1none_3cyc], (instregex "^LD1Threev(8b|4h|2s|1d)$")>; -def : InstRW<[FalkorWr_2LD_1none_3cyc], (instregex "^LD3Rv(8b|4h|2s|1d)$")>; -def : InstRW<[FalkorWr_2LD_1none_3cyc, WriteAdr], (instregex "^LD1Threev(8b|4h|2s|1d)_POST$")>; -def : InstRW<[FalkorWr_2LD_1none_3cyc, WriteAdr], (instregex "^LD3Rv(8b|4h|2s|1d)_POST$")>; - -def : InstRW<[FalkorWr_3LD_3cyc], (instregex "^LD1Threev(16b|8h|4s|2d)$")>; -def : InstRW<[FalkorWr_3LD_3cyc], (instrs LD3Threev2d)>; -def : InstRW<[FalkorWr_3LD_3cyc], (instregex "^LD3Rv(16b|8h|4s|2d)$")>; -def : InstRW<[FalkorWr_3LD_3cyc, WriteAdr], (instregex "^LD1Threev(16b|8h|4s|2d)_POST$")>; -def : InstRW<[FalkorWr_3LD_3cyc, WriteAdr], (instrs LD3Threev2d_POST)>; -def : InstRW<[FalkorWr_3LD_3cyc, WriteAdr], (instregex "^LD3Rv(16b|8h|4s|2d)_POST$")>; - -def : InstRW<[FalkorWr_1LD_3VXVY_4cyc], (instregex "LD3i(8|16|32)$")>; -def : InstRW<[FalkorWr_1LD_3VXVY_4cyc, WriteAdr], (instregex "LD3i(8|16|32)_POST$")>; - -def : InstRW<[FalkorWr_2LD_2none_3cyc], (instregex "^LD1Fourv(8b|4h|2s|1d)$")>; -def : InstRW<[FalkorWr_2LD_2none_3cyc], (instregex "^LD4Rv(8b|4h|2s|1d)$")>; -def : InstRW<[FalkorWr_2LD_2none_3cyc, WriteAdr], (instregex "^LD1Fourv(8b|4h|2s|1d)_POST$")>; -def : InstRW<[FalkorWr_2LD_2none_3cyc, WriteAdr], (instregex "^LD4Rv(8b|4h|2s|1d)_POST$")>; - -def : InstRW<[FalkorWr_4LD_3cyc], (instregex "^LD1Fourv(16b|8h|4s|2d)$")>; -def : InstRW<[FalkorWr_4LD_3cyc], (instrs LD4Fourv2d)>; -def : InstRW<[FalkorWr_4LD_3cyc], (instregex "^LD4Rv(16b|8h|4s|2d)$")>; -def : InstRW<[FalkorWr_4LD_3cyc, WriteAdr], (instregex "^LD1Fourv(16b|8h|4s|2d)_POST$")>; -def : InstRW<[FalkorWr_4LD_3cyc, WriteAdr], (instrs LD4Fourv2d_POST)>; -def : InstRW<[FalkorWr_4LD_3cyc, WriteAdr], (instregex "^LD4Rv(16b|8h|4s|2d)_POST$")>; - -def : InstRW<[FalkorWr_1LD_4VXVY_4cyc], (instregex "^LD4i(8|16|32)$")>; -def : InstRW<[FalkorWr_1LD_4VXVY_4cyc, WriteAdr], (instregex "^LD4i(8|16|32)_POST$")>; - -def : InstRW<[FalkorWr_2LD_2VXVY_1none_4cyc], (instregex "LD3Threev(8b|4h|2s|1d)$")>; -def : InstRW<[FalkorWr_2LD_2VXVY_1none_4cyc, 
WriteAdr],(instregex "LD3Threev(8b|4h|2s|1d)_POST$")>; - -def : InstRW<[FalkorWr_2LD_2VXVY_2none_4cyc], (instregex "^LD4Fourv(8b|4h|2s|1d)$")>; -def : InstRW<[FalkorWr_2LD_2VXVY_2none_4cyc, WriteAdr],(instregex "^LD4Fourv(8b|4h|2s|1d)_POST$")>; - -def : InstRW<[FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc], (instregex "LD3Threev(16b|8h|4s)$")>; -def : InstRW<[FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc], (instregex "^LD4Fourv(16b|8h|4s)$")>; - -def : InstRW<[FalkorWr_2LD_2VXVY_1XYZ_2LD_2VXVY_4cyc, WriteAdr],(instregex "LD3Threev(16b|8h|4s)_POST$")>; -def : InstRW<[FalkorWr_2LD_2VXVY_2LD_1XYZ_2VXVY_4cyc, WriteAdr],(instregex "^LD4Fourv(16b|8h|4s)_POST$")>; +def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd], (instregex "^LD1(i64|Onev(8b|4h|2s|1d|16b|8h|4s|2d))$")>; +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1LD_3cyc, FalkorReadIncLd], + (instregex "^LD1(i64|Onev(8b|4h|2s|1d|16b|8h|4s|2d))_POST$")>; +def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd], (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1LD_3cyc, FalkorReadIncLd], + (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd], (instrs LD2i64)>; +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1LD_3cyc, FalkorReadIncLd], + (instrs LD2i64_POST)>; + +def : InstRW<[FalkorWr_1LD_1VXVY_4cyc, FalkorReadIncLd], (instregex "^LD1i(8|16|32)$")>; +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1LD_1VXVY_4cyc, FalkorReadIncLd], + (instregex "^LD1i(8|16|32)_POST$")>; + +def : InstRW<[FalkorWr_1LD_1none_3cyc, FalkorReadIncLd], (instregex "^LD1Twov(8b|4h|2s|1d)$")>; +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1LD_1none_3cyc, FalkorReadIncLd], + (instregex "^LD1Twov(8b|4h|2s|1d)_POST$")>; +def : InstRW<[FalkorWr_1LD_1none_3cyc, FalkorReadIncLd], (instregex "^LD2Twov(8b|4h|2s)$")>; +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1LD_1none_3cyc, FalkorReadIncLd], + (instregex "^LD2Twov(8b|4h|2s)_POST$")>; +def : InstRW<[FalkorWr_1LD_1none_3cyc, FalkorReadIncLd], (instregex "^LD2Rv(8b|4h|2s|1d)$")>; +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1LD_1none_3cyc, FalkorReadIncLd], + (instregex "^LD2Rv(8b|4h|2s|1d)_POST$")>; + +def : InstRW<[FalkorWr_2LD_3cyc, FalkorReadIncLd], (instregex "^LD1Twov(16b|8h|4s|2d)$")>; +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_2LD_3cyc, FalkorReadIncLd], + (instregex "^LD1Twov(16b|8h|4s|2d)_POST$")>; +def : InstRW<[FalkorWr_2LD_3cyc, FalkorReadIncLd], (instregex "^LD2Twov(16b|8h|4s|2d)$")>; +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_2LD_3cyc, FalkorReadIncLd], + (instregex "^LD2Twov(16b|8h|4s|2d)_POST$")>; +def : InstRW<[FalkorWr_2LD_3cyc, FalkorReadIncLd], (instregex "^LD2Rv(16b|8h|4s|2d)$")>; +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_2LD_3cyc, FalkorReadIncLd], + (instregex "^LD2Rv(16b|8h|4s|2d)_POST$")>; +def : InstRW<[FalkorWr_2LD_3cyc, FalkorReadIncLd], (instrs LD3i64)>; +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_2LD_3cyc, FalkorReadIncLd], + (instrs LD3i64_POST)>; +def : InstRW<[FalkorWr_2LD_3cyc, FalkorReadIncLd], (instrs LD4i64)>; +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_2LD_3cyc, FalkorReadIncLd], + (instrs LD4i64_POST)>; + +def : InstRW<[FalkorWr_1LD_2VXVY_4cyc, FalkorReadIncLd], (instregex "^LD2i(8|16|32)$")>; +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1LD_2VXVY_4cyc, FalkorReadIncLd], + (instregex "^LD2i(8|16|32)_POST$")>; + +def : InstRW<[FalkorWr_2LD_1none_3cyc, FalkorReadIncLd], (instregex "^LD1Threev(8b|4h|2s|1d)$")>; +def : 
InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_2LD_1none_3cyc, FalkorReadIncLd], + (instregex "^LD1Threev(8b|4h|2s|1d)_POST$")>; +def : InstRW<[FalkorWr_2LD_1none_3cyc, FalkorReadIncLd], (instregex "^LD3Rv(8b|4h|2s|1d)$")>; +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_2LD_1none_3cyc, FalkorReadIncLd], + (instregex "^LD3Rv(8b|4h|2s|1d)_POST$")>; + +def : InstRW<[FalkorWr_3LD_3cyc, FalkorReadIncLd], (instregex "^LD1Threev(16b|8h|4s|2d)$")>; +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_3LD_3cyc, FalkorReadIncLd], + (instregex "^LD1Threev(16b|8h|4s|2d)_POST$")>; +def : InstRW<[FalkorWr_3LD_3cyc, FalkorReadIncLd], (instrs LD3Threev2d)>; +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_3LD_3cyc, FalkorReadIncLd], + (instrs LD3Threev2d_POST)>; +def : InstRW<[FalkorWr_3LD_3cyc, FalkorReadIncLd], (instregex "^LD3Rv(16b|8h|4s|2d)$")>; +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_3LD_3cyc, FalkorReadIncLd], + (instregex "^LD3Rv(16b|8h|4s|2d)_POST$")>; + +def : InstRW<[FalkorWr_1LD_3VXVY_4cyc, FalkorReadIncLd], (instregex "^LD3i(8|16|32)$")>; +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1LD_3VXVY_4cyc, FalkorReadIncLd], + (instregex "^LD3i(8|16|32)_POST$")>; + +def : InstRW<[FalkorWr_2LD_2none_3cyc, FalkorReadIncLd], (instregex "^LD1Fourv(8b|4h|2s|1d)$")>; +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_2LD_2none_3cyc, FalkorReadIncLd], + (instregex "^LD1Fourv(8b|4h|2s|1d)_POST$")>; +def : InstRW<[FalkorWr_2LD_2none_3cyc, FalkorReadIncLd], (instregex "^LD4Rv(8b|4h|2s|1d)$")>; +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_2LD_2none_3cyc, FalkorReadIncLd], + (instregex "^LD4Rv(8b|4h|2s|1d)_POST$")>; + +def : InstRW<[FalkorWr_4LD_3cyc, FalkorReadIncLd], (instregex "^LD1Fourv(16b|8h|4s|2d)$")>; +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_4LD_3cyc, FalkorReadIncLd], + (instregex "^LD1Fourv(16b|8h|4s|2d)_POST$")>; +def : InstRW<[FalkorWr_4LD_3cyc, FalkorReadIncLd], (instrs LD4Fourv2d)>; +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_4LD_3cyc, FalkorReadIncLd], + (instrs LD4Fourv2d_POST)>; +def : InstRW<[FalkorWr_4LD_3cyc, FalkorReadIncLd], (instregex "^LD4Rv(16b|8h|4s|2d)$")>; +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_4LD_3cyc, FalkorReadIncLd], + (instregex "^LD4Rv(16b|8h|4s|2d)_POST$")>; + +def : InstRW<[FalkorWr_1LD_4VXVY_4cyc, FalkorReadIncLd], (instregex "^LD4i(8|16|32)$")>; +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1LD_4VXVY_4cyc, FalkorReadIncLd], + (instregex "^LD4i(8|16|32)_POST$")>; + +def : InstRW<[FalkorWr_2LD_2VXVY_1none_4cyc, FalkorReadIncLd], + (instregex "^LD3Threev(8b|4h|2s)$")>; +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_2LD_2VXVY_1none_4cyc, FalkorReadIncLd], + (instregex "^LD3Threev(8b|4h|2s)_POST$")>; + +def : InstRW<[FalkorWr_2LD_2VXVY_2none_4cyc, FalkorReadIncLd], + (instregex "^LD4Fourv(8b|4h|2s)$")>; +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_2LD_2VXVY_2none_4cyc, FalkorReadIncLd], + (instregex "^LD4Fourv(8b|4h|2s)_POST$")>; + +def : InstRW<[FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc, FalkorReadIncLd], + (instregex "^LD3Threev(16b|8h|4s)$")>; + +def : InstRW<[FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc, FalkorReadIncLd], + (instregex "^LD4Fourv(16b|8h|4s)$")>; + +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_2LD_2VXVY_1XYZ_2LD_2VXVY_4cyc, FalkorReadIncLd], + (instregex "^LD3Threev(16b|8h|4s)_POST$")>; + +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_2LD_2VXVY_2LD_1XYZ_2VXVY_4cyc, FalkorReadIncLd], + (instregex "^LD4Fourv(16b|8h|4s)_POST$")>; // Arithmetic and Logical Instructions // 
----------------------------------------------------------------------------- -def : InstRW<[FalkorWr_ADD], (instregex "^ADD(S)?(W|X)r(s|x)$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^(CCMN|CCMP)(W|X)(r|i)$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^ADC(S)?(W|X)r$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^ADD(S)?(W|X)r(r|i)$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^(CSEL|CSINC|CSINV|CSNEG)(W|X)r$")>; def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^AND(S)?(W|X)r(i|r|s)$")>; def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^BIC(S)?(W|X)r(r|s)$")>; def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^EON(W|X)r(r|s)$")>; def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^EOR(W|X)r(i|r|s)$")>; def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^ORN(W|X)r(r|s)$")>; -def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^ORR(W|X)r(i|r|s)$")>; -def : InstRW<[FalkorWr_2XYZ_2cyc], (instregex "^SUB(S)?(W|X)r(s|x)$")>; +def : InstRW<[FalkorWr_ORRi], (instregex "^ORR(W|X)ri$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^ORR(W|X)r(r|s)$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^SBC(S)?(W|X)r$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^SUB(S)?(W|X)r(r|i)$")>; +def : InstRW<[FalkorWr_ADDSUBsx], (instregex "^ADD(S)?(W|X)r(s|x|x64)$")>; +def : InstRW<[FalkorWr_ADDSUBsx], (instregex "^SUB(S)?(W|X)r(s|x|x64)$")>; // SIMD Miscellaneous Instructions // ----------------------------------------------------------------------------- def : InstRW<[FalkorWr_1GTOV_1cyc], (instregex "^DUP(v8i8|v4i16|v2i32)(gpr|lane)$")>; def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^DUP(v16i8|v8i16)(gpr|lane)$")>; +def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^CPY(i8|i16|i32|i64)$")>; def : InstRW<[FalkorWr_1GTOV_1cyc], (instregex "^INSv(i8|i16)(gpr|lane)$")>; def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^(S|U)MOVv.*$")>; def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(BIF|BIT|BSL)v8i8$")>; def : InstRW<[FalkorWr_1VXVY_1cyc], (instrs EXTv8i8)>; -def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "(MOVI|MVNI)(D|v8b_ns|v2i32|v4i16|v2s_msl)$")>; +def : InstRW<[FalkorWr_1VXVY_0cyc], (instregex "(MOVI|MVNI)(D|v8b_ns|v2i32|v4i16|v2s_msl)$")>; // imm fwd def : InstRW<[FalkorWr_1VXVY_1cyc], (instrs TBLv8i8One)>; def : InstRW<[FalkorWr_1VXVY_1cyc], (instrs NOTv8i8)>; def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^REV(16|32|64)v.*$")>; -def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(TRN1|TRN2|ZIP1|UZP1|UZP2|ZIP2|XTN|XTN2)(v2i32|v2i64|v4i16|v4i32|v8i8|v8i16|v16i8)$")>; +def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(TRN1|TRN2|ZIP1|UZP1|UZP2|ZIP2|XTN)(v2i32|v2i64|v4i16|v4i32|v8i8|v8i16|v16i8)$")>; -def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(CLS|CLZ|CNT|RBIT)(v4i32|v8i16|v16i8)$")>; +def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(CLS|CLZ|CNT|RBIT)(v2i32|v4i16|v8i8)$")>; def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "(S|U)QXTU?Nv.*$")>; def : InstRW<[FalkorWr_1VXVY_3cyc], (instrs FRECPEv1i32, FRECPEv1i64, FRSQRTEv1i32, FRSQRTEv1i64, FRECPEv2f32, FRSQRTEv2f32)>; def : InstRW<[FalkorWr_1VXVY_3cyc], (instrs FRECPXv1i32, FRECPXv1i64)>; def : InstRW<[FalkorWr_1VXVY_3cyc], (instrs URECPEv2i32, URSQRTEv2i32)>; -def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc], (instrs FRECPS32, FRSQRTS32, FRECPSv2f32, FRSQRTSv2f32)>; +def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc], + (instrs FRECPS32, FRSQRTS32, FRECPSv2f32, FRSQRTSv2f32)>; -def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc], (instrs FRECPS64, FRSQRTS64)>; +def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc], + (instrs FRECPS64, FRSQRTS64)>; -def : 
InstRW<[FalkorWr_1GTOV_1VXVY_2cyc],(instregex "^INSv(i32|i64)(gpr|lane)$")>; +def : InstRW<[FalkorWr_1GTOV_1VXVY_2cyc], + (instregex "^INSv(i32|i64)(gpr|lane)$")>; def : InstRW<[FalkorWr_2GTOV_1cyc], (instregex "^DUP(v4i32|v2i64)(gpr|lane)$")>; +def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "^(BIF|BIT|BSL)v16i8$")>; def : InstRW<[FalkorWr_2VXVY_1cyc], (instrs EXTv16i8)>; -def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "(MOVI|MVNI)(v2d_ns|v16b_ns|v4i32|v8i16|v4s_msl)$")>; +def : InstRW<[FalkorWr_2VXVY_0cyc], (instregex "(MOVI|MVNI)(v2d_ns|v16b_ns|v4i32|v8i16|v4s_msl)$")>; // imm fwd def : InstRW<[FalkorWr_2VXVY_1cyc], (instrs NOTv16i8)>; def : InstRW<[FalkorWr_2VXVY_1cyc], (instrs TBLv16i8One)>; +def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(CLS|CLZ|CNT|RBIT)(v4i32|v8i16|v16i8)$")>; def : InstRW<[FalkorWr_2VXVY_3cyc], (instrs FRECPEv2f64, FRECPEv4f32, FRSQRTEv2f64, FRSQRTEv4f32)>; def : InstRW<[FalkorWr_2VXVY_3cyc], (instrs URECPEv4i32, URSQRTEv4i32)>; def : InstRW<[FalkorWr_2VXVY_4cyc], (instrs TBLv8i8Two)>; def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^TBX(v8|v16)i8One$")>; -def : InstRW<[FalkorWr_FMUL32_2VXVY_5cyc], (instrs FRECPSv4f32, FRSQRTSv4f32)>; +def : InstRW<[FalkorWr_FMUL32_2VXVY_5cyc], + (instrs FRECPSv4f32, FRSQRTSv4f32)>; -def : InstRW<[FalkorWr_FMUL64_2VXVY_6cyc], (instrs FRECPSv2f64, FRSQRTSv2f64)>; +def : InstRW<[FalkorWr_FMUL64_2VXVY_6cyc], + (instrs FRECPSv2f64, FRSQRTSv2f64)>; def : InstRW<[FalkorWr_3VXVY_5cyc], (instregex "^TBL(v8i8Three|v16i8Two)$")>; def : InstRW<[FalkorWr_3VXVY_5cyc], (instregex "^TBX(v8i8Two|v16i8Two)$")>; @@ -328,50 +962,113 @@ def : InstRW<[FalkorWr_5VXVY_7cyc], (instregex "^TBX(v8i8Four|v16i8Four)$")>; // SIMD Store Instructions // ----------------------------------------------------------------------------- -def : InstRW<[WriteVST], (instregex "^STP(D|S)(i)$")>; -def : InstRW<[WriteVST, WriteAdr], (instregex "^STP(D|S)(post|pre)$")>; -def : InstRW<[FalkorWr_2XYZ_2ST_2VSD_0cyc], (instregex "^STRQro(W|X)$")>; - -def : InstRW<[WriteVST], (instregex "^ST1(One(v8b|v4h|v2s|v1d)(_POST)?|(i8|i16|i32|i64)(_POST)?|One(v16b|v8h|v4s|v2d)|Two(v8b|v4h|v2s|v1d))$")>; -def : InstRW<[WriteVST], (instregex "^ST2(Two(v8b|v4h|v2s|v1d)|(i8|i16|i32|i64))$")>; -def : InstRW<[WriteVST, WriteAdr], (instregex "^ST1(One(v16b|v8h|v4s|v2d)|Two(v8b|v4h|v2s|v1d))_POST$")>; -def : InstRW<[WriteVST, WriteAdr], (instregex "^ST2(Two(v8b|v4h|v2s|v1d)|(i8|i16|i32|i64))_POST$")>; - -def : InstRW<[WriteVST, WriteVST], (instregex "^ST1(Two(v16b|v8h|v4s|v2d)|(Three|Four)(v8b|v4h|v2s|v1d))$")>; -def : InstRW<[WriteVST, WriteVST], (instregex "^ST2Two(v16b|v8h|v4s|v2d)$")>; -def : InstRW<[WriteVST, WriteVST], (instregex "^ST3(i8|i16|i32|i64)$")>; -def : InstRW<[WriteVST, WriteVST], (instregex "^ST4(i8|i16|i32|i64)$")>; -def : InstRW<[WriteVST, WriteVST, WriteAdr], (instregex "^ST1(Two(v16b|v8h|v4s|v2d)|(Three|Four)(v8b|v4h|v2s|v1d))_POST$")>; -def : InstRW<[WriteVST, WriteVST, WriteAdr], (instregex "^ST2Two(v16b|v8h|v4s|v2d)_POST$")>; -def : InstRW<[WriteVST, WriteVST, WriteAdr], (instregex "^ST3(i8|i16|i32|i64)_POST$")>; -def : InstRW<[WriteVST, WriteVST, WriteAdr], (instregex "^ST4(i8|i16|i32|i64)_POST$")>; - -def : InstRW<[WriteV, WriteVST, WriteVST], (instregex "^ST3Three(v8b|v4h|v2s|v1d)$")>; -def : InstRW<[WriteV, WriteVST, WriteVST, WriteAdr], (instregex "^ST3Three(v8b|v4h|v2s|v1d)_POST$")>; - -def : InstRW<[WriteVST, WriteVST, WriteVST], (instregex "^ST1Three(v16b|v8h|v4s|v2d)$")>; -def : InstRW<[WriteVST, WriteVST, WriteVST], (instrs ST3Threev2d)>; -def : 
InstRW<[WriteVST, WriteVST, WriteVST, WriteAdr], (instregex "^ST1Three(v16b|v8h|v4s|v2d)_POST$")>; -def : InstRW<[WriteVST, WriteVST, WriteVST, WriteAdr], (instrs ST3Threev2d_POST)>; - -def : InstRW<[WriteV, WriteV, WriteVST, WriteVST], (instregex "^ST4Four(v8b|v4h|v2s|v1d)$")>; -def : InstRW<[WriteV, WriteV, WriteVST, WriteVST, WriteAdr], (instregex "^ST4Four(v8b|v4h|v2s|v1d)_POST$")>; - -def : InstRW<[WriteVST, WriteVST, WriteVST, WriteVST], (instregex "^ST1Four(v16b|v8h|v4s|v2d)$")>; -def : InstRW<[WriteVST, WriteVST, WriteVST, WriteVST], (instrs ST4Fourv2d)>; -def : InstRW<[WriteVST, WriteVST, WriteVST, WriteVST, WriteAdr], (instregex "^ST1Four(v16b|v8h|v4s|v2d)_POST$")>; -def : InstRW<[WriteVST, WriteVST, WriteVST, WriteVST, WriteAdr], (instrs ST4Fourv2d_POST)>; - -def : InstRW<[WriteV, WriteV, WriteVST, WriteVST, WriteVST, WriteVST], (instregex "^ST3Three(v16b|v8h|v4s)$")>; -def : InstRW<[WriteV, WriteV, WriteVST, WriteVST, WriteVST, WriteVST, WriteAdr],(instregex "^ST3Three(v16b|v8h|v4s)_POST$")>; - -def : InstRW<[WriteV, WriteV, WriteV, WriteV, WriteVST, WriteVST, WriteVST, WriteVST], (instregex "^ST4Four(v16b|v8h|v4s)$")>; -def : InstRW<[WriteV, WriteV, WriteV, WriteV, WriteVST, WriteVST, WriteVST, WriteVST, WriteAdr],(instregex "^ST4Four(v16b|v8h|v4s)_POST$")>; + +def : InstRW<[FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^STR(Q|D|S|H|B)ui$")>; +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^STR(Q|D|S|H|B)(post|pre)$")>; +def : InstRW<[FalkorWr_STRVro, ReadDefault, FalkorReadIncSt], + (instregex "^STR(D|S|H|B)ro(W|X)$")>; +def : InstRW<[FalkorWr_2VSD_2ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt], + (instregex "^STPQi$")>; +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_2VSD_2ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt], + (instregex "^STPQ(post|pre)$")>; +def : InstRW<[FalkorWr_1VSD_1ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt], + (instregex "^STP(D|S)(i)$")>; +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1VSD_1ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt], + (instregex "^STP(D|S)(post|pre)$")>; +def : InstRW<[FalkorWr_STRQro, ReadDefault, FalkorReadIncSt], + (instregex "^STRQro(W|X)$")>; +def : InstRW<[FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^STUR(Q|D|S|B|H)i$")>; +def : InstRW<[FalkorWr_1VSD_1ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt], + (instrs STNPDi, STNPSi)>; +def : InstRW<[FalkorWr_2VSD_2ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt], + (instrs STNPQi)>; + +def : InstRW<[FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST1(One(v8b|v4h|v2s|v1d)|(i8|i16|i32|i64)|One(v16b|v8h|v4s|v2d)|Two(v8b|v4h|v2s|v1d))$")>; +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST1(One(v8b|v4h|v2s|v1d)_POST|(i8|i16|i32|i64)_POST)$")>; +def : InstRW<[FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST2(Two(v8b|v4h|v2s)|(i8|i16|i32|i64))$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST1(One(v16b|v8h|v4s|v2d)|Two(v8b|v4h|v2s|v1d))_POST$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST2(Two(v8b|v4h|v2s)|(i8|i16|i32|i64))_POST$")>; + +def : InstRW<[FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST1(Two(v16b|v8h|v4s|v2d)|(Three|Four)(v8b|v4h|v2s|v1d))$")>; +def : 
InstRW<[FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST2Two(v16b|v8h|v4s|v2d)$")>; +def : InstRW<[FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST3(i8|i16|i32|i64)$")>; +def : InstRW<[FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST4(i8|i16|i32|i64)$")>; +// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST1(Two(v16b|v8h|v4s|v2d)|(Three|Four)(v8b|v4h|v2s|v1d))_POST$")>; +// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST2Two(v16b|v8h|v4s|v2d)_POST$")>; +// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST3(i8|i16|i32|i64)_POST$")>; +// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST4(i8|i16|i32|i64)_POST$")>; + +def : InstRW<[FalkorWr_1VXVY_2ST_2VSD_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST3Three(v8b|v4h|v2s)$")>; +// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_1VXVY_2ST_2VSD_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST3Three(v8b|v4h|v2s)_POST$")>; + +def : InstRW<[FalkorWr_3VSD_3ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST1Three(v16b|v8h|v4s|v2d)$")>; +def : InstRW<[FalkorWr_3VSD_3ST_0cyc, ReadDefault, FalkorReadIncSt], + (instrs ST3Threev2d)>; +// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_3VSD_3ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST1Three(v16b|v8h|v4s|v2d)_POST$")>; +// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_3VSD_3ST_0cyc, ReadDefault, FalkorReadIncSt], + (instrs ST3Threev2d_POST)>; + +def : InstRW<[FalkorWr_2VXVY_2ST_2VSD_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST4Four(v8b|v4h|v2s)$")>; +// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VXVY_2ST_2VSD_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST4Four(v8b|v4h|v2s)_POST$")>; + +def : InstRW<[FalkorWr_4VSD_4ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST1Four(v16b|v8h|v4s|v2d)$")>; +def : InstRW<[FalkorWr_4VSD_4ST_0cyc, ReadDefault, FalkorReadIncSt], + (instrs ST4Fourv2d)>; +// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_4VSD_4ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST1Four(v16b|v8h|v4s|v2d)_POST$")>; +// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_4VSD_4ST_0cyc, ReadDefault, FalkorReadIncSt], + (instrs ST4Fourv2d_POST)>; + +def : InstRW<[FalkorWr_2VXVY_4ST_4VSD_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST3Three(v16b|v8h|v4s)$")>; +// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). 
+def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VXVY_4ST_4VSD_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST3Three(v16b|v8h|v4s)_POST$")>; + +def : InstRW<[FalkorWr_4VXVY_4ST_4VSD_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST4Four(v16b|v8h|v4s)$")>; +// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_4VXVY_4ST_4VSD_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST4Four(v16b|v8h|v4s)_POST$")>; // Branch Instructions // ----------------------------------------------------------------------------- -def : InstRW<[FalkorWr_1none_0cyc], (instrs B)>; +def : InstRW<[FalkorWr_1none_0cyc], (instrs B, TCRETURNdi)>; def : InstRW<[FalkorWr_1Z_0cyc], (instregex "^(BR|RET|(CBZ|CBNZ|TBZ|TBNZ)(W|X))$")>; +def : InstRW<[FalkorWr_1Z_0cyc], (instrs RET_ReallyLR, TCRETURNri)>; def : InstRW<[FalkorWr_1ZB_0cyc], (instrs Bcc)>; def : InstRW<[FalkorWr_1XYZB_0cyc], (instrs BL)>; def : InstRW<[FalkorWr_1Z_1XY_0cyc], (instrs BLR)>; @@ -388,85 +1085,117 @@ def : InstRW<[FalkorWr_4VXVY_3cyc], (instrs SHA256SU1rrr)>; // FP Load Instructions // ----------------------------------------------------------------------------- -def : InstRW<[WriteLD], (instregex "^LDR((Q|D|S|H|B)ui|(Q|D|S)l)$")>; -def : InstRW<[WriteLD, WriteAdr], (instregex "^LDR(Q|D|S|H|B)(post|pre)$")>; -def : InstRW<[WriteLD], (instregex "^LDUR(Q|D|S|H|B)i$")>; -def : InstRW<[FalkorWr_LDR], (instregex "^LDR(Q|D|H|S|B)ro(W|X)$")>; -def : InstRW<[FalkorWr_2LD_3cyc, WriteLDHi],(instrs LDNPQi)>; -def : InstRW<[FalkorWr_2LD_3cyc, WriteLDHi],(instrs LDPQi)>; -def : InstRW<[FalkorWr_1LD_1none_3cyc, WriteLDHi],(instregex "LDNP(D|S)i$")>; -def : InstRW<[FalkorWr_1LD_1none_3cyc, WriteLDHi],(instregex "LDP(D|S)i$")>; -def : InstRW<[FalkorWr_1LD_1none_3cyc, WriteLDHi, WriteAdr],(instregex "LDP(D|S)(pre|post)$")>; -def : InstRW<[FalkorWr_2LD_3cyc, WriteLDHi, WriteAdr],(instregex "^LDPQ(pre|post)$")>; +def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd], + (instregex "^LDR((Q|D|S|H|B)ui|(Q|D|S)l)$")>; +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1LD_3cyc, FalkorReadIncLd], + (instregex "^LDR(Q|D|S|H|B)(post|pre)$")>; +def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd], + (instregex "^LDUR(Q|D|S|H|B)i$")>; +def : InstRW<[FalkorWr_LDRro, FalkorReadIncLd], + (instregex "^LDR(Q|D|H|S|B)ro(W|X)$")>; +def : InstRW<[FalkorWr_2LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd], + (instrs LDNPQi)>; +def : InstRW<[FalkorWr_2LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd], + (instrs LDPQi)>; +def : InstRW<[FalkorWr_1LD_1none_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd], + (instregex "LDNP(D|S)i$")>; +def : InstRW<[FalkorWr_1LD_1none_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd], + (instregex "LDP(D|S)i$")>; +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1LD_1none_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd], + (instregex "LDP(D|S)(pre|post)$")>; +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_2LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd], + (instregex "^LDPQ(pre|post)$")>; // FP Data Processing Instructions // ----------------------------------------------------------------------------- -def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^FCCMP(E)?(H|S|D)rr$")>; -def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^FCMP(E)?(H|S|D)r(r|i)$")>; -def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^FCVT(A|M|N|P)(S|U)U(W|X)(H|S|D)r$")>; -def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(FABS|FNEG)(H|S|D)r$")>; -def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^FCSEL(H|S|D)rrr$")>; +def : 
InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^FCCMP(E)?(S|D)rr$")>; +def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^FCMP(E)?(S|D)r(r|i)$")>; +def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^FCVT(A|M|N|P|Z)(S|U)U(W|X)(S|D)r$")>; +def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(FABS|FNEG)(S|D)r$")>; +def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^FCSEL(S|D)rrr$")>; -def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^F(MAX|MIN)(NM)?(H|S|D)rr$")>; -def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^F(MAX|MIN)(NM)?Pv2i(16|32|64)p$")>; -def : InstRW<[FalkorWr_1VXVY_2cyc], (instrs FCVTHSr, FCVTHDr)>; -def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FRINT(A|I|M|N|P|X|Z)(H|S|D)r$")>; +def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^F(MAX|MIN)(NM)?(S|D)rr$")>; +def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^F(MAX|MIN)(NM)?Pv2i(32|64)p$")>; +def : InstRW<[FalkorWr_1VXVY_2cyc], (instrs FCVTSHr, FCVTDHr)>; +def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FRINT(A|I|M|N|P|X|Z)(S|D)r$")>; -def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^FABD(16|32|64)$")>; -def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(FADD|FSUB)(H|S|D)rr$")>; -def : InstRW<[FalkorWr_1VXVY_3cyc], (instrs FCVTSHr, FCVTDHr)>; +def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^FABD(32|64)$")>; +def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(FADD|FSUB)(S|D)rr$")>; +def : InstRW<[FalkorWr_1VXVY_3cyc], (instrs FCVTHSr, FCVTHDr)>; def : InstRW<[FalkorWr_1VXVY_4cyc], (instrs FCVTSDr, FCVTDSr)>; -def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc], (instregex "^F(N)?MUL(H|S)rr$")>; +def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc], + (instregex "^F(N)?MULSrr$")>; -def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc], (instregex "^F(N)?MULDrr$")>; +def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc], + (instregex "^F(N)?MULDrr$")>; -def : InstRW<[FalkorWr_1VX_1VY_10cyc],(instregex "^FDIV(H|S|D)rr$")>; -def : InstRW<[FalkorWr_1VX_1VY_2cyc], (instregex "^FSQRT(H|S|D)r$")>; +def : InstRW<[FalkorWr_1VX_1VY_10cyc],(instrs FDIVSrr)>; +def : InstRW<[FalkorWr_1VX_1VY_14cyc],(instrs FDIVDrr)>; +def : InstRW<[FalkorWr_1VX_1VY_12cyc],(instrs FSQRTSr)>; +def : InstRW<[FalkorWr_1VX_1VY_21cyc],(instrs FSQRTDr)>; -def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc, ReadDefault, ReadDefault, FalkorReadFMA32], (instregex "^F(N)?M(ADD|SUB)(H|S)rrr$")>; -def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc, ReadDefault, ReadDefault, FalkorReadFMA64], (instregex "^F(N)?M(ADD|SUB)Drrr$")>; +def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc, ReadDefault, ReadDefault, FalkorReadFMA32], + (instregex "^F(N)?M(ADD|SUB)Srrr$")>; +def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc, ReadDefault, ReadDefault, FalkorReadFMA64], + (instregex "^F(N)?M(ADD|SUB)Drrr$")>; // FP Miscellaneous Instructions // ----------------------------------------------------------------------------- -def : InstRW<[FalkorWr_FMOV], (instregex "^FMOV(HW|HX|SW|DX|DXHigh)r$")>; -def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^FCVTZ(S|U)(S|U)(W|X)(D|S)ri?$")>; -def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^FMOV(WH|WS|XH|XD|XDHigh)r$")>; -def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^FMOV(Hi|Hr|S0|Si|Sr|D0|Di|Dr|v.*_ns)$")>; +def : InstRW<[FalkorWr_FMOV], (instregex "^FMOV(WS|XD|XDHigh)r$")>; +def : InstRW<[FalkorWr_1GTOV_0cyc], (instregex "^FMOV(S|D)i$")>; // imm fwd +def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^FCVTZ(S|U)S(W|X)(D|S)ri$")>; +def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^FCVTZ(S|U)(d|s)$")>; +def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^FMOV(SW|DX|DXHigh)r$")>; +def : InstRW<[FalkorWr_1VXVY_0cyc], (instregex "^FMOV(Sr|Dr|v.*_ns)$")>; // 
imm fwd +// FIXME: We are currently generating movi v0.2d, #0 for these, which is worse than fmov wzr/xzr +def : InstRW<[FalkorWr_2VXVY_0cyc], (instrs FMOVD0, FMOVS0)>; // imm fwd def : InstRW<[FalkorWr_1GTOV_4cyc], (instregex "^(S|U)CVTF(S|U)(W|X)(D|S)ri$")>; -def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^(S|U)CVTF(v1i16|v1i32|v2i32|v1i64|v4i16|v2f32|v4f16|d|s)(_shift)?")>; +def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^(S|U)CVTF(v1i32|v2i32|v1i64|v2f32|d|s)(_shift)?")>; -def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^(S|U)CVTF(v2i64|v4i32|v8i16|v2f64|v4f32|v8f16)(_shift)?")>; +def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^(S|U)CVTF(v2i64|v4i32|v2f64|v4f32)(_shift)?")>; // Load Instructions // ----------------------------------------------------------------------------- def : InstRW<[FalkorWr_1ST_0cyc], (instrs PRFMui, PRFMl)>; def : InstRW<[FalkorWr_1ST_0cyc], (instrs PRFUMi)>; - -def : InstRW<[WriteLD, WriteLDHi], (instregex "^LDNP(W|X)i$")>; -def : InstRW<[WriteLD, WriteLDHi], (instregex "^LDP(W|X)i$")>; -def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^LDR(B|H|W|X)ui$")>; -def : InstRW<[WriteLD, WriteAdr], (instregex "^LDR(B|H|W|X)(post|pre)$")>; -def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^LDR(W|X)l$")>; -def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^LDTR(B|H|W|X)i$")>; -def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^LDUR(B|H|W|X)i$")>; - -def : InstRW<[FalkorWr_1LD_4cyc], (instregex "^LDRS(BW|BX|HW|HX|W)ui$")>; -def : InstRW<[FalkorWr_1LD_4cyc], (instrs LDRSWl)>; -def : InstRW<[FalkorWr_1LD_4cyc], (instregex "^LDTRS(BW|BX|HW|HX|W)i$")>; -def : InstRW<[FalkorWr_1LD_4cyc], (instregex "^LDURS(BW|BX|HW|HX|W)i$")>; - -def : InstRW<[FalkorWr_PRFM], (instregex "^PRFMro(W|X)$")>; -def : InstRW<[FalkorWr_LDR], (instregex "^LDR(B|H|W|X)ro(W|X)$")>; - -def : InstRW<[FalkorWr_LDRS], (instregex "^LDRS(BW|BX|HW|HX|W)ro(W|X)$")>; - -def : InstRW<[FalkorWr_1LD_4cyc, WriteAdr],(instregex "^LDRS(BW|BX|HW|HX|W)(post|pre)$")>; -def : InstRW<[WriteLD, WriteLDHi, WriteAdr],(instregex "^LDP(W|X)(post|pre)$")>; -def : InstRW<[FalkorWr_1LD_4cyc, WriteLDHi],(instrs LDPSWi)>; -def : InstRW<[FalkorWr_1LD_4cyc, WriteLDHi, WriteAdr],(instregex "^LDPSW(post|pre)$")>; +def : InstRW<[FalkorWr_1LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd], + (instregex "^LDNP(W|X)i$")>; +def : InstRW<[FalkorWr_1LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd], + (instregex "^LDP(W|X)i$")>; +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd], + (instregex "^LDP(W|X)(post|pre)$")>; +def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd], + (instregex "^LDR(BB|HH|W|X)ui$")>; +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1LD_3cyc, FalkorReadIncLd], + (instregex "^LDR(BB|HH|W|X)(post|pre)$")>; +def : InstRW<[FalkorWr_LDRro, FalkorReadIncLd], + (instregex "^LDR(BB|HH|W|X)ro(W|X)$")>; +def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd], + (instregex "^LDR(W|X)l$")>; +def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd], + (instregex "^LDTR(B|H|W|X)i$")>; +def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd], + (instregex "^LDUR(BB|HH|W|X)i$")>; +def : InstRW<[FalkorWr_PRFMro], (instregex "^PRFMro(W|X)$")>; +def : InstRW<[FalkorWr_1LD_4cyc, FalkorWr_none_4cyc, FalkorReadIncLd], + (instrs LDPSWi)>; +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1LD_4cyc, FalkorWr_none_4cyc, FalkorReadIncLd], + (instregex "^LDPSW(post|pre)$")>; +def : InstRW<[FalkorWr_1LD_4cyc, FalkorReadIncLd], + (instregex "^LDRS(BW|BX|HW|HX|W)ui$")>; +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1LD_4cyc, 
FalkorReadIncLd], + (instregex "^LDRS(BW|BX|HW|HX|W)(post|pre)$")>; +def : InstRW<[FalkorWr_LDRSro, FalkorReadIncLd], + (instregex "^LDRS(BW|BX|HW|HX|W)ro(W|X)$")>; +def : InstRW<[FalkorWr_1LD_4cyc, FalkorReadIncLd], + (instrs LDRSWl)>; +def : InstRW<[FalkorWr_1LD_4cyc, FalkorReadIncLd], + (instregex "^LDTRS(BW|BX|HW|HX|W)i$")>; +def : InstRW<[FalkorWr_1LD_4cyc, FalkorReadIncLd], + (instregex "^LDURS(BW|BX|HW|HX|W)i$")>; // Miscellaneous Data-Processing Instructions // ----------------------------------------------------------------------------- @@ -477,24 +1206,35 @@ def : InstRW<[FalkorWr_2XYZ_2cyc], (instregex "^EXTR(W|X)rri$")>; // Divide and Multiply Instructions // ----------------------------------------------------------------------------- -def : InstRW<[FalkorWr_IMUL64_1X_4cyc, ReadDefault, ReadDefault, FalkorReadIMA64], (instregex "^(S|U)M(ADD|SUB)Lrrr$")>; -def : InstRW<[FalkorWr_IMUL32_1X_2cyc, ReadDefault, ReadDefault, FalkorReadIMA32], (instregex "^M(ADD|SUB)Wrrr$")>; +def : InstRW<[FalkorWr_IMUL64_1X_4cyc, ReadDefault, ReadDefault, FalkorReadIMA64], + (instregex "^(S|U)M(ADD|SUB)Lrrr$")>; +def : InstRW<[FalkorWr_IMUL32_1X_2cyc, ReadDefault, ReadDefault, FalkorReadIMA32], + (instregex "^M(ADD|SUB)Wrrr$")>; -def : InstRW<[FalkorWr_IMUL64_1X_5cyc], (instregex "^(S|U)MULHrr$")>; -def : InstRW<[FalkorWr_IMUL64_1X_5cyc, ReadDefault, ReadDefault, FalkorReadIMA64], (instregex "^M(ADD|SUB)Xrrr$")>; +def : InstRW<[FalkorWr_IMUL64_1X_5cyc], (instregex "^(S|U)MULHrr$")>; +def : InstRW<[FalkorWr_IMUL64_1X_5cyc, ReadDefault, ReadDefault, FalkorReadIMA64], + (instregex "^M(ADD|SUB)Xrrr$")>; -def : InstRW<[FalkorWr_1X_1Z_8cyc], (instregex "^(S|U)DIVWr$")>; -def : InstRW<[FalkorWr_1X_1Z_16cyc], (instregex "^(S|U)DIVXr$")>; +def : InstRW<[FalkorWr_1X_1Z_8cyc], (instregex "^(S|U)DIVWr$")>; +def : InstRW<[FalkorWr_1X_1Z_11cyc], (instregex "^(S|U)DIVXr$")>; -def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc], (instregex "^(S|U)MULLv.*$")>; -def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA], (instregex "^(S|U)(MLAL|MLSL)v.*$")>; +def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc], + (instregex "^(S|U)MULLv.*$")>; +def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA], + (instregex "^(S|U)(MLAL|MLSL)v.*$")>; // Move and Shift Instructions // ----------------------------------------------------------------------------- -def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^(LSLV|LSRV|ASRV|RORV|MOVK)(W|X).*")>; -def : InstRW<[FalkorWr_1XYZB_1cyc], (instregex "^ADRP?$")>; -def : InstRW<[FalkorWr_1XYZB_1cyc], (instregex "^MOVN(W|X)i$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^(LSLV|LSRV|ASRV|RORV)(W|X)r$")>; +def : InstRW<[FalkorWr_1XYZ_0cyc], (instregex "^MOVK(W|X)i$")>; // imm fwd +def : InstRW<[FalkorWr_1XYZB_0cyc], (instregex "^ADRP?$")>; // imm fwd +def : InstRW<[FalkorWr_1XYZB_0cyc], (instregex "^MOVN(W|X)i$")>; // imm fwd def : InstRW<[FalkorWr_MOVZ], (instregex "^MOVZ(W|X)i$")>; +def : InstRW<[FalkorWr_1XYZ_0cyc], (instrs MOVi32imm, MOVi64imm)>; // imm fwd (approximation) +def : InstRW<[WriteSequence<[FalkorWr_1XYZ_1cyc, FalkorWr_1XYZ_1cyc]>], + (instrs MOVaddr, MOVaddrBA, MOVaddrCP, MOVaddrEXT, MOVaddrJT, MOVaddrTLS)>; +def : InstRW<[WriteSequence<[FalkorWr_1LD_3cyc, FalkorWr_1XYZ_1cyc]>], + (instrs LOADgot)>; // Other Instructions // ----------------------------------------------------------------------------- @@ -503,37 +1243,46 @@ def : InstRW<[FalkorWr_1none_0cyc], (instrs BRK, DCPS1, DCPS2, DCPS3, HINT, HL def : InstRW<[FalkorWr_1ST_0cyc], (instrs SYSxt, SYSLxt)>; def : 
InstRW<[FalkorWr_1Z_0cyc], (instrs MSRpstateImm1, MSRpstateImm4)>; -def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^(LDAR(B|H|W|X)|LDAXP(W|X)|LDAXR(B|H|W|X)|LDXP(W|X)|LDXR(B|H|W|X))$")>; -def : InstRW<[FalkorWr_1LD_3cyc], (instrs MRS)>; +def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd], + (instregex "^(LDAR(B|H|W|X)|LDAXR(B|H|W|X)|LDXR(B|H|W|X))$")>; +def : InstRW<[FalkorWr_1LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd], + (instregex "^(LDAXP(W|X)|LDXP(W|X))$")>; +def : InstRW<[FalkorWr_1LD_3cyc], (instrs MRS, MOVbaseTLS)>; def : InstRW<[FalkorWr_1LD_1Z_3cyc], (instrs DRPS)>; def : InstRW<[FalkorWr_1SD_1ST_0cyc], (instrs MSR)>; -def : InstRW<[WriteVST], (instrs STNPDi, STNPSi)>; -def : InstRW<[WriteSTP], (instrs STNPWi, STNPXi)>; +def : InstRW<[FalkorWr_1SD_1ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt], + (instrs STNPWi, STNPXi)>; def : InstRW<[FalkorWr_2LD_1Z_3cyc], (instrs ERET)>; def : InstRW<[FalkorWr_1ST_1SD_1LD_3cyc], (instregex "^LDC.*$")>; -def : InstRW<[FalkorWr_1ST_1SD_1LD_0cyc], (instregex "^STLR(B|H|W|X)$")>; -def : InstRW<[FalkorWr_1ST_1SD_1LD_0cyc], (instregex "^STXP(W|X)$")>; -def : InstRW<[FalkorWr_1ST_1SD_1LD_0cyc], (instregex "^STXR(B|H|W|X)$")>; - -def : InstRW<[FalkorWr_2LD_1ST_1SD_3cyc], (instregex "^STLXP(W|X)$")>; -def : InstRW<[FalkorWr_2LD_1ST_1SD_3cyc], (instregex "^STLXR(B|H|W|X)$")>; -def : InstRW<[WriteVST, WriteVST], (instrs STNPQi)>; +def : InstRW<[FalkorWr_1ST_1SD_1LD_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^STLR(B|H|W|X)$")>; +def : InstRW<[FalkorWr_1ST_1SD_1LD_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt], + (instregex "^STXP(W|X)$")>; +def : InstRW<[FalkorWr_1ST_1SD_1LD_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt], + (instregex "^STXR(B|H|W|X)$")>; + +def : InstRW<[FalkorWr_2LD_1ST_1SD_3cyc, ReadDefault, ReadDefault, ReadDefault, FalkorReadIncSt], + (instregex "^STLXP(W|X)$")>; +def : InstRW<[FalkorWr_2LD_1ST_1SD_3cyc, ReadDefault, ReadDefault, FalkorReadIncSt], + (instregex "^STLXR(B|H|W|X)$")>; // Store Instructions // ----------------------------------------------------------------------------- -def : InstRW<[WriteST], (instregex "^STP(W|X)i$")>; -def : InstRW<[WriteST, WriteAdr], (instregex "^STP(W|X)(post|pre)$")>; -def : InstRW<[WriteST], (instregex "^STR(Q|D|S|BB|HH)ui$")>; -def : InstRW<[WriteST], (instregex "^STUR(Q|D|S|BB|HH)i$")>; -def : InstRW<[WriteST], (instregex "^STR(B|H|W|X)ui$")>; -def : InstRW<[WriteST, WriteAdr], (instregex "^STR(B|H|W|X)(post|pre)$")>; -def : InstRW<[WriteST], (instregex "^STTR(B|H|W|X)i$")>; -def : InstRW<[WriteST], (instregex "^STUR(B|H|W|X)i$")>; - -def : InstRW<[WriteST, WriteAdr], (instregex "^STR(B|H|W|X)ro(W|X)$")>; - -def : InstRW<[WriteVST, WriteVST], (instregex "^STPQi$")>; -def : InstRW<[WriteVST, WriteVST, WriteAdr], (instregex "^STPQ(post|pre)$")>; +def : InstRW<[FalkorWr_1SD_1ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt], + (instregex "^STP(W|X)i$")>; +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1SD_1ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt], + (instregex "^STP(W|X)(post|pre)$")>; +def : InstRW<[FalkorWr_1SD_1ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^STR(BB|HH|W|X)ui$")>; +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1SD_1ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^STR(BB|HH|W|X)(post|pre)$")>; +def : InstRW<[FalkorWr_STRro, ReadDefault, FalkorReadIncSt], + (instregex "^STR(BB|HH|W|X)ro(W|X)$")>; +def : InstRW<[FalkorWr_1SD_1ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^STTR(B|H|W|X)i$")>; +def : 
InstRW<[FalkorWr_1SD_1ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^STUR(BB|HH|W|X)i$")>; + diff --git a/interpreter/llvm/src/lib/Target/AArch64/AArch64SchedKryoDetails.td b/interpreter/llvm/src/lib/Target/AArch64/AArch64SchedKryoDetails.td index 02cccccd3078c..cf4cdabb8cbfc 100644 --- a/interpreter/llvm/src/lib/Target/AArch64/AArch64SchedKryoDetails.td +++ b/interpreter/llvm/src/lib/Target/AArch64/AArch64SchedKryoDetails.td @@ -1374,7 +1374,9 @@ def KryoWrite_3cyc_LS_LS_400ln : let Latency = 3; let NumMicroOps = 2; } def : InstRW<[KryoWrite_3cyc_LS_LS_400ln], - (instregex "(LDAX?R(B|H|W|X)|LDAXP(W|X))")>; + (instregex "LDAX?R(B|H|W|X)")>; +def : InstRW<[KryoWrite_3cyc_LS_LS_400ln, WriteLDHi], + (instregex "LDAXP(W|X)")>; def KryoWrite_3cyc_LS_LS_401ln : SchedWriteRes<[KryoUnitLS, KryoUnitLS]> { let Latency = 3; let NumMicroOps = 2; @@ -1565,7 +1567,7 @@ def KryoWrite_3cyc_LS_258ln : SchedWriteRes<[KryoUnitLS]> { let Latency = 3; let NumMicroOps = 1; } -def : InstRW<[KryoWrite_3cyc_LS_258ln], +def : InstRW<[KryoWrite_3cyc_LS_258ln, WriteLDHi], (instregex "LDXP(W|X)")>; def KryoWrite_3cyc_LS_258_1ln : SchedWriteRes<[KryoUnitLS]> { diff --git a/interpreter/llvm/src/lib/Target/AArch64/AArch64SchedM1.td b/interpreter/llvm/src/lib/Target/AArch64/AArch64SchedM1.td index 3fbbc0be682d7..3b71cf8399a0d 100644 --- a/interpreter/llvm/src/lib/Target/AArch64/AArch64SchedM1.td +++ b/interpreter/llvm/src/lib/Target/AArch64/AArch64SchedM1.td @@ -23,7 +23,7 @@ def ExynosM1Model : SchedMachineModel { let LoopMicroOpBufferSize = 24; // Based on the instruction queue size. let LoadLatency = 4; // Optimistic load cases. let MispredictPenalty = 14; // Minimum branch misprediction penalty. - let CompleteModel = 0; // Use the default model otherwise. + let CompleteModel = 1; // Use the default model otherwise. } //===----------------------------------------------------------------------===// @@ -72,14 +72,14 @@ def M1WriteC2 : SchedWriteRes<[M1UnitC]> { let Latency = 2; } def M1WriteB1 : SchedWriteRes<[M1UnitB]> { let Latency = 1; } def M1WriteL5 : SchedWriteRes<[M1UnitL]> { let Latency = 5; } -def M1WriteLA : SchedWriteVariant<[SchedVar, SchedVar]>; def M1WriteS1 : SchedWriteRes<[M1UnitS]> { let Latency = 1; } def M1WriteS2 : SchedWriteRes<[M1UnitS]> { let Latency = 2; } def M1WriteS4 : SchedWriteRes<[M1UnitS]> { let Latency = 4; } -def M1WriteSA : SchedWriteVariant<[SchedVar, SchedVar]>; @@ -125,13 +125,13 @@ def : WriteRes { let Latency = 0; } // Load instructions. def : WriteRes { let Latency = 4; } def : WriteRes { let Latency = 4; } -def : SchedAlias; +def : SchedAlias; // Store instructions. def : WriteRes { let Latency = 1; } def : WriteRes { let Latency = 1; } def : WriteRes { let Latency = 1; } -def : SchedAlias; +def : SchedAlias; // FP data instructions. 
def : WriteRes { let Latency = 3; } @@ -231,6 +231,111 @@ def M1WriteNMISC3 : SchedWriteRes<[M1UnitNMISC]> { let Latency = 3; } def M1WriteNMISC4 : SchedWriteRes<[M1UnitNMISC]> { let Latency = 4; } def M1WriteTB : SchedWriteRes<[M1UnitC, M1UnitALU]> { let Latency = 2; } +def M1WriteVLDA : SchedWriteRes<[M1UnitL, + M1UnitL]> { let Latency = 6; } +def M1WriteVLDB : SchedWriteRes<[M1UnitL, + M1UnitL, + M1UnitL]> { let Latency = 7; } +def M1WriteVLDC : SchedWriteRes<[M1UnitL, + M1UnitL, + M1UnitL, + M1UnitL]> { let Latency = 8; } +def M1WriteVLDD : SchedWriteRes<[M1UnitL, + M1UnitNALU]> { let Latency = 7; + let ResourceCycles = [2]; } +def M1WriteVLDE : SchedWriteRes<[M1UnitL, + M1UnitNALU]> { let Latency = 6; } +def M1WriteVLDF : SchedWriteRes<[M1UnitL, + M1UnitL]> { let Latency = 10; + let ResourceCycles = [5]; } +def M1WriteVLDG : SchedWriteRes<[M1UnitL, + M1UnitNALU, + M1UnitNALU]> { let Latency = 7; + let ResourceCycles = [2]; } +def M1WriteVLDH : SchedWriteRes<[M1UnitL, + M1UnitNALU, + M1UnitNALU]> { let Latency = 6; } +def M1WriteVLDI : SchedWriteRes<[M1UnitL, + M1UnitL, + M1UnitL]> { let Latency = 12; + let ResourceCycles = [6]; } +def M1WriteVLDJ : SchedWriteRes<[M1UnitL, + M1UnitNALU, + M1UnitNALU, + M1UnitNALU]> { let Latency = 9; + let ResourceCycles = [4]; } +def M1WriteVLDK : SchedWriteRes<[M1UnitL, + M1UnitNALU, + M1UnitNALU, + M1UnitNALU, + M1UnitNALU]> { let Latency = 9; + let ResourceCycles = [4]; } +def M1WriteVLDL : SchedWriteRes<[M1UnitL, + M1UnitNALU, + M1UnitNALU, + M1UnitNALU]> { let Latency = 7; + let ResourceCycles = [2]; } +def M1WriteVLDM : SchedWriteRes<[M1UnitL, + M1UnitNALU, + M1UnitNALU, + M1UnitNALU, + M1UnitNALU]> { let Latency = 7; + let ResourceCycles = [2]; } +def M1WriteVLDN : SchedWriteRes<[M1UnitL, + M1UnitL, + M1UnitL, + M1UnitL]> { let Latency = 14; + let ResourceCycles = [7]; } + +def M1WriteVSTA : WriteSequence<[WriteVST], 2>; +def M1WriteVSTB : WriteSequence<[WriteVST], 3>; +def M1WriteVSTC : WriteSequence<[WriteVST], 4>; +def M1WriteVSTD : SchedWriteRes<[M1UnitS, + M1UnitFST, + M1UnitFST]> { let Latency = 7; + let ResourceCycles = [7]; } +def M1WriteVSTE : SchedWriteRes<[M1UnitS, + M1UnitFST, + M1UnitS, + M1UnitFST, + M1UnitFST]> { let Latency = 8; + let ResourceCycles = [8]; } +def M1WriteVSTF : SchedWriteRes<[M1UnitNALU, + M1UnitS, + M1UnitFST, + M1UnitS, + M1UnitFST, + M1UnitFST, + M1UnitFST]> { let Latency = 15; + let ResourceCycles = [15]; } +def M1WriteVSTG : SchedWriteRes<[M1UnitNALU, + M1UnitS, + M1UnitFST, + M1UnitS, + M1UnitFST, + M1UnitS, + M1UnitFST, + M1UnitFST, + M1UnitFST]> { let Latency = 16; + let ResourceCycles = [16]; } +def M1WriteVSTH : SchedWriteRes<[M1UnitNALU, + M1UnitS, + M1UnitFST, + M1UnitFST, + M1UnitFST]> { let Latency = 14; + let ResourceCycles = [14]; } +def M1WriteVSTI : SchedWriteRes<[M1UnitNALU, + M1UnitS, + M1UnitFST, + M1UnitS, + M1UnitFST, + M1UnitS, + M1UnitFST, + M1UnitS, + M1UnitFST, + M1UnitFST, + M1UnitFST]> { let Latency = 17; + let ResourceCycles = [17]; } // Branch instructions def : InstRW<[M1WriteB1], (instrs Bcc)>; @@ -360,8 +465,233 @@ def : InstRW<[M1WriteNALU2], (instregex "^(TRN|UZP)[12](v16i8|v8i16|v4i32|v2i64 def : InstRW<[M1WriteNALU1], (instregex "^ZIP[12]v")>; // ASIMD load instructions. 
+def : InstRW<[M1WriteVLDD], (instregex "LD1i(8|16|32)$")>; +def : InstRW<[M1WriteVLDD, + WriteAdr], (instregex "LD1i(8|16|32)_POST$")>; +def : InstRW<[M1WriteVLDE], (instregex "LD1i(64)$")>; +def : InstRW<[M1WriteVLDE, + WriteAdr], (instregex "LD1i(64)_POST$")>; + +def : InstRW<[M1WriteL5], (instregex "LD1Rv(8b|4h|2s)$")>; +def : InstRW<[M1WriteL5, + WriteAdr], (instregex "LD1Rv(8b|4h|2s)_POST$")>; +def : InstRW<[M1WriteL5], (instregex "LD1Rv(1d)$")>; +def : InstRW<[M1WriteL5, + WriteAdr], (instregex "LD1Rv(1d)_POST$")>; +def : InstRW<[M1WriteL5], (instregex "LD1Rv(16b|8h|4s|2d)$")>; +def : InstRW<[M1WriteL5, + WriteAdr], (instregex "LD1Rv(16b|8h|4s|2d)_POST$")>; + +def : InstRW<[M1WriteL5], (instregex "LD1Onev(8b|4h|2s|1d)$")>; +def : InstRW<[M1WriteL5, + WriteAdr], (instregex "LD1Onev(8b|4h|2s|1d)_POST$")>; +def : InstRW<[M1WriteL5], (instregex "LD1Onev(16b|8h|4s|2d)$")>; +def : InstRW<[M1WriteL5, + WriteAdr], (instregex "LD1Onev(16b|8h|4s|2d)_POST$")>; +def : InstRW<[M1WriteVLDA], (instregex "LD1Twov(8b|4h|2s|1d)$")>; +def : InstRW<[M1WriteVLDA, + WriteAdr], (instregex "LD1Twov(8b|4h|2s|1d)_POST$")>; +def : InstRW<[M1WriteVLDA], (instregex "LD1Twov(16b|8h|4s|2d)$")>; +def : InstRW<[M1WriteVLDA, + WriteAdr], (instregex "LD1Twov(16b|8h|4s|2d)_POST$")>; +def : InstRW<[M1WriteVLDB], (instregex "LD1Threev(8b|4h|2s|1d)$")>; +def : InstRW<[M1WriteVLDB, + WriteAdr], (instregex "LD1Threev(8b|4h|2s|1d)_POST$")>; +def : InstRW<[M1WriteVLDB], (instregex "LD1Threev(16b|8h|4s|2d)$")>; +def : InstRW<[M1WriteVLDB, + WriteAdr], (instregex "LD1Threev(16b|8h|4s|2d)_POST$")>; +def : InstRW<[M1WriteVLDC], (instregex "LD1Fourv(8b|4h|2s|1d)$")>; +def : InstRW<[M1WriteVLDC, + WriteAdr], (instregex "LD1Fourv(8b|4h|2s|1d)_POST$")>; +def : InstRW<[M1WriteVLDC], (instregex "LD1Fourv(16b|8h|4s|2d)$")>; +def : InstRW<[M1WriteVLDC, + WriteAdr], (instregex "LD1Fourv(16b|8h|4s|2d)_POST$")>; + +def : InstRW<[M1WriteVLDG], (instregex "LD2i(8|16)$")>; +def : InstRW<[M1WriteVLDG, + WriteAdr], (instregex "LD2i(8|16)_POST$")>; +def : InstRW<[M1WriteVLDG], (instregex "LD2i(32)$")>; +def : InstRW<[M1WriteVLDG, + WriteAdr], (instregex "LD2i(32)_POST$")>; +def : InstRW<[M1WriteVLDH], (instregex "LD2i(64)$")>; +def : InstRW<[M1WriteVLDH, + WriteAdr], (instregex "LD2i(64)_POST$")>; + +def : InstRW<[M1WriteVLDA], (instregex "LD2Rv(8b|4h|2s)$")>; +def : InstRW<[M1WriteVLDA, + WriteAdr], (instregex "LD2Rv(8b|4h|2s)_POST$")>; +def : InstRW<[M1WriteVLDA], (instregex "LD2Rv(1d)$")>; +def : InstRW<[M1WriteVLDA, + WriteAdr], (instregex "LD2Rv(1d)_POST$")>; +def : InstRW<[M1WriteVLDA], (instregex "LD2Rv(16b|8h|4s|2d)$")>; +def : InstRW<[M1WriteVLDA, + WriteAdr], (instregex "LD2Rv(16b|8h|4s|2d)_POST$")>; + +def : InstRW<[M1WriteVLDF], (instregex "LD2Twov(8b|4h|2s)$")>; +def : InstRW<[M1WriteVLDF, + WriteAdr], (instregex "LD2Twov(8b|4h|2s)_POST$")>; +def : InstRW<[M1WriteVLDF], (instregex "LD2Twov(16b|8h|4s)$")>; +def : InstRW<[M1WriteVLDF, + WriteAdr], (instregex "LD2Twov(16b|8h|4s)_POST$")>; +def : InstRW<[M1WriteVLDF], (instregex "LD2Twov(2d)$")>; +def : InstRW<[M1WriteVLDF, + WriteAdr], (instregex "LD2Twov(2d)_POST$")>; + +def : InstRW<[M1WriteVLDJ], (instregex "LD3i(8|16)$")>; +def : InstRW<[M1WriteVLDJ, + WriteAdr], (instregex "LD3i(8|16)_POST$")>; +def : InstRW<[M1WriteVLDJ], (instregex "LD3i(32)$")>; +def : InstRW<[M1WriteVLDJ, + WriteAdr], (instregex "LD3i(32)_POST$")>; +def : InstRW<[M1WriteVLDL], (instregex "LD3i(64)$")>; +def : InstRW<[M1WriteVLDL, + WriteAdr], (instregex "LD3i(64)_POST$")>; + +def : InstRW<[M1WriteVLDB], 
(instregex "LD3Rv(8b|4h|2s)$")>; +def : InstRW<[M1WriteVLDB, + WriteAdr], (instregex "LD3Rv(8b|4h|2s)_POST$")>; +def : InstRW<[M1WriteVLDB], (instregex "LD3Rv(1d)$")>; +def : InstRW<[M1WriteVLDB, + WriteAdr], (instregex "LD3Rv(1d)_POST$")>; +def : InstRW<[M1WriteVLDB], (instregex "LD3Rv(16b|8h|4s)$")>; +def : InstRW<[M1WriteVLDB, + WriteAdr], (instregex "LD3Rv(16b|8h|4s)_POST$")>; +def : InstRW<[M1WriteVLDB], (instregex "LD3Rv(2d)$")>; +def : InstRW<[M1WriteVLDB, + WriteAdr], (instregex "LD3Rv(2d)_POST$")>; + +def : InstRW<[M1WriteVLDI], (instregex "LD3Threev(8b|4h|2s)$")>; +def : InstRW<[M1WriteVLDI, + WriteAdr], (instregex "LD3Threev(8b|4h|2s)_POST$")>; +def : InstRW<[M1WriteVLDI], (instregex "LD3Threev(16b|8h|4s)$")>; +def : InstRW<[M1WriteVLDI, + WriteAdr], (instregex "LD3Threev(16b|8h|4s)_POST$")>; +def : InstRW<[M1WriteVLDI], (instregex "LD3Threev(2d)$")>; +def : InstRW<[M1WriteVLDI, + WriteAdr], (instregex "LD3Threev(2d)_POST$")>; + +def : InstRW<[M1WriteVLDK], (instregex "LD4i(8|16)$")>; +def : InstRW<[M1WriteVLDK, + WriteAdr], (instregex "LD4i(8|16)_POST$")>; +def : InstRW<[M1WriteVLDK], (instregex "LD4i(32)$")>; +def : InstRW<[M1WriteVLDK, + WriteAdr], (instregex "LD4i(32)_POST$")>; +def : InstRW<[M1WriteVLDM], (instregex "LD4i(64)$")>; +def : InstRW<[M1WriteVLDM, + WriteAdr], (instregex "LD4i(64)_POST$")>; + +def : InstRW<[M1WriteVLDC], (instregex "LD4Rv(8b|4h|2s)$")>; +def : InstRW<[M1WriteVLDC, + WriteAdr], (instregex "LD4Rv(8b|4h|2s)_POST$")>; +def : InstRW<[M1WriteVLDC], (instregex "LD4Rv(1d)$")>; +def : InstRW<[M1WriteVLDC, + WriteAdr], (instregex "LD4Rv(1d)_POST$")>; +def : InstRW<[M1WriteVLDC], (instregex "LD4Rv(16b|8h|4s)$")>; +def : InstRW<[M1WriteVLDC, + WriteAdr], (instregex "LD4Rv(16b|8h|4s)_POST$")>; +def : InstRW<[M1WriteVLDC], (instregex "LD4Rv(2d)$")>; +def : InstRW<[M1WriteVLDC, + WriteAdr], (instregex "LD4Rv(2d)_POST$")>; + +def : InstRW<[M1WriteVLDN], (instregex "LD4Fourv(8b|4h|2s)$")>; +def : InstRW<[M1WriteVLDN, + WriteAdr], (instregex "LD4Fourv(8b|4h|2s)_POST$")>; +def : InstRW<[M1WriteVLDN], (instregex "LD4Fourv(16b|8h|4s)$")>; +def : InstRW<[M1WriteVLDN, + WriteAdr], (instregex "LD4Fourv(16b|8h|4s)_POST$")>; +def : InstRW<[M1WriteVLDN], (instregex "LD4Fourv(2d)$")>; +def : InstRW<[M1WriteVLDN, + WriteAdr], (instregex "LD4Fourv(2d)_POST$")>; // ASIMD store instructions. 
+def : InstRW<[M1WriteVSTD], (instregex "ST1i(8|16|32)$")>; +def : InstRW<[M1WriteVSTD, + WriteAdr], (instregex "ST1i(8|16|32)_POST$")>; +def : InstRW<[M1WriteVSTD], (instregex "ST1i(64)$")>; +def : InstRW<[M1WriteVSTD, + WriteAdr], (instregex "ST1i(64)_POST$")>; + +def : InstRW<[WriteVST], (instregex "ST1Onev(8b|4h|2s|1d)$")>; +def : InstRW<[WriteVST, + WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d)_POST$")>; +def : InstRW<[WriteVST], (instregex "ST1Onev(16b|8h|4s|2d)$")>; +def : InstRW<[WriteVST, + WriteAdr], (instregex "ST1Onev(16b|8h|4s|2d)_POST$")>; +def : InstRW<[M1WriteVSTA], (instregex "ST1Twov(8b|4h|2s|1d)$")>; +def : InstRW<[M1WriteVSTA, + WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d)_POST$")>; +def : InstRW<[M1WriteVSTA], (instregex "ST1Twov(16b|8h|4s|2d)$")>; +def : InstRW<[M1WriteVSTA, + WriteAdr], (instregex "ST1Twov(16b|8h|4s|2d)_POST$")>; +def : InstRW<[M1WriteVSTB], (instregex "ST1Threev(8b|4h|2s|1d)$")>; +def : InstRW<[M1WriteVSTB, + WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d)_POST$")>; +def : InstRW<[M1WriteVSTB], (instregex "ST1Threev(16b|8h|4s|2d)$")>; +def : InstRW<[M1WriteVSTB, + WriteAdr], (instregex "ST1Threev(16b|8h|4s|2d)_POST$")>; +def : InstRW<[M1WriteVSTC], (instregex "ST1Fourv(8b|4h|2s|1d)$")>; +def : InstRW<[M1WriteVSTC, + WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d)_POST$")>; +def : InstRW<[M1WriteVSTC], (instregex "ST1Fourv(16b|8h|4s|2d)$")>; +def : InstRW<[M1WriteVSTC, + WriteAdr], (instregex "ST1Fourv(16b|8h|4s|2d)_POST$")>; + +def : InstRW<[M1WriteVSTD], (instregex "ST2i(8|16|32)$")>; +def : InstRW<[M1WriteVSTD, + WriteAdr], (instregex "ST2i(8|16|32)_POST$")>; +def : InstRW<[M1WriteVSTD], (instregex "ST2i(64)$")>; +def : InstRW<[M1WriteVSTD, + WriteAdr], (instregex "ST2i(64)_POST$")>; + +def : InstRW<[M1WriteVSTD], (instregex "ST2Twov(8b|4h|2s)$")>; +def : InstRW<[M1WriteVSTD, + WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>; +def : InstRW<[M1WriteVSTE], (instregex "ST2Twov(16b|8h|4s)$")>; +def : InstRW<[M1WriteVSTE, + WriteAdr], (instregex "ST2Twov(16b|8h|4s)_POST$")>; +def : InstRW<[M1WriteVSTE], (instregex "ST2Twov(2d)$")>; +def : InstRW<[M1WriteVSTE, + WriteAdr], (instregex "ST2Twov(2d)_POST$")>; + +def : InstRW<[M1WriteVSTH], (instregex "ST3i(8|16)$")>; +def : InstRW<[M1WriteVSTH, + WriteAdr], (instregex "ST3i(8|16)_POST$")>; +def : InstRW<[M1WriteVSTH], (instregex "ST3i(32)$")>; +def : InstRW<[M1WriteVSTH, + WriteAdr], (instregex "ST3i(32)_POST$")>; +def : InstRW<[M1WriteVSTF], (instregex "ST3i(64)$")>; +def : InstRW<[M1WriteVSTF, + WriteAdr], (instregex "ST3i(64)_POST$")>; + +def : InstRW<[M1WriteVSTF], (instregex "ST3Threev(8b|4h|2s)$")>; +def : InstRW<[M1WriteVSTF, + WriteAdr], (instregex "ST3Threev(8b|4h|2s)_POST$")>; +def : InstRW<[M1WriteVSTG], (instregex "ST3Threev(16b|8h|4s)$")>; +def : InstRW<[M1WriteVSTG, + WriteAdr], (instregex "ST3Threev(16b|8h|4s)_POST$")>; +def : InstRW<[M1WriteVSTG], (instregex "ST3Threev(2d)$")>; +def : InstRW<[M1WriteVSTG, + WriteAdr], (instregex "ST3Threev(2d)_POST$")>; + +def : InstRW<[M1WriteVSTH], (instregex "ST4i(8|16)$")>; +def : InstRW<[M1WriteVSTH, + WriteAdr], (instregex "ST4i(8|16)_POST$")>; +def : InstRW<[M1WriteVSTH], (instregex "ST4i(32)$")>; +def : InstRW<[M1WriteVSTH, + WriteAdr], (instregex "ST4i(32)_POST$")>; +def : InstRW<[M1WriteVSTF], (instregex "ST4i(64)$")>; +def : InstRW<[M1WriteVSTF, + WriteAdr], (instregex "ST4i(64)_POST$")>; + +def : InstRW<[M1WriteVSTF], (instregex "ST4Fourv(8b|4h|2s)$")>; +def : InstRW<[M1WriteVSTF, + WriteAdr], (instregex "ST4Fourv(8b|4h|2s)_POST$")>; +def : 
InstRW<[M1WriteVSTI], (instregex "ST4Fourv(16b|8h|4s)$")>; +def : InstRW<[M1WriteVSTI, + WriteAdr], (instregex "ST4Fourv(16b|8h|4s)_POST$")>; +def : InstRW<[M1WriteVSTI], (instregex "ST4Fourv(2d)$")>; +def : InstRW<[M1WriteVSTI, + WriteAdr], (instregex "ST4Fourv(2d)_POST$")>; // Cryptography instructions. def M1WriteAES : SchedWriteRes<[M1UnitNCRYPT]> { let Latency = 1; } diff --git a/interpreter/llvm/src/lib/Target/AArch64/AArch64SchedThunderX2T99.td b/interpreter/llvm/src/lib/Target/AArch64/AArch64SchedThunderX2T99.td index 3654eeca530a0..10df50bcf1561 100644 --- a/interpreter/llvm/src/lib/Target/AArch64/AArch64SchedThunderX2T99.td +++ b/interpreter/llvm/src/lib/Target/AArch64/AArch64SchedThunderX2T99.td @@ -1,4 +1,4 @@ -//=- AArch64SchedThunderX2T99.td - Cavium ThunderX T99 Scheduling ---*- tablegen -*-=// +//=- AArch64SchedThunderX2T99.td - Cavium ThunderX T99 ---*- tablegen -*-=// // // The LLVM Compiler Infrastructure // @@ -79,75 +79,207 @@ def THX2T99LS01 : ProcResGroup<[THX2T99P4, THX2T99P5]>; // 60 entry unified scheduler. def THX2T99Any : ProcResGroup<[THX2T99P0, THX2T99P1, THX2T99P2, - THX2T99P3, THX2T99P4, THX2T99P5]> { - let BufferSize=60; + THX2T99P3, THX2T99P4, THX2T99P5]> { + let BufferSize = 60; } // Define commonly used write types for InstRW specializations. // All definitions follow the format: THX2T99Write_Cyc_. // 3 cycles on I1. -def THX2T99Write_3Cyc_I1 : SchedWriteRes<[THX2T99I1]> { let Latency = 3; } +def THX2T99Write_3Cyc_I1 : SchedWriteRes<[THX2T99I1]> { + let Latency = 3; + let NumMicroOps = 2; +} + +// 1 cycles on I2. +def THX2T99Write_1Cyc_I2 : SchedWriteRes<[THX2T99I2]> { + let Latency = 1; + let NumMicroOps = 2; +} // 4 cycles on I1. -def THX2T99Write_4Cyc_I1 : SchedWriteRes<[THX2T99I1]> { let Latency = 4; } +def THX2T99Write_4Cyc_I1 : SchedWriteRes<[THX2T99I1]> { + let Latency = 4; + let NumMicroOps = 2; +} + +// 23 cycles on I1. +def THX2T99Write_23Cyc_I1 : SchedWriteRes<[THX2T99I1]> { + let Latency = 23; + let ResourceCycles = [13, 23]; + let NumMicroOps = 4; +} + +// 39 cycles on I1. +def THX2T99Write_39Cyc_I1 : SchedWriteRes<[THX2T99I1]> { + let Latency = 39; + let ResourceCycles = [13, 39]; + let NumMicroOps = 4; +} // 1 cycle on I0, I1, or I2. -def THX2T99Write_1Cyc_I012 : SchedWriteRes<[THX2T99I012]> { let Latency = 1; } +def THX2T99Write_1Cyc_I012 : SchedWriteRes<[THX2T99I012]> { + let Latency = 1; + let NumMicroOps = 2; +} + +// 2 cycles on I0, I1, or I2. +def THX2T99Write_2Cyc_I012 : SchedWriteRes<[THX2T99I012]> { + let Latency = 2; + let NumMicroOps = 2; +} + +// 4 cycles on I0, I1, or I2. +def THX2T99Write_4Cyc_I012 : SchedWriteRes<[THX2T99I012]> { + let Latency = 2; + let NumMicroOps = 3; +} + +// 5 cycles on I0, I1, or I2. +def THX2T99Write_5Cyc_I012 : SchedWriteRes<[THX2T99I012]> { + let Latency = 2; + let NumMicroOps = 3; +} // 5 cycles on F1. -def THX2T99Write_5Cyc_F1 : SchedWriteRes<[THX2T99F1]> { let Latency = 5; } +def THX2T99Write_5Cyc_F1 : SchedWriteRes<[THX2T99F1]> { + let Latency = 5; + let NumMicroOps = 2; +} // 7 cycles on F1. -def THX2T99Write_7Cyc_F1 : SchedWriteRes<[THX2T99F1]> { let Latency = 7; } +def THX2T99Write_7Cyc_F1 : SchedWriteRes<[THX2T99F1]> { + let Latency = 7; + let NumMicroOps = 2; +} // 4 cycles on F0 or F1. -def THX2T99Write_4Cyc_F01 : SchedWriteRes<[THX2T99F01]> { let Latency = 4; } +def THX2T99Write_4Cyc_F01 : SchedWriteRes<[THX2T99F01]> { + let Latency = 4; + let NumMicroOps = 2; +} // 5 cycles on F0 or F1. 
-def THX2T99Write_5Cyc_F01 : SchedWriteRes<[THX2T99F01]> { let Latency = 5; } +def THX2T99Write_5Cyc_F01 : SchedWriteRes<[THX2T99F01]> { + let Latency = 5; + let NumMicroOps = 2; +} // 6 cycles on F0 or F1. -def THX2T99Write_6Cyc_F01 : SchedWriteRes<[THX2T99F01]> { let Latency = 6; } +def THX2T99Write_6Cyc_F01 : SchedWriteRes<[THX2T99F01]> { + let Latency = 6; + let NumMicroOps = 3; +} // 7 cycles on F0 or F1. -def THX2T99Write_7Cyc_F01 : SchedWriteRes<[THX2T99F01]> { let Latency = 7; } +def THX2T99Write_7Cyc_F01 : SchedWriteRes<[THX2T99F01]> { + let Latency = 7; + let NumMicroOps = 3; +} // 8 cycles on F0 or F1. -def THX2T99Write_8Cyc_F01 : SchedWriteRes<[THX2T99F01]> { let Latency = 8; } +def THX2T99Write_8Cyc_F01 : SchedWriteRes<[THX2T99F01]> { + let Latency = 8; + let NumMicroOps = 3; +} + +// 10 cycles on F0 or F1. +def THX2T99Write_10Cyc_F01 : SchedWriteRes<[THX2T99F01]> { + let Latency = 10; + let NumMicroOps = 3; +} // 16 cycles on F0 or F1. def THX2T99Write_16Cyc_F01 : SchedWriteRes<[THX2T99F01]> { let Latency = 16; + let NumMicroOps = 3; let ResourceCycles = [8]; } // 23 cycles on F0 or F1. def THX2T99Write_23Cyc_F01 : SchedWriteRes<[THX2T99F01]> { let Latency = 23; + let NumMicroOps = 3; let ResourceCycles = [11]; } // 1 cycle on LS0 or LS1. -def THX2T99Write_1Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> { let Latency = 1; } +def THX2T99Write_1Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> { + let Latency = 0; +} + +// 1 cycle on LS0 or LS1 and I0, I1, or I2. +def THX2T99Write_1Cyc_LS01_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> { + let Latency = 0; + let NumMicroOps = 2; +} + +// 1 cycle on LS0 or LS1 and 2 of I0, I1, or I2. +def THX2T99Write_1Cyc_LS01_I012_I012 : + SchedWriteRes<[THX2T99LS01, THX2T99I012, THX2T99I012]> { + let Latency = 0; + let NumMicroOps = 3; +} + +// 2 cycles on LS0 or LS1. +def THX2T99Write_2Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> { + let Latency = 1; + let NumMicroOps = 2; +} // 4 cycles on LS0 or LS1. -def THX2T99Write_4Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> { let Latency = 4; } +def THX2T99Write_4Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> { + let Latency = 4; + let NumMicroOps = 4; +} // 5 cycles on LS0 or LS1. -def THX2T99Write_5Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> { let Latency = 5; } +def THX2T99Write_5Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> { + let Latency = 5; + let NumMicroOps = 3; +} // 6 cycles on LS0 or LS1. -def THX2T99Write_6Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> { let Latency = 6; } +def THX2T99Write_6Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> { + let Latency = 6; + let NumMicroOps = 3; +} + +// 4 cycles on LS0 or LS1 and I0, I1, or I2. +def THX2T99Write_4Cyc_LS01_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> { + let Latency = 4; + let NumMicroOps = 3; +} + +// 4 cycles on LS0 or LS1 and 2 of I0, I1, or I2. +def THX2T99Write_4Cyc_LS01_I012_I012 : + SchedWriteRes<[THX2T99LS01, THX2T99I012, THX2T99I012]> { + let Latency = 4; + let NumMicroOps = 3; +} // 5 cycles on LS0 or LS1 and I0, I1, or I2. def THX2T99Write_5Cyc_LS01_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> { let Latency = 5; - let NumMicroOps = 2; + let NumMicroOps = 3; } // 5 cycles on LS0 or LS1 and 2 of I0, I1, or I2. -def THX2T99Write_6Cyc_LS01_I012_I012 : +def THX2T99Write_5Cyc_LS01_I012_I012 : + SchedWriteRes<[THX2T99LS01, THX2T99I012, THX2T99I012]> { + let Latency = 5; + let NumMicroOps = 3; +} + +// 6 cycles on LS0 or LS1 and I0, I1, or I2.
+def THX2T99Write_6Cyc_LS01_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> { + let Latency = 6; + let NumMicroOps = 4; +} + +// 6 cycles on LS0 or LS1 and 2 of I0, I1, or I2. +def THX2T99Write_6Cyc_LS01_I012_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012, THX2T99I012]> { let Latency = 6; let NumMicroOps = 3; @@ -162,25 +294,25 @@ def THX2T99Write_1Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> { // 5 cycles on LS0 or LS1 and F0 or F1. def THX2T99Write_5Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> { let Latency = 5; - let NumMicroOps = 2; + let NumMicroOps = 3; } // 6 cycles on LS0 or LS1 and F0 or F1. def THX2T99Write_6Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> { let Latency = 6; - let NumMicroOps = 2; + let NumMicroOps = 3; } // 7 cycles on LS0 or LS1 and F0 or F1. def THX2T99Write_7Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> { let Latency = 7; - let NumMicroOps = 2; + let NumMicroOps = 3; } // 8 cycles on LS0 or LS1 and F0 or F1. def THX2T99Write_8Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> { let Latency = 8; - let NumMicroOps = 2; + let NumMicroOps = 3; } // Define commonly used read types. @@ -195,10 +327,8 @@ def : ReadAdvance; def : ReadAdvance; def : ReadAdvance; def : ReadAdvance; - } - //===----------------------------------------------------------------------===// // 3. Instruction Tables. @@ -211,88 +341,217 @@ let SchedModel = ThunderX2T99Model in { // Branch, immed // Branch and link, immed // Compare and branch -def : WriteRes { let Latency = 1; } +def : WriteRes { + let Latency = 1; + let NumMicroOps = 2; +} + +// Branch, register +// Branch and link, register != LR +// Branch and link, register = LR +def : WriteRes { + let Latency = 1; + let NumMicroOps = 2; +} def : WriteRes { let Latency = 1; } def : WriteRes { let Latency = 1; } def : WriteRes { let Latency = 1; } -def : WriteRes { let Unsupported = 1; } +def : WriteRes { + let Unsupported = 1; + let NumMicroOps = 2; +} -// Branch, register -// Branch and link, register != LR -// Branch and link, register = LR -def : WriteRes { let Latency = 1; } +//--- +// Branch +//--- +def : InstRW<[THX2T99Write_1Cyc_I2], (instrs B, BL, BR, BLR)>; +def : InstRW<[THX2T99Write_1Cyc_I2], (instrs RET)>; +def : InstRW<[THX2T99Write_1Cyc_I2], (instregex "^B.*")>; +def : InstRW<[THX2T99Write_1Cyc_I2], + (instregex "^CBZ", "^CBNZ", "^TBZ", "^TBNZ")>; //--- // 3.2 Arithmetic and Logical Instructions // 3.3 Move and Shift Instructions //--- + // ALU, basic // Conditional compare // Conditional select // Address generation -def : WriteRes { let Latency = 1; } +def : WriteRes { + let Latency = 1; + let ResourceCycles = [1, 3]; + let NumMicroOps = 2; +} + +def : InstRW<[WriteI], + (instregex "ADD?(W|X)r(i|r|s|x)", "ADDS?(W|X)r(i|r|s|x)(64)?", + "AND?(W|X)r(i|r|s|x)", "ANDS?(W|X)r(i|r|s|x)", + "ADC?(W|X)r(i|r|s|x)", "ADCS?(W|X)r(i|r|s|x)", + "BIC?(W|X)r(i|r|s|x)", "BICS?(W|X)r(i|r|s|x)", + "EON?(W|X)r(i|r|s|x)", "ORN?(W|X)r(i|r|s|x)", + "ORR?(W|X)r(i|r|s|x)", "SUB?(W|X)r(i|r|s|x)", + "SUBS?(W|X)r(i|r|s|x)", "SBC?(W|X)r(i|r|s|x)", + "SBCS?(W|X)r(i|r|s|x)", "CCMN?(W|X)r(i|r|s|x)", + "CCMP?(W|X)r(i|r|s|x)", "CSEL?(W|X)r(i|r|s|x)", + "CSINC?(W|X)r(i|r|s|x)", "CSINV?(W|X)r(i|r|s|x)", + "CSNEG?(W|X)r(i|r|s|x)")>; + def : InstRW<[WriteI], (instrs COPY)>; // ALU, extend and/or shift def : WriteRes { let Latency = 2; - let ResourceCycles = [2]; + let ResourceCycles = [2, 3]; + let NumMicroOps = 2; } +def : InstRW<[WriteISReg], + (instregex "ADD?(W|X)r(i|r|s|x)", "ADDS?(W|X)r(i|r|s|x)(64)?", + 
"AND?(W|X)r(i|r|s|x)", "ANDS?(W|X)r(i|r|s|x)", + "ADC?(W|X)r(i|r|s|x)", "ADCS?(W|X)r(i|r|s|x)", + "BIC?(W|X)r(i|r|s|x)", "BICS?(W|X)r(i|r|s|x)", + "EON?(W|X)r(i|r|s|x)", "ORN?(W|X)r(i|r|s|x)", + "ORR?(W|X)r(i|r|s|x)", "SUB?(W|X)r(i|r|s|x)", + "SUBS?(W|X)r(i|r|s|x)", "SBC?(W|X)r(i|r|s|x)", + "SBCS?(W|X)r(i|r|s|x)", "CCMN?(W|X)r(i|r|s|x)", + "CCMP?(W|X)r(i|r|s|x)", "CSEL?(W|X)r(i|r|s|x)", + "CSINC?(W|X)r(i|r|s|x)", "CSINV?(W|X)r(i|r|s|x)", + "CSNEG?(W|X)r(i|r|s|x)")>; + def : WriteRes { - let Latency = 2; - let ResourceCycles = [2]; + let Latency = 1; + let ResourceCycles = [1, 3]; + let NumMicroOps = 2; } +def : InstRW<[WriteIEReg], + (instregex "ADD?(W|X)r(i|r|s|x)", "ADDS?(W|X)r(i|r|s|x)(64)?", + "AND?(W|X)r(i|r|s|x)", "ANDS?(W|X)r(i|r|s|x)", + "ADC?(W|X)r(i|r|s|x)", "ADCS?(W|X)r(i|r|s|x)", + "BIC?(W|X)r(i|r|s|x)", "BICS?(W|X)r(i|r|s|x)", + "EON?(W|X)r(i|r|s|x)", "ORN?(W|X)r(i|r|s|x)", + "ORR?(W|X)r(i|r|s|x)", "SUB?(W|X)r(i|r|s|x)", + "SUBS?(W|X)r(i|r|s|x)", "SBC?(W|X)r(i|r|s|x)", + "SBCS?(W|X)r(i|r|s|x)", "CCMN?(W|X)r(i|r|s|x)", + "CCMP?(W|X)r(i|r|s|x)", "CSEL?(W|X)r(i|r|s|x)", + "CSINC?(W|X)r(i|r|s|x)", "CSINV?(W|X)r(i|r|s|x)", + "CSNEG?(W|X)r(i|r|s|x)")>; + // Move immed -def : WriteRes { let Latency = 1; } +def : WriteRes { + let Latency = 1; + let NumMicroOps = 2; +} + +def : InstRW<[THX2T99Write_1Cyc_I012], + (instrs MOVKWi, MOVKXi, MOVNWi, MOVNXi, MOVZWi, MOVZXi)>; + +def : InstRW<[THX2T99Write_1Cyc_I012], + (instrs ASRVWr, ASRVXr, LSLVWr, LSLVXr, RORVWr, RORVXr)>; // Variable shift -def : WriteRes { let Latency = 1; } +def : WriteRes { + let Latency = 1; + let NumMicroOps = 2; +} //--- // 3.4 Divide and Multiply Instructions //--- // Divide, W-form -// Latency range of 13-23. Take the average. +// Latency range of 13-23/13-39. def : WriteRes { - let Latency = 18; - let ResourceCycles = [18]; + let Latency = 39; + let ResourceCycles = [13, 39]; + let NumMicroOps = 4; } // Divide, X-form -// Latency range of 13-39. Take the average. def : WriteRes { - let Latency = 26; - let ResourceCycles = [26]; + let Latency = 23; + let ResourceCycles = [13, 23]; + let NumMicroOps = 4; } // Multiply accumulate, W-form -def : WriteRes { let Latency = 5; } +def : WriteRes { + let Latency = 5; + let NumMicroOps = 3; +} // Multiply accumulate, X-form -def : WriteRes { let Latency = 5; } +def : WriteRes { + let Latency = 5; + let NumMicroOps = 3; +} + +//def : InstRW<[WriteIM32, ReadIM, ReadIM, ReadIMA, THX2T99Write_5Cyc_I012], +// (instrs MADDWrrr, MSUBWrrr)>; +def : InstRW<[WriteIM32], (instrs MADDWrrr, MSUBWrrr)>; +def : InstRW<[WriteIM32], (instrs MADDXrrr, MSUBXrrr)>; +def : InstRW<[THX2T99Write_5Cyc_I012], + (instregex "(S|U)(MADDL|MSUBL)rrr")>; + +def : InstRW<[WriteID32], (instrs SDIVWr, UDIVWr)>; +def : InstRW<[WriteID64], (instrs SDIVXr, UDIVXr)>; // Bitfield extract, two reg -def : WriteRes { let Latency = 1; } +def : WriteRes { + let Latency = 1; + let NumMicroOps = 2; +} + +// Multiply high +def : InstRW<[THX2T99Write_4Cyc_I1], (instrs SMULHrr, UMULHrr)>; + +// Miscellaneous Data-Processing Instructions +// Bitfield extract +def : InstRW<[THX2T99Write_1Cyc_I012], (instrs EXTRWrri, EXTRXrri)>; + +// Bitfield move - basic +def : InstRW<[THX2T99Write_1Cyc_I012], + (instrs SBFMWri, SBFMXri, UBFMWri, UBFMXri)>; -// Bitfield move, basic // Bitfield move, insert -// NOTE: Handled by WriteIS.
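// ---------------------------------------------------------------------------
// A minimal sketch, assuming the surrounding ThunderX2T99 resource names: the
// WriteID32/WriteID64 entries above model a blocking, non-pipelined divider
// by pairing a worst-case Latency with ResourceCycles, so consumers wait for
// the full result latency while the issue pipe is also held busy. The def
// name THX2T99Write_ExampleDiv and its numbers are hypothetical, chosen only
// to show the shape of such a definition; they are not part of the patch.
def THX2T99Write_ExampleDiv : SchedWriteRes<[THX2T99I1]> {
  let Latency        = 23;   // consumers of the result stall for 23 cycles
  let ResourceCycles = [13]; // the I1 unit accepts no new ops for 13 cycles
  let NumMicroOps    = 4;    // the op occupies 4 micro-op slots at dispatch
}
// ---------------------------------------------------------------------------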
+def : InstRW<[THX2T99Write_1Cyc_I012], (instregex "^BFM")>; +def : InstRW<[THX2T99Write_1Cyc_I012], (instregex "(S|U)?BFM.*")>; // Count leading def : InstRW<[THX2T99Write_3Cyc_I1], (instregex "^CLS(W|X)r$", - "^CLZ(W|X)r$")>; + "^CLZ(W|X)r$")>; + +// Reverse bits +def : InstRW<[THX2T99Write_1Cyc_I012], (instrs RBITWr, RBITXr)>; + +// Cryptography Extensions +def : InstRW<[THX2T99Write_5Cyc_F1], (instregex "^AES[DE]")>; +def : InstRW<[THX2T99Write_5Cyc_F1], (instregex "^AESI?MC")>; +def : InstRW<[THX2T99Write_5Cyc_F1], (instregex "^PMULL")>; +def : InstRW<[THX2T99Write_7Cyc_F1], (instregex "^SHA1SU0")>; +def : InstRW<[THX2T99Write_7Cyc_F1], (instregex "^SHA1(H|SU1)")>; +def : InstRW<[THX2T99Write_7Cyc_F1], (instregex "^SHA1[CMP]")>; +def : InstRW<[THX2T99Write_7Cyc_F1], (instregex "^SHA256SU0")>; +def : InstRW<[THX2T99Write_7Cyc_F1], (instregex "^SHA256(H|H2|SU1)")>; + +// CRC Instructions +// def : InstRW<[THX2T99Write_4Cyc_I1], (instregex "^CRC32", "^CRC32C")>; +def : InstRW<[THX2T99Write_4Cyc_I1], + (instrs CRC32Brr, CRC32Hrr, CRC32Wrr, CRC32Xrr)>; + +def : InstRW<[THX2T99Write_4Cyc_I1], + (instrs CRC32CBrr, CRC32CHrr, CRC32CWrr, CRC32CXrr)>; // Reverse bits/bytes // NOTE: Handled by WriteI. //--- -// 3.6 Load Instructions +// 3.6 Load Instructions // 3.10 FP Load Instructions //--- @@ -300,13 +559,29 @@ def : InstRW<[THX2T99Write_3Cyc_I1], (instregex "^CLS(W|X)r$", // Load register, unscaled immed // Load register, immed unprivileged // Load register, unsigned immed -def : WriteRes { let Latency = 4; } +def : WriteRes { + let Latency = 4; + let NumMicroOps = 4; +} // Load register, immed post-index // NOTE: Handled by WriteLD, WriteI. // Load register, immed pre-index // NOTE: Handled by WriteLD, WriteAdr. -def : WriteRes { let Latency = 1; } +def : WriteRes { + let Latency = 1; + let NumMicroOps = 2; +} + +// Load pair, immed offset, normal +// Load pair, immed offset, signed words, base != SP +// Load pair, immed offset signed words, base = SP +// LDP only breaks into *one* LS micro-op. Thus +// the resources are handled by WriteLD. +def : WriteRes { + let Latency = 5; + let NumMicroOps = 5; +} // Load register offset, basic // Load register, register offset, scale by 4/8 @@ -324,23 +599,229 @@ def THX2T99ReadAdrBase : SchedReadVariant<[ SchedVar]>; def : SchedAlias; -// Load pair, immed offset, normal -// Load pair, immed offset, signed words, base != SP -// Load pair, immed offset signed words, base = SP -// LDP only breaks into *one* LS micro-op. Thus -// the resources are handling by WriteLD. -def : WriteRes { - let Latency = 5; -} - // Load pair, immed pre-index, normal // Load pair, immed pre-index, signed words // Load pair, immed post-index, normal // Load pair, immed post-index, signed words // NOTE: Handled by WriteLD, WriteLDHi, WriteAdr. 
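// ---------------------------------------------------------------------------
// A note on the convention the entries below rely on (a reading of the
// scheduling model, assumed here rather than stated by the patch): the write
// types in an InstRW list are matched against an instruction's register
// definitions in order. For a load-pair with base update, the machine-specific
// write covers the loaded value and the load/store resources, WriteLDHi
// stands for the second register of the pair, and WriteAdr stands for the
// pre/post-indexed base-register update.
// ---------------------------------------------------------------------------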
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDNPDi)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDNPQi)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDNPSi)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDNPWi)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDNPXi)>; + +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDPDi)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDPQi)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDPSi)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDPSWi)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDPWi)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDPXi)>; + +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDRBui)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDRDui)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDRHui)>; +def : InstRW<[THX2T99Write_5Cyc_LS01], (instrs LDRQui)>; +def : InstRW<[THX2T99Write_5Cyc_LS01], (instrs LDRSui)>; + +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDRDl)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDRQl)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDRWl)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDRXl)>; + +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRBi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRHi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRWi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRXi)>; + +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRSBWi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRSBXi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRSHWi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRSHXi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRSWi)>; + +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr], + (instrs LDPDpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr], + (instrs LDPQpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr], + (instrs LDPSpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr], + (instrs LDPWpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr], + (instrs LDPXpre)>; + +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteAdr], (instrs LDRBpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteAdr], (instrs LDRDpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteAdr], (instrs LDRHpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteAdr], (instrs LDRQpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteAdr], (instrs LDRSpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteAdr], (instrs LDRWpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteAdr], (instrs LDRXpre)>; + +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSBWpre)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSBXpre)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSBWpost)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSBXpost)>; + +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSHWpre)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSHXpre)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSHWpost)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSHXpost)>; + +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr],
(instrs LDRBBpre)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRBBpost)>; + +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRHHpre)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRHHpost)>; + +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr], + (instrs LDPDpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr], + (instrs LDPQpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr], + (instrs LDPSpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr], + (instrs LDPWpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr], + (instrs LDPXpost)>; + +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteI], (instrs LDRBpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteI], (instrs LDRDpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteI], (instrs LDRHpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteI], (instrs LDRQpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteI], (instrs LDRSpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteI], (instrs LDRWpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteI], (instrs LDRXpost)>; + +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr], + (instrs LDPDpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr], + (instrs LDPQpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr], + (instrs LDPSpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr], + (instrs LDPWpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr], + (instrs LDPXpre)>; + +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteAdr], (instrs LDRBpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteAdr], (instrs LDRDpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteAdr], (instrs LDRHpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteAdr], (instrs LDRQpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteAdr], (instrs LDRSpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteAdr], (instrs LDRWpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteAdr], (instrs LDRXpre)>; + +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr], + (instrs LDPDpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr], + (instrs LDPQpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr], + (instrs LDPSpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr], + (instrs LDPWpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr], + (instrs LDPXpost)>; + +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteI], (instrs LDRBpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteI], (instrs LDRDpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteI], (instrs LDRHpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteI], (instrs LDRQpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteI], (instrs LDRSpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteI], (instrs LDRWpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteI], (instrs LDRXpost)>; + +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRBroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRDroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRHroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRHHroW)>; +def : 
InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRQroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRSroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRSHWroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRSHXroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRWroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRXroW)>;
+
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRBroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRDroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRHHroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRHroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRQroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRSroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRSHWroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRSHXroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRWroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRXroX)>;
+
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRBroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRBBroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRDroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRHroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRHHroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRQroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRSroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRSHWroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRSHXroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRWroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRXroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRBroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRDroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRHroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRHHroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRQroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRSroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRSHWroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRSHXroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRWroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRXroX)>;
+
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURBi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURBBi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURDi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURHi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURHHi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURQi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURSi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURXi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURSBWi)>;
+def :
InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURSBXi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURSHWi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURSHXi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURSWi)>; + +//--- +// Prefetch +//--- +def : InstRW<[THX2T99Write_6Cyc_LS01_I012], (instrs PRFMl)>; +def : InstRW<[THX2T99Write_6Cyc_LS01_I012], (instrs PRFUMi)>; +def : InstRW<[THX2T99Write_6Cyc_LS01_I012], (instrs PRFMui)>; +def : InstRW<[THX2T99Write_6Cyc_LS01_I012], (instrs PRFMroW)>; +def : InstRW<[THX2T99Write_6Cyc_LS01_I012], (instrs PRFMroX)>; + //-- -// 3.7 Store Instructions +// 3.7 Store Instructions // 3.11 FP Store Instructions //-- @@ -382,6 +863,195 @@ def : WriteRes { // Store pair, immed pre-index, X-form // NOTE: Handled by WriteAdr, WriteSTP. +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURBi)>; +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURBBi)>; +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURDi)>; +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURHi)>; +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURHHi)>; +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURQi)>; +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURSi)>; +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURWi)>; +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURXi)>; + +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01], (instrs STTRBi)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01], (instrs STTRHi)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01], (instrs STTRWi)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01], (instrs STTRXi)>; + +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STNPDi)>; +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STNPQi)>; +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STNPXi)>; +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STNPWi)>; + +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STPDi)>; +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STPQi)>; +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STPXi)>; +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STPWi)>; + +def : InstRW<[THX2T99Write_1Cyc_LS01_I012_I012], (instrs STRBui)>; +def : InstRW<[THX2T99Write_1Cyc_LS01_I012], (instrs STRBui)>; +def : InstRW<[THX2T99Write_1Cyc_LS01_I012_I012], (instrs STRDui)>; +def : InstRW<[THX2T99Write_1Cyc_LS01_I012], (instrs STRDui)>; +def : InstRW<[THX2T99Write_1Cyc_LS01_I012_I012], (instrs STRHui)>; +def : InstRW<[THX2T99Write_1Cyc_LS01_I012], (instrs STRHui)>; +def : InstRW<[THX2T99Write_1Cyc_LS01_I012_I012], (instrs STRQui)>; +def : InstRW<[THX2T99Write_1Cyc_LS01_I012], (instrs STRQui)>; +def : InstRW<[THX2T99Write_1Cyc_LS01_I012_I012], (instrs STRXui)>; +def : InstRW<[THX2T99Write_1Cyc_LS01_I012], (instrs STRXui)>; +def : InstRW<[THX2T99Write_1Cyc_LS01_I012_I012], (instrs STRWui)>; +def : InstRW<[THX2T99Write_1Cyc_LS01_I012], (instrs STRWui)>; + +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], + (instrs STPDpre, STPDpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STPDpre, STPDpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], + (instrs STPDpre, STPDpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STPDpre, STPDpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], + (instrs STPQpre, STPQpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STPQpre, STPQpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], + (instrs STPQpre, STPQpost)>; +def : InstRW<[WriteAdr, 
THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STPQpre, STPQpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], + (instrs STPSpre, STPSpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STPSpre, STPSpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], + (instrs STPSpre, STPSpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STPSpre, STPSpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], + (instrs STPWpre, STPWpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STPWpre, STPWpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], + (instrs STPWpre, STPWpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STPWpre, STPWpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], + (instrs STPXpre, STPXpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STPXpre, STPXpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], + (instrs STPXpre, STPXpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STPXpre, STPXpost)>; + +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], + (instrs STRBpre, STRBpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRBpre, STRBpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], + (instrs STRBpre, STRBpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRBpre, STRBpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], + (instrs STRBBpre, STRBBpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRBBpre, STRBBpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], + (instrs STRBBpre, STRBBpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRBBpre, STRBBpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], + (instrs STRDpre, STRDpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRDpre, STRDpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], + (instrs STRDpre, STRDpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRDpre, STRDpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], + (instrs STRHpre, STRHpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRHpre, STRHpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], + (instrs STRHpre, STRHpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRHpre, STRHpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], + (instrs STRHHpre, STRHHpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRHHpre, STRHHpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], + (instrs STRHHpre, STRHHpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRHHpre, STRHHpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], + (instrs STRQpre, STRQpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRQpre, STRQpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], + (instrs STRQpre, STRQpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRQpre, STRQpost)>; +def : InstRW<[WriteAdr, 
THX2T99Write_1Cyc_LS01_I012], + (instrs STRSpre, STRSpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRSpre, STRSpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], + (instrs STRSpre, STRSpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRSpre, STRSpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], + (instrs STRWpre, STRWpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRWpre, STRWpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], + (instrs STRWpre, STRWpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRWpre, STRWpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], + (instrs STRXpre, STRXpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRXpre, STRXpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], + (instrs STRXpre, STRXpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRXpre, STRXpost)>; + +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRBroW, STRBroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRBroW, STRBroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRBBroW, STRBBroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRBBroW, STRBBroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRDroW, STRDroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRDroW, STRDroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRHroW, STRHroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRHroW, STRHroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRHHroW, STRHHroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRHHroW, STRHHroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRQroW, STRQroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRQroW, STRQroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRSroW, STRSroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRSroW, STRSroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRWroW, STRWroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRWroW, STRWroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRXroW, STRXroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRXroW, STRXroX)>; + //--- // 3.8 FP Data Processing Instructions //--- @@ -389,28 +1059,95 @@ def : WriteRes { // FP absolute value // FP min/max // FP negate -def : WriteRes { let Latency = 5; } +def : WriteRes { + let Latency = 5; + let NumMicroOps = 2; +} // FP arithmetic def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FADD", "^FSUB")>; // FP compare -def : WriteRes { let Latency = 5; } +def : WriteRes { + let Latency = 5; + let NumMicroOps = 2; +} -// FP divide, S-form -// FP square root, S-form -def : WriteRes { +// FP Mul, Div, Sqrt +def : WriteRes { + let Latency = 22; + let ResourceCycles = [19]; +} + +def 
THX2T99XWriteFDiv : SchedWriteRes<[THX2T99F01]> { + let Latency = 16; + let ResourceCycles = [8]; + let NumMicroOps = 4; +} + +def THX2T99XWriteFDivSP : SchedWriteRes<[THX2T99F01]> { let Latency = 16; let ResourceCycles = [8]; + let NumMicroOps = 4; } +def THX2T99XWriteFDivDP : SchedWriteRes<[THX2T99F01]> { + let Latency = 23; + let ResourceCycles = [12]; + let NumMicroOps = 4; +} + +def THX2T99XWriteFSqrtSP : SchedWriteRes<[THX2T99F01]> { + let Latency = 16; + let ResourceCycles = [8]; + let NumMicroOps = 4; +} + +def THX2T99XWriteFSqrtDP : SchedWriteRes<[THX2T99F01]> { + let Latency = 23; + let ResourceCycles = [12]; + let NumMicroOps = 4; +} + +// FP divide, S-form +// FP square root, S-form +def : InstRW<[THX2T99XWriteFDivSP], (instrs FDIVSrr)>; +def : InstRW<[THX2T99XWriteFSqrtSP], (instrs FSQRTSr)>; +def : InstRW<[THX2T99XWriteFDivSP], (instregex "^FDIVv.*32$")>; +def : InstRW<[THX2T99XWriteFSqrtSP], (instregex "^.*SQRT.*32$")>; +def : InstRW<[THX2T99Write_16Cyc_F01], (instregex "^FDIVSrr", "^FSQRTSrr")>; + // FP divide, D-form // FP square root, D-form -def : InstRW<[THX2T99Write_23Cyc_F01], (instrs FDIVDrr, FSQRTDr)>; +def : InstRW<[THX2T99XWriteFDivDP], (instrs FDIVDrr)>; +def : InstRW<[THX2T99XWriteFSqrtDP], (instrs FSQRTDr)>; +def : InstRW<[THX2T99XWriteFDivDP], (instregex "^FDIVv.*64$")>; +def : InstRW<[THX2T99XWriteFSqrtDP], (instregex "^.*SQRT.*64$")>; +def : InstRW<[THX2T99Write_23Cyc_F01], (instregex "^FDIVDrr", "^FSQRTDrr")>; // FP multiply // FP multiply accumulate -def : WriteRes { let Latency = 6; } +def : WriteRes { + let Latency = 6; + let ResourceCycles = [2]; + let NumMicroOps = 3; +} + +def THX2T99XWriteFMul : SchedWriteRes<[THX2T99F01]> { + let Latency = 6; + let ResourceCycles = [2]; + let NumMicroOps = 3; +} + +def THX2T99XWriteFMulAcc : SchedWriteRes<[THX2T99F01]> { + let Latency = 6; + let ResourceCycles = [2]; + let NumMicroOps = 3; +} + +def : InstRW<[THX2T99XWriteFMul], (instregex "^FMUL", "^FNMUL")>; +def : InstRW<[THX2T99XWriteFMulAcc], + (instregex "^FMADD", "^FMSUB", "^FNMADD", "^FNMSUB")>; // FP round to integral def : InstRW<[THX2T99Write_7Cyc_F01], @@ -426,15 +1163,25 @@ def : InstRW<[THX2T99Write_4Cyc_F01], (instregex "^FCSEL")>; // FP convert, from vec to vec reg // FP convert, from gen to vec reg // FP convert, from vec to gen reg -def : WriteRes { let Latency = 7; } +def : WriteRes { + let Latency = 7; + let NumMicroOps = 3; +} // FP move, immed // FP move, register -def : WriteRes { let Latency = 4; } +def : WriteRes { + let Latency = 4; + let NumMicroOps = 2; +} // FP transfer, from gen to vec reg // FP transfer, from vec to gen reg -def : WriteRes { let Latency = 4; } +def : WriteRes { + let Latency = 4; + let NumMicroOps = 2; +} + def : InstRW<[THX2T99Write_5Cyc_F01], (instrs FMOVXDHighr, FMOVDXHighr)>; //--- @@ -470,19 +1217,135 @@ def : InstRW<[THX2T99Write_5Cyc_F01], (instrs FMOVXDHighr, FMOVDXHighr)>; // ASIMD shift by register, basic, Q-form // ASIMD shift by register, complex, D-form // ASIMD shift by register, complex, Q-form -def : WriteRes { let Latency = 7; } +def : WriteRes { + let Latency = 7; + let NumMicroOps = 4; + let ResourceCycles = [4, 23]; +} // ASIMD arith, reduce, 4H/4S // ASIMD arith, reduce, 8B/8H // ASIMD arith, reduce, 16B -def : InstRW<[THX2T99Write_5Cyc_F01], - (instregex "^ADDVv", "^SADDLVv", "^UADDLVv")>; // ASIMD logical (MOV, MVN, ORN, ORR) -def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^ORRv", "^ORNv", "^NOTv")>; +def : InstRW<[THX2T99Write_5Cyc_F01], + (instregex "^ANDv", "^BICv", "^EORv", "^MOVv", 
"^MVNv", + "^ORRv", "^ORNv", "^NOTv")>; +// ASIMD arith, reduce +def : InstRW<[THX2T99Write_10Cyc_F01], + (instregex "^ADDVv", "^SADDLVv", "^UADDLVv")>; // ASIMD polynomial (8x8) multiply long -def : InstRW<[THX2T99Write_5Cyc_F01], (instrs PMULLv8i8, PMULLv16i8)>; +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^(S|U|SQD)MULL")>; +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "(S|U|SQD)(MLAL|MLSL|MULL)v.*")>; +def : InstRW<[THX2T99Write_5Cyc_F1], (instregex "^PMULL(v8i8|v16i8)")>; +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^PMULL(v1i64|v2i64)")>; + +// ASIMD absolute diff accum, D-form +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^[SU]ABA(v8i8|v4i16|v2i32)$")>; +// ASIMD absolute diff accum, Q-form +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^[SU]ABA(v16i8|v8i16|v4i32)$")>; +// ASIMD absolute diff accum long +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^[SU]ABAL")>; +// ASIMD arith, reduce, 4H/4S +def : InstRW<[THX2T99Write_5Cyc_F01], + (instregex "^[SU]?ADDL?V(v8i8|v4i16|v2i32)v$")>; +// ASIMD arith, reduce, 8B +def : InstRW<[THX2T99Write_5Cyc_F01], + (instregex "^[SU]?ADDL?V(v8i16|v4i32)v$")>; +// ASIMD arith, reduce, 16B/16H +def : InstRW<[THX2T99Write_10Cyc_F01], + (instregex "^[SU]?ADDL?Vv16i8v$")>; +// ASIMD max/min, reduce, 4H/4S +def : InstRW<[THX2T99Write_10Cyc_F01], + (instregex "^[SU](MIN|MAX)V(v4i16|v4i32)v$")>; +// ASIMD max/min, reduce, 8B/8H +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^[SU](MIN|MAX)V(v8i8|v8i16)v$")>; +// ASIMD max/min, reduce, 16B/16H +def : InstRW<[THX2T99Write_10Cyc_F01], + (instregex "^[SU](MIN|MAX)Vv16i8v$")>; +// ASIMD multiply, D-form +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^(P?MUL|SQR?DMULH)" # + "(v8i8|v4i16|v2i32|v1i8|v1i16|v1i32|v1i64)" # + "(_indexed)?$")>; +// ASIMD multiply, Q-form +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^(P?MUL|SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?$")>; +// ASIMD multiply accumulate, D-form +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^ML[AS](v8i8|v4i16|v2i32)(_indexed)?$")>; +// ASIMD multiply accumulate, Q-form +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^ML[AS](v16i8|v8i16|v4i32)(_indexed)?$")>; +// ASIMD shift accumulate +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "SRSRAv","SSRAv","URSRAv","USRAv")>; + +// ASIMD shift by immed, basic +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "RSHRNv","SHRNv", "SQRSHRNv","SQRSHRUNv", + "SQSHRNv","SQSHRUNv", "UQRSHRNv", + "UQSHRNv","SQXTNv","SQXTUNv","UQXTNv")>; +// ASIMD shift by immed, complex +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^[SU]?(Q|R){1,2}SHR")>; +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^SQSHLU")>; +// ASIMD shift by register, basic, Q-form +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^[SU]SHL(v16i8|v8i16|v4i32|v2i64)")>; +// ASIMD shift by register, complex, D-form +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^[SU][QR]{1,2}SHL" # + "(v1i8|v1i16|v1i32|v1i64|v8i8|v4i16|v2i32|b|d|h|s)")>; +// ASIMD shift by register, complex, Q-form +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^[SU][QR]{1,2}SHL(v16i8|v8i16|v4i32|v2i64)")>; + +// ASIMD Arithmetic +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "(ADD|SUB)(v8i8|v4i16|v2i32|v1i64)")>; +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "(ADD|SUB)(v16i8|v8i16|v4i32|v2i64)")>; +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "(ADD|SUB)HNv.*")>; +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "(RADD|RSUB)HNv.*")>; +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^SQADD", 
"^SQNEG", "^SQSUB", "^SRHADD", + "^SUQADD", "^UQADD", "^UQSUB", "^URHADD", "^USQADD")>; +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "ADDP(v16i8|v8i16|v4i32|v2i64)")>; +def : InstRW<[THX2T99Write_5Cyc_F01], + (instregex "((AND|ORN|EOR|EON)S?(Xr[rsi]|v16i8|v8i16|v4i32)|" # + "(ORR|BIC)S?(Xr[rs]|v16i8|v8i16|v4i32))")>; +def : InstRW<[THX2T99Write_5Cyc_F01], + (instregex "(CLS|CLZ|CNT)(v4i32|v8i16|v16i8)")>; +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^SADALP","^UADALP")>; +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^SADDLPv","^UADDLPv")>; +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^SADDLV","^UADDLV")>; +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^ADDVv","^SMAXVv","^UMAXVv","^SMINVv","^UMINVv")>; +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^SABAv","^UABAv","^SABALv","^UABALv")>; +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^SQADDv","^SQSUBv","^UQADDv","^UQSUBv")>; +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^SUQADDv","^USQADDv")>; +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^ADDHNv","^RADDHNv", "^RSUBHNv", + "^SQABS", "^SQADD", "^SQNEG", "^SQSUB", + "^SRHADD", "^SUBHNv", "^SUQADD", + "^UQADD", "^UQSUB", "^URHADD", "^USQADD")>; +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^CMEQv","^CMGEv","^CMGTv", + "^CMLEv","^CMLTv", "^CMHIv","^CMHSv")>; +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^SMAXv","^SMINv","^UMAXv","^UMINv", + "^SMAXPv","^SMINPv","^UMAXPv","^UMINPv")>; +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^SABDv","^UABDv", "^SABDLv","^UABDLv")>; //--- // 3.13 ASIMD Floating-point Instructions @@ -493,7 +1356,8 @@ def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FABSv")>; // ASIMD FP arith, normal, D-form // ASIMD FP arith, normal, Q-form -def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FABDv", "^FADDv", "^FSUBv")>; +def : InstRW<[THX2T99Write_6Cyc_F01], + (instregex "^FABDv", "^FADDv", "^FSUBv")>; // ASIMD FP arith,pairwise, D-form // ASIMD FP arith, pairwise, Q-form @@ -503,8 +1367,15 @@ def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FADDPv")>; // ASIMD FP compare, Q-form def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FACGEv", "^FACGTv")>; def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FCMEQv", "^FCMGEv", - "^FCMGTv", "^FCMLEv", - "^FCMLTv")>; + "^FCMGTv", "^FCMLEv", + "^FCMLTv")>; + +// ASIMD FP round, D-form +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^FRINT[AIMNPXZ](v2f32)")>; +// ASIMD FP round, Q-form +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^FRINT[AIMNPXZ](v4f32|v2f64)")>; // ASIMD FP convert, long // ASIMD FP convert, narrow @@ -512,14 +1383,26 @@ def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FCMEQv", "^FCMGEv", // ASIMD FP convert, other, Q-form // NOTE: Handled by WriteV. 
+// ASIMD FP convert, long and narrow +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^FCVT(L|N|XN)v")>; +// ASIMD FP convert, other, D-form +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v2f32|v1i32|v2i32|v1i64)")>; +// ASIMD FP convert, other, Q-form +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v4f32|v2f64|v4i32|v2i64)")>; + // ASIMD FP divide, D-form, F32 def : InstRW<[THX2T99Write_16Cyc_F01], (instrs FDIVv2f32)>; +def : InstRW<[THX2T99Write_16Cyc_F01], (instregex "FDIVv2f32")>; // ASIMD FP divide, Q-form, F32 def : InstRW<[THX2T99Write_16Cyc_F01], (instrs FDIVv4f32)>; +def : InstRW<[THX2T99Write_16Cyc_F01], (instregex "FDIVv4f32")>; // ASIMD FP divide, Q-form, F64 def : InstRW<[THX2T99Write_23Cyc_F01], (instrs FDIVv2f64)>; +def : InstRW<[THX2T99Write_23Cyc_F01], (instregex "FDIVv2f64")>; // ASIMD FP max/min, normal, D-form // ASIMD FP max/min, normal, Q-form @@ -540,20 +1423,24 @@ def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FMAXVv", "^FMAXNMVv", // ASIMD FP multiply, Q-form, FZ // ASIMD FP multiply, Q-form, no FZ def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FMULv", "^FMULXv")>; +def : InstRW<[THX2T99Write_6Cyc_F01], + (instregex "^FMULX?(v2f32|v1i32|v2i32|v1i64|32|64)")>; +def : InstRW<[THX2T99Write_6Cyc_F01], + (instregex "^FMULX?(v4f32|v2f64|v4i32|v2i64)")>; // ASIMD FP multiply accumulate, Dform, FZ // ASIMD FP multiply accumulate, Dform, no FZ // ASIMD FP multiply accumulate, Qform, FZ // ASIMD FP multiply accumulate, Qform, no FZ def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FMLAv", "^FMLSv")>; +def : InstRW<[THX2T99Write_6Cyc_F01], + (instregex "^FML[AS](v2f32|v1i32|v2i32|v1i64)")>; +def : InstRW<[THX2T99Write_6Cyc_F01], + (instregex "^FML[AS](v4f32|v2f64|v4i32|v2i64)")>; // ASIMD FP negate def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FNEGv")>; -// ASIMD FP round, D-form -// ASIMD FP round, Q-form -// NOTE: Handled by WriteV. - //-- // 3.14 ASIMD Miscellaneous Instructions //-- @@ -563,37 +1450,66 @@ def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^RBITv")>; // ASIMD bitwise insert, D-form // ASIMD bitwise insert, Q-form -def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^BIFv", "^BITv", "^BSLv")>; +def : InstRW<[THX2T99Write_5Cyc_F01], + (instregex "^BIFv", "^BITv", "^BSLv")>; // ASIMD count, D-form // ASIMD count, Q-form -def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^CLSv", "^CLZv", "^CNTv")>; +def : InstRW<[THX2T99Write_5Cyc_F01], + (instregex "^CLSv", "^CLZv", "^CNTv")>; // ASIMD duplicate, gen reg // ASIMD duplicate, element def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^DUPv")>; +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^CPY")>; +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^DUPv.+gpr")>; // ASIMD extract def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^EXTv")>; // ASIMD extract narrow +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^XTNv")>; + // ASIMD extract narrow, saturating -// NOTE: Handled by WriteV. 
+def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^SQXTNv", "^SQXTUNv", "^UQXTNv")>; // ASIMD insert, element to element def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^INSv")>; +// ASIMD transfer, element to gen reg +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^[SU]MOVv")>; + // ASIMD move, integer immed def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^MOVIv", "^MOVIDv")>; // ASIMD move, FP immed def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FMOVv")>; +// ASIMD table lookup, D-form +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v8i8One")>; +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v8i8Two")>; +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v8i8Three")>; +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v8i8Four")>; + +// ASIMD table lookup, Q-form +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v16i8One")>; +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v16i8Two")>; +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v16i8Three")>; +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v16i8Four")>; + +// ASIMD transpose +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^TRN1", "^TRN2")>; + +// ASIMD unzip/zip +def : InstRW<[THX2T99Write_5Cyc_F01], + (instregex "^UZP1", "^UZP2", "^ZIP1", "^ZIP2")>; + // ASIMD reciprocal estimate, D-form // ASIMD reciprocal estimate, Q-form -def : InstRW<[THX2T99Write_5Cyc_F01], +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FRECPEv", "^FRECPXv", "^URECPEv", - "^FRSQRTEv", "^URSQRTEv")>; + "^FRSQRTEv", "^URSQRTEv")>; // ASIMD reciprocal step, D-form, FZ // ASIMD reciprocal step, D-form, no FZ @@ -602,7 +1518,7 @@ def : InstRW<[THX2T99Write_5Cyc_F01], def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FRECPSv", "^FRSQRTSv")>; // ASIMD reverse -def : InstRW<[THX2T99Write_5Cyc_F01], +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^REV16v", "^REV32v", "^REV64v")>; // ASIMD table lookup, D-form @@ -610,135 +1526,135 @@ def : InstRW<[THX2T99Write_5Cyc_F01], def : InstRW<[THX2T99Write_8Cyc_F01], (instregex "^TBLv", "^TBXv")>; // ASIMD transfer, element to word or word -def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^UMOVv")>; +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^[SU]MOVv")>; // ASIMD transfer, element to gen reg -def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^SMOVv", "^UMOVv")>; +def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "(S|U)MOVv.*")>; // ASIMD transfer gen reg to element def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^INSv")>; // ASIMD transpose def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^TRN1v", "^TRN2v", - "^UZP1v", "^UZP2v")>; + "^UZP1v", "^UZP2v")>; // ASIMD unzip/zip def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^ZIP1v", "^ZIP2v")>; //-- -// 3.15 ASIMD Load Instructions +// 3.15 ASIMD Load Instructions //-- // ASIMD load, 1 element, multiple, 1 reg, D-form // ASIMD load, 1 element, multiple, 1 reg, Q-form -def : InstRW<[THX2T99Write_4Cyc_LS01], +def : InstRW<[THX2T99Write_4Cyc_LS01], (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[THX2T99Write_4Cyc_LS01, WriteAdr], +def : InstRW<[THX2T99Write_4Cyc_LS01, WriteAdr], (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; // ASIMD load, 1 element, multiple, 2 reg, D-form // ASIMD load, 1 element, multiple, 2 reg, Q-form -def : InstRW<[THX2T99Write_4Cyc_LS01], +def : InstRW<[THX2T99Write_4Cyc_LS01], (instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[THX2T99Write_4Cyc_LS01, WriteAdr], +def : InstRW<[THX2T99Write_4Cyc_LS01, 
WriteAdr], (instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; // ASIMD load, 1 element, multiple, 3 reg, D-form // ASIMD load, 1 element, multiple, 3 reg, Q-form -def : InstRW<[THX2T99Write_5Cyc_LS01], +def : InstRW<[THX2T99Write_5Cyc_LS01], (instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[THX2T99Write_5Cyc_LS01, WriteAdr], +def : InstRW<[THX2T99Write_5Cyc_LS01, WriteAdr], (instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; // ASIMD load, 1 element, multiple, 4 reg, D-form // ASIMD load, 1 element, multiple, 4 reg, Q-form -def : InstRW<[THX2T99Write_6Cyc_LS01], +def : InstRW<[THX2T99Write_6Cyc_LS01], (instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[THX2T99Write_6Cyc_LS01, WriteAdr], +def : InstRW<[THX2T99Write_6Cyc_LS01, WriteAdr], (instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; // ASIMD load, 1 element, one lane, B/H/S // ASIMD load, 1 element, one lane, D def : InstRW<[THX2T99Write_5Cyc_LS01_F01], (instregex "^LD1i(8|16|32|64)$")>; -def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr], +def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr], (instregex "^LD1i(8|16|32|64)_POST$")>; // ASIMD load, 1 element, all lanes, D-form, B/H/S // ASIMD load, 1 element, all lanes, D-form, D // ASIMD load, 1 element, all lanes, Q-form -def : InstRW<[THX2T99Write_5Cyc_LS01_F01], +def : InstRW<[THX2T99Write_5Cyc_LS01_F01], (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr], +def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr], (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; // ASIMD load, 2 element, multiple, D-form, B/H/S // ASIMD load, 2 element, multiple, Q-form, D -def : InstRW<[THX2T99Write_5Cyc_LS01_F01], +def : InstRW<[THX2T99Write_5Cyc_LS01_F01], (instregex "^LD2Twov(8b|4h|2s|16b|8h|4s|2d)$")>; -def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr], +def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr], (instregex "^LD2Twov(8b|4h|2s|16b|8h|4s|2d)_POST$")>; // ASIMD load, 2 element, one lane, B/H // ASIMD load, 2 element, one lane, S // ASIMD load, 2 element, one lane, D def : InstRW<[THX2T99Write_5Cyc_LS01_F01], (instregex "^LD2i(8|16|32|64)$")>; -def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr], +def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr], (instregex "^LD2i(8|16|32|64)_POST$")>; // ASIMD load, 2 element, all lanes, D-form, B/H/S // ASIMD load, 2 element, all lanes, D-form, D // ASIMD load, 2 element, all lanes, Q-form -def : InstRW<[THX2T99Write_5Cyc_LS01_F01], +def : InstRW<[THX2T99Write_5Cyc_LS01_F01], (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr], +def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr], (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; // ASIMD load, 3 element, multiple, D-form, B/H/S // ASIMD load, 3 element, multiple, Q-form, B/H/S // ASIMD load, 3 element, multiple, Q-form, D -def : InstRW<[THX2T99Write_8Cyc_LS01_F01], +def : InstRW<[THX2T99Write_8Cyc_LS01_F01], (instregex "^LD3Threev(8b|4h|2s|16b|8h|4s|2d)$")>; -def : InstRW<[THX2T99Write_8Cyc_LS01_F01, WriteAdr], +def : InstRW<[THX2T99Write_8Cyc_LS01_F01, WriteAdr], (instregex "^LD3Threev(8b|4h|2s|16b|8h|4s|2d)_POST$")>; // ASIMD load, 3 element, one lone, B/H // ASIMD load, 3 element, one lane, S // ASIMD load, 3 element, one lane, D def : InstRW<[THX2T99Write_7Cyc_LS01_F01], (instregex "^LD3i(8|16|32|64)$")>; -def : InstRW<[THX2T99Write_7Cyc_LS01_F01, WriteAdr], +def : InstRW<[THX2T99Write_7Cyc_LS01_F01, WriteAdr], (instregex 
"^LD3i(8|16|32|64)_POST$")>; // ASIMD load, 3 element, all lanes, D-form, B/H/S // ASIMD load, 3 element, all lanes, D-form, D // ASIMD load, 3 element, all lanes, Q-form, B/H/S // ASIMD load, 3 element, all lanes, Q-form, D -def : InstRW<[THX2T99Write_7Cyc_LS01_F01], +def : InstRW<[THX2T99Write_7Cyc_LS01_F01], (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[THX2T99Write_7Cyc_LS01_F01, WriteAdr], +def : InstRW<[THX2T99Write_7Cyc_LS01_F01, WriteAdr], (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; // ASIMD load, 4 element, multiple, D-form, B/H/S // ASIMD load, 4 element, multiple, Q-form, B/H/S // ASIMD load, 4 element, multiple, Q-form, D -def : InstRW<[THX2T99Write_8Cyc_LS01_F01], +def : InstRW<[THX2T99Write_8Cyc_LS01_F01], (instregex "^LD4Fourv(8b|4h|2s|16b|8h|4s|2d)$")>; -def : InstRW<[THX2T99Write_8Cyc_LS01_F01, WriteAdr], +def : InstRW<[THX2T99Write_8Cyc_LS01_F01, WriteAdr], (instregex "^LD4Fourv(8b|4h|2s|16b|8h|4s|2d)_POST$")>; // ASIMD load, 4 element, one lane, B/H // ASIMD load, 4 element, one lane, S // ASIMD load, 4 element, one lane, D def : InstRW<[THX2T99Write_6Cyc_LS01_F01], (instregex "^LD4i(8|16|32|64)$")>; -def : InstRW<[THX2T99Write_6Cyc_LS01_F01, WriteAdr], +def : InstRW<[THX2T99Write_6Cyc_LS01_F01, WriteAdr], (instregex "^LD4i(8|16|32|64)_POST$")>; // ASIMD load, 4 element, all lanes, D-form, B/H/S // ASIMD load, 4 element, all lanes, D-form, D // ASIMD load, 4 element, all lanes, Q-form, B/H/S // ASIMD load, 4 element, all lanes, Q-form, D -def : InstRW<[THX2T99Write_6Cyc_LS01_F01], +def : InstRW<[THX2T99Write_6Cyc_LS01_F01], (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[THX2T99Write_6Cyc_LS01_F01, WriteAdr], +def : InstRW<[THX2T99Write_6Cyc_LS01_F01, WriteAdr], (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; //-- @@ -747,106 +1663,83 @@ def : InstRW<[THX2T99Write_6Cyc_LS01_F01, WriteAdr], // ASIMD store, 1 element, multiple, 1 reg, D-form // ASIMD store, 1 element, multiple, 1 reg, Q-form -def : InstRW<[THX2T99Write_1Cyc_LS01], +def : InstRW<[THX2T99Write_1Cyc_LS01], (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr], +def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr], (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; // ASIMD store, 1 element, multiple, 2 reg, D-form // ASIMD store, 1 element, multiple, 2 reg, Q-form -def : InstRW<[THX2T99Write_1Cyc_LS01], +def : InstRW<[THX2T99Write_1Cyc_LS01], (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr], +def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr], (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; // ASIMD store, 1 element, multiple, 3 reg, D-form // ASIMD store, 1 element, multiple, 3 reg, Q-form -def : InstRW<[THX2T99Write_1Cyc_LS01], +def : InstRW<[THX2T99Write_1Cyc_LS01], (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr], +def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr], (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; // ASIMD store, 1 element, multiple, 4 reg, D-form // ASIMD store, 1 element, multiple, 4 reg, Q-form -def : InstRW<[THX2T99Write_1Cyc_LS01], +def : InstRW<[THX2T99Write_1Cyc_LS01], (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr], +def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr], (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; // ASIMD store, 1 element, one lane, B/H/S // ASIMD store, 1 element, one lane, D -def : 
InstRW<[THX2T99Write_1Cyc_LS01_F01], +def : InstRW<[THX2T99Write_1Cyc_LS01_F01], (instregex "^ST1i(8|16|32|64)$")>; -def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], +def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], (instregex "^ST1i(8|16|32|64)_POST$")>; // ASIMD store, 2 element, multiple, D-form, B/H/S // ASIMD store, 2 element, multiple, Q-form, B/H/S // ASIMD store, 2 element, multiple, Q-form, D -def : InstRW<[THX2T99Write_1Cyc_LS01_F01], +def : InstRW<[THX2T99Write_1Cyc_LS01_F01], (instregex "^ST2Twov(8b|4h|2s|16b|8h|4s|2d)$")>; -def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], +def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], (instregex "^ST2Twov(8b|4h|2s|16b|8h|4s|2d)_POST$")>; // ASIMD store, 2 element, one lane, B/H/S // ASIMD store, 2 element, one lane, D -def : InstRW<[THX2T99Write_1Cyc_LS01_F01], +def : InstRW<[THX2T99Write_1Cyc_LS01_F01], (instregex "^ST2i(8|16|32|64)$")>; -def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], +def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], (instregex "^ST2i(8|16|32|64)_POST$")>; // ASIMD store, 3 element, multiple, D-form, B/H/S // ASIMD store, 3 element, multiple, Q-form, B/H/S // ASIMD store, 3 element, multiple, Q-form, D -def : InstRW<[THX2T99Write_1Cyc_LS01_F01], +def : InstRW<[THX2T99Write_1Cyc_LS01_F01], (instregex "^ST3Threev(8b|4h|2s|16b|8h|4s|2d)$")>; -def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], +def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], (instregex "^ST3Threev(8b|4h|2s|16b|8h|4s|2d)_POST$")>; // ASIMD store, 3 element, one lane, B/H // ASIMD store, 3 element, one lane, S // ASIMD store, 3 element, one lane, D def : InstRW<[THX2T99Write_1Cyc_LS01_F01], (instregex "^ST3i(8|16|32|64)$")>; -def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], +def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], (instregex "^ST3i(8|16|32|64)_POST$")>; // ASIMD store, 4 element, multiple, D-form, B/H/S // ASIMD store, 4 element, multiple, Q-form, B/H/S // ASIMD store, 4 element, multiple, Q-form, D -def : InstRW<[THX2T99Write_1Cyc_LS01_F01], +def : InstRW<[THX2T99Write_1Cyc_LS01_F01], (instregex "^ST4Fourv(8b|4h|2s|16b|8h|4s|2d)$")>; -def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], +def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], (instregex "^ST4Fourv(8b|4h|2s|16b|8h|4s|2d)_POST$")>; // ASIMD store, 4 element, one lane, B/H // ASIMD store, 4 element, one lane, S // ASIMD store, 4 element, one lane, D def : InstRW<[THX2T99Write_1Cyc_LS01_F01], (instregex "^ST4i(8|16|32|64)$")>; -def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], +def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], (instregex "^ST4i(8|16|32|64)_POST$")>; -//-- -// 3.17 Cryptography Extensions -//-- - -// Crypto AES ops -def : InstRW<[THX2T99Write_5Cyc_F1], (instregex "^AES")>; - -// Crypto polynomial (64x64) multiply long -def : InstRW<[THX2T99Write_5Cyc_F1], (instrs PMULLv1i64, PMULLv2i64)>; - -// Crypto SHA1 xor ops -// Crypto SHA1 schedule acceleration ops -// Crypto SHA256 schedule acceleration op (1 u-op) -// Crypto SHA256 schedule acceleration op (2 u-ops) -// Crypto SHA256 hash acceleration ops -def : InstRW<[THX2T99Write_7Cyc_F1], (instregex "^SHA")>; - -//-- -// 3.18 CRC -//-- - -// CRC checksum ops -def : InstRW<[THX2T99Write_4Cyc_I1], (instregex "^CRC32")>; - } // SchedModel = ThunderX2T99Model + diff --git a/interpreter/llvm/src/lib/Target/AArch64/AArch64Subtarget.cpp b/interpreter/llvm/src/lib/Target/AArch64/AArch64Subtarget.cpp index abdeac019a185..ea61124527365 100644 --- 
a/interpreter/llvm/src/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/interpreter/llvm/src/lib/Target/AArch64/AArch64Subtarget.cpp @@ -81,6 +81,7 @@ void AArch64Subtarget::initializeProperties() { break; case CortexA57: MaxInterleaveFactor = 4; + PrefFunctionAlignment = 4; break; case ExynosM1: MaxInterleaveFactor = 4; @@ -90,7 +91,12 @@ void AArch64Subtarget::initializeProperties() { break; case Falkor: MaxInterleaveFactor = 4; - VectorInsertExtractBaseCost = 2; + // FIXME: remove this to enable 64-bit SLP if performance looks good. + MinVectorRegisterBitWidth = 128; + CacheLineSize = 128; + PrefetchDistance = 820; + MinPrefetchStride = 2048; + MaxPrefetchIterationsAhead = 8; break; case Kryo: MaxInterleaveFactor = 4; @@ -99,6 +105,8 @@ void AArch64Subtarget::initializeProperties() { PrefetchDistance = 740; MinPrefetchStride = 1024; MaxPrefetchIterationsAhead = 11; + // FIXME: remove this to enable 64-bit SLP if performance looks good. + MinVectorRegisterBitWidth = 128; break; case ThunderX2T99: CacheLineSize = 64; @@ -108,6 +116,8 @@ void AArch64Subtarget::initializeProperties() { PrefetchDistance = 128; MinPrefetchStride = 1024; MaxPrefetchIterationsAhead = 4; + // FIXME: remove this to enable 64-bit SLP if performance looks good. + MinVectorRegisterBitWidth = 128; break; case ThunderX: case ThunderXT88: @@ -116,11 +126,17 @@ void AArch64Subtarget::initializeProperties() { CacheLineSize = 128; PrefFunctionAlignment = 3; PrefLoopAlignment = 2; + // FIXME: remove this to enable 64-bit SLP if performance looks good. + MinVectorRegisterBitWidth = 128; break; case CortexA35: break; case CortexA53: break; - case CortexA72: break; - case CortexA73: break; + case CortexA72: + PrefFunctionAlignment = 4; + break; + case CortexA73: + PrefFunctionAlignment = 4; + break; case Others: break; } } @@ -156,12 +172,12 @@ struct AArch64GISelActualAccessor : public GISelAccessor { AArch64Subtarget::AArch64Subtarget(const Triple &TT, const std::string &CPU, const std::string &FS, - const TargetMachine &TM, bool LittleEndian, - bool ForCodeSize) - : AArch64GenSubtargetInfo(TT, CPU, FS), ReserveX18(TT.isOSDarwin()), + const TargetMachine &TM, bool LittleEndian) + : AArch64GenSubtargetInfo(TT, CPU, FS), + ReserveX18(TT.isOSDarwin() || TT.isOSWindows()), IsLittle(LittleEndian), TargetTriple(TT), FrameLowering(), InstrInfo(initializeSubtargetDependencies(FS, CPU)), TSInfo(), - TLInfo(TM, *this), GISel(), ForCodeSize(ForCodeSize) { + TLInfo(TM, *this), GISel() { #ifndef LLVM_BUILD_GLOBAL_ISEL GISelAccessor *AArch64GISel = new GISelAccessor(); #else diff --git a/interpreter/llvm/src/lib/Target/AArch64/AArch64Subtarget.h b/interpreter/llvm/src/lib/Target/AArch64/AArch64Subtarget.h index 2c66221886cf5..5a1f45ee25528 100644 --- a/interpreter/llvm/src/lib/Target/AArch64/AArch64Subtarget.h +++ b/interpreter/llvm/src/lib/Target/AArch64/AArch64Subtarget.h @@ -70,6 +70,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo { bool HasFullFP16 = false; bool HasSPE = false; bool HasLSLFast = false; + bool HasSVE = false; // HasZeroCycleRegMove - Has zero-cycle register mov instructions. bool HasZeroCycleRegMove = false; @@ -83,6 +84,9 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo { // NegativeImmediates - transform instructions with negative immediates bool NegativeImmediates = true; + // Enable 64-bit vectorization in SLP. 
+  unsigned MinVectorRegisterBitWidth = 64;
+
  bool UseAA = false;
  bool PredictableSelectIsExpensive = false;
  bool BalanceFPOps = false;
@@ -125,8 +129,6 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
  /// an optional library.
  std::unique_ptr<GISelAccessor> GISel;
-  bool ForCodeSize;
-
private:
  /// initializeSubtargetDependencies - Initializes using CPUString and the
  /// passed in feature string so that we can use initializer lists for
@@ -142,7 +144,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
  /// of the specified triple.
  AArch64Subtarget(const Triple &TT, const std::string &CPU,
                   const std::string &FS, const TargetMachine &TM,
-                  bool LittleEndian, bool ForCodeSize);
+                  bool LittleEndian);
  /// This object will take ownership of \p GISelAccessor.
  void setGISelAccessor(GISelAccessor &GISel) {
@@ -191,6 +193,10 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
  bool isXRaySupported() const override { return true; }
+  unsigned getMinVectorRegisterBitWidth() const {
+    return MinVectorRegisterBitWidth;
+  }
+
  bool isX18Reserved() const { return ReserveX18; }
  bool hasFPARMv8() const { return HasFPARMv8; }
  bool hasNEON() const { return HasNEON; }
@@ -213,6 +219,13 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
  bool hasArithmeticCbzFusion() const { return HasArithmeticCbzFusion; }
  bool hasFuseAES() const { return HasFuseAES; }
  bool hasFuseLiterals() const { return HasFuseLiterals; }
+
+  /// \brief Return true if the CPU supports any kind of instruction fusion.
+  bool hasFusion() const {
+    return hasArithmeticBccFusion() || hasArithmeticCbzFusion() ||
+           hasFuseAES() || hasFuseLiterals();
+  }
+
  bool useRSqrt() const { return UseRSqrt; }
  unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; }
  unsigned getVectorInsertExtractBaseCost() const {
@@ -239,6 +252,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
  bool hasFullFP16() const { return HasFullFP16; }
  bool hasSPE() const { return HasSPE; }
  bool hasLSLFast() const { return HasLSLFast; }
+  bool hasSVE() const { return HasSVE; }
  bool isLittleEndian() const { return IsLittle; }
@@ -267,8 +281,6 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
    }
  }
-  bool getForCodeSize() const { return ForCodeSize; }
-
  /// ParseSubtargetFeatures - Parses features string setting specified
  /// subtarget options. Definition of function is auto generated by tblgen.
void ParseSubtargetFeatures(StringRef CPU, StringRef FS); @@ -294,6 +306,17 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo { bool enableEarlyIfConversion() const override; std::unique_ptr getCustomPBQPConstraints() const override; + + bool isCallingConvWin64(CallingConv::ID CC) const { + switch (CC) { + case CallingConv::C: + return isTargetWindows(); + case CallingConv::Win64: + return true; + default: + return false; + } + } }; } // End llvm namespace diff --git a/interpreter/llvm/src/lib/Target/AArch64/AArch64TargetMachine.cpp b/interpreter/llvm/src/lib/Target/AArch64/AArch64TargetMachine.cpp index 5a90fd1eb1baf..ba28c01a2effb 100644 --- a/interpreter/llvm/src/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/interpreter/llvm/src/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -10,10 +10,10 @@ // //===----------------------------------------------------------------------===// +#include "AArch64TargetMachine.h" #include "AArch64.h" #include "AArch64MacroFusion.h" #include "AArch64Subtarget.h" -#include "AArch64TargetMachine.h" #include "AArch64TargetObjectFile.h" #include "AArch64TargetTransformInfo.h" #include "MCTargetDesc/AArch64MCTargetDesc.h" @@ -23,6 +23,7 @@ #include "llvm/CodeGen/GlobalISel/IRTranslator.h" #include "llvm/CodeGen/GlobalISel/InstructionSelect.h" #include "llvm/CodeGen/GlobalISel/Legalizer.h" +#include "llvm/CodeGen/GlobalISel/Localizer.h" #include "llvm/CodeGen/GlobalISel/RegBankSelect.h" #include "llvm/CodeGen/MachineScheduler.h" #include "llvm/CodeGen/Passes.h" @@ -46,6 +47,11 @@ static cl::opt EnableCCMP("aarch64-enable-ccmp", cl::desc("Enable the CCMP formation pass"), cl::init(true), cl::Hidden); +static cl::opt + EnableCondBrTuning("aarch64-enable-cond-br-tune", + cl::desc("Enable the conditional branch tuning pass"), + cl::init(true), cl::Hidden); + static cl::opt EnableMCR("aarch64-enable-mcr", cl::desc("Enable the machine combiner pass"), cl::init(true), cl::Hidden); @@ -132,6 +138,9 @@ static cl::opt EnableGlobalISelAtO( cl::desc("Enable GlobalISel at or below an opt level (-1 to disable)"), cl::init(-1)); +static cl::opt EnableFalkorHWPFFix("aarch64-enable-falkor-hwpf-fix", + cl::init(true), cl::Hidden); + extern "C" void LLVMInitializeAArch64Target() { // Register the target. 
RegisterTargetMachine X(getTheAArch64leTarget()); @@ -152,6 +161,8 @@ extern "C" void LLVMInitializeAArch64Target() { initializeAArch64PromoteConstantPass(*PR); initializeAArch64RedundantCopyEliminationPass(*PR); initializeAArch64StorePairSuppressPass(*PR); + initializeFalkorHWPFFixPass(*PR); + initializeFalkorMarkStridedAccessesLegacyPass(*PR); initializeLDTLSCleanupPass(*PR); } @@ -161,6 +172,8 @@ extern "C" void LLVMInitializeAArch64Target() { static std::unique_ptr createTLOF(const Triple &TT) { if (TT.isOSBinFormatMachO()) return llvm::make_unique(); + if (TT.isOSBinFormatCOFF()) + return llvm::make_unique(); return llvm::make_unique(); } @@ -173,6 +186,8 @@ static std::string computeDataLayout(const Triple &TT, return "e-m:e-p:32:32-i8:8-i16:16-i64:64-S128"; if (TT.isOSBinFormatMachO()) return "e-m:o-i64:64-i128:128-n32:64-S128"; + if (TT.isOSBinFormatCOFF()) + return "e-m:w-p:64:64-i32:32-i64:64-i128:128-n32:64-S128"; if (LittleEndian) return "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"; return "E-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"; @@ -214,7 +229,6 @@ const AArch64Subtarget * AArch64TargetMachine::getSubtargetImpl(const Function &F) const { Attribute CPUAttr = F.getFnAttribute("target-cpu"); Attribute FSAttr = F.getFnAttribute("target-features"); - bool ForCodeSize = F.optForSize(); std::string CPU = !CPUAttr.hasAttribute(Attribute::None) ? CPUAttr.getValueAsString().str() @@ -222,17 +236,15 @@ AArch64TargetMachine::getSubtargetImpl(const Function &F) const { std::string FS = !FSAttr.hasAttribute(Attribute::None) ? FSAttr.getValueAsString().str() : TargetFS; - std::string ForCodeSizeStr = - std::string(ForCodeSize ? "+" : "-") + "forcodesize"; - auto &I = SubtargetMap[CPU + FS + ForCodeSizeStr]; + auto &I = SubtargetMap[CPU + FS]; if (!I) { // This needs to be done before we create a new subtarget since any // creation will depend on the TM and the code generation flags on the // function that reside in TargetOptions. resetTargetOptions(F); I = llvm::make_unique(TargetTriple, CPU, FS, *this, - isLittle, ForCodeSize); + isLittle); } return I.get(); } @@ -258,9 +270,9 @@ namespace { /// AArch64 Code Generator Pass Configuration Options. class AArch64PassConfig : public TargetPassConfig { public: - AArch64PassConfig(AArch64TargetMachine *TM, PassManagerBase &PM) + AArch64PassConfig(AArch64TargetMachine &TM, PassManagerBase &PM) : TargetPassConfig(TM, PM) { - if (TM->getOptLevel() != CodeGenOpt::None) + if (TM.getOptLevel() != CodeGenOpt::None) substitutePass(&PostRASchedulerID, &PostMachineSchedulerID); } @@ -270,17 +282,19 @@ class AArch64PassConfig : public TargetPassConfig { ScheduleDAGInstrs * createMachineScheduler(MachineSchedContext *C) const override { + const AArch64Subtarget &ST = C->MF->getSubtarget(); ScheduleDAGMILive *DAG = createGenericSchedLive(C); DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI)); DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI)); - DAG->addMutation(createAArch64MacroFusionDAGMutation()); + if (ST.hasFusion()) + DAG->addMutation(createAArch64MacroFusionDAGMutation()); return DAG; } ScheduleDAGInstrs * createPostMachineScheduler(MachineSchedContext *C) const override { const AArch64Subtarget &ST = C->MF->getSubtarget(); - if (ST.hasFuseLiterals()) { + if (ST.hasFusion()) { // Run the Macro Fusion after RA again since literals are expanded from // pseudos then (v. addPreSched2()). 
ScheduleDAGMI *DAG = createGenericSchedPostRA(C); @@ -298,6 +312,7 @@ class AArch64PassConfig : public TargetPassConfig { bool addIRTranslator() override; bool addLegalizeMachineIR() override; bool addRegBankSelect() override; + void addPreGlobalInstructionSelect() override; bool addGlobalInstructionSelect() override; #endif bool addILPOpts() override; @@ -318,13 +333,13 @@ TargetIRAnalysis AArch64TargetMachine::getTargetIRAnalysis() { } TargetPassConfig *AArch64TargetMachine::createPassConfig(PassManagerBase &PM) { - return new AArch64PassConfig(this, PM); + return new AArch64PassConfig(*this, PM); } void AArch64PassConfig::addIRPasses() { // Always expand atomic operations, we don't deal with atomicrmw or cmpxchg // ourselves. - addPass(createAtomicExpandPass(TM)); + addPass(createAtomicExpandPass()); // Cmpxchg instructions are often used with a subsequent comparison to // determine whether it succeeded. We can exploit existing control-flow in @@ -336,14 +351,18 @@ void AArch64PassConfig::addIRPasses() { // // Run this before LSR to remove the multiplies involved in computing the // pointer values N iterations ahead. - if (TM->getOptLevel() != CodeGenOpt::None && EnableLoopDataPrefetch) - addPass(createLoopDataPrefetchPass()); + if (TM->getOptLevel() != CodeGenOpt::None) { + if (EnableLoopDataPrefetch) + addPass(createLoopDataPrefetchPass()); + if (EnableFalkorHWPFFix) + addPass(createFalkorMarkStridedAccessesPass()); + } TargetPassConfig::addIRPasses(); // Match interleaved memory accesses to ldN/stN intrinsics. if (TM->getOptLevel() != CodeGenOpt::None) - addPass(createInterleavedAccessPass(TM)); + addPass(createInterleavedAccessPass()); if (TM->getOptLevel() == CodeGenOpt::Aggressive && EnableGEPOpt) { // Call SeparateConstOffsetFromGEP pass to extract constants within indices @@ -407,6 +426,12 @@ bool AArch64PassConfig::addRegBankSelect() { return false; } +void AArch64PassConfig::addPreGlobalInstructionSelect() { + // Workaround the deficiency of the fast register allocator. + if (TM->getOptLevel() == CodeGenOpt::None) + addPass(new Localizer()); +} + bool AArch64PassConfig::addGlobalInstructionSelect() { addPass(new InstructionSelect()); return false; @@ -424,6 +449,8 @@ bool AArch64PassConfig::addILPOpts() { addPass(createAArch64ConditionalCompares()); if (EnableMCR) addPass(&MachineCombinerID); + if (EnableCondBrTuning) + addPass(createAArch64CondBrTuning()); if (EnableEarlyIfConversion) addPass(&EarlyIfConverterID); if (EnableStPairSuppress) @@ -460,8 +487,12 @@ void AArch64PassConfig::addPreSched2() { // Expand some pseudo instructions to allow proper scheduling. addPass(createAArch64ExpandPseudoPass()); // Use load/store pair instructions when possible. 
- if (TM->getOptLevel() != CodeGenOpt::None && EnableLoadStoreOpt) - addPass(createAArch64LoadStoreOptimizationPass()); + if (TM->getOptLevel() != CodeGenOpt::None) { + if (EnableLoadStoreOpt) + addPass(createAArch64LoadStoreOptimizationPass()); + if (EnableFalkorHWPFFix) + addPass(createFalkorHWPFFixPass()); + } } void AArch64PassConfig::addPreEmitPass() { diff --git a/interpreter/llvm/src/lib/Target/AArch64/AArch64TargetMachine.h b/interpreter/llvm/src/lib/Target/AArch64/AArch64TargetMachine.h index 2c75a3258c1cb..85de02e859e0c 100644 --- a/interpreter/llvm/src/lib/Target/AArch64/AArch64TargetMachine.h +++ b/interpreter/llvm/src/lib/Target/AArch64/AArch64TargetMachine.h @@ -36,6 +36,9 @@ class AArch64TargetMachine : public LLVMTargetMachine { ~AArch64TargetMachine() override; const AArch64Subtarget *getSubtargetImpl(const Function &F) const override; + // The no-argument getSubtargetImpl, while it exists on some targets, is + // deprecated and should not be used. + const AArch64Subtarget *getSubtargetImpl() const = delete; // Pass Pipeline Configuration TargetPassConfig *createPassConfig(PassManagerBase &PM) override; diff --git a/interpreter/llvm/src/lib/Target/AArch64/AArch64TargetObjectFile.cpp b/interpreter/llvm/src/lib/Target/AArch64/AArch64TargetObjectFile.cpp index 8875f9b726475..4bc2c060a0684 100644 --- a/interpreter/llvm/src/lib/Target/AArch64/AArch64TargetObjectFile.cpp +++ b/interpreter/llvm/src/lib/Target/AArch64/AArch64TargetObjectFile.cpp @@ -9,12 +9,12 @@ #include "AArch64TargetObjectFile.h" #include "AArch64TargetMachine.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/IR/Mangler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCValue.h" -#include "llvm/Support/Dwarf.h" using namespace llvm; using namespace dwarf; @@ -70,3 +70,11 @@ const MCExpr *AArch64_MachoTargetObjectFile::getIndirectSymViaGOTPCRel( const MCExpr *PC = MCSymbolRefExpr::create(PCSym, getContext()); return MCBinaryExpr::createSub(Res, PC, getContext()); } + +void AArch64_MachoTargetObjectFile::getNameWithPrefix( + SmallVectorImpl<char> &OutName, const GlobalValue *GV, + const TargetMachine &TM) const { + // AArch64 does not use section-relative relocations so any global symbol must + // be accessed via at least a linker-private symbol. + getMangler().getNameWithPrefix(OutName, GV, /* CannotUsePrivateLabel */ true); +} diff --git a/interpreter/llvm/src/lib/Target/AArch64/AArch64TargetObjectFile.h b/interpreter/llvm/src/lib/Target/AArch64/AArch64TargetObjectFile.h index 05e1dfa9e6c9b..9077eb7902fd4 100644 --- a/interpreter/llvm/src/lib/Target/AArch64/AArch64TargetObjectFile.h +++ b/interpreter/llvm/src/lib/Target/AArch64/AArch64TargetObjectFile.h @@ -40,8 +40,14 @@ class AArch64_MachoTargetObjectFile : public TargetLoweringObjectFileMachO { const MCValue &MV, int64_t Offset, MachineModuleInfo *MMI, MCStreamer &Streamer) const override; + + void getNameWithPrefix(SmallVectorImpl<char> &OutName, const GlobalValue *GV, + const TargetMachine &TM) const override; }; +/// This implementation is used for AArch64 COFF targets.
+class AArch64_COFFTargetObjectFile : public TargetLoweringObjectFileCOFF {}; + } // end namespace llvm #endif diff --git a/interpreter/llvm/src/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/interpreter/llvm/src/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index f41f3ddc819b7..a76f080530bbc 100644 --- a/interpreter/llvm/src/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/interpreter/llvm/src/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -9,8 +9,8 @@ #include "AArch64TargetTransformInfo.h" #include "MCTargetDesc/AArch64AddressingModes.h" -#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/BasicTTIImpl.h" #include "llvm/Support/Debug.h" #include "llvm/Target/CostTable.h" @@ -20,6 +20,23 @@ using namespace llvm; #define DEBUG_TYPE "aarch64tti" +static cl::opt<bool> EnableFalkorHWPFUnrollFix("enable-falkor-hwpf-unroll-fix", + cl::init(true), cl::Hidden); + +bool AArch64TTIImpl::areInlineCompatible(const Function *Caller, + const Function *Callee) const { + const TargetMachine &TM = getTLI()->getTargetMachine(); + + const FeatureBitset &CallerBits = + TM.getSubtargetImpl(*Caller)->getFeatureBits(); + const FeatureBitset &CalleeBits = + TM.getSubtargetImpl(*Callee)->getFeatureBits(); + + // Inline a callee if its target-features are a subset of the caller's + // target-features. + return (CallerBits & CalleeBits) == CalleeBits; +} + /// \brief Calculate the cost of materializing a 64-bit value. This helper /// method might only calculate a fraction of a larger immediate. Therefore it /// is valid to return a cost of ZERO. @@ -631,10 +648,62 @@ unsigned AArch64TTIImpl::getMaxInterleaveFactor(unsigned VF) { return ST->getMaxInterleaveFactor(); } -void AArch64TTIImpl::getUnrollingPreferences(Loop *L, +// For Falkor, we want to avoid having too many strided loads in a loop since +// that can exhaust the HW prefetcher resources. We adjust the unroller +// MaxCount preference below to attempt to ensure unrolling doesn't create too +// many strided loads. +static void +getFalkorUnrollingPreferences(Loop *L, ScalarEvolution &SE, + TargetTransformInfo::UnrollingPreferences &UP) { + enum { MaxStridedLoads = 7 }; + auto countStridedLoads = [](Loop *L, ScalarEvolution &SE) { + int StridedLoads = 0; + // FIXME? We could make this more precise by looking at the CFG and + // e.g. not counting loads in each side of an if-then-else diamond. + for (const auto BB : L->blocks()) { + for (auto &I : *BB) { + LoadInst *LMemI = dyn_cast<LoadInst>(&I); + if (!LMemI) + continue; + + Value *PtrValue = LMemI->getPointerOperand(); + if (L->isLoopInvariant(PtrValue)) + continue; + + const SCEV *LSCEV = SE.getSCEV(PtrValue); + const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV); + if (!LSCEVAddRec || !LSCEVAddRec->isAffine()) + continue; + + // FIXME? We could take pairing of unrolled load copies into account + // by looking at the AddRec, but we would probably have to limit this + // to loops with no stores or other memory optimization barriers. + ++StridedLoads; + // We've seen enough strided loads that seeing more won't make a + // difference. + if (StridedLoads > MaxStridedLoads / 2) + return StridedLoads; + } + } + return StridedLoads; + }; + + int StridedLoads = countStridedLoads(L, SE); + DEBUG(dbgs() << "falkor-hwpf: detected " << StridedLoads + << " strided loads\n"); + // Pick the largest power of 2 unroll count that won't result in too many + // strided loads.
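// (Worked example, added for illustration: with MaxStridedLoads = 7, a loop
// containing 3 strided loads yields 7/3 = 2 and Log2_32(2) = 1, so the code
// below sets MaxCount = 1 << 1 = 2; with a single strided load, 7/1 = 7 and
// Log2_32(7) = 2 give MaxCount = 4.)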
+ if (StridedLoads) { + UP.MaxCount = 1 << Log2_32(MaxStridedLoads / StridedLoads); + DEBUG(dbgs() << "falkor-hwpf: setting unroll MaxCount to " << UP.MaxCount + << '\n'); + } +} + +void AArch64TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP) { // Enable partial unrolling and runtime unrolling. - BaseT::getUnrollingPreferences(L, UP); + BaseT::getUnrollingPreferences(L, SE, UP); // For inner loop, it is more likely to be a hot one, and the runtime check // can be promoted out from LICM pass, so the overhead is less, let's try @@ -644,6 +713,10 @@ void AArch64TTIImpl::getUnrollingPreferences(Loop *L, // Disable partial & runtime unrolling on -Os. UP.PartialOptSizeThreshold = 0; + + if (ST->getProcFamily() == AArch64Subtarget::Falkor && + EnableFalkorHWPFUnrollFix) + getFalkorUnrollingPreferences(L, SE, UP); } Value *AArch64TTIImpl::getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, @@ -773,6 +846,7 @@ unsigned AArch64TTIImpl::getMaxPrefetchIterationsAhead() { bool AArch64TTIImpl::useReductionIntrinsic(unsigned Opcode, Type *Ty, TTI::ReductionFlags Flags) const { assert(isa<VectorType>(Ty) && "Expected Ty to be a vector type"); + unsigned ScalarBits = Ty->getScalarSizeInBits(); switch (Opcode) { case Instruction::FAdd: case Instruction::FMul: @@ -782,9 +856,10 @@ bool AArch64TTIImpl::useReductionIntrinsic(unsigned Opcode, Type *Ty, case Instruction::Mul: return false; case Instruction::Add: - return Ty->getScalarSizeInBits() * Ty->getVectorNumElements() >= 128; + return ScalarBits * Ty->getVectorNumElements() >= 128; case Instruction::ICmp: - return Ty->getScalarSizeInBits() < 64; + return (ScalarBits < 64) && + (ScalarBits * Ty->getVectorNumElements() >= 128); case Instruction::FCmp: return Flags.NoNaN; default: diff --git a/interpreter/llvm/src/lib/Target/AArch64/AArch64TargetTransformInfo.h b/interpreter/llvm/src/lib/Target/AArch64/AArch64TargetTransformInfo.h index c48f24a736345..31c037354925d 100644 --- a/interpreter/llvm/src/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ b/interpreter/llvm/src/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -51,6 +51,9 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> { : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {} + bool areInlineCompatible(const Function *Caller, + const Function *Callee) const; + /// \name Scalar TTI Implementations /// @{ @@ -78,7 +81,7 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> { return 31; } - unsigned getRegisterBitWidth(bool Vector) { + unsigned getRegisterBitWidth(bool Vector) const { if (Vector) { if (ST->hasNEON()) return 128; @@ -87,6 +90,10 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> { return 64; } + unsigned getMinVectorRegisterBitWidth() { + return ST->getMinVectorRegisterBitWidth(); + } + unsigned getMaxInterleaveFactor(unsigned VF); int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, @@ -115,7 +122,8 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> { int getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys); - void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP); + void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, + TTI::UnrollingPreferences &UP); Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType); diff --git a/interpreter/llvm/src/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/interpreter/llvm/src/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index 4dbcc9581a841..a79d518205450 ---
a/interpreter/llvm/src/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/interpreter/llvm/src/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -15,8 +15,8 @@ #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" @@ -86,7 +86,7 @@ class AArch64AsmParser : public MCTargetAsmParser { bool parseOperand(OperandVector &Operands, bool isCondCode, bool invertCondCode); - bool showMatchError(SMLoc Loc, unsigned ErrCode); + bool showMatchError(SMLoc Loc, unsigned ErrCode, OperandVector &Operands); bool parseDirectiveArch(SMLoc L); bool parseDirectiveCPU(SMLoc L); @@ -3257,7 +3257,10 @@ bool AArch64AsmParser::validateInstruction(MCInst &Inst, } } -bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode) { +std::string AArch64MnemonicSpellCheck(StringRef S, uint64_t FBS); + +bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode, + OperandVector &Operands) { switch (ErrCode) { case Match_MissingFeature: return Error(Loc, @@ -3380,8 +3383,12 @@ bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode) { return Error(Loc, "expected readable system register"); case Match_MSR: return Error(Loc, "expected writable system register or pstate"); - case Match_MnemonicFail: - return Error(Loc, "unrecognized instruction mnemonic"); + case Match_MnemonicFail: { + std::string Suggestion = AArch64MnemonicSpellCheck( + ((AArch64Operand &)*Operands[0]).getToken(), + ComputeAvailableFeatures(STI->getFeatureBits())); + return Error(Loc, "unrecognized instruction mnemonic" + Suggestion); + } default: llvm_unreachable("unexpected error code!"); } @@ -3707,7 +3714,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, return Error(IDLoc, Msg); } case Match_MnemonicFail: - return showMatchError(IDLoc, MatchResult); + return showMatchError(IDLoc, MatchResult, Operands); case Match_InvalidOperand: { SMLoc ErrorLoc = IDLoc; @@ -3726,7 +3733,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, ((AArch64Operand &)*Operands[ErrorInfo]).isTokenSuffix()) MatchResult = Match_InvalidSuffix; - return showMatchError(ErrorLoc, MatchResult); + return showMatchError(ErrorLoc, MatchResult, Operands); } case Match_InvalidMemoryIndexed1: case Match_InvalidMemoryIndexed2: @@ -3784,7 +3791,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, SMLoc ErrorLoc = ((AArch64Operand &)*Operands[ErrorInfo]).getStartLoc(); if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; - return showMatchError(ErrorLoc, MatchResult); + return showMatchError(ErrorLoc, MatchResult, Operands); } } @@ -3904,10 +3911,14 @@ bool AArch64AsmParser::parseDirectiveArch(SMLoc L) { return false; } +static SMLoc incrementLoc(SMLoc L, int Offset) { + return SMLoc::getFromPointer(L.getPointer() + Offset); +} + /// parseDirectiveCPU /// ::= .cpu id bool AArch64AsmParser::parseDirectiveCPU(SMLoc L) { - SMLoc CPULoc = getLoc(); + SMLoc CurLoc = getLoc(); StringRef CPU, ExtensionString; std::tie(CPU, ExtensionString) = @@ -3923,15 +3934,19 @@ bool AArch64AsmParser::parseDirectiveCPU(SMLoc L) { // FIXME This is using tablegen data, but should be moved to ARMTargetParser // once that is tablegen'ed if (!getSTI().isCPUStringValid(CPU)) { - Error(CPULoc, "unknown CPU name"); + Error(CurLoc, "unknown CPU name"); return false; } 
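// (Illustrative, not part of the patch: for a directive such as
// ".cpu cortex-a57+crypto+nofp", CurLoc below is advanced past the CPU name
// and then past each '+' and extension token, so the new "unsupported
// architectural extension" diagnostic points at the offending extension
// rather than at the start of the directive.)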
MCSubtargetInfo &STI = copySTI(); STI.setDefaultFeatures(CPU, ""); + CurLoc = incrementLoc(CurLoc, CPU.size()); FeatureBitset Features = STI.getFeatureBits(); for (auto Name : RequestedExtensions) { + // Advance source location past '+'. + CurLoc = incrementLoc(CurLoc, 1); + bool EnableFeature = true; if (Name.startswith_lower("no")) { @@ -3939,6 +3954,7 @@ bool AArch64AsmParser::parseDirectiveCPU(SMLoc L) { Name = Name.substr(2); } + bool FoundExtension = false; for (const auto &Extension : ExtensionMap) { if (Extension.Name != Name) continue; @@ -3952,9 +3968,15 @@ bool AArch64AsmParser::parseDirectiveCPU(SMLoc L) { uint64_t Features = ComputeAvailableFeatures(STI.ToggleFeature(ToggleFeatures)); setAvailableFeatures(Features); + FoundExtension = true; break; } + + if (!FoundExtension) + Error(CurLoc, "unsupported architectural extension"); + + CurLoc = incrementLoc(CurLoc, Name.size()); } return false; } diff --git a/interpreter/llvm/src/lib/Target/AArch64/CMakeLists.txt b/interpreter/llvm/src/lib/Target/AArch64/CMakeLists.txt index f0f50f29be0f3..f7e0a5c7bed39 100644 --- a/interpreter/llvm/src/lib/Target/AArch64/CMakeLists.txt +++ b/interpreter/llvm/src/lib/Target/AArch64/CMakeLists.txt @@ -43,9 +43,11 @@ add_llvm_target(AArch64CodeGen AArch64AsmPrinter.cpp AArch64CleanupLocalDynamicTLSPass.cpp AArch64CollectLOH.cpp + AArch64CondBrTuning.cpp AArch64ConditionalCompares.cpp AArch64DeadRegisterDefinitionsPass.cpp AArch64ExpandPseudoInsts.cpp + AArch64FalkorHWPFFix.cpp AArch64FastISel.cpp AArch64A53Fix835769.cpp AArch64FrameLowering.cpp diff --git a/interpreter/llvm/src/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/interpreter/llvm/src/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp index 0d860a7eef794..7870dce5c9c0f 100644 --- a/interpreter/llvm/src/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp +++ b/interpreter/llvm/src/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp @@ -756,7 +756,7 @@ static DecodeStatus DecodeThreeAddrSRegInstruction(llvm::MCInst &Inst, // if shift == '11' then ReservedValue() if (shiftHi == 0x3) return Fail; - // Deliberate fallthrough + LLVM_FALLTHROUGH; case AArch64::ANDWrs: case AArch64::ANDSWrs: case AArch64::BICWrs: @@ -780,7 +780,7 @@ static DecodeStatus DecodeThreeAddrSRegInstruction(llvm::MCInst &Inst, // if shift == '11' then ReservedValue() if (shiftHi == 0x3) return Fail; - // Deliberate fallthrough + LLVM_FALLTHROUGH; case AArch64::ANDXrs: case AArch64::ANDSXrs: case AArch64::BICXrs: diff --git a/interpreter/llvm/src/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/interpreter/llvm/src/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp index ebf05ae303ddd..2bd0cbf9f7c6a 100644 --- a/interpreter/llvm/src/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp +++ b/interpreter/llvm/src/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp @@ -11,8 +11,9 @@ #include "AArch64RegisterInfo.h" #include "MCTargetDesc/AArch64FixupKinds.h" #include "llvm/ADT/Triple.h" -#include "llvm/MC/MCAssembler.h" +#include "llvm/BinaryFormat/MachO.h" #include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDirectives.h" #include "llvm/MC/MCELFObjectWriter.h" @@ -22,7 +23,6 @@ #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCValue.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MachO.h" using namespace llvm; namespace { @@ -43,26 +43,25 @@ class AArch64AsmBackend : public MCAsmBackend { const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) 
const override { const static MCFixupKindInfo Infos[AArch64::NumTargetFixupKinds] = { - // This table *must* be in the order that the fixup_* kinds are defined in - // AArch64FixupKinds.h. - // - // Name Offset (bits) Size (bits) Flags - { "fixup_aarch64_pcrel_adr_imm21", 0, 32, PCRelFlagVal }, - { "fixup_aarch64_pcrel_adrp_imm21", 0, 32, PCRelFlagVal }, - { "fixup_aarch64_add_imm12", 10, 12, 0 }, - { "fixup_aarch64_ldst_imm12_scale1", 10, 12, 0 }, - { "fixup_aarch64_ldst_imm12_scale2", 10, 12, 0 }, - { "fixup_aarch64_ldst_imm12_scale4", 10, 12, 0 }, - { "fixup_aarch64_ldst_imm12_scale8", 10, 12, 0 }, - { "fixup_aarch64_ldst_imm12_scale16", 10, 12, 0 }, - { "fixup_aarch64_ldr_pcrel_imm19", 5, 19, PCRelFlagVal }, - { "fixup_aarch64_movw", 5, 16, 0 }, - { "fixup_aarch64_pcrel_branch14", 5, 14, PCRelFlagVal }, - { "fixup_aarch64_pcrel_branch19", 5, 19, PCRelFlagVal }, - { "fixup_aarch64_pcrel_branch26", 0, 26, PCRelFlagVal }, - { "fixup_aarch64_pcrel_call26", 0, 26, PCRelFlagVal }, - { "fixup_aarch64_tlsdesc_call", 0, 0, 0 } - }; + // This table *must* be in the order that the fixup_* kinds are defined + // in AArch64FixupKinds.h. + // + // Name Offset (bits) Size (bits) Flags + {"fixup_aarch64_pcrel_adr_imm21", 0, 32, PCRelFlagVal}, + {"fixup_aarch64_pcrel_adrp_imm21", 0, 32, PCRelFlagVal}, + {"fixup_aarch64_add_imm12", 10, 12, 0}, + {"fixup_aarch64_ldst_imm12_scale1", 10, 12, 0}, + {"fixup_aarch64_ldst_imm12_scale2", 10, 12, 0}, + {"fixup_aarch64_ldst_imm12_scale4", 10, 12, 0}, + {"fixup_aarch64_ldst_imm12_scale8", 10, 12, 0}, + {"fixup_aarch64_ldst_imm12_scale16", 10, 12, 0}, + {"fixup_aarch64_ldr_pcrel_imm19", 5, 19, PCRelFlagVal}, + {"fixup_aarch64_movw", 5, 16, 0}, + {"fixup_aarch64_pcrel_branch14", 5, 14, PCRelFlagVal}, + {"fixup_aarch64_pcrel_branch19", 5, 19, PCRelFlagVal}, + {"fixup_aarch64_pcrel_branch26", 0, 26, PCRelFlagVal}, + {"fixup_aarch64_pcrel_call26", 0, 26, PCRelFlagVal}, + {"fixup_aarch64_tlsdesc_call", 0, 0, 0}}; if (Kind < FirstTargetFixupKind) return MCAsmBackend::getFixupKindInfo(Kind); @@ -72,8 +71,9 @@ class AArch64AsmBackend : public MCAsmBackend { return Infos[Kind - FirstTargetFixupKind]; } - void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, - uint64_t Value, bool IsPCRel, MCContext &Ctx) const override; + void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target, MutableArrayRef Data, + uint64_t Value, bool IsResolved) const override; bool mayNeedRelaxation(const MCInst &Inst) const override; bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, @@ -104,8 +104,9 @@ static unsigned getFixupKindNumBytes(unsigned Kind) { case FK_Data_1: return 1; - case FK_Data_2: case AArch64::fixup_aarch64_movw: + case FK_Data_2: + case FK_SecRel_2: return 2; case AArch64::fixup_aarch64_pcrel_branch14: @@ -124,6 +125,7 @@ static unsigned getFixupKindNumBytes(unsigned Kind) { case AArch64::fixup_aarch64_pcrel_branch26: case AArch64::fixup_aarch64_pcrel_call26: case FK_Data_4: + case FK_SecRel_4: return 4; case FK_Data_8: @@ -218,6 +220,8 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value, case FK_Data_2: case FK_Data_4: case FK_Data_8: + case FK_SecRel_2: + case FK_SecRel_4: return Value; } } @@ -261,13 +265,15 @@ unsigned AArch64AsmBackend::getFixupKindContainereSizeInBytes(unsigned Kind) con } } -void AArch64AsmBackend::applyFixup(const MCFixup &Fixup, char *Data, - unsigned DataSize, uint64_t Value, - bool IsPCRel, MCContext &Ctx) const { +void AArch64AsmBackend::applyFixup(const MCAssembler &Asm, 
const MCFixup &Fixup, + const MCValue &Target, + MutableArrayRef Data, uint64_t Value, + bool IsResolved) const { unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind()); if (!Value) return; // Doesn't change encoding. MCFixupKindInfo Info = getFixupKindInfo(Fixup.getKind()); + MCContext &Ctx = Asm.getContext(); // Apply any target-specific value adjustments. Value = adjustFixupValue(Fixup, Value, Ctx); @@ -275,7 +281,7 @@ void AArch64AsmBackend::applyFixup(const MCFixup &Fixup, char *Data, Value <<= Info.TargetOffset; unsigned Offset = Fixup.getOffset(); - assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!"); + assert(Offset + NumBytes <= Data.size() && "Invalid fixup offset!"); // Used to point to big endian bytes. unsigned FulleSizeInBytes = getFixupKindContainereSizeInBytes(Fixup.getKind()); @@ -289,7 +295,7 @@ void AArch64AsmBackend::applyFixup(const MCFixup &Fixup, char *Data, } } else { // Handle as big-endian - assert((Offset + FulleSizeInBytes) <= DataSize && "Invalid fixup size!"); + assert((Offset + FulleSizeInBytes) <= Data.size() && "Invalid fixup size!"); assert(NumBytes <= FulleSizeInBytes && "Invalid fixup size!"); for (unsigned i = 0; i != NumBytes; ++i) { unsigned Idx = FulleSizeInBytes - 1 - i; @@ -539,16 +545,13 @@ class ELFAArch64AsmBackend : public AArch64AsmBackend { return createAArch64ELFObjectWriter(OS, OSABI, IsLittleEndian, IsILP32); } - void processFixupValue(const MCAssembler &Asm, const MCAsmLayout &Layout, - const MCFixup &Fixup, const MCFragment *DF, - const MCValue &Target, uint64_t &Value, - bool &IsResolved) override; + bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target) override; }; -void ELFAArch64AsmBackend::processFixupValue( - const MCAssembler &Asm, const MCAsmLayout &Layout, const MCFixup &Fixup, - const MCFragment *DF, const MCValue &Target, uint64_t &Value, - bool &IsResolved) { +bool ELFAArch64AsmBackend::shouldForceRelocation(const MCAssembler &Asm, + const MCFixup &Fixup, + const MCValue &Target) { // The ADRP instruction adds some multiple of 0x1000 to the current PC & // ~0xfff. This means that the required offset to reach a symbol can vary by // up to one step depending on where the ADRP is in memory. For example: @@ -562,11 +565,24 @@ void ELFAArch64AsmBackend::processFixupValue( // section isn't 0x1000-aligned, we therefore need to delegate this decision // to the linker -- a relocation! 
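// (Worked example, added for illustration: an ADRP at PC 0x0ffc computes page
// 0x0000 and needs a 2-page immediate to reach a symbol at 0x2004, but the
// same ADRP moved to PC 0x1000 computes page 0x1000 and needs only 1 page;
// unless the section is 0x1000-aligned the assembler cannot know which case
// holds, hence the forced relocation below.)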
if ((uint32_t)Fixup.getKind() == AArch64::fixup_aarch64_pcrel_adrp_imm21) - IsResolved = false; + return true; + return false; } } +namespace { +class COFFAArch64AsmBackend : public AArch64AsmBackend { +public: + COFFAArch64AsmBackend(const Target &T, const Triple &TheTriple) + : AArch64AsmBackend(T, /*IsLittleEndian*/true) {} + + MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { + return createAArch64WinCOFFObjectWriter(OS); + } +}; +} + MCAsmBackend *llvm::createAArch64leAsmBackend(const Target &T, const MCRegisterInfo &MRI, const Triple &TheTriple, @@ -575,7 +591,11 @@ MCAsmBackend *llvm::createAArch64leAsmBackend(const Target &T, if (TheTriple.isOSBinFormatMachO()) return new DarwinAArch64AsmBackend(T, MRI); - assert(TheTriple.isOSBinFormatELF() && "Expect either MachO or ELF target"); + if (TheTriple.isOSBinFormatCOFF()) + return new COFFAArch64AsmBackend(T, TheTriple); + + assert(TheTriple.isOSBinFormatELF() && "Invalid target"); + uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS()); bool IsILP32 = Options.getABIName() == "ilp32"; return new ELFAArch64AsmBackend(T, OSABI, /*IsLittleEndian=*/true, IsILP32); diff --git a/interpreter/llvm/src/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/interpreter/llvm/src/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp index 10e7241da7090..89c3e5b4c76ec 100644 --- a/interpreter/llvm/src/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp +++ b/interpreter/llvm/src/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp @@ -15,11 +15,11 @@ #include "MCTargetDesc/AArch64FixupKinds.h" #include "MCTargetDesc/AArch64MCExpr.h" #include "MCTargetDesc/AArch64MCTargetDesc.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCFixup.h" #include "llvm/MC/MCValue.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include #include @@ -49,10 +49,11 @@ AArch64ELFObjectWriter::AArch64ELFObjectWriter(uint8_t OSABI, /*HasRelocationAddend*/ true), IsILP32(IsILP32) {} -#define R_CLS(rtype) \ - IsILP32 ? ELF::R_AARCH64_P32_##rtype : ELF::R_AARCH64_##rtype -#define BAD_ILP32_MOV(lp64rtype) "ILP32 absolute MOV relocation not "\ - "supported (LP64 eqv: " #lp64rtype ")" +#define R_CLS(rtype) \ + IsILP32 ? 
ELF::R_AARCH64_P32_##rtype : ELF::R_AARCH64_##rtype +#define BAD_ILP32_MOV(lp64rtype) \ + "ILP32 absolute MOV relocation not " \ + "supported (LP64 eqv: " #lp64rtype ")" // assumes IsILP32 is true static bool isNonILP32reloc(const MCFixup &Fixup, @@ -60,44 +61,45 @@ static bool isNonILP32reloc(const MCFixup &Fixup, MCContext &Ctx) { if ((unsigned)Fixup.getKind() != AArch64::fixup_aarch64_movw) return false; - switch(RefKind) { - case AArch64MCExpr::VK_ABS_G3: - Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_UABS_G3)); - return true; - case AArch64MCExpr::VK_ABS_G2: - Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_UABS_G2)); - return true; - case AArch64MCExpr::VK_ABS_G2_S: - Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_SABS_G2)); - return true; - case AArch64MCExpr::VK_ABS_G2_NC: - Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_UABS_G2_NC)); - return true; - case AArch64MCExpr::VK_ABS_G1_S: - Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_SABS_G1)); - return true; - case AArch64MCExpr::VK_ABS_G1_NC: - Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_UABS_G1_NC)); - return true; - case AArch64MCExpr::VK_DTPREL_G2: - Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSLD_MOVW_DTPREL_G2)); - return true; - case AArch64MCExpr::VK_DTPREL_G1_NC: - Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSLD_MOVW_DTPREL_G1_NC)); - return true; - case AArch64MCExpr::VK_TPREL_G2: - Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSLE_MOVW_TPREL_G2)); - return true; - case AArch64MCExpr::VK_TPREL_G1_NC: - Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSLE_MOVW_TPREL_G1_NC)); - return true; - case AArch64MCExpr::VK_GOTTPREL_G1: - Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSIE_MOVW_GOTTPREL_G1)); - return true; - case AArch64MCExpr::VK_GOTTPREL_G0_NC: - Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSIE_MOVW_GOTTPREL_G0_NC)); - return true; - default: return false; + switch (RefKind) { + case AArch64MCExpr::VK_ABS_G3: + Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_UABS_G3)); + return true; + case AArch64MCExpr::VK_ABS_G2: + Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_UABS_G2)); + return true; + case AArch64MCExpr::VK_ABS_G2_S: + Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_SABS_G2)); + return true; + case AArch64MCExpr::VK_ABS_G2_NC: + Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_UABS_G2_NC)); + return true; + case AArch64MCExpr::VK_ABS_G1_S: + Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_SABS_G1)); + return true; + case AArch64MCExpr::VK_ABS_G1_NC: + Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_UABS_G1_NC)); + return true; + case AArch64MCExpr::VK_DTPREL_G2: + Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSLD_MOVW_DTPREL_G2)); + return true; + case AArch64MCExpr::VK_DTPREL_G1_NC: + Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSLD_MOVW_DTPREL_G1_NC)); + return true; + case AArch64MCExpr::VK_TPREL_G2: + Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSLE_MOVW_TPREL_G2)); + return true; + case AArch64MCExpr::VK_TPREL_G1_NC: + Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSLE_MOVW_TPREL_G1_NC)); + return true; + case AArch64MCExpr::VK_GOTTPREL_G1: + Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSIE_MOVW_GOTTPREL_G1)); + return true; + case AArch64MCExpr::VK_GOTTPREL_G0_NC: + Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSIE_MOVW_GOTTPREL_G0_NC)); + return true; + default: + return false; } return false; } @@ -130,7 +132,8 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx, return R_CLS(PREL32); case FK_Data_8: if (IsILP32) { - 
Ctx.reportError(Fixup.getLoc(), "ILP32 8 byte PC relative data " + Ctx.reportError(Fixup.getLoc(), + "ILP32 8 byte PC relative data " "relocation not supported (LP64 eqv: PREL64)"); return ELF::R_AARCH64_NONE; } else @@ -178,7 +181,7 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx, } } else { if (IsILP32 && isNonILP32reloc(Fixup, RefKind, Ctx)) - return ELF::R_AARCH64_NONE; + return ELF::R_AARCH64_NONE; switch ((unsigned)Fixup.getKind()) { case FK_Data_1: Ctx.reportError(Fixup.getLoc(), "1-byte data relocations not supported"); @@ -189,8 +192,9 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx, return R_CLS(ABS32); case FK_Data_8: if (IsILP32) { - Ctx.reportError(Fixup.getLoc(), "ILP32 8 byte absolute data " - "relocation not supported (LP64 eqv: ABS64)"); + Ctx.reportError(Fixup.getLoc(), + "ILP32 8 byte absolute data " + "relocation not supported (LP64 eqv: ABS64)"); return ELF::R_AARCH64_NONE; } else return ELF::R_AARCH64_ABS64; @@ -262,7 +266,7 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx, } else { Ctx.reportError(Fixup.getLoc(), "LP64 4 byte unchecked GOT load/store relocation " - "not supported (ILP32 eqv: LD32_GOT_LO12_NC"); + "not supported (ILP32 eqv: LD32_GOT_LO12_NC"); return ELF::R_AARCH64_NONE; } } @@ -270,12 +274,12 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx, if (IsILP32) { Ctx.reportError(Fixup.getLoc(), "ILP32 4 byte checked GOT load/store relocation " - "not supported (unchecked eqv: LD32_GOT_LO12_NC)"); + "not supported (unchecked eqv: LD32_GOT_LO12_NC)"); } else { Ctx.reportError(Fixup.getLoc(), "LP64 4 byte checked GOT load/store relocation " - "not supported (unchecked/ILP32 eqv: " - "LD32_GOT_LO12_NC)"); + "not supported (unchecked/ILP32 eqv: " + "LD32_GOT_LO12_NC)"); } return ELF::R_AARCH64_NONE; } @@ -283,7 +287,8 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx, if (IsILP32) { return ELF::R_AARCH64_P32_TLSIE_LD32_GOTTPREL_LO12_NC; } else { - Ctx.reportError(Fixup.getLoc(), "LP64 32-bit load/store " + Ctx.reportError(Fixup.getLoc(), + "LP64 32-bit load/store " "relocation not supported (ILP32 eqv: " "TLSIE_LD32_GOTTPREL_LO12_NC)"); return ELF::R_AARCH64_NONE; @@ -295,14 +300,14 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx, } else { Ctx.reportError(Fixup.getLoc(), "LP64 4 byte TLSDESC load/store relocation " - "not supported (ILP32 eqv: TLSDESC_LD64_LO12)"); + "not supported (ILP32 eqv: TLSDESC_LD64_LO12)"); return ELF::R_AARCH64_NONE; } } Ctx.reportError(Fixup.getLoc(), "invalid fixup for 32-bit load/store instruction " - "fixup_aarch64_ldst_imm12_scale4"); + "fixup_aarch64_ldst_imm12_scale4"); return ELF::R_AARCH64_NONE; case AArch64::fixup_aarch64_ldst_imm12_scale8: if (SymLoc == AArch64MCExpr::VK_ABS && IsNC) @@ -312,8 +317,8 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx, return ELF::R_AARCH64_LD64_GOT_LO12_NC; } else { Ctx.reportError(Fixup.getLoc(), "ILP32 64-bit load/store " - "relocation not supported (LP64 eqv: " - "LD64_GOT_LO12_NC)"); + "relocation not supported (LP64 eqv: " + "LD64_GOT_LO12_NC)"); return ELF::R_AARCH64_NONE; } } @@ -330,8 +335,8 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx, return ELF::R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC; } else { Ctx.reportError(Fixup.getLoc(), "ILP32 64-bit load/store " - "relocation not supported (LP64 eqv: " - "TLSIE_LD64_GOTTPREL_LO12_NC)"); + "relocation not supported (LP64 eqv: " + "TLSIE_LD64_GOTTPREL_LO12_NC)"); return ELF::R_AARCH64_NONE; } } @@ -340,8 +345,8 @@ unsigned 
AArch64ELFObjectWriter::getRelocType(MCContext &Ctx, return ELF::R_AARCH64_TLSDESC_LD64_LO12; } else { Ctx.reportError(Fixup.getLoc(), "ILP32 64-bit load/store " - "relocation not supported (LP64 eqv: " - "TLSDESC_LD64_LO12)"); + "relocation not supported (LP64 eqv: " + "TLSDESC_LD64_LO12)"); return ELF::R_AARCH64_NONE; } } diff --git a/interpreter/llvm/src/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/interpreter/llvm/src/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp index 271263507ae15..a0de3c39562b2 100644 --- a/interpreter/llvm/src/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp +++ b/interpreter/llvm/src/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp @@ -14,10 +14,12 @@ //===----------------------------------------------------------------------===// #include "AArch64TargetStreamer.h" +#include "AArch64WinCOFFStreamer.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCCodeEmitter.h" @@ -29,8 +31,8 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbolELF.h" +#include "llvm/MC/MCWinCOFFStreamer.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/raw_ostream.h" @@ -210,6 +212,8 @@ createAArch64ObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) { const Triple &TT = STI.getTargetTriple(); if (TT.isOSBinFormatELF()) return new AArch64TargetELFStreamer(S); + if (TT.isOSBinFormatCOFF()) + return new AArch64TargetWinCOFFStreamer(S); return nullptr; } diff --git a/interpreter/llvm/src/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h b/interpreter/llvm/src/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h index 0f5b765c76972..4293dcba955ef 100644 --- a/interpreter/llvm/src/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h +++ b/interpreter/llvm/src/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h @@ -16,53 +16,47 @@ namespace llvm { namespace AArch64 { enum Fixups { - // fixup_aarch64_pcrel_adr_imm21 - A 21-bit pc-relative immediate inserted into - // an ADR instruction. + // A 21-bit pc-relative immediate inserted into an ADR instruction. fixup_aarch64_pcrel_adr_imm21 = FirstTargetFixupKind, - // fixup_aarch64_pcrel_adrp_imm21 - A 21-bit pc-relative immediate inserted into - // an ADRP instruction. + // A 21-bit pc-relative immediate inserted into an ADRP instruction. fixup_aarch64_pcrel_adrp_imm21, - // fixup_aarch64_imm12 - 12-bit fixup for add/sub instructions. - // No alignment adjustment. All value bits are encoded. + // 12-bit fixup for add/sub instructions. No alignment adjustment. All value + // bits are encoded. fixup_aarch64_add_imm12, - // fixup_aarch64_ldst_imm12_* - unsigned 12-bit fixups for load and - // store instructions. + // unsigned 12-bit fixups for load and store instructions. fixup_aarch64_ldst_imm12_scale1, fixup_aarch64_ldst_imm12_scale2, fixup_aarch64_ldst_imm12_scale4, fixup_aarch64_ldst_imm12_scale8, fixup_aarch64_ldst_imm12_scale16, - // fixup_aarch64_ldr_pcrel_imm19 - The high 19 bits of a 21-bit pc-relative - // immediate. Same encoding as fixup_aarch64_pcrel_adrhi, except this is used by - // pc-relative loads and generates relocations directly when necessary. + // The high 19 bits of a 21-bit pc-relative immediate. 
Same encoding as + // fixup_aarch64_pcrel_adrhi, except this is used by pc-relative loads and + // generates relocations directly when necessary. fixup_aarch64_ldr_pcrel_imm19, // FIXME: comment fixup_aarch64_movw, - // fixup_aarch64_pcrel_imm14 - The high 14 bits of a 21-bit pc-relative - // immediate. + // The high 14 bits of a 21-bit pc-relative immediate. fixup_aarch64_pcrel_branch14, - // fixup_aarch64_pcrel_branch19 - The high 19 bits of a 21-bit pc-relative - // immediate. Same encoding as fixup_aarch64_pcrel_adrhi, except this is use by - // b.cc and generates relocations directly when necessary. + // The high 19 bits of a 21-bit pc-relative immediate. Same encoding as + // fixup_aarch64_pcrel_adrhi, except this is used by b.cc and generates + // relocations directly when necessary. fixup_aarch64_pcrel_branch19, - // fixup_aarch64_pcrel_branch26 - The high 26 bits of a 28-bit pc-relative - // immediate. + // The high 26 bits of a 28-bit pc-relative immediate. fixup_aarch64_pcrel_branch26, - // fixup_aarch64_pcrel_call26 - The high 26 bits of a 28-bit pc-relative - // immediate. Distinguished from branch26 only on ELF. + // The high 26 bits of a 28-bit pc-relative immediate. Distinguished from + // branch26 only on ELF. fixup_aarch64_pcrel_call26, - // fixup_aarch64_tlsdesc_call - zero-space placeholder for the ELF - // R_AARCH64_TLSDESC_CALL relocation. + // zero-space placeholder for the ELF R_AARCH64_TLSDESC_CALL relocation. fixup_aarch64_tlsdesc_call, // Marker diff --git a/interpreter/llvm/src/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/interpreter/llvm/src/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp index 1b28df963b40d..c25bd8c8f6cc9 100644 --- a/interpreter/llvm/src/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp +++ b/interpreter/llvm/src/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp @@ -100,3 +100,9 @@ AArch64MCAsmInfoELF::AArch64MCAsmInfoELF(const Triple &T) { HasIdentDirective = true; } + +AArch64MCAsmInfoCOFF::AArch64MCAsmInfoCOFF() { + CommentString = ";"; + PrivateGlobalPrefix = ".L"; + PrivateLabelPrefix = ".L"; +} diff --git a/interpreter/llvm/src/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h b/interpreter/llvm/src/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h index 253cd30f26eef..2d7107a372443 100644 --- a/interpreter/llvm/src/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h +++ b/interpreter/llvm/src/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h @@ -14,6 +14,7 @@ #ifndef LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64MCASMINFO_H #define LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64MCASMINFO_H +#include "llvm/MC/MCAsmInfoCOFF.h" #include "llvm/MC/MCAsmInfoDarwin.h" #include "llvm/MC/MCAsmInfoELF.h" @@ -33,6 +34,10 @@ struct AArch64MCAsmInfoELF : public MCAsmInfoELF { explicit AArch64MCAsmInfoELF(const Triple &T); }; +struct AArch64MCAsmInfoCOFF : public MCAsmInfoCOFF { + explicit AArch64MCAsmInfoCOFF(); +}; + } // namespace llvm #endif diff --git a/interpreter/llvm/src/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/interpreter/llvm/src/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp index f710065d9bc75..a2555496cdb94 100644 --- a/interpreter/llvm/src/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp +++ b/interpreter/llvm/src/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp @@ -14,6 +14,7 @@ #include "AArch64MCTargetDesc.h" #include "AArch64ELFStreamer.h" #include "AArch64MCAsmInfo.h" +#include "AArch64WinCOFFStreamer.h" #include "InstPrinter/AArch64InstPrinter.h" #include
"llvm/MC/MCInstrAnalysis.h" #include "llvm/MC/MCInstrInfo.h" @@ -59,8 +60,10 @@ static MCAsmInfo *createAArch64MCAsmInfo(const MCRegisterInfo &MRI, MCAsmInfo *MAI; if (TheTriple.isOSBinFormatMachO()) MAI = new AArch64MCAsmInfoDarwin(); + else if (TheTriple.isOSBinFormatCOFF()) + MAI = new AArch64MCAsmInfoCOFF(); else { - assert(TheTriple.isOSBinFormatELF() && "Only expect Darwin or ELF"); + assert(TheTriple.isOSBinFormatELF() && "Invalid target"); MAI = new AArch64MCAsmInfoELF(TheTriple); } @@ -74,8 +77,8 @@ static MCAsmInfo *createAArch64MCAsmInfo(const MCRegisterInfo &MRI, static void adjustCodeGenOpts(const Triple &TT, Reloc::Model RM, CodeModel::Model &CM) { - assert((TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()) && - "Only expect Darwin and ELF targets"); + assert((TT.isOSBinFormatELF() || TT.isOSBinFormatMachO() || + TT.isOSBinFormatCOFF()) && "Invalid target"); if (CM == CodeModel::Default) CM = CodeModel::Small; @@ -122,6 +125,14 @@ static MCStreamer *createMachOStreamer(MCContext &Ctx, MCAsmBackend &TAB, /*LabelSections*/ true); } +static MCStreamer *createWinCOFFStreamer(MCContext &Ctx, MCAsmBackend &TAB, + raw_pwrite_stream &OS, + MCCodeEmitter *Emitter, bool RelaxAll, + bool IncrementalLinkerCompatible) { + return createAArch64WinCOFFStreamer(Ctx, TAB, OS, Emitter, RelaxAll, + IncrementalLinkerCompatible); +} + static MCInstrAnalysis *createAArch64InstrAnalysis(const MCInstrInfo *Info) { return new MCInstrAnalysis(Info); } @@ -154,6 +165,7 @@ extern "C" void LLVMInitializeAArch64TargetMC() { // Register the obj streamers. TargetRegistry::RegisterELFStreamer(*T, createELFStreamer); TargetRegistry::RegisterMachOStreamer(*T, createMachOStreamer); + TargetRegistry::RegisterCOFFStreamer(*T, createWinCOFFStreamer); // Register the obj target streamer. 
TargetRegistry::RegisterObjectTargetStreamer( diff --git a/interpreter/llvm/src/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h b/interpreter/llvm/src/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h index 615d7dab2c51f..1404926b8124c 100644 --- a/interpreter/llvm/src/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h +++ b/interpreter/llvm/src/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h @@ -60,6 +60,8 @@ MCObjectWriter *createAArch64MachObjectWriter(raw_pwrite_stream &OS, uint32_t CPUType, uint32_t CPUSubtype); +MCObjectWriter *createAArch64WinCOFFObjectWriter(raw_pwrite_stream &OS); + MCTargetStreamer *createAArch64AsmTargetStreamer(MCStreamer &S, formatted_raw_ostream &OS, MCInstPrinter *InstPrint, diff --git a/interpreter/llvm/src/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp b/interpreter/llvm/src/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp index 3d296ba4806b9..19b2576f68951 100644 --- a/interpreter/llvm/src/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp +++ b/interpreter/llvm/src/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp @@ -10,6 +10,7 @@ #include "MCTargetDesc/AArch64FixupKinds.h" #include "MCTargetDesc/AArch64MCTargetDesc.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/MachO.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCAssembler.h" @@ -23,7 +24,6 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCValue.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/MachO.h" #include "llvm/Support/MathExtras.h" #include #include diff --git a/interpreter/llvm/src/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp b/interpreter/llvm/src/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp new file mode 100644 index 0000000000000..31762b9e4cd50 --- /dev/null +++ b/interpreter/llvm/src/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp @@ -0,0 +1,104 @@ +//= AArch64WinCOFFObjectWriter.cpp - AArch64 Windows COFF Object Writer C++ =// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===---------------------------------------------------------------------===// + +#include "MCTargetDesc/AArch64FixupKinds.h" +#include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/COFF.h" +#include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCFixup.h" +#include "llvm/MC/MCFixupKindInfo.h" +#include "llvm/MC/MCValue.h" +#include "llvm/MC/MCWinCOFFObjectWriter.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include + +using namespace llvm; + +namespace { + +class AArch64WinCOFFObjectWriter : public MCWinCOFFObjectTargetWriter { +public: + AArch64WinCOFFObjectWriter() + : MCWinCOFFObjectTargetWriter(COFF::IMAGE_FILE_MACHINE_ARM64) {} + + ~AArch64WinCOFFObjectWriter() override = default; + + unsigned getRelocType(MCContext &Ctx, const MCValue &Target, + const MCFixup &Fixup, bool IsCrossSection, + const MCAsmBackend &MAB) const override; + + bool recordRelocation(const MCFixup &) const override; +}; + +} // end anonymous namespace + +unsigned AArch64WinCOFFObjectWriter::getRelocType( + MCContext &Ctx, const MCValue &Target, const MCFixup &Fixup, + bool IsCrossSection, const MCAsmBackend &MAB) const { + auto Modifier = Target.isAbsolute() ? 
MCSymbolRefExpr::VK_None + : Target.getSymA()->getKind(); + + switch (static_cast(Fixup.getKind())) { + default: { + const MCFixupKindInfo &Info = MAB.getFixupKindInfo(Fixup.getKind()); + report_fatal_error(Twine("unsupported relocation type: ") + Info.Name); + } + + case FK_Data_4: + switch (Modifier) { + default: + return COFF::IMAGE_REL_ARM64_ADDR32; + case MCSymbolRefExpr::VK_COFF_IMGREL32: + return COFF::IMAGE_REL_ARM64_ADDR32NB; + case MCSymbolRefExpr::VK_SECREL: + return COFF::IMAGE_REL_ARM64_SECREL; + } + + case FK_Data_8: + return COFF::IMAGE_REL_ARM64_ADDR64; + + case FK_SecRel_2: + return COFF::IMAGE_REL_ARM64_SECTION; + + case FK_SecRel_4: + return COFF::IMAGE_REL_ARM64_SECREL; + + case AArch64::fixup_aarch64_add_imm12: + return COFF::IMAGE_REL_ARM64_PAGEOFFSET_12A; + + case AArch64::fixup_aarch64_ldst_imm12_scale1: + case AArch64::fixup_aarch64_ldst_imm12_scale2: + case AArch64::fixup_aarch64_ldst_imm12_scale4: + case AArch64::fixup_aarch64_ldst_imm12_scale8: + case AArch64::fixup_aarch64_ldst_imm12_scale16: + return COFF::IMAGE_REL_ARM64_PAGEOFFSET_12L; + + case AArch64::fixup_aarch64_pcrel_adrp_imm21: + return COFF::IMAGE_REL_ARM64_PAGEBASE_REL21; + + case AArch64::fixup_aarch64_pcrel_branch26: + case AArch64::fixup_aarch64_pcrel_call26: + return COFF::IMAGE_REL_ARM64_BRANCH26; + } +} + +bool AArch64WinCOFFObjectWriter::recordRelocation(const MCFixup &Fixup) const { + return true; +} + +namespace llvm { + +MCObjectWriter *createAArch64WinCOFFObjectWriter(raw_pwrite_stream &OS) { + MCWinCOFFObjectTargetWriter *MOTW = new AArch64WinCOFFObjectWriter(); + return createWinCOFFObjectWriter(MOTW, OS); +} + +} // end namespace llvm diff --git a/interpreter/llvm/src/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp b/interpreter/llvm/src/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp new file mode 100644 index 0000000000000..6c8da27e398ff --- /dev/null +++ b/interpreter/llvm/src/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp @@ -0,0 +1,37 @@ +//===-- AArch64WinCOFFStreamer.cpp - ARM Target WinCOFF Streamer ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "AArch64WinCOFFStreamer.h" + +using namespace llvm; + +namespace { + +class AArch64WinCOFFStreamer : public MCWinCOFFStreamer { +public: + friend class AArch64TargetWinCOFFStreamer; + + AArch64WinCOFFStreamer(MCContext &C, MCAsmBackend &AB, MCCodeEmitter &CE, + raw_pwrite_stream &OS) + : MCWinCOFFStreamer(C, AB, CE, OS) {} +}; +} // end anonymous namespace + +namespace llvm { +MCWinCOFFStreamer +*createAArch64WinCOFFStreamer(MCContext &Context, MCAsmBackend &MAB, + raw_pwrite_stream &OS, + MCCodeEmitter *Emitter, bool RelaxAll, + bool IncrementalLinkerCompatible) { + auto *S = new AArch64WinCOFFStreamer(Context, MAB, *Emitter, OS); + S->getAssembler().setIncrementalLinkerCompatible(IncrementalLinkerCompatible); + return S; +} + +} // end llvm namespace diff --git a/interpreter/llvm/src/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.h b/interpreter/llvm/src/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.h new file mode 100644 index 0000000000000..1b4fcd6804e2b --- /dev/null +++ b/interpreter/llvm/src/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.h @@ -0,0 +1,43 @@ +//===-- AArch64WinCOFFStreamer.h - WinCOFF Streamer for AArch64 -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements WinCOFF streamer information for the AArch64 backend. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64WINCOFFSTREAMER_H +#define LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64WINCOFFSTREAMER_H + +#include "AArch64TargetStreamer.h" +#include "llvm/MC/MCWinCOFFStreamer.h" + +namespace { +class AArch64WinCOFFStreamer; + +class AArch64TargetWinCOFFStreamer : public llvm::AArch64TargetStreamer { +private: + AArch64WinCOFFStreamer &getStreamer(); + +public: + AArch64TargetWinCOFFStreamer(llvm::MCStreamer &S) + : AArch64TargetStreamer(S) {} +}; + +} // end anonymous namespace + +namespace llvm { + +MCWinCOFFStreamer +*createAArch64WinCOFFStreamer(MCContext &Context, MCAsmBackend &TAB, + raw_pwrite_stream &OS, + MCCodeEmitter *Emitter, bool RelaxAll, + bool IncrementalLinkerCompatible); +} // end llvm namespace + +#endif diff --git a/interpreter/llvm/src/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt b/interpreter/llvm/src/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt index 6d8be5e63fbbf..56eeba8a1d4b8 100644 --- a/interpreter/llvm/src/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt +++ b/interpreter/llvm/src/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt @@ -8,6 +8,8 @@ add_llvm_library(LLVMAArch64Desc AArch64MCTargetDesc.cpp AArch64MachObjectWriter.cpp AArch64TargetStreamer.cpp + AArch64WinCOFFObjectWriter.cpp + AArch64WinCOFFStreamer.cpp ) add_dependencies(LLVMAArch64Desc AArch64CommonTableGen) diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPU.h b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPU.h index 8f6e1e7d88466..568682899be51 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPU.h +++ b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPU.h @@ -27,35 +27,40 @@ class PassRegistry; class Module; // R600 Passes -FunctionPass *createR600VectorRegMerger(TargetMachine &tm); -FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm); +FunctionPass 
*createR600VectorRegMerger(); +FunctionPass *createR600ExpandSpecialInstrsPass(); FunctionPass *createR600EmitClauseMarkers(); -FunctionPass *createR600ClauseMergePass(TargetMachine &tm); -FunctionPass *createR600Packetizer(TargetMachine &tm); -FunctionPass *createR600ControlFlowFinalizer(TargetMachine &tm); +FunctionPass *createR600ClauseMergePass(); +FunctionPass *createR600Packetizer(); +FunctionPass *createR600ControlFlowFinalizer(); FunctionPass *createAMDGPUCFGStructurizerPass(); // SI Passes -FunctionPass *createSITypeRewriter(); FunctionPass *createSIAnnotateControlFlowPass(); FunctionPass *createSIFoldOperandsPass(); FunctionPass *createSIPeepholeSDWAPass(); FunctionPass *createSILowerI1CopiesPass(); FunctionPass *createSIShrinkInstructionsPass(); -FunctionPass *createSILoadStoreOptimizerPass(TargetMachine &tm); +FunctionPass *createSILoadStoreOptimizerPass(); FunctionPass *createSIWholeQuadModePass(); FunctionPass *createSIFixControlFlowLiveIntervalsPass(); FunctionPass *createSIFixSGPRCopiesPass(); FunctionPass *createSIDebuggerInsertNopsPass(); FunctionPass *createSIInsertWaitsPass(); FunctionPass *createSIInsertWaitcntsPass(); -FunctionPass *createAMDGPUCodeGenPreparePass(const GCNTargetMachine *TM = nullptr); +FunctionPass *createAMDGPUCodeGenPreparePass(); +FunctionPass *createAMDGPUMachineCFGStructurizerPass(); -ModulePass *createAMDGPUAnnotateKernelFeaturesPass(const TargetMachine *TM = nullptr); +void initializeAMDGPUMachineCFGStructurizerPass(PassRegistry&); +extern char &AMDGPUMachineCFGStructurizerID; + +void initializeAMDGPUAlwaysInlinePass(PassRegistry&); + +Pass *createAMDGPUAnnotateKernelFeaturesPass(); void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &); extern char &AMDGPUAnnotateKernelFeaturesID; -ModulePass *createAMDGPULowerIntrinsicsPass(const TargetMachine *TM = nullptr); +ModulePass *createAMDGPULowerIntrinsicsPass(); void initializeAMDGPULowerIntrinsicsPass(PassRegistry &); extern char &AMDGPULowerIntrinsicsID; @@ -93,7 +98,7 @@ void initializeSIOptimizeExecMaskingPass(PassRegistry &); extern char &SIOptimizeExecMaskingID; // Passes common to R600 and SI -FunctionPass *createAMDGPUPromoteAlloca(const TargetMachine *TM = nullptr); +FunctionPass *createAMDGPUPromoteAlloca(); void initializeAMDGPUPromoteAllocaPass(PassRegistry&); extern char &AMDGPUPromoteAllocaID; diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPU.td b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPU.td index b279bd61e1809..f1d899c4d0039 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPU.td +++ b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPU.td @@ -238,6 +238,36 @@ def FeatureSDWA : SubtargetFeature<"sdwa", "Support SDWA (Sub-DWORD Addressing) extension" >; +def FeatureSDWAOmod : SubtargetFeature<"sdwa-omod", + "HasSDWAOmod", + "true", + "Support OMod with SDWA (Sub-DWORD Addressing) extension" +>; + +def FeatureSDWAScalar : SubtargetFeature<"sdwa-scalar", + "HasSDWAScalar", + "true", + "Support scalar register with SDWA (Sub-DWORD Addressing) extension" +>; + +def FeatureSDWASdst : SubtargetFeature<"sdwa-sdst", + "HasSDWASdst", + "true", + "Support scalar dst for VOPC with SDWA (Sub-DWORD Addressing) extension" +>; + +def FeatureSDWAMac : SubtargetFeature<"sdwa-mav", + "HasSDWAMac", + "true", + "Support v_mac_f32/f16 with SDWA (Sub-DWORD Addressing) extension" +>; + +def FeatureSDWAOutModsVOPC : SubtargetFeature<"sdwa-out-mods-vopc", + "HasSDWAOutModsVOPC", + "true", + "Support clamp for VOPC with SDWA (Sub-DWORD Addressing) extension" +>; + def FeatureDPP : 
SubtargetFeature<"dpp", "HasDPP", "true", @@ -365,6 +395,13 @@ def FeatureFlatForGlobal : SubtargetFeature<"flat-for-global", "Force to generate flat instruction for global" >; +def FeatureAutoWaitcntBeforeBarrier : SubtargetFeature < + "auto-waitcnt-before-barrier", + "AutoWaitcntBeforeBarrier", + "true", + "Hardware automatically inserts waitcnt before barrier" +>; + // Dummy feature used to disable assembler instructions. def FeatureDisable : SubtargetFeature<"", "FeatureDisable","true", @@ -414,8 +451,8 @@ def FeatureVolcanicIslands : SubtargetFeatureGeneration<"VOLCANIC_ISLANDS", FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN, FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts, FeatureSMemRealTime, FeatureVGPRIndexMode, FeatureMovrel, - FeatureScalarStores, FeatureInv2PiInlineImm, FeatureSDWA, - FeatureDPP + FeatureScalarStores, FeatureInv2PiInlineImm, + FeatureSDWA, FeatureSDWAOutModsVOPC, FeatureSDWAMac, FeatureDPP ] >; @@ -426,6 +463,7 @@ def FeatureGFX9 : SubtargetFeatureGeneration<"GFX9", FeatureSMemRealTime, FeatureScalarStores, FeatureInv2PiInlineImm, FeatureApertureRegs, FeatureGFX9Insts, FeatureVOP3P, FeatureVGPRIndexMode, FeatureFastFMAF32, FeatureDPP, + FeatureSDWA, FeatureSDWAOmod, FeatureSDWAScalar, FeatureSDWASdst, FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts ] >; @@ -440,6 +478,16 @@ class SubtargetFeatureISAVersion ; +def FeatureISAVersion6_0_0 : SubtargetFeatureISAVersion <6,0,0, + [FeatureSouthernIslands, + FeatureFastFMAF32, + HalfRate64Ops, + FeatureLDSBankCount32]>; + +def FeatureISAVersion6_0_1 : SubtargetFeatureISAVersion <6,0,1, + [FeatureSouthernIslands, + FeatureLDSBankCount32]>; + def FeatureISAVersion7_0_0 : SubtargetFeatureISAVersion <7,0,0, [FeatureSeaIslands, FeatureLDSBankCount32]>; @@ -454,6 +502,10 @@ def FeatureISAVersion7_0_2 : SubtargetFeatureISAVersion <7,0,2, [FeatureSeaIslands, FeatureLDSBankCount16]>; +def FeatureISAVersion7_0_3 : SubtargetFeatureISAVersion <7,0,3, + [FeatureSeaIslands, + FeatureLDSBankCount16]>; + def FeatureISAVersion8_0_0 : SubtargetFeatureISAVersion <8,0,0, [FeatureVolcanicIslands, FeatureLDSBankCount32, @@ -482,8 +534,23 @@ def FeatureISAVersion8_1_0 : SubtargetFeatureISAVersion <8,1,0, FeatureLDSBankCount16, FeatureXNACK]>; -def FeatureISAVersion9_0_0 : SubtargetFeatureISAVersion <9,0,0,[]>; -def FeatureISAVersion9_0_1 : SubtargetFeatureISAVersion <9,0,1,[]>; +def FeatureISAVersion9_0_0 : SubtargetFeatureISAVersion <9,0,0, + [FeatureGFX9, + FeatureLDSBankCount32]>; + +def FeatureISAVersion9_0_1 : SubtargetFeatureISAVersion <9,0,1, + [FeatureGFX9, + FeatureLDSBankCount32, + FeatureXNACK]>; + +def FeatureISAVersion9_0_2 : SubtargetFeatureISAVersion <9,0,2, + [FeatureGFX9, + FeatureLDSBankCount32]>; + +def FeatureISAVersion9_0_3 : SubtargetFeatureISAVersion <9,0,3, + [FeatureGFX9, + FeatureLDSBankCount32, + FeatureXNACK]>; //===----------------------------------------------------------------------===// // Debugger related subtarget features. 
@@ -534,10 +601,12 @@ def AMDGPUAsmVariants { int VOP3_ID = 1; string SDWA = "SDWA"; int SDWA_ID = 2; + string SDWA9 = "SDWA9"; + int SDWA9_ID = 3; string DPP = "DPP"; - int DPP_ID = 3; + int DPP_ID = 4; string Disable = "Disable"; - int Disable_ID = 4; + int Disable_ID = 5; } def DefaultAMDGPUAsmParserVariant : AsmParserVariant { @@ -555,6 +624,12 @@ def SDWAAsmParserVariant : AsmParserVariant { let Name = AMDGPUAsmVariants.SDWA; } +def SDWA9AsmParserVariant : AsmParserVariant { + let Variant = AMDGPUAsmVariants.SDWA9_ID; + let Name = AMDGPUAsmVariants.SDWA9; +} + + def DPPAsmParserVariant : AsmParserVariant { let Variant = AMDGPUAsmVariants.DPP_ID; let Name = AMDGPUAsmVariants.DPP; @@ -567,6 +642,7 @@ def AMDGPU : Target { let AssemblyParserVariants = [DefaultAMDGPUAsmParserVariant, VOP3AsmParserVariant, SDWAAsmParserVariant, + SDWA9AsmParserVariant, DPPAsmParserVariant]; let AssemblyWriters = [AMDGPUAsmWriter]; } @@ -599,7 +675,11 @@ def isCIVI : Predicate < "Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS">, AssemblerPredicate<"FeatureCIInsts">; -def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">; +def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">, + AssemblerPredicate<"FeatureFlatAddressSpace">; + +def HasFlatGlobalInsts : Predicate<"Subtarget->hasFlatGlobalInsts()">, + AssemblerPredicate<"FeatureFlatGlobalInsts">; def Has16BitInsts : Predicate<"Subtarget->has16BitInsts()">, AssemblerPredicate<"Feature16BitInsts">; @@ -607,7 +687,10 @@ def HasVOP3PInsts : Predicate<"Subtarget->hasVOP3PInsts()">, AssemblerPredicate<"FeatureVOP3P">; def HasSDWA : Predicate<"Subtarget->hasSDWA()">, - AssemblerPredicate<"FeatureSDWA">; + AssemblerPredicate<"FeatureSDWA,FeatureVolcanicIslands">; + +def HasSDWA9 : Predicate<"Subtarget->hasSDWA()">, + AssemblerPredicate<"FeatureSDWA,FeatureGFX9">; def HasDPP : Predicate<"Subtarget->hasDPP()">, AssemblerPredicate<"FeatureDPP">; diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp index 3c99f48e818ae..faa424eb0a64a 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp +++ b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp @@ -10,15 +10,15 @@ /// This is the AMGPU address space based alias analysis pass. //===----------------------------------------------------------------------===// -#include "AMDGPU.h" #include "AMDGPUAliasAnalysis.h" +#include "AMDGPU.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/Passes.h" -#include "llvm/Support/raw_ostream.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Function.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp index 1d03714874e28..6f3742ed039bd 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp +++ b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp @@ -9,7 +9,7 @@ // /// \file /// This pass marks all internal functions as always_inline and creates -/// duplicates of all other functions a marks the duplicates as always_inline. +/// duplicates of all other functions and marks the duplicates as always_inline. 
// //===----------------------------------------------------------------------===// @@ -22,18 +22,22 @@ using namespace llvm; namespace { class AMDGPUAlwaysInline : public ModulePass { - static char ID; - bool GlobalOpt; public: - AMDGPUAlwaysInline(bool GlobalOpt) : ModulePass(ID), GlobalOpt(GlobalOpt) { } + static char ID; + + AMDGPUAlwaysInline(bool GlobalOpt = false) : + ModulePass(ID), GlobalOpt(GlobalOpt) { } bool runOnModule(Module &M) override; StringRef getPassName() const override { return "AMDGPU Always Inline Pass"; } }; } // End anonymous namespace +INITIALIZE_PASS(AMDGPUAlwaysInline, "amdgpu-always-inline", + "AMDGPU Inline All Functions", false, false) + char AMDGPUAlwaysInline::ID = 0; bool AMDGPUAlwaysInline::runOnModule(Module &M) { diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp index 3d8db7cd8af55..c68e5861ff25b 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp +++ b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp @@ -15,7 +15,10 @@ #include "AMDGPU.h" #include "AMDGPUSubtarget.h" #include "llvm/ADT/Triple.h" +#include "llvm/Analysis/CallGraphSCCPass.h" +#include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" @@ -25,28 +28,27 @@ using namespace llvm; namespace { -class AMDGPUAnnotateKernelFeatures : public ModulePass { +class AMDGPUAnnotateKernelFeatures : public CallGraphSCCPass { private: - const TargetMachine *TM; + const TargetMachine *TM = nullptr; AMDGPUAS AS; - static bool hasAddrSpaceCast(const Function &F, AMDGPUAS AS); - void addAttrToCallers(Function *Intrin, StringRef AttrName); - bool addAttrsForIntrinsics(Module &M, ArrayRef); + bool addFeatureAttributes(Function &F); public: static char ID; - AMDGPUAnnotateKernelFeatures(const TargetMachine *TM_ = nullptr) : - ModulePass(ID), TM(TM_) {} - bool runOnModule(Module &M) override; + AMDGPUAnnotateKernelFeatures() : CallGraphSCCPass(ID) {} + + bool doInitialization(CallGraph &CG) override; + bool runOnSCC(CallGraphSCC &SCC) override; StringRef getPassName() const override { return "AMDGPU Annotate Kernel Features"; } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); - ModulePass::getAnalysisUsage(AU); + CallGraphSCCPass::getAnalysisUsage(AU); } static bool visitConstantExpr(const ConstantExpr *CE, AMDGPUAS AS); @@ -122,16 +124,130 @@ bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively( return false; } -// Return true if an addrspacecast is used that requires the queue ptr. -bool AMDGPUAnnotateKernelFeatures::hasAddrSpaceCast(const Function &F, - AMDGPUAS AS) { +// We do not need to note the x workitem or workgroup id because they are always +// initialized. +// +// TODO: We should not add the attributes if the known compile time workgroup +// size is 1 for y/z. 
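The rewritten pass works bottom-up over call-graph SCCs: a caller inherits the feature attributes its callees were already marked with, so a kernel ends up carrying the union of everything it transitively calls. A minimal sketch of that propagation step, with toy types standing in for Function and its attribute list (all names here are hypothetical, not the pass itself):

#include <cassert>
#include <set>
#include <string>
#include <vector>

struct Func {
  std::set<std::string> Attrs;
  std::vector<Func *> Callees;
};

// Returns true if any attribute was newly added to F, mirroring what
// copyFeaturesToFunction does for each call site.
bool propagateFromCallees(Func &F) {
  bool Changed = false;
  for (Func *Callee : F.Callees)
    for (const std::string &A : Callee->Attrs)
      Changed |= F.Attrs.insert(A).second;
  return Changed;
}

int main() {
  Func Leaf, Kernel;
  Leaf.Attrs = {"amdgpu-work-item-id-y", "amdgpu-queue-ptr"};
  Kernel.Callees = {&Leaf};
  assert(propagateFromCallees(Kernel));
  assert(Kernel.Attrs.count("amdgpu-queue-ptr"));
}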
+static StringRef intrinsicToAttrName(Intrinsic::ID ID, + bool &NonKernelOnly, + bool &IsQueuePtr) { + switch (ID) { + case Intrinsic::amdgcn_workitem_id_x: + NonKernelOnly = true; + return "amdgpu-work-item-id-x"; + case Intrinsic::amdgcn_workgroup_id_x: + NonKernelOnly = true; + return "amdgpu-work-group-id-x"; + case Intrinsic::amdgcn_workitem_id_y: + case Intrinsic::r600_read_tidig_y: + return "amdgpu-work-item-id-y"; + case Intrinsic::amdgcn_workitem_id_z: + case Intrinsic::r600_read_tidig_z: + return "amdgpu-work-item-id-z"; + case Intrinsic::amdgcn_workgroup_id_y: + case Intrinsic::r600_read_tgid_y: + return "amdgpu-work-group-id-y"; + case Intrinsic::amdgcn_workgroup_id_z: + case Intrinsic::r600_read_tgid_z: + return "amdgpu-work-group-id-z"; + case Intrinsic::amdgcn_dispatch_ptr: + return "amdgpu-dispatch-ptr"; + case Intrinsic::amdgcn_dispatch_id: + return "amdgpu-dispatch-id"; + case Intrinsic::amdgcn_kernarg_segment_ptr: + case Intrinsic::amdgcn_implicitarg_ptr: + return "amdgpu-kernarg-segment-ptr"; + case Intrinsic::amdgcn_queue_ptr: + case Intrinsic::trap: + case Intrinsic::debugtrap: + IsQueuePtr = true; + return "amdgpu-queue-ptr"; + default: + return ""; + } +} + +static bool handleAttr(Function &Parent, const Function &Callee, + StringRef Name) { + if (Callee.hasFnAttribute(Name)) { + Parent.addFnAttr(Name); + return true; + } + + return false; +} + +static void copyFeaturesToFunction(Function &Parent, const Function &Callee, + bool &NeedQueuePtr) { + // X ids unnecessarily propagated to kernels. + static const StringRef AttrNames[] = { + { "amdgpu-work-item-id-x" }, + { "amdgpu-work-item-id-y" }, + { "amdgpu-work-item-id-z" }, + { "amdgpu-work-group-id-x" }, + { "amdgpu-work-group-id-y" }, + { "amdgpu-work-group-id-z" }, + { "amdgpu-dispatch-ptr" }, + { "amdgpu-dispatch-id" }, + { "amdgpu-kernarg-segment-ptr" } + }; + + if (handleAttr(Parent, Callee, "amdgpu-queue-ptr")) + NeedQueuePtr = true; + + for (StringRef AttrName : AttrNames) + handleAttr(Parent, Callee, AttrName); +} + +bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) { + const AMDGPUSubtarget &ST = TM->getSubtarget(F); + bool HasFlat = ST.hasFlatAddressSpace(); + bool HasApertureRegs = ST.hasApertureRegs(); SmallPtrSet ConstantExprVisited; - for (const BasicBlock &BB : F) { - for (const Instruction &I : BB) { + bool Changed = false; + bool NeedQueuePtr = false; + bool HaveCall = false; + bool IsFunc = !AMDGPU::isEntryFunctionCC(F.getCallingConv()); + + for (BasicBlock &BB : F) { + for (Instruction &I : BB) { + CallSite CS(&I); + if (CS) { + Function *Callee = CS.getCalledFunction(); + + // TODO: Do something with indirect calls. 
+ if (!Callee) { + if (!CS.isInlineAsm()) + HaveCall = true; + continue; + } + + Intrinsic::ID IID = Callee->getIntrinsicID(); + if (IID == Intrinsic::not_intrinsic) { + HaveCall = true; + copyFeaturesToFunction(F, *Callee, NeedQueuePtr); + Changed = true; + } else { + bool NonKernelOnly = false; + StringRef AttrName = intrinsicToAttrName(IID, + NonKernelOnly, NeedQueuePtr); + if (!AttrName.empty() && (IsFunc || !NonKernelOnly)) { + F.addFnAttr(AttrName); + Changed = true; + } + } + } + + if (NeedQueuePtr || HasApertureRegs) + continue; + if (const AddrSpaceCastInst *ASC = dyn_cast(&I)) { - if (castRequiresQueuePtr(ASC, AS)) - return true; + if (castRequiresQueuePtr(ASC, AS)) { + NeedQueuePtr = true; + continue; + } } for (const Use &U : I.operands()) { @@ -139,98 +255,57 @@ bool AMDGPUAnnotateKernelFeatures::hasAddrSpaceCast(const Function &F, if (!OpC) continue; - if (visitConstantExprsRecursively(OpC, ConstantExprVisited, AS)) - return true; + if (visitConstantExprsRecursively(OpC, ConstantExprVisited, AS)) { + NeedQueuePtr = true; + break; + } } } } - return false; -} - -void AMDGPUAnnotateKernelFeatures::addAttrToCallers(Function *Intrin, - StringRef AttrName) { - SmallPtrSet SeenFuncs; - - for (User *U : Intrin->users()) { - // CallInst is the only valid user for an intrinsic. - CallInst *CI = cast(U); - - Function *CallingFunction = CI->getParent()->getParent(); - if (SeenFuncs.insert(CallingFunction).second) - CallingFunction->addFnAttr(AttrName); + if (NeedQueuePtr) { + F.addFnAttr("amdgpu-queue-ptr"); + Changed = true; } -} - -bool AMDGPUAnnotateKernelFeatures::addAttrsForIntrinsics( - Module &M, - ArrayRef IntrinsicToAttr) { - bool Changed = false; - for (const StringRef *Arr : IntrinsicToAttr) { - if (Function *Fn = M.getFunction(Arr[0])) { - addAttrToCallers(Fn, Arr[1]); - Changed = true; - } + // TODO: We could refine this to captured pointers that could possibly be + // accessed by flat instructions. For now this is mostly a poor way of + // estimating whether there are calls before argument lowering. + if (HasFlat && !IsFunc && HaveCall) { + F.addFnAttr("amdgpu-flat-scratch"); + Changed = true; } return Changed; } -bool AMDGPUAnnotateKernelFeatures::runOnModule(Module &M) { +bool AMDGPUAnnotateKernelFeatures::runOnSCC(CallGraphSCC &SCC) { + Module &M = SCC.getCallGraph().getModule(); Triple TT(M.getTargetTriple()); - AS = AMDGPU::getAMDGPUAS(M); - - static const StringRef IntrinsicToAttr[][2] = { - // .x omitted - { "llvm.amdgcn.workitem.id.y", "amdgpu-work-item-id-y" }, - { "llvm.amdgcn.workitem.id.z", "amdgpu-work-item-id-z" }, - - { "llvm.amdgcn.workgroup.id.y", "amdgpu-work-group-id-y" }, - { "llvm.amdgcn.workgroup.id.z", "amdgpu-work-group-id-z" }, - - { "llvm.r600.read.tgid.y", "amdgpu-work-group-id-y" }, - { "llvm.r600.read.tgid.z", "amdgpu-work-group-id-z" }, - - // .x omitted - { "llvm.r600.read.tidig.y", "amdgpu-work-item-id-y" }, - { "llvm.r600.read.tidig.z", "amdgpu-work-item-id-z" } - }; - static const StringRef HSAIntrinsicToAttr[][2] = { - { "llvm.amdgcn.dispatch.ptr", "amdgpu-dispatch-ptr" }, - { "llvm.amdgcn.queue.ptr", "amdgpu-queue-ptr" }, - { "llvm.amdgcn.dispatch.id", "amdgpu-dispatch-id" }, - { "llvm.trap", "amdgpu-queue-ptr" }, - { "llvm.debugtrap", "amdgpu-queue-ptr" } - }; - - // TODO: We should not add the attributes if the known compile time workgroup - // size is 1 for y/z. - - // TODO: Intrinsics that require queue ptr. 
+ bool Changed = false; + for (CallGraphNode *I : SCC) { + Function *F = I->getFunction(); + if (!F || F->isDeclaration()) + continue; - // We do not need to note the x workitem or workgroup id because they are - // always initialized. + Changed |= addFeatureAttributes(*F); } - bool Changed = addAttrsForIntrinsics(M, IntrinsicToAttr); - if (TT.getOS() == Triple::AMDHSA || TT.getOS() == Triple::Mesa3D) { - Changed |= addAttrsForIntrinsics(M, HSAIntrinsicToAttr); - for (Function &F : M) { - if (F.hasFnAttribute("amdgpu-queue-ptr")) - continue; + return Changed; +} - bool HasApertureRegs = - TM && TM->getSubtarget<AMDGPUSubtarget>(F).hasApertureRegs(); - if (!HasApertureRegs && hasAddrSpaceCast(F, AS)) - F.addFnAttr("amdgpu-queue-ptr"); - } - } +bool AMDGPUAnnotateKernelFeatures::doInitialization(CallGraph &CG) { + auto *TPC = getAnalysisIfAvailable<TargetPassConfig>(); + if (!TPC) + report_fatal_error("TargetMachine is required"); - return Changed; + AS = AMDGPU::getAMDGPUAS(CG.getModule()); + TM = &TPC->getTM<TargetMachine>(); + return false; } -ModulePass *llvm::createAMDGPUAnnotateKernelFeaturesPass(const TargetMachine *TM) { - return new AMDGPUAnnotateKernelFeatures(TM); +Pass *llvm::createAMDGPUAnnotateKernelFeaturesPass() { + return new AMDGPUAnnotateKernelFeatures(); } diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp index 91b3649f5c39d..ed5370826647f 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp +++ b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp @@ -19,8 +19,8 @@ #include "llvm/Analysis/DivergenceAnalysis.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" -#include "llvm/IR/InstVisitor.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstVisitor.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -107,11 +107,12 @@ bool AMDGPUAnnotateUniformValues::isClobberedInFunction(LoadInst * Load) { DFS(Start, Checklist); for (auto &BB : Checklist) { - BasicBlock::iterator StartIt = (BB == Load->getParent()) ? - BasicBlock::iterator(Load) : BB->end(); - if (MDR->getPointerDependencyFrom(MemoryLocation(Ptr), - true, StartIt, BB, Load).isClobber()) - return true; + BasicBlock::iterator StartIt = (!L && (BB == Load->getParent())) ?
+ BasicBlock::iterator(Load) : BB->end(); + auto Q = MDR->getPointerDependencyFrom(MemoryLocation(Ptr), true, + StartIt, BB, Load); + if (Q.isClobber() || Q.isUnknown()) + return true; } return false; } diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index f473944cd5283..2247814cfe55d 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -17,25 +17,25 @@ // #include "AMDGPUAsmPrinter.h" -#include "AMDGPUTargetMachine.h" -#include "MCTargetDesc/AMDGPUTargetStreamer.h" -#include "InstPrinter/AMDGPUInstPrinter.h" -#include "Utils/AMDGPUBaseInfo.h" #include "AMDGPU.h" #include "AMDGPUSubtarget.h" +#include "AMDGPUTargetMachine.h" +#include "InstPrinter/AMDGPUInstPrinter.h" +#include "MCTargetDesc/AMDGPUTargetStreamer.h" #include "R600Defines.h" #include "R600MachineFunctionInfo.h" #include "R600RegisterInfo.h" #include "SIDefines.h" -#include "SIMachineFunctionInfo.h" #include "SIInstrInfo.h" +#include "SIMachineFunctionInfo.h" #include "SIRegisterInfo.h" +#include "Utils/AMDGPUBaseInfo.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCStreamer.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Target/TargetLoweringObjectFile.h" @@ -268,19 +268,10 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { CurrentProgramInfo.ScratchSize, getFunctionCodeSize(MF)); - OutStreamer->emitRawComment(" codeLenInByte = " + - Twine(getFunctionCodeSize(MF)), false); - OutStreamer->emitRawComment( - " NumSgprs: " + Twine(CurrentProgramInfo.NumSGPR), false); - OutStreamer->emitRawComment( - " NumVgprs: " + Twine(CurrentProgramInfo.NumVGPR), false); - OutStreamer->emitRawComment( " FloatMode: " + Twine(CurrentProgramInfo.FloatMode), false); OutStreamer->emitRawComment( " IeeeMode: " + Twine(CurrentProgramInfo.IEEEMode), false); - OutStreamer->emitRawComment( - " ScratchSize: " + Twine(CurrentProgramInfo.ScratchSize), false); OutStreamer->emitRawComment( " LDSByteSize: " + Twine(CurrentProgramInfo.LDSSize) + " bytes/workgroup (compile time only)", false); @@ -503,40 +494,37 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage( Info.HasDynamicallySizedStack = FrameInfo.hasVarSizedObjects(); Info.PrivateSegmentSize = FrameInfo.getStackSize(); - if (!FrameInfo.hasCalls()) { - Info.UsesVCC = MRI.isPhysRegUsed(AMDGPU::VCC_LO) || - MRI.isPhysRegUsed(AMDGPU::VCC_HI); - // If there are no calls, MachineRegisterInfo can tell us the used register - // count easily. + Info.UsesVCC = MRI.isPhysRegUsed(AMDGPU::VCC_LO) || + MRI.isPhysRegUsed(AMDGPU::VCC_HI); - MCPhysReg HighestVGPRReg = AMDGPU::NoRegister; - for (MCPhysReg Reg : reverse(AMDGPU::VGPR_32RegClass.getRegisters())) { - if (MRI.isPhysRegUsed(Reg)) { - HighestVGPRReg = Reg; - break; - } - } + // If there are no calls, MachineRegisterInfo can tell us the used register + // count easily. 
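The register count computed below comes from a reverse scan: the first used register found from the top determines the count, since hardware register indices start at 0. The same arithmetic in a self-contained form, assuming a plain bitmap of used registers (a sketch, not the MachineRegisterInfo API):

#include <cassert>

// Scan from the highest index down; the count is the highest used index
// plus one. Returns 0 if no register of this class is used.
int countUsedRegs(const bool *Used, int NumRegs) {
  for (int I = NumRegs - 1; I >= 0; --I)
    if (Used[I])
      return I + 1;
  return 0;
}

int main() {
  bool Used[8] = {true, false, true, false, false, false, false, false};
  assert(countUsedRegs(Used, 8) == 3); // Highest used index is 2.
}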
- MCPhysReg HighestSGPRReg = AMDGPU::NoRegister; - for (MCPhysReg Reg : reverse(AMDGPU::SGPR_32RegClass.getRegisters())) { - if (MRI.isPhysRegUsed(Reg)) { - HighestSGPRReg = Reg; - break; - } + MCPhysReg HighestVGPRReg = AMDGPU::NoRegister; + for (MCPhysReg Reg : reverse(AMDGPU::VGPR_32RegClass.getRegisters())) { + if (MRI.isPhysRegUsed(Reg)) { + HighestVGPRReg = Reg; + break; } + } - // We found the maximum register index. They start at 0, so add one to get the - // number of registers. - Info.NumVGPR = HighestVGPRReg == AMDGPU::NoRegister ? 0 : - TRI.getHWRegIndex(HighestVGPRReg) + 1; - Info.NumExplicitSGPR = HighestSGPRReg == AMDGPU::NoRegister ? 0 : - TRI.getHWRegIndex(HighestSGPRReg) + 1; - - return Info; + MCPhysReg HighestSGPRReg = AMDGPU::NoRegister; + for (MCPhysReg Reg : reverse(AMDGPU::SGPR_32RegClass.getRegisters())) { + if (MRI.isPhysRegUsed(Reg)) { + HighestSGPRReg = Reg; + break; + } } - llvm_unreachable("calls not implemented"); + // We found the maximum register index. They start at 0, so add one to get the + // number of registers. + Info.NumVGPR = HighestVGPRReg == AMDGPU::NoRegister ? 0 : + TRI.getHWRegIndex(HighestVGPRReg) + 1; + Info.NumExplicitSGPR = HighestSGPRReg == AMDGPU::NoRegister ? 0 : + TRI.getHWRegIndex(HighestSGPRReg) + 1; + + return Info; } void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUAsmPrinter.h index e5adeeb465e12..0a58ce06704dd 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUAsmPrinter.h +++ b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUAsmPrinter.h @@ -15,8 +15,8 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUASMPRINTER_H #define LLVM_LIB_TARGET_AMDGPU_AMDGPUASMPRINTER_H -#include "AMDKernelCodeT.h" #include "AMDGPU.h" +#include "AMDKernelCodeT.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/AsmPrinter.h" #include diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUCallLowering.cpp index e67ae092fddae..515cc07dd4498 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUCallLowering.cpp +++ b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUCallLowering.cpp @@ -18,8 +18,8 @@ #include "AMDGPUISelLowering.h" #include "AMDGPUSubtarget.h" #include "SIISelLowering.h" -#include "SIRegisterInfo.h" #include "SIMachineFunctionInfo.h" +#include "SIRegisterInfo.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/MachineInstrBuilder.h" diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUCallLowering.h b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUCallLowering.h index 09bdf8ffcde7b..251cb7a2c440d 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUCallLowering.h +++ b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUCallLowering.h @@ -38,7 +38,8 @@ class AMDGPUCallLowering: public CallLowering { unsigned VReg) const override; bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F, ArrayRef VRegs) const override; - CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const; + static CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg); + static CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC, bool IsVarArg); }; } // End of namespace llvm; #endif diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUCallingConv.td b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUCallingConv.td index d308f718aae13..4bef7a89bfe34 100644 --- 
a/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUCallingConv.td +++ b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUCallingConv.td @@ -13,6 +13,8 @@ // Inversion of CCIfInReg class CCIfNotInReg : CCIf<"!ArgFlags.isInReg()", A> {} +class CCIfExtend + : CCIf<"ArgFlags.isSExt() || ArgFlags.isZExt()", A>; // Calling convention for SI def CC_SI : CallingConv<[ @@ -52,7 +54,7 @@ def CC_SI : CallingConv<[ ]>>> ]>; -def RetCC_SI : CallingConv<[ +def RetCC_SI_Shader : CallingConv<[ CCIfType<[i32] , CCAssignToReg<[ SGPR0, SGPR1, SGPR2, SGPR3, SGPR4, SGPR5, SGPR6, SGPR7, SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15, @@ -99,6 +101,52 @@ def CC_AMDGPU_Kernel : CallingConv<[ CCCustom<"allocateKernArg"> ]>; +def CSR_AMDGPU_VGPRs_24_255 : CalleeSavedRegs< + (sequence "VGPR%u", 24, 255) +>; + +def CSR_AMDGPU_VGPRs_32_255 : CalleeSavedRegs< + (sequence "VGPR%u", 32, 255) +>; + +def CSR_AMDGPU_SGPRs_32_103 : CalleeSavedRegs< + (sequence "SGPR%u", 32, 103) +>; + +def CSR_AMDGPU_HighRegs : CalleeSavedRegs< + (add CSR_AMDGPU_VGPRs_32_255, CSR_AMDGPU_SGPRs_32_103) +>; + +// Calling convention for leaf functions +def CC_AMDGPU_Func : CallingConv<[ + CCIfByVal>, + CCIfType<[i1], CCPromoteToType>, + CCIfType<[i1, i8, i16], CCIfExtend>>, + CCIfType<[i32, f32, i16, f16, v2i16, v2f16, i1], CCAssignToReg<[ + VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7, + VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15, + VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23, + VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31]>>, + CCIfType<[i64, f64, v2i32, v2f32, v4i32, v4f32, v8i32, v8f32, v16i32, v16f32, v2i64, v2f64], CCCustom<"allocateVGPRTuple">>, + CCIfType<[i32, f32, v2i16, v2f16, i16, f16, i1], CCAssignToStack<4, 4>>, + CCIfType<[i64, f64, v2i32, v2f32], CCAssignToStack<8, 4>>, + CCIfType<[v4i32, v4f32, v2i64, v2f64], CCAssignToStack<16, 4>>, + CCIfType<[v8i32, v8f32], CCAssignToStack<32, 4>>, + CCIfType<[v16i32, v16f32], CCAssignToStack<64, 4>> +]>; + +// Calling convention for leaf functions +def RetCC_AMDGPU_Func : CallingConv<[ + CCIfType<[i1], CCPromoteToType>, + CCIfType<[i1, i16], CCIfExtend>>, + CCIfType<[i32, f32, i16, f16, v2i16, v2f16], CCAssignToReg<[ + VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7, + VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15, + VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23, + VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31]>>, + CCIfType<[i64, f64, v2i32, v2f32, v4i32, v4f32, v8i32, v8f32, v16i32, v16f32, v2i64, v2f64], CCCustom<"allocateVGPRTuple">> +]>; + def CC_AMDGPU : CallingConv<[ CCIf<"static_cast" "(State.getMachineFunction().getSubtarget()).getGeneration() >=" diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp index e19314fe0a6c8..31ee9206ae27b 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp +++ b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp @@ -19,18 +19,19 @@ #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/DivergenceAnalysis.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstVisitor.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" 
-#include "llvm/IR/InstVisitor.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" -#include "llvm/IR/IRBuilder.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Operator.h" #include "llvm/IR/Type.h" @@ -48,7 +49,6 @@ namespace { class AMDGPUCodeGenPrepare : public FunctionPass, public InstVisitor { - const GCNTargetMachine *TM; const SISubtarget *ST = nullptr; DivergenceAnalysis *DA = nullptr; Module *Mod = nullptr; @@ -127,8 +127,7 @@ class AMDGPUCodeGenPrepare : public FunctionPass, public: static char ID; - AMDGPUCodeGenPrepare(const TargetMachine *TM = nullptr) : - FunctionPass(ID), TM(static_cast(TM)) {} + AMDGPUCodeGenPrepare() : FunctionPass(ID) {} bool visitFDiv(BinaryOperator &I); @@ -381,7 +380,9 @@ bool AMDGPUCodeGenPrepare::visitFDiv(BinaryOperator &FDiv) { FastMathFlags FMF = FPOp->getFastMathFlags(); bool UnsafeDiv = HasUnsafeFPMath || FMF.unsafeAlgebra() || FMF.allowReciprocal(); - if (ST->hasFP32Denormals() && !UnsafeDiv) + + // With UnsafeDiv node will be optimized to just rcp and mul. + if (ST->hasFP32Denormals() || UnsafeDiv) return false; IRBuilder<> Builder(FDiv.getParent(), std::next(FDiv.getIterator()), FPMath); @@ -487,10 +488,15 @@ bool AMDGPUCodeGenPrepare::doInitialization(Module &M) { } bool AMDGPUCodeGenPrepare::runOnFunction(Function &F) { - if (!TM || skipFunction(F)) + if (skipFunction(F)) + return false; + + auto *TPC = getAnalysisIfAvailable(); + if (!TPC) return false; - ST = &TM->getSubtarget(F); + const TargetMachine &TM = TPC->getTM(); + ST = &TM.getSubtarget(F); DA = &getAnalysis(); HasUnsafeFPMath = hasUnsafeFPMath(F); @@ -507,14 +513,14 @@ bool AMDGPUCodeGenPrepare::runOnFunction(Function &F) { return MadeChange; } -INITIALIZE_TM_PASS_BEGIN(AMDGPUCodeGenPrepare, DEBUG_TYPE, +INITIALIZE_PASS_BEGIN(AMDGPUCodeGenPrepare, DEBUG_TYPE, "AMDGPU IR optimizations", false, false) INITIALIZE_PASS_DEPENDENCY(DivergenceAnalysis) -INITIALIZE_TM_PASS_END(AMDGPUCodeGenPrepare, DEBUG_TYPE, - "AMDGPU IR optimizations", false, false) +INITIALIZE_PASS_END(AMDGPUCodeGenPrepare, DEBUG_TYPE, "AMDGPU IR optimizations", + false, false) char AMDGPUCodeGenPrepare::ID = 0; -FunctionPass *llvm::createAMDGPUCodeGenPreparePass(const GCNTargetMachine *TM) { - return new AMDGPUCodeGenPrepare(TM); +FunctionPass *llvm::createAMDGPUCodeGenPreparePass() { + return new AMDGPUCodeGenPrepare(); } diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 7c99752b881f4..f235313e48535 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -13,15 +13,15 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" +#include "AMDGPUISelLowering.h" // For AMDGPUISD #include "AMDGPUInstrInfo.h" #include "AMDGPURegisterInfo.h" -#include "AMDGPUISelLowering.h" // For AMDGPUISD #include "AMDGPUSubtarget.h" #include "SIDefines.h" -#include "SIInstrInfo.h" -#include "SIRegisterInfo.h" #include "SIISelLowering.h" +#include "SIInstrInfo.h" #include "SIMachineFunctionInfo.h" +#include "SIRegisterInfo.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" @@ -82,7 +82,7 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel { void PostprocessISelDAG() override; private: - SDValue foldFrameIndex(SDValue N) const; + std::pair foldFrameIndex(SDValue N) const; bool isNoNanSrc(SDValue N) const; bool isInlineImmediate(const SDNode 
*N) const; bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs, @@ -116,9 +116,11 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel { bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr, SDValue &SOffset, SDValue &Offset, SDValue &SLC) const; - bool SelectMUBUFScratchOffen(SDValue Addr, SDValue &RSrc, SDValue &VAddr, + bool SelectMUBUFScratchOffen(SDNode *Root, + SDValue Addr, SDValue &RSrc, SDValue &VAddr, SDValue &SOffset, SDValue &ImmOffset) const; - bool SelectMUBUFScratchOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset, + bool SelectMUBUFScratchOffset(SDNode *Root, + SDValue Addr, SDValue &SRsrc, SDValue &Soffset, SDValue &Offset) const; bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset, @@ -136,7 +138,10 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel { bool SelectMUBUFIntrinsicVOffset(SDValue Offset, SDValue &SOffset, SDValue &ImmOffset, SDValue &VOffset) const; - bool SelectFlat(SDValue Addr, SDValue &VAddr, SDValue &SLC) const; + bool SelectFlatAtomic(SDValue Addr, SDValue &VAddr, + SDValue &Offset, SDValue &SLC) const; + bool SelectFlatOffset(SDValue Addr, SDValue &VAddr, + SDValue &Offset, SDValue &SLC) const; bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset, bool &Imm) const; @@ -1074,13 +1079,33 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE); } -SDValue AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const { - if (auto FI = dyn_cast(N)) - return CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0)); - return N; +static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) { + auto PSV = PtrInfo.V.dyn_cast(); + return PSV && PSV->isStack(); } -bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDValue Addr, SDValue &Rsrc, +std::pair AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const { + const MachineFunction &MF = CurDAG->getMachineFunction(); + const SIMachineFunctionInfo *Info = MF.getInfo(); + + if (auto FI = dyn_cast(N)) { + SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(), + FI->getValueType(0)); + + // If we can resolve this to a frame index access, this is relative to the + // frame pointer SGPR. + return std::make_pair(TFI, CurDAG->getRegister(Info->getFrameOffsetReg(), + MVT::i32)); + } + + // If we don't know this private access is a local stack object, it needs to + // be relative to the entry point's scratch wave offset register. + return std::make_pair(N, CurDAG->getRegister(Info->getScratchWaveOffsetReg(), + MVT::i32)); +} + +bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Root, + SDValue Addr, SDValue &Rsrc, SDValue &VAddr, SDValue &SOffset, SDValue &ImmOffset) const { @@ -1089,7 +1114,6 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDValue Addr, SDValue &Rsrc, const SIMachineFunctionInfo *Info = MF.getInfo(); Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32); - SOffset = CurDAG->getRegister(Info->getScratchWaveOffsetReg(), MVT::i32); if (ConstantSDNode *CAddr = dyn_cast(Addr)) { unsigned Imm = CAddr->getZExtValue(); @@ -1100,6 +1124,14 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDValue Addr, SDValue &Rsrc, MachineSDNode *MovHighBits = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, DL, MVT::i32, HighBits); VAddr = SDValue(MovHighBits, 0); + + // In a call sequence, stores to the argument stack area are relative to the + // stack pointer. 
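For a constant scratch address, the code above keeps only the low 12 bits in the instruction's immediate (Imm & 4095) and materializes the rest into VAddr via V_MOV_B32. A sketch of that split, assuming the high part is simply the complementary bits of the offset:

#include <cassert>
#include <cstdint>

struct SplitOffset { uint32_t Base, Imm; };

SplitOffset splitScratchOffset(uint32_t Offset) {
  SplitOffset S;
  S.Imm = Offset & 4095;    // Low 12 bits fit in the instruction.
  S.Base = Offset & ~4095u; // Remainder goes in the address register.
  return S;
}

int main() {
  SplitOffset S = splitScratchOffset(0x1234);
  assert(S.Base == 0x1000 && S.Imm == 0x234);
  assert(S.Base + S.Imm == 0x1234); // Nothing is lost by the split.
}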
+ const MachinePointerInfo &PtrInfo = cast(Root)->getPointerInfo(); + unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ? + Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg(); + + SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32); ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16); return true; } @@ -1113,19 +1145,20 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDValue Addr, SDValue &Rsrc, // Offsets in vaddr must be positive. ConstantSDNode *C1 = cast(N1); if (isLegalMUBUFImmOffset(C1)) { - VAddr = foldFrameIndex(N0); + std::tie(VAddr, SOffset) = foldFrameIndex(N0); ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16); return true; } } // (node) - VAddr = foldFrameIndex(Addr); + std::tie(VAddr, SOffset) = foldFrameIndex(Addr); ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16); return true; } -bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDValue Addr, +bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Root, + SDValue Addr, SDValue &SRsrc, SDValue &SOffset, SDValue &Offset) const { @@ -1138,7 +1171,15 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDValue Addr, const SIMachineFunctionInfo *Info = MF.getInfo(); SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32); - SOffset = CurDAG->getRegister(Info->getScratchWaveOffsetReg(), MVT::i32); + + const MachinePointerInfo &PtrInfo = cast(Root)->getPointerInfo(); + unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ? + Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg(); + + // FIXME: Get from MachinePointerInfo? We should only be using the frame + // offset if we know this is in a call sequence. + SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32); + Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16); return true; } @@ -1275,14 +1316,37 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicVOffset(SDValue Offset, return true; } -bool AMDGPUDAGToDAGISel::SelectFlat(SDValue Addr, - SDValue &VAddr, - SDValue &SLC) const { +bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDValue Addr, + SDValue &VAddr, + SDValue &Offset, + SDValue &SLC) const { + int64_t OffsetVal = 0; + + if (Subtarget->hasFlatInstOffsets() && + CurDAG->isBaseWithConstantOffset(Addr)) { + SDValue N0 = Addr.getOperand(0); + SDValue N1 = Addr.getOperand(1); + uint64_t COffsetVal = cast(N1)->getZExtValue(); + if (isUInt<12>(COffsetVal)) { + Addr = N0; + OffsetVal = COffsetVal; + } + } + VAddr = Addr; + Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i16); SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1); + return true; } +bool AMDGPUDAGToDAGISel::SelectFlatAtomic(SDValue Addr, + SDValue &VAddr, + SDValue &Offset, + SDValue &SLC) const { + return SelectFlatOffset(Addr, VAddr, Offset, SLC); +} + bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset, bool &Imm) const { @@ -1700,21 +1764,89 @@ bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src, return true; } +static SDValue stripBitcast(SDValue Val) { + return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val; +} + +// Figure out if this is really an extract of the high 16-bits of a dword. 
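In scalar terms, the matcher defined next recognizes trunc(srl x, 16): truncating a 32-bit value that was shifted right by 16 yields exactly its high 16-bit half. A self-contained illustration of the equivalence the DAG pattern relies on:

#include <cassert>
#include <cstdint>

// trunc(srl V, 16) == the high 16-bit element of the dword.
uint16_t extractHiElt(uint32_t V) {
  return static_cast<uint16_t>(V >> 16);
}

int main() {
  assert(extractHiElt(0xDEADBEEF) == 0xDEAD);
  assert(extractHiElt(0x0000FFFF) == 0x0000);
}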
+static bool isExtractHiElt(SDValue In, SDValue &Out) { + In = stripBitcast(In); + if (In.getOpcode() != ISD::TRUNCATE) + return false; + + SDValue Srl = In.getOperand(0); + if (Srl.getOpcode() == ISD::SRL) { + if (ConstantSDNode *ShiftAmt = dyn_cast<ConstantSDNode>(Srl.getOperand(1))) { + if (ShiftAmt->getZExtValue() == 16) { + Out = stripBitcast(Srl.getOperand(0)); + return true; + } + } + } + + return false; +} + +// Look through operations that obscure just looking at the low 16-bits of the +// same register. +static SDValue stripExtractLoElt(SDValue In) { + if (In.getOpcode() == ISD::TRUNCATE) { + SDValue Src = In.getOperand(0); + if (Src.getValueType().getSizeInBits() == 32) + return stripBitcast(Src); + } + + return In; +} + bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const { unsigned Mods = 0; Src = In; - // FIXME: Look for on separate components if (Src.getOpcode() == ISD::FNEG) { - Mods |= (SISrcMods::NEG | SISrcMods::NEG_HI); + Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI); Src = Src.getOperand(0); } - // Packed instructions do not have abs modifiers. + if (Src.getOpcode() == ISD::BUILD_VECTOR) { + unsigned VecMods = Mods; + + SDValue Lo = stripBitcast(Src.getOperand(0)); + SDValue Hi = stripBitcast(Src.getOperand(1)); - // FIXME: Handle abs/neg of individual components. - // FIXME: Handle swizzling with op_sel + if (Lo.getOpcode() == ISD::FNEG) { + Lo = stripBitcast(Lo.getOperand(0)); + Mods ^= SISrcMods::NEG; + } + + if (Hi.getOpcode() == ISD::FNEG) { + Hi = stripBitcast(Hi.getOperand(0)); + Mods ^= SISrcMods::NEG_HI; + } + + if (isExtractHiElt(Lo, Lo)) + Mods |= SISrcMods::OP_SEL_0; + + if (isExtractHiElt(Hi, Hi)) + Mods |= SISrcMods::OP_SEL_1; + + Lo = stripExtractLoElt(Lo); + Hi = stripExtractLoElt(Hi); + + if (Lo == Hi && !isInlineImmediate(Lo.getNode())) { + // Really a scalar input. Just select from the low half of the register to + // avoid packing. + + Src = Lo; + SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); + return true; + } + + Mods = VecMods; + } + + // Packed instructions do not have abs modifiers. Mods |= SISrcMods::OP_SEL_1; SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index f80652b873730..258b1737deb38 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -21,6 +21,7 @@ #include "AMDGPURegisterInfo.h" #include "AMDGPUSubtarget.h" #include "R600MachineFunctionInfo.h" +#include "SIInstrInfo.h" #include "SIMachineFunctionInfo.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFunction.h" @@ -30,7 +31,6 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/Support/KnownBits.h" -#include "SIInstrInfo.h" using namespace llvm; static bool allocateKernArg(unsigned ValNo, MVT ValVT, MVT LocVT, @@ -76,6 +76,45 @@ static bool allocateSGPRTuple(unsigned ValNo, MVT ValVT, MVT LocVT, } } +// Allocate up to VGPR31. +// +// TODO: Since there are no VGPR alignment requirements would it be better to +// split into individual scalar registers?
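The register counts passed to allocateCCRegs below (31, 29, 25, 17) follow from the "up to VGPR31" rule, assuming the VReg_64/128/256/512 classes enumerate overlapping tuples by ascending start register: a tuple of Size registers can start at indices 0 through 32 - Size, giving 32 - Size + 1 candidates. A sketch of that arithmetic:

#include <cassert>

// Number of register tuples of the given size that fit entirely
// within VGPR0..VGPR31.
constexpr unsigned numVGPRTuples(unsigned Size) { return 32 - Size + 1; }

static_assert(numVGPRTuples(2) == 31, "i64/f64/v2i32/v2f32 tuples");
static_assert(numVGPRTuples(4) == 29, "v4i32/v4f32/v2i64/v2f64 tuples");
static_assert(numVGPRTuples(8) == 25, "v8i32/v8f32 tuples");
static_assert(numVGPRTuples(16) == 17, "v16i32/v16f32 tuples");

int main() {}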
+static bool allocateVGPRTuple(unsigned ValNo, MVT ValVT, MVT LocVT, + CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State) { + switch (LocVT.SimpleTy) { + case MVT::i64: + case MVT::f64: + case MVT::v2i32: + case MVT::v2f32: { + return allocateCCRegs(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State, + &AMDGPU::VReg_64RegClass, 31); + } + case MVT::v4i32: + case MVT::v4f32: + case MVT::v2i64: + case MVT::v2f64: { + return allocateCCRegs(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State, + &AMDGPU::VReg_128RegClass, 29); + } + case MVT::v8i32: + case MVT::v8f32: { + return allocateCCRegs(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State, + &AMDGPU::VReg_256RegClass, 25); + + } + case MVT::v16i32: + case MVT::v16f32: { + return allocateCCRegs(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State, + &AMDGPU::VReg_512RegClass, 17); + + } + default: + return false; + } +} + #include "AMDGPUGenCallingConv.inc" // Find a larger type to do a load / store of a vector with. @@ -88,6 +127,29 @@ EVT AMDGPUTargetLowering::getEquivalentMemType(LLVMContext &Ctx, EVT VT) { return EVT::getVectorVT(Ctx, MVT::i32, StoreSize / 32); } +bool AMDGPUTargetLowering::isOrEquivalentToAdd(SelectionDAG &DAG, SDValue Op) +{ + assert(Op.getOpcode() == ISD::OR); + + SDValue N0 = Op->getOperand(0); + SDValue N1 = Op->getOperand(1); + EVT VT = N0.getValueType(); + + if (VT.isInteger() && !VT.isVector()) { + KnownBits LHSKnown, RHSKnown; + DAG.computeKnownBits(N0, LHSKnown); + + if (LHSKnown.Zero.getBoolValue()) { + DAG.computeKnownBits(N1, RHSKnown); + + if (!(~RHSKnown.Zero & ~LHSKnown.Zero)) + return true; + } + } + + return false; +} + AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM, const AMDGPUSubtarget &STI) : TargetLowering(TM), Subtarget(&STI) { @@ -511,6 +573,8 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM, setTargetDAGCombine(ISD::FSUB); setTargetDAGCombine(ISD::FNEG); setTargetDAGCombine(ISD::FABS); + setTargetDAGCombine(ISD::AssertZext); + setTargetDAGCombine(ISD::AssertSext); } //===----------------------------------------------------------------------===// @@ -773,8 +837,43 @@ bool AMDGPUTargetLowering::isNarrowingProfitable(EVT SrcVT, EVT DestVT) const { //===---------------------------------------------------------------------===// CCAssignFn *AMDGPUCallLowering::CCAssignFnForCall(CallingConv::ID CC, - bool IsVarArg) const { - return CC_AMDGPU; + bool IsVarArg) { + switch (CC) { + case CallingConv::AMDGPU_KERNEL: + case CallingConv::SPIR_KERNEL: + return CC_AMDGPU_Kernel; + case CallingConv::AMDGPU_VS: + case CallingConv::AMDGPU_GS: + case CallingConv::AMDGPU_PS: + case CallingConv::AMDGPU_CS: + case CallingConv::AMDGPU_HS: + return CC_AMDGPU; + case CallingConv::C: + case CallingConv::Fast: + return CC_AMDGPU_Func; + default: + report_fatal_error("Unsupported calling convention."); + } +} + +CCAssignFn *AMDGPUCallLowering::CCAssignFnForReturn(CallingConv::ID CC, + bool IsVarArg) { + switch (CC) { + case CallingConv::AMDGPU_KERNEL: + case CallingConv::SPIR_KERNEL: + return CC_AMDGPU_Kernel; + case CallingConv::AMDGPU_VS: + case CallingConv::AMDGPU_GS: + case CallingConv::AMDGPU_PS: + case CallingConv::AMDGPU_CS: + case CallingConv::AMDGPU_HS: + return RetCC_SI_Shader; + case CallingConv::C: + case CallingConv::Fast: + return RetCC_AMDGPU_Func; + default: + report_fatal_error("Unsupported calling convention."); + } } /// The SelectionDAGBuilder will automatically promote function arguments @@ -786,7 +885,7 @@ CCAssignFn 
*AMDGPUCallLowering::CCAssignFnForCall(CallingConv::ID CC, /// When the SelectionDAGBuilder computes the Ins, it takes care of splitting /// input values across multiple registers. Each item in the Ins array -/// represents a single value that will be stored in regsters. Ins[x].VT is +/// represents a single value that will be stored in registers. Ins[x].VT is /// the value type of the value that will be stored in the register, so /// whatever SDNode we lower the argument to needs to be this type. /// @@ -874,18 +973,15 @@ void AMDGPUTargetLowering::analyzeFormalArgumentsCompute(CCState &State, } } -void AMDGPUTargetLowering::AnalyzeReturn(CCState &State, - const SmallVectorImpl &Outs) const { - - State.AnalyzeReturn(Outs, RetCC_SI); -} - -SDValue -AMDGPUTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, - bool isVarArg, - const SmallVectorImpl &Outs, - const SmallVectorImpl &OutVals, - const SDLoc &DL, SelectionDAG &DAG) const { +SDValue AMDGPUTargetLowering::LowerReturn( + SDValue Chain, CallingConv::ID CallConv, + bool isVarArg, + const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, + const SDLoc &DL, SelectionDAG &DAG) const { + // FIXME: Fails for r600 tests + //assert(!isVarArg && Outs.empty() && OutVals.empty() && + // "wave terminate should not have return values"); return DAG.getNode(AMDGPUISD::ENDPGM, DL, MVT::Other, Chain); } @@ -896,20 +992,12 @@ AMDGPUTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, /// Selects the correct CCAssignFn for a given CallingConvention value. CCAssignFn *AMDGPUTargetLowering::CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) { - switch (CC) { - case CallingConv::C: - case CallingConv::AMDGPU_KERNEL: - case CallingConv::SPIR_KERNEL: - return CC_AMDGPU_Kernel; - case CallingConv::AMDGPU_VS: - case CallingConv::AMDGPU_HS: - case CallingConv::AMDGPU_GS: - case CallingConv::AMDGPU_PS: - case CallingConv::AMDGPU_CS: - return CC_AMDGPU; - default: - report_fatal_error("Unsupported calling convention."); - } + return AMDGPUCallLowering::CCAssignFnForCall(CC, IsVarArg); +} + +CCAssignFn *AMDGPUTargetLowering::CCAssignFnForReturn(CallingConv::ID CC, + bool IsVarArg) { + return AMDGPUCallLowering::CCAssignFnForReturn(CC, IsVarArg); } SDValue AMDGPUTargetLowering::LowerCall(CallLoweringInfo &CLI, @@ -2505,6 +2593,31 @@ SDValue AMDGPUTargetLowering::performClampCombine(SDNode *N, return SDValue(CSrc, 0); } +// FIXME: This should go in generic DAG combiner with an isTruncateFree check, +// but isTruncateFree is inaccurate for i16 now because of SALU vs. VALU +// issues. +SDValue AMDGPUTargetLowering::performAssertSZExtCombine(SDNode *N, + DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; + SDValue N0 = N->getOperand(0); + + // (vt2 (assertzext (truncate vt0:x), vt1)) -> + // (vt2 (truncate (assertzext vt0:x, vt1))) + if (N0.getOpcode() == ISD::TRUNCATE) { + SDValue N1 = N->getOperand(1); + EVT ExtVT = cast(N1)->getVT(); + SDLoc SL(N); + + SDValue Src = N0.getOperand(0); + EVT SrcVT = Src.getValueType(); + if (SrcVT.bitsGE(ExtVT)) { + SDValue NewInReg = DAG.getNode(N->getOpcode(), SL, SrcVT, Src, N1); + return DAG.getNode(ISD::TRUNCATE, SL, N->getValueType(0), NewInReg); + } + } + + return SDValue(); +} /// Split the 64-bit value \p LHS into two 32-bit components, and perform the /// binary operation \p Opc to it with the corresponding constant operands. 
SDValue AMDGPUTargetLowering::splitBinaryBitConstantOpImpl( @@ -2532,7 +2645,57 @@ SDValue AMDGPUTargetLowering::splitBinaryBitConstantOpImpl( SDValue AMDGPUTargetLowering::performShlCombine(SDNode *N, DAGCombinerInfo &DCI) const { - if (N->getValueType(0) != MVT::i64) + EVT VT = N->getValueType(0); + + ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N->getOperand(1)); + if (!RHS) + return SDValue(); + + SDValue LHS = N->getOperand(0); + unsigned RHSVal = RHS->getZExtValue(); + if (!RHSVal) + return LHS; + + SDLoc SL(N); + SelectionDAG &DAG = DCI.DAG; + + switch (LHS->getOpcode()) { + default: + break; + case ISD::ZERO_EXTEND: + case ISD::SIGN_EXTEND: + case ISD::ANY_EXTEND: { + // shl (ext x) => zext (shl x), if shift does not overflow int + if (VT != MVT::i64) + break; + KnownBits Known; + SDValue X = LHS->getOperand(0); + DAG.computeKnownBits(X, Known); + unsigned LZ = Known.countMinLeadingZeros(); + if (LZ < RHSVal) + break; + EVT XVT = X.getValueType(); + SDValue Shl = DAG.getNode(ISD::SHL, SL, XVT, X, SDValue(RHS, 0)); + return DAG.getZExtOrTrunc(Shl, SL, VT); + } + case ISD::OR: + if (!isOrEquivalentToAdd(DAG, LHS)) + break; + LLVM_FALLTHROUGH; + case ISD::ADD: { + // shl (or|add x, c2), c1 => or|add (shl x, c1), (c2 << c1) + if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(LHS->getOperand(1))) { + SDValue Shl = DAG.getNode(ISD::SHL, SL, VT, LHS->getOperand(0), + SDValue(RHS, 0)); + SDValue C2V = DAG.getConstant(C2->getAPIntValue() << RHSVal, + SDLoc(C2), VT); + return DAG.getNode(LHS->getOpcode(), SL, VT, Shl, C2V); + } + break; + } + } + + if (VT != MVT::i64) return SDValue(); // i64 (shl x, C) -> (build_pair 0, (shl x, C -32)) @@ -2540,19 +2703,9 @@ SDValue AMDGPUTargetLowering::performShlCombine(SDNode *N, // On some subtargets, 64-bit shift is a quarter rate instruction. In the // common case, splitting this into a move and a 32-bit shift is faster and // the same code size. - const ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N->getOperand(1)); - if (!RHS) - return SDValue(); - - unsigned RHSVal = RHS->getZExtValue(); if (RHSVal < 32) return SDValue(); - SDValue LHS = N->getOperand(0); - - SDLoc SL(N); - SelectionDAG &DAG = DCI.DAG; - SDValue ShiftAmt = DAG.getConstant(RHSVal - 32, SL, MVT::i32); SDValue Lo = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, LHS); @@ -3355,7 +3508,8 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N, DL); } - if ((OffsetVal + WidthVal) >= 32) { + if ((OffsetVal + WidthVal) >= 32 && + !(Subtarget->hasSDWA() && OffsetVal == 16 && WidthVal == 16)) { SDValue ShiftVal = DAG.getConstant(OffsetVal, DL, MVT::i32); return DAG.getNode(Signed ?
ISD::SRA : ISD::SRL, DL, MVT::i32, BitsFrom, ShiftVal); @@ -3394,6 +3548,9 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N, break; } + case ISD::AssertZext: + case ISD::AssertSext: + return performAssertSZExtCombine(N, DCI); } return SDValue(); } @@ -3403,18 +3560,25 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N, //===----------------------------------------------------------------------===// SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG, - const TargetRegisterClass *RC, - unsigned Reg, EVT VT) const { + const TargetRegisterClass *RC, + unsigned Reg, EVT VT, + const SDLoc &SL, + bool RawReg) const { MachineFunction &MF = DAG.getMachineFunction(); MachineRegisterInfo &MRI = MF.getRegInfo(); - unsigned VirtualRegister; + unsigned VReg; + if (!MRI.isLiveIn(Reg)) { - VirtualRegister = MRI.createVirtualRegister(RC); - MRI.addLiveIn(Reg, VirtualRegister); + VReg = MRI.createVirtualRegister(RC); + MRI.addLiveIn(Reg, VReg); } else { - VirtualRegister = MRI.getLiveInVirtReg(Reg); + VReg = MRI.getLiveInVirtReg(Reg); } - return DAG.getRegister(VirtualRegister, VT); + + if (RawReg) + return DAG.getRegister(VReg, VT); + + return DAG.getCopyFromReg(DAG.getEntryNode(), SL, VReg, VT); } uint32_t AMDGPUTargetLowering::getImplicitParameterOffset( @@ -3533,6 +3697,8 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(STORE_MSKOR) NODE_NAME_CASE(LOAD_CONSTANT) NODE_NAME_CASE(TBUFFER_STORE_FORMAT) + NODE_NAME_CASE(TBUFFER_STORE_FORMAT_X3) + NODE_NAME_CASE(TBUFFER_LOAD_FORMAT) NODE_NAME_CASE(ATOMIC_CMP_SWAP) NODE_NAME_CASE(ATOMIC_INC) NODE_NAME_CASE(ATOMIC_DEC) diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUISelLowering.h b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUISelLowering.h index 4c588a7bafd05..d85aada6053a1 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -34,6 +34,9 @@ class AMDGPUTargetLowering : public TargetLowering { /// compare. 
SDValue getFFBH_U32(SelectionDAG &DAG, SDValue Op, const SDLoc &DL) const; +public: + static bool isOrEquivalentToAdd(SelectionDAG &DAG, SDValue Op); + protected: const AMDGPUSubtarget *Subtarget; AMDGPUAS AMDGPUASI; @@ -73,6 +76,7 @@ class AMDGPUTargetLowering : public TargetLowering { SDValue performLoadCombine(SDNode *N, DAGCombinerInfo &DCI) const; SDValue performStoreCombine(SDNode *N, DAGCombinerInfo &DCI) const; SDValue performClampCombine(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue performAssertSZExtCombine(SDNode *N, DAGCombinerInfo &DCI) const; SDValue splitBinaryBitConstantOpImpl(DAGCombinerInfo &DCI, const SDLoc &SL, unsigned Opc, SDValue LHS, @@ -115,9 +119,6 @@ class AMDGPUTargetLowering : public TargetLowering { SmallVectorImpl &Results) const; void analyzeFormalArgumentsCompute(CCState &State, const SmallVectorImpl &Ins) const; - void AnalyzeReturn(CCState &State, - const SmallVectorImpl &Outs) const; - public: AMDGPUTargetLowering(const TargetMachine &TM, const AMDGPUSubtarget &STI); @@ -164,6 +165,8 @@ class AMDGPUTargetLowering : public TargetLowering { bool isCheapToSpeculateCtlz() const override; static CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg); + static CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC, bool IsVarArg); + SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, const SDLoc &DL, @@ -214,10 +217,25 @@ class AMDGPUTargetLowering : public TargetLowering { /// \brief Helper function that adds Reg to the LiveIn list of the DAG's /// MachineFunction. /// - /// \returns a RegisterSDNode representing Reg. - virtual SDValue CreateLiveInRegister(SelectionDAG &DAG, - const TargetRegisterClass *RC, - unsigned Reg, EVT VT) const; + /// \returns a RegisterSDNode representing Reg if \p RawReg is true, otherwise + /// a copy from the register. + SDValue CreateLiveInRegister(SelectionDAG &DAG, + const TargetRegisterClass *RC, + unsigned Reg, EVT VT, + const SDLoc &SL, + bool RawReg = false) const; + SDValue CreateLiveInRegister(SelectionDAG &DAG, + const TargetRegisterClass *RC, + unsigned Reg, EVT VT) const { + return CreateLiveInRegister(DAG, RC, Reg, VT, SDLoc(DAG.getEntryNode())); + } + + // Returns the raw live in register rather than a copy from it. + SDValue CreateLiveInRegisterRaw(SelectionDAG &DAG, + const TargetRegisterClass *RC, + unsigned Reg, EVT VT) const { + return CreateLiveInRegister(DAG, RC, Reg, VT, SDLoc(DAG.getEntryNode()), true); + } enum ImplicitParameter { FIRST_IMPLICIT, @@ -386,6 +404,8 @@ enum NodeType : unsigned { STORE_MSKOR, LOAD_CONSTANT, TBUFFER_STORE_FORMAT, + TBUFFER_STORE_FORMAT_X3, + TBUFFER_LOAD_FORMAT, ATOMIC_CMP_SWAP, ATOMIC_INC, ATOMIC_DEC, diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp index a01f5d37c7c16..69dc529861729 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp +++ b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp @@ -66,7 +66,9 @@ int AMDGPUInstrInfo::getMaskedMIMGOp(uint16_t Opcode, unsigned Channels) const { // This must be kept in sync with the SIEncodingFamily class in SIInstrInfo.td enum SIEncodingFamily { SI = 0, - VI = 1 + VI = 1, + SDWA = 2, + SDWA9 = 3 }; // Wrapper for Tablegen'd function. 
enum Subtarget is not defined in any @@ -101,7 +103,12 @@ static SIEncodingFamily subtargetEncodingFamily(const AMDGPUSubtarget &ST) { } int AMDGPUInstrInfo::pseudoToMCOpcode(int Opcode) const { - int MCOp = AMDGPU::getMCOpcode(Opcode, subtargetEncodingFamily(ST)); + SIEncodingFamily Gen = subtargetEncodingFamily(ST); + if (get(Opcode).TSFlags & SIInstrFlags::SDWA) + Gen = ST.getGeneration() == AMDGPUSubtarget::GFX9 ? SIEncodingFamily::SDWA9 + : SIEncodingFamily::SDWA; + + int MCOp = AMDGPU::getMCOpcode(Opcode, Gen); // -1 means that Opcode is already a native instruction. if (MCOp == -1) diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUInstrInfo.h b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUInstrInfo.h index 12caa5118342a..41cc7d7093ec1 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUInstrInfo.h +++ b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUInstrInfo.h @@ -17,8 +17,8 @@ #define LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRINFO_H #include "AMDGPU.h" -#include "llvm/Target/TargetInstrInfo.h" #include "Utils/AMDGPUBaseInfo.h" +#include "llvm/Target/TargetInstrInfo.h" #define GET_INSTRINFO_HEADER #include "AMDGPUGenInstrInfo.inc" diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUInstrInfo.td index 353cc57427915..bcf89bb78ad66 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUInstrInfo.td +++ b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUInstrInfo.td @@ -70,6 +70,10 @@ def AMDGPUElseBreakOp : SDTypeProfile<1, 2, [SDTCisVT<0, i64>, SDTCisVT<1, i64>, SDTCisVT<2, i64>] >; +def AMDGPUAddeSubeOp : SDTypeProfile<2, 3, + [SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisVT<0, i32>, SDTCisVT<1, i1>, SDTCisVT<4, i1>] +>; + //===----------------------------------------------------------------------===// // AMDGPU DAG Nodes // @@ -179,6 +183,12 @@ def AMDGPUcarry : SDNode<"AMDGPUISD::CARRY", SDTIntBinOp, []>; // out = (src1 > src0) ? 1 : 0 def AMDGPUborrow : SDNode<"AMDGPUISD::BORROW", SDTIntBinOp, []>; +// TODO: remove AMDGPUadde/AMDGPUsube when ADDCARRY/SUBCARRY get their own +// nodes in TargetSelectionDAG.td. 
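The ADDCARRY/SUBCARRY nodes these patterns wrap model a 32-bit add or subtract that both consumes and produces a carry bit. A sketch of the add semantics in plain integer arithmetic, independent of the SelectionDAG types:

#include <cassert>
#include <cstdint>

struct AddCarryResult { uint32_t Sum; bool CarryOut; };

// (Sum, CarryOut) = A + B + CarryIn, computed in a wider type so the
// carry-out is just the overflow past 32 bits.
AddCarryResult addCarry(uint32_t A, uint32_t B, bool CarryIn) {
  uint64_t Wide = static_cast<uint64_t>(A) + B + (CarryIn ? 1 : 0);
  return { static_cast<uint32_t>(Wide), Wide > UINT32_MAX };
}

int main() {
  AddCarryResult R = addCarry(0xFFFFFFFF, 0, true);
  assert(R.Sum == 0 && R.CarryOut); // Wraps around and carries out.
}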
+def AMDGPUadde : SDNode<"ISD::ADDCARRY", AMDGPUAddeSubeOp, []>; + +def AMDGPUsube : SDNode<"ISD::SUBCARRY", AMDGPUAddeSubeOp, []>; + def AMDGPUSetCCOp : SDTypeProfile<1, 3, [ // setcc SDTCisVT<0, i64>, SDTCisSameAs<1, 2>, SDTCisVT<3, OtherVT> ]>; @@ -380,6 +390,6 @@ def AMDGPUendpgm : SDNode<"AMDGPUISD::ENDPGM", SDTNone, def AMDGPUreturn_to_epilog : SDNode<"AMDGPUISD::RETURN_TO_EPILOG", SDTNone, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; -def AMDGPUret_flag : SDNode<"AMDGPUISD::RET_FLAG", SDTNone, +def AMDGPUret_flag : SDNode<"AMDGPUISD::RET_FLAG", SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic] >; diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index a7eac080f885d..e54c887d60906 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -126,8 +126,9 @@ bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const { MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(AMDGPU::FLAT_STORE_DWORD)) .add(I.getOperand(1)) .add(I.getOperand(0)) - .addImm(0) - .addImm(0); + .addImm(0) // offset + .addImm(0) // glc + .addImm(0); // slc // Now that we selected an opcode, we need to constrain the register @@ -392,8 +393,9 @@ bool AMDGPUInstructionSelector::selectG_LOAD(MachineInstr &I) const { MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode)) .add(I.getOperand(0)) .addReg(PtrReg) - .addImm(0) - .addImm(0); + .addImm(0) // offset + .addImm(0) // glc + .addImm(0); // slc bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI); I.eraseFromParent(); diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUInstructionSelector.h index c87102e55dfb0..ef845f44d365b 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUInstructionSelector.h +++ b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUInstructionSelector.h @@ -15,9 +15,9 @@ #define LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRUCTIONSELECTOR_H #include "AMDGPU.h" -#include "llvm/CodeGen/GlobalISel/InstructionSelector.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/GlobalISel/InstructionSelector.h" namespace llvm { diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 9de302994e680..cc56216c355bf 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -14,10 +14,10 @@ #include "AMDGPULegalizerInfo.h" #include "llvm/CodeGen/ValueTypes.h" -#include "llvm/IR/Type.h" #include "llvm/IR/DerivedTypes.h" -#include "llvm/Target/TargetOpcodes.h" +#include "llvm/IR/Type.h" #include "llvm/Support/Debug.h" +#include "llvm/Target/TargetOpcodes.h" using namespace llvm; @@ -28,24 +28,49 @@ using namespace llvm; AMDGPULegalizerInfo::AMDGPULegalizerInfo() { using namespace TargetOpcode; + const LLT S1= LLT::scalar(1); + const LLT V2S16 = LLT::vector(2, 16); const LLT S32 = LLT::scalar(32); const LLT S64 = LLT::scalar(64); const LLT P1 = LLT::pointer(1, 64); const LLT P2 = LLT::pointer(2, 64); + setAction({G_ADD, S32}, Legal); + setAction({G_AND, S32}, Legal); + + setAction({G_BITCAST, V2S16}, Legal); + setAction({G_BITCAST, 1, S32}, Legal); + + setAction({G_BITCAST, S32}, Legal); + setAction({G_BITCAST, 1, V2S16}, Legal); 
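The paired G_BITCAST rules above legalize casts between <2 x s16> and s32 in both directions; the cast is a pure reinterpretation of the same 32 bits as two 16-bit lanes. An illustration of that bit-level view (assuming little-endian lane order, with lane 0 in the low half):

#include <cassert>
#include <cstdint>

// Pack two 16-bit lanes into one 32-bit word, lane 0 in the low half.
uint32_t packV2S16(uint16_t Lo, uint16_t Hi) {
  return static_cast<uint32_t>(Lo) | (static_cast<uint32_t>(Hi) << 16);
}

int main() {
  uint32_t V = packV2S16(0x3C00, 0xBC00); // +1.0 and -1.0 as half floats
  assert(V == 0xBC003C00);
  assert(static_cast<uint16_t>(V) == 0x3C00);       // lane 0
  assert(static_cast<uint16_t>(V >> 16) == 0xBC00); // lane 1
}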
+ + // FIXME: i1 operands to intrinsics should always be legal, but other i1 + // values may not be legal. We need to figure out how to distinguish + // between these two scenarios. + setAction({G_CONSTANT, S1}, Legal); setAction({G_CONSTANT, S32}, Legal); setAction({G_CONSTANT, S64}, Legal); + setAction({G_FCONSTANT, S32}, Legal); + setAction({G_GEP, P1}, Legal); setAction({G_GEP, P2}, Legal); setAction({G_GEP, 1, S64}, Legal); + setAction({G_ICMP, S1}, Legal); + setAction({G_ICMP, 1, S32}, Legal); + setAction({G_LOAD, P1}, Legal); setAction({G_LOAD, P2}, Legal); setAction({G_LOAD, S32}, Legal); setAction({G_LOAD, 1, P1}, Legal); setAction({G_LOAD, 1, P2}, Legal); + setAction({G_SELECT, S32}, Legal); + setAction({G_SELECT, 1, S1}, Legal); + + setAction({G_SHL, S32}, Legal); + setAction({G_STORE, S32}, Legal); setAction({G_STORE, 1, P1}, Legal); diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp index dcb6670621eef..7e0e9802c0e6d 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp +++ b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp @@ -9,6 +9,8 @@ #include "AMDGPU.h" #include "AMDGPUSubtarget.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" @@ -25,19 +27,22 @@ const unsigned MaxStaticSize = 1024; class AMDGPULowerIntrinsics : public ModulePass { private: - const TargetMachine *TM; - bool makeLIDRangeMetadata(Function &F) const; public: static char ID; - AMDGPULowerIntrinsics(const TargetMachine *TM = nullptr) - : ModulePass(ID), TM(TM) { } + AMDGPULowerIntrinsics() : ModulePass(ID) {} + bool runOnModule(Module &M) override; + bool expandMemIntrinsicUses(Function &F); StringRef getPassName() const override { return "AMDGPU Lower Intrinsics"; } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + } }; } @@ -46,8 +51,8 @@ char AMDGPULowerIntrinsics::ID = 0; char &llvm::AMDGPULowerIntrinsicsID = AMDGPULowerIntrinsics::ID; -INITIALIZE_TM_PASS(AMDGPULowerIntrinsics, DEBUG_TYPE, - "Lower intrinsics", false, false) +INITIALIZE_PASS(AMDGPULowerIntrinsics, DEBUG_TYPE, "Lower intrinsics", false, + false) // TODO: Should refine based on estimated number of accesses (e.g. 
does it // require splitting based on alignment) @@ -56,7 +61,7 @@ static bool shouldExpandOperationWithSize(Value *Size) { return !CI || (CI->getZExtValue() > MaxStaticSize); } -static bool expandMemIntrinsicUses(Function &F) { +bool AMDGPULowerIntrinsics::expandMemIntrinsicUses(Function &F) { Intrinsic::ID ID = F.getIntrinsicID(); bool Changed = false; @@ -68,7 +73,10 @@ static bool expandMemIntrinsicUses(Function &F) { case Intrinsic::memcpy: { auto *Memcpy = cast(Inst); if (shouldExpandOperationWithSize(Memcpy->getLength())) { - expandMemCpyAsLoop(Memcpy); + Function *ParentFunc = Memcpy->getParent()->getParent(); + const TargetTransformInfo &TTI = + getAnalysis().getTTI(*ParentFunc); + expandMemCpyAsLoop(Memcpy, TTI); Changed = true; Memcpy->eraseFromParent(); } @@ -104,11 +112,13 @@ static bool expandMemIntrinsicUses(Function &F) { } bool AMDGPULowerIntrinsics::makeLIDRangeMetadata(Function &F) const { - if (!TM) + auto *TPC = getAnalysisIfAvailable(); + if (!TPC) return false; + const TargetMachine &TM = TPC->getTM(); + const AMDGPUSubtarget &ST = TM.getSubtarget(F); bool Changed = false; - const AMDGPUSubtarget &ST = TM->getSubtarget(F); for (auto *U : F.users()) { auto *CI = dyn_cast(U); @@ -155,6 +165,6 @@ bool AMDGPULowerIntrinsics::runOnModule(Module &M) { return Changed; } -ModulePass *llvm::createAMDGPULowerIntrinsicsPass(const TargetMachine *TM) { - return new AMDGPULowerIntrinsics(TM); +ModulePass *llvm::createAMDGPULowerIntrinsicsPass() { + return new AMDGPULowerIntrinsics(); } diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp index da247fea7de6e..63dd0d726d91d 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp +++ b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp @@ -38,7 +38,6 @@ using namespace llvm; #include "AMDGPUGenMCPseudoLowering.inc" - AMDGPUMCInstLower::AMDGPUMCInstLower(MCContext &ctx, const AMDGPUSubtarget &st, const AsmPrinter &ap): Ctx(ctx), ST(st), AP(ap) { } @@ -126,9 +125,15 @@ bool AMDGPUMCInstLower::lowerOperand(const MachineOperand &MO, } void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const { + unsigned Opcode = MI->getOpcode(); - int MCOpcode = ST.getInstrInfo()->pseudoToMCOpcode(MI->getOpcode()); + // FIXME: Should be able to handle this with emitPseudoExpansionLowering. We + // need to select it to the subtarget specific version, and there's no way to + // do that with a single pseudo source operation. + if (Opcode == AMDGPU::S_SETPC_B64_return) + Opcode = AMDGPU::S_SETPC_B64; + int MCOpcode = ST.getInstrInfo()->pseudoToMCOpcode(Opcode); if (MCOpcode == -1) { LLVMContext &C = MI->getParent()->getParent()->getFunction()->getContext(); C.emitError("AMDGPUMCInstLower::lower - Pseudo instruction doesn't have " diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp new file mode 100644 index 0000000000000..9a391d06c9ea9 --- /dev/null +++ b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp @@ -0,0 +1,2881 @@ +//===- AMDGPUMachineCFGStructurizer.cpp - Machine code if conversion pass. ===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file implements the machine instruction level CFG structurizer pass. +// +//===----------------------------------------------------------------------===// + +#include "AMDGPU.h" +#include "AMDGPUSubtarget.h" +#include "SIInstrInfo.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/CFG.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegionInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/Support/Debug.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include +using namespace llvm; + +#define DEBUG_TYPE "amdgpucfgstructurizer" + +namespace { +class PHILinearizeDestIterator; + +class PHILinearize { + friend class PHILinearizeDestIterator; + +public: + typedef std::pair PHISourceT; + +private: + typedef DenseSet PHISourcesT; + typedef struct { + unsigned DestReg; + DebugLoc DL; + PHISourcesT Sources; + } PHIInfoElementT; + typedef SmallPtrSet PHIInfoT; + PHIInfoT PHIInfo; + + static unsigned phiInfoElementGetDest(PHIInfoElementT *Info); + static void phiInfoElementSetDef(PHIInfoElementT *Info, unsigned NewDef); + static PHISourcesT &phiInfoElementGetSources(PHIInfoElementT *Info); + static void phiInfoElementAddSource(PHIInfoElementT *Info, unsigned SourceReg, + MachineBasicBlock *SourceMBB); + static void phiInfoElementRemoveSource(PHIInfoElementT *Info, + unsigned SourceReg, + MachineBasicBlock *SourceMBB); + PHIInfoElementT *findPHIInfoElement(unsigned DestReg); + PHIInfoElementT *findPHIInfoElementFromSource(unsigned SourceReg, + MachineBasicBlock *SourceMBB); + +public: + bool findSourcesFromMBB(MachineBasicBlock *SourceMBB, + SmallVector &Sources); + void addDest(unsigned DestReg, const DebugLoc &DL); + void replaceDef(unsigned OldDestReg, unsigned NewDestReg); + void deleteDef(unsigned DestReg); + void addSource(unsigned DestReg, unsigned SourceReg, + MachineBasicBlock *SourceMBB); + void removeSource(unsigned DestReg, unsigned SourceReg, + MachineBasicBlock *SourceMBB = nullptr); + bool findDest(unsigned SourceReg, MachineBasicBlock *SourceMBB, + unsigned &DestReg); + bool isSource(unsigned Reg, MachineBasicBlock *SourceMBB = nullptr); + unsigned getNumSources(unsigned DestReg); + void dump(MachineRegisterInfo *MRI); + void clear(); + + typedef PHISourcesT::iterator source_iterator; + typedef PHILinearizeDestIterator dest_iterator; + + dest_iterator dests_begin(); + dest_iterator dests_end(); + + source_iterator sources_begin(unsigned Reg); + source_iterator sources_end(unsigned Reg); +}; + +class PHILinearizeDestIterator { +private: + PHILinearize::PHIInfoT::iterator Iter; + +public: + unsigned operator*() { return PHILinearize::phiInfoElementGetDest(*Iter); } + PHILinearizeDestIterator &operator++() { + ++Iter; + return *this; + } + bool operator==(const PHILinearizeDestIterator &I) const { + return I.Iter == Iter; + } + bool operator!=(const PHILinearizeDestIterator &I) const { + return I.Iter != Iter; + } + + PHILinearizeDestIterator(PHILinearize::PHIInfoT::iterator I) : Iter(I) {} +}; + +unsigned 
PHILinearize::phiInfoElementGetDest(PHIInfoElementT *Info) {
+  return Info->DestReg;
+}
+
+void PHILinearize::phiInfoElementSetDef(PHIInfoElementT *Info,
+                                        unsigned NewDef) {
+  Info->DestReg = NewDef;
+}
+
+PHILinearize::PHISourcesT &
+PHILinearize::phiInfoElementGetSources(PHIInfoElementT *Info) {
+  return Info->Sources;
+}
+
+void PHILinearize::phiInfoElementAddSource(PHIInfoElementT *Info,
+                                           unsigned SourceReg,
+                                           MachineBasicBlock *SourceMBB) {
+  // Assertion ensures we don't use the same SourceMBB for the
+  // sources, because we cannot have different registers with
+  // identical predecessors, but we can have the same register for
+  // multiple predecessors.
+#if !defined(NDEBUG)
+  for (auto SI : phiInfoElementGetSources(Info)) {
+    assert((SI.second != SourceMBB || SourceReg == SI.first));
+  }
+#endif
+
+  phiInfoElementGetSources(Info).insert(PHISourceT(SourceReg, SourceMBB));
+}
+
+void PHILinearize::phiInfoElementRemoveSource(PHIInfoElementT *Info,
+                                              unsigned SourceReg,
+                                              MachineBasicBlock *SourceMBB) {
+  auto &Sources = phiInfoElementGetSources(Info);
+  SmallVector EliminatedSources;
+  for (auto SI : Sources) {
+    if (SI.first == SourceReg &&
+        (SI.second == nullptr || SI.second == SourceMBB)) {
+      EliminatedSources.push_back(PHISourceT(SI.first, SI.second));
+    }
+  }
+
+  for (auto &Source : EliminatedSources) {
+    Sources.erase(Source);
+  }
+}
+
+PHILinearize::PHIInfoElementT *
+PHILinearize::findPHIInfoElement(unsigned DestReg) {
+  for (auto I : PHIInfo) {
+    if (phiInfoElementGetDest(I) == DestReg) {
+      return I;
+    }
+  }
+  return nullptr;
+}
+
+PHILinearize::PHIInfoElementT *
+PHILinearize::findPHIInfoElementFromSource(unsigned SourceReg,
+                                           MachineBasicBlock *SourceMBB) {
+  for (auto I : PHIInfo) {
+    for (auto SI : phiInfoElementGetSources(I)) {
+      if (SI.first == SourceReg &&
+          (SI.second == nullptr || SI.second == SourceMBB)) {
+        return I;
+      }
+    }
+  }
+  return nullptr;
+}
+
+bool PHILinearize::findSourcesFromMBB(MachineBasicBlock *SourceMBB,
+                                      SmallVector &Sources) {
+  bool FoundSource = false;
+  for (auto I : PHIInfo) {
+    for (auto SI : phiInfoElementGetSources(I)) {
+      if (SI.second == SourceMBB) {
+        FoundSource = true;
+        Sources.push_back(SI.first);
+      }
+    }
+  }
+  return FoundSource;
+}
+
+void PHILinearize::addDest(unsigned DestReg, const DebugLoc &DL) {
+  assert(findPHIInfoElement(DestReg) == nullptr && "Dest already exists");
+  PHISourcesT EmptySet;
+  PHIInfoElementT *NewElement = new PHIInfoElementT();
+  NewElement->DestReg = DestReg;
+  NewElement->DL = DL;
+  NewElement->Sources = EmptySet;
+  PHIInfo.insert(NewElement);
+}
+
+void PHILinearize::replaceDef(unsigned OldDestReg, unsigned NewDestReg) {
+  phiInfoElementSetDef(findPHIInfoElement(OldDestReg), NewDestReg);
+}
+
+void PHILinearize::deleteDef(unsigned DestReg) {
+  PHIInfoElementT *InfoElement = findPHIInfoElement(DestReg);
+  PHIInfo.erase(InfoElement);
+  delete InfoElement;
+}
+
+void PHILinearize::addSource(unsigned DestReg, unsigned SourceReg,
+                             MachineBasicBlock *SourceMBB) {
+  phiInfoElementAddSource(findPHIInfoElement(DestReg), SourceReg, SourceMBB);
+}
+
+void PHILinearize::removeSource(unsigned DestReg, unsigned SourceReg,
+                                MachineBasicBlock *SourceMBB) {
+  phiInfoElementRemoveSource(findPHIInfoElement(DestReg), SourceReg, SourceMBB);
+}
+
+bool PHILinearize::findDest(unsigned SourceReg, MachineBasicBlock *SourceMBB,
+                            unsigned &DestReg) {
+  PHIInfoElementT *InfoElement =
+      findPHIInfoElementFromSource(SourceReg, SourceMBB);
+  if (InfoElement != nullptr) {
+    DestReg = phiInfoElementGetDest(InfoElement);
+    return
true; + } + return false; +} + +bool PHILinearize::isSource(unsigned Reg, MachineBasicBlock *SourceMBB) { + unsigned DestReg; + return findDest(Reg, SourceMBB, DestReg); +} + +unsigned PHILinearize::getNumSources(unsigned DestReg) { + return phiInfoElementGetSources(findPHIInfoElement(DestReg)).size(); +} + +void PHILinearize::dump(MachineRegisterInfo *MRI) { + const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo(); + dbgs() << "=PHIInfo Start=\n"; + for (auto PII : this->PHIInfo) { + PHIInfoElementT &Element = *PII; + dbgs() << "Dest: " << PrintReg(Element.DestReg, TRI) + << " Sources: {"; + for (auto &SI : Element.Sources) { + dbgs() << PrintReg(SI.first, TRI) << "(BB#" + << SI.second->getNumber() << "),"; + } + dbgs() << "}\n"; + } + dbgs() << "=PHIInfo End=\n"; +} + +void PHILinearize::clear() { PHIInfo = PHIInfoT(); } + +PHILinearize::dest_iterator PHILinearize::dests_begin() { + return PHILinearizeDestIterator(PHIInfo.begin()); +} + +PHILinearize::dest_iterator PHILinearize::dests_end() { + return PHILinearizeDestIterator(PHIInfo.end()); +} + +PHILinearize::source_iterator PHILinearize::sources_begin(unsigned Reg) { + auto InfoElement = findPHIInfoElement(Reg); + return phiInfoElementGetSources(InfoElement).begin(); +} +PHILinearize::source_iterator PHILinearize::sources_end(unsigned Reg) { + auto InfoElement = findPHIInfoElement(Reg); + return phiInfoElementGetSources(InfoElement).end(); +} + +class RegionMRT; +class MBBMRT; + +static unsigned getPHINumInputs(MachineInstr &PHI) { + assert(PHI.isPHI()); + return (PHI.getNumOperands() - 1) / 2; +} + +static MachineBasicBlock *getPHIPred(MachineInstr &PHI, unsigned Index) { + assert(PHI.isPHI()); + return PHI.getOperand(Index * 2 + 2).getMBB(); +} + +static void setPhiPred(MachineInstr &PHI, unsigned Index, + MachineBasicBlock *NewPred) { + PHI.getOperand(Index * 2 + 2).setMBB(NewPred); +} + +static unsigned getPHISourceReg(MachineInstr &PHI, unsigned Index) { + assert(PHI.isPHI()); + return PHI.getOperand(Index * 2 + 1).getReg(); +} + +static unsigned getPHIDestReg(MachineInstr &PHI) { + assert(PHI.isPHI()); + return PHI.getOperand(0).getReg(); +} + +class LinearizedRegion { +protected: + MachineBasicBlock *Entry; + // The exit block is part of the region, and is the last + // merge block before exiting the region. 
+ MachineBasicBlock *Exit; + DenseSet LiveOuts; + SmallPtrSet MBBs; + bool HasLoop; + LinearizedRegion *Parent; + RegionMRT *RMRT; + + void storeLiveOutReg(MachineBasicBlock *MBB, unsigned Reg, + MachineInstr *DefInstr, const MachineRegisterInfo *MRI, + const TargetRegisterInfo *TRI, PHILinearize &PHIInfo); + + void storeLiveOutRegRegion(RegionMRT *Region, unsigned Reg, + MachineInstr *DefInstr, + const MachineRegisterInfo *MRI, + const TargetRegisterInfo *TRI, + PHILinearize &PHIInfo); + + void storeMBBLiveOuts(MachineBasicBlock *MBB, const MachineRegisterInfo *MRI, + const TargetRegisterInfo *TRI, PHILinearize &PHIInfo, + RegionMRT *TopRegion); + + void storeLiveOuts(MachineBasicBlock *MBB, const MachineRegisterInfo *MRI, + const TargetRegisterInfo *TRI, PHILinearize &PHIInfo); + + void storeLiveOuts(RegionMRT *Region, const MachineRegisterInfo *MRI, + const TargetRegisterInfo *TRI, PHILinearize &PHIInfo, + RegionMRT *TopRegion = nullptr); + +public: + void setRegionMRT(RegionMRT *Region) { RMRT = Region; } + + RegionMRT *getRegionMRT() { return RMRT; } + + void setParent(LinearizedRegion *P) { Parent = P; } + + LinearizedRegion *getParent() { return Parent; } + + void print(raw_ostream &OS, const TargetRegisterInfo *TRI = nullptr); + + void setBBSelectRegIn(unsigned Reg); + + unsigned getBBSelectRegIn(); + + void setBBSelectRegOut(unsigned Reg, bool IsLiveOut); + + unsigned getBBSelectRegOut(); + + void setHasLoop(bool Value); + + bool getHasLoop(); + + void addLiveOut(unsigned VReg); + + void removeLiveOut(unsigned Reg); + + void replaceLiveOut(unsigned OldReg, unsigned NewReg); + + void replaceRegister(unsigned Register, unsigned NewRegister, + MachineRegisterInfo *MRI, bool ReplaceInside, + bool ReplaceOutside, bool IncludeLoopPHIs); + + void replaceRegisterInsideRegion(unsigned Register, unsigned NewRegister, + bool IncludeLoopPHIs, + MachineRegisterInfo *MRI); + + void replaceRegisterOutsideRegion(unsigned Register, unsigned NewRegister, + bool IncludeLoopPHIs, + MachineRegisterInfo *MRI); + + DenseSet *getLiveOuts(); + + void setEntry(MachineBasicBlock *NewEntry); + + MachineBasicBlock *getEntry(); + + void setExit(MachineBasicBlock *NewExit); + + MachineBasicBlock *getExit(); + + void addMBB(MachineBasicBlock *MBB); + + void addMBBs(LinearizedRegion *InnerRegion); + + bool contains(MachineBasicBlock *MBB); + + bool isLiveOut(unsigned Reg); + + bool hasNoDef(unsigned Reg, MachineRegisterInfo *MRI); + + void removeFalseRegisterKills(MachineRegisterInfo *MRI); + + void initLiveOut(RegionMRT *Region, const MachineRegisterInfo *MRI, + const TargetRegisterInfo *TRI, PHILinearize &PHIInfo); + + LinearizedRegion(MachineBasicBlock *MBB, const MachineRegisterInfo *MRI, + const TargetRegisterInfo *TRI, PHILinearize &PHIInfo); + + LinearizedRegion(); + + ~LinearizedRegion(); +}; + +class MRT { +protected: + RegionMRT *Parent; + unsigned BBSelectRegIn; + unsigned BBSelectRegOut; + +public: + unsigned getBBSelectRegIn() { return BBSelectRegIn; } + + unsigned getBBSelectRegOut() { return BBSelectRegOut; } + + void setBBSelectRegIn(unsigned Reg) { BBSelectRegIn = Reg; } + + void setBBSelectRegOut(unsigned Reg) { BBSelectRegOut = Reg; } + + virtual RegionMRT *getRegionMRT() { return nullptr; } + + virtual MBBMRT *getMBBMRT() { return nullptr; } + + bool isRegion() { return getRegionMRT() != nullptr; } + + bool isMBB() { return getMBBMRT() != nullptr; } + + bool isRoot() { return Parent == nullptr; } + + void setParent(RegionMRT *Region) { Parent = Region; } + + RegionMRT *getParent() { return 
Parent; } + + static MachineBasicBlock * + initializeMRT(MachineFunction &MF, const MachineRegionInfo *RegionInfo, + DenseMap &RegionMap); + + static RegionMRT *buildMRT(MachineFunction &MF, + const MachineRegionInfo *RegionInfo, + const SIInstrInfo *TII, + MachineRegisterInfo *MRI); + + virtual void dump(const TargetRegisterInfo *TRI, int depth = 0) = 0; + + void dumpDepth(int depth) { + for (int i = depth; i > 0; --i) { + dbgs() << " "; + } + } + + virtual ~MRT() {} +}; + +class MBBMRT : public MRT { + MachineBasicBlock *MBB; + +public: + virtual MBBMRT *getMBBMRT() { return this; } + + MachineBasicBlock *getMBB() { return MBB; } + + virtual void dump(const TargetRegisterInfo *TRI, int depth = 0) { + dumpDepth(depth); + dbgs() << "MBB: " << getMBB()->getNumber(); + dbgs() << " In: " << PrintReg(getBBSelectRegIn(), TRI); + dbgs() << ", Out: " << PrintReg(getBBSelectRegOut(), TRI) << "\n"; + } + + MBBMRT(MachineBasicBlock *BB) : MBB(BB) { + setParent(nullptr); + setBBSelectRegOut(0); + setBBSelectRegIn(0); + } +}; + +class RegionMRT : public MRT { +protected: + MachineRegion *Region; + LinearizedRegion *LRegion; + MachineBasicBlock *Succ; + + SetVector Children; + +public: + virtual RegionMRT *getRegionMRT() { return this; } + + void setLinearizedRegion(LinearizedRegion *LinearizeRegion) { + LRegion = LinearizeRegion; + } + + LinearizedRegion *getLinearizedRegion() { return LRegion; } + + MachineRegion *getMachineRegion() { return Region; } + + unsigned getInnerOutputRegister() { + return (*(Children.begin()))->getBBSelectRegOut(); + } + + void addChild(MRT *Tree) { Children.insert(Tree); } + + SetVector *getChildren() { return &Children; } + + virtual void dump(const TargetRegisterInfo *TRI, int depth = 0) { + dumpDepth(depth); + dbgs() << "Region: " << (void *)Region; + dbgs() << " In: " << PrintReg(getBBSelectRegIn(), TRI); + dbgs() << ", Out: " << PrintReg(getBBSelectRegOut(), TRI) << "\n"; + + dumpDepth(depth); + if (getSucc()) + dbgs() << "Succ: " << getSucc()->getNumber() << "\n"; + else + dbgs() << "Succ: none \n"; + for (auto MRTI : Children) { + MRTI->dump(TRI, depth + 1); + } + } + + MRT *getEntryTree() { return Children.back(); } + + MRT *getExitTree() { return Children.front(); } + + MachineBasicBlock *getEntry() { + MRT *Tree = Children.back(); + return (Tree->isRegion()) ? Tree->getRegionMRT()->getEntry() + : Tree->getMBBMRT()->getMBB(); + } + + MachineBasicBlock *getExit() { + MRT *Tree = Children.front(); + return (Tree->isRegion()) ? 
Tree->getRegionMRT()->getExit() + : Tree->getMBBMRT()->getMBB(); + } + + void setSucc(MachineBasicBlock *MBB) { Succ = MBB; } + + MachineBasicBlock *getSucc() { return Succ; } + + bool contains(MachineBasicBlock *MBB) { + for (auto CI : Children) { + if (CI->isMBB()) { + if (MBB == CI->getMBBMRT()->getMBB()) { + return true; + } + } else { + if (CI->getRegionMRT()->contains(MBB)) { + return true; + } else if (CI->getRegionMRT()->getLinearizedRegion() != nullptr && + CI->getRegionMRT()->getLinearizedRegion()->contains(MBB)) { + return true; + } + } + } + return false; + } + + void replaceLiveOutReg(unsigned Register, unsigned NewRegister) { + LinearizedRegion *LRegion = getLinearizedRegion(); + LRegion->replaceLiveOut(Register, NewRegister); + for (auto &CI : Children) { + if (CI->isRegion()) { + CI->getRegionMRT()->replaceLiveOutReg(Register, NewRegister); + } + } + } + + RegionMRT(MachineRegion *MachineRegion) + : Region(MachineRegion), LRegion(nullptr), Succ(nullptr) { + setParent(nullptr); + setBBSelectRegOut(0); + setBBSelectRegIn(0); + } + + virtual ~RegionMRT() { + if (LRegion) { + delete LRegion; + } + + for (auto CI : Children) { + delete &(*CI); + } + } +}; + +static unsigned createBBSelectReg(const SIInstrInfo *TII, + MachineRegisterInfo *MRI) { + return MRI->createVirtualRegister(TII->getPreferredSelectRegClass(32)); +} + +MachineBasicBlock * +MRT::initializeMRT(MachineFunction &MF, const MachineRegionInfo *RegionInfo, + DenseMap &RegionMap) { + for (auto &MFI : MF) { + MachineBasicBlock *ExitMBB = &MFI; + if (ExitMBB->succ_size() == 0) { + return ExitMBB; + } + } + llvm_unreachable("CFG has no exit block"); + return nullptr; +} + +RegionMRT *MRT::buildMRT(MachineFunction &MF, + const MachineRegionInfo *RegionInfo, + const SIInstrInfo *TII, MachineRegisterInfo *MRI) { + SmallPtrSet PlacedRegions; + DenseMap RegionMap; + MachineRegion *TopLevelRegion = RegionInfo->getTopLevelRegion(); + RegionMRT *Result = new RegionMRT(TopLevelRegion); + RegionMap[TopLevelRegion] = Result; + + // Insert the exit block first, we need it to be the merge node + // for the top level region. 
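+  // (initializeMRT returns the function's single exit block, i.e. the one
+  // MBB without successors; every other block is added to the tree in the
+  // post-order walk below.)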
+ MachineBasicBlock *Exit = initializeMRT(MF, RegionInfo, RegionMap); + + unsigned BBSelectRegIn = createBBSelectReg(TII, MRI); + MBBMRT *ExitMRT = new MBBMRT(Exit); + RegionMap[RegionInfo->getRegionFor(Exit)]->addChild(ExitMRT); + ExitMRT->setBBSelectRegIn(BBSelectRegIn); + + for (auto MBBI : post_order(&(MF.front()))) { + MachineBasicBlock *MBB = &(*MBBI); + + // Skip Exit since we already added it + if (MBB == Exit) { + continue; + } + + DEBUG(dbgs() << "Visiting BB#" << MBB->getNumber() << "\n"); + MBBMRT *NewMBB = new MBBMRT(MBB); + MachineRegion *Region = RegionInfo->getRegionFor(MBB); + + // Ensure we have the MRT region + if (RegionMap.count(Region) == 0) { + RegionMRT *NewMRTRegion = new RegionMRT(Region); + RegionMap[Region] = NewMRTRegion; + + // Ensure all parents are in the RegionMap + MachineRegion *Parent = Region->getParent(); + while (RegionMap.count(Parent) == 0) { + RegionMRT *NewMRTParent = new RegionMRT(Parent); + NewMRTParent->addChild(NewMRTRegion); + NewMRTRegion->setParent(NewMRTParent); + RegionMap[Parent] = NewMRTParent; + NewMRTRegion = NewMRTParent; + Parent = Parent->getParent(); + } + RegionMap[Parent]->addChild(NewMRTRegion); + NewMRTRegion->setParent(RegionMap[Parent]); + } + + // Add MBB to Region MRT + RegionMap[Region]->addChild(NewMBB); + NewMBB->setParent(RegionMap[Region]); + RegionMap[Region]->setSucc(Region->getExit()); + } + return Result; +} + +void LinearizedRegion::storeLiveOutReg(MachineBasicBlock *MBB, unsigned Reg, + MachineInstr *DefInstr, + const MachineRegisterInfo *MRI, + const TargetRegisterInfo *TRI, + PHILinearize &PHIInfo) { + if (TRI->isVirtualRegister(Reg)) { + DEBUG(dbgs() << "Considering Register: " << PrintReg(Reg, TRI) << "\n"); + // If this is a source register to a PHI we are chaining, it + // must be live out. 
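+    // (Its value feeds a linearized PHI chain that is fed from several
+    // blocks, so it has to survive past this block even if no ordinary
+    // use escapes it.)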
+    if (PHIInfo.isSource(Reg)) {
+      DEBUG(dbgs() << "Add LiveOut (PHI): " << PrintReg(Reg, TRI) << "\n");
+      addLiveOut(Reg);
+    } else {
+      // If this is live out of the MBB
+      for (auto &UI : MRI->use_operands(Reg)) {
+        if (UI.getParent()->getParent() != MBB) {
+          DEBUG(dbgs() << "Add LiveOut (MBB BB#" << MBB->getNumber()
+                       << "): " << PrintReg(Reg, TRI) << "\n");
+          addLiveOut(Reg);
+        } else {
+          // If the use is in the same MBB we have to make sure
+          // it is after the def, otherwise it is live out in a loop
+          MachineInstr *UseInstr = UI.getParent();
+          for (MachineBasicBlock::instr_iterator
+                   MII = UseInstr->getIterator(),
+                   MIE = UseInstr->getParent()->instr_end();
+               MII != MIE; ++MII) {
+            if ((&(*MII)) == DefInstr) {
+              DEBUG(dbgs() << "Add LiveOut (Loop): " << PrintReg(Reg, TRI)
+                           << "\n");
+              addLiveOut(Reg);
+            }
+          }
+        }
+      }
+    }
+  }
+}
+
+void LinearizedRegion::storeLiveOutRegRegion(RegionMRT *Region, unsigned Reg,
+                                             MachineInstr *DefInstr,
+                                             const MachineRegisterInfo *MRI,
+                                             const TargetRegisterInfo *TRI,
+                                             PHILinearize &PHIInfo) {
+  if (TRI->isVirtualRegister(Reg)) {
+    DEBUG(dbgs() << "Considering Register: " << PrintReg(Reg, TRI) << "\n");
+    for (auto &UI : MRI->use_operands(Reg)) {
+      if (!Region->contains(UI.getParent()->getParent())) {
+        DEBUG(dbgs() << "Add LiveOut (Region " << (void *)Region
+                     << "): " << PrintReg(Reg, TRI) << "\n");
+        addLiveOut(Reg);
+      }
+    }
+  }
+}
+
+void LinearizedRegion::storeLiveOuts(MachineBasicBlock *MBB,
+                                     const MachineRegisterInfo *MRI,
+                                     const TargetRegisterInfo *TRI,
+                                     PHILinearize &PHIInfo) {
+  DEBUG(dbgs() << "-Store Live Outs Begin (BB#" << MBB->getNumber() << ")-\n");
+  for (auto &II : *MBB) {
+    for (auto &RI : II.defs()) {
+      storeLiveOutReg(MBB, RI.getReg(), RI.getParent(), MRI, TRI, PHIInfo);
+    }
+    for (auto &IRI : II.implicit_operands()) {
+      if (IRI.isDef()) {
+        storeLiveOutReg(MBB, IRI.getReg(), IRI.getParent(), MRI, TRI, PHIInfo);
+      }
+    }
+  }
+
+  // If we have a successor with a PHI, source coming from this MBB we have to
+  // add the register as live out
+  for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+                                        E = MBB->succ_end();
+       SI != E; ++SI) {
+    for (auto &II : *(*SI)) {
+      if (II.isPHI()) {
+        MachineInstr &PHI = II;
+        int numPreds = getPHINumInputs(PHI);
+        for (int i = 0; i < numPreds; ++i) {
+          if (getPHIPred(PHI, i) == MBB) {
+            unsigned PHIReg = getPHISourceReg(PHI, i);
+            DEBUG(dbgs() << "Add LiveOut (PhiSource BB#" << MBB->getNumber()
+                         << " -> BB#" << (*SI)->getNumber()
+                         << "): " << PrintReg(PHIReg, TRI) << "\n");
+            addLiveOut(PHIReg);
+          }
+        }
+      }
+    }
+  }
+
+  DEBUG(dbgs() << "-Store Live Outs End-\n");
+}
+
+void LinearizedRegion::storeMBBLiveOuts(MachineBasicBlock *MBB,
+                                        const MachineRegisterInfo *MRI,
+                                        const TargetRegisterInfo *TRI,
+                                        PHILinearize &PHIInfo,
+                                        RegionMRT *TopRegion) {
+  for (auto &II : *MBB) {
+    for (auto &RI : II.defs()) {
+      storeLiveOutRegRegion(TopRegion, RI.getReg(), RI.getParent(), MRI, TRI,
+                            PHIInfo);
+    }
+    for (auto &IRI : II.implicit_operands()) {
+      if (IRI.isDef()) {
+        storeLiveOutRegRegion(TopRegion, IRI.getReg(), IRI.getParent(), MRI,
+                              TRI, PHIInfo);
+      }
+    }
+  }
+}
+
+void LinearizedRegion::storeLiveOuts(RegionMRT *Region,
+                                     const MachineRegisterInfo *MRI,
+                                     const TargetRegisterInfo *TRI,
+                                     PHILinearize &PHIInfo,
+                                     RegionMRT *CurrentTopRegion) {
+  MachineBasicBlock *Exit = Region->getSucc();
+
+  RegionMRT *TopRegion =
+      CurrentTopRegion == nullptr ? Region : CurrentTopRegion;
+
+  // Check if exit is end of function, if so, no live outs.
+  if (Exit == nullptr)
+    return;
+
+  auto Children = Region->getChildren();
+  for (auto CI : *Children) {
+    if (CI->isMBB()) {
+      auto MBB = CI->getMBBMRT()->getMBB();
+      storeMBBLiveOuts(MBB, MRI, TRI, PHIInfo, TopRegion);
+    } else {
+      LinearizedRegion *SubRegion = CI->getRegionMRT()->getLinearizedRegion();
+      // We should be limited to storing only registers that are live out of
+      // the linearized region
+      for (auto MBBI : SubRegion->MBBs) {
+        storeMBBLiveOuts(MBBI, MRI, TRI, PHIInfo, TopRegion);
+      }
+    }
+  }
+
+  if (CurrentTopRegion == nullptr) {
+    auto Succ = Region->getSucc();
+    for (auto &II : *Succ) {
+      if (II.isPHI()) {
+        MachineInstr &PHI = II;
+        int numPreds = getPHINumInputs(PHI);
+        for (int i = 0; i < numPreds; ++i) {
+          if (Region->contains(getPHIPred(PHI, i))) {
+            unsigned PHIReg = getPHISourceReg(PHI, i);
+            DEBUG(dbgs() << "Add Region LiveOut (" << (void *)Region
+                         << "): " << PrintReg(PHIReg, TRI) << "\n");
+            addLiveOut(PHIReg);
+          }
+        }
+      }
+    }
+  }
+}
+
+void LinearizedRegion::print(raw_ostream &OS, const TargetRegisterInfo *TRI) {
+  OS << "Linearized Region {";
+  bool IsFirst = true;
+  for (const auto &MBB : MBBs) {
+    if (IsFirst) {
+      IsFirst = false;
+    } else {
+      OS << " ,";
+    }
+    OS << MBB->getNumber();
+  }
+  OS << "} (" << Entry->getNumber() << ", "
+     << (Exit == nullptr ? -1 : Exit->getNumber())
+     << "): In:" << PrintReg(getBBSelectRegIn(), TRI)
+     << " Out:" << PrintReg(getBBSelectRegOut(), TRI) << " {";
+  for (auto &LI : LiveOuts) {
+    OS << PrintReg(LI, TRI) << " ";
+  }
+  OS << "} \n";
+}
+
+unsigned LinearizedRegion::getBBSelectRegIn() {
+  return getRegionMRT()->getBBSelectRegIn();
+}
+
+unsigned LinearizedRegion::getBBSelectRegOut() {
+  return getRegionMRT()->getBBSelectRegOut();
+}
+
+void LinearizedRegion::setHasLoop(bool Value) { HasLoop = Value; }
+
+bool LinearizedRegion::getHasLoop() { return HasLoop; }
+
+void LinearizedRegion::addLiveOut(unsigned VReg) { LiveOuts.insert(VReg); }
+
+void LinearizedRegion::removeLiveOut(unsigned Reg) {
+  if (isLiveOut(Reg))
+    LiveOuts.erase(Reg);
+}
+
+void LinearizedRegion::replaceLiveOut(unsigned OldReg, unsigned NewReg) {
+  if (isLiveOut(OldReg)) {
+    removeLiveOut(OldReg);
+    addLiveOut(NewReg);
+  }
+}
+
+void LinearizedRegion::replaceRegister(unsigned Register, unsigned NewRegister,
+                                       MachineRegisterInfo *MRI,
+                                       bool ReplaceInside, bool ReplaceOutside,
+                                       bool IncludeLoopPHI) {
+  assert(Register != NewRegister && "Cannot replace a reg with itself");
+
+  DEBUG(dbgs() << "Preparing to replace register (region): "
+               << PrintReg(Register, MRI->getTargetRegisterInfo()) << " with "
+               << PrintReg(NewRegister, MRI->getTargetRegisterInfo()) << "\n");
+
+  // If we are replacing outside, we also need to update the LiveOuts
+  if (ReplaceOutside &&
+      (isLiveOut(Register) || this->getParent()->isLiveOut(Register))) {
+    LinearizedRegion *Current = this;
+    while (Current != nullptr && Current->getEntry() != nullptr) {
+      DEBUG(dbgs() << "Region before register replace\n");
+      DEBUG(Current->print(dbgs(), MRI->getTargetRegisterInfo()));
+      Current->replaceLiveOut(Register, NewRegister);
+      DEBUG(dbgs() << "Region after register replace\n");
+      DEBUG(Current->print(dbgs(), MRI->getTargetRegisterInfo()));
+      Current = Current->getParent();
+    }
+  }
+
+  for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(Register),
+                                         E = MRI->reg_end();
+       I != E;) {
+    MachineOperand &O = *I;
+    ++I;
+
+    // We don't rewrite defs.
+ if (O.isDef()) + continue; + + bool IsInside = contains(O.getParent()->getParent()); + bool IsLoopPHI = IsInside && (O.getParent()->isPHI() && + O.getParent()->getParent() == getEntry()); + bool ShouldReplace = (IsInside && ReplaceInside) || + (!IsInside && ReplaceOutside) || + (IncludeLoopPHI && IsLoopPHI); + if (ShouldReplace) { + + if (TargetRegisterInfo::isPhysicalRegister(NewRegister)) { + DEBUG(dbgs() << "Trying to substitute physical register: " + << PrintReg(NewRegister, MRI->getTargetRegisterInfo()) + << "\n"); + llvm_unreachable("Cannot substitute physical registers"); + } else { + DEBUG(dbgs() << "Replacing register (region): " + << PrintReg(Register, MRI->getTargetRegisterInfo()) + << " with " + << PrintReg(NewRegister, MRI->getTargetRegisterInfo()) + << "\n"); + O.setReg(NewRegister); + } + } + } +} + +void LinearizedRegion::replaceRegisterInsideRegion(unsigned Register, + unsigned NewRegister, + bool IncludeLoopPHIs, + MachineRegisterInfo *MRI) { + replaceRegister(Register, NewRegister, MRI, true, false, IncludeLoopPHIs); +} + +void LinearizedRegion::replaceRegisterOutsideRegion(unsigned Register, + unsigned NewRegister, + bool IncludeLoopPHIs, + MachineRegisterInfo *MRI) { + replaceRegister(Register, NewRegister, MRI, false, true, IncludeLoopPHIs); +} + +DenseSet *LinearizedRegion::getLiveOuts() { return &LiveOuts; } + +void LinearizedRegion::setEntry(MachineBasicBlock *NewEntry) { + Entry = NewEntry; +} + +MachineBasicBlock *LinearizedRegion::getEntry() { return Entry; } + +void LinearizedRegion::setExit(MachineBasicBlock *NewExit) { Exit = NewExit; } + +MachineBasicBlock *LinearizedRegion::getExit() { return Exit; } + +void LinearizedRegion::addMBB(MachineBasicBlock *MBB) { MBBs.insert(MBB); } + +void LinearizedRegion::addMBBs(LinearizedRegion *InnerRegion) { + for (const auto &MBB : InnerRegion->MBBs) { + addMBB(MBB); + } +} + +bool LinearizedRegion::contains(MachineBasicBlock *MBB) { + return MBBs.count(MBB) == 1; +} + +bool LinearizedRegion::isLiveOut(unsigned Reg) { + return LiveOuts.count(Reg) == 1; +} + +bool LinearizedRegion::hasNoDef(unsigned Reg, MachineRegisterInfo *MRI) { + return MRI->def_begin(Reg) == MRI->def_end(); +} + +// After the code has been structurized, what was flagged as kills +// before are no longer register kills. 
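+// A use that was the last one in its original block may now be followed by
+// further uses in blocks spliced after it, so the stale kill flags have to
+// be cleared.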
+void LinearizedRegion::removeFalseRegisterKills(MachineRegisterInfo *MRI) { + const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo(); + for (auto MBBI : MBBs) { + MachineBasicBlock *MBB = MBBI; + for (auto &II : *MBB) { + for (auto &RI : II.uses()) { + if (RI.isReg()) { + unsigned Reg = RI.getReg(); + if (TRI->isVirtualRegister(Reg)) { + if (hasNoDef(Reg, MRI)) + continue; + if (!MRI->hasOneDef(Reg)) { + DEBUG(this->getEntry()->getParent()->dump()); + DEBUG(dbgs() << PrintReg(Reg, TRI) << "\n"); + } + + if (MRI->def_begin(Reg) == MRI->def_end()) { + DEBUG(dbgs() << "Register " + << PrintReg(Reg, MRI->getTargetRegisterInfo()) + << " has NO defs\n"); + } else if (!MRI->hasOneDef(Reg)) { + DEBUG(dbgs() << "Register " + << PrintReg(Reg, MRI->getTargetRegisterInfo()) + << " has multiple defs\n"); + } + + assert(MRI->hasOneDef(Reg) && "Register has multiple definitions"); + MachineOperand *Def = &(*(MRI->def_begin(Reg))); + MachineOperand *UseOperand = &(RI); + bool UseIsOutsideDefMBB = Def->getParent()->getParent() != MBB; + if (UseIsOutsideDefMBB && UseOperand->isKill()) { + DEBUG(dbgs() << "Removing kill flag on register: " + << PrintReg(Reg, TRI) << "\n"); + UseOperand->setIsKill(false); + } + } + } + } + } + } +} + +void LinearizedRegion::initLiveOut(RegionMRT *Region, + const MachineRegisterInfo *MRI, + const TargetRegisterInfo *TRI, + PHILinearize &PHIInfo) { + storeLiveOuts(Region, MRI, TRI, PHIInfo); +} + +LinearizedRegion::LinearizedRegion(MachineBasicBlock *MBB, + const MachineRegisterInfo *MRI, + const TargetRegisterInfo *TRI, + PHILinearize &PHIInfo) { + setEntry(MBB); + setExit(MBB); + storeLiveOuts(MBB, MRI, TRI, PHIInfo); + MBBs.insert(MBB); + Parent = nullptr; +} + +LinearizedRegion::LinearizedRegion() { + setEntry(nullptr); + setExit(nullptr); + Parent = nullptr; +} + +LinearizedRegion::~LinearizedRegion() {} + +class AMDGPUMachineCFGStructurizer : public MachineFunctionPass { +private: + const MachineRegionInfo *Regions; + const SIInstrInfo *TII; + const TargetRegisterInfo *TRI; + MachineRegisterInfo *MRI; + unsigned BBSelectRegister; + PHILinearize PHIInfo; + DenseMap FallthroughMap; + + void getPHIRegionIndices(RegionMRT *Region, MachineInstr &PHI, + SmallVector &RegionIndices); + void getPHIRegionIndices(LinearizedRegion *Region, MachineInstr &PHI, + SmallVector &RegionIndices); + void getPHINonRegionIndices(LinearizedRegion *Region, MachineInstr &PHI, + SmallVector &PHINonRegionIndices); + + void storePHILinearizationInfoDest( + unsigned LDestReg, MachineInstr &PHI, + SmallVector *RegionIndices = nullptr); + + unsigned storePHILinearizationInfo(MachineInstr &PHI, + SmallVector *RegionIndices); + + void extractKilledPHIs(MachineBasicBlock *MBB); + + bool shrinkPHI(MachineInstr &PHI, SmallVector &PHIIndices, + unsigned *ReplaceReg); + + bool shrinkPHI(MachineInstr &PHI, unsigned CombinedSourceReg, + MachineBasicBlock *SourceMBB, + SmallVector &PHIIndices, unsigned *ReplaceReg); + + void replacePHI(MachineInstr &PHI, unsigned CombinedSourceReg, + MachineBasicBlock *LastMerge, + SmallVector &PHIRegionIndices); + void replaceEntryPHI(MachineInstr &PHI, unsigned CombinedSourceReg, + MachineBasicBlock *IfMBB, + SmallVector &PHIRegionIndices); + void replaceLiveOutRegs(MachineInstr &PHI, + SmallVector &PHIRegionIndices, + unsigned CombinedSourceReg, + LinearizedRegion *LRegion); + void rewriteRegionExitPHI(RegionMRT *Region, MachineBasicBlock *LastMerge, + MachineInstr &PHI, LinearizedRegion *LRegion); + + void rewriteRegionExitPHIs(RegionMRT *Region, MachineBasicBlock 
*LastMerge, + LinearizedRegion *LRegion); + void rewriteRegionEntryPHI(LinearizedRegion *Region, MachineBasicBlock *IfMBB, + MachineInstr &PHI); + void rewriteRegionEntryPHIs(LinearizedRegion *Region, + MachineBasicBlock *IfMBB); + + bool regionIsSimpleIf(RegionMRT *Region); + + void transformSimpleIfRegion(RegionMRT *Region); + + void eliminateDeadBranchOperands(MachineBasicBlock::instr_iterator &II); + + void insertUnconditionalBranch(MachineBasicBlock *MBB, + MachineBasicBlock *Dest, + const DebugLoc &DL = DebugLoc()); + + MachineBasicBlock *createLinearizedExitBlock(RegionMRT *Region); + + void insertMergePHI(MachineBasicBlock *IfBB, MachineBasicBlock *CodeBB, + MachineBasicBlock *MergeBB, unsigned DestRegister, + unsigned IfSourceRegister, unsigned CodeSourceRegister, + bool IsUndefIfSource = false); + + MachineBasicBlock *createIfBlock(MachineBasicBlock *MergeBB, + MachineBasicBlock *CodeBBStart, + MachineBasicBlock *CodeBBEnd, + MachineBasicBlock *SelectBB, unsigned IfReg, + bool InheritPreds); + + void prunePHIInfo(MachineBasicBlock *MBB); + void createEntryPHI(LinearizedRegion *CurrentRegion, unsigned DestReg); + + void createEntryPHIs(LinearizedRegion *CurrentRegion); + void resolvePHIInfos(MachineBasicBlock *FunctionEntry); + + void replaceRegisterWith(unsigned Register, unsigned NewRegister); + + MachineBasicBlock *createIfRegion(MachineBasicBlock *MergeBB, + MachineBasicBlock *CodeBB, + LinearizedRegion *LRegion, + unsigned BBSelectRegIn, + unsigned BBSelectRegOut); + + MachineBasicBlock * + createIfRegion(MachineBasicBlock *MergeMBB, LinearizedRegion *InnerRegion, + LinearizedRegion *CurrentRegion, MachineBasicBlock *SelectBB, + unsigned BBSelectRegIn, unsigned BBSelectRegOut); + void ensureCondIsNotKilled(SmallVector Cond); + + void rewriteCodeBBTerminator(MachineBasicBlock *CodeBB, + MachineBasicBlock *MergeBB, + unsigned BBSelectReg); + + MachineInstr *getDefInstr(unsigned Reg); + void insertChainedPHI(MachineBasicBlock *IfBB, MachineBasicBlock *CodeBB, + MachineBasicBlock *MergeBB, + LinearizedRegion *InnerRegion, unsigned DestReg, + unsigned SourceReg); + bool containsDef(MachineBasicBlock *MBB, LinearizedRegion *InnerRegion, + unsigned Register); + void rewriteLiveOutRegs(MachineBasicBlock *IfBB, MachineBasicBlock *CodeBB, + MachineBasicBlock *MergeBB, + LinearizedRegion *InnerRegion, + LinearizedRegion *LRegion); + + void splitLoopPHI(MachineInstr &PHI, MachineBasicBlock *Entry, + MachineBasicBlock *EntrySucc, LinearizedRegion *LRegion); + void splitLoopPHIs(MachineBasicBlock *Entry, MachineBasicBlock *EntrySucc, + LinearizedRegion *LRegion); + + MachineBasicBlock *splitExit(LinearizedRegion *LRegion); + + MachineBasicBlock *splitEntry(LinearizedRegion *LRegion); + + LinearizedRegion *initLinearizedRegion(RegionMRT *Region); + + bool structurizeComplexRegion(RegionMRT *Region); + + bool structurizeRegion(RegionMRT *Region); + + bool structurizeRegions(RegionMRT *Region, bool isTopRegion); + +public: + static char ID; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + AMDGPUMachineCFGStructurizer() : MachineFunctionPass(ID) { + initializeAMDGPUMachineCFGStructurizerPass(*PassRegistry::getPassRegistry()); + } + + void initFallthroughMap(MachineFunction &MF); + + void createLinearizedRegion(RegionMRT *Region, unsigned SelectOut); + + unsigned initializeSelectRegisters(MRT *MRT, unsigned ExistingExitReg, + MachineRegisterInfo *MRI, + const SIInstrInfo *TII); + + RegionMRT *RMRT; + void 
setRegionMRT(RegionMRT *RegionTree) { RMRT = RegionTree; }
+
+  RegionMRT *getRegionMRT() { return RMRT; }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+};
+}
+
+char AMDGPUMachineCFGStructurizer::ID = 0;
+
+bool AMDGPUMachineCFGStructurizer::regionIsSimpleIf(RegionMRT *Region) {
+  MachineBasicBlock *Entry = Region->getEntry();
+  MachineBasicBlock *Succ = Region->getSucc();
+  bool FoundBypass = false;
+  bool FoundIf = false;
+
+  if (Entry->succ_size() != 2) {
+    return false;
+  }
+
+  for (MachineBasicBlock::const_succ_iterator SI = Entry->succ_begin(),
+                                              E = Entry->succ_end();
+       SI != E; ++SI) {
+    MachineBasicBlock *Current = *SI;
+
+    if (Current == Succ) {
+      FoundBypass = true;
+    } else if ((Current->succ_size() == 1) &&
+               *(Current->succ_begin()) == Succ) {
+      FoundIf = true;
+    }
+  }
+
+  return FoundIf && FoundBypass;
+}
+
+void AMDGPUMachineCFGStructurizer::transformSimpleIfRegion(RegionMRT *Region) {
+  MachineBasicBlock *Entry = Region->getEntry();
+  MachineBasicBlock *Exit = Region->getExit();
+  TII->convertNonUniformIfRegion(Entry, Exit);
+}
+
+static void fixMBBTerminator(MachineBasicBlock *MBB) {
+
+  if (MBB->succ_size() == 1) {
+    auto *Succ = *(MBB->succ_begin());
+    for (auto &TI : MBB->terminators()) {
+      for (auto &UI : TI.uses()) {
+        if (UI.isMBB() && UI.getMBB() != Succ) {
+          UI.setMBB(Succ);
+        }
+      }
+    }
+  }
+}
+
+static void fixRegionTerminator(RegionMRT *Region) {
+  MachineBasicBlock *InternalSucc = nullptr;
+  MachineBasicBlock *ExternalSucc = nullptr;
+  LinearizedRegion *LRegion = Region->getLinearizedRegion();
+  auto Exit = LRegion->getExit();
+
+  SmallPtrSet Successors;
+  for (MachineBasicBlock::const_succ_iterator SI = Exit->succ_begin(),
+                                              SE = Exit->succ_end();
+       SI != SE; ++SI) {
+    MachineBasicBlock *Succ = *SI;
+    if (LRegion->contains(Succ)) {
+      // Do not allow re-assign
+      assert(InternalSucc == nullptr);
+      InternalSucc = Succ;
+    } else {
+      // Do not allow re-assign
+      assert(ExternalSucc == nullptr);
+      ExternalSucc = Succ;
+    }
+  }
+
+  for (auto &TI : Exit->terminators()) {
+    for (auto &UI : TI.uses()) {
+      if (UI.isMBB()) {
+        auto Target = UI.getMBB();
+        if (Target != InternalSucc && Target != ExternalSucc) {
+          UI.setMBB(ExternalSucc);
+        }
+      }
+    }
+  }
+}
+
+// If a region is just a sequence of regions (and the exit
+// block in the case of the top level region), we can simply skip
+// linearizing it, because it is already linear
+bool regionIsSequence(RegionMRT *Region) {
+  auto Children = Region->getChildren();
+  for (auto CI : *Children) {
+    if (!CI->isRegion()) {
+      if (CI->getMBBMRT()->getMBB()->succ_size() > 1) {
+        return false;
+      }
+    }
+  }
+  return true;
+}
+
+void fixupRegionExits(RegionMRT *Region) {
+  auto Children = Region->getChildren();
+  for (auto CI : *Children) {
+    if (!CI->isRegion()) {
+      fixMBBTerminator(CI->getMBBMRT()->getMBB());
+    } else {
+      fixRegionTerminator(CI->getRegionMRT());
+    }
+  }
+}
+
+void AMDGPUMachineCFGStructurizer::getPHIRegionIndices(
+    RegionMRT *Region, MachineInstr &PHI,
+    SmallVector &PHIRegionIndices) {
+  unsigned NumInputs = getPHINumInputs(PHI);
+  for (unsigned i = 0; i < NumInputs; ++i) {
+    MachineBasicBlock *Pred = getPHIPred(PHI, i);
+    if (Region->contains(Pred)) {
+      PHIRegionIndices.push_back(i);
+    }
+  }
+}
+
+void AMDGPUMachineCFGStructurizer::getPHIRegionIndices(
+    LinearizedRegion *Region, MachineInstr &PHI,
+    SmallVector &PHIRegionIndices) {
+  unsigned NumInputs = getPHINumInputs(PHI);
+  for (unsigned i = 0; i < NumInputs; ++i) {
+    MachineBasicBlock *Pred = getPHIPred(PHI, i);
+    if
(Region->contains(Pred)) {
+      PHIRegionIndices.push_back(i);
+    }
+  }
+}
+
+void AMDGPUMachineCFGStructurizer::getPHINonRegionIndices(
+    LinearizedRegion *Region, MachineInstr &PHI,
+    SmallVector &PHINonRegionIndices) {
+  unsigned NumInputs = getPHINumInputs(PHI);
+  for (unsigned i = 0; i < NumInputs; ++i) {
+    MachineBasicBlock *Pred = getPHIPred(PHI, i);
+    if (!Region->contains(Pred)) {
+      PHINonRegionIndices.push_back(i);
+    }
+  }
+}
+
+void AMDGPUMachineCFGStructurizer::storePHILinearizationInfoDest(
+    unsigned LDestReg, MachineInstr &PHI,
+    SmallVector *RegionIndices) {
+  if (RegionIndices) {
+    for (auto i : *RegionIndices) {
+      PHIInfo.addSource(LDestReg, getPHISourceReg(PHI, i), getPHIPred(PHI, i));
+    }
+  } else {
+    unsigned NumInputs = getPHINumInputs(PHI);
+    for (unsigned i = 0; i < NumInputs; ++i) {
+      PHIInfo.addSource(LDestReg, getPHISourceReg(PHI, i), getPHIPred(PHI, i));
+    }
+  }
+}
+
+unsigned AMDGPUMachineCFGStructurizer::storePHILinearizationInfo(
+    MachineInstr &PHI, SmallVector *RegionIndices) {
+  unsigned DestReg = getPHIDestReg(PHI);
+  unsigned LinearizeDestReg =
+      MRI->createVirtualRegister(MRI->getRegClass(DestReg));
+  PHIInfo.addDest(LinearizeDestReg, PHI.getDebugLoc());
+  storePHILinearizationInfoDest(LinearizeDestReg, PHI, RegionIndices);
+  return LinearizeDestReg;
+}
+
+void AMDGPUMachineCFGStructurizer::extractKilledPHIs(MachineBasicBlock *MBB) {
+  // We need to create a new chain for the killed phi, but there is no
+  // need to do the renaming outside or inside the block.
+  SmallPtrSet PHIs;
+  for (MachineBasicBlock::instr_iterator I = MBB->instr_begin(),
+                                         E = MBB->instr_end();
+       I != E; ++I) {
+    MachineInstr &Instr = *I;
+    if (Instr.isPHI()) {
+      unsigned PHIDestReg = getPHIDestReg(Instr);
+      DEBUG(dbgs() << "Extracting killed phi:\n");
+      DEBUG(Instr.dump());
+      PHIs.insert(&Instr);
+      PHIInfo.addDest(PHIDestReg, Instr.getDebugLoc());
+      storePHILinearizationInfoDest(PHIDestReg, Instr);
+    }
+  }
+
+  for (auto PI : PHIs) {
+    PI->eraseFromParent();
+  }
+}
+
+static bool isPHIRegionIndex(SmallVector PHIRegionIndices,
+                             unsigned Index) {
+  for (auto i : PHIRegionIndices) {
+    if (i == Index)
+      return true;
+  }
+  return false;
+}
+
+bool AMDGPUMachineCFGStructurizer::shrinkPHI(MachineInstr &PHI,
+                                             SmallVector &PHIIndices,
+                                             unsigned *ReplaceReg) {
+  return shrinkPHI(PHI, 0, nullptr, PHIIndices, ReplaceReg);
+}
+
+bool AMDGPUMachineCFGStructurizer::shrinkPHI(MachineInstr &PHI,
+                                             unsigned CombinedSourceReg,
+                                             MachineBasicBlock *SourceMBB,
+                                             SmallVector &PHIIndices,
+                                             unsigned *ReplaceReg) {
+  DEBUG(dbgs() << "Shrink PHI: ");
+  DEBUG(PHI.dump());
+  DEBUG(dbgs() << " to " << PrintReg(getPHIDestReg(PHI), TRI)
+               << " = PHI(");
+
+  bool Replaced = false;
+  unsigned NumInputs = getPHINumInputs(PHI);
+  int SingleExternalEntryIndex = -1;
+  for (unsigned i = 0; i < NumInputs; ++i) {
+    if (!isPHIRegionIndex(PHIIndices, i)) {
+      if (SingleExternalEntryIndex == -1) {
+        // Single entry
+        SingleExternalEntryIndex = i;
+      } else {
+        // Multiple entries
+        SingleExternalEntryIndex = -2;
+      }
+    }
+  }
+
+  if (SingleExternalEntryIndex > -1) {
+    *ReplaceReg = getPHISourceReg(PHI, SingleExternalEntryIndex);
+    // We should not rewrite the code, we should only pick up the single value
+    // that represents the shrunk PHI.
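+    // (With exactly one remaining external input the PHI degenerates into a
+    // copy, so the caller can substitute *ReplaceReg for the PHI dest.)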
+ Replaced = true; + } else { + MachineBasicBlock *MBB = PHI.getParent(); + MachineInstrBuilder MIB = + BuildMI(*MBB, PHI, PHI.getDebugLoc(), TII->get(TargetOpcode::PHI), + getPHIDestReg(PHI)); + if (SourceMBB) { + MIB.addReg(CombinedSourceReg); + MIB.addMBB(SourceMBB); + DEBUG(dbgs() << PrintReg(CombinedSourceReg, TRI) << ", BB#" + << SourceMBB->getNumber()); + } + + for (unsigned i = 0; i < NumInputs; ++i) { + if (isPHIRegionIndex(PHIIndices, i)) { + continue; + } + unsigned SourceReg = getPHISourceReg(PHI, i); + MachineBasicBlock *SourcePred = getPHIPred(PHI, i); + MIB.addReg(SourceReg); + MIB.addMBB(SourcePred); + DEBUG(dbgs() << PrintReg(SourceReg, TRI) << ", BB#" + << SourcePred->getNumber()); + } + DEBUG(dbgs() << ")\n"); + } + PHI.eraseFromParent(); + return Replaced; +} + +void AMDGPUMachineCFGStructurizer::replacePHI( + MachineInstr &PHI, unsigned CombinedSourceReg, MachineBasicBlock *LastMerge, + SmallVector &PHIRegionIndices) { + DEBUG(dbgs() << "Replace PHI: "); + DEBUG(PHI.dump()); + DEBUG(dbgs() << " with " << PrintReg(getPHIDestReg(PHI), TRI) + << " = PHI("); + + bool HasExternalEdge = false; + unsigned NumInputs = getPHINumInputs(PHI); + for (unsigned i = 0; i < NumInputs; ++i) { + if (!isPHIRegionIndex(PHIRegionIndices, i)) { + HasExternalEdge = true; + } + } + + if (HasExternalEdge) { + MachineBasicBlock *MBB = PHI.getParent(); + MachineInstrBuilder MIB = + BuildMI(*MBB, PHI, PHI.getDebugLoc(), TII->get(TargetOpcode::PHI), + getPHIDestReg(PHI)); + MIB.addReg(CombinedSourceReg); + MIB.addMBB(LastMerge); + DEBUG(dbgs() << PrintReg(CombinedSourceReg, TRI) << ", BB#" + << LastMerge->getNumber()); + for (unsigned i = 0; i < NumInputs; ++i) { + if (isPHIRegionIndex(PHIRegionIndices, i)) { + continue; + } + unsigned SourceReg = getPHISourceReg(PHI, i); + MachineBasicBlock *SourcePred = getPHIPred(PHI, i); + MIB.addReg(SourceReg); + MIB.addMBB(SourcePred); + DEBUG(dbgs() << PrintReg(SourceReg, TRI) << ", BB#" + << SourcePred->getNumber()); + } + DEBUG(dbgs() << ")\n"); + } else { + replaceRegisterWith(getPHIDestReg(PHI), CombinedSourceReg); + } + PHI.eraseFromParent(); +} + +void AMDGPUMachineCFGStructurizer::replaceEntryPHI( + MachineInstr &PHI, unsigned CombinedSourceReg, MachineBasicBlock *IfMBB, + SmallVector &PHIRegionIndices) { + + DEBUG(dbgs() << "Replace entry PHI: "); + DEBUG(PHI.dump()); + DEBUG(dbgs() << " with "); + + unsigned NumInputs = getPHINumInputs(PHI); + unsigned NumNonRegionInputs = NumInputs; + for (unsigned i = 0; i < NumInputs; ++i) { + if (isPHIRegionIndex(PHIRegionIndices, i)) { + NumNonRegionInputs--; + } + } + + if (NumNonRegionInputs == 0) { + auto DestReg = getPHIDestReg(PHI); + replaceRegisterWith(DestReg, CombinedSourceReg); + DEBUG(dbgs() << " register " << PrintReg(CombinedSourceReg, TRI) << "\n"); + PHI.eraseFromParent(); + } else { + DEBUG(dbgs() << PrintReg(getPHIDestReg(PHI), TRI) << " = PHI("); + MachineBasicBlock *MBB = PHI.getParent(); + MachineInstrBuilder MIB = + BuildMI(*MBB, PHI, PHI.getDebugLoc(), TII->get(TargetOpcode::PHI), + getPHIDestReg(PHI)); + MIB.addReg(CombinedSourceReg); + MIB.addMBB(IfMBB); + DEBUG(dbgs() << PrintReg(CombinedSourceReg, TRI) << ", BB#" + << IfMBB->getNumber()); + unsigned NumInputs = getPHINumInputs(PHI); + for (unsigned i = 0; i < NumInputs; ++i) { + if (isPHIRegionIndex(PHIRegionIndices, i)) { + continue; + } + unsigned SourceReg = getPHISourceReg(PHI, i); + MachineBasicBlock *SourcePred = getPHIPred(PHI, i); + MIB.addReg(SourceReg); + MIB.addMBB(SourcePred); + DEBUG(dbgs() << PrintReg(SourceReg, TRI) << 
", BB#" + << SourcePred->getNumber()); + } + DEBUG(dbgs() << ")\n"); + PHI.eraseFromParent(); + } +} + +void AMDGPUMachineCFGStructurizer::replaceLiveOutRegs( + MachineInstr &PHI, SmallVector &PHIRegionIndices, + unsigned CombinedSourceReg, LinearizedRegion *LRegion) { + bool WasLiveOut = false; + for (auto PII : PHIRegionIndices) { + unsigned Reg = getPHISourceReg(PHI, PII); + if (LRegion->isLiveOut(Reg)) { + bool IsDead = true; + + // Check if register is live out of the basic block + MachineBasicBlock *DefMBB = getDefInstr(Reg)->getParent(); + for (auto UI = MRI->use_begin(Reg), E = MRI->use_end(); UI != E; ++UI) { + if ((*UI).getParent()->getParent() != DefMBB) { + IsDead = false; + } + } + + DEBUG(dbgs() << "Register " << PrintReg(Reg, TRI) << " is " + << (IsDead ? "dead" : "alive") << " after PHI replace\n"); + if (IsDead) { + LRegion->removeLiveOut(Reg); + } + WasLiveOut = true; + } + } + + if (WasLiveOut) + LRegion->addLiveOut(CombinedSourceReg); +} + +void AMDGPUMachineCFGStructurizer::rewriteRegionExitPHI(RegionMRT *Region, + MachineBasicBlock *LastMerge, + MachineInstr &PHI, + LinearizedRegion *LRegion) { + SmallVector PHIRegionIndices; + getPHIRegionIndices(Region, PHI, PHIRegionIndices); + unsigned LinearizedSourceReg = + storePHILinearizationInfo(PHI, &PHIRegionIndices); + + replacePHI(PHI, LinearizedSourceReg, LastMerge, PHIRegionIndices); + replaceLiveOutRegs(PHI, PHIRegionIndices, LinearizedSourceReg, LRegion); +} + +void AMDGPUMachineCFGStructurizer::rewriteRegionEntryPHI(LinearizedRegion *Region, + MachineBasicBlock *IfMBB, + MachineInstr &PHI) { + SmallVector PHINonRegionIndices; + getPHINonRegionIndices(Region, PHI, PHINonRegionIndices); + unsigned LinearizedSourceReg = + storePHILinearizationInfo(PHI, &PHINonRegionIndices); + replaceEntryPHI(PHI, LinearizedSourceReg, IfMBB, PHINonRegionIndices); +} + +static void collectPHIs(MachineBasicBlock *MBB, + SmallVector &PHIs) { + for (auto &BBI : *MBB) { + if (BBI.isPHI()) { + PHIs.push_back(&BBI); + } + } +} + +void AMDGPUMachineCFGStructurizer::rewriteRegionExitPHIs(RegionMRT *Region, + MachineBasicBlock *LastMerge, + LinearizedRegion *LRegion) { + SmallVector PHIs; + auto Exit = Region->getSucc(); + if (Exit == nullptr) + return; + + collectPHIs(Exit, PHIs); + + for (auto PHII : PHIs) { + rewriteRegionExitPHI(Region, LastMerge, *PHII, LRegion); + } +} + +void AMDGPUMachineCFGStructurizer::rewriteRegionEntryPHIs(LinearizedRegion *Region, + MachineBasicBlock *IfMBB) { + SmallVector PHIs; + auto Entry = Region->getEntry(); + + collectPHIs(Entry, PHIs); + + for (auto PHII : PHIs) { + rewriteRegionEntryPHI(Region, IfMBB, *PHII); + } +} + +void AMDGPUMachineCFGStructurizer::insertUnconditionalBranch(MachineBasicBlock *MBB, + MachineBasicBlock *Dest, + const DebugLoc &DL) { + DEBUG(dbgs() << "Inserting unconditional branch: " << MBB->getNumber() + << " -> " << Dest->getNumber() << "\n"); + MachineBasicBlock::instr_iterator Terminator = MBB->getFirstInstrTerminator(); + bool HasTerminator = Terminator != MBB->instr_end(); + if (HasTerminator) { + TII->ReplaceTailWithBranchTo(Terminator, Dest); + } + if (++MachineFunction::iterator(MBB) != MachineFunction::iterator(Dest)) { + TII->insertUnconditionalBranch(*MBB, Dest, DL); + } +} + +static MachineBasicBlock *getSingleExitNode(MachineFunction &MF) { + MachineBasicBlock *result = nullptr; + for (auto &MFI : MF) { + if (MFI.succ_size() == 0) { + if (result == nullptr) { + result = &MFI; + } else { + return nullptr; + } + } + } + + return result; +} + +static bool 
hasOneExitNode(MachineFunction &MF) {
+  return getSingleExitNode(MF) != nullptr;
+}
+
+MachineBasicBlock *
+AMDGPUMachineCFGStructurizer::createLinearizedExitBlock(RegionMRT *Region) {
+  auto Exit = Region->getSucc();
+
+  // If the exit is the end of the function, we just use the existing
+  // exit block.
+  MachineFunction *MF = Region->getEntry()->getParent();
+  if (Exit == nullptr && hasOneExitNode(*MF)) {
+    return &(*(--(Region->getEntry()->getParent()->end())));
+  }
+
+  MachineBasicBlock *LastMerge = MF->CreateMachineBasicBlock();
+  if (Exit == nullptr) {
+    MachineFunction::iterator ExitIter = MF->end();
+    MF->insert(ExitIter, LastMerge);
+  } else {
+    MachineFunction::iterator ExitIter = Exit->getIterator();
+    MF->insert(ExitIter, LastMerge);
+    LastMerge->addSuccessor(Exit);
+    insertUnconditionalBranch(LastMerge, Exit);
+    DEBUG(dbgs() << "Created exit block: " << LastMerge->getNumber() << "\n");
+  }
+  return LastMerge;
+}
+
+void AMDGPUMachineCFGStructurizer::insertMergePHI(MachineBasicBlock *IfBB,
+                                                  MachineBasicBlock *CodeBB,
+                                                  MachineBasicBlock *MergeBB,
+                                                  unsigned DestRegister,
+                                                  unsigned IfSourceRegister,
+                                                  unsigned CodeSourceRegister,
+                                                  bool IsUndefIfSource) {
+  // If this is the function exit block, we don't need a phi.
+  if (MergeBB->succ_begin() == MergeBB->succ_end()) {
+    return;
+  }
+  DEBUG(dbgs() << "Merge PHI (BB#" << MergeBB->getNumber()
+               << "): " << PrintReg(DestRegister, TRI) << " = PHI("
+               << PrintReg(IfSourceRegister, TRI) << ", BB#"
+               << IfBB->getNumber() << PrintReg(CodeSourceRegister, TRI)
+               << ", BB#" << CodeBB->getNumber() << ")\n");
+  const DebugLoc &DL = MergeBB->findDebugLoc(MergeBB->begin());
+  MachineInstrBuilder MIB = BuildMI(*MergeBB, MergeBB->instr_begin(), DL,
+                                    TII->get(TargetOpcode::PHI), DestRegister);
+  if (IsUndefIfSource && false) {
+    MIB.addReg(IfSourceRegister, RegState::Undef);
+  } else {
+    MIB.addReg(IfSourceRegister);
+  }
+  MIB.addMBB(IfBB);
+  MIB.addReg(CodeSourceRegister);
+  MIB.addMBB(CodeBB);
+}
+
+static void removeExternalCFGSuccessors(MachineBasicBlock *MBB) {
+  for (MachineBasicBlock::succ_iterator PI = MBB->succ_begin(),
+                                        E = MBB->succ_end();
+       PI != E; ++PI) {
+    if ((*PI) != MBB) {
+      (MBB)->removeSuccessor(*PI);
+    }
+  }
+}
+
+static void removeExternalCFGEdges(MachineBasicBlock *StartMBB,
+                                   MachineBasicBlock *EndMBB) {
+
+  // We have to check against the StartMBB successor because a
+  // structurized region with a loop will have the entry block split,
+  // and the backedge will go to the entry successor.
+  DenseSet<std::pair<MachineBasicBlock *, MachineBasicBlock *>> Succs;
+  unsigned SuccSize = StartMBB->succ_size();
+  if (SuccSize > 0) {
+    MachineBasicBlock *StartMBBSucc = *(StartMBB->succ_begin());
+    for (MachineBasicBlock::succ_iterator PI = EndMBB->succ_begin(),
+                                          E = EndMBB->succ_end();
+         PI != E; ++PI) {
+      // Either we have a back-edge to the entry block, or a back-edge to the
+      // successor of the entry block since the block may be split.
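+      // Record an edge for removal only if it is not one of those back-edges.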
+      // Either we have a back-edge to the entry block, or a back-edge to the
+      // successor of the entry block since the block may be split.
+      if ((*PI) != StartMBB &&
+          !((*PI) == StartMBBSucc && StartMBB != EndMBB && SuccSize == 1)) {
+        Succs.insert(
+            std::pair<MachineBasicBlock *, MachineBasicBlock *>(EndMBB, *PI));
+      }
+    }
+  }
+
+  for (MachineBasicBlock::pred_iterator PI = StartMBB->pred_begin(),
+                                        E = StartMBB->pred_end();
+       PI != E; ++PI) {
+    if ((*PI) != EndMBB) {
+      Succs.insert(
+          std::pair<MachineBasicBlock *, MachineBasicBlock *>(*PI, StartMBB));
+    }
+  }
+
+  for (auto SI : Succs) {
+    std::pair<MachineBasicBlock *, MachineBasicBlock *> Edge = SI;
+    DEBUG(dbgs() << "Removing edge: BB#" << Edge.first->getNumber() << " -> BB#"
+                 << Edge.second->getNumber() << "\n");
+    Edge.first->removeSuccessor(Edge.second);
+  }
+}
+
+MachineBasicBlock *AMDGPUMachineCFGStructurizer::createIfBlock(
+    MachineBasicBlock *MergeBB, MachineBasicBlock *CodeBBStart,
+    MachineBasicBlock *CodeBBEnd, MachineBasicBlock *SelectBB, unsigned IfReg,
+    bool InheritPreds) {
+  MachineFunction *MF = MergeBB->getParent();
+  MachineBasicBlock *IfBB = MF->CreateMachineBasicBlock();
+
+  if (InheritPreds) {
+    for (MachineBasicBlock::pred_iterator PI = CodeBBStart->pred_begin(),
+                                          E = CodeBBStart->pred_end();
+         PI != E; ++PI) {
+      if ((*PI) != CodeBBEnd) {
+        MachineBasicBlock *Pred = (*PI);
+        Pred->addSuccessor(IfBB);
+      }
+    }
+  }
+
+  removeExternalCFGEdges(CodeBBStart, CodeBBEnd);
+
+  auto CodeBBStartI = CodeBBStart->getIterator();
+  auto CodeBBEndI = CodeBBEnd->getIterator();
+  auto MergeIter = MergeBB->getIterator();
+  MF->insert(MergeIter, IfBB);
+  MF->splice(MergeIter, CodeBBStartI, ++CodeBBEndI);
+  IfBB->addSuccessor(MergeBB);
+  IfBB->addSuccessor(CodeBBStart);
+
+  DEBUG(dbgs() << "Created If block: " << IfBB->getNumber() << "\n");
+  // Ensure that the MergeBB is a successor of the CodeBBEnd.
+  if (!CodeBBEnd->isSuccessor(MergeBB))
+    CodeBBEnd->addSuccessor(MergeBB);
+
+  DEBUG(dbgs() << "Moved MBB#" << CodeBBStart->getNumber() << " through MBB#"
+               << CodeBBEnd->getNumber() << "\n");
+
+  // If we have a single predecessor we can find a reasonable debug location.
+  MachineBasicBlock *SinglePred =
+      CodeBBStart->pred_size() == 1 ? *(CodeBBStart->pred_begin()) : nullptr;
+  const DebugLoc &DL = SinglePred
+                    ? SinglePred->findDebugLoc(SinglePred->getFirstTerminator())
+                    : DebugLoc();
+
+  unsigned Reg =
+      TII->insertEQ(IfBB, IfBB->begin(), DL, IfReg,
+                    SelectBB->getNumber() /* CodeBBStart->getNumber() */);
+  if (&(*(IfBB->getParent()->begin())) == IfBB) {
+    TII->materializeImmediate(*IfBB, IfBB->begin(), DL, IfReg,
+                              CodeBBStart->getNumber());
+  }
+  MachineOperand RegOp = MachineOperand::CreateReg(Reg, false, false, true);
+  ArrayRef<MachineOperand> Cond(RegOp);
+  TII->insertBranch(*IfBB, MergeBB, CodeBBStart, Cond, DL);
+
+  return IfBB;
+}
+
+void AMDGPUMachineCFGStructurizer::ensureCondIsNotKilled(
+    SmallVector<MachineOperand, 1> Cond) {
+  if (Cond.size() != 1)
+    return;
+  if (!Cond[0].isReg())
+    return;
+
+  unsigned CondReg = Cond[0].getReg();
+  for (auto UI = MRI->use_begin(CondReg), E = MRI->use_end(); UI != E; ++UI) {
+    (*UI).setIsKill(false);
+  }
+}
+
+void AMDGPUMachineCFGStructurizer::rewriteCodeBBTerminator(MachineBasicBlock *CodeBB,
+                                                     MachineBasicBlock *MergeBB,
+                                                     unsigned BBSelectReg) {
+  MachineBasicBlock *TrueBB = nullptr;
+  MachineBasicBlock *FalseBB = nullptr;
+  SmallVector<MachineOperand, 1> Cond;
+  MachineBasicBlock *FallthroughBB = FallthroughMap[CodeBB];
+  TII->analyzeBranch(*CodeBB, TrueBB, FalseBB, Cond);
+
+  const DebugLoc &DL = CodeBB->findDebugLoc(CodeBB->getFirstTerminator());
+
+  if (FalseBB == nullptr && TrueBB == nullptr && FallthroughBB == nullptr) {
+    // This is an exit block, hence no successors. We will assign the
+    // bb select register to the entry block.
+ TII->materializeImmediate(*CodeBB, CodeBB->getFirstTerminator(), DL, + BBSelectReg, + CodeBB->getParent()->begin()->getNumber()); + insertUnconditionalBranch(CodeBB, MergeBB, DL); + return; + } + + if (FalseBB == nullptr && TrueBB == nullptr) { + TrueBB = FallthroughBB; + } else if (TrueBB != nullptr) { + FalseBB = + (FallthroughBB && (FallthroughBB != TrueBB)) ? FallthroughBB : FalseBB; + } + + if ((TrueBB != nullptr && FalseBB == nullptr) || (TrueBB == FalseBB)) { + TII->materializeImmediate(*CodeBB, CodeBB->getFirstTerminator(), DL, + BBSelectReg, TrueBB->getNumber()); + } else { + const TargetRegisterClass *RegClass = MRI->getRegClass(BBSelectReg); + unsigned TrueBBReg = MRI->createVirtualRegister(RegClass); + unsigned FalseBBReg = MRI->createVirtualRegister(RegClass); + TII->materializeImmediate(*CodeBB, CodeBB->getFirstTerminator(), DL, + TrueBBReg, TrueBB->getNumber()); + TII->materializeImmediate(*CodeBB, CodeBB->getFirstTerminator(), DL, + FalseBBReg, FalseBB->getNumber()); + ensureCondIsNotKilled(Cond); + TII->insertVectorSelect(*CodeBB, CodeBB->getFirstTerminator(), DL, + BBSelectReg, Cond, TrueBBReg, FalseBBReg); + } + + insertUnconditionalBranch(CodeBB, MergeBB, DL); +} + +MachineInstr *AMDGPUMachineCFGStructurizer::getDefInstr(unsigned Reg) { + if (MRI->def_begin(Reg) == MRI->def_end()) { + DEBUG(dbgs() << "Register " << PrintReg(Reg, MRI->getTargetRegisterInfo()) + << " has NO defs\n"); + } else if (!MRI->hasOneDef(Reg)) { + DEBUG(dbgs() << "Register " << PrintReg(Reg, MRI->getTargetRegisterInfo()) + << " has multiple defs\n"); + DEBUG(dbgs() << "DEFS BEGIN:\n"); + for (auto DI = MRI->def_begin(Reg), DE = MRI->def_end(); DI != DE; ++DI) { + DEBUG(DI->getParent()->dump()); + } + DEBUG(dbgs() << "DEFS END\n"); + } + + assert(MRI->hasOneDef(Reg) && "Register has multiple definitions"); + return (*(MRI->def_begin(Reg))).getParent(); +} + +void AMDGPUMachineCFGStructurizer::insertChainedPHI(MachineBasicBlock *IfBB, + MachineBasicBlock *CodeBB, + MachineBasicBlock *MergeBB, + LinearizedRegion *InnerRegion, + unsigned DestReg, + unsigned SourceReg) { + // In this function we know we are part of a chain already, so we need + // to add the registers to the existing chain, and rename the register + // inside the region. + bool IsSingleBB = InnerRegion->getEntry() == InnerRegion->getExit(); + MachineInstr *DefInstr = getDefInstr(SourceReg); + if (DefInstr->isPHI() && DefInstr->getParent() == CodeBB && IsSingleBB) { + // Handle the case where the def is a PHI-def inside a basic + // block, then we only need to do renaming. Special care needs to + // be taken if the PHI-def is part of an existing chain, or if a + // new one needs to be created. + InnerRegion->replaceRegisterInsideRegion(SourceReg, DestReg, true, MRI); + + // We collect all PHI Information, and if we are at the region entry, + // all PHIs will be removed, and then re-introduced if needed. + storePHILinearizationInfoDest(DestReg, *DefInstr); + // We have picked up all the information we need now and can remove + // the PHI + PHIInfo.removeSource(DestReg, SourceReg, CodeBB); + DefInstr->eraseFromParent(); + } else { + // If this is not a phi-def, or it is a phi-def but from a linearized region + if (IsSingleBB && DefInstr->getParent() == InnerRegion->getEntry()) { + // If this is a single BB and the definition is in this block we + // need to replace any uses outside the region. 
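
[Editorial aside: rewriteCodeBBTerminator above replaces a two-way branch with a write of the taken successor's block number into the bb-select register, after which the branch to the merge block is unconditional. A toy sketch of that linearization idea in plain C++; this is not the pass's API, and the block numbers are made up.]

#include <cstdio>

// Toy model of linearized control flow: instead of branching, each block
// computes the number of the block that should run next and falls through
// to the merge point, where a PHI consumes the value.
static int runLinearized(bool cond) {
  const int TrueBB = 1, FalseBB = 2;
  // Models TII->insertVectorSelect(..., BBSelectReg, Cond, TrueBBReg, FalseBBReg).
  int bbSelect = cond ? TrueBB : FalseBB; // bbSelect models BBSelectReg
  return bbSelect;
}

int main() {
  std::printf("next block: %d\n", runLinearized(true));  // 1
  std::printf("next block: %d\n", runLinearized(false)); // 2
}
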
+      InnerRegion->replaceRegisterOutsideRegion(SourceReg, DestReg, false, MRI);
+    }
+    const TargetRegisterClass *RegClass = MRI->getRegClass(DestReg);
+    unsigned NextDestReg = MRI->createVirtualRegister(RegClass);
+    bool IsLastDef = PHIInfo.getNumSources(DestReg) == 1;
+    DEBUG(dbgs() << "Insert Chained PHI\n");
+    insertMergePHI(IfBB, InnerRegion->getExit(), MergeBB, DestReg, NextDestReg,
+                   SourceReg, IsLastDef);
+
+    PHIInfo.removeSource(DestReg, SourceReg, CodeBB);
+    if (IsLastDef) {
+      const DebugLoc &DL = IfBB->findDebugLoc(IfBB->getFirstTerminator());
+      TII->materializeImmediate(*IfBB, IfBB->getFirstTerminator(), DL,
+                                NextDestReg, 0);
+      PHIInfo.deleteDef(DestReg);
+    } else {
+      PHIInfo.replaceDef(DestReg, NextDestReg);
+    }
+  }
+}
+
+bool AMDGPUMachineCFGStructurizer::containsDef(MachineBasicBlock *MBB,
+                                         LinearizedRegion *InnerRegion,
+                                         unsigned Register) {
+  return getDefInstr(Register)->getParent() == MBB ||
+         InnerRegion->contains(getDefInstr(Register)->getParent());
+}
+
+void AMDGPUMachineCFGStructurizer::rewriteLiveOutRegs(MachineBasicBlock *IfBB,
+                                                MachineBasicBlock *CodeBB,
+                                                MachineBasicBlock *MergeBB,
+                                                LinearizedRegion *InnerRegion,
+                                                LinearizedRegion *LRegion) {
+  DenseSet<unsigned> *LiveOuts = InnerRegion->getLiveOuts();
+  SmallVector<unsigned, 4> OldLiveOuts;
+  bool IsSingleBB = InnerRegion->getEntry() == InnerRegion->getExit();
+  for (auto OLI : *LiveOuts) {
+    OldLiveOuts.push_back(OLI);
+  }
+
+  for (auto LI : OldLiveOuts) {
+    DEBUG(dbgs() << "LiveOut: " << PrintReg(LI, TRI));
+    if (!containsDef(CodeBB, InnerRegion, LI) ||
+        (!IsSingleBB && (getDefInstr(LI)->getParent() == LRegion->getExit()))) {
+      // If the register simply lives through the CodeBB, we don't have
+      // to rewrite anything since the register is not defined in this
+      // part of the code.
+      DEBUG(dbgs() << "- through");
+      continue;
+    }
+    DEBUG(dbgs() << "\n");
+    unsigned Reg = LI;
+    if (/*!PHIInfo.isSource(Reg) &&*/ Reg != InnerRegion->getBBSelectRegOut()) {
+      // If the register is live out, we do want to create a phi,
+      // unless it is from the Exit block, because in that case there
+      // is already a PHI, and no need to create a new one.
+
+      // If the register is just a live out def and not part of a phi
+      // chain, we need to create a PHI node to handle the if region,
+      // and replace all uses outside of the region with the new dest
+      // register, unless it is the outgoing BB select register. We have
+      // already created phi nodes for these.
+      const TargetRegisterClass *RegClass = MRI->getRegClass(Reg);
+      unsigned PHIDestReg = MRI->createVirtualRegister(RegClass);
+      unsigned IfSourceReg = MRI->createVirtualRegister(RegClass);
+      // Create initializer, this value is never used, but is needed
+      // to satisfy SSA.
+      DEBUG(dbgs() << "Initializer for reg: " << PrintReg(Reg) << "\n");
+      TII->materializeImmediate(*IfBB, IfBB->getFirstTerminator(), DebugLoc(),
+                                IfSourceReg, 0);
+
+      InnerRegion->replaceRegisterOutsideRegion(Reg, PHIDestReg, true, MRI);
+      DEBUG(dbgs() << "Insert Non-Chained Live out PHI\n");
+      insertMergePHI(IfBB, InnerRegion->getExit(), MergeBB, PHIDestReg,
+                     IfSourceReg, Reg, true);
+    }
+  }
+
+  // Handle the chained definitions in PHIInfo, checking if this basic block
+  // is a source block for a definition.
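
[Editorial aside: the prunePHIInfo step just below drops PHI sources that are made redundant by a source defined in an always-executed block. A self-contained sketch of that filtering over a toy PHI table; the table layout and names are illustrative only.]

#include <cstdio>
#include <map>
#include <utility>
#include <vector>

// Toy PHI table: destination register -> (source register, predecessor block).
using PHITable = std::map<int, std::vector<std::pair<int, int>>>;

// If some source of a destination is defined in block MBB, every other source
// is redundant: MBB always executes when the region executes.
static void prunePHIInfo(PHITable &phis, int MBB,
                         const std::map<int, int> &defBlockOf) {
  for (auto &entry : phis) {
    bool hasLocalSource = false;
    for (auto &src : entry.second)
      if (defBlockOf.at(src.first) == MBB)
        hasLocalSource = true;
    if (!hasLocalSource)
      continue;
    std::vector<std::pair<int, int>> kept;
    for (auto &src : entry.second)
      if (defBlockOf.at(src.first) == MBB) // keep only the local source(s)
        kept.push_back(src);
    entry.second = kept;
  }
}

int main() {
  PHITable phis{{100, {{10, 1}, {11, 2}}}};
  std::map<int, int> defBlockOf{{10, 1}, {11, 2}};
  prunePHIInfo(phis, /*MBB=*/1, defBlockOf);
  std::printf("sources left for %%100: %zu\n", phis[100].size()); // 1
}
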
+  SmallVector<unsigned, 4> Sources;
+  if (PHIInfo.findSourcesFromMBB(CodeBB, Sources)) {
+    DEBUG(dbgs() << "Inserting PHI Live Out from BB#" << CodeBB->getNumber()
+                 << "\n");
+    for (auto SI : Sources) {
+      unsigned DestReg;
+      PHIInfo.findDest(SI, CodeBB, DestReg);
+      insertChainedPHI(IfBB, CodeBB, MergeBB, InnerRegion, DestReg, SI);
+    }
+    DEBUG(dbgs() << "Insertion done.\n");
+  }
+
+  DEBUG(PHIInfo.dump(MRI));
+}
+
+void AMDGPUMachineCFGStructurizer::prunePHIInfo(MachineBasicBlock *MBB) {
+  DEBUG(dbgs() << "Before PHI Prune\n");
+  DEBUG(PHIInfo.dump(MRI));
+  SmallVector<std::tuple<unsigned, unsigned, MachineBasicBlock *>, 4>
+      EliminatedSources;
+  for (auto DRI = PHIInfo.dests_begin(), DE = PHIInfo.dests_end(); DRI != DE;
+       ++DRI) {
+
+    unsigned DestReg = *DRI;
+    auto SE = PHIInfo.sources_end(DestReg);
+
+    bool MBBContainsPHISource = false;
+    // Check if there is a PHI source in this MBB
+    for (auto SRI = PHIInfo.sources_begin(DestReg); SRI != SE; ++SRI) {
+      unsigned SourceReg = (*SRI).first;
+      MachineOperand *Def = &(*(MRI->def_begin(SourceReg)));
+      if (Def->getParent()->getParent() == MBB) {
+        MBBContainsPHISource = true;
+      }
+    }
+
+    // If so, all other sources are useless since we know this block
+    // is always executed when the region is executed.
+    if (MBBContainsPHISource) {
+      for (auto SRI = PHIInfo.sources_begin(DestReg); SRI != SE; ++SRI) {
+        PHILinearize::PHISourceT Source = *SRI;
+        unsigned SourceReg = Source.first;
+        MachineBasicBlock *SourceMBB = Source.second;
+        MachineOperand *Def = &(*(MRI->def_begin(SourceReg)));
+        if (Def->getParent()->getParent() != MBB) {
+          EliminatedSources.push_back(
+              std::make_tuple(DestReg, SourceReg, SourceMBB));
+        }
+      }
+    }
+  }
+
+  // Remove the PHI sources that are made redundant by the source in this MBB.
+  for (auto &SourceInfo : EliminatedSources) {
+    PHIInfo.removeSource(std::get<0>(SourceInfo), std::get<1>(SourceInfo),
+                         std::get<2>(SourceInfo));
+  }
+  DEBUG(dbgs() << "After PHI Prune\n");
+  DEBUG(PHIInfo.dump(MRI));
+}
+
+void AMDGPUMachineCFGStructurizer::createEntryPHI(LinearizedRegion *CurrentRegion,
+                                            unsigned DestReg) {
+  MachineBasicBlock *Entry = CurrentRegion->getEntry();
+  MachineBasicBlock *Exit = CurrentRegion->getExit();
+
+  DEBUG(dbgs() << "RegionExit: " << Exit->getNumber()
+               << " Pred: " << (*(Entry->pred_begin()))->getNumber() << "\n");
+
+  int NumSources = 0;
+  auto SE = PHIInfo.sources_end(DestReg);
+
+  for (auto SRI = PHIInfo.sources_begin(DestReg); SRI != SE; ++SRI) {
+    NumSources++;
+  }
+
+  if (NumSources == 1) {
+    auto SRI = PHIInfo.sources_begin(DestReg);
+    unsigned SourceReg = (*SRI).first;
+    replaceRegisterWith(DestReg, SourceReg);
+  } else {
+    const DebugLoc &DL = Entry->findDebugLoc(Entry->begin());
+    MachineInstrBuilder MIB = BuildMI(*Entry, Entry->instr_begin(), DL,
+                                      TII->get(TargetOpcode::PHI), DestReg);
+    DEBUG(dbgs() << "Entry PHI " << PrintReg(DestReg, TRI) << " = PHI(");
+
+    unsigned CurrentBackedgeReg = 0;
+
+    for (auto SRI = PHIInfo.sources_begin(DestReg); SRI != SE; ++SRI) {
+      unsigned SourceReg = (*SRI).first;
+
+      if (CurrentRegion->contains((*SRI).second)) {
+        if (CurrentBackedgeReg == 0) {
+          CurrentBackedgeReg = SourceReg;
+        } else {
+          MachineInstr *PHIDefInstr = getDefInstr(SourceReg);
+          MachineBasicBlock *PHIDefMBB = PHIDefInstr->getParent();
+          const TargetRegisterClass *RegClass =
+              MRI->getRegClass(CurrentBackedgeReg);
+          unsigned NewBackedgeReg = MRI->createVirtualRegister(RegClass);
+          MachineInstrBuilder BackedgePHI =
+              BuildMI(*PHIDefMBB, PHIDefMBB->instr_begin(), DL,
+                      TII->get(TargetOpcode::PHI), NewBackedgeReg);
+          BackedgePHI.addReg(CurrentBackedgeReg);
BackedgePHI.addMBB(getPHIPred(*PHIDefInstr, 0)); + BackedgePHI.addReg(getPHISourceReg(*PHIDefInstr, 1)); + BackedgePHI.addMBB((*SRI).second); + CurrentBackedgeReg = NewBackedgeReg; + DEBUG(dbgs() << "Inserting backedge PHI: " + << PrintReg(NewBackedgeReg, TRI) << " = PHI(" + << PrintReg(CurrentBackedgeReg, TRI) << ", BB#" + << getPHIPred(*PHIDefInstr, 0)->getNumber() << ", " + << PrintReg(getPHISourceReg(*PHIDefInstr, 1), TRI) + << ", BB#" << (*SRI).second->getNumber()); + } + } else { + MIB.addReg(SourceReg); + MIB.addMBB((*SRI).second); + DEBUG(dbgs() << PrintReg(SourceReg, TRI) << ", BB#" + << (*SRI).second->getNumber() << ", "); + } + } + + // Add the final backedge register source to the entry phi + if (CurrentBackedgeReg != 0) { + MIB.addReg(CurrentBackedgeReg); + MIB.addMBB(Exit); + DEBUG(dbgs() << PrintReg(CurrentBackedgeReg, TRI) << ", BB#" + << Exit->getNumber() << ")\n"); + } else { + DEBUG(dbgs() << ")\n"); + } + } +} + +void AMDGPUMachineCFGStructurizer::createEntryPHIs(LinearizedRegion *CurrentRegion) { + DEBUG(PHIInfo.dump(MRI)); + + for (auto DRI = PHIInfo.dests_begin(), DE = PHIInfo.dests_end(); DRI != DE; + ++DRI) { + + unsigned DestReg = *DRI; + createEntryPHI(CurrentRegion, DestReg); + } + PHIInfo.clear(); +} + +void AMDGPUMachineCFGStructurizer::replaceRegisterWith(unsigned Register, + unsigned NewRegister) { + assert(Register != NewRegister && "Cannot replace a reg with itself"); + + for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(Register), + E = MRI->reg_end(); + I != E;) { + MachineOperand &O = *I; + ++I; + if (TargetRegisterInfo::isPhysicalRegister(NewRegister)) { + DEBUG(dbgs() << "Trying to substitute physical register: " + << PrintReg(NewRegister, MRI->getTargetRegisterInfo()) + << "\n"); + llvm_unreachable("Cannot substitute physical registers"); + // We don't handle physical registers, but if we need to + // in the future This is how we do it: + // O.substPhysReg(NewRegister, *TRI); + } else { + DEBUG(dbgs() << "Replacing register: " + << PrintReg(Register, MRI->getTargetRegisterInfo()) + << " with " + << PrintReg(NewRegister, MRI->getTargetRegisterInfo()) + << "\n"); + O.setReg(NewRegister); + } + } + PHIInfo.deleteDef(Register); + + getRegionMRT()->replaceLiveOutReg(Register, NewRegister); + + DEBUG(PHIInfo.dump(MRI)); +} + +void AMDGPUMachineCFGStructurizer::resolvePHIInfos(MachineBasicBlock *FunctionEntry) { + DEBUG(dbgs() << "Resolve PHI Infos\n"); + DEBUG(PHIInfo.dump(MRI)); + for (auto DRI = PHIInfo.dests_begin(), DE = PHIInfo.dests_end(); DRI != DE; + ++DRI) { + unsigned DestReg = *DRI; + DEBUG(dbgs() << "DestReg: " << PrintReg(DestReg, TRI) << "\n"); + auto SRI = PHIInfo.sources_begin(DestReg); + unsigned SourceReg = (*SRI).first; + DEBUG(dbgs() << "DestReg: " << PrintReg(DestReg, TRI) + << " SourceReg: " << PrintReg(SourceReg, TRI) << "\n"); + + assert(PHIInfo.sources_end(DestReg) == ++SRI && + "More than one phi source in entry node"); + replaceRegisterWith(DestReg, SourceReg); + } +} + +static bool isFunctionEntryBlock(MachineBasicBlock *MBB) { + return ((&(*(MBB->getParent()->begin()))) == MBB); +} + +MachineBasicBlock *AMDGPUMachineCFGStructurizer::createIfRegion( + MachineBasicBlock *MergeBB, MachineBasicBlock *CodeBB, + LinearizedRegion *CurrentRegion, unsigned BBSelectRegIn, + unsigned BBSelectRegOut) { + if (isFunctionEntryBlock(CodeBB) && !CurrentRegion->getHasLoop()) { + // Handle non-loop function entry block. 
+    // We need to allow loops to the entry block and then
+    rewriteCodeBBTerminator(CodeBB, MergeBB, BBSelectRegOut);
+    resolvePHIInfos(CodeBB);
+    removeExternalCFGSuccessors(CodeBB);
+    CodeBB->addSuccessor(MergeBB);
+    CurrentRegion->addMBB(CodeBB);
+    return nullptr;
+  }
+  if (CurrentRegion->getEntry() == CodeBB && !CurrentRegion->getHasLoop()) {
+    // Handle non-loop region entry block.
+    MachineFunction *MF = MergeBB->getParent();
+    auto MergeIter = MergeBB->getIterator();
+    auto CodeBBStartIter = CodeBB->getIterator();
+    auto CodeBBEndIter = ++(CodeBB->getIterator());
+    if (CodeBBEndIter != MergeIter) {
+      MF->splice(MergeIter, CodeBBStartIter, CodeBBEndIter);
+    }
+    rewriteCodeBBTerminator(CodeBB, MergeBB, BBSelectRegOut);
+    prunePHIInfo(CodeBB);
+    createEntryPHIs(CurrentRegion);
+    removeExternalCFGSuccessors(CodeBB);
+    CodeBB->addSuccessor(MergeBB);
+    CurrentRegion->addMBB(CodeBB);
+    return nullptr;
+  } else {
+    // Handle internal block.
+    const TargetRegisterClass *RegClass = MRI->getRegClass(BBSelectRegIn);
+    unsigned CodeBBSelectReg = MRI->createVirtualRegister(RegClass);
+    rewriteCodeBBTerminator(CodeBB, MergeBB, CodeBBSelectReg);
+    bool IsRegionEntryBB = CurrentRegion->getEntry() == CodeBB;
+    MachineBasicBlock *IfBB = createIfBlock(MergeBB, CodeBB, CodeBB, CodeBB,
+                                            BBSelectRegIn, IsRegionEntryBB);
+    CurrentRegion->addMBB(IfBB);
+    // If this is the entry block we need to make the If block the new
+    // linearized region entry.
+    if (IsRegionEntryBB) {
+      CurrentRegion->setEntry(IfBB);
+
+      if (CurrentRegion->getHasLoop()) {
+        MachineBasicBlock *RegionExit = CurrentRegion->getExit();
+        MachineBasicBlock *ETrueBB = nullptr;
+        MachineBasicBlock *EFalseBB = nullptr;
+        SmallVector<MachineOperand, 1> ECond;
+
+        const DebugLoc &DL = DebugLoc();
+        TII->analyzeBranch(*RegionExit, ETrueBB, EFalseBB, ECond);
+        TII->removeBranch(*RegionExit);
+
+        // We need to create a backedge if there is a loop
+        unsigned Reg = TII->insertNE(
+            RegionExit, RegionExit->instr_end(), DL,
+            CurrentRegion->getRegionMRT()->getInnerOutputRegister(),
+            CurrentRegion->getRegionMRT()->getEntry()->getNumber());
+        MachineOperand RegOp =
+            MachineOperand::CreateReg(Reg, false, false, true);
+        ArrayRef<MachineOperand> Cond(RegOp);
+        DEBUG(dbgs() << "RegionExitReg: ");
+        DEBUG(Cond[0].print(dbgs(), TRI));
+        DEBUG(dbgs() << "\n");
+        TII->insertBranch(*RegionExit, CurrentRegion->getEntry(), RegionExit,
+                          Cond, DebugLoc());
+        RegionExit->addSuccessor(CurrentRegion->getEntry());
+      }
+    }
+    CurrentRegion->addMBB(CodeBB);
+    LinearizedRegion InnerRegion(CodeBB, MRI, TRI, PHIInfo);
+
+    InnerRegion.setParent(CurrentRegion);
+    DEBUG(dbgs() << "Insert BB Select PHI (BB)\n");
+    insertMergePHI(IfBB, CodeBB, MergeBB, BBSelectRegOut, BBSelectRegIn,
+                   CodeBBSelectReg);
+    InnerRegion.addMBB(MergeBB);
+
+    DEBUG(InnerRegion.print(dbgs(), TRI));
+    rewriteLiveOutRegs(IfBB, CodeBB, MergeBB, &InnerRegion, CurrentRegion);
+    extractKilledPHIs(CodeBB);
+    if (IsRegionEntryBB) {
+      createEntryPHIs(CurrentRegion);
+    }
+    return IfBB;
+  }
+}
+
+MachineBasicBlock *AMDGPUMachineCFGStructurizer::createIfRegion(
+    MachineBasicBlock *MergeBB, LinearizedRegion *InnerRegion,
+    LinearizedRegion *CurrentRegion, MachineBasicBlock *SelectBB,
+    unsigned BBSelectRegIn, unsigned BBSelectRegOut) {
+  unsigned CodeBBSelectReg =
+      InnerRegion->getRegionMRT()->getInnerOutputRegister();
+  MachineBasicBlock *CodeEntryBB = InnerRegion->getEntry();
+  MachineBasicBlock *CodeExitBB = InnerRegion->getExit();
+  MachineBasicBlock *IfBB = createIfBlock(MergeBB, CodeEntryBB, CodeExitBB,
+                                          SelectBB, BBSelectRegIn, true);
+  CurrentRegion->addMBB(IfBB);
+  bool isEntry = CurrentRegion->getEntry() == InnerRegion->getEntry();
+  if (isEntry) {
+
+    if (CurrentRegion->getHasLoop()) {
+      MachineBasicBlock *RegionExit = CurrentRegion->getExit();
+      MachineBasicBlock *ETrueBB = nullptr;
+      MachineBasicBlock *EFalseBB = nullptr;
+      SmallVector<MachineOperand, 1> ECond;
+
+      const DebugLoc &DL = DebugLoc();
+      TII->analyzeBranch(*RegionExit, ETrueBB, EFalseBB, ECond);
+      TII->removeBranch(*RegionExit);
+
+      // We need to create a backedge if there is a loop
+      unsigned Reg =
+          TII->insertNE(RegionExit, RegionExit->instr_end(), DL,
+                        CurrentRegion->getRegionMRT()->getInnerOutputRegister(),
+                        CurrentRegion->getRegionMRT()->getEntry()->getNumber());
+      MachineOperand RegOp = MachineOperand::CreateReg(Reg, false, false, true);
+      ArrayRef<MachineOperand> Cond(RegOp);
+      DEBUG(dbgs() << "RegionExitReg: ");
+      DEBUG(Cond[0].print(dbgs(), TRI));
+      DEBUG(dbgs() << "\n");
+      TII->insertBranch(*RegionExit, CurrentRegion->getEntry(), RegionExit,
+                        Cond, DebugLoc());
+      RegionExit->addSuccessor(IfBB);
+    }
+  }
+  CurrentRegion->addMBBs(InnerRegion);
+  DEBUG(dbgs() << "Insert BB Select PHI (region)\n");
+  insertMergePHI(IfBB, CodeExitBB, MergeBB, BBSelectRegOut, BBSelectRegIn,
+                 CodeBBSelectReg);
+
+  rewriteLiveOutRegs(IfBB, /* CodeEntryBB */ CodeExitBB, MergeBB, InnerRegion,
+                     CurrentRegion);
+
+  rewriteRegionEntryPHIs(InnerRegion, IfBB);
+
+  if (isEntry) {
+    CurrentRegion->setEntry(IfBB);
+  }
+
+  if (isEntry) {
+    createEntryPHIs(CurrentRegion);
+  }
+
+  return IfBB;
+}
+
+void AMDGPUMachineCFGStructurizer::splitLoopPHI(MachineInstr &PHI,
+                                          MachineBasicBlock *Entry,
+                                          MachineBasicBlock *EntrySucc,
+                                          LinearizedRegion *LRegion) {
+  SmallVector<unsigned, 2> PHIRegionIndices;
+  getPHIRegionIndices(LRegion, PHI, PHIRegionIndices);
+
+  assert(PHIRegionIndices.size() == 1);
+
+  unsigned RegionIndex = PHIRegionIndices[0];
+  unsigned RegionSourceReg = getPHISourceReg(PHI, RegionIndex);
+  MachineBasicBlock *RegionSourceMBB = getPHIPred(PHI, RegionIndex);
+  unsigned PHIDest = getPHIDestReg(PHI);
+  unsigned PHISource = PHIDest;
+  unsigned ReplaceReg;
+
+  if (shrinkPHI(PHI, PHIRegionIndices, &ReplaceReg)) {
+    PHISource = ReplaceReg;
+  }
+
+  const TargetRegisterClass *RegClass = MRI->getRegClass(PHIDest);
+  unsigned NewDestReg = MRI->createVirtualRegister(RegClass);
+  LRegion->replaceRegisterInsideRegion(PHIDest, NewDestReg, false, MRI);
+  MachineInstrBuilder MIB =
+      BuildMI(*EntrySucc, EntrySucc->instr_begin(), PHI.getDebugLoc(),
+              TII->get(TargetOpcode::PHI), NewDestReg);
+  DEBUG(dbgs() << "Split Entry PHI " << PrintReg(NewDestReg, TRI)
+               << " = PHI(");
+  MIB.addReg(PHISource);
+  MIB.addMBB(Entry);
+  DEBUG(dbgs() << PrintReg(PHISource, TRI) << ", BB#" << Entry->getNumber());
+  MIB.addReg(RegionSourceReg);
+  MIB.addMBB(RegionSourceMBB);
+  DEBUG(dbgs() << ", " << PrintReg(RegionSourceReg, TRI) << ", BB#"
+               << RegionSourceMBB->getNumber() << ")\n");
+}
+
+void AMDGPUMachineCFGStructurizer::splitLoopPHIs(MachineBasicBlock *Entry,
+                                           MachineBasicBlock *EntrySucc,
+                                           LinearizedRegion *LRegion) {
+  SmallVector<MachineInstr *, 2> PHIs;
+  collectPHIs(Entry, PHIs);
+
+  for (auto PHII : PHIs) {
+    splitLoopPHI(*PHII, Entry, EntrySucc, LRegion);
+  }
+}
+
+// Split the exit block so that we can insert an end control flow
+MachineBasicBlock *
+AMDGPUMachineCFGStructurizer::splitExit(LinearizedRegion *LRegion) {
+  auto MRTRegion = LRegion->getRegionMRT();
+  auto Exit = LRegion->getExit();
+  auto MF = Exit->getParent();
+  auto Succ = MRTRegion->getSucc();
+
+  auto NewExit = MF->CreateMachineBasicBlock();
+  auto AfterExitIter = Exit->getIterator();
+  AfterExitIter++;
+  MF->insert(AfterExitIter, NewExit);
+  Exit->removeSuccessor(Succ);
+  Exit->addSuccessor(NewExit);
+  NewExit->addSuccessor(Succ);
+  insertUnconditionalBranch(NewExit, Succ);
+  LRegion->addMBB(NewExit);
+  LRegion->setExit(NewExit);
+
+  DEBUG(dbgs() << "Created new exit block: " << NewExit->getNumber() << "\n");
+
+  // Replace any PHI Predecessors in the successor with NewExit
+  for (auto &II : *Succ) {
+    MachineInstr &Instr = II;
+
+    // If we are past the PHI instructions we are done
+    if (!Instr.isPHI())
+      break;
+
+    int numPreds = getPHINumInputs(Instr);
+    for (int i = 0; i < numPreds; ++i) {
+      auto Pred = getPHIPred(Instr, i);
+      if (Pred == Exit) {
+        setPhiPred(Instr, i, NewExit);
+      }
+    }
+  }
+
+  return NewExit;
+}
+
+
+static MachineBasicBlock *split(MachineBasicBlock::iterator I) {
+  // Create the fall-through block.
+  MachineBasicBlock *MBB = (*I).getParent();
+  MachineFunction *MF = MBB->getParent();
+  MachineBasicBlock *SuccMBB = MF->CreateMachineBasicBlock();
+  auto MBBIter = ++(MBB->getIterator());
+  MF->insert(MBBIter, SuccMBB);
+  SuccMBB->transferSuccessorsAndUpdatePHIs(MBB);
+  MBB->addSuccessor(SuccMBB);
+
+  // Splice the code over.
+  SuccMBB->splice(SuccMBB->end(), MBB, I, MBB->end());
+
+  return SuccMBB;
+}
+
+// Split the entry block, separating the PHI nodes from the rest of the code.
+// This is needed to insert an initializer for the bb select register in
+// loop regions.
+
+MachineBasicBlock *
+AMDGPUMachineCFGStructurizer::splitEntry(LinearizedRegion *LRegion) {
+  MachineBasicBlock *Entry = LRegion->getEntry();
+  MachineBasicBlock *EntrySucc = split(Entry->getFirstNonPHI());
+  MachineBasicBlock *Exit = LRegion->getExit();
+
+  DEBUG(dbgs() << "Split BB#" << Entry->getNumber() << " to BB#"
+               << Entry->getNumber() << " -> BB#" << EntrySucc->getNumber()
+               << "\n");
+  LRegion->addMBB(EntrySucc);
+
+  // Make the backedge go to Entry Succ
+  if (Exit->isSuccessor(Entry)) {
+    Exit->removeSuccessor(Entry);
+  }
+  Exit->addSuccessor(EntrySucc);
+  MachineInstr &Branch = *(Exit->instr_rbegin());
+  for (auto &UI : Branch.uses()) {
+    if (UI.isMBB() && UI.getMBB() == Entry) {
+      UI.setMBB(EntrySucc);
+    }
+  }
+
+  splitLoopPHIs(Entry, EntrySucc, LRegion);
+
+  return EntrySucc;
+}
+
+LinearizedRegion *
+AMDGPUMachineCFGStructurizer::initLinearizedRegion(RegionMRT *Region) {
+  LinearizedRegion *LRegion = Region->getLinearizedRegion();
+  LRegion->initLiveOut(Region, MRI, TRI, PHIInfo);
+  LRegion->setEntry(Region->getEntry());
+  return LRegion;
+}
+
+static void removeOldExitPreds(RegionMRT *Region) {
+  MachineBasicBlock *Exit = Region->getSucc();
+  if (Exit == nullptr) {
+    return;
+  }
+  for (MachineBasicBlock::pred_iterator PI = Exit->pred_begin(),
+                                        E = Exit->pred_end();
+       PI != E; ++PI) {
+    if (Region->contains(*PI)) {
+      (*PI)->removeSuccessor(Exit);
+    }
+  }
+}
+
+static bool mbbHasBackEdge(MachineBasicBlock *MBB,
+                           SmallPtrSet<MachineBasicBlock *, 8> &MBBs) {
+  for (auto SI = MBB->succ_begin(), SE = MBB->succ_end(); SI != SE; ++SI) {
+    if (MBBs.count(*SI) != 0) {
+      return true;
+    }
+  }
+  return false;
+}
+
+static bool containsNewBackedge(MRT *Tree,
+                                SmallPtrSet<MachineBasicBlock *, 8> &MBBs) {
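
[Editorial aside: split() above moves everything from a given instruction to the end of the block into a new fall-through block, and splitEntry applies it at getFirstNonPHI so only the PHI nodes stay in the entry. A toy sketch of the same splitting on a flat instruction list; the types here are stand-ins, not LLVM's.]

#include <cstdio>
#include <vector>

struct Instr { bool isPHI; int id; };

// Toy version of split(Entry->getFirstNonPHI()): move everything from the
// first non-PHI instruction onward into a new fall-through block, leaving
// only the PHI nodes behind (so an initializer can be inserted after them).
static std::vector<Instr> splitAtFirstNonPHI(std::vector<Instr> &entry) {
  size_t i = 0;
  while (i < entry.size() && entry[i].isPHI)
    ++i;
  std::vector<Instr> succ(entry.begin() + i, entry.end());
  entry.erase(entry.begin() + i, entry.end());
  return succ;
}

int main() {
  std::vector<Instr> entry{{true, 0}, {true, 1}, {false, 2}, {false, 3}};
  std::vector<Instr> succ = splitAtFirstNonPHI(entry);
  std::printf("entry keeps %zu PHIs, successor gets %zu instrs\n",
              entry.size(), succ.size()); // 2 and 2
}
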
+  // Need to traverse this in reverse since it is in post order.
+  if (Tree == nullptr)
+    return false;
+
+  if (Tree->isMBB()) {
+    MachineBasicBlock *MBB = Tree->getMBBMRT()->getMBB();
+    MBBs.insert(MBB);
+    if (mbbHasBackEdge(MBB, MBBs)) {
+      return true;
+    }
+  } else {
+    RegionMRT *Region = Tree->getRegionMRT();
+    SetVector<MRT *> *Children = Region->getChildren();
+    for (auto CI = Children->rbegin(), CE = Children->rend(); CI != CE; ++CI) {
+      if (containsNewBackedge(*CI, MBBs))
+        return true;
+    }
+  }
+  return false;
+}
+
+static bool containsNewBackedge(RegionMRT *Region) {
+  SmallPtrSet<MachineBasicBlock *, 8> MBBs;
+  return containsNewBackedge(Region, MBBs);
+}
+
+bool AMDGPUMachineCFGStructurizer::structurizeComplexRegion(RegionMRT *Region) {
+  auto *LRegion = initLinearizedRegion(Region);
+  LRegion->setHasLoop(containsNewBackedge(Region));
+  MachineBasicBlock *LastMerge = createLinearizedExitBlock(Region);
+  MachineBasicBlock *CurrentMerge = LastMerge;
+  LRegion->addMBB(LastMerge);
+  LRegion->setExit(LastMerge);
+
+  rewriteRegionExitPHIs(Region, LastMerge, LRegion);
+  removeOldExitPreds(Region);
+
+  DEBUG(PHIInfo.dump(MRI));
+
+  SetVector<MRT *> *Children = Region->getChildren();
+  DEBUG(dbgs() << "===========If Region Start===============\n");
+  if (LRegion->getHasLoop()) {
+    DEBUG(dbgs() << "Has Backedge: Yes\n");
+  } else {
+    DEBUG(dbgs() << "Has Backedge: No\n");
+  }
+
+  unsigned BBSelectRegIn;
+  unsigned BBSelectRegOut;
+  for (auto CI = Children->begin(), CE = Children->end(); CI != CE; ++CI) {
+    DEBUG(dbgs() << "CurrentRegion: \n");
+    DEBUG(LRegion->print(dbgs(), TRI));
+
+    auto CNI = CI;
+    ++CNI;
+
+    MRT *Child = (*CI);
+
+    if (Child->isRegion()) {
+
+      LinearizedRegion *InnerLRegion =
+          Child->getRegionMRT()->getLinearizedRegion();
+      // We found the block is the exit of an inner region, we need
+      // to put it in the current linearized region.
+
+      DEBUG(dbgs() << "Linearizing region: ");
+      DEBUG(InnerLRegion->print(dbgs(), TRI));
+      DEBUG(dbgs() << "\n");
+
+      MachineBasicBlock *InnerEntry = InnerLRegion->getEntry();
+      if ((&(*(InnerEntry->getParent()->begin()))) == InnerEntry) {
+        // Entry has already been linearized, no need to do this region.
+        unsigned OuterSelect = InnerLRegion->getBBSelectRegOut();
+        unsigned InnerSelectReg =
+            InnerLRegion->getRegionMRT()->getInnerOutputRegister();
+        replaceRegisterWith(InnerSelectReg, OuterSelect);
+        resolvePHIInfos(InnerEntry);
+        if (!InnerLRegion->getExit()->isSuccessor(CurrentMerge))
+          InnerLRegion->getExit()->addSuccessor(CurrentMerge);
+        continue;
+      }
+
+      BBSelectRegOut = Child->getBBSelectRegOut();
+      BBSelectRegIn = Child->getBBSelectRegIn();
+
+      DEBUG(dbgs() << "BBSelectRegIn: " << PrintReg(BBSelectRegIn, TRI)
+                   << "\n");
+      DEBUG(dbgs() << "BBSelectRegOut: " << PrintReg(BBSelectRegOut, TRI)
+                   << "\n");
+
+      MachineBasicBlock *IfEnd = CurrentMerge;
+      CurrentMerge = createIfRegion(CurrentMerge, InnerLRegion, LRegion,
+                                    Child->getRegionMRT()->getEntry(),
+                                    BBSelectRegIn, BBSelectRegOut);
+      TII->convertNonUniformIfRegion(CurrentMerge, IfEnd);
+    } else {
+      MachineBasicBlock *MBB = Child->getMBBMRT()->getMBB();
+      DEBUG(dbgs() << "Linearizing block: " << MBB->getNumber() << "\n");
+
+      if (MBB == getSingleExitNode(*(MBB->getParent()))) {
+        // If this is the exit block then we need to skip to the next.
+        // The "in" register will be transferred to "out" in the next
+        // iteration.
+ continue; + } + + BBSelectRegOut = Child->getBBSelectRegOut(); + BBSelectRegIn = Child->getBBSelectRegIn(); + + DEBUG(dbgs() << "BBSelectRegIn: " << PrintReg(BBSelectRegIn, TRI) + << "\n"); + DEBUG(dbgs() << "BBSelectRegOut: " << PrintReg(BBSelectRegOut, TRI) + << "\n"); + + MachineBasicBlock *IfEnd = CurrentMerge; + // This is a basic block that is not part of an inner region, we + // need to put it in the current linearized region. + CurrentMerge = createIfRegion(CurrentMerge, MBB, LRegion, BBSelectRegIn, + BBSelectRegOut); + if (CurrentMerge) { + TII->convertNonUniformIfRegion(CurrentMerge, IfEnd); + } + + DEBUG(PHIInfo.dump(MRI)); + } + } + + LRegion->removeFalseRegisterKills(MRI); + + if (LRegion->getHasLoop()) { + MachineBasicBlock *NewSucc = splitEntry(LRegion); + if (isFunctionEntryBlock(LRegion->getEntry())) { + resolvePHIInfos(LRegion->getEntry()); + } + const DebugLoc &DL = NewSucc->findDebugLoc(NewSucc->getFirstNonPHI()); + unsigned InReg = LRegion->getBBSelectRegIn(); + unsigned InnerSelectReg = + MRI->createVirtualRegister(MRI->getRegClass(InReg)); + unsigned NewInReg = MRI->createVirtualRegister(MRI->getRegClass(InReg)); + TII->materializeImmediate(*(LRegion->getEntry()), + LRegion->getEntry()->getFirstTerminator(), DL, + NewInReg, Region->getEntry()->getNumber()); + // Need to be careful about updating the registers inside the region. + LRegion->replaceRegisterInsideRegion(InReg, InnerSelectReg, false, MRI); + DEBUG(dbgs() << "Loop BBSelect Merge PHI:\n"); + insertMergePHI(LRegion->getEntry(), LRegion->getExit(), NewSucc, + InnerSelectReg, NewInReg, + LRegion->getRegionMRT()->getInnerOutputRegister()); + splitExit(LRegion); + TII->convertNonUniformLoopRegion(NewSucc, LastMerge); + } + + if (Region->isRoot()) { + TII->insertReturn(*LastMerge); + } + + DEBUG(Region->getEntry()->getParent()->dump()); + DEBUG(LRegion->print(dbgs(), TRI)); + DEBUG(PHIInfo.dump(MRI)); + + DEBUG(dbgs() << "===========If Region End===============\n"); + + Region->setLinearizedRegion(LRegion); + return true; +} + +bool AMDGPUMachineCFGStructurizer::structurizeRegion(RegionMRT *Region) { + if (false && regionIsSimpleIf(Region)) { + transformSimpleIfRegion(Region); + return true; + } else if (regionIsSequence(Region)) { + fixupRegionExits(Region); + return false; + } else { + structurizeComplexRegion(Region); + } + return false; +} + +static int structurize_once = 0; + +bool AMDGPUMachineCFGStructurizer::structurizeRegions(RegionMRT *Region, + bool isTopRegion) { + bool Changed = false; + + auto Children = Region->getChildren(); + for (auto CI : *Children) { + if (CI->isRegion()) { + Changed |= structurizeRegions(CI->getRegionMRT(), false); + } + } + + if (structurize_once < 2 || true) { + Changed |= structurizeRegion(Region); + structurize_once++; + } + return Changed; +} + +void AMDGPUMachineCFGStructurizer::initFallthroughMap(MachineFunction &MF) { + DEBUG(dbgs() << "Fallthrough Map:\n"); + for (auto &MBBI : MF) { + MachineBasicBlock *MBB = MBBI.getFallThrough(); + if (MBB != nullptr) { + DEBUG(dbgs() << "Fallthrough: " << MBBI.getNumber() << " -> " + << MBB->getNumber() << "\n"); + } + FallthroughMap[&MBBI] = MBB; + } +} + +void AMDGPUMachineCFGStructurizer::createLinearizedRegion(RegionMRT *Region, + unsigned SelectOut) { + LinearizedRegion *LRegion = new LinearizedRegion(); + if (SelectOut) { + LRegion->addLiveOut(SelectOut); + DEBUG(dbgs() << "Add LiveOut (BBSelect): " << PrintReg(SelectOut, TRI) + << "\n"); + } + LRegion->setRegionMRT(Region); + Region->setLinearizedRegion(LRegion); + 
LRegion->setParent(Region->getParent() + ? Region->getParent()->getLinearizedRegion() + : nullptr); +} + +unsigned +AMDGPUMachineCFGStructurizer::initializeSelectRegisters(MRT *MRT, unsigned SelectOut, + MachineRegisterInfo *MRI, + const SIInstrInfo *TII) { + if (MRT->isRegion()) { + RegionMRT *Region = MRT->getRegionMRT(); + Region->setBBSelectRegOut(SelectOut); + unsigned InnerSelectOut = createBBSelectReg(TII, MRI); + + // Fixme: Move linearization creation to the original spot + createLinearizedRegion(Region, SelectOut); + + for (auto CI = Region->getChildren()->begin(), + CE = Region->getChildren()->end(); + CI != CE; ++CI) { + InnerSelectOut = + initializeSelectRegisters((*CI), InnerSelectOut, MRI, TII); + } + MRT->setBBSelectRegIn(InnerSelectOut); + return InnerSelectOut; + } else { + MRT->setBBSelectRegOut(SelectOut); + unsigned NewSelectIn = createBBSelectReg(TII, MRI); + MRT->setBBSelectRegIn(NewSelectIn); + return NewSelectIn; + } +} + +static void checkRegOnlyPHIInputs(MachineFunction &MF) { + for (auto &MBBI : MF) { + for (MachineBasicBlock::instr_iterator I = MBBI.instr_begin(), + E = MBBI.instr_end(); + I != E; ++I) { + MachineInstr &Instr = *I; + if (Instr.isPHI()) { + int numPreds = getPHINumInputs(Instr); + for (int i = 0; i < numPreds; ++i) { + assert(Instr.getOperand(i * 2 + 1).isReg() && + "PHI Operand not a register"); + } + } + } + } +} + + +INITIALIZE_PASS_BEGIN(AMDGPUMachineCFGStructurizer, "amdgpu-machine-cfg-structurizer", + "AMDGPU Machine CFG Structurizer", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineRegionInfoPass) +INITIALIZE_PASS_END(AMDGPUMachineCFGStructurizer, "amdgpu-machine-cfg-structurizer", + "AMDGPU Machine CFG Structurizer", false, false) + +char AMDGPUMachineCFGStructurizerID = AMDGPUMachineCFGStructurizer::ID; + + +bool AMDGPUMachineCFGStructurizer::runOnMachineFunction(MachineFunction &MF) { + const SISubtarget &ST = MF.getSubtarget(); + const SIInstrInfo *TII = ST.getInstrInfo(); + TRI = ST.getRegisterInfo(); + MRI = &(MF.getRegInfo()); + initFallthroughMap(MF); + + checkRegOnlyPHIInputs(MF); + DEBUG(dbgs() << "----STRUCTURIZER START----\n"); + DEBUG(MF.dump()); + + Regions = &(getAnalysis().getRegionInfo()); + DEBUG(Regions->dump()); + + RegionMRT *RTree = MRT::buildMRT(MF, Regions, TII, MRI); + setRegionMRT(RTree); + initializeSelectRegisters(RTree, 0, MRI, TII); + DEBUG(RTree->dump(TRI)); + bool result = structurizeRegions(RTree, true); + delete RTree; + DEBUG(dbgs() << "----STRUCTURIZER END----\n"); + initFallthroughMap(MF); + return result; +} + +FunctionPass *llvm::createAMDGPUMachineCFGStructurizerPass() { + return new AMDGPUMachineCFGStructurizer(); +} diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp index fe7283ccf7d91..9fb7f5f889271 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp +++ b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp @@ -12,21 +12,6 @@ using namespace llvm; -static bool isEntryFunctionCC(CallingConv::ID CC) { - switch (CC) { - case CallingConv::AMDGPU_KERNEL: - case CallingConv::SPIR_KERNEL: - case CallingConv::AMDGPU_VS: - case CallingConv::AMDGPU_HS: - case CallingConv::AMDGPU_GS: - case CallingConv::AMDGPU_PS: - case CallingConv::AMDGPU_CS: - return true; - default: - return false; - } -} - AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) : MachineFunctionInfo(), LocalMemoryObjects(), @@ -34,7 +19,7 @@ 
AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) :
   MaxKernArgAlign(0),
   LDSSize(0),
   ABIArgOffset(0),
-  IsEntryFunction(isEntryFunctionCC(MF.getFunction()->getCallingConv())),
+  IsEntryFunction(AMDGPU::isEntryFunctionCC(MF.getFunction()->getCallingConv())),
   NoSignedZerosFPMath(MF.getTarget().Options.NoSignedZerosFPMath) {
   // FIXME: Should initialize KernArgSize based on ExplicitKernelArgOffset,
   // except reserved size is not correctly aligned.
diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUMachineFunction.h b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUMachineFunction.h
index 8bfeb67ad4ecd..99bb61b21db06 100644
--- a/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUMachineFunction.h
+++ b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUMachineFunction.h
@@ -10,8 +10,8 @@
 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUMACHINEFUNCTION_H
 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUMACHINEFUNCTION_H
 
-#include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/MachineFunction.h"
 
 namespace llvm {
 
diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp
new file mode 100644
index 0000000000000..7263ba73d1550
--- /dev/null
+++ b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp
@@ -0,0 +1,64 @@
+//===--- AMDGPUMacroFusion.cpp - AMDGPU Macro Fusion ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This file contains the AMDGPU implementation of the DAG scheduling
+/// mutation to pair instructions back to back.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUMacroFusion.h"
+#include "AMDGPUSubtarget.h"
+#include "SIInstrInfo.h"
+
+#include "llvm/CodeGen/MacroFusion.h"
+
+using namespace llvm;
+
+namespace {
+
+/// \brief Check if the instruction pair, FirstMI and SecondMI, should be fused
+/// together. Given SecondMI, when FirstMI is unspecified, then check if
+/// SecondMI may be part of a fused pair at all.
+static bool shouldScheduleAdjacent(const TargetInstrInfo &TII_,
+                                   const TargetSubtargetInfo &TSI,
+                                   const MachineInstr *FirstMI,
+                                   const MachineInstr &SecondMI) {
+  const SIInstrInfo &TII = static_cast<const SIInstrInfo &>(TII_);
+
+  switch (SecondMI.getOpcode()) {
+  case AMDGPU::V_ADDC_U32_e64:
+  case AMDGPU::V_SUBB_U32_e64:
+  case AMDGPU::V_CNDMASK_B32_e64: {
+    // Try to cluster defs of condition registers to their uses. This improves
+    // the chance VCC will be available which will allow shrinking to VOP2
+    // encodings.
+    if (!FirstMI)
+      return true;
+
+    const MachineOperand *Src2 = TII.getNamedOperand(SecondMI,
+                                                     AMDGPU::OpName::src2);
+    return FirstMI->definesRegister(Src2->getReg());
+  }
+  default:
+    return false;
+  }
+
+  return false;
+}
+
+} // end namespace
+
+
+namespace llvm {
+
+std::unique_ptr<ScheduleDAGMutation> createAMDGPUMacroFusionDAGMutation() {
+  return createMacroFusionDAGMutation(shouldScheduleAdjacent);
+}
+
+} // end namespace llvm
diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUMacroFusion.h b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUMacroFusion.h
new file mode 100644
index 0000000000000..844958580a65b
--- /dev/null
+++ b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUMacroFusion.h
@@ -0,0 +1,19 @@
+//===- AMDGPUMacroFusion.h - AMDGPU Macro Fusion ----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineScheduler.h"
+
+namespace llvm {
+
+/// Note that you have to add:
+///   DAG.addMutation(createAMDGPUMacroFusionDAGMutation());
+/// to AMDGPUPassConfig::createMachineScheduler() to have an effect.
+std::unique_ptr<ScheduleDAGMutation> createAMDGPUMacroFusionDAGMutation();
+
+} // llvm
diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index e40f395577471..625c9b77e2dec 100644
--- a/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -23,6 +23,7 @@
 #include "llvm/ADT/Twine.h"
 #include "llvm/Analysis/CaptureTracking.h"
 #include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Constant.h"
@@ -32,11 +33,11 @@
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GlobalValue.h"
 #include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/Module.h"
@@ -96,18 +97,20 @@ class AMDGPUPromoteAlloca : public FunctionPass {
                                  Instruction *UseInst, int OpIdx0,
                                  int OpIdx1) const;
 
+  /// Check whether we have enough local memory for promotion.
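
[Editorial aside: the fusion hook above is registered through the generic createMacroFusionDAGMutation utility shown in the patch. A hedged sketch of how another target could wire up its own pairing rule the same way; the predicate body below is a placeholder, not the AMDGPU rule, and it assumes the LLVM 5.0 MacroFusion interface.]

#include "llvm/CodeGen/MacroFusion.h"

using namespace llvm;

static bool myShouldScheduleAdjacent(const TargetInstrInfo &TII,
                                     const TargetSubtargetInfo &STI,
                                     const MachineInstr *FirstMI,
                                     const MachineInstr &SecondMI) {
  // Placeholder rule: never fuse. A real target would match opcode pairs,
  // returning true when FirstMI is null if SecondMI can anchor a pair.
  return false;
}

std::unique_ptr<ScheduleDAGMutation> createMyMacroFusionDAGMutation() {
  return createMacroFusionDAGMutation(myShouldScheduleAdjacent);
}

// Registration mirrors the note in AMDGPUMacroFusion.h, e.g. inside a
// target's createMachineScheduler():
//   DAG->addMutation(createMyMacroFusionDAGMutation());
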
+ bool hasSufficientLocalMem(const Function &F); + public: static char ID; - AMDGPUPromoteAlloca(const TargetMachine *TM_ = nullptr) : - FunctionPass(ID), TM(TM_) {} + AMDGPUPromoteAlloca() : FunctionPass(ID) {} bool doInitialization(Module &M) override; bool runOnFunction(Function &F) override; StringRef getPassName() const override { return "AMDGPU Promote Alloca"; } - void handleAlloca(AllocaInst &I); + bool handleAlloca(AllocaInst &I, bool SufficientLDS); void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); @@ -119,132 +122,49 @@ class AMDGPUPromoteAlloca : public FunctionPass { char AMDGPUPromoteAlloca::ID = 0; -INITIALIZE_TM_PASS(AMDGPUPromoteAlloca, DEBUG_TYPE, - "AMDGPU promote alloca to vector or LDS", false, false) +INITIALIZE_PASS(AMDGPUPromoteAlloca, DEBUG_TYPE, + "AMDGPU promote alloca to vector or LDS", false, false) char &llvm::AMDGPUPromoteAllocaID = AMDGPUPromoteAlloca::ID; bool AMDGPUPromoteAlloca::doInitialization(Module &M) { - if (!TM) - return false; - Mod = &M; DL = &Mod->getDataLayout(); - const Triple &TT = TM->getTargetTriple(); - - IsAMDGCN = TT.getArch() == Triple::amdgcn; - IsAMDHSA = TT.getOS() == Triple::AMDHSA; - return false; } bool AMDGPUPromoteAlloca::runOnFunction(Function &F) { - if (!TM || skipFunction(F)) + if (skipFunction(F)) return false; - const AMDGPUSubtarget &ST = TM->getSubtarget(F); - if (!ST.isPromoteAllocaEnabled()) + if (auto *TPC = getAnalysisIfAvailable()) + TM = &TPC->getTM(); + else return false; - AS = AMDGPU::getAMDGPUAS(*F.getParent()); - - FunctionType *FTy = F.getFunctionType(); - - // If the function has any arguments in the local address space, then it's - // possible these arguments require the entire local memory space, so - // we cannot use local memory in the pass. - for (Type *ParamTy : FTy->params()) { - PointerType *PtrTy = dyn_cast(ParamTy); - if (PtrTy && PtrTy->getAddressSpace() == AS.LOCAL_ADDRESS) { - LocalMemLimit = 0; - DEBUG(dbgs() << "Function has local memory argument. Promoting to " - "local memory disabled.\n"); - return false; - } - } - - LocalMemLimit = ST.getLocalMemorySize(); - if (LocalMemLimit == 0) - return false; - - const DataLayout &DL = Mod->getDataLayout(); - - // Check how much local memory is being used by global objects - CurrentLocalMemUsage = 0; - for (GlobalVariable &GV : Mod->globals()) { - if (GV.getType()->getAddressSpace() != AS.LOCAL_ADDRESS) - continue; - - for (const User *U : GV.users()) { - const Instruction *Use = dyn_cast(U); - if (!Use) - continue; - - if (Use->getParent()->getParent() == &F) { - unsigned Align = GV.getAlignment(); - if (Align == 0) - Align = DL.getABITypeAlignment(GV.getValueType()); - - // FIXME: Try to account for padding here. The padding is currently - // determined from the inverse order of uses in the function. I'm not - // sure if the use list order is in any way connected to this, so the - // total reported size is likely incorrect. - uint64_t AllocSize = DL.getTypeAllocSize(GV.getValueType()); - CurrentLocalMemUsage = alignTo(CurrentLocalMemUsage, Align); - CurrentLocalMemUsage += AllocSize; - break; - } - } - } - - unsigned MaxOccupancy = ST.getOccupancyWithLocalMemSize(CurrentLocalMemUsage, - F); - - // Restrict local memory usage so that we don't drastically reduce occupancy, - // unless it is already significantly reduced. - - // TODO: Have some sort of hint or other heuristics to guess occupancy based - // on other factors.. 
-  unsigned OccupancyHint = ST.getWavesPerEU(F).second;
-  if (OccupancyHint == 0)
-    OccupancyHint = 7;
-
-  // Clamp to max value.
-  OccupancyHint = std::min(OccupancyHint, ST.getMaxWavesPerEU());
-
-  // Check the hint but ignore it if it's obviously wrong from the existing LDS
-  // usage.
-  MaxOccupancy = std::min(OccupancyHint, MaxOccupancy);
-
-
-  // Round up to the next tier of usage.
-  unsigned MaxSizeWithWaveCount
-    = ST.getMaxLocalMemSizeWithWaveCount(MaxOccupancy, F);
+  const Triple &TT = TM->getTargetTriple();
+  IsAMDGCN = TT.getArch() == Triple::amdgcn;
+  IsAMDHSA = TT.getOS() == Triple::AMDHSA;
 
-  // Program is possibly broken by using more local mem than available.
-  if (CurrentLocalMemUsage > MaxSizeWithWaveCount)
+  const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(F);
+  if (!ST.isPromoteAllocaEnabled())
     return false;
 
-  LocalMemLimit = MaxSizeWithWaveCount;
-
-  DEBUG(
-    dbgs() << F.getName() << " uses " << CurrentLocalMemUsage
-           << " bytes of LDS\n"
-           << "  Rounding size to " << MaxSizeWithWaveCount
-           << " with a maximum occupancy of " << MaxOccupancy << '\n'
-           << " and " << (LocalMemLimit - CurrentLocalMemUsage)
-           << " available for promotion\n"
-  );
+  AS = AMDGPU::getAMDGPUAS(*F.getParent());
+  bool SufficientLDS = hasSufficientLocalMem(F);
+  bool Changed = false;
   BasicBlock &EntryBB = *F.begin();
   for (auto I = EntryBB.begin(), E = EntryBB.end(); I != E; ) {
     AllocaInst *AI = dyn_cast<AllocaInst>(I);
 
     ++I;
     if (AI)
-      handleAlloca(*AI);
+      Changed |= handleAlloca(*AI, SufficientLDS);
   }
 
-  return true;
+  return Changed;
 }
 
 std::pair<Value *, Value *>
@@ -399,15 +319,17 @@ static bool canVectorizeInst(Instruction *Inst, User *User) {
   switch (Inst->getOpcode()) {
   case Instruction::Load: {
     LoadInst *LI = cast<LoadInst>(Inst);
-    return !LI->isVolatile();
+    // Currently only handle the case where the pointer operand is a GEP, so
+    // check for that case.
+    return isa<GetElementPtrInst>(LI->getPointerOperand()) && !LI->isVolatile();
   }
   case Instruction::BitCast:
   case Instruction::AddrSpaceCast:
     return true;
   case Instruction::Store: {
-    // Must be the stored pointer operand, not a stored value.
+    // Must be the stored pointer operand, not a stored value, plus
+    // since it should be canonical form, the User should be a GEP.
     StoreInst *SI = cast<StoreInst>(Inst);
-    return (SI->getPointerOperand() == User) && !SI->isVolatile();
+    return (SI->getPointerOperand() == User) && isa<GetElementPtrInst>(User) &&
+           !SI->isVolatile();
   }
   default:
     return false;
@@ -421,8 +343,11 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca, AMDGPUAS AS) {
 
   // FIXME: There is no reason why we can't support larger arrays, we
   // are just being conservative for now.
+  // FIXME: We also reject allocas of the form [ 2 x [ 2 x i32 ]] or
+  // equivalent. Potentially these could also be promoted, but we don't
+  // currently handle that case.
   if (!AllocaTy ||
       AllocaTy->getElementType()->isVectorTy() ||
+      AllocaTy->getElementType()->isArrayTy() ||
       AllocaTy->getNumElements() > 4 ||
       AllocaTy->getNumElements() < 2) {
     DEBUG(dbgs() << "  Cannot convert type to vector\n");
@@ -470,7 +395,7 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca, AMDGPUAS AS) {
     switch (Inst->getOpcode()) {
     case Instruction::Load: {
       Type *VecPtrTy = VectorTy->getPointerTo(AS.PRIVATE_ADDRESS);
-      Value *Ptr = Inst->getOperand(0);
+      Value *Ptr = cast<LoadInst>(Inst)->getPointerOperand();
       Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
       Value *BitCast = Builder.CreateBitCast(Alloca, VecPtrTy);
@@ -483,12 +408,13 @@
     case Instruction::Store: {
       Type *VecPtrTy = VectorTy->getPointerTo(AS.PRIVATE_ADDRESS);
-      Value *Ptr = Inst->getOperand(1);
+      StoreInst *SI = cast<StoreInst>(Inst);
+      Value *Ptr = SI->getPointerOperand();
       Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
       Value *BitCast = Builder.CreateBitCast(Alloca, VecPtrTy);
       Value *VecValue = Builder.CreateLoad(BitCast);
       Value *NewVecValue = Builder.CreateInsertElement(VecValue,
-                                                       Inst->getOperand(0),
+                                                       SI->getValueOperand(),
                                                        Index);
       Builder.CreateStore(NewVecValue, BitCast);
       Inst->eraseFromParent();
@@ -660,12 +586,105 @@ bool AMDGPUPromoteAlloca::collectUsesWithPtrTypes(
   return true;
 }
 
+bool AMDGPUPromoteAlloca::hasSufficientLocalMem(const Function &F) {
+
+  FunctionType *FTy = F.getFunctionType();
+  const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(F);
+
+  // If the function has any arguments in the local address space, then it's
+  // possible these arguments require the entire local memory space, so
+  // we cannot use local memory in the pass.
+  for (Type *ParamTy : FTy->params()) {
+    PointerType *PtrTy = dyn_cast<PointerType>(ParamTy);
+    if (PtrTy && PtrTy->getAddressSpace() == AS.LOCAL_ADDRESS) {
+      LocalMemLimit = 0;
+      DEBUG(dbgs() << "Function has local memory argument. Promoting to "
+                      "local memory disabled.\n");
+      return false;
+    }
+  }
+
+  LocalMemLimit = ST.getLocalMemorySize();
+  if (LocalMemLimit == 0)
+    return false;
+
+  const DataLayout &DL = Mod->getDataLayout();
+
+  // Check how much local memory is being used by global objects
+  CurrentLocalMemUsage = 0;
+  for (GlobalVariable &GV : Mod->globals()) {
+    if (GV.getType()->getAddressSpace() != AS.LOCAL_ADDRESS)
+      continue;
+
+    for (const User *U : GV.users()) {
+      const Instruction *Use = dyn_cast<Instruction>(U);
+      if (!Use)
+        continue;
+
+      if (Use->getParent()->getParent() == &F) {
+        unsigned Align = GV.getAlignment();
+        if (Align == 0)
+          Align = DL.getABITypeAlignment(GV.getValueType());
+
+        // FIXME: Try to account for padding here. The padding is currently
+        // determined from the inverse order of uses in the function. I'm not
+        // sure if the use list order is in any way connected to this, so the
+        // total reported size is likely incorrect.
+        uint64_t AllocSize = DL.getTypeAllocSize(GV.getValueType());
+        CurrentLocalMemUsage = alignTo(CurrentLocalMemUsage, Align);
+        CurrentLocalMemUsage += AllocSize;
+        break;
+      }
+    }
+  }
+
+  unsigned MaxOccupancy = ST.getOccupancyWithLocalMemSize(CurrentLocalMemUsage,
+                                                          F);
+
+  // Restrict local memory usage so that we don't drastically reduce occupancy,
+  // unless it is already significantly reduced.
+
+  // TODO: Have some sort of hint or other heuristics to guess occupancy based
+  // on other factors.
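
[Editorial aside: the occupancy heuristic that follows clamps the user hint and the LDS-derived bound into a single target occupancy. A small standalone sketch of that arithmetic; the default hint of 7 matches the code below, but the function name and parameters are mine.]

#include <algorithm>
#include <cstdio>

// Toy version of the occupancy clamp in hasSufficientLocalMem(): take the
// user hint (0 means "none", treated as 7), clamp it to the hardware max,
// then never exceed what the current LDS usage already allows.
static unsigned clampOccupancy(unsigned hint, unsigned hwMax,
                               unsigned fromLDSUsage) {
  if (hint == 0)
    hint = 7;
  hint = std::min(hint, hwMax);
  return std::min(hint, fromLDSUsage);
}

int main() {
  std::printf("occupancy: %u\n", clampOccupancy(0, 10, 8)); // prints 7
}
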
+ unsigned OccupancyHint = ST.getWavesPerEU(F).second; + if (OccupancyHint == 0) + OccupancyHint = 7; + + // Clamp to max value. + OccupancyHint = std::min(OccupancyHint, ST.getMaxWavesPerEU()); + + // Check the hint but ignore it if it's obviously wrong from the existing LDS + // usage. + MaxOccupancy = std::min(OccupancyHint, MaxOccupancy); + + + // Round up to the next tier of usage. + unsigned MaxSizeWithWaveCount + = ST.getMaxLocalMemSizeWithWaveCount(MaxOccupancy, F); + + // Program is possibly broken by using more local mem than available. + if (CurrentLocalMemUsage > MaxSizeWithWaveCount) + return false; + + LocalMemLimit = MaxSizeWithWaveCount; + + DEBUG( + dbgs() << F.getName() << " uses " << CurrentLocalMemUsage << " bytes of LDS\n" + << " Rounding size to " << MaxSizeWithWaveCount + << " with a maximum occupancy of " << MaxOccupancy << '\n' + << " and " << (LocalMemLimit - CurrentLocalMemUsage) + << " available for promotion\n" + ); + + return true; +} + // FIXME: Should try to pick the most likely to be profitable allocas first. -void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) { +bool AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I, bool SufficientLDS) { // Array allocations are probably not worth handling, since an allocation of // the array type is the canonical form. if (!I.isStaticAlloca() || I.isArrayAllocation()) - return; + return false; IRBuilder<> Builder(&I); @@ -674,10 +693,8 @@ void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) { DEBUG(dbgs() << "Trying to promote " << I << '\n'); - if (tryPromoteAllocaToVector(&I, AS)) { - DEBUG(dbgs() << " alloca is not a candidate for vectorization.\n"); - return; - } + if (tryPromoteAllocaToVector(&I, AS)) + return true; // Promoted to vector. const Function &ContainingFunction = *I.getParent()->getParent(); CallingConv::ID CC = ContainingFunction.getCallingConv(); @@ -691,9 +708,13 @@ void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) { break; default: DEBUG(dbgs() << " promote alloca to LDS not supported with calling convention.\n"); - return; + return false; } + // Not likely to have sufficient local memory for promotion. 
+ if (!SufficientLDS) + return false; + const AMDGPUSubtarget &ST = TM->getSubtarget(ContainingFunction); unsigned WorkGroupSize = ST.getFlatWorkGroupSizes(ContainingFunction).second; @@ -717,7 +738,7 @@ void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) { if (NewSize > LocalMemLimit) { DEBUG(dbgs() << " " << AllocSize << " bytes of local memory not available to promote\n"); - return; + return false; } CurrentLocalMemUsage = NewSize; @@ -726,7 +747,7 @@ void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) { if (!collectUsesWithPtrTypes(&I, &I, WorkList)) { DEBUG(dbgs() << " Do not know how to convert all uses\n"); - return; + return false; } DEBUG(dbgs() << "Promoting alloca to local memory\n"); @@ -872,8 +893,9 @@ void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) { llvm_unreachable("Don't know how to promote alloca intrinsic use."); } } + return true; } -FunctionPass *llvm::createAMDGPUPromoteAlloca(const TargetMachine *TM) { - return new AMDGPUPromoteAlloca(TM); +FunctionPass *llvm::createAMDGPUPromoteAlloca() { + return new AMDGPUPromoteAlloca(); } diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPURegAsmNames.inc.cpp b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPURegAsmNames.inc.cpp new file mode 100644 index 0000000000000..36d88f52910d5 --- /dev/null +++ b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPURegAsmNames.inc.cpp @@ -0,0 +1,353 @@ +//===-- AMDGPURegAsmNames.inc - Register asm names ----------*- C++ -*-----===// + +#ifdef AMDGPU_REG_ASM_NAMES + +static const char *const VGPR32RegNames[] = { + "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", + "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", + "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", + "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35", + "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44", + "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53", + "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62", + "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71", + "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80", + "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89", + "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98", + "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107", + "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116", + "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125", + "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134", + "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143", + "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152", + "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161", + "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170", + "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179", + "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188", + "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197", + "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206", + "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215", + "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224", + "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233", + "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242", + "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251", + "v252", "v253", "v254", "v255" +}; + +static 
const char *const SGPR32RegNames[] = { + "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", "s8", "s9", + "s10", "s11", "s12", "s13", "s14", "s15", "s16", "s17", "s18", "s19", + "s20", "s21", "s22", "s23", "s24", "s25", "s26", "s27", "s28", "s29", + "s30", "s31", "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", + "s40", "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49", + "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58", "s59", + "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67", "s68", "s69", + "s70", "s71", "s72", "s73", "s74", "s75", "s76", "s77", "s78", "s79", + "s80", "s81", "s82", "s83", "s84", "s85", "s86", "s87", "s88", "s89", + "s90", "s91", "s92", "s93", "s94", "s95", "s96", "s97", "s98", "s99", + "s100", "s101", "s102", "s103" +}; + +static const char *const VGPR64RegNames[] = { + "v[0:1]", "v[1:2]", "v[2:3]", "v[3:4]", "v[4:5]", + "v[5:6]", "v[6:7]", "v[7:8]", "v[8:9]", "v[9:10]", + "v[10:11]", "v[11:12]", "v[12:13]", "v[13:14]", "v[14:15]", + "v[15:16]", "v[16:17]", "v[17:18]", "v[18:19]", "v[19:20]", + "v[20:21]", "v[21:22]", "v[22:23]", "v[23:24]", "v[24:25]", + "v[25:26]", "v[26:27]", "v[27:28]", "v[28:29]", "v[29:30]", + "v[30:31]", "v[31:32]", "v[32:33]", "v[33:34]", "v[34:35]", + "v[35:36]", "v[36:37]", "v[37:38]", "v[38:39]", "v[39:40]", + "v[40:41]", "v[41:42]", "v[42:43]", "v[43:44]", "v[44:45]", + "v[45:46]", "v[46:47]", "v[47:48]", "v[48:49]", "v[49:50]", + "v[50:51]", "v[51:52]", "v[52:53]", "v[53:54]", "v[54:55]", + "v[55:56]", "v[56:57]", "v[57:58]", "v[58:59]", "v[59:60]", + "v[60:61]", "v[61:62]", "v[62:63]", "v[63:64]", "v[64:65]", + "v[65:66]", "v[66:67]", "v[67:68]", "v[68:69]", "v[69:70]", + "v[70:71]", "v[71:72]", "v[72:73]", "v[73:74]", "v[74:75]", + "v[75:76]", "v[76:77]", "v[77:78]", "v[78:79]", "v[79:80]", + "v[80:81]", "v[81:82]", "v[82:83]", "v[83:84]", "v[84:85]", + "v[85:86]", "v[86:87]", "v[87:88]", "v[88:89]", "v[89:90]", + "v[90:91]", "v[91:92]", "v[92:93]", "v[93:94]", "v[94:95]", + "v[95:96]", "v[96:97]", "v[97:98]", "v[98:99]", "v[99:100]", + "v[100:101]", "v[101:102]", "v[102:103]", "v[103:104]", "v[104:105]", + "v[105:106]", "v[106:107]", "v[107:108]", "v[108:109]", "v[109:110]", + "v[110:111]", "v[111:112]", "v[112:113]", "v[113:114]", "v[114:115]", + "v[115:116]", "v[116:117]", "v[117:118]", "v[118:119]", "v[119:120]", + "v[120:121]", "v[121:122]", "v[122:123]", "v[123:124]", "v[124:125]", + "v[125:126]", "v[126:127]", "v[127:128]", "v[128:129]", "v[129:130]", + "v[130:131]", "v[131:132]", "v[132:133]", "v[133:134]", "v[134:135]", + "v[135:136]", "v[136:137]", "v[137:138]", "v[138:139]", "v[139:140]", + "v[140:141]", "v[141:142]", "v[142:143]", "v[143:144]", "v[144:145]", + "v[145:146]", "v[146:147]", "v[147:148]", "v[148:149]", "v[149:150]", + "v[150:151]", "v[151:152]", "v[152:153]", "v[153:154]", "v[154:155]", + "v[155:156]", "v[156:157]", "v[157:158]", "v[158:159]", "v[159:160]", + "v[160:161]", "v[161:162]", "v[162:163]", "v[163:164]", "v[164:165]", + "v[165:166]", "v[166:167]", "v[167:168]", "v[168:169]", "v[169:170]", + "v[170:171]", "v[171:172]", "v[172:173]", "v[173:174]", "v[174:175]", + "v[175:176]", "v[176:177]", "v[177:178]", "v[178:179]", "v[179:180]", + "v[180:181]", "v[181:182]", "v[182:183]", "v[183:184]", "v[184:185]", + "v[185:186]", "v[186:187]", "v[187:188]", "v[188:189]", "v[189:190]", + "v[190:191]", "v[191:192]", "v[192:193]", "v[193:194]", "v[194:195]", + "v[195:196]", "v[196:197]", "v[197:198]", "v[198:199]", "v[199:200]", + "v[200:201]", "v[201:202]", "v[202:203]", "v[203:204]", 
"v[204:205]", + "v[205:206]", "v[206:207]", "v[207:208]", "v[208:209]", "v[209:210]", + "v[210:211]", "v[211:212]", "v[212:213]", "v[213:214]", "v[214:215]", + "v[215:216]", "v[216:217]", "v[217:218]", "v[218:219]", "v[219:220]", + "v[220:221]", "v[221:222]", "v[222:223]", "v[223:224]", "v[224:225]", + "v[225:226]", "v[226:227]", "v[227:228]", "v[228:229]", "v[229:230]", + "v[230:231]", "v[231:232]", "v[232:233]", "v[233:234]", "v[234:235]", + "v[235:236]", "v[236:237]", "v[237:238]", "v[238:239]", "v[239:240]", + "v[240:241]", "v[241:242]", "v[242:243]", "v[243:244]", "v[244:245]", + "v[245:246]", "v[246:247]", "v[247:248]", "v[248:249]", "v[249:250]", + "v[250:251]", "v[251:252]", "v[252:253]", "v[253:254]", "v[254:255]" +}; + +static const char *const VGPR96RegNames[] = { + "v[0:2]", "v[1:3]", "v[2:4]", "v[3:5]", "v[4:6]", + "v[5:7]", "v[6:8]", "v[7:9]", "v[8:10]", "v[9:11]", + "v[10:12]", "v[11:13]", "v[12:14]", "v[13:15]", "v[14:16]", + "v[15:17]", "v[16:18]", "v[17:19]", "v[18:20]", "v[19:21]", + "v[20:22]", "v[21:23]", "v[22:24]", "v[23:25]", "v[24:26]", + "v[25:27]", "v[26:28]", "v[27:29]", "v[28:30]", "v[29:31]", + "v[30:32]", "v[31:33]", "v[32:34]", "v[33:35]", "v[34:36]", + "v[35:37]", "v[36:38]", "v[37:39]", "v[38:40]", "v[39:41]", + "v[40:42]", "v[41:43]", "v[42:44]", "v[43:45]", "v[44:46]", + "v[45:47]", "v[46:48]", "v[47:49]", "v[48:50]", "v[49:51]", + "v[50:52]", "v[51:53]", "v[52:54]", "v[53:55]", "v[54:56]", + "v[55:57]", "v[56:58]", "v[57:59]", "v[58:60]", "v[59:61]", + "v[60:62]", "v[61:63]", "v[62:64]", "v[63:65]", "v[64:66]", + "v[65:67]", "v[66:68]", "v[67:69]", "v[68:70]", "v[69:71]", + "v[70:72]", "v[71:73]", "v[72:74]", "v[73:75]", "v[74:76]", + "v[75:77]", "v[76:78]", "v[77:79]", "v[78:80]", "v[79:81]", + "v[80:82]", "v[81:83]", "v[82:84]", "v[83:85]", "v[84:86]", + "v[85:87]", "v[86:88]", "v[87:89]", "v[88:90]", "v[89:91]", + "v[90:92]", "v[91:93]", "v[92:94]", "v[93:95]", "v[94:96]", + "v[95:97]", "v[96:98]", "v[97:99]", "v[98:100]", "v[99:101]", + "v[100:102]", "v[101:103]", "v[102:104]", "v[103:105]", "v[104:106]", + "v[105:107]", "v[106:108]", "v[107:109]", "v[108:110]", "v[109:111]", + "v[110:112]", "v[111:113]", "v[112:114]", "v[113:115]", "v[114:116]", + "v[115:117]", "v[116:118]", "v[117:119]", "v[118:120]", "v[119:121]", + "v[120:122]", "v[121:123]", "v[122:124]", "v[123:125]", "v[124:126]", + "v[125:127]", "v[126:128]", "v[127:129]", "v[128:130]", "v[129:131]", + "v[130:132]", "v[131:133]", "v[132:134]", "v[133:135]", "v[134:136]", + "v[135:137]", "v[136:138]", "v[137:139]", "v[138:140]", "v[139:141]", + "v[140:142]", "v[141:143]", "v[142:144]", "v[143:145]", "v[144:146]", + "v[145:147]", "v[146:148]", "v[147:149]", "v[148:150]", "v[149:151]", + "v[150:152]", "v[151:153]", "v[152:154]", "v[153:155]", "v[154:156]", + "v[155:157]", "v[156:158]", "v[157:159]", "v[158:160]", "v[159:161]", + "v[160:162]", "v[161:163]", "v[162:164]", "v[163:165]", "v[164:166]", + "v[165:167]", "v[166:168]", "v[167:169]", "v[168:170]", "v[169:171]", + "v[170:172]", "v[171:173]", "v[172:174]", "v[173:175]", "v[174:176]", + "v[175:177]", "v[176:178]", "v[177:179]", "v[178:180]", "v[179:181]", + "v[180:182]", "v[181:183]", "v[182:184]", "v[183:185]", "v[184:186]", + "v[185:187]", "v[186:188]", "v[187:189]", "v[188:190]", "v[189:191]", + "v[190:192]", "v[191:193]", "v[192:194]", "v[193:195]", "v[194:196]", + "v[195:197]", "v[196:198]", "v[197:199]", "v[198:200]", "v[199:201]", + "v[200:202]", "v[201:203]", "v[202:204]", "v[203:205]", "v[204:206]", + "v[205:207]", "v[206:208]", 
"v[207:209]", "v[208:210]", "v[209:211]", + "v[210:212]", "v[211:213]", "v[212:214]", "v[213:215]", "v[214:216]", + "v[215:217]", "v[216:218]", "v[217:219]", "v[218:220]", "v[219:221]", + "v[220:222]", "v[221:223]", "v[222:224]", "v[223:225]", "v[224:226]", + "v[225:227]", "v[226:228]", "v[227:229]", "v[228:230]", "v[229:231]", + "v[230:232]", "v[231:233]", "v[232:234]", "v[233:235]", "v[234:236]", + "v[235:237]", "v[236:238]", "v[237:239]", "v[238:240]", "v[239:241]", + "v[240:242]", "v[241:243]", "v[242:244]", "v[243:245]", "v[244:246]", + "v[245:247]", "v[246:248]", "v[247:249]", "v[248:250]", "v[249:251]", + "v[250:252]", "v[251:253]", "v[252:254]", "v[253:255]" +}; + +static const char *const VGPR128RegNames[] = { + "v[0:3]", "v[1:4]", "v[2:5]", "v[3:6]", "v[4:7]", + "v[5:8]", "v[6:9]", "v[7:10]", "v[8:11]", "v[9:12]", + "v[10:13]", "v[11:14]", "v[12:15]", "v[13:16]", "v[14:17]", + "v[15:18]", "v[16:19]", "v[17:20]", "v[18:21]", "v[19:22]", + "v[20:23]", "v[21:24]", "v[22:25]", "v[23:26]", "v[24:27]", + "v[25:28]", "v[26:29]", "v[27:30]", "v[28:31]", "v[29:32]", + "v[30:33]", "v[31:34]", "v[32:35]", "v[33:36]", "v[34:37]", + "v[35:38]", "v[36:39]", "v[37:40]", "v[38:41]", "v[39:42]", + "v[40:43]", "v[41:44]", "v[42:45]", "v[43:46]", "v[44:47]", + "v[45:48]", "v[46:49]", "v[47:50]", "v[48:51]", "v[49:52]", + "v[50:53]", "v[51:54]", "v[52:55]", "v[53:56]", "v[54:57]", + "v[55:58]", "v[56:59]", "v[57:60]", "v[58:61]", "v[59:62]", + "v[60:63]", "v[61:64]", "v[62:65]", "v[63:66]", "v[64:67]", + "v[65:68]", "v[66:69]", "v[67:70]", "v[68:71]", "v[69:72]", + "v[70:73]", "v[71:74]", "v[72:75]", "v[73:76]", "v[74:77]", + "v[75:78]", "v[76:79]", "v[77:80]", "v[78:81]", "v[79:82]", + "v[80:83]", "v[81:84]", "v[82:85]", "v[83:86]", "v[84:87]", + "v[85:88]", "v[86:89]", "v[87:90]", "v[88:91]", "v[89:92]", + "v[90:93]", "v[91:94]", "v[92:95]", "v[93:96]", "v[94:97]", + "v[95:98]", "v[96:99]", "v[97:100]", "v[98:101]", "v[99:102]", + "v[100:103]", "v[101:104]", "v[102:105]", "v[103:106]", "v[104:107]", + "v[105:108]", "v[106:109]", "v[107:110]", "v[108:111]", "v[109:112]", + "v[110:113]", "v[111:114]", "v[112:115]", "v[113:116]", "v[114:117]", + "v[115:118]", "v[116:119]", "v[117:120]", "v[118:121]", "v[119:122]", + "v[120:123]", "v[121:124]", "v[122:125]", "v[123:126]", "v[124:127]", + "v[125:128]", "v[126:129]", "v[127:130]", "v[128:131]", "v[129:132]", + "v[130:133]", "v[131:134]", "v[132:135]", "v[133:136]", "v[134:137]", + "v[135:138]", "v[136:139]", "v[137:140]", "v[138:141]", "v[139:142]", + "v[140:143]", "v[141:144]", "v[142:145]", "v[143:146]", "v[144:147]", + "v[145:148]", "v[146:149]", "v[147:150]", "v[148:151]", "v[149:152]", + "v[150:153]", "v[151:154]", "v[152:155]", "v[153:156]", "v[154:157]", + "v[155:158]", "v[156:159]", "v[157:160]", "v[158:161]", "v[159:162]", + "v[160:163]", "v[161:164]", "v[162:165]", "v[163:166]", "v[164:167]", + "v[165:168]", "v[166:169]", "v[167:170]", "v[168:171]", "v[169:172]", + "v[170:173]", "v[171:174]", "v[172:175]", "v[173:176]", "v[174:177]", + "v[175:178]", "v[176:179]", "v[177:180]", "v[178:181]", "v[179:182]", + "v[180:183]", "v[181:184]", "v[182:185]", "v[183:186]", "v[184:187]", + "v[185:188]", "v[186:189]", "v[187:190]", "v[188:191]", "v[189:192]", + "v[190:193]", "v[191:194]", "v[192:195]", "v[193:196]", "v[194:197]", + "v[195:198]", "v[196:199]", "v[197:200]", "v[198:201]", "v[199:202]", + "v[200:203]", "v[201:204]", "v[202:205]", "v[203:206]", "v[204:207]", + "v[205:208]", "v[206:209]", "v[207:210]", "v[208:211]", "v[209:212]", + "v[210:213]", 
"v[211:214]", "v[212:215]", "v[213:216]", "v[214:217]", + "v[215:218]", "v[216:219]", "v[217:220]", "v[218:221]", "v[219:222]", + "v[220:223]", "v[221:224]", "v[222:225]", "v[223:226]", "v[224:227]", + "v[225:228]", "v[226:229]", "v[227:230]", "v[228:231]", "v[229:232]", + "v[230:233]", "v[231:234]", "v[232:235]", "v[233:236]", "v[234:237]", + "v[235:238]", "v[236:239]", "v[237:240]", "v[238:241]", "v[239:242]", + "v[240:243]", "v[241:244]", "v[242:245]", "v[243:246]", "v[244:247]", + "v[245:248]", "v[246:249]", "v[247:250]", "v[248:251]", "v[249:252]", + "v[250:253]", "v[251:254]", "v[252:255]" +}; + +static const char *const VGPR256RegNames[] = { + "v[0:7]", "v[1:8]", "v[2:9]", "v[3:10]", "v[4:11]", + "v[5:12]", "v[6:13]", "v[7:14]", "v[8:15]", "v[9:16]", + "v[10:17]", "v[11:18]", "v[12:19]", "v[13:20]", "v[14:21]", + "v[15:22]", "v[16:23]", "v[17:24]", "v[18:25]", "v[19:26]", + "v[20:27]", "v[21:28]", "v[22:29]", "v[23:30]", "v[24:31]", + "v[25:32]", "v[26:33]", "v[27:34]", "v[28:35]", "v[29:36]", + "v[30:37]", "v[31:38]", "v[32:39]", "v[33:40]", "v[34:41]", + "v[35:42]", "v[36:43]", "v[37:44]", "v[38:45]", "v[39:46]", + "v[40:47]", "v[41:48]", "v[42:49]", "v[43:50]", "v[44:51]", + "v[45:52]", "v[46:53]", "v[47:54]", "v[48:55]", "v[49:56]", + "v[50:57]", "v[51:58]", "v[52:59]", "v[53:60]", "v[54:61]", + "v[55:62]", "v[56:63]", "v[57:64]", "v[58:65]", "v[59:66]", + "v[60:67]", "v[61:68]", "v[62:69]", "v[63:70]", "v[64:71]", + "v[65:72]", "v[66:73]", "v[67:74]", "v[68:75]", "v[69:76]", + "v[70:77]", "v[71:78]", "v[72:79]", "v[73:80]", "v[74:81]", + "v[75:82]", "v[76:83]", "v[77:84]", "v[78:85]", "v[79:86]", + "v[80:87]", "v[81:88]", "v[82:89]", "v[83:90]", "v[84:91]", + "v[85:92]", "v[86:93]", "v[87:94]", "v[88:95]", "v[89:96]", + "v[90:97]", "v[91:98]", "v[92:99]", "v[93:100]", "v[94:101]", + "v[95:102]", "v[96:103]", "v[97:104]", "v[98:105]", "v[99:106]", + "v[100:107]", "v[101:108]", "v[102:109]", "v[103:110]", "v[104:111]", + "v[105:112]", "v[106:113]", "v[107:114]", "v[108:115]", "v[109:116]", + "v[110:117]", "v[111:118]", "v[112:119]", "v[113:120]", "v[114:121]", + "v[115:122]", "v[116:123]", "v[117:124]", "v[118:125]", "v[119:126]", + "v[120:127]", "v[121:128]", "v[122:129]", "v[123:130]", "v[124:131]", + "v[125:132]", "v[126:133]", "v[127:134]", "v[128:135]", "v[129:136]", + "v[130:137]", "v[131:138]", "v[132:139]", "v[133:140]", "v[134:141]", + "v[135:142]", "v[136:143]", "v[137:144]", "v[138:145]", "v[139:146]", + "v[140:147]", "v[141:148]", "v[142:149]", "v[143:150]", "v[144:151]", + "v[145:152]", "v[146:153]", "v[147:154]", "v[148:155]", "v[149:156]", + "v[150:157]", "v[151:158]", "v[152:159]", "v[153:160]", "v[154:161]", + "v[155:162]", "v[156:163]", "v[157:164]", "v[158:165]", "v[159:166]", + "v[160:167]", "v[161:168]", "v[162:169]", "v[163:170]", "v[164:171]", + "v[165:172]", "v[166:173]", "v[167:174]", "v[168:175]", "v[169:176]", + "v[170:177]", "v[171:178]", "v[172:179]", "v[173:180]", "v[174:181]", + "v[175:182]", "v[176:183]", "v[177:184]", "v[178:185]", "v[179:186]", + "v[180:187]", "v[181:188]", "v[182:189]", "v[183:190]", "v[184:191]", + "v[185:192]", "v[186:193]", "v[187:194]", "v[188:195]", "v[189:196]", + "v[190:197]", "v[191:198]", "v[192:199]", "v[193:200]", "v[194:201]", + "v[195:202]", "v[196:203]", "v[197:204]", "v[198:205]", "v[199:206]", + "v[200:207]", "v[201:208]", "v[202:209]", "v[203:210]", "v[204:211]", + "v[205:212]", "v[206:213]", "v[207:214]", "v[208:215]", "v[209:216]", + "v[210:217]", "v[211:218]", "v[212:219]", "v[213:220]", "v[214:221]", + 
"v[215:222]", "v[216:223]", "v[217:224]", "v[218:225]", "v[219:226]", + "v[220:227]", "v[221:228]", "v[222:229]", "v[223:230]", "v[224:231]", + "v[225:232]", "v[226:233]", "v[227:234]", "v[228:235]", "v[229:236]", + "v[230:237]", "v[231:238]", "v[232:239]", "v[233:240]", "v[234:241]", + "v[235:242]", "v[236:243]", "v[237:244]", "v[238:245]", "v[239:246]", + "v[240:247]", "v[241:248]", "v[242:249]", "v[243:250]", "v[244:251]", + "v[245:252]", "v[246:253]", "v[247:254]", "v[248:255]" +}; + +static const char *const VGPR512RegNames[] = { + "v[0:15]", "v[1:16]", "v[2:17]", "v[3:18]", "v[4:19]", + "v[5:20]", "v[6:21]", "v[7:22]", "v[8:23]", "v[9:24]", + "v[10:25]", "v[11:26]", "v[12:27]", "v[13:28]", "v[14:29]", + "v[15:30]", "v[16:31]", "v[17:32]", "v[18:33]", "v[19:34]", + "v[20:35]", "v[21:36]", "v[22:37]", "v[23:38]", "v[24:39]", + "v[25:40]", "v[26:41]", "v[27:42]", "v[28:43]", "v[29:44]", + "v[30:45]", "v[31:46]", "v[32:47]", "v[33:48]", "v[34:49]", + "v[35:50]", "v[36:51]", "v[37:52]", "v[38:53]", "v[39:54]", + "v[40:55]", "v[41:56]", "v[42:57]", "v[43:58]", "v[44:59]", + "v[45:60]", "v[46:61]", "v[47:62]", "v[48:63]", "v[49:64]", + "v[50:65]", "v[51:66]", "v[52:67]", "v[53:68]", "v[54:69]", + "v[55:70]", "v[56:71]", "v[57:72]", "v[58:73]", "v[59:74]", + "v[60:75]", "v[61:76]", "v[62:77]", "v[63:78]", "v[64:79]", + "v[65:80]", "v[66:81]", "v[67:82]", "v[68:83]", "v[69:84]", + "v[70:85]", "v[71:86]", "v[72:87]", "v[73:88]", "v[74:89]", + "v[75:90]", "v[76:91]", "v[77:92]", "v[78:93]", "v[79:94]", + "v[80:95]", "v[81:96]", "v[82:97]", "v[83:98]", "v[84:99]", + "v[85:100]", "v[86:101]", "v[87:102]", "v[88:103]", "v[89:104]", + "v[90:105]", "v[91:106]", "v[92:107]", "v[93:108]", "v[94:109]", + "v[95:110]", "v[96:111]", "v[97:112]", "v[98:113]", "v[99:114]", + "v[100:115]", "v[101:116]", "v[102:117]", "v[103:118]", "v[104:119]", + "v[105:120]", "v[106:121]", "v[107:122]", "v[108:123]", "v[109:124]", + "v[110:125]", "v[111:126]", "v[112:127]", "v[113:128]", "v[114:129]", + "v[115:130]", "v[116:131]", "v[117:132]", "v[118:133]", "v[119:134]", + "v[120:135]", "v[121:136]", "v[122:137]", "v[123:138]", "v[124:139]", + "v[125:140]", "v[126:141]", "v[127:142]", "v[128:143]", "v[129:144]", + "v[130:145]", "v[131:146]", "v[132:147]", "v[133:148]", "v[134:149]", + "v[135:150]", "v[136:151]", "v[137:152]", "v[138:153]", "v[139:154]", + "v[140:155]", "v[141:156]", "v[142:157]", "v[143:158]", "v[144:159]", + "v[145:160]", "v[146:161]", "v[147:162]", "v[148:163]", "v[149:164]", + "v[150:165]", "v[151:166]", "v[152:167]", "v[153:168]", "v[154:169]", + "v[155:170]", "v[156:171]", "v[157:172]", "v[158:173]", "v[159:174]", + "v[160:175]", "v[161:176]", "v[162:177]", "v[163:178]", "v[164:179]", + "v[165:180]", "v[166:181]", "v[167:182]", "v[168:183]", "v[169:184]", + "v[170:185]", "v[171:186]", "v[172:187]", "v[173:188]", "v[174:189]", + "v[175:190]", "v[176:191]", "v[177:192]", "v[178:193]", "v[179:194]", + "v[180:195]", "v[181:196]", "v[182:197]", "v[183:198]", "v[184:199]", + "v[185:200]", "v[186:201]", "v[187:202]", "v[188:203]", "v[189:204]", + "v[190:205]", "v[191:206]", "v[192:207]", "v[193:208]", "v[194:209]", + "v[195:210]", "v[196:211]", "v[197:212]", "v[198:213]", "v[199:214]", + "v[200:215]", "v[201:216]", "v[202:217]", "v[203:218]", "v[204:219]", + "v[205:220]", "v[206:221]", "v[207:222]", "v[208:223]", "v[209:224]", + "v[210:225]", "v[211:226]", "v[212:227]", "v[213:228]", "v[214:229]", + "v[215:230]", "v[216:231]", "v[217:232]", "v[218:233]", "v[219:234]", + "v[220:235]", "v[221:236]", 
"v[222:237]", "v[223:238]", "v[224:239]", + "v[225:240]", "v[226:241]", "v[227:242]", "v[228:243]", "v[229:244]", + "v[230:245]", "v[231:246]", "v[232:247]", "v[233:248]", "v[234:249]", + "v[235:250]", "v[236:251]", "v[237:252]", "v[238:253]", "v[239:254]", + "v[240:255]" +}; + +static const char *const SGPR64RegNames[] = { + "s[0:1]", "s[2:3]", "s[4:5]", "s[6:7]", "s[8:9]", "s[10:11]", + "s[12:13]", "s[14:15]", "s[16:17]", "s[18:19]", "s[20:21]", "s[22:23]", + "s[24:25]", "s[26:27]", "s[28:29]", "s[30:31]", "s[32:33]", "s[34:35]", + "s[36:37]", "s[38:39]", "s[40:41]", "s[42:43]", "s[44:45]", "s[46:47]", + "s[48:49]", "s[50:51]", "s[52:53]", "s[54:55]", "s[56:57]", "s[58:59]", + "s[60:61]", "s[62:63]", "s[64:65]", "s[66:67]", "s[68:69]", "s[70:71]", + "s[72:73]", "s[74:75]", "s[76:77]", "s[78:79]", "s[80:81]", "s[82:83]", + "s[84:85]", "s[86:87]", "s[88:89]", "s[90:91]", "s[92:93]", "s[94:95]", + "s[96:97]", "s[98:99]", "s[100:101]", "s[102:103]" +}; + +static const char *const SGPR128RegNames[] = { + "s[0:3]", "s[4:7]", "s[8:11]", "s[12:15]", "s[16:19]", "s[20:23]", + "s[24:27]", "s[28:31]", "s[32:35]", "s[36:39]", "s[40:43]", "s[44:47]", + "s[48:51]", "s[52:55]", "s[56:59]", "s[60:63]", "s[64:67]", "s[68:71]", + "s[72:75]", "s[76:79]", "s[80:83]", "s[84:87]", "s[88:91]", "s[92:95]", + "s[96:99]", "s[100:103]" +}; + +static const char *const SGPR256RegNames[] = { + "s[0:7]", "s[4:11]", "s[8:15]", "s[12:19]", "s[16:23]", + "s[20:27]", "s[24:31]", "s[28:35]", "s[32:39]", "s[36:43]", + "s[40:47]", "s[44:51]", "s[48:55]", "s[52:59]", "s[56:63]", + "s[60:67]", "s[64:71]", "s[68:75]", "s[72:79]", "s[76:83]", + "s[80:87]", "s[84:91]", "s[88:95]", "s[92:99]", "s[96:103]" +}; + +static const char *const SGPR512RegNames[] = { + "s[0:15]", "s[4:19]", "s[8:23]", "s[12:27]", "s[16:31]", "s[20:35]", + "s[24:39]", "s[28:43]", "s[32:47]", "s[36:51]", "s[40:55]", "s[44:59]", + "s[48:63]", "s[52:67]", "s[56:71]", "s[60:75]", "s[64:79]", "s[68:83]", + "s[72:87]", "s[76:91]", "s[80:95]", "s[84:99]", "s[88:103]" +}; + +#endif diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h index 7c198a1b8a3f5..201fdc1974c68 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h +++ b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h @@ -36,7 +36,6 @@ class AMDGPUGenRegisterBankInfo : public RegisterBankInfo { #define GET_TARGET_REGBANK_CLASS #include "AMDGPUGenRegisterBank.inc" - }; class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo { const SIRegisterInfo *TRI; diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp index 941f2d8a468a8..ff58aa5741a1a 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp +++ b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp @@ -14,6 +14,7 @@ #include "AMDGPURegisterInfo.h" #include "AMDGPUTargetMachine.h" +#include "SIRegisterInfo.h" using namespace llvm; @@ -24,18 +25,6 @@ AMDGPURegisterInfo::AMDGPURegisterInfo() : AMDGPUGenRegisterInfo(0) {} // they are not supported at this time. //===----------------------------------------------------------------------===// -// Dummy to not crash RegisterClassInfo. 
-static const MCPhysReg CalleeSavedReg = AMDGPU::NoRegister; - -const MCPhysReg *AMDGPURegisterInfo::getCalleeSavedRegs( - const MachineFunction *) const { - return &CalleeSavedReg; -} - -unsigned AMDGPURegisterInfo::getFrameRegister(const MachineFunction &MF) const { - return AMDGPU::NoRegister; -} - unsigned AMDGPURegisterInfo::getSubRegFromChannel(unsigned Channel) const { static const unsigned SubRegs[] = { AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, AMDGPU::sub4, @@ -50,3 +39,34 @@ unsigned AMDGPURegisterInfo::getSubRegFromChannel(unsigned Channel) const { #define GET_REGINFO_TARGET_DESC #include "AMDGPUGenRegisterInfo.inc" + +// Forced to be here by one .inc +const MCPhysReg *SIRegisterInfo::getCalleeSavedRegs( + const MachineFunction *MF) const { + CallingConv::ID CC = MF->getFunction()->getCallingConv(); + switch (CC) { + case CallingConv::C: + case CallingConv::Fast: + return CSR_AMDGPU_HighRegs_SaveList; + default: { + // Dummy to not crash RegisterClassInfo. + static const MCPhysReg NoCalleeSavedReg = AMDGPU::NoRegister; + return &NoCalleeSavedReg; + } + } +} + +const uint32_t *SIRegisterInfo::getCallPreservedMask(const MachineFunction &MF, + CallingConv::ID CC) const { + switch (CC) { + case CallingConv::C: + case CallingConv::Fast: + return CSR_AMDGPU_HighRegs_RegMask; + default: + return nullptr; + } +} + +unsigned SIRegisterInfo::getFrameRegister(const MachineFunction &MF) const { + return AMDGPU::NoRegister; +} diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPURegisterInfo.h b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPURegisterInfo.h index 22b1663821d96..d8604d2590f1f 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPURegisterInfo.h +++ b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPURegisterInfo.h @@ -30,9 +30,6 @@ struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo { /// \returns the sub reg enum value for the given \p Channel /// (e.g. 
getSubRegFromChannel(0) -> AMDGPU::sub0)
 unsigned getSubRegFromChannel(unsigned Channel) const;
-
-  const MCPhysReg* getCalleeSavedRegs(const MachineFunction *MF) const override;
-  unsigned getFrameRegister(const MachineFunction &MF) const override;
 };
 } // End namespace llvm
diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 6e301b4ad527a..7796176290108 100644
--- a/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -13,6 +13,14 @@
 //===----------------------------------------------------------------------===//
 
 #include "AMDGPUSubtarget.h"
+#include "AMDGPU.h"
+#include "AMDGPUTargetMachine.h"
+#ifdef LLVM_BUILD_GLOBAL_ISEL
+#include "AMDGPUCallLowering.h"
+#include "AMDGPUInstructionSelector.h"
+#include "AMDGPULegalizerInfo.h"
+#include "AMDGPURegisterBankInfo.h"
+#endif
 #include "SIMachineFunctionInfo.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/CodeGen/MachineScheduler.h"
@@ -72,6 +80,31 @@ AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT,
   return *this;
 }
 
+#ifdef LLVM_BUILD_GLOBAL_ISEL
+namespace {
+
+struct SIGISelActualAccessor : public GISelAccessor {
+  std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
+  std::unique_ptr<InstructionSelector> InstSelector;
+  std::unique_ptr<LegalizerInfo> Legalizer;
+  std::unique_ptr<RegisterBankInfo> RegBankInfo;
+  const AMDGPUCallLowering *getCallLowering() const override {
+    return CallLoweringInfo.get();
+  }
+  const InstructionSelector *getInstructionSelector() const override {
+    return InstSelector.get();
+  }
+  const LegalizerInfo *getLegalizerInfo() const override {
+    return Legalizer.get();
+  }
+  const RegisterBankInfo *getRegBankInfo() const override {
+    return RegBankInfo.get();
+  }
+};
+
+} // end anonymous namespace
+#endif
+
 AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
                                  const TargetMachine &TM) :
   AMDGPUGenSubtargetInfo(TT, GPU, FS),
@@ -91,6 +124,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
     FPExceptions(false),
     DX10Clamp(false),
     FlatForGlobal(false),
+    AutoWaitcntBeforeBarrier(false),
     UnalignedScratchAccess(false),
     UnalignedBufferAccess(false),
 
@@ -123,6 +157,11 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
     HasScalarStores(false),
     HasInv2PiInlineImm(false),
     HasSDWA(false),
+    HasSDWAOmod(false),
+    HasSDWAScalar(false),
+    HasSDWASdst(false),
+    HasSDWAMac(false),
+    HasSDWAOutModsVOPC(false),
     HasDPP(false),
     FlatAddressSpace(false),
     FlatInstOffsets(false),
@@ -238,7 +277,7 @@ std::pair<unsigned, unsigned> AMDGPUSubtarget::getWavesPerEU(
   // Make sure requested values are compatible with values implied by requested
   // minimum/maximum flat work group sizes.
   if (RequestedFlatWorkGroupSize &&
-      Requested.first > MinImpliedByFlatWorkGroupSize)
+      Requested.first < MinImpliedByFlatWorkGroupSize)
     return Default;
 
   return Requested;
@@ -259,18 +298,21 @@ bool AMDGPUSubtarget::makeLIDRangeMetadata(Instruction *I) const {
   case Intrinsic::amdgcn_workitem_id_x:
   case Intrinsic::r600_read_tidig_x:
     IdQuery = true;
+    LLVM_FALLTHROUGH;
   case Intrinsic::r600_read_local_size_x:
     Dim = 0;
     break;
   case Intrinsic::amdgcn_workitem_id_y:
   case Intrinsic::r600_read_tidig_y:
     IdQuery = true;
+    LLVM_FALLTHROUGH;
  case Intrinsic::r600_read_local_size_y:
     Dim = 1;
     break;
   case Intrinsic::amdgcn_workitem_id_z:
   case Intrinsic::r600_read_tidig_z:
     IdQuery = true;
+    LLVM_FALLTHROUGH;
   case Intrinsic::r600_read_local_size_z:
     Dim = 2;
     break;
@@ -311,11 +353,23 @@ R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS,
     TLInfo(TM, *this) {}
 
 SISubtarget::SISubtarget(const Triple &TT, StringRef GPU, StringRef FS,
-                         const TargetMachine &TM) :
-  AMDGPUSubtarget(TT, GPU, FS, TM),
-  InstrInfo(*this),
-  FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
-  TLInfo(TM, *this) {}
+                         const TargetMachine &TM)
+    : AMDGPUSubtarget(TT, GPU, FS, TM), InstrInfo(*this),
+      FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
+      TLInfo(TM, *this) {
+#ifndef LLVM_BUILD_GLOBAL_ISEL
+  GISelAccessor *GISel = new GISelAccessor();
+#else
+  SIGISelActualAccessor *GISel = new SIGISelActualAccessor();
+  GISel->CallLoweringInfo.reset(new AMDGPUCallLowering(*getTargetLowering()));
+  GISel->Legalizer.reset(new AMDGPULegalizerInfo());
+
+  GISel->RegBankInfo.reset(new AMDGPURegisterBankInfo(*getRegisterInfo()));
+  GISel->InstSelector.reset(new AMDGPUInstructionSelector(
+      *this, *static_cast<AMDGPURegisterBankInfo *>(GISel->RegBankInfo.get())));
+#endif
+  setGISelAccessor(*GISel);
+}
 
 void SISubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
                                       unsigned NumRegionInstrs) const {
diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUSubtarget.h b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUSubtarget.h
index bed7d326b3dd5..d4b6a5fe8020b 100644
--- a/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -16,12 +16,12 @@
 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
 
 #include "AMDGPU.h"
-#include "R600InstrInfo.h"
-#include "R600ISelLowering.h"
 #include "R600FrameLowering.h"
-#include "SIInstrInfo.h"
-#include "SIISelLowering.h"
+#include "R600ISelLowering.h"
+#include "R600InstrInfo.h"
 #include "SIFrameLowering.h"
+#include "SIISelLowering.h"
+#include "SIInstrInfo.h"
 #include "SIMachineFunctionInfo.h"
 #include "Utils/AMDGPUBaseInfo.h"
 #include "llvm/ADT/Triple.h"
@@ -57,9 +57,12 @@ class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo {
   enum {
     ISAVersion0_0_0,
+    ISAVersion6_0_0,
+    ISAVersion6_0_1,
     ISAVersion7_0_0,
     ISAVersion7_0_1,
     ISAVersion7_0_2,
+    ISAVersion7_0_3,
     ISAVersion8_0_0,
     ISAVersion8_0_1,
     ISAVersion8_0_2,
@@ -67,7 +70,9 @@ class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo {
     ISAVersion8_0_4,
     ISAVersion8_1_0,
     ISAVersion9_0_0,
-    ISAVersion9_0_1
+    ISAVersion9_0_1,
+    ISAVersion9_0_2,
+    ISAVersion9_0_3
   };
 
   enum TrapHandlerAbi {
@@ -110,6 +115,7 @@ class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo {
   bool FPExceptions;
   bool DX10Clamp;
   bool FlatForGlobal;
+  bool AutoWaitcntBeforeBarrier;
   bool UnalignedScratchAccess;
   bool UnalignedBufferAccess;
   bool HasApertureRegs;
@@ -143,6 +149,11 @@ class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo {
   bool HasScalarStores;
   bool HasInv2PiInlineImm;
   bool HasSDWA;
+
bool HasSDWAOmod; + bool HasSDWAScalar; + bool HasSDWASdst; + bool HasSDWAMac; + bool HasSDWAOutModsVOPC; bool HasDPP; bool FlatAddressSpace; bool FlatInstOffsets; @@ -195,7 +206,8 @@ class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo { } bool isOpenCLEnv() const { - return TargetTriple.getEnvironment() == Triple::OpenCL; + return TargetTriple.getEnvironment() == Triple::OpenCL || + TargetTriple.getEnvironmentName() == "amdgizcl"; } Generation getGeneration() const { @@ -289,6 +301,10 @@ class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo { return getGeneration() >= GFX9; } + bool hasMin3Max3_16() const { + return getGeneration() >= GFX9; + } + bool hasCARRY() const { return (getGeneration() >= EVERGREEN); } @@ -343,6 +359,10 @@ class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo { return FP64FP16Denormals; } + bool supportsMinMaxDenormModes() const { + return getGeneration() >= AMDGPUSubtarget::GFX9; + } + bool hasFPExceptions() const { return FPExceptions; } @@ -359,6 +379,10 @@ class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo { return FlatForGlobal; } + bool hasAutoWaitcntBeforeBarrier() const { + return AutoWaitcntBeforeBarrier; + } + bool hasUnalignedBufferAccess() const { return UnalignedBufferAccess; } @@ -412,6 +436,30 @@ class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo { return getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS; } + bool hasSDWA() const { + return HasSDWA; + } + + bool hasSDWAOmod() const { + return HasSDWAOmod; + } + + bool hasSDWAScalar() const { + return HasSDWAScalar; + } + + bool hasSDWASdst() const { + return HasSDWASdst; + } + + bool hasSDWAMac() const { + return HasSDWAMac; + } + + bool hasSDWAOutModsVOPC() const { + return HasSDWAOutModsVOPC; + } + /// \brief Returns the offset in bytes from the start of the input buffer /// of the first explicit kernel argument. unsigned getExplicitKernelArgOffset(const MachineFunction &MF) const { @@ -666,10 +714,6 @@ class SISubtarget final : public AMDGPUSubtarget { return HasInv2PiInlineImm; } - bool hasSDWA() const { - return HasSDWA; - } - bool hasDPP() const { return HasDPP; } @@ -723,12 +767,6 @@ class SISubtarget final : public AMDGPUSubtarget { /// Return the maximum number of waves per SIMD for kernels using \p VGPRs VGPRs unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const; - /// \returns True if waitcnt instruction is needed before barrier instruction, - /// false otherwise. - bool needWaitcntBeforeBarrier() const { - return getGeneration() < GFX9; - } - /// \returns true if the flat_scratch register should be initialized with the /// pointer to the wave's scratch memory rather than a size and offset. bool flatScratchIsPointer() const { @@ -783,7 +821,7 @@ class SISubtarget final : public AMDGPUSubtarget { /// \returns VGPR allocation granularity supported by the subtarget. unsigned getVGPRAllocGranule() const { - return AMDGPU::IsaInfo::getVGPRAllocGranule(getFeatureBits());; + return AMDGPU::IsaInfo::getVGPRAllocGranule(getFeatureBits()); } /// \returns VGPR encoding granularity supported by the subtarget. 
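
A note on the AMDGPUSubtarget::getWavesPerEU hunk above: the flipped comparison means a requested waves-per-EU range is only honored when its lower bound is at least the minimum already implied by the flat work group size. A standalone sketch of that check, not part of the patch, with plain parameters standing in for the real subtarget queries:

#include <utility>

using WaveRange = std::pair<unsigned, unsigned>;

// Sketch only: reject a requested waves-per-EU range whose lower bound falls
// below what the requested flat work group size already forces, and fall
// back to the subtarget defaults instead.
WaveRange selectWavesPerEU(WaveRange Requested, WaveRange Default,
                           bool RequestedFlatWorkGroupSize,
                           unsigned MinImpliedByFlatWorkGroupSize) {
  if (RequestedFlatWorkGroupSize &&
      Requested.first < MinImpliedByFlatWorkGroupSize)
    return Default;
  return Requested;
}
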
diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index cd5bad04d0b3e..dc868f010d85c 100644
--- a/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -19,35 +19,33 @@
 #include "AMDGPUCallLowering.h"
 #include "AMDGPUInstructionSelector.h"
 #include "AMDGPULegalizerInfo.h"
-#ifdef LLVM_BUILD_GLOBAL_ISEL
-#include "AMDGPURegisterBankInfo.h"
-#endif
+#include "AMDGPUMacroFusion.h"
 #include "AMDGPUTargetObjectFile.h"
 #include "AMDGPUTargetTransformInfo.h"
 #include "GCNIterativeScheduler.h"
 #include "GCNSchedStrategy.h"
 #include "R600MachineScheduler.h"
 #include "SIMachineScheduler.h"
-#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
 #include "llvm/CodeGen/GlobalISel/IRTranslator.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
 #include "llvm/CodeGen/GlobalISel/Legalizer.h"
 #include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
-#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Transforms/IPO.h"
-#include "llvm/Transforms/IPO/AlwaysInliner.h"
-#include "llvm/Transforms/IPO/PassManagerBuilder.h"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Scalar/GVN.h"
-#include "llvm/Transforms/Vectorize.h"
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/LegacyPassManager.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compiler.h"
+#include "llvm/Support/TargetRegistry.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/IPO/AlwaysInliner.h"
+#include "llvm/Transforms/IPO/PassManagerBuilder.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/GVN.h"
+#include "llvm/Transforms/Vectorize.h"
 #include <memory>
 
 using namespace llvm;
@@ -85,7 +83,7 @@ static cl::opt<bool> EnableLoadStoreVectorizer(
 static cl::opt<bool> ScalarizeGlobal(
   "amdgpu-scalarize-global-loads",
   cl::desc("Enable global load scalarization"),
-  cl::init(false),
+  cl::init(true),
   cl::Hidden);
 
 // Option to run internalize pass.
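
The option hunk above flips amdgpu-scalarize-global-loads on by default; the next hunk does the same for enable-si-insert-waitcnts and adds the hidden amdgpu-late-structurize toggle. All of these share one cl::opt declaration pattern; a minimal sketch with an invented flag name:

#include "llvm/Support/CommandLine.h"

// Invented example flag, declared the same way as the amdgpu-* options in
// this file: a hidden boolean toggle with an explicit default.
static llvm::cl::opt<bool> EnableExampleFeature(
    "amdgpu-example-feature",
    llvm::cl::desc("Enable an example feature"),
    llvm::cl::init(true),
    llvm::cl::Hidden);
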
@@ -116,7 +114,14 @@ static cl::opt<bool> EnableAMDGPUAliasAnalysis("enable-amdgpu-aa", cl::Hidden,
 static cl::opt<bool> EnableSIInsertWaitcntsPass(
   "enable-si-insert-waitcnts",
   cl::desc("Use new waitcnt insertion pass"),
-  cl::init(false));
+  cl::init(true));
+
+// Option to run late CFG structurizer
+static cl::opt<bool> LateCFGStructurize(
+  "amdgpu-late-structurize",
+  cl::desc("Enable late CFG structurization"),
+  cl::init(false),
+  cl::Hidden);
 
 extern "C" void LLVMInitializeAMDGPUTarget() {
   // Register the target
@@ -132,6 +137,7 @@ extern "C" void LLVMInitializeAMDGPUTarget() {
   initializeSIShrinkInstructionsPass(*PR);
   initializeSIFixControlFlowLiveIntervalsPass(*PR);
   initializeSILoadStoreOptimizerPass(*PR);
+  initializeAMDGPUAlwaysInlinePass(*PR);
   initializeAMDGPUAnnotateKernelFeaturesPass(*PR);
   initializeAMDGPUAnnotateUniformValuesPass(*PR);
   initializeAMDGPULowerIntrinsicsPass(*PR);
@@ -168,6 +174,7 @@ createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {
     new GCNScheduleDAGMILive(C, make_unique<GCNMaxOccupancySchedStrategy>(C));
   DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
   DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
+  DAG->addMutation(createAMDGPUMacroFusionDAGMutation());
   return DAG;
 }
 
@@ -334,6 +341,14 @@ void AMDGPUTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
         PM.add(createAMDGPUExternalAAWrapperPass());
       }
   });
+
+  Builder.addExtension(
+    PassManagerBuilder::EP_CGSCCOptimizerLate,
+    [](const PassManagerBuilder &, legacy::PassManagerBase &PM) {
+      // Add infer address spaces pass to the opt pipeline after inlining
+      // but before SROA to increase SROA opportunities.
+      PM.add(createInferAddressSpacesPass());
+  });
 }
 
 //===----------------------------------------------------------------------===//
@@ -373,31 +388,6 @@ const R600Subtarget *R600TargetMachine::getSubtargetImpl(
 // GCN Target Machine (SI+)
 //===----------------------------------------------------------------------===//
 
-#ifdef LLVM_BUILD_GLOBAL_ISEL
-namespace {
-
-struct SIGISelActualAccessor : public GISelAccessor {
-  std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
-  std::unique_ptr<InstructionSelector> InstSelector;
-  std::unique_ptr<LegalizerInfo> Legalizer;
-  std::unique_ptr<RegisterBankInfo> RegBankInfo;
-  const AMDGPUCallLowering *getCallLowering() const override {
-    return CallLoweringInfo.get();
-  }
-  const InstructionSelector *getInstructionSelector() const override {
-    return InstSelector.get();
-  }
-  const LegalizerInfo *getLegalizerInfo() const override {
-    return Legalizer.get();
-  }
-  const RegisterBankInfo *getRegBankInfo() const override {
-    return RegBankInfo.get();
-  }
-};
-
-} // end anonymous namespace
-#endif
-
 GCNTargetMachine::GCNTargetMachine(const Target &T, const Triple &TT,
                                    StringRef CPU, StringRef FS,
                                    TargetOptions Options,
@@ -419,21 +409,6 @@ const SISubtarget *GCNTargetMachine::getSubtargetImpl(const Function &F) const {
   // function that reside in TargetOptions.
   resetTargetOptions(F);
   I = llvm::make_unique<SISubtarget>(TargetTriple, GPU, FS, *this);
-
-#ifndef LLVM_BUILD_GLOBAL_ISEL
-    GISelAccessor *GISel = new GISelAccessor();
-#else
-    SIGISelActualAccessor *GISel = new SIGISelActualAccessor();
-    GISel->CallLoweringInfo.reset(
-      new AMDGPUCallLowering(*I->getTargetLowering()));
-    GISel->Legalizer.reset(new AMDGPULegalizerInfo());
-
-    GISel->RegBankInfo.reset(new AMDGPURegisterBankInfo(*I->getRegisterInfo()));
-    GISel->InstSelector.reset(new AMDGPUInstructionSelector(*I,
-      *static_cast<AMDGPURegisterBankInfo *>(GISel->RegBankInfo.get())));
-#endif
-
-    I->setGISelAccessor(*GISel);
   }
 
   I->setScalarizeGlobalBehavior(ScalarizeGlobal);
@@ -449,7 +424,7 @@ namespace {
 
 class AMDGPUPassConfig : public TargetPassConfig {
 public:
-  AMDGPUPassConfig(TargetMachine *TM, PassManagerBase &PM)
+  AMDGPUPassConfig(LLVMTargetMachine &TM, PassManagerBase &PM)
     : TargetPassConfig(TM, PM) {
     // Exceptions and StackMaps are not supported, so these passes will never do
     // anything.
@@ -480,7 +455,7 @@ class AMDGPUPassConfig : public TargetPassConfig {
 
 class R600PassConfig final : public AMDGPUPassConfig {
 public:
-  R600PassConfig(TargetMachine *TM, PassManagerBase &PM)
+  R600PassConfig(LLVMTargetMachine &TM, PassManagerBase &PM)
     : AMDGPUPassConfig(TM, PM) {}
 
   ScheduleDAGInstrs *createMachineScheduler(
@@ -496,7 +471,7 @@ class R600PassConfig final : public AMDGPUPassConfig {
 
 class GCNPassConfig final : public AMDGPUPassConfig {
 public:
-  GCNPassConfig(TargetMachine *TM, PassManagerBase &PM)
+  GCNPassConfig(LLVMTargetMachine &TM, PassManagerBase &PM)
     : AMDGPUPassConfig(TM, PM) {}
 
   GCNTargetMachine &getGCNTargetMachine() const {
@@ -563,7 +538,7 @@ void AMDGPUPassConfig::addIRPasses() {
   disablePass(&FuncletLayoutID);
   disablePass(&PatchableFunctionID);
 
-  addPass(createAMDGPULowerIntrinsicsPass(&TM));
+  addPass(createAMDGPULowerIntrinsicsPass());
 
   // Function calls are not supported, so make sure we inline everything.
   addPass(createAMDGPUAlwaysInlinePass());
@@ -578,8 +553,7 @@ void AMDGPUPassConfig::addIRPasses() {
   if (TM.getTargetTriple().getArch() == Triple::amdgcn) {
     // TODO: May want to move later or split into an early and late one.
-    addPass(createAMDGPUCodeGenPreparePass(
-      static_cast<AMDGPUTargetMachine *>(&TM)));
+    addPass(createAMDGPUCodeGenPreparePass());
   }
 
   // Handle uses of OpenCL image2d_t, image3d_t and sampler_t arguments.
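
The addIRPasses hunks below drop the TargetMachine argument from several pass factories (createAMDGPULowerIntrinsicsPass, createAMDGPUCodeGenPreparePass, createAMDGPUPromoteAlloca). A hedged sketch of the pattern such passes can use instead, fetching the machine through the TargetPassConfig analysis; the pass itself is invented:

#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Function.h"
#include "llvm/Pass.h"
#include "llvm/Target/TargetMachine.h"

namespace {
// Invented example pass: rather than storing a TargetMachine* handed to its
// factory function, it requires TargetPassConfig and asks it for the TM.
struct ExampleTMPass : public llvm::FunctionPass {
  static char ID;
  ExampleTMPass() : llvm::FunctionPass(ID) {}

  void getAnalysisUsage(llvm::AnalysisUsage &AU) const override {
    AU.addRequired<llvm::TargetPassConfig>();
  }

  bool runOnFunction(llvm::Function &F) override {
    const llvm::TargetMachine &TM =
        getAnalysis<llvm::TargetPassConfig>().getTM<llvm::TargetMachine>();
    (void)TM; // target-dependent work would use TM here
    return false;
  }
};
} // end anonymous namespace

char ExampleTMPass::ID = 0;
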
@@ -587,7 +561,7 @@ void AMDGPUPassConfig::addIRPasses() { if (TM.getOptLevel() > CodeGenOpt::None) { addPass(createInferAddressSpacesPass()); - addPass(createAMDGPUPromoteAlloca(&TM)); + addPass(createAMDGPUPromoteAlloca()); if (EnableSROA) addPass(createSROAPass()); @@ -657,26 +631,26 @@ bool R600PassConfig::addPreISel() { } void R600PassConfig::addPreRegAlloc() { - addPass(createR600VectorRegMerger(*TM)); + addPass(createR600VectorRegMerger()); } void R600PassConfig::addPreSched2() { addPass(createR600EmitClauseMarkers(), false); if (EnableR600IfConvert) addPass(&IfConverterID, false); - addPass(createR600ClauseMergePass(*TM), false); + addPass(createR600ClauseMergePass(), false); } void R600PassConfig::addPreEmitPass() { addPass(createAMDGPUCFGStructurizerPass(), false); - addPass(createR600ExpandSpecialInstrsPass(*TM), false); + addPass(createR600ExpandSpecialInstrsPass(), false); addPass(&FinalizeMachineBundlesID, false); - addPass(createR600Packetizer(*TM), false); - addPass(createR600ControlFlowFinalizer(*TM), false); + addPass(createR600Packetizer(), false); + addPass(createR600ControlFlowFinalizer(), false); } TargetPassConfig *R600TargetMachine::createPassConfig(PassManagerBase &PM) { - return new R600PassConfig(this, PM); + return new R600PassConfig(*this, PM); } //===----------------------------------------------------------------------===// @@ -696,17 +670,19 @@ bool GCNPassConfig::addPreISel() { // FIXME: We need to run a pass to propagate the attributes when calls are // supported. - const AMDGPUTargetMachine &TM = getAMDGPUTargetMachine(); - addPass(createAMDGPUAnnotateKernelFeaturesPass(&TM)); + addPass(createAMDGPUAnnotateKernelFeaturesPass()); // Merge divergent exit nodes. StructurizeCFG won't recognize the multi-exit // regions formed by them. 
addPass(&AMDGPUUnifyDivergentExitNodesID); - addPass(createStructurizeCFGPass(true)); // true -> SkipUniformRegions + if (!LateCFGStructurize) { + addPass(createStructurizeCFGPass(true)); // true -> SkipUniformRegions + } addPass(createSinkingPass()); - addPass(createSITypeRewriter()); addPass(createAMDGPUAnnotateUniformValues()); - addPass(createSIAnnotateControlFlowPass()); + if (!LateCFGStructurize) { + addPass(createSIAnnotateControlFlowPass()); + } return false; } @@ -724,11 +700,14 @@ void GCNPassConfig::addMachineSSAOptimization() { addPass(&SIFoldOperandsID); addPass(&DeadMachineInstructionElimID); addPass(&SILoadStoreOptimizerID); - addPass(createSIShrinkInstructionsPass()); if (EnableSDWAPeephole) { addPass(&SIPeepholeSDWAID); + addPass(&MachineLICMID); + addPass(&MachineCSEID); + addPass(&SIFoldOperandsID); addPass(&DeadMachineInstructionElimID); } + addPass(createSIShrinkInstructionsPass()); } bool GCNPassConfig::addILPOpts() { @@ -770,6 +749,9 @@ bool GCNPassConfig::addGlobalInstructionSelect() { #endif void GCNPassConfig::addPreRegAlloc() { + if (LateCFGStructurize) { + addPass(createAMDGPUMachineCFGStructurizerPass()); + } addPass(createSIWholeQuadModePass()); } @@ -829,6 +811,6 @@ void GCNPassConfig::addPreEmitPass() { } TargetPassConfig *GCNTargetMachine::createPassConfig(PassManagerBase &PM) { - return new GCNPassConfig(this, PM); + return new GCNPassConfig(*this, PM); } diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUTargetMachine.h index 934bf7f31bab4..a3c7c1982d0a6 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUTargetMachine.h +++ b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUTargetMachine.h @@ -69,7 +69,6 @@ class AMDGPUTargetMachine : public LLVMTargetMachine { return -1; return 0; } - }; //===----------------------------------------------------------------------===// @@ -89,6 +88,10 @@ class R600TargetMachine final : public AMDGPUTargetMachine { TargetPassConfig *createPassConfig(PassManagerBase &PM) override; const R600Subtarget *getSubtargetImpl(const Function &) const override; + + bool isMachineVerifierClean() const override { + return false; + } }; //===----------------------------------------------------------------------===// diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp index c96761c0b04ec..6c1885e67fcb7 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp +++ b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp @@ -7,13 +7,13 @@ // //===----------------------------------------------------------------------===// -#include "AMDGPUTargetMachine.h" #include "AMDGPUTargetObjectFile.h" #include "AMDGPU.h" +#include "AMDGPUTargetMachine.h" +#include "Utils/AMDGPUBaseInfo.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionELF.h" -#include "llvm/Support/ELF.h" -#include "Utils/AMDGPUBaseInfo.h" using namespace llvm; diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index beafebc1284a6..89a03902dc69b 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -20,8 +20,8 @@ #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/BasicTTIImpl.h" 
-#include "llvm/IR/Module.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" #include "llvm/Target/CostTable.h" #include "llvm/Target/TargetLowering.h" @@ -63,7 +63,7 @@ static bool dependsOnLocalPhi(const Loop *L, const Value *Cond, return false; } -void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, +void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP) { UP.Threshold = 300; // Twice the default. UP.MaxCount = UINT_MAX; @@ -184,9 +184,9 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, } } -unsigned AMDGPUTTIImpl::getNumberOfRegisters(bool Vec) { - if (Vec) - return 0; +unsigned AMDGPUTTIImpl::getHardwareNumberOfRegisters(bool Vec) const { + // The concept of vector registers doesn't really exist. Some packed vector + // operations operate on the normal 32-bit registers. // Number of VGPRs on SI. if (ST->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) @@ -195,8 +195,18 @@ unsigned AMDGPUTTIImpl::getNumberOfRegisters(bool Vec) { return 4 * 128; // XXX - 4 channels. Should these count as vector instead? } -unsigned AMDGPUTTIImpl::getRegisterBitWidth(bool Vector) { - return Vector ? 0 : 32; +unsigned AMDGPUTTIImpl::getNumberOfRegisters(bool Vec) const { + // This is really the number of registers to fill when vectorizing / + // interleaving loops, so we lie to avoid trying to use all registers. + return getHardwareNumberOfRegisters(Vec) >> 3; +} + +unsigned AMDGPUTTIImpl::getRegisterBitWidth(bool Vector) const { + return 32; +} + +unsigned AMDGPUTTIImpl::getMinVectorRegisterBitWidth() const { + return 32; } unsigned AMDGPUTTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const { @@ -247,11 +257,11 @@ bool AMDGPUTTIImpl::isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, unsigned AMDGPUTTIImpl::getMaxInterleaveFactor(unsigned VF) { // Disable unrolling if the loop is not vectorized. + // TODO: Enable this again. if (VF == 1) return 1; - // Semi-arbitrary large amount. 
-  return 64;
+  return 8;
 }
 
 int AMDGPUTTIImpl::getArithmeticInstrCost(
@@ -489,6 +499,19 @@ bool AMDGPUTTIImpl::isSourceOfDivergence(const Value *V) const {
   return false;
 }
 
+bool AMDGPUTTIImpl::isAlwaysUniform(const Value *V) const {
+  if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(V)) {
+    switch (Intrinsic->getIntrinsicID()) {
+    default:
+      return false;
+    case Intrinsic::amdgcn_readfirstlane:
+    case Intrinsic::amdgcn_readlane:
+      return true;
+    }
+  }
+  return false;
+}
+
 unsigned AMDGPUTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
                                        Type *SubTp) {
   if (ST->hasVOP3PInsts()) {
diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
index e0024e21e82b9..9a320bdfcc3d4 100644
--- a/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/interpreter/llvm/src/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -68,15 +68,18 @@ class AMDGPUTTIImpl final : public BasicTTIImplBase<AMDGPUTTIImpl> {
 
   bool hasBranchDivergence() { return true; }
 
-  void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP);
+  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
+                               TTI::UnrollingPreferences &UP);
 
   TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) {
     assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
     return TTI::PSK_FastHardware;
   }
 
-  unsigned getNumberOfRegisters(bool Vector);
-  unsigned getRegisterBitWidth(bool Vector);
+  unsigned getHardwareNumberOfRegisters(bool Vector) const;
+  unsigned getNumberOfRegisters(bool Vector) const;
+  unsigned getRegisterBitWidth(bool Vector) const ;
+  unsigned getMinVectorRegisterBitWidth() const;
   unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
 
   bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
@@ -103,6 +106,7 @@ class AMDGPUTTIImpl final : public BasicTTIImplBase<AMDGPUTTIImpl> {
   int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index);
 
   bool isSourceOfDivergence(const Value *V) const;
+  bool isAlwaysUniform(const Value *V) const;
 
   unsigned getFlatAddressSpace() const {
     // Don't bother running InferAddressSpaces pass on graphics shaders which
diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/interpreter/llvm/src/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 70c848f3c7bd7..b37c274102bc8 100644
--- a/interpreter/llvm/src/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/interpreter/llvm/src/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -11,18 +11,19 @@
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
 #include "SIDefines.h"
+#include "Utils/AMDGPUAsmUtils.h"
 #include "Utils/AMDGPUBaseInfo.h"
 #include "Utils/AMDKernelCodeTUtils.h"
-#include "Utils/AMDGPUAsmUtils.h"
 #include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallBitVector.h"
 #include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/CodeGen/MachineValueType.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
@@ -40,12 +41,11 @@
 #include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Support/Casting.h"
-#include "llvm/Support/ELF.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/SMLoc.h"
 #include
"llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" #include #include #include @@ -152,6 +152,8 @@ class AMDGPUOperand : public MCParsedAsmOperand { ImmTyExpTgt, ImmTyExpCompr, ImmTyExpVM, + ImmTyDFMT, + ImmTyNFMT, ImmTyHwreg, ImmTyOff, ImmTySendMsg, @@ -161,7 +163,8 @@ class AMDGPUOperand : public MCParsedAsmOperand { ImmTyOpSel, ImmTyOpSelHi, ImmTyNegLo, - ImmTyNegHi + ImmTyNegHi, + ImmTySwizzle }; struct TokOp { @@ -259,6 +262,8 @@ class AMDGPUOperand : public MCParsedAsmOperand { return isOff() || isRegClass(AMDGPU::VGPR_32RegClassID); } + bool isSDWARegKind() const; + bool isImmTy(ImmTy ImmT) const { return isImm() && Imm.Type == ImmT; } @@ -284,10 +289,15 @@ class AMDGPUOperand : public MCParsedAsmOperand { bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); } bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<16>(getImm()); } bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); } + + bool isOffsetU12() const { return isImmTy(ImmTyOffset) && isUInt<12>(getImm()); } + bool isOffsetS13() const { return isImmTy(ImmTyOffset) && isInt<13>(getImm()); } bool isGDS() const { return isImmTy(ImmTyGDS); } bool isGLC() const { return isImmTy(ImmTyGLC); } bool isSLC() const { return isImmTy(ImmTySLC); } bool isTFE() const { return isImmTy(ImmTyTFE); } + bool isDFMT() const { return isImmTy(ImmTyDFMT) && isUInt<8>(getImm()); } + bool isNFMT() const { return isImmTy(ImmTyNFMT) && isUInt<8>(getImm()); } bool isBankMask() const { return isImmTy(ImmTyDppBankMask); } bool isRowMask() const { return isImmTy(ImmTyDppRowMask); } bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); } @@ -474,6 +484,7 @@ class AMDGPUOperand : public MCParsedAsmOperand { bool isSWaitCnt() const; bool isHwreg() const; bool isSendMsg() const; + bool isSwizzle() const; bool isSMRDOffset8() const; bool isSMRDOffset20() const; bool isSMRDLiteralOffset() const; @@ -631,6 +642,8 @@ class AMDGPUOperand : public MCParsedAsmOperand { case ImmTyGLC: OS << "GLC"; break; case ImmTySLC: OS << "SLC"; break; case ImmTyTFE: OS << "TFE"; break; + case ImmTyDFMT: OS << "DFMT"; break; + case ImmTyNFMT: OS << "NFMT"; break; case ImmTyClampSI: OS << "ClampSI"; break; case ImmTyOModSI: OS << "OModSI"; break; case ImmTyDppCtrl: OS << "DppCtrl"; break; @@ -659,6 +672,7 @@ class AMDGPUOperand : public MCParsedAsmOperand { case ImmTyOpSelHi: OS << "OpSelHi"; break; case ImmTyNegLo: OS << "NegLo"; break; case ImmTyNegHi: OS << "NegHi"; break; + case ImmTySwizzle: OS << "Swizzle"; break; } } @@ -811,14 +825,8 @@ class AMDGPUAsmParser : public MCTargetAsmParser { bool ParseDirectiveCodeObjectMetadata(); bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header); bool ParseDirectiveAMDKernelCodeT(); - bool ParseSectionDirectiveHSAText(); bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const; bool ParseDirectiveAMDGPUHsaKernel(); - bool ParseDirectiveAMDGPUHsaModuleGlobal(); - bool ParseDirectiveAMDGPUHsaProgramGlobal(); - bool ParseSectionDirectiveHSADataGlobalAgent(); - bool ParseSectionDirectiveHSADataGlobalProgram(); - bool ParseSectionDirectiveHSARodataReadonlyAgent(); bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, RegisterKind RegKind, unsigned Reg1, unsigned RegNum); @@ -881,10 +889,18 @@ class AMDGPUAsmParser : public MCTargetAsmParser { return AMDGPU::isVI(getSTI()); } + bool isGFX9() const { + return AMDGPU::isGFX9(getSTI()); + } + bool hasInv2PiInlineImm() const { return 
getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; } + bool hasFlatOffsets() const { + return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets]; + } + bool hasSGPR102_SGPR103() const { return !isVI(); } @@ -985,11 +1001,18 @@ class AMDGPUAsmParser : public MCTargetAsmParser { void errorExpTgt(); OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val); - bool validateOperandLimitations(const MCInst &Inst); + bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc); + bool validateConstantBusLimitations(const MCInst &Inst); + bool validateEarlyClobberLimitations(const MCInst &Inst); bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; - bool isSGPR(unsigned Reg); + + bool trySkipId(const StringRef Id); + bool trySkipToken(const AsmToken::TokenKind Kind); + bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); + bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); + bool parseExpr(int64_t &Imm); public: OperandMatchResultTy parseOptionalOperand(OperandVector &Operands); @@ -1000,9 +1023,24 @@ class AMDGPUAsmParser : public MCTargetAsmParser { OperandMatchResultTy parseInterpAttr(OperandVector &Operands); OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); + bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, + const unsigned MinVal, + const unsigned MaxVal, + const StringRef ErrMsg); + OperandMatchResultTy parseSwizzleOp(OperandVector &Operands); + bool parseSwizzleOffset(int64_t &Imm); + bool parseSwizzleMacro(int64_t &Imm); + bool parseSwizzleQuadPerm(int64_t &Imm); + bool parseSwizzleBitmaskPerm(int64_t &Imm); + bool parseSwizzleBroadcast(int64_t &Imm); + bool parseSwizzleSwap(int64_t &Imm); + bool parseSwizzleReverse(int64_t &Imm); + void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); } void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); } void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); } + void cvtMtbuf(MCInst &Inst, const OperandVector &Operands); + AMDGPUOperand::Ptr defaultGLC() const; AMDGPUOperand::Ptr defaultSLC() const; AMDGPUOperand::Ptr defaultTFE() const; @@ -1015,20 +1053,18 @@ class AMDGPUAsmParser : public MCTargetAsmParser { AMDGPUOperand::Ptr defaultSMRDOffset8() const; AMDGPUOperand::Ptr defaultSMRDOffset20() const; AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; + AMDGPUOperand::Ptr defaultOffsetU12() const; + AMDGPUOperand::Ptr defaultOffsetS13() const; OperandMatchResultTy parseOModOperand(OperandVector &Operands); - void cvtId(MCInst &Inst, const OperandVector &Operands); - void cvtVOP3_2_mod(MCInst &Inst, const OperandVector &Operands); - - void cvtVOP3Impl(MCInst &Inst, - const OperandVector &Operands, - OptionalImmIndexMap &OptionalIdx); + void cvtVOP3(MCInst &Inst, const OperandVector &Operands, + OptionalImmIndexMap &OptionalIdx); void cvtVOP3(MCInst &Inst, const OperandVector &Operands); - void cvtVOP3OMod(MCInst &Inst, const OperandVector &Operands); void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); - void cvtMIMG(MCInst &Inst, const OperandVector &Operands); + void cvtMIMG(MCInst &Inst, const OperandVector &Operands, + bool IsAtomic = false); void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); OperandMatchResultTy parseDPPCtrl(OperandVector 
&Operands); @@ -1042,9 +1078,10 @@ class AMDGPUAsmParser : public MCTargetAsmParser { OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands); void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); + void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); void cvtSDWA(MCInst &Inst, const OperandVector &Operands, - uint64_t BasicInstType); + uint64_t BasicInstType, bool skipVcc = false); }; struct OptionalOperand { @@ -1171,7 +1208,7 @@ bool AMDGPUOperand::isInlinableImm(MVT type) const { } bool AMDGPUOperand::isLiteralImm(MVT type) const { - // Check that this imediate can be added as literal + // Check that this immediate can be added as literal if (!isImmTy(ImmTyNone)) { return false; } @@ -1215,6 +1252,15 @@ bool AMDGPUOperand::isRegClass(unsigned RCID) const { return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); } +bool AMDGPUOperand::isSDWARegKind() const { + if (AsmParser->isVI()) + return isVReg(); + else if (AsmParser->isGFX9()) + return isRegKind(); + else + return false; +} + uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const { assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); @@ -1950,6 +1996,15 @@ unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { } } + if ((TSFlags & SIInstrFlags::FLAT) && !hasFlatOffsets()) { + // FIXME: Produces error without correct column reported. + auto OpNum = + AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset); + const auto &Op = Inst.getOperand(OpNum); + if (Op.getImm() != 0) + return Match_InvalidOperand; + } + return Match_Success; } @@ -1966,7 +2021,8 @@ ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { } if (isForcedSDWA()) { - static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA}; + static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, + AMDGPUAsmVariants::SDWA9}; return makeArrayRef(Variants); } @@ -1977,7 +2033,7 @@ ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { static const unsigned Variants[] = { AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, - AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::DPP + AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP }; return makeArrayRef(Variants); @@ -2000,14 +2056,6 @@ unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { return AMDGPU::NoRegister; } -bool AMDGPUAsmParser::isSGPR(unsigned Reg) { - const MCRegisterInfo *TRI = getContext().getRegisterInfo(); - const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID); - const unsigned FirstSubReg = TRI->getSubReg(Reg, 1); - return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) || - Reg == AMDGPU::SCC; -} - // NB: This code is correct only when used to check constant // bus limitations because GFX7 supports no f16 inline constants.
// Note that there are no cases when a GFX7 opcode violates @@ -2049,10 +2097,11 @@ bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { if (MO.isImm()) { return !isInlineConstant(Inst, OpIdx); } - return !MO.isReg() || isSGPR(mc2PseudoReg(MO.getReg())); + return !MO.isReg() || + isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo()); } -bool AMDGPUAsmParser::validateOperandLimitations(const MCInst &Inst) { +bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) { const unsigned Opcode = Inst.getOpcode(); const MCInstrDesc &Desc = MII.get(Opcode); unsigned ConstantBusUseCount = 0; @@ -2060,7 +2109,8 @@ bool AMDGPUAsmParser::validateOperandLimitations(const MCInst &Inst) { if (Desc.TSFlags & (SIInstrFlags::VOPC | SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | - SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) { + SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | + SIInstrFlags::SDWA)) { // Check special imm operands (used by madmk, etc) if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { @@ -2105,6 +2155,60 @@ bool AMDGPUAsmParser::validateOperandLimitations(const MCInst &Inst) { return ConstantBusUseCount <= 1; } +bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) { + + const unsigned Opcode = Inst.getOpcode(); + const MCInstrDesc &Desc = MII.get(Opcode); + + const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); + if (DstIdx == -1 || + Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { + return true; + } + + const MCRegisterInfo *TRI = getContext().getRegisterInfo(); + + const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); + const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); + const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); + + assert(DstIdx != -1); + const MCOperand &Dst = Inst.getOperand(DstIdx); + assert(Dst.isReg()); + const unsigned DstReg = mc2PseudoReg(Dst.getReg()); + + const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; + + for (int SrcIdx : SrcIndices) { + if (SrcIdx == -1) break; + const MCOperand &Src = Inst.getOperand(SrcIdx); + if (Src.isReg()) { + const unsigned SrcReg = mc2PseudoReg(Src.getReg()); + if (isRegIntersect(DstReg, SrcReg, TRI)) { + return false; + } + } + } + + return true; +} + +bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, + const SMLoc &IDLoc) { + if (!validateConstantBusLimitations(Inst)) { + Error(IDLoc, + "invalid operand (violates constant bus restrictions)"); + return false; + } + if (!validateEarlyClobberLimitations(Inst)) { + Error(IDLoc, + "destination must be different than all sources"); + return false; + } + + return true; +} + bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, @@ -2137,9 +2241,8 @@ bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, switch (Result) { default: break; case Match_Success: - if (!validateOperandLimitations(Inst)) { - return Error(IDLoc, - "invalid operand (violates constant bus restrictions)"); + if (!validateInstruction(Inst, IDLoc)) { + return true; } Inst.setLoc(IDLoc); Out.EmitInstruction(Inst, getSTI()); @@ -2344,12 +2447,6 @@ bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { return false; } -bool AMDGPUAsmParser::ParseSectionDirectiveHSAText() { - getParser().getStreamer().SwitchSection( - AMDGPU::getHSATextSection(getContext())); - return false; -} - bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 
if (getLexer().isNot(AsmToken::Identifier)) return TokError("expected symbol name"); @@ -2363,46 +2460,6 @@ bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { return false; } -bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaModuleGlobal() { - if (getLexer().isNot(AsmToken::Identifier)) - return TokError("expected symbol name"); - - StringRef GlobalName = Parser.getTok().getIdentifier(); - - getTargetStreamer().EmitAMDGPUHsaModuleScopeGlobal(GlobalName); - Lex(); - return false; -} - -bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaProgramGlobal() { - if (getLexer().isNot(AsmToken::Identifier)) - return TokError("expected symbol name"); - - StringRef GlobalName = Parser.getTok().getIdentifier(); - - getTargetStreamer().EmitAMDGPUHsaProgramScopeGlobal(GlobalName); - Lex(); - return false; -} - -bool AMDGPUAsmParser::ParseSectionDirectiveHSADataGlobalAgent() { - getParser().getStreamer().SwitchSection( - AMDGPU::getHSADataGlobalAgentSection(getContext())); - return false; -} - -bool AMDGPUAsmParser::ParseSectionDirectiveHSADataGlobalProgram() { - getParser().getStreamer().SwitchSection( - AMDGPU::getHSADataGlobalProgramSection(getContext())); - return false; -} - -bool AMDGPUAsmParser::ParseSectionDirectiveHSARodataReadonlyAgent() { - getParser().getStreamer().SwitchSection( - AMDGPU::getHSARodataReadonlyAgentSection(getContext())); - return false; -} - bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { StringRef IDVal = DirectiveID.getString(); @@ -2418,27 +2475,9 @@ bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { if (IDVal == ".amd_kernel_code_t") return ParseDirectiveAMDKernelCodeT(); - if (IDVal == ".hsatext") - return ParseSectionDirectiveHSAText(); - if (IDVal == ".amdgpu_hsa_kernel") return ParseDirectiveAMDGPUHsaKernel(); - if (IDVal == ".amdgpu_hsa_module_global") - return ParseDirectiveAMDGPUHsaModuleGlobal(); - - if (IDVal == ".amdgpu_hsa_program_global") - return ParseDirectiveAMDGPUHsaProgramGlobal(); - - if (IDVal == ".hsadata_global_agent") - return ParseSectionDirectiveHSADataGlobalAgent(); - - if (IDVal == ".hsadata_global_program") - return ParseSectionDirectiveHSADataGlobalProgram(); - - if (IDVal == ".hsarodata_readonly_agent") - return ParseSectionDirectiveHSARodataReadonlyAgent(); - return true; } @@ -2586,11 +2625,21 @@ AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &Int) { return MatchOperand_ParseFail; Parser.Lex(); + + bool IsMinus = false; + if (getLexer().getKind() == AsmToken::Minus) { + Parser.Lex(); + IsMinus = true; + } + if (getLexer().isNot(AsmToken::Integer)) return MatchOperand_ParseFail; if (getParser().parseAbsoluteExpression(Int)) return MatchOperand_ParseFail; + + if (IsMinus) + Int = -Int; break; } } @@ -2786,7 +2835,13 @@ void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, OptionalIdx[Op.getImmTy()] = i; } - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); + AMDGPUOperand::ImmTy OffsetType = + (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_si || + Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : + AMDGPUOperand::ImmTyOffset; + + addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); + if (!IsGdsHardcoded) { addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); } @@ -2796,6 +2851,7 @@ void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { OptionalImmIndexMap OptionalIdx; + unsigned OperandIdx[4]; unsigned EnMask = 0; int SrcIdx = 0; @@ -2804,15 +2860,18 @@ void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { // Add the register arguments if (Op.isReg()) { - EnMask |= (1 << SrcIdx); + assert(SrcIdx < 4); + OperandIdx[SrcIdx] = Inst.size(); Op.addRegOperands(Inst, 1); ++SrcIdx; continue; } if (Op.isOff()) { - ++SrcIdx; + assert(SrcIdx < 4); + OperandIdx[SrcIdx] = Inst.size(); Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); + ++SrcIdx; continue; } @@ -2828,6 +2887,22 @@ void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { OptionalIdx[Op.getImmTy()] = i; } + assert(SrcIdx == 4); + + bool Compr = false; + if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { + Compr = true; + Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); + Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); + Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); + } + + for (auto i = 0; i < SrcIdx; ++i) { + if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { + EnMask |= Compr? (0x3 << i * 2) : (0x1 << i); + } + } + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); @@ -2872,6 +2947,7 @@ bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { if (getLexer().isNot(AsmToken::Integer)) return true; + SMLoc ValLoc = Parser.getTok().getLoc(); if (getParser().parseAbsoluteExpression(CntVal)) return true; @@ -2889,21 +2965,24 @@ bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); } - // To improve diagnostics, do not skip delimiters on errors - if (!Failed) { - if (getLexer().isNot(AsmToken::RParen)) { - return true; - } - Parser.Lex(); - if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) { - const AsmToken NextToken = getLexer().peekTok(); - if (NextToken.is(AsmToken::Identifier)) { - Parser.Lex(); - } + if (Failed) { + Error(ValLoc, "too large value for " + CntName); + return true; + } + + if (getLexer().isNot(AsmToken::RParen)) { + return true; + } + + Parser.Lex(); + if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) { + const AsmToken NextToken = getLexer().peekTok(); + if (NextToken.is(AsmToken::Identifier)) { + Parser.Lex(); } } - return Failed; + return false; } OperandMatchResultTy @@ -3364,6 +3443,298 @@ bool AMDGPUOperand::isSendMsg() const { return isImmTy(ImmTySendMsg); } +//===----------------------------------------------------------------------===// +// parser helpers +//===----------------------------------------------------------------------===// + +bool +AMDGPUAsmParser::trySkipId(const StringRef Id) { + if (getLexer().getKind() == AsmToken::Identifier && + Parser.getTok().getString() == Id) { + Parser.Lex(); + return true; + } + return false; +} + +bool +AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { + if (getLexer().getKind() == Kind) { + Parser.Lex(); + return true; + } + return false; +} + +bool 
+AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, + const StringRef ErrMsg) { + if (!trySkipToken(Kind)) { + Error(Parser.getTok().getLoc(), ErrMsg); + return false; + } + return true; +} + +bool +AMDGPUAsmParser::parseExpr(int64_t &Imm) { + return !getParser().parseAbsoluteExpression(Imm); +} + +bool +AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { + SMLoc S = Parser.getTok().getLoc(); + if (getLexer().getKind() == AsmToken::String) { + Val = Parser.getTok().getStringContents(); + Parser.Lex(); + return true; + } else { + Error(S, ErrMsg); + return false; + } +} + +//===----------------------------------------------------------------------===// +// swizzle +//===----------------------------------------------------------------------===// + +LLVM_READNONE +static unsigned +encodeBitmaskPerm(const unsigned AndMask, + const unsigned OrMask, + const unsigned XorMask) { + using namespace llvm::AMDGPU::Swizzle; + + return BITMASK_PERM_ENC | + (AndMask << BITMASK_AND_SHIFT) | + (OrMask << BITMASK_OR_SHIFT) | + (XorMask << BITMASK_XOR_SHIFT); +} + +bool +AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, + const unsigned MinVal, + const unsigned MaxVal, + const StringRef ErrMsg) { + for (unsigned i = 0; i < OpNum; ++i) { + if (!skipToken(AsmToken::Comma, "expected a comma")){ + return false; + } + SMLoc ExprLoc = Parser.getTok().getLoc(); + if (!parseExpr(Op[i])) { + return false; + } + if (Op[i] < MinVal || Op[i] > MaxVal) { + Error(ExprLoc, ErrMsg); + return false; + } + } + + return true; +} + +bool +AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { + using namespace llvm::AMDGPU::Swizzle; + + int64_t Lane[LANE_NUM]; + if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, + "expected a 2-bit lane id")) { + Imm = QUAD_PERM_ENC; + for (auto i = 0; i < LANE_NUM; ++i) { + Imm |= Lane[i] << (LANE_SHIFT * i); + } + return true; + } + return false; +} + +bool +AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { + using namespace llvm::AMDGPU::Swizzle; + + SMLoc S = Parser.getTok().getLoc(); + int64_t GroupSize; + int64_t LaneIdx; + + if (!parseSwizzleOperands(1, &GroupSize, + 2, 32, + "group size must be in the interval [2,32]")) { + return false; + } + if (!isPowerOf2_64(GroupSize)) { + Error(S, "group size must be a power of two"); + return false; + } + if (parseSwizzleOperands(1, &LaneIdx, + 0, GroupSize - 1, + "lane id must be in the interval [0,group size - 1]")) { + Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); + return true; + } + return false; +} + +bool +AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { + using namespace llvm::AMDGPU::Swizzle; + + SMLoc S = Parser.getTok().getLoc(); + int64_t GroupSize; + + if (!parseSwizzleOperands(1, &GroupSize, + 2, 32, "group size must be in the interval [2,32]")) { + return false; + } + if (!isPowerOf2_64(GroupSize)) { + Error(S, "group size must be a power of two"); + return false; + } + + Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); + return true; +} + +bool +AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { + using namespace llvm::AMDGPU::Swizzle; + + SMLoc S = Parser.getTok().getLoc(); + int64_t GroupSize; + + if (!parseSwizzleOperands(1, &GroupSize, + 1, 16, "group size must be in the interval [1,16]")) { + return false; + } + if (!isPowerOf2_64(GroupSize)) { + Error(S, "group size must be a power of two"); + return false; + } + + Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); + return true; +} + +bool 
+AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { + using namespace llvm::AMDGPU::Swizzle; + + if (!skipToken(AsmToken::Comma, "expected a comma")) { + return false; + } + + StringRef Ctl; + SMLoc StrLoc = Parser.getTok().getLoc(); + if (!parseString(Ctl)) { + return false; + } + if (Ctl.size() != BITMASK_WIDTH) { + Error(StrLoc, "expected a 5-character mask"); + return false; + } + + unsigned AndMask = 0; + unsigned OrMask = 0; + unsigned XorMask = 0; + + for (size_t i = 0; i < Ctl.size(); ++i) { + unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); + switch(Ctl[i]) { + default: + Error(StrLoc, "invalid mask"); + return false; + case '0': + break; + case '1': + OrMask |= Mask; + break; + case 'p': + AndMask |= Mask; + break; + case 'i': + AndMask |= Mask; + XorMask |= Mask; + break; + } + } + + Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); + return true; +} + +bool +AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { + + SMLoc OffsetLoc = Parser.getTok().getLoc(); + + if (!parseExpr(Imm)) { + return false; + } + if (!isUInt<16>(Imm)) { + Error(OffsetLoc, "expected a 16-bit offset"); + return false; + } + return true; +} + +bool +AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { + using namespace llvm::AMDGPU::Swizzle; + + if (skipToken(AsmToken::LParen, "expected a left parentheses")) { + + SMLoc ModeLoc = Parser.getTok().getLoc(); + bool Ok = false; + + if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { + Ok = parseSwizzleQuadPerm(Imm); + } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { + Ok = parseSwizzleBitmaskPerm(Imm); + } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { + Ok = parseSwizzleBroadcast(Imm); + } else if (trySkipId(IdSymbolic[ID_SWAP])) { + Ok = parseSwizzleSwap(Imm); + } else if (trySkipId(IdSymbolic[ID_REVERSE])) { + Ok = parseSwizzleReverse(Imm); + } else { + Error(ModeLoc, "expected a swizzle mode"); + } + + return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses"); + } + + return false; +} + +OperandMatchResultTy +AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { + SMLoc S = Parser.getTok().getLoc(); + int64_t Imm = 0; + + if (trySkipId("offset")) { + + bool Ok = false; + if (skipToken(AsmToken::Colon, "expected a colon")) { + if (trySkipId("swizzle")) { + Ok = parseSwizzleMacro(Imm); + } else { + Ok = parseSwizzleOffset(Imm); + } + } + + Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); + + return Ok? 
MatchOperand_Success : MatchOperand_ParseFail; + } else { + return MatchOperand_NoMatch; + } +} + +bool +AMDGPUOperand::isSwizzle() const { + return isImmTy(ImmTySwizzle); +} + //===----------------------------------------------------------------------===// // sopp branch targets //===----------------------------------------------------------------------===// @@ -3453,52 +3824,60 @@ void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); } -//===----------------------------------------------------------------------===// -// mimg -//===----------------------------------------------------------------------===// - -void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands) { - unsigned I = 1; - const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); - for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { - ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); - } - +void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { OptionalImmIndexMap OptionalIdx; - for (unsigned E = Operands.size(); I != E; ++I) { - AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); + for (unsigned i = 1, e = Operands.size(); i != e; ++i) { + AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); // Add the register arguments - if (Op.isRegOrImm()) { - Op.addRegOrImmOperands(Inst, 1); + if (Op.isReg()) { + Op.addRegOperands(Inst, 1); + continue; + } + + // Handle the case where soffset is an immediate + if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { + Op.addImmOperands(Inst, 1); continue; - } else if (Op.isImmModifier()) { - OptionalIdx[Op.getImmTy()] = I; - } else { - llvm_unreachable("unexpected operand type"); } + + // Handle tokens like 'offen' which are sometimes hard-coded into the + // asm string. There are no MCInst operands for these. 
+ if (Op.isToken()) { + continue; + } + assert(Op.isImm()); + + // Handle optional arguments + OptionalIdx[Op.getImmTy()] = i; } - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); + addOptionalImmOperand(Inst, Operands, OptionalIdx, + AMDGPUOperand::ImmTyOffset); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDFMT); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyNFMT); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128); - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); } -void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { +//===----------------------------------------------------------------------===// +// mimg +//===----------------------------------------------------------------------===// + +void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, + bool IsAtomic) { unsigned I = 1; const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); } - // Add src, same as dst - ((AMDGPUOperand &)*Operands[I]).addRegOperands(Inst, 1); + if (IsAtomic) { + // Add src, same as dst + ((AMDGPUOperand &)*Operands[I]).addRegOperands(Inst, 1); + } OptionalImmIndexMap OptionalIdx; @@ -3526,6 +3905,10 @@ void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); } +void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { + cvtMIMG(Inst, Operands, true); +} + AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDMask() const { return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDMask); } @@ -3576,6 +3959,14 @@ AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); } +AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetU12() const { + return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); +} + +AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetS13() const { + return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); +} + //===----------------------------------------------------------------------===// // vop3 //===----------------------------------------------------------------------===// @@ -3625,6 +4016,8 @@ static const OptionalOperand AMDGPUOptionalOperandTable[] = { {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, + {"dfmt", AMDGPUOperand::ImmTyDFMT, false, nullptr}, + {"nfmt", AMDGPUOperand::ImmTyNFMT, false, nullptr}, {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr}, {"slc", AMDGPUOperand::ImmTySLC, true, nullptr}, {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, @@ -3642,6 +4035,7 @@ static const OptionalOperand AMDGPUOptionalOperandTable[] = { {"src0_sel", 
AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, + {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr}, {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, @@ -3694,25 +4088,6 @@ OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) return MatchOperand_NoMatch; } -void AMDGPUAsmParser::cvtId(MCInst &Inst, const OperandVector &Operands) { - unsigned I = 1; - const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); - for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { - ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); - } - for (unsigned E = Operands.size(); I != E; ++I) - ((AMDGPUOperand &)*Operands[I]).addRegOrImmOperands(Inst, 1); -} - -void AMDGPUAsmParser::cvtVOP3_2_mod(MCInst &Inst, const OperandVector &Operands) { - uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; - if (TSFlags & SIInstrFlags::VOP3) { - cvtVOP3(Inst, Operands); - } else { - cvtId(Inst, Operands); - } -} - static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { // 1. This operand is input modifiers return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS @@ -3724,91 +4099,78 @@ static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; } -void AMDGPUAsmParser::cvtVOP3Impl(MCInst &Inst, const OperandVector &Operands, - OptionalImmIndexMap &OptionalIdx) { +void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, + OptionalImmIndexMap &OptionalIdx) { + unsigned Opc = Inst.getOpcode(); + unsigned I = 1; const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); } - for (unsigned E = Operands.size(); I != E; ++I) { - AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); - if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { - Op.addRegOrImmWithFPInputModsOperands(Inst, 2); - } else if (Op.isImmModifier()) { - OptionalIdx[Op.getImmTy()] = I; - } else if (Op.isRegOrImm()) { - Op.addRegOrImmOperands(Inst, 1); - } else { - llvm_unreachable("unhandled operand type"); + if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { + // This instruction has src modifiers + for (unsigned E = Operands.size(); I != E; ++I) { + AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); + if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { + Op.addRegOrImmWithFPInputModsOperands(Inst, 2); + } else if (Op.isImmModifier()) { + OptionalIdx[Op.getImmTy()] = I; + } else if (Op.isRegOrImm()) { + Op.addRegOrImmOperands(Inst, 1); + } else { + llvm_unreachable("unhandled operand type"); + } + } + } else { + // No src modifiers + for (unsigned E = Operands.size(); I != E; ++I) { + AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); + if (Op.isMod()) { + OptionalIdx[Op.getImmTy()] = I; + } else { + Op.addRegOrImmOperands(Inst, 1); + } } } -} -void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { - OptionalImmIndexMap OptionalIdx; - - cvtVOP3Impl(Inst, Operands, OptionalIdx); + if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); + } - addOptionalImmOperand(Inst, Operands, 
OptionalIdx, AMDGPUOperand::ImmTyClampSI); - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); + if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); + } // special case v_mac_{f16, f32}: // it has src2 register operand that is tied to dst operand // we don't allow modifiers for this operand in assembler so src2_modifiers // should be 0 - if (Inst.getOpcode() == AMDGPU::V_MAC_F32_e64_si || - Inst.getOpcode() == AMDGPU::V_MAC_F32_e64_vi || - Inst.getOpcode() == AMDGPU::V_MAC_F16_e64_vi) { + if (Opc == AMDGPU::V_MAC_F32_e64_si || Opc == AMDGPU::V_MAC_F32_e64_vi || + Opc == AMDGPU::V_MAC_F16_e64_vi) { auto it = Inst.begin(); - std::advance( - it, - AMDGPU::getNamedOperandIdx(Inst.getOpcode() == AMDGPU::V_MAC_F16_e64_vi ? - AMDGPU::V_MAC_F16_e64 : - AMDGPU::V_MAC_F32_e64, - AMDGPU::OpName::src2_modifiers)); + std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 ++it; Inst.insert(it, Inst.getOperand(0)); // src2 = dst } } -void AMDGPUAsmParser::cvtVOP3OMod(MCInst &Inst, const OperandVector &Operands) { +void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { OptionalImmIndexMap OptionalIdx; - - unsigned I = 1; - const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); - for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { - ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); - } - - for (unsigned E = Operands.size(); I != E; ++I) { - AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); - if (Op.isMod()) { - OptionalIdx[Op.getImmTy()] = I; - } else { - Op.addRegOrImmOperands(Inst, 1); - } - } - - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); + cvtVOP3(Inst, Operands, OptionalIdx); } void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) { OptionalImmIndexMap OptIdx; - cvtVOP3Impl(Inst, Operands, OptIdx); + cvtVOP3(Inst, Operands, OptIdx); // FIXME: This is messy. 
Parse the modifiers as if it was a normal VOP3 // instruction, and then figure out where to actually put the modifiers int Opc = Inst.getOpcode(); - if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { - addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyClampSI); - } - addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, -1); @@ -3860,7 +4222,7 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) { int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); - Inst.getOperand(ModIdx).setImm(ModVal); + Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); } } @@ -4130,14 +4492,19 @@ void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); } +void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { + cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true); +} + void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { - cvtSDWA(Inst, Operands, SIInstrFlags::VOPC); + cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); } void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, - uint64_t BasicInstType) { + uint64_t BasicInstType, bool skipVcc) { using namespace llvm::AMDGPU::SDWA; OptionalImmIndexMap OptionalIdx; + bool skippedVcc = false; unsigned I = 1; const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); @@ -4147,15 +4514,22 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, for (unsigned E = Operands.size(); I != E; ++I) { AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); - // Add the register arguments - if ((BasicInstType == SIInstrFlags::VOPC || - BasicInstType == SIInstrFlags::VOP2)&& - Op.isReg() && - Op.Reg.RegNo == AMDGPU::VCC) { - // VOPC and VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. - // Skip it. - continue; - } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { + if (skipVcc && !skippedVcc && Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) { + // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. + // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) + // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. + // Skip VCC only if we didn't skip it on previous iteration. 
+ if (BasicInstType == SIInstrFlags::VOP2 && + (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) { + skippedVcc = true; + continue; + } else if (BasicInstType == SIInstrFlags::VOPC && + Inst.getNumOperands() == 0) { + skippedVcc = true; + continue; + } + } + if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { Op.addRegWithInputModsOperands(Inst, 2); } else if (Op.isImm()) { // Handle optional arguments @@ -4163,20 +4537,28 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, } else { llvm_unreachable("Invalid operand type"); } + skippedVcc = false; } - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); - - if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) { - // V_NOP_sdwa_vi has no optional sdwa arguments + if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 && + Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) { + // v_nop_sdwa_vi/gfx9 has no optional sdwa arguments switch (BasicInstType) { case SIInstrFlags::VOP1: + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); + if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); + } addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); break; case SIInstrFlags::VOP2: + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); + if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); + } addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); @@ -4184,6 +4566,7 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, break; case SIInstrFlags::VOPC: + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); break; @@ -4199,10 +4582,9 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { auto it = Inst.begin(); std::advance( - it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2)); + it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2)); Inst.insert(it, Inst.getOperand(0)); // src2 = dst } - } /// Force static initialization.
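The swizzle macros the parser accepts above (quad_perm, bitmask_perm, broadcast, swap, reverse) all lower to the single 16-bit offset operand of ds_swizzle_b32, and broadcast, swap and reverse are just canned bitmask_perm patterns. Below is a minimal standalone sketch of that lowering; the field shifts and the bitmask-mode tag are assumptions based on the DS_SWIZZLE_B32 offset layout (offset[15] = 0 selects bitmask mode; and/or/xor masks in bits 4:0, 9:5, 14:10), not values quoted from SIDefines.h.

```cpp
// Standalone sketch of the bitmask_perm encoding produced by the new
// parseSwizzle* helpers. Field positions are assumed from the DS_SWIZZLE_B32
// offset-field layout, not copied from SIDefines.h.
#include <cstdio>

namespace Swizzle {
constexpr unsigned BITMASK_PERM_ENC = 0x0000; // offset[15] = 0: bitmask mode
constexpr unsigned BITMASK_AND_SHIFT = 0;
constexpr unsigned BITMASK_OR_SHIFT = 5;
constexpr unsigned BITMASK_XOR_SHIFT = 10;
constexpr unsigned BITMASK_MAX = 0x1F;
} // namespace Swizzle

// Each lane reads from lane ((id & AndMask) | OrMask) ^ XorMask.
static unsigned encodeBitmaskPerm(unsigned AndMask, unsigned OrMask,
                                  unsigned XorMask) {
  using namespace Swizzle;
  return BITMASK_PERM_ENC | (AndMask << BITMASK_AND_SHIFT) |
         (OrMask << BITMASK_OR_SHIFT) | (XorMask << BITMASK_XOR_SHIFT);
}

int main() {
  using namespace Swizzle;
  // swizzle(BROADCAST, 8, 3): the and-mask clears the low log2(8) bits of
  // the lane id (keeping the group base), then lane 3 of the group is OR-ed in.
  unsigned Broadcast = encodeBitmaskPerm(BITMASK_MAX - 8 + 1, 3, 0);
  // swizzle(SWAP, 4): XOR-ing the lane id with the group size swaps
  // neighboring groups of 4 lanes.
  unsigned Swap = encodeBitmaskPerm(BITMASK_MAX, 0, 4);
  // swizzle(REVERSE, 8): XOR-ing with group size - 1 mirrors lanes in a group.
  unsigned Reverse = encodeBitmaskPerm(BITMASK_MAX, 0, 8 - 1);
  std::printf("broadcast=0x%04x swap=0x%04x reverse=0x%04x\n", Broadcast, Swap,
              Reverse);
}
```

Under these assumptions, swizzle(SWAP, 4) encodes as 0x101F and swizzle(BROADCAST, 8, 3) as 0x0078, matching the and/or/xor arguments the parseSwizzleBroadcast/Swap/Reverse helpers pass to encodeBitmaskPerm.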
diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/BUFInstructions.td b/interpreter/llvm/src/lib/Target/AMDGPU/BUFInstructions.td index 89eddb9ce961f..2e96c14eaa320 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/BUFInstructions.td +++ b/interpreter/llvm/src/lib/Target/AMDGPU/BUFInstructions.td @@ -11,8 +11,8 @@ def MUBUFAddr32 : ComplexPattern; def MUBUFAddr64 : ComplexPattern; def MUBUFAddr64Atomic : ComplexPattern; -def MUBUFScratchOffen : ComplexPattern; -def MUBUFScratchOffset : ComplexPattern; +def MUBUFScratchOffen : ComplexPattern; +def MUBUFScratchOffset : ComplexPattern; def MUBUFOffset : ComplexPattern; def MUBUFOffsetNoGLC : ComplexPattern; @@ -57,6 +57,11 @@ class MUBUFAddr64Table { string OpName = NAME # suffix; } +class MTBUFAddr64Table { + bit IsAddr64 = is_addr64; + string OpName = NAME # suffix; +} + //===----------------------------------------------------------------------===// // MTBUF classes //===----------------------------------------------------------------------===// @@ -78,14 +83,31 @@ class MTBUF_Pseudo offen = 0; + bits<1> idxen = 0; + bits<1> addr64 = 0; + bits<1> has_vdata = 1; + bits<1> has_vaddr = 1; + bits<1> has_glc = 1; + bits<1> glc_value = 0; // the value for glc if no such operand + bits<4> dfmt_value = 1; // the value for dfmt if no such operand + bits<3> nfmt_value = 0; // the value for nfmt if no such operand + bits<1> has_srsrc = 1; + bits<1> has_soffset = 1; + bits<1> has_offset = 1; + bits<1> has_slc = 1; + bits<1> has_tfe = 1; + bits<1> has_dfmt = 1; + bits<1> has_nfmt = 1; } class MTBUF_Real : - InstSI , - Enc64 { + InstSI { let isPseudo = 0; let isCodeGenOnly = 0; @@ -97,57 +119,168 @@ class MTBUF_Real : let DisableEncoding = ps.DisableEncoding; let TSFlags = ps.TSFlags; - bits<8> vdata; bits<12> offset; - bits<1> offen; - bits<1> idxen; - bits<1> glc; - bits<1> addr64; - bits<4> dfmt; - bits<3> nfmt; - bits<8> vaddr; - bits<7> srsrc; - bits<1> slc; - bits<1> tfe; - bits<8> soffset; - - let Inst{11-0} = offset; - let Inst{12} = offen; - let Inst{13} = idxen; - let Inst{14} = glc; - let Inst{22-19} = dfmt; - let Inst{25-23} = nfmt; - let Inst{31-26} = 0x3a; //encoding - let Inst{39-32} = vaddr; - let Inst{47-40} = vdata; - let Inst{52-48} = srsrc{6-2}; - let Inst{54} = slc; - let Inst{55} = tfe; - let Inst{63-56} = soffset; + bits<1> glc; + bits<4> dfmt; + bits<3> nfmt; + bits<8> vaddr; + bits<8> vdata; + bits<7> srsrc; + bits<1> slc; + bits<1> tfe; + bits<8> soffset; +} + +class getMTBUFInsDA vdataList, + list vaddrList=[]> { + RegisterClass vdataClass = !if(!empty(vdataList), ?, !head(vdataList)); + RegisterClass vaddrClass = !if(!empty(vaddrList), ?, !head(vaddrList)); + dag InsNoData = !if(!empty(vaddrList), + (ins SReg_128:$srsrc, SCSrc_b32:$soffset, + offset:$offset, DFMT:$dfmt, NFMT:$nfmt, GLC:$glc, slc:$slc, tfe:$tfe), + (ins vaddrClass:$vaddr, SReg_128:$srsrc, SCSrc_b32:$soffset, + offset:$offset, DFMT:$dfmt, NFMT:$nfmt, GLC:$glc, slc:$slc, tfe:$tfe) + ); + dag InsData = !if(!empty(vaddrList), + (ins vdataClass:$vdata, SReg_128:$srsrc, + SCSrc_b32:$soffset, offset:$offset, DFMT:$dfmt, NFMT:$nfmt, GLC:$glc, + slc:$slc, tfe:$tfe), + (ins vdataClass:$vdata, vaddrClass:$vaddr, SReg_128:$srsrc, + SCSrc_b32:$soffset, offset:$offset, DFMT:$dfmt, NFMT:$nfmt, GLC:$glc, + slc:$slc, tfe:$tfe) + ); + dag ret = !if(!empty(vdataList), InsNoData, InsData); } -class MTBUF_Load_Pseudo : MTBUF_Pseudo < - opName, (outs regClass:$dst), - (ins u16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64, - i8imm:$dfmt, i8imm:$nfmt, 
VGPR_32:$vaddr, SReg_128:$srsrc, - i1imm:$slc, i1imm:$tfe, SCSrc_b32:$soffset), - " $dst, $offset, $offen, $idxen, $glc, $addr64, $dfmt,"# - " $nfmt, $vaddr, $srsrc, $slc, $tfe, $soffset"> { +class getMTBUFIns vdataList=[]> { + dag ret = + !if(!eq(addrKind, BUFAddrKind.Offset), getMTBUFInsDA.ret, + !if(!eq(addrKind, BUFAddrKind.OffEn), getMTBUFInsDA.ret, + !if(!eq(addrKind, BUFAddrKind.IdxEn), getMTBUFInsDA.ret, + !if(!eq(addrKind, BUFAddrKind.BothEn), getMTBUFInsDA.ret, + !if(!eq(addrKind, BUFAddrKind.Addr64), getMTBUFInsDA.ret, + (ins)))))); +} + +class getMTBUFAsmOps { + string Pfx = + !if(!eq(addrKind, BUFAddrKind.Offset), "off, $srsrc, $dfmt, $nfmt, $soffset", + !if(!eq(addrKind, BUFAddrKind.OffEn), + "$vaddr, $srsrc, $dfmt, $nfmt, $soffset offen", + !if(!eq(addrKind, BUFAddrKind.IdxEn), + "$vaddr, $srsrc, $dfmt, $nfmt, $soffset idxen", + !if(!eq(addrKind, BUFAddrKind.BothEn), + "$vaddr, $srsrc, $dfmt, $nfmt, $soffset idxen offen", + !if(!eq(addrKind, BUFAddrKind.Addr64), + "$vaddr, $srsrc, $dfmt, $nfmt, $soffset addr64", + ""))))); + string ret = Pfx # "$offset"; +} + +class MTBUF_SetupAddr { + bits<1> offen = !if(!eq(addrKind, BUFAddrKind.OffEn), 1, + !if(!eq(addrKind, BUFAddrKind.BothEn), 1 , 0)); + + bits<1> idxen = !if(!eq(addrKind, BUFAddrKind.IdxEn), 1, + !if(!eq(addrKind, BUFAddrKind.BothEn), 1 , 0)); + + bits<1> addr64 = !if(!eq(addrKind, BUFAddrKind.Addr64), 1, 0); + + bits<1> has_vaddr = !if(!eq(addrKind, BUFAddrKind.Offset), 0, 1); +} + +class MTBUF_Load_Pseudo pattern=[], + // Workaround bug bz30254 + int addrKindCopy = addrKind> + : MTBUF_Pseudo.ret, + " $vdata, " # getMTBUFAsmOps.ret # "$glc$slc$tfe", + pattern>, + MTBUF_SetupAddr { + let PseudoInstr = opName # "_" # getAddrName.ret; let mayLoad = 1; let mayStore = 0; } -class MTBUF_Store_Pseudo : MTBUF_Pseudo < - opName, (outs), - (ins regClass:$vdata, u16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, - i1imm:$addr64, i8imm:$dfmt, i8imm:$nfmt, VGPR_32:$vaddr, - SReg_128:$srsrc, i1imm:$slc, i1imm:$tfe, SCSrc_b32:$soffset), - " $vdata, $offset, $offen, $idxen, $glc, $addr64, $dfmt,"# - " $nfmt, $vaddr, $srsrc, $slc, $tfe, $soffset"> { +multiclass MTBUF_Pseudo_Loads { + + def _OFFSET : MTBUF_Load_Pseudo , + MTBUFAddr64Table<0>; + + def _ADDR64 : MTBUF_Load_Pseudo , + MTBUFAddr64Table<1>; + + def _OFFEN : MTBUF_Load_Pseudo ; + def _IDXEN : MTBUF_Load_Pseudo ; + def _BOTHEN : MTBUF_Load_Pseudo ; + + let DisableWQM = 1 in { + def _OFFSET_exact : MTBUF_Load_Pseudo ; + def _OFFEN_exact : MTBUF_Load_Pseudo ; + def _IDXEN_exact : MTBUF_Load_Pseudo ; + def _BOTHEN_exact : MTBUF_Load_Pseudo ; + } +} + +class MTBUF_Store_Pseudo pattern=[], + // Workaround bug bz30254 + int addrKindCopy = addrKind, + RegisterClass vdataClassCopy = vdataClass> + : MTBUF_Pseudo.ret, + " $vdata, " # getMTBUFAsmOps.ret # "$glc$slc$tfe", + pattern>, + MTBUF_SetupAddr { + let PseudoInstr = opName # "_" # getAddrName.ret; let mayLoad = 0; let mayStore = 1; } +multiclass MTBUF_Pseudo_Stores { + + def _OFFSET : MTBUF_Store_Pseudo , + MTBUFAddr64Table<0>; + + def _ADDR64 : MTBUF_Store_Pseudo , + MTBUFAddr64Table<1>; + + def _OFFEN : MTBUF_Store_Pseudo ; + def _IDXEN : MTBUF_Store_Pseudo ; + def _BOTHEN : MTBUF_Store_Pseudo ; + + let DisableWQM = 1 in { + def _OFFSET_exact : MTBUF_Store_Pseudo ; + def _OFFEN_exact : MTBUF_Store_Pseudo ; + def _IDXEN_exact : MTBUF_Store_Pseudo ; + def _BOTHEN_exact : MTBUF_Store_Pseudo ; + } +} + + //===----------------------------------------------------------------------===// // MUBUF classes 
//===----------------------------------------------------------------------===// @@ -676,14 +809,14 @@ def BUFFER_WBINVL1 : MUBUF_Invalidate <"buffer_wbinvl1", // MTBUF Instructions //===----------------------------------------------------------------------===// -//def TBUFFER_LOAD_FORMAT_X : MTBUF_ <0, "tbuffer_load_format_x", []>; -//def TBUFFER_LOAD_FORMAT_XY : MTBUF_ <1, "tbuffer_load_format_xy", []>; -//def TBUFFER_LOAD_FORMAT_XYZ : MTBUF_ <2, "tbuffer_load_format_xyz", []>; -def TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Load_Pseudo <"tbuffer_load_format_xyzw", VReg_128>; -def TBUFFER_STORE_FORMAT_X : MTBUF_Store_Pseudo <"tbuffer_store_format_x", VGPR_32>; -def TBUFFER_STORE_FORMAT_XY : MTBUF_Store_Pseudo <"tbuffer_store_format_xy", VReg_64>; -def TBUFFER_STORE_FORMAT_XYZ : MTBUF_Store_Pseudo <"tbuffer_store_format_xyz", VReg_128>; -def TBUFFER_STORE_FORMAT_XYZW : MTBUF_Store_Pseudo <"tbuffer_store_format_xyzw", VReg_128>; +defm TBUFFER_LOAD_FORMAT_X : MTBUF_Pseudo_Loads <"tbuffer_load_format_x", VGPR_32>; +defm TBUFFER_LOAD_FORMAT_XY : MTBUF_Pseudo_Loads <"tbuffer_load_format_xy", VReg_64>; +defm TBUFFER_LOAD_FORMAT_XYZ : MTBUF_Pseudo_Loads <"tbuffer_load_format_xyz", VReg_128>; +defm TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Pseudo_Loads <"tbuffer_load_format_xyzw", VReg_128>; +defm TBUFFER_STORE_FORMAT_X : MTBUF_Pseudo_Stores <"tbuffer_store_format_x", VGPR_32>; +defm TBUFFER_STORE_FORMAT_XY : MTBUF_Pseudo_Stores <"tbuffer_store_format_xy", VReg_64>; +defm TBUFFER_STORE_FORMAT_XYZ : MTBUF_Pseudo_Stores <"tbuffer_store_format_xyz", VReg_128>; +defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Pseudo_Stores <"tbuffer_store_format_xyzw", VReg_128>; } // End let SubtargetPredicate = isGCN @@ -1093,22 +1226,98 @@ defm : MUBUFScratchStorePat : Pat< - (SItbuffer_store v4i32:$rsrc, vt:$vdata, num_channels, i32:$vaddr, - i32:$soffset, imm:$inst_offset, imm:$dfmt, - imm:$nfmt, imm:$offen, imm:$idxen, - imm:$glc, imm:$slc, imm:$tfe), - (opcode - $vdata, (as_i16imm $inst_offset), (as_i1imm $offen), (as_i1imm $idxen), - (as_i1imm $glc), 0, (as_i8imm $dfmt), (as_i8imm $nfmt), $vaddr, $rsrc, - (as_i1imm $slc), (as_i1imm $tfe), $soffset) ->; +//===----------------------------------------------------------------------===// +// tbuffer_load/store_format patterns +//===----------------------------------------------------------------------===// + +multiclass MTBUF_LoadIntrinsicPat { + def : Pat< + (vt (name v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset, + imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc)), + (!cast(opcode # _OFFSET) $rsrc, $soffset, (as_i16imm $offset), + (as_i8imm $dfmt), (as_i8imm $nfmt), (as_i1imm $glc), (as_i1imm $slc), 0) + >; + + def : Pat< + (vt (name v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset, + imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc)), + (!cast(opcode # _IDXEN) $vindex, $rsrc, $soffset, (as_i16imm $offset), + (as_i8imm $dfmt), (as_i8imm $nfmt), (as_i1imm $glc), (as_i1imm $slc), 0) + >; + + def : Pat< + (vt (name v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset, + imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc)), + (!cast(opcode # _OFFEN) $voffset, $rsrc, $soffset, (as_i16imm $offset), + (as_i8imm $dfmt), (as_i8imm $nfmt), (as_i1imm $glc), (as_i1imm $slc), 0) + >; -def : MTBUF_StoreResource ; -def : MTBUF_StoreResource ; -def : MTBUF_StoreResource ; -def : MTBUF_StoreResource ; + def : Pat< + (vt (name v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, imm:$offset, + imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc)), + (!cast(opcode # _BOTHEN) + (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1), + 
$rsrc, $soffset, (as_i16imm $offset), + (as_i8imm $dfmt), (as_i8imm $nfmt), (as_i1imm $glc), (as_i1imm $slc), 0) + >; +} + +defm : MTBUF_LoadIntrinsicPat; +defm : MTBUF_LoadIntrinsicPat; +defm : MTBUF_LoadIntrinsicPat; +defm : MTBUF_LoadIntrinsicPat; +defm : MTBUF_LoadIntrinsicPat; +defm : MTBUF_LoadIntrinsicPat; + +multiclass MTBUF_StoreIntrinsicPat { + def : Pat< + (name vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset, + imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc), + (!cast(opcode # _OFFSET_exact) $vdata, $rsrc, $soffset, + (as_i16imm $offset), (as_i8imm $dfmt), + (as_i8imm $nfmt), (as_i1imm $glc), + (as_i1imm $slc), 0) + >; + + def : Pat< + (name vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset, + imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc), + (!cast(opcode # _IDXEN_exact) $vdata, $vindex, $rsrc, $soffset, + (as_i16imm $offset), (as_i8imm $dfmt), + (as_i8imm $nfmt), (as_i1imm $glc), + (as_i1imm $slc), 0) + >; + + def : Pat< + (name vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset, + imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc), + (!cast(opcode # _OFFEN_exact) $vdata, $voffset, $rsrc, $soffset, + (as_i16imm $offset), (as_i8imm $dfmt), + (as_i8imm $nfmt), (as_i1imm $glc), + (as_i1imm $slc), 0) + >; + + def : Pat< + (name vt:$vdata, v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, + imm:$offset, imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc), + (!cast(opcode # _BOTHEN_exact) + $vdata, + (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1), + $rsrc, $soffset, (as_i16imm $offset), + (as_i8imm $dfmt), (as_i8imm $nfmt), (as_i1imm $glc), (as_i1imm $slc), 0) + >; +} + +defm : MTBUF_StoreIntrinsicPat; +defm : MTBUF_StoreIntrinsicPat; +defm : MTBUF_StoreIntrinsicPat; +defm : MTBUF_StoreIntrinsicPat; +defm : MTBUF_StoreIntrinsicPat; +defm : MTBUF_StoreIntrinsicPat; +defm : MTBUF_StoreIntrinsicPat; +defm : MTBUF_StoreIntrinsicPat; } // End let Predicates = [isGCN] @@ -1224,21 +1433,44 @@ def BUFFER_WBINVL1_si : MUBUF_Real_si <0x71, BUFFER_WBINVL1>; class MTBUF_Real_si op, MTBUF_Pseudo ps> : MTBUF_Real, + Enc64, SIMCInstr { let AssemblerPredicate=isSICI; let DecoderNamespace="SICI"; - bits<1> addr64; - let Inst{15} = addr64; + let Inst{11-0} = !if(ps.has_offset, offset, ?); + let Inst{12} = ps.offen; + let Inst{13} = ps.idxen; + let Inst{14} = !if(ps.has_glc, glc, ps.glc_value); + let Inst{15} = ps.addr64; let Inst{18-16} = op; + let Inst{22-19} = !if(ps.has_dfmt, dfmt, ps.dfmt_value); + let Inst{25-23} = !if(ps.has_nfmt, nfmt, ps.nfmt_value); + let Inst{31-26} = 0x3a; //encoding + let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?); + let Inst{47-40} = !if(ps.has_vdata, vdata, ?); + let Inst{52-48} = !if(ps.has_srsrc, srsrc{6-2}, ?); + let Inst{54} = !if(ps.has_slc, slc, ?); + let Inst{55} = !if(ps.has_tfe, tfe, ?); + let Inst{63-56} = !if(ps.has_soffset, soffset, ?); } -def TBUFFER_LOAD_FORMAT_XYZW_si : MTBUF_Real_si <3, TBUFFER_LOAD_FORMAT_XYZW>; -def TBUFFER_STORE_FORMAT_X_si : MTBUF_Real_si <4, TBUFFER_STORE_FORMAT_X>; -def TBUFFER_STORE_FORMAT_XY_si : MTBUF_Real_si <5, TBUFFER_STORE_FORMAT_XY>; -def TBUFFER_STORE_FORMAT_XYZ_si : MTBUF_Real_si <6, TBUFFER_STORE_FORMAT_XYZ>; -def TBUFFER_STORE_FORMAT_XYZW_si : MTBUF_Real_si <7, TBUFFER_STORE_FORMAT_XYZW>; +multiclass MTBUF_Real_AllAddr_si op> { + def _OFFSET_si : MTBUF_Real_si (NAME#"_OFFSET")>; + def _ADDR64_si : MTBUF_Real_si (NAME#"_ADDR64")>; + def _OFFEN_si : MTBUF_Real_si (NAME#"_OFFEN")>; + def _IDXEN_si : MTBUF_Real_si (NAME#"_IDXEN")>; + def _BOTHEN_si : MTBUF_Real_si (NAME#"_BOTHEN")>; +} +defm 
TBUFFER_LOAD_FORMAT_X : MTBUF_Real_AllAddr_si <0>; +defm TBUFFER_LOAD_FORMAT_XY : MTBUF_Real_AllAddr_si <1>; +//defm TBUFFER_LOAD_FORMAT_XYZ : MTBUF_Real_AllAddr_si <2>; +defm TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Real_AllAddr_si <3>; +defm TBUFFER_STORE_FORMAT_X : MTBUF_Real_AllAddr_si <4>; +defm TBUFFER_STORE_FORMAT_XY : MTBUF_Real_AllAddr_si <5>; +defm TBUFFER_STORE_FORMAT_XYZ : MTBUF_Real_AllAddr_si <6>; +defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Real_AllAddr_si <7>; //===----------------------------------------------------------------------===// // CI @@ -1350,16 +1582,39 @@ def BUFFER_WBINVL1_VOL_vi : MUBUF_Real_vi <0x3f, BUFFER_WBINVL1_VOL>; class MTBUF_Real_vi op, MTBUF_Pseudo ps> : MTBUF_Real, + Enc64, SIMCInstr { let AssemblerPredicate=isVI; let DecoderNamespace="VI"; + let Inst{11-0} = !if(ps.has_offset, offset, ?); + let Inst{12} = ps.offen; + let Inst{13} = ps.idxen; + let Inst{14} = !if(ps.has_glc, glc, ps.glc_value); let Inst{18-15} = op; + let Inst{22-19} = !if(ps.has_dfmt, dfmt, ps.dfmt_value); + let Inst{25-23} = !if(ps.has_nfmt, nfmt, ps.nfmt_value); + let Inst{31-26} = 0x3a; //encoding + let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?); + let Inst{47-40} = !if(ps.has_vdata, vdata, ?); + let Inst{52-48} = !if(ps.has_srsrc, srsrc{6-2}, ?); + let Inst{54} = !if(ps.has_slc, slc, ?); + let Inst{55} = !if(ps.has_tfe, tfe, ?); + let Inst{63-56} = !if(ps.has_soffset, soffset, ?); } -def TBUFFER_LOAD_FORMAT_XYZW_vi : MTBUF_Real_vi <3, TBUFFER_LOAD_FORMAT_XYZW>; -def TBUFFER_STORE_FORMAT_X_vi : MTBUF_Real_vi <4, TBUFFER_STORE_FORMAT_X>; -def TBUFFER_STORE_FORMAT_XY_vi : MTBUF_Real_vi <5, TBUFFER_STORE_FORMAT_XY>; -def TBUFFER_STORE_FORMAT_XYZ_vi : MTBUF_Real_vi <6, TBUFFER_STORE_FORMAT_XYZ>; -def TBUFFER_STORE_FORMAT_XYZW_vi : MTBUF_Real_vi <7, TBUFFER_STORE_FORMAT_XYZW>; +multiclass MTBUF_Real_AllAddr_vi op> { + def _OFFSET_vi : MTBUF_Real_vi (NAME#"_OFFSET")>; + def _OFFEN_vi : MTBUF_Real_vi (NAME#"_OFFEN")>; + def _IDXEN_vi : MTBUF_Real_vi (NAME#"_IDXEN")>; + def _BOTHEN_vi : MTBUF_Real_vi (NAME#"_BOTHEN")>; +} +defm TBUFFER_LOAD_FORMAT_X : MTBUF_Real_AllAddr_vi <0>; +defm TBUFFER_LOAD_FORMAT_XY : MTBUF_Real_AllAddr_vi <1>; +//defm TBUFFER_LOAD_FORMAT_XYZ : MTBUF_Real_AllAddr_vi <2>; +defm TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Real_AllAddr_vi <3>; +defm TBUFFER_STORE_FORMAT_X : MTBUF_Real_AllAddr_vi <4>; +defm TBUFFER_STORE_FORMAT_XY : MTBUF_Real_AllAddr_vi <5>; +defm TBUFFER_STORE_FORMAT_XYZ : MTBUF_Real_AllAddr_vi <6>; +defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Real_AllAddr_vi <7>; diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/CMakeLists.txt b/interpreter/llvm/src/lib/Target/AMDGPU/CMakeLists.txt index 7c0ef4aeac3c7..971208c5db847 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/CMakeLists.txt +++ b/interpreter/llvm/src/lib/Target/AMDGPU/CMakeLists.txt @@ -47,7 +47,9 @@ add_llvm_target(AMDGPUCodeGen AMDGPUIntrinsicInfo.cpp AMDGPUISelDAGToDAG.cpp AMDGPULowerIntrinsics.cpp + AMDGPUMacroFusion.cpp AMDGPUMCInstLower.cpp + AMDGPUMachineCFGStructurizer.cpp AMDGPUMachineFunction.cpp AMDGPUUnifyMetadata.cpp AMDGPUOpenCLImageTypeLoweringPass.cpp @@ -57,6 +59,7 @@ add_llvm_target(AMDGPUCodeGen AMDGPUISelLowering.cpp AMDGPUInstrInfo.cpp AMDGPUPromoteAlloca.cpp + AMDGPURegAsmNames.inc.cpp AMDGPURegisterInfo.cpp AMDGPUUnifyDivergentExitNodes.cpp GCNHazardRecognizer.cpp @@ -94,7 +97,6 @@ add_llvm_target(AMDGPUCodeGen SIPeepholeSDWA.cpp SIRegisterInfo.cpp SIShrinkInstructions.cpp - SITypeRewriter.cpp SIWholeQuadMode.cpp GCNIterativeScheduler.cpp GCNMinRegStrategy.cpp diff --git 
a/interpreter/llvm/src/lib/Target/AMDGPU/DSInstructions.td b/interpreter/llvm/src/lib/Target/AMDGPU/DSInstructions.td index 357e18108e7e8..fc516c3b39c28 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/DSInstructions.td +++ b/interpreter/llvm/src/lib/Target/AMDGPU/DSInstructions.td @@ -145,10 +145,10 @@ class DS_1A2D_Off8_RET +class DS_1A_RET : DS_Pseudo { let has_data0 = 0; @@ -440,7 +440,7 @@ def DS_WRITE_SRC2_B32 : DS_1A<"ds_write_src2_b32">; def DS_WRITE_SRC2_B64 : DS_1A<"ds_write_src2_b64">; let Uses = [EXEC], mayLoad = 0, mayStore = 0, isConvergent = 1 in { -def DS_SWIZZLE_B32 : DS_1A_RET <"ds_swizzle_b32">; +def DS_SWIZZLE_B32 : DS_1A_RET <"ds_swizzle_b32", VGPR_32, SwizzleImm>; } let mayStore = 0 in { diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/interpreter/llvm/src/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index 4fb03b62bba9a..966c6fec20c63 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/interpreter/llvm/src/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -20,21 +20,20 @@ #include "AMDGPUDisassembler.h" #include "AMDGPU.h" #include "AMDGPURegisterInfo.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "SIDefines.h" #include "Utils/AMDGPUBaseInfo.h" -#include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Support/ELF.h" -#include "llvm/Support/Endian.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/Endian.h" #include "llvm/Support/TargetRegistry.h" - using namespace llvm; #define DEBUG_TYPE "amdgpu-disassembler" @@ -50,6 +49,17 @@ addOperand(MCInst &Inst, const MCOperand& Opnd) { MCDisassembler::SoftFail; } +static int insertNamedMCOperand(MCInst &MI, const MCOperand &Op, + uint16_t NameIdx) { + int OpIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), NameIdx); + if (OpIdx != -1) { + auto I = MI.begin(); + std::advance(I, OpIdx); + MI.insert(I, Op); + } + return OpIdx; +} + static DecodeStatus decodeSoppBrTarget(MCInst &Inst, unsigned Imm, uint64_t Addr, const void *Decoder) { auto DAsm = static_cast(Decoder); @@ -62,32 +72,34 @@ static DecodeStatus decodeSoppBrTarget(MCInst &Inst, unsigned Imm, return addOperand(Inst, MCOperand::createImm(Imm)); } -#define DECODE_OPERAND2(RegClass, DecName) \ -static DecodeStatus Decode##RegClass##RegisterClass(MCInst &Inst, \ - unsigned Imm, \ - uint64_t /*Addr*/, \ - const void *Decoder) { \ +#define DECODE_OPERAND(StaticDecoderName, DecoderName) \ +static DecodeStatus StaticDecoderName(MCInst &Inst, \ + unsigned Imm, \ + uint64_t /*Addr*/, \ + const void *Decoder) { \ auto DAsm = static_cast(Decoder); \ - return addOperand(Inst, DAsm->decodeOperand_##DecName(Imm)); \ + return addOperand(Inst, DAsm->DecoderName(Imm)); \ } -#define DECODE_OPERAND(RegClass) DECODE_OPERAND2(RegClass, RegClass) +#define DECODE_OPERAND_REG(RegClass) \ +DECODE_OPERAND(Decode##RegClass##RegisterClass, decodeOperand_##RegClass) -DECODE_OPERAND(VGPR_32) -DECODE_OPERAND(VS_32) -DECODE_OPERAND(VS_64) +DECODE_OPERAND_REG(VGPR_32) +DECODE_OPERAND_REG(VS_32) +DECODE_OPERAND_REG(VS_64) +DECODE_OPERAND_REG(VS_128) -DECODE_OPERAND(VReg_64) -DECODE_OPERAND(VReg_96) -DECODE_OPERAND(VReg_128) +DECODE_OPERAND_REG(VReg_64) +DECODE_OPERAND_REG(VReg_96) +DECODE_OPERAND_REG(VReg_128) -DECODE_OPERAND(SReg_32) -DECODE_OPERAND(SReg_32_XM0_XEXEC) 
-DECODE_OPERAND(SReg_64) -DECODE_OPERAND(SReg_64_XEXEC) -DECODE_OPERAND(SReg_128) -DECODE_OPERAND(SReg_256) -DECODE_OPERAND(SReg_512) +DECODE_OPERAND_REG(SReg_32) +DECODE_OPERAND_REG(SReg_32_XM0_XEXEC) +DECODE_OPERAND_REG(SReg_64) +DECODE_OPERAND_REG(SReg_64_XEXEC) +DECODE_OPERAND_REG(SReg_128) +DECODE_OPERAND_REG(SReg_256) +DECODE_OPERAND_REG(SReg_512) static DecodeStatus decodeOperand_VSrc16(MCInst &Inst, @@ -106,6 +118,13 @@ static DecodeStatus decodeOperand_VSrcV216(MCInst &Inst, return addOperand(Inst, DAsm->decodeOperand_VSrcV216(Imm)); } +#define DECODE_SDWA(DecName) \ +DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName) + +DECODE_SDWA(Src32) +DECODE_SDWA(Src16) +DECODE_SDWA(VopcDst) + #include "AMDGPUGenDisassemblerTables.inc" //===----------------------------------------------------------------------===// @@ -126,6 +145,7 @@ DecodeStatus AMDGPUDisassembler::tryDecodeInst(const uint8_t* Table, assert(MI.getOpcode() == 0); assert(MI.getNumOperands() == 0); MCInst TmpInst; + HasLiteral = false; const auto SavedBytes = Bytes; if (decodeInstruction(Table, TmpInst, Inst, Address, this, STI)) { MI = TmpInst; @@ -141,6 +161,7 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, raw_ostream &WS, raw_ostream &CS) const { CommentStream = &CS; + bool IsSDWA = false; // ToDo: AMDGPUDisassembler supports only VI ISA. if (!STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding]) @@ -162,7 +183,10 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, if (Res) break; Res = tryDecodeInst(DecoderTableSDWA64, MI, QW, Address); - if (Res) break; + if (Res) { IsSDWA = true; break; } + + Res = tryDecodeInst(DecoderTableSDWA964, MI, QW, Address); + if (Res) { IsSDWA = true; break; } } // Reinitialize Bytes as DPP64 could have eaten too much @@ -189,17 +213,36 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, MI.getOpcode() == AMDGPU::V_MAC_F32_e64_si || MI.getOpcode() == AMDGPU::V_MAC_F16_e64_vi)) { // Insert dummy unused src2_modifiers. - int Src2ModIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), - AMDGPU::OpName::src2_modifiers); - auto I = MI.begin(); - std::advance(I, Src2ModIdx); - MI.insert(I, MCOperand::createImm(0)); + insertNamedMCOperand(MI, MCOperand::createImm(0), + AMDGPU::OpName::src2_modifiers); } + if (Res && IsSDWA) + Res = convertSDWAInst(MI); + Size = Res ? 
(MaxInstBytesNum - Bytes.size()) : 0; return Res; } +DecodeStatus AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const { + if (STI.getFeatureBits()[AMDGPU::FeatureGFX9]) { + if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst) != -1) + // VOPC - insert clamp + insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::clamp); + } else if (STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands]) { + int SDst = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst); + if (SDst != -1) { + // VOPC - insert VCC register as sdst + insertNamedMCOperand(MI, MCOperand::createReg(AMDGPU::VCC), + AMDGPU::OpName::sdst); + } else { + // VOP1/2 - insert omod if present in instruction + insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::omod); + } + } + return MCDisassembler::Success; +} + const char* AMDGPUDisassembler::getRegClassName(unsigned RegClassID) const { return getContext().getRegisterInfo()-> getRegClassName(&AMDGPUMCRegisterClasses[RegClassID]); @@ -276,6 +319,10 @@ MCOperand AMDGPUDisassembler::decodeOperand_VS_64(unsigned Val) const { return decodeSrcOp(OPW64, Val); } +MCOperand AMDGPUDisassembler::decodeOperand_VS_128(unsigned Val) const { + return decodeSrcOp(OPW128, Val); +} + MCOperand AMDGPUDisassembler::decodeOperand_VSrc16(unsigned Val) const { return decodeSrcOp(OPW16, Val); } @@ -343,10 +390,15 @@ MCOperand AMDGPUDisassembler::decodeLiteralConstant() const { // For now all literal constants are supposed to be unsigned integer // ToDo: deal with signed/unsigned 64-bit integer constants // ToDo: deal with float/double constants - if (Bytes.size() < 4) - return errOperand(0, "cannot read literal, inst bytes left " + - Twine(Bytes.size())); - return MCOperand::createImm(eatBytes(Bytes)); + if (!HasLiteral) { + if (Bytes.size() < 4) { + return errOperand(0, "cannot read literal, inst bytes left " + + Twine(Bytes.size())); + } + HasLiteral = true; + Literal = eatBytes(Bytes); + } + return MCOperand::createImm(Literal); } MCOperand AMDGPUDisassembler::decodeIntImmed(unsigned Imm) { @@ -508,8 +560,6 @@ MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val) c return createSRegOperand(getTtmpClassId(Width), Val - TTMP_MIN); } - assert(Width == OPW16 || Width == OPW32 || Width == OPW64); - if (INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX) return decodeIntImmed(Val); @@ -576,6 +626,57 @@ MCOperand AMDGPUDisassembler::decodeSpecialReg64(unsigned Val) const { return errOperand(Val, "unknown operand encoding " + Twine(Val)); } +MCOperand AMDGPUDisassembler::decodeSDWASrc(const OpWidthTy Width, + unsigned Val) const { + using namespace AMDGPU::SDWA; + + if (STI.getFeatureBits()[AMDGPU::FeatureGFX9]) { + // XXX: static_cast is needed to avoid stupid warning: + // compare with unsigned is always true + if (SDWA9EncValues::SRC_VGPR_MIN <= static_cast(Val) && + Val <= SDWA9EncValues::SRC_VGPR_MAX) { + return createRegOperand(getVgprClassId(Width), + Val - SDWA9EncValues::SRC_VGPR_MIN); + } + if (SDWA9EncValues::SRC_SGPR_MIN <= Val && + Val <= SDWA9EncValues::SRC_SGPR_MAX) { + return createSRegOperand(getSgprClassId(Width), + Val - SDWA9EncValues::SRC_SGPR_MIN); + } + + return decodeSpecialReg32(Val - SDWA9EncValues::SRC_SGPR_MIN); + } else if (STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands]) { + return createRegOperand(getVgprClassId(Width), Val); + } + llvm_unreachable("unsupported target"); +} + +MCOperand AMDGPUDisassembler::decodeSDWASrc16(unsigned Val) const { + return decodeSDWASrc(OPW16, Val); +} + 
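// A runnable sketch of the range-split decoding above (the encoding
// constants here are illustrative stand-ins for SDWA9EncValues, not the
// real values): one operand field covers VGPRs first, then SGPRs, and
// anything past the SGPR window falls through to special registers.

#include <cstdio>

namespace toy {
enum : unsigned {
  SRC_VGPR_MIN = 0,   SRC_VGPR_MAX = 255,
  SRC_SGPR_MIN = 256, SRC_SGPR_MAX = 357
};

struct Operand { char File; unsigned Idx; };

Operand decodeSDWASrc(unsigned Val) {
  // The cast mirrors the static_cast in the real code: SRC_VGPR_MIN is 0,
  // so comparing an unsigned Val against it directly is always true.
  if (static_cast<int>(Val) >= static_cast<int>(SRC_VGPR_MIN) &&
      Val <= SRC_VGPR_MAX)
    return {'v', Val - SRC_VGPR_MIN};
  if (Val >= SRC_SGPR_MIN && Val <= SRC_SGPR_MAX)
    return {'s', Val - SRC_SGPR_MIN};
  return {'?', Val}; // special/unknown encoding
}
} // namespace toy

int main() {
  toy::Operand A = toy::decodeSDWASrc(7), B = toy::decodeSDWASrc(259);
  std::printf("%c%u %c%u\n", A.File, A.Idx, B.File, B.Idx); // prints: v7 s3
}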
+MCOperand AMDGPUDisassembler::decodeSDWASrc32(unsigned Val) const { + return decodeSDWASrc(OPW32, Val); +} + + +MCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const { + using namespace AMDGPU::SDWA; + + assert(STI.getFeatureBits()[AMDGPU::FeatureGFX9] && + "SDWAVopcDst should be present only on GFX9"); + if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) { + Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK; + if (Val > AMDGPU::EncValues::SGPR_MAX) { + return decodeSpecialReg64(Val); + } else { + return createSRegOperand(getSgprClassId(OPW64), Val); + } + } else { + return createRegOperand(AMDGPU::VCC); + } +} + //===----------------------------------------------------------------------===// // AMDGPUSymbolizer //===----------------------------------------------------------------------===// diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/interpreter/llvm/src/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h index d50665187e10b..4c755be099995 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h +++ b/interpreter/llvm/src/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h @@ -20,8 +20,8 @@ #include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCDisassembler/MCRelocationInfo.h" #include "llvm/MC/MCDisassembler/MCSymbolizer.h" -#include #include +#include #include namespace llvm { @@ -39,6 +39,8 @@ class Twine; class AMDGPUDisassembler : public MCDisassembler { private: mutable ArrayRef Bytes; + mutable uint32_t Literal; + mutable bool HasLiteral; public: AMDGPUDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) : @@ -63,9 +65,12 @@ class AMDGPUDisassembler : public MCDisassembler { uint64_t Inst, uint64_t Address) const; + DecodeStatus convertSDWAInst(MCInst &MI) const; + MCOperand decodeOperand_VGPR_32(unsigned Val) const; MCOperand decodeOperand_VS_32(unsigned Val) const; MCOperand decodeOperand_VS_64(unsigned Val) const; + MCOperand decodeOperand_VS_128(unsigned Val) const; MCOperand decodeOperand_VSrc16(unsigned Val) const; MCOperand decodeOperand_VSrcV216(unsigned Val) const; @@ -102,6 +107,11 @@ class AMDGPUDisassembler : public MCDisassembler { MCOperand decodeSrcOp(const OpWidthTy Width, unsigned Val) const; MCOperand decodeSpecialReg32(unsigned Val) const; MCOperand decodeSpecialReg64(unsigned Val) const; + + MCOperand decodeSDWASrc(const OpWidthTy Width, unsigned Val) const; + MCOperand decodeSDWASrc16(unsigned Val) const; + MCOperand decodeSDWASrc32(unsigned Val) const; + MCOperand decodeSDWAVopcDst(unsigned Val) const; }; //===----------------------------------------------------------------------===// diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/FLATInstructions.td b/interpreter/llvm/src/lib/Target/AMDGPU/FLATInstructions.td index 8ba9efd42c703..edca6fcd812c8 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/FLATInstructions.td +++ b/interpreter/llvm/src/lib/Target/AMDGPU/FLATInstructions.td @@ -7,7 +7,8 @@ // //===----------------------------------------------------------------------===// -def FLATAtomic : ComplexPattern; +def FLATAtomic : ComplexPattern; +def FLATOffset : ComplexPattern; //===----------------------------------------------------------------------===// // FLAT classes @@ -30,8 +31,6 @@ class FLAT_Pseudo is_flat_global = 0; + bits<1> is_flat_scratch = 0; + bits<1> has_vdst = 1; bits<1> has_data = 1; bits<1> has_glc = 1; bits<1> glcValue = 0; + + // TODO: M0 if it could possibly access LDS (before gfx9? only)? 
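// Only the global variants can drop the implicit FLAT_SCR use below: flat
// and scratch accesses may be routed through the scratch aperture, so they
// keep reading the flat-scratch base registers alongside EXEC.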
+ let Uses = !if(is_flat_global, [EXEC], [EXEC, FLAT_SCR]); } class FLAT_Real op, FLAT_Pseudo ps> : @@ -55,6 +60,8 @@ class FLAT_Real op, FLAT_Pseudo ps> : // copy relevant pseudo op flags let SubtargetPredicate = ps.SubtargetPredicate; let AsmMatchConverter = ps.AsmMatchConverter; + let TSFlags = ps.TSFlags; + let UseNamedOperandTable = ps.UseNamedOperandTable; // encoding fields bits<8> vaddr; @@ -63,10 +70,26 @@ class FLAT_Real op, FLAT_Pseudo ps> : bits<1> slc; bits<1> glc; + // Only valid on gfx9 + bits<1> lds = 0; // XXX - What does this actually do? + + // Segment, 00=flat, 01=scratch, 10=global, 11=reserved + bits<2> seg = !if(ps.is_flat_global, 0b10, + !if(ps.is_flat_scratch, 0b01, 0)); + + // Signed offset. Highest bit ignored for flat and treated as 12-bit + // unsigned for flat acceses. + bits<13> offset; + bits<1> nv = 0; // XXX - What does this actually do? + // We don't use tfe right now, and it was removed in gfx9. bits<1> tfe = 0; - // 15-0 is reserved. + // Only valid on GFX9+ + let Inst{12-0} = offset; + let Inst{13} = lds; + let Inst{15-14} = seg; + let Inst{16} = !if(ps.has_glc, glc, ps.glcValue); let Inst{17} = slc; let Inst{24-18} = op; @@ -74,41 +97,70 @@ class FLAT_Real op, FLAT_Pseudo ps> : let Inst{39-32} = vaddr; let Inst{47-40} = !if(ps.has_data, vdata, ?); // 54-48 is reserved. - let Inst{55} = tfe; + let Inst{55} = nv; // nv on GFX9+, TFE before. let Inst{63-56} = !if(ps.has_vdst, vdst, ?); } -class FLAT_Load_Pseudo : FLAT_Pseudo< +class FLAT_Load_Pseudo : FLAT_Pseudo< opName, (outs regClass:$vdst), - (ins VReg_64:$vaddr, GLC:$glc, slc:$slc), - " $vdst, $vaddr$glc$slc"> { + !if(HasSignedOffset, + (ins VReg_64:$vaddr, offset_s13:$offset, GLC:$glc, slc:$slc), + (ins VReg_64:$vaddr, offset_u12:$offset, GLC:$glc, slc:$slc)), + " $vdst, $vaddr$offset$glc$slc"> { let has_data = 0; let mayLoad = 1; } -class FLAT_Store_Pseudo : FLAT_Pseudo< +class FLAT_Global_Load_Pseudo : + FLAT_Load_Pseudo { + let is_flat_global = 1; +} + +class FLAT_Scratch_Load_Pseudo : + FLAT_Load_Pseudo { + let is_flat_scratch = 1; +} + +class FLAT_Store_Pseudo : FLAT_Pseudo< opName, (outs), - (ins VReg_64:$vaddr, vdataClass:$vdata, GLC:$glc, slc:$slc), - " $vaddr, $vdata$glc$slc"> { + !if(HasSignedOffset, + (ins VReg_64:$vaddr, vdataClass:$vdata, offset_s13:$offset, GLC:$glc, slc:$slc), + (ins VReg_64:$vaddr, vdataClass:$vdata, offset_u12:$offset, GLC:$glc, slc:$slc)), + " $vaddr, $vdata$offset$glc$slc"> { let mayLoad = 0; let mayStore = 1; let has_vdst = 0; } +class FLAT_Global_Store_Pseudo : + FLAT_Store_Pseudo { + let is_flat_global = 1; +} + +class FLAT_Scratch_Store_Pseudo : + FLAT_Store_Pseudo { + let is_flat_scratch = 1; +} + multiclass FLAT_Atomic_Pseudo< string opName, RegisterClass vdst_rc, ValueType vt, SDPatternOperator atomic = null_frag, ValueType data_vt = vt, - RegisterClass data_rc = vdst_rc> { + RegisterClass data_rc = vdst_rc, + bit HasSignedOffset = 0> { def "" : FLAT_Pseudo , AtomicNoRet { let mayLoad = 1; @@ -121,10 +173,12 @@ multiclass FLAT_Atomic_Pseudo< def _RTN : FLAT_Pseudo , + (atomic (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$vdata))]>, AtomicNoRet { let mayLoad = 1; let mayStore = 1; @@ -279,6 +333,26 @@ defm FLAT_ATOMIC_FMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fmax_x2", } // End SubtargetPredicate = isCI +let SubtargetPredicate = HasFlatGlobalInsts in { +def GLOBAL_LOAD_UBYTE : FLAT_Global_Load_Pseudo <"global_load_ubyte", VGPR_32>; +def GLOBAL_LOAD_SBYTE : FLAT_Global_Load_Pseudo <"global_load_sbyte", VGPR_32>; +def GLOBAL_LOAD_USHORT : 
FLAT_Global_Load_Pseudo <"global_load_ushort", VGPR_32>; +def GLOBAL_LOAD_SSHORT : FLAT_Global_Load_Pseudo <"global_load_sshort", VGPR_32>; +def GLOBAL_LOAD_DWORD : FLAT_Global_Load_Pseudo <"global_load_dword", VGPR_32>; +def GLOBAL_LOAD_DWORDX2 : FLAT_Global_Load_Pseudo <"global_load_dwordx2", VReg_64>; +def GLOBAL_LOAD_DWORDX3 : FLAT_Global_Load_Pseudo <"global_load_dwordx3", VReg_96>; +def GLOBAL_LOAD_DWORDX4 : FLAT_Global_Load_Pseudo <"global_load_dwordx4", VReg_128>; + +def GLOBAL_STORE_BYTE : FLAT_Global_Store_Pseudo <"global_store_byte", VGPR_32>; +def GLOBAL_STORE_SHORT : FLAT_Global_Store_Pseudo <"global_store_short", VGPR_32>; +def GLOBAL_STORE_DWORD : FLAT_Global_Store_Pseudo <"global_store_dword", VGPR_32>; +def GLOBAL_STORE_DWORDX2 : FLAT_Global_Store_Pseudo <"global_store_dwordx2", VReg_64>; +def GLOBAL_STORE_DWORDX3 : FLAT_Global_Store_Pseudo <"global_store_dwordx3", VReg_96>; +def GLOBAL_STORE_DWORDX4 : FLAT_Global_Store_Pseudo <"global_store_dwordx4", VReg_128>; + +} // End SubtargetPredicate = HasFlatGlobalInsts + + //===----------------------------------------------------------------------===// // Flat Patterns //===----------------------------------------------------------------------===// @@ -312,31 +386,31 @@ def flat_truncstorei16 : flat_st ; // Patterns for global loads with no offset. class FlatLoadPat : Pat < - (vt (node i64:$addr)), - (inst $addr, 0, 0) + (vt (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc))), + (inst $vaddr, $offset, 0, $slc) >; class FlatLoadAtomicPat : Pat < - (vt (node i64:$addr)), - (inst $addr, 1, 0) + (vt (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc))), + (inst $vaddr, $offset, 1, $slc) >; class FlatStorePat : Pat < - (node vt:$data, i64:$addr), - (inst $addr, $data, 0, 0) + (node vt:$data, (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc)), + (inst $vaddr, $data, $offset, 0, $slc) >; class FlatStoreAtomicPat : Pat < // atomic store follows atomic binop convention so the address comes // first. 
- (node i64:$addr, vt:$data), - (inst $addr, $data, 1, 0) + (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), vt:$data), + (inst $vaddr, $data, $offset, 1, $slc) >; class FlatAtomicPat : Pat < - (vt (node i64:$addr, data_vt:$data)), - (inst $addr, $data, 0) + (vt (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$data)), + (inst $vaddr, $data, $offset, $slc) >; let Predicates = [isCIVI] in { @@ -530,3 +604,18 @@ defm FLAT_ATOMIC_XOR_X2 : FLAT_Real_Atomics_vi <0x6a, FLAT_ATOMIC_XOR_X2>; defm FLAT_ATOMIC_INC_X2 : FLAT_Real_Atomics_vi <0x6b, FLAT_ATOMIC_INC_X2>; defm FLAT_ATOMIC_DEC_X2 : FLAT_Real_Atomics_vi <0x6c, FLAT_ATOMIC_DEC_X2>; +def GLOBAL_LOAD_UBYTE_vi : FLAT_Real_vi <0x10, GLOBAL_LOAD_UBYTE>; +def GLOBAL_LOAD_SBYTE_vi : FLAT_Real_vi <0x11, GLOBAL_LOAD_SBYTE>; +def GLOBAL_LOAD_USHORT_vi : FLAT_Real_vi <0x12, GLOBAL_LOAD_USHORT>; +def GLOBAL_LOAD_SSHORT_vi : FLAT_Real_vi <0x13, GLOBAL_LOAD_SSHORT>; +def GLOBAL_LOAD_DWORD_vi : FLAT_Real_vi <0x14, GLOBAL_LOAD_DWORD>; +def GLOBAL_LOAD_DWORDX2_vi : FLAT_Real_vi <0x15, GLOBAL_LOAD_DWORDX2>; +def GLOBAL_LOAD_DWORDX4_vi : FLAT_Real_vi <0x17, GLOBAL_LOAD_DWORDX4>; +def GLOBAL_LOAD_DWORDX3_vi : FLAT_Real_vi <0x16, GLOBAL_LOAD_DWORDX3>; + +def GLOBAL_STORE_BYTE_vi : FLAT_Real_vi <0x18, GLOBAL_STORE_BYTE>; +def GLOBAL_STORE_SHORT_vi : FLAT_Real_vi <0x1a, GLOBAL_STORE_SHORT>; +def GLOBAL_STORE_DWORD_vi : FLAT_Real_vi <0x1c, GLOBAL_STORE_DWORD>; +def GLOBAL_STORE_DWORDX2_vi : FLAT_Real_vi <0x1d, GLOBAL_STORE_DWORDX2>; +def GLOBAL_STORE_DWORDX4_vi : FLAT_Real_vi <0x1f, GLOBAL_STORE_DWORDX4>; +def GLOBAL_STORE_DWORDX3_vi : FLAT_Real_vi <0x1e, GLOBAL_STORE_DWORDX3>; diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/interpreter/llvm/src/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index 80fc4ac9d2a3e..cd9e7fb04f16b 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/interpreter/llvm/src/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "AMDGPUSubtarget.h" #include "GCNHazardRecognizer.h" +#include "AMDGPUSubtarget.h" #include "SIDefines.h" #include "SIInstrInfo.h" #include "SIRegisterInfo.h" diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/GCNIterativeScheduler.cpp b/interpreter/llvm/src/lib/Target/AMDGPU/GCNIterativeScheduler.cpp index 3bb5c9bc22b7d..2e7641cda3755 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/GCNIterativeScheduler.cpp +++ b/interpreter/llvm/src/lib/Target/AMDGPU/GCNIterativeScheduler.cpp @@ -17,7 +17,7 @@ using namespace llvm; -#define DEBUG_TYPE "misched" +#define DEBUG_TYPE "machine-scheduler" namespace llvm { std::vector makeMinRegSchedule(ArrayRef TopRoots, @@ -191,6 +191,7 @@ class GCNIterativeScheduler::OverrideLegacyStrategy { } }; +namespace { // just a stub to make base class happy class SchedStrategyStub : public MachineSchedStrategy { public: @@ -202,6 +203,7 @@ class SchedStrategyStub : public MachineSchedStrategy { void releaseTopNode(SUnit *SU) override {} void releaseBottomNode(SUnit *SU) override {} }; +} // namespace GCNIterativeScheduler::GCNIterativeScheduler(MachineSchedContext *C, StrategyKind S) diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/GCNMinRegStrategy.cpp b/interpreter/llvm/src/lib/Target/AMDGPU/GCNMinRegStrategy.cpp index c6d0f21799508..0657f67b217de 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/GCNMinRegStrategy.cpp +++ b/interpreter/llvm/src/lib/Target/AMDGPU/GCNMinRegStrategy.cpp @@ -15,8 +15,9 @@ using 
namespace llvm; -#define DEBUG_TYPE "misched" +#define DEBUG_TYPE "machine-scheduler" +namespace { class GCNMinRegScheduler { struct Candidate : ilist_node { const SUnit *SU; @@ -71,6 +72,7 @@ class GCNMinRegScheduler { std::vector schedule(ArrayRef TopRoots, const ScheduleDAG &DAG); }; +} // namespace void GCNMinRegScheduler::initNumPreds(const decltype(ScheduleDAG::SUnits) &SUnits) { NumPreds.resize(SUnits.size()); diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/GCNRegPressure.cpp b/interpreter/llvm/src/lib/Target/AMDGPU/GCNRegPressure.cpp index 20f54cf4a53b8..1d02c7fdffbf5 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/GCNRegPressure.cpp +++ b/interpreter/llvm/src/lib/Target/AMDGPU/GCNRegPressure.cpp @@ -12,10 +12,11 @@ //===----------------------------------------------------------------------===// #include "GCNRegPressure.h" +#include "llvm/CodeGen/RegisterPressure.h" using namespace llvm; -#define DEBUG_TYPE "misched" +#define DEBUG_TYPE "machine-scheduler" #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD @@ -27,7 +28,7 @@ void llvm::printLivesAt(SlotIndex SI, unsigned Num = 0; for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) { const unsigned Reg = TargetRegisterInfo::index2VirtReg(I); - if (MRI.reg_nodbg_empty(Reg)) + if (!LIS.hasInterval(Reg)) continue; const auto &LI = LIS.getInterval(Reg); if (LI.hasSubRanges()) { @@ -63,15 +64,6 @@ static bool isEqual(const GCNRPTracker::LiveRegSet &S1, return true; } -static GCNRPTracker::LiveRegSet -stripEmpty(const GCNRPTracker::LiveRegSet &LR) { - GCNRPTracker::LiveRegSet Res; - for (const auto &P : LR) { - if (P.second.any()) - Res.insert(P); - } - return Res; -} #endif /////////////////////////////////////////////////////////////////////////////// @@ -185,6 +177,64 @@ void GCNRegPressure::print(raw_ostream &OS, const SISubtarget *ST) const { } #endif + +static LaneBitmask getDefRegMask(const MachineOperand &MO, + const MachineRegisterInfo &MRI) { + assert(MO.isDef() && MO.isReg() && + TargetRegisterInfo::isVirtualRegister(MO.getReg())); + + // We don't rely on read-undef flag because in case of tentative schedule + // tracking it isn't set correctly yet. This works correctly however since + // use mask has been tracked before using LIS. + return MO.getSubReg() == 0 ? + MRI.getMaxLaneMaskForVReg(MO.getReg()) : + MRI.getTargetRegisterInfo()->getSubRegIndexLaneMask(MO.getSubReg()); +} + +static LaneBitmask getUsedRegMask(const MachineOperand &MO, + const MachineRegisterInfo &MRI, + const LiveIntervals &LIS) { + assert(MO.isUse() && MO.isReg() && + TargetRegisterInfo::isVirtualRegister(MO.getReg())); + + if (auto SubReg = MO.getSubReg()) + return MRI.getTargetRegisterInfo()->getSubRegIndexLaneMask(SubReg); + + auto MaxMask = MRI.getMaxLaneMaskForVReg(MO.getReg()); + if (MaxMask.getAsInteger() == 1) // cannot have subregs + return MaxMask; + + // For a tentative schedule LIS isn't updated yet but livemask should remain + // the same on any schedule. Subreg defs can be reordered but they all must + // dominate uses anyway. 
+ auto SI = LIS.getInstructionIndex(*MO.getParent()).getBaseIndex(); + return getLiveLaneMask(MO.getReg(), SI, LIS, MRI); +} + +static SmallVector +collectVirtualRegUses(const MachineInstr &MI, const LiveIntervals &LIS, + const MachineRegisterInfo &MRI) { + SmallVector Res; + for (const auto &MO : MI.operands()) { + if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg())) + continue; + if (!MO.isUse() || !MO.readsReg()) + continue; + + auto const UsedMask = getUsedRegMask(MO, MRI, LIS); + + auto Reg = MO.getReg(); + auto I = std::find_if(Res.begin(), Res.end(), [Reg](const RegisterMaskPair &RM) { + return RM.RegUnit == Reg; + }); + if (I != Res.end()) + I->LaneMask |= UsedMask; + else + Res.push_back(RegisterMaskPair(Reg, UsedMask)); + } + return Res; +} + /////////////////////////////////////////////////////////////////////////////// // GCNRPTracker @@ -192,7 +242,6 @@ LaneBitmask llvm::getLiveLaneMask(unsigned Reg, SlotIndex SI, const LiveIntervals &LIS, const MachineRegisterInfo &MRI) { - assert(!MRI.reg_nodbg_empty(Reg)); LaneBitmask LiveMask; const auto &LI = LIS.getInterval(Reg); if (LI.hasSubRanges()) { @@ -214,7 +263,7 @@ GCNRPTracker::LiveRegSet llvm::getLiveRegs(SlotIndex SI, GCNRPTracker::LiveRegSet LiveRegs; for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) { auto Reg = TargetRegisterInfo::index2VirtReg(I); - if (MRI.reg_nodbg_empty(Reg)) + if (!LIS.hasInterval(Reg)) continue; auto LiveMask = getLiveLaneMask(Reg, SI, LIS, MRI); if (LiveMask.any()) @@ -223,42 +272,18 @@ GCNRPTracker::LiveRegSet llvm::getLiveRegs(SlotIndex SI, return LiveRegs; } -void GCNUpwardRPTracker::reset(const MachineInstr &MI) { +void GCNUpwardRPTracker::reset(const MachineInstr &MI, + const LiveRegSet *LiveRegsCopy) { MRI = &MI.getParent()->getParent()->getRegInfo(); - LiveRegs = getLiveRegsAfter(MI, LIS); + if (LiveRegsCopy) { + if (&LiveRegs != LiveRegsCopy) + LiveRegs = *LiveRegsCopy; + } else { + LiveRegs = getLiveRegsAfter(MI, LIS); + } MaxPressure = CurPressure = getRegPressure(*MRI, LiveRegs); } -LaneBitmask GCNUpwardRPTracker::getDefRegMask(const MachineOperand &MO) const { - assert(MO.isDef() && MO.isReg() && - TargetRegisterInfo::isVirtualRegister(MO.getReg())); - - // We don't rely on read-undef flag because in case of tentative schedule - // tracking it isn't set correctly yet. This works correctly however since - // use mask has been tracked before using LIS. - return MO.getSubReg() == 0 ? - MRI->getMaxLaneMaskForVReg(MO.getReg()) : - MRI->getTargetRegisterInfo()->getSubRegIndexLaneMask(MO.getSubReg()); -} - -LaneBitmask GCNUpwardRPTracker::getUsedRegMask(const MachineOperand &MO) const { - assert(MO.isUse() && MO.isReg() && - TargetRegisterInfo::isVirtualRegister(MO.getReg())); - - if (auto SubReg = MO.getSubReg()) - return MRI->getTargetRegisterInfo()->getSubRegIndexLaneMask(SubReg); - - auto MaxMask = MRI->getMaxLaneMaskForVReg(MO.getReg()); - if (MaxMask.getAsInteger() == 1) // cannot have subregs - return MaxMask; - - // For a tentative schedule LIS isn't updated yet but livemask should remain - // the same on any schedule. Subreg defs can be reordered but they all must - // dominate uses anyway. 
- auto SI = LIS.getInstructionIndex(*MO.getParent()).getBaseIndex(); - return getLiveLaneMask(MO.getReg(), SI, LIS, *MRI); -} - void GCNUpwardRPTracker::recede(const MachineInstr &MI) { assert(MRI && "call reset first"); @@ -267,36 +292,136 @@ void GCNUpwardRPTracker::recede(const MachineInstr &MI) { if (MI.isDebugValue()) return; - // process all defs first to ensure early clobbers are handled correctly - // iterating over operands() to catch implicit defs - for (const auto &MO : MI.operands()) { - if (!MO.isReg() || !MO.isDef() || - !TargetRegisterInfo::isVirtualRegister(MO.getReg())) + auto const RegUses = collectVirtualRegUses(MI, LIS, *MRI); + + // calc pressure at the MI (defs + uses) + auto AtMIPressure = CurPressure; + for (const auto &U : RegUses) { + auto LiveMask = LiveRegs[U.RegUnit]; + AtMIPressure.inc(U.RegUnit, LiveMask, LiveMask | U.LaneMask, *MRI); + } + // update max pressure + MaxPressure = max(AtMIPressure, MaxPressure); + + for (const auto &MO : MI.defs()) { + if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()) || + MO.isDead()) continue; auto Reg = MO.getReg(); - auto &LiveMask = LiveRegs[Reg]; + auto I = LiveRegs.find(Reg); + if (I == LiveRegs.end()) + continue; + auto &LiveMask = I->second; auto PrevMask = LiveMask; - LiveMask &= ~getDefRegMask(MO); + LiveMask &= ~getDefRegMask(MO, *MRI); CurPressure.inc(Reg, PrevMask, LiveMask, *MRI); + if (LiveMask.none()) + LiveRegs.erase(I); + } + for (const auto &U : RegUses) { + auto &LiveMask = LiveRegs[U.RegUnit]; + auto PrevMask = LiveMask; + LiveMask |= U.LaneMask; + CurPressure.inc(U.RegUnit, PrevMask, LiveMask, *MRI); + } + assert(CurPressure == getRegPressure(*MRI, LiveRegs)); +} + +bool GCNDownwardRPTracker::reset(const MachineInstr &MI, + const LiveRegSet *LiveRegsCopy) { + MRI = &MI.getParent()->getParent()->getRegInfo(); + LastTrackedMI = nullptr; + MBBEnd = MI.getParent()->end(); + NextMI = &MI; + NextMI = skipDebugInstructionsForward(NextMI, MBBEnd); + if (NextMI == MBBEnd) + return false; + if (LiveRegsCopy) { + if (&LiveRegs != LiveRegsCopy) + LiveRegs = *LiveRegsCopy; + } else { + LiveRegs = getLiveRegsBefore(*NextMI, LIS); } + MaxPressure = CurPressure = getRegPressure(*MRI, LiveRegs); + return true; +} - // then all uses - for (const auto &MO : MI.uses()) { - if (!MO.isReg() || !MO.readsReg() || - !TargetRegisterInfo::isVirtualRegister(MO.getReg())) - continue; +bool GCNDownwardRPTracker::advanceBeforeNext() { + assert(MRI && "call reset first"); - auto Reg = MO.getReg(); + NextMI = skipDebugInstructionsForward(NextMI, MBBEnd); + if (NextMI == MBBEnd) + return false; + + SlotIndex SI = LIS.getInstructionIndex(*NextMI).getBaseIndex(); + assert(SI.isValid()); + + // Remove dead registers or mask bits. + for (auto &It : LiveRegs) { + const LiveInterval &LI = LIS.getInterval(It.first); + if (LI.hasSubRanges()) { + for (const auto &S : LI.subranges()) { + if (!S.liveAt(SI)) { + auto PrevMask = It.second; + It.second &= ~S.LaneMask; + CurPressure.inc(It.first, PrevMask, It.second, *MRI); + } + } + } else if (!LI.liveAt(SI)) { + auto PrevMask = It.second; + It.second = LaneBitmask::getNone(); + CurPressure.inc(It.first, PrevMask, It.second, *MRI); + } + if (It.second.none()) + LiveRegs.erase(It.first); + } + + MaxPressure = max(MaxPressure, CurPressure); + + return true; +} + +void GCNDownwardRPTracker::advanceToNext() { + LastTrackedMI = &*NextMI++; + + // Add new registers or mask bits. 
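// A def can only add lanes for the sub-register range it actually writes
// (getDefRegMask); pressure grows by the lanes that were not already live.
// E.g. if the low half of a 128-bit vreg is live (mask 0b0011) and the def
// writes the high half (mask 0b1100), the mask becomes 0b1111 and
// CurPressure is charged for exactly two new lanes.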
+ for (const auto &MO : LastTrackedMI->defs()) { + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; auto &LiveMask = LiveRegs[Reg]; auto PrevMask = LiveMask; - LiveMask |= getUsedRegMask(MO); + LiveMask |= getDefRegMask(MO, *MRI); CurPressure.inc(Reg, PrevMask, LiveMask, *MRI); } MaxPressure = max(MaxPressure, CurPressure); } +bool GCNDownwardRPTracker::advance() { + // If we have just called reset live set is actual. + if ((NextMI == MBBEnd) || (LastTrackedMI && !advanceBeforeNext())) + return false; + advanceToNext(); + return true; +} + +bool GCNDownwardRPTracker::advance(MachineBasicBlock::const_iterator End) { + while (NextMI != End) + if (!advance()) return false; + return true; +} + +bool GCNDownwardRPTracker::advance(MachineBasicBlock::const_iterator Begin, + MachineBasicBlock::const_iterator End, + const LiveRegSet *LiveRegsCopy) { + reset(*Begin, LiveRegsCopy); + return advance(End); +} + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD static void reportMismatch(const GCNRPTracker::LiveRegSet &LISLR, @@ -331,7 +456,7 @@ static void reportMismatch(const GCNRPTracker::LiveRegSet &LISLR, bool GCNUpwardRPTracker::isValid() const { const auto &SI = LIS.getInstructionIndex(*LastTrackedMI).getBaseIndex(); const auto LISLR = llvm::getLiveRegs(SI, LIS, *MRI); - const auto TrackedLR = stripEmpty(LiveRegs); + const auto &TrackedLR = LiveRegs; if (!isEqual(LISLR, TrackedLR)) { dbgs() << "\nGCNUpwardRPTracker error: Tracked and" @@ -352,4 +477,16 @@ bool GCNUpwardRPTracker::isValid() const { return true; } +void GCNRPTracker::printLiveRegs(raw_ostream &OS, const LiveRegSet& LiveRegs, + const MachineRegisterInfo &MRI) { + const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo(); + for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(I); + auto It = LiveRegs.find(Reg); + if (It != LiveRegs.end() && It->second.any()) + OS << ' ' << PrintVRegOrUnit(Reg, TRI) << ':' + << PrintLaneMask(It->second); + } + OS << '\n'; +} #endif diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/GCNRegPressure.h b/interpreter/llvm/src/lib/Target/AMDGPU/GCNRegPressure.h index c33363170c6cf..5dfe44053e728 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/GCNRegPressure.h +++ b/interpreter/llvm/src/lib/Target/AMDGPU/GCNRegPressure.h @@ -92,16 +92,19 @@ class GCNRPTracker { typedef DenseMap LiveRegSet; protected: + const LiveIntervals &LIS; LiveRegSet LiveRegs; GCNRegPressure CurPressure, MaxPressure; const MachineInstr *LastTrackedMI = nullptr; mutable const MachineRegisterInfo *MRI = nullptr; - GCNRPTracker() {} + GCNRPTracker(const LiveIntervals &LIS_) : LIS(LIS_) {} public: // live regs for the current state const decltype(LiveRegs) &getLiveRegs() const { return LiveRegs; } const MachineInstr *getLastTrackedMI() const { return LastTrackedMI; } + void clearMaxPressure() { MaxPressure.clear(); } + // returns MaxPressure, resetting it decltype(MaxPressure) moveMaxPressure() { auto Res = MaxPressure; @@ -111,17 +114,16 @@ class GCNRPTracker { decltype(LiveRegs) moveLiveRegs() { return std::move(LiveRegs); } + static void printLiveRegs(raw_ostream &OS, const LiveRegSet& LiveRegs, + const MachineRegisterInfo &MRI); }; class GCNUpwardRPTracker : public GCNRPTracker { - const LiveIntervals &LIS; - LaneBitmask getDefRegMask(const MachineOperand &MO) const; - LaneBitmask getUsedRegMask(const MachineOperand &MO) const; public: - GCNUpwardRPTracker(const LiveIntervals &LIS_) 
: LIS(LIS_) {} + GCNUpwardRPTracker(const LiveIntervals &LIS_) : GCNRPTracker(LIS_) {} // reset tracker to the point just below MI // filling live regs upon this point using LIS - void reset(const MachineInstr &MI); + void reset(const MachineInstr &MI, const LiveRegSet *LiveRegs = nullptr); // move to the state just above the MI void recede(const MachineInstr &MI); @@ -131,6 +133,41 @@ class GCNUpwardRPTracker : public GCNRPTracker { bool isValid() const; }; +class GCNDownwardRPTracker : public GCNRPTracker { + // Last position of reset or advanceBeforeNext + MachineBasicBlock::const_iterator NextMI; + + MachineBasicBlock::const_iterator MBBEnd; + +public: + GCNDownwardRPTracker(const LiveIntervals &LIS_) : GCNRPTracker(LIS_) {} + + const MachineBasicBlock::const_iterator getNext() const { return NextMI; } + + // Reset tracker to the point before the MI + // filling live regs upon this point using LIS. + // Returns false if block is empty except debug values. + bool reset(const MachineInstr &MI, const LiveRegSet *LiveRegs = nullptr); + + // Move to the state right before the next MI. Returns false if reached + // end of the block. + bool advanceBeforeNext(); + + // Move to the state at the MI, advanceBeforeNext has to be called first. + void advanceToNext(); + + // Move to the state at the next MI. Returns false if reached end of block. + bool advance(); + + // Advance instructions until before End. + bool advance(MachineBasicBlock::const_iterator End); + + // Reset to Begin and advance to End. + bool advance(MachineBasicBlock::const_iterator Begin, + MachineBasicBlock::const_iterator End, + const LiveRegSet *LiveRegsCopy = nullptr); +}; + LaneBitmask getLiveLaneMask(unsigned Reg, SlotIndex SI, const LiveIntervals &LIS, diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/interpreter/llvm/src/lib/Target/AMDGPU/GCNSchedStrategy.cpp index 9f07d28c708bc..155b400ba022b 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/GCNSchedStrategy.cpp +++ b/interpreter/llvm/src/lib/Target/AMDGPU/GCNSchedStrategy.cpp @@ -20,7 +20,7 @@ #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/Support/MathExtras.h" -#define DEBUG_TYPE "misched" +#define DEBUG_TYPE "machine-scheduler" using namespace llvm; @@ -316,46 +316,57 @@ GCNScheduleDAGMILive::GCNScheduleDAGMILive(MachineSchedContext *C, MFI(*MF.getInfo()), StartingOccupancy(ST.getOccupancyWithLocalMemSize(MFI.getLDSSize(), *MF.getFunction())), - MinOccupancy(StartingOccupancy), Stage(0) { + MinOccupancy(StartingOccupancy), Stage(0), RegionIdx(0) { DEBUG(dbgs() << "Starting occupancy is " << StartingOccupancy << ".\n"); } void GCNScheduleDAGMILive::schedule() { + if (Stage == 0) { + // Just record regions at the first pass. 
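// Stage 0 does no reordering: it only records each region's boundaries.
// finalizeSchedule() then re-enters the recorded regions with Stage >= 1,
// using the cached per-region live-ins and pressure, and loops a second
// time (Stage 2) with a relaxed occupancy target if the first real pass
// lowered the achievable occupancy.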
+ Regions.push_back(std::make_pair(RegionBegin, RegionEnd)); + return; + } + std::vector Unsched; Unsched.reserve(NumRegionInstrs); for (auto &I : *this) Unsched.push_back(&I); - std::pair PressureBefore; + GCNRegPressure PressureBefore; if (LIS) { - DEBUG(dbgs() << "Pressure before scheduling:\n"); - discoverLiveIns(); - PressureBefore = getRealRegPressure(); + PressureBefore = Pressure[RegionIdx]; + + DEBUG(dbgs() << "Pressure before scheduling:\nRegion live-ins:"; + GCNRPTracker::printLiveRegs(dbgs(), LiveIns[RegionIdx], MRI); + dbgs() << "Region live-in pressure: "; + llvm::getRegPressure(MRI, LiveIns[RegionIdx]).print(dbgs()); + dbgs() << "Region register pressure: "; + PressureBefore.print(dbgs())); } ScheduleDAGMILive::schedule(); - if (Stage == 0) - Regions.push_back(std::make_pair(RegionBegin, RegionEnd)); + Regions[RegionIdx] = std::make_pair(RegionBegin, RegionEnd); if (!LIS) return; // Check the results of scheduling. GCNMaxOccupancySchedStrategy &S = (GCNMaxOccupancySchedStrategy&)*SchedImpl; - DEBUG(dbgs() << "Pressure after scheduling:\n"); auto PressureAfter = getRealRegPressure(); - LiveIns.clear(); - if (PressureAfter.first <= S.SGPRCriticalLimit && - PressureAfter.second <= S.VGPRCriticalLimit) { + DEBUG(dbgs() << "Pressure after scheduling: "; PressureAfter.print(dbgs())); + + if (PressureAfter.getSGPRNum() <= S.SGPRCriticalLimit && + PressureAfter.getVGPRNum() <= S.VGPRCriticalLimit) { + Pressure[RegionIdx] = PressureAfter; DEBUG(dbgs() << "Pressure in desired limits, done.\n"); return; } - unsigned WavesAfter = getMaxWaves(PressureAfter.first, - PressureAfter.second, MF); - unsigned WavesBefore = getMaxWaves(PressureBefore.first, - PressureBefore.second, MF); + unsigned WavesAfter = getMaxWaves(PressureAfter.getSGPRNum(), + PressureAfter.getVGPRNum(), MF); + unsigned WavesBefore = getMaxWaves(PressureBefore.getSGPRNum(), + PressureBefore.getVGPRNum(), MF); DEBUG(dbgs() << "Occupancy before scheduling: " << WavesBefore << ", after " << WavesAfter << ".\n"); @@ -368,8 +379,10 @@ void GCNScheduleDAGMILive::schedule() { << MinOccupancy << ".\n"); } - if (WavesAfter >= WavesBefore) + if (WavesAfter >= WavesBefore) { + Pressure[RegionIdx] = PressureAfter; return; + } DEBUG(dbgs() << "Attempting to revert scheduling.\n"); RegionEnd = RegionBegin; @@ -398,167 +411,139 @@ void GCNScheduleDAGMILive::schedule() { DEBUG(dbgs() << "Scheduling " << *MI); } RegionBegin = Unsched.front()->getIterator(); - if (Stage == 0) - Regions.back() = std::make_pair(RegionBegin, RegionEnd); + Regions[RegionIdx] = std::make_pair(RegionBegin, RegionEnd); placeDebugValues(); } -static inline void setMask(const MachineRegisterInfo &MRI, - const SIRegisterInfo *SRI, unsigned Reg, - LaneBitmask &PrevMask, LaneBitmask NewMask, - unsigned &SGPRs, unsigned &VGPRs) { - int NewRegs = countPopulation(NewMask.getAsInteger()) - - countPopulation(PrevMask.getAsInteger()); - if (SRI->isSGPRReg(MRI, Reg)) - SGPRs += NewRegs; - if (SRI->isVGPR(MRI, Reg)) - VGPRs += NewRegs; - assert ((int)SGPRs >= 0 && (int)VGPRs >= 0); - PrevMask = NewMask; +GCNRegPressure GCNScheduleDAGMILive::getRealRegPressure() const { + GCNDownwardRPTracker RPTracker(*LIS); + RPTracker.advance(begin(), end(), &LiveIns[RegionIdx]); + return RPTracker.moveMaxPressure(); } -void GCNScheduleDAGMILive::discoverLiveIns() { - unsigned SGPRs = 0; - unsigned VGPRs = 0; +void GCNScheduleDAGMILive::computeBlockPressure(const MachineBasicBlock *MBB) { + GCNDownwardRPTracker RPTracker(*LIS); + + // If the block has the only successor then live-ins of 
that successor are + // live-outs of the current block. We can reuse calculated live set if the + // successor will be sent to scheduling past current block. + const MachineBasicBlock *OnlySucc = nullptr; + if (MBB->succ_size() == 1 && !(*MBB->succ_begin())->empty()) { + SlotIndexes *Ind = LIS->getSlotIndexes(); + if (Ind->getMBBStartIdx(MBB) < Ind->getMBBStartIdx(*MBB->succ_begin())) + OnlySucc = *MBB->succ_begin(); + } - auto I = begin(); - I = skipDebugInstructionsForward(I, I->getParent()->end()); - const SIRegisterInfo *SRI = static_cast(TRI); - SlotIndex SI = LIS->getInstructionIndex(*I).getBaseIndex(); - assert (SI.isValid()); - - DEBUG(dbgs() << "Region live-ins:"); - for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(I); - if (MRI.reg_nodbg_empty(Reg)) - continue; - const LiveInterval &LI = LIS->getInterval(Reg); - LaneBitmask LaneMask = LaneBitmask::getNone(); - if (LI.hasSubRanges()) { - for (const auto &S : LI.subranges()) - if (S.liveAt(SI)) - LaneMask |= S.LaneMask; - } else if (LI.liveAt(SI)) { - LaneMask = MRI.getMaxLaneMaskForVReg(Reg); - } + // Scheduler sends regions from the end of the block upwards. + size_t CurRegion = RegionIdx; + for (size_t E = Regions.size(); CurRegion != E; ++CurRegion) + if (Regions[CurRegion].first->getParent() != MBB) + break; + --CurRegion; + + auto I = MBB->begin(); + auto LiveInIt = MBBLiveIns.find(MBB); + if (LiveInIt != MBBLiveIns.end()) { + auto LiveIn = std::move(LiveInIt->second); + RPTracker.reset(*MBB->begin(), &LiveIn); + MBBLiveIns.erase(LiveInIt); + } else { + I = Regions[CurRegion].first; + RPTracker.reset(*I); + } - if (LaneMask.any()) { - setMask(MRI, SRI, Reg, LiveIns[Reg], LaneMask, SGPRs, VGPRs); + for ( ; ; ) { + I = RPTracker.getNext(); - DEBUG(dbgs() << ' ' << PrintVRegOrUnit(Reg, SRI) << ':' - << PrintLaneMask(LiveIns[Reg])); + if (Regions[CurRegion].first == I) { + LiveIns[CurRegion] = RPTracker.getLiveRegs(); + RPTracker.clearMaxPressure(); } - } - LiveInPressure = std::make_pair(SGPRs, VGPRs); + if (Regions[CurRegion].second == I) { + Pressure[CurRegion] = RPTracker.moveMaxPressure(); + if (CurRegion-- == RegionIdx) + break; + } + RPTracker.advanceToNext(); + RPTracker.advanceBeforeNext(); + } - DEBUG(dbgs() << "\nLive-in pressure:\nSGPR = " << SGPRs - << "\nVGPR = " << VGPRs << '\n'); + if (OnlySucc) { + if (I != MBB->end()) { + RPTracker.advanceToNext(); + RPTracker.advance(MBB->end()); + } + RPTracker.reset(*OnlySucc->begin(), &RPTracker.getLiveRegs()); + RPTracker.advanceBeforeNext(); + MBBLiveIns[OnlySucc] = RPTracker.moveLiveRegs(); + } } -std::pair -GCNScheduleDAGMILive::getRealRegPressure() const { - unsigned SGPRs, MaxSGPRs, VGPRs, MaxVGPRs; - SGPRs = MaxSGPRs = LiveInPressure.first; - VGPRs = MaxVGPRs = LiveInPressure.second; - - const SIRegisterInfo *SRI = static_cast(TRI); - DenseMap LiveRegs(LiveIns); +void GCNScheduleDAGMILive::finalizeSchedule() { + GCNMaxOccupancySchedStrategy &S = (GCNMaxOccupancySchedStrategy&)*SchedImpl; + DEBUG(dbgs() << "All regions recorded, starting actual scheduling.\n"); - for (const MachineInstr &MI : *this) { - if (MI.isDebugValue()) - continue; - SlotIndex SI = LIS->getInstructionIndex(MI).getBaseIndex(); - assert (SI.isValid()); + LiveIns.resize(Regions.size()); + Pressure.resize(Regions.size()); - // Remove dead registers or mask bits. 
- for (auto &It : LiveRegs) { - if (It.second.none()) - continue; - const LiveInterval &LI = LIS->getInterval(It.first); - if (LI.hasSubRanges()) { - for (const auto &S : LI.subranges()) - if (!S.liveAt(SI)) - setMask(MRI, SRI, It.first, It.second, It.second & ~S.LaneMask, - SGPRs, VGPRs); - } else if (!LI.liveAt(SI)) { - setMask(MRI, SRI, It.first, It.second, LaneBitmask::getNone(), - SGPRs, VGPRs); - } - } + do { + Stage++; + RegionIdx = 0; + MachineBasicBlock *MBB = nullptr; - // Add new registers or mask bits. - for (const auto &MO : MI.defs()) { - if (!MO.isReg()) - continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) - continue; - unsigned SubRegIdx = MO.getSubReg(); - LaneBitmask LaneMask = SubRegIdx != 0 - ? TRI->getSubRegIndexLaneMask(SubRegIdx) - : MRI.getMaxLaneMaskForVReg(Reg); - LaneBitmask &LM = LiveRegs[Reg]; - setMask(MRI, SRI, Reg, LM, LM | LaneMask, SGPRs, VGPRs); - } - MaxSGPRs = std::max(MaxSGPRs, SGPRs); - MaxVGPRs = std::max(MaxVGPRs, VGPRs); - } + if (Stage > 1) { + // Retry function scheduling if we found resulting occupancy and it is + // lower than used for first pass scheduling. This will give more freedom + // to schedule low register pressure blocks. + // Code is partially copied from MachineSchedulerBase::scheduleRegions(). - DEBUG(dbgs() << "Real region's register pressure:\nSGPR = " << MaxSGPRs - << "\nVGPR = " << MaxVGPRs << '\n'); + if (!LIS || StartingOccupancy <= MinOccupancy) + break; - return std::make_pair(MaxSGPRs, MaxVGPRs); -} + DEBUG(dbgs() + << "Retrying function scheduling with lowest recorded occupancy " + << MinOccupancy << ".\n"); -void GCNScheduleDAGMILive::finalizeSchedule() { - // Retry function scheduling if we found resulting occupancy and it is - // lower than used for first pass scheduling. This will give more freedom - // to schedule low register pressure blocks. - // Code is partially copied from MachineSchedulerBase::scheduleRegions(). + S.setTargetOccupancy(MinOccupancy); + } - if (!LIS || StartingOccupancy <= MinOccupancy) - return; + for (auto Region : Regions) { + RegionBegin = Region.first; + RegionEnd = Region.second; - DEBUG(dbgs() << "Retrying function scheduling with lowest recorded occupancy " - << MinOccupancy << ".\n"); + if (RegionBegin->getParent() != MBB) { + if (MBB) finishBlock(); + MBB = RegionBegin->getParent(); + startBlock(MBB); + if (Stage == 1) + computeBlockPressure(MBB); + } - Stage++; - GCNMaxOccupancySchedStrategy &S = (GCNMaxOccupancySchedStrategy&)*SchedImpl; - S.setTargetOccupancy(MinOccupancy); + unsigned NumRegionInstrs = std::distance(begin(), end()); + enterRegion(MBB, begin(), end(), NumRegionInstrs); - MachineBasicBlock *MBB = nullptr; - for (auto Region : Regions) { - RegionBegin = Region.first; - RegionEnd = Region.second; + // Skip empty scheduling regions (0 or 1 schedulable instructions). 
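// Mirrors the guard in MachineSchedulerBase::scheduleRegions(): a region
// with zero or one schedulable instruction has nothing to reorder, so it
// is entered and exited without invoking the scheduler.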
+      if (begin() == end() || begin() == std::prev(end())) {
+        exitRegion();
+        continue;
+      }

-    if (RegionBegin->getParent() != MBB) {
-      if (MBB) finishBlock();
-      MBB = RegionBegin->getParent();
-      startBlock(MBB);
-    }
+      DEBUG(dbgs() << "********** MI Scheduling **********\n");
+      DEBUG(dbgs() << MF.getName()
+            << ":BB#" << MBB->getNumber() << " " << MBB->getName()
+            << "\n  From: " << *begin() << "    To: ";
+            if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
+            else dbgs() << "End";
+            dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');

-    unsigned NumRegionInstrs = std::distance(begin(), end());
-    enterRegion(MBB, begin(), end(), NumRegionInstrs);
+      schedule();

-    // Skip empty scheduling regions (0 or 1 schedulable instructions).
-    if (begin() == end() || begin() == std::prev(end())) {
       exitRegion();
-      continue;
+      ++RegionIdx;
     }

-    DEBUG(dbgs() << "********** MI Scheduling **********\n");
-    DEBUG(dbgs() << MF.getName()
-          << ":BB#" << MBB->getNumber() << " " << MBB->getName()
-          << "\n  From: " << *begin() << "    To: ";
-          if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
-          else dbgs() << "End";
-          dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
+    finishBlock();

-    schedule();
-
-    exitRegion();
-  }
-  finishBlock();
-  LiveIns.shrink_and_clear();
+  } while (Stage < 2);
 }
diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/GCNSchedStrategy.h b/interpreter/llvm/src/lib/Target/AMDGPU/GCNSchedStrategy.h
index 15af232704ffa..060d2ca72d93d 100644
--- a/interpreter/llvm/src/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/interpreter/llvm/src/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -14,6 +14,7 @@
 #ifndef LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H
 #define LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H

+#include "GCNRegPressure.h"
 #include "llvm/CodeGen/MachineScheduler.h"

 namespace llvm {
@@ -65,7 +66,7 @@ class GCNScheduleDAGMILive : public ScheduleDAGMILive {

   const SIMachineFunctionInfo &MFI;

-  // Occupancy target at the begining of function scheduling cycle.
+  // Occupancy target at the beginning of function scheduling cycle.
   unsigned StartingOccupancy;

   // Minimal real occupancy recorded for the function.
   unsigned MinOccupancy;

@@ -74,21 +75,28 @@ class GCNScheduleDAGMILive : public ScheduleDAGMILive {
   // Scheduling stage number.
   unsigned Stage;

+  // Current region index.
+  size_t RegionIdx;
+
   // Vector of regions recorded for later rescheduling
   SmallVector<std::pair<MachineBasicBlock::iterator,
                         MachineBasicBlock::iterator>, 32> Regions;

-  // Region live-ins.
-  DenseMap<unsigned, LaneBitmask> LiveIns;
+  // Region live-in cache.
+  SmallVector<GCNRPTracker::LiveRegSet, 32> LiveIns;
+
+  // Region pressure cache.
+  SmallVector<GCNRegPressure, 32> Pressure;
+
+  // Temporary basic block live-in cache.
+  DenseMap<const MachineBasicBlock *, GCNRPTracker::LiveRegSet> MBBLiveIns;

-  // Number of live-ins to the current region, first SGPR then VGPR.
-  std::pair<unsigned, unsigned> LiveInPressure;
+  // Return current region pressure.
+  GCNRegPressure getRealRegPressure() const;

-  // Collect current region live-ins.
-  void discoverLiveIns();
+  // Compute and cache live-ins and pressure for all regions in block.
+  void computeBlockPressure(const MachineBasicBlock *MBB);

-  // Return current region pressure. First value is SGPR number, second is VGPR.
- std::pair getRealRegPressure() const; public: GCNScheduleDAGMILive(MachineSchedContext *C, diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp b/interpreter/llvm/src/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp index a817ff3cbaf09..a844081db5b2d 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp +++ b/interpreter/llvm/src/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp @@ -9,8 +9,8 @@ //===----------------------------------------------------------------------===// #include "AMDGPUInstPrinter.h" -#include "SIDefines.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "SIDefines.h" #include "Utils/AMDGPUAsmUtils.h" #include "Utils/AMDGPUBaseInfo.h" #include "llvm/MC/MCExpr.h" @@ -72,6 +72,11 @@ void AMDGPUInstPrinter::printU16ImmDecOperand(const MCInst *MI, unsigned OpNo, O << formatDec(MI->getOperand(OpNo).getImm() & 0xffff); } +void AMDGPUInstPrinter::printS16ImmDecOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + O << formatDec(static_cast(MI->getOperand(OpNo).getImm())); +} + void AMDGPUInstPrinter::printU32ImmOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O) { @@ -118,6 +123,16 @@ void AMDGPUInstPrinter::printOffset(const MCInst *MI, unsigned OpNo, } } +void AMDGPUInstPrinter::printOffsetS13(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, + raw_ostream &O) { + uint16_t Imm = MI->getOperand(OpNo).getImm(); + if (Imm != 0) { + O << ((OpNo == 0)? "offset:" : " offset:"); + printS16ImmDecOperand(MI, OpNo, O); + } +} + void AMDGPUInstPrinter::printOffset0(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O) { @@ -216,6 +231,24 @@ void AMDGPUInstPrinter::printExpVM(const MCInst *MI, unsigned OpNo, O << " vm"; } +void AMDGPUInstPrinter::printDFMT(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, + raw_ostream &O) { + if (MI->getOperand(OpNo).getImm()) { + O << " dfmt:"; + printU8ImmDecOperand(MI, OpNo, O); + } +} + +void AMDGPUInstPrinter::printNFMT(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, + raw_ostream &O) { + if (MI->getOperand(OpNo).getImm()) { + O << " nfmt:"; + printU8ImmDecOperand(MI, OpNo, O); + } +} + void AMDGPUInstPrinter::printRegOperand(unsigned RegNo, raw_ostream &O, const MCRegisterInfo &MRI) { switch (RegNo) { @@ -264,6 +297,11 @@ void AMDGPUInstPrinter::printRegOperand(unsigned RegNo, raw_ostream &O, case AMDGPU::FLAT_SCR_HI: O << "flat_scratch_hi"; return; + case AMDGPU::FP_REG: + case AMDGPU::SP_REG: + case AMDGPU::SCRATCH_WAVE_OFFSET_REG: + case AMDGPU::PRIVATE_RSRC_REG: + llvm_unreachable("pseudo-register should not ever be emitted"); default: break; } @@ -379,7 +417,6 @@ void AMDGPUInstPrinter::printImmediateV216(uint32_t Imm, const MCSubtargetInfo &STI, raw_ostream &O) { uint16_t Lo16 = static_cast(Imm); - assert(Lo16 == static_cast(Imm >> 16)); printImmediate16(Lo16, STI, O); } @@ -1160,6 +1197,112 @@ void AMDGPUInstPrinter::printSendMsg(const MCInst *MI, unsigned OpNo, O << SImm16; // Unknown simm16 code. 
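// printSwizzleBitmask (just below) classifies each output bit of the
// bitmask swizzle by probing x = all-zeros and x = all-ones through
// ((x & AndMask) | OrMask) ^ XorMask: equal probe results mean a constant
// '0'/'1', differing results mean the bit tracks the input, printed as 'p'
// (preserved) or 'i' (inverted). E.g. And=0b11010, Or=0b00100, Xor=0b00001
// probes to 0b00101 / 0b11111 and prints "pp1p1".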
} +static void printSwizzleBitmask(const uint16_t AndMask, + const uint16_t OrMask, + const uint16_t XorMask, + raw_ostream &O) { + using namespace llvm::AMDGPU::Swizzle; + + uint16_t Probe0 = ((0 & AndMask) | OrMask) ^ XorMask; + uint16_t Probe1 = ((BITMASK_MASK & AndMask) | OrMask) ^ XorMask; + + O << "\""; + + for (unsigned Mask = 1 << (BITMASK_WIDTH - 1); Mask > 0; Mask >>= 1) { + uint16_t p0 = Probe0 & Mask; + uint16_t p1 = Probe1 & Mask; + + if (p0 == p1) { + if (p0 == 0) { + O << "0"; + } else { + O << "1"; + } + } else { + if (p0 == 0) { + O << "p"; + } else { + O << "i"; + } + } + } + + O << "\""; +} + +void AMDGPUInstPrinter::printSwizzle(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, + raw_ostream &O) { + using namespace llvm::AMDGPU::Swizzle; + + uint16_t Imm = MI->getOperand(OpNo).getImm(); + if (Imm == 0) { + return; + } + + O << " offset:"; + + if ((Imm & QUAD_PERM_ENC_MASK) == QUAD_PERM_ENC) { + + O << "swizzle(" << IdSymbolic[ID_QUAD_PERM]; + for (auto i = 0; i < LANE_NUM; ++i) { + O << ","; + O << formatDec(Imm & LANE_MASK); + Imm >>= LANE_SHIFT; + } + O << ")"; + + } else if ((Imm & BITMASK_PERM_ENC_MASK) == BITMASK_PERM_ENC) { + + uint16_t AndMask = (Imm >> BITMASK_AND_SHIFT) & BITMASK_MASK; + uint16_t OrMask = (Imm >> BITMASK_OR_SHIFT) & BITMASK_MASK; + uint16_t XorMask = (Imm >> BITMASK_XOR_SHIFT) & BITMASK_MASK; + + if (AndMask == BITMASK_MAX && + OrMask == 0 && + countPopulation(XorMask) == 1) { + + O << "swizzle(" << IdSymbolic[ID_SWAP]; + O << ","; + O << formatDec(XorMask); + O << ")"; + + } else if (AndMask == BITMASK_MAX && + OrMask == 0 && XorMask > 0 && + isPowerOf2_64(XorMask + 1)) { + + O << "swizzle(" << IdSymbolic[ID_REVERSE]; + O << ","; + O << formatDec(XorMask + 1); + O << ")"; + + } else { + + uint16_t GroupSize = BITMASK_MAX - AndMask + 1; + if (GroupSize > 1 && + isPowerOf2_64(GroupSize) && + OrMask < GroupSize && + XorMask == 0) { + + O << "swizzle(" << IdSymbolic[ID_BROADCAST]; + O << ","; + O << formatDec(GroupSize); + O << ","; + O << formatDec(OrMask); + O << ")"; + + } else { + O << "swizzle(" << IdSymbolic[ID_BITMASK_PERM]; + O << ","; + printSwizzleBitmask(AndMask, OrMask, XorMask, O); + O << ")"; + } + } + } else { + printU16ImmDecOperand(MI, OpNo, O); + } +} + void AMDGPUInstPrinter::printWaitFlag(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O) { diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h b/interpreter/llvm/src/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h index c0b8e5c510893..7bbf99a85f409 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h +++ b/interpreter/llvm/src/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h @@ -42,6 +42,7 @@ class AMDGPUInstPrinter : public MCInstPrinter { void printU4ImmDecOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printU8ImmDecOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printU16ImmDecOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printS16ImmDecOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printU32ImmOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); void printNamedBit(const MCInst *MI, unsigned OpNo, raw_ostream &O, @@ -52,6 +53,9 @@ class AMDGPUInstPrinter : public MCInstPrinter { void printMBUFOffset(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printOffset(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); + void printOffsetS13(const MCInst *MI, 
unsigned OpNo, const MCSubtargetInfo &STI, + raw_ostream &O); + void printOffset0(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); void printOffset1(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, @@ -84,6 +88,10 @@ class AMDGPUInstPrinter : public MCInstPrinter { const MCSubtargetInfo &STI, raw_ostream &O); void printExpVM(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); + void printDFMT(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); + void printNFMT(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); void printRegOperand(unsigned RegNo, raw_ostream &O); void printVOPDst(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, @@ -193,6 +201,8 @@ class AMDGPUInstPrinter : public MCInstPrinter { raw_ostream &O); void printSendMsg(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); + void printSwizzle(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, + raw_ostream &O); void printWaitFlag(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); void printHwreg(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp b/interpreter/llvm/src/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp index f3266fe82955c..a50e3eb8d9cee 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp +++ b/interpreter/llvm/src/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp @@ -8,8 +8,8 @@ /// \file //===----------------------------------------------------------------------===// -#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "MCTargetDesc/AMDGPUFixupKinds.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/ADT/StringRef.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAssembler.h" @@ -30,14 +30,9 @@ class AMDGPUAsmBackend : public MCAsmBackend { unsigned getNumFixupKinds() const override { return AMDGPU::NumTargetFixupKinds; }; - void processFixupValue(const MCAssembler &Asm, - const MCAsmLayout &Layout, - const MCFixup &Fixup, const MCFragment *DF, - const MCValue &Target, uint64_t &Value, - bool &IsResolved) override; - - void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, - uint64_t Value, bool IsPCRel, MCContext &Ctx) const override; + void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target, MutableArrayRef Data, + uint64_t Value, bool IsResolved) const override; bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, const MCRelaxableFragment *DF, const MCAsmLayout &Layout) const override { @@ -102,36 +97,11 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value, } } -void AMDGPUAsmBackend::processFixupValue(const MCAssembler &Asm, - const MCAsmLayout &Layout, - const MCFixup &Fixup, const MCFragment *DF, - const MCValue &Target, uint64_t &Value, - bool &IsResolved) { - MCValue Res; - - // When we have complex expressions like: BB0_1 + (BB0_2 - 4), which are - // used for long branches, this function will be called with - // IsResolved = false and Value set to some pre-computed value. In - // the example above, the value would be: - // (BB0_1 + (BB0_2 - 4)) - CurrentOffsetFromStartOfFunction. - // This is not what we want. We just want the expression computation - // only. The reason the MC layer subtracts the current offset from the - // expression is because the fixup is of kind FK_PCRel_4. 
- // For these scenarios, evaluateAsValue gives us the computation that we - // want. - if (!IsResolved && Fixup.getValue()->evaluateAsValue(Res, Layout) && - Res.isAbsolute()) { - Value = Res.getConstant(); - IsResolved = true; - - } - if (IsResolved) - Value = adjustFixupValue(Fixup, Value, &Asm.getContext()); -} - -void AMDGPUAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, - unsigned DataSize, uint64_t Value, - bool IsPCRel, MCContext &Ctx) const { +void AMDGPUAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target, + MutableArrayRef Data, uint64_t Value, + bool IsResolved) const { + Value = adjustFixupValue(Fixup, Value, &Asm.getContext()); if (!Value) return; // Doesn't change encoding. @@ -142,7 +112,7 @@ void AMDGPUAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind()); uint32_t Offset = Fixup.getOffset(); - assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!"); + assert(Offset + NumBytes <= Data.size() && "Invalid fixup offset!"); // For each byte of the fragment that the fixup touches, mask in the bits from // the fixup value. diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp b/interpreter/llvm/src/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp index 647017d5061d5..4e828a791e09f 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp +++ b/interpreter/llvm/src/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp @@ -13,20 +13,12 @@ // //===----------------------------------------------------------------------===// -#include "AMDGPU.h" #include "AMDGPUCodeObjectMetadataStreamer.h" +#include "AMDGPU.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Module.h" -#include "llvm/Support/YAMLTraits.h" - -using namespace llvm::AMDGPU; -using namespace llvm::AMDGPU::CodeObject; - -LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(uint32_t) -LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(std::string) -LLVM_YAML_IS_SEQUENCE_VECTOR(Kernel::Arg::Metadata) -LLVM_YAML_IS_SEQUENCE_VECTOR(Kernel::Metadata) +#include "llvm/Support/raw_ostream.h" namespace llvm { @@ -37,192 +29,7 @@ static cl::opt VerifyCodeObjectMetadata( "amdgpu-verify-comd", cl::desc("Verify AMDGPU Code Object Metadata")); -namespace yaml { - -template <> -struct ScalarEnumerationTraits { - static void enumeration(IO &YIO, AccessQualifier &EN) { - YIO.enumCase(EN, "Default", AccessQualifier::Default); - YIO.enumCase(EN, "ReadOnly", AccessQualifier::ReadOnly); - YIO.enumCase(EN, "WriteOnly", AccessQualifier::WriteOnly); - YIO.enumCase(EN, "ReadWrite", AccessQualifier::ReadWrite); - } -}; - -template <> -struct ScalarEnumerationTraits { - static void enumeration(IO &YIO, AddressSpaceQualifier &EN) { - YIO.enumCase(EN, "Private", AddressSpaceQualifier::Private); - YIO.enumCase(EN, "Global", AddressSpaceQualifier::Global); - YIO.enumCase(EN, "Constant", AddressSpaceQualifier::Constant); - YIO.enumCase(EN, "Local", AddressSpaceQualifier::Local); - YIO.enumCase(EN, "Generic", AddressSpaceQualifier::Generic); - YIO.enumCase(EN, "Region", AddressSpaceQualifier::Region); - } -}; - -template <> -struct ScalarEnumerationTraits { - static void enumeration(IO &YIO, ValueKind &EN) { - YIO.enumCase(EN, "ByValue", ValueKind::ByValue); - YIO.enumCase(EN, "GlobalBuffer", ValueKind::GlobalBuffer); - YIO.enumCase(EN, "DynamicSharedPointer", ValueKind::DynamicSharedPointer); - YIO.enumCase(EN, 
"Sampler", ValueKind::Sampler); - YIO.enumCase(EN, "Image", ValueKind::Image); - YIO.enumCase(EN, "Pipe", ValueKind::Pipe); - YIO.enumCase(EN, "Queue", ValueKind::Queue); - YIO.enumCase(EN, "HiddenGlobalOffsetX", ValueKind::HiddenGlobalOffsetX); - YIO.enumCase(EN, "HiddenGlobalOffsetY", ValueKind::HiddenGlobalOffsetY); - YIO.enumCase(EN, "HiddenGlobalOffsetZ", ValueKind::HiddenGlobalOffsetZ); - YIO.enumCase(EN, "HiddenNone", ValueKind::HiddenNone); - YIO.enumCase(EN, "HiddenPrintfBuffer", ValueKind::HiddenPrintfBuffer); - YIO.enumCase(EN, "HiddenDefaultQueue", ValueKind::HiddenDefaultQueue); - YIO.enumCase(EN, "HiddenCompletionAction", - ValueKind::HiddenCompletionAction); - } -}; - -template <> -struct ScalarEnumerationTraits { - static void enumeration(IO &YIO, ValueType &EN) { - YIO.enumCase(EN, "Struct", ValueType::Struct); - YIO.enumCase(EN, "I8", ValueType::I8); - YIO.enumCase(EN, "U8", ValueType::U8); - YIO.enumCase(EN, "I16", ValueType::I16); - YIO.enumCase(EN, "U16", ValueType::U16); - YIO.enumCase(EN, "F16", ValueType::F16); - YIO.enumCase(EN, "I32", ValueType::I32); - YIO.enumCase(EN, "U32", ValueType::U32); - YIO.enumCase(EN, "F32", ValueType::F32); - YIO.enumCase(EN, "I64", ValueType::I64); - YIO.enumCase(EN, "U64", ValueType::U64); - YIO.enumCase(EN, "F64", ValueType::F64); - } -}; - -template <> -struct MappingTraits { - static void mapping(IO &YIO, Kernel::Attrs::Metadata &MD) { - YIO.mapOptional(Kernel::Attrs::Key::ReqdWorkGroupSize, - MD.mReqdWorkGroupSize, std::vector()); - YIO.mapOptional(Kernel::Attrs::Key::WorkGroupSizeHint, - MD.mWorkGroupSizeHint, std::vector()); - YIO.mapOptional(Kernel::Attrs::Key::VecTypeHint, - MD.mVecTypeHint, std::string()); - } -}; - -template <> -struct MappingTraits { - static void mapping(IO &YIO, Kernel::Arg::Metadata &MD) { - YIO.mapRequired(Kernel::Arg::Key::Size, MD.mSize); - YIO.mapRequired(Kernel::Arg::Key::Align, MD.mAlign); - YIO.mapRequired(Kernel::Arg::Key::ValueKind, MD.mValueKind); - YIO.mapRequired(Kernel::Arg::Key::ValueType, MD.mValueType); - YIO.mapOptional(Kernel::Arg::Key::PointeeAlign, MD.mPointeeAlign, - uint32_t(0)); - YIO.mapOptional(Kernel::Arg::Key::AccQual, MD.mAccQual, - AccessQualifier::Unknown); - YIO.mapOptional(Kernel::Arg::Key::AddrSpaceQual, MD.mAddrSpaceQual, - AddressSpaceQualifier::Unknown); - YIO.mapOptional(Kernel::Arg::Key::IsConst, MD.mIsConst, false); - YIO.mapOptional(Kernel::Arg::Key::IsPipe, MD.mIsPipe, false); - YIO.mapOptional(Kernel::Arg::Key::IsRestrict, MD.mIsRestrict, false); - YIO.mapOptional(Kernel::Arg::Key::IsVolatile, MD.mIsVolatile, false); - YIO.mapOptional(Kernel::Arg::Key::Name, MD.mName, std::string()); - YIO.mapOptional(Kernel::Arg::Key::TypeName, MD.mTypeName, std::string()); - } -}; - -template <> -struct MappingTraits { - static void mapping(IO &YIO, Kernel::CodeProps::Metadata &MD) { - YIO.mapOptional(Kernel::CodeProps::Key::KernargSegmentSize, - MD.mKernargSegmentSize, uint64_t(0)); - YIO.mapOptional(Kernel::CodeProps::Key::WorkgroupGroupSegmentSize, - MD.mWorkgroupGroupSegmentSize, uint32_t(0)); - YIO.mapOptional(Kernel::CodeProps::Key::WorkitemPrivateSegmentSize, - MD.mWorkitemPrivateSegmentSize, uint32_t(0)); - YIO.mapOptional(Kernel::CodeProps::Key::WavefrontNumSGPRs, - MD.mWavefrontNumSGPRs, uint16_t(0)); - YIO.mapOptional(Kernel::CodeProps::Key::WorkitemNumVGPRs, - MD.mWorkitemNumVGPRs, uint16_t(0)); - YIO.mapOptional(Kernel::CodeProps::Key::KernargSegmentAlign, - MD.mKernargSegmentAlign, uint8_t(0)); - YIO.mapOptional(Kernel::CodeProps::Key::GroupSegmentAlign, - 
MD.mGroupSegmentAlign, uint8_t(0)); - YIO.mapOptional(Kernel::CodeProps::Key::PrivateSegmentAlign, - MD.mPrivateSegmentAlign, uint8_t(0)); - YIO.mapOptional(Kernel::CodeProps::Key::WavefrontSize, - MD.mWavefrontSize, uint8_t(0)); - } -}; - -template <> -struct MappingTraits { - static void mapping(IO &YIO, Kernel::DebugProps::Metadata &MD) { - YIO.mapOptional(Kernel::DebugProps::Key::DebuggerABIVersion, - MD.mDebuggerABIVersion, std::vector()); - YIO.mapOptional(Kernel::DebugProps::Key::ReservedNumVGPRs, - MD.mReservedNumVGPRs, uint16_t(0)); - YIO.mapOptional(Kernel::DebugProps::Key::ReservedFirstVGPR, - MD.mReservedFirstVGPR, uint16_t(-1)); - YIO.mapOptional(Kernel::DebugProps::Key::PrivateSegmentBufferSGPR, - MD.mPrivateSegmentBufferSGPR, uint16_t(-1)); - YIO.mapOptional(Kernel::DebugProps::Key::WavefrontPrivateSegmentOffsetSGPR, - MD.mWavefrontPrivateSegmentOffsetSGPR, uint16_t(-1)); - } -}; - -template <> -struct MappingTraits { - static void mapping(IO &YIO, Kernel::Metadata &MD) { - YIO.mapRequired(Kernel::Key::Name, MD.mName); - YIO.mapOptional(Kernel::Key::Language, MD.mLanguage, std::string()); - YIO.mapOptional(Kernel::Key::LanguageVersion, MD.mLanguageVersion, - std::vector()); - if (!MD.mAttrs.empty() || !YIO.outputting()) - YIO.mapOptional(Kernel::Key::Attrs, MD.mAttrs); - if (!MD.mArgs.empty() || !YIO.outputting()) - YIO.mapOptional(Kernel::Key::Args, MD.mArgs); - if (!MD.mCodeProps.empty() || !YIO.outputting()) - YIO.mapOptional(Kernel::Key::CodeProps, MD.mCodeProps); - if (!MD.mDebugProps.empty() || !YIO.outputting()) - YIO.mapOptional(Kernel::Key::DebugProps, MD.mDebugProps); - } -}; - -template <> -struct MappingTraits { - static void mapping(IO &YIO, CodeObject::Metadata &MD) { - YIO.mapRequired(Key::Version, MD.mVersion); - YIO.mapOptional(Key::Printf, MD.mPrintf, std::vector()); - if (!MD.mKernels.empty() || !YIO.outputting()) - YIO.mapOptional(Key::Kernels, MD.mKernels); - } -}; - -} // end namespace yaml - namespace AMDGPU { - -/* static */ -std::error_code CodeObject::Metadata::fromYamlString( - std::string YamlString, CodeObject::Metadata &CodeObjectMetadata) { - yaml::Input YamlInput(YamlString); - YamlInput >> CodeObjectMetadata; - return YamlInput.error(); -} - -/* static */ -std::error_code CodeObject::Metadata::toYamlString( - CodeObject::Metadata CodeObjectMetadata, std::string &YamlString) { - raw_string_ostream YamlStream(YamlString); - yaml::Output YamlOutput(YamlStream, nullptr, std::numeric_limits::max()); - YamlOutput << CodeObjectMetadata; - return std::error_code(); -} - namespace CodeObject { void MetadataStreamer::dump(StringRef YamlString) const { diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.h b/interpreter/llvm/src/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.h index 8d4c51763f63d..c6681431d74d4 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.h +++ b/interpreter/llvm/src/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.h @@ -17,9 +17,9 @@ #define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATASTREAMER_H #include "AMDGPU.h" -#include "AMDGPUCodeObjectMetadata.h" #include "AMDKernelCodeT.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/AMDGPUCodeObjectMetadata.h" #include "llvm/Support/ErrorOr.h" namespace llvm { diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp b/interpreter/llvm/src/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp index 
073d19422e863..6abe7f3d37d5e 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp +++ b/interpreter/llvm/src/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp @@ -8,12 +8,12 @@ //===----------------------------------------------------------------------===// #include "AMDGPUMCTargetDesc.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixup.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCValue.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" using namespace llvm; diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h b/interpreter/llvm/src/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h index 3d3858ab47ece..1b062064ace1c 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h +++ b/interpreter/llvm/src/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h @@ -52,6 +52,18 @@ class AMDGPUMCCodeEmitter : public MCCodeEmitter { return 0; } + virtual unsigned getSDWASrcEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + return 0; + } + + virtual unsigned getSDWAVopcDstEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + return 0; + } + protected: uint64_t computeAvailableFeatures(const FeatureBitset &FB) const; void verifyInstructionPredicates(const MCInst &MI, diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/interpreter/llvm/src/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp index 8dc863f723e2e..2a0032fc9adcd 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ b/interpreter/llvm/src/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -11,12 +11,13 @@ // //===----------------------------------------------------------------------===// -#include "AMDGPU.h" #include "AMDGPUTargetStreamer.h" +#include "AMDGPU.h" #include "SIDefines.h" #include "Utils/AMDGPUBaseInfo.h" #include "Utils/AMDKernelCodeTUtils.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/Metadata.h" @@ -25,7 +26,6 @@ #include "llvm/MC/MCELFStreamer.h" #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCSectionELF.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/FormattedStream.h" namespace llvm { @@ -100,16 +100,6 @@ void AMDGPUTargetAsmStreamer::EmitAMDGPUSymbolType(StringRef SymbolName, } } -void AMDGPUTargetAsmStreamer::EmitAMDGPUHsaModuleScopeGlobal( - StringRef GlobalName) { - OS << "\t.amdgpu_hsa_module_global " << GlobalName << '\n'; -} - -void AMDGPUTargetAsmStreamer::EmitAMDGPUHsaProgramScopeGlobal( - StringRef GlobalName) { - OS << "\t.amdgpu_hsa_program_global " << GlobalName << '\n'; -} - bool AMDGPUTargetAsmStreamer::EmitCodeObjectMetadata(StringRef YamlString) { auto VerifiedYamlString = CodeObjectMetadataStreamer.toYamlString(YamlString); if (!VerifiedYamlString) @@ -214,24 +204,6 @@ void AMDGPUTargetELFStreamer::EmitAMDGPUSymbolType(StringRef SymbolName, Symbol->setType(ELF::STT_AMDGPU_HSA_KERNEL); } -void AMDGPUTargetELFStreamer::EmitAMDGPUHsaModuleScopeGlobal( - StringRef GlobalName) { - - MCSymbolELF *Symbol = cast( - getStreamer().getContext().getOrCreateSymbol(GlobalName)); - Symbol->setType(ELF::STT_OBJECT); - Symbol->setBinding(ELF::STB_LOCAL); -} - -void 
AMDGPUTargetELFStreamer::EmitAMDGPUHsaProgramScopeGlobal( - StringRef GlobalName) { - - MCSymbolELF *Symbol = cast( - getStreamer().getContext().getOrCreateSymbol(GlobalName)); - Symbol->setType(ELF::STT_OBJECT); - Symbol->setBinding(ELF::STB_GLOBAL); -} - bool AMDGPUTargetELFStreamer::EmitCodeObjectMetadata(StringRef YamlString) { auto VerifiedYamlString = CodeObjectMetadataStreamer.toYamlString(YamlString); if (!VerifiedYamlString) diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/interpreter/llvm/src/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h index 5c588bbded9c0..968128e94d0b2 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h +++ b/interpreter/llvm/src/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h @@ -44,10 +44,6 @@ class AMDGPUTargetStreamer : public MCTargetStreamer { virtual void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) = 0; - virtual void EmitAMDGPUHsaModuleScopeGlobal(StringRef GlobalName) = 0; - - virtual void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) = 0; - virtual void EmitStartOfCodeObjectMetadata(const Module &Mod); virtual void EmitKernelCodeObjectMetadata( @@ -74,10 +70,6 @@ class AMDGPUTargetAsmStreamer final : public AMDGPUTargetStreamer { void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override; - void EmitAMDGPUHsaModuleScopeGlobal(StringRef GlobalName) override; - - void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) override; - /// \returns True on success, false on failure. bool EmitCodeObjectMetadata(StringRef YamlString) override; }; @@ -105,10 +97,6 @@ class AMDGPUTargetELFStreamer final : public AMDGPUTargetStreamer { void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override; - void EmitAMDGPUHsaModuleScopeGlobal(StringRef GlobalName) override; - - void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) override; - /// \returns True on success, false on failure. 
bool EmitCodeObjectMetadata(StringRef YamlString) override; }; diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp b/interpreter/llvm/src/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp index 6015ec190fd47..eab90e1d344ca 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp +++ b/interpreter/llvm/src/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp @@ -14,10 +14,10 @@ // //===----------------------------------------------------------------------===// -#include "R600Defines.h" #include "MCTargetDesc/AMDGPUFixupKinds.h" #include "MCTargetDesc/AMDGPUMCCodeEmitter.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "R600Defines.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCFixup.h" diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp b/interpreter/llvm/src/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp index bda0928036fde..376c9bfe5ccf2 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp +++ b/interpreter/llvm/src/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp @@ -69,6 +69,14 @@ class SIMCCodeEmitter : public AMDGPUMCCodeEmitter { unsigned getSOPPBrEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const override; + + unsigned getSDWASrcEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const override; + + unsigned getSDWAVopcDstEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const override; }; } // end anonymous namespace @@ -244,9 +252,7 @@ uint32_t SIMCCodeEmitter::getLitEncoding(const MCOperand &MO, case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: { uint16_t Lo16 = static_cast(Imm); - assert(Lo16 == static_cast(Imm >> 16)); uint32_t Encoding = getLit16Encoding(Lo16, STI); - assert(Encoding != 255 && "packed constants can only be inline immediates"); return Encoding; } default: @@ -319,6 +325,63 @@ unsigned SIMCCodeEmitter::getSOPPBrEncoding(const MCInst &MI, unsigned OpNo, return getMachineOpValue(MI, MO, Fixups, STI); } +unsigned +SIMCCodeEmitter::getSDWASrcEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + using namespace AMDGPU::SDWA; + + uint64_t RegEnc = 0; + + const MCOperand &MO = MI.getOperand(OpNo); + + unsigned Reg = MO.getReg(); + RegEnc |= MRI.getEncodingValue(Reg); + RegEnc &= SDWA9EncValues::SRC_VGPR_MASK; + if (AMDGPU::isSGPR(AMDGPU::mc2PseudoReg(Reg), &MRI)) { + RegEnc |= SDWA9EncValues::SRC_SGPR_MASK; + } + return RegEnc; +} + +unsigned +SIMCCodeEmitter::getSDWAVopcDstEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + using namespace AMDGPU::SDWA; + + uint64_t RegEnc = 0; + + const MCOperand &MO = MI.getOperand(OpNo); + + unsigned Reg = MO.getReg(); + if (Reg != AMDGPU::VCC) { + RegEnc |= MRI.getEncodingValue(Reg); + RegEnc &= SDWA9EncValues::VOPC_DST_SGPR_MASK; + RegEnc |= SDWA9EncValues::VOPC_DST_VCC_MASK; + } + return RegEnc; +} + +static bool needsPCRel(const MCExpr *Expr) { + switch (Expr->getKind()) { + case MCExpr::SymbolRef: + return true; + case MCExpr::Binary: { + auto *BE = cast(Expr); + if (BE->getOpcode() == MCBinaryExpr::Sub) + return false; + return needsPCRel(BE->getLHS()) || needsPCRel(BE->getRHS()); + } + case MCExpr::Unary: + return needsPCRel(cast(Expr)->getSubExpr()); 
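+    // Neither target-specific nor constant expressions reference a symbol,
+    // so they never need a PC-relative fixup.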
+ case MCExpr::Target: + case MCExpr::Constant: + return false; + } + llvm_unreachable("invalid kind"); +} + uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI, const MCOperand &MO, SmallVectorImpl &Fixups, @@ -327,12 +390,21 @@ uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI, return MRI.getEncodingValue(MO.getReg()); if (MO.isExpr() && MO.getExpr()->getKind() != MCExpr::Constant) { - const auto *Expr = dyn_cast(MO.getExpr()); + // FIXME: If this is expression is PCRel or not should not depend on what + // the expression looks like. Given that this is just a general expression, + // it should probably be FK_Data_4 and whatever is producing + // + // s_add_u32 s2, s2, (extern_const_addrspace+16 + // + // And expecting a PCRel should instead produce + // + // .Ltmp1: + // s_add_u32 s2, s2, (extern_const_addrspace+16)-.Ltmp1 MCFixupKind Kind; - if (Expr && Expr->getSymbol().isExternal()) - Kind = FK_Data_4; - else + if (needsPCRel(MO.getExpr())) Kind = FK_PCRel_4; + else + Kind = FK_Data_4; Fixups.push_back(MCFixup::create(4, MO.getExpr(), Kind, MI.getLoc())); } diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/MIMGInstructions.td b/interpreter/llvm/src/lib/Target/AMDGPU/MIMGInstructions.td index a515eecc222af..06e2c11b01935 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/MIMGInstructions.td +++ b/interpreter/llvm/src/lib/Target/AMDGPU/MIMGInstructions.td @@ -26,6 +26,7 @@ class MIMG_Helper op, string asm, diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/Processors.td b/interpreter/llvm/src/lib/Target/AMDGPU/Processors.td index 0e4eda982139d..d30d1d382588c 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/Processors.td +++ b/interpreter/llvm/src/lib/Target/AMDGPU/Processors.td @@ -80,50 +80,53 @@ def : Proc<"cayman", R600_VLIW4_Itin, // Southern Islands //===----------------------------------------------------------------------===// -def : ProcessorModel<"SI", SIFullSpeedModel, - [FeatureSouthernIslands, FeatureFastFMAF32, HalfRate64Ops] +def : ProcessorModel<"gfx600", SIFullSpeedModel, + [FeatureISAVersion6_0_0]>; + +def : ProcessorModel<"SI", SIFullSpeedModel, + [FeatureISAVersion6_0_0] +>; + +def : ProcessorModel<"tahiti", SIFullSpeedModel, + [FeatureISAVersion6_0_0] >; -def : ProcessorModel<"tahiti", SIFullSpeedModel, - [FeatureSouthernIslands, FeatureFastFMAF32, HalfRate64Ops] +def : ProcessorModel<"gfx601", SIQuarterSpeedModel, + [FeatureISAVersion6_0_1] >; -def : ProcessorModel<"pitcairn", SIQuarterSpeedModel, [FeatureSouthernIslands]>; +def : ProcessorModel<"pitcairn", SIQuarterSpeedModel, + [FeatureISAVersion6_0_1]>; -def : ProcessorModel<"verde", SIQuarterSpeedModel, [FeatureSouthernIslands]>; +def : ProcessorModel<"verde", SIQuarterSpeedModel, + [FeatureISAVersion6_0_1]>; -def : ProcessorModel<"oland", SIQuarterSpeedModel, [FeatureSouthernIslands]>; +def : ProcessorModel<"oland", SIQuarterSpeedModel, + [FeatureISAVersion6_0_1]>; -def : ProcessorModel<"hainan", SIQuarterSpeedModel, [FeatureSouthernIslands]>; +def : ProcessorModel<"hainan", SIQuarterSpeedModel, [FeatureISAVersion6_0_1]>; //===----------------------------------------------------------------------===// // Sea Islands //===----------------------------------------------------------------------===// -def : ProcessorModel<"bonaire", SIQuarterSpeedModel, +def : ProcessorModel<"gfx700", SIQuarterSpeedModel, [FeatureISAVersion7_0_0] >; -def : ProcessorModel<"kabini", SIQuarterSpeedModel, - [FeatureISAVersion7_0_2] +def : ProcessorModel<"bonaire", SIQuarterSpeedModel, + 
[FeatureISAVersion7_0_0] >; def : ProcessorModel<"kaveri", SIQuarterSpeedModel, [FeatureISAVersion7_0_0] >; -def : ProcessorModel<"hawaii", SIFullSpeedModel, +def : ProcessorModel<"gfx701", SIFullSpeedModel, [FeatureISAVersion7_0_1] >; -def : ProcessorModel<"mullins", SIQuarterSpeedModel, - [FeatureISAVersion7_0_2]>; - -def : ProcessorModel<"gfx700", SIQuarterSpeedModel, - [FeatureISAVersion7_0_0] ->; - -def : ProcessorModel<"gfx701", SIFullSpeedModel, +def : ProcessorModel<"hawaii", SIFullSpeedModel, [FeatureISAVersion7_0_1] >; @@ -131,6 +134,17 @@ def : ProcessorModel<"gfx702", SIQuarterSpeedModel, [FeatureISAVersion7_0_2] >; +def : ProcessorModel<"gfx703", SIQuarterSpeedModel, + [FeatureISAVersion7_0_3] +>; + +def : ProcessorModel<"kabini", SIQuarterSpeedModel, + [FeatureISAVersion7_0_3] +>; + +def : ProcessorModel<"mullins", SIQuarterSpeedModel, + [FeatureISAVersion7_0_3]>; + //===----------------------------------------------------------------------===// // Volcanic Islands //===----------------------------------------------------------------------===// @@ -187,10 +201,23 @@ def : ProcessorModel<"gfx810", SIQuarterSpeedModel, [FeatureISAVersion8_1_0] >; -def : ProcessorModel<"gfx900", SIQuarterSpeedModel, - [FeatureGFX9, FeatureISAVersion9_0_0, FeatureLDSBankCount32] +//===----------------------------------------------------------------------===// +// GFX9 +//===----------------------------------------------------------------------===// + +def : ProcessorModel<"gfx900", SIQuarterSpeedModel, + [FeatureISAVersion9_0_0] +>; + +def : ProcessorModel<"gfx901", SIQuarterSpeedModel, + [FeatureISAVersion9_0_1] +>; + +def : ProcessorModel<"gfx902", SIQuarterSpeedModel, + [FeatureISAVersion9_0_2] >; -def : ProcessorModel<"gfx901", SIQuarterSpeedModel, - [FeatureGFX9, FeatureXNACK, FeatureISAVersion9_0_1, FeatureLDSBankCount32] +def : ProcessorModel<"gfx903", SIQuarterSpeedModel, + [FeatureISAVersion9_0_3] >; + diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/R600ClauseMergePass.cpp b/interpreter/llvm/src/lib/Target/AMDGPU/R600ClauseMergePass.cpp index d0aba38f786d3..fbe45cb222d93 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/R600ClauseMergePass.cpp +++ b/interpreter/llvm/src/lib/Target/AMDGPU/R600ClauseMergePass.cpp @@ -62,7 +62,7 @@ class R600ClauseMergePass : public MachineFunctionPass { const MachineInstr &LatrCFAlu) const; public: - R600ClauseMergePass(TargetMachine &tm) : MachineFunctionPass(ID) { } + R600ClauseMergePass() : MachineFunctionPass(ID) { } bool runOnMachineFunction(MachineFunction &MF) override; @@ -208,6 +208,6 @@ StringRef R600ClauseMergePass::getPassName() const { } // end anonymous namespace -llvm::FunctionPass *llvm::createR600ClauseMergePass(TargetMachine &TM) { - return new R600ClauseMergePass(TM); +llvm::FunctionPass *llvm::createR600ClauseMergePass() { + return new R600ClauseMergePass(); } diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp b/interpreter/llvm/src/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp index 811b905588b4b..00cbd24b84fbc 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp +++ b/interpreter/llvm/src/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp @@ -12,15 +12,14 @@ /// computing their address on the fly ; it also sets STACK_SIZE info. 
//===----------------------------------------------------------------------===// -#include "llvm/Support/Debug.h" #include "AMDGPU.h" #include "AMDGPUSubtarget.h" #include "R600Defines.h" #include "R600InstrInfo.h" #include "R600MachineFunctionInfo.h" #include "R600RegisterInfo.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" @@ -30,6 +29,7 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/DebugLoc.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include @@ -499,7 +499,7 @@ class R600ControlFlowFinalizer : public MachineFunctionPass { } public: - R600ControlFlowFinalizer(TargetMachine &tm) : MachineFunctionPass(ID) {} + R600ControlFlowFinalizer() : MachineFunctionPass(ID) {} bool runOnMachineFunction(MachineFunction &MF) override { ST = &MF.getSubtarget(); @@ -555,7 +555,7 @@ class R600ControlFlowFinalizer : public MachineFunctionPass { CFStack.pushBranch(AMDGPU::CF_PUSH_EG); } else CFStack.pushBranch(AMDGPU::CF_ALU_PUSH_BEFORE); - + LLVM_FALLTHROUGH; case AMDGPU::CF_ALU: I = MI; AluClauses.push_back(MakeALUClause(MBB, I)); @@ -706,6 +706,6 @@ char R600ControlFlowFinalizer::ID = 0; } // end anonymous namespace -FunctionPass *llvm::createR600ControlFlowFinalizer(TargetMachine &TM) { - return new R600ControlFlowFinalizer(TM); +FunctionPass *llvm::createR600ControlFlowFinalizer() { + return new R600ControlFlowFinalizer(); } diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp b/interpreter/llvm/src/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp index 03fc1aff5ec15..0d8ccd088ec4e 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp +++ b/interpreter/llvm/src/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp @@ -15,10 +15,10 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" +#include "AMDGPUSubtarget.h" #include "R600Defines.h" #include "R600InstrInfo.h" #include "R600RegisterInfo.h" -#include "AMDGPUSubtarget.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/MachineBasicBlock.h" diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp b/interpreter/llvm/src/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp index 3e46e6387614e..66def2d29caff 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp +++ b/interpreter/llvm/src/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp @@ -15,11 +15,11 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" +#include "AMDGPUSubtarget.h" #include "R600Defines.h" #include "R600InstrInfo.h" #include "R600MachineFunctionInfo.h" #include "R600RegisterInfo.h" -#include "AMDGPUSubtarget.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -37,7 +37,7 @@ class R600ExpandSpecialInstrsPass : public MachineFunctionPass { unsigned Op); public: - R600ExpandSpecialInstrsPass(TargetMachine &tm) : MachineFunctionPass(ID), + R600ExpandSpecialInstrsPass() : MachineFunctionPass(ID), TII(nullptr) { } bool runOnMachineFunction(MachineFunction &MF) override; @@ -51,8 +51,8 @@ class R600ExpandSpecialInstrsPass : public MachineFunctionPass { char R600ExpandSpecialInstrsPass::ID = 0; -FunctionPass 
*llvm::createR600ExpandSpecialInstrsPass(TargetMachine &TM) { - return new R600ExpandSpecialInstrsPass(TM); +FunctionPass *llvm::createR600ExpandSpecialInstrsPass() { + return new R600ExpandSpecialInstrsPass(); } void R600ExpandSpecialInstrsPass::SetFlagInNewMI(MachineInstr *NewMI, diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/R600FrameLowering.cpp b/interpreter/llvm/src/lib/Target/AMDGPU/R600FrameLowering.cpp index 1f01ad732e00a..37787b3c5f729 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/R600FrameLowering.cpp +++ b/interpreter/llvm/src/lib/Target/AMDGPU/R600FrameLowering.cpp @@ -10,8 +10,8 @@ #include "R600FrameLowering.h" #include "AMDGPUSubtarget.h" #include "R600RegisterInfo.h" -#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/Support/MathExtras.h" using namespace llvm; diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/R600ISelLowering.cpp b/interpreter/llvm/src/lib/Target/AMDGPU/R600ISelLowering.cpp index 3590a9b05e1d0..69a63b6941ef2 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/R600ISelLowering.cpp +++ b/interpreter/llvm/src/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -584,23 +584,23 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const return LowerImplicitParameter(DAG, VT, DL, 8); case Intrinsic::r600_read_tgid_x: - return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, - AMDGPU::T1_X, VT); + return CreateLiveInRegisterRaw(DAG, &AMDGPU::R600_TReg32RegClass, + AMDGPU::T1_X, VT); case Intrinsic::r600_read_tgid_y: - return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, - AMDGPU::T1_Y, VT); + return CreateLiveInRegisterRaw(DAG, &AMDGPU::R600_TReg32RegClass, + AMDGPU::T1_Y, VT); case Intrinsic::r600_read_tgid_z: - return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, - AMDGPU::T1_Z, VT); + return CreateLiveInRegisterRaw(DAG, &AMDGPU::R600_TReg32RegClass, + AMDGPU::T1_Z, VT); case Intrinsic::r600_read_tidig_x: - return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, - AMDGPU::T0_X, VT); + return CreateLiveInRegisterRaw(DAG, &AMDGPU::R600_TReg32RegClass, + AMDGPU::T0_X, VT); case Intrinsic::r600_read_tidig_y: - return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, - AMDGPU::T0_Y, VT); + return CreateLiveInRegisterRaw(DAG, &AMDGPU::R600_TReg32RegClass, + AMDGPU::T0_Y, VT); case Intrinsic::r600_read_tidig_z: - return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, - AMDGPU::T0_Z, VT); + return CreateLiveInRegisterRaw(DAG, &AMDGPU::R600_TReg32RegClass, + AMDGPU::T0_Z, VT); case Intrinsic::r600_recipsqrt_ieee: return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1)); @@ -1120,7 +1120,7 @@ SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store, Mask = DAG.getConstant(0xff, DL, MVT::i32); } else if (Store->getMemoryVT() == MVT::i16) { assert(Store->getAlignment() >= 2); - Mask = DAG.getConstant(0xffff, DL, MVT::i32);; + Mask = DAG.getConstant(0xffff, DL, MVT::i32); } else { llvm_unreachable("Unsupported private trunc store"); } @@ -1618,6 +1618,15 @@ EVT R600TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &, return VT.changeVectorElementTypeToInteger(); } +bool R600TargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT, + const SelectionDAG &DAG) const { + // Local and Private addresses do not handle vectors. 
Limit to i32 + if ((AS == AMDGPUASI.LOCAL_ADDRESS || AS == AMDGPUASI.PRIVATE_ADDRESS)) { + return (MemVT.getSizeInBits() <= 32); + } + return true; +} + bool R600TargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, unsigned Align, diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/R600ISelLowering.h b/interpreter/llvm/src/lib/Target/AMDGPU/R600ISelLowering.h index 9700ce14c6f31..2a774693f02b3 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/R600ISelLowering.h +++ b/interpreter/llvm/src/lib/Target/AMDGPU/R600ISelLowering.h @@ -44,6 +44,9 @@ class R600TargetLowering final : public AMDGPUTargetLowering { EVT getSetCCResultType(const DataLayout &DL, LLVMContext &, EVT VT) const override; + bool canMergeStoresTo(unsigned AS, EVT MemVT, + const SelectionDAG &DAG) const override; + bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align, bool *IsFast) const override; diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/R600InstrInfo.cpp b/interpreter/llvm/src/lib/Target/AMDGPU/R600InstrInfo.cpp index 2422d57269eb9..c5da5e4042004 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/R600InstrInfo.cpp +++ b/interpreter/llvm/src/lib/Target/AMDGPU/R600InstrInfo.cpp @@ -12,12 +12,12 @@ // //===----------------------------------------------------------------------===// +#include "R600InstrInfo.h" #include "AMDGPU.h" #include "AMDGPUInstrInfo.h" #include "AMDGPUSubtarget.h" #include "R600Defines.h" #include "R600FrameLowering.h" -#include "R600InstrInfo.h" #include "R600RegisterInfo.h" #include "Utils/AMDGPUBaseInfo.h" #include "llvm/ADT/BitVector.h" @@ -35,8 +35,8 @@ #include "llvm/Target/TargetSubtargetInfo.h" #include #include -#include #include +#include #include #include #include diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/R600MachineScheduler.cpp b/interpreter/llvm/src/lib/Target/AMDGPU/R600MachineScheduler.cpp index db18e5bd1afae..a7e540f9d14d3 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/R600MachineScheduler.cpp +++ b/interpreter/llvm/src/lib/Target/AMDGPU/R600MachineScheduler.cpp @@ -13,16 +13,16 @@ //===----------------------------------------------------------------------===// #include "R600MachineScheduler.h" -#include "R600InstrInfo.h" #include "AMDGPUSubtarget.h" +#include "R600InstrInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Pass.h" #include "llvm/IR/LegacyPassManager.h" +#include "llvm/Pass.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; -#define DEBUG_TYPE "misched" +#define DEBUG_TYPE "machine-scheduler" void R600SchedStrategy::initialize(ScheduleDAGMI *dag) { assert(dag->hasVRegLiveness() && "R600SchedStrategy needs vreg liveness"); diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp b/interpreter/llvm/src/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp index d90008a550aeb..502dd3bce97e1 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp +++ b/interpreter/llvm/src/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp @@ -124,7 +124,7 @@ class R600VectorRegMerger : public MachineFunctionPass { public: static char ID; - R600VectorRegMerger(TargetMachine &tm) : MachineFunctionPass(ID), + R600VectorRegMerger() : MachineFunctionPass(ID), TII(nullptr) { } void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -396,6 +396,6 @@ bool R600VectorRegMerger::runOnMachineFunction(MachineFunction &Fn) { return false; } -llvm::FunctionPass *llvm::createR600VectorRegMerger(TargetMachine &tm) { - return new R600VectorRegMerger(tm); 
+llvm::FunctionPass *llvm::createR600VectorRegMerger() { + return new R600VectorRegMerger(); } diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/R600Packetizer.cpp b/interpreter/llvm/src/lib/Target/AMDGPU/R600Packetizer.cpp index 5b6dd1ed128dc..1cb40938cee72 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/R600Packetizer.cpp +++ b/interpreter/llvm/src/lib/Target/AMDGPU/R600Packetizer.cpp @@ -14,7 +14,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Support/Debug.h" #include "AMDGPU.h" #include "AMDGPUSubtarget.h" #include "R600InstrInfo.h" @@ -24,6 +23,7 @@ #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -36,7 +36,7 @@ class R600Packetizer : public MachineFunctionPass { public: static char ID; - R600Packetizer(const TargetMachine &TM) : MachineFunctionPass(ID) {} + R600Packetizer() : MachineFunctionPass(ID) {} void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); @@ -404,6 +404,6 @@ bool R600Packetizer::runOnMachineFunction(MachineFunction &Fn) { } // end anonymous namespace -llvm::FunctionPass *llvm::createR600Packetizer(TargetMachine &tm) { - return new R600Packetizer(tm); +llvm::FunctionPass *llvm::createR600Packetizer() { + return new R600Packetizer(); } diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/R600RegisterInfo.cpp b/interpreter/llvm/src/lib/Target/AMDGPU/R600RegisterInfo.cpp index dfdc602b80cdf..7501facb0cba1 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/R600RegisterInfo.cpp +++ b/interpreter/llvm/src/lib/Target/AMDGPU/R600RegisterInfo.cpp @@ -56,6 +56,18 @@ BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const { return Reserved; } +// Dummy to not crash RegisterClassInfo. +static const MCPhysReg CalleeSavedReg = AMDGPU::NoRegister; + +const MCPhysReg *R600RegisterInfo::getCalleeSavedRegs( + const MachineFunction *) const { + return &CalleeSavedReg; +} + +unsigned R600RegisterInfo::getFrameRegister(const MachineFunction &MF) const { + return AMDGPU::NoRegister; +} + unsigned R600RegisterInfo::getHWRegChan(unsigned reg) const { return this->getEncodingValue(reg) >> HW_CHAN_SHIFT; } diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/R600RegisterInfo.h b/interpreter/llvm/src/lib/Target/AMDGPU/R600RegisterInfo.h index 9dfb3106c6ccb..f0d9644b02f20 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/R600RegisterInfo.h +++ b/interpreter/llvm/src/lib/Target/AMDGPU/R600RegisterInfo.h @@ -27,6 +27,8 @@ struct R600RegisterInfo final : public AMDGPURegisterInfo { R600RegisterInfo(); BitVector getReservedRegs(const MachineFunction &MF) const override; + const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override; + unsigned getFrameRegister(const MachineFunction &MF) const override; /// \brief get the HW encoding for a register's channel. 
unsigned getHWRegChan(unsigned reg) const; diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/R600RegisterInfo.td b/interpreter/llvm/src/lib/Target/AMDGPU/R600RegisterInfo.td index cc667d985a82e..3c1e8527284cf 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/R600RegisterInfo.td +++ b/interpreter/llvm/src/lib/Target/AMDGPU/R600RegisterInfo.td @@ -226,7 +226,7 @@ def R600_Reg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add R600_Addr, R600_KC0, R600_KC1, ZERO, HALF, ONE, ONE_INT, PV_X, ALU_LITERAL_X, NEG_ONE, NEG_HALF, - ALU_CONST, ALU_PARAM, OQAP + ALU_CONST, ALU_PARAM, OQAP, INDIRECT_BASE_ADDR )>; def R600_Predicate : RegisterClass <"AMDGPU", [i32], 32, (add diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/SIDebuggerInsertNops.cpp b/interpreter/llvm/src/lib/Target/AMDGPU/SIDebuggerInsertNops.cpp index 62ebef8e91af4..b5c439b21b893 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/SIDebuggerInsertNops.cpp +++ b/interpreter/llvm/src/lib/Target/AMDGPU/SIDebuggerInsertNops.cpp @@ -19,8 +19,8 @@ // //===----------------------------------------------------------------------===// -#include "SIInstrInfo.h" #include "AMDGPUSubtarget.h" +#include "SIInstrInfo.h" #include "llvm/ADT/DenseSet.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/SIDefines.h b/interpreter/llvm/src/lib/Target/AMDGPU/SIDefines.h index a01330cb9171e..3915c0e5bdbed 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/SIDefines.h +++ b/interpreter/llvm/src/lib/Target/AMDGPU/SIDefines.h @@ -118,6 +118,10 @@ namespace AMDGPU { // Operand for source modifiers for VOP instructions OPERAND_INPUT_MODS, + // Operand for SDWA instructions + OPERAND_SDWA_SRC, + OPERAND_SDWA_VOPC_DST, + /// Operand with 32-bit immediate that uses the constant bus. OPERAND_KIMM32, OPERAND_KIMM16 @@ -160,7 +164,8 @@ namespace AMDGPUAsmVariants { DEFAULT = 0, VOP3 = 1, SDWA = 2, - DPP = 3 + SDWA9 = 3, + DPP = 4 }; } @@ -276,6 +281,46 @@ enum WidthMinusOne { // WidthMinusOne, (5) [15:11] } // namespace Hwreg +namespace Swizzle { // Encoding of swizzle macro used in ds_swizzle_b32. + +enum Id { // id of symbolic names + ID_QUAD_PERM = 0, + ID_BITMASK_PERM, + ID_SWAP, + ID_REVERSE, + ID_BROADCAST +}; + +enum EncBits { + + // swizzle mode encodings + + QUAD_PERM_ENC = 0x8000, + QUAD_PERM_ENC_MASK = 0xFF00, + + BITMASK_PERM_ENC = 0x0000, + BITMASK_PERM_ENC_MASK = 0x8000, + + // QUAD_PERM encodings + + LANE_MASK = 0x3, + LANE_MAX = LANE_MASK, + LANE_SHIFT = 2, + LANE_NUM = 4, + + // BITMASK_PERM encodings + + BITMASK_MASK = 0x1F, + BITMASK_MAX = BITMASK_MASK, + BITMASK_WIDTH = 5, + + BITMASK_AND_SHIFT = 0, + BITMASK_OR_SHIFT = 5, + BITMASK_XOR_SHIFT = 10 +}; + +} // namespace Swizzle + namespace SDWA { enum SdwaSel { @@ -294,6 +339,18 @@ enum DstUnused { UNUSED_PRESERVE = 2, }; +enum SDWA9EncValues{ + SRC_SGPR_MASK = 0x100, + SRC_VGPR_MASK = 0xFF, + VOPC_DST_VCC_MASK = 0x80, + VOPC_DST_SGPR_MASK = 0x7F, + + SRC_VGPR_MIN = 0, + SRC_VGPR_MAX = 255, + SRC_SGPR_MIN = 256, + SRC_SGPR_MAX = 357, +}; + } // namespace SDWA } // namespace AMDGPU diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/interpreter/llvm/src/lib/Target/AMDGPU/SIFixSGPRCopies.cpp index 3cca815d87739..0a795c99f94e5 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/SIFixSGPRCopies.cpp +++ b/interpreter/llvm/src/lib/Target/AMDGPU/SIFixSGPRCopies.cpp @@ -65,10 +65,10 @@ /// ultimately led to the creation of an illegal COPY. 
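The Swizzle::EncBits values added to SIDefines.h above are what AMDGPUInstPrinter::printSwizzle (earlier in this patch) pattern-matches when rendering a ds_swizzle_b32 offset. A minimal standalone sketch of that decode, with the EncBits constants inlined; the constants come from the hunk above, while the printf formatting is illustrative only:

#include <cstdint>
#include <cstdio>

// Mirrors AMDGPUInstPrinter::printSwizzle: quad-perm mode is tagged by the
// high byte, bitmask-perm packs and/or/xor masks into 5-bit fields.
static void decodeSwizzle(uint16_t Imm) {
  if ((Imm & 0xFF00) == 0x8000) {                 // QUAD_PERM_ENC
    std::printf("swizzle(QUAD_PERM");
    for (int Lane = 0; Lane < 4; ++Lane) {        // LANE_NUM 2-bit selectors
      std::printf(",%u", unsigned(Imm & 0x3));    // LANE_MASK
      Imm >>= 2;                                  // LANE_SHIFT
    }
    std::printf(")\n");
  } else if ((Imm & 0x8000) == 0x0000) {          // BITMASK_PERM_ENC
    unsigned And = (Imm >> 0) & 0x1F;             // BITMASK_AND_SHIFT
    unsigned Or  = (Imm >> 5) & 0x1F;             // BITMASK_OR_SHIFT
    unsigned Xor = (Imm >> 10) & 0x1F;            // BITMASK_XOR_SHIFT
    std::printf("bitmask and=0x%x or=0x%x xor=0x%x\n", And, Or, Xor);
  }
}

int main() {
  decodeSwizzle(0x80E4); // identity: swizzle(QUAD_PERM,0,1,2,3)
  decodeSwizzle(0x041F); // and=0x1f or=0 xor=1; printSwizzle prints SWAP,1
}

printSwizzle additionally folds three bitmask idioms into friendlier spellings: and == 0x1F with a one-bit xor prints as SWAP, xor == 2^n - 1 as REVERSE with group size xor + 1, and a power-of-two group derived from the and mask with xor == 0 as BROADCAST.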
//===----------------------------------------------------------------------===// -#include "llvm/ADT/DenseSet.h" #include "AMDGPU.h" #include "AMDGPUSubtarget.h" #include "SIInstrInfo.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -174,6 +174,31 @@ static bool isSGPRToVGPRCopy(const TargetRegisterClass *SrcRC, return TRI.isSGPRClass(SrcRC) && TRI.hasVGPRs(DstRC); } +static bool tryChangeVGPRtoSGPRinCopy(MachineInstr &MI, + const SIRegisterInfo *TRI, + const SIInstrInfo *TII) { + MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); + auto &Src = MI.getOperand(1); + unsigned DstReg = MI.getOperand(0).getReg(); + unsigned SrcReg = Src.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(SrcReg) || + !TargetRegisterInfo::isVirtualRegister(DstReg)) + return false; + + for (const auto &MO : MRI.reg_nodbg_operands(DstReg)) { + const auto *UseMI = MO.getParent(); + if (UseMI == &MI) + continue; + if (MO.isDef() || UseMI->getParent() != MI.getParent() || + UseMI->getOpcode() <= TargetOpcode::GENERIC_OP_END || + !TII->isOperandLegal(*UseMI, UseMI->getOperandNo(&MO), &Src)) + return false; + } + // Change VGPR to SGPR destination. + MRI.setRegClass(DstReg, TRI->getEquivalentSGPRClass(MRI.getRegClass(DstReg))); + return true; +} + // Distribute an SGPR->VGPR copy of a REG_SEQUENCE into a VGPR REG_SEQUENCE. // // SGPRx = ... @@ -214,6 +239,9 @@ static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI, if (!isSGPRToVGPRCopy(SrcRC, DstRC, *TRI)) return false; + if (tryChangeVGPRtoSGPRinCopy(CopyUse, TRI, TII)) + return true; + // TODO: Could have multiple extracts? unsigned SubReg = CopyUse.getOperand(1).getSubReg(); if (SubReg != AMDGPU::NoSubRegister) @@ -563,6 +591,8 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) { break; } TII->moveToVALU(MI); + } else if (isSGPRToVGPRCopy(SrcRC, DstRC, *TRI)) { + tryChangeVGPRtoSGPRinCopy(MI, TRI, TII); } break; diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/SIFoldOperands.cpp b/interpreter/llvm/src/lib/Target/AMDGPU/SIFoldOperands.cpp index d63414735b95a..0aad8f0843d62 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/interpreter/llvm/src/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -13,6 +13,7 @@ #include "AMDGPUSubtarget.h" #include "SIInstrInfo.h" #include "SIMachineFunctionInfo.h" +#include "llvm/ADT/DepthFirstIterator.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -35,9 +36,12 @@ struct FoldCandidate { }; unsigned char UseOpNo; MachineOperand::MachineOperandType Kind; + bool Commuted; - FoldCandidate(MachineInstr *MI, unsigned OpNo, MachineOperand *FoldOp) : - UseMI(MI), OpToFold(nullptr), UseOpNo(OpNo), Kind(FoldOp->getType()) { + FoldCandidate(MachineInstr *MI, unsigned OpNo, MachineOperand *FoldOp, + bool Commuted_ = false) : + UseMI(MI), OpToFold(nullptr), UseOpNo(OpNo), Kind(FoldOp->getType()), + Commuted(Commuted_) { if (FoldOp->isImm()) { ImmToFold = FoldOp->getImm(); } else if (FoldOp->isFI()) { @@ -59,6 +63,10 @@ struct FoldCandidate { bool isReg() const { return Kind == MachineOperand::MO_Register; } + + bool isCommuted() const { + return Commuted; + } }; class SIFoldOperands : public MachineFunctionPass { @@ -129,6 +137,7 @@ static bool isInlineConstantIfFolded(const SIInstrInfo *TII, = TII->get(IsF32 ? 
AMDGPU::V_MAD_F32 : AMDGPU::V_MAD_F16); return TII->isInlineConstant(OpToFold, MadDesc.OpInfo[OpNo].OperandType); } + return false; } default: return false; @@ -159,6 +168,8 @@ static bool updateOperand(FoldCandidate &Fold, if (TargetRegisterInfo::isVirtualRegister(Old.getReg()) && TargetRegisterInfo::isVirtualRegister(New->getReg())) { Old.substVirtReg(New->getReg(), New->getSubReg(), TRI); + + Old.setIsUndef(New->isUndef()); return true; } @@ -237,8 +248,13 @@ static bool tryAddToFoldList(SmallVectorImpl &FoldList, !TII->commuteInstruction(*MI, false, CommuteIdx0, CommuteIdx1)) return false; - if (!TII->isOperandLegal(*MI, OpNo, OpToFold)) + if (!TII->isOperandLegal(*MI, OpNo, OpToFold)) { + TII->commuteInstruction(*MI, false, CommuteIdx0, CommuteIdx1); return false; + } + + FoldList.push_back(FoldCandidate(MI, OpNo, OpToFold, true)); + return true; } FoldList.push_back(FoldCandidate(MI, OpNo, OpToFold)); @@ -247,9 +263,10 @@ static bool tryAddToFoldList(SmallVectorImpl &FoldList, // If the use operand doesn't care about the value, this may be an operand only // used for register indexing, in which case it is unsafe to fold. -static bool isUseSafeToFold(const MachineInstr &MI, +static bool isUseSafeToFold(const SIInstrInfo *TII, + const MachineInstr &MI, const MachineOperand &UseMO) { - return !UseMO.isUndef(); + return !UseMO.isUndef() && !TII->isSDWA(MI); //return !MI.hasRegisterImplicitUseOperand(UseMO.getReg()); } @@ -261,7 +278,7 @@ void SIFoldOperands::foldOperand( SmallVectorImpl &CopiesToReplace) const { const MachineOperand &UseOp = UseMI->getOperand(UseOpIdx); - if (!isUseSafeToFold(*UseMI, UseOp)) + if (!isUseSafeToFold(TII, *UseMI, UseOp)) return; // FIXME: Fold operands with subregs. @@ -457,7 +474,7 @@ static MachineOperand *getImmOrMaterializedImm(MachineRegisterInfo &MRI, return &Op; MachineInstr *Def = MRI.getVRegDef(Op.getReg()); - if (Def->isMoveImmediate()) { + if (Def && Def->isMoveImmediate()) { MachineOperand &ImmSrc = Def->getOperand(1); if (ImmSrc.isImm()) return &ImmSrc; @@ -636,6 +653,7 @@ void SIFoldOperands::foldInstOperand(MachineInstr &MI, // again. The same constant folded instruction could also have a second // use operand. NextUse = MRI->use_begin(Dst.getReg()); + FoldList.clear(); continue; } @@ -698,6 +716,9 @@ void SIFoldOperands::foldInstOperand(MachineInstr &MI, DEBUG(dbgs() << "Folded source from " << MI << " into OpNo " << static_cast(Fold.UseOpNo) << " of " << *Fold.UseMI << '\n'); tryFoldInst(TII, Fold.UseMI); + } else if (Fold.isCommuted()) { + // Restoring instruction's original operand order if fold has failed. + TII->commuteInstruction(*Fold.UseMI, false); } } } @@ -714,7 +735,8 @@ const MachineOperand *SIFoldOperands::isClamp(const MachineInstr &MI) const { // Make sure sources are identical. const MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0); const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1); - if (!Src0->isReg() || Src0->getSubReg() != Src1->getSubReg() || + if (!Src0->isReg() || !Src1->isReg() || + Src0->getSubReg() != Src1->getSubReg() || Src0->getSubReg() != AMDGPU::NoSubRegister) return nullptr; @@ -904,12 +926,9 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) { // level. 
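The Commuted flag threaded through FoldCandidate above sets up a try-and-roll-back protocol: tryAddToFoldList commutes the instruction, re-tests operand legality, and either records the candidate as commuted or commutes straight back; foldInstOperand then restores the original operand order whenever a recorded commuted fold fails to apply. A small sketch of just that control flow, with a hypothetical two-source Inst standing in for MachineInstr and a toy legality rule:

#include <utility>
#include <vector>

// Hypothetical stand-in for a commutable two-source instruction; only the
// commute/restore protocol is being illustrated.
struct Inst {
  int Src0, Src1;
  void commute() { std::swap(Src0, Src1); }
  bool operandLegal(unsigned OpNo) const { return OpNo == 1; } // toy rule
};

struct Candidate { Inst *MI; unsigned OpNo; bool Commuted; };

// Commute, re-test, and either record the fold as commuted or undo the
// commute on the spot (the new FoldCandidate(MI, OpNo, OpToFold, true) path).
bool tryAddCommuted(Inst &MI, unsigned OpNo, std::vector<Candidate> &List) {
  MI.commute();
  if (!MI.operandLegal(OpNo)) {
    MI.commute();                 // still illegal: roll back, record nothing
    return false;
  }
  List.push_back({&MI, OpNo, true});
  return true;
}

// If a recorded commuted fold later fails, restore the original order,
// as foldInstOperand now does via Fold.isCommuted().
void onFoldFailed(const Candidate &C) {
  if (C.Commuted)
    C.MI->commute();
}

int main() {
  Inst I{10, 11};
  std::vector<Candidate> List;
  if (tryAddCommuted(I, 1, List))
    onFoldFailed(List.back());    // I ends up back in its original order
}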
bool IsIEEEMode = ST->enableIEEEBit(MF) || !MFI->hasNoSignedZerosFPMath(); - for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); - BI != BE; ++BI) { - - MachineBasicBlock &MBB = *BI; + for (MachineBasicBlock *MBB : depth_first(&MF)) { MachineBasicBlock::iterator I, Next; - for (I = MBB.begin(); I != MBB.end(); I = Next) { + for (I = MBB->begin(); I != MBB->end(); I = Next) { Next = std::next(I); MachineInstr &MI = *I; diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/SIFrameLowering.cpp b/interpreter/llvm/src/lib/Target/AMDGPU/SIFrameLowering.cpp index 1279f845de0e3..7334781916d81 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/interpreter/llvm/src/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -8,10 +8,10 @@ //==-----------------------------------------------------------------------===// #include "SIFrameLowering.h" +#include "AMDGPUSubtarget.h" #include "SIInstrInfo.h" #include "SIMachineFunctionInfo.h" #include "SIRegisterInfo.h" -#include "AMDGPUSubtarget.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -158,7 +158,7 @@ SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg( // No replacement necessary. if (ScratchWaveOffsetReg == AMDGPU::NoRegister || !MRI.isPhysRegUsed(ScratchWaveOffsetReg)) { - assert(MFI->getStackPtrOffsetReg() == AMDGPU::NoRegister); + assert(MFI->getStackPtrOffsetReg() == AMDGPU::SP_REG); return std::make_pair(AMDGPU::NoRegister, AMDGPU::NoRegister); } @@ -189,8 +189,6 @@ SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg( // ---- // 13 (+1) unsigned ReservedRegCount = 13; - if (SPReg != AMDGPU::NoRegister) - ++ReservedRegCount; if (AllSGPRs.size() < ReservedRegCount) return std::make_pair(ScratchWaveOffsetReg, SPReg); @@ -208,13 +206,6 @@ SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg( MRI.replaceRegWith(ScratchWaveOffsetReg, Reg); MFI->setScratchWaveOffsetReg(Reg); ScratchWaveOffsetReg = Reg; - } else { - if (SPReg == AMDGPU::NoRegister) - break; - - MRI.replaceRegWith(SPReg, Reg); - MFI->setStackPtrOffsetReg(Reg); - SPReg = Reg; break; } } @@ -223,8 +214,8 @@ SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg( return std::make_pair(ScratchWaveOffsetReg, SPReg); } -void SIFrameLowering::emitPrologue(MachineFunction &MF, - MachineBasicBlock &MBB) const { +void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF, + MachineBasicBlock &MBB) const { // Emit debugger prologue if "amdgpu-debugger-emit-prologue" attribute was // specified. const SISubtarget &ST = MF.getSubtarget(); @@ -255,13 +246,16 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF, // this point it appears we need the setup. This part of the prolog should be // emitted after frame indices are eliminated. 
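A brief note on the SIFoldOperands traversal change above: runOnMachineFunction now walks blocks with depth_first(&MF) (hence the new DepthFirstIterator.h include) rather than straight layout order, so blocks are visited in DFS preorder from the entry and blocks unreachable from it are skipped. A toy illustration of the resulting visit order, assuming a plain adjacency list in place of the MachineFunction CFG:

#include <cstdio>
#include <vector>

// Stand-in for llvm::depth_first(&MF): preorder DFS from the entry block.
static void dfs(const std::vector<std::vector<int>> &Succ, int BB,
                std::vector<bool> &Seen) {
  if (Seen[BB])
    return;
  Seen[BB] = true;
  std::printf("visit BB%d\n", BB);
  for (int S : Succ[BB])
    dfs(Succ, S, Seen);
}

int main() {
  // BB0 -> {BB1, BB2}, BB1 -> BB3, BB2 -> BB3; BB4 is unreachable.
  std::vector<std::vector<int>> Succ = {{1, 2}, {3}, {3}, {}, {}};
  std::vector<bool> Seen(Succ.size(), false);
  dfs(Succ, 0, Seen); // prints BB0, BB1, BB3, BB2; BB4 is never visited
}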
- if (MF.getFrameInfo().hasStackObjects() && MFI->hasFlatScratchInit()) + if (MFI->hasFlatScratchInit()) emitFlatScratchInit(ST, MF, MBB); unsigned SPReg = MFI->getStackPtrOffsetReg(); - if (SPReg != AMDGPU::NoRegister) { + if (SPReg != AMDGPU::SP_REG) { + assert(MRI.isReserved(SPReg) && "SPReg used but not reserved"); + DebugLoc DL; - int64_t StackSize = MF.getFrameInfo().getStackSize(); + const MachineFrameInfo &FrameInfo = MF.getFrameInfo(); + int64_t StackSize = FrameInfo.getStackSize(); if (StackSize == 0) { BuildMI(MBB, MBB.begin(), DL, TII->get(AMDGPU::COPY), SPReg) @@ -293,7 +287,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF, MF, SIRegisterInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET); unsigned PreloadedPrivateBufferReg = AMDGPU::NoRegister; - if (ST.isAmdCodeObjectV2(MF) || ST.isMesaGfxShader(MF)) { + if (ST.isAmdCodeObjectV2(MF)) { PreloadedPrivateBufferReg = TRI->getPreloadedValue( MF, SIRegisterInfo::PRIVATE_SEGMENT_BUFFER); } @@ -372,14 +366,14 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF, // Use relocations to get the pointer, and setup the other bits manually. uint64_t Rsrc23 = TII->getScratchRsrcWords23(); - if (MFI->hasPrivateMemoryInputPtr()) { + if (MFI->hasImplicitBufferPtr()) { unsigned Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1); if (AMDGPU::isCompute(MF.getFunction()->getCallingConv())) { const MCInstrDesc &Mov64 = TII->get(AMDGPU::S_MOV_B64); BuildMI(MBB, I, DL, Mov64, Rsrc01) - .addReg(PreloadedPrivateBufferReg) + .addReg(MFI->getImplicitBufferPtrUserSGPR()) .addReg(ScratchRsrcReg, RegState::ImplicitDefine); } else { const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM); @@ -394,7 +388,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF, MachineMemOperand::MODereferenceable, 0, 0); BuildMI(MBB, I, DL, LoadDwordX2, Rsrc01) - .addReg(PreloadedPrivateBufferReg) + .addReg(MFI->getImplicitBufferPtrUserSGPR()) .addImm(0) // offset .addImm(0) // glc .addMemOperand(MMO) @@ -424,9 +418,71 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF, } } +void SIFrameLowering::emitPrologue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + const SIMachineFunctionInfo *FuncInfo = MF.getInfo(); + if (FuncInfo->isEntryFunction()) { + emitEntryFunctionPrologue(MF, MBB); + return; + } + + const MachineFrameInfo &MFI = MF.getFrameInfo(); + const SISubtarget &ST = MF.getSubtarget(); + const SIInstrInfo *TII = ST.getInstrInfo(); + + unsigned StackPtrReg = FuncInfo->getStackPtrOffsetReg(); + unsigned FramePtrReg = FuncInfo->getFrameOffsetReg(); + + MachineBasicBlock::iterator MBBI = MBB.begin(); + DebugLoc DL; + + bool NeedFP = hasFP(MF); + if (NeedFP) { + // If we need a base pointer, set it up here. It's whatever the value of + // the stack pointer is at this point. Any variable size objects will be + // allocated after this, so we can still use the base pointer to reference + // locals. 
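+    // (The FP here is simply a snapshot of the incoming SP; the S_ADD_U32
+    // below scales NumBytes by the wavefront size because the stack size is
+    // per lane, so the wave's SP advances by NumBytes bytes for every lane.)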
+ BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg) + .addReg(StackPtrReg) + .setMIFlag(MachineInstr::FrameSetup); + } + + uint32_t NumBytes = MFI.getStackSize(); + if (NumBytes != 0 && hasSP(MF)) { + BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), StackPtrReg) + .addReg(StackPtrReg) + .addImm(NumBytes * ST.getWavefrontSize()) + .setMIFlag(MachineInstr::FrameSetup); + } +} + void SIFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { + const SIMachineFunctionInfo *FuncInfo = MF.getInfo(); + if (FuncInfo->isEntryFunction()) + return; + unsigned StackPtrReg = FuncInfo->getStackPtrOffsetReg(); + if (StackPtrReg == AMDGPU::NoRegister) + return; + + const MachineFrameInfo &MFI = MF.getFrameInfo(); + uint32_t NumBytes = MFI.getStackSize(); + + const SISubtarget &ST = MF.getSubtarget(); + const SIInstrInfo *TII = ST.getInstrInfo(); + MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); + DebugLoc DL; + + // FIXME: Clarify distinction between no set SP and SP. For callee functions, + // it's really whether we need SP to be accurate or not. + + if (NumBytes != 0 && hasSP(MF)) { + BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_SUB_U32), StackPtrReg) + .addReg(StackPtrReg) + .addImm(NumBytes * ST.getWavefrontSize()) + .setMIFlag(MachineInstr::FrameDestroy); + } } static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) { @@ -559,3 +615,19 @@ void SIFrameLowering::emitDebuggerPrologue(MachineFunction &MF, WorkItemIDObjectIdx, &AMDGPU::VGPR_32RegClass, TRI); } } + +bool SIFrameLowering::hasFP(const MachineFunction &MF) const { + // All stack operations are relative to the frame offset SGPR. + // TODO: Still want to eliminate sometimes. + const MachineFrameInfo &MFI = MF.getFrameInfo(); + + // XXX - Is this only called after frame is finalized? Should be able to check + // frame size. + return MFI.hasStackObjects() && !allStackObjectsAreDead(MFI); +} + +bool SIFrameLowering::hasSP(const MachineFunction &MF) const { + // All stack operations are relative to the frame offset SGPR. + const MachineFrameInfo &MFI = MF.getFrameInfo(); + return MFI.hasCalls() || MFI.hasVarSizedObjects(); +} diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/SIFrameLowering.h b/interpreter/llvm/src/lib/Target/AMDGPU/SIFrameLowering.h index 7ccd02b3c86a7..d4dfa1c7eaa86 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/SIFrameLowering.h +++ b/interpreter/llvm/src/lib/Target/AMDGPU/SIFrameLowering.h @@ -26,6 +26,8 @@ class SIFrameLowering final : public AMDGPUFrameLowering { AMDGPUFrameLowering(D, StackAl, LAO, TransAl) {} ~SIFrameLowering() override = default; + void emitEntryFunctionPrologue(MachineFunction &MF, + MachineBasicBlock &MBB) const; void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; void emitEpilogue(MachineFunction &MF, @@ -58,6 +60,10 @@ class SIFrameLowering final : public AMDGPUFrameLowering { /// \brief Emits debugger prologue. 
void emitDebuggerPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const; + +public: + bool hasFP(const MachineFunction &MF) const override; + bool hasSP(const MachineFunction &MF) const; }; } // end namespace llvm diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/SIISelLowering.cpp b/interpreter/llvm/src/lib/Target/AMDGPU/SIISelLowering.cpp index 48a14e4dbea26..2356405f09199 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/interpreter/llvm/src/lib/Target/AMDGPU/SIISelLowering.cpp @@ -17,12 +17,12 @@ #define _USE_MATH_DEFINES #endif +#include "SIISelLowering.h" #include "AMDGPU.h" #include "AMDGPUIntrinsicInfo.h" -#include "AMDGPUTargetMachine.h" #include "AMDGPUSubtarget.h" +#include "AMDGPUTargetMachine.h" #include "SIDefines.h" -#include "SIISelLowering.h" #include "SIInstrInfo.h" #include "SIMachineFunctionInfo.h" #include "SIRegisterInfo.h" @@ -211,6 +211,9 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, setOperationAction(ISD::UADDO, MVT::i32, Legal); setOperationAction(ISD::USUBO, MVT::i32, Legal); + setOperationAction(ISD::ADDCARRY, MVT::i32, Legal); + setOperationAction(ISD::SUBCARRY, MVT::i32, Legal); + // We only support LOAD/STORE and vector manipulation ops for vectors // with > 4 elements. for (MVT VT : {MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32, @@ -471,6 +474,10 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, setOperationAction(ISD::SELECT, VT, Custom); } + setTargetDAGCombine(ISD::ADD); + setTargetDAGCombine(ISD::ADDCARRY); + setTargetDAGCombine(ISD::SUB); + setTargetDAGCombine(ISD::SUBCARRY); setTargetDAGCombine(ISD::FADD); setTargetDAGCombine(ISD::FSUB); setTargetDAGCombine(ISD::FMINNUM); @@ -540,7 +547,7 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.align = 0; const ConstantInt *Vol = dyn_cast(CI.getOperand(4)); - Info.vol = !Vol || !Vol->isNullValue(); + Info.vol = !Vol || !Vol->isZero(); Info.readMem = true; Info.writeMem = true; return true; @@ -567,9 +574,17 @@ bool SITargetLowering::getAddrModeArguments(IntrinsicInst *II, } bool SITargetLowering::isLegalFlatAddressingMode(const AddrMode &AM) const { - // Flat instructions do not have offsets, and only have the register - // address. - return AM.BaseOffs == 0 && (AM.Scale == 0 || AM.Scale == 1); + if (!Subtarget->hasFlatInstOffsets()) { + // Flat instructions do not have offsets, and only have the register + // address. + return AM.BaseOffs == 0 && AM.Scale == 0; + } + + // GFX9 added a 13-bit signed offset. When using regular flat instructions, + // the sign bit is ignored and is treated as a 12-bit unsigned offset. 
+ + // Just r + i + return isUInt<12>(AM.BaseOffs) && AM.Scale == 0; } bool SITargetLowering::isLegalMUBUFAddressingMode(const AddrMode &AM) const { @@ -698,6 +713,19 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL, } } +bool SITargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT, + const SelectionDAG &DAG) const { + if (AS == AMDGPUASI.GLOBAL_ADDRESS || AS == AMDGPUASI.FLAT_ADDRESS) { + return (MemVT.getSizeInBits() <= 4 * 32); + } else if (AS == AMDGPUASI.PRIVATE_ADDRESS) { + unsigned MaxPrivateBits = 8 * getSubtarget()->getMaxPrivateElementSize(); + return (MemVT.getSizeInBits() <= MaxPrivateBits); + } else if (AS == AMDGPUASI.LOCAL_ADDRESS) { + return (MemVT.getSizeInBits() <= 2 * 32); + } + return true; +} + bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, unsigned Align, @@ -914,6 +942,55 @@ SDValue SITargetLowering::lowerKernargMemParameter( return DAG.getMergeValues({ Val, Load.getValue(1) }, SL); } +SDValue SITargetLowering::lowerStackParameter(SelectionDAG &DAG, CCValAssign &VA, + const SDLoc &SL, SDValue Chain, + const ISD::InputArg &Arg) const { + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo &MFI = MF.getFrameInfo(); + + if (Arg.Flags.isByVal()) { + unsigned Size = Arg.Flags.getByValSize(); + int FrameIdx = MFI.CreateFixedObject(Size, VA.getLocMemOffset(), false); + return DAG.getFrameIndex(FrameIdx, MVT::i32); + } + + unsigned ArgOffset = VA.getLocMemOffset(); + unsigned ArgSize = VA.getValVT().getStoreSize(); + + int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, true); + + // Create load nodes to retrieve arguments from the stack. + SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); + SDValue ArgValue; + + // For NON_EXTLOAD, generic code in getLoad asserts that ValVT == MemVT + ISD::LoadExtType ExtType = ISD::NON_EXTLOAD; + MVT MemVT = VA.getValVT(); + + switch (VA.getLocInfo()) { + default: + break; + case CCValAssign::BCvt: + MemVT = VA.getLocVT(); + break; + case CCValAssign::SExt: + ExtType = ISD::SEXTLOAD; + break; + case CCValAssign::ZExt: + ExtType = ISD::ZEXTLOAD; + break; + case CCValAssign::AExt: + ExtType = ISD::EXTLOAD; + break; + } + + ArgValue = DAG.getExtLoad( + ExtType, SL, VA.getLocVT(), Chain, FIN, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), + MemVT); + return ArgValue; +} + static void processShaderInputArgs(SmallVectorImpl &Splits, CallingConv::ID CallConv, ArrayRef Ins, @@ -992,10 +1069,10 @@ static void allocateHSAUserSGPRs(CCState &CCInfo, MachineFunction &MF, const SIRegisterInfo &TRI, SIMachineFunctionInfo &Info) { - if (Info.hasPrivateMemoryInputPtr()) { - unsigned PrivateMemoryPtrReg = Info.addPrivateMemoryPtr(TRI); - MF.addLiveIn(PrivateMemoryPtrReg, &AMDGPU::SGPR_64RegClass); - CCInfo.AllocateReg(PrivateMemoryPtrReg); + if (Info.hasImplicitBufferPtr()) { + unsigned ImplicitBufferPtrReg = Info.addImplicitBufferPtr(TRI); + MF.addLiveIn(ImplicitBufferPtrReg, &AMDGPU::SGPR_64RegClass); + CCInfo.AllocateReg(ImplicitBufferPtrReg); } // FIXME: How should these inputs interact with inreg / custom SGPR inputs? @@ -1097,7 +1174,8 @@ static void reservePrivateMemoryRegs(const TargetMachine &TM, SIMachineFunctionInfo &Info) { // Now that we've figured out where the scratch register inputs are, see if we // should reserve the arguments and use them directly.
- bool HasStackObjects = MF.getFrameInfo().hasStackObjects(); + MachineFrameInfo &MFI = MF.getFrameInfo(); + bool HasStackObjects = MFI.hasStackObjects(); // Record that we know we have non-spill stack objects so we don't need to // check all stack objects later. @@ -1223,8 +1301,10 @@ SDValue SITargetLowering::LowerFormalArguments( !Info->hasWorkGroupIDZ() && !Info->hasWorkGroupInfo() && !Info->hasWorkItemIDX() && !Info->hasWorkItemIDY() && !Info->hasWorkItemIDZ()); + } else if (IsKernel) { + assert(Info->hasWorkGroupIDX() && Info->hasWorkItemIDX()); } else { - assert(!IsKernel || (Info->hasWorkGroupIDX() && Info->hasWorkItemIDX())); + Splits.append(Ins.begin(), Ins.end()); } if (IsEntryFunc) { @@ -1278,20 +1358,50 @@ SDValue SITargetLowering::LowerFormalArguments( InVals.push_back(Arg); continue; + } else if (!IsEntryFunc && VA.isMemLoc()) { + SDValue Val = lowerStackParameter(DAG, VA, DL, Chain, Arg); + InVals.push_back(Val); + if (!Arg.Flags.isByVal()) + Chains.push_back(Val.getValue(1)); + continue; } - if (VA.isMemLoc()) - report_fatal_error("memloc not supported with calling convention"); - assert(VA.isRegLoc() && "Parameter must be in a register!"); unsigned Reg = VA.getLocReg(); const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT); + EVT ValVT = VA.getValVT(); Reg = MF.addLiveIn(Reg, RC); SDValue Val = DAG.getCopyFromReg(Chain, DL, Reg, VT); - if (Arg.VT.isVector()) { + // If this is an 8 or 16-bit value, it is really passed promoted + // to 32 bits. Insert an assert[sz]ext to capture this, then + // truncate to the right size. + switch (VA.getLocInfo()) { + case CCValAssign::Full: + break; + case CCValAssign::BCvt: + Val = DAG.getNode(ISD::BITCAST, DL, ValVT, Val); + break; + case CCValAssign::SExt: + Val = DAG.getNode(ISD::AssertSext, DL, VT, Val, + DAG.getValueType(ValVT)); + Val = DAG.getNode(ISD::TRUNCATE, DL, ValVT, Val); + break; + case CCValAssign::ZExt: + Val = DAG.getNode(ISD::AssertZext, DL, VT, Val, + DAG.getValueType(ValVT)); + Val = DAG.getNode(ISD::TRUNCATE, DL, ValVT, Val); + break; + case CCValAssign::AExt: + Val = DAG.getNode(ISD::TRUNCATE, DL, ValVT, Val); + break; + default: + llvm_unreachable("Unknown loc info!"); + } + + if (IsShader && Arg.VT.isVector()) { // Build a vector from the registers Type *ParamType = FType->getParamType(Arg.getOrigArgIndex()); unsigned NumElements = ParamType->getVectorNumElements(); @@ -1318,15 +1428,36 @@ SDValue SITargetLowering::LowerFormalArguments( } // Start adding system SGPRs. - if (IsEntryFunc) + if (IsEntryFunc) { allocateSystemSGPRs(CCInfo, MF, *Info, CallConv, IsShader); - - reservePrivateMemoryRegs(getTargetMachine(), MF, *TRI, *Info); + } else { + CCInfo.AllocateReg(Info->getScratchRSrcReg()); + CCInfo.AllocateReg(Info->getScratchWaveOffsetReg()); + CCInfo.AllocateReg(Info->getFrameOffsetReg()); + } return Chains.empty() ? Chain : DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); } +// TODO: If return values can't fit in registers, we should return as many as +// possible in registers before passing on stack. +bool SITargetLowering::CanLowerReturn( + CallingConv::ID CallConv, + MachineFunction &MF, bool IsVarArg, + const SmallVectorImpl &Outs, + LLVMContext &Context) const { + // Replacing returns with sret/stack usage doesn't make sense for shaders. + // FIXME: Also sort of a workaround for custom vector splitting in LowerReturn + // for shaders. Vector types should be explicitly handled by CC. 
+ if (AMDGPU::isEntryFunctionCC(CallConv)) + return true; + + SmallVector RVLocs; + CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context); + return CCInfo.CheckReturn(Outs, CCAssignFnForReturn(CallConv, IsVarArg)); +} + SDValue SITargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, @@ -1336,11 +1467,15 @@ SITargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, MachineFunction &MF = DAG.getMachineFunction(); SIMachineFunctionInfo *Info = MF.getInfo(); - if (!AMDGPU::isShader(CallConv)) + if (AMDGPU::isKernel(CallConv)) { return AMDGPUTargetLowering::LowerReturn(Chain, CallConv, isVarArg, Outs, OutVals, DL, DAG); + } + + bool IsShader = AMDGPU::isShader(CallConv); Info->setIfReturnsVoid(Outs.size() == 0); + bool IsWaveEnd = Info->returnsVoid() && IsShader; SmallVector Splits; SmallVector SplitVals; @@ -1349,7 +1484,7 @@ SITargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, for (unsigned i = 0, e = Outs.size(); i != e; ++i) { const ISD::OutputArg &Out = Outs[i]; - if (Out.VT.isVector()) { + if (IsShader && Out.VT.isVector()) { MVT VT = Out.VT.getVectorElementType(); ISD::OutputArg NewOut = Out; NewOut.Flags.setSplit(); @@ -1380,29 +1515,58 @@ SITargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, *DAG.getContext()); // Analyze outgoing return values. - AnalyzeReturn(CCInfo, Splits); + CCInfo.AnalyzeReturn(Splits, CCAssignFnForReturn(CallConv, isVarArg)); SDValue Flag; SmallVector RetOps; RetOps.push_back(Chain); // Operand #0 = Chain (updated below) + // Add return address for callable functions. + if (!Info->isEntryFunction()) { + const SIRegisterInfo *TRI = getSubtarget()->getRegisterInfo(); + SDValue ReturnAddrReg = CreateLiveInRegister( + DAG, &AMDGPU::SReg_64RegClass, TRI->getReturnAddressReg(MF), MVT::i64); + + // FIXME: Should be able to use a vreg here, but need a way to prevent it + // from being allocated to a CSR. + + SDValue PhysReturnAddrReg = DAG.getRegister(TRI->getReturnAddressReg(MF), + MVT::i64); + + Chain = DAG.getCopyToReg(Chain, DL, PhysReturnAddrReg, ReturnAddrReg, Flag); + Flag = Chain.getValue(1); + + RetOps.push_back(PhysReturnAddrReg); + } + // Copy the result values into the output registers. for (unsigned i = 0, realRVLocIdx = 0; i != RVLocs.size(); ++i, ++realRVLocIdx) { CCValAssign &VA = RVLocs[i]; assert(VA.isRegLoc() && "Can only return in registers!"); + // TODO: Partially return in registers if return values don't fit. SDValue Arg = SplitVals[realRVLocIdx]; // Copied from other backends. switch (VA.getLocInfo()) { - default: llvm_unreachable("Unknown loc info!"); case CCValAssign::Full: break; case CCValAssign::BCvt: Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg); break; + case CCValAssign::SExt: + Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg); + break; + case CCValAssign::ZExt: + Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg); + break; + case CCValAssign::AExt: + Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg); + break; + default: + llvm_unreachable("Unknown loc info!"); } Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Arg, Flag); @@ -1410,12 +1574,16 @@ SITargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); } + // FIXME: Does sret work properly? + // Update chain and glue. RetOps[0] = Chain; if (Flag.getNode()) RetOps.push_back(Flag); - unsigned Opc = Info->returnsVoid() ?
AMDGPUISD::ENDPGM : AMDGPUISD::RETURN_TO_EPILOG; + unsigned Opc = AMDGPUISD::ENDPGM; + if (!IsWaveEnd) + Opc = IsShader ? AMDGPUISD::RETURN_TO_EPILOG : AMDGPUISD::RET_FLAG; return DAG.getNode(Opc, DL, MVT::Other, RetOps); } @@ -2212,20 +2380,16 @@ void SITargetLowering::ReplaceNodeResults(SDNode *N, } case ISD::INTRINSIC_WO_CHAIN: { unsigned IID = cast(N->getOperand(0))->getZExtValue(); - switch (IID) { - case Intrinsic::amdgcn_cvt_pkrtz: { + if (IID == Intrinsic::amdgcn_cvt_pkrtz) { SDValue Src0 = N->getOperand(1); SDValue Src1 = N->getOperand(2); SDLoc SL(N); SDValue Cvt = DAG.getNode(AMDGPUISD::CVT_PKRTZ_F16_F32, SL, MVT::i32, Src0, Src1); - Results.push_back(DAG.getNode(ISD::BITCAST, SL, MVT::v2f16, Cvt)); return; } - default: - break; - } + break; } case ISD::SELECT: { SDLoc SL(N); @@ -2457,7 +2621,7 @@ SDValue SITargetLowering::lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const { SDValue FpToFp16 = DAG.getNode(ISD::FP_TO_FP16, DL, MVT::i32, Src); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FpToFp16); - return DAG.getNode(ISD::BITCAST, DL, MVT::f16, Trunc);; + return DAG.getNode(ISD::BITCAST, DL, MVT::f16, Trunc); } SDValue SITargetLowering::lowerTRAP(SDValue Op, SelectionDAG &DAG) const { @@ -2660,6 +2824,15 @@ SDValue SITargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op, SDValue Vec = Op.getOperand(0); SDValue Idx = Op.getOperand(1); + DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr); + + // Make sure we do any optimizations that will make it easier to fold + // source modifiers before obscuring it with bit operations. + + // XXX - Why doesn't this get called when vector_shuffle is expanded? + if (SDValue Combined = performExtractVectorEltCombine(Op.getNode(), DCI)) + return Combined; + if (const ConstantSDNode *CIdx = dyn_cast(Idx)) { SDValue Result = DAG.getNode(ISD::BITCAST, SL, MVT::i32, Vec); @@ -2834,7 +3007,11 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, switch (IntrinsicID) { case Intrinsic::amdgcn_implicit_buffer_ptr: { - unsigned Reg = TRI->getPreloadedValue(MF, SIRegisterInfo::PRIVATE_SEGMENT_BUFFER); + if (getSubtarget()->isAmdCodeObjectV2(MF)) + return emitNonHSAIntrinsicError(DAG, DL, VT); + + unsigned Reg = TRI->getPreloadedValue(MF, + SIRegisterInfo::IMPLICIT_BUFFER_PTR); return CreateLiveInRegister(DAG, &AMDGPU::SReg_64RegClass, Reg, VT); } case Intrinsic::amdgcn_dispatch_ptr: @@ -3124,6 +3301,8 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const { unsigned IntrID = cast(Op.getOperand(1))->getZExtValue(); SDLoc DL(Op); + MachineFunction &MF = DAG.getMachineFunction(); + switch (IntrID) { case Intrinsic::amdgcn_atomic_inc: case Intrinsic::amdgcn_atomic_dec: { Op.getOperand(5), // glc Op.getOperand(6) // slc }; - MachineFunction &MF = DAG.getMachineFunction(); SIMachineFunctionInfo *MFI = MF.getInfo(); unsigned Opc = (IntrID == Intrinsic::amdgcn_buffer_load) ?
@@ -3164,6 +3342,29 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops, IntVT, MMO); } + case Intrinsic::amdgcn_tbuffer_load: { + SDValue Ops[] = { + Op.getOperand(0), // Chain + Op.getOperand(2), // rsrc + Op.getOperand(3), // vindex + Op.getOperand(4), // voffset + Op.getOperand(5), // soffset + Op.getOperand(6), // offset + Op.getOperand(7), // dfmt + Op.getOperand(8), // nfmt + Op.getOperand(9), // glc + Op.getOperand(10) // slc + }; + + EVT VT = Op.getOperand(2).getValueType(); + + MachineMemOperand *MMO = MF.getMachineMemOperand( + MachinePointerInfo(), + MachineMemOperand::MOLoad, + VT.getStoreSize(), VT.getStoreSize()); + return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_LOAD_FORMAT, DL, + Op->getVTList(), Ops, VT, MMO); + } // Basic sample. case Intrinsic::amdgcn_image_sample: case Intrinsic::amdgcn_image_sample_cl: @@ -3229,10 +3430,10 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const { - MachineFunction &MF = DAG.getMachineFunction(); SDLoc DL(Op); SDValue Chain = Op.getOperand(0); unsigned IntrinsicID = cast(Op.getOperand(1))->getZExtValue(); + MachineFunction &MF = DAG.getMachineFunction(); switch (IntrinsicID) { case Intrinsic::amdgcn_exp: { @@ -3299,33 +3500,6 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, return DAG.getNode(AMDGPUISD::INIT_EXEC_FROM_INPUT, DL, MVT::Other, Chain, Op.getOperand(2), Op.getOperand(3)); } - case AMDGPUIntrinsic::SI_tbuffer_store: { - SDValue Ops[] = { - Chain, - Op.getOperand(2), - Op.getOperand(3), - Op.getOperand(4), - Op.getOperand(5), - Op.getOperand(6), - Op.getOperand(7), - Op.getOperand(8), - Op.getOperand(9), - Op.getOperand(10), - Op.getOperand(11), - Op.getOperand(12), - Op.getOperand(13), - Op.getOperand(14) - }; - - EVT VT = Op.getOperand(3).getValueType(); - - MachineMemOperand *MMO = MF.getMachineMemOperand( - MachinePointerInfo(), - MachineMemOperand::MOStore, - VT.getStoreSize(), 4); - return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_STORE_FORMAT, DL, - Op->getVTList(), Ops, VT, MMO); - } case AMDGPUIntrinsic::AMDGPU_kill: { SDValue Src = Op.getOperand(2); if (const ConstantFPSDNode *K = dyn_cast(Src)) { @@ -3341,7 +3515,6 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, } case Intrinsic::amdgcn_s_barrier: { if (getTargetMachine().getOptLevel() > CodeGenOpt::None) { - const MachineFunction &MF = DAG.getMachineFunction(); const SISubtarget &ST = MF.getSubtarget(); unsigned WGSize = ST.getFlatWorkGroupSizes(*MF.getFunction()).second; if (WGSize <= ST.getWavefrontSize()) @@ -3350,6 +3523,75 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, } return SDValue(); }; + case AMDGPUIntrinsic::SI_tbuffer_store: { + + // Extract vindex and voffset from vaddr as appropriate + const ConstantSDNode *OffEn = cast(Op.getOperand(10)); + const ConstantSDNode *IdxEn = cast(Op.getOperand(11)); + SDValue VAddr = Op.getOperand(5); + + SDValue Zero = DAG.getTargetConstant(0, DL, MVT::i32); + + assert(!(OffEn->isOne() && IdxEn->isOne()) && + "Legacy intrinsic doesn't support both offset and index - use new version"); + + SDValue VIndex = IdxEn->isOne() ? VAddr : Zero; + SDValue VOffset = OffEn->isOne() ? VAddr : Zero; + + // Deal with the vec-3 case + const ConstantSDNode *NumChannels = cast(Op.getOperand(4)); + auto Opcode = NumChannels->getZExtValue() == 3 ? 
+ AMDGPUISD::TBUFFER_STORE_FORMAT_X3 : AMDGPUISD::TBUFFER_STORE_FORMAT; + + SDValue Ops[] = { + Chain, + Op.getOperand(3), // vdata + Op.getOperand(2), // rsrc + VIndex, + VOffset, + Op.getOperand(6), // soffset + Op.getOperand(7), // inst_offset + Op.getOperand(8), // dfmt + Op.getOperand(9), // nfmt + Op.getOperand(12), // glc + Op.getOperand(13), // slc + }; + + assert((cast(Op.getOperand(14)))->getZExtValue() == 0 && + "Value of tfe other than zero is unsupported"); + + EVT VT = Op.getOperand(3).getValueType(); + MachineMemOperand *MMO = MF.getMachineMemOperand( + MachinePointerInfo(), + MachineMemOperand::MOStore, + VT.getStoreSize(), 4); + return DAG.getMemIntrinsicNode(Opcode, DL, + Op->getVTList(), Ops, VT, MMO); + } + + case Intrinsic::amdgcn_tbuffer_store: { + SDValue Ops[] = { + Chain, + Op.getOperand(2), // vdata + Op.getOperand(3), // rsrc + Op.getOperand(4), // vindex + Op.getOperand(5), // voffset + Op.getOperand(6), // soffset + Op.getOperand(7), // offset + Op.getOperand(8), // dfmt + Op.getOperand(9), // nfmt + Op.getOperand(10), // glc + Op.getOperand(11) // slc + }; + EVT VT = Op.getOperand(3).getValueType(); + MachineMemOperand *MMO = MF.getMachineMemOperand( + MachinePointerInfo(), + MachineMemOperand::MOStore, + VT.getStoreSize(), 4); + return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_STORE_FORMAT, DL, + Op->getVTList(), Ops, VT, MMO); + } + + default: return Op; } @@ -3415,7 +3657,7 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { } if (AS == AMDGPUASI.CONSTANT_ADDRESS || AS == AMDGPUASI.GLOBAL_ADDRESS) { if (Subtarget->getScalarizeGlobalBehavior() && isMemOpUniform(Load) && - isMemOpHasNoClobberedMemOperand(Load)) + !Load->isVolatile() && isMemOpHasNoClobberedMemOperand(Load)) return SDValue(); // Non-uniform loads will be selected to MUBUF instructions, so they // have the same legalization requirements as global and private @@ -3496,7 +3738,9 @@ SDValue SITargetLowering::lowerFastUnsafeFDIV(SDValue Op, SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); EVT VT = Op.getValueType(); - bool Unsafe = DAG.getTarget().Options.UnsafeFPMath; + const SDNodeFlags Flags = Op->getFlags(); + bool Unsafe = DAG.getTarget().Options.UnsafeFPMath || + Flags.hasUnsafeAlgebra() || Flags.hasAllowReciprocal(); if (!Unsafe && VT == MVT::f32 && Subtarget->hasFP32Denormals()) return SDValue(); @@ -3531,15 +3775,11 @@ SDValue SITargetLowering::lowerFastUnsafeFDIV(SDValue Op, } } - const SDNodeFlags Flags = Op->getFlags(); - - if (Unsafe || Flags.hasAllowReciprocal()) { + if (Unsafe) { // Turn into multiply by the reciprocal. // x / y -> x * (1.0 / y) - SDNodeFlags NewFlags; - NewFlags.setUnsafeAlgebra(true); SDValue Recip = DAG.getNode(AMDGPUISD::RCP, SL, VT, RHS); - return DAG.getNode(ISD::FMUL, SL, VT, LHS, Recip, NewFlags); + return DAG.getNode(ISD::FMUL, SL, VT, LHS, Recip, Flags); } return SDValue(); @@ -4074,6 +4314,23 @@ SDValue SITargetLowering::splitBinaryBitConstantOp( return SDValue(); } +// Returns true if argument is a boolean value which is not serialized into + // memory or argument and does not require v_cndmask_b32 to be deserialized.
+static bool isBoolSGPR(SDValue V) { + if (V.getValueType() != MVT::i1) + return false; + switch (V.getOpcode()) { + default: break; + case ISD::SETCC: + case ISD::AND: + case ISD::OR: + case ISD::XOR: + case AMDGPUISD::FP_CLASS: + return true; + } + return false; +} + SDValue SITargetLowering::performAndCombine(SDNode *N, DAGCombinerInfo &DCI) const { if (DCI.isBeforeLegalize()) @@ -4085,12 +4342,40 @@ SDValue SITargetLowering::performAndCombine(SDNode *N, SDValue RHS = N->getOperand(1); - if (VT == MVT::i64) { - const ConstantSDNode *CRHS = dyn_cast(RHS); - if (CRHS) { - if (SDValue Split - = splitBinaryBitConstantOp(DCI, SDLoc(N), ISD::AND, LHS, CRHS)) - return Split; + const ConstantSDNode *CRHS = dyn_cast(RHS); + if (VT == MVT::i64 && CRHS) { + if (SDValue Split + = splitBinaryBitConstantOp(DCI, SDLoc(N), ISD::AND, LHS, CRHS)) + return Split; + } + + if (CRHS && VT == MVT::i32) { + // and (srl x, c), mask => shl (bfe x, nb + c, mask >> nb), nb + // nb = number of trailing zeroes in mask + // It can be optimized out using SDWA for GFX8+ in the SDWA peephole pass, + // given that we are selecting 8 or 16 bit fields starting at byte boundary. + uint64_t Mask = CRHS->getZExtValue(); + unsigned Bits = countPopulation(Mask); + if (getSubtarget()->hasSDWA() && LHS->getOpcode() == ISD::SRL && + (Bits == 8 || Bits == 16) && isShiftedMask_64(Mask) && !(Mask & 1)) { + if (auto *CShift = dyn_cast(LHS->getOperand(1))) { + unsigned Shift = CShift->getZExtValue(); + unsigned NB = CRHS->getAPIntValue().countTrailingZeros(); + unsigned Offset = NB + Shift; + if ((Offset & (Bits - 1)) == 0) { // Starts at a byte or word boundary. + SDLoc SL(N); + SDValue BFE = DAG.getNode(AMDGPUISD::BFE_U32, SL, MVT::i32, + LHS->getOperand(0), + DAG.getConstant(Offset, SL, MVT::i32), + DAG.getConstant(Bits, SL, MVT::i32)); + EVT NarrowVT = EVT::getIntegerVT(*DAG.getContext(), Bits); + SDValue Ext = DAG.getNode(ISD::AssertZext, SL, VT, BFE, + DAG.getValueType(NarrowVT)); + SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(LHS), VT, Ext, + DAG.getConstant(NB, SDLoc(CRHS), MVT::i32)); + return Shl; + } + } } } @@ -4134,6 +4419,16 @@ SDValue SITargetLowering::performAndCombine(SDNode *N, } } + if (VT == MVT::i32 && + (RHS.getOpcode() == ISD::SIGN_EXTEND || LHS.getOpcode() == ISD::SIGN_EXTEND)) { + // and x, (sext cc from i1) => select cc, x, 0 + if (RHS.getOpcode() != ISD::SIGN_EXTEND) + std::swap(LHS, RHS); + if (isBoolSGPR(RHS.getOperand(0))) + return DAG.getSelect(SDLoc(N), MVT::i32, RHS.getOperand(0), + LHS, DAG.getConstant(0, SDLoc(N), MVT::i32)); + } + return SDValue(); } @@ -4327,15 +4622,110 @@ SDValue SITargetLowering::performClassCombine(SDNode *N, return SDValue(); } +static bool isKnownNeverSNan(SelectionDAG &DAG, SDValue Op) { + if (!DAG.getTargetLoweringInfo().hasFloatingPointExceptions()) + return true; + + return DAG.isKnownNeverNaN(Op); +} + +static bool isCanonicalized(SelectionDAG &DAG, SDValue Op, + const SISubtarget *ST, unsigned MaxDepth=5) { + // If source is a result of another standard FP operation it is already in + // canonical form. + + switch (Op.getOpcode()) { + default: + break; + + // These will flush denorms if required. 
+ case ISD::FADD: + case ISD::FSUB: + case ISD::FMUL: + case ISD::FSQRT: + case ISD::FCEIL: + case ISD::FFLOOR: + case ISD::FMA: + case ISD::FMAD: + + case ISD::FCANONICALIZE: + return true; + + case ISD::FP_ROUND: + return Op.getValueType().getScalarType() != MVT::f16 || + ST->hasFP16Denormals(); + + case ISD::FP_EXTEND: + return Op.getOperand(0).getValueType().getScalarType() != MVT::f16 || + ST->hasFP16Denormals(); + + case ISD::FP16_TO_FP: + case ISD::FP_TO_FP16: + return ST->hasFP16Denormals(); + + // These can/will be lowered or combined as bit operations, so we + // need to check their inputs recursively. + case ISD::FNEG: + case ISD::FABS: + return (MaxDepth > 0) && + isCanonicalized(DAG, Op.getOperand(0), ST, MaxDepth - 1); + + case ISD::FSIN: + case ISD::FCOS: + case ISD::FSINCOS: + return Op.getValueType().getScalarType() != MVT::f16; + + // In pre-GFX9 targets V_MIN_F32 and others do not flush denorms. + // For such targets we need to check their inputs recursively. + case ISD::FMINNUM: + case ISD::FMAXNUM: + case ISD::FMINNAN: + case ISD::FMAXNAN: + + if (ST->supportsMinMaxDenormModes() && + DAG.isKnownNeverNaN(Op.getOperand(0)) && + DAG.isKnownNeverNaN(Op.getOperand(1))) + return true; + + return (MaxDepth > 0) && + isCanonicalized(DAG, Op.getOperand(0), ST, MaxDepth - 1) && + isCanonicalized(DAG, Op.getOperand(1), ST, MaxDepth - 1); + + case ISD::ConstantFP: { + auto F = cast(Op)->getValueAPF(); + return !F.isDenormal() && !(F.isNaN() && F.isSignaling()); + } + } + return false; +} + // Constant fold canonicalize. SDValue SITargetLowering::performFCanonicalizeCombine( SDNode *N, DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; ConstantFPSDNode *CFP = isConstOrConstSplatFP(N->getOperand(0)); - if (!CFP) + + if (!CFP) { + SDValue N0 = N->getOperand(0); + EVT VT = N0.getValueType().getScalarType(); + auto ST = getSubtarget(); + + if (((VT == MVT::f32 && ST->hasFP32Denormals()) || + (VT == MVT::f64 && ST->hasFP64Denormals()) || + (VT == MVT::f16 && ST->hasFP16Denormals())) && + DAG.isKnownNeverNaN(N0)) + return N0; + + bool IsIEEEMode = Subtarget->enableIEEEBit(DAG.getMachineFunction()); + + if ((IsIEEEMode || isKnownNeverSNan(DAG, N0)) && + isCanonicalized(DAG, N0, ST)) + return N0; + return SDValue(); + } - SelectionDAG &DAG = DCI.DAG; const APFloat &C = CFP->getValueAPF(); // Flush denormals to 0 if not enabled.
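The isCanonicalized() helper added above is what justifies the fold in performFCanonicalizeCombine(): fcanonicalize(x) can be replaced by x whenever the node producing x already yields canonical results (denormals flushed where required, signaling NaNs quieted). Below is a minimal standalone sketch of that recursion; the enum, struct, and node set are illustrative assumptions, not the in-tree SDNode API.

#include <cstdio>

// Simplified stand-ins for the SelectionDAG opcodes the real code inspects.
enum Opcode { FADD, FMUL, FCANONICALIZE, FNEG, FABS, CONST_FP, UNKNOWN };

struct Node {
  Opcode Op;
  const Node *Operand; // single operand; enough for the FNEG/FABS cases
};

// Mirrors the shape of isCanonicalized(): arithmetic ops flush denormals and
// quiet sNaNs themselves, while FNEG/FABS only touch the sign bit, so they
// are canonical exactly when their input is. MaxDepth bounds the recursion
// the same way the in-tree MaxDepth = 5 default does.
static bool isCanonicalizedSketch(const Node &N, unsigned MaxDepth = 5) {
  switch (N.Op) {
  case FADD:
  case FMUL:
  case FCANONICALIZE:
    return true;
  case FNEG:
  case FABS:
    return MaxDepth > 0 && N.Operand &&
           isCanonicalizedSketch(*N.Operand, MaxDepth - 1);
  default:
    return false; // unknown producer: conservatively keep the canonicalize
  }
}

int main() {
  Node Add = {FADD, nullptr};
  Node Neg = {FNEG, &Add};       // fneg(fadd ...): foldable
  Node Raw = {UNKNOWN, nullptr}; // opaque value: not foldable
  std::printf("fneg(fadd): %d, unknown: %d\n",
              isCanonicalizedSketch(Neg), isCanonicalizedSketch(Raw));
  return 0;
}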
@@ -4428,13 +4818,6 @@ SDValue SITargetLowering::performIntMed3ImmCombine( return DAG.getNode(ISD::TRUNCATE, SL, VT, Med3); } -static bool isKnownNeverSNan(SelectionDAG &DAG, SDValue Op) { - if (!DAG.getTargetLoweringInfo().hasFloatingPointExceptions()) - return true; - - return DAG.isKnownNeverNaN(Op); -} - SDValue SITargetLowering::performFPMed3ImmCombine(SelectionDAG &DAG, const SDLoc &SL, SDValue Op0, @@ -4491,7 +4874,8 @@ SDValue SITargetLowering::performMinMaxCombine(SDNode *N, if (Opc != AMDGPUISD::FMIN_LEGACY && Opc != AMDGPUISD::FMAX_LEGACY && - VT != MVT::f64) { + VT != MVT::f64 && + ((VT != MVT::f16 && VT != MVT::i16) || Subtarget->hasMin3Max3_16())) { // max(max(a, b), c) -> max3(a, b, c) // min(min(a, b), c) -> min3(a, b, c) if (Op0.getOpcode() == Opc && Op0.hasOneUse()) { @@ -4646,6 +5030,102 @@ unsigned SITargetLowering::getFusedOpcode(const SelectionDAG &DAG, return 0; } +SDValue SITargetLowering::performAddCombine(SDNode *N, + DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; + EVT VT = N->getValueType(0); + + if (VT != MVT::i32) + return SDValue(); + + SDLoc SL(N); + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + + // add x, zext (setcc) => addcarry x, 0, setcc + // add x, sext (setcc) => subcarry x, 0, setcc + unsigned Opc = LHS.getOpcode(); + if (Opc == ISD::ZERO_EXTEND || Opc == ISD::SIGN_EXTEND || + Opc == ISD::ANY_EXTEND || Opc == ISD::ADDCARRY) + std::swap(RHS, LHS); + + Opc = RHS.getOpcode(); + switch (Opc) { + default: break; + case ISD::ZERO_EXTEND: + case ISD::SIGN_EXTEND: + case ISD::ANY_EXTEND: { + auto Cond = RHS.getOperand(0); + if (!isBoolSGPR(Cond)) + break; + SDVTList VTList = DAG.getVTList(MVT::i32, MVT::i1); + SDValue Args[] = { LHS, DAG.getConstant(0, SL, MVT::i32), Cond }; + Opc = (Opc == ISD::SIGN_EXTEND) ? 
ISD::SUBCARRY : ISD::ADDCARRY; + return DAG.getNode(Opc, SL, VTList, Args); + } + case ISD::ADDCARRY: { + // add x, (addcarry y, 0, cc) => addcarry x, y, cc + auto C = dyn_cast(RHS.getOperand(1)); + if (!C || C->getZExtValue() != 0) break; + SDValue Args[] = { LHS, RHS.getOperand(0), RHS.getOperand(2) }; + return DAG.getNode(ISD::ADDCARRY, SDLoc(N), RHS->getVTList(), Args); + } + } + return SDValue(); +} + +SDValue SITargetLowering::performSubCombine(SDNode *N, + DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; + EVT VT = N->getValueType(0); + + if (VT != MVT::i32) + return SDValue(); + + SDLoc SL(N); + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + + unsigned Opc = LHS.getOpcode(); + if (Opc != ISD::SUBCARRY) + std::swap(RHS, LHS); + + if (LHS.getOpcode() == ISD::SUBCARRY) { + // sub (subcarry x, 0, cc), y => subcarry x, y, cc + auto C = dyn_cast(LHS.getOperand(1)); + if (!C || C->getZExtValue() != 0) + return SDValue(); + SDValue Args[] = { LHS.getOperand(0), RHS, LHS.getOperand(2) }; + return DAG.getNode(ISD::SUBCARRY, SDLoc(N), LHS->getVTList(), Args); + } + return SDValue(); +} + +SDValue SITargetLowering::performAddCarrySubCarryCombine(SDNode *N, + DAGCombinerInfo &DCI) const { + + if (N->getValueType(0) != MVT::i32) + return SDValue(); + + auto C = dyn_cast(N->getOperand(1)); + if (!C || C->getZExtValue() != 0) + return SDValue(); + + SelectionDAG &DAG = DCI.DAG; + SDValue LHS = N->getOperand(0); + + // addcarry (add x, y), 0, cc => addcarry x, y, cc + // subcarry (sub x, y), 0, cc => subcarry x, y, cc + unsigned LHSOpc = LHS.getOpcode(); + unsigned Opc = N->getOpcode(); + if ((LHSOpc == ISD::ADD && Opc == ISD::ADDCARRY) || + (LHSOpc == ISD::SUB && Opc == ISD::SUBCARRY)) { + SDValue Args[] = { LHS.getOperand(0), LHS.getOperand(1), N->getOperand(2) }; + return DAG.getNode(Opc, SDLoc(N), N->getVTList(), Args); + } + return SDValue(); +} + SDValue SITargetLowering::performFAddCombine(SDNode *N, DAGCombinerInfo &DCI) const { if (DCI.getDAGCombineLevel() < AfterLegalizeDAG) @@ -4743,6 +5223,35 @@ SDValue SITargetLowering::performSetCCCombine(SDNode *N, SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); EVT VT = LHS.getValueType(); + ISD::CondCode CC = cast(N->getOperand(2))->get(); + + auto CRHS = dyn_cast(RHS); + if (!CRHS) { + CRHS = dyn_cast(LHS); + if (CRHS) { + std::swap(LHS, RHS); + CC = getSetCCSwappedOperands(CC); + } + } + + if (CRHS && VT == MVT::i32 && LHS.getOpcode() == ISD::SIGN_EXTEND && + isBoolSGPR(LHS.getOperand(0))) { + // setcc (sext from i1 cc), -1, ne|sgt|ult) => not cc => xor cc, -1 + // setcc (sext from i1 cc), -1, eq|sle|uge) => cc + // setcc (sext from i1 cc), 0, eq|sge|ule) => not cc => xor cc, -1 + // setcc (sext from i1 cc), 0, ne|ugt|slt) => cc + if ((CRHS->isAllOnesValue() && + (CC == ISD::SETNE || CC == ISD::SETGT || CC == ISD::SETULT)) || + (CRHS->isNullValue() && + (CC == ISD::SETEQ || CC == ISD::SETGE || CC == ISD::SETULE))) + return DAG.getNode(ISD::XOR, SL, MVT::i1, LHS.getOperand(0), + DAG.getConstant(-1, SL, MVT::i1)); + if ((CRHS->isAllOnesValue() && + (CC == ISD::SETEQ || CC == ISD::SETLE || CC == ISD::SETUGE)) || + (CRHS->isNullValue() && + (CC == ISD::SETNE || CC == ISD::SETUGT || CC == ISD::SETLT))) + return LHS.getOperand(0); + } if (VT != MVT::f32 && VT != MVT::f64 && (Subtarget->has16BitInsts() && VT != MVT::f16)) @@ -4750,7 +5259,6 @@ SDValue SITargetLowering::performSetCCCombine(SDNode *N, // Match isinf pattern // (fcmp oeq (fabs x), inf) -> (fp_class x, (p_infinity | n_infinity)) - 
ISD::CondCode CC = cast(N->getOperand(2))->get(); if (CC == ISD::SETOEQ && LHS.getOpcode() == ISD::FABS) { const ConstantFPSDNode *CRHS = dyn_cast(RHS); if (!CRHS) @@ -4816,6 +5324,13 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N, switch (N->getOpcode()) { default: return AMDGPUTargetLowering::PerformDAGCombine(N, DCI); + case ISD::ADD: + return performAddCombine(N, DCI); + case ISD::SUB: + return performSubCombine(N, DCI); + case ISD::ADDCARRY: + case ISD::SUBCARRY: + return performAddCarrySubCarryCombine(N, DCI); case ISD::FADD: return performFAddCombine(N, DCI); case ISD::FSUB: @@ -5232,15 +5747,6 @@ MachineSDNode *SITargetLowering::buildRSRC(SelectionDAG &DAG, const SDLoc &DL, return DAG.getMachineNode(AMDGPU::REG_SEQUENCE, DL, MVT::v4i32, Ops); } -SDValue SITargetLowering::CreateLiveInRegister(SelectionDAG &DAG, - const TargetRegisterClass *RC, - unsigned Reg, EVT VT) const { - SDValue VReg = AMDGPUTargetLowering::CreateLiveInRegister(DAG, RC, Reg, VT); - - return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(DAG.getEntryNode()), - cast(VReg)->getReg(), VT); -} - //===----------------------------------------------------------------------===// // SI Inline Assembly Support //===----------------------------------------------------------------------===// @@ -5323,3 +5829,44 @@ SITargetLowering::getConstraintType(StringRef Constraint) const { } return TargetLowering::getConstraintType(Constraint); } + +// Figure out which registers should be reserved for stack access. Only after +// the function is legalized do we know all of the non-spill stack objects or if +// calls are present. +void SITargetLowering::finalizeLowering(MachineFunction &MF) const { + MachineRegisterInfo &MRI = MF.getRegInfo(); + SIMachineFunctionInfo *Info = MF.getInfo(); + const MachineFrameInfo &MFI = MF.getFrameInfo(); + const SISubtarget &ST = MF.getSubtarget(); + const SIRegisterInfo *TRI = ST.getRegisterInfo(); + + if (Info->isEntryFunction()) { + // Callable functions have fixed registers used for stack access. + reservePrivateMemoryRegs(getTargetMachine(), MF, *TRI, *Info); + } + + // We have to assume the SP is needed in case there are calls in the function + // during lowering. Calls are only detected after the function is + // lowered. We're about to reserve registers, so don't bother using it if we + // aren't really going to use it. 
+ bool NeedSP = !Info->isEntryFunction() || + MFI.hasVarSizedObjects() || + MFI.hasCalls(); + + if (NeedSP) { + unsigned ReservedStackPtrOffsetReg = TRI->reservedStackPtrOffsetReg(MF); + Info->setStackPtrOffsetReg(ReservedStackPtrOffsetReg); + + assert(Info->getStackPtrOffsetReg() != Info->getFrameOffsetReg()); + assert(!TRI->isSubRegister(Info->getScratchRSrcReg(), + Info->getStackPtrOffsetReg())); + MRI.replaceRegWith(AMDGPU::SP_REG, Info->getStackPtrOffsetReg()); + } + + MRI.replaceRegWith(AMDGPU::PRIVATE_RSRC_REG, Info->getScratchRSrcReg()); + MRI.replaceRegWith(AMDGPU::FP_REG, Info->getFrameOffsetReg()); + MRI.replaceRegWith(AMDGPU::SCRATCH_WAVE_OFFSET_REG, + Info->getScratchWaveOffsetReg()); + + TargetLoweringBase::finalizeLowering(MF); +} diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/SIISelLowering.h b/interpreter/llvm/src/lib/Target/AMDGPU/SIISelLowering.h index 046e677756d12..e6bb3d6cd4191 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/SIISelLowering.h +++ b/interpreter/llvm/src/lib/Target/AMDGPU/SIISelLowering.h @@ -28,6 +28,10 @@ class SITargetLowering final : public AMDGPUTargetLowering { uint64_t Offset, bool Signed, const ISD::InputArg *Arg = nullptr) const; + SDValue lowerStackParameter(SelectionDAG &DAG, CCValAssign &VA, + const SDLoc &SL, SDValue Chain, + const ISD::InputArg &Arg) const; + SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op, SelectionDAG &DAG) const override; SDValue lowerImplicitZextParam(SelectionDAG &DAG, SDValue Op, @@ -104,6 +108,9 @@ class SITargetLowering final : public AMDGPUTargetLowering { unsigned getFusedOpcode(const SelectionDAG &DAG, const SDNode *N0, const SDNode *N1) const; + SDValue performAddCombine(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue performAddCarrySubCarryCombine(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue performSubCombine(SDNode *N, DAGCombinerInfo &DCI) const; SDValue performFAddCombine(SDNode *N, DAGCombinerInfo &DCI) const; SDValue performFSubCombine(SDNode *N, DAGCombinerInfo &DCI) const; SDValue performSetCCCombine(SDNode *N, DAGCombinerInfo &DCI) const; @@ -146,6 +153,9 @@ class SITargetLowering final : public AMDGPUTargetLowering { bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const override; + bool canMergeStoresTo(unsigned AS, EVT MemVT, + const SelectionDAG &DAG) const override; + bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align, bool *IsFast) const override; @@ -177,7 +187,12 @@ class SITargetLowering final : public AMDGPUTargetLowering { const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl &InVals) const override; - SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, + bool CanLowerReturn(CallingConv::ID CallConv, + MachineFunction &MF, bool isVarArg, + const SmallVectorImpl &Outs, + LLVMContext &Context) const override; + + SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override; @@ -205,8 +220,6 @@ class SITargetLowering final : public AMDGPUTargetLowering { void AdjustInstrPostInstrSelection(MachineInstr &MI, SDNode *Node) const override; - SDValue CreateLiveInRegister(SelectionDAG &DAG, const TargetRegisterClass *RC, - unsigned Reg, EVT VT) const override; SDNode *legalizeTargetIndependentNode(SDNode *Node, SelectionDAG &DAG) const; MachineSDNode *wrapAddr64Rsrc(SelectionDAG &DAG, const SDLoc &DL, @@ -219,6 +232,8 @@ class SITargetLowering final : public 
AMDGPUTargetLowering { ConstraintType getConstraintType(StringRef Constraint) const override; SDValue copyToM0(SelectionDAG &DAG, SDValue Chain, const SDLoc &DL, SDValue V) const; + + void finalizeLowering(MachineFunction &MF) const override; }; } // End namespace llvm diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/interpreter/llvm/src/lib/Target/AMDGPU/SIInsertWaitcnts.cpp index b5e3ce3dfe3ed..0f009a48754ad 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/interpreter/llvm/src/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -229,7 +229,7 @@ class BlockWaitcntBrackets { MachineInstr &MI); BlockWaitcntBrackets() - : WaitAtBeginning(false), ValidLoop(false), MixedExpTypes(false), + : WaitAtBeginning(false), RevisitLoop(false), ValidLoop(false), MixedExpTypes(false), LoopRegion(NULL), PostOrder(0), Waitcnt(NULL), VgprUB(0), SgprUB(0) { for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS; T = (enum InstCounterType)(T + 1)) { @@ -826,7 +826,8 @@ MachineInstr *SIInsertWaitcnts::generateSWaitCntInstBefore( // NOTE: this could be improved with knowledge of all call sites or // with knowledge of the called routines. if (MI.getOpcode() == AMDGPU::RETURN || - MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG) { + MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG || + MI.getOpcode() == AMDGPU::S_SETPC_B64_return) { for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS; T = (enum InstCounterType)(T + 1)) { if (ScoreBrackets->getScoreUB(T) > ScoreBrackets->getScoreLB(T)) { @@ -1008,7 +1009,8 @@ MachineInstr *SIInsertWaitcnts::generateSWaitCntInstBefore( // occurs before the instruction. Doing it here prevents any additional // S_WAITCNTs from being emitted if the instruction was marked as // requiring a WAITCNT beforehand. - if (MI.getOpcode() == AMDGPU::S_BARRIER && ST->needWaitcntBeforeBarrier()) { + if (MI.getOpcode() == AMDGPU::S_BARRIER && + !ST->hasAutoWaitcntBeforeBarrier()) { EmitSwaitcnt |= ScoreBrackets->updateByWait(VM_CNT, ScoreBrackets->getScoreUB(VM_CNT)); EmitSwaitcnt |= ScoreBrackets->updateByWait( @@ -1149,8 +1151,10 @@ void SIInsertWaitcnts::updateEventWaitCntAfter( // instruction, update the upper-bound of the appropriate counter's // bracket and the destination operand scores. // TODO: Use the (TSFlags & SIInstrFlags::LGKM_CNT) property everywhere. 
- if (TII->isDS(Inst) && (Inst.mayLoad() || Inst.mayStore())) { - if (TII->getNamedOperand(Inst, AMDGPU::OpName::gds)->getImm() != 0) { + uint64_t TSFlags = Inst.getDesc().TSFlags; + if (TII->isDS(Inst) && (TSFlags & SIInstrFlags::LGKM_CNT)) { + if (TII->getNamedOperand(Inst, AMDGPU::OpName::gds) && + TII->getNamedOperand(Inst, AMDGPU::OpName::gds)->getImm() != 0) { ScoreBrackets->updateByEvent(TII, TRI, MRI, GDS_ACCESS, Inst); ScoreBrackets->updateByEvent(TII, TRI, MRI, GDS_GPR_LOCK, Inst); } else { @@ -1183,7 +1187,7 @@ void SIInsertWaitcnts::updateEventWaitCntAfter( Inst.getOpcode() != AMDGPU::BUFFER_WBINVL1_VOL) { ScoreBrackets->updateByEvent(TII, TRI, MRI, VMEM_ACCESS, Inst); if ( // TODO: assumed yes -- target_info->MemWriteNeedsExpWait() && - (Inst.mayStore() || AMDGPU::getAtomicNoRetOp(Inst.getOpcode()))) { + (Inst.mayStore() || AMDGPU::getAtomicNoRetOp(Inst.getOpcode()) != -1)) { ScoreBrackets->updateByEvent(TII, TRI, MRI, VMW_GPR_LOCK, Inst); } } else if (TII->isSMRD(Inst)) { @@ -1715,6 +1719,7 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) { MRI = &MF.getRegInfo(); MLI = &getAnalysis(); IV = AMDGPU::IsaInfo::getIsaVersion(ST->getFeatureBits()); + const SIMachineFunctionInfo *MFI = MF.getInfo(); AMDGPUASI = ST->getAMDGPUAS(); HardwareLimits.VmcntMax = AMDGPU::getVmcntBitMask(IV); @@ -1859,5 +1864,19 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) { } } + if (!MFI->isEntryFunction()) { + // Wait for any outstanding memory operations that the input registers may + // depend on. We can't track them and it's better to do the wait after the + // costly call sequence. + + // TODO: Could insert earlier and schedule more liberally with operations + // that only use caller-preserved registers. + MachineBasicBlock &EntryBB = MF.front(); + BuildMI(EntryBB, EntryBB.getFirstNonPHI(), DebugLoc(), TII->get(AMDGPU::S_WAITCNT)) + .addImm(0); + + Modified = true; + } + return Modified; } diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/SIInsertWaits.cpp b/interpreter/llvm/src/lib/Target/AMDGPU/SIInsertWaits.cpp index 9f32ecfa52ff1..bc86515d8b1fe 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/SIInsertWaits.cpp +++ b/interpreter/llvm/src/lib/Target/AMDGPU/SIInsertWaits.cpp @@ -630,7 +630,7 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) { // but we also want to wait for any other outstanding transfers before // signalling other hardware blocks if ((I->getOpcode() == AMDGPU::S_BARRIER && - ST->needWaitcntBeforeBarrier()) || + !ST->hasAutoWaitcntBeforeBarrier()) || I->getOpcode() == AMDGPU::S_SENDMSG || I->getOpcode() == AMDGPU::S_SENDMSGHALT) Required = LastIssued; diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/SIInstrFormats.td b/interpreter/llvm/src/lib/Target/AMDGPU/SIInstrFormats.td index b83a1fe187eb7..02c9b4b1f0eeb 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/SIInstrFormats.td +++ b/interpreter/llvm/src/lib/Target/AMDGPU/SIInstrFormats.td @@ -228,10 +228,10 @@ class EXPe : Enc64 { bits<1> compr; bits<1> done; bits<1> vm; - bits<8> vsrc0; - bits<8> vsrc1; - bits<8> vsrc2; - bits<8> vsrc3; + bits<8> src0; + bits<8> src1; + bits<8> src2; + bits<8> src3; let Inst{3-0} = en; let Inst{9-4} = tgt; @@ -239,10 +239,10 @@ class EXPe : Enc64 { let Inst{11} = done; let Inst{12} = vm; let Inst{31-26} = 0x3e; - let Inst{39-32} = vsrc0; - let Inst{47-40} = vsrc1; - let Inst{55-48} = vsrc2; - let Inst{63-56} = vsrc3; + let Inst{39-32} = src0; + let Inst{47-40} = src1; + let Inst{55-48} = src2; + let Inst{63-56} = src3; } let Uses =
[EXEC] in { diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/SIInstrInfo.cpp b/interpreter/llvm/src/lib/Target/AMDGPU/SIInstrInfo.cpp index 92e452a3d6a06..a7e0feb10b9f1 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/interpreter/llvm/src/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -20,10 +20,10 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Function.h" -#include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/Support/Debug.h" @@ -468,13 +468,11 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, Builder.addReg(RI.getSubReg(SrcReg, SubIdx)); - if (Idx == SubIndices.size() - 1) - Builder.addReg(SrcReg, getKillRegState(KillSrc) | RegState::Implicit); - if (Idx == 0) Builder.addReg(DestReg, RegState::Define | RegState::Implicit); - Builder.addReg(SrcReg, RegState::Implicit); + bool UseKill = KillSrc && Idx == SubIndices.size() - 1; + Builder.addReg(SrcReg, getKillRegState(UseKill) | RegState::Implicit); } } @@ -496,6 +494,188 @@ int SIInstrInfo::commuteOpcode(unsigned Opcode) const { return Opcode; } +void SIInstrInfo::materializeImmediate(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const DebugLoc &DL, unsigned DestReg, + int64_t Value) const { + MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + const TargetRegisterClass *RegClass = MRI.getRegClass(DestReg); + if (RegClass == &AMDGPU::SReg_32RegClass || + RegClass == &AMDGPU::SGPR_32RegClass || + RegClass == &AMDGPU::SReg_32_XM0RegClass || + RegClass == &AMDGPU::SReg_32_XM0_XEXECRegClass) { + BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg) + .addImm(Value); + return; + } + + if (RegClass == &AMDGPU::SReg_64RegClass || + RegClass == &AMDGPU::SGPR_64RegClass || + RegClass == &AMDGPU::SReg_64_XEXECRegClass) { + BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg) + .addImm(Value); + return; + } + + if (RegClass == &AMDGPU::VGPR_32RegClass) { + BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg) + .addImm(Value); + return; + } + if (RegClass == &AMDGPU::VReg_64RegClass) { + BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B64_PSEUDO), DestReg) + .addImm(Value); + return; + } + + unsigned EltSize = 4; + unsigned Opcode = AMDGPU::V_MOV_B32_e32; + if (RI.isSGPRClass(RegClass)) { + if (RI.getRegSizeInBits(*RegClass) > 32) { + Opcode = AMDGPU::S_MOV_B64; + EltSize = 8; + } else { + Opcode = AMDGPU::S_MOV_B32; + EltSize = 4; + } + } + + ArrayRef SubIndices = RI.getRegSplitParts(RegClass, EltSize); + for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) { + int64_t IdxValue = Idx == 0 ? 
Value : 0; + + MachineInstrBuilder Builder = BuildMI(MBB, MI, DL, + get(Opcode), RI.getSubReg(DestReg, Idx)); + Builder.addImm(IdxValue); + } +} + +const TargetRegisterClass * +SIInstrInfo::getPreferredSelectRegClass(unsigned Size) const { + return &AMDGPU::VGPR_32RegClass; +} + +void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + const DebugLoc &DL, unsigned DstReg, + ArrayRef Cond, + unsigned TrueReg, + unsigned FalseReg) const { + MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + assert(MRI.getRegClass(DstReg) == &AMDGPU::VGPR_32RegClass && + "Not a VGPR32 reg"); + + if (Cond.size() == 1) { + BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) + .addReg(FalseReg) + .addReg(TrueReg) + .add(Cond[0]); + } else if (Cond.size() == 2) { + assert(Cond[0].isImm() && "Cond[0] is not an immediate"); + switch (Cond[0].getImm()) { + case SIInstrInfo::SCC_TRUE: { + unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); + BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg) + .addImm(-1) + .addImm(0); + BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) + .addReg(FalseReg) + .addReg(TrueReg) + .addReg(SReg); + break; + } + case SIInstrInfo::SCC_FALSE: { + unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); + BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg) + .addImm(0) + .addImm(-1); + BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) + .addReg(FalseReg) + .addReg(TrueReg) + .addReg(SReg); + break; + } + case SIInstrInfo::VCCNZ: { + MachineOperand RegOp = Cond[1]; + RegOp.setImplicit(false); + BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) + .addReg(FalseReg) + .addReg(TrueReg) + .add(RegOp); + break; + } + case SIInstrInfo::VCCZ: { + MachineOperand RegOp = Cond[1]; + RegOp.setImplicit(false); + BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) + .addReg(TrueReg) + .addReg(FalseReg) + .add(RegOp); + break; + } + case SIInstrInfo::EXECNZ: { + unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); + unsigned SReg2 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); + BuildMI(MBB, I, DL, get(AMDGPU::S_OR_SAVEEXEC_B64), SReg2) + .addImm(0); + BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg) + .addImm(-1) + .addImm(0); + BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) + .addReg(FalseReg) + .addReg(TrueReg) + .addReg(SReg); + break; + } + case SIInstrInfo::EXECZ: { + unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); + unsigned SReg2 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); + BuildMI(MBB, I, DL, get(AMDGPU::S_OR_SAVEEXEC_B64), SReg2) + .addImm(0); + BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg) + .addImm(0) + .addImm(-1); + BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) + .addReg(FalseReg) + .addReg(TrueReg) + .addReg(SReg); + llvm_unreachable("Unhandled branch predicate EXECZ"); + break; + } + default: + llvm_unreachable("invalid branch predicate"); + } + } else { + llvm_unreachable("Can only handle Cond size 1 or 2"); + } +} + +unsigned SIInstrInfo::insertEQ(MachineBasicBlock *MBB, + MachineBasicBlock::iterator I, + const DebugLoc &DL, + unsigned SrcReg, int Value) const { + MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); + unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); + BuildMI(*MBB, I, DL, get(AMDGPU::V_CMP_EQ_I32_e64), Reg) + .addImm(Value) + .addReg(SrcReg); + + return Reg; +} + +unsigned SIInstrInfo::insertNE(MachineBasicBlock *MBB, + 
MachineBasicBlock::iterator I, + const DebugLoc &DL, + unsigned SrcReg, int Value) const { + MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); + unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); + BuildMI(*MBB, I, DL, get(AMDGPU::V_CMP_NE_I32_e64), Reg) + .addImm(Value) + .addReg(SrcReg); + + return Reg; +} + unsigned SIInstrInfo::getMovOpcode(const TargetRegisterClass *DstRC) const { if (RI.getRegSizeInBits(*DstRC) == 32) { @@ -583,14 +763,14 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, .addFrameIndex(FrameIndex) // addr .addMemOperand(MMO) .addReg(MFI->getScratchRSrcReg(), RegState::Implicit) - .addReg(MFI->getScratchWaveOffsetReg(), RegState::Implicit); + .addReg(MFI->getFrameOffsetReg(), RegState::Implicit); // Add the scratch resource registers as implicit uses because we may end up // needing them, and need to ensure that the reserved registers are // correctly handled. if (ST.hasScalarStores()) { // m0 is used for offset to scalar stores if used to spill. - Spill.addReg(AMDGPU::M0, RegState::ImplicitDefine); + Spill.addReg(AMDGPU::M0, RegState::ImplicitDefine | RegState::Dead); } return; @@ -614,7 +794,7 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, .addReg(SrcReg, getKillRegState(isKill)) // data .addFrameIndex(FrameIndex) // addr .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc - .addReg(MFI->getScratchWaveOffsetReg()) // scratch_offset + .addReg(MFI->getFrameOffsetReg()) // scratch_offset .addImm(0) // offset .addMemOperand(MMO); } @@ -687,11 +867,11 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, .addFrameIndex(FrameIndex) // addr .addMemOperand(MMO) .addReg(MFI->getScratchRSrcReg(), RegState::Implicit) - .addReg(MFI->getScratchWaveOffsetReg(), RegState::Implicit); + .addReg(MFI->getFrameOffsetReg(), RegState::Implicit); if (ST.hasScalarStores()) { // m0 is used for offset to scalar stores if used to spill. - Spill.addReg(AMDGPU::M0, RegState::ImplicitDefine); + Spill.addReg(AMDGPU::M0, RegState::ImplicitDefine | RegState::Dead); } return; @@ -710,10 +890,10 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, unsigned Opcode = getVGPRSpillRestoreOpcode(SpillSize); BuildMI(MBB, MI, DL, get(Opcode), DestReg) - .addFrameIndex(FrameIndex) // vaddr - .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc - .addReg(MFI->getScratchWaveOffsetReg()) // scratch_offset - .addImm(0) // offset + .addFrameIndex(FrameIndex) // vaddr + .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc + .addReg(MFI->getFrameOffsetReg()) // scratch_offset + .addImm(0) // offset .addMemOperand(MMO); } @@ -834,6 +1014,20 @@ void SIInstrInfo::insertNoop(MachineBasicBlock &MBB, insertWaitStates(MBB, MI, 1); } +void SIInstrInfo::insertReturn(MachineBasicBlock &MBB) const { + auto MF = MBB.getParent(); + SIMachineFunctionInfo *Info = MF->getInfo(); + + assert(Info->isEntryFunction()); + + if (MBB.succ_empty()) { + bool HasNoTerminator = MBB.getFirstTerminator() == MBB.end(); + if (HasNoTerminator) + BuildMI(MBB, MBB.end(), DebugLoc(), + get(Info->returnsVoid() ? AMDGPU::S_ENDPGM : AMDGPU::SI_RETURN_TO_EPILOG)); + } +} + unsigned SIInstrInfo::getNumWaitStates(const MachineInstr &MI) const { switch (MI.getOpcode()) { default: return 1; // FIXME: Do wait states equal cycles? 
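Stepping back to the materializeImmediate() hunk earlier in this file: it dispatches on the destination register class to pick a move opcode and an element size, then writes wide registers one sub-register at a time, with only the first piece receiving the immediate. A hedged sketch of that dispatch under assumed, simplified types (the enum and struct below are illustrative stand-ins, not the in-tree definitions):

#include <cassert>
#include <cstdio>

enum MoveOpc { S_MOV_B32, S_MOV_B64, V_MOV_B32 };

struct RegClassDesc {
  bool IsSGPR;         // scalar vs. vector register bank
  unsigned SizeInBits; // total width of the destination class
};

// SGPR destinations wider than 32 bits can be written with 64-bit scalar
// moves; VGPRs are always written 32 bits per sub-register.
static MoveOpc pickMove(const RegClassDesc &RC, unsigned &EltSizeBytes) {
  if (RC.IsSGPR && RC.SizeInBits > 32) {
    EltSizeBytes = 8;
    return S_MOV_B64;
  }
  EltSizeBytes = 4;
  return RC.IsSGPR ? S_MOV_B32 : V_MOV_B32;
}

int main() {
  RegClassDesc SReg128 = {true, 128};
  unsigned EltSizeBytes = 0;
  MoveOpc Opc = pickMove(SReg128, EltSizeBytes);
  assert(Opc == S_MOV_B64 && EltSizeBytes == 8);

  // Like the real loop, only sub-register 0 would receive the immediate;
  // the remaining pieces are written as zero (IdxValue = Idx == 0 ? V : 0).
  unsigned Pieces = SReg128.SizeInBits / (8 * EltSizeBytes);
  std::printf("128-bit SGPR: %u pieces of %u bytes\n", Pieces, EltSizeBytes);
  return 0;
}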
@@ -1241,14 +1435,20 @@ bool SIInstrInfo::analyzeBranchImpl(MachineBasicBlock &MBB, return false; } - BranchPredicate Pred = getBranchPredicate(I->getOpcode()); - if (Pred == INVALID_BR) - return true; + MachineBasicBlock *CondBB = nullptr; - MachineBasicBlock *CondBB = I->getOperand(0).getMBB(); - Cond.push_back(MachineOperand::CreateImm(Pred)); - Cond.push_back(I->getOperand(1)); // Save the branch register. + if (I->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) { + CondBB = I->getOperand(1).getMBB(); + Cond.push_back(I->getOperand(0)); + } else { + BranchPredicate Pred = getBranchPredicate(I->getOpcode()); + if (Pred == INVALID_BR) + return true; + CondBB = I->getOperand(0).getMBB(); + Cond.push_back(MachineOperand::CreateImm(Pred)); + Cond.push_back(I->getOperand(1)); // Save the branch register. + } ++I; if (I == MBB.end()) { @@ -1351,6 +1551,13 @@ unsigned SIInstrInfo::insertBranch(MachineBasicBlock &MBB, return 1; } + if(Cond.size() == 1 && Cond[0].isReg()) { + BuildMI(&MBB, DL, get(AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO)) + .add(Cond[0]) + .addMBB(TBB); + return 1; + } + assert(TBB && Cond[0].isImm()); unsigned Opcode @@ -1390,9 +1597,16 @@ unsigned SIInstrInfo::insertBranch(MachineBasicBlock &MBB, bool SIInstrInfo::reverseBranchCondition( SmallVectorImpl &Cond) const { - assert(Cond.size() == 2); - Cond[0].setImm(-Cond[0].getImm()); - return false; + if (Cond.size() != 2) { + return true; + } + + if (Cond[0].isImm()) { + Cond[0].setImm(-Cond[0].getImm()); + return false; + } + + return true; } bool SIInstrInfo::canInsertSelect(const MachineBasicBlock &MBB, @@ -1808,10 +2022,12 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB, return nullptr; case AMDGPU::V_MAC_F16_e64: IsF16 = true; + LLVM_FALLTHROUGH; case AMDGPU::V_MAC_F32_e64: break; case AMDGPU::V_MAC_F16_e32: IsF16 = true; + LLVM_FALLTHROUGH; case AMDGPU::V_MAC_F32_e32: { int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0); @@ -1894,7 +2110,9 @@ bool SIInstrInfo::isInlineConstant(const APInt &Imm) const { bool SIInstrInfo::isInlineConstant(const MachineOperand &MO, uint8_t OperandType) const { - if (!MO.isImm() || OperandType < MCOI::OPERAND_FIRST_TARGET) + if (!MO.isImm() || + OperandType < AMDGPU::OPERAND_SRC_FIRST || + OperandType > AMDGPU::OPERAND_SRC_LAST) return false; // MachineOperand provides no way to tell the true operand size, since it only @@ -2115,7 +2333,12 @@ static bool isSubRegOf(const SIRegisterInfo &TRI, bool SIInstrInfo::verifyInstruction(const MachineInstr &MI, StringRef &ErrInfo) const { uint16_t Opcode = MI.getOpcode(); - const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); + if (SIInstrInfo::isGenericOpcode(MI.getOpcode())) + return true; + + const MachineFunction *MF = MI.getParent()->getParent(); + const MachineRegisterInfo &MRI = MF->getRegInfo(); + int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); @@ -2214,8 +2437,77 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI, } } + // Verify SDWA + if (isSDWA(MI)) { + + if (!ST.hasSDWA()) { + ErrInfo = "SDWA is not supported on this target"; + return false; + } + + int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); + + const int OpIndicies[] = { DstIdx, Src0Idx, Src1Idx, Src2Idx }; + + for (int OpIdx: OpIndicies) { + if (OpIdx == -1) + continue; + const MachineOperand &MO = 
MI.getOperand(OpIdx); + + if (!ST.hasSDWAScalar()) { + // Only VGPRs on VI + if (!MO.isReg() || !RI.hasVGPRs(RI.getRegClassForReg(MRI, MO.getReg()))) { + ErrInfo = "Only VGPRs allowed as operands in SDWA instructions on VI"; + return false; + } + } else { + // No immediates on GFX9 + if (!MO.isReg()) { + ErrInfo = "Only registers allowed as operands in SDWA instructions on GFX9"; + return false; + } + } + } + + if (!ST.hasSDWAOmod()) { + // No omod allowed on VI + const MachineOperand *OMod = getNamedOperand(MI, AMDGPU::OpName::omod); + if (OMod != nullptr && + (!OMod->isImm() || OMod->getImm() != 0)) { + ErrInfo = "OMod not allowed in SDWA instructions on VI"; + return false; + } + } + + uint16_t BasicOpcode = AMDGPU::getBasicFromSDWAOp(Opcode); + if (isVOPC(BasicOpcode)) { + if (!ST.hasSDWASdst() && DstIdx != -1) { + // Only vcc allowed as dst on VI for VOPC + const MachineOperand &Dst = MI.getOperand(DstIdx); + if (!Dst.isReg() || Dst.getReg() != AMDGPU::VCC) { + ErrInfo = "Only VCC allowed as dst in SDWA instructions on VI"; + return false; + } + } else if (!ST.hasSDWAOutModsVOPC()) { + // No clamp allowed on GFX9 for VOPC + const MachineOperand *Clamp = getNamedOperand(MI, AMDGPU::OpName::clamp); + if (Clamp && (!Clamp->isImm() || Clamp->getImm() != 0)) { + ErrInfo = "Clamp not allowed in VOPC SDWA instructions on GFX9"; + return false; + } + + // No omod allowed on GFX9 for VOPC + const MachineOperand *OMod = getNamedOperand(MI, AMDGPU::OpName::omod); + if (OMod && (!OMod->isImm() || OMod->getImm() != 0)) { + ErrInfo = "OMod not allowed in VOPC SDWA instructions on GFX9"; + return false; + } + } + } + } + + // Verify VOP* - if (isVOP1(MI) || isVOP2(MI) || isVOP3(MI) || isVOPC(MI)) { + if (isVOP1(MI) || isVOP2(MI) || isVOP3(MI) || isVOPC(MI) || isSDWA(MI)) { // Only look at the true operands. Only a real operand can use the constant // bus, and we don't want to check pseudo-operands like the source modifier // flags.
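The branch hunks above give the Cond vector two possible shapes: a lone i1 condition register for SI_NON_UNIFORM_BRCOND_PSEUDO, or the usual {BranchPredicate immediate, SCC/VCC register} pair for scalar branches. A minimal sketch of how a consumer can tell the two encodings apart, mirroring the Cond.size() == 1 && Cond[0].isReg() test in insertBranch; the helper name is an assumption for illustration, not part of the patch:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/CodeGen/MachineOperand.h"

// Hypothetical helper: classify the Cond encoding produced by
// SIInstrInfo::analyzeBranchImpl in this patch.
static bool isNonUniformBranchCond(llvm::ArrayRef<llvm::MachineOperand> Cond) {
  // Non-uniform branches carry only the i1 condition register.
  if (Cond.size() == 1 && Cond[0].isReg())
    return true;
  // Scalar branches carry {BranchPredicate imm, branch register};
  // reverseBranchCondition above bails out (returns true) on any other shape.
  return false;
}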
@@ -2345,6 +2637,14 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI, } } + if (isFLAT(MI) && !MF->getSubtarget().hasFlatInstOffsets()) { + const MachineOperand *Offset = getNamedOperand(MI, AMDGPU::OpName::offset); + if (Offset->getImm() != 0) { + ErrInfo = "subtarget does not support offsets in flat instructions"; + return false; + } + } + return true; } @@ -3108,8 +3408,8 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI) const { } void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const { - SmallVector Worklist; - Worklist.push_back(&TopInst); + SetVectorType Worklist; + Worklist.insert(&TopInst); while (!Worklist.empty()) { MachineInstr &Inst = *Worklist.pop_back_val(); @@ -3310,7 +3610,7 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const { } } -void SIInstrInfo::lowerScalarAbs(SmallVectorImpl &Worklist, +void SIInstrInfo::lowerScalarAbs(SetVectorType &Worklist, MachineInstr &Inst) const { MachineBasicBlock &MBB = *Inst.getParent(); MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); @@ -3335,7 +3635,7 @@ void SIInstrInfo::lowerScalarAbs(SmallVectorImpl &Worklist, } void SIInstrInfo::splitScalar64BitUnaryOp( - SmallVectorImpl &Worklist, MachineInstr &Inst, + SetVectorType &Worklist, MachineInstr &Inst, unsigned Opcode) const { MachineBasicBlock &MBB = *Inst.getParent(); MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); @@ -3386,7 +3686,7 @@ void SIInstrInfo::splitScalar64BitUnaryOp( } void SIInstrInfo::splitScalar64BitBinaryOp( - SmallVectorImpl &Worklist, MachineInstr &Inst, + SetVectorType &Worklist, MachineInstr &Inst, unsigned Opcode) const { MachineBasicBlock &MBB = *Inst.getParent(); MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); @@ -3453,7 +3753,7 @@ void SIInstrInfo::splitScalar64BitBinaryOp( } void SIInstrInfo::splitScalar64BitBCNT( - SmallVectorImpl &Worklist, MachineInstr &Inst) const { + SetVectorType &Worklist, MachineInstr &Inst) const { MachineBasicBlock &MBB = *Inst.getParent(); MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); @@ -3489,7 +3789,7 @@ void SIInstrInfo::splitScalar64BitBCNT( addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist); } -void SIInstrInfo::splitScalar64BitBFE(SmallVectorImpl &Worklist, +void SIInstrInfo::splitScalar64BitBFE(SetVectorType &Worklist, MachineInstr &Inst) const { MachineBasicBlock &MBB = *Inst.getParent(); MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); @@ -3553,12 +3853,12 @@ void SIInstrInfo::splitScalar64BitBFE(SmallVectorImpl &Worklist, void SIInstrInfo::addUsersToMoveToVALUWorklist( unsigned DstReg, MachineRegisterInfo &MRI, - SmallVectorImpl &Worklist) const { + SetVectorType &Worklist) const { for (MachineRegisterInfo::use_iterator I = MRI.use_begin(DstReg), E = MRI.use_end(); I != E;) { MachineInstr &UseMI = *I->getParent(); if (!canReadVGPR(UseMI, I.getOperandNo())) { - Worklist.push_back(&UseMI); + Worklist.insert(&UseMI); do { ++I; @@ -3569,7 +3869,7 @@ void SIInstrInfo::addUsersToMoveToVALUWorklist( } } -void SIInstrInfo::movePackToVALU(SmallVectorImpl &Worklist, +void SIInstrInfo::movePackToVALU(SetVectorType &Worklist, MachineRegisterInfo &MRI, MachineInstr &Inst) const { unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); @@ -3632,7 +3932,7 @@ void SIInstrInfo::movePackToVALU(SmallVectorImpl &Worklist, } void SIInstrInfo::addSCCDefUsersToVALUWorklist( - MachineInstr &SCCDefInst, SmallVectorImpl &Worklist) const { + MachineInstr &SCCDefInst, SetVectorType &Worklist) const { // This assumes that all the users of SCC are in the 
same block // as the SCC def. for (MachineInstr &MI : @@ -3643,7 +3943,7 @@ void SIInstrInfo::addSCCDefUsersToVALUWorklist( return; if (MI.findRegisterUseOperandIdx(AMDGPU::SCC) != -1) - Worklist.push_back(&MI); + Worklist.insert(&MI); } } @@ -3920,6 +4220,82 @@ bool SIInstrInfo::mayAccessFlatAddressSpace(const MachineInstr &MI) const { return false; } +bool SIInstrInfo::isNonUniformBranchInstr(MachineInstr &Branch) const { + return Branch.getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO; +} + +void SIInstrInfo::convertNonUniformIfRegion(MachineBasicBlock *IfEntry, + MachineBasicBlock *IfEnd) const { + MachineBasicBlock::iterator TI = IfEntry->getFirstTerminator(); + assert(TI != IfEntry->end()); + + MachineInstr *Branch = &(*TI); + MachineFunction *MF = IfEntry->getParent(); + MachineRegisterInfo &MRI = IfEntry->getParent()->getRegInfo(); + + if (Branch->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) { + unsigned DstReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); + MachineInstr *SIIF = + BuildMI(*MF, Branch->getDebugLoc(), get(AMDGPU::SI_IF), DstReg) + .add(Branch->getOperand(0)) + .add(Branch->getOperand(1)); + MachineInstr *SIEND = + BuildMI(*MF, Branch->getDebugLoc(), get(AMDGPU::SI_END_CF)) + .addReg(DstReg); + + IfEntry->erase(TI); + IfEntry->insert(IfEntry->end(), SIIF); + IfEnd->insert(IfEnd->getFirstNonPHI(), SIEND); + } +} + +void SIInstrInfo::convertNonUniformLoopRegion( + MachineBasicBlock *LoopEntry, MachineBasicBlock *LoopEnd) const { + MachineBasicBlock::iterator TI = LoopEnd->getFirstTerminator(); + // We expect 2 terminators, one conditional and one unconditional. + assert(TI != LoopEnd->end()); + + MachineInstr *Branch = &(*TI); + MachineFunction *MF = LoopEnd->getParent(); + MachineRegisterInfo &MRI = LoopEnd->getParent()->getRegInfo(); + + if (Branch->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) { + + unsigned DstReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); + unsigned BackEdgeReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); + MachineInstrBuilder HeaderPHIBuilder = + BuildMI(*(MF), Branch->getDebugLoc(), get(TargetOpcode::PHI), DstReg); + for (MachineBasicBlock::pred_iterator PI = LoopEntry->pred_begin(), + E = LoopEntry->pred_end(); + PI != E; ++PI) { + if (*PI == LoopEnd) { + HeaderPHIBuilder.addReg(BackEdgeReg); + } else { + MachineBasicBlock *PMBB = *PI; + unsigned ZeroReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); + materializeImmediate(*PMBB, PMBB->getFirstTerminator(), DebugLoc(), + ZeroReg, 0); + HeaderPHIBuilder.addReg(ZeroReg); + } + HeaderPHIBuilder.addMBB(*PI); + } + MachineInstr *HeaderPhi = HeaderPHIBuilder; + MachineInstr *SIIFBREAK = BuildMI(*(MF), Branch->getDebugLoc(), + get(AMDGPU::SI_IF_BREAK), BackEdgeReg) + .addReg(DstReg) + .add(Branch->getOperand(0)); + MachineInstr *SILOOP = + BuildMI(*(MF), Branch->getDebugLoc(), get(AMDGPU::SI_LOOP)) + .addReg(BackEdgeReg) + .addMBB(LoopEntry); + + LoopEntry->insert(LoopEntry->begin(), HeaderPhi); + LoopEnd->erase(TI); + LoopEnd->insert(LoopEnd->end(), SIIFBREAK); + LoopEnd->insert(LoopEnd->end(), SILOOP); + } +} + ArrayRef> SIInstrInfo::getSerializableTargetIndices() const { static const std::pair TargetIndices[] = { @@ -3946,6 +4322,24 @@ SIInstrInfo::CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const return new GCNHazardRecognizer(MF); } +std::pair +SIInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const { + return std::make_pair(TF & MO_MASK, TF & ~MO_MASK); +} + +ArrayRef> 
+SIInstrInfo::getSerializableDirectMachineOperandTargetFlags() const { + static const std::pair TargetFlags[] = { + { MO_GOTPCREL, "amdgpu-gotprel" }, + { MO_GOTPCREL32_LO, "amdgpu-gotprel32-lo" }, + { MO_GOTPCREL32_HI, "amdgpu-gotprel32-hi" }, + { MO_REL32_LO, "amdgpu-rel32-lo" }, + { MO_REL32_HI, "amdgpu-rel32-hi" } + }; + + return makeArrayRef(TargetFlags); +} + bool SIInstrInfo::isBasicBlockPrologue(const MachineInstr &MI) const { return !MI.isTerminator() && MI.getOpcode() != AMDGPU::COPY && MI.modifiesRegister(AMDGPU::EXEC, &RI); diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/SIInstrInfo.h b/interpreter/llvm/src/lib/Target/AMDGPU/SIInstrInfo.h index 03a5ef74b1797..3dd5bc89e6c77 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/SIInstrInfo.h +++ b/interpreter/llvm/src/lib/Target/AMDGPU/SIInstrInfo.h @@ -19,6 +19,7 @@ #include "AMDGPUInstrInfo.h" #include "SIDefines.h" #include "SIRegisterInfo.h" +#include "llvm/ADT/SetVector.h" namespace llvm { @@ -38,6 +39,8 @@ class SIInstrInfo final : public AMDGPUInstrInfo { EXECZ = 3 }; + typedef SmallSetVector SetVectorType; + static unsigned getBranchOpcode(BranchPredicate Cond); static BranchPredicate getBranchPredicate(unsigned Opcode); @@ -56,30 +59,30 @@ class SIInstrInfo final : public AMDGPUInstrInfo { void swapOperands(MachineInstr &Inst) const; - void lowerScalarAbs(SmallVectorImpl &Worklist, + void lowerScalarAbs(SetVectorType &Worklist, MachineInstr &Inst) const; - void splitScalar64BitUnaryOp(SmallVectorImpl &Worklist, + void splitScalar64BitUnaryOp(SetVectorType &Worklist, MachineInstr &Inst, unsigned Opcode) const; - void splitScalar64BitBinaryOp(SmallVectorImpl &Worklist, + void splitScalar64BitBinaryOp(SetVectorType &Worklist, MachineInstr &Inst, unsigned Opcode) const; - void splitScalar64BitBCNT(SmallVectorImpl &Worklist, + void splitScalar64BitBCNT(SetVectorType &Worklist, MachineInstr &Inst) const; - void splitScalar64BitBFE(SmallVectorImpl &Worklist, + void splitScalar64BitBFE(SetVectorType &Worklist, MachineInstr &Inst) const; - void movePackToVALU(SmallVectorImpl &Worklist, + void movePackToVALU(SetVectorType &Worklist, MachineRegisterInfo &MRI, MachineInstr &Inst) const; void addUsersToMoveToVALUWorklist( unsigned Reg, MachineRegisterInfo &MRI, - SmallVectorImpl &Worklist) const; + SetVectorType &Worklist) const; void addSCCDefUsersToVALUWorklist(MachineInstr &SCCDefInst, - SmallVectorImpl &Worklist) const; + SetVectorType &Worklist) const; const TargetRegisterClass * getDestEquivalentVGPRClass(const MachineInstr &Inst) const; @@ -100,6 +103,8 @@ class SIInstrInfo final : public AMDGPUInstrInfo { public: enum TargetOperandFlags { + MO_MASK = 0x7, + MO_NONE = 0, // MO_GOTPCREL -> symbol@GOTPCREL -> R_AMDGPU_GOTPCREL. 
MO_GOTPCREL = 1, @@ -143,6 +148,23 @@ class SIInstrInfo final : public AMDGPUInstrInfo { RegScavenger *RS, unsigned TmpReg, unsigned Offset, unsigned Size) const; + void materializeImmediate(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const DebugLoc &DL, + unsigned DestReg, + int64_t Value) const; + + const TargetRegisterClass *getPreferredSelectRegClass( + unsigned Size) const; + + unsigned insertNE(MachineBasicBlock *MBB, + MachineBasicBlock::iterator I, const DebugLoc &DL, + unsigned SrcReg, int Value) const; + + unsigned insertEQ(MachineBasicBlock *MBB, + MachineBasicBlock::iterator I, const DebugLoc &DL, + unsigned SrcReg, int Value) const; + void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned SrcReg, bool isKill, int FrameIndex, @@ -193,7 +215,7 @@ class SIInstrInfo final : public AMDGPUInstrInfo { bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl &Cond, - bool AllowModify) const override; + bool AllowModify = false) const override; unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr) const override; @@ -218,6 +240,11 @@ class SIInstrInfo final : public AMDGPUInstrInfo { unsigned DstReg, ArrayRef Cond, unsigned TrueReg, unsigned FalseReg) const override; + void insertVectorSelect(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, const DebugLoc &DL, + unsigned DstReg, ArrayRef Cond, + unsigned TrueReg, unsigned FalseReg) const; + bool areMemAccessesTriviallyDisjoint(MachineInstr &MIa, MachineInstr &MIb, AliasAnalysis *AA = nullptr) const override; @@ -705,6 +732,7 @@ class SIInstrInfo final : public AMDGPUInstrInfo { void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override; + void insertReturn(MachineBasicBlock &MBB) const; /// \brief Return the number of wait states that result from executing this /// instruction. 
unsigned getNumWaitStates(const MachineInstr &MI) const; @@ -750,9 +778,23 @@ class SIInstrInfo final : public AMDGPUInstrInfo { bool mayAccessFlatAddressSpace(const MachineInstr &MI) const; + bool isNonUniformBranchInstr(MachineInstr &Instr) const; + + void convertNonUniformIfRegion(MachineBasicBlock *IfEntry, + MachineBasicBlock *IfEnd) const; + + void convertNonUniformLoopRegion(MachineBasicBlock *LoopEntry, + MachineBasicBlock *LoopEnd) const; + + std::pair + decomposeMachineOperandsTargetFlags(unsigned TF) const override; + ArrayRef> getSerializableTargetIndices() const override; + ArrayRef> + getSerializableDirectMachineOperandTargetFlags() const override; + ScheduleHazardRecognizer * CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const override; @@ -782,6 +824,9 @@ namespace AMDGPU { LLVM_READONLY int getSDWAOp(uint16_t Opcode); + LLVM_READONLY + int getBasicFromSDWAOp(uint16_t Opcode); + LLVM_READONLY int getCommuteRev(uint16_t Opcode); diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/SIInstrInfo.td b/interpreter/llvm/src/lib/Target/AMDGPU/SIInstrInfo.td index 7b052844f177b..088173680fa89 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/SIInstrInfo.td +++ b/interpreter/llvm/src/lib/Target/AMDGPU/SIInstrInfo.td @@ -20,6 +20,8 @@ def SIEncodingFamily { int NONE = -1; int SI = 0; int VI = 1; + int SDWA = 2; + int SDWA9 = 3; } //===----------------------------------------------------------------------===// @@ -39,25 +41,41 @@ def SIatomic_dec : SDNode<"AMDGPUISD::ATOMIC_DEC", SDTAtomic2, [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain] >; -def SItbuffer_store : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT", - SDTypeProfile<0, 13, - [SDTCisVT<0, v4i32>, // rsrc(SGPR) - SDTCisVT<1, iAny>, // vdata(VGPR) - SDTCisVT<2, i32>, // num_channels(imm) - SDTCisVT<3, i32>, // vaddr(VGPR) +def SItbuffer_load : SDNode<"AMDGPUISD::TBUFFER_LOAD_FORMAT", + SDTypeProfile<1, 9, + [ // vdata + SDTCisVT<1, v4i32>, // rsrc + SDTCisVT<2, i32>, // vindex(VGPR) + SDTCisVT<3, i32>, // voffset(VGPR) SDTCisVT<4, i32>, // soffset(SGPR) - SDTCisVT<5, i32>, // inst_offset(imm) + SDTCisVT<5, i32>, // offset(imm) SDTCisVT<6, i32>, // dfmt(imm) SDTCisVT<7, i32>, // nfmt(imm) - SDTCisVT<8, i32>, // offen(imm) - SDTCisVT<9, i32>, // idxen(imm) - SDTCisVT<10, i32>, // glc(imm) - SDTCisVT<11, i32>, // slc(imm) - SDTCisVT<12, i32> // tfe(imm) + SDTCisVT<8, i32>, // glc(imm) + SDTCisVT<9, i32> // slc(imm) ]>, - [SDNPMayStore, SDNPMemOperand, SDNPHasChain] + [SDNPMayLoad, SDNPMemOperand, SDNPHasChain] >; +def SDTtbuffer_store : SDTypeProfile<0, 10, + [ // vdata + SDTCisVT<1, v4i32>, // rsrc + SDTCisVT<2, i32>, // vindex(VGPR) + SDTCisVT<3, i32>, // voffset(VGPR) + SDTCisVT<4, i32>, // soffset(SGPR) + SDTCisVT<5, i32>, // offset(imm) + SDTCisVT<6, i32>, // dfmt(imm) + SDTCisVT<7, i32>, // nfmt(imm) + SDTCisVT<8, i32>, // glc(imm) + SDTCisVT<9, i32> // slc(imm) + ]>; + +def SItbuffer_store : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT", SDTtbuffer_store, + [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>; +def SItbuffer_store_x3 : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT_X3", + SDTtbuffer_store, + [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>; + def SDTBufferLoad : SDTypeProfile<1, 5, [ // vdata SDTCisVT<1, v4i32>, // rsrc @@ -318,6 +336,10 @@ def NegSubInlineConst16 : ImmLeaf= -64; }], NegateImm>; +def ShiftAmt32Imm : PatLeaf <(imm), [{ + return N->getZExtValue() < 32; +}]>; + //===----------------------------------------------------------------------===// // Custom Operands 
//===----------------------------------------------------------------------===// @@ -383,6 +405,14 @@ def SendMsgMatchClass : AsmOperandClass { let RenderMethod = "addImmOperands"; } +def SwizzleMatchClass : AsmOperandClass { + let Name = "Swizzle"; + let PredicateMethod = "isSwizzle"; + let ParserMethod = "parseSwizzleOp"; + let RenderMethod = "addImmOperands"; + let IsOptional = 1; +} + def ExpTgtMatchClass : AsmOperandClass { let Name = "ExpTgt"; let PredicateMethod = "isExpTgt"; @@ -395,6 +425,11 @@ def SendMsgImm : Operand { let ParserMatchClass = SendMsgMatchClass; } +def SwizzleImm : Operand { + let PrintMethod = "printSwizzle"; + let ParserMatchClass = SwizzleMatchClass; +} + def SWaitMatchClass : AsmOperandClass { let Name = "SWaitCnt"; let RenderMethod = "addImmOperands"; @@ -439,6 +474,27 @@ def ExpSrc3 : RegisterOperand { let ParserMatchClass = VReg32OrOffClass; } +class SDWASrc : RegisterOperand { + let OperandNamespace = "AMDGPU"; + let OperandType = "OPERAND_SDWA_SRC"; + let EncoderMethod = "getSDWASrcEncoding"; +} + +def SDWASrc32 : SDWASrc { + let DecoderMethod = "decodeSDWASrc32"; +} + +def SDWASrc16 : SDWASrc { + let DecoderMethod = "decodeSDWASrc16"; +} + +def SDWAVopcDst : VOPDstOperand { + let OperandNamespace = "AMDGPU"; + let OperandType = "OPERAND_SDWA_VOPC_DST"; + let EncoderMethod = "getSDWAVopcDstEncoding"; + let DecoderMethod = "decodeSDWAVopcDst"; +} + class NamedMatchClass : AsmOperandClass { let Name = "Imm"#CName; let PredicateMethod = "is"#CName; @@ -458,11 +514,21 @@ class NamedOperandU8 : Operand { let ParserMatchClass = MatchClass; } +class NamedOperandU12 : Operand { + let PrintMethod = "print"#Name; + let ParserMatchClass = MatchClass; +} + class NamedOperandU16 : Operand { let PrintMethod = "print"#Name; let ParserMatchClass = MatchClass; } +class NamedOperandS13 : Operand { + let PrintMethod = "print"#Name; + let ParserMatchClass = MatchClass; +} + class NamedOperandU32 : Operand { let PrintMethod = "print"#Name; let ParserMatchClass = MatchClass; @@ -480,6 +546,8 @@ def offen : NamedOperandBit<"Offen", NamedMatchClass<"Offen">>; def idxen : NamedOperandBit<"Idxen", NamedMatchClass<"Idxen">>; def addr64 : NamedOperandBit<"Addr64", NamedMatchClass<"Addr64">>; +def offset_u12 : NamedOperandU12<"Offset", NamedMatchClass<"OffsetU12">>; +def offset_s13 : NamedOperandS13<"OffsetS13", NamedMatchClass<"OffsetS13">>; def offset : NamedOperandU16<"Offset", NamedMatchClass<"Offset">>; def offset0 : NamedOperandU8<"Offset0", NamedMatchClass<"Offset0">>; def offset1 : NamedOperandU8<"Offset1", NamedMatchClass<"Offset1">>; @@ -499,6 +567,9 @@ def lwe : NamedOperandBit<"LWE", NamedMatchClass<"LWE">>; def exp_compr : NamedOperandBit<"ExpCompr", NamedMatchClass<"ExpCompr">>; def exp_vm : NamedOperandBit<"ExpVM", NamedMatchClass<"ExpVM">>; +def DFMT : NamedOperandU8<"DFMT", NamedMatchClass<"DFMT">>; +def NFMT : NamedOperandU8<"NFMT", NamedMatchClass<"NFMT">>; + def dmask : NamedOperandU16<"DMask", NamedMatchClass<"DMask">>; def dpp_ctrl : NamedOperandU32<"DPPCtrl", NamedMatchClass<"DPPCtrl", 0>>; @@ -588,6 +659,16 @@ class IntInputMods : InputMods def Int32InputMods : IntInputMods; def Int64InputMods : IntInputMods; +def FPRegSDWAInputModsMatchClass : AsmOperandClass { + let Name = "SDWARegWithFPInputMods"; + let ParserMethod = "parseRegWithFPInputMods"; + let PredicateMethod = "isSDWARegKind"; +} + +def FPRegSDWAInputMods : InputMods { + let PrintMethod = "printOperandAndFPInputMods"; +} + def FPVRegInputModsMatchClass : AsmOperandClass { let Name = 
"VRegWithFPInputMods"; let ParserMethod = "parseRegWithFPInputMods"; @@ -598,6 +679,17 @@ def FPVRegInputMods : InputMods { let PrintMethod = "printOperandAndFPInputMods"; } + +def IntRegSDWAInputModsMatchClass : AsmOperandClass { + let Name = "SDWARegWithIntInputMods"; + let ParserMethod = "parseRegWithIntInputMods"; + let PredicateMethod = "isSDWARegKind"; +} + +def IntRegSDWAInputMods : InputMods { + let PrintMethod = "printOperandAndIntInputMods"; +} + def IntVRegInputModsMatchClass : AsmOperandClass { let Name = "VRegWithIntInputMods"; let ParserMethod = "parseRegWithIntInputMods"; @@ -783,6 +875,14 @@ class getVALUDstForVT { VOPDstOperand)))); // else VT == i1 } +// Returns the register class to use for the destination of VOP[12C] +// instructions with SDWA extension +class getSDWADstForVT { + RegisterOperand ret = !if(!eq(VT.Size, 1), + SDWAVopcDst, // VOPC + VOPDstOperand); // VOP1/2 32-bit dst +} + // Returns the register class to use for source 0 of VOP[12C] // instructions for the given VT. class getVOPSrc0ForVT { @@ -823,6 +923,9 @@ class getVregSrcForVT { !if(!eq(VT.Size, 64), VReg_64, VGPR_32)); } +class getSDWASrcForVT { + RegisterOperand ret = !if(!eq(VT.Size, 16), SDWASrc16, SDWASrc32); +} // Returns the register class to use for sources of VOP3 instructions for the // given VT. @@ -917,7 +1020,7 @@ class getSrcMod { ); } -// Return type of input modifiers operand specified input operand for SDWA/DPP +// Return type of input modifiers operand specified input operand for DPP class getSrcModExt { bit isFP = !if(!eq(VT.Value, f16.Value), 1, !if(!eq(VT.Value, f32.Value), 1, @@ -926,6 +1029,15 @@ class getSrcModExt { Operand ret = !if(isFP, FPVRegInputMods, IntVRegInputMods); } +// Return type of input modifiers operand specified input operand for SDWA +class getSrcModSDWA { + bit isFP = !if(!eq(VT.Value, f16.Value), 1, + !if(!eq(VT.Value, f32.Value), 1, + !if(!eq(VT.Value, f64.Value), 1, + 0))); + Operand ret = !if(isFP, FPRegSDWAInputMods, IntRegSDWAInputMods); +} + // Returns the input arguments for VOP[12C] instructions for the given SrcVT. 
class getIns32 { dag ret = !if(!eq(NumSrcArgs, 1), (ins Src0RC:$src0), // VOP1 @@ -1054,37 +1166,67 @@ class getInsDPP { dag ret = !if(!eq(NumSrcArgs, 0), // VOP1 without input operands (V_NOP) (ins), !if(!eq(NumSrcArgs, 1), - (ins Src0Mod:$src0_modifiers, Src0RC:$src0, - clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused, - src0_sel:$src0_sel), + // VOP1 + !if(!eq(HasSDWAOMod, 0), + // VOP1_SDWA without omod + (ins Src0Mod:$src0_modifiers, Src0RC:$src0, + clampmod:$clamp, + dst_sel:$dst_sel, dst_unused:$dst_unused, + src0_sel:$src0_sel), + // VOP1_SDWA with omod + (ins Src0Mod:$src0_modifiers, Src0RC:$src0, + clampmod:$clamp, omod:$omod, + dst_sel:$dst_sel, dst_unused:$dst_unused, + src0_sel:$src0_sel)), !if(!eq(NumSrcArgs, 2), !if(!eq(DstVT.Size, 1), - // VOPC_SDWA with modifiers + // VOPC_SDWA (ins Src0Mod:$src0_modifiers, Src0RC:$src0, Src1Mod:$src1_modifiers, Src1RC:$src1, clampmod:$clamp, src0_sel:$src0_sel, src1_sel:$src1_sel), - // VOP2_SDWA or VOPC_SDWA with modifiers - (ins Src0Mod:$src0_modifiers, Src0RC:$src0, - Src1Mod:$src1_modifiers, Src1RC:$src1, - clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused, - src0_sel:$src0_sel, src1_sel:$src1_sel)), + // VOP2_SDWA + !if(!eq(HasSDWAOMod, 0), + // VOP2_SDWA without omod + (ins Src0Mod:$src0_modifiers, Src0RC:$src0, + Src1Mod:$src1_modifiers, Src1RC:$src1, + clampmod:$clamp, + dst_sel:$dst_sel, dst_unused:$dst_unused, + src0_sel:$src0_sel, src1_sel:$src1_sel), + // VOP2_SDWA with omod + (ins Src0Mod:$src0_modifiers, Src0RC:$src0, + Src1Mod:$src1_modifiers, Src1RC:$src1, + clampmod:$clamp, omod:$omod, + dst_sel:$dst_sel, dst_unused:$dst_unused, + src0_sel:$src0_sel, src1_sel:$src1_sel))), (ins)/* endif */))); } // Outs for DPP and SDWA -class getOutsExt { +class getOutsExt { dag ret = !if(HasDst, !if(!eq(DstVT.Size, 1), (outs), // no dst for VOPC, we use "vcc"-token as dst in SDWA VOPC instructions - (outs DstRCDPP:$vdst)), + (outs DstRCExt:$vdst)), + (outs)); // V_NOP +} + +// Outs for SDWA +class getOutsSDWA { + dag ret = !if(HasDst, + !if(!eq(DstVT.Size, 1), + (outs DstRCSDWA:$sdst), + (outs DstRCSDWA:$vdst)), (outs)); // V_NOP } @@ -1153,8 +1295,7 @@ class getAsmDPP { +class getAsmSDWA { string dst = !if(HasDst, !if(!eq(DstVT.Size, 1), " vcc", // use vcc token as dst for VOPC instructions @@ -1182,6 +1323,35 @@ class getAsmSDWA { + string dst = !if(HasDst, + !if(!eq(DstVT.Size, 1), + "$sdst", // VOPC + "$vdst"), // VOP1/2 + ""); + string src0 = "$src0_modifiers"; + string src1 = "$src1_modifiers"; + string out_mods = !if(!eq(HasOMod, 0), "$clamp", "$clamp$omod"); + string args = !if(!eq(NumSrcArgs, 0), "", + !if(!eq(NumSrcArgs, 1), + ", "#src0, + ", "#src0#", "#src1 + ) + ); + string sdwa = !if(!eq(NumSrcArgs, 0), "", + !if(!eq(NumSrcArgs, 1), + out_mods#" $dst_sel $dst_unused $src0_sel", + !if(!eq(DstVT.Size, 1), + " $src0_sel $src1_sel", // No dst_sel, dst_unused and output modifiers for VOPC + out_mods#" $dst_sel $dst_unused $src0_sel $src1_sel" + ) + ) + ); + string ret = dst#args#sdwa; +} + + // Function that checks if instruction supports DPP and SDWA class getHasExt { @@ -1218,7 +1388,7 @@ class VOPProfile _ArgVT> { field ValueType Src2VT = ArgVT[3]; field RegisterOperand DstRC = getVALUDstForVT.ret; field RegisterOperand DstRCDPP = getVALUDstForVT.ret; - field RegisterOperand DstRCSDWA = getVALUDstForVT.ret; + field RegisterOperand DstRCSDWA = getSDWADstForVT.ret; field RegisterOperand Src0RC32 = getVOPSrc0ForVT.ret; field RegisterClass Src1RC32 = getVregSrcForVT.ret; field RegisterOperand Src0RC64 =
getVOP3SrcForVT.ret; @@ -1226,15 +1396,15 @@ class VOPProfile _ArgVT> { field RegisterOperand Src2RC64 = getVOP3SrcForVT.ret; field RegisterClass Src0DPP = getVregSrcForVT.ret; field RegisterClass Src1DPP = getVregSrcForVT.ret; - field RegisterClass Src0SDWA = getVregSrcForVT.ret; - field RegisterClass Src1SDWA = getVregSrcForVT.ret; + field RegisterOperand Src0SDWA = getSDWASrcForVT.ret; + field RegisterOperand Src1SDWA = getSDWASrcForVT.ret; field Operand Src0Mod = getSrcMod.ret; field Operand Src1Mod = getSrcMod.ret; field Operand Src2Mod = getSrcMod.ret; field Operand Src0ModDPP = getSrcModExt.ret; field Operand Src1ModDPP = getSrcModExt.ret; - field Operand Src0ModSDWA = getSrcModExt.ret; - field Operand Src1ModSDWA = getSrcModExt.ret; + field Operand Src0ModSDWA = getSrcModSDWA.ret; + field Operand Src1ModSDWA = getSrcModSDWA.ret; field bit HasDst = !if(!eq(DstVT.Value, untyped.Value), 0, 1); @@ -1261,14 +1431,16 @@ class VOPProfile _ArgVT> { field bit HasSrc2Mods = !if(HasModifiers, BitOr.ret, 0); field bit HasClamp = HasModifiers; - field bit HasSDWAClamp = HasSrc0; + field bit HasSDWAClamp = EmitDst; field bit HasFPClamp = BitAnd.ret, HasClamp>.ret; field bit IsPacked = isPackedType.ret; field bit HasOpSel = IsPacked; - field bit HasOMod = !if(HasOpSel, 0, HasModifiers); + field bit HasOMod = !if(HasOpSel, 0, isFloatType.ret); + field bit HasSDWAOMod = isFloatType.ret; field bit HasExt = getHasExt.ret; + field bit HasSDWA9 = HasExt; field Operand Src0PackedMod = !if(HasSrc0FloatMods, PackedF16InputMods, PackedI16InputMods); field Operand Src1PackedMod = !if(HasSrc1FloatMods, PackedF16InputMods, PackedI16InputMods); @@ -1281,7 +1453,7 @@ class VOPProfile _ArgVT> { field dag Outs32 = Outs; field dag Outs64 = Outs; field dag OutsDPP = getOutsExt.ret; - field dag OutsSDWA = getOutsExt.ret; + field dag OutsSDWA = getOutsSDWA.ret; field dag Ins32 = getIns32.ret; field dag Ins64 = getIns64 _ArgVT> { field dag InsDPP = getInsDPP.ret; field dag InsSDWA = getInsSDWA.ret; + field string Asm32 = getAsm32.ret; field string Asm64 = getAsm64.ret; field string AsmVOP3P = getAsmVOP3P.ret; field string AsmDPP = getAsmDPP.ret; - field string AsmSDWA = getAsmSDWA.ret; + field string AsmSDWA = getAsmSDWA.ret; + field string AsmSDWA9 = getAsmSDWA9.ret; } class VOP_NO_EXT : VOPProfile { let HasExt = 0; + let HasSDWA9 = 0; } def VOP_F16_F16 : VOPProfile <[f16, f16, untyped, untyped]>; @@ -1327,6 +1502,8 @@ def VOP_B32_F16_F16 : VOPProfile <[i32, f16, f16, untyped]>; def VOP_V2F16_V2F16_V2F16_V2F16 : VOPProfile <[v2f16, v2f16, v2f16, v2f16]>; def VOP_V2I16_V2I16_V2I16_V2I16 : VOPProfile <[v2i16, v2i16, v2i16, v2i16]>; +def VOP_F32_V2F16_V2F16_V2F16 : VOPProfile <[f32, v2f16, v2f16, v2f16]>; + def VOP_NONE : VOPProfile <[untyped, untyped, untyped, untyped]>; def VOP_F32_F32 : VOPProfile <[f32, f32, untyped, untyped]>; @@ -1446,6 +1623,15 @@ def getSDWAOp : InstrMapping { let ValueCols = [["SDWA"]]; } +// Maps SDWA instructions to their ordinary counterparts +def getBasicFromSDWAOp : InstrMapping { + let FilterClass = "VOP"; + let RowFields = ["OpName"]; + let ColFields = ["AsmVariantName"]; + let KeyCol = ["SDWA"]; + let ValueCols = [["Default"]]; +} + def getMaskedMIMGOp : InstrMapping { let FilterClass = "MIMG_Mask"; let RowFields = ["Op"]; @@ -1478,7 +1664,9 @@ def getMCOpcodeGen : InstrMapping { let ColFields = ["Subtarget"]; let KeyCol = [!cast(SIEncodingFamily.NONE)]; let ValueCols = [[!cast(SIEncodingFamily.SI)], - [!cast(SIEncodingFamily.VI)]]; + [!cast(SIEncodingFamily.VI)], + 
[!cast(SIEncodingFamily.SDWA)], + [!cast(SIEncodingFamily.SDWA9)]]; } // Get equivalent SOPK instruction. diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/SIInstructions.td b/interpreter/llvm/src/lib/Target/AMDGPU/SIInstructions.td index 7ccb54f54e349..ba69e42d9125f 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/SIInstructions.td +++ b/interpreter/llvm/src/lib/Target/AMDGPU/SIInstructions.td @@ -174,6 +174,13 @@ def SI_MASK_BRANCH : VPseudoInstSI < let isTerminator = 1 in { + def SI_NON_UNIFORM_BRCOND_PSEUDO : CFPseudoInstSI < + (outs), + (ins SReg_64:$vcc, brtarget:$target), + [(brcond i1:$vcc, bb:$target)]> { + let Size = 12; +} + def SI_IF: CFPseudoInstSI < (outs SReg_64:$dst), (ins SReg_64:$vcc, brtarget:$target), [(set i64:$dst, (AMDGPUif i1:$vcc, bb:$target))], 1, 1> { @@ -922,6 +929,14 @@ def : UMad24Pat; defm : BFIPatterns ; def : ROTRPattern ; +def : Pat<(i32 (trunc (srl i64:$src0, (and i32:$src1, (i32 31))))), + (V_ALIGNBIT_B32 (i32 (EXTRACT_SUBREG (i64 $src0), sub1)), + (i32 (EXTRACT_SUBREG (i64 $src0), sub0)), $src1)>; + +def : Pat<(i32 (trunc (srl i64:$src0, (i32 ShiftAmt32Imm:$src1)))), + (V_ALIGNBIT_B32 (i32 (EXTRACT_SUBREG (i64 $src0), sub1)), + (i32 (EXTRACT_SUBREG (i64 $src0), sub0)), $src1)>; + /********** ====================== **********/ /********** Indirect addressing **********/ /********** ====================== **********/ @@ -1045,7 +1060,7 @@ def : Pat < class FPToI1Pat : Pat < (i1 (fp_to_int (vt (VOP3Mods vt:$src0, i32:$src0_modifiers)))), - (i1 (Inst 0, (kone_type KOne), $src0_modifiers, $src0, DSTCLAMP.NONE, DSTOMOD.NONE)) + (i1 (Inst 0, (kone_type KOne), $src0_modifiers, $src0, DSTCLAMP.NONE)) >; def : FPToI1Pat; diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/interpreter/llvm/src/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp index 933a16646746d..c6ad61a325ccd 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp +++ b/interpreter/llvm/src/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp @@ -97,9 +97,7 @@ class SILoadStoreOptimizer : public MachineFunctionPass { public: static char ID; - SILoadStoreOptimizer() : MachineFunctionPass(ID) {} - - SILoadStoreOptimizer(const TargetMachine &TM_) : MachineFunctionPass(ID) { + SILoadStoreOptimizer() : MachineFunctionPass(ID) { initializeSILoadStoreOptimizerPass(*PassRegistry::getPassRegistry()); } @@ -129,8 +127,8 @@ char SILoadStoreOptimizer::ID = 0; char &llvm::SILoadStoreOptimizerID = SILoadStoreOptimizer::ID; -FunctionPass *llvm::createSILoadStoreOptimizerPass(TargetMachine &TM) { - return new SILoadStoreOptimizer(TM); +FunctionPass *llvm::createSILoadStoreOptimizerPass() { + return new SILoadStoreOptimizer(); } static void moveInstsAfter(MachineBasicBlock::iterator I, diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/SILowerControlFlow.cpp b/interpreter/llvm/src/lib/Target/AMDGPU/SILowerControlFlow.cpp index 35d3a93d8710d..5f1c7f1fc42f1 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/SILowerControlFlow.cpp +++ b/interpreter/llvm/src/lib/Target/AMDGPU/SILowerControlFlow.cpp @@ -60,8 +60,8 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineOperand.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/SILowerI1Copies.cpp b/interpreter/llvm/src/lib/Target/AMDGPU/SILowerI1Copies.cpp 
index 3680e02da5769..ba616ada0c9ce 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/SILowerI1Copies.cpp +++ b/interpreter/llvm/src/lib/Target/AMDGPU/SILowerI1Copies.cpp @@ -21,8 +21,8 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/IR/LLVMContext.h" #include "llvm/IR/Function.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/Support/Debug.h" #include "llvm/Target/TargetMachine.h" diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/interpreter/llvm/src/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index adebb8c4a1c5b..a7c8166ff6d27 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/interpreter/llvm/src/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -23,10 +23,10 @@ using namespace llvm; SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) : AMDGPUMachineFunction(MF), TIDReg(AMDGPU::NoRegister), - ScratchRSrcReg(AMDGPU::NoRegister), - ScratchWaveOffsetReg(AMDGPU::NoRegister), - FrameOffsetReg(AMDGPU::NoRegister), - StackPtrOffsetReg(AMDGPU::NoRegister), + ScratchRSrcReg(AMDGPU::PRIVATE_RSRC_REG), + ScratchWaveOffsetReg(AMDGPU::SCRATCH_WAVE_OFFSET_REG), + FrameOffsetReg(AMDGPU::FP_REG), + StackPtrOffsetReg(AMDGPU::SP_REG), PrivateSegmentBufferUserSGPR(AMDGPU::NoRegister), DispatchPtrUserSGPR(AMDGPU::NoRegister), QueuePtrUserSGPR(AMDGPU::NoRegister), @@ -42,6 +42,9 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) WorkGroupIDZSystemSGPR(AMDGPU::NoRegister), WorkGroupInfoSystemSGPR(AMDGPU::NoRegister), PrivateSegmentWaveByteOffsetSystemSGPR(AMDGPU::NoRegister), + WorkItemIDXVGPR(AMDGPU::NoRegister), + WorkItemIDYVGPR(AMDGPU::NoRegister), + WorkItemIDZVGPR(AMDGPU::NoRegister), PSInputAddr(0), PSInputEnable(0), ReturnsVoid(true), @@ -74,38 +77,54 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) WorkItemIDX(false), WorkItemIDY(false), WorkItemIDZ(false), - PrivateMemoryInputPtr(false) { + ImplicitBufferPtr(false) { const SISubtarget &ST = MF.getSubtarget(); const Function *F = MF.getFunction(); FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(*F); WavesPerEU = ST.getWavesPerEU(*F); - // Non-entry functions have no special inputs for now. - // TODO: Return early for non-entry CCs. + if (!isEntryFunction()) { + // Non-entry functions have no special inputs for now, other than registers + // required for scratch access. + ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3; + ScratchWaveOffsetReg = AMDGPU::SGPR4; + FrameOffsetReg = AMDGPU::SGPR5; + StackPtrOffsetReg = AMDGPU::SGPR32; - CallingConv::ID CC = F->getCallingConv(); - if (CC == CallingConv::AMDGPU_PS) - PSInputAddr = AMDGPU::getInitialPSInputAddr(*F); + // FIXME: Not really a system SGPR. + PrivateSegmentWaveByteOffsetSystemSGPR = ScratchWaveOffsetReg; + } - if (AMDGPU::isKernel(CC)) { - KernargSegmentPtr = true; + CallingConv::ID CC = F->getCallingConv(); + if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) { + KernargSegmentPtr = !F->arg_empty(); WorkGroupIDX = true; WorkItemIDX = true; + } else if (CC == CallingConv::AMDGPU_PS) { + PSInputAddr = AMDGPU::getInitialPSInputAddr(*F); } if (ST.debuggerEmitPrologue()) { // Enable everything.
+ WorkGroupIDX = true; WorkGroupIDY = true; WorkGroupIDZ = true; + WorkItemIDX = true; WorkItemIDY = true; WorkItemIDZ = true; } else { + if (F->hasFnAttribute("amdgpu-work-group-id-x")) + WorkGroupIDX = true; + if (F->hasFnAttribute("amdgpu-work-group-id-y")) WorkGroupIDY = true; if (F->hasFnAttribute("amdgpu-work-group-id-z")) WorkGroupIDZ = true; + if (F->hasFnAttribute("amdgpu-work-item-id-x")) + WorkItemIDX = true; + if (F->hasFnAttribute("amdgpu-work-item-id-y")) WorkItemIDY = true; @@ -113,25 +132,28 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) WorkItemIDZ = true; } - // X, XY, and XYZ are the only supported combinations, so make sure Y is - // enabled if Z is. - if (WorkItemIDZ) - WorkItemIDY = true; - const MachineFrameInfo &FrameInfo = MF.getFrameInfo(); bool MaySpill = ST.isVGPRSpillingEnabled(*F); bool HasStackObjects = FrameInfo.hasStackObjects(); - if (HasStackObjects || MaySpill) { - PrivateSegmentWaveByteOffset = true; + if (isEntryFunction()) { + // X, XY, and XYZ are the only supported combinations, so make sure Y is + // enabled if Z is. + if (WorkItemIDZ) + WorkItemIDY = true; + + if (HasStackObjects || MaySpill) { + PrivateSegmentWaveByteOffset = true; - // HS and GS always have the scratch wave offset in SGPR5 on GFX9. - if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 && - (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS)) - PrivateSegmentWaveByteOffsetSystemSGPR = AMDGPU::SGPR5; + // HS and GS always have the scratch wave offset in SGPR5 on GFX9. + if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 && + (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS)) + PrivateSegmentWaveByteOffsetSystemSGPR = AMDGPU::SGPR5; + } } - if (ST.isAmdCodeObjectV2(MF)) { + bool IsCOV2 = ST.isAmdCodeObjectV2(MF); + if (IsCOV2) { if (HasStackObjects || MaySpill) PrivateSegmentBuffer = true; @@ -145,14 +167,18 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) DispatchID = true; } else if (ST.isMesaGfxShader(MF)) { if (HasStackObjects || MaySpill) - PrivateMemoryInputPtr = true; + ImplicitBufferPtr = true; } - // We don't need to worry about accessing spills with flat instructions. - // TODO: On VI where we must use flat for global, we should be able to omit - // this if it is never used for generic access. - if (HasStackObjects && ST.hasFlatAddressSpace() && ST.isAmdHsaOS()) - FlatScratchInit = true; + if (F->hasFnAttribute("amdgpu-kernarg-segment-ptr")) + KernargSegmentPtr = true; + + if (ST.hasFlatAddressSpace() && isEntryFunction() && IsCOV2) { + // TODO: This could be refined a lot. The attribute is a poor way of + // detecting calls that may require it before argument lowering. + if (HasStackObjects || F->hasFnAttribute("amdgpu-flat-scratch")) + FlatScratchInit = true; + } } unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer( @@ -198,11 +224,11 @@ unsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) { return FlatScratchInitUserSGPR; } -unsigned SIMachineFunctionInfo::addPrivateMemoryPtr(const SIRegisterInfo &TRI) { - PrivateMemoryPtrUserSGPR = TRI.getMatchingSuperReg( +unsigned SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) { + ImplicitBufferPtrUserSGPR = TRI.getMatchingSuperReg( getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass); NumUserSGPRs += 2; - return PrivateMemoryPtrUserSGPR; + return ImplicitBufferPtrUserSGPR; } /// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI. 
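The SIMachineFunctionInfo constructor above now derives the optional work-group/work-item inputs from amdgpu-* function attributes instead of enabling them unconditionally, and applies the X/XY/XYZ packing rule only for entry functions. A rough standalone sketch of that attribute pattern (the struct and function names are assumptions for illustration; the patch itself sets fields on SIMachineFunctionInfo):

#include "llvm/IR/Function.h"

// Hypothetical mirror of the attribute checks added above.
struct WorkItemInputs {
  bool IDX = false, IDY = false, IDZ = false;
};

static WorkItemInputs queryWorkItemInputs(const llvm::Function &F) {
  WorkItemInputs In;
  In.IDX = F.hasFnAttribute("amdgpu-work-item-id-x");
  In.IDY = F.hasFnAttribute("amdgpu-work-item-id-y");
  In.IDZ = F.hasFnAttribute("amdgpu-work-item-id-z");
  // Hardware supports only the X, XY and XYZ combinations, so for entry
  // functions the patch forces Y on whenever Z is requested.
  if (In.IDZ)
    In.IDY = true;
  return In;
}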
diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/interpreter/llvm/src/lib/Target/AMDGPU/SIMachineFunctionInfo.h index dc9f509e60ae2..4c7f38a09a484 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/interpreter/llvm/src/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -15,8 +15,8 @@ #define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H #include "AMDGPUMachineFunction.h" -#include "SIRegisterInfo.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "SIRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/ErrorHandling.h" @@ -97,7 +97,7 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction { unsigned StackPtrOffsetReg; // Input registers for non-HSA ABI - unsigned PrivateMemoryPtrUserSGPR; + unsigned ImplicitBufferPtrUserSGPR; // Input registers setup for the HSA ABI. // User SGPRs in allocation order. @@ -119,6 +119,11 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction { unsigned WorkGroupInfoSystemSGPR; unsigned PrivateSegmentWaveByteOffsetSystemSGPR; + // VGPR inputs. These are always v0, v1 and v2 for entry functions. + unsigned WorkItemIDXVGPR; + unsigned WorkItemIDYVGPR; + unsigned WorkItemIDZVGPR; + // Graphics info. unsigned PSInputAddr; unsigned PSInputEnable; @@ -179,7 +184,7 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction { // Private memory buffer // Compute directly in sgpr[0:1] // Other shaders indirect 64-bits at sgpr[0:1] - bool PrivateMemoryInputPtr : 1; + bool ImplicitBufferPtr : 1; MCPhysReg getNextUserSGPR() const { assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs"); @@ -236,7 +241,7 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction { unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI); unsigned addDispatchID(const SIRegisterInfo &TRI); unsigned addFlatScratchInit(const SIRegisterInfo &TRI); - unsigned addPrivateMemoryPtr(const SIRegisterInfo &TRI); + unsigned addImplicitBufferPtr(const SIRegisterInfo &TRI); // Add system SGPRs. unsigned addWorkGroupIDX() { @@ -341,8 +346,8 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction { return WorkItemIDZ; } - bool hasPrivateMemoryInputPtr() const { - return PrivateMemoryInputPtr; + bool hasImplicitBufferPtr() const { + return ImplicitBufferPtr; } unsigned getNumUserSGPRs() const { @@ -377,10 +382,13 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction { } void setStackPtrOffsetReg(unsigned Reg) { - assert(Reg != AMDGPU::NoRegister && "Should never be unset"); StackPtrOffsetReg = Reg; } + // Note the unset value for this is AMDGPU::SP_REG rather than + // NoRegister. This is mostly a workaround for MIR tests where state that + // can't be directly computed from the function is not preserved in serialized + // MIR. unsigned getStackPtrOffsetReg() const { return StackPtrOffsetReg; } @@ -388,17 +396,16 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction { void setScratchWaveOffsetReg(unsigned Reg) { assert(Reg != AMDGPU::NoRegister && "Should never be unset"); ScratchWaveOffsetReg = Reg; - - // FIXME: Only for entry functions. 
- FrameOffsetReg = ScratchWaveOffsetReg; + if (isEntryFunction()) + FrameOffsetReg = ScratchWaveOffsetReg; } unsigned getQueuePtrUserSGPR() const { return QueuePtrUserSGPR; } - unsigned getPrivateMemoryPtrUserSGPR() const { - return PrivateMemoryPtrUserSGPR; + unsigned getImplicitBufferPtrUserSGPR() const { + return ImplicitBufferPtrUserSGPR; } bool hasSpilledSGPRs() const { diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/SIMachineScheduler.cpp b/interpreter/llvm/src/lib/Target/AMDGPU/SIMachineScheduler.cpp index 9d4e677400e69..34886c48f461d 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/SIMachineScheduler.cpp +++ b/interpreter/llvm/src/lib/Target/AMDGPU/SIMachineScheduler.cpp @@ -12,9 +12,9 @@ // //===----------------------------------------------------------------------===// +#include "SIMachineScheduler.h" #include "AMDGPU.h" #include "SIInstrInfo.h" -#include "SIMachineScheduler.h" #include "SIRegisterInfo.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" @@ -38,7 +38,7 @@ using namespace llvm; -#define DEBUG_TYPE "misched" +#define DEBUG_TYPE "machine-scheduler" // This scheduler implements a different scheduling algorithm than // GenericScheduler. diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/interpreter/llvm/src/lib/Target/AMDGPU/SIPeepholeSDWA.cpp index e02c2e3240e84..e2ac6631d2f32 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/SIPeepholeSDWA.cpp +++ b/interpreter/llvm/src/lib/Target/AMDGPU/SIPeepholeSDWA.cpp @@ -20,16 +20,16 @@ /// //===----------------------------------------------------------------------===// - #include "AMDGPU.h" #include "AMDGPUSubtarget.h" #include "SIDefines.h" #include "SIInstrInfo.h" -#include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include +#include using namespace llvm; @@ -44,27 +44,32 @@ namespace { class SDWAOperand; class SIPeepholeSDWA : public MachineFunctionPass { +public: + typedef SmallVector SDWAOperandsVector; + private: MachineRegisterInfo *MRI; const SIRegisterInfo *TRI; const SIInstrInfo *TII; std::unordered_map> SDWAOperands; + std::unordered_map PotentialMatches; + SmallVector ConvertedInstructions; Optional foldToImm(const MachineOperand &Op) const; public: static char ID; - typedef SmallVector, 4> SDWAOperandsVector; - SIPeepholeSDWA() : MachineFunctionPass(ID) { initializeSIPeepholeSDWAPass(*PassRegistry::getPassRegistry()); } bool runOnMachineFunction(MachineFunction &MF) override; void matchSDWAOperands(MachineFunction &MF); + bool isConvertibleToSDWA(const MachineInstr &MI, const SISubtarget &ST) const; bool convertToSDWA(MachineInstr &MI, const SDWAOperandsVector &SDWAOperands); + void legalizeScalarOperands(MachineInstr &MI, const SISubtarget &ST) const; StringRef getPassName() const override { return "SI Peephole SDWA"; } @@ -123,7 +128,8 @@ class SDWASrcOperand : public SDWAOperand { bool getNeg() const { return Neg; } bool getSext() const { return Sext; } - uint64_t getSrcMods() const; + uint64_t getSrcMods(const SIInstrInfo *TII, + const MachineOperand *SrcOp) const; }; class SDWADstOperand : public SDWAOperand { @@ -218,7 +224,7 @@ static bool isSameReg(const MachineOperand &LHS, const MachineOperand &RHS) { static bool isSubregOf(const MachineOperand &SubReg, const MachineOperand &SuperReg, const TargetRegisterInfo *TRI) { - + if (!SuperReg.isReg() || !SubReg.isReg()) return false; @@ -234,13 +240,24 @@ static bool 
isSubregOf(const MachineOperand &SubReg, return SuperMask.all(); } -uint64_t SDWASrcOperand::getSrcMods() const { +uint64_t SDWASrcOperand::getSrcMods(const SIInstrInfo *TII, + const MachineOperand *SrcOp) const { uint64_t Mods = 0; + const auto *MI = SrcOp->getParent(); + if (TII->getNamedOperand(*MI, AMDGPU::OpName::src0) == SrcOp) { + if (auto *Mod = TII->getNamedOperand(*MI, AMDGPU::OpName::src0_modifiers)) { + Mods = Mod->getImm(); + } + } else if (TII->getNamedOperand(*MI, AMDGPU::OpName::src1) == SrcOp) { + if (auto *Mod = TII->getNamedOperand(*MI, AMDGPU::OpName::src1_modifiers)) { + Mods = Mod->getImm(); + } + } if (Abs || Neg) { assert(!Sext && "Float and integer src modifiers can't be set simultaneously"); Mods |= Abs ? SISrcMods::ABS : 0; - Mods |= Neg ? SISrcMods::NEG : 0; + Mods ^= Neg ? SISrcMods::NEG : 0; } else if (Sext) { Mods |= SISrcMods::SEXT; } @@ -285,7 +302,7 @@ bool SDWASrcOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) { MachineOperand *SrcSel = TII->getNamedOperand(MI, AMDGPU::OpName::src0_sel); MachineOperand *SrcMods = TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers); - assert(Src && Src->isReg()); + assert(Src && (Src->isReg() || Src->isImm())); if (!isSameReg(*Src, *getReplacedOperand())) { // If this is not src0 then it should be src1 Src = TII->getNamedOperand(MI, AMDGPU::OpName::src1); @@ -306,7 +323,7 @@ bool SDWASrcOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) { } copyRegOperand(*Src, *getTargetOperand()); SrcSel->setImm(getSrcSel()); - SrcMods->setImm(getSrcMods()); + SrcMods->setImm(getSrcMods(TII, Src)); getTargetOperand()->setIsKill(false); return true; } @@ -403,7 +420,10 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) { switch (Opcode) { case AMDGPU::V_LSHRREV_B32_e32: case AMDGPU::V_ASHRREV_I32_e32: - case AMDGPU::V_LSHLREV_B32_e32: { + case AMDGPU::V_LSHLREV_B32_e32: + case AMDGPU::V_LSHRREV_B32_e64: + case AMDGPU::V_ASHRREV_I32_e64: + case AMDGPU::V_LSHLREV_B32_e64: { // from: v_lshrrev_b32_e32 v1, 16/24, v0 // to SDWA src:v0 src_sel:WORD_1/BYTE_3 @@ -426,7 +446,8 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) { TRI->isPhysicalRegister(Dst->getReg())) break; - if (Opcode == AMDGPU::V_LSHLREV_B32_e32) { + if (Opcode == AMDGPU::V_LSHLREV_B32_e32 || + Opcode == AMDGPU::V_LSHLREV_B32_e64) { auto SDWADst = make_unique( Dst, Src1, *Imm == 16 ? WORD_1 : BYTE_3, UNUSED_PAD); DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWADst << '\n'); @@ -435,7 +456,8 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) { } else { auto SDWASrc = make_unique( Src1, Dst, *Imm == 16 ? WORD_1 : BYTE_3, false, false, - Opcode == AMDGPU::V_LSHRREV_B32_e32 ?
false : true); + Opcode != AMDGPU::V_LSHRREV_B32_e32 && + Opcode != AMDGPU::V_LSHRREV_B32_e64); DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWASrc << '\n'); SDWAOperands[&MI] = std::move(SDWASrc); ++NumSDWAPatternsFound; @@ -445,7 +467,10 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) { case AMDGPU::V_LSHRREV_B16_e32: case AMDGPU::V_ASHRREV_I16_e32: - case AMDGPU::V_LSHLREV_B16_e32: { + case AMDGPU::V_LSHLREV_B16_e32: + case AMDGPU::V_LSHRREV_B16_e64: + case AMDGPU::V_ASHRREV_I16_e64: + case AMDGPU::V_LSHLREV_B16_e64: { // from: v_lshrrev_b16_e32 v1, 8, v0 // to SDWA src:v0 src_sel:BYTE_1 @@ -466,16 +491,18 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) { TRI->isPhysicalRegister(Dst->getReg())) break; - if (Opcode == AMDGPU::V_LSHLREV_B16_e32) { + if (Opcode == AMDGPU::V_LSHLREV_B16_e32 || + Opcode == AMDGPU::V_LSHLREV_B16_e64) { auto SDWADst = - make_unique(Dst, Src1, BYTE_1, UNUSED_PAD); + make_unique(Dst, Src1, BYTE_1, UNUSED_PAD); DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWADst << '\n'); SDWAOperands[&MI] = std::move(SDWADst); ++NumSDWAPatternsFound; } else { auto SDWASrc = make_unique( Src1, Dst, BYTE_1, false, false, - Opcode == AMDGPU::V_LSHRREV_B16_e32 ? false : true); + Opcode != AMDGPU::V_LSHRREV_B16_e32 && + Opcode != AMDGPU::V_LSHRREV_B16_e64); DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWASrc << '\n'); SDWAOperands[&MI] = std::move(SDWASrc); ++NumSDWAPatternsFound; @@ -530,7 +557,7 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) { MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0); MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst); - + if (TRI->isPhysicalRegister(Src0->getReg()) || TRI->isPhysicalRegister(Dst->getReg())) break; @@ -543,28 +570,33 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) { ++NumSDWAPatternsFound; break; } - case AMDGPU::V_AND_B32_e32: { + case AMDGPU::V_AND_B32_e32: + case AMDGPU::V_AND_B32_e64: { // e.g.: // from: v_and_b32_e32 v1, 0x0000ffff/0x000000ff, v0 // to SDWA src:v0 src_sel:WORD_0/BYTE_0 MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0); + MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1); + auto ValSrc = Src1; auto Imm = foldToImm(*Src0); - if (!Imm) - break; - if (*Imm != 0x0000ffff && *Imm != 0x000000ff) + if (!Imm) { + Imm = foldToImm(*Src1); + ValSrc = Src0; + } + + if (!Imm || (*Imm != 0x0000ffff && *Imm != 0x000000ff)) break; - MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1); MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst); - + if (TRI->isPhysicalRegister(Src1->getReg()) || TRI->isPhysicalRegister(Dst->getReg())) break; auto SDWASrc = make_unique( - Src1, Dst, *Imm == 0x0000ffff ? WORD_0 : BYTE_0); + ValSrc, Dst, *Imm == 0x0000ffff ? WORD_0 : BYTE_0); DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWASrc << '\n'); SDWAOperands[&MI] = std::move(SDWASrc); ++NumSDWAPatternsFound; @@ -575,21 +607,49 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) { } } -bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI, - const SDWAOperandsVector &SDWAOperands) { - // Check if this instruction can be converted to SDWA: - // 1. 
Does this opcode support SDWA - if (AMDGPU::getSDWAOp(MI.getOpcode()) == -1) +bool SIPeepholeSDWA::isConvertibleToSDWA(const MachineInstr &MI, + const SISubtarget &ST) const { + // Check if this instruction has an opcode that supports SDWA + int Opc = MI.getOpcode(); + if (AMDGPU::getSDWAOp(Opc) == -1) + Opc = AMDGPU::getVOPe32(Opc); + + if (Opc == -1 || AMDGPU::getSDWAOp(Opc) == -1) + return false; + + if (!ST.hasSDWAOmod() && TII->hasModifiersSet(MI, AMDGPU::OpName::omod)) return false; - // 2. Are all operands - VGPRs - for (const MachineOperand &Operand : MI.explicit_operands()) { - if (!Operand.isReg() || !TRI->isVGPR(*MRI, Operand.getReg())) + if (TII->isVOPC(Opc)) { + if (!ST.hasSDWASdst()) { + const MachineOperand *SDst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst); + if (SDst && SDst->getReg() != AMDGPU::VCC) + return false; + } + + if (!ST.hasSDWAOutModsVOPC() && + (TII->hasModifiersSet(MI, AMDGPU::OpName::clamp) || + TII->hasModifiersSet(MI, AMDGPU::OpName::omod))) return false; + + } else if (TII->getNamedOperand(MI, AMDGPU::OpName::sdst) || + !TII->getNamedOperand(MI, AMDGPU::OpName::vdst)) { + return false; } + if (!ST.hasSDWAMac() && (Opc == AMDGPU::V_MAC_F16_e32 || + Opc == AMDGPU::V_MAC_F32_e32)) + return false; + + return true; +} + +bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI, + const SDWAOperandsVector &SDWAOperands) { // Convert to sdwa int SDWAOpcode = AMDGPU::getSDWAOp(MI.getOpcode()); + if (SDWAOpcode == -1) + SDWAOpcode = AMDGPU::getSDWAOp(AMDGPU::getVOPe32(MI.getOpcode())); assert(SDWAOpcode != -1); const MCInstrDesc &SDWADesc = TII->get(SDWAOpcode); @@ -603,8 +663,13 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI, if (Dst) { assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::vdst) != -1); SDWAInst.add(*Dst); + } else if ((Dst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst))) { + assert(Dst && + AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::sdst) != -1); + SDWAInst.add(*Dst); } else { - assert(TII->isVOPC(MI)); + assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::sdst) != -1); + SDWAInst.addReg(AMDGPU::VCC, RegState::Define); } // Copy src0, initialize src0_modifiers. All sdwa instructions have src0 and @@ -614,7 +679,10 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI, Src0 && AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src0) != -1 && AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src0_modifiers) != -1); - SDWAInst.addImm(0); + if (auto *Mod = TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers)) + SDWAInst.addImm(Mod->getImm()); + else + SDWAInst.addImm(0); SDWAInst.add(*Src0); // Copy src1 if present, initialize src1_modifiers. @@ -623,10 +691,11 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI, assert( AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src1) != -1 && AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src1_modifiers) != -1); - SDWAInst.addImm(0); + if (auto *Mod = TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers)) + SDWAInst.addImm(Mod->getImm()); + else + SDWAInst.addImm(0); SDWAInst.add(*Src1); - } else { - assert(TII->isVOP1(MI)); } if (SDWAOpcode == AMDGPU::V_MAC_F16_sdwa || @@ -637,16 +706,32 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI, SDWAInst.add(*Src2); } - // Initialize clamp.
+ // Copy clamp if present, initialize otherwise assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::clamp) != -1); - SDWAInst.addImm(0); + MachineOperand *Clamp = TII->getNamedOperand(MI, AMDGPU::OpName::clamp); + if (Clamp) { + SDWAInst.add(*Clamp); + } else { + SDWAInst.addImm(0); + } - // Initialize dst_sel and dst_unused if present - if (Dst) { - assert( - AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::dst_sel) != -1 && - AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::dst_unused) != -1); + // Copy omod if present, initialize otherwise if needed + if (AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::omod) != -1) { + MachineOperand *OMod = TII->getNamedOperand(MI, AMDGPU::OpName::omod); + if (OMod) { + SDWAInst.add(*OMod); + } else { + SDWAInst.addImm(0); + } + } + + // Initialize dst_sel if present + if (AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::dst_sel) != -1) { SDWAInst.addImm(AMDGPU::SDWA::SdwaSel::DWORD); + } + + // Initialize dst_unused if present + if (AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::dst_unused) != -1) { SDWAInst.addImm(AMDGPU::SDWA::DstUnused::UNUSED_PAD); } @@ -664,9 +749,22 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI, // Apply all sdwa operand patterns bool Converted = false; for (auto &Operand : SDWAOperands) { - Converted |= Operand->convertToSDWA(*SDWAInst, TII); + // There should be no intersection between SDWA operands and potential MIs + // e.g.: + // v_and_b32 v0, 0xff, v1 -> src:v1 sel:BYTE_0 + // v_and_b32 v2, 0xff, v0 -> src:v0 sel:BYTE_0 + // v_add_u32 v3, v4, v2 + // + // In that example it is possible that we would fold 2nd instruction into 3rd + // (v_add_u32_sdwa) and then try to fold 1st instruction into 2nd (that was + // already destroyed). So if SDWAOperand is also a potential MI then do not + // apply it. + if (PotentialMatches.count(Operand->getParentInst()) == 0) + Converted |= Operand->convertToSDWA(*SDWAInst, TII); } - if (!Converted) { + if (Converted) { + ConvertedInstructions.push_back(SDWAInst); + } else { SDWAInst->eraseFromParent(); return false; } @@ -679,27 +777,56 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI, return true; } +// If an instruction was converted to SDWA it should not have immediates or SGPR +// operands (one SGPR is allowed on GFX9). Copy its scalar operands into VGPRs. +void SIPeepholeSDWA::legalizeScalarOperands(MachineInstr &MI, const SISubtarget &ST) const { + const MCInstrDesc &Desc = TII->get(MI.getOpcode()); + unsigned ConstantBusCount = 0; + for (MachineOperand &Op : MI.explicit_uses()) { + if (!Op.isImm() && !(Op.isReg() && !TRI->isVGPR(*MRI, Op.getReg()))) + continue; + + unsigned I = MI.getOperandNo(&Op); + if (Desc.OpInfo[I].RegClass == -1 || + !TRI->hasVGPRs(TRI->getRegClass(Desc.OpInfo[I].RegClass))) + continue; + + if (ST.hasSDWAScalar() && ConstantBusCount == 0 && Op.isReg() && + TRI->isSGPRReg(*MRI, Op.getReg())) { + ++ConstantBusCount; + continue; + } + + unsigned VGPR = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass); + auto Copy = BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(), + TII->get(AMDGPU::V_MOV_B32_e32), VGPR); + if (Op.isImm()) + Copy.addImm(Op.getImm()); + else if (Op.isReg()) + Copy.addReg(Op.getReg(), Op.isKill() ?
RegState::Kill : 0, + Op.getSubReg()); + Op.ChangeToRegister(VGPR, false); + } +} + bool SIPeepholeSDWA::runOnMachineFunction(MachineFunction &MF) { const SISubtarget &ST = MF.getSubtarget(); - if (!ST.hasSDWA() || - !AMDGPU::isVI(ST)) { // TODO: Add support for SDWA on gfx9 + if (!ST.hasSDWA()) return false; - } MRI = &MF.getRegInfo(); TRI = ST.getRegisterInfo(); TII = ST.getInstrInfo(); - std::unordered_map PotentialMatches; - + // Find all SDWA operands in MF. matchSDWAOperands(MF); - for (auto &OperandPair : SDWAOperands) { - auto &Operand = OperandPair.second; + for (const auto &OperandPair : SDWAOperands) { + const auto &Operand = OperandPair.second; MachineInstr *PotentialMI = Operand->potentialToConvert(TII); - if (PotentialMI) { - PotentialMatches[PotentialMI].push_back(std::move(Operand)); + if (PotentialMI && isConvertibleToSDWA(*PotentialMI, ST)) { + PotentialMatches[PotentialMI].push_back(Operand.get()); } } @@ -708,6 +835,12 @@ bool SIPeepholeSDWA::runOnMachineFunction(MachineFunction &MF) { convertToSDWA(PotentialMI, PotentialPair.second); } + PotentialMatches.clear(); SDWAOperands.clear(); - return false; + + bool Ret = !ConvertedInstructions.empty(); + while (!ConvertedInstructions.empty()) + legalizeScalarOperands(*ConvertedInstructions.pop_back_val(), ST); + + return Ret; } diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/SIRegisterInfo.cpp b/interpreter/llvm/src/lib/Target/AMDGPU/SIRegisterInfo.cpp index 8820e294562ba..4a3fbb4593bb3 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/interpreter/llvm/src/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -13,9 +13,9 @@ //===----------------------------------------------------------------------===// #include "SIRegisterInfo.h" +#include "AMDGPUSubtarget.h" #include "SIInstrInfo.h" #include "SIMachineFunctionInfo.h" -#include "AMDGPUSubtarget.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/RegisterScavenging.h" @@ -117,11 +117,7 @@ unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg( return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SReg_128RegClass); } -unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg( - const MachineFunction &MF) const { - - const SISubtarget &ST = MF.getSubtarget(); - unsigned RegCount = ST.getMaxNumSGPRs(MF); +static unsigned findPrivateSegmentWaveByteOffsetRegIndex(unsigned RegCount) { unsigned Reg; // Try to place it in a hole after PrivateSegmentBufferReg. @@ -134,9 +130,22 @@ unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg( // wave offset before it. Reg = RegCount - 5; } + + return Reg; +} + +unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg( + const MachineFunction &MF) const { + const SISubtarget &ST = MF.getSubtarget(); + unsigned Reg = findPrivateSegmentWaveByteOffsetRegIndex(ST.getMaxNumSGPRs(MF)); return AMDGPU::SGPR_32RegClass.getRegister(Reg); } +unsigned SIRegisterInfo::reservedStackPtrOffsetReg( + const MachineFunction &MF) const { + return AMDGPU::SGPR32; +} + BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); Reserved.set(AMDGPU::INDIRECT_BASE_ADDR); @@ -198,15 +207,37 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { assert(!isSubRegister(ScratchRSrcReg, ScratchWaveOffsetReg)); } + // We have to assume the SP is needed in case there are calls in the function, + // which is detected after the function is lowered. 
If we aren't really going + // to need SP, don't bother reserving it. + unsigned StackPtrReg = MFI->getStackPtrOffsetReg(); + + if (StackPtrReg != AMDGPU::NoRegister) { + reserveRegisterTuples(Reserved, StackPtrReg); + assert(!isSubRegister(ScratchRSrcReg, StackPtrReg)); + } + + unsigned FrameReg = MFI->getFrameOffsetReg(); + if (FrameReg != AMDGPU::NoRegister) { + reserveRegisterTuples(Reserved, FrameReg); + assert(!isSubRegister(ScratchRSrcReg, FrameReg)); + } + return Reserved; } bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const { - return Fn.getFrameInfo().hasStackObjects(); + const SIMachineFunctionInfo *Info = Fn.getInfo(); + if (Info->isEntryFunction()) { + const MachineFrameInfo &MFI = Fn.getFrameInfo(); + return MFI.hasStackObjects() || MFI.hasCalls(); + } + + // May need scavenger for dealing with callee saved registers. + return true; } -bool -SIRegisterInfo::requiresFrameIndexScavenging(const MachineFunction &MF) const { +bool SIRegisterInfo::requiresFrameIndexScavenging(const MachineFunction &MF) const { return MF.getFrameInfo().hasStackObjects(); } @@ -318,8 +349,11 @@ void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, MachineOperand *FIOp = TII->getNamedOperand(MI, AMDGPU::OpName::vaddr); assert(FIOp && FIOp->isFI() && "frame index must be address operand"); - assert(TII->isMUBUF(MI)); + assert(TII->getNamedOperand(MI, AMDGPU::OpName::soffset)->getReg() == + MF->getInfo()->getFrameOffsetReg() && + "should only be seeing frame offset relative FrameIndex"); + MachineOperand *OffsetOp = TII->getNamedOperand(MI, AMDGPU::OpName::offset); int64_t NewOffset = OffsetOp->getImm() + Offset; @@ -654,11 +688,11 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i); if (Offset != 0) { BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg) - .addReg(MFI->getScratchWaveOffsetReg()) + .addReg(MFI->getFrameOffsetReg()) .addImm(Offset); } else { BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg) - .addReg(MFI->getScratchWaveOffsetReg()); + .addReg(MFI->getFrameOffsetReg()); } BuildMI(*MBB, MI, DL, TII->get(ScalarStoreOp)) @@ -715,11 +749,11 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore, EltSize, MinAlign(Align, EltSize * i)); BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE)) - .addReg(TmpReg, RegState::Kill) // src - .addFrameIndex(Index) // vaddr - .addReg(MFI->getScratchRSrcReg()) // srrsrc - .addReg(MFI->getScratchWaveOffsetReg()) // soffset - .addImm(i * 4) // offset + .addReg(TmpReg, RegState::Kill) // src + .addFrameIndex(Index) // vaddr + .addReg(MFI->getScratchRSrcReg()) // srrsrc + .addReg(MFI->getFrameOffsetReg()) // soffset + .addImm(i * 4) // offset .addMemOperand(MMO); } } @@ -806,11 +840,11 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI, int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i); if (Offset != 0) { BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg) - .addReg(MFI->getScratchWaveOffsetReg()) + .addReg(MFI->getFrameOffsetReg()) .addImm(Offset); } else { BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg) - .addReg(MFI->getScratchWaveOffsetReg()); + .addReg(MFI->getFrameOffsetReg()); } auto MIB = @@ -853,10 +887,10 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI, MinAlign(Align, EltSize * i)); BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), 
                 TmpReg)
-          .addFrameIndex(Index)                   // vaddr
-          .addReg(MFI->getScratchRSrcReg())       // srsrc
-          .addReg(MFI->getScratchWaveOffsetReg()) // soffset
-          .addImm(i * 4)                          // offset
+          .addFrameIndex(Index)             // vaddr
+          .addReg(MFI->getScratchRSrcReg()) // srsrc
+          .addReg(MFI->getFrameOffsetReg()) // soffset
+          .addImm(i * 4)                    // offset
           .addMemOperand(MMO);
 
       auto MIB =
@@ -981,12 +1015,83 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
     }
     default: {
-      if (TII->isMUBUF(*MI)) {
+      const DebugLoc &DL = MI->getDebugLoc();
+      bool IsMUBUF = TII->isMUBUF(*MI);
+
+      if (!IsMUBUF &&
+          MFI->getFrameOffsetReg() != MFI->getScratchWaveOffsetReg()) {
+        // Convert to an absolute stack address by finding the offset from the
+        // scratch wave base and scaling by the wave size.
+        //
+        // In an entry function/kernel the stack address is already the
+        // absolute address relative to the scratch wave offset.
+
+        unsigned DiffReg
+          = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+
+        bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32;
+        unsigned ResultReg = IsCopy ?
+          MI->getOperand(0).getReg() :
+          MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+
+        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), DiffReg)
+          .addReg(MFI->getFrameOffsetReg())
+          .addReg(MFI->getScratchWaveOffsetReg());
+
+        int64_t Offset = FrameInfo.getObjectOffset(Index);
+        if (Offset == 0) {
+          // XXX - This never happens because of the emergency scavenging
+          // slot at 0?
+          BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ResultReg)
+            .addImm(Log2_32(ST.getWavefrontSize()))
+            .addReg(DiffReg);
+        } else {
+          unsigned CarryOut
+            = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
+          unsigned ScaledReg
+            = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+
+          BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ScaledReg)
+            .addImm(Log2_32(ST.getWavefrontSize()))
+            .addReg(DiffReg, RegState::Kill);
+
+          // TODO: Fold if use instruction is another add of a constant.
+          if (AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) {
+            BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ADD_I32_e64), ResultReg)
+              .addReg(CarryOut, RegState::Define | RegState::Dead)
+              .addImm(Offset)
+              .addReg(ScaledReg, RegState::Kill);
+          } else {
+            unsigned ConstOffsetReg
+              = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+
+            BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), ConstOffsetReg)
+              .addImm(Offset);
+            BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ADD_I32_e64), ResultReg)
+              .addReg(CarryOut, RegState::Define | RegState::Dead)
+              .addReg(ConstOffsetReg, RegState::Kill)
+              .addReg(ScaledReg, RegState::Kill);
+          }
+
+          MRI.setRegAllocationHint(CarryOut, 0, AMDGPU::VCC);
+        }
+
+        // Don't introduce an extra copy if we're just materializing in a mov.
+        if (IsCopy)
+          MI->eraseFromParent();
+        else
+          FIOp.ChangeToRegister(ResultReg, false, false, true);
+        return;
+      }
+
+      if (IsMUBUF) {
        // Disable offen so we don't need a 0 vgpr base.
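// Worked example of the address arithmetic emitted above, as plain C++: the
// byte distance between the frame register and the scratch wave base is
// shifted down by log2 of the wavefront size, then the object's frame offset
// is added, matching the S_SUB_U32 / V_LSHRREV_B32 / V_ADD_I32 sequence. The
// helper and its values are illustrative, not part of the backend.
#include <cassert>
#include <cstdint>

static uint32_t absoluteLaneOffset(uint32_t FrameOffsetReg,       // SGPR value
                                   uint32_t ScratchWaveOffsetReg, // SGPR value
                                   uint32_t WavefrontSize,        // e.g. 64
                                   uint32_t ObjectOffset) {
  assert(WavefrontSize && (WavefrontSize & (WavefrontSize - 1)) == 0 &&
         "wavefront size must be a power of two");
  uint32_t Diff = FrameOffsetReg - ScratchWaveOffsetReg; // S_SUB_U32
  uint32_t Shift = 0;
  while ((1u << Shift) != WavefrontSize)
    ++Shift;                                             // Log2_32
  uint32_t Scaled = Diff >> Shift;                       // V_LSHRREV_B32
  return Scaled + ObjectOffset;                          // V_ADD_I32
}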
assert(static_cast(FIOperandNum) == AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::vaddr)); + assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() + == MFI->getFrameOffsetReg()); + int64_t Offset = FrameInfo.getObjectOffset(Index); int64_t OldImm = TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(); @@ -995,23 +1100,85 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, if (isUInt<12>(NewOffset) && buildMUBUFOffsetLoadStore(TII, FrameInfo, MI, Index, NewOffset)) { MI->eraseFromParent(); - break; + return; } } + // If the offset is simply too big, don't convert to a scratch wave offset + // relative index. + int64_t Offset = FrameInfo.getObjectOffset(Index); FIOp.ChangeToImmediate(Offset); if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) { unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); - BuildMI(*MBB, MI, MI->getDebugLoc(), - TII->get(AMDGPU::V_MOV_B32_e32), TmpReg) - .addImm(Offset); + BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg) + .addImm(Offset); FIOp.ChangeToRegister(TmpReg, false, false, true); } } } } +StringRef SIRegisterInfo::getRegAsmName(unsigned Reg) const { + #define AMDGPU_REG_ASM_NAMES + #include "AMDGPURegAsmNames.inc.cpp" + + #define REG_RANGE(BeginReg, EndReg, RegTable) \ + if (Reg >= BeginReg && Reg <= EndReg) { \ + unsigned Index = Reg - BeginReg; \ + assert(Index < array_lengthof(RegTable)); \ + return RegTable[Index]; \ + } + + REG_RANGE(AMDGPU::VGPR0, AMDGPU::VGPR255, VGPR32RegNames); + REG_RANGE(AMDGPU::SGPR0, AMDGPU::SGPR103, SGPR32RegNames); + REG_RANGE(AMDGPU::VGPR0_VGPR1, AMDGPU::VGPR254_VGPR255, VGPR64RegNames); + REG_RANGE(AMDGPU::SGPR0_SGPR1, AMDGPU::SGPR102_SGPR103, SGPR64RegNames); + REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2, AMDGPU::VGPR253_VGPR254_VGPR255, + VGPR96RegNames); + + REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3, + AMDGPU::VGPR252_VGPR253_VGPR254_VGPR255, + VGPR128RegNames); + REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, + AMDGPU::SGPR100_SGPR101_SGPR102_SGPR103, + SGPR128RegNames); + + REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7, + AMDGPU::VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255, + VGPR256RegNames); + + REG_RANGE( + AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7_VGPR8_VGPR9_VGPR10_VGPR11_VGPR12_VGPR13_VGPR14_VGPR15, + AMDGPU::VGPR240_VGPR241_VGPR242_VGPR243_VGPR244_VGPR245_VGPR246_VGPR247_VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255, + VGPR512RegNames); + + REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7, + AMDGPU::SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103, + SGPR256RegNames); + + REG_RANGE( + AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7_SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15, + AMDGPU::SGPR88_SGPR89_SGPR90_SGPR91_SGPR92_SGPR93_SGPR94_SGPR95_SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103, + SGPR512RegNames + ); + +#undef REG_RANGE + + // FIXME: Rename flat_scr so we don't need to special case this. + switch (Reg) { + case AMDGPU::FLAT_SCR: + return "flat_scratch"; + case AMDGPU::FLAT_SCR_LO: + return "flat_scratch_lo"; + case AMDGPU::FLAT_SCR_HI: + return "flat_scratch_hi"; + default: + // For the special named registers the default is fine. + return TargetRegisterInfo::getRegAsmName(Reg); + } +} + // FIXME: This is very slow. It might be worth creating a map from physreg to // register class. 
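// The REG_RANGE chain in getRegAsmName above is a sequence of contiguous
// range lookups. A minimal equivalent of one link of that chain, with the
// bounds and table passed in as illustrative parameters:
#include <cassert>
#include <cstddef>

static const char *lookupRegName(unsigned Reg, unsigned BeginReg,
                                 unsigned EndReg,
                                 const char *const *RegTable,
                                 size_t TableSize) {
  if (Reg < BeginReg || Reg > EndReg)
    return nullptr; // not in this range; the caller tries the next table
  unsigned Index = Reg - BeginReg;
  assert(Index < TableSize && "register range and name table out of sync");
  return RegTable[Index];
}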
const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const { @@ -1189,12 +1356,11 @@ unsigned SIRegisterInfo::getPreloadedValue(const MachineFunction &MF, case SIRegisterInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET: return MFI->PrivateSegmentWaveByteOffsetSystemSGPR; case SIRegisterInfo::PRIVATE_SEGMENT_BUFFER: - if (ST.isAmdCodeObjectV2(MF)) { - assert(MFI->hasPrivateSegmentBuffer()); - return MFI->PrivateSegmentBufferUserSGPR; - } - assert(MFI->hasPrivateMemoryInputPtr()); - return MFI->PrivateMemoryPtrUserSGPR; + assert(MFI->hasPrivateSegmentBuffer()); + return MFI->PrivateSegmentBufferUserSGPR; + case SIRegisterInfo::IMPLICIT_BUFFER_PTR: + assert(MFI->hasImplicitBufferPtr()); + return MFI->ImplicitBufferPtrUserSGPR; case SIRegisterInfo::KERNARG_SEGMENT_PTR: assert(MFI->hasKernargSegmentPtr()); return MFI->KernargSegmentPtrUserSGPR; diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/SIRegisterInfo.h b/interpreter/llvm/src/lib/Target/AMDGPU/SIRegisterInfo.h index 679ed229758a0..600cc886cb595 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/SIRegisterInfo.h +++ b/interpreter/llvm/src/lib/Target/AMDGPU/SIRegisterInfo.h @@ -16,6 +16,7 @@ #define LLVM_LIB_TARGET_AMDGPU_SIREGISTERINFO_H #include "AMDGPURegisterInfo.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "SIDefines.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -57,8 +58,22 @@ class SIRegisterInfo final : public AMDGPURegisterInfo { unsigned reservedPrivateSegmentWaveByteOffsetReg( const MachineFunction &MF) const; + unsigned reservedStackPtrOffsetReg(const MachineFunction &MF) const; + BitVector getReservedRegs(const MachineFunction &MF) const override; + const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override; + const uint32_t *getCallPreservedMask(const MachineFunction &MF, + CallingConv::ID) const override; + + // Stack access is very expensive. CSRs are also the high registers, and we + // want to minimize the number of used registers. + unsigned getCSRFirstUseCost() const override { + return 100; + } + + unsigned getFrameRegister(const MachineFunction &MF) const override; + bool requiresRegisterScavenging(const MachineFunction &Fn) const override; bool requiresFrameIndexScavenging(const MachineFunction &MF) const override; @@ -103,6 +118,8 @@ class SIRegisterInfo final : public AMDGPURegisterInfo { bool eliminateSGPRToVGPRSpillFrameIndex(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS) const; + StringRef getRegAsmName(unsigned Reg) const override; + unsigned getHWRegIndex(unsigned Reg) const { return getEncodingValue(Reg) & 0xff; } @@ -180,12 +197,13 @@ class SIRegisterInfo final : public AMDGPURegisterInfo { WORKGROUP_ID_Y = 11, WORKGROUP_ID_Z = 12, PRIVATE_SEGMENT_WAVE_BYTE_OFFSET = 14, + IMPLICIT_BUFFER_PTR = 15, // VGPRS: - FIRST_VGPR_VALUE = 15, + FIRST_VGPR_VALUE = 16, WORKITEM_ID_X = FIRST_VGPR_VALUE, - WORKITEM_ID_Y = 16, - WORKITEM_ID_Z = 17 + WORKITEM_ID_Y = 17, + WORKITEM_ID_Z = 18 }; /// \brief Returns the physical register that \p Value is stored in. @@ -228,6 +246,11 @@ class SIRegisterInfo final : public AMDGPURegisterInfo { const int *getRegUnitPressureSets(unsigned RegUnit) const override; + unsigned getReturnAddressReg(const MachineFunction &MF) const { + // Not a callee saved register. 
+ return AMDGPU::SGPR30_SGPR31; + } + private: void buildSpillLoadStore(MachineBasicBlock::iterator MI, unsigned LoadStoreOp, diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/SIRegisterInfo.td b/interpreter/llvm/src/lib/Target/AMDGPU/SIRegisterInfo.td index fc808011cd889..d097b78890e35 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/SIRegisterInfo.td +++ b/interpreter/llvm/src/lib/Target/AMDGPU/SIRegisterInfo.td @@ -23,6 +23,13 @@ class SIReg regIdx = 0> : Register, def VCC_LO : SIReg<"vcc_lo", 106>; def VCC_HI : SIReg<"vcc_hi", 107>; +// Pseudo-registers: Used as placeholders during isel and immediately +// replaced, never seeing the verifier. +def PRIVATE_RSRC_REG : SIReg<"", 0>; +def FP_REG : SIReg<"", 0>; +def SP_REG : SIReg<"", 0>; +def SCRATCH_WAVE_OFFSET_REG : SIReg<"", 0>; + // VCC for 64-bit instructions def VCC : RegisterWithSubRegs<"vcc", [VCC_LO, VCC_HI]>, DwarfRegAlias { @@ -314,7 +321,8 @@ def TTMP_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64], 32, (add TTMP_128R let isAllocatable = 0; } -def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64], 32, (add SGPR_128, TTMP_128)> { +def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64], 32, + (add SGPR_128, TTMP_128)> { let AllocationPriority = 10; } @@ -464,7 +472,9 @@ defm SCSrc : RegInlineOperand<"SReg", "SCSrc"> ; defm VSrc : RegImmOperand<"VS", "VSrc">; -def VSrc_128 : RegisterOperand; +def VSrc_128 : RegisterOperand { + let DecoderMethod = "DecodeVS_128RegisterClass"; +} //===----------------------------------------------------------------------===// // VSrc_* Operands with an VGPR diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/interpreter/llvm/src/lib/Target/AMDGPU/SIShrinkInstructions.cpp index c5f121757e623..874fbadca7f35 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/SIShrinkInstructions.cpp +++ b/interpreter/llvm/src/lib/Target/AMDGPU/SIShrinkInstructions.cpp @@ -92,6 +92,8 @@ static bool canShrink(MachineInstr &MI, const SIInstrInfo *TII, case AMDGPU::V_ADDC_U32_e64: case AMDGPU::V_SUBB_U32_e64: + if (TII->getNamedOperand(MI, AMDGPU::OpName::src1)->isImm()) + return false; // Additional verification is needed for sdst/src2. return true; @@ -108,10 +110,8 @@ static bool canShrink(MachineInstr &MI, const SIInstrInfo *TII, } const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1); - const MachineOperand *Src1Mod = - TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers); - - if (Src1 && (!isVGPR(Src1, TRI, MRI) || (Src1Mod && Src1Mod->getImm() != 0))) + if (Src1 && (!isVGPR(Src1, TRI, MRI) || + TII->hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers))) return false; // We don't need to check src0, all input types are legal, so just make sure @@ -120,58 +120,64 @@ static bool canShrink(MachineInstr &MI, const SIInstrInfo *TII, return false; // Check output modifiers - if (TII->hasModifiersSet(MI, AMDGPU::OpName::omod)) - return false; - - return !TII->hasModifiersSet(MI, AMDGPU::OpName::clamp); + return !TII->hasModifiersSet(MI, AMDGPU::OpName::omod) && + !TII->hasModifiersSet(MI, AMDGPU::OpName::clamp); } /// \brief This function checks \p MI for operands defined by a move immediate /// instruction and then folds the literal constant into the instruction if it -/// can. This function assumes that \p MI is a VOP1, VOP2, or VOPC instruction -/// and will only fold literal constants if we are still in SSA. -static void foldImmediates(MachineInstr &MI, const SIInstrInfo *TII, +/// can. 
This function assumes that \p MI is a VOP1, VOP2, or VOPC instruction.
+static bool foldImmediates(MachineInstr &MI, const SIInstrInfo *TII,
                            MachineRegisterInfo &MRI,
                            bool TryToCommute = true) {
-
-  if (!MRI.isSSA())
-    return;
-
   assert(TII->isVOP1(MI) || TII->isVOP2(MI) || TII->isVOPC(MI));
 
   int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
 
-  // Only one literal constant is allowed per instruction, so if src0 is a
-  // literal constant then we can't do any folding.
-  if (TII->isLiteralConstant(MI, Src0Idx))
-    return;
-
   // Try to fold Src0
   MachineOperand &Src0 = MI.getOperand(Src0Idx);
-  if (Src0.isReg() && MRI.hasOneUse(Src0.getReg())) {
+  if (Src0.isReg()) {
     unsigned Reg = Src0.getReg();
-    MachineInstr *Def = MRI.getUniqueVRegDef(Reg);
-    if (Def && Def->isMoveImmediate()) {
-      MachineOperand &MovSrc = Def->getOperand(1);
-      bool ConstantFolded = false;
-
-      if (MovSrc.isImm() && (isInt<32>(MovSrc.getImm()) ||
-                             isUInt<32>(MovSrc.getImm()))) {
-        Src0.ChangeToImmediate(MovSrc.getImm());
-        ConstantFolded = true;
-      }
-      if (ConstantFolded) {
-        if (MRI.use_empty(Reg))
+    if (TargetRegisterInfo::isVirtualRegister(Reg) && MRI.hasOneUse(Reg)) {
+      MachineInstr *Def = MRI.getUniqueVRegDef(Reg);
+      if (Def && Def->isMoveImmediate()) {
+        MachineOperand &MovSrc = Def->getOperand(1);
+        bool ConstantFolded = false;
+
+        if (MovSrc.isImm() && (isInt<32>(MovSrc.getImm()) ||
+                               isUInt<32>(MovSrc.getImm()))) {
+          // It's possible to have only one component of a super-reg defined
+          // by a single mov, so we need to clear any subregister flag.
+          Src0.setSubReg(0);
+          Src0.ChangeToImmediate(MovSrc.getImm());
+          ConstantFolded = true;
+        } else if (MovSrc.isFI()) {
+          Src0.setSubReg(0);
+          Src0.ChangeToFrameIndex(MovSrc.getIndex());
+          ConstantFolded = true;
+        }
+
+        if (ConstantFolded) {
+          assert(MRI.use_empty(Reg));
           Def->eraseFromParent();
-        ++NumLiteralConstantsFolded;
-        return;
+          ++NumLiteralConstantsFolded;
+          return true;
+        }
       }
     }
   }
 
   // We have failed to fold src0, so commute the instruction and try again.
-  if (TryToCommute && MI.isCommutable() && TII->commuteInstruction(MI))
-    foldImmediates(MI, TII, MRI, false);
+  if (TryToCommute && MI.isCommutable()) {
+    if (TII->commuteInstruction(MI)) {
+      if (foldImmediates(MI, TII, MRI, false))
+        return true;
+
+      // Commute back.
+      TII->commuteInstruction(MI);
+    }
+  }
+
+  return false;
 }
 
 // Copy MachineOperand with all flags except setting it as implicit.
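// The control flow of foldImmediates' retry logic above, reduced to a
// standalone sketch with stand-in callbacks: try to fold src0; if that fails
// and the instruction commutes, retry once (non-recursively) and undo the
// commute on failure so the instruction is left exactly as it was found.
#include <functional>

static bool foldWithCommuteRetry(const std::function<bool()> &TryFold,
                                 const std::function<bool()> &Commute) {
  if (TryFold())
    return true;
  if (Commute()) {  // commute succeeded; operands are now swapped
    if (TryFold())
      return true;
    Commute();      // commute back, restoring the original operand order
  }
  return false;
}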
diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/SMInstructions.td b/interpreter/llvm/src/lib/Target/AMDGPU/SMInstructions.td index 5b840a14dbc33..73dd8b7daa4ea 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/SMInstructions.td +++ b/interpreter/llvm/src/lib/Target/AMDGPU/SMInstructions.td @@ -229,6 +229,7 @@ def smrd_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{ ((Ld->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS && static_cast(getTargetLowering())->isMemOpUniform(N)) || (Subtarget->getScalarizeGlobalBehavior() && Ld->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && + !Ld->isVolatile() && static_cast(getTargetLowering())->isMemOpUniform(N) && static_cast(getTargetLowering())->isMemOpHasNoClobberedMemOperand(N))); }]>; diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/SOPInstructions.td b/interpreter/llvm/src/lib/Target/AMDGPU/SOPInstructions.td index 593439c2a3cd4..ec29a66c8bbbe 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/SOPInstructions.td +++ b/interpreter/llvm/src/lib/Target/AMDGPU/SOPInstructions.td @@ -184,13 +184,27 @@ def S_BITSET0_B32 : SOP1_32 <"s_bitset0_b32">; def S_BITSET0_B64 : SOP1_64_32 <"s_bitset0_b64">; def S_BITSET1_B32 : SOP1_32 <"s_bitset1_b32">; def S_BITSET1_B64 : SOP1_64_32 <"s_bitset1_b64">; -def S_GETPC_B64 : SOP1_64_0 <"s_getpc_b64">; +def S_GETPC_B64 : SOP1_64_0 <"s_getpc_b64", + [(set i64:$sdst, (int_amdgcn_s_getpc))] +>; + +let isTerminator = 1, isBarrier = 1, SchedRW = [WriteBranch] in { -let isTerminator = 1, isBarrier = 1, - isBranch = 1, isIndirectBranch = 1 in { +let isBranch = 1, isIndirectBranch = 1 in { def S_SETPC_B64 : SOP1_1 <"s_setpc_b64">; +} // End isBranch = 1, isIndirectBranch = 1 + +let isReturn = 1 in { +// Define variant marked as return rather than branch. +def S_SETPC_B64_return : SOP1_1<"", [(AMDGPUret_flag i64:$src0)]>; } -def S_SWAPPC_B64 : SOP1_64 <"s_swappc_b64">; +} // End isTerminator = 1, isBarrier = 1 + +let isCall = 1 in { +def S_SWAPPC_B64 : SOP1_64 <"s_swappc_b64" +>; +} + def S_RFE_B64 : SOP1_1 <"s_rfe_b64">; let hasSideEffects = 1, Uses = [EXEC], Defs = [EXEC, SCC] in { diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.cpp b/interpreter/llvm/src/lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.cpp index 9908fc003ce70..92fb762ebd731 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.cpp +++ b/interpreter/llvm/src/lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.cpp @@ -16,7 +16,7 @@ using namespace llvm; -/// \brief The target which suports all AMD GPUs. This will eventually +/// \brief The target which supports all AMD GPUs. This will eventually /// be deprecated and there will be a R600 target and a GCN target. Target &llvm::getTheAMDGPUTarget() { static Target TheAMDGPUTarget; diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp b/interpreter/llvm/src/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp index b6868de6a74e3..03b11ae80500e 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp +++ b/interpreter/llvm/src/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp @@ -65,5 +65,18 @@ const char* const IdSymbolic[] = { }; } // namespace Hwreg + +namespace Swizzle { + +// This must be in sync with llvm::AMDGPU::Swizzle::Id enum members, see SIDefines.h. 
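+// One way to make that "must be in sync" requirement mechanical is a
+// static_assert against the enum's element count. Sketch only: ID_COUNT is a
+// hypothetical sentinel, not something SIDefines.h is known to provide.
+//
+//   enum Id { QUAD_PERM, BITMASK_PERM, SWAP, REVERSE, BROADCAST, ID_COUNT };
+//   static_assert(sizeof(IdSymbolic) / sizeof(IdSymbolic[0]) == ID_COUNT,
+//                 "IdSymbolic must cover every Swizzle::Id member");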
+const char* const IdSymbolic[] = { + "QUAD_PERM", + "BITMASK_PERM", + "SWAP", + "REVERSE", + "BROADCAST", +}; + +} // namespace Swizzle } // namespace AMDGPU } // namespace llvm diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.h b/interpreter/llvm/src/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.h index b2dc2c0e364cd..ebb2be22b4879 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.h +++ b/interpreter/llvm/src/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.h @@ -25,6 +25,12 @@ namespace Hwreg { // Symbolic names for the hwreg(...) syntax. extern const char* const IdSymbolic[]; } // namespace Hwreg + +namespace Swizzle { // Symbolic names for the swizzle(...) syntax. + +extern const char* const IdSymbolic[]; + +} // namespace Swizzle } // namespace AMDGPU } // namespace llvm diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/interpreter/llvm/src/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index d565c84bfedaa..67ad904ca9723 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/interpreter/llvm/src/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -7,11 +7,12 @@ // //===----------------------------------------------------------------------===// -#include "AMDGPU.h" #include "AMDGPUBaseInfo.h" +#include "AMDGPU.h" #include "SIDefines.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Constants.h" @@ -27,7 +28,6 @@ #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/SubtargetFeature.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include @@ -38,7 +38,6 @@ #include "MCTargetDesc/AMDGPUMCTargetDesc.h" - #define GET_INSTRINFO_NAMED_OPS #include "AMDGPUGenInstrInfo.inc" #undef GET_INSTRINFO_NAMED_OPS @@ -104,6 +103,11 @@ namespace AMDGPU { namespace IsaInfo { IsaVersion getIsaVersion(const FeatureBitset &Features) { + // SI. + if (Features.test(FeatureISAVersion6_0_0)) + return {6, 0, 0}; + if (Features.test(FeatureISAVersion6_0_1)) + return {6, 0, 1}; // CI. if (Features.test(FeatureISAVersion7_0_0)) return {7, 0, 0}; @@ -111,6 +115,8 @@ IsaVersion getIsaVersion(const FeatureBitset &Features) { return {7, 0, 1}; if (Features.test(FeatureISAVersion7_0_2)) return {7, 0, 2}; + if (Features.test(FeatureISAVersion7_0_3)) + return {7, 0, 3}; // VI. 
if (Features.test(FeatureISAVersion8_0_0)) @@ -131,6 +137,10 @@ IsaVersion getIsaVersion(const FeatureBitset &Features) { return {9, 0, 0}; if (Features.test(FeatureISAVersion9_0_1)) return {9, 0, 1}; + if (Features.test(FeatureISAVersion9_0_2)) + return {9, 0, 2}; + if (Features.test(FeatureISAVersion9_0_3)) + return {9, 0, 3}; if (!Features.test(FeatureGCN) || Features.test(FeatureSouthernIslands)) return {0, 0, 0}; @@ -327,33 +337,6 @@ void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, Header.private_segment_alignment = 4; } -MCSection *getHSATextSection(MCContext &Ctx) { - return Ctx.getELFSection(".hsatext", ELF::SHT_PROGBITS, - ELF::SHF_ALLOC | ELF::SHF_WRITE | - ELF::SHF_EXECINSTR | - ELF::SHF_AMDGPU_HSA_AGENT | - ELF::SHF_AMDGPU_HSA_CODE); -} - -MCSection *getHSADataGlobalAgentSection(MCContext &Ctx) { - return Ctx.getELFSection(".hsadata_global_agent", ELF::SHT_PROGBITS, - ELF::SHF_ALLOC | ELF::SHF_WRITE | - ELF::SHF_AMDGPU_HSA_GLOBAL | - ELF::SHF_AMDGPU_HSA_AGENT); -} - -MCSection *getHSADataGlobalProgramSection(MCContext &Ctx) { - return Ctx.getELFSection(".hsadata_global_program", ELF::SHT_PROGBITS, - ELF::SHF_ALLOC | ELF::SHF_WRITE | - ELF::SHF_AMDGPU_HSA_GLOBAL); -} - -MCSection *getHSARodataReadonlyAgentSection(MCContext &Ctx) { - return Ctx.getELFSection(".hsarodata_readonly_agent", ELF::SHT_PROGBITS, - ELF::SHF_ALLOC | ELF::SHF_AMDGPU_HSA_READONLY | - ELF::SHF_AMDGPU_HSA_AGENT); -} - bool isGroupSegment(const GlobalValue *GV, AMDGPUAS AS) { return GV->getType()->getAddressSpace() == AS.LOCAL_ADDRESS; } @@ -518,7 +501,18 @@ bool isCompute(CallingConv::ID cc) { } bool isEntryFunctionCC(CallingConv::ID CC) { - return true; + switch (CC) { + case CallingConv::AMDGPU_KERNEL: + case CallingConv::SPIR_KERNEL: + case CallingConv::AMDGPU_VS: + case CallingConv::AMDGPU_GS: + case CallingConv::AMDGPU_PS: + case CallingConv::AMDGPU_CS: + case CallingConv::AMDGPU_HS: + return true; + default: + return false; + } } bool isSI(const MCSubtargetInfo &STI) { @@ -533,6 +527,24 @@ bool isVI(const MCSubtargetInfo &STI) { return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands]; } +bool isGFX9(const MCSubtargetInfo &STI) { + return STI.getFeatureBits()[AMDGPU::FeatureGFX9]; +} + +bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) { + const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID); + const unsigned FirstSubReg = TRI->getSubReg(Reg, 1); + return SGPRClass.contains(FirstSubReg != 0 ? 
                              FirstSubReg : Reg) ||
+         Reg == AMDGPU::SCC;
+}
+
+bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI) {
+  for (MCRegAliasIterator R(Reg0, TRI, true); R.isValid(); ++R) {
+    if (*R == Reg1) return true;
+  }
+  return false;
+}
+
 unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
 
   switch(Reg) {
diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/interpreter/llvm/src/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index d6c836eb748b1..936e4921a7097 100644
--- a/interpreter/llvm/src/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/interpreter/llvm/src/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -149,13 +149,6 @@ int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);
 void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
                                const FeatureBitset &Features);
 
-MCSection *getHSATextSection(MCContext &Ctx);
-
-MCSection *getHSADataGlobalAgentSection(MCContext &Ctx);
-
-MCSection *getHSADataGlobalProgramSection(MCContext &Ctx);
-
-MCSection *getHSARodataReadonlyAgentSection(MCContext &Ctx);
 bool isGroupSegment(const GlobalValue *GV, AMDGPUAS AS);
 bool isGlobalSegment(const GlobalValue *GV, AMDGPUAS AS);
@@ -262,7 +255,6 @@ bool isEntryFunctionCC(CallingConv::ID CC);
 LLVM_READNONE
 inline bool isKernel(CallingConv::ID CC) {
   switch (CC) {
-  case CallingConv::C:
   case CallingConv::AMDGPU_KERNEL:
   case CallingConv::SPIR_KERNEL:
     return true;
@@ -274,6 +266,13 @@ inline bool isKernel(CallingConv::ID CC) {
 bool isSI(const MCSubtargetInfo &STI);
 bool isCI(const MCSubtargetInfo &STI);
 bool isVI(const MCSubtargetInfo &STI);
+bool isGFX9(const MCSubtargetInfo &STI);
+
+/// \brief Is \p Reg a scalar register?
+bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI);
+
+/// \brief Is there any intersection between registers \p Reg0 and \p Reg1?
+bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI);
 
 /// If \p Reg is a pseudo reg, return the correct hardware register given
 /// \p STI otherwise return \p Reg.
diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/VOP1Instructions.td b/interpreter/llvm/src/lib/Target/AMDGPU/VOP1Instructions.td
index 1febc6bf8ec20..96b33c373f052 100644
--- a/interpreter/llvm/src/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/interpreter/llvm/src/lib/Target/AMDGPU/VOP1Instructions.td
@@ -30,6 +30,15 @@ class VOP1_SDWAe <bits<8> op, VOPProfile P> : VOP_SDWAe <P> {
   let Inst{31-25} = 0x3f; // encoding
 }
 
+class VOP1_SDWA9Ae <bits<8> op, VOPProfile P> : VOP_SDWA9Ae <P>
{ + bits<8> vdst; + + let Inst{8-0} = 0xf9; // sdwa + let Inst{16-9} = op; + let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0); + let Inst{31-25} = 0x3f; // encoding +} + class VOP1_Pseudo pattern=[], bit VOP1Only = 0> : InstSI , VOP , @@ -243,6 +252,7 @@ def VOP_I32_VI32_NO_EXT : VOPProfile<[i32, i32, untyped, untyped]> { let Src0RC64 = VRegSrc_32; let HasExt = 0; + let HasSDWA9 = 0; } // Special case because there are no true output operands. Hack vdst @@ -258,16 +268,19 @@ def VOP_MOVRELD : VOPProfile<[untyped, i32, untyped, untyped]> { let Ins64 = (ins Src0RC64:$vdst, VSrc_b32:$src0); let InsDPP = (ins Src0RC32:$vdst, Src0RC32:$src0, dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, bank_mask:$bank_mask, bound_ctrl:$bound_ctrl); - let InsSDWA = (ins Src0RC32:$vdst, Src0ModSDWA:$src0_modifiers, VCSrc_b32:$src0, - clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused, + + let InsSDWA = (ins Src0RC32:$vdst, Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0, + clampmod:$clamp, omod:$omod, dst_sel:$dst_sel, dst_unused:$dst_unused, src0_sel:$src0_sel); let Asm32 = getAsm32<1, 1>.ret; let Asm64 = getAsm64<1, 1, 0, 1>.ret; let AsmDPP = getAsmDPP<1, 1, 0>.ret; - let AsmSDWA = getAsmSDWA<1, 1, 0>.ret; + let AsmSDWA = getAsmSDWA<1, 1>.ret; + let AsmSDWA9 = getAsmSDWA9<1, 0, 1>.ret; let HasExt = 0; + let HasSDWA9 = 0; let HasDst = 0; let EmitDst = 1; // force vdst emission } @@ -324,7 +337,7 @@ defm V_EXP_LEGACY_F32 : VOP1Inst <"v_exp_legacy_f32", VOP_F32_F32>; } // End SubtargetPredicate = isCIVI -let SubtargetPredicate = isVI in { +let SubtargetPredicate = Has16BitInsts in { defm V_CVT_F16_U16 : VOP1Inst <"v_cvt_f16_u16", VOP1_F16_I16, uint_to_fp>; defm V_CVT_F16_I16 : VOP1Inst <"v_cvt_f16_i16", VOP1_F16_I16, sint_to_fp>; @@ -347,7 +360,7 @@ defm V_COS_F16 : VOP1Inst <"v_cos_f16", VOP_F16_F16, AMDGPUcos>; } -let Predicates = [isVI] in { +let Predicates = [Has16BitInsts] in { def : Pat< (f32 (f16_to_fp i16:$src)), @@ -523,6 +536,10 @@ multiclass VOP1_Real_vi op> { VOP_SDWA_Real (NAME#"_sdwa")>, VOP1_SDWAe (NAME#"_sdwa").Pfl>; + def _sdwa_gfx9 : + VOP_SDWA9_Real (NAME#"_sdwa")>, + VOP1_SDWA9Ae (NAME#"_sdwa").Pfl>; + // For now left dpp only for asm/dasm // TODO: add corresponding pseudo def _dpp : VOP1_DPP(NAME#"_e32")>; diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/VOP2Instructions.td b/interpreter/llvm/src/lib/Target/AMDGPU/VOP2Instructions.td index f5310a261a1de..d5acb49b4f39c 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/VOP2Instructions.td +++ b/interpreter/llvm/src/lib/Target/AMDGPU/VOP2Instructions.td @@ -48,6 +48,18 @@ class VOP2_SDWAe op, VOPProfile P> : VOP_SDWAe
<P> {
   let Inst{31} = 0x0; // encoding
 }
 
+class VOP2_SDWA9Ae <bits<6> op, VOPProfile P> : VOP_SDWA9Ae <P>
{ + bits<8> vdst; + bits<9> src1; + + let Inst{8-0} = 0xf9; // sdwa + let Inst{16-9} = !if(P.HasSrc1, src1{7-0}, 0); + let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0); + let Inst{30-25} = op; + let Inst{31} = 0x0; // encoding + let Inst{63} = !if(P.HasSrc1, src1{8}, 0); // src1_sgpr +} + class VOP2_Pseudo pattern=[], string suffix = "_e32"> : InstSI , VOP , @@ -105,7 +117,10 @@ class VOP2_SDWA_Pseudo pattern=[]> : class getVOP2Pat64 : LetDummies { list ret = !if(P.HasModifiers, [(set P.DstVT:$vdst, - (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)), + (node (P.Src0VT + !if(P.HasOMod, + (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod), + (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))), (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))], [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))]); } @@ -121,10 +136,9 @@ multiclass VOP2Inst .ret>, Commutable_REV; - def _sdwa : VOP2_SDWA_Pseudo ; + def _sdwa : VOP2_SDWA_Pseudo ; } -// TODO: add SDWA pseudo instructions for VOP2bInst and VOP2eInst multiclass VOP2bInst , Commutable_REV; - def _sdwa : VOP2_SDWA_Pseudo ; + def _sdwa : VOP2_SDWA_Pseudo { + let AsmMatchConverter = "cvtSdwaVOP2b"; + } } def _e64 : VOP3_Pseudo .ret>, @@ -198,18 +214,22 @@ class VOP_MAC : VOPProfile <[vt, vt, vt, vt]> { VGPR_32:$src2, // stub argument dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, bank_mask:$bank_mask, bound_ctrl:$bound_ctrl); + let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0, Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1, VGPR_32:$src2, // stub argument - clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused, + clampmod:$clamp, omod:$omod, + dst_sel:$dst_sel, dst_unused:$dst_unused, src0_sel:$src0_sel, src1_sel:$src1_sel); let Asm32 = getAsm32<1, 2, vt>.ret; let Asm64 = getAsm64<1, 2, HasModifiers, HasOMod, vt>.ret; let AsmDPP = getAsmDPP<1, 2, HasModifiers, vt>.ret; - let AsmSDWA = getAsmSDWA<1, 2, HasModifiers, vt>.ret; + let AsmSDWA = getAsmSDWA<1, 2, vt>.ret; + let AsmSDWA9 = getAsmSDWA9<1, 1, 2, vt>.ret; let HasSrc2 = 0; let HasSrc2Mods = 0; let HasExt = 1; + let HasSDWA9 = 0; } def VOP_MAC_F16 : VOP_MAC { @@ -229,6 +249,7 @@ def VOP2b_I32_I1_I32_I32 : VOPProfile<[i32, i32, i32, untyped]> { let Asm32 = "$vdst, vcc, $src0, $src1"; let Asm64 = "$vdst, $sdst, $src0, $src1"; let AsmSDWA = "$vdst, vcc, $src0_modifiers, $src1_modifiers$clamp $dst_sel $dst_unused $src0_sel $src1_sel"; + let AsmSDWA9 = "$vdst, vcc, $src0_modifiers, $src1_modifiers$clamp $dst_sel $dst_unused $src0_sel $src1_sel"; let AsmDPP = "$vdst, vcc, $src0, $src1 $dpp_ctrl$row_mask$bank_mask$bound_ctrl"; let Outs32 = (outs DstRC:$vdst); let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst); @@ -246,6 +267,7 @@ def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> { let Asm32 = "$vdst, vcc, $src0, $src1, vcc"; let Asm64 = "$vdst, $sdst, $src0, $src1, $src2"; let AsmSDWA = "$vdst, vcc, $src0_modifiers, $src1_modifiers, vcc $clamp $dst_sel $dst_unused $src0_sel $src1_sel"; + let AsmSDWA9 = "$vdst, vcc, $src0_modifiers, $src1_modifiers, vcc $clamp $dst_sel $dst_unused $src0_sel $src1_sel"; let AsmDPP = "$vdst, vcc, $src0, $src1, vcc $dpp_ctrl$row_mask$bank_mask$bound_ctrl"; let Outs32 = (outs DstRC:$vdst); let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst); @@ -254,9 +276,10 @@ def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> { // implicit VCC use. 
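// A plain C++ rendering of how the VOP2_SDWA9Ae fields above pack into the
// 64-bit instruction word (field positions taken from the lets above; the
// helper itself is illustrative, not part of the backend):
#include <cstdint>

static uint64_t encodeVOP2SDWA9(uint8_t Op,      // 6-bit opcode, Inst{30-25}
                                uint16_t Src1,   // 9-bit src1, Inst{16-9}/{63}
                                uint8_t Vdst) {  // Inst{24-17}
  uint64_t Inst = 0;
  Inst |= 0xf9u;                              // Inst{8-0}: SDWA marker
  Inst |= (uint64_t(Src1) & 0xff) << 9;       // Inst{16-9}: src1{7-0}
  Inst |= uint64_t(Vdst) << 17;               // Inst{24-17}: vdst
  Inst |= (uint64_t(Op) & 0x3f) << 25;        // Inst{30-25}: op
                                              // Inst{31} = 0: VOP2 encoding
  Inst |= ((uint64_t(Src1) >> 8) & 1) << 63;  // Inst{63}: src1 is an SGPR
  return Inst;
}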
let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1); - let InsSDWA = (ins Src0Mod:$src0_modifiers, Src0SDWA:$src0, - Src1Mod:$src1_modifiers, Src1SDWA:$src1, - clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused, + let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0, + Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1, + clampmod:$clamp, omod:$omod, + dst_sel:$dst_sel, dst_unused:$dst_unused, src0_sel:$src0_sel, src1_sel:$src1_sel); let InsDPP = (ins Src0Mod:$src0_modifiers, Src0DPP:$src0, @@ -264,6 +287,7 @@ def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> { dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, bank_mask:$bank_mask, bound_ctrl:$bound_ctrl); let HasExt = 1; + let HasSDWA9 = 1; } // Read in from vcc or arbitrary SGPR @@ -286,6 +310,8 @@ def VOP_READLANE : VOPProfile<[i32, i32, i32]> { let Ins64 = Ins32; let Asm32 = " $vdst, $src0, $src1"; let Asm64 = Asm32; + let HasExt = 0; + let HasSDWA9 = 0; } def VOP_WRITELANE : VOPProfile<[i32, i32, i32]> { @@ -295,6 +321,8 @@ def VOP_WRITELANE : VOPProfile<[i32, i32, i32]> { let Ins64 = Ins32; let Asm32 = " $vdst, $src0, $src1"; let Asm64 = Asm32; + let HasExt = 0; + let HasSDWA9 = 0; } //===----------------------------------------------------------------------===// @@ -357,20 +385,29 @@ def V_READLANE_B32 : VOP2_Pseudo<"v_readlane_b32", VOP_READLANE, def V_WRITELANE_B32 : VOP2_Pseudo<"v_writelane_b32", VOP_WRITELANE, [], "">; } // End isConvergent = 1 -defm V_BFM_B32 : VOP2Inst <"v_bfm_b32", VOP_I32_I32_I32>; -defm V_BCNT_U32_B32 : VOP2Inst <"v_bcnt_u32_b32", VOP_I32_I32_I32>; -defm V_MBCNT_LO_U32_B32 : VOP2Inst <"v_mbcnt_lo_u32_b32", VOP_I32_I32_I32, int_amdgcn_mbcnt_lo>; -defm V_MBCNT_HI_U32_B32 : VOP2Inst <"v_mbcnt_hi_u32_b32", VOP_I32_I32_I32, int_amdgcn_mbcnt_hi>; -defm V_LDEXP_F32 : VOP2Inst <"v_ldexp_f32", VOP_F32_F32_I32, AMDGPUldexp>; -defm V_CVT_PKACCUM_U8_F32 : VOP2Inst <"v_cvt_pkaccum_u8_f32", VOP_I32_F32_I32>; // TODO: set "Uses = dst" -defm V_CVT_PKNORM_I16_F32 : VOP2Inst <"v_cvt_pknorm_i16_f32", VOP_I32_F32_F32>; -defm V_CVT_PKNORM_U16_F32 : VOP2Inst <"v_cvt_pknorm_u16_f32", VOP_I32_F32_F32>; -defm V_CVT_PKRTZ_F16_F32 : VOP2Inst <"v_cvt_pkrtz_f16_f32", VOP_I32_F32_F32, AMDGPUpkrtz_f16_f32>; -defm V_CVT_PK_U16_U32 : VOP2Inst <"v_cvt_pk_u16_u32", VOP_I32_I32_I32>; -defm V_CVT_PK_I16_I32 : VOP2Inst <"v_cvt_pk_i16_i32", VOP_I32_I32_I32>; +defm V_BFM_B32 : VOP2Inst <"v_bfm_b32", VOP_NO_EXT>; +defm V_BCNT_U32_B32 : VOP2Inst <"v_bcnt_u32_b32", VOP_NO_EXT>; +defm V_MBCNT_LO_U32_B32 : VOP2Inst <"v_mbcnt_lo_u32_b32", VOP_NO_EXT, int_amdgcn_mbcnt_lo>; +defm V_MBCNT_HI_U32_B32 : VOP2Inst <"v_mbcnt_hi_u32_b32", VOP_NO_EXT, int_amdgcn_mbcnt_hi>; +defm V_LDEXP_F32 : VOP2Inst <"v_ldexp_f32", VOP_NO_EXT, AMDGPUldexp>; +defm V_CVT_PKACCUM_U8_F32 : VOP2Inst <"v_cvt_pkaccum_u8_f32", VOP_NO_EXT>; // TODO: set "Uses = dst" +defm V_CVT_PKNORM_I16_F32 : VOP2Inst <"v_cvt_pknorm_i16_f32", VOP_NO_EXT>; +defm V_CVT_PKNORM_U16_F32 : VOP2Inst <"v_cvt_pknorm_u16_f32", VOP_NO_EXT>; +defm V_CVT_PKRTZ_F16_F32 : VOP2Inst <"v_cvt_pkrtz_f16_f32", VOP_NO_EXT, AMDGPUpkrtz_f16_f32>; +defm V_CVT_PK_U16_U32 : VOP2Inst <"v_cvt_pk_u16_u32", VOP_NO_EXT>; +defm V_CVT_PK_I16_I32 : VOP2Inst <"v_cvt_pk_i16_i32", VOP_NO_EXT>; } // End SubtargetPredicate = isGCN +def : Pat< + (AMDGPUadde i32:$src0, i32:$src1, i1:$src2), + (V_ADDC_U32_e64 $src0, $src1, $src2) +>; + +def : Pat< + (AMDGPUsube i32:$src0, i32:$src1, i1:$src2), + (V_SUBB_U32_e64 $src0, $src1, $src2) +>; // These instructions only exist on SI and CI let SubtargetPredicate = isSICI in { @@ -387,7 +424,7 
@@ defm V_LSHL_B32 : VOP2Inst <"v_lshl_b32", VOP_I32_I32_I32>; } // End let SubtargetPredicate = SICI -let SubtargetPredicate = isVI in { +let SubtargetPredicate = Has16BitInsts in { def V_MADMK_F16 : VOP2_Pseudo <"v_madmk_f16", VOP_MADMK_F16, [], "">; defm V_LSHLREV_B16 : VOP2Inst <"v_lshlrev_b16", VOP_I16_I16_I16>; @@ -418,7 +455,7 @@ defm V_MAC_F16 : VOP2Inst <"v_mac_f16", VOP_MAC_F16>; } } // End isCommutable = 1 -} // End SubtargetPredicate = isVI +} // End SubtargetPredicate = Has16BitInsts // Note: 16-bit instructions produce a 0 result in the high 16-bits. multiclass Arithmetic_i16_Pats { @@ -468,7 +505,7 @@ class ZExt_i16_i1_Pat : Pat < (V_CNDMASK_B32_e64 (i32 0), (i32 1), $src) >; -let Predicates = [isVI] in { +let Predicates = [Has16BitInsts] in { defm : Arithmetic_i16_Pats; defm : Arithmetic_i16_Pats; @@ -513,7 +550,7 @@ def : Pat< (V_SUB_U16_e64 $src0, NegSubInlineConst16:$src1) >; -} // End Predicates = [isVI] +} // End Predicates = [Has16BitInsts] //===----------------------------------------------------------------------===// // SI @@ -657,6 +694,17 @@ multiclass VOP2_Real_e64_vi op> { VOP3e_vi (NAME#"_e64").Pfl>; } +multiclass VOP2_Real_e64only_vi op> { + def _e64_vi : + VOP3_Real(NAME#"_e64"), SIEncodingFamily.VI>, + VOP3e_vi (NAME#"_e64").Pfl> { + // Hack to stop printing _e64 + VOP3_Pseudo ps = !cast(NAME#"_e64"); + let OutOperandList = (outs VGPR_32:$vdst); + let AsmString = ps.Mnemonic # " " # ps.AsmOperands; + } +} + multiclass Base_VOP2be_Real_e32e64_vi op> : VOP2_Real_e32_vi { def _e64_vi : VOP3_Real(NAME#"_e64"), SIEncodingFamily.VI>, @@ -675,15 +723,21 @@ multiclass VOP2_SDWA_Real op> { VOP2_SDWAe (NAME#"_sdwa").Pfl>; } +multiclass VOP2_SDWA9_Real op> { + def _sdwa_gfx9 : + VOP_SDWA9_Real (NAME#"_sdwa")>, + VOP2_SDWA9Ae (NAME#"_sdwa").Pfl>; +} + multiclass VOP2be_Real_e32e64_vi op> : - Base_VOP2be_Real_e32e64_vi, VOP2_SDWA_Real { + Base_VOP2be_Real_e32e64_vi, VOP2_SDWA_Real, VOP2_SDWA9_Real { // For now left dpp only for asm/dasm // TODO: add corresponding pseudo def _dpp : VOP2_DPP(NAME#"_e32")>; } multiclass VOP2_Real_e32e64_vi op> : - Base_VOP2_Real_e32e64_vi, VOP2_SDWA_Real { + Base_VOP2_Real_e32e64_vi, VOP2_SDWA_Real, VOP2_SDWA9_Real { // For now left dpp only for asm/dasm // TODO: add corresponding pseudo def _dpp : VOP2_DPP(NAME#"_e32")>; @@ -724,17 +778,17 @@ defm V_SUBBREV_U32 : VOP2be_Real_e32e64_vi <0x1e>; defm V_READLANE_B32 : VOP32_Real_vi <0x289>; defm V_WRITELANE_B32 : VOP32_Real_vi <0x28a>; -defm V_BFM_B32 : VOP2_Real_e64_vi <0x293>; -defm V_BCNT_U32_B32 : VOP2_Real_e64_vi <0x28b>; -defm V_MBCNT_LO_U32_B32 : VOP2_Real_e64_vi <0x28c>; -defm V_MBCNT_HI_U32_B32 : VOP2_Real_e64_vi <0x28d>; -defm V_LDEXP_F32 : VOP2_Real_e64_vi <0x288>; -defm V_CVT_PKACCUM_U8_F32 : VOP2_Real_e64_vi <0x1f0>; -defm V_CVT_PKNORM_I16_F32 : VOP2_Real_e64_vi <0x294>; -defm V_CVT_PKNORM_U16_F32 : VOP2_Real_e64_vi <0x295>; -defm V_CVT_PKRTZ_F16_F32 : VOP2_Real_e64_vi <0x296>; -defm V_CVT_PK_U16_U32 : VOP2_Real_e64_vi <0x297>; -defm V_CVT_PK_I16_I32 : VOP2_Real_e64_vi <0x298>; +defm V_BFM_B32 : VOP2_Real_e64only_vi <0x293>; +defm V_BCNT_U32_B32 : VOP2_Real_e64only_vi <0x28b>; +defm V_MBCNT_LO_U32_B32 : VOP2_Real_e64only_vi <0x28c>; +defm V_MBCNT_HI_U32_B32 : VOP2_Real_e64only_vi <0x28d>; +defm V_LDEXP_F32 : VOP2_Real_e64only_vi <0x288>; +defm V_CVT_PKACCUM_U8_F32 : VOP2_Real_e64only_vi <0x1f0>; +defm V_CVT_PKNORM_I16_F32 : VOP2_Real_e64only_vi <0x294>; +defm V_CVT_PKNORM_U16_F32 : VOP2_Real_e64only_vi <0x295>; +defm V_CVT_PKRTZ_F16_F32 : VOP2_Real_e64only_vi <0x296>; +defm 
V_CVT_PK_U16_U32 : VOP2_Real_e64only_vi <0x297>; +defm V_CVT_PK_I16_I32 : VOP2_Real_e64only_vi <0x298>; defm V_ADD_F16 : VOP2_Real_e32e64_vi <0x1f>; defm V_SUB_F16 : VOP2_Real_e32e64_vi <0x20>; @@ -762,9 +816,11 @@ let SubtargetPredicate = isVI in { // Aliases to simplify matching of floating-point instructions that // are VOP2 on SI and VOP3 on VI. -class SI2_VI3Alias : InstAlias < +class SI2_VI3Alias : InstAlias < name#" $dst, $src0, $src1", - (inst VGPR_32:$dst, 0, VCSrc_f32:$src0, 0, VCSrc_f32:$src1, 0, 0) + !if(inst.Pfl.HasOMod, + (inst VGPR_32:$dst, 0, VCSrc_f32:$src0, 0, VCSrc_f32:$src1, 0, 0), + (inst VGPR_32:$dst, 0, VCSrc_f32:$src0, 0, VCSrc_f32:$src1, 0)) >, PredicateControl { let UseInstAsmMatchConverter = 0; let AsmVariantName = AMDGPUAsmVariants.VOP3; diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/VOP3Instructions.td b/interpreter/llvm/src/lib/Target/AMDGPU/VOP3Instructions.td index 217a074888532..92ed0706dc011 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/VOP3Instructions.td +++ b/interpreter/llvm/src/lib/Target/AMDGPU/VOP3Instructions.td @@ -12,17 +12,21 @@ //===----------------------------------------------------------------------===// class getVOP3ModPat { + dag src0 = !if(P.HasOMod, + (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod), + (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp)); + list ret3 = [(set P.DstVT:$vdst, - (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)), + (node (P.Src0VT src0), (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)), (P.Src2VT (VOP3Mods P.Src2VT:$src2, i32:$src2_modifiers))))]; list ret2 = [(set P.DstVT:$vdst, - (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)), + (node (P.Src0VT src0), (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))]; list ret1 = [(set P.DstVT:$vdst, - (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod))))]; + (node (P.Src0VT src0)))]; list ret = !if(!eq(P.NumSrcArgs, 3), ret3, !if(!eq(P.NumSrcArgs, 2), ret2, @@ -92,6 +96,7 @@ class VOP3_Profile : VOPProfile { class VOP3b_Profile : VOPProfile<[vt, vt, vt, vt]> { // v_div_scale_{f32|f64} do not support input modifiers. 
let HasModifiers = 0; + let HasOMod = 0; let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst); let Asm64 = " $vdst, $sdst, $src0, $src1, $src2"; } @@ -172,8 +177,8 @@ def V_CUBEMA_F32 : VOP3Inst <"v_cubema_f32", VOP3_Profile, def V_BFE_U32 : VOP3Inst <"v_bfe_u32", VOP3_Profile, AMDGPUbfe_u32>; def V_BFE_I32 : VOP3Inst <"v_bfe_i32", VOP3_Profile, AMDGPUbfe_i32>; def V_BFI_B32 : VOP3Inst <"v_bfi_b32", VOP3_Profile, AMDGPUbfi>; -def V_ALIGNBIT_B32 : VOP3Inst <"v_alignbit_b32", VOP3_Profile>; -def V_ALIGNBYTE_B32 : VOP3Inst <"v_alignbyte_b32", VOP3_Profile>; +def V_ALIGNBIT_B32 : VOP3Inst <"v_alignbit_b32", VOP3_Profile, int_amdgcn_alignbit>; +def V_ALIGNBYTE_B32 : VOP3Inst <"v_alignbyte_b32", VOP3_Profile, int_amdgcn_alignbyte>; def V_MIN3_F32 : VOP3Inst <"v_min3_f32", VOP3_Profile, AMDGPUfmin3>; def V_MIN3_I32 : VOP3Inst <"v_min3_i32", VOP3_Profile, AMDGPUsmin3>; def V_MIN3_U32 : VOP3Inst <"v_min3_u32", VOP3_Profile, AMDGPUumin3>; @@ -209,7 +214,10 @@ def V_DIV_SCALE_F64 : VOP3_Pseudo <"v_div_scale_f64", VOP3b_F64_I1_F64_F64_F64, } def V_MSAD_U8 : VOP3Inst <"v_msad_u8", VOP3_Profile, int_amdgcn_msad_u8>; + +let Constraints = "@earlyclobber $vdst" in { def V_MQSAD_PK_U16_U8 : VOP3Inst <"v_mqsad_pk_u16_u8", VOP3_Profile, int_amdgcn_mqsad_pk_u16_u8>; +} // End Constraints = "@earlyclobber $vdst" def V_TRIG_PREOP_F64 : VOP3Inst <"v_trig_preop_f64", VOP3_Profile, AMDGPUtrig_preop> { let SchedRW = [WriteDouble]; @@ -232,9 +240,10 @@ def V_ASHRREV_I64 : VOP3Inst <"v_ashrrev_i64", VOP3_Profile>; let SubtargetPredicate = isCIVI in { -def V_MQSAD_U16_U8 : VOP3Inst <"v_mqsad_u16_u8", VOP3_Profile>; +let Constraints = "@earlyclobber $vdst" in { def V_QSAD_PK_U16_U8 : VOP3Inst <"v_qsad_pk_u16_u8", VOP3_Profile, int_amdgcn_qsad_pk_u16_u8>; def V_MQSAD_U32_U8 : VOP3Inst <"v_mqsad_u32_u8", VOP3_Profile, int_amdgcn_mqsad_u32_u8>; +} // End Constraints = "@earlyclobber $vdst" let isCommutable = 1 in { def V_MAD_U64_U32 : VOP3Inst <"v_mad_u64_u32", VOP3b_I64_I1_I32_I32_I64>; @@ -244,11 +253,12 @@ def V_MAD_I64_I32 : VOP3Inst <"v_mad_i64_i32", VOP3b_I64_I1_I32_I32_I64>; } // End SubtargetPredicate = isCIVI -let SubtargetPredicate = isVI in { +let SubtargetPredicate = Has16BitInsts in { + +def V_DIV_FIXUP_F16 : VOP3Inst <"v_div_fixup_f16", VOP3_Profile, AMDGPUdiv_fixup>; let isCommutable = 1 in { -def V_DIV_FIXUP_F16 : VOP3Inst <"v_div_fixup_f16", VOP3_Profile, AMDGPUdiv_fixup>; def V_FMA_F16 : VOP3Inst <"v_fma_f16", VOP3_Profile, fma>; def V_INTERP_P1LL_F16 : VOP3Inst <"v_interp_p1ll_f16", VOP3_Profile>; def V_INTERP_P1LV_F16 : VOP3Inst <"v_interp_p1lv_f16", VOP3_Profile>; @@ -259,12 +269,13 @@ def V_MAD_U16 : VOP3Inst <"v_mad_u16", VOP3_Profile>; def V_MAD_I16 : VOP3Inst <"v_mad_i16", VOP3_Profile>; } // End isCommutable = 1 +} // End SubtargetPredicate = Has16BitInsts +let SubtargetPredicate = isVI in { def V_PERM_B32 : VOP3Inst <"v_perm_b32", VOP3_Profile>; - } // End SubtargetPredicate = isVI -let Predicates = [isVI] in { +let Predicates = [Has16BitInsts] in { multiclass Ternary_i16_Pats { @@ -289,7 +300,7 @@ def : Pat< defm: Ternary_i16_Pats; defm: Ternary_i16_Pats; -} // End Predicates = [isVI] +} // End Predicates = [Has16BitInsts] let SubtargetPredicate = isGFX9 in { def V_PACK_B32_F16 : VOP3Inst <"v_pack_b32_f16", VOP3_Profile>; @@ -301,10 +312,19 @@ def V_AND_OR_B32 : VOP3Inst <"v_and_or_b32", VOP3_Profile>; def V_OR3_B32 : VOP3Inst <"v_or3_b32", VOP3_Profile>; def V_XAD_U32 : VOP3Inst <"v_xad_u32", VOP3_Profile>; + def V_MED3_F16 : VOP3Inst <"v_med3_f16", VOP3_Profile, AMDGPUfmed3>; def V_MED3_I16 : 
VOP3Inst <"v_med3_i16", VOP3_Profile, AMDGPUsmed3>; def V_MED3_U16 : VOP3Inst <"v_med3_u16", VOP3_Profile, AMDGPUumed3>; -} + +def V_MIN3_F16 : VOP3Inst <"v_min3_f16", VOP3_Profile, AMDGPUfmin3>; +def V_MIN3_I16 : VOP3Inst <"v_min3_i16", VOP3_Profile, AMDGPUsmin3>; +def V_MIN3_U16 : VOP3Inst <"v_min3_u16", VOP3_Profile, AMDGPUumin3>; + +def V_MAX3_F16 : VOP3Inst <"v_max3_f16", VOP3_Profile, AMDGPUfmax3>; +def V_MAX3_I16 : VOP3Inst <"v_max3_i16", VOP3_Profile, AMDGPUsmax3>; +def V_MAX3_U16 : VOP3Inst <"v_max3_u16", VOP3_Profile, AMDGPUumax3>; +} // End SubtargetPredicate = isGFX9 //===----------------------------------------------------------------------===// @@ -402,7 +422,6 @@ multiclass VOP3be_Real_ci op> { } } -defm V_MQSAD_U16_U8 : VOP3_Real_ci <0x172>; defm V_QSAD_PK_U16_U8 : VOP3_Real_ci <0x172>; defm V_MQSAD_U32_U8 : VOP3_Real_ci <0x175>; defm V_MAD_U64_U32 : VOP3be_Real_ci <0x176>; @@ -426,7 +445,6 @@ multiclass VOP3be_Real_vi op> { } // End AssemblerPredicates = [isVI], DecoderNamespace = "VI" -defm V_MQSAD_U16_U8 : VOP3_Real_vi <0x172>; defm V_MAD_U64_U32 : VOP3be_Real_vi <0x1E8>; defm V_MAD_I64_I32 : VOP3be_Real_vi <0x1E9>; @@ -512,6 +530,15 @@ defm V_OR3_B32 : VOP3_Real_vi <0x202>; defm V_PACK_B32_F16 : VOP3_Real_vi <0x2a0>; defm V_XAD_U32 : VOP3_Real_vi <0x1f3>; + +defm V_MIN3_F16 : VOP3_Real_vi <0x1f4>; +defm V_MIN3_I16 : VOP3_Real_vi <0x1f5>; +defm V_MIN3_U16 : VOP3_Real_vi <0x1f6>; + +defm V_MAX3_F16 : VOP3_Real_vi <0x1f7>; +defm V_MAX3_I16 : VOP3_Real_vi <0x1f8>; +defm V_MAX3_U16 : VOP3_Real_vi <0x1f9>; + defm V_MED3_F16 : VOP3_Real_vi <0x1fa>; defm V_MED3_I16 : VOP3_Real_vi <0x1fb>; defm V_MED3_U16 : VOP3_Real_vi <0x1fc>; diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/VOP3PInstructions.td b/interpreter/llvm/src/lib/Target/AMDGPU/VOP3PInstructions.td index 96d343099132c..3becf758aaa3e 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/VOP3PInstructions.td +++ b/interpreter/llvm/src/lib/Target/AMDGPU/VOP3PInstructions.td @@ -16,15 +16,27 @@ class VOP3PInst !if(P.HasModifiers, getVOP3PModPat.ret, getVOP3Pat.ret) >; -// Non-packed instructions that use the VOP3P encoding. i.e. where -// omod/abs are used. +// Non-packed instructions that use the VOP3P encoding. +// VOP3 neg/abs and VOP3P opsel/opsel_hi modifiers are allowed. 
class VOP3_VOP3PInst : - VOP3P_Pseudo.ret, getVOP3Pat.ret) ->; + VOP3P_Pseudo { + let InOperandList = + (ins + FP32InputMods:$src0_modifiers, VCSrc_f32:$src0, + FP32InputMods:$src1_modifiers, VCSrc_f32:$src1, + FP32InputMods:$src2_modifiers, VCSrc_f32:$src2, + clampmod:$clamp, + op_sel:$op_sel, + op_sel_hi:$op_sel_hi); + let AsmOperands = + " $vdst, $src0_modifiers, $src1_modifiers, $src2_modifiers$op_sel$op_sel_hi$clamp"; +} let isCommutable = 1 in { def V_PK_FMA_F16 : VOP3PInst<"v_pk_fma_f16", VOP3_Profile, fma>; +def V_PK_MAD_I16 : VOP3PInst<"v_pk_mad_i16", VOP3_Profile>; +def V_PK_MAD_U16 : VOP3PInst<"v_pk_mad_u16", VOP3_Profile>; + def V_PK_ADD_F16 : VOP3PInst<"v_pk_add_f16", VOP3_Profile, fadd>; def V_PK_MUL_F16 : VOP3PInst<"v_pk_mul_f16", VOP3_Profile, fmul>; def V_PK_MAX_F16 : VOP3PInst<"v_pk_max_f16", VOP3_Profile, fmaxnum>; @@ -32,7 +44,6 @@ def V_PK_MIN_F16 : VOP3PInst<"v_pk_min_f16", VOP3_Profile def V_PK_ADD_U16 : VOP3PInst<"v_pk_add_u16", VOP3_Profile, add>; def V_PK_ADD_I16 : VOP3PInst<"v_pk_add_i16", VOP3_Profile>; -def V_PK_SUB_I16 : VOP3PInst<"v_pk_sub_i16", VOP3_Profile, sub>; def V_PK_MUL_LO_U16 : VOP3PInst<"v_pk_mul_lo_u16", VOP3_Profile, mul>; def V_PK_MIN_I16 : VOP3PInst<"v_pk_min_i16", VOP3_Profile, smin>; @@ -41,14 +52,20 @@ def V_PK_MAX_I16 : VOP3PInst<"v_pk_max_i16", VOP3_Profile def V_PK_MAX_U16 : VOP3PInst<"v_pk_max_u16", VOP3_Profile, umax>; } +def V_PK_SUB_U16 : VOP3PInst<"v_pk_sub_u16", VOP3_Profile>; +def V_PK_SUB_I16 : VOP3PInst<"v_pk_sub_i16", VOP3_Profile, sub>; + def V_PK_LSHLREV_B16 : VOP3PInst<"v_pk_lshlrev_b16", VOP3_Profile, lshl_rev>; def V_PK_ASHRREV_I16 : VOP3PInst<"v_pk_ashrrev_i16", VOP3_Profile, ashr_rev>; def V_PK_LSHRREV_B16 : VOP3PInst<"v_pk_lshrrev_b16", VOP3_Profile, lshr_rev>; // XXX - Commutable? -def V_MAD_MIX_F32 : VOP3_VOP3PInst<"v_mad_mix_f32", VOP3_Profile>; -def V_MAD_MIXLO_F16 : VOP3_VOP3PInst<"v_mad_mixlo_f16", VOP3_Profile>; -def V_MAD_MIXHI_F16 : VOP3_VOP3PInst<"v_mad_mixhi_f16", VOP3_Profile>; +// These are VOP3a-like opcodes which accept no omod. +// Size of src arguments (16/32) is controlled by op_sel. +// For 16-bit src arguments their location (hi/lo) are controlled by op_sel_hi. +def V_MAD_MIX_F32 : VOP3_VOP3PInst<"v_mad_mix_f32", VOP3_Profile>; +def V_MAD_MIXLO_F16 : VOP3_VOP3PInst<"v_mad_mixlo_f16", VOP3_Profile>; +def V_MAD_MIXHI_F16 : VOP3_VOP3PInst<"v_mad_mixhi_f16", VOP3_Profile>; multiclass VOP3P_Real_vi op> { @@ -59,6 +76,7 @@ multiclass VOP3P_Real_vi op> { } } +defm V_PK_MAD_I16 : VOP3P_Real_vi <0x380>; defm V_PK_MUL_LO_U16 : VOP3P_Real_vi <0x381>; defm V_PK_ADD_I16 : VOP3P_Real_vi <0x382>; defm V_PK_SUB_I16 : VOP3P_Real_vi <0x383>; @@ -67,8 +85,10 @@ defm V_PK_LSHRREV_B16 : VOP3P_Real_vi <0x385>; defm V_PK_ASHRREV_I16 : VOP3P_Real_vi <0x386>; defm V_PK_MAX_I16 : VOP3P_Real_vi <0x387>; defm V_PK_MIN_I16 : VOP3P_Real_vi <0x388>; +defm V_PK_MAD_U16 : VOP3P_Real_vi <0x389>; defm V_PK_ADD_U16 : VOP3P_Real_vi <0x38a>; +defm V_PK_SUB_U16 : VOP3P_Real_vi <0x38b>; defm V_PK_MAX_U16 : VOP3P_Real_vi <0x38c>; defm V_PK_MIN_U16 : VOP3P_Real_vi <0x38d>; defm V_PK_FMA_F16 : VOP3P_Real_vi <0x38e>; diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/VOPCInstructions.td b/interpreter/llvm/src/lib/Target/AMDGPU/VOPCInstructions.td index a3550a63677ba..b636fc9be431b 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/VOPCInstructions.td +++ b/interpreter/llvm/src/lib/Target/AMDGPU/VOPCInstructions.td @@ -34,6 +34,17 @@ class VOPC_SDWAe op, VOPProfile P> : VOP_SDWAe
<P>
{ let Inst{44-43} = SDWA.UNUSED_PRESERVE; } +class VOPC_SDWA9e <bits<8> op, VOPProfile P> : VOP_SDWA9Be
<P>
{ + bits<9> src1; + + let Inst{8-0} = 0xf9; // sdwa + let Inst{16-9} = !if(P.HasSrc1, src1{7-0}, 0); + let Inst{24-17} = op; + let Inst{31-25} = 0x3e; // encoding + let Inst{63} = !if(P.HasSrc1, src1{8}, 0); // src1_sgpr +} + + //===----------------------------------------------------------------------===// // VOPC classes //===----------------------------------------------------------------------===// @@ -137,6 +148,19 @@ class VOPCInstAlias : let SubtargetPredicate = AssemblerPredicate; } +class getVOPCPat64 : LetDummies { + list ret = !if(P.HasModifiers, + [(set i1:$sdst, + (setcc (P.Src0VT + !if(P.HasOMod, + (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod), + (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))), + (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)), + cond))], + [(set i1:$sdst, (setcc P.Src0VT:$src0, P.Src1VT:$src1, cond))]); +} + + multiclass VOPC_Pseudos , + def _e64 : VOP3_Pseudo.ret>, Commutable_REV { let Defs = !if(DefExec, [EXEC], []); let SchedRW = P.Schedule; @@ -517,9 +534,11 @@ class VOPC_Class_Profile sched, ValueType vt> : VOPC_Profile { let Ins64 = (ins Src0Mod:$src0_modifiers, Src0RC64:$src0, Src1RC64:$src1); let Asm64 = "$sdst, $src0_modifiers, $src1"; + let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0, Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1, clampmod:$clamp, src0_sel:$src0_sel, src1_sel:$src1_sel); + let AsmSDWA = " vcc, $src0_modifiers, $src1_modifiers$clamp $src0_sel $src1_sel"; let HasSrc1Mods = 0; let HasClamp = 0; @@ -621,7 +640,7 @@ class FCMP_Pattern : Pat < (i64 (AMDGPUsetcc (vt (VOP3Mods vt:$src0, i32:$src0_modifiers)), (vt (VOP3Mods vt:$src1, i32:$src1_modifiers)), cond)), (inst $src0_modifiers, $src0, $src1_modifiers, $src1, - DSTCLAMP.NONE, DSTOMOD.NONE) + DSTCLAMP.NONE) >; def : FCMP_Pattern ; @@ -920,6 +939,10 @@ multiclass VOPC_Real_vi op> { VOP_SDWA_Real (NAME#"_sdwa")>, VOPC_SDWAe (NAME#"_sdwa").Pfl>; + def _sdwa_gfx9 : + VOP_SDWA9_Real (NAME#"_sdwa")>, + VOPC_SDWA9e (NAME#"_sdwa").Pfl>; + def : VOPCInstAlias (NAME#"_e64"), !cast(NAME#"_e32_vi")> { let AssemblerPredicate = isVI; diff --git a/interpreter/llvm/src/lib/Target/AMDGPU/VOPInstructions.td b/interpreter/llvm/src/lib/Target/AMDGPU/VOPInstructions.td index 69906c419db3b..b47538ba0349a 100644 --- a/interpreter/llvm/src/lib/Target/AMDGPU/VOPInstructions.td +++ b/interpreter/llvm/src/lib/Target/AMDGPU/VOPInstructions.td @@ -51,12 +51,8 @@ class VOP3Common pattern = [], let AsmVariantName = AMDGPUAsmVariants.VOP3; let AsmMatchConverter = - !if(!eq(VOP3Only,1), - !if(!and(P.IsPacked, isVOP3P), "cvtVOP3P", "cvtVOP3"), - !if(!eq(P.HasModifiers, 1), - "cvtVOP3_2_mod", - !if(!eq(P.HasOMod, 1), "cvtVOP3OMod", "") - ) - ); + !if(!and(P.IsPacked, isVOP3P), + "cvtVOP3P", + !if(!or(P.HasModifiers, P.HasOMod), + "cvtVOP3", + "")); VOPProfile Pfl = P; } @@ -142,6 +136,8 @@ class VOP3_Real : let TSFlags = ps.TSFlags; let UseNamedOperandTable = ps.UseNamedOperandTable; let Uses = ps.Uses; + + VOPProfile Pfl = ps.Pfl; } // XXX - Is there any reason to distingusih this from regular VOP3 @@ -232,11 +228,11 @@ class VOP3Pe op, VOPProfile P> : Enc64 { let Inst{9} = !if(P.HasSrc1Mods, src1_modifiers{1}, 0); // neg_hi src1 let Inst{10} = !if(P.HasSrc2Mods, src2_modifiers{1}, 0); // neg_hi src2 - let Inst{11} = !if(P.HasOpSel, src0_modifiers{2}, 0); // op_sel(0) - let Inst{12} = !if(P.HasOpSel, src1_modifiers{2}, 0); // op_sel(1) - let Inst{13} = !if(P.HasOpSel, src2_modifiers{2}, 0); // op_sel(2) + let Inst{11} = !if(!and(P.HasSrc0, P.HasOpSel), 
src0_modifiers{2}, 0); // op_sel(0) + let Inst{12} = !if(!and(P.HasSrc1, P.HasOpSel), src1_modifiers{2}, 0); // op_sel(1) + let Inst{13} = !if(!and(P.HasSrc2, P.HasOpSel), src2_modifiers{2}, 0); // op_sel(2) - let Inst{14} = !if(P.HasOpSel, src2_modifiers{3}, 0); // op_sel_hi(2) + let Inst{14} = !if(!and(P.HasSrc2, P.HasOpSel), src2_modifiers{3}, 0); // op_sel_hi(2) let Inst{15} = !if(P.HasClamp, clamp{0}, 0); @@ -245,8 +241,8 @@ class VOP3Pe op, VOPProfile P> : Enc64 { let Inst{40-32} = !if(P.HasSrc0, src0, 0); let Inst{49-41} = !if(P.HasSrc1, src1, 0); let Inst{58-50} = !if(P.HasSrc2, src2, 0); - let Inst{59} = !if(P.HasOpSel, src0_modifiers{3}, 0); // op_sel_hi(0) - let Inst{60} = !if(P.HasOpSel, src1_modifiers{3}, 0); // op_sel_hi(1) + let Inst{59} = !if(!and(P.HasSrc0, P.HasOpSel), src0_modifiers{3}, 0); // op_sel_hi(0) + let Inst{60} = !if(!and(P.HasSrc1, P.HasOpSel), src1_modifiers{3}, 0); // op_sel_hi(1) let Inst{61} = !if(P.HasSrc0Mods, src0_modifiers{0}, 0); // neg (lo) let Inst{62} = !if(P.HasSrc1Mods, src1_modifiers{0}, 0); // neg (lo) let Inst{63} = !if(P.HasSrc2Mods, src2_modifiers{0}, 0); // neg (lo) @@ -293,11 +289,65 @@ class VOP_SDWAe : Enc64 { let Inst{44-43} = !if(P.EmitDst, dst_unused{1-0}, SDWA.UNUSED_PRESERVE); let Inst{45} = !if(P.HasSDWAClamp, clamp{0}, 0); let Inst{50-48} = !if(P.HasSrc0, src0_sel{2-0}, SDWA.DWORD); - let Inst{53-52} = !if(P.HasSrc0FloatMods, src0_modifiers{1-0}, 0); let Inst{51} = !if(P.HasSrc0IntMods, src0_modifiers{0}, 0); + let Inst{53-52} = !if(P.HasSrc0FloatMods, src0_modifiers{1-0}, 0); let Inst{58-56} = !if(P.HasSrc1, src1_sel{2-0}, SDWA.DWORD); + let Inst{59} = !if(P.HasSrc1IntMods, src1_modifiers{0}, 0); let Inst{61-60} = !if(P.HasSrc1FloatMods, src1_modifiers{1-0}, 0); +} + +// GFX9 adds two features to SDWA: +// 1. Add 3 fields to the SDWA microcode word: S0, S1 and OMOD. +// a. S0 and S1 indicate that source 0 and 1 respectively are SGPRs rather +// than VGPRs (at most 1 can be an SGPR); +// b. OMOD is the standard output modifier (result *2, *4, /2) +// 2. Add a new version of the SDWA microcode word for VOPC: SDWAB. This +// replaces OMOD and the dest fields with SD and SDST (SGPR destination) +// field. +// a. When SD=1, the SDST is used as the destination for the compare result; +// b. When SD=0, VCC is used. +// +// In GFX9, V_MAC_F16, V_MAC_F32 opcodes cannot be used with SDWA + +// gfx9 SDWA basic encoding +class VOP_SDWA9e : Enc64 { + bits<9> src0; // {src0_sgpr{0}, src0{7-0}} + bits<3> src0_sel; + bits<2> src0_modifiers; // float: {abs,neg}, int {sext} + bits<3> src1_sel; + bits<2> src1_modifiers; + bits<1> src1_sgpr; + + let Inst{39-32} = !if(P.HasSrc0, src0{7-0}, 0); + let Inst{50-48} = !if(P.HasSrc0, src0_sel{2-0}, SDWA.DWORD); + let Inst{51} = !if(P.HasSrc0IntMods, src0_modifiers{0}, 0); + let Inst{53-52} = !if(P.HasSrc0FloatMods, src0_modifiers{1-0}, 0); + let Inst{55} = !if(P.HasSrc0, src0{8}, 0); + let Inst{58-56} = !if(P.HasSrc1, src1_sel{2-0}, SDWA.DWORD); let Inst{59} = !if(P.HasSrc1IntMods, src1_modifiers{0}, 0); + let Inst{61-60} = !if(P.HasSrc1FloatMods, src1_modifiers{1-0}, 0); + let Inst{63} = 0; // src1_sgpr - should be specified in subclass +} + +// gfx9 SDWA-A +class VOP_SDWA9Ae : VOP_SDWA9e
<P>
{ + bits<3> dst_sel; + bits<2> dst_unused; + bits<1> clamp; + bits<2> omod; + + let Inst{42-40} = !if(P.EmitDst, dst_sel{2-0}, SDWA.DWORD); + let Inst{44-43} = !if(P.EmitDst, dst_unused{1-0}, SDWA.UNUSED_PRESERVE); + let Inst{45} = !if(P.HasSDWAClamp, clamp{0}, 0); + let Inst{47-46} = !if(P.HasSDWAOMod, omod{1-0}, 0); +} + +// gfx9 SDWA-B +class VOP_SDWA9Be : VOP_SDWA9e
<P>
{ + bits<8> sdst; // {vcc_sdst{0}, sdst{6-0}} + + let Inst{46-40} = !if(P.EmitDst, sdst{6-0}, 0); + let Inst{47} = !if(P.EmitDst, sdst{7}, 0); } class VOP_SDWA_Pseudo pattern=[]> : @@ -312,6 +362,7 @@ class VOP_SDWA_Pseudo pattern=[]> : string Mnemonic = opName; string AsmOperands = P.AsmSDWA; + string AsmOperands9 = P.AsmSDWA9; let Size = 8; let mayLoad = 0; @@ -333,7 +384,7 @@ class VOP_SDWA_Pseudo pattern=[]> : class VOP_SDWA_Real : InstSI , - SIMCInstr { + SIMCInstr { let isPseudo = 0; let isCodeGenOnly = 0; @@ -358,6 +409,35 @@ class VOP_SDWA_Real : let TSFlags = ps.TSFlags; } +class VOP_SDWA9_Real : + InstSI , + SIMCInstr { + + let isPseudo = 0; + let isCodeGenOnly = 0; + + let Defs = ps.Defs; + let Uses = ps.Uses; + let SchedRW = ps.SchedRW; + let hasSideEffects = ps.hasSideEffects; + + let Constraints = ps.Constraints; + let DisableEncoding = ps.DisableEncoding; + + let SubtargetPredicate = !if(ps.Pfl.HasSDWA9, HasSDWA9, DisableInst); + let AssemblerPredicate = !if(ps.Pfl.HasSDWA9, HasSDWA9, DisableInst); + let AsmVariantName = !if(ps.Pfl.HasSDWA9, AMDGPUAsmVariants.SDWA9, + AMDGPUAsmVariants.Disable); + let DecoderNamespace = "SDWA9"; + + // Copy relevant pseudo op flags + let AsmMatchConverter = ps.AsmMatchConverter; + let UseNamedOperandTable = ps.UseNamedOperandTable; + let Constraints = ps.Constraints; + let DisableEncoding = ps.DisableEncoding; + let TSFlags = ps.TSFlags; +} + class VOP_DPPe : Enc64 { bits<2> src0_modifiers; bits<8> src0; diff --git a/interpreter/llvm/src/lib/Target/ARM/ARM.td b/interpreter/llvm/src/lib/Target/ARM/ARM.td index 46fd1f70ee99b..e49c1babac210 100644 --- a/interpreter/llvm/src/lib/Target/ARM/ARM.td +++ b/interpreter/llvm/src/lib/Target/ARM/ARM.td @@ -17,143 +17,172 @@ include "llvm/Target/Target.td" //===----------------------------------------------------------------------===// -// ARM Helper classes. +// ARM Subtarget state. // -class ProcNoItin Features> - : Processor; +def ModeThumb : SubtargetFeature<"thumb-mode", "InThumbMode", + "true", "Thumb mode">; + +def ModeSoftFloat : SubtargetFeature<"soft-float","UseSoftFloat", + "true", "Use software floating " + "point features.">; -class Architecture features > - : SubtargetFeature; //===----------------------------------------------------------------------===// -// ARM Subtarget state. +// ARM Subtarget features. // -def ModeThumb : SubtargetFeature<"thumb-mode", "InThumbMode", "true", - "Thumb mode">; +// Floating Point, HW Division and Neon Support +def FeatureVFP2 : SubtargetFeature<"vfp2", "HasVFPv2", "true", + "Enable VFP2 instructions">; -def ModeSoftFloat : SubtargetFeature<"soft-float", "UseSoftFloat", "true", - "Use software floating point features.">; +def FeatureVFP3 : SubtargetFeature<"vfp3", "HasVFPv3", "true", + "Enable VFP3 instructions", + [FeatureVFP2]>; -//===----------------------------------------------------------------------===// -// ARM Subtarget features. 
-// +def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true", + "Enable NEON instructions", + [FeatureVFP3]>; + +def FeatureFP16 : SubtargetFeature<"fp16", "HasFP16", "true", + "Enable half-precision " + "floating point">; + +def FeatureVFP4 : SubtargetFeature<"vfp4", "HasVFPv4", "true", + "Enable VFP4 instructions", + [FeatureVFP3, FeatureFP16]>; + +def FeatureFPARMv8 : SubtargetFeature<"fp-armv8", "HasFPARMv8", + "true", "Enable ARMv8 FP", + [FeatureVFP4]>; + +def FeatureFullFP16 : SubtargetFeature<"fullfp16", "HasFullFP16", "true", + "Enable full half-precision " + "floating point", + [FeatureFPARMv8]>; + +def FeatureVFPOnlySP : SubtargetFeature<"fp-only-sp", "FPOnlySP", "true", + "Floating point unit supports " + "single precision only">; + +def FeatureD16 : SubtargetFeature<"d16", "HasD16", "true", + "Restrict FP to 16 double registers">; + +def FeatureHWDivThumb : SubtargetFeature<"hwdiv", + "HasHardwareDivideInThumb", "true", + "Enable divide instructions in Thumb">; + +def FeatureHWDivARM : SubtargetFeature<"hwdiv-arm", + "HasHardwareDivideInARM", "true", + "Enable divide instructions in ARM mode">; + +// Atomic Support +def FeatureDB : SubtargetFeature<"db", "HasDataBarrier", "true", + "Has data barrier (dmb/dsb) instructions">; + +def FeatureV7Clrex : SubtargetFeature<"v7clrex", "HasV7Clrex", "true", + "Has v7 clrex instruction">; -def FeatureVFP2 : SubtargetFeature<"vfp2", "HasVFPv2", "true", - "Enable VFP2 instructions">; -def FeatureVFP3 : SubtargetFeature<"vfp3", "HasVFPv3", "true", - "Enable VFP3 instructions", - [FeatureVFP2]>; -def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true", - "Enable NEON instructions", - [FeatureVFP3]>; -def FeatureThumb2 : SubtargetFeature<"thumb2", "HasThumb2", "true", - "Enable Thumb2 instructions">; -def FeatureNoARM : SubtargetFeature<"noarm", "NoARM", "true", - "Does not support ARM mode execution", - [ModeThumb]>; -def FeatureFP16 : SubtargetFeature<"fp16", "HasFP16", "true", - "Enable half-precision floating point">; -def FeatureVFP4 : SubtargetFeature<"vfp4", "HasVFPv4", "true", - "Enable VFP4 instructions", - [FeatureVFP3, FeatureFP16]>; -def FeatureFPARMv8 : SubtargetFeature<"fp-armv8", "HasFPARMv8", - "true", "Enable ARMv8 FP", - [FeatureVFP4]>; -def FeatureFullFP16 : SubtargetFeature<"fullfp16", "HasFullFP16", "true", - "Enable full half-precision floating point", - [FeatureFPARMv8]>; -def FeatureD16 : SubtargetFeature<"d16", "HasD16", "true", - "Restrict FP to 16 double registers">; -def FeatureHWDivThumb : SubtargetFeature<"hwdiv", "HasHardwareDivideInThumb", - "true", - "Enable divide instructions in Thumb">; -def FeatureHWDivARM : SubtargetFeature<"hwdiv-arm", - "HasHardwareDivideInARM", "true", - "Enable divide instructions in ARM mode">; -def FeatureDB : SubtargetFeature<"db", "HasDataBarrier", "true", - "Has data barrier (dmb / dsb) instructions">; -def FeatureV7Clrex : SubtargetFeature<"v7clrex", "HasV7Clrex", "true", - "Has v7 clrex instruction">; def FeatureAcquireRelease : SubtargetFeature<"acquire-release", "HasAcquireRelease", "true", - "Has v8 acquire/release (lda/ldaex etc) instructions">; -def FeatureSlowFPBrcc : SubtargetFeature<"slow-fp-brcc", "SlowFPBrcc", "true", - "FP compare + branch is slow">; -def FeatureVFPOnlySP : SubtargetFeature<"fp-only-sp", "FPOnlySP", "true", - "Floating point unit supports single precision only">; -def FeaturePerfMon : SubtargetFeature<"perfmon", "HasPerfMon", "true", - "Enable support for Performance Monitor extensions">; -def FeatureTrustZone : 
SubtargetFeature<"trustzone", "HasTrustZone", "true", - "Enable support for TrustZone security extensions">; -def Feature8MSecExt : SubtargetFeature<"8msecext", "Has8MSecExt", "true", - "Enable support for ARMv8-M Security Extensions">; -def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true", - "Enable support for Cryptography extensions", - [FeatureNEON]>; -def FeatureCRC : SubtargetFeature<"crc", "HasCRC", "true", - "Enable support for CRC instructions">; + "Has v8 acquire/release (lda/ldaex " + " etc) instructions">; + + +def FeatureSlowFPBrcc : SubtargetFeature<"slow-fp-brcc", "SlowFPBrcc", "true", + "FP compare + branch is slow">; + +def FeaturePerfMon : SubtargetFeature<"perfmon", "HasPerfMon", "true", + "Enable support for Performance " + "Monitor extensions">; + + +// TrustZone Security Extensions +def FeatureTrustZone : SubtargetFeature<"trustzone", "HasTrustZone", "true", + "Enable support for TrustZone " + "security extensions">; + +def Feature8MSecExt : SubtargetFeature<"8msecext", "Has8MSecExt", "true", + "Enable support for ARMv8-M " + "Security Extensions">; + +def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true", + "Enable support for " + "Cryptography extensions", + [FeatureNEON]>; + +def FeatureCRC : SubtargetFeature<"crc", "HasCRC", "true", + "Enable support for CRC instructions">; + + // Not to be confused with FeatureHasRetAddrStack (return address stack) -def FeatureRAS : SubtargetFeature<"ras", "HasRAS", "true", - "Enable Reliability, Availability and Serviceability extensions">; -def FeatureFPAO : SubtargetFeature<"fpao", "HasFPAO", "true", - "Enable fast computation of positive address offsets">; +def FeatureRAS : SubtargetFeature<"ras", "HasRAS", "true", + "Enable Reliability, Availability " + "and Serviceability extensions">; + +// Fast computation of non-negative address offsets +def FeatureFPAO : SubtargetFeature<"fpao", "HasFPAO", "true", + "Enable fast computation of " + "positive address offsets">; +// Fast execution of AES crypto operations +def FeatureFuseAES : SubtargetFeature<"fuse-aes", "HasFuseAES", "true", + "CPU fuses AES crypto operations">; -// Cyclone has preferred instructions for zeroing VFP registers, which can -// execute in 0 cycles. -def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true", - "Has zero-cycle zeroing instructions">; +// Cyclone can zero VFP registers in 0 cycles. +def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true", + "Has zero-cycle zeroing instructions">; -// Whether or not it may be profitable to unpredicate certain instructions -// during if conversion. +// Whether it is profitable to unpredicate certain instructions during if-conversion def FeatureProfUnpredicate : SubtargetFeature<"prof-unpr", - "IsProfitableToUnpredicate", - "true", + "IsProfitableToUnpredicate", "true", "Is profitable to unpredicate">; // Some targets (e.g. Swift) have microcoded VGETLNi32. -def FeatureSlowVGETLNi32 : SubtargetFeature<"slow-vgetlni32", - "HasSlowVGETLNi32", "true", - "Has slow VGETLNi32 - prefer VMOV">; +def FeatureSlowVGETLNi32 : SubtargetFeature<"slow-vgetlni32", + "HasSlowVGETLNi32", "true", + "Has slow VGETLNi32 - prefer VMOV">; // Some targets (e.g. Swift) have microcoded VDUP32. -def FeatureSlowVDUP32 : SubtargetFeature<"slow-vdup32", "HasSlowVDUP32", "true", - "Has slow VDUP32 - prefer VMOV">; +def FeatureSlowVDUP32 : SubtargetFeature<"slow-vdup32", "HasSlowVDUP32", + "true", + "Has slow VDUP32 - prefer VMOV">; // Some targets (e.g. 
Cortex-A9) prefer VMOVSR to VMOVDRR even when using NEON // for scalar FP, as this allows more effective execution domain optimization. -def FeaturePreferVMOVSR : SubtargetFeature<"prefer-vmovsr", "PreferVMOVSR", - "true", "Prefer VMOVSR">; +def FeaturePreferVMOVSR : SubtargetFeature<"prefer-vmovsr", "PreferVMOVSR", + "true", "Prefer VMOVSR">; // Swift has ISHST barriers compatible with Atomic Release semantics but weaker // than ISH def FeaturePrefISHSTBarrier : SubtargetFeature<"prefer-ishst", "PreferISHST", - "true", "Prefer ISHST barriers">; + "true", "Prefer ISHST barriers">; // Some targets (e.g. Cortex-A9) have muxed AGU and NEON/FPU. -def FeatureMuxedUnits : SubtargetFeature<"muxed-units", "HasMuxedUnits", "true", - "Has muxed AGU and NEON/FPU">; +def FeatureMuxedUnits : SubtargetFeature<"muxed-units", "HasMuxedUnits", + "true", + "Has muxed AGU and NEON/FPU">; -// On some targets, a VLDM/VSTM starting with an odd register number needs more -// microops than single VLDRS. +// Whether VLDM/VSTM starting with odd register number need more microops +// than single VLDRS def FeatureSlowOddRegister : SubtargetFeature<"slow-odd-reg", "SlowOddRegister", - "true", "VLDM/VSTM starting with an odd register is slow">; + "true", "VLDM/VSTM starting " + "with an odd register is slow">; // Some targets have a renaming dependency when loading into D subregisters. def FeatureSlowLoadDSubreg : SubtargetFeature<"slow-load-D-subreg", "SlowLoadDSubregister", "true", "Loading into D subregs is slow">; + // Some targets (e.g. Cortex-A15) never want VMOVS to be widened to VMOVD. def FeatureDontWidenVMOVS : SubtargetFeature<"dont-widen-vmovs", "DontWidenVMOVS", "true", "Don't widen VMOVS to VMOVD">; // Whether or not it is profitable to expand VFP/NEON MLA/MLS instructions. -def FeatureExpandMLx : SubtargetFeature<"expand-fp-mlx", "ExpandMLx", "true", - "Expand VFP/NEON MLA/MLS instructions">; +def FeatureExpandMLx : SubtargetFeature<"expand-fp-mlx", + "ExpandMLx", "true", + "Expand VFP/NEON MLA/MLS instructions">; // Some targets have special RAW hazards for VFP/NEON VMLA/VMLS. def FeatureHasVMLxHazards : SubtargetFeature<"vmlx-hazards", "HasVMLxHazards", @@ -161,15 +190,18 @@ def FeatureHasVMLxHazards : SubtargetFeature<"vmlx-hazards", "HasVMLxHazards", // Some targets (e.g. Cortex-A9) want to convert VMOVRS, VMOVSR and VMOVS from // VFP to NEON, as an execution domain optimization. -def FeatureNEONForFPMovs : SubtargetFeature<"neon-fpmovs", "UseNEONForFPMovs", - "true", "Convert VMOVSR, VMOVRS, VMOVS to NEON">; +def FeatureNEONForFPMovs : SubtargetFeature<"neon-fpmovs", + "UseNEONForFPMovs", "true", + "Convert VMOVSR, VMOVRS, " + "VMOVS to NEON">; // Some processors benefit from using NEON instructions for scalar // single-precision FP operations. This affects instruction selection and should // only be enabled if the handling of denormals is not important. -def FeatureNEONForFP : SubtargetFeature<"neonfp", "UseNEONForSinglePrecisionFP", - "true", - "Use NEON for single precision FP">; +def FeatureNEONForFP : SubtargetFeature<"neonfp", + "UseNEONForSinglePrecisionFP", + "true", + "Use NEON for single precision FP">; // On some processors, VLDn instructions that access unaligned data take one // extra cycle. Take that into account when computing operand latencies. @@ -180,18 +212,18 @@ def FeatureCheckVLDnAlign : SubtargetFeature<"vldn-align", "CheckVLDnAlign", // Some processors have a nonpipelined VFP coprocessor. 
def FeatureNonpipelinedVFP : SubtargetFeature<"nonpipelined-vfp", "NonpipelinedVFP", "true", - "VFP instructions are not pipelined">; + "VFP instructions are not pipelined">; // Some processors have FP multiply-accumulate instructions that don't // play nicely with other VFP / NEON instructions, and it's generally better // to just not use them. -def FeatureHasSlowFPVMLx : SubtargetFeature<"slowfpvmlx", "SlowFPVMLx", "true", - "Disable VFP / NEON MAC instructions">; +def FeatureHasSlowFPVMLx : SubtargetFeature<"slowfpvmlx", "SlowFPVMLx", "true", + "Disable VFP / NEON MAC instructions">; // Cortex-A8 / A9 Advanced SIMD has multiplier accumulator forwarding. def FeatureVMLxForwarding : SubtargetFeature<"vmlx-forwarding", - "HasVMLxForwarding", "true", - "Has multiplier accumulator forwarding">; + "HasVMLxForwarding", "true", + "Has multiplier accumulator forwarding">; // Disable 32-bit to 16-bit narrowing for experimentation. def FeaturePref32BitThumb : SubtargetFeature<"32bit", "Pref32BitThumb", "true", @@ -205,69 +237,106 @@ def FeatureAvoidPartialCPSR : SubtargetFeature<"avoid-partial-cpsr", "AvoidCPSRPartialUpdate", "true", "Avoid CPSR partial update for OOO execution">; -def FeatureAvoidMOVsShOp : SubtargetFeature<"avoid-movs-shop", - "AvoidMOVsShifterOperand", "true", - "Avoid movs instructions with shifter operand">; +/// Disable +1 predication cost for instructions updating CPSR. +/// Enabled for Cortex-A57. +def FeatureCheapPredicableCPSR : SubtargetFeature<"cheap-predicable-cpsr", + "CheapPredicableCPSRDef", + "true", + "Disable +1 predication cost for instructions updating CPSR">; + +def FeatureAvoidMOVsShOp : SubtargetFeature<"avoid-movs-shop", + "AvoidMOVsShifterOperand", "true", + "Avoid movs instructions with " + "shifter operand">; // Some processors perform return stack prediction. CodeGen should avoid issue // "normal" call instructions to callees which do not return. -def FeatureHasRetAddrStack : SubtargetFeature<"ret-addr-stack", "HasRetAddrStack", "true", - "Has return address stack">; +def FeatureHasRetAddrStack : SubtargetFeature<"ret-addr-stack", + "HasRetAddrStack", "true", + "Has return address stack">; + +// Some processors have no branch predictor, which changes the expected cost of +// taking a branch which affects the choice of whether to use predicated +// instructions. +def FeatureHasNoBranchPredictor : SubtargetFeature<"no-branch-predictor", + "HasBranchPredictor", "false", + "Has no branch predictor">; /// DSP extension. -def FeatureDSP : SubtargetFeature<"dsp", "HasDSP", "true", - "Supports DSP instructions in ARM and/or Thumb2">; +def FeatureDSP : SubtargetFeature<"dsp", "HasDSP", "true", + "Supports DSP instructions in " + "ARM and/or Thumb2">; // Multiprocessing extension. -def FeatureMP : SubtargetFeature<"mp", "HasMPExtension", "true", - "Supports Multiprocessing extension">; +def FeatureMP : SubtargetFeature<"mp", "HasMPExtension", "true", + "Supports Multiprocessing extension">; // Virtualization extension - requires HW divide (ARMv7-AR ARMARM - 4.4.8). def FeatureVirtualization : SubtargetFeature<"virtualization", - "HasVirtualization", "true", - "Supports Virtualization extension", - [FeatureHWDivThumb, FeatureHWDivARM]>; + "HasVirtualization", "true", + "Supports Virtualization extension", + [FeatureHWDivThumb, FeatureHWDivARM]>; -// M-series ISA -def FeatureMClass : SubtargetFeature<"mclass", "ARMProcClass", "MClass", - "Is microcontroller profile ('M' series)">; +// Special TRAP encoding for NaCl, which looks like a TRAP in Thumb too. 
+// See ARMInstrInfo.td for details. +def FeatureNaClTrap : SubtargetFeature<"nacl-trap", "UseNaClTrap", "true", + "NaCl trap">; -// R-series ISA -def FeatureRClass : SubtargetFeature<"rclass", "ARMProcClass", "RClass", - "Is realtime profile ('R' series)">; +def FeatureStrictAlign : SubtargetFeature<"strict-align", + "StrictAlign", "true", + "Disallow all unaligned memory " + "access">; + +def FeatureLongCalls : SubtargetFeature<"long-calls", "GenLongCalls", "true", + "Generate calls via indirect call " + "instructions">; + +def FeatureExecuteOnly : SubtargetFeature<"execute-only", + "GenExecuteOnly", "true", + "Enable the generation of " + "execute only code.">; + +def FeatureReserveR9 : SubtargetFeature<"reserve-r9", "ReserveR9", "true", + "Reserve R9, making it unavailable" + " as GPR">; + +def FeatureNoMovt : SubtargetFeature<"no-movt", "NoMovt", "true", + "Don't use movt/movw pairs for " + "32-bit imms">; + +def FeatureNoNegativeImmediates + : SubtargetFeature<"no-neg-immediates", + "NegativeImmediates", "false", + "Convert immediates and instructions " + "to their negated or complemented " + "equivalent when the immediate does " + "not fit in the encoding.">; + + +//===----------------------------------------------------------------------===// +// ARM architecture class +// // A-series ISA def FeatureAClass : SubtargetFeature<"aclass", "ARMProcClass", "AClass", "Is application profile ('A' series)">; -// Special TRAP encoding for NaCl, which looks like a TRAP in Thumb too. -// See ARMInstrInfo.td for details. -def FeatureNaClTrap : SubtargetFeature<"nacl-trap", "UseNaClTrap", "true", - "NaCl trap">; +// R-series ISA +def FeatureRClass : SubtargetFeature<"rclass", "ARMProcClass", "RClass", + "Is realtime profile ('R' series)">; -def FeatureStrictAlign : SubtargetFeature<"strict-align", - "StrictAlign", "true", - "Disallow all unaligned memory " - "access">; +// M-series ISA +def FeatureMClass : SubtargetFeature<"mclass", "ARMProcClass", "MClass", + "Is microcontroller profile ('M' series)">; -def FeatureLongCalls : SubtargetFeature<"long-calls", "GenLongCalls", "true", - "Generate calls via indirect call " - "instructions">; -def FeatureReserveR9 : SubtargetFeature<"reserve-r9", "ReserveR9", "true", - "Reserve R9, making it unavailable as " - "GPR">; +def FeatureThumb2 : SubtargetFeature<"thumb2", "HasThumb2", "true", + "Enable Thumb2 instructions">; -def FeatureNoMovt : SubtargetFeature<"no-movt", "NoMovt", "true", - "Don't use movt/movw pairs for 32-bit " - "imms">; +def FeatureNoARM : SubtargetFeature<"noarm", "NoARM", "true", + "Does not support ARM mode execution", + [ModeThumb]>; -def FeatureNoNegativeImmediates : SubtargetFeature<"no-neg-immediates", - "NegativeImmediates", "false", - "Convert immediates and instructions " - "to their negated or complemented " - "equivalent when the immediate does " - "not fit in the encoding.">; //===----------------------------------------------------------------------===// // ARM ISAa. 
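A note before the ISA hunk that follows: these SubtargetFeature records, like the FP chain above (vfp2 -> vfp3 -> neon), compose through their implied-feature lists. Here is a hedged C++ sketch of what that transitive enabling amounts to; this is a hand-written analogue, not TableGen's generated code, and all names are invented.

#include <map>
#include <set>
#include <string>
#include <vector>

using FeatureGraph = std::map<std::string, std::vector<std::string>>;

// Enabling a feature enables everything it lists as implied, transitively,
// e.g. v6 -> v5te -> v5t -> v4t in the defs below.
void enableFeature(const FeatureGraph &Implies, const std::string &F,
                   std::set<std::string> &Enabled) {
  if (!Enabled.insert(F).second)
    return; // already on
  auto It = Implies.find(F);
  if (It != Implies.end())
    for (const auto &Dep : It->second)
      enableFeature(Implies, Dep, Enabled);
}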
@@ -275,43 +344,57 @@ def FeatureNoNegativeImmediates : SubtargetFeature<"no-neg-immediates", def HasV4TOps : SubtargetFeature<"v4t", "HasV4TOps", "true", "Support ARM v4T instructions">; + def HasV5TOps : SubtargetFeature<"v5t", "HasV5TOps", "true", "Support ARM v5T instructions", [HasV4TOps]>; + def HasV5TEOps : SubtargetFeature<"v5te", "HasV5TEOps", "true", - "Support ARM v5TE, v5TEj, and v5TExp instructions", + "Support ARM v5TE, v5TEj, and " + "v5TExp instructions", [HasV5TOps]>; + def HasV6Ops : SubtargetFeature<"v6", "HasV6Ops", "true", "Support ARM v6 instructions", [HasV5TEOps]>; + def HasV6MOps : SubtargetFeature<"v6m", "HasV6MOps", "true", "Support ARM v6M instructions", [HasV6Ops]>; + def HasV8MBaselineOps : SubtargetFeature<"v8m", "HasV8MBaselineOps", "true", "Support ARM v8M Baseline instructions", [HasV6MOps]>; + def HasV6KOps : SubtargetFeature<"v6k", "HasV6KOps", "true", "Support ARM v6k instructions", [HasV6Ops]>; + def HasV6T2Ops : SubtargetFeature<"v6t2", "HasV6T2Ops", "true", "Support ARM v6t2 instructions", [HasV8MBaselineOps, HasV6KOps, FeatureThumb2]>; + def HasV7Ops : SubtargetFeature<"v7", "HasV7Ops", "true", "Support ARM v7 instructions", [HasV6T2Ops, FeaturePerfMon, FeatureV7Clrex]>; + +def HasV8MMainlineOps : + SubtargetFeature<"v8m.main", "HasV8MMainlineOps", "true", + "Support ARM v8M Mainline instructions", + [HasV7Ops]>; + def HasV8Ops : SubtargetFeature<"v8", "HasV8Ops", "true", "Support ARM v8 instructions", [HasV7Ops, FeatureAcquireRelease]>; + def HasV8_1aOps : SubtargetFeature<"v8.1a", "HasV8_1aOps", "true", "Support ARM v8.1a instructions", [HasV8Ops]>; -def HasV8_2aOps : SubtargetFeature<"v8.2a", "HasV8_2aOps", "true", + +def HasV8_2aOps : SubtargetFeature<"v8.2a", "HasV8_2aOps", "true", "Support ARM v8.2a instructions", [HasV8_1aOps]>; -def HasV8MMainlineOps : SubtargetFeature<"v8m.main", "HasV8MMainlineOps", "true", - "Support ARM v8M Mainline instructions", - [HasV7Ops]>; //===----------------------------------------------------------------------===// @@ -367,11 +450,17 @@ def ProcR52 : SubtargetFeature<"r52", "ARMProcFamily", "CortexR52", def ProcM3 : SubtargetFeature<"m3", "ARMProcFamily", "CortexM3", "Cortex-M3 ARM processors", []>; + //===----------------------------------------------------------------------===// -// ARM schedules. +// ARM Helper classes. // -include "ARMSchedule.td" +class Architecture features> + : SubtargetFeature; + +class ProcNoItin Features> + : Processor; //===----------------------------------------------------------------------===// @@ -527,12 +616,21 @@ def ARMv7k : Architecture<"armv7k", "ARMv7a", [ARMv7a]>; def ARMv7s : Architecture<"armv7s", "ARMv7a", [ARMv7a]>; +//===----------------------------------------------------------------------===// +// ARM schedules. +//===----------------------------------------------------------------------===// +// +include "ARMSchedule.td" + //===----------------------------------------------------------------------===// // ARM processors // // Dummy CPU, used to target architectures -def : ProcNoItin<"generic", []>; +def : ProcessorModel<"generic", CortexA8Model, []>; + +// FIXME: Several processors below are not using their own scheduler +// model, but one of similar/previous processor. These should be fixed. 
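The FIXME above matters because the ProcessorModel attached to each CPU is what later cost heuristics read at codegen time. Roughly, as an illustration (MCSchedModel and its MispredictPenalty field are real LLVM, the wrapper function is invented):

#include "llvm/MC/MCSchedule.h"

// ARMSubtarget::getMispredictionPenalty() forwards this field, and the
// if-conversion costing later in this patch multiplies it into UnpredCost.
static unsigned takenBranchCost(const llvm::MCSchedModel &SM) {
  return SM.MispredictPenalty;
}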
def : ProcNoItin<"arm8", [ARMv4]>; def : ProcNoItin<"arm810", [ARMv4]>; @@ -593,7 +691,6 @@ def : Processor<"arm1156t2f-s", ARMV6Itineraries, [ARMv6t2, FeatureVFP2, FeatureHasSlowFPVMLx]>; -// FIXME: A5 has currently the same Schedule model as A8 def : ProcessorModel<"cortex-a5", CortexA8Model, [ARMv7a, ProcA5, FeatureHasRetAddrStack, FeatureTrustZone, @@ -637,7 +734,6 @@ def : ProcessorModel<"cortex-a9", CortexA9Model, [ARMv7a, ProcA9, FeatureCheckVLDnAlign, FeatureMP]>; -// FIXME: A12 has currently the same Schedule model as A9 def : ProcessorModel<"cortex-a12", CortexA9Model, [ARMv7a, ProcA12, FeatureHasRetAddrStack, FeatureTrustZone, @@ -647,7 +743,6 @@ def : ProcessorModel<"cortex-a12", CortexA9Model, [ARMv7a, ProcA12, FeatureVirtualization, FeatureMP]>; -// FIXME: A15 has currently the same Schedule model as A9. def : ProcessorModel<"cortex-a15", CortexA9Model, [ARMv7a, ProcA15, FeatureDontWidenVMOVS, FeatureHasRetAddrStack, @@ -659,7 +754,6 @@ def : ProcessorModel<"cortex-a15", CortexA9Model, [ARMv7a, ProcA15, FeatureAvoidPartialCPSR, FeatureVirtualization]>; -// FIXME: A17 has currently the same Schedule model as A9 def : ProcessorModel<"cortex-a17", CortexA9Model, [ARMv7a, ProcA17, FeatureHasRetAddrStack, FeatureTrustZone, @@ -669,9 +763,7 @@ def : ProcessorModel<"cortex-a17", CortexA9Model, [ARMv7a, ProcA17, FeatureAvoidPartialCPSR, FeatureVirtualization]>; -// FIXME: krait has currently the same Schedule model as A9 -// FIXME: krait has currently the same features as A9 plus VFP4 and hardware -// division features. +// FIXME: krait has currently the same features as A9 plus VFP4 and HWDiv def : ProcessorModel<"krait", CortexA9Model, [ARMv7a, ProcKrait, FeatureHasRetAddrStack, FeatureMuxedUnits, @@ -701,12 +793,10 @@ def : ProcessorModel<"swift", SwiftModel, [ARMv7a, ProcSwift, FeatureSlowVGETLNi32, FeatureSlowVDUP32]>; -// FIXME: R4 has currently the same ProcessorModel as A8. def : ProcessorModel<"cortex-r4", CortexA8Model, [ARMv7r, ProcR4, FeatureHasRetAddrStack, FeatureAvoidPartialCPSR]>; -// FIXME: R4F has currently the same ProcessorModel as A8. def : ProcessorModel<"cortex-r4f", CortexA8Model, [ARMv7r, ProcR4, FeatureHasRetAddrStack, FeatureSlowFPBrcc, @@ -715,7 +805,6 @@ def : ProcessorModel<"cortex-r4f", CortexA8Model, [ARMv7r, ProcR4, FeatureD16, FeatureAvoidPartialCPSR]>; -// FIXME: R5 has currently the same ProcessorModel as A8. def : ProcessorModel<"cortex-r5", CortexA8Model, [ARMv7r, ProcR5, FeatureHasRetAddrStack, FeatureVFP3, @@ -725,7 +814,6 @@ def : ProcessorModel<"cortex-r5", CortexA8Model, [ARMv7r, ProcR5, FeatureHasSlowFPVMLx, FeatureAvoidPartialCPSR]>; -// FIXME: R7 has currently the same ProcessorModel as A8 and is modelled as R5. 
def : ProcessorModel<"cortex-r7", CortexA8Model, [ARMv7r, ProcR7, FeatureHasRetAddrStack, FeatureVFP3, @@ -748,13 +836,19 @@ def : ProcessorModel<"cortex-r8", CortexA8Model, [ARMv7r, FeatureHasSlowFPVMLx, FeatureAvoidPartialCPSR]>; -def : ProcNoItin<"cortex-m3", [ARMv7m, ProcM3]>; -def : ProcNoItin<"sc300", [ARMv7m, ProcM3]>; +def : ProcessorModel<"cortex-m3", CortexM3Model, [ARMv7m, + ProcM3, + FeatureHasNoBranchPredictor]>; -def : ProcNoItin<"cortex-m4", [ARMv7em, +def : ProcessorModel<"sc300", CortexM3Model, [ARMv7m, + ProcM3, + FeatureHasNoBranchPredictor]>; + +def : ProcessorModel<"cortex-m4", CortexM3Model, [ARMv7em, FeatureVFP4, FeatureVFPOnlySP, - FeatureD16]>; + FeatureD16, + FeatureHasNoBranchPredictor]>; def : ProcNoItin<"cortex-m7", [ARMv7em, FeatureFPARMv8, @@ -763,11 +857,12 @@ def : ProcNoItin<"cortex-m7", [ARMv7em, def : ProcNoItin<"cortex-m23", [ARMv8mBaseline, FeatureNoMovt]>; -def : ProcNoItin<"cortex-m33", [ARMv8mMainline, +def : ProcessorModel<"cortex-m33", CortexM3Model, [ARMv8mMainline, FeatureDSP, FeatureFPARMv8, FeatureD16, - FeatureVFPOnlySP]>; + FeatureVFPOnlySP, + FeatureHasNoBranchPredictor]>; def : ProcNoItin<"cortex-a32", [ARMv8a, FeatureHWDivThumb, @@ -788,12 +883,14 @@ def : ProcNoItin<"cortex-a53", [ARMv8a, ProcA53, FeatureCRC, FeatureFPAO]>; -def : ProcNoItin<"cortex-a57", [ARMv8a, ProcA57, +def : ProcessorModel<"cortex-a57", CortexA57Model, [ARMv8a, ProcA57, FeatureHWDivThumb, FeatureHWDivARM, FeatureCrypto, FeatureCRC, - FeatureFPAO]>; + FeatureFPAO, + FeatureAvoidPartialCPSR, + FeatureCheapPredicableCPSR]>; def : ProcNoItin<"cortex-a72", [ARMv8a, ProcA72, FeatureHWDivThumb, @@ -807,7 +904,6 @@ def : ProcNoItin<"cortex-a73", [ARMv8a, ProcA73, FeatureCrypto, FeatureCRC]>; -// Cyclone is very similar to swift def : ProcessorModel<"cyclone", SwiftModel, [ARMv8a, ProcSwift, FeatureHasRetAddrStack, FeatureNEONForFP, @@ -853,9 +949,7 @@ def : ProcessorModel<"cortex-r52", CortexR52Model, [ARMv8r, ProcR52, //===----------------------------------------------------------------------===// include "ARMRegisterInfo.td" - include "ARMRegisterBanks.td" - include "ARMCallingConv.td" //===----------------------------------------------------------------------===// @@ -863,7 +957,6 @@ include "ARMCallingConv.td" //===----------------------------------------------------------------------===// include "ARMInstrInfo.td" - def ARMInstrInfo : InstrInfo; //===----------------------------------------------------------------------===// @@ -884,7 +977,7 @@ def ARMAsmParserVariant : AsmParserVariant { } def ARM : Target { - // Pull in Instruction Info: + // Pull in Instruction Info. 
let InstructionSet = ARMInstrInfo; let AssemblyWriters = [ARMAsmWriter]; let AssemblyParserVariants = [ARMAsmParserVariant]; diff --git a/interpreter/llvm/src/lib/Target/ARM/ARMAsmPrinter.cpp b/interpreter/llvm/src/lib/Target/ARM/ARMAsmPrinter.cpp index 14e197f477f1d..582153daebde9 100644 --- a/interpreter/llvm/src/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/interpreter/llvm/src/lib/Target/ARM/ARMAsmPrinter.cpp @@ -23,6 +23,8 @@ #include "MCTargetDesc/ARMMCExpr.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallString.h" +#include "llvm/BinaryFormat/COFF.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" @@ -43,9 +45,7 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/ARMBuildAttributes.h" -#include "llvm/Support/COFF.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetParser.h" #include "llvm/Support/TargetRegistry.h" @@ -1103,6 +1103,7 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) { case ARM::tPUSH: // Special case here: no src & dst reg, but two extra imp ops. StartOp = 2; NumOffset = 2; + LLVM_FALLTHROUGH; case ARM::STMDB_UPD: case ARM::t2STMDB_UPD: case ARM::VSTMDDB_UPD: @@ -1504,6 +1505,9 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { return; } case ARM::CONSTPOOL_ENTRY: { + if (Subtarget->genExecuteOnly()) + llvm_unreachable("execute-only should not generate constant pools"); + /// CONSTPOOL_ENTRY - This instruction represents a floating constant pool /// in the function. The first operand is the ID# for this instruction, the /// second is the index into the MachineConstantPool that this is, the third diff --git a/interpreter/llvm/src/lib/Target/ARM/ARMBaseInstrInfo.cpp b/interpreter/llvm/src/lib/Target/ARM/ARMBaseInstrInfo.cpp index 5c9d589e26252..3cf5950a1918d 100644 --- a/interpreter/llvm/src/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/interpreter/llvm/src/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -21,9 +21,9 @@ #include "MCTargetDesc/ARMAddressingModes.h" #include "MCTargetDesc/ARMBaseInfo.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Triple.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -558,13 +558,68 @@ bool ARMBaseInstrInfo::DefinesPredicate( return Found; } -static bool isCPSRDefined(const MachineInstr *MI) { - for (const auto &MO : MI->operands()) +bool ARMBaseInstrInfo::isCPSRDefined(const MachineInstr &MI) { + for (const auto &MO : MI.operands()) if (MO.isReg() && MO.getReg() == ARM::CPSR && MO.isDef() && !MO.isDead()) return true; return false; } +bool ARMBaseInstrInfo::isAddrMode3OpImm(const MachineInstr &MI, + unsigned Op) const { + const MachineOperand &Offset = MI.getOperand(Op + 1); + return Offset.getReg() != 0; +} + +// Load with negative register offset requires additional 1cyc and +I unit +// for Cortex A57 +bool ARMBaseInstrInfo::isAddrMode3OpMinusReg(const MachineInstr &MI, + unsigned Op) const { + const MachineOperand &Offset = MI.getOperand(Op + 1); + const MachineOperand &Opc = MI.getOperand(Op + 2); + assert(Opc.isImm()); + assert(Offset.isReg()); + int64_t OpcImm = Opc.getImm(); + + bool isSub = ARM_AM::getAM3Op(OpcImm) == ARM_AM::sub; + return (isSub && Offset.getReg() != 0); +} + +bool 
ARMBaseInstrInfo::isLdstScaledReg(const MachineInstr &MI, + unsigned Op) const { + const MachineOperand &Opc = MI.getOperand(Op + 2); + unsigned OffImm = Opc.getImm(); + return ARM_AM::getAM2ShiftOpc(OffImm) != ARM_AM::no_shift; +} + +// Load, scaled register offset, not plus LSL2 +bool ARMBaseInstrInfo::isLdstScaledRegNotPlusLsl2(const MachineInstr &MI, + unsigned Op) const { + const MachineOperand &Opc = MI.getOperand(Op + 2); + unsigned OffImm = Opc.getImm(); + + bool isAdd = ARM_AM::getAM2Op(OffImm) == ARM_AM::add; + unsigned Amt = ARM_AM::getAM2Offset(OffImm); + ARM_AM::ShiftOpc ShiftOpc = ARM_AM::getAM2ShiftOpc(OffImm); + if (ShiftOpc == ARM_AM::no_shift) return false; // not scaled + bool SimpleScaled = (isAdd && ShiftOpc == ARM_AM::lsl && Amt == 2); + return !SimpleScaled; +} + +// Minus reg for ldstso addr mode +bool ARMBaseInstrInfo::isLdstSoMinusReg(const MachineInstr &MI, + unsigned Op) const { + unsigned OffImm = MI.getOperand(Op + 2).getImm(); + return ARM_AM::getAM2Op(OffImm) == ARM_AM::sub; +} + +// Load, scaled register offset +bool ARMBaseInstrInfo::isAm2ScaledReg(const MachineInstr &MI, + unsigned Op) const { + unsigned OffImm = MI.getOperand(Op + 2).getImm(); + return ARM_AM::getAM2ShiftOpc(OffImm) != ARM_AM::no_shift; +} + static bool isEligibleForITBlock(const MachineInstr *MI) { switch (MI->getOpcode()) { default: return true; @@ -590,7 +645,7 @@ static bool isEligibleForITBlock(const MachineInstr *MI) { case ARM::tSUBi3: // SUB (immediate) T1 case ARM::tSUBi8: // SUB (immediate) T2 case ARM::tSUBrr: // SUB (register) T1 - return !isCPSRDefined(MI); + return !ARMBaseInstrInfo::isCPSRDefined(*MI); } } @@ -610,12 +665,14 @@ bool ARMBaseInstrInfo::isPredicable(const MachineInstr &MI) const { const ARMFunctionInfo *AFI = MI.getParent()->getParent()->getInfo(); + // Neon instructions in Thumb2 IT blocks are deprecated, see ARMARM. + // In their ARM encoding, they can't be encoded in a conditional form. + if ((MI.getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) + return false; + if (AFI->isThumb2Function()) { if (getSubtarget().restrictIT()) return isV8EligibleForIT(&MI); - } else { // non-Thumb - if ((MI.getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) - return false; } return true; @@ -1794,9 +1851,9 @@ isProfitableToIfCvt(MachineBasicBlock &MBB, } bool ARMBaseInstrInfo:: -isProfitableToIfCvt(MachineBasicBlock &, +isProfitableToIfCvt(MachineBasicBlock &TBB, unsigned TCycles, unsigned TExtra, - MachineBasicBlock &, + MachineBasicBlock &FBB, unsigned FCycles, unsigned FExtra, BranchProbability Probability) const { if (!TCycles) @@ -1806,14 +1863,46 @@ isProfitableToIfCvt(MachineBasicBlock &, // Here we scale up each component of UnpredCost to avoid precision issue when // scaling TCycles/FCycles by Probability. const unsigned ScalingUpFactor = 1024; - unsigned TUnpredCost = Probability.scale(TCycles * ScalingUpFactor); - unsigned FUnpredCost = + + unsigned PredCost = (TCycles + FCycles + TExtra + FExtra) * ScalingUpFactor; + unsigned UnpredCost; + if (!Subtarget.hasBranchPredictor()) { + // When we don't have a branch predictor it's always cheaper to not take a + // branch than take it, so we have to take that into account. 
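// Worked example (illustrative numbers, not from the patch): for a triangle
// with TCycles = 2, no extra cycles, misprediction penalty 4 and branch
// probability 1/2, the code below computes
//   UnpredCost = 0.5 * (2 + 1) + 0.5 * 4 = 3.5  (times ScalingUpFactor)
//   PredCost   = 2                              (times ScalingUpFactor)
// so PredCost <= UnpredCost and if-conversion is considered profitable.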
+ unsigned NotTakenBranchCost = 1; + unsigned TakenBranchCost = Subtarget.getMispredictionPenalty(); + unsigned TUnpredCycles, FUnpredCycles; + if (!FCycles) { + // Triangle: TBB is the fallthrough + TUnpredCycles = TCycles + NotTakenBranchCost; + FUnpredCycles = TakenBranchCost; + } else { + // Diamond: TBB is the block that is branched to, FBB is the fallthrough + TUnpredCycles = TCycles + TakenBranchCost; + FUnpredCycles = FCycles + NotTakenBranchCost; + // The branch at the end of FBB will disappear when it's predicated, so + // discount it from PredCost. + PredCost -= 1 * ScalingUpFactor; + } + // The total cost is the cost of each path scaled by their probabilities + unsigned TUnpredCost = Probability.scale(TUnpredCycles * ScalingUpFactor); + unsigned FUnpredCost = Probability.getCompl().scale(FUnpredCycles * ScalingUpFactor); + UnpredCost = TUnpredCost + FUnpredCost; + // When predicating, assume that the first IT can be folded away but later + // ones cost one cycle each. + if (Subtarget.isThumb2() && TCycles + FCycles > 4) { + PredCost += ((TCycles + FCycles - 4) / 4) * ScalingUpFactor; + } + } else { + unsigned TUnpredCost = Probability.scale(TCycles * ScalingUpFactor); + unsigned FUnpredCost = Probability.getCompl().scale(FCycles * ScalingUpFactor); - unsigned UnpredCost = TUnpredCost + FUnpredCost; - UnpredCost += 1 * ScalingUpFactor; // The branch itself - UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10; + UnpredCost = TUnpredCost + FUnpredCost; + UnpredCost += 1 * ScalingUpFactor; // The branch itself + UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10; + } - return (TCycles + FCycles + TExtra + FExtra) * ScalingUpFactor <= UnpredCost; + return PredCost <= UnpredCost; } bool @@ -3349,6 +3438,22 @@ ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData, return DefCycle; } +bool ARMBaseInstrInfo::isLDMBaseRegInList(const MachineInstr &MI) const { + unsigned BaseReg = MI.getOperand(0).getReg(); + for (unsigned i = 1, sz = MI.getNumOperands(); i < sz; ++i) { + const auto &Op = MI.getOperand(i); + if (Op.isReg() && Op.getReg() == BaseReg) + return true; + } + return false; +} +unsigned +ARMBaseInstrInfo::getLDMVariableDefsSize(const MachineInstr &MI) const { + // ins GPR:$Rn, pred:$p (2xOp), reglist:$regs, variable_ops + // (outs GPR:$wb), (ins GPR:$Rn, pred:$p (2xOp), reglist:$regs, variable_ops) + return MI.getNumOperands() + 1 - MI.getDesc().getNumOperands(); +} + int ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData, const MCInstrDesc &DefMCID, @@ -4119,7 +4224,8 @@ unsigned ARMBaseInstrInfo::getPredicationCost(const MachineInstr &MI) const { const MCInstrDesc &MCID = MI.getDesc(); - if (MCID.isCall() || MCID.hasImplicitDefOfPhysReg(ARM::CPSR)) { + if (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) && + !Subtarget.cheapPredicableCPSRDef())) { // When predicated, CPSR is an additional source operand for CPSR updating // instructions, this apparently increases their latencies. return 1; } @@ -4148,7 +4254,8 @@ unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, } const MCInstrDesc &MCID = MI.getDesc(); - if (PredCost && (MCID.isCall() || MCID.hasImplicitDefOfPhysReg(ARM::CPSR))) { + if (PredCost && (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) && + !Subtarget.cheapPredicableCPSRDef()))) { // When predicated, CPSR is an additional source operand for CPSR updating // instructions, this apparently increases their latencies.
*PredCost = 1; diff --git a/interpreter/llvm/src/lib/Target/ARM/ARMBaseInstrInfo.h b/interpreter/llvm/src/lib/Target/ARM/ARMBaseInstrInfo.h index dd7fe871345af..c52e572786d48 100644 --- a/interpreter/llvm/src/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/interpreter/llvm/src/lib/Target/ARM/ARMBaseInstrInfo.h @@ -159,6 +159,24 @@ class ARMBaseInstrInfo : public ARMGenInstrInfo { bool isPredicable(const MachineInstr &MI) const override; + // CPSR defined in instruction + static bool isCPSRDefined(const MachineInstr &MI); + bool isAddrMode3OpImm(const MachineInstr &MI, unsigned Op) const; + bool isAddrMode3OpMinusReg(const MachineInstr &MI, unsigned Op) const; + + // Load, scaled register offset + bool isLdstScaledReg(const MachineInstr &MI, unsigned Op) const; + // Load, scaled register offset, not plus LSL2 + bool isLdstScaledRegNotPlusLsl2(const MachineInstr &MI, unsigned Op) const; + // Minus reg for ldstso addr mode + bool isLdstSoMinusReg(const MachineInstr &MI, unsigned Op) const; + // Scaled register offset in address mode 2 + bool isAm2ScaledReg(const MachineInstr &MI, unsigned Op) const; + // Load multiple, base reg in list + bool isLDMBaseRegInList(const MachineInstr &MI) const; + // get LDM variable defs size + unsigned getLDMVariableDefsSize(const MachineInstr &MI) const; + /// GetInstSize - Returns the size of the specified MachineInstr. /// unsigned getInstSizeInBytes(const MachineInstr &MI) const override; diff --git a/interpreter/llvm/src/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/interpreter/llvm/src/lib/Target/ARM/ARMBaseRegisterInfo.cpp index b18ed509ed23f..370c0a7f5c537 100644 --- a/interpreter/llvm/src/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/interpreter/llvm/src/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -11,17 +11,17 @@ // //===----------------------------------------------------------------------===// +#include "ARMBaseRegisterInfo.h" #include "ARM.h" #include "ARMBaseInstrInfo.h" -#include "ARMBaseRegisterInfo.h" #include "ARMFrameLowering.h" #include "ARMMachineFunctionInfo.h" #include "ARMSubtarget.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "MCTargetDesc/ARMBaseInfo.h" #include "llvm/ADT/BitVector.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -117,7 +117,7 @@ ARMBaseRegisterInfo::getCallPreservedMask(const MachineFunction &MF, CallingConv::ID CC) const { const ARMSubtarget &STI = MF.getSubtarget(); if (CC == CallingConv::GHC) - // This is academic becase all GHC calls are (supposed to be) tail calls + // This is academic because all GHC calls are (supposed to be) tail calls return CSR_NoRegs_RegMask; if (STI.isTargetDarwin() && STI.getTargetLowering()->supportSwiftError() && @@ -163,7 +163,7 @@ ARMBaseRegisterInfo::getThisReturnPreservedMask(const MachineFunction &MF, // both or otherwise does not want to enable this optimization, the function // should return NULL if (CC == CallingConv::GHC) - // This is academic becase all GHC calls are (supposed to be) tail calls + // This is academic because all GHC calls are (supposed to be) tail calls return nullptr; return STI.isTargetDarwin() ? 
CSR_iOS_ThisReturn_RegMask : CSR_AAPCS_ThisReturn_RegMask; @@ -193,10 +193,11 @@ getReservedRegs(const MachineFunction &MF) const { for (unsigned R = 0; R < 16; ++R) markSuperRegs(Reserved, ARM::D16 + R); } - const TargetRegisterClass *RC = &ARM::GPRPairRegClass; - for(TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); I!=E; ++I) - for (MCSubRegIterator SI(*I, this); SI.isValid(); ++SI) - if (Reserved.test(*SI)) markSuperRegs(Reserved, *I); + const TargetRegisterClass &RC = ARM::GPRPairRegClass; + for (unsigned Reg : RC) + for (MCSubRegIterator SI(Reg, this); SI.isValid(); ++SI) + if (Reserved.test(*SI)) + markSuperRegs(Reserved, Reg); assert(checkAllSuperRegsMarked(Reserved)); return Reserved; @@ -315,8 +316,7 @@ ARMBaseRegisterInfo::getRegAllocationHints(unsigned VirtReg, Hints.push_back(PairedPhys); // Then prefer even or odd registers. - for (unsigned I = 0, E = Order.size(); I != E; ++I) { - unsigned Reg = Order[I]; + for (unsigned Reg : Order) { if (Reg == PairedPhys || (getEncodingValue(Reg) & 1) != Odd) continue; // Don't provide hints that are paired to a reserved register. @@ -659,11 +659,8 @@ bool ARMBaseRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI, unsigned Ba const MCInstrDesc &Desc = MI->getDesc(); unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask); unsigned i = 0; - - while (!MI->getOperand(i).isFI()) { - ++i; - assert(i < MI->getNumOperands() &&"Instr doesn't have FrameIndex operand!"); - } + for (; !MI->getOperand(i).isFI(); ++i) + assert(i+1 < MI->getNumOperands() && "Instr doesn't have FrameIndex operand!"); // AddrMode4 and AddrMode6 cannot handle any offset. if (AddrMode == ARMII::AddrMode4 || AddrMode == ARMII::AddrMode6) diff --git a/interpreter/llvm/src/lib/Target/ARM/ARMCallLowering.cpp b/interpreter/llvm/src/lib/Target/ARM/ARMCallLowering.cpp index 46ac4d0ad9333..051827a6a6a2f 100644 --- a/interpreter/llvm/src/lib/Target/ARM/ARMCallLowering.cpp +++ b/interpreter/llvm/src/lib/Target/ARM/ARMCallLowering.cpp @@ -21,6 +21,7 @@ #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineRegisterInfo.h" using namespace llvm; @@ -34,6 +35,19 @@ ARMCallLowering::ARMCallLowering(const ARMTargetLowering &TLI) static bool isSupportedType(const DataLayout &DL, const ARMTargetLowering &TLI, Type *T) { + if (T->isArrayTy()) + return true; + + if (T->isStructTy()) { + // For now we only allow homogeneous structs that we can manipulate with + // G_MERGE_VALUES and G_UNMERGE_VALUES + auto StructT = cast(T); + for (unsigned i = 1, e = StructT->getNumElements(); i != e; ++i) + if (StructT->getElementType(i) != StructT->getElementType(0)) + return false; + return true; + } + EVT VT = TLI.getValueType(DL, T, true); if (!VT.isSimple() || VT.isVector() || !(VT.isInteger() || VT.isFloatingPoint())) @@ -119,8 +133,7 @@ struct OutgoingValueHandler : public CallLowering::ValueHandler { unsigned NewRegs[] = {MRI.createGenericVirtualRegister(LLT::scalar(32)), MRI.createGenericVirtualRegister(LLT::scalar(32))}; - MIRBuilder.buildExtract(NewRegs[0], Arg.Reg, 0); - MIRBuilder.buildExtract(NewRegs[1], Arg.Reg, 32); + MIRBuilder.buildUnmerge(NewRegs, Arg.Reg); bool IsLittle = MIRBuilder.getMF().getSubtarget().isLittle(); if (!IsLittle) @@ -148,23 +161,55 @@ struct OutgoingValueHandler : public CallLowering::ValueHandler { }; } // End anonymous namespace. 
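The outgoing handler above lowers a 64-bit value by unmerging it into two 32-bit GPR-sized pieces and swapping them on big-endian targets; the incoming path below rebuilds the value with G_MERGE_VALUES. A self-contained C++ model of that value flow (illustrative only, not the MIRBuilder API):

#include <cstdint>
#include <utility>

static std::pair<uint32_t, uint32_t> splitToS32(uint64_t Val, bool IsLittle) {
  uint32_t Lo = uint32_t(Val);       // corresponds to NewRegs[0]
  uint32_t Hi = uint32_t(Val >> 32); // corresponds to NewRegs[1]
  if (!IsLittle)
    std::swap(Lo, Hi); // mirrors std::swap(NewRegs[0], NewRegs[1]) above
  return {Lo, Hi};
}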
-void ARMCallLowering::splitToValueTypes(const ArgInfo &OrigArg, - SmallVectorImpl &SplitArgs, - const DataLayout &DL, - MachineRegisterInfo &MRI) const { +void ARMCallLowering::splitToValueTypes( + const ArgInfo &OrigArg, SmallVectorImpl &SplitArgs, + MachineFunction &MF, const SplitArgTy &PerformArgSplit) const { const ARMTargetLowering &TLI = *getTLI(); LLVMContext &Ctx = OrigArg.Ty->getContext(); + const DataLayout &DL = MF.getDataLayout(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + const Function *F = MF.getFunction(); SmallVector SplitVTs; SmallVector Offsets; ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs, &Offsets, 0); - assert(SplitVTs.size() == 1 && "Unsupported type"); + if (SplitVTs.size() == 1) { + // Even if there is no splitting to do, we still want to replace the + // original type (e.g. pointer type -> integer). + auto Flags = OrigArg.Flags; + unsigned OriginalAlignment = DL.getABITypeAlignment(OrigArg.Ty); + Flags.setOrigAlign(OriginalAlignment); + SplitArgs.emplace_back(OrigArg.Reg, SplitVTs[0].getTypeForEVT(Ctx), Flags, + OrigArg.IsFixed); + return; + } - // Even if there is no splitting to do, we still want to replace the original - // type (e.g. pointer type -> integer). - SplitArgs.emplace_back(OrigArg.Reg, SplitVTs[0].getTypeForEVT(Ctx), - OrigArg.Flags, OrigArg.IsFixed); + unsigned FirstRegIdx = SplitArgs.size(); + for (unsigned i = 0, e = SplitVTs.size(); i != e; ++i) { + EVT SplitVT = SplitVTs[i]; + Type *SplitTy = SplitVT.getTypeForEVT(Ctx); + auto Flags = OrigArg.Flags; + + unsigned OriginalAlignment = DL.getABITypeAlignment(SplitTy); + Flags.setOrigAlign(OriginalAlignment); + + bool NeedsConsecutiveRegisters = + TLI.functionArgumentNeedsConsecutiveRegisters( + SplitTy, F->getCallingConv(), F->isVarArg()); + if (NeedsConsecutiveRegisters) { + Flags.setInConsecutiveRegs(); + if (i == e - 1) + Flags.setInConsecutiveRegsLast(); + } + + SplitArgs.push_back( + ArgInfo{MRI.createGenericVirtualRegister(getLLTForType(*SplitTy, DL)), + SplitTy, Flags, OrigArg.IsFixed}); + } + + for (unsigned i = 0; i < Offsets.size(); ++i) + PerformArgSplit(SplitArgs[FirstRegIdx + i].Reg, Offsets[i] * 8); } /// Lower the return value for the already existing \p Ret. This assumes that @@ -185,9 +230,15 @@ bool ARMCallLowering::lowerReturnVal(MachineIRBuilder &MIRBuilder, return false; SmallVector SplitVTs; + SmallVector Regs; ArgInfo RetInfo(VReg, Val->getType()); setArgFlags(RetInfo, AttributeList::ReturnIndex, DL, F); - splitToValueTypes(RetInfo, SplitVTs, DL, MF.getRegInfo()); + splitToValueTypes(RetInfo, SplitVTs, MF, [&](unsigned Reg, uint64_t Offset) { + Regs.push_back(Reg); + }); + + if (Regs.size() > 1) + MIRBuilder.buildUnmerge(Regs, VReg); CCAssignFn *AssignFn = TLI.CCAssignFnForReturn(F.getCallingConv(), F.isVarArg()); @@ -270,7 +321,7 @@ struct IncomingValueHandler : public CallLowering::ValueHandler { assert(VA.getValVT().getSizeInBits() <= 64 && "Unsupported value size"); assert(VA.getLocVT().getSizeInBits() <= 64 && "Unsupported location size"); - // The necesary extensions are handled on the other side of the ABI + // The necessary extensions are handled on the other side of the ABI // boundary. 
markPhysRegUsed(PhysReg); MIRBuilder.buildCopy(ValVReg, PhysReg); @@ -302,7 +353,7 @@ struct IncomingValueHandler : public CallLowering::ValueHandler { if (!IsLittle) std::swap(NewRegs[0], NewRegs[1]); - MIRBuilder.buildSequence(Arg.Reg, NewRegs, {0, 32}); + MIRBuilder.buildMerge(Arg.Reg, NewRegs); return 1; } @@ -335,6 +386,7 @@ bool ARMCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, return false; auto &MF = MIRBuilder.getMF(); + auto &MBB = MIRBuilder.getMBB(); auto DL = MF.getDataLayout(); auto &TLI = *getTLI(); @@ -350,17 +402,31 @@ bool ARMCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, CCAssignFn *AssignFn = TLI.CCAssignFnForCall(F.getCallingConv(), F.isVarArg()); + FormalArgHandler ArgHandler(MIRBuilder, MIRBuilder.getMF().getRegInfo(), + AssignFn); + SmallVector ArgInfos; + SmallVector SplitRegs; unsigned Idx = 0; for (auto &Arg : F.args()) { ArgInfo AInfo(VRegs[Idx], Arg.getType()); setArgFlags(AInfo, Idx + AttributeList::FirstArgIndex, DL, F); - splitToValueTypes(AInfo, ArgInfos, DL, MF.getRegInfo()); + + SplitRegs.clear(); + + splitToValueTypes(AInfo, ArgInfos, MF, [&](unsigned Reg, uint64_t Offset) { + SplitRegs.push_back(Reg); + }); + + if (!SplitRegs.empty()) + MIRBuilder.buildMerge(VRegs[Idx], SplitRegs); + Idx++; } - FormalArgHandler ArgHandler(MIRBuilder, MIRBuilder.getMF().getRegInfo(), - AssignFn); + if (!MBB.empty()) + MIRBuilder.setInstr(*MBB.begin()); + return handleAssignments(MIRBuilder, ArgInfos, ArgHandler); } @@ -386,7 +452,8 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, MachineFunction &MF = MIRBuilder.getMF(); const auto &TLI = *getTLI(); const auto &DL = MF.getDataLayout(); - const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + const auto &STI = MF.getSubtarget(); + const TargetRegisterInfo *TRI = STI.getRegisterInfo(); MachineRegisterInfo &MRI = MF.getRegInfo(); if (MF.getSubtarget().genLongCalls()) @@ -398,6 +465,13 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, // registers, but don't insert it yet. 
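// Editorial note on the ordering here: the BLX is created with
// buildInstrNoInsert so the copies that marshal arguments into their
// physical registers can be emitted first; the call is inserted only after
// handleAssignments has placed every argument. For a register (indirect)
// callee, the lines that follow constrain the still-generic virtual
// register to a register class that is legal for BLX.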
auto MIB = MIRBuilder.buildInstrNoInsert(ARM::BLX).add(Callee).addRegMask( TRI->getCallPreservedMask(MF, CallConv)); + if (Callee.isReg()) { + auto CalleeReg = Callee.getReg(); + if (CalleeReg && !TRI->isPhysicalRegister(CalleeReg)) + MIB->getOperand(0).setReg(constrainOperandRegClass( + MF, *TRI, MRI, *STI.getInstrInfo(), *STI.getRegBankInfo(), + *MIB.getInstr(), MIB->getDesc(), CalleeReg, 0)); + } SmallVector ArgInfos; for (auto Arg : OrigArgs) { @@ -407,7 +481,13 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, if (!Arg.IsFixed) return false; - splitToValueTypes(Arg, ArgInfos, DL, MRI); + SmallVector Regs; + splitToValueTypes(Arg, ArgInfos, MF, [&](unsigned Reg, uint64_t Offset) { + Regs.push_back(Reg); + }); + + if (Regs.size() > 1) + MIRBuilder.buildUnmerge(Regs, Arg.Reg); } auto ArgAssignFn = TLI.CCAssignFnForCall(CallConv, /*IsVarArg=*/false); @@ -423,12 +503,22 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, return false; ArgInfos.clear(); - splitToValueTypes(OrigRet, ArgInfos, DL, MRI); + SmallVector SplitRegs; + splitToValueTypes(OrigRet, ArgInfos, MF, + [&](unsigned Reg, uint64_t Offset) { + SplitRegs.push_back(Reg); + }); auto RetAssignFn = TLI.CCAssignFnForReturn(CallConv, /*IsVarArg=*/false); CallReturnHandler RetHandler(MIRBuilder, MRI, MIB, RetAssignFn); if (!handleAssignments(MIRBuilder, ArgInfos, RetHandler)) return false; + + if (!SplitRegs.empty()) { + // We have split the value and allocated each individual piece, now build + // it up again. + MIRBuilder.buildMerge(OrigRet.Reg, SplitRegs); + } } // We now know the size of the stack - update the ADJCALLSTACKDOWN diff --git a/interpreter/llvm/src/lib/Target/ARM/ARMCallLowering.h b/interpreter/llvm/src/lib/Target/ARM/ARMCallLowering.h index 6404c7a2689ee..f5a6872336f60 100644 --- a/interpreter/llvm/src/lib/Target/ARM/ARMCallLowering.h +++ b/interpreter/llvm/src/lib/Target/ARM/ARMCallLowering.h @@ -42,11 +42,14 @@ class ARMCallLowering : public CallLowering { bool lowerReturnVal(MachineIRBuilder &MIRBuilder, const Value *Val, unsigned VReg, MachineInstrBuilder &Ret) const; + typedef std::function SplitArgTy; + /// Split an argument into one or more arguments that the CC lowering can cope /// with (e.g. replace pointers with integers). 
 void splitToValueTypes(const ArgInfo &OrigArg,
                        SmallVectorImpl<ArgInfo> &SplitArgs,
-                       const DataLayout &DL, MachineRegisterInfo &MRI) const;
+                       MachineFunction &MF,
+                       const SplitArgTy &PerformArgSplit) const;
 };
 } // End of namespace llvm
 #endif
diff --git a/interpreter/llvm/src/lib/Target/ARM/ARMConstantIslandPass.cpp b/interpreter/llvm/src/lib/Target/ARM/ARMConstantIslandPass.cpp
index 6434df317aa8d..667337dc9267f 100644
--- a/interpreter/llvm/src/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/interpreter/llvm/src/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -21,10 +21,10 @@
 #include "MCTargetDesc/ARMBaseInfo.h"
 #include "Thumb2InstrInfo.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
diff --git a/interpreter/llvm/src/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/interpreter/llvm/src/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 78a9144bd3214..46d8f0dba6914 100644
--- a/interpreter/llvm/src/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/interpreter/llvm/src/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -757,14 +757,9 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
   MI.eraseFromParent();
 }
 
-static void addPostLoopLiveIns(MachineBasicBlock *MBB, LivePhysRegs &LiveRegs) {
-  for (auto I = LiveRegs.begin(); I != LiveRegs.end(); ++I)
-    MBB->addLiveIn(*I);
-}
-
 /// Expand a CMP_SWAP pseudo-inst to an ldrex/strex loop as simply as
-/// possible. This only gets used at -O0 so we don't care about efficiency of the
-/// generated code.
+/// possible. This only gets used at -O0 so we don't care about efficiency of
+/// the generated code.
 bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator MBBI,
                                      unsigned LdrexOp, unsigned StrexOp,
@@ -773,16 +768,14 @@ bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB,
   bool IsThumb = STI->isThumb();
   MachineInstr &MI = *MBBI;
   DebugLoc DL = MI.getDebugLoc();
-  MachineOperand &Dest = MI.getOperand(0);
-  unsigned StatusReg = MI.getOperand(1).getReg();
-  MachineOperand &Addr = MI.getOperand(2);
-  MachineOperand &Desired = MI.getOperand(3);
-  MachineOperand &New = MI.getOperand(4);
-
-  LivePhysRegs LiveRegs(&TII->getRegisterInfo());
-  LiveRegs.addLiveOuts(MBB);
-  for (auto I = std::prev(MBB.end()); I != MBBI; --I)
-    LiveRegs.stepBackward(*I);
+  const MachineOperand &Dest = MI.getOperand(0);
+  unsigned TempReg = MI.getOperand(1).getReg();
+  // Duplicating undef operands into 2 instructions does not guarantee the same
+  // value on both; however, undef should be replaced by xzr anyway.
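
The pseudo being expanded here is a strong compare-and-swap, and the expansion conservatively assumes seq_cst ordering (see the ARMInstrInfo.td comment on the CMP_SWAP pseudos further down). For orientation, a portable C++ model of the contract the generated ldrex/strex loop provides; this is a toy helper, not LLVM code:

    #include <atomic>
    #include <cstdint>
    #include <iostream>

    bool cmpSwap32(std::atomic<uint32_t> &Addr, uint32_t Desired, uint32_t New) {
      uint32_t Expected = Desired;
      // On LL/SC targets compare_exchange_strong compiles to the same shape as
      // the .Lloadcmp/.Lstore blocks: load-exclusive, compare, store-exclusive,
      // and retry when the store loses the reservation.
      return Addr.compare_exchange_strong(Expected, New,
                                          std::memory_order_seq_cst);
    }

    int main() {
      std::atomic<uint32_t> V{5};
      std::cout << cmpSwap32(V, 5, 9) << ' ' << V.load() << '\n'; // 1 9
      std::cout << cmpSwap32(V, 5, 7) << ' ' << V.load() << '\n'; // 0 9
    }
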
+ assert(!MI.getOperand(2).isUndef() && "cannot handle undef"); + unsigned AddrReg = MI.getOperand(2).getReg(); + unsigned DesiredReg = MI.getOperand(3).getReg(); + unsigned NewReg = MI.getOperand(4).getReg(); MachineFunction *MF = MBB.getParent(); auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); @@ -795,8 +788,8 @@ bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB, if (UxtOp) { MachineInstrBuilder MIB = - BuildMI(MBB, MBBI, DL, TII->get(UxtOp), Desired.getReg()) - .addReg(Desired.getReg(), RegState::Kill); + BuildMI(MBB, MBBI, DL, TII->get(UxtOp), DesiredReg) + .addReg(DesiredReg, RegState::Kill); if (!IsThumb) MIB.addImm(0); MIB.add(predOps(ARMCC::AL)); @@ -806,14 +799,10 @@ bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB, // ldrex rDest, [rAddr] // cmp rDest, rDesired // bne .Ldone - LoadCmpBB->addLiveIn(Addr.getReg()); - LoadCmpBB->addLiveIn(Dest.getReg()); - LoadCmpBB->addLiveIn(Desired.getReg()); - addPostLoopLiveIns(LoadCmpBB, LiveRegs); MachineInstrBuilder MIB; MIB = BuildMI(LoadCmpBB, DL, TII->get(LdrexOp), Dest.getReg()); - MIB.addReg(Addr.getReg()); + MIB.addReg(AddrReg); if (LdrexOp == ARM::t2LDREX) MIB.addImm(0); // a 32-bit Thumb ldrex (only) allows an offset. MIB.add(predOps(ARMCC::AL)); @@ -821,7 +810,7 @@ bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB, unsigned CMPrr = IsThumb ? ARM::tCMPhir : ARM::CMPrr; BuildMI(LoadCmpBB, DL, TII->get(CMPrr)) .addReg(Dest.getReg(), getKillRegState(Dest.isDead())) - .add(Desired) + .addReg(DesiredReg) .add(predOps(ARMCC::AL)); unsigned Bcc = IsThumb ? ARM::tBcc : ARM::Bcc; BuildMI(LoadCmpBB, DL, TII->get(Bcc)) @@ -832,24 +821,19 @@ bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB, LoadCmpBB->addSuccessor(StoreBB); // .Lstore: - // strex rStatus, rNew, [rAddr] - // cmp rStatus, #0 + // strex rTempReg, rNew, [rAddr] + // cmp rTempReg, #0 // bne .Lloadcmp - StoreBB->addLiveIn(Addr.getReg()); - StoreBB->addLiveIn(New.getReg()); - addPostLoopLiveIns(StoreBB, LiveRegs); - - - MIB = BuildMI(StoreBB, DL, TII->get(StrexOp), StatusReg); - MIB.add(New); - MIB.add(Addr); + MIB = BuildMI(StoreBB, DL, TII->get(StrexOp), TempReg) + .addReg(NewReg) + .addReg(AddrReg); if (StrexOp == ARM::t2STREX) MIB.addImm(0); // a 32-bit Thumb strex (only) allows an offset. MIB.add(predOps(ARMCC::AL)); unsigned CMPri = IsThumb ? ARM::t2CMPri : ARM::CMPri; BuildMI(StoreBB, DL, TII->get(CMPri)) - .addReg(StatusReg, RegState::Kill) + .addReg(TempReg, RegState::Kill) .addImm(0) .add(predOps(ARMCC::AL)); BuildMI(StoreBB, DL, TII->get(Bcc)) @@ -861,12 +845,24 @@ bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB, DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end()); DoneBB->transferSuccessors(&MBB); - addPostLoopLiveIns(DoneBB, LiveRegs); MBB.addSuccessor(LoadCmpBB); NextMBBI = MBB.end(); MI.eraseFromParent(); + + // Recompute livein lists. + const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + LivePhysRegs LiveRegs; + computeLiveIns(LiveRegs, MRI, *DoneBB); + computeLiveIns(LiveRegs, MRI, *StoreBB); + computeLiveIns(LiveRegs, MRI, *LoadCmpBB); + // Do an extra pass around the loop to get loop carried registers right. 
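
The clear-and-recompute sequence that follows exists because live-in computation is a backward walk: on the first visit of a loop block, registers that are live only around the back-edge have not been discovered yet, so one more iteration over the loop blocks is needed to reach the fixed point. A deliberately simplified standalone model (defs and kills are ignored, which is why this is only a sketch) in which the second pass grows the store block's live-in set:

    #include <iostream>
    #include <set>
    #include <string>

    int main() {
      using RegSet = std::set<std::string>;
      // Registers read by each block of the CAS loop.
      const RegSet StoreUses = {"rAddr", "rNew"};
      const RegSet LoadCmpUses = {"rAddr", "rDesired"};

      RegSet StoreLive, LoadCmpLive;
      for (int Pass = 1; Pass <= 2; ++Pass) {
        // live-in(B) includes uses(B) plus live-in(successor) on the cycle.
        StoreLive = StoreUses;
        StoreLive.insert(LoadCmpLive.begin(), LoadCmpLive.end());
        LoadCmpLive = LoadCmpUses;
        LoadCmpLive.insert(StoreLive.begin(), StoreLive.end());
        std::cout << "pass " << Pass << ": store " << StoreLive.size()
                  << " live-ins, loadcmp " << LoadCmpLive.size() << "\n";
      }
      // Pass 2 adds rDesired to the store block: it is not read there, but it
      // is live around the back-edge, i.e. a loop-carried register.
    }
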
+  StoreBB->clearLiveIns();
+  computeLiveIns(LiveRegs, MRI, *StoreBB);
+  LoadCmpBB->clearLiveIns();
+  computeLiveIns(LiveRegs, MRI, *LoadCmpBB);
+
   return true;
 }
 
@@ -893,20 +889,19 @@ bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB,
   MachineInstr &MI = *MBBI;
   DebugLoc DL = MI.getDebugLoc();
   MachineOperand &Dest = MI.getOperand(0);
-  unsigned StatusReg = MI.getOperand(1).getReg();
-  MachineOperand &Addr = MI.getOperand(2);
-  MachineOperand &Desired = MI.getOperand(3);
-  MachineOperand &New = MI.getOperand(4);
+  unsigned TempReg = MI.getOperand(1).getReg();
+  // Duplicating undef operands into 2 instructions does not guarantee the same
+  // value on both; however, undef should be replaced by xzr anyway.
+  assert(!MI.getOperand(2).isUndef() && "cannot handle undef");
+  unsigned AddrReg = MI.getOperand(2).getReg();
+  unsigned DesiredReg = MI.getOperand(3).getReg();
+  MachineOperand New = MI.getOperand(4);
+  New.setIsKill(false);
 
   unsigned DestLo = TRI->getSubReg(Dest.getReg(), ARM::gsub_0);
   unsigned DestHi = TRI->getSubReg(Dest.getReg(), ARM::gsub_1);
-  unsigned DesiredLo = TRI->getSubReg(Desired.getReg(), ARM::gsub_0);
-  unsigned DesiredHi = TRI->getSubReg(Desired.getReg(), ARM::gsub_1);
-
-  LivePhysRegs LiveRegs(&TII->getRegisterInfo());
-  LiveRegs.addLiveOuts(MBB);
-  for (auto I = std::prev(MBB.end()); I != MBBI; --I)
-    LiveRegs.stepBackward(*I);
+  unsigned DesiredLo = TRI->getSubReg(DesiredReg, ARM::gsub_0);
+  unsigned DesiredHi = TRI->getSubReg(DesiredReg, ARM::gsub_1);
 
   MachineFunction *MF = MBB.getParent();
   auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
@@ -920,28 +915,23 @@
   // .Lloadcmp:
   //   ldrexd rDestLo, rDestHi, [rAddr]
   //   cmp rDestLo, rDesiredLo
-  //   sbcs rStatus, rDestHi, rDesiredHi
+  //   sbcs rTempReg, rDestHi, rDesiredHi
   //   bne .Ldone
-  LoadCmpBB->addLiveIn(Addr.getReg());
-  LoadCmpBB->addLiveIn(Dest.getReg());
-  LoadCmpBB->addLiveIn(Desired.getReg());
-  addPostLoopLiveIns(LoadCmpBB, LiveRegs);
-
   unsigned LDREXD = IsThumb ? ARM::t2LDREXD : ARM::LDREXD;
   MachineInstrBuilder MIB;
   MIB = BuildMI(LoadCmpBB, DL, TII->get(LDREXD));
   addExclusiveRegPair(MIB, Dest, RegState::Define, IsThumb, TRI);
-  MIB.addReg(Addr.getReg()).add(predOps(ARMCC::AL));
+  MIB.addReg(AddrReg).add(predOps(ARMCC::AL));
 
   unsigned CMPrr = IsThumb ? ARM::tCMPhir : ARM::CMPrr;
   BuildMI(LoadCmpBB, DL, TII->get(CMPrr))
       .addReg(DestLo, getKillRegState(Dest.isDead()))
-      .addReg(DesiredLo, getKillRegState(Desired.isDead()))
+      .addReg(DesiredLo)
       .add(predOps(ARMCC::AL));
   BuildMI(LoadCmpBB, DL, TII->get(CMPrr))
       .addReg(DestHi, getKillRegState(Dest.isDead()))
-      .addReg(DesiredHi, getKillRegState(Desired.isDead()))
+      .addReg(DesiredHi)
       .addImm(ARMCC::EQ).addReg(ARM::CPSR, RegState::Kill);
 
   unsigned Bcc = IsThumb ? ARM::tBcc : ARM::Bcc;
@@ -953,21 +943,17 @@
   LoadCmpBB->addSuccessor(StoreBB);
 
   // .Lstore:
-  //   strexd rStatus, rNewLo, rNewHi, [rAddr]
-  //   cmp rStatus, #0
+  //   strexd rTempReg, rNewLo, rNewHi, [rAddr]
+  //   cmp rTempReg, #0
   //   bne .Lloadcmp
-  StoreBB->addLiveIn(Addr.getReg());
-  StoreBB->addLiveIn(New.getReg());
-  addPostLoopLiveIns(StoreBB, LiveRegs);
-
   unsigned STREXD = IsThumb ?
ARM::t2STREXD : ARM::STREXD; - MIB = BuildMI(StoreBB, DL, TII->get(STREXD), StatusReg); + MIB = BuildMI(StoreBB, DL, TII->get(STREXD), TempReg); addExclusiveRegPair(MIB, New, 0, IsThumb, TRI); - MIB.add(Addr).add(predOps(ARMCC::AL)); + MIB.addReg(AddrReg).add(predOps(ARMCC::AL)); unsigned CMPri = IsThumb ? ARM::t2CMPri : ARM::CMPri; BuildMI(StoreBB, DL, TII->get(CMPri)) - .addReg(StatusReg, RegState::Kill) + .addReg(TempReg, RegState::Kill) .addImm(0) .add(predOps(ARMCC::AL)); BuildMI(StoreBB, DL, TII->get(Bcc)) @@ -979,12 +965,24 @@ bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB, DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end()); DoneBB->transferSuccessors(&MBB); - addPostLoopLiveIns(DoneBB, LiveRegs); MBB.addSuccessor(LoadCmpBB); NextMBBI = MBB.end(); MI.eraseFromParent(); + + // Recompute livein lists. + const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + LivePhysRegs LiveRegs; + computeLiveIns(LiveRegs, MRI, *DoneBB); + computeLiveIns(LiveRegs, MRI, *StoreBB); + computeLiveIns(LiveRegs, MRI, *LoadCmpBB); + // Do an extra pass around the loop to get loop carried registers right. + StoreBB->clearLiveIns(); + computeLiveIns(LiveRegs, MRI, *StoreBB); + LoadCmpBB->clearLiveIns(); + computeLiveIns(LiveRegs, MRI, *LoadCmpBB); + return true; } diff --git a/interpreter/llvm/src/lib/Target/ARM/ARMFastISel.cpp b/interpreter/llvm/src/lib/Target/ARM/ARMFastISel.cpp index 4f6a73b5980d1..bf00ef61c2d1b 100644 --- a/interpreter/llvm/src/lib/Target/ARM/ARMFastISel.cpp +++ b/interpreter/llvm/src/lib/Target/ARM/ARMFastISel.cpp @@ -26,8 +26,8 @@ #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" @@ -250,8 +250,7 @@ bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) { return false; // Look to see if our OptionalDef is defining CPSR or CCR. - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); + for (const MachineOperand &MO : MI->operands()) { if (!MO.isReg() || !MO.isDef()) continue; if (MO.getReg() == ARM::CPSR) *CPSR = true; @@ -267,8 +266,8 @@ bool ARMFastISel::isARMNEONPred(const MachineInstr *MI) { AFI->isThumb2Function()) return MI->isPredicable(); - for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) - if (MCID.OpInfo[i].isPredicate()) + for (const MCOperandInfo &opInfo : MCID.operands()) + if (opInfo.isPredicate()) return true; return false; @@ -1972,7 +1971,7 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl &Args, break; } case CCValAssign::AExt: - // Intentional fall-through. Handle AExt and ZExt. + // Intentional fall-through. Handle AExt and ZExt. 
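
This "intentional fall-through" comment is the pattern the patch elsewhere makes explicit with LLVM_FALLTHROUGH (see the LowerVSETCC hunk below): the AExt case is handled by dropping into the ZExt case. A compilable illustration using the standard C++17 spelling of the same annotation:

    #include <iostream>

    enum class Ext { AExt, ZExt, SExt };

    int extendTo32(Ext Kind, unsigned char V) {
      switch (Kind) {
      case Ext::AExt:
        // Any-extend may pick either form; reuse the zero-extend path.
        [[fallthrough]]; // spelled LLVM_FALLTHROUGH in LLVM sources
      case Ext::ZExt:
        return int(V);
      case Ext::SExt:
        return int(static_cast<signed char>(V));
      }
      return 0;
    }

    int main() {
      std::cout << extendTo32(Ext::AExt, 0xFF) << ' '
                << extendTo32(Ext::SExt, 0xFF) << '\n'; // 255 -1
    }
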
case CCValAssign::ZExt: { MVT DestVT = VA.getLocVT(); Arg = ARMEmitIntExt(ArgVT, Arg, DestVT, /*isZExt*/true); @@ -2001,6 +2000,7 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl &Args, assert(VA.getLocVT() == MVT::f64 && "Custom lowering for v2f64 args not available"); + // FIXME: ArgLocs[++i] may extend beyond ArgLocs.size() CCValAssign &NextVA = ArgLocs[++i]; assert(VA.isRegLoc() && NextVA.isRegLoc() && @@ -2172,8 +2172,8 @@ bool ARMFastISel::SelectRet(const Instruction *I) { MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(RetOpc)); AddOptionalDefs(MIB); - for (unsigned i = 0, e = RetRegs.size(); i != e; ++i) - MIB.addReg(RetRegs[i], RegState::Implicit); + for (unsigned R : RetRegs) + MIB.addReg(R, RegState::Implicit); return true; } @@ -2233,8 +2233,7 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { ArgRegs.reserve(I->getNumOperands()); ArgVTs.reserve(I->getNumOperands()); ArgFlags.reserve(I->getNumOperands()); - for (unsigned i = 0; i < I->getNumOperands(); ++i) { - Value *Op = I->getOperand(i); + for (Value *Op : I->operands()) { unsigned Arg = getRegForValue(Op); if (Arg == 0) return false; @@ -2278,8 +2277,8 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { MIB.addExternalSymbol(TLI.getLibcallName(Call)); // Add implicit physical register uses to the call. - for (unsigned i = 0, e = RegArgs.size(); i != e; ++i) - MIB.addReg(RegArgs[i], RegState::Implicit); + for (unsigned R : RegArgs) + MIB.addReg(R, RegState::Implicit); // Add a register mask with the call-preserved registers. // Proper defs for return values will be added by setPhysRegsDeadExcept(). @@ -2423,8 +2422,8 @@ bool ARMFastISel::SelectCall(const Instruction *I, MIB.addExternalSymbol(IntrMemName, 0); // Add implicit physical register uses to the call. - for (unsigned i = 0, e = RegArgs.size(); i != e; ++i) - MIB.addReg(RegArgs[i], RegState::Implicit); + for (unsigned R : RegArgs) + MIB.addReg(R, RegState::Implicit); // Add a register mask with the call-preserved registers. // Proper defs for return values will be added by setPhysRegsDeadExcept(). @@ -2932,13 +2931,12 @@ bool ARMFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo, bool Found = false; bool isZExt; - for (unsigned i = 0, e = array_lengthof(FoldableLoadExtends); - i != e; ++i) { - if (FoldableLoadExtends[i].Opc[isThumb2] == MI->getOpcode() && - (uint64_t)FoldableLoadExtends[i].ExpectedImm == Imm && - MVT((MVT::SimpleValueType)FoldableLoadExtends[i].ExpectedVT) == VT) { + for (const FoldableLoadExtendsStruct &FLE : FoldableLoadExtends) { + if (FLE.Opc[isThumb2] == MI->getOpcode() && + (uint64_t)FLE.ExpectedImm == Imm && + MVT((MVT::SimpleValueType)FLE.ExpectedVT) == VT) { Found = true; - isZExt = FoldableLoadExtends[i].isZExt; + isZExt = FLE.isZExt; } } if (!Found) return false; @@ -3057,9 +3055,8 @@ bool ARMFastISel::fastLowerArguments() { }; const TargetRegisterClass *RC = &ARM::rGPRRegClass; - for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); - I != E; ++I) { - unsigned ArgNo = I->getArgNo(); + for (const Argument &Arg : F->args()) { + unsigned ArgNo = Arg.getArgNo(); unsigned SrcReg = GPRArgRegs[ArgNo]; unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC); // FIXME: Unfortunately it's necessary to emit a copy from the livein copy. 
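
The ARMFastISel hunks above are mechanical modernizations: explicit index loops over operands, return registers, and call arguments become range-based for loops with identical iteration order and no behavior change. A trivial before/after in plain C++:

    #include <cstdio>
    #include <vector>

    int main() {
      std::vector<unsigned> RetRegs = {0, 1, 4};
      // Before: for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
      //           use(RetRegs[i]);
      for (unsigned R : RetRegs) // after: same order, less indexing noise
        std::printf("r%u ", R);
      std::printf("\n");
    }
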
@@ -3069,7 +3066,7 @@ bool ARMFastISel::fastLowerArguments() { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), ResultReg).addReg(DstReg, getKillRegState(true)); - updateValueMap(&*I, ResultReg); + updateValueMap(&Arg, ResultReg); } return true; diff --git a/interpreter/llvm/src/lib/Target/ARM/ARMFrameLowering.cpp b/interpreter/llvm/src/lib/Target/ARM/ARMFrameLowering.cpp index 4f7a0ab4e2203..16b54e8848c23 100644 --- a/interpreter/llvm/src/lib/Target/ARM/ARMFrameLowering.cpp +++ b/interpreter/llvm/src/lib/Target/ARM/ARMFrameLowering.cpp @@ -20,9 +20,9 @@ #include "MCTargetDesc/ARMAddressingModes.h" #include "MCTargetDesc/ARMBaseInfo.h" #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -968,8 +968,9 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB, if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs) continue; - bool isLiveIn = MF.getRegInfo().isLiveIn(Reg); - if (!isLiveIn) + const MachineRegisterInfo &MRI = MF.getRegInfo(); + bool isLiveIn = MRI.isLiveIn(Reg); + if (!isLiveIn && !MRI.isReserved(Reg)) MBB.addLiveIn(Reg); // If NoGap is true, push consecutive registers and then leave the rest // for other instructions. e.g. diff --git a/interpreter/llvm/src/lib/Target/ARM/ARMISelDAGToDAG.cpp b/interpreter/llvm/src/lib/Target/ARM/ARMISelDAGToDAG.cpp index 7f9fe55a5c38b..f75dd4de3f96c 100644 --- a/interpreter/llvm/src/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/interpreter/llvm/src/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -2682,9 +2682,12 @@ void ARMDAGToDAGISel::Select(SDNode *N) { SDNode *ResNode; if (Subtarget->isThumb()) { - SDValue Pred = getAL(CurDAG, dl); - SDValue PredReg = CurDAG->getRegister(0, MVT::i32); - SDValue Ops[] = { CPIdx, Pred, PredReg, CurDAG->getEntryNode() }; + SDValue Ops[] = { + CPIdx, + getAL(CurDAG, dl), + CurDAG->getRegister(0, MVT::i32), + CurDAG->getEntryNode() + }; ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other, Ops); } else { @@ -2698,6 +2701,17 @@ void ARMDAGToDAGISel::Select(SDNode *N) { ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other, Ops); } + // Annotate the Node with memory operand information so that MachineInstr + // queries work properly. This e.g. gives the register allocation the + // required information for rematerialization. 
+ MachineFunction& MF = CurDAG->getMachineFunction(); + MachineSDNode::mmo_iterator MemOp = MF.allocateMemRefsArray(1); + MemOp[0] = MF.getMachineMemOperand( + MachinePointerInfo::getConstantPool(MF), + MachineMemOperand::MOLoad, 4, 4); + + cast(ResNode)->setMemRefs(MemOp, MemOp+1); + ReplaceNode(N, ResNode); return; } diff --git a/interpreter/llvm/src/lib/Target/ARM/ARMISelLowering.cpp b/interpreter/llvm/src/lib/Target/ARM/ARMISelLowering.cpp index f8b584db7b99b..27dda93387b6f 100644 --- a/interpreter/llvm/src/lib/Target/ARM/ARMISelLowering.cpp +++ b/interpreter/llvm/src/lib/Target/ARM/ARMISelLowering.cpp @@ -12,11 +12,11 @@ // //===----------------------------------------------------------------------===// +#include "ARMISelLowering.h" #include "ARMBaseInstrInfo.h" #include "ARMBaseRegisterInfo.h" #include "ARMCallingConv.h" #include "ARMConstantPoolValue.h" -#include "ARMISelLowering.h" #include "ARMMachineFunctionInfo.h" #include "ARMPerfectShuffle.h" #include "ARMRegisterInfo.h" @@ -29,13 +29,13 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" #include "llvm/Analysis/VectorUtils.h" @@ -61,7 +61,6 @@ #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/Function.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/DerivedTypes.h" @@ -103,8 +102,8 @@ #include #include #include -#include #include +#include #include #include @@ -127,7 +126,7 @@ static cl::opt EnableConstpoolPromotion( "arm-promote-constant", cl::Hidden, cl::desc("Enable / disable promotion of unnamed_addr constants into " "constant pools"), - cl::init(true)); + cl::init(false)); // FIXME: set to true by default once PR32780 is fixed static cl::opt ConstpoolPromotionMaxSize( "arm-promote-constant-max-size", cl::Hidden, cl::desc("Maximum size of constant to promote into a constant pool"), @@ -585,7 +584,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FSQRT, MVT::v2f64, Expand); setOperationAction(ISD::FSIN, MVT::v2f64, Expand); setOperationAction(ISD::FCOS, MVT::v2f64, Expand); - setOperationAction(ISD::FPOWI, MVT::v2f64, Expand); setOperationAction(ISD::FPOW, MVT::v2f64, Expand); setOperationAction(ISD::FLOG, MVT::v2f64, Expand); setOperationAction(ISD::FLOG2, MVT::v2f64, Expand); @@ -603,7 +601,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FSQRT, MVT::v4f32, Expand); setOperationAction(ISD::FSIN, MVT::v4f32, Expand); setOperationAction(ISD::FCOS, MVT::v4f32, Expand); - setOperationAction(ISD::FPOWI, MVT::v4f32, Expand); setOperationAction(ISD::FPOW, MVT::v4f32, Expand); setOperationAction(ISD::FLOG, MVT::v4f32, Expand); setOperationAction(ISD::FLOG2, MVT::v4f32, Expand); @@ -620,7 +617,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FSQRT, MVT::v2f32, Expand); setOperationAction(ISD::FSIN, MVT::v2f32, Expand); setOperationAction(ISD::FCOS, MVT::v2f32, Expand); - setOperationAction(ISD::FPOWI, MVT::v2f32, Expand); setOperationAction(ISD::FPOW, MVT::v2f32, Expand); setOperationAction(ISD::FLOG, MVT::v2f32, 
                                                 Expand);
     setOperationAction(ISD::FLOG2, MVT::v2f32, Expand);
@@ -743,7 +739,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::FSQRT, MVT::f64, Expand);
     setOperationAction(ISD::FSIN, MVT::f64, Expand);
     setOperationAction(ISD::FCOS, MVT::f64, Expand);
-    setOperationAction(ISD::FPOWI, MVT::f64, Expand);
     setOperationAction(ISD::FPOW, MVT::f64, Expand);
     setOperationAction(ISD::FLOG, MVT::f64, Expand);
     setOperationAction(ISD::FLOG2, MVT::f64, Expand);
@@ -2674,12 +2669,35 @@ static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) {
 // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
 // be used to form addressing mode. These wrapped nodes will be selected
 // into MOVi.
-static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
+SDValue ARMTargetLowering::LowerConstantPool(SDValue Op,
+                                             SelectionDAG &DAG) const {
   EVT PtrVT = Op.getValueType();
   // FIXME there is no actual debug info here
   SDLoc dl(Op);
   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
   SDValue Res;
+
+  // When generating execute-only code Constant Pools must be promoted to the
+  // global data section. It's a bit ugly that we can't share them across basic
+  // blocks, but this way we guarantee that execute-only behaves correctly with
+  // position-independent addressing modes.
+  if (Subtarget->genExecuteOnly()) {
+    auto AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
+    auto T = const_cast<Type*>(CP->getType());
+    auto C = const_cast<Constant*>(CP->getConstVal());
+    auto M = const_cast<Module*>(DAG.getMachineFunction().
+                                 getFunction()->getParent());
+    auto GV = new GlobalVariable(
+                    *M, T, /*isConst=*/true, GlobalVariable::InternalLinkage, C,
+                    Twine(DAG.getDataLayout().getPrivateGlobalPrefix()) + "CP" +
+                      Twine(DAG.getMachineFunction().getFunctionNumber()) + "_" +
+                      Twine(AFI->createPICLabelUId())
+                  );
+    SDValue GA = DAG.getTargetGlobalAddress(dyn_cast<GlobalValue>(GV),
+                                            dl, PtrVT);
+    return LowerGlobalAddress(GA, DAG);
+  }
+
   if (CP->isMachineConstantPoolEntry())
     Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
                                     CP->getAlignment());
@@ -3123,6 +3141,19 @@ static bool isReadOnly(const GlobalValue *GV) {
          isa<Function>(GV);
 }
 
+SDValue ARMTargetLowering::LowerGlobalAddress(SDValue Op,
+                                              SelectionDAG &DAG) const {
+  switch (Subtarget->getTargetTriple().getObjectFormat()) {
+  default: llvm_unreachable("unknown object format");
+  case Triple::COFF:
+    return LowerGlobalAddressWindows(Op, DAG);
+  case Triple::ELF:
+    return LowerGlobalAddressELF(Op, DAG);
+  case Triple::MachO:
+    return LowerGlobalAddressDarwin(Op, DAG);
+  }
+}
+
 SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
                                                  SelectionDAG &DAG) const {
   EVT PtrVT = getPointerTy(DAG.getDataLayout());
@@ -3367,9 +3398,9 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
 static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
                                  const ARMSubtarget *Subtarget) {
   SDLoc dl(Op);
-  ConstantSDNode *ScopeN = cast<ConstantSDNode>(Op.getOperand(2));
-  auto Scope = static_cast<SynchronizationScope>(ScopeN->getZExtValue());
-  if (Scope == SynchronizationScope::SingleThread)
+  ConstantSDNode *SSIDNode = cast<ConstantSDNode>(Op.getOperand(2));
+  auto SSID = static_cast<SyncScope::ID>(SSIDNode->getZExtValue());
+  if (SSID == SyncScope::SingleThread)
     return Op;
 
   if (!Subtarget->hasDataBarrier()) {
@@ -5325,15 +5356,15 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
   // Integer comparisons.
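
The switch that follows maps every integer condition onto the small set of NEON compares (VCEQ, VCGT, VCGE and their unsigned variants) by swapping the operands (a < b equals b > a) or inverting the result (a != b equals !(a == b)); the LLVM_FALLTHROUGH markers added here document that each Invert/Swap case deliberately drops into its base case. A scalar model of the same reduction, as an illustrative function rather than LLVM code:

    #include <iostream>
    #include <string>
    #include <utility>

    bool lowerSetCC(const std::string &Cond, unsigned A, unsigned B) {
      bool Invert = false, Swap = false;
      std::string Opc;
      if (Cond == "eq")      { Opc = "eq"; }
      else if (Cond == "ne") { Invert = true; Opc = "eq"; } // !(a == b)
      else if (Cond == "gt") { Opc = "gt"; }
      else if (Cond == "lt") { Swap = true; Opc = "gt"; }   // b > a
      else                   { return false; }
      if (Swap)
        std::swap(A, B);
      bool R = (Opc == "eq") ? (A == B) : (A > B);
      return Invert ? !R : R;
    }

    int main() {
      std::cout << lowerSetCC("lt", 1, 2) << lowerSetCC("ne", 3, 3) << "\n"; // 10
    }
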
switch (SetCCOpcode) { default: llvm_unreachable("Illegal integer comparison"); - case ISD::SETNE: Invert = true; + case ISD::SETNE: Invert = true; LLVM_FALLTHROUGH; case ISD::SETEQ: Opc = ARMISD::VCEQ; break; - case ISD::SETLT: Swap = true; + case ISD::SETLT: Swap = true; LLVM_FALLTHROUGH; case ISD::SETGT: Opc = ARMISD::VCGT; break; - case ISD::SETLE: Swap = true; + case ISD::SETLE: Swap = true; LLVM_FALLTHROUGH; case ISD::SETGE: Opc = ARMISD::VCGE; break; - case ISD::SETULT: Swap = true; + case ISD::SETULT: Swap = true; LLVM_FALLTHROUGH; case ISD::SETUGT: Opc = ARMISD::VCGTU; break; - case ISD::SETULE: Swap = true; + case ISD::SETULE: Swap = true; LLVM_FALLTHROUGH; case ISD::SETUGE: Opc = ARMISD::VCGEU; break; } @@ -5870,7 +5901,10 @@ static bool isVUZPMask(ArrayRef M, EVT VT, unsigned &WhichResult) { return false; for (unsigned i = 0; i < M.size(); i += NumElts) { - WhichResult = M[i] == 0 ? 0 : 1; + if (M.size() == NumElts * 2) + WhichResult = i / NumElts; + else + WhichResult = M[i] == 0 ? 0 : 1; for (unsigned j = 0; j < NumElts; ++j) { if (M[i+j] >= 0 && (unsigned) M[i+j] != 2 * j + WhichResult) return false; @@ -5901,7 +5935,10 @@ static bool isVUZP_v_undef_Mask(ArrayRef M, EVT VT, unsigned &WhichResult){ unsigned Half = NumElts / 2; for (unsigned i = 0; i < M.size(); i += NumElts) { - WhichResult = M[i] == 0 ? 0 : 1; + if (M.size() == NumElts * 2) + WhichResult = i / NumElts; + else + WhichResult = M[i] == 0 ? 0 : 1; for (unsigned j = 0; j < NumElts; j += Half) { unsigned Idx = WhichResult; for (unsigned k = 0; k < Half; ++k) { @@ -5941,7 +5978,10 @@ static bool isVZIPMask(ArrayRef M, EVT VT, unsigned &WhichResult) { return false; for (unsigned i = 0; i < M.size(); i += NumElts) { - WhichResult = M[i] == 0 ? 0 : 1; + if (M.size() == NumElts * 2) + WhichResult = i / NumElts; + else + WhichResult = M[i] == 0 ? 0 : 1; unsigned Idx = WhichResult * NumElts / 2; for (unsigned j = 0; j < NumElts; j += 2) { if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) || @@ -5974,7 +6014,10 @@ static bool isVZIP_v_undef_Mask(ArrayRef M, EVT VT, unsigned &WhichResult){ return false; for (unsigned i = 0; i < M.size(); i += NumElts) { - WhichResult = M[i] == 0 ? 0 : 1; + if (M.size() == NumElts * 2) + WhichResult = i / NumElts; + else + WhichResult = M[i] == 0 ? 0 : 1; unsigned Idx = WhichResult * NumElts / 2; for (unsigned j = 0; j < NumElts; j += 2) { if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) || @@ -7549,6 +7592,9 @@ static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V) { SDValue VHi = DAG.getAnyExtOrTrunc( DAG.getNode(ISD::SRL, dl, MVT::i64, V, DAG.getConstant(32, dl, MVT::i32)), dl, MVT::i32); + bool isBigEndian = DAG.getDataLayout().isBigEndian(); + if (isBigEndian) + std::swap (VLo, VHi); SDValue RegClass = DAG.getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32); SDValue SubReg0 = DAG.getTargetConstant(ARM::gsub_0, dl, MVT::i32); @@ -7576,10 +7622,14 @@ static void ReplaceCMP_SWAP_64Results(SDNode *N, MemOp[0] = cast(N)->getMemOperand(); cast(CmpSwap)->setMemRefs(MemOp, MemOp + 1); - Results.push_back(DAG.getTargetExtractSubreg(ARM::gsub_0, SDLoc(N), MVT::i32, - SDValue(CmpSwap, 0))); - Results.push_back(DAG.getTargetExtractSubreg(ARM::gsub_1, SDLoc(N), MVT::i32, - SDValue(CmpSwap, 0))); + bool isBigEndian = DAG.getDataLayout().isBigEndian(); + + Results.push_back( + DAG.getTargetExtractSubreg(isBigEndian ? ARM::gsub_1 : ARM::gsub_0, + SDLoc(N), MVT::i32, SDValue(CmpSwap, 0))); + Results.push_back( + DAG.getTargetExtractSubreg(isBigEndian ? 
ARM::gsub_0 : ARM::gsub_1, + SDLoc(N), MVT::i32, SDValue(CmpSwap, 0))); Results.push_back(SDValue(CmpSwap, 2)); } @@ -7639,21 +7689,9 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: llvm_unreachable("Don't know how to custom lower this!"); case ISD::WRITE_REGISTER: return LowerWRITE_REGISTER(Op, DAG); - case ISD::ConstantPool: - if (Subtarget->genExecuteOnly()) - llvm_unreachable("execute-only should not generate constant pools"); - return LowerConstantPool(Op, DAG); + case ISD::ConstantPool: return LowerConstantPool(Op, DAG); case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); - case ISD::GlobalAddress: - switch (Subtarget->getTargetTriple().getObjectFormat()) { - default: llvm_unreachable("unknown object format"); - case Triple::COFF: - return LowerGlobalAddressWindows(Op, DAG); - case Triple::ELF: - return LowerGlobalAddressELF(Op, DAG); - case Triple::MachO: - return LowerGlobalAddressDarwin(Op, DAG); - } + case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); case ISD::SELECT: return LowerSELECT(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); @@ -8767,6 +8805,8 @@ ARMTargetLowering::EmitLowered__chkstk(MachineInstr &MI, .addReg(ARM::R4, RegState::Implicit | RegState::Kill) .addReg(ARM::R4, RegState::Implicit | RegState::Define) .addReg(ARM::R12, + RegState::Implicit | RegState::Define | RegState::Dead) + .addReg(ARM::CPSR, RegState::Implicit | RegState::Define | RegState::Dead); break; case CodeModel::Large: @@ -8782,6 +8822,8 @@ ARMTargetLowering::EmitLowered__chkstk(MachineInstr &MI, .addReg(ARM::R4, RegState::Implicit | RegState::Kill) .addReg(ARM::R4, RegState::Implicit | RegState::Define) .addReg(ARM::R12, + RegState::Implicit | RegState::Define | RegState::Dead) + .addReg(ARM::CPSR, RegState::Implicit | RegState::Define | RegState::Dead); break; } @@ -12147,12 +12189,6 @@ EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size, } } - // Lowering to i32/i16 if the size permits. - if (Size >= 4) - return MVT::i32; - else if (Size >= 2) - return MVT::i16; - // Let the target-independent logic figure it out. return MVT::Other; } @@ -13759,7 +13795,9 @@ bool ARMTargetLowering::lowerInterleavedLoad( // Convert the integer vector to pointer vector if the element is pointer. if (EltTy->isPointerTy()) - SubVec = Builder.CreateIntToPtr(SubVec, SV->getType()); + SubVec = Builder.CreateIntToPtr( + SubVec, VectorType::get(SV->getType()->getVectorElementType(), + VecTy->getVectorNumElements())); SubVecs[SV].push_back(SubVec); } diff --git a/interpreter/llvm/src/lib/Target/ARM/ARMISelLowering.h b/interpreter/llvm/src/lib/Target/ARM/ARMISelLowering.h index 875c06210ae60..f05b142552369 100644 --- a/interpreter/llvm/src/lib/Target/ARM/ARMISelLowering.h +++ b/interpreter/llvm/src/lib/Target/ARM/ARMISelLowering.h @@ -510,7 +510,8 @@ class InstrItineraryData; bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx, unsigned &Cost) const override; - bool canMergeStoresTo(EVT MemVT) const override { + bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT, + const SelectionDAG &DAG) const override { // Do not merge to larger than i32. 
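
The new canMergeStoresTo overload receives the address space and the DAG, but ARM ignores both; the body keeps the existing policy of never merging stores beyond 32 bits. A rough picture of what that cap still permits, in plain C++ (output byte order depends on the host's endianness):

    #include <cstdint>
    #include <cstring>
    #include <iostream>

    int main() {
      std::uint8_t Buf[8] = {};
      // Four adjacent byte stores...
      Buf[0] = 0x01; Buf[1] = 0x02; Buf[2] = 0x03; Buf[3] = 0x04;
      // ...carry the same bytes as the single 32-bit store the DAG combiner
      // may still form; a 64-bit merge would exceed the cap.
      std::uint32_t Word;
      std::memcpy(&Word, Buf, 4);
      std::cout << std::hex << Word << "\n";
    }
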
return (MemVT.getSizeInBits() <= 32); } @@ -601,6 +602,8 @@ class InstrItineraryData; SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) const; SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalAddressDarwin(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalAddressWindows(SDValue Op, SelectionDAG &DAG) const; diff --git a/interpreter/llvm/src/lib/Target/ARM/ARMInstrInfo.td b/interpreter/llvm/src/lib/Target/ARM/ARMInstrInfo.td index d06b7d0896f16..7206083a70791 100644 --- a/interpreter/llvm/src/lib/Target/ARM/ARMInstrInfo.td +++ b/interpreter/llvm/src/lib/Target/ARM/ARMInstrInfo.td @@ -6053,21 +6053,21 @@ def SPACE : PseudoInst<(outs GPR:$Rd), (ins i32imm:$size, GPR:$Rn), // significantly more naive than the standard expansion: we conservatively // assume seq_cst, strong cmpxchg and omit clrex on failure. -let Constraints = "@earlyclobber $Rd,@earlyclobber $status", +let Constraints = "@earlyclobber $Rd,@earlyclobber $temp", mayLoad = 1, mayStore = 1 in { -def CMP_SWAP_8 : PseudoInst<(outs GPR:$Rd, GPR:$status), +def CMP_SWAP_8 : PseudoInst<(outs GPR:$Rd, GPR:$temp), (ins GPR:$addr, GPR:$desired, GPR:$new), NoItinerary, []>, Sched<[]>; -def CMP_SWAP_16 : PseudoInst<(outs GPR:$Rd, GPR:$status), +def CMP_SWAP_16 : PseudoInst<(outs GPR:$Rd, GPR:$temp), (ins GPR:$addr, GPR:$desired, GPR:$new), NoItinerary, []>, Sched<[]>; -def CMP_SWAP_32 : PseudoInst<(outs GPR:$Rd, GPR:$status), +def CMP_SWAP_32 : PseudoInst<(outs GPR:$Rd, GPR:$temp), (ins GPR:$addr, GPR:$desired, GPR:$new), NoItinerary, []>, Sched<[]>; -def CMP_SWAP_64 : PseudoInst<(outs GPRPair:$Rd, GPR:$status), +def CMP_SWAP_64 : PseudoInst<(outs GPRPair:$Rd, GPR:$temp), (ins GPR:$addr, GPRPair:$desired, GPRPair:$new), NoItinerary, []>, Sched<[]>; } diff --git a/interpreter/llvm/src/lib/Target/ARM/ARMInstrNEON.td b/interpreter/llvm/src/lib/Target/ARM/ARMInstrNEON.td index 51290e5a5b93b..858136a820784 100644 --- a/interpreter/llvm/src/lib/Target/ARM/ARMInstrNEON.td +++ b/interpreter/llvm/src/lib/Target/ARM/ARMInstrNEON.td @@ -674,7 +674,7 @@ let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in { class VLD1D op7_4, string Dt, Operand AddrMode> : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd), (ins AddrMode:$Rn), IIC_VLD1, - "vld1", Dt, "$Vd, $Rn", "", []> { + "vld1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVLD1]> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDST1Instruction"; @@ -682,7 +682,7 @@ class VLD1D op7_4, string Dt, Operand AddrMode> class VLD1Q op7_4, string Dt, Operand AddrMode> : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd), (ins AddrMode:$Rn), IIC_VLD1x2, - "vld1", Dt, "$Vd, $Rn", "", []> { + "vld1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVLD2]> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; @@ -703,7 +703,7 @@ multiclass VLD1DWB op7_4, string Dt, Operand AddrMode> { def _fixed : NLdSt<0,0b10, 0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb), (ins AddrMode:$Rn), IIC_VLD1u, "vld1", Dt, "$Vd, $Rn!", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDST1Instruction"; @@ -711,7 +711,7 @@ multiclass VLD1DWB op7_4, string Dt, Operand AddrMode> { def _register : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb), (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1u, "vld1", Dt, "$Vd, $Rn, $Rm", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> { let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDST1Instruction"; } @@ -720,7 +720,7 @@ multiclass VLD1QWB op7_4, string Dt, Operand AddrMode> { def _fixed : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb), (ins AddrMode:$Rn), IIC_VLD1x2u, "vld1", Dt, "$Vd, $Rn!", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; @@ -728,7 +728,7 @@ multiclass VLD1QWB op7_4, string Dt, Operand AddrMode> { def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb), (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u, "vld1", Dt, "$Vd, $Rn, $Rm", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> { let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; } @@ -747,7 +747,7 @@ defm VLD1q64wb : VLD1QWB<{1,1,?,?}, "64", addrmode6align64or128>; class VLD1D3 op7_4, string Dt, Operand AddrMode> : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd), (ins AddrMode:$Rn), IIC_VLD1x3, "vld1", Dt, - "$Vd, $Rn", "", []> { + "$Vd, $Rn", "", []>, Sched<[WriteVLD3]> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDST1Instruction"; @@ -756,7 +756,7 @@ multiclass VLD1D3WB op7_4, string Dt, Operand AddrMode> { def _fixed : NLdSt<0,0b10,0b0110, op7_4, (outs VecListThreeD:$Vd, GPR:$wb), (ins AddrMode:$Rn), IIC_VLD1x2u, "vld1", Dt, "$Vd, $Rn!", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVLD3]> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDST1Instruction"; @@ -764,7 +764,7 @@ multiclass VLD1D3WB op7_4, string Dt, Operand AddrMode> { def _register : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd, GPR:$wb), (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u, "vld1", Dt, "$Vd, $Rn, $Rm", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVLD3]> { let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDST1Instruction"; } @@ -780,15 +780,15 @@ defm VLD1d16Twb : VLD1D3WB<{0,1,0,?}, "16", addrmode6align64>; defm VLD1d32Twb : VLD1D3WB<{1,0,0,?}, "32", addrmode6align64>; defm VLD1d64Twb : VLD1D3WB<{1,1,0,?}, "64", addrmode6align64>; -def VLD1d64TPseudo : VLDQQPseudo; -def VLD1d64TPseudoWB_fixed : VLDQQWBfixedPseudo; -def VLD1d64TPseudoWB_register : VLDQQWBregisterPseudo; +def VLD1d64TPseudo : VLDQQPseudo, Sched<[WriteVLD3]>; +def VLD1d64TPseudoWB_fixed : VLDQQWBfixedPseudo, Sched<[WriteVLD3]>; +def VLD1d64TPseudoWB_register : VLDQQWBregisterPseudo, Sched<[WriteVLD3]>; // ...with 4 registers class VLD1D4 op7_4, string Dt, Operand AddrMode> : NLdSt<0, 0b10, 0b0010, op7_4, (outs VecListFourD:$Vd), (ins AddrMode:$Rn), IIC_VLD1x4, "vld1", Dt, - "$Vd, $Rn", "", []> { + "$Vd, $Rn", "", []>, Sched<[WriteVLD4]> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; @@ -797,7 +797,7 @@ multiclass VLD1D4WB op7_4, string Dt, Operand AddrMode> { def _fixed : NLdSt<0,0b10,0b0010, op7_4, (outs VecListFourD:$Vd, GPR:$wb), (ins AddrMode:$Rn), IIC_VLD1x2u, "vld1", Dt, "$Vd, $Rn!", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVLD4]> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; @@ -805,7 +805,7 @@ multiclass VLD1D4WB op7_4, string Dt, Operand AddrMode> { def _register : NLdSt<0,0b10,0b0010,op7_4, (outs VecListFourD:$Vd, GPR:$wb), (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u, "vld1", Dt, "$Vd, $Rn, $Rm", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVLD4]> { let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; } @@ -821,9 +821,9 @@ defm VLD1d16Qwb : VLD1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>; defm VLD1d32Qwb : VLD1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>; defm VLD1d64Qwb : VLD1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>; -def VLD1d64QPseudo : VLDQQPseudo; -def VLD1d64QPseudoWB_fixed : VLDQQWBfixedPseudo; -def VLD1d64QPseudoWB_register : VLDQQWBregisterPseudo; +def VLD1d64QPseudo : VLDQQPseudo, Sched<[WriteVLD4]>; +def VLD1d64QPseudoWB_fixed : VLDQQWBfixedPseudo, Sched<[WriteVLD4]>; +def VLD1d64QPseudoWB_register : VLDQQWBregisterPseudo, Sched<[WriteVLD4]>; // VLD2 : Vector Load (multiple 2-element structures) class VLD2 op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy, @@ -837,22 +837,22 @@ class VLD2 op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy, } def VLD2d8 : VLD2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2, - addrmode6align64or128>; + addrmode6align64or128>, Sched<[WriteVLD2]>; def VLD2d16 : VLD2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2, - addrmode6align64or128>; + addrmode6align64or128>, Sched<[WriteVLD2]>; def VLD2d32 : VLD2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2, - addrmode6align64or128>; + addrmode6align64or128>, Sched<[WriteVLD2]>; def VLD2q8 : VLD2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2, - addrmode6align64or128or256>; + addrmode6align64or128or256>, Sched<[WriteVLD4]>; def VLD2q16 : VLD2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2, - 
addrmode6align64or128or256>; + addrmode6align64or128or256>, Sched<[WriteVLD4]>; def VLD2q32 : VLD2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2, - addrmode6align64or128or256>; + addrmode6align64or128or256>, Sched<[WriteVLD4]>; -def VLD2q8Pseudo : VLDQQPseudo; -def VLD2q16Pseudo : VLDQQPseudo; -def VLD2q32Pseudo : VLDQQPseudo; +def VLD2q8Pseudo : VLDQQPseudo, Sched<[WriteVLD4]>; +def VLD2q16Pseudo : VLDQQPseudo, Sched<[WriteVLD4]>; +def VLD2q32Pseudo : VLDQQPseudo, Sched<[WriteVLD4]>; // ...with address register writeback: multiclass VLD2WB op11_8, bits<4> op7_4, string Dt, @@ -875,45 +875,45 @@ multiclass VLD2WB op11_8, bits<4> op7_4, string Dt, } defm VLD2d8wb : VLD2WB<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2u, - addrmode6align64or128>; + addrmode6align64or128>, Sched<[WriteVLD2]>; defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2u, - addrmode6align64or128>; + addrmode6align64or128>, Sched<[WriteVLD2]>; defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2u, - addrmode6align64or128>; + addrmode6align64or128>, Sched<[WriteVLD2]>; defm VLD2q8wb : VLD2WB<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2u, - addrmode6align64or128or256>; + addrmode6align64or128or256>, Sched<[WriteVLD4]>; defm VLD2q16wb : VLD2WB<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2u, - addrmode6align64or128or256>; + addrmode6align64or128or256>, Sched<[WriteVLD4]>; defm VLD2q32wb : VLD2WB<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2u, - addrmode6align64or128or256>; + addrmode6align64or128or256>, Sched<[WriteVLD4]>; -def VLD2q8PseudoWB_fixed : VLDQQWBfixedPseudo; -def VLD2q16PseudoWB_fixed : VLDQQWBfixedPseudo; -def VLD2q32PseudoWB_fixed : VLDQQWBfixedPseudo; -def VLD2q8PseudoWB_register : VLDQQWBregisterPseudo; -def VLD2q16PseudoWB_register : VLDQQWBregisterPseudo; -def VLD2q32PseudoWB_register : VLDQQWBregisterPseudo; +def VLD2q8PseudoWB_fixed : VLDQQWBfixedPseudo, Sched<[WriteVLD4]>; +def VLD2q16PseudoWB_fixed : VLDQQWBfixedPseudo, Sched<[WriteVLD4]>; +def VLD2q32PseudoWB_fixed : VLDQQWBfixedPseudo, Sched<[WriteVLD4]>; +def VLD2q8PseudoWB_register : VLDQQWBregisterPseudo, Sched<[WriteVLD4]>; +def VLD2q16PseudoWB_register : VLDQQWBregisterPseudo, Sched<[WriteVLD4]>; +def VLD2q32PseudoWB_register : VLDQQWBregisterPseudo, Sched<[WriteVLD4]>; // ...with double-spaced registers def VLD2b8 : VLD2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2, - addrmode6align64or128>; + addrmode6align64or128>, Sched<[WriteVLD2]>; def VLD2b16 : VLD2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2, - addrmode6align64or128>; + addrmode6align64or128>, Sched<[WriteVLD2]>; def VLD2b32 : VLD2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2, - addrmode6align64or128>; + addrmode6align64or128>, Sched<[WriteVLD2]>; defm VLD2b8wb : VLD2WB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2u, - addrmode6align64or128>; + addrmode6align64or128>, Sched<[WriteVLD2]>; defm VLD2b16wb : VLD2WB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2u, - addrmode6align64or128>; + addrmode6align64or128>, Sched<[WriteVLD2]>; defm VLD2b32wb : VLD2WB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2u, - addrmode6align64or128>; + addrmode6align64or128>, Sched<[WriteVLD2]>; // VLD3 : Vector Load (multiple 3-element structures) class VLD3D op11_8, bits<4> op7_4, string Dt> : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3), (ins addrmode6:$Rn), IIC_VLD3, - "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []> { + "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []>, 
Sched<[WriteVLD3]> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDST3Instruction"; @@ -923,9 +923,9 @@ def VLD3d8 : VLD3D<0b0100, {0,0,0,?}, "8">; def VLD3d16 : VLD3D<0b0100, {0,1,0,?}, "16">; def VLD3d32 : VLD3D<0b0100, {1,0,0,?}, "32">; -def VLD3d8Pseudo : VLDQQPseudo; -def VLD3d16Pseudo : VLDQQPseudo; -def VLD3d32Pseudo : VLDQQPseudo; +def VLD3d8Pseudo : VLDQQPseudo, Sched<[WriteVLD3]>; +def VLD3d16Pseudo : VLDQQPseudo, Sched<[WriteVLD3]>; +def VLD3d32Pseudo : VLDQQPseudo, Sched<[WriteVLD3]>; // ...with address register writeback: class VLD3DWB op11_8, bits<4> op7_4, string Dt> @@ -933,7 +933,7 @@ class VLD3DWB op11_8, bits<4> op7_4, string Dt> (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb), (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD3u, "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVLD3]> { let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDST3Instruction"; } @@ -942,9 +942,9 @@ def VLD3d8_UPD : VLD3DWB<0b0100, {0,0,0,?}, "8">; def VLD3d16_UPD : VLD3DWB<0b0100, {0,1,0,?}, "16">; def VLD3d32_UPD : VLD3DWB<0b0100, {1,0,0,?}, "32">; -def VLD3d8Pseudo_UPD : VLDQQWBPseudo; -def VLD3d16Pseudo_UPD : VLDQQWBPseudo; -def VLD3d32Pseudo_UPD : VLDQQWBPseudo; +def VLD3d8Pseudo_UPD : VLDQQWBPseudo, Sched<[WriteVLD3]>; +def VLD3d16Pseudo_UPD : VLDQQWBPseudo, Sched<[WriteVLD3]>; +def VLD3d32Pseudo_UPD : VLDQQWBPseudo, Sched<[WriteVLD3]>; // ...with double-spaced registers: def VLD3q8 : VLD3D<0b0101, {0,0,0,?}, "8">; @@ -954,25 +954,26 @@ def VLD3q8_UPD : VLD3DWB<0b0101, {0,0,0,?}, "8">; def VLD3q16_UPD : VLD3DWB<0b0101, {0,1,0,?}, "16">; def VLD3q32_UPD : VLD3DWB<0b0101, {1,0,0,?}, "32">; -def VLD3q8Pseudo_UPD : VLDQQQQWBPseudo; -def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo; -def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo; +def VLD3q8Pseudo_UPD : VLDQQQQWBPseudo, Sched<[WriteVLD3]>; +def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo, Sched<[WriteVLD3]>; +def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo, Sched<[WriteVLD3]>; // ...alternate versions to be allocated odd register numbers: -def VLD3q8oddPseudo : VLDQQQQPseudo; -def VLD3q16oddPseudo : VLDQQQQPseudo; -def VLD3q32oddPseudo : VLDQQQQPseudo; +def VLD3q8oddPseudo : VLDQQQQPseudo, Sched<[WriteVLD3]>; +def VLD3q16oddPseudo : VLDQQQQPseudo, Sched<[WriteVLD3]>; +def VLD3q32oddPseudo : VLDQQQQPseudo, Sched<[WriteVLD3]>; -def VLD3q8oddPseudo_UPD : VLDQQQQWBPseudo; -def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo; -def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo; +def VLD3q8oddPseudo_UPD : VLDQQQQWBPseudo, Sched<[WriteVLD3]>; +def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo, Sched<[WriteVLD3]>; +def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo, Sched<[WriteVLD3]>; // VLD4 : Vector Load (multiple 4-element structures) class VLD4D op11_8, bits<4> op7_4, string Dt> : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4), (ins addrmode6:$Rn), IIC_VLD4, - "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> { + "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []>, + Sched<[WriteVLD4]> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST4Instruction"; @@ -982,9 +983,9 @@ def VLD4d8 : VLD4D<0b0000, {0,0,?,?}, "8">; def VLD4d16 : VLD4D<0b0000, {0,1,?,?}, "16">; def VLD4d32 : VLD4D<0b0000, {1,0,?,?}, "32">; -def VLD4d8Pseudo : VLDQQPseudo; -def VLD4d16Pseudo : VLDQQPseudo; -def VLD4d32Pseudo : VLDQQPseudo; +def VLD4d8Pseudo : VLDQQPseudo, Sched<[WriteVLD4]>; +def VLD4d16Pseudo : VLDQQPseudo, Sched<[WriteVLD4]>; +def VLD4d32Pseudo : VLDQQPseudo, Sched<[WriteVLD4]>; // 
...with address register writeback: class VLD4DWB op11_8, bits<4> op7_4, string Dt> @@ -992,7 +993,7 @@ class VLD4DWB op11_8, bits<4> op7_4, string Dt> (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD4u, "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVLD4]> { let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST4Instruction"; } @@ -1001,9 +1002,9 @@ def VLD4d8_UPD : VLD4DWB<0b0000, {0,0,?,?}, "8">; def VLD4d16_UPD : VLD4DWB<0b0000, {0,1,?,?}, "16">; def VLD4d32_UPD : VLD4DWB<0b0000, {1,0,?,?}, "32">; -def VLD4d8Pseudo_UPD : VLDQQWBPseudo; -def VLD4d16Pseudo_UPD : VLDQQWBPseudo; -def VLD4d32Pseudo_UPD : VLDQQWBPseudo; +def VLD4d8Pseudo_UPD : VLDQQWBPseudo, Sched<[WriteVLD4]>; +def VLD4d16Pseudo_UPD : VLDQQWBPseudo, Sched<[WriteVLD4]>; +def VLD4d32Pseudo_UPD : VLDQQWBPseudo, Sched<[WriteVLD4]>; // ...with double-spaced registers: def VLD4q8 : VLD4D<0b0001, {0,0,?,?}, "8">; @@ -1013,18 +1014,18 @@ def VLD4q8_UPD : VLD4DWB<0b0001, {0,0,?,?}, "8">; def VLD4q16_UPD : VLD4DWB<0b0001, {0,1,?,?}, "16">; def VLD4q32_UPD : VLD4DWB<0b0001, {1,0,?,?}, "32">; -def VLD4q8Pseudo_UPD : VLDQQQQWBPseudo; -def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo; -def VLD4q32Pseudo_UPD : VLDQQQQWBPseudo; +def VLD4q8Pseudo_UPD : VLDQQQQWBPseudo, Sched<[WriteVLD4]>; +def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo, Sched<[WriteVLD4]>; +def VLD4q32Pseudo_UPD : VLDQQQQWBPseudo, Sched<[WriteVLD4]>; // ...alternate versions to be allocated odd register numbers: -def VLD4q8oddPseudo : VLDQQQQPseudo; -def VLD4q16oddPseudo : VLDQQQQPseudo; -def VLD4q32oddPseudo : VLDQQQQPseudo; +def VLD4q8oddPseudo : VLDQQQQPseudo, Sched<[WriteVLD4]>; +def VLD4q16oddPseudo : VLDQQQQPseudo, Sched<[WriteVLD4]>; +def VLD4q32oddPseudo : VLDQQQQPseudo, Sched<[WriteVLD4]>; -def VLD4q8oddPseudo_UPD : VLDQQQQWBPseudo; -def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo; -def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo; +def VLD4q8oddPseudo_UPD : VLDQQQQWBPseudo, Sched<[WriteVLD4]>; +def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo, Sched<[WriteVLD4]>; +def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo, Sched<[WriteVLD4]>; } // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 @@ -1076,11 +1077,12 @@ class VLD1LN32 op11_8, bits<4> op7_4, string Dt, ValueType Ty, "$src = $Vd", [(set DPR:$Vd, (vector_insert (Ty DPR:$src), (i32 (LoadOp addrmode6oneL32:$Rn)), - imm:$lane))]> { + imm:$lane))]>, Sched<[WriteVLD1]> { let Rm = 0b1111; let DecoderMethod = "DecodeVLD1LN"; } -class VLD1QLNPseudo : VLDQLNPseudo { +class VLD1QLNPseudo : VLDQLNPseudo, + Sched<[WriteVLD1]> { let Pattern = [(set QPR:$dst, (vector_insert (Ty QPR:$src), (i32 (LoadOp addrmode6:$addr)), imm:$lane))]; @@ -1117,7 +1119,7 @@ class VLD1LNWB op11_8, bits<4> op7_4, string Dt> (ins addrmode6:$Rn, am6offset:$Rm, DPR:$src, nohash_imm:$lane), IIC_VLD1lnu, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn$Rm", - "$src = $Vd, $Rn.addr = $wb", []> { + "$src = $Vd, $Rn.addr = $wb", []>, Sched<[WriteVLD1]> { let DecoderMethod = "DecodeVLD1LN"; } @@ -1134,16 +1136,16 @@ def VLD1LNd32_UPD : VLD1LNWB<0b1000, {?,0,?,?}, "32"> { let Inst{4} = Rn{4}; } -def VLD1LNq8Pseudo_UPD : VLDQLNWBPseudo; -def VLD1LNq16Pseudo_UPD : VLDQLNWBPseudo; -def VLD1LNq32Pseudo_UPD : VLDQLNWBPseudo; +def VLD1LNq8Pseudo_UPD : VLDQLNWBPseudo, Sched<[WriteVLD1]>; +def VLD1LNq16Pseudo_UPD : VLDQLNWBPseudo, Sched<[WriteVLD1]>; +def VLD1LNq32Pseudo_UPD : VLDQLNWBPseudo, Sched<[WriteVLD1]>; // VLD2LN : Vector Load (single 2-element structure to one lane) 
class VLD2LN op11_8, bits<4> op7_4, string Dt> : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2), (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VLD2ln, "vld2", Dt, "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn", - "$src1 = $Vd, $src2 = $dst2", []> { + "$src1 = $Vd, $src2 = $dst2", []>, Sched<[WriteVLD1]> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD2LN"; @@ -1159,9 +1161,9 @@ def VLD2LNd32 : VLD2LN<0b1001, {?,0,0,?}, "32"> { let Inst{7} = lane{0}; } -def VLD2LNd8Pseudo : VLDQLNPseudo; -def VLD2LNd16Pseudo : VLDQLNPseudo; -def VLD2LNd32Pseudo : VLDQLNPseudo; +def VLD2LNd8Pseudo : VLDQLNPseudo, Sched<[WriteVLD1]>; +def VLD2LNd16Pseudo : VLDQLNPseudo, Sched<[WriteVLD1]>; +def VLD2LNd32Pseudo : VLDQLNPseudo, Sched<[WriteVLD1]>; // ...with double-spaced registers: def VLD2LNq16 : VLD2LN<0b0101, {?,?,1,?}, "16"> { @@ -1171,8 +1173,8 @@ def VLD2LNq32 : VLD2LN<0b1001, {?,1,0,?}, "32"> { let Inst{7} = lane{0}; } -def VLD2LNq16Pseudo : VLDQQLNPseudo; -def VLD2LNq32Pseudo : VLDQQLNPseudo; +def VLD2LNq16Pseudo : VLDQQLNPseudo, Sched<[WriteVLD1]>; +def VLD2LNq32Pseudo : VLDQQLNPseudo, Sched<[WriteVLD1]>; // ...with address register writeback: class VLD2LNWB op11_8, bits<4> op7_4, string Dt> @@ -1195,9 +1197,9 @@ def VLD2LNd32_UPD : VLD2LNWB<0b1001, {?,0,0,?}, "32"> { let Inst{7} = lane{0}; } -def VLD2LNd8Pseudo_UPD : VLDQLNWBPseudo; -def VLD2LNd16Pseudo_UPD : VLDQLNWBPseudo; -def VLD2LNd32Pseudo_UPD : VLDQLNWBPseudo; +def VLD2LNd8Pseudo_UPD : VLDQLNWBPseudo, Sched<[WriteVLD1]>; +def VLD2LNd16Pseudo_UPD : VLDQLNWBPseudo, Sched<[WriteVLD1]>; +def VLD2LNd32Pseudo_UPD : VLDQLNWBPseudo, Sched<[WriteVLD1]>; def VLD2LNq16_UPD : VLD2LNWB<0b0101, {?,?,1,?}, "16"> { let Inst{7-6} = lane{1-0}; @@ -1206,8 +1208,8 @@ def VLD2LNq32_UPD : VLD2LNWB<0b1001, {?,1,0,?}, "32"> { let Inst{7} = lane{0}; } -def VLD2LNq16Pseudo_UPD : VLDQQLNWBPseudo; -def VLD2LNq32Pseudo_UPD : VLDQQLNWBPseudo; +def VLD2LNq16Pseudo_UPD : VLDQQLNWBPseudo, Sched<[WriteVLD1]>; +def VLD2LNq32Pseudo_UPD : VLDQQLNWBPseudo, Sched<[WriteVLD1]>; // VLD3LN : Vector Load (single 3-element structure to one lane) class VLD3LN op11_8, bits<4> op7_4, string Dt> @@ -1215,7 +1217,7 @@ class VLD3LN op11_8, bits<4> op7_4, string Dt> (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane), IIC_VLD3ln, "vld3", Dt, "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn", - "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3", []> { + "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3", []>, Sched<[WriteVLD2]> { let Rm = 0b1111; let DecoderMethod = "DecodeVLD3LN"; } @@ -1230,9 +1232,9 @@ def VLD3LNd32 : VLD3LN<0b1010, {?,0,0,0}, "32"> { let Inst{7} = lane{0}; } -def VLD3LNd8Pseudo : VLDQQLNPseudo; -def VLD3LNd16Pseudo : VLDQQLNPseudo; -def VLD3LNd32Pseudo : VLDQQLNPseudo; +def VLD3LNd8Pseudo : VLDQQLNPseudo, Sched<[WriteVLD2]>; +def VLD3LNd16Pseudo : VLDQQLNPseudo, Sched<[WriteVLD2]>; +def VLD3LNd32Pseudo : VLDQQLNPseudo, Sched<[WriteVLD2]>; // ...with double-spaced registers: def VLD3LNq16 : VLD3LN<0b0110, {?,?,1,0}, "16"> { @@ -1242,8 +1244,8 @@ def VLD3LNq32 : VLD3LN<0b1010, {?,1,0,0}, "32"> { let Inst{7} = lane{0}; } -def VLD3LNq16Pseudo : VLDQQQQLNPseudo; -def VLD3LNq32Pseudo : VLDQQQQLNPseudo; +def VLD3LNq16Pseudo : VLDQQQQLNPseudo, Sched<[WriteVLD2]>; +def VLD3LNq32Pseudo : VLDQQQQLNPseudo, Sched<[WriteVLD2]>; // ...with address register writeback: class VLD3LNWB op11_8, bits<4> op7_4, string Dt> @@ -1254,7 +1256,7 @@ class VLD3LNWB op11_8, bits<4> op7_4, string Dt> IIC_VLD3lnu, "vld3", Dt, "\\{$Vd[$lane], $dst2[$lane], 
$dst3[$lane]\\}, $Rn$Rm", "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $Rn.addr = $wb", - []> { + []>, Sched<[WriteVLD2]> { let DecoderMethod = "DecodeVLD3LN"; } @@ -1268,9 +1270,9 @@ def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32"> { let Inst{7} = lane{0}; } -def VLD3LNd8Pseudo_UPD : VLDQQLNWBPseudo; -def VLD3LNd16Pseudo_UPD : VLDQQLNWBPseudo; -def VLD3LNd32Pseudo_UPD : VLDQQLNWBPseudo; +def VLD3LNd8Pseudo_UPD : VLDQQLNWBPseudo, Sched<[WriteVLD2]>; +def VLD3LNd16Pseudo_UPD : VLDQQLNWBPseudo, Sched<[WriteVLD2]>; +def VLD3LNd32Pseudo_UPD : VLDQQLNWBPseudo, Sched<[WriteVLD2]>; def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16"> { let Inst{7-6} = lane{1-0}; @@ -1279,8 +1281,8 @@ def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32"> { let Inst{7} = lane{0}; } -def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo; -def VLD3LNq32Pseudo_UPD : VLDQQQQLNWBPseudo; +def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo, Sched<[WriteVLD2]>; +def VLD3LNq32Pseudo_UPD : VLDQQQQLNWBPseudo, Sched<[WriteVLD2]>; // VLD4LN : Vector Load (single 4-element structure to one lane) class VLD4LN op11_8, bits<4> op7_4, string Dt> @@ -1289,7 +1291,8 @@ class VLD4LN op11_8, bits<4> op7_4, string Dt> (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane), IIC_VLD4ln, "vld4", Dt, "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn", - "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []> { + "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>, + Sched<[WriteVLD2]> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD4LN"; @@ -1306,9 +1309,9 @@ def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32"> { let Inst{5} = Rn{5}; } -def VLD4LNd8Pseudo : VLDQQLNPseudo; -def VLD4LNd16Pseudo : VLDQQLNPseudo; -def VLD4LNd32Pseudo : VLDQQLNPseudo; +def VLD4LNd8Pseudo : VLDQQLNPseudo, Sched<[WriteVLD2]>; +def VLD4LNd16Pseudo : VLDQQLNPseudo, Sched<[WriteVLD2]>; +def VLD4LNd32Pseudo : VLDQQLNPseudo, Sched<[WriteVLD2]>; // ...with double-spaced registers: def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16"> { @@ -1319,8 +1322,8 @@ def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32"> { let Inst{5} = Rn{5}; } -def VLD4LNq16Pseudo : VLDQQQQLNPseudo; -def VLD4LNq32Pseudo : VLDQQQQLNPseudo; +def VLD4LNq16Pseudo : VLDQQQQLNPseudo, Sched<[WriteVLD2]>; +def VLD4LNq32Pseudo : VLDQQQQLNPseudo, Sched<[WriteVLD2]>; // ...with address register writeback: class VLD4LNWB op11_8, bits<4> op7_4, string Dt> @@ -1347,9 +1350,9 @@ def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32"> { let Inst{5} = Rn{5}; } -def VLD4LNd8Pseudo_UPD : VLDQQLNWBPseudo; -def VLD4LNd16Pseudo_UPD : VLDQQLNWBPseudo; -def VLD4LNd32Pseudo_UPD : VLDQQLNWBPseudo; +def VLD4LNd8Pseudo_UPD : VLDQQLNWBPseudo, Sched<[WriteVLD2]>; +def VLD4LNd16Pseudo_UPD : VLDQQLNWBPseudo, Sched<[WriteVLD2]>; +def VLD4LNd32Pseudo_UPD : VLDQQLNWBPseudo, Sched<[WriteVLD2]>; def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16"> { let Inst{7-6} = lane{1-0}; @@ -1359,8 +1362,8 @@ def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32"> { let Inst{5} = Rn{5}; } -def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo; -def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo; +def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo, Sched<[WriteVLD2]>; +def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo, Sched<[WriteVLD2]>; } // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 @@ -1371,7 +1374,8 @@ class VLD1DUP op7_4, string Dt, ValueType Ty, PatFrag LoadOp, (ins AddrMode:$Rn), IIC_VLD1dup, "vld1", Dt, "$Vd, $Rn", "", [(set VecListOneDAllLanes:$Vd, - (Ty (NEONvdup (i32 (LoadOp AddrMode:$Rn)))))]> 
{ + (Ty (NEONvdup (i32 (LoadOp AddrMode:$Rn)))))]>, + Sched<[WriteVLD2]> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD1DupInstruction"; @@ -1434,7 +1438,7 @@ multiclass VLD1QDUPWB op7_4, string Dt, Operand AddrMode> { (outs VecListDPairAllLanes:$Vd, GPR:$wb), (ins AddrMode:$Rn), IIC_VLD1dupu, "vld1", Dt, "$Vd, $Rn!", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD1DupInstruction"; @@ -1491,7 +1495,7 @@ multiclass VLD2DUPWB op7_4, string Dt, RegisterOperand VdTy, (outs VdTy:$Vd, GPR:$wb), (ins AddrMode:$Rn), IIC_VLD2dupu, "vld2", Dt, "$Vd, $Rn!", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD2DupInstruction"; @@ -1500,7 +1504,7 @@ multiclass VLD2DUPWB op7_4, string Dt, RegisterOperand VdTy, (outs VdTy:$Vd, GPR:$wb), (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD2dupu, "vld2", Dt, "$Vd, $Rn, $Rm", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> { let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD2DupInstruction"; } @@ -1524,7 +1528,8 @@ defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListDPairSpacedAllLanes, class VLD3DUP op7_4, string Dt> : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3), (ins addrmode6dup:$Rn), IIC_VLD3dup, - "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn", "", []> { + "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn", "", []>, + Sched<[WriteVLD2]> { let Rm = 0b1111; let Inst{4} = 0; let DecoderMethod = "DecodeVLD3DupInstruction"; @@ -1534,9 +1539,9 @@ def VLD3DUPd8 : VLD3DUP<{0,0,0,?}, "8">; def VLD3DUPd16 : VLD3DUP<{0,1,0,?}, "16">; def VLD3DUPd32 : VLD3DUP<{1,0,0,?}, "32">; -def VLD3DUPd8Pseudo : VLDQQPseudo; -def VLD3DUPd16Pseudo : VLDQQPseudo; -def VLD3DUPd32Pseudo : VLDQQPseudo; +def VLD3DUPd8Pseudo : VLDQQPseudo, Sched<[WriteVLD2]>; +def VLD3DUPd16Pseudo : VLDQQPseudo, Sched<[WriteVLD2]>; +def VLD3DUPd32Pseudo : VLDQQPseudo, Sched<[WriteVLD2]>; // ...with double-spaced registers (not used for codegen): def VLD3DUPq8 : VLD3DUP<{0,0,1,?}, "8">; @@ -1548,7 +1553,7 @@ class VLD3DUPWB op7_4, string Dt, Operand AddrMode> : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb), (ins AddrMode:$Rn, am6offset:$Rm), IIC_VLD3dupu, "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn$Rm", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> { let Inst{4} = 0; let DecoderMethod = "DecodeVLD3DupInstruction"; } @@ -1561,9 +1566,9 @@ def VLD3DUPq8_UPD : VLD3DUPWB<{0,0,1,0}, "8", addrmode6dupalign64>; def VLD3DUPq16_UPD : VLD3DUPWB<{0,1,1,?}, "16", addrmode6dupalign64>; def VLD3DUPq32_UPD : VLD3DUPWB<{1,0,1,?}, "32", addrmode6dupalign64>; -def VLD3DUPd8Pseudo_UPD : VLDQQWBPseudo; -def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo; -def VLD3DUPd32Pseudo_UPD : VLDQQWBPseudo; +def VLD3DUPd8Pseudo_UPD : VLDQQWBPseudo, Sched<[WriteVLD2]>; +def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo, Sched<[WriteVLD2]>; +def VLD3DUPd32Pseudo_UPD : VLDQQWBPseudo, Sched<[WriteVLD2]>; // VLD4DUP : Vector Load (single 4-element structure to all lanes) class VLD4DUP op7_4, string Dt> @@ -1580,9 +1585,9 @@ def VLD4DUPd8 : VLD4DUP<{0,0,0,?}, "8">; def VLD4DUPd16 : VLD4DUP<{0,1,0,?}, "16">; def VLD4DUPd32 : VLD4DUP<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; } -def VLD4DUPd8Pseudo : VLDQQPseudo; -def VLD4DUPd16Pseudo : VLDQQPseudo; -def VLD4DUPd32Pseudo : 
VLDQQPseudo; +def VLD4DUPd8Pseudo : VLDQQPseudo, Sched<[WriteVLD2]>; +def VLD4DUPd16Pseudo : VLDQQPseudo, Sched<[WriteVLD2]>; +def VLD4DUPd32Pseudo : VLDQQPseudo, Sched<[WriteVLD2]>; // ...with double-spaced registers (not used for codegen): def VLD4DUPq8 : VLD4DUP<{0,0,1,?}, "8">; @@ -1595,7 +1600,7 @@ class VLD4DUPWB op7_4, string Dt> (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD4dupu, "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn$Rm", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> { let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD4DupInstruction"; } @@ -1608,9 +1613,9 @@ def VLD4DUPq8_UPD : VLD4DUPWB<{0,0,1,0}, "8">; def VLD4DUPq16_UPD : VLD4DUPWB<{0,1,1,?}, "16">; def VLD4DUPq32_UPD : VLD4DUPWB<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; } -def VLD4DUPd8Pseudo_UPD : VLDQQWBPseudo; -def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo; -def VLD4DUPd32Pseudo_UPD : VLDQQWBPseudo; +def VLD4DUPd8Pseudo_UPD : VLDQQWBPseudo, Sched<[WriteVLD2]>; +def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo, Sched<[WriteVLD2]>; +def VLD4DUPd32Pseudo_UPD : VLDQQWBPseudo, Sched<[WriteVLD2]>; } // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 @@ -1657,14 +1662,14 @@ class VSTQQQQWBPseudo // VST1 : Vector Store (multiple single elements) class VST1D op7_4, string Dt, Operand AddrMode> : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins AddrMode:$Rn, VecListOneD:$Vd), - IIC_VST1, "vst1", Dt, "$Vd, $Rn", "", []> { + IIC_VST1, "vst1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVST1]> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDST1Instruction"; } class VST1Q op7_4, string Dt, Operand AddrMode> : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins AddrMode:$Rn, VecListDPair:$Vd), - IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []> { + IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVST2]> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; @@ -1685,7 +1690,7 @@ multiclass VST1DWB op7_4, string Dt, Operand AddrMode> { def _fixed : NLdSt<0,0b00, 0b0111,op7_4, (outs GPR:$wb), (ins AddrMode:$Rn, VecListOneD:$Vd), IIC_VLD1u, "vst1", Dt, "$Vd, $Rn!", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVST1]> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDST1Instruction"; @@ -1694,7 +1699,7 @@ multiclass VST1DWB op7_4, string Dt, Operand AddrMode> { (ins AddrMode:$Rn, rGPR:$Rm, VecListOneD:$Vd), IIC_VLD1u, "vst1", Dt, "$Vd, $Rn, $Rm", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVST1]> { let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDST1Instruction"; } @@ -1703,7 +1708,7 @@ multiclass VST1QWB op7_4, string Dt, Operand AddrMode> { def _fixed : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb), (ins AddrMode:$Rn, VecListDPair:$Vd), IIC_VLD1x2u, "vst1", Dt, "$Vd, $Rn!", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVST2]> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
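The change running through these NEON hunks is uniform: every load class and every *Pseudo def picks up a Sched<[...]> list naming a SchedWrite class (WriteVLD1 or WriteVLD2 for the loads, WriteVST1 through WriteVST4 for the stores below). The pseudos do not inherit scheduling information from the instructions they later expand into, so they are annotated explicitly; otherwise the machine scheduler falls back on a default latency. A toy C++ sketch of what such an annotation supplies; the class names come from these hunks, every number is invented for illustration:

    #include <cstdio>
    #include <map>
    #include <string>

    struct SchedWrite { unsigned Latency; unsigned ResourceCycles; };

    int main() {
      // Hypothetical write-resource table for some CPU; numbers are made up.
      const std::map<std::string, SchedWrite> WriteClasses = {
          {"WriteVLD1", {5, 1}}, {"WriteVLD2", {6, 2}},
          {"WriteVST1", {1, 1}}, {"WriteVST4", {4, 4}}};
      // Opcode -> write class, which is what the Sched<[...]> lists establish.
      const std::map<std::string, std::string> OpcodeToWrite = {
          {"VLD2LNd8Pseudo", "WriteVLD1"}, {"VLD3DUPd8Pseudo", "WriteVLD2"}};
      for (const auto &KV : OpcodeToWrite) {
        const SchedWrite &W = WriteClasses.at(KV.second);
        std::printf("%s -> %s: latency %u, %u resource cycle(s)\n",
                    KV.first.c_str(), KV.second.c_str(), W.Latency,
                    W.ResourceCycles);
      }
    }

In a real subtarget model the per-CPU tables are generated by TableGen from the SchedReadWrite definitions; the point here is only the opcode-to-resource indirection that the annotations create.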
let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; @@ -1712,7 +1717,7 @@ multiclass VST1QWB op7_4, string Dt, Operand AddrMode> { (ins AddrMode:$Rn, rGPR:$Rm, VecListDPair:$Vd), IIC_VLD1x2u, "vst1", Dt, "$Vd, $Rn, $Rm", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVST2]> { let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; } @@ -1732,7 +1737,7 @@ defm VST1q64wb : VST1QWB<{1,1,?,?}, "64", addrmode6align64or128>; class VST1D3 op7_4, string Dt, Operand AddrMode> : NLdSt<0, 0b00, 0b0110, op7_4, (outs), (ins AddrMode:$Rn, VecListThreeD:$Vd), - IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []> { + IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVST3]> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDST1Instruction"; @@ -1741,7 +1746,7 @@ multiclass VST1D3WB op7_4, string Dt, Operand AddrMode> { def _fixed : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb), (ins AddrMode:$Rn, VecListThreeD:$Vd), IIC_VLD1x3u, "vst1", Dt, "$Vd, $Rn!", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVST3]> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; @@ -1750,7 +1755,7 @@ multiclass VST1D3WB op7_4, string Dt, Operand AddrMode> { (ins AddrMode:$Rn, rGPR:$Rm, VecListThreeD:$Vd), IIC_VLD1x3u, "vst1", Dt, "$Vd, $Rn, $Rm", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVST3]> { let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; } @@ -1766,16 +1771,16 @@ defm VST1d16Twb : VST1D3WB<{0,1,0,?}, "16", addrmode6align64>; defm VST1d32Twb : VST1D3WB<{1,0,0,?}, "32", addrmode6align64>; defm VST1d64Twb : VST1D3WB<{1,1,0,?}, "64", addrmode6align64>; -def VST1d64TPseudo : VSTQQPseudo; -def VST1d64TPseudoWB_fixed : VSTQQWBfixedPseudo; -def VST1d64TPseudoWB_register : VSTQQWBPseudo; +def VST1d64TPseudo : VSTQQPseudo, Sched<[WriteVST3]>; +def VST1d64TPseudoWB_fixed : VSTQQWBfixedPseudo, Sched<[WriteVST3]>; +def VST1d64TPseudoWB_register : VSTQQWBPseudo, Sched<[WriteVST3]>; // ...with 4 registers class VST1D4 op7_4, string Dt, Operand AddrMode> : NLdSt<0, 0b00, 0b0010, op7_4, (outs), (ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VST1x4, "vst1", Dt, "$Vd, $Rn", "", - []> { + []>, Sched<[WriteVST4]> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; @@ -1784,7 +1789,7 @@ multiclass VST1D4WB op7_4, string Dt, Operand AddrMode> { def _fixed : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb), (ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VLD1x4u, "vst1", Dt, "$Vd, $Rn!", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVST4]> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; @@ -1793,7 +1798,7 @@ multiclass VST1D4WB op7_4, string Dt, Operand AddrMode> { (ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd), IIC_VLD1x4u, "vst1", Dt, "$Vd, $Rn, $Rm", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVST4]> { let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; } @@ -1809,9 +1814,9 @@ defm VST1d16Qwb : VST1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>; defm VST1d32Qwb : VST1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>; defm VST1d64Qwb : VST1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>; -def VST1d64QPseudo : VSTQQPseudo; -def VST1d64QPseudoWB_fixed : VSTQQWBfixedPseudo; -def VST1d64QPseudoWB_register : VSTQQWBPseudo; +def VST1d64QPseudo : VSTQQPseudo, Sched<[WriteVST4]>; +def VST1d64QPseudoWB_fixed : VSTQQWBfixedPseudo, Sched<[WriteVST4]>; +def VST1d64QPseudoWB_register : VSTQQWBPseudo, Sched<[WriteVST4]>; // VST2 : Vector Store (multiple 2-element structures) class VST2 op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy, @@ -1824,22 +1829,22 @@ class VST2 op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy, } def VST2d8 : VST2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VST2, - addrmode6align64or128>; + addrmode6align64or128>, Sched<[WriteVST2]>; def VST2d16 : VST2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VST2, - addrmode6align64or128>; + addrmode6align64or128>, Sched<[WriteVST2]>; def VST2d32 : VST2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VST2, - addrmode6align64or128>; + addrmode6align64or128>, Sched<[WriteVST2]>; def VST2q8 : VST2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VST2x2, - addrmode6align64or128or256>; + addrmode6align64or128or256>, Sched<[WriteVST4]>; def VST2q16 : VST2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VST2x2, - addrmode6align64or128or256>; + addrmode6align64or128or256>, Sched<[WriteVST4]>; def VST2q32 : VST2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VST2x2, - addrmode6align64or128or256>; + addrmode6align64or128or256>, Sched<[WriteVST4]>; -def VST2q8Pseudo : VSTQQPseudo; -def VST2q16Pseudo : VSTQQPseudo; -def VST2q32Pseudo : VSTQQPseudo; +def VST2q8Pseudo : VSTQQPseudo, Sched<[WriteVST4]>; +def VST2q16Pseudo : VSTQQPseudo, Sched<[WriteVST4]>; +def VST2q32Pseudo : VSTQQPseudo, Sched<[WriteVST4]>; // ...with address register writeback: multiclass VST2DWB op11_8, bits<4> op7_4, string Dt, @@ -1847,7 +1852,7 @@ multiclass VST2DWB op11_8, bits<4> op7_4, string Dt, def _fixed : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), (ins AddrMode:$Rn, VdTy:$Vd), IIC_VLD1u, "vst2", Dt, "$Vd, $Rn!", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVST2]> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST2Instruction"; @@ -1855,7 +1860,7 @@ multiclass VST2DWB op11_8, bits<4> op7_4, string Dt, def _register : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), (ins AddrMode:$Rn, rGPR:$Rm, VdTy:$Vd), IIC_VLD1u, "vst2", Dt, "$Vd, $Rn, $Rm", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVST2]> { let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST2Instruction"; } @@ -1864,7 +1869,7 @@ multiclass VST2QWB op7_4, string Dt, Operand AddrMode> { def _fixed : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb), (ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VLD1u, "vst2", Dt, "$Vd, $Rn!", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVST4]> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
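Each of the store multiclasses defines a _fixed and a _register writeback variant; both tie the updated base back to the address register ($Rn.addr = $wb) and differ only in the increment. A minimal sketch of the two addressing forms on plain pointers (NEON semantics paraphrased, not LLVM code):

    #include <cstdint>

    struct WritebackDemo {
      const uint8_t *Rn; // base register, updated in place ($Rn.addr = $wb)
      // "_fixed" form, e.g. vst1.8 {d0}, [r0]! : advance by the access size.
      void fixed(unsigned AccessBytes) { Rn += AccessBytes; }
      // "_register" form, e.g. vst1.8 {d0}, [r0], r1 : advance by register Rm.
      void reg(int32_t Rm) { Rn += Rm; }
    };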
let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST2Instruction"; @@ -1873,7 +1878,7 @@ multiclass VST2QWB op7_4, string Dt, Operand AddrMode> { (ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd), IIC_VLD1u, "vst2", Dt, "$Vd, $Rn, $Rm", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVST4]> { let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST2Instruction"; } @@ -1890,12 +1895,12 @@ defm VST2q8wb : VST2QWB<{0,0,?,?}, "8", addrmode6align64or128or256>; defm VST2q16wb : VST2QWB<{0,1,?,?}, "16", addrmode6align64or128or256>; defm VST2q32wb : VST2QWB<{1,0,?,?}, "32", addrmode6align64or128or256>; -def VST2q8PseudoWB_fixed : VSTQQWBfixedPseudo; -def VST2q16PseudoWB_fixed : VSTQQWBfixedPseudo; -def VST2q32PseudoWB_fixed : VSTQQWBfixedPseudo; -def VST2q8PseudoWB_register : VSTQQWBregisterPseudo; -def VST2q16PseudoWB_register : VSTQQWBregisterPseudo; -def VST2q32PseudoWB_register : VSTQQWBregisterPseudo; +def VST2q8PseudoWB_fixed : VSTQQWBfixedPseudo, Sched<[WriteVST4]>; +def VST2q16PseudoWB_fixed : VSTQQWBfixedPseudo, Sched<[WriteVST4]>; +def VST2q32PseudoWB_fixed : VSTQQWBfixedPseudo, Sched<[WriteVST4]>; +def VST2q8PseudoWB_register : VSTQQWBregisterPseudo, Sched<[WriteVST4]>; +def VST2q16PseudoWB_register : VSTQQWBregisterPseudo, Sched<[WriteVST4]>; +def VST2q32PseudoWB_register : VSTQQWBregisterPseudo, Sched<[WriteVST4]>; // ...with double-spaced registers def VST2b8 : VST2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VST2, @@ -1915,7 +1920,7 @@ defm VST2b32wb : VST2DWB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, class VST3D op11_8, bits<4> op7_4, string Dt> : NLdSt<0, 0b00, op11_8, op7_4, (outs), (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3, - "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []> { + "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []>, Sched<[WriteVST3]> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDST3Instruction"; @@ -1925,9 +1930,9 @@ def VST3d8 : VST3D<0b0100, {0,0,0,?}, "8">; def VST3d16 : VST3D<0b0100, {0,1,0,?}, "16">; def VST3d32 : VST3D<0b0100, {1,0,0,?}, "32">; -def VST3d8Pseudo : VSTQQPseudo; -def VST3d16Pseudo : VSTQQPseudo; -def VST3d32Pseudo : VSTQQPseudo; +def VST3d8Pseudo : VSTQQPseudo, Sched<[WriteVST3]>; +def VST3d16Pseudo : VSTQQPseudo, Sched<[WriteVST3]>; +def VST3d32Pseudo : VSTQQPseudo, Sched<[WriteVST3]>; // ...with address register writeback: class VST3DWB op11_8, bits<4> op7_4, string Dt> @@ -1935,7 +1940,7 @@ class VST3DWB op11_8, bits<4> op7_4, string Dt> (ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3u, "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVST3]> { let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDST3Instruction"; } @@ -1944,9 +1949,9 @@ def VST3d8_UPD : VST3DWB<0b0100, {0,0,0,?}, "8">; def VST3d16_UPD : VST3DWB<0b0100, {0,1,0,?}, "16">; def VST3d32_UPD : VST3DWB<0b0100, {1,0,0,?}, "32">; -def VST3d8Pseudo_UPD : VSTQQWBPseudo; -def VST3d16Pseudo_UPD : VSTQQWBPseudo; -def VST3d32Pseudo_UPD : VSTQQWBPseudo; +def VST3d8Pseudo_UPD : VSTQQWBPseudo, Sched<[WriteVST3]>; +def VST3d16Pseudo_UPD : VSTQQWBPseudo, Sched<[WriteVST3]>; +def VST3d32Pseudo_UPD : VSTQQWBPseudo, Sched<[WriteVST3]>; // ...with double-spaced registers: def VST3q8 : VST3D<0b0101, {0,0,0,?}, "8">; @@ -1956,25 +1961,25 @@ def VST3q8_UPD : VST3DWB<0b0101, {0,0,0,?}, "8">; def VST3q16_UPD : VST3DWB<0b0101, {0,1,0,?}, "16">; def VST3q32_UPD : VST3DWB<0b0101, {1,0,0,?}, "32">; -def VST3q8Pseudo_UPD : VSTQQQQWBPseudo; -def 
VST3q16Pseudo_UPD : VSTQQQQWBPseudo; -def VST3q32Pseudo_UPD : VSTQQQQWBPseudo; +def VST3q8Pseudo_UPD : VSTQQQQWBPseudo, Sched<[WriteVST3]>; +def VST3q16Pseudo_UPD : VSTQQQQWBPseudo, Sched<[WriteVST3]>; +def VST3q32Pseudo_UPD : VSTQQQQWBPseudo, Sched<[WriteVST3]>; // ...alternate versions to be allocated odd register numbers: -def VST3q8oddPseudo : VSTQQQQPseudo; -def VST3q16oddPseudo : VSTQQQQPseudo; -def VST3q32oddPseudo : VSTQQQQPseudo; +def VST3q8oddPseudo : VSTQQQQPseudo, Sched<[WriteVST3]>; +def VST3q16oddPseudo : VSTQQQQPseudo, Sched<[WriteVST3]>; +def VST3q32oddPseudo : VSTQQQQPseudo, Sched<[WriteVST3]>; -def VST3q8oddPseudo_UPD : VSTQQQQWBPseudo; -def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo; -def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo; +def VST3q8oddPseudo_UPD : VSTQQQQWBPseudo, Sched<[WriteVST3]>; +def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo, Sched<[WriteVST3]>; +def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo, Sched<[WriteVST3]>; // VST4 : Vector Store (multiple 4-element structures) class VST4D op11_8, bits<4> op7_4, string Dt> : NLdSt<0, 0b00, op11_8, op7_4, (outs), (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST4, "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn", - "", []> { + "", []>, Sched<[WriteVST4]> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST4Instruction"; @@ -1984,9 +1989,9 @@ def VST4d8 : VST4D<0b0000, {0,0,?,?}, "8">; def VST4d16 : VST4D<0b0000, {0,1,?,?}, "16">; def VST4d32 : VST4D<0b0000, {1,0,?,?}, "32">; -def VST4d8Pseudo : VSTQQPseudo; -def VST4d16Pseudo : VSTQQPseudo; -def VST4d32Pseudo : VSTQQPseudo; +def VST4d8Pseudo : VSTQQPseudo, Sched<[WriteVST4]>; +def VST4d16Pseudo : VSTQQPseudo, Sched<[WriteVST4]>; +def VST4d32Pseudo : VSTQQPseudo, Sched<[WriteVST4]>; // ...with address register writeback: class VST4DWB op11_8, bits<4> op7_4, string Dt> @@ -1994,7 +1999,7 @@ class VST4DWB op11_8, bits<4> op7_4, string Dt> (ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST4u, "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVST4]> { let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST4Instruction"; } @@ -2003,9 +2008,9 @@ def VST4d8_UPD : VST4DWB<0b0000, {0,0,?,?}, "8">; def VST4d16_UPD : VST4DWB<0b0000, {0,1,?,?}, "16">; def VST4d32_UPD : VST4DWB<0b0000, {1,0,?,?}, "32">; -def VST4d8Pseudo_UPD : VSTQQWBPseudo; -def VST4d16Pseudo_UPD : VSTQQWBPseudo; -def VST4d32Pseudo_UPD : VSTQQWBPseudo; +def VST4d8Pseudo_UPD : VSTQQWBPseudo, Sched<[WriteVST4]>; +def VST4d16Pseudo_UPD : VSTQQWBPseudo, Sched<[WriteVST4]>; +def VST4d32Pseudo_UPD : VSTQQWBPseudo, Sched<[WriteVST4]>; // ...with double-spaced registers: def VST4q8 : VST4D<0b0001, {0,0,?,?}, "8">; @@ -2015,18 +2020,18 @@ def VST4q8_UPD : VST4DWB<0b0001, {0,0,?,?}, "8">; def VST4q16_UPD : VST4DWB<0b0001, {0,1,?,?}, "16">; def VST4q32_UPD : VST4DWB<0b0001, {1,0,?,?}, "32">; -def VST4q8Pseudo_UPD : VSTQQQQWBPseudo; -def VST4q16Pseudo_UPD : VSTQQQQWBPseudo; -def VST4q32Pseudo_UPD : VSTQQQQWBPseudo; +def VST4q8Pseudo_UPD : VSTQQQQWBPseudo, Sched<[WriteVST4]>; +def VST4q16Pseudo_UPD : VSTQQQQWBPseudo, Sched<[WriteVST4]>; +def VST4q32Pseudo_UPD : VSTQQQQWBPseudo, Sched<[WriteVST4]>; // ...alternate versions to be allocated odd register numbers: -def VST4q8oddPseudo : VSTQQQQPseudo; -def VST4q16oddPseudo : VSTQQQQPseudo; -def VST4q32oddPseudo : VSTQQQQPseudo; +def VST4q8oddPseudo : VSTQQQQPseudo, Sched<[WriteVST4]>; +def VST4q16oddPseudo : VSTQQQQPseudo, Sched<[WriteVST4]>; +def 
VST4q32oddPseudo : VSTQQQQPseudo, Sched<[WriteVST4]>; -def VST4q8oddPseudo_UPD : VSTQQQQWBPseudo; -def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo; -def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo; +def VST4q8oddPseudo_UPD : VSTQQQQWBPseudo, Sched<[WriteVST4]>; +def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo, Sched<[WriteVST4]>; +def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo, Sched<[WriteVST4]>; } // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 @@ -2060,12 +2065,13 @@ class VST1LN op11_8, bits<4> op7_4, string Dt, ValueType Ty, : NLdStLn<1, 0b00, op11_8, op7_4, (outs), (ins AddrMode:$Rn, DPR:$Vd, nohash_imm:$lane), IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "", - [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), AddrMode:$Rn)]> { + [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), AddrMode:$Rn)]>, + Sched<[WriteVST1]> { let Rm = 0b1111; let DecoderMethod = "DecodeVST1LN"; } class VST1QLNPseudo - : VSTQLNPseudo { + : VSTQLNPseudo, Sched<[WriteVST1]> { let Pattern = [(StoreOp (ExtractOp (Ty QPR:$src), imm:$lane), addrmode6:$addr)]; } @@ -2104,11 +2110,12 @@ class VST1LNWB op11_8, bits<4> op7_4, string Dt, ValueType Ty, "\\{$Vd[$lane]\\}, $Rn$Rm", "$Rn.addr = $wb", [(set GPR:$wb, (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), - AdrMode:$Rn, am6offset:$Rm))]> { + AdrMode:$Rn, am6offset:$Rm))]>, + Sched<[WriteVST1]> { let DecoderMethod = "DecodeVST1LN"; } class VST1QLNWBPseudo - : VSTQLNWBPseudo { + : VSTQLNWBPseudo, Sched<[WriteVST1]> { let Pattern = [(set GPR:$wb, (StoreOp (ExtractOp (Ty QPR:$src), imm:$lane), addrmode6:$addr, am6offset:$offset))]; } @@ -2139,7 +2146,7 @@ class VST2LN op11_8, bits<4> op7_4, string Dt> : NLdStLn<1, 0b00, op11_8, op7_4, (outs), (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, nohash_imm:$lane), IIC_VST2ln, "vst2", Dt, "\\{$Vd[$lane], $src2[$lane]\\}, $Rn", - "", []> { + "", []>, Sched<[WriteVST1]> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVST2LN"; @@ -2155,9 +2162,9 @@ def VST2LNd32 : VST2LN<0b1001, {?,0,0,?}, "32"> { let Inst{7} = lane{0}; } -def VST2LNd8Pseudo : VSTQLNPseudo; -def VST2LNd16Pseudo : VSTQLNPseudo; -def VST2LNd32Pseudo : VSTQLNPseudo; +def VST2LNd8Pseudo : VSTQLNPseudo, Sched<[WriteVST1]>; +def VST2LNd16Pseudo : VSTQLNPseudo, Sched<[WriteVST1]>; +def VST2LNd32Pseudo : VSTQLNPseudo, Sched<[WriteVST1]>; // ...with double-spaced registers: def VST2LNq16 : VST2LN<0b0101, {?,?,1,?}, "16"> { @@ -2169,8 +2176,8 @@ def VST2LNq32 : VST2LN<0b1001, {?,1,0,?}, "32"> { let Inst{4} = Rn{4}; } -def VST2LNq16Pseudo : VSTQQLNPseudo; -def VST2LNq32Pseudo : VSTQQLNPseudo; +def VST2LNq16Pseudo : VSTQQLNPseudo, Sched<[WriteVST1]>; +def VST2LNq32Pseudo : VSTQQLNPseudo, Sched<[WriteVST1]>; // ...with address register writeback: class VST2LNWB op11_8, bits<4> op7_4, string Dt> @@ -2193,9 +2200,9 @@ def VST2LNd32_UPD : VST2LNWB<0b1001, {?,0,0,?}, "32"> { let Inst{7} = lane{0}; } -def VST2LNd8Pseudo_UPD : VSTQLNWBPseudo; -def VST2LNd16Pseudo_UPD : VSTQLNWBPseudo; -def VST2LNd32Pseudo_UPD : VSTQLNWBPseudo; +def VST2LNd8Pseudo_UPD : VSTQLNWBPseudo, Sched<[WriteVST1]>; +def VST2LNd16Pseudo_UPD : VSTQLNWBPseudo, Sched<[WriteVST1]>; +def VST2LNd32Pseudo_UPD : VSTQLNWBPseudo, Sched<[WriteVST1]>; def VST2LNq16_UPD : VST2LNWB<0b0101, {?,?,1,?}, "16"> { let Inst{7-6} = lane{1-0}; @@ -2204,15 +2211,16 @@ def VST2LNq32_UPD : VST2LNWB<0b1001, {?,1,0,?}, "32"> { let Inst{7} = lane{0}; } -def VST2LNq16Pseudo_UPD : VSTQQLNWBPseudo; -def VST2LNq32Pseudo_UPD : VSTQQLNWBPseudo; +def VST2LNq16Pseudo_UPD : VSTQQLNWBPseudo, Sched<[WriteVST1]>; +def VST2LNq32Pseudo_UPD : 
VSTQQLNWBPseudo, Sched<[WriteVST1]>; // VST3LN : Vector Store (single 3-element structure from one lane) class VST3LN op11_8, bits<4> op7_4, string Dt> : NLdStLn<1, 0b00, op11_8, op7_4, (outs), (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, nohash_imm:$lane), IIC_VST3ln, "vst3", Dt, - "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn", "", []> { + "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn", "", []>, + Sched<[WriteVST2]> { let Rm = 0b1111; let DecoderMethod = "DecodeVST3LN"; } @@ -2227,9 +2235,9 @@ def VST3LNd32 : VST3LN<0b1010, {?,0,0,0}, "32"> { let Inst{7} = lane{0}; } -def VST3LNd8Pseudo : VSTQQLNPseudo; -def VST3LNd16Pseudo : VSTQQLNPseudo; -def VST3LNd32Pseudo : VSTQQLNPseudo; +def VST3LNd8Pseudo : VSTQQLNPseudo, Sched<[WriteVST2]>; +def VST3LNd16Pseudo : VSTQQLNPseudo, Sched<[WriteVST2]>; +def VST3LNd32Pseudo : VSTQQLNPseudo, Sched<[WriteVST2]>; // ...with double-spaced registers: def VST3LNq16 : VST3LN<0b0110, {?,?,1,0}, "16"> { @@ -2263,9 +2271,9 @@ def VST3LNd32_UPD : VST3LNWB<0b1010, {?,0,0,0}, "32"> { let Inst{7} = lane{0}; } -def VST3LNd8Pseudo_UPD : VSTQQLNWBPseudo; -def VST3LNd16Pseudo_UPD : VSTQQLNWBPseudo; -def VST3LNd32Pseudo_UPD : VSTQQLNWBPseudo; +def VST3LNd8Pseudo_UPD : VSTQQLNWBPseudo, Sched<[WriteVST2]>; +def VST3LNd16Pseudo_UPD : VSTQQLNWBPseudo, Sched<[WriteVST2]>; +def VST3LNd32Pseudo_UPD : VSTQQLNWBPseudo, Sched<[WriteVST2]>; def VST3LNq16_UPD : VST3LNWB<0b0110, {?,?,1,0}, "16"> { let Inst{7-6} = lane{1-0}; @@ -2274,8 +2282,8 @@ def VST3LNq32_UPD : VST3LNWB<0b1010, {?,1,0,0}, "32"> { let Inst{7} = lane{0}; } -def VST3LNq16Pseudo_UPD : VSTQQQQLNWBPseudo; -def VST3LNq32Pseudo_UPD : VSTQQQQLNWBPseudo; +def VST3LNq16Pseudo_UPD : VSTQQQQLNWBPseudo, Sched<[WriteVST2]>; +def VST3LNq32Pseudo_UPD : VSTQQQQLNWBPseudo, Sched<[WriteVST2]>; // VST4LN : Vector Store (single 4-element structure from one lane) class VST4LN op11_8, bits<4> op7_4, string Dt> @@ -2283,7 +2291,7 @@ class VST4LN op11_8, bits<4> op7_4, string Dt> (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane), IIC_VST4ln, "vst4", Dt, "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn", - "", []> { + "", []>, Sched<[WriteVST2]> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVST4LN"; @@ -2300,9 +2308,9 @@ def VST4LNd32 : VST4LN<0b1011, {?,0,?,?}, "32"> { let Inst{5} = Rn{5}; } -def VST4LNd8Pseudo : VSTQQLNPseudo; -def VST4LNd16Pseudo : VSTQQLNPseudo; -def VST4LNd32Pseudo : VSTQQLNPseudo; +def VST4LNd8Pseudo : VSTQQLNPseudo, Sched<[WriteVST2]>; +def VST4LNd16Pseudo : VSTQQLNPseudo, Sched<[WriteVST2]>; +def VST4LNd32Pseudo : VSTQQLNPseudo, Sched<[WriteVST2]>; // ...with double-spaced registers: def VST4LNq16 : VST4LN<0b0111, {?,?,1,?}, "16"> { @@ -2313,8 +2321,8 @@ def VST4LNq32 : VST4LN<0b1011, {?,1,?,?}, "32"> { let Inst{5} = Rn{5}; } -def VST4LNq16Pseudo : VSTQQQQLNPseudo; -def VST4LNq32Pseudo : VSTQQQQLNPseudo; +def VST4LNq16Pseudo : VSTQQQQLNPseudo, Sched<[WriteVST2]>; +def VST4LNq32Pseudo : VSTQQQQLNPseudo, Sched<[WriteVST2]>; // ...with address register writeback: class VST4LNWB op11_8, bits<4> op7_4, string Dt> @@ -2339,9 +2347,9 @@ def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32"> { let Inst{5} = Rn{5}; } -def VST4LNd8Pseudo_UPD : VSTQQLNWBPseudo; -def VST4LNd16Pseudo_UPD : VSTQQLNWBPseudo; -def VST4LNd32Pseudo_UPD : VSTQQLNWBPseudo; +def VST4LNd8Pseudo_UPD : VSTQQLNWBPseudo, Sched<[WriteVST2]>; +def VST4LNd16Pseudo_UPD : VSTQQLNWBPseudo, Sched<[WriteVST2]>; +def VST4LNd32Pseudo_UPD : VSTQQLNWBPseudo, Sched<[WriteVST2]>; def 
VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16"> { let Inst{7-6} = lane{1-0}; @@ -2351,8 +2359,8 @@ def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32"> { let Inst{5} = Rn{5}; } -def VST4LNq16Pseudo_UPD : VSTQQQQLNWBPseudo; -def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo; +def VST4LNq16Pseudo_UPD : VSTQQQQLNWBPseudo, Sched<[WriteVST2]>; +def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo, Sched<[WriteVST2]>; } // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 diff --git a/interpreter/llvm/src/lib/Target/ARM/ARMInstrThumb.td b/interpreter/llvm/src/lib/Target/ARM/ARMInstrThumb.td index bee83dfb6f636..891a8f482f0a0 100644 --- a/interpreter/llvm/src/lib/Target/ARM/ARMInstrThumb.td +++ b/interpreter/llvm/src/lib/Target/ARM/ARMInstrThumb.td @@ -1413,14 +1413,15 @@ def tLEApcrelJT : tPseudoInst<(outs tGPR:$Rd), // Thumb-1 doesn't have the TBB or TBH instructions, but we can synthesize them // and make use of the same compressed jump table format as Thumb-2. -let Size = 2 in { +let Size = 2, isBranch = 1, isTerminator = 1, isBarrier = 1, + isIndirectBranch = 1 in { def tTBB_JT : tPseudoInst<(outs), - (ins tGPR:$base, tGPR:$index, i32imm:$jt, i32imm:$pclbl), 0, IIC_Br, []>, - Sched<[WriteBr]>; + (ins tGPRwithpc:$base, tGPR:$index, i32imm:$jt, i32imm:$pclbl), 0, + IIC_Br, []>, Sched<[WriteBr]>; def tTBH_JT : tPseudoInst<(outs), - (ins tGPR:$base, tGPR:$index, i32imm:$jt, i32imm:$pclbl), 0, IIC_Br, []>, - Sched<[WriteBr]>; + (ins tGPRwithpc:$base, tGPR:$index, i32imm:$jt, i32imm:$pclbl), 0, + IIC_Br, []>, Sched<[WriteBr]>; } //===----------------------------------------------------------------------===// diff --git a/interpreter/llvm/src/lib/Target/ARM/ARMInstrThumb2.td b/interpreter/llvm/src/lib/Target/ARM/ARMInstrThumb2.td index bf3d820e7b7d0..42eac12e457b2 100644 --- a/interpreter/llvm/src/lib/Target/ARM/ARMInstrThumb2.td +++ b/interpreter/llvm/src/lib/Target/ARM/ARMInstrThumb2.td @@ -3494,7 +3494,8 @@ def t2B : T2I<(outs), (ins thumb_br_target:$target), IIC_Br, let AsmMatchConverter = "cvtThumbBranches"; } -let Size = 4, isNotDuplicable = 1, isIndirectBranch = 1 in { +let Size = 4, isNotDuplicable = 1, isBranch = 1, isTerminator = 1, + isBarrier = 1, isIndirectBranch = 1 in { // available in both v8-M.Baseline and Thumb2 targets def t2BR_JT : t2basePseudoInst<(outs), @@ -4755,6 +4756,16 @@ def t2MOVsr: t2AsmPseudo<"mov${p} $Rd, $shift", def t2MOVSsr: t2AsmPseudo<"movs${p} $Rd, $shift", (ins rGPR:$Rd, so_reg_reg:$shift, pred:$p)>; +// Aliases for the above with the .w qualifier +def : t2InstAlias<"mov${p}.w $Rd, $shift", + (t2MOVsi rGPR:$Rd, t2_so_reg:$shift, pred:$p)>; +def : t2InstAlias<"movs${p}.w $Rd, $shift", + (t2MOVSsi rGPR:$Rd, t2_so_reg:$shift, pred:$p)>; +def : t2InstAlias<"mov${p}.w $Rd, $shift", + (t2MOVsr rGPR:$Rd, so_reg_reg:$shift, pred:$p)>; +def : t2InstAlias<"movs${p}.w $Rd, $shift", + (t2MOVSsr rGPR:$Rd, so_reg_reg:$shift, pred:$p)>; + // ADR w/o the .w suffix def : t2InstAlias<"adr${p} $Rd, $addr", (t2ADR rGPR:$Rd, t2adrlabel:$addr, pred:$p)>; @@ -4788,7 +4799,7 @@ def : t2InstAlias<"add${p} $Rd, pc, $imm", // Pseudo instruction ldr Rt, =immediate def t2LDRConstPool : t2AsmPseudo<"ldr${p} $Rt, $immediate", - (ins GPRnopc:$Rt, const_pool_asm_imm:$immediate, pred:$p)>; + (ins GPR:$Rt, const_pool_asm_imm:$immediate, pred:$p)>; // Version w/ the .w suffix. 
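Besides switching the base operand to tGPRwithpc (so a PC-relative jump-table base is representable), the tTBB_JT/tTBH_JT and t2BR_JT hunks above mark the jump-table dispatch pseudos isBranch, isTerminator, isBarrier and isIndirectBranch. Those flags are what downstream passes use to reason about block boundaries. A rough sketch of the invariant they feed, on simplified flags (this is not the MachineVerifier's actual code):

    #include <vector>

    struct InstrFlags { bool IsTerminator; bool IsBarrier; };

    // Terminators must come last in a block, and nothing may follow a
    // barrier such as a jump-table dispatch that never falls through.
    bool blockLayoutIsSane(const std::vector<InstrFlags> &MBB) {
      bool SeenTerminator = false;
      for (const InstrFlags &MI : MBB) {
        if (SeenTerminator && !MI.IsTerminator)
          return false;
        SeenTerminator |= MI.IsTerminator;
        if (MI.IsBarrier && &MI != &MBB.back())
          return false;
      }
      return true;
    }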
def : t2InstAlias<"ldr${p}.w $Rt, $immediate", (t2LDRConstPool GPRnopc:$Rt, diff --git a/interpreter/llvm/src/lib/Target/ARM/ARMInstrVFP.td b/interpreter/llvm/src/lib/Target/ARM/ARMInstrVFP.td index 0f225156d4cac..5d887c4fcbf24 100644 --- a/interpreter/llvm/src/lib/Target/ARM/ARMInstrVFP.td +++ b/interpreter/llvm/src/lib/Target/ARM/ARMInstrVFP.td @@ -1958,7 +1958,8 @@ def VFMSD : ADbI<0b11101, 0b10, 1, 0, [(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>; + Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>, + Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>; def VFMSS : ASbIn<0b11101, 0b10, 1, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -1966,7 +1967,8 @@ def VFMSS : ASbIn<0b11101, 0b10, 1, 0, [(set SPR:$Sd, (fadd_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]> { + Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>, + Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines. } @@ -1976,7 +1978,8 @@ def VFMSH : AHbI<0b11101, 0b10, 1, 0, IIC_fpFMAC16, "vfms", ".f16\t$Sd, $Sn, $Sm", []>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasFullFP16,UseFusedMAC]>; + Requires<[HasFullFP16,UseFusedMAC]>, + Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]>; def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), (VFMSD DPR:$dstin, DPR:$a, DPR:$b)>, @@ -2007,7 +2010,8 @@ def VFNMAD : ADbI<0b11101, 0b01, 1, 0, [(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>; + Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>, + Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>; def VFNMAS : ASbI<0b11101, 0b01, 1, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -2015,7 +2019,8 @@ def VFNMAS : ASbI<0b11101, 0b01, 1, 0, [(set SPR:$Sd, (fsub_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]> { + Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>, + Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines. 
} @@ -2025,7 +2030,8 @@ def VFNMAH : AHbI<0b11101, 0b01, 1, 0, IIC_fpFMAC16, "vfnma", ".f16\t$Sd, $Sn, $Sm", []>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasFullFP16,UseFusedMAC]>; + Requires<[HasFullFP16,UseFusedMAC]>, + Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]>; def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin), (VFNMAD DPR:$dstin, DPR:$a, DPR:$b)>, @@ -2056,14 +2062,16 @@ def VFNMSD : ADbI<0b11101, 0b01, 0, 0, [(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>; + Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>, + Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>; def VFNMSS : ASbI<0b11101, 0b01, 0, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), IIC_fpFMAC32, "vfnms", ".f32\t$Sd, $Sn, $Sm", [(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]> { + Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>, + Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines. } @@ -2073,7 +2081,8 @@ def VFNMSH : AHbI<0b11101, 0b01, 0, 0, IIC_fpFMAC16, "vfnms", ".f16\t$Sd, $Sn, $Sm", []>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasFullFP16,UseFusedMAC]>; + Requires<[HasFullFP16,UseFusedMAC]>, + Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]>; def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin), (VFNMSD DPR:$dstin, DPR:$a, DPR:$b)>, diff --git a/interpreter/llvm/src/lib/Target/ARM/ARMInstructionSelector.cpp b/interpreter/llvm/src/lib/Target/ARM/ARMInstructionSelector.cpp index 8c680cdf9b47f..faed6b867e2bc 100644 --- a/interpreter/llvm/src/lib/Target/ARM/ARMInstructionSelector.cpp +++ b/interpreter/llvm/src/lib/Target/ARM/ARMInstructionSelector.cpp @@ -20,6 +20,8 @@ #define DEBUG_TYPE "arm-isel" +#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h" + using namespace llvm; #ifndef LLVM_BUILD_GLOBAL_ISEL @@ -42,6 +44,33 @@ class ARMInstructionSelector : public InstructionSelector { private: bool selectImpl(MachineInstr &I) const; + struct CmpConstants; + struct InsertInfo; + + bool selectCmp(CmpConstants Helper, MachineInstrBuilder &MIB, + MachineRegisterInfo &MRI) const; + + // Helper for inserting a comparison sequence that sets \p ResReg to either 1 + // if \p LHSReg and \p RHSReg are in the relationship defined by \p Cond, or + // \p PrevRes otherwise. In essence, it computes PrevRes OR (LHS Cond RHS). + bool insertComparison(CmpConstants Helper, InsertInfo I, unsigned ResReg, + ARMCC::CondCodes Cond, unsigned LHSReg, unsigned RHSReg, + unsigned PrevRes) const; + + // Set \p DestReg to \p Constant. + void putConstant(InsertInfo I, unsigned DestReg, unsigned Constant) const; + + bool selectSelect(MachineInstrBuilder &MIB, MachineRegisterInfo &MRI) const; + + // Check if the types match and both operands have the expected size and + // register bank. + bool validOpRegPair(MachineRegisterInfo &MRI, unsigned LHS, unsigned RHS, + unsigned ExpectedSize, unsigned ExpectedRegBankID) const; + + // Check if the register has the expected size and register bank. 
+ bool validReg(MachineRegisterInfo &MRI, unsigned Reg, unsigned ExpectedSize, + unsigned ExpectedRegBankID) const; + const ARMBaseInstrInfo &TII; const ARMBaseRegisterInfo &TRI; const ARMBaseTargetMachine &TM; @@ -127,34 +156,30 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, return true; } -static bool selectSequence(MachineInstrBuilder &MIB, - const ARMBaseInstrInfo &TII, - MachineRegisterInfo &MRI, - const TargetRegisterInfo &TRI, - const RegisterBankInfo &RBI) { - assert(TII.getSubtarget().hasVFP2() && "Can't select sequence without VFP"); +static bool selectMergeValues(MachineInstrBuilder &MIB, + const ARMBaseInstrInfo &TII, + MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI, + const RegisterBankInfo &RBI) { + assert(TII.getSubtarget().hasVFP2() && "Can't select merge without VFP"); - // We only support G_SEQUENCE as a way to stick together two scalar GPRs + // We only support G_MERGE_VALUES as a way to stick together two scalar GPRs // into one DPR. unsigned VReg0 = MIB->getOperand(0).getReg(); (void)VReg0; assert(MRI.getType(VReg0).getSizeInBits() == 64 && RBI.getRegBank(VReg0, MRI, TRI)->getID() == ARM::FPRRegBankID && - "Unsupported operand for G_SEQUENCE"); + "Unsupported operand for G_MERGE_VALUES"); unsigned VReg1 = MIB->getOperand(1).getReg(); (void)VReg1; assert(MRI.getType(VReg1).getSizeInBits() == 32 && RBI.getRegBank(VReg1, MRI, TRI)->getID() == ARM::GPRRegBankID && - "Unsupported operand for G_SEQUENCE"); - unsigned VReg2 = MIB->getOperand(3).getReg(); + "Unsupported operand for G_MERGE_VALUES"); + unsigned VReg2 = MIB->getOperand(2).getReg(); (void)VReg2; assert(MRI.getType(VReg2).getSizeInBits() == 32 && RBI.getRegBank(VReg2, MRI, TRI)->getID() == ARM::GPRRegBankID && - "Unsupported operand for G_SEQUENCE"); - - // Remove the operands corresponding to the offsets. - MIB->RemoveOperand(4); - MIB->RemoveOperand(2); + "Unsupported operand for G_MERGE_VALUES"); MIB->setDesc(TII.get(ARM::VMOVDRR)); MIB.add(predOps(ARMCC::AL)); @@ -162,30 +187,32 @@ static bool selectSequence(MachineInstrBuilder &MIB, return true; } -static bool selectExtract(MachineInstrBuilder &MIB, const ARMBaseInstrInfo &TII, - MachineRegisterInfo &MRI, - const TargetRegisterInfo &TRI, - const RegisterBankInfo &RBI) { - assert(TII.getSubtarget().hasVFP2() && "Can't select extract without VFP"); +static bool selectUnmergeValues(MachineInstrBuilder &MIB, + const ARMBaseInstrInfo &TII, + MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI, + const RegisterBankInfo &RBI) { + assert(TII.getSubtarget().hasVFP2() && "Can't select unmerge without VFP"); - // We only support G_EXTRACT as a way to break up one DPR into two GPRs. + // We only support G_UNMERGE_VALUES as a way to break up one DPR into two + // GPRs. unsigned VReg0 = MIB->getOperand(0).getReg(); (void)VReg0; assert(MRI.getType(VReg0).getSizeInBits() == 32 && RBI.getRegBank(VReg0, MRI, TRI)->getID() == ARM::GPRRegBankID && - "Unsupported operand for G_EXTRACT"); + "Unsupported operand for G_UNMERGE_VALUES"); unsigned VReg1 = MIB->getOperand(1).getReg(); (void)VReg1; - assert(MRI.getType(VReg1).getSizeInBits() == 64 && - RBI.getRegBank(VReg1, MRI, TRI)->getID() == ARM::FPRRegBankID && - "Unsupported operand for G_EXTRACT"); - assert(MIB->getOperand(2).getImm() % 32 == 0 && - "Unsupported operand for G_EXTRACT"); - - // Remove the operands corresponding to the offsets. 
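The renames above track a GlobalISel opcode change: G_SEQUENCE/G_EXTRACT selection becomes selectMergeValues/selectUnmergeValues, and the offset-operand bookkeeping disappears because G_MERGE_VALUES/G_UNMERGE_VALUES carry no offsets. The supported shapes stay the same: two 32-bit GPRs packed into one 64-bit DPR via VMOVDRR, and (as the lines just below switch the opcode to) VMOVRRD for the reverse split. A bit-level model on plain integers:

    #include <cstdint>

    uint64_t mergeValues(uint32_t Lo, uint32_t Hi) { // G_MERGE_VALUES ~ VMOVDRR
      return (uint64_t(Hi) << 32) | Lo;
    }

    void unmergeValues(uint64_t D, uint32_t &Lo, uint32_t &Hi) { // ~ VMOVRRD
      Lo = uint32_t(D);
      Hi = uint32_t(D >> 32);
    }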
- MIB->getOperand(2).setImm(MIB->getOperand(2).getImm() / 32); + assert(MRI.getType(VReg1).getSizeInBits() == 32 && + RBI.getRegBank(VReg1, MRI, TRI)->getID() == ARM::GPRRegBankID && + "Unsupported operand for G_UNMERGE_VALUES"); + unsigned VReg2 = MIB->getOperand(2).getReg(); + (void)VReg2; + assert(MRI.getType(VReg2).getSizeInBits() == 64 && + RBI.getRegBank(VReg2, MRI, TRI)->getID() == ARM::FPRRegBankID && + "Unsupported operand for G_UNMERGE_VALUES"); - MIB->setDesc(TII.get(ARM::VGETLNi32)); + MIB->setDesc(TII.get(ARM::VMOVRRD)); MIB.add(predOps(ARMCC::AL)); return true; @@ -245,6 +272,259 @@ static unsigned selectLoadStoreOpCode(unsigned Opc, unsigned RegBank, return Opc; } +// When lowering comparisons, we sometimes need to perform two compares instead +// of just one. Get the condition codes for both comparisons. If only one is +// needed, the second member of the pair is ARMCC::AL. +static std::pair<ARMCC::CondCodes, ARMCC::CondCodes> +getComparePreds(CmpInst::Predicate Pred) { + std::pair<ARMCC::CondCodes, ARMCC::CondCodes> Preds = {ARMCC::AL, ARMCC::AL}; + switch (Pred) { + case CmpInst::FCMP_ONE: + Preds = {ARMCC::GT, ARMCC::MI}; + break; + case CmpInst::FCMP_UEQ: + Preds = {ARMCC::EQ, ARMCC::VS}; + break; + case CmpInst::ICMP_EQ: + case CmpInst::FCMP_OEQ: + Preds.first = ARMCC::EQ; + break; + case CmpInst::ICMP_SGT: + case CmpInst::FCMP_OGT: + Preds.first = ARMCC::GT; + break; + case CmpInst::ICMP_SGE: + case CmpInst::FCMP_OGE: + Preds.first = ARMCC::GE; + break; + case CmpInst::ICMP_UGT: + case CmpInst::FCMP_UGT: + Preds.first = ARMCC::HI; + break; + case CmpInst::FCMP_OLT: + Preds.first = ARMCC::MI; + break; + case CmpInst::ICMP_ULE: + case CmpInst::FCMP_OLE: + Preds.first = ARMCC::LS; + break; + case CmpInst::FCMP_ORD: + Preds.first = ARMCC::VC; + break; + case CmpInst::FCMP_UNO: + Preds.first = ARMCC::VS; + break; + case CmpInst::FCMP_UGE: + Preds.first = ARMCC::PL; + break; + case CmpInst::ICMP_SLT: + case CmpInst::FCMP_ULT: + Preds.first = ARMCC::LT; + break; + case CmpInst::ICMP_SLE: + case CmpInst::FCMP_ULE: + Preds.first = ARMCC::LE; + break; + case CmpInst::FCMP_UNE: + case CmpInst::ICMP_NE: + Preds.first = ARMCC::NE; + break; + case CmpInst::ICMP_UGE: + Preds.first = ARMCC::HS; + break; + case CmpInst::ICMP_ULT: + Preds.first = ARMCC::LO; + break; + default: + break; + } + assert(Preds.first != ARMCC::AL && "No comparisons needed?"); + return Preds; +} + +struct ARMInstructionSelector::CmpConstants { + CmpConstants(unsigned CmpOpcode, unsigned FlagsOpcode, unsigned OpRegBank, + unsigned OpSize) + : ComparisonOpcode(CmpOpcode), ReadFlagsOpcode(FlagsOpcode), + OperandRegBankID(OpRegBank), OperandSize(OpSize) {} + + // The opcode used for performing the comparison. + const unsigned ComparisonOpcode; + + // The opcode used for reading the flags set by the comparison. May be + // ARM::INSTRUCTION_LIST_END if we don't need to read the flags. + const unsigned ReadFlagsOpcode; + + // The assumed register bank ID for the operands. + const unsigned OperandRegBankID; + + // The assumed size in bits for the operands.
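getComparePreds returns up to two ARM condition codes because two IR predicates have no single NZCV encoding after a compare: FCMP_ONE (ordered, not equal) becomes GT followed by MI, and FCMP_UEQ becomes EQ followed by VS. selectCmp then chains the predicated moves through the previous result, computing PrevRes OR (LHS Cond RHS) exactly as the insertComparison comment earlier describes. A scalar model of the two-compare case:

    // FCMP_ONE maps to {GT, MI}: Res = (L > R) || (L < R). Each step either
    // writes 1 or forwards the previous value, which is what MOVCCi does with
    // its tied operand. NaN operands fail both compares, as FCMP_ONE requires.
    bool fcmpONE(float L, float R) {
      int Res = 0;                 // putConstant(ZeroReg, 0)
      int Tmp = (L > R) ? 1 : Res; // first compare, condition GT
      Res = (L < R) ? 1 : Tmp;     // second compare, condition MI
      return Res != 0;
    }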
+ const unsigned OperandSize; +}; + +struct ARMInstructionSelector::InsertInfo { + InsertInfo(MachineInstrBuilder &MIB) + : MBB(*MIB->getParent()), InsertBefore(std::next(MIB->getIterator())), + DbgLoc(MIB->getDebugLoc()) {} + + MachineBasicBlock &MBB; + const MachineBasicBlock::instr_iterator InsertBefore; + const DebugLoc &DbgLoc; +}; + +void ARMInstructionSelector::putConstant(InsertInfo I, unsigned DestReg, + unsigned Constant) const { + (void)BuildMI(I.MBB, I.InsertBefore, I.DbgLoc, TII.get(ARM::MOVi)) + .addDef(DestReg) + .addImm(Constant) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); +} + +bool ARMInstructionSelector::validOpRegPair(MachineRegisterInfo &MRI, + unsigned LHSReg, unsigned RHSReg, + unsigned ExpectedSize, + unsigned ExpectedRegBankID) const { + return MRI.getType(LHSReg) == MRI.getType(RHSReg) && + validReg(MRI, LHSReg, ExpectedSize, ExpectedRegBankID) && + validReg(MRI, RHSReg, ExpectedSize, ExpectedRegBankID); +} + +bool ARMInstructionSelector::validReg(MachineRegisterInfo &MRI, unsigned Reg, + unsigned ExpectedSize, + unsigned ExpectedRegBankID) const { + if (MRI.getType(Reg).getSizeInBits() != ExpectedSize) { + DEBUG(dbgs() << "Unexpected size for register"); + return false; + } + + if (RBI.getRegBank(Reg, MRI, TRI)->getID() != ExpectedRegBankID) { + DEBUG(dbgs() << "Unexpected register bank for register"); + return false; + } + + return true; +} + +bool ARMInstructionSelector::selectCmp(CmpConstants Helper, + MachineInstrBuilder &MIB, + MachineRegisterInfo &MRI) const { + const InsertInfo I(MIB); + + auto ResReg = MIB->getOperand(0).getReg(); + if (!validReg(MRI, ResReg, 1, ARM::GPRRegBankID)) + return false; + + auto Cond = + static_cast<CmpInst::Predicate>(MIB->getOperand(1).getPredicate()); + if (Cond == CmpInst::FCMP_TRUE || Cond == CmpInst::FCMP_FALSE) { + putConstant(I, ResReg, Cond == CmpInst::FCMP_TRUE ? 1 : 0); + MIB->eraseFromParent(); + return true; + } + + auto LHSReg = MIB->getOperand(2).getReg(); + auto RHSReg = MIB->getOperand(3).getReg(); + if (!validOpRegPair(MRI, LHSReg, RHSReg, Helper.OperandSize, + Helper.OperandRegBankID)) + return false; + + auto ARMConds = getComparePreds(Cond); + auto ZeroReg = MRI.createVirtualRegister(&ARM::GPRRegClass); + putConstant(I, ZeroReg, 0); + + if (ARMConds.second == ARMCC::AL) { + // Simple case, we only need one comparison and we're done. + if (!insertComparison(Helper, I, ResReg, ARMConds.first, LHSReg, RHSReg, + ZeroReg)) + return false; + } else { + // Not so simple, we need two successive comparisons. + auto IntermediateRes = MRI.createVirtualRegister(&ARM::GPRRegClass); + if (!insertComparison(Helper, I, IntermediateRes, ARMConds.first, LHSReg, + RHSReg, ZeroReg)) + return false; + if (!insertComparison(Helper, I, ResReg, ARMConds.second, LHSReg, RHSReg, + IntermediateRes)) + return false; + } + + MIB->eraseFromParent(); + return true; +} + +bool ARMInstructionSelector::insertComparison(CmpConstants Helper, InsertInfo I, + unsigned ResReg, + ARMCC::CondCodes Cond, + unsigned LHSReg, unsigned RHSReg, + unsigned PrevRes) const { + // Perform the comparison. + auto CmpI = + BuildMI(I.MBB, I.InsertBefore, I.DbgLoc, TII.get(Helper.ComparisonOpcode)) + .addUse(LHSReg) + .addUse(RHSReg) + .add(predOps(ARMCC::AL)); + if (!constrainSelectedInstRegOperands(*CmpI, TII, TRI, RBI)) + return false; + + // Read the comparison flags (if necessary).
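For a single-predicate compare the emitted sequence is: the comparison itself (CMPrr for G_ICMP, VCMPS/VCMPD for G_FCMP), then, only in the FP case, FMSTAT to copy the FPSCR flags into CPSR (the read-flags step handled just below), then MOVi #0 for the default result and a MOVCCi that overwrites it with 1 when the condition holds. A sketch for res = (a >= b) on f32; the mnemonics come from this hunk, the register numbers are invented:

    //   VCMPS  s0, s1           ; Helper.ComparisonOpcode, sets FPSCR NZCV
    //   FMSTAT                  ; Helper.ReadFlagsOpcode, FPSCR -> CPSR
    //   MOVi   r0, #0           ; putConstant: assume false
    //   MOVCCi r0, r0, #1 (ge)  ; write 1 on GE, else keep the tied r0
    // Runnable scalar equivalent:
    bool fcmpOGE(float A, float B) {
      bool Res = false; // MOVi #0
      if (A >= B)       // VCMPS + FMSTAT, predicate GE
        Res = true;     // MOVCCi
      return Res;
    }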
+ if (Helper.ReadFlagsOpcode != ARM::INSTRUCTION_LIST_END) { + auto ReadI = BuildMI(I.MBB, I.InsertBefore, I.DbgLoc, + TII.get(Helper.ReadFlagsOpcode)) + .add(predOps(ARMCC::AL)); + if (!constrainSelectedInstRegOperands(*ReadI, TII, TRI, RBI)) + return false; + } + + // Select either 1 or the previous result based on the value of the flags. + auto Mov1I = BuildMI(I.MBB, I.InsertBefore, I.DbgLoc, TII.get(ARM::MOVCCi)) + .addDef(ResReg) + .addUse(PrevRes) + .addImm(1) + .add(predOps(Cond, ARM::CPSR)); + if (!constrainSelectedInstRegOperands(*Mov1I, TII, TRI, RBI)) + return false; + + return true; +} + +bool ARMInstructionSelector::selectSelect(MachineInstrBuilder &MIB, + MachineRegisterInfo &MRI) const { + auto &MBB = *MIB->getParent(); + auto InsertBefore = std::next(MIB->getIterator()); + auto &DbgLoc = MIB->getDebugLoc(); + + // Compare the condition to 0. + auto CondReg = MIB->getOperand(1).getReg(); + assert(validReg(MRI, CondReg, 1, ARM::GPRRegBankID) && + "Unsupported types for select operation"); + auto CmpI = BuildMI(MBB, InsertBefore, DbgLoc, TII.get(ARM::CMPri)) + .addUse(CondReg) + .addImm(0) + .add(predOps(ARMCC::AL)); + if (!constrainSelectedInstRegOperands(*CmpI, TII, TRI, RBI)) + return false; + + // Move a value into the result register based on the result of the + // comparison. + auto ResReg = MIB->getOperand(0).getReg(); + auto TrueReg = MIB->getOperand(2).getReg(); + auto FalseReg = MIB->getOperand(3).getReg(); + assert(validOpRegPair(MRI, ResReg, TrueReg, 32, ARM::GPRRegBankID) && + validOpRegPair(MRI, TrueReg, FalseReg, 32, ARM::GPRRegBankID) && + "Unsupported types for select operation"); + auto Mov1I = BuildMI(MBB, InsertBefore, DbgLoc, TII.get(ARM::MOVCCr)) + .addDef(ResReg) + .addUse(TrueReg) + .addUse(FalseReg) + .add(predOps(ARMCC::EQ, ARM::CPSR)); + if (!constrainSelectedInstRegOperands(*Mov1I, TII, TRI, RBI)) + return false; + + MIB->eraseFromParent(); + return true; +} + bool ARMInstructionSelector::select(MachineInstr &I) const { assert(I.getParent() && "Instruction should be in a basic block!"); assert(I.getParent()->getParent() && "Instruction should be in a function!"); @@ -345,25 +625,36 @@ bool ARMInstructionSelector::select(MachineInstr &I) const { I.setDesc(TII.get(COPY)); return selectCopy(I, TII, MRI, TRI, RBI); } - case G_ADD: + case G_SELECT: + return selectSelect(MIB, MRI); + case G_ICMP: { + CmpConstants Helper(ARM::CMPrr, ARM::INSTRUCTION_LIST_END, + ARM::GPRRegBankID, 32); + return selectCmp(Helper, MIB, MRI); + } + case G_FCMP: { + assert(TII.getSubtarget().hasVFP2() && "Can't select fcmp without VFP"); + + unsigned OpReg = I.getOperand(2).getReg(); + unsigned Size = MRI.getType(OpReg).getSizeInBits(); + + if (Size == 64 && TII.getSubtarget().isFPOnlySP()) { + DEBUG(dbgs() << "Subtarget only supports single precision"); + return false; + } + if (Size != 32 && Size != 64) { + DEBUG(dbgs() << "Unsupported size for G_FCMP operand"); + return false; + } + + CmpConstants Helper(Size == 32 ? 
ARM::VCMPS : ARM::VCMPD, ARM::FMSTAT, + ARM::FPRRegBankID, Size); + return selectCmp(Helper, MIB, MRI); + } case G_GEP: I.setDesc(TII.get(ARM::ADDrr)); MIB.add(predOps(ARMCC::AL)).add(condCodeOp()); break; - case G_SUB: - I.setDesc(TII.get(ARM::SUBrr)); - MIB.add(predOps(ARMCC::AL)).add(condCodeOp()); - break; - case G_MUL: - if (TII.getSubtarget().hasV6Ops()) { - I.setDesc(TII.get(ARM::MUL)); - } else { - assert(TII.getSubtarget().useMulOps() && "Unsupported target"); - I.setDesc(TII.get(ARM::MULv5)); - MIB->getOperand(0).setIsEarlyClobber(true); - } - MIB.add(predOps(ARMCC::AL)).add(condCodeOp()); - break; case G_FRAME_INDEX: // Add 0 to the given frame index and hope it will eventually be folded into // the user(s). @@ -372,11 +663,10 @@ bool ARMInstructionSelector::select(MachineInstr &I) const { break; case G_CONSTANT: { unsigned Reg = I.getOperand(0).getReg(); - if (MRI.getType(Reg).getSizeInBits() != 32) + + if (!validReg(MRI, Reg, 32, ARM::GPRRegBankID)) return false; - assert(RBI.getRegBank(Reg, MRI, TRI)->getID() == ARM::GPRRegBankID && - "Expected constant to live in a GPR"); I.setDesc(TII.get(ARM::MOVi)); MIB.add(predOps(ARMCC::AL)).add(condCodeOp()); @@ -422,16 +712,39 @@ bool ARMInstructionSelector::select(MachineInstr &I) const { MIB.addImm(0).add(predOps(ARMCC::AL)); break; } - case G_SEQUENCE: { - if (!selectSequence(MIB, TII, MRI, TRI, RBI)) + case G_MERGE_VALUES: { + if (!selectMergeValues(MIB, TII, MRI, TRI, RBI)) return false; break; } - case G_EXTRACT: { - if (!selectExtract(MIB, TII, MRI, TRI, RBI)) + case G_UNMERGE_VALUES: { + if (!selectUnmergeValues(MIB, TII, MRI, TRI, RBI)) return false; break; } + case G_BRCOND: { + if (!validReg(MRI, I.getOperand(0).getReg(), 1, ARM::GPRRegBankID)) { + DEBUG(dbgs() << "Unsupported condition register for G_BRCOND"); + return false; + } + + // Set the flags. + auto Test = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(ARM::TSTri)) + .addReg(I.getOperand(0).getReg()) + .addImm(1) + .add(predOps(ARMCC::AL)); + if (!constrainSelectedInstRegOperands(*Test, TII, TRI, RBI)) + return false; + + // Branch conditionally. 
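G_BRCOND carries an s1 condition, so the lowering first materializes flags with a TST against #1 (Z ends up set exactly when bit 0 is clear) and then, on the lines that follow, emits a Bcc predicated on CPSR. A minimal model of the flag dance; which condition code the branch consumes is whatever the predOps(...) in the emitted Bcc selects:

    struct Flags { bool Z; };

    // TSTri Rn, #1 computes Rn & 1 and sets Z when the result is zero.
    Flags tstImm1(unsigned CondReg) { return Flags{(CondReg & 1) == 0}; }

    // Bcc consumes CPSR: taken on Z for EQ, on !Z for NE.
    bool bccTaken(const Flags &F, bool BranchOnEQ) {
      return BranchOnEQ ? F.Z : !F.Z;
    }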
+ auto Branch = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(ARM::Bcc)) + .add(I.getOperand(1)) + .add(predOps(ARMCC::EQ, ARM::CPSR)); + if (!constrainSelectedInstRegOperands(*Branch, TII, TRI, RBI)) + return false; + I.eraseFromParent(); + return true; + } default: return false; } diff --git a/interpreter/llvm/src/lib/Target/ARM/ARMLegalizerInfo.cpp b/interpreter/llvm/src/lib/Target/ARM/ARMLegalizerInfo.cpp index 5bf6c7aed6b82..1c17c07e4cb00 100644 --- a/interpreter/llvm/src/lib/Target/ARM/ARMLegalizerInfo.cpp +++ b/interpreter/llvm/src/lib/Target/ARM/ARMLegalizerInfo.cpp @@ -12,8 +12,10 @@ //===----------------------------------------------------------------------===// #include "ARMLegalizerInfo.h" +#include "ARMCallLowering.h" #include "ARMSubtarget.h" #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" +#include "llvm/CodeGen/LowLevelType.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/DerivedTypes.h" @@ -26,6 +28,10 @@ using namespace llvm; #error "You shouldn't build this" #endif +static bool AEABI(const ARMSubtarget &ST) { + return ST.isTargetAEABI() || ST.isTargetGNUAEABI() || ST.isTargetMuslAEABI(); +} + ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) { using namespace TargetOpcode; @@ -45,7 +51,7 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) { setAction({Op, 1, p0}, Legal); } - for (unsigned Op : {G_ADD, G_SUB, G_MUL}) { + for (unsigned Op : {G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR}) { for (auto Ty : {s1, s8, s16}) setAction({Op, Ty}, WidenScalar); setAction({Op, s32}, Legal); @@ -53,16 +59,24 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) { for (unsigned Op : {G_SDIV, G_UDIV}) { for (auto Ty : {s8, s16}) - // FIXME: We need WidenScalar here, but in the case of targets with - // software division we'll also need Libcall afterwards. Treat as Custom - // until we have better support for chaining legalization actions. 
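The deleted FIXME is resolved by exactly the chaining it asked for: s8/s16 division is now a plain WidenScalar, and the widened s32 operation is then legalized in its own right, Legal when the subtarget has integer divide in ARM mode and Libcall otherwise. A sketch of the resulting decision table (not the LegalizerInfo API itself):

    enum class Action { Legal, Libcall, WidenScalar };

    Action divAction(unsigned SizeInBits, bool HasHWDiv) {
      if (SizeInBits < 32)
        return Action::WidenScalar;      // s8/s16: first widen to s32 ...
      return HasHWDiv ? Action::Legal    // ... then SDIV/UDIV directly,
                      : Action::Libcall; // or an __aeabi_idiv-style call.
    }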
- setAction({Op, Ty}, Custom); + setAction({Op, Ty}, WidenScalar); if (ST.hasDivideInARMMode()) setAction({Op, s32}, Legal); else setAction({Op, s32}, Libcall); } + for (unsigned Op : {G_SREM, G_UREM}) { + for (auto Ty : {s8, s16}) + setAction({Op, Ty}, WidenScalar); + if (ST.hasDivideInARMMode()) + setAction({Op, s32}, Lower); + else if (AEABI(ST)) + setAction({Op, s32}, Custom); + else + setAction({Op, s32}, Libcall); + } + for (unsigned Op : {G_SEXT, G_ZEXT}) { setAction({Op, s32}, Legal); for (auto Ty : {s1, s8, s16}) @@ -72,7 +86,21 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) { setAction({G_GEP, p0}, Legal); setAction({G_GEP, 1, s32}, Legal); + setAction({G_SELECT, s32}, Legal); + setAction({G_SELECT, p0}, Legal); + setAction({G_SELECT, 1, s1}, Legal); + + setAction({G_BRCOND, s1}, Legal); + setAction({G_CONSTANT, s32}, Legal); + for (auto Ty : {s1, s8, s16}) + setAction({G_CONSTANT, Ty}, WidenScalar); + + setAction({G_ICMP, s1}, Legal); + for (auto Ty : {s8, s16}) + setAction({G_ICMP, 1, Ty}, WidenScalar); + for (auto Ty : {s32, p0}) + setAction({G_ICMP, 1, Ty}, Legal); if (!ST.useSoftFloat() && ST.hasVFP2()) { setAction({G_FADD, s32}, Legal); @@ -80,9 +108,22 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) { setAction({G_LOAD, s64}, Legal); setAction({G_STORE, s64}, Legal); + + setAction({G_FCMP, s1}, Legal); + setAction({G_FCMP, 1, s32}, Legal); + setAction({G_FCMP, 1, s64}, Legal); } else { for (auto Ty : {s32, s64}) setAction({G_FADD, Ty}, Libcall); + + setAction({G_FCMP, s1}, Legal); + setAction({G_FCMP, 1, s32}, Custom); + setAction({G_FCMP, 1, s64}, Custom); + + if (AEABI(ST)) + setFCmpLibcallsAEABI(); + else + setFCmpLibcallsGNU(); } for (unsigned Op : {G_FREM, G_FPOW}) @@ -92,47 +133,223 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) { computeTables(); } +void ARMLegalizerInfo::setFCmpLibcallsAEABI() { + // FCMP_TRUE and FCMP_FALSE don't need libcalls, they should be + // default-initialized. 
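The AEABI tables that follow map each FP predicate to one or two {libcall, result predicate} pairs. The AEABI comparison helpers return a boolean, so BAD_ICMP_PREDICATE marks entries whose result is used as-is, while the ICMP_EQ entries invert an ordered helper to obtain the unordered predicate. A sketch of how one entry is consumed; the function pointer stands in for __aeabi_fcmplt, which returns nonzero exactly when a < b (ordered):

    // FCMP_UGE maps to {OLT_F32, ICMP_EQ}: "unordered, greater or equal" is
    // computed as !(a < b), i.e. the helper's result compared equal to zero.
    int fcmpUGE(float A, float B, int (*fcmplt)(float, float)) {
      return fcmplt(A, B) == 0;
    }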
+ FCmp32Libcalls.resize(CmpInst::LAST_FCMP_PREDICATE + 1); + FCmp32Libcalls[CmpInst::FCMP_OEQ] = { + {RTLIB::OEQ_F32, CmpInst::BAD_ICMP_PREDICATE}}; + FCmp32Libcalls[CmpInst::FCMP_OGE] = { + {RTLIB::OGE_F32, CmpInst::BAD_ICMP_PREDICATE}}; + FCmp32Libcalls[CmpInst::FCMP_OGT] = { + {RTLIB::OGT_F32, CmpInst::BAD_ICMP_PREDICATE}}; + FCmp32Libcalls[CmpInst::FCMP_OLE] = { + {RTLIB::OLE_F32, CmpInst::BAD_ICMP_PREDICATE}}; + FCmp32Libcalls[CmpInst::FCMP_OLT] = { + {RTLIB::OLT_F32, CmpInst::BAD_ICMP_PREDICATE}}; + FCmp32Libcalls[CmpInst::FCMP_ORD] = {{RTLIB::O_F32, CmpInst::ICMP_EQ}}; + FCmp32Libcalls[CmpInst::FCMP_UGE] = {{RTLIB::OLT_F32, CmpInst::ICMP_EQ}}; + FCmp32Libcalls[CmpInst::FCMP_UGT] = {{RTLIB::OLE_F32, CmpInst::ICMP_EQ}}; + FCmp32Libcalls[CmpInst::FCMP_ULE] = {{RTLIB::OGT_F32, CmpInst::ICMP_EQ}}; + FCmp32Libcalls[CmpInst::FCMP_ULT] = {{RTLIB::OGE_F32, CmpInst::ICMP_EQ}}; + FCmp32Libcalls[CmpInst::FCMP_UNE] = {{RTLIB::UNE_F32, CmpInst::ICMP_EQ}}; + FCmp32Libcalls[CmpInst::FCMP_UNO] = { + {RTLIB::UO_F32, CmpInst::BAD_ICMP_PREDICATE}}; + FCmp32Libcalls[CmpInst::FCMP_ONE] = { + {RTLIB::OGT_F32, CmpInst::BAD_ICMP_PREDICATE}, + {RTLIB::OLT_F32, CmpInst::BAD_ICMP_PREDICATE}}; + FCmp32Libcalls[CmpInst::FCMP_UEQ] = { + {RTLIB::OEQ_F32, CmpInst::BAD_ICMP_PREDICATE}, + {RTLIB::UO_F32, CmpInst::BAD_ICMP_PREDICATE}}; + + FCmp64Libcalls.resize(CmpInst::LAST_FCMP_PREDICATE + 1); + FCmp64Libcalls[CmpInst::FCMP_OEQ] = { + {RTLIB::OEQ_F64, CmpInst::BAD_ICMP_PREDICATE}}; + FCmp64Libcalls[CmpInst::FCMP_OGE] = { + {RTLIB::OGE_F64, CmpInst::BAD_ICMP_PREDICATE}}; + FCmp64Libcalls[CmpInst::FCMP_OGT] = { + {RTLIB::OGT_F64, CmpInst::BAD_ICMP_PREDICATE}}; + FCmp64Libcalls[CmpInst::FCMP_OLE] = { + {RTLIB::OLE_F64, CmpInst::BAD_ICMP_PREDICATE}}; + FCmp64Libcalls[CmpInst::FCMP_OLT] = { + {RTLIB::OLT_F64, CmpInst::BAD_ICMP_PREDICATE}}; + FCmp64Libcalls[CmpInst::FCMP_ORD] = {{RTLIB::O_F64, CmpInst::ICMP_EQ}}; + FCmp64Libcalls[CmpInst::FCMP_UGE] = {{RTLIB::OLT_F64, CmpInst::ICMP_EQ}}; + FCmp64Libcalls[CmpInst::FCMP_UGT] = {{RTLIB::OLE_F64, CmpInst::ICMP_EQ}}; + FCmp64Libcalls[CmpInst::FCMP_ULE] = {{RTLIB::OGT_F64, CmpInst::ICMP_EQ}}; + FCmp64Libcalls[CmpInst::FCMP_ULT] = {{RTLIB::OGE_F64, CmpInst::ICMP_EQ}}; + FCmp64Libcalls[CmpInst::FCMP_UNE] = {{RTLIB::UNE_F64, CmpInst::ICMP_EQ}}; + FCmp64Libcalls[CmpInst::FCMP_UNO] = { + {RTLIB::UO_F64, CmpInst::BAD_ICMP_PREDICATE}}; + FCmp64Libcalls[CmpInst::FCMP_ONE] = { + {RTLIB::OGT_F64, CmpInst::BAD_ICMP_PREDICATE}, + {RTLIB::OLT_F64, CmpInst::BAD_ICMP_PREDICATE}}; + FCmp64Libcalls[CmpInst::FCMP_UEQ] = { + {RTLIB::OEQ_F64, CmpInst::BAD_ICMP_PREDICATE}, + {RTLIB::UO_F64, CmpInst::BAD_ICMP_PREDICATE}}; +} + +void ARMLegalizerInfo::setFCmpLibcallsGNU() { + // FCMP_TRUE and FCMP_FALSE don't need libcalls, they should be + // default-initialized. 
+ FCmp32Libcalls.resize(CmpInst::LAST_FCMP_PREDICATE + 1); + FCmp32Libcalls[CmpInst::FCMP_OEQ] = {{RTLIB::OEQ_F32, CmpInst::ICMP_EQ}}; + FCmp32Libcalls[CmpInst::FCMP_OGE] = {{RTLIB::OGE_F32, CmpInst::ICMP_SGE}}; + FCmp32Libcalls[CmpInst::FCMP_OGT] = {{RTLIB::OGT_F32, CmpInst::ICMP_SGT}}; + FCmp32Libcalls[CmpInst::FCMP_OLE] = {{RTLIB::OLE_F32, CmpInst::ICMP_SLE}}; + FCmp32Libcalls[CmpInst::FCMP_OLT] = {{RTLIB::OLT_F32, CmpInst::ICMP_SLT}}; + FCmp32Libcalls[CmpInst::FCMP_ORD] = {{RTLIB::O_F32, CmpInst::ICMP_EQ}}; + FCmp32Libcalls[CmpInst::FCMP_UGE] = {{RTLIB::OLT_F32, CmpInst::ICMP_SGE}}; + FCmp32Libcalls[CmpInst::FCMP_UGT] = {{RTLIB::OLE_F32, CmpInst::ICMP_SGT}}; + FCmp32Libcalls[CmpInst::FCMP_ULE] = {{RTLIB::OGT_F32, CmpInst::ICMP_SLE}}; + FCmp32Libcalls[CmpInst::FCMP_ULT] = {{RTLIB::OGE_F32, CmpInst::ICMP_SLT}}; + FCmp32Libcalls[CmpInst::FCMP_UNE] = {{RTLIB::UNE_F32, CmpInst::ICMP_NE}}; + FCmp32Libcalls[CmpInst::FCMP_UNO] = {{RTLIB::UO_F32, CmpInst::ICMP_NE}}; + FCmp32Libcalls[CmpInst::FCMP_ONE] = {{RTLIB::OGT_F32, CmpInst::ICMP_SGT}, + {RTLIB::OLT_F32, CmpInst::ICMP_SLT}}; + FCmp32Libcalls[CmpInst::FCMP_UEQ] = {{RTLIB::OEQ_F32, CmpInst::ICMP_EQ}, + {RTLIB::UO_F32, CmpInst::ICMP_NE}}; + + FCmp64Libcalls.resize(CmpInst::LAST_FCMP_PREDICATE + 1); + FCmp64Libcalls[CmpInst::FCMP_OEQ] = {{RTLIB::OEQ_F64, CmpInst::ICMP_EQ}}; + FCmp64Libcalls[CmpInst::FCMP_OGE] = {{RTLIB::OGE_F64, CmpInst::ICMP_SGE}}; + FCmp64Libcalls[CmpInst::FCMP_OGT] = {{RTLIB::OGT_F64, CmpInst::ICMP_SGT}}; + FCmp64Libcalls[CmpInst::FCMP_OLE] = {{RTLIB::OLE_F64, CmpInst::ICMP_SLE}}; + FCmp64Libcalls[CmpInst::FCMP_OLT] = {{RTLIB::OLT_F64, CmpInst::ICMP_SLT}}; + FCmp64Libcalls[CmpInst::FCMP_ORD] = {{RTLIB::O_F64, CmpInst::ICMP_EQ}}; + FCmp64Libcalls[CmpInst::FCMP_UGE] = {{RTLIB::OLT_F64, CmpInst::ICMP_SGE}}; + FCmp64Libcalls[CmpInst::FCMP_UGT] = {{RTLIB::OLE_F64, CmpInst::ICMP_SGT}}; + FCmp64Libcalls[CmpInst::FCMP_ULE] = {{RTLIB::OGT_F64, CmpInst::ICMP_SLE}}; + FCmp64Libcalls[CmpInst::FCMP_ULT] = {{RTLIB::OGE_F64, CmpInst::ICMP_SLT}}; + FCmp64Libcalls[CmpInst::FCMP_UNE] = {{RTLIB::UNE_F64, CmpInst::ICMP_NE}}; + FCmp64Libcalls[CmpInst::FCMP_UNO] = {{RTLIB::UO_F64, CmpInst::ICMP_NE}}; + FCmp64Libcalls[CmpInst::FCMP_ONE] = {{RTLIB::OGT_F64, CmpInst::ICMP_SGT}, + {RTLIB::OLT_F64, CmpInst::ICMP_SLT}}; + FCmp64Libcalls[CmpInst::FCMP_UEQ] = {{RTLIB::OEQ_F64, CmpInst::ICMP_EQ}, + {RTLIB::UO_F64, CmpInst::ICMP_NE}}; +} + +ARMLegalizerInfo::FCmpLibcallsList +ARMLegalizerInfo::getFCmpLibcalls(CmpInst::Predicate Predicate, + unsigned Size) const { + assert(CmpInst::isFPPredicate(Predicate) && "Unsupported FCmp predicate"); + if (Size == 32) + return FCmp32Libcalls[Predicate]; + if (Size == 64) + return FCmp64Libcalls[Predicate]; + llvm_unreachable("Unsupported size for FCmp predicate"); +} + bool ARMLegalizerInfo::legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder) const { using namespace TargetOpcode; + MIRBuilder.setInstr(MI); + switch (MI.getOpcode()) { default: return false; - case G_SDIV: - case G_UDIV: { - LLT Ty = MRI.getType(MI.getOperand(0).getReg()); - if (Ty != LLT::scalar(16) && Ty != LLT::scalar(8)) + case G_SREM: + case G_UREM: { + unsigned OriginalResult = MI.getOperand(0).getReg(); + auto Size = MRI.getType(OriginalResult).getSizeInBits(); + if (Size != 32) return false; - // We need to widen to 32 bits and then maybe, if the target requires, - // transform into a libcall. - LegalizerHelper Helper(MIRBuilder.getMF()); + auto Libcall = + MI.getOpcode() == G_SREM ? 
RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32;
-    MachineInstr *NewMI = nullptr;
-    Helper.MIRBuilder.recordInsertions([&](MachineInstr *MI) {
-      // Store the new, 32-bit div instruction.
-      if (MI->getOpcode() == G_SDIV || MI->getOpcode() == G_UDIV)
-        NewMI = MI;
-    });
+    // Our divmod libcalls return a struct containing the quotient and the
+    // remainder. We need to create a virtual register for it.
+    auto &Ctx = MIRBuilder.getMF().getFunction()->getContext();
+    Type *ArgTy = Type::getInt32Ty(Ctx);
+    StructType *RetTy = StructType::get(Ctx, {ArgTy, ArgTy}, /* Packed */ true);
+    auto RetVal = MRI.createGenericVirtualRegister(
+        getLLTForType(*RetTy, MIRBuilder.getMF().getDataLayout()));
-    auto Result = Helper.widenScalar(MI, 0, LLT::scalar(32));
-    Helper.MIRBuilder.stopRecordingInsertions();
-    if (Result == LegalizerHelper::UnableToLegalize) {
+    auto Status = createLibcall(MIRBuilder, Libcall, {RetVal, RetTy},
+                                {{MI.getOperand(1).getReg(), ArgTy},
+                                 {MI.getOperand(2).getReg(), ArgTy}});
+    if (Status != LegalizerHelper::Legalized)
       return false;
+
+    // The remainder is the second result of divmod. Split the return value into
+    // a new, unused register for the quotient and the destination of the
+    // original instruction for the remainder.
+    MIRBuilder.buildUnmerge(
+        {MRI.createGenericVirtualRegister(LLT::scalar(32)), OriginalResult},
+        RetVal);
+    break;
+  }
+  case G_FCMP: {
+    assert(MRI.getType(MI.getOperand(2).getReg()) ==
+               MRI.getType(MI.getOperand(3).getReg()) &&
+           "Mismatched operands for G_FCMP");
+    auto OpSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
+
+    auto OriginalResult = MI.getOperand(0).getReg();
+    auto Predicate =
+        static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
+    auto Libcalls = getFCmpLibcalls(Predicate, OpSize);
+
+    if (Libcalls.empty()) {
+      assert((Predicate == CmpInst::FCMP_TRUE ||
+              Predicate == CmpInst::FCMP_FALSE) &&
+             "Predicate needs libcalls, but none specified");
+      MIRBuilder.buildConstant(OriginalResult,
+                               Predicate == CmpInst::FCMP_TRUE ? 1 : 0);
+      MI.eraseFromParent();
+      return true;
     }
-    assert(NewMI && "Couldn't find widened instruction");
-    assert((NewMI->getOpcode() == G_SDIV || NewMI->getOpcode() == G_UDIV) &&
-           "Unexpected widened instruction");
-    assert(MRI.getType(NewMI->getOperand(0).getReg()).getSizeInBits() == 32 &&
-           "Unexpected type for the widened instruction");
-
-    Result = Helper.legalizeInstrStep(*NewMI);
-    if (Result == LegalizerHelper::UnableToLegalize) {
-      return false;
+
+    auto &Ctx = MIRBuilder.getMF().getFunction()->getContext();
+    assert((OpSize == 32 || OpSize == 64) && "Unsupported operand size");
+    auto *ArgTy = OpSize == 32 ? Type::getFloatTy(Ctx) : Type::getDoubleTy(Ctx);
+    auto *RetTy = Type::getInt32Ty(Ctx);
+
+    SmallVector<unsigned, 2> Results;
+    for (auto Libcall : Libcalls) {
+      auto LibcallResult = MRI.createGenericVirtualRegister(LLT::scalar(32));
+      auto Status =
+          createLibcall(MIRBuilder, Libcall.LibcallID, {LibcallResult, RetTy},
+                        {{MI.getOperand(2).getReg(), ArgTy},
+                         {MI.getOperand(3).getReg(), ArgTy}});
+
+      if (Status != LegalizerHelper::Legalized)
+        return false;
+
+      auto ProcessedResult =
+          Libcalls.size() == 1
+              ? OriginalResult
+              : MRI.createGenericVirtualRegister(MRI.getType(OriginalResult));
+
+      // We have a result, but we need to transform it into a proper 1-bit 0 or
+      // 1, taking into account the different peculiarities of the values
+      // returned by the comparison functions.
+      CmpInst::Predicate ResultPred = Libcall.Predicate;
+      if (ResultPred == CmpInst::BAD_ICMP_PREDICATE) {
+        // We have a nice 0 or 1, and we just need to truncate it back to 1 bit
+        // to keep the types consistent.
+        MIRBuilder.buildTrunc(ProcessedResult, LibcallResult);
+      } else {
+        // We need to compare against 0.
+        assert(CmpInst::isIntPredicate(ResultPred) && "Unsupported predicate");
+        auto Zero = MRI.createGenericVirtualRegister(LLT::scalar(32));
+        MIRBuilder.buildConstant(Zero, 0);
+        MIRBuilder.buildICmp(ResultPred, ProcessedResult, LibcallResult, Zero);
+      }
+      Results.push_back(ProcessedResult);
+    }
+
+    if (Results.size() != 1) {
+      assert(Results.size() == 2 && "Unexpected number of results");
+      MIRBuilder.buildOr(OriginalResult, Results[0], Results[1]);
     }
-    return true;
+    break;
   }
   }
+
+  MI.eraseFromParent();
+  return true;
 }
diff --git a/interpreter/llvm/src/lib/Target/ARM/ARMLegalizerInfo.h b/interpreter/llvm/src/lib/Target/ARM/ARMLegalizerInfo.h
index a9bdd367737e5..78ab9412c04ba 100644
--- a/interpreter/llvm/src/lib/Target/ARM/ARMLegalizerInfo.h
+++ b/interpreter/llvm/src/lib/Target/ARM/ARMLegalizerInfo.h
@@ -14,7 +14,10 @@
 #ifndef LLVM_LIB_TARGET_ARM_ARMMACHINELEGALIZER_H
 #define LLVM_LIB_TARGET_ARM_ARMMACHINELEGALIZER_H
 
+#include "llvm/ADT/IndexedMap.h"
 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+#include "llvm/CodeGen/RuntimeLibcalls.h"
+#include "llvm/IR/Instructions.h"
 
 namespace llvm {
 
@@ -27,6 +30,36 @@ class ARMLegalizerInfo : public LegalizerInfo {
   bool legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI,
                       MachineIRBuilder &MIRBuilder) const override;
+
+private:
+  void setFCmpLibcallsGNU();
+  void setFCmpLibcallsAEABI();
+
+  struct FCmpLibcallInfo {
+    // Which libcall this is.
+    RTLIB::Libcall LibcallID;
+
+    // The predicate to be used when comparing the value returned by the
+    // function with a relevant constant (currently hard-coded to zero). This is
+    // necessary because often the libcall will return e.g. a value greater than
+    // 0 to represent 'true' and anything negative to represent 'false', or
+    // maybe 0 to represent 'true' and non-zero for 'false'. If no comparison is
+    // needed, this should be CmpInst::BAD_ICMP_PREDICATE.
+    CmpInst::Predicate Predicate;
+  };
+  using FCmpLibcallsList = SmallVector<FCmpLibcallInfo, 2>;
+
+  // Map from each FCmp predicate to the corresponding libcall infos. An FCmp
+  // instruction may be lowered to one or two libcalls, which is why we need a
+  // list. If two libcalls are needed, their results will be OR'ed.
+  using FCmpLibcallsMapTy = IndexedMap<FCmpLibcallsList>;
+
+  FCmpLibcallsMapTy FCmp32Libcalls;
+  FCmpLibcallsMapTy FCmp64Libcalls;
+
+  // Get the libcall(s) corresponding to \p Predicate for operands of \p Size
+  // bits.
+  FCmpLibcallsList getFCmpLibcalls(CmpInst::Predicate, unsigned Size) const;
 };
 
 } // End llvm namespace.
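As a worked illustration of the two-entry case in the mapping above, here is a minimal standalone sketch, not part of this patch, of what the legalized G_FCMP computes for FCMP_UEQ on 32-bit floats under the GNU mapping (assuming __eqsf2 and __unordsf2 are the usual runtime expansions of RTLIB::OEQ_F32 and RTLIB::UO_F32):

extern "C" int __eqsf2(float, float);    // 0 iff ordered and A == B
extern "C" int __unordsf2(float, float); // nonzero iff A or B is NaN

// Scalar model of the FCMP_UEQ lowering: two libcalls, each post-processed
// with its ICMP predicate against zero, then OR'ed together.
bool fcmpUEQ(float A, float B) {
  bool OrderedEq = __eqsf2(A, B) == 0;    // {OEQ_F32, ICMP_EQ}
  bool Unordered = __unordsf2(A, B) != 0; // {UO_F32, ICMP_NE}
  return OrderedEq || Unordered;
}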
#endif
diff --git a/interpreter/llvm/src/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/interpreter/llvm/src/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 72fcf7cd6a4fd..7a452d4a20952 100644
--- a/interpreter/llvm/src/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/interpreter/llvm/src/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -26,6 +26,7 @@
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
@@ -33,7 +34,6 @@
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/RegisterClassInfo.h"
 #include "llvm/CodeGen/SelectionDAGNodes.h"
-#include "llvm/CodeGen/LivePhysRegs.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Function.h"
diff --git a/interpreter/llvm/src/lib/Target/ARM/ARMMCInstLower.cpp b/interpreter/llvm/src/lib/Target/ARM/ARMMCInstLower.cpp
index 9e9c1ba6c1140..48b02d40b2466 100644
--- a/interpreter/llvm/src/lib/Target/ARM/ARMMCInstLower.cpp
+++ b/interpreter/llvm/src/lib/Target/ARM/ARMMCInstLower.cpp
@@ -25,9 +25,9 @@
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/IR/Constants.h"
+#include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCInstBuilder.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -153,9 +153,7 @@ void llvm::LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
     break;
   }
 
-  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
-    const MachineOperand &MO = MI->getOperand(i);
-
+  for (const MachineOperand &MO : MI->operands()) {
     MCOperand MCOp;
     if (AP.lowerOperand(MO, MCOp)) {
       if (MCOp.isImm() && EncodeImms) {
diff --git a/interpreter/llvm/src/lib/Target/ARM/ARMMacroFusion.cpp b/interpreter/llvm/src/lib/Target/ARM/ARMMacroFusion.cpp
new file mode 100644
index 0000000000000..1b6e97c28d453
--- /dev/null
+++ b/interpreter/llvm/src/lib/Target/ARM/ARMMacroFusion.cpp
@@ -0,0 +1,57 @@
+//===- ARMMacroFusion.cpp - ARM Macro Fusion ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This file contains the ARM implementation of the DAG scheduling
+/// mutation to pair instructions back to back.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMMacroFusion.h"
+#include "ARMSubtarget.h"
+#include "llvm/CodeGen/MacroFusion.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+namespace llvm {
+
+/// \brief Check if the instr pair, FirstMI and SecondMI, should be fused
+/// together. Given SecondMI, when FirstMI is unspecified, then check if
+/// SecondMI may be part of a fused pair at all.
+static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
+                                   const TargetSubtargetInfo &TSI,
+                                   const MachineInstr *FirstMI,
+                                   const MachineInstr &SecondMI) {
+  const ARMSubtarget &ST = static_cast<const ARMSubtarget&>(TSI);
+
+  // Assume wildcards for unspecified instrs.
+  unsigned FirstOpcode =
+      FirstMI ? FirstMI->getOpcode()
+              : static_cast<unsigned>(ARM::INSTRUCTION_LIST_END);
+  unsigned SecondOpcode = SecondMI.getOpcode();
+
+  if (ST.hasFuseAES())
+    // Fuse AES crypto operations.
+    switch(SecondOpcode) {
+    // AES encode.
+    case ARM::AESMC :
+      return FirstOpcode == ARM::AESE ||
+             FirstOpcode == ARM::INSTRUCTION_LIST_END;
+    // AES decode.
+    case ARM::AESIMC:
+      return FirstOpcode == ARM::AESD ||
+             FirstOpcode == ARM::INSTRUCTION_LIST_END;
+    }
+
+  return false;
+}
+
+std::unique_ptr<ScheduleDAGMutation> createARMMacroFusionDAGMutation () {
+  return createMacroFusionDAGMutation(shouldScheduleAdjacent);
+}
+
+} // end namespace llvm
diff --git a/interpreter/llvm/src/lib/Target/ARM/ARMMacroFusion.h b/interpreter/llvm/src/lib/Target/ARM/ARMMacroFusion.h
new file mode 100644
index 0000000000000..1e4fc6687eae8
--- /dev/null
+++ b/interpreter/llvm/src/lib/Target/ARM/ARMMacroFusion.h
@@ -0,0 +1,24 @@
+//===- ARMMacroFusion.h - ARM Macro Fusion ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This file contains the ARM definition of the DAG scheduling mutation
+/// to pair instructions back to back.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineScheduler.h"
+
+namespace llvm {
+
+/// Note that you have to add:
+///   DAG.addMutation(createARMMacroFusionDAGMutation());
+/// to ARMPassConfig::createMachineScheduler() to have an effect.
+std::unique_ptr<ScheduleDAGMutation> createARMMacroFusionDAGMutation();
+
+} // llvm
diff --git a/interpreter/llvm/src/lib/Target/ARM/ARMRegisterBankInfo.cpp b/interpreter/llvm/src/lib/Target/ARM/ARMRegisterBankInfo.cpp
index a20997c95cd9a..8449302358948 100644
--- a/interpreter/llvm/src/lib/Target/ARM/ARMRegisterBankInfo.cpp
+++ b/interpreter/llvm/src/lib/Target/ARM/ARMRegisterBankInfo.cpp
@@ -212,8 +212,6 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
   const MachineFunction &MF = *MI.getParent()->getParent();
   const MachineRegisterInfo &MRI = MF.getRegInfo();
-  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
-
   unsigned NumOperands = MI.getNumOperands();
   const ValueMapping *OperandsMapping = &ARM::ValueMappings[ARM::GPR3OpsIdx];
@@ -221,6 +219,9 @@
   case G_ADD:
   case G_SUB:
   case G_MUL:
+  case G_AND:
+  case G_OR:
+  case G_XOR:
   case G_SDIV:
   case G_UDIV:
   case G_SEXT:
@@ -233,51 +234,110 @@
     OperandsMapping = &ARM::ValueMappings[ARM::GPR3OpsIdx];
     break;
   case G_LOAD:
-  case G_STORE:
+  case G_STORE: {
+    LLT Ty = MRI.getType(MI.getOperand(0).getReg());
     OperandsMapping = Ty.getSizeInBits() == 64
                           ? getOperandsMapping({&ARM::ValueMappings[ARM::DPR3OpsIdx],
                                                 &ARM::ValueMappings[ARM::GPR3OpsIdx]})
                           : &ARM::ValueMappings[ARM::GPR3OpsIdx];
     break;
-  case G_FADD:
+  }
+  case G_FADD: {
+    LLT Ty = MRI.getType(MI.getOperand(0).getReg());
     assert((Ty.getSizeInBits() == 32 || Ty.getSizeInBits() == 64) &&
            "Unsupported size for G_FADD");
     OperandsMapping = Ty.getSizeInBits() == 64
                           ? &ARM::ValueMappings[ARM::DPR3OpsIdx]
                           : &ARM::ValueMappings[ARM::SPR3OpsIdx];
     break;
+  }
   case G_CONSTANT:
   case G_FRAME_INDEX:
     OperandsMapping =
         getOperandsMapping({&ARM::ValueMappings[ARM::GPR3OpsIdx], nullptr});
     break;
-  case G_SEQUENCE: {
-    // We only support G_SEQUENCE for creating a double precision floating point
-    // value out of two GPRs.
- LLT Ty1 = MRI.getType(MI.getOperand(1).getReg()); + case G_SELECT: { + LLT Ty = MRI.getType(MI.getOperand(0).getReg()); + (void)Ty; + LLT Ty2 = MRI.getType(MI.getOperand(1).getReg()); + (void)Ty2; + assert(Ty.getSizeInBits() == 32 && "Unsupported size for G_SELECT"); + assert(Ty2.getSizeInBits() == 1 && "Unsupported size for G_SELECT"); + OperandsMapping = + getOperandsMapping({&ARM::ValueMappings[ARM::GPR3OpsIdx], + &ARM::ValueMappings[ARM::GPR3OpsIdx], + &ARM::ValueMappings[ARM::GPR3OpsIdx], + &ARM::ValueMappings[ARM::GPR3OpsIdx]}); + break; + } + case G_ICMP: { + LLT Ty2 = MRI.getType(MI.getOperand(2).getReg()); + (void)Ty2; + assert(Ty2.getSizeInBits() == 32 && "Unsupported size for G_ICMP"); + OperandsMapping = + getOperandsMapping({&ARM::ValueMappings[ARM::GPR3OpsIdx], nullptr, + &ARM::ValueMappings[ARM::GPR3OpsIdx], + &ARM::ValueMappings[ARM::GPR3OpsIdx]}); + break; + } + case G_FCMP: { + LLT Ty = MRI.getType(MI.getOperand(0).getReg()); + (void)Ty; + LLT Ty1 = MRI.getType(MI.getOperand(2).getReg()); LLT Ty2 = MRI.getType(MI.getOperand(3).getReg()); + (void)Ty2; + assert(Ty.getSizeInBits() == 1 && "Unsupported size for G_FCMP"); + assert(Ty1.getSizeInBits() == Ty2.getSizeInBits() && + "Mismatched operand sizes for G_FCMP"); + + unsigned Size = Ty1.getSizeInBits(); + assert((Size == 32 || Size == 64) && "Unsupported size for G_FCMP"); + + auto FPRValueMapping = Size == 32 ? &ARM::ValueMappings[ARM::SPR3OpsIdx] + : &ARM::ValueMappings[ARM::DPR3OpsIdx]; + OperandsMapping = + getOperandsMapping({&ARM::ValueMappings[ARM::GPR3OpsIdx], nullptr, + FPRValueMapping, FPRValueMapping}); + break; + } + case G_MERGE_VALUES: { + // We only support G_MERGE_VALUES for creating a double precision floating + // point value out of two GPRs. + LLT Ty = MRI.getType(MI.getOperand(0).getReg()); + LLT Ty1 = MRI.getType(MI.getOperand(1).getReg()); + LLT Ty2 = MRI.getType(MI.getOperand(2).getReg()); if (Ty.getSizeInBits() != 64 || Ty1.getSizeInBits() != 32 || Ty2.getSizeInBits() != 32) return getInvalidInstructionMapping(); OperandsMapping = getOperandsMapping({&ARM::ValueMappings[ARM::DPR3OpsIdx], - &ARM::ValueMappings[ARM::GPR3OpsIdx], nullptr, - &ARM::ValueMappings[ARM::GPR3OpsIdx], nullptr}); + &ARM::ValueMappings[ARM::GPR3OpsIdx], + &ARM::ValueMappings[ARM::GPR3OpsIdx]}); break; } - case G_EXTRACT: { - // We only support G_EXTRACT for splitting a double precision floating point - // value into two GPRs. + case G_UNMERGE_VALUES: { + // We only support G_UNMERGE_VALUES for splitting a double precision + // floating point value into two GPRs. 
+    LLT Ty = MRI.getType(MI.getOperand(0).getReg());
     LLT Ty1 = MRI.getType(MI.getOperand(1).getReg());
-    if (Ty.getSizeInBits() != 32 || Ty1.getSizeInBits() != 64 ||
-        MI.getOperand(2).getImm() % 32 != 0)
+    LLT Ty2 = MRI.getType(MI.getOperand(2).getReg());
+    if (Ty.getSizeInBits() != 32 || Ty1.getSizeInBits() != 32 ||
+        Ty2.getSizeInBits() != 64)
       return getInvalidInstructionMapping();
-    OperandsMapping = getOperandsMapping({&ARM::ValueMappings[ARM::GPR3OpsIdx],
-                                          &ARM::ValueMappings[ARM::DPR3OpsIdx],
-                                          nullptr, nullptr});
+    OperandsMapping =
+        getOperandsMapping({&ARM::ValueMappings[ARM::GPR3OpsIdx],
+                            &ARM::ValueMappings[ARM::GPR3OpsIdx],
+                            &ARM::ValueMappings[ARM::DPR3OpsIdx]});
     break;
   }
+  case G_BR:
+    OperandsMapping = getOperandsMapping({nullptr});
+    break;
+  case G_BRCOND:
+    OperandsMapping =
+        getOperandsMapping({&ARM::ValueMappings[ARM::GPR3OpsIdx], nullptr});
+    break;
   default:
     return getInvalidInstructionMapping();
   }
diff --git a/interpreter/llvm/src/lib/Target/ARM/ARMRegisterInfo.td b/interpreter/llvm/src/lib/Target/ARM/ARMRegisterInfo.td
index 02cbfb1fa9f15..b10583bc7983c 100644
--- a/interpreter/llvm/src/lib/Target/ARM/ARMRegisterInfo.td
+++ b/interpreter/llvm/src/lib/Target/ARM/ARMRegisterInfo.td
@@ -245,6 +245,10 @@ def rGPR : RegisterClass<"ARM", [i32], 32, (sub GPR, SP, PC)> {
 // the general GPR register class above (MOV, e.g.)
 def tGPR : RegisterClass<"ARM", [i32], 32, (trunc GPR, 8)>;
 
+// Thumb registers R0-R7 and the PC. Some instructions like TBB or TBH allow
+// the PC to be used as a destination operand as well.
+def tGPRwithpc : RegisterClass<"ARM", [i32], 32, (add tGPR, PC)>;
+
 // The high registers in thumb mode, R8-R15.
 def hGPR : RegisterClass<"ARM", [i32], 32, (sub GPR, tGPR)>;
 
diff --git a/interpreter/llvm/src/lib/Target/ARM/ARMSchedule.td b/interpreter/llvm/src/lib/Target/ARM/ARMSchedule.td
index 87eb4c2b9074d..53e012f13ee24 100644
--- a/interpreter/llvm/src/lib/Target/ARM/ARMSchedule.td
+++ b/interpreter/llvm/src/lib/Target/ARM/ARMSchedule.td
@@ -131,11 +131,25 @@ def WriteFPDIV64 : SchedWrite;
 def WriteFPSQRT32 : SchedWrite;
 def WriteFPSQRT64 : SchedWrite;
 
+// Vector load and stores
+def WriteVLD1 : SchedWrite;
+def WriteVLD2 : SchedWrite;
+def WriteVLD3 : SchedWrite;
+def WriteVLD4 : SchedWrite;
+def WriteVST1 : SchedWrite;
+def WriteVST2 : SchedWrite;
+def WriteVST3 : SchedWrite;
+def WriteVST4 : SchedWrite;
+
+
 // Define TII for use in SchedVariant Predicates.
 def : PredicateProlog<[{
   const ARMBaseInstrInfo *TII =
     static_cast<const ARMBaseInstrInfo*>(SchedModel->getInstrInfo());
   (void)TII;
+  const ARMSubtarget *STI =
+    static_cast<const ARMSubtarget*>(SchedModel->getSubtargetInfo());
+  (void)STI;
 }]>;
 
 def IsPredicatedPred : SchedPredicate<[{TII->isPredicated(*MI)}]>;
@@ -409,3 +423,5 @@ include "ARMScheduleA8.td"
 include "ARMScheduleA9.td"
 include "ARMScheduleSwift.td"
 include "ARMScheduleR52.td"
+include "ARMScheduleA57.td"
+include "ARMScheduleM3.td"
diff --git a/interpreter/llvm/src/lib/Target/ARM/ARMScheduleA57.td b/interpreter/llvm/src/lib/Target/ARM/ARMScheduleA57.td
new file mode 100644
index 0000000000000..525079d12d516
--- /dev/null
+++ b/interpreter/llvm/src/lib/Target/ARM/ARMScheduleA57.td
@@ -0,0 +1,1471 @@
+//=- ARMScheduleA57.td - ARM Cortex-A57 Scheduling Defs -----*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for ARM Cortex-A57 to support
+// instruction scheduling and other instruction cost heuristics.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// *** Common description and scheduling model parameters taken from AArch64 ***
+// The Cortex-A57 is a traditional superscalar microprocessor with a
+// conservative 3-wide in-order stage for decode and dispatch. Combined with the
+// much wider out-of-order issue stage, this produced a need to carefully
+// schedule micro-ops so that all three decoded each cycle are successfully
+// issued as the reservation station(s) simply don't stay occupied for long.
+// Therefore, IssueWidth is set to the narrower of the two at three, while still
+// modeling the machine as out-of-order.
+
+def IsCPSRDefinedPred : SchedPredicate<[{TII->isCPSRDefined(*MI)}]>;
+def IsCPSRDefinedAndPredicatedPred :
+  SchedPredicate<[{TII->isCPSRDefined(*MI) && TII->isPredicated(*MI)}]>;
+
+// Cortex A57 rev. r1p0 or later (false = r0px)
+def IsR1P0AndLaterPred : SchedPredicate<[{false}]>;
+
+// If Addrmode3 contains register offset (not immediate)
+def IsLdrAm3RegOffPred :
+  SchedPredicate<[{!TII->isAddrMode3OpImm(*MI, 1)}]>;
+// The same predicate with operand offset 2 and 3:
+def IsLdrAm3RegOffPredX2 :
+  SchedPredicate<[{!TII->isAddrMode3OpImm(*MI, 2)}]>;
+def IsLdrAm3RegOffPredX3 :
+  SchedPredicate<[{!TII->isAddrMode3OpImm(*MI, 3)}]>;
+
+// If Addrmode3 contains "minus register"
+def IsLdrAm3NegRegOffPred :
+  SchedPredicate<[{TII->isAddrMode3OpMinusReg(*MI, 1)}]>;
+// The same predicate with operand offset 2 and 3:
+def IsLdrAm3NegRegOffPredX2 :
+  SchedPredicate<[{TII->isAddrMode3OpMinusReg(*MI, 2)}]>;
+def IsLdrAm3NegRegOffPredX3 :
+  SchedPredicate<[{TII->isAddrMode3OpMinusReg(*MI, 3)}]>;
+
+// Load, scaled register offset, not plus LSL2
+def IsLdstsoScaledNotOptimalPredX0 :
+  SchedPredicate<[{TII->isLdstScaledRegNotPlusLsl2(*MI, 0)}]>;
+def IsLdstsoScaledNotOptimalPred :
+  SchedPredicate<[{TII->isLdstScaledRegNotPlusLsl2(*MI, 1)}]>;
+def IsLdstsoScaledNotOptimalPredX2 :
+  SchedPredicate<[{TII->isLdstScaledRegNotPlusLsl2(*MI, 2)}]>;
+
+// Load, scaled register offset
+def IsLdstsoScaledPred :
+  SchedPredicate<[{TII->isLdstScaledReg(*MI, 1)}]>;
+def IsLdstsoScaledPredX2 :
+  SchedPredicate<[{TII->isLdstScaledReg(*MI, 2)}]>;
+
+def IsLdstsoMinusRegPredX0 :
+  SchedPredicate<[{TII->isLdstSoMinusReg(*MI, 0)}]>;
+def IsLdstsoMinusRegPred :
+  SchedPredicate<[{TII->isLdstSoMinusReg(*MI, 1)}]>;
+def IsLdstsoMinusRegPredX2 :
+  SchedPredicate<[{TII->isLdstSoMinusReg(*MI, 2)}]>;
+
+// Load, scaled register offset
+def IsLdrAm2ScaledPred :
+  SchedPredicate<[{TII->isAm2ScaledReg(*MI, 1)}]>;
+
+// LDM, base reg in list
+def IsLdmBaseRegInList :
+  SchedPredicate<[{TII->isLDMBaseRegInList(*MI)}]>;
+
+class A57WriteLMOpsListType<list<SchedWrite> writes> {
+  list<SchedWrite> Writes = writes;
+  SchedMachineModel SchedModel = ?;
+}
+
+// *** Common description and scheduling model parameters taken from AArch64 ***
+// (AArch64SchedA57.td)
+def CortexA57Model : SchedMachineModel {
+  let IssueWidth = 3;          // 3-way decode and dispatch
+  let MicroOpBufferSize = 128; // 128 micro-op re-order buffer
+  let LoadLatency = 4;         // Optimistic load latency
+  let MispredictPenalty = 16;  // Fetch + Decode/Rename/Dispatch + Branch
+
+  // Enable partial & runtime unrolling.
+ let LoopMicroOpBufferSize = 16; + let CompleteModel = 1; +} + +//===----------------------------------------------------------------------===// +// Define each kind of processor resource and number available on Cortex-A57. +// Cortex A-57 has 8 pipelines that each has its own 8-entry queue where +// micro-ops wait for their operands and then issue out-of-order. + +def A57UnitB : ProcResource<1>; // Type B micro-ops +def A57UnitI : ProcResource<2>; // Type I micro-ops +def A57UnitM : ProcResource<1>; // Type M micro-ops +def A57UnitL : ProcResource<1>; // Type L micro-ops +def A57UnitS : ProcResource<1>; // Type S micro-ops + +def A57UnitX : ProcResource<1>; // Type X micro-ops (F1) +def A57UnitW : ProcResource<1>; // Type W micro-ops (F0) + +let SchedModel = CortexA57Model in { + def A57UnitV : ProcResGroup<[A57UnitX, A57UnitW]>; // Type V micro-ops +} + +let SchedModel = CortexA57Model in { + +//===----------------------------------------------------------------------===// +// Define customized scheduler read/write types specific to the Cortex-A57. + +include "ARMScheduleA57WriteRes.td" + +// To have "CompleteModel = 1", support of pseudos and special instructions +def : InstRW<[WriteNoop], (instregex "(t)?BKPT$", "(t2)?CDP(2)?$", + "(t2)?CLREX$", "CONSTPOOL_ENTRY$", "COPY_STRUCT_BYVAL_I32$", + "(t2)?CPS[123]p$", "(t2)?DBG$", "(t2)?DMB$", "(t2)?DSB$", "ERET$", + "(t2|t)?HINT$", "(t)?HLT$", "(t2)?HVC$", "(t2)?ISB$", "ITasm$", + "(t2)?RFE(DA|DB|IA|IB)", "(t)?SETEND", "(t2)?SETPAN", "(t2)?SMC", "SPACE", + "(t2)?SRS(DA|DB|IA|IB)", "SWP(B)?", "t?TRAP", "UDF$", "t2DCPS", "t2SG", + "t2TT", "tCPS", "CMP_SWAP", "t?SVC", "t2IT", "CompilerBarrier")>; + +def : InstRW<[WriteNoop], (instregex "VMRS", "VMSR", "FMSTAT")>; + +// Specific memory instrs +def : InstRW<[WriteNoop, WriteNoop], (instregex "(t2)?LDA", "(t2)?LDC", "(t2)?STC", + "(t2)?STL", "(t2)?LDREX", "(t2)?STREX", "MEMCPY")>; + +// coprocessor moves +def : InstRW<[WriteNoop, WriteNoop], (instregex + "(t2)?MCR(2|R|R2)?$", "(t2)?MRC(2)?$", + "(t2)?MRRC(2)?$", "(t2)?MRS(banked|sys|_AR|_M|sys_AR)?$", + "(t2)?MSR(banked|i|_AR|_M)?$")>; + +// Deprecated instructions +def : InstRW<[WriteNoop], (instregex "FLDM", "FSTM")>; + +// Pseudos +def : InstRW<[WriteNoop], (instregex "(t2)?ABS$", + "(t)?ADJCALLSTACKDOWN$", "(t)?ADJCALLSTACKUP$", "(t2|t)?Int_eh_sjlj", + "tLDRpci_pic", "t2SUBS_PC_LR", + "JUMPTABLE", "tInt_WIN_eh_sjlj_longjmp", + "VLD(1|2)LN(d|q)(WB_fixed_|WB_register_)?Asm", + "VLD(3|4)(DUP|LN)?(d|q)(WB_fixed_|WB_register_)?Asm", + "VST(1|2)LN(d|q)(WB_fixed_|WB_register_)?Asm", + "VST(3|4)(DUP|LN)?(d|q)(WB_fixed_|WB_register_)?Asm", + "WIN__CHKSTK", "WIN__DBZCHK")>; + +// Miscellaneous +// ----------------------------------------------------------------------------- + +def : InstRW<[A57Write_1cyc_1I], (instrs COPY)>; + +// --- 3.2 Branch Instructions --- +// B, BX, BL, BLX (imm, reg != LR, reg == LR), CBZ, CBNZ + +def : InstRW<[A57Write_1cyc_1B], (instregex "(t2|t)?B$", "t?BX", "(t2|t)?Bcc$", + "t?TAILJMP(d|r)", "TCRETURN(d|r)i", "tBfar", "tCBN?Z")>; +def : InstRW<[A57Write_1cyc_1B_1I], + (instregex "t?BL$", "BL_pred$", "t?BLXi", "t?TPsoft")>; +def : InstRW<[A57Write_2cyc_1B_1I], (instregex "BLX", "tBLX(NS)?r")>; +// Pseudos +def : InstRW<[A57Write_2cyc_1B_1I], (instregex "BCCi64", "BCCZi64")>; +def : InstRW<[A57Write_3cyc_1B_1I], (instregex "BR_JTadd", "t?BR_JTr", + "t2BR_JT", "t2BXJ", "(t2)?TB(B|H)(_JT)?$", "tBRIND")>; +def : InstRW<[A57Write_6cyc_1B_1L], (instregex "BR_JTm")>; + +// --- 3.3 Arithmetic and Logical Instructions --- +// ADD{S}, 
ADC{S}, ADR, AND{S}, BIC{S}, CMN, CMP, EOR{S}, ORN{S}, ORR{S},
+// RSB{S}, RSC{S}, SUB{S}, SBC{S}, TEQ, TST
+
+def : InstRW<[A57Write_1cyc_1I], (instregex "tADDframe")>;
+
+// shift by register, conditional or unconditional
+// TODO: according to the doc, conditional uses I0/I1, unconditional uses M
+// Why more complex instruction uses more simple pipeline?
+// May be an error in doc.
+def A57WriteALUsi : SchedWriteVariant<[
+  // lsl #2, lsl #1, or lsr #1.
+  SchedVar,
+  SchedVar
+]>;
+def A57WriteALUsr : SchedWriteVariant<[
+  SchedVar,
+  SchedVar
+]>;
+def A57WriteALUSsr : SchedWriteVariant<[
+  SchedVar,
+  SchedVar
+]>;
+def A57ReadALUsr : SchedReadVariant<[
+  SchedVar,
+  SchedVar
+]>;
+def : SchedAlias;
+def : SchedAlias;
+def : SchedAlias;
+def : SchedAlias;
+
+def A57WriteCMPsr : SchedWriteVariant<[
+  SchedVar,
+  SchedVar
+]>;
+def : SchedAlias;
+def : SchedAlias;
+def : SchedAlias;
+
+// --- 3.4 Move and Shift Instructions ---
+// Move, basic
+// MOV{S}, MOVW, MVN{S}
+def : InstRW<[A57Write_1cyc_1I], (instregex "MOV(r|i|i16|r_TC)",
+  "(t2)?MVN(CC)?(r|i)", "BMOVPCB_CALL", "BMOVPCRX_CALL",
+  "MOVCC(r|i|i16|i32imm)", "tMOV", "tMVN")>;
+
+// Move, shift by immed, setflags/no setflags
+// (ASR, LSL, LSR, ROR, RRX)=MOVsi, MVN
+// setflags = isCPSRDefined
+def A57WriteMOVsi : SchedWriteVariant<[
+  SchedVar,
+  SchedVar
+]>;
+def : InstRW<[A57WriteMOVsi], (instregex "MOV(CC)?si", "MVNsi",
+  "ASRi", "(t2|t)ASRri", "LSRi", "(t2|t)LSRri", "LSLi", "(t2|t)LSLri", "RORi",
+  "(t2|t)RORri", "(t2)?RRX", "t2MOV", "tROR")>;
+
+// shift by register, conditional or unconditional, setflags/no setflags
+def A57WriteMOVsr : SchedWriteVariant<[
+  SchedVar,
+  SchedVar,
+  SchedVar,
+  SchedVar
+]>;
+def : InstRW<[A57WriteMOVsr], (instregex "MOV(CC)?sr", "MVNsr", "t2MVNs",
+  "ASRr", "(t2|t)ASRrr", "LSRr", "(t2|t)LSRrr", "LSLr", "(t2|t)?LSLrr", "RORr",
+  "(t2|t)RORrr")>;
+
+// Move, top
+// MOVT - A57Write_2cyc_1M for r0px, A57Write_1cyc_1I for r1p0 and later
+def A57WriteMOVT : SchedWriteVariant<[
+  SchedVar,
+  SchedVar
+]>;
+def : InstRW<[A57WriteMOVT], (instregex "MOVTi16")>;
+
+def A57WriteI2pc :
+  WriteSequence<[A57Write_1cyc_1I, A57Write_1cyc_1I, A57Write_1cyc_1I]>;
+def A57WriteI2ld :
+  WriteSequence<[A57Write_1cyc_1I, A57Write_1cyc_1I, A57Write_4cyc_1L]>;
+def : InstRW< [A57WriteI2pc], (instregex "MOV_ga_pcrel")>;
+def : InstRW< [A57WriteI2ld], (instregex "MOV_ga_pcrel_ldr")>;
+
+// +2cyc for branch forms
+def : InstRW<[A57Write_3cyc_1I], (instregex "MOVPC(LR|RX)")>;
+
+// --- 3.5 Divide and Multiply Instructions ---
+// Divide: SDIV, UDIV
+// latency from documentation: 4 - 20, maximum taken
+def : SchedAlias;
+// Multiply: tMul not bound to common WriteRes types
+def : InstRW<[A57Write_3cyc_1M], (instregex "tMUL")>;
+def : SchedAlias;
+def : SchedAlias;
+def : ReadAdvance;
+
+// Multiply accumulate: MLA, MLS, SMLABB, SMLABT, SMLATB, SMLATT, SMLAWB,
+// SMLAWT, SMLAD{X}, SMLSD{X}, SMMLA{R}, SMMLS{R}
+// Multiply-accumulate pipelines support late-forwarding of accumulate operands
+// from similar μops, allowing a typical sequence of multiply-accumulate μops
+// to issue one every 1 cycle (sched advance = 2).
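The arithmetic behind that note, as a standalone sketch before the definitions that follow (a simplified view of the scheduler's operand-latency computation; the clamp to zero is this sketch's assumption, not taken from the patch):

#include <algorithm>

// With the A57WriteMLA latency of 3 defined below and a SchedReadAdvance of 2
// on the accumulator operand, a dependent multiply-accumulate sees an
// effective latency of 3 - 2 = 1 cycle, so a chain of MLAs issues one per
// cycle.
int effectiveOperandLatency(int WriteLatency, int ReadAdvance) {
  return std::max(WriteLatency - ReadAdvance, 0);
}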
+def A57WriteMLA : SchedWriteRes<[A57UnitM]> { let Latency = 3; } +def A57WriteMLAL : SchedWriteRes<[A57UnitM]> { let Latency = 4; } +def A57ReadMLA : SchedReadAdvance<2, [A57WriteMLA, A57WriteMLAL]>; + +def : SchedAlias; +def : SchedAlias; +def : SchedAlias; + +def : SchedAlias; +def : SchedAlias; + +// Multiply long: SMULL, UMULL +def : SchedAlias; +def : SchedAlias; + +// --- 3.6 Saturating and Parallel Arithmetic Instructions --- +// Parallel arith +// SADD16, SADD8, SSUB16, SSUB8, UADD16, UADD8, USUB16, USUB8 +// Conditional GE-setting instructions require three extra μops +// and two additional cycles to conditionally update the GE field. +def A57WriteParArith : SchedWriteVariant<[ + SchedVar, + SchedVar +]>; +def : InstRW< [A57WriteParArith], (instregex + "(t2)?SADD(16|8)", "(t2)?SSUB(16|8)", + "(t2)?UADD(16|8)", "(t2)?USUB(16|8)")>; + +// Parallel arith with exchange: SASX, SSAX, UASX, USAX +def A57WriteParArithExch : SchedWriteVariant<[ + SchedVar, + SchedVar +]>; +def : InstRW<[A57WriteParArithExch], + (instregex "(t2)?SASX", "(t2)?SSAX", "(t2)?UASX", "(t2)?USAX")>; + +// Parallel halving arith +// SHADD16, SHADD8, SHSUB16, SHSUB8, UHADD16, UHADD8, UHSUB16, UHSUB8 +def : InstRW<[A57Write_2cyc_1M], (instregex + "(t2)?SHADD(16|8)", "(t2)?SHSUB(16|8)", + "(t2)?UHADD(16|8)", "(t2)?UHSUB(16|8)")>; + +// Parallel halving arith with exchange +// SHASX, SHSAX, UHASX, UHSAX +def : InstRW<[A57Write_3cyc_1I_1M], (instregex "(t2)?SHASX", "(t2)?SHSAX", + "(t2)?UHASX", "(t2)?UHSAX")>; + +// Parallel saturating arith +// QADD16, QADD8, QSUB16, QSUB8, UQADD16, UQADD8, UQSUB16, UQSUB8 +def : InstRW<[A57Write_2cyc_1M], (instregex "QADD(16|8)", "QSUB(16|8)", + "UQADD(16|8)", "UQSUB(16|8)", "t2(U?)QADD", "t2(U?)QSUB")>; + +// Parallel saturating arith with exchange +// QASX, QSAX, UQASX, UQSAX +def : InstRW<[A57Write_3cyc_1I_1M], (instregex "(t2)?QASX", "(t2)?QSAX", + "(t2)?UQASX", "(t2)?UQSAX")>; + +// Saturate: SSAT, SSAT16, USAT, USAT16 +def : InstRW<[A57Write_2cyc_1M], + (instregex "(t2)?SSAT(16)?", "(t2)?USAT(16)?")>; + +// Saturating arith: QADD, QSUB +def : InstRW<[A57Write_2cyc_1M], (instregex "QADD$", "QSUB$")>; + +// Saturating doubling arith: QDADD, QDSUB +def : InstRW<[A57Write_3cyc_1I_1M], (instregex "(t2)?QDADD", "(t2)?QDSUB")>; + +// --- 3.7 Miscellaneous Data-Processing Instructions --- +// Bit field extract: SBFX, UBFX +def : InstRW<[A57Write_1cyc_1I], (instregex "(t2)?SBFX", "(t2)?UBFX")>; + +// Bit field insert/clear: BFI, BFC +def : InstRW<[A57Write_2cyc_1M], (instregex "(t2)?BFI", "(t2)?BFC")>; + +// Select bytes, conditional/unconditional +def A57WriteSEL : SchedWriteVariant<[ + SchedVar, + SchedVar +]>; +def : InstRW<[A57WriteSEL], (instregex "(t2)?SEL")>; + +// Sign/zero extend, normal: SXTB, SXTH, UXTB, UXTH +def : InstRW<[A57Write_1cyc_1I], + (instregex "(t2|t)?SXT(B|H)$", "(t2|t)?UXT(B|H)$")>; + +// Sign/zero extend and add, normal: SXTAB, SXTAH, UXTAB, UXTAH +def : InstRW<[A57Write_2cyc_1M], + (instregex "(t2)?SXTA(B|H)$", "(t2)?UXTA(B|H)$")>; + +// Sign/zero extend and add, parallel: SXTAB16, UXTAB16 +def : InstRW<[A57Write_4cyc_1M], (instregex "(t2)?SXTAB16", "(t2)?UXTAB16")>; + +// Sum of absolute differences: USAD8, USADA8 +def : InstRW<[A57Write_3cyc_1M], (instregex "(t2)?USAD8", "(t2)?USADA8")>; + +// --- 3.8 Load Instructions --- + +// Load, immed offset +// LDR and LDRB have LDRi12 and LDRBi12 forms for immediate +def : InstRW<[A57Write_4cyc_1L], (instregex "LDRi12", "LDRBi12", + "LDRcp", "(t2|t)?LDRConstPool", "LDRLIT_ga_(pcrel|abs)", + "PICLDR", "tLDR")>; + +def 
: InstRW<[A57Write_4cyc_1L], + (instregex "t2LDRS?(B|H)?(pcrel|T|i8|i12|pci|pci_pic|s)?$")>; + +// For "Load, register offset, minus" we need +1cyc, +1I +def A57WriteLdrAm3 : SchedWriteVariant<[ + SchedVar, + SchedVar +]>; +def : InstRW<[A57WriteLdrAm3], (instregex "LDR(H|SH|SB)$")>; +def A57WriteLdrAm3X2 : SchedWriteVariant<[ + SchedVar, + SchedVar +]>; +def : InstRW<[A57WriteLdrAm3X2, A57WriteLdrAm3X2], (instregex "LDRD$")>; +def : InstRW<[A57Write_4cyc_1L, A57Write_4cyc_1L], (instregex "t2LDRDi8")>; + +def A57WriteLdrAmLDSTSO : SchedWriteVariant<[ + SchedVar, + SchedVar, + SchedVar +]>; +def : InstRW<[A57WriteLdrAmLDSTSO], (instregex "LDRrs", "LDRBrs")>; + +def A57WrBackOne : SchedWriteRes<[]> { + let Latency = 1; + let NumMicroOps = 0; +} +def A57WrBackTwo : SchedWriteRes<[]> { + let Latency = 2; + let NumMicroOps = 0; +} +def A57WrBackThree : SchedWriteRes<[]> { + let Latency = 3; + let NumMicroOps = 0; +} + +// --- LDR pre-indexed --- +// Load, immed pre-indexed (4 cyc for load result, 1 cyc for Base update) +def : InstRW<[A57Write_4cyc_1L_1I, A57WrBackOne], (instregex "LDR_PRE_IMM", + "LDRB_PRE_IMM", "t2LDRB_PRE")>; + +// Load, register pre-indexed (4 cyc for load result, 2 cyc for Base update) +// (5 cyc load result for not-lsl2 scaled) +def A57WriteLdrAmLDSTSOPre : SchedWriteVariant<[ + SchedVar, + SchedVar +]>; +def : InstRW<[A57WriteLdrAmLDSTSOPre, A57WrBackTwo], + (instregex "LDR_PRE_REG", "LDRB_PRE_REG")>; + +def A57WriteLdrAm3PreWrBack : SchedWriteVariant<[ + SchedVar, + SchedVar +]>; +def : InstRW<[A57Write_4cyc_1L, A57WriteLdrAm3PreWrBack], + (instregex "LDR(H|SH|SB)_PRE")>; +def : InstRW<[A57Write_4cyc_1L, A57WrBackOne], + (instregex "t2LDR(H|SH|SB)?_PRE")>; + +// LDRD pre-indexed: 5(2) cyc for reg, 4(1) cyc for imm. +def A57WriteLdrDAm3Pre : SchedWriteVariant<[ + SchedVar, + SchedVar +]>; +def A57WriteLdrDAm3PreWrBack : SchedWriteVariant<[ + SchedVar, + SchedVar +]>; +def : InstRW<[A57WriteLdrDAm3Pre, A57WriteLdrDAm3Pre, A57WriteLdrDAm3PreWrBack], + (instregex "LDRD_PRE")>; +def : InstRW<[A57Write_4cyc_1L_1I, A57Write_4cyc_1L_1I, A57WrBackOne], + (instregex "t2LDRD_PRE")>; + +// --- LDR post-indexed --- +def : InstRW<[A57Write_4cyc_1L_1I, A57WrBackOne], (instregex "LDR(T?)_POST_IMM", + "LDRB(T?)_POST_IMM", "LDR(SB|H|SH)Ti", "t2LDRB_POST")>; + +def A57WriteLdrAm3PostWrBack : SchedWriteVariant<[ + SchedVar, + SchedVar +]>; +def : InstRW<[A57Write_4cyc_1L_1I, A57WriteLdrAm3PostWrBack], + (instregex "LDR(H|SH|SB)_POST")>; +def : InstRW<[A57Write_4cyc_1L, A57WrBackOne], + (instregex "t2LDR(H|SH|SB)?_POST")>; + +def : InstRW<[A57Write_4cyc_1L_1I, A57WrBackTwo], (instregex "LDR_POST_REG", + "LDRB_POST_REG", "LDR(B?)T_POST$")>; + +def A57WriteLdrTRegPost : SchedWriteVariant<[ + SchedVar, + SchedVar +]>; +def A57WriteLdrTRegPostWrBack : SchedWriteVariant<[ + SchedVar, + SchedVar +]>; +// 4(3) "I0/I1,L,M" for scaled register, otherwise 4(2) "I0/I1,L" +def : InstRW<[A57WriteLdrTRegPost, A57WriteLdrTRegPostWrBack], + (instregex "LDRT_POST_REG", "LDRBT_POST_REG")>; + +def : InstRW<[A57Write_4cyc_1L_1I, A57WrBackTwo], (instregex "LDR(SB|H|SH)Tr")>; + +def A57WriteLdrAm3PostWrBackX3 : SchedWriteVariant<[ + SchedVar, + SchedVar +]>; +// LDRD post-indexed: 4(2) cyc for reg, 4(1) cyc for imm. 
+def : InstRW<[A57Write_4cyc_1L_1I, A57Write_4cyc_1L_1I, + A57WriteLdrAm3PostWrBackX3], (instregex "LDRD_POST")>; +def : InstRW<[A57Write_4cyc_1L_1I, A57Write_4cyc_1L_1I, A57WrBackOne], + (instregex "t2LDRD_POST")>; + +// --- Preload instructions --- +// Preload, immed offset +def : InstRW<[A57Write_4cyc_1L], (instregex "(t2)?PLDi12", "(t2)?PLDWi12", + "t2PLDW?(i8|pci|s)", "(t2)?PLI")>; + +// Preload, register offset, +// 5cyc "I0/I1,L" for minus reg or scaled not plus lsl2 +// otherwise 4cyc "L" +def A57WritePLD : SchedWriteVariant<[ + SchedVar, + SchedVar, + SchedVar +]>; +def : InstRW<[A57WritePLD], (instregex "PLDrs", "PLDWrs")>; + +// --- Load multiple instructions --- +foreach NumAddr = 1-8 in { + def A57LMAddrPred#NumAddr : + SchedPredicate<"(TII->getLDMVariableDefsSize(*MI)+1)/2 == "#NumAddr>; +} + +def A57LDMOpsListNoregin : A57WriteLMOpsListType< + [A57Write_3cyc_1L, A57Write_3cyc_1L, + A57Write_4cyc_1L, A57Write_4cyc_1L, + A57Write_5cyc_1L, A57Write_5cyc_1L, + A57Write_6cyc_1L, A57Write_6cyc_1L, + A57Write_7cyc_1L, A57Write_7cyc_1L, + A57Write_8cyc_1L, A57Write_8cyc_1L, + A57Write_9cyc_1L, A57Write_9cyc_1L, + A57Write_10cyc_1L, A57Write_10cyc_1L]>; +def A57WriteLDMnoreginlist : SchedWriteVariant<[ + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar +]> { let Variadic=1; } + +def A57LDMOpsListRegin : A57WriteLMOpsListType< + [A57Write_4cyc_1L_1I, A57Write_4cyc_1L_1I, + A57Write_5cyc_1L_1I, A57Write_5cyc_1L_1I, + A57Write_6cyc_1L_1I, A57Write_6cyc_1L_1I, + A57Write_7cyc_1L_1I, A57Write_7cyc_1L_1I, + A57Write_8cyc_1L_1I, A57Write_8cyc_1L_1I, + A57Write_9cyc_1L_1I, A57Write_9cyc_1L_1I, + A57Write_10cyc_1L_1I, A57Write_10cyc_1L_1I, + A57Write_11cyc_1L_1I, A57Write_11cyc_1L_1I]>; +def A57WriteLDMreginlist : SchedWriteVariant<[ + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar +]> { let Variadic=1; } + +def A57LDMOpsList_Upd : A57WriteLMOpsListType< + [A57WrBackOne, + A57Write_3cyc_1L_1I, A57Write_3cyc_1L_1I, + A57Write_4cyc_1L_1I, A57Write_4cyc_1L_1I, + A57Write_5cyc_1L_1I, A57Write_5cyc_1L_1I, + A57Write_6cyc_1L_1I, A57Write_6cyc_1L_1I, + A57Write_7cyc_1L_1I, A57Write_7cyc_1L_1I, + A57Write_8cyc_1L_1I, A57Write_8cyc_1L_1I, + A57Write_9cyc_1L_1I, A57Write_9cyc_1L_1I, + A57Write_10cyc_1L_1I, A57Write_10cyc_1L_1I]>; +def A57WriteLDM_Upd : SchedWriteVariant<[ + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar +]> { let Variadic=1; } + +def A57WriteLDM : SchedWriteVariant<[ + SchedVar, + SchedVar +]> { let Variadic=1; } + +def : InstRW<[A57WriteLDM], (instregex "(t|t2|sys)?LDM(IA|DA|DB|IB)$")>; + +// TODO: no writeback latency defined in documentation (implemented as 1 cyc) +def : InstRW<[A57WriteLDM_Upd], + (instregex "(t|t2|sys)?LDM(IA_UPD|DA_UPD|DB_UPD|IB_UPD|IA_RET)", "tPOP")>; + +// --- 3.9 Store Instructions --- + +// Store, immed offset +def : InstRW<[A57Write_1cyc_1S], (instregex "STRi12", "STRBi12", "PICSTR", + "t2STR(B?)(T|i12|i8|s)", "t2STRDi8", "t2STRH(i12|i8|s)", "tSTR")>; + +// Store, register offset +// For minus or for not plus lsl2 scaled we need 3cyc "I0/I1, S", +// otherwise 1cyc S. +def A57WriteStrAmLDSTSO : SchedWriteVariant<[ + SchedVar, + SchedVar, + SchedVar +]>; +def : InstRW<[A57WriteStrAmLDSTSO], (instregex "STRrs", "STRBrs")>; + +// STRH,STRD: 3cyc "I0/I1, S" for minus reg, 1cyc S for imm or for plus reg. 
+def A57WriteStrAm3 : SchedWriteVariant<[ + SchedVar, + SchedVar +]>; +def : InstRW<[A57WriteStrAm3], (instregex "STRH$")>; +def A57WriteStrAm3X2 : SchedWriteVariant<[ + SchedVar, + SchedVar +]>; +def : InstRW<[A57WriteStrAm3X2], (instregex "STRD$")>; + +// Store, immed pre-indexed (1cyc "S, I0/I1", 1cyc writeback) +def : InstRW<[A57WrBackOne, A57Write_1cyc_1S_1I], (instregex "STR_PRE_IMM", + "STRB_PRE_IMM", "STR(B)?(r|i)_preidx", "(t2)?STRH_(preidx|PRE)", + "t2STR(B?)_(PRE|preidx)", "t2STRD_PRE")>; + +// Store, register pre-indexed: +// 1(1) "S, I0/I1" for plus reg +// 3(2) "I0/I1, S" for minus reg +// 1(2) "S, M" for scaled plus lsl2 +// 3(2) "I0/I1, S" for other scaled +def A57WriteStrAmLDSTSOPre : SchedWriteVariant<[ + SchedVar, + SchedVar, + SchedVar, + SchedVar +]>; +def A57WriteStrAmLDSTSOPreWrBack : SchedWriteVariant<[ + SchedVar, + SchedVar, + SchedVar +]>; +def : InstRW<[A57WriteStrAmLDSTSOPreWrBack, A57WriteStrAmLDSTSOPre], + (instregex "STR_PRE_REG", "STRB_PRE_REG")>; + +// pre-indexed STRH/STRD (STRH_PRE, STRD_PRE) +// 1(1) "S, I0/I1" for imm or reg plus +// 3(2) "I0/I1, S" for reg minus +def A57WriteStrAm3PreX2 : SchedWriteVariant<[ + SchedVar, + SchedVar +]>; +def A57WriteStrAm3PreWrBackX2 : SchedWriteVariant<[ + SchedVar, + SchedVar +]>; +def : InstRW<[A57WriteStrAm3PreWrBackX2, A57WriteStrAm3PreX2], + (instregex "STRH_PRE")>; + +def A57WriteStrAm3PreX3 : SchedWriteVariant<[ + SchedVar, + SchedVar +]>; +def A57WriteStrAm3PreWrBackX3 : SchedWriteVariant<[ + SchedVar, + SchedVar +]>; +def : InstRW<[A57WriteStrAm3PreWrBackX3, A57WriteStrAm3PreX3], + (instregex "STRD_PRE")>; + +def : InstRW<[A57WrBackOne, A57Write_1cyc_1S_1I], (instregex "STR(T?)_POST_IMM", + "STRB(T?)_POST_IMM", "t2STR(B?)_POST")>; + +// 1(2) "S, M" for STR/STRB register post-indexed (both scaled or not) +def : InstRW<[A57WrBackTwo, A57Write_1cyc_1S_1M], (instregex "STR(T?)_POST_REG", + "STRB(T?)_POST_REG", "STR(B?)T_POST$")>; + +// post-indexed STRH/STRD(STRH_POST, STRD_POST), STRHTi, STRHTr +// 1(1) "S, I0/I1" both for reg or imm +def : InstRW<[A57WrBackOne, A57Write_1cyc_1S_1I], + (instregex "(t2)?STR(H|D)_POST", "STRHT(i|r)", "t2STRHT")>; + +// --- Store multiple instructions --- +// TODO: no writeback latency defined in documentation +def A57WriteSTM : SchedWriteVariant<[ + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar +]>; +def A57WriteSTM_Upd : SchedWriteVariant<[ + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar +]>; + +def : InstRW<[A57WriteSTM], (instregex "(t2|sys|t)?STM(IA|DA|DB|IB)$")>; +def : InstRW<[A57WrBackOne, A57WriteSTM_Upd], + (instregex "(t2|sys|t)?STM(IA_UPD|DA_UPD|DB_UPD|IB_UPD)", "tPUSH")>; + +// --- 3.10 FP Data Processing Instructions --- +def : SchedAlias; +def : SchedAlias; + +def : InstRW<[A57Write_3cyc_1V], (instregex "VABS(S|D|H)")>; + +// fp compare - 3cyc F1 for unconditional, 6cyc "F0/F1, F1" for conditional +def A57WriteVcmp : SchedWriteVariant<[ + SchedVar, + SchedVar +]>; +def : InstRW<[A57WriteVcmp], + (instregex "VCMP(D|S|H|ZD|ZS|ZH)$", "VCMPE(D|S|H|ZD|ZS|ZH)")>; + +// fp convert +def : InstRW<[A57Write_5cyc_1V], (instregex + "VCVT(A|N|P|M)(SH|UH|SS|US|SD|UD)", "VCVT(BDH|THD|TDH)")>; + +def : SchedAlias; + +// FP round to integral +def : InstRW<[A57Write_5cyc_1V], (instregex "VRINT(A|N|P|M|Z|R|X)(H|S|D)$")>; + +// FP divide, FP square root +def : SchedAlias; +def : SchedAlias; +def : SchedAlias; +def : SchedAlias; + +// FP max/min +def : 
InstRW<[A57Write_5cyc_1V], (instregex "VMAX", "VMIN")>; + +// FP multiply-accumulate pipelines support late forwarding of the result +// from FP multiply μops to the accumulate operands of an +// FP multiply-accumulate μop. The latter can potentially be issued 1 cycle +// after the FP multiply μop has been issued +// FP multiply, FZ +def A57WriteVMUL : SchedWriteRes<[A57UnitV]> { let Latency = 5; } + +def : SchedAlias; +def : SchedAlias; +def : ReadAdvance; + +// FP multiply accumulate, FZ: 9cyc "F0/F1" or 4 cyc for sequenced accumulate +// VFMA, VFMS, VFNMA, VFNMS, VMLA, VMLS, VNMLA, VNMLS +def A57WriteVFMA : SchedWriteRes<[A57UnitV]> { let Latency = 9; } + +// VFMA takes 9 cyc for common case and 4 cyc for VFMA->VFMA chain (5 read adv.) +// VMUL takes 5 cyc for common case and 1 cyc for VMUL->VFMA chain (4 read adv.) +// Currently, there is no way to define different read advances for VFMA operand +// from VFMA or from VMUL, so there will be 5 read advance. +// Zero latency (instead of one) for VMUL->VFMA shouldn't break something. +// The same situation with ASIMD VMUL/VFMA instructions +// def A57ReadVFMA : SchedRead; +// def : ReadAdvance; +// def : ReadAdvance; +def A57ReadVFMA5 : SchedReadAdvance<5, [A57WriteVFMA, A57WriteVMUL]>; + +def : SchedAlias; +def : SchedAlias; +def : SchedAlias; + +def : InstRW<[A57Write_3cyc_1V], (instregex "VNEG")>; +def : InstRW<[A57Write_3cyc_1V], (instregex "VSEL")>; + +// --- 3.11 FP Miscellaneous Instructions --- +// VMOV: 3cyc "F0/F1" for imm/reg +def : InstRW<[A57Write_3cyc_1V], (instregex "FCONST(D|S|H)")>; +def : InstRW<[A57Write_3cyc_1V], (instregex "VMOV(D|S|H)(cc)?$")>; + +// 5cyc L for FP transfer, vfp to core reg, +// 5cyc L for FP transfer, core reg to vfp +def : SchedAlias; +// VMOVRRS/VMOVRRD in common code declared with one WriteFPMOV (instead of 2). 
+def : InstRW<[A57Write_5cyc_1L, A57Write_5cyc_1L], (instregex "VMOV(RRS|RRD)")>; + +// 8cyc "L,F0/F1" for FP transfer, core reg to upper or lower half of vfp D-reg +def : InstRW<[A57Write_8cyc_1L_1I], (instregex "VMOVDRR")>; + +// --- 3.12 FP Load Instructions --- +def : InstRW<[A57Write_5cyc_1L], (instregex "VLDR(D|S|H)")>; + +def : InstRW<[A57Write_5cyc_1L], (instregex "VLDMQIA$")>; + +// FP load multiple (VLDM) + +def A57VLDMOpsListUncond : A57WriteLMOpsListType< + [A57Write_5cyc_1L, A57Write_5cyc_1L, + A57Write_6cyc_1L, A57Write_6cyc_1L, + A57Write_7cyc_1L, A57Write_7cyc_1L, + A57Write_8cyc_1L, A57Write_8cyc_1L, + A57Write_9cyc_1L, A57Write_9cyc_1L, + A57Write_10cyc_1L, A57Write_10cyc_1L, + A57Write_11cyc_1L, A57Write_11cyc_1L, + A57Write_12cyc_1L, A57Write_12cyc_1L]>; +def A57WriteVLDMuncond : SchedWriteVariant<[ + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar +]> { let Variadic=1; } + +def A57VLDMOpsListCond : A57WriteLMOpsListType< + [A57Write_5cyc_1L, A57Write_6cyc_1L, + A57Write_7cyc_1L, A57Write_8cyc_1L, + A57Write_9cyc_1L, A57Write_10cyc_1L, + A57Write_11cyc_1L, A57Write_12cyc_1L, + A57Write_13cyc_1L, A57Write_14cyc_1L, + A57Write_15cyc_1L, A57Write_16cyc_1L, + A57Write_17cyc_1L, A57Write_18cyc_1L, + A57Write_19cyc_1L, A57Write_20cyc_1L]>; +def A57WriteVLDMcond : SchedWriteVariant<[ + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar +]> { let Variadic=1; } + +def A57WriteVLDM : SchedWriteVariant<[ + SchedVar, + SchedVar +]> { let Variadic=1; } + +def : InstRW<[A57WriteVLDM], (instregex "VLDM(DIA|SIA)$")>; + +def A57VLDMOpsListUncond_Upd : A57WriteLMOpsListType< + [A57Write_5cyc_1L_1I, A57Write_5cyc_1L_1I, + A57Write_6cyc_1L_1I, A57Write_6cyc_1L_1I, + A57Write_7cyc_1L_1I, A57Write_7cyc_1L_1I, + A57Write_8cyc_1L_1I, A57Write_8cyc_1L_1I, + A57Write_9cyc_1L_1I, A57Write_9cyc_1L_1I, + A57Write_10cyc_1L_1I, A57Write_10cyc_1L_1I, + A57Write_11cyc_1L_1I, A57Write_11cyc_1L_1I, + A57Write_12cyc_1L_1I, A57Write_12cyc_1L_1I]>; +def A57WriteVLDMuncond_UPD : SchedWriteVariant<[ + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar +]> { let Variadic=1; } + +def A57VLDMOpsListCond_Upd : A57WriteLMOpsListType< + [A57Write_5cyc_1L_1I, A57Write_6cyc_1L_1I, + A57Write_7cyc_1L_1I, A57Write_8cyc_1L_1I, + A57Write_9cyc_1L_1I, A57Write_10cyc_1L_1I, + A57Write_11cyc_1L_1I, A57Write_12cyc_1L_1I, + A57Write_13cyc_1L_1I, A57Write_14cyc_1L_1I, + A57Write_15cyc_1L_1I, A57Write_16cyc_1L_1I, + A57Write_17cyc_1L_1I, A57Write_18cyc_1L_1I, + A57Write_19cyc_1L_1I, A57Write_20cyc_1L_1I]>; +def A57WriteVLDMcond_UPD : SchedWriteVariant<[ + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar +]> { let Variadic=1; } + +def A57WriteVLDM_UPD : SchedWriteVariant<[ + SchedVar, + SchedVar +]> { let Variadic=1; } + +def : InstRW<[A57WrBackOne, A57WriteVLDM_UPD], + (instregex "VLDM(DIA_UPD|DDB_UPD|SIA_UPD|SDB_UPD)")>; + +// --- 3.13 FP Store Instructions --- +def : InstRW<[A57Write_1cyc_1S], (instregex "VSTR(D|S|H)")>; + +def : InstRW<[A57Write_2cyc_1S], (instregex "VSTMQIA$")>; + +def A57WriteVSTMs : SchedWriteVariant<[ + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar +]>; +def A57WriteVSTMd : SchedWriteVariant<[ + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar +]>; +def A57WriteVSTMs_Upd : 
SchedWriteVariant<[ + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar +]>; +def A57WriteVSTMd_Upd : SchedWriteVariant<[ + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar +]>; + +def : InstRW<[A57WriteVSTMs], (instregex "VSTMSIA$")>; +def : InstRW<[A57WriteVSTMd], (instregex "VSTMDIA$")>; +def : InstRW<[A57WrBackOne, A57WriteVSTMs_Upd], + (instregex "VSTM(SIA_UPD|SDB_UPD)")>; +def : InstRW<[A57WrBackOne, A57WriteVSTMd_Upd], + (instregex "VSTM(DIA_UPD|DDB_UPD)")>; + +// --- 3.14 ASIMD Integer Instructions --- + +// ASIMD absolute diff, 3cyc F0/F1 for integer VABD +def : InstRW<[A57Write_3cyc_1V], (instregex "VABD(s|u)")>; + +// ASIMD absolute diff accum: 4(1) F1 for D-form, 5(2) F1 for Q-form +def A57WriteVABAD : SchedWriteRes<[A57UnitX]> { let Latency = 4; } +def A57ReadVABAD : SchedReadAdvance<3, [A57WriteVABAD]>; +def : InstRW<[A57WriteVABAD, A57ReadVABAD], + (instregex "VABA(s|u)(v8i8|v4i16|v2i32)")>; +def A57WriteVABAQ : SchedWriteRes<[A57UnitX]> { let Latency = 5; } +def A57ReadVABAQ : SchedReadAdvance<3, [A57WriteVABAQ]>; +def : InstRW<[A57WriteVABAQ, A57ReadVABAQ], + (instregex "VABA(s|u)(v16i8|v8i16|v4i32)")>; + +// ASIMD absolute diff accum long: 4(1) F1 for VABAL +def A57WriteVABAL : SchedWriteRes<[A57UnitX]> { let Latency = 4; } +def A57ReadVABAL : SchedReadAdvance<3, [A57WriteVABAL]>; +def : InstRW<[A57WriteVABAL, A57ReadVABAL], (instregex "VABAL(s|u)")>; + +// ASIMD absolute diff long: 3cyc F0/F1 for VABDL +def : InstRW<[A57Write_3cyc_1V], (instregex "VABDL(s|u)")>; + +// ASIMD arith, basic +def : InstRW<[A57Write_3cyc_1V], (instregex "VADD", "VADDL", "VADDW", + "VNEG(s8d|s16d|s32d|s8q|s16q|s32q|d|q)", + "VPADDi", "VPADDL", "VSUB", "VSUBL", "VSUBW")>; + +// ASIMD arith, complex +def : InstRW<[A57Write_3cyc_1V], (instregex "VABS", "VADDHN", "VHADD", "VHSUB", + "VQABS", "VQADD", "VQNEG", "VQSUB", + "VRADDHN", "VRHADD", "VRSUBHN", "VSUBHN")>; + +// ASIMD compare +def : InstRW<[A57Write_3cyc_1V], + (instregex "VCEQ", "VCGE", "VCGT", "VCLE", "VTST", "VCLT")>; + +// ASIMD logical +def : InstRW<[A57Write_3cyc_1V], + (instregex "VAND", "VBIC", "VMVN", "VORR", "VORN", "VEOR")>; + +// ASIMD max/min +def : InstRW<[A57Write_3cyc_1V], + (instregex "(VMAX|VMIN)(s|u)", "(VPMAX|VPMIN)(s8|s16|s32|u8|u16|u32)")>; + +// ASIMD multiply, D-form: 5cyc F0 for r0px, 4cyc F0 for r1p0 and later +// Cortex-A57 r1p0 and later reduce the latency of ASIMD multiply +// and multiply-with-accumulate instructions relative to r0pX. 
+def A57WriteVMULD_VecInt : SchedWriteVariant<[
+  SchedVar<IsR1P0AndLaterPred, [A57Write_4cyc_1W]>,
+  SchedVar<NoSchedPred, [A57Write_5cyc_1W]>]>;
+def : InstRW<[A57WriteVMULD_VecInt], (instregex
+  "VMUL(v8i8|v4i16|v2i32|pd)", "VMULsl(v4i16|v2i32)",
+  "VQDMULH(sl)?(v4i16|v2i32)", "VQRDMULH(sl)?(v4i16|v2i32)")>;
+
+// ASIMD multiply, Q-form: 6cyc F0 for r0px, 5cyc F0 for r1p0 and later
+def A57WriteVMULQ_VecInt : SchedWriteVariant<[
+  SchedVar<IsR1P0AndLaterPred, [A57Write_5cyc_1W]>,
+  SchedVar<NoSchedPred, [A57Write_6cyc_1W]>]>;
+def : InstRW<[A57WriteVMULQ_VecInt], (instregex
+  "VMUL(v16i8|v8i16|v4i32|pq)", "VMULsl(v8i16|v4i32)",
+  "VQDMULH(sl)?(v8i16|v4i32)", "VQRDMULH(sl)?(v8i16|v4i32)")>;
+
+// ASIMD multiply accumulate, D-form
+// 5cyc F0 for r0px, 4cyc F0 for r1p0 and later, 1cyc for accumulate sequence
+// (4 or 3 ReadAdvance)
+def A57WriteVMLAD_VecInt : SchedWriteVariant<[
+  SchedVar<IsR1P0AndLaterPred, [A57Write_4cyc_1W]>,
+  SchedVar<NoSchedPred, [A57Write_5cyc_1W]>]>;
+def A57ReadVMLAD_VecInt : SchedReadVariant<[
+  SchedVar<IsR1P0AndLaterPred, [SchedReadAdvance<3, [A57WriteVMLAD_VecInt]>]>,
+  SchedVar<NoSchedPred, [SchedReadAdvance<4, [A57WriteVMLAD_VecInt]>]>
+]>;
+def : InstRW<[A57WriteVMLAD_VecInt, A57ReadVMLAD_VecInt],
+  (instregex "VMLA(sl)?(v8i8|v4i16|v2i32)", "VMLS(sl)?(v8i8|v4i16|v2i32)")>;
+
+// ASIMD multiply accumulate, Q-form
+// 6cyc F0 for r0px, 5cyc F0 for r1p0 and later, 2cyc for accumulate sequence
+// (4 or 3 ReadAdvance)
+def A57WriteVMLAQ_VecInt : SchedWriteVariant<[
+  SchedVar<IsR1P0AndLaterPred, [A57Write_5cyc_1W]>,
+  SchedVar<NoSchedPred, [A57Write_6cyc_1W]>]>;
+def A57ReadVMLAQ_VecInt : SchedReadVariant<[
+  SchedVar<IsR1P0AndLaterPred, [SchedReadAdvance<3, [A57WriteVMLAQ_VecInt]>]>,
+  SchedVar<NoSchedPred, [SchedReadAdvance<4, [A57WriteVMLAQ_VecInt]>]>
+]>;
+def : InstRW<[A57WriteVMLAQ_VecInt, A57ReadVMLAQ_VecInt],
+  (instregex "VMLA(sl)?(v16i8|v8i16|v4i32)", "VMLS(sl)?(v16i8|v8i16|v4i32)")>;
+
+// ASIMD multiply accumulate long
+// 5cyc F0 for r0px, 4cyc F0 for r1p0 and later, 1cyc for accumulate sequence
+// (4 or 3 ReadAdvance)
+def A57WriteVMLAL_VecInt : SchedWriteVariant<[
+  SchedVar<IsR1P0AndLaterPred, [A57Write_4cyc_1W]>,
+  SchedVar<NoSchedPred, [A57Write_5cyc_1W]>]>;
+def A57ReadVMLAL_VecInt : SchedReadVariant<[
+  SchedVar<IsR1P0AndLaterPred, [SchedReadAdvance<3, [A57WriteVMLAL_VecInt]>]>,
+  SchedVar<NoSchedPred, [SchedReadAdvance<4, [A57WriteVMLAL_VecInt]>]>
+]>;
+def : InstRW<[A57WriteVMLAL_VecInt, A57ReadVMLAL_VecInt],
+  (instregex "VMLAL(s|u)", "VMLSL(s|u)")>;
+
+// ASIMD multiply accumulate saturating long
+// 5cyc F0 for r0px, 4cyc F0 for r1p0 and later, 2cyc for accumulate sequence
+// (3 or 2 ReadAdvance)
+def A57WriteVQDMLAL_VecInt : SchedWriteVariant<[
+  SchedVar<IsR1P0AndLaterPred, [A57Write_4cyc_1W]>,
+  SchedVar<NoSchedPred, [A57Write_5cyc_1W]>]>;
+def A57ReadVQDMLAL_VecInt : SchedReadVariant<[
+  SchedVar<IsR1P0AndLaterPred, [SchedReadAdvance<2, [A57WriteVQDMLAL_VecInt]>]>,
+  SchedVar<NoSchedPred, [SchedReadAdvance<3, [A57WriteVQDMLAL_VecInt]>]>
+]>;
+def : InstRW<[A57WriteVQDMLAL_VecInt, A57ReadVQDMLAL_VecInt],
+  (instregex "VQDMLAL", "VQDMLSL")>;
+
+// ASIMD multiply long
+// 5cyc F0 for r0px, 4cyc F0 for r1p0 and later
+def A57WriteVMULL_VecInt : SchedWriteVariant<[
+  SchedVar<IsR1P0AndLaterPred, [A57Write_4cyc_1W]>,
+  SchedVar<NoSchedPred, [A57Write_5cyc_1W]>]>;
+def : InstRW<[A57WriteVMULL_VecInt],
+  (instregex "VMULL(s|u|p8|sls|slu)", "VQDMULL")>;
+
+// ASIMD pairwise add and accumulate
+// 4cyc F1, 1cyc for accumulate sequence (3cyc ReadAdvance)
+def A57WriteVPADAL : SchedWriteRes<[A57UnitX]> { let Latency = 4; }
+def A57ReadVPADAL : SchedReadAdvance<3, [A57WriteVPADAL]>;
+def : InstRW<[A57WriteVPADAL, A57ReadVPADAL], (instregex "VPADAL(s|u)")>;
+
+// ASIMD shift accumulate
+// 4cyc F1, 1cyc for accumulate sequence (3cyc ReadAdvance)
+def A57WriteVSRA : SchedWriteRes<[A57UnitX]> { let Latency = 4; }
+def A57ReadVSRA : SchedReadAdvance<3, [A57WriteVSRA]>;
+def : InstRW<[A57WriteVSRA, A57ReadVSRA], (instregex "VSRA", "VRSRA")>;
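Every accumulate class above (VABA, VMLA/VMLS, VQDMLAL, VPADAL, VSRA) uses the same scheme: the SchedWriteRes carries the full result latency, and a SchedReadAdvance on the accumulator operand lets a dependent accumulate issue early, so chained accumulations run at latency minus advance. A minimal sketch of that scheme with hypothetical names (ExampleWriteAcc and ExampleReadAcc are illustrative, not part of the model):

// Full result latency: 4 cycles on the X (F1) pipe.
def ExampleWriteAcc : SchedWriteRes<[A57UnitX]> { let Latency = 4; }
// An accumulator operand produced by ExampleWriteAcc is read 3 cycles
// early, so an accumulate-to-accumulate chain costs 4 - 3 = 1 cycle.
def ExampleReadAcc : SchedReadAdvance<3, [ExampleWriteAcc]>;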
+
+// ASIMD shift by immed, basic
+def : InstRW<[A57Write_3cyc_1X],
+  (instregex "VMOVL", "VSHLi", "VSHLL", "VSHR(s|u)", "VSHRN")>;
+
+// ASIMD shift by immed, complex
+def : InstRW<[A57Write_4cyc_1X], (instregex
+  "VQRSHRN", "VQRSHRUN", "VQSHL(si|ui|su)", "VQSHRN", "VQSHRUN", "VRSHR(s|u)",
+  "VRSHRN")>;
+
+// ASIMD shift by immed and insert, basic, D-form
+def : InstRW<[A57Write_4cyc_1X], (instregex
+  "VSLI(v8i8|v4i16|v2i32|v1i64)", "VSRI(v8i8|v4i16|v2i32|v1i64)")>;
+
+// ASIMD shift by immed and insert, basic, Q-form
+def : InstRW<[A57Write_5cyc_1X], (instregex
+  "VSLI(v16i8|v8i16|v4i32|v2i64)", "VSRI(v16i8|v8i16|v4i32|v2i64)")>;
+
+// ASIMD shift by register, basic, D-form
+def : InstRW<[A57Write_3cyc_1X], (instregex
+  "VSHL(s|u)(v8i8|v4i16|v2i32|v1i64)")>;
+
+// ASIMD shift by register, basic, Q-form
+def : InstRW<[A57Write_4cyc_1X], (instregex
+  "VSHL(s|u)(v16i8|v8i16|v4i32|v2i64)")>;
+
+// ASIMD shift by register, complex, D-form
+// VQRSHL, VQSHL, VRSHL
+def : InstRW<[A57Write_4cyc_1X], (instregex
+  "VQRSHL(s|u)(v8i8|v4i16|v2i32|v1i64)", "VQSHL(s|u)(v8i8|v4i16|v2i32|v1i64)",
+  "VRSHL(s|u)(v8i8|v4i16|v2i32|v1i64)")>;
+
+// ASIMD shift by register, complex, Q-form
+def : InstRW<[A57Write_5cyc_1X], (instregex
+  "VQRSHL(s|u)(v16i8|v8i16|v4i32|v2i64)", "VQSHL(s|u)(v16i8|v8i16|v4i32|v2i64)",
+  "VRSHL(s|u)(v16i8|v8i16|v4i32|v2i64)")>;
+
+// --- 3.15 ASIMD Floating-Point Instructions ---
+// ASIMD FP absolute value
+def : InstRW<[A57Write_3cyc_1V], (instregex "VABS(fd|fq|hd|hq)")>;
+
+// ASIMD FP arith
+def : InstRW<[A57Write_5cyc_1V], (instregex "VABD(fd|fq|hd|hq)",
+  "VADD(fd|fq|hd|hq)", "VPADD(f|h)", "VSUB(fd|fq|hd|hq)")>;
+
+// ASIMD FP compare
+def : InstRW<[A57Write_5cyc_1V], (instregex "VAC(GE|GT|LE|LT)",
+  "VC(EQ|GE|GT|LE)(fd|fq|hd|hq)")>;
+
+// ASIMD FP convert, integer
+def : InstRW<[A57Write_5cyc_1V], (instregex
+  "VCVT(f2sd|f2ud|s2fd|u2fd|f2sq|f2uq|s2fq|u2fq|f2xsd|f2xud|xs2fd|xu2fd)",
+  "VCVT(f2xsq|f2xuq|xs2fq|xu2fq)",
+  "VCVT(AN|MN|NN|PN)(SDf|SQf|UDf|UQf|SDh|SQh|UDh|UQh)")>;
+
+// ASIMD FP convert, half-precision: 8cyc F0/F1
+def : InstRW<[A57Write_8cyc_1V], (instregex
+  "VCVT(h2sd|h2ud|s2hd|u2hd|h2sq|h2uq|s2hq|u2hq|h2xsd|h2xud|xs2hd|xu2hd)",
+  "VCVT(h2xsq|h2xuq|xs2hq|xu2hq)",
+  "VCVT(f2h|h2f)")>;
+
+// ASIMD FP max/min
+def : InstRW<[A57Write_5cyc_1V], (instregex
+  "(VMAX|VMIN)(fd|fq|hd|hq)", "(VPMAX|VPMIN)(f|h)", "VMAXNM", "VMINNM")>;
+
+// ASIMD FP multiply
+def A57WriteVMUL_VecFP : SchedWriteRes<[A57UnitV]> { let Latency = 5; }
+def : InstRW<[A57WriteVMUL_VecFP], (instregex "VMUL(sl)?(fd|fq|hd|hq)")>;
+
+// ASIMD FP multiply accumulate: 9cyc F0/F1, 4cyc for accumulate sequence
+def A57WriteVMLA_VecFP : SchedWriteRes<[A57UnitV]> { let Latency = 9; }
+def A57ReadVMLA_VecFP :
+  SchedReadAdvance<5, [A57WriteVMLA_VecFP, A57WriteVMUL_VecFP]>;
+def : InstRW<[A57WriteVMLA_VecFP, A57ReadVMLA_VecFP],
+  (instregex "(VMLA|VMLS)(sl)?(fd|fq|hd|hq)", "(VFMA|VFMS)(fd|fq|hd|hq)")>;
+
+// ASIMD FP negate
+def : InstRW<[A57Write_3cyc_1V], (instregex "VNEG(fd|f32q|hd|hq)")>;
+
+// ASIMD FP round to integral
+def : InstRW<[A57Write_5cyc_1V], (instregex
+  "VRINT(AN|MN|NN|PN|XN|ZN)(Df|Qf|Dh|Qh)")>;
+
+// --- 3.16 ASIMD Miscellaneous Instructions ---
+
+// ASIMD bitwise insert
+def : InstRW<[A57Write_3cyc_1V], (instregex "VBIF", "VBIT", "VBSL")>;
+
+// ASIMD count
+def : InstRW<[A57Write_3cyc_1V], (instregex "VCLS", "VCLZ", "VCNT")>;
+
+// ASIMD duplicate, core reg: 8cyc "L, F0/F1"
+def : InstRW<[A57Write_8cyc_1L_1V], (instregex "VDUP(8|16|32)(d|q)")>;
+
+// ASIMD duplicate, scalar: 3cyc "F0/F1"
+def : InstRW<[A57Write_3cyc_1V], (instregex "VDUPLN(8|16|32)(d|q)")>;
+
+// ASIMD extract
+def : InstRW<[A57Write_3cyc_1V], (instregex "VEXT(d|q)(8|16|32|64)")>;
+
+// ASIMD move, immed
+def : InstRW<[A57Write_3cyc_1V], (instregex
+  "VMOV(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v1i64|v2i64|v2f32|v4f32)",
+  "VMOVQ0")>;
+
+// ASIMD move, narrowing
+def : InstRW<[A57Write_3cyc_1V], (instregex "VMOVN")>;
+
+// ASIMD move, saturating
+def : InstRW<[A57Write_4cyc_1X], (instregex "VQMOVN")>;
+
+// ASIMD reciprocal estimate
+def : InstRW<[A57Write_5cyc_1V], (instregex "VRECPE", "VRSQRTE")>;
+
+// ASIMD reciprocal step, FZ
+def : InstRW<[A57Write_9cyc_1V], (instregex "VRECPS", "VRSQRTS")>;
+
+// ASIMD reverse, swap, table lookup (1-2 reg)
+def : InstRW<[A57Write_3cyc_1V], (instregex "VREV", "VSWP", "VTB(L|X)(1|2)")>;
+
+// ASIMD table lookup (3-4 reg)
+def : InstRW<[A57Write_6cyc_1V], (instregex "VTBL(3|4)", "VTBX(3|4)")>;
+
+// ASIMD transfer, scalar to core reg: 6cyc "L, I0/I1"
+def : InstRW<[A57Write_6cyc_1L_1I], (instregex "VGETLN")>;
+
+// ASIMD transfer, core reg to scalar: 8cyc "L, F0/F1"
+def : InstRW<[A57Write_8cyc_1L_1V], (instregex "VSETLN")>;
+
+// ASIMD transpose
+def : InstRW<[A57Write_3cyc_1V, A57Write_3cyc_1V], (instregex "VTRN")>;
+
+// ASIMD unzip/zip, D-form
+def : InstRW<[A57Write_3cyc_1V, A57Write_3cyc_1V],
+  (instregex "VUZPd", "VZIPd")>;
+
+// ASIMD unzip/zip, Q-form
+def : InstRW<[A57Write_6cyc_1V, A57Write_6cyc_1V],
+  (instregex "VUZPq", "VZIPq")>;
+
+// --- 3.17 ASIMD Load Instructions ---
+
+// Overridden via InstRW for this processor.
+def : WriteRes<WriteVLD1, []>;
+def : WriteRes<WriteVLD2, []>;
+def : WriteRes<WriteVLD3, []>;
+def : WriteRes<WriteVLD4, []>;
+def : WriteRes<WriteVST1, []>;
+def : WriteRes<WriteVST2, []>;
+def : WriteRes<WriteVST3, []>;
+def : WriteRes<WriteVST4, []>;
+
+// 1-2 reg: 5cyc L, +I for writeback, 1 cyc wb latency
+def : InstRW<[A57Write_5cyc_1L], (instregex "VLD1(d|q)(8|16|32|64)$")>;
+def : InstRW<[A57Write_5cyc_1L_1I, A57WrBackOne],
+  (instregex "VLD1(d|q)(8|16|32|64)wb")>;
+
+// 3-4 reg: 6cyc L, +I for writeback, 1 cyc wb latency
+def : InstRW<[A57Write_6cyc_1L],
+  (instregex "VLD1(d|q)(8|16|32|64)(T|Q)$", "VLD1d64(T|Q)Pseudo")>;
+
+def : InstRW<[A57Write_6cyc_1L_1I, A57WrBackOne],
+  (instregex "VLD1(d|q)(8|16|32|64)(T|Q)wb")>;
+
+// ASIMD load, 1 element, one lane and all lanes: 8cyc "L, F0/F1"
+def : InstRW<[A57Write_8cyc_1L_1V], (instregex
+  "VLD1(LN|DUP)(d|q)(8|16|32)$", "VLD1(LN|DUP)(d|q)(8|16|32)Pseudo$")>;
+def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne], (instregex
+  "VLD1(LN|DUP)(d|q)(8|16|32)(wb|_UPD)", "VLD1LNq(8|16|32)Pseudo_UPD")>;
+
+// ASIMD load, 2 element, multiple, 2 reg: 8cyc "L, F0/F1"
+def : InstRW<[A57Write_8cyc_1L_1V],
+  (instregex "VLD2(d|q)(8|16|32)$", "VLD2q(8|16|32)Pseudo$")>;
+def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne],
+  (instregex "VLD2(d|q)(8|16|32)wb", "VLD2q(8|16|32)PseudoWB")>;
+
+// ASIMD load, 2 element, multiple, 4 reg: 9cyc "L, F0/F1"
+def : InstRW<[A57Write_9cyc_1L_1V], (instregex "VLD2b(8|16|32)$")>;
+def : InstRW<[A57Write_9cyc_1L_1V_1I, A57WrBackOne],
+  (instregex "VLD2b(8|16|32)wb")>;
+
+// ASIMD load, 2 element, one lane and all lanes: 8cyc "L, F0/F1"
+def : InstRW<[A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V],
+  (instregex "VLD2(DUP|LN)(d|q)(8|16|32|8x2|16x2|32x2)$",
+  "VLD2LN(d|q)(8|16|32)Pseudo$")>;
+// 2 results + wb result
+def : InstRW<[A57Write_8cyc_1L_1V_1I, A57Write_8cyc_1L_1V, A57WrBackOne],
+  (instregex "VLD2LN(d|q)(8|16|32)_UPD$")>;
+// 1 result + wb result
+def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne],
+  (instregex "VLD2DUPd(8|16|32|8x2|16x2|32x2)wb",
+  "VLD2LN(d|q)(8|16|32)Pseudo_UPD")>;
+
+// ASIMD load, 3 element, multiple, 3 reg: 9cyc "L, F0/F1"
+// 3 results
+def : InstRW<[A57Write_9cyc_1L_1V, A57Write_9cyc_1L_1V, A57Write_9cyc_1L_1V],
+  (instregex "VLD3(d|q)(8|16|32)$")>;
+// 1 result
+def : InstRW<[A57Write_9cyc_1L_1V],
+  (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo$")>;
+// 3 results + wb
+def : InstRW<[A57Write_9cyc_1L_1V_1I, A57Write_9cyc_1L_1V_1I,
+  A57Write_9cyc_1L_1V_1I, A57WrBackOne],
+  (instregex "VLD3(d|q)(8|16|32)_UPD$")>;
+// 1 result + wb
+def :
InstRW<[A57Write_9cyc_1L_1V_1I, A57WrBackOne], + (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo_UPD")>; + +// ASIMD load, 3 element, one lane, size 32: 8cyc "L, F0/F1" +def : InstRW<[A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V], + (instregex "VLD3LN(d|q)32$", + "VLD3LN(d|q)32Pseudo$")>; +def : InstRW<[A57Write_8cyc_1L_1V_1I, A57Write_8cyc_1L_1V_1I, + A57Write_8cyc_1L_1V_1I, A57WrBackOne], + (instregex "VLD3LN(d|q)32_UPD")>; +def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne], + (instregex "VLD3LN(d|q)32Pseudo_UPD")>; + +// ASIMD load, 3 element, one lane, size 8/16: 9cyc "L, F0/F1" +def : InstRW<[A57Write_9cyc_1L_1V, A57Write_9cyc_1L_1V, A57Write_9cyc_1L_1V], + (instregex "VLD3LN(d|q)(8|16)$", + "VLD3LN(d|q)(8|16)Pseudo$")>; +def : InstRW<[A57Write_9cyc_1L_1V_1I, A57Write_9cyc_1L_1V_1I, + A57Write_9cyc_1L_1V_1I, A57WrBackOne], + (instregex "VLD3LN(d|q)(8|16)_UPD")>; +def : InstRW<[A57Write_9cyc_1L_1V_1I, A57WrBackOne], + (instregex "VLD3LN(d|q)(8|16)Pseudo_UPD")>; + +// ASIMD load, 3 element, all lanes: 8cyc "L, F0/F1" +def : InstRW<[A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V], + (instregex "VLD3DUP(d|q)(8|16|32)$", + "VLD3DUP(d|q)(8|16|32)Pseudo$")>; +def : InstRW<[A57Write_8cyc_1L_1V_1I, A57Write_8cyc_1L_1V_1I, + A57Write_8cyc_1L_1V_1I, A57WrBackOne], + (instregex "VLD3DUP(d|q)(8|16|32)_UPD")>; +def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne], + (instregex "VLD3DUP(d|q)(8|16|32)Pseudo_UPD")>; + +// ASIMD load, 4 element, multiple, 4 reg: 9cyc "L, F0/F1" +def : InstRW<[A57Write_9cyc_1L_1V, A57Write_9cyc_1L_1V, A57Write_9cyc_1L_1V, + A57Write_9cyc_1L_1V], + (instregex "VLD4(d|q)(8|16|32)$")>; +def : InstRW<[A57Write_9cyc_1L_1V], + (instregex "VLD4(d|q)(8|16|32)(oddP|P)seudo$")>; +def : InstRW<[A57Write_9cyc_1L_1V_1I, A57Write_9cyc_1L_1V_1I, + A57Write_9cyc_1L_1V_1I, A57Write_9cyc_1L_1V_1I, A57WrBackOne], + (instregex "VLD4(d|q)(8|16|32)_UPD")>; +def : InstRW<[A57Write_9cyc_1L_1V_1I, A57WrBackOne], + (instregex "VLD4(d|q)(8|16|32)(oddP|P)seudo_UPD")>; + +// ASIMD load, 4 element, one lane, size 32: 8cyc "L, F0/F1" +def : InstRW<[A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V, + A57Write_8cyc_1L_1V], + (instregex "VLD4LN(d|q)32$", + "VLD4LN(d|q)32Pseudo$")>; +def : InstRW<[A57Write_8cyc_1L_1V_1I, A57Write_8cyc_1L_1V_1I, + A57Write_8cyc_1L_1V_1I, A57Write_8cyc_1L_1V_1I, + A57WrBackOne], + (instregex "VLD4LN(d|q)32_UPD")>; +def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne], + (instregex "VLD4LN(d|q)32Pseudo_UPD")>; + +// ASIMD load, 4 element, one lane, size 8/16: 9cyc "L, F0/F1" +def : InstRW<[A57Write_9cyc_1L_1V, A57Write_9cyc_1L_1V, A57Write_9cyc_1L_1V, + A57Write_9cyc_1L_1V], + (instregex "VLD4LN(d|q)(8|16)$", + "VLD4LN(d|q)(8|16)Pseudo$")>; +def : InstRW<[A57Write_9cyc_1L_1V_1I, A57Write_9cyc_1L_1V_1I, + A57Write_9cyc_1L_1V_1I, A57Write_9cyc_1L_1V_1I, + A57WrBackOne], + (instregex "VLD4LN(d|q)(8|16)_UPD")>; +def : InstRW<[A57Write_9cyc_1L_1V_1I, A57WrBackOne], + (instregex "VLD4LN(d|q)(8|16)Pseudo_UPD")>; + +// ASIMD load, 4 element, all lanes: 8cyc "L, F0/F1" +def : InstRW<[A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V, + A57Write_8cyc_1L_1V], + (instregex "VLD4DUP(d|q)(8|16|32)$", + "VLD4DUP(d|q)(8|16|32)Pseudo$")>; +def : InstRW<[A57Write_8cyc_1L_1V_1I, A57Write_8cyc_1L_1V_1I, + A57Write_8cyc_1L_1V_1I, A57Write_8cyc_1L_1V_1I, + A57WrBackOne], + (instregex "VLD4DUP(d|q)(8|16|32)_UPD")>; +def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne], + (instregex "VLD4DUP(d|q)(8|16|32)Pseudo_UPD")>; + +// --- 3.18 ASIMD Store 
Instructions --- + +// ASIMD store, 1 element, multiple, 1 reg: 1cyc S +def : InstRW<[A57Write_1cyc_1S], (instregex "VST1d(8|16|32|64)$")>; +def : InstRW<[A57WrBackOne, A57Write_1cyc_1S_1I], + (instregex "VST1d(8|16|32|64)wb")>; +// ASIMD store, 1 element, multiple, 2 reg: 2cyc S +def : InstRW<[A57Write_2cyc_1S], (instregex "VST1q(8|16|32|64)$")>; +def : InstRW<[A57WrBackOne, A57Write_2cyc_1S_1I], + (instregex "VST1q(8|16|32|64)wb")>; +// ASIMD store, 1 element, multiple, 3 reg: 3cyc S +def : InstRW<[A57Write_3cyc_1S], + (instregex "VST1d(8|16|32|64)T$", "VST1d64TPseudo$")>; +def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1I], + (instregex "VST1d(8|16|32|64)Twb", "VST1d64TPseudoWB")>; +// ASIMD store, 1 element, multiple, 4 reg: 4cyc S +def : InstRW<[A57Write_4cyc_1S], + (instregex "VST1d(8|16|32|64)(Q|QPseudo)$")>; +def : InstRW<[A57WrBackOne, A57Write_4cyc_1S_1I], + (instregex "VST1d(8|16|32|64)(Qwb|QPseudoWB)")>; +// ASIMD store, 1 element, one lane: 3cyc "F0/F1, S" +def : InstRW<[A57Write_3cyc_1S_1V], + (instregex "VST1LNd(8|16|32)$", "VST1LNq(8|16|32)Pseudo$")>; +def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I], + (instregex "VST1LNd(8|16|32)_UPD", "VST1LNq(8|16|32)Pseudo_UPD")>; +// ASIMD store, 2 element, multiple, 2 reg: 3cyc "F0/F1, S" +def : InstRW<[A57Write_3cyc_1S_1V], + (instregex "VST2(d|b)(8|16|32)$")>; +def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I], + (instregex "VST2(b|d)(8|16|32)wb")>; +// ASIMD store, 2 element, multiple, 4 reg: 4cyc "F0/F1, S" +def : InstRW<[A57Write_4cyc_1S_1V], + (instregex "VST2q(8|16|32)$", "VST2q(8|16|32)Pseudo$")>; +def : InstRW<[A57WrBackOne, A57Write_4cyc_1S_1V_1I], + (instregex "VST2q(8|16|32)wb", "VST2q(8|16|32)PseudoWB")>; +// ASIMD store, 2 element, one lane: 3cyc "F0/F1, S" +def : InstRW<[A57Write_3cyc_1S_1V], + (instregex "VST2LN(d|q)(8|16|32)$", "VST2LN(d|q)(8|16|32)Pseudo$")>; +def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I], + (instregex "VST2LN(d|q)(8|16|32)_UPD", + "VST2LN(d|q)(8|16|32)Pseudo_UPD")>; +// ASIMD store, 3 element, multiple, 3 reg +def : InstRW<[A57Write_3cyc_1S_1V], + (instregex "VST3(d|q)(8|16|32)$", "VST3(d|q)(8|16|32)(oddP|P)seudo$")>; +def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I], + (instregex "VST3(d|q)(8|16|32)_UPD", + "VST3(d|q)(8|16|32)(oddP|P)seudo_UPD$")>; +// ASIMD store, 3 element, one lane +def : InstRW<[A57Write_3cyc_1S_1V], + (instregex "VST3LN(d|q)(8|16|32)$", "VST3LN(d|q)(8|16|32)Pseudo$")>; +def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I], + (instregex "VST3LN(d|q)(8|16|32)_UPD", + "VST3LN(d|q)(8|16|32)Pseudo_UPD")>; +// ASIMD store, 4 element, multiple, 4 reg +def : InstRW<[A57Write_4cyc_1S_1V], + (instregex "VST4(d|q)(8|16|32)$", "VST4(d|q)(8|16|32)(oddP|P)seudo$")>; +def : InstRW<[A57WrBackOne, A57Write_4cyc_1S_1V_1I], + (instregex "VST4(d|q)(8|16|32)_UPD", + "VST4(d|q)(8|16|32)(oddP|P)seudo_UPD$")>; +// ASIMD store, 4 element, one lane +def : InstRW<[A57Write_3cyc_1S_1V], + (instregex "VST4LN(d|q)(8|16|32)$", "VST4LN(d|q)(8|16|32)Pseudo$")>; +def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I], + (instregex "VST4LN(d|q)(8|16|32)_UPD", + "VST4LN(d|q)(8|16|32)Pseudo_UPD")>; + +// --- 3.19 Cryptography Extensions --- +// Crypto AES ops +// AESD, AESE, AESIMC, AESMC: 3cyc F0 +def : InstRW<[A57Write_3cyc_1W], (instregex "^AES")>; +// Crypto polynomial (64x64) multiply long (VMULL.P64): 3cyc F0 +def : InstRW<[A57Write_3cyc_1W], (instregex "^VMULLp64")>; +// Crypto SHA1 xor ops: 6cyc F0/F1 +def : InstRW<[A57Write_6cyc_2V], (instregex "^SHA1SU0")>; +// Crypto SHA1 fast ops: 3cyc F0 
+def : InstRW<[A57Write_3cyc_1W], (instregex "^SHA1(H|SU1)")>;
+// Crypto SHA1 slow ops: 6cyc F0
+def : InstRW<[A57Write_6cyc_2W], (instregex "^SHA1[CMP]")>;
+// Crypto SHA256 fast ops: 3cyc F0
+def : InstRW<[A57Write_3cyc_1W], (instregex "^SHA256SU0")>;
+// Crypto SHA256 slow ops: 6cyc F0
+def : InstRW<[A57Write_6cyc_2W], (instregex "^SHA256(H|H2|SU1)")>;
+
+// --- 3.20 CRC ---
+def : InstRW<[A57Write_3cyc_1W], (instregex "^(t2)?CRC32")>;
+
+// -----------------------------------------------------------------------------
+// Common definitions
+def : WriteRes { let Latency = 0; let NumMicroOps = 0; }
+def : SchedAlias;
+
+def : SchedAlias;
+def : SchedAlias;
+def : SchedAlias;
+def : SchedAlias;
+
+def : SchedAlias;
+def : SchedAlias;
+def : ReadAdvance;
+
+} // SchedModel = CortexA57Model
+
diff --git a/interpreter/llvm/src/lib/Target/ARM/ARMScheduleA57WriteRes.td b/interpreter/llvm/src/lib/Target/ARM/ARMScheduleA57WriteRes.td
new file mode 100644
index 0000000000000..670717dc7c138
--- /dev/null
+++ b/interpreter/llvm/src/lib/Target/ARM/ARMScheduleA57WriteRes.td
@@ -0,0 +1,323 @@
+//=- ARMScheduleA57WriteRes.td - ARM Cortex-A57 Write Res ---*- tablegen -*-=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Contains all of the Cortex-A57 specific SchedWriteRes types. The approach
+// below is to define a generic SchedWriteRes for every combination of
+// latency and microOps. The naming convention is to use a prefix, one field
+// for latency, and one or more microOp count/type designators.
+//   Prefix: A57Write
+//   Latency: #cyc
+//   MicroOp Count/Types: #(B|I|M|L|S|X|W|V)
+//
+// e.g. A57Write_6cyc_1I_6S_4V means the total latency is 6 and there are
+//      11 micro-ops to be issued as follows: one to I pipe, six to S pipes and
+//      four to V pipes.
+//
+//===----------------------------------------------------------------------===//
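To make the convention concrete, the A57Write_6cyc_1I_6S_4V name cited above would denote a record shaped like the sketch below (illustrative only; this particular combination is not among the types defined in this file). Repeating a unit in the resource list is how these defs issue several micro-ops to the same pipe type:

def A57Write_6cyc_1I_6S_4V : SchedWriteRes<[A57UnitI,
                                            A57UnitS, A57UnitS, A57UnitS,
                                            A57UnitS, A57UnitS, A57UnitS,
                                            A57UnitV, A57UnitV, A57UnitV,
                                            A57UnitV]> {
  let Latency = 6;      // total latency in cycles
  let NumMicroOps = 11; // 1 I + 6 S + 4 V micro-ops
}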
+
+//===----------------------------------------------------------------------===//
+// Define Generic 1 micro-op types
+
+def A57Write_5cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 5; }
+def A57Write_5cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 5; }
+def A57Write_5cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 5; }
+def A57Write_10cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 10; }
+def A57Write_17cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 17;
+                                                    let ResourceCycles = [17]; }
+def A57Write_18cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 18;
+                                                    let ResourceCycles = [18]; }
+def A57Write_19cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 19;
+                                                    let ResourceCycles = [19]; }
+def A57Write_20cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 20;
+                                                    let ResourceCycles = [20]; }
+def A57Write_1cyc_1B : SchedWriteRes<[A57UnitB]> { let Latency = 1; }
+def A57Write_1cyc_1I : SchedWriteRes<[A57UnitI]> { let Latency = 1; }
+def A57Write_2cyc_1I : SchedWriteRes<[A57UnitI]> { let Latency = 2; }
+def A57Write_3cyc_1I : SchedWriteRes<[A57UnitI]> { let Latency = 3; }
+def A57Write_1cyc_1S : SchedWriteRes<[A57UnitS]> { let Latency = 1; }
+def A57Write_2cyc_1S : SchedWriteRes<[A57UnitS]> { let Latency = 2; }
+def A57Write_3cyc_1S : SchedWriteRes<[A57UnitS]> { let Latency = 3; }
+def A57Write_2cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 2; }
+def A57Write_32cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 32;
+                                                    let ResourceCycles = [32]; }
+def A57Write_32cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 32;
+                                                    let ResourceCycles = [32]; }
+def A57Write_35cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 35;
+                                                    let ResourceCycles = [35]; }
+def A57Write_3cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 3; }
+def A57Write_3cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 3; }
+def A57Write_3cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 3; }
+def A57Write_3cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 3; }
+
+// A57Write_3cyc_1L - A57Write_20cyc_1L
+foreach Lat = 3-20 in {
+  def A57Write_#Lat#cyc_1L : SchedWriteRes<[A57UnitL]> {
+    let Latency = Lat;
+  }
+}
+
+// A57Write_4cyc_1S - A57Write_16cyc_1S
+foreach Lat = 4-16 in {
+  def A57Write_#Lat#cyc_1S : SchedWriteRes<[A57UnitS]> {
+    let Latency = Lat;
+  }
+}
+
+def A57Write_4cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 4; }
+def A57Write_4cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 4; }
+def A57Write_4cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 4; }
+def A57Write_5cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 5; }
+def A57Write_6cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 6; }
+def A57Write_6cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 6; }
+def A57Write_8cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 8; }
+def A57Write_9cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 9; }
+def A57Write_6cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 6; }
+def A57Write_6cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 6; }
+
+
+//===----------------------------------------------------------------------===//
+// Define Generic 2 micro-op types
+
+def A57Write_64cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> {
+  let Latency = 64;
+  let NumMicroOps = 2;
+  let ResourceCycles = [32, 32];
+}
+def A57Write_6cyc_1I_1L : SchedWriteRes<[A57UnitI,
+                                         A57UnitL]> {
+  let Latency = 6;
+  let NumMicroOps = 2;
+}
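The divide- and square-root-class types above (A57Write_17cyc_1W through A57Write_35cyc_1M, and A57Write_64cyc_2X) set ResourceCycles to match the latency, modelling non-pipelined execution: the unit stays reserved until the result is ready. The ResourceCycles list is parallel to the resource list, so a two-micro-op write reserves each unit separately. A hypothetical def following the same pattern (not one of the types defined in this file):

// Non-pipelined, 24-cycle, 2 micro-op write: each X unit is reserved
// for 12 cycles (compare A57Write_36cyc_2X below, which uses [18, 18]).
def ExampleWrite_24cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> {
  let Latency = 24;
  let NumMicroOps = 2;
  let ResourceCycles = [12, 12];
}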
+def A57Write_6cyc_1V_1X : SchedWriteRes<[A57UnitV,
+                                         A57UnitX]> {
+  let Latency = 6;
+  let NumMicroOps = 2;
+}
+def A57Write_7cyc_1V_1X : SchedWriteRes<[A57UnitV,
+                                         A57UnitX]> {
+  let Latency = 7;
+  let NumMicroOps = 2;
+}
+def A57Write_8cyc_1L_1V : SchedWriteRes<[A57UnitL,
+                                         A57UnitV]> {
+  let Latency = 8;
+  let NumMicroOps = 2;
+}
+def A57Write_9cyc_1L_1V : SchedWriteRes<[A57UnitL,
+                                         A57UnitV]> {
+  let Latency = 9;
+  let NumMicroOps = 2;
+}
+def A57Write_9cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> {
+  let Latency = 9;
+  let NumMicroOps = 2;
+}
+def A57Write_8cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> {
+  let Latency = 8;
+  let NumMicroOps = 2;
+}
+def A57Write_6cyc_2L : SchedWriteRes<[A57UnitL, A57UnitL]> {
+  let Latency = 6;
+  let NumMicroOps = 2;
+}
+def A57Write_6cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> {
+  let Latency = 6;
+  let NumMicroOps = 2;
+}
+def A57Write_6cyc_2W : SchedWriteRes<[A57UnitW, A57UnitW]> {
+  let Latency = 6;
+  let NumMicroOps = 2;
+}
+def A57Write_5cyc_1I_1L : SchedWriteRes<[A57UnitI,
+                                         A57UnitL]> {
+  let Latency = 5;
+  let NumMicroOps = 2;
+}
+def A57Write_5cyc_1I_1M : SchedWriteRes<[A57UnitI,
+                                         A57UnitM]> {
+  let Latency = 5;
+  let NumMicroOps = 2;
+}
+def A57Write_5cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> {
+  let Latency = 5;
+  let NumMicroOps = 2;
+}
+def A57Write_5cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> {
+  let Latency = 5;
+  let NumMicroOps = 2;
+}
+def A57Write_10cyc_1L_1V : SchedWriteRes<[A57UnitL,
+                                          A57UnitV]> {
+  let Latency = 10;
+  let NumMicroOps = 2;
+}
+def A57Write_10cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> {
+  let Latency = 10;
+  let NumMicroOps = 2;
+}
+def A57Write_1cyc_1B_1I : SchedWriteRes<[A57UnitB,
+                                         A57UnitI]> {
+  let Latency = 1;
+  let NumMicroOps = 2;
+}
+def A57Write_1cyc_1I_1S : SchedWriteRes<[A57UnitI,
+                                         A57UnitS]> {
+  let Latency = 1;
+  let NumMicroOps = 2;
+}
+def A57Write_1cyc_1S_1I : SchedWriteRes<[A57UnitS,
+                                         A57UnitI]> {
+  let Latency = 1;
+  let NumMicroOps = 2;
+}
+def A57Write_2cyc_1S_1I : SchedWriteRes<[A57UnitS,
+                                         A57UnitI]> {
+  let Latency = 2;
+  let NumMicroOps = 2;
+}
+def A57Write_3cyc_1S_1I : SchedWriteRes<[A57UnitS,
+                                         A57UnitI]> {
+  let Latency = 3;
+  let NumMicroOps = 2;
+}
+def A57Write_1cyc_1S_1M : SchedWriteRes<[A57UnitS,
+                                         A57UnitM]> {
+  let Latency = 1;
+  let NumMicroOps = 2;
+}
+def A57Write_2cyc_1B_1I : SchedWriteRes<[A57UnitB,
+                                         A57UnitI]> {
+  let Latency = 2;
+  let NumMicroOps = 2;
+}
+def A57Write_3cyc_1B_1I : SchedWriteRes<[A57UnitB,
+                                         A57UnitI]> {
+  let Latency = 3;
+  let NumMicroOps = 2;
+}
+def A57Write_6cyc_1B_1L : SchedWriteRes<[A57UnitB,
+                                         A57UnitL]> {
+  let Latency = 6;
+  let NumMicroOps = 2;
+}
+def A57Write_2cyc_1I_1M : SchedWriteRes<[A57UnitI,
+                                         A57UnitM]> {
+  let Latency = 2;
+  let NumMicroOps = 2;
+}
+def A57Write_2cyc_2S : SchedWriteRes<[A57UnitS, A57UnitS]> {
+  let Latency = 2;
+  let NumMicroOps = 2;
+}
+def A57Write_2cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> {
+  let Latency = 2;
+  let NumMicroOps = 2;
+}
+def A57Write_36cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> {
+  let Latency = 36;
+  let NumMicroOps = 2;
+  let ResourceCycles = [18, 18];
+}
+def A57Write_3cyc_1I_1M : SchedWriteRes<[A57UnitI,
+                                         A57UnitM]> {
+  let Latency = 3;
+  let NumMicroOps = 2;
+}
+def A57Write_4cyc_1I_1M : SchedWriteRes<[A57UnitI,
+                                         A57UnitM]> {
+  let Latency = 4;
+  let NumMicroOps = 2;
+}
+
+// A57Write_3cyc_1L_1I - A57Write_20cyc_1L_1I
+foreach Lat = 3-20 in {
+  def A57Write_#Lat#cyc_1L_1I : SchedWriteRes<[A57UnitL, A57UnitI]> {
+    let Latency = Lat; let NumMicroOps = 2;
+  }
+}
+
+def A57Write_3cyc_1I_1S : SchedWriteRes<[A57UnitI,
+                                         A57UnitS]> {
+  let Latency = 3;
+  let NumMicroOps =
2; +} +def A57Write_3cyc_1S_1V : SchedWriteRes<[A57UnitS, + A57UnitV]> { + let Latency = 3; + let NumMicroOps = 2; +} +def A57Write_4cyc_1S_1V : SchedWriteRes<[A57UnitS, + A57UnitV]> { + let Latency = 4; + let NumMicroOps = 2; +} +def A57Write_3cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> { + let Latency = 3; + let NumMicroOps = 2; +} + +// A57Write_4cyc_1S_1I - A57Write_16cyc_1S_1I +foreach Lat = 4-16 in { + def A57Write_#Lat#cyc_1S_1I : SchedWriteRes<[A57UnitS, A57UnitI]> { + let Latency = Lat; let NumMicroOps = 2; + } +} + +def A57Write_4cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> { + let Latency = 4; + let NumMicroOps = 2; +} + + +//===----------------------------------------------------------------------===// +// Define Generic 3 micro-op types + +def A57Write_10cyc_3V : SchedWriteRes<[A57UnitV, A57UnitV, A57UnitV]> { + let Latency = 10; + let NumMicroOps = 3; +} +def A57Write_2cyc_1I_2S : SchedWriteRes<[A57UnitI, + A57UnitS, A57UnitS]> { + let Latency = 2; + let NumMicroOps = 3; +} +def A57Write_3cyc_1I_1S_1V : SchedWriteRes<[A57UnitI, + A57UnitS, + A57UnitV]> { + let Latency = 3; + let NumMicroOps = 3; +} +def A57Write_3cyc_1S_1V_1I : SchedWriteRes<[A57UnitS, + A57UnitV, + A57UnitI]> { + let Latency = 3; + let NumMicroOps = 3; +} +def A57Write_4cyc_1S_1V_1I : SchedWriteRes<[A57UnitS, + A57UnitV, + A57UnitI]> { + let Latency = 4; + let NumMicroOps = 3; +} +def A57Write_4cyc_1I_1L_1M : SchedWriteRes<[A57UnitI, A57UnitL, A57UnitM]> { + let Latency = 4; + let NumMicroOps = 3; +} +def A57Write_8cyc_1L_1V_1I : SchedWriteRes<[A57UnitL, + A57UnitV, + A57UnitI]> { + let Latency = 8; + let NumMicroOps = 3; +} +def A57Write_9cyc_1L_1V_1I : SchedWriteRes<[A57UnitL, + A57UnitV, + A57UnitI]> { + let Latency = 9; + let NumMicroOps = 3; +} diff --git a/interpreter/llvm/src/lib/Target/ARM/ARMScheduleA9.td b/interpreter/llvm/src/lib/Target/ARM/ARMScheduleA9.td index 8fb8a2a3b6d2d..4e72b13d94cbc 100644 --- a/interpreter/llvm/src/lib/Target/ARM/ARMScheduleA9.td +++ b/interpreter/llvm/src/lib/Target/ARM/ARMScheduleA9.td @@ -1981,6 +1981,15 @@ def A9WriteV7 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 7; } def A9WriteV9 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 9; } def A9WriteV10 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 10; } +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; + // Reserve A9UnitFP for 2 consecutive cycles. def A9Write2V4 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 4; diff --git a/interpreter/llvm/src/lib/Target/ARM/ARMScheduleM3.td b/interpreter/llvm/src/lib/Target/ARM/ARMScheduleM3.td new file mode 100644 index 0000000000000..93f8299f9bd04 --- /dev/null +++ b/interpreter/llvm/src/lib/Target/ARM/ARMScheduleM3.td @@ -0,0 +1,21 @@ +//=- ARMScheduleM3.td - ARM Cortex-M3 Scheduling Definitions -*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the machine model for the ARM Cortex-M3 processor. 
+// +//===----------------------------------------------------------------------===// + +def CortexM3Model : SchedMachineModel { + let IssueWidth = 1; // Only IT can be dual-issued, so assume single-issue + let MicroOpBufferSize = 0; // In-order + let LoadLatency = 2; // Latency when not pipelined, not pc-relative + let MispredictPenalty = 2; // Best case branch taken cost + + let CompleteModel = 0; +} diff --git a/interpreter/llvm/src/lib/Target/ARM/ARMScheduleR52.td b/interpreter/llvm/src/lib/Target/ARM/ARMScheduleR52.td index 537e5da9669f3..782be9b60a7ae 100644 --- a/interpreter/llvm/src/lib/Target/ARM/ARMScheduleR52.td +++ b/interpreter/llvm/src/lib/Target/ARM/ARMScheduleR52.td @@ -120,6 +120,12 @@ def : WriteRes { def : WriteRes { let Latency = 7; } def : WriteRes { let Latency = 17; } +// Overriden via InstRW for this processor. +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; + def : ReadAdvance; // mul operand read in F1 def : ReadAdvance; // fp-mac operand read in F1 @@ -712,20 +718,20 @@ def R52WriteSTM : SchedWriteVariant<[ // Vector Load/Stores. Can issue only in slot-0. Can dual-issue with // another instruction in slot-1, but only in the last issue. -def R52WriteVLD1Mem : SchedWriteRes<[R52UnitLd]> { let Latency = 5;} -def R52WriteVLD2Mem : SchedWriteRes<[R52UnitLd]> { +def : WriteRes { let Latency = 5;} +def : WriteRes { let Latency = 6; let NumMicroOps = 3; let ResourceCycles = [2]; let SingleIssue = 1; } -def R52WriteVLD3Mem : SchedWriteRes<[R52UnitLd]> { +def : WriteRes { let Latency = 7; let NumMicroOps = 5; let ResourceCycles = [3]; let SingleIssue = 1; } -def R52WriteVLD4Mem : SchedWriteRes<[R52UnitLd]> { +def : WriteRes { let Latency = 8; let NumMicroOps = 7; let ResourceCycles = [4]; @@ -828,95 +834,6 @@ def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1], def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VRSHL", "VRSHR", "VRSHRN", "VTB")>; def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "VSWP", "VTRN", "VUZP", "VZIP")>; -//--- -// VLDx. 
Vector Loads -//--- -// 1-element structure load -def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD1d(8|16|32|64)$")>; -def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD1q(8|16|32|64)$")>; -def : InstRW<[R52WriteVLD3Mem, R52Read_ISS], (instregex "VLD1d(8|16|32|64)T$")>; -def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD1d(8|16|32|64)Q$")>; -def : InstRW<[R52WriteVLD3Mem, R52Read_ISS], (instregex "VLD1d64TPseudo$")>; -def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD1d64QPseudo$")>; - -def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD1(LN|DUP)d(8|16|32)$")>; -def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD1LNdAsm_(8|16|32)")>; -def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD1(LN|DUP)q(8|16|32)Pseudo$")>; - -def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1d(8|16|32|64)wb")>; -def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1q(8|16|32|64)wb")>; -def : InstRW<[R52WriteVLD3Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1d(8|16|32|64)Twb")>; -def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1d(8|16|32|64)Qwb")>; -def : InstRW<[R52WriteVLD3Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1d64TPseudoWB")>; -def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1d64QPseudoWB")>; - -def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1LNd(8|16|32)_UPD")>; -def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1LNdWB_(fixed|register)_Asm_(8|16|32)")>; -def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1DUP(d|q)(8|16|32)wb")>; -def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1(LN|DUP)q(8|16|32)Pseudo_UPD")>; - -// 2-element structure load -def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD2(d|b)(8|16|32)$")>; -def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD2q(8|16|32)$")>; -def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2(d|b)(8|16|32)wb")>; -def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2q(8|16|32)wb")>; -def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD2q(8|16|32)Pseudo$")>; -def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2q(8|16|32)PseudoWB")>; - -def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNd(8|16|32)$")>; -def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNdAsm_(8|16|32)$")>; -def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNq(16|32)$")>; -def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNqAsm_(16|32)$")>; -def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2DUPd(8|16|32)$")>; -def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2DUPd(8|16|32)x2$")>; -def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNd(8|16|32)Pseudo")>; -def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNq(16|32)Pseudo")>; - -def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNd(8|16|32)_UPD")>; -def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNdWB_(fixed|register)_Asm_(8|16|32)")>; - -def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNq(16|32)_UPD")>; -def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNqWB_(fixed|register)_Asm_(16|32)")>; - -def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2DUPd(8|16|32)wb")>; -def : InstRW<[R52WriteVLD1Mem, 
R52WriteAdr, R52Read_ISS], (instregex "VLD2DUPd(8|16|32)x2wb")>; -def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNd(8|16|32)Pseudo_UPD")>; -def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNq(16|32)Pseudo_UPD")>; - -// 3-element structure load -def : InstRW<[R52WriteVLD3Mem, R52Read_ISS], (instregex "VLD3(d|q)(8|16|32)$")>; -def : InstRW<[R52WriteVLD3Mem, R52Read_ISS], (instregex "VLD3(d|q)Asm_(8|16|32)$")>; -def : InstRW<[R52WriteVLD3Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(d|q)(8|16|32)_UPD")>; -def : InstRW<[R52WriteVLD3Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(d|q)WB_(fixed|register)_Asm_(8|16|32)")>; -def : InstRW<[R52WriteVLD3Mem, R52Read_ISS], (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo")>; -def : InstRW<[R52WriteVLD3Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo_UPD")>; - -def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)(8|16|32)$")>; -def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)Asm_(8|16|32)$")>; -def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)(8|16|32)Pseudo$")>; - -def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)(8|16|32)_UPD")>; -def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)WB_(fixed|register)_Asm_(8|16|32)")>; -def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)WB_(fixed|register)_Asm_(8|16|32)")>; -def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)(8|16|32)Pseudo_UPD")>; - -// 4-element structure load -def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD4(d|q)(8|16|32)$")>; -def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD4(d|q)Asm_(8|16|32)$")>; -def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD4(d|q)(8|16|32)(oddP|P)seudo")>; -def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(d|q)(8|16|32)_UPD")>; -def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(d|q)WB_(fixed|register)_Asm_(8|16|32)")>; -def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(d|q)(8|16|32)(oddP|P)seudo_UPD")>; - - -def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD4(LN|DUP)(d|q)(8|16|32)$")>; -def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD4(LN|DUP)(d|q)Asm_(8|16|32)$")>; -def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD4LN(d|q)(8|16|32)Pseudo$")>; -def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD4DUPd(8|16|32)Pseudo$")>; -def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(LN|DUP)(d|q)(8|16|32)_UPD")>; -def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(LN|DUP)(d|q)WB_(fixed|register)_Asm_(8|16|32)")>; -def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(LN|DUP)(d|q)(8|16|32)Pseudo_UPD")>; - //--- // VSTx. Vector Stores //--- diff --git a/interpreter/llvm/src/lib/Target/ARM/ARMScheduleSwift.td b/interpreter/llvm/src/lib/Target/ARM/ARMScheduleSwift.td index dc041c6c6006b..b838688c6f04e 100644 --- a/interpreter/llvm/src/lib/Target/ARM/ARMScheduleSwift.td +++ b/interpreter/llvm/src/lib/Target/ARM/ARMScheduleSwift.td @@ -1070,6 +1070,16 @@ let SchedModel = SwiftModel in { def : ReadAdvance; def : ReadAdvance; + // Overriden via InstRW for this processor. 
+ def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + // Not specified. def : InstRW<[SwiftWriteP01OneCycle2x], (instregex "ABS")>; // Preload. diff --git a/interpreter/llvm/src/lib/Target/ARM/ARMSubtarget.cpp b/interpreter/llvm/src/lib/Target/ARM/ARMSubtarget.cpp index b8a708a20a955..2c42a13361664 100644 --- a/interpreter/llvm/src/lib/Target/ARM/ARMSubtarget.cpp +++ b/interpreter/llvm/src/lib/Target/ARM/ARMSubtarget.cpp @@ -11,6 +11,13 @@ // //===----------------------------------------------------------------------===// +#include "ARM.h" + +#ifdef LLVM_BUILD_GLOBAL_ISEL +#include "ARMCallLowering.h" +#include "ARMLegalizerInfo.h" +#include "ARMRegisterBankInfo.h" +#endif #include "ARMSubtarget.h" #include "ARMFrameLowering.h" #include "ARMInstrInfo.h" @@ -23,15 +30,22 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" +#ifdef LLVM_BUILD_GLOBAL_ISEL +#include "llvm/CodeGen/GlobalISel/GISelAccessor.h" +#include "llvm/CodeGen/GlobalISel/IRTranslator.h" +#include "llvm/CodeGen/GlobalISel/InstructionSelect.h" +#include "llvm/CodeGen/GlobalISel/Legalizer.h" +#include "llvm/CodeGen/GlobalISel/RegBankSelect.h" +#endif #include "llvm/CodeGen/MachineFunction.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCTargetOptions.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Target/TargetOptions.h" #include "llvm/Support/CodeGen.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/TargetParser.h" +#include "llvm/Target/TargetOptions.h" #include #include @@ -78,11 +92,6 @@ ARMSubtarget &ARMSubtarget::initializeSubtargetDependencies(StringRef CPU, return *this; } -/// EnableExecuteOnly - Enables the generation of execute-only code on supported -/// targets -static cl::opt -EnableExecuteOnly("arm-execute-only"); - ARMFrameLowering *ARMSubtarget::initializeFrameLowering(StringRef CPU, StringRef FS) { ARMSubtarget &STI = initializeSubtargetDependencies(CPU, FS); @@ -92,13 +101,41 @@ ARMFrameLowering *ARMSubtarget::initializeFrameLowering(StringRef CPU, return new ARMFrameLowering(STI); } +#ifdef LLVM_BUILD_GLOBAL_ISEL +namespace { + +struct ARMGISelActualAccessor : public GISelAccessor { + std::unique_ptr CallLoweringInfo; + std::unique_ptr InstSelector; + std::unique_ptr Legalizer; + std::unique_ptr RegBankInfo; + + const CallLowering *getCallLowering() const override { + return CallLoweringInfo.get(); + } + + const InstructionSelector *getInstructionSelector() const override { + return InstSelector.get(); + } + + const LegalizerInfo *getLegalizerInfo() const override { + return Legalizer.get(); + } + + const RegisterBankInfo *getRegBankInfo() const override { + return RegBankInfo.get(); + } +}; + +} // end anonymous namespace +#endif + ARMSubtarget::ARMSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS, const ARMBaseTargetMachine &TM, bool IsLittle) : ARMGenSubtargetInfo(TT, CPU, FS), UseMulOps(UseFusedMulOps), - GenExecuteOnly(EnableExecuteOnly), CPUString(CPU), IsLittle(IsLittle), - TargetTriple(TT), Options(TM.Options), TM(TM), - FrameLowering(initializeFrameLowering(CPU, FS)), + CPUString(CPU), IsLittle(IsLittle), TargetTriple(TT), Options(TM.Options), + TM(TM), FrameLowering(initializeFrameLowering(CPU, FS)), // At this point initializeSubtargetDependencies has been called so // we can query directly. 
InstrInfo(isThumb1Only() @@ -106,7 +143,29 @@ ARMSubtarget::ARMSubtarget(const Triple &TT, const std::string &CPU, : !isThumb() ? (ARMBaseInstrInfo *)new ARMInstrInfo(*this) : (ARMBaseInstrInfo *)new Thumb2InstrInfo(*this)), - TLInfo(TM, *this) {} + TLInfo(TM, *this) { + assert((isThumb() || hasARMOps()) && + "Target must either be thumb or support ARM operations!"); + +#ifndef LLVM_BUILD_GLOBAL_ISEL + GISelAccessor *GISel = new GISelAccessor(); +#else + ARMGISelActualAccessor *GISel = new ARMGISelActualAccessor(); + GISel->CallLoweringInfo.reset(new ARMCallLowering(*getTargetLowering())); + GISel->Legalizer.reset(new ARMLegalizerInfo(*this)); + + auto *RBI = new ARMRegisterBankInfo(*getRegisterInfo()); + + // FIXME: At this point, we can't rely on Subtarget having RBI. + // It's awkward to mix passing RBI and the Subtarget; should we pass + // TII/TRI as well? + GISel->InstSelector.reset(createARMInstructionSelector( + *static_cast(&TM), *this, *RBI)); + + GISel->RegBankInfo.reset(RBI); +#endif + setGISelAccessor(*GISel); +} const CallLowering *ARMSubtarget::getCallLowering() const { assert(GISel && "Access to GlobalISel APIs not set"); diff --git a/interpreter/llvm/src/lib/Target/ARM/ARMSubtarget.h b/interpreter/llvm/src/lib/Target/ARM/ARMSubtarget.h index d2630685d91b7..e15b17512c964 100644 --- a/interpreter/llvm/src/lib/Target/ARM/ARMSubtarget.h +++ b/interpreter/llvm/src/lib/Target/ARM/ARMSubtarget.h @@ -234,6 +234,10 @@ class ARMSubtarget : public ARMGenSubtargetInfo { /// CPSR setting instruction. bool AvoidCPSRPartialUpdate = false; + /// CheapPredicableCPSRDef - If true, disable +1 predication cost + /// for instructions updating CPSR. Enabled for Cortex-A57. + bool CheapPredicableCPSRDef = false; + /// AvoidMOVsShifterOperand - If true, codegen should avoid using flag setting /// movs with shifter operand (i.e. asr, lsl, lsr). bool AvoidMOVsShifterOperand = false; @@ -242,6 +246,11 @@ class ARMSubtarget : public ARMGenSubtargetInfo { /// avoid issue "normal" call instructions to callees which do not return. bool HasRetAddrStack = false; + /// HasBranchPredictor - True if the subtarget has a branch predictor. Having + /// a branch predictor or not changes the expected cost of taking a branch + /// which affects the choice of whether to use predicated instructions. + bool HasBranchPredictor = true; + /// HasMPExtension - True if the subtarget supports Multiprocessing /// extension (ARMv7 only). bool HasMPExtension = false; @@ -281,6 +290,10 @@ class ARMSubtarget : public ARMGenSubtargetInfo { /// HasFPAO - if true, processor does positive address offset computation faster bool HasFPAO = false; + /// HasFuseAES - if true, processor executes back to back AES instruction + /// pairs faster. + bool HasFuseAES = false; + /// If true, if conversion may decide to leave some instructions unpredicated. 
bool IsProfitableToUnpredicate = false; @@ -543,8 +556,10 @@ class ARMSubtarget : public ARMGenSubtargetInfo { bool nonpipelinedVFP() const { return NonpipelinedVFP; } bool prefers32BitThumb() const { return Pref32BitThumb; } bool avoidCPSRPartialUpdate() const { return AvoidCPSRPartialUpdate; } + bool cheapPredicableCPSRDef() const { return CheapPredicableCPSRDef; } bool avoidMOVsShifterOperand() const { return AvoidMOVsShifterOperand; } bool hasRetAddrStack() const { return HasRetAddrStack; } + bool hasBranchPredictor() const { return HasBranchPredictor; } bool hasMPExtension() const { return HasMPExtension; } bool hasDSP() const { return HasDSP; } bool useNaClTrap() const { return UseNaClTrap; } @@ -556,6 +571,10 @@ class ARMSubtarget : public ARMGenSubtargetInfo { bool hasD16() const { return HasD16; } bool hasFullFP16() const { return HasFullFP16; } + bool hasFuseAES() const { return HasFuseAES; } + /// \brief Return true if the CPU supports any kind of instruction fusion. + bool hasFusion() const { return hasFuseAES(); } + const Triple &getTargetTriple() const { return TargetTriple; } bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); } diff --git a/interpreter/llvm/src/lib/Target/ARM/ARMTargetMachine.cpp b/interpreter/llvm/src/lib/Target/ARM/ARMTargetMachine.cpp index 5583d6148b086..c323a1d368dee 100644 --- a/interpreter/llvm/src/lib/Target/ARM/ARMTargetMachine.cpp +++ b/interpreter/llvm/src/lib/Target/ARM/ARMTargetMachine.cpp @@ -11,12 +11,8 @@ //===----------------------------------------------------------------------===// #include "ARM.h" -#include "ARMCallLowering.h" -#include "ARMLegalizerInfo.h" -#ifdef LLVM_BUILD_GLOBAL_ISEL -#include "ARMRegisterBankInfo.h" -#endif #include "ARMSubtarget.h" +#include "ARMMacroFusion.h" #include "ARMTargetMachine.h" #include "ARMTargetObjectFile.h" #include "ARMTargetTransformInfo.h" @@ -28,7 +24,6 @@ #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/ExecutionDepsFix.h" #include "llvm/CodeGen/GlobalISel/CallLowering.h" -#include "llvm/CodeGen/GlobalISel/GISelAccessor.h" #include "llvm/CodeGen/GlobalISel/IRTranslator.h" #include "llvm/CodeGen/GlobalISel/InstructionSelect.h" #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" @@ -37,6 +32,7 @@ #include "llvm/CodeGen/GlobalISel/RegBankSelect.h" #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineScheduler.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/Attributes.h" @@ -85,9 +81,9 @@ namespace llvm { extern "C" void LLVMInitializeARMTarget() { // Register the target. 
RegisterTargetMachine X(getTheARMLETarget()); + RegisterTargetMachine A(getTheThumbLETarget()); RegisterTargetMachine Y(getTheARMBETarget()); - RegisterTargetMachine A(getTheThumbLETarget()); - RegisterTargetMachine B(getTheThumbBETarget()); + RegisterTargetMachine B(getTheThumbBETarget()); PassRegistry &Registry = *PassRegistry::getPassRegistry(); initializeGlobalISel(Registry); @@ -108,60 +104,20 @@ static std::unique_ptr createTLOF(const Triple &TT) { static ARMBaseTargetMachine::ARMABI computeTargetABI(const Triple &TT, StringRef CPU, const TargetOptions &Options) { - if (Options.MCOptions.getABIName() == "aapcs16") + StringRef ABIName = Options.MCOptions.getABIName(); + + if (ABIName.empty()) + ABIName = ARM::computeDefaultTargetABI(TT, CPU); + + if (ABIName == "aapcs16") return ARMBaseTargetMachine::ARM_ABI_AAPCS16; - else if (Options.MCOptions.getABIName().startswith("aapcs")) + else if (ABIName.startswith("aapcs")) return ARMBaseTargetMachine::ARM_ABI_AAPCS; - else if (Options.MCOptions.getABIName().startswith("apcs")) + else if (ABIName.startswith("apcs")) return ARMBaseTargetMachine::ARM_ABI_APCS; - assert(Options.MCOptions.getABIName().empty() && - "Unknown target-abi option!"); - - ARMBaseTargetMachine::ARMABI TargetABI = - ARMBaseTargetMachine::ARM_ABI_UNKNOWN; - - unsigned ArchKind = ARM::parseCPUArch(CPU); - StringRef ArchName = ARM::getArchName(ArchKind); - // FIXME: This is duplicated code from the front end and should be unified. - if (TT.isOSBinFormatMachO()) { - if (TT.getEnvironment() == Triple::EABI || - (TT.getOS() == Triple::UnknownOS && TT.isOSBinFormatMachO()) || - ARM::parseArchProfile(ArchName) == ARM::PK_M) { - TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS; - } else if (TT.isWatchABI()) { - TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS16; - } else { - TargetABI = ARMBaseTargetMachine::ARM_ABI_APCS; - } - } else if (TT.isOSWindows()) { - // FIXME: this is invalid for WindowsCE - TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS; - } else { - // Select the default based on the platform. - switch (TT.getEnvironment()) { - case Triple::Android: - case Triple::GNUEABI: - case Triple::GNUEABIHF: - case Triple::MuslEABI: - case Triple::MuslEABIHF: - case Triple::EABIHF: - case Triple::EABI: - TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS; - break; - case Triple::GNU: - TargetABI = ARMBaseTargetMachine::ARM_ABI_APCS; - break; - default: - if (TT.isOSNetBSD()) - TargetABI = ARMBaseTargetMachine::ARM_ABI_APCS; - else - TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS; - break; - } - } - - return TargetABI; + llvm_unreachable("Unhandled/unknown ABI Name!"); + return ARMBaseTargetMachine::ARM_ABI_UNKNOWN; } static std::string computeDataLayout(const Triple &TT, StringRef CPU, @@ -246,56 +202,39 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, const Triple &TT, CPU, FS, Options, getEffectiveRelocModel(TT, RM), CM, OL), TargetABI(computeTargetABI(TT, CPU, Options)), - TLOF(createTLOF(getTargetTriple())), - Subtarget(TT, CPU, FS, *this, isLittle), isLittle(isLittle) { + TLOF(createTLOF(getTargetTriple())), isLittle(isLittle) { // Default to triple-appropriate float ABI - if (Options.FloatABIType == FloatABI::Default) - this->Options.FloatABIType = - Subtarget.isTargetHardFloat() ? 
FloatABI::Hard : FloatABI::Soft; + if (Options.FloatABIType == FloatABI::Default) { + if (TargetTriple.getEnvironment() == Triple::GNUEABIHF || + TargetTriple.getEnvironment() == Triple::MuslEABIHF || + TargetTriple.getEnvironment() == Triple::EABIHF || + TargetTriple.isOSWindows() || + TargetABI == ARMBaseTargetMachine::ARM_ABI_AAPCS16) + this->Options.FloatABIType = FloatABI::Hard; + else + this->Options.FloatABIType = FloatABI::Soft; + } // Default to triple-appropriate EABI if (Options.EABIVersion == EABI::Default || Options.EABIVersion == EABI::Unknown) { // musl is compatible with glibc with regard to EABI version - if (Subtarget.isTargetGNUAEABI() || Subtarget.isTargetMuslAEABI()) + if ((TargetTriple.getEnvironment() == Triple::GNUEABI || + TargetTriple.getEnvironment() == Triple::GNUEABIHF || + TargetTriple.getEnvironment() == Triple::MuslEABI || + TargetTriple.getEnvironment() == Triple::MuslEABIHF) && + !(TargetTriple.isOSWindows() || TargetTriple.isOSDarwin())) this->Options.EABIVersion = EABI::GNU; else this->Options.EABIVersion = EABI::EABI5; } + + initAsmInfo(); } ARMBaseTargetMachine::~ARMBaseTargetMachine() = default; -#ifdef LLVM_BUILD_GLOBAL_ISEL -namespace { - -struct ARMGISelActualAccessor : public GISelAccessor { - std::unique_ptr CallLoweringInfo; - std::unique_ptr InstSelector; - std::unique_ptr Legalizer; - std::unique_ptr RegBankInfo; - - const CallLowering *getCallLowering() const override { - return CallLoweringInfo.get(); - } - - const InstructionSelector *getInstructionSelector() const override { - return InstSelector.get(); - } - - const LegalizerInfo *getLegalizerInfo() const override { - return Legalizer.get(); - } - - const RegisterBankInfo *getRegBankInfo() const override { - return RegBankInfo.get(); - } -}; - -} // end anonymous namespace -#endif - const ARMSubtarget * ARMBaseTargetMachine::getSubtargetImpl(const Function &F) const { Attribute CPUAttr = F.getFnAttribute("target-cpu"); @@ -327,24 +266,6 @@ ARMBaseTargetMachine::getSubtargetImpl(const Function &F) const { // function that reside in TargetOptions. resetTargetOptions(F); I = llvm::make_unique(TargetTriple, CPU, FS, *this, isLittle); - -#ifndef LLVM_BUILD_GLOBAL_ISEL - GISelAccessor *GISel = new GISelAccessor(); -#else - ARMGISelActualAccessor *GISel = new ARMGISelActualAccessor(); - GISel->CallLoweringInfo.reset(new ARMCallLowering(*I->getTargetLowering())); - GISel->Legalizer.reset(new ARMLegalizerInfo(*I)); - - auto *RBI = new ARMRegisterBankInfo(*I->getRegisterInfo()); - - // FIXME: At this point, we can't rely on Subtarget having RBI. - // It's awkward to mix passing RBI and the Subtarget; should we pass - // TII/TRI as well? 
- GISel->InstSelector.reset(createARMInstructionSelector(*this, *I, *RBI)); - - GISel->RegBankInfo.reset(RBI); -#endif - I->setGISelAccessor(*GISel); } return I.get(); } @@ -355,22 +276,6 @@ TargetIRAnalysis ARMBaseTargetMachine::getTargetIRAnalysis() { }); } -void ARMTargetMachine::anchor() {} - -ARMTargetMachine::ARMTargetMachine(const Target &T, const Triple &TT, - StringRef CPU, StringRef FS, - const TargetOptions &Options, - Optional RM, - CodeModel::Model CM, CodeGenOpt::Level OL, - bool isLittle) - : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, isLittle) { - initAsmInfo(); - if (!Subtarget.hasARMOps()) - report_fatal_error("CPU: '" + Subtarget.getCPUString() + "' does not " - "support ARM mode execution!"); -} - -void ARMLETargetMachine::anchor() {} ARMLETargetMachine::ARMLETargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, @@ -378,9 +283,7 @@ ARMLETargetMachine::ARMLETargetMachine(const Target &T, const Triple &TT, Optional RM, CodeModel::Model CM, CodeGenOpt::Level OL) - : ARMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {} - -void ARMBETargetMachine::anchor() {} + : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {} ARMBETargetMachine::ARMBETargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, @@ -388,52 +291,40 @@ ARMBETargetMachine::ARMBETargetMachine(const Target &T, const Triple &TT, Optional RM, CodeModel::Model CM, CodeGenOpt::Level OL) - : ARMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {} - -void ThumbTargetMachine::anchor() {} - -ThumbTargetMachine::ThumbTargetMachine(const Target &T, const Triple &TT, - StringRef CPU, StringRef FS, - const TargetOptions &Options, - Optional RM, - CodeModel::Model CM, - CodeGenOpt::Level OL, bool isLittle) - : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, isLittle) { - initAsmInfo(); -} - -void ThumbLETargetMachine::anchor() {} - -ThumbLETargetMachine::ThumbLETargetMachine(const Target &T, const Triple &TT, - StringRef CPU, StringRef FS, - const TargetOptions &Options, - Optional RM, - CodeModel::Model CM, - CodeGenOpt::Level OL) - : ThumbTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {} - -void ThumbBETargetMachine::anchor() {} - -ThumbBETargetMachine::ThumbBETargetMachine(const Target &T, const Triple &TT, - StringRef CPU, StringRef FS, - const TargetOptions &Options, - Optional RM, - CodeModel::Model CM, - CodeGenOpt::Level OL) - : ThumbTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {} + : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {} namespace { /// ARM Code Generator Pass Configuration Options. class ARMPassConfig : public TargetPassConfig { public: - ARMPassConfig(ARMBaseTargetMachine *TM, PassManagerBase &PM) + ARMPassConfig(ARMBaseTargetMachine &TM, PassManagerBase &PM) : TargetPassConfig(TM, PM) {} ARMBaseTargetMachine &getARMTargetMachine() const { return getTM(); } + ScheduleDAGInstrs * + createMachineScheduler(MachineSchedContext *C) const override { + ScheduleDAGMILive *DAG = createGenericSchedLive(C); + // add DAG Mutations here. + const ARMSubtarget &ST = C->MF->getSubtarget(); + if (ST.hasFusion()) + DAG->addMutation(createARMMacroFusionDAGMutation()); + return DAG; + } + + ScheduleDAGInstrs * + createPostMachineScheduler(MachineSchedContext *C) const override { + ScheduleDAGMI *DAG = createGenericSchedPostRA(C); + // add DAG Mutations here. 
+ const ARMSubtarget &ST = C->MF->getSubtarget(); + if (ST.hasFusion()) + DAG->addMutation(createARMMacroFusionDAGMutation()); + return DAG; + } + void addIRPasses() override; bool addPreISel() override; bool addInstSelector() override; @@ -464,14 +355,14 @@ INITIALIZE_PASS(ARMExecutionDepsFix, "arm-execution-deps-fix", "ARM Execution Dependency Fix", false, false) TargetPassConfig *ARMBaseTargetMachine::createPassConfig(PassManagerBase &PM) { - return new ARMPassConfig(this, PM); + return new ARMPassConfig(*this, PM); } void ARMPassConfig::addIRPasses() { if (TM->Options.ThreadModel == ThreadModel::Single) addPass(createLowerAtomicPass()); else - addPass(createAtomicExpandPass(TM)); + addPass(createAtomicExpandPass()); // Cmpxchg instructions are often used with a subsequent comparison to // determine whether it succeeded. We can exploit existing control-flow in @@ -486,7 +377,7 @@ void ARMPassConfig::addIRPasses() { // Match interleaved memory accesses to ldN/stN intrinsics. if (TM->getOptLevel() != CodeGenOpt::None) - addPass(createInterleavedAccessPass(TM)); + addPass(createInterleavedAccessPass()); } bool ARMPassConfig::addPreISel() { diff --git a/interpreter/llvm/src/lib/Target/ARM/ARMTargetMachine.h b/interpreter/llvm/src/lib/Target/ARM/ARMTargetMachine.h index f0ca9427d9fb0..22ce949367f34 100644 --- a/interpreter/llvm/src/lib/Target/ARM/ARMTargetMachine.h +++ b/interpreter/llvm/src/lib/Target/ARM/ARMTargetMachine.h @@ -36,7 +36,6 @@ class ARMBaseTargetMachine : public LLVMTargetMachine { protected: std::unique_ptr TLOF; - ARMSubtarget Subtarget; bool isLittle; mutable StringMap> SubtargetMap; @@ -47,8 +46,10 @@ class ARMBaseTargetMachine : public LLVMTargetMachine { CodeGenOpt::Level OL, bool isLittle); ~ARMBaseTargetMachine() override; - const ARMSubtarget *getSubtargetImpl() const { return &Subtarget; } const ARMSubtarget *getSubtargetImpl(const Function &F) const override; + // The no argument getSubtargetImpl, while it exists on some targets, is + // deprecated and should not be used. + const ARMSubtarget *getSubtargetImpl() const = delete; bool isLittleEndian() const { return isLittle; } /// \brief Get the TargetIRAnalysis for this target. @@ -60,25 +61,15 @@ class ARMBaseTargetMachine : public LLVMTargetMachine { TargetLoweringObjectFile *getObjFileLowering() const override { return TLOF.get(); } -}; - -/// ARM target machine. -/// -class ARMTargetMachine : public ARMBaseTargetMachine { - virtual void anchor(); -public: - ARMTargetMachine(const Target &T, const Triple &TT, StringRef CPU, - StringRef FS, const TargetOptions &Options, - Optional RM, CodeModel::Model CM, - CodeGenOpt::Level OL, bool isLittle); + bool isMachineVerifierClean() const override { + return false; + } }; -/// ARM little endian target machine. +/// ARM/Thumb little endian target machine. /// -class ARMLETargetMachine : public ARMTargetMachine { - void anchor() override; - +class ARMLETargetMachine : public ARMBaseTargetMachine { public: ARMLETargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, @@ -86,11 +77,9 @@ class ARMLETargetMachine : public ARMTargetMachine { CodeGenOpt::Level OL); }; -/// ARM big endian target machine. +/// ARM/Thumb big endian target machine. 
/// -class ARMBETargetMachine : public ARMTargetMachine { - void anchor() override; - +class ARMBETargetMachine : public ARMBaseTargetMachine { public: ARMBETargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, @@ -98,44 +87,6 @@ class ARMBETargetMachine : public ARMTargetMachine { CodeGenOpt::Level OL); }; -/// Thumb target machine. -/// Due to the way architectures are handled, this represents both -/// Thumb-1 and Thumb-2. -/// -class ThumbTargetMachine : public ARMBaseTargetMachine { - virtual void anchor(); - -public: - ThumbTargetMachine(const Target &T, const Triple &TT, StringRef CPU, - StringRef FS, const TargetOptions &Options, - Optional RM, CodeModel::Model CM, - CodeGenOpt::Level OL, bool isLittle); -}; - -/// Thumb little endian target machine. -/// -class ThumbLETargetMachine : public ThumbTargetMachine { - void anchor() override; - -public: - ThumbLETargetMachine(const Target &T, const Triple &TT, StringRef CPU, - StringRef FS, const TargetOptions &Options, - Optional RM, CodeModel::Model CM, - CodeGenOpt::Level OL); -}; - -/// Thumb big endian target machine. -/// -class ThumbBETargetMachine : public ThumbTargetMachine { - void anchor() override; - -public: - ThumbBETargetMachine(const Target &T, const Triple &TT, StringRef CPU, - StringRef FS, const TargetOptions &Options, - Optional RM, CodeModel::Model CM, - CodeGenOpt::Level OL); -}; - } // end namespace llvm #endif // LLVM_LIB_TARGET_ARM_ARMTARGETMACHINE_H diff --git a/interpreter/llvm/src/lib/Target/ARM/ARMTargetObjectFile.cpp b/interpreter/llvm/src/lib/Target/ARM/ARMTargetObjectFile.cpp index 94f9e8dfebbf8..88bab64ffaf28 100644 --- a/interpreter/llvm/src/lib/Target/ARM/ARMTargetObjectFile.cpp +++ b/interpreter/llvm/src/lib/Target/ARM/ARMTargetObjectFile.cpp @@ -7,17 +7,17 @@ // //===----------------------------------------------------------------------===// +#include "ARMTargetObjectFile.h" #include "ARMSubtarget.h" #include "ARMTargetMachine.h" -#include "ARMTargetObjectFile.h" +#include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCTargetOptions.h" #include "llvm/MC/SectionKind.h" -#include "llvm/Support/Dwarf.h" -#include "llvm/Support/ELF.h" #include "llvm/Target/TargetMachine.h" #include @@ -30,9 +30,9 @@ using namespace dwarf; void ARMElfTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM) { - const ARMTargetMachine &ARM_TM = static_cast(TM); - bool isAAPCS_ABI = ARM_TM.TargetABI == ARMTargetMachine::ARMABI::ARM_ABI_AAPCS; - genExecuteOnly = ARM_TM.getSubtargetImpl()->genExecuteOnly(); + const ARMBaseTargetMachine &ARM_TM = static_cast(TM); + bool isAAPCS_ABI = ARM_TM.TargetABI == ARMBaseTargetMachine::ARMABI::ARM_ABI_AAPCS; + // genExecuteOnly = ARM_TM.getSubtargetImpl()->genExecuteOnly(); TargetLoweringObjectFileELF::Initialize(Ctx, TM); InitializeELF(isAAPCS_ABI); @@ -43,16 +43,6 @@ void ARMElfTargetObjectFile::Initialize(MCContext &Ctx, AttributesSection = getContext().getELFSection(".ARM.attributes", ELF::SHT_ARM_ATTRIBUTES, 0); - - // Make code section unreadable when in execute-only mode - if (genExecuteOnly) { - unsigned Type = ELF::SHT_PROGBITS; - unsigned Flags = ELF::SHF_EXECINSTR | ELF::SHF_ALLOC | ELF::SHF_ARM_PURECODE; - // Since we cannot modify flags for an existing section, we create a new - // section with the right flags, and use 0 as the unique ID for - // 
execute-only text - TextSection = Ctx.getELFSection(".text", Type, Flags, 0, "", 0U); - } } const MCExpr *ARMElfTargetObjectFile::getTTypeGlobalReference( @@ -74,21 +64,27 @@ getDebugThreadLocalSymbol(const MCSymbol *Sym) const { getContext()); } -MCSection * -ARMElfTargetObjectFile::getExplicitSectionGlobal(const GlobalObject *GO, - SectionKind SK, const TargetMachine &TM) const { +static bool isExecuteOnlyFunction(const GlobalObject *GO, SectionKind SK, + const TargetMachine &TM) { + if (const Function *F = dyn_cast(GO)) + if (TM.getSubtarget(*F).genExecuteOnly() && SK.isText()) + return true; + return false; +} + +MCSection *ARMElfTargetObjectFile::getExplicitSectionGlobal( + const GlobalObject *GO, SectionKind SK, const TargetMachine &TM) const { // Set execute-only access for the explicit section - if (genExecuteOnly && SK.isText()) + if (isExecuteOnlyFunction(GO, SK, TM)) SK = SectionKind::getExecuteOnly(); return TargetLoweringObjectFileELF::getExplicitSectionGlobal(GO, SK, TM); } -MCSection * -ARMElfTargetObjectFile::SelectSectionForGlobal(const GlobalObject *GO, - SectionKind SK, const TargetMachine &TM) const { +MCSection *ARMElfTargetObjectFile::SelectSectionForGlobal( + const GlobalObject *GO, SectionKind SK, const TargetMachine &TM) const { // Place the global in the execute-only text section - if (genExecuteOnly && SK.isText()) + if (isExecuteOnlyFunction(GO, SK, TM)) SK = SectionKind::getExecuteOnly(); return TargetLoweringObjectFileELF::SelectSectionForGlobal(GO, SK, TM); diff --git a/interpreter/llvm/src/lib/Target/ARM/ARMTargetObjectFile.h b/interpreter/llvm/src/lib/Target/ARM/ARMTargetObjectFile.h index dbb8128269dce..bd7aa1cfe02b7 100644 --- a/interpreter/llvm/src/lib/Target/ARM/ARMTargetObjectFile.h +++ b/interpreter/llvm/src/lib/Target/ARM/ARMTargetObjectFile.h @@ -16,8 +16,6 @@ namespace llvm { class ARMElfTargetObjectFile : public TargetLoweringObjectFileELF { - mutable bool genExecuteOnly = false; - protected: const MCSection *AttributesSection = nullptr; diff --git a/interpreter/llvm/src/lib/Target/ARM/ARMTargetTransformInfo.cpp b/interpreter/llvm/src/lib/Target/ARM/ARMTargetTransformInfo.cpp index 8eb9dbf5f9de6..51b0fedd2b54f 100644 --- a/interpreter/llvm/src/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/interpreter/llvm/src/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -15,6 +15,24 @@ using namespace llvm; #define DEBUG_TYPE "armtti" +bool ARMTTIImpl::areInlineCompatible(const Function *Caller, + const Function *Callee) const { + const TargetMachine &TM = getTLI()->getTargetMachine(); + const FeatureBitset &CallerBits = + TM.getSubtargetImpl(*Caller)->getFeatureBits(); + const FeatureBitset &CalleeBits = + TM.getSubtargetImpl(*Callee)->getFeatureBits(); + + // To inline a callee, all features not in the whitelist must match exactly. + bool MatchExact = (CallerBits & ~InlineFeatureWhitelist) == + (CalleeBits & ~InlineFeatureWhitelist); + // For features in the whitelist, the callee's features must be a subset of + // the callers'. 
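The MatchExact/MatchSubset test being assembled here is plain bitset algebra. A minimal standalone sketch, assuming a hypothetical four-feature universe in which only bit 0 is whitelisted, behaves the same way:

    #include <bitset>
    #include <cassert>

    int main() {
      // Hypothetical universe of four features; only bit 0 is whitelisted.
      std::bitset<4> Whitelist("0001");
      std::bitset<4> Caller("0011"); // caller has features 0 and 1
      std::bitset<4> Callee("0010"); // callee has feature 1 only
      // Outside the whitelist, caller and callee must match exactly.
      bool MatchExact = (Caller & ~Whitelist) == (Callee & ~Whitelist);
      // Inside the whitelist, the callee may only use features the caller has.
      bool MatchSubset = ((Caller & Callee) & Whitelist) == (Callee & Whitelist);
      assert(MatchExact && MatchSubset); // this pair would be inline-compatible
      return 0;
    }

Swapping the two sets (a callee that uses a whitelisted feature its caller lacks) fails MatchSubset, which is exactly the asymmetry the whitelist exists to express.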
+ bool MatchSubset = ((CallerBits & CalleeBits) & InlineFeatureWhitelist) == + (CalleeBits & InlineFeatureWhitelist); + return MatchExact && MatchSubset; +} + int ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) { assert(Ty->isIntegerTy()); diff --git a/interpreter/llvm/src/lib/Target/ARM/ARMTargetTransformInfo.h b/interpreter/llvm/src/lib/Target/ARM/ARMTargetTransformInfo.h index 7de0543dfa5e0..0695a4e633467 100644 --- a/interpreter/llvm/src/lib/Target/ARM/ARMTargetTransformInfo.h +++ b/interpreter/llvm/src/lib/Target/ARM/ARMTargetTransformInfo.h @@ -33,6 +33,39 @@ class ARMTTIImpl : public BasicTTIImplBase { const ARMSubtarget *ST; const ARMTargetLowering *TLI; + // Currently the following features are excluded from InlineFeatureWhitelist. + // ModeThumb, FeatureNoARM, ModeSoftFloat, FeatureVFPOnlySP, FeatureD16 + // Depending on whether they are set or unset, different + // instructions/registers are available. For example, inlining a callee with + // -thumb-mode in a caller with +thumb-mode, may cause the assembler to + // fail if the callee uses ARM only instructions, e.g. in inline asm. + const FeatureBitset InlineFeatureWhitelist = { + ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureNEON, ARM::FeatureThumb2, + ARM::FeatureFP16, ARM::FeatureVFP4, ARM::FeatureFPARMv8, + ARM::FeatureFullFP16, ARM::FeatureHWDivThumb, + ARM::FeatureHWDivARM, ARM::FeatureDB, ARM::FeatureV7Clrex, + ARM::FeatureAcquireRelease, ARM::FeatureSlowFPBrcc, + ARM::FeaturePerfMon, ARM::FeatureTrustZone, ARM::Feature8MSecExt, + ARM::FeatureCrypto, ARM::FeatureCRC, ARM::FeatureRAS, + ARM::FeatureFPAO, ARM::FeatureFuseAES, ARM::FeatureZCZeroing, + ARM::FeatureProfUnpredicate, ARM::FeatureSlowVGETLNi32, + ARM::FeatureSlowVDUP32, ARM::FeaturePreferVMOVSR, + ARM::FeaturePrefISHSTBarrier, ARM::FeatureMuxedUnits, + ARM::FeatureSlowOddRegister, ARM::FeatureSlowLoadDSubreg, + ARM::FeatureDontWidenVMOVS, ARM::FeatureExpandMLx, + ARM::FeatureHasVMLxHazards, ARM::FeatureNEONForFPMovs, + ARM::FeatureNEONForFP, ARM::FeatureCheckVLDnAlign, + ARM::FeatureHasSlowFPVMLx, ARM::FeatureVMLxForwarding, + ARM::FeaturePref32BitThumb, ARM::FeatureAvoidPartialCPSR, + ARM::FeatureCheapPredicableCPSR, ARM::FeatureAvoidMOVsShOp, + ARM::FeatureHasRetAddrStack, ARM::FeatureHasNoBranchPredictor, + ARM::FeatureDSP, ARM::FeatureMP, ARM::FeatureVirtualization, + ARM::FeatureMClass, ARM::FeatureRClass, ARM::FeatureAClass, + ARM::FeatureNaClTrap, ARM::FeatureStrictAlign, ARM::FeatureLongCalls, + ARM::FeatureExecuteOnly, ARM::FeatureReserveR9, ARM::FeatureNoMovt, + ARM::FeatureNoNegativeImmediates + }; + const ARMSubtarget *getST() const { return ST; } const ARMTargetLowering *getTLI() const { return TLI; } @@ -41,6 +74,9 @@ class ARMTTIImpl : public BasicTTIImplBase { : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {} + bool areInlineCompatible(const Function *Caller, + const Function *Callee) const; + bool enableInterleavedAccessVectorization() { return true; } /// Floating-point computation using ARMv8 AArch32 Advanced @@ -78,7 +114,7 @@ class ARMTTIImpl : public BasicTTIImplBase { return 13; } - unsigned getRegisterBitWidth(bool Vector) { + unsigned getRegisterBitWidth(bool Vector) const { if (Vector) { if (ST->hasNEON()) return 128; diff --git a/interpreter/llvm/src/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/interpreter/llvm/src/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index ada816c163897..1129826f21f64 100644 --- a/interpreter/llvm/src/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ 
b/interpreter/llvm/src/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -17,6 +17,8 @@ #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/COFF.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" @@ -39,10 +41,8 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/Support/ARMBuildAttributes.h" #include "llvm/Support/ARMEHABI.h" -#include "llvm/Support/COFF.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/TargetParser.h" @@ -1026,6 +1026,15 @@ class ARMOperand : public MCParsedAsmOperand { ARM_AM::getSOImmVal(-Value) != -1); } bool isT2SOImm() const { + // If we have an immediate that's not a constant, treat it as an expression + // needing a fixup. + if (isImm() && !isa(getImm())) { + // We want to avoid matching :upper16: and :lower16: as we want these + // expressions to match in isImm0_65535Expr() + const ARMMCExpr *ARM16Expr = dyn_cast(getImm()); + return (!ARM16Expr || (ARM16Expr->getKind() != ARMMCExpr::VK_ARM_HI16 && + ARM16Expr->getKind() != ARMMCExpr::VK_ARM_LO16)); + } if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast(getImm()); if (!CE) return false; @@ -5240,6 +5249,7 @@ bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) { // Fall though for the Identifier case that is not a register or a // special name. + LLVM_FALLTHROUGH; } case AsmToken::LParen: // parenthesized expressions like (_strcmp-4) case AsmToken::Integer: // things like 1f and 2b as a branch targets @@ -6851,6 +6861,17 @@ static unsigned getRealVLDOpcode(unsigned Opc, unsigned &Spacing) { bool ARMAsmParser::processInstruction(MCInst &Inst, const OperandVector &Operands, MCStreamer &Out) { + // Check if we have the wide qualifier, because if it's present we + // must avoid selecting a 16-bit thumb instruction. + bool HasWideQualifier = false; + for (auto &Op : Operands) { + ARMOperand &ARMOp = static_cast(*Op); + if (ARMOp.isToken() && ARMOp.getToken() == ".w") { + HasWideQualifier = true; + break; + } + } + switch (Inst.getOpcode()) { // Alias for alternate form of 'ldr{,b}t Rt, [Rn], #imm' instruction. case ARM::LDRT_POST: @@ -6930,8 +6951,7 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, // Select the narrow version if the immediate will fit. if (Inst.getOperand(1).getImm() > 0 && Inst.getOperand(1).getImm() <= 0xff && - !(static_cast(*Operands[2]).isToken() && - static_cast(*Operands[2]).getToken() == ".w")) + !HasWideQualifier) Inst.setOpcode(ARM::tLDRpci); else Inst.setOpcode(ARM::t2LDRpci); @@ -6962,10 +6982,9 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, else if (Inst.getOpcode() == ARM::t2LDRConstPool) TmpInst.setOpcode(ARM::t2LDRpci); const ARMOperand &PoolOperand = - (static_cast(*Operands[2]).isToken() && - static_cast(*Operands[2]).getToken() == ".w") ? - static_cast(*Operands[4]) : - static_cast(*Operands[3]); + (HasWideQualifier ? + static_cast(*Operands[4]) : + static_cast(*Operands[3])); const MCExpr *SubExprVal = PoolOperand.getConstantPoolImm(); // If SubExprVal is a constant we may be able to use a MOV if (isa(SubExprVal) && @@ -8108,8 +8127,7 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, if (isARMLowRegister(Inst.getOperand(0).getReg()) && isARMLowRegister(Inst.getOperand(1).getReg()) && Inst.getOperand(5).getReg() == (inITBlock() ? 
0 : ARM::CPSR) && - !(static_cast(*Operands[3]).isToken() && - static_cast(*Operands[3]).getToken() == ".w")) { + !HasWideQualifier) { unsigned NewOpc; switch (Inst.getOpcode()) { default: llvm_unreachable("unexpected opcode"); @@ -8143,7 +8161,8 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, isARMLowRegister(Inst.getOperand(1).getReg()) && isARMLowRegister(Inst.getOperand(2).getReg()) && Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg() && - inITBlock() == (Inst.getOpcode() == ARM::t2MOVsr)) + inITBlock() == (Inst.getOpcode() == ARM::t2MOVsr) && + !HasWideQualifier) isNarrow = true; MCInst TmpInst; unsigned newOpc; @@ -8177,7 +8196,8 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, bool isNarrow = false; if (isARMLowRegister(Inst.getOperand(0).getReg()) && isARMLowRegister(Inst.getOperand(1).getReg()) && - inITBlock() == (Inst.getOpcode() == ARM::t2MOVsi)) + inITBlock() == (Inst.getOpcode() == ARM::t2MOVsi) && + !HasWideQualifier) isNarrow = true; MCInst TmpInst; unsigned newOpc; @@ -8404,11 +8424,10 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, // wide encoding wasn't explicit. if (Inst.getOperand(0).getReg() != Inst.getOperand(1).getReg() || !isARMLowRegister(Inst.getOperand(0).getReg()) || - (unsigned)Inst.getOperand(2).getImm() > 255 || - ((!inITBlock() && Inst.getOperand(5).getReg() != ARM::CPSR) || - (inITBlock() && Inst.getOperand(5).getReg() != 0)) || - (static_cast(*Operands[3]).isToken() && - static_cast(*Operands[3]).getToken() == ".w")) + (Inst.getOperand(2).isImm() && + (unsigned)Inst.getOperand(2).getImm() > 255) || + Inst.getOperand(5).getReg() != (inITBlock() ? 0 : ARM::CPSR) || + HasWideQualifier) break; MCInst TmpInst; TmpInst.setOpcode(Inst.getOpcode() == ARM::t2ADDri ? @@ -8437,8 +8456,7 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, } if (!Transform || Inst.getOperand(5).getReg() != 0 || - (static_cast(*Operands[3]).isToken() && - static_cast(*Operands[3]).getToken() == ".w")) + HasWideQualifier) break; MCInst TmpInst; TmpInst.setOpcode(ARM::tADDhirr); @@ -8556,12 +8574,10 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, // If we can use the 16-bit encoding and the user didn't explicitly // request the 32-bit variant, transform it here. if (isARMLowRegister(Inst.getOperand(0).getReg()) && - (unsigned)Inst.getOperand(1).getImm() <= 255 && - ((!inITBlock() && Inst.getOperand(2).getImm() == ARMCC::AL && - Inst.getOperand(4).getReg() == ARM::CPSR) || - (inITBlock() && Inst.getOperand(4).getReg() == 0)) && - (!static_cast(*Operands[2]).isToken() || - static_cast(*Operands[2]).getToken() != ".w")) { + (Inst.getOperand(1).isImm() && + (unsigned)Inst.getOperand(1).getImm() <= 255) && + Inst.getOperand(4).getReg() == (inITBlock() ? 0 : ARM::CPSR) && + !HasWideQualifier) { // The operands aren't in the same order for tMOVi8... MCInst TmpInst; TmpInst.setOpcode(ARM::tMOVi8); @@ -8582,8 +8598,7 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, isARMLowRegister(Inst.getOperand(1).getReg()) && Inst.getOperand(2).getImm() == ARMCC::AL && Inst.getOperand(4).getReg() == ARM::CPSR && - (!static_cast(*Operands[2]).isToken() || - static_cast(*Operands[2]).getToken() != ".w")) { + !HasWideQualifier) { // The operands aren't the same for tMOV[S]r... (no cc_out) MCInst TmpInst; TmpInst.setOpcode(Inst.getOperand(4).getReg() ? 
ARM::tMOVSr : ARM::tMOVr); @@ -8605,8 +8620,7 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, if (isARMLowRegister(Inst.getOperand(0).getReg()) && isARMLowRegister(Inst.getOperand(1).getReg()) && Inst.getOperand(2).getImm() == 0 && - (!static_cast(*Operands[2]).isToken() || - static_cast(*Operands[2]).getToken() != ".w")) { + !HasWideQualifier) { unsigned NewOpc; switch (Inst.getOpcode()) { default: llvm_unreachable("Illegal opcode!"); @@ -8705,11 +8719,8 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, if ((isARMLowRegister(Inst.getOperand(1).getReg()) && isARMLowRegister(Inst.getOperand(2).getReg())) && Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg() && - ((!inITBlock() && Inst.getOperand(5).getReg() == ARM::CPSR) || - (inITBlock() && Inst.getOperand(5).getReg() != ARM::CPSR)) && - (!static_cast(*Operands[3]).isToken() || - !static_cast(*Operands[3]).getToken().equals_lower( - ".w"))) { + Inst.getOperand(5).getReg() == (inITBlock() ? 0 : ARM::CPSR) && + !HasWideQualifier) { unsigned NewOpc; switch (Inst.getOpcode()) { default: llvm_unreachable("unexpected opcode"); @@ -8745,11 +8756,8 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, isARMLowRegister(Inst.getOperand(2).getReg())) && (Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg() || Inst.getOperand(0).getReg() == Inst.getOperand(2).getReg()) && - ((!inITBlock() && Inst.getOperand(5).getReg() == ARM::CPSR) || - (inITBlock() && Inst.getOperand(5).getReg() != ARM::CPSR)) && - (!static_cast(*Operands[3]).isToken() || - !static_cast(*Operands[3]).getToken().equals_lower( - ".w"))) { + Inst.getOperand(5).getReg() == (inITBlock() ? 0 : ARM::CPSR) && + !HasWideQualifier) { unsigned NewOpc; switch (Inst.getOpcode()) { default: llvm_unreachable("unexpected opcode"); @@ -8985,6 +8993,8 @@ unsigned ARMAsmParser::MatchInstruction(OperandVector &Operands, MCInst &Inst, return PlainMatchResult; } +std::string ARMMnemonicSpellCheck(StringRef S, uint64_t FBS); + static const char *getSubtargetFeatureName(uint64_t Val); bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, @@ -9078,9 +9088,13 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, return Error(ErrorLoc, "invalid operand for instruction"); } - case Match_MnemonicFail: - return Error(IDLoc, "invalid instruction", + case Match_MnemonicFail: { + uint64_t FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); + std::string Suggestion = ARMMnemonicSpellCheck( + ((ARMOperand &)*Operands[0]).getToken(), FBS); + return Error(IDLoc, "invalid instruction" + Suggestion, ((ARMOperand &)*Operands[0]).getLocRange()); + } case Match_RequiresNotITBlock: return Error(IDLoc, "flag setting instruction only valid outside IT block"); case Match_RequiresITBlock: diff --git a/interpreter/llvm/src/lib/Target/ARM/CMakeLists.txt b/interpreter/llvm/src/lib/Target/ARM/CMakeLists.txt index 3cde43967568b..cf6827fd6ca19 100644 --- a/interpreter/llvm/src/lib/Target/ARM/CMakeLists.txt +++ b/interpreter/llvm/src/lib/Target/ARM/CMakeLists.txt @@ -49,6 +49,7 @@ add_llvm_target(ARMCodeGen ARMLoadStoreOptimizer.cpp ARMMCInstLower.cpp ARMMachineFunctionInfo.cpp + ARMMacroFusion.cpp ARMRegisterInfo.cpp ARMOptimizeBarriersPass.cpp ARMSelectionDAGInfo.cpp diff --git a/interpreter/llvm/src/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/interpreter/llvm/src/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index e812d32cc76f6..5ab236b7fd4c0 100644 --- a/interpreter/llvm/src/lib/Target/ARM/Disassembler/ARMDisassembler.cpp 
+++ b/interpreter/llvm/src/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -20,8 +20,8 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" #include #include #include @@ -486,7 +486,7 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size, } } - Size = 0; + Size = 4; return MCDisassembler::Fail; } diff --git a/interpreter/llvm/src/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/interpreter/llvm/src/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp index 40bf545e83224..a77df7a2598f4 100644 --- a/interpreter/llvm/src/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp +++ b/interpreter/llvm/src/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -7,15 +7,17 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/ARMMCTargetDesc.h" -#include "MCTargetDesc/ARMAddressingModes.h" #include "MCTargetDesc/ARMAsmBackend.h" +#include "MCTargetDesc/ARMAddressingModes.h" #include "MCTargetDesc/ARMAsmBackendDarwin.h" #include "MCTargetDesc/ARMAsmBackendELF.h" #include "MCTargetDesc/ARMAsmBackendWinCOFF.h" #include "MCTargetDesc/ARMBaseInfo.h" #include "MCTargetDesc/ARMFixupKinds.h" +#include "MCTargetDesc/ARMMCTargetDesc.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/BinaryFormat/MachO.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" @@ -31,10 +33,8 @@ #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCValue.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" -#include "llvm/Support/MachO.h" #include "llvm/Support/TargetParser.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -98,6 +98,7 @@ const MCFixupKindInfo &ARMAsmBackend::getFixupKindInfo(MCFixupKind Kind) const { {"fixup_t2_movt_hi16", 0, 20, 0}, {"fixup_t2_movw_lo16", 0, 20, 0}, {"fixup_arm_mod_imm", 0, 12, 0}, + {"fixup_t2_so_imm", 0, 26, 0}, }; const static MCFixupKindInfo InfosBE[ARM::NumTargetFixupKinds] = { // This table *must* be in the order that the fixup_* kinds are defined in @@ -148,6 +149,7 @@ const MCFixupKindInfo &ARMAsmBackend::getFixupKindInfo(MCFixupKind Kind) const { {"fixup_t2_movt_hi16", 12, 20, 0}, {"fixup_t2_movw_lo16", 12, 20, 0}, {"fixup_arm_mod_imm", 20, 12, 0}, + {"fixup_t2_so_imm", 26, 6, 0}, }; if (Kind < FirstTargetFixupKind) @@ -356,11 +358,26 @@ static uint32_t joinHalfWords(uint32_t FirstHalf, uint32_t SecondHalf, return Value; } -unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value, - bool IsPCRel, MCContext &Ctx, - bool IsLittleEndian, - bool IsResolved) const { +unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm, + const MCFixup &Fixup, + const MCValue &Target, uint64_t Value, + bool IsResolved, MCContext &Ctx, + bool IsLittleEndian) const { unsigned Kind = Fixup.getKind(); + + // MachO tries to make .o files that look vaguely pre-linked, so for MOVW/MOVT + // and .word relocations they put the Thumb bit into the addend if possible. + // Other relocation types don't want this bit though (branches couldn't encode + // it if it *was* present, and no other relocations exist) and it can + // interfere with checking valid expressions. 
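Background for the comment above: on ARM, bit 0 of a code address selects the instruction set on an interworking branch, so a resolved address of a Thumb function carries a set low bit. A hedged sketch of that tagging convention (tagIfThumb is an illustrative name, not an LLVM API):

    #include <cstdint>

    // Illustration only: mark a code address as Thumb by setting bit 0, which
    // is what folding the Thumb bit into the addend amounts to.
    uint64_t tagIfThumb(uint64_t Addr, bool IsThumbFunc) {
      return IsThumbFunc ? (Addr | 1) : Addr;
    }

The added code just below does the same thing in place, OR-ing 1 into Value once the fixup kind and symbol checks pass.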
+ if (const MCSymbolRefExpr *A = Target.getSymA()) { + if (A->hasSubsectionsViaSymbols() && Asm.isThumbFunc(&A->getSymbol()) && + (Kind == FK_Data_4 || Kind == ARM::fixup_arm_movw_lo16 || + Kind == ARM::fixup_arm_movt_hi16 || Kind == ARM::fixup_t2_movw_lo16 || + Kind == ARM::fixup_t2_movt_hi16)) + Value |= 1; + } + switch (Kind) { default: Ctx.reportError(Fixup.getLoc(), "bad relocation fixup type"); @@ -374,7 +391,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value, case FK_SecRel_4: return Value; case ARM::fixup_arm_movt_hi16: - if (!IsPCRel) + if (IsResolved || !STI->getTargetTriple().isOSBinFormatELF()) Value >>= 16; LLVM_FALLTHROUGH; case ARM::fixup_arm_movw_lo16: { @@ -386,7 +403,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value, return Value; } case ARM::fixup_t2_movt_hi16: - if (!IsPCRel) + if (IsResolved || !STI->getTargetTriple().isOSBinFormatELF()) Value >>= 16; LLVM_FALLTHROUGH; case ARM::fixup_t2_movw_lo16: { @@ -503,6 +520,13 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value, return swapHalfWords(out, IsLittleEndian); } case ARM::fixup_arm_thumb_bl: { + // FIXME: We get both thumb1 and thumb2 in here, so we can only check for + // the less strict thumb2 value. + if (!isInt<26>(Value - 4)) { + Ctx.reportError(Fixup.getLoc(), "Relocation out of range"); + return 0; + } + // The value doesn't encode the low bit (always zero) and is offset by // four. The 32-bit immediate value is encoded as // imm32 = SignExtend(S:I1:I2:imm10:imm11:0) @@ -693,33 +717,33 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value, return 0; } return Value; + case ARM::fixup_t2_so_imm: { + Value = ARM_AM::getT2SOImmVal(Value); + if ((int64_t)Value < 0) { + Ctx.reportError(Fixup.getLoc(), "out of range immediate fixup value"); + return 0; + } + // Value will contain a 12-bit value broken up into a 4-bit shift in bits + // 11:8 and the 8-bit immediate in 0:7. The instruction has the immediate + // in 0:7. The 4-bit shift is split up into i:imm3 where i is placed at bit + // 10 of the upper half-word and imm3 is placed at 14:12 of the lower + // half-word. + uint64_t EncValue = 0; + EncValue |= (Value & 0x800) << 15; + EncValue |= (Value & 0x700) << 4; + EncValue |= (Value & 0xff); + return swapHalfWords(EncValue, IsLittleEndian); + } } } -void ARMAsmBackend::processFixupValue(const MCAssembler &Asm, - const MCAsmLayout &Layout, - const MCFixup &Fixup, - const MCFragment *DF, - const MCValue &Target, uint64_t &Value, - bool &IsResolved) { +bool ARMAsmBackend::shouldForceRelocation(const MCAssembler &Asm, + const MCFixup &Fixup, + const MCValue &Target) { const MCSymbolRefExpr *A = Target.getSymA(); const MCSymbol *Sym = A ? &A->getSymbol() : nullptr; - // MachO (the only user of "Value") tries to make .o files that look vaguely - // pre-linked, so for MOVW/MOVT and .word relocations they put the Thumb bit - // into the addend if possible. Other relocation types don't want this bit - // though (branches couldn't encode it if it *was* present, and no other - // relocations exist) and it can interfere with checking valid expressions. 
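The bit scatter in the fixup_t2_so_imm case added earlier in this hunk is easy to verify by hand. A self-contained sketch of the same packing (the function name is illustrative):

    #include <cassert>
    #include <cstdint>

    // Spread a 12-bit Thumb2 modified immediate (shift in bits 11:8, imm8 in
    // bits 7:0) across the T32 encoding: i to bit 26, imm3 to bits 14:12 and
    // imm8 to bits 7:0, mirroring the EncValue computation above.
    uint32_t scatterT2SOImm(uint32_t Value12) {
      uint32_t Enc = 0;
      Enc |= (Value12 & 0x800) << 15; // i    -> bit 26
      Enc |= (Value12 & 0x700) << 4;  // imm3 -> bits 14:12
      Enc |= (Value12 & 0xff);        // imm8 -> bits 7:0
      return Enc;
    }

    int main() {
      // 0xfff scatters to i = 1, imm3 = 0b111, imm8 = 0xff.
      assert(scatterT2SOImm(0xfff) == ((1u << 26) | (0x7u << 12) | 0xffu));
      return 0;
    }

The swapHalfWords call then accounts for the halfword-swapped order in which Thumb2 instructions are stored.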
- if ((unsigned)Fixup.getKind() == FK_Data_4 || - (unsigned)Fixup.getKind() == ARM::fixup_arm_movw_lo16 || - (unsigned)Fixup.getKind() == ARM::fixup_arm_movt_hi16 || - (unsigned)Fixup.getKind() == ARM::fixup_t2_movw_lo16 || - (unsigned)Fixup.getKind() == ARM::fixup_t2_movt_hi16) { - if (Sym) { - if (Asm.isThumbFunc(Sym)) - Value |= 1; - } - } - if (IsResolved && (unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_bl) { + const unsigned FixupKind = Fixup.getKind(); + if ((unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_bl) { assert(Sym && "How did we resolve this?"); // If the symbol is external the linker will handle it. @@ -727,17 +751,32 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm, // If the symbol is out of range, produce a relocation and hope the // linker can handle it. GNU AS produces an error in this case. - if (Sym->isExternal() || Value >= 0x400004) - IsResolved = false; + if (Sym->isExternal()) + return true; + } + // Create relocations for unconditional branches to function symbols with + // different execution mode in ELF binaries. + if (Sym && Sym->isELF()) { + unsigned Type = dyn_cast(Sym)->getType(); + if ((Type == ELF::STT_FUNC || Type == ELF::STT_GNU_IFUNC)) { + if (Asm.isThumbFunc(Sym) && (FixupKind == ARM::fixup_arm_uncondbranch)) + return true; + if (!Asm.isThumbFunc(Sym) && (FixupKind == ARM::fixup_arm_thumb_br || + FixupKind == ARM::fixup_arm_thumb_bl || + FixupKind == ARM::fixup_t2_condbranch || + FixupKind == ARM::fixup_t2_uncondbranch)) + return true; + } } // We must always generate a relocation for BL/BLX instructions if we have // a symbol to reference, as the linker relies on knowing the destination // symbol's thumb-ness to get interworking right. + if (A && (FixupKind == ARM::fixup_arm_thumb_blx || + FixupKind == ARM::fixup_arm_blx || + FixupKind == ARM::fixup_arm_uncondbl || + FixupKind == ARM::fixup_arm_condbl)) + return true; + return false; } /// getFixupKindNumBytes - The number of bytes the fixup may change. @@ -783,6 +822,7 @@ static unsigned getFixupKindNumBytes(unsigned Kind) { case ARM::fixup_arm_movw_lo16: case ARM::fixup_t2_movt_hi16: case ARM::fixup_t2_movw_lo16: + case ARM::fixup_t2_so_imm: return 4; case FK_SecRel_2: @@ -835,27 +875,31 @@ static unsigned getFixupKindContainerSizeBytes(unsigned Kind) { case ARM::fixup_t2_movt_hi16: case ARM::fixup_t2_movw_lo16: case ARM::fixup_arm_mod_imm: + case ARM::fixup_t2_so_imm: // Instruction size is 4 bytes. return 4; } } -void ARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, - unsigned DataSize, uint64_t Value, bool IsPCRel, - MCContext &Ctx) const { +void ARMAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target, + MutableArrayRef Data, uint64_t Value, + bool IsResolved) const { unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind()); - Value = adjustFixupValue(Fixup, Value, IsPCRel, Ctx, IsLittleEndian, true); + MCContext &Ctx = Asm.getContext(); + Value = adjustFixupValue(Asm, Fixup, Target, Value, IsResolved, Ctx, + IsLittleEndian); if (!Value) return; // Doesn't change encoding.
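After the early-out above, applyFixup patches the adjusted value into the fragment bytes; the OR loop itself falls outside this hunk, so here is a minimal sketch of the usual little-endian patching, under that assumption (patchLE is an illustrative name):

    #include <cstdint>
    #include <vector>

    // Assumed shape of the patch step: OR the adjusted fixup value into
    // NumBytes little-endian bytes of the fragment data at the fixup offset.
    void patchLE(std::vector<uint8_t> &Data, unsigned Offset, unsigned NumBytes,
                 uint64_t Value) {
      for (unsigned I = 0; I != NumBytes; ++I)
        Data[Offset + I] |= uint8_t((Value >> (I * 8)) & 0xff);
    }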
unsigned Offset = Fixup.getOffset(); - assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!"); + assert(Offset + NumBytes <= Data.size() && "Invalid fixup offset!"); // Used to point to big endian bytes. unsigned FullSizeBytes; if (!IsLittleEndian) { FullSizeBytes = getFixupKindContainerSizeBytes(Fixup.getKind()); - assert((Offset + FullSizeBytes) <= DataSize && "Invalid fixup size!"); + assert((Offset + FullSizeBytes) <= Data.size() && "Invalid fixup size!"); assert(NumBytes <= FullSizeBytes && "Invalid fixup size!"); } diff --git a/interpreter/llvm/src/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h b/interpreter/llvm/src/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h index 2ddedb5d61059..02374966dafe7 100644 --- a/interpreter/llvm/src/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h +++ b/interpreter/llvm/src/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h @@ -38,19 +38,17 @@ class ARMAsmBackend : public MCAsmBackend { const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override; - /// processFixupValue - Target hook to process the literal value of a fixup - /// if necessary. - void processFixupValue(const MCAssembler &Asm, const MCAsmLayout &Layout, - const MCFixup &Fixup, const MCFragment *DF, - const MCValue &Target, uint64_t &Value, - bool &IsResolved) override; - - unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, bool IsPCRel, - MCContext &Ctx, bool IsLittleEndian, - bool IsResolved) const; - - void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, - uint64_t Value, bool IsPCRel, MCContext &Ctx) const override; + bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target) override; + + unsigned adjustFixupValue(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target, uint64_t Value, + bool IsResolved, MCContext &Ctx, + bool IsLittleEndian) const; + + void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target, MutableArrayRef Data, + uint64_t Value, bool IsResolved) const override; unsigned getRelaxedOpcode(unsigned Op) const; diff --git a/interpreter/llvm/src/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h b/interpreter/llvm/src/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h index 09dc0173ade65..bd729fabedf5a 100644 --- a/interpreter/llvm/src/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h +++ b/interpreter/llvm/src/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h @@ -11,7 +11,7 @@ #define LLVM_LIB_TARGET_ARM_ARMASMBACKENDDARWIN_H #include "ARMAsmBackend.h" -#include "llvm/Support/MachO.h" +#include "llvm/BinaryFormat/MachO.h" namespace llvm { class ARMAsmBackendDarwin : public ARMAsmBackend { diff --git a/interpreter/llvm/src/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/interpreter/llvm/src/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp index e1fa245718202..59f31be69d58c 100644 --- a/interpreter/llvm/src/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp +++ b/interpreter/llvm/src/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp @@ -9,12 +9,12 @@ #include "MCTargetDesc/ARMFixupKinds.h" #include "MCTargetDesc/ARMMCTargetDesc.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixup.h" #include "llvm/MC/MCValue.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include diff --git a/interpreter/llvm/src/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp 
b/interpreter/llvm/src/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp index 4d6c52f3cd492..93f4006cee876 100644 --- a/interpreter/llvm/src/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp +++ b/interpreter/llvm/src/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp @@ -21,6 +21,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAssembler.h" @@ -43,12 +44,11 @@ #include "llvm/Support/ARMBuildAttributes.h" #include "llvm/Support/ARMEHABI.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/LEB128.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/TargetParser.h" +#include "llvm/Support/raw_ostream.h" #include #include #include diff --git a/interpreter/llvm/src/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h b/interpreter/llvm/src/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h index 3fe2302bdd372..831589ba0581a 100644 --- a/interpreter/llvm/src/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h +++ b/interpreter/llvm/src/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h @@ -15,55 +15,47 @@ namespace llvm { namespace ARM { enum Fixups { - // fixup_arm_ldst_pcrel_12 - 12-bit PC relative relocation for symbol - // addresses + // 12-bit PC relative relocation for symbol addresses fixup_arm_ldst_pcrel_12 = FirstTargetFixupKind, - // fixup_t2_ldst_pcrel_12 - Equivalent to fixup_arm_ldst_pcrel_12, with - // the 16-bit halfwords reordered. + // Equivalent to fixup_arm_ldst_pcrel_12, with the 16-bit halfwords reordered. fixup_t2_ldst_pcrel_12, - // fixup_arm_pcrel_10_unscaled - 10-bit PC relative relocation for symbol - // addresses used in LDRD/LDRH/LDRB/etc. instructions. All bits are encoded. + // 10-bit PC relative relocation for symbol addresses used in + // LDRD/LDRH/LDRB/etc. instructions. All bits are encoded. fixup_arm_pcrel_10_unscaled, - // fixup_arm_pcrel_10 - 10-bit PC relative relocation for symbol addresses - // used in VFP instructions where the lower 2 bits are not encoded - // (so it's encoded as an 8-bit immediate). + // 10-bit PC relative relocation for symbol addresses used in VFP instructions + // where the lower 2 bits are not encoded (so it's encoded as an 8-bit + // immediate). fixup_arm_pcrel_10, - // fixup_t2_pcrel_10 - Equivalent to fixup_arm_pcrel_10, accounting for - // the short-swapped encoding of Thumb2 instructions. + // Equivalent to fixup_arm_pcrel_10, accounting for the short-swapped encoding + // of Thumb2 instructions. fixup_t2_pcrel_10, - // fixup_arm_pcrel_9 - 9-bit PC relative relocation for symbol addresses - // used in VFP instructions where bit 0 not encoded (so it's encoded as an - // 8-bit immediate). + // 9-bit PC relative relocation for symbol addresses used in VFP instructions + // where bit 0 not encoded (so it's encoded as an 8-bit immediate). fixup_arm_pcrel_9, - // fixup_t2_pcrel_9 - Equivalent to fixup_arm_pcrel_9, accounting for - // the short-swapped encoding of Thumb2 instructions. + // Equivalent to fixup_arm_pcrel_9, accounting for the short-swapped encoding + // of Thumb2 instructions. fixup_t2_pcrel_9, - // fixup_thumb_adr_pcrel_10 - 10-bit PC relative relocation for symbol - // addresses where the lower 2 bits are not encoded (so it's encoded as an - // 8-bit immediate). 
+ // 10-bit PC relative relocation for symbol addresses where the lower 2 bits + // are not encoded (so it's encoded as an 8-bit immediate). fixup_thumb_adr_pcrel_10, - // fixup_arm_adr_pcrel_12 - 12-bit PC relative relocation for the ADR - // instruction. + // 12-bit PC relative relocation for the ADR instruction. fixup_arm_adr_pcrel_12, - // fixup_t2_adr_pcrel_12 - 12-bit PC relative relocation for the ADR - // instruction. + // 12-bit PC relative relocation for the ADR instruction. fixup_t2_adr_pcrel_12, - // fixup_arm_condbranch - 24-bit PC relative relocation for conditional branch - // instructions. + // 24-bit PC relative relocation for conditional branch instructions. fixup_arm_condbranch, - // fixup_arm_uncondbranch - 24-bit PC relative relocation for - // branch instructions. (unconditional) + // 24-bit PC relative relocation for unconditional branch instructions. fixup_arm_uncondbranch, - // fixup_t2_condbranch - 20-bit PC relative relocation for Thumb2 direct - // uconditional branch instructions. + // 20-bit PC relative relocation for Thumb2 direct conditional branch + // instructions. fixup_t2_condbranch, - // fixup_t2_uncondbranch - 20-bit PC relative relocation for Thumb2 direct - // branch unconditional branch instructions. + // 20-bit PC relative relocation for Thumb2 direct unconditional branch + // instructions. fixup_t2_uncondbranch, - // fixup_arm_thumb_br - 12-bit fixup for Thumb B instructions. + // 12-bit fixup for Thumb B instructions. fixup_arm_thumb_br, // The following fixups handle the ARM BL instructions. These can be @@ -75,46 +67,48 @@ enum Fixups { // MachO does not draw a distinction between the two cases, so it will treat // fixup_arm_uncondbl and fixup_arm_condbl as identical fixups. - // fixup_arm_uncondbl - Fixup for unconditional ARM BL instructions. + // Fixup for unconditional ARM BL instructions. fixup_arm_uncondbl, - // fixup_arm_condbl - Fixup for ARM BL instructions with nontrivial - // conditionalisation. + // Fixup for ARM BL instructions with nontrivial conditionalisation. fixup_arm_condbl, - // fixup_arm_blx - Fixup for ARM BLX instructions. + // Fixup for ARM BLX instructions. fixup_arm_blx, - // fixup_arm_thumb_bl - Fixup for Thumb BL instructions. + // Fixup for Thumb BL instructions. fixup_arm_thumb_bl, - // fixup_arm_thumb_blx - Fixup for Thumb BLX instructions. + // Fixup for Thumb BLX instructions. fixup_arm_thumb_blx, - // fixup_arm_thumb_cb - Fixup for Thumb branch instructions. + // Fixup for Thumb branch instructions. fixup_arm_thumb_cb, - // fixup_arm_thumb_cp - Fixup for Thumb load/store from constant pool instrs. + // Fixup for Thumb load/store from constant pool instrs. fixup_arm_thumb_cp, - // fixup_arm_thumb_bcc - Fixup for Thumb conditional branching instructions. + // Fixup for Thumb conditional branching instructions.
fixup_arm_thumb_bcc, // The next two are for the movt/movw pair // the 16bit imm field are split into imm{15-12} and imm{11-0} fixup_arm_movt_hi16, // :upper16: fixup_arm_movw_lo16, // :lower16: - fixup_t2_movt_hi16, // :upper16: - fixup_t2_movw_lo16, // :lower16: + fixup_t2_movt_hi16, // :upper16: + fixup_t2_movw_lo16, // :lower16: - // fixup_arm_mod_imm - Fixup for mod_imm + // Fixup for mod_imm fixup_arm_mod_imm, + // Fixup for Thumb2 8-bit rotated operand + fixup_t2_so_imm, + // Marker LastTargetFixupKind, NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind }; } -} +} // namespace llvm #endif diff --git a/interpreter/llvm/src/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/interpreter/llvm/src/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp index d9df2c6da7ec4..f1f35f409900d 100644 --- a/interpreter/llvm/src/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp +++ b/interpreter/llvm/src/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp @@ -339,7 +339,17 @@ class ARMMCCodeEmitter : public MCCodeEmitter { unsigned getT2SOImmOpValue(const MCInst &MI, unsigned Op, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const { - unsigned SoImm = MI.getOperand(Op).getImm(); + const MCOperand &MO = MI.getOperand(Op); + + // Support for fixups (MCFixup) + if (MO.isExpr()) { + const MCExpr *Expr = MO.getExpr(); + // Fixups resolve to plain values that need to be encoded. + MCFixupKind Kind = MCFixupKind(ARM::fixup_t2_so_imm); + Fixups.push_back(MCFixup::create(0, Expr, Kind, MI.getLoc())); + return 0; + } + unsigned SoImm = MO.getImm(); unsigned Encoded = ARM_AM::getT2SOImmVal(SoImm); assert(Encoded != ~0U && "Not a Thumb2 so_imm value?"); return Encoded; diff --git a/interpreter/llvm/src/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/interpreter/llvm/src/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index 477755157040d..b8a8b1f7619a0 100644 --- a/interpreter/llvm/src/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/interpreter/llvm/src/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -11,9 +11,9 @@ // //===----------------------------------------------------------------------===// +#include "ARMMCTargetDesc.h" #include "ARMBaseInfo.h" #include "ARMMCAsmInfo.h" -#include "ARMMCTargetDesc.h" #include "InstPrinter/ARMInstPrinter.h" #include "llvm/ADT/Triple.h" #include "llvm/MC/MCELFStreamer.h" diff --git a/interpreter/llvm/src/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp b/interpreter/llvm/src/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp index 34c770440e1ba..5516a1bdb03da 100644 --- a/interpreter/llvm/src/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp +++ b/interpreter/llvm/src/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp @@ -9,10 +9,10 @@ #include "ARMMCExpr.h" #include "MCTargetDesc/ARMMCTargetDesc.h" +#include "llvm-c/Disassembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDisassembler/MCRelocationInfo.h" #include "llvm/MC/MCExpr.h" -#include "llvm-c/Disassembler.h" using namespace llvm; diff --git a/interpreter/llvm/src/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/interpreter/llvm/src/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp index b77181f29b2d0..4a8139dea6682 100644 --- a/interpreter/llvm/src/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp +++ b/interpreter/llvm/src/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp @@ -7,10 +7,11 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/ARMMCTargetDesc.h" #include "MCTargetDesc/ARMBaseInfo.h" 
#include "MCTargetDesc/ARMFixupKinds.h" +#include "MCTargetDesc/ARMMCTargetDesc.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/MachO.h" #include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" @@ -21,7 +22,6 @@ #include "llvm/MC/MCSection.h" #include "llvm/MC/MCValue.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MachO.h" using namespace llvm; namespace { diff --git a/interpreter/llvm/src/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp b/interpreter/llvm/src/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp index 7ae2f864d79dc..f74fb2e20b5a3 100644 --- a/interpreter/llvm/src/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp +++ b/interpreter/llvm/src/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp @@ -9,13 +9,13 @@ #include "MCTargetDesc/ARMFixupKinds.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/COFF.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixup.h" #include "llvm/MC/MCFixupKindInfo.h" #include "llvm/MC/MCValue.h" #include "llvm/MC/MCWinCOFFObjectWriter.h" -#include "llvm/Support/COFF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include @@ -33,8 +33,8 @@ class ARMWinCOFFObjectWriter : public MCWinCOFFObjectTargetWriter { ~ARMWinCOFFObjectWriter() override = default; - unsigned getRelocType(const MCValue &Target, const MCFixup &Fixup, - bool IsCrossSection, + unsigned getRelocType(MCContext &Ctx, const MCValue &Target, + const MCFixup &Fixup, bool IsCrossSection, const MCAsmBackend &MAB) const override; bool recordRelocation(const MCFixup &) const override; @@ -42,7 +42,8 @@ class ARMWinCOFFObjectWriter : public MCWinCOFFObjectTargetWriter { } // end anonymous namespace -unsigned ARMWinCOFFObjectWriter::getRelocType(const MCValue &Target, +unsigned ARMWinCOFFObjectWriter::getRelocType(MCContext &Ctx, + const MCValue &Target, const MCFixup &Fixup, bool IsCrossSection, const MCAsmBackend &MAB) const { diff --git a/interpreter/llvm/src/lib/Target/ARM/Thumb1FrameLowering.cpp b/interpreter/llvm/src/lib/Target/ARM/Thumb1FrameLowering.cpp index d0fd366ab9ed5..5709b4e617987 100644 --- a/interpreter/llvm/src/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/interpreter/llvm/src/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -11,26 +11,26 @@ // //===----------------------------------------------------------------------===// +#include "Thumb1FrameLowering.h" #include "ARMBaseInstrInfo.h" #include "ARMBaseRegisterInfo.h" #include "ARMMachineFunctionInfo.h" #include "ARMSubtarget.h" #include "MCTargetDesc/ARMBaseInfo.h" -#include "Thumb1FrameLowering.h" #include "Thumb1InstrInfo.h" #include "ThumbRegisterInfo.h" #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/IR/DebugLoc.h" #include "llvm/MC/MCDwarf.h" @@ -236,7 +236,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF, case ARM::R12: if (STI.splitFramePushPop(MF)) break; - // fallthough + LLVM_FALLTHROUGH; case 
ARM::R0: case ARM::R1: case ARM::R2: @@ -535,14 +535,14 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB, // Look for a temporary register to use. // First, compute the liveness information. - LivePhysRegs UsedRegs(STI.getRegisterInfo()); + const TargetRegisterInfo &TRI = *STI.getRegisterInfo(); + LivePhysRegs UsedRegs(TRI); UsedRegs.addLiveOuts(MBB); // The semantic of pristines changed recently and now, // the callee-saved registers that are touched in the function // are not part of the pristines set anymore. // Add those callee-saved now. - const TargetRegisterInfo *TRI = STI.getRegisterInfo(); - const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF); + const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(&MF); for (unsigned i = 0; CSRegs[i]; ++i) UsedRegs.addReg(CSRegs[i]); @@ -561,18 +561,17 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB, // And some temporary register, just in case. unsigned TemporaryReg = 0; BitVector PopFriendly = - TRI->getAllocatableSet(MF, TRI->getRegClass(ARM::tGPRRegClassID)); + TRI.getAllocatableSet(MF, TRI.getRegClass(ARM::tGPRRegClassID)); assert(PopFriendly.any() && "No allocatable pop-friendly register?!"); // Rebuild the GPRs from the high registers because they are removed // form the GPR reg class for thumb1. BitVector GPRsNoLRSP = - TRI->getAllocatableSet(MF, TRI->getRegClass(ARM::hGPRRegClassID)); + TRI.getAllocatableSet(MF, TRI.getRegClass(ARM::hGPRRegClassID)); GPRsNoLRSP |= PopFriendly; GPRsNoLRSP.reset(ARM::LR); GPRsNoLRSP.reset(ARM::SP); GPRsNoLRSP.reset(ARM::PC); - for (int Register = GPRsNoLRSP.find_first(); Register != -1; - Register = GPRsNoLRSP.find_next(Register)) { + for (unsigned Register : GPRsNoLRSP.set_bits()) { if (!UsedRegs.contains(Register)) { // Remember the first pop-friendly register and exit. if (PopFriendly.test(Register)) { @@ -699,13 +698,14 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB, CopyRegs.insert(ArgReg); // Push the low registers and lr + const MachineRegisterInfo &MRI = MF.getRegInfo(); if (!LoRegsToSave.empty()) { MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(ARM::tPUSH)).add(predOps(ARMCC::AL)); for (unsigned Reg : {ARM::R4, ARM::R5, ARM::R6, ARM::R7, ARM::LR}) { if (LoRegsToSave.count(Reg)) { - bool isKill = !MF.getRegInfo().isLiveIn(Reg); - if (isKill) + bool isKill = !MRI.isLiveIn(Reg); + if (isKill && !MRI.isReserved(Reg)) MBB.addLiveIn(Reg); MIB.addReg(Reg, getKillRegState(isKill)); @@ -747,8 +747,8 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB, SmallVector RegsToPush; while (HiRegToSave != AllHighRegsEnd && CopyReg != AllCopyRegsEnd) { if (HiRegsToSave.count(*HiRegToSave)) { - bool isKill = !MF.getRegInfo().isLiveIn(*HiRegToSave); - if (isKill) + bool isKill = !MRI.isLiveIn(*HiRegToSave); + if (isKill && !MRI.isReserved(*HiRegToSave)) MBB.addLiveIn(*HiRegToSave); // Emit a MOV from the high reg to the low reg. 
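An aside on the iteration change above: BitVector::set_bits() visits exactly the indices the old find_first()/find_next() loop produced. A minimal sketch:

    #include "llvm/ADT/BitVector.h"
    #include <cassert>

    int main() {
      llvm::BitVector BV(8);
      BV.set(1);
      BV.set(5);
      unsigned Sum = 0;
      // Range-for over set bits, equivalent to the find_first()/find_next()
      // idiom it replaces.
      for (unsigned Idx : BV.set_bits())
        Sum += Idx;
      assert(Sum == 6);
      return 0;
    }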
diff --git a/interpreter/llvm/src/lib/Target/ARM/Thumb1InstrInfo.cpp b/interpreter/llvm/src/lib/Target/ARM/Thumb1InstrInfo.cpp index 0ebf55924647f..3a3920a2db327 100644 --- a/interpreter/llvm/src/lib/Target/ARM/Thumb1InstrInfo.cpp +++ b/interpreter/llvm/src/lib/Target/ARM/Thumb1InstrInfo.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "ARMSubtarget.h" #include "Thumb1InstrInfo.h" +#include "ARMSubtarget.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" diff --git a/interpreter/llvm/src/lib/Target/ARM/Thumb2InstrInfo.cpp b/interpreter/llvm/src/lib/Target/ARM/Thumb2InstrInfo.cpp index 2e2dfe035e263..9125be96a07b4 100644 --- a/interpreter/llvm/src/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/interpreter/llvm/src/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -1,4 +1,4 @@ -//===-- Thumb2InstrInfo.cpp - Thumb-2 Instruction Information -------------===// +//===- Thumb2InstrInfo.cpp - Thumb-2 Instruction Information --------------===// // // The LLVM Compiler Infrastructure // @@ -11,16 +11,26 @@ // //===----------------------------------------------------------------------===// -#include "Thumb2InstrInfo.h" -#include "ARMConstantPoolValue.h" #include "ARMMachineFunctionInfo.h" #include "MCTargetDesc/ARMAddressingModes.h" +#include "Thumb2InstrInfo.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/DebugLoc.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrDesc.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include using namespace llvm; @@ -30,7 +40,7 @@ OldT2IfCvt("old-thumb2-ifcvt", cl::Hidden, cl::init(false)); Thumb2InstrInfo::Thumb2InstrInfo(const ARMSubtarget &STI) - : ARMBaseInstrInfo(STI), RI() {} + : ARMBaseInstrInfo(STI) {} /// Return the noop instruction to use for a noop. void Thumb2InstrInfo::getNoop(MCInst &NopInst) const { @@ -539,9 +549,7 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, // Add cc_out operand if the original instruction did not have one. if (!HasCCOut) MI.addOperand(MachineOperand::CreateReg(0, false)); - } else { - // AddrMode4 and AddrMode6 cannot handle any offset. 
if (AddrMode == ARMII::AddrMode4 || AddrMode == ARMII::AddrMode6) return false; diff --git a/interpreter/llvm/src/lib/Target/ARM/Thumb2SizeReduction.cpp b/interpreter/llvm/src/lib/Target/ARM/Thumb2SizeReduction.cpp index c90475c28db77..d911dd97b1ac7 100644 --- a/interpreter/llvm/src/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/interpreter/llvm/src/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -14,10 +14,10 @@ #include "Thumb2InstrInfo.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" diff --git a/interpreter/llvm/src/lib/Target/AVR/AVR.h b/interpreter/llvm/src/lib/Target/AVR/AVR.h index 8e5cc5360ad43..5eadf7bdcef6a 100644 --- a/interpreter/llvm/src/lib/Target/AVR/AVR.h +++ b/interpreter/llvm/src/lib/Target/AVR/AVR.h @@ -15,8 +15,8 @@ #ifndef LLVM_AVR_H #define LLVM_AVR_H -#include "llvm/Target/TargetMachine.h" #include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/Target/TargetMachine.h" namespace llvm { diff --git a/interpreter/llvm/src/lib/Target/AVR/AVRAsmPrinter.cpp b/interpreter/llvm/src/lib/Target/AVR/AVRAsmPrinter.cpp index d6491ce5c3bfc..c058c9e1f5348 100644 --- a/interpreter/llvm/src/lib/Target/AVR/AVRAsmPrinter.cpp +++ b/interpreter/llvm/src/lib/Target/AVR/AVRAsmPrinter.cpp @@ -18,8 +18,8 @@ #include "InstPrinter/AVRInstPrinter.h" #include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/IR/Mangler.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCStreamer.h" @@ -149,7 +149,10 @@ bool AVRAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, (void)MO; assert(MO.isReg() && "Unexpected inline asm memory operand"); - // TODO: We can look up the alternative name for the register if it's given. + // TODO: We should be able to look up the alternative name for + // the register if it's given. + // TableGen doesn't expose a way of retrieving names + // for registers. if (MI->getOperand(OpNum).getReg() == AVR::R31R30) { O << "Z"; } else { diff --git a/interpreter/llvm/src/lib/Target/AVR/AVRDevices.td b/interpreter/llvm/src/lib/Target/AVR/AVRDevices.td index 9224af613d148..62def45744372 100644 --- a/interpreter/llvm/src/lib/Target/AVR/AVRDevices.td +++ b/interpreter/llvm/src/lib/Target/AVR/AVRDevices.td @@ -6,7 +6,6 @@ // :TODO: We define all devices with SRAM to have all variants of LD/ST/LDD/STD. // In reality, avr1 (no SRAM) has one variant each of `LD` and `ST`. // avr2 (with SRAM) adds the rest of the variants. -// :TODO: s/AVRTiny/Tiny // A feature set aggregates features, grouping them.
We don't want to create a @@ -136,7 +135,7 @@ def ELFArchAVR4 : ELFArch<"EF_AVR_ARCH_AVR4">; def ELFArchAVR5 : ELFArch<"EF_AVR_ARCH_AVR5">; def ELFArchAVR51 : ELFArch<"EF_AVR_ARCH_AVR51">; def ELFArchAVR6 : ELFArch<"EF_AVR_ARCH_AVR6">; -def ELFArchAVRTiny : ELFArch<"EF_AVR_ARCH_AVRTINY">; +def ELFArchTiny : ELFArch<"EF_AVR_ARCH_AVRTINY">; def ELFArchXMEGA1 : ELFArch<"EF_AVR_ARCH_XMEGA1">; def ELFArchXMEGA2 : ELFArch<"EF_AVR_ARCH_XMEGA2">; def ELFArchXMEGA3 : ELFArch<"EF_AVR_ARCH_XMEGA3">; @@ -189,7 +188,7 @@ def FamilyAVR51 : Family<"avr51", def FamilyAVR6 : Family<"avr6", [FamilyAVR51]>; -def FamilyAVRTiny : Family<"avrtiny", +def FamilyTiny : Family<"avrtiny", [FamilyAVR0, FeatureBREAK, FeatureSRAM, FeatureTinyEncoding]>; @@ -240,7 +239,7 @@ def : Device<"avrxmega4", FamilyXMEGA, ELFArchXMEGA4>; def : Device<"avrxmega5", FamilyXMEGA, ELFArchXMEGA5>; def : Device<"avrxmega6", FamilyXMEGA, ELFArchXMEGA6>; def : Device<"avrxmega7", FamilyXMEGA, ELFArchXMEGA7>; -def : Device<"avrtiny", FamilyAVRTiny, ELFArchAVRTiny>; +def : Device<"avrtiny", FamilyTiny, ELFArchTiny>; // Specific MCUs def : Device<"at90s1200", FamilyAVR0, ELFArchAVR1>; @@ -480,12 +479,12 @@ def : Device<"atxmega384d3", FamilyXMEGA, ELFArchXMEGA6>; def : Device<"atxmega128a1", FamilyXMEGA, ELFArchXMEGA7>; def : Device<"atxmega128a1u", FamilyXMEGAU, ELFArchXMEGA7>; def : Device<"atxmega128a4u", FamilyXMEGAU, ELFArchXMEGA7>; -def : Device<"attiny4", FamilyAVRTiny, ELFArchAVRTiny>; -def : Device<"attiny5", FamilyAVRTiny, ELFArchAVRTiny>; -def : Device<"attiny9", FamilyAVRTiny, ELFArchAVRTiny>; -def : Device<"attiny10", FamilyAVRTiny, ELFArchAVRTiny>; -def : Device<"attiny20", FamilyAVRTiny, ELFArchAVRTiny>; -def : Device<"attiny40", FamilyAVRTiny, ELFArchAVRTiny>; -def : Device<"attiny102", FamilyAVRTiny, ELFArchAVRTiny>; -def : Device<"attiny104", FamilyAVRTiny, ELFArchAVRTiny>; +def : Device<"attiny4", FamilyTiny, ELFArchTiny>; +def : Device<"attiny5", FamilyTiny, ELFArchTiny>; +def : Device<"attiny9", FamilyTiny, ELFArchTiny>; +def : Device<"attiny10", FamilyTiny, ELFArchTiny>; +def : Device<"attiny20", FamilyTiny, ELFArchTiny>; +def : Device<"attiny40", FamilyTiny, ELFArchTiny>; +def : Device<"attiny102", FamilyTiny, ELFArchTiny>; +def : Device<"attiny104", FamilyTiny, ELFArchTiny>; diff --git a/interpreter/llvm/src/lib/Target/AVR/AVRISelLowering.cpp b/interpreter/llvm/src/lib/Target/AVR/AVRISelLowering.cpp index ef9c00e4b784e..7d3faac1dcc20 100644 --- a/interpreter/llvm/src/lib/Target/AVR/AVRISelLowering.cpp +++ b/interpreter/llvm/src/lib/Target/AVR/AVRISelLowering.cpp @@ -1500,9 +1500,9 @@ MachineBasicBlock *AVRTargetLowering::insertShift(MachineInstr &MI, unsigned DstReg = MI.getOperand(0).getReg(); // BB: - // cp 0, N + // cpi N, 0 // breq RemBB - BuildMI(BB, dl, TII.get(AVR::CPRdRr)).addReg(ShiftAmtSrcReg).addReg(AVR::R0); + BuildMI(BB, dl, TII.get(AVR::CPIRdK)).addReg(ShiftAmtSrcReg).addImm(0); BuildMI(BB, dl, TII.get(AVR::BREQk)).addMBB(RemBB); // LoopBB: diff --git a/interpreter/llvm/src/lib/Target/AVR/AVRInstrInfo.cpp b/interpreter/llvm/src/lib/Target/AVR/AVRInstrInfo.cpp index afba66b2e69bb..744aa723c416c 100644 --- a/interpreter/llvm/src/lib/Target/AVR/AVRInstrInfo.cpp +++ b/interpreter/llvm/src/lib/Target/AVR/AVRInstrInfo.cpp @@ -402,7 +402,7 @@ unsigned AVRInstrInfo::insertBranch(MachineBasicBlock &MBB, ArrayRef Cond, const DebugLoc &DL, int *BytesAdded) const { - assert(!BytesAdded && "code size not handled"); + if (BytesAdded) *BytesAdded = 0; // Shouldn't be a fall through. 
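Note on the insertBranch/removeBranch hunks here and just below: the old code asserted that callers never ask for size information; the new code resets the optional out-parameter and then accumulates getInstSizeInBytes() for every branch it emits or erases, which is what the BranchRelaxation pass registered later in this patch needs in order to recompute block offsets. A standalone sketch of that contract, with hypothetical names rather than code from this patch:

#include <vector>
struct Insn { unsigned SizeInBytes; };
// An optional out-parameter accumulates the byte size of what was inserted.
unsigned insertBranches(std::vector<Insn> &Block, bool TwoWay, int *BytesAdded) {
  if (BytesAdded) *BytesAdded = 0;  // reset first, as the patch does
  unsigned Count = 0;
  Block.push_back({2});             // conditional branch, 2 bytes on AVR
  if (BytesAdded) *BytesAdded += 2;
  ++Count;
  if (TwoWay) {
    Block.push_back({2});           // trailing unconditional RJMP
    if (BytesAdded) *BytesAdded += 2;
    ++Count;
  }
  return Count;
}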
assert(TBB && "insertBranch must not be told to insert a fallthrough"); @@ -411,19 +411,24 @@ unsigned AVRInstrInfo::insertBranch(MachineBasicBlock &MBB, if (Cond.empty()) { assert(!FBB && "Unconditional branch with multiple successors!"); - BuildMI(&MBB, DL, get(AVR::RJMPk)).addMBB(TBB); + auto &MI = *BuildMI(&MBB, DL, get(AVR::RJMPk)).addMBB(TBB); + if (BytesAdded) + *BytesAdded += getInstSizeInBytes(MI); return 1; } // Conditional branch. unsigned Count = 0; AVRCC::CondCodes CC = (AVRCC::CondCodes)Cond[0].getImm(); - BuildMI(&MBB, DL, getBrCond(CC)).addMBB(TBB); + auto &CondMI = *BuildMI(&MBB, DL, getBrCond(CC)).addMBB(TBB); + + if (BytesAdded) *BytesAdded += getInstSizeInBytes(CondMI); ++Count; if (FBB) { // Two-way Conditional branch. Insert the second branch. - BuildMI(&MBB, DL, get(AVR::RJMPk)).addMBB(FBB); + auto &MI = *BuildMI(&MBB, DL, get(AVR::RJMPk)).addMBB(FBB); + if (BytesAdded) *BytesAdded += getInstSizeInBytes(MI); ++Count; } @@ -432,7 +437,7 @@ unsigned AVRInstrInfo::insertBranch(MachineBasicBlock &MBB, unsigned AVRInstrInfo::removeBranch(MachineBasicBlock &MBB, int *BytesRemoved) const { - assert(!BytesRemoved && "code size not handled"); + if (BytesRemoved) *BytesRemoved = 0; MachineBasicBlock::iterator I = MBB.end(); unsigned Count = 0; @@ -450,6 +455,7 @@ unsigned AVRInstrInfo::removeBranch(MachineBasicBlock &MBB, } // Remove the branch. + if (BytesRemoved) *BytesRemoved += getInstSizeInBytes(*I); I->eraseFromParent(); I = MBB.end(); ++Count; @@ -494,5 +500,61 @@ unsigned AVRInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { } } +MachineBasicBlock * +AVRInstrInfo::getBranchDestBlock(const MachineInstr &MI) const { + switch (MI.getOpcode()) { + default: + llvm_unreachable("unexpected opcode!"); + case AVR::JMPk: + case AVR::CALLk: + case AVR::RCALLk: + case AVR::RJMPk: + case AVR::BREQk: + case AVR::BRNEk: + case AVR::BRSHk: + case AVR::BRLOk: + case AVR::BRMIk: + case AVR::BRPLk: + case AVR::BRGEk: + case AVR::BRLTk: + return MI.getOperand(0).getMBB(); + case AVR::BRBSsk: + case AVR::BRBCsk: + return MI.getOperand(1).getMBB(); + case AVR::SBRCRrB: + case AVR::SBRSRrB: + case AVR::SBICAb: + case AVR::SBISAb: + llvm_unreachable("unimplemented branch instructions"); + } +} + +bool AVRInstrInfo::isBranchOffsetInRange(unsigned BranchOp, + int64_t BrOffset) const { + + switch (BranchOp) { + default: + llvm_unreachable("unexpected opcode!"); + case AVR::JMPk: + case AVR::CALLk: + assert(BrOffset >= 0 && "offset must be absolute address"); + return isUIntN(16, BrOffset); + case AVR::RCALLk: + case AVR::RJMPk: + return isIntN(13, BrOffset); + case AVR::BRBSsk: + case AVR::BRBCsk: + case AVR::BREQk: + case AVR::BRNEk: + case AVR::BRSHk: + case AVR::BRLOk: + case AVR::BRMIk: + case AVR::BRPLk: + case AVR::BRGEk: + case AVR::BRLTk: + return isIntN(7, BrOffset); + } +} + } // end of namespace llvm diff --git a/interpreter/llvm/src/lib/Target/AVR/AVRInstrInfo.h b/interpreter/llvm/src/lib/Target/AVR/AVRInstrInfo.h index c5105dafe5eb5..f42d34fb28480 100644 --- a/interpreter/llvm/src/lib/Target/AVR/AVRInstrInfo.h +++ b/interpreter/llvm/src/lib/Target/AVR/AVRInstrInfo.h @@ -103,6 +103,10 @@ class AVRInstrInfo : public AVRGenInstrInfo { bool reverseBranchCondition(SmallVectorImpl &Cond) const override; + MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const override; + + bool isBranchOffsetInRange(unsigned BranchOpc, + int64_t BrOffset) const override; private: const AVRRegisterInfo RI; }; diff --git a/interpreter/llvm/src/lib/Target/AVR/AVRInstrInfo.td 
b/interpreter/llvm/src/lib/Target/AVR/AVRInstrInfo.td index 06ad2b3ffdf8c..184e4d53f7c8f 100644 --- a/interpreter/llvm/src/lib/Target/AVR/AVRInstrInfo.td +++ b/interpreter/llvm/src/lib/Target/AVR/AVRInstrInfo.td @@ -902,10 +902,9 @@ let Defs = [SREG] in // CPI Rd, K // Compares a register with an 8 bit immediate. - let Uses = [SREG] in def CPIRdK : FRdK<0b0011, (outs), - (ins GPR8:$rd, imm_ldi8:$k), + (ins LD8:$rd, imm_ldi8:$k), "cpi\t$rd, $k", [(AVRcmp i8:$rd, imm:$k), (implicit SREG)]>; } @@ -1412,17 +1411,11 @@ hasSideEffects = 0 in def LPMRdZ : FLPMX<0, 0, (outs GPR8:$dst), - (ins ZREGS:$z), + (ins ZREG:$z), "lpm\t$dst, $z", []>, Requires<[HasLPMX]>; - def LPMWRdZ : Pseudo<(outs DREGS:$dst), - (ins ZREGS:$z), - "lpmw\t$dst, $z", - []>, - Requires<[HasLPMX]>; - // Load program memory, while postincrementing the Z register. let mayLoad = 1, Defs = [R31R30] in @@ -1430,13 +1423,19 @@ hasSideEffects = 0 in def LPMRdZPi : FLPMX<0, 1, (outs GPR8:$dst), - (ins ZREGS:$z), + (ins ZREG:$z), "lpm\t$dst, $z+", []>, Requires<[HasLPMX]>; + def LPMWRdZ : Pseudo<(outs DREGS:$dst), + (ins ZREG:$z), + "lpmw\t$dst, $z", + []>, + Requires<[HasLPMX]>; + def LPMWRdZPi : Pseudo<(outs DREGS:$dst), - (ins ZREGS:$z), + (ins ZREG:$z), "lpmw\t$dst, $z+", []>, Requires<[HasLPMX]>; @@ -1459,7 +1458,7 @@ hasSideEffects = 0 in def ELPMRdZ : FLPMX<1, 0, (outs GPR8:$dst), - (ins ZREGS:$z), + (ins ZREG:$z), "elpm\t$dst, $z", []>, Requires<[HasELPMX]>; @@ -1468,7 +1467,7 @@ hasSideEffects = 0 in def ELPMRdZPi : FLPMX<1, 1, (outs GPR8:$dst), - (ins ZREGS: $z), + (ins ZREG: $z), "elpm\t$dst, $z+", []>, Requires<[HasELPMX]>; @@ -1488,7 +1487,7 @@ let Uses = [R1, R0] in let Defs = [R31R30] in def SPMZPi : F16<0b1001010111111000, (outs), - (ins ZREGS:$z), + (ins ZREG:$z), "spm $z+", []>, Requires<[HasSPMX]>; @@ -1565,28 +1564,28 @@ hasSideEffects = 0 in // Read-Write-Modify (RMW) instructions. def XCHZRd : FZRd<0b100, (outs GPR8:$rd), - (ins ZREGS:$z), + (ins ZREG:$z), "xch\t$z, $rd", []>, Requires<[SupportsRMW]>; def LASZRd : FZRd<0b101, (outs GPR8:$rd), - (ins ZREGS:$z), + (ins ZREG:$z), "las\t$z, $rd", []>, Requires<[SupportsRMW]>; def LACZRd : FZRd<0b110, (outs GPR8:$rd), - (ins ZREGS:$z), + (ins ZREG:$z), "lac\t$z, $rd", []>, Requires<[SupportsRMW]>; def LATZRd : FZRd<0b111, (outs GPR8:$rd), - (ins ZREGS:$z), + (ins ZREG:$z), "lat\t$z, $rd", []>, Requires<[SupportsRMW]>; diff --git a/interpreter/llvm/src/lib/Target/AVR/AVRMCInstLower.cpp b/interpreter/llvm/src/lib/Target/AVR/AVRMCInstLower.cpp index 475dda420e892..dfefd09bc4b86 100644 --- a/interpreter/llvm/src/lib/Target/AVR/AVRMCInstLower.cpp +++ b/interpreter/llvm/src/lib/Target/AVR/AVRMCInstLower.cpp @@ -37,10 +37,22 @@ MCOperand AVRMCInstLower::lowerSymbolOperand(const MachineOperand &MO, Expr, MCConstantExpr::create(MO.getOffset(), Ctx), Ctx); } + bool IsFunction = MO.isGlobal() && isa(MO.getGlobal()); + if (TF & AVRII::MO_LO) { - Expr = AVRMCExpr::create(AVRMCExpr::VK_AVR_LO8, Expr, IsNegated, Ctx); + if (IsFunction) { + // N.B. Should we use _GS fixups here to cope with >128k progmem? + Expr = AVRMCExpr::create(AVRMCExpr::VK_AVR_PM_LO8, Expr, IsNegated, Ctx); + } else { + Expr = AVRMCExpr::create(AVRMCExpr::VK_AVR_LO8, Expr, IsNegated, Ctx); + } } else if (TF & AVRII::MO_HI) { - Expr = AVRMCExpr::create(AVRMCExpr::VK_AVR_HI8, Expr, IsNegated, Ctx); + if (IsFunction) { + // N.B. Should we use _GS fixups here to cope with >128k progmem? 
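Expanding on the N.B. just above, under an assumption spelled out here rather than stated in the patch: AVR's indirect jumps and calls consume word (2-byte-unit) addresses, so function symbols take pm_lo8/pm_hi8 fixups that halve the byte address before splitting it, and gs() fixups would additionally route through generated stubs once the word address no longer fits in 16 bits, i.e. flash beyond 128 KiB. A standalone sketch of the arithmetic:

#include <cstdint>
// pm() works on the word address, i.e. the byte address shifted right by one.
uint8_t pm_lo8(uint32_t ByteAddr) { return uint8_t((ByteAddr >> 1) & 0xff); }
uint8_t pm_hi8(uint32_t ByteAddr) { return uint8_t((ByteAddr >> 9) & 0xff); }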
+ Expr = AVRMCExpr::create(AVRMCExpr::VK_AVR_PM_HI8, Expr, IsNegated, Ctx); + } else { + Expr = AVRMCExpr::create(AVRMCExpr::VK_AVR_HI8, Expr, IsNegated, Ctx); + } } else if (TF != 0) { llvm_unreachable("Unknown target flag on symbol operand"); } diff --git a/interpreter/llvm/src/lib/Target/AVR/AVRRegisterInfo.cpp b/interpreter/llvm/src/lib/Target/AVR/AVRRegisterInfo.cpp index 11a47bad78baa..249dc5512c289 100644 --- a/interpreter/llvm/src/lib/Target/AVR/AVRRegisterInfo.cpp +++ b/interpreter/llvm/src/lib/Target/AVR/AVRRegisterInfo.cpp @@ -51,7 +51,6 @@ AVRRegisterInfo::getCallPreservedMask(const MachineFunction &MF, BitVector AVRRegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); - const AVRTargetMachine &TM = static_cast(MF.getTarget()); // Reserve the intermediate result registers r1 and r2 // The result of instructions like 'mul' is always stored here. @@ -96,7 +95,8 @@ AVRRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC, } /// Fold a frame offset shared between two add instructions into a single one. -static void foldFrameOffset(MachineInstr &MI, int &Offset, unsigned DstReg) { +static void foldFrameOffset(MachineBasicBlock::iterator &II, int &Offset, unsigned DstReg) { + MachineInstr &MI = *II; int Opcode = MI.getOpcode(); // Don't bother trying if the next instruction is not an add or a sub. @@ -121,6 +121,7 @@ static void foldFrameOffset(MachineInstr &MI, int &Offset, unsigned DstReg) { } // Finally remove the instruction. + II++; MI.eraseFromParent(); } @@ -159,6 +160,8 @@ void AVRRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, unsigned DstReg = MI.getOperand(0).getReg(); assert(DstReg != AVR::R29R28 && "Dest reg cannot be the frame pointer"); + II++; // Skip over the FRMIDX (and now MOVW) instruction. + // Generally, to load a frame address two add instructions are emitted that // could get folded into a single one: // movw r31:r30, r29:r28 @@ -167,7 +170,8 @@ void AVRRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // to: // movw r31:r30, r29:r28 // adiw r31:r30, 45 - foldFrameOffset(*std::next(II), Offset, DstReg); + if (II != MBB.end()) + foldFrameOffset(II, Offset, DstReg); // Select the best opcode based on DstReg and the offset size. switch (DstReg) { @@ -188,7 +192,7 @@ void AVRRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } } - MachineInstr *New = BuildMI(MBB, std::next(II), dl, TII.get(Opcode), DstReg) + MachineInstr *New = BuildMI(MBB, II, dl, TII.get(Opcode), DstReg) .addReg(DstReg, RegState::Kill) .addImm(Offset); New->getOperand(3).setIsDead(); @@ -269,4 +273,3 @@ void AVRRegisterInfo::splitReg(unsigned Reg, } } // end of namespace llvm - diff --git a/interpreter/llvm/src/lib/Target/AVR/AVRRegisterInfo.td b/interpreter/llvm/src/lib/Target/AVR/AVRRegisterInfo.td index 32650fc66751e..8162f12052be5 100644 --- a/interpreter/llvm/src/lib/Target/AVR/AVRRegisterInfo.td +++ b/interpreter/llvm/src/lib/Target/AVR/AVRRegisterInfo.td @@ -110,8 +110,6 @@ CoveredBySubRegs = 1 in // Register Classes //===----------------------------------------------------------------------===// -//:TODO: use proper set instructions instead of using always "add" - // Main 8-bit register class. def GPR8 : RegisterClass<"AVR", [i8], 8, ( @@ -199,14 +197,11 @@ def PTRDISPREGS : RegisterClass<"AVR", [i16], 8, // We have a bunch of instructions with an explicit Z register argument. We // model this using a register class containing only the Z register. -// :TODO: Rename to 'ZREG'. 
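One functional fix in the AVRInstrInfo.td hunk above deserves a note: CPIRdK's register operand narrows from GPR8 to LD8, and the stray `let Uses = [SREG]` is dropped, since cpi writes SREG but never reads it. The narrowing reflects a hardware constraint: immediate instructions such as cpi have only a 4-bit register field biased by 16, so just r16-r31 can be encoded. A standalone sketch of that constraint (the helper is hypothetical):

#include <cassert>
unsigned encodeCpiReg(unsigned RegNo) {
  // cpi encodes as 0011 KKKK dddd KKKK with d = RegNo - 16.
  assert(RegNo >= 16 && RegNo <= 31 && "cpi only accepts r16-r31");
  return RegNo - 16;
}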
-def ZREGS : RegisterClass<"AVR", [i16], 8, (add R31R30)>; +def ZREG : RegisterClass<"AVR", [i16], 8, (add R31R30)>; // Register class used for the stack read pseudo instruction. def GPRSP: RegisterClass<"AVR", [i16], 8, (add SP)>; -//:TODO: if we remove this we get an error in tablegen -//:TODO: this is just a hack, remove it once add16 works! // Status register. def SREG : AVRReg<14, "FLAGS">, DwarfRegNum<[88]>; def CCR : RegisterClass<"AVR", [i8], 8, (add SREG)> diff --git a/interpreter/llvm/src/lib/Target/AVR/AVRSubtarget.cpp b/interpreter/llvm/src/lib/Target/AVR/AVRSubtarget.cpp index c228d051d7719..556d69ec52341 100644 --- a/interpreter/llvm/src/lib/Target/AVR/AVRSubtarget.cpp +++ b/interpreter/llvm/src/lib/Target/AVR/AVRSubtarget.cpp @@ -13,7 +13,7 @@ #include "AVRSubtarget.h" -#include "llvm/Support/ELF.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/Support/TargetRegistry.h" #include "AVR.h" diff --git a/interpreter/llvm/src/lib/Target/AVR/AVRSubtarget.h b/interpreter/llvm/src/lib/Target/AVR/AVRSubtarget.h index a37849c3f3f77..b0e634f861687 100644 --- a/interpreter/llvm/src/lib/Target/AVR/AVRSubtarget.h +++ b/interpreter/llvm/src/lib/Target/AVR/AVRSubtarget.h @@ -14,10 +14,9 @@ #ifndef LLVM_AVR_SUBTARGET_H #define LLVM_AVR_SUBTARGET_H -#include "llvm/Target/TargetSubtargetInfo.h" -#include "llvm/IR/DataLayout.h" #include "llvm/IR/DataLayout.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include "AVRFrameLowering.h" #include "AVRISelLowering.h" diff --git a/interpreter/llvm/src/lib/Target/AVR/AVRTargetMachine.cpp b/interpreter/llvm/src/lib/Target/AVR/AVRTargetMachine.cpp index fb3262916b4fd..a9d61ffc952c3 100644 --- a/interpreter/llvm/src/lib/Target/AVR/AVRTargetMachine.cpp +++ b/interpreter/llvm/src/lib/Target/AVR/AVRTargetMachine.cpp @@ -15,12 +15,12 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetPassConfig.h" -#include "llvm/IR/Module.h" #include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Module.h" #include "llvm/Support/TargetRegistry.h" -#include "AVRTargetObjectFile.h" #include "AVR.h" +#include "AVRTargetObjectFile.h" #include "MCTargetDesc/AVRMCTargetDesc.h" namespace llvm { @@ -57,7 +57,7 @@ namespace { /// AVR Code Generator Pass Configuration Options. class AVRPassConfig : public TargetPassConfig { public: - AVRPassConfig(AVRTargetMachine *TM, PassManagerBase &PM) + AVRPassConfig(AVRTargetMachine &TM, PassManagerBase &PM) : TargetPassConfig(TM, PM) {} AVRTargetMachine &getAVRTargetMachine() const { @@ -66,12 +66,13 @@ class AVRPassConfig : public TargetPassConfig { bool addInstSelector() override; void addPreSched2() override; + void addPreEmitPass() override; void addPreRegAlloc() override; }; } // namespace TargetPassConfig *AVRTargetMachine::createPassConfig(PassManagerBase &PM) { - return new AVRPassConfig(this, PM); + return new AVRPassConfig(*this, PM); } extern "C" void LLVMInitializeAVRTarget() { @@ -115,4 +116,9 @@ void AVRPassConfig::addPreSched2() { addPass(createAVRExpandPseudoPass()); } +void AVRPassConfig::addPreEmitPass() { + // Must run branch selection immediately preceding the asm printer. 
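In the AVRRegisterInfo.cpp hunk above, foldFrameOffset now receives the instruction iterator by reference and advances it past the folded add/sub before erasing, while eliminateFrameIndex first steps over the FRMIDX-turned-MOVW and checks against MBB.end(); the old code dereferenced std::next(II) unconditionally and then inserted relative to an iterator that could reference the erased instruction. A standalone sketch of the erase-while-iterating discipline being adopted, with std::list standing in for the machine instruction list:

#include <list>
void eraseCurrent(std::list<int> &Insns, std::list<int>::iterator &It) {
  std::list<int>::iterator Dead = It++; // step off the element first
  Insns.erase(Dead);                    // the caller's iterator stays valid
}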
+ addPass(&BranchRelaxationPassID); +} + } // end of namespace llvm diff --git a/interpreter/llvm/src/lib/Target/AVR/AVRTargetMachine.h b/interpreter/llvm/src/lib/Target/AVR/AVRTargetMachine.h index 10345193d14af..795e94e6af03a 100644 --- a/interpreter/llvm/src/lib/Target/AVR/AVRTargetMachine.h +++ b/interpreter/llvm/src/lib/Target/AVR/AVRTargetMachine.h @@ -41,6 +41,10 @@ class AVRTargetMachine : public LLVMTargetMachine { TargetPassConfig *createPassConfig(PassManagerBase &PM) override; + bool isMachineVerifierClean() const override { + return false; + } + private: std::unique_ptr TLOF; AVRSubtarget SubTarget; diff --git a/interpreter/llvm/src/lib/Target/AVR/AVRTargetObjectFile.cpp b/interpreter/llvm/src/lib/Target/AVR/AVRTargetObjectFile.cpp index af14d9292f276..0cebb0f043f92 100644 --- a/interpreter/llvm/src/lib/Target/AVR/AVRTargetObjectFile.cpp +++ b/interpreter/llvm/src/lib/Target/AVR/AVRTargetObjectFile.cpp @@ -9,12 +9,12 @@ #include "AVRTargetObjectFile.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Mangler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionELF.h" -#include "llvm/Support/ELF.h" #include "AVR.h" diff --git a/interpreter/llvm/src/lib/Target/AVR/AsmParser/AVRAsmParser.cpp b/interpreter/llvm/src/lib/Target/AVR/AsmParser/AVRAsmParser.cpp index 5b0398c0ca34b..5004736365c7b 100644 --- a/interpreter/llvm/src/lib/Target/AVR/AsmParser/AVRAsmParser.cpp +++ b/interpreter/llvm/src/lib/Target/AVR/AsmParser/AVRAsmParser.cpp @@ -18,12 +18,12 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstBuilder.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" #include "llvm/MC/MCParser/MCTargetAsmParser.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCValue.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" @@ -466,6 +466,7 @@ bool AVRAsmParser::parseOperand(OperandVector &Operands) { if (!tryParseRegisterOperand(Operands)) { return false; } + LLVM_FALLTHROUGH; case AsmToken::LParen: case AsmToken::Integer: case AsmToken::Dot: diff --git a/interpreter/llvm/src/lib/Target/AVR/Disassembler/AVRDisassembler.cpp b/interpreter/llvm/src/lib/Target/AVR/Disassembler/AVRDisassembler.cpp index d2a21fb646354..e69accfa9393b 100644 --- a/interpreter/llvm/src/lib/Target/AVR/Disassembler/AVRDisassembler.cpp +++ b/interpreter/llvm/src/lib/Target/AVR/Disassembler/AVRDisassembler.cpp @@ -16,11 +16,11 @@ #include "AVRSubtarget.h" #include "MCTargetDesc/AVRMCTargetDesc.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/MC/MCInst.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCAsmInfo.h" #include "llvm/Support/TargetRegistry.h" using namespace llvm; diff --git a/interpreter/llvm/src/lib/Target/AVR/InstPrinter/AVRInstPrinter.cpp b/interpreter/llvm/src/lib/Target/AVR/InstPrinter/AVRInstPrinter.cpp index 316b7836df0d7..0f34b8e18ff96 100644 --- a/interpreter/llvm/src/lib/Target/AVR/InstPrinter/AVRInstPrinter.cpp +++ b/interpreter/llvm/src/lib/Target/AVR/InstPrinter/AVRInstPrinter.cpp @@ -106,7 +106,7 @@ void AVRInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, if (Op.isReg()) { bool isPtrReg = (MOI.RegClass == 
AVR::PTRREGSRegClassID) || (MOI.RegClass == AVR::PTRDISPREGSRegClassID) || - (MOI.RegClass == AVR::ZREGSRegClassID); + (MOI.RegClass == AVR::ZREGRegClassID); if (isPtrReg) { O << getRegisterName(Op.getReg(), AVR::ptr); diff --git a/interpreter/llvm/src/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp b/interpreter/llvm/src/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp index 5c3b45ac23285..d18298385adf2 100644 --- a/interpreter/llvm/src/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp +++ b/interpreter/llvm/src/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp @@ -230,13 +230,25 @@ void ms8(unsigned Size, const MCFixup &Fixup, uint64_t &Value, namespace llvm { // Prepare value for the target space for it -void AVRAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t &Value, +void AVRAsmBackend::adjustFixupValue(const MCFixup &Fixup, + const MCValue &Target, + uint64_t &Value, MCContext *Ctx) const { // The size of the fixup in bits. uint64_t Size = AVRAsmBackend::getFixupKindInfo(Fixup.getKind()).TargetSize; unsigned Kind = Fixup.getKind(); + // Parsed LLVM-generated temporary labels are already + // adjusted for instruction size, but normal labels aren't. + // + // To handle both cases, we simply un-adjust the temporary label + // case so it acts like all other labels. + if (const MCSymbolRefExpr *A = Target.getSymA()) { + if (A->getSymbol().isTemporary()) + Value += 2; + } + switch (Kind) { default: llvm_unreachable("unhandled fixup"); @@ -333,9 +345,10 @@ MCObjectWriter *AVRAsmBackend::createObjectWriter(raw_pwrite_stream &OS) const { MCELFObjectTargetWriter::getOSABI(OSType)); } -void AVRAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, - unsigned DataSize, uint64_t Value, - bool IsPCRel, MCContext &Ctx) const { +void AVRAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target, MutableArrayRef Data, + uint64_t Value, bool IsPCRel) const { + adjustFixupValue(Fixup, Target, Value, &Asm.getContext()); if (Value == 0) return; // Doesn't change encoding. @@ -349,7 +362,7 @@ void AVRAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, Value <<= Info.TargetOffset; unsigned Offset = Fixup.getOffset(); - assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!"); + assert(Offset + NumBytes <= Data.size() && "Invalid fixup offset!"); // For each byte of the fragment that the fixup touches, mask in the // bits from the fixup value. @@ -436,30 +449,16 @@ bool AVRAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { return true; } -void AVRAsmBackend::processFixupValue(const MCAssembler &Asm, - const MCAsmLayout &Layout, - const MCFixup &Fixup, - const MCFragment *DF, - const MCValue &Target, uint64_t &Value, - bool &IsResolved) { +bool AVRAsmBackend::shouldForceRelocation(const MCAssembler &Asm, + const MCFixup &Fixup, + const MCValue &Target) { switch ((unsigned) Fixup.getKind()) { + default: return false; // Fixups which should always be recorded as relocations. case AVR::fixup_7_pcrel: case AVR::fixup_13_pcrel: case AVR::fixup_call: - IsResolved = false; - break; - default: - // Parsed LLVM-generated temporary labels are already - // adjusted for instruction size, but normal labels aren't. - // - // To handle both cases, we simply un-adjust the temporary label - // case so it acts like all other labels. 
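Context for the surrounding AVRAsmBackend changes: fixup adjustment moves out of the removed processFixupValue hook into applyFixup itself, which now sees the MCValue target and un-adjusts temporary labels by +2 up front, while the new shouldForceRelocation keeps the one decision the old hook really made, namely that PC-relative branch and call fixups are always emitted as relocations. The byte-patching loop that follows the adjustment can be pictured with this standalone sketch (a hypothetical helper mirroring the shift-then-OR loop):

#include <cstddef>
#include <cstdint>
void patchBytes(uint8_t *Data, size_t Offset, uint64_t Value,
                unsigned TargetOffset, unsigned NumBytes) {
  Value <<= TargetOffset;                  // align the value with its bit-field
  for (unsigned i = 0; i != NumBytes; ++i) // little-endian byte order
    Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff);
}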
- if (Target.getSymA()->getSymbol().isTemporary()) - Value += 2; - - adjustFixupValue(Fixup, Value, &Asm.getContext()); - break; + return true; } } diff --git a/interpreter/llvm/src/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h b/interpreter/llvm/src/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h index f2be2494684a2..4a75e3b0d22d2 100644 --- a/interpreter/llvm/src/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h +++ b/interpreter/llvm/src/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h @@ -35,13 +35,14 @@ class AVRAsmBackend : public MCAsmBackend { AVRAsmBackend(Triple::OSType OSType) : MCAsmBackend(), OSType(OSType) {} - void adjustFixupValue(const MCFixup &Fixup, uint64_t &Value, - MCContext *Ctx = nullptr) const; + void adjustFixupValue(const MCFixup &Fixup, const MCValue &Target, + uint64_t &Value, MCContext *Ctx = nullptr) const; MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override; - void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, - uint64_t Value, bool IsPCRel, MCContext &Ctx) const override; + void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target, MutableArrayRef Data, + uint64_t Value, bool IsPCRel) const override; const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override; @@ -63,10 +64,8 @@ class AVRAsmBackend : public MCAsmBackend { bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override; - void processFixupValue(const MCAssembler &Asm, const MCAsmLayout &Layout, - const MCFixup &Fixup, const MCFragment *DF, - const MCValue &Target, uint64_t &Value, - bool &IsResolved) override; + bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target) override; private: Triple::OSType OSType; diff --git a/interpreter/llvm/src/lib/Target/AVR/MCTargetDesc/AVRELFStreamer.cpp b/interpreter/llvm/src/lib/Target/AVR/MCTargetDesc/AVRELFStreamer.cpp index 7137548210058..6d126ed622aa1 100644 --- a/interpreter/llvm/src/lib/Target/AVR/MCTargetDesc/AVRELFStreamer.cpp +++ b/interpreter/llvm/src/lib/Target/AVR/MCTargetDesc/AVRELFStreamer.cpp @@ -1,8 +1,8 @@ #include "AVRELFStreamer.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/SubtargetFeature.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/FormattedStream.h" #include "AVRMCTargetDesc.h" @@ -33,7 +33,7 @@ static unsigned getEFlagsForFeatureSet(const FeatureBitset &Features) { EFlags |= ELF::EF_AVR_ARCH_AVR51; else if (Features[AVR::ELFArchAVR6]) EFlags |= ELF::EF_AVR_ARCH_AVR6; - else if (Features[AVR::ELFArchAVRTiny]) + else if (Features[AVR::ELFArchTiny]) EFlags |= ELF::EF_AVR_ARCH_AVRTINY; else if (Features[AVR::ELFArchXMEGA1]) EFlags |= ELF::EF_AVR_ARCH_XMEGA1; diff --git a/interpreter/llvm/src/lib/Target/AVR/MCTargetDesc/AVRMCExpr.cpp b/interpreter/llvm/src/lib/Target/AVR/MCTargetDesc/AVRMCExpr.cpp index 400296b8409b4..085afd23a83ce 100644 --- a/interpreter/llvm/src/lib/Target/AVR/MCTargetDesc/AVRMCExpr.cpp +++ b/interpreter/llvm/src/lib/Target/AVR/MCTargetDesc/AVRMCExpr.cpp @@ -9,11 +9,11 @@ #include "AVRMCExpr.h" +#include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCValue.h" -#include "llvm/MC/MCAsmLayout.h" namespace llvm { diff --git a/interpreter/llvm/src/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp b/interpreter/llvm/src/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp index a4fa5c0a93108..826430e94b9c2 100644 --- 
a/interpreter/llvm/src/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp +++ b/interpreter/llvm/src/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp @@ -11,9 +11,9 @@ // //===----------------------------------------------------------------------===// +#include "AVRMCTargetDesc.h" #include "AVRELFStreamer.h" #include "AVRMCAsmInfo.h" -#include "AVRMCTargetDesc.h" #include "AVRTargetStreamer.h" #include "InstPrinter/AVRInstPrinter.h" diff --git a/interpreter/llvm/src/lib/Target/BPF/BPFAsmPrinter.cpp b/interpreter/llvm/src/lib/Target/BPF/BPFAsmPrinter.cpp index c5201465e0743..9397c78f3dff8 100644 --- a/interpreter/llvm/src/lib/Target/BPF/BPFAsmPrinter.cpp +++ b/interpreter/llvm/src/lib/Target/BPF/BPFAsmPrinter.cpp @@ -18,10 +18,10 @@ #include "BPFTargetMachine.h" #include "InstPrinter/BPFInstPrinter.h" #include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCStreamer.h" @@ -35,14 +35,15 @@ using namespace llvm; namespace { class BPFAsmPrinter : public AsmPrinter { public: - explicit BPFAsmPrinter(TargetMachine &TM, std::unique_ptr Streamer) + explicit BPFAsmPrinter(TargetMachine &TM, + std::unique_ptr Streamer) : AsmPrinter(TM, std::move(Streamer)) {} StringRef getPassName() const override { return "BPF Assembly Printer"; } void EmitInstruction(const MachineInstr *MI) override; }; -} +} // namespace void BPFAsmPrinter::EmitInstruction(const MachineInstr *MI) { diff --git a/interpreter/llvm/src/lib/Target/BPF/BPFISelDAGToDAG.cpp b/interpreter/llvm/src/lib/Target/BPF/BPFISelDAGToDAG.cpp index 279cdb1a89b42..f48429ee57b0a 100644 --- a/interpreter/llvm/src/lib/Target/BPF/BPFISelDAGToDAG.cpp +++ b/interpreter/llvm/src/lib/Target/BPF/BPFISelDAGToDAG.cpp @@ -16,17 +16,21 @@ #include "BPFRegisterInfo.h" #include "BPFSubtarget.h" #include "BPFTargetMachine.h" +#include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/Endian.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" + using namespace llvm; #define DEBUG_TYPE "bpf-isel" @@ -42,6 +46,8 @@ class BPFDAGToDAGISel : public SelectionDAGISel { return "BPF DAG->DAG Pattern Instruction Selection"; } + void PreprocessISelDAG() override; + private: // Include the pieces autogenerated from the target description. #include "BPFGenDAGISel.inc" @@ -51,15 +57,39 @@ class BPFDAGToDAGISel : public SelectionDAGISel { // Complex Pattern for address selection. 
bool SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset); bool SelectFIAddr(SDValue Addr, SDValue &Base, SDValue &Offset); + + // Node preprocessing cases + void PreprocessLoad(SDNode *Node, SelectionDAG::allnodes_iterator I); + void PreprocessCopyToReg(SDNode *Node); + void PreprocessTrunc(SDNode *Node, SelectionDAG::allnodes_iterator I); + + // Find constants from a constant structure + typedef std::vector<unsigned char> val_vec_type; + bool fillGenericConstant(const DataLayout &DL, const Constant *CV, + val_vec_type &Vals, uint64_t Offset); + bool fillConstantDataArray(const DataLayout &DL, const ConstantDataArray *CDA, + val_vec_type &Vals, int Offset); + bool fillConstantArray(const DataLayout &DL, const ConstantArray *CA, + val_vec_type &Vals, int Offset); + bool fillConstantStruct(const DataLayout &DL, const ConstantStruct *CS, + val_vec_type &Vals, int Offset); + bool getConstantFieldValue(const GlobalAddressSDNode *Node, uint64_t Offset, + uint64_t Size, unsigned char *ByteSeq); + bool checkLoadDef(unsigned DefReg, unsigned match_load_op); + + // Mapping from ConstantStruct global value to corresponding byte-list values + std::map<const void *, val_vec_type> cs_vals_; + // Mapping from vreg to load memory opcode + std::map<unsigned, unsigned> load_to_vreg_; }; -} +} // namespace // ComplexPattern used on BPF Load/Store instructions bool BPFDAGToDAGISel::SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset) { // if Address is FI, get the TargetFrameIndex. SDLoc DL(Addr); if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { - Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64); + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64); Offset = CurDAG->getTargetConstant(0, DL, MVT::i64); return true; } @@ -85,13 +115,14 @@ bool BPFDAGToDAGISel::SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset) { } } - Base = Addr; + Base = Addr; Offset = CurDAG->getTargetConstant(0, DL, MVT::i64); return true; } // ComplexPattern used on BPF FI instruction -bool BPFDAGToDAGISel::SelectFIAddr(SDValue Addr, SDValue &Base, SDValue &Offset) { +bool BPFDAGToDAGISel::SelectFIAddr(SDValue Addr, SDValue &Base, + SDValue &Offset) { SDLoc DL(Addr); if (!CurDAG->isBaseWithConstantOffset(Addr)) @@ -102,8 +133,7 @@ bool BPFDAGToDAGISel::SelectFIAddr(SDValue Addr, SDValue &Base, SDValue &Offset) if (isInt<16>(CN->getSExtValue())) { // If the first operand is a FI, get the TargetFI Node - if (FrameIndexSDNode *FIN = - dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) + if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64); else return false; @@ -129,7 +159,8 @@ void BPFDAGToDAGISel::Select(SDNode *Node) { // tablegen selection should be handled here. switch (Opcode) { - default: break; + default: + break; case ISD::SDIV: { DebugLoc Empty; const DebugLoc &DL = Node->getDebugLoc(); @@ -181,6 +212,367 @@ void BPFDAGToDAGISel::Select(SDNode *Node) { SelectCode(Node); } +void BPFDAGToDAGISel::PreprocessLoad(SDNode *Node, + SelectionDAG::allnodes_iterator I) { + union { + uint8_t c[8]; + uint16_t s; + uint32_t i; + uint64_t d; + } new_val; // hold up the constant values replacing loads. + bool to_replace = false; + SDLoc DL(Node); + const LoadSDNode *LD = cast<LoadSDNode>(Node); + uint64_t size = LD->getMemOperand()->getSize(); + + if (!size || size > 8 || (size & (size - 1))) + return; + + SDNode *LDAddrNode = LD->getOperand(1).getNode(); + // Match LDAddr against either global_addr or (global_addr + offset) + unsigned opcode = LDAddrNode->getOpcode(); + if (opcode == ISD::ADD) { + SDValue OP1 = LDAddrNode->getOperand(0); + SDValue OP2 = LDAddrNode->getOperand(1); + + // We want to find the pattern global_addr + offset + SDNode *OP1N = OP1.getNode(); + if (OP1N->getOpcode() <= ISD::BUILTIN_OP_END || OP1N->getNumOperands() == 0) + return; + + DEBUG(dbgs() << "Check candidate load: "; LD->dump(); dbgs() << '\n'); + + const GlobalAddressSDNode *GADN = + dyn_cast<GlobalAddressSDNode>(OP1N->getOperand(0).getNode()); + const ConstantSDNode *CDN = dyn_cast<ConstantSDNode>(OP2.getNode()); + if (GADN && CDN) + to_replace = + getConstantFieldValue(GADN, CDN->getZExtValue(), size, new_val.c); + } else if (LDAddrNode->getOpcode() > ISD::BUILTIN_OP_END && + LDAddrNode->getNumOperands() > 0) { + DEBUG(dbgs() << "Check candidate load: "; LD->dump(); dbgs() << '\n'); + + SDValue OP1 = LDAddrNode->getOperand(0); + if (const GlobalAddressSDNode *GADN = + dyn_cast<GlobalAddressSDNode>(OP1.getNode())) + to_replace = getConstantFieldValue(GADN, 0, size, new_val.c); + } + + if (!to_replace) + return; + + // replacing the old with a new value + uint64_t val; + if (size == 1) + val = new_val.c[0]; + else if (size == 2) + val = new_val.s; + else if (size == 4) + val = new_val.i; + else { + val = new_val.d; + } + + DEBUG(dbgs() << "Replacing load of size " << size << " with constant " << val + << '\n'); + SDValue NVal = CurDAG->getConstant(val, DL, MVT::i64); + + // After replacement, the current node is dead, we need to + // go backward one step to make iterator still work + I--; + SDValue From[] = {SDValue(Node, 0), SDValue(Node, 1)}; + SDValue To[] = {NVal, NVal}; + CurDAG->ReplaceAllUsesOfValuesWith(From, To, 2); + I++; + // It is safe to delete node now + CurDAG->DeleteNode(Node); +}
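PreprocessLoad above is the core of the BPF change: a fixed-width, power-of-two-sized load whose address resolves to an initialized read-only global, optionally plus a constant offset, is replaced with the constant bytes themselves. A hypothetical C-level view of the effect, not a test case taken from the patch:

struct Cfg { unsigned Type; unsigned Flags; };
static const Cfg C = {7, 0x10};
unsigned getType() { return C.Type; }
// Before: getType() lowers to a runtime load from the read-only section.
// After PreprocessISelDAG: the load node becomes the constant 7, which
// matters on BPF, where reading global read-only data at runtime is not
// generally supported.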
+ +void BPFDAGToDAGISel::PreprocessISelDAG() { + // Iterate through all nodes, interested in the following cases: + // + // . loads from ConstantStruct or ConstantArray of constructs + // which can be turned into constants themselves; with this we can + // avoid reading from the read-only section at runtime. + // + // . reg truncating is often the result of 8/16/32bit->64bit or + // 8/16bit->32bit conversion. If the reg value is loaded with + // masked byte width, the AND operation can be removed since + // BPF LOAD already has zero extension. + // + // This also solves a correctness issue. + // In BPF socket-related programs, e.g., __sk_buff->{data, data_end} + // are 32-bit registers, but later on, the kernel verifier will rewrite + // them with 64-bit values. Therefore, truncating the value after the + // load will result in incorrect code. + for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), + E = CurDAG->allnodes_end(); + I != E;) { + SDNode *Node = &*I++; + unsigned Opcode = Node->getOpcode(); + if (Opcode == ISD::LOAD) + PreprocessLoad(Node, I); + else if (Opcode == ISD::CopyToReg) + PreprocessCopyToReg(Node); + else if (Opcode == ISD::AND) + PreprocessTrunc(Node, I); + } +} + +bool BPFDAGToDAGISel::getConstantFieldValue(const GlobalAddressSDNode *Node, + uint64_t Offset, uint64_t Size, + unsigned char *ByteSeq) { + const GlobalVariable *V = dyn_cast<GlobalVariable>(Node->getGlobal()); + + if (!V || !V->hasInitializer()) + return false; + + const Constant *Init = V->getInitializer(); + const DataLayout &DL = CurDAG->getDataLayout(); + val_vec_type TmpVal; + + auto it = cs_vals_.find(static_cast<const void *>(Init)); + if (it != cs_vals_.end()) { + TmpVal = it->second; + } else { + uint64_t total_size = 0; + if (const ConstantStruct *CS = dyn_cast<ConstantStruct>(Init)) + total_size = + DL.getStructLayout(cast<StructType>(CS->getType()))->getSizeInBytes(); + else if (const ConstantArray *CA = dyn_cast<ConstantArray>(Init)) + total_size = DL.getTypeAllocSize(CA->getType()->getElementType()) * + CA->getNumOperands(); + else + return false; + + val_vec_type Vals(total_size, 0); + if (fillGenericConstant(DL, Init, Vals, 0) == false) + return false; + cs_vals_[static_cast<const void *>(Init)] = Vals; + TmpVal = std::move(Vals); + } + + // test whether host endianness matches target + union { + uint8_t c[2]; + uint16_t s; + } test_buf; + uint16_t test_val = 0x2345; + if (DL.isLittleEndian()) + support::endian::write16le(test_buf.c, test_val); + else + support::endian::write16be(test_buf.c, test_val); + + bool endian_match = test_buf.s == test_val; + for (uint64_t i = Offset, j = 0; i < Offset + Size; i++, j++) + ByteSeq[j] = endian_match ? TmpVal[i] : TmpVal[Offset + Size - 1 - j]; + + return true; +} + +bool BPFDAGToDAGISel::fillGenericConstant(const DataLayout &DL, + const Constant *CV, + val_vec_type &Vals, uint64_t Offset) { + uint64_t Size = DL.getTypeAllocSize(CV->getType()); + + if (isa<ConstantAggregateZero>(CV) || isa<UndefValue>(CV)) + return true; // already done + + if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) { + uint64_t val = CI->getZExtValue(); + DEBUG(dbgs() << "Byte array at offset " << Offset << " with value " << val + << '\n'); + + if (Size > 8 || (Size & (Size - 1))) + return false; + + // Store based on target endian + for (uint64_t i = 0; i < Size; ++i) { + Vals[Offset + i] = DL.isLittleEndian() + ? ((val >> (i * 8)) & 0xFF) + : ((val >> ((Size - i - 1) * 8)) & 0xFF); + } + return true; + } + + if (const ConstantDataArray *CDA = dyn_cast<ConstantDataArray>(CV)) + return fillConstantDataArray(DL, CDA, Vals, Offset); + + if (const ConstantArray *CA = dyn_cast<ConstantArray>(CV)) + return fillConstantArray(DL, CA, Vals, Offset); + + if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV)) + return fillConstantStruct(DL, CVS, Vals, Offset); + + return false; +} + +bool BPFDAGToDAGISel::fillConstantDataArray(const DataLayout &DL, + const ConstantDataArray *CDA, + val_vec_type &Vals, int Offset) { + for (unsigned i = 0, e = CDA->getNumElements(); i != e; ++i) { + if (fillGenericConstant(DL, CDA->getElementAsConstant(i), Vals, Offset) == + false) + return false; + Offset += DL.getTypeAllocSize(CDA->getElementAsConstant(i)->getType()); + } + + return true; +} + +bool BPFDAGToDAGISel::fillConstantArray(const DataLayout &DL, + const ConstantArray *CA, + val_vec_type &Vals, int Offset) { + for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i) { + if (fillGenericConstant(DL, CA->getOperand(i), Vals, Offset) == false) + return false; + Offset += DL.getTypeAllocSize(CA->getOperand(i)->getType()); + } + + return true; +} + +bool BPFDAGToDAGISel::fillConstantStruct(const DataLayout &DL, + const ConstantStruct *CS, + val_vec_type &Vals, int Offset) { + const StructLayout *Layout = DL.getStructLayout(CS->getType()); + for (unsigned i = 0, e = CS->getNumOperands(); i != e; ++i) { + const Constant *Field = CS->getOperand(i); + uint64_t SizeSoFar = Layout->getElementOffset(i); + if (fillGenericConstant(DL, Field, Vals, Offset + SizeSoFar) == false) + return false; + } + return true; +} + +void BPFDAGToDAGISel::PreprocessCopyToReg(SDNode *Node) { + const RegisterSDNode *RegN = dyn_cast<RegisterSDNode>(Node->getOperand(1)); + if (!RegN || !TargetRegisterInfo::isVirtualRegister(RegN->getReg())) + return; + + const LoadSDNode *LD = dyn_cast<LoadSDNode>(Node->getOperand(2)); + if (!LD) + return; + + // Assign a load value to a virtual register; record its load width + unsigned mem_load_op = 0; + switch (LD->getMemOperand()->getSize()) { + default: + return; + case 4: + mem_load_op = BPF::LDW; + break; + case 2: + mem_load_op = BPF::LDH; + break; + case 1: + mem_load_op = BPF::LDB; + break; + } + + DEBUG(dbgs() << "Find Load Value to VReg " + << TargetRegisterInfo::virtReg2Index(RegN->getReg()) << '\n'); + load_to_vreg_[RegN->getReg()] = mem_load_op; +} + +void BPFDAGToDAGISel::PreprocessTrunc(SDNode *Node, + SelectionDAG::allnodes_iterator I) { + ConstantSDNode *MaskN = dyn_cast<ConstantSDNode>(Node->getOperand(1)); + if (!MaskN) + return; + + unsigned match_load_op = 0; + switch (MaskN->getZExtValue()) { + default: + return; + case 0xFFFFFFFF: + match_load_op = BPF::LDW; + break; + case 0xFFFF: + match_load_op = BPF::LDH; + break; + case 0xFF: + match_load_op = BPF::LDB; + break; + } + + // The Reg operand should be a virtual register, which is defined + // outside the current basic block. DAG combiner has done a pretty + // good job in removing truncating inside a single basic block. + SDValue BaseV = Node->getOperand(0); + if (BaseV.getOpcode() != ISD::CopyFromReg) + return; + + const RegisterSDNode *RegN = + dyn_cast<RegisterSDNode>(BaseV.getNode()->getOperand(1)); + if (!RegN || !TargetRegisterInfo::isVirtualRegister(RegN->getReg())) + return; + unsigned AndOpReg = RegN->getReg(); + DEBUG(dbgs() << "Examine %vreg" << TargetRegisterInfo::virtReg2Index(AndOpReg) + << '\n'); + + // Examine the PHI insns in the MachineBasicBlock to find out the + // definitions of this virtual register. At this stage (DAG2DAG + // transformation), only PHI machine insns are available in the machine basic + // block. + MachineBasicBlock *MBB = FuncInfo->MBB; + MachineInstr *MII = nullptr; + for (auto &MI : *MBB) { + for (unsigned i = 0; i < MI.getNumOperands(); ++i) { + const MachineOperand &MOP = MI.getOperand(i); + if (!MOP.isReg() || !MOP.isDef()) + continue; + unsigned Reg = MOP.getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg) && Reg == AndOpReg) { + MII = &MI; + break; + } + } + } + + if (MII == nullptr) { + // No phi definition in this block. + if (!checkLoadDef(AndOpReg, match_load_op)) + return; + } else { + // The PHI node looks like: + // %vreg2<def> = PHI %vreg0, <BB#1>, %vreg1, <BB#3> + // Trace each incoming definition, e.g., (%vreg0, BB#1) and (%vreg1, BB#3) + // The AND operation can be removed if both %vreg0 in BB#1 and %vreg1 in + // BB#3 are defined with a load matching the MaskN. + DEBUG(dbgs() << "Check PHI Insn: "; MII->dump(); dbgs() << '\n'); + unsigned PrevReg = -1; + for (unsigned i = 0; i < MII->getNumOperands(); ++i) { + const MachineOperand &MOP = MII->getOperand(i); + if (MOP.isReg()) { + if (MOP.isDef()) + continue; + PrevReg = MOP.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(PrevReg)) + return; + if (!checkLoadDef(PrevReg, match_load_op)) + return; + } + } + } + + DEBUG(dbgs() << "Remove the redundant AND operation in: "; Node->dump(); + dbgs() << '\n'); + + I--; + CurDAG->ReplaceAllUsesWith(SDValue(Node, 0), BaseV); + I++; + CurDAG->DeleteNode(Node); +} + +bool BPFDAGToDAGISel::checkLoadDef(unsigned DefReg, unsigned match_load_op) { + auto it = load_to_vreg_.find(DefReg); + if (it == load_to_vreg_.end()) + return false; // The definition of register is not exported yet. + + return it->second == match_load_op; +} + FunctionPass *llvm::createBPFISelDag(BPFTargetMachine &TM) { return new BPFDAGToDAGISel(TM); } diff --git a/interpreter/llvm/src/lib/Target/BPF/BPFISelLowering.cpp b/interpreter/llvm/src/lib/Target/BPF/BPFISelLowering.cpp index 6897161c903cb..81b0aa7f8b98f 100644 --- a/interpreter/llvm/src/lib/Target/BPF/BPFISelLowering.cpp +++ b/interpreter/llvm/src/lib/Target/BPF/BPFISelLowering.cpp @@ -132,6 +132,10 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM, MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = 128; } +bool BPFTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { + return false; +} + SDValue BPFTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { case ISD::BR_CC: @@ -496,8 +500,11 @@ const char *BPFTargetLowering::getTargetNodeName(unsigned Opcode) const { SDValue BPFTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { + auto N = cast<GlobalAddressSDNode>(Op); + assert(N->getOffset() == 0 && "Invalid offset for global address"); + SDLoc DL(Op); - const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); + const GlobalValue *GV = N->getGlobal(); SDValue GA = DAG.getTargetGlobalAddress(GV, DL, MVT::i64); return DAG.getNode(BPFISD::Wrapper, DL, MVT::i64, GA); @@ -508,8 +515,9 @@ BPFTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const { const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo(); DebugLoc DL = MI.getDebugLoc(); + bool isSelectOp = MI.getOpcode() == BPF::Select; - assert(MI.getOpcode() == BPF::Select && "Unexpected instr type to insert"); + assert((isSelectOp || MI.getOpcode() == BPF::Select_Ri) && "Unexpected instr type to insert"); // To "insert" a SELECT instruction, we actually have to insert the
diamond // control-flow pattern. The incoming instruction knows the destination vreg @@ -541,48 +549,40 @@ BPFTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, // Insert Branch if Flag unsigned LHS = MI.getOperand(1).getReg(); - unsigned RHS = MI.getOperand(2).getReg(); int CC = MI.getOperand(3).getImm(); + int NewCC; switch (CC) { case ISD::SETGT: - BuildMI(BB, DL, TII.get(BPF::JSGT_rr)) - .addReg(LHS) - .addReg(RHS) - .addMBB(Copy1MBB); + NewCC = isSelectOp ? BPF::JSGT_rr : BPF::JSGT_ri; break; case ISD::SETUGT: - BuildMI(BB, DL, TII.get(BPF::JUGT_rr)) - .addReg(LHS) - .addReg(RHS) - .addMBB(Copy1MBB); + NewCC = isSelectOp ? BPF::JUGT_rr : BPF::JUGT_ri; break; case ISD::SETGE: - BuildMI(BB, DL, TII.get(BPF::JSGE_rr)) - .addReg(LHS) - .addReg(RHS) - .addMBB(Copy1MBB); + NewCC = isSelectOp ? BPF::JSGE_rr : BPF::JSGE_ri; break; case ISD::SETUGE: - BuildMI(BB, DL, TII.get(BPF::JUGE_rr)) - .addReg(LHS) - .addReg(RHS) - .addMBB(Copy1MBB); + NewCC = isSelectOp ? BPF::JUGE_rr : BPF::JUGE_ri; break; case ISD::SETEQ: - BuildMI(BB, DL, TII.get(BPF::JEQ_rr)) - .addReg(LHS) - .addReg(RHS) - .addMBB(Copy1MBB); + NewCC = isSelectOp ? BPF::JEQ_rr : BPF::JEQ_ri; break; case ISD::SETNE: - BuildMI(BB, DL, TII.get(BPF::JNE_rr)) - .addReg(LHS) - .addReg(RHS) - .addMBB(Copy1MBB); + NewCC = isSelectOp ? BPF::JNE_rr : BPF::JNE_ri; break; default: report_fatal_error("unimplemented select CondCode " + Twine(CC)); } + if (isSelectOp) + BuildMI(BB, DL, TII.get(NewCC)) + .addReg(LHS) + .addReg(MI.getOperand(2).getReg()) + .addMBB(Copy1MBB); + else + BuildMI(BB, DL, TII.get(NewCC)) + .addReg(LHS) + .addImm(MI.getOperand(2).getImm()) + .addMBB(Copy1MBB); // Copy0MBB: // %FalseValue = ... diff --git a/interpreter/llvm/src/lib/Target/BPF/BPFISelLowering.h b/interpreter/llvm/src/lib/Target/BPF/BPFISelLowering.h index 3d1726be286eb..0b8a8ca20c3b3 100644 --- a/interpreter/llvm/src/lib/Target/BPF/BPFISelLowering.h +++ b/interpreter/llvm/src/lib/Target/BPF/BPFISelLowering.h @@ -42,6 +42,10 @@ class BPFTargetLowering : public TargetLowering { // This method returns the name of a target specific DAG node. const char *getTargetNodeName(unsigned Opcode) const override; + // This method decides whether folding a constant offset + // with the given GlobalAddress is legal. + bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; + MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override; diff --git a/interpreter/llvm/src/lib/Target/BPF/BPFInstrInfo.cpp b/interpreter/llvm/src/lib/Target/BPF/BPFInstrInfo.cpp index e38facead9228..5351cfa95020e 100644 --- a/interpreter/llvm/src/lib/Target/BPF/BPFInstrInfo.cpp +++ b/interpreter/llvm/src/lib/Target/BPF/BPFInstrInfo.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "BPF.h" #include "BPFInstrInfo.h" +#include "BPF.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineInstrBuilder.h" diff --git a/interpreter/llvm/src/lib/Target/BPF/BPFInstrInfo.td b/interpreter/llvm/src/lib/Target/BPF/BPFInstrInfo.td index c6c0ff587c6b8..f68357809add2 100644 --- a/interpreter/llvm/src/lib/Target/BPF/BPFInstrInfo.td +++ b/interpreter/llvm/src/lib/Target/BPF/BPFInstrInfo.td @@ -51,7 +51,7 @@ def u64imm : Operand { let PrintMethod = "printImm64Operand"; } -def i64immSExt32 : PatLeaf<(imm), +def i64immSExt32 : PatLeaf<(i64 imm), [{return isInt<32>(N->getSExtValue()); }]>; // Addressing modes. 
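The i64immSExt32 leaf just above gains an explicit i64 type, PatLeaf<(i64 imm)>, as do the BPF_CC_* leaves in the next hunk, so TableGen's type inference no longer has to guess the width of a bare imm. A standalone equivalent of the predicate itself:

#include <cstdint>
bool isSExt32Imm(int64_t V) {
  // True iff V survives a round-trip through int32_t, i.e. it fits a
  // sign-extended 32-bit immediate field.
  return V == static_cast<int64_t>(static_cast<int32_t>(V));
}
// isSExt32Imm(-1)         -> true
// isSExt32Imm(0x80000000) -> false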
@@ -67,17 +67,17 @@ def MEMri : Operand { } // Conditional code predicates - used for pattern matching for jump instructions -def BPF_CC_EQ : PatLeaf<(imm), +def BPF_CC_EQ : PatLeaf<(i64 imm), [{return (N->getZExtValue() == ISD::SETEQ);}]>; -def BPF_CC_NE : PatLeaf<(imm), +def BPF_CC_NE : PatLeaf<(i64 imm), [{return (N->getZExtValue() == ISD::SETNE);}]>; -def BPF_CC_GE : PatLeaf<(imm), +def BPF_CC_GE : PatLeaf<(i64 imm), [{return (N->getZExtValue() == ISD::SETGE);}]>; -def BPF_CC_GT : PatLeaf<(imm), +def BPF_CC_GT : PatLeaf<(i64 imm), [{return (N->getZExtValue() == ISD::SETGT);}]>; -def BPF_CC_GTU : PatLeaf<(imm), +def BPF_CC_GTU : PatLeaf<(i64 imm), [{return (N->getZExtValue() == ISD::SETUGT);}]>; -def BPF_CC_GEU : PatLeaf<(imm), +def BPF_CC_GEU : PatLeaf<(i64 imm), [{return (N->getZExtValue() == ISD::SETUGE);}]>; // jump instructions @@ -460,6 +460,11 @@ let usesCustomInserter = 1 in { "# Select PSEUDO $dst = $lhs $imm $rhs ? $src : $src2", [(set i64:$dst, (BPFselectcc i64:$lhs, i64:$rhs, (i64 imm:$imm), i64:$src, i64:$src2))]>; + def Select_Ri : Pseudo<(outs GPR:$dst), + (ins GPR:$lhs, i64imm:$rhs, i64imm:$imm, GPR:$src, GPR:$src2), + "# Select PSEUDO $dst = $lhs $imm $rhs ? $src : $src2", + [(set i64:$dst, + (BPFselectcc i64:$lhs, (i64 imm:$rhs), (i64 imm:$imm), i64:$src, i64:$src2))]>; } // load 64-bit global addr into register diff --git a/interpreter/llvm/src/lib/Target/BPF/BPFRegisterInfo.cpp b/interpreter/llvm/src/lib/Target/BPF/BPFRegisterInfo.cpp index 7925bee9c5879..273843e92701e 100644 --- a/interpreter/llvm/src/lib/Target/BPF/BPFRegisterInfo.cpp +++ b/interpreter/llvm/src/lib/Target/BPF/BPFRegisterInfo.cpp @@ -11,17 +11,17 @@ // //===----------------------------------------------------------------------===// -#include "BPF.h" #include "BPFRegisterInfo.h" +#include "BPF.h" #include "BPFSubtarget.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/IR/DiagnosticInfo.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/IR/DiagnosticInfo.h" #define GET_REGINFO_TARGET_DESC #include "BPFGenRegisterInfo.inc" diff --git a/interpreter/llvm/src/lib/Target/BPF/BPFTargetMachine.cpp b/interpreter/llvm/src/lib/Target/BPF/BPFTargetMachine.cpp index 897695633e46b..d84b0a80fc0cc 100644 --- a/interpreter/llvm/src/lib/Target/BPF/BPFTargetMachine.cpp +++ b/interpreter/llvm/src/lib/Target/BPF/BPFTargetMachine.cpp @@ -11,12 +11,12 @@ // //===----------------------------------------------------------------------===// -#include "BPF.h" #include "BPFTargetMachine.h" -#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" -#include "llvm/IR/LegacyPassManager.h" +#include "BPF.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/IR/LegacyPassManager.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Target/TargetOptions.h" @@ -58,7 +58,7 @@ namespace { // BPF Code Generator Pass Configuration Options. 
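The BPFPassConfig class below follows the same LLVM 5.0 API migration already applied to AVRPassConfig above: TargetPassConfig subclasses now take the target machine by reference, and createPassConfig hands over *this rather than this. A minimal stand-in for the pattern, with simplified types that are not the real LLVM classes:

struct PassManagerBase {};
struct TargetMachine {};
struct MyPassConfig {
  TargetMachine &TM; // held by reference, so it can never be null
  MyPassConfig(TargetMachine &TM, PassManagerBase &) : TM(TM) {}
};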
class BPFPassConfig : public TargetPassConfig { public: - BPFPassConfig(BPFTargetMachine *TM, PassManagerBase &PM) + BPFPassConfig(BPFTargetMachine &TM, PassManagerBase &PM) : TargetPassConfig(TM, PM) {} BPFTargetMachine &getBPFTargetMachine() const { @@ -70,7 +70,7 @@ class BPFPassConfig : public TargetPassConfig { } TargetPassConfig *BPFTargetMachine::createPassConfig(PassManagerBase &PM) { - return new BPFPassConfig(this, PM); + return new BPFPassConfig(*this, PM); } // Install an instruction selector pass using diff --git a/interpreter/llvm/src/lib/Target/BPF/CMakeLists.txt b/interpreter/llvm/src/lib/Target/BPF/CMakeLists.txt index e2654b0465df1..4918653ff19da 100644 --- a/interpreter/llvm/src/lib/Target/BPF/CMakeLists.txt +++ b/interpreter/llvm/src/lib/Target/BPF/CMakeLists.txt @@ -4,7 +4,7 @@ tablegen(LLVM BPFGenRegisterInfo.inc -gen-register-info) tablegen(LLVM BPFGenInstrInfo.inc -gen-instr-info) tablegen(LLVM BPFGenDisassemblerTables.inc -gen-disassembler) tablegen(LLVM BPFGenAsmWriter.inc -gen-asm-writer) -tablegen(LLVM X86GenAsmMatcher.inc -gen-asm-matcher) +tablegen(LLVM BPFGenAsmMatcher.inc -gen-asm-matcher) tablegen(LLVM BPFGenDAGISel.inc -gen-dag-isel) tablegen(LLVM BPFGenMCCodeEmitter.inc -gen-emitter) tablegen(LLVM BPFGenCallingConv.inc -gen-callingconv) diff --git a/interpreter/llvm/src/lib/Target/BPF/Disassembler/BPFDisassembler.cpp b/interpreter/llvm/src/lib/Target/BPF/Disassembler/BPFDisassembler.cpp index b98621ca47494..a1d732c339e5f 100644 --- a/interpreter/llvm/src/lib/Target/BPF/Disassembler/BPFDisassembler.cpp +++ b/interpreter/llvm/src/lib/Target/BPF/Disassembler/BPFDisassembler.cpp @@ -15,10 +15,10 @@ #include "BPFSubtarget.h" #include "MCTargetDesc/BPFMCTargetDesc.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCFixedLenDisassembler.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCInst.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/TargetRegistry.h" diff --git a/interpreter/llvm/src/lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp b/interpreter/llvm/src/lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp index ffd29f3ea9915..64e986fe0f046 100644 --- a/interpreter/llvm/src/lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp +++ b/interpreter/llvm/src/lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "BPF.h" #include "BPFInstPrinter.h" +#include "BPF.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" diff --git a/interpreter/llvm/src/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp b/interpreter/llvm/src/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp index 80357a63a4e12..9fc812cdef14f 100644 --- a/interpreter/llvm/src/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp +++ b/interpreter/llvm/src/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp @@ -27,8 +27,9 @@ class BPFAsmBackend : public MCAsmBackend { : MCAsmBackend(), IsLittleEndian(IsLittleEndian) {} ~BPFAsmBackend() override = default; - void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, - uint64_t Value, bool IsPCRel, MCContext &Ctx) const override; + void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target, MutableArrayRef Data, + uint64_t Value, bool IsResolved) const override; MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override; @@ -61,9 +62,10 @@ 
bool BPFAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { return true; } -void BPFAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, - unsigned DataSize, uint64_t Value, bool IsPCRel, - MCContext &Ctx) const { +void BPFAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target, + MutableArrayRef Data, uint64_t Value, + bool IsResolved) const { if (Fixup.getKind() == FK_SecRel_4 || Fixup.getKind() == FK_SecRel_8) { assert(Value == 0); } else if (Fixup.getKind() == FK_Data_4 || Fixup.getKind() == FK_Data_8) { diff --git a/interpreter/llvm/src/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp b/interpreter/llvm/src/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp index ebe9abd8ffac4..d5e1d7706edc0 100644 --- a/interpreter/llvm/src/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp +++ b/interpreter/llvm/src/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp @@ -8,9 +8,9 @@ //===----------------------------------------------------------------------===// #include "MCTargetDesc/BPFMCTargetDesc.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCFixup.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include diff --git a/interpreter/llvm/src/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp b/interpreter/llvm/src/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp index b58409730de04..797904e1c9768 100644 --- a/interpreter/llvm/src/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp +++ b/interpreter/llvm/src/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp @@ -11,9 +11,9 @@ // //===----------------------------------------------------------------------===// +#include "MCTargetDesc/BPFMCTargetDesc.h" #include "BPF.h" #include "InstPrinter/BPFInstPrinter.h" -#include "MCTargetDesc/BPFMCTargetDesc.h" #include "MCTargetDesc/BPFMCAsmInfo.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" diff --git a/interpreter/llvm/src/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h b/interpreter/llvm/src/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h index 3df673eaeb4b4..d1c97c9987e1b 100644 --- a/interpreter/llvm/src/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h +++ b/interpreter/llvm/src/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h @@ -14,8 +14,8 @@ #ifndef LLVM_LIB_TARGET_BPF_MCTARGETDESC_BPFMCTARGETDESC_H #define LLVM_LIB_TARGET_BPF_MCTARGETDESC_BPFMCTARGETDESC_H -#include "llvm/Support/DataTypes.h" #include "llvm/Config/config.h" +#include "llvm/Support/DataTypes.h" namespace llvm { class MCAsmBackend; diff --git a/interpreter/llvm/src/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp b/interpreter/llvm/src/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp index b0b2644fffbec..d901abbd16925 100644 --- a/interpreter/llvm/src/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp +++ b/interpreter/llvm/src/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp @@ -17,11 +17,12 @@ #include "MCTargetDesc/HexagonMCInstrInfo.h" #include "MCTargetDesc/HexagonMCTargetDesc.h" #include "MCTargetDesc/HexagonShuffler.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDirectives.h" @@ -42,13 +43,12 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include 
"llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/SMLoc.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" #include #include #include @@ -1413,6 +1413,7 @@ int HexagonAsmParser::processInstruction(MCInst &Inst, // Translate a "$Rx = CONST32(#imm)" to "$Rx = memw(gp+#LABEL) " case Hexagon::CONST32: is32bit = true; + LLVM_FALLTHROUGH; // Translate a "$Rx:y = CONST64(#imm)" to "$Rx:y = memd(gp+#LABEL) " case Hexagon::CONST64: // FIXME: need better way to detect AsmStreamer (upstream removed getKind()) diff --git a/interpreter/llvm/src/lib/Target/Hexagon/BitTracker.cpp b/interpreter/llvm/src/lib/Target/Hexagon/BitTracker.cpp index 07767d1037a98..5b02aa3ca3ae9 100644 --- a/interpreter/llvm/src/lib/Target/Hexagon/BitTracker.cpp +++ b/interpreter/llvm/src/lib/Target/Hexagon/BitTracker.cpp @@ -65,9 +65,9 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetRegisterInfo.h" -#include #include #include +#include using namespace llvm; diff --git a/interpreter/llvm/src/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/interpreter/llvm/src/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp index 87c212b6163fe..586220dfec262 100644 --- a/interpreter/llvm/src/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp +++ b/interpreter/llvm/src/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp @@ -12,12 +12,12 @@ #include "Hexagon.h" #include "MCTargetDesc/HexagonBaseInfo.h" #include "MCTargetDesc/HexagonMCChecker.h" -#include "MCTargetDesc/HexagonMCTargetDesc.h" #include "MCTargetDesc/HexagonMCInstrInfo.h" +#include "MCTargetDesc/HexagonMCTargetDesc.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/MC/MCInst.h" @@ -25,8 +25,8 @@ #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" #include #include #include diff --git a/interpreter/llvm/src/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/interpreter/llvm/src/lib/Target/Hexagon/HexagonAsmPrinter.cpp index bb5128e7500fb..e689483a09993 100644 --- a/interpreter/llvm/src/lib/Target/Hexagon/HexagonAsmPrinter.cpp +++ b/interpreter/llvm/src/lib/Target/Hexagon/HexagonAsmPrinter.cpp @@ -13,8 +13,8 @@ // //===----------------------------------------------------------------------===// -#include "Hexagon.h" #include "HexagonAsmPrinter.h" +#include "Hexagon.h" #include "HexagonMachineFunctionInfo.h" #include "HexagonSubtarget.h" #include "HexagonTargetMachine.h" @@ -23,6 +23,7 @@ #include "MCTargetDesc/HexagonMCShuffler.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" @@ -43,7 +44,6 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/Format.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/TargetRegistry.h" diff --git 
a/interpreter/llvm/src/lib/Target/Hexagon/HexagonBitSimplify.cpp b/interpreter/llvm/src/lib/Target/Hexagon/HexagonBitSimplify.cpp index 8502bf24c02f6..d75d95a6baeae 100644 --- a/interpreter/llvm/src/lib/Target/Hexagon/HexagonBitSimplify.cpp +++ b/interpreter/llvm/src/lib/Target/Hexagon/HexagonBitSimplify.cpp @@ -7,14 +7,12 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "hexbit" - #include "HexagonBitTracker.h" #include "HexagonTargetMachine.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineDominators.h" @@ -42,6 +40,8 @@ #include #include +#define DEBUG_TYPE "hexbit" + using namespace llvm; static cl::opt PreserveTiedOps("hexbit-keep-tied", cl::Hidden, @@ -1947,8 +1947,10 @@ bool BitSimplification::genStoreImmediate(MachineInstr *MI) { switch (Opc) { case Hexagon::S2_storeri_io: Align++; + LLVM_FALLTHROUGH; case Hexagon::S2_storerh_io: Align++; + LLVM_FALLTHROUGH; case Hexagon::S2_storerb_io: break; default: diff --git a/interpreter/llvm/src/lib/Target/Hexagon/HexagonBitTracker.cpp b/interpreter/llvm/src/lib/Target/Hexagon/HexagonBitTracker.cpp index af0f8b265bdae..3de5310882409 100644 --- a/interpreter/llvm/src/lib/Target/Hexagon/HexagonBitTracker.cpp +++ b/interpreter/llvm/src/lib/Target/Hexagon/HexagonBitTracker.cpp @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#include "Hexagon.h" #include "HexagonBitTracker.h" +#include "Hexagon.h" #include "HexagonInstrInfo.h" #include "HexagonRegisterInfo.h" #include "HexagonTargetMachine.h" @@ -937,6 +937,7 @@ bool HexagonEvaluator::evaluate(const MachineInstr &BI, case Hexagon::J2_jumpfnew: case Hexagon::J2_jumpfnewpt: Negated = true; + LLVM_FALLTHROUGH; case Hexagon::J2_jumpt: case Hexagon::J2_jumptpt: case Hexagon::J2_jumptnew: diff --git a/interpreter/llvm/src/lib/Target/Hexagon/HexagonBlockRanges.h b/interpreter/llvm/src/lib/Target/Hexagon/HexagonBlockRanges.h index 717480314d160..769ec7044a0ee 100644 --- a/interpreter/llvm/src/lib/Target/Hexagon/HexagonBlockRanges.h +++ b/interpreter/llvm/src/lib/Target/Hexagon/HexagonBlockRanges.h @@ -14,8 +14,8 @@ #include #include #include -#include #include +#include namespace llvm { diff --git a/interpreter/llvm/src/lib/Target/Hexagon/HexagonCommonGEP.cpp b/interpreter/llvm/src/lib/Target/Hexagon/HexagonCommonGEP.cpp index a07ba77e6f3e1..b5b46f2b7d199 100644 --- a/interpreter/llvm/src/lib/Target/Hexagon/HexagonCommonGEP.cpp +++ b/interpreter/llvm/src/lib/Target/Hexagon/HexagonCommonGEP.cpp @@ -175,7 +175,8 @@ namespace { None = 0, Root = 0x01, Internal = 0x02, - Used = 0x04 + Used = 0x04, + InBounds = 0x08 }; uint32_t Flags; @@ -231,6 +232,11 @@ namespace { OS << ','; OS << "used"; } + if (GN.Flags & GepNode::InBounds) { + if (Comma) + OS << ','; + OS << "inbounds"; + } OS << "} "; if (GN.Flags & GepNode::Root) OS << "BaseVal:" << GN.BaseVal->getName() << '(' << GN.BaseVal << ')'; @@ -334,10 +340,11 @@ void HexagonCommonGEP::processGepInst(GetElementPtrInst *GepI, DEBUG(dbgs() << "Visiting GEP: " << *GepI << '\n'); GepNode *N = new (*Mem) GepNode; Value *PtrOp = GepI->getPointerOperand(); + uint32_t InBounds = GepI->isInBounds() ? 
GepNode::InBounds : 0; ValueToNodeMap::iterator F = NM.find(PtrOp); if (F == NM.end()) { N->BaseVal = PtrOp; - N->Flags |= GepNode::Root; + N->Flags |= GepNode::Root | InBounds; } else { // If PtrOp was a GEP instruction, it must have already been processed. // The ValueToNodeMap entry for it is the last gep node in the generated @@ -373,7 +380,7 @@ void HexagonCommonGEP::processGepInst(GetElementPtrInst *GepI, Value *Op = *OI; GepNode *Nx = new (*Mem) GepNode; Nx->Parent = PN; // Link Nx to the previous node. - Nx->Flags |= GepNode::Internal; + Nx->Flags |= GepNode::Internal | InBounds; Nx->PTy = PtrTy; Nx->Idx = Op; Nodes.push_back(Nx); @@ -1081,7 +1088,7 @@ Value *HexagonCommonGEP::fabricateGEP(NodeVect &NA, BasicBlock::iterator At, GepNode *RN = NA[0]; assert((RN->Flags & GepNode::Root) && "Creating GEP for non-root"); - Value *NewInst = nullptr; + GetElementPtrInst *NewInst = nullptr; Value *Input = RN->BaseVal; Value **IdxList = new Value*[Num+1]; unsigned nax = 0; @@ -1112,6 +1119,7 @@ Value *HexagonCommonGEP::fabricateGEP(NodeVect &NA, BasicBlock::iterator At, Type *InpTy = Input->getType(); Type *ElTy = cast(InpTy->getScalarType())->getElementType(); NewInst = GetElementPtrInst::Create(ElTy, Input, A, "cgep", &*At); + NewInst->setIsInBounds(RN->Flags & GepNode::InBounds); DEBUG(dbgs() << "new GEP: " << *NewInst << '\n'); Input = NewInst; } while (nax <= Num); diff --git a/interpreter/llvm/src/lib/Target/Hexagon/HexagonConstPropagation.cpp b/interpreter/llvm/src/lib/Target/Hexagon/HexagonConstPropagation.cpp index 783b916e04b05..49ddd6961f8a9 100644 --- a/interpreter/llvm/src/lib/Target/Hexagon/HexagonConstPropagation.cpp +++ b/interpreter/llvm/src/lib/Target/Hexagon/HexagonConstPropagation.cpp @@ -2244,6 +2244,7 @@ bool HexagonConstEvaluator::evaluate(const MachineInstr &BrI, case Hexagon::J2_jumpfnew: case Hexagon::J2_jumpfnewpt: Negated = true; + LLVM_FALLTHROUGH; case Hexagon::J2_jumpt: case Hexagon::J2_jumptnew: case Hexagon::J2_jumptnewpt: @@ -2276,7 +2277,7 @@ bool HexagonConstEvaluator::evaluate(const MachineInstr &BrI, goto Undetermined; uint32_t Props = PredC.properties(); - bool CTrue = false, CFalse = false;; + bool CTrue = false, CFalse = false; if (Props & ConstantProperties::Zero) CFalse = true; else if (Props & ConstantProperties::NonZero) diff --git a/interpreter/llvm/src/lib/Target/Hexagon/HexagonCopyToCombine.cpp b/interpreter/llvm/src/lib/Target/Hexagon/HexagonCopyToCombine.cpp index 8118c8eb149d9..6b4f534282568 100644 --- a/interpreter/llvm/src/lib/Target/Hexagon/HexagonCopyToCombine.cpp +++ b/interpreter/llvm/src/lib/Target/Hexagon/HexagonCopyToCombine.cpp @@ -13,7 +13,6 @@ //===----------------------------------------------------------------------===// #include "HexagonInstrInfo.h" #include "HexagonSubtarget.h" -#include "llvm/PassSupport.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -22,6 +21,7 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/PassSupport.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" diff --git a/interpreter/llvm/src/lib/Target/Hexagon/HexagonDepInstrInfo.td b/interpreter/llvm/src/lib/Target/Hexagon/HexagonDepInstrInfo.td index 2dc74632e9be2..30ebf89c98083 100644 --- a/interpreter/llvm/src/lib/Target/Hexagon/HexagonDepInstrInfo.td +++ b/interpreter/llvm/src/lib/Target/Hexagon/HexagonDepInstrInfo.td @@ -45863,6 +45863,7 @@ 
tc_30665cb0, TypeST>, Enc_ecbcc8 { let Inst{13-0} = 0b00000000000000; let Inst{31-21} = 0b10100000000; let isSoloAin1 = 1; +let hasSideEffects = 1; } def Y2_dccleaninva : HInst< (outs), @@ -45872,6 +45873,7 @@ tc_30665cb0, TypeST>, Enc_ecbcc8 { let Inst{13-0} = 0b00000000000000; let Inst{31-21} = 0b10100000010; let isSoloAin1 = 1; +let hasSideEffects = 1; } def Y2_dcfetch : HInst< (outs), @@ -45900,6 +45902,7 @@ tc_30665cb0, TypeST>, Enc_ecbcc8 { let Inst{13-0} = 0b00000000000000; let Inst{31-21} = 0b10100000001; let isSoloAin1 = 1; +let hasSideEffects = 1; } def Y2_dczeroa : HInst< (outs), @@ -45909,6 +45912,7 @@ tc_30665cb0, TypeST>, Enc_ecbcc8 { let Inst{13-0} = 0b00000000000000; let Inst{31-21} = 0b10100000110; let isSoloAin1 = 1; +let hasSideEffects = 1; let mayStore = 1; } def Y2_icinva : HInst< diff --git a/interpreter/llvm/src/lib/Target/Hexagon/HexagonEarlyIfConv.cpp b/interpreter/llvm/src/lib/Target/Hexagon/HexagonEarlyIfConv.cpp index 67af947e089dd..80361015e6499 100644 --- a/interpreter/llvm/src/lib/Target/Hexagon/HexagonEarlyIfConv.cpp +++ b/interpreter/llvm/src/lib/Target/Hexagon/HexagonEarlyIfConv.cpp @@ -59,15 +59,13 @@ // J2_jump , %PC // Successors according to CFG: BB#6 BB#3 -#define DEBUG_TYPE "hexagon-eif" - #include "Hexagon.h" #include "HexagonInstrInfo.h" #include "HexagonSubtarget.h" #include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineDominators.h" @@ -90,6 +88,8 @@ #include #include +#define DEBUG_TYPE "hexagon-eif" + using namespace llvm; namespace llvm { diff --git a/interpreter/llvm/src/lib/Target/Hexagon/HexagonExpandCondsets.cpp b/interpreter/llvm/src/lib/Target/Hexagon/HexagonExpandCondsets.cpp index 9f8c9ded8127b..a2f6dd68c1a13 100644 --- a/interpreter/llvm/src/lib/Target/Hexagon/HexagonExpandCondsets.cpp +++ b/interpreter/llvm/src/lib/Target/Hexagon/HexagonExpandCondsets.cpp @@ -86,8 +86,6 @@ // however, is that finding the locations where the implicit uses need // to be added, and updating the live ranges will be more involved. -#define DEBUG_TYPE "expand-condsets" - #include "HexagonInstrInfo.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SetVector.h" @@ -116,6 +114,8 @@ #include #include +#define DEBUG_TYPE "expand-condsets" + using namespace llvm; static cl::opt OptTfrLimit("expand-condsets-tfr-limit", @@ -567,8 +567,19 @@ unsigned HexagonExpandCondsets::getCondTfrOpcode(const MachineOperand &SO, } llvm_unreachable("Invalid register operand"); } - if (SO.isImm() || SO.isFPImm()) - return IfTrue ? C2_cmoveit : C2_cmoveif; + switch (SO.getType()) { + case MachineOperand::MO_Immediate: + case MachineOperand::MO_FPImmediate: + case MachineOperand::MO_ConstantPoolIndex: + case MachineOperand::MO_TargetIndex: + case MachineOperand::MO_JumpTableIndex: + case MachineOperand::MO_ExternalSymbol: + case MachineOperand::MO_GlobalAddress: + case MachineOperand::MO_BlockAddress: + return IfTrue ? 
C2_cmoveit : C2_cmoveif; + default: + break; + } llvm_unreachable("Unexpected source operand"); } diff --git a/interpreter/llvm/src/lib/Target/Hexagon/HexagonFixupHwLoops.cpp b/interpreter/llvm/src/lib/Target/Hexagon/HexagonFixupHwLoops.cpp index 015d3b840e6fe..23d4e2610d9a5 100644 --- a/interpreter/llvm/src/lib/Target/Hexagon/HexagonFixupHwLoops.cpp +++ b/interpreter/llvm/src/lib/Target/Hexagon/HexagonFixupHwLoops.cpp @@ -12,10 +12,9 @@ // form. //===----------------------------------------------------------------------===// - -#include "llvm/ADT/DenseMap.h" #include "Hexagon.h" #include "HexagonTargetMachine.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" diff --git a/interpreter/llvm/src/lib/Target/Hexagon/HexagonFrameLowering.cpp b/interpreter/llvm/src/lib/Target/Hexagon/HexagonFrameLowering.cpp index a04aca4afa0f1..e5e75198b2d18 100644 --- a/interpreter/llvm/src/lib/Target/Hexagon/HexagonFrameLowering.cpp +++ b/interpreter/llvm/src/lib/Target/Hexagon/HexagonFrameLowering.cpp @@ -8,10 +8,8 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "hexagon-pei" - -#include "HexagonBlockRanges.h" #include "HexagonFrameLowering.h" +#include "HexagonBlockRanges.h" #include "HexagonInstrInfo.h" #include "HexagonMachineFunctionInfo.h" #include "HexagonRegisterInfo.h" @@ -63,6 +61,8 @@ #include #include +#define DEBUG_TYPE "hexagon-pei" + // Hexagon stack frame layout as defined by the ABI: // // Incoming arguments @@ -178,8 +178,8 @@ static cl::opt EnableSaveRestoreLong("enable-save-restore-long", cl::Hidden, cl::desc("Enable long calls for save-restore stubs."), cl::init(false), cl::ZeroOrMore); -static cl::opt UseAllocframe("use-allocframe", cl::init(true), - cl::Hidden, cl::desc("Use allocframe more conservatively")); +static cl::opt EliminateFramePointer("hexagon-fp-elim", cl::init(true), + cl::Hidden, cl::desc("Refrain from using FP whenever possible")); static cl::opt OptimizeSpillSlots("hexagon-opt-spill", cl::Hidden, cl::init(true), cl::desc("Optimize spill slots")); @@ -550,7 +550,6 @@ void HexagonFrameLowering::insertPrologueInBlock(MachineBasicBlock &MBB, auto &HST = MF.getSubtarget(); auto &HII = *HST.getInstrInfo(); auto &HRI = *HST.getRegisterInfo(); - DebugLoc dl; unsigned MaxAlign = std::max(MFI.getMaxAlignment(), getStackAlignment()); @@ -584,77 +583,56 @@ void HexagonFrameLowering::insertPrologueInBlock(MachineBasicBlock &MBB, MI->eraseFromParent(); } - if (!hasFP(MF)) - return; - - // Check for overflow. - // Hexagon_TODO: Ugh! hardcoding. Is there an API that can be used? - const unsigned int ALLOCFRAME_MAX = 16384; + DebugLoc dl = MBB.findDebugLoc(InsertPt); - // Create a dummy memory operand to avoid allocframe from being treated as - // a volatile memory reference. - MachineMemOperand *MMO = - MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOStore, - 4, 4); - - if (NumBytes >= ALLOCFRAME_MAX) { - // Emit allocframe(#0). - BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::S2_allocframe)) - .addImm(0) - .addMemOperand(MMO); - - // Subtract offset from frame pointer. - // We use a caller-saved non-parameter register for that. 
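The lines being deleted here worked around the limited immediate range of allocframe (16384 bytes) by materializing the frame size in a caller-saved register and subtracting it from SP. The replacement, insertAllocframe further down in this file, keeps the same size split but folds the adjustment into one add of a negative immediate. A condensed sketch of that split, with printf standing in for BuildMI and r29 denoting the Hexagon stack pointer:

#include <cstdio>

// For frames at or above the 16384-byte immediate limit, emit
// allocframe(#0) plus an explicit SP adjustment (the A2_addi path);
// otherwise a single allocframe suffices.
void emitAllocframe(unsigned NumBytes) {
  const unsigned AllocframeMax = 16384;
  if (NumBytes >= AllocframeMax) {
    std::printf("allocframe(#0)\n");
    std::printf("r29 = add(r29, #%d)\n", -static_cast<int>(NumBytes));
  } else {
    std::printf("allocframe(#%u)\n", NumBytes);
  }
}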
- unsigned CallerSavedReg = HRI.getFirstCallerSavedNonParamReg(); - BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::CONST32), - CallerSavedReg).addImm(NumBytes); - BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_sub), SP) + if (hasFP(MF)) { + insertAllocframe(MBB, InsertPt, NumBytes); + if (AlignStack) { + BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_andir), SP) + .addReg(SP) + .addImm(-int64_t(MaxAlign)); + } + // If the stack-checking is enabled, and we spilled the callee-saved + // registers inline (i.e. did not use a spill function), then call + // the stack checker directly. + if (EnableStackOVFSanitizer && !PrologueStubs) + BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::PS_call_stk)) + .addExternalSymbol("__runtime_stack_check"); + } else if (NumBytes > 0) { + assert(alignTo(NumBytes, 8) == NumBytes); + BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_addi), SP) .addReg(SP) - .addReg(CallerSavedReg); - } else { - BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::S2_allocframe)) - .addImm(NumBytes) - .addMemOperand(MMO); + .addImm(-int(NumBytes)); } - - if (AlignStack) { - BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_andir), SP) - .addReg(SP) - .addImm(-int64_t(MaxAlign)); - } - - // If the stack-checking is enabled, and we spilled the callee-saved - // registers inline (i.e. did not use a spill function), then call - // the stack checker directly. - if (EnableStackOVFSanitizer && !PrologueStubs) - BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::PS_call_stk)) - .addExternalSymbol("__runtime_stack_check"); } void HexagonFrameLowering::insertEpilogueInBlock(MachineBasicBlock &MBB) const { MachineFunction &MF = *MBB.getParent(); - if (!hasFP(MF)) - return; - auto &HST = MF.getSubtarget(); auto &HII = *HST.getInstrInfo(); auto &HRI = *HST.getRegisterInfo(); unsigned SP = HRI.getStackRegister(); + MachineBasicBlock::iterator InsertPt = MBB.getFirstTerminator(); + DebugLoc dl = MBB.findDebugLoc(InsertPt); + + if (!hasFP(MF)) { + MachineFrameInfo &MFI = MF.getFrameInfo(); + if (unsigned NumBytes = MFI.getStackSize()) { + BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_addi), SP) + .addReg(SP) + .addImm(NumBytes); + } + return; + } + MachineInstr *RetI = getReturn(MBB); unsigned RetOpc = RetI ? RetI->getOpcode() : 0; - MachineBasicBlock::iterator InsertPt = MBB.getFirstTerminator(); - DebugLoc DL; - if (InsertPt != MBB.end()) - DL = InsertPt->getDebugLoc(); - else if (!MBB.empty()) - DL = std::prev(MBB.end())->getDebugLoc(); - // Handle EH_RETURN. if (RetOpc == Hexagon::EH_RETURN_JMPR) { - BuildMI(MBB, InsertPt, DL, HII.get(Hexagon::L2_deallocframe)); - BuildMI(MBB, InsertPt, DL, HII.get(Hexagon::A2_add), SP) + BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::L2_deallocframe)); + BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_add), SP) .addReg(SP) .addReg(Hexagon::R28); return; @@ -699,16 +677,52 @@ void HexagonFrameLowering::insertEpilogueInBlock(MachineBasicBlock &MBB) const { // otherwise just add deallocframe. The function could be returning via a // tail call. if (RetOpc != Hexagon::PS_jmpret || DisableDeallocRet) { - BuildMI(MBB, InsertPt, DL, HII.get(Hexagon::L2_deallocframe)); + BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::L2_deallocframe)); return; } unsigned NewOpc = Hexagon::L4_return; - MachineInstr *NewI = BuildMI(MBB, RetI, DL, HII.get(NewOpc)); + MachineInstr *NewI = BuildMI(MBB, RetI, dl, HII.get(NewOpc)); // Transfer the function live-out registers. 
NewI->copyImplicitOps(MF, *RetI); MBB.erase(RetI); } +void HexagonFrameLowering::insertAllocframe(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertPt, unsigned NumBytes) const { + MachineFunction &MF = *MBB.getParent(); + auto &HST = MF.getSubtarget(); + auto &HII = *HST.getInstrInfo(); + auto &HRI = *HST.getRegisterInfo(); + + // Check for overflow. + // Hexagon_TODO: Ugh! hardcoding. Is there an API that can be used? + const unsigned int ALLOCFRAME_MAX = 16384; + + // Create a dummy memory operand to avoid allocframe from being treated as + // a volatile memory reference. + auto *MMO = MF.getMachineMemOperand(MachinePointerInfo::getStack(MF, 0), + MachineMemOperand::MOStore, 4, 4); + + DebugLoc dl = MBB.findDebugLoc(InsertPt); + + if (NumBytes >= ALLOCFRAME_MAX) { + // Emit allocframe(#0). + BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::S2_allocframe)) + .addImm(0) + .addMemOperand(MMO); + + // Subtract the size from the stack pointer. + unsigned SP = HRI.getStackRegister(); + BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_addi), SP) + .addReg(SP) + .addImm(-int(NumBytes)); + } else { + BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::S2_allocframe)) + .addImm(NumBytes) + .addMemOperand(MMO); + } +} + void HexagonFrameLowering::updateEntryPaths(MachineFunction &MF, MachineBasicBlock &SaveB) const { SetVector Worklist; @@ -928,12 +942,11 @@ void HexagonFrameLowering::insertCFIInstructionsAt(MachineBasicBlock &MBB, } bool HexagonFrameLowering::hasFP(const MachineFunction &MF) const { + if (MF.getFunction()->hasFnAttribute(Attribute::Naked)) + return false; + auto &MFI = MF.getFrameInfo(); auto &HRI = *MF.getSubtarget().getRegisterInfo(); - - bool HasFixed = MFI.getNumFixedObjects(); - bool HasPrealloc = const_cast(MFI) - .getLocalFrameObjectCount(); bool HasExtraAlign = HRI.needsStackRealignment(MF); bool HasAlloca = MFI.hasVarSizedObjects(); @@ -947,18 +960,23 @@ bool HexagonFrameLowering::hasFP(const MachineFunction &MF) const { // By default we want to use SP (since it's always there). FP requires // some setup (i.e. ALLOCFRAME). - // Fixed and preallocated objects need FP if the distance from them to - // the SP is unknown (as is with alloca or aligna). - if ((HasFixed || HasPrealloc) && (HasAlloca || HasExtraAlign)) + // Both, alloca and stack alignment modify the stack pointer by an + // undetermined value, so we need to save it at the entry to the function + // (i.e. use allocframe). + if (HasAlloca || HasExtraAlign) return true; if (MFI.getStackSize() > 0) { - if (EnableStackOVFSanitizer || UseAllocframe) + // If FP-elimination is disabled, we have to use FP at this point. 
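With these changes, hasFP reads as a short decision list: naked functions never get a frame, dynamic stack adjustment always forces one, and otherwise the frame-pointer-elimination options and the call/LR state decide. Restated with plain bools standing in for the MachineFunction queries (a paraphrase of the patch, not a drop-in):

// FPElimDisabled stands for both TM.Options.DisableFramePointerElim(MF)
// and the !EliminateFramePointer (-hexagon-fp-elim=0) case.
bool hasFP(bool IsNaked, bool HasAlloca, bool HasExtraAlign,
           unsigned StackSize, bool FPElimDisabled, bool StackChecks,
           bool HasCalls, bool ClobbersLR) {
  if (IsNaked)
    return false;          // naked functions get no prologue at all
  if (HasAlloca || HasExtraAlign)
    return true;           // SP moves by an unknown amount; save it first
  if (StackSize > 0 && (FPElimDisabled || StackChecks))
    return true;
  return HasCalls || ClobbersLR;
}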
+ const TargetMachine &TM = MF.getTarget(); + if (TM.Options.DisableFramePointerElim(MF) || !EliminateFramePointer) + return true; + if (EnableStackOVFSanitizer) return true; } - if (MFI.hasCalls() || - MF.getInfo()->hasClobberLR()) + const auto &HMFI = *MF.getInfo(); + if (MFI.hasCalls() || HMFI.hasClobberLR()) return true; return false; @@ -1051,10 +1069,27 @@ int HexagonFrameLowering::getFrameIndexReference(const MachineFunction &MF, bool HasExtraAlign = HRI.needsStackRealignment(MF); bool NoOpt = MF.getTarget().getOptLevel() == CodeGenOpt::None; - unsigned SP = HRI.getStackRegister(), FP = HRI.getFrameRegister(); auto &HMFI = *MF.getInfo(); - unsigned AP = HMFI.getStackAlignBasePhysReg(); unsigned FrameSize = MFI.getStackSize(); + unsigned SP = HRI.getStackRegister(); + unsigned FP = HRI.getFrameRegister(); + unsigned AP = HMFI.getStackAlignBasePhysReg(); + // It may happen that AP will be absent even HasAlloca && HasExtraAlign + // is true. HasExtraAlign may be set because of vector spills, without + // aligned locals or aligned outgoing function arguments. Since vector + // spills will ultimately be "unaligned", it is safe to use FP as the + // base register. + // In fact, in such a scenario the stack is actually not required to be + // aligned, although it may end up being aligned anyway, since this + // particular case is not easily detectable. The alignment will be + // unnecessary, but not incorrect. + // Unfortunately there is no quick way to verify that the above is + // indeed the case (and that it's not a result of an error), so just + // assume that missing AP will be replaced by FP. + // (A better fix would be to rematerialize AP from FP and always align + // vector spills.) + if (AP == 0) + AP = FP; bool UseFP = false, UseAP = false; // Default: use SP (except at -O0). // Use FP at -O0, except when there are objects with extra alignment. @@ -1119,7 +1154,7 @@ int HexagonFrameLowering::getFrameIndexReference(const MachineFunction &MF, // there will be no SP -= FrameSize), so the frame size should not be // added to the calculated offset. int RealOffset = Offset; - if (!UseFP && !UseAP && HasFP) + if (!UseFP && !UseAP) RealOffset = FrameSize+Offset; return RealOffset; } @@ -1657,7 +1692,7 @@ bool HexagonFrameLowering::expandStoreVec2(MachineBasicBlock &B, // defined. From the point of view of the liveness tracking, it is ok to // store it as a whole, but if we break it up we may end up storing a // register that is entirely undefined. - LivePhysRegs LPR(&HRI); + LivePhysRegs LPR(HRI); LPR.addLiveIns(B); SmallVector,2> Clobbers; for (auto R = B.begin(); R != It; ++R) { @@ -2386,10 +2421,12 @@ void HexagonFrameLowering::addCalleeSaveRegistersAsImpOperand(MachineInstr *MI, /// be generated via inline code. If this function returns "true", inline /// code will be generated. If this function returns "false", additional /// checks are performed, which may still lead to the inline code. 
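A note on the LivePhysRegs change in the expandStoreVec2 hunk above: the constructor now takes the register info by reference rather than by pointer. Written out with the template arguments that usage needs, the liveness scan looks roughly like this; a usage sketch under those assumptions, not code from the patch:

#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include <utility>

// Recompute physical-register liveness from block entry down to It, the
// pattern expandStoreVec2 uses before splitting a double-vector store.
void scanLiveness(llvm::MachineBasicBlock &B,
                  llvm::MachineBasicBlock::iterator It,
                  const llvm::TargetRegisterInfo &HRI) {
  llvm::LivePhysRegs LPR(HRI);   // by reference in this LLVM version
  LPR.addLiveIns(B);
  llvm::SmallVector<std::pair<unsigned, const llvm::MachineOperand *>, 2>
      Clobbers;
  for (auto R = B.begin(); R != It; ++R)
    LPR.stepForward(*R, Clobbers);
}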
-bool HexagonFrameLowering::shouldInlineCSR(MachineFunction &MF, +bool HexagonFrameLowering::shouldInlineCSR(const MachineFunction &MF, const CSIVect &CSI) const { if (MF.getInfo()->hasEHReturn()) return true; + if (!hasFP(MF)) + return true; if (!isOptSize(MF) && !isMinSize(MF)) if (MF.getTarget().getOptLevel() > CodeGenOpt::Default) return true; @@ -2416,7 +2453,7 @@ bool HexagonFrameLowering::shouldInlineCSR(MachineFunction &MF, return false; } -bool HexagonFrameLowering::useSpillFunction(MachineFunction &MF, +bool HexagonFrameLowering::useSpillFunction(const MachineFunction &MF, const CSIVect &CSI) const { if (shouldInlineCSR(MF, CSI)) return false; @@ -2429,7 +2466,7 @@ bool HexagonFrameLowering::useSpillFunction(MachineFunction &MF, return Threshold < NumCSI; } -bool HexagonFrameLowering::useRestoreFunction(MachineFunction &MF, +bool HexagonFrameLowering::useRestoreFunction(const MachineFunction &MF, const CSIVect &CSI) const { if (shouldInlineCSR(MF, CSI)) return false; @@ -2454,9 +2491,44 @@ bool HexagonFrameLowering::mayOverflowFrameOffset(MachineFunction &MF) const { unsigned StackSize = MF.getFrameInfo().estimateStackSize(MF); auto &HST = MF.getSubtarget(); // A fairly simplistic guess as to whether a potential load/store to a - // stack location could require an extra register. It does not account - // for store-immediate instructions. - if (HST.useHVXOps()) - return StackSize > 256; + // stack location could require an extra register. + if (HST.useHVXOps() && StackSize > 256) + return true; + + // Check if the function has store-immediate instructions that access + // the stack. Since the offset field is not extendable, if the stack + // size exceeds the offset limit (6 bits, shifted), the stores will + // require a new base register. + bool HasImmStack = false; + unsigned MinLS = ~0u; // Log_2 of the memory access size. + + for (const MachineBasicBlock &B : MF) { + for (const MachineInstr &MI : B) { + unsigned LS = 0; + switch (MI.getOpcode()) { + case Hexagon::S4_storeirit_io: + case Hexagon::S4_storeirif_io: + case Hexagon::S4_storeiri_io: + ++LS; + LLVM_FALLTHROUGH; + case Hexagon::S4_storeirht_io: + case Hexagon::S4_storeirhf_io: + case Hexagon::S4_storeirh_io: + ++LS; + LLVM_FALLTHROUGH; + case Hexagon::S4_storeirbt_io: + case Hexagon::S4_storeirbf_io: + case Hexagon::S4_storeirb_io: + if (MI.getOperand(0).isFI()) + HasImmStack = true; + MinLS = std::min(MinLS, LS); + break; + } + } + } + + if (HasImmStack) + return !isUInt<6>(StackSize >> MinLS); + return false; } diff --git a/interpreter/llvm/src/lib/Target/Hexagon/HexagonFrameLowering.h b/interpreter/llvm/src/lib/Target/Hexagon/HexagonFrameLowering.h index 529a61d4a5b52..f4d4e1b61a266 100644 --- a/interpreter/llvm/src/lib/Target/Hexagon/HexagonFrameLowering.h +++ b/interpreter/llvm/src/lib/Target/Hexagon/HexagonFrameLowering.h @@ -48,6 +48,15 @@ class HexagonFrameLowering : public TargetFrameLowering { return true; } + bool hasReservedCallFrame(const MachineFunction &MF) const override { + // We always reserve call frame as a part of the initial stack allocation. + return true; + } + bool canSimplifyCallFramePseudos(const MachineFunction &MF) const override { + // Override this function to avoid calling hasFP before CSI is set + // (the default implementation calls hasFP). 
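Reserving the call frame up front means the outgoing-argument area is part of the fixed stack allocation, so the ADJCALLSTACKDOWN/UP pseudos have no work left to do. Under these overrides, eliminateCallFramePseudoInstr typically reduces to erasing the pseudo; a sketch under that assumption rather than the verbatim Hexagon body:

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"

// With hasReservedCallFrame() == true there is no SP adjustment to expand;
// the call-frame pseudo can simply be removed.
llvm::MachineBasicBlock::iterator
erasePseudo(llvm::MachineFunction &MF, llvm::MachineBasicBlock &MBB,
            llvm::MachineBasicBlock::iterator I) {
  return MBB.erase(I);
}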
+ return true; + } MachineBasicBlock::iterator eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const override; @@ -94,6 +103,8 @@ class HexagonFrameLowering : public TargetFrameLowering { unsigned SP, unsigned CF) const; void insertPrologueInBlock(MachineBasicBlock &MBB, bool PrologueStubs) const; void insertEpilogueInBlock(MachineBasicBlock &MBB) const; + void insertAllocframe(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertPt, unsigned NumBytes) const; bool insertCSRSpillsInBlock(MachineBasicBlock &MBB, const CSIVect &CSI, const HexagonRegisterInfo &HRI, bool &PrologueStubs) const; bool insertCSRRestoresInBlock(MachineBasicBlock &MBB, const CSIVect &CSI, @@ -148,9 +159,9 @@ class HexagonFrameLowering : public TargetFrameLowering { void addCalleeSaveRegistersAsImpOperand(MachineInstr *MI, const CSIVect &CSI, bool IsDef, bool IsKill) const; - bool shouldInlineCSR(MachineFunction &MF, const CSIVect &CSI) const; - bool useSpillFunction(MachineFunction &MF, const CSIVect &CSI) const; - bool useRestoreFunction(MachineFunction &MF, const CSIVect &CSI) const; + bool shouldInlineCSR(const MachineFunction &MF, const CSIVect &CSI) const; + bool useSpillFunction(const MachineFunction &MF, const CSIVect &CSI) const; + bool useRestoreFunction(const MachineFunction &MF, const CSIVect &CSI) const; bool mayOverflowFrameOffset(MachineFunction &MF) const; }; diff --git a/interpreter/llvm/src/lib/Target/Hexagon/HexagonGenExtract.cpp b/interpreter/llvm/src/lib/Target/Hexagon/HexagonGenExtract.cpp index c99ad5130aef5..7c6de6d513e8d 100644 --- a/interpreter/llvm/src/lib/Target/Hexagon/HexagonGenExtract.cpp +++ b/interpreter/llvm/src/lib/Target/Hexagon/HexagonGenExtract.cpp @@ -14,10 +14,10 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" -#include "llvm/IR/IRBuilder.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" diff --git a/interpreter/llvm/src/lib/Target/Hexagon/HexagonGenInsert.cpp b/interpreter/llvm/src/lib/Target/Hexagon/HexagonGenInsert.cpp index 54d99d399f885..0a955aedaf1a8 100644 --- a/interpreter/llvm/src/lib/Target/Hexagon/HexagonGenInsert.cpp +++ b/interpreter/llvm/src/lib/Target/Hexagon/HexagonGenInsert.cpp @@ -7,8 +7,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "hexinsert" - #include "BitTracker.h" #include "HexagonBitTracker.h" #include "HexagonInstrInfo.h" @@ -17,9 +15,9 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineDominators.h" @@ -34,8 +32,8 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/Timer.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetRegisterInfo.h" #include #include @@ -44,6 +42,8 @@ #include #include +#define DEBUG_TYPE "hexinsert" + using namespace llvm; static cl::opt VRegIndexCutoff("insert-vreg-cutoff", cl::init(~0U), diff --git a/interpreter/llvm/src/lib/Target/Hexagon/HexagonGenMux.cpp 
b/interpreter/llvm/src/lib/Target/Hexagon/HexagonGenMux.cpp index 85222944c77ca..5abbcbba72ddd 100644 --- a/interpreter/llvm/src/lib/Target/Hexagon/HexagonGenMux.cpp +++ b/interpreter/llvm/src/lib/Target/Hexagon/HexagonGenMux.cpp @@ -28,6 +28,7 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -40,8 +41,8 @@ #include "llvm/Pass.h" #include "llvm/Support/MathExtras.h" #include -#include #include +#include #include using namespace llvm; @@ -59,9 +60,7 @@ namespace { public: static char ID; - HexagonGenMux() : MachineFunctionPass(ID), HII(nullptr), HRI(nullptr) { - initializeHexagonGenMuxPass(*PassRegistry::getPassRegistry()); - } + HexagonGenMux() : MachineFunctionPass(ID) {} StringRef getPassName() const override { return "Hexagon generate mux instructions"; @@ -79,8 +78,8 @@ namespace { } private: - const HexagonInstrInfo *HII; - const HexagonRegisterInfo *HRI; + const HexagonInstrInfo *HII = nullptr; + const HexagonRegisterInfo *HRI = nullptr; struct CondsetInfo { unsigned PredR = 0; @@ -134,7 +133,7 @@ namespace { } // end anonymous namespace -INITIALIZE_PASS(HexagonGenMux, "hexagon-mux", +INITIALIZE_PASS(HexagonGenMux, "hexagon-gen-mux", "Hexagon generate mux instructions", false, false) void HexagonGenMux::getSubRegs(unsigned Reg, BitVector &SRs) const { @@ -235,8 +234,11 @@ bool HexagonGenMux::genMuxInBlock(MachineBasicBlock &B) { unsigned DR = MI->getOperand(0).getReg(); if (isRegPair(DR)) continue; + MachineOperand &PredOp = MI->getOperand(1); + if (PredOp.isUndef()) + continue; - unsigned PR = MI->getOperand(1).getReg(); + unsigned PR = PredOp.getReg(); unsigned Idx = I2X.lookup(MI); CondsetMap::iterator F = CM.find(DR); bool IfTrue = HII->isPredicatedTrue(Opc); @@ -316,22 +318,49 @@ bool HexagonGenMux::genMuxInBlock(MachineBasicBlock &B) { ML.push_back(MuxInfo(At, DR, PR, SrcT, SrcF, Def1, Def2)); } - for (unsigned I = 0, N = ML.size(); I < N; ++I) { - MuxInfo &MX = ML[I]; - MachineBasicBlock &B = *MX.At->getParent(); - DebugLoc DL = MX.At->getDebugLoc(); + for (MuxInfo &MX : ML) { unsigned MxOpc = getMuxOpcode(*MX.SrcT, *MX.SrcF); if (!MxOpc) continue; - BuildMI(B, MX.At, DL, HII->get(MxOpc), MX.DefR) - .addReg(MX.PredR) - .add(*MX.SrcT) - .add(*MX.SrcF); + MachineBasicBlock &B = *MX.At->getParent(); + const DebugLoc &DL = B.findDebugLoc(MX.At); + auto NewMux = BuildMI(B, MX.At, DL, HII->get(MxOpc), MX.DefR) + .addReg(MX.PredR) + .add(*MX.SrcT) + .add(*MX.SrcF); + NewMux->clearKillInfo(); B.erase(MX.Def1); B.erase(MX.Def2); Changed = true; } + // Fix up kill flags. + + LivePhysRegs LPR(*HRI); + LPR.addLiveOuts(B); + auto IsLive = [&LPR,this] (unsigned Reg) -> bool { + for (MCSubRegIterator S(Reg, HRI, true); S.isValid(); ++S) + if (LPR.contains(*S)) + return true; + return false; + }; + for (auto I = B.rbegin(), E = B.rend(); I != E; ++I) { + if (I->isDebugValue()) + continue; + // This isn't 100% accurate, but it's safe. + // It won't detect (as a kill) a case like this + // r0 = add r0, 1 <-- r0 should be "killed" + // ... 
= r0 + for (MachineOperand &Op : I->operands()) { + if (!Op.isReg() || !Op.isUse()) + continue; + assert(Op.getSubReg() == 0 && "Should have physical registers only"); + bool Live = IsLive(Op.getReg()); + Op.setIsKill(!Live); + } + LPR.stepBackward(*I); + } + return Changed; } diff --git a/interpreter/llvm/src/lib/Target/Hexagon/HexagonGenPredicate.cpp b/interpreter/llvm/src/lib/Target/Hexagon/HexagonGenPredicate.cpp index f14c733dcf511..2da211563e0a1 100644 --- a/interpreter/llvm/src/lib/Target/Hexagon/HexagonGenPredicate.cpp +++ b/interpreter/llvm/src/lib/Target/Hexagon/HexagonGenPredicate.cpp @@ -7,8 +7,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "gen-pred" - #include "HexagonInstrInfo.h" #include "HexagonSubtarget.h" #include "llvm/ADT/SetVector.h" @@ -35,6 +33,8 @@ #include #include +#define DEBUG_TYPE "gen-pred" + using namespace llvm; namespace llvm { @@ -334,6 +334,7 @@ bool HexagonGenPredicate::isScalarPred(Register PredReg) { if (MRI->getRegClass(PR.R) != PredRC) return false; // If it is a copy between two predicate registers, fall through. + LLVM_FALLTHROUGH; } case Hexagon::C2_and: case Hexagon::C2_andn: diff --git a/interpreter/llvm/src/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/interpreter/llvm/src/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp index 8e10c521a77d3..0163b2e2bdc46 100644 --- a/interpreter/llvm/src/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp +++ b/interpreter/llvm/src/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp @@ -71,6 +71,9 @@ class HexagonDAGToDAGISel : public SelectionDAGISel { return true; } + bool ComplexPatternFuncMutatesDAG() const override { + return true; + } void PreprocessISelDAG() override; void EmitFunctionEntryCode() override; @@ -81,6 +84,7 @@ class HexagonDAGToDAGISel : public SelectionDAGISel { inline bool SelectAddrGP(SDValue &N, SDValue &R); bool SelectGlobalAddress(SDValue &N, SDValue &R, bool UseGP); bool SelectAddrFI(SDValue &N, SDValue &R); + bool DetectUseSxtw(SDValue &N, SDValue &R); StringRef getPassName() const override { return "Hexagon DAG->DAG Pattern Instruction Selection"; @@ -106,7 +110,6 @@ class HexagonDAGToDAGISel : public SelectionDAGISel { void SelectIndexedStore(StoreSDNode *ST, const SDLoc &dl); void SelectStore(SDNode *N); void SelectSHL(SDNode *N); - void SelectMul(SDNode *N); void SelectZeroExtend(SDNode *N); void SelectIntrinsicWChain(SDNode *N); void SelectIntrinsicWOChain(SDNode *N); @@ -118,9 +121,10 @@ class HexagonDAGToDAGISel : public SelectionDAGISel { #include "HexagonGenDAGISel.inc" private: - bool isValueExtension(const SDValue &Val, unsigned FromBits, SDValue &Src); + bool keepsLowBits(const SDValue &Val, unsigned NumBits, SDValue &Src); bool isOrEquivalentToAdd(const SDNode *N) const; bool isAlignedMemNode(const MemSDNode *N) const; + bool isSmallStackStore(const StoreSDNode *N) const; bool isPositiveHalfWord(const SDNode *N) const; // DAG preprocessing functions. @@ -237,22 +241,31 @@ void HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, const SDLoc &dl) { case MVT::v32i16: case MVT::v16i32: case MVT::v8i64: - if (isAlignedMemNode(LD)) - Opcode = IsValidInc ? Hexagon::V6_vL32b_pi : Hexagon::V6_vL32b_ai; - else + if (isAlignedMemNode(LD)) { + if (LD->isNonTemporal()) + Opcode = IsValidInc ? Hexagon::V6_vL32b_nt_pi : Hexagon::V6_vL32b_nt_ai; + else + Opcode = IsValidInc ? Hexagon::V6_vL32b_pi : Hexagon::V6_vL32b_ai; + } else { Opcode = IsValidInc ? 
Hexagon::V6_vL32Ub_pi : Hexagon::V6_vL32Ub_ai; + } break; // 128B case MVT::v128i8: case MVT::v64i16: case MVT::v32i32: case MVT::v16i64: - if (isAlignedMemNode(LD)) - Opcode = IsValidInc ? Hexagon::V6_vL32b_pi_128B - : Hexagon::V6_vL32b_ai_128B; - else + if (isAlignedMemNode(LD)) { + if (LD->isNonTemporal()) + Opcode = IsValidInc ? Hexagon::V6_vL32b_nt_pi_128B + : Hexagon::V6_vL32b_nt_ai_128B; + else + Opcode = IsValidInc ? Hexagon::V6_vL32b_pi_128B + : Hexagon::V6_vL32b_ai_128B; + } else { Opcode = IsValidInc ? Hexagon::V6_vL32Ub_pi_128B : Hexagon::V6_vL32Ub_ai_128B; + } break; default: llvm_unreachable("Unexpected memory type in indexed load"); @@ -525,22 +538,31 @@ void HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, const SDLoc &dl) { case MVT::v32i16: case MVT::v16i32: case MVT::v8i64: - if (isAlignedMemNode(ST)) - Opcode = IsValidInc ? Hexagon::V6_vS32b_pi : Hexagon::V6_vS32b_ai; - else + if (isAlignedMemNode(ST)) { + if (ST->isNonTemporal()) + Opcode = IsValidInc ? Hexagon::V6_vS32b_nt_pi : Hexagon::V6_vS32b_nt_ai; + else + Opcode = IsValidInc ? Hexagon::V6_vS32b_pi : Hexagon::V6_vS32b_ai; + } else { Opcode = IsValidInc ? Hexagon::V6_vS32Ub_pi : Hexagon::V6_vS32Ub_ai; + } break; // 128B case MVT::v128i8: case MVT::v64i16: case MVT::v32i32: case MVT::v16i64: - if (isAlignedMemNode(ST)) - Opcode = IsValidInc ? Hexagon::V6_vS32b_pi_128B - : Hexagon::V6_vS32b_ai_128B; - else + if (isAlignedMemNode(ST)) { + if (ST->isNonTemporal()) + Opcode = IsValidInc ? Hexagon::V6_vS32b_nt_pi_128B + : Hexagon::V6_vS32b_nt_ai_128B; + else + Opcode = IsValidInc ? Hexagon::V6_vS32b_pi_128B + : Hexagon::V6_vS32b_ai_128B; + } else { Opcode = IsValidInc ? Hexagon::V6_vS32Ub_pi_128B : Hexagon::V6_vS32Ub_ai_128B; + } break; default: llvm_unreachable("Unexpected memory type in indexed store"); @@ -597,90 +619,6 @@ void HexagonDAGToDAGISel::SelectStore(SDNode *N) { SelectCode(ST); } -void HexagonDAGToDAGISel::SelectMul(SDNode *N) { - SDLoc dl(N); - - // %conv.i = sext i32 %tmp1 to i64 - // %conv2.i = sext i32 %add to i64 - // %mul.i = mul nsw i64 %conv2.i, %conv.i - // - // --- match with the following --- - // - // %mul.i = mpy (%tmp1, %add) - // - - if (N->getValueType(0) == MVT::i64) { - // Shifting a i64 signed multiply. - SDValue MulOp0 = N->getOperand(0); - SDValue MulOp1 = N->getOperand(1); - - SDValue OP0; - SDValue OP1; - - // Handle sign_extend and sextload. - if (MulOp0.getOpcode() == ISD::SIGN_EXTEND) { - SDValue Sext0 = MulOp0.getOperand(0); - if (Sext0.getNode()->getValueType(0) != MVT::i32) { - SelectCode(N); - return; - } - OP0 = Sext0; - } else if (MulOp0.getOpcode() == ISD::LOAD) { - LoadSDNode *LD = cast(MulOp0.getNode()); - if (LD->getMemoryVT() != MVT::i32 || - LD->getExtensionType() != ISD::SEXTLOAD || - LD->getAddressingMode() != ISD::UNINDEXED) { - SelectCode(N); - return; - } - SDValue Chain = LD->getChain(); - SDValue TargetConst0 = CurDAG->getTargetConstant(0, dl, MVT::i32); - OP0 = SDValue(CurDAG->getMachineNode(Hexagon::L2_loadri_io, dl, MVT::i32, - MVT::Other, - LD->getBasePtr(), TargetConst0, - Chain), 0); - } else { - SelectCode(N); - return; - } - - // Same goes for the second operand. 
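At the source level, the pattern this removed SelectMul matched by hand, and which the DetectUseSxtw complex pattern further down now catches declaratively, is the classic widening multiply:

#include <cstdint>

// Both operands are sign-extended from 32 bits, so the i64 multiply can be
// selected to a single M2_dpmpyss_s0 (32x32->64 mpy) instead of being
// matched operand-by-operand in C++ selection code.
int64_t widening_mul(int32_t a, int32_t b) {
  return static_cast<int64_t>(a) * static_cast<int64_t>(b);
}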
- if (MulOp1.getOpcode() == ISD::SIGN_EXTEND) { - SDValue Sext1 = MulOp1.getOperand(0); - if (Sext1.getNode()->getValueType(0) != MVT::i32) { - SelectCode(N); - return; - } - OP1 = Sext1; - } else if (MulOp1.getOpcode() == ISD::LOAD) { - LoadSDNode *LD = cast(MulOp1.getNode()); - if (LD->getMemoryVT() != MVT::i32 || - LD->getExtensionType() != ISD::SEXTLOAD || - LD->getAddressingMode() != ISD::UNINDEXED) { - SelectCode(N); - return; - } - SDValue Chain = LD->getChain(); - SDValue TargetConst0 = CurDAG->getTargetConstant(0, dl, MVT::i32); - OP1 = SDValue(CurDAG->getMachineNode(Hexagon::L2_loadri_io, dl, MVT::i32, - MVT::Other, - LD->getBasePtr(), TargetConst0, - Chain), 0); - } else { - SelectCode(N); - return; - } - - // Generate a mpy instruction. - SDNode *Result = CurDAG->getMachineNode(Hexagon::M2_dpmpyss_s0, dl, - MVT::i64, OP0, OP1); - ReplaceNode(N, Result); - return; - } - - SelectCode(N); -} - void HexagonDAGToDAGISel::SelectSHL(SDNode *N) { SDLoc dl(N); SDValue Shl_0 = N->getOperand(0); @@ -843,7 +781,7 @@ void HexagonDAGToDAGISel::SelectIntrinsicWOChain(SDNode *N) { SDValue V = N->getOperand(1); SDValue U; - if (isValueExtension(V, Bits, U)) { + if (keepsLowBits(V, Bits, U)) { SDValue R = CurDAG->getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), N->getOperand(0), U); ReplaceNode(N, R.getNode()); @@ -949,7 +887,6 @@ void HexagonDAGToDAGISel::Select(SDNode *N) { case ISD::SHL: return SelectSHL(N); case ISD::LOAD: return SelectLoad(N); case ISD::STORE: return SelectStore(N); - case ISD::MUL: return SelectMul(N); case ISD::ZERO_EXTEND: return SelectZeroExtend(N); case ISD::INTRINSIC_W_CHAIN: return SelectIntrinsicWChain(N); case ISD::INTRINSIC_WO_CHAIN: return SelectIntrinsicWOChain(N); @@ -1327,7 +1264,7 @@ void HexagonDAGToDAGISel::EmitFunctionEntryCode() { } // Match a frame index that can be used in an addressing mode. -bool HexagonDAGToDAGISel::SelectAddrFI(SDValue& N, SDValue &R) { +bool HexagonDAGToDAGISel::SelectAddrFI(SDValue &N, SDValue &R) { if (N.getOpcode() != ISD::FrameIndex) return false; auto &HFI = *HST->getFrameLowering(); @@ -1388,16 +1325,83 @@ bool HexagonDAGToDAGISel::SelectGlobalAddress(SDValue &N, SDValue &R, return false; } -bool HexagonDAGToDAGISel::isValueExtension(const SDValue &Val, - unsigned FromBits, SDValue &Src) { +bool HexagonDAGToDAGISel::DetectUseSxtw(SDValue &N, SDValue &R) { + // This (complex pattern) function is meant to detect a sign-extension + // i32->i64 on a per-operand basis. This would allow writing single + // patterns that would cover a number of combinations of different ways + // a sign-extensions could be written. For example: + // (mul (DetectUseSxtw x) (DetectUseSxtw y)) -> (M2_dpmpyss_s0 x y) + // could match either one of these: + // (mul (sext x) (sext_inreg y)) + // (mul (sext-load *p) (sext_inreg y)) + // (mul (sext_inreg x) (sext y)) + // etc. + // + // The returned value will have type i64 and its low word will + // contain the value being extended. The high bits are not specified. + // The returned type is i64 because the original type of N was i64, + // but the users of this function should only use the low-word of the + // result, e.g. + // (mul sxtw:x, sxtw:y) -> (M2_dpmpyss_s0 (LoReg sxtw:x), (LoReg sxtw:y)) + + if (N.getValueType() != MVT::i64) + return false; + EVT SrcVT; + unsigned Opc = N.getOpcode(); + switch (Opc) { + case ISD::SIGN_EXTEND: + case ISD::SIGN_EXTEND_INREG: { + // sext_inreg has the source type as a separate operand. + EVT T = Opc == ISD::SIGN_EXTEND + ? 
N.getOperand(0).getValueType() + : cast(N.getOperand(1))->getVT(); + if (T.getSizeInBits() != 32) + return false; + R = N.getOperand(0); + break; + } + case ISD::LOAD: { + LoadSDNode *L = cast(N); + if (L->getExtensionType() != ISD::SEXTLOAD) + return false; + // All extending loads extend to i32, so even if the value in + // memory is shorter than 32 bits, it will be i32 after the load. + if (L->getMemoryVT().getSizeInBits() > 32) + return false; + R = N; + break; + } + default: + return false; + } + EVT RT = R.getValueType(); + if (RT == MVT::i64) + return true; + assert(RT == MVT::i32); + // This is only to produce a value of type i64. Do not rely on the + // high bits produced by this. + const SDLoc &dl(N); + SDValue Ops[] = { + CurDAG->getTargetConstant(Hexagon::DoubleRegsRegClassID, dl, MVT::i32), + R, CurDAG->getTargetConstant(Hexagon::isub_hi, dl, MVT::i32), + R, CurDAG->getTargetConstant(Hexagon::isub_lo, dl, MVT::i32) + }; + SDNode *T = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, + MVT::i64, Ops); + R = SDValue(T, 0); + return true; +} + +bool HexagonDAGToDAGISel::keepsLowBits(const SDValue &Val, unsigned NumBits, + SDValue &Src) { unsigned Opc = Val.getOpcode(); switch (Opc) { case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: case ISD::ANY_EXTEND: { - SDValue const &Op0 = Val.getOperand(0); + const SDValue &Op0 = Val.getOperand(0); EVT T = Op0.getValueType(); - if (T.isInteger() && T.getSizeInBits() == FromBits) { + if (T.isInteger() && T.getSizeInBits() == NumBits) { Src = Op0; return true; } @@ -1408,23 +1412,23 @@ bool HexagonDAGToDAGISel::isValueExtension(const SDValue &Val, case ISD::AssertZext: if (Val.getOperand(0).getValueType().isInteger()) { VTSDNode *T = cast(Val.getOperand(1)); - if (T->getVT().getSizeInBits() == FromBits) { + if (T->getVT().getSizeInBits() == NumBits) { Src = Val.getOperand(0); return true; } } break; case ISD::AND: { - // Check if this is an AND with "FromBits" of lower bits set to 1. - uint64_t FromMask = (1 << FromBits) - 1; + // Check if this is an AND with NumBits of lower bits set to 1. + uint64_t Mask = (1 << NumBits) - 1; if (ConstantSDNode *C = dyn_cast(Val.getOperand(0))) { - if (C->getZExtValue() == FromMask) { + if (C->getZExtValue() == Mask) { Src = Val.getOperand(1); return true; } } if (ConstantSDNode *C = dyn_cast(Val.getOperand(1))) { - if (C->getZExtValue() == FromMask) { + if (C->getZExtValue() == Mask) { Src = Val.getOperand(0); return true; } @@ -1433,16 +1437,16 @@ bool HexagonDAGToDAGISel::isValueExtension(const SDValue &Val, } case ISD::OR: case ISD::XOR: { - // OR/XOR with the lower "FromBits" bits set to 0. - uint64_t FromMask = (1 << FromBits) - 1; + // OR/XOR with the lower NumBits bits set to 0. 
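Numerically, the AND and OR/XOR cases of keepsLowBits test the same property from two sides: AND against (1 << NumBits) - 1 passes the low bits through unchanged, while OR/XOR with a constant whose low NumBits are clear cannot disturb them. A compile-time check of both facts; note that forming the mask from a widened uint64_t(1), as below, also sidesteps the signed-shift overflow a plain int 1 would hit once NumBits reaches 31:

#include <cstdint>

constexpr uint64_t lowMask(unsigned NumBits) {
  return (uint64_t(1) << NumBits) - 1;
}
static_assert(lowMask(8) == 0xFF, "AND with 0xFF keeps the low 8 bits");
static_assert((0xAB00 & lowMask(8)) == 0,
              "OR/XOR with 0xAB00 cannot change bits 0..7");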
+ uint64_t Mask = (1 << NumBits) - 1; if (ConstantSDNode *C = dyn_cast(Val.getOperand(0))) { - if ((C->getZExtValue() & FromMask) == 0) { + if ((C->getZExtValue() & Mask) == 0) { Src = Val.getOperand(1); return true; } } if (ConstantSDNode *C = dyn_cast(Val.getOperand(1))) { - if ((C->getZExtValue() & FromMask) == 0) { + if ((C->getZExtValue() & Mask) == 0) { Src = Val.getOperand(0); return true; } @@ -1477,6 +1481,20 @@ bool HexagonDAGToDAGISel::isAlignedMemNode(const MemSDNode *N) const { return N->getAlignment() >= N->getMemoryVT().getStoreSize(); } +bool HexagonDAGToDAGISel::isSmallStackStore(const StoreSDNode *N) const { + unsigned StackSize = MF->getFrameInfo().estimateStackSize(*MF); + switch (N->getMemoryVT().getStoreSize()) { + case 1: + return StackSize <= 56; // 1*2^6 - 8 + case 2: + return StackSize <= 120; // 2*2^6 - 8 + case 4: + return StackSize <= 248; // 4*2^6 - 8 + default: + return false; + } +} + // Return true when the given node fits in a positive half word. bool HexagonDAGToDAGISel::isPositiveHalfWord(const SDNode *N) const { if (const ConstantSDNode *CN = dyn_cast(N)) { diff --git a/interpreter/llvm/src/lib/Target/Hexagon/HexagonISelLowering.cpp b/interpreter/llvm/src/lib/Target/Hexagon/HexagonISelLowering.cpp index 1dffebe97f2db..3997702bc962d 100644 --- a/interpreter/llvm/src/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/interpreter/llvm/src/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#include "Hexagon.h" #include "HexagonISelLowering.h" +#include "Hexagon.h" #include "HexagonMachineFunctionInfo.h" #include "HexagonRegisterInfo.h" #include "HexagonSubtarget.h" @@ -26,8 +26,8 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/BasicBlock.h" @@ -716,6 +716,7 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, bool IsStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet(); MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo &MFI = MF.getFrameInfo(); auto PtrVT = getPointerTy(MF.getDataLayout()); // Check for varargs. @@ -832,7 +833,6 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (NeedsArgAlign && Subtarget.hasV60TOps()) { DEBUG(dbgs() << "Function needs byte stack align due to call args\n"); - MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); // V6 vectors passed by value have 64 or 128 byte alignment depending // on whether we are 64 byte vector mode or 128 byte. bool UseHVXDbl = Subtarget.useHVXDblOps(); @@ -916,10 +916,15 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, Ops.push_back(Glue); if (IsTailCall) { - MF.getFrameInfo().setHasTailCall(); + MFI.setHasTailCall(); return DAG.getNode(HexagonISD::TC_RETURN, dl, NodeTys, Ops); } + // Set this here because we need to know this for "hasFP" in frame lowering. + // The target-independent code calls getFrameRegister before setting it, and + // getFrameRegister uses hasFP to determine whether the function has FP. + MFI.setHasCalls(true); + unsigned OpCode = DoesNotReturn ? 
HexagonISD::CALLnr : HexagonISD::CALL; Chain = DAG.getNode(OpCode, dl, NodeTys, Ops); Glue = Chain.getValue(1); @@ -1002,51 +1007,46 @@ bool HexagonTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue HexagonTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const { - SDNode *Node = Op.getNode(); MachineFunction &MF = DAG.getMachineFunction(); - auto &FuncInfo = *MF.getInfo(); - switch (Node->getOpcode()) { - case ISD::INLINEASM: { - unsigned NumOps = Node->getNumOperands(); - if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue) - --NumOps; // Ignore the flag operand. - - for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { - if (FuncInfo.hasClobberLR()) - break; - unsigned Flags = - cast(Node->getOperand(i))->getZExtValue(); - unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); - ++i; // Skip the ID value. - - switch (InlineAsm::getKind(Flags)) { - default: llvm_unreachable("Bad flags!"); - case InlineAsm::Kind_RegDef: - case InlineAsm::Kind_RegUse: - case InlineAsm::Kind_Imm: - case InlineAsm::Kind_Clobber: - case InlineAsm::Kind_Mem: { - for (; NumVals; --NumVals, ++i) {} - break; - } - case InlineAsm::Kind_RegDefEarlyClobber: { - for (; NumVals; --NumVals, ++i) { - unsigned Reg = - cast(Node->getOperand(i))->getReg(); - - // Check it to be lr - const HexagonRegisterInfo *QRI = Subtarget.getRegisterInfo(); - if (Reg == QRI->getRARegister()) { - FuncInfo.setHasClobberLR(true); - break; - } - } - break; - } + auto &HMFI = *MF.getInfo(); + const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo(); + unsigned LR = HRI.getRARegister(); + + if (Op.getOpcode() != ISD::INLINEASM || HMFI.hasClobberLR()) + return Op; + + unsigned NumOps = Op.getNumOperands(); + if (Op.getOperand(NumOps-1).getValueType() == MVT::Glue) + --NumOps; // Ignore the flag operand. + + for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { + unsigned Flags = cast(Op.getOperand(i))->getZExtValue(); + unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); + ++i; // Skip the ID value. + + switch (InlineAsm::getKind(Flags)) { + default: + llvm_unreachable("Bad flags!"); + case InlineAsm::Kind_RegUse: + case InlineAsm::Kind_Imm: + case InlineAsm::Kind_Mem: + i += NumVals; + break; + case InlineAsm::Kind_Clobber: + case InlineAsm::Kind_RegDef: + case InlineAsm::Kind_RegDefEarlyClobber: { + for (; NumVals; --NumVals, ++i) { + unsigned Reg = cast(Op.getOperand(i))->getReg(); + if (Reg != LR) + continue; + HMFI.setHasClobberLR(true); + return Op; } + break; } } - } // Node->getOpcode + } + return Op; } @@ -1286,18 +1286,6 @@ HexagonTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { MachinePointerInfo(SV)); } -// Creates a SPLAT instruction for a constant value VAL. -static SDValue createSplat(SelectionDAG &DAG, const SDLoc &dl, EVT VT, - SDValue Val) { - if (VT.getSimpleVT() == MVT::v4i8) - return DAG.getNode(HexagonISD::VSPLATB, dl, VT, Val); - - if (VT.getSimpleVT() == MVT::v4i16) - return DAG.getNode(HexagonISD::VSPLATH, dl, VT, Val); - - return SDValue(); -} - static bool isSExtFree(SDValue N) { // A sign-extend of a truncate of a sign-extend is free. if (N.getOpcode() == ISD::TRUNCATE && @@ -1376,79 +1364,6 @@ HexagonTargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const { return SDValue(); } -// Handle only specific vector loads. 
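Returning to the LowerINLINEASM rewrite above: the operand scan exists to notice user assembly that defines or clobbers r31, the Hexagon link register, so that hasClobberLR() makes the frame code save LR. The kind of input that trips it, assuming the usual GCC-style clobber spelling (the empty asm body is deliberate; only the clobber list matters):

// Naming r31 as a clobber forces setHasClobberLR(true), which in turn
// makes hasFP() keep a frame so LR can be saved and restored.
static inline void clobbers_link_register() {
  asm volatile("" ::: "r31");
}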
-SDValue HexagonTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { - EVT VT = Op.getValueType(); - SDLoc DL(Op); - LoadSDNode *LoadNode = cast(Op); - SDValue Chain = LoadNode->getChain(); - SDValue Ptr = Op.getOperand(1); - SDValue LoweredLoad; - SDValue Result; - SDValue Base = LoadNode->getBasePtr(); - ISD::LoadExtType Ext = LoadNode->getExtensionType(); - unsigned Alignment = LoadNode->getAlignment(); - SDValue LoadChain; - - if(Ext == ISD::NON_EXTLOAD) - Ext = ISD::ZEXTLOAD; - - if (VT == MVT::v4i16) { - if (Alignment == 2) { - SDValue Loads[4]; - // Base load. - Loads[0] = DAG.getExtLoad(Ext, DL, MVT::i32, Chain, Base, - LoadNode->getPointerInfo(), MVT::i16, Alignment, - LoadNode->getMemOperand()->getFlags()); - // Base+2 load. - SDValue Increment = DAG.getConstant(2, DL, MVT::i32); - Ptr = DAG.getNode(ISD::ADD, DL, Base.getValueType(), Base, Increment); - Loads[1] = DAG.getExtLoad(Ext, DL, MVT::i32, Chain, Ptr, - LoadNode->getPointerInfo(), MVT::i16, Alignment, - LoadNode->getMemOperand()->getFlags()); - // SHL 16, then OR base and base+2. - SDValue ShiftAmount = DAG.getConstant(16, DL, MVT::i32); - SDValue Tmp1 = DAG.getNode(ISD::SHL, DL, MVT::i32, Loads[1], ShiftAmount); - SDValue Tmp2 = DAG.getNode(ISD::OR, DL, MVT::i32, Tmp1, Loads[0]); - // Base + 4. - Increment = DAG.getConstant(4, DL, MVT::i32); - Ptr = DAG.getNode(ISD::ADD, DL, Base.getValueType(), Base, Increment); - Loads[2] = DAG.getExtLoad(Ext, DL, MVT::i32, Chain, Ptr, - LoadNode->getPointerInfo(), MVT::i16, Alignment, - LoadNode->getMemOperand()->getFlags()); - // Base + 6. - Increment = DAG.getConstant(6, DL, MVT::i32); - Ptr = DAG.getNode(ISD::ADD, DL, Base.getValueType(), Base, Increment); - Loads[3] = DAG.getExtLoad(Ext, DL, MVT::i32, Chain, Ptr, - LoadNode->getPointerInfo(), MVT::i16, Alignment, - LoadNode->getMemOperand()->getFlags()); - // SHL 16, then OR base+4 and base+6. - Tmp1 = DAG.getNode(ISD::SHL, DL, MVT::i32, Loads[3], ShiftAmount); - SDValue Tmp4 = DAG.getNode(ISD::OR, DL, MVT::i32, Tmp1, Loads[2]); - // Combine to i64. This could be optimised out later if we can - // affect reg allocation of this code. - Result = DAG.getNode(HexagonISD::COMBINE, DL, MVT::i64, Tmp4, Tmp2); - LoadChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, - Loads[0].getValue(1), Loads[1].getValue(1), - Loads[2].getValue(1), Loads[3].getValue(1)); - } else { - // Perform default type expansion. - Result = DAG.getLoad(MVT::i64, DL, Chain, Ptr, LoadNode->getPointerInfo(), - LoadNode->getAlignment(), - LoadNode->getMemOperand()->getFlags()); - LoadChain = Result.getValue(1); - } - } else - llvm_unreachable("Custom lowering unsupported load"); - - Result = DAG.getNode(ISD::BITCAST, DL, VT, Result); - // Since we pretend to lower a load, we need the original chain - // info attached to the result. - SDValue Ops[] = { Result, LoadChain }; - - return DAG.getMergeValues(Ops, DL); -} - SDValue HexagonTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const { EVT ValTy = Op.getValueType(); @@ -1928,11 +1843,7 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, setOperationAction(ISD::BITREVERSE, MVT::i64, Legal); setOperationAction(ISD::BSWAP, MVT::i32, Legal); setOperationAction(ISD::BSWAP, MVT::i64, Legal); - - // We custom lower i64 to i64 mul, so that it is not considered as a legal - // operation. There is a pattern that will match i64 mul and transform it - // to a series of instructions. 
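The isSmallStackStore thresholds above and the isUInt<6> test in mayOverflowFrameOffset encode the same hardware fact: the offset field of the store-immediate forms is six bits, scaled by the access size (the cascading ++LS under LLVM_FALLTHROUGH computes the log2 of that size). Spelled out, keeping the patch's 8 bytes of slack as given:

// Reachable stack size per access width: 64 slots of Size bytes, minus the
// 8-byte slack the patch subtracts (56, 120 and 248 for sizes 1, 2 and 4).
constexpr unsigned smallStackLimit(unsigned Size) {
  return 64 * Size - 8;
}
static_assert(smallStackLimit(1) == 56 && smallStackLimit(2) == 120 &&
              smallStackLimit(4) == 248, "matches isSmallStackStore");

// The overflow check in log form: StackSize >> log2(Size) must fit in
// six bits, mirroring isUInt<6>(StackSize >> MinLS).
constexpr bool offsetFits(unsigned StackSize, unsigned LogSize) {
  return (StackSize >> LogSize) < 64;
}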
- setOperationAction(ISD::MUL, MVT::i64, Expand); + setOperationAction(ISD::MUL, MVT::i64, Legal); for (unsigned IntExpOp : { ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM, @@ -1977,18 +1888,12 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, // Handling of vector operations. // - // Custom lower v4i16 load only. Let v4i16 store to be - // promoted for now. promoteLdStType(MVT::v4i8, MVT::i32); promoteLdStType(MVT::v2i16, MVT::i32); promoteLdStType(MVT::v8i8, MVT::i64); + promoteLdStType(MVT::v4i16, MVT::i64); promoteLdStType(MVT::v2i32, MVT::i64); - setOperationAction(ISD::LOAD, MVT::v4i16, Custom); - setOperationAction(ISD::STORE, MVT::v4i16, Promote); - AddPromotedToType(ISD::LOAD, MVT::v4i16, MVT::i64); - AddPromotedToType(ISD::STORE, MVT::v4i16, MVT::i64); - // Set the action for vector operations to "expand", then override it with // either "custom" or "legal" for specific cases. static const unsigned VectExpOps[] = { @@ -2003,7 +1908,7 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, // Floating point arithmetic/math functions: ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FMA, ISD::FDIV, ISD::FREM, ISD::FNEG, ISD::FABS, ISD::FSQRT, ISD::FSIN, - ISD::FCOS, ISD::FPOWI, ISD::FPOW, ISD::FLOG, ISD::FLOG2, + ISD::FCOS, ISD::FPOW, ISD::FLOG, ISD::FLOG2, ISD::FLOG10, ISD::FEXP, ISD::FEXP2, ISD::FCEIL, ISD::FTRUNC, ISD::FRINT, ISD::FNEARBYINT, ISD::FROUND, ISD::FFLOOR, ISD::FMINNUM, ISD::FMAXNUM, ISD::FSINCOS, @@ -2305,32 +2210,14 @@ const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const { case HexagonISD::JT: return "HexagonISD::JT"; case HexagonISD::PACKHL: return "HexagonISD::PACKHL"; case HexagonISD::RET_FLAG: return "HexagonISD::RET_FLAG"; - case HexagonISD::SHUFFEB: return "HexagonISD::SHUFFEB"; - case HexagonISD::SHUFFEH: return "HexagonISD::SHUFFEH"; - case HexagonISD::SHUFFOB: return "HexagonISD::SHUFFOB"; - case HexagonISD::SHUFFOH: return "HexagonISD::SHUFFOH"; case HexagonISD::TC_RETURN: return "HexagonISD::TC_RETURN"; - case HexagonISD::VCMPBEQ: return "HexagonISD::VCMPBEQ"; - case HexagonISD::VCMPBGT: return "HexagonISD::VCMPBGT"; - case HexagonISD::VCMPBGTU: return "HexagonISD::VCMPBGTU"; - case HexagonISD::VCMPHEQ: return "HexagonISD::VCMPHEQ"; - case HexagonISD::VCMPHGT: return "HexagonISD::VCMPHGT"; - case HexagonISD::VCMPHGTU: return "HexagonISD::VCMPHGTU"; - case HexagonISD::VCMPWEQ: return "HexagonISD::VCMPWEQ"; - case HexagonISD::VCMPWGT: return "HexagonISD::VCMPWGT"; - case HexagonISD::VCMPWGTU: return "HexagonISD::VCMPWGTU"; case HexagonISD::VCOMBINE: return "HexagonISD::VCOMBINE"; - case HexagonISD::VPACK: return "HexagonISD::VPACK"; - case HexagonISD::VSHLH: return "HexagonISD::VSHLH"; - case HexagonISD::VSHLW: return "HexagonISD::VSHLW"; - case HexagonISD::VSPLATB: return "HexagonISD::VSPLTB"; - case HexagonISD::VSPLATH: return "HexagonISD::VSPLATH"; - case HexagonISD::VSRAH: return "HexagonISD::VSRAH"; - case HexagonISD::VSRAW: return "HexagonISD::VSRAW"; - case HexagonISD::VSRLH: return "HexagonISD::VSRLH"; - case HexagonISD::VSRLW: return "HexagonISD::VSRLW"; - case HexagonISD::VSXTBH: return "HexagonISD::VSXTBH"; - case HexagonISD::VSXTBW: return "HexagonISD::VSXTBW"; + case HexagonISD::VPACKE: return "HexagonISD::VPACKE"; + case HexagonISD::VPACKO: return "HexagonISD::VPACKO"; + case HexagonISD::VASL: return "HexagonISD::VASL"; + case HexagonISD::VASR: return "HexagonISD::VASR"; + case HexagonISD::VLSR: return "HexagonISD::VLSR"; + case HexagonISD::VSPLAT: return "HexagonISD::VSPLAT"; case 
HexagonISD::READCYCLE: return "HexagonISD::READCYCLE"; case HexagonISD::OP_END: break; } @@ -2419,7 +2306,7 @@ HexagonTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) // Test if V1 is a SCALAR_TO_VECTOR. if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) - return createSplat(DAG, dl, VT, V1.getOperand(0)); + return DAG.getNode(HexagonISD::VSPLAT, dl, VT, V1.getOperand(0)); // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR // (and probably will turn into a SCALAR_TO_VECTOR once legalization @@ -2434,28 +2321,26 @@ HexagonTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) } } if (IsScalarToVector) - return createSplat(DAG, dl, VT, V1.getOperand(0)); + return DAG.getNode(HexagonISD::VSPLAT, dl, VT, V1.getOperand(0)); } - return createSplat(DAG, dl, VT, DAG.getConstant(Lane, dl, MVT::i32)); + return DAG.getNode(HexagonISD::VSPLAT, dl, VT, + DAG.getConstant(Lane, dl, MVT::i32)); } if (UseHVX) { ArrayRef<int> Mask = SVN->getMask(); size_t MaskLen = Mask.size(); - int ElemSizeInBits = VT.getScalarSizeInBits(); - if ((Subtarget.useHVXSglOps() && (ElemSizeInBits * MaskLen) == 64 * 8) || - (Subtarget.useHVXDblOps() && (ElemSizeInBits * MaskLen) == 128 * 8)) { - // Return 1 for odd and 2 of even - StridedLoadKind Pattern = isStridedLoad(Mask); + unsigned SizeInBits = VT.getScalarSizeInBits() * MaskLen; + if ((Subtarget.useHVXSglOps() && SizeInBits == 64 * 8) || + (Subtarget.useHVXDblOps() && SizeInBits == 128 * 8)) { + StridedLoadKind Pattern = isStridedLoad(Mask); if (Pattern == StridedLoadKind::NoPattern) return SDValue(); - SDValue Vec0 = Op.getOperand(0); - SDValue Vec1 = Op.getOperand(1); - SDValue StridePattern = DAG.getConstant(Pattern, dl, MVT::i32); - SDValue Ops[] = { Vec1, Vec0, StridePattern }; - return DAG.getNode(HexagonISD::VPACK, dl, VT, Ops); + unsigned Opc = Pattern == StridedLoadKind::Even ?
HexagonISD::VPACKE + : HexagonISD::VPACKO; + return DAG.getNode(Opc, dl, VT, {Op.getOperand(1), Op.getOperand(0)}); } // We used to assert in the "else" part here, but that is bad for Halide // Halide creates intermediate double registers by interleaving two @@ -2512,13 +2397,13 @@ HexagonTargetLowering::LowerVECTOR_SHIFT(SDValue Op, SelectionDAG &DAG) const { if (VT.getSimpleVT() == MVT::v4i16) { switch (Op.getOpcode()) { case ISD::SRA: - Result = DAG.getNode(HexagonISD::VSRAH, dl, VT, V3, CommonSplat); + Result = DAG.getNode(HexagonISD::VASR, dl, VT, V3, CommonSplat); break; case ISD::SHL: - Result = DAG.getNode(HexagonISD::VSHLH, dl, VT, V3, CommonSplat); + Result = DAG.getNode(HexagonISD::VASL, dl, VT, V3, CommonSplat); break; case ISD::SRL: - Result = DAG.getNode(HexagonISD::VSRLH, dl, VT, V3, CommonSplat); + Result = DAG.getNode(HexagonISD::VLSR, dl, VT, V3, CommonSplat); break; default: return SDValue(); @@ -2526,13 +2411,13 @@ HexagonTargetLowering::LowerVECTOR_SHIFT(SDValue Op, SelectionDAG &DAG) const { } else if (VT.getSimpleVT() == MVT::v2i32) { switch (Op.getOpcode()) { case ISD::SRA: - Result = DAG.getNode(HexagonISD::VSRAW, dl, VT, V3, CommonSplat); + Result = DAG.getNode(HexagonISD::VASR, dl, VT, V3, CommonSplat); break; case ISD::SHL: - Result = DAG.getNode(HexagonISD::VSHLW, dl, VT, V3, CommonSplat); + Result = DAG.getNode(HexagonISD::VASL, dl, VT, V3, CommonSplat); break; case ISD::SRL: - Result = DAG.getNode(HexagonISD::VSRLW, dl, VT, V3, CommonSplat); + Result = DAG.getNode(HexagonISD::VLSR, dl, VT, V3, CommonSplat); break; default: return SDValue(); @@ -2556,19 +2441,26 @@ HexagonTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { if (Size > 64) return SDValue(); - APInt APSplatBits, APSplatUndef; - unsigned SplatBitSize; - bool HasAnyUndefs; unsigned NElts = BVN->getNumOperands(); // Try to generate a SPLAT instruction. - if ((VT.getSimpleVT() == MVT::v4i8 || VT.getSimpleVT() == MVT::v4i16) && - (BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize, - HasAnyUndefs, 0, true) && SplatBitSize <= 16)) { - unsigned SplatBits = APSplatBits.getZExtValue(); - int32_t SextVal = ((int32_t) (SplatBits << (32 - SplatBitSize)) >> - (32 - SplatBitSize)); - return createSplat(DAG, dl, VT, DAG.getConstant(SextVal, dl, MVT::i32)); + if (VT == MVT::v4i8 || VT == MVT::v4i16 || VT == MVT::v2i32) { + APInt APSplatBits, APSplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + if (BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize, + HasAnyUndefs, 0, false)) { + if (SplatBitSize == VT.getVectorElementType().getSizeInBits()) { + unsigned ZV = APSplatBits.getZExtValue(); + assert(SplatBitSize <= 32 && "Can only handle up to i32"); + // Sign-extend the splat value from SplatBitSize to 32. + int32_t SV = SplatBitSize < 32 + ? int32_t(ZV << (32-SplatBitSize)) >> (32-SplatBitSize) + : int32_t(ZV); + return DAG.getNode(HexagonISD::VSPLAT, dl, VT, + DAG.getConstant(SV, dl, MVT::i32)); + } + } } // Try to generate COMBINE to build v2i32 vectors. @@ -2999,8 +2891,6 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG); case ISD::VASTART: return LowerVASTART(Op, DAG); - // Custom lower some vector loads. 
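The splat path in LowerBUILD_VECTOR above sign-extends the SplatBitSize-bit constant to 32 bits before emitting HexagonISD::VSPLAT. A worked C++ version of that shift trick (a sketch, not the patch's code):

  #include <cassert>
  #include <cstdint>

  // Shift the value to the top of an i32, then arithmetic-shift back down
  // so the sign bit of the narrow constant is replicated.
  int32_t signExtendSplat(uint32_t ZV, unsigned SplatBitSize) {
    assert(SplatBitSize <= 32 && "Can only handle up to i32");
    if (SplatBitSize < 32)
      return int32_t(ZV << (32 - SplatBitSize)) >> (32 - SplatBitSize);
    return int32_t(ZV);
  }

For example, an i8 splat of 0xFF becomes -1, so VSPLAT materializes all-ones lanes.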
- case ISD::LOAD: return LowerLOAD(Op, DAG); case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); case ISD::SETCC: return LowerSETCC(Op, DAG); case ISD::VSELECT: return LowerVSELECT(Op, DAG); diff --git a/interpreter/llvm/src/lib/Target/Hexagon/HexagonISelLowering.h b/interpreter/llvm/src/lib/Target/Hexagon/HexagonISelLowering.h index 1415156487c07..d66cbc95e9188 100644 --- a/interpreter/llvm/src/lib/Target/Hexagon/HexagonISelLowering.h +++ b/interpreter/llvm/src/lib/Target/Hexagon/HexagonISelLowering.h @@ -52,36 +52,18 @@ namespace HexagonISD { COMBINE, PACKHL, - VSPLATB, - VSPLATH, - SHUFFEB, - SHUFFEH, - SHUFFOB, - SHUFFOH, - VSXTBH, - VSXTBW, - VSRAW, - VSRAH, - VSRLW, - VSRLH, - VSHLW, - VSHLH, - VCMPBEQ, - VCMPBGT, - VCMPBGTU, - VCMPHEQ, - VCMPHGT, - VCMPHGTU, - VCMPWEQ, - VCMPWGT, - VCMPWGTU, + VSPLAT, + VASL, + VASR, + VLSR, INSERT, INSERTRP, EXTRACTU, EXTRACTURP, VCOMBINE, - VPACK, + VPACKE, + VPACKO, TC_RETURN, EH_RETURN, DCFETCH, @@ -183,7 +165,6 @@ namespace HexagonISD { SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const; SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, diff --git a/interpreter/llvm/src/lib/Target/Hexagon/HexagonInstrInfo.cpp b/interpreter/llvm/src/lib/Target/Hexagon/HexagonInstrInfo.cpp index 03794511414e8..c77c669f4ca75 100644 --- a/interpreter/llvm/src/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/interpreter/llvm/src/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -11,9 +11,9 @@ // //===----------------------------------------------------------------------===// +#include "HexagonInstrInfo.h" #include "Hexagon.h" #include "HexagonHazardRecognizer.h" -#include "HexagonInstrInfo.h" #include "HexagonRegisterInfo.h" #include "HexagonSubtarget.h" #include "llvm/ADT/SmallPtrSet.h" @@ -57,9 +57,9 @@ using namespace llvm; #define GET_INSTRINFO_CTOR_DTOR #define GET_INSTRMAP_INFO -#include "HexagonGenInstrInfo.inc" -#include "HexagonGenDFAPacketizer.inc" #include "HexagonDepTimingClasses.h" +#include "HexagonGenDFAPacketizer.inc" +#include "HexagonGenInstrInfo.inc" cl::opt<bool> ScheduleInlineAsm("hexagon-sched-inline-asm", cl::Hidden, cl::init(false), cl::desc("Do not consider inline-asm a scheduling/" @@ -94,10 +94,6 @@ static cl::opt<bool> UseDFAHazardRec("dfa-hazard-rec", /// /// Constants for Hexagon instructions.
/// -const int Hexagon_MEMV_OFFSET_MAX_128B = 896; // #s4: -8*128...7*128 -const int Hexagon_MEMV_OFFSET_MIN_128B = -1024; // #s4 -const int Hexagon_MEMV_OFFSET_MAX = 448; // #s4: -8*64...7*64 -const int Hexagon_MEMV_OFFSET_MIN = -512; // #s4 const int Hexagon_MEMW_OFFSET_MAX = 4095; const int Hexagon_MEMW_OFFSET_MIN = -4096; const int Hexagon_MEMD_OFFSET_MAX = 8191; @@ -254,15 +250,19 @@ unsigned HexagonInstrInfo::isLoadFromStackSlot(const MachineInstr &MI, case Hexagon::L2_loadri_io: case Hexagon::L2_loadrd_io: case Hexagon::V6_vL32b_ai: + case Hexagon::V6_vL32b_nt_ai: case Hexagon::V6_vL32b_ai_128B: + case Hexagon::V6_vL32b_nt_ai_128B: case Hexagon::V6_vL32Ub_ai: case Hexagon::V6_vL32Ub_ai_128B: case Hexagon::LDriw_pred: case Hexagon::LDriw_mod: case Hexagon::PS_vloadrq_ai: case Hexagon::PS_vloadrw_ai: + case Hexagon::PS_vloadrw_nt_ai: case Hexagon::PS_vloadrq_ai_128B: - case Hexagon::PS_vloadrw_ai_128B: { + case Hexagon::PS_vloadrw_ai_128B: + case Hexagon::PS_vloadrw_nt_ai_128B: { const MachineOperand OpFI = MI.getOperand(1); if (!OpFI.isFI()) return 0; @@ -1254,13 +1254,19 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { const MachineOperand &Op1 = MI.getOperand(1); const MachineOperand &Op2 = MI.getOperand(2); const MachineOperand &Op3 = MI.getOperand(3); - LivePhysRegs LiveAtMI(&HRI); + LivePhysRegs LiveAtMI(HRI); getLiveRegsAt(LiveAtMI, MI); bool IsDestLive = !LiveAtMI.available(MRI, Op0.getReg()); + unsigned PReg = Op1.getReg(); + assert(Op1.getSubReg() == 0); + unsigned PState = getRegState(Op1); + if (Op0.getReg() != Op2.getReg()) { + unsigned S = Op0.getReg() != Op3.getReg() ? PState & ~RegState::Kill + : PState; auto T = BuildMI(MBB, MI, DL, get(Hexagon::V6_vcmov)) .add(Op0) - .add(Op1) + .addReg(PReg, S) .add(Op2); if (IsDestLive) T.addReg(Op0.getReg(), RegState::Implicit); @@ -1269,7 +1275,7 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { if (Op0.getReg() != Op3.getReg()) { auto T = BuildMI(MBB, MI, DL, get(Hexagon::V6_vncmov)) .add(Op0) - .add(Op1) + .addReg(PReg, PState) .add(Op3); if (IsDestLive) T.addReg(Op0.getReg(), RegState::Implicit); @@ -1283,15 +1289,21 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { MachineOperand &Op1 = MI.getOperand(1); MachineOperand &Op2 = MI.getOperand(2); MachineOperand &Op3 = MI.getOperand(3); - LivePhysRegs LiveAtMI(&HRI); + LivePhysRegs LiveAtMI(HRI); getLiveRegsAt(LiveAtMI, MI); bool IsDestLive = !LiveAtMI.available(MRI, Op0.getReg()); + unsigned PReg = Op1.getReg(); + assert(Op1.getSubReg() == 0); + unsigned PState = getRegState(Op1); if (Op0.getReg() != Op2.getReg()) { + unsigned S = Op0.getReg() != Op3.getReg() ? 
PState & ~RegState::Kill + : PState; unsigned SrcLo = HRI.getSubReg(Op2.getReg(), Hexagon::vsub_lo); unsigned SrcHi = HRI.getSubReg(Op2.getReg(), Hexagon::vsub_hi); auto T = BuildMI(MBB, MI, DL, get(Hexagon::V6_vccombine)) .add(Op0) + .addReg(PReg, S) .add(Op1) .addReg(SrcHi) .addReg(SrcLo); @@ -1304,7 +1316,7 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { unsigned SrcHi = HRI.getSubReg(Op3.getReg(), Hexagon::vsub_hi); auto T = BuildMI(MBB, MI, DL, get(Hexagon::V6_vnccombine)) .add(Op0) - .add(Op1) + .addReg(PReg, PState) .addReg(SrcHi) .addReg(SrcLo); if (IsDestLive) @@ -1718,6 +1730,39 @@ bool HexagonInstrInfo::getIncrementValue(const MachineInstr &MI, return false; } +std::pair<unsigned, unsigned> +HexagonInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const { + return std::make_pair(TF & ~HexagonII::MO_Bitmasks, + TF & HexagonII::MO_Bitmasks); +} + +ArrayRef<std::pair<unsigned, const char*>> +HexagonInstrInfo::getSerializableDirectMachineOperandTargetFlags() const { + using namespace HexagonII; + static const std::pair<unsigned, const char*> Flags[] = { + {MO_PCREL, "hexagon-pcrel"}, + {MO_GOT, "hexagon-got"}, + {MO_LO16, "hexagon-lo16"}, + {MO_HI16, "hexagon-hi16"}, + {MO_GPREL, "hexagon-gprel"}, + {MO_GDGOT, "hexagon-gdgot"}, + {MO_GDPLT, "hexagon-gdplt"}, + {MO_IE, "hexagon-ie"}, + {MO_IEGOT, "hexagon-iegot"}, + {MO_TPREL, "hexagon-tprel"} }; + return makeArrayRef(Flags); +} + +ArrayRef<std::pair<unsigned, const char*>> +HexagonInstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const { + using namespace HexagonII; + static const std::pair<unsigned, const char*> Flags[] = { + {HMOTF_ConstExtended, "hexagon-ext"} }; + return makeArrayRef(Flags); +} + unsigned HexagonInstrInfo::createVR(MachineFunction *MF, MVT VT) const { MachineRegisterInfo &MRI = MF->getRegInfo(); const TargetRegisterClass *TRC; @@ -1769,161 +1814,6 @@ bool HexagonInstrInfo::isCompoundBranchInstr(const MachineInstr &MI) const { return getType(MI) == HexagonII::TypeCJ && MI.isBranch(); } -bool HexagonInstrInfo::isCondInst(const MachineInstr &MI) const { - return (MI.isBranch() && isPredicated(MI)) || - isConditionalTransfer(MI) || - isConditionalALU32(MI) || - isConditionalLoad(MI) || - // Predicated stores which don't have a .new on any operands.
- (MI.mayStore() && isPredicated(MI) && !isNewValueStore(MI) && - !isPredicatedNew(MI)); -} - -bool HexagonInstrInfo::isConditionalALU32(const MachineInstr &MI) const { - switch (MI.getOpcode()) { - case Hexagon::A2_paddf: - case Hexagon::A2_paddfnew: - case Hexagon::A2_paddif: - case Hexagon::A2_paddifnew: - case Hexagon::A2_paddit: - case Hexagon::A2_padditnew: - case Hexagon::A2_paddt: - case Hexagon::A2_paddtnew: - case Hexagon::A2_pandf: - case Hexagon::A2_pandfnew: - case Hexagon::A2_pandt: - case Hexagon::A2_pandtnew: - case Hexagon::A2_porf: - case Hexagon::A2_porfnew: - case Hexagon::A2_port: - case Hexagon::A2_portnew: - case Hexagon::A2_psubf: - case Hexagon::A2_psubfnew: - case Hexagon::A2_psubt: - case Hexagon::A2_psubtnew: - case Hexagon::A2_pxorf: - case Hexagon::A2_pxorfnew: - case Hexagon::A2_pxort: - case Hexagon::A2_pxortnew: - case Hexagon::A4_paslhf: - case Hexagon::A4_paslhfnew: - case Hexagon::A4_paslht: - case Hexagon::A4_paslhtnew: - case Hexagon::A4_pasrhf: - case Hexagon::A4_pasrhfnew: - case Hexagon::A4_pasrht: - case Hexagon::A4_pasrhtnew: - case Hexagon::A4_psxtbf: - case Hexagon::A4_psxtbfnew: - case Hexagon::A4_psxtbt: - case Hexagon::A4_psxtbtnew: - case Hexagon::A4_psxthf: - case Hexagon::A4_psxthfnew: - case Hexagon::A4_psxtht: - case Hexagon::A4_psxthtnew: - case Hexagon::A4_pzxtbf: - case Hexagon::A4_pzxtbfnew: - case Hexagon::A4_pzxtbt: - case Hexagon::A4_pzxtbtnew: - case Hexagon::A4_pzxthf: - case Hexagon::A4_pzxthfnew: - case Hexagon::A4_pzxtht: - case Hexagon::A4_pzxthtnew: - case Hexagon::C2_ccombinewf: - case Hexagon::C2_ccombinewt: - return true; - } - return false; -} - -// FIXME - Function name and it's functionality don't match. -// It should be renamed to hasPredNewOpcode() -bool HexagonInstrInfo::isConditionalLoad(const MachineInstr &MI) const { - if (!MI.getDesc().mayLoad() || !isPredicated(MI)) - return false; - - int PNewOpcode = Hexagon::getPredNewOpcode(MI.getOpcode()); - // Instruction with valid predicated-new opcode can be promoted to .new. - return PNewOpcode >= 0; -} - -// Returns true if an instruction is a conditional store. -// -// Note: It doesn't include conditional new-value stores as they can't be -// converted to .new predicate. -bool HexagonInstrInfo::isConditionalStore(const MachineInstr &MI) const { - switch (MI.getOpcode()) { - default: return false; - case Hexagon::S4_storeirbt_io: - case Hexagon::S4_storeirbf_io: - case Hexagon::S4_pstorerbt_rr: - case Hexagon::S4_pstorerbf_rr: - case Hexagon::S2_pstorerbt_io: - case Hexagon::S2_pstorerbf_io: - case Hexagon::S2_pstorerbt_pi: - case Hexagon::S2_pstorerbf_pi: - case Hexagon::S2_pstorerdt_io: - case Hexagon::S2_pstorerdf_io: - case Hexagon::S4_pstorerdt_rr: - case Hexagon::S4_pstorerdf_rr: - case Hexagon::S2_pstorerdt_pi: - case Hexagon::S2_pstorerdf_pi: - case Hexagon::S2_pstorerht_io: - case Hexagon::S2_pstorerhf_io: - case Hexagon::S4_storeirht_io: - case Hexagon::S4_storeirhf_io: - case Hexagon::S4_pstorerht_rr: - case Hexagon::S4_pstorerhf_rr: - case Hexagon::S2_pstorerht_pi: - case Hexagon::S2_pstorerhf_pi: - case Hexagon::S2_pstorerit_io: - case Hexagon::S2_pstorerif_io: - case Hexagon::S4_storeirit_io: - case Hexagon::S4_storeirif_io: - case Hexagon::S4_pstorerit_rr: - case Hexagon::S4_pstorerif_rr: - case Hexagon::S2_pstorerit_pi: - case Hexagon::S2_pstorerif_pi: - - // V4 global address store before promoting to dot new. 
- case Hexagon::S4_pstorerdt_abs: - case Hexagon::S4_pstorerdf_abs: - case Hexagon::S4_pstorerbt_abs: - case Hexagon::S4_pstorerbf_abs: - case Hexagon::S4_pstorerht_abs: - case Hexagon::S4_pstorerhf_abs: - case Hexagon::S4_pstorerit_abs: - case Hexagon::S4_pstorerif_abs: - return true; - - // Predicated new value stores (i.e. if (p0) memw(..)=r0.new) are excluded - // from the "Conditional Store" list. Because a predicated new value store - // would NOT be promoted to a double dot new store. - // This function returns yes for those stores that are predicated but not - // yet promoted to predicate dot new instructions. - } -} - -bool HexagonInstrInfo::isConditionalTransfer(const MachineInstr &MI) const { - switch (MI.getOpcode()) { - case Hexagon::A2_tfrt: - case Hexagon::A2_tfrf: - case Hexagon::C2_cmoveit: - case Hexagon::C2_cmoveif: - case Hexagon::A2_tfrtnew: - case Hexagon::A2_tfrfnew: - case Hexagon::C2_cmovenewit: - case Hexagon::C2_cmovenewif: - case Hexagon::A2_tfrpt: - case Hexagon::A2_tfrpf: - return true; - - default: - return false; - } - return false; -} - // TODO: In order to have isExtendable for fpimm/f32Ext, we need to handle // isFPImm and later getFPImm as well. bool HexagonInstrInfo::isConstExtended(const MachineInstr &MI) const { @@ -1944,7 +1834,7 @@ bool HexagonInstrInfo::isConstExtended(const MachineInstr &MI) const { const MachineOperand &MO = MI.getOperand(ExtOpNum); // Use MO operand flags to determine if MO // has the HMOTF_ConstExtended flag set. - if (MO.getTargetFlags() && HexagonII::HMOTF_ConstExtended) + if (MO.getTargetFlags() & HexagonII::HMOTF_ConstExtended) return true; // If this is a Machine BB address we are talking about, and it is // not marked as extended, say so. @@ -1954,9 +1844,6 @@ bool HexagonInstrInfo::isConstExtended(const MachineInstr &MI) const { // We could be using an instruction with an extendable immediate and shoehorn // a global address into it. If it is a global address it will be constant // extended. We do this for COMBINE. - // We currently only handle isGlobal() because it is the only kind of - // object we are going to end up with here for now. - // In the future we probably should add isSymbol(), etc. if (MO.isGlobal() || MO.isSymbol() || MO.isBlockAddress() || MO.isJTI() || MO.isCPI() || MO.isFPImm()) return true; @@ -2108,11 +1995,9 @@ bool HexagonInstrInfo::isExtended(const MachineInstr &MI) const { return true; // Use MO operand flags to determine if one of MI's operands // has HMOTF_ConstExtended flag set. 
- for (MachineInstr::const_mop_iterator I = MI.operands_begin(), - E = MI.operands_end(); I != E; ++I) { - if (I->getTargetFlags() && HexagonII::HMOTF_ConstExtended) + for (const MachineOperand &MO : MI.operands()) + if (MO.getTargetFlags() & HexagonII::HMOTF_ConstExtended) return true; - } return false; } @@ -2592,25 +2477,31 @@ bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset, switch (Opcode) { case Hexagon::PS_vstorerq_ai: case Hexagon::PS_vstorerw_ai: + case Hexagon::PS_vstorerw_nt_ai: case Hexagon::PS_vloadrq_ai: case Hexagon::PS_vloadrw_ai: + case Hexagon::PS_vloadrw_nt_ai: case Hexagon::V6_vL32b_ai: case Hexagon::V6_vS32b_ai: + case Hexagon::V6_vL32b_nt_ai: + case Hexagon::V6_vS32b_nt_ai: case Hexagon::V6_vL32Ub_ai: case Hexagon::V6_vS32Ub_ai: - return (Offset >= Hexagon_MEMV_OFFSET_MIN) && - (Offset <= Hexagon_MEMV_OFFSET_MAX); + return isShiftedInt<4,6>(Offset); case Hexagon::PS_vstorerq_ai_128B: case Hexagon::PS_vstorerw_ai_128B: + case Hexagon::PS_vstorerw_nt_ai_128B: case Hexagon::PS_vloadrq_ai_128B: case Hexagon::PS_vloadrw_ai_128B: + case Hexagon::PS_vloadrw_nt_ai_128B: case Hexagon::V6_vL32b_ai_128B: case Hexagon::V6_vS32b_ai_128B: + case Hexagon::V6_vL32b_nt_ai_128B: + case Hexagon::V6_vS32b_nt_ai_128B: case Hexagon::V6_vL32Ub_ai_128B: case Hexagon::V6_vS32Ub_ai_128B: - return (Offset >= Hexagon_MEMV_OFFSET_MIN_128B) && - (Offset <= Hexagon_MEMV_OFFSET_MAX_128B); + return isShiftedInt<4,7>(Offset); case Hexagon::J2_loop0i: case Hexagon::J2_loop1i: @@ -3319,11 +3210,19 @@ int HexagonInstrInfo::getDotCurOp(const MachineInstr &MI) const { return Hexagon::V6_vL32b_cur_pi; case Hexagon::V6_vL32b_ai: return Hexagon::V6_vL32b_cur_ai; + case Hexagon::V6_vL32b_nt_pi: + return Hexagon::V6_vL32b_nt_cur_pi; + case Hexagon::V6_vL32b_nt_ai: + return Hexagon::V6_vL32b_nt_cur_ai; //128B case Hexagon::V6_vL32b_pi_128B: return Hexagon::V6_vL32b_cur_pi_128B; case Hexagon::V6_vL32b_ai_128B: return Hexagon::V6_vL32b_cur_ai_128B; + case Hexagon::V6_vL32b_nt_pi_128B: + return Hexagon::V6_vL32b_nt_cur_pi_128B; + case Hexagon::V6_vL32b_nt_ai_128B: + return Hexagon::V6_vL32b_nt_cur_ai_128B; } return 0; } @@ -3336,11 +3235,19 @@ int HexagonInstrInfo::getNonDotCurOp(const MachineInstr &MI) const { return Hexagon::V6_vL32b_pi; case Hexagon::V6_vL32b_cur_ai: return Hexagon::V6_vL32b_ai; + case Hexagon::V6_vL32b_nt_cur_pi: + return Hexagon::V6_vL32b_nt_pi; + case Hexagon::V6_vL32b_nt_cur_ai: + return Hexagon::V6_vL32b_nt_ai; //128B case Hexagon::V6_vL32b_cur_pi_128B: return Hexagon::V6_vL32b_pi_128B; case Hexagon::V6_vL32b_cur_ai_128B: return Hexagon::V6_vL32b_ai_128B; + case Hexagon::V6_vL32b_nt_cur_pi_128B: + return Hexagon::V6_vL32b_nt_pi_128B; + case Hexagon::V6_vL32b_nt_cur_ai_128B: + return Hexagon::V6_vL32b_nt_ai_128B; } return 0; } @@ -3474,6 +3381,8 @@ int HexagonInstrInfo::getDotNewOp(const MachineInstr &MI) const { // Returns the opcode to use when converting MI, which is a conditional jump, // into a conditional instruction which uses the .new value of the predicate. // We also use branch probabilities to add a hint to the jump. +// If MBPI is null, all edges will be treated as equally likely for the +// purposes of establishing a predication hint. int HexagonInstrInfo::getDotNewPredJumpOp(const MachineInstr &MI, const MachineBranchProbabilityInfo *MBPI) const { // We assume that block can have at most two successors. 
@@ -3482,9 +3391,16 @@ int HexagonInstrInfo::getDotNewPredJumpOp(const MachineInstr &MI, bool Taken = false; const BranchProbability OneHalf(1, 2); + auto getEdgeProbability = [MBPI] (const MachineBasicBlock *Src, + const MachineBasicBlock *Dst) { + if (MBPI) + return MBPI->getEdgeProbability(Src, Dst); + return BranchProbability(1, Src->succ_size()); + }; + if (BrTarget.isMBB()) { const MachineBasicBlock *Dst = BrTarget.getMBB(); - Taken = MBPI->getEdgeProbability(Src, Dst) >= OneHalf; + Taken = getEdgeProbability(Src, Dst) >= OneHalf; } else { // The branch target is not a basic block (most likely a function). // Since BPI only gives probabilities for targets that are basic blocks, @@ -3521,7 +3437,7 @@ int HexagonInstrInfo::getDotNewPredJumpOp(const MachineInstr &MI, for (const MachineBasicBlock *SB : B.successors()) { if (!B.isLayoutSuccessor(SB)) continue; - Taken = MBPI->getEdgeProbability(Src, SB) < OneHalf; + Taken = getEdgeProbability(Src, SB) < OneHalf; break; } } else { @@ -3534,7 +3450,7 @@ int HexagonInstrInfo::getDotNewPredJumpOp(const MachineInstr &MI, BT = Op.getMBB(); break; } - Taken = BT && MBPI->getEdgeProbability(Src, BT) < OneHalf; + Taken = BT && getEdgeProbability(Src, BT) < OneHalf; } } // if (!Bad) } @@ -3565,9 +3481,7 @@ int HexagonInstrInfo::getDotNewPredOp(const MachineInstr &MI, int NewOpcode = Hexagon::getPredNewOpcode(MI.getOpcode()); if (NewOpcode >= 0) return NewOpcode; - - dbgs() << "Cannot convert to .new: " << getName(MI.getOpcode()) << '\n'; - llvm_unreachable(nullptr); + return 0; } int HexagonInstrInfo::getDotOldOp(const MachineInstr &MI) const { diff --git a/interpreter/llvm/src/lib/Target/Hexagon/HexagonInstrInfo.h b/interpreter/llvm/src/lib/Target/Hexagon/HexagonInstrInfo.h index 97b9bc9546885..0436ce3ac475b 100644 --- a/interpreter/llvm/src/lib/Target/Hexagon/HexagonInstrInfo.h +++ b/interpreter/llvm/src/lib/Target/Hexagon/HexagonInstrInfo.h @@ -301,6 +301,27 @@ class HexagonInstrInfo : public HexagonGenInstrInfo { const MachineInstr &UseMI, unsigned UseIdx) const override; + /// Decompose the machine operand's target flags into two values - the direct + /// target flag value and any of bit flags that are applied. + std::pair<unsigned, unsigned> + decomposeMachineOperandsTargetFlags(unsigned TF) const override; + + /// Return an array that contains the direct target flag values and their + /// names. + /// + /// MIR Serialization is able to serialize only the target flags that are + /// defined by this method. + ArrayRef<std::pair<unsigned, const char*>> + getSerializableDirectMachineOperandTargetFlags() const override; + + /// Return an array that contains the bitmask target flag values and their + /// names. + /// + /// MIR Serialization is able to serialize only the target flags that are + /// defined by this method. + ArrayRef<std::pair<unsigned, const char*>> + getSerializableBitmaskMachineOperandTargetFlags() const override; + bool isTailCall(const MachineInstr &MI) const override; /// HexagonInstrInfo specifics.
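A hedged sketch of what the decomposeMachineOperandsTargetFlags override declared above does. The exact bit layout lives in HexagonII and is not reproduced here; the point is that bitmask flags such as hexagon-ext can be OR'd onto any direct flag, so MIR serialization needs the flag word split into its two parts:

  #include <utility>

  // Illustrative only: BitmaskBits stands in for HexagonII::MO_Bitmasks.
  // The first element is the direct flag (MO_PCREL, MO_GOT, ...), the
  // second holds the OR-able bits (HMOTF_ConstExtended).
  std::pair<unsigned, unsigned> decomposeTF(unsigned TF, unsigned BitmaskBits) {
    return {TF & ~BitmaskBits, TF & BitmaskBits};
  }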
@@ -314,11 +335,6 @@ class HexagonInstrInfo : public HexagonGenInstrInfo { bool isAccumulator(const MachineInstr &MI) const; bool isComplex(const MachineInstr &MI) const; bool isCompoundBranchInstr(const MachineInstr &MI) const; - bool isCondInst(const MachineInstr &MI) const; - bool isConditionalALU32 (const MachineInstr &MI) const; - bool isConditionalLoad(const MachineInstr &MI) const; - bool isConditionalStore(const MachineInstr &MI) const; - bool isConditionalTransfer(const MachineInstr &MI) const; bool isConstExtended(const MachineInstr &MI) const; bool isDeallocRet(const MachineInstr &MI) const; bool isDependent(const MachineInstr &ProdMI, diff --git a/interpreter/llvm/src/lib/Target/Hexagon/HexagonIntrinsics.td b/interpreter/llvm/src/lib/Target/Hexagon/HexagonIntrinsics.td index c611857ec26af..104a28654dd50 100644 --- a/interpreter/llvm/src/lib/Target/Hexagon/HexagonIntrinsics.td +++ b/interpreter/llvm/src/lib/Target/Hexagon/HexagonIntrinsics.td @@ -1366,6 +1366,18 @@ defm : MaskedStore ; defm : MaskedStore ; defm : MaskedStore ; +//******************************************************************* +// SYSTEM +//******************************************************************* + +def: T_R_pat<Y2_dccleana, int_hexagon_Y2_dccleana>; +def: T_R_pat<Y2_dccleaninva, int_hexagon_Y2_dccleaninva>; +def: T_R_pat<Y2_dcinva, int_hexagon_Y2_dcinva>; +def: T_R_pat<Y2_dczeroa, int_hexagon_Y2_dczeroa>; + +def: T_RR_pat<Y4_l2fetch, int_hexagon_Y4_l2fetch>; +def: T_RP_pat<Y5_l2fetch, int_hexagon_Y5_l2fetch>; + include "HexagonIntrinsicsV3.td" include "HexagonIntrinsicsV4.td" include "HexagonIntrinsicsV5.td" diff --git a/interpreter/llvm/src/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp b/interpreter/llvm/src/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp index e4df7ff5c2008..f82ad6cb3da6a 100644 --- a/interpreter/llvm/src/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp +++ b/interpreter/llvm/src/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp @@ -23,11 +23,11 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/PatternMatch.h" -#include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/Support/Debug.h" #include "llvm/Support/KnownBits.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/Local.h" #include <algorithm> #include <array> @@ -59,6 +59,9 @@ cl::opt<bool> HexagonVolatileMemcpy("disable-hexagon-volatile-memcpy", cl::Hidden, cl::init(false), cl::desc("Enable Hexagon-specific memcpy for volatile destination.")); +static cl::opt<unsigned> SimplifyLimit("hlir-simplify-limit", cl::init(10000), + cl::Hidden, cl::desc("Maximum number of simplification steps in HLIR")); + static const char *HexagonVolatileMemcpyName = "hexagon_memcpy_forward_vp4cp4n2"; @@ -399,7 +402,7 @@ void Simplifier::Context::cleanup() { for (Value *V : Clones) { Instruction *U = cast<Instruction>(V); if (!U->getParent()) - delete U; + U->deleteValue(); } } @@ -477,7 +480,7 @@ Value *Simplifier::simplify(Context &C) { WorkListType Q; Q.push_back(C.Root); unsigned Count = 0; - const unsigned Limit = 100000; + const unsigned Limit = SimplifyLimit; while (!Q.empty()) { if (Count++ >= Limit) @@ -501,8 +504,7 @@ Value *Simplifier::simplify(Context &C) { Q.push_back(Op); } } - assert(Count < Limit && "Infinite loop in HLIR/simplify?"); - return C.Root; + return Count < Limit ? C.Root : nullptr; } @@ -1742,7 +1744,8 @@ bool PolynomialMultiplyRecognize::recognize() { // wide as the target's pmpy instruction.
if (!promoteTypes(LoopB, ExitB)) return false; - convertShiftsToLeft(LoopB, ExitB, IterCount); + if (!convertShiftsToLeft(LoopB, ExitB, IterCount)) + return false; cleanupLoopBody(LoopB); } diff --git a/interpreter/llvm/src/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/interpreter/llvm/src/lib/Target/Hexagon/HexagonMachineScheduler.cpp index 324108284a9a5..1a26805d190d0 100644 --- a/interpreter/llvm/src/lib/Target/Hexagon/HexagonMachineScheduler.cpp +++ b/interpreter/llvm/src/lib/Target/Hexagon/HexagonMachineScheduler.cpp @@ -49,7 +49,7 @@ static cl::opt<bool> CheckEarlyAvail("check-early-avail", cl::Hidden, using namespace llvm; -#define DEBUG_TYPE "misched" +#define DEBUG_TYPE "machine-scheduler" namespace { class HexagonCallMutation : public ScheduleDAGMutation { @@ -563,40 +563,33 @@ void ConvergingVLIWScheduler::readyQueueVerboseDump( } #endif -/// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor -/// of SU, return it, otherwise return null. -static SUnit *getSingleUnscheduledPred(SUnit *SU) { - SUnit *OnlyAvailablePred = nullptr; - for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); - I != E; ++I) { - SUnit &Pred = *I->getSUnit(); - if (!Pred.isScheduled) { - // We found an available, but not scheduled, predecessor. If it's the - // only one we have found, keep track of it... otherwise give up. - if (OnlyAvailablePred && OnlyAvailablePred != &Pred) - return nullptr; - OnlyAvailablePred = &Pred; - } +/// isSingleUnscheduledPred - If SU2 is the only unscheduled predecessor +/// of SU, return true (we may have duplicates) +static inline bool isSingleUnscheduledPred(SUnit *SU, SUnit *SU2) { + if (SU->NumPredsLeft == 0) + return false; + + for (auto &Pred : SU->Preds) { + // We found an available, but not scheduled, predecessor. + if (!Pred.getSUnit()->isScheduled && (Pred.getSUnit() != SU2)) + return false; } - return OnlyAvailablePred; + + return true; } -/// getSingleUnscheduledSucc - If there is exactly one unscheduled successor -/// of SU, return it, otherwise return null. -static SUnit *getSingleUnscheduledSucc(SUnit *SU) { - SUnit *OnlyAvailableSucc = nullptr; - for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); - I != E; ++I) { - SUnit &Succ = *I->getSUnit(); - if (!Succ.isScheduled) { - // We found an available, but not scheduled, successor. If it's the - // only one we have found, keep track of it... otherwise give up. - if (OnlyAvailableSucc && OnlyAvailableSucc != &Succ) - return nullptr; - OnlyAvailableSucc = &Succ; - } +/// isSingleUnscheduledSucc - If SU2 is the only unscheduled successor +/// of SU, return true (we may have duplicates) +static inline bool isSingleUnscheduledSucc(SUnit *SU, SUnit *SU2) { + if (SU->NumSuccsLeft == 0) + return false; + + for (auto &Succ : SU->Succs) { + // We found an available, but not scheduled, successor. + if (!Succ.getSUnit()->isScheduled && (Succ.getSUnit() != SU2)) + return false; } - return OnlyAvailableSucc; + return true; } // Constants used to denote relative importance of @@ -673,12 +666,12 @@ int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU, // Count the number of nodes that // this node is the sole unscheduled node for. for (const SDep &SI : SU->Succs) - if (getSingleUnscheduledPred(SI.getSUnit()) == SU) + if (isSingleUnscheduledPred(SI.getSUnit(), SU)) ++NumNodesBlocking; } else { // How many unscheduled predecessors block this node?
for (const SDep &PI : SU->Preds) - if (getSingleUnscheduledSucc(PI.getSUnit()) == SU) + if (isSingleUnscheduledSucc(PI.getSUnit(), SU)) ++NumNodesBlocking; } ResCount += (NumNodesBlocking * ScaleTwo); diff --git a/interpreter/llvm/src/lib/Target/Hexagon/HexagonNewValueJump.cpp b/interpreter/llvm/src/lib/Target/Hexagon/HexagonNewValueJump.cpp index d73fc7c73185d..e93f075f4ccd8 100644 --- a/interpreter/llvm/src/lib/Target/Hexagon/HexagonNewValueJump.cpp +++ b/interpreter/llvm/src/lib/Target/Hexagon/HexagonNewValueJump.cpp @@ -69,9 +69,7 @@ namespace { public: static char ID; - HexagonNewValueJump() : MachineFunctionPass(ID) { - initializeHexagonNewValueJumpPass(*PassRegistry::getPassRegistry()); - } + HexagonNewValueJump() : MachineFunctionPass(ID) {} void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<MachineBranchProbabilityInfo>(); @@ -445,8 +443,6 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { unsigned predReg = 0; // predicate reg of the jump. unsigned cmpReg1 = 0; int cmpOp2 = 0; - bool MO1IsKill = false; - bool MO2IsKill = false; MachineBasicBlock::iterator jmpPos; MachineBasicBlock::iterator cmpPos; MachineInstr *cmpInstr = nullptr, *jmpInstr = nullptr; @@ -548,14 +544,10 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { // We need cmpReg1 and cmpOp2(imm or reg) while building // new value jump instruction. cmpReg1 = MI.getOperand(1).getReg(); - if (MI.getOperand(1).isKill()) - MO1IsKill = true; - if (isSecondOpReg) { + if (isSecondOpReg) cmpOp2 = MI.getOperand(2).getReg(); - if (MI.getOperand(2).isKill()) - MO2IsKill = true; - } else + else cmpOp2 = MI.getOperand(2).getImm(); continue; } @@ -605,11 +597,8 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { if ((COp == Hexagon::C2_cmpeq || COp == Hexagon::C4_cmpneq) && (feederReg == (unsigned) cmpOp2)) { unsigned tmp = cmpReg1; - bool tmpIsKill = MO1IsKill; cmpReg1 = cmpOp2; - MO1IsKill = MO2IsKill; cmpOp2 = tmp; - MO2IsKill = tmpIsKill; } // Now we have swapped the operands, all we need to check is, @@ -623,31 +612,33 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { // make sure we are respecting the kill values of // the operands of the feeder.
- bool updatedIsKill = false; - for (unsigned i = 0; i < MI.getNumOperands(); i++) { - MachineOperand &MO = MI.getOperand(i); - if (MO.isReg() && MO.isUse()) { - unsigned feederReg = MO.getReg(); - for (MachineBasicBlock::iterator localII = feederPos, - end = jmpPos; localII != end; localII++) { - MachineInstr &localMI = *localII; - for (unsigned j = 0; j < localMI.getNumOperands(); j++) { - MachineOperand &localMO = localMI.getOperand(j); - if (localMO.isReg() && localMO.isUse() && - localMO.isKill() && feederReg == localMO.getReg()) { - // We found that there is kill of a use register - // Set up a kill flag on the register - localMO.setIsKill(false); - MO.setIsKill(); - updatedIsKill = true; - break; - } + auto TransferKills = [jmpPos,cmpPos] (MachineInstr &MI) { + for (MachineOperand &MO : MI.operands()) { + if (!MO.isReg() || !MO.isUse()) + continue; + unsigned UseR = MO.getReg(); + for (auto I = std::next(MI.getIterator()); I != jmpPos; ++I) { + if (I == cmpPos) + continue; + for (MachineOperand &Op : I->operands()) { + if (!Op.isReg() || !Op.isUse() || !Op.isKill()) + continue; + if (Op.getReg() != UseR) + continue; + // We found that there is kill of a use register + // Set up a kill flag on the register + Op.setIsKill(false); + MO.setIsKill(true); + return; } - if (updatedIsKill) break; } } - if (updatedIsKill) break; - } + }; + + TransferKills(*feederPos); + TransferKills(*cmpPos); + bool MO1IsKill = cmpPos->killsRegister(cmpReg1, QRI); + bool MO2IsKill = isSecondOpReg && cmpPos->killsRegister(cmpOp2, QRI); MBB->splice(jmpPos, MI.getParent(), MI); MBB->splice(jmpPos, MI.getParent(), cmpInstr); diff --git a/interpreter/llvm/src/lib/Target/Hexagon/HexagonOptAddrMode.cpp b/interpreter/llvm/src/lib/Target/Hexagon/HexagonOptAddrMode.cpp index 27b40f134b1f4..374ffa3799b03 100644 --- a/interpreter/llvm/src/lib/Target/Hexagon/HexagonOptAddrMode.cpp +++ b/interpreter/llvm/src/lib/Target/Hexagon/HexagonOptAddrMode.cpp @@ -10,8 +10,6 @@ // load/store instructions. //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "opt-addr-mode" - #include "HexagonInstrInfo.h" #include "HexagonSubtarget.h" #include "MCTargetDesc/HexagonBaseInfo.h" @@ -36,6 +34,8 @@ #include #include +#define DEBUG_TYPE "opt-addr-mode" + static cl::opt CodeGrowthLimit("hexagon-amode-growth-limit", cl::Hidden, cl::init(0), cl::desc("Code growth limit for address mode " "optimization")); @@ -535,9 +535,9 @@ bool HexagonOptAddrMode::processBlock(NodeAddr<BlockNode*> BA) { !MI->getOperand(1).isGlobal()) continue; - DEBUG(dbgs() << "[Analyzing A2_tfrsi]: " << *MI << "\n"); - DEBUG(dbgs() << "\t[InstrNode]: " << Print<NodeAddr<InstrNode*>>(IA, *DFG) - << "\n"); + DEBUG(dbgs() << "[Analyzing " << HII->getName(MI->getOpcode()) << "]: " + << *MI << "\n\t[InstrNode]: " + << Print<NodeAddr<InstrNode*>>(IA, *DFG) << '\n'); NodeList UNodeList; getAllRealUses(SA, UNodeList); @@ -605,7 +605,9 @@ bool HexagonOptAddrMode::runOnMachineFunction(MachineFunction &MF) { const TargetOperandInfo TOI(*HII); DataFlowGraph G(MF, *HII, TRI, *MDT, MDF, TOI); - G.build(); + // Need to keep dead phis because we can propagate uses of registers into + // nodes dominated by those would-be phis.
+ G.build(BuildOptions::KeepDeadPhis); DFG = &G; Liveness L(MRI, *DFG); diff --git a/interpreter/llvm/src/lib/Target/Hexagon/HexagonPatterns.td b/interpreter/llvm/src/lib/Target/Hexagon/HexagonPatterns.td index 81b5e10c11731..804a547d5b339 100644 --- a/interpreter/llvm/src/lib/Target/Hexagon/HexagonPatterns.td +++ b/interpreter/llvm/src/lib/Target/Hexagon/HexagonPatterns.td @@ -1,3 +1,12 @@ +//==- HexagonPatterns.td - Target Description for Hexagon -*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + // Pattern fragment that combines the value type and the register class // into a single parameter. @@ -345,7 +354,7 @@ def: Pat<(add (mul IntRegs:$src2, u32_0ImmPred:$src3), IntRegs:$src1), (M2_macsip IntRegs:$src1, IntRegs:$src2, imm:$src3)>; def: Pat<(add (mul I32:$src2, I32:$src3), I32:$src1), (M2_maci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; -def: Pat<(add (add IntRegs:$src2, u32_0ImmPred:$src3), IntRegs:$src1), +def: Pat<(add (add IntRegs:$src2, s32_0ImmPred:$src3), IntRegs:$src1), (M2_accii IntRegs:$src1, IntRegs:$src2, imm:$src3)>; def: Pat<(add (add I32:$src2, I32:$src3), I32:$src1), (M2_acci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; @@ -382,48 +391,47 @@ def: T_MType_acc_pat3 ; def: T_MType_acc_pat3 ; def: T_MType_acc_pat3 ; +// This complex pattern is really only to detect various forms of +// sign-extension i32->i64. The selected value will be of type i64 +// whose low word is the value being extended. The high word is +// unspecified. +def Usxtw : ComplexPattern; + def Aext64: PatFrag<(ops node:$Rs), (i64 (anyext node:$Rs))>; -def Sext64: PatFrag<(ops node:$Rs), (i64 (sext node:$Rs))>; def Zext64: PatFrag<(ops node:$Rs), (i64 (zext node:$Rs))>; +def Sext64: PatLeaf<(i64 Usxtw:$Rs)>; -// Return true if for a 32 to 64-bit sign-extended load. -def Sext64Ld : PatLeaf<(i64 DoubleRegs:$src1), [{ - LoadSDNode *LD = dyn_cast(N); - if (!LD) - return false; - return LD->getExtensionType() == ISD::SEXTLOAD && - LD->getMemoryVT().getScalarType() == MVT::i32; -}]>; - -def: Pat<(mul (Aext64 I32:$src1), (Aext64 I32:$src2)), - (M2_dpmpyuu_s0 IntRegs:$src1, IntRegs:$src2)>; +def: Pat<(i32 (trunc (sra (mul Sext64:$Rs, Sext64:$Rt), (i32 32)))), + (M2_mpy_up (LoReg Sext64:$Rs), (LoReg Sext64:$Rt))>; +def: Pat<(i32 (trunc (srl (mul Sext64:$Rs, Sext64:$Rt), (i32 32)))), + (M2_mpy_up (LoReg Sext64:$Rs), (LoReg Sext64:$Rt))>; -def: Pat<(mul (Sext64 I32:$src1), (Sext64 I32:$src2)), - (M2_dpmpyss_s0 IntRegs:$src1, IntRegs:$src2)>; +def: Pat<(mul (Aext64 I32:$Rs), (Aext64 I32:$Rt)), + (M2_dpmpyuu_s0 I32:$Rs, I32:$Rt)>; -def: Pat<(mul Sext64Ld:$src1, Sext64Ld:$src2), - (M2_dpmpyss_s0 (LoReg DoubleRegs:$src1), (LoReg DoubleRegs:$src2))>; +def: Pat<(mul Sext64:$Rs, Sext64:$Rt), + (M2_dpmpyss_s0 (LoReg Sext64:$Rs), (LoReg Sext64:$Rt))>; // Multiply and accumulate, use full result. 
// Rxx[+-]=mpy(Rs,Rt) -def: Pat<(add I64:$src1, (mul (Sext64 I32:$src2), (Sext64 I32:$src3))), - (M2_dpmpyss_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; +def: Pat<(add I64:$Rx, (mul Sext64:$Rs, Sext64:$Rt)), + (M2_dpmpyss_acc_s0 I64:$Rx, (LoReg Sext64:$Rs), (LoReg Sext64:$Rt))>; -def: Pat<(sub I64:$src1, (mul (Sext64 I32:$src2), (Sext64 I32:$src3))), - (M2_dpmpyss_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; +def: Pat<(sub I64:$Rx, (mul Sext64:$Rs, Sext64:$Rt)), + (M2_dpmpyss_nac_s0 I64:$Rx, (LoReg Sext64:$Rs), (LoReg Sext64:$Rt))>; -def: Pat<(add I64:$src1, (mul (Aext64 I32:$src2), (Aext64 I32:$src3))), - (M2_dpmpyuu_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; +def: Pat<(add I64:$Rx, (mul (Aext64 I32:$Rs), (Aext64 I32:$Rt))), + (M2_dpmpyuu_acc_s0 I64:$Rx, I32:$Rs, I32:$Rt)>; -def: Pat<(add I64:$src1, (mul (Zext64 I32:$src2), (Zext64 I32:$src3))), - (M2_dpmpyuu_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; +def: Pat<(add I64:$Rx, (mul (Zext64 I32:$Rs), (Zext64 I32:$Rt))), + (M2_dpmpyuu_acc_s0 I64:$Rx, I32:$Rs, I32:$Rt)>; -def: Pat<(sub I64:$src1, (mul (Aext64 I32:$src2), (Aext64 I32:$src3))), - (M2_dpmpyuu_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; +def: Pat<(sub I64:$Rx, (mul (Aext64 I32:$Rs), (Aext64 I32:$Rt))), + (M2_dpmpyuu_nac_s0 I64:$Rx, I32:$Rs, I32:$Rt)>; -def: Pat<(sub I64:$src1, (mul (Zext64 I32:$src2), (Zext64 I32:$src3))), - (M2_dpmpyuu_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; +def: Pat<(sub I64:$Rx, (mul (Zext64 I32:$Rs), (Zext64 I32:$Rt))), + (M2_dpmpyuu_nac_s0 I64:$Rx, I32:$Rs, I32:$Rt)>; class Storepi_pat @@ -545,7 +553,8 @@ def: Storexm_simple_pat; def: Storexm_simple_pat; def: Storexm_simple_pat; -def: Pat <(Sext64 I32:$src), (A2_sxtw I32:$src)>; +def: Pat <(i64 (sext I32:$src)), (A2_sxtw I32:$src)>; +def: Pat <(i64 (sext_inreg I64:$src, i32)), (A2_sxtw (LoReg I64:$src))>; def: Pat<(select (i1 (setlt I32:$src, 0)), (sub 0, I32:$src), I32:$src), (A2_abs IntRegs:$src)>; @@ -679,6 +688,8 @@ def I32toI1: OutPatFrag<(ops node:$Rs), defm: Storexm_pat; def: Storexm_simple_pat; +def: Pat<(sra (add (sra I64:$src, u6_0ImmPred:$u6), 1), (i32 1)), + (S2_asr_i_p_rnd DoubleRegs:$src, imm:$u6)>, Requires<[HasV5T]>; def: Pat<(sra I64:$src, u6_0ImmPred:$u6), (S2_asr_i_p DoubleRegs:$src, imm:$u6)>; def: Pat<(srl I64:$src, u6_0ImmPred:$u6), @@ -791,27 +802,19 @@ def: Pat<(i64 (sext_inreg I64:$src1, i16)), def: Pat<(i64 (sext_inreg I64:$src1, i8)), (A2_sxtw (A2_sxtb (LoReg DoubleRegs:$src1)))>; -// We want to prevent emitting pnot's as much as possible. -// Map brcond with an unsupported setcc to a J2_jumpf. 
-def : Pat <(brcond (i1 (setne I32:$src1, I32:$src2)), - bb:$offset), - (J2_jumpf (C2_cmpeq I32:$src1, I32:$src2), - bb:$offset)>; - -def : Pat <(brcond (i1 (setne I32:$src1, s10_0ImmPred:$src2)), - bb:$offset), - (J2_jumpf (C2_cmpeqi I32:$src1, s10_0ImmPred:$src2), bb:$offset)>; - -def: Pat<(brcond (i1 (setne I1:$src1, (i1 -1))), bb:$offset), - (J2_jumpf PredRegs:$src1, bb:$offset)>; - -def: Pat<(brcond (i1 (setne I1:$src1, (i1 0))), bb:$offset), - (J2_jumpt PredRegs:$src1, bb:$offset)>; +def: Pat<(brcond (i1 (setne I32:$Rs, I32:$Rt)), bb:$offset), + (J2_jumpf (C2_cmpeq I32:$Rs, I32:$Rt), bb:$offset)>; +def: Pat<(brcond (i1 (setne I32:$Rs, s10_0ImmPred:$s10)), bb:$offset), + (J2_jumpf (C2_cmpeqi I32:$Rs, imm:$s10), bb:$offset)>; +def: Pat<(brcond (i1 (setne I1:$Pu, (i1 -1))), bb:$offset), + (J2_jumpf PredRegs:$Pu, bb:$offset)>; +def: Pat<(brcond (i1 (setne I1:$Pu, (i1 0))), bb:$offset), + (J2_jumpt PredRegs:$Pu, bb:$offset)>; // cmp.lt(Rs, Imm) -> !cmp.ge(Rs, Imm) -> !cmp.gt(Rs, Imm-1) -def: Pat<(brcond (i1 (setlt I32:$src1, s8_0ImmPred:$src2)), bb:$offset), - (J2_jumpf (C2_cmpgti IntRegs:$src1, (SDEC1 s8_0ImmPred:$src2)), - bb:$offset)>; +def: Pat<(brcond (i1 (setlt I32:$Rs, s8_0ImmPred:$s8)), bb:$offset), + (J2_jumpf (C2_cmpgti IntRegs:$Rs, (SDEC1 imm:$s8)), bb:$offset)>; + // Map from a 64-bit select to an emulated 64-bit mux. // Hexagon does not support 64-bit MUXes; so emulate with combines. @@ -865,15 +868,13 @@ def: Pat<(i1 (setne I1:$src1, I1:$src2)), def: Pat<(i1 (setne I64:$src1, I64:$src2)), (C2_not (C2_cmpeqp DoubleRegs:$src1, DoubleRegs:$src2))>; -// Map cmpge(Rs, Rt) -> !cmpgt(Rs, Rt). -// rs >= rt -> !(rt > rs). -def : Pat <(i1 (setge I32:$src1, I32:$src2)), - (i1 (C2_not (i1 (C2_cmpgt I32:$src2, I32:$src1))))>; +// rs >= rt -> rt <= rs +def: Pat<(i1 (setge I32:$Rs, I32:$Rt)), + (C4_cmplte I32:$Rt, I32:$Rs)>; -// cmpge(Rs, Imm) -> cmpgt(Rs, Imm-1) let AddedComplexity = 30 in -def: Pat<(i1 (setge I32:$src1, s32_0ImmPred:$src2)), - (C2_cmpgti IntRegs:$src1, (SDEC1 s32_0ImmPred:$src2))>; +def: Pat<(i1 (setge I32:$Rs, s32_0ImmPred:$s10)), + (C2_cmpgti IntRegs:$Rs, (SDEC1 imm:$s10))>; // Map cmpge(Rss, Rtt) -> !cmpgt(Rtt, Rss). // rss >= rtt -> !(rtt > rss). @@ -1159,8 +1160,8 @@ multiclass MinMax_pats_p { defm: T_MinMax_pats; } -def: Pat<(add (Sext64 I32:$Rs), I64:$Rt), - (A2_addsp IntRegs:$Rs, DoubleRegs:$Rt)>; +def: Pat<(add Sext64:$Rs, I64:$Rt), + (A2_addsp (LoReg Sext64:$Rs), DoubleRegs:$Rt)>; let AddedComplexity = 200 in { defm: MinMax_pats_p; @@ -1474,16 +1475,22 @@ def i32in8ImmPred: PatLeaf<(i32 imm), [{ return v == (int64_t)(int8_t)v; }]>; +class SmallStackStore<PatFrag Store> + : PatFrag<(ops node:$Val, node:$Addr), (Store node:$Val, node:$Addr), [{ + return isSmallStackStore(cast<StoreSDNode>(N)); +}]>; let AddedComplexity = 40 in { // Even though the offset is not extendable in the store-immediate, we // can still generate the fi# in the base address. If the final offset // is not valid for the instruction, we will replace it with a scratch // register.
-// def: Storexm_fi_pat ; -// def: Storexm_fi_pat ; -// def: Storexm_fi_pat ; + def: Storexm_fi_pat <SmallStackStore<truncstorei8>, s32_0ImmPred, + ToImmByte, S4_storeirb_io>; + def: Storexm_fi_pat <SmallStackStore<truncstorei16>, i16in8ImmPred, + ToImmHalf, S4_storeirh_io>; + def: Storexm_fi_pat <SmallStackStore<store>, i32in8ImmPred, + ToImmWord, S4_storeiri_io>; // defm: Storexm_fi_add_pat ; @@ -1639,9 +1646,14 @@ def: Pat<(i1 (setne (and I32:$Rs, I32:$Rt), I32:$Rt)), def: Pat<(add (mul I32:$Rs, u6_0ImmPred:$U6), u32_0ImmPred:$u6), (M4_mpyri_addi imm:$u6, IntRegs:$Rs, imm:$U6)>; +def: Pat<(add (mul I32:$Rs, u6_0ImmPred:$U6), + (HexagonCONST32 tglobaladdr:$global)), + (M4_mpyri_addi tglobaladdr:$global, IntRegs:$Rs, imm:$U6)>; def: Pat<(add (mul I32:$Rs, I32:$Rt), u32_0ImmPred:$u6), (M4_mpyrr_addi imm:$u6, IntRegs:$Rs, IntRegs:$Rt)>; - +def: Pat<(add (mul I32:$Rs, I32:$Rt), + (HexagonCONST32 tglobaladdr:$global)), + (M4_mpyrr_addi tglobaladdr:$global, IntRegs:$Rs, IntRegs:$Rt)>; def: Pat<(add I32:$src1, (mul I32:$src3, u6_2ImmPred:$src2)), (M4_mpyri_addr_u2 IntRegs:$src1, imm:$src2, IntRegs:$src3)>; def: Pat<(add I32:$src1, (mul I32:$src3, u32_0ImmPred:$src2)), @@ -2134,6 +2146,11 @@ let AddedComplexity = 30 in { def: Storea_pat; def: Storea_pat; def: Storea_pat; + def: Storea_pat; + + def: Stoream_pat; + def: Stoream_pat; + def: Stoream_pat; } let AddedComplexity = 30 in { @@ -2142,6 +2159,19 @@ let AddedComplexity = 30 in { def: Loada_pat; def: Loada_pat; def: Loada_pat; + def: Loada_pat; + + def: Loadam_pat; + def: Loadam_pat; + def: Loadam_pat; + + def: Loadam_pat; + def: Loadam_pat; + def: Loadam_pat; + + def: Loadam_pat; + def: Loadam_pat; + def: Loadam_pat; } // Indexed store word - global address. @@ -2220,6 +2250,12 @@ def: Storea_pat<truncstorei16, I32, addrgp, PS_storerhabs>; def: Storea_pat<store, I32, addrgp, PS_storeriabs>; def: Storea_pat<store, I64, addrgp, PS_storerdabs>; +// Prefer this pattern to S2_asl_i_p_or for the special case of joining +// two 32-bit words into a 64-bit word.
+let AddedComplexity = 200 in +def: Pat<(or (shl (Aext64 I32:$a), (i32 32)), (Zext64 I32:$b)), + (A2_combinew I32:$a, I32:$b)>; + def: Pat<(or (or (or (shl (i64 (zext (and I32:$b, (i32 65535)))), (i32 16)), (i64 (zext (i32 (and I32:$a, (i32 65535)))))), (shl (i64 (anyext (and I32:$c, (i32 65535)))), (i32 32))), @@ -2712,6 +2748,15 @@ def: Pat<(fneg F64:$Rs), (S2_togglebit_i (HiReg $Rs), 31), isub_hi, (i32 (LoReg $Rs)), isub_lo)>; +def: Pat<(mul I64:$Rss, I64:$Rtt), + (A2_combinew + (M2_maci (M2_maci (HiReg (M2_dpmpyuu_s0 (LoReg $Rss), (LoReg $Rtt))), + (LoReg $Rss), + (HiReg $Rtt)), + (LoReg $Rtt), + (HiReg $Rss)), + (LoReg (M2_dpmpyuu_s0 (LoReg $Rss), (LoReg $Rtt))))>; + def alignedload : PatFrag<(ops node:$addr), (load $addr), [{ return isAlignedMemNode(dyn_cast<MemSDNode>(N)); }]>; @@ -2731,6 +2776,9 @@ def unalignedstore : PatFrag<(ops node:$val, node:$addr), (store $val, $addr), [ multiclass vS32b_ai_pats <ValueType VTSgl, ValueType VTDbl> { // Aligned stores + def : Pat<(alignednontemporalstore (VTSgl VectorRegs:$src1), IntRegs:$addr), + (V6_vS32b_nt_ai IntRegs:$addr, 0, (VTSgl VectorRegs:$src1))>, + Requires<[UseHVXSgl]>; def : Pat<(alignedstore (VTSgl VectorRegs:$src1), IntRegs:$addr), (V6_vS32b_ai IntRegs:$addr, 0, (VTSgl VectorRegs:$src1))>, Requires<[UseHVXSgl]>; @@ -2739,6 +2787,9 @@ multiclass vS32b_ai_pats <ValueType VTSgl, ValueType VTDbl> { Requires<[UseHVXSgl]>; // 128B Aligned stores + def : Pat<(alignednontemporalstore (VTDbl VectorRegs128B:$src1), IntRegs:$addr), + (V6_vS32b_nt_ai_128B IntRegs:$addr, 0, (VTDbl VectorRegs128B:$src1))>, + Requires<[UseHVXDbl]>; def : Pat<(alignedstore (VTDbl VectorRegs128B:$src1), IntRegs:$addr), (V6_vS32b_ai_128B IntRegs:$addr, 0, (VTDbl VectorRegs128B:$src1))>, Requires<[UseHVXDbl]>; @@ -2748,6 +2799,11 @@ multiclass vS32b_ai_pats <ValueType VTSgl, ValueType VTDbl> { // Fold Add R+OFF into vector store. let AddedComplexity = 10 in { + def : Pat<(alignednontemporalstore (VTSgl VectorRegs:$src1), + (add IntRegs:$src2, Iss4_6:$offset)), + (V6_vS32b_nt_ai IntRegs:$src2, Iss4_6:$offset, + (VTSgl VectorRegs:$src1))>, + Requires<[UseHVXSgl]>; def : Pat<(alignedstore (VTSgl VectorRegs:$src1), (add IntRegs:$src2, Iss4_6:$offset)), (V6_vS32b_ai IntRegs:$src2, Iss4_6:$offset, (VTSgl VectorRegs:$src1))>, @@ -2760,6 +2816,11 @@ multiclass vS32b_ai_pats <ValueType VTSgl, ValueType VTDbl> { Requires<[UseHVXSgl]>; // Fold Add R+OFF into vector store 128B. + def : Pat<(alignednontemporalstore (VTDbl VectorRegs128B:$src1), + (add IntRegs:$src2, Iss4_7:$offset)), + (V6_vS32b_nt_ai_128B IntRegs:$src2, Iss4_7:$offset, + (VTDbl VectorRegs128B:$src1))>, + Requires<[UseHVXDbl]>; def : Pat<(alignedstore (VTDbl VectorRegs128B:$src1), (add IntRegs:$src2, Iss4_7:$offset)), (V6_vS32b_ai_128B IntRegs:$src2, Iss4_7:$offset, @@ -2781,6 +2842,9 @@ defm : vS32b_ai_pats ; multiclass vL32b_ai_pats <ValueType VTSgl, ValueType VTDbl> { // Aligned loads + def : Pat < (VTSgl (alignednontemporalload IntRegs:$addr)), + (V6_vL32b_nt_ai IntRegs:$addr, 0) >, + Requires<[UseHVXSgl]>; def : Pat < (VTSgl (alignedload IntRegs:$addr)), (V6_vL32b_ai IntRegs:$addr, 0) >, Requires<[UseHVXSgl]>; @@ -2789,6 +2853,9 @@ multiclass vL32b_ai_pats <ValueType VTSgl, ValueType VTDbl> { Requires<[UseHVXSgl]>; // 128B Load + def : Pat < (VTDbl (alignednontemporalload IntRegs:$addr)), + (V6_vL32b_nt_ai_128B IntRegs:$addr, 0) >, + Requires<[UseHVXDbl]>; def : Pat < (VTDbl (alignedload IntRegs:$addr)), (V6_vL32b_ai_128B IntRegs:$addr, 0) >, Requires<[UseHVXDbl]>; @@ -2798,6 +2865,9 @@ multiclass vL32b_ai_pats <ValueType VTSgl, ValueType VTDbl> { // Fold Add R+OFF into vector load.
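The new i64 multiply pattern above can be sanity-checked in plain C++: the low 64 bits of a 64x64 product are the full 32x32 product of the low words (M2_dpmpyuu_s0) with both cross products accumulated into its high word (the nested M2_maci calls), re-joined by A2_combinew. A self-contained sketch:

  #include <cstdint>

  uint64_t mul64(uint64_t Rss, uint64_t Rtt) {
    uint32_t SLo = (uint32_t)Rss, SHi = (uint32_t)(Rss >> 32);
    uint32_t TLo = (uint32_t)Rtt, THi = (uint32_t)(Rtt >> 32);
    uint64_t P = (uint64_t)SLo * TLo;           // M2_dpmpyuu_s0(Lo, Lo)
    uint32_t Hi = (uint32_t)(P >> 32);
    Hi += SLo * THi;                            // M2_maci: += Lo(Rss)*Hi(Rtt)
    Hi += SHi * TLo;                            // M2_maci: += Hi(Rss)*Lo(Rtt)
    return ((uint64_t)Hi << 32) | (uint32_t)P;  // A2_combinew(Hi, Lo)
  }

The SHi*THi term is dropped because it only affects bits above 63.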
let AddedComplexity = 10 in { + def : Pat<(VTDbl (alignednontemporalload (add IntRegs:$src2, Iss4_7:$offset))), + (V6_vL32b_nt_ai_128B IntRegs:$src2, Iss4_7:$offset)>, + Requires<[UseHVXDbl]>; def : Pat<(VTDbl (alignedload (add IntRegs:$src2, Iss4_7:$offset))), (V6_vL32b_ai_128B IntRegs:$src2, Iss4_7:$offset)>, Requires<[UseHVXDbl]>; @@ -2805,6 +2875,9 @@ multiclass vL32b_ai_pats { (V6_vL32Ub_ai_128B IntRegs:$src2, Iss4_7:$offset)>, Requires<[UseHVXDbl]>; + def : Pat<(VTSgl (alignednontemporalload (add IntRegs:$src2, Iss4_6:$offset))), + (V6_vL32b_nt_ai IntRegs:$src2, Iss4_6:$offset)>, + Requires<[UseHVXSgl]>; def : Pat<(VTSgl (alignedload (add IntRegs:$src2, Iss4_6:$offset))), (V6_vL32b_ai IntRegs:$src2, Iss4_6:$offset)>, Requires<[UseHVXSgl]>; @@ -2820,6 +2893,9 @@ defm : vL32b_ai_pats ; defm : vL32b_ai_pats ; multiclass STrivv_pats { + def : Pat<(alignednontemporalstore (VTSgl VecDblRegs:$src1), IntRegs:$addr), + (PS_vstorerw_nt_ai IntRegs:$addr, 0, (VTSgl VecDblRegs:$src1))>, + Requires<[UseHVXSgl]>; def : Pat<(alignedstore (VTSgl VecDblRegs:$src1), IntRegs:$addr), (PS_vstorerw_ai IntRegs:$addr, 0, (VTSgl VecDblRegs:$src1))>, Requires<[UseHVXSgl]>; @@ -2827,6 +2903,10 @@ multiclass STrivv_pats { (PS_vstorerwu_ai IntRegs:$addr, 0, (VTSgl VecDblRegs:$src1))>, Requires<[UseHVXSgl]>; + def : Pat<(alignednontemporalstore (VTDbl VecDblRegs128B:$src1), IntRegs:$addr), + (PS_vstorerw_nt_ai_128B IntRegs:$addr, 0, + (VTDbl VecDblRegs128B:$src1))>, + Requires<[UseHVXDbl]>; def : Pat<(alignedstore (VTDbl VecDblRegs128B:$src1), IntRegs:$addr), (PS_vstorerw_ai_128B IntRegs:$addr, 0, (VTDbl VecDblRegs128B:$src1))>, @@ -2843,6 +2923,9 @@ defm : STrivv_pats ; defm : STrivv_pats ; multiclass LDrivv_pats { + def : Pat<(VTSgl (alignednontemporalload I32:$addr)), + (PS_vloadrw_nt_ai I32:$addr, 0)>, + Requires<[UseHVXSgl]>; def : Pat<(VTSgl (alignedload I32:$addr)), (PS_vloadrw_ai I32:$addr, 0)>, Requires<[UseHVXSgl]>; @@ -2850,6 +2933,9 @@ multiclass LDrivv_pats { (PS_vloadrwu_ai I32:$addr, 0)>, Requires<[UseHVXSgl]>; + def : Pat<(VTDbl (alignednontemporalload I32:$addr)), + (PS_vloadrw_nt_ai_128B I32:$addr, 0)>, + Requires<[UseHVXDbl]>; def : Pat<(VTDbl (alignedload I32:$addr)), (PS_vloadrw_ai_128B I32:$addr, 0)>, Requires<[UseHVXDbl]>; @@ -2891,45 +2977,40 @@ def: Pat<(v64i32 (HexagonVCOMBINE (v32i32 VecDblRegs:$Vs), (V6_vcombine_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>, Requires<[UseHVXDbl]>; -def SDTHexagonVPACK: SDTypeProfile<1, 3, [SDTCisSameAs<1, 2>, - SDTCisInt<3>]>; - -def HexagonVPACK: SDNode<"HexagonISD::VPACK", SDTHexagonVPACK>; - -// 0 as the last argument denotes vpacke. 
1 denotes vpacko -def: Pat<(v64i8 (HexagonVPACK (v64i8 VectorRegs:$Vs), - (v64i8 VectorRegs:$Vt), (i32 0))), - (V6_vpackeb VectorRegs:$Vs, VectorRegs:$Vt)>, - Requires<[UseHVXSgl]>; -def: Pat<(v64i8 (HexagonVPACK (v64i8 VectorRegs:$Vs), - (v64i8 VectorRegs:$Vt), (i32 1))), - (V6_vpackob VectorRegs:$Vs, VectorRegs:$Vt)>, - Requires<[UseHVXSgl]>; -def: Pat<(v32i16 (HexagonVPACK (v32i16 VectorRegs:$Vs), - (v32i16 VectorRegs:$Vt), (i32 0))), - (V6_vpackeh VectorRegs:$Vs, VectorRegs:$Vt)>, - Requires<[UseHVXSgl]>; -def: Pat<(v32i16 (HexagonVPACK (v32i16 VectorRegs:$Vs), - (v32i16 VectorRegs:$Vt), (i32 1))), - (V6_vpackoh VectorRegs:$Vs, VectorRegs:$Vt)>, - Requires<[UseHVXSgl]>; - -def: Pat<(v128i8 (HexagonVPACK (v128i8 VecDblRegs:$Vs), - (v128i8 VecDblRegs:$Vt), (i32 0))), - (V6_vpackeb_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>, - Requires<[UseHVXDbl]>; -def: Pat<(v128i8 (HexagonVPACK (v128i8 VecDblRegs:$Vs), - (v128i8 VecDblRegs:$Vt), (i32 1))), - (V6_vpackob_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>, - Requires<[UseHVXDbl]>; -def: Pat<(v64i16 (HexagonVPACK (v64i16 VecDblRegs:$Vs), - (v64i16 VecDblRegs:$Vt), (i32 0))), - (V6_vpackeh_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>, - Requires<[UseHVXDbl]>; -def: Pat<(v64i16 (HexagonVPACK (v64i16 VecDblRegs:$Vs), - (v64i16 VecDblRegs:$Vt), (i32 1))), - (V6_vpackoh_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>, - Requires<[UseHVXDbl]>; +def SDTHexagonVPACK: SDTypeProfile<1, 2, [SDTCisSameAs<1, 2>, SDTCisVec<1>]>; + +def HexagonVPACKE: SDNode<"HexagonISD::VPACKE", SDTHexagonVPACK>; +def HexagonVPACKO: SDNode<"HexagonISD::VPACKO", SDTHexagonVPACK>; + +let Predicates = [UseHVXSgl] in { + def: Pat<(v64i8 (HexagonVPACKE (v64i8 VectorRegs:$Vs), + (v64i8 VectorRegs:$Vt))), + (V6_vpackeb VectorRegs:$Vs, VectorRegs:$Vt)>; + def: Pat<(v64i8 (HexagonVPACKO (v64i8 VectorRegs:$Vs), + (v64i8 VectorRegs:$Vt))), + (V6_vpackob VectorRegs:$Vs, VectorRegs:$Vt)>; + def: Pat<(v32i16 (HexagonVPACKE (v32i16 VectorRegs:$Vs), + (v32i16 VectorRegs:$Vt))), + (V6_vpackeh VectorRegs:$Vs, VectorRegs:$Vt)>; + def: Pat<(v32i16 (HexagonVPACKO (v32i16 VectorRegs:$Vs), + (v32i16 VectorRegs:$Vt))), + (V6_vpackoh VectorRegs:$Vs, VectorRegs:$Vt)>; +} + +let Predicates = [UseHVXDbl] in { + def: Pat<(v128i8 (HexagonVPACKE (v128i8 VecDblRegs:$Vs), + (v128i8 VecDblRegs:$Vt))), + (V6_vpackeb_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>; + def: Pat<(v128i8 (HexagonVPACKO (v128i8 VecDblRegs:$Vs), + (v128i8 VecDblRegs:$Vt))), + (V6_vpackob_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>; + def: Pat<(v64i16 (HexagonVPACKE (v64i16 VecDblRegs:$Vs), + (v64i16 VecDblRegs:$Vt))), + (V6_vpackeh_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>; + def: Pat<(v64i16 (HexagonVPACKO (v64i16 VecDblRegs:$Vs), + (v64i16 VecDblRegs:$Vt))), + (V6_vpackoh_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>; +} def V2I1: PatLeaf<(v2i1 PredRegs:$R)>; def V4I1: PatLeaf<(v4i1 PredRegs:$R)>; @@ -2982,16 +3063,20 @@ def : Pat<(v2i16 (add (v2i16 IntRegs:$src1), (v2i16 IntRegs:$src2))), def : Pat<(v2i16 (sub (v2i16 IntRegs:$src1), (v2i16 IntRegs:$src2))), (A2_svsubh IntRegs:$src1, IntRegs:$src2)>; -def HexagonVSPLATB: SDNode<"HexagonISD::VSPLATB", SDTUnaryOp>; -def HexagonVSPLATH: SDNode<"HexagonISD::VSPLATH", SDTUnaryOp>; +def SDTHexagonVSPLAT: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>; +def HexagonVSPLAT: SDNode<"HexagonISD::VSPLAT", SDTHexagonVSPLAT>; // Replicate the low 8-bits from 32-bits input register into each of the // four bytes of 32-bits destination register. 
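// For example (illustrative values): S2_vsplatrb maps 0x000000AB to
// 0xABABABAB, and S2_vsplatrh maps 0x0000ABCD to the 64-bit value
// 0xABCDABCDABCDABCD; arithmetically, splatting a byte B across a word
// is B * 0x01010101.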
-def: Pat<(v4i8 (HexagonVSPLATB I32:$Rs)), (S2_vsplatrb I32:$Rs)>; +def: Pat<(v4i8 (HexagonVSPLAT I32:$Rs)), (S2_vsplatrb I32:$Rs)>; // Replicate the low 16-bits from 32-bits input register into each of the // four halfwords of 64-bits destination register. -def: Pat<(v4i16 (HexagonVSPLATH I32:$Rs)), (S2_vsplatrh I32:$Rs)>; +def: Pat<(v4i16 (HexagonVSPLAT I32:$Rs)), (S2_vsplatrh I32:$Rs)>; + +def: Pat<(v2i32 (HexagonVSPLAT s8_0ImmPred:$s8)), + (A2_combineii imm:$s8, imm:$s8)>; +def: Pat<(v2i32 (HexagonVSPLAT I32:$Rs)), (A2_combinew I32:$Rs, I32:$Rs)>; class VArith_pat @@ -3019,94 +3104,51 @@ def: VArith_pat ; def: VArith_pat ; def: VArith_pat ; -def: Pat<(v2i32 (sra V2I32:$b, (i64 (HexagonCOMBINE (i32 u5_0ImmPred:$c), - (i32 u5_0ImmPred:$c))))), +def: Pat<(v2i32 (sra V2I32:$b, (v2i32 (HexagonVSPLAT u5_0ImmPred:$c)))), (S2_asr_i_vw V2I32:$b, imm:$c)>; -def: Pat<(v2i32 (srl V2I32:$b, (i64 (HexagonCOMBINE (i32 u5_0ImmPred:$c), - (i32 u5_0ImmPred:$c))))), +def: Pat<(v2i32 (srl V2I32:$b, (v2i32 (HexagonVSPLAT u5_0ImmPred:$c)))), (S2_lsr_i_vw V2I32:$b, imm:$c)>; -def: Pat<(v2i32 (shl V2I32:$b, (i64 (HexagonCOMBINE (i32 u5_0ImmPred:$c), - (i32 u5_0ImmPred:$c))))), +def: Pat<(v2i32 (shl V2I32:$b, (v2i32 (HexagonVSPLAT u5_0ImmPred:$c)))), (S2_asl_i_vw V2I32:$b, imm:$c)>; -def: Pat<(v4i16 (sra V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4_0ImmPred:$c)))))), +def: Pat<(v4i16 (sra V4I16:$b, (v4i16 (HexagonVSPLAT u4_0ImmPred:$c)))), (S2_asr_i_vh V4I16:$b, imm:$c)>; -def: Pat<(v4i16 (srl V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4_0ImmPred:$c)))))), +def: Pat<(v4i16 (srl V4I16:$b, (v4i16 (HexagonVSPLAT u4_0ImmPred:$c)))), (S2_lsr_i_vh V4I16:$b, imm:$c)>; -def: Pat<(v4i16 (shl V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4_0ImmPred:$c)))))), +def: Pat<(v4i16 (shl V4I16:$b, (v4i16 (HexagonVSPLAT u4_0ImmPred:$c)))), (S2_asl_i_vh V4I16:$b, imm:$c)>; -def SDTHexagon_v2i32_v2i32_i32 : SDTypeProfile<1, 2, - [SDTCisSameAs<0, 1>, SDTCisVT<0, v2i32>, SDTCisInt<2>]>; -def SDTHexagon_v4i16_v4i16_i32 : SDTypeProfile<1, 2, - [SDTCisSameAs<0, 1>, SDTCisVT<0, v4i16>, SDTCisInt<2>]>; +def SDTHexagonVShift + : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisVec<0>, SDTCisVT<2, i32>]>; -def HexagonVSRAW: SDNode<"HexagonISD::VSRAW", SDTHexagon_v2i32_v2i32_i32>; -def HexagonVSRAH: SDNode<"HexagonISD::VSRAH", SDTHexagon_v4i16_v4i16_i32>; -def HexagonVSRLW: SDNode<"HexagonISD::VSRLW", SDTHexagon_v2i32_v2i32_i32>; -def HexagonVSRLH: SDNode<"HexagonISD::VSRLH", SDTHexagon_v4i16_v4i16_i32>; -def HexagonVSHLW: SDNode<"HexagonISD::VSHLW", SDTHexagon_v2i32_v2i32_i32>; -def HexagonVSHLH: SDNode<"HexagonISD::VSHLH", SDTHexagon_v4i16_v4i16_i32>; +def HexagonVASL: SDNode<"HexagonISD::VASL", SDTHexagonVShift>; +def HexagonVASR: SDNode<"HexagonISD::VASR", SDTHexagonVShift>; +def HexagonVLSR: SDNode<"HexagonISD::VLSR", SDTHexagonVShift>; -def: Pat<(v2i32 (HexagonVSRAW V2I32:$Rs, u5_0ImmPred:$u5)), +def: Pat<(v2i32 (HexagonVASL V2I32:$Rs, u5_0ImmPred:$u5)), + (S2_asl_i_vw V2I32:$Rs, imm:$u5)>; +def: Pat<(v4i16 (HexagonVASL V4I16:$Rs, u4_0ImmPred:$u4)), + (S2_asl_i_vh V4I16:$Rs, imm:$u4)>; +def: Pat<(v2i32 (HexagonVASR V2I32:$Rs, u5_0ImmPred:$u5)), (S2_asr_i_vw V2I32:$Rs, imm:$u5)>; -def: Pat<(v4i16 (HexagonVSRAH V4I16:$Rs, u4_0ImmPred:$u4)), +def: Pat<(v4i16 (HexagonVASR V4I16:$Rs, u4_0ImmPred:$u4)), (S2_asr_i_vh V4I16:$Rs, imm:$u4)>; -def: Pat<(v2i32 (HexagonVSRLW V2I32:$Rs, u5_0ImmPred:$u5)), +def: Pat<(v2i32 (HexagonVLSR V2I32:$Rs, u5_0ImmPred:$u5)), (S2_lsr_i_vw V2I32:$Rs, imm:$u5)>; -def: Pat<(v4i16 (HexagonVSRLH V4I16:$Rs, u4_0ImmPred:$u4)), 
+def: Pat<(v4i16 (HexagonVLSR V4I16:$Rs, u4_0ImmPred:$u4)), (S2_lsr_i_vh V4I16:$Rs, imm:$u4)>; -def: Pat<(v2i32 (HexagonVSHLW V2I32:$Rs, u5_0ImmPred:$u5)), - (S2_asl_i_vw V2I32:$Rs, imm:$u5)>; -def: Pat<(v4i16 (HexagonVSHLH V4I16:$Rs, u4_0ImmPred:$u4)), - (S2_asl_i_vh V4I16:$Rs, imm:$u4)>; class vshift_rr_pat : Pat <(Op Value:$Rs, I32:$Rt), (MI Value:$Rs, I32:$Rt)>; -def: vshift_rr_pat ; -def: vshift_rr_pat ; -def: vshift_rr_pat ; -def: vshift_rr_pat ; -def: vshift_rr_pat ; -def: vshift_rr_pat ; - - -def SDTHexagonVecCompare_v8i8 : SDTypeProfile<1, 2, - [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v8i8>]>; -def SDTHexagonVecCompare_v4i16 : SDTypeProfile<1, 2, - [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v4i16>]>; -def SDTHexagonVecCompare_v2i32 : SDTypeProfile<1, 2, - [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v2i32>]>; - -def HexagonVCMPBEQ: SDNode<"HexagonISD::VCMPBEQ", SDTHexagonVecCompare_v8i8>; -def HexagonVCMPBGT: SDNode<"HexagonISD::VCMPBGT", SDTHexagonVecCompare_v8i8>; -def HexagonVCMPBGTU: SDNode<"HexagonISD::VCMPBGTU", SDTHexagonVecCompare_v8i8>; -def HexagonVCMPHEQ: SDNode<"HexagonISD::VCMPHEQ", SDTHexagonVecCompare_v4i16>; -def HexagonVCMPHGT: SDNode<"HexagonISD::VCMPHGT", SDTHexagonVecCompare_v4i16>; -def HexagonVCMPHGTU: SDNode<"HexagonISD::VCMPHGTU", SDTHexagonVecCompare_v4i16>; -def HexagonVCMPWEQ: SDNode<"HexagonISD::VCMPWEQ", SDTHexagonVecCompare_v2i32>; -def HexagonVCMPWGT: SDNode<"HexagonISD::VCMPWGT", SDTHexagonVecCompare_v2i32>; -def HexagonVCMPWGTU: SDNode<"HexagonISD::VCMPWGTU", SDTHexagonVecCompare_v2i32>; - - -class vcmp_i1_pat - : Pat <(i1 (Op Value:$Rs, Value:$Rt)), - (MI Value:$Rs, Value:$Rt)>; - -def: vcmp_i1_pat; -def: vcmp_i1_pat; -def: vcmp_i1_pat; - -def: vcmp_i1_pat; -def: vcmp_i1_pat; -def: vcmp_i1_pat; - -def: vcmp_i1_pat; -def: vcmp_i1_pat; -def: vcmp_i1_pat; +def: vshift_rr_pat ; +def: vshift_rr_pat ; +def: vshift_rr_pat ; +def: vshift_rr_pat ; +def: vshift_rr_pat ; +def: vshift_rr_pat ; class vcmp_vi1_pat @@ -3216,13 +3258,6 @@ def: Pat<(v4i8 (trunc V4I16:$Rs)), def: Pat<(v2i16 (trunc V2I32:$Rs)), (LoReg (S2_packhl (HiReg $Rs), (LoReg $Rs)))>; - -def HexagonVSXTBH : SDNode<"HexagonISD::VSXTBH", SDTUnaryOp>; -def HexagonVSXTBW : SDNode<"HexagonISD::VSXTBW", SDTUnaryOp>; - -def: Pat<(i64 (HexagonVSXTBH I32:$Rs)), (S2_vsxtbh I32:$Rs)>; -def: Pat<(i64 (HexagonVSXTBW I32:$Rs)), (S2_vsxthw I32:$Rs)>; - def: Pat<(v4i16 (zext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>; def: Pat<(v2i32 (zext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>; def: Pat<(v4i16 (anyext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>; @@ -3283,31 +3318,6 @@ def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)), (A2_combinew (S2_vtrunehb (VMPYB_no_V5 (HiReg $Rs), (HiReg $Rt))), (S2_vtrunehb (VMPYB_no_V5 (LoReg $Rs), (LoReg $Rt))))>; -def SDTHexagonBinOp64 : SDTypeProfile<1, 2, - [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisVT<0, i64>]>; - -def HexagonSHUFFEB: SDNode<"HexagonISD::SHUFFEB", SDTHexagonBinOp64>; -def HexagonSHUFFEH: SDNode<"HexagonISD::SHUFFEH", SDTHexagonBinOp64>; -def HexagonSHUFFOB: SDNode<"HexagonISD::SHUFFOB", SDTHexagonBinOp64>; -def HexagonSHUFFOH: SDNode<"HexagonISD::SHUFFOH", SDTHexagonBinOp64>; - -class ShufflePat - : Pat<(i64 (Op DoubleRegs:$src1, DoubleRegs:$src2)), - (i64 (MI DoubleRegs:$src1, DoubleRegs:$src2))>; - -// Shuffles even bytes for i=0..3: A[2*i].b = C[2*i].b; A[2*i+1].b = B[2*i].b -def: ShufflePat; - -// Shuffles odd bytes for i=0..3: A[2*i].b = C[2*i+1].b; A[2*i+1].b = B[2*i+1].b -def: ShufflePat; - -// Shuffles even half for i=0,1: A[2*i].h = C[2*i].h; A[2*i+1].h = 
B[2*i].h -def: ShufflePat; - -// Shuffles odd half for i=0,1: A[2*i].h = C[2*i+1].h; A[2*i+1].h = B[2*i+1].h -def: ShufflePat; - - // Truncated store from v4i16 to v4i8. def truncstorev4i8: PatFrag<(ops node:$val, node:$ptr), (truncstore node:$val, node:$ptr), diff --git a/interpreter/llvm/src/lib/Target/Hexagon/HexagonPeephole.cpp b/interpreter/llvm/src/lib/Target/Hexagon/HexagonPeephole.cpp index ee3209354688d..7d961a238ae28 100644 --- a/interpreter/llvm/src/lib/Target/Hexagon/HexagonPeephole.cpp +++ b/interpreter/llvm/src/lib/Target/Hexagon/HexagonPeephole.cpp @@ -100,9 +100,6 @@ namespace { void getAnalysisUsage(AnalysisUsage &AU) const override { MachineFunctionPass::getAnalysisUsage(AU); } - - private: - void ChangeOpInto(MachineOperand &Dst, MachineOperand &Src); }; } @@ -132,7 +129,9 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) { PeepholeDoubleRegsMap.clear(); // Traverse the basic block. - for (MachineInstr &MI : *MBB) { + for (auto I = MBB->begin(), E = MBB->end(), NextI = I; I != E; I = NextI) { + NextI = std::next(I); + MachineInstr &MI = *I; // Look for sign extends: // %vreg170 = SXTW %vreg166 if (!DisableOptSZExt && MI.getOpcode() == Hexagon::A2_sxtw) { @@ -280,14 +279,13 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) { if (NewOp) { unsigned PSrc = MI.getOperand(PR).getReg(); if (unsigned POrig = PeepholeMap.lookup(PSrc)) { - MI.getOperand(PR).setReg(POrig); + BuildMI(*MBB, MI.getIterator(), MI.getDebugLoc(), + QII->get(NewOp), MI.getOperand(0).getReg()) + .addReg(POrig) + .add(MI.getOperand(S2)) + .add(MI.getOperand(S1)); MRI->clearKillFlags(POrig); - MI.setDesc(QII->get(NewOp)); - // Swap operands S1 and S2. - MachineOperand Op1 = MI.getOperand(S1); - MachineOperand Op2 = MI.getOperand(S2); - ChangeOpInto(MI.getOperand(S1), Op2); - ChangeOpInto(MI.getOperand(S2), Op1); + MI.eraseFromParent(); } } // if (NewOp) } // if (!Done) @@ -299,40 +297,6 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) { return true; } -void HexagonPeephole::ChangeOpInto(MachineOperand &Dst, MachineOperand &Src) { - assert (&Dst != &Src && "Cannot duplicate into itself"); - switch (Dst.getType()) { - case MachineOperand::MO_Register: - if (Src.isReg()) { - Dst.setReg(Src.getReg()); - Dst.setSubReg(Src.getSubReg()); - MRI->clearKillFlags(Src.getReg()); - } else if (Src.isImm()) { - Dst.ChangeToImmediate(Src.getImm()); - } else { - llvm_unreachable("Unexpected src operand type"); - } - break; - - case MachineOperand::MO_Immediate: - if (Src.isImm()) { - Dst.setImm(Src.getImm()); - } else if (Src.isReg()) { - Dst.ChangeToRegister(Src.getReg(), Src.isDef(), Src.isImplicit(), - false, Src.isDead(), Src.isUndef(), - Src.isDebug()); - Dst.setSubReg(Src.getSubReg()); - } else { - llvm_unreachable("Unexpected src operand type"); - } - break; - - default: - llvm_unreachable("Unexpected dst operand type"); - break; - } -} - FunctionPass *llvm::createHexagonPeephole() { return new HexagonPeephole(); } diff --git a/interpreter/llvm/src/lib/Target/Hexagon/HexagonPseudo.td b/interpreter/llvm/src/lib/Target/Hexagon/HexagonPseudo.td index 0f99dfe342b80..b42c1ab975a80 100644 --- a/interpreter/llvm/src/lib/Target/Hexagon/HexagonPseudo.td +++ b/interpreter/llvm/src/lib/Target/Hexagon/HexagonPseudo.td @@ -407,11 +407,25 @@ def PS_vstorerw_ai: STrivv_template, def PS_vstorerw_ai_128B: STrivv_template, Requires<[HasV60T,UseHVXDbl]>; +def PS_vstorerw_nt_ai: STrivv_template, + Requires<[HasV60T,UseHVXSgl]>; +def PS_vstorerw_nt_ai_128B: STrivv_template, + 
Requires<[HasV60T,UseHVXDbl]>; + def PS_vstorerwu_ai: STrivv_template, Requires<[HasV60T,UseHVXSgl]>; def PS_vstorerwu_ai_128B: STrivv_template, Requires<[HasV60T,UseHVXDbl]>; +let isPseudo = 1, isCodeGenOnly = 1, mayStore = 1, hasSideEffects = 0 in { + def PS_vstorerq_ai: Pseudo<(outs), + (ins IntRegs:$Rs, s32_0Imm:$Off, VecPredRegs:$Qt), "", []>, + Requires<[HasV60T,UseHVXSgl]>; + def PS_vstorerq_ai_128B: Pseudo<(outs), + (ins IntRegs:$Rs, s32_0Imm:$Off, VecPredRegs128B:$Qt), "", []>, + Requires<[HasV60T,UseHVXDbl]>; +} + // Vector load pseudos let Predicates = [HasV60T, UseHVX], isPseudo = 1, isCodeGenOnly = 1, mayLoad = 1, hasSideEffects = 0 in @@ -424,35 +438,26 @@ def PS_vloadrw_ai: LDrivv_template, def PS_vloadrw_ai_128B: LDrivv_template, Requires<[HasV60T,UseHVXDbl]>; +def PS_vloadrw_nt_ai: LDrivv_template, + Requires<[HasV60T,UseHVXSgl]>; +def PS_vloadrw_nt_ai_128B: LDrivv_template, + Requires<[HasV60T,UseHVXDbl]>; + def PS_vloadrwu_ai: LDrivv_template, Requires<[HasV60T,UseHVXSgl]>; def PS_vloadrwu_ai_128B: LDrivv_template, Requires<[HasV60T,UseHVXDbl]>; -// Store vector predicate pseudo. -let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 13, - isCodeGenOnly = 1, isPseudo = 1, mayStore = 1, hasSideEffects = 0 in { - def PS_vstorerq_ai : STInst<(outs), - (ins IntRegs:$base, s32_0Imm:$offset, VecPredRegs:$src1), - ".error \"should not emit\" ", []>, - Requires<[HasV60T,UseHVXSgl]>; - - def PS_vstorerq_ai_128B : STInst<(outs), - (ins IntRegs:$base, s32_0Imm:$offset, VectorRegs:$src1), - ".error \"should not emit\" ", []>, - Requires<[HasV60T,UseHVXSgl]>; - - def PS_vloadrq_ai : STInst<(outs), - (ins IntRegs:$base, s32_0Imm:$offset, VecPredRegs128B:$src1), - ".error \"should not emit\" ", []>, - Requires<[HasV60T,UseHVXDbl]>; - - def PS_vloadrq_ai_128B : STInst<(outs), - (ins IntRegs:$base, s32_0Imm:$offset, VecPredRegs128B:$src1), - ".error \"should not emit\" ", []>, - Requires<[HasV60T,UseHVXDbl]>; +let isPseudo = 1, isCodeGenOnly = 1, mayLoad = 1, hasSideEffects = 0 in { + def PS_vloadrq_ai: Pseudo<(outs VecPredRegs:$Qd), + (ins IntRegs:$Rs, s32_0Imm:$Off), "", []>, + Requires<[HasV60T,UseHVXSgl]>; + def PS_vloadrq_ai_128B: Pseudo<(outs VecPredRegs128B:$Qd), + (ins IntRegs:$Rs, s32_0Imm:$Off), "", []>, + Requires<[HasV60T,UseHVXDbl]>; } + let isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in class VSELInst : InstHexagon; diff --git a/interpreter/llvm/src/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/interpreter/llvm/src/lib/Target/Hexagon/HexagonRegisterInfo.cpp index 2a1bb63af7892..1fc157900ed5d 100644 --- a/interpreter/llvm/src/lib/Target/Hexagon/HexagonRegisterInfo.cpp +++ b/interpreter/llvm/src/lib/Target/Hexagon/HexagonRegisterInfo.cpp @@ -50,11 +50,6 @@ bool HexagonRegisterInfo::isEHReturnCalleeSaveReg(unsigned R) const { R == Hexagon::R3 || R == Hexagon::D0 || R == Hexagon::D1; } -bool HexagonRegisterInfo::isCalleeSaveReg(unsigned Reg) const { - return Hexagon::R16 <= Reg && Reg <= Hexagon::R27; -} - - const MCPhysReg * HexagonRegisterInfo::getCallerSavedRegs(const MachineFunction *MF, const TargetRegisterClass *RC) const { diff --git a/interpreter/llvm/src/lib/Target/Hexagon/HexagonRegisterInfo.h b/interpreter/llvm/src/lib/Target/Hexagon/HexagonRegisterInfo.h index 8a3f175b84881..5f65fad2cc042 100644 --- a/interpreter/llvm/src/lib/Target/Hexagon/HexagonRegisterInfo.h +++ b/interpreter/llvm/src/lib/Target/Hexagon/HexagonRegisterInfo.h @@ -77,7 +77,6 @@ class HexagonRegisterInfo : public HexagonGenRegisterInfo { unsigned 
getFirstCallerSavedNonParamReg() const; bool isEHReturnCalleeSaveReg(unsigned Reg) const; - bool isCalleeSaveReg(unsigned Reg) const; }; } // end namespace llvm diff --git a/interpreter/llvm/src/lib/Target/Hexagon/HexagonSplitDouble.cpp b/interpreter/llvm/src/lib/Target/Hexagon/HexagonSplitDouble.cpp index 471e32221b292..4fa929a20810a 100644 --- a/interpreter/llvm/src/lib/Target/Hexagon/HexagonSplitDouble.cpp +++ b/interpreter/llvm/src/lib/Target/Hexagon/HexagonSplitDouble.cpp @@ -13,8 +13,8 @@ #include "HexagonRegisterInfo.h" #include "HexagonSubtarget.h" #include "llvm/ADT/BitVector.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" @@ -350,6 +350,8 @@ int32_t HexagonSplitDoubleRegs::profit(const MachineInstr *MI) const { MI->getOperand(2).getImm()); case Hexagon::A4_combineri: ImmX++; + // Fall through into A4_combineir. + LLVM_FALLTHROUGH; case Hexagon::A4_combineir: { ImmX++; int64_t V = MI->getOperand(ImmX).getImm(); diff --git a/interpreter/llvm/src/lib/Target/Hexagon/HexagonSubtarget.cpp b/interpreter/llvm/src/lib/Target/Hexagon/HexagonSubtarget.cpp index 8851a23ae8ace..0aada8a53c979 100644 --- a/interpreter/llvm/src/lib/Target/Hexagon/HexagonSubtarget.cpp +++ b/interpreter/llvm/src/lib/Target/Hexagon/HexagonSubtarget.cpp @@ -1,4 +1,4 @@ -//===-- HexagonSubtarget.cpp - Hexagon Subtarget Information --------------===// +//===- HexagonSubtarget.cpp - Hexagon Subtarget Information ---------------===// // // The LLVM Compiler Infrastructure // @@ -11,13 +11,23 @@ // //===----------------------------------------------------------------------===// -#include "HexagonSubtarget.h" #include "Hexagon.h" +#include "HexagonInstrInfo.h" #include "HexagonRegisterInfo.h" +#include "HexagonSubtarget.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "MCTargetDesc/HexagonMCTargetDesc.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/ScheduleDAGInstrs.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" +#include +#include #include using namespace llvm; @@ -119,9 +129,7 @@ HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) { HexagonSubtarget::HexagonSubtarget(const Triple &TT, StringRef CPU, StringRef FS, const TargetMachine &TM) : HexagonGenSubtargetInfo(TT, CPU, FS), CPUString(CPU), - InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this), - FrameLowering() { - + InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this) { initializeEnvironment(); // Initialize scheduling itinerary for the specified CPU. 
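// The LLVM_FALLTHROUGH added above (and in the shuffler and asm-backend hunks
// further down) marks the case fall-through as intentional; in
// llvm/Support/Compiler.h it expands to [[fallthrough]] or a
// compiler-specific attribute, keeping -Wimplicit-fallthrough quiet.
// The idiom, as used in the A4_combineri hunk:
//   switch (Opc) {
//   case Hexagon::A4_combineri:
//     ImmX++;
//     LLVM_FALLTHROUGH; // deliberately shares A4_combineir's handling
//   case Hexagon::A4_combineir:
//     ImmX++;
//     // ... common handling ...
//   }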
@@ -196,7 +204,6 @@ void HexagonSubtarget::adjustSchedDependency(SUnit *Src, SUnit *Dst, updateLatency(*SrcInst, *DstInst, Dep); } - void HexagonSubtarget::HexagonDAGMutation::apply(ScheduleDAGInstrs *DAG) { for (auto &SU : DAG->SUnits) { if (!SU.isInstr()) @@ -240,18 +247,18 @@ void HexagonSubtarget::HexagonDAGMutation::apply(ScheduleDAGInstrs *DAG) { } } - void HexagonSubtarget::getPostRAMutations( - std::vector> &Mutations) const { - Mutations.push_back(make_unique()); + std::vector> &Mutations) const { + Mutations.push_back( + llvm::make_unique()); } void HexagonSubtarget::getSMSMutations( - std::vector> &Mutations) const { - Mutations.push_back(make_unique()); + std::vector> &Mutations) const { + Mutations.push_back( + llvm::make_unique()); } - // Pin the vtable to this file. void HexagonSubtarget::anchor() {} @@ -447,4 +454,3 @@ unsigned HexagonSubtarget::getL1PrefetchDistance() const { bool HexagonSubtarget::enableSubRegLiveness() const { return EnableSubregLiveness; } - diff --git a/interpreter/llvm/src/lib/Target/Hexagon/HexagonSubtarget.h b/interpreter/llvm/src/lib/Target/Hexagon/HexagonSubtarget.h index 4379efa79c9cd..753dca0000652 100644 --- a/interpreter/llvm/src/lib/Target/Hexagon/HexagonSubtarget.h +++ b/interpreter/llvm/src/lib/Target/Hexagon/HexagonSubtarget.h @@ -1,4 +1,4 @@ -//===-- HexagonSubtarget.h - Define Subtarget for the Hexagon ---*- C++ -*-===// +//===- HexagonSubtarget.h - Define Subtarget for the Hexagon ----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -15,12 +15,17 @@ #define LLVM_LIB_TARGET_HEXAGON_HEXAGONSUBTARGET_H #include "HexagonFrameLowering.h" -#include "HexagonISelLowering.h" #include "HexagonInstrInfo.h" +#include "HexagonISelLowering.h" #include "HexagonSelectionDAGInfo.h" -#include "llvm/Target/TargetMachine.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/ScheduleDAGMutation.h" +#include "llvm/MC/MCInstrItineraries.h" #include "llvm/Target/TargetSubtargetInfo.h" +#include #include +#include #define GET_SUBTARGETINFO_HEADER #include "HexagonGenSubtargetInfo.inc" @@ -30,6 +35,12 @@ namespace llvm { +class MachineInstr; +class SDep; +class SUnit; +class TargetMachine; +class Triple; + class HexagonSubtarget : public HexagonGenSubtargetInfo { virtual void anchor(); @@ -57,6 +68,7 @@ class HexagonSubtarget : public HexagonGenSubtargetInfo { HexagonSelectionDAGInfo TSInfo; HexagonFrameLowering FrameLowering; InstrItineraryData InstrItins; + void initializeEnvironment(); public: @@ -108,6 +120,7 @@ class HexagonSubtarget : public HexagonGenSubtargetInfo { bool useBSBScheduling() const { return UseBSBScheduling; } bool enableMachineScheduler() const override; + // Always use the TargetLowering default scheduler. // FIXME: This will use the vliw scheduler which is probably just hurting // compiler time and will be removed eventually anyway. 
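// The header churn above (alphabetized includes, added standard headers, and
// forward declarations such as 'class MachineInstr;') reads like an
// include-what-you-use cleanup; the motivation is an inference, not stated in
// the patch. The convention being applied:
//   class MachineInstr;  // forward declaration: enough to form pointers and
//                        // references in declarations
//   void updateLatency(MachineInstr &Src, MachineInstr &Dst, SDep &Dep);
//   // (signature illustrative) -- only code that calls members of the
//   // pointee needs the full llvm/CodeGen/MachineInstr.h definition.
// The same hunks qualify make_unique as llvm::make_unique, which keeps the
// call unambiguous alongside C++14's std::make_unique.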
@@ -124,6 +137,7 @@ class HexagonSubtarget : public HexagonGenSubtargetInfo { unsigned getSmallDataThreshold() const { return Hexagon_SMALL_DATA_THRESHOLD; } + const HexagonArchEnum &getHexagonArchVersion() const { return HexagonArchVersion; } @@ -155,4 +169,4 @@ class HexagonSubtarget : public HexagonGenSubtargetInfo { } // end namespace llvm -#endif +#endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONSUBTARGET_H diff --git a/interpreter/llvm/src/lib/Target/Hexagon/HexagonTargetMachine.cpp b/interpreter/llvm/src/lib/Target/Hexagon/HexagonTargetMachine.cpp index 6913d50bbcaab..7d88b51f32dd3 100644 --- a/interpreter/llvm/src/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/interpreter/llvm/src/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -23,8 +23,8 @@ #include "llvm/IR/Module.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/TargetRegistry.h" -#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" +#include "llvm/Transforms/Scalar.h" using namespace llvm; @@ -110,8 +110,11 @@ SchedCustomRegistry("hexagon", "Run Hexagon's custom scheduler", namespace llvm { extern char &HexagonExpandCondsetsID; void initializeHexagonExpandCondsetsPass(PassRegistry&); + void initializeHexagonGenMuxPass(PassRegistry&); void initializeHexagonLoopIdiomRecognizePass(PassRegistry&); + void initializeHexagonNewValueJumpPass(PassRegistry&); void initializeHexagonOptAddrModePass(PassRegistry&); + void initializeHexagonPacketizerPass(PassRegistry&); Pass *createHexagonLoopIdiomPass(); FunctionPass *createHexagonBitSimplify(); @@ -152,8 +155,13 @@ static Reloc::Model getEffectiveRelocModel(Optional RM) { extern "C" void LLVMInitializeHexagonTarget() { // Register the target. RegisterTargetMachine X(getTheHexagonTarget()); - initializeHexagonLoopIdiomRecognizePass(*PassRegistry::getPassRegistry()); - initializeHexagonOptAddrModePass(*PassRegistry::getPassRegistry()); + + PassRegistry &PR = *PassRegistry::getPassRegistry(); + initializeHexagonGenMuxPass(PR); + initializeHexagonLoopIdiomRecognizePass(PR); + initializeHexagonNewValueJumpPass(PR); + initializeHexagonOptAddrModePass(PR); + initializeHexagonPacketizerPass(PR); } HexagonTargetMachine::HexagonTargetMachine(const Target &T, const Triple &TT, @@ -223,7 +231,7 @@ namespace { /// Hexagon Code Generator Pass Configuration Options. class HexagonPassConfig : public TargetPassConfig { public: - HexagonPassConfig(HexagonTargetMachine *TM, PassManagerBase &PM) + HexagonPassConfig(HexagonTargetMachine &TM, PassManagerBase &PM) : TargetPassConfig(TM, PM) {} HexagonTargetMachine &getHexagonTargetMachine() const { @@ -245,14 +253,14 @@ class HexagonPassConfig : public TargetPassConfig { } // namespace TargetPassConfig *HexagonTargetMachine::createPassConfig(PassManagerBase &PM) { - return new HexagonPassConfig(this, PM); + return new HexagonPassConfig(*this, PM); } void HexagonPassConfig::addIRPasses() { TargetPassConfig::addIRPasses(); bool NoOpt = (getOptLevel() == CodeGenOpt::None); - addPass(createAtomicExpandPass(TM)); + addPass(createAtomicExpandPass()); if (!NoOpt) { if (EnableLoopPrefetch) addPass(createLoopDataPrefetchPass()); @@ -276,27 +284,26 @@ bool HexagonPassConfig::addInstSelector() { if (!NoOpt) { // Create logical operations on predicate registers. if (EnableGenPred) - addPass(createHexagonGenPredicate(), false); + addPass(createHexagonGenPredicate()); // Rotate loops to expose bit-simplification opportunities. 
if (EnableLoopResched) - addPass(createHexagonLoopRescheduling(), false); + addPass(createHexagonLoopRescheduling()); // Split double registers. if (!DisableHSDR) addPass(createHexagonSplitDoubleRegs()); // Bit simplification. if (EnableBitSimplify) - addPass(createHexagonBitSimplify(), false); + addPass(createHexagonBitSimplify()); addPass(createHexagonPeephole()); - printAndVerify("After hexagon peephole pass"); // Constant propagation. if (!DisableHCP) { - addPass(createHexagonConstPropagationPass(), false); - addPass(&UnreachableMachineBlockElimID, false); + addPass(createHexagonConstPropagationPass()); + addPass(&UnreachableMachineBlockElimID); } if (EnableGenInsert) - addPass(createHexagonGenInsert(), false); + addPass(createHexagonGenInsert()); if (EnableEarlyIf) - addPass(createHexagonEarlyIfConversion(), false); + addPass(createHexagonEarlyIfConversion()); } return false; @@ -307,9 +314,9 @@ void HexagonPassConfig::addPreRegAlloc() { if (EnableExpandCondsets) insertPass(&RegisterCoalescerID, &HexagonExpandCondsetsID); if (!DisableStoreWidening) - addPass(createHexagonStoreWidening(), false); + addPass(createHexagonStoreWidening()); if (!DisableHardwareLoops) - addPass(createHexagonHardwareLoops(), false); + addPass(createHexagonHardwareLoops()); } if (TM->getOptLevel() >= CodeGenOpt::Default) addPass(&MachinePipelinerID); @@ -320,16 +327,16 @@ void HexagonPassConfig::addPostRegAlloc() { if (EnableRDFOpt) addPass(createHexagonRDFOpt()); if (!DisableHexagonCFGOpt) - addPass(createHexagonCFGOptimizer(), false); + addPass(createHexagonCFGOptimizer()); if (!DisableAModeOpt) - addPass(createHexagonOptAddrMode(), false); + addPass(createHexagonOptAddrMode()); } } void HexagonPassConfig::addPreSched2() { - addPass(createHexagonCopyToCombine(), false); + addPass(createHexagonCopyToCombine()); if (getOptLevel() != CodeGenOpt::None) - addPass(&IfConverterID, false); + addPass(&IfConverterID); addPass(createHexagonSplitConst32AndConst64()); } @@ -337,17 +344,17 @@ void HexagonPassConfig::addPreEmitPass() { bool NoOpt = (getOptLevel() == CodeGenOpt::None); if (!NoOpt) - addPass(createHexagonNewValueJump(), false); + addPass(createHexagonNewValueJump()); - addPass(createHexagonBranchRelaxation(), false); + addPass(createHexagonBranchRelaxation()); // Create Packets. if (!NoOpt) { if (!DisableHardwareLoops) - addPass(createHexagonFixupHwLoops(), false); + addPass(createHexagonFixupHwLoops()); // Generate MUX from pairs of conditional transfers. 
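// i.e. HexagonGenMux folds two transfers guarded by complementary predicates
// into a single mux instruction, which packetizes better (sketch):
//   if (p0) r0 = r1      -->    r0 = mux(p0, r1, r2)
//   if (!p0) r0 = r2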
if (EnableGenMux) - addPass(createHexagonGenMux(), false); + addPass(createHexagonGenMux()); addPass(createHexagonPacketizer(), false); } diff --git a/interpreter/llvm/src/lib/Target/Hexagon/HexagonTargetObjectFile.cpp b/interpreter/llvm/src/lib/Target/Hexagon/HexagonTargetObjectFile.cpp index c9c4f95dbaaa5..ea86c9c42f478 100644 --- a/interpreter/llvm/src/lib/Target/Hexagon/HexagonTargetObjectFile.cpp +++ b/interpreter/llvm/src/lib/Target/Hexagon/HexagonTargetObjectFile.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalObject.h" @@ -28,7 +29,6 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" @@ -49,6 +49,14 @@ static cl::opt<bool> TraceGVPlacement("trace-gv-placement", cl::Hidden, cl::init(false), cl::desc("Trace global value placement")); +static cl::opt<bool> + EmitJtInText("hexagon-emit-jt-text", cl::Hidden, cl::init(false), + cl::desc("Emit hexagon jump tables in function section")); + +static cl::opt<bool> + EmitLutInText("hexagon-emit-lut-text", cl::Hidden, cl::init(false), + cl::desc("Emit hexagon lookup tables in function section")); + // TraceGVPlacement controls messages for all builds. For builds with assertions // (debug or release), messages are also controlled by the usual debug flags // (e.g. -debug and -debug-only=globallayout) @@ -132,6 +140,13 @@ MCSection *HexagonTargetObjectFile::SelectSectionForGlobal( << (Kind.isBSS() ? "kind_bss " : "" ) << (Kind.isBSSLocal() ? "kind_bss_local " : "" )); + // If the lookup table is used by more than one function, do not place + // it in the text section. + if (EmitLutInText && GO->getName().startswith("switch.table")) { + if (const Function *Fn = getLutUsedFunction(GO)) + return selectSectionForLookupTable(GO, TM, Fn); + } + if (isGlobalInSmallSection(GO, TM)) return selectSmallSectionForGlobal(GO, Kind, TM); @@ -256,6 +271,11 @@ unsigned HexagonTargetObjectFile::getSmallDataSize() const { return SmallDataThreshold; } +bool HexagonTargetObjectFile::shouldPutJumpTableInFunctionSection( + bool UsesLabelDifference, const Function &F) const { + return EmitJtInText; +} + /// Descends any type down to "elementary" components, /// discovering the smallest addressable one. /// If zero is returned, declaration will not be modified. @@ -393,3 +413,39 @@ MCSection *HexagonTargetObjectFile::selectSmallSectionForGlobal( // Otherwise, we work the same as ELF. return TargetLoweringObjectFileELF::SelectSectionForGlobal(GO, Kind, TM); } + +// Return the function that uses the lookup table. If more than one live +// function uses this lookup table, bail out and place the lookup table +// in the default section. +const Function * +HexagonTargetObjectFile::getLutUsedFunction(const GlobalObject *GO) const { + const Function *ReturnFn = nullptr; + for (auto U : GO->users()) { + // Validate that each user is an instruction inside a live function.
+ auto *I = dyn_cast<Instruction>(U); + if (!I) + continue; + auto *Bb = I->getParent(); + if (!Bb) + continue; + auto *UserFn = Bb->getParent(); + if (!ReturnFn) + ReturnFn = UserFn; + else if (ReturnFn != UserFn) + return nullptr; + } + return ReturnFn; +} + +MCSection *HexagonTargetObjectFile::selectSectionForLookupTable( + const GlobalObject *GO, const TargetMachine &TM, const Function *Fn) const { + + SectionKind Kind = SectionKind::getText(); + // If the function has an explicit section, place the lookup table in + // that section. + if (Fn->hasSection()) + return getExplicitSectionGlobal(Fn, Kind, TM); + + const auto *FuncObj = dyn_cast<GlobalObject>(Fn); + return SelectSectionForGlobal(FuncObj, Kind, TM); +} diff --git a/interpreter/llvm/src/lib/Target/Hexagon/HexagonTargetObjectFile.h b/interpreter/llvm/src/lib/Target/Hexagon/HexagonTargetObjectFile.h index 58dff2b95e199..eff44f097e03f 100644 --- a/interpreter/llvm/src/lib/Target/Hexagon/HexagonTargetObjectFile.h +++ b/interpreter/llvm/src/lib/Target/Hexagon/HexagonTargetObjectFile.h @@ -33,6 +33,11 @@ namespace llvm { unsigned getSmallDataSize() const; + bool shouldPutJumpTableInFunctionSection(bool UsesLabelDifference, + const Function &F) const override; + + const Function *getLutUsedFunction(const GlobalObject *GO) const; + private: MCSectionELF *SmallDataSection; MCSectionELF *SmallBSSSection; @@ -43,6 +48,10 @@ namespace llvm { MCSection *selectSmallSectionForGlobal(const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const; + + MCSection *selectSectionForLookupTable(const GlobalObject *GO, + const TargetMachine &TM, + const Function *Fn) const; }; } // namespace llvm diff --git a/interpreter/llvm/src/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/interpreter/llvm/src/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp index d578bfab3658b..aac810e29fe98 100644 --- a/interpreter/llvm/src/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp +++ b/interpreter/llvm/src/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp @@ -21,6 +21,10 @@ using namespace llvm; #define DEBUG_TYPE "hexagontti" +static cl::opt<bool> EmitLookupTables("hexagon-emit-lookup-tables", + cl::init(true), cl::Hidden, + cl::desc("Control lookup table emission on Hexagon target")); + TargetTransformInfo::PopcntSupportKind HexagonTTIImpl::getPopcntSupport(unsigned IntTyWidthInBit) const { // Return Fast Hardware support as every input < 64 bits will be promoted @@ -29,7 +33,7 @@ HexagonTTIImpl::getPopcntSupport(unsigned IntTyWidthInBit) const { } // The Hexagon target can unroll loops with run-time trip counts.
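// Setting UP.Runtime and UP.Partial below allows unrolling even when the
// trip count is only known at run time; conceptually the unroller emits an
// unrolled body plus a scalar remainder loop (sketch, unroll factor 2
// assumed):
//   size_t I = 0;
//   for (; I + 1 < N; I += 2) { Body(I); Body(I + 1); } // unrolled by 2
//   for (; I < N; ++I) Body(I);                         // runtime remainder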
-void HexagonTTIImpl::getUnrollingPreferences(Loop *L, +void HexagonTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP) { UP.Runtime = UP.Partial = true; } @@ -46,8 +50,9 @@ unsigned HexagonTTIImpl::getCacheLineSize() const { return getST()->getL1CacheLineSize(); } -int HexagonTTIImpl::getUserCost(const User *U) { - auto isCastFoldedIntoLoad = [] (const CastInst *CI) -> bool { +int HexagonTTIImpl::getUserCost(const User *U, + ArrayRef Operands) { + auto isCastFoldedIntoLoad = [](const CastInst *CI) -> bool { if (!CI->isIntegerCast()) return false; const LoadInst *LI = dyn_cast(CI->getOperand(0)); @@ -67,5 +72,9 @@ int HexagonTTIImpl::getUserCost(const User *U) { if (const CastInst *CI = dyn_cast(U)) if (isCastFoldedIntoLoad(CI)) return TargetTransformInfo::TCC_Free; - return BaseT::getUserCost(U); + return BaseT::getUserCost(U, Operands); +} + +bool HexagonTTIImpl::shouldBuildLookupTables() const { + return EmitLookupTables; } diff --git a/interpreter/llvm/src/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/interpreter/llvm/src/lib/Target/Hexagon/HexagonTargetTransformInfo.h index 8414bfc4e197f..ab5a6e07d8736 100644 --- a/interpreter/llvm/src/lib/Target/Hexagon/HexagonTargetTransformInfo.h +++ b/interpreter/llvm/src/lib/Target/Hexagon/HexagonTargetTransformInfo.h @@ -46,7 +46,8 @@ class HexagonTTIImpl : public BasicTTIImplBase { TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const; // The Hexagon target can unroll loops with run-time trip counts. - void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP); + void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, + TTI::UnrollingPreferences &UP); // L1 cache prefetch. unsigned getPrefetchDistance() const; @@ -61,7 +62,10 @@ class HexagonTTIImpl : public BasicTTIImplBase { /// @} - int getUserCost(const User *U); + int getUserCost(const User *U, ArrayRef Operands); + + // Hexagon specific decision to generate a lookup table. + bool shouldBuildLookupTables() const; }; } // end namespace llvm diff --git a/interpreter/llvm/src/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/interpreter/llvm/src/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp index c21b6e2515d31..a3021e3dfe432 100644 --- a/interpreter/llvm/src/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp +++ b/interpreter/llvm/src/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp @@ -16,10 +16,10 @@ // prune the dependence. 
// //===----------------------------------------------------------------------===// +#include "HexagonVLIWPacketizer.h" #include "HexagonRegisterInfo.h" #include "HexagonSubtarget.h" #include "HexagonTargetMachine.h" -#include "HexagonVLIWPacketizer.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -60,9 +60,7 @@ namespace { class HexagonPacketizer : public MachineFunctionPass { public: static char ID; - HexagonPacketizer() : MachineFunctionPass(ID) { - initializeHexagonPacketizerPass(*PassRegistry::getPassRegistry()); - } + HexagonPacketizer() : MachineFunctionPass(ID) {} void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); @@ -89,14 +87,14 @@ namespace { char HexagonPacketizer::ID = 0; } -INITIALIZE_PASS_BEGIN(HexagonPacketizer, "packets", "Hexagon Packetizer", - false, false) +INITIALIZE_PASS_BEGIN(HexagonPacketizer, "hexagon-packetizer", + "Hexagon Packetizer", false, false) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) -INITIALIZE_PASS_END(HexagonPacketizer, "packets", "Hexagon Packetizer", - false, false) +INITIALIZE_PASS_END(HexagonPacketizer, "hexagon-packetizer", + "Hexagon Packetizer", false, false) HexagonPacketizerList::HexagonPacketizerList(MachineFunction &MF, MachineLoopInfo &MLI, AliasAnalysis *AA, @@ -214,12 +212,12 @@ bool HexagonPacketizer::runOnMachineFunction(MachineFunction &MF) { for (auto &MB : MF) { auto Begin = MB.begin(), End = MB.end(); while (Begin != End) { - // First the first non-boundary starting from the end of the last + // Find the first non-boundary starting from the end of the last // scheduling region. MachineBasicBlock::iterator RB = Begin; while (RB != End && HII->isSchedulingBoundary(*RB, &MB, MF)) ++RB; - // First the first boundary starting from the beginning of the new + // Find the first boundary starting from the beginning of the new // region. MachineBasicBlock::iterator RE = RB; while (RE != End && !HII->isSchedulingBoundary(*RE, &MB, MF)) @@ -273,25 +271,17 @@ bool HexagonPacketizerList::isCallDependent(const MachineInstr &MI, if (DepReg == HRI->getFrameRegister() || DepReg == HRI->getStackRegister()) return true; - // Check if this is a predicate dependence. - const TargetRegisterClass* RC = HRI->getMinimalPhysRegClass(DepReg); - if (RC == &Hexagon::PredRegsRegClass) - return true; - - // Assumes that the first operand of the CALLr is the function address. - if (HII->isIndirectCall(MI) && (DepType == SDep::Data)) { - const MachineOperand MO = MI.getOperand(0); - if (MO.isReg() && MO.isUse() && (MO.getReg() == DepReg)) - return true; + // Call-like instructions can be packetized with preceding instructions + // that define registers implicitly used or modified by the call. Explicit + // uses are still prohibited, as in the case of indirect calls: + // r0 = ... + // J2_jumpr r0 + if (DepType == SDep::Data) { + for (const MachineOperand MO : MI.operands()) + if (MO.isReg() && MO.getReg() == DepReg && !MO.isImplicit()) + return true; } - if (HII->isJumpR(MI)) { - const MachineOperand &MO = HII->isPredicated(MI) ? 
MI.getOperand(1) : MI.getOperand(0); assert(MO.isReg() && MO.isUse()); if (MO.getReg() == DepReg) - return true; - } return false; } @@ -333,11 +323,13 @@ bool HexagonPacketizerList::isNewifiable(const MachineInstr &MI, const TargetRegisterClass *NewRC) { // Vector stores can be predicated, and can be new-value stores, but // they cannot be predicated on a .new predicate value. - if (NewRC == &Hexagon::PredRegsRegClass) + if (NewRC == &Hexagon::PredRegsRegClass) { if (HII->isHVXVec(MI) && MI.mayStore()) return false; - return HII->isCondInst(MI) || HII->isJumpR(MI) || MI.isReturn() || - HII->mayBeNewStore(MI); + return HII->isPredicated(MI) && HII->getDotNewPredOp(MI, nullptr) > 0; + } + // If the class is not PredRegs, it could only apply to new-value stores. + return HII->mayBeNewStore(MI); } // Promote an instruction to its .cur form. @@ -760,11 +752,14 @@ bool HexagonPacketizerList::canPromoteToNewValue(const MachineInstr &MI, return false; } -static bool isImplicitDependency(const MachineInstr &I, unsigned DepReg) { +static bool isImplicitDependency(const MachineInstr &I, bool CheckDef, + unsigned DepReg) { for (auto &MO : I.operands()) { - if (MO.isRegMask() && MO.clobbersPhysReg(DepReg)) + if (CheckDef && MO.isRegMask() && MO.clobbersPhysReg(DepReg)) return true; - if (MO.isReg() && MO.isDef() && (MO.getReg() == DepReg) && MO.isImplicit()) + if (!MO.isReg() || MO.getReg() != DepReg || !MO.isImplicit()) + continue; + if (CheckDef == MO.isDef()) return true; } return false; } @@ -798,7 +793,8 @@ bool HexagonPacketizerList::canPromoteToDotNew(const MachineInstr &MI, // If dependency is through an implicitly defined register, we should not // newify the use. - if (isImplicitDependency(PI, DepReg)) + if (isImplicitDependency(PI, true, DepReg) || + isImplicitDependency(MI, false, DepReg)) return false; const MCInstrDesc& MCID = PI.getDesc(); @@ -808,8 +804,7 @@ bool HexagonPacketizerList::canPromoteToDotNew(const MachineInstr &MI, // predicate .new if (RC == &Hexagon::PredRegsRegClass) - if (HII->isCondInst(MI) || HII->isJumpR(MI) || MI.isReturn()) - return HII->predCanBeUsedAsDotNew(PI, DepReg); + return HII->predCanBeUsedAsDotNew(PI, DepReg); if (RC != &Hexagon::PredRegsRegClass && !HII->mayBeNewStore(MI)) return false; diff --git a/interpreter/llvm/src/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/interpreter/llvm/src/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp index 904403543e186..2a0edda8dcee8 100644 --- a/interpreter/llvm/src/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp +++ b/interpreter/llvm/src/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp @@ -12,9 +12,9 @@ #include "MCTargetDesc/HexagonBaseInfo.h" #include "MCTargetDesc/HexagonMCChecker.h" #include "MCTargetDesc/HexagonMCCodeEmitter.h" -#include "MCTargetDesc/HexagonMCTargetDesc.h" #include "MCTargetDesc/HexagonMCInstrInfo.h" #include "MCTargetDesc/HexagonMCShuffler.h" +#include "MCTargetDesc/HexagonMCTargetDesc.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCAssembler.h" @@ -199,13 +199,8 @@ class HexagonAsmBackend : public MCAsmBackend { return Infos[Kind - FirstTargetFixupKind]; } - /// processFixupValue - Target hook to adjust the literal value of a fixup /// if necessary. IsResolved signals whether the caller believes a relocation /// is needed; the target can modify the value. The default does nothing.
- void processFixupValue(const MCAssembler &Asm, const MCAsmLayout &Layout, - const MCFixup &Fixup, const MCFragment *DF, - const MCValue &Target, uint64_t &Value, - bool &IsResolved) override { + bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target) override { MCFixupKind Kind = Fixup.getKind(); switch((unsigned)Kind) { @@ -301,8 +296,7 @@ class HexagonAsmBackend : public MCAsmBackend { case fixup_Hexagon_LD_PLT_B22_PCREL_X: case fixup_Hexagon_LD_PLT_B32_PCREL_X: // These relocations should always have a relocation recorded - IsResolved = false; - return; + return true; case fixup_Hexagon_B22_PCREL: //IsResolved = false; @@ -319,7 +313,7 @@ class HexagonAsmBackend : public MCAsmBackend { case fixup_Hexagon_B7_PCREL: case fixup_Hexagon_B7_PCREL_X: if (DisableFixup) - IsResolved = false; + return true; break; case FK_Data_1: @@ -328,8 +322,9 @@ class HexagonAsmBackend : public MCAsmBackend { case FK_PCRel_4: case fixup_Hexagon_32: // Leave these relocations alone as they are used for EH. - return; + return false; } + return false; } /// getFixupKindNumBytes - The number of bytes the fixup may change. @@ -415,9 +410,9 @@ class HexagonAsmBackend : public MCAsmBackend { /// ApplyFixup - Apply the \arg Value for given \arg Fixup into the provided /// data fragment, at the offset specified by the fixup and following the /// fixup kind as appropriate. - void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, - uint64_t FixupValue, bool IsPCRel, - MCContext &Ctx) const override { + void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target, MutableArrayRef Data, + uint64_t FixupValue, bool IsResolved) const override { // When FixupValue is 0 the relocation is external and there // is nothing for us to do. @@ -432,8 +427,8 @@ class HexagonAsmBackend : public MCAsmBackend { // to a real offset before we can use it. 
uint32_t Offset = Fixup.getOffset(); unsigned NumBytes = getFixupKindNumBytes(Kind); - assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!"); - char *InstAddr = Data + Offset; + assert(Offset + NumBytes <= Data.size() && "Invalid fixup offset!"); + char *InstAddr = Data.data() + Offset; Value = adjustFixupValue(Kind, FixupValue); if(!Value) @@ -447,6 +442,7 @@ class HexagonAsmBackend : public MCAsmBackend { case fixup_Hexagon_B7_PCREL: if (!(isIntN(7, sValue))) HandleFixupError(7, 2, (int64_t)FixupValue, "B7_PCREL"); + LLVM_FALLTHROUGH; case fixup_Hexagon_B7_PCREL_X: InstMask = 0x00001f18; // Word32_B7 Reloc = (((Value >> 2) & 0x1f) << 8) | // Value 6-2 = Target 12-8 @@ -456,6 +452,7 @@ class HexagonAsmBackend : public MCAsmBackend { case fixup_Hexagon_B9_PCREL: if (!(isIntN(9, sValue))) HandleFixupError(9, 2, (int64_t)FixupValue, "B9_PCREL"); + LLVM_FALLTHROUGH; case fixup_Hexagon_B9_PCREL_X: InstMask = 0x003000fe; // Word32_B9 Reloc = (((Value >> 7) & 0x3) << 20) | // Value 8-7 = Target 21-20 @@ -467,6 +464,7 @@ class HexagonAsmBackend : public MCAsmBackend { case fixup_Hexagon_B13_PCREL: if (!(isIntN(13, sValue))) HandleFixupError(13, 2, (int64_t)FixupValue, "B13_PCREL"); + LLVM_FALLTHROUGH; case fixup_Hexagon_B13_PCREL_X: InstMask = 0x00202ffe; // Word32_B13 Reloc = (((Value >> 12) & 0x1) << 21) | // Value 12 = Target 21 @@ -477,6 +475,7 @@ class HexagonAsmBackend : public MCAsmBackend { case fixup_Hexagon_B15_PCREL: if (!(isIntN(15, sValue))) HandleFixupError(15, 2, (int64_t)FixupValue, "B15_PCREL"); + LLVM_FALLTHROUGH; case fixup_Hexagon_B15_PCREL_X: InstMask = 0x00df20fe; // Word32_B15 Reloc = (((Value >> 13) & 0x3) << 22) | // Value 14-13 = Target 23-22 @@ -488,6 +487,7 @@ class HexagonAsmBackend : public MCAsmBackend { case fixup_Hexagon_B22_PCREL: if (!(isIntN(22, sValue))) HandleFixupError(22, 2, (int64_t)FixupValue, "B22_PCREL"); + LLVM_FALLTHROUGH; case fixup_Hexagon_B22_PCREL_X: InstMask = 0x01ff3ffe; // Word32_B22 Reloc = (((Value >> 13) & 0x1ff) << 16) | // Value 21-13 = Target 24-16 @@ -517,7 +517,7 @@ class HexagonAsmBackend : public MCAsmBackend { dbgs() << "\tBValue=0x"; dbgs().write_hex(Value) << ": AValue=0x"; dbgs().write_hex(FixupValue) << ": Offset=" << Offset << ": Size=" << Data.size() << ": OInst=0x"; dbgs().write_hex(OldData) << ": Reloc=0x"; dbgs().write_hex(Reloc);); diff --git a/interpreter/llvm/src/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h b/interpreter/llvm/src/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h index d8009c5da08ee..7f90e83fc8e9e 100644 --- a/interpreter/llvm/src/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h +++ b/interpreter/llvm/src/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h @@ -169,8 +169,11 @@ namespace HexagonII { // Hexagon specific MO operand flag mask. enum HexagonMOTargetFlagVal { - //===------------------------------------------------------------------===// - // Hexagon Specific MachineOperand flags. + // Hexagon-specific MachineOperand target flags. + // + // When changing these, make sure to update + // getSerializableDirectMachineOperandTargetFlags and + // getSerializableBitmaskMachineOperandTargetFlags if needed. MO_NO_FLAG, /// MO_PCREL - On a symbol operand, indicates a PC-relative relocation @@ -207,10 +210,12 @@ namespace HexagonII { MO_TPREL, // HMOTF_ConstExtended - // Addendum to abovem, indicates a const extended op + // Addendum to above, indicates a const extended op // Can be used as a mask.
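// Because HMOTF_ConstExtended (0x80) is a mask bit layered over the direct
// flag values, a single operand's target flags can encode both a direct flag
// and the constant-extended marker (MO_GOT below is illustrative):
//   unsigned TF  = HexagonII::MO_GOT | HexagonII::HMOTF_ConstExtended;
//   bool Ext     = TF & HexagonII::MO_Bitmasks;   // constant-extended?
//   unsigned Dir = TF & ~HexagonII::MO_Bitmasks;  // recovers MO_GOT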
- HMOTF_ConstExtended = 0x80 + HMOTF_ConstExtended = 0x80, + // Union of all bitmasks (currently only HMOTF_ConstExtended). + MO_Bitmasks = HMOTF_ConstExtended }; // Hexagon Sub-instruction classes. diff --git a/interpreter/llvm/src/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp b/interpreter/llvm/src/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp index dd790fd41257d..1929152129fa7 100644 --- a/interpreter/llvm/src/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp +++ b/interpreter/llvm/src/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "HexagonAsmPrinter.h" #include "HexagonInstPrinter.h" +#include "HexagonAsmPrinter.h" #include "MCTargetDesc/HexagonBaseInfo.h" #include "MCTargetDesc/HexagonMCInstrInfo.h" #include "llvm/MC/MCAsmInfo.h" diff --git a/interpreter/llvm/src/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp b/interpreter/llvm/src/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp index 70410ff03a641..50f00d1aaeacf 100644 --- a/interpreter/llvm/src/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp +++ b/interpreter/llvm/src/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp @@ -7,10 +7,10 @@ // //===----------------------------------------------------------------------===// +#include "MCTargetDesc/HexagonMCCodeEmitter.h" #include "Hexagon.h" #include "MCTargetDesc/HexagonBaseInfo.h" #include "MCTargetDesc/HexagonFixupKinds.h" -#include "MCTargetDesc/HexagonMCCodeEmitter.h" #include "MCTargetDesc/HexagonMCInstrInfo.h" #include "MCTargetDesc/HexagonMCTargetDesc.h" #include "llvm/ADT/Statistic.h" diff --git a/interpreter/llvm/src/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp b/interpreter/llvm/src/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp index e8f154a1fa533..c7114c7f18a0a 100644 --- a/interpreter/llvm/src/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp +++ b/interpreter/llvm/src/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp @@ -701,33 +701,32 @@ MCInst HexagonMCInstrInfo::deriveSubInst(MCInst const &Inst) { break; case Hexagon::A2_addi: Absolute = Inst.getOperand(2).getExpr()->evaluateAsAbsolute(Value); - assert(Absolute);(void)Absolute; - if (Value == 1) { - Result.setOpcode(Hexagon::SA1_inc); - addOps(Result, Inst, 0); - addOps(Result, Inst, 1); - break; - } // 1,2 SUBInst $Rd = add($Rs, #1) - else if (Value == -1) { - Result.setOpcode(Hexagon::SA1_dec); - addOps(Result, Inst, 0); - addOps(Result, Inst, 1); - addOps(Result, Inst, 2); - break; - } // 1,2 SUBInst $Rd = add($Rs,#-1) - else if (Inst.getOperand(1).getReg() == Hexagon::R29) { - Result.setOpcode(Hexagon::SA1_addsp); - addOps(Result, Inst, 0); - addOps(Result, Inst, 2); - break; - } // 1,3 SUBInst $Rd = add(r29, #$u6_2) - else { - Result.setOpcode(Hexagon::SA1_addi); - addOps(Result, Inst, 0); - addOps(Result, Inst, 1); - addOps(Result, Inst, 2); - break; - } // 1,2,3 SUBInst $Rx = add($Rx, #$s7) + if (Absolute) { + if (Value == 1) { + Result.setOpcode(Hexagon::SA1_inc); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + break; + } // 1,2 SUBInst $Rd = add($Rs, #1) + if (Value == -1) { + Result.setOpcode(Hexagon::SA1_dec); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + addOps(Result, Inst, 2); + break; + } // 1,2 SUBInst $Rd = add($Rs,#-1) + if (Inst.getOperand(1).getReg() == Hexagon::R29) { + Result.setOpcode(Hexagon::SA1_addsp); + addOps(Result, Inst, 0); + addOps(Result, Inst, 2); + break; + } // 
1,3 SUBInst $Rd = add(r29, #$u6_2) + } + Result.setOpcode(Hexagon::SA1_addi); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + addOps(Result, Inst, 2); + break; // 1,2,3 SUBInst $Rx = add($Rx, #$s7) case Hexagon::A2_add: Result.setOpcode(Hexagon::SA1_addrx); addOps(Result, Inst, 0); diff --git a/interpreter/llvm/src/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp b/interpreter/llvm/src/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp index 9e1ff9ca35d76..47007e08a2ff9 100644 --- a/interpreter/llvm/src/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp +++ b/interpreter/llvm/src/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp @@ -17,6 +17,7 @@ #include "MCTargetDesc/HexagonMCInstrInfo.h" #include "MCTargetDesc/HexagonMCShuffler.h" #include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" @@ -29,7 +30,6 @@ #include "llvm/MC/MCSymbolELF.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include diff --git a/interpreter/llvm/src/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp b/interpreter/llvm/src/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp index aece36790486e..b2c7f1569380c 100644 --- a/interpreter/llvm/src/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp +++ b/interpreter/llvm/src/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp @@ -14,9 +14,9 @@ #define DEBUG_TYPE "hexagon-shuffle" +#include "MCTargetDesc/HexagonMCShuffler.h" #include "Hexagon.h" #include "MCTargetDesc/HexagonMCInstrInfo.h" -#include "MCTargetDesc/HexagonMCShuffler.h" #include "MCTargetDesc/HexagonMCTargetDesc.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" diff --git a/interpreter/llvm/src/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/interpreter/llvm/src/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp index bb98c2bbef6d7..1a361548f9386 100644 --- a/interpreter/llvm/src/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp +++ b/interpreter/llvm/src/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp @@ -11,14 +11,15 @@ // //===----------------------------------------------------------------------===// +#include "MCTargetDesc/HexagonMCTargetDesc.h" #include "Hexagon.h" #include "HexagonTargetStreamer.h" #include "MCTargetDesc/HexagonInstPrinter.h" #include "MCTargetDesc/HexagonMCAsmInfo.h" #include "MCTargetDesc/HexagonMCELFStreamer.h" #include "MCTargetDesc/HexagonMCInstrInfo.h" -#include "MCTargetDesc/HexagonMCTargetDesc.h" #include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCELFStreamer.h" @@ -27,10 +28,9 @@ #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" #include #include #include diff --git a/interpreter/llvm/src/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp b/interpreter/llvm/src/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp index 564d43b45cb87..1604e7c8dc549 100644 --- a/interpreter/llvm/src/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp +++ b/interpreter/llvm/src/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp @@ 
-259,6 +259,7 @@ bool HexagonShuffler::check() { break; case HexagonII::TypeCVI_VM_VP_LDU: ++onlyNo1; + LLVM_FALLTHROUGH; case HexagonII::TypeCVI_VM_LD: case HexagonII::TypeCVI_VM_TMP_LD: case HexagonII::TypeLD: @@ -274,6 +275,7 @@ bool HexagonShuffler::check() { break; case HexagonII::TypeCVI_VM_STU: ++onlyNo1; + LLVM_FALLTHROUGH; case HexagonII::TypeCVI_VM_ST: case HexagonII::TypeCVI_VM_NEW_ST: case HexagonII::TypeST: diff --git a/interpreter/llvm/src/lib/Target/Hexagon/RDFDeadCode.cpp b/interpreter/llvm/src/lib/Target/Hexagon/RDFDeadCode.cpp index 9aa8ad68e07e2..60a12dcf2f03d 100644 --- a/interpreter/llvm/src/lib/Target/Hexagon/RDFDeadCode.cpp +++ b/interpreter/llvm/src/lib/Target/Hexagon/RDFDeadCode.cpp @@ -9,9 +9,9 @@ // // RDF-based generic dead code elimination. +#include "RDFDeadCode.h" #include "RDFGraph.h" #include "RDFLiveness.h" -#include "RDFDeadCode.h" #include "llvm/ADT/SetVector.h" #include "llvm/CodeGen/MachineBasicBlock.h" diff --git a/interpreter/llvm/src/lib/Target/Hexagon/RDFGraph.cpp b/interpreter/llvm/src/lib/Target/Hexagon/RDFGraph.cpp index 7a2895aa4e8c6..8d12723708990 100644 --- a/interpreter/llvm/src/lib/Target/Hexagon/RDFGraph.cpp +++ b/interpreter/llvm/src/lib/Target/Hexagon/RDFGraph.cpp @@ -10,8 +10,8 @@ // Target-independent, SSA-based data flow graph for register data flow (RDF). // #include "RDFGraph.h" -#include "llvm/ADT/SetVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineDominanceFrontier.h" #include "llvm/CodeGen/MachineDominators.h" diff --git a/interpreter/llvm/src/lib/Target/Hexagon/RDFLiveness.cpp b/interpreter/llvm/src/lib/Target/Hexagon/RDFLiveness.cpp index 9d8a3881797bc..83e8968086d8c 100644 --- a/interpreter/llvm/src/lib/Target/Hexagon/RDFLiveness.cpp +++ b/interpreter/llvm/src/lib/Target/Hexagon/RDFLiveness.cpp @@ -23,8 +23,8 @@ // and Embedded Architectures and Compilers", 8 (4), // <10.1145/2086696.2086706>. 
// -#include "RDFGraph.h" #include "RDFLiveness.h" +#include "RDFGraph.h" #include "llvm/ADT/SetVector.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineDominanceFrontier.h" diff --git a/interpreter/llvm/src/lib/Target/LLVMBuild.txt b/interpreter/llvm/src/lib/Target/LLVMBuild.txt index 8be2a898e3802..34b966df7761b 100644 --- a/interpreter/llvm/src/lib/Target/LLVMBuild.txt +++ b/interpreter/llvm/src/lib/Target/LLVMBuild.txt @@ -29,6 +29,7 @@ subdirectories = MSP430 NVPTX Mips + Nios2 PowerPC RISCV Sparc diff --git a/interpreter/llvm/src/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp b/interpreter/llvm/src/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp index 1d6c07974beb4..1394ac7210f2f 100644 --- a/interpreter/llvm/src/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp +++ b/interpreter/llvm/src/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp @@ -28,8 +28,8 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/SMLoc.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" #include #include #include @@ -787,6 +787,7 @@ std::unique_ptr LanaiAsmParser::parseImmediate() { case AsmToken::Dot: if (!Parser.parseExpression(ExprVal)) return LanaiOperand::createImm(ExprVal, Start, End); + LLVM_FALLTHROUGH; default: return nullptr; } diff --git a/interpreter/llvm/src/lib/Target/Lanai/LanaiTargetMachine.cpp b/interpreter/llvm/src/lib/Target/Lanai/LanaiTargetMachine.cpp index 2a9bc25d7fadb..a2f005ce445a8 100644 --- a/interpreter/llvm/src/lib/Target/Lanai/LanaiTargetMachine.cpp +++ b/interpreter/llvm/src/lib/Target/Lanai/LanaiTargetMachine.cpp @@ -76,7 +76,7 @@ namespace { // Lanai Code Generator Pass Configuration Options. class LanaiPassConfig : public TargetPassConfig { public: - LanaiPassConfig(LanaiTargetMachine *TM, PassManagerBase *PassManager) + LanaiPassConfig(LanaiTargetMachine &TM, PassManagerBase *PassManager) : TargetPassConfig(TM, *PassManager) {} LanaiTargetMachine &getLanaiTargetMachine() const { @@ -91,7 +91,7 @@ class LanaiPassConfig : public TargetPassConfig { TargetPassConfig * LanaiTargetMachine::createPassConfig(PassManagerBase &PassManager) { - return new LanaiPassConfig(this, &PassManager); + return new LanaiPassConfig(*this, &PassManager); } // Install an instruction selector pass. 
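Editor's note: the LanaiPassConfig hunk above reflects a release_50 API change: TargetPassConfig's constructor now takes the target machine by reference instead of by pointer, which is why every backend's createPassConfig now dereferences this. A hedged sketch of the updated idiom, with MyTargetMachine/MyPassConfig as placeholder names (MSP430 and the Mips passes later in this patch follow the same pattern):

    class MyPassConfig : public TargetPassConfig {
    public:
      // TM is passed by reference: it can no longer be null, so the
      // pass config does not need to guard against a missing machine.
      MyPassConfig(MyTargetMachine &TM, PassManagerBase &PM)
          : TargetPassConfig(TM, PM) {}
    };

    TargetPassConfig *MyTargetMachine::createPassConfig(PassManagerBase &PM) {
      return new MyPassConfig(*this, PM); // was: new MyPassConfig(this, &PM)
    }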
diff --git a/interpreter/llvm/src/lib/Target/Lanai/LanaiTargetMachine.h b/interpreter/llvm/src/lib/Target/Lanai/LanaiTargetMachine.h index 5278c70d909da..083ba6fdf8416 100644 --- a/interpreter/llvm/src/lib/Target/Lanai/LanaiTargetMachine.h +++ b/interpreter/llvm/src/lib/Target/Lanai/LanaiTargetMachine.h @@ -49,6 +49,10 @@ class LanaiTargetMachine : public LLVMTargetMachine { TargetLoweringObjectFile *getObjFileLowering() const override { return TLOF.get(); } + + bool isMachineVerifierClean() const override { + return false; + } }; } // namespace llvm diff --git a/interpreter/llvm/src/lib/Target/Lanai/LanaiTargetObjectFile.cpp b/interpreter/llvm/src/lib/Target/Lanai/LanaiTargetObjectFile.cpp index 7475dbd68ae49..38e75108ba16c 100644 --- a/interpreter/llvm/src/lib/Target/Lanai/LanaiTargetObjectFile.cpp +++ b/interpreter/llvm/src/lib/Target/Lanai/LanaiTargetObjectFile.cpp @@ -10,13 +10,13 @@ #include "LanaiSubtarget.h" #include "LanaiTargetMachine.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/ELF.h" #include "llvm/Target/TargetMachine.h" using namespace llvm; diff --git a/interpreter/llvm/src/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp b/interpreter/llvm/src/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp index 0ef1401ef531a..bbce5f670c99e 100644 --- a/interpreter/llvm/src/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp +++ b/interpreter/llvm/src/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp @@ -49,8 +49,9 @@ class LanaiAsmBackend : public MCAsmBackend { LanaiAsmBackend(const Target &T, Triple::OSType OST) : MCAsmBackend(), OSType(OST) {} - void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, - uint64_t Value, bool IsPCRel, MCContext &Ctx) const override; + void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target, MutableArrayRef Data, + uint64_t Value, bool IsResolved) const override; MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override; @@ -88,9 +89,10 @@ bool LanaiAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { return true; } -void LanaiAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, - unsigned /*DataSize*/, uint64_t Value, - bool /*IsPCRel*/, MCContext & /*Ctx*/) const { +void LanaiAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target, + MutableArrayRef Data, uint64_t Value, + bool /*IsResolved*/) const { MCFixupKind Kind = Fixup.getKind(); Value = adjustFixupValue(static_cast(Kind), Value); diff --git a/interpreter/llvm/src/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp b/interpreter/llvm/src/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp index e02bba529bd50..64cd3342ac18b 100644 --- a/interpreter/llvm/src/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp +++ b/interpreter/llvm/src/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp @@ -9,8 +9,8 @@ #include "MCTargetDesc/LanaiBaseInfo.h" #include "MCTargetDesc/LanaiFixupKinds.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCELFObjectWriter.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" using namespace llvm; diff --git a/interpreter/llvm/src/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp b/interpreter/llvm/src/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp index 10254677a5ad1..c3727416ecb94 100644 --- 
a/interpreter/llvm/src/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp +++ b/interpreter/llvm/src/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp @@ -19,8 +19,8 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/MC/MCCodeEmitter.h" -#include "llvm/MC/MCFixup.h" #include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCFixup.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" diff --git a/interpreter/llvm/src/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp b/interpreter/llvm/src/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp index a47ff9ff3d61d..bcbde2b8b7947 100644 --- a/interpreter/llvm/src/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp +++ b/interpreter/llvm/src/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp @@ -11,9 +11,9 @@ // //===----------------------------------------------------------------------===// -#include "LanaiMCAsmInfo.h" #include "LanaiMCTargetDesc.h" #include "InstPrinter/LanaiInstPrinter.h" +#include "LanaiMCAsmInfo.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/MC/MCInst.h" diff --git a/interpreter/llvm/src/lib/Target/MSP430/MSP430.td b/interpreter/llvm/src/lib/Target/MSP430/MSP430.td index dfea669f3ba1b..203864dd40650 100644 --- a/interpreter/llvm/src/lib/Target/MSP430/MSP430.td +++ b/interpreter/llvm/src/lib/Target/MSP430/MSP430.td @@ -22,6 +22,18 @@ def FeatureX : SubtargetFeature<"ext", "ExtendedInsts", "true", "Enable MSP430-X extensions">; +def FeatureHWMult16 + : SubtargetFeature<"hwmult16", "HWMultMode", "HWMult16", + "Enable 16-bit hardware multiplier">; + +def FeatureHWMult32 + : SubtargetFeature<"hwmult32", "HWMultMode", "HWMult32", + "Enable 32-bit hardware multiplier">; + +def FeatureHWMultF5 + : SubtargetFeature<"hwmultf5", "HWMultMode", "HWMultF5", + "Enable F5 series hardware multiplier">; + //===----------------------------------------------------------------------===// // MSP430 supported processors. 
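Editor's note: the MSP430 hunks starting here migrate the hardware-multiplier selection from a file-static cl::opt in MSP430ISelLowering.cpp to real subtarget features (hwmult16, hwmult32, hwmultf5) plus an -mhwmult option applied in MSP430Subtarget. The practical gain is that the mode now lives in the subtarget, so it composes with -mattr strings and per-function subtargets instead of being a process-wide flag. A sketch of the consumer side after the change; the libcall name is hypothetical:

    // Before: if (HWMultMode == HWMult16) ... consulted a global cl::opt.
    // After: the subtarget carries the mode.
    if (STI.hasHWMult16())
      setLibcallName(RTLIB::MUL_I16, "__hw_mpy16"); // hypothetical name

The retained -mhwmult=16bit option still wins: initializeSubtargetDependencies parses the CPU/feature string first and then overrides HWMultMode when the flag is set.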
//===----------------------------------------------------------------------===// @@ -29,6 +41,8 @@ class Proc Features> : Processor; def : Proc<"generic", []>; +def : Proc<"msp430", []>; +def : Proc<"msp430x", [FeatureX]>; //===----------------------------------------------------------------------===// // Register File Description diff --git a/interpreter/llvm/src/lib/Target/MSP430/MSP430AsmPrinter.cpp b/interpreter/llvm/src/lib/Target/MSP430/MSP430AsmPrinter.cpp index abf062fe86ae4..f39c21fc8aa2b 100644 --- a/interpreter/llvm/src/lib/Target/MSP430/MSP430AsmPrinter.cpp +++ b/interpreter/llvm/src/lib/Target/MSP430/MSP430AsmPrinter.cpp @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#include "MSP430.h" #include "InstPrinter/MSP430InstPrinter.h" +#include "MSP430.h" #include "MSP430InstrInfo.h" #include "MSP430MCInstLower.h" #include "MSP430TargetMachine.h" diff --git a/interpreter/llvm/src/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/interpreter/llvm/src/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp index cd58eda5d924c..0b02f79f472a1 100644 --- a/interpreter/llvm/src/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp +++ b/interpreter/llvm/src/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp @@ -403,12 +403,12 @@ void MSP430DAGToDAGISel::Select(SDNode *Node) { int FI = cast(Node)->getIndex(); SDValue TFI = CurDAG->getTargetFrameIndex(FI, MVT::i16); if (Node->hasOneUse()) { - CurDAG->SelectNodeTo(Node, MSP430::ADD16ri, MVT::i16, TFI, + CurDAG->SelectNodeTo(Node, MSP430::ADDframe, MVT::i16, TFI, CurDAG->getTargetConstant(0, dl, MVT::i16)); return; } ReplaceNode(Node, CurDAG->getMachineNode( - MSP430::ADD16ri, dl, MVT::i16, TFI, + MSP430::ADDframe, dl, MVT::i16, TFI, CurDAG->getTargetConstant(0, dl, MVT::i16))); return; } diff --git a/interpreter/llvm/src/lib/Target/MSP430/MSP430ISelLowering.cpp b/interpreter/llvm/src/lib/Target/MSP430/MSP430ISelLowering.cpp index cc6e64043f543..dae14fd301ee0 100644 --- a/interpreter/llvm/src/lib/Target/MSP430/MSP430ISelLowering.cpp +++ b/interpreter/llvm/src/lib/Target/MSP430/MSP430ISelLowering.cpp @@ -38,27 +38,6 @@ using namespace llvm; #define DEBUG_TYPE "msp430-lower" -typedef enum { - NoHWMult, - HWMult16, - HWMult32, - HWMultF5 -} HWMultUseMode; - -static cl::opt -HWMultMode("mhwmult", cl::Hidden, - cl::desc("Hardware multiplier use mode"), - cl::init(NoHWMult), - cl::values( - clEnumValN(NoHWMult, "none", - "Do not use hardware multiplier"), - clEnumValN(HWMult16, "16bit", - "Use 16-bit hardware multiplier"), - clEnumValN(HWMult32, "32bit", - "Use 32-bit hardware multiplier"), - clEnumValN(HWMultF5, "f5series", - "Use F5 series hardware multiplier"))); - MSP430TargetLowering::MSP430TargetLowering(const TargetMachine &TM, const MSP430Subtarget &STI) : TargetLowering(TM) { @@ -262,7 +241,7 @@ MSP430TargetLowering::MSP430TargetLowering(const TargetMachine &TM, setCmpLibcallCC(LC.Op, LC.Cond); } - if (HWMultMode == HWMult16) { + if (STI.hasHWMult16()) { const struct { const RTLIB::Libcall Op; const char * const Name; @@ -277,7 +256,7 @@ MSP430TargetLowering::MSP430TargetLowering(const TargetMachine &TM, for (const auto &LC : LibraryCalls) { setLibcallName(LC.Op, LC.Name); } - } else if (HWMultMode == HWMult32) { + } else if (STI.hasHWMult32()) { const struct { const RTLIB::Libcall Op; const char * const Name; @@ -292,7 +271,7 @@ MSP430TargetLowering::MSP430TargetLowering(const TargetMachine &TM, for (const auto &LC : LibraryCalls) { setLibcallName(LC.Op, LC.Name); } - } else if (HWMultMode == HWMultF5) { + } else if 
(STI.hasHWMultF5()) { const struct { const RTLIB::Libcall Op; const char * const Name; diff --git a/interpreter/llvm/src/lib/Target/MSP430/MSP430InstrInfo.td b/interpreter/llvm/src/lib/Target/MSP430/MSP430InstrInfo.td index 1cd18611e52c5..cec43040f60d4 100644 --- a/interpreter/llvm/src/lib/Target/MSP430/MSP430InstrInfo.td +++ b/interpreter/llvm/src/lib/Target/MSP430/MSP430InstrInfo.td @@ -122,6 +122,11 @@ def ADJCALLSTACKUP : Pseudo<(outs), (ins i16imm:$amt1, i16imm:$amt2), [(MSP430callseq_end timm:$amt1, timm:$amt2)]>; } +let Defs = [SR], Uses = [SP] in { +def ADDframe : Pseudo<(outs GR16:$dst), (ins i16imm:$base, i16imm:$offset), + "# ADDframe PSEUDO", []>; +} + let usesCustomInserter = 1 in { let Uses = [SR] in { def Select8 : Pseudo<(outs GR8:$dst), (ins GR8:$src, GR8:$src2, i8imm:$cc), diff --git a/interpreter/llvm/src/lib/Target/MSP430/MSP430RegisterInfo.cpp b/interpreter/llvm/src/lib/Target/MSP430/MSP430RegisterInfo.cpp index 9600bc28f1004..7a3b7a8bd5ff7 100644 --- a/interpreter/llvm/src/lib/Target/MSP430/MSP430RegisterInfo.cpp +++ b/interpreter/llvm/src/lib/Target/MSP430/MSP430RegisterInfo.cpp @@ -127,7 +127,7 @@ MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // Fold imm into offset Offset += MI.getOperand(FIOperandNum + 1).getImm(); - if (MI.getOpcode() == MSP430::ADD16ri) { + if (MI.getOpcode() == MSP430::ADDframe) { // This is actually "load effective address" of the stack slot // instruction. We have only two-address instructions, thus we need to // expand it into mov + add diff --git a/interpreter/llvm/src/lib/Target/MSP430/MSP430Subtarget.cpp b/interpreter/llvm/src/lib/Target/MSP430/MSP430Subtarget.cpp index 6216348e4d719..776a9dcb11d4f 100644 --- a/interpreter/llvm/src/lib/Target/MSP430/MSP430Subtarget.cpp +++ b/interpreter/llvm/src/lib/Target/MSP430/MSP430Subtarget.cpp @@ -19,6 +19,20 @@ using namespace llvm; #define DEBUG_TYPE "msp430-subtarget" +static cl::opt +HWMultModeOption("mhwmult", cl::Hidden, + cl::desc("Hardware multiplier use mode for MSP430"), + cl::init(MSP430Subtarget::NoHWMult), + cl::values( + clEnumValN(MSP430Subtarget::NoHWMult, "none", + "Do not use hardware multiplier"), + clEnumValN(MSP430Subtarget::HWMult16, "16bit", + "Use 16-bit hardware multiplier"), + clEnumValN(MSP430Subtarget::HWMult32, "32bit", + "Use 32-bit hardware multiplier"), + clEnumValN(MSP430Subtarget::HWMultF5, "f5series", + "Use F5 series hardware multiplier"))); + #define GET_SUBTARGETINFO_TARGET_DESC #define GET_SUBTARGETINFO_CTOR #include "MSP430GenSubtargetInfo.inc" @@ -27,7 +41,18 @@ void MSP430Subtarget::anchor() { } MSP430Subtarget & MSP430Subtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) { - ParseSubtargetFeatures("generic", FS); + ExtendedInsts = false; + HWMultMode = NoHWMult; + + std::string CPUName = CPU; + if (CPUName.empty()) + CPUName = "msp430"; + + ParseSubtargetFeatures(CPUName, FS); + + if (HWMultModeOption != NoHWMult) + HWMultMode = HWMultModeOption; + return *this; } diff --git a/interpreter/llvm/src/lib/Target/MSP430/MSP430Subtarget.h b/interpreter/llvm/src/lib/Target/MSP430/MSP430Subtarget.h index 1a00d85e01cb1..8828dfd65878f 100644 --- a/interpreter/llvm/src/lib/Target/MSP430/MSP430Subtarget.h +++ b/interpreter/llvm/src/lib/Target/MSP430/MSP430Subtarget.h @@ -30,8 +30,15 @@ namespace llvm { class StringRef; class MSP430Subtarget : public MSP430GenSubtargetInfo { +public: + enum HWMultEnum { + NoHWMult, HWMult16, HWMult32, HWMultF5 + }; + +private: virtual void anchor(); bool ExtendedInsts; + HWMultEnum HWMultMode; 
MSP430FrameLowering FrameLowering; MSP430InstrInfo InstrInfo; MSP430TargetLowering TLInfo; @@ -50,6 +57,10 @@ class MSP430Subtarget : public MSP430GenSubtargetInfo { /// subtarget options. Definition of function is auto generated by tblgen. void ParseSubtargetFeatures(StringRef CPU, StringRef FS); + bool hasHWMult16() const { return HWMultMode == HWMult16; } + bool hasHWMult32() const { return HWMultMode == HWMult32; } + bool hasHWMultF5() const { return HWMultMode == HWMultF5; } + const TargetFrameLowering *getFrameLowering() const override { return &FrameLowering; } diff --git a/interpreter/llvm/src/lib/Target/MSP430/MSP430TargetMachine.cpp b/interpreter/llvm/src/lib/Target/MSP430/MSP430TargetMachine.cpp index bebe5fa35ad42..982c6fea62d44 100644 --- a/interpreter/llvm/src/lib/Target/MSP430/MSP430TargetMachine.cpp +++ b/interpreter/llvm/src/lib/Target/MSP430/MSP430TargetMachine.cpp @@ -32,16 +32,20 @@ static Reloc::Model getEffectiveRelocModel(Optional RM) { return *RM; } +static std::string computeDataLayout(const Triple &TT, StringRef CPU, + const TargetOptions &Options) { + return "e-m:e-p:16:16-i32:16-i64:16-f32:16-f64:16-a:8-n8:16-S16"; +} + MSP430TargetMachine::MSP430TargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, Optional RM, CodeModel::Model CM, CodeGenOpt::Level OL) - : LLVMTargetMachine(T, "e-m:e-p:16:16-i32:16:32-a:16-n8:16", TT, CPU, FS, + : LLVMTargetMachine(T, computeDataLayout(TT, CPU, Options), TT, CPU, FS, Options, getEffectiveRelocModel(RM), CM, OL), TLOF(make_unique()), - // FIXME: Check DataLayout string. Subtarget(TT, CPU, FS, *this) { initAsmInfo(); } @@ -52,7 +56,7 @@ namespace { /// MSP430 Code Generator Pass Configuration Options. class MSP430PassConfig : public TargetPassConfig { public: - MSP430PassConfig(MSP430TargetMachine *TM, PassManagerBase &PM) + MSP430PassConfig(MSP430TargetMachine &TM, PassManagerBase &PM) : TargetPassConfig(TM, PM) {} MSP430TargetMachine &getMSP430TargetMachine() const { @@ -65,7 +69,7 @@ class MSP430PassConfig : public TargetPassConfig { } // namespace TargetPassConfig *MSP430TargetMachine::createPassConfig(PassManagerBase &PM) { - return new MSP430PassConfig(this, PM); + return new MSP430PassConfig(*this, PM); } bool MSP430PassConfig::addInstSelector() { diff --git a/interpreter/llvm/src/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/interpreter/llvm/src/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index d407774574be1..e12188e706025 100644 --- a/interpreter/llvm/src/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/interpreter/llvm/src/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -9,16 +9,18 @@ #include "MCTargetDesc/MipsABIFlagsSection.h" #include "MCTargetDesc/MipsABIInfo.h" +#include "MCTargetDesc/MipsBaseInfo.h" #include "MCTargetDesc/MipsMCExpr.h" #include "MCTargetDesc/MipsMCTargetDesc.h" #include "MipsTargetStreamer.h" -#include "MCTargetDesc/MipsBaseInfo.h" -#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/APFloat.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" @@ -39,13 +41,12 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include 
"llvm/Support/MathExtras.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/SMLoc.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" #include #include #include @@ -216,9 +217,15 @@ class MipsAsmParser : public MCTargetAsmParser { unsigned SrcReg, bool Is32BitSym, SMLoc IDLoc, MCStreamer &Out, const MCSubtargetInfo *STI); + bool emitPartialAddress(MipsTargetStreamer &TOut, SMLoc IDLoc, MCSymbol *Sym); + bool expandLoadImm(MCInst &Inst, bool Is32BitImm, SMLoc IDLoc, MCStreamer &Out, const MCSubtargetInfo *STI); + bool expandLoadImmReal(MCInst &Inst, bool IsSingle, bool IsGPR, bool Is64FPU, + SMLoc IDLoc, MCStreamer &Out, + const MCSubtargetInfo *STI); + bool expandLoadAddress(unsigned DstReg, unsigned BaseReg, const MCOperand &Offset, bool Is32BitAddress, SMLoc IDLoc, MCStreamer &Out, @@ -315,6 +322,7 @@ class MipsAsmParser : public MCTargetAsmParser { bool parseDirectiveSet(); bool parseDirectiveOption(); bool parseInsnDirective(); + bool parseRSectionDirective(StringRef Section); bool parseSSectionDirective(StringRef Section, unsigned Type); bool parseSetAtDirective(); @@ -335,6 +343,8 @@ class MipsAsmParser : public MCTargetAsmParser { bool parseSetPushDirective(); bool parseSetSoftFloatDirective(); bool parseSetHardFloatDirective(); + bool parseSetMtDirective(); + bool parseSetNoMtDirective(); bool parseSetAssignment(); @@ -620,6 +630,9 @@ class MipsAsmParser : public MCTargetAsmParser { bool useSoftFloat() const { return getSTI().getFeatureBits()[Mips::FeatureSoftFloat]; } + bool hasMT() const { + return getSTI().getFeatureBits()[Mips::FeatureMT]; + } /// Warn if RegIndex is the same as the current AT. void warnIfRegIndexIsAT(unsigned RegIndex, SMLoc Loc); @@ -1011,6 +1024,16 @@ class MipsOperand : public MCParsedAsmOperand { Inst.addOperand(MCOperand::createReg(getAFGR64Reg())); } + void addStrictlyAFGR64AsmRegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::createReg(getAFGR64Reg())); + } + + void addStrictlyFGR64AsmRegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::createReg(getFGR64Reg())); + } + void addFGR64AsmRegOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); Inst.addOperand(MCOperand::createReg(getFGR64Reg())); @@ -1027,6 +1050,15 @@ class MipsOperand : public MCParsedAsmOperand { "registers"); } + void addStrictlyFGR32AsmRegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::createReg(getFGR32Reg())); + // FIXME: We ought to do this for -integrated-as without -via-file-asm too. 
+ if (!AsmParser.useOddSPReg() && RegIdx.Index & 1) + AsmParser.Error(StartLoc, "-mno-odd-spreg prohibits the use of odd FPU " + "registers"); + } + void addFGRH32AsmRegOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); Inst.addOperand(MCOperand::createReg(getFGRH32Reg())); @@ -1574,6 +1606,11 @@ class MipsOperand : public MCParsedAsmOperand { return isRegIdx() && RegIdx.Kind & RegKind_FGR && RegIdx.Index <= 31; } + bool isStrictlyFGRAsmReg() const { + // AFGR64 is $0-$15 but we handle this in getAFGR64() + return isRegIdx() && RegIdx.Kind == RegKind_FGR && RegIdx.Index <= 31; + } + bool isHWRegsAsmReg() const { return isRegIdx() && RegIdx.Kind & RegKind_HWRegs && RegIdx.Index <= 31; } @@ -1934,6 +1971,7 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, case Mips::SDIV_MM: FirstOp = 0; SecondOp = 1; + LLVM_FALLTHROUGH; case Mips::SDivMacro: case Mips::DSDivMacro: case Mips::UDivMacro: @@ -2368,6 +2406,27 @@ MipsAsmParser::tryExpandInstruction(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, case Mips::PseudoTRUNC_W_D: return expandTrunc(Inst, true, true, IDLoc, Out, STI) ? MER_Fail : MER_Success; + + case Mips::LoadImmSingleGPR: + return expandLoadImmReal(Inst, true, true, false, IDLoc, Out, STI) + ? MER_Fail + : MER_Success; + case Mips::LoadImmSingleFGR: + return expandLoadImmReal(Inst, true, false, false, IDLoc, Out, STI) + ? MER_Fail + : MER_Success; + case Mips::LoadImmDoubleGPR: + return expandLoadImmReal(Inst, false, true, false, IDLoc, Out, STI) + ? MER_Fail + : MER_Success; + case Mips::LoadImmDoubleFGR: + return expandLoadImmReal(Inst, false, false, true, IDLoc, Out, STI) + ? MER_Fail + : MER_Success; + case Mips::LoadImmDoubleFGR_32: + return expandLoadImmReal(Inst, false, false, false, IDLoc, Out, STI) + ? MER_Fail + : MER_Success; case Mips::Ulh: return expandUlh(Inst, true, IDLoc, Out, STI) ? MER_Fail : MER_Success; case Mips::Ulhu: @@ -2736,6 +2795,7 @@ bool MipsAsmParser::loadAndAddSymbolAddress(const MCExpr *SymExpr, bool Is32BitSym, SMLoc IDLoc, MCStreamer &Out, const MCSubtargetInfo *STI) { + // FIXME: These expansions do not respect -mxgot. MipsTargetStreamer &TOut = getTargetStreamer(); bool UseSrcReg = SrcReg != Mips::NoRegister; warnIfNoMacro(IDLoc); @@ -2755,8 +2815,12 @@ bool MipsAsmParser::loadAndAddSymbolAddress(const MCExpr *SymExpr, // symbol in the final relocation is external and not modified with a // constant then we must use R_MIPS_CALL16 instead of R_MIPS_GOT16. 
if ((DstReg == Mips::T9 || DstReg == Mips::T9_64) && !UseSrcReg && - Res.getConstant() == 0 && !Res.getSymA()->getSymbol().isInSection() && - !Res.getSymA()->getSymbol().isTemporary()) { + Res.getConstant() == 0 && + !(Res.getSymA()->getSymbol().isInSection() || + Res.getSymA()->getSymbol().isTemporary() || + (Res.getSymA()->getSymbol().isELF() && + cast(Res.getSymA()->getSymbol()).getBinding() == + ELF::STB_LOCAL))) { const MCExpr *CallExpr = MipsMCExpr::create(MipsMCExpr::MEK_GOT_CALL, SymExpr, getContext()); TOut.emitRRX(Mips::LW, DstReg, ABI.GetGlobalPtr(), @@ -2812,6 +2876,85 @@ bool MipsAsmParser::loadAndAddSymbolAddress(const MCExpr *SymExpr, return false; } + if (inPicMode() && ABI.ArePtrs64bit()) { + MCValue Res; + if (!SymExpr->evaluateAsRelocatable(Res, nullptr, nullptr)) { + Error(IDLoc, "expected relocatable expression"); + return true; + } + if (Res.getSymB() != nullptr) { + Error(IDLoc, "expected relocatable expression with only one symbol"); + return true; + } + + // The case where the result register is $25 is somewhat special. If the + // symbol in the final relocation is external and not modified with a + // constant then we must use R_MIPS_CALL16 instead of R_MIPS_GOT_DISP. + if ((DstReg == Mips::T9 || DstReg == Mips::T9_64) && !UseSrcReg && + Res.getConstant() == 0 && + !(Res.getSymA()->getSymbol().isInSection() || + Res.getSymA()->getSymbol().isTemporary() || + (Res.getSymA()->getSymbol().isELF() && + cast(Res.getSymA()->getSymbol()).getBinding() == + ELF::STB_LOCAL))) { + const MCExpr *CallExpr = + MipsMCExpr::create(MipsMCExpr::MEK_GOT_CALL, SymExpr, getContext()); + TOut.emitRRX(Mips::LD, DstReg, ABI.GetGlobalPtr(), + MCOperand::createExpr(CallExpr), IDLoc, STI); + return false; + } + + // The remaining cases are: + // Small offset: ld $tmp, %got_disp(symbol)($gp) + // >daddiu $tmp, $tmp, offset + // >daddu $rd, $tmp, $rs + // The daddiu's marked with a '>' may be omitted if they are redundant. If + // this happens then the last instruction must use $rd as the result + // register. + const MipsMCExpr *GotExpr = MipsMCExpr::create(MipsMCExpr::MEK_GOT_DISP, + Res.getSymA(), + getContext()); + const MCExpr *LoExpr = nullptr; + if (Res.getConstant() != 0) { + // Symbols fully resolve with just the %got_disp(symbol) but we + // must still account for any offset to the symbol for + // expressions like symbol+8. + LoExpr = MCConstantExpr::create(Res.getConstant(), getContext()); + + // FIXME: Offsets greater than 16 bits are not yet implemented. + // FIXME: The correct range is a 32-bit sign-extended number. + if (Res.getConstant() < -0x8000 || Res.getConstant() > 0x7fff) { + Error(IDLoc, "macro instruction uses large offset, which is not " + "currently supported"); + return true; + } + } + + unsigned TmpReg = DstReg; + if (UseSrcReg && + getContext().getRegisterInfo()->isSuperOrSubRegisterEq(DstReg, + SrcReg)) { + // If $rs is the same as $rd, we need to use AT. + // If it is not available we exit. 
+ unsigned ATReg = getATReg(IDLoc); + if (!ATReg) + return true; + TmpReg = ATReg; + } + + TOut.emitRRX(Mips::LD, TmpReg, ABI.GetGlobalPtr(), + MCOperand::createExpr(GotExpr), IDLoc, STI); + + if (LoExpr) + TOut.emitRRX(Mips::DADDiu, TmpReg, TmpReg, MCOperand::createExpr(LoExpr), + IDLoc, STI); + + if (UseSrcReg) + TOut.emitRRR(Mips::DADDu, DstReg, TmpReg, SrcReg, IDLoc, STI); + + return false; + } + const MipsMCExpr *HiExpr = MipsMCExpr::create(MipsMCExpr::MEK_HI, SymExpr, getContext()); const MipsMCExpr *LoExpr = @@ -2952,6 +3095,302 @@ bool MipsAsmParser::loadAndAddSymbolAddress(const MCExpr *SymExpr, return false; } +// Each double-precision register DO-D15 overlaps with two of the single +// precision registers F0-F31. As an example, all of the following hold true: +// D0 + 1 == F1, F1 + 1 == D1, F1 + 1 == F2, depending on the context. +static unsigned nextReg(unsigned Reg) { + if (MipsMCRegisterClasses[Mips::FGR32RegClassID].contains(Reg)) + return Reg == (unsigned)Mips::F31 ? (unsigned)Mips::F0 : Reg + 1; + switch (Reg) { + default: llvm_unreachable("Unknown register in assembly macro expansion!"); + case Mips::ZERO: return Mips::AT; + case Mips::AT: return Mips::V0; + case Mips::V0: return Mips::V1; + case Mips::V1: return Mips::A0; + case Mips::A0: return Mips::A1; + case Mips::A1: return Mips::A2; + case Mips::A2: return Mips::A3; + case Mips::A3: return Mips::T0; + case Mips::T0: return Mips::T1; + case Mips::T1: return Mips::T2; + case Mips::T2: return Mips::T3; + case Mips::T3: return Mips::T4; + case Mips::T4: return Mips::T5; + case Mips::T5: return Mips::T6; + case Mips::T6: return Mips::T7; + case Mips::T7: return Mips::S0; + case Mips::S0: return Mips::S1; + case Mips::S1: return Mips::S2; + case Mips::S2: return Mips::S3; + case Mips::S3: return Mips::S4; + case Mips::S4: return Mips::S5; + case Mips::S5: return Mips::S6; + case Mips::S6: return Mips::S7; + case Mips::S7: return Mips::T8; + case Mips::T8: return Mips::T9; + case Mips::T9: return Mips::K0; + case Mips::K0: return Mips::K1; + case Mips::K1: return Mips::GP; + case Mips::GP: return Mips::SP; + case Mips::SP: return Mips::FP; + case Mips::FP: return Mips::RA; + case Mips::RA: return Mips::ZERO; + case Mips::D0: return Mips::F1; + case Mips::D1: return Mips::F3; + case Mips::D2: return Mips::F5; + case Mips::D3: return Mips::F7; + case Mips::D4: return Mips::F9; + case Mips::D5: return Mips::F11; + case Mips::D6: return Mips::F13; + case Mips::D7: return Mips::F15; + case Mips::D8: return Mips::F17; + case Mips::D9: return Mips::F19; + case Mips::D10: return Mips::F21; + case Mips::D11: return Mips::F23; + case Mips::D12: return Mips::F25; + case Mips::D13: return Mips::F27; + case Mips::D14: return Mips::F29; + case Mips::D15: return Mips::F31; + } +} + +// FIXME: This method is too general. In principle we should compute the number +// of instructions required to synthesize the immediate inline compared to +// synthesizing the address inline and relying on non .text sections. +// For static O32 and N32 this may yield a small benefit, for static N64 this is +// likely to yield a much larger benefit as we have to synthesize a 64bit +// address to load a 64 bit value. 
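Editor's note: nextReg above is moved from later in the file (the old copy is deleted further down in this diff) and extended to cover the FPU. Its double-precision cases encode the FR=0 aliasing rule that D<n> overlays the single-precision pair (F<2n>, F<2n+1>), so the register "after" D<n> is the odd half of that pair. A one-line arithmetic sketch of what the case table spells out, as a hypothetical index-based helper:

    // D1 -> F3, D2 -> F5, ..., D15 -> F31: the odd half of the pair.
    unsigned oddHalfOfDoublePair(unsigned N) { // N in 0..15
      return 2 * N + 1;                        // index of F_(2N+1)
    }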
+bool MipsAsmParser::emitPartialAddress(MipsTargetStreamer &TOut, SMLoc IDLoc, + MCSymbol *Sym) { + unsigned ATReg = getATReg(IDLoc); + if (!ATReg) + return true; + + if(IsPicEnabled) { + const MCExpr *GotSym = + MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext()); + const MipsMCExpr *GotExpr = + MipsMCExpr::create(MipsMCExpr::MEK_GOT, GotSym, getContext()); + + if(isABI_O32() || isABI_N32()) { + TOut.emitRRX(Mips::LW, ATReg, Mips::GP, MCOperand::createExpr(GotExpr), + IDLoc, STI); + } else { //isABI_N64() + TOut.emitRRX(Mips::LD, ATReg, Mips::GP, MCOperand::createExpr(GotExpr), + IDLoc, STI); + } + } else { //!IsPicEnabled + const MCExpr *HiSym = + MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext()); + const MipsMCExpr *HiExpr = + MipsMCExpr::create(MipsMCExpr::MEK_HI, HiSym, getContext()); + + // FIXME: This is technically correct but gives a different result to gas, + // but gas is incomplete there (it has a fixme noting it doesn't work with + // 64-bit addresses). + // FIXME: With -msym32 option, the address expansion for N64 should probably + // use the O32 / N32 case. It's safe to use the 64 address expansion as the + // symbol's value is considered sign extended. + if(isABI_O32() || isABI_N32()) { + TOut.emitRX(Mips::LUi, ATReg, MCOperand::createExpr(HiExpr), IDLoc, STI); + } else { //isABI_N64() + const MCExpr *HighestSym = + MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext()); + const MipsMCExpr *HighestExpr = + MipsMCExpr::create(MipsMCExpr::MEK_HIGHEST, HighestSym, getContext()); + const MCExpr *HigherSym = + MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext()); + const MipsMCExpr *HigherExpr = + MipsMCExpr::create(MipsMCExpr::MEK_HIGHER, HigherSym, getContext()); + + TOut.emitRX(Mips::LUi, ATReg, MCOperand::createExpr(HighestExpr), IDLoc, + STI); + TOut.emitRRX(Mips::DADDiu, ATReg, ATReg, + MCOperand::createExpr(HigherExpr), IDLoc, STI); + TOut.emitRRI(Mips::DSLL, ATReg, ATReg, 16, IDLoc, STI); + TOut.emitRRX(Mips::DADDiu, ATReg, ATReg, MCOperand::createExpr(HiExpr), + IDLoc, STI); + TOut.emitRRI(Mips::DSLL, ATReg, ATReg, 16, IDLoc, STI); + } + } + return false; +} + +bool MipsAsmParser::expandLoadImmReal(MCInst &Inst, bool IsSingle, bool IsGPR, + bool Is64FPU, SMLoc IDLoc, + MCStreamer &Out, + const MCSubtargetInfo *STI) { + MipsTargetStreamer &TOut = getTargetStreamer(); + assert(Inst.getNumOperands() == 2 && "Invalid operand count"); + assert(Inst.getOperand(0).isReg() && Inst.getOperand(1).isImm() && + "Invalid instruction operand."); + + unsigned FirstReg = Inst.getOperand(0).getReg(); + uint64_t ImmOp64 = Inst.getOperand(1).getImm(); + + uint32_t HiImmOp64 = (ImmOp64 & 0xffffffff00000000) >> 32; + // If ImmOp64 is AsmToken::Integer type (all bits set to zero in the + // exponent field), convert it to double (e.g. 1 to 1.0) + if ((HiImmOp64 & 0x7ff00000) == 0) { + APFloat RealVal(APFloat::IEEEdouble(), ImmOp64); + ImmOp64 = RealVal.bitcastToAPInt().getZExtValue(); + } + + uint32_t LoImmOp64 = ImmOp64 & 0xffffffff; + HiImmOp64 = (ImmOp64 & 0xffffffff00000000) >> 32; + + if (IsSingle) { + // Conversion of a double in an uint64_t to a float in a uint32_t, + // retaining the bit pattern of a float. 
+ uint32_t ImmOp32; + double doubleImm = BitsToDouble(ImmOp64); + float tmp_float = static_cast(doubleImm); + ImmOp32 = FloatToBits(tmp_float); + + if (IsGPR) { + if (loadImmediate(ImmOp32, FirstReg, Mips::NoRegister, true, true, IDLoc, + Out, STI)) + return true; + return false; + } else { + unsigned ATReg = getATReg(IDLoc); + if (!ATReg) + return true; + if (LoImmOp64 == 0) { + if (loadImmediate(ImmOp32, ATReg, Mips::NoRegister, true, true, IDLoc, + Out, STI)) + return true; + TOut.emitRR(Mips::MTC1, FirstReg, ATReg, IDLoc, STI); + return false; + } + + MCSection *CS = getStreamer().getCurrentSectionOnly(); + // FIXME: Enhance this expansion to use the .lit4 & .lit8 sections + // where appropriate. + MCSection *ReadOnlySection = getContext().getELFSection( + ".rodata", ELF::SHT_PROGBITS, ELF::SHF_ALLOC); + + MCSymbol *Sym = getContext().createTempSymbol(); + const MCExpr *LoSym = + MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext()); + const MipsMCExpr *LoExpr = + MipsMCExpr::create(MipsMCExpr::MEK_LO, LoSym, getContext()); + + getStreamer().SwitchSection(ReadOnlySection); + getStreamer().EmitLabel(Sym, IDLoc); + getStreamer().EmitIntValue(ImmOp32, 4); + getStreamer().SwitchSection(CS); + + if(emitPartialAddress(TOut, IDLoc, Sym)) + return true; + TOut.emitRRX(Mips::LWC1, FirstReg, ATReg, + MCOperand::createExpr(LoExpr), IDLoc, STI); + } + return false; + } + + // if(!IsSingle) + unsigned ATReg = getATReg(IDLoc); + if (!ATReg) + return true; + + if (IsGPR) { + if (LoImmOp64 == 0) { + if(isABI_N32() || isABI_N64()) { + if (loadImmediate(HiImmOp64, FirstReg, Mips::NoRegister, false, true, + IDLoc, Out, STI)) + return true; + return false; + } else { + if (loadImmediate(HiImmOp64, FirstReg, Mips::NoRegister, true, true, + IDLoc, Out, STI)) + return true; + + if (loadImmediate(0, nextReg(FirstReg), Mips::NoRegister, true, true, + IDLoc, Out, STI)) + return true; + return false; + } + } + + MCSection *CS = getStreamer().getCurrentSectionOnly(); + MCSection *ReadOnlySection = getContext().getELFSection( + ".rodata", ELF::SHT_PROGBITS, ELF::SHF_ALLOC); + + MCSymbol *Sym = getContext().createTempSymbol(); + const MCExpr *LoSym = + MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext()); + const MipsMCExpr *LoExpr = + MipsMCExpr::create(MipsMCExpr::MEK_LO, LoSym, getContext()); + + getStreamer().SwitchSection(ReadOnlySection); + getStreamer().EmitLabel(Sym, IDLoc); + getStreamer().EmitIntValue(HiImmOp64, 4); + getStreamer().EmitIntValue(LoImmOp64, 4); + getStreamer().SwitchSection(CS); + + if(emitPartialAddress(TOut, IDLoc, Sym)) + return true; + if(isABI_N64()) + TOut.emitRRX(Mips::DADDiu, ATReg, ATReg, + MCOperand::createExpr(LoExpr), IDLoc, STI); + else + TOut.emitRRX(Mips::ADDiu, ATReg, ATReg, + MCOperand::createExpr(LoExpr), IDLoc, STI); + + if(isABI_N32() || isABI_N64()) + TOut.emitRRI(Mips::LD, FirstReg, ATReg, 0, IDLoc, STI); + else { + TOut.emitRRI(Mips::LW, FirstReg, ATReg, 0, IDLoc, STI); + TOut.emitRRI(Mips::LW, nextReg(FirstReg), ATReg, 4, IDLoc, STI); + } + return false; + } else { // if(!IsGPR && !IsSingle) + if ((LoImmOp64 == 0) && + !((HiImmOp64 & 0xffff0000) && (HiImmOp64 & 0x0000ffff))) { + // FIXME: In the case where the constant is zero, we can load the + // register directly from the zero register. 
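Editor's note: the IsSingle path of expandLoadImmReal above narrows the 64-bit immediate through an actual floating-point round-trip (BitsToDouble, a cast, FloatToBits) rather than by truncating bits, so a "load immediate real" of 1 produces the single-precision encoding of 1.0f. In portable C++ the same operation is (a sketch; LLVM's helpers wrap the memcpy):

    #include <cstdint>
    #include <cstring>

    uint32_t narrowToFloatBits(uint64_t Bits64) {
      double D;
      std::memcpy(&D, &Bits64, sizeof D); // reinterpret the i64 payload
      float F = static_cast<float>(D);    // round to single precision
      uint32_t Bits32;
      std::memcpy(&Bits32, &F, sizeof F); // keep the float's bit pattern
      return Bits32;
    }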
+ if (loadImmediate(HiImmOp64, ATReg, Mips::NoRegister, true, true, IDLoc, + Out, STI)) + return true; + if (isABI_N32() || isABI_N64()) + TOut.emitRR(Mips::DMTC1, FirstReg, ATReg, IDLoc, STI); + else if (hasMips32r2()) { + TOut.emitRR(Mips::MTC1, FirstReg, Mips::ZERO, IDLoc, STI); + TOut.emitRRR(Mips::MTHC1_D32, FirstReg, FirstReg, ATReg, IDLoc, STI); + } else { + TOut.emitRR(Mips::MTC1, nextReg(FirstReg), ATReg, IDLoc, STI); + TOut.emitRR(Mips::MTC1, FirstReg, Mips::ZERO, IDLoc, STI); + } + return false; + } + + MCSection *CS = getStreamer().getCurrentSectionOnly(); + // FIXME: Enhance this expansion to use the .lit4 & .lit8 sections + // where appropriate. + MCSection *ReadOnlySection = getContext().getELFSection( + ".rodata", ELF::SHT_PROGBITS, ELF::SHF_ALLOC); + + MCSymbol *Sym = getContext().createTempSymbol(); + const MCExpr *LoSym = + MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext()); + const MipsMCExpr *LoExpr = + MipsMCExpr::create(MipsMCExpr::MEK_LO, LoSym, getContext()); + + getStreamer().SwitchSection(ReadOnlySection); + getStreamer().EmitLabel(Sym, IDLoc); + getStreamer().EmitIntValue(HiImmOp64, 4); + getStreamer().EmitIntValue(LoImmOp64, 4); + getStreamer().SwitchSection(CS); + + if(emitPartialAddress(TOut, IDLoc, Sym)) + return true; + TOut.emitRRX(Is64FPU ? Mips::LDC164 : Mips::LDC1, FirstReg, ATReg, + MCOperand::createExpr(LoExpr), IDLoc, STI); + } + return false; +} + bool MipsAsmParser::expandUncondBranchMMPseudo(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, const MCSubtargetInfo *STI) { @@ -4318,45 +4757,6 @@ bool MipsAsmParser::expandDMULMacro(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, return false; } -static unsigned nextReg(unsigned Reg) { - switch (Reg) { - case Mips::ZERO: return Mips::AT; - case Mips::AT: return Mips::V0; - case Mips::V0: return Mips::V1; - case Mips::V1: return Mips::A0; - case Mips::A0: return Mips::A1; - case Mips::A1: return Mips::A2; - case Mips::A2: return Mips::A3; - case Mips::A3: return Mips::T0; - case Mips::T0: return Mips::T1; - case Mips::T1: return Mips::T2; - case Mips::T2: return Mips::T3; - case Mips::T3: return Mips::T4; - case Mips::T4: return Mips::T5; - case Mips::T5: return Mips::T6; - case Mips::T6: return Mips::T7; - case Mips::T7: return Mips::S0; - case Mips::S0: return Mips::S1; - case Mips::S1: return Mips::S2; - case Mips::S2: return Mips::S3; - case Mips::S3: return Mips::S4; - case Mips::S4: return Mips::S5; - case Mips::S5: return Mips::S6; - case Mips::S6: return Mips::S7; - case Mips::S7: return Mips::T8; - case Mips::T8: return Mips::T9; - case Mips::T9: return Mips::K0; - case Mips::K0: return Mips::K1; - case Mips::K1: return Mips::GP; - case Mips::GP: return Mips::SP; - case Mips::SP: return Mips::FP; - case Mips::FP: return Mips::RA; - case Mips::RA: return Mips::ZERO; - default: return 0; - } - -} - // Expand 'ld $ offset($reg2)' to 'lw $, offset($reg2); // lw $>, offset+4($reg2)' // or expand 'sd $ offset($reg2)' to 'sw $, offset($reg2); @@ -4797,7 +5197,7 @@ int MipsAsmParser::matchCPURegisterName(StringRef Name) { CC = StringSwitch(Name) .Case("zero", 0) - .Case("at", 1) + .Cases("at", "AT", 1) .Case("a0", 4) .Case("a1", 5) .Case("a2", 6) @@ -5935,6 +6335,39 @@ bool MipsAsmParser::parseSetNoOddSPRegDirective() { return false; } +bool MipsAsmParser::parseSetMtDirective() { + MCAsmParser &Parser = getParser(); + Parser.Lex(); // Eat "mt". + + // If this is not the end of the statement, report an error. 
+ if (getLexer().isNot(AsmToken::EndOfStatement)) { + reportParseError("unexpected token, expected end of statement"); + return false; + } + + setFeatureBits(Mips::FeatureMT, "mt"); + getTargetStreamer().emitDirectiveSetMt(); + Parser.Lex(); // Consume the EndOfStatement. + return false; +} + +bool MipsAsmParser::parseSetNoMtDirective() { + MCAsmParser &Parser = getParser(); + Parser.Lex(); // Eat "nomt". + + // If this is not the end of the statement, report an error. + if (getLexer().isNot(AsmToken::EndOfStatement)) { + reportParseError("unexpected token, expected end of statement"); + return false; + } + + clearFeatureBits(Mips::FeatureMT, "mt"); + + getTargetStreamer().emitDirectiveSetNoMt(); + Parser.Lex(); // Consume the EndOfStatement. + return false; +} + bool MipsAsmParser::parseSetPopDirective() { MCAsmParser &Parser = getParser(); SMLoc Loc = getLexer().getLoc(); @@ -6435,6 +6868,10 @@ bool MipsAsmParser::parseDirectiveSet() { return parseSetMsaDirective(); } else if (Tok.getString() == "nomsa") { return parseSetNoMsaDirective(); + } else if (Tok.getString() == "mt") { + return parseSetMtDirective(); + } else if (Tok.getString() == "nomt") { + return parseSetNoMtDirective(); } else if (Tok.getString() == "softfloat") { return parseSetSoftFloatDirective(); } else if (Tok.getString() == "hardfloat") { @@ -6643,6 +7080,23 @@ bool MipsAsmParser::parseInsnDirective() { return false; } +/// parseRSectionDirective +/// ::= .rdata +bool MipsAsmParser::parseRSectionDirective(StringRef Section) { + // If this is not the end of the statement, report an error. + if (getLexer().isNot(AsmToken::EndOfStatement)) { + reportParseError("unexpected token, expected end of statement"); + return false; + } + + MCSection *ELFSection = getContext().getELFSection( + Section, ELF::SHT_PROGBITS, ELF::SHF_ALLOC); + getParser().getStreamer().SwitchSection(ELFSection); + + getParser().Lex(); // Eat EndOfStatement token. + return false; +} + /// parseSSectionDirective /// ::= .sbss /// ::= .sdata @@ -6667,6 +7121,7 @@ bool MipsAsmParser::parseSSectionDirective(StringRef Section, unsigned Type) { /// ::= .module fp=value /// ::= .module softfloat /// ::= .module hardfloat +/// ::= .module mt bool MipsAsmParser::parseDirectiveModule() { MCAsmParser &Parser = getParser(); MCAsmLexer &Lexer = getLexer(); @@ -6765,6 +7220,25 @@ bool MipsAsmParser::parseDirectiveModule() { return false; } + return false; // parseDirectiveModule has finished successfully. + } else if (Option == "mt") { + setModuleFeatureBits(Mips::FeatureMT, "mt"); + + // Synchronize the ABI Flags information with the FeatureBits information we + // updated above. + getTargetStreamer().updateABIInfo(*this); + + // If printing assembly, use the recently updated ABI Flags information. + // If generating ELF, don't do anything (the .MIPS.abiflags section gets + // emitted later). + getTargetStreamer().emitDirectiveModuleMT(); + + // If this is not the end of the statement, report an error. + if (getLexer().isNot(AsmToken::EndOfStatement)) { + reportParseError("unexpected token, expected end of statement"); + return false; + } + return false; // parseDirectiveModule has finished successfully. 
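Editor's note: the new MT directives above (.set mt, .set nomt, and the .module mt branch that closes just below) all follow the assembler's standard recipe: consume the keyword, require end-of-statement, flip the feature bit, and notify the target streamer (the asm streamer prints the directive back; the ELF streamer defers to the .MIPS.abiflags emission). Condensed to its skeleton, with a hypothetical directive name standing in for the mt/nomt pair:

    bool MipsAsmParser::parseSetXDirective() {      // hypothetical template
      getParser().Lex();                            // eat the keyword
      if (getLexer().isNot(AsmToken::EndOfStatement)) {
        reportParseError("unexpected token, expected end of statement");
        return false;                               // reported, not propagated
      }
      setFeatureBits(Mips::FeatureMT, "mt");        // flip the subtarget bit
      getTargetStreamer().emitDirectiveSetMt();     // asm/ELF streamer hook
      getParser().Lex();                            // consume EndOfStatement
      return false;
    }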
} else { return Error(L, "'" + Twine(Option) + "' is not a valid .module option."); @@ -7190,6 +7664,10 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) { parseInsnDirective(); return false; } + if (IDVal == ".rdata") { + parseRSectionDirective(".rodata"); + return false; + } if (IDVal == ".sbss") { parseSSectionDirective(IDVal, ELF::SHT_NOBITS); return false; diff --git a/interpreter/llvm/src/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/interpreter/llvm/src/lib/Target/Mips/Disassembler/MipsDisassembler.cpp index ecdf6b0de6e7f..b0b9943230363 100644 --- a/interpreter/llvm/src/lib/Target/Mips/Disassembler/MipsDisassembler.cpp +++ b/interpreter/llvm/src/lib/Target/Mips/Disassembler/MipsDisassembler.cpp @@ -17,14 +17,14 @@ #include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/MC/MCInst.h" -#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" #include #include diff --git a/interpreter/llvm/src/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h b/interpreter/llvm/src/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h index f385410270231..9abd4f1d6b08c 100644 --- a/interpreter/llvm/src/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h +++ b/interpreter/llvm/src/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h @@ -159,6 +159,8 @@ struct MipsABIFlagsSection { ASESet |= Mips::AFL_ASE_MICROMIPS; if (P.inMips16Mode()) ASESet |= Mips::AFL_ASE_MIPS16; + if (P.hasMT()) + ASESet |= Mips::AFL_ASE_MT; } template diff --git a/interpreter/llvm/src/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/interpreter/llvm/src/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp index 1e2eb7dbec3e6..a1ed0ea4d7f36 100644 --- a/interpreter/llvm/src/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp +++ b/interpreter/llvm/src/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp @@ -12,8 +12,8 @@ //===----------------------------------------------------------------------===// // -#include "MCTargetDesc/MipsFixupKinds.h" #include "MCTargetDesc/MipsAsmBackend.h" +#include "MCTargetDesc/MipsFixupKinds.h" #include "MCTargetDesc/MipsMCExpr.h" #include "MCTargetDesc/MipsMCTargetDesc.h" #include "llvm/MC/MCAsmBackend.h" @@ -235,10 +235,12 @@ static unsigned calculateMMLEIndex(unsigned i) { /// ApplyFixup - Apply the \p Value for given \p Fixup into the provided /// data fragment, at the offset specified by the fixup and following the /// fixup kind as appropriate. 
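Editor's note: the hunk that follows (like the Lanai one earlier) migrates MCAsmBackend::applyFixup to the release_50 signature: the raw char*/DataSize pair becomes MutableArrayRef<char>, the MCContext parameter disappears (it is reachable through the assembler), and IsPCRel gives way to IsResolved. A hedged sketch of a patch loop under the new shape, assuming a 4-byte little-endian fixup:

    void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
                    const MCValue &Target, MutableArrayRef<char> Data,
                    uint64_t Value, bool IsResolved) const override {
      MCContext &Ctx = Asm.getContext(); // e.g. for Ctx.reportError(...)
      (void)Ctx;
      unsigned Offset = Fixup.getOffset();
      unsigned NumBytes = 4;             // sketch: fixed-size fixup
      assert(Offset + NumBytes <= Data.size() && "fixup outside fragment");
      for (unsigned i = 0; i != NumBytes; ++i)
        Data[Offset + i] |= uint8_t(Value >> (8 * i));
    }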
-void MipsAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, - unsigned DataSize, uint64_t Value, bool IsPCRel, - MCContext &Ctx) const { +void MipsAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target, + MutableArrayRef Data, uint64_t Value, + bool IsResolved) const { MCFixupKind Kind = Fixup.getKind(); + MCContext &Ctx = Asm.getContext(); Value = adjustFixupValue(Fixup, Value, Ctx); if (!Value) diff --git a/interpreter/llvm/src/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h b/interpreter/llvm/src/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h index 4b3cc6e21f4cd..8ebde3b9b7a4e 100644 --- a/interpreter/llvm/src/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h +++ b/interpreter/llvm/src/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h @@ -38,8 +38,9 @@ class MipsAsmBackend : public MCAsmBackend { MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override; - void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, - uint64_t Value, bool IsPCRel, MCContext &Ctx) const override; + void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target, MutableArrayRef Data, + uint64_t Value, bool IsResolved) const override; Optional getFixupKind(StringRef Name) const override; const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override; diff --git a/interpreter/llvm/src/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/interpreter/llvm/src/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp index 1a1c613cfce02..d116ac3471bc0 100644 --- a/interpreter/llvm/src/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp +++ b/interpreter/llvm/src/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp @@ -10,13 +10,13 @@ #include "MCTargetDesc/MipsFixupKinds.h" #include "MCTargetDesc/MipsMCTargetDesc.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCFixup.h" #include "llvm/MC/MCSymbolELF.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" diff --git a/interpreter/llvm/src/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp b/interpreter/llvm/src/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp index ae32783223117..f658aadff22fe 100644 --- a/interpreter/llvm/src/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp +++ b/interpreter/llvm/src/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp @@ -10,12 +10,12 @@ #include "MipsELFStreamer.h" #include "MipsOptionRecord.h" #include "MipsTargetStreamer.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCSymbolELF.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/ELF.h" using namespace llvm; diff --git a/interpreter/llvm/src/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/interpreter/llvm/src/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp index a35eb2a8e03a2..0330824fd614e 100644 --- a/interpreter/llvm/src/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp +++ b/interpreter/llvm/src/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp @@ -11,10 +11,10 @@ // //===----------------------------------------------------------------------===// +#include "MipsMCCodeEmitter.h" #include "MCTargetDesc/MipsFixupKinds.h" #include "MCTargetDesc/MipsMCExpr.h" #include "MCTargetDesc/MipsMCTargetDesc.h" 
-#include "MipsMCCodeEmitter.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/SmallVector.h" diff --git a/interpreter/llvm/src/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp b/interpreter/llvm/src/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp index be04480044d48..aad6bf378ea00 100644 --- a/interpreter/llvm/src/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp +++ b/interpreter/llvm/src/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp @@ -8,14 +8,14 @@ //===----------------------------------------------------------------------===// #include "MipsMCExpr.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCSymbolELF.h" #include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbolELF.h" #include "llvm/MC/MCValue.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" diff --git a/interpreter/llvm/src/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp b/interpreter/llvm/src/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp index 74d5e4cc98419..2d84528e7469f 100644 --- a/interpreter/llvm/src/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp +++ b/interpreter/llvm/src/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp @@ -7,15 +7,15 @@ // //===----------------------------------------------------------------------===// +#include "MipsOptionRecord.h" #include "MipsABIInfo.h" #include "MipsELFStreamer.h" -#include "MipsOptionRecord.h" #include "MipsTargetStreamer.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSectionELF.h" -#include "llvm/Support/ELF.h" #include using namespace llvm; diff --git a/interpreter/llvm/src/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp b/interpreter/llvm/src/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp index 2d4083b27ed17..2907b77158575 100644 --- a/interpreter/llvm/src/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp +++ b/interpreter/llvm/src/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp @@ -11,19 +11,19 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/MipsABIInfo.h" #include "MipsTargetStreamer.h" #include "InstPrinter/MipsInstPrinter.h" +#include "MCTargetDesc/MipsABIInfo.h" #include "MipsELFStreamer.h" #include "MipsMCExpr.h" #include "MipsMCTargetDesc.h" #include "MipsTargetObjectFile.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbolELF.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" @@ -50,6 +50,8 @@ void MipsTargetStreamer::emitDirectiveSetMacro() { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveSetNoMacro() { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveSetMsa() { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveSetNoMsa() { forbidModuleDirective(); } +void MipsTargetStreamer::emitDirectiveSetMt() {} +void MipsTargetStreamer::emitDirectiveSetNoMt() { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveSetAt() { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveSetAtWithArg(unsigned RegNo) { forbidModuleDirective(); @@ -118,6 +120,7 @@ 
void MipsTargetStreamer::emitDirectiveModuleOddSPReg() { } void MipsTargetStreamer::emitDirectiveModuleSoftFloat() {} void MipsTargetStreamer::emitDirectiveModuleHardFloat() {} +void MipsTargetStreamer::emitDirectiveModuleMT() {} void MipsTargetStreamer::emitDirectiveSetFp( MipsABIFlagsSection::FpABIKind Value) { forbidModuleDirective(); @@ -392,6 +395,16 @@ void MipsTargetAsmStreamer::emitDirectiveSetNoMsa() { MipsTargetStreamer::emitDirectiveSetNoMsa(); } +void MipsTargetAsmStreamer::emitDirectiveSetMt() { + OS << "\t.set\tmt\n"; + MipsTargetStreamer::emitDirectiveSetMt(); +} + +void MipsTargetAsmStreamer::emitDirectiveSetNoMt() { + OS << "\t.set\tnomt\n"; + MipsTargetStreamer::emitDirectiveSetNoMt(); +} + void MipsTargetAsmStreamer::emitDirectiveSetAt() { OS << "\t.set\tat\n"; MipsTargetStreamer::emitDirectiveSetAt(); @@ -656,6 +669,10 @@ void MipsTargetAsmStreamer::emitDirectiveModuleHardFloat() { OS << "\t.module\thardfloat\n"; } +void MipsTargetAsmStreamer::emitDirectiveModuleMT() { + OS << "\t.module\tmt\n"; +} + // This part is for ELF object output. MipsTargetELFStreamer::MipsTargetELFStreamer(MCStreamer &S, const MCSubtargetInfo &STI) diff --git a/interpreter/llvm/src/lib/Target/Mips/MicroMips64r6InstrInfo.td b/interpreter/llvm/src/lib/Target/Mips/MicroMips64r6InstrInfo.td index 6b7f39e9dd79d..38b09d105ddd5 100644 --- a/interpreter/llvm/src/lib/Target/Mips/MicroMips64r6InstrInfo.td +++ b/interpreter/llvm/src/lib/Target/Mips/MicroMips64r6InstrInfo.td @@ -548,3 +548,15 @@ def : MipsInstAlias<"dnegu $rt, $rs", def : MipsInstAlias<"dnegu $rt", (DSUBU_MM64R6 GPR64Opnd:$rt, ZERO_64, GPR64Opnd:$rt), 1>, ISA_MICROMIPS64R6; +def : MipsInstAlias<"dsll $rd, $rt, $rs", + (DSLLV_MM64R6 GPR64Opnd:$rd, GPR64Opnd:$rt, + GPR32Opnd:$rs), 0>, ISA_MICROMIPS64R6; +def : MipsInstAlias<"dsrl $rd, $rt, $rs", + (DSRLV_MM64R6 GPR64Opnd:$rd, GPR64Opnd:$rt, + GPR32Opnd:$rs), 0>, ISA_MICROMIPS64R6; +def : MipsInstAlias<"dsrl $rd, $rt", + (DSRLV_MM64R6 GPR64Opnd:$rd, GPR64Opnd:$rd, + GPR32Opnd:$rt), 0>, ISA_MICROMIPS64R6; +def : MipsInstAlias<"dsll $rd, $rt", + (DSLLV_MM64R6 GPR64Opnd:$rd, GPR64Opnd:$rd, + GPR32Opnd:$rt), 0>, ISA_MICROMIPS64R6; diff --git a/interpreter/llvm/src/lib/Target/Mips/MicroMipsSizeReduction.cpp b/interpreter/llvm/src/lib/Target/Mips/MicroMipsSizeReduction.cpp index 4593fc92ca6f0..35948e36ad913 100644 --- a/interpreter/llvm/src/lib/Target/Mips/MicroMipsSizeReduction.cpp +++ b/interpreter/llvm/src/lib/Target/Mips/MicroMipsSizeReduction.cpp @@ -135,6 +135,14 @@ class MicroMipsSizeReduce : public MachineFunctionPass { // returns true on success. static bool ReduceXWtoXWSP(MachineInstr *MI, const ReduceEntry &Entry); + // Attempts to reduce LBU/LHU instruction into LBU16/LHU16, + // returns true on success. + static bool ReduceLXUtoLXU16(MachineInstr *MI, const ReduceEntry &Entry); + + // Attempts to reduce SB/SH instruction into SB16/SH16, + // returns true on success. 
+ static bool ReduceSXtoSX16(MachineInstr *MI, const ReduceEntry &Entry); + // Attempts to reduce arithmetic instructions, returns true on success static bool ReduceArithmeticInstructions(MachineInstr *MI, const ReduceEntry &Entry); @@ -162,10 +170,26 @@ llvm::SmallVector MicroMipsSizeReduce::ReduceTable = { {RT_OneInstr, OpCodes(Mips::ADDu_MM, Mips::ADDU16_MM), ReduceArithmeticInstructions, OpInfo(OT_OperandsAll), ImmField(0, 0, 0, -1)}, + {RT_OneInstr, OpCodes(Mips::LBu, Mips::LBU16_MM), ReduceLXUtoLXU16, + OpInfo(OT_OperandsAll), ImmField(0, -1, 15, 2)}, + {RT_OneInstr, OpCodes(Mips::LBu_MM, Mips::LBU16_MM), ReduceLXUtoLXU16, + OpInfo(OT_OperandsAll), ImmField(0, -1, 15, 2)}, + {RT_OneInstr, OpCodes(Mips::LHu, Mips::LHU16_MM), ReduceLXUtoLXU16, + OpInfo(OT_OperandsAll), ImmField(1, 0, 16, 2)}, + {RT_OneInstr, OpCodes(Mips::LHu_MM, Mips::LHU16_MM), ReduceLXUtoLXU16, + OpInfo(OT_OperandsAll), ImmField(1, 0, 16, 2)}, {RT_OneInstr, OpCodes(Mips::LW, Mips::LWSP_MM), ReduceXWtoXWSP, OpInfo(OT_OperandsAll), ImmField(2, 0, 32, 2)}, {RT_OneInstr, OpCodes(Mips::LW_MM, Mips::LWSP_MM), ReduceXWtoXWSP, OpInfo(OT_OperandsAll), ImmField(2, 0, 32, 2)}, + {RT_OneInstr, OpCodes(Mips::SB, Mips::SB16_MM), ReduceSXtoSX16, + OpInfo(OT_OperandsAll), ImmField(0, 0, 16, 2)}, + {RT_OneInstr, OpCodes(Mips::SB_MM, Mips::SB16_MM), ReduceSXtoSX16, + OpInfo(OT_OperandsAll), ImmField(0, 0, 16, 2)}, + {RT_OneInstr, OpCodes(Mips::SH, Mips::SH16_MM), ReduceSXtoSX16, + OpInfo(OT_OperandsAll), ImmField(1, 0, 16, 2)}, + {RT_OneInstr, OpCodes(Mips::SH_MM, Mips::SH16_MM), ReduceSXtoSX16, + OpInfo(OT_OperandsAll), ImmField(1, 0, 16, 2)}, {RT_OneInstr, OpCodes(Mips::SUBu, Mips::SUBU16_MM), ReduceArithmeticInstructions, OpInfo(OT_OperandsAll), ImmField(0, 0, 0, -1)}, @@ -193,6 +217,13 @@ static bool isMMThreeBitGPRegister(const MachineOperand &MO) { return false; } +// Returns true if the machine operand MO is register $0, $17, or $2-$7. 
+static bool isMMSourceRegister(const MachineOperand &MO) { + if (MO.isReg() && Mips::GPRMM16ZeroRegClass.contains(MO.getReg())) + return true; + return false; +} + // Returns true if the operand Op is an immediate value // and writes the immediate value into variable Imm static bool GetImm(MachineInstr *MI, unsigned Op, int64_t &Imm) { @@ -279,6 +310,32 @@ bool MicroMipsSizeReduce::ReduceArithmeticInstructions( return ReplaceInstruction(MI, Entry); } +bool MicroMipsSizeReduce::ReduceLXUtoLXU16(MachineInstr *MI, + const ReduceEntry &Entry) { + + if (!ImmInRange(MI, Entry)) + return false; + + if (!isMMThreeBitGPRegister(MI->getOperand(0)) || + !isMMThreeBitGPRegister(MI->getOperand(1))) + return false; + + return ReplaceInstruction(MI, Entry); +} + +bool MicroMipsSizeReduce::ReduceSXtoSX16(MachineInstr *MI, + const ReduceEntry &Entry) { + + if (!ImmInRange(MI, Entry)) + return false; + + if (!isMMSourceRegister(MI->getOperand(0)) || + !isMMThreeBitGPRegister(MI->getOperand(1))) + return false; + + return ReplaceInstruction(MI, Entry); +} + bool MicroMipsSizeReduce::ReduceMBB(MachineBasicBlock &MBB) { bool Modified = false; MachineBasicBlock::instr_iterator MII = MBB.instr_begin(), diff --git a/interpreter/llvm/src/lib/Target/Mips/Mips.h b/interpreter/llvm/src/lib/Target/Mips/Mips.h index 7553f3972f5d0..008b9505ee26b 100644 --- a/interpreter/llvm/src/lib/Target/Mips/Mips.h +++ b/interpreter/llvm/src/lib/Target/Mips/Mips.h @@ -23,14 +23,14 @@ namespace llvm { class ModulePass; class FunctionPass; - ModulePass *createMipsOs16Pass(MipsTargetMachine &TM); - ModulePass *createMips16HardFloatPass(MipsTargetMachine &TM); + ModulePass *createMipsOs16Pass(); + ModulePass *createMips16HardFloatPass(); - FunctionPass *createMipsModuleISelDagPass(MipsTargetMachine &TM); - FunctionPass *createMipsOptimizePICCallPass(MipsTargetMachine &TM); - FunctionPass *createMipsDelaySlotFillerPass(MipsTargetMachine &TM); + FunctionPass *createMipsModuleISelDagPass(); + FunctionPass *createMipsOptimizePICCallPass(); + FunctionPass *createMipsDelaySlotFillerPass(); FunctionPass *createMipsHazardSchedule(); - FunctionPass *createMipsLongBranchPass(MipsTargetMachine &TM); + FunctionPass *createMipsLongBranchPass(); FunctionPass *createMipsConstantIslandPass(); FunctionPass *createMicroMipsSizeReductionPass(); } // end namespace llvm; diff --git a/interpreter/llvm/src/lib/Target/Mips/Mips.td b/interpreter/llvm/src/lib/Target/Mips/Mips.td index 9615bc38bfcef..6ceb055775387 100644 --- a/interpreter/llvm/src/lib/Target/Mips/Mips.td +++ b/interpreter/llvm/src/lib/Target/Mips/Mips.td @@ -185,6 +185,14 @@ def FeatureUseTCCInDIV : SubtargetFeature< "UseTCCInDIV", "false", "Force the assembler to use trapping">; +def FeatureMadd4 : SubtargetFeature<"nomadd4", "DisableMadd4", "true", + "Disable 4-operand madd.fmt and related instructions">; + +def FeatureMT : SubtargetFeature<"mt", "HasMT", "true", "Mips MT ASE">; + +def FeatureLongCalls : SubtargetFeature<"long-calls", "UseLongCalls", "true", + "Disable use of the jal instruction">; + //===----------------------------------------------------------------------===// // Mips processors supported. 
//===----------------------------------------------------------------------===// diff --git a/interpreter/llvm/src/lib/Target/Mips/Mips16FrameLowering.cpp b/interpreter/llvm/src/lib/Target/Mips/Mips16FrameLowering.cpp index e7ceca9612a92..09e41e1423aee 100644 --- a/interpreter/llvm/src/lib/Target/Mips/Mips16FrameLowering.cpp +++ b/interpreter/llvm/src/lib/Target/Mips/Mips16FrameLowering.cpp @@ -1,4 +1,4 @@ -//===-- Mips16FrameLowering.cpp - Mips16 Frame Information ----------------===// +//===- Mips16FrameLowering.cpp - Mips16 Frame Information -----------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // @@ -17,14 +17,23 @@ #include "MipsInstrInfo.h" #include "MipsRegisterInfo.h" #include "MipsSubtarget.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/Function.h" -#include "llvm/Target/TargetOptions.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDwarf.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MachineLocation.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Target/TargetFrameLowering.h" +#include <cassert> +#include <cstdint> +#include <vector> using namespace llvm; @@ -63,7 +72,7 @@ void Mips16FrameLowering::emitPrologue(MachineFunction &MF, const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); - if (CSI.size()) { + if (!CSI.empty()) { const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(), @@ -80,7 +89,6 @@ void Mips16FrameLowering::emitPrologue(MachineFunction &MF, if (hasFP(MF)) BuildMI(MBB, MBBI, dl, TII.get(Mips::MoveR3216), Mips::S0) .addReg(Mips::SP).setMIFlag(MachineInstr::FrameSetup); - } void Mips16FrameLowering::emitEpilogue(MachineFunction &MF, diff --git a/interpreter/llvm/src/lib/Target/Mips/Mips16HardFloat.cpp b/interpreter/llvm/src/lib/Target/Mips/Mips16HardFloat.cpp index 5a394fe02f16c..3c2426129e49a 100644 --- a/interpreter/llvm/src/lib/Target/Mips/Mips16HardFloat.cpp +++ b/interpreter/llvm/src/lib/Target/Mips/Mips16HardFloat.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "MipsTargetMachine.h" +#include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/Module.h" #include "llvm/IR/Value.h" #include "llvm/Support/Debug.h" @@ -28,14 +29,16 @@ namespace { public: static char ID; - Mips16HardFloat(MipsTargetMachine &TM_) : ModulePass(ID), TM(TM_) {} + Mips16HardFloat() : ModulePass(ID) {} StringRef getPassName() const override { return "MIPS16 Hard Float Pass"; } - bool runOnModule(Module &M) override; + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<TargetPassConfig>(); + ModulePass::getAnalysisUsage(AU); + } - protected: - const MipsTargetMachine &TM; + bool runOnModule(Module &M) override; }; static void EmitInlineAsm(LLVMContext &C, BasicBlock *BB, StringRef AsmText) { @@ -520,6 +523,8 @@ static void removeUseSoftFloat(Function &F) { // during call lowering but it should be moved here in the future.
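The hunks above and below follow the same LLVM 5.0 idiom used throughout this patch: a pass stops storing a TargetMachine reference and instead declares a TargetPassConfig dependency, recovering the TM when it runs. A minimal self-contained sketch of that idiom (ExamplePass is a made-up name for illustration, not part of the patch):

#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
using namespace llvm;

namespace {
// Hypothetical pass used only to illustrate the TargetPassConfig idiom.
struct ExamplePass : ModulePass {
  static char ID;
  ExamplePass() : ModulePass(ID) {} // note: no TargetMachine parameter
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<TargetPassConfig>(); // make the TM reachable at run time
    ModulePass::getAnalysisUsage(AU);
  }
  bool runOnModule(Module &) override {
    // Fetch the target machine lazily instead of caching a reference.
    auto &TM = getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
    (void)TM;
    return false; // nothing modified
  }
};
} // end anonymous namespace
char ExamplePass::ID = 0;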
// bool Mips16HardFloat::runOnModule(Module &M) { + auto &TM = static_cast<const MipsTargetMachine &>( + getAnalysis<TargetPassConfig>().getTM<TargetMachine>()); DEBUG(errs() << "Run on Module Mips16HardFloat\n"); bool Modified = false; for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { @@ -541,6 +546,6 @@ bool Mips16HardFloat::runOnModule(Module &M) { } -ModulePass *llvm::createMips16HardFloatPass(MipsTargetMachine &TM) { - return new Mips16HardFloat(TM); +ModulePass *llvm::createMips16HardFloatPass() { + return new Mips16HardFloat(); } diff --git a/interpreter/llvm/src/lib/Target/Mips/Mips32r6InstrInfo.td b/interpreter/llvm/src/lib/Target/Mips/Mips32r6InstrInfo.td index 3272319ad50f4..7daea163b8a64 100644 --- a/interpreter/llvm/src/lib/Target/Mips/Mips32r6InstrInfo.td +++ b/interpreter/llvm/src/lib/Target/Mips/Mips32r6InstrInfo.td @@ -326,9 +326,9 @@ class AUIPC_DESC : ALUIPC_DESC_BASE<"auipc", GPR32Opnd, II_AUIPC>; class AUI_DESC_BASE<string instr_asm, RegisterOperand GPROpnd, InstrItinClass itin> : MipsR6Arch<instr_asm> { - dag OutOperandList = (outs GPROpnd:$rs); - dag InOperandList = (ins GPROpnd:$rt, uimm16:$imm); - string AsmString = !strconcat(instr_asm, "\t$rs, $rt, $imm"); + dag OutOperandList = (outs GPROpnd:$rt); + dag InOperandList = (ins GPROpnd:$rs, uimm16:$imm); + string AsmString = !strconcat(instr_asm, "\t$rt, $rs, $imm"); list<dag> Pattern = []; InstrItinClass Itinerary = itin; } diff --git a/interpreter/llvm/src/lib/Target/Mips/Mips64InstrInfo.td b/interpreter/llvm/src/lib/Target/Mips/Mips64InstrInfo.td index 99025fe1341da..3dba7ce30cade 100644 --- a/interpreter/llvm/src/lib/Target/Mips/Mips64InstrInfo.td +++ b/interpreter/llvm/src/lib/Target/Mips/Mips64InstrInfo.td @@ -748,9 +748,6 @@ let AdditionalPredicates = [NotInMicroMips] in { defm : OneOrTwoOperandMacroImmediateAlias<"xor", XORi64, GPR64Opnd, imm64>, GPR_64; } -def : MipsInstAlias<"dsll $rd, $rt, $rs", - (DSLLV GPR64Opnd:$rd, GPR64Opnd:$rt, GPR32Opnd:$rs), 0>, - ISA_MIPS3; let AdditionalPredicates = [NotInMicroMips] in { def : MipsInstAlias<"dneg $rt, $rs", (DSUB GPR64Opnd:$rt, ZERO_64, GPR64Opnd:$rs), 1>, @@ -793,9 +790,18 @@ def : MipsInstAlias<"dsra $rd, $rt, $rs", (DSRAV GPR64Opnd:$rd, GPR64Opnd:$rt, GPR32Opnd:$rs), 0>, ISA_MIPS3; let AdditionalPredicates = [NotInMicroMips] in { + def : MipsInstAlias<"dsll $rd, $rt, $rs", + (DSLLV GPR64Opnd:$rd, GPR64Opnd:$rt, GPR32Opnd:$rs), 0>, + ISA_MIPS3; def : MipsInstAlias<"dsrl $rd, $rt, $rs", (DSRLV GPR64Opnd:$rd, GPR64Opnd:$rt, GPR32Opnd:$rs), 0>, ISA_MIPS3; + def : MipsInstAlias<"dsrl $rd, $rt", + (DSRLV GPR64Opnd:$rd, GPR64Opnd:$rd, GPR32Opnd:$rt), 0>, + ISA_MIPS3; + def : MipsInstAlias<"dsll $rd, $rt", + (DSLLV GPR64Opnd:$rd, GPR64Opnd:$rd, GPR32Opnd:$rt), 0>, + ISA_MIPS3; // Two operand (implicit 0 selector) versions: def : MipsInstAlias<"dmtc0 $rt, $rd", diff --git a/interpreter/llvm/src/lib/Target/Mips/MipsAsmPrinter.cpp b/interpreter/llvm/src/lib/Target/Mips/MipsAsmPrinter.cpp index 9cdbf510737f4..f7ff7c3dc7bbf 100644 --- a/interpreter/llvm/src/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/interpreter/llvm/src/lib/Target/Mips/MipsAsmPrinter.cpp @@ -12,17 +12,18 @@ // //===----------------------------------------------------------------------===// +#include "MipsAsmPrinter.h" #include "InstPrinter/MipsInstPrinter.h" #include "MCTargetDesc/MipsBaseInfo.h" #include "MCTargetDesc/MipsMCNaCl.h" #include "Mips.h" -#include "MipsAsmPrinter.h" #include "MipsInstrInfo.h" #include "MipsMCInstLower.h" #include "MipsTargetMachine.h" #include "MipsTargetStreamer.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/ELF.h" #include
"llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -43,7 +44,6 @@ #include "llvm/MC/MCSection.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSymbolELF.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetLoweringObjectFile.h" diff --git a/interpreter/llvm/src/lib/Target/Mips/MipsCCState.cpp b/interpreter/llvm/src/lib/Target/Mips/MipsCCState.cpp index cb9f676c237a7..6a03ee9927d74 100644 --- a/interpreter/llvm/src/lib/Target/Mips/MipsCCState.cpp +++ b/interpreter/llvm/src/lib/Target/Mips/MipsCCState.cpp @@ -51,6 +51,22 @@ static bool originalTypeIsF128(const Type *Ty, const char *Func) { return (Func && Ty->isIntegerTy(128) && isF128SoftLibCall(Func)); } +/// Return true if the original type was vXfXX. +static bool originalEVTTypeIsVectorFloat(EVT Ty) { + if (Ty.isVector() && Ty.getVectorElementType().isFloatingPoint()) + return true; + + return false; +} + +/// Return true if the original type was vXfXX. +static bool originalTypeIsVectorFloat(const Type *Ty) { + if (Ty->isVectorTy() && Ty->isFPOrFPVectorTy()) + return true; + + return false; +} + MipsCCState::SpecialCallingConvType MipsCCState::getSpecialCallingConvForCallee(const SDNode *Callee, const MipsSubtarget &Subtarget) { @@ -78,8 +94,8 @@ void MipsCCState::PreAnalyzeCallResultForF128( } } -/// Identify lowered values that originated from f128 arguments and record -/// this for use by RetCC_MipsN. +/// Identify lowered values that originated from f128 or float arguments and +/// record this for use by RetCC_MipsN. void MipsCCState::PreAnalyzeReturnForF128( const SmallVectorImpl<ISD::OutputArg> &Outs) { const MachineFunction &MF = getMachineFunction(); @@ -91,23 +107,44 @@ void MipsCCState::PreAnalyzeReturnForF128( } } -/// Identify lowered values that originated from f128 arguments and record +/// Identify lowered values that originated from vXfXX and record /// this. +void MipsCCState::PreAnalyzeCallResultForVectorFloat( + const SmallVectorImpl<ISD::InputArg> &Ins, const Type *RetTy) { + for (unsigned i = 0; i < Ins.size(); ++i) { + OriginalRetWasFloatVector.push_back(originalTypeIsVectorFloat(RetTy)); + } +} + +/// Identify lowered values that originated from vXfXX arguments and record +/// this. +void MipsCCState::PreAnalyzeReturnForVectorFloat( + const SmallVectorImpl<ISD::OutputArg> &Outs) { + for (unsigned i = 0; i < Outs.size(); ++i) { + ISD::OutputArg Out = Outs[i]; + OriginalRetWasFloatVector.push_back( + originalEVTTypeIsVectorFloat(Out.ArgVT)); + } +} + +/// Identify lowered values that originated from f128, float and sret to vXfXX +/// arguments and record this. void MipsCCState::PreAnalyzeCallOperands( const SmallVectorImpl<ISD::OutputArg> &Outs, std::vector<TargetLowering::ArgListEntry> &FuncArgs, const char *Func) { for (unsigned i = 0; i < Outs.size(); ++i) { - OriginalArgWasF128.push_back( - originalTypeIsF128(FuncArgs[Outs[i].OrigArgIndex].Ty, Func)); - OriginalArgWasFloat.push_back( - FuncArgs[Outs[i].OrigArgIndex].Ty->isFloatingPointTy()); + TargetLowering::ArgListEntry FuncArg = FuncArgs[Outs[i].OrigArgIndex]; + + OriginalArgWasF128.push_back(originalTypeIsF128(FuncArg.Ty, Func)); + OriginalArgWasFloat.push_back(FuncArg.Ty->isFloatingPointTy()); + OriginalArgWasFloatVector.push_back(FuncArg.Ty->isVectorTy()); + CallOperandIsFixed.push_back(Outs[i].IsFixed); } } -/// Identify lowered values that originated from f128 arguments and record -/// this.
+/// Identify lowered values that originated from f128, float and vXfXX arguments +/// and record this. void MipsCCState::PreAnalyzeFormalArgumentsForF128( const SmallVectorImpl<ISD::InputArg> &Ins) { const MachineFunction &MF = getMachineFunction(); @@ -120,6 +157,7 @@ void MipsCCState::PreAnalyzeFormalArgumentsForF128( if (Ins[i].Flags.isSRet()) { OriginalArgWasF128.push_back(false); OriginalArgWasFloat.push_back(false); + OriginalArgWasFloatVector.push_back(false); continue; } @@ -129,5 +167,10 @@ void MipsCCState::PreAnalyzeFormalArgumentsForF128( OriginalArgWasF128.push_back( originalTypeIsF128(FuncArg->getType(), nullptr)); OriginalArgWasFloat.push_back(FuncArg->getType()->isFloatingPointTy()); + + // The MIPS vector ABI exhibits a quirk: if the first argument is + // actually an SRet pointer to a vector, then the next argument slot is + // $a2. + OriginalArgWasFloatVector.push_back(FuncArg->getType()->isVectorTy()); } } diff --git a/interpreter/llvm/src/lib/Target/Mips/MipsCCState.h b/interpreter/llvm/src/lib/Target/Mips/MipsCCState.h index 77ecc65b2eeed..27901699480bf 100644 --- a/interpreter/llvm/src/lib/Target/Mips/MipsCCState.h +++ b/interpreter/llvm/src/lib/Target/Mips/MipsCCState.h @@ -45,16 +45,33 @@ class MipsCCState : public CCState { const char *Func); /// Identify lowered values that originated from f128 arguments and record - /// this. + /// this for use by RetCC_MipsN. void PreAnalyzeFormalArgumentsForF128(const SmallVectorImpl<ISD::InputArg> &Ins); + void + PreAnalyzeCallResultForVectorFloat(const SmallVectorImpl<ISD::InputArg> &Ins, + const Type *RetTy); + + void PreAnalyzeFormalArgumentsForVectorFloat( + const SmallVectorImpl<ISD::InputArg> &Ins); + + void + PreAnalyzeReturnForVectorFloat(const SmallVectorImpl<ISD::OutputArg> &Outs); + /// Records whether the value has been lowered from an f128. SmallVector<bool, 4> OriginalArgWasF128; /// Records whether the value has been lowered from float. SmallVector<bool, 4> OriginalArgWasFloat; + /// Records whether the value has been lowered from a floating point vector. + SmallVector<bool, 4> OriginalArgWasFloatVector; + + /// Records whether the return value has been lowered from a floating point + /// vector. + SmallVector<bool, 4> OriginalRetWasFloatVector; + /// Records whether the value was a fixed argument.
/// See ISD::OutputArg::IsFixed, SmallVector<bool, 4> CallOperandIsFixed; @@ -78,6 +95,7 @@ class MipsCCState : public CCState { CCState::AnalyzeCallOperands(Outs, Fn); OriginalArgWasF128.clear(); OriginalArgWasFloat.clear(); + OriginalArgWasFloatVector.clear(); CallOperandIsFixed.clear(); } @@ -96,31 +114,38 @@ class MipsCCState : public CCState { CCState::AnalyzeFormalArguments(Ins, Fn); OriginalArgWasFloat.clear(); OriginalArgWasF128.clear(); + OriginalArgWasFloatVector.clear(); } void AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins, CCAssignFn Fn, const Type *RetTy, const char *Func) { PreAnalyzeCallResultForF128(Ins, RetTy, Func); + PreAnalyzeCallResultForVectorFloat(Ins, RetTy); CCState::AnalyzeCallResult(Ins, Fn); OriginalArgWasFloat.clear(); OriginalArgWasF128.clear(); + OriginalArgWasFloatVector.clear(); } void AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs, CCAssignFn Fn) { PreAnalyzeReturnForF128(Outs); + PreAnalyzeReturnForVectorFloat(Outs); CCState::AnalyzeReturn(Outs, Fn); OriginalArgWasFloat.clear(); OriginalArgWasF128.clear(); + OriginalArgWasFloatVector.clear(); } bool CheckReturn(const SmallVectorImpl<ISD::OutputArg> &ArgsFlags, CCAssignFn Fn) { PreAnalyzeReturnForF128(ArgsFlags); + PreAnalyzeReturnForVectorFloat(ArgsFlags); bool Return = CCState::CheckReturn(ArgsFlags, Fn); OriginalArgWasFloat.clear(); OriginalArgWasF128.clear(); + OriginalArgWasFloatVector.clear(); return Return; } @@ -128,6 +153,12 @@ class MipsCCState : public CCState { bool WasOriginalArgFloat(unsigned ValNo) { return OriginalArgWasFloat[ValNo]; } + bool WasOriginalArgVectorFloat(unsigned ValNo) const { + return OriginalArgWasFloatVector[ValNo]; + } + bool WasOriginalRetVectorFloat(unsigned ValNo) const { + return OriginalRetWasFloatVector[ValNo]; + } bool IsCallOperandFixed(unsigned ValNo) { return CallOperandIsFixed[ValNo]; } SpecialCallingConvType getSpecialCallingConv() { return SpecialCallingConv; } }; diff --git a/interpreter/llvm/src/lib/Target/Mips/MipsCallingConv.td b/interpreter/llvm/src/lib/Target/Mips/MipsCallingConv.td index a57cb7badc175..b5df78f89a6b9 100644 --- a/interpreter/llvm/src/lib/Target/Mips/MipsCallingConv.td +++ b/interpreter/llvm/src/lib/Target/Mips/MipsCallingConv.td @@ -37,6 +37,10 @@ class CCIfOrigArgWasF128<CCAction A> class CCIfArgIsVarArg<CCAction A> : CCIf<"!static_cast<MipsCCState *>(&State)->IsCallOperandFixed(ValNo)", A>; +/// Match if the return was a floating point vector. +class CCIfOrigArgWasNotVectorFloat<CCAction A> + : CCIf<"!static_cast<MipsCCState *>(&State)" + "->WasOriginalRetVectorFloat(ValNo)", A>; /// Match if the special calling conv is the specified value. class CCIfSpecialCallingConv @@ -93,8 +97,10 @@ def RetCC_MipsO32 : CallingConv<[ // Promote i1/i8/i16 return values to i32. CCIfType<[i1, i8, i16], CCPromoteToType<i32>>, - // i32 are returned in registers V0, V1, A0, A1 - CCIfType<[i32], CCAssignToReg<[V0, V1, A0, A1]>>, + // i32 are returned in registers V0, V1, A0, A1, unless the original return + // type was a vector of floats.
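For reference, the predicate wrapped around the i32 rule on the next line compiles down to a plain C++ test against the MipsCCState bookkeeping that PreAnalyzeReturnForVectorFloat fills in. Roughly — a conceptual sketch, not the literal tablegen output:

// Conceptual expansion of CCIfOrigArgWasNotVectorFloat<A>: the action A is
// only tried when the value being assigned was not split out of a float
// vector return.
static bool origRetWasNotVectorFloat(unsigned ValNo, CCState &State) {
  return !static_cast<MipsCCState *>(&State)->WasOriginalRetVectorFloat(ValNo);
}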
+ CCIfOrigArgWasNotVectorFloat<CCIfType<[i32], CCAssignToReg<[V0, V1, A0, A1]>>>, // f32 are returned in registers F0, F2 CCIfType<[f32], CCAssignToReg<[F0, F2]>>, diff --git a/interpreter/llvm/src/lib/Target/Mips/MipsConstantIslandPass.cpp b/interpreter/llvm/src/lib/Target/Mips/MipsConstantIslandPass.cpp index 026f66a1c0e15..ff43a39506103 100644 --- a/interpreter/llvm/src/lib/Target/Mips/MipsConstantIslandPass.cpp +++ b/interpreter/llvm/src/lib/Target/Mips/MipsConstantIslandPass.cpp @@ -24,10 +24,10 @@ #include "Mips16InstrInfo.h" #include "MipsMachineFunction.h" #include "MipsSubtarget.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineConstantPool.h" diff --git a/interpreter/llvm/src/lib/Target/Mips/MipsDSPInstrInfo.td b/interpreter/llvm/src/lib/Target/Mips/MipsDSPInstrInfo.td index ac9a81b1bb2f7..c238a65378e22 100644 --- a/interpreter/llvm/src/lib/Target/Mips/MipsDSPInstrInfo.td +++ b/interpreter/llvm/src/lib/Target/Mips/MipsDSPInstrInfo.td @@ -19,6 +19,7 @@ def immZExt4 : ImmLeaf<i32, [{return isUInt<4>(Imm);}]>; def immZExt8 : ImmLeaf<i32, [{return isUInt<8>(Imm);}]>; def immZExt10 : ImmLeaf<i32, [{return isUInt<10>(Imm);}]>; def immSExt6 : ImmLeaf<i32, [{return isInt<6>(Imm);}]>; +def immSExt10 : ImmLeaf<i32, [{return isInt<10>(Imm);}]>; // Mips-specific dsp nodes def SDT_MipsExtr : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>, @@ -851,8 +852,8 @@ class PACKRL_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"packrl.ph", int_mips_packrl_ph, class REPL_QB_DESC : REPL_DESC_BASE<"repl.qb", int_mips_repl_qb, uimm8, immZExt8, NoItinerary, DSPROpnd>; -class REPL_PH_DESC : REPL_DESC_BASE<"repl.ph", int_mips_repl_ph, uimm10, - immZExt10, NoItinerary, DSPROpnd>; +class REPL_PH_DESC : REPL_DESC_BASE<"repl.ph", int_mips_repl_ph, simm10, + immSExt10, NoItinerary, DSPROpnd>; class REPLV_QB_DESC : ABSQ_S_PH_R2_DESC_BASE<"replv.qb", int_mips_repl_qb, NoItinerary, DSPROpnd, GPR32Opnd>; diff --git a/interpreter/llvm/src/lib/Target/Mips/MipsDelaySlotFiller.cpp b/interpreter/llvm/src/lib/Target/Mips/MipsDelaySlotFiller.cpp index ae58c26e145ab..4a34e3101cb84 100644 --- a/interpreter/llvm/src/lib/Target/Mips/MipsDelaySlotFiller.cpp +++ b/interpreter/llvm/src/lib/Target/Mips/MipsDelaySlotFiller.cpp @@ -211,12 +211,12 @@ namespace { class Filler : public MachineFunctionPass { public: - Filler(TargetMachine &tm) - : MachineFunctionPass(ID), TM(tm) { } + Filler() : MachineFunctionPass(ID), TM(nullptr) {} StringRef getPassName() const override { return "Mips Delay Slot Filler"; } bool runOnMachineFunction(MachineFunction &F) override { + TM = &F.getTarget(); bool Changed = false; for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) @@ -290,7 +290,7 @@ namespace { bool terminateSearch(const MachineInstr &Candidate) const; - TargetMachine &TM; + const TargetMachine *TM; static char ID; }; @@ -386,7 +386,7 @@ void RegDefsUses::setCallerSaved(const MachineInstr &MI) { void RegDefsUses::setUnallocatableRegs(const MachineFunction &MF) { BitVector AllocSet = TRI.getAllocatableSet(MF); - for (int R = AllocSet.find_first(); R != -1; R = AllocSet.find_next(R)) + for (unsigned R : AllocSet.set_bits()) for (MCRegAliasIterator AI(R, &TRI, false); AI.isValid(); ++AI) AllocSet.set(*AI); @@ -564,7 +564,7 @@ Iter Filler::replaceWithCompactBranch(MachineBasicBlock &MBB, Iter Branch, // For given opcode returns opcode of corresponding instruction with short // delay slot.
-// For the pseudo TAILCALL*_MM instrunctions return the short delay slot +// For the pseudo TAILCALL*_MM instructions return the short delay slot // form. Unfortunately, TAILCALL<->b16 is denied as b16 has a limited range // that is too short to make use of for tail calls. static int getEquivalentCallShort(int Opcode) { @@ -610,7 +610,7 @@ bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) { Changed = true; // Delay slot filling is disabled at -O0. - if (!DisableDelaySlotFiller && (TM.getOptLevel() != CodeGenOpt::None)) { + if (!DisableDelaySlotFiller && (TM->getOptLevel() != CodeGenOpt::None)) { bool Filled = false; if (MipsCompactBranchPolicy.getValue() != CB_Always || @@ -910,6 +910,4 @@ bool Filler::terminateSearch(const MachineInstr &Candidate) const { /// createMipsDelaySlotFillerPass - Returns a pass that fills in delay /// slots in Mips MachineFunctions -FunctionPass *llvm::createMipsDelaySlotFillerPass(MipsTargetMachine &tm) { - return new Filler(tm); -} +FunctionPass *llvm::createMipsDelaySlotFillerPass() { return new Filler(); } diff --git a/interpreter/llvm/src/lib/Target/Mips/MipsFastISel.cpp b/interpreter/llvm/src/lib/Target/Mips/MipsFastISel.cpp index b83f44a74d5be..f79cb0e67200a 100644 --- a/interpreter/llvm/src/lib/Target/Mips/MipsFastISel.cpp +++ b/interpreter/llvm/src/lib/Target/Mips/MipsFastISel.cpp @@ -17,8 +17,8 @@ #include "MCTargetDesc/MipsABIInfo.h" #include "MCTargetDesc/MipsBaseInfo.h" #include "MipsCCState.h" -#include "MipsInstrInfo.h" #include "MipsISelLowering.h" +#include "MipsInstrInfo.h" #include "MipsMachineFunction.h" #include "MipsSubtarget.h" #include "MipsTargetMachine.h" diff --git a/interpreter/llvm/src/lib/Target/Mips/MipsISelLowering.cpp b/interpreter/llvm/src/lib/Target/Mips/MipsISelLowering.cpp index 78bae6954c3ca..20319f85696cd 100644 --- a/interpreter/llvm/src/lib/Target/Mips/MipsISelLowering.cpp +++ b/interpreter/llvm/src/lib/Target/Mips/MipsISelLowering.cpp @@ -22,12 +22,12 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/CallingConv.h" @@ -71,6 +71,48 @@ static bool isShiftedMask(uint64_t I, uint64_t &Pos, uint64_t &Size) { return true; } +// The MIPS MSA ABI passes vector arguments in the integer register set. +// The number of integer registers used is dependent on the ABI used. +MVT MipsTargetLowering::getRegisterTypeForCallingConv(MVT VT) const { + if (VT.isVector() && Subtarget.hasMSA()) + return Subtarget.isABI_O32() ? MVT::i32 : MVT::i64; + return MipsTargetLowering::getRegisterType(VT); +} + +MVT MipsTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context, + EVT VT) const { + if (VT.isVector()) { + if (Subtarget.isABI_O32()) { + return MVT::i32; + } else { + return (VT.getSizeInBits() == 32) ? MVT::i32 : MVT::i64; + } + } + return MipsTargetLowering::getRegisterType(Context, VT); +} + +unsigned MipsTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context, + EVT VT) const { + if (VT.isVector()) + return std::max((VT.getSizeInBits() / (Subtarget.isABI_O32() ?
32 : 64)), + 1U); + return MipsTargetLowering::getNumRegisters(Context, VT); +} + +unsigned MipsTargetLowering::getVectorTypeBreakdownForCallingConv( + LLVMContext &Context, EVT VT, EVT &IntermediateVT, + unsigned &NumIntermediates, MVT &RegisterVT) const { + + // Break down vector types to either 2 i64s or 4 i32s. + RegisterVT = getRegisterTypeForCallingConv(Context, VT); + IntermediateVT = RegisterVT; + NumIntermediates = VT.getSizeInBits() < RegisterVT.getSizeInBits() + ? VT.getVectorNumElements() + : VT.getSizeInBits() / RegisterVT.getSizeInBits(); + + return NumIntermediates; +} + SDValue MipsTargetLowering::getGlobalReg(SelectionDAG &DAG, EVT Ty) const { MipsFunctionInfo *FI = DAG.getMachineFunction().getInfo<MipsFunctionInfo>(); return DAG.getRegister(FI->getGlobalBaseReg(), Ty); @@ -322,6 +364,18 @@ MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM, setOperationAction(ISD::UDIV, MVT::i64, Expand); setOperationAction(ISD::UREM, MVT::i64, Expand); + if (!(Subtarget.hasDSP() && Subtarget.hasMips32r2())) { + setOperationAction(ISD::ADDC, MVT::i32, Expand); + setOperationAction(ISD::ADDE, MVT::i32, Expand); + } + + setOperationAction(ISD::ADDC, MVT::i64, Expand); + setOperationAction(ISD::ADDE, MVT::i64, Expand); + setOperationAction(ISD::SUBC, MVT::i32, Expand); + setOperationAction(ISD::SUBE, MVT::i32, Expand); + setOperationAction(ISD::SUBC, MVT::i64, Expand); + setOperationAction(ISD::SUBE, MVT::i64, Expand); + // Operations not directly supported by Mips. setOperationAction(ISD::BR_CC, MVT::f32, Expand); setOperationAction(ISD::BR_CC, MVT::f64, Expand); @@ -362,7 +416,6 @@ MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM, setOperationAction(ISD::FCOS, MVT::f64, Expand); setOperationAction(ISD::FSINCOS, MVT::f32, Expand); setOperationAction(ISD::FSINCOS, MVT::f64, Expand); - setOperationAction(ISD::FPOWI, MVT::f32, Expand); setOperationAction(ISD::FPOW, MVT::f32, Expand); setOperationAction(ISD::FPOW, MVT::f64, Expand); setOperationAction(ISD::FLOG, MVT::f32, Expand); @@ -428,6 +481,7 @@ MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM, setTargetDAGCombine(ISD::AND); setTargetDAGCombine(ISD::OR); setTargetDAGCombine(ISD::ADD); + setTargetDAGCombine(ISD::SUB); setTargetDAGCombine(ISD::AssertZext); setTargetDAGCombine(ISD::SHL); @@ -471,8 +525,9 @@ MipsTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo, !Subtarget.hasMips32r6() && !Subtarget.inMips16Mode() && !Subtarget.inMicroMipsMode(); - // Disable if we don't generate PIC or the ABI isn't O32. - if (!TM.isPositionIndependent() || !TM.getABI().IsO32()) + // Disable if either of the following is true: + // We do not generate PIC, the ABI is not O32, LargeGOT is being used. + if (!TM.isPositionIndependent() || !TM.getABI().IsO32() || LargeGOT) UseFastISel = false; return UseFastISel ? Mips::createFastISel(funcInfo, libInfo) : nullptr; @@ -795,7 +850,7 @@ static SDValue performORCombine(SDNode *N, SelectionDAG &DAG, SDValue And0 = N->getOperand(0), And1 = N->getOperand(1); uint64_t SMPos0, SMSize0, SMPos1, SMSize1; - ConstantSDNode *CN; + ConstantSDNode *CN, *CN1; // See if Op's first operand matches (and $src1 , mask0). if (And0.getOpcode() != ISD::AND) return SDValue(); @@ -806,47 +861,202 @@ static SDValue performORCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); // See if Op's second operand matches (and (shl $src, pos), mask1).
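Before the refactored matcher below, it may help to see what the matched (or (and $src1, mask0), (and (shl $src2, pos), mask1)) DAG actually computes: a bit-field insert, i.e. the semantics of the MIPS ins/dins instructions. A behaviourally equivalent standalone sketch of the isShiftedMask helper defined earlier in this file, plus the field-insert arithmetic (GCC/Clang builtins assumed; illustration only):

#include <cassert>
#include <cstdint>

// A shifted mask is one contiguous run of ones, e.g. 0x00ff0000.
static bool isShiftedMaskSketch(uint64_t I, uint64_t &Pos, uint64_t &Size) {
  if (I == 0)
    return false;
  Pos = __builtin_ctzll(I);       // index of the first set bit
  Size = __builtin_popcountll(I); // number of set bits
  uint64_t Run = I >> Pos;
  return (Run & (Run + 1)) == 0;  // set bits must be contiguous
}

// What `ins rd, rs, Pos, Size` performs (Pos + Size <= 32 assumed):
// clear the field in the destination, insert the low Size bits of src at Pos.
static uint32_t insField(uint32_t Dst, uint32_t Src, unsigned Pos,
                         unsigned Size) {
  uint32_t FieldMask = ((Size == 32 ? 0 : (1u << Size)) - 1u) << Pos;
  return (Dst & ~FieldMask) | ((Src << Pos) & FieldMask);
}

int main() {
  uint64_t Pos, Size;
  assert(isShiftedMaskSketch(0x00ff0000u, Pos, Size) && Pos == 16 && Size == 8);
  assert(!isShiftedMaskSketch(0x00ff00ffu, Pos, Size)); // two runs: not a mask
  assert(insField(0xaabbccdd, 0x12, 16, 8) == 0xaa12ccdd);
  return 0;
}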
- if (And1.getOpcode() != ISD::AND) + if (And1.getOpcode() == ISD::AND && + And1.getOperand(0).getOpcode() == ISD::SHL) { + + if (!(CN = dyn_cast<ConstantSDNode>(And1.getOperand(1))) || + !isShiftedMask(CN->getZExtValue(), SMPos1, SMSize1)) + return SDValue(); + + // The shift masks must have the same position and size. + if (SMPos0 != SMPos1 || SMSize0 != SMSize1) + return SDValue(); + + SDValue Shl = And1.getOperand(0); + + if (!(CN = dyn_cast<ConstantSDNode>(Shl.getOperand(1)))) + return SDValue(); + + unsigned Shamt = CN->getZExtValue(); + + // Return if the shift amount and the first bit position of mask are not the + // same. + EVT ValTy = N->getValueType(0); + if ((Shamt != SMPos0) || (SMPos0 + SMSize0 > ValTy.getSizeInBits())) + return SDValue(); + + SDLoc DL(N); + return DAG.getNode(MipsISD::Ins, DL, ValTy, Shl.getOperand(0), + DAG.getConstant(SMPos0, DL, MVT::i32), + DAG.getConstant(SMSize0, DL, MVT::i32), + And0.getOperand(0)); + } else { + // Pattern match DINS. + // $dst = or (and $src, mask0), mask1 + // where mask0 = ((1 << SMSize0) -1) << SMPos0 + // => dins $dst, $src, pos, size + if (~CN->getSExtValue() == ((((int64_t)1 << SMSize0) - 1) << SMPos0) && + ((SMSize0 + SMPos0 <= 64 && Subtarget.hasMips64r2()) || + (SMSize0 + SMPos0 <= 32))) { + // Check if AND instruction has constant as argument + bool isConstCase = And1.getOpcode() != ISD::AND; + if (And1.getOpcode() == ISD::AND) { + if (!(CN1 = dyn_cast<ConstantSDNode>(And1->getOperand(1)))) + return SDValue(); + } else { + if (!(CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1)))) + return SDValue(); + } + // Don't generate INS if constant OR operand doesn't fit into bits + // cleared by constant AND operand. + if (CN->getSExtValue() & CN1->getSExtValue()) + return SDValue(); + + SDLoc DL(N); + EVT ValTy = N->getOperand(0)->getValueType(0); + SDValue Const1; + SDValue SrlX; + if (!isConstCase) { + Const1 = DAG.getConstant(SMPos0, DL, MVT::i32); + SrlX = DAG.getNode(ISD::SRL, DL, And1->getValueType(0), And1, Const1); + } + return DAG.getNode( + MipsISD::Ins, DL, N->getValueType(0), + isConstCase + ? DAG.getConstant(CN1->getSExtValue() >> SMPos0, DL, ValTy) + : SrlX, + DAG.getConstant(SMPos0, DL, MVT::i32), + DAG.getConstant(ValTy.getSizeInBits() / 8 < 8 ? SMSize0 & 31 + : SMSize0, + DL, MVT::i32), + And0->getOperand(0)); + + } return SDValue(); + } +} - - if (!(CN = dyn_cast<ConstantSDNode>(And1.getOperand(1))) || - !isShiftedMask(CN->getZExtValue(), SMPos1, SMSize1)) +static SDValue performMADD_MSUBCombine(SDNode *ROOTNode, SelectionDAG &CurDAG, + const MipsSubtarget &Subtarget) { + // ROOTNode must have a multiplication as an operand for the match to be + // successful. + if (ROOTNode->getOperand(0).getOpcode() != ISD::MUL && + ROOTNode->getOperand(1).getOpcode() != ISD::MUL) return SDValue(); - // The shift masks must have the same position and size. - if (SMPos0 != SMPos1 || SMSize0 != SMSize1) + // We don't handle vector types here. + if (ROOTNode->getValueType(0).isVector()) return SDValue(); - SDValue Shl = And1.getOperand(0); - if (Shl.getOpcode() != ISD::SHL) + // For MIPS64, madd / msub instructions are inefficient to use with 64 bit + // arithmetic. E.g. + // (add (mul a b) c) => + // let res = (madd (mthi (drotr c 32))x(mtlo c) a b) in + // MIPS64: (or (dsll (mfhi res) 32) (dsrl (dsll (mflo res) 32) 32) + // or + // MIPS64R2: (dins (mflo res) (mfhi res) 32 32) + // + // The overhead of setting up the Hi/Lo registers and reassembling the + // result makes this a dubious optimization for MIPS64.
The core of the + problem is that Hi/Lo contain the upper and lower 32 bits of the + operand and result. + // + // It requires a chain of 4 add/mul for MIPS64R2 to get better code + // density than doing it naively, 5 for MIPS64. Additionally, using + // madd/msub on MIPS64 requires the operands actually be 32 bit sign + // extended operands, not true 64 bit values. + // + // FIXME: For the moment, disable this completely for MIPS64. if (Subtarget.hasMips64()) return SDValue(); - if (!(CN = dyn_cast<ConstantSDNode>(Shl.getOperand(1)))) + SDValue Mult = ROOTNode->getOperand(0).getOpcode() == ISD::MUL + ? ROOTNode->getOperand(0) + : ROOTNode->getOperand(1); + + SDValue AddOperand = ROOTNode->getOperand(0).getOpcode() == ISD::MUL + ? ROOTNode->getOperand(1) + : ROOTNode->getOperand(0); + + // Transform this to a MADD only if the user of this node is the add. + // If there are other users of the mul, this function returns here. + if (!Mult.hasOneUse()) return SDValue(); - unsigned Shamt = CN->getZExtValue(); + // maddu and madd are unusual instructions in that on MIPS64 bits 63..31 + // must be in canonical form, i.e. sign extended. For MIPS32, the operands + // of the multiply must have 32 or more sign bits, otherwise we cannot + // perform this optimization. We have to check this here as we're performing + // this optimization pre-legalization. + SDValue MultLHS = Mult->getOperand(0); + SDValue MultRHS = Mult->getOperand(1); - // Return if the shift amount and the first bit position of mask are not the - // same. - EVT ValTy = N->getValueType(0); - if ((Shamt != SMPos0) || (SMPos0 + SMSize0 > ValTy.getSizeInBits())) + bool IsSigned = MultLHS->getOpcode() == ISD::SIGN_EXTEND && + MultRHS->getOpcode() == ISD::SIGN_EXTEND; + bool IsUnsigned = MultLHS->getOpcode() == ISD::ZERO_EXTEND && + MultRHS->getOpcode() == ISD::ZERO_EXTEND; + + if (!IsSigned && !IsUnsigned) return SDValue(); - SDLoc DL(N); - return DAG.getNode(MipsISD::Ins, DL, ValTy, Shl.getOperand(0), - DAG.getConstant(SMPos0, DL, MVT::i32), - DAG.getConstant(SMSize0, DL, MVT::i32), - And0.getOperand(0)); + // Initialize accumulator. + SDLoc DL(ROOTNode); + SDValue TopHalf; + SDValue BottomHalf; + BottomHalf = CurDAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, AddOperand, + CurDAG.getIntPtrConstant(0, DL)); + + TopHalf = CurDAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, AddOperand, + CurDAG.getIntPtrConstant(1, DL)); + SDValue ACCIn = CurDAG.getNode(MipsISD::MTLOHI, DL, MVT::Untyped, + BottomHalf, + TopHalf); + + // Create MipsMAdd(u) / MipsMSub(u) node. + bool IsAdd = ROOTNode->getOpcode() == ISD::ADD; + unsigned Opcode = IsAdd ? (IsUnsigned ? MipsISD::MAddu : MipsISD::MAdd) : (IsUnsigned ?
MipsISD::MSubu : MipsISD::MSub); + SDValue MAddOps[3] = { + CurDAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mult->getOperand(0)), + CurDAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mult->getOperand(1)), ACCIn}; + EVT VTs[2] = {MVT::i32, MVT::i32}; + SDValue MAdd = CurDAG.getNode(Opcode, DL, VTs, MAddOps); + + SDValue ResLo = CurDAG.getNode(MipsISD::MFLO, DL, MVT::i32, MAdd); + SDValue ResHi = CurDAG.getNode(MipsISD::MFHI, DL, MVT::i32, MAdd); + SDValue Combined = + CurDAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, ResLo, ResHi); + return Combined; +} + +static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const MipsSubtarget &Subtarget) { + // (sub v0 (mul v1, v2)) => (msub v1, v2, v0) + if (DCI.isBeforeLegalizeOps()) { + if (Subtarget.hasMips32() && !Subtarget.hasMips32r6() && + !Subtarget.inMips16Mode() && N->getValueType(0) == MVT::i64) + return performMADD_MSUBCombine(N, DAG, Subtarget); + + return SDValue(); + } + + return SDValue(); } static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget) { - // (add v0, (add v1, abs_lo(tjt))) => (add (add v0, v1), abs_lo(tjt)) + // (add v0 (mul v1, v2)) => (madd v1, v2, v0) + if (DCI.isBeforeLegalizeOps()) { + if (Subtarget.hasMips32() && !Subtarget.hasMips32r6() && + !Subtarget.inMips16Mode() && N->getValueType(0) == MVT::i64) + return performMADD_MSUBCombine(N, DAG, Subtarget); - if (DCI.isBeforeLegalizeOps()) return SDValue(); + } + // (add v0, (add v1, abs_lo(tjt))) => (add (add v0, v1), abs_lo(tjt)) SDValue Add = N->getOperand(1); if (Add.getOpcode() != ISD::ADD) @@ -974,6 +1184,8 @@ SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) return performAssertZextCombine(N, DAG, DCI, Subtarget); case ISD::SHL: return performSHLCombine(N, DAG, DCI, Subtarget); + case ISD::SUB: + return performSUBCombine(N, DAG, DCI, Subtarget); } return SDValue(); @@ -2515,6 +2727,11 @@ SDValue MipsTargetLowering::lowerFP_TO_SINT(SDValue Op, // yet to hold an argument. Otherwise, use A2, A3 and stack. If A1 is // not used, it must be shadowed. If only A3 is available, shadow it and // go to stack. +// vXiX - Received as scalarized i32s, passed in A0 - A3 and the stack. +// vXf32 - Passed in either a pair of registers {A0, A1}, {A2, A3} or {A0 - A3} +// with the remainder spilled to the stack. +// vXf64 - Passed in either {A0, A1, A2, A3} or {A2, A3} and in both cases +// spilling the remainder to the stack. // // For vararg functions, all arguments are passed in A0, A1, A2, A3 and stack. //===----------------------------------------------------------------------===// @@ -2526,8 +2743,13 @@ static bool CC_MipsO32(unsigned ValNo, MVT ValVT, MVT LocVT, State.getMachineFunction().getSubtarget()); static const MCPhysReg IntRegs[] = { Mips::A0, Mips::A1, Mips::A2, Mips::A3 }; + + const MipsCCState *MipsState = static_cast<const MipsCCState *>(&State); + static const MCPhysReg F32Regs[] = { Mips::F12, Mips::F14 }; + static const MCPhysReg FloatVectorIntRegs[] = { Mips::A0, Mips::A2 }; + // Do not process byval args here.
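The register-pairing rule that the CC_MipsO32 hunk below implements can be modelled compactly: the first piece of a split float vector may only start on an 8-byte-aligned pair (A0 or A2) and shadows the register it skips, while later pieces take any free integer register. A toy model of just that rule (O32IntRegs and the enum are inventions for this sketch, not patch code):

#include <array>
#include <initializer_list>

enum Reg { A0, A1, A2, A3, NoReg };

// Stand-in for CCState's first-fit register allocation over A0..A3.
struct O32IntRegs {
  std::array<bool, 4> Used{};
  Reg take(std::initializer_list<Reg> Cands) {
    for (Reg R : Cands)
      if (!Used[R]) {
        Used[R] = true;
        return R;
      }
    return NoReg;
  }
  Reg vectorFloatPiece(bool IsSplitStart) {
    if (!IsSplitStart)
      return take({A0, A1, A2, A3}); // intermediate pieces: any free register
    Reg R = take({A0, A2});          // split start: aligned pair starts only
    if (R == A2)
      take({A1});                    // shadow the register skipped over
    else if (R == NoReg)
      take({A3});                    // fully occupied: shadow and spill
    return R;
  }
};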
if (ArgFlags.isByVal()) return true; @@ -2565,8 +2787,26 @@ static bool CC_MipsO32(unsigned ValNo, MVT ValVT, MVT LocVT, State.getFirstUnallocated(F32Regs) != ValNo; unsigned OrigAlign = ArgFlags.getOrigAlign(); bool isI64 = (ValVT == MVT::i32 && OrigAlign == 8); - - if (ValVT == MVT::i32 || (ValVT == MVT::f32 && AllocateFloatsInIntReg)) { + bool isVectorFloat = MipsState->WasOriginalArgVectorFloat(ValNo); + + // The MIPS vector ABI for floats passes them in a pair of registers. + if (ValVT == MVT::i32 && isVectorFloat) { + // This is the start of a vector that was scalarized into an unknown number + // of components. It doesn't matter how many there are. Allocate one of the + // notional 8 byte aligned registers which map onto the argument stack, and + // shadow the register lost to alignment requirements. + if (ArgFlags.isSplit()) { + Reg = State.AllocateReg(FloatVectorIntRegs); + if (Reg == Mips::A2) + State.AllocateReg(Mips::A1); + else if (Reg == 0) + State.AllocateReg(Mips::A3); + } else { + // If we're an intermediate component of the split, we can just attempt to + // allocate a register directly. + Reg = State.AllocateReg(IntRegs); + } + } else if (ValVT == MVT::i32 || (ValVT == MVT::f32 && AllocateFloatsInIntReg)) { Reg = State.AllocateReg(IntRegs); // If this is the first part of an i64 arg, // the allocated register must be either A0 or A2. @@ -2909,6 +3149,20 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, EVT Ty = Callee.getValueType(); bool GlobalOrExternal = false, IsCallReloc = false; + // The long-calls feature is ignored in case of PIC. + // While we do not support -mshared / -mno-shared properly, + // ignore long-calls in case of -mabicalls too. + if (Subtarget.useLongCalls() && !Subtarget.isABICalls() && !IsPIC) { + // Get the address of the callee into a register to prevent + // using of the `jal` instruction for the direct call. + if (auto *N = dyn_cast<ExternalSymbolSDNode>(Callee)) + Callee = Subtarget.hasSym32() ? getAddrNonPIC(N, SDLoc(N), Ty, DAG) + : getAddrNonPICSym64(N, SDLoc(N), Ty, DAG); + else if (auto *N = dyn_cast<GlobalAddressSDNode>(Callee)) + Callee = Subtarget.hasSym32() ? getAddrNonPIC(N, SDLoc(N), Ty, DAG) + : getAddrNonPICSym64(N, SDLoc(N), Ty, DAG); + } + if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { if (IsPIC) { const GlobalValue *Val = G->getGlobal(); diff --git a/interpreter/llvm/src/lib/Target/Mips/MipsISelLowering.h b/interpreter/llvm/src/lib/Target/Mips/MipsISelLowering.h index 2dcafd51061a2..0e47ed38f4207 100644 --- a/interpreter/llvm/src/lib/Target/Mips/MipsISelLowering.h +++ b/interpreter/llvm/src/lib/Target/Mips/MipsISelLowering.h @@ -248,6 +248,33 @@ namespace llvm { bool isCheapToSpeculateCttz() const override; bool isCheapToSpeculateCtlz() const override; + /// Return the register type for a given MVT, ensuring vectors are treated + /// as a series of gpr sized integers. + virtual MVT getRegisterTypeForCallingConv(MVT VT) const override; + + /// Return the register type for a given MVT, ensuring vectors are treated + /// as a series of gpr sized integers. + virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, + EVT VT) const override; + + /// Return the number of registers for a given MVT, ensuring vectors are + /// treated as a series of gpr sized integers. + virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, + EVT VT) const override; + + /// Break down vectors to the correct number of gpr sized integers.
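The arithmetic behind the overrides declared here and just below is small enough to show with worked numbers (assuming MSA-sized vectors; the gpr is 32 bits under O32 and 64 bits otherwise):

// NumRegs = max(bits(VT) / bits(gpr), 1):
//   v4f32 (128 bits), O32: 128 / 32 = 4 pieces of i32
//   v4f32 (128 bits), N64: 128 / 64 = 2 pieces of i64
//   v2f32 ( 64 bits), N64:  64 / 64 = 1 piece  of i64
static unsigned numGPRPieces(unsigned VTBits, bool IsO32) {
  unsigned GPRBits = IsO32 ? 32 : 64;
  unsigned N = VTBits / GPRBits;
  return N ? N : 1; // never fewer than one register
}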
+ virtual unsigned getVectorTypeBreakdownForCallingConv( + LLVMContext &Context, EVT VT, EVT &IntermediateVT, + unsigned &NumIntermediates, MVT &RegisterVT) const override; + + /// Return the correct alignment for the current calling convention. + virtual unsigned + getABIAlignmentForCallingConv(Type *ArgTy, DataLayout DL) const override { + if (ArgTy->isVectorTy()) + return std::min(DL.getABITypeAlignment(ArgTy), 8U); + return DL.getABITypeAlignment(ArgTy); + } + ISD::NodeType getExtendForAtomicOps() const override { return ISD::SIGN_EXTEND; } diff --git a/interpreter/llvm/src/lib/Target/Mips/MipsInstrFPU.td b/interpreter/llvm/src/lib/Target/Mips/MipsInstrFPU.td index df42d56d041bc..0333fe6520fab 100644 --- a/interpreter/llvm/src/lib/Target/Mips/MipsInstrFPU.td +++ b/interpreter/llvm/src/lib/Target/Mips/MipsInstrFPU.td @@ -443,8 +443,17 @@ let AdditionalPredicates = [NotInMicroMips] in { } def MFC1 : MMRel, MFC1_FT<"mfc1", GPR32Opnd, FGR32Opnd, II_MFC1, bitconvert>, MFC1_FM<0>; +def MFC1_D64 : MFC1_FT<"mfc1", GPR32Opnd, FGR64Opnd, II_MFC1>, MFC1_FM<0>, + FGR_64 { + let DecoderNamespace = "Mips64"; +} def MTC1 : MMRel, MTC1_FT<"mtc1", FGR32Opnd, GPR32Opnd, II_MTC1, bitconvert>, MFC1_FM<4>; +def MTC1_D64 : MTC1_FT<"mtc1", FGR64Opnd, GPR32Opnd, II_MTC1>, MFC1_FM<4>, + FGR_64 { + let DecoderNamespace = "Mips64"; +} + let AdditionalPredicates = [NotInMicroMips] in { def MFHC1_D32 : MMRel, MFC1_FT<"mfhc1", GPR32Opnd, AFGR64Opnd, II_MFHC1>, MFC1_FM<3>, ISA_MIPS32R2, FGR_32; @@ -557,11 +566,11 @@ def FSUB_S : MMRel, ADDS_FT<"sub.s", FGR32Opnd, II_SUB_S, 0, fsub>, defm FSUB : ADDS_M<"sub.d", II_SUB_D, 0, fsub>, ADDS_FM<0x01, 17>; def MADD_S : MMRel, MADDS_FT<"madd.s", FGR32Opnd, II_MADD_S, fadd>, - MADDS_FM<4, 0>, INSN_MIPS4_32R2_NOT_32R6_64R6; + MADDS_FM<4, 0>, INSN_MIPS4_32R2_NOT_32R6_64R6, MADD4; def MSUB_S : MMRel, MADDS_FT<"msub.s", FGR32Opnd, II_MSUB_S, fsub>, - MADDS_FM<5, 0>, INSN_MIPS4_32R2_NOT_32R6_64R6; + MADDS_FM<5, 0>, INSN_MIPS4_32R2_NOT_32R6_64R6, MADD4; -let AdditionalPredicates = [NoNaNsFPMath] in { +let AdditionalPredicates = [NoNaNsFPMath, HasMadd4] in { def NMADD_S : MMRel, NMADDS_FT<"nmadd.s", FGR32Opnd, II_NMADD_S, fadd>, MADDS_FM<6, 0>, INSN_MIPS4_32R2_NOT_32R6_64R6; def NMSUB_S : MMRel, NMADDS_FT<"nmsub.s", FGR32Opnd, II_NMSUB_S, fsub>, @@ -569,11 +578,11 @@ let AdditionalPredicates = [NoNaNsFPMath] in { } def MADD_D32 : MMRel, MADDS_FT<"madd.d", AFGR64Opnd, II_MADD_D, fadd>, - MADDS_FM<4, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_32; + MADDS_FM<4, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_32, MADD4; def MSUB_D32 : MMRel, MADDS_FT<"msub.d", AFGR64Opnd, II_MSUB_D, fsub>, - MADDS_FM<5, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_32; + MADDS_FM<5, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_32, MADD4; -let AdditionalPredicates = [NoNaNsFPMath] in { +let AdditionalPredicates = [NoNaNsFPMath, HasMadd4] in { def NMADD_D32 : MMRel, NMADDS_FT<"nmadd.d", AFGR64Opnd, II_NMADD_D, fadd>, MADDS_FM<6, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_32; def NMSUB_D32 : MMRel, NMADDS_FT<"nmsub.d", AFGR64Opnd, II_NMSUB_D, fsub>, @@ -582,12 +591,12 @@ let AdditionalPredicates = [NoNaNsFPMath] in { let DecoderNamespace = "Mips64" in { def MADD_D64 : MADDS_FT<"madd.d", FGR64Opnd, II_MADD_D, fadd>, - MADDS_FM<4, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_64; + MADDS_FM<4, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_64, MADD4; def MSUB_D64 : MADDS_FT<"msub.d", FGR64Opnd, II_MSUB_D, fsub>, - MADDS_FM<5, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_64; + MADDS_FM<5, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_64, MADD4; } -let 
AdditionalPredicates = [NoNaNsFPMath], +let AdditionalPredicates = [NoNaNsFPMath, HasMadd4], DecoderNamespace = "Mips64" in { def NMADD_D64 : NMADDS_FT<"nmadd.d", FGR64Opnd, II_NMADD_D, fadd>, MADDS_FM<6, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_64; @@ -681,6 +690,29 @@ def PseudoTRUNC_W_D : MipsAsmPseudoInst<(outs FGR32Opnd:$fd), "trunc.w.d\t$fd, $fs, $rs">, FGR_64, HARDFLOAT; +def LoadImmSingleGPR : MipsAsmPseudoInst<(outs GPR32Opnd:$rd), + (ins imm64:$fpimm), + "li.s\t$rd, $fpimm">; + +def LoadImmSingleFGR : MipsAsmPseudoInst<(outs StrictlyFGR32Opnd:$rd), + (ins imm64:$fpimm), + "li.s\t$rd, $fpimm">, + HARDFLOAT; + +def LoadImmDoubleGPR : MipsAsmPseudoInst<(outs GPR32Opnd:$rd), + (ins imm64:$fpimm), + "li.d\t$rd, $fpimm">; + +def LoadImmDoubleFGR_32 : MipsAsmPseudoInst<(outs StrictlyAFGR64Opnd:$rd), + (ins imm64:$fpimm), + "li.d\t$rd, $fpimm">, + FGR_32, HARDFLOAT; + +def LoadImmDoubleFGR : MipsAsmPseudoInst<(outs StrictlyFGR64Opnd:$rd), + (ins imm64:$fpimm), + "li.d\t$rd, $fpimm">, + FGR_64, HARDFLOAT; + //===----------------------------------------------------------------------===// // InstAliases. //===----------------------------------------------------------------------===// diff --git a/interpreter/llvm/src/lib/Target/Mips/MipsInstrInfo.cpp b/interpreter/llvm/src/lib/Target/Mips/MipsInstrInfo.cpp index df62c66b75a32..4adf77f8d9a95 100644 --- a/interpreter/llvm/src/lib/Target/Mips/MipsInstrInfo.cpp +++ b/interpreter/llvm/src/lib/Target/Mips/MipsInstrInfo.cpp @@ -103,12 +103,9 @@ void MipsInstrInfo::BuildCondBr(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineInstrBuilder MIB = BuildMI(&MBB, DL, MCID); for (unsigned i = 1; i < Cond.size(); ++i) { - if (Cond[i].isReg()) - MIB.addReg(Cond[i].getReg()); - else if (Cond[i].isImm()) - MIB.addImm(Cond[i].getImm()); - else - assert(false && "Cannot copy operand"); + assert((Cond[i].isImm() || Cond[i].isReg()) && + "Cannot copy operand for conditional branch!"); + MIB.add(Cond[i]); } MIB.addMBB(TBB); } diff --git a/interpreter/llvm/src/lib/Target/Mips/MipsInstrInfo.td b/interpreter/llvm/src/lib/Target/Mips/MipsInstrInfo.td index 8761946b8dbb3..89a5854bede0f 100644 --- a/interpreter/llvm/src/lib/Target/Mips/MipsInstrInfo.td +++ b/interpreter/llvm/src/lib/Target/Mips/MipsInstrInfo.td @@ -238,7 +238,10 @@ def HasEVA : Predicate<"Subtarget->hasEVA()">, AssemblerPredicate<"FeatureEVA,FeatureMips32r2">; def HasMSA : Predicate<"Subtarget->hasMSA()">, AssemblerPredicate<"FeatureMSA">; - +def HasMadd4 : Predicate<"!Subtarget->disableMadd4()">, + AssemblerPredicate<"!FeatureMadd4">; +def HasMT : Predicate<"Subtarget->hasMT()">, + AssemblerPredicate<"FeatureMT">; //===----------------------------------------------------------------------===// // Mips GPR size adjectives. @@ -380,6 +383,10 @@ class ASE_MSA64 { list<Predicate> InsnPredicates = [HasMSA, HasMips64]; } +class ASE_MT { + list<Predicate> InsnPredicates = [HasMT]; +} + // Class used for separating microMIPSr6 and microMIPS (r3) instruction. // It can be used only on instructions that doesn't inherit PredicateControl.
class ISA_MICROMIPS_NOT_32R6_64R6 : PredicateControl { @@ -390,6 +397,10 @@ class ASE_NOT_DSP { list<Predicate> InsnPredicates = [NotDSP]; } +class MADD4 { + list<Predicate> AdditionalPredicates = [HasMadd4]; +} + //===----------------------------------------------------------------------===// class MipsPat<dag pattern, dag result> : Pat<pattern, result>, PredicateControl { @@ -2913,6 +2924,10 @@ include "MipsMSAInstrInfo.td" include "MipsEVAInstrFormats.td" include "MipsEVAInstrInfo.td" +// MT +include "MipsMTInstrFormats.td" +include "MipsMTInstrInfo.td" + // Micromips include "MicroMipsInstrFormats.td" include "MicroMipsInstrInfo.td" diff --git a/interpreter/llvm/src/lib/Target/Mips/MipsLongBranch.cpp b/interpreter/llvm/src/lib/Target/Mips/MipsLongBranch.cpp index 100503700a720..b95f1158fa562 100644 --- a/interpreter/llvm/src/lib/Target/Mips/MipsLongBranch.cpp +++ b/interpreter/llvm/src/lib/Target/Mips/MipsLongBranch.cpp @@ -75,9 +75,8 @@ namespace { public: static char ID; - MipsLongBranch(TargetMachine &tm) - : MachineFunctionPass(ID), TM(tm), IsPIC(TM.isPositionIndependent()), - ABI(static_cast<const MipsTargetMachine &>(TM).getABI()) {} + MipsLongBranch() + : MachineFunctionPass(ID), ABI(MipsABIInfo::Unknown()) {} StringRef getPassName() const override { return "Mips Long Branch"; } @@ -96,7 +95,6 @@ namespace { MachineBasicBlock *MBBOpnd); void expandToLongBranch(MBBInfo &Info); - const TargetMachine &TM; MachineFunction *MF; SmallVector<MBBInfo, 16> MBBInfos; bool IsPIC; @@ -469,6 +467,12 @@ bool MipsLongBranch::runOnMachineFunction(MachineFunction &F) { static_cast<const MipsSubtarget &>(F.getSubtarget()); const MipsInstrInfo *TII = static_cast<const MipsInstrInfo *>(STI.getInstrInfo()); + + + const TargetMachine& TM = F.getTarget(); + IsPIC = TM.isPositionIndependent(); + ABI = static_cast<const MipsTargetMachine &>(TM).getABI(); + LongBranchSeqSize = !IsPIC ? 2 : (ABI.IsN64() ? 10 : (!STI.isTargetNaCl() ? 9 : 10)); @@ -541,6 +545,4 @@ bool MipsLongBranch::runOnMachineFunction(MachineFunction &F) { /// createMipsLongBranchPass - Returns a pass that converts branches to long /// branches. -FunctionPass *llvm::createMipsLongBranchPass(MipsTargetMachine &tm) { - return new MipsLongBranch(tm); -} +FunctionPass *llvm::createMipsLongBranchPass() { return new MipsLongBranch(); } diff --git a/interpreter/llvm/src/lib/Target/Mips/MipsMTInstrFormats.td b/interpreter/llvm/src/lib/Target/Mips/MipsMTInstrFormats.td new file mode 100644 index 0000000000000..64bee5bfba18e --- /dev/null +++ b/interpreter/llvm/src/lib/Target/Mips/MipsMTInstrFormats.td @@ -0,0 +1,78 @@ +//===-- MipsMTInstrFormats.td - Mips Instruction Formats ---*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Describe the MIPS MT instruction format +// +// opcode - operation code.
+// rt - destination register +// +//===----------------------------------------------------------------------===// + +class MipsMTInst : MipsInst<(outs), (ins), "", [], NoItinerary, FrmOther>, + PredicateControl { + let DecoderNamespace = "Mips"; + let EncodingPredicates = [HasStdEnc]; +} + +class OPCODE1<bits<1> Val> { + bits<1> Value = Val; +} + +def OPCODE_SC_D : OPCODE1<0b0>; +def OPCODE_SC_E : OPCODE1<0b1>; + +class FIELD5<bits<5> Val> { + bits<5> Value = Val; +} + +def FIELD5_1_DMT_EMT : FIELD5<0b00001>; +def FIELD5_2_DMT_EMT : FIELD5<0b01111>; +def FIELD5_1_2_DVPE_EVPE : FIELD5<0b00000>; + +class COP0_MFMC0_MT<FIELD5 Op1, FIELD5 Op2, OPCODE1 sc> : MipsMTInst { + bits<32> Inst; + + bits<5> rt; + let Inst{31-26} = 0b010000; // COP0 + let Inst{25-21} = 0b01011; // MFMC0 + let Inst{20-16} = rt; + let Inst{15-11} = Op1.Value; + let Inst{10-6} = Op2.Value; + let Inst{5} = sc.Value; + let Inst{4-3} = 0b00; + let Inst{2-0} = 0b001; +} + +class SPECIAL3_MT_FORK : MipsMTInst { + bits<32> Inst; + + bits<5> rs; + bits<5> rt; + bits<5> rd; + let Inst{31-26} = 0b011111; // SPECIAL3 + let Inst{25-21} = rs; + let Inst{20-16} = rt; + let Inst{15-11} = rd; + let Inst{10-6} = 0b00000; + let Inst{5-0} = 0b001000; // FORK +} + +class SPECIAL3_MT_YIELD : MipsMTInst { + bits<32> Inst; + + bits<5> rs; + bits<5> rd; + let Inst{31-26} = 0b011111; // SPECIAL3 + let Inst{25-21} = rs; + let Inst{20-16} = 0b00000; + let Inst{15-11} = rd; + let Inst{10-6} = 0b00000; + let Inst{5-0} = 0b001001; // YIELD +} diff --git a/interpreter/llvm/src/lib/Target/Mips/MipsMTInstrInfo.td b/interpreter/llvm/src/lib/Target/Mips/MipsMTInstrInfo.td new file mode 100644 index 0000000000000..ab6693f60fd94 --- /dev/null +++ b/interpreter/llvm/src/lib/Target/Mips/MipsMTInstrInfo.td @@ -0,0 +1,98 @@ +//===-- MipsMTInstrInfo.td - Mips MT Instruction Infos -----*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// MIPS MT Instruction Encodings +//===----------------------------------------------------------------------===// + +class DMT_ENC : COP0_MFMC0_MT<FIELD5_1_DMT_EMT, FIELD5_2_DMT_EMT, OPCODE_SC_D>; + +class EMT_ENC : COP0_MFMC0_MT<FIELD5_1_DMT_EMT, FIELD5_2_DMT_EMT, OPCODE_SC_E>; + +class DVPE_ENC : COP0_MFMC0_MT<FIELD5_1_2_DVPE_EVPE, FIELD5_1_2_DVPE_EVPE, OPCODE_SC_D>; + +class EVPE_ENC : COP0_MFMC0_MT<FIELD5_1_2_DVPE_EVPE, FIELD5_1_2_DVPE_EVPE, OPCODE_SC_E>; + +class FORK_ENC : SPECIAL3_MT_FORK; + +class YIELD_ENC : SPECIAL3_MT_YIELD; + +//===----------------------------------------------------------------------===// +// MIPS MT Instruction Descriptions +//===----------------------------------------------------------------------===// + +class MT_1R_DESC_BASE<string instr_asm, InstrItinClass Itin = NoItinerary> { + dag OutOperandList = (outs GPR32Opnd:$rt); + dag InOperandList = (ins); + string AsmString = !strconcat(instr_asm, "\t$rt"); + list<dag> Pattern = []; + InstrItinClass Itinerary = Itin; +} + +class FORK_DESC { + dag OutOperandList = (outs GPR32Opnd:$rs, GPR32Opnd:$rd); + dag InOperandList = (ins GPR32Opnd:$rt); + string AsmString = "fork\t$rd, $rs, $rt"; + list<dag> Pattern = []; + InstrItinClass Itinerary = II_FORK; +} + +class YIELD_DESC { + dag OutOperandList = (outs GPR32Opnd:$rd); + dag InOperandList = (ins GPR32Opnd:$rs); + string AsmString = "yield\t$rd, $rs"; + list<dag> Pattern = []; + InstrItinClass Itinerary = II_YIELD; +} + +class DMT_DESC : MT_1R_DESC_BASE<"dmt", II_DMT>; + +class EMT_DESC : MT_1R_DESC_BASE<"emt", II_EMT>; + +class DVPE_DESC : MT_1R_DESC_BASE<"dvpe", II_DVPE>; + +class EVPE_DESC : MT_1R_DESC_BASE<"evpe", II_EVPE>; + +//===----------------------------------------------------------------------===// +// MIPS MT Instruction Definitions +//===----------------------------------------------------------------------===// let hasSideEffects = 1, isNotDuplicable = 1, + AdditionalPredicates = [NotInMicroMips] in { + def DMT : DMT_ENC, DMT_DESC, ASE_MT; + + def EMT : EMT_ENC, EMT_DESC, ASE_MT; + + def DVPE : DVPE_ENC, DVPE_DESC, ASE_MT; + + def EVPE : EVPE_ENC, EVPE_DESC, ASE_MT; + + def FORK : FORK_ENC, FORK_DESC, ASE_MT; + + def YIELD : YIELD_ENC, YIELD_DESC, ASE_MT; +} + +//===----------------------------------------------------------------------===// +// MIPS MT Instruction Aliases +//===----------------------------------------------------------------------===// + +let AdditionalPredicates = [NotInMicroMips] in { + def : MipsInstAlias<"dmt", (DMT ZERO), 1>, ASE_MT; + + def : MipsInstAlias<"emt", (EMT ZERO), 1>, ASE_MT; + + def : MipsInstAlias<"dvpe", (DVPE ZERO), 1>, ASE_MT; + + def : MipsInstAlias<"evpe", (EVPE ZERO), 1>, ASE_MT; + + def : MipsInstAlias<"yield $rs", (YIELD ZERO, GPR32Opnd:$rs), 1>, ASE_MT; +} diff --git a/interpreter/llvm/src/lib/Target/Mips/MipsMachineFunction.cpp b/interpreter/llvm/src/lib/Target/Mips/MipsMachineFunction.cpp index 63034ecab93b3..e01c03db22275 100644 --- a/interpreter/llvm/src/lib/Target/Mips/MipsMachineFunction.cpp +++ b/interpreter/llvm/src/lib/Target/Mips/MipsMachineFunction.cpp @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/MipsABIInfo.h" #include "MipsMachineFunction.h" +#include "MCTargetDesc/MipsABIInfo.h" #include "MipsSubtarget.h" #include "MipsTargetMachine.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -40,7 +40,11 @@ unsigned MipsFunctionInfo::getGlobalBaseReg() { const TargetRegisterClass *RC = STI.inMips16Mode() ? &Mips::CPU16RegsRegClass - : static_cast<const MipsTargetMachine &>(MF.getTarget()) + : STI.inMicroMipsMode() + ? STI.hasMips64() + ?
&Mips::GPRMM16_64RegClass + : &Mips::GPRMM16RegClass + : static_cast<const MipsTargetMachine &>(MF.getTarget()) .getABI() .IsN64() ? &Mips::GPR64RegClass diff --git a/interpreter/llvm/src/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp b/interpreter/llvm/src/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp index cf85eb3f24160..ceacaa4983894 100644 --- a/interpreter/llvm/src/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp +++ b/interpreter/llvm/src/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp @@ -10,6 +10,7 @@ #include "Mips.h" #include "MipsTargetMachine.h" +#include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -22,18 +23,19 @@ namespace { public: static char ID; - explicit MipsModuleDAGToDAGISel(MipsTargetMachine &TM_) - : MachineFunctionPass(ID), TM(TM_) {} + MipsModuleDAGToDAGISel() : MachineFunctionPass(ID) {} // Pass Name StringRef getPassName() const override { return "MIPS DAG->DAG Pattern Instruction Selection"; } - bool runOnMachineFunction(MachineFunction &MF) override; + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<TargetPassConfig>(); + MachineFunctionPass::getAnalysisUsage(AU); + } - protected: - MipsTargetMachine &TM; + bool runOnMachineFunction(MachineFunction &MF) override; }; char MipsModuleDAGToDAGISel::ID = 0; @@ -41,10 +43,12 @@ namespace { bool MipsModuleDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { DEBUG(errs() << "In MipsModuleDAGToDAGISel::runMachineFunction\n"); + auto &TPC = getAnalysis<TargetPassConfig>(); + auto &TM = TPC.getTM<MipsTargetMachine>(); TM.resetSubtarget(&MF); return false; } -llvm::FunctionPass *llvm::createMipsModuleISelDagPass(MipsTargetMachine &TM) { - return new MipsModuleDAGToDAGISel(TM); +llvm::FunctionPass *llvm::createMipsModuleISelDagPass() { + return new MipsModuleDAGToDAGISel(); } diff --git a/interpreter/llvm/src/lib/Target/Mips/MipsOptimizePICCall.cpp b/interpreter/llvm/src/lib/Target/Mips/MipsOptimizePICCall.cpp index f8d9c34556bcc..79c8395d9dcc4 100644 --- a/interpreter/llvm/src/lib/Target/Mips/MipsOptimizePICCall.cpp +++ b/interpreter/llvm/src/lib/Target/Mips/MipsOptimizePICCall.cpp @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#include "Mips.h" #include "MCTargetDesc/MipsBaseInfo.h" +#include "Mips.h" #include "MipsMachineFunction.h" #include "MipsTargetMachine.h" #include "llvm/ADT/ScopedHashTable.h" @@ -59,7 +59,7 @@ class MBBInfo { class OptimizePICCall : public MachineFunctionPass { public: - OptimizePICCall(TargetMachine &tm) : MachineFunctionPass(ID) {} + OptimizePICCall() : MachineFunctionPass(ID) {} StringRef getPassName() const override { return "Mips OptimizePICCall"; } @@ -297,6 +297,6 @@ void OptimizePICCall::incCntAndSetReg(ValueType Entry, unsigned Reg) { } /// Return an OptimizeCall object.
-FunctionPass *llvm::createMipsOptimizePICCallPass(MipsTargetMachine &TM) { - return new OptimizePICCall(TM); +FunctionPass *llvm::createMipsOptimizePICCallPass() { + return new OptimizePICCall(); } diff --git a/interpreter/llvm/src/lib/Target/Mips/MipsOs16.cpp b/interpreter/llvm/src/lib/Target/Mips/MipsOs16.cpp index 670b6c96e78ef..7ee45c28a7d09 100644 --- a/interpreter/llvm/src/lib/Target/Mips/MipsOs16.cpp +++ b/interpreter/llvm/src/lib/Target/Mips/MipsOs16.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/IR/Instructions.h" #include "Mips.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -155,6 +155,4 @@ bool MipsOs16::runOnModule(Module &M) { return modified; } -ModulePass *llvm::createMipsOs16Pass(MipsTargetMachine &TM) { - return new MipsOs16; -} +ModulePass *llvm::createMipsOs16Pass() { return new MipsOs16(); } diff --git a/interpreter/llvm/src/lib/Target/Mips/MipsRegisterInfo.cpp b/interpreter/llvm/src/lib/Target/Mips/MipsRegisterInfo.cpp index 65be350f259df..de3389b5a6bf5 100644 --- a/interpreter/llvm/src/lib/Target/Mips/MipsRegisterInfo.cpp +++ b/interpreter/llvm/src/lib/Target/Mips/MipsRegisterInfo.cpp @@ -286,7 +286,9 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, DEBUG(errs() << "FrameIndex : " << FrameIndex << "\n" << "spOffset : " << spOffset << "\n" - << "stackSize : " << stackSize << "\n"); + << "stackSize : " << stackSize << "\n" + << "alignment : " + << MF.getFrameInfo().getObjectAlignment(FrameIndex) << "\n"); eliminateFI(MI, FIOperandNum, FrameIndex, stackSize, spOffset); } diff --git a/interpreter/llvm/src/lib/Target/Mips/MipsRegisterInfo.td b/interpreter/llvm/src/lib/Target/Mips/MipsRegisterInfo.td index ccfdcc89b078a..08fb3d7d43525 100644 --- a/interpreter/llvm/src/lib/Target/Mips/MipsRegisterInfo.td +++ b/interpreter/llvm/src/lib/Target/Mips/MipsRegisterInfo.td @@ -552,16 +552,31 @@ def AFGR64AsmOperand : MipsAsmRegOperand { let PredicateMethod = "isFGRAsmReg"; } +def StrictlyAFGR64AsmOperand : MipsAsmRegOperand { + let Name = "StrictlyAFGR64AsmReg"; + let PredicateMethod = "isStrictlyFGRAsmReg"; +} + def FGR64AsmOperand : MipsAsmRegOperand { let Name = "FGR64AsmReg"; let PredicateMethod = "isFGRAsmReg"; } +def StrictlyFGR64AsmOperand : MipsAsmRegOperand { + let Name = "StrictlyFGR64AsmReg"; + let PredicateMethod = "isStrictlyFGRAsmReg"; +} + def FGR32AsmOperand : MipsAsmRegOperand { let Name = "FGR32AsmReg"; let PredicateMethod = "isFGRAsmReg"; } +def StrictlyFGR32AsmOperand : MipsAsmRegOperand { + let Name = "StrictlyFGR32AsmReg"; + let PredicateMethod = "isStrictlyFGRAsmReg"; +} + def FGRH32AsmOperand : MipsAsmRegOperand { let Name = "FGRH32AsmReg"; let PredicateMethod = "isFGRAsmReg"; @@ -639,14 +654,26 @@ def AFGR64Opnd : RegisterOperand<AFGR64> { let ParserMatchClass = AFGR64AsmOperand; } +def StrictlyAFGR64Opnd : RegisterOperand<AFGR64> { + let ParserMatchClass = StrictlyAFGR64AsmOperand; +} + def FGR64Opnd : RegisterOperand<FGR64> { let ParserMatchClass = FGR64AsmOperand; } +def StrictlyFGR64Opnd : RegisterOperand<FGR64> { + let ParserMatchClass = StrictlyFGR64AsmOperand; +} + def FGR32Opnd : RegisterOperand<FGR32> { let ParserMatchClass = FGR32AsmOperand; } +def StrictlyFGR32Opnd : RegisterOperand<FGR32> { + let ParserMatchClass = StrictlyFGR32AsmOperand; +} + def FGRCCOpnd : RegisterOperand<FGRCC> { // The assembler doesn't use register classes so we can re-use // FGR32AsmOperand.
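A pattern worth calling out across the Mips pass changes above (MipsModuleDAGToDAGISel, OptimizePICCall, MipsOs16): passes no longer receive a MipsTargetMachine at construction, and the ones that still need it recover it through the TargetPassConfig analysis at run time. A minimal sketch of that idiom, assuming a hypothetical MyMipsPass that is not part of this patch:

#include "MipsTargetMachine.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/TargetPassConfig.h"
using namespace llvm;

// Sketch only: depend on TargetPassConfig and pull the target machine out of
// it in runOnMachineFunction, instead of storing a TM member at construction.
class MyMipsPass : public MachineFunctionPass {   // hypothetical pass name
public:
  static char ID;
  MyMipsPass() : MachineFunctionPass(ID) {}       // note: no TM parameter

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<TargetPassConfig>();           // makes the TM reachable
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  bool runOnMachineFunction(MachineFunction &MF) override {
    auto &TPC = getAnalysis<TargetPassConfig>();
    auto &TM = TPC.getTM<MipsTargetMachine>();    // replaces the stored member
    (void)TM;                                     // target-specific work here
    return false;
  }
};
char MyMipsPass::ID = 0;

Constructing passes without target-machine arguments is what lets the createMips*Pass factory functions above drop their parameters.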
diff --git a/interpreter/llvm/src/lib/Target/Mips/MipsSEFrameLowering.cpp b/interpreter/llvm/src/lib/Target/Mips/MipsSEFrameLowering.cpp index e765b4625206f..102ebb21609aa 100644 --- a/interpreter/llvm/src/lib/Target/Mips/MipsSEFrameLowering.cpp +++ b/interpreter/llvm/src/lib/Target/Mips/MipsSEFrameLowering.cpp @@ -11,10 +11,10 @@ // //===----------------------------------------------------------------------===// +#include "MipsSEFrameLowering.h" #include "MCTargetDesc/MipsABIInfo.h" #include "MipsMachineFunction.h" #include "MipsRegisterInfo.h" -#include "MipsSEFrameLowering.h" #include "MipsSEInstrInfo.h" #include "MipsSubtarget.h" #include "llvm/ADT/BitVector.h" diff --git a/interpreter/llvm/src/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/interpreter/llvm/src/lib/Target/Mips/MipsSEISelDAGToDAG.cpp index c9cf9363b8c96..4be26dd25dc04 100644 --- a/interpreter/llvm/src/lib/Target/Mips/MipsSEISelDAGToDAG.cpp +++ b/interpreter/llvm/src/lib/Target/Mips/MipsSEISelDAGToDAG.cpp @@ -24,11 +24,11 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/IR/CFG.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Type.h" -#include "llvm/IR/Dominators.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -245,46 +245,64 @@ void MipsSEDAGToDAGISel::processFunctionAfterISel(MachineFunction &MF) { } } -void MipsSEDAGToDAGISel::selectAddESubE(unsigned MOp, SDValue InFlag, - SDValue CmpLHS, const SDLoc &DL, - SDNode *Node) const { - unsigned Opc = InFlag.getOpcode(); (void)Opc; - - assert(((Opc == ISD::ADDC || Opc == ISD::ADDE) || - (Opc == ISD::SUBC || Opc == ISD::SUBE)) && - "(ADD|SUB)E flag operand must come from (ADD|SUB)C/E insn"); - - unsigned SLTuOp = Mips::SLTu, ADDuOp = Mips::ADDu; - if (Subtarget->isGP64bit()) { - SLTuOp = Mips::SLTu64; - ADDuOp = Mips::DADDu; - } - - SDValue Ops[] = { CmpLHS, InFlag.getOperand(1) }; +void MipsSEDAGToDAGISel::selectAddE(SDNode *Node, const SDLoc &DL) const { + SDValue InFlag = Node->getOperand(2); + unsigned Opc = InFlag.getOpcode(); SDValue LHS = Node->getOperand(0), RHS = Node->getOperand(1); EVT VT = LHS.getValueType(); - SDNode *Carry = CurDAG->getMachineNode(SLTuOp, DL, VT, Ops); - - if (Subtarget->isGP64bit()) { - // On 64-bit targets, sltu produces an i64 but our backend currently says - // that SLTu64 produces an i32. We need to fix this in the long run but for - // now, just make the DAG type-correct by asserting the upper bits are zero. - Carry = CurDAG->getMachineNode(Mips::SUBREG_TO_REG, DL, VT, - CurDAG->getTargetConstant(0, DL, VT), - SDValue(Carry, 0), - CurDAG->getTargetConstant(Mips::sub_32, DL, - VT)); + // In the base case, we can rely on the carry bit from the addsc + // instruction. + if (Opc == ISD::ADDC) { + SDValue Ops[3] = {LHS, RHS, InFlag}; + CurDAG->SelectNodeTo(Node, Mips::ADDWC, VT, MVT::Glue, Ops); + return; } - // Generate a second addition only if we know that RHS is not a - // constant-zero node. - SDNode *AddCarry = Carry; - ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS); - if (!C || C->getZExtValue()) - AddCarry = CurDAG->getMachineNode(ADDuOp, DL, VT, SDValue(Carry, 0), RHS); + assert(Opc == ISD::ADDE && "ISD::ADDE not in a chain of ADDE nodes!"); + + // The more complex case is when there is a chain of ISD::ADDE nodes like: + // (adde (adde (adde (addc a b) c) d) e).
+ // + // The addwc instruction does not write to the carry bit, instead it writes + // to bit 20 of the dsp control register. To match this series of nodes, each + // intermediate adde node must be expanded to write the carry bit before the + // addition. + + // Start by reading the overflow field for addsc and moving the value to the + // carry field. The usage of 1 here with MipsISD::RDDSP / Mips::WRDSP + // corresponds to reading/writing the entire control register to/from a GPR. + + SDValue CstOne = CurDAG->getTargetConstant(1, DL, MVT::i32); + + SDValue OuFlag = CurDAG->getTargetConstant(20, DL, MVT::i32); - CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue, LHS, SDValue(AddCarry, 0)); + SDNode *DSPCtrlField = + CurDAG->getMachineNode(Mips::RDDSP, DL, MVT::i32, MVT::Glue, CstOne, InFlag); + + SDNode *Carry = CurDAG->getMachineNode( + Mips::EXT, DL, MVT::i32, SDValue(DSPCtrlField, 0), OuFlag, CstOne); + + SDValue Ops[4] = {SDValue(DSPCtrlField, 0), + CurDAG->getTargetConstant(6, DL, MVT::i32), CstOne, + SDValue(Carry, 0)}; + SDNode *DSPCFWithCarry = CurDAG->getMachineNode(Mips::INS, DL, MVT::i32, Ops); + + // My reading of the MIPS DSP 3.01 specification isn't as clear as I + // would like on whether bit 20 always gets overwritten by addwc. + // Hence we take an extremely conservative view and presume it's sticky. We + // therefore need to clear it. + + SDValue Zero = CurDAG->getRegister(Mips::ZERO, MVT::i32); + + SDValue InsOps[4] = {Zero, OuFlag, CstOne, SDValue(DSPCFWithCarry, 0)}; + SDNode *DSPCtrlFinal = CurDAG->getMachineNode(Mips::INS, DL, MVT::i32, InsOps); + + SDNode *WrDSP = CurDAG->getMachineNode(Mips::WRDSP, DL, MVT::Glue, + SDValue(DSPCtrlFinal, 0), CstOne); + + SDValue Operands[3] = {LHS, RHS, SDValue(WrDSP, 0)}; + CurDAG->SelectNodeTo(Node, Mips::ADDWC, VT, MVT::Glue, Operands); } /// Match frameindex @@ -765,19 +783,8 @@ bool MipsSEDAGToDAGISel::trySelect(SDNode *Node) { switch(Opcode) { default: break; - case ISD::SUBE: { - SDValue InFlag = Node->getOperand(2); - unsigned Opc = Subtarget->isGP64bit() ? Mips::DSUBu : Mips::SUBu; - selectAddESubE(Opc, InFlag, InFlag.getOperand(0), DL, Node); - return true; - } - case ISD::ADDE: { - if (Subtarget->hasDSP()) // Select DSP instructions, ADDSC and ADDWC. - break; - SDValue InFlag = Node->getOperand(2); - unsigned Opc = Subtarget->isGP64bit() ?
Mips::DADDu : Mips::ADDu; - selectAddESubE(Opc, InFlag, InFlag.getValue(0), DL, Node); + selectAddE(Node, DL); return true; } diff --git a/interpreter/llvm/src/lib/Target/Mips/MipsSEISelDAGToDAG.h b/interpreter/llvm/src/lib/Target/Mips/MipsSEISelDAGToDAG.h index f89a350cab044..6f38289c5a457 100644 --- a/interpreter/llvm/src/lib/Target/Mips/MipsSEISelDAGToDAG.h +++ b/interpreter/llvm/src/lib/Target/Mips/MipsSEISelDAGToDAG.h @@ -41,8 +41,7 @@ class MipsSEDAGToDAGISel : public MipsDAGToDAGISel { const SDLoc &dl, EVT Ty, bool HasLo, bool HasHi); - void selectAddESubE(unsigned MOp, SDValue InFlag, SDValue CmpLHS, - const SDLoc &DL, SDNode *Node) const; + void selectAddE(SDNode *Node, const SDLoc &DL) const; bool selectAddrFrameIndex(SDValue Addr, SDValue &Base, SDValue &Offset) const; bool selectAddrFrameIndexOffset(SDValue Addr, SDValue &Base, SDValue &Offset, diff --git a/interpreter/llvm/src/lib/Target/Mips/MipsSEISelLowering.cpp b/interpreter/llvm/src/lib/Target/Mips/MipsSEISelLowering.cpp index bf7f079e31052..72b2738bfac44 100644 --- a/interpreter/llvm/src/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/interpreter/llvm/src/lib/Target/Mips/MipsSEISelLowering.cpp @@ -179,8 +179,6 @@ MipsSETargetLowering::MipsSETargetLowering(const MipsTargetMachine &TM, setOperationAction(ISD::LOAD, MVT::i32, Custom); setOperationAction(ISD::STORE, MVT::i32, Custom); - setTargetDAGCombine(ISD::ADDE); - setTargetDAGCombine(ISD::SUBE); setTargetDAGCombine(ISD::MUL); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); @@ -421,163 +419,6 @@ SDValue MipsSETargetLowering::LowerOperation(SDValue Op, return MipsTargetLowering::LowerOperation(Op, DAG); } -// selectMADD - -// Transforms a subgraph in CurDAG if the following pattern is found: -// (addc multLo, Lo0), (adde multHi, Hi0), -// where, -// multHi/Lo: product of multiplication -// Lo0: initial value of Lo register -// Hi0: initial value of Hi register -// Return true if pattern matching was successful. -static bool selectMADD(SDNode *ADDENode, SelectionDAG *CurDAG) { - // ADDENode's second operand must be a flag output of an ADDC node in order - // for the matching to be successful. - SDNode *ADDCNode = ADDENode->getOperand(2).getNode(); - - if (ADDCNode->getOpcode() != ISD::ADDC) - return false; - - SDValue MultHi = ADDENode->getOperand(0); - SDValue MultLo = ADDCNode->getOperand(0); - SDNode *MultNode = MultHi.getNode(); - unsigned MultOpc = MultHi.getOpcode(); - - // MultHi and MultLo must be generated by the same node, - if (MultLo.getNode() != MultNode) - return false; - - // and it must be a multiplication. - if (MultOpc != ISD::SMUL_LOHI && MultOpc != ISD::UMUL_LOHI) - return false; - - // MultLo amd MultHi must be the first and second output of MultNode - // respectively. - if (MultHi.getResNo() != 1 || MultLo.getResNo() != 0) - return false; - - // Transform this to a MADD only if ADDENode and ADDCNode are the only users - // of the values of MultNode, in which case MultNode will be removed in later - // phases. - // If there exist users other than ADDENode or ADDCNode, this function returns - // here, which will result in MultNode being mapped to a single MULT - // instruction node rather than a pair of MULT and MADD instructions being - // produced. - if (!MultHi.hasOneUse() || !MultLo.hasOneUse()) - return false; - - SDLoc DL(ADDENode); - - // Initialize accumulator. 
- SDValue ACCIn = CurDAG->getNode(MipsISD::MTLOHI, DL, MVT::Untyped, - ADDCNode->getOperand(1), - ADDENode->getOperand(1)); - - // create MipsMAdd(u) node - MultOpc = MultOpc == ISD::UMUL_LOHI ? MipsISD::MAddu : MipsISD::MAdd; - - SDValue MAdd = CurDAG->getNode(MultOpc, DL, MVT::Untyped, - MultNode->getOperand(0),// Factor 0 - MultNode->getOperand(1),// Factor 1 - ACCIn); - - // replace uses of adde and addc here - if (!SDValue(ADDCNode, 0).use_empty()) { - SDValue LoOut = CurDAG->getNode(MipsISD::MFLO, DL, MVT::i32, MAdd); - CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDCNode, 0), LoOut); - } - if (!SDValue(ADDENode, 0).use_empty()) { - SDValue HiOut = CurDAG->getNode(MipsISD::MFHI, DL, MVT::i32, MAdd); - CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDENode, 0), HiOut); - } - - return true; -} - -// selectMSUB - -// Transforms a subgraph in CurDAG if the following pattern is found: -// (addc Lo0, multLo), (sube Hi0, multHi), -// where, -// multHi/Lo: product of multiplication -// Lo0: initial value of Lo register -// Hi0: initial value of Hi register -// Return true if pattern matching was successful. -static bool selectMSUB(SDNode *SUBENode, SelectionDAG *CurDAG) { - // SUBENode's second operand must be a flag output of an SUBC node in order - // for the matching to be successful. - SDNode *SUBCNode = SUBENode->getOperand(2).getNode(); - - if (SUBCNode->getOpcode() != ISD::SUBC) - return false; - - SDValue MultHi = SUBENode->getOperand(1); - SDValue MultLo = SUBCNode->getOperand(1); - SDNode *MultNode = MultHi.getNode(); - unsigned MultOpc = MultHi.getOpcode(); - - // MultHi and MultLo must be generated by the same node, - if (MultLo.getNode() != MultNode) - return false; - - // and it must be a multiplication. - if (MultOpc != ISD::SMUL_LOHI && MultOpc != ISD::UMUL_LOHI) - return false; - - // MultLo amd MultHi must be the first and second output of MultNode - // respectively. - if (MultHi.getResNo() != 1 || MultLo.getResNo() != 0) - return false; - - // Transform this to a MSUB only if SUBENode and SUBCNode are the only users - // of the values of MultNode, in which case MultNode will be removed in later - // phases. - // If there exist users other than SUBENode or SUBCNode, this function returns - // here, which will result in MultNode being mapped to a single MULT - // instruction node rather than a pair of MULT and MSUB instructions being - // produced. - if (!MultHi.hasOneUse() || !MultLo.hasOneUse()) - return false; - - SDLoc DL(SUBENode); - - // Initialize accumulator. - SDValue ACCIn = CurDAG->getNode(MipsISD::MTLOHI, DL, MVT::Untyped, - SUBCNode->getOperand(0), - SUBENode->getOperand(0)); - - // create MipsSub(u) node - MultOpc = MultOpc == ISD::UMUL_LOHI ? 
MipsISD::MSubu : MipsISD::MSub; - - SDValue MSub = CurDAG->getNode(MultOpc, DL, MVT::Glue, - MultNode->getOperand(0),// Factor 0 - MultNode->getOperand(1),// Factor 1 - ACCIn); - - // replace uses of sube and subc here - if (!SDValue(SUBCNode, 0).use_empty()) { - SDValue LoOut = CurDAG->getNode(MipsISD::MFLO, DL, MVT::i32, MSub); - CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBCNode, 0), LoOut); - } - if (!SDValue(SUBENode, 0).use_empty()) { - SDValue HiOut = CurDAG->getNode(MipsISD::MFHI, DL, MVT::i32, MSub); - CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBENode, 0), HiOut); - } - - return true; -} - -static SDValue performADDECombine(SDNode *N, SelectionDAG &DAG, - TargetLowering::DAGCombinerInfo &DCI, - const MipsSubtarget &Subtarget) { - if (DCI.isBeforeLegalize()) - return SDValue(); - - if (Subtarget.hasMips32() && !Subtarget.hasMips32r6() && - N->getValueType(0) == MVT::i32 && selectMADD(N, &DAG)) - return SDValue(N, 0); - - return SDValue(); -} - // Fold zero extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT // // Performs the following transformations: @@ -820,19 +661,6 @@ static SDValue performORCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } -static SDValue performSUBECombine(SDNode *N, SelectionDAG &DAG, - TargetLowering::DAGCombinerInfo &DCI, - const MipsSubtarget &Subtarget) { - if (DCI.isBeforeLegalize()) - return SDValue(); - - if (Subtarget.hasMips32() && N->getValueType(0) == MVT::i32 && - selectMSUB(N, &DAG)) - return SDValue(N, 0); - - return SDValue(); -} - static SDValue genConstMult(SDValue X, uint64_t C, const SDLoc &DL, EVT VT, EVT ShiftTy, SelectionDAG &DAG) { // Clear the upper (64 - VT.sizeInBits) bits. @@ -1110,16 +938,12 @@ MipsSETargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SDValue Val; switch (N->getOpcode()) { - case ISD::ADDE: - return performADDECombine(N, DAG, DCI, Subtarget); case ISD::AND: Val = performANDCombine(N, DAG, DCI, Subtarget); break; case ISD::OR: Val = performORCombine(N, DAG, DCI, Subtarget); break; - case ISD::SUBE: - return performSUBECombine(N, DAG, DCI, Subtarget); case ISD::MUL: return performMULCombine(N, DAG, DCI, this); case ISD::SHL: @@ -1433,19 +1257,22 @@ static SDValue lowerMSACopyIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) { static SDValue lowerMSASplatZExt(SDValue Op, unsigned OpNr, SelectionDAG &DAG) { EVT ResVecTy = Op->getValueType(0); EVT ViaVecTy = ResVecTy; + bool BigEndian = !DAG.getSubtarget().getTargetTriple().isLittleEndian(); SDLoc DL(Op); // When ResVecTy == MVT::v2i64, LaneA is the upper 32 bits of the lane and // LaneB is the lower 32-bits. Otherwise LaneA and LaneB are alternating // lanes. 
- SDValue LaneA; - SDValue LaneB = Op->getOperand(2); + SDValue LaneA = Op->getOperand(OpNr); + SDValue LaneB; if (ResVecTy == MVT::v2i64) { - LaneA = DAG.getConstant(0, DL, MVT::i32); + LaneB = DAG.getConstant(0, DL, MVT::i32); ViaVecTy = MVT::v4i32; + if(BigEndian) + std::swap(LaneA, LaneB); } else - LaneA = LaneB; + LaneB = LaneA; SDValue Ops[16] = { LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB }; @@ -1453,8 +1280,11 @@ static SDValue lowerMSASplatZExt(SDValue Op, unsigned OpNr, SelectionDAG &DAG) { SDValue Result = DAG.getBuildVector( ViaVecTy, DL, makeArrayRef(Ops, ViaVecTy.getVectorNumElements())); - if (ViaVecTy != ResVecTy) - Result = DAG.getNode(ISD::BITCAST, DL, ResVecTy, Result); + if (ViaVecTy != ResVecTy) { + SDValue One = DAG.getConstant(1, DL, ViaVecTy); + Result = DAG.getNode(ISD::BITCAST, DL, ResVecTy, + DAG.getNode(ISD::AND, DL, ViaVecTy, Result, One)); + } return Result; } @@ -3590,9 +3420,17 @@ MipsSETargetLowering::emitST_F16_PSEUDO(MachineInstr &MI, : (Subtarget.isABI_O32() ? &Mips::GPR32RegClass : &Mips::GPR64RegClass); const bool UsingMips32 = RC == &Mips::GPR32RegClass; - unsigned Rs = RegInfo.createVirtualRegister(RC); + unsigned Rs = RegInfo.createVirtualRegister(&Mips::GPR32RegClass); BuildMI(*BB, MI, DL, TII->get(Mips::COPY_U_H), Rs).addReg(Ws).addImm(0); + if(!UsingMips32) { + unsigned Tmp = RegInfo.createVirtualRegister(&Mips::GPR64RegClass); + BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Tmp) + .addImm(0) + .addReg(Rs) + .addImm(Mips::sub_32); + Rs = Tmp; + } BuildMI(*BB, MI, DL, TII->get(UsingMips32 ? Mips::SH : Mips::SH64)) .addReg(Rs) .addReg(Rt) @@ -3643,6 +3481,12 @@ MipsSETargetLowering::emitLD_F16_PSEUDO(MachineInstr &MI, for (unsigned i = 1; i < MI.getNumOperands(); i++) MIB.add(MI.getOperand(i)); + if(!UsingMips32) { + unsigned Tmp = RegInfo.createVirtualRegister(&Mips::GPR32RegClass); + BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Tmp).addReg(Rt, 0, Mips::sub_32); + Rt = Tmp; + } + BuildMI(*BB, MI, DL, TII->get(Mips::FILL_H), Wd).addReg(Rt); MI.eraseFromParent(); @@ -3710,6 +3554,7 @@ MipsSETargetLowering::emitFPROUND_PSEUDO(MachineInstr &MI, assert(Subtarget.hasMSA() && Subtarget.hasMips32r2()); bool IsFGR64onMips64 = Subtarget.hasMips64() && IsFGR64; + bool IsFGR64onMips32 = !Subtarget.hasMips64() && IsFGR64; const TargetInstrInfo *TII = Subtarget.getInstrInfo(); DebugLoc DL = MI.getDebugLoc(); @@ -3720,7 +3565,9 @@ MipsSETargetLowering::emitFPROUND_PSEUDO(MachineInstr &MI, unsigned Wtemp = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); const TargetRegisterClass *GPRRC = IsFGR64onMips64 ? &Mips::GPR64RegClass : &Mips::GPR32RegClass; - unsigned MFC1Opc = IsFGR64onMips64 ? Mips::DMFC1 : Mips::MFC1; + unsigned MFC1Opc = IsFGR64onMips64 + ? Mips::DMFC1 + : (IsFGR64onMips32 ? Mips::MFC1_D64 : Mips::MFC1); unsigned FILLOpc = IsFGR64onMips64 ? Mips::FILL_D : Mips::FILL_W; // Perform the register class copy as mentioned above. 
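Between the two hunks above: the FPROUND pseudo expansion now distinguishes three cases when moving the f64 source into a GPR, and the FPEXTEND hunk below mirrors the same split in the opposite direction. Restated as a hedged helper (this function is illustrative only, not patch content; it assumes the backend-internal Mips opcode enums are in scope):

// DMFC1 when 64-bit FPRs meet 64-bit GPRs, MFC1_D64 when a 64-bit FPR must be
// read with only 32-bit GPRs available, plain MFC1 otherwise. In the
// FGR64-on-MIPS32 case the high half is then fetched separately via
// MFHC1_D64, as the next hunk shows.
static unsigned chooseMFC1(bool IsFGR64onMips64, bool IsFGR64onMips32) {
  if (IsFGR64onMips64)
    return Mips::DMFC1;
  return IsFGR64onMips32 ? Mips::MFC1_D64 : Mips::MFC1;
}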
@@ -3729,7 +3576,7 @@ MipsSETargetLowering::emitFPROUND_PSEUDO(MachineInstr &MI, BuildMI(*BB, MI, DL, TII->get(FILLOpc), Wtemp).addReg(Rtemp); unsigned WPHI = Wtemp; - if (!Subtarget.hasMips64() && IsFGR64) { + if (IsFGR64onMips32) { unsigned Rtemp2 = RegInfo.createVirtualRegister(GPRRC); BuildMI(*BB, MI, DL, TII->get(Mips::MFHC1_D64), Rtemp2).addReg(Fs); unsigned Wtemp2 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); @@ -3823,7 +3670,9 @@ MipsSETargetLowering::emitFPEXTEND_PSEUDO(MachineInstr &MI, MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); const TargetRegisterClass *GPRRC = IsFGR64onMips64 ? &Mips::GPR64RegClass : &Mips::GPR32RegClass; - unsigned MTC1Opc = IsFGR64onMips64 ? Mips::DMTC1 : Mips::MTC1; + unsigned MTC1Opc = IsFGR64onMips64 + ? Mips::DMTC1 + : (IsFGR64onMips32 ? Mips::MTC1_D64 : Mips::MTC1); unsigned COPYOpc = IsFGR64onMips64 ? Mips::COPY_S_D : Mips::COPY_S_W; unsigned Wtemp = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); diff --git a/interpreter/llvm/src/lib/Target/Mips/MipsSchedule.td b/interpreter/llvm/src/lib/Target/Mips/MipsSchedule.td index c0de59ba15f55..c2947bb44ef58 100644 --- a/interpreter/llvm/src/lib/Target/Mips/MipsSchedule.td +++ b/interpreter/llvm/src/lib/Target/Mips/MipsSchedule.td @@ -84,6 +84,7 @@ def II_DIVU : InstrItinClass; def II_DIV_D : InstrItinClass; def II_DIV_S : InstrItinClass; def II_DMFC0 : InstrItinClass; +def II_DMT : InstrItinClass; def II_DMTC0 : InstrItinClass; def II_DMFC1 : InstrItinClass; def II_DMTC1 : InstrItinClass; @@ -113,8 +114,12 @@ def II_DSBH : InstrItinClass; def II_DSHD : InstrItinClass; def II_DSUBU : InstrItinClass; def II_DSUB : InstrItinClass; +def II_DVPE : InstrItinClass; +def II_EMT : InstrItinClass; +def II_EVPE : InstrItinClass; def II_EXT : InstrItinClass; // Any EXT instruction def II_FLOOR : InstrItinClass; +def II_FORK : InstrItinClass; def II_INS : InstrItinClass; // Any INS instruction def II_IndirectBranchPseudo : InstrItinClass; // Indirect branch pseudo. def II_J : InstrItinClass; @@ -345,6 +350,7 @@ def II_WRPGPR : InstrItinClass; def II_RDPGPR : InstrItinClass; def II_DVP : InstrItinClass; def II_EVP : InstrItinClass; +def II_YIELD : InstrItinClass; //===----------------------------------------------------------------------===// // Mips Generic instruction itineraries. 
@@ -386,6 +392,7 @@ def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [], [ InstrItinData]>, InstrItinData]>, InstrItinData]>, + InstrItinData]>, InstrItinData]>, InstrItinData]>, InstrItinData]>, @@ -404,7 +411,11 @@ def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [], [ InstrItinData]>, InstrItinData]>, InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, InstrItinData]>, + InstrItinData]>, InstrItinData]>, InstrItinData]>, InstrItinData]>, @@ -670,5 +681,6 @@ def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [], [ InstrItinData]>, InstrItinData]>, InstrItinData]>, - InstrItinData]> + InstrItinData]>, + InstrItinData]> ]>; diff --git a/interpreter/llvm/src/lib/Target/Mips/MipsScheduleGeneric.td b/interpreter/llvm/src/lib/Target/Mips/MipsScheduleGeneric.td index 15a0401b781e5..89cda676441e7 100644 --- a/interpreter/llvm/src/lib/Target/Mips/MipsScheduleGeneric.td +++ b/interpreter/llvm/src/lib/Target/Mips/MipsScheduleGeneric.td @@ -187,7 +187,11 @@ def GenericIssueCOP0 : ProcResource<1> { let Super = GenericCOP0; } def GenericWriteCOP0TLB : SchedWriteRes<[GenericIssueCOP0]> { let Latency = 4; } def GenericWriteCOP0 : SchedWriteRes<[GenericIssueCOP0]> { let Latency = 3; } def GenericReadCOP0 : SchedWriteRes<[GenericIssueCOP0]> { let Latency = 2; } -def GnereicReadWritePGPR : SchedWriteRes<[GenericIssueCOP0]>; +def GenericReadWritePGPR : SchedWriteRes<[GenericIssueCOP0]>; +def GenericReadWriteCOP0Long : SchedWriteRes<[GenericIssueCOP0]> { + let Latency = 5; +} +def GenericWriteCOP0Short : SchedWriteRes<[GenericIssueCOP0]>; def : ItinRW<[GenericWriteCOP0TLB], [II_TLBP, II_TLBR, II_TLBWI, II_TLBWR]>; def : ItinRW<[GenericWriteCOP0TLB], [II_TLBINV, II_TLBINVF]>; @@ -261,6 +265,14 @@ def : ItinRW<[GenericWriteLoad], [II_LBE, II_LBUE, II_LHE, II_LHUE, II_LWE, def : ItinRW<[GenericWriteLoad], [II_LWLE, II_LWRE]>; +// MIPS MT instructions +// ==================== + +def : ItinRW<[GenericWriteMove], [II_DMT, II_DVPE, II_EMT, II_EVPE]>; + +def : ItinRW<[GenericReadWriteCOP0Long], [II_YIELD]>; +def : ItinRW<[GenericWriteCOP0Short], [II_FORK]>; + // MIPS32R6 and MIPS16e // ==================== diff --git a/interpreter/llvm/src/lib/Target/Mips/MipsScheduleP5600.td b/interpreter/llvm/src/lib/Target/Mips/MipsScheduleP5600.td index 882a241d1426a..fedfac24e4e74 100644 --- a/interpreter/llvm/src/lib/Target/Mips/MipsScheduleP5600.td +++ b/interpreter/llvm/src/lib/Target/Mips/MipsScheduleP5600.td @@ -19,7 +19,7 @@ def MipsP5600Model : SchedMachineModel { HasMips64, HasMips64r2, HasCnMips, InMicroMips, InMips16Mode, HasMicroMips32r6, HasMicroMips64r6, - HasDSP, HasDSPR2]; + HasDSP, HasDSPR2, HasMT]; } diff --git a/interpreter/llvm/src/lib/Target/Mips/MipsSubtarget.cpp b/interpreter/llvm/src/lib/Target/Mips/MipsSubtarget.cpp index 8f5ecadecdea2..eba21e0a1c672 100644 --- a/interpreter/llvm/src/lib/Target/Mips/MipsSubtarget.cpp +++ b/interpreter/llvm/src/lib/Target/Mips/MipsSubtarget.cpp @@ -11,10 +11,10 @@ // //===----------------------------------------------------------------------===// -#include "MipsMachineFunction.h" +#include "MipsSubtarget.h" #include "Mips.h" +#include "MipsMachineFunction.h" #include "MipsRegisterInfo.h" -#include "MipsSubtarget.h" #include "MipsTargetMachine.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Function.h" @@ -59,9 +59,8 @@ static cl::opt void MipsSubtarget::anchor() { } -MipsSubtarget::MipsSubtarget(const Triple &TT, const std::string &CPU, - const std::string &FS, bool little, - const 
MipsTargetMachine &TM) +MipsSubtarget::MipsSubtarget(const Triple &TT, StringRef CPU, StringRef FS, + bool little, const MipsTargetMachine &TM) : MipsGenSubtargetInfo(TT, CPU, FS), MipsArchVersion(MipsDefault), IsLittle(little), IsSoftFloat(false), IsSingleFloat(false), IsFPXX(false), NoABICalls(false), IsFP64bit(false), UseOddSPReg(true), @@ -71,14 +70,13 @@ MipsSubtarget::MipsSubtarget(const Triple &TT, const std::string &CPU, InMips16HardFloat(Mips16HardFloat), InMicroMipsMode(false), HasDSP(false), HasDSPR2(false), HasDSPR3(false), AllowMixed16_32(Mixed16_32 | Mips_Os16), Os16(Mips_Os16), HasMSA(false), UseTCCInDIV(false), HasSym32(false), - HasEVA(false), TM(TM), TargetTriple(TT), TSInfo(), + HasEVA(false), DisableMadd4(false), HasMT(false), TM(TM), + TargetTriple(TT), TSInfo(), InstrInfo( MipsInstrInfo::create(initializeSubtargetDependencies(CPU, FS, TM))), FrameLowering(MipsFrameLowering::create(*this)), TLInfo(MipsTargetLowering::create(TM, *this)) { - PreviousInMips16Mode = InMips16Mode; - if (MipsArchVersion == MipsDefault) MipsArchVersion = Mips32; diff --git a/interpreter/llvm/src/lib/Target/Mips/MipsSubtarget.h b/interpreter/llvm/src/lib/Target/Mips/MipsSubtarget.h index cca2cb8a46608..cce3b8c4c8d18 100644 --- a/interpreter/llvm/src/lib/Target/Mips/MipsSubtarget.h +++ b/interpreter/llvm/src/lib/Target/Mips/MipsSubtarget.h @@ -78,7 +78,7 @@ class MipsSubtarget : public MipsGenSubtargetInfo { // IsNan2008 - IEEE 754-2008 NaN encoding. bool IsNaN2008bit; - // IsFP64bit - General-purpose registers are 64 bits wide + // IsGP64bit - General-purpose registers are 64 bits wide bool IsGP64bit; // IsPTR64bit - Pointers are 64 bit wide @@ -119,9 +119,6 @@ class MipsSubtarget : public MipsGenSubtargetInfo { // Mips16 hard float bool InMips16HardFloat; - // PreviousInMips16 -- the function we just processed was in Mips 16 Mode - bool PreviousInMips16Mode; - // InMicroMips -- can process MicroMips instructions bool InMicroMipsMode; @@ -147,6 +144,16 @@ class MipsSubtarget : public MipsGenSubtargetInfo { // HasEVA -- supports EVA ASE. bool HasEVA; + + // nomadd4 - disables generation of 4-operand madd.s, madd.d and + // related instructions. + bool DisableMadd4; + + // HasMT -- support MT ASE. + bool HasMT; + + // Disable use of the `jal` instruction. + bool UseLongCalls = false; InstrItineraryData InstrItins; @@ -178,8 +185,8 @@ class MipsSubtarget : public MipsGenSubtargetInfo { /// This constructor initializes the data members to match that /// of the specified triple. - MipsSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS, - bool little, const MipsTargetMachine &TM); + MipsSubtarget(const Triple &TT, StringRef CPU, StringRef FS, bool little, + const MipsTargetMachine &TM); /// ParseSubtargetFeatures - Parses features string setting specified /// subtarget options. Definition of function is auto generated by tblgen. 
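The UseLongCalls flag declared above gets its accessor in the next hunk; the kind of check it is meant to enable during call lowering looks roughly like the following sketch. This is an assumption about intended use, and materializeCalleeAddress is a hypothetical helper, not part of the patch:

// When long calls are requested, the 26-bit `jal target` encoding cannot be
// assumed to reach the callee, so the address is built in a register and the
// call goes through jalr instead.
if (Subtarget.useLongCalls())
  Callee = materializeCalleeAddress(DAG, DL, Callee); // hypothetical helper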
@@ -256,13 +263,17 @@ class MipsSubtarget : public MipsGenSubtargetInfo { bool hasDSPR2() const { return HasDSPR2; } bool hasDSPR3() const { return HasDSPR3; } bool hasMSA() const { return HasMSA; } + bool disableMadd4() const { return DisableMadd4; } bool hasEVA() const { return HasEVA; } + bool hasMT() const { return HasMT; } bool useSmallSection() const { return UseSmallSection; } bool hasStandardEncoding() const { return !inMips16Mode(); } bool useSoftFloat() const { return IsSoftFloat; } + bool useLongCalls() const { return UseLongCalls; } + bool enableLongBranchPass() const { return hasStandardEncoding() || allowMixed16_32(); } diff --git a/interpreter/llvm/src/lib/Target/Mips/MipsTargetMachine.cpp b/interpreter/llvm/src/lib/Target/Mips/MipsTargetMachine.cpp index 29a38fd35c1f8..330ae19ecd0f1 100644 --- a/interpreter/llvm/src/lib/Target/Mips/MipsTargetMachine.cpp +++ b/interpreter/llvm/src/lib/Target/Mips/MipsTargetMachine.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "MipsTargetMachine.h" #include "MCTargetDesc/MipsABIInfo.h" #include "MCTargetDesc/MipsMCTargetDesc.h" #include "Mips.h" @@ -18,7 +19,6 @@ #include "MipsSEISelDAGToDAG.h" #include "MipsSubtarget.h" #include "MipsTargetObjectFile.h" -#include "MipsTargetMachine.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" @@ -154,6 +154,11 @@ MipsTargetMachine::getSubtargetImpl(const Function &F) const { bool hasNoMips16Attr = !F.getFnAttribute("nomips16").hasAttribute(Attribute::None); + bool HasMicroMipsAttr = + !F.getFnAttribute("micromips").hasAttribute(Attribute::None); + bool HasNoMicroMipsAttr = + !F.getFnAttribute("nomicromips").hasAttribute(Attribute::None); + // FIXME: This is related to the code below to reset the target options, // we need to know whether or not the soft float flag is set on the // function, so we can enable it as a subtarget feature. @@ -165,6 +170,10 @@ MipsTargetMachine::getSubtargetImpl(const Function &F) const { FS += FS.empty() ? "+mips16" : ",+mips16"; else if (hasNoMips16Attr) FS += FS.empty() ? "-mips16" : ",-mips16"; + if (HasMicroMipsAttr) + FS += FS.empty() ? "+micromips" : ",+micromips"; + else if (HasNoMicroMipsAttr) + FS += FS.empty() ? "-micromips" : ",-micromips"; if (softFloat) FS += FS.empty() ? "+soft-float" : ",+soft-float"; @@ -192,7 +201,7 @@ namespace { /// Mips Code Generator Pass Configuration Options. class MipsPassConfig : public TargetPassConfig { public: - MipsPassConfig(MipsTargetMachine *TM, PassManagerBase &PM) + MipsPassConfig(MipsTargetMachine &TM, PassManagerBase &PM) : TargetPassConfig(TM, PM) { // The current implementation of long branch pass requires a scratch // register ($at) to be available before branch instructions. 
Tail merging @@ -218,28 +227,28 @@ class MipsPassConfig : public TargetPassConfig { } // end anonymous namespace TargetPassConfig *MipsTargetMachine::createPassConfig(PassManagerBase &PM) { - return new MipsPassConfig(this, PM); + return new MipsPassConfig(*this, PM); } void MipsPassConfig::addIRPasses() { TargetPassConfig::addIRPasses(); - addPass(createAtomicExpandPass(&getMipsTargetMachine())); + addPass(createAtomicExpandPass()); if (getMipsSubtarget().os16()) - addPass(createMipsOs16Pass(getMipsTargetMachine())); + addPass(createMipsOs16Pass()); if (getMipsSubtarget().inMips16HardFloat()) - addPass(createMips16HardFloatPass(getMipsTargetMachine())); + addPass(createMips16HardFloatPass()); } // Install an instruction selector pass using // the ISelDag to gen Mips code. bool MipsPassConfig::addInstSelector() { - addPass(createMipsModuleISelDagPass(getMipsTargetMachine())); + addPass(createMipsModuleISelDagPass()); addPass(createMips16ISelDag(getMipsTargetMachine(), getOptLevel())); addPass(createMipsSEISelDag(getMipsTargetMachine(), getOptLevel())); return false; } void MipsPassConfig::addPreRegAlloc() { - addPass(createMipsOptimizePICCallPass(getMipsTargetMachine())); + addPass(createMipsOptimizePICCallPass()); } TargetIRAnalysis MipsTargetMachine::getTargetIRAnalysis() { @@ -259,15 +268,14 @@ TargetIRAnalysis MipsTargetMachine::getTargetIRAnalysis() { // machine code is emitted. return true if -print-machineinstrs should // print out the code after the passes. void MipsPassConfig::addPreEmitPass() { - MipsTargetMachine &TM = getMipsTargetMachine(); addPass(createMicroMipsSizeReductionPass()); // The delay slot filler pass can potientially create forbidden slot (FS) // hazards for MIPSR6 which the hazard schedule pass (HSP) will fix. Any // (new) pass that creates compact branches after the HSP must handle FS // hazards itself or be pipelined before the HSP. - addPass(createMipsDelaySlotFillerPass(TM)); + addPass(createMipsDelaySlotFillerPass()); addPass(createMipsHazardSchedule()); - addPass(createMipsLongBranchPass(TM)); + addPass(createMipsLongBranchPass()); addPass(createMipsConstantIslandPass()); } diff --git a/interpreter/llvm/src/lib/Target/Mips/MipsTargetMachine.h b/interpreter/llvm/src/lib/Target/Mips/MipsTargetMachine.h index 140d7133f879b..a3462868cb111 100644 --- a/interpreter/llvm/src/lib/Target/Mips/MipsTargetMachine.h +++ b/interpreter/llvm/src/lib/Target/Mips/MipsTargetMachine.h @@ -66,6 +66,10 @@ class MipsTargetMachine : public LLVMTargetMachine { bool isLittleEndian() const { return isLittle; } const MipsABIInfo &getABI() const { return ABI; } + + bool isMachineVerifierClean() const override { + return false; + } }; /// Mips32/64 big endian target machine. 
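The getSubtargetImpl change above follows LLVM's usual per-function subtarget scheme: function attributes are folded into the feature string that selects (and caches) the subtarget. Condensed from the hunk, with the surrounding cache lookup elided (F is the Function being compiled):

// The "micromips"/"nomicromips" function attributes toggle the matching
// feature, so individual functions can opt in or out of microMIPS.
bool HasMicroMipsAttr =
    !F.getFnAttribute("micromips").hasAttribute(Attribute::None);
bool HasNoMicroMipsAttr =
    !F.getFnAttribute("nomicromips").hasAttribute(Attribute::None);
std::string FS;
if (HasMicroMipsAttr)
  FS += FS.empty() ? "+micromips" : ",+micromips";
else if (HasNoMicroMipsAttr)
  FS += FS.empty() ? "-micromips" : ",-micromips";
// FS (together with the CPU name) then keys the cached MipsSubtarget.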
diff --git a/interpreter/llvm/src/lib/Target/Mips/MipsTargetObjectFile.cpp b/interpreter/llvm/src/lib/Target/Mips/MipsTargetObjectFile.cpp index c5d6a05d66119..4d73c3991035e 100644 --- a/interpreter/llvm/src/lib/Target/Mips/MipsTargetObjectFile.cpp +++ b/interpreter/llvm/src/lib/Target/Mips/MipsTargetObjectFile.cpp @@ -10,13 +10,13 @@ #include "MipsTargetObjectFile.h" #include "MipsSubtarget.h" #include "MipsTargetMachine.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/ELF.h" #include "llvm/Target/TargetMachine.h" using namespace llvm; diff --git a/interpreter/llvm/src/lib/Target/Mips/MipsTargetStreamer.h b/interpreter/llvm/src/lib/Target/Mips/MipsTargetStreamer.h index 41ebe411b98d9..7d9f99ce071e8 100644 --- a/interpreter/llvm/src/lib/Target/Mips/MipsTargetStreamer.h +++ b/interpreter/llvm/src/lib/Target/Mips/MipsTargetStreamer.h @@ -40,6 +40,8 @@ class MipsTargetStreamer : public MCTargetStreamer { virtual void emitDirectiveSetNoMacro(); virtual void emitDirectiveSetMsa(); virtual void emitDirectiveSetNoMsa(); + virtual void emitDirectiveSetMt(); + virtual void emitDirectiveSetNoMt(); virtual void emitDirectiveSetAt(); virtual void emitDirectiveSetAtWithArg(unsigned RegNo); virtual void emitDirectiveSetNoAt(); @@ -96,6 +98,7 @@ class MipsTargetStreamer : public MCTargetStreamer { virtual void emitDirectiveModuleOddSPReg(); virtual void emitDirectiveModuleSoftFloat(); virtual void emitDirectiveModuleHardFloat(); + virtual void emitDirectiveModuleMT(); virtual void emitDirectiveSetFp(MipsABIFlagsSection::FpABIKind Value); virtual void emitDirectiveSetOddSPReg(); virtual void emitDirectiveSetNoOddSPReg(); @@ -204,6 +207,8 @@ class MipsTargetAsmStreamer : public MipsTargetStreamer { void emitDirectiveSetNoMacro() override; void emitDirectiveSetMsa() override; void emitDirectiveSetNoMsa() override; + void emitDirectiveSetMt() override; + void emitDirectiveSetNoMt() override; void emitDirectiveSetAt() override; void emitDirectiveSetAtWithArg(unsigned RegNo) override; void emitDirectiveSetNoAt() override; @@ -267,6 +272,7 @@ class MipsTargetAsmStreamer : public MipsTargetStreamer { void emitDirectiveModuleOddSPReg() override; void emitDirectiveModuleSoftFloat() override; void emitDirectiveModuleHardFloat() override; + void emitDirectiveModuleMT() override; void emitDirectiveSetFp(MipsABIFlagsSection::FpABIKind Value) override; void emitDirectiveSetOddSPReg() override; void emitDirectiveSetNoOddSPReg() override; diff --git a/interpreter/llvm/src/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/interpreter/llvm/src/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index 58cb7793d0407..0139646fc3f75 100644 --- a/interpreter/llvm/src/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/interpreter/llvm/src/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -12,11 +12,11 @@ // //===----------------------------------------------------------------------===// +#include "NVPTXAsmPrinter.h" #include "InstPrinter/NVPTXInstPrinter.h" #include "MCTargetDesc/NVPTXBaseInfo.h" #include "MCTargetDesc/NVPTXMCAsmInfo.h" #include "NVPTX.h" -#include "NVPTXAsmPrinter.h" #include "NVPTXMCExpr.h" #include "NVPTXMachineFunctionInfo.h" #include "NVPTXRegisterInfo.h" @@ -73,8 +73,8 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Path.h" -#include "llvm/Support/raw_ostream.h" #include 
"llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" diff --git a/interpreter/llvm/src/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp b/interpreter/llvm/src/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp index 390776212ce7e..916b0e1156640 100644 --- a/interpreter/llvm/src/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp +++ b/interpreter/llvm/src/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#include "NVPTX.h" #include "MCTargetDesc/NVPTXBaseInfo.h" +#include "NVPTX.h" #include "NVPTXUtilities.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/Constants.h" diff --git a/interpreter/llvm/src/lib/Target/NVPTX/NVPTXISelLowering.cpp b/interpreter/llvm/src/lib/Target/NVPTX/NVPTXISelLowering.cpp index 57ce8fe8daa45..9070b628f12e3 100644 --- a/interpreter/llvm/src/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/interpreter/llvm/src/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -12,9 +12,9 @@ // //===----------------------------------------------------------------------===// +#include "NVPTXISelLowering.h" #include "MCTargetDesc/NVPTXBaseInfo.h" #include "NVPTX.h" -#include "NVPTXISelLowering.h" #include "NVPTXSection.h" #include "NVPTXSubtarget.h" #include "NVPTXTargetMachine.h" @@ -62,7 +62,6 @@ #include #include -#undef DEBUG_TYPE #define DEBUG_TYPE "nvptx-lower" using namespace llvm; @@ -1548,7 +1547,9 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, Chain = DAG.getMemIntrinsicNode( Op, dl, DAG.getVTList(MVT::Other, MVT::Glue), StoreOperands, - TheStoreType, MachinePointerInfo(), EltAlign); + TheStoreType, MachinePointerInfo(), EltAlign, + /* Volatile */ false, /* ReadMem */ false, + /* WriteMem */ true, /* Size */ 0); InFlag = Chain.getValue(1); // Cleanup. @@ -1608,7 +1609,9 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, theVal, InFlag }; Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl, CopyParamVTs, CopyParamOps, elemtype, - MachinePointerInfo()); + MachinePointerInfo(), /* Align */ 0, + /* Volatile */ false, /* ReadMem */ false, + /* WriteMem */ true, /* Size */ 0); InFlag = Chain.getValue(1); } @@ -1794,7 +1797,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, DAG.getConstant(Offsets[VecIdx], dl, MVT::i32), InFlag}; SDValue RetVal = DAG.getMemIntrinsicNode( Op, dl, DAG.getVTList(LoadVTs), LoadOperands, TheLoadType, - MachinePointerInfo(), EltAlign); + MachinePointerInfo(), EltAlign, /* Volatile */ false, + /* ReadMem */ true, /* WriteMem */ false, /* Size */ 0); for (unsigned j = 0; j < NumElts; ++j) { SDValue Ret = RetVal.getValue(j); @@ -2451,7 +2455,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( // v2f16 was loaded as an i32. Now we must bitcast it back. else if (EltVT == MVT::v2f16) Elt = DAG.getNode(ISD::BITCAST, dl, MVT::v2f16, Elt); - // Extend the element if necesary (e.g. an i8 is loaded + // Extend the element if necessary (e.g. 
an i8 is loaded // into an i16 register) if (Ins[InsIdx].VT.isInteger() && Ins[InsIdx].VT.getSizeInBits() > LoadVT.getSizeInBits()) { diff --git a/interpreter/llvm/src/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/interpreter/llvm/src/lib/Target/NVPTX/NVPTXInstrInfo.cpp index 0f6c2e53e60ad..da563f0531d43 100644 --- a/interpreter/llvm/src/lib/Target/NVPTX/NVPTXInstrInfo.cpp +++ b/interpreter/llvm/src/lib/Target/NVPTX/NVPTXInstrInfo.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "NVPTX.h" #include "NVPTXInstrInfo.h" +#include "NVPTX.h" #include "NVPTXTargetMachine.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineFunction.h" diff --git a/interpreter/llvm/src/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp b/interpreter/llvm/src/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp index 3be291b48b8f2..989f0a3aba2f6 100644 --- a/interpreter/llvm/src/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp +++ b/interpreter/llvm/src/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp @@ -14,6 +14,7 @@ //===----------------------------------------------------------------------===// #include "NVPTXLowerAggrCopies.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/StackProtector.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -42,6 +43,7 @@ struct NVPTXLowerAggrCopies : public FunctionPass { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addPreserved<StackProtector>(); + AU.addRequired<TargetTransformInfoWrapperPass>(); } bool runOnFunction(Function &F) override; @@ -61,6 +63,8 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) { const DataLayout &DL = F.getParent()->getDataLayout(); LLVMContext &Context = F.getParent()->getContext(); + const TargetTransformInfo &TTI = + getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); // Collect all aggregate loads and mem* calls. for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) { @@ -104,15 +108,26 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) { Value *SrcAddr = LI->getOperand(0); Value *DstAddr = SI->getOperand(1); unsigned NumLoads = DL.getTypeStoreSize(LI->getType()); - Value *CopyLen = ConstantInt::get(Type::getInt32Ty(Context), NumLoads); - - createMemCpyLoop(/* ConvertedInst */ SI, - /* SrcAddr */ SrcAddr, /* DstAddr */ DstAddr, - /* CopyLen */ CopyLen, - /* SrcAlign */ LI->getAlignment(), - /* DestAlign */ SI->getAlignment(), - /* SrcIsVolatile */ LI->isVolatile(), - /* DstIsVolatile */ SI->isVolatile()); + ConstantInt *CopyLen = + ConstantInt::get(Type::getInt32Ty(Context), NumLoads); + + if (!TTI.useWideIRMemcpyLoopLowering()) { + createMemCpyLoop(/* ConvertedInst */ SI, + /* SrcAddr */ SrcAddr, /* DstAddr */ DstAddr, + /* CopyLen */ CopyLen, + /* SrcAlign */ LI->getAlignment(), + /* DestAlign */ SI->getAlignment(), + /* SrcIsVolatile */ LI->isVolatile(), + /* DstIsVolatile */ SI->isVolatile()); + } else { + createMemCpyLoopKnownSize(/* ConvertedInst */ SI, + /* SrcAddr */ SrcAddr, /* DstAddr */ DstAddr, + /* CopyLen */ CopyLen, + /* SrcAlign */ LI->getAlignment(), + /* DestAlign */ SI->getAlignment(), + /* SrcIsVolatile */ LI->isVolatile(), + /* DstIsVolatile */ SI->isVolatile(), TTI); + } SI->eraseFromParent(); LI->eraseFromParent(); @@ -121,7 +136,7 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) { // Transform mem* intrinsic calls.
for (MemIntrinsic *MemCall : MemCalls) { if (MemCpyInst *Memcpy = dyn_cast<MemCpyInst>(MemCall)) { - expandMemCpyAsLoop(Memcpy); + expandMemCpyAsLoop(Memcpy, TTI); } else if (MemMoveInst *Memmove = dyn_cast<MemMoveInst>(MemCall)) { expandMemMoveAsLoop(Memmove); } else if (MemSetInst *Memset = dyn_cast<MemSetInst>(MemCall)) { diff --git a/interpreter/llvm/src/lib/Target/NVPTX/NVPTXLowerArgs.cpp b/interpreter/llvm/src/lib/Target/NVPTX/NVPTXLowerArgs.cpp index e858b37e1843b..139dc7fbeeda1 100644 --- a/interpreter/llvm/src/lib/Target/NVPTX/NVPTXLowerArgs.cpp +++ b/interpreter/llvm/src/lib/Target/NVPTX/NVPTXLowerArgs.cpp @@ -90,8 +90,8 @@ //===----------------------------------------------------------------------===// #include "NVPTX.h" -#include "NVPTXUtilities.h" #include "NVPTXTargetMachine.h" +#include "NVPTXUtilities.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" diff --git a/interpreter/llvm/src/lib/Target/NVPTX/NVPTXPeephole.cpp b/interpreter/llvm/src/lib/Target/NVPTX/NVPTXPeephole.cpp index e10b046f7c97a..4e902c0fb5073 100644 --- a/interpreter/llvm/src/lib/Target/NVPTX/NVPTXPeephole.cpp +++ b/interpreter/llvm/src/lib/Target/NVPTX/NVPTXPeephole.cpp @@ -36,8 +36,8 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; diff --git a/interpreter/llvm/src/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/interpreter/llvm/src/lib/Target/NVPTX/NVPTXTargetMachine.cpp index ab5298d0dcfd6..2b6ba8c85d4d1 100644 --- a/interpreter/llvm/src/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/interpreter/llvm/src/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -11,10 +11,10 @@ // //===----------------------------------------------------------------------===// +#include "NVPTXTargetMachine.h" #include "NVPTX.h" #include "NVPTXAllocaHoisting.h" #include "NVPTXLowerAggrCopies.h" -#include "NVPTXTargetMachine.h" #include "NVPTXTargetObjectFile.h" #include "NVPTXTargetTransformInfo.h" #include "llvm/ADT/STLExtras.h" @@ -132,7 +132,7 @@ namespace { class NVPTXPassConfig : public TargetPassConfig { public: - NVPTXPassConfig(NVPTXTargetMachine *TM, PassManagerBase &PM) + NVPTXPassConfig(NVPTXTargetMachine &TM, PassManagerBase &PM) : TargetPassConfig(TM, PM) {} NVPTXTargetMachine &getNVPTXTargetMachine() const { @@ -163,7 +163,7 @@ class NVPTXPassConfig : public TargetPassConfig { } // end anonymous namespace TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) { - return new NVPTXPassConfig(this, PM); + return new NVPTXPassConfig(*this, PM); } void NVPTXTargetMachine::adjustPassManager(PassManagerBuilder &Builder) { diff --git a/interpreter/llvm/src/lib/Target/NVPTX/NVPTXTargetMachine.h b/interpreter/llvm/src/lib/Target/NVPTX/NVPTXTargetMachine.h index 1ed8e3b1e9357..2f3981be22f83 100644 --- a/interpreter/llvm/src/lib/Target/NVPTX/NVPTXTargetMachine.h +++ b/interpreter/llvm/src/lib/Target/NVPTX/NVPTXTargetMachine.h @@ -65,6 +65,9 @@ class NVPTXTargetMachine : public LLVMTargetMachine { TargetIRAnalysis getTargetIRAnalysis() override; + bool isMachineVerifierClean() const override { + return false; + } }; // NVPTXTargetMachine.
class NVPTXTargetMachine32 : public NVPTXTargetMachine { diff --git a/interpreter/llvm/src/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/interpreter/llvm/src/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp index dd77070849486..a64d95512a4ab 100644 --- a/interpreter/llvm/src/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp +++ b/interpreter/llvm/src/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp @@ -141,9 +141,9 @@ int NVPTXTTIImpl::getArithmeticInstrCost( } } -void NVPTXTTIImpl::getUnrollingPreferences(Loop *L, +void NVPTXTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP) { - BaseT::getUnrollingPreferences(L, UP); + BaseT::getUnrollingPreferences(L, SE, UP); // Enable partial unrolling and runtime unrolling, but reduce the // threshold. This partially unrolls small loops which are often diff --git a/interpreter/llvm/src/lib/Target/NVPTX/NVPTXTargetTransformInfo.h b/interpreter/llvm/src/lib/Target/NVPTX/NVPTXTargetTransformInfo.h index 03075b550429e..f987892ba6758 100644 --- a/interpreter/llvm/src/lib/Target/NVPTX/NVPTXTargetTransformInfo.h +++ b/interpreter/llvm/src/lib/Target/NVPTX/NVPTXTargetTransformInfo.h @@ -61,7 +61,8 @@ class NVPTXTTIImpl : public BasicTTIImplBase { TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None, ArrayRef Args = ArrayRef()); - void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP); + void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, + TTI::UnrollingPreferences &UP); }; } // end namespace llvm diff --git a/interpreter/llvm/src/lib/Target/NVPTX/NVVMIntrRange.cpp b/interpreter/llvm/src/lib/Target/NVPTX/NVVMIntrRange.cpp index 9c71a2ee165b3..11277f5ba5966 100644 --- a/interpreter/llvm/src/lib/Target/NVPTX/NVVMIntrRange.cpp +++ b/interpreter/llvm/src/lib/Target/NVPTX/NVVMIntrRange.cpp @@ -15,8 +15,8 @@ #include "NVPTX.h" #include "llvm/IR/Constants.h" #include "llvm/IR/InstIterator.h" -#include "llvm/IR/Intrinsics.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" using namespace llvm; diff --git a/interpreter/llvm/src/lib/Target/Nios2/CMakeLists.txt b/interpreter/llvm/src/lib/Target/Nios2/CMakeLists.txt new file mode 100644 index 0000000000000..78db452094bd0 --- /dev/null +++ b/interpreter/llvm/src/lib/Target/Nios2/CMakeLists.txt @@ -0,0 +1,18 @@ +set(LLVM_TARGET_DEFINITIONS Nios2.td) + +#Generate Nios2GenRegisterInfo.inc and Nios2GenInstrInfo.inc which included by +#your hand code C++ files. +#Nios2GenRegisterInfo.inc came from Nios2RegisterInfo.td, Nios2GenInstrInfo.inc +#came from Nios2InstrInfo.td. +tablegen(LLVM Nios2GenRegisterInfo.inc -gen-register-info) +tablegen(LLVM Nios2GenInstrInfo.inc -gen-instr-info) + +#Nios2CommonTableGen must be defined +add_public_tablegen_target(Nios2CommonTableGen) + +#Nios2CodeGen should match with LLVMBuild.txt Nios2CodeGen +add_llvm_target(Nios2CodeGen Nios2TargetMachine.cpp) + +#Should match with "subdirectories = MCTargetDesc TargetInfo" in LLVMBuild.txt +add_subdirectory(TargetInfo) +add_subdirectory(MCTargetDesc) diff --git a/interpreter/llvm/src/lib/Target/Nios2/LLVMBuild.txt b/interpreter/llvm/src/lib/Target/Nios2/LLVMBuild.txt new file mode 100644 index 0000000000000..b40a763797065 --- /dev/null +++ b/interpreter/llvm/src/lib/Target/Nios2/LLVMBuild.txt @@ -0,0 +1,61 @@ +;===- ./lib/Target/Nios2/LLVMBuild.txt -------------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. 
+; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +#Following comments extracted from http: // llvm.org/docs/LLVMBuild.html + +[common] +subdirectories = + MCTargetDesc + TargetInfo + +[component_0] +#TargetGroup components are an extension of LibraryGroups, specifically for +#defining LLVM targets(which are handled specially in a few places). +type = TargetGroup +#The name of the component should always be the name of the target.(should +#match "def Nios2 : Target" in Nios2.td) +name = Nios2 +#Nios2 component is located in directory Target / +parent = Target +#Whether this target defines an assembly parser, assembly printer, disassembler +#, and supports JIT compilation.They are optional. + +[component_1] +#component_1 is a Library type and name is Nios2CodeGen.After build it will +#in lib / libLLVMNios2CodeGen.a of your build command directory. +type = Library +name = Nios2CodeGen +#Nios2CodeGen component(Library) is located in directory Nios2 / +parent = Nios2 +#If given, a list of the names of Library or LibraryGroup components which +#must also be linked in whenever this library is used.That is, the link time +#dependencies for this component.When tools are built, the build system will +#include the transitive closure of all required_libraries for the components +#the tool needs. +required_libraries = CodeGen + Core + GlobalISel + MC + Nios2Desc + Nios2Info + Support + Target +#end of required_libraries + +#All LLVMBuild.txt in Target / Nios2 and subdirectory use 'add_to_library_groups +#= Nios2' +add_to_library_groups = Nios2 diff --git a/interpreter/llvm/src/lib/Target/Nios2/MCTargetDesc/CMakeLists.txt b/interpreter/llvm/src/lib/Target/Nios2/MCTargetDesc/CMakeLists.txt new file mode 100644 index 0000000000000..21def509a2324 --- /dev/null +++ b/interpreter/llvm/src/lib/Target/Nios2/MCTargetDesc/CMakeLists.txt @@ -0,0 +1,2 @@ +#MCTargetDesc / CMakeLists.txt +add_llvm_library(LLVMNios2Desc Nios2MCTargetDesc.cpp) diff --git a/interpreter/llvm/src/lib/Target/Nios2/MCTargetDesc/LLVMBuild.txt b/interpreter/llvm/src/lib/Target/Nios2/MCTargetDesc/LLVMBuild.txt new file mode 100644 index 0000000000000..4dc6995e7f5c6 --- /dev/null +++ b/interpreter/llvm/src/lib/Target/Nios2/MCTargetDesc/LLVMBuild.txt @@ -0,0 +1,25 @@ +;===- ./lib/Target/Nios2/MCTargetDesc/LLVMBuild.txt ------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. 
+; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = Nios2Desc +parent = Nios2 +required_libraries = MC + Nios2Info + Support +add_to_library_groups = Nios2 diff --git a/interpreter/llvm/src/lib/Target/Nios2/MCTargetDesc/Nios2MCTargetDesc.cpp b/interpreter/llvm/src/lib/Target/Nios2/MCTargetDesc/Nios2MCTargetDesc.cpp new file mode 100644 index 0000000000000..d913166399c67 --- /dev/null +++ b/interpreter/llvm/src/lib/Target/Nios2/MCTargetDesc/Nios2MCTargetDesc.cpp @@ -0,0 +1,25 @@ +//===-- Nios2MCTargetDesc.cpp - Nios2 Target Descriptions -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides Nios2 specific target descriptions. +// +//===----------------------------------------------------------------------===// + +#include "Nios2MCTargetDesc.h" +#include "llvm/MC/MCInstrInfo.h" + +using namespace llvm; + +#define GET_INSTRINFO_MC_DESC +#include "Nios2GenInstrInfo.inc" + +#define GET_REGINFO_MC_DESC +#include "Nios2GenRegisterInfo.inc" + +extern "C" void LLVMInitializeNios2TargetMC() {} diff --git a/interpreter/llvm/src/lib/Target/Nios2/MCTargetDesc/Nios2MCTargetDesc.h b/interpreter/llvm/src/lib/Target/Nios2/MCTargetDesc/Nios2MCTargetDesc.h new file mode 100644 index 0000000000000..d426062db1683 --- /dev/null +++ b/interpreter/llvm/src/lib/Target/Nios2/MCTargetDesc/Nios2MCTargetDesc.h @@ -0,0 +1,34 @@ +//===-- Nios2MCTargetDesc.h - Nios2 Target Descriptions ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides Nios2 specific target descriptions. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_NIOS2_MCTARGETDESC_NIOS2MCTARGETDESC_H +#define LLVM_LIB_TARGET_NIOS2_MCTARGETDESC_NIOS2MCTARGETDESC_H + +namespace llvm { +class Target; +class Triple; + +Target &getTheNios2Target(); + +} // namespace llvm + +// Defines symbolic names for Nios2 registers. This defines a mapping from +// register name to register number. +#define GET_REGINFO_ENUM +#include "Nios2GenRegisterInfo.inc" + +// Defines symbolic names for the Nios2 instructions. +#define GET_INSTRINFO_ENUM +#include "Nios2GenInstrInfo.inc" + +#endif diff --git a/interpreter/llvm/src/lib/Target/Nios2/Nios2.h b/interpreter/llvm/src/lib/Target/Nios2/Nios2.h new file mode 100644 index 0000000000000..87202f48cfbe1 --- /dev/null +++ b/interpreter/llvm/src/lib/Target/Nios2/Nios2.h @@ -0,0 +1,25 @@ +//===-- Nios2.h - Top-level interface for Nios2 representation --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the entry points for global functions defined in +// the LLVM Nios2 back-end. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_NIOS2_NIOS2_H +#define LLVM_LIB_TARGET_NIOS2_NIOS2_H + +#include "MCTargetDesc/Nios2MCTargetDesc.h" +#include "llvm/Target/TargetMachine.h" + +namespace llvm { +class Nios2TargetMachine; +} // namespace llvm + +#endif diff --git a/interpreter/llvm/src/lib/Target/Nios2/Nios2.td b/interpreter/llvm/src/lib/Target/Nios2/Nios2.td new file mode 100644 index 0000000000000..e8abba8633708 --- /dev/null +++ b/interpreter/llvm/src/lib/Target/Nios2/Nios2.td @@ -0,0 +1,29 @@ +//===-- Nios2.td - Describe the Nios2 Target Machine -------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Target-independent interfaces +//===----------------------------------------------------------------------===// + +include "llvm/Target/Target.td" + +//===----------------------------------------------------------------------===// +// Target-dependent interfaces +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Calling Conv, Instruction Descriptions +//===----------------------------------------------------------------------===// + +include "Nios2RegisterInfo.td" +include "Nios2InstrInfo.td" + +def Nios2InstrInfo : InstrInfo; + +def Nios2 : Target { let InstructionSet = Nios2InstrInfo; } diff --git a/interpreter/llvm/src/lib/Target/Nios2/Nios2InstrFormats.td b/interpreter/llvm/src/lib/Target/Nios2/Nios2InstrFormats.td new file mode 100644 index 0000000000000..79868be48a488 --- /dev/null +++ b/interpreter/llvm/src/lib/Target/Nios2/Nios2InstrFormats.td @@ -0,0 +1,117 @@ +//===-- Nios2InstrFormats.td - Nios2 Instruction Formats ---*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Describe NIOS2 instructions format +// +// +//===----------------------------------------------------------------------===// + +// Format specifies the encoding used by the instruction. This is part of the +// ad-hoc solution used to emit machine instruction encodings by our machine +// code emitter. +class Format<bits<3> val> { + bits<3> Value = val; +} + +def Pseudo : Format<0>; +def FrmI : Format<1>; +def FrmR : Format<2>; +def FrmJ : Format<3>; +def FrmOther : Format<4>; // Instruction w/ a custom format + +// Generic Nios2 Format +class Nios2Inst<dag outs, dag ins, string asmstr, list<dag> pattern, Format f> + : Instruction { + field bits<32> Inst; + Format Form = f; + + let Namespace = "Nios2"; + + let Size = 4; + + bits<6> Opcode = 0; + + // Bottom 6 bits are the 'opcode' field + let Inst{5 - 0} = Opcode; + + let OutOperandList = outs; + let InOperandList = ins; + + let AsmString = asmstr; + let Pattern = pattern; + + // + // Attributes specific to Nios2 instructions: + // + bits<3> FormBits = Form.Value; + + // TSFlags layout should be kept in sync with Nios2InstrInfo.h. + let TSFlags{2 - 0} = FormBits; + + let DecoderNamespace = "Nios2"; +} + +// Nios2 Instruction Format +class InstSE<dag outs, dag ins, string asmstr, list<dag> pattern, Format f> + : Nios2Inst<outs, ins, asmstr, pattern, f> { +} + +//===----------------------------------------------------------------------===// +// Format I instruction class in Nios2 : <|A|B|immediate|opcode|> +//===----------------------------------------------------------------------===// + +class FI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSE<outs, ins, asmstr, pattern, FrmI> { + bits<5> rA; + bits<5> rB; + bits<16> imm; + + let Opcode = op; + + let Inst{31 - 27} = rA; + let Inst{26 - 22} = rB; + let Inst{21 - 6} = imm; +} + +//===----------------------------------------------------------------------===// +// Format R instruction : <|A|B|C|opx|imm|opcode|> +//===----------------------------------------------------------------------===// + +class FR<bits<6> opx, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSE<outs, ins, asmstr, pattern, FrmR> { + bits<5> rA; + bits<5> rB; + bits<5> rC; + bits<5> imm = 0; + + // opcode is always 0x3a for R instr. + let Opcode = 0x3a; + + let Inst{31 - 27} = rA; + let Inst{26 - 22} = rB; + let Inst{21 - 17} = rC; + // opx stands for opcode extension + let Inst{16 - 11} = opx; + // optional 5-bit immediate value + let Inst{10 - 6} = imm; +} + +//===----------------------------------------------------------------------===// +// Format J instruction class in Nios2 : <|address|opcode|> +//===----------------------------------------------------------------------===// + +class FJ<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSE<outs, ins, asmstr, pattern, FrmJ> { + bits<26> addr; + + let Opcode = op; + + let Inst{31 - 6} = addr; +} diff --git a/interpreter/llvm/src/lib/Target/Nios2/Nios2InstrInfo.td b/interpreter/llvm/src/lib/Target/Nios2/Nios2InstrInfo.td new file mode 100644 index 0000000000000..5e4815ab3e16f --- /dev/null +++ b/interpreter/llvm/src/lib/Target/Nios2/Nios2InstrInfo.td @@ -0,0 +1,50 @@ +//===- Nios2InstrInfo.td - Target Description for Nios2 ------*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Nios2 implementation of the TargetInstrInfo class. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Instruction format superclass +//===----------------------------------------------------------------------===// + +include "Nios2InstrFormats.td" + +//===----------------------------------------------------------------------===// +// Nios2 Operand, Complex Patterns and Transformations Definitions. +//===----------------------------------------------------------------------===// + +def simm16 : Operand<i32> { + let DecoderMethod= "DecodeSimm16"; +} + +// Node immediate fits as 16-bit sign extended on target immediate. +// e.g. addi, andi +def immSExt16 : PatLeaf<(imm), [{ return isInt<16>(N->getSExtValue()); }]>; + +//===----------------------------------------------------------------------===// +// Instructions specific format +//===----------------------------------------------------------------------===// + +// Arithmetic and logical instructions with 2 register operands (see the +// worked encoding example below).
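// Editorial aside, not part of the patch: a worked encoding under the FI
// class above. For an I-format instruction with opcode 0x04 (addi), rA = 4,
// rB = 3 and imm = 100, the 32-bit word assembles as
//   Inst = (rA << 27) | (rB << 22) | (imm << 6) | opcode
//        = (4 << 27) | (3 << 22) | (100 << 6) | 0x04
//        = 0x20C01904
// matching the <|A|B|immediate|opcode|> layout documented above; in the
// ArithLogicI pattern that follows, rA is the source and rB the destination.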
+class ArithLogicI<bits<6> op, string instr_asm, SDNode OpNode, + Operand Od, PatLeaf imm_type, RegisterClass RC> : + FI<op, (outs RC:$rB), (ins RC:$rA, Od:$imm), + !strconcat(instr_asm, "\t$rB, $rA, $imm"), + [(set RC:$rB, (OpNode RC:$rA, imm_type:$imm))]> { + let isReMaterializable = 1; +} + +//===----------------------------------------------------------------------===// +// Nios2 R1 Instructions +//===----------------------------------------------------------------------===// + +/// Arithmetic Instructions (ALU Immediate) +def ADDi : ArithLogicI<0x04, "addi", add, simm16, immSExt16, CPURegs>; diff --git a/interpreter/llvm/src/lib/Target/Nios2/Nios2RegisterInfo.td b/interpreter/llvm/src/lib/Target/Nios2/Nios2RegisterInfo.td new file mode 100644 index 0000000000000..1808815816f3b --- /dev/null +++ b/interpreter/llvm/src/lib/Target/Nios2/Nios2RegisterInfo.td @@ -0,0 +1,60 @@ +//===-- Nios2RegisterInfo.td - Nios2 Register defs ---------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// We have a bank of 32 registers. +class Nios2Reg<string n> : Register<n> { + field bits<5> Num; + let Namespace = "Nios2"; +} + +// Nios2 CPU Registers +class Nios2GPRReg<bits<5> num, string n> : Nios2Reg<n> { + let Num = num; +} + +//===----------------------------------------------------------------------===// +// Registers +//===----------------------------------------------------------------------===// + +let Namespace = "Nios2" in { + // General Purpose Registers + def ZERO : Nios2GPRReg<0, "zero">, DwarfRegNum<[ 0 ]>; + def AT : Nios2GPRReg<1, "at">, DwarfRegNum<[ 1 ]>; + foreach RegNum = 2 - 23 in { + def R #RegNum : Nios2GPRReg<RegNum, "r" #RegNum>, DwarfRegNum<[ RegNum ]>; + } + def ET : Nios2GPRReg<24, "et">, DwarfRegNum<[ 24 ]>; + def BT : Nios2GPRReg<25, "bt">, DwarfRegNum<[ 25 ]>; + def GP : Nios2GPRReg<26, "gp">, DwarfRegNum<[ 26 ]>; + def SP : Nios2GPRReg<27, "sp">, DwarfRegNum<[ 27 ]>; + def FP : Nios2GPRReg<28, "fp">, DwarfRegNum<[ 28 ]>; + def EA : Nios2GPRReg<29, "ea">, DwarfRegNum<[ 29 ]>; + def BA : Nios2GPRReg<30, "ba">, DwarfRegNum<[ 30 ]>; + def RA : Nios2GPRReg<31, "ra">, DwarfRegNum<[ 31 ]>; + def PC : Nios2Reg<"pc">, DwarfRegNum<[ 32 ]>; +} + +//===----------------------------------------------------------------------===// +// Register Classes +//===----------------------------------------------------------------------===// + +def CPURegs : RegisterClass<"Nios2", [ i32 ], 32, + (add + // Reserved + ZERO, + AT, + // Return Values and Arguments + (sequence "R%u", 2, 7), + // Not preserved across procedure calls + // Caller saved + (sequence "R%u", 8, 15), + // Callee saved + (sequence "R%u", 16, 23), + // Reserved + ET, BT, GP, SP, FP, EA, BA, RA, PC)>; diff --git a/interpreter/llvm/src/lib/Target/Nios2/Nios2TargetMachine.cpp b/interpreter/llvm/src/lib/Target/Nios2/Nios2TargetMachine.cpp new file mode 100644 index 0000000000000..16d4eabcfaf71 --- /dev/null +++ b/interpreter/llvm/src/lib/Target/Nios2/Nios2TargetMachine.cpp @@ -0,0 +1,46 @@ +//===-- Nios2TargetMachine.cpp - Define TargetMachine for Nios2 -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Implements the info about Nios2 target spec. +// +//===----------------------------------------------------------------------===// + +#include "Nios2TargetMachine.h" +#include "Nios2.h" + +using namespace llvm; + +#define DEBUG_TYPE "nios2" + +extern "C" void LLVMInitializeNios2Target() { + // Register the target. +} + +static std::string computeDataLayout(const Triple &TT, StringRef CPU, + const TargetOptions &Options) { + return "e-p:32:32:32-i8:8:32-i16:16:32-n32"; +} + +static Reloc::Model getEffectiveRelocModel(CodeModel::Model CM, + Optional<Reloc::Model> RM) { + if (!RM.hasValue() || CM == CodeModel::JITDefault) + return Reloc::Static; + return *RM; +} + +Nios2TargetMachine::Nios2TargetMachine(const Target &T, const Triple &TT, + StringRef CPU, StringRef FS, + const TargetOptions &Options, + Optional<Reloc::Model> RM, + CodeModel::Model CM, + CodeGenOpt::Level OL) + : LLVMTargetMachine(T, computeDataLayout(TT, CPU, Options), TT, CPU, FS, + Options, getEffectiveRelocModel(CM, RM), CM, OL) {} + +Nios2TargetMachine::~Nios2TargetMachine() {} diff --git a/interpreter/llvm/src/lib/Target/Nios2/Nios2TargetMachine.h b/interpreter/llvm/src/lib/Target/Nios2/Nios2TargetMachine.h new file mode 100644 index 0000000000000..7f145c82f32ce --- /dev/null +++ b/interpreter/llvm/src/lib/Target/Nios2/Nios2TargetMachine.h @@ -0,0 +1,30 @@ +//===-- Nios2TargetMachine.h - Define TargetMachine for Nios2 ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the Nios2 specific subclass of TargetMachine. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_NIOS2_NIOS2TARGETMACHINE_H +#define LLVM_LIB_TARGET_NIOS2_NIOS2TARGETMACHINE_H + +#include "llvm/Target/TargetMachine.h" + +namespace llvm { +class Nios2TargetMachine : public LLVMTargetMachine { +public: + Nios2TargetMachine(const Target &T, const Triple &TT, StringRef CPU, + StringRef FS, const TargetOptions &Options, + Optional<Reloc::Model> RM, CodeModel::Model CM, + CodeGenOpt::Level OL); + ~Nios2TargetMachine() override; +}; +} // namespace llvm + +#endif diff --git a/interpreter/llvm/src/lib/Target/Nios2/TargetInfo/CMakeLists.txt b/interpreter/llvm/src/lib/Target/Nios2/TargetInfo/CMakeLists.txt new file mode 100644 index 0000000000000..394d2c2680b76 --- /dev/null +++ b/interpreter/llvm/src/lib/Target/Nios2/TargetInfo/CMakeLists.txt @@ -0,0 +1 @@ +add_llvm_library(LLVMNios2Info Nios2TargetInfo.cpp) diff --git a/interpreter/llvm/src/lib/Target/Nios2/TargetInfo/LLVMBuild.txt b/interpreter/llvm/src/lib/Target/Nios2/TargetInfo/LLVMBuild.txt new file mode 100644 index 0000000000000..558f7501ea6b6 --- /dev/null +++ b/interpreter/llvm/src/lib/Target/Nios2/TargetInfo/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===- ./lib/Target/Nios2/TargetInfo/LLVMBuild.txt --------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = Nios2Info +parent = Nios2 +required_libraries = Support +add_to_library_groups = Nios2 diff --git a/interpreter/llvm/src/lib/Target/Nios2/TargetInfo/Nios2TargetInfo.cpp b/interpreter/llvm/src/lib/Target/Nios2/TargetInfo/Nios2TargetInfo.cpp new file mode 100644 index 0000000000000..e317686140f79 --- /dev/null +++ b/interpreter/llvm/src/lib/Target/Nios2/TargetInfo/Nios2TargetInfo.cpp @@ -0,0 +1,24 @@ +//===-- Nios2TargetInfo.cpp - Nios2 Target Implementation -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "Nios2.h" +#include "llvm/Support/TargetRegistry.h" + +using namespace llvm; + +Target &llvm::getTheNios2Target() { + static Target TheNios2Target; + return TheNios2Target; +} + +extern "C" void LLVMInitializeNios2TargetInfo() { + RegisterTarget<Triple::nios2> + X(getTheNios2Target(), "nios2", "Nios2"); +} diff --git a/interpreter/llvm/src/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/interpreter/llvm/src/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp index 84bb9ec568009..baf5902ddf584 100644 --- a/interpreter/llvm/src/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp +++ b/interpreter/llvm/src/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp @@ -12,9 +12,9 @@ //===----------------------------------------------------------------------===// #include "PPCInstPrinter.h" -#include "PPCInstrInfo.h" #include "MCTargetDesc/PPCMCTargetDesc.h" #include "MCTargetDesc/PPCPredicates.h" +#include "PPCInstrInfo.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrInfo.h" diff --git a/interpreter/llvm/src/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/interpreter/llvm/src/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp index 4863ac5427366..bdad2fe8714fd 100644 --- a/interpreter/llvm/src/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp +++ b/interpreter/llvm/src/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp @@ -7,8 +7,10 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/PPCMCTargetDesc.h" #include "MCTargetDesc/PPCFixupKinds.h" +#include "MCTargetDesc/PPCMCTargetDesc.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/BinaryFormat/MachO.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCELFObjectWriter.h" @@ -18,9 +20,7 @@ #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCSymbolELF.h" #include "llvm/MC/MCValue.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MachO.h" #include "llvm/Support/TargetRegistry.h" using namespace llvm; @@ -113,8 +113,9 @@ class PPCAsmBackend : public MCAsmBackend { return (IsLittleEndian? InfosLE : InfosBE)[Kind - FirstTargetFixupKind]; } - void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, - uint64_t Value, bool IsPCRel, MCContext &Ctx) const override { + void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target, MutableArrayRef<char> Data, + uint64_t Value, bool IsResolved) const override { Value = adjustFixupValue(Fixup.getKind(), Value); if (!Value) return; // Doesn't change encoding.
@@ -130,12 +131,11 @@ class PPCAsmBackend : public MCAsmBackend { } } - void processFixupValue(const MCAssembler &Asm, const MCAsmLayout &Layout, - const MCFixup &Fixup, const MCFragment *DF, - const MCValue &Target, uint64_t &Value, - bool &IsResolved) override { + bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target) override { switch ((PPC::Fixups)Fixup.getKind()) { - default: break; + default: + return false; case PPC::fixup_ppc_br24: case PPC::fixup_ppc_br24abs: // If the target symbol has a local entry point we must not attempt @@ -148,10 +148,10 @@ class PPCAsmBackend : public MCAsmBackend { // and thus the shift to pack it. unsigned Other = S->getOther() << 2; if ((Other & ELF::STO_PPC64_LOCAL_MASK) != 0) - IsResolved = false; + return true; } } - break; + return false; } } diff --git a/interpreter/llvm/src/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/interpreter/llvm/src/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp index fd279c60f3f59..1488bd5b0be61 100644 --- a/interpreter/llvm/src/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp +++ b/interpreter/llvm/src/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp @@ -7,9 +7,9 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/PPCMCTargetDesc.h" #include "MCTargetDesc/PPCFixupKinds.h" #include "MCTargetDesc/PPCMCExpr.h" +#include "MCTargetDesc/PPCMCTargetDesc.h" #include "llvm/ADT/STLExtras.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCExpr.h" diff --git a/interpreter/llvm/src/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h b/interpreter/llvm/src/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h index ae43e59d3cb1f..dce443997ea59 100644 --- a/interpreter/llvm/src/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h +++ b/interpreter/llvm/src/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h @@ -17,35 +17,31 @@ namespace llvm { namespace PPC { enum Fixups { - // fixup_ppc_br24 - 24-bit PC relative relocation for direct branches like 'b' - // and 'bl'. + // 24-bit PC relative relocation for direct branches like 'b' and 'bl'. fixup_ppc_br24 = FirstTargetFixupKind, - - /// fixup_ppc_brcond14 - 14-bit PC relative relocation for conditional - /// branches. + + /// 14-bit PC relative relocation for conditional branches. fixup_ppc_brcond14, - - /// fixup_ppc_br24abs - 24-bit absolute relocation for direct branches - /// like 'ba' and 'bla'. + + /// 24-bit absolute relocation for direct branches like 'ba' and 'bla'. fixup_ppc_br24abs, - /// fixup_ppc_brcond14abs - 14-bit absolute relocation for conditional - /// branches. + /// 14-bit absolute relocation for conditional branches. fixup_ppc_brcond14abs, - /// fixup_ppc_half16 - A 16-bit fixup corresponding to lo16(_foo) - /// or ha16(_foo) for instrs like 'li' or 'addis'. + /// A 16-bit fixup corresponding to lo16(_foo) or ha16(_foo) for instrs like + /// 'li' or 'addis'. fixup_ppc_half16, - - /// fixup_ppc_half16ds - A 14-bit fixup corresponding to lo16(_foo) with - /// implied 2 zero bits for instrs like 'std'. + + /// A 14-bit fixup corresponding to lo16(_foo) with implied 2 zero bits for + /// instrs like 'std'. fixup_ppc_half16ds, - /// fixup_ppc_nofixup - Not a true fixup, but ties a symbol to a call - /// to __tls_get_addr for the TLS general and local dynamic models, - /// or inserts the thread-pointer register number. 
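// Editorial aside, not part of the patch: the st_other test in
// shouldForceRelocation() above is the ELFv2 local-entry encoding. A minimal
// standalone sketch of the same check on a raw ELF st_other byte (the helper
// name is ours; note that MCSymbolELF::getOther() returns these bits already
// shifted right by 2, which is why the code above shifts left by 2 first):
//
//   #include "llvm/BinaryFormat/ELF.h"
//   #include <cstdint>
//
//   static bool hasLocalEntry(uint8_t StOther) {
//     // Non-zero STO_PPC64_LOCAL bits mean the callee's local entry point
//     // differs from its global entry point, so direct branches to it must
//     // keep their relocation instead of being resolved at assembly time.
//     return (StOther & llvm::ELF::STO_PPC64_LOCAL_MASK) != 0;
//   }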
+ /// Not a true fixup, but ties a symbol to a call to __tls_get_addr for the + /// TLS general and local dynamic models, or inserts the thread-pointer + /// register number. fixup_ppc_nofixup, - + // Marker LastTargetFixupKind, NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind diff --git a/interpreter/llvm/src/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/interpreter/llvm/src/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp index a00b56af0490a..92c8c224b71b4 100644 --- a/interpreter/llvm/src/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp +++ b/interpreter/llvm/src/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp @@ -271,7 +271,8 @@ unsigned PPCMCCodeEmitter::getMemRIX16Encoding(const MCInst &MI, unsigned OpNo, unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI) << 12; const MCOperand &MO = MI.getOperand(OpNo); - assert(MO.isImm()); + assert(MO.isImm() && !(MO.getImm() % 16) && + "Expecting an immediate that is a multiple of 16"); return ((getMachineOpValue(MI, MO, Fixups, STI) >> 4) & 0xFFF) | RegBits; } diff --git a/interpreter/llvm/src/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp b/interpreter/llvm/src/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp index 6b97d4c1456b6..54f664314578e 100644 --- a/interpreter/llvm/src/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp +++ b/interpreter/llvm/src/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#include "PPCFixupKinds.h" #include "PPCMCExpr.h" +#include "PPCFixupKinds.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" diff --git a/interpreter/llvm/src/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/interpreter/llvm/src/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp index 2d686f2279194..e8f220ea54576 100644 --- a/interpreter/llvm/src/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp +++ b/interpreter/llvm/src/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp @@ -11,12 +11,13 @@ // //===----------------------------------------------------------------------===// +#include "MCTargetDesc/PPCMCTargetDesc.h" #include "InstPrinter/PPCInstPrinter.h" #include "MCTargetDesc/PPCMCAsmInfo.h" -#include "MCTargetDesc/PPCMCTargetDesc.h" #include "PPCTargetStreamer.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDwarf.h" @@ -30,11 +31,10 @@ #include "llvm/MC/MCSymbolELF.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CodeGen.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/interpreter/llvm/src/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp b/interpreter/llvm/src/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp index 1f38a8c947e73..d5506277ca880 100644 --- a/interpreter/llvm/src/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp +++ b/interpreter/llvm/src/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp @@ -7,9 +7,10 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/PPCMCTargetDesc.h" #include "MCTargetDesc/PPCFixupKinds.h" +#include "MCTargetDesc/PPCMCTargetDesc.h" #include "llvm/ADT/Twine.h" 
+#include "llvm/BinaryFormat/MachO.h" #include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" @@ -18,7 +19,6 @@ #include "llvm/MC/MCValue.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" -#include "llvm/Support/MachO.h" using namespace llvm; @@ -151,7 +151,7 @@ static void makeRelocationInfo(MachO::any_relocation_info &MRE, // The bitfield offsets that work (as determined by trial-and-error) // are different than what is documented in the mach-o manuals. // This appears to be an endianness issue; reversing the order of the - // documented bitfields in fixes this (but + // documented bitfields in fixes this (but // breaks x86/ARM assembly). MRE.r_word1 = ((Index << 8) | // was << 0 (IsPCRel << 7) | // was << 24 @@ -219,11 +219,11 @@ bool PPCMachObjectWriter::recordScatteredRelocation( const MCSymbol *SB = &B->getSymbol(); if (!SB->getFragment()) - report_fatal_error("symbol '" + B->getSymbol().getName() + + report_fatal_error("symbol '" + SB->getName() + "' can not be undefined in a subtraction expression"); - // FIXME: is Type correct? see include/llvm/Support/MachO.h - Value2 = Writer->getSymbolAddress(B->getSymbol(), Layout); + // FIXME: is Type correct? see include/llvm/BinaryFormat/MachO.h + Value2 = Writer->getSymbolAddress(*SB, Layout); FixedValue -= Writer->getSectionAddress(SB->getFragment()->getParent()); } // FIXME: does FixedValue get used?? diff --git a/interpreter/llvm/src/lib/Target/PowerPC/PPC.h b/interpreter/llvm/src/lib/Target/PowerPC/PPC.h index 38ae62b26757a..ad92ac8ce1207 100644 --- a/interpreter/llvm/src/lib/Target/PowerPC/PPC.h +++ b/interpreter/llvm/src/lib/Target/PowerPC/PPC.h @@ -15,6 +15,7 @@ #ifndef LLVM_LIB_TARGET_POWERPC_PPC_H #define LLVM_LIB_TARGET_POWERPC_PPC_H +#include "llvm/Support/CodeGen.h" #include "MCTargetDesc/PPCMCTargetDesc.h" // GCC #defines PPC on Linux but we use it as our namespace name @@ -24,12 +25,11 @@ namespace llvm { class PPCTargetMachine; class PassRegistry; class FunctionPass; - class ImmutablePass; class MachineInstr; class AsmPrinter; class MCInst; - FunctionPass *createPPCCTRLoops(PPCTargetMachine &TM); + FunctionPass *createPPCCTRLoops(); #ifndef NDEBUG FunctionPass *createPPCCTRLoopsVerify(); #endif @@ -42,7 +42,7 @@ namespace llvm { FunctionPass *createPPCMIPeepholePass(); FunctionPass *createPPCBranchSelectionPass(); FunctionPass *createPPCQPXLoadSplatPass(); - FunctionPass *createPPCISelDag(PPCTargetMachine &TM); + FunctionPass *createPPCISelDag(PPCTargetMachine &TM, CodeGenOpt::Level OL); FunctionPass *createPPCTLSDynamicCallPass(); FunctionPass *createPPCBoolRetToIntPass(); FunctionPass *createPPCExpandISELPass(); @@ -52,6 +52,7 @@ namespace llvm { void initializePPCVSXFMAMutatePass(PassRegistry&); void initializePPCBoolRetToIntPass(PassRegistry&); void initializePPCExpandISELPass(PassRegistry &); + void initializePPCTLSDynamicCallPass(PassRegistry &); extern char &PPCVSXFMAMutateID; namespace PPCII { diff --git a/interpreter/llvm/src/lib/Target/PowerPC/PPCAsmPrinter.cpp b/interpreter/llvm/src/lib/Target/PowerPC/PPCAsmPrinter.cpp index 1f181d007f637..841b8c5144641 100644 --- a/interpreter/llvm/src/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/interpreter/llvm/src/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -16,11 +16,11 @@ // //===----------------------------------------------------------------------===// -#include "PPC.h" -#include "PPCInstrInfo.h" #include "InstPrinter/PPCInstPrinter.h" #include "MCTargetDesc/PPCMCExpr.h" #include "MCTargetDesc/PPCMCTargetDesc.h" 
+#include "PPC.h" +#include "PPCInstrInfo.h" #include "PPCMachineFunctionInfo.h" #include "PPCSubtarget.h" #include "PPCTargetMachine.h" @@ -29,6 +29,8 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/BinaryFormat/MachO.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" @@ -55,11 +57,9 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MachO.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" #include #include diff --git a/interpreter/llvm/src/lib/Target/PowerPC/PPCBoolRetToInt.cpp b/interpreter/llvm/src/lib/Target/PowerPC/PPCBoolRetToInt.cpp index 93c201d038690..55e105dad0e5b 100644 --- a/interpreter/llvm/src/lib/Target/PowerPC/PPCBoolRetToInt.cpp +++ b/interpreter/llvm/src/lib/Target/PowerPC/PPCBoolRetToInt.cpp @@ -7,15 +7,15 @@ // //===----------------------------------------------------------------------===// // -// This file implements converting i1 values to i32 if they could be more +// This file implements converting i1 values to i32/i64 if they could be more // profitably allocated as GPRs rather than CRs. This pass will become totally // unnecessary if Register Bank Allocation and Global Instruction Selection ever // go upstream. // -// Presently, the pass converts i1 Constants, and Arguments to i32 if the +// Presently, the pass converts i1 Constants, and Arguments to i32/i64 if the // transitive closure of their uses includes only PHINodes, CallInsts, and // ReturnInsts. The rational is that arguments are generally passed and returned -// in GPRs rather than CRs, so casting them to i32 at the LLVM IR level will +// in GPRs rather than CRs, so casting them to i32/i64 at the LLVM IR level will // actually save casts at the Machine Instruction level. // // It might be useful to expand this pass to add bit-wise operations to the list @@ -33,11 +33,12 @@ //===----------------------------------------------------------------------===// #include "PPC.h" +#include "PPCTargetMachine.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/IR/Argument.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Dominators.h" @@ -50,8 +51,9 @@ #include "llvm/IR/Use.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" -#include "llvm/Support/Casting.h" #include "llvm/Pass.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/Support/Casting.h" #include using namespace llvm; @@ -87,17 +89,19 @@ class PPCBoolRetToInt : public FunctionPass { return Defs; } - // Translate a i1 value to an equivalent i32 value: - static Value *translate(Value *V) { - Type *Int32Ty = Type::getInt32Ty(V->getContext()); + // Translate a i1 value to an equivalent i32/i64 value: + Value *translate(Value *V) { + Type *IntTy = ST->isPPC64() ? Type::getInt64Ty(V->getContext()) + : Type::getInt32Ty(V->getContext()); + if (auto *C = dyn_cast(V)) - return ConstantExpr::getZExt(C, Int32Ty); + return ConstantExpr::getZExt(C, IntTy); if (auto *P = dyn_cast(V)) { // Temporarily set the operands to 0. We'll fix this later in // runOnUse. 
- Value *Zero = Constant::getNullValue(Int32Ty); + Value *Zero = Constant::getNullValue(IntTy); PHINode *Q = - PHINode::Create(Int32Ty, P->getNumIncomingValues(), P->getName(), P); + PHINode::Create(IntTy, P->getNumIncomingValues(), P->getName(), P); for (unsigned i = 0; i < P->getNumOperands(); ++i) Q->addIncoming(Zero, P->getIncomingBlock(i)); return Q; @@ -109,7 +113,7 @@ class PPCBoolRetToInt : public FunctionPass { auto InstPt = A ? &*A->getParent()->getEntryBlock().begin() : I->getNextNode(); - return new ZExtInst(V, Int32Ty, "", InstPt); + return new ZExtInst(V, IntTy, "", InstPt); } typedef SmallPtrSet PHINodeSet; @@ -185,6 +189,13 @@ class PPCBoolRetToInt : public FunctionPass { if (skipFunction(F)) return false; + auto *TPC = getAnalysisIfAvailable(); + if (!TPC) + return false; + + auto &TM = TPC->getTM(); + ST = TM.getSubtargetImpl(F); + PHINodeSet PromotablePHINodes = getPromotablePHINodes(F); B2IMap Bool2IntMap; bool Changed = false; @@ -205,7 +216,7 @@ class PPCBoolRetToInt : public FunctionPass { return Changed; } - static bool runOnUse(Use &U, const PHINodeSet &PromotablePHINodes, + bool runOnUse(Use &U, const PHINodeSet &PromotablePHINodes, B2IMap &BoolToIntMap) { auto Defs = findAllDefs(U); @@ -262,13 +273,16 @@ class PPCBoolRetToInt : public FunctionPass { AU.addPreserved(); FunctionPass::getAnalysisUsage(AU); } + +private: + const PPCSubtarget *ST; }; } // end anonymous namespace char PPCBoolRetToInt::ID = 0; INITIALIZE_PASS(PPCBoolRetToInt, "bool-ret-to-int", - "Convert i1 constants to i32 if they are returned", + "Convert i1 constants to i32/i64 if they are returned", false, false) FunctionPass *llvm::createPPCBoolRetToIntPass() { return new PPCBoolRetToInt(); } diff --git a/interpreter/llvm/src/lib/Target/PowerPC/PPCBranchSelector.cpp b/interpreter/llvm/src/lib/Target/PowerPC/PPCBranchSelector.cpp index b7d3154d00000..d0b66f9bca09a 100644 --- a/interpreter/llvm/src/lib/Target/PowerPC/PPCBranchSelector.cpp +++ b/interpreter/llvm/src/lib/Target/PowerPC/PPCBranchSelector.cpp @@ -15,8 +15,8 @@ // //===----------------------------------------------------------------------===// -#include "PPC.h" #include "MCTargetDesc/PPCPredicates.h" +#include "PPC.h" #include "PPCInstrBuilder.h" #include "PPCInstrInfo.h" #include "PPCSubtarget.h" diff --git a/interpreter/llvm/src/lib/Target/PowerPC/PPCCTRLoops.cpp b/interpreter/llvm/src/lib/Target/PowerPC/PPCCTRLoops.cpp index 70c4170653aef..53f33ac1fc0ed 100644 --- a/interpreter/llvm/src/lib/Target/PowerPC/PPCCTRLoops.cpp +++ b/interpreter/llvm/src/lib/Target/PowerPC/PPCCTRLoops.cpp @@ -23,14 +23,15 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Scalar.h" #include "PPC.h" +#include "PPCSubtarget.h" #include "PPCTargetMachine.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" @@ -43,6 +44,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" @@ -81,10 +83,7 @@ namespace { public: static char ID; - PPCCTRLoops() : FunctionPass(ID), TM(nullptr) { - 
initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry()); - } - PPCCTRLoops(PPCTargetMachine &TM) : FunctionPass(ID), TM(&TM) { + PPCCTRLoops() : FunctionPass(ID) { initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry()); } @@ -99,16 +98,18 @@ namespace { } private: - bool mightUseCTR(const Triple &TT, BasicBlock *BB); + bool mightUseCTR(BasicBlock *BB); bool convertToCTRLoop(Loop *L); private: - PPCTargetMachine *TM; + const PPCTargetMachine *TM; + const PPCSubtarget *STI; + const PPCTargetLowering *TLI; + const DataLayout *DL; + const TargetLibraryInfo *LibInfo; LoopInfo *LI; ScalarEvolution *SE; - const DataLayout *DL; DominatorTree *DT; - const TargetLibraryInfo *LibInfo; bool PreserveLCSSA; }; @@ -149,9 +150,7 @@ INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_END(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops", false, false) -FunctionPass *llvm::createPPCCTRLoops(PPCTargetMachine &TM) { - return new PPCCTRLoops(TM); -} +FunctionPass *llvm::createPPCCTRLoops() { return new PPCCTRLoops(); } #ifndef NDEBUG INITIALIZE_PASS_BEGIN(PPCCTRLoopsVerify, "ppc-ctr-loops-verify", @@ -169,6 +168,14 @@ bool PPCCTRLoops::runOnFunction(Function &F) { if (skipFunction(F)) return false; + auto *TPC = getAnalysisIfAvailable(); + if (!TPC) + return false; + + TM = &TPC->getTM(); + STI = TM->getSubtargetImpl(F); + TLI = STI->getTargetLowering(); + LI = &getAnalysis().getLoopInfo(); SE = &getAnalysis().getSE(); DT = &getAnalysis().getDomTree(); @@ -198,8 +205,7 @@ static bool isLargeIntegerTy(bool Is32Bit, Type *Ty) { // Determining the address of a TLS variable results in a function call in // certain TLS models. -static bool memAddrUsesCTR(const PPCTargetMachine *TM, - const Value *MemAddr) { +static bool memAddrUsesCTR(const PPCTargetMachine &TM, const Value *MemAddr) { const auto *GV = dyn_cast(MemAddr); if (!GV) { // Recurse to check for constants that refer to TLS global variables. @@ -213,35 +219,35 @@ static bool memAddrUsesCTR(const PPCTargetMachine *TM, if (!GV->isThreadLocal()) return false; - if (!TM) - return true; - TLSModel::Model Model = TM->getTLSModel(GV); + TLSModel::Model Model = TM.getTLSModel(GV); return Model == TLSModel::GeneralDynamic || Model == TLSModel::LocalDynamic; } -bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) { +// Loop through the inline asm constraints and look for something that clobbers +// ctr. +static bool asmClobbersCTR(InlineAsm *IA) { + InlineAsm::ConstraintInfoVector CIV = IA->ParseConstraints(); + for (unsigned i = 0, ie = CIV.size(); i < ie; ++i) { + InlineAsm::ConstraintInfo &C = CIV[i]; + if (C.Type != InlineAsm::isInput) + for (unsigned j = 0, je = C.Codes.size(); j < je; ++j) + if (StringRef(C.Codes[j]).equals_lower("{ctr}")) + return true; + } + return false; +} + +bool PPCCTRLoops::mightUseCTR(BasicBlock *BB) { for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); J != JE; ++J) { if (CallInst *CI = dyn_cast(J)) { + // Inline ASM is okay, unless it clobbers the ctr register. if (InlineAsm *IA = dyn_cast(CI->getCalledValue())) { - // Inline ASM is okay, unless it clobbers the ctr register. 
- InlineAsm::ConstraintInfoVector CIV = IA->ParseConstraints(); - for (unsigned i = 0, ie = CIV.size(); i < ie; ++i) { - InlineAsm::ConstraintInfo &C = CIV[i]; - if (C.Type != InlineAsm::isInput) - for (unsigned j = 0, je = C.Codes.size(); j < je; ++j) - if (StringRef(C.Codes[j]).equals_lower("{ctr}")) - return true; - } - + if (asmClobbersCTR(IA)) + return true; continue; } - if (!TM) - return true; - const TargetLowering *TLI = - TM->getSubtargetImpl(*BB->getParent())->getTargetLowering(); - if (Function *F = CI->getCalledFunction()) { // Most intrinsics don't become function calls, but some might. // sin, cos, exp and log are always calls. @@ -380,9 +386,8 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) { } if (Opcode) { - auto &DL = CI->getModule()->getDataLayout(); - MVT VTy = TLI->getSimpleValueType(DL, CI->getArgOperand(0)->getType(), - true); + MVT VTy = TLI->getSimpleValueType( + *DL, CI->getArgOperand(0)->getType(), true); if (VTy == MVT::Other) return true; @@ -406,17 +411,17 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) { CastInst *CI = cast(J); if (CI->getSrcTy()->getScalarType()->isPPC_FP128Ty() || CI->getDestTy()->getScalarType()->isPPC_FP128Ty() || - isLargeIntegerTy(TT.isArch32Bit(), CI->getSrcTy()->getScalarType()) || - isLargeIntegerTy(TT.isArch32Bit(), CI->getDestTy()->getScalarType())) + isLargeIntegerTy(!TM->isPPC64(), CI->getSrcTy()->getScalarType()) || + isLargeIntegerTy(!TM->isPPC64(), CI->getDestTy()->getScalarType())) return true; - } else if (isLargeIntegerTy(TT.isArch32Bit(), + } else if (isLargeIntegerTy(!TM->isPPC64(), J->getType()->getScalarType()) && (J->getOpcode() == Instruction::UDiv || J->getOpcode() == Instruction::SDiv || J->getOpcode() == Instruction::URem || J->getOpcode() == Instruction::SRem)) { return true; - } else if (TT.isArch32Bit() && + } else if (!TM->isPPC64() && isLargeIntegerTy(false, J->getType()->getScalarType()) && (J->getOpcode() == Instruction::Shl || J->getOpcode() == Instruction::AShr || @@ -428,16 +433,11 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) { // On PowerPC, indirect jumps use the counter register. return true; } else if (SwitchInst *SI = dyn_cast(J)) { - if (!TM) - return true; - const TargetLowering *TLI = - TM->getSubtargetImpl(*BB->getParent())->getTargetLowering(); - if (SI->getNumCases() + 1 >= (unsigned)TLI->getMinimumJumpTableEntries()) return true; } - if (TM->getSubtargetImpl(*BB->getParent())->getTargetLowering()->useSoftFloat()) { + if (STI->useSoftFloat()) { switch(J->getOpcode()) { case Instruction::FAdd: case Instruction::FSub: @@ -456,7 +456,7 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) { } for (Value *Operand : J->operands()) - if (memAddrUsesCTR(TM, Operand)) + if (memAddrUsesCTR(*TM, Operand)) return true; } @@ -466,11 +466,6 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) { bool PPCCTRLoops::convertToCTRLoop(Loop *L) { bool MadeChange = false; - const Triple TT = - Triple(L->getHeader()->getParent()->getParent()->getTargetTriple()); - if (!TT.isArch32Bit() && !TT.isArch64Bit()) - return MadeChange; // Unknown arch. type. - // Process nested loops first. for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) { MadeChange |= convertToCTRLoop(*I); @@ -495,7 +490,7 @@ bool PPCCTRLoops::convertToCTRLoop(Loop *L) { // want to use the counter register if the loop contains calls. 
for (Loop::block_iterator I = L->block_begin(), IE = L->block_end(); I != IE; ++I) - if (mightUseCTR(TT, *I)) + if (mightUseCTR(*I)) return MadeChange; SmallVector ExitingBlocks; @@ -517,7 +512,7 @@ bool PPCCTRLoops::convertToCTRLoop(Loop *L) { } else if (!SE->isLoopInvariant(EC, L)) continue; - if (SE->getTypeSizeInBits(EC->getType()) > (TT.isArch64Bit() ? 64 : 32)) + if (SE->getTypeSizeInBits(EC->getType()) > (TM->isPPC64() ? 64 : 32)) continue; // We now have a loop-invariant count of loop iterations (which is not the @@ -571,7 +566,7 @@ bool PPCCTRLoops::convertToCTRLoop(Loop *L) { // preheader, then we can use it (except if the preheader contains a use of // the CTR register because some such uses might be reordered by the // selection DAG after the mtctr instruction). - if (!Preheader || mightUseCTR(TT, Preheader)) + if (!Preheader || mightUseCTR(Preheader)) Preheader = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA); if (!Preheader) return MadeChange; @@ -582,10 +577,9 @@ bool PPCCTRLoops::convertToCTRLoop(Loop *L) { // selected branch. MadeChange = true; - SCEVExpander SCEVE(*SE, Preheader->getModule()->getDataLayout(), "loopcnt"); + SCEVExpander SCEVE(*SE, *DL, "loopcnt"); LLVMContext &C = SE->getContext(); - Type *CountType = TT.isArch64Bit() ? Type::getInt64Ty(C) : - Type::getInt32Ty(C); + Type *CountType = TM->isPPC64() ? Type::getInt64Ty(C) : Type::getInt32Ty(C); if (!ExitCount->getType()->isPointerTy() && ExitCount->getType() != CountType) ExitCount = SE->getZeroExtendExpr(ExitCount, CountType); @@ -613,7 +607,10 @@ bool PPCCTRLoops::convertToCTRLoop(Loop *L) { // The old condition may be dead now, and may have even created a dead PHI // (the original induction variable). RecursivelyDeleteTriviallyDeadInstructions(OldCond); - DeleteDeadPHIs(CountedExitBlock); + // Run through the basic blocks of the loop and see if any of them have dead + // PHIs that can be removed. + for (auto I : L->blocks()) + DeleteDeadPHIs(I); ++NumCTRLoops; return MadeChange; diff --git a/interpreter/llvm/src/lib/Target/PowerPC/PPCEarlyReturn.cpp b/interpreter/llvm/src/lib/Target/PowerPC/PPCEarlyReturn.cpp index 6bd229625fc3a..811e4dd9dfe16 100644 --- a/interpreter/llvm/src/lib/Target/PowerPC/PPCEarlyReturn.cpp +++ b/interpreter/llvm/src/lib/Target/PowerPC/PPCEarlyReturn.cpp @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#include "PPC.h" #include "MCTargetDesc/PPCPredicates.h" +#include "PPC.h" #include "PPCInstrBuilder.h" #include "PPCInstrInfo.h" #include "PPCMachineFunctionInfo.h" diff --git a/interpreter/llvm/src/lib/Target/PowerPC/PPCExpandISEL.cpp b/interpreter/llvm/src/lib/Target/PowerPC/PPCExpandISEL.cpp index ebd414baf1d21..41e3190c3eec7 100644 --- a/interpreter/llvm/src/lib/Target/PowerPC/PPCExpandISEL.cpp +++ b/interpreter/llvm/src/lib/Target/PowerPC/PPCExpandISEL.cpp @@ -339,7 +339,7 @@ void PPCExpandISEL::reorganizeBlockLayout(BlockISELList &BIL, // Note: Cannot use stepBackward instead since we are using the Reg // liveness state at the end of MBB (liveOut of MBB) as the liveIn for // NewSuccessor. Otherwise, will cause cyclic dependence. 
- LivePhysRegs LPR(MF->getSubtarget().getRegisterInfo()); + LivePhysRegs LPR(*MF->getSubtarget().getRegisterInfo()); SmallVector, 2> Clobbers; for (MachineInstr &MI : *MBB) LPR.stepForward(MI, Clobbers); diff --git a/interpreter/llvm/src/lib/Target/PowerPC/PPCFastISel.cpp b/interpreter/llvm/src/lib/Target/PowerPC/PPCFastISel.cpp index 2fc8654deeab9..bc9957194f6dd 100644 --- a/interpreter/llvm/src/lib/Target/PowerPC/PPCFastISel.cpp +++ b/interpreter/llvm/src/lib/Target/PowerPC/PPCFastISel.cpp @@ -13,10 +13,10 @@ // //===----------------------------------------------------------------------===// -#include "PPC.h" #include "MCTargetDesc/PPCPredicates.h" -#include "PPCCallingConv.h" +#include "PPC.h" #include "PPCCCState.h" +#include "PPCCallingConv.h" #include "PPCISelLowering.h" #include "PPCMachineFunctionInfo.h" #include "PPCSubtarget.h" diff --git a/interpreter/llvm/src/lib/Target/PowerPC/PPCFrameLowering.cpp b/interpreter/llvm/src/lib/Target/PowerPC/PPCFrameLowering.cpp index 40bfe3a449f70..b49c3345a17dd 100644 --- a/interpreter/llvm/src/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/interpreter/llvm/src/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -435,22 +435,19 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF, const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); - // If we are a leaf function, and use up to 224 bytes of stack space, - // don't have a frame pointer, calls, or dynamic alloca then we do not need - // to adjust the stack pointer (we fit in the Red Zone). - // The 32-bit SVR4 ABI has no Red Zone. However, it can still generate - // stackless code if all local vars are reg-allocated. - bool DisableRedZone = MF.getFunction()->hasFnAttribute(Attribute::NoRedZone); unsigned LR = RegInfo->getRARegister(); - if (!DisableRedZone && - (Subtarget.isPPC64() || // 32-bit SVR4, no stack- - !Subtarget.isSVR4ABI() || // allocated locals. - FrameSize == 0) && - FrameSize <= 224 && // Fits in red zone. - !MFI.hasVarSizedObjects() && // No dynamic alloca. - !MFI.adjustsStack() && // No calls. - !MustSaveLR(MF, LR) && - !RegInfo->hasBasePointer(MF)) { // No special alignment. + bool DisableRedZone = MF.getFunction()->hasFnAttribute(Attribute::NoRedZone); + bool CanUseRedZone = !MFI.hasVarSizedObjects() && // No dynamic alloca. + !MFI.adjustsStack() && // No calls. + !MustSaveLR(MF, LR) && // No need to save LR. + !RegInfo->hasBasePointer(MF); // No special alignment. + + // Note: for PPC32 SVR4ABI (Non-DarwinABI), we can still generate stackless + // code if all local vars are reg-allocated. + bool FitsInRedZone = FrameSize <= Subtarget.getRedZoneSize(); + + // Check whether we can skip adjusting the stack pointer (by using red zone) + if (!DisableRedZone && CanUseRedZone && FitsInRedZone) { // No need for frame if (UpdateMF) MFI.setStackSize(0); @@ -521,7 +518,7 @@ void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const { const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); bool HasBP = RegInfo->hasBasePointer(MF); unsigned BPReg = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg; - unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FPReg; + unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FP8Reg; for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE; ++BI) @@ -1765,31 +1762,36 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF, // Check whether the frame pointer register is allocated. If so, make sure it // is spilled to the correct offset. 
if (needsFP(MF)) { - HasGPSaveArea = true; - int FI = PFI->getFramePointerSaveIndex(); assert(FI && "No Frame Pointer Save Slot!"); - MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); + // FP is R31/X31, so no need to update MinGPR/MinG8R. + HasGPSaveArea = true; } if (PFI->usesPICBase()) { - HasGPSaveArea = true; - int FI = PFI->getPICBasePointerSaveIndex(); assert(FI && "No PIC Base Pointer Save Slot!"); - MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); + + MinGPR = std::min<unsigned>(MinGPR, PPC::R30); + HasGPSaveArea = true; } const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); if (RegInfo->hasBasePointer(MF)) { - HasGPSaveArea = true; - int FI = PFI->getBasePointerSaveIndex(); assert(FI && "No Base Pointer Save Slot!"); - MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); + + unsigned BP = RegInfo->getBaseRegister(MF); + if (PPC::G8RCRegClass.contains(BP)) { + MinG8R = std::min(MinG8R, BP); + HasG8SaveArea = true; + } else if (PPC::GPRCRegClass.contains(BP)) { + MinGPR = std::min(MinGPR, BP); + HasGPSaveArea = true; + } } // General register save area starts right below the Floating-point @@ -1864,8 +1866,13 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF, } if (HasVRSaveArea) { - // Insert alignment padding, we need 16-byte alignment. - LowerBound = (LowerBound - 15) & ~(15); + // Insert alignment padding; we need 16-byte alignment. Note: for a + // positive number the align-up formula is y = (x + (n-1)) & ~(n-1), but + // since we are working with a negative offset here (the stack grows + // downward), aligning down is simply y = x & ~(n-1), where x is the offset + // before aligning, n is the alignment size (n = 16 here) and y is the + // offset after aligning. + assert(LowerBound <= 0 && "Expect LowerBound to have a non-positive value!"); + LowerBound &= ~(15); for (unsigned i = 0, e = VRegs.size(); i != e; ++i) { int FI = VRegs[i].getFrameIdx(); diff --git a/interpreter/llvm/src/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/interpreter/llvm/src/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 5fa7b2c6bfb1b..901539b682baa 100644 --- a/interpreter/llvm/src/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/interpreter/llvm/src/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -21,9 +21,10 @@ #include "PPCTargetMachine.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/ISDOpcodes.h" @@ -54,7 +55,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/ADT/Statistic.h" #include #include #include @@ -77,6 +77,11 @@ STATISTIC(SignExtensionsAdded, "Number of sign extensions for compare inputs added."); STATISTIC(ZeroExtensionsAdded, "Number of zero extensions for compare inputs added."); +STATISTIC(NumLogicOpsOnComparison, + "Number of logical ops on i1 values calculated in GPR."); +STATISTIC(OmittedForNonExtendUses, + "Number of compares not eliminated as they have non-extending uses."); + // FIXME: Remove this once the bug has been fixed!
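// Editorial aside, not part of the patch: the 16-byte align-down used in
// PPCFrameLowering::processFunctionBeforeFrameFinalized above relies on
// two's-complement masking working for negative stack offsets. A minimal
// self-contained check (the helper name is ours):
//
//   static int alignDown16(int Offset) {
//     // For x <= 0 and n = 16, x & ~(n - 1) rounds x away from zero to the
//     // next 16-byte boundary, e.g. -100 -> -112 and -112 -> -112.
//     return Offset & ~15;
//   }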
cl::opt<bool> ANDIGlueBug("expose-ppc-andi-glue-bug", cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden); @@ -109,8 +114,8 @@ namespace { unsigned GlobalBaseReg; public: - explicit PPCDAGToDAGISel(PPCTargetMachine &tm) - : SelectionDAGISel(tm), TM(tm) {} + explicit PPCDAGToDAGISel(PPCTargetMachine &tm, CodeGenOpt::Level OptLevel) + : SelectionDAGISel(tm, OptLevel), TM(tm) {} bool runOnMachineFunction(MachineFunction &MF) override { // Make sure we re-emit a set of the global base reg if necessary @@ -173,7 +178,7 @@ namespace { /// a base register plus a signed 16-bit displacement [r+imm]. bool SelectAddrImm(SDValue N, SDValue &Disp, SDValue &Base) { - return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, false); + return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 0); } /// SelectAddrImmOffs - Return true if the operand is valid for a preinc @@ -206,7 +211,11 @@ namespace { /// a base register plus a signed 16-bit displacement that is a multiple of 4. /// Suitable for use by STD and friends. bool SelectAddrImmX4(SDValue N, SDValue &Disp, SDValue &Base) { - return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, true); + return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 4); + } + + bool SelectAddrImmX16(SDValue N, SDValue &Disp, SDValue &Base) { + return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 16); + } // Select an address into a single register. @@ -275,6 +284,8 @@ namespace { bool trySETCC(SDNode *N); bool tryEXTEND(SDNode *N); + bool tryLogicOpOfCompares(SDNode *N); + SDValue computeLogicOpInGPR(SDValue LogicOp); SDValue signExtendInputIfNeeded(SDValue Input); SDValue zeroExtendInputIfNeeded(SDValue Input); SDValue addExtOrTrunc(SDValue NatWidthRes, ExtOrTruncConversion Conv); @@ -282,6 +293,10 @@ namespace { int64_t RHSValue, SDLoc dl); SDValue get32BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC, int64_t RHSValue, SDLoc dl); + SDValue get64BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC, + int64_t RHSValue, SDLoc dl); + SDValue get64BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC, + int64_t RHSValue, SDLoc dl); SDValue getSETCCInGPR(SDValue Compare, SetccInGPROpts ConvOpts); void PeepholePPC64(); @@ -294,6 +309,7 @@ namespace { bool AllUsersSelectZero(SDNode *N); void SwapAllSelectUsers(SDNode *N); + bool isOffsetMultipleOf(SDNode *N, unsigned Val) const; void transferMemOperands(SDNode *N, SDNode *Result); }; @@ -408,25 +424,6 @@ SDNode *PPCDAGToDAGISel::getGlobalBaseReg() { .getNode(); } -/// isIntS16Immediate - This method tests to see if the node is either a 32-bit -/// or 64-bit immediate, and if the value can be accurately represented as a -/// sign extension from a 16-bit value. If so, this returns true and the -/// immediate. -static bool isIntS16Immediate(SDNode *N, short &Imm) { - if (N->getOpcode() != ISD::Constant) - return false; - - Imm = (short)cast<ConstantSDNode>(N)->getZExtValue(); - if (N->getValueType(0) == MVT::i32) - return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue(); - else - return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue(); -} - -static bool isIntS16Immediate(SDValue Op, short &Imm) { - return isIntS16Immediate(Op.getNode(), Imm); -} - /// isInt32Immediate - This method tests to see if the node is a 32-bit constant /// operand. If so Imm will receive the 32-bit value.
static bool isInt32Immediate(SDNode *N, unsigned &Imm) { @@ -717,7 +714,10 @@ static uint64_t Rot64(uint64_t Imm, unsigned R) { static unsigned getInt64Count(int64_t Imm) { unsigned Count = getInt64CountDirect(Imm); - if (Count == 1) + + // If the instruction count is 1 or 2, we do not need further analysis + // since rotate + load constant requires at least 2 instructions. + if (Count <= 2) return Count; for (unsigned r = 1; r < 63; ++r) { @@ -827,7 +827,10 @@ static SDNode *getInt64Direct(SelectionDAG *CurDAG, const SDLoc &dl, static SDNode *getInt64(SelectionDAG *CurDAG, const SDLoc &dl, int64_t Imm) { unsigned Count = getInt64CountDirect(Imm); - if (Count == 1) + + // If the instruction count is 1 or 2, we do not need further analysis + // since rotate + load constant requires at least 2 instructions. + if (Count <= 2) return getInt64Direct(CurDAG, dl, Imm); unsigned RMin = 0; @@ -2115,7 +2118,7 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, getI32Imm(Imm & 0xFFFF, dl)), 0); Opc = PPC::CMPLW; } else { - short SImm; + int16_t SImm; if (isIntS16Immediate(RHS, SImm)) return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS, getI32Imm((int)SImm & 0xFFFF, @@ -2162,7 +2165,7 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, getI64Imm(Imm & 0xFFFF, dl)), 0); Opc = PPC::CMPLD; } else { - short SImm; + int16_t SImm; if (isIntS16Immediate(RHS, SImm)) return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS, getI64Imm(SImm & 0xFFFF, dl)), @@ -2501,6 +2504,11 @@ bool PPCDAGToDAGISel::trySETCC(SDNode *N) { return true; } +// Is this opcode a bitwise logical operation? +static bool isLogicOp(unsigned Opc) { + return Opc == ISD::AND || Opc == ISD::OR || Opc == ISD::XOR; +} + /// If this node is a sign/zero extension of an integer comparison, /// it can usually be computed in GPR's rather than using comparison /// instructions and ISEL. We only do this on 64-bit targets for now @@ -2513,13 +2521,20 @@ bool PPCDAGToDAGISel::tryEXTEND(SDNode *N) { N->getOpcode() == ISD::SIGN_EXTEND) && "Expecting a zero/sign extend node!"); - if (N->getOperand(0).getOpcode() != ISD::SETCC) + SDValue WideRes; + // If we are zero-extending the result of a logical operation on i1 + // values, we can keep the values in GPRs. + if (isLogicOp(N->getOperand(0).getOpcode()) && + N->getOperand(0).getValueType() == MVT::i1 && + N->getOpcode() == ISD::ZERO_EXTEND) + WideRes = computeLogicOpInGPR(N->getOperand(0)); + else if (N->getOperand(0).getOpcode() != ISD::SETCC) return false; - - SDValue WideRes = - getSETCCInGPR(N->getOperand(0), - N->getOpcode() == ISD::SIGN_EXTEND ? - SetccInGPROpts::SExtOrig : SetccInGPROpts::ZExtOrig); + else + WideRes = + getSETCCInGPR(N->getOperand(0), + N->getOpcode() == ISD::SIGN_EXTEND ? + SetccInGPROpts::SExtOrig : SetccInGPROpts::ZExtOrig); if (!WideRes) return false; @@ -2540,6 +2555,159 @@ bool PPCDAGToDAGISel::tryEXTEND(SDNode *N) { return true; } +// Lower a logical operation on i1 values into a GPR sequence if possible. +// The result can be kept in a GPR if requested. +// Three types of inputs can be handled: +// - SETCC +// - TRUNCATE +// - Logical operation (AND/OR/XOR) +// There is also a special case that is handled (namely a complement operation +// achieved with xor %a, -1). 
+SDValue PPCDAGToDAGISel::computeLogicOpInGPR(SDValue LogicOp) { + assert(isLogicOp(LogicOp.getOpcode()) && + "Can only handle logic operations here."); + assert(LogicOp.getValueType() == MVT::i1 && + "Can only handle logic operations on i1 values here."); + SDLoc dl(LogicOp); + SDValue LHS, RHS; + + // Special case: xor %a, -1 + bool IsBitwiseNegation = isBitwiseNot(LogicOp); + + // Produces a GPR sequence for each operand of the binary logic operation. + // For SETCC, it produces the respective comparison, for TRUNCATE it truncates + // the value in a GPR and for logic operations, it will recursively produce + // a GPR sequence for the operation. + auto getLogicOperand = [&] (SDValue Operand) -> SDValue { + unsigned OperandOpcode = Operand.getOpcode(); + if (OperandOpcode == ISD::SETCC) + return getSETCCInGPR(Operand, SetccInGPROpts::ZExtOrig); + else if (OperandOpcode == ISD::TRUNCATE) { + SDValue InputOp = Operand.getOperand(0); + EVT InVT = InputOp.getValueType(); + return + SDValue(CurDAG->getMachineNode(InVT == MVT::i32 ? PPC::RLDICL_32 : + PPC::RLDICL, dl, InVT, InputOp, + getI64Imm(0, dl), getI64Imm(63, dl)), 0); + } else if (isLogicOp(OperandOpcode)) + return computeLogicOpInGPR(Operand); + return SDValue(); + }; + LHS = getLogicOperand(LogicOp.getOperand(0)); + RHS = getLogicOperand(LogicOp.getOperand(1)); + + // If a GPR sequence can't be produced for the LHS we can't proceed. + // Not producing a GPR sequence for the RHS is only a problem if this isn't + // a bitwise negation operation. + if (!LHS || (!RHS && !IsBitwiseNegation)) + return SDValue(); + + NumLogicOpsOnComparison++; + + // We will use the inputs as 64-bit values. + if (LHS.getValueType() == MVT::i32) + LHS = addExtOrTrunc(LHS, ExtOrTruncConversion::Ext); + if (!IsBitwiseNegation && RHS.getValueType() == MVT::i32) + RHS = addExtOrTrunc(RHS, ExtOrTruncConversion::Ext); + + unsigned NewOpc; + switch (LogicOp.getOpcode()) { + default: llvm_unreachable("Unknown logic operation."); + case ISD::AND: NewOpc = PPC::AND8; break; + case ISD::OR: NewOpc = PPC::OR8; break; + case ISD::XOR: NewOpc = PPC::XOR8; break; + } + + if (IsBitwiseNegation) { + RHS = getI64Imm(1, dl); + NewOpc = PPC::XORI8; + } + + return SDValue(CurDAG->getMachineNode(NewOpc, dl, MVT::i64, LHS, RHS), 0); + +} + +/// Try performing logical operations on results of comparisons in GPRs. +/// It is typically preferred from a performance perspective over performing +/// the operations on individual bits in the CR. We only do this on 64-bit +/// targets for now as the code is specialized for 64-bit (it uses 64-bit +/// instructions and assumes 64-bit registers). +bool PPCDAGToDAGISel::tryLogicOpOfCompares(SDNode *N) { + if (TM.getOptLevel() == CodeGenOpt::None || !TM.isPPC64()) + return false; + if (N->getValueType(0) != MVT::i1) + return false; + assert(isLogicOp(N->getOpcode()) && + "Expected a logic operation on setcc results."); + SDValue LoweredLogical = computeLogicOpInGPR(SDValue(N, 0)); + if (!LoweredLogical) + return false; + + SDLoc dl(N); + bool IsBitwiseNegate = LoweredLogical.getMachineOpcode() == PPC::XORI8; + unsigned SubRegToExtract = IsBitwiseNegate ? PPC::sub_eq : PPC::sub_gt; + SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32); + SDValue LHS = LoweredLogical.getOperand(0); + SDValue RHS = LoweredLogical.getOperand(1); + SDValue WideOp; + SDValue OpToConvToRecForm; + + // Look through any 32-bit to 64-bit implicit extend nodes to find the opcode + // that is input to the XORI. 
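The invariant computeLogicOpInGPR relies on is that every i1 produced here lives zero-extended (0 or 1) in a 64-bit register, so AND/OR/XOR map directly onto AND8/OR8/XOR8 and the complement special case collapses to a single xori against 1. A scalar model of that invariant (illustrative only):

#include <cstdint>

static uint64_t i1_not(uint64_t a) { return a ^ 1; }             // XORI8 a, 1
static uint64_t i1_and(uint64_t a, uint64_t b) { return a & b; } // AND8
static uint64_t i1_or(uint64_t a, uint64_t b)  { return a | b; } // OR8

int main() {
  // Inputs held zero-extended stay zero-extended through these ops.
  return (i1_not(0) == 1 && i1_not(1) == 0 &&
          i1_and(1, 1) == 1 && i1_or(0, 1) == 1) ? 0 : 1;
}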
+ if (IsBitwiseNegate && + LoweredLogical.getOperand(0).getMachineOpcode() == PPC::INSERT_SUBREG) + OpToConvToRecForm = LoweredLogical.getOperand(0).getOperand(1); + else if (IsBitwiseNegate) + // If the input to the XORI isn't an extension, that's what we're after. + OpToConvToRecForm = LoweredLogical.getOperand(0); + else + // If this is not an XORI, it is a reg-reg logical op and we can convert it + // to record-form. + OpToConvToRecForm = LoweredLogical; + + // Get the record-form version of the node we're looking to use to get the + // CR result from. + uint16_t NonRecOpc = OpToConvToRecForm.getMachineOpcode(); + int NewOpc = PPCInstrInfo::getRecordFormOpcode(NonRecOpc); + + // Convert the right node to record-form. This is either the logical we're + // looking at or it is the input node to the negation (if we're looking at + // a bitwise negation). + if (NewOpc != -1 && IsBitwiseNegate) { + // The input to the XORI has a record-form. Use it. + assert(LoweredLogical.getConstantOperandVal(1) == 1 && + "Expected a PPC::XORI8 only for bitwise negation."); + // Emit the record-form instruction. + std::vector Ops; + for (int i = 0, e = OpToConvToRecForm.getNumOperands(); i < e; i++) + Ops.push_back(OpToConvToRecForm.getOperand(i)); + + WideOp = + SDValue(CurDAG->getMachineNode(NewOpc, dl, + OpToConvToRecForm.getValueType(), + MVT::Glue, Ops), 0); + } else { + assert((NewOpc != -1 || !IsBitwiseNegate) && + "No record form available for AND8/OR8/XOR8?"); + WideOp = + SDValue(CurDAG->getMachineNode(NewOpc == -1 ? PPC::ANDIo8 : NewOpc, dl, + MVT::i64, MVT::Glue, LHS, RHS), 0); + } + + // Select this node to a single bit from CR0 set by the record-form node + // just created. For bitwise negation, use the EQ bit which is the equivalent + // of negating the result (i.e. it is a bit set when the result of the + // operation is zero). + SDValue SRIdxVal = + CurDAG->getTargetConstant(SubRegToExtract, dl, MVT::i32); + SDValue CRBit = + SDValue(CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, + MVT::i1, CR0Reg, SRIdxVal, + WideOp.getValue(1)), 0); + ReplaceNode(N, CRBit.getNode()); + return true; +} + /// If the value isn't guaranteed to be sign-extended to 64-bits, extend it. /// Useful when emitting comparison code for 32-bit values without using /// the compare instruction (which only considers the lower 32-bits). @@ -2648,6 +2816,20 @@ SDValue PPCDAGToDAGISel::get32BitZExtCompare(SDValue LHS, SDValue RHS, return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0); } + case ISD::SETNE: { + // (zext (setcc %a, %b, setne)) -> (xor (lshr (cntlzw (xor %a, %b)), 5), 1) + // (zext (setcc %a, 0, setne)) -> (xor (lshr (cntlzw %a), 5), 1) + SDValue Xor = IsRHSZero ? LHS : + SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0); + SDValue Clz = + SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0); + SDValue ShiftOps[] = { Clz, getI32Imm(27, dl), getI32Imm(5, dl), + getI32Imm(31, dl) }; + SDValue Shift = + SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0); + return SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift, + getI32Imm(1, dl)), 0); + } } } @@ -2674,7 +2856,99 @@ SDValue PPCDAGToDAGISel::get32BitSExtCompare(SDValue LHS, SDValue RHS, return SDValue(CurDAG->getMachineNode(PPC::SRADI_32, dl, MVT::i32, Sldi, getI32Imm(63, dl)), 0); } + case ISD::SETNE: { + // Bitwise xor the operands, count leading zeros, shift right by 5 bits and + // flip the bit, finally take 2's complement. 
+ // (sext (setcc %a, %b, setne)) -> + // (neg (xor (lshr (ctlz (xor %a, %b)), 5), 1)) + // Same as above, but the first xor is not needed. + // (sext (setcc %a, 0, setne)) -> + // (neg (xor (lshr (ctlz %a), 5), 1)) + SDValue Xor = IsRHSZero ? LHS : + SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0); + SDValue Clz = + SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0); + SDValue ShiftOps[] = + { Clz, getI32Imm(27, dl), getI32Imm(5, dl), getI32Imm(31, dl) }; + SDValue Shift = + SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0); + SDValue Xori = + SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift, + getI32Imm(1, dl)), 0); + return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Xori), 0); + } + } +} + +/// Produces a zero-extended result of comparing two 64-bit values according to +/// the passed condition code. +SDValue PPCDAGToDAGISel::get64BitZExtCompare(SDValue LHS, SDValue RHS, + ISD::CondCode CC, + int64_t RHSValue, SDLoc dl) { + bool IsRHSZero = RHSValue == 0; + switch (CC) { + default: return SDValue(); + case ISD::SETEQ: { + // (zext (setcc %a, %b, seteq)) -> (lshr (ctlz (xor %a, %b)), 6) + // (zext (setcc %a, 0, seteq)) -> (lshr (ctlz %a), 6) + SDValue Xor = IsRHSZero ? LHS : + SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0); + SDValue Clz = + SDValue(CurDAG->getMachineNode(PPC::CNTLZD, dl, MVT::i64, Xor), 0); + return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Clz, + getI64Imm(58, dl), getI64Imm(63, dl)), + 0); + } + } +} + +/// Produces a sign-extended result of comparing two 64-bit values according to +/// the passed condition code. +SDValue PPCDAGToDAGISel::get64BitSExtCompare(SDValue LHS, SDValue RHS, + ISD::CondCode CC, + int64_t RHSValue, SDLoc dl) { + bool IsRHSZero = RHSValue == 0; + switch (CC) { + default: return SDValue(); + case ISD::SETEQ: { + // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1) + // (sext (setcc %a, %b, seteq)) -> (sube addc.reg, addc.reg, addc.CA) + // {addcz.reg, addcz.CA} = (addcarry %a, -1) + // (sext (setcc %a, 0, seteq)) -> (sube addcz.reg, addcz.reg, addcz.CA) + SDValue AddInput = IsRHSZero ? LHS : + SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0); + SDValue Addic = + SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue, + AddInput, getI32Imm(~0U, dl)), 0); + return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, Addic, + Addic, Addic.getValue(1)), 0); } + } +} + +/// Does this SDValue have any uses for which keeping the value in a GPR is +/// appropriate. This is meant to be used on values that have type i1 since +/// it is somewhat meaningless to ask if values of other types can be kept in +/// GPR's. +static bool allUsesExtend(SDValue Compare, SelectionDAG *CurDAG) { + assert(Compare.getOpcode() == ISD::SETCC && + "An ISD::SETCC node required here."); + + // For values that have a single use, the caller should obviously already have + // checked if that use is an extending use. We check the other uses here. + if (Compare.hasOneUse()) + return true; + // We want the value in a GPR if it is being extended, used for a select, or + // used in logical operations. 
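These sequences come down to two tricks: a count-leading-zeros of width W yields W exactly when its input is zero, so shifting right by log2(W) produces the equality bit; and addic with -1 carries exactly when its input is nonzero, which subfe then smears into all-ones or zero. Scalar models of the cases above (the clz helpers are hypothetical stand-ins for cntlzw/cntlzd, which are defined at zero; GCC/Clang builtins assumed):

#include <cstdint>

static unsigned clz32(uint32_t v) { return v ? __builtin_clz(v) : 32; }
static unsigned clz64(uint64_t v) { return v ? __builtin_clzll(v) : 64; }

// (zext (setcc a, b, setne)), 32-bit: cntlzw; rlwinm ...,27,5,31; xori 1
static uint32_t zext_ne32(uint32_t a, uint32_t b) {
  return (clz32(a ^ b) >> 5) ^ 1;
}
// (zext (setcc a, b, seteq)), 64-bit: cntlzd; rldicl ...,58,63
static uint64_t zext_eq64(uint64_t a, uint64_t b) {
  return clz64(a ^ b) >> 6;
}
// (sext (setcc a, b, seteq)), 64-bit: addic t,(a^b),-1; subfe r,t,t = CA - 1
static uint64_t sext_eq64(uint64_t a, uint64_t b) {
  uint64_t ca = (a ^ b) != 0; // carry out of (a^b) + 0xFFFFFFFFFFFFFFFF
  return ca - 1;              // 0 -> all-ones (true), 1 -> zero (false)
}

int main() {
  return (zext_ne32(1, 2) == 1 && zext_ne32(7, 7) == 0 &&
          zext_eq64(5, 5) == 1 && zext_eq64(5, 6) == 0 &&
          sext_eq64(5, 5) == ~0ULL && sext_eq64(5, 6) == 0) ? 0 : 1;
}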
+ for (auto CompareUse : Compare.getNode()->uses()) + if (CompareUse->getOpcode() != ISD::SIGN_EXTEND && + CompareUse->getOpcode() != ISD::ZERO_EXTEND && + CompareUse->getOpcode() != ISD::SELECT && + !isLogicOp(CompareUse->getOpcode())) { + OmittedForNonExtendUses++; + return false; + } + return true; } /// Returns an equivalent of a SETCC node but with the result the same width as @@ -2686,6 +2960,11 @@ SDValue PPCDAGToDAGISel::getSETCCInGPR(SDValue Compare, Compare.getOpcode() == ISD::SELECT_CC) && "An ISD::SETCC node required here."); + // Don't convert this comparison to a GPR sequence because there are uses + // of the i1 result (i.e. uses that require the result in the CR). + if ((Compare.getOpcode() == ISD::SETCC) && !allUsesExtend(Compare, CurDAG)) + return SDValue(); + SDValue LHS = Compare.getOperand(0); SDValue RHS = Compare.getOperand(1); @@ -2694,30 +2973,54 @@ SDValue PPCDAGToDAGISel::getSETCCInGPR(SDValue Compare, ISD::CondCode CC = cast(Compare.getOperand(CCOpNum))->get(); EVT InputVT = LHS.getValueType(); - if (InputVT != MVT::i32) + if (InputVT != MVT::i32 && InputVT != MVT::i64) return SDValue(); - SDLoc dl(Compare); - ConstantSDNode *RHSConst = dyn_cast(RHS); - int64_t RHSValue = RHSConst ? RHSConst->getSExtValue() : INT64_MAX; - if (ConvOpts == SetccInGPROpts::ZExtInvert || ConvOpts == SetccInGPROpts::SExtInvert) CC = ISD::getSetCCInverse(CC, true); - if (ISD::isSignedIntSetCC(CC)) { + bool Inputs32Bit = InputVT == MVT::i32; + if (ISD::isSignedIntSetCC(CC) && Inputs32Bit) { LHS = signExtendInputIfNeeded(LHS); RHS = signExtendInputIfNeeded(RHS); - } else if (ISD::isUnsignedIntSetCC(CC)) { + } else if (ISD::isUnsignedIntSetCC(CC) && Inputs32Bit) { LHS = zeroExtendInputIfNeeded(LHS); RHS = zeroExtendInputIfNeeded(RHS); } + SDLoc dl(Compare); + ConstantSDNode *RHSConst = dyn_cast(RHS); + int64_t RHSValue = RHSConst ? RHSConst->getSExtValue() : INT64_MAX; bool IsSext = ConvOpts == SetccInGPROpts::SExtOrig || ConvOpts == SetccInGPROpts::SExtInvert; - if (IsSext) + + if (IsSext && Inputs32Bit) return get32BitSExtCompare(LHS, RHS, CC, RHSValue, dl); - return get32BitZExtCompare(LHS, RHS, CC, RHSValue, dl); + else if (Inputs32Bit) + return get32BitZExtCompare(LHS, RHS, CC, RHSValue, dl); + else if (IsSext) + return get64BitSExtCompare(LHS, RHS, CC, RHSValue, dl); + return get64BitZExtCompare(LHS, RHS, CC, RHSValue, dl); +} + +/// Does this node represent a load/store node whose address can be represented +/// with a register plus an immediate that's a multiple of \p Val: +bool PPCDAGToDAGISel::isOffsetMultipleOf(SDNode *N, unsigned Val) const { + LoadSDNode *LDN = dyn_cast(N); + StoreSDNode *STN = dyn_cast(N); + SDValue AddrOp; + if (LDN) + AddrOp = LDN->getOperand(1); + else if (STN) + AddrOp = STN->getOperand(2); + + short Imm = 0; + if (AddrOp.getOpcode() == ISD::ADD) + return isIntS16Immediate(AddrOp.getOperand(1), Imm) && !(Imm % Val); + + // If the address comes from the outside, the offset will be zero. 
+ return AddrOp.getOpcode() == ISD::CopyFromReg; } void PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) { @@ -2906,6 +3209,9 @@ void PPCDAGToDAGISel::Select(SDNode *N) { } case ISD::AND: { + if (tryLogicOpOfCompares(N)) + return; + unsigned Imm, Imm2, SH, MB, ME; uint64_t Imm64; @@ -3025,7 +3331,10 @@ void PPCDAGToDAGISel::Select(SDNode *N) { if (tryBitfieldInsert(N)) return; - short Imm; + if (tryLogicOpOfCompares(N)) + return; + + int16_t Imm; if (N->getOperand(0)->getOpcode() == ISD::FrameIndex && isIntS16Immediate(N->getOperand(1), Imm)) { KnownBits LHSKnown; @@ -3042,8 +3351,13 @@ void PPCDAGToDAGISel::Select(SDNode *N) { // Other cases are autogenerated. break; } + case ISD::XOR: { + if (tryLogicOpOfCompares(N)) + return; + break; + } case ISD::ADD: { - short Imm; + int16_t Imm; if (N->getOperand(0)->getOpcode() == ISD::FrameIndex && isIntS16Immediate(N->getOperand(1), Imm)) { selectFrameIndex(N, N->getOperand(0).getNode(), (int)Imm); @@ -3731,11 +4045,13 @@ void PPCDAGToDAGISel::foldBoolExts(SDValue &Res, SDNode *&N) { O0.getNode(), O1.getNode()); }; + // FIXME: When the semantics of the interaction between select and undef + // are clearly defined, it may turn out to be unnecessary to break here. SDValue TrueRes = TryFold(ConstTrue); - if (!TrueRes) + if (!TrueRes || TrueRes.isUndef()) break; SDValue FalseRes = TryFold(ConstFalse); - if (!FalseRes) + if (!FalseRes || FalseRes.isUndef()) break; // For us to materialize these using one instruction, we must be able to @@ -4813,6 +5129,7 @@ void PPCDAGToDAGISel::PeepholePPC64() { /// createPPCISelDag - This pass converts a legalized DAG into a /// PowerPC-specific DAG, ready for instruction scheduling. /// -FunctionPass *llvm::createPPCISelDag(PPCTargetMachine &TM) { - return new PPCDAGToDAGISel(TM); +FunctionPass *llvm::createPPCISelDag(PPCTargetMachine &TM, + CodeGenOpt::Level OptLevel) { + return new PPCDAGToDAGISel(TM, OptLevel); } diff --git a/interpreter/llvm/src/lib/Target/PowerPC/PPCISelLowering.cpp b/interpreter/llvm/src/lib/Target/PowerPC/PPCISelLowering.cpp index 17bdd595da109..b3a3c73f6df03 100644 --- a/interpreter/llvm/src/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/interpreter/llvm/src/lib/Target/PowerPC/PPCISelLowering.cpp @@ -11,13 +11,13 @@ // //===----------------------------------------------------------------------===// +#include "PPCISelLowering.h" #include "MCTargetDesc/PPCPredicates.h" #include "PPC.h" -#include "PPCCallingConv.h" #include "PPCCCState.h" +#include "PPCCallingConv.h" #include "PPCFrameLowering.h" #include "PPCInstrInfo.h" -#include "PPCISelLowering.h" #include "PPCMachineFunctionInfo.h" #include "PPCPerfectShuffle.h" #include "PPCRegisterInfo.h" @@ -28,11 +28,11 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/None.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/CodeGen/CallingConvLower.h" @@ -52,8 +52,8 @@ #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/ValueTypes.h" -#include "llvm/IR/CallingConv.h" #include "llvm/IR/CallSite.h" +#include "llvm/IR/CallingConv.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -61,9 +61,9 @@ #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include 
"llvm/IR/GlobalValue.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" -#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/IR/Use.h" @@ -136,7 +136,11 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, addRegisterClass(MVT::f64, &PPC::F8RCRegClass); } - // PowerPC has an i16 but no i8 (or i1) SEXTLOAD + // Match BITREVERSE to customized fast code sequence in the td file. + setOperationAction(ISD::BITREVERSE, MVT::i32, Legal); + setOperationAction(ISD::BITREVERSE, MVT::i64, Legal); + + // PowerPC has an i16 but no i8 (or i1) SEXTLOAD. for (MVT VT : MVT::integer_valuetypes()) { setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand); @@ -175,7 +179,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom); } - // PowerPC does not support direct load / store of condition registers + // PowerPC does not support direct load/store of condition registers. setOperationAction(ISD::LOAD, MVT::i1, Custom); setOperationAction(ISD::STORE, MVT::i1, Custom); @@ -204,11 +208,23 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand); setOperationAction(ISD::FREM, MVT::ppcf128, Expand); - // PowerPC has no SREM/UREM instructions - setOperationAction(ISD::SREM, MVT::i32, Expand); - setOperationAction(ISD::UREM, MVT::i32, Expand); - setOperationAction(ISD::SREM, MVT::i64, Expand); - setOperationAction(ISD::UREM, MVT::i64, Expand); + // PowerPC has no SREM/UREM instructions unless we are on P9 + // On P9 we may use a hardware instruction to compute the remainder. + // The instructions are not legalized directly because in the cases where the + // result of both the remainder and the division is required it is more + // efficient to compute the remainder from the result of the division rather + // than use the remainder instruction. + if (Subtarget.isISA3_0()) { + setOperationAction(ISD::SREM, MVT::i32, Custom); + setOperationAction(ISD::UREM, MVT::i32, Custom); + setOperationAction(ISD::SREM, MVT::i64, Custom); + setOperationAction(ISD::UREM, MVT::i64, Custom); + } else { + setOperationAction(ISD::SREM, MVT::i32, Expand); + setOperationAction(ISD::UREM, MVT::i32, Expand); + setOperationAction(ISD::SREM, MVT::i64, Expand); + setOperationAction(ISD::UREM, MVT::i64, Expand); + } // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM. setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); @@ -410,6 +426,11 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, // To handle counter-based loop conditions. setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom); + setOperationAction(ISD::INTRINSIC_VOID, MVT::i8, Custom); + setOperationAction(ISD::INTRINSIC_VOID, MVT::i16, Custom); + setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom); + setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); + // Comparisons that require checking two conditions. 
setCondCodeAction(ISD::SETULT, MVT::f32, Expand); setCondCodeAction(ISD::SETULT, MVT::f64, Expand); @@ -534,7 +555,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::FSIN, VT, Expand); setOperationAction(ISD::FCOS, VT, Expand); setOperationAction(ISD::FABS, VT, Expand); - setOperationAction(ISD::FPOWI, VT, Expand); setOperationAction(ISD::FFLOOR, VT, Expand); setOperationAction(ISD::FCEIL, VT, Expand); setOperationAction(ISD::FTRUNC, VT, Expand); @@ -684,6 +704,14 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::SRA, MVT::v2i64, Legal); setOperationAction(ISD::SRL, MVT::v2i64, Legal); + // 128 bit shifts can be accomplished via 3 instructions for SHL and + // SRL, but not for SRA because of the instructions available: + // VS{RL} and VS{RL}O. However due to direct move costs, it's not worth + // doing + setOperationAction(ISD::SHL, MVT::v1i128, Expand); + setOperationAction(ISD::SRL, MVT::v1i128, Expand); + setOperationAction(ISD::SRA, MVT::v1i128, Expand); + setOperationAction(ISD::SETCC, MVT::v2i64, Legal); } else { @@ -737,6 +765,13 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, if (Subtarget.hasP9Vector()) { setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom); + + // 128 bit shifts can be accomplished via 3 instructions for SHL and + // SRL, but not for SRA because of the instructions available: + // VS{RL} and VS{RL}O. + setOperationAction(ISD::SHL, MVT::v1i128, Legal); + setOperationAction(ISD::SRL, MVT::v1i128, Legal); + setOperationAction(ISD::SRA, MVT::v1i128, Expand); } } @@ -778,7 +813,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::FABS , MVT::v4f64, Legal); setOperationAction(ISD::FSIN , MVT::v4f64, Expand); setOperationAction(ISD::FCOS , MVT::v4f64, Expand); - setOperationAction(ISD::FPOWI , MVT::v4f64, Expand); setOperationAction(ISD::FPOW , MVT::v4f64, Expand); setOperationAction(ISD::FLOG , MVT::v4f64, Expand); setOperationAction(ISD::FLOG2 , MVT::v4f64, Expand); @@ -824,7 +858,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::FABS , MVT::v4f32, Legal); setOperationAction(ISD::FSIN , MVT::v4f32, Expand); setOperationAction(ISD::FCOS , MVT::v4f32, Expand); - setOperationAction(ISD::FPOWI , MVT::v4f32, Expand); setOperationAction(ISD::FPOW , MVT::v4f32, Expand); setOperationAction(ISD::FLOG , MVT::v4f32, Expand); setOperationAction(ISD::FLOG2 , MVT::v4f32, Expand); @@ -1024,6 +1057,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, MaxStoresPerMemset = 128; MaxStoresPerMemcpy = 128; MaxStoresPerMemmove = 128; + MaxLoadsPerMemcmp = 128; + } else { + MaxLoadsPerMemcmp = 8; + MaxLoadsPerMemcmpOptSize = 4; } } @@ -1095,6 +1132,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::VPERM: return "PPCISD::VPERM"; case PPCISD::XXSPLT: return "PPCISD::XXSPLT"; case PPCISD::XXINSERT: return "PPCISD::XXINSERT"; + case PPCISD::XXREVERSE: return "PPCISD::XXREVERSE"; + case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI"; case PPCISD::VECSHL: return "PPCISD::VECSHL"; case PPCISD::CMPB: return "PPCISD::CMPB"; case PPCISD::Hi: return "PPCISD::Hi"; @@ -1133,6 +1172,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::LXSIZX: return "PPCISD::LXSIZX"; case PPCISD::STXSIX: return "PPCISD::STXSIX"; case PPCISD::VEXTS: return 
"PPCISD::VEXTS"; + case PPCISD::SExtVElems: return "PPCISD::SExtVElems"; case PPCISD::LXVD2X: return "PPCISD::LXVD2X"; case PPCISD::STXVD2X: return "PPCISD::STXVD2X"; case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH"; @@ -1576,20 +1616,47 @@ bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) { return true; } -bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, - unsigned &InsertAtByte, bool &Swap, bool IsLE) { - // Check that the mask is shuffling words - for (unsigned i = 0; i < 4; ++i) { - unsigned B0 = N->getMaskElt(i*4); - unsigned B1 = N->getMaskElt(i*4+1); - unsigned B2 = N->getMaskElt(i*4+2); - unsigned B3 = N->getMaskElt(i*4+3); - if (B0 % 4) +/// Check that the mask is shuffling N byte elements. Within each N byte +/// element of the mask, the indices could be either in increasing or +/// decreasing order as long as they are consecutive. +/// \param[in] N the shuffle vector SD Node to analyze +/// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/ +/// Word/DoubleWord/QuadWord). +/// \param[in] StepLen the delta indices number among the N byte element, if +/// the mask is in increasing/decreasing order then it is 1/-1. +/// \return true iff the mask is shuffling N byte elements. +static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width, + int StepLen) { + assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) && + "Unexpected element width."); + assert((StepLen == 1 || StepLen == -1) && "Unexpected element width."); + + unsigned NumOfElem = 16 / Width; + unsigned MaskVal[16]; // Width is never greater than 16 + for (unsigned i = 0; i < NumOfElem; ++i) { + MaskVal[0] = N->getMaskElt(i * Width); + if ((StepLen == 1) && (MaskVal[0] % Width)) { return false; - if (B1 != B0+1 || B2 != B1+1 || B3 != B2+1) + } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) { return false; + } + + for (unsigned int j = 1; j < Width; ++j) { + MaskVal[j] = N->getMaskElt(i * Width + j); + if (MaskVal[j] != MaskVal[j-1] + StepLen) { + return false; + } + } } + return true; +} + +bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, + unsigned &InsertAtByte, bool &Swap, bool IsLE) { + if (!isNByteElemShuffleMask(N, 4, 1)) + return false; + // Now we look at mask elements 0,4,8,12 unsigned M0 = N->getMaskElt(0) / 4; unsigned M1 = N->getMaskElt(4) / 4; @@ -1660,6 +1727,158 @@ bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, return false; } +bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, + bool &Swap, bool IsLE) { + assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8"); + // Ensure each byte index of the word is consecutive. + if (!isNByteElemShuffleMask(N, 4, 1)) + return false; + + // Now we look at mask elements 0,4,8,12, which are the beginning of words. + unsigned M0 = N->getMaskElt(0) / 4; + unsigned M1 = N->getMaskElt(4) / 4; + unsigned M2 = N->getMaskElt(8) / 4; + unsigned M3 = N->getMaskElt(12) / 4; + + // If both vector operands for the shuffle are the same vector, the mask will + // contain only elements from the first one and the second one will be undef. + if (N->getOperand(1).isUndef()) { + assert(M0 < 4 && "Indexing into an undef vector?"); + if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4) + return false; + + ShiftElts = IsLE ? (4 - M0) % 4 : M0; + Swap = false; + return true; + } + + // Ensure each word index of the ShuffleVector Mask is consecutive. 
+ if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8) + return false; + + if (IsLE) { + if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) { + // Input vectors don't need to be swapped if the leading element + // of the result is one of the 3 left elements of the second vector + // (or if there is no shift to be done at all). + Swap = false; + ShiftElts = (8 - M0) % 8; + } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) { + // Input vectors need to be swapped if the leading element + // of the result is one of the 3 left elements of the first vector + // (or if we're shifting by 4 - thereby simply swapping the vectors). + Swap = true; + ShiftElts = (4 - M0) % 4; + } + + return true; + } else { // BE + if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) { + // Input vectors don't need to be swapped if the leading element + // of the result is one of the 4 elements of the first vector. + Swap = false; + ShiftElts = M0; + } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) { + // Input vectors need to be swapped if the leading element + // of the result is one of the 4 elements of the right vector. + Swap = true; + ShiftElts = M0 - 4; + } + + return true; + } +} + +bool static isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) { + assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8"); + + if (!isNByteElemShuffleMask(N, Width, -1)) + return false; + + for (int i = 0; i < 16; i += Width) + if (N->getMaskElt(i) != i + Width - 1) + return false; + + return true; +} + +bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) { + return isXXBRShuffleMaskHelper(N, 2); +} + +bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) { + return isXXBRShuffleMaskHelper(N, 4); +} + +bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) { + return isXXBRShuffleMaskHelper(N, 8); +} + +bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) { + return isXXBRShuffleMaskHelper(N, 16); +} + +/// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap +/// if the inputs to the instruction should be swapped and set \p DM to the +/// value for the immediate. +/// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI +/// AND element 0 of the result comes from the first input (LE) or second input +/// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered. +/// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle +/// mask. +bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM, + bool &Swap, bool IsLE) { + assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8"); + + // Ensure each byte index of the double word is consecutive. + if (!isNByteElemShuffleMask(N, 8, 1)) + return false; + + unsigned M0 = N->getMaskElt(0) / 8; + unsigned M1 = N->getMaskElt(8) / 8; + assert(((M0 | M1) < 4) && "A mask element out of bounds?"); + + // If both vector operands for the shuffle are the same vector, the mask will + // contain only elements from the first one and the second one will be undef. + if (N->getOperand(1).isUndef()) { + if ((M0 | M1) < 2) { + DM = IsLE ? 
(((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1); + Swap = false; + return true; + } else + return false; + } + + if (IsLE) { + if (M0 > 1 && M1 < 2) { + Swap = false; + } else if (M0 < 2 && M1 > 1) { + M0 = (M0 + 2) % 4; + M1 = (M1 + 2) % 4; + Swap = true; + } else + return false; + + // Note: if control flow comes here that means Swap is already set above + DM = (((~M1) & 1) << 1) + ((~M0) & 1); + return true; + } else { // BE + if (M0 < 2 && M1 > 1) { + Swap = false; + } else if (M0 > 1 && M1 < 2) { + M0 = (M0 + 2) % 4; + M1 = (M1 + 2) % 4; + Swap = true; + } else + return false; + + // Note: if control flow comes here that means Swap is already set above + DM = (M0 << 1) + (M1 & 1); + return true; + } +} + + /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the /// specified isSplatShuffleMask VECTOR_SHUFFLE mask. unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize, @@ -1814,17 +2033,17 @@ int PPC::isQVALIGNIShuffleMask(SDNode *N) { /// or 64-bit immediate, and if the value can be accurately represented as a /// sign extension from a 16-bit value. If so, this returns true and the /// immediate. -static bool isIntS16Immediate(SDNode *N, short &Imm) { +bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) { if (!isa(N)) return false; - Imm = (short)cast(N)->getZExtValue(); + Imm = (int16_t)cast(N)->getZExtValue(); if (N->getValueType(0) == MVT::i32) return Imm == (int32_t)cast(N)->getZExtValue(); else return Imm == (int64_t)cast(N)->getZExtValue(); } -static bool isIntS16Immediate(SDValue Op, short &Imm) { +bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) { return isIntS16Immediate(Op.getNode(), Imm); } @@ -1834,7 +2053,7 @@ static bool isIntS16Immediate(SDValue Op, short &Imm) { bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const { - short imm = 0; + int16_t imm = 0; if (N.getOpcode() == ISD::ADD) { if (isIntS16Immediate(N.getOperand(1), imm)) return false; // r+i @@ -1911,12 +2130,12 @@ static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) { /// Returns true if the address N can be represented by a base register plus /// a signed 16-bit displacement [r+imm], and if it is not better -/// represented as reg+reg. If Aligned is true, only accept displacements -/// suitable for STD and friends, i.e. multiples of 4. +/// represented as reg+reg. If \p Alignment is non-zero, only accept +/// displacements that are multiples of that value. bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, - bool Aligned) const { + unsigned Alignment) const { // FIXME dl should come from parent load or store, not from address SDLoc dl(N); // If this can be more profitably realized as r+r, fail. 
@@ -1924,9 +2143,9 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, return false; if (N.getOpcode() == ISD::ADD) { - short imm = 0; + int16_t imm = 0; if (isIntS16Immediate(N.getOperand(1), imm) && - (!Aligned || (imm & 3) == 0)) { + (!Alignment || (imm % Alignment) == 0)) { Disp = DAG.getTargetConstant(imm, dl, N.getValueType()); if (FrameIndexSDNode *FI = dyn_cast(N.getOperand(0))) { Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); @@ -1948,9 +2167,9 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, return true; // [&g+r] } } else if (N.getOpcode() == ISD::OR) { - short imm = 0; + int16_t imm = 0; if (isIntS16Immediate(N.getOperand(1), imm) && - (!Aligned || (imm & 3) == 0)) { + (!Alignment || (imm % Alignment) == 0)) { // If this is an or of disjoint bitfields, we can codegen this as an add // (for better address arithmetic) if the LHS and RHS of the OR are // provably disjoint. @@ -1976,8 +2195,8 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, // If this address fits entirely in a 16-bit sext immediate field, codegen // this as "d, 0" - short Imm; - if (isIntS16Immediate(CN, Imm) && (!Aligned || (Imm & 3) == 0)) { + int16_t Imm; + if (isIntS16Immediate(CN, Imm) && (!Alignment || (Imm % Alignment) == 0)) { Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0)); Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO, CN->getValueType(0)); @@ -1987,7 +2206,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, // Handle 32-bit sext immediates with LIS + addr mode. if ((CN->getValueType(0) == MVT::i32 || (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) && - (!Aligned || (CN->getZExtValue() & 3) == 0)) { + (!Alignment || (CN->getZExtValue() % Alignment) == 0)) { int Addr = (int)CN->getZExtValue(); // Otherwise, break this down into an LIS + disp. @@ -2021,10 +2240,15 @@ bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base, if (SelectAddressRegReg(N, Base, Index, DAG)) return true; - // If the operand is an addition, always emit this as [r+r], since this is - // better (for code size, and execution, as the memop does the add for free) - // than emitting an explicit add. - if (N.getOpcode() == ISD::ADD) { + // If the address is the result of an add, we will utilize the fact that the + // address calculation includes an implicit add. However, we can reduce + // register pressure if we do not materialize a constant just for use as the + // index register. We only get rid of the add if it is not an add of a + // value and a 16-bit signed constant and both have a single use. + int16_t imm = 0; + if (N.getOpcode() == ISD::ADD && + (!isIntS16Immediate(N.getOperand(1), imm) || + !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) { Base = N.getOperand(0); Index = N.getOperand(1); return true; @@ -2097,14 +2321,14 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, // LDU/STU can only handle immediates that are a multiple of 4. if (VT != MVT::i64) { - if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, false)) + if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, 0)) return false; } else { // LDU/STU need an address with at least 4-byte alignment. 
if (Alignment < 4) return false; - if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, true)) + if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, 4)) return false; } @@ -2346,8 +2570,9 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op, PPCII::MO_TPREL_HA); SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TPREL_LO); - SDValue TLSReg = DAG.getRegister(is64bit ? PPC::X13 : PPC::R2, - is64bit ? MVT::i64 : MVT::i32); + SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64) + : DAG.getRegister(PPC::R2, MVT::i32); + SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg); return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi); } @@ -6207,7 +6432,7 @@ PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); - // Get the corect type for integers. + // Get the correct type for integers. EVT IntVT = Op.getValueType(); // Get the inputs. @@ -6224,7 +6449,7 @@ SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, // When we pop the dynamic allocation we need to restore the SP link. SDLoc dl(Op); - // Get the corect type for pointers. + // Get the correct type for pointers. EVT PtrVT = getPointerTy(DAG.getDataLayout()); // Construct the stack pointer operand. @@ -6299,7 +6524,7 @@ SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SDValue Size = Op.getOperand(1); SDLoc dl(Op); - // Get the corect type for pointers. + // Get the correct type for pointers. EVT PtrVT = getPointerTy(DAG.getDataLayout()); // Negate the size. SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT, @@ -6430,6 +6655,7 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { default: break; // SETUO etc aren't handled by fsel. case ISD::SETNE: std::swap(TV, FV); + LLVM_FALLTHROUGH; case ISD::SETEQ: if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS); @@ -6441,6 +6667,7 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { case ISD::SETULT: case ISD::SETLT: std::swap(TV, FV); // fsel is natively setge, swap operands for setlt + LLVM_FALLTHROUGH; case ISD::SETOGE: case ISD::SETGE: if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits @@ -6449,6 +6676,7 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { case ISD::SETUGT: case ISD::SETGT: std::swap(TV, FV); // fsel is natively setge, swap operands for setlt + LLVM_FALLTHROUGH; case ISD::SETOLE: case ISD::SETLE: if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits @@ -6462,6 +6690,7 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { default: break; // SETUO etc aren't handled by fsel. case ISD::SETNE: std::swap(TV, FV); + LLVM_FALLTHROUGH; case ISD::SETEQ: Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags); if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits @@ -6659,6 +6888,7 @@ bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT, // Given the head of the old chain, ResChain, insert a token factor containing // it and NewResChain, and make users of ResChain now be users of that token // factor. +// TODO: Remove and use DAG::makeEquivalentMemoryOrdering() instead. 
void PPCTargetLowering::spliceIntoChain(SDValue ResChain, SDValue NewResChain, SelectionDAG &DAG) const { @@ -7659,6 +7889,53 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins); } + + if (Subtarget.hasVSX() && + PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) { + if (Swap) + std::swap(V1, V2); + SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1); + SDValue Conv2 = + DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2); + + SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2, + DAG.getConstant(ShiftElts, dl, MVT::i32)); + return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl); + } + + if (Subtarget.hasVSX() && + PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) { + if (Swap) + std::swap(V1, V2); + SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1); + SDValue Conv2 = + DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2); + + SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2, + DAG.getConstant(ShiftElts, dl, MVT::i32)); + return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI); + } + + if (Subtarget.hasP9Vector()) { + if (PPC::isXXBRHShuffleMask(SVOp)) { + SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1); + SDValue ReveHWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v8i16, Conv); + return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord); + } else if (PPC::isXXBRWShuffleMask(SVOp)) { + SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1); + SDValue ReveWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v4i32, Conv); + return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord); + } else if (PPC::isXXBRDShuffleMask(SVOp)) { + SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1); + SDValue ReveDWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v2i64, Conv); + return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord); + } else if (PPC::isXXBRQShuffleMask(SVOp)) { + SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1); + SDValue ReveQWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v1i128, Conv); + return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord); + } + } + if (Subtarget.hasVSX()) { if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) { int SplatIdx = PPC::getVSPLTImmediate(SVOp, 4, DAG); @@ -8115,9 +8392,9 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, if (IntrinsicID == Intrinsic::thread_pointer) { // Reads the thread pointer register, used for __builtin_thread_pointer. - bool is64bit = Subtarget.isPPC64(); - return DAG.getRegister(is64bit ? PPC::X13 : PPC::R2, - is64bit ? MVT::i64 : MVT::i32); + if (Subtarget.isPPC64()) + return DAG.getRegister(PPC::X13, MVT::i64); + return DAG.getRegister(PPC::R2, MVT::i32); } // If this is a lowered altivec predicate compare, CompareOpc is set to the @@ -8184,6 +8461,40 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return Flags; } +SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op, + SelectionDAG &DAG) const { + // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to + // the beginning of the argument list. + int ArgStart = isa(Op.getOperand(0)) ? 
0 : 1;
+  SDLoc DL(Op);
+  switch (cast<ConstantSDNode>(Op.getOperand(ArgStart))->getZExtValue()) {
+  case Intrinsic::ppc_cfence: {
+    assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
+    assert(Subtarget.isPPC64() && "Only 64-bit is supported for now.");
+    return SDValue(DAG.getMachineNode(PPC::CFENCE8, DL, MVT::Other,
+                                      DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
+                                                  Op.getOperand(ArgStart + 1)),
+                                      Op.getOperand(0)),
+                   0);
+  }
+  default:
+    break;
+  }
+  return SDValue();
+}
+
+SDValue PPCTargetLowering::LowerREM(SDValue Op, SelectionDAG &DAG) const {
+  // Check for a DIV with the same operands as this REM.
+  for (auto UI : Op.getOperand(1)->uses()) {
+    if ((Op.getOpcode() == ISD::SREM && UI->getOpcode() == ISD::SDIV) ||
+        (Op.getOpcode() == ISD::UREM && UI->getOpcode() == ISD::UDIV))
+      if (UI->getOperand(0) == Op.getOperand(0) &&
+          UI->getOperand(1) == Op.getOperand(1))
+        return SDValue();
+  }
+  return Op;
+}
+
 SDValue PPCTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
                                                   SelectionDAG &DAG) const {
   SDLoc dl(Op);
@@ -8649,6 +8960,12 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   // Frame & Return address.
   case ISD::RETURNADDR:         return LowerRETURNADDR(Op, DAG);
   case ISD::FRAMEADDR:          return LowerFRAMEADDR(Op, DAG);
+
+  case ISD::INTRINSIC_VOID:
+    return LowerINTRINSIC_VOID(Op, DAG);
+  case ISD::SREM:
+  case ISD::UREM:
+    return LowerREM(Op, DAG);
   }
 }
@@ -8753,12 +9070,19 @@ Instruction *PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
 Instruction *PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
                                                   Instruction *Inst,
                                                   AtomicOrdering Ord) const {
-  if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord))
+  if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) {
+    // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
+    // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
+    // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
+    if (isa<LoadInst>(Inst) && Subtarget.isPPC64())
+      return Builder.CreateCall(
+          Intrinsic::getDeclaration(
+              Builder.GetInsertBlock()->getParent()->getParent(),
+              Intrinsic::ppc_cfence, {Inst->getType()}),
+          {Inst});
+    // FIXME: Can use isync for rmw operation.
     return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
-  // FIXME: this is too conservative, a dependent branch + isync is enough.
-  // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
-  // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
-  // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
+  }
   return nullptr;
 }
@@ -11001,6 +11325,132 @@ static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
   return SDValue();
 }
+// This function adds the required vector_shuffle needed to get
+// the elements of the vector extract in the correct position
+// as specified by the CorrectElems encoding.
+static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG,
+                                      SDValue Input, uint64_t Elems,
+                                      uint64_t CorrectElems) {
+  SDLoc dl(N);
+
+  unsigned NumElems = Input.getValueType().getVectorNumElements();
+  SmallVector<int, 16> ShuffleMask(NumElems, -1);
+
+  // Knowing the element indices being extracted from the original
+  // vector and the order in which they're being inserted, just put
+  // them at element indices required for the instruction.
+  for (unsigned i = 0; i < N->getNumOperands(); i++) {
+    if (DAG.getDataLayout().isLittleEndian())
+      ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
+    else
+      ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
+    CorrectElems = CorrectElems >> 8;
+    Elems = Elems >> 8;
+  }
+
+  SDValue Shuffle =
+      DAG.getVectorShuffle(Input.getValueType(), dl, Input,
+                           DAG.getUNDEF(Input.getValueType()), ShuffleMask);
+
+  EVT Ty = N->getValueType(0);
+  SDValue BV = DAG.getNode(PPCISD::SExtVElems, dl, Ty, Shuffle);
+  return BV;
+}
+
+// Look for build vector patterns where input operands come from sign
+// extended vector_extract elements of specific indices. If the correct indices
+// aren't used, add a vector shuffle to fix up the indices and create a new
+// PPCISD::SExtVElems node which selects the vector sign extend instructions
+// during instruction selection.
+static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) {
+  // This array encodes the indices that the vector sign extend instructions
+  // extract from when extending from one type to another for both BE and LE.
+  // The right nibble of each byte corresponds to the LE indices,
+  // and the left nibble of each byte corresponds to the BE indices.
+  // For example: 0x3074B8FC  byte->word
+  // For LE: the allowed indices are: 0x0,0x4,0x8,0xC
+  // For BE: the allowed indices are: 0x3,0x7,0xB,0xF
+  // For example: 0x000070F8  byte->double word
+  // For LE: the allowed indices are: 0x0,0x8
+  // For BE: the allowed indices are: 0x7,0xF
+  uint64_t TargetElems[] = {
+      0x3074B8FC, // b->w
+      0x000070F8, // b->d
+      0x10325476, // h->w
+      0x00003074, // h->d
+      0x00001032, // w->d
+  };
+
+  uint64_t Elems = 0;
+  int Index;
+  SDValue Input;
+
+  auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
+    if (!Op)
+      return false;
+    if (Op.getOpcode() != ISD::SIGN_EXTEND)
+      return false;
+
+    SDValue Extract = Op.getOperand(0);
+    if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+      return false;
+
+    ConstantSDNode *ExtOp = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
+    if (!ExtOp)
+      return false;
+
+    Index = ExtOp->getZExtValue();
+    if (Input && Input != Extract.getOperand(0))
+      return false;
+
+    if (!Input)
+      Input = Extract.getOperand(0);
+
+    Elems = Elems << 8;
+    Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4;
+    Elems |= Index;
+
+    return true;
+  };
+
+  // If the build vector operands aren't sign-extended vector extracts
+  // of the same input vector, then return.
+  for (unsigned i = 0; i < N->getNumOperands(); i++) {
+    if (!isSExtOfVecExtract(N->getOperand(i))) {
+      return SDValue();
+    }
+  }
+
+  // If the vector extract indices are not correct, add the appropriate
+  // vector_shuffle.
+  int TgtElemArrayIdx;
+  int InputSize = Input.getValueType().getScalarSizeInBits();
+  int OutputSize = N->getValueType(0).getScalarSizeInBits();
+  if (InputSize + OutputSize == 40)
+    TgtElemArrayIdx = 0;
+  else if (InputSize + OutputSize == 72)
+    TgtElemArrayIdx = 1;
+  else if (InputSize + OutputSize == 48)
+    TgtElemArrayIdx = 2;
+  else if (InputSize + OutputSize == 80)
+    TgtElemArrayIdx = 3;
+  else if (InputSize + OutputSize == 96)
+    TgtElemArrayIdx = 4;
+  else
+    return SDValue();
+
+  uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
+  CorrectElems = DAG.getDataLayout().isLittleEndian()
+                     ? CorrectElems & 0x0F0F0F0F0F0F0F0F
+                     : CorrectElems & 0xF0F0F0F0F0F0F0F0;
+  if (Elems != CorrectElems) {
+    return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
+  }
+
+  // Regular lowering will catch cases where a shuffle is not needed.
+ return SDValue(); +} + SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N, DAGCombinerInfo &DCI) const { assert(N->getOpcode() == ISD::BUILD_VECTOR && @@ -11028,6 +11478,15 @@ SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N, if (Reduced) return Reduced; + // If we're building a vector out of extended elements from another vector + // we have P9 vector integer extend instructions. + if (Subtarget.hasP9Altivec()) { + Reduced = combineBVOfVecSExt(N, DAG); + if (Reduced) + return Reduced; + } + + if (N->getValueType(0) != MVT::v2f64) return SDValue(); diff --git a/interpreter/llvm/src/lib/Target/PowerPC/PPCISelLowering.h b/interpreter/llvm/src/lib/Target/PowerPC/PPCISelLowering.h index 4fc7442572628..49d7d8220af16 100644 --- a/interpreter/llvm/src/lib/Target/PowerPC/PPCISelLowering.h +++ b/interpreter/llvm/src/lib/Target/PowerPC/PPCISelLowering.h @@ -67,6 +67,10 @@ namespace llvm { /// VSFRC that is sign-extended from ByteWidth to a 64-byte integer. VEXTS, + /// SExtVElems, takes an input vector of a smaller type and sign + /// extends to an output vector of a larger type. + SExtVElems, + /// Reciprocal estimate instructions (unary FP ops). FRE, FRSQRTE, @@ -86,10 +90,18 @@ namespace llvm { /// XXINSERT, + /// XXREVERSE - The PPC VSX reverse instruction + /// + XXREVERSE, + /// VECSHL - The PPC VSX shift left instruction /// VECSHL, + /// XXPERMDI - The PPC XXPERMDI instruction + /// + XXPERMDI, + /// The CMPB instruction (takes two operands of i32 or i64). CMPB, @@ -450,7 +462,32 @@ namespace llvm { /// a VMRGEW or VMRGOW instruction bool isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven, unsigned ShuffleKind, SelectionDAG &DAG); - + /// isXXSLDWIShuffleMask - Return true if this is a shuffle mask suitable + /// for a XXSLDWI instruction. + bool isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, + bool &Swap, bool IsLE); + + /// isXXBRHShuffleMask - Return true if this is a shuffle mask suitable + /// for a XXBRH instruction. + bool isXXBRHShuffleMask(ShuffleVectorSDNode *N); + + /// isXXBRWShuffleMask - Return true if this is a shuffle mask suitable + /// for a XXBRW instruction. + bool isXXBRWShuffleMask(ShuffleVectorSDNode *N); + + /// isXXBRDShuffleMask - Return true if this is a shuffle mask suitable + /// for a XXBRD instruction. + bool isXXBRDShuffleMask(ShuffleVectorSDNode *N); + + /// isXXBRQShuffleMask - Return true if this is a shuffle mask suitable + /// for a XXBRQ instruction. + bool isXXBRQShuffleMask(ShuffleVectorSDNode *N); + + /// isXXPERMDIShuffleMask - Return true if this is a shuffle mask suitable + /// for a XXPERMDI instruction. + bool isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, + bool &Swap, bool IsLE); + /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the /// shift amount, otherwise return -1. int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, @@ -579,7 +616,7 @@ namespace llvm { /// is not better represented as reg+reg. If Aligned is true, only accept /// displacements suitable for STD and friends, i.e. multiples of 4. bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base, - SelectionDAG &DAG, bool Aligned) const; + SelectionDAG &DAG, unsigned Alignment) const; /// SelectAddressRegRegOnly - Given the specified addressed, force it to be /// represented as an indexed [r+r] operation. 
@@ -905,6 +942,8 @@ namespace llvm { SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerREM(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const; SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const; @@ -1057,6 +1096,9 @@ namespace llvm { ISD::ArgFlagsTy &ArgFlags, CCState &State); + bool isIntS16Immediate(SDNode *N, int16_t &Imm); + bool isIntS16Immediate(SDValue Op, int16_t &Imm); + } // end namespace llvm #endif // LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H diff --git a/interpreter/llvm/src/lib/Target/PowerPC/PPCInstr64Bit.td b/interpreter/llvm/src/lib/Target/PowerPC/PPCInstr64Bit.td index a8433919f0f3a..e2af5e5295445 100644 --- a/interpreter/llvm/src/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/interpreter/llvm/src/lib/Target/PowerPC/PPCInstr64Bit.td @@ -683,6 +683,16 @@ def DIVDE : XOForm_1<31, 425, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), "divde $rT, $rA, $rB", IIC_IntDivD, [(set i64:$rT, (int_ppc_divde g8rc:$rA, g8rc:$rB))]>, isPPC64, Requires<[HasExtDiv]>; + +let Predicates = [IsISA3_0] in { +def MODSD : XForm_8<31, 777, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), + "modsd $rT, $rA, $rB", IIC_IntDivW, + [(set i64:$rT, (srem i64:$rA, i64:$rB))]>; +def MODUD : XForm_8<31, 265, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), + "modud $rT, $rA, $rB", IIC_IntDivW, + [(set i64:$rT, (urem i64:$rA, i64:$rB))]>; +} + let Defs = [CR0] in def DIVDEo : XOForm_1<31, 425, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), "divde. $rT, $rA, $rB", IIC_IntDivD, @@ -735,12 +745,12 @@ def RLDICL_32_64 : MDForm_1<30, 0, "rldicl $rA, $rS, $SH, $MBE", IIC_IntRotateDI, []>, isPPC64; // End fast-isel. -let isCodeGenOnly = 1 in -def RLDICL_32 : MDForm_1<30, 0, - (outs gprc:$rA), - (ins gprc:$rS, u6imm:$SH, u6imm:$MBE), - "rldicl $rA, $rS, $SH, $MBE", IIC_IntRotateDI, - []>, isPPC64; +let Interpretation64Bit = 1, isCodeGenOnly = 1 in +defm RLDICL_32 : MDForm_1r<30, 0, + (outs gprc:$rA), + (ins gprc:$rS, u6imm:$SH, u6imm:$MBE), + "rldicl", "$rA, $rS, $SH, $MBE", IIC_IntRotateDI, + []>, isPPC64; defm RLDICR : MDForm_1r<30, 1, (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH, u6imm:$MBE), "rldicr", "$rA, $rS, $SH, $MBE", IIC_IntRotateDI, @@ -962,13 +972,15 @@ def LDMX : XForm_1<31, 309, (outs g8rc:$rD), (ins memrr:$src), // Support for medium and large code model. let hasSideEffects = 0 in { +let isReMaterializable = 1 in { def ADDIStocHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp), "#ADDIStocHA", []>, isPPC64; +def ADDItocL: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp), + "#ADDItocL", []>, isPPC64; +} let mayLoad = 1 in def LDtocL: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc_nox0:$reg), "#LDtocL", []>, isPPC64; -def ADDItocL: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp), - "#ADDItocL", []>, isPPC64; } // Support for thread-local storage. 
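Marking ADDIStocHA and ADDItocL rematerializable is sound because both read only the TOC pointer and a link-time-constant offset, so the register allocator can recompute the address at a use instead of spilling it. The pair they lower to has the familiar medium/large code model shape (illustrative assembly, not taken from this patch):

// Medium/large code model TOC addressing, roughly:
//   addis r3, r2, sym@toc@ha   ; ADDIStocHA: TOC-relative high adjusted
//   addi  r3, r3, sym@toc@l    ; ADDItocL:   TOC-relative low
// r2 (the TOC base) and the relocation are both invariant, which is what the
// new isReallyTriviallyReMaterializable override (below) double-checks.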
@@ -983,6 +995,10 @@ def LDgotTprelL: Pseudo<(outs g8rc:$rD), (ins s16imm64:$disp, g8rc_nox0:$reg), [(set i64:$rD, (PPCldGotTprelL tglobaltlsaddr:$disp, i64:$reg))]>, isPPC64; + +let isPseudo = 1, Defs = [CR7], Itinerary = IIC_LdStSync in +def CFENCE8 : Pseudo<(outs), (ins g8rc:$cr), "#CFENCE8", []>; + def : Pat<(PPCaddTls i64:$in, tglobaltlsaddr:$g), (ADD8TLS $in, tglobaltlsaddr:$g)>; def ADDIStlsgdHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp), @@ -997,7 +1013,9 @@ def ADDItlsgdL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp), isPPC64; // LR8 is a true define, while the rest of the Defs are clobbers. X3 is // explicitly defined when this op is created, so not mentioned here. -let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1, +// This is lowered to BL8_NOP_TLS by the assembly printer, so the size must be +// correct because the branch select pass is relying on it. +let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1, Size = 8, Defs = [X0,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7] in def GETtlsADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym), "#GETtlsADDR", diff --git a/interpreter/llvm/src/lib/Target/PowerPC/PPCInstrAltivec.td b/interpreter/llvm/src/lib/Target/PowerPC/PPCInstrAltivec.td index e14d18fd54331..5465b5f2d66cd 100644 --- a/interpreter/llvm/src/lib/Target/PowerPC/PPCInstrAltivec.td +++ b/interpreter/llvm/src/lib/Target/PowerPC/PPCInstrAltivec.td @@ -987,12 +987,16 @@ def : Pat<(v8i16 (shl v8i16:$vA, v8i16:$vB)), (v8i16 (VSLH $vA, $vB))>; def : Pat<(v4i32 (shl v4i32:$vA, v4i32:$vB)), (v4i32 (VSLW $vA, $vB))>; +def : Pat<(v1i128 (shl v1i128:$vA, v1i128:$vB)), + (v1i128 (VSL (VSLO $vA, $vB), (VSPLTB 15, $vB)))>; def : Pat<(v16i8 (PPCshl v16i8:$vA, v16i8:$vB)), (v16i8 (VSLB $vA, $vB))>; def : Pat<(v8i16 (PPCshl v8i16:$vA, v8i16:$vB)), (v8i16 (VSLH $vA, $vB))>; def : Pat<(v4i32 (PPCshl v4i32:$vA, v4i32:$vB)), (v4i32 (VSLW $vA, $vB))>; +def : Pat<(v1i128 (PPCshl v1i128:$vA, v1i128:$vB)), + (v1i128 (VSL (VSLO $vA, $vB), (VSPLTB 15, $vB)))>; def : Pat<(v16i8 (srl v16i8:$vA, v16i8:$vB)), (v16i8 (VSRB $vA, $vB))>; @@ -1000,12 +1004,16 @@ def : Pat<(v8i16 (srl v8i16:$vA, v8i16:$vB)), (v8i16 (VSRH $vA, $vB))>; def : Pat<(v4i32 (srl v4i32:$vA, v4i32:$vB)), (v4i32 (VSRW $vA, $vB))>; +def : Pat<(v1i128 (srl v1i128:$vA, v1i128:$vB)), + (v1i128 (VSR (VSRO $vA, $vB), (VSPLTB 15, $vB)))>; def : Pat<(v16i8 (PPCsrl v16i8:$vA, v16i8:$vB)), (v16i8 (VSRB $vA, $vB))>; def : Pat<(v8i16 (PPCsrl v8i16:$vA, v8i16:$vB)), (v8i16 (VSRH $vA, $vB))>; def : Pat<(v4i32 (PPCsrl v4i32:$vA, v4i32:$vB)), (v4i32 (VSRW $vA, $vB))>; +def : Pat<(v1i128 (PPCsrl v1i128:$vA, v1i128:$vB)), + (v1i128 (VSR (VSRO $vA, $vB), (VSPLTB 15, $vB)))>; def : Pat<(v16i8 (sra v16i8:$vA, v16i8:$vB)), (v16i8 (VSRAB $vA, $vB))>; diff --git a/interpreter/llvm/src/lib/Target/PowerPC/PPCInstrInfo.cpp b/interpreter/llvm/src/lib/Target/PowerPC/PPCInstrInfo.cpp index 790a8902b3d2b..e74ba38c351f0 100644 --- a/interpreter/llvm/src/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/interpreter/llvm/src/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -292,6 +292,29 @@ unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr &MI, return 0; } +// For opcodes with the ReMaterializable flag set, this function is called to +// verify the instruction is really rematable. 
+bool PPCInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI, + AliasAnalysis *AA) const { + switch (MI.getOpcode()) { + default: + // This function should only be called for opcodes with the ReMaterializable + // flag set. + llvm_unreachable("Unknown rematerializable operation!"); + break; + case PPC::LI: + case PPC::LI8: + case PPC::LIS: + case PPC::LIS8: + case PPC::QVGPCI: + case PPC::ADDIStocHA: + case PPC::ADDItocL: + case PPC::LOAD_STACK_GUARD: + return true; + } + return false; +} + unsigned PPCInstrInfo::isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const { // Note: This list must be kept consistent with StoreRegToStackSlot. @@ -1533,6 +1556,8 @@ bool PPCInstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg, case PPC::FCMPUD: SrcReg = MI.getOperand(1).getReg(); SrcReg2 = MI.getOperand(2).getReg(); + Value = 0; + Mask = 0; return true; } } @@ -1591,9 +1616,12 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg, // We can perform this optimization, equality only, if MI is // zero-extending. + // FIXME: Other possible target instructions include ANDISo and + // RLWINM aliases, such as ROTRWI, EXTLWI, SLWI and SRWI. if (MIOpC == PPC::CNTLZW || MIOpC == PPC::CNTLZWo || MIOpC == PPC::SLW || MIOpC == PPC::SLWo || MIOpC == PPC::SRW || MIOpC == PPC::SRWo || + MIOpC == PPC::ANDIo || isZeroExtendingRotate) { noSub = true; equalityOnly = true; @@ -1607,8 +1635,9 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg, if (equalityOnly) { // We need to check the uses of the condition register in order to reject // non-equality comparisons. - for (MachineRegisterInfo::use_instr_iterator I =MRI->use_instr_begin(CRReg), - IE = MRI->use_instr_end(); I != IE; ++I) { + for (MachineRegisterInfo::use_instr_iterator + I = MRI->use_instr_begin(CRReg), IE = MRI->use_instr_end(); + I != IE; ++I) { MachineInstr *UseMI = &*I; if (UseMI->getOpcode() == PPC::BCC) { unsigned Pred = UseMI->getOperand(0).getImm(); @@ -1630,8 +1659,9 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg, for (MachineBasicBlock::iterator EL = CmpInstr.getParent()->end(); I != EL; ++I) { bool FoundUse = false; - for (MachineRegisterInfo::use_instr_iterator J =MRI->use_instr_begin(CRReg), - JE = MRI->use_instr_end(); J != JE; ++J) + for (MachineRegisterInfo::use_instr_iterator + J = MRI->use_instr_begin(CRReg), JE = MRI->use_instr_end(); + J != JE; ++J) if (&*J == &*I) { FoundUse = true; break; @@ -1641,6 +1671,9 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg, break; } + SmallVector<std::pair<MachineOperand*, PPC::Predicate>, 4> PredsToUpdate; + SmallVector<std::pair<MachineOperand*, unsigned>, 4> SubRegsToUpdate; + // There are two possible candidates which can be changed to set CR[01]. // One is MI, the other is a SUB instruction. // For CMPrr(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1). @@ -1652,9 +1685,37 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg, // same BB as the comparison. This is to allow the check below to avoid calls // (and other explicit clobbers); instead we should really check for these // more explicitly (in at least a few predecessors). - else if (MI->getParent() != CmpInstr.getParent() || Value != 0) { - // PPC does not have a record-form SUBri. + else if (MI->getParent() != CmpInstr.getParent()) return false; + else if (Value != 0) { + // The record-form instructions set the CR bit based on a signed comparison against 0.
+ // We try to convert a compare against 1 or -1 into a compare against 0. + bool Success = false; + if (!equalityOnly && MRI->hasOneUse(CRReg)) { + MachineInstr *UseMI = &*MRI->use_instr_begin(CRReg); + if (UseMI->getOpcode() == PPC::BCC) { + PPC::Predicate Pred = (PPC::Predicate)UseMI->getOperand(0).getImm(); + int16_t Immed = (int16_t)Value; + + if (Immed == -1 && Pred == PPC::PRED_GT) { + // We convert "greater than -1" into "greater than or equal to 0", + // since !equalityOnly means we are assuming a signed comparison. + PredsToUpdate.push_back(std::make_pair(&(UseMI->getOperand(0)), + PPC::PRED_GE)); + Success = true; + } + else if (Immed == 1 && Pred == PPC::PRED_LT) { + // We convert "less than 1" into "less than or equal to 0". + PredsToUpdate.push_back(std::make_pair(&(UseMI->getOperand(0)), + PPC::PRED_LE)); + Success = true; + } + } + } + + // PPC does not have a record-form SUBri. + if (!Success) + return false; } // Search for Sub. @@ -1720,15 +1781,14 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg, if (NewOpC == -1) return false; - SmallVector<std::pair<MachineOperand*, PPC::Predicate>, 4> PredsToUpdate; - SmallVector<std::pair<MachineOperand*, unsigned>, 4> SubRegsToUpdate; - // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based on CMP // needs to be updated to be based on SUB. Push the condition code // operands to OperandsToUpdate. If it is safe to remove CmpInstr, the // condition code of these operands will be modified. + // Here, Value == 0 means we haven't converted a comparison against 1 or -1 + // into a comparison against 0, a conversion that may modify the predicate. bool ShouldSwap = false; - if (Sub) { + if (Sub && Value == 0) { ShouldSwap = SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 && Sub->getOperand(2).getReg() == SrcReg; @@ -1765,6 +1825,9 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg, } else // We need to abort on a user we don't understand. return false; } + assert(!(Value != 0 && ShouldSwap) && + "Non-zero immediate support and ShouldSwap " + "may conflict in updating predicate"); // Create a new virtual register to hold the value of the CR set by the // record-form instruction.
If the instruction was not previously in @@ -1873,6 +1936,8 @@ PPCInstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const { } bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { + auto &MBB = *MI.getParent(); + auto DL = MI.getDebugLoc(); switch (MI.getOpcode()) { case TargetOpcode::LOAD_STACK_GUARD: { assert(Subtarget.isTargetLinux() && @@ -1891,6 +1956,8 @@ bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { case PPC::DFSTOREf64: { assert(Subtarget.hasP9Vector() && "Invalid D-Form Pseudo-ops on non-P9 target."); + assert(MI.getOperand(2).isReg() && MI.getOperand(1).isImm() && + "D-form op must have register and immediate operands"); unsigned UpperOpcode, LowerOpcode; switch (MI.getOpcode()) { case PPC::DFLOADf32: @@ -1920,6 +1987,17 @@ bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { MI.setDesc(get(Opcode)); return true; } + case PPC::CFENCE8: { + auto Val = MI.getOperand(0).getReg(); + BuildMI(MBB, MI, DL, get(PPC::CMPD), PPC::CR7).addReg(Val).addReg(Val); + BuildMI(MBB, MI, DL, get(PPC::CTRL_DEP)) + .addImm(PPC::PRED_NE_MINUS) + .addReg(PPC::CR7) + .addImm(1); + MI.setDesc(get(PPC::ISYNC)); + MI.RemoveOperand(0); + return true; + } } return false; } @@ -1930,3 +2008,7 @@ PPCInstrInfo::updatedRC(const TargetRegisterClass *RC) const { return &PPC::VSRCRegClass; return RC; } + +int PPCInstrInfo::getRecordFormOpcode(unsigned Opcode) { + return PPC::getRecordFormOpcode(Opcode); +} diff --git a/interpreter/llvm/src/lib/Target/PowerPC/PPCInstrInfo.h b/interpreter/llvm/src/lib/Target/PowerPC/PPCInstrInfo.h index b30d09e03ec47..b0629c88cf57b 100644 --- a/interpreter/llvm/src/lib/Target/PowerPC/PPCInstrInfo.h +++ b/interpreter/llvm/src/lib/Target/PowerPC/PPCInstrInfo.h @@ -162,6 +162,8 @@ class PPCInstrInfo : public PPCGenInstrInfo { unsigned &SubIdx) const override; unsigned isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override; + bool isReallyTriviallyReMaterializable(const MachineInstr &MI, + AliasAnalysis *AA) const override; unsigned isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override; @@ -290,6 +292,7 @@ class PPCInstrInfo : public PPCGenInstrInfo { return Reg >= PPC::V0 && Reg <= PPC::V31; } const TargetRegisterClass *updatedRC(const TargetRegisterClass *RC) const; + static int getRecordFormOpcode(unsigned Opcode); }; } diff --git a/interpreter/llvm/src/lib/Target/PowerPC/PPCInstrInfo.td b/interpreter/llvm/src/lib/Target/PowerPC/PPCInstrInfo.td index 1af5e7f28342f..dd7fc2659102a 100644 --- a/interpreter/llvm/src/lib/Target/PowerPC/PPCInstrInfo.td +++ b/interpreter/llvm/src/lib/Target/PowerPC/PPCInstrInfo.td @@ -32,6 +32,9 @@ def SDT_PPCstxsix : SDTypeProfile<0, 3, [ def SDT_PPCVexts : SDTypeProfile<1, 2, [ SDTCisVT<0, f64>, SDTCisVT<1, f64>, SDTCisPtrTy<2> ]>; +def SDT_PPCSExtVElems : SDTypeProfile<1, 1, [ + SDTCisVec<0>, SDTCisVec<1> +]>; def SDT_PPCCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32>, SDTCisVT<1, i32> ]>; @@ -46,13 +49,21 @@ def SDT_PPCVecSplat : SDTypeProfile<1, 2, [ SDTCisVec<0>, ]>; def SDT_PPCVecShift : SDTypeProfile<1, 3, [ SDTCisVec<0>, - SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3> + SDTCisVec<1>, SDTCisVec<2>, SDTCisPtrTy<3> ]>; def SDT_PPCVecInsert : SDTypeProfile<1, 3, [ SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3> ]>; +def SDT_PPCVecReverse: SDTypeProfile<1, 1, [ SDTCisVec<0>, + SDTCisVec<1> +]>; + +def SDT_PPCxxpermdi: SDTypeProfile<1, 3, [ SDTCisVec<0>, + SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3> +]>; + def SDT_PPCvcmp : SDTypeProfile<1, 3, [ SDTCisSameAs<0, 1>, 
SDTCisSameAs<1, 2>, SDTCisVT<3, i32> ]>; @@ -123,6 +134,7 @@ def PPClxsizx : SDNode<"PPCISD::LXSIZX", SDT_PPCLxsizx, def PPCstxsix : SDNode<"PPCISD::STXSIX", SDT_PPCstxsix, [SDNPHasChain, SDNPMayStore]>; def PPCVexts : SDNode<"PPCISD::VEXTS", SDT_PPCVexts, []>; +def PPCSExtVElems : SDNode<"PPCISD::SExtVElems", SDT_PPCSExtVElems, []>; // Extract FPSCR (not modeled at the DAG level). def PPCmffs : SDNode<"PPCISD::MFFS", @@ -170,6 +182,8 @@ def PPCaddiDtprelL : SDNode<"PPCISD::ADDI_DTPREL_L", SDTIntBinOp>; def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>; def PPCxxsplt : SDNode<"PPCISD::XXSPLT", SDT_PPCVecSplat, []>; def PPCxxinsert : SDNode<"PPCISD::XXINSERT", SDT_PPCVecInsert, []>; +def PPCxxreverse : SDNode<"PPCISD::XXREVERSE", SDT_PPCVecReverse, []>; +def PPCxxpermdi : SDNode<"PPCISD::XXPERMDI", SDT_PPCxxpermdi, []>; def PPCvecshl : SDNode<"PPCISD::VECSHL", SDT_PPCVecShift, []>; def PPCqvfperm : SDNode<"PPCISD::QVFPERM", SDT_PPCqvfperm, []>; @@ -391,6 +405,25 @@ def unaligned4sextloadi32 : PatFrag<(ops node:$ptr), (sextloadi32 node:$ptr), [{ return cast(N)->getAlignment() < 4; }]>; +// This is a somewhat weaker condition than actually checking for 16-byte +// alignment. It is simply checking that the displacement can be represented +// as an immediate that is a multiple of 16 (i.e. the requirements for DQ-Form +// instructions). +def quadwOffsetLoad : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return isOffsetMultipleOf(N, 16); +}]>; +def quadwOffsetStore : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return isOffsetMultipleOf(N, 16); +}]>; +def nonQuadwOffsetLoad : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return !isOffsetMultipleOf(N, 16); +}]>; +def nonQuadwOffsetStore : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return !isOffsetMultipleOf(N, 16); +}]>; + //===----------------------------------------------------------------------===// // PowerPC Flag Definitions. @@ -801,7 +834,8 @@ def pred : Operand { def iaddr : ComplexPattern; def xaddr : ComplexPattern; def xoaddr : ComplexPattern; -def ixaddr : ComplexPattern; // "std" +def ixaddr : ComplexPattern; // "std" +def iqaddr : ComplexPattern; // "stxv" // The address in a single register. This is used with the SjLj // pseudo-instructions. @@ -1223,9 +1257,15 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in { // FIXME: should be able to write a pattern for PPCcondbranch, but can't use // a two-value operand where a dag node expects two operands. :( let isCodeGenOnly = 1 in { - def BCC : BForm<16, 0, 0, (outs), (ins pred:$cond, condbrtarget:$dst), - "b${cond:cc}${cond:pm} ${cond:reg}, $dst" - /*[(PPCcondbranch crrc:$crS, imm:$opc, bb:$dst)]*/>; + class BCC_class : BForm<16, 0, 0, (outs), (ins pred:$cond, condbrtarget:$dst), + "b${cond:cc}${cond:pm} ${cond:reg}, $dst" + /*[(PPCcondbranch crrc:$crS, imm:$opc, bb:$dst)]*/>; + def BCC : BCC_class; + + // The same as BCC, except that it's not a terminator. Used for introducing + // control flow dependency without creating new blocks. + let isTerminator = 0 in def CTRL_DEP : BCC_class; + def BCCA : BForm<16, 1, 0, (outs), (ins pred:$cond, abscondbrtarget:$dst), "b${cond:cc}a${cond:pm} ${cond:reg}, $dst">; @@ -2533,6 +2573,14 @@ let Uses = [RM] in { "mffs. 
$rT", IIC_IntMFFS, []>, isDOT; } +let Predicates = [IsISA3_0] in { +def MODSW : XForm_8<31, 779, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "modsw $rT, $rA, $rB", IIC_IntDivW, + [(set i32:$rT, (srem i32:$rA, i32:$rB))]>; +def MODUW : XForm_8<31, 267, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "moduw $rT, $rA, $rB", IIC_IntDivW, + [(set i32:$rT, (urem i32:$rA, i32:$rB))]>; +} let PPC970_Unit = 1, hasSideEffects = 0 in { // FXU Operations. // XO-Form instructions. Arithmetic instructions that can set overflow bit @@ -4426,3 +4474,190 @@ def MSGSYNC : XForm_0<31, 886, (outs), (ins), "msgsync", IIC_SprMSGSYNC, []>; def STOP : XForm_0<19, 370, (outs), (ins), "stop", IIC_SprSTOP, []>; } // IsISA3_0 + +// Fast 32-bit reverse bits algorithm: +// Step 1: 1-bit swap (swap odd 1-bit and even 1-bit): +// n = ((n >> 1) & 0x55555555) | ((n << 1) & 0xAAAAAAAA); +// Step 2: 2-bit swap (swap odd 2-bit and even 2-bit): +// n = ((n >> 2) & 0x33333333) | ((n << 2) & 0xCCCCCCCC); +// Step 3: 4-bit swap (swap odd 4-bit and even 4-bit): +// n = ((n >> 4) & 0x0F0F0F0F) | ((n << 4) & 0xF0F0F0F0); +// Step 4: byte reverse (Suppose n = [B1,B2,B3,B4]): +// Step 4.1: Put B4,B2 in the right position (rotate left 3 bytes): +// n' = (n rotl 24); After which n' = [B4, B1, B2, B3] +// Step 4.2: Insert B3 to the right position: +// n' = rlwimi n', n, 8, 8, 15; After which n' = [B4, B3, B2, B3] +// Step 4.3: Insert B1 to the right position: +// n' = rlwimi n', n, 8, 24, 31; After which n' = [B4, B3, B2, B1] +def MaskValues { + dag Lo1 = (ORI (LIS 0x5555), 0x5555); + dag Hi1 = (ORI (LIS 0xAAAA), 0xAAAA); + dag Lo2 = (ORI (LIS 0x3333), 0x3333); + dag Hi2 = (ORI (LIS 0xCCCC), 0xCCCC); + dag Lo4 = (ORI (LIS 0x0F0F), 0x0F0F); + dag Hi4 = (ORI (LIS 0xF0F0), 0xF0F0); +} + +def Shift1 { + dag Right = (RLWINM $A, 31, 1, 31); + dag Left = (RLWINM $A, 1, 0, 30); +} + +def Swap1 { + dag Bit = (OR (AND Shift1.Right, MaskValues.Lo1), + (AND Shift1.Left, MaskValues.Hi1)); +} + +def Shift2 { + dag Right = (RLWINM Swap1.Bit, 30, 2, 31); + dag Left = (RLWINM Swap1.Bit, 2, 0, 29); +} + +def Swap2 { + dag Bits = (OR (AND Shift2.Right, MaskValues.Lo2), + (AND Shift2.Left, MaskValues.Hi2)); +} + +def Shift4 { + dag Right = (RLWINM Swap2.Bits, 28, 4, 31); + dag Left = (RLWINM Swap2.Bits, 4, 0, 27); +} + +def Swap4 { + dag Bits = (OR (AND Shift4.Right, MaskValues.Lo4), + (AND Shift4.Left, MaskValues.Hi4)); +} + +def Rotate { + dag Left3Bytes = (RLWINM Swap4.Bits, 24, 0, 31); +} + +def RotateInsertByte3 { + dag Left = (RLWIMI Rotate.Left3Bytes, Swap4.Bits, 8, 8, 15); +} + +def RotateInsertByte1 { + dag Left = (RLWIMI RotateInsertByte3.Left, Swap4.Bits, 8, 24, 31); +} + +def : Pat<(i32 (bitreverse i32:$A)), + (RLDICL_32 RotateInsertByte1.Left, 0, 32)>; + +// Fast 64-bit reverse bits algorithm: +// Step 1: 1-bit swap (swap odd 1-bit and even 1-bit): +// n = ((n >> 1) & 0x5555555555555555) | ((n << 1) & 0xAAAAAAAAAAAAAAAA); +// Step 2: 2-bit swap (swap odd 2-bit and even 2-bit): +// n = ((n >> 2) & 0x3333333333333333) | ((n << 2) & 0xCCCCCCCCCCCCCCCC); +// Step 3: 4-bit swap (swap odd 4-bit and even 4-bit): +// n = ((n >> 4) & 0x0F0F0F0F0F0F0F0F) | ((n << 4) & 0xF0F0F0F0F0F0F0F0); +// Step 4: byte reverse (Suppose n = [B1,B2,B3,B4,B5,B6,B7,B8]): +// Apply the same byte reverse algorithm mentioned above for the fast 32-bit +// reverse to both the high 32 bit and low 32 bit of the 64 bit value. And +// then OR them together to get the final result. 
+def MaskValues64 { + dag Lo1 = (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), MaskValues.Lo1, sub_32)); + dag Hi1 = (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), MaskValues.Hi1, sub_32)); + dag Lo2 = (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), MaskValues.Lo2, sub_32)); + dag Hi2 = (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), MaskValues.Hi2, sub_32)); + dag Lo4 = (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), MaskValues.Lo4, sub_32)); + dag Hi4 = (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), MaskValues.Hi4, sub_32)); +} + +def DWMaskValues { + dag Lo1 = (ORI8 (ORIS8 (RLDICR MaskValues64.Lo1, 32, 31), 0x5555), 0x5555); + dag Hi1 = (ORI8 (ORIS8 (RLDICR MaskValues64.Hi1, 32, 31), 0xAAAA), 0xAAAA); + dag Lo2 = (ORI8 (ORIS8 (RLDICR MaskValues64.Lo2, 32, 31), 0x3333), 0x3333); + dag Hi2 = (ORI8 (ORIS8 (RLDICR MaskValues64.Hi2, 32, 31), 0xCCCC), 0xCCCC); + dag Lo4 = (ORI8 (ORIS8 (RLDICR MaskValues64.Lo4, 32, 31), 0x0F0F), 0x0F0F); + dag Hi4 = (ORI8 (ORIS8 (RLDICR MaskValues64.Hi4, 32, 31), 0xF0F0), 0xF0F0); +} + +def DWShift1 { + dag Right = (RLDICL $A, 63, 1); + dag Left = (RLDICR $A, 1, 62); +} + +def DWSwap1 { + dag Bit = (OR8 (AND8 DWShift1.Right, DWMaskValues.Lo1), + (AND8 DWShift1.Left, DWMaskValues.Hi1)); +} + +def DWShift2 { + dag Right = (RLDICL DWSwap1.Bit, 62, 2); + dag Left = (RLDICR DWSwap1.Bit, 2, 61); +} + +def DWSwap2 { + dag Bits = (OR8 (AND8 DWShift2.Right, DWMaskValues.Lo2), + (AND8 DWShift2.Left, DWMaskValues.Hi2)); +} + +def DWShift4 { + dag Right = (RLDICL DWSwap2.Bits, 60, 4); + dag Left = (RLDICR DWSwap2.Bits, 4, 59); +} + +def DWSwap4 { + dag Bits = (OR8 (AND8 DWShift4.Right, DWMaskValues.Lo4), + (AND8 DWShift4.Left, DWMaskValues.Hi4)); +} + +// Bit swap is done, now start byte swap. +def DWExtractLo32 { + dag SubReg = (i32 (EXTRACT_SUBREG DWSwap4.Bits, sub_32)); +} + +def DWRotateLo32 { + dag Left24 = (RLWINM DWExtractLo32.SubReg, 24, 0, 31); +} + +def DWLo32RotateInsertByte3 { + dag Left = (RLWIMI DWRotateLo32.Left24, DWExtractLo32.SubReg, 8, 8, 15); +} + +// Lower 32 bits in the right order +def DWLo32RotateInsertByte1 { + dag Left = + (RLWIMI DWLo32RotateInsertByte3.Left, DWExtractLo32.SubReg, 8, 24, 31); +} + +def ExtendLo32 { + dag To64Bit = + (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + DWLo32RotateInsertByte1.Left, sub_32)); +} + +def DWShiftHi32 { // SRDI DWSwap4.Bits, 32 + dag ToLo32 = (RLDICL DWSwap4.Bits, 32, 32); +} + +def DWExtractHi32 { + dag SubReg = (i32 (EXTRACT_SUBREG DWShiftHi32.ToLo32, sub_32)); +} + +def DWRotateHi32 { + dag Left24 = (RLWINM DWExtractHi32.SubReg, 24, 0, 31); +} + +def DWHi32RotateInsertByte3 { + dag Left = (RLWIMI DWRotateHi32.Left24, DWExtractHi32.SubReg, 8, 8, 15); +} + +// High 32 bits in the right order, but in the low 32-bit position +def DWHi32RotateInsertByte1 { + dag Left = + (RLWIMI DWHi32RotateInsertByte3.Left, DWExtractHi32.SubReg, 8, 24, 31); +} + +def ExtendHi32 { + dag To64Bit = + (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + DWHi32RotateInsertByte1.Left, sub_32)); +} + +def DWShiftLo32 { // SLDI ExtendLo32.To64Bit, 32 + dag ToHi32 = (RLDICR ExtendLo32.To64Bit, 32, 31); +} + +def : Pat<(i64 (bitreverse i64:$A)), + (OR8 DWShiftLo32.ToHi32, ExtendHi32.To64Bit)>; diff --git a/interpreter/llvm/src/lib/Target/PowerPC/PPCInstrVSX.td b/interpreter/llvm/src/lib/Target/PowerPC/PPCInstrVSX.td index b98140fedfc04..942e8b392b82b 100644 --- a/interpreter/llvm/src/lib/Target/PowerPC/PPCInstrVSX.td +++ b/interpreter/llvm/src/lib/Target/PowerPC/PPCInstrVSX.td @@ -843,7 +843,9 @@ let Uses = [RM] in { def XXPERMDI : XX3Form_2<60, 10, (outs vsrc:$XT), (ins
vsrc:$XA, vsrc:$XB, u2imm:$DM), - "xxpermdi $XT, $XA, $XB, $DM", IIC_VecPerm, []>; + "xxpermdi $XT, $XA, $XB, $DM", IIC_VecPerm, + [(set v2i64:$XT, (PPCxxpermdi v2i64:$XA, v2i64:$XB, + imm32SExt16:$DM))]>; let isCodeGenOnly = 1 in def XXPERMDIs : XX3Form_2s<60, 10, (outs vsrc:$XT), (ins vsfrc:$XA, u2imm:$DM), "xxpermdi $XT, $XA, $XA, $DM", IIC_VecPerm, []>; @@ -1066,6 +1068,10 @@ def : Pat<(v4f32 (PPCxxswapd v4f32:$src)), (XXPERMDI $src, $src, 2)>; def : Pat<(v4i32 (PPCxxswapd v4i32:$src)), (XXPERMDI $src, $src, 2)>; def : Pat<(v2f64 (PPCswapNoChain v2f64:$src)), (XXPERMDI $src, $src, 2)>; +// PPCvecshl XT, XA, XA, 2 can be selected to both XXSLDWI XT,XA,XA,2 and +// XXSWAPD XT,XA (i.e. XXPERMDI XT,XA,XA,2), the latter one is more profitable. +def : Pat<(v4i32 (PPCvecshl v4i32:$src, v4i32:$src, 2)), (XXPERMDI $src, $src, 2)>; + // Selects. def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETLT)), (SELECT_VSRC (CRANDC $lhs, $rhs), $tval, $fval)>; @@ -1895,6 +1901,98 @@ let Predicates = [IsLittleEndian, HasVSX] in def : Pat<(v4i32 (int_ppc_vsx_lxvw4x_be xoaddr:$src)), (LXVW4X xoaddr:$src)>; def : Pat<(v2f64 (int_ppc_vsx_lxvd2x_be xoaddr:$src)), (LXVD2X xoaddr:$src)>; +// Variable index unsigned vector_extract on Power9 +let Predicates = [HasP9Altivec, IsLittleEndian] in { + def : Pat<(i64 (anyext (i32 (vector_extract v16i8:$S, i64:$Idx)))), + (VEXTUBRX $Idx, $S)>; + + def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, i64:$Idx)))), + (VEXTUHRX (RLWINM8 $Idx, 1, 28, 30), $S)>; + def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 0)))), + (VEXTUHRX (LI8 0), $S)>; + def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 1)))), + (VEXTUHRX (LI8 2), $S)>; + def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 2)))), + (VEXTUHRX (LI8 4), $S)>; + def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 3)))), + (VEXTUHRX (LI8 6), $S)>; + def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 4)))), + (VEXTUHRX (LI8 8), $S)>; + def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 5)))), + (VEXTUHRX (LI8 10), $S)>; + def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 6)))), + (VEXTUHRX (LI8 12), $S)>; + def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 7)))), + (VEXTUHRX (LI8 14), $S)>; + + def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, i64:$Idx)))), + (VEXTUWRX (RLWINM8 $Idx, 2, 28, 29), $S)>; + def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 0)))), + (VEXTUWRX (LI8 0), $S)>; + def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 1)))), + (VEXTUWRX (LI8 4), $S)>; + def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 2)))), + (VEXTUWRX (LI8 8), $S)>; + def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 3)))), + (VEXTUWRX (LI8 12), $S)>; + + def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, i64:$Idx)))), + (EXTSW (VEXTUWRX (RLWINM8 $Idx, 2, 28, 29), $S))>; + def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 0)))), + (EXTSW (VEXTUWRX (LI8 0), $S))>; + def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 1)))), + (EXTSW (VEXTUWRX (LI8 4), $S))>; + def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 2)))), + (EXTSW (VEXTUWRX (LI8 8), $S))>; + def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 3)))), + (EXTSW (VEXTUWRX (LI8 12), $S))>; +} +let Predicates = [HasP9Altivec, IsBigEndian] in { + def : Pat<(i64 (anyext (i32 (vector_extract v16i8:$S, i64:$Idx)))), + (VEXTUBLX $Idx, $S)>; + + def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, i64:$Idx)))), + (VEXTUHLX (RLWINM8 $Idx, 1, 28, 30), $S)>; + def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 0)))), +
(VEXTUHLX (LI8 0), $S)>; + def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 1)))), + (VEXTUHLX (LI8 2), $S)>; + def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 2)))), + (VEXTUHLX (LI8 4), $S)>; + def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 3)))), + (VEXTUHLX (LI8 6), $S)>; + def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 4)))), + (VEXTUHLX (LI8 8), $S)>; + def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 5)))), + (VEXTUHLX (LI8 10), $S)>; + def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 6)))), + (VEXTUHLX (LI8 12), $S)>; + def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 7)))), + (VEXTUHLX (LI8 14), $S)>; + + def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, i64:$Idx)))), + (VEXTUWLX (RLWINM8 $Idx, 2, 28, 29), $S)>; + def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 0)))), + (VEXTUWLX (LI8 0), $S)>; + def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 1)))), + (VEXTUWLX (LI8 4), $S)>; + def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 2)))), + (VEXTUWLX (LI8 8), $S)>; + def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 3)))), + (VEXTUWLX (LI8 12), $S)>; + + def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, i64:$Idx)))), + (EXTSW (VEXTUWLX (RLWINM8 $Idx, 2, 28, 29), $S))>; + def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 0)))), + (EXTSW (VEXTUWLX (LI8 0), $S))>; + def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 1)))), + (EXTSW (VEXTUWLX (LI8 4), $S))>; + def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 2)))), + (EXTSW (VEXTUWLX (LI8 8), $S))>; + def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 3)))), + (EXTSW (VEXTUWLX (LI8 12), $S))>; +} + let Predicates = [IsLittleEndian, HasDirectMove] in { // v16i8 scalar <-> vector conversions (LE) def : Pat<(v16i8 (scalar_to_vector i32:$A)), @@ -2334,6 +2432,16 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { def XXBRD : XX2_XT6_XO5_XB6<60, 23, 475, "xxbrd", vsrc, []>; def XXBRQ : XX2_XT6_XO5_XB6<60, 31, 475, "xxbrq", vsrc, []>; + // Vector Reverse + def : Pat<(v8i16 (PPCxxreverse v8i16 :$A)), + (v8i16 (COPY_TO_REGCLASS (XXBRH (COPY_TO_REGCLASS $A, VSRC)), VRRC))>; + def : Pat<(v4i32 (PPCxxreverse v4i32 :$A)), + (v4i32 (XXBRW $A))>; + def : Pat<(v2i64 (PPCxxreverse v2i64 :$A)), + (v2i64 (XXBRD $A))>; + def : Pat<(v1i128 (PPCxxreverse v1i128 :$A)), + (v1i128 (COPY_TO_REGCLASS (XXBRQ (COPY_TO_REGCLASS $A, VSRC)), VRRC))>; + // Vector Permute def XXPERM : XX3_XT5_XA5_XB5<60, 26, "xxperm" , vsrc, vsrc, vsrc, IIC_VecPerm, []>; @@ -2379,8 +2487,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { // Load Vector Indexed def LXVX : X_XT6_RA5_RB5<31, 268, "lxvx" , vsrc, - [(set v2f64:$XT, (load xoaddr:$src))]>; - + [(set v2f64:$XT, (load xaddr:$src))]>; // Load Vector (Left-justified) with Length def LXVL : XX1Form<31, 269, (outs vsrc:$XT), (ins memr:$src, g8rc:$rB), "lxvl $XT, $src, $rB", IIC_LdStLoad, @@ -2430,7 +2537,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { // Store Vector Indexed def STXVX : X_XS6_RA5_RB5<31, 396, "stxvx" , vsrc, - [(store v2f64:$XT, xoaddr:$dst)]>; + [(store v2f64:$XT, xaddr:$dst)]>; // Store Vector (Left-justified) with Length def STXVL : XX1Form<31, 397, (outs), (ins vsrc:$XT, memr:$dst, g8rc:$rB), @@ -2498,21 +2605,42 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 12))>; } // IsLittleEndian, HasP9Vector - def : Pat<(v2f64 (load xoaddr:$src)), (LXVX xoaddr:$src)>; - def : Pat<(v2i64 (load xoaddr:$src)), (LXVX xoaddr:$src)>; - def : Pat<(v4f32 (load 
xoaddr:$src)), (LXVX xoaddr:$src)>; - def : Pat<(v4i32 (load xoaddr:$src)), (LXVX xoaddr:$src)>; + // D-Form Load/Store + def : Pat<(v4i32 (quadwOffsetLoad iqaddr:$src)), (LXV memrix16:$src)>; + def : Pat<(v4f32 (quadwOffsetLoad iqaddr:$src)), (LXV memrix16:$src)>; + def : Pat<(v2i64 (quadwOffsetLoad iqaddr:$src)), (LXV memrix16:$src)>; + def : Pat<(v2f64 (quadwOffsetLoad iqaddr:$src)), (LXV memrix16:$src)>; + def : Pat<(v4i32 (int_ppc_vsx_lxvw4x iqaddr:$src)), (LXV memrix16:$src)>; + def : Pat<(v2f64 (int_ppc_vsx_lxvd2x iqaddr:$src)), (LXV memrix16:$src)>; + + def : Pat<(quadwOffsetStore v4f32:$rS, iqaddr:$dst), (STXV $rS, memrix16:$dst)>; + def : Pat<(quadwOffsetStore v4i32:$rS, iqaddr:$dst), (STXV $rS, memrix16:$dst)>; + def : Pat<(quadwOffsetStore v2f64:$rS, iqaddr:$dst), (STXV $rS, memrix16:$dst)>; + def : Pat<(quadwOffsetStore v2i64:$rS, iqaddr:$dst), (STXV $rS, memrix16:$dst)>; + def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, iqaddr:$dst), + (STXV $rS, memrix16:$dst)>; + def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, iqaddr:$dst), + (STXV $rS, memrix16:$dst)>; + + + def : Pat<(v2f64 (nonQuadwOffsetLoad xoaddr:$src)), (LXVX xoaddr:$src)>; + def : Pat<(v2i64 (nonQuadwOffsetLoad xoaddr:$src)), (LXVX xoaddr:$src)>; + def : Pat<(v4f32 (nonQuadwOffsetLoad xoaddr:$src)), (LXVX xoaddr:$src)>; + def : Pat<(v4i32 (nonQuadwOffsetLoad xoaddr:$src)), (LXVX xoaddr:$src)>; def : Pat<(v4i32 (int_ppc_vsx_lxvw4x xoaddr:$src)), (LXVX xoaddr:$src)>; def : Pat<(v2f64 (int_ppc_vsx_lxvd2x xoaddr:$src)), (LXVX xoaddr:$src)>; - def : Pat<(store v2f64:$rS, xoaddr:$dst), (STXVX $rS, xoaddr:$dst)>; - def : Pat<(store v2i64:$rS, xoaddr:$dst), (STXVX $rS, xoaddr:$dst)>; - def : Pat<(store v4f32:$rS, xoaddr:$dst), (STXVX $rS, xoaddr:$dst)>; - def : Pat<(store v4i32:$rS, xoaddr:$dst), (STXVX $rS, xoaddr:$dst)>; + def : Pat<(nonQuadwOffsetStore v2f64:$rS, xoaddr:$dst), + (STXVX $rS, xoaddr:$dst)>; + def : Pat<(nonQuadwOffsetStore v2i64:$rS, xoaddr:$dst), + (STXVX $rS, xoaddr:$dst)>; + def : Pat<(nonQuadwOffsetStore v4f32:$rS, xoaddr:$dst), + (STXVX $rS, xoaddr:$dst)>; + def : Pat<(nonQuadwOffsetStore v4i32:$rS, xoaddr:$dst), + (STXVX $rS, xoaddr:$dst)>; def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xoaddr:$dst), (STXVX $rS, xoaddr:$dst)>; def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xoaddr:$dst), (STXVX $rS, xoaddr:$dst)>; - def : Pat<(v4i32 (scalar_to_vector (i32 (load xoaddr:$src)))), (v4i32 (LXVWSX xoaddr:$src))>; def : Pat<(v4f32 (scalar_to_vector (f32 (load xoaddr:$src)))), @@ -2664,21 +2792,21 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { let isPseudo = 1 in { def DFLOADf32 : Pseudo<(outs vssrc:$XT), (ins memrix:$src), "#DFLOADf32", - [(set f32:$XT, (load iaddr:$src))]>; + [(set f32:$XT, (load ixaddr:$src))]>; def DFLOADf64 : Pseudo<(outs vsfrc:$XT), (ins memrix:$src), "#DFLOADf64", - [(set f64:$XT, (load iaddr:$src))]>; + [(set f64:$XT, (load ixaddr:$src))]>; def DFSTOREf32 : Pseudo<(outs), (ins vssrc:$XT, memrix:$dst), "#DFSTOREf32", - [(store f32:$XT, iaddr:$dst)]>; + [(store f32:$XT, ixaddr:$dst)]>; def DFSTOREf64 : Pseudo<(outs), (ins vsfrc:$XT, memrix:$dst), "#DFSTOREf64", - [(store f64:$XT, iaddr:$dst)]>; + [(store f64:$XT, ixaddr:$dst)]>; } - def : Pat<(f64 (extloadf32 iaddr:$src)), - (COPY_TO_REGCLASS (DFLOADf32 iaddr:$src), VSFRC)>; - def : Pat<(f32 (fpround (extloadf32 iaddr:$src))), - (f32 (DFLOADf32 iaddr:$src))>; + def : Pat<(f64 (extloadf32 ixaddr:$src)), + (COPY_TO_REGCLASS (DFLOADf32 ixaddr:$src), VSFRC)>; + def : Pat<(f32 (fpround (extloadf32 ixaddr:$src))), + (f32 (DFLOADf32 ixaddr:$src))>; 
} // end HasP9Vector, AddedComplexity // Integer extend helper dags 32 -> 64 @@ -2695,6 +2823,58 @@ def DblToFlt { dag B0 = (f32 (fpround (f64 (extractelt v2f64:$B, 0)))); dag B1 = (f32 (fpround (f64 (extractelt v2f64:$B, 1)))); } + +def ByteToWord { + dag LE_A0 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 0)), i8)); + dag LE_A1 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 4)), i8)); + dag LE_A2 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 8)), i8)); + dag LE_A3 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 12)), i8)); + dag BE_A0 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 3)), i8)); + dag BE_A1 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 7)), i8)); + dag BE_A2 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 11)), i8)); + dag BE_A3 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 15)), i8)); +} + +def ByteToDWord { + dag LE_A0 = (i64 (sext_inreg + (i64 (anyext (i32 (vector_extract v16i8:$A, 0)))), i8)); + dag LE_A1 = (i64 (sext_inreg + (i64 (anyext (i32 (vector_extract v16i8:$A, 8)))), i8)); + dag BE_A0 = (i64 (sext_inreg + (i64 (anyext (i32 (vector_extract v16i8:$A, 7)))), i8)); + dag BE_A1 = (i64 (sext_inreg + (i64 (anyext (i32 (vector_extract v16i8:$A, 15)))), i8)); +} + +def HWordToWord { + dag LE_A0 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 0)), i16)); + dag LE_A1 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 2)), i16)); + dag LE_A2 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 4)), i16)); + dag LE_A3 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 6)), i16)); + dag BE_A0 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 1)), i16)); + dag BE_A1 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 3)), i16)); + dag BE_A2 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 5)), i16)); + dag BE_A3 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 7)), i16)); +} + +def HWordToDWord { + dag LE_A0 = (i64 (sext_inreg + (i64 (anyext (i32 (vector_extract v8i16:$A, 0)))), i16)); + dag LE_A1 = (i64 (sext_inreg + (i64 (anyext (i32 (vector_extract v8i16:$A, 4)))), i16)); + dag BE_A0 = (i64 (sext_inreg + (i64 (anyext (i32 (vector_extract v8i16:$A, 3)))), i16)); + dag BE_A1 = (i64 (sext_inreg + (i64 (anyext (i32 (vector_extract v8i16:$A, 7)))), i16)); +} + +def WordToDWord { + dag LE_A0 = (i64 (sext (i32 (vector_extract v4i32:$A, 0)))); + dag LE_A1 = (i64 (sext (i32 (vector_extract v4i32:$A, 2)))); + dag BE_A0 = (i64 (sext (i32 (vector_extract v4i32:$A, 1)))); + dag BE_A1 = (i64 (sext (i32 (vector_extract v4i32:$A, 3)))); +} + def FltToIntLoad { dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (extloadf32 xoaddr:$A))))); } @@ -2704,9 +2884,15 @@ def FltToUIntLoad { def FltToLongLoad { dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (extloadf32 xoaddr:$A))))); } +def FltToLongLoadP9 { + dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (extloadf32 ixaddr:$A))))); +} def FltToULongLoad { dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (extloadf32 xoaddr:$A))))); } +def FltToULongLoadP9 { + dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (extloadf32 ixaddr:$A))))); +} def FltToLong { dag A = (i64 (PPCmfvsr (PPCfctidz (fpextend f32:$A)))); } @@ -2728,9 +2914,15 @@ def DblToULong { def DblToIntLoad { dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (load xoaddr:$A))))); } +def DblToIntLoadP9 { + dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (load ixaddr:$A))))); +} def DblToUIntLoad { dag A = (i32 (PPCmfvsr (PPCfctiwuz (f64 (load xoaddr:$A))))); } +def DblToUIntLoadP9 { + dag A = (i32 (PPCmfvsr (PPCfctiwuz (f64 (load ixaddr:$A))))); +} def DblToLongLoad { dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (load xoaddr:$A))))); } 
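The ByteToWord/ByteToDWord/HWordToWord/HWordToDWord/WordToDWord helpers above each spell out one lane of a build_vector of sign-extended element extracts; the patterns that consume them a little further down select single vextsb2w/vextsh2d-style instructions on Power 9. A hedged C++ sketch of the little-endian byte-to-word case, using the GCC/clang vector extension (whether the compiler forms exactly this build_vector depends on optimization; illustration only):

typedef signed char v16i8 __attribute__((vector_size(16)));
typedef int v4i32 __attribute__((vector_size(16)));

// Lanes 0, 4, 8 and 12 are the low byte of each word element in LE order;
// sign-extending all four is the shape ByteToWord.LE_A0..LE_A3 describes,
// which (v4i32 (VEXTSB2W $A)) is intended to cover.
v4i32 bytes_to_words_le(v16i8 v) {
  v4i32 r = { v[0], v[4], v[8], v[12] };  // each lane sign-extends one byte
  return r;
}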
@@ -2898,19 +3090,19 @@ let AddedComplexity = 400 in { (v4i32 (XVCVSPSXWS (LXVWSX xoaddr:$A)))>; def : Pat<(v4i32 (scalar_to_vector FltToUIntLoad.A)), (v4i32 (XVCVSPUXWS (LXVWSX xoaddr:$A)))>; - def : Pat<(v4i32 (scalar_to_vector DblToIntLoad.A)), + def : Pat<(v4i32 (scalar_to_vector DblToIntLoadP9.A)), (v4i32 (XXSPLTW (COPY_TO_REGCLASS - (XSCVDPSXWS (DFLOADf64 iaddr:$A)), VSRC), 1))>; - def : Pat<(v4i32 (scalar_to_vector DblToUIntLoad.A)), + (XSCVDPSXWS (DFLOADf64 ixaddr:$A)), VSRC), 1))>; + def : Pat<(v4i32 (scalar_to_vector DblToUIntLoadP9.A)), (v4i32 (XXSPLTW (COPY_TO_REGCLASS - (XSCVDPUXWS (DFLOADf64 iaddr:$A)), VSRC), 1))>; - def : Pat<(v2i64 (scalar_to_vector FltToLongLoad.A)), + (XSCVDPUXWS (DFLOADf64 ixaddr:$A)), VSRC), 1))>; + def : Pat<(v2i64 (scalar_to_vector FltToLongLoadP9.A)), (v2i64 (XXPERMDIs (XSCVDPSXDS (COPY_TO_REGCLASS - (DFLOADf32 iaddr:$A), + (DFLOADf32 ixaddr:$A), VSFRC)), 0))>; - def : Pat<(v2i64 (scalar_to_vector FltToULongLoad.A)), + def : Pat<(v2i64 (scalar_to_vector FltToULongLoadP9.A)), (v2i64 (XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS - (DFLOADf32 iaddr:$A), + (DFLOADf32 ixaddr:$A), VSFRC)), 0))>; } @@ -2935,4 +3127,49 @@ let AddedComplexity = 400 in { (VMRGOW (COPY_TO_REGCLASS (MTVSRDD AnyExts.D, AnyExts.B), VSRC), (COPY_TO_REGCLASS (MTVSRDD AnyExts.C, AnyExts.A), VSRC))>; } + // P9 Altivec instructions that can be used to build vectors. + // Adding them to PPCInstrVSX.td rather than PPCAltivecVSX.td to compete + // with complexities of existing build vector patterns in this file. + let Predicates = [HasP9Altivec, IsLittleEndian] in { + def : Pat<(v2i64 (build_vector WordToDWord.LE_A0, WordToDWord.LE_A1)), + (v2i64 (VEXTSW2D $A))>; + def : Pat<(v2i64 (build_vector HWordToDWord.LE_A0, HWordToDWord.LE_A1)), + (v2i64 (VEXTSH2D $A))>; + def : Pat<(v4i32 (build_vector HWordToWord.LE_A0, HWordToWord.LE_A1, + HWordToWord.LE_A2, HWordToWord.LE_A3)), + (v4i32 (VEXTSH2W $A))>; + def : Pat<(v4i32 (build_vector ByteToWord.LE_A0, ByteToWord.LE_A1, + ByteToWord.LE_A2, ByteToWord.LE_A3)), + (v4i32 (VEXTSB2W $A))>; + def : Pat<(v2i64 (build_vector ByteToDWord.LE_A0, ByteToDWord.LE_A1)), + (v2i64 (VEXTSB2D $A))>; + } + + let Predicates = [HasP9Altivec, IsBigEndian] in { + def : Pat<(v2i64 (build_vector WordToDWord.BE_A0, WordToDWord.BE_A1)), + (v2i64 (VEXTSW2D $A))>; + def : Pat<(v2i64 (build_vector HWordToDWord.BE_A0, HWordToDWord.BE_A1)), + (v2i64 (VEXTSH2D $A))>; + def : Pat<(v4i32 (build_vector HWordToWord.BE_A0, HWordToWord.BE_A1, + HWordToWord.BE_A2, HWordToWord.BE_A3)), + (v4i32 (VEXTSH2W $A))>; + def : Pat<(v4i32 (build_vector ByteToWord.BE_A0, ByteToWord.BE_A1, + ByteToWord.BE_A2, ByteToWord.BE_A3)), + (v4i32 (VEXTSB2W $A))>; + def : Pat<(v2i64 (build_vector ByteToDWord.BE_A0, ByteToDWord.BE_A1)), + (v2i64 (VEXTSB2D $A))>; + } + + let Predicates = [HasP9Altivec] in { + def: Pat<(v2i64 (PPCSExtVElems v16i8:$A)), + (v2i64 (VEXTSB2D $A))>; + def: Pat<(v2i64 (PPCSExtVElems v8i16:$A)), + (v2i64 (VEXTSH2D $A))>; + def: Pat<(v2i64 (PPCSExtVElems v4i32:$A)), + (v2i64 (VEXTSW2D $A))>; + def: Pat<(v4i32 (PPCSExtVElems v16i8:$A)), + (v4i32 (VEXTSB2W $A))>; + def: Pat<(v4i32 (PPCSExtVElems v8i16:$A)), + (v4i32 (VEXTSH2W $A))>; + } } diff --git a/interpreter/llvm/src/lib/Target/PowerPC/PPCMCInstLower.cpp b/interpreter/llvm/src/lib/Target/PowerPC/PPCMCInstLower.cpp index 541b98e01b994..b310493587ae7 100644 --- a/interpreter/llvm/src/lib/Target/PowerPC/PPCMCInstLower.cpp +++ b/interpreter/llvm/src/lib/Target/PowerPC/PPCMCInstLower.cpp @@ -12,8 +12,8 @@ // 
//===----------------------------------------------------------------------===// -#include "PPC.h" #include "MCTargetDesc/PPCMCExpr.h" +#include "PPC.h" #include "PPCSubtarget.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Twine.h" diff --git a/interpreter/llvm/src/lib/Target/PowerPC/PPCMIPeephole.cpp b/interpreter/llvm/src/lib/Target/PowerPC/PPCMIPeephole.cpp index c6d2c3ebcc0fc..ff5f17c7628f2 100644 --- a/interpreter/llvm/src/lib/Target/PowerPC/PPCMIPeephole.cpp +++ b/interpreter/llvm/src/lib/Target/PowerPC/PPCMIPeephole.cpp @@ -19,9 +19,9 @@ // //===---------------------------------------------------------------------===// -#include "PPCInstrInfo.h" #include "PPC.h" #include "PPCInstrBuilder.h" +#include "PPCInstrInfo.h" #include "PPCTargetMachine.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" diff --git a/interpreter/llvm/src/lib/Target/PowerPC/PPCRegisterInfo.cpp b/interpreter/llvm/src/lib/Target/PowerPC/PPCRegisterInfo.cpp index aad9139246923..9207165c46a6d 100644 --- a/interpreter/llvm/src/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/interpreter/llvm/src/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -273,6 +273,20 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { return Reserved; } +bool PPCRegisterInfo::isCallerPreservedPhysReg(unsigned PhysReg, + const MachineFunction &MF) const { + assert(TargetRegisterInfo::isPhysicalRegister(PhysReg)); + if (TM.isELFv2ABI() && PhysReg == PPC::X2) { + // X2 is guaranteed to be preserved within a function if it is reserved. + // The reason it's reserved is that it's the TOC pointer (and the function + // uses the TOC). In functions where it isn't reserved (i.e. leaf functions + // with no TOC access), we can't claim that it is preserved. + return (getReservedRegs(MF).test(PPC::X2)); + } else { + return false; + } +} + unsigned PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const { const PPCFrameLowering *TFI = getFrameLowering(MF); @@ -375,9 +389,14 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const { unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC); if (MaxAlign < TargetAlign && isInt<16>(FrameSize)) { - BuildMI(MBB, II, dl, TII.get(PPC::ADDI), Reg) - .addReg(PPC::R31) - .addImm(FrameSize); + if (LP64) + BuildMI(MBB, II, dl, TII.get(PPC::ADDI8), Reg) + .addReg(PPC::X31) + .addImm(FrameSize); + else + BuildMI(MBB, II, dl, TII.get(PPC::ADDI), Reg) + .addReg(PPC::R31) + .addImm(FrameSize); } else if (LP64) { BuildMI(MBB, II, dl, TII.get(PPC::LD), Reg) .addImm(0) @@ -464,8 +483,10 @@ void PPCRegisterInfo::lowerDynamicAreaOffset( const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); unsigned maxCallFrameSize = MFI.getMaxCallFrameSize(); + bool is64Bit = TM.isPPC64(); DebugLoc dl = MI.getDebugLoc(); - BuildMI(MBB, II, dl, TII.get(PPC::LI), MI.getOperand(0).getReg()) + BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::LI8 : PPC::LI), + MI.getOperand(0).getReg()) .addImm(maxCallFrameSize); MBB.erase(II); } @@ -733,19 +754,31 @@ bool PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF, return false; } -// Figure out if the offset in the instruction must be a multiple of 4. -// This is true for instructions like "STD". -static bool usesIXAddr(const MachineInstr &MI) { +// If the offset must be a multiple of some value, return what that value is. 
+static unsigned offsetMinAlign(const MachineInstr &MI) { unsigned OpC = MI.getOpcode(); switch (OpC) { default: - return false; + return 1; case PPC::LWA: case PPC::LWA_32: case PPC::LD: + case PPC::LDU: case PPC::STD: - return true; + case PPC::STDU: + case PPC::DFLOADf32: + case PPC::DFLOADf64: + case PPC::DFSTOREf32: + case PPC::DFSTOREf64: + case PPC::LXSD: + case PPC::LXSSP: + case PPC::STXSD: + case PPC::STXSSP: + return 4; + case PPC::LXV: + case PPC::STXV: + return 16; } } @@ -831,9 +864,6 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MI.getOperand(FIOperandNum).ChangeToRegister( FrameIndex < 0 ? getBaseRegister(MF) : getFrameRegister(MF), false); - // Figure out if the offset in the instruction is shifted right two bits. - bool isIXAddr = usesIXAddr(MI); - // If the instruction is not present in ImmToIdxMap, then it has no immediate // form (and must be r+r). bool noImmForm = !MI.isInlineAsm() && OpC != TargetOpcode::STACKMAP && @@ -862,7 +892,8 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // happen in invalid code. assert(OpC != PPC::DBG_VALUE && "This should be handled in a target-independent way"); - if (!noImmForm && ((isInt<16>(Offset) && (!isIXAddr || (Offset & 3) == 0)) || + if (!noImmForm && ((isInt<16>(Offset) && + ((Offset % offsetMinAlign(MI)) == 0)) || OpC == TargetOpcode::STACKMAP || OpC == TargetOpcode::PATCHPOINT)) { MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset); @@ -1055,5 +1086,5 @@ bool PPCRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI, return MI->getOpcode() == PPC::DBG_VALUE || // DBG_VALUE is always Reg+Imm MI->getOpcode() == TargetOpcode::STACKMAP || MI->getOpcode() == TargetOpcode::PATCHPOINT || - (isInt<16>(Offset) && (!usesIXAddr(*MI) || (Offset & 3) == 0)); + (isInt<16>(Offset) && (Offset % offsetMinAlign(*MI)) == 0); } diff --git a/interpreter/llvm/src/lib/Target/PowerPC/PPCRegisterInfo.h b/interpreter/llvm/src/lib/Target/PowerPC/PPCRegisterInfo.h index 4a96327fe5527..0bbb71fdf9fbe 100644 --- a/interpreter/llvm/src/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/interpreter/llvm/src/lib/Target/PowerPC/PPCRegisterInfo.h @@ -83,6 +83,7 @@ class PPCRegisterInfo : public PPCGenRegisterInfo { void adjustStackMapLiveOutMask(uint32_t *Mask) const override; BitVector getReservedRegs(const MachineFunction &MF) const override; + bool isCallerPreservedPhysReg(unsigned PhysReg, const MachineFunction &MF) const override; /// We require the register scavenger. 
bool requiresRegisterScavenging(const MachineFunction &MF) const override { diff --git a/interpreter/llvm/src/lib/Target/PowerPC/PPCScheduleP9.td b/interpreter/llvm/src/lib/Target/PowerPC/PPCScheduleP9.td index a9c1bd78b05e0..a01995a629c29 100644 --- a/interpreter/llvm/src/lib/Target/PowerPC/PPCScheduleP9.td +++ b/interpreter/llvm/src/lib/Target/PowerPC/PPCScheduleP9.td @@ -260,8 +260,8 @@ let SchedModel = P9Model in { // ***************** Defining Itinerary Class Resources ***************** - def : ItinRW<[P9_DFU_76C, IP_EXEC_1C, DISP_1C, DISP_1C], [IIC_IntSimple, - IIC_IntGeneral]>; + def : ItinRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C], + [IIC_IntSimple, IIC_IntGeneral]>; def : ItinRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C], [IIC_IntISEL, IIC_IntRotate, IIC_IntShift]>; diff --git a/interpreter/llvm/src/lib/Target/PowerPC/PPCSubtarget.h b/interpreter/llvm/src/lib/Target/PowerPC/PPCSubtarget.h index 5a97f595ad8cf..90d11f46a384d 100644 --- a/interpreter/llvm/src/lib/Target/PowerPC/PPCSubtarget.h +++ b/interpreter/llvm/src/lib/Target/PowerPC/PPCSubtarget.h @@ -272,6 +272,13 @@ class PPCSubtarget : public PPCGenSubtargetInfo { return 16; } + + // DarwinABI has a 224-byte red zone. PPC32 SVR4ABI (Non-DarwinABI) has no + // red zone and PPC64 SVR4ABI has a 288-byte red zone. + unsigned getRedZoneSize() const { + return isDarwinABI() ? 224 : (isPPC64() ? 288 : 0); + } + bool hasHTM() const { return HasHTM; } bool hasFusion() const { return HasFusion; } bool hasFloat128() const { return HasFloat128; } diff --git a/interpreter/llvm/src/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/interpreter/llvm/src/lib/Target/PowerPC/PPCTLSDynamicCall.cpp index c7aa4cb78b7a4..5f8085f4626e2 100644 --- a/interpreter/llvm/src/lib/Target/PowerPC/PPCTLSDynamicCall.cpp +++ b/interpreter/llvm/src/lib/Target/PowerPC/PPCTLSDynamicCall.cpp @@ -21,9 +21,9 @@ // //===----------------------------------------------------------------------===// -#include "PPCInstrInfo.h" #include "PPC.h" #include "PPCInstrBuilder.h" +#include "PPCInstrInfo.h" #include "PPCTargetMachine.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -52,6 +52,7 @@ namespace { protected: bool processBlock(MachineBasicBlock &MBB) { bool Changed = false; + bool NeedFence = true; bool Is64Bit = MBB.getParent()->getSubtarget<PPCSubtarget>().isPPC64(); for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end(); @@ -62,6 +63,16 @@ namespace { MI.getOpcode() != PPC::ADDItlsldLADDR && MI.getOpcode() != PPC::ADDItlsgdLADDR32 && MI.getOpcode() != PPC::ADDItlsldLADDR32) { + + // Although we create ADJCALLSTACKDOWN and ADJCALLSTACKUP + // as scheduling fences, we skip creating new fences when we are already + // inside an existing ADJCALLSTACKDOWN/UP pair, because nesting them + // causes a verification error with -verify-machineinstrs. + if (MI.getOpcode() == PPC::ADJCALLSTACKDOWN) + NeedFence = false; + else if (MI.getOpcode() == PPC::ADJCALLSTACKUP) + NeedFence = true; + ++I; continue; } @@ -96,11 +107,15 @@ namespace { break; } - // Don't really need to save data to the stack - the clobbered + // We create ADJCALLSTACKDOWN and ADJCALLSTACKUP around __tls_get_addr + // as a scheduling fence so that the call is not scheduled before the + // mflr in the prologue, which would clobber the address in LR (PR25839). + // We don't really need to save data to the stack - the clobbered // registers are already saved when the SDNode (e.g. PPCaddiTlsgdLAddr) // gets translated to the pseudo instruction (e.g. ADDItlsgdLADDR).
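// A hedged sketch (abbreviated pseudo-MIR, not verified output) of the
// bracketing this pass produces for one general-dynamic TLS access when
// NeedFence is true:
//
//   ADJCALLSTACKDOWN 0, 0          ; scheduling fence
//   %tmp = ADDItlsgdL %x3, @sym
//   %x3  = GETtlsADDR %tmp, @sym   ; becomes a call to __tls_get_addr
//   ADJCALLSTACKUP 0, 0            ; scheduling fence
//   %out = COPY %x3
//
// When the sequence already sits inside an existing ADJCALLSTACKDOWN/UP
// pair, NeedFence is false and no nested pair is created.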
- BuildMI(MBB, I, DL, TII->get(PPC::ADJCALLSTACKDOWN)).addImm(0) - .addImm(0); + if (NeedFence) + BuildMI(MBB, I, DL, TII->get(PPC::ADJCALLSTACKDOWN)).addImm(0) + .addImm(0); // Expand into two ops built prior to the existing instruction. MachineInstr *Addi = BuildMI(MBB, I, DL, TII->get(Opc1), GPR3) @@ -116,7 +131,8 @@ namespace { .addReg(GPR3)); Call->addOperand(MI.getOperand(3)); - BuildMI(MBB, I, DL, TII->get(PPC::ADJCALLSTACKUP)).addImm(0).addImm(0); + if (NeedFence) + BuildMI(MBB, I, DL, TII->get(PPC::ADJCALLSTACKUP)).addImm(0).addImm(0); BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), OutReg) .addReg(GPR3); diff --git a/interpreter/llvm/src/lib/Target/PowerPC/PPCTOCRegDeps.cpp b/interpreter/llvm/src/lib/Target/PowerPC/PPCTOCRegDeps.cpp index 7c53a5601790e..17345b6ca8d30 100644 --- a/interpreter/llvm/src/lib/Target/PowerPC/PPCTOCRegDeps.cpp +++ b/interpreter/llvm/src/lib/Target/PowerPC/PPCTOCRegDeps.cpp @@ -61,8 +61,8 @@ // //===----------------------------------------------------------------------===// -#include "PPC.h" #include "MCTargetDesc/PPCPredicates.h" +#include "PPC.h" #include "PPCInstrBuilder.h" #include "PPCInstrInfo.h" #include "PPCMachineFunctionInfo.h" diff --git a/interpreter/llvm/src/lib/Target/PowerPC/PPCTargetMachine.cpp b/interpreter/llvm/src/lib/Target/PowerPC/PPCTargetMachine.cpp index 7806d45b54575..fe092cc3b858d 100644 --- a/interpreter/llvm/src/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/interpreter/llvm/src/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -11,11 +11,11 @@ // //===----------------------------------------------------------------------===// +#include "PPCTargetMachine.h" #include "MCTargetDesc/PPCMCTargetDesc.h" #include "PPC.h" #include "PPCSubtarget.h" #include "PPCTargetObjectFile.h" -#include "PPCTargetMachine.h" #include "PPCTargetTransformInfo.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" @@ -86,13 +86,14 @@ EnableMachineCombinerPass("ppc-machine-combiner", extern "C" void LLVMInitializePowerPCTarget() { // Register the targets - RegisterTargetMachine A(getThePPC32Target()); - RegisterTargetMachine B(getThePPC64Target()); - RegisterTargetMachine C(getThePPC64LETarget()); + RegisterTargetMachine A(getThePPC32Target()); + RegisterTargetMachine B(getThePPC64Target()); + RegisterTargetMachine C(getThePPC64LETarget()); PassRegistry &PR = *PassRegistry::getPassRegistry(); initializePPCBoolRetToIntPass(PR); initializePPCExpandISELPass(PR); + initializePPCTLSDynamicCallPass(PR); } /// Return the datalayout string of a subtarget. @@ -177,32 +178,34 @@ static PPCTargetMachine::PPCABI computeTargetABI(const Triple &TT, assert(Options.MCOptions.getABIName().empty() && "Unknown target-abi option!"); - if (!TT.isMacOSX()) { - switch (TT.getArch()) { - case Triple::ppc64le: - return PPCTargetMachine::PPC_ABI_ELFv2; - case Triple::ppc64: - return PPCTargetMachine::PPC_ABI_ELFv1; - default: - // Fallthrough. 
- ; - } + if (TT.isMacOSX()) + return PPCTargetMachine::PPC_ABI_UNKNOWN; + + switch (TT.getArch()) { + case Triple::ppc64le: + return PPCTargetMachine::PPC_ABI_ELFv2; + case Triple::ppc64: + return PPCTargetMachine::PPC_ABI_ELFv1; + default: + return PPCTargetMachine::PPC_ABI_UNKNOWN; } - return PPCTargetMachine::PPC_ABI_UNKNOWN; } static Reloc::Model getEffectiveRelocModel(const Triple &TT, Optional RM) { - if (!RM.hasValue()) { - if (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le) { - if (!TT.isOSBinFormatMachO() && !TT.isMacOSX()) - return Reloc::PIC_; - } - if (TT.isOSDarwin()) - return Reloc::DynamicNoPIC; - return Reloc::Static; - } - return *RM; + if (RM.hasValue()) + return *RM; + + // Darwin defaults to dynamic-no-pic. + if (TT.isOSDarwin()) + return Reloc::DynamicNoPIC; + + // Non-darwin 64-bit platforms are PIC by default. + if (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le) + return Reloc::PIC_; + + // 32-bit is static by default. + return Reloc::Static; } // The FeatureString here is a little subtle. We are modifying the feature @@ -224,26 +227,6 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, const Triple &TT, PPCTargetMachine::~PPCTargetMachine() = default; -void PPC32TargetMachine::anchor() {} - -PPC32TargetMachine::PPC32TargetMachine(const Target &T, const Triple &TT, - StringRef CPU, StringRef FS, - const TargetOptions &Options, - Optional RM, - CodeModel::Model CM, - CodeGenOpt::Level OL) - : PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {} - -void PPC64TargetMachine::anchor() {} - -PPC64TargetMachine::PPC64TargetMachine(const Target &T, const Triple &TT, - StringRef CPU, StringRef FS, - const TargetOptions &Options, - Optional RM, - CodeModel::Model CM, - CodeGenOpt::Level OL) - : PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {} - const PPCSubtarget * PPCTargetMachine::getSubtargetImpl(const Function &F) const { Attribute CPUAttr = F.getFnAttribute("target-cpu"); @@ -296,7 +279,7 @@ namespace { /// PPC Code Generator Pass Configuration Options. class PPCPassConfig : public TargetPassConfig { public: - PPCPassConfig(PPCTargetMachine *TM, PassManagerBase &PM) + PPCPassConfig(PPCTargetMachine &TM, PassManagerBase &PM) : TargetPassConfig(TM, PM) {} PPCTargetMachine &getPPCTargetMachine() const { @@ -316,13 +299,13 @@ class PPCPassConfig : public TargetPassConfig { } // end anonymous namespace TargetPassConfig *PPCTargetMachine::createPassConfig(PassManagerBase &PM) { - return new PPCPassConfig(this, PM); + return new PPCPassConfig(*this, PM); } void PPCPassConfig::addIRPasses() { if (TM->getOptLevel() != CodeGenOpt::None) addPass(createPPCBoolRetToIntPass()); - addPass(createAtomicExpandPass(&getPPCTargetMachine())); + addPass(createAtomicExpandPass()); // For the BG/Q (or if explicitly requested), add explicit data prefetch // intrinsics. @@ -354,7 +337,7 @@ bool PPCPassConfig::addPreISel() { addPass(createPPCLoopPreIncPrepPass(getPPCTargetMachine())); if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None) - addPass(createPPCCTRLoops(getPPCTargetMachine())); + addPass(createPPCCTRLoops()); return false; } @@ -370,7 +353,7 @@ bool PPCPassConfig::addILPOpts() { bool PPCPassConfig::addInstSelector() { // Install an instruction selector. 
- addPass(createPPCISelDag(getPPCTargetMachine())); + addPass(createPPCISelDag(getPPCTargetMachine(), getOptLevel())); #ifndef NDEBUG if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None) @@ -406,7 +389,7 @@ void PPCPassConfig::addPreRegAlloc() { // FIXME: We probably don't need to run these for -fPIE. if (getPPCTargetMachine().isPositionIndependent()) { // FIXME: LiveVariables should not be necessary here! - // PPCTLSDYnamicCallPass uses LiveIntervals which previously dependet on + // PPCTLSDynamicCallPass uses LiveIntervals which previously depended on // LiveVariables. This (unnecessary) dependency has been removed now, // however a stage-2 clang build fails without LiveVariables computed here. addPass(&LiveVariablesID, false); diff --git a/interpreter/llvm/src/lib/Target/PowerPC/PPCTargetMachine.h b/interpreter/llvm/src/lib/Target/PowerPC/PPCTargetMachine.h index f2838351cee56..be705507b5347 100644 --- a/interpreter/llvm/src/lib/Target/PowerPC/PPCTargetMachine.h +++ b/interpreter/llvm/src/lib/Target/PowerPC/PPCTargetMachine.h @@ -23,7 +23,7 @@ namespace llvm { /// Common code between 32-bit and 64-bit PowerPC targets. /// -class PPCTargetMachine : public LLVMTargetMachine { +class PPCTargetMachine final : public LLVMTargetMachine { public: enum PPCABI { PPC_ABI_UNKNOWN, PPC_ABI_ELFv1, PPC_ABI_ELFv2 }; private: @@ -41,6 +41,9 @@ class PPCTargetMachine : public LLVMTargetMachine { ~PPCTargetMachine() override; const PPCSubtarget *getSubtargetImpl(const Function &F) const override; + // The no-argument getSubtargetImpl, while it exists on some targets, is + // deprecated and should not be used. + const PPCSubtarget *getSubtargetImpl() const = delete; // Pass Pipeline Configuration TargetPassConfig *createPassConfig(PassManagerBase &PM) override; @@ -55,30 +58,11 @@ class PPCTargetMachine : public LLVMTargetMachine { const Triple &TT = getTargetTriple(); return (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le); }; -}; - -/// PowerPC 32-bit target machine. -/// -class PPC32TargetMachine : public PPCTargetMachine { - virtual void anchor(); -public: - PPC32TargetMachine(const Target &T, const Triple &TT, StringRef CPU, - StringRef FS, const TargetOptions &Options, - Optional<Reloc::Model> RM, CodeModel::Model CM, - CodeGenOpt::Level OL); -}; -/// PowerPC 64-bit target machine.
-/// -class PPC64TargetMachine : public PPCTargetMachine { - virtual void anchor(); -public: - PPC64TargetMachine(const Target &T, const Triple &TT, StringRef CPU, - StringRef FS, const TargetOptions &Options, - Optional RM, CodeModel::Model CM, - CodeGenOpt::Level OL); + bool isMachineVerifierClean() const override { + return false; + } }; - } // end namespace llvm #endif diff --git a/interpreter/llvm/src/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/interpreter/llvm/src/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index 7ee1317bf72f2..6110706b01b90 100644 --- a/interpreter/llvm/src/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/interpreter/llvm/src/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -189,7 +189,7 @@ int PPCTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, return PPCTTIImpl::getIntImmCost(Imm, Ty); } -void PPCTTIImpl::getUnrollingPreferences(Loop *L, +void PPCTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP) { if (ST->getDarwinDirective() == PPC::DIR_A2) { // The A2 is in-order with a deep pipeline, and concatenation unrolling @@ -201,7 +201,7 @@ void PPCTTIImpl::getUnrollingPreferences(Loop *L, UP.AllowExpensiveTripCount = true; } - BaseT::getUnrollingPreferences(L, UP); + BaseT::getUnrollingPreferences(L, SE, UP); } bool PPCTTIImpl::enableAggressiveInterleaving(bool LoopHasReductions) { @@ -215,6 +215,11 @@ bool PPCTTIImpl::enableAggressiveInterleaving(bool LoopHasReductions) { return LoopHasReductions; } +bool PPCTTIImpl::expandMemCmp(Instruction *I, unsigned &MaxLoadSize) { + MaxLoadSize = 8; + return true; +} + bool PPCTTIImpl::enableInterleavedAccessVectorization() { return true; } @@ -225,7 +230,7 @@ unsigned PPCTTIImpl::getNumberOfRegisters(bool Vector) { return ST->hasVSX() ? 64 : 32; } -unsigned PPCTTIImpl::getRegisterBitWidth(bool Vector) { +unsigned PPCTTIImpl::getRegisterBitWidth(bool Vector) const { if (Vector) { if (ST->hasQPX()) return 256; if (ST->hasAltivec()) return 128; @@ -239,9 +244,18 @@ unsigned PPCTTIImpl::getRegisterBitWidth(bool Vector) { } unsigned PPCTTIImpl::getCacheLineSize() { - // This is currently only used for the data prefetch pass which is only - // enabled for BG/Q by default. - return CacheLineSize; + // Check first if the user specified a custom line size. + if (CacheLineSize.getNumOccurrences() > 0) + return CacheLineSize; + + // On P7, P8 or P9 we have a cache line size of 128. + unsigned Directive = ST->getDarwinDirective(); + if (Directive == PPC::DIR_PWR7 || Directive == PPC::DIR_PWR8 || + Directive == PPC::DIR_PWR9) + return 128; + + // On other processors return a default of 64 bytes. 
+ return 64; } unsigned PPCTTIImpl::getPrefetchDistance() { diff --git a/interpreter/llvm/src/lib/Target/PowerPC/PPCTargetTransformInfo.h b/interpreter/llvm/src/lib/Target/PowerPC/PPCTargetTransformInfo.h index 6ce70fbd8778e..99ca6394d1bed 100644 --- a/interpreter/llvm/src/lib/Target/PowerPC/PPCTargetTransformInfo.h +++ b/interpreter/llvm/src/lib/Target/PowerPC/PPCTargetTransformInfo.h @@ -52,7 +52,8 @@ class PPCTTIImpl : public BasicTTIImplBase { Type *Ty); TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth); - void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP); + void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, + TTI::UnrollingPreferences &UP); /// @} @@ -60,9 +61,10 @@ class PPCTTIImpl : public BasicTTIImplBase { /// @{ bool enableAggressiveInterleaving(bool LoopHasReductions); + bool expandMemCmp(Instruction *I, unsigned &MaxLoadSize); bool enableInterleavedAccessVectorization(); unsigned getNumberOfRegisters(bool Vector); - unsigned getRegisterBitWidth(bool Vector); + unsigned getRegisterBitWidth(bool Vector) const; unsigned getCacheLineSize(); unsigned getPrefetchDistance(); unsigned getMaxInterleaveFactor(unsigned VF); diff --git a/interpreter/llvm/src/lib/Target/PowerPC/PPCVSXCopy.cpp b/interpreter/llvm/src/lib/Target/PowerPC/PPCVSXCopy.cpp index f3a0290da0540..93fe3230ab815 100644 --- a/interpreter/llvm/src/lib/Target/PowerPC/PPCVSXCopy.cpp +++ b/interpreter/llvm/src/lib/Target/PowerPC/PPCVSXCopy.cpp @@ -13,8 +13,8 @@ // //===----------------------------------------------------------------------===// -#include "PPC.h" #include "MCTargetDesc/PPCPredicates.h" +#include "PPC.h" #include "PPCHazardRecognizers.h" #include "PPCInstrBuilder.h" #include "PPCInstrInfo.h" diff --git a/interpreter/llvm/src/lib/Target/PowerPC/PPCVSXFMAMutate.cpp b/interpreter/llvm/src/lib/Target/PowerPC/PPCVSXFMAMutate.cpp index f6d20ced15a00..a57484e5abdf7 100644 --- a/interpreter/llvm/src/lib/Target/PowerPC/PPCVSXFMAMutate.cpp +++ b/interpreter/llvm/src/lib/Target/PowerPC/PPCVSXFMAMutate.cpp @@ -12,10 +12,10 @@ // //===----------------------------------------------------------------------===// -#include "PPCInstrInfo.h" #include "MCTargetDesc/PPCPredicates.h" #include "PPC.h" #include "PPCInstrBuilder.h" +#include "PPCInstrInfo.h" #include "PPCMachineFunctionInfo.h" #include "PPCTargetMachine.h" #include "llvm/ADT/STLExtras.h" diff --git a/interpreter/llvm/src/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp b/interpreter/llvm/src/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp index d3434b77be8af..7d34efd4af3e0 100644 --- a/interpreter/llvm/src/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp +++ b/interpreter/llvm/src/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp @@ -42,9 +42,9 @@ // //===---------------------------------------------------------------------===// -#include "PPCInstrInfo.h" #include "PPC.h" #include "PPCInstrBuilder.h" +#include "PPCInstrInfo.h" #include "PPCTargetMachine.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/EquivalenceClasses.h" @@ -195,8 +195,10 @@ struct PPCVSXSwapRemoval : public MachineFunctionPass { return false; // If we don't have VSX on the subtarget, don't do anything. + // Also, on Power 9 the load and store ops preserve element order and so + // the swaps are not required.
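As a side note on the getCacheLineSize() hunk above: stripped of the TTI plumbing, the new logic is a three-way choice. A minimal sketch, assuming a std::optional user override in place of the real cl::opt (whose getNumOccurrences() the patch consults) and an enum standing in for the PPC::DIR_PWR7/8/9 directives:

#include <optional>

enum class PPCDirective { PWR7, PWR8, PWR9, Other };

// Same precedence as the patch: an explicit user setting wins, P7/P8/P9
// report their 128-byte cache lines, and everything else defaults to 64.
unsigned cacheLineSize(std::optional<unsigned> UserOverride, PPCDirective D) {
  if (UserOverride)
    return *UserOverride;
  if (D == PPCDirective::PWR7 || D == PPCDirective::PWR8 ||
      D == PPCDirective::PWR9)
    return 128;
  return 64;
}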
const PPCSubtarget &STI = MF.getSubtarget(); - if (!STI.hasVSX()) + if (!STI.hasVSX() || !STI.needsSwapsForVSXMemOps()) return false; bool Changed = false; diff --git a/interpreter/llvm/src/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/interpreter/llvm/src/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp index d6f2672271e9b..be83efc02d278 100644 --- a/interpreter/llvm/src/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp +++ b/interpreter/llvm/src/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp @@ -12,10 +12,10 @@ #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCDirectives.h" #include "llvm/MC/MCELFObjectWriter.h" +#include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixupKindInfo.h" #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/MC/MCExpr.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -32,8 +32,9 @@ class RISCVAsmBackend : public MCAsmBackend { : MCAsmBackend(), OSABI(OSABI), Is64Bit(Is64Bit) {} ~RISCVAsmBackend() override {} - void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, - uint64_t Value, bool IsPCRel, MCContext &Ctx) const override; + void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target, MutableArrayRef Data, + uint64_t Value, bool IsResolved) const override; MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override; @@ -69,9 +70,10 @@ bool RISCVAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { return true; } -void RISCVAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, - unsigned DataSize, uint64_t Value, - bool IsPCRel, MCContext &Ctx) const { +void RISCVAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target, + MutableArrayRef Data, uint64_t Value, + bool IsResolved) const { return; } diff --git a/interpreter/llvm/src/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp b/interpreter/llvm/src/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp index b2ed13758d417..9309d493cef48 100644 --- a/interpreter/llvm/src/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp +++ b/interpreter/llvm/src/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp @@ -13,13 +13,13 @@ #include "MCTargetDesc/RISCVMCTargetDesc.h" #include "llvm/ADT/Statistic.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/MC/MCAsmInfo.h" #include "llvm/Support/EndianStream.h" #include "llvm/Support/raw_ostream.h" diff --git a/interpreter/llvm/src/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h b/interpreter/llvm/src/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h index ddc3bf3504526..7c98b1c8f3213 100644 --- a/interpreter/llvm/src/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h +++ b/interpreter/llvm/src/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h @@ -14,9 +14,9 @@ #ifndef LLVM_LIB_TARGET_RISCV_MCTARGETDESC_RISCVMCTARGETDESC_H #define LLVM_LIB_TARGET_RISCV_MCTARGETDESC_RISCVMCTARGETDESC_H +#include "llvm/Config/config.h" #include "llvm/MC/MCTargetOptions.h" #include "llvm/Support/DataTypes.h" -#include "llvm/Config/config.h" namespace llvm { class MCAsmBackend; diff --git a/interpreter/llvm/src/lib/Target/RISCV/RISCVTargetMachine.cpp b/interpreter/llvm/src/lib/Target/RISCV/RISCVTargetMachine.cpp index a20331cd0a3ed..744d7b8aaa3a4 100644 --- 
a/interpreter/llvm/src/lib/Target/RISCV/RISCVTargetMachine.cpp +++ b/interpreter/llvm/src/lib/Target/RISCV/RISCVTargetMachine.cpp @@ -13,10 +13,10 @@ #include "RISCVTargetMachine.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/LegacyPassManager.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Target/TargetOptions.h" @@ -56,5 +56,5 @@ RISCVTargetMachine::RISCVTargetMachine(const Target &T, const Triple &TT, } TargetPassConfig *RISCVTargetMachine::createPassConfig(PassManagerBase &PM) { - return new TargetPassConfig(this, PM); + return new TargetPassConfig(*this, PM); } diff --git a/interpreter/llvm/src/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp b/interpreter/llvm/src/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp index 7e6dff6b78948..087c037614a9d 100644 --- a/interpreter/llvm/src/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp +++ b/interpreter/llvm/src/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp @@ -9,8 +9,8 @@ #include "MCTargetDesc/SparcMCExpr.h" #include "MCTargetDesc/SparcMCTargetDesc.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/MC/MCContext.h" @@ -28,8 +28,8 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/SMLoc.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" #include #include #include diff --git a/interpreter/llvm/src/lib/Target/Sparc/DelaySlotFiller.cpp b/interpreter/llvm/src/lib/Target/Sparc/DelaySlotFiller.cpp index 6f9cc314e3764..df819ccd15dbd 100644 --- a/interpreter/llvm/src/lib/Target/Sparc/DelaySlotFiller.cpp +++ b/interpreter/llvm/src/lib/Target/Sparc/DelaySlotFiller.cpp @@ -96,7 +96,7 @@ namespace { /// createSparcDelaySlotFillerPass - Returns a pass that fills in delay /// slots in Sparc MachineFunctions /// -FunctionPass *llvm::createSparcDelaySlotFillerPass(TargetMachine &tm) { +FunctionPass *llvm::createSparcDelaySlotFillerPass() { return new Filler; } diff --git a/interpreter/llvm/src/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp b/interpreter/llvm/src/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp index da7e0b737e785..8e298e8316dae 100644 --- a/interpreter/llvm/src/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp +++ b/interpreter/llvm/src/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp @@ -14,11 +14,11 @@ #include "Sparc.h" #include "SparcRegisterInfo.h" #include "SparcSubtarget.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/MC/MCInst.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCAsmInfo.h" #include "llvm/Support/TargetRegistry.h" using namespace llvm; diff --git a/interpreter/llvm/src/lib/Target/Sparc/LeonPasses.cpp b/interpreter/llvm/src/lib/Target/Sparc/LeonPasses.cpp index 0acc2875daa84..ca6a0dc3c2a3a 100644 --- a/interpreter/llvm/src/lib/Target/Sparc/LeonPasses.cpp +++ b/interpreter/llvm/src/lib/Target/Sparc/LeonPasses.cpp @@ -21,9 +21,6 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; -LEONMachineFunctionPass::LEONMachineFunctionPass(TargetMachine &tm, char &ID) - : MachineFunctionPass(ID) {} - 
LEONMachineFunctionPass::LEONMachineFunctionPass(char &ID) : MachineFunctionPass(ID) {} @@ -72,8 +69,7 @@ int LEONMachineFunctionPass::getUnusedFPRegister(MachineRegisterInfo &MRI) { // char InsertNOPLoad::ID = 0; -InsertNOPLoad::InsertNOPLoad(TargetMachine &tm) - : LEONMachineFunctionPass(tm, ID) {} +InsertNOPLoad::InsertNOPLoad() : LEONMachineFunctionPass(ID) {} bool InsertNOPLoad::runOnMachineFunction(MachineFunction &MF) { Subtarget = &MF.getSubtarget(); @@ -114,7 +110,7 @@ bool InsertNOPLoad::runOnMachineFunction(MachineFunction &MF) { // char FixFSMULD::ID = 0; -FixFSMULD::FixFSMULD(TargetMachine &tm) : LEONMachineFunctionPass(tm, ID) {} +FixFSMULD::FixFSMULD() : LEONMachineFunctionPass(ID) {} bool FixFSMULD::runOnMachineFunction(MachineFunction &MF) { Subtarget = &MF.getSubtarget(); @@ -203,8 +199,7 @@ bool FixFSMULD::runOnMachineFunction(MachineFunction &MF) { // char ReplaceFMULS::ID = 0; -ReplaceFMULS::ReplaceFMULS(TargetMachine &tm) - : LEONMachineFunctionPass(tm, ID) {} +ReplaceFMULS::ReplaceFMULS() : LEONMachineFunctionPass(ID) {} bool ReplaceFMULS::runOnMachineFunction(MachineFunction &MF) { Subtarget = &MF.getSubtarget(); @@ -287,8 +282,7 @@ bool ReplaceFMULS::runOnMachineFunction(MachineFunction &MF) { char DetectRoundChange::ID = 0; -DetectRoundChange::DetectRoundChange(TargetMachine &tm) - : LEONMachineFunctionPass(tm, ID) {} +DetectRoundChange::DetectRoundChange() : LEONMachineFunctionPass(ID) {} bool DetectRoundChange::runOnMachineFunction(MachineFunction &MF) { Subtarget = &MF.getSubtarget(); @@ -338,8 +332,7 @@ bool DetectRoundChange::runOnMachineFunction(MachineFunction &MF) { // char FixAllFDIVSQRT::ID = 0; -FixAllFDIVSQRT::FixAllFDIVSQRT(TargetMachine &tm) - : LEONMachineFunctionPass(tm, ID) {} +FixAllFDIVSQRT::FixAllFDIVSQRT() : LEONMachineFunctionPass(ID) {} bool FixAllFDIVSQRT::runOnMachineFunction(MachineFunction &MF) { Subtarget = &MF.getSubtarget(); diff --git a/interpreter/llvm/src/lib/Target/Sparc/LeonPasses.h b/interpreter/llvm/src/lib/Target/Sparc/LeonPasses.h index 2158cb636bfcb..99cdfc4589ef4 100755 --- a/interpreter/llvm/src/lib/Target/Sparc/LeonPasses.h +++ b/interpreter/llvm/src/lib/Target/Sparc/LeonPasses.h @@ -32,7 +32,6 @@ class LLVM_LIBRARY_VISIBILITY LEONMachineFunctionPass std::vector UsedRegisters; protected: - LEONMachineFunctionPass(TargetMachine &tm, char &ID); LEONMachineFunctionPass(char &ID); int GetRegIndexForOperand(MachineInstr &MI, int OperandIndex); @@ -48,7 +47,7 @@ class LLVM_LIBRARY_VISIBILITY InsertNOPLoad : public LEONMachineFunctionPass { public: static char ID; - InsertNOPLoad(TargetMachine &tm); + InsertNOPLoad(); bool runOnMachineFunction(MachineFunction &MF) override; StringRef getPassName() const override { @@ -62,7 +61,7 @@ class LLVM_LIBRARY_VISIBILITY FixFSMULD : public LEONMachineFunctionPass { public: static char ID; - FixFSMULD(TargetMachine &tm); + FixFSMULD(); bool runOnMachineFunction(MachineFunction &MF) override; StringRef getPassName() const override { @@ -74,7 +73,7 @@ class LLVM_LIBRARY_VISIBILITY ReplaceFMULS : public LEONMachineFunctionPass { public: static char ID; - ReplaceFMULS(TargetMachine &tm); + ReplaceFMULS(); bool runOnMachineFunction(MachineFunction &MF) override; StringRef getPassName() const override { @@ -89,7 +88,7 @@ class LLVM_LIBRARY_VISIBILITY DetectRoundChange public: static char ID; - DetectRoundChange(TargetMachine &tm); + DetectRoundChange(); bool runOnMachineFunction(MachineFunction &MF) override; StringRef getPassName() const override { @@ -102,7 +101,7 @@ class 
LLVM_LIBRARY_VISIBILITY FixAllFDIVSQRT : public LEONMachineFunctionPass { public: static char ID; - FixAllFDIVSQRT(TargetMachine &tm); + FixAllFDIVSQRT(); bool runOnMachineFunction(MachineFunction &MF) override; StringRef getPassName() const override { diff --git a/interpreter/llvm/src/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp b/interpreter/llvm/src/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp index cc07547ede2c2..0a72a4438218d 100644 --- a/interpreter/llvm/src/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp +++ b/interpreter/llvm/src/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp @@ -7,9 +7,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/MC/MCAsmBackend.h" #include "MCTargetDesc/SparcFixupKinds.h" #include "MCTargetDesc/SparcMCTargetDesc.h" +#include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixupKindInfo.h" @@ -61,14 +61,6 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { case Sparc::fixup_sparc_lo10: return Value & 0x3ff; - case Sparc::fixup_sparc_tls_ldo_hix22: - case Sparc::fixup_sparc_tls_le_hix22: - return (~Value >> 10) & 0x3fffff; - - case Sparc::fixup_sparc_tls_ldo_lox10: - case Sparc::fixup_sparc_tls_le_lox10: - return (~(~Value & 0x3ff)) & 0x1fff; - case Sparc::fixup_sparc_h44: return (Value >> 22) & 0x3fffff; @@ -84,6 +76,13 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { case Sparc::fixup_sparc_hm: return (Value >> 32) & 0x3ff; + case Sparc::fixup_sparc_tls_ldo_hix22: + case Sparc::fixup_sparc_tls_le_hix22: + case Sparc::fixup_sparc_tls_ldo_lox10: + case Sparc::fixup_sparc_tls_le_lox10: + assert(Value == 0 && "Sparc TLS relocs expect zero Value"); + return 0; + case Sparc::fixup_sparc_tls_gd_add: case Sparc::fixup_sparc_tls_gd_call: case Sparc::fixup_sparc_tls_ldm_add: @@ -203,15 +202,15 @@ namespace { return InfosBE[Kind - FirstTargetFixupKind]; } - void processFixupValue(const MCAssembler &Asm, const MCAsmLayout &Layout, - const MCFixup &Fixup, const MCFragment *DF, - const MCValue &Target, uint64_t &Value, - bool &IsResolved) override { + bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target) override { switch ((Sparc::Fixups)Fixup.getKind()) { - default: break; + default: + return false; case Sparc::fixup_sparc_wplt30: if (Target.getSymA()->getSymbol().isTemporary()) - return; + return false; + LLVM_FALLTHROUGH; case Sparc::fixup_sparc_tls_gd_hi22: case Sparc::fixup_sparc_tls_gd_lo10: case Sparc::fixup_sparc_tls_gd_add: @@ -229,7 +228,8 @@ namespace { case Sparc::fixup_sparc_tls_ie_ldx: case Sparc::fixup_sparc_tls_ie_add: case Sparc::fixup_sparc_tls_le_hix22: - case Sparc::fixup_sparc_tls_le_lox10: IsResolved = false; break; + case Sparc::fixup_sparc_tls_le_lox10: + return true; } } @@ -273,9 +273,9 @@ namespace { ELFSparcAsmBackend(const Target &T, Triple::OSType OSType) : SparcAsmBackend(T), OSType(OSType) { } - void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, - uint64_t Value, bool IsPCRel, - MCContext &Ctx) const override { + void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target, MutableArrayRef Data, + uint64_t Value, bool IsResolved) const override { Value = adjustFixupValue(Fixup.getKind(), Value); if (!Value) return; // Doesn't change encoding. 
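Before the next file, a note on the Sparc backend change just above: LLVM 5.0 replaced the mutating processFixupValue() hook (which cleared IsResolved) with a shouldForceRelocation() predicate. A compilable sketch of the new shape, using a hypothetical fixup enum rather than the real Sparc::Fixups values:

// Hypothetical stand-ins for the MC types; just enough for the control flow.
enum FixupKind { FK_PCRel_Call, FK_TLS_GD, FK_TLS_LE, FK_Plain };

struct Fixup {
  FixupKind Kind;
  bool TargetIsTemporary; // true for assembler-local (temporary) symbols
};

// "Must this fixup become a relocation even if the assembler could resolve
// it?"  TLS fixups always must; a call fixup only when it targets a
// non-temporary (externally visible) symbol.
bool shouldForceRelocation(const Fixup &F) {
  switch (F.Kind) {
  default:
    return false;
  case FK_PCRel_Call:
    if (F.TargetIsTemporary)
      return false;
    [[fallthrough]]; // mirrors the LLVM_FALLTHROUGH added in the patch
  case FK_TLS_GD:
  case FK_TLS_LE:
    return true;
  }
}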
diff --git a/interpreter/llvm/src/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp b/interpreter/llvm/src/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp index 21df60237d96e..50e8825b15e8c 100644 --- a/interpreter/llvm/src/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp +++ b/interpreter/llvm/src/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp @@ -14,10 +14,10 @@ #include "SparcMCAsmInfo.h" #include "SparcMCExpr.h" #include "llvm/ADT/Triple.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCTargetOptions.h" -#include "llvm/Support/Dwarf.h" using namespace llvm; diff --git a/interpreter/llvm/src/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp b/interpreter/llvm/src/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp index e85a8cd5e3399..a77f760d9eff0 100644 --- a/interpreter/llvm/src/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp +++ b/interpreter/llvm/src/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp @@ -19,7 +19,6 @@ #include "llvm/MC/MCSymbolELF.h" #include "llvm/Object/ELF.h" - using namespace llvm; #define DEBUG_TYPE "sparcmcexpr" diff --git a/interpreter/llvm/src/lib/Target/Sparc/Sparc.h b/interpreter/llvm/src/lib/Target/Sparc/Sparc.h index 0a8272d892976..4135e4e1b61d3 100644 --- a/interpreter/llvm/src/lib/Target/Sparc/Sparc.h +++ b/interpreter/llvm/src/lib/Target/Sparc/Sparc.h @@ -28,7 +28,7 @@ namespace llvm { class MachineInstr; FunctionPass *createSparcISelDag(SparcTargetMachine &TM); - FunctionPass *createSparcDelaySlotFillerPass(TargetMachine &TM); + FunctionPass *createSparcDelaySlotFillerPass(); void LowerSparcMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, diff --git a/interpreter/llvm/src/lib/Target/Sparc/Sparc.td b/interpreter/llvm/src/lib/Target/Sparc/Sparc.td index 11004c5a952fc..91cab00b2b651 100644 --- a/interpreter/llvm/src/lib/Target/Sparc/Sparc.td +++ b/interpreter/llvm/src/lib/Target/Sparc/Sparc.td @@ -20,6 +20,10 @@ include "llvm/Target/Target.td" // SPARC Subtarget features. 
// +def FeatureSoftMulDiv + : SubtargetFeature<"soft-mul-div", "UseSoftMulDiv", "true", + "Use software emulation for integer multiply and divide">; + def FeatureV9 : SubtargetFeature<"v9", "IsV9", "true", "Enable SPARC-V9 instructions">; @@ -75,7 +79,7 @@ class Proc Features> : Processor; def : Proc<"generic", []>; -def : Proc<"v7", []>; +def : Proc<"v7", [FeatureSoftMulDiv]>; def : Proc<"v8", []>; def : Proc<"supersparc", []>; def : Proc<"sparclite", []>; diff --git a/interpreter/llvm/src/lib/Target/Sparc/SparcAsmPrinter.cpp b/interpreter/llvm/src/lib/Target/Sparc/SparcAsmPrinter.cpp index 31a128a5f2714..19fb94534b256 100644 --- a/interpreter/llvm/src/lib/Target/Sparc/SparcAsmPrinter.cpp +++ b/interpreter/llvm/src/lib/Target/Sparc/SparcAsmPrinter.cpp @@ -12,9 +12,9 @@ // //===----------------------------------------------------------------------===// -#include "Sparc.h" #include "InstPrinter/SparcInstPrinter.h" #include "MCTargetDesc/SparcMCExpr.h" +#include "Sparc.h" #include "SparcInstrInfo.h" #include "SparcTargetMachine.h" #include "SparcTargetStreamer.h" diff --git a/interpreter/llvm/src/lib/Target/Sparc/SparcISelLowering.cpp b/interpreter/llvm/src/lib/Target/Sparc/SparcISelLowering.cpp index 9e7e3c6b705a9..6767a59a97571 100644 --- a/interpreter/llvm/src/lib/Target/Sparc/SparcISelLowering.cpp +++ b/interpreter/llvm/src/lib/Target/Sparc/SparcISelLowering.cpp @@ -1689,6 +1689,19 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM, setOperationAction(ISD::MULHS, MVT::i32, Expand); setOperationAction(ISD::MUL, MVT::i32, Expand); + if (Subtarget->useSoftMulDiv()) { + // .umul works for both signed and unsigned + setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); + setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); + setLibcallName(RTLIB::MUL_I32, ".umul"); + + setOperationAction(ISD::SDIV, MVT::i32, Expand); + setLibcallName(RTLIB::SDIV_I32, ".div"); + + setOperationAction(ISD::UDIV, MVT::i32, Expand); + setLibcallName(RTLIB::UDIV_I32, ".udiv"); + } + if (Subtarget->is64Bit()) { setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); diff --git a/interpreter/llvm/src/lib/Target/Sparc/SparcInstrInfo.td b/interpreter/llvm/src/lib/Target/Sparc/SparcInstrInfo.td index ae45c8be67524..3194ad4aeb6b1 100644 --- a/interpreter/llvm/src/lib/Target/Sparc/SparcInstrInfo.td +++ b/interpreter/llvm/src/lib/Target/Sparc/SparcInstrInfo.td @@ -27,6 +27,9 @@ def Is32Bit : Predicate<"!Subtarget->is64Bit()">; // True when generating 64-bit code. This also implies HasV9. def Is64Bit : Predicate<"Subtarget->is64Bit()">; +def UseSoftMulDiv : Predicate<"Subtarget->useSoftMulDiv()">, + AssemblerPredicate<"FeatureSoftMulDiv">; + // HasV9 - This predicate is true when the target processor supports V9 // instructions. Note that the machine may be running in 32-bit mode. 
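Two knobs cooperate in the SparcISelLowering hunk above: marking the i32 multiply/divide operations Expand makes the legalizer fall back to runtime calls, and setLibcallName() points those calls at the V7 support routines. The resulting mapping as a toy lookup (the Libcall enum is a stand-in for the real RTLIB slots; the .umul/.div/.udiv names come straight from the patch):

// Stand-in for the RTLIB libcall slots touched by the patch.
enum class Libcall { MUL_I32, SDIV_I32, UDIV_I32 };

// On a soft-mul/div target (e.g. SPARC V7), expanded 32-bit multiply and
// divide nodes become calls to these support routines.
const char *softMulDivLibcall(Libcall LC) {
  switch (LC) {
  case Libcall::MUL_I32:  return ".umul"; // works for signed and unsigned
  case Libcall::SDIV_I32: return ".div";
  case Libcall::UDIV_I32: return ".udiv";
  }
  return nullptr; // unreachable for well-formed enum values
}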
def HasV9 : Predicate<"Subtarget->isV9()">, diff --git a/interpreter/llvm/src/lib/Target/Sparc/SparcMCInstLower.cpp b/interpreter/llvm/src/lib/Target/Sparc/SparcMCInstLower.cpp index a3cedcbf9dd14..a784124ff6883 100644 --- a/interpreter/llvm/src/lib/Target/Sparc/SparcMCInstLower.cpp +++ b/interpreter/llvm/src/lib/Target/Sparc/SparcMCInstLower.cpp @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#include "Sparc.h" #include "MCTargetDesc/SparcMCExpr.h" +#include "Sparc.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" diff --git a/interpreter/llvm/src/lib/Target/Sparc/SparcSubtarget.cpp b/interpreter/llvm/src/lib/Target/Sparc/SparcSubtarget.cpp index 43ddef3cc96e9..daac56add87ce 100644 --- a/interpreter/llvm/src/lib/Target/Sparc/SparcSubtarget.cpp +++ b/interpreter/llvm/src/lib/Target/Sparc/SparcSubtarget.cpp @@ -28,6 +28,7 @@ void SparcSubtarget::anchor() { } SparcSubtarget &SparcSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) { + UseSoftMulDiv = false; IsV9 = false; IsLeon = false; V8DeprecatedInsts = false; diff --git a/interpreter/llvm/src/lib/Target/Sparc/SparcSubtarget.h b/interpreter/llvm/src/lib/Target/Sparc/SparcSubtarget.h index fa42da425ff2d..d18139984b87f 100644 --- a/interpreter/llvm/src/lib/Target/Sparc/SparcSubtarget.h +++ b/interpreter/llvm/src/lib/Target/Sparc/SparcSubtarget.h @@ -32,6 +32,7 @@ class StringRef; class SparcSubtarget : public SparcGenSubtargetInfo { Triple TargetTriple; virtual void anchor(); + bool UseSoftMulDiv; bool IsV9; bool IsLeon; bool V8DeprecatedInsts; @@ -76,6 +77,7 @@ class SparcSubtarget : public SparcGenSubtargetInfo { bool enableMachineScheduler() const override; + bool useSoftMulDiv() const { return UseSoftMulDiv; } bool isV9() const { return IsV9; } bool isLeon() const { return IsLeon; } bool isVIS() const { return IsVIS; } diff --git a/interpreter/llvm/src/lib/Target/Sparc/SparcTargetMachine.cpp b/interpreter/llvm/src/lib/Target/Sparc/SparcTargetMachine.cpp index 4ae64062d9e2c..c7a1ca262d2c1 100644 --- a/interpreter/llvm/src/lib/Target/Sparc/SparcTargetMachine.cpp +++ b/interpreter/llvm/src/lib/Target/Sparc/SparcTargetMachine.cpp @@ -11,9 +11,9 @@ //===----------------------------------------------------------------------===// #include "SparcTargetMachine.h" -#include "SparcTargetObjectFile.h" -#include "Sparc.h" #include "LeonPasses.h" +#include "Sparc.h" +#include "SparcTargetObjectFile.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/LegacyPassManager.h" @@ -114,7 +114,7 @@ namespace { /// Sparc Code Generator Pass Configuration Options. 
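The SparcSubtarget pieces above follow the usual feature-plumbing pattern: a SubtargetFeature def in the .td file, a bool member, a default in initializeSubtargetDependencies(), and an accessor for the ISel predicate to query. A condensed, hand-written stand-in for that plumbing (the generated TableGen code does the feature-string parsing for real):

#include <string>

class ToySparcSubtarget {
  bool UseSoftMulDiv = false; // backs the "soft-mul-div" feature

public:
  // Parallels initializeSubtargetDependencies(): reset to the default,
  // then apply the feature string (trivially here: one known feature).
  void initialize(const std::string &FS) {
    UseSoftMulDiv = FS.find("+soft-mul-div") != std::string::npos;
  }

  // The accessor the UseSoftMulDiv ISel predicate expands to.
  bool useSoftMulDiv() const { return UseSoftMulDiv; }
};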
class SparcPassConfig : public TargetPassConfig { public: - SparcPassConfig(SparcTargetMachine *TM, PassManagerBase &PM) + SparcPassConfig(SparcTargetMachine &TM, PassManagerBase &PM) : TargetPassConfig(TM, PM) {} SparcTargetMachine &getSparcTargetMachine() const { @@ -128,11 +128,11 @@ class SparcPassConfig : public TargetPassConfig { } // namespace TargetPassConfig *SparcTargetMachine::createPassConfig(PassManagerBase &PM) { - return new SparcPassConfig(this, PM); + return new SparcPassConfig(*this, PM); } void SparcPassConfig::addIRPasses() { - addPass(createAtomicExpandPass(&getSparcTargetMachine())); + addPass(createAtomicExpandPass()); TargetPassConfig::addIRPasses(); } @@ -143,26 +143,26 @@ bool SparcPassConfig::addInstSelector() { } void SparcPassConfig::addPreEmitPass(){ - addPass(createSparcDelaySlotFillerPass(getSparcTargetMachine())); + addPass(createSparcDelaySlotFillerPass()); if (this->getSparcTargetMachine().getSubtargetImpl()->insertNOPLoad()) { - addPass(new InsertNOPLoad(getSparcTargetMachine())); + addPass(new InsertNOPLoad()); } if (this->getSparcTargetMachine().getSubtargetImpl()->fixFSMULD()) { - addPass(new FixFSMULD(getSparcTargetMachine())); + addPass(new FixFSMULD()); } if (this->getSparcTargetMachine().getSubtargetImpl()->replaceFMULS()) { - addPass(new ReplaceFMULS(getSparcTargetMachine())); + addPass(new ReplaceFMULS()); } if (this->getSparcTargetMachine().getSubtargetImpl()->detectRoundChange()) { - addPass(new DetectRoundChange(getSparcTargetMachine())); + addPass(new DetectRoundChange()); } if (this->getSparcTargetMachine().getSubtargetImpl()->fixAllFDIVSQRT()) { - addPass(new FixAllFDIVSQRT(getSparcTargetMachine())); + addPass(new FixAllFDIVSQRT()); } } diff --git a/interpreter/llvm/src/lib/Target/Sparc/SparcTargetMachine.h b/interpreter/llvm/src/lib/Target/Sparc/SparcTargetMachine.h index 48193fe095bed..faf714cbe2c98 100644 --- a/interpreter/llvm/src/lib/Target/Sparc/SparcTargetMachine.h +++ b/interpreter/llvm/src/lib/Target/Sparc/SparcTargetMachine.h @@ -40,6 +40,10 @@ class SparcTargetMachine : public LLVMTargetMachine { TargetLoweringObjectFile *getObjFileLowering() const override { return TLOF.get(); } + + bool isMachineVerifierClean() const override { + return false; + } }; /// Sparc 32-bit target machine diff --git a/interpreter/llvm/src/lib/Target/Sparc/SparcTargetObjectFile.cpp b/interpreter/llvm/src/lib/Target/Sparc/SparcTargetObjectFile.cpp index 8fdde15d8d27f..2c040dce994b6 100644 --- a/interpreter/llvm/src/lib/Target/Sparc/SparcTargetObjectFile.cpp +++ b/interpreter/llvm/src/lib/Target/Sparc/SparcTargetObjectFile.cpp @@ -9,12 +9,18 @@ #include "SparcTargetObjectFile.h" #include "MCTargetDesc/SparcMCExpr.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Target/TargetLowering.h" using namespace llvm; +void SparcELFTargetObjectFile::Initialize(MCContext &Ctx, + const TargetMachine &TM) { + TargetLoweringObjectFileELF::Initialize(Ctx, TM); + InitializeELF(TM.Options.UseInitArray); +} + const MCExpr *SparcELFTargetObjectFile::getTTypeGlobalReference( const GlobalValue *GV, unsigned Encoding, const TargetMachine &TM, MachineModuleInfo *MMI, MCStreamer &Streamer) const { diff --git a/interpreter/llvm/src/lib/Target/Sparc/SparcTargetObjectFile.h b/interpreter/llvm/src/lib/Target/Sparc/SparcTargetObjectFile.h index fe8800625a567..3b1b345c3b193 100644 --- a/interpreter/llvm/src/lib/Target/Sparc/SparcTargetObjectFile.h +++ 
b/interpreter/llvm/src/lib/Target/Sparc/SparcTargetObjectFile.h @@ -23,6 +23,8 @@ class SparcELFTargetObjectFile : public TargetLoweringObjectFileELF { TargetLoweringObjectFileELF() {} + void Initialize(MCContext &Ctx, const TargetMachine &TM) override; + const MCExpr *getTTypeGlobalReference(const GlobalValue *GV, unsigned Encoding, const TargetMachine &TM, diff --git a/interpreter/llvm/src/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/interpreter/llvm/src/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp index efcf6696fd50a..33680789ee082 100644 --- a/interpreter/llvm/src/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp +++ b/interpreter/llvm/src/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp @@ -8,8 +8,8 @@ //===----------------------------------------------------------------------===// #include "MCTargetDesc/SystemZMCTargetDesc.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" @@ -61,6 +61,7 @@ enum RegisterKind { VR64Reg, VR128Reg, AR32Reg, + CR64Reg, }; enum MemoryKind { @@ -274,6 +275,10 @@ class SystemZOperand : public MCParsedAsmOperand { SMLoc getEndLoc() const override { return EndLoc; } void print(raw_ostream &OS) const override; + /// getLocRange - Get the range between the first and last token of this + /// operand. + SMRange getLocRange() const { return SMRange(StartLoc, EndLoc); } + // Used by the TableGen code to add particular types of operand // to an instruction. void addRegOperands(MCInst &Inst, unsigned N) const { @@ -343,6 +348,7 @@ class SystemZOperand : public MCParsedAsmOperand { bool isVF128() const { return false; } bool isVR128() const { return isReg(VR128Reg); } bool isAR32() const { return isReg(AR32Reg); } + bool isCR64() const { return isReg(CR64Reg); } bool isAnyReg() const { return (isReg() || isImm(0, 15)); } bool isBDAddr32Disp12() const { return isMemDisp12(BDMem, ADDR32Reg); } bool isBDAddr32Disp20() const { return isMemDisp20(BDMem, ADDR32Reg); } @@ -379,7 +385,8 @@ class SystemZAsmParser : public MCTargetAsmParser { RegGR, RegFP, RegV, - RegAR + RegAR, + RegCR }; struct Register { RegisterGroup Group; @@ -487,6 +494,9 @@ class SystemZAsmParser : public MCTargetAsmParser { OperandMatchResultTy parseAR32(OperandVector &Operands) { return parseRegister(Operands, RegAR, SystemZMC::AR32Regs, AR32Reg); } + OperandMatchResultTy parseCR64(OperandVector &Operands) { + return parseRegister(Operands, RegCR, SystemZMC::CR64Regs, CR64Reg); + } OperandMatchResultTy parseAnyReg(OperandVector &Operands) { return parseAnyRegister(Operands); } @@ -648,6 +658,8 @@ bool SystemZAsmParser::parseRegister(Register &Reg) { Reg.Group = RegV; else if (Prefix == 'a' && Reg.Num < 16) Reg.Group = RegAR; + else if (Prefix == 'c' && Reg.Num < 16) + Reg.Group = RegCR; else return Error(Reg.StartLoc, "invalid register"); @@ -741,6 +753,10 @@ SystemZAsmParser::parseAnyRegister(OperandVector &Operands) { Kind = AR32Reg; RegNo = SystemZMC::AR32Regs[Reg.Num]; } + else if (Reg.Group == RegCR) { + Kind = CR64Reg; + RegNo = SystemZMC::CR64Regs[Reg.Num]; + } else { return MatchOperand_ParseFail; } @@ -1056,6 +1072,8 @@ bool SystemZAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, RegNo = SystemZMC::VR128Regs[Reg.Num]; else if (Reg.Group == RegAR) RegNo = SystemZMC::AR32Regs[Reg.Num]; + else if (Reg.Group == RegCR) + RegNo = SystemZMC::CR64Regs[Reg.Num]; StartLoc = Reg.StartLoc; EndLoc = Reg.EndLoc; return false; @@ -1150,6 
+1168,8 @@ bool SystemZAsmParser::parseOperand(OperandVector &Operands, return false; } +std::string SystemZMnemonicSpellCheck(StringRef S, uint64_t FBS); + bool SystemZAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, @@ -1195,8 +1215,13 @@ bool SystemZAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, return Error(ErrorLoc, "invalid operand for instruction"); } - case Match_MnemonicFail: - return Error(IDLoc, "invalid instruction"); + case Match_MnemonicFail: { + uint64_t FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); + std::string Suggestion = SystemZMnemonicSpellCheck( + ((SystemZOperand &)*Operands[0]).getToken(), FBS); + return Error(IDLoc, "invalid instruction" + Suggestion, + ((SystemZOperand &)*Operands[0]).getLocRange()); + } } llvm_unreachable("Unexpected match type"); diff --git a/interpreter/llvm/src/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp b/interpreter/llvm/src/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp index 27fd70bc60925..8903b57ffd0b4 100644 --- a/interpreter/llvm/src/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp +++ b/interpreter/llvm/src/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp @@ -162,6 +162,12 @@ static DecodeStatus DecodeAR32BitRegisterClass(MCInst &Inst, uint64_t RegNo, return decodeRegisterClass(Inst, RegNo, SystemZMC::AR32Regs, 16); } +static DecodeStatus DecodeCR64BitRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, SystemZMC::CR64Regs, 16); +} + template static DecodeStatus decodeUImmOperand(MCInst &Inst, uint64_t Imm) { if (!isUInt(Imm)) diff --git a/interpreter/llvm/src/lib/Target/SystemZ/LLVMBuild.txt b/interpreter/llvm/src/lib/Target/SystemZ/LLVMBuild.txt index 6f8431db7b11c..9b8b141fd52ab 100644 --- a/interpreter/llvm/src/lib/Target/SystemZ/LLVMBuild.txt +++ b/interpreter/llvm/src/lib/Target/SystemZ/LLVMBuild.txt @@ -31,5 +31,5 @@ has_jit = 1 type = Library name = SystemZCodeGen parent = SystemZ -required_libraries = Analysis AsmPrinter CodeGen Core MC SelectionDAG Support SystemZAsmPrinter SystemZDesc SystemZInfo Target +required_libraries = Analysis AsmPrinter CodeGen Core MC Scalar SelectionDAG Support SystemZAsmPrinter SystemZDesc SystemZInfo Target add_to_library_groups = SystemZ diff --git a/interpreter/llvm/src/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp b/interpreter/llvm/src/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp index 23b7d5b5d5013..51ac410a9c819 100644 --- a/interpreter/llvm/src/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp +++ b/interpreter/llvm/src/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/SystemZMCTargetDesc.h" #include "MCTargetDesc/SystemZMCFixups.h" +#include "MCTargetDesc/SystemZMCTargetDesc.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCFixupKindInfo.h" @@ -50,8 +50,9 @@ class SystemZMCAsmBackend : public MCAsmBackend { return SystemZ::NumTargetFixupKinds; } const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override; - void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, - uint64_t Value, bool IsPCRel, MCContext &Ctx) const override; + void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target, MutableArrayRef Data, + uint64_t Value, bool 
IsResolved) const override; bool mayNeedRelaxation(const MCInst &Inst) const override { return false; } @@ -89,15 +90,17 @@ SystemZMCAsmBackend::getFixupKindInfo(MCFixupKind Kind) const { return Infos[Kind - FirstTargetFixupKind]; } -void SystemZMCAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, - unsigned DataSize, uint64_t Value, - bool IsPCRel, MCContext &Ctx) const { +void SystemZMCAsmBackend::applyFixup(const MCAssembler &Asm, + const MCFixup &Fixup, + const MCValue &Target, + MutableArrayRef Data, uint64_t Value, + bool IsResolved) const { MCFixupKind Kind = Fixup.getKind(); unsigned Offset = Fixup.getOffset(); unsigned BitSize = getFixupKindInfo(Kind).TargetSize; unsigned Size = (BitSize + 7) / 8; - assert(Offset + Size <= DataSize && "Invalid fixup offset!"); + assert(Offset + Size <= Data.size() && "Invalid fixup offset!"); // Big-endian insertion of Size bytes. Value = extractBitsForFixup(Kind, Value); diff --git a/interpreter/llvm/src/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp b/interpreter/llvm/src/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp index 3de570bf30cc4..df0a8161e6e7c 100644 --- a/interpreter/llvm/src/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp +++ b/interpreter/llvm/src/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp @@ -9,11 +9,11 @@ #include "MCTargetDesc/SystemZMCFixups.h" #include "MCTargetDesc/SystemZMCTargetDesc.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixup.h" #include "llvm/MC/MCValue.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include #include diff --git a/interpreter/llvm/src/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/interpreter/llvm/src/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp index dfea7e33fa15f..727ab921daf95 100644 --- a/interpreter/llvm/src/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp +++ b/interpreter/llvm/src/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp @@ -116,6 +116,13 @@ const unsigned SystemZMC::AR32Regs[16] = { SystemZ::A12, SystemZ::A13, SystemZ::A14, SystemZ::A15 }; +const unsigned SystemZMC::CR64Regs[16] = { + SystemZ::C0, SystemZ::C1, SystemZ::C2, SystemZ::C3, + SystemZ::C4, SystemZ::C5, SystemZ::C6, SystemZ::C7, + SystemZ::C8, SystemZ::C9, SystemZ::C10, SystemZ::C11, + SystemZ::C12, SystemZ::C13, SystemZ::C14, SystemZ::C15 +}; + unsigned SystemZMC::getFirstReg(unsigned Reg) { static unsigned Map[SystemZ::NUM_TARGET_REGS]; static bool Initialized = false; diff --git a/interpreter/llvm/src/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h b/interpreter/llvm/src/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h index d9926c7e4986d..dbca3485290aa 100644 --- a/interpreter/llvm/src/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h +++ b/interpreter/llvm/src/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h @@ -55,6 +55,7 @@ extern const unsigned VR32Regs[32]; extern const unsigned VR64Regs[32]; extern const unsigned VR128Regs[32]; extern const unsigned AR32Regs[16]; +extern const unsigned CR64Regs[16]; // Return the 0-based number of the first architectural register that // contains the given LLVM register. E.g. R1D -> 1. 
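A side note on the SystemZMCAsmBackend::applyFixup() rewrite above: the separate DataSize parameter disappears because MutableArrayRef carries its own length, while the big-endian byte insertion stays the same. Roughly, with plain types in place of the MC classes:

#include <cassert>
#include <cstddef>
#include <cstdint>

// Big-endian insertion of a Size-byte fixup value at Offset. The bounds
// check now asks the buffer for its own length, as in the patched assert
// against Data.size().
void applyFixupBE(unsigned char *Data, std::size_t DataLen,
                  unsigned Offset, unsigned Size, std::uint64_t Value) {
  assert(Offset + Size <= DataLen && "Invalid fixup offset!");
  for (unsigned I = 0; I != Size; ++I) {
    unsigned Shift = 8 * (Size - 1 - I); // most significant byte first
    Data[Offset + I] |= static_cast<unsigned char>(Value >> Shift);
  }
}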
diff --git a/interpreter/llvm/src/lib/Target/SystemZ/README.txt b/interpreter/llvm/src/lib/Target/SystemZ/README.txt index 74cf653b9d95c..9b714157550d0 100644 --- a/interpreter/llvm/src/lib/Target/SystemZ/README.txt +++ b/interpreter/llvm/src/lib/Target/SystemZ/README.txt @@ -67,6 +67,11 @@ We don't use ICM, STCM, or CLM. -- +We don't use ADD (LOGICAL) HIGH, SUBTRACT (LOGICAL) HIGH, +or COMPARE (LOGICAL) HIGH yet. + +-- + DAGCombiner doesn't yet fold truncations of extended loads. Functions like: unsigned long f (unsigned long x, unsigned short *y) diff --git a/interpreter/llvm/src/lib/Target/SystemZ/SystemZ.td b/interpreter/llvm/src/lib/Target/SystemZ/SystemZ.td index 6bdfd4d07edce..41300a1b62954 100644 --- a/interpreter/llvm/src/lib/Target/SystemZ/SystemZ.td +++ b/interpreter/llvm/src/lib/Target/SystemZ/SystemZ.td @@ -54,6 +54,9 @@ include "SystemZInstrFormats.td" include "SystemZInstrInfo.td" include "SystemZInstrVector.td" include "SystemZInstrFP.td" +include "SystemZInstrHFP.td" +include "SystemZInstrDFP.td" +include "SystemZInstrSystem.td" def SystemZInstrInfo : InstrInfo {} diff --git a/interpreter/llvm/src/lib/Target/SystemZ/SystemZExpandPseudo.cpp b/interpreter/llvm/src/lib/Target/SystemZ/SystemZExpandPseudo.cpp index 92ce8089c24f1..d02db9a617a33 100644 --- a/interpreter/llvm/src/lib/Target/SystemZ/SystemZExpandPseudo.cpp +++ b/interpreter/llvm/src/lib/Target/SystemZ/SystemZExpandPseudo.cpp @@ -74,7 +74,7 @@ bool SystemZExpandPseudo::expandLOCRMux(MachineBasicBlock &MBB, unsigned CCValid = MI.getOperand(3).getImm(); unsigned CCMask = MI.getOperand(4).getImm(); - LivePhysRegs LiveRegs(&TII->getRegisterInfo()); + LivePhysRegs LiveRegs(TII->getRegisterInfo()); LiveRegs.addLiveOuts(MBB); for (auto I = std::prev(MBB.end()); I != MBBI; --I) LiveRegs.stepBackward(*I); diff --git a/interpreter/llvm/src/lib/Target/SystemZ/SystemZFeatures.td b/interpreter/llvm/src/lib/Target/SystemZ/SystemZFeatures.td index 7bfa378aa85c2..fda9c30fe3fcc 100644 --- a/interpreter/llvm/src/lib/Target/SystemZ/SystemZFeatures.td +++ b/interpreter/llvm/src/lib/Target/SystemZ/SystemZFeatures.td @@ -68,11 +68,21 @@ def FeaturePopulationCount : SystemZFeature< "Assume that the population-count facility is installed" >; +def FeatureMessageSecurityAssist3 : SystemZFeature< + "message-security-assist-extension3", "MessageSecurityAssist3", + "Assume that the message-security-assist extension facility 3 is installed" +>; + def FeatureMessageSecurityAssist4 : SystemZFeature< "message-security-assist-extension4", "MessageSecurityAssist4", "Assume that the message-security-assist extension facility 4 is installed" >; +def FeatureResetReferenceBitsMultiple : SystemZFeature< + "reset-reference-bits-multiple", "ResetReferenceBitsMultiple", + "Assume that the reset-reference-bits-multiple facility is installed" +>; + def Arch9NewFeatures : SystemZFeatureList<[ FeatureDistinctOps, FeatureFastSerialization, @@ -81,7 +91,9 @@ def Arch9NewFeatures : SystemZFeatureList<[ FeatureInterlockedAccess1, FeatureLoadStoreOnCond, FeaturePopulationCount, - FeatureMessageSecurityAssist4 + FeatureMessageSecurityAssist3, + FeatureMessageSecurityAssist4, + FeatureResetReferenceBitsMultiple ]>; //===----------------------------------------------------------------------===// @@ -115,12 +127,24 @@ def FeatureTransactionalExecution : SystemZFeature< "Assume that the transactional-execution facility is installed" >; +def FeatureDFPZonedConversion : SystemZFeature< + "dfp-zoned-conversion", "DFPZonedConversion", + "Assume that the DFP zoned-conversion 
facility is installed" +>; + +def FeatureEnhancedDAT2 : SystemZFeature< + "enhanced-dat-2", "EnhancedDAT2", + "Assume that the enhanced-DAT facility 2 is installed" +>; + def Arch10NewFeatures : SystemZFeatureList<[ FeatureExecutionHint, FeatureLoadAndTrap, FeatureMiscellaneousExtensions, FeatureProcessorAssist, - FeatureTransactionalExecution + FeatureTransactionalExecution, + FeatureDFPZonedConversion, + FeatureEnhancedDAT2 ]>; //===----------------------------------------------------------------------===// @@ -144,6 +168,11 @@ def FeatureMessageSecurityAssist5 : SystemZFeature< "Assume that the message-security-assist extension facility 5 is installed" >; +def FeatureDFPPackedConversion : SystemZFeature< + "dfp-packed-conversion", "DFPPackedConversion", + "Assume that the DFP packed-conversion facility is installed" +>; + def FeatureVector : SystemZFeature< "vector", "Vector", "Assume that the vectory facility is installed" @@ -154,9 +183,62 @@ def Arch11NewFeatures : SystemZFeatureList<[ FeatureLoadAndZeroRightmostByte, FeatureLoadStoreOnCond2, FeatureMessageSecurityAssist5, + FeatureDFPPackedConversion, FeatureVector ]>; +//===----------------------------------------------------------------------===// +// +// New features added in the Twelvth Edition of the z/Architecture +// +//===----------------------------------------------------------------------===// + +def FeatureMiscellaneousExtensions2 : SystemZFeature< + "miscellaneous-extensions-2", "MiscellaneousExtensions2", + "Assume that the miscellaneous-extensions facility 2 is installed" +>; + +def FeatureGuardedStorage : SystemZFeature< + "guarded-storage", "GuardedStorage", + "Assume that the guarded-storage facility is installed" +>; + +def FeatureMessageSecurityAssist7 : SystemZFeature< + "message-security-assist-extension7", "MessageSecurityAssist7", + "Assume that the message-security-assist extension facility 7 is installed" +>; + +def FeatureMessageSecurityAssist8 : SystemZFeature< + "message-security-assist-extension8", "MessageSecurityAssist8", + "Assume that the message-security-assist extension facility 8 is installed" +>; + +def FeatureVectorEnhancements1 : SystemZFeature< + "vector-enhancements-1", "VectorEnhancements1", + "Assume that the vector enhancements facility 1 is installed" +>; +def FeatureNoVectorEnhancements1 : SystemZMissingFeature<"VectorEnhancements1">; + +def FeatureVectorPackedDecimal : SystemZFeature< + "vector-packed-decimal", "VectorPackedDecimal", + "Assume that the vector packed decimal facility is installed" +>; + +def FeatureInsertReferenceBitsMultiple : SystemZFeature< + "insert-reference-bits-multiple", "InsertReferenceBitsMultiple", + "Assume that the insert-reference-bits-multiple facility is installed" +>; + +def Arch12NewFeatures : SystemZFeatureList<[ + FeatureMiscellaneousExtensions2, + FeatureGuardedStorage, + FeatureMessageSecurityAssist7, + FeatureMessageSecurityAssist8, + FeatureVectorEnhancements1, + FeatureVectorPackedDecimal, + FeatureInsertReferenceBitsMultiple +]>; + //===----------------------------------------------------------------------===// // // Cumulative supported and unsupported feature sets @@ -171,9 +253,13 @@ def Arch10SupportedFeatures : SystemZFeatureAdd; def Arch11SupportedFeatures : SystemZFeatureAdd; +def Arch12SupportedFeatures + : SystemZFeatureAdd; -def Arch11UnsupportedFeatures +def Arch12UnsupportedFeatures : SystemZFeatureList<[]>; +def Arch11UnsupportedFeatures + : SystemZFeatureAdd; def Arch10UnsupportedFeatures : SystemZFeatureAdd; def 
Arch9UnsupportedFeatures diff --git a/interpreter/llvm/src/lib/Target/SystemZ/SystemZFrameLowering.cpp b/interpreter/llvm/src/lib/Target/SystemZ/SystemZFrameLowering.cpp index a28a91e834f61..0cb2b5a14ce73 100644 --- a/interpreter/llvm/src/lib/Target/SystemZ/SystemZFrameLowering.cpp +++ b/interpreter/llvm/src/lib/Target/SystemZ/SystemZFrameLowering.cpp @@ -277,8 +277,21 @@ void SystemZFrameLowering:: processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS) const { MachineFrameInfo &MFFrame = MF.getFrameInfo(); - uint64_t MaxReach = (MFFrame.estimateStackSize(MF) + - SystemZMC::CallFrameSize * 2); + // Get the size of our stack frame to be allocated ... + uint64_t StackSize = (MFFrame.estimateStackSize(MF) + + SystemZMC::CallFrameSize); + // ... and the maximum offset we may need to reach into the + // caller's frame to access the save area or stack arguments. + int64_t MaxArgOffset = SystemZMC::CallFrameSize; + for (int I = MFFrame.getObjectIndexBegin(); I != 0; ++I) + if (MFFrame.getObjectOffset(I) >= 0) { + int64_t ArgOffset = SystemZMC::CallFrameSize + + MFFrame.getObjectOffset(I) + + MFFrame.getObjectSize(I); + MaxArgOffset = std::max(MaxArgOffset, ArgOffset); + } + + uint64_t MaxReach = StackSize + MaxArgOffset; if (!isUInt<12>(MaxReach)) { // We may need register scavenging slots if some parts of the frame // are outside the reach of an unsigned 12-bit displacement. diff --git a/interpreter/llvm/src/lib/Target/SystemZ/SystemZHazardRecognizer.cpp b/interpreter/llvm/src/lib/Target/SystemZ/SystemZHazardRecognizer.cpp index fe4b52b515e0c..73a1036f88e0c 100644 --- a/interpreter/llvm/src/lib/Target/SystemZ/SystemZHazardRecognizer.cpp +++ b/interpreter/llvm/src/lib/Target/SystemZ/SystemZHazardRecognizer.cpp @@ -26,7 +26,7 @@ using namespace llvm; -#define DEBUG_TYPE "misched" +#define DEBUG_TYPE "machine-scheduler" // This is the limit of processor resource usage at which the // scheduler should try to look for other instructions (not using the diff --git a/interpreter/llvm/src/lib/Target/SystemZ/SystemZHazardRecognizer.h b/interpreter/llvm/src/lib/Target/SystemZ/SystemZHazardRecognizer.h index 8fa54ee434cf2..0c755c9ad1b9a 100644 --- a/interpreter/llvm/src/lib/Target/SystemZ/SystemZHazardRecognizer.h +++ b/interpreter/llvm/src/lib/Target/SystemZ/SystemZHazardRecognizer.h @@ -25,10 +25,10 @@ #define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZHAZARDRECOGNIZER_H #include "SystemZSubtarget.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineScheduler.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/Support/raw_ostream.h" #include diff --git a/interpreter/llvm/src/lib/Target/SystemZ/SystemZISelLowering.cpp b/interpreter/llvm/src/lib/Target/SystemZ/SystemZISelLowering.cpp index 235e095f00100..2d916d2e15214 100644 --- a/interpreter/llvm/src/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/interpreter/llvm/src/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -101,7 +101,10 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass); addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass); } - addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass); + if (Subtarget.hasVectorEnhancements1()) + addRegisterClass(MVT::f128, &SystemZ::VR128BitRegClass); + else + addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass); if (Subtarget.hasVector()) { addRegisterClass(MVT::v16i8, 
&SystemZ::VR128BitRegClass); @@ -316,7 +319,10 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, setOperationAction(ISD::AND, VT, Legal); setOperationAction(ISD::OR, VT, Legal); setOperationAction(ISD::XOR, VT, Legal); - setOperationAction(ISD::CTPOP, VT, Custom); + if (Subtarget.hasVectorEnhancements1()) + setOperationAction(ISD::CTPOP, VT, Legal); + else + setOperationAction(ISD::CTPOP, VT, Custom); setOperationAction(ISD::CTTZ, VT, Legal); setOperationAction(ISD::CTLZ, VT, Legal); @@ -414,10 +420,60 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FROUND, MVT::v2f64, Legal); } + // The vector enhancements facility 1 has instructions for these. + if (Subtarget.hasVectorEnhancements1()) { + setOperationAction(ISD::FADD, MVT::v4f32, Legal); + setOperationAction(ISD::FNEG, MVT::v4f32, Legal); + setOperationAction(ISD::FSUB, MVT::v4f32, Legal); + setOperationAction(ISD::FMUL, MVT::v4f32, Legal); + setOperationAction(ISD::FMA, MVT::v4f32, Legal); + setOperationAction(ISD::FDIV, MVT::v4f32, Legal); + setOperationAction(ISD::FABS, MVT::v4f32, Legal); + setOperationAction(ISD::FSQRT, MVT::v4f32, Legal); + setOperationAction(ISD::FRINT, MVT::v4f32, Legal); + setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal); + setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal); + setOperationAction(ISD::FCEIL, MVT::v4f32, Legal); + setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal); + setOperationAction(ISD::FROUND, MVT::v4f32, Legal); + + setOperationAction(ISD::FMAXNUM, MVT::f64, Legal); + setOperationAction(ISD::FMAXNAN, MVT::f64, Legal); + setOperationAction(ISD::FMINNUM, MVT::f64, Legal); + setOperationAction(ISD::FMINNAN, MVT::f64, Legal); + + setOperationAction(ISD::FMAXNUM, MVT::v2f64, Legal); + setOperationAction(ISD::FMAXNAN, MVT::v2f64, Legal); + setOperationAction(ISD::FMINNUM, MVT::v2f64, Legal); + setOperationAction(ISD::FMINNAN, MVT::v2f64, Legal); + + setOperationAction(ISD::FMAXNUM, MVT::f32, Legal); + setOperationAction(ISD::FMAXNAN, MVT::f32, Legal); + setOperationAction(ISD::FMINNUM, MVT::f32, Legal); + setOperationAction(ISD::FMINNAN, MVT::f32, Legal); + + setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal); + setOperationAction(ISD::FMAXNAN, MVT::v4f32, Legal); + setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal); + setOperationAction(ISD::FMINNAN, MVT::v4f32, Legal); + + setOperationAction(ISD::FMAXNUM, MVT::f128, Legal); + setOperationAction(ISD::FMAXNAN, MVT::f128, Legal); + setOperationAction(ISD::FMINNUM, MVT::f128, Legal); + setOperationAction(ISD::FMINNAN, MVT::f128, Legal); + } + // We have fused multiply-addition for f32 and f64 but not f128. setOperationAction(ISD::FMA, MVT::f32, Legal); setOperationAction(ISD::FMA, MVT::f64, Legal); - setOperationAction(ISD::FMA, MVT::f128, Expand); + if (Subtarget.hasVectorEnhancements1()) + setOperationAction(ISD::FMA, MVT::f128, Legal); + else + setOperationAction(ISD::FMA, MVT::f128, Expand); + + // We don't have a copysign instruction on vector registers. + if (Subtarget.hasVectorEnhancements1()) + setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand); // Needed so that we don't try to implement f128 constant loads using // a load-and-extend of a f80 constant (in cases where the constant @@ -425,6 +481,12 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, for (MVT VT : MVT::fp_valuetypes()) setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand); + // We don't have extending load instruction on vector registers. 
+ if (Subtarget.hasVectorEnhancements1()) { + setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand); + } + // Floating-point truncation and stores need to be done separately. setTruncStoreAction(MVT::f64, MVT::f32, Expand); setTruncStoreAction(MVT::f128, MVT::f32, Expand); @@ -489,7 +551,7 @@ bool SystemZTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const { case MVT::f64: return true; case MVT::f128: - return false; + return Subtarget.hasVectorEnhancements1(); default: break; } @@ -1322,11 +1384,6 @@ SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, RetOps); } -SDValue SystemZTargetLowering::prepareVolatileOrAtomicLoad( - SDValue Chain, const SDLoc &DL, SelectionDAG &DAG) const { - return DAG.getNode(SystemZISD::SERIALIZE, DL, MVT::Other, Chain); -} - // Return true if Op is an intrinsic node with chain that returns the CC value // as its only (other) argument. Provide the associated SystemZISD opcode and // the mask of valid CC values if so. @@ -1467,21 +1524,25 @@ static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) { return true; case Intrinsic::s390_vfcedbs: + case Intrinsic::s390_vfcesbs: Opcode = SystemZISD::VFCMPES; CCValid = SystemZ::CCMASK_VCMP; return true; case Intrinsic::s390_vfchdbs: + case Intrinsic::s390_vfchsbs: Opcode = SystemZISD::VFCMPHS; CCValid = SystemZ::CCMASK_VCMP; return true; case Intrinsic::s390_vfchedbs: + case Intrinsic::s390_vfchesbs: Opcode = SystemZISD::VFCMPHES; CCValid = SystemZ::CCMASK_VCMP; return true; case Intrinsic::s390_vftcidb: + case Intrinsic::s390_vftcisb: Opcode = SystemZISD::VFTCI; CCValid = SystemZ::CCMASK_VCMP; return true; @@ -2059,6 +2120,7 @@ static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL, if (NewC.ICmpType != SystemZICMP::SignedOnly && NewC.Op0.getOpcode() == ISD::SHL && isSimpleShift(NewC.Op0, ShiftVal) && + (MaskVal >> ShiftVal != 0) && (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, MaskVal >> ShiftVal, CmpVal >> ShiftVal, @@ -2068,6 +2130,7 @@ static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL, } else if (NewC.ICmpType != SystemZICMP::SignedOnly && NewC.Op0.getOpcode() == ISD::SRL && isSimpleShift(NewC.Op0, ShiftVal) && + (MaskVal << ShiftVal != 0) && (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, MaskVal << ShiftVal, CmpVal << ShiftVal, @@ -2227,15 +2290,12 @@ static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend, // Lower a binary operation that produces two VT results, one in each // half of a GR128 pair. Op0 and Op1 are the VT operands to the operation, -// Extend extends Op0 to a GR128, and Opcode performs the GR128 operation -// on the extended Op0 and (unextended) Op1. Store the even register result +// and Opcode performs the GR128 operation. Store the even register result // in Even and the odd register result in Odd. 
static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT, - unsigned Extend, unsigned Opcode, SDValue Op0, - SDValue Op1, SDValue &Even, SDValue &Odd) { - SDNode *In128 = DAG.getMachineNode(Extend, DL, MVT::Untyped, Op0); - SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped, - SDValue(In128, 0), Op1); + unsigned Opcode, SDValue Op0, SDValue Op1, + SDValue &Even, SDValue &Odd) { + SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped, Op0, Op1); bool Is32Bit = is32Bit(VT); Even = DAG.getTargetExtractSubreg(SystemZ::even128(Is32Bit), DL, VT, Result); Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result); @@ -2322,11 +2382,15 @@ static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL, // Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode, // producing a result of type VT. -static SDValue getVectorCmp(SelectionDAG &DAG, unsigned Opcode, const SDLoc &DL, - EVT VT, SDValue CmpOp0, SDValue CmpOp1) { - // There is no hardware support for v4f32, so extend the vector into - // two v2f64s and compare those. - if (CmpOp0.getValueType() == MVT::v4f32) { +SDValue SystemZTargetLowering::getVectorCmp(SelectionDAG &DAG, unsigned Opcode, + const SDLoc &DL, EVT VT, + SDValue CmpOp0, + SDValue CmpOp1) const { + // There is no hardware support for v4f32 (unless we have the vector + // enhancements facility 1), so extend the vector into two v2f64s + // and compare those. + if (CmpOp0.getValueType() == MVT::v4f32 && + !Subtarget.hasVectorEnhancements1()) { SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0); SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0); SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1); @@ -2340,9 +2404,11 @@ static SDValue getVectorCmp(SelectionDAG &DAG, unsigned Opcode, const SDLoc &DL, // Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing // an integer mask of type VT. -static SDValue lowerVectorSETCC(SelectionDAG &DAG, const SDLoc &DL, EVT VT, - ISD::CondCode CC, SDValue CmpOp0, - SDValue CmpOp1) { +SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG, + const SDLoc &DL, EVT VT, + ISD::CondCode CC, + SDValue CmpOp0, + SDValue CmpOp1) const { bool IsFP = CmpOp0.getValueType().isFloatingPoint(); bool Invert = false; SDValue Cmp; @@ -2350,6 +2416,7 @@ static SDValue lowerVectorSETCC(SelectionDAG &DAG, const SDLoc &DL, EVT VT, // Handle tests for order using (or (ogt y x) (oge x y)). case ISD::SETUO: Invert = true; + LLVM_FALLTHROUGH; case ISD::SETO: { assert(IsFP && "Unexpected integer comparison"); SDValue LT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp1, CmpOp0); @@ -2361,6 +2428,7 @@ static SDValue lowerVectorSETCC(SelectionDAG &DAG, const SDLoc &DL, EVT VT, // Handle <> tests using (or (ogt y x) (ogt x y)). case ISD::SETUEQ: Invert = true; + LLVM_FALLTHROUGH; case ISD::SETONE: { assert(IsFP && "Unexpected integer comparison"); SDValue LT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp1, CmpOp0); @@ -2964,8 +3032,14 @@ SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op, // We define this so that it can be used for constant division. lowerMUL_LOHI32(DAG, DL, ISD::SIGN_EXTEND, Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]); + else if (Subtarget.hasMiscellaneousExtensions2()) + // SystemZISD::SMUL_LOHI returns the low result in the odd register and + // the high result in the even register. ISD::SMUL_LOHI is defined to + // return the low half first, so the results are in reverse order. 
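The simplified lowerGR128Binary() above hands both VT operands straight to the glued GR128 node and peels the two halves out of the register pair afterwards. The even/odd bookkeeping, with plain integers standing in for SDValues:

#include <cstdint>

// Model a GR128 pair as two 64-bit halves. Per the comments above, the
// target puts the low half of the result in the ODD register and the high
// half in the EVEN one, while ISD::SMUL_LOHI/UMUL_LOHI return {low, high};
// hence the swapped Ops[1]/Ops[0] arguments in the patch.
struct GR128 { std::uint64_t Even, Odd; };

void splitMulLoHi(const GR128 &Pair, std::uint64_t &Lo, std::uint64_t &Hi) {
  Lo = Pair.Odd;  // Ops[0] in the patch: returned first
  Hi = Pair.Even; // Ops[1] in the patch
}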
+ lowerGR128Binary(DAG, DL, VT, SystemZISD::SMUL_LOHI, + Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]); else { - // Do a full 128-bit multiplication based on UMUL_LOHI64: + // Do a full 128-bit multiplication based on SystemZISD::UMUL_LOHI: // // (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64) // @@ -2983,10 +3057,10 @@ SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op, SDValue RL = Op.getOperand(1); SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63); SDValue RH = DAG.getNode(ISD::SRA, DL, VT, RL, C63); - // UMUL_LOHI64 returns the low result in the odd register and the high - // result in the even register. SMUL_LOHI is defined to return the - // low half first, so the results are in reverse order. - lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, SystemZISD::UMUL_LOHI64, + // SystemZISD::UMUL_LOHI returns the low result in the odd register and + // the high result in the even register. ISD::SMUL_LOHI is defined to + // return the low half first, so the results are in reverse order. + lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI, LL, RL, Ops[1], Ops[0]); SDValue NegLLTimesRH = DAG.getNode(ISD::AND, DL, VT, LL, RH); SDValue NegLHTimesRL = DAG.getNode(ISD::AND, DL, VT, LH, RL); @@ -3007,10 +3081,10 @@ SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op, lowerMUL_LOHI32(DAG, DL, ISD::ZERO_EXTEND, Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]); else - // UMUL_LOHI64 returns the low result in the odd register and the high - // result in the even register. UMUL_LOHI is defined to return the - // low half first, so the results are in reverse order. - lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, SystemZISD::UMUL_LOHI64, + // SystemZISD::UMUL_LOHI returns the low result in the odd register and + // the high result in the even register. ISD::UMUL_LOHI is defined to + // return the low half first, so the results are in reverse order. + lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI, Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]); return DAG.getMergeValues(Ops, DL); } @@ -3021,24 +3095,19 @@ SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op, SDValue Op1 = Op.getOperand(1); EVT VT = Op.getValueType(); SDLoc DL(Op); - unsigned Opcode; - // We use DSGF for 32-bit division. - if (is32Bit(VT)) { + // We use DSGF for 32-bit division. This means the first operand must + // always be 64-bit, and the second operand should be 32-bit whenever + // that is possible, to improve performance. + if (is32Bit(VT)) Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0); - Opcode = SystemZISD::SDIVREM32; - } else if (DAG.ComputeNumSignBits(Op1) > 32) { + else if (DAG.ComputeNumSignBits(Op1) > 32) Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1); - Opcode = SystemZISD::SDIVREM32; - } else - Opcode = SystemZISD::SDIVREM64; - // DSG(F) takes a 64-bit dividend, so the even register in the GR128 - // input is "don't care". The instruction returns the remainder in - // the even register and the quotient in the odd register. + // DSG(F) returns the remainder in the even register and the + // quotient in the odd register. SDValue Ops[2]; - lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, Opcode, - Op0, Op1, Ops[1], Ops[0]); + lowerGR128Binary(DAG, DL, VT, SystemZISD::SDIVREM, Op0, Op1, Ops[1], Ops[0]); return DAG.getMergeValues(Ops, DL); } @@ -3047,16 +3116,11 @@ SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op, EVT VT = Op.getValueType(); SDLoc DL(Op); - // DL(G) uses a double-width dividend, so we need to clear the even - // register in the GR128 input. 
The instruction returns the remainder - // in the even register and the quotient in the odd register. + // DL(G) returns the remainder in the even register and the + // quotient in the odd register. SDValue Ops[2]; - if (is32Bit(VT)) - lowerGR128Binary(DAG, DL, VT, SystemZ::ZEXT128_32, SystemZISD::UDIVREM32, - Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]); - else - lowerGR128Binary(DAG, DL, VT, SystemZ::ZEXT128_64, SystemZISD::UDIVREM64, - Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]); + lowerGR128Binary(DAG, DL, VT, SystemZISD::UDIVREM, + Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]); return DAG.getMergeValues(Ops, DL); } @@ -3196,13 +3260,13 @@ SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op, SDLoc DL(Op); AtomicOrdering FenceOrdering = static_cast( cast(Op.getOperand(1))->getZExtValue()); - SynchronizationScope FenceScope = static_cast( + SyncScope::ID FenceSSID = static_cast( cast(Op.getOperand(2))->getZExtValue()); // The only fence that needs an instruction is a sequentially-consistent // cross-thread fence. if (FenceOrdering == AtomicOrdering::SequentiallyConsistent && - FenceScope == CrossThread) { + FenceSSID == SyncScope::System) { return SDValue(DAG.getMachineNode(SystemZ::Serialize, DL, MVT::Other, Op.getOperand(0)), 0); @@ -3212,12 +3276,15 @@ SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op, return DAG.getNode(SystemZISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0)); } -// Op is an atomic load. Lower it into a normal volatile load. +// Op is an atomic load. Lower it into a serialization followed +// by a normal volatile load. SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const { auto *Node = cast(Op.getNode()); + SDValue Chain = SDValue(DAG.getMachineNode(SystemZ::Serialize, SDLoc(Op), + MVT::Other, Node->getChain()), 0); return DAG.getExtLoad(ISD::EXTLOAD, SDLoc(Op), Op.getValueType(), - Node->getChain(), Node->getBasePtr(), + Chain, Node->getBasePtr(), Node->getMemoryVT(), Node->getMemOperand()); } @@ -4189,12 +4256,20 @@ static SDValue buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT, if (Single.getNode() && (Count > 1 || Single.getOpcode() == ISD::LOAD)) return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Single); + // If all elements are loads, use VLREP/VLEs (below). + bool AllLoads = true; + for (auto Elem : Elems) + if (Elem.getOpcode() != ISD::LOAD || cast(Elem)->isIndexed()) { + AllLoads = false; + break; + } + // The best way of building a v2i64 from two i64s is to use VLVGP. - if (VT == MVT::v2i64) + if (VT == MVT::v2i64 && !AllLoads) return joinDwords(DAG, DL, Elems[0], Elems[1]); // Use a 64-bit merge high to combine two doubles. - if (VT == MVT::v2f64) + if (VT == MVT::v2f64 && !AllLoads) return buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]); // Build v4f32 values directly from the FPRs: @@ -4204,7 +4279,7 @@ static SDValue buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT, // // V VMRHG // - if (VT == MVT::v4f32) { + if (VT == MVT::v4f32 && !AllLoads) { SDValue Op01 = buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]); SDValue Op23 = buildMergeScalars(DAG, DL, VT, Elems[2], Elems[3]); // Avoid unnecessary undefs by reusing the other operand. 
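The UMUL_LOHI-based fallback in lowerSMUL_LOHI above uses the standard identity smulh(a, b) = umulh(a, b) - (b < 0 ? a : 0) - (a < 0 ? b : 0), taken modulo 2^64. LH and RH are arithmetic shifts by 63, so they are all-ones masks exactly for negative inputs, and the two AND terms are the conditional subtrahends. A standalone C++ check against the compiler's native 128-bit arithmetic (assumes the GCC/Clang __int128 extension; not patch code):

    #include <cassert>
    #include <cstdint>

    // High 64 bits of the signed product, built from the unsigned product
    // the way lowerSMUL_LOHI does: lh/rh are a>>63 and b>>63 (arithmetic,
    // so 0 or ~0), and (a & rh), (b & lh) are subtracted from umulh(a, b).
    static uint64_t smulh(int64_t a, int64_t b) {
      uint64_t ua = (uint64_t)a, ub = (uint64_t)b;
      uint64_t hi = (uint64_t)(((unsigned __int128)ua * ub) >> 64); // umulh
      uint64_t lh = (uint64_t)(a >> 63), rh = (uint64_t)(b >> 63);
      return hi - (ua & rh) - (ub & lh);
    }

    int main() {
      int64_t vals[] = {0, 1, -1, 42, -42, INT64_MAX, INT64_MIN};
      for (int64_t a : vals)
        for (int64_t b : vals)
          assert((int64_t)smulh(a, b) ==
                 (int64_t)(((__int128)a * b) >> 64));
      return 0;
    }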
@@ -4246,23 +4321,37 @@ static SDValue buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT, Constants[I] = DAG.getUNDEF(Elems[I].getValueType()); Result = DAG.getBuildVector(VT, DL, Constants); } else { - // Otherwise try to use VLVGP to start the sequence in order to + // Otherwise try to use VLREP or VLVGP to start the sequence in order to // avoid a false dependency on any previous contents of the vector - // register. This only makes sense if one of the associated elements - // is defined. - unsigned I1 = NumElements / 2 - 1; - unsigned I2 = NumElements - 1; - bool Def1 = !Elems[I1].isUndef(); - bool Def2 = !Elems[I2].isUndef(); - if (Def1 || Def2) { - SDValue Elem1 = Elems[Def1 ? I1 : I2]; - SDValue Elem2 = Elems[Def2 ? I2 : I1]; - Result = DAG.getNode(ISD::BITCAST, DL, VT, - joinDwords(DAG, DL, Elem1, Elem2)); - Done[I1] = true; - Done[I2] = true; - } else - Result = DAG.getUNDEF(VT); + // register. + + // Use a VLREP if at least one element is a load. + unsigned LoadElIdx = UINT_MAX; + for (unsigned I = 0; I < NumElements; ++I) + if (Elems[I].getOpcode() == ISD::LOAD && + cast(Elems[I])->isUnindexed()) { + LoadElIdx = I; + break; + } + if (LoadElIdx != UINT_MAX) { + Result = DAG.getNode(SystemZISD::REPLICATE, DL, VT, Elems[LoadElIdx]); + Done[LoadElIdx] = true; + } else { + // Try to use VLVGP. + unsigned I1 = NumElements / 2 - 1; + unsigned I2 = NumElements - 1; + bool Def1 = !Elems[I1].isUndef(); + bool Def2 = !Elems[I2].isUndef(); + if (Def1 || Def2) { + SDValue Elem1 = Elems[Def1 ? I1 : I2]; + SDValue Elem2 = Elems[Def2 ? I2 : I1]; + Result = DAG.getNode(ISD::BITCAST, DL, VT, + joinDwords(DAG, DL, Elem1, Elem2)); + Done[I1] = true; + Done[I2] = true; + } else + Result = DAG.getUNDEF(VT); + } } // Use VLVGx to insert the other elements. @@ -4647,11 +4736,10 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { OPCODE(SELECT_CCMASK); OPCODE(ADJDYNALLOC); OPCODE(POPCNT); - OPCODE(UMUL_LOHI64); - OPCODE(SDIVREM32); - OPCODE(SDIVREM64); - OPCODE(UDIVREM32); - OPCODE(UDIVREM64); + OPCODE(SMUL_LOHI); + OPCODE(UMUL_LOHI); + OPCODE(SDIVREM); + OPCODE(UDIVREM); OPCODE(MVC); OPCODE(MVC_LOOP); OPCODE(NC); @@ -4666,7 +4754,6 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { OPCODE(STRCMP); OPCODE(SEARCH_STRING); OPCODE(IPM); - OPCODE(SERIALIZE); OPCODE(MEMBARRIER); OPCODE(TBEGIN); OPCODE(TBEGIN_NOFLOAT); @@ -5345,12 +5432,24 @@ MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI, if (STOCOpcode && !IndexReg && Subtarget.hasLoadStoreOnCond()) { if (Invert) CCMask ^= CCValid; + + // ISel pattern matching also adds a load memory operand of the same + // address, so take special care to find the storing memory operand. + MachineMemOperand *MMO = nullptr; + for (auto *I : MI.memoperands()) + if (I->isStore()) { + MMO = I; + break; + } + BuildMI(*MBB, MI, DL, TII->get(STOCOpcode)) - .addReg(SrcReg) - .add(Base) - .addImm(Disp) - .addImm(CCValid) - .addImm(CCMask); + .addReg(SrcReg) + .add(Base) + .addImm(Disp) + .addImm(CCValid) + .addImm(CCMask) + .addMemOperand(MMO); + MI.eraseFromParent(); return MBB; } @@ -5745,14 +5844,12 @@ SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI, return DoneMBB; } -// Emit an extension from a GR32 or GR64 to a GR128. ClearEven is true +// Emit an extension from a GR64 to a GR128. ClearEven is true // if the high register of the GR128 value must be cleared or false if -// it's "don't care". SubReg is subreg_l32 when extending a GR32 -// and subreg_l64 when extending a GR64. 
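The restructured start-value selection in the hunk above has a simple shape: replicate the first plain load with VLREP if there is one, otherwise seed the vector with VLVGP on elements N/2-1 and N-1 (reusing a defined element in place of an undef one), otherwise start from undef. A toy model of just that decision order, with element kinds reduced to an enum and the unindexed-load test elided:

    #include <cstdio>
    #include <vector>

    enum Kind { Undef, Scalar, Load };

    // Mirrors the order of preference only; all DAG details are omitted.
    static const char *pickStart(const std::vector<Kind> &Elems) {
      for (Kind K : Elems)
        if (K == Load)
          return "VLREP";                 // replicate the loaded element
      unsigned N = Elems.size();
      bool Def1 = Elems[N / 2 - 1] != Undef, Def2 = Elems[N - 1] != Undef;
      return (Def1 || Def2) ? "VLVGP" : "UNDEF";
    }

    int main() {
      std::printf("%s\n", pickStart({Scalar, Load, Undef, Scalar}));  // VLREP
      std::printf("%s\n", pickStart({Scalar, Undef, Undef, Scalar})); // VLVGP
      std::printf("%s\n", pickStart({Undef, Undef, Undef, Undef}));   // UNDEF
      return 0;
    }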
+// it's "don't care". MachineBasicBlock *SystemZTargetLowering::emitExt128(MachineInstr &MI, MachineBasicBlock *MBB, - bool ClearEven, - unsigned SubReg) const { + bool ClearEven) const { MachineFunction &MF = *MBB->getParent(); const SystemZInstrInfo *TII = static_cast(Subtarget.getInstrInfo()); @@ -5775,7 +5872,7 @@ MachineBasicBlock *SystemZTargetLowering::emitExt128(MachineInstr &MI, In128 = NewIn128; } BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest) - .addReg(In128).addReg(Src).addImm(SubReg); + .addReg(In128).addReg(Src).addImm(SystemZ::subreg_l64); MI.eraseFromParent(); return MBB; @@ -5928,7 +6025,8 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper( .addImm(DestDisp) .addImm(ThisLength) .add(SrcBase) - .addImm(SrcDisp); + .addImm(SrcDisp) + ->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); DestDisp += ThisLength; SrcDisp += ThisLength; Length -= ThisLength; @@ -6099,6 +6197,7 @@ MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter( case SystemZ::SelectF32: case SystemZ::SelectF64: case SystemZ::SelectF128: + case SystemZ::SelectVR128: return emitSelect(MI, MBB, 0); case SystemZ::CondStore8Mux: @@ -6138,12 +6237,10 @@ MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter( case SystemZ::CondStoreF64Inv: return emitCondStore(MI, MBB, SystemZ::STD, 0, true); - case SystemZ::AEXT128_64: - return emitExt128(MI, MBB, false, SystemZ::subreg_l64); - case SystemZ::ZEXT128_32: - return emitExt128(MI, MBB, true, SystemZ::subreg_l32); - case SystemZ::ZEXT128_64: - return emitExt128(MI, MBB, true, SystemZ::subreg_l64); + case SystemZ::AEXT128: + return emitExt128(MI, MBB, false); + case SystemZ::ZEXT128: + return emitExt128(MI, MBB, true); case SystemZ::ATOMIC_SWAPW: return emitAtomicLoadBinary(MI, MBB, 0, 0); diff --git a/interpreter/llvm/src/lib/Target/SystemZ/SystemZISelLowering.h b/interpreter/llvm/src/lib/Target/SystemZ/SystemZISelLowering.h index 79c8c4d92669f..abe8b7233e60c 100644 --- a/interpreter/llvm/src/lib/Target/SystemZ/SystemZISelLowering.h +++ b/interpreter/llvm/src/lib/Target/SystemZ/SystemZISelLowering.h @@ -86,14 +86,12 @@ enum NodeType : unsigned { // Count number of bits set in operand 0 per byte. POPCNT, - // Wrappers around the ISD opcodes of the same name. The output and - // first input operands are GR128s. The trailing numbers are the - // widths of the second operand in bits. - UMUL_LOHI64, - SDIVREM32, - SDIVREM64, - UDIVREM32, - UDIVREM64, + // Wrappers around the ISD opcodes of the same name. The output is GR128. + // Input operands may be GR64 or GR32, depending on the instruction. + SMUL_LOHI, + UMUL_LOHI, + SDIVREM, + UDIVREM, // Use a series of MVCs to copy bytes from one memory location to another. // The operands are: @@ -139,9 +137,6 @@ enum NodeType : unsigned { // Store the CC value in bits 29 and 28 of an integer. IPM, - // Perform a serialization operation. (BCR 15,0 or BCR 14,0.) - SERIALIZE, - // Compiler barrier only; generate a no-op. 
MEMBARRIER, @@ -471,8 +466,6 @@ class SystemZTargetLowering : public TargetLowering { const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override; - SDValue prepareVolatileOrAtomicLoad(SDValue Chain, const SDLoc &DL, - SelectionDAG &DAG) const override; SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; ISD::NodeType getExtendForAtomicOps() const override { @@ -487,6 +480,12 @@ class SystemZTargetLowering : public TargetLowering { const SystemZSubtarget &Subtarget; // Implement LowerOperation for individual opcodes. + SDValue getVectorCmp(SelectionDAG &DAG, unsigned Opcode, + const SDLoc &DL, EVT VT, + SDValue CmpOp0, SDValue CmpOp1) const; + SDValue lowerVectorSETCC(SelectionDAG &DAG, const SDLoc &DL, + EVT VT, ISD::CondCode CC, + SDValue CmpOp0, SDValue CmpOp1) const; SDValue lowerSETCC(SDValue Op, SelectionDAG &DAG) const; SDValue lowerBR_CC(SDValue Op, SelectionDAG &DAG) const; SDValue lowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; @@ -522,7 +521,6 @@ class SystemZTargetLowering : public TargetLowering { unsigned Opcode) const; SDValue lowerATOMIC_LOAD_SUB(SDValue Op, SelectionDAG &DAG) const; SDValue lowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerLOAD_SEQUENCE_POINT(SDValue Op, SelectionDAG &DAG) const; SDValue lowerSTACKSAVE(SDValue Op, SelectionDAG &DAG) const; SDValue lowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG) const; SDValue lowerPREFETCH(SDValue Op, SelectionDAG &DAG) const; @@ -568,7 +566,7 @@ class SystemZTargetLowering : public TargetLowering { unsigned StoreOpcode, unsigned STOCOpcode, bool Invert) const; MachineBasicBlock *emitExt128(MachineInstr &MI, MachineBasicBlock *MBB, - bool ClearEven, unsigned SubReg) const; + bool ClearEven) const; MachineBasicBlock *emitAtomicLoadBinary(MachineInstr &MI, MachineBasicBlock *BB, unsigned BinOpcode, unsigned BitSize, diff --git a/interpreter/llvm/src/lib/Target/SystemZ/SystemZInstrDFP.td b/interpreter/llvm/src/lib/Target/SystemZ/SystemZInstrDFP.td new file mode 100644 index 0000000000000..08ab2d7bbc523 --- /dev/null +++ b/interpreter/llvm/src/lib/Target/SystemZ/SystemZInstrDFP.td @@ -0,0 +1,231 @@ +//==- SystemZInstrDFP.td - Floating-point SystemZ instructions -*- tblgen-*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The instructions in this file implement SystemZ decimal floating-point +// arithmetic. These instructions are not currently used for code generation, +// but are provided for use with the assembler and disassembler only. If LLVM +// ever supports decimal floating-point types (_Decimal64 etc.), they can +// also be used for code generation for those types. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Move instructions +//===----------------------------------------------------------------------===// + +// Load and test.
+let Defs = [CC] in { + def LTDTR : UnaryRRE<"ltdtr", 0xB3D6, null_frag, FP64, FP64>; + def LTXTR : UnaryRRE<"ltxtr", 0xB3DE, null_frag, FP128, FP128>; +} + + +//===----------------------------------------------------------------------===// +// Conversion instructions +//===----------------------------------------------------------------------===// + +// Convert floating-point values to narrower representations. The destination +// of LDXTR is a 128-bit value, but only the first register of the pair is used. +def LEDTR : TernaryRRFe<"ledtr", 0xB3D5, FP32, FP64>; +def LDXTR : TernaryRRFe<"ldxtr", 0xB3DD, FP128, FP128>; + +// Extend floating-point values to wider representations. +def LDETR : BinaryRRFd<"ldetr", 0xB3D4, FP64, FP32>; +def LXDTR : BinaryRRFd<"lxdtr", 0xB3DC, FP128, FP64>; + +// Convert a signed integer value to a floating-point one. +def CDGTR : UnaryRRE<"cdgtr", 0xB3F1, null_frag, FP64, GR64>; +def CXGTR : UnaryRRE<"cxgtr", 0xB3F9, null_frag, FP128, GR64>; +let Predicates = [FeatureFPExtension] in { + def CDGTRA : TernaryRRFe<"cdgtra", 0xB3F1, FP64, GR64>; + def CXGTRA : TernaryRRFe<"cxgtra", 0xB3F9, FP128, GR64>; + def CDFTR : TernaryRRFe<"cdftr", 0xB951, FP64, GR32>; + def CXFTR : TernaryRRFe<"cxftr", 0xB959, FP128, GR32>; +} + +// Convert an unsigned integer value to a floating-point one. +let Predicates = [FeatureFPExtension] in { + def CDLGTR : TernaryRRFe<"cdlgtr", 0xB952, FP64, GR64>; + def CXLGTR : TernaryRRFe<"cxlgtr", 0xB95A, FP128, GR64>; + def CDLFTR : TernaryRRFe<"cdlftr", 0xB953, FP64, GR32>; + def CXLFTR : TernaryRRFe<"cxlftr", 0xB95B, FP128, GR32>; +} + +// Convert a floating-point value to a signed integer value. +let Defs = [CC] in { + def CGDTR : BinaryRRFe<"cgdtr", 0xB3E1, GR64, FP64>; + def CGXTR : BinaryRRFe<"cgxtr", 0xB3E9, GR64, FP128>; + let Predicates = [FeatureFPExtension] in { + def CGDTRA : TernaryRRFe<"cgdtra", 0xB3E1, GR64, FP64>; + def CGXTRA : TernaryRRFe<"cgxtra", 0xB3E9, GR64, FP128>; + def CFDTR : TernaryRRFe<"cfdtr", 0xB941, GR32, FP64>; + def CFXTR : TernaryRRFe<"cfxtr", 0xB949, GR32, FP128>; + } +} + +// Convert a floating-point value to an unsigned integer value. +let Defs = [CC] in { + let Predicates = [FeatureFPExtension] in { + def CLGDTR : TernaryRRFe<"clgdtr", 0xB942, GR64, FP64>; + def CLGXTR : TernaryRRFe<"clgxtr", 0xB94A, GR64, FP128>; + def CLFDTR : TernaryRRFe<"clfdtr", 0xB943, GR32, FP64>; + def CLFXTR : TernaryRRFe<"clfxtr", 0xB94B, GR32, FP128>; + } +} + +// Convert a packed value to a floating-point one. +def CDSTR : UnaryRRE<"cdstr", 0xB3F3, null_frag, FP64, GR64>; +def CXSTR : UnaryRRE<"cxstr", 0xB3FB, null_frag, FP128, GR128>; +def CDUTR : UnaryRRE<"cdutr", 0xB3F2, null_frag, FP64, GR64>; +def CXUTR : UnaryRRE<"cxutr", 0xB3FA, null_frag, FP128, GR128>; + +// Convert a floating-point value to a packed value. +def CSDTR : BinaryRRFd<"csdtr", 0xB3E3, GR64, FP64>; +def CSXTR : BinaryRRFd<"csxtr", 0xB3EB, GR128, FP128>; +def CUDTR : UnaryRRE<"cudtr", 0xB3E2, null_frag, GR64, FP64>; +def CUXTR : UnaryRRE<"cuxtr", 0xB3EA, null_frag, GR128, FP128>; + +// Convert from/to memory values in the zoned format. +let Predicates = [FeatureDFPZonedConversion] in { + def CDZT : BinaryRSL<"cdzt", 0xEDAA, FP64>; + def CXZT : BinaryRSL<"cxzt", 0xEDAB, FP128>; + def CZDT : StoreBinaryRSL<"czdt", 0xEDA8, FP64>; + def CZXT : StoreBinaryRSL<"czxt", 0xEDA9, FP128>; +} + +// Convert from/to memory values in the packed format. 
+let Predicates = [FeatureDFPPackedConversion] in { + def CDPT : BinaryRSL<"cdpt", 0xEDAE, FP64>; + def CXPT : BinaryRSL<"cxpt", 0xEDAF, FP128>; + def CPDT : StoreBinaryRSL<"cpdt", 0xEDAC, FP64>; + def CPXT : StoreBinaryRSL<"cpxt", 0xEDAD, FP128>; +} + +// Perform floating-point operation. +let Defs = [CC, R1L, F0Q], Uses = [R0L, F4Q] in + def PFPO : SideEffectInherentE<"pfpo", 0x010A>; + + +//===----------------------------------------------------------------------===// +// Unary arithmetic +//===----------------------------------------------------------------------===// + +// Round to an integer, with the second operand (M3) specifying the rounding +// mode. M4 can be set to 4 to suppress detection of inexact conditions. +def FIDTR : TernaryRRFe<"fidtr", 0xB3D7, FP64, FP64>; +def FIXTR : TernaryRRFe<"fixtr", 0xB3DF, FP128, FP128>; + +// Extract biased exponent. +def EEDTR : UnaryRRE<"eedtr", 0xB3E5, null_frag, FP64, FP64>; +def EEXTR : UnaryRRE<"eextr", 0xB3ED, null_frag, FP128, FP128>; + +// Extract significance. +def ESDTR : UnaryRRE<"esdtr", 0xB3E7, null_frag, FP64, FP64>; +def ESXTR : UnaryRRE<"esxtr", 0xB3EF, null_frag, FP128, FP128>; + + +//===----------------------------------------------------------------------===// +// Binary arithmetic +//===----------------------------------------------------------------------===// + +// Addition. +let Defs = [CC] in { + let isCommutable = 1 in { + def ADTR : BinaryRRFa<"adtr", 0xB3D2, null_frag, FP64, FP64, FP64>; + def AXTR : BinaryRRFa<"axtr", 0xB3DA, null_frag, FP128, FP128, FP128>; + } + let Predicates = [FeatureFPExtension] in { + def ADTRA : TernaryRRFa<"adtra", 0xB3D2, FP64, FP64, FP64>; + def AXTRA : TernaryRRFa<"axtra", 0xB3DA, FP128, FP128, FP128>; + } +} + +// Subtraction. +let Defs = [CC] in { + def SDTR : BinaryRRFa<"sdtr", 0xB3D3, null_frag, FP64, FP64, FP64>; + def SXTR : BinaryRRFa<"sxtr", 0xB3DB, null_frag, FP128, FP128, FP128>; + let Predicates = [FeatureFPExtension] in { + def SDTRA : TernaryRRFa<"sdtra", 0xB3D3, FP64, FP64, FP64>; + def SXTRA : TernaryRRFa<"sxtra", 0xB3DB, FP128, FP128, FP128>; + } +} + +// Multiplication. +let isCommutable = 1 in { + def MDTR : BinaryRRFa<"mdtr", 0xB3D0, null_frag, FP64, FP64, FP64>; + def MXTR : BinaryRRFa<"mxtr", 0xB3D8, null_frag, FP128, FP128, FP128>; +} +let Predicates = [FeatureFPExtension] in { + def MDTRA : TernaryRRFa<"mdtra", 0xB3D0, FP64, FP64, FP64>; + def MXTRA : TernaryRRFa<"mxtra", 0xB3D8, FP128, FP128, FP128>; +} + +// Division. +def DDTR : BinaryRRFa<"ddtr", 0xB3D1, null_frag, FP64, FP64, FP64>; +def DXTR : BinaryRRFa<"dxtr", 0xB3D9, null_frag, FP128, FP128, FP128>; +let Predicates = [FeatureFPExtension] in { + def DDTRA : TernaryRRFa<"ddtra", 0xB3D1, FP64, FP64, FP64>; + def DXTRA : TernaryRRFa<"dxtra", 0xB3D9, FP128, FP128, FP128>; +} + +// Quantize. +def QADTR : TernaryRRFb<"qadtr", 0xB3F5, FP64, FP64, FP64>; +def QAXTR : TernaryRRFb<"qaxtr", 0xB3FD, FP128, FP128, FP128>; + +// Reround. +def RRDTR : TernaryRRFb<"rrdtr", 0xB3F7, FP64, FP64, FP64>; +def RRXTR : TernaryRRFb<"rrxtr", 0xB3FF, FP128, FP128, FP128>; + +// Shift significand left/right. +def SLDT : BinaryRXF<"sldt", 0xED40, null_frag, FP64, FP64, null_frag, 0>; +def SLXT : BinaryRXF<"slxt", 0xED48, null_frag, FP128, FP128, null_frag, 0>; +def SRDT : BinaryRXF<"srdt", 0xED41, null_frag, FP64, FP64, null_frag, 0>; +def SRXT : BinaryRXF<"srxt", 0xED49, null_frag, FP128, FP128, null_frag, 0>; + +// Insert biased exponent. 
+def IEDTR : BinaryRRFb<"iedtr", 0xB3F6, null_frag, FP64, FP64, FP64>; +def IEXTR : BinaryRRFb<"iextr", 0xB3FE, null_frag, FP128, FP128, FP128>; + + +//===----------------------------------------------------------------------===// +// Comparisons +//===----------------------------------------------------------------------===// + +// Compare. +let Defs = [CC] in { + def CDTR : CompareRRE<"cdtr", 0xB3E4, null_frag, FP64, FP64>; + def CXTR : CompareRRE<"cxtr", 0xB3EC, null_frag, FP128, FP128>; +} + +// Compare and signal. +let Defs = [CC] in { + def KDTR : CompareRRE<"kdtr", 0xB3E0, null_frag, FP64, FP64>; + def KXTR : CompareRRE<"kxtr", 0xB3E8, null_frag, FP128, FP128>; +} + +// Compare biased exponent. +let Defs = [CC] in { + def CEDTR : CompareRRE<"cedtr", 0xB3F4, null_frag, FP64, FP64>; + def CEXTR : CompareRRE<"cextr", 0xB3FC, null_frag, FP128, FP128>; +} + +// Test Data Class. +let Defs = [CC] in { + def TDCET : TestRXE<"tdcet", 0xED50, null_frag, FP32>; + def TDCDT : TestRXE<"tdcdt", 0xED54, null_frag, FP64>; + def TDCXT : TestRXE<"tdcxt", 0xED58, null_frag, FP128>; +} + +// Test Data Group. +let Defs = [CC] in { + def TDGET : TestRXE<"tdget", 0xED51, null_frag, FP32>; + def TDGDT : TestRXE<"tdgdt", 0xED55, null_frag, FP64>; + def TDGXT : TestRXE<"tdgxt", 0xED59, null_frag, FP128>; +} + diff --git a/interpreter/llvm/src/lib/Target/SystemZ/SystemZInstrFP.td b/interpreter/llvm/src/lib/Target/SystemZ/SystemZInstrFP.td index 364b81f98eed6..02aeaadad0d9a 100644 --- a/interpreter/llvm/src/lib/Target/SystemZ/SystemZInstrFP.td +++ b/interpreter/llvm/src/lib/Target/SystemZ/SystemZInstrFP.td @@ -12,9 +12,12 @@ //===----------------------------------------------------------------------===// // C's ?: operator for floating-point operands. -def SelectF32 : SelectWrapper; -def SelectF64 : SelectWrapper; -def SelectF128 : SelectWrapper; +def SelectF32 : SelectWrapper; +def SelectF64 : SelectWrapper; +let Predicates = [FeatureNoVectorEnhancements1] in + def SelectF128 : SelectWrapper; +let Predicates = [FeatureVectorEnhancements1] in + def SelectVR128 : SelectWrapper; defm CondStoreF32 : CondStores; @@ -69,8 +72,9 @@ let Defs = [CC], usesCustomInserter = 1 in { let Predicates = [FeatureVector] in { defm : CompareZeroFP; defm : CompareZeroFP; - defm : CompareZeroFP; } +let Predicates = [FeatureVector, FeatureNoVectorEnhancements1] in + defm : CompareZeroFP; // Moves between 64-bit integer and floating-point registers. def LGDR : UnaryRRE<"lgdr", 0xB3CD, bitconvert, GR64, FP64>; @@ -83,8 +87,12 @@ let isCodeGenOnly = 1 in { } // The sign of an FP128 is in the high register. -def : Pat<(fcopysign FP32:$src1, FP128:$src2), - (CPSDRsd FP32:$src1, (EXTRACT_SUBREG FP128:$src2, subreg_h64))>; +let Predicates = [FeatureNoVectorEnhancements1] in + def : Pat<(fcopysign FP32:$src1, (f32 (fpround (f128 FP128:$src2)))), + (CPSDRsd FP32:$src1, (EXTRACT_SUBREG FP128:$src2, subreg_h64))>; +let Predicates = [FeatureVectorEnhancements1] in + def : Pat<(fcopysign FP32:$src1, (f32 (fpround (f128 VR128:$src2)))), + (CPSDRsd FP32:$src1, (EXTRACT_SUBREG VR128:$src2, subreg_r64))>; // fcopysign with an FP64 result. let isCodeGenOnly = 1 in @@ -92,8 +100,12 @@ let isCodeGenOnly = 1 in def CPSDRdd : BinaryRRFb<"cpsdr", 0xB372, fcopysign, FP64, FP64, FP64>; // The sign of an FP128 is in the high register. 
-def : Pat<(fcopysign FP64:$src1, FP128:$src2), - (CPSDRdd FP64:$src1, (EXTRACT_SUBREG FP128:$src2, subreg_h64))>; +let Predicates = [FeatureNoVectorEnhancements1] in + def : Pat<(fcopysign FP64:$src1, (f64 (fpround (f128 FP128:$src2)))), + (CPSDRdd FP64:$src1, (EXTRACT_SUBREG FP128:$src2, subreg_h64))>; +let Predicates = [FeatureVectorEnhancements1] in + def : Pat<(fcopysign FP64:$src1, (f64 (fpround (f128 VR128:$src2)))), + (CPSDRdd FP64:$src1, (EXTRACT_SUBREG VR128:$src2, subreg_r64))>; // fcopysign with an FP128 result. Use "upper" as the high half and leave // the low half as-is. @@ -101,12 +113,14 @@ class CopySign128 : Pat<(fcopysign FP128:$src1, cls:$src2), (INSERT_SUBREG FP128:$src1, upper, subreg_h64)>; -def : CopySign128; -def : CopySign128; -def : CopySign128; +let Predicates = [FeatureNoVectorEnhancements1] in { + def : CopySign128; + def : CopySign128; + def : CopySign128; +} defm LoadStoreF32 : MVCLoadStore; defm LoadStoreF64 : MVCLoadStore; @@ -121,7 +135,8 @@ let canFoldAsLoad = 1, SimpleBDXLoad = 1 in { defm LD : UnaryRXPair<"ld", 0x68, 0xED65, load, FP64, 8>; // For z13 we prefer LDE over LE to avoid partial register dependencies. - def LDE32 : UnaryRXE<"lde", 0xED24, null_frag, FP32, 4>; + let isCodeGenOnly = 1 in + def LDE32 : UnaryRXE<"lde", 0xED24, null_frag, FP32, 4>; // These instructions are split after register allocation, so we don't // want a custom inserter. @@ -165,20 +180,32 @@ def LEXBRA : TernaryRRFe<"lexbra", 0xB346, FP128, FP128>, def LDXBRA : TernaryRRFe<"ldxbra", 0xB345, FP128, FP128>, Requires<[FeatureFPExtension]>; -def : Pat<(f32 (fpround FP128:$src)), - (EXTRACT_SUBREG (LEXBR FP128:$src), subreg_hr32)>; -def : Pat<(f64 (fpround FP128:$src)), - (EXTRACT_SUBREG (LDXBR FP128:$src), subreg_h64)>; +let Predicates = [FeatureNoVectorEnhancements1] in { + def : Pat<(f32 (fpround FP128:$src)), + (EXTRACT_SUBREG (LEXBR FP128:$src), subreg_hr32)>; + def : Pat<(f64 (fpround FP128:$src)), + (EXTRACT_SUBREG (LDXBR FP128:$src), subreg_h64)>; +} // Extend register floating-point values to wider representations. -def LDEBR : UnaryRRE<"ldebr", 0xB304, fpextend, FP64, FP32>; -def LXEBR : UnaryRRE<"lxebr", 0xB306, fpextend, FP128, FP32>; -def LXDBR : UnaryRRE<"lxdbr", 0xB305, fpextend, FP128, FP64>; +def LDEBR : UnaryRRE<"ldebr", 0xB304, fpextend, FP64, FP32>; +def LXEBR : UnaryRRE<"lxebr", 0xB306, null_frag, FP128, FP32>; +def LXDBR : UnaryRRE<"lxdbr", 0xB305, null_frag, FP128, FP64>; +let Predicates = [FeatureNoVectorEnhancements1] in { + def : Pat<(f128 (fpextend (f32 FP32:$src))), (LXEBR FP32:$src)>; + def : Pat<(f128 (fpextend (f64 FP64:$src))), (LXDBR FP64:$src)>; +} // Extend memory floating-point values to wider representations. def LDEB : UnaryRXE<"ldeb", 0xED04, extloadf32, FP64, 4>; -def LXEB : UnaryRXE<"lxeb", 0xED06, extloadf32, FP128, 4>; -def LXDB : UnaryRXE<"lxdb", 0xED05, extloadf64, FP128, 8>; +def LXEB : UnaryRXE<"lxeb", 0xED06, null_frag, FP128, 4>; +def LXDB : UnaryRXE<"lxdb", 0xED05, null_frag, FP128, 8>; +let Predicates = [FeatureNoVectorEnhancements1] in { + def : Pat<(f128 (extloadf32 bdxaddr12only:$src)), + (LXEB bdxaddr12only:$src)>; + def : Pat<(f128 (extloadf64 bdxaddr12only:$src)), + (LXDB bdxaddr12only:$src)>; +} // Convert a signed integer register value to a floating-point one. def CEFBR : UnaryRRE<"cefbr", 0xB394, sint_to_fp, FP32, GR32>; @@ -425,30 +452,32 @@ def : Pat<(fmul (f64 (fpextend FP32:$src1)), // f128 multiplication of two FP64 registers. 
def MXDBR : BinaryRRE<"mxdbr", 0xB307, null_frag, FP128, FP64>; -def : Pat<(fmul (f128 (fpextend FP64:$src1)), (f128 (fpextend FP64:$src2))), - (MXDBR (INSERT_SUBREG (f128 (IMPLICIT_DEF)), - FP64:$src1, subreg_h64), FP64:$src2)>; +let Predicates = [FeatureNoVectorEnhancements1] in + def : Pat<(fmul (f128 (fpextend FP64:$src1)), (f128 (fpextend FP64:$src2))), + (MXDBR (INSERT_SUBREG (f128 (IMPLICIT_DEF)), + FP64:$src1, subreg_h64), FP64:$src2)>; // f128 multiplication of an FP64 register and an f64 memory. def MXDB : BinaryRXE<"mxdb", 0xED07, null_frag, FP128, load, 8>; -def : Pat<(fmul (f128 (fpextend FP64:$src1)), - (f128 (extloadf64 bdxaddr12only:$addr))), - (MXDB (INSERT_SUBREG (f128 (IMPLICIT_DEF)), FP64:$src1, subreg_h64), - bdxaddr12only:$addr)>; +let Predicates = [FeatureNoVectorEnhancements1] in + def : Pat<(fmul (f128 (fpextend FP64:$src1)), + (f128 (extloadf64 bdxaddr12only:$addr))), + (MXDB (INSERT_SUBREG (f128 (IMPLICIT_DEF)), FP64:$src1, subreg_h64), + bdxaddr12only:$addr)>; // Fused multiply-add. -def MAEBR : TernaryRRD<"maebr", 0xB30E, z_fma, FP32>; -def MADBR : TernaryRRD<"madbr", 0xB31E, z_fma, FP64>; +def MAEBR : TernaryRRD<"maebr", 0xB30E, z_fma, FP32, FP32>; +def MADBR : TernaryRRD<"madbr", 0xB31E, z_fma, FP64, FP64>; -def MAEB : TernaryRXF<"maeb", 0xED0E, z_fma, FP32, load, 4>; -def MADB : TernaryRXF<"madb", 0xED1E, z_fma, FP64, load, 8>; +def MAEB : TernaryRXF<"maeb", 0xED0E, z_fma, FP32, FP32, load, 4>; +def MADB : TernaryRXF<"madb", 0xED1E, z_fma, FP64, FP64, load, 8>; // Fused multiply-subtract. -def MSEBR : TernaryRRD<"msebr", 0xB30F, z_fms, FP32>; -def MSDBR : TernaryRRD<"msdbr", 0xB31F, z_fms, FP64>; +def MSEBR : TernaryRRD<"msebr", 0xB30F, z_fms, FP32, FP32>; +def MSDBR : TernaryRRD<"msdbr", 0xB31F, z_fms, FP64, FP64>; -def MSEB : TernaryRXF<"mseb", 0xED0F, z_fms, FP32, load, 4>; -def MSDB : TernaryRXF<"msdb", 0xED1F, z_fms, FP64, load, 8>; +def MSEB : TernaryRXF<"mseb", 0xED0F, z_fms, FP32, FP32, load, 4>; +def MSDB : TernaryRXF<"msdb", 0xED1F, z_fms, FP64, FP64, load, 8>; // Division. 
def DEBR : BinaryRRE<"debr", 0xB30D, fdiv, FP32, FP32>; diff --git a/interpreter/llvm/src/lib/Target/SystemZ/SystemZInstrFormats.td b/interpreter/llvm/src/lib/Target/SystemZ/SystemZInstrFormats.td index a37da28078540..033a0a879d37d 100644 --- a/interpreter/llvm/src/lib/Target/SystemZ/SystemZInstrFormats.td +++ b/interpreter/llvm/src/lib/Target/SystemZ/SystemZInstrFormats.td @@ -527,6 +527,22 @@ class InstRRFc op, dag outs, dag ins, string asmstr, list pattern> let Inst{3-0} = R2; } +class InstRRFd op, dag outs, dag ins, string asmstr, list pattern> + : InstSystemZ<4, outs, ins, asmstr, pattern> { + field bits<32> Inst; + field bits<32> SoftFail = 0; + + bits<4> R1; + bits<4> R2; + bits<4> M4; + + let Inst{31-16} = op; + let Inst{15-12} = 0; + let Inst{11-8} = M4; + let Inst{7-4} = R1; + let Inst{3-0} = R2; +} + class InstRRFe op, dag outs, dag ins, string asmstr, list pattern> : InstSystemZ<4, outs, ins, asmstr, pattern> { field bits<32> Inst; @@ -725,6 +741,22 @@ class InstRSLa op, dag outs, dag ins, string asmstr, list pattern> let Inst{7-0} = op{7-0}; } +class InstRSLb op, dag outs, dag ins, string asmstr, list pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<4> R1; + bits<24> BDL2; + bits<4> M3; + + let Inst{47-40} = op{15-8}; + let Inst{39-16} = BDL2; + let Inst{15-12} = R1; + let Inst{11-8} = M3; + let Inst{7-0} = op{7-0}; +} + class InstRSYa op, dag outs, dag ins, string asmstr, list pattern> : InstSystemZ<6, outs, ins, asmstr, pattern> { field bits<48> Inst; @@ -1059,6 +1091,94 @@ class InstVRIe op, dag outs, dag ins, string asmstr, list pattern> let Inst{7-0} = op{7-0}; } +class InstVRIf op, dag outs, dag ins, string asmstr, list pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<5> V1; + bits<5> V2; + bits<5> V3; + bits<8> I4; + bits<4> M5; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = V1{3-0}; + let Inst{35-32} = V2{3-0}; + let Inst{31-28} = V3{3-0}; + let Inst{27-24} = 0; + let Inst{23-20} = M5; + let Inst{19-12} = I4; + let Inst{11} = V1{4}; + let Inst{10} = V2{4}; + let Inst{9} = V3{4}; + let Inst{8} = 0; + let Inst{7-0} = op{7-0}; +} + +class InstVRIg op, dag outs, dag ins, string asmstr, list pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<5> V1; + bits<5> V2; + bits<8> I3; + bits<8> I4; + bits<4> M5; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = V1{3-0}; + let Inst{35-32} = V2{3-0}; + let Inst{31-24} = I4; + let Inst{23-20} = M5; + let Inst{19-12} = I3; + let Inst{11} = V1{4}; + let Inst{10} = V2{4}; + let Inst{9-8} = 0; + let Inst{7-0} = op{7-0}; +} + +class InstVRIh op, dag outs, dag ins, string asmstr, list pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<5> V1; + bits<16> I2; + bits<4> I3; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = V1{3-0}; + let Inst{35-32} = 0; + let Inst{31-16} = I2; + let Inst{15-12} = I3; + let Inst{11} = V1{4}; + let Inst{10-8} = 0; + let Inst{7-0} = op{7-0}; +} + +class InstVRIi op, dag outs, dag ins, string asmstr, list pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<5> V1; + bits<4> R2; + bits<8> I3; + bits<4> M4; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = V1{3-0}; + let Inst{35-32} = R2; + let Inst{31-24} = 0; + let Inst{23-20} = M4; + let Inst{19-12} = I3; + 
let Inst{11} = V1{4}; + let Inst{10-8} = 0; + let Inst{7-0} = op{7-0}; +} + // Depending on the instruction mnemonic, certain bits may be or-ed into // the M4 value provided as explicit operand. These are passed as m4or. class InstVRRa op, dag outs, dag ins, string asmstr, list pattern, @@ -1227,6 +1347,67 @@ class InstVRRf op, dag outs, dag ins, string asmstr, list pattern> let Inst{7-0} = op{7-0}; } +class InstVRRg op, dag outs, dag ins, string asmstr, list pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<5> V1; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = 0; + let Inst{35-32} = V1{3-0}; + let Inst{31-12} = 0; + let Inst{11} = 0; + let Inst{10} = V1{4}; + let Inst{9-8} = 0; + let Inst{7-0} = op{7-0}; +} + +class InstVRRh op, dag outs, dag ins, string asmstr, list pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<5> V1; + bits<5> V2; + bits<4> M3; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = 0; + let Inst{35-32} = V1{3-0}; + let Inst{31-28} = V2{3-0}; + let Inst{27-24} = 0; + let Inst{23-20} = M3; + let Inst{19-12} = 0; + let Inst{11} = 0; + let Inst{10} = V1{4}; + let Inst{9} = V2{4}; + let Inst{8} = 0; + let Inst{7-0} = op{7-0}; +} + +class InstVRRi op, dag outs, dag ins, string asmstr, list pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<4> R1; + bits<5> V2; + bits<4> M3; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = R1; + let Inst{35-32} = V2{3-0}; + let Inst{31-24} = 0; + let Inst{23-20} = M3; + let Inst{19-12} = 0; + let Inst{11} = 0; + let Inst{10} = V2{4}; + let Inst{9-8} = 0; + let Inst{7-0} = op{7-0}; +} + class InstVRSa op, dag outs, dag ins, string asmstr, list pattern> : InstSystemZ<6, outs, ins, asmstr, pattern> { field bits<48> Inst; @@ -1289,6 +1470,25 @@ class InstVRSc op, dag outs, dag ins, string asmstr, list pattern> let Inst{7-0} = op{7-0}; } +class InstVRSd op, dag outs, dag ins, string asmstr, list pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<5> V1; + bits<16> BD2; + bits<4> R3; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = 0; + let Inst{35-32} = R3; + let Inst{31-16} = BD2; + let Inst{15-12} = V1{3-0}; + let Inst{11-9} = 0; + let Inst{8} = V1{4}; + let Inst{7-0} = op{7-0}; +} + class InstVRV op, dag outs, dag ins, string asmstr, list pattern> : InstSystemZ<6, outs, ins, asmstr, pattern> { field bits<48> Inst; @@ -1326,6 +1526,24 @@ class InstVRX op, dag outs, dag ins, string asmstr, list pattern> let Inst{7-0} = op{7-0}; } +class InstVSI op, dag outs, dag ins, string asmstr, list pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<5> V1; + bits<16> BD2; + bits<8> I3; + + let Inst{47-40} = op{15-8}; + let Inst{39-32} = I3; + let Inst{31-16} = BD2; + let Inst{15-12} = V1{3-0}; + let Inst{11-9} = 0; + let Inst{8} = V1{4}; + let Inst{7-0} = op{7-0}; +} + //===----------------------------------------------------------------------===// // Instruction classes for .insn directives //===----------------------------------------------------------------------===// @@ -1878,6 +2096,25 @@ class FixedCondBranchRX opcode> let M1 = V.ccmask; } +class CondBranchRXY opcode> + : InstRXYb { + let CCMaskFirst = 1; +} + +class AsmCondBranchRXY opcode> + : InstRXYb; + +class FixedCondBranchRXY opcode, + 
SDPatternOperator operator = null_frag> + : InstRXYb { + let isAsmParserOnly = V.alternate; + let M1 = V.ccmask; +} + class CmpBranchRIEa opcode, RegisterOperand cls, Immediate imm> : InstRIEa opcode, let AccessBytes = bytes; } +class StoreLengthVRSd opcode, + SDPatternOperator operator, bits<5> bytes> + : InstVRSd { + let mayStore = 1; + let AccessBytes = bytes; +} + +class StoreLengthVSI opcode, + SDPatternOperator operator, bits<5> bytes> + : InstVSI { + let mayStore = 1; + let AccessBytes = bytes; +} + class StoreMultipleRS opcode, RegisterOperand cls, AddressingMode mode = bdaddr12only> : InstRSa opcode, SDPatternOperator operator, let OpType = "reg"; } +class UnaryTiedRRE opcode, RegisterOperand cls> + : InstRRE { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; + let R2 = 0; +} + class UnaryMemRRFc opcode, RegisterOperand cls1, RegisterOperand cls2> : InstRRFc opcode, : InstRXa; +class SideEffectBinaryRXY opcode, + RegisterOperand cls> + : InstRXYa; + class SideEffectBinaryRILPC opcode, RegisterOperand cls> : InstRILb opcode, let AddedComplexity = 7; } +class SideEffectBinaryRRE opcode, + RegisterOperand cls1, RegisterOperand cls2> + : InstRRE; + +class SideEffectBinaryRRFa opcode, + RegisterOperand cls1, RegisterOperand cls2> + : InstRRFa { + let R3 = 0; + let M4 = 0; +} + +class SideEffectBinaryRRFc opcode, + RegisterOperand cls1, RegisterOperand cls2> + : InstRRFc { + let M3 = 0; +} + class SideEffectBinaryIE opcode, Immediate imm1, Immediate imm2> : InstIE opcode> : InstSSf; +class SideEffectBinarySSE opcode> + : InstSSE; + class SideEffectBinaryMemMemRR opcode, RegisterOperand cls1, RegisterOperand cls2> : InstRR opcode, SDPatternOperator operator, let DisableEncoding = "$R1src"; } +class BinaryRRD opcode, SDPatternOperator operator, + RegisterOperand cls1, RegisterOperand cls2> + : InstRRD { + let OpKey = mnemonic#cls; + let OpType = "reg"; +} + class BinaryRRFa opcode, SDPatternOperator operator, RegisterOperand cls1, RegisterOperand cls2, RegisterOperand cls3> @@ -2808,6 +3109,11 @@ multiclass BinaryMemRRFcOpt opcode, def Opt : UnaryMemRRFc; } +class BinaryRRFd opcode, RegisterOperand cls1, + RegisterOperand cls2> + : InstRRFd; + class BinaryRRFe opcode, RegisterOperand cls1, RegisterOperand cls2> : InstRRFe opcode1, bits<16> opcode2, } } +class BinaryRSL opcode, RegisterOperand cls> + : InstRSLb { + let mayLoad = 1; +} + class BinaryRX opcode, SDPatternOperator operator, RegisterOperand cls, SDPatternOperator load, bits<5> bytes, AddressingMode mode = bdxaddr12only> @@ -2987,6 +3300,18 @@ class BinaryRXE opcode, SDPatternOperator operator, let M3 = 0; } +class BinaryRXF opcode, SDPatternOperator operator, + RegisterOperand cls1, RegisterOperand cls2, + SDPatternOperator load, bits<5> bytes> + : InstRXF { + let OpKey = mnemonic#"r"#cls; + let OpType = "mem"; + let mayLoad = 1; + let AccessBytes = bytes; +} + class BinaryRXY opcode, SDPatternOperator operator, RegisterOperand cls, SDPatternOperator load, bits<5> bytes, AddressingMode mode = bdxaddr20only> @@ -3091,6 +3416,11 @@ class BinaryVRIeFloatGeneric opcode> (ins VR128:$V2, imm32zx12:$I3, imm32zx4:$M4, imm32zx4:$M5), mnemonic#"\t$V1, $V2, $I3, $M4, $M5", []>; +class BinaryVRIh opcode> + : InstVRIh; + class BinaryVRRa opcode, SDPatternOperator operator, TypedReg tr1, TypedReg tr2, bits<4> type = 0, bits<4> m4 = 0> : InstVRRa opcode, SDPatternOperator operator, mnemonic#"\t$V1, $R2, $R3", [(set tr.op:$V1, (tr.vt (operator GR64:$R2, GR64:$R3)))]>; +class BinaryVRRi opcode, RegisterOperand cls> + : 
InstVRRi; + class BinaryVRSa opcode, SDPatternOperator operator, TypedReg tr1, TypedReg tr2, bits<4> type> : InstVRSa opcode> (ins VR128:$V3, shift12only:$BD2, imm32zx4: $M4), mnemonic#"\t$R1, $V3, $BD2, $M4", []>; +class BinaryVRSd opcode, SDPatternOperator operator, + bits<5> bytes> + : InstVRSd { + let mayLoad = 1; + let AccessBytes = bytes; +} + class BinaryVRX opcode, SDPatternOperator operator, TypedReg tr, bits<5> bytes> : InstVRX rsOpcode, } } +class StoreBinaryRSL opcode, RegisterOperand cls> + : InstRSLb { + let mayStore = 1; +} + +class BinaryVSI opcode, SDPatternOperator operator, + bits<5> bytes> + : InstVSI { + let mayLoad = 1; + let AccessBytes = bytes; +} + class StoreBinaryVRV opcode, bits<5> bytes, Immediate index> : InstVRV opcode> let M5 = 0; } +class CompareVRRh opcode> + : InstVRRh { + let isCompare = 1; +} + class TestRXE opcode, SDPatternOperator operator, RegisterOperand cls> : InstRXE opcode> let mayLoad = 1; } +class TestVRRg opcode> + : InstVRRg; + class SideEffectTernarySSc opcode> : InstSSc; +class SideEffectTernaryRRFa opcode, + RegisterOperand cls1, RegisterOperand cls2, + RegisterOperand cls3> + : InstRRFa { + let M4 = 0; +} + +class SideEffectTernaryRRFb opcode, + RegisterOperand cls1, RegisterOperand cls2, + RegisterOperand cls3> + : InstRRFb { + let M4 = 0; +} + class SideEffectTernaryMemMemMemRRFb opcode, RegisterOperand cls1, RegisterOperand cls2, @@ -3558,6 +3943,13 @@ class SideEffectTernaryRRFc opcode, : InstRRFc; +multiclass SideEffectTernaryRRFcOpt opcode, + RegisterOperand cls1, + RegisterOperand cls2> { + def "" : SideEffectTernaryRRFc; + def Opt : SideEffectBinaryRRFc; +} + class SideEffectTernaryMemMemRRFc opcode, RegisterOperand cls1, RegisterOperand cls2, Immediate imm> @@ -3581,6 +3973,12 @@ class SideEffectTernarySSF opcode, (ins bdaddr12only:$BD1, bdaddr12only:$BD2, cls:$R3), mnemonic#"\t$BD1, $BD2, $R3", []>; +class TernaryRRFa opcode, + RegisterOperand cls1, RegisterOperand cls2, + RegisterOperand cls3> + : InstRRFa; + class TernaryRRFb opcode, RegisterOperand cls1, RegisterOperand cls2, RegisterOperand cls3> @@ -3597,11 +3995,11 @@ class TernaryRRFe opcode, RegisterOperand cls1, (ins imm32zx4:$M3, cls2:$R2, imm32zx4:$M4), mnemonic#"\t$R1, $M3, $R2, $M4", []>; -class TernaryRRD opcode, - SDPatternOperator operator, RegisterOperand cls> - : InstRRD opcode, SDPatternOperator operator, + RegisterOperand cls1, RegisterOperand cls2> + : InstRRD { + [(set cls1:$R1, (operator cls2:$R1src, cls2:$R3, cls2:$R2))]> { let OpKey = mnemonic#cls; let OpType = "reg"; let Constraints = "$R1 = $R1src"; @@ -3642,6 +4040,18 @@ multiclass TernaryRSPair rsOpcode, bits<16> rsyOpcode, } } +class SideEffectTernaryRS opcode, + RegisterOperand cls1, RegisterOperand cls2> + : InstRSa; + +class SideEffectTernaryRSY opcode, + RegisterOperand cls1, RegisterOperand cls2> + : InstRSYa; + class SideEffectTernaryMemMemRS opcode, RegisterOperand cls1, RegisterOperand cls2> : InstRSa opcode, } class TernaryRXF opcode, SDPatternOperator operator, - RegisterOperand cls, SDPatternOperator load, bits<5> bytes> - : InstRXF bytes> + : InstRXF { + [(set cls1:$R1, (operator cls2:$R1src, cls2:$R3, + (load bdxaddr12only:$XBD2)))]> { let OpKey = mnemonic#"r"#cls; let OpType = "mem"; let Constraints = "$R1 = $R1src"; @@ -3696,6 +4107,11 @@ class TernaryVRId opcode, SDPatternOperator operator, let M5 = type; } +class TernaryVRIi opcode, RegisterOperand cls> + : InstVRIi; + class TernaryVRRa opcode, SDPatternOperator operator, TypedReg tr1, TypedReg tr2, bits<4> type, bits<4> m4or> : 
InstVRRa opcode, SDPatternOperator operator, let M6 = 0; } +class TernaryVRRcFloat opcode, + SDPatternOperator operator, TypedReg tr1, TypedReg tr2, + bits<4> type = 0, bits<4> m5 = 0> + : InstVRRc { + let M4 = type; + let M5 = m5; +} + +class TernaryVRRcFloatGeneric opcode> + : InstVRRc; + class TernaryVRRd opcode, SDPatternOperator operator, TypedReg tr1, TypedReg tr2, bits<4> type = 0> : InstVRRd opcode> let DisableEncoding = "$V1src"; } +class QuaternaryVRIf opcode> + : InstVRIf; + +class QuaternaryVRIg opcode> + : InstVRIg; + class QuaternaryVRRd opcode, SDPatternOperator operator, TypedReg tr1, TypedReg tr2, - bits<4> type, SDPatternOperator m6mask, bits<4> m6or> + TypedReg tr3, TypedReg tr4, bits<4> type, + SDPatternOperator m6mask = imm32zx4, bits<4> m6or = 0> : InstVRRd { let M5 = type; } +class QuaternaryVRRdGeneric opcode> + : InstVRRd; + // Declare a pair of instructions, one which sets CC and one which doesn't. // The CC-setting form ends with "S" and sets the low bit of M6. // Also create aliases to make use of M6 operand optional in assembler. @@ -3895,13 +4348,15 @@ multiclass QuaternaryOptVRRdSPair opcode, SDPatternOperator operator_cc, TypedReg tr1, TypedReg tr2, bits<4> type, bits<4> modifier = 0> { - def "" : QuaternaryVRRd; def : InstAlias(NAME) tr1.op:$V1, tr2.op:$V2, tr2.op:$V3, tr2.op:$V4, 0)>; let Defs = [CC] in - def S : QuaternaryVRRd; def : InstAlias(NAME#"S") tr1.op:$V1, tr2.op:$V2, @@ -3909,15 +4364,41 @@ multiclass QuaternaryOptVRRdSPair opcode, } multiclass QuaternaryOptVRRdSPairGeneric opcode> { - def "" : InstVRRd; + def "" : QuaternaryVRRdGeneric; def : InstAlias(NAME) VR128:$V1, VR128:$V2, VR128:$V3, VR128:$V4, imm32zx4:$M5, 0)>; } +class SideEffectQuaternaryRRFa opcode, + RegisterOperand cls1, RegisterOperand cls2, + RegisterOperand cls3> + : InstRRFa; + +multiclass SideEffectQuaternaryRRFaOptOpt opcode, + RegisterOperand cls1, + RegisterOperand cls2, + RegisterOperand cls3> { + def "" : SideEffectQuaternaryRRFa; + def Opt : SideEffectTernaryRRFa; + def OptOpt : SideEffectBinaryRRFa; +} + +class SideEffectQuaternaryRRFb opcode, + RegisterOperand cls1, RegisterOperand cls2, + RegisterOperand cls3> + : InstRRFb; + +multiclass SideEffectQuaternaryRRFbOpt opcode, + RegisterOperand cls1, + RegisterOperand cls2, + RegisterOperand cls3> { + def "" : SideEffectQuaternaryRRFb; + def Opt : SideEffectTernaryRRFb; +} + class SideEffectQuaternarySSe opcode, RegisterOperand cls> : InstSSe opcode, SDPatternOperator operator, let mayStore = 1; } +class CmpSwapRRE opcode, + RegisterOperand cls1, RegisterOperand cls2> + : InstRRE { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; + let mayLoad = 1; + let mayStore = 1; +} + class CmpSwapRS opcode, SDPatternOperator operator, RegisterOperand cls, AddressingMode mode = bdaddr12only> : InstRSa // Implements "$dst = $cc & (8 >> CC) ? $src1 : $src2", where CC is // the value of the PSW's 2-bit condition code field. 
-class SelectWrapper +class SelectWrapper : Pseudo<(outs cls:$dst), (ins cls:$src1, cls:$src2, imm32zx4:$valid, imm32zx4:$cc), - [(set cls:$dst, (z_select_ccmask cls:$src1, cls:$src2, + [(set (vt cls:$dst), (z_select_ccmask cls:$src1, cls:$src2, imm32zx4:$valid, imm32zx4:$cc))]> { let usesCustomInserter = 1; // Although the instructions used by these nodes do not in themselves diff --git a/interpreter/llvm/src/lib/Target/SystemZ/SystemZInstrHFP.td b/interpreter/llvm/src/lib/Target/SystemZ/SystemZInstrHFP.td new file mode 100644 index 0000000000000..6d5b4b92f6508 --- /dev/null +++ b/interpreter/llvm/src/lib/Target/SystemZ/SystemZInstrHFP.td @@ -0,0 +1,240 @@ +//==- SystemZInstrHFP.td - Floating-point SystemZ instructions -*- tblgen-*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The instructions in this file implement SystemZ hexadecimal floating-point +// arithmetic. Since this format is not mapped to any source-language data +// type, these instructions are not used for code generation, but are provided +// for use with the assembler and disassembler only. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Move instructions +//===----------------------------------------------------------------------===// + +// Load and test. +let Defs = [CC] in { + def LTER : UnaryRR <"lter", 0x32, null_frag, FP32, FP32>; + def LTDR : UnaryRR <"ltdr", 0x22, null_frag, FP64, FP64>; + def LTXR : UnaryRRE<"ltxr", 0xB362, null_frag, FP128, FP128>; +} + +//===----------------------------------------------------------------------===// +// Conversion instructions +//===----------------------------------------------------------------------===// + +// Convert floating-point values to narrower representations. +def LEDR : UnaryRR <"ledr", 0x35, null_frag, FP32, FP64>; +def LEXR : UnaryRRE<"lexr", 0xB366, null_frag, FP32, FP128>; +def LDXR : UnaryRR <"ldxr", 0x25, null_frag, FP64, FP128>; +let isAsmParserOnly = 1 in { + def LRER : UnaryRR <"lrer", 0x35, null_frag, FP32, FP64>; + def LRDR : UnaryRR <"lrdr", 0x25, null_frag, FP64, FP128>; +} + +// Extend floating-point values to wider representations. +def LDER : UnaryRRE<"lder", 0xB324, null_frag, FP64, FP32>; +def LXER : UnaryRRE<"lxer", 0xB326, null_frag, FP128, FP32>; +def LXDR : UnaryRRE<"lxdr", 0xB325, null_frag, FP128, FP64>; + +def LDE : UnaryRXE<"lde", 0xED24, null_frag, FP64, 4>; +def LXE : UnaryRXE<"lxe", 0xED26, null_frag, FP128, 4>; +def LXD : UnaryRXE<"lxd", 0xED25, null_frag, FP128, 8>; + +// Convert a signed integer register value to a floating-point one. +def CEFR : UnaryRRE<"cefr", 0xB3B4, null_frag, FP32, GR32>; +def CDFR : UnaryRRE<"cdfr", 0xB3B5, null_frag, FP64, GR32>; +def CXFR : UnaryRRE<"cxfr", 0xB3B6, null_frag, FP128, GR32>; + +def CEGR : UnaryRRE<"cegr", 0xB3C4, null_frag, FP32, GR64>; +def CDGR : UnaryRRE<"cdgr", 0xB3C5, null_frag, FP64, GR64>; +def CXGR : UnaryRRE<"cxgr", 0xB3C6, null_frag, FP128, GR64>; + +// Convert a floating-point register value to a signed integer value, +// with the second operand (modifier M3) specifying the rounding mode. 
+let Defs = [CC] in { + def CFER : BinaryRRFe<"cfer", 0xB3B8, GR32, FP32>; + def CFDR : BinaryRRFe<"cfdr", 0xB3B9, GR32, FP64>; + def CFXR : BinaryRRFe<"cfxr", 0xB3BA, GR32, FP128>; + + def CGER : BinaryRRFe<"cger", 0xB3C8, GR64, FP32>; + def CGDR : BinaryRRFe<"cgdr", 0xB3C9, GR64, FP64>; + def CGXR : BinaryRRFe<"cgxr", 0xB3CA, GR64, FP128>; +} + +// Convert BFP to HFP. +let Defs = [CC] in { + def THDER : UnaryRRE<"thder", 0xB358, null_frag, FP64, FP32>; + def THDR : UnaryRRE<"thdr", 0xB359, null_frag, FP64, FP64>; +} + +// Convert HFP to BFP. +let Defs = [CC] in { + def TBEDR : BinaryRRFe<"tbedr", 0xB350, FP32, FP64>; + def TBDR : BinaryRRFe<"tbdr", 0xB351, FP64, FP64>; +} + + +//===----------------------------------------------------------------------===// +// Unary arithmetic +//===----------------------------------------------------------------------===// + +// Negation (Load Complement). +let Defs = [CC] in { + def LCER : UnaryRR <"lcer", 0x33, null_frag, FP32, FP32>; + def LCDR : UnaryRR <"lcdr", 0x23, null_frag, FP64, FP64>; + def LCXR : UnaryRRE<"lcxr", 0xB363, null_frag, FP128, FP128>; +} + +// Absolute value (Load Positive). +let Defs = [CC] in { + def LPER : UnaryRR <"lper", 0x30, null_frag, FP32, FP32>; + def LPDR : UnaryRR <"lpdr", 0x20, null_frag, FP64, FP64>; + def LPXR : UnaryRRE<"lpxr", 0xB360, null_frag, FP128, FP128>; +} + +// Negative absolute value (Load Negative). +let Defs = [CC] in { + def LNER : UnaryRR <"lner", 0x31, null_frag, FP32, FP32>; + def LNDR : UnaryRR <"lndr", 0x21, null_frag, FP64, FP64>; + def LNXR : UnaryRRE<"lnxr", 0xB361, null_frag, FP128, FP128>; +} + +// Halve. +def HER : UnaryRR <"her", 0x34, null_frag, FP32, FP32>; +def HDR : UnaryRR <"hdr", 0x24, null_frag, FP64, FP64>; + +// Square root. +def SQER : UnaryRRE<"sqer", 0xB245, null_frag, FP32, FP32>; +def SQDR : UnaryRRE<"sqdr", 0xB244, null_frag, FP64, FP64>; +def SQXR : UnaryRRE<"sqxr", 0xB336, null_frag, FP128, FP128>; + +def SQE : UnaryRXE<"sqe", 0xED34, null_frag, FP32, 4>; +def SQD : UnaryRXE<"sqd", 0xED35, null_frag, FP64, 8>; + +// Round to an integer (rounding towards zero). +def FIER : UnaryRRE<"fier", 0xB377, null_frag, FP32, FP32>; +def FIDR : UnaryRRE<"fidr", 0xB37F, null_frag, FP64, FP64>; +def FIXR : UnaryRRE<"fixr", 0xB367, null_frag, FP128, FP128>; + + +//===----------------------------------------------------------------------===// +// Binary arithmetic +//===----------------------------------------------------------------------===// + +// Addition. +let Defs = [CC] in { + let isCommutable = 1 in { + def AER : BinaryRR<"aer", 0x3A, null_frag, FP32, FP32>; + def ADR : BinaryRR<"adr", 0x2A, null_frag, FP64, FP64>; + def AXR : BinaryRR<"axr", 0x36, null_frag, FP128, FP128>; + } + def AE : BinaryRX<"ae", 0x7A, null_frag, FP32, load, 4>; + def AD : BinaryRX<"ad", 0x6A, null_frag, FP64, load, 8>; +} + +// Addition (unnormalized). +let Defs = [CC] in { + let isCommutable = 1 in { + def AUR : BinaryRR<"aur", 0x3E, null_frag, FP32, FP32>; + def AWR : BinaryRR<"awr", 0x2E, null_frag, FP64, FP64>; + } + def AU : BinaryRX<"au", 0x7E, null_frag, FP32, load, 4>; + def AW : BinaryRX<"aw", 0x6E, null_frag, FP64, load, 8>; +} + +// Subtraction. +let Defs = [CC] in { + def SER : BinaryRR<"ser", 0x3B, null_frag, FP32, FP32>; + def SDR : BinaryRR<"sdr", 0x2B, null_frag, FP64, FP64>; + def SXR : BinaryRR<"sxr", 0x37, null_frag, FP128, FP128>; + + def SE : BinaryRX<"se", 0x7B, null_frag, FP32, load, 4>; + def SD : BinaryRX<"sd", 0x6B, null_frag, FP64, load, 8>; +} + +// Subtraction (unnormalized). 
+let Defs = [CC] in { + def SUR : BinaryRR<"sur", 0x3F, null_frag, FP32, FP32>; + def SWR : BinaryRR<"swr", 0x2F, null_frag, FP64, FP64>; + + def SU : BinaryRX<"su", 0x7F, null_frag, FP32, load, 4>; + def SW : BinaryRX<"sw", 0x6F, null_frag, FP64, load, 8>; +} + +// Multiplication. +let isCommutable = 1 in { + def MEER : BinaryRRE<"meer", 0xB337, null_frag, FP32, FP32>; + def MDR : BinaryRR <"mdr", 0x2C, null_frag, FP64, FP64>; + def MXR : BinaryRR <"mxr", 0x26, null_frag, FP128, FP128>; +} +def MEE : BinaryRXE<"mee", 0xED37, null_frag, FP32, load, 4>; +def MD : BinaryRX <"md", 0x6C, null_frag, FP64, load, 8>; + +// Extending multiplication (f32 x f32 -> f64). +def MDER : BinaryRR<"mder", 0x3C, null_frag, FP64, FP32>; +def MDE : BinaryRX<"mde", 0x7C, null_frag, FP64, load, 4>; +let isAsmParserOnly = 1 in { + def MER : BinaryRR<"mer", 0x3C, null_frag, FP64, FP32>; + def ME : BinaryRX<"me", 0x7C, null_frag, FP64, load, 4>; +} + +// Extending multiplication (f64 x f64 -> f128). +def MXDR : BinaryRR<"mxdr", 0x27, null_frag, FP128, FP64>; +def MXD : BinaryRX<"mxd", 0x67, null_frag, FP128, load, 8>; + +// Fused multiply-add. +def MAER : TernaryRRD<"maer", 0xB32E, null_frag, FP32, FP32>; +def MADR : TernaryRRD<"madr", 0xB33E, null_frag, FP64, FP64>; +def MAE : TernaryRXF<"mae", 0xED2E, null_frag, FP32, FP32, load, 4>; +def MAD : TernaryRXF<"mad", 0xED3E, null_frag, FP64, FP64, load, 8>; + +// Fused multiply-subtract. +def MSER : TernaryRRD<"mser", 0xB32F, null_frag, FP32, FP32>; +def MSDR : TernaryRRD<"msdr", 0xB33F, null_frag, FP64, FP64>; +def MSE : TernaryRXF<"mse", 0xED2F, null_frag, FP32, FP32, load, 4>; +def MSD : TernaryRXF<"msd", 0xED3F, null_frag, FP64, FP64, load, 8>; + +// Multiplication (unnormalized). +def MYR : BinaryRRD<"myr", 0xB33B, null_frag, FP128, FP64>; +def MYHR : BinaryRRD<"myhr", 0xB33D, null_frag, FP64, FP64>; +def MYLR : BinaryRRD<"mylr", 0xB339, null_frag, FP64, FP64>; +def MY : BinaryRXF<"my", 0xED3B, null_frag, FP128, FP64, load, 8>; +def MYH : BinaryRXF<"myh", 0xED3D, null_frag, FP64, FP64, load, 8>; +def MYL : BinaryRXF<"myl", 0xED39, null_frag, FP64, FP64, load, 8>; + +// Fused multiply-add (unnormalized). +def MAYR : TernaryRRD<"mayr", 0xB33A, null_frag, FP128, FP64>; +def MAYHR : TernaryRRD<"mayhr", 0xB33C, null_frag, FP64, FP64>; +def MAYLR : TernaryRRD<"maylr", 0xB338, null_frag, FP64, FP64>; +def MAY : TernaryRXF<"may", 0xED3A, null_frag, FP128, FP64, load, 8>; +def MAYH : TernaryRXF<"mayh", 0xED3C, null_frag, FP64, FP64, load, 8>; +def MAYL : TernaryRXF<"mayl", 0xED38, null_frag, FP64, FP64, load, 8>; + +// Division. 
+def DER : BinaryRR <"der", 0x3D, null_frag, FP32, FP32>;
+def DDR : BinaryRR <"ddr", 0x2D, null_frag, FP64, FP64>;
+def DXR : BinaryRRE<"dxr", 0xB22D, null_frag, FP128, FP128>;
+def DE : BinaryRX <"de", 0x7D, null_frag, FP32, load, 4>;
+def DD : BinaryRX <"dd", 0x6D, null_frag, FP64, load, 8>;
+
+
+//===----------------------------------------------------------------------===//
+// Comparisons
+//===----------------------------------------------------------------------===//
+
+let Defs = [CC] in {
+  def CER : CompareRR <"cer", 0x39, null_frag, FP32, FP32>;
+  def CDR : CompareRR <"cdr", 0x29, null_frag, FP64, FP64>;
+  def CXR : CompareRRE<"cxr", 0xB369, null_frag, FP128, FP128>;
+
+  def CE : CompareRX<"ce", 0x79, null_frag, FP32, load, 4>;
+  def CD : CompareRX<"cd", 0x69, null_frag, FP64, load, 8>;
+}
+
diff --git a/interpreter/llvm/src/lib/Target/SystemZ/SystemZInstrInfo.cpp b/interpreter/llvm/src/lib/Target/SystemZ/SystemZInstrInfo.cpp
index a30bf34857b53..4533f4fdf21ae 100644
--- a/interpreter/llvm/src/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/interpreter/llvm/src/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -11,10 +11,10 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "SystemZInstrInfo.h"
 #include "MCTargetDesc/SystemZMCTargetDesc.h"
 #include "SystemZ.h"
 #include "SystemZInstrBuilder.h"
-#include "SystemZInstrInfo.h"
 #include "SystemZSubtarget.h"
 #include "llvm/CodeGen/LiveInterval.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
@@ -236,32 +236,30 @@ void SystemZInstrInfo::expandZExtPseudo(MachineInstr &MI, unsigned LowOpcode,
 void SystemZInstrInfo::expandLoadStackGuard(MachineInstr *MI) const {
   MachineBasicBlock *MBB = MI->getParent();
   MachineFunction &MF = *MBB->getParent();
-  const unsigned Reg = MI->getOperand(0).getReg();
+  const unsigned Reg64 = MI->getOperand(0).getReg();
+  const unsigned Reg32 = RI.getSubReg(Reg64, SystemZ::subreg_l32);
 
-  // Conveniently, all 4 instructions are cloned from LOAD_STACK_GUARD,
-  // so they already have operand 0 set to reg.
+  // EAR can only load the low subregister so use a shift for %a0 to produce
+  // the GR containing %a0 and %a1.
 
   // ear <reg>, %a0
-  MachineInstr *Ear1MI = MF.CloneMachineInstr(MI);
-  MBB->insert(MI, Ear1MI);
-  Ear1MI->setDesc(get(SystemZ::EAR));
-  MachineInstrBuilder(MF, Ear1MI).addReg(SystemZ::A0);
+  BuildMI(*MBB, MI, MI->getDebugLoc(), get(SystemZ::EAR), Reg32)
+    .addReg(SystemZ::A0)
+    .addReg(Reg64, RegState::ImplicitDefine);
 
   // sllg <reg>, <reg>, 32
-  MachineInstr *SllgMI = MF.CloneMachineInstr(MI);
-  MBB->insert(MI, SllgMI);
-  SllgMI->setDesc(get(SystemZ::SLLG));
-  MachineInstrBuilder(MF, SllgMI).addReg(Reg).addReg(0).addImm(32);
+  BuildMI(*MBB, MI, MI->getDebugLoc(), get(SystemZ::SLLG), Reg64)
+    .addReg(Reg64)
+    .addReg(0)
+    .addImm(32);
 
   // ear <reg>, %a1
-  MachineInstr *Ear2MI = MF.CloneMachineInstr(MI);
-  MBB->insert(MI, Ear2MI);
-  Ear2MI->setDesc(get(SystemZ::EAR));
-  MachineInstrBuilder(MF, Ear2MI).addReg(SystemZ::A1);
+  BuildMI(*MBB, MI, MI->getDebugLoc(), get(SystemZ::EAR), Reg32)
+    .addReg(SystemZ::A1);
 
   // lg <reg>, 40(<reg>)
   MI->setDesc(get(SystemZ::LG));
-  MachineInstrBuilder(MF, MI).addReg(Reg).addImm(40).addReg(0);
+  MachineInstrBuilder(MF, MI).addReg(Reg64).addImm(40).addReg(0);
 }
 
 // Emit a zero-extending move from 32-bit GPR SrcReg to 32-bit GPR
@@ -871,6 +869,37 @@ void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
     return;
   }
 
+  // Move 128-bit floating-point values between VR128 and FP128.
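+  // (Each FP64 half of an FP128 pair overlaps the high doubleword of a
+  // vector register, so the copy below amounts to roughly
+  // 'vmrhg %v4, %v0, %v2' for an %f0/%f2 pair; register choice illustrative.)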
+  if (SystemZ::VR128BitRegClass.contains(DestReg) &&
+      SystemZ::FP128BitRegClass.contains(SrcReg)) {
+    unsigned SrcRegHi =
+      RI.getMatchingSuperReg(RI.getSubReg(SrcReg, SystemZ::subreg_h64),
+                             SystemZ::subreg_r64, &SystemZ::VR128BitRegClass);
+    unsigned SrcRegLo =
+      RI.getMatchingSuperReg(RI.getSubReg(SrcReg, SystemZ::subreg_l64),
+                             SystemZ::subreg_r64, &SystemZ::VR128BitRegClass);
+
+    BuildMI(MBB, MBBI, DL, get(SystemZ::VMRHG), DestReg)
+      .addReg(SrcRegHi, getKillRegState(KillSrc))
+      .addReg(SrcRegLo, getKillRegState(KillSrc));
+    return;
+  }
+  if (SystemZ::FP128BitRegClass.contains(DestReg) &&
+      SystemZ::VR128BitRegClass.contains(SrcReg)) {
+    unsigned DestRegHi =
+      RI.getMatchingSuperReg(RI.getSubReg(DestReg, SystemZ::subreg_h64),
+                             SystemZ::subreg_r64, &SystemZ::VR128BitRegClass);
+    unsigned DestRegLo =
+      RI.getMatchingSuperReg(RI.getSubReg(DestReg, SystemZ::subreg_l64),
+                             SystemZ::subreg_r64, &SystemZ::VR128BitRegClass);
+
+    if (DestRegHi != SrcReg)
+      copyPhysReg(MBB, MBBI, DL, DestRegHi, SrcReg, false);
+    BuildMI(MBB, MBBI, DL, get(SystemZ::VREPG), DestRegLo)
+      .addReg(SrcReg, getKillRegState(KillSrc)).addImm(1);
+    return;
+  }
+
   // Everything else needs only one instruction.
   unsigned Opcode;
   if (SystemZ::GR64BitRegClass.contains(DestReg, SrcReg))
@@ -1436,6 +1465,7 @@ SystemZII::Branch
 SystemZInstrInfo::getBranchInfo(const MachineInstr &MI) const {
   switch (MI.getOpcode()) {
   case SystemZ::BR:
+  case SystemZ::BI:
   case SystemZ::J:
   case SystemZ::JG:
     return SystemZII::Branch(SystemZII::BranchNormal, SystemZ::CCMASK_ANY,
diff --git a/interpreter/llvm/src/lib/Target/SystemZ/SystemZInstrInfo.td b/interpreter/llvm/src/lib/Target/SystemZ/SystemZInstrInfo.td
index fa5ecdd852433..f64c0d15ef83b 100644
--- a/interpreter/llvm/src/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/interpreter/llvm/src/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -48,6 +48,8 @@ let isBranch = 1, isTerminator = 1, Uses = [CC] in {
   let isIndirectBranch = 1 in {
     def BC : CondBranchRX<"b#", 0x47>;
     def BCR : CondBranchRR<"b#r", 0x07>;
+    def BIC : CondBranchRXY<"bi#", 0xe347>,
+              Requires<[FeatureMiscellaneousExtensions2]>;
   }
 }
 
@@ -58,6 +60,8 @@ let isBranch = 1, isTerminator = 1, Uses = [CC] in {
   let isIndirectBranch = 1 in {
     def BCAsm : AsmCondBranchRX<"bc", 0x47>;
     def BCRAsm : AsmCondBranchRR<"bcr", 0x07>;
+    def BICAsm : AsmCondBranchRXY<"bic", 0xe347>,
+                 Requires<[FeatureMiscellaneousExtensions2]>;
   }
 
   // Define AsmParser extended mnemonics for each general condition-code mask
@@ -69,6 +73,8 @@ let isBranch = 1, isTerminator = 1, Uses = [CC] in {
   let isIndirectBranch = 1 in {
     def BAsm#V : FixedCondBranchRX <CV<V>, "b#", 0x47>;
     def BRAsm#V : FixedCondBranchRR <CV<V>, "b#r", 0x07>;
+    def BIAsm#V : FixedCondBranchRXY<CV<V>, "bi#", 0xe347>,
+                  Requires<[FeatureMiscellaneousExtensions2]>;
   }
   }
 }
@@ -81,6 +87,8 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
   let isIndirectBranch = 1 in {
     def B : FixedCondBranchRX<CondAlways, "b", 0x47>;
     def BR : FixedCondBranchRR<CondAlways, "br", 0x07>;
+    def BI : FixedCondBranchRXY<CondAlways, "bi", 0xe347>,
+             Requires<[FeatureMiscellaneousExtensions2]>;
   }
 }
 
@@ -189,18 +197,15 @@ let isBranch = 1, isTerminator = 1 in {
 //===----------------------------------------------------------------------===//
 
 // Unconditional trap.
-// FIXME: This trap instruction should be marked as isTerminator, but there is
-// currently a general bug that allows non-terminators to be placed between
-// terminators. Temporarily leave this unmarked until the bug is fixed.
-let isBarrier = 1, hasCtrlDep = 1 in
+let hasCtrlDep = 1 in
   def Trap : Alias<4, (outs), (ins), [(trap)]>;
 
 // Conditional trap.
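// (Like Trap above, CondTrap is an Alias: a codegen-only pseudo of known
// 4-byte size that the asm printer expands late; its cond4 operands carry the
// CC mask to test.)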
-let isTerminator = 1, hasCtrlDep = 1, Uses = [CC] in
+let hasCtrlDep = 1, Uses = [CC] in
   def CondTrap : Alias<4, (outs), (ins cond4:$valid, cond4:$R1), []>;
 
 // Fused compare-and-trap instructions.
-let isTerminator = 1, hasCtrlDep = 1 in {
+let hasCtrlDep = 1 in {
   // These patterns work the same way as for compare-and-branch.
   defm CRT : CmpBranchRRFcPair<"crt", 0xB972, GR32>;
   defm CGRT : CmpBranchRRFcPair<"cgrt", 0xB960, GR64>;
@@ -319,9 +324,9 @@ let isReturn = 1, isTerminator = 1, hasCtrlDep = 1 in {
 // Select instructions
 //===----------------------------------------------------------------------===//
 
-def Select32Mux : SelectWrapper<GRX32>, Requires<[FeatureHighWord]>;
-def Select32 : SelectWrapper<GR32>;
-def Select64 : SelectWrapper<GR64>;
+def Select32Mux : SelectWrapper<i32, GRX32>, Requires<[FeatureHighWord]>;
+def Select32 : SelectWrapper<i32, GR32>;
+def Select64 : SelectWrapper<i64, GR64>;
 
 // We don't define 32-bit Mux stores if we don't have STOCFH, because the
 // low-only STOC should then always be used if possible.
@@ -680,6 +685,22 @@ let Predicates = [FeatureLoadAndTrap] in {
   def LLGTAT : UnaryRXY<"llgtat", 0xE39C, null_frag, GR64, 4>;
 }
 
+// Extend GR64s to GR128s.
+let usesCustomInserter = 1 in
+  def ZEXT128 : Pseudo<(outs GR128:$dst), (ins GR64:$src), []>;
+
+//===----------------------------------------------------------------------===//
+// "Any" extensions
+//===----------------------------------------------------------------------===//
+
+// Use subregs to populate the "don't care" bits in a 32-bit to 64-bit anyext.
+def : Pat<(i64 (anyext GR32:$src)),
+          (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_l32)>;
+
+// Extend GR64s to GR128s.
+let usesCustomInserter = 1 in
+  def AEXT128 : Pseudo<(outs GR128:$dst), (ins GR64:$src), []>;
+
 //===----------------------------------------------------------------------===//
 // Truncations
 //===----------------------------------------------------------------------===//
@@ -886,6 +907,12 @@ let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0x8 in {
   }
   def AGFR : BinaryRRE<"agfr", 0xB918, null_frag, GR64, GR32>;
 
+  // Addition to a high register.
+  def AHHHR : BinaryRRFa<"ahhhr", 0xB9C8, null_frag, GRH32, GRH32, GRH32>,
+              Requires<[FeatureHighWord]>;
+  def AHHLR : BinaryRRFa<"ahhlr", 0xB9D8, null_frag, GRH32, GRH32, GR32>,
+              Requires<[FeatureHighWord]>;
+
   // Addition of signed 16-bit immediates.
   defm AHIMux : BinaryRIAndKPseudo<"ahimux", add, GRX32, imm32sx16>;
   defm AHI : BinaryRIAndK<"ahi", 0xA7A, 0xECD8, add, GR32, imm32sx16>;
@@ -902,6 +929,8 @@ let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0x8 in {
   // Addition of memory.
   defm AH : BinaryRXPair<"ah", 0x4A, 0xE37A, add, GR32, asextloadi16, 2>;
   defm A : BinaryRXPair<"a", 0x5A, 0xE35A, add, GR32, load, 4>;
+  def AGH : BinaryRXY<"agh", 0xE338, add, GR64, asextloadi16, 2>,
+            Requires<[FeatureMiscellaneousExtensions2]>;
   def AGF : BinaryRXY<"agf", 0xE318, add, GR64, asextloadi32, 4>;
   def AG : BinaryRXY<"ag", 0xE308, add, GR64, load, 8>;
 
@@ -920,6 +949,12 @@ let Defs = [CC] in {
   }
   def ALGFR : BinaryRRE<"algfr", 0xB91A, null_frag, GR64, GR32>;
 
+  // Addition to a high register.
+  def ALHHHR : BinaryRRFa<"alhhhr", 0xB9CA, null_frag, GRH32, GRH32, GRH32>,
+               Requires<[FeatureHighWord]>;
+  def ALHHLR : BinaryRRFa<"alhhlr", 0xB9DA, null_frag, GRH32, GRH32, GR32>,
+               Requires<[FeatureHighWord]>;
+
   // Addition of signed 16-bit immediates.
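  // (alhsik below is the distinct-operands three-address form: e.g.
  //  'alhsik %r1, %r3, -5' writes %r1 without clobbering %r3.)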
def ALHSIK : BinaryRIE<"alhsik", 0xECDA, addc, GR32, imm32sx16>, Requires<[FeatureDistinctOps]>; @@ -930,6 +965,10 @@ let Defs = [CC] in { def ALFI : BinaryRIL<"alfi", 0xC2B, addc, GR32, uimm32>; def ALGFI : BinaryRIL<"algfi", 0xC2A, addc, GR64, imm64zx32>; + // Addition of signed 32-bit immediates. + def ALSIH : BinaryRIL<"alsih", 0xCCA, null_frag, GRH32, simm32>, + Requires<[FeatureHighWord]>; + // Addition of memory. defm AL : BinaryRXPair<"al", 0x5E, 0xE35E, addc, GR32, load, 4>; def ALGF : BinaryRXY<"algf", 0xE31A, addc, GR64, azextloadi32, 4>; @@ -952,6 +991,10 @@ let Defs = [CC], Uses = [CC] in { def ALCG : BinaryRXY<"alcg", 0xE388, adde, GR64, load, 8>; } +// Addition that does not modify the condition code. +def ALSIHN : BinaryRIL<"alsihn", 0xCCB, null_frag, GRH32, simm32>, + Requires<[FeatureHighWord]>; + //===----------------------------------------------------------------------===// // Subtraction //===----------------------------------------------------------------------===// @@ -964,9 +1007,17 @@ let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0x8 in { def SGFR : BinaryRRE<"sgfr", 0xB919, null_frag, GR64, GR32>; defm SGR : BinaryRREAndK<"sgr", 0xB909, 0xB9E9, sub, GR64, GR64>; + // Subtraction from a high register. + def SHHHR : BinaryRRFa<"shhhr", 0xB9C9, null_frag, GRH32, GRH32, GRH32>, + Requires<[FeatureHighWord]>; + def SHHLR : BinaryRRFa<"shhlr", 0xB9D9, null_frag, GRH32, GRH32, GR32>, + Requires<[FeatureHighWord]>; + // Subtraction of memory. defm SH : BinaryRXPair<"sh", 0x4B, 0xE37B, sub, GR32, asextloadi16, 2>; defm S : BinaryRXPair<"s", 0x5B, 0xE35B, sub, GR32, load, 4>; + def SGH : BinaryRXY<"sgh", 0xE339, sub, GR64, asextloadi16, 2>, + Requires<[FeatureMiscellaneousExtensions2]>; def SGF : BinaryRXY<"sgf", 0xE319, sub, GR64, asextloadi32, 4>; def SG : BinaryRXY<"sg", 0xE309, sub, GR64, load, 8>; } @@ -979,6 +1030,12 @@ let Defs = [CC] in { def SLGFR : BinaryRRE<"slgfr", 0xB91B, null_frag, GR64, GR32>; defm SLGR : BinaryRREAndK<"slgr", 0xB90B, 0xB9EB, subc, GR64, GR64>; + // Subtraction from a high register. + def SLHHHR : BinaryRRFa<"slhhhr", 0xB9CB, null_frag, GRH32, GRH32, GRH32>, + Requires<[FeatureHighWord]>; + def SLHHLR : BinaryRRFa<"slhhlr", 0xB9DB, null_frag, GRH32, GRH32, GR32>, + Requires<[FeatureHighWord]>; + // Subtraction of unsigned 32-bit immediates. These don't match // subc because we prefer addc for constants. def SLFI : BinaryRIL<"slfi", 0xC25, null_frag, GR32, uimm32>; @@ -1162,6 +1219,15 @@ defm : RMWIByte; // Multiplication //===----------------------------------------------------------------------===// +// Multiplication of a register, setting the condition code. We prefer these +// over MS(G)R if available, even though we cannot use the condition code, +// since they are three-operand instructions. +let Predicates = [FeatureMiscellaneousExtensions2], + Defs = [CC], isCommutable = 1 in { + def MSRKC : BinaryRRFa<"msrkc", 0xB9FD, mul, GR32, GR32, GR32>; + def MSGRKC : BinaryRRFa<"msgrkc", 0xB9ED, mul, GR64, GR64, GR64>; +} + // Multiplication of a register. let isCommutable = 1 in { def MSR : BinaryRRE<"msr", 0xB252, mul, GR32, GR32>; @@ -1181,19 +1247,39 @@ def MSGFI : BinaryRIL<"msgfi", 0xC20, mul, GR64, imm64sx32>; // Multiplication of memory. 
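  // (Each BinaryRXPair defm expands to both the short-displacement RX form
  //  and the long-displacement RXY form, e.g. MH plus MHY here.)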
defm MH : BinaryRXPair<"mh", 0x4C, 0xE37C, mul, GR32, asextloadi16, 2>; defm MS : BinaryRXPair<"ms", 0x71, 0xE351, mul, GR32, load, 4>; +def MGH : BinaryRXY<"mgh", 0xE33C, mul, GR64, asextloadi16, 2>, + Requires<[FeatureMiscellaneousExtensions2]>; def MSGF : BinaryRXY<"msgf", 0xE31C, mul, GR64, asextloadi32, 4>; def MSG : BinaryRXY<"msg", 0xE30C, mul, GR64, load, 8>; +// Multiplication of memory, setting the condition code. +let Predicates = [FeatureMiscellaneousExtensions2], Defs = [CC] in { + def MSC : BinaryRXY<"msc", 0xE353, null_frag, GR32, load, 4>; + def MSGC : BinaryRXY<"msgc", 0xE383, null_frag, GR64, load, 8>; +} + // Multiplication of a register, producing two results. -def MR : BinaryRR <"mr", 0x1C, null_frag, GR128, GR32>; +def MR : BinaryRR <"mr", 0x1C, null_frag, GR128, GR32>; +def MGRK : BinaryRRFa<"mgrk", 0xB9EC, null_frag, GR128, GR64, GR64>, + Requires<[FeatureMiscellaneousExtensions2]>; def MLR : BinaryRRE<"mlr", 0xB996, null_frag, GR128, GR32>; -def MLGR : BinaryRRE<"mlgr", 0xB986, z_umul_lohi64, GR128, GR64>; +def MLGR : BinaryRRE<"mlgr", 0xB986, null_frag, GR128, GR64>; +def : Pat<(z_smul_lohi GR64:$src1, GR64:$src2), + (MGRK GR64:$src1, GR64:$src2)>; +def : Pat<(z_umul_lohi GR64:$src1, GR64:$src2), + (MLGR (AEXT128 GR64:$src1), GR64:$src2)>; // Multiplication of memory, producing two results. def M : BinaryRX <"m", 0x5C, null_frag, GR128, load, 4>; def MFY : BinaryRXY<"mfy", 0xE35C, null_frag, GR128, load, 4>; +def MG : BinaryRXY<"mg", 0xE384, null_frag, GR128, load, 8>, + Requires<[FeatureMiscellaneousExtensions2]>; def ML : BinaryRXY<"ml", 0xE396, null_frag, GR128, load, 4>; -def MLG : BinaryRXY<"mlg", 0xE386, z_umul_lohi64, GR128, load, 8>; +def MLG : BinaryRXY<"mlg", 0xE386, null_frag, GR128, load, 8>; +def : Pat<(z_smul_lohi GR64:$src1, (i64 (load bdxaddr20only:$src2))), + (MG (AEXT128 GR64:$src1), bdxaddr20only:$src2)>; +def : Pat<(z_umul_lohi GR64:$src1, (i64 (load bdxaddr20only:$src2))), + (MLG (AEXT128 GR64:$src1), bdxaddr20only:$src2)>; //===----------------------------------------------------------------------===// // Division and remainder @@ -1201,19 +1287,38 @@ def MLG : BinaryRXY<"mlg", 0xE386, z_umul_lohi64, GR128, load, 8>; let hasSideEffects = 1 in { // Do not speculatively execute. // Division and remainder, from registers. - def DR : BinaryRR <"dr", 0x1D, null_frag, GR128, GR32>; - def DSGFR : BinaryRRE<"dsgfr", 0xB91D, z_sdivrem32, GR128, GR32>; - def DSGR : BinaryRRE<"dsgr", 0xB90D, z_sdivrem64, GR128, GR64>; - def DLR : BinaryRRE<"dlr", 0xB997, z_udivrem32, GR128, GR32>; - def DLGR : BinaryRRE<"dlgr", 0xB987, z_udivrem64, GR128, GR64>; + def DR : BinaryRR <"dr", 0x1D, null_frag, GR128, GR32>; + def DSGFR : BinaryRRE<"dsgfr", 0xB91D, null_frag, GR128, GR32>; + def DSGR : BinaryRRE<"dsgr", 0xB90D, null_frag, GR128, GR64>; + def DLR : BinaryRRE<"dlr", 0xB997, null_frag, GR128, GR32>; + def DLGR : BinaryRRE<"dlgr", 0xB987, null_frag, GR128, GR64>; // Division and remainder, from memory. 
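  // (The GR128 operand is an even/odd register pair with the dividend in the
  //  low half; the AEXT128/ZEXT128 pseudos in the patterns below build that
  //  pair, and the divide leaves remainder:quotient in it.)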
-  def D : BinaryRX <"d", 0x5D, null_frag, GR128, load, 4>;
-  def DSGF : BinaryRXY<"dsgf", 0xE31D, z_sdivrem32, GR128, load, 4>;
-  def DSG : BinaryRXY<"dsg", 0xE30D, z_sdivrem64, GR128, load, 8>;
-  def DL : BinaryRXY<"dl", 0xE397, z_udivrem32, GR128, load, 4>;
-  def DLG : BinaryRXY<"dlg", 0xE387, z_udivrem64, GR128, load, 8>;
-}
+  def D : BinaryRX <"d", 0x5D, null_frag, GR128, load, 4>;
+  def DSGF : BinaryRXY<"dsgf", 0xE31D, null_frag, GR128, load, 4>;
+  def DSG : BinaryRXY<"dsg", 0xE30D, null_frag, GR128, load, 8>;
+  def DL : BinaryRXY<"dl", 0xE397, null_frag, GR128, load, 4>;
+  def DLG : BinaryRXY<"dlg", 0xE387, null_frag, GR128, load, 8>;
+}
+def : Pat<(z_sdivrem GR64:$src1, GR32:$src2),
+          (DSGFR (AEXT128 GR64:$src1), GR32:$src2)>;
+def : Pat<(z_sdivrem GR64:$src1, (i32 (load bdxaddr20only:$src2))),
+          (DSGF (AEXT128 GR64:$src1), bdxaddr20only:$src2)>;
+def : Pat<(z_sdivrem GR64:$src1, GR64:$src2),
+          (DSGR (AEXT128 GR64:$src1), GR64:$src2)>;
+def : Pat<(z_sdivrem GR64:$src1, (i64 (load bdxaddr20only:$src2))),
+          (DSG (AEXT128 GR64:$src1), bdxaddr20only:$src2)>;
+
+def : Pat<(z_udivrem GR32:$src1, GR32:$src2),
+          (DLR (ZEXT128 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src1,
+                                       subreg_l32)), GR32:$src2)>;
+def : Pat<(z_udivrem GR32:$src1, (i32 (load bdxaddr20only:$src2))),
+          (DL (ZEXT128 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src1,
+                                      subreg_l32)), bdxaddr20only:$src2)>;
+def : Pat<(z_udivrem GR64:$src1, GR64:$src2),
+          (DLGR (ZEXT128 GR64:$src1), GR64:$src2)>;
+def : Pat<(z_udivrem GR64:$src1, (i64 (load bdxaddr20only:$src2))),
+          (DLG (ZEXT128 GR64:$src1), bdxaddr20only:$src2)>;
 
 //===----------------------------------------------------------------------===//
 // Shifts
@@ -1301,6 +1406,12 @@ let Defs = [CC], CCValues = 0xE in {
   def CGFR : CompareRRE<"cgfr", 0xB930, null_frag, GR64, GR32>;
   def CGR : CompareRRE<"cgr", 0xB920, z_scmp, GR64, GR64>;
 
+  // Comparison with a high register.
+  def CHHR : CompareRRE<"chhr", 0xB9CD, null_frag, GRH32, GRH32>,
+             Requires<[FeatureHighWord]>;
+  def CHLR : CompareRRE<"chlr", 0xB9DD, null_frag, GRH32, GR32>,
+             Requires<[FeatureHighWord]>;
+
   // Comparison with a signed 16-bit immediate. CHIMux expands to CHI or CIH,
   // depending on the choice of register.
   def CHIMux : CompareRIPseudo<z_scmp, GRX32, imm32sx16>,
@@ -1347,6 +1458,12 @@ let Defs = [CC], CCValues = 0xE, IsLogical = 1 in {
   def CLGFR : CompareRRE<"clgfr", 0xB931, null_frag, GR64, GR32>;
   def CLGR : CompareRRE<"clgr", 0xB921, z_ucmp, GR64, GR64>;
 
+  // Comparison with a high register.
+  def CLHHR : CompareRRE<"clhhr", 0xB9CF, null_frag, GRH32, GRH32>,
+              Requires<[FeatureHighWord]>;
+  def CLHLR : CompareRRE<"clhlr", 0xB9DF, null_frag, GRH32, GR32>,
+              Requires<[FeatureHighWord]>;
+
   // Comparison with an unsigned 32-bit immediate. CLFIMux expands to CLFI
   // or CLIH, depending on the choice of register.
   def CLFIMux : CompareRIPseudo<z_ucmp, GRX32, uimm32>,
@@ -1449,7 +1566,7 @@ let Predicates = [FeatureExecutionHint] in {
 // A serialization instruction that acts as a barrier for all memory
 // accesses, which expands to "bcr 14, 0".
 let hasSideEffects = 1 in
-def Serialize : Alias<2, (outs), (ins), [(z_serialize)]>;
+def Serialize : Alias<2, (outs), (ins), []>;
 
 // A pseudo instruction that serves as a compiler barrier.
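// (It expands to no machine code; hasSideEffects merely keeps memory accesses
// from being reordered across it.)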
let hasSideEffects = 1, hasNoSchedulingInfo = 1 in @@ -1685,8 +1802,29 @@ let mayLoad = 1, mayStore = 1, Uses = [R0L, R1D], Defs = [CC] in { GR128, GR128, GR128>; def PCC : SideEffectInherentRRE<"pcc", 0xB92C>; } + let Predicates = [FeatureMessageSecurityAssist5] in - def PPNO : SideEffectBinaryMemMemRRE<"ppno", 0xB93C, GR128, GR128>; + def PPNO : SideEffectBinaryMemMemRRE<"ppno", 0xB93C, GR128, GR128>; + let Predicates = [FeatureMessageSecurityAssist7], isAsmParserOnly = 1 in + def PRNO : SideEffectBinaryMemMemRRE<"prno", 0xB93C, GR128, GR128>; + + let Predicates = [FeatureMessageSecurityAssist8] in + def KMA : SideEffectTernaryMemMemMemRRFb<"kma", 0xB929, + GR128, GR128, GR128>; +} + +//===----------------------------------------------------------------------===// +// Guarded storage +//===----------------------------------------------------------------------===// + +let Predicates = [FeatureGuardedStorage] in { + def LGG : UnaryRXY<"lgg", 0xE34C, null_frag, GR64, 8>; + def LLGFSG : UnaryRXY<"llgfsg", 0xE348, null_frag, GR64, 4>; + + let mayLoad = 1 in + def LGSC : SideEffectBinaryRXY<"lgsc", 0xE34D, GR64>; + let mayStore = 1 in + def STGSC : SideEffectBinaryRXY<"stgsc", 0xE349, GR64>; } //===----------------------------------------------------------------------===// @@ -1853,17 +1991,6 @@ def : Pat<(ctlz GR64:$src), let Predicates = [FeaturePopulationCount], Defs = [CC] in def POPCNT : UnaryRRE<"popcnt", 0xB9E1, z_popcnt, GR64, GR64>; -// Use subregs to populate the "don't care" bits in a 32-bit to 64-bit anyext. -def : Pat<(i64 (anyext GR32:$src)), - (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_l32)>; - -// Extend GR32s and GR64s to GR128s. -let usesCustomInserter = 1 in { - def AEXT128_64 : Pseudo<(outs GR128:$dst), (ins GR64:$src), []>; - def ZEXT128_32 : Pseudo<(outs GR128:$dst), (ins GR32:$src), []>; - def ZEXT128_64 : Pseudo<(outs GR128:$dst), (ins GR64:$src), []>; -} - // Search a block of memory for a character. let mayLoad = 1, Defs = [CC] in defm SRST : StringRRE<"srst", 0xB25E, z_search_string>; @@ -1891,54 +2018,12 @@ let mayLoad = 1, Defs = [CC] in let mayLoad = 1, mayStore = 1, Defs = [CC, R1D], Uses = [R0L, R1D] in def CMPSC : SideEffectBinaryMemMemRRE<"cmpsc", 0xB263, GR128, GR128>; -// Supervisor call. -let hasSideEffects = 1, isCall = 1, Defs = [CC] in - def SVC : SideEffectUnaryI<"svc", 0x0A, imm32zx8>; - -// Monitor call. -let hasSideEffects = 1, isCall = 1 in - def MC : SideEffectBinarySI<"mc", 0xAF, imm32zx8>; - -// Store clock. -let hasSideEffects = 1, Defs = [CC] in { - def STCK : StoreInherentS<"stck", 0xB205, null_frag, 8>; - def STCKF : StoreInherentS<"stckf", 0xB27C, null_frag, 8>; - def STCKE : StoreInherentS<"stcke", 0xB278, null_frag, 16>; -} - -// Store facility list. -let hasSideEffects = 1, Uses = [R0D], Defs = [R0D, CC] in - def STFLE : StoreInherentS<"stfle", 0xB2B0, null_frag, 0>; - -// Extract CPU attribute. -let hasSideEffects = 1 in - def ECAG : BinaryRSY<"ecag", 0xEB4C, null_frag, GR64>; - -// Extract CPU time. -let Defs = [R0D, R1D], hasSideEffects = 1, mayLoad = 1 in - def ECTG : SideEffectTernarySSF<"ectg", 0xC81, GR64>; - -// Extract PSW. -let hasSideEffects = 1, Uses = [CC] in - def EPSW : InherentDualRRE<"epsw", 0xB98D, GR32>; - // Execute. let hasSideEffects = 1 in { def EX : SideEffectBinaryRX<"ex", 0x44, GR64>; def EXRL : SideEffectBinaryRILPC<"exrl", 0xC60, GR64>; } -// Program return. -let hasSideEffects = 1, Defs = [CC] in - def PR : SideEffectInherentE<"pr", 0x0101>; - -// Move with key. 
-let mayLoad = 1, mayStore = 1, Defs = [CC] in - def MVCK : MemoryBinarySSd<"mvck", 0xD9, GR64>; - -// Store real address. -def STRAG : StoreSSE<"strag", 0xE502>; - //===----------------------------------------------------------------------===// // .insn directive instructions //===----------------------------------------------------------------------===// diff --git a/interpreter/llvm/src/lib/Target/SystemZ/SystemZInstrSystem.td b/interpreter/llvm/src/lib/Target/SystemZ/SystemZInstrSystem.td new file mode 100644 index 0000000000000..0112ebf1eb10c --- /dev/null +++ b/interpreter/llvm/src/lib/Target/SystemZ/SystemZInstrSystem.td @@ -0,0 +1,521 @@ +//==- SystemZInstrSystem.td - SystemZ system instructions -*- tblgen-*-----==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The instructions in this file implement SystemZ system-level instructions. +// Most of these instructions are privileged or semi-privileged. They are +// not used for code generation, but are provided for use with the assembler +// and disassembler only. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Program-Status Word Instructions. +//===----------------------------------------------------------------------===// + +// Extract PSW. +let hasSideEffects = 1, Uses = [CC] in + def EPSW : InherentDualRRE<"epsw", 0xB98D, GR32>; + +// Load PSW (extended). +let hasSideEffects = 1, Defs = [CC], mayLoad = 1 in { + def LPSW : SideEffectUnaryS<"lpsw", 0x8200, null_frag, 8>; + def LPSWE : SideEffectUnaryS<"lpswe", 0xB2B2, null_frag, 16>; +} + +// Insert PSW key. +let Uses = [R2L], Defs = [R2L] in + def IPK : SideEffectInherentS<"ipk", 0xB20B, null_frag>; + +// Set PSW key from address. +let hasSideEffects = 1 in + def SPKA : SideEffectAddressS<"spka", 0xB20A, null_frag>; + +// Set system mask. +let hasSideEffects = 1, mayLoad = 1 in + def SSM : SideEffectUnaryS<"ssm", 0x8000, null_frag, 1>; + +// Store then AND/OR system mask. +let hasSideEffects = 1 in { + def STNSM : StoreSI<"stnsm", 0xAC, null_frag, imm32zx8>; + def STOSM : StoreSI<"stosm", 0xAD, null_frag, imm32zx8>; +} + +// Insert address space control. +let hasSideEffects = 1 in + def IAC : InherentRRE<"iac", 0xB224, GR32, null_frag>; + +// Set address space control (fast). +let hasSideEffects = 1 in { + def SAC : SideEffectAddressS<"sac", 0xB219, null_frag>; + def SACF : SideEffectAddressS<"sacf", 0xB279, null_frag>; +} + +//===----------------------------------------------------------------------===// +// Control Register Instructions. +//===----------------------------------------------------------------------===// + +// Load control. +def LCTL : LoadMultipleRS<"lctl", 0xB7, CR64>; +def LCTLG : LoadMultipleRSY<"lctlg", 0xEB2F, CR64>; + +// Store control. +def STCTL : StoreMultipleRS<"stctl", 0xB6, CR64>; +def STCTG : StoreMultipleRSY<"stctg", 0xEB25, CR64>; + +// Extract primary ASN (and instance). +let hasSideEffects = 1 in { + def EPAR : InherentRRE<"epar", 0xB226, GR32, null_frag>; + def EPAIR : InherentRRE<"epair", 0xB99A, GR64, null_frag>; +} + +// Extract secondary ASN (and instance). 
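+// (ASN = address-space number; the 64-bit 'AIR' variants additionally return
+// the ASN instance that qualifies it.)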
+let hasSideEffects = 1 in { + def ESAR : InherentRRE<"esar", 0xB227, GR32, null_frag>; + def ESAIR : InherentRRE<"esair", 0xB99B, GR64, null_frag>; +} + +// Set secondary ASN (and instance). +let hasSideEffects = 1 in { + def SSAR : SideEffectUnaryRRE<"ssar", 0xB225, GR32, null_frag>; + def SSAIR : SideEffectUnaryRRE<"ssair", 0xB99F, GR64, null_frag>; +} + +// Extract and set extended authority. +let hasSideEffects = 1 in + def ESEA : UnaryTiedRRE<"esea", 0xB99D, GR32>; + +//===----------------------------------------------------------------------===// +// Prefix-Register Instructions. +//===----------------------------------------------------------------------===// + +// Set prefix. +let hasSideEffects = 1 in + def SPX : SideEffectUnaryS<"spx", 0xB210, null_frag, 4>; + +// Store prefix. +let hasSideEffects = 1 in + def STPX : StoreInherentS<"stpx", 0xB211, null_frag, 4>; + +//===----------------------------------------------------------------------===// +// Storage-Key and Real Memory Instructions. +//===----------------------------------------------------------------------===// + +// Insert storage key extended. +let hasSideEffects = 1 in + def ISKE : BinaryRRE<"iske", 0xB229, null_frag, GR32, GR64>; + +// Insert virtual storage key. +let hasSideEffects = 1 in + def IVSK : BinaryRRE<"ivsk", 0xB223, null_frag, GR32, GR64>; + +// Set storage key extended. +let hasSideEffects = 1, Defs = [CC] in + defm SSKE : SideEffectTernaryRRFcOpt<"sske", 0xB22B, GR32, GR64>; + +// Reset reference bit extended. +let hasSideEffects = 1, Defs = [CC] in + def RRBE : SideEffectBinaryRRE<"rrbe", 0xB22A, GR32, GR64>; + +// Reset reference bits multiple. +let Predicates = [FeatureResetReferenceBitsMultiple], hasSideEffects = 1 in + def RRBM : UnaryRRE<"rrbm", 0xB9AE, null_frag, GR64, GR64>; + +// Insert reference bits multiple. +let Predicates = [FeatureInsertReferenceBitsMultiple], hasSideEffects = 1 in + def IRBM : UnaryRRE<"irbm", 0xB9AC, null_frag, GR64, GR64>; + +// Perform frame management function. +let hasSideEffects = 1 in + def PFMF : SideEffectBinaryMemRRE<"pfmf", 0xB9AF, GR32, GR64>; + +// Test block. +let hasSideEffects = 1, mayStore = 1, Uses = [R0D], Defs = [R0D, CC] in + def TB : SideEffectBinaryRRE<"tb", 0xB22C, GR64, GR64>; + +// Page in / out. +let mayLoad = 1, mayStore = 1, Defs = [CC] in { + def PGIN : SideEffectBinaryRRE<"pgin", 0xB22E, GR64, GR64>; + def PGOUT : SideEffectBinaryRRE<"pgout", 0xB22F, GR64, GR64>; +} + +//===----------------------------------------------------------------------===// +// Dynamic-Address-Translation Instructions. +//===----------------------------------------------------------------------===// + +// Invalidate page table entry. +let hasSideEffects = 1 in + defm IPTE : SideEffectQuaternaryRRFaOptOpt<"ipte", 0xB221, GR64, GR32, GR32>; + +// Invalidate DAT table entry. +let hasSideEffects = 1 in + defm IDTE : SideEffectQuaternaryRRFbOpt<"idte", 0xB98E, GR64, GR64, GR64>; + +// Compare and replace DAT table entry. +let Predicates = [FeatureEnhancedDAT2], hasSideEffects = 1, Defs = [CC] in + defm CRDTE : SideEffectQuaternaryRRFbOpt<"crdte", 0xB98F, GR128, GR128, GR64>; + +// Purge TLB. +let hasSideEffects = 1 in + def PTLB : SideEffectInherentS<"ptlb", 0xB20D, null_frag>; + +// Compare and swap and purge. +let hasSideEffects = 1, Defs = [CC] in { + def CSP : CmpSwapRRE<"csp", 0xB250, GR128, GR64>; + def CSPG : CmpSwapRRE<"cspg", 0xB98A, GR128, GR64>; +} + +// Load page-table-entry address. 
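+// (Roughly: lptea translates a virtual address and returns the real address
+// of its page-table entry, with the outcome reported in CC.)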
+let hasSideEffects = 1, Defs = [CC] in + def LPTEA : TernaryRRFb<"lptea", 0xB9AA, GR64, GR64, GR64>; + +// Load real address. +let hasSideEffects = 1, Defs = [CC] in { + defm LRA : LoadAddressRXPair<"lra", 0xB1, 0xE313, null_frag>; + def LRAG : LoadAddressRXY<"lrag", 0xE303, null_frag, laaddr20pair>; +} + +// Store real address. +def STRAG : StoreSSE<"strag", 0xE502>; + +// Load using real address. +let mayLoad = 1 in { + def LURA : UnaryRRE<"lura", 0xB24B, null_frag, GR32, GR64>; + def LURAG : UnaryRRE<"lurag", 0xB905, null_frag, GR64, GR64>; +} + +// Store using real address. +let mayStore = 1 in { + def STURA : SideEffectBinaryRRE<"stura", 0xB246, GR32, GR64>; + def STURG : SideEffectBinaryRRE<"sturg", 0xB925, GR64, GR64>; +} + +// Test protection. +let hasSideEffects = 1, Defs = [CC] in + def TPROT : SideEffectBinarySSE<"tprot", 0xE501>; + +//===----------------------------------------------------------------------===// +// Memory-move Instructions. +//===----------------------------------------------------------------------===// + +// Move with key. +let mayLoad = 1, mayStore = 1, Defs = [CC] in + def MVCK : MemoryBinarySSd<"mvck", 0xD9, GR64>; + +// Move to primary / secondary. +let mayLoad = 1, mayStore = 1, Defs = [CC] in { + def MVCP : MemoryBinarySSd<"mvcp", 0xDA, GR64>; + def MVCS : MemoryBinarySSd<"mvcs", 0xDB, GR64>; +} + +// Move with source / destination key. +let mayLoad = 1, mayStore = 1, Uses = [R0L, R1L] in { + def MVCSK : SideEffectBinarySSE<"mvcsk", 0xE50E>; + def MVCDK : SideEffectBinarySSE<"mvcdk", 0xE50F>; +} + +// Move with optional specifications. +let mayLoad = 1, mayStore = 1, Uses = [R0L] in + def MVCOS : SideEffectTernarySSF<"mvcos", 0xC80, GR64>; + +// Move page. +let mayLoad = 1, mayStore = 1, Uses = [R0L], Defs = [CC] in + def MVPG : SideEffectBinaryRRE<"mvpg", 0xB254, GR64, GR64>; + +//===----------------------------------------------------------------------===// +// Address-Space Instructions. +//===----------------------------------------------------------------------===// + +// Load address space parameters. +let hasSideEffects = 1, Defs = [CC] in + def LASP : SideEffectBinarySSE<"lasp", 0xE500>; + +// Purge ALB. +let hasSideEffects = 1 in + def PALB : SideEffectInherentRRE<"palb", 0xB248>; + +// Program call. +let hasSideEffects = 1 in + def PC : SideEffectAddressS<"pc", 0xB218, null_frag>; + +// Program return. +let hasSideEffects = 1, Defs = [CC] in + def PR : SideEffectInherentE<"pr", 0x0101>; + +// Program transfer (with instance). +let hasSideEffects = 1 in { + def PT : SideEffectBinaryRRE<"pt", 0xB228, GR32, GR64>; + def PTI : SideEffectBinaryRRE<"pti", 0xB99E, GR64, GR64>; +} + +// Resume program. +let hasSideEffects = 1, Defs = [CC] in + def RP : SideEffectAddressS<"rp", 0xB277, null_frag>; + +// Branch in subspace group. +let hasSideEffects = 1 in + def BSG : UnaryRRE<"bsg", 0xB258, null_frag, GR64, GR64>; + +// Branch and set authority. +let hasSideEffects = 1 in + def BSA : UnaryRRE<"bsa", 0xB25A, null_frag, GR64, GR64>; + +// Test access. +let Defs = [CC] in + def TAR : SideEffectBinaryRRE<"tar", 0xB24C, AR32, GR32>; + +//===----------------------------------------------------------------------===// +// Linkage-Stack Instructions. +//===----------------------------------------------------------------------===// + +// Branch and stack. +let hasSideEffects = 1 in + def BAKR : SideEffectBinaryRRE<"bakr", 0xB240, GR64, GR64>; + +// Extract stacked registers. 
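+// (These reload general registers from the state entry most recently formed
+// on the linkage stack.)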
+let hasSideEffects = 1 in { + def EREG : SideEffectBinaryRRE<"ereg", 0xB249, GR32, GR32>; + def EREGG : SideEffectBinaryRRE<"eregg", 0xB90E, GR64, GR64>; +} + +// Extract stacked state. +let hasSideEffects = 1, Defs = [CC] in + def ESTA : UnaryRRE<"esta", 0xB24A, null_frag, GR128, GR32>; + +// Modify stacked state. +let hasSideEffects = 1 in + def MSTA : SideEffectUnaryRRE<"msta", 0xB247, GR128, null_frag>; + +//===----------------------------------------------------------------------===// +// Time-Related Instructions. +//===----------------------------------------------------------------------===// + +// Perform timing facility function. +let hasSideEffects = 1, mayLoad = 1, Uses = [R0L, R1D], Defs = [CC] in + def PTFF : SideEffectInherentE<"ptff", 0x0104>; + +// Set clock. +let hasSideEffects = 1, Defs = [CC] in + def SCK : SideEffectUnaryS<"sck", 0xB204, null_frag, 8>; + +// Set clock programmable field. +let hasSideEffects = 1, Uses = [R0L] in + def SCKPF : SideEffectInherentE<"sckpf", 0x0107>; + +// Set clock comparator. +let hasSideEffects = 1 in + def SCKC : SideEffectUnaryS<"sckc", 0xB206, null_frag, 8>; + +// Set CPU timer. +let hasSideEffects = 1 in + def SPT : SideEffectUnaryS<"spt", 0xB208, null_frag, 8>; + +// Store clock (fast / extended). +let hasSideEffects = 1, Defs = [CC] in { + def STCK : StoreInherentS<"stck", 0xB205, null_frag, 8>; + def STCKF : StoreInherentS<"stckf", 0xB27C, null_frag, 8>; + def STCKE : StoreInherentS<"stcke", 0xB278, null_frag, 16>; +} + +// Store clock comparator. +let hasSideEffects = 1 in + def STCKC : StoreInherentS<"stckc", 0xB207, null_frag, 8>; + +// Store CPU timer. +let hasSideEffects = 1 in + def STPT : StoreInherentS<"stpt", 0xB209, null_frag, 8>; + +//===----------------------------------------------------------------------===// +// CPU-Related Instructions. +//===----------------------------------------------------------------------===// + +// Store CPU address. +let hasSideEffects = 1 in + def STAP : StoreInherentS<"stap", 0xB212, null_frag, 2>; + +// Store CPU ID. +let hasSideEffects = 1 in + def STIDP : StoreInherentS<"stidp", 0xB202, null_frag, 8>; + +// Store system information. +let hasSideEffects = 1, Uses = [R0L, R1L], Defs = [R0L, CC] in + def STSI : StoreInherentS<"stsi", 0xB27D, null_frag, 0>; + +// Store facility list. +let hasSideEffects = 1 in + def STFL : StoreInherentS<"stfl", 0xB2B1, null_frag, 4>; + +// Store facility list extended. +let hasSideEffects = 1, Uses = [R0D], Defs = [R0D, CC] in + def STFLE : StoreInherentS<"stfle", 0xB2B0, null_frag, 0>; + +// Extract CPU attribute. +let hasSideEffects = 1 in + def ECAG : BinaryRSY<"ecag", 0xEB4C, null_frag, GR64>; + +// Extract CPU time. +let hasSideEffects = 1, mayLoad = 1, Defs = [R0D, R1D] in + def ECTG : SideEffectTernarySSF<"ectg", 0xC81, GR64>; + +// Perform topology function. +let hasSideEffects = 1 in + def PTF : UnaryTiedRRE<"ptf", 0xB9A2, GR64>; + +// Perform cryptographic key management operation. +let Predicates = [FeatureMessageSecurityAssist3], + hasSideEffects = 1, Uses = [R0L, R1D] in + def PCKMO : SideEffectInherentRRE<"pckmo", 0xB928>; + +//===----------------------------------------------------------------------===// +// Miscellaneous Instructions. +//===----------------------------------------------------------------------===// + +// Supervisor call. +let hasSideEffects = 1, isCall = 1, Defs = [CC] in + def SVC : SideEffectUnaryI<"svc", 0x0A, imm32zx8>; + +// Monitor call. 
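+// (mc raises a monitor-event program interruption when the monitor class in
+// its immediate is enabled; roughly 'mc 0(%r3), 1' as assembler syntax.)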
+let hasSideEffects = 1, isCall = 1 in + def MC : SideEffectBinarySI<"mc", 0xAF, imm32zx8>; + +// Diagnose. +let hasSideEffects = 1, isCall = 1 in + def DIAG : SideEffectTernaryRS<"diag", 0x83, GR32, GR32>; + +// Trace. +let hasSideEffects = 1, mayLoad = 1 in { + def TRACE : SideEffectTernaryRS<"trace", 0x99, GR32, GR32>; + def TRACG : SideEffectTernaryRSY<"tracg", 0xEB0F, GR64, GR64>; +} + +// Trap. +let hasSideEffects = 1 in { + def TRAP2 : SideEffectInherentE<"trap2", 0x01FF>; + def TRAP4 : SideEffectAddressS<"trap4", 0xB2FF, null_frag>; +} + +// Signal processor. +let hasSideEffects = 1, Defs = [CC] in + def SIGP : SideEffectTernaryRS<"sigp", 0xAE, GR64, GR64>; + +// Signal adapter. +let hasSideEffects = 1, Uses = [R0D, R1D, R2D, R3D], Defs = [CC] in + def SIGA : SideEffectAddressS<"siga", 0xB274, null_frag>; + +// Start interpretive execution. +let hasSideEffects = 1, Defs = [CC] in + def SIE : SideEffectUnaryS<"sie", 0xB214, null_frag, 0>; + +//===----------------------------------------------------------------------===// +// CPU-Measurement Facility Instructions (SA23-2260). +//===----------------------------------------------------------------------===// + +// Load program parameter +let hasSideEffects = 1 in + def LPP : SideEffectUnaryS<"lpp", 0xB280, null_frag, 8>; + +// Extract coprocessor-group address. +let hasSideEffects = 1, Defs = [CC] in + def ECPGA : UnaryRRE<"ecpga", 0xB2ED, null_frag, GR32, GR64>; + +// Extract CPU counter. +let hasSideEffects = 1, Defs = [CC] in + def ECCTR : UnaryRRE<"ecctr", 0xB2E4, null_frag, GR64, GR64>; + +// Extract peripheral counter. +let hasSideEffects = 1, Defs = [CC] in + def EPCTR : UnaryRRE<"epctr", 0xB2E5, null_frag, GR64, GR64>; + +// Load CPU-counter-set controls. +let hasSideEffects = 1, Defs = [CC] in + def LCCTL : SideEffectUnaryS<"lcctl", 0xB284, null_frag, 8>; + +// Load peripheral-counter-set controls. +let hasSideEffects = 1, Defs = [CC] in + def LPCTL : SideEffectUnaryS<"lpctl", 0xB285, null_frag, 8>; + +// Load sampling controls. +let hasSideEffects = 1, Defs = [CC] in + def LSCTL : SideEffectUnaryS<"lsctl", 0xB287, null_frag, 0>; + +// Query sampling information. +let hasSideEffects = 1 in + def QSI : StoreInherentS<"qsi", 0xB286, null_frag, 0>; + +// Query counter information. +let hasSideEffects = 1 in + def QCTRI : StoreInherentS<"qctri", 0xB28E, null_frag, 0>; + +// Set CPU counter. +let hasSideEffects = 1, Defs = [CC] in + def SCCTR : SideEffectBinaryRRE<"scctr", 0xB2E0, GR64, GR64>; + +// Set peripheral counter. +let hasSideEffects = 1, Defs = [CC] in + def SPCTR : SideEffectBinaryRRE<"spctr", 0xB2E1, GR64, GR64>; + +//===----------------------------------------------------------------------===// +// I/O Instructions (Principles of Operation, Chapter 14). +//===----------------------------------------------------------------------===// + +// Clear subchannel. +let hasSideEffects = 1, Uses = [R1L], Defs = [CC] in + def CSCH : SideEffectInherentS<"csch", 0xB230, null_frag>; + +// Halt subchannel. +let hasSideEffects = 1, Uses = [R1L], Defs = [CC] in + def HSCH : SideEffectInherentS<"hsch", 0xB231, null_frag>; + +// Modify subchannel. +let hasSideEffects = 1, Uses = [R1L], Defs = [CC] in + def MSCH : SideEffectUnaryS<"msch", 0xB232, null_frag, 0>; + +// Resume subchannel. +let hasSideEffects = 1, Uses = [R1L], Defs = [CC] in + def RSCH : SideEffectInherentS<"rsch", 0xB238, null_frag>; + +// Start subchannel. 
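+// (As with the other subchannel instructions here, the subchannel is
+// designated by the subsystem-identification word implicitly read from %r1,
+// hence Uses = [R1L].)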
+let hasSideEffects = 1, Uses = [R1L], Defs = [CC] in
+  def SSCH : SideEffectUnaryS<"ssch", 0xB233, null_frag, 0>;
+
+// Store subchannel.
+let hasSideEffects = 1, Uses = [R1L], Defs = [CC] in
+  def STSCH : StoreInherentS<"stsch", 0xB234, null_frag, 0>;
+
+// Test subchannel.
+let hasSideEffects = 1, Uses = [R1L], Defs = [CC] in
+  def TSCH : StoreInherentS<"tsch", 0xB235, null_frag, 0>;
+
+// Cancel subchannel.
+let hasSideEffects = 1, Uses = [R1L], Defs = [CC] in
+  def XSCH : SideEffectInherentS<"xsch", 0xB276, null_frag>;
+
+// Reset channel path.
+let hasSideEffects = 1, Uses = [R1L], Defs = [CC] in
+  def RCHP : SideEffectInherentS<"rchp", 0xB23B, null_frag>;
+
+// Set channel monitor.
+let hasSideEffects = 1, mayLoad = 1, Uses = [R1L, R2D] in
+  def SCHM : SideEffectInherentS<"schm", 0xB23C, null_frag>;
+
+// Store channel path status.
+let hasSideEffects = 1 in
+  def STCPS : StoreInherentS<"stcps", 0xB23A, null_frag, 0>;
+
+// Store channel report word.
+let hasSideEffects = 1, Defs = [CC] in
+  def STCRW : StoreInherentS<"stcrw", 0xB239, null_frag, 0>;
+
+// Test pending interruption.
+let hasSideEffects = 1, Defs = [CC] in
+  def TPI : StoreInherentS<"tpi", 0xB236, null_frag, 0>;
+
+// Set address limit.
+let hasSideEffects = 1, Uses = [R1L] in
+  def SAL : SideEffectInherentS<"sal", 0xB237, null_frag>;
+
diff --git a/interpreter/llvm/src/lib/Target/SystemZ/SystemZInstrVector.td b/interpreter/llvm/src/lib/Target/SystemZ/SystemZInstrVector.td
index 0158fe6aec08d..c9a02d9c80821 100644
--- a/interpreter/llvm/src/lib/Target/SystemZ/SystemZInstrVector.td
+++ b/interpreter/llvm/src/lib/Target/SystemZ/SystemZInstrVector.td
@@ -14,7 +14,7 @@ let Predicates = [FeatureVector] in {
   // Register move.
   def VLR : UnaryVRRa<"vlr", 0xE756, null_frag, v128any, v128any>;
-  def VLR32 : UnaryAliasVRR<null_frag, v32eb, v32eb>;
+  def VLR32 : UnaryAliasVRR<null_frag, v32sb, v32sb>;
   def VLR64 : UnaryAliasVRR<null_frag, v64db, v64db>;
 
   // Load GR from VR element.
@@ -141,7 +141,7 @@ let Predicates = [FeatureVector] in {
   // LEY and LDY offer full 20-bit displacement fields. It's often better
   // to use those instructions rather than force a 20-bit displacement
   // into a GPR temporary.
-  def VL32 : UnaryAliasVRX<load, v32eb, bdxaddr12pair>;
+  def VL32 : UnaryAliasVRX<load, v32sb, bdxaddr12pair>;
   def VL64 : UnaryAliasVRX<load, v64db, bdxaddr12pair>;
 
   // Load logical element and zero.
@@ -154,6 +154,11 @@ let Predicates = [FeatureVector] in {
             (VLLEZF bdxaddr12only:$addr)>;
   def : Pat<(v2f64 (z_vllezf64 bdxaddr12only:$addr)),
             (VLLEZG bdxaddr12only:$addr)>;
+  let Predicates = [FeatureVectorEnhancements1] in {
+    def VLLEZLF : UnaryVRX<"vllezlf", 0xE704, z_vllezli32, v128f, 4, 6>;
+    def : Pat<(v4f32 (z_vllezlf32 bdxaddr12only:$addr)),
+              (VLLEZLF bdxaddr12only:$addr)>;
+  }
 
   // Load element.
   def VLEB : TernaryVRX<"vleb", 0xE700, z_vlei8, v128b, v128b, 1, imm32zx4>;
@@ -170,6 +175,13 @@ let Predicates = [FeatureVector] in {
     def VGEG : TernaryVRV<"vgeg", 0xE712, 8, imm32zx1>;
 }
 
+let Predicates = [FeatureVectorPackedDecimal] in {
+  // Load rightmost with length. The number of loaded bytes is only known
+  // at run time.
+  def VLRL : BinaryVSI<"vlrl", 0xE635, int_s390_vlrl, 0>;
+  def VLRLR : BinaryVRSd<"vlrlr", 0xE637, int_s390_vlrl, 0>;
+}
+
 // Use replicating loads if we're inserting a single element into an
 // undefined vector. This avoids a false dependency on the previous
 // register contents.
@@ -219,7 +231,7 @@ let Predicates = [FeatureVector] in {
   // STEY and STDY offer full 20-bit displacement fields. It's often better
   // to use those instructions rather than force a 20-bit displacement
   // into a GPR temporary.
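  // (A VRX-format access has only a 12-bit unsigned displacement; e.g.
  //  'stdy %f0, 5000(%r15)' works directly, while 'vsteg' would first need
  //  the address materialized in a GPR.)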
-  def VST32 : StoreAliasVRX<store, v32eb, bdxaddr12pair>;
+  def VST32 : StoreAliasVRX<store, v32sb, bdxaddr12pair>;
   def VST64 : StoreAliasVRX<store, v64db, bdxaddr12pair>;
 
   // Scatter element.
@@ -227,6 +239,13 @@ let Predicates = [FeatureVector] in {
     def VSCEG : StoreBinaryVRV<"vsceg", 0xE71A, 8, imm32zx1>;
 }
 
+let Predicates = [FeatureVectorPackedDecimal] in {
+  // Store rightmost with length. The number of stored bytes is only known
+  // at run time.
+  def VSTRL : StoreLengthVSI<"vstrl", 0xE63D, int_s390_vstrl, 0>;
+  def VSTRLR : StoreLengthVRSd<"vstrlr", 0xE63F, int_s390_vstrl, 0>;
+}
+
 //===----------------------------------------------------------------------===//
 // Selects and permutes
 //===----------------------------------------------------------------------===//
@@ -256,6 +275,10 @@ let Predicates = [FeatureVector] in {
   // Permute doubleword immediate.
   def VPDI : TernaryVRRc<"vpdi", 0xE784, z_permute_dwords, v128g, v128g>;
 
+  // Bit Permute.
+  let Predicates = [FeatureVectorEnhancements1] in
+    def VBPERM : BinaryVRRc<"vbperm", 0xE785, int_s390_vbperm, v128g, v128b>;
+
   // Replicate.
   def VREP: BinaryVRIcGeneric<"vrep", 0xE74D>;
   def VREPB : BinaryVRIc<"vrepb", 0xE74D, z_splat, v128b, v128b, 0>;
@@ -424,6 +447,10 @@ let Predicates = [FeatureVector] in {
   def VCTZF : UnaryVRRa<"vctzf", 0xE752, cttz, v128f, v128f, 2>;
   def VCTZG : UnaryVRRa<"vctzg", 0xE752, cttz, v128g, v128g, 3>;
 
+  // Not exclusive or.
+  let Predicates = [FeatureVectorEnhancements1] in
+    def VNX : BinaryVRRc<"vnx", 0xE76C, null_frag, v128any, v128any>;
+
   // Exclusive or.
   def VX : BinaryVRRc<"vx", 0xE76D, null_frag, v128any, v128any>;
 
@@ -567,6 +594,17 @@ let Predicates = [FeatureVector] in {
   def VMLOH : BinaryVRRc<"vmloh", 0xE7A5, int_s390_vmloh, v128f, v128h, 1>;
   def VMLOF : BinaryVRRc<"vmlof", 0xE7A5, int_s390_vmlof, v128g, v128f, 2>;
 
+  // Multiply sum logical.
+  let Predicates = [FeatureVectorEnhancements1] in {
+    def VMSL : QuaternaryVRRdGeneric<"vmsl", 0xE7B8>;
+    def VMSLG : QuaternaryVRRd<"vmslg", 0xE7B8, int_s390_vmslg,
+                               v128q, v128g, v128g, v128q, 3>;
+  }
+
+  // Nand.
+  let Predicates = [FeatureVectorEnhancements1] in
+    def VNN : BinaryVRRc<"vnn", 0xE76E, null_frag, v128any, v128any>;
+
   // Nor.
   def VNO : BinaryVRRc<"vno", 0xE76B, null_frag, v128any, v128any>;
   def : InstAlias<"vnot\t$V1, $V2", (VNO VR128:$V1, VR128:$V2, VR128:$V2), 0>;
@@ -574,9 +612,19 @@ let Predicates = [FeatureVector] in {
   // Or.
   def VO : BinaryVRRc<"vo", 0xE76A, null_frag, v128any, v128any>;
 
+  // Or with complement.
+  let Predicates = [FeatureVectorEnhancements1] in
+    def VOC : BinaryVRRc<"voc", 0xE76F, null_frag, v128any, v128any>;
+
   // Population count.
   def VPOPCT : UnaryVRRaGeneric<"vpopct", 0xE750>;
   def : Pat<(v16i8 (z_popcnt VR128:$x)), (VPOPCT VR128:$x, 0)>;
+  let Predicates = [FeatureVectorEnhancements1] in {
+    def VPOPCTB : UnaryVRRa<"vpopctb", 0xE750, ctpop, v128b, v128b, 0>;
+    def VPOPCTH : UnaryVRRa<"vpopcth", 0xE750, ctpop, v128h, v128h, 1>;
+    def VPOPCTF : UnaryVRRa<"vpopctf", 0xE750, ctpop, v128f, v128f, 2>;
+    def VPOPCTG : UnaryVRRa<"vpopctg", 0xE750, ctpop, v128g, v128g, 3>;
+  }
 
   // Element rotate left logical (with vector shift amount).
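  // (The 'v' variants rotate each element by the amount in the corresponding
  //  element of the second operand rather than by one scalar count.)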
   def VERLLV : BinaryVRRcGeneric<"verllv", 0xE773>;
@@ -724,6 +772,14 @@ multiclass BitwiseVectorOps<ValueType type> {
               (VNO VR128:$x, VR128:$y)>;
     def : Pat<(type (z_vnot VR128:$x)), (VNO VR128:$x, VR128:$x)>;
   }
+  let Predicates = [FeatureVectorEnhancements1] in {
+    def : Pat<(type (z_vnot (xor VR128:$x, VR128:$y))),
+              (VNX VR128:$x, VR128:$y)>;
+    def : Pat<(type (z_vnot (and VR128:$x, VR128:$y))),
+              (VNN VR128:$x, VR128:$y)>;
+    def : Pat<(type (or VR128:$x, (z_vnot VR128:$y))),
+              (VOC VR128:$x, VR128:$y)>;
+  }
 }
 
 defm : BitwiseVectorOps<v16i8>;
@@ -879,6 +935,11 @@ let Predicates = [FeatureVector] in {
   def VFA : BinaryVRRcFloatGeneric<"vfa", 0xE7E3>;
   def VFADB : BinaryVRRc<"vfadb", 0xE7E3, fadd, v128db, v128db, 3, 0>;
   def WFADB : BinaryVRRc<"wfadb", 0xE7E3, fadd, v64db, v64db, 3, 8>;
+  let Predicates = [FeatureVectorEnhancements1] in {
+    def VFASB : BinaryVRRc<"vfasb", 0xE7E3, fadd, v128sb, v128sb, 2, 0>;
+    def WFASB : BinaryVRRc<"wfasb", 0xE7E3, fadd, v32sb, v32sb, 2, 8>;
+    def WFAXB : BinaryVRRc<"wfaxb", 0xE7E3, fadd, v128xb, v128xb, 4, 8>;
+  }
 
   // Convert from fixed 64-bit.
   def VCDG : TernaryVRRaFloatGeneric<"vcdg", 0xE7C3>;
@@ -910,6 +971,11 @@ let Predicates = [FeatureVector] in {
   def VFD : BinaryVRRcFloatGeneric<"vfd", 0xE7E5>;
   def VFDDB : BinaryVRRc<"vfddb", 0xE7E5, fdiv, v128db, v128db, 3, 0>;
   def WFDDB : BinaryVRRc<"wfddb", 0xE7E5, fdiv, v64db, v64db, 3, 8>;
+  let Predicates = [FeatureVectorEnhancements1] in {
+    def VFDSB : BinaryVRRc<"vfdsb", 0xE7E5, fdiv, v128sb, v128sb, 2, 0>;
+    def WFDSB : BinaryVRRc<"wfdsb", 0xE7E5, fdiv, v32sb, v32sb, 2, 8>;
+    def WFDXB : BinaryVRRc<"wfdxb", 0xE7E5, fdiv, v128xb, v128xb, 4, 8>;
+  }
 
   // Load FP integer.
   def VFI : TernaryVRRaFloatGeneric<"vfi", 0xE7C7>;
@@ -917,66 +983,213 @@ let Predicates = [FeatureVector] in {
   def WFIDB : TernaryVRRa<"wfidb", 0xE7C7, null_frag, v64db, v64db, 3, 8>;
   defm : VectorRounding<VFIDB, v128db>;
   defm : VectorRounding<WFIDB, v64db>;
+  let Predicates = [FeatureVectorEnhancements1] in {
+    def VFISB : TernaryVRRa<"vfisb", 0xE7C7, int_s390_vfisb, v128sb, v128sb, 2, 0>;
+    def WFISB : TernaryVRRa<"wfisb", 0xE7C7, null_frag, v32sb, v32sb, 2, 8>;
+    def WFIXB : TernaryVRRa<"wfixb", 0xE7C7, null_frag, v128xb, v128xb, 4, 8>;
+    defm : VectorRounding<VFISB, v128sb>;
+    defm : VectorRounding<WFISB, v32sb>;
+    defm : VectorRounding<WFIXB, v128xb>;
+  }
 
   // Load lengthened.
   def VLDE : UnaryVRRaFloatGeneric<"vlde", 0xE7C4>;
-  def VLDEB : UnaryVRRa<"vldeb", 0xE7C4, z_vextend, v128db, v128eb, 2, 0>;
-  def WLDEB : UnaryVRRa<"wldeb", 0xE7C4, fpextend, v64db, v32eb, 2, 8>;
+  def VLDEB : UnaryVRRa<"vldeb", 0xE7C4, z_vextend, v128db, v128sb, 2, 0>;
+  def WLDEB : UnaryVRRa<"wldeb", 0xE7C4, fpextend, v64db, v32sb, 2, 8>;
+  let Predicates = [FeatureVectorEnhancements1] in {
+    let isAsmParserOnly = 1 in {
+      def VFLL : UnaryVRRaFloatGeneric<"vfll", 0xE7C4>;
+      def VFLLS : UnaryVRRa<"vflls", 0xE7C4, null_frag, v128db, v128sb, 2, 0>;
+      def WFLLS : UnaryVRRa<"wflls", 0xE7C4, null_frag, v64db, v32sb, 2, 8>;
+    }
+    def WFLLD : UnaryVRRa<"wflld", 0xE7C4, fpextend, v128xb, v64db, 3, 8>;
+    def : Pat<(f128 (fpextend (f32 VR32:$src))),
+              (WFLLD (WLDEB VR32:$src))>;
+  }
 
-  // Load rounded,
+  // Load rounded.
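+  // (Rounding to a narrower format: wledb narrows f64 to f32 and the new
+  // wflrx narrows f128 to f64; the pattern below chains them for f128->f32.)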
   def VLED : TernaryVRRaFloatGeneric<"vled", 0xE7C5>;
-  def VLEDB : TernaryVRRa<"vledb", 0xE7C5, null_frag, v128eb, v128db, 3, 0>;
-  def WLEDB : TernaryVRRa<"wledb", 0xE7C5, null_frag, v32eb, v64db, 3, 8>;
+  def VLEDB : TernaryVRRa<"vledb", 0xE7C5, null_frag, v128sb, v128db, 3, 0>;
+  def WLEDB : TernaryVRRa<"wledb", 0xE7C5, null_frag, v32sb, v64db, 3, 8>;
   def : Pat<(v4f32 (z_vround (v2f64 VR128:$src))), (VLEDB VR128:$src, 0, 0)>;
-  def : FPConversion<WLEDB, fpround, v32eb, v64db, 0, 0>;
+  def : FPConversion<WLEDB, fpround, v32sb, v64db, 0, 0>;
+  let Predicates = [FeatureVectorEnhancements1] in {
+    let isAsmParserOnly = 1 in {
+      def VFLR : TernaryVRRaFloatGeneric<"vflr", 0xE7C5>;
+      def VFLRD : TernaryVRRa<"vflrd", 0xE7C5, null_frag, v128sb, v128db, 3, 0>;
+      def WFLRD : TernaryVRRa<"wflrd", 0xE7C5, null_frag, v32sb, v64db, 3, 8>;
+    }
+    def WFLRX : TernaryVRRa<"wflrx", 0xE7C5, null_frag, v64db, v128xb, 4, 8>;
+    def : FPConversion<WFLRX, fpround, v64db, v128xb, 0, 3>;
+    def : Pat<(f32 (fpround (f128 VR128:$src))),
+              (WLEDB (WFLRX VR128:$src, 0, 3), 0, 0)>;
+  }
+
+  // Maximum.
+  multiclass VectorMax<Instruction insn, TypedReg tr> {
+    def : FPMinMax<insn, fmaxnum, tr, 4>;
+    def : FPMinMax<insn, fmaxnan, tr, 1>;
+  }
+  let Predicates = [FeatureVectorEnhancements1] in {
+    def VFMAX : TernaryVRRcFloatGeneric<"vfmax", 0xE7EF>;
+    def VFMAXDB : TernaryVRRcFloat<"vfmaxdb", 0xE7EF, int_s390_vfmaxdb,
                                    v128db, v128db, 3, 0>;
+    def WFMAXDB : TernaryVRRcFloat<"wfmaxdb", 0xE7EF, null_frag,
                                    v64db, v64db, 3, 8>;
+    def VFMAXSB : TernaryVRRcFloat<"vfmaxsb", 0xE7EF, int_s390_vfmaxsb,
                                    v128sb, v128sb, 2, 0>;
+    def WFMAXSB : TernaryVRRcFloat<"wfmaxsb", 0xE7EF, null_frag,
                                    v32sb, v32sb, 2, 8>;
+    def WFMAXXB : TernaryVRRcFloat<"wfmaxxb", 0xE7EF, null_frag,
                                    v128xb, v128xb, 4, 8>;
+    defm : VectorMax<VFMAXDB, v128db>;
+    defm : VectorMax<WFMAXDB, v64db>;
+    defm : VectorMax<VFMAXSB, v128sb>;
+    defm : VectorMax<WFMAXSB, v32sb>;
+    defm : VectorMax<WFMAXXB, v128xb>;
+  }
+
+  // Minimum.
+  multiclass VectorMin<Instruction insn, TypedReg tr> {
+    def : FPMinMax<insn, fminnum, tr, 4>;
+    def : FPMinMax<insn, fminnan, tr, 1>;
+  }
+  let Predicates = [FeatureVectorEnhancements1] in {
+    def VFMIN : TernaryVRRcFloatGeneric<"vfmin", 0xE7EE>;
+    def VFMINDB : TernaryVRRcFloat<"vfmindb", 0xE7EE, int_s390_vfmindb,
                                    v128db, v128db, 3, 0>;
+    def WFMINDB : TernaryVRRcFloat<"wfmindb", 0xE7EE, null_frag,
                                    v64db, v64db, 3, 8>;
+    def VFMINSB : TernaryVRRcFloat<"vfminsb", 0xE7EE, int_s390_vfminsb,
                                    v128sb, v128sb, 2, 0>;
+    def WFMINSB : TernaryVRRcFloat<"wfminsb", 0xE7EE, null_frag,
                                    v32sb, v32sb, 2, 8>;
+    def WFMINXB : TernaryVRRcFloat<"wfminxb", 0xE7EE, null_frag,
                                    v128xb, v128xb, 4, 8>;
+    defm : VectorMin<VFMINDB, v128db>;
+    defm : VectorMin<WFMINDB, v64db>;
+    defm : VectorMin<VFMINSB, v128sb>;
+    defm : VectorMin<WFMINSB, v32sb>;
+    defm : VectorMin<WFMINXB, v128xb>;
+  }
 
   // Multiply.
   def VFM : BinaryVRRcFloatGeneric<"vfm", 0xE7E7>;
   def VFMDB : BinaryVRRc<"vfmdb", 0xE7E7, fmul, v128db, v128db, 3, 0>;
   def WFMDB : BinaryVRRc<"wfmdb", 0xE7E7, fmul, v64db, v64db, 3, 8>;
+  let Predicates = [FeatureVectorEnhancements1] in {
+    def VFMSB : BinaryVRRc<"vfmsb", 0xE7E7, fmul, v128sb, v128sb, 2, 0>;
+    def WFMSB : BinaryVRRc<"wfmsb", 0xE7E7, fmul, v32sb, v32sb, 2, 8>;
+    def WFMXB : BinaryVRRc<"wfmxb", 0xE7E7, fmul, v128xb, v128xb, 4, 8>;
+  }
 
   // Multiply and add.
   def VFMA : TernaryVRReFloatGeneric<"vfma", 0xE78F>;
   def VFMADB : TernaryVRRe<"vfmadb", 0xE78F, fma, v128db, v128db, 0, 3>;
   def WFMADB : TernaryVRRe<"wfmadb", 0xE78F, fma, v64db, v64db, 8, 3>;
+  let Predicates = [FeatureVectorEnhancements1] in {
+    def VFMASB : TernaryVRRe<"vfmasb", 0xE78F, fma, v128sb, v128sb, 0, 2>;
+    def WFMASB : TernaryVRRe<"wfmasb", 0xE78F, fma, v32sb, v32sb, 8, 2>;
+    def WFMAXB : TernaryVRRe<"wfmaxb", 0xE78F, fma, v128xb, v128xb, 8, 4>;
+  }
 
   // Multiply and subtract.
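  // (fms here is a PatFrag along the lines of (fma a, b, (fneg c)), i.e.
  //  a*b - c computed with a single rounding.)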
def VFMS : TernaryVRReFloatGeneric<"vfms", 0xE78E>; def VFMSDB : TernaryVRRe<"vfmsdb", 0xE78E, fms, v128db, v128db, 0, 3>; def WFMSDB : TernaryVRRe<"wfmsdb", 0xE78E, fms, v64db, v64db, 8, 3>; + let Predicates = [FeatureVectorEnhancements1] in { + def VFMSSB : TernaryVRRe<"vfmssb", 0xE78E, fms, v128sb, v128sb, 0, 2>; + def WFMSSB : TernaryVRRe<"wfmssb", 0xE78E, fms, v32sb, v32sb, 8, 2>; + def WFMSXB : TernaryVRRe<"wfmsxb", 0xE78E, fms, v128xb, v128xb, 8, 4>; + } + + // Negative multiply and add. + let Predicates = [FeatureVectorEnhancements1] in { + def VFNMA : TernaryVRReFloatGeneric<"vfnma", 0xE79F>; + def VFNMADB : TernaryVRRe<"vfnmadb", 0xE79F, fnma, v128db, v128db, 0, 3>; + def WFNMADB : TernaryVRRe<"wfnmadb", 0xE79F, fnma, v64db, v64db, 8, 3>; + def VFNMASB : TernaryVRRe<"vfnmasb", 0xE79F, fnma, v128sb, v128sb, 0, 2>; + def WFNMASB : TernaryVRRe<"wfnmasb", 0xE79F, fnma, v32sb, v32sb, 8, 2>; + def WFNMAXB : TernaryVRRe<"wfnmaxb", 0xE79F, fnma, v128xb, v128xb, 8, 4>; + } + + // Negative multiply and subtract. + let Predicates = [FeatureVectorEnhancements1] in { + def VFNMS : TernaryVRReFloatGeneric<"vfnms", 0xE79E>; + def VFNMSDB : TernaryVRRe<"vfnmsdb", 0xE79E, fnms, v128db, v128db, 0, 3>; + def WFNMSDB : TernaryVRRe<"wfnmsdb", 0xE79E, fnms, v64db, v64db, 8, 3>; + def VFNMSSB : TernaryVRRe<"vfnmssb", 0xE79E, fnms, v128sb, v128sb, 0, 2>; + def WFNMSSB : TernaryVRRe<"wfnmssb", 0xE79E, fnms, v32sb, v32sb, 8, 2>; + def WFNMSXB : TernaryVRRe<"wfnmsxb", 0xE79E, fnms, v128xb, v128xb, 8, 4>; + } // Perform sign operation. def VFPSO : BinaryVRRaFloatGeneric<"vfpso", 0xE7CC>; def VFPSODB : BinaryVRRa<"vfpsodb", 0xE7CC, null_frag, v128db, v128db, 3, 0>; def WFPSODB : BinaryVRRa<"wfpsodb", 0xE7CC, null_frag, v64db, v64db, 3, 8>; + let Predicates = [FeatureVectorEnhancements1] in { + def VFPSOSB : BinaryVRRa<"vfpsosb", 0xE7CC, null_frag, v128sb, v128sb, 2, 0>; + def WFPSOSB : BinaryVRRa<"wfpsosb", 0xE7CC, null_frag, v32sb, v32sb, 2, 8>; + def WFPSOXB : BinaryVRRa<"wfpsoxb", 0xE7CC, null_frag, v128xb, v128xb, 4, 8>; + } // Load complement. def VFLCDB : UnaryVRRa<"vflcdb", 0xE7CC, fneg, v128db, v128db, 3, 0, 0>; def WFLCDB : UnaryVRRa<"wflcdb", 0xE7CC, fneg, v64db, v64db, 3, 8, 0>; + let Predicates = [FeatureVectorEnhancements1] in { + def VFLCSB : UnaryVRRa<"vflcsb", 0xE7CC, fneg, v128sb, v128sb, 2, 0, 0>; + def WFLCSB : UnaryVRRa<"wflcsb", 0xE7CC, fneg, v32sb, v32sb, 2, 8, 0>; + def WFLCXB : UnaryVRRa<"wflcxb", 0xE7CC, fneg, v128xb, v128xb, 4, 8, 0>; + } // Load negative. def VFLNDB : UnaryVRRa<"vflndb", 0xE7CC, fnabs, v128db, v128db, 3, 0, 1>; def WFLNDB : UnaryVRRa<"wflndb", 0xE7CC, fnabs, v64db, v64db, 3, 8, 1>; + let Predicates = [FeatureVectorEnhancements1] in { + def VFLNSB : UnaryVRRa<"vflnsb", 0xE7CC, fnabs, v128sb, v128sb, 2, 0, 1>; + def WFLNSB : UnaryVRRa<"wflnsb", 0xE7CC, fnabs, v32sb, v32sb, 2, 8, 1>; + def WFLNXB : UnaryVRRa<"wflnxb", 0xE7CC, fnabs, v128xb, v128xb, 4, 8, 1>; + } // Load positive. def VFLPDB : UnaryVRRa<"vflpdb", 0xE7CC, fabs, v128db, v128db, 3, 0, 2>; def WFLPDB : UnaryVRRa<"wflpdb", 0xE7CC, fabs, v64db, v64db, 3, 8, 2>; + let Predicates = [FeatureVectorEnhancements1] in { + def VFLPSB : UnaryVRRa<"vflpsb", 0xE7CC, fabs, v128sb, v128sb, 2, 0, 2>; + def WFLPSB : UnaryVRRa<"wflpsb", 0xE7CC, fabs, v32sb, v32sb, 2, 8, 2>; + def WFLPXB : UnaryVRRa<"wflpxb", 0xE7CC, fabs, v128xb, v128xb, 4, 8, 2>; + } // Square root. 
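  // (vfsqdb covers two f64 lanes; the VectorEnhancements1 forms below add
  //  four f32 lanes and an f128 scalar variant.)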
def VFSQ : UnaryVRRaFloatGeneric<"vfsq", 0xE7CE>; def VFSQDB : UnaryVRRa<"vfsqdb", 0xE7CE, fsqrt, v128db, v128db, 3, 0>; def WFSQDB : UnaryVRRa<"wfsqdb", 0xE7CE, fsqrt, v64db, v64db, 3, 8>; + let Predicates = [FeatureVectorEnhancements1] in { + def VFSQSB : UnaryVRRa<"vfsqsb", 0xE7CE, fsqrt, v128sb, v128sb, 2, 0>; + def WFSQSB : UnaryVRRa<"wfsqsb", 0xE7CE, fsqrt, v32sb, v32sb, 2, 8>; + def WFSQXB : UnaryVRRa<"wfsqxb", 0xE7CE, fsqrt, v128xb, v128xb, 4, 8>; + } // Subtract. def VFS : BinaryVRRcFloatGeneric<"vfs", 0xE7E2>; def VFSDB : BinaryVRRc<"vfsdb", 0xE7E2, fsub, v128db, v128db, 3, 0>; def WFSDB : BinaryVRRc<"wfsdb", 0xE7E2, fsub, v64db, v64db, 3, 8>; + let Predicates = [FeatureVectorEnhancements1] in { + def VFSSB : BinaryVRRc<"vfssb", 0xE7E2, fsub, v128sb, v128sb, 2, 0>; + def WFSSB : BinaryVRRc<"wfssb", 0xE7E2, fsub, v32sb, v32sb, 2, 8>; + def WFSXB : BinaryVRRc<"wfsxb", 0xE7E2, fsub, v128xb, v128xb, 4, 8>; + } // Test data class immediate. let Defs = [CC] in { def VFTCI : BinaryVRIeFloatGeneric<"vftci", 0xE74A>; def VFTCIDB : BinaryVRIe<"vftcidb", 0xE74A, z_vftci, v128g, v128db, 3, 0>; def WFTCIDB : BinaryVRIe<"wftcidb", 0xE74A, null_frag, v64g, v64db, 3, 8>; + let Predicates = [FeatureVectorEnhancements1] in { + def VFTCISB : BinaryVRIe<"vftcisb", 0xE74A, z_vftci, v128f, v128sb, 2, 0>; + def WFTCISB : BinaryVRIe<"wftcisb", 0xE74A, null_frag, v32f, v32sb, 2, 8>; + def WFTCIXB : BinaryVRIe<"wftcixb", 0xE74A, null_frag, v128q, v128xb, 4, 8>; + } } } @@ -989,12 +1202,20 @@ let Predicates = [FeatureVector] in { let Defs = [CC] in { def WFC : CompareVRRaFloatGeneric<"wfc", 0xE7CB>; def WFCDB : CompareVRRa<"wfcdb", 0xE7CB, z_fcmp, v64db, 3>; + let Predicates = [FeatureVectorEnhancements1] in { + def WFCSB : CompareVRRa<"wfcsb", 0xE7CB, z_fcmp, v32sb, 2>; + def WFCXB : CompareVRRa<"wfcxb", 0xE7CB, z_fcmp, v128xb, 4>; + } } // Compare and signal scalar. let Defs = [CC] in { def WFK : CompareVRRaFloatGeneric<"wfk", 0xE7CA>; def WFKDB : CompareVRRa<"wfkdb", 0xE7CA, null_frag, v64db, 3>; + let Predicates = [FeatureVectorEnhancements1] in { + def WFKSB : CompareVRRa<"wfksb", 0xE7CA, null_frag, v32sb, 2>; + def WFKXB : CompareVRRa<"wfkxb", 0xE7CA, null_frag, v128xb, 4>; + } } // Compare equal. @@ -1003,6 +1224,28 @@ let Predicates = [FeatureVector] in { v128g, v128db, 3, 0>; defm WFCEDB : BinaryVRRcSPair<"wfcedb", 0xE7E8, null_frag, null_frag, v64g, v64db, 3, 8>; + let Predicates = [FeatureVectorEnhancements1] in { + defm VFCESB : BinaryVRRcSPair<"vfcesb", 0xE7E8, z_vfcmpe, z_vfcmpes, + v128f, v128sb, 2, 0>; + defm WFCESB : BinaryVRRcSPair<"wfcesb", 0xE7E8, null_frag, null_frag, + v32f, v32sb, 2, 8>; + defm WFCEXB : BinaryVRRcSPair<"wfcexb", 0xE7E8, null_frag, null_frag, + v128q, v128xb, 4, 8>; + } + + // Compare and signal equal. + let Predicates = [FeatureVectorEnhancements1] in { + defm VFKEDB : BinaryVRRcSPair<"vfkedb", 0xE7E8, null_frag, null_frag, + v128g, v128db, 3, 4>; + defm WFKEDB : BinaryVRRcSPair<"wfkedb", 0xE7E8, null_frag, null_frag, + v64g, v64db, 3, 12>; + defm VFKESB : BinaryVRRcSPair<"vfkesb", 0xE7E8, null_frag, null_frag, + v128f, v128sb, 2, 4>; + defm WFKESB : BinaryVRRcSPair<"wfkesb", 0xE7E8, null_frag, null_frag, + v32f, v32sb, 2, 12>; + defm WFKEXB : BinaryVRRcSPair<"wfkexb", 0xE7E8, null_frag, null_frag, + v128q, v128xb, 4, 12>; + } // Compare high. 
def VFCH : BinaryVRRcSPairFloatGeneric<"vfch", 0xE7EB>; @@ -1010,6 +1253,28 @@ let Predicates = [FeatureVector] in { v128g, v128db, 3, 0>; defm WFCHDB : BinaryVRRcSPair<"wfchdb", 0xE7EB, null_frag, null_frag, v64g, v64db, 3, 8>; + let Predicates = [FeatureVectorEnhancements1] in { + defm VFCHSB : BinaryVRRcSPair<"vfchsb", 0xE7EB, z_vfcmph, z_vfcmphs, + v128f, v128sb, 2, 0>; + defm WFCHSB : BinaryVRRcSPair<"wfchsb", 0xE7EB, null_frag, null_frag, + v32f, v32sb, 2, 8>; + defm WFCHXB : BinaryVRRcSPair<"wfchxb", 0xE7EB, null_frag, null_frag, + v128q, v128xb, 4, 8>; + } + + // Compare and signal high. + let Predicates = [FeatureVectorEnhancements1] in { + defm VFKHDB : BinaryVRRcSPair<"vfkhdb", 0xE7EB, null_frag, null_frag, + v128g, v128db, 3, 4>; + defm WFKHDB : BinaryVRRcSPair<"wfkhdb", 0xE7EB, null_frag, null_frag, + v64g, v64db, 3, 12>; + defm VFKHSB : BinaryVRRcSPair<"vfkhsb", 0xE7EB, null_frag, null_frag, + v128f, v128sb, 2, 4>; + defm WFKHSB : BinaryVRRcSPair<"wfkhsb", 0xE7EB, null_frag, null_frag, + v32f, v32sb, 2, 12>; + defm WFKHXB : BinaryVRRcSPair<"wfkhxb", 0xE7EB, null_frag, null_frag, + v128q, v128xb, 4, 12>; + } // Compare high or equal. def VFCHE : BinaryVRRcSPairFloatGeneric<"vfche", 0xE7EA>; @@ -1017,6 +1282,28 @@ let Predicates = [FeatureVector] in { v128g, v128db, 3, 0>; defm WFCHEDB : BinaryVRRcSPair<"wfchedb", 0xE7EA, null_frag, null_frag, v64g, v64db, 3, 8>; + let Predicates = [FeatureVectorEnhancements1] in { + defm VFCHESB : BinaryVRRcSPair<"vfchesb", 0xE7EA, z_vfcmphe, z_vfcmphes, + v128f, v128sb, 2, 0>; + defm WFCHESB : BinaryVRRcSPair<"wfchesb", 0xE7EA, null_frag, null_frag, + v32f, v32sb, 2, 8>; + defm WFCHEXB : BinaryVRRcSPair<"wfchexb", 0xE7EA, null_frag, null_frag, + v128q, v128xb, 4, 8>; + } + + // Compare and signal high or equal. 
+ let Predicates = [FeatureVectorEnhancements1] in { + defm VFKHEDB : BinaryVRRcSPair<"vfkhedb", 0xE7EA, null_frag, null_frag, + v128g, v128db, 3, 4>; + defm WFKHEDB : BinaryVRRcSPair<"wfkhedb", 0xE7EA, null_frag, null_frag, + v64g, v64db, 3, 12>; + defm VFKHESB : BinaryVRRcSPair<"vfkhesb", 0xE7EA, null_frag, null_frag, + v128f, v128sb, 2, 4>; + defm WFKHESB : BinaryVRRcSPair<"wfkhesb", 0xE7EA, null_frag, null_frag, + v32f, v32sb, 2, 12>; + defm WFKHEXB : BinaryVRRcSPair<"wfkhexb", 0xE7EA, null_frag, null_frag, + v128q, v128xb, 4, 12>; + } } //===----------------------------------------------------------------------===// @@ -1028,36 +1315,49 @@ def : Pat<(v16i8 (bitconvert (v4i32 VR128:$src))), (v16i8 VR128:$src)>; def : Pat<(v16i8 (bitconvert (v2i64 VR128:$src))), (v16i8 VR128:$src)>; def : Pat<(v16i8 (bitconvert (v4f32 VR128:$src))), (v16i8 VR128:$src)>; def : Pat<(v16i8 (bitconvert (v2f64 VR128:$src))), (v16i8 VR128:$src)>; +def : Pat<(v16i8 (bitconvert (f128 VR128:$src))), (v16i8 VR128:$src)>; def : Pat<(v8i16 (bitconvert (v16i8 VR128:$src))), (v8i16 VR128:$src)>; def : Pat<(v8i16 (bitconvert (v4i32 VR128:$src))), (v8i16 VR128:$src)>; def : Pat<(v8i16 (bitconvert (v2i64 VR128:$src))), (v8i16 VR128:$src)>; def : Pat<(v8i16 (bitconvert (v4f32 VR128:$src))), (v8i16 VR128:$src)>; def : Pat<(v8i16 (bitconvert (v2f64 VR128:$src))), (v8i16 VR128:$src)>; +def : Pat<(v8i16 (bitconvert (f128 VR128:$src))), (v8i16 VR128:$src)>; def : Pat<(v4i32 (bitconvert (v16i8 VR128:$src))), (v4i32 VR128:$src)>; def : Pat<(v4i32 (bitconvert (v8i16 VR128:$src))), (v4i32 VR128:$src)>; def : Pat<(v4i32 (bitconvert (v2i64 VR128:$src))), (v4i32 VR128:$src)>; def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>; def : Pat<(v4i32 (bitconvert (v2f64 VR128:$src))), (v4i32 VR128:$src)>; +def : Pat<(v4i32 (bitconvert (f128 VR128:$src))), (v4i32 VR128:$src)>; def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>; def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>; def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>; def : Pat<(v2i64 (bitconvert (v4f32 VR128:$src))), (v2i64 VR128:$src)>; def : Pat<(v2i64 (bitconvert (v2f64 VR128:$src))), (v2i64 VR128:$src)>; +def : Pat<(v2i64 (bitconvert (f128 VR128:$src))), (v2i64 VR128:$src)>; def : Pat<(v4f32 (bitconvert (v16i8 VR128:$src))), (v4f32 VR128:$src)>; def : Pat<(v4f32 (bitconvert (v8i16 VR128:$src))), (v4f32 VR128:$src)>; def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>; def : Pat<(v4f32 (bitconvert (v2i64 VR128:$src))), (v4f32 VR128:$src)>; def : Pat<(v4f32 (bitconvert (v2f64 VR128:$src))), (v4f32 VR128:$src)>; +def : Pat<(v4f32 (bitconvert (f128 VR128:$src))), (v4f32 VR128:$src)>; def : Pat<(v2f64 (bitconvert (v16i8 VR128:$src))), (v2f64 VR128:$src)>; def : Pat<(v2f64 (bitconvert (v8i16 VR128:$src))), (v2f64 VR128:$src)>; def : Pat<(v2f64 (bitconvert (v4i32 VR128:$src))), (v2f64 VR128:$src)>; def : Pat<(v2f64 (bitconvert (v2i64 VR128:$src))), (v2f64 VR128:$src)>; def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>; +def : Pat<(v2f64 (bitconvert (f128 VR128:$src))), (v2f64 VR128:$src)>; + +def : Pat<(f128 (bitconvert (v16i8 VR128:$src))), (f128 VR128:$src)>; +def : Pat<(f128 (bitconvert (v8i16 VR128:$src))), (f128 VR128:$src)>; +def : Pat<(f128 (bitconvert (v4i32 VR128:$src))), (f128 VR128:$src)>; +def : Pat<(f128 (bitconvert (v2i64 VR128:$src))), (f128 VR128:$src)>; +def : Pat<(f128 (bitconvert (v4f32 VR128:$src))), (f128 VR128:$src)>; +def : Pat<(f128 
(bitconvert (v2f64 VR128:$src))), (f128 VR128:$src)>; //===----------------------------------------------------------------------===// // Replicating scalars @@ -1133,6 +1433,20 @@ let AddedComplexity = 4 in { (EXTRACT_SUBREG (VREPG VR128:$vec, imm32zx1:$index), subreg_r64)>; } +//===----------------------------------------------------------------------===// +// Support for 128-bit floating-point values in vector registers +//===----------------------------------------------------------------------===// + +let Predicates = [FeatureVectorEnhancements1] in { + def : Pat<(f128 (load bdxaddr12only:$addr)), + (VL bdxaddr12only:$addr)>; + def : Pat<(store (f128 VR128:$src), bdxaddr12only:$addr), + (VST VR128:$src, bdxaddr12only:$addr)>; + + def : Pat<(f128 fpimm0), (VZERO)>; + def : Pat<(f128 fpimmneg0), (WFLNXB (VZERO))>; +} + //===----------------------------------------------------------------------===// // String instructions //===----------------------------------------------------------------------===// @@ -1202,3 +1516,37 @@ let Predicates = [FeatureVector] in { defm VSTRCZF : QuaternaryOptVRRdSPair<"vstrczf", 0xE78A, int_s390_vstrczf, z_vstrcz_cc, v128f, v128f, 2, 2>; } + +//===----------------------------------------------------------------------===// +// Packed-decimal instructions +//===----------------------------------------------------------------------===// + +let Predicates = [FeatureVectorPackedDecimal] in { + def VLIP : BinaryVRIh<"vlip", 0xE649>; + + def VPKZ : BinaryVSI<"vpkz", 0xE634, null_frag, 0>; + def VUPKZ : StoreLengthVSI<"vupkz", 0xE63C, null_frag, 0>; + + let Defs = [CC] in { + def VCVB : BinaryVRRi<"vcvb", 0xE650, GR32>; + def VCVBG : BinaryVRRi<"vcvbg", 0xE652, GR64>; + def VCVD : TernaryVRIi<"vcvd", 0xE658, GR32>; + def VCVDG : TernaryVRIi<"vcvdg", 0xE65A, GR64>; + + def VAP : QuaternaryVRIf<"vap", 0xE671>; + def VSP : QuaternaryVRIf<"vsp", 0xE673>; + + def VMP : QuaternaryVRIf<"vmp", 0xE678>; + def VMSP : QuaternaryVRIf<"vmsp", 0xE679>; + + def VDP : QuaternaryVRIf<"vdp", 0xE67A>; + def VRP : QuaternaryVRIf<"vrp", 0xE67B>; + def VSDP : QuaternaryVRIf<"vsdp", 0xE67E>; + + def VSRP : QuaternaryVRIg<"vsrp", 0xE659>; + def VPSOP : QuaternaryVRIg<"vpsop", 0xE65B>; + + def VTP : TestVRRg<"vtp", 0xE65F>; + def VCP : CompareVRRh<"vcp", 0xE677>; + } +} diff --git a/interpreter/llvm/src/lib/Target/SystemZ/SystemZLDCleanup.cpp b/interpreter/llvm/src/lib/Target/SystemZ/SystemZLDCleanup.cpp index ec8ce6e911fae..d4cd89ce590fc 100644 --- a/interpreter/llvm/src/lib/Target/SystemZ/SystemZLDCleanup.cpp +++ b/interpreter/llvm/src/lib/Target/SystemZ/SystemZLDCleanup.cpp @@ -13,8 +13,8 @@ // //===----------------------------------------------------------------------===// -#include "SystemZTargetMachine.h" #include "SystemZMachineFunctionInfo.h" +#include "SystemZTargetMachine.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -127,7 +127,7 @@ MachineInstr *SystemZLDCleanup::ReplaceTLSCall(MachineInstr *I, return Copy; } -// Create a virtal register in *TLSBaseAddrReg, and populate it by +// Create a virtual register in *TLSBaseAddrReg, and populate it by // inserting a copy instruction after I. Returns the new instruction. 
MachineInstr *SystemZLDCleanup::SetRegister(MachineInstr *I,
                                            unsigned *TLSBaseAddrReg) {
diff --git a/interpreter/llvm/src/lib/Target/SystemZ/SystemZMachineScheduler.cpp b/interpreter/llvm/src/lib/Target/SystemZ/SystemZMachineScheduler.cpp
index ab6020f3f1896..8342463c1086f 100644
--- a/interpreter/llvm/src/lib/Target/SystemZ/SystemZMachineScheduler.cpp
+++ b/interpreter/llvm/src/lib/Target/SystemZ/SystemZMachineScheduler.cpp
@@ -18,12 +18,12 @@
 using namespace llvm;

-#define DEBUG_TYPE "misched"
+#define DEBUG_TYPE "machine-scheduler"

 #ifndef NDEBUG
 // Print the set of SUs
 void SystemZPostRASchedStrategy::SUSet::
-dump(SystemZHazardRecognizer &HazardRec) {
+dump(SystemZHazardRecognizer &HazardRec) const {
   dbgs() << "{";
   for (auto &SU : *this) {
     HazardRec.dumpSU(SU, dbgs());
diff --git a/interpreter/llvm/src/lib/Target/SystemZ/SystemZMachineScheduler.h b/interpreter/llvm/src/lib/Target/SystemZ/SystemZMachineScheduler.h
index 12357e0348a9e..3dfef388691e7 100644
--- a/interpreter/llvm/src/lib/Target/SystemZ/SystemZMachineScheduler.h
+++ b/interpreter/llvm/src/lib/Target/SystemZ/SystemZMachineScheduler.h
@@ -72,7 +72,7 @@ class SystemZPostRASchedStrategy : public MachineSchedStrategy {
   // A set of SUs with a sorter and dump method.
   struct SUSet : std::set<SUnit*, SUSorter> {
     #ifndef NDEBUG
-    void dump(SystemZHazardRecognizer &HazardRec);
+    void dump(SystemZHazardRecognizer &HazardRec) const;
     #endif
   };
diff --git a/interpreter/llvm/src/lib/Target/SystemZ/SystemZOperators.td b/interpreter/llvm/src/lib/Target/SystemZ/SystemZOperators.td
index adfc69c5d4cf4..759a8bb0ce14d 100644
--- a/interpreter/llvm/src/lib/Target/SystemZ/SystemZOperators.td
+++ b/interpreter/llvm/src/lib/Target/SystemZ/SystemZOperators.td
@@ -36,14 +36,10 @@ def SDT_ZWrapOffset       : SDTypeProfile<1, 2,
                                           [SDTCisSameAs<0, 1>,
                                            SDTCisSameAs<0, 2>,
                                            SDTCisPtrTy<0>]>;
 def SDT_ZAdjDynAlloc      : SDTypeProfile<1, 0, [SDTCisVT<0, i64>]>;
-def SDT_ZGR128Binary32    : SDTypeProfile<1, 2,
+def SDT_ZGR128Binary      : SDTypeProfile<1, 2,
                                           [SDTCisVT<0, untyped>,
-                                           SDTCisVT<1, untyped>,
-                                           SDTCisVT<2, i32>]>;
-def SDT_ZGR128Binary64    : SDTypeProfile<1, 2,
-                                          [SDTCisVT<0, untyped>,
-                                           SDTCisVT<1, untyped>,
-                                           SDTCisVT<2, i64>]>;
+                                           SDTCisInt<1>,
+                                           SDTCisInt<2>]>;
 def SDT_ZAtomicLoadBinaryW : SDTypeProfile<1, 5,
                                            [SDTCisVT<0, i32>,
                                             SDTCisPtrTy<1>,
@@ -185,14 +181,11 @@ def z_select_ccmask     : SDNode<"SystemZISD::SELECT_CCMASK", SDT_ZSelectCCMask,
                                  [SDNPInGlue]>;
 def z_adjdynalloc       : SDNode<"SystemZISD::ADJDYNALLOC", SDT_ZAdjDynAlloc>;
 def z_popcnt            : SDNode<"SystemZISD::POPCNT", SDTIntUnaryOp>;
-def z_umul_lohi64       : SDNode<"SystemZISD::UMUL_LOHI64", SDT_ZGR128Binary64>;
-def z_sdivrem32         : SDNode<"SystemZISD::SDIVREM32", SDT_ZGR128Binary32>;
-def z_sdivrem64         : SDNode<"SystemZISD::SDIVREM64", SDT_ZGR128Binary64>;
-def z_udivrem32         : SDNode<"SystemZISD::UDIVREM32", SDT_ZGR128Binary32>;
-def z_udivrem64         : SDNode<"SystemZISD::UDIVREM64", SDT_ZGR128Binary64>;
-
-def z_serialize         : SDNode<"SystemZISD::SERIALIZE", SDTNone,
-                                 [SDNPHasChain, SDNPMayStore]>;
+def z_smul_lohi         : SDNode<"SystemZISD::SMUL_LOHI", SDT_ZGR128Binary>;
+def z_umul_lohi         : SDNode<"SystemZISD::UMUL_LOHI", SDT_ZGR128Binary>;
+def z_sdivrem           : SDNode<"SystemZISD::SDIVREM", SDT_ZGR128Binary>;
+def z_udivrem           : SDNode<"SystemZISD::UDIVREM", SDT_ZGR128Binary>;
+
 def z_membarrier        : SDNode<"SystemZISD::MEMBARRIER", SDTNone,
                                  [SDNPHasChain, SDNPSideEffect]>;
@@ -557,6 +550,12 @@ def z_fma : PatFrag<(ops node:$src1, node:$src2, node:$src3),
 def z_fms : PatFrag<(ops node:$src1, node:$src2, node:$src3),
                     (fma node:$src2, node:$src3, (fneg node:$src1))>;
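// The fnma/fnms fragments introduced below fold an outer fneg into the fused
// operation, so a negated multiply-and-add can select as a single z14
// instruction. As a sketch, given the definitions in this patch, a v2f64 DAG
// of the form
//   (fneg (fma x, y, z))
// is matched directly by
//   def VFNMADB : TernaryVRRe<"vfnmadb", 0xE79F, fnma, v128db, v128db, 0, 3>;
// instead of a VFMADB followed by a separate load complement (VFLCDB).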
+// Negative fused multiply-add and multiply-subtract.
+def fnma : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+                   (fneg (fma node:$src1, node:$src2, node:$src3))>;
+def fnms : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+                   (fneg (fms node:$src1, node:$src2, node:$src3))>;
+
 // Floating-point negative absolute.
 def fnabs : PatFrag<(ops node:$ptr), (fneg (fabs node:$ptr))>;
@@ -632,6 +631,19 @@ def z_vllezf64 : PatFrag<(ops node:$addr),
                          (z_merge_high
                           (v2f64 (scalar_to_vector (f64 (load node:$addr)))),
                           (z_vzero))>;

+// Similarly for the high element of a zeroed vector.
+def z_vllezli32 : z_vllez<i32, load, 0>;
+def z_vllezlf32 : PatFrag<(ops node:$addr),
+                          (bitconvert
+                           (z_merge_high
+                            (v2i64
+                             (bitconvert
+                              (z_merge_high
+                               (v4f32 (scalar_to_vector
+                                       (f32 (load node:$addr)))),
+                               (v4f32 (z_vzero))))),
+                            (v2i64 (z_vzero))))>;
+
 // Store one element of a vector.
 class z_vste<ValueType scalartype, SDPatternOperator store>
   : PatFrag<(ops node:$vec, node:$addr, node:$index),
diff --git a/interpreter/llvm/src/lib/Target/SystemZ/SystemZPatterns.td b/interpreter/llvm/src/lib/Target/SystemZ/SystemZPatterns.td
index 16a7ed784d709..152521fb66a8d 100644
--- a/interpreter/llvm/src/lib/Target/SystemZ/SystemZPatterns.td
+++ b/interpreter/llvm/src/lib/Target/SystemZ/SystemZPatterns.td
@@ -167,3 +167,10 @@ class FPConversion<Instruction insn, SDPatternOperator operator,
                    TypedReg tr1, TypedReg tr2, bits<3> suppress, bits<4> mode>
   : Pat<(tr1.vt (operator (tr2.vt tr2.op:$vec))),
         (insn tr2.op:$vec, suppress, mode)>;
+
+// Use INSN to perform minimum/maximum operation OPERATOR on type TR.
+// FUNCTION is the type of minimum/maximum function to perform.
+class FPMinMax<Instruction insn, SDPatternOperator operator, TypedReg tr,
+               bits<4> function>
+  : Pat<(tr.vt (operator (tr.vt tr.op:$vec1), (tr.vt tr.op:$vec2))),
+        (insn tr.op:$vec1, tr.op:$vec2, function)>;
diff --git a/interpreter/llvm/src/lib/Target/SystemZ/SystemZProcessors.td b/interpreter/llvm/src/lib/Target/SystemZ/SystemZProcessors.td
index 1cdc0949ff4ae..0dca4582dc0d6 100644
--- a/interpreter/llvm/src/lib/Target/SystemZ/SystemZProcessors.td
+++ b/interpreter/llvm/src/lib/Target/SystemZ/SystemZProcessors.td
@@ -33,3 +33,6 @@ def : ProcessorModel<"zEC12", ZEC12Model, Arch10SupportedFeatures.List>;
 def : ProcessorModel<"arch11", Z13Model, Arch11SupportedFeatures.List>;
 def : ProcessorModel<"z13", Z13Model, Arch11SupportedFeatures.List>;
+def : ProcessorModel<"arch12", Z14Model, Arch12SupportedFeatures.List>;
+def : ProcessorModel<"z14", Z14Model, Arch12SupportedFeatures.List>;
+
diff --git a/interpreter/llvm/src/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/interpreter/llvm/src/lib/Target/SystemZ/SystemZRegisterInfo.cpp
index 6ef8000d6f431..d14a0fb0b0b2b 100644
--- a/interpreter/llvm/src/lib/Target/SystemZ/SystemZRegisterInfo.cpp
+++ b/interpreter/llvm/src/lib/Target/SystemZ/SystemZRegisterInfo.cpp
@@ -7,8 +7,8 @@
 //
 //===----------------------------------------------------------------------===//

-#include "SystemZInstrInfo.h"
 #include "SystemZRegisterInfo.h"
+#include "SystemZInstrInfo.h"
 #include "SystemZSubtarget.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
diff --git a/interpreter/llvm/src/lib/Target/SystemZ/SystemZRegisterInfo.td b/interpreter/llvm/src/lib/Target/SystemZ/SystemZRegisterInfo.td
index 47d2f75cc11ae..52ba1a584017a 100644
--- a/interpreter/llvm/src/lib/Target/SystemZ/SystemZRegisterInfo.td
+++ b/interpreter/llvm/src/lib/Target/SystemZ/SystemZRegisterInfo.td
@@ -260,10 +260,10 @@ defm VF128 : SystemZRegClass<"VF128",

 // All vector registers.
 defm VR128 : SystemZRegClass<"VR128",
-                             [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128,
-                             (add (sequence "V%u", 0, 7),
-                                  (sequence "V%u", 16, 31),
-                                  (sequence "V%u", 8, 15))>;
+                             [f128, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+                             128, (add (sequence "V%u", 0, 7),
+                                       (sequence "V%u", 16, 31),
+                                       (sequence "V%u", 8, 15))>;

 // Attaches a ValueType to a register operand, to make the instruction
 // definitions easier.
@@ -272,7 +272,8 @@ class TypedReg<ValueType vtin, RegisterOperand opin> {
   RegisterOperand op = opin;
 }

-def v32eb  : TypedReg<f32,     VR32>;
+def v32f   : TypedReg<i32,     VR32>;
+def v32sb  : TypedReg<f32,     VR32>;
 def v64g   : TypedReg<i64,     VR64>;
 def v64db  : TypedReg<f64,     VR64>;
 def v128b  : TypedReg<v16i8,   VR128>;
@@ -280,8 +281,9 @@ def v128h  : TypedReg<v8i16,   VR128>;
 def v128f  : TypedReg<v4i32,   VR128>;
 def v128g  : TypedReg<v2i64,   VR128>;
 def v128q  : TypedReg<v16i8,   VR128>;
-def v128eb : TypedReg<v4f32,   VR128>;
+def v128sb : TypedReg<v4f32,   VR128>;
 def v128db : TypedReg<v2f64,   VR128>;
+def v128xb : TypedReg<f128,    VR128>;
 def v128any: TypedReg<untyped, VR128>;

 //===----------------------------------------------------------------------===//
@@ -304,3 +306,13 @@ foreach I = 0-15 in {
 defm AR32 : SystemZRegClass<"AR32", [i32], 32,
                             (add (sequence "A%u", 0, 15)), 0>;

+// Control registers.
+class CREG64<bits<16> num, string n> : SystemZReg<n> {
+  let HWEncoding = num;
+}
+foreach I = 0-15 in {
+  def C#I : CREG64<I, "c"#I>, DwarfRegNum<[!add(I, 32)]>;
+}
+defm CR64 : SystemZRegClass<"CR64", [i64], 64,
+                            (add (sequence "C%u", 0, 15)), 0>;
+
diff --git a/interpreter/llvm/src/lib/Target/SystemZ/SystemZSchedule.td b/interpreter/llvm/src/lib/Target/SystemZ/SystemZSchedule.td
index 1ce0168f95e95..8dba89f70a422 100644
--- a/interpreter/llvm/src/lib/Target/SystemZ/SystemZSchedule.td
+++ b/interpreter/llvm/src/lib/Target/SystemZ/SystemZSchedule.td
@@ -59,7 +59,7 @@ def FPU2 : SchedWrite;
 def DFU  : SchedWrite;
 def DFU2 : SchedWrite;

-// Vector sub units (z13)
+// Vector sub units (z13 and later)
 def VecBF   : SchedWrite;
 def VecBF2  : SchedWrite;
 def VecDF   : SchedWrite;
@@ -75,6 +75,7 @@ def VecXsPm : SchedWrite;

 def VBU     : SchedWrite;

+include "SystemZScheduleZ14.td"
 include "SystemZScheduleZ13.td"
 include "SystemZScheduleZEC12.td"
 include "SystemZScheduleZ196.td"
diff --git a/interpreter/llvm/src/lib/Target/SystemZ/SystemZScheduleZ13.td b/interpreter/llvm/src/lib/Target/SystemZ/SystemZScheduleZ13.td
index 612c3b6cf96e8..72543c1eaee2e 100644
--- a/interpreter/llvm/src/lib/Target/SystemZ/SystemZScheduleZ13.td
+++ b/interpreter/llvm/src/lib/Target/SystemZ/SystemZScheduleZ13.td
@@ -15,7 +15,7 @@
 def Z13Model : SchedMachineModel {

   let UnsupportedFeatures = Arch11UnsupportedFeatures.List;
-
+  let IssueWidth = 8;
   let MicroOpBufferSize = 60;     // Issue queues
   let LoadLatency = 1;            // Optimistic load latency.
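// A rough reading of the scheduling knobs above, going by the comments in
// these files: IssueWidth bounds how many micro-ops the post-RA scheduler
// assumes can be issued together, MicroOpBufferSize models the depth of the
// issue queues, and LoadLatency is the optimistic cache-hit load latency.
// A hypothetical variant for experiments (MyZ13Model is not part of this
// patch) would look like:
//   def MyZ13Model : SchedMachineModel {
//     let UnsupportedFeatures = Arch11UnsupportedFeatures.List;
//     let IssueWidth = 6;             // assume narrower issue
//     let MicroOpBufferSize = 60;     // Issue queues
//     let LoadLatency = 1;            // Optimistic load latency.
//     let PostRAScheduler = 1;
//     let MispredictPenalty = 20;
//   }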
@@ -159,7 +159,7 @@ def : InstRW<[FXb], (instregex "CondReturn$")>; // Select instructions //===----------------------------------------------------------------------===// -// Select pseudo +// Select pseudo def : InstRW<[FXa], (instregex "Select(32|64|32Mux)$")>; // CondStore pseudos @@ -226,7 +226,7 @@ def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MVST$")>; def : InstRW<[FXa, Lat2], (instregex "LOCRMux$")>; def : InstRW<[FXa, Lat2], (instregex "LOC(G|FH)?R(Asm.*)?$")>; -def : InstRW<[FXa, Lat2], (instregex "LOC(G|H)?HI(Asm.*)?$")>; +def : InstRW<[FXa, Lat2], (instregex "LOC(G|H)?HI(Mux|(Asm.*))?$")>; def : InstRW<[FXa, LSU, Lat6], (instregex "LOC(G|FH|Mux)?(Asm.*)?$")>; def : InstRW<[FXb, LSU, Lat5], (instregex "STOC(G|FH|Mux)?(Asm.*)?$")>; @@ -282,7 +282,7 @@ def : InstRW<[LSU, LSU, LSU, LSU, LSU, Lat10, GroupAlone], (instregex "LM(H|Y|G)?$")>; // Load multiple disjoint -def : InstRW<[FXb, Lat30, GroupAlone], (instregex "LMD$")>; +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "LMD$")>; // Store multiple (estimated average of ceil(5/2) FXb ops) def : InstRW<[LSU, LSU, FXb, FXb, FXb, Lat10, @@ -353,6 +353,9 @@ def : InstRW<[FXa], (instregex "ALGF(I|R)$")>; def : InstRW<[FXa], (instregex "ALGR(K)?$")>; def : InstRW<[FXa], (instregex "ALR(K)?$")>; def : InstRW<[FXa], (instregex "AR(K)?$")>; +def : InstRW<[FXa], (instregex "A(L)?HHHR$")>; +def : InstRW<[FXa, Lat2], (instregex "A(L)?HHLR$")>; +def : InstRW<[FXa], (instregex "ALSIH(N)?$")>; def : InstRW<[FXb, LSU, Lat5], (instregex "A(L)?(G)?SI$")>; // Logical addition with carry @@ -376,6 +379,8 @@ def : InstRW<[FXa], (instregex "SLGF(I|R)$")>; def : InstRW<[FXa], (instregex "SLGR(K)?$")>; def : InstRW<[FXa], (instregex "SLR(K)?$")>; def : InstRW<[FXa], (instregex "SR(K)?$")>; +def : InstRW<[FXa], (instregex "S(L)?HHHR$")>; +def : InstRW<[FXa, Lat2], (instregex "S(L)?HHLR$")>; // Subtraction with borrow def : InstRW<[FXa, LSU, Lat6, GroupAlone], (instregex "SLB(G)?$")>; @@ -441,13 +446,13 @@ def : InstRW<[FXa, Lat6], (instregex "MS(R|FI)$")>; def : InstRW<[FXa, LSU, Lat12], (instregex "MSG$")>; def : InstRW<[FXa, Lat8], (instregex "MSGR$")>; def : InstRW<[FXa, Lat6], (instregex "MSGF(I|R)$")>; -def : InstRW<[FXa, LSU, Lat15, GroupAlone], (instregex "MLG$")>; -def : InstRW<[FXa, Lat9, GroupAlone], (instregex "MLGR$")>; +def : InstRW<[FXa2, LSU, Lat15, GroupAlone], (instregex "MLG$")>; +def : InstRW<[FXa2, Lat9, GroupAlone], (instregex "MLGR$")>; def : InstRW<[FXa, Lat5], (instregex "MGHI$")>; def : InstRW<[FXa, Lat5], (instregex "MHI$")>; def : InstRW<[FXa, LSU, Lat9], (instregex "MH(Y)?$")>; -def : InstRW<[FXa, Lat7, GroupAlone], (instregex "M(L)?R$")>; -def : InstRW<[FXa, LSU, Lat7, GroupAlone], (instregex "M(FY|L)?$")>; +def : InstRW<[FXa2, Lat7, GroupAlone], (instregex "M(L)?R$")>; +def : InstRW<[FXa2, LSU, Lat7, GroupAlone], (instregex "M(FY|L)?$")>; //===----------------------------------------------------------------------===// // Division and remainder @@ -455,8 +460,8 @@ def : InstRW<[FXa, LSU, Lat7, GroupAlone], (instregex "M(FY|L)?$")>; def : InstRW<[FXa2, FXa2, Lat20, GroupAlone], (instregex "DR$")>; def : InstRW<[FXa2, FXa2, LSU, Lat30, GroupAlone], (instregex "D$")>; -def : InstRW<[FXa, Lat30, GroupAlone], (instregex "DSG(F)?R$")>; -def : InstRW<[LSU, FXa, Lat30, GroupAlone], (instregex "DSG(F)?$")>; +def : InstRW<[FXa2, Lat30, GroupAlone], (instregex "DSG(F)?R$")>; +def : InstRW<[LSU, FXa2, Lat30, GroupAlone], (instregex "DSG(F)?$")>; def : InstRW<[FXa2, FXa2, Lat20, GroupAlone], (instregex "DLR$")>; def : InstRW<[FXa2, 
FXa2, Lat30, GroupAlone], (instregex "DLGR$")>; def : InstRW<[FXa2, FXa2, LSU, Lat30, GroupAlone], (instregex "DL(G)?$")>; @@ -469,7 +474,8 @@ def : InstRW<[FXa], (instregex "SLL(G|K)?$")>; def : InstRW<[FXa], (instregex "SRL(G|K)?$")>; def : InstRW<[FXa], (instregex "SRA(G|K)?$")>; def : InstRW<[FXa], (instregex "SLA(G|K)?$")>; -def : InstRW<[FXa, FXa, FXa, FXa, Lat8], (instregex "S(L|R)D(A|L)$")>; +def : InstRW<[FXa, FXa, FXa, FXa, LSU, Lat8, GroupAlone], + (instregex "S(L|R)D(A|L)$")>; // Rotate def : InstRW<[FXa, LSU, Lat6], (instregex "RLL(G)?$")>; @@ -506,6 +512,8 @@ def : InstRW<[FXb], (instregex "CLIH$")>; def : InstRW<[FXb, LSU, Lat5], (instregex "CLI(Y)?$")>; def : InstRW<[FXb], (instregex "CLR$")>; def : InstRW<[FXb, LSU, Lat5], (instregex "CLRL$")>; +def : InstRW<[FXb], (instregex "C(L)?HHR$")>; +def : InstRW<[FXb, Lat2], (instregex "C(L)?HLR$")>; // Compare halfword def : InstRW<[FXb, LSU, Lat6], (instregex "CH(Y|RL)?$")>; @@ -530,7 +538,7 @@ def : InstRW<[FXb], (instregex "TMLH(64)?$")>; def : InstRW<[FXb], (instregex "TMLL(64)?$")>; // Compare logical characters under mask -def : InstRW<[FXb, LSU, Lat5], (instregex "CLM(H|Y)?$")>; +def : InstRW<[FXb, LSU, Lat6], (instregex "CLM(H|Y)?$")>; //===----------------------------------------------------------------------===// // Prefetch and execution hint @@ -566,7 +574,7 @@ def : InstRW<[FXa, FXa, FXb, FXb, LSU, FXb, FXb, LSU, LSU, Lat20, GroupAlone], (instregex "CDSG$")>; // Compare and swap and store -def : InstRW<[FXa, Lat30, GroupAlone], (instregex "CSST$")>; +def : InstRW<[FXa, LSU, Lat30], (instregex "CSST$")>; // Perform locked operation def : InstRW<[LSU, Lat30, GroupAlone], (instregex "PLO$")>; @@ -582,36 +590,45 @@ def : InstRW<[LSU, LSU, Lat5, GroupAlone], (instregex "LPD(G)?$")>; // Translate and convert //===----------------------------------------------------------------------===// -def : InstRW<[FXa, Lat30, GroupAlone], (instregex "TR(T|TR)?(E|EOpt)?$")>; -def : InstRW<[FXa, Lat30, GroupAlone], (instregex "TR(T|O)(T|O)(Opt)?$")>; -def : InstRW<[FXa, Lat30, GroupAlone], (instregex "CU(12|14|21|24|41|42)(Opt)?$")>; -def : InstRW<[FXa, Lat30, GroupAlone], (instregex "(CUUTF|CUTFU)(Opt)?$")>; +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "TR$")>; +def : InstRW<[FXa, FXa, FXa, LSU, LSU, Lat30, GroupAlone], (instregex "TRT$")>; +def : InstRW<[FXa, LSU, Lat30], (instregex "TRTR$")>; +def : InstRW<[FXa, Lat30], (instregex "TR(TR)?(T)?(E|EOpt)?$")>; +def : InstRW<[LSU, Lat30], (instregex "TR(T|O)(T|O)(Opt)?$")>; +def : InstRW<[FXa, Lat30], (instregex "CU(12|14|21|24|41|42)(Opt)?$")>; +def : InstRW<[FXa, Lat30], (instregex "(CUUTF|CUTFU)(Opt)?$")>; //===----------------------------------------------------------------------===// // Message-security assist //===----------------------------------------------------------------------===// -def : InstRW<[FXa, Lat30, GroupAlone], (instregex "KM(C|F|O|CTR)?$")>; -def : InstRW<[FXa, Lat30, GroupAlone], (instregex "(KIMD|KLMD|KMAC|PCC|PPNO)$")>; +def : InstRW<[FXa, Lat30], (instregex "KM(C|F|O|CTR)?$")>; +def : InstRW<[FXa, Lat30], (instregex "(KIMD|KLMD|KMAC|PCC|PPNO)$")>; //===----------------------------------------------------------------------===// // Decimal arithmetic //===----------------------------------------------------------------------===// -def : InstRW<[FXb, VecDF, LSU, Lat30, GroupAlone], (instregex "CVB(Y|G)?$")>; -def : InstRW<[FXb, VecDF, FXb, Lat30, GroupAlone], (instregex "CVD(Y|G)?$")>; -def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MV(N|Z|O)$")>; +def 
: InstRW<[FXb, VecDF, VecDF, LSU, LSU, Lat30, GroupAlone], + (instregex "CVBG$")>; +def : InstRW<[FXb, VecDF, LSU, Lat30, GroupAlone], (instregex "CVB(Y)?$")>; +def : InstRW<[FXb, FXb, FXb, VecDF2, VecDF2, LSU, Lat30, GroupAlone], + (instregex "CVDG$")>; +def : InstRW<[FXb, VecDF, FXb, LSU, Lat30, GroupAlone], (instregex "CVD(Y)?$")>; +def : InstRW<[LSU, Lat10, GroupAlone], (instregex "MVO$")>; +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MV(N|Z)$")>; def : InstRW<[LSU, Lat30, GroupAlone], (instregex "(PACK|PKA|PKU)$")>; -def : InstRW<[LSU, Lat30, GroupAlone], (instregex "UNPK(A|U)?$")>; +def : InstRW<[LSU, Lat12, GroupAlone], (instregex "UNPK(A|U)$")>; +def : InstRW<[FXb, LSU, LSU, Lat9, BeginGroup], (instregex "UNPK$")>; -def : InstRW<[FXb, VecDFX, LSU, LSU, Lat9, GroupAlone], +def : InstRW<[FXb, VecDFX, LSU, LSU, LSU, Lat9, GroupAlone], (instregex "(A|S|ZA)P$")>; -def : InstRW<[FXb, VecDFX2, LSU, LSU, Lat30, GroupAlone], +def : InstRW<[FXb, VecDFX2, VecDFX2, LSU, LSU, LSU, Lat30, GroupAlone], (instregex "(M|D)P$")>; -def : InstRW<[FXb, FXb, VecDFX2, LSU, LSU, LSU, Lat15, GroupAlone], +def : InstRW<[FXb, VecDFX, VecDFX, LSU, LSU, Lat15, GroupAlone], (instregex "SRP$")>; def : InstRW<[VecDFX, LSU, LSU, Lat5, GroupAlone], (instregex "CP$")>; -def : InstRW<[VecDFX, LSU, Lat4, GroupAlone], (instregex "TP$")>; +def : InstRW<[VecDFX, LSU, Lat4, BeginGroup], (instregex "TP$")>; def : InstRW<[LSU, Lat30, GroupAlone], (instregex "ED(MK)?$")>; //===----------------------------------------------------------------------===// @@ -681,58 +698,29 @@ def : InstRW<[FXb], (instregex "PPA$")>; //===----------------------------------------------------------------------===// // Find leftmost one -def : InstRW<[FXa, Lat6, GroupAlone], (instregex "FLOGR$")>; +def : InstRW<[FXa, FXa, Lat6, GroupAlone], (instregex "FLOGR$")>; // Population count def : InstRW<[FXa, Lat3], (instregex "POPCNT$")>; // Extend -def : InstRW<[FXa], (instregex "AEXT128_64$")>; -def : InstRW<[FXa], (instregex "ZEXT128_(32|64)$")>; +def : InstRW<[FXa], (instregex "AEXT128$")>; +def : InstRW<[FXa], (instregex "ZEXT128$")>; // String instructions def : InstRW<[FXa, LSU, Lat30], (instregex "SRST$")>; -def : InstRW<[LSU, Lat30], (instregex "SRSTU$")>; +def : InstRW<[FXa, Lat30], (instregex "SRSTU$")>; def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CUSE$")>; // Various complex instructions -def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CFC$")>; -def : InstRW<[LSU, Lat30, GroupAlone], (instregex "UPT$")>; -def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CKSM$")>; -def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CMPSC$")>; - -// Move with key -def : InstRW<[FXa, FXa, FXb, LSU, Lat8, GroupAlone], (instregex "MVCK$")>; - -// Monitor call -def : InstRW<[FXb], (instregex "MC$")>; - -// Extract CPU attribute -def : InstRW<[FXb, Lat30], (instregex "ECAG$")>; - -// Extract CPU Time -def : InstRW<[FXa, Lat5, LSU], (instregex "ECTG$")>; - -// Extract PSW -def : InstRW<[FXb, Lat30], (instregex "EPSW$")>; +def : InstRW<[LSU, Lat30], (instregex "CFC$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "UPT$")>; +def : InstRW<[LSU, Lat30], (instregex "CKSM$")>; +def : InstRW<[FXa, Lat30], (instregex "CMPSC$")>; // Execute def : InstRW<[FXb, GroupAlone], (instregex "EX(RL)?$")>; -// Program return -def : InstRW<[FXb, Lat30], (instregex "PR$")>; - -// Inline assembly -def : InstRW<[LSU, LSU, LSU, FXa, FXa, FXb, Lat9, GroupAlone], - (instregex "STCK(F)?$")>; -def : InstRW<[LSU, LSU, LSU, LSU, FXa, FXa, FXb, FXb, Lat11, GroupAlone], - 
(instregex "STCKE$")>; -def : InstRW<[FXa, LSU, Lat5], (instregex "STFLE$")>; -def : InstRW<[FXb, Lat30], (instregex "SVC$")>; - -// Store real address -def : InstRW<[FXb, LSU, Lat5], (instregex "STRAG$")>; - //===----------------------------------------------------------------------===// // .insn directive instructions //===----------------------------------------------------------------------===// @@ -855,7 +843,7 @@ def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "FIXBR(A)?$")>; // Addition def : InstRW<[VecBF, LSU, Lat12], (instregex "A(E|D)B$")>; def : InstRW<[VecBF], (instregex "A(E|D)BR$")>; -def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "AXBR$")>; +def : InstRW<[VecDF2, VecDF2, Lat10, GroupAlone], (instregex "AXBR$")>; // Subtraction def : InstRW<[VecBF, LSU, Lat12], (instregex "S(E|D)B$")>; @@ -870,9 +858,9 @@ def : InstRW<[VecBF2, VecBF2, GroupAlone], (instregex "MXDBR$")>; def : InstRW<[VecDF2, VecDF2, Lat20, GroupAlone], (instregex "MXBR$")>; // Multiply and add / subtract -def : InstRW<[VecBF, LSU, Lat12, GroupAlone], (instregex "M(A|S)EB$")>; +def : InstRW<[VecBF2, LSU, Lat12, GroupAlone], (instregex "M(A|S)EB$")>; def : InstRW<[VecBF, GroupAlone], (instregex "M(A|S)EBR$")>; -def : InstRW<[VecBF, LSU, Lat12, GroupAlone], (instregex "M(A|S)DB$")>; +def : InstRW<[VecBF2, LSU, Lat12, GroupAlone], (instregex "M(A|S)DB$")>; def : InstRW<[VecBF], (instregex "M(A|S)DBR$")>; // Division @@ -881,7 +869,7 @@ def : InstRW<[VecFPd], (instregex "D(E|D)BR$")>; def : InstRW<[VecFPd, VecFPd, GroupAlone], (instregex "DXBR$")>; // Divide to integer -def : InstRW<[VecFPd, Lat30, GroupAlone], (instregex "DI(E|D)BR$")>; +def : InstRW<[VecFPd, Lat30], (instregex "DI(E|D)BR$")>; //===----------------------------------------------------------------------===// // FP: Comparisons @@ -904,10 +892,246 @@ def : InstRW<[FXa, LSU, Lat4, GroupAlone], (instregex "EFPC$")>; def : InstRW<[FXb, LSU, Lat5, GroupAlone], (instregex "STFPC$")>; def : InstRW<[LSU, Lat3, GroupAlone], (instregex "SFPC$")>; def : InstRW<[LSU, LSU, Lat6, GroupAlone], (instregex "LFPC$")>; -def : InstRW<[FXa, Lat30, GroupAlone], (instregex "SFASR$")>; -def : InstRW<[FXa, LSU, Lat30, GroupAlone], (instregex "LFAS$")>; +def : InstRW<[FXa, Lat30], (instregex "SFASR$")>; +def : InstRW<[FXa, LSU, Lat30], (instregex "LFAS$")>; def : InstRW<[FXb, Lat3, GroupAlone], (instregex "SRNM(B|T)?$")>; + +// --------------------- Hexadecimal floating point ------------------------- // + +//===----------------------------------------------------------------------===// +// HFP: Move instructions +//===----------------------------------------------------------------------===// + +// Load and Test +def : InstRW<[VecXsPm, Lat4], (instregex "LT(D|E)R$")>; +def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "LTXR$")>; + +//===----------------------------------------------------------------------===// +// HFP: Conversion instructions +//===----------------------------------------------------------------------===// + +// Load rounded +def : InstRW<[VecBF], (instregex "(LEDR|LRER)$")>; +def : InstRW<[VecBF], (instregex "LEXR$")>; +def : InstRW<[VecDF2], (instregex "(LDXR|LRDR)$")>; + +// Load lengthened +def : InstRW<[LSU], (instregex "LDE$")>; +def : InstRW<[FXb], (instregex "LDER$")>; +def : InstRW<[VecBF2, VecBF2, LSU, Lat12, GroupAlone], (instregex "LX(D|E)$")>; +def : InstRW<[VecBF2, VecBF2, GroupAlone], (instregex "LX(D|E)R$")>; + +// Convert from fixed +def : InstRW<[FXb, VecBF, Lat9, BeginGroup], (instregex 
"CE(F|G)R$")>; +def : InstRW<[FXb, VecBF, Lat9, BeginGroup], (instregex "CD(F|G)R$")>; +def : InstRW<[FXb, VecDF2, VecDF2, Lat12, GroupAlone], (instregex "CX(F|G)R$")>; + +// Convert to fixed +def : InstRW<[FXb, VecBF, Lat11, BeginGroup], (instregex "CF(E|D)R$")>; +def : InstRW<[FXb, VecBF, Lat11, BeginGroup], (instregex "CG(E|D)R$")>; +def : InstRW<[FXb, VecDF, VecDF, Lat20, BeginGroup], (instregex "C(F|G)XR$")>; + +// Convert BFP to HFP / HFP to BFP. +def : InstRW<[VecBF], (instregex "THD(E)?R$")>; +def : InstRW<[VecBF], (instregex "TB(E)?DR$")>; + +//===----------------------------------------------------------------------===// +// HFP: Unary arithmetic +//===----------------------------------------------------------------------===// + +// Load Complement / Negative / Positive +def : InstRW<[VecXsPm, Lat4], (instregex "L(C|N|P)DR$")>; +def : InstRW<[VecXsPm, Lat4], (instregex "L(C|N|P)ER$")>; +def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "L(C|N|P)XR$")>; + +// Halve +def : InstRW<[VecBF], (instregex "H(E|D)R$")>; + +// Square root +def : InstRW<[VecFPd, LSU], (instregex "SQ(E|D)$")>; +def : InstRW<[VecFPd], (instregex "SQ(E|D)R$")>; +def : InstRW<[VecFPd, VecFPd, GroupAlone], (instregex "SQXR$")>; + +// Load FP integer +def : InstRW<[VecBF], (instregex "FIER$")>; +def : InstRW<[VecBF], (instregex "FIDR$")>; +def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "FIXR$")>; + +//===----------------------------------------------------------------------===// +// HFP: Binary arithmetic +//===----------------------------------------------------------------------===// + +// Addition +def : InstRW<[VecBF, LSU, Lat12], (instregex "A(E|D|U|W)$")>; +def : InstRW<[VecBF], (instregex "A(E|D|U|W)R$")>; +def : InstRW<[VecDF2, VecDF2, Lat10, GroupAlone], (instregex "AXR$")>; + +// Subtraction +def : InstRW<[VecBF, LSU, Lat12], (instregex "S(E|D|U|W)$")>; +def : InstRW<[VecBF], (instregex "S(E|D|U|W)R$")>; +def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "SXR$")>; + +// Multiply +def : InstRW<[VecBF, LSU, Lat12], (instregex "M(D|DE|E|EE)$")>; +def : InstRW<[VecBF], (instregex "M(D|DE|E|EE)R$")>; +def : InstRW<[VecBF2, VecBF2, LSU, Lat12, GroupAlone], (instregex "MXD$")>; +def : InstRW<[VecBF2, VecBF2, GroupAlone], (instregex "MXDR$")>; +def : InstRW<[VecDF2, VecDF2, Lat20, GroupAlone], (instregex "MXR$")>; +def : InstRW<[VecBF2, VecBF2, LSU, Lat12, GroupAlone], (instregex "MY$")>; +def : InstRW<[VecBF2, LSU, Lat12, GroupAlone], (instregex "MY(H|L)$")>; +def : InstRW<[VecBF2, VecBF2, GroupAlone], (instregex "MYR$")>; +def : InstRW<[VecBF, GroupAlone], (instregex "MY(H|L)R$")>; + +// Multiply and add / subtract +def : InstRW<[VecBF2, LSU, Lat12, GroupAlone], (instregex "M(A|S)E$")>; +def : InstRW<[VecBF, GroupAlone], (instregex "M(A|S)ER$")>; +def : InstRW<[VecBF2, LSU, Lat12, GroupAlone], (instregex "M(A|S)D$")>; +def : InstRW<[VecBF, GroupAlone], (instregex "M(A|S)DR$")>; +def : InstRW<[VecBF2, LSU, Lat12, GroupAlone], (instregex "MAY(H|L)$")>; +def : InstRW<[VecBF2, VecBF2, LSU, Lat12, GroupAlone], (instregex "MAY$")>; +def : InstRW<[VecBF, GroupAlone], (instregex "MAY(H|L)R$")>; +def : InstRW<[VecBF2, VecBF2, GroupAlone], (instregex "MAYR$")>; + +// Division +def : InstRW<[VecFPd, LSU], (instregex "D(E|D)$")>; +def : InstRW<[VecFPd], (instregex "D(E|D)R$")>; +def : InstRW<[VecFPd, VecFPd, GroupAlone], (instregex "DXR$")>; + +//===----------------------------------------------------------------------===// +// HFP: Comparisons 
+//===----------------------------------------------------------------------===// + +// Compare +def : InstRW<[VecBF, LSU, Lat12], (instregex "C(E|D)$")>; +def : InstRW<[VecBF], (instregex "C(E|D)R$")>; +def : InstRW<[VecDF, VecDF, Lat20, GroupAlone], (instregex "CXR$")>; + + +// ------------------------ Decimal floating point -------------------------- // + +//===----------------------------------------------------------------------===// +// DFP: Move instructions +//===----------------------------------------------------------------------===// + +// Load and Test +def : InstRW<[VecDF], (instregex "LTDTR$")>; +def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "LTXTR$")>; + +//===----------------------------------------------------------------------===// +// DFP: Conversion instructions +//===----------------------------------------------------------------------===// + +// Load rounded +def : InstRW<[VecDF, Lat15], (instregex "LEDTR$")>; +def : InstRW<[VecDF, VecDF, Lat20], (instregex "LDXTR$")>; + +// Load lengthened +def : InstRW<[VecDF], (instregex "LDETR$")>; +def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "LXDTR$")>; + +// Convert from fixed / logical +def : InstRW<[FXb, VecDF, Lat30, BeginGroup], (instregex "CD(F|G)TR(A)?$")>; +def : InstRW<[FXb, VecDF2, VecDF2, Lat30, GroupAlone], (instregex "CX(F|G)TR(A)?$")>; +def : InstRW<[FXb, VecDF, Lat30, BeginGroup], (instregex "CDL(F|G)TR$")>; +def : InstRW<[FXb, VecDF2, VecDF2, Lat30, GroupAlone], (instregex "CXL(F|G)TR$")>; + +// Convert to fixed / logical +def : InstRW<[FXb, VecDF, Lat30, BeginGroup], (instregex "C(F|G)DTR(A)?$")>; +def : InstRW<[FXb, VecDF, VecDF, Lat30, BeginGroup], (instregex "C(F|G)XTR(A)?$")>; +def : InstRW<[FXb, VecDF, Lat30, BeginGroup], (instregex "CL(F|G)DTR$")>; +def : InstRW<[FXb, VecDF, VecDF, Lat30, BeginGroup], (instregex "CL(F|G)XTR$")>; + +// Convert from / to signed / unsigned packed +def : InstRW<[FXb, VecDF, Lat9, BeginGroup], (instregex "CD(S|U)TR$")>; +def : InstRW<[FXb, FXb, VecDF2, VecDF2, Lat15, GroupAlone], (instregex "CX(S|U)TR$")>; +def : InstRW<[FXb, VecDF, Lat12, BeginGroup], (instregex "C(S|U)DTR$")>; +def : InstRW<[FXb, FXb, VecDF2, VecDF2, Lat15, GroupAlone], (instregex "C(S|U)XTR$")>; + +// Convert from / to zoned +def : InstRW<[LSU, VecDF, Lat11, BeginGroup], (instregex "CDZT$")>; +def : InstRW<[LSU, LSU, VecDF2, VecDF2, Lat15, GroupAlone], (instregex "CXZT$")>; +def : InstRW<[FXb, LSU, VecDF, Lat11, BeginGroup], (instregex "CZDT$")>; +def : InstRW<[FXb, LSU, VecDF, VecDF, Lat15, GroupAlone], (instregex "CZXT$")>; + +// Convert from / to packed +def : InstRW<[LSU, VecDF, Lat11, BeginGroup], (instregex "CDPT$")>; +def : InstRW<[LSU, LSU, VecDF2, VecDF2, Lat15, GroupAlone], (instregex "CXPT$")>; +def : InstRW<[FXb, LSU, VecDF, Lat11, BeginGroup], (instregex "CPDT$")>; +def : InstRW<[FXb, LSU, VecDF, VecDF, Lat15, GroupAlone], (instregex "CPXT$")>; + +// Perform floating-point operation +def : InstRW<[FXb, Lat30], (instregex "PFPO$")>; + +//===----------------------------------------------------------------------===// +// DFP: Unary arithmetic +//===----------------------------------------------------------------------===// + +// Load FP integer +def : InstRW<[VecDF], (instregex "FIDTR$")>; +def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "FIXTR$")>; + +// Extract biased exponent +def : InstRW<[FXb, VecDF, Lat12, BeginGroup], (instregex "EEDTR$")>; +def : InstRW<[FXb, VecDF, Lat12, BeginGroup], (instregex "EEXTR$")>; + +// Extract significance +def : 
InstRW<[FXb, VecDF, Lat12, BeginGroup], (instregex "ESDTR$")>; +def : InstRW<[FXb, VecDF, VecDF, Lat15, BeginGroup], (instregex "ESXTR$")>; + +//===----------------------------------------------------------------------===// +// DFP: Binary arithmetic +//===----------------------------------------------------------------------===// + +// Addition +def : InstRW<[VecDF], (instregex "ADTR(A)?$")>; +def : InstRW<[VecDF2, VecDF2, Lat10, GroupAlone], (instregex "AXTR(A)?$")>; + +// Subtraction +def : InstRW<[VecDF], (instregex "SDTR(A)?$")>; +def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "SXTR(A)?$")>; + +// Multiply +def : InstRW<[VecDF, Lat30], (instregex "MDTR(A)?$")>; +def : InstRW<[VecDF2, VecDF2, Lat30, GroupAlone], (instregex "MXTR(A)?$")>; + +// Division +def : InstRW<[VecDF, Lat30], (instregex "DDTR(A)?$")>; +def : InstRW<[VecDF2, VecDF2, Lat30, GroupAlone], (instregex "DXTR(A)?$")>; + +// Quantize +def : InstRW<[VecDF], (instregex "QADTR$")>; +def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "QAXTR$")>; + +// Reround +def : InstRW<[FXb, VecDF, Lat11, BeginGroup], (instregex "RRDTR$")>; +def : InstRW<[FXb, VecDF2, VecDF2, Lat15, GroupAlone], (instregex "RRXTR$")>; + +// Shift significand left/right +def : InstRW<[LSU, VecDF, Lat11, GroupAlone], (instregex "S(L|R)DT$")>; +def : InstRW<[LSU, VecDF2, VecDF2, Lat15, GroupAlone], (instregex "S(L|R)XT$")>; + +// Insert biased exponent +def : InstRW<[FXb, VecDF, Lat11, BeginGroup], (instregex "IEDTR$")>; +def : InstRW<[FXb, VecDF2, VecDF2, Lat15, GroupAlone], (instregex "IEXTR$")>; + +//===----------------------------------------------------------------------===// +// DFP: Comparisons +//===----------------------------------------------------------------------===// + +// Compare +def : InstRW<[VecDF], (instregex "(K|C)DTR$")>; +def : InstRW<[VecDF, VecDF, Lat11, GroupAlone], (instregex "(K|C)XTR$")>; + +// Compare biased exponent +def : InstRW<[VecDF], (instregex "CEDTR$")>; +def : InstRW<[VecDF], (instregex "CEXTR$")>; + +// Test Data Class/Group +def : InstRW<[LSU, VecDF, Lat11], (instregex "TD(C|G)(E|D)T$")>; +def : InstRW<[LSU, VecDF, VecDF, Lat15, GroupAlone], (instregex "TD(C|G)XT$")>; + + // --------------------------------- Vector --------------------------------- // //===----------------------------------------------------------------------===// @@ -1061,32 +1285,43 @@ def : InstRW<[VecStr, Lat5], (instregex "VTM$")>; // Vector: Floating-point arithmetic //===----------------------------------------------------------------------===// -def : InstRW<[VecBF2], (instregex "VCD(G|GB|LG|LGB)$")>; -def : InstRW<[VecBF], (instregex "WCD(GB|LGB)$")>; +// Conversion and rounding +def : InstRW<[VecBF2], (instregex "VCD(L)?G$")>; +def : InstRW<[VecBF2], (instregex "VCD(L)?GB$")>; +def : InstRW<[VecBF], (instregex "WCD(L)?GB$")>; def : InstRW<[VecBF2], (instregex "VC(L)?GD$")>; -def : InstRW<[VecBF2], (instregex "VFADB$")>; -def : InstRW<[VecBF], (instregex "WFADB$")>; -def : InstRW<[VecBF2], (instregex "VCGDB$")>; -def : InstRW<[VecBF], (instregex "WCGDB$")>; -def : InstRW<[VecBF2], (instregex "VF(I|M|A|S)$")>; -def : InstRW<[VecBF2], (instregex "VF(I|M|S)DB$")>; -def : InstRW<[VecBF], (instregex "WF(I|M|S)DB$")>; -def : InstRW<[VecBF2], (instregex "VCLGDB$")>; -def : InstRW<[VecBF], (instregex "WCLGDB$")>; -def : InstRW<[VecXsPm], (instregex "VFL(C|N|P)DB$")>; -def : InstRW<[VecXsPm], (instregex "WFL(C|N|P)DB$")>; -def : InstRW<[VecBF2], (instregex "VFM(A|S)$")>; -def : InstRW<[VecBF2], (instregex "VFM(A|S)DB$")>; -def 
: InstRW<[VecBF], (instregex "WFM(A|S)DB$")>; -def : InstRW<[VecXsPm], (instregex "VFPSO$")>; -def : InstRW<[VecXsPm], (instregex "(V|W)FPSODB$")>; -def : InstRW<[VecXsPm, Lat4], (instregex "VFTCI(DB)?$")>; -def : InstRW<[VecXsPm, Lat4], (instregex "WFTCIDB$")>; +def : InstRW<[VecBF2], (instregex "VC(L)?GDB$")>; +def : InstRW<[VecBF], (instregex "WC(L)?GDB$")>; def : InstRW<[VecBF2], (instregex "VL(DE|ED)$")>; def : InstRW<[VecBF2], (instregex "VL(DE|ED)B$")>; def : InstRW<[VecBF], (instregex "WL(DE|ED)B$")>; +def : InstRW<[VecBF2], (instregex "VFI$")>; +def : InstRW<[VecBF2], (instregex "VFIDB$")>; +def : InstRW<[VecBF], (instregex "WFIDB$")>; + +// Sign operations +def : InstRW<[VecXsPm], (instregex "VFPSO$")>; +def : InstRW<[VecXsPm], (instregex "(V|W)FPSODB$")>; +def : InstRW<[VecXsPm], (instregex "(V|W)FL(C|N|P)DB$")>; + +// Test data class +def : InstRW<[VecXsPm, Lat4], (instregex "VFTCI$")>; +def : InstRW<[VecXsPm, Lat4], (instregex "(V|W)FTCIDB$")>; + +// Add / subtract +def : InstRW<[VecBF2], (instregex "VF(A|S)$")>; +def : InstRW<[VecBF2], (instregex "VF(A|S)DB$")>; +def : InstRW<[VecBF], (instregex "WF(A|S)DB$")>; + +// Multiply / multiply-and-add/subtract +def : InstRW<[VecBF2], (instregex "VFM$")>; +def : InstRW<[VecBF2], (instregex "VFMDB$")>; +def : InstRW<[VecBF], (instregex "WFMDB$")>; +def : InstRW<[VecBF2], (instregex "VFM(A|S)$")>; +def : InstRW<[VecBF2], (instregex "VFM(A|S)DB$")>; +def : InstRW<[VecBF], (instregex "WFM(A|S)DB$")>; -// divide / square root +// Divide / square root def : InstRW<[VecFPd], (instregex "VFD$")>; def : InstRW<[VecFPd], (instregex "(V|W)FDDB$")>; def : InstRW<[VecFPd], (instregex "VFSQ$")>; @@ -1098,10 +1333,10 @@ def : InstRW<[VecFPd], (instregex "(V|W)FSQDB$")>; def : InstRW<[VecXsPm], (instregex "VFC(E|H|HE)$")>; def : InstRW<[VecXsPm], (instregex "VFC(E|H|HE)DB$")>; -def : InstRW<[VecXsPm, Lat4], (instregex "WF(C|K)$")>; def : InstRW<[VecXsPm], (instregex "WFC(E|H|HE)DB$")>; def : InstRW<[VecXsPm, Lat4], (instregex "VFC(E|H|HE)DBS$")>; def : InstRW<[VecXsPm, Lat4], (instregex "WFC(E|H|HE)DBS$")>; +def : InstRW<[VecXsPm, Lat4], (instregex "WF(C|K)$")>; def : InstRW<[VecXsPm, Lat4], (instregex "WF(C|K)DB$")>; //===----------------------------------------------------------------------===// @@ -1132,5 +1367,163 @@ def : InstRW<[VecStr, Lat5], (instregex "VSTRC(B|F|H)S$")>; def : InstRW<[VecStr], (instregex "VSTRCZ(B|F|H)$")>; def : InstRW<[VecStr, Lat5], (instregex "VSTRCZ(B|F|H)S$")>; + +// -------------------------------- System ---------------------------------- // + +//===----------------------------------------------------------------------===// +// System: Program-Status Word Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXb, Lat30], (instregex "EPSW$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "LPSW(E)?$")>; +def : InstRW<[FXa, Lat3, GroupAlone], (instregex "IPK$")>; +def : InstRW<[LSU, EndGroup], (instregex "SPKA$")>; +def : InstRW<[LSU, EndGroup], (instregex "SSM$")>; +def : InstRW<[FXb, LSU, GroupAlone], (instregex "ST(N|O)SM$")>; +def : InstRW<[FXa, Lat3], (instregex "IAC$")>; +def : InstRW<[LSU, EndGroup], (instregex "SAC(F)?$")>; + +//===----------------------------------------------------------------------===// +// System: Control Register Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXb, LSU, Lat30], (instregex "LCTL(G)?$")>; +def : InstRW<[LSU, Lat30], (instregex "STCT(L|G)$")>; +def : 
InstRW<[LSU], (instregex "E(P|S)A(I)?R$")>; +def : InstRW<[FXb, Lat30], (instregex "SSA(I)?R$")>; +def : InstRW<[FXb, Lat30], (instregex "ESEA$")>; + +//===----------------------------------------------------------------------===// +// System: Prefix-Register Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXb, LSU, Lat30], (instregex "SPX$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "STPX$")>; + +//===----------------------------------------------------------------------===// +// System: Storage-Key and Real Memory Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXb, Lat30], (instregex "ISKE$")>; +def : InstRW<[FXb, Lat30], (instregex "IVSK$")>; +def : InstRW<[FXb, Lat30], (instregex "SSKE(Opt)?$")>; +def : InstRW<[FXb, Lat30], (instregex "RRB(E|M)$")>; +def : InstRW<[FXb, Lat30], (instregex "PFMF$")>; +def : InstRW<[FXb, Lat30], (instregex "TB$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "PGIN$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "PGOUT$")>; + +//===----------------------------------------------------------------------===// +// System: Dynamic-Address-Translation Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXb, LSU, Lat30], (instregex "IPTE(Opt)?(Opt)?$")>; +def : InstRW<[FXb, Lat30], (instregex "IDTE(Opt)?$")>; +def : InstRW<[FXb, Lat30], (instregex "CRDTE(Opt)?$")>; +def : InstRW<[FXb, Lat30], (instregex "PTLB$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "CSP(G)?$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "LPTEA$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "LRA(Y|G)?$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "STRAG$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "LURA(G)?$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "STUR(A|G)$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "TPROT$")>; + +//===----------------------------------------------------------------------===// +// System: Memory-move Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXa, FXa, FXb, LSU, Lat8, GroupAlone], (instregex "MVC(K|P|S)$")>; +def : InstRW<[FXa, LSU, Lat6, GroupAlone], (instregex "MVC(S|D)K$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "MVCOS$")>; +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MVPG$")>; + +//===----------------------------------------------------------------------===// +// System: Address-Space Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXb, LSU, Lat30], (instregex "LASP$")>; +def : InstRW<[LSU, GroupAlone], (instregex "PALB$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "PC$")>; +def : InstRW<[FXb, Lat30], (instregex "PR$")>; +def : InstRW<[FXb, Lat30], (instregex "PT(I)?$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "RP$")>; +def : InstRW<[FXb, Lat30], (instregex "BS(G|A)$")>; +def : InstRW<[FXb, Lat20], (instregex "TAR$")>; + +//===----------------------------------------------------------------------===// +// System: Linkage-Stack Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXb, Lat30, EndGroup], (instregex "BAKR$")>; +def : InstRW<[FXb, Lat30], (instregex "EREG(G)?$")>; +def : InstRW<[FXb, Lat30], (instregex "(E|M)STA$")>; + +//===----------------------------------------------------------------------===// +// System: Time-Related 
Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXb, Lat30], (instregex "PTFF$")>; +def : InstRW<[FXb, LSU, Lat20], (instregex "SCK$")>; +def : InstRW<[FXb, Lat30], (instregex "SCKPF$")>; +def : InstRW<[FXb, LSU, Lat20], (instregex "SCKC$")>; +def : InstRW<[LSU, LSU, GroupAlone], (instregex "SPT$")>; +def : InstRW<[LSU, LSU, LSU, FXa, FXa, FXb, Lat9, GroupAlone], + (instregex "STCK(F)?$")>; +def : InstRW<[LSU, LSU, LSU, LSU, FXa, FXa, FXb, FXb, Lat11, GroupAlone], + (instregex "STCKE$")>; +def : InstRW<[FXb, LSU, Lat9], (instregex "STCKC$")>; +def : InstRW<[LSU, LSU, FXb, Lat5, BeginGroup], (instregex "STPT$")>; + +//===----------------------------------------------------------------------===// +// System: CPU-Related Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXb, LSU, Lat30], (instregex "STAP$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "STIDP$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "STSI$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "STFL(E)?$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "ECAG$")>; +def : InstRW<[FXa, LSU, Lat30], (instregex "ECTG$")>; +def : InstRW<[FXb, Lat30], (instregex "PTF$")>; +def : InstRW<[FXb, Lat30], (instregex "PCKMO$")>; + +//===----------------------------------------------------------------------===// +// System: Miscellaneous Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXb, Lat30], (instregex "SVC$")>; +def : InstRW<[FXb, GroupAlone], (instregex "MC$")>; +def : InstRW<[FXb, Lat30], (instregex "DIAG$")>; +def : InstRW<[FXb], (instregex "TRAC(E|G)$")>; +def : InstRW<[FXb, Lat30], (instregex "TRAP(2|4)$")>; +def : InstRW<[FXb, Lat30], (instregex "SIGP$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "SIGA$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "SIE$")>; + +//===----------------------------------------------------------------------===// +// System: CPU-Measurement Facility Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXb], (instregex "LPP$")>; +def : InstRW<[FXb, Lat30], (instregex "ECPGA$")>; +def : InstRW<[FXb, Lat30], (instregex "E(C|P)CTR$")>; +def : InstRW<[FXb, Lat30], (instregex "LCCTL$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "L(P|S)CTL$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "Q(S|CTR)I$")>; +def : InstRW<[FXb, Lat30], (instregex "S(C|P)CTR$")>; + +//===----------------------------------------------------------------------===// +// System: I/O Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXb, Lat30], (instregex "(C|H|R|X)SCH$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "(M|S|ST|T)SCH$")>; +def : InstRW<[FXb, Lat30], (instregex "RCHP$")>; +def : InstRW<[FXb, Lat30], (instregex "SCHM$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "STC(PS|RW)$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "TPI$")>; +def : InstRW<[FXb, Lat30], (instregex "SAL$")>; + } diff --git a/interpreter/llvm/src/lib/Target/SystemZ/SystemZScheduleZ14.td b/interpreter/llvm/src/lib/Target/SystemZ/SystemZScheduleZ14.td new file mode 100644 index 0000000000000..698eb5627d19b --- /dev/null +++ b/interpreter/llvm/src/lib/Target/SystemZ/SystemZScheduleZ14.td @@ -0,0 +1,1611 @@ +//-- SystemZScheduleZ14.td - SystemZ Scheduling Definitions ----*- tblgen -*-=// +// +// The LLVM Compiler Infrastructure +// +// 
This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the machine model for Z14 to support instruction +// scheduling and other instruction cost heuristics. +// +//===----------------------------------------------------------------------===// + +def Z14Model : SchedMachineModel { + + let UnsupportedFeatures = Arch12UnsupportedFeatures.List; + + let IssueWidth = 8; + let MicroOpBufferSize = 60; // Issue queues + let LoadLatency = 1; // Optimistic load latency. + + let PostRAScheduler = 1; + + // Extra cycles for a mispredicted branch. + let MispredictPenalty = 20; +} + +let SchedModel = Z14Model in { + +// These definitions could be put in a subtarget common include file, +// but it seems the include system in Tablegen currently rejects +// multiple includes of same file. +def : WriteRes<GroupAlone, []> { + let NumMicroOps = 0; + let BeginGroup = 1; + let EndGroup = 1; +} +def : WriteRes<BeginGroup, []> { + let NumMicroOps = 0; + let BeginGroup = 1; +} +def : WriteRes<EndGroup, []> { + let NumMicroOps = 0; + let EndGroup = 1; +} +def : WriteRes<Lat2, []> { let Latency = 2; let NumMicroOps = 0;} +def : WriteRes<Lat3, []> { let Latency = 3; let NumMicroOps = 0;} +def : WriteRes<Lat4, []> { let Latency = 4; let NumMicroOps = 0;} +def : WriteRes<Lat5, []> { let Latency = 5; let NumMicroOps = 0;} +def : WriteRes<Lat6, []> { let Latency = 6; let NumMicroOps = 0;} +def : WriteRes<Lat7, []> { let Latency = 7; let NumMicroOps = 0;} +def : WriteRes<Lat8, []> { let Latency = 8; let NumMicroOps = 0;} +def : WriteRes<Lat9, []> { let Latency = 9; let NumMicroOps = 0;} +def : WriteRes<Lat10, []> { let Latency = 10; let NumMicroOps = 0;} +def : WriteRes<Lat11, []> { let Latency = 11; let NumMicroOps = 0;} +def : WriteRes<Lat12, []> { let Latency = 12; let NumMicroOps = 0;} +def : WriteRes<Lat15, []> { let Latency = 15; let NumMicroOps = 0;} +def : WriteRes<Lat20, []> { let Latency = 20; let NumMicroOps = 0;} +def : WriteRes<Lat30, []> { let Latency = 30; let NumMicroOps = 0;} + +// Execution units. +def Z14_FXaUnit : ProcResource<2>; +def Z14_FXbUnit : ProcResource<2>; +def Z14_LSUnit : ProcResource<2>; +def Z14_VecUnit : ProcResource<2>; +def Z14_VecFPdUnit : ProcResource<2> { let BufferSize = 1; /* blocking */ } +def Z14_VBUnit : ProcResource<2>; + +// Subtarget specific definitions of scheduling resources. +def : WriteRes<FXa, [Z14_FXaUnit]> { let Latency = 1; } +def : WriteRes<FXa2, [Z14_FXaUnit, Z14_FXaUnit]> { let Latency = 2; } +def : WriteRes<FXb, [Z14_FXbUnit]> { let Latency = 1; } +def : WriteRes<LSU, [Z14_LSUnit]> { let Latency = 4; } +def : WriteRes<VecBF, [Z14_VecUnit]> { let Latency = 8; } +def : WriteRes<VecBF2, [Z14_VecUnit, Z14_VecUnit]> { let Latency = 9; } +def : WriteRes<VecDF, [Z14_VecUnit]> { let Latency = 8; } +def : WriteRes<VecDF2, [Z14_VecUnit, Z14_VecUnit]> { let Latency = 9; } +def : WriteRes<VecDFX, [Z14_VecUnit]> { let Latency = 1; } +def : WriteRes<VecDFX2, [Z14_VecUnit, Z14_VecUnit]> { let Latency = 2; } +def : WriteRes<VecFPd, [Z14_VecFPdUnit, Z14_VecFPdUnit, Z14_VecFPdUnit, + Z14_VecFPdUnit, Z14_VecFPdUnit, Z14_VecFPdUnit, + Z14_VecFPdUnit, Z14_VecFPdUnit, Z14_VecFPdUnit, + Z14_VecFPdUnit, Z14_VecFPdUnit, Z14_VecFPdUnit, + Z14_VecFPdUnit, Z14_VecFPdUnit, Z14_VecFPdUnit, + Z14_VecFPdUnit, Z14_VecFPdUnit, Z14_VecFPdUnit, + Z14_VecFPdUnit, Z14_VecFPdUnit, Z14_VecFPdUnit, + Z14_VecFPdUnit, Z14_VecFPdUnit, Z14_VecFPdUnit, + Z14_VecFPdUnit, Z14_VecFPdUnit, Z14_VecFPdUnit, + Z14_VecFPdUnit, Z14_VecFPdUnit, Z14_VecFPdUnit]> + { let Latency = 30; } +def : WriteRes<VecMul, [Z14_VecUnit]> { let Latency = 5; } +def : WriteRes<VecStr, [Z14_VecUnit]> { let Latency = 4; } +def : WriteRes<VecXsPm, [Z14_VecUnit]> { let Latency = 3; } +def : WriteRes<VBU, [Z14_VBUnit]>; // Virtual Branching Unit + +// -------------------------- INSTRUCTIONS ---------------------------------- // + +// InstRW constructs have been used in order to preserve the +// readability of the InstrInfo files. + +// For each instruction, as matched by a regexp, provide a list of +// resources that it needs. These will be combined into a SchedClass.
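+ +// Editor's illustration (not part of the upstream patch): to read one of the InstRW +// entries below, take "def : InstRW<[FXa, LSU, Lat5], (instregex "A(Y)?$")>;" from the +// Addition section — A/AY are modeled as one micro-op on an FXa unit plus one on an +// LSU unit, and the extra Lat5 write (defined above with Latency = 5 and +// NumMicroOps = 0) raises the modeled result latency to 5 cycles without adding +// micro-ops; marks like BeginGroup/EndGroup/GroupAlone only constrain how the +// instruction may be placed in a decoder group.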
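+ +// Editor's note (illustration, not upstream text): Z14_VecFPdUnit above models the +// non-pipelined FP divide/square-root unit — BufferSize = 1 /* blocking */ means a new +// divide cannot be buffered behind one in flight, and the VecFPd write both carries the +// 30-cycle latency and lists the unit repeatedly, so the divide/sqrt entries later in +// this file (e.g. "DXBR", "VFD") occupy it until completion while independent work can +// still issue to the other pipelines.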
+ +//===----------------------------------------------------------------------===// +// Stack allocation +//===----------------------------------------------------------------------===// + +def : InstRW<[FXa], (instregex "ADJDYNALLOC$")>; // Pseudo -> LA / LAY + +//===----------------------------------------------------------------------===// +// Branch instructions +//===----------------------------------------------------------------------===// + +// Branch +def : InstRW<[VBU], (instregex "(Call)?BRC(L)?(Asm.*)?$")>; +def : InstRW<[VBU], (instregex "(Call)?J(G)?(Asm.*)?$")>; +def : InstRW<[FXb], (instregex "(Call)?BC(R)?(Asm.*)?$")>; +def : InstRW<[FXb], (instregex "(Call)?B(R)?(Asm.*)?$")>; +def : InstRW<[FXb, LSU, Lat5], (instregex "BI(C)?(Asm.*)?$")>; +def : InstRW<[FXa, EndGroup], (instregex "BRCT(G)?$")>; +def : InstRW<[FXb, FXa, Lat2, GroupAlone], (instregex "BRCTH$")>; +def : InstRW<[FXb, FXa, Lat2, GroupAlone], (instregex "BCT(G)?(R)?$")>; +def : InstRW<[FXa, FXa, FXb, FXb, Lat4, GroupAlone], + (instregex "B(R)?X(H|L).*$")>; + +// Compare and branch +def : InstRW<[FXb], (instregex "C(L)?(G)?(I|R)J(Asm.*)?$")>; +def : InstRW<[FXb, FXb, Lat2, GroupAlone], + (instregex "C(L)?(G)?(I|R)B(Call|Return|Asm.*)?$")>; + +//===----------------------------------------------------------------------===// +// Trap instructions +//===----------------------------------------------------------------------===// + +// Trap +def : InstRW<[VBU], (instregex "(Cond)?Trap$")>; + +// Compare and trap +def : InstRW<[FXb], (instregex "C(G)?(I|R)T(Asm.*)?$")>; +def : InstRW<[FXb], (instregex "CL(G)?RT(Asm.*)?$")>; +def : InstRW<[FXb], (instregex "CL(F|G)IT(Asm.*)?$")>; +def : InstRW<[FXb, LSU, Lat5], (instregex "CL(G)?T(Asm.*)?$")>; + +//===----------------------------------------------------------------------===// +// Call and return instructions +//===----------------------------------------------------------------------===// + +// Call +def : InstRW<[VBU, FXa, FXa, Lat3, GroupAlone], (instregex "(Call)?BRAS$")>; +def : InstRW<[FXa, FXa, FXb, Lat3, GroupAlone], (instregex "(Call)?BRASL$")>; +def : InstRW<[FXa, FXa, FXb, Lat3, GroupAlone], (instregex "(Call)?BAS(R)?$")>; +def : InstRW<[FXa, FXa, FXb, Lat3, GroupAlone], (instregex "TLS_(G|L)DCALL$")>; + +// Return +def : InstRW<[FXb, EndGroup], (instregex "Return$")>; +def : InstRW<[FXb], (instregex "CondReturn$")>; + +//===----------------------------------------------------------------------===// +// Select instructions +//===----------------------------------------------------------------------===// + +// Select pseudo +def : InstRW<[FXa], (instregex "Select(32|64|32Mux)$")>; + +// CondStore pseudos +def : InstRW<[FXa], (instregex "CondStore16(Inv)?$")>; +def : InstRW<[FXa], (instregex "CondStore16Mux(Inv)?$")>; +def : InstRW<[FXa], (instregex "CondStore32(Inv)?$")>; +def : InstRW<[FXa], (instregex "CondStore32Mux(Inv)?$")>; +def : InstRW<[FXa], (instregex "CondStore64(Inv)?$")>; +def : InstRW<[FXa], (instregex "CondStore8(Inv)?$")>; +def : InstRW<[FXa], (instregex "CondStore8Mux(Inv)?$")>; + +//===----------------------------------------------------------------------===// +// Move instructions +//===----------------------------------------------------------------------===// + +// Moves +def : InstRW<[FXb, LSU, Lat5], (instregex "MV(G|H)?HI$")>; +def : InstRW<[FXb, LSU, Lat5], (instregex "MVI(Y)?$")>; + +// Move character +def : InstRW<[FXb, LSU, LSU, LSU, Lat8, GroupAlone], (instregex "MVC$")>; +def : InstRW<[LSU, Lat30, GroupAlone], (instregex 
"MVCL(E|U)?$")>; + +// Pseudo -> reg move +def : InstRW<[FXa], (instregex "COPY(_TO_REGCLASS)?$")>; +def : InstRW<[FXa], (instregex "EXTRACT_SUBREG$")>; +def : InstRW<[FXa], (instregex "INSERT_SUBREG$")>; +def : InstRW<[FXa], (instregex "REG_SEQUENCE$")>; +def : InstRW<[FXa], (instregex "SUBREG_TO_REG$")>; + +// Loads +def : InstRW<[LSU], (instregex "L(Y|FH|RL|Mux|CBB)?$")>; +def : InstRW<[LSU], (instregex "LG(RL)?$")>; +def : InstRW<[LSU], (instregex "L128$")>; + +def : InstRW<[FXa], (instregex "LLIH(F|H|L)$")>; +def : InstRW<[FXa], (instregex "LLIL(F|H|L)$")>; + +def : InstRW<[FXa], (instregex "LG(F|H)I$")>; +def : InstRW<[FXa], (instregex "LHI(Mux)?$")>; +def : InstRW<[FXa], (instregex "LR(Mux)?$")>; + +// Load and zero rightmost byte +def : InstRW<[LSU], (instregex "LZR(F|G)$")>; + +// Load and trap +def : InstRW<[FXb, LSU, Lat5], (instregex "L(FH|G)?AT$")>; + +// Load and test +def : InstRW<[FXa, LSU, Lat5], (instregex "LT(G)?$")>; +def : InstRW<[FXa], (instregex "LT(G)?R$")>; + +// Stores +def : InstRW<[FXb, LSU, Lat5], (instregex "STG(RL)?$")>; +def : InstRW<[FXb, LSU, Lat5], (instregex "ST128$")>; +def : InstRW<[FXb, LSU, Lat5], (instregex "ST(Y|FH|RL|Mux)?$")>; + +// String moves. +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MVST$")>; + +//===----------------------------------------------------------------------===// +// Conditional move instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXa, Lat2], (instregex "LOCRMux$")>; +def : InstRW<[FXa, Lat2], (instregex "LOC(G|FH)?R(Asm.*)?$")>; +def : InstRW<[FXa, Lat2], (instregex "LOC(G|H)?HI(Mux|(Asm.*))?$")>; +def : InstRW<[FXa, LSU, Lat6], (instregex "LOC(G|FH|Mux)?(Asm.*)?$")>; +def : InstRW<[FXb, LSU, Lat5], (instregex "STOC(G|FH|Mux)?(Asm.*)?$")>; + +//===----------------------------------------------------------------------===// +// Sign extensions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXa], (instregex "L(B|H|G)R$")>; +def : InstRW<[FXa], (instregex "LG(B|H|F)R$")>; + +def : InstRW<[FXa, LSU, Lat5], (instregex "LTGF$")>; +def : InstRW<[FXa], (instregex "LTGFR$")>; + +def : InstRW<[FXa, LSU, Lat5], (instregex "LB(H|Mux)?$")>; +def : InstRW<[FXa, LSU, Lat5], (instregex "LH(Y)?$")>; +def : InstRW<[FXa, LSU, Lat5], (instregex "LH(H|Mux|RL)$")>; +def : InstRW<[FXa, LSU, Lat5], (instregex "LG(B|H|F)$")>; +def : InstRW<[FXa, LSU, Lat5], (instregex "LG(H|F)RL$")>; + +//===----------------------------------------------------------------------===// +// Zero extensions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXa], (instregex "LLCR(Mux)?$")>; +def : InstRW<[FXa], (instregex "LLHR(Mux)?$")>; +def : InstRW<[FXa], (instregex "LLG(C|H|F|T)R$")>; +def : InstRW<[LSU], (instregex "LLC(Mux)?$")>; +def : InstRW<[LSU], (instregex "LLH(Mux)?$")>; +def : InstRW<[FXa, LSU, Lat5], (instregex "LL(C|H)H$")>; +def : InstRW<[LSU], (instregex "LLHRL$")>; +def : InstRW<[LSU], (instregex "LLG(C|H|F|T|HRL|FRL)$")>; + +// Load and zero rightmost byte +def : InstRW<[LSU], (instregex "LLZRGF$")>; + +// Load and trap +def : InstRW<[FXb, LSU, Lat5], (instregex "LLG(F|T)?AT$")>; + +//===----------------------------------------------------------------------===// +// Truncations +//===----------------------------------------------------------------------===// + +def : InstRW<[FXb, LSU, Lat5], (instregex "STC(H|Y|Mux)?$")>; +def : InstRW<[FXb, LSU, Lat5], (instregex 
"STH(H|Y|RL|Mux)?$")>; +def : InstRW<[FXb, LSU, Lat5], (instregex "STCM(H|Y)?$")>; + +//===----------------------------------------------------------------------===// +// Multi-register moves +//===----------------------------------------------------------------------===// + +// Load multiple (estimated average of 5 ops) +def : InstRW<[LSU, LSU, LSU, LSU, LSU, Lat10, GroupAlone], + (instregex "LM(H|Y|G)?$")>; + +// Load multiple disjoint +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "LMD$")>; + +// Store multiple (estimated average of ceil(5/2) FXb ops) +def : InstRW<[LSU, LSU, FXb, FXb, FXb, Lat10, + GroupAlone], (instregex "STM(G|H|Y)?$")>; + +//===----------------------------------------------------------------------===// +// Byte swaps +//===----------------------------------------------------------------------===// + +def : InstRW<[FXa], (instregex "LRV(G)?R$")>; +def : InstRW<[FXa, LSU, Lat5], (instregex "LRV(G|H)?$")>; +def : InstRW<[FXb, LSU, Lat5], (instregex "STRV(G|H)?$")>; +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MVCIN$")>; + +//===----------------------------------------------------------------------===// +// Load address instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXa], (instregex "LA(Y|RL)?$")>; + +// Load the Global Offset Table address ( -> larl ) +def : InstRW<[FXa], (instregex "GOT$")>; + +//===----------------------------------------------------------------------===// +// Absolute and Negation +//===----------------------------------------------------------------------===// + +def : InstRW<[FXa], (instregex "LP(G)?R$")>; +def : InstRW<[FXa, FXa, Lat2, BeginGroup], (instregex "L(N|P)GFR$")>; +def : InstRW<[FXa], (instregex "LN(R|GR)$")>; +def : InstRW<[FXa], (instregex "LC(R|GR)$")>; +def : InstRW<[FXa, FXa, Lat2, BeginGroup], (instregex "LCGFR$")>; + +//===----------------------------------------------------------------------===// +// Insertion +//===----------------------------------------------------------------------===// + +def : InstRW<[FXa, LSU, Lat5], (instregex "IC(Y)?$")>; +def : InstRW<[FXa, LSU, Lat5], (instregex "IC32(Y)?$")>; +def : InstRW<[FXa, LSU, Lat5], (instregex "ICM(H|Y)?$")>; +def : InstRW<[FXa], (instregex "II(F|H|L)Mux$")>; +def : InstRW<[FXa], (instregex "IIHF(64)?$")>; +def : InstRW<[FXa], (instregex "IIHH(64)?$")>; +def : InstRW<[FXa], (instregex "IIHL(64)?$")>; +def : InstRW<[FXa], (instregex "IILF(64)?$")>; +def : InstRW<[FXa], (instregex "IILH(64)?$")>; +def : InstRW<[FXa], (instregex "IILL(64)?$")>; + +//===----------------------------------------------------------------------===// +// Addition +//===----------------------------------------------------------------------===// + +def : InstRW<[FXa, LSU, Lat5], (instregex "A(Y)?$")>; +def : InstRW<[FXa, LSU, Lat6], (instregex "AH(Y)?$")>; +def : InstRW<[FXa], (instregex "AIH$")>; +def : InstRW<[FXa], (instregex "AFI(Mux)?$")>; +def : InstRW<[FXa, LSU, Lat5], (instregex "AG$")>; +def : InstRW<[FXa], (instregex "AGFI$")>; +def : InstRW<[FXa], (instregex "AGHI(K)?$")>; +def : InstRW<[FXa], (instregex "AGR(K)?$")>; +def : InstRW<[FXa], (instregex "AHI(K)?$")>; +def : InstRW<[FXa], (instregex "AHIMux(K)?$")>; +def : InstRW<[FXa, LSU, Lat5], (instregex "AL(Y)?$")>; +def : InstRW<[FXa], (instregex "AL(FI|HSIK)$")>; +def : InstRW<[FXa, LSU, Lat5], (instregex "ALG(F)?$")>; +def : InstRW<[FXa], (instregex "ALGHSIK$")>; +def : InstRW<[FXa], (instregex "ALGF(I|R)$")>; +def : InstRW<[FXa], (instregex "ALGR(K)?$")>; +def : 
InstRW<[FXa], (instregex "ALR(K)?$")>; +def : InstRW<[FXa], (instregex "AR(K)?$")>; +def : InstRW<[FXa], (instregex "A(L)?HHHR$")>; +def : InstRW<[FXa, Lat2], (instregex "A(L)?HHLR$")>; +def : InstRW<[FXa], (instregex "ALSIH(N)?$")>; +def : InstRW<[FXb, LSU, Lat5], (instregex "A(L)?(G)?SI$")>; + +// Logical addition with carry +def : InstRW<[FXa, LSU, Lat6, GroupAlone], (instregex "ALC(G)?$")>; +def : InstRW<[FXa, Lat2, GroupAlone], (instregex "ALC(G)?R$")>; + +// Add with sign extension (16/32 -> 64) +def : InstRW<[FXa, LSU, Lat6], (instregex "AG(F|H)$")>; +def : InstRW<[FXa, Lat2], (instregex "AGFR$")>; + +//===----------------------------------------------------------------------===// +// Subtraction +//===----------------------------------------------------------------------===// + +def : InstRW<[FXa, LSU, Lat5], (instregex "S(G|Y)?$")>; +def : InstRW<[FXa, LSU, Lat6], (instregex "SH(Y)?$")>; +def : InstRW<[FXa], (instregex "SGR(K)?$")>; +def : InstRW<[FXa], (instregex "SLFI$")>; +def : InstRW<[FXa, LSU, Lat5], (instregex "SL(G|GF|Y)?$")>; +def : InstRW<[FXa], (instregex "SLGF(I|R)$")>; +def : InstRW<[FXa], (instregex "SLGR(K)?$")>; +def : InstRW<[FXa], (instregex "SLR(K)?$")>; +def : InstRW<[FXa], (instregex "SR(K)?$")>; +def : InstRW<[FXa], (instregex "S(L)?HHHR$")>; +def : InstRW<[FXa, Lat2], (instregex "S(L)?HHLR$")>; + +// Subtraction with borrow +def : InstRW<[FXa, LSU, Lat6, GroupAlone], (instregex "SLB(G)?$")>; +def : InstRW<[FXa, Lat2, GroupAlone], (instregex "SLB(G)?R$")>; + +// Subtraction with sign extension (16/32 -> 64) +def : InstRW<[FXa, LSU, Lat6], (instregex "SG(F|H)$")>; +def : InstRW<[FXa, Lat2], (instregex "SGFR$")>; + +//===----------------------------------------------------------------------===// +// AND +//===----------------------------------------------------------------------===// + +def : InstRW<[FXa, LSU, Lat5], (instregex "N(G|Y)?$")>; +def : InstRW<[FXa], (instregex "NGR(K)?$")>; +def : InstRW<[FXa], (instregex "NI(FMux|HMux|LMux)$")>; +def : InstRW<[FXb, LSU, Lat5], (instregex "NI(Y)?$")>; +def : InstRW<[FXa], (instregex "NIHF(64)?$")>; +def : InstRW<[FXa], (instregex "NIHH(64)?$")>; +def : InstRW<[FXa], (instregex "NIHL(64)?$")>; +def : InstRW<[FXa], (instregex "NILF(64)?$")>; +def : InstRW<[FXa], (instregex "NILH(64)?$")>; +def : InstRW<[FXa], (instregex "NILL(64)?$")>; +def : InstRW<[FXa], (instregex "NR(K)?$")>; +def : InstRW<[LSU, LSU, FXb, Lat9, BeginGroup], (instregex "NC$")>; + +//===----------------------------------------------------------------------===// +// OR +//===----------------------------------------------------------------------===// + +def : InstRW<[FXa, LSU, Lat5], (instregex "O(G|Y)?$")>; +def : InstRW<[FXa], (instregex "OGR(K)?$")>; +def : InstRW<[FXb, LSU, Lat5], (instregex "OI(Y)?$")>; +def : InstRW<[FXa], (instregex "OI(FMux|HMux|LMux)$")>; +def : InstRW<[FXa], (instregex "OIHF(64)?$")>; +def : InstRW<[FXa], (instregex "OIHH(64)?$")>; +def : InstRW<[FXa], (instregex "OIHL(64)?$")>; +def : InstRW<[FXa], (instregex "OILF(64)?$")>; +def : InstRW<[FXa], (instregex "OILH(64)?$")>; +def : InstRW<[FXa], (instregex "OILL(64)?$")>; +def : InstRW<[FXa], (instregex "OR(K)?$")>; +def : InstRW<[LSU, LSU, FXb, Lat9, BeginGroup], (instregex "OC$")>; + +//===----------------------------------------------------------------------===// +// XOR +//===----------------------------------------------------------------------===// + +def : InstRW<[FXa, LSU, Lat5], (instregex "X(G|Y)?$")>; +def : InstRW<[FXb, LSU, Lat5], (instregex "XI(Y)?$")>; +def : 
InstRW<[FXa], (instregex "XIFMux$")>; +def : InstRW<[FXa], (instregex "XGR(K)?$")>; +def : InstRW<[FXa], (instregex "XIHF(64)?$")>; +def : InstRW<[FXa], (instregex "XILF(64)?$")>; +def : InstRW<[FXa], (instregex "XR(K)?$")>; +def : InstRW<[LSU, LSU, FXb, Lat9, BeginGroup], (instregex "XC$")>; + +//===----------------------------------------------------------------------===// +// Multiplication +//===----------------------------------------------------------------------===// + +def : InstRW<[FXa, LSU, Lat9], (instregex "MS(GF|Y)?$")>; +def : InstRW<[FXa, Lat5], (instregex "MS(R|FI)$")>; +def : InstRW<[FXa, LSU, Lat11], (instregex "MSG$")>; +def : InstRW<[FXa, Lat7], (instregex "MSGR$")>; +def : InstRW<[FXa, Lat5], (instregex "MSGF(I|R)$")>; +def : InstRW<[FXa2, LSU, Lat12, GroupAlone], (instregex "MLG$")>; +def : InstRW<[FXa2, Lat8, GroupAlone], (instregex "MLGR$")>; +def : InstRW<[FXa, Lat4], (instregex "MGHI$")>; +def : InstRW<[FXa, Lat4], (instregex "MHI$")>; +def : InstRW<[FXa, LSU, Lat8], (instregex "MH(Y)?$")>; +def : InstRW<[FXa2, Lat6, GroupAlone], (instregex "M(L)?R$")>; +def : InstRW<[FXa2, LSU, Lat10, GroupAlone], (instregex "M(FY|L)?$")>; +def : InstRW<[FXa, LSU, Lat8], (instregex "MGH$")>; +def : InstRW<[FXa, FXa, LSU, Lat12, GroupAlone], (instregex "MG$")>; +def : InstRW<[FXa, FXa, Lat8, GroupAlone], (instregex "MGRK$")>; +def : InstRW<[FXa, LSU, Lat9], (instregex "MSC$")>; +def : InstRW<[FXa, LSU, Lat11], (instregex "MSGC$")>; +def : InstRW<[FXa, Lat5], (instregex "MSRKC$")>; +def : InstRW<[FXa, Lat7], (instregex "MSGRKC$")>; + +//===----------------------------------------------------------------------===// +// Division and remainder +//===----------------------------------------------------------------------===// + +def : InstRW<[FXa2, FXa2, Lat20, GroupAlone], (instregex "DR$")>; +def : InstRW<[FXa2, FXa2, LSU, Lat30, GroupAlone], (instregex "D$")>; +def : InstRW<[FXa2, Lat30, GroupAlone], (instregex "DSG(F)?R$")>; +def : InstRW<[LSU, FXa2, Lat30, GroupAlone], (instregex "DSG(F)?$")>; +def : InstRW<[FXa2, FXa2, Lat20, GroupAlone], (instregex "DLR$")>; +def : InstRW<[FXa2, FXa2, Lat30, GroupAlone], (instregex "DLGR$")>; +def : InstRW<[FXa2, FXa2, LSU, Lat30, GroupAlone], (instregex "DL(G)?$")>; + +//===----------------------------------------------------------------------===// +// Shifts +//===----------------------------------------------------------------------===// + +def : InstRW<[FXa], (instregex "SLL(G|K)?$")>; +def : InstRW<[FXa], (instregex "SRL(G|K)?$")>; +def : InstRW<[FXa], (instregex "SRA(G|K)?$")>; +def : InstRW<[FXa], (instregex "SLA(G|K)?$")>; +def : InstRW<[FXa, FXa, FXa, FXa, LSU, Lat8, GroupAlone], + (instregex "S(L|R)D(A|L)$")>; + +// Rotate +def : InstRW<[FXa, LSU, Lat6], (instregex "RLL(G)?$")>; + +// Rotate and insert +def : InstRW<[FXa], (instregex "RISBG(N|32)?$")>; +def : InstRW<[FXa], (instregex "RISBH(G|H|L)$")>; +def : InstRW<[FXa], (instregex "RISBL(G|H|L)$")>; +def : InstRW<[FXa], (instregex "RISBMux$")>; + +// Rotate and Select +def : InstRW<[FXa, FXa, Lat2, BeginGroup], (instregex "R(N|O|X)SBG$")>; + +//===----------------------------------------------------------------------===// +// Comparison +//===----------------------------------------------------------------------===// + +def : InstRW<[FXb, LSU, Lat5], (instregex "C(G|Y|Mux|RL)?$")>; +def : InstRW<[FXb], (instregex "C(F|H)I(Mux)?$")>; +def : InstRW<[FXb], (instregex "CG(F|H)I$")>; +def : InstRW<[FXb, LSU, Lat5], (instregex "CG(HSI|RL)$")>; +def : InstRW<[FXb], (instregex "C(G)?R$")>; 
+def : InstRW<[FXb], (instregex "CIH$")>; +def : InstRW<[FXb, LSU, Lat5], (instregex "CH(F|SI)$")>; +def : InstRW<[FXb, LSU, Lat5], (instregex "CL(Y|Mux|FHSI)?$")>; +def : InstRW<[FXb], (instregex "CLFI(Mux)?$")>; +def : InstRW<[FXb, LSU, Lat5], (instregex "CLG(HRL|HSI)?$")>; +def : InstRW<[FXb, LSU, Lat5], (instregex "CLGF(RL)?$")>; +def : InstRW<[FXb], (instregex "CLGF(I|R)$")>; +def : InstRW<[FXb], (instregex "CLGR$")>; +def : InstRW<[FXb, LSU, Lat5], (instregex "CLGRL$")>; +def : InstRW<[FXb, LSU, Lat5], (instregex "CLH(F|RL|HSI)$")>; +def : InstRW<[FXb], (instregex "CLIH$")>; +def : InstRW<[FXb, LSU, Lat5], (instregex "CLI(Y)?$")>; +def : InstRW<[FXb], (instregex "CLR$")>; +def : InstRW<[FXb, LSU, Lat5], (instregex "CLRL$")>; +def : InstRW<[FXb], (instregex "C(L)?HHR$")>; +def : InstRW<[FXb, Lat2], (instregex "C(L)?HLR$")>; + +// Compare halfword +def : InstRW<[FXb, LSU, Lat6], (instregex "CH(Y|RL)?$")>; +def : InstRW<[FXb, LSU, Lat6], (instregex "CGH(RL)?$")>; +def : InstRW<[FXa, FXb, LSU, Lat6, BeginGroup], (instregex "CHHSI$")>; + +// Compare with sign extension (32 -> 64) +def : InstRW<[FXb, LSU, Lat6], (instregex "CGF(RL)?$")>; +def : InstRW<[FXb, Lat2], (instregex "CGFR$")>; + +// Compare logical character +def : InstRW<[FXb, LSU, LSU, Lat9, BeginGroup], (instregex "CLC$")>; +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CLCL(E|U)?$")>; +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CLST$")>; + +// Test under mask +def : InstRW<[FXb, LSU, Lat5], (instregex "TM(Y)?$")>; +def : InstRW<[FXb], (instregex "TM(H|L)Mux$")>; +def : InstRW<[FXb], (instregex "TMHH(64)?$")>; +def : InstRW<[FXb], (instregex "TMHL(64)?$")>; +def : InstRW<[FXb], (instregex "TMLH(64)?$")>; +def : InstRW<[FXb], (instregex "TMLL(64)?$")>; + +// Compare logical characters under mask +def : InstRW<[FXb, LSU, Lat6], (instregex "CLM(H|Y)?$")>; + +//===----------------------------------------------------------------------===// +// Prefetch and execution hint +//===----------------------------------------------------------------------===// + +def : InstRW<[LSU], (instregex "PFD(RL)?$")>; +def : InstRW<[FXb, Lat2], (instregex "BPP$")>; +def : InstRW<[FXb, EndGroup], (instregex "BPRP$")>; +def : InstRW<[FXb], (instregex "NIAI$")>; + +//===----------------------------------------------------------------------===// +// Atomic operations +//===----------------------------------------------------------------------===// + +def : InstRW<[FXb, EndGroup], (instregex "Serialize$")>; + +def : InstRW<[FXb, LSU, Lat5], (instregex "LAA(G)?$")>; +def : InstRW<[FXb, LSU, Lat5], (instregex "LAAL(G)?$")>; +def : InstRW<[FXb, LSU, Lat5], (instregex "LAN(G)?$")>; +def : InstRW<[FXb, LSU, Lat5], (instregex "LAO(G)?$")>; +def : InstRW<[FXb, LSU, Lat5], (instregex "LAX(G)?$")>; + +// Test and set +def : InstRW<[FXb, LSU, Lat5, EndGroup], (instregex "TS$")>; + +// Compare and swap +def : InstRW<[FXa, FXb, LSU, Lat6, GroupAlone], (instregex "CS(G|Y)?$")>; + +// Compare double and swap +def : InstRW<[FXa, FXa, FXb, FXb, FXa, LSU, Lat10, GroupAlone], + (instregex "CDS(Y)?$")>; +def : InstRW<[FXa, FXa, FXb, FXb, LSU, FXb, FXb, LSU, LSU, Lat20, GroupAlone], + (instregex "CDSG$")>; + +// Compare and swap and store +def : InstRW<[FXa, LSU, Lat30], (instregex "CSST$")>; + +// Perform locked operation +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "PLO$")>; + +// Load/store pair from/to quadword +def : InstRW<[LSU, LSU, Lat5, GroupAlone], (instregex "LPQ$")>; +def : InstRW<[FXb, FXb, LSU, Lat6, GroupAlone], (instregex "STPQ$")>; + +// 
Load pair disjoint +def : InstRW<[LSU, LSU, Lat5, GroupAlone], (instregex "LPD(G)?$")>; + +//===----------------------------------------------------------------------===// +// Translate and convert +//===----------------------------------------------------------------------===// + +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "TR$")>; +def : InstRW<[FXa, FXa, FXa, LSU, LSU, Lat30, GroupAlone], (instregex "TRT$")>; +def : InstRW<[FXa, LSU, Lat30], (instregex "TRTR$")>; +def : InstRW<[FXa, Lat30], (instregex "TR(TR)?(T)?(E|EOpt)?$")>; +def : InstRW<[LSU, Lat30], (instregex "TR(T|O)(T|O)(Opt)?$")>; +def : InstRW<[FXa, Lat30], (instregex "CU(12|14|21|24|41|42)(Opt)?$")>; +def : InstRW<[FXa, Lat30], (instregex "(CUUTF|CUTFU)(Opt)?$")>; + +//===----------------------------------------------------------------------===// +// Message-security assist +//===----------------------------------------------------------------------===// + +def : InstRW<[FXa, Lat30], (instregex "KM(C|F|O|CTR|A)?$")>; +def : InstRW<[FXa, Lat30], (instregex "(KIMD|KLMD|KMAC)$")>; +def : InstRW<[FXa, Lat30], (instregex "(PCC|PPNO|PRNO)$")>; + +//===----------------------------------------------------------------------===// +// Guarded storage +//===----------------------------------------------------------------------===// + +def : InstRW<[LSU], (instregex "LGG$")>; +def : InstRW<[LSU, Lat5], (instregex "LLGFSG$")>; +def : InstRW<[LSU, Lat30], (instregex "(L|ST)GSC$")>; + +//===----------------------------------------------------------------------===// +// Decimal arithmetic +//===----------------------------------------------------------------------===// + +def : InstRW<[FXb, VecDF, VecDF, LSU, LSU, Lat30, GroupAlone], + (instregex "CVBG$")>; +def : InstRW<[FXb, VecDF, LSU, Lat30, GroupAlone], (instregex "CVB(Y)?$")>; +def : InstRW<[FXb, FXb, FXb, VecDF2, VecDF2, LSU, Lat30, GroupAlone], + (instregex "CVDG$")>; +def : InstRW<[FXb, VecDF, FXb, LSU, Lat30, GroupAlone], (instregex "CVD(Y)?$")>; +def : InstRW<[LSU, Lat10, GroupAlone], (instregex "MVO$")>; +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MV(N|Z)$")>; +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "(PACK|PKA|PKU)$")>; +def : InstRW<[LSU, Lat12, GroupAlone], (instregex "UNPK(A|U)$")>; +def : InstRW<[FXb, LSU, LSU, Lat9, BeginGroup], (instregex "UNPK$")>; + +def : InstRW<[FXb, VecDFX, LSU, LSU, LSU, Lat9, GroupAlone], + (instregex "(A|S|ZA)P$")>; +def : InstRW<[FXb, VecDFX2, VecDFX2, LSU, LSU, LSU, Lat30, GroupAlone], + (instregex "(M|D)P$")>; +def : InstRW<[FXb, VecDFX, VecDFX, LSU, LSU, Lat15, GroupAlone], + (instregex "SRP$")>; +def : InstRW<[VecDFX, LSU, LSU, Lat5, GroupAlone], (instregex "CP$")>; +def : InstRW<[VecDFX, LSU, Lat4, BeginGroup], (instregex "TP$")>; +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "ED(MK)?$")>; + +//===----------------------------------------------------------------------===// +// Access registers +//===----------------------------------------------------------------------===// + +// Extract/set/copy access register +def : InstRW<[LSU], (instregex "(EAR|SAR|CPYA)$")>; + +// Load address extended +def : InstRW<[LSU, FXa, Lat5, BeginGroup], (instregex "LAE(Y)?$")>; + +// Load/store access multiple (not modeled precisely) +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "(L|ST)AM(Y)?$")>; + +//===----------------------------------------------------------------------===// +// Program mask and addressing mode +//===----------------------------------------------------------------------===// + +// Insert Program Mask +def 
: InstRW<[FXa, Lat3, EndGroup], (instregex "IPM$")>; + +// Set Program Mask +def : InstRW<[LSU, EndGroup], (instregex "SPM$")>; + +// Branch and link +def : InstRW<[FXa, FXa, FXb, Lat5, GroupAlone], (instregex "BAL(R)?$")>; + +// Test addressing mode +def : InstRW<[FXb], (instregex "TAM$")>; + +// Set addressing mode +def : InstRW<[FXb, Lat2, EndGroup], (instregex "SAM(24|31|64)$")>; + +// Branch (and save) and set mode. +def : InstRW<[FXa, FXb, Lat2, GroupAlone], (instregex "BSM$")>; +def : InstRW<[FXa, FXa, FXb, Lat3, GroupAlone], (instregex "BASSM$")>; + +//===----------------------------------------------------------------------===// +// Transactional execution +//===----------------------------------------------------------------------===// + +// Transaction begin +def : InstRW<[LSU, LSU, FXb, FXb, FXb, FXb, FXb, Lat15, GroupAlone], + (instregex "TBEGIN(C|_nofloat)?$")>; + +// Transaction end +def : InstRW<[FXb, GroupAlone], (instregex "TEND$")>; + +// Transaction abort +def : InstRW<[LSU, GroupAlone], (instregex "TABORT$")>; + +// Extract Transaction Nesting Depth +def : InstRW<[FXa], (instregex "ETND$")>; + +// Nontransactional store +def : InstRW<[FXb, LSU, Lat5], (instregex "NTSTG$")>; + +//===----------------------------------------------------------------------===// +// Processor assist +//===----------------------------------------------------------------------===// + +def : InstRW<[FXb, GroupAlone], (instregex "PPA$")>; + +//===----------------------------------------------------------------------===// +// Miscellaneous Instructions. +//===----------------------------------------------------------------------===// + +// Find leftmost one +def : InstRW<[FXa, FXa, Lat4, GroupAlone], (instregex "FLOGR$")>; + +// Population count +def : InstRW<[FXa, Lat3], (instregex "POPCNT$")>; + +// Extend +def : InstRW<[FXa], (instregex "AEXT128$")>; +def : InstRW<[FXa], (instregex "ZEXT128$")>; + +// String instructions +def : InstRW<[FXa, LSU, Lat30], (instregex "SRST$")>; +def : InstRW<[FXa, Lat30], (instregex "SRSTU$")>; +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CUSE$")>; + +// Various complex instructions +def : InstRW<[LSU, Lat30], (instregex "CFC$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "UPT$")>; +def : InstRW<[LSU, Lat30], (instregex "CKSM$")>; +def : InstRW<[FXa, Lat30], (instregex "CMPSC$")>; + +// Execute +def : InstRW<[FXb, GroupAlone], (instregex "EX(RL)?$")>; + +//===----------------------------------------------------------------------===// +// .insn directive instructions +//===----------------------------------------------------------------------===// + +// An "empty" sched-class will be assigned instead of the "invalid sched-class". +// getNumDecoderSlots() will then return 1 instead of 0. 
+def : InstRW<[], (instregex "Insn.*")>; + + +// ----------------------------- Floating point ----------------------------- // + +//===----------------------------------------------------------------------===// +// FP: Select instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXa], (instregex "Select(F32|F64|F128|VR128)$")>; +def : InstRW<[FXa], (instregex "CondStoreF32(Inv)?$")>; +def : InstRW<[FXa], (instregex "CondStoreF64(Inv)?$")>; + +//===----------------------------------------------------------------------===// +// FP: Move instructions +//===----------------------------------------------------------------------===// + +// Load zero +def : InstRW<[FXb], (instregex "LZ(DR|ER)$")>; +def : InstRW<[FXb, FXb, Lat2, BeginGroup], (instregex "LZXR$")>; + +// Load +def : InstRW<[VecXsPm], (instregex "LER$")>; +def : InstRW<[FXb], (instregex "LD(R|R32|GR)$")>; +def : InstRW<[FXb, Lat3], (instregex "LGDR$")>; +def : InstRW<[FXb, FXb, Lat2, GroupAlone], (instregex "LXR$")>; + +// Load and Test +def : InstRW<[VecXsPm, Lat4], (instregex "LT(D|E)BR$")>; +def : InstRW<[VecXsPm, Lat4], (instregex "LTEBRCompare(_VecPseudo)?$")>; +def : InstRW<[VecXsPm, Lat4], (instregex "LTDBRCompare(_VecPseudo)?$")>; +def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "LTXBR$")>; +def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], + (instregex "LTXBRCompare(_VecPseudo)?$")>; + +// Copy sign +def : InstRW<[VecXsPm], (instregex "CPSDRd(d|s)$")>; +def : InstRW<[VecXsPm], (instregex "CPSDRs(d|s)$")>; + +//===----------------------------------------------------------------------===// +// FP: Load instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[VecXsPm, LSU, Lat7], (instregex "LE(Y)?$")>; +def : InstRW<[LSU], (instregex "LD(Y|E32)?$")>; +def : InstRW<[LSU], (instregex "LX$")>; + +//===----------------------------------------------------------------------===// +// FP: Store instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXb, LSU, Lat7], (instregex "STD(Y)?$")>; +def : InstRW<[FXb, LSU, Lat7], (instregex "STE(Y)?$")>; +def : InstRW<[FXb, LSU, Lat5], (instregex "STX$")>; + +//===----------------------------------------------------------------------===// +// FP: Conversion instructions +//===----------------------------------------------------------------------===// + +// Load rounded +def : InstRW<[VecBF], (instregex "LEDBR(A)?$")>; +def : InstRW<[VecDF, VecDF, Lat20], (instregex "LEXBR(A)?$")>; +def : InstRW<[VecDF, VecDF, Lat20], (instregex "LDXBR(A)?$")>; + +// Load lengthened +def : InstRW<[VecBF, LSU, Lat12], (instregex "LDEB$")>; +def : InstRW<[VecBF], (instregex "LDEBR$")>; +def : InstRW<[VecBF2, VecBF2, LSU, Lat12 , GroupAlone], (instregex "LX(D|E)B$")>; +def : InstRW<[VecBF2, VecBF2, GroupAlone], (instregex "LX(D|E)BR$")>; + +// Convert from fixed / logical +def : InstRW<[FXb, VecBF, Lat9, BeginGroup], (instregex "CE(F|G)BR(A)?$")>; +def : InstRW<[FXb, VecBF, Lat9, BeginGroup], (instregex "CD(F|G)BR(A)?$")>; +def : InstRW<[FXb, VecDF2, VecDF2, Lat12, GroupAlone], (instregex "CX(F|G)BR(A)?$")>; +def : InstRW<[FXb, VecBF, Lat9, BeginGroup], (instregex "CEL(F|G)BR$")>; +def : InstRW<[FXb, VecBF, Lat9, BeginGroup], (instregex "CDL(F|G)BR$")>; +def : InstRW<[FXb, VecDF2, VecDF2, Lat12, GroupAlone], (instregex "CXL(F|G)BR$")>; + +// Convert to fixed / logical +def : InstRW<[FXb, VecBF, Lat11, BeginGroup], (instregex 
"CF(E|D)BR(A)?$")>; +def : InstRW<[FXb, VecBF, Lat11, BeginGroup], (instregex "CG(E|D)BR(A)?$")>; +def : InstRW<[FXb, VecDF, VecDF, Lat20, BeginGroup], (instregex "C(F|G)XBR(A)?$")>; +def : InstRW<[FXb, VecBF, Lat11, GroupAlone], (instregex "CLFEBR$")>; +def : InstRW<[FXb, VecBF, Lat11, BeginGroup], (instregex "CLFDBR$")>; +def : InstRW<[FXb, VecBF, Lat11, BeginGroup], (instregex "CLG(E|D)BR$")>; +def : InstRW<[FXb, VecDF, VecDF, Lat20, BeginGroup], (instregex "CL(F|G)XBR$")>; + +//===----------------------------------------------------------------------===// +// FP: Unary arithmetic +//===----------------------------------------------------------------------===// + +// Load Complement / Negative / Positive +def : InstRW<[VecXsPm, Lat4], (instregex "L(C|N|P)DBR$")>; +def : InstRW<[VecXsPm, Lat4], (instregex "L(C|N|P)EBR$")>; +def : InstRW<[FXb], (instregex "LCDFR(_32)?$")>; +def : InstRW<[FXb], (instregex "LNDFR(_32)?$")>; +def : InstRW<[FXb], (instregex "LPDFR(_32)?$")>; +def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "L(C|N|P)XBR$")>; + +// Square root +def : InstRW<[VecFPd, LSU], (instregex "SQ(E|D)B$")>; +def : InstRW<[VecFPd], (instregex "SQ(E|D)BR$")>; +def : InstRW<[VecFPd, VecFPd, GroupAlone], (instregex "SQXBR$")>; + +// Load FP integer +def : InstRW<[VecBF], (instregex "FIEBR(A)?$")>; +def : InstRW<[VecBF], (instregex "FIDBR(A)?$")>; +def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "FIXBR(A)?$")>; + +//===----------------------------------------------------------------------===// +// FP: Binary arithmetic +//===----------------------------------------------------------------------===// + +// Addition +def : InstRW<[VecBF, LSU, Lat12], (instregex "A(E|D)B$")>; +def : InstRW<[VecBF], (instregex "A(E|D)BR$")>; +def : InstRW<[VecDF2, VecDF2, Lat10, GroupAlone], (instregex "AXBR$")>; + +// Subtraction +def : InstRW<[VecBF, LSU, Lat12], (instregex "S(E|D)B$")>; +def : InstRW<[VecBF], (instregex "S(E|D)BR$")>; +def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "SXBR$")>; + +// Multiply +def : InstRW<[VecBF, LSU, Lat12], (instregex "M(D|DE|EE)B$")>; +def : InstRW<[VecBF], (instregex "M(D|DE|EE)BR$")>; +def : InstRW<[VecBF2, VecBF2, LSU, Lat12, GroupAlone], (instregex "MXDB$")>; +def : InstRW<[VecBF2, VecBF2, GroupAlone], (instregex "MXDBR$")>; +def : InstRW<[VecDF2, VecDF2, Lat20, GroupAlone], (instregex "MXBR$")>; + +// Multiply and add / subtract +def : InstRW<[VecBF2, LSU, Lat12, GroupAlone], (instregex "M(A|S)EB$")>; +def : InstRW<[VecBF, GroupAlone], (instregex "M(A|S)EBR$")>; +def : InstRW<[VecBF2, LSU, Lat12, GroupAlone], (instregex "M(A|S)DB$")>; +def : InstRW<[VecBF], (instregex "M(A|S)DBR$")>; + +// Division +def : InstRW<[VecFPd, LSU], (instregex "D(E|D)B$")>; +def : InstRW<[VecFPd], (instregex "D(E|D)BR$")>; +def : InstRW<[VecFPd, VecFPd, GroupAlone], (instregex "DXBR$")>; + +// Divide to integer +def : InstRW<[VecFPd, Lat30], (instregex "DI(E|D)BR$")>; + +//===----------------------------------------------------------------------===// +// FP: Comparisons +//===----------------------------------------------------------------------===// + +// Compare +def : InstRW<[VecXsPm, LSU, Lat8], (instregex "(K|C)(E|D)B$")>; +def : InstRW<[VecXsPm, Lat4], (instregex "(K|C)(E|D)BR?$")>; +def : InstRW<[VecDF, VecDF, Lat20, GroupAlone], (instregex "(K|C)XBR$")>; + +// Test Data Class +def : InstRW<[LSU, VecXsPm, Lat9], (instregex "TC(E|D)B$")>; +def : InstRW<[LSU, VecDF2, VecDF2, Lat15, GroupAlone], (instregex "TCXB$")>; + 
+//===----------------------------------------------------------------------===// +// FP: Floating-point control register instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXa, LSU, Lat4, GroupAlone], (instregex "EFPC$")>; +def : InstRW<[FXb, LSU, Lat5, GroupAlone], (instregex "STFPC$")>; +def : InstRW<[LSU, Lat3, GroupAlone], (instregex "SFPC$")>; +def : InstRW<[LSU, LSU, Lat6, GroupAlone], (instregex "LFPC$")>; +def : InstRW<[FXa, Lat30], (instregex "SFASR$")>; +def : InstRW<[FXa, LSU, Lat30], (instregex "LFAS$")>; +def : InstRW<[FXb, Lat3, GroupAlone], (instregex "SRNM(B|T)?$")>; + + +// --------------------- Hexadecimal floating point ------------------------- // + +//===----------------------------------------------------------------------===// +// HFP: Move instructions +//===----------------------------------------------------------------------===// + +// Load and Test +def : InstRW<[VecXsPm, Lat4], (instregex "LT(D|E)R$")>; +def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "LTXR$")>; + +//===----------------------------------------------------------------------===// +// HFP: Conversion instructions +//===----------------------------------------------------------------------===// + +// Load rounded +def : InstRW<[VecBF], (instregex "(LEDR|LRER)$")>; +def : InstRW<[VecBF], (instregex "LEXR$")>; +def : InstRW<[VecDF2], (instregex "(LDXR|LRDR)$")>; + +// Load lengthened +def : InstRW<[LSU], (instregex "LDE$")>; +def : InstRW<[FXb], (instregex "LDER$")>; +def : InstRW<[VecBF2, VecBF2, LSU, Lat12, GroupAlone], (instregex "LX(D|E)$")>; +def : InstRW<[VecBF2, VecBF2, GroupAlone], (instregex "LX(D|E)R$")>; + +// Convert from fixed +def : InstRW<[FXb, VecBF, Lat9, BeginGroup], (instregex "CE(F|G)R$")>; +def : InstRW<[FXb, VecBF, Lat9, BeginGroup], (instregex "CD(F|G)R$")>; +def : InstRW<[FXb, VecDF2, VecDF2, Lat12, GroupAlone], (instregex "CX(F|G)R$")>; + +// Convert to fixed +def : InstRW<[FXb, VecBF, Lat11, BeginGroup], (instregex "CF(E|D)R$")>; +def : InstRW<[FXb, VecBF, Lat11, BeginGroup], (instregex "CG(E|D)R$")>; +def : InstRW<[FXb, VecDF, VecDF, Lat20, BeginGroup], (instregex "C(F|G)XR$")>; + +// Convert BFP to HFP / HFP to BFP. 
+def : InstRW<[VecBF], (instregex "THD(E)?R$")>; +def : InstRW<[VecBF], (instregex "TB(E)?DR$")>; + +//===----------------------------------------------------------------------===// +// HFP: Unary arithmetic +//===----------------------------------------------------------------------===// + +// Load Complement / Negative / Positive +def : InstRW<[VecXsPm, Lat4], (instregex "L(C|N|P)DR$")>; +def : InstRW<[VecXsPm, Lat4], (instregex "L(C|N|P)ER$")>; +def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "L(C|N|P)XR$")>; + +// Halve +def : InstRW<[VecBF], (instregex "H(E|D)R$")>; + +// Square root +def : InstRW<[VecFPd, LSU], (instregex "SQ(E|D)$")>; +def : InstRW<[VecFPd], (instregex "SQ(E|D)R$")>; +def : InstRW<[VecFPd, VecFPd, GroupAlone], (instregex "SQXR$")>; + +// Load FP integer +def : InstRW<[VecBF], (instregex "FIER$")>; +def : InstRW<[VecBF], (instregex "FIDR$")>; +def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "FIXR$")>; + +//===----------------------------------------------------------------------===// +// HFP: Binary arithmetic +//===----------------------------------------------------------------------===// + +// Addition +def : InstRW<[VecBF, LSU, Lat12], (instregex "A(E|D|U|W)$")>; +def : InstRW<[VecBF], (instregex "A(E|D|U|W)R$")>; +def : InstRW<[VecDF2, VecDF2, Lat10, GroupAlone], (instregex "AXR$")>; + +// Subtraction +def : InstRW<[VecBF, LSU, Lat12], (instregex "S(E|D|U|W)$")>; +def : InstRW<[VecBF], (instregex "S(E|D|U|W)R$")>; +def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "SXR$")>; + +// Multiply +def : InstRW<[VecBF, LSU, Lat12], (instregex "M(D|DE|E|EE)$")>; +def : InstRW<[VecBF], (instregex "M(D|DE|E|EE)R$")>; +def : InstRW<[VecBF2, VecBF2, LSU, Lat12, GroupAlone], (instregex "MXD$")>; +def : InstRW<[VecBF2, VecBF2, GroupAlone], (instregex "MXDR$")>; +def : InstRW<[VecDF2, VecDF2, Lat20, GroupAlone], (instregex "MXR$")>; +def : InstRW<[VecBF2, VecBF2, LSU, Lat12, GroupAlone], (instregex "MY$")>; +def : InstRW<[VecBF2, LSU, Lat12, GroupAlone], (instregex "MY(H|L)$")>; +def : InstRW<[VecBF2, VecBF2, GroupAlone], (instregex "MYR$")>; +def : InstRW<[VecBF, GroupAlone], (instregex "MY(H|L)R$")>; + +// Multiply and add / subtract +def : InstRW<[VecBF2, LSU, Lat12, GroupAlone], (instregex "M(A|S)E$")>; +def : InstRW<[VecBF, GroupAlone], (instregex "M(A|S)ER$")>; +def : InstRW<[VecBF2, LSU, Lat12, GroupAlone], (instregex "M(A|S)D$")>; +def : InstRW<[VecBF, GroupAlone], (instregex "M(A|S)DR$")>; +def : InstRW<[VecBF2, LSU, Lat12, GroupAlone], (instregex "MAY(H|L)$")>; +def : InstRW<[VecBF2, VecBF2, LSU, Lat12, GroupAlone], (instregex "MAY$")>; +def : InstRW<[VecBF, GroupAlone], (instregex "MAY(H|L)R$")>; +def : InstRW<[VecBF2, VecBF2, GroupAlone], (instregex "MAYR$")>; + +// Division +def : InstRW<[VecFPd, LSU], (instregex "D(E|D)$")>; +def : InstRW<[VecFPd], (instregex "D(E|D)R$")>; +def : InstRW<[VecFPd, VecFPd, GroupAlone], (instregex "DXR$")>; + +//===----------------------------------------------------------------------===// +// HFP: Comparisons +//===----------------------------------------------------------------------===// + +// Compare +def : InstRW<[VecBF, LSU, Lat12], (instregex "C(E|D)$")>; +def : InstRW<[VecBF], (instregex "C(E|D)R$")>; +def : InstRW<[VecDF, VecDF, Lat20, GroupAlone], (instregex "CXR$")>; + + +// ------------------------ Decimal floating point -------------------------- // + +//===----------------------------------------------------------------------===// +// DFP: Move instructions 
+//===----------------------------------------------------------------------===// + +// Load and Test +def : InstRW<[VecDF], (instregex "LTDTR$")>; +def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "LTXTR$")>; + +//===----------------------------------------------------------------------===// +// DFP: Conversion instructions +//===----------------------------------------------------------------------===// + +// Load rounded +def : InstRW<[VecDF, Lat15], (instregex "LEDTR$")>; +def : InstRW<[VecDF, VecDF, Lat20], (instregex "LDXTR$")>; + +// Load lengthened +def : InstRW<[VecDF], (instregex "LDETR$")>; +def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "LXDTR$")>; + +// Convert from fixed / logical +def : InstRW<[FXb, VecDF, Lat30, BeginGroup], (instregex "CD(F|G)TR(A)?$")>; +def : InstRW<[FXb, VecDF2, VecDF2, Lat30, GroupAlone], (instregex "CX(F|G)TR(A)?$")>; +def : InstRW<[FXb, VecDF, Lat30, BeginGroup], (instregex "CDL(F|G)TR$")>; +def : InstRW<[FXb, VecDF2, VecDF2, Lat30, GroupAlone], (instregex "CXL(F|G)TR$")>; + +// Convert to fixed / logical +def : InstRW<[FXb, VecDF, Lat30, BeginGroup], (instregex "C(F|G)DTR(A)?$")>; +def : InstRW<[FXb, VecDF, VecDF, Lat30, BeginGroup], (instregex "C(F|G)XTR(A)?$")>; +def : InstRW<[FXb, VecDF, Lat30, BeginGroup], (instregex "CL(F|G)DTR$")>; +def : InstRW<[FXb, VecDF, VecDF, Lat30, BeginGroup], (instregex "CL(F|G)XTR$")>; + +// Convert from / to signed / unsigned packed +def : InstRW<[FXb, VecDF, Lat9, BeginGroup], (instregex "CD(S|U)TR$")>; +def : InstRW<[FXb, FXb, VecDF2, VecDF2, Lat15, GroupAlone], (instregex "CX(S|U)TR$")>; +def : InstRW<[FXb, VecDF, Lat12, BeginGroup], (instregex "C(S|U)DTR$")>; +def : InstRW<[FXb, FXb, VecDF2, VecDF2, Lat15, GroupAlone], (instregex "C(S|U)XTR$")>; + +// Convert from / to zoned +def : InstRW<[LSU, VecDF, Lat11, BeginGroup], (instregex "CDZT$")>; +def : InstRW<[LSU, LSU, VecDF2, VecDF2, Lat15, GroupAlone], (instregex "CXZT$")>; +def : InstRW<[FXb, LSU, VecDF, Lat11, BeginGroup], (instregex "CZDT$")>; +def : InstRW<[FXb, LSU, VecDF, VecDF, Lat15, GroupAlone], (instregex "CZXT$")>; + +// Convert from / to packed +def : InstRW<[LSU, VecDF, Lat11, BeginGroup], (instregex "CDPT$")>; +def : InstRW<[LSU, LSU, VecDF2, VecDF2, Lat15, GroupAlone], (instregex "CXPT$")>; +def : InstRW<[FXb, LSU, VecDF, Lat11, BeginGroup], (instregex "CPDT$")>; +def : InstRW<[FXb, LSU, VecDF, VecDF, Lat15, GroupAlone], (instregex "CPXT$")>; + +// Perform floating-point operation +def : InstRW<[FXb, Lat30], (instregex "PFPO$")>; + +//===----------------------------------------------------------------------===// +// DFP: Unary arithmetic +//===----------------------------------------------------------------------===// + +// Load FP integer +def : InstRW<[VecDF], (instregex "FIDTR$")>; +def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "FIXTR$")>; + +// Extract biased exponent +def : InstRW<[FXb, VecDF, Lat12, BeginGroup], (instregex "EEDTR$")>; +def : InstRW<[FXb, VecDF, Lat12, BeginGroup], (instregex "EEXTR$")>; + +// Extract significance +def : InstRW<[FXb, VecDF, Lat12, BeginGroup], (instregex "ESDTR$")>; +def : InstRW<[FXb, VecDF, VecDF, Lat15, BeginGroup], (instregex "ESXTR$")>; + +//===----------------------------------------------------------------------===// +// DFP: Binary arithmetic +//===----------------------------------------------------------------------===// + +// Addition +def : InstRW<[VecDF], (instregex "ADTR(A)?$")>; +def : InstRW<[VecDF2, VecDF2, Lat10, GroupAlone], (instregex "AXTR(A)?$")>; 
+ +// Subtraction +def : InstRW<[VecDF], (instregex "SDTR(A)?$")>; +def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "SXTR(A)?$")>; + +// Multiply +def : InstRW<[VecDF, Lat30], (instregex "MDTR(A)?$")>; +def : InstRW<[VecDF2, VecDF2, Lat30, GroupAlone], (instregex "MXTR(A)?$")>; + +// Division +def : InstRW<[VecDF, Lat30], (instregex "DDTR(A)?$")>; +def : InstRW<[VecDF2, VecDF2, Lat30, GroupAlone], (instregex "DXTR(A)?$")>; + +// Quantize +def : InstRW<[VecDF], (instregex "QADTR$")>; +def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "QAXTR$")>; + +// Reround +def : InstRW<[FXb, VecDF, Lat11, BeginGroup], (instregex "RRDTR$")>; +def : InstRW<[FXb, VecDF2, VecDF2, Lat15, GroupAlone], (instregex "RRXTR$")>; + +// Shift significand left/right +def : InstRW<[LSU, VecDF, Lat11, GroupAlone], (instregex "S(L|R)DT$")>; +def : InstRW<[LSU, VecDF2, VecDF2, Lat15, GroupAlone], (instregex "S(L|R)XT$")>; + +// Insert biased exponent +def : InstRW<[FXb, VecDF, Lat11, BeginGroup], (instregex "IEDTR$")>; +def : InstRW<[FXb, VecDF2, VecDF2, Lat15, GroupAlone], (instregex "IEXTR$")>; + +//===----------------------------------------------------------------------===// +// DFP: Comparisons +//===----------------------------------------------------------------------===// + +// Compare +def : InstRW<[VecDF], (instregex "(K|C)DTR$")>; +def : InstRW<[VecDF, VecDF, Lat11, GroupAlone], (instregex "(K|C)XTR$")>; + +// Compare biased exponent +def : InstRW<[VecDF], (instregex "CEDTR$")>; +def : InstRW<[VecDF], (instregex "CEXTR$")>; + +// Test Data Class/Group +def : InstRW<[LSU, VecDF, Lat11], (instregex "TD(C|G)(E|D)T$")>; +def : InstRW<[LSU, VecDF, VecDF, Lat15, GroupAlone], (instregex "TD(C|G)XT$")>; + + +// --------------------------------- Vector --------------------------------- // + +//===----------------------------------------------------------------------===// +// Vector: Move instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXb], (instregex "VLR(32|64)?$")>; +def : InstRW<[FXb, Lat4], (instregex "VLGV(B|F|G|H)?$")>; +def : InstRW<[FXb], (instregex "VLVG(B|F|G|H)?$")>; +def : InstRW<[FXb, Lat2], (instregex "VLVGP(32)?$")>; + +//===----------------------------------------------------------------------===// +// Vector: Immediate instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[VecXsPm], (instregex "VZERO$")>; +def : InstRW<[VecXsPm], (instregex "VONE$")>; +def : InstRW<[VecXsPm], (instregex "VGBM$")>; +def : InstRW<[VecXsPm], (instregex "VGM(B|F|G|H)?$")>; +def : InstRW<[VecXsPm], (instregex "VREPI(B|F|G|H)?$")>; +def : InstRW<[VecXsPm], (instregex "VLEI(B|F|G|H)$")>; + +//===----------------------------------------------------------------------===// +// Vector: Loads +//===----------------------------------------------------------------------===// + +def : InstRW<[LSU], (instregex "VL(L|BB)?$")>; +def : InstRW<[LSU], (instregex "VL(32|64)$")>; +def : InstRW<[LSU], (instregex "VLLEZ(B|F|G|H|LF)?$")>; +def : InstRW<[LSU], (instregex "VLREP(B|F|G|H)?$")>; +def : InstRW<[VecXsPm, LSU, Lat7], (instregex "VLE(B|F|G|H)$")>; +def : InstRW<[FXb, LSU, VecXsPm, Lat11, BeginGroup], (instregex "VGE(F|G)$")>; +def : InstRW<[LSU, LSU, LSU, LSU, LSU, Lat10, GroupAlone], + (instregex "VLM$")>; +def : InstRW<[LSU, Lat5], (instregex "VLRL(R)?$")>; + +//===----------------------------------------------------------------------===// +// Vector: Stores 
+//===----------------------------------------------------------------------===// + +def : InstRW<[FXb, LSU, Lat8], (instregex "VST(L|32|64)?$")>; +def : InstRW<[FXb, LSU, Lat8], (instregex "VSTE(F|G)$")>; +def : InstRW<[FXb, LSU, VecXsPm, Lat11, BeginGroup], (instregex "VSTE(B|H)$")>; +def : InstRW<[LSU, LSU, FXb, FXb, FXb, FXb, FXb, Lat20, GroupAlone], + (instregex "VSTM$")>; +def : InstRW<[FXb, FXb, LSU, Lat12, BeginGroup], (instregex "VSCE(F|G)$")>; +def : InstRW<[FXb, LSU, Lat8], (instregex "VSTRL(R)?$")>; + +//===----------------------------------------------------------------------===// +// Vector: Selects and permutes +//===----------------------------------------------------------------------===// + +def : InstRW<[VecXsPm], (instregex "VMRH(B|F|G|H)?$")>; +def : InstRW<[VecXsPm], (instregex "VMRL(B|F|G|H)?$")>; +def : InstRW<[VecXsPm], (instregex "VPERM$")>; +def : InstRW<[VecXsPm], (instregex "VPDI$")>; +def : InstRW<[VecXsPm], (instregex "VBPERM$")>; +def : InstRW<[VecXsPm], (instregex "VREP(B|F|G|H)?$")>; +def : InstRW<[VecXsPm], (instregex "VSEL$")>; + +//===----------------------------------------------------------------------===// +// Vector: Widening and narrowing +//===----------------------------------------------------------------------===// + +def : InstRW<[VecXsPm], (instregex "VPK(F|G|H)?$")>; +def : InstRW<[VecXsPm], (instregex "VPKS(F|G|H)?$")>; +def : InstRW<[VecXsPm, Lat4], (instregex "VPKS(F|G|H)S$")>; +def : InstRW<[VecXsPm], (instregex "VPKLS(F|G|H)?$")>; +def : InstRW<[VecXsPm, Lat4], (instregex "VPKLS(F|G|H)S$")>; +def : InstRW<[VecXsPm], (instregex "VSEG(B|F|H)?$")>; +def : InstRW<[VecXsPm], (instregex "VUPH(B|F|H)?$")>; +def : InstRW<[VecXsPm], (instregex "VUPL(B|F)?$")>; +def : InstRW<[VecXsPm], (instregex "VUPLH(B|F|H|W)?$")>; +def : InstRW<[VecXsPm], (instregex "VUPLL(B|F|H)?$")>; + +//===----------------------------------------------------------------------===// +// Vector: Integer arithmetic +//===----------------------------------------------------------------------===// + +def : InstRW<[VecXsPm], (instregex "VA(B|F|G|H|Q|C|CQ)?$")>; +def : InstRW<[VecXsPm], (instregex "VACC(B|F|G|H|Q|C|CQ)?$")>; +def : InstRW<[VecXsPm], (instregex "VAVG(B|F|G|H)?$")>; +def : InstRW<[VecXsPm], (instregex "VAVGL(B|F|G|H)?$")>; +def : InstRW<[VecXsPm], (instregex "VN(C|O|N|X)?$")>; +def : InstRW<[VecXsPm], (instregex "VO(C)?$")>; +def : InstRW<[VecMul], (instregex "VCKSM$")>; +def : InstRW<[VecXsPm], (instregex "VCLZ(B|F|G|H)?$")>; +def : InstRW<[VecXsPm], (instregex "VCTZ(B|F|G|H)?$")>; +def : InstRW<[VecXsPm], (instregex "VX$")>; +def : InstRW<[VecMul], (instregex "VGFM?$")>; +def : InstRW<[VecMul], (instregex "VGFMA(B|F|G|H)?$")>; +def : InstRW<[VecMul], (instregex "VGFM(B|F|G|H)$")>; +def : InstRW<[VecXsPm], (instregex "VLC(B|F|G|H)?$")>; +def : InstRW<[VecXsPm], (instregex "VLP(B|F|G|H)?$")>; +def : InstRW<[VecXsPm], (instregex "VMX(B|F|G|H)?$")>; +def : InstRW<[VecXsPm], (instregex "VMXL(B|F|G|H)?$")>; +def : InstRW<[VecXsPm], (instregex "VMN(B|F|G|H)?$")>; +def : InstRW<[VecXsPm], (instregex "VMNL(B|F|G|H)?$")>; +def : InstRW<[VecMul], (instregex "VMAL(B|F)?$")>; +def : InstRW<[VecMul], (instregex "VMALE(B|F|H)?$")>; +def : InstRW<[VecMul], (instregex "VMALH(B|F|H|W)?$")>; +def : InstRW<[VecMul], (instregex "VMALO(B|F|H)?$")>; +def : InstRW<[VecMul], (instregex "VMAO(B|F|H)?$")>; +def : InstRW<[VecMul], (instregex "VMAE(B|F|H)?$")>; +def : InstRW<[VecMul], (instregex "VMAH(B|F|H)?$")>; +def : InstRW<[VecMul], (instregex "VME(B|F|H)?$")>; +def : InstRW<[VecMul], 
(instregex "VMH(B|F|H)?$")>; +def : InstRW<[VecMul], (instregex "VML(B|F)?$")>; +def : InstRW<[VecMul], (instregex "VMLE(B|F|H)?$")>; +def : InstRW<[VecMul], (instregex "VMLH(B|F|H|W)?$")>; +def : InstRW<[VecMul], (instregex "VMLO(B|F|H)?$")>; +def : InstRW<[VecMul], (instregex "VMO(B|F|H)?$")>; +def : InstRW<[VecBF2], (instregex "VMSL(G)?$")>; + +def : InstRW<[VecXsPm], (instregex "VPOPCT(B|F|G|H)?$")>; + +def : InstRW<[VecXsPm], (instregex "VERLL(B|F|G|H)?$")>; +def : InstRW<[VecXsPm], (instregex "VERLLV(B|F|G|H)?$")>; +def : InstRW<[VecXsPm], (instregex "VERIM(B|F|G|H)?$")>; +def : InstRW<[VecXsPm], (instregex "VESL(B|F|G|H)?$")>; +def : InstRW<[VecXsPm], (instregex "VESLV(B|F|G|H)?$")>; +def : InstRW<[VecXsPm], (instregex "VESRA(B|F|G|H)?$")>; +def : InstRW<[VecXsPm], (instregex "VESRAV(B|F|G|H)?$")>; +def : InstRW<[VecXsPm], (instregex "VESRL(B|F|G|H)?$")>; +def : InstRW<[VecXsPm], (instregex "VESRLV(B|F|G|H)?$")>; + +def : InstRW<[VecXsPm], (instregex "VSL(DB)?$")>; +def : InstRW<[VecXsPm], (instregex "VSLB$")>; +def : InstRW<[VecXsPm], (instregex "VSR(A|L)$")>; +def : InstRW<[VecXsPm], (instregex "VSR(A|L)B$")>; + +def : InstRW<[VecXsPm], (instregex "VSB(I|IQ|CBI|CBIQ)?$")>; +def : InstRW<[VecXsPm], (instregex "VSCBI(B|F|G|H|Q)?$")>; +def : InstRW<[VecXsPm], (instregex "VS(F|G|H|Q)?$")>; + +def : InstRW<[VecMul], (instregex "VSUM(B|H)?$")>; +def : InstRW<[VecMul], (instregex "VSUMG(F|H)?$")>; +def : InstRW<[VecMul], (instregex "VSUMQ(F|G)?$")>; + +//===----------------------------------------------------------------------===// +// Vector: Integer comparison +//===----------------------------------------------------------------------===// + +def : InstRW<[VecXsPm, Lat4], (instregex "VEC(B|F|G|H)?$")>; +def : InstRW<[VecXsPm, Lat4], (instregex "VECL(B|F|G|H)?$")>; +def : InstRW<[VecXsPm], (instregex "VCEQ(B|F|G|H)?$")>; +def : InstRW<[VecXsPm, Lat4], (instregex "VCEQ(B|F|G|H)S$")>; +def : InstRW<[VecXsPm], (instregex "VCH(B|F|G|H)?$")>; +def : InstRW<[VecXsPm, Lat4], (instregex "VCH(B|F|G|H)S$")>; +def : InstRW<[VecXsPm], (instregex "VCHL(B|F|G|H)?$")>; +def : InstRW<[VecXsPm, Lat4], (instregex "VCHL(B|F|G|H)S$")>; +def : InstRW<[VecStr, Lat5], (instregex "VTM$")>; + +//===----------------------------------------------------------------------===// +// Vector: Floating-point arithmetic +//===----------------------------------------------------------------------===// + +// Conversion and rounding +def : InstRW<[VecBF], (instregex "VCD(L)?G$")>; +def : InstRW<[VecBF], (instregex "VCD(L)?GB$")>; +def : InstRW<[VecBF], (instregex "WCD(L)?GB$")>; +def : InstRW<[VecBF], (instregex "VC(L)?GD$")>; +def : InstRW<[VecBF], (instregex "VC(L)?GDB$")>; +def : InstRW<[VecBF], (instregex "WC(L)?GDB$")>; +def : InstRW<[VecBF], (instregex "VL(DE|ED)$")>; +def : InstRW<[VecBF], (instregex "VL(DE|ED)B$")>; +def : InstRW<[VecBF], (instregex "WL(DE|ED)B$")>; +def : InstRW<[VecBF], (instregex "VFL(L|R)$")>; +def : InstRW<[VecBF], (instregex "VFL(LS|RD)$")>; +def : InstRW<[VecBF], (instregex "WFL(LS|RD)$")>; +def : InstRW<[VecBF2], (instregex "WFLLD$")>; +def : InstRW<[VecDF2, Lat10], (instregex "WFLRX$")>; +def : InstRW<[VecBF2], (instregex "VFI$")>; +def : InstRW<[VecBF], (instregex "VFIDB$")>; +def : InstRW<[VecBF], (instregex "WFIDB$")>; +def : InstRW<[VecBF2], (instregex "VFISB$")>; +def : InstRW<[VecBF], (instregex "WFISB$")>; +def : InstRW<[VecDF2, Lat10], (instregex "WFIXB$")>; + +// Sign operations +def : InstRW<[VecXsPm], (instregex "VFPSO$")>; +def : InstRW<[VecXsPm], (instregex "(V|W)FPSODB$")>; 
+def : InstRW<[VecXsPm], (instregex "(V|W)FPSOSB$")>; +def : InstRW<[VecXsPm], (instregex "WFPSOXB$")>; +def : InstRW<[VecXsPm], (instregex "(V|W)FL(C|N|P)DB$")>; +def : InstRW<[VecXsPm], (instregex "(V|W)FL(C|N|P)SB$")>; +def : InstRW<[VecXsPm], (instregex "WFL(C|N|P)XB$")>; + +// Minimum / maximum +def : InstRW<[VecXsPm], (instregex "VF(MAX|MIN)$")>; +def : InstRW<[VecXsPm], (instregex "VF(MAX|MIN)DB$")>; +def : InstRW<[VecXsPm], (instregex "WF(MAX|MIN)DB$")>; +def : InstRW<[VecXsPm], (instregex "VF(MAX|MIN)SB$")>; +def : InstRW<[VecXsPm], (instregex "WF(MAX|MIN)SB$")>; +def : InstRW<[VecDFX], (instregex "WF(MAX|MIN)XB$")>; + +// Test data class +def : InstRW<[VecXsPm, Lat4], (instregex "VFTCI$")>; +def : InstRW<[VecXsPm, Lat4], (instregex "(V|W)FTCIDB$")>; +def : InstRW<[VecXsPm, Lat4], (instregex "(V|W)FTCISB$")>; +def : InstRW<[VecDFX, Lat4], (instregex "WFTCIXB$")>; + +// Add / subtract +def : InstRW<[VecBF2], (instregex "VF(A|S)$")>; +def : InstRW<[VecBF], (instregex "VF(A|S)DB$")>; +def : InstRW<[VecBF], (instregex "WF(A|S)DB$")>; +def : InstRW<[VecBF2], (instregex "VF(A|S)SB$")>; +def : InstRW<[VecBF], (instregex "WF(A|S)SB$")>; +def : InstRW<[VecDF2, Lat10], (instregex "WF(A|S)XB$")>; + +// Multiply / multiply-and-add/subtract +def : InstRW<[VecBF2], (instregex "VFM$")>; +def : InstRW<[VecBF], (instregex "VFMDB$")>; +def : InstRW<[VecBF], (instregex "WFMDB$")>; +def : InstRW<[VecBF2], (instregex "VFMSB$")>; +def : InstRW<[VecBF], (instregex "WFMSB$")>; +def : InstRW<[VecDF2, Lat20], (instregex "WFMXB$")>; +def : InstRW<[VecBF2], (instregex "VF(N)?M(A|S)$")>; +def : InstRW<[VecBF], (instregex "VF(N)?M(A|S)DB$")>; +def : InstRW<[VecBF], (instregex "WF(N)?M(A|S)DB$")>; +def : InstRW<[VecBF2], (instregex "VF(N)?M(A|S)SB$")>; +def : InstRW<[VecBF], (instregex "WF(N)?M(A|S)SB$")>; +def : InstRW<[VecDF2, Lat20], (instregex "WF(N)?M(A|S)XB$")>; + +// Divide / square root +def : InstRW<[VecFPd], (instregex "VFD$")>; +def : InstRW<[VecFPd], (instregex "(V|W)FDDB$")>; +def : InstRW<[VecFPd], (instregex "(V|W)FDSB$")>; +def : InstRW<[VecFPd], (instregex "WFDXB$")>; +def : InstRW<[VecFPd], (instregex "VFSQ$")>; +def : InstRW<[VecFPd], (instregex "(V|W)FSQDB$")>; +def : InstRW<[VecFPd], (instregex "(V|W)FSQSB$")>; +def : InstRW<[VecFPd], (instregex "WFSQXB$")>; + +//===----------------------------------------------------------------------===// +// Vector: Floating-point comparison +//===----------------------------------------------------------------------===// + +def : InstRW<[VecXsPm], (instregex "VF(C|K)(E|H|HE)$")>; +def : InstRW<[VecXsPm], (instregex "VF(C|K)(E|H|HE)DB$")>; +def : InstRW<[VecXsPm], (instregex "WF(C|K)(E|H|HE)DB$")>; +def : InstRW<[VecXsPm], (instregex "VF(C|K)(E|H|HE)SB$")>; +def : InstRW<[VecXsPm], (instregex "WF(C|K)(E|H|HE)SB$")>; +def : InstRW<[VecDFX], (instregex "WF(C|K)(E|H|HE)XB$")>; +def : InstRW<[VecXsPm, Lat4], (instregex "VF(C|K)(E|H|HE)DBS$")>; +def : InstRW<[VecXsPm, Lat4], (instregex "WF(C|K)(E|H|HE)DBS$")>; +def : InstRW<[VecXsPm, Lat4], (instregex "VF(C|K)(E|H|HE)SBS$")>; +def : InstRW<[VecXsPm, Lat4], (instregex "WF(C|K)(E|H|HE)SBS$")>; +def : InstRW<[VecDFX, Lat4], (instregex "WF(C|K)(E|H|HE)XBS$")>; +def : InstRW<[VecXsPm, Lat4], (instregex "WF(C|K)$")>; +def : InstRW<[VecXsPm, Lat4], (instregex "WF(C|K)DB$")>; +def : InstRW<[VecXsPm, Lat4], (instregex "WF(C|K)SB$")>; +def : InstRW<[VecDFX, Lat4], (instregex "WF(C|K)XB$")>; + +//===----------------------------------------------------------------------===// +// Vector: Floating-point insertion and 
extraction +//===----------------------------------------------------------------------===// + +def : InstRW<[FXb], (instregex "LEFR$")>; +def : InstRW<[FXb, Lat4], (instregex "LFER$")>; + +//===----------------------------------------------------------------------===// +// Vector: String instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[VecStr], (instregex "VFAE(B)?$")>; +def : InstRW<[VecStr, Lat5], (instregex "VFAEBS$")>; +def : InstRW<[VecStr], (instregex "VFAE(F|H)$")>; +def : InstRW<[VecStr, Lat5], (instregex "VFAE(F|H)S$")>; +def : InstRW<[VecStr], (instregex "VFAEZ(B|F|H)$")>; +def : InstRW<[VecStr, Lat5], (instregex "VFAEZ(B|F|H)S$")>; +def : InstRW<[VecStr], (instregex "VFEE(B|F|H|ZB|ZF|ZH)?$")>; +def : InstRW<[VecStr, Lat5], (instregex "VFEE(B|F|H|ZB|ZF|ZH)S$")>; +def : InstRW<[VecStr], (instregex "VFENE(B|F|H|ZB|ZF|ZH)?$")>; +def : InstRW<[VecStr, Lat5], (instregex "VFENE(B|F|H|ZB|ZF|ZH)S$")>; +def : InstRW<[VecStr], (instregex "VISTR(B|F|H)?$")>; +def : InstRW<[VecStr, Lat5], (instregex "VISTR(B|F|H)S$")>; +def : InstRW<[VecStr], (instregex "VSTRC(B|F|H)?$")>; +def : InstRW<[VecStr, Lat5], (instregex "VSTRC(B|F|H)S$")>; +def : InstRW<[VecStr], (instregex "VSTRCZ(B|F|H)$")>; +def : InstRW<[VecStr, Lat5], (instregex "VSTRCZ(B|F|H)S$")>; + +//===----------------------------------------------------------------------===// +// Vector: Packed-decimal instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[VecDF, VecDF, Lat10], (instregex "VLIP$")>; +def : InstRW<[VecDFX, LSU, GroupAlone], (instregex "VPKZ$")>; +def : InstRW<[VecDFX, FXb, LSU, Lat12, BeginGroup], (instregex "VUPKZ$")>; +def : InstRW<[VecDF, VecDF, FXb, Lat20, GroupAlone], (instregex "VCVB(G)?$")>; +def : InstRW<[VecDF, VecDF, FXb, Lat20, GroupAlone], (instregex "VCVD(G)?$")>; +def : InstRW<[VecDFX], (instregex "V(A|S)P$")>; +def : InstRW<[VecDF, VecDF, Lat30, GroupAlone], (instregex "VM(S)?P$")>; +def : InstRW<[VecDF, VecDF, Lat30, GroupAlone], (instregex "V(D|R)P$")>; +def : InstRW<[VecDFX, Lat30, GroupAlone], (instregex "VSDP$")>; +def : InstRW<[VecDF, VecDF, Lat11], (instregex "VSRP$")>; +def : InstRW<[VecDFX], (instregex "VPSOP$")>; +def : InstRW<[VecDFX], (instregex "V(T|C)P$")>; + + +// -------------------------------- System ---------------------------------- // + +//===----------------------------------------------------------------------===// +// System: Program-Status Word Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXb, Lat30], (instregex "EPSW$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "LPSW(E)?$")>; +def : InstRW<[FXa, Lat3, GroupAlone], (instregex "IPK$")>; +def : InstRW<[LSU, EndGroup], (instregex "SPKA$")>; +def : InstRW<[LSU, EndGroup], (instregex "SSM$")>; +def : InstRW<[FXb, LSU, GroupAlone], (instregex "ST(N|O)SM$")>; +def : InstRW<[FXa, Lat3], (instregex "IAC$")>; +def : InstRW<[LSU, EndGroup], (instregex "SAC(F)?$")>; + +//===----------------------------------------------------------------------===// +// System: Control Register Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXb, LSU, Lat30], (instregex "LCTL(G)?$")>; +def : InstRW<[LSU, Lat30], (instregex "STCT(L|G)$")>; +def : InstRW<[LSU], (instregex "E(P|S)A(I)?R$")>; +def : InstRW<[FXb, Lat30], (instregex "SSA(I)?R$")>; +def : InstRW<[FXb, Lat30], (instregex "ESEA$")>; + 
+//===----------------------------------------------------------------------===//
+// System: Prefix-Register Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb, LSU, Lat30], (instregex "SPX$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "STPX$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Storage-Key and Real Memory Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb, Lat30], (instregex "ISKE$")>;
+def : InstRW<[FXb, Lat30], (instregex "IVSK$")>;
+def : InstRW<[FXb, Lat30], (instregex "SSKE(Opt)?$")>;
+def : InstRW<[FXb, Lat30], (instregex "RRB(E|M)$")>;
+def : InstRW<[FXb, Lat30], (instregex "IRBM$")>;
+def : InstRW<[FXb, Lat30], (instregex "PFMF$")>;
+def : InstRW<[FXb, Lat30], (instregex "TB$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "PGIN$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "PGOUT$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Dynamic-Address-Translation Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb, LSU, Lat30], (instregex "IPTE(Opt)?(Opt)?$")>;
+def : InstRW<[FXb, Lat30], (instregex "IDTE(Opt)?$")>;
+def : InstRW<[FXb, Lat30], (instregex "CRDTE(Opt)?$")>;
+def : InstRW<[FXb, Lat30], (instregex "PTLB$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "CSP(G)?$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "LPTEA$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "LRA(Y|G)?$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "STRAG$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "LURA(G)?$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "STUR(A|G)$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "TPROT$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Memory-move Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXa, FXa, FXb, LSU, Lat8, GroupAlone], (instregex "MVC(K|P|S)$")>;
+def : InstRW<[FXa, LSU, Lat6, GroupAlone], (instregex "MVC(S|D)K$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "MVCOS$")>;
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MVPG$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Address-Space Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb, LSU, Lat30], (instregex "LASP$")>;
+def : InstRW<[LSU, GroupAlone], (instregex "PALB$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "PC$")>;
+def : InstRW<[FXb, Lat30], (instregex "PR$")>;
+def : InstRW<[FXb, Lat30], (instregex "PT(I)?$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "RP$")>;
+def : InstRW<[FXb, Lat30], (instregex "BS(G|A)$")>;
+def : InstRW<[FXb, Lat20], (instregex "TAR$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Linkage-Stack Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb, Lat30, EndGroup], (instregex "BAKR$")>;
+def : InstRW<[FXb, Lat30], (instregex "EREG(G)?$")>;
+def : InstRW<[FXb, Lat30], (instregex "(E|M)STA$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Time-Related Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb, Lat30], (instregex "PTFF$")>;
+def : InstRW<[FXb, LSU, Lat20], (instregex "SCK$")>;
+def : InstRW<[FXb, Lat30], (instregex "SCKPF$")>;
+def : InstRW<[FXb, LSU, Lat20], (instregex "SCKC$")>;
+def : InstRW<[LSU, LSU, GroupAlone], (instregex "SPT$")>;
+def : InstRW<[LSU, LSU, LSU, FXa, FXa, FXb, Lat9, GroupAlone],
+             (instregex "STCK(F)?$")>;
+def : InstRW<[LSU, LSU, LSU, LSU, FXa, FXa, FXb, FXb, Lat11, GroupAlone],
+             (instregex "STCKE$")>;
+def : InstRW<[FXb, LSU, Lat9], (instregex "STCKC$")>;
+def : InstRW<[LSU, LSU, FXb, Lat5, BeginGroup], (instregex "STPT$")>;
+
+//===----------------------------------------------------------------------===//
+// System: CPU-Related Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb, LSU, Lat30], (instregex "STAP$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "STIDP$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "STSI$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "STFL(E)?$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "ECAG$")>;
+def : InstRW<[FXa, LSU, Lat30], (instregex "ECTG$")>;
+def : InstRW<[FXb, Lat30], (instregex "PTF$")>;
+def : InstRW<[FXb, Lat30], (instregex "PCKMO$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Miscellaneous Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb, Lat30], (instregex "SVC$")>;
+def : InstRW<[FXb, GroupAlone], (instregex "MC$")>;
+def : InstRW<[FXb, Lat30], (instregex "DIAG$")>;
+def : InstRW<[FXb], (instregex "TRAC(E|G)$")>;
+def : InstRW<[FXb, Lat30], (instregex "TRAP(2|4)$")>;
+def : InstRW<[FXb, Lat30], (instregex "SIGP$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "SIGA$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "SIE$")>;
+
+//===----------------------------------------------------------------------===//
+// System: CPU-Measurement Facility Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb], (instregex "LPP$")>;
+def : InstRW<[FXb, Lat30], (instregex "ECPGA$")>;
+def : InstRW<[FXb, Lat30], (instregex "E(C|P)CTR$")>;
+def : InstRW<[FXb, Lat30], (instregex "LCCTL$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "L(P|S)CTL$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "Q(S|CTR)I$")>;
+def : InstRW<[FXb, Lat30], (instregex "S(C|P)CTR$")>;
+
+//===----------------------------------------------------------------------===//
+// System: I/O Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb, Lat30], (instregex "(C|H|R|X)SCH$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "(M|S|ST|T)SCH$")>;
+def : InstRW<[FXb, Lat30], (instregex "RCHP$")>;
+def : InstRW<[FXb, Lat30], (instregex "SCHM$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "STC(PS|RW)$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "TPI$")>;
+def : InstRW<[FXb, Lat30], (instregex "SAL$")>;
+
+}
+
diff --git a/interpreter/llvm/src/lib/Target/SystemZ/SystemZScheduleZ196.td b/interpreter/llvm/src/lib/Target/SystemZ/SystemZScheduleZ196.td
index 670df8ff55413..4d986e8391cf5 100644
--- a/interpreter/llvm/src/lib/Target/SystemZ/SystemZScheduleZ196.td
+++ b/interpreter/llvm/src/lib/Target/SystemZ/SystemZScheduleZ196.td
@@ -310,6 +310,9 @@ def : InstRW<[FXU], (instregex "ALGF(I|R)$")>;
 def : InstRW<[FXU], (instregex "ALGR(K)?$")>;
 def : InstRW<[FXU], (instregex "ALR(K)?$")>;
 def : InstRW<[FXU], (instregex "AR(K)?$")>;
+def : InstRW<[FXU], (instregex "A(L)?HHHR$")>;
+def : InstRW<[FXU, FXU, Lat3, GroupAlone], (instregex "A(L)?HHLR$")>;
+def : InstRW<[FXU], (instregex "ALSIH(N)?$")>;
 def : InstRW<[FXU, LSU, Lat5], (instregex "A(L)?G(SI)?$")>;
 
 // Logical addition with carry
@@ -333,6 +336,8 @@ def : InstRW<[FXU], (instregex "SLGF(I|R)$")>;
 def : InstRW<[FXU], (instregex "SLGR(K)?$")>;
 def : InstRW<[FXU], (instregex "SLR(K)?$")>;
 def : InstRW<[FXU], (instregex "SR(K)?$")>;
+def : InstRW<[FXU], (instregex "S(L)?HHHR$")>;
+def : InstRW<[FXU, FXU, Lat3, GroupAlone], (instregex "S(L)?HHLR$")>;
 
 // Subtraction with borrow
 def : InstRW<[FXU, LSU, Lat7, GroupAlone], (instregex "SLB(G)?$")>;
@@ -398,13 +403,13 @@ def : InstRW<[FXU, Lat6], (instregex "MS(R|FI)$")>;
 def : InstRW<[FXU, LSU, Lat12], (instregex "MSG$")>;
 def : InstRW<[FXU, Lat8], (instregex "MSGR$")>;
 def : InstRW<[FXU, Lat6], (instregex "MSGF(I|R)$")>;
-def : InstRW<[FXU, LSU, Lat15, GroupAlone], (instregex "MLG$")>;
-def : InstRW<[FXU, Lat9, GroupAlone], (instregex "MLGR$")>;
+def : InstRW<[FXU, FXU, LSU, Lat15, GroupAlone], (instregex "MLG$")>;
+def : InstRW<[FXU, FXU, Lat9, GroupAlone], (instregex "MLGR$")>;
 def : InstRW<[FXU, Lat5], (instregex "MGHI$")>;
 def : InstRW<[FXU, Lat5], (instregex "MHI$")>;
 def : InstRW<[FXU, LSU, Lat9], (instregex "MH(Y)?$")>;
-def : InstRW<[FXU, Lat7, GroupAlone], (instregex "M(L)?R$")>;
-def : InstRW<[FXU, LSU, Lat7, GroupAlone], (instregex "M(FY|L)?$")>;
+def : InstRW<[FXU, FXU, Lat7, GroupAlone], (instregex "M(L)?R$")>;
+def : InstRW<[FXU, FXU, LSU, Lat7, GroupAlone], (instregex "M(FY|L)?$")>;
 
 //===----------------------------------------------------------------------===//
 // Division and remainder
@@ -431,7 +436,8 @@ def : InstRW<[FXU], (instregex "SLL(G|K)?$")>;
 def : InstRW<[FXU], (instregex "SRL(G|K)?$")>;
 def : InstRW<[FXU], (instregex "SRA(G|K)?$")>;
 def : InstRW<[FXU, Lat2], (instregex "SLA(G|K)?$")>;
-def : InstRW<[FXU, FXU, FXU, FXU, Lat8], (instregex "S(L|R)D(A|L)$")>;
+def : InstRW<[FXU, FXU, FXU, FXU, LSU, Lat8, GroupAlone],
+             (instregex "S(L|R)D(A|L)$")>;
 
 // Rotate
 def : InstRW<[FXU, LSU, Lat6], (instregex "RLL(G)?$")>;
@@ -468,6 +474,8 @@ def : InstRW<[FXU], (instregex "CLIH$")>;
 def : InstRW<[FXU, LSU, Lat5], (instregex "CLI(Y)?$")>;
 def : InstRW<[FXU], (instregex "CLR$")>;
 def : InstRW<[FXU, LSU, Lat5], (instregex "CLRL$")>;
+def : InstRW<[FXU], (instregex "C(L)?HHR$")>;
+def : InstRW<[FXU, FXU, Lat3, GroupAlone], (instregex "C(L)?HLR$")>;
 
 // Compare halfword
 def : InstRW<[FXU, LSU, FXU, Lat6, GroupAlone], (instregex "CH(Y|RL)?$")>;
@@ -492,7 +500,7 @@ def : InstRW<[FXU], (instregex "TMLH(64)?$")>;
 def : InstRW<[FXU], (instregex "TMLL(64)?$")>;
 
 // Compare logical characters under mask
-def : InstRW<[FXU, LSU, Lat5], (instregex "CLM(H|Y)?$")>;
+def : InstRW<[FXU, FXU, LSU, Lat5, GroupAlone], (instregex "CLM(H|Y)?$")>;
 
 //===----------------------------------------------------------------------===//
 // Prefetch
@@ -525,7 +533,7 @@ def : InstRW<[FXU, FXU, FXU, FXU, FXU, FXU, LSU, LSU, Lat12, GroupAlone],
              (instregex "CDSG$")>;
 
 // Compare and swap and store
-def : InstRW<[FXU, Lat30, GroupAlone], (instregex "CSST$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "CSST$")>;
 
 // Perform locked operation
 def : InstRW<[LSU, Lat30, GroupAlone], (instregex "PLO$")>;
@@ -541,36 +549,44 @@ def : InstRW<[LSU, LSU, Lat5, GroupAlone], (instregex "LPD(G)?$")>;
 // Translate and convert
 //===----------------------------------------------------------------------===//
 
-def : InstRW<[FXU, Lat30, GroupAlone], (instregex "TR(T|TR)?(E|EOpt)?$")>;
-def : InstRW<[FXU, Lat30, GroupAlone], (instregex "TR(T|O)(T|O)(Opt)?$")>;
-def : InstRW<[FXU, Lat30, GroupAlone], (instregex "CU(12|14|21|24|41|42)(Opt)?$")>;
-def : InstRW<[FXU, Lat30, GroupAlone], (instregex "(CUUTF|CUTFU)(Opt)?$")>;
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "TR$")>;
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "TRT$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "TRTR$")>;
+def : InstRW<[FXU, Lat30], (instregex "TR(TR)?(T)?(E|EOpt)?$")>;
+def : InstRW<[LSU, Lat30], (instregex "TR(T|O)(T|O)(Opt)?$")>;
+def : InstRW<[FXU, Lat30], (instregex "CU(12|14|21|24|41|42)(Opt)?$")>;
+def : InstRW<[FXU, Lat30], (instregex "(CUUTF|CUTFU)(Opt)?$")>;
 
 //===----------------------------------------------------------------------===//
 // Message-security assist
 //===----------------------------------------------------------------------===//
 
-def : InstRW<[FXU, Lat30, GroupAlone], (instregex "KM(C|F|O|CTR)?$")>;
-def : InstRW<[FXU, Lat30, GroupAlone], (instregex "(KIMD|KLMD|KMAC|PCC)$")>;
+def : InstRW<[FXU, Lat30], (instregex "KM(C|F|O|CTR)?$")>;
+def : InstRW<[FXU, Lat30], (instregex "(KIMD|KLMD|KMAC|PCC)$")>;
 
 //===----------------------------------------------------------------------===//
 // Decimal arithmetic
 //===----------------------------------------------------------------------===//
 
-def : InstRW<[FXU, DFU, LSU, Lat30, GroupAlone], (instregex "CVB(Y|G)?$")>;
-def : InstRW<[FXU, DFU, FXU, Lat30, GroupAlone], (instregex "CVD(Y|G)?$")>;
-def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MV(N|Z|O)$")>;
+def : InstRW<[FXU, DFU2, LSU, LSU, Lat30, GroupAlone], (instregex "CVBG$")>;
+def : InstRW<[FXU, DFU, LSU, Lat30, GroupAlone], (instregex "CVB(Y)?$")>;
+def : InstRW<[FXU, FXU, FXU, DFU2, DFU2, LSU, Lat30, GroupAlone],
+             (instregex "CVDG$")>;
+def : InstRW<[FXU, FXU, DFU, LSU, Lat30, GroupAlone], (instregex "CVD(Y)?$")>;
+def : InstRW<[LSU, Lat10, GroupAlone], (instregex "MVO$")>;
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MV(N|Z)$")>;
 def : InstRW<[LSU, Lat30, GroupAlone], (instregex "(PACK|PKA|PKU)$")>;
-def : InstRW<[LSU, Lat30, GroupAlone], (instregex "UNPK(A|U)?$")>;
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "UNPK$")>;
+def : InstRW<[LSU, Lat12, GroupAlone], (instregex "UNPK(A|U)$")>;
 
-def : InstRW<[FXU, FXU, DFU2, LSU, LSU, LSU, LSU, Lat15, GroupAlone],
+def : InstRW<[FXU, DFU2, DFU2, LSU, LSU, Lat15, GroupAlone],
              (instregex "(A|S|ZA)P$")>;
-def : InstRW<[FXU, FXU, DFU2, LSU, LSU, LSU, LSU, Lat30, GroupAlone],
+def : InstRW<[FXU, DFU2, DFU2, LSU, LSU, Lat30, GroupAlone],
             (instregex "(M|D)P$")>;
-def : InstRW<[FXU, FXU, DFU2, LSU, LSU, Lat15, GroupAlone],
+def : InstRW<[FXU, FXU, DFU2, DFU2, LSU, LSU, LSU, Lat15, GroupAlone],
             (instregex "SRP$")>;
-def : InstRW<[DFU2, LSU, LSU, LSU, LSU, Lat11, GroupAlone], (instregex "CP$")>;
-def : InstRW<[DFU2, LSU, LSU, Lat3, GroupAlone], (instregex "TP$")>;
+def : InstRW<[DFU2, DFU2, LSU, LSU, Lat11, GroupAlone], (instregex "CP$")>;
+def : InstRW<[DFU2, LSU, LSU, GroupAlone], (instregex "TP$")>;
 def : InstRW<[LSU, Lat30, GroupAlone], (instregex "ED(MK)?$")>;
 
 //===----------------------------------------------------------------------===//
@@ -614,57 +630,29 @@ def : InstRW<[FXU, FXU, LSU, Lat6, GroupAlone], (instregex "BASSM$")>;
 //===----------------------------------------------------------------------===//
 
 // Find leftmost one
-def : InstRW<[FXU, Lat7, GroupAlone], (instregex "FLOGR$")>;
+def : InstRW<[FXU, FXU, Lat7, GroupAlone], (instregex "FLOGR$")>;
 
 // Population count
 def : InstRW<[FXU, Lat3], (instregex "POPCNT$")>;
 
 // Extend
-def : InstRW<[FXU], (instregex "AEXT128_64$")>;
-def : InstRW<[FXU], (instregex "ZEXT128_(32|64)$")>;
+def : InstRW<[FXU], (instregex "AEXT128$")>;
+def : InstRW<[FXU], (instregex "ZEXT128$")>;
 
 // String instructions
 def : InstRW<[FXU, LSU, Lat30], (instregex "SRST$")>;
-def : InstRW<[LSU, Lat30], (instregex "SRSTU$")>;
+def : InstRW<[FXU, Lat30], (instregex "SRSTU$")>;
 def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CUSE$")>;
 
 // Various complex instructions
-def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CFC$")>;
-def : InstRW<[LSU, Lat30, GroupAlone], (instregex "UPT$")>;
-def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CKSM$")>;
-def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CMPSC$")>;
-
-// Move with key
-def : InstRW<[LSU, Lat8, GroupAlone], (instregex "MVCK$")>;
-
-// Monitor call
-def : InstRW<[FXU], (instregex "MC$")>;
-
-// Extract CPU attribute
-def : InstRW<[FXU, Lat30], (instregex "ECAG$")>;
-
-// Extract CPU Time
-def : InstRW<[FXU, Lat5, LSU], (instregex "ECTG$")>;
-
-// Extract PSW
-def : InstRW<[FXU, Lat30], (instregex "EPSW$")>;
+def : InstRW<[LSU, Lat30], (instregex "CFC$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "UPT$")>;
+def : InstRW<[LSU, Lat30], (instregex "CKSM$")>;
+def : InstRW<[FXU, Lat30], (instregex "CMPSC$")>;
 
 // Execute
 def : InstRW<[LSU, GroupAlone], (instregex "EX(RL)?$")>;
 
-// Program return
-def : InstRW<[FXU, Lat30], (instregex "PR$")>;
-
-// Inline assembly
-def : InstRW<[FXU, LSU, Lat15], (instregex "STCK$")>;
-def : InstRW<[FXU, LSU, Lat12], (instregex "STCKF$")>;
-def : InstRW<[LSU, FXU, Lat5], (instregex "STCKE$")>;
-def : InstRW<[FXU, LSU, Lat5], (instregex "STFLE$")>;
-def : InstRW<[FXU, Lat30], (instregex "SVC$")>;
-
-// Store real address
-def : InstRW<[FXU, LSU, Lat5], (instregex "STRAG$")>;
-
 //===----------------------------------------------------------------------===//
 // .insn directive instructions
 //===----------------------------------------------------------------------===//
@@ -801,9 +789,9 @@ def : InstRW<[FPU2, FPU2, Lat10, GroupAlone], (instregex "MXDBR$")>;
 def : InstRW<[FPU2, FPU2, Lat30, GroupAlone], (instregex "MXBR$")>;
 
 // Multiply and add / subtract
-def : InstRW<[FPU, LSU, Lat12, GroupAlone], (instregex "M(A|S)EB$")>;
+def : InstRW<[FPU, FPU, LSU, Lat12, GroupAlone], (instregex "M(A|S)EB$")>;
 def : InstRW<[FPU, GroupAlone], (instregex "M(A|S)EBR$")>;
-def : InstRW<[FPU, LSU, Lat12, GroupAlone], (instregex "M(A|S)DB$")>;
+def : InstRW<[FPU, FPU, LSU, Lat12, GroupAlone], (instregex "M(A|S)DB$")>;
 def : InstRW<[FPU, GroupAlone], (instregex "M(A|S)DBR$")>;
 
 // Division
@@ -812,7 +800,7 @@ def : InstRW<[FPU, Lat30], (instregex "D(E|D)BR$")>;
 def : InstRW<[FPU2, FPU2, Lat30, GroupAlone], (instregex "DXBR$")>;
 
 // Divide to integer
-def : InstRW<[FPU, Lat30, GroupAlone], (instregex "DI(E|D)BR$")>;
+def : InstRW<[FPU, Lat30], (instregex "DI(E|D)BR$")>;
 
 //===----------------------------------------------------------------------===//
 // FP: Comparisons
@@ -834,10 +822,396 @@ def : InstRW<[FPU2, FPU2, LSU, Lat15, GroupAlone], (instregex "TCXB$")>;
 def : InstRW<[FXU, LSU, Lat4, GroupAlone], (instregex "EFPC$")>;
 def : InstRW<[LSU, Lat3, GroupAlone], (instregex "SFPC$")>;
 def : InstRW<[LSU, LSU, Lat6, GroupAlone], (instregex "LFPC$")>;
-def : InstRW<[LSU, Lat3, GroupAlone], (instregex "STFPC$")>;
-def : InstRW<[FXU, Lat30, GroupAlone], (instregex "SFASR$")>;
-def : InstRW<[FXU, LSU, Lat30, GroupAlone], (instregex "LFAS$")>;
+def : InstRW<[FXU, LSU, Lat3, GroupAlone], (instregex "STFPC$")>;
+def : InstRW<[FXU, Lat30], (instregex "SFASR$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "LFAS$")>;
 def : InstRW<[FXU, Lat2, GroupAlone], (instregex "SRNM(B|T)?$")>;
+
+// --------------------- Hexadecimal floating point ------------------------- //
+
+//===----------------------------------------------------------------------===//
+// HFP: Move instructions
+//===----------------------------------------------------------------------===//
+
+// Load and Test
+def : InstRW<[FPU], (instregex "LT(D|E)R$")>;
+def : InstRW<[FPU2, FPU2, Lat9, GroupAlone], (instregex "LTXR$")>;
+
+//===----------------------------------------------------------------------===//
+// HFP: Conversion instructions
+//===----------------------------------------------------------------------===//
+
+// Load rounded
+def : InstRW<[FPU], (instregex "(LEDR|LRER)$")>;
+def : InstRW<[FPU], (instregex "LEXR$")>;
+def : InstRW<[FPU], (instregex "(LDXR|LRDR)$")>;
+
+// Load lengthened
+def : InstRW<[LSU], (instregex "LDE$")>;
+def : InstRW<[FXU], (instregex "LDER$")>;
+def : InstRW<[FPU2, FPU2, LSU, Lat15, GroupAlone], (instregex "LX(D|E)$")>;
+def : InstRW<[FPU2, FPU2, Lat10, GroupAlone], (instregex "LX(D|E)R$")>;
+
+// Convert from fixed
+def : InstRW<[FXU, FPU, Lat9, GroupAlone], (instregex "CE(F|G)R$")>;
+def : InstRW<[FXU, FPU, Lat9, GroupAlone], (instregex "CD(F|G)R$")>;
+def : InstRW<[FXU, FPU2, FPU2, Lat11, GroupAlone], (instregex "CX(F|G)R$")>;
+
+// Convert to fixed
+def : InstRW<[FXU, FPU, Lat12, GroupAlone], (instregex "CF(E|D)R$")>;
+def : InstRW<[FXU, FPU, Lat12, GroupAlone], (instregex "CG(E|D)R$")>;
+def : InstRW<[FXU, FPU, FPU, Lat20, GroupAlone], (instregex "C(F|G)XR$")>;
+
+// Convert BFP to HFP / HFP to BFP.
+def : InstRW<[FPU], (instregex "THD(E)?R$")>;
+def : InstRW<[FPU], (instregex "TB(E)?DR$")>;
+
+//===----------------------------------------------------------------------===//
+// HFP: Unary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Load Complement / Negative / Positive
+def : InstRW<[FPU], (instregex "L(C|N|P)DR$")>;
+def : InstRW<[FPU], (instregex "L(C|N|P)ER$")>;
+def : InstRW<[FPU2, FPU2, Lat9, GroupAlone], (instregex "L(C|N|P)XR$")>;
+
+// Halve
+def : InstRW<[FPU], (instregex "H(E|D)R$")>;
+
+// Square root
+def : InstRW<[FPU, LSU, Lat30], (instregex "SQ(E|D)$")>;
+def : InstRW<[FPU, Lat30], (instregex "SQ(E|D)R$")>;
+def : InstRW<[FPU2, FPU2, Lat30, GroupAlone], (instregex "SQXR$")>;
+
+// Load FP integer
+def : InstRW<[FPU], (instregex "FIER$")>;
+def : InstRW<[FPU], (instregex "FIDR$")>;
+def : InstRW<[FPU2, FPU2, Lat15, GroupAlone], (instregex "FIXR$")>;
+
+//===----------------------------------------------------------------------===//
+// HFP: Binary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Addition
+def : InstRW<[FPU, LSU, Lat12], (instregex "A(E|D|U|W)$")>;
+def : InstRW<[FPU], (instregex "A(E|D|U|W)R$")>;
+def : InstRW<[FPU2, FPU2, Lat20, GroupAlone], (instregex "AXR$")>;
+
+// Subtraction
+def : InstRW<[FPU, LSU, Lat12], (instregex "S(E|D|U|W)$")>;
+def : InstRW<[FPU], (instregex "S(E|D|U|W)R$")>;
+def : InstRW<[FPU2, FPU2, Lat20, GroupAlone], (instregex "SXR$")>;
+
+// Multiply
+def : InstRW<[FPU, LSU, Lat12], (instregex "M(D|DE|E|EE)$")>;
+def : InstRW<[FPU], (instregex "M(D|DE|E|EE)R$")>;
+def : InstRW<[FPU2, FPU2, LSU, Lat15, GroupAlone], (instregex "MXD$")>;
+def : InstRW<[FPU2, FPU2, Lat10, GroupAlone], (instregex "MXDR$")>;
+def : InstRW<[FPU2, FPU2, Lat30, GroupAlone], (instregex "MXR$")>;
+def : InstRW<[FPU2, FPU2, LSU, Lat15, GroupAlone], (instregex "MY$")>;
+def : InstRW<[FPU, FPU, LSU, Lat15, GroupAlone], (instregex "MY(H|L)$")>;
+def : InstRW<[FPU2, FPU2, Lat10, GroupAlone], (instregex "MYR$")>;
+def : InstRW<[FPU, Lat10, GroupAlone], (instregex "MY(H|L)R$")>;
+
+// Multiply and add / subtract
+def : InstRW<[FPU, FPU, LSU, Lat12, GroupAlone], (instregex "M(A|S)E$")>;
+def : InstRW<[FPU, GroupAlone], (instregex "M(A|S)ER$")>;
+def : InstRW<[FPU, FPU, LSU, Lat12, GroupAlone], (instregex "M(A|S)D$")>;
+def : InstRW<[FPU, GroupAlone], (instregex "M(A|S)DR$")>;
+def : InstRW<[FPU2, FPU2, LSU, GroupAlone], (instregex "MAY$")>;
+def : InstRW<[FPU2, FPU2, GroupAlone], (instregex "MAYR$")>;
+def : InstRW<[FPU, FPU, LSU, Lat12, GroupAlone], (instregex "MAY(H|L)$")>;
+def : InstRW<[FPU, GroupAlone], (instregex "MAY(H|L)R$")>;
+
+// Division
+def : InstRW<[FPU, LSU, Lat30], (instregex "D(E|D)$")>;
+def : InstRW<[FPU, Lat30], (instregex "D(E|D)R$")>;
+def : InstRW<[FPU2, FPU2, Lat30, GroupAlone], (instregex "DXR$")>;
+
+//===----------------------------------------------------------------------===//
+// HFP: Comparisons
+//===----------------------------------------------------------------------===//
+
+// Compare
+def : InstRW<[FPU, LSU, Lat12], (instregex "C(E|D)$")>;
+def : InstRW<[FPU], (instregex "C(E|D)R$")>;
+def : InstRW<[FPU, FPU, Lat15], (instregex "CXR$")>;
+
+
+// ------------------------ Decimal floating point -------------------------- //
+
+//===----------------------------------------------------------------------===//
+// DFP: Move instructions
+//===----------------------------------------------------------------------===//
+
+// Load and Test
+def : InstRW<[DFU, Lat20], (instregex "LTDTR$")>;
+def : InstRW<[DFU2, DFU2, Lat20, GroupAlone], (instregex "LTXTR$")>;
+
+//===----------------------------------------------------------------------===//
+// DFP: Conversion instructions
+//===----------------------------------------------------------------------===//
+
+// Load rounded
+def : InstRW<[DFU, Lat30], (instregex "LEDTR$")>;
+def : InstRW<[DFU, DFU, Lat30], (instregex "LDXTR$")>;
+
+// Load lengthened
+def : InstRW<[DFU, Lat20], (instregex "LDETR$")>;
+def : InstRW<[DFU2, DFU2, Lat20, GroupAlone], (instregex "LXDTR$")>;
+
+// Convert from fixed / logical
+def : InstRW<[FXU, DFU, Lat9, GroupAlone], (instregex "CDFTR$")>;
+def : InstRW<[FXU, DFU, Lat30, GroupAlone], (instregex "CDGTR(A)?$")>;
+def : InstRW<[FXU, DFU2, DFU2, GroupAlone], (instregex "CXFTR$")>;
+def : InstRW<[FXU, DFU2, DFU2, Lat30, GroupAlone], (instregex "CXGTR(A)?$")>;
+def : InstRW<[FXU, DFU, Lat11, GroupAlone], (instregex "CDL(F|G)TR$")>;
+def : InstRW<[FXU, DFU2, DFU2, Lat11, GroupAlone], (instregex "CXLFTR$")>;
+def : InstRW<[FXU, DFU2, DFU2, Lat6, GroupAlone], (instregex "CXLGTR$")>;
+
+// Convert to fixed / logical
+def : InstRW<[FXU, DFU, Lat11, GroupAlone], (instregex "CFDTR(A)?$")>;
+def : InstRW<[FXU, DFU, Lat30, GroupAlone], (instregex "CGDTR(A)?$")>;
+def : InstRW<[FXU, DFU, DFU, Lat11, GroupAlone], (instregex "CFXTR$")>;
+def : InstRW<[FXU, DFU, DFU, Lat30, GroupAlone], (instregex "CGXTR(A)?$")>;
+def : InstRW<[FXU, DFU, Lat11, GroupAlone], (instregex "CL(F|G)DTR$")>;
+def : InstRW<[FXU, DFU, DFU, Lat11, GroupAlone], (instregex "CL(F|G)XTR$")>;
+
+// Convert from / to signed / unsigned packed
+def : InstRW<[FXU, DFU, Lat12, GroupAlone], (instregex "CD(S|U)TR$")>;
+def : InstRW<[FXU, FXU, DFU2, DFU2, Lat20, GroupAlone], (instregex "CX(S|U)TR$")>;
+def : InstRW<[FXU, DFU, Lat12, GroupAlone], (instregex "C(S|U)DTR$")>;
+def : InstRW<[FXU, FXU, DFU2, DFU2, Lat20, GroupAlone], (instregex "C(S|U)XTR$")>;
+
+// Perform floating-point operation
+def : InstRW<[FXU, Lat30], (instregex "PFPO$")>;
+
+//===----------------------------------------------------------------------===//
+// DFP: Unary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Load FP integer
+def : InstRW<[DFU, Lat20], (instregex "FIDTR$")>;
+def : InstRW<[DFU2, DFU2, Lat20, GroupAlone], (instregex "FIXTR$")>;
+
+// Extract biased exponent
+def : InstRW<[FXU, DFU, Lat15, GroupAlone], (instregex "EEDTR$")>;
+def : InstRW<[FXU, DFU2, Lat15, GroupAlone], (instregex "EEXTR$")>;
+
+// Extract significance
+def : InstRW<[FXU, DFU, Lat15, GroupAlone], (instregex "ESDTR$")>;
+def : InstRW<[FXU, DFU, DFU, Lat20, GroupAlone], (instregex "ESXTR$")>;
+
+//===----------------------------------------------------------------------===//
+// DFP: Binary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Addition
+def : InstRW<[DFU, Lat30], (instregex "ADTR(A)?$")>;
+def : InstRW<[DFU2, DFU2, Lat30, GroupAlone], (instregex "AXTR(A)?$")>;
+
+// Subtraction
+def : InstRW<[DFU, Lat30], (instregex "SDTR(A)?$")>;
+def : InstRW<[DFU2, DFU2, Lat30, GroupAlone], (instregex "SXTR(A)?$")>;
+
+// Multiply
+def : InstRW<[DFU, Lat30], (instregex "MDTR(A)?$")>;
+def : InstRW<[DFU2, DFU2, Lat30, GroupAlone], (instregex "MXTR(A)?$")>;
+
+// Division
+def : InstRW<[DFU, Lat30], (instregex "DDTR(A)?$")>;
+def : InstRW<[DFU2, DFU2, Lat30, GroupAlone], (instregex "DXTR(A)?$")>;
+
+// Quantize
+def : InstRW<[DFU, Lat30], (instregex "QADTR$")>;
+def : InstRW<[DFU2, DFU2, Lat30, GroupAlone], (instregex "QAXTR$")>;
+
+// Reround
+def : InstRW<[FXU, DFU, Lat30, GroupAlone], (instregex "RRDTR$")>;
+def : InstRW<[FXU, DFU2, DFU2, Lat30, GroupAlone], (instregex "RRXTR$")>;
+
+// Shift significand left/right
+def : InstRW<[LSU, DFU, Lat11, GroupAlone], (instregex "S(L|R)DT$")>;
+def : InstRW<[LSU, DFU2, DFU2, Lat15, GroupAlone], (instregex "S(L|R)XT$")>;
+
+// Insert biased exponent
+def : InstRW<[FXU, DFU, Lat11, GroupAlone], (instregex "IEDTR$")>;
+def : InstRW<[FXU, DFU2, DFU2, Lat15, GroupAlone], (instregex "IEXTR$")>;
+
+//===----------------------------------------------------------------------===//
+// DFP: Comparisons
+//===----------------------------------------------------------------------===//
+
+// Compare
+def : InstRW<[DFU, Lat11], (instregex "(K|C)DTR$")>;
+def : InstRW<[DFU, DFU, Lat15], (instregex "(K|C)XTR$")>;
+
+// Compare biased exponent
+def : InstRW<[DFU, Lat8], (instregex "CEDTR$")>;
+def : InstRW<[DFU2, Lat9], (instregex "CEXTR$")>;
+
+// Test Data Class/Group
+def : InstRW<[LSU, DFU, Lat15], (instregex "TD(C|G)(E|D)T$")>;
+def : InstRW<[LSU, DFU2, Lat15], (instregex "TD(C|G)XT$")>;
+
+
+// -------------------------------- System ---------------------------------- //
+
+//===----------------------------------------------------------------------===//
+// System: Program-Status Word Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, Lat30], (instregex "EPSW$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "LPSW(E)?$")>;
+def : InstRW<[FXU, Lat3, GroupAlone], (instregex "IPK$")>;
+def : InstRW<[LSU, EndGroup], (instregex "SPKA$")>;
+def : InstRW<[LSU, EndGroup], (instregex "SSM$")>;
+def : InstRW<[FXU, LSU, GroupAlone], (instregex "ST(N|O)SM$")>;
+def : InstRW<[FXU, Lat3], (instregex "IAC$")>;
+def : InstRW<[LSU, EndGroup], (instregex "SAC(F)?$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Control Register Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat30], (instregex "LCTL(G)?$")>;
+def : InstRW<[FXU, LSU, LSU, LSU, LSU, Lat10, GroupAlone],
+             (instregex "STCT(L|G)$")>;
+def : InstRW<[LSU], (instregex "E(P|S)A(I)?R$")>;
+def : InstRW<[FXU, Lat30], (instregex "SSA(I)?R$")>;
+def : InstRW<[FXU, Lat30], (instregex "ESEA$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Prefix-Register Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat30], (instregex "SPX$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "STPX$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Storage-Key and Real Memory Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, Lat30], (instregex "ISKE$")>;
+def : InstRW<[FXU, Lat30], (instregex "IVSK$")>;
+def : InstRW<[FXU, Lat30], (instregex "SSKE(Opt)?$")>;
+def : InstRW<[FXU, Lat30], (instregex "RRB(E|M)$")>;
+def : InstRW<[FXU, Lat30], (instregex "PFMF$")>;
+def : InstRW<[FXU, Lat30], (instregex "TB$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "PGIN$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "PGOUT$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Dynamic-Address-Translation Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat30], (instregex "IPTE(Opt)?(Opt)?$")>;
+def : InstRW<[FXU, Lat30], (instregex "IDTE(Opt)?$")>;
+def : InstRW<[FXU, Lat30], (instregex "PTLB$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "CSP(G)?$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "LPTEA$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "LRA(Y|G)?$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "STRAG$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "LURA(G)?$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "STUR(A|G)$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "TPROT$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Memory-move Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[LSU, Lat8, GroupAlone], (instregex "MVC(K|P|S)$")>;
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MVCSK$")>;
+def : InstRW<[LSU, Lat6, GroupAlone], (instregex "MVCDK$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "MVCOS$")>;
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MVPG$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Address-Space Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat30], (instregex "LASP$")>;
+def : InstRW<[LSU, GroupAlone], (instregex "PALB$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "PC$")>;
+def : InstRW<[FXU, Lat30], (instregex "PR$")>;
+def : InstRW<[FXU, Lat30], (instregex "PT(I)?$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "RP$")>;
+def : InstRW<[FXU, Lat30], (instregex "BS(G|A)$")>;
+def : InstRW<[FXU, Lat20], (instregex "TAR$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Linkage-Stack Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat30, EndGroup], (instregex "BAKR$")>;
+def : InstRW<[FXU, Lat30], (instregex "EREG(G)?$")>;
+def : InstRW<[FXU, Lat30], (instregex "(E|M)STA$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Time-Related Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, Lat30], (instregex "PTFF$")>;
+def : InstRW<[FXU, LSU, Lat20], (instregex "SCK$")>;
+def : InstRW<[FXU, Lat30], (instregex "SCKPF$")>;
+def : InstRW<[FXU, LSU, Lat20], (instregex "SCKC$")>;
+def : InstRW<[FXU, LSU, Lat20], (instregex "SPT$")>;
+def : InstRW<[FXU, LSU, Lat15], (instregex "STCK$")>;
+def : InstRW<[FXU, LSU, Lat12], (instregex "STCKF$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "STCKE$")>;
+def : InstRW<[FXU, LSU, Lat9], (instregex "STCKC$")>;
+def : InstRW<[FXU, LSU, Lat8], (instregex "STPT$")>;
+
+//===----------------------------------------------------------------------===//
+// System: CPU-Related Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat30], (instregex "STAP$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "STIDP$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "STSI$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "STFL(E)?$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "ECAG$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "ECTG$")>;
+def : InstRW<[FXU, Lat30], (instregex "PTF$")>;
+def : InstRW<[FXU, Lat30], (instregex "PCKMO$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Miscellaneous Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, Lat30], (instregex "SVC$")>;
+def : InstRW<[FXU, GroupAlone], (instregex "MC$")>;
+def : InstRW<[FXU, Lat30], (instregex "DIAG$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "TRAC(E|G)$")>;
+def : InstRW<[FXU, Lat30], (instregex "TRAP(2|4)$")>;
+def : InstRW<[FXU, Lat30], (instregex "SIGP$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "SIGA$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "SIE$")>;
+
+//===----------------------------------------------------------------------===//
+// System: CPU-Measurement Facility Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU], (instregex "LPP$")>;
+def : InstRW<[FXU, Lat30], (instregex "ECPGA$")>;
+def : InstRW<[FXU, Lat30], (instregex "E(C|P)CTR$")>;
+def : InstRW<[FXU, Lat30], (instregex "LCCTL$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "L(P|S)CTL$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "Q(S|CTR)I$")>;
+def : InstRW<[FXU, Lat30], (instregex "S(C|P)CTR$")>;
+
+//===----------------------------------------------------------------------===//
+// System: I/O Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, Lat30], (instregex "(C|H|R|X)SCH$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "(M|S|ST|T)SCH$")>;
+def : InstRW<[FXU, Lat30], (instregex "RCHP$")>;
+def : InstRW<[FXU, Lat30], (instregex "SCHM$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "STC(PS|RW)$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "TPI$")>;
+def : InstRW<[FXU, Lat30], (instregex "SAL$")>;
+
 }
diff --git a/interpreter/llvm/src/lib/Target/SystemZ/SystemZScheduleZEC12.td b/interpreter/llvm/src/lib/Target/SystemZ/SystemZScheduleZEC12.td
index 1bdb8779dc72b..a0f2115eb9d72 100644
--- a/interpreter/llvm/src/lib/Target/SystemZ/SystemZScheduleZEC12.td
+++ b/interpreter/llvm/src/lib/Target/SystemZ/SystemZScheduleZEC12.td
@@ -69,7 +69,7 @@ def : WriteRes { let Latency = 1; }
 def : WriteRes { let Latency = 8; }
 def : WriteRes { let Latency = 9; }
 def : WriteRes { let Latency = 2; }
-def : WriteRes { let Latency = 3; }
+def : WriteRes { let Latency = 3; }
 def : WriteRes; // Virtual Branching Unit
 
 // -------------------------- INSTRUCTIONS ---------------------------------- //
@@ -251,7 +251,7 @@ def : InstRW<[LSU, LSU, LSU, LSU, LSU, Lat10, GroupAlone],
             (instregex "LM(H|Y|G)?$")>;
 
 // Load multiple disjoint
-def : InstRW<[FXU, Lat30, GroupAlone], (instregex "LMD$")>;
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "LMD$")>;
 
 // Store multiple (estimated average of 3 ops)
 def : InstRW<[LSU, LSU, FXU, FXU, FXU, Lat10, GroupAlone],
@@ -320,6 +320,9 @@ def : InstRW<[FXU], (instregex "ALGF(I|R)$")>;
 def : InstRW<[FXU], (instregex "ALGR(K)?$")>;
 def : InstRW<[FXU], (instregex "ALR(K)?$")>;
 def : InstRW<[FXU], (instregex "AR(K)?$")>;
+def : InstRW<[FXU], (instregex "A(L)?HHHR$")>;
+def : InstRW<[FXU, Lat2], (instregex "A(L)?HHLR$")>;
+def : InstRW<[FXU], (instregex "ALSIH(N)?$")>;
 def : InstRW<[FXU, LSU, Lat5], (instregex "A(L)?G(SI)?$")>;
 
 // Logical addition with carry
@@ -343,6 +346,8 @@ def : InstRW<[FXU], (instregex "SLGF(I|R)$")>;
 def : InstRW<[FXU], (instregex "SLGR(K)?$")>;
 def : InstRW<[FXU], (instregex "SLR(K)?$")>;
 def : InstRW<[FXU], (instregex "SR(K)?$")>;
+def : InstRW<[FXU], (instregex "S(L)?HHHR$")>;
+def : InstRW<[FXU, Lat2], (instregex "S(L)?HHLR$")>;
 
 // Subtraction with borrow
 def : InstRW<[FXU, LSU, Lat7, GroupAlone], (instregex "SLB(G)?$")>;
@@ -408,13 +413,13 @@ def : InstRW<[FXU, Lat6], (instregex "MS(R|FI)$")>;
 def : InstRW<[FXU, LSU, Lat12], (instregex "MSG$")>;
 def : InstRW<[FXU, Lat8], (instregex "MSGR$")>;
 def : InstRW<[FXU, Lat6], (instregex "MSGF(I|R)$")>;
-def : InstRW<[FXU, LSU, Lat15, GroupAlone], (instregex "MLG$")>;
-def : InstRW<[FXU, Lat9, GroupAlone], (instregex "MLGR$")>;
+def : InstRW<[FXU, FXU, LSU, Lat15, GroupAlone], (instregex "MLG$")>;
+def : InstRW<[FXU, FXU, Lat9, GroupAlone], (instregex "MLGR$")>;
 def : InstRW<[FXU, Lat5], (instregex "MGHI$")>;
 def : InstRW<[FXU, Lat5], (instregex "MHI$")>;
 def : InstRW<[FXU, LSU, Lat9], (instregex "MH(Y)?$")>;
-def : InstRW<[FXU, Lat7, GroupAlone], (instregex "M(L)?R$")>;
-def : InstRW<[FXU, LSU, Lat7, GroupAlone], (instregex "M(FY|L)?$")>;
+def : InstRW<[FXU, FXU, Lat7, GroupAlone], (instregex "M(L)?R$")>;
+def : InstRW<[FXU, FXU, LSU, Lat7, GroupAlone], (instregex "M(FY|L)?$")>;
 
 //===----------------------------------------------------------------------===//
 // Division and remainder
@@ -441,7 +446,8 @@ def : InstRW<[FXU], (instregex "SLL(G|K)?$")>;
 def : InstRW<[FXU], (instregex "SRL(G|K)?$")>;
 def : InstRW<[FXU], (instregex "SRA(G|K)?$")>;
 def : InstRW<[FXU], (instregex "SLA(G|K)?$")>;
-def : InstRW<[FXU, FXU, FXU, FXU, Lat8], (instregex "S(L|R)D(A|L)$")>;
+def : InstRW<[FXU, FXU, FXU, FXU, LSU, Lat8, GroupAlone],
+             (instregex "S(L|R)D(A|L)$")>;
 
 // Rotate
 def : InstRW<[FXU, LSU, Lat6], (instregex "RLL(G)?$")>;
@@ -478,6 +484,8 @@ def : InstRW<[FXU], (instregex "CLIH$")>;
 def : InstRW<[FXU, LSU, Lat5], (instregex "CLI(Y)?$")>;
 def : InstRW<[FXU], (instregex "CLR$")>;
 def : InstRW<[FXU, LSU, Lat5], (instregex "CLRL$")>;
+def : InstRW<[FXU], (instregex "C(L)?HHR$")>;
+def : InstRW<[FXU, Lat2], (instregex "C(L)?HLR$")>;
 
 // Compare halfword
 def : InstRW<[FXU, LSU, Lat6], (instregex "CH(Y|RL)?$")>;
@@ -537,7 +545,7 @@ def : InstRW<[FXU, FXU, FXU, FXU, FXU, FXU, LSU, LSU, Lat12, GroupAlone],
             (instregex "CDSG$")>;
 
 // Compare and swap and store
-def : InstRW<[FXU, Lat30, GroupAlone], (instregex "CSST$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "CSST$")>;
 
 // Perform locked operation
 def : InstRW<[LSU, Lat30, GroupAlone], (instregex "PLO$")>;
@@ -553,36 +561,44 @@ def : InstRW<[LSU, LSU, Lat5, GroupAlone], (instregex "LPD(G)?$")>;
 // Translate and convert
 //===----------------------------------------------------------------------===//
 
-def : InstRW<[FXU, Lat30, GroupAlone], (instregex "TR(T|TR)?(E|EOpt)?$")>;
-def : InstRW<[FXU, Lat30, GroupAlone], (instregex "TR(T|O)(T|O)(Opt)?$")>;
-def : InstRW<[FXU, Lat30, GroupAlone], (instregex "CU(12|14|21|24|41|42)(Opt)?$")>;
-def : InstRW<[FXU, Lat30, GroupAlone], (instregex "(CUUTF|CUTFU)(Opt)?$")>;
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "TR$")>;
+def : InstRW<[FXU, FXU, FXU, LSU, LSU, Lat30, GroupAlone], (instregex "TRT$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "TRTR$")>;
+def : InstRW<[FXU, Lat30], (instregex "TR(TR)?(T)?(E|EOpt)?$")>;
+def : InstRW<[LSU, Lat30], (instregex "TR(T|O)(T|O)(Opt)?$")>;
+def : InstRW<[FXU, Lat30], (instregex "CU(12|14|21|24|41|42)(Opt)?$")>;
+def : InstRW<[FXU, Lat30], (instregex "(CUUTF|CUTFU)(Opt)?$")>;
 
 //===----------------------------------------------------------------------===//
 // Message-security assist
 //===----------------------------------------------------------------------===//
 
-def : InstRW<[FXU, Lat30, GroupAlone], (instregex "KM(C|F|O|CTR)?$")>;
-def : InstRW<[FXU, Lat30, GroupAlone], (instregex "(KIMD|KLMD|KMAC|PCC)$")>;
+def : InstRW<[FXU, Lat30], (instregex "KM(C|F|O|CTR)?$")>;
+def : InstRW<[FXU, Lat30], (instregex "(KIMD|KLMD|KMAC|PCC)$")>;
 
 //===----------------------------------------------------------------------===//
 // Decimal arithmetic
 //===----------------------------------------------------------------------===//
 
-def : InstRW<[FXU, DFU, LSU, Lat30, GroupAlone], (instregex "CVB(Y|G)?$")>;
-def : InstRW<[FXU, DFU, FXU, Lat30, GroupAlone], (instregex "CVD(Y|G)?$")>;
-def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MV(N|Z|O)$")>;
+def : InstRW<[FXU, DFU2, LSU, LSU, Lat30, GroupAlone], (instregex "CVBG$")>;
+def : InstRW<[FXU, DFU, LSU, Lat30, GroupAlone], (instregex "CVB(Y)?$")>;
+def : InstRW<[FXU, FXU, FXU, DFU2, DFU2, LSU, Lat30, GroupAlone],
+             (instregex "CVDG$")>;
+def : InstRW<[FXU, FXU, DFU, LSU, Lat30, GroupAlone], (instregex "CVD(Y)?$")>;
+def : InstRW<[LSU, Lat10, GroupAlone], (instregex "MVO$")>;
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MV(N|Z)$")>;
 def : InstRW<[LSU, Lat30, GroupAlone], (instregex "(PACK|PKA|PKU)$")>;
-def : InstRW<[LSU, Lat30, GroupAlone], (instregex "UNPK(A|U)?$")>;
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "UNPK$")>;
+def : InstRW<[LSU, Lat12, GroupAlone], (instregex "UNPK(A|U)$")>;
 
-def : InstRW<[FXU, FXU, DFU2, LSU, LSU, LSU, LSU, Lat15, GroupAlone],
+def : InstRW<[FXU, DFU2, DFU2, LSU, LSU, Lat15, GroupAlone],
             (instregex "(A|S|ZA)P$")>;
-def : InstRW<[FXU, FXU, DFU2, LSU, LSU, LSU, LSU, Lat30, GroupAlone],
            (instregex "(M|D)P$")>;
-def : InstRW<[FXU, FXU, DFU2, LSU, LSU, Lat15, GroupAlone],
+def : InstRW<[FXU, DFU2, DFU2, LSU, LSU, Lat30, GroupAlone],
            (instregex "(M|D)P$")>;
+def : InstRW<[FXU, FXU, DFU2, DFU2, LSU, LSU, LSU, Lat15, GroupAlone],
            (instregex "SRP$")>;
-def : InstRW<[DFU2, LSU, LSU, LSU, LSU, Lat11, GroupAlone], (instregex "CP$")>;
-def : InstRW<[DFU2, LSU, LSU, Lat3, GroupAlone], (instregex "TP$")>;
+def : InstRW<[DFU2, DFU2, LSU, LSU, Lat11, GroupAlone], (instregex "CP$")>;
+def : InstRW<[DFU2, LSU, LSU, Lat5, GroupAlone], (instregex "TP$")>;
 def : InstRW<[LSU, Lat30, GroupAlone], (instregex "ED(MK)?$")>;
 
 //===----------------------------------------------------------------------===//
@@ -652,57 +668,29 @@ def : InstRW<[FXU], (instregex "PPA$")>;
 //===----------------------------------------------------------------------===//
 
 // Find leftmost one
-def : InstRW<[FXU, Lat7, GroupAlone], (instregex "FLOGR$")>;
+def : InstRW<[FXU, FXU, Lat7, GroupAlone], (instregex "FLOGR$")>;
 
 // Population count
 def : InstRW<[FXU, Lat3], (instregex "POPCNT$")>;
 
 // Extend
-def : InstRW<[FXU], (instregex "AEXT128_64$")>;
-def : InstRW<[FXU], (instregex "ZEXT128_(32|64)$")>;
+def : InstRW<[FXU], (instregex "AEXT128$")>;
+def : InstRW<[FXU], (instregex "ZEXT128$")>;
 
 // String instructions
 def : InstRW<[FXU, LSU, Lat30], (instregex "SRST$")>;
-def : InstRW<[LSU, Lat30], (instregex "SRSTU$")>;
+def : InstRW<[FXU, Lat30], (instregex "SRSTU$")>;
 def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CUSE$")>;
 
 // Various complex instructions
-def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CFC$")>;
-def : InstRW<[LSU, Lat30, GroupAlone], (instregex "UPT$")>;
-def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CKSM$")>;
-def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CMPSC$")>;
-
-// Move with key
-def : InstRW<[LSU, Lat8, GroupAlone], (instregex "MVCK$")>;
-
-// Monitor call
-def : InstRW<[FXU], (instregex "MC$")>;
-
-// Extract CPU attribute
-def : InstRW<[FXU, Lat30], (instregex "ECAG$")>;
-
-// Extract CPU Time
-def : InstRW<[FXU, Lat5, LSU], (instregex "ECTG$")>;
-
-// Extract PSW
-def : InstRW<[FXU, Lat30], (instregex "EPSW$")>;
+def : InstRW<[LSU, Lat30], (instregex "CFC$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "UPT$")>;
+def : InstRW<[LSU, Lat30], (instregex "CKSM$")>;
+def : InstRW<[FXU, Lat30], (instregex "CMPSC$")>;
 
 // Execute
 def : InstRW<[LSU, GroupAlone], (instregex "EX(RL)?$")>;
 
-// Program return
-def : InstRW<[FXU, Lat30], (instregex "PR$")>;
-
-// Inline assembly
-def : InstRW<[FXU, LSU, LSU, Lat9, GroupAlone], (instregex "STCK(F)?$")>;
-def : InstRW<[LSU, LSU, LSU, LSU, FXU, FXU, Lat20, GroupAlone],
-             (instregex "STCKE$")>;
-def : InstRW<[FXU, LSU, Lat5], (instregex "STFLE$")>;
-def : InstRW<[FXU, Lat30], (instregex "SVC$")>;
-
-// Store real address
-def : InstRW<[FXU, LSU, Lat5], (instregex "STRAG$")>;
-
 //===----------------------------------------------------------------------===//
 // .insn directive instructions
 //===----------------------------------------------------------------------===//
@@ -839,9 +827,9 @@ def : InstRW<[FPU2, FPU2, Lat10, GroupAlone], (instregex "MXDBR$")>;
 def : InstRW<[FPU2, FPU2, Lat30, GroupAlone], (instregex "MXBR$")>;
 
 // Multiply and add / subtract
-def : InstRW<[FPU, LSU, Lat12, GroupAlone], (instregex "M(A|S)EB$")>;
+def : InstRW<[FPU, FPU, LSU, Lat12, GroupAlone], (instregex "M(A|S)EB$")>;
 def : InstRW<[FPU, GroupAlone], (instregex "M(A|S)EBR$")>;
-def : InstRW<[FPU, LSU, Lat12, GroupAlone], (instregex "M(A|S)DB$")>;
+def : InstRW<[FPU, FPU, LSU, Lat12, GroupAlone], (instregex "M(A|S)DB$")>;
 def : InstRW<[FPU, GroupAlone], (instregex "M(A|S)DBR$")>;
 
 // Division
@@ -850,7 +838,7 @@ def : InstRW<[FPU, Lat30], (instregex "D(E|D)BR$")>;
 def : InstRW<[FPU2, FPU2, Lat30, GroupAlone], (instregex "DXBR$")>;
 
 // Divide to integer
-def : InstRW<[FPU, Lat30, GroupAlone], (instregex "DI(E|D)BR$")>;
+def : InstRW<[FPU, Lat30], (instregex "DI(E|D)BR$")>;
 
 //===----------------------------------------------------------------------===//
 // FP: Comparisons
@@ -872,10 +860,403 @@ def : InstRW<[FPU2, FPU2, LSU, Lat15, GroupAlone], (instregex "TCXB$")>;
 def : InstRW<[FXU, LSU, Lat4, GroupAlone], (instregex "EFPC$")>;
 def : InstRW<[LSU, Lat3, GroupAlone], (instregex "SFPC$")>;
 def : InstRW<[LSU, LSU, Lat6, GroupAlone], (instregex "LFPC$")>;
-def : InstRW<[LSU, Lat3, GroupAlone], (instregex "STFPC$")>;
-def : InstRW<[FXU, Lat30, GroupAlone], (instregex "SFASR$")>;
-def : InstRW<[FXU, LSU, Lat30, GroupAlone], (instregex "LFAS$")>;
-def : InstRW<[FXU, Lat2, GroupAlone], (instregex "SRNM(B|T)?$")>;
+def : InstRW<[FXU, LSU, Lat3, GroupAlone], (instregex "STFPC$")>;
+def : InstRW<[FXU, Lat30], (instregex "SFASR$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "LFAS$")>;
+def : InstRW<[FXU, GroupAlone], (instregex "SRNM(B|T)?$")>;
+
+
+// --------------------- Hexadecimal floating point ------------------------- //
+
+//===----------------------------------------------------------------------===//
+// HFP: Move instructions
+//===----------------------------------------------------------------------===//
+
+// Load and Test
+def : InstRW<[FPU], (instregex "LT(D|E)R$")>;
+def : InstRW<[FPU2, FPU2, Lat9, GroupAlone], (instregex "LTXR$")>;
+
+//===----------------------------------------------------------------------===//
+// HFP: Conversion instructions
+//===----------------------------------------------------------------------===//
+
+// Load rounded
+def : InstRW<[FPU], (instregex "(LEDR|LRER)$")>;
+def : InstRW<[FPU], (instregex "LEXR$")>;
+def : InstRW<[FPU], (instregex "(LDXR|LRDR)$")>;
+
+// Load lengthened
+def : InstRW<[LSU], (instregex "LDE$")>;
+def : InstRW<[FXU], (instregex "LDER$")>;
+def : InstRW<[FPU2, FPU2, LSU, Lat15, GroupAlone], (instregex "LX(D|E)$")>;
+def : InstRW<[FPU2, FPU2, Lat10, GroupAlone], (instregex "LX(D|E)R$")>;
+
+// Convert from fixed
+def : InstRW<[FXU, FPU, Lat9, GroupAlone], (instregex "CE(F|G)R$")>;
+def : InstRW<[FXU, FPU, Lat9, GroupAlone], (instregex "CD(F|G)R$")>;
+def : InstRW<[FXU, FPU2, FPU2, Lat11, GroupAlone], (instregex "CX(F|G)R$")>;
+
+// Convert to fixed
+def : InstRW<[FXU, FPU, Lat12, GroupAlone], (instregex "CF(E|D)R$")>;
+def : InstRW<[FXU, FPU, Lat12, GroupAlone], (instregex "CG(E|D)R$")>;
+def : InstRW<[FXU, FPU, FPU, Lat20, GroupAlone], (instregex "C(F|G)XR$")>;
+
+// Convert BFP to HFP / HFP to BFP.
+def : InstRW<[FPU], (instregex "THD(E)?R$")>; +def : InstRW<[FPU], (instregex "TB(E)?DR$")>; + +//===----------------------------------------------------------------------===// +// HFP: Unary arithmetic +//===----------------------------------------------------------------------===// + +// Load Complement / Negative / Positive +def : InstRW<[FPU], (instregex "L(C|N|P)DR$")>; +def : InstRW<[FPU], (instregex "L(C|N|P)ER$")>; +def : InstRW<[FPU2, FPU2, Lat9, GroupAlone], (instregex "L(C|N|P)XR$")>; + +// Halve +def : InstRW<[FPU], (instregex "H(E|D)R$")>; + +// Square root +def : InstRW<[FPU, LSU, Lat30], (instregex "SQ(E|D)$")>; +def : InstRW<[FPU, Lat30], (instregex "SQ(E|D)R$")>; +def : InstRW<[FPU2, FPU2, Lat30, GroupAlone], (instregex "SQXR$")>; + +// Load FP integer +def : InstRW<[FPU], (instregex "FIER$")>; +def : InstRW<[FPU], (instregex "FIDR$")>; +def : InstRW<[FPU2, FPU2, Lat15, GroupAlone], (instregex "FIXR$")>; + +//===----------------------------------------------------------------------===// +// HFP: Binary arithmetic +//===----------------------------------------------------------------------===// + +// Addition +def : InstRW<[FPU, LSU, Lat12], (instregex "A(E|D|U|W)$")>; +def : InstRW<[FPU], (instregex "A(E|D|U|W)R$")>; +def : InstRW<[FPU2, FPU2, Lat20, GroupAlone], (instregex "AXR$")>; + +// Subtraction +def : InstRW<[FPU, LSU, Lat12], (instregex "S(E|D|U|W)$")>; +def : InstRW<[FPU], (instregex "S(E|D|U|W)R$")>; +def : InstRW<[FPU2, FPU2, Lat20, GroupAlone], (instregex "SXR$")>; + +// Multiply +def : InstRW<[FPU, LSU, Lat12], (instregex "M(D|DE|E|EE)$")>; +def : InstRW<[FPU], (instregex "M(D|DE|E|EE)R$")>; +def : InstRW<[FPU2, FPU2, LSU, Lat15, GroupAlone], (instregex "MXD$")>; +def : InstRW<[FPU2, FPU2, Lat10, GroupAlone], (instregex "MXDR$")>; +def : InstRW<[FPU2, FPU2, Lat30, GroupAlone], (instregex "MXR$")>; +def : InstRW<[FPU2, FPU2, LSU, Lat15, GroupAlone], (instregex "MY$")>; +def : InstRW<[FPU, FPU, LSU, Lat15, GroupAlone], (instregex "MY(H|L)$")>; +def : InstRW<[FPU2, FPU2, Lat10, GroupAlone], (instregex "MYR$")>; +def : InstRW<[FPU, Lat10, GroupAlone], (instregex "MY(H|L)R$")>; + +// Multiply and add / subtract +def : InstRW<[FPU, FPU, LSU, Lat12, GroupAlone], (instregex "M(A|S)E$")>; +def : InstRW<[FPU, GroupAlone], (instregex "M(A|S)ER$")>; +def : InstRW<[FPU, FPU, LSU, Lat12, GroupAlone], (instregex "M(A|S)D$")>; +def : InstRW<[FPU, GroupAlone], (instregex "M(A|S)DR$")>; +def : InstRW<[FPU2, FPU2, LSU, GroupAlone], (instregex "MAY$")>; +def : InstRW<[FPU2, FPU2, GroupAlone], (instregex "MAYR$")>; +def : InstRW<[FPU, FPU, LSU, Lat12, GroupAlone], (instregex "MAY(H|L)$")>; +def : InstRW<[FPU, GroupAlone], (instregex "MAY(H|L)R$")>; + +// Division +def : InstRW<[FPU, LSU, Lat30], (instregex "D(E|D)$")>; +def : InstRW<[FPU, Lat30], (instregex "D(E|D)R$")>; +def : InstRW<[FPU2, FPU2, Lat30, GroupAlone], (instregex "DXR$")>; + +//===----------------------------------------------------------------------===// +// HFP: Comparisons +//===----------------------------------------------------------------------===// + +// Compare +def : InstRW<[FPU, LSU, Lat12], (instregex "C(E|D)$")>; +def : InstRW<[FPU], (instregex "C(E|D)R$")>; +def : InstRW<[FPU, FPU, Lat15], (instregex "CXR$")>; + + +// ------------------------ Decimal floating point -------------------------- // + +//===----------------------------------------------------------------------===// +// DFP: Move instructions +//===----------------------------------------------------------------------===// + +// Load and 
Test +def : InstRW<[DFU, Lat20], (instregex "LTDTR$")>; +def : InstRW<[DFU2, DFU2, Lat20, GroupAlone], (instregex "LTXTR$")>; + +//===----------------------------------------------------------------------===// +// DFP: Conversion instructions +//===----------------------------------------------------------------------===// + +// Load rounded +def : InstRW<[DFU, Lat30], (instregex "LEDTR$")>; +def : InstRW<[DFU, DFU, Lat30], (instregex "LDXTR$")>; + +// Load lengthened +def : InstRW<[DFU, Lat20], (instregex "LDETR$")>; +def : InstRW<[DFU2, DFU2, Lat20, GroupAlone], (instregex "LXDTR$")>; + +// Convert from fixed / logical +def : InstRW<[FXU, DFU, Lat9, GroupAlone], (instregex "CDFTR$")>; +def : InstRW<[FXU, DFU, Lat30, GroupAlone], (instregex "CDGTR(A)?$")>; +def : InstRW<[FXU, DFU2, DFU2, GroupAlone], (instregex "CXFTR$")>; +def : InstRW<[FXU, DFU2, DFU2, Lat30, GroupAlone], (instregex "CXGTR(A)?$")>; +def : InstRW<[FXU, DFU, Lat11, GroupAlone], (instregex "CDL(F|G)TR$")>; +def : InstRW<[FXU, DFU2, DFU2, Lat11, GroupAlone], (instregex "CXLFTR$")>; +def : InstRW<[FXU, DFU2, DFU2, Lat6, GroupAlone], (instregex "CXLGTR$")>; + +// Convert to fixed / logical +def : InstRW<[FXU, DFU, Lat11, GroupAlone], (instregex "CFDTR(A)?$")>; +def : InstRW<[FXU, DFU, Lat30, GroupAlone], (instregex "CGDTR(A)?$")>; +def : InstRW<[FXU, DFU, DFU, Lat11, GroupAlone], (instregex "CFXTR$")>; +def : InstRW<[FXU, DFU, DFU, Lat30, GroupAlone], (instregex "CGXTR(A)?$")>; +def : InstRW<[FXU, DFU, Lat11, GroupAlone], (instregex "CL(F|G)DTR$")>; +def : InstRW<[FXU, DFU, DFU, Lat11, GroupAlone], (instregex "CL(F|G)XTR$")>; + +// Convert from / to signed / unsigned packed +def : InstRW<[FXU, DFU, Lat12, GroupAlone], (instregex "CD(S|U)TR$")>; +def : InstRW<[FXU, FXU, DFU2, DFU2, Lat20, GroupAlone], (instregex "CX(S|U)TR$")>; +def : InstRW<[FXU, DFU, Lat12, GroupAlone], (instregex "C(S|U)DTR$")>; +def : InstRW<[FXU, FXU, DFU2, DFU2, Lat20, GroupAlone], (instregex "C(S|U)XTR$")>; + +// Convert from / to zoned +def : InstRW<[LSU, DFU2, Lat7, GroupAlone], (instregex "CDZT$")>; +def : InstRW<[LSU, LSU, DFU2, DFU2, Lat10, GroupAlone], (instregex "CXZT$")>; +def : InstRW<[FXU, LSU, DFU, DFU, Lat11, GroupAlone], (instregex "CZDT$")>; +def : InstRW<[FXU, LSU, DFU, DFU, Lat15, GroupAlone], (instregex "CZXT$")>; + +// Perform floating-point operation +def : InstRW<[FXU, Lat30], (instregex "PFPO$")>; + +//===----------------------------------------------------------------------===// +// DFP: Unary arithmetic +//===----------------------------------------------------------------------===// + +// Load FP integer +def : InstRW<[DFU, Lat20], (instregex "FIDTR$")>; +def : InstRW<[DFU2, DFU2, Lat20, GroupAlone], (instregex "FIXTR$")>; + +// Extract biased exponent +def : InstRW<[FXU, DFU, Lat15, GroupAlone], (instregex "EEDTR$")>; +def : InstRW<[FXU, DFU2, Lat15, GroupAlone], (instregex "EEXTR$")>; + +// Extract significance +def : InstRW<[FXU, DFU, Lat15, GroupAlone], (instregex "ESDTR$")>; +def : InstRW<[FXU, DFU, DFU, Lat20, GroupAlone], (instregex "ESXTR$")>; + +//===----------------------------------------------------------------------===// +// DFP: Binary arithmetic +//===----------------------------------------------------------------------===// + +// Addition +def : InstRW<[DFU, Lat30], (instregex "ADTR(A)?$")>; +def : InstRW<[DFU2, DFU2, Lat30, GroupAlone], (instregex "AXTR(A)?$")>; + +// Subtraction +def : InstRW<[DFU, Lat30], (instregex "SDTR(A)?$")>; +def : InstRW<[DFU2, DFU2, Lat30, GroupAlone], (instregex "SXTR(A)?$")>; + +// 
Multiply +def : InstRW<[DFU, Lat30], (instregex "MDTR(A)?$")>; +def : InstRW<[DFU2, DFU2, Lat30, GroupAlone], (instregex "MXTR(A)?$")>; + +// Division +def : InstRW<[DFU, Lat30], (instregex "DDTR(A)?$")>; +def : InstRW<[DFU2, DFU2, Lat30, GroupAlone], (instregex "DXTR(A)?$")>; + +// Quantize +def : InstRW<[DFU, Lat30], (instregex "QADTR$")>; +def : InstRW<[DFU2, DFU2, Lat30, GroupAlone], (instregex "QAXTR$")>; + +// Reround +def : InstRW<[FXU, DFU, Lat30, GroupAlone], (instregex "RRDTR$")>; +def : InstRW<[FXU, DFU2, DFU2, Lat30, GroupAlone], (instregex "RRXTR$")>; + +// Shift significand left/right +def : InstRW<[LSU, DFU, Lat11, GroupAlone], (instregex "S(L|R)DT$")>; +def : InstRW<[LSU, DFU2, DFU2, Lat15, GroupAlone], (instregex "S(L|R)XT$")>; + +// Insert biased exponent +def : InstRW<[FXU, DFU, Lat11, GroupAlone], (instregex "IEDTR$")>; +def : InstRW<[FXU, DFU2, DFU2, Lat15, GroupAlone], (instregex "IEXTR$")>; + +//===----------------------------------------------------------------------===// +// DFP: Comparisons +//===----------------------------------------------------------------------===// + +// Compare +def : InstRW<[DFU, Lat11], (instregex "(K|C)DTR$")>; +def : InstRW<[DFU, DFU, Lat15], (instregex "(K|C)XTR$")>; + +// Compare biased exponent +def : InstRW<[DFU, Lat8], (instregex "CEDTR$")>; +def : InstRW<[DFU, DFU, Lat9], (instregex "CEXTR$")>; + +// Test Data Class/Group +def : InstRW<[LSU, DFU, Lat15], (instregex "TD(C|G)(E|D)T$")>; +def : InstRW<[LSU, DFU2, Lat15], (instregex "TD(C|G)XT$")>; + + +// -------------------------------- System ---------------------------------- // + +//===----------------------------------------------------------------------===// +// System: Program-Status Word Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXU, Lat30], (instregex "EPSW$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "LPSW(E)?$")>; +def : InstRW<[FXU, Lat3, GroupAlone], (instregex "IPK$")>; +def : InstRW<[LSU, EndGroup], (instregex "SPKA$")>; +def : InstRW<[LSU, EndGroup], (instregex "SSM$")>; +def : InstRW<[FXU, LSU, GroupAlone], (instregex "ST(N|O)SM$")>; +def : InstRW<[FXU, Lat3], (instregex "IAC$")>; +def : InstRW<[LSU, EndGroup], (instregex "SAC(F)?$")>; + +//===----------------------------------------------------------------------===// +// System: Control Register Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXU, LSU, Lat30], (instregex "LCTL(G)?$")>; +def : InstRW<[FXU, LSU, LSU, LSU, LSU, Lat30, GroupAlone], + (instregex "STCT(L|G)$")>; +def : InstRW<[LSU], (instregex "E(P|S)A(I)?R$")>; +def : InstRW<[FXU, Lat30], (instregex "SSA(I)?R$")>; +def : InstRW<[FXU, Lat30], (instregex "ESEA$")>; + +//===----------------------------------------------------------------------===// +// System: Prefix-Register Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXU, LSU, Lat30], (instregex "SPX$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "STPX$")>; + +//===----------------------------------------------------------------------===// +// System: Storage-Key and Real Memory Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXU, Lat30], (instregex "ISKE$")>; +def : InstRW<[FXU, Lat30], (instregex "IVSK$")>; +def : InstRW<[FXU, Lat30], (instregex "SSKE(Opt)?$")>; +def : InstRW<[FXU, Lat30], (instregex "RRB(E|M)$")>; +def : InstRW<[FXU, 
Lat30], (instregex "PFMF$")>; +def : InstRW<[FXU, Lat30], (instregex "TB$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "PGIN$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "PGOUT$")>; + +//===----------------------------------------------------------------------===// +// System: Dynamic-Address-Translation Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXU, LSU, Lat30], (instregex "IPTE(Opt)?(Opt)?$")>; +def : InstRW<[FXU, Lat30], (instregex "IDTE(Opt)?$")>; +def : InstRW<[FXU, Lat30], (instregex "CRDTE(Opt)?$")>; +def : InstRW<[FXU, Lat30], (instregex "PTLB$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "CSP(G)?$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "LPTEA$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "LRA(Y|G)?$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "STRAG$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "LURA(G)?$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "STUR(A|G)$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "TPROT$")>; + +//===----------------------------------------------------------------------===// +// System: Memory-move Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[LSU, Lat8, GroupAlone], (instregex "MVC(K|P|S)$")>; +def : InstRW<[LSU, Lat6, Lat30, GroupAlone], (instregex "MVCSK$")>; +def : InstRW<[LSU, Lat6, GroupAlone], (instregex "MVCDK$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "MVCOS$")>; +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MVPG$")>; + +//===----------------------------------------------------------------------===// +// System: Address-Space Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXU, LSU, Lat30], (instregex "LASP$")>; +def : InstRW<[LSU, GroupAlone], (instregex "PALB$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "PC$")>; +def : InstRW<[FXU, Lat30], (instregex "PR$")>; +def : InstRW<[FXU, Lat30], (instregex "PT(I)?$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "RP$")>; +def : InstRW<[FXU, Lat30], (instregex "BS(G|A)$")>; +def : InstRW<[FXU, Lat20], (instregex "TAR$")>; + +//===----------------------------------------------------------------------===// +// System: Linkage-Stack Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXU, LSU, Lat30, EndGroup], (instregex "BAKR$")>; +def : InstRW<[FXU, Lat30], (instregex "EREG(G)?$")>; +def : InstRW<[FXU, Lat30], (instregex "(E|M)STA$")>; + +//===----------------------------------------------------------------------===// +// System: Time-Related Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXU, Lat30], (instregex "PTFF$")>; +def : InstRW<[FXU, LSU, Lat20], (instregex "SCK$")>; +def : InstRW<[FXU, Lat30], (instregex "SCKPF$")>; +def : InstRW<[FXU, LSU, Lat20], (instregex "SCKC$")>; +def : InstRW<[FXU, LSU, Lat20], (instregex "SPT$")>; +def : InstRW<[FXU, LSU, LSU, Lat9, GroupAlone], (instregex "STCK(F)?$")>; +def : InstRW<[LSU, LSU, LSU, LSU, FXU, FXU, Lat20, GroupAlone], + (instregex "STCKE$")>; +def : InstRW<[FXU, LSU, Lat9], (instregex "STCKC$")>; +def : InstRW<[FXU, LSU, Lat8], (instregex "STPT$")>; + +//===----------------------------------------------------------------------===// +// System: CPU-Related Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXU, LSU, Lat30], 
(instregex "STAP$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "STIDP$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "STSI$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "STFL(E)?$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "ECAG$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "ECTG$")>; +def : InstRW<[FXU, Lat30], (instregex "PTF$")>; +def : InstRW<[FXU, Lat30], (instregex "PCKMO$")>; + +//===----------------------------------------------------------------------===// +// System: Miscellaneous Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXU, Lat30], (instregex "SVC$")>; +def : InstRW<[FXU, GroupAlone], (instregex "MC$")>; +def : InstRW<[FXU, Lat30], (instregex "DIAG$")>; +def : InstRW<[FXU], (instregex "TRAC(E|G)$")>; +def : InstRW<[FXU, Lat30], (instregex "TRAP(2|4)$")>; +def : InstRW<[FXU, Lat30], (instregex "SIGP$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "SIGA$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "SIE$")>; + +//===----------------------------------------------------------------------===// +// System: CPU-Measurement Facility Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXU], (instregex "LPP$")>; +def : InstRW<[FXU, Lat30], (instregex "ECPGA$")>; +def : InstRW<[FXU, Lat30], (instregex "E(C|P)CTR$")>; +def : InstRW<[FXU, Lat30], (instregex "LCCTL$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "L(P|S)CTL$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "Q(S|CTR)I$")>; +def : InstRW<[FXU, Lat30], (instregex "S(C|P)CTR$")>; + +//===----------------------------------------------------------------------===// +// System: I/O Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXU, Lat30], (instregex "(C|H|R|X)SCH$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "(M|S|ST|T)SCH$")>; +def : InstRW<[FXU, Lat30], (instregex "RCHP$")>; +def : InstRW<[FXU, Lat30], (instregex "SCHM$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "STC(PS|RW)$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "TPI$")>; +def : InstRW<[FXU, Lat30], (instregex "SAL$")>; } diff --git a/interpreter/llvm/src/lib/Target/SystemZ/SystemZShortenInst.cpp b/interpreter/llvm/src/lib/Target/SystemZ/SystemZShortenInst.cpp index 263aff8b7bfb9..13ceb371a425e 100644 --- a/interpreter/llvm/src/lib/Target/SystemZ/SystemZShortenInst.cpp +++ b/interpreter/llvm/src/lib/Target/SystemZ/SystemZShortenInst.cpp @@ -14,9 +14,9 @@ //===----------------------------------------------------------------------===// #include "SystemZTargetMachine.h" +#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; @@ -200,14 +200,26 @@ bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) { Changed |= shortenOn001AddCC(MI, SystemZ::ADBR); break; + case SystemZ::WFASB: + Changed |= shortenOn001AddCC(MI, SystemZ::AEBR); + break; + case SystemZ::WFDDB: Changed |= shortenOn001(MI, SystemZ::DDBR); break; + case SystemZ::WFDSB: + Changed |= shortenOn001(MI, SystemZ::DEBR); + break; + case SystemZ::WFIDB: Changed |= shortenFPConv(MI, SystemZ::FIDBRA); break; + case SystemZ::WFISB: + Changed |= shortenFPConv(MI, SystemZ::FIEBRA); + break; + case SystemZ::WLDEB: Changed |= shortenOn01(MI, SystemZ::LDEBR); break; @@ -220,30 +232,58 @@ bool 
SystemZShortenInst::processBlock(MachineBasicBlock &MBB) { Changed |= shortenOn001(MI, SystemZ::MDBR); break; + case SystemZ::WFMSB: + Changed |= shortenOn001(MI, SystemZ::MEEBR); + break; + case SystemZ::WFLCDB: Changed |= shortenOn01(MI, SystemZ::LCDFR); break; + case SystemZ::WFLCSB: + Changed |= shortenOn01(MI, SystemZ::LCDFR_32); + break; + case SystemZ::WFLNDB: Changed |= shortenOn01(MI, SystemZ::LNDFR); break; + case SystemZ::WFLNSB: + Changed |= shortenOn01(MI, SystemZ::LNDFR_32); + break; + case SystemZ::WFLPDB: Changed |= shortenOn01(MI, SystemZ::LPDFR); break; + case SystemZ::WFLPSB: + Changed |= shortenOn01(MI, SystemZ::LPDFR_32); + break; + case SystemZ::WFSQDB: Changed |= shortenOn01(MI, SystemZ::SQDBR); break; + case SystemZ::WFSQSB: + Changed |= shortenOn01(MI, SystemZ::SQEBR); + break; + case SystemZ::WFSDB: Changed |= shortenOn001AddCC(MI, SystemZ::SDBR); break; + case SystemZ::WFSSB: + Changed |= shortenOn001AddCC(MI, SystemZ::SEBR); + break; + case SystemZ::WFCDB: Changed |= shortenOn01(MI, SystemZ::CDBR); break; + case SystemZ::WFCSB: + Changed |= shortenOn01(MI, SystemZ::CEBR); + break; + case SystemZ::VL32: // For z13 we prefer LDE over LE to avoid partial register dependencies. Changed |= shortenOn0(MI, SystemZ::LDE32); diff --git a/interpreter/llvm/src/lib/Target/SystemZ/SystemZSubtarget.cpp b/interpreter/llvm/src/lib/Target/SystemZ/SystemZSubtarget.cpp index 022679a7bc180..9cd09b0f911e0 100644 --- a/interpreter/llvm/src/lib/Target/SystemZ/SystemZSubtarget.cpp +++ b/interpreter/llvm/src/lib/Target/SystemZ/SystemZSubtarget.cpp @@ -37,13 +37,20 @@ SystemZSubtarget::SystemZSubtarget(const Triple &TT, const std::string &CPU, const TargetMachine &TM) : SystemZGenSubtargetInfo(TT, CPU, FS), HasDistinctOps(false), HasLoadStoreOnCond(false), HasHighWord(false), HasFPExtension(false), - HasPopulationCount(false), HasMessageSecurityAssist4(false), + HasPopulationCount(false), HasMessageSecurityAssist3(false), + HasMessageSecurityAssist4(false), HasResetReferenceBitsMultiple(false), HasFastSerialization(false), HasInterlockedAccess1(false), HasMiscellaneousExtensions(false), HasExecutionHint(false), HasLoadAndTrap(false), HasTransactionalExecution(false), HasProcessorAssist(false), + HasDFPZonedConversion(false), HasEnhancedDAT2(false), HasVector(false), HasLoadStoreOnCond2(false), HasLoadAndZeroRightmostByte(false), HasMessageSecurityAssist5(false), + HasDFPPackedConversion(false), + HasMiscellaneousExtensions2(false), HasGuardedStorage(false), + HasMessageSecurityAssist7(false), HasMessageSecurityAssist8(false), + HasVectorEnhancements1(false), HasVectorPackedDecimal(false), + HasInsertReferenceBitsMultiple(false), TargetTriple(TT), InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this), TSInfo(), FrameLowering() {} diff --git a/interpreter/llvm/src/lib/Target/SystemZ/SystemZSubtarget.h b/interpreter/llvm/src/lib/Target/SystemZ/SystemZSubtarget.h index 770dd7cd939ff..4829f73e080e2 100644 --- a/interpreter/llvm/src/lib/Target/SystemZ/SystemZSubtarget.h +++ b/interpreter/llvm/src/lib/Target/SystemZ/SystemZSubtarget.h @@ -19,8 +19,8 @@ #include "SystemZInstrInfo.h" #include "SystemZRegisterInfo.h" #include "SystemZSelectionDAGInfo.h" -#include "llvm/IR/DataLayout.h" #include "llvm/ADT/Triple.h" +#include "llvm/IR/DataLayout.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <string> @@ -39,7 +39,9 @@ class SystemZSubtarget : public SystemZGenSubtargetInfo { bool HasHighWord; bool HasFPExtension; bool HasPopulationCount; + bool HasMessageSecurityAssist3; bool
HasMessageSecurityAssist4; + bool HasResetReferenceBitsMultiple; bool HasFastSerialization; bool HasInterlockedAccess1; bool HasMiscellaneousExtensions; @@ -47,10 +49,20 @@ class SystemZSubtarget : public SystemZGenSubtargetInfo { bool HasLoadAndTrap; bool HasTransactionalExecution; bool HasProcessorAssist; + bool HasDFPZonedConversion; + bool HasEnhancedDAT2; bool HasVector; bool HasLoadStoreOnCond2; bool HasLoadAndZeroRightmostByte; bool HasMessageSecurityAssist5; + bool HasDFPPackedConversion; + bool HasMiscellaneousExtensions2; + bool HasGuardedStorage; + bool HasMessageSecurityAssist7; + bool HasMessageSecurityAssist8; + bool HasVectorEnhancements1; + bool HasVectorPackedDecimal; + bool HasInsertReferenceBitsMultiple; private: Triple TargetTriple; @@ -106,10 +118,19 @@ class SystemZSubtarget : public SystemZGenSubtargetInfo { // Return true if the target has the population-count facility. bool hasPopulationCount() const { return HasPopulationCount; } + // Return true if the target has the message-security-assist + // extension facility 3. + bool hasMessageSecurityAssist3() const { return HasMessageSecurityAssist3; } + // Return true if the target has the message-security-assist // extension facility 4. bool hasMessageSecurityAssist4() const { return HasMessageSecurityAssist4; } + // Return true if the target has the reset-reference-bits-multiple facility. + bool hasResetReferenceBitsMultiple() const { + return HasResetReferenceBitsMultiple; + } + // Return true if the target has the fast-serialization facility. bool hasFastSerialization() const { return HasFastSerialization; } @@ -133,6 +154,12 @@ class SystemZSubtarget : public SystemZGenSubtargetInfo { // Return true if the target has the processor-assist facility. bool hasProcessorAssist() const { return HasProcessorAssist; } + // Return true if the target has the DFP zoned-conversion facility. + bool hasDFPZonedConversion() const { return HasDFPZonedConversion; } + + // Return true if the target has the enhanced-DAT facility 2. + bool hasEnhancedDAT2() const { return HasEnhancedDAT2; } + // Return true if the target has the load-and-zero-rightmost-byte facility. bool hasLoadAndZeroRightmostByte() const { return HasLoadAndZeroRightmostByte; @@ -142,9 +169,39 @@ class SystemZSubtarget : public SystemZGenSubtargetInfo { // extension facility 5. bool hasMessageSecurityAssist5() const { return HasMessageSecurityAssist5; } + // Return true if the target has the DFP packed-conversion facility. + bool hasDFPPackedConversion() const { return HasDFPPackedConversion; } + // Return true if the target has the vector facility. bool hasVector() const { return HasVector; } + // Return true if the target has the miscellaneous-extensions facility 2. + bool hasMiscellaneousExtensions2() const { + return HasMiscellaneousExtensions2; + } + + // Return true if the target has the guarded-storage facility. + bool hasGuardedStorage() const { return HasGuardedStorage; } + + // Return true if the target has the message-security-assist + // extension facility 7. + bool hasMessageSecurityAssist7() const { return HasMessageSecurityAssist7; } + + // Return true if the target has the message-security-assist + // extension facility 8. + bool hasMessageSecurityAssist8() const { return HasMessageSecurityAssist8; } + + // Return true if the target has the vector-enhancements facility 1. + bool hasVectorEnhancements1() const { return HasVectorEnhancements1; } + + // Return true if the target has the vector-packed-decimal facility. 
+ bool hasVectorPackedDecimal() const { return HasVectorPackedDecimal; } + + // Return true if the target has the insert-reference-bits-multiple facility. + bool hasInsertReferenceBitsMultiple() const { + return HasInsertReferenceBitsMultiple; + } + // Return true if GV can be accessed using LARL for reloc model RM // and code model CM. bool isPC32DBLSymbol(const GlobalValue *GV, CodeModel::Model CM) const; diff --git a/interpreter/llvm/src/lib/Target/SystemZ/SystemZTDC.cpp b/interpreter/llvm/src/lib/Target/SystemZ/SystemZTDC.cpp index 96a9ef82c125f..5dbd23d420a30 100644 --- a/interpreter/llvm/src/lib/Target/SystemZ/SystemZTDC.cpp +++ b/interpreter/llvm/src/lib/Target/SystemZ/SystemZTDC.cpp @@ -47,10 +47,10 @@ #include "SystemZ.h" #include "llvm/ADT/MapVector.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/Instructions.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/IRBuilder.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Module.h" #include diff --git a/interpreter/llvm/src/lib/Target/SystemZ/SystemZTargetMachine.cpp b/interpreter/llvm/src/lib/Target/SystemZ/SystemZTargetMachine.cpp index ede5005fa4916..025bf73d2df0d 100644 --- a/interpreter/llvm/src/lib/Target/SystemZ/SystemZTargetMachine.cpp +++ b/interpreter/llvm/src/lib/Target/SystemZ/SystemZTargetMachine.cpp @@ -7,14 +7,14 @@ // //===----------------------------------------------------------------------===// +#include "SystemZTargetMachine.h" #include "MCTargetDesc/SystemZMCTargetDesc.h" #include "SystemZ.h" #include "SystemZMachineScheduler.h" -#include "SystemZTargetMachine.h" #include "SystemZTargetTransformInfo.h" #include "llvm/ADT/Optional.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/Passes.h" @@ -119,7 +119,7 @@ namespace { /// SystemZ Code Generator Pass Configuration Options. 
class SystemZPassConfig : public TargetPassConfig { public: - SystemZPassConfig(SystemZTargetMachine *TM, PassManagerBase &PM) + SystemZPassConfig(SystemZTargetMachine &TM, PassManagerBase &PM) : TargetPassConfig(TM, PM) {} SystemZTargetMachine &getSystemZTargetMachine() const { @@ -143,8 +143,10 @@ class SystemZPassConfig : public TargetPassConfig { } // end anonymous namespace void SystemZPassConfig::addIRPasses() { - if (getOptLevel() != CodeGenOpt::None) + if (getOptLevel() != CodeGenOpt::None) { addPass(createSystemZTDCPass()); + addPass(createLoopDataPrefetchPass()); + } TargetPassConfig::addIRPasses(); } @@ -212,7 +214,7 @@ void SystemZPassConfig::addPreEmitPass() { } TargetPassConfig *SystemZTargetMachine::createPassConfig(PassManagerBase &PM) { - return new SystemZPassConfig(this, PM); + return new SystemZPassConfig(*this, PM); } TargetIRAnalysis SystemZTargetMachine::getTargetIRAnalysis() { diff --git a/interpreter/llvm/src/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/interpreter/llvm/src/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp index f56b238f91e66..506dc74279932 100644 --- a/interpreter/llvm/src/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp +++ b/interpreter/llvm/src/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp @@ -238,7 +238,7 @@ SystemZTTIImpl::getPopcntSupport(unsigned TyWidth) { return TTI::PSK_Software; } -void SystemZTTIImpl::getUnrollingPreferences(Loop *L, +void SystemZTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP) { // Find out if L contains a call, what the machine instruction count // estimate is, and how many stores there are. @@ -302,7 +302,7 @@ unsigned SystemZTTIImpl::getNumberOfRegisters(bool Vector) { return 0; } -unsigned SystemZTTIImpl::getRegisterBitWidth(bool Vector) { +unsigned SystemZTTIImpl::getRegisterBitWidth(bool Vector) const { if (!Vector) return 64; if (ST->hasVector()) @@ -325,6 +325,30 @@ int SystemZTTIImpl::getArithmeticInstrCost( unsigned ScalarBits = Ty->getScalarSizeInBits(); + // Div with a constant which is a power of 2 will be converted by + // DAGCombiner to use shifts. With vector shift-element instructions, a + // vector sdiv costs about as much as a scalar one. + const unsigned SDivCostEstimate = 4; + bool SDivPow2 = false; + bool UDivPow2 = false; + if ((Opcode == Instruction::SDiv || Opcode == Instruction::UDiv) && + Args.size() == 2) { + const ConstantInt *CI = nullptr; + if (const Constant *C = dyn_cast<Constant>(Args[1])) { + if (C->getType()->isVectorTy()) + CI = dyn_cast_or_null<ConstantInt>(C->getSplatValue()); + else + CI = dyn_cast<ConstantInt>(C); + } + if (CI != nullptr && + (CI->getValue().isPowerOf2() || (-CI->getValue()).isPowerOf2())) { + if (Opcode == Instruction::SDiv) + SDivPow2 = true; + else + UDivPow2 = true; + } + } + if (Ty->isVectorTy()) { assert (ST->hasVector() && "getArithmeticInstrCost() called with vector type."); unsigned VF = Ty->getVectorNumElements(); @@ -333,10 +357,13 @@ int SystemZTTIImpl::getArithmeticInstrCost( // These vector operations are custom handled, but are still supported // with one instruction per vector, regardless of element size. if (Opcode == Instruction::Shl || Opcode == Instruction::LShr || - Opcode == Instruction::AShr) { + Opcode == Instruction::AShr || UDivPow2) { return NumVectors; } + if (SDivPow2) + return (NumVectors * SDivCostEstimate); + // These FP operations are supported with a single vector instruction for // double (base implementation assumes float generally costs 2).
For // FP128, the scalar cost is 1, and there is no overhead since the values @@ -345,6 +372,9 @@ int SystemZTTIImpl::getArithmeticInstrCost( Opcode == Instruction::FMul || Opcode == Instruction::FDiv) { switch (ScalarBits) { case 32: { + // The vector enhancements facility 1 provides v4f32 instructions. + if (ST->hasVectorEnhancements1()) + return NumVectors; // Return the cost of multiple scalar invocation plus the cost of // inserting and extracting the values. unsigned ScalarCost = getArithmeticInstrCost(Opcode, Ty->getScalarType()); @@ -395,6 +425,11 @@ int SystemZTTIImpl::getArithmeticInstrCost( // 2 * ipm sequences ; xor ; shift ; compare return 7; + if (UDivPow2) + return 1; + if (SDivPow2) + return SDivCostEstimate; + // An extra extension for narrow types is needed. if ((Opcode == Instruction::SDiv || Opcode == Instruction::SRem)) // sext of op(s) for narrow types @@ -747,15 +782,14 @@ int SystemZTTIImpl:: getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) { // vlvgp will insert two grs into a vector register, so only count half the // number of instructions. - if (Opcode == Instruction::InsertElement && - Val->getScalarType()->isIntegerTy(64)) + if (Opcode == Instruction::InsertElement && Val->isIntOrIntVectorTy(64)) return ((Index % 2 == 0) ? 1 : 0); if (Opcode == Instruction::ExtractElement) { int Cost = ((Val->getScalarSizeInBits() == 1) ? 2 /*+test-under-mask*/ : 1); // Give a slight penalty for moving out of vector pipeline to FXU unit. - if (Index == 0 && Val->getScalarType()->isIntegerTy()) + if (Index == 0 && Val->isIntOrIntVectorTy()) Cost += 1; return Cost; diff --git a/interpreter/llvm/src/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/interpreter/llvm/src/lib/Target/SystemZ/SystemZTargetTransformInfo.h index 3766ed45b8c4e..a0c6fa94f8c1e 100644 --- a/interpreter/llvm/src/lib/Target/SystemZ/SystemZTargetTransformInfo.h +++ b/interpreter/llvm/src/lib/Target/SystemZ/SystemZTargetTransformInfo.h @@ -45,7 +45,8 @@ class SystemZTTIImpl : public BasicTTIImplBase<SystemZTTIImpl> { TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth); - void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP); + void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, + TTI::UnrollingPreferences &UP); /// @} @@ -53,8 +54,13 @@ class SystemZTTIImpl : public BasicTTIImplBase<SystemZTTIImpl> { /// @{ unsigned getNumberOfRegisters(bool Vector); - unsigned getRegisterBitWidth(bool Vector); + unsigned getRegisterBitWidth(bool Vector) const; + unsigned getCacheLineSize() { return 256; } + unsigned getPrefetchDistance() { return 2000; } + unsigned getMinPrefetchStride() { return 2048; } + + bool prefersVectorizedAddressing() { return false; } bool supportsEfficientVectorElementLoadStore() { return true; } bool enableInterleavedAccessVectorization() { return true; } diff --git a/interpreter/llvm/src/lib/Target/Target.cpp b/interpreter/llvm/src/lib/Target/Target.cpp index 5d1616d037798..42d92622d6c81 100644 --- a/interpreter/llvm/src/lib/Target/Target.cpp +++ b/interpreter/llvm/src/lib/Target/Target.cpp @@ -14,12 +14,12 @@ #include "llvm-c/Target.h" #include "llvm-c/Initialization.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Value.h" #include "llvm/InitializePasses.h" -#include "llvm/IR/LegacyPassManager.h" -#include "llvm/Analysis/TargetLibraryInfo.h" #include <cstring> using namespace llvm; diff --git a/interpreter/llvm/src/lib/Target/TargetLoweringObjectFile.cpp 
b/interpreter/llvm/src/lib/Target/TargetLoweringObjectFile.cpp index 91cc97e38b3d7..f941891f31833 100644 --- a/interpreter/llvm/src/lib/Target/TargetLoweringObjectFile.cpp +++ b/interpreter/llvm/src/lib/Target/TargetLoweringObjectFile.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" @@ -24,7 +25,6 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetLowering.h" @@ -240,6 +240,20 @@ MCSection *TargetLoweringObjectFile::SectionForGlobal( if (GO->hasSection()) return getExplicitSectionGlobal(GO, Kind, TM); + if (auto *GVar = dyn_cast<GlobalVariable>(GO)) { + auto Attrs = GVar->getAttributes(); + if ((Attrs.hasAttribute("bss-section") && Kind.isBSS()) || + (Attrs.hasAttribute("data-section") && Kind.isData()) || + (Attrs.hasAttribute("rodata-section") && Kind.isReadOnly())) { + return getExplicitSectionGlobal(GO, Kind, TM); + } + } + + if (auto *F = dyn_cast<Function>(GO)) { + if (F->hasFnAttribute("implicit-section-name")) + return getExplicitSectionGlobal(GO, Kind, TM); + } + // Use default section depending on the 'type' of global return SelectSectionForGlobal(GO, Kind, TM); } diff --git a/interpreter/llvm/src/lib/Target/TargetMachineC.cpp b/interpreter/llvm/src/lib/Target/TargetMachineC.cpp index df12e0e88e3bb..01f14939864f0 100644 --- a/interpreter/llvm/src/lib/Target/TargetMachineC.cpp +++ b/interpreter/llvm/src/lib/Target/TargetMachineC.cpp @@ -11,13 +11,13 @@ // //===----------------------------------------------------------------------===// -#include "llvm-c/TargetMachine.h" #include "llvm-c/Core.h" #include "llvm-c/Target.h" +#include "llvm-c/TargetMachine.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/Module.h" #include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Module.h" #include "llvm/Support/CodeGenCWrappers.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/FormattedStream.h" diff --git a/interpreter/llvm/src/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp b/interpreter/llvm/src/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp index b5f53114d3e16..9be11da9afac8 100644 --- a/interpreter/llvm/src/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp +++ b/interpreter/llvm/src/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp @@ -15,8 +15,8 @@ /// //===----------------------------------------------------------------------===// -#include "WebAssembly.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssembly.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCInst.h" diff --git a/interpreter/llvm/src/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h b/interpreter/llvm/src/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h index c6158720d62f1..b1de84d7e8e61 100644 --- a/interpreter/llvm/src/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h +++ b/interpreter/llvm/src/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h @@ -16,9 +16,9 @@ #define LLVM_LIB_TARGET_WEBASSEMBLY_INSTPRINTER_WEBASSEMBLYINSTPRINTER_H #include "llvm/ADT/SmallVector.h" +#include 
"llvm/BinaryFormat/Wasm.h" #include "llvm/CodeGen/MachineValueType.h" #include "llvm/MC/MCInstPrinter.h" -#include "llvm/Support/Wasm.h" namespace llvm { diff --git a/interpreter/llvm/src/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp b/interpreter/llvm/src/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp index 7c78285fbda45..1357cb5735f8a 100644 --- a/interpreter/llvm/src/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp +++ b/interpreter/llvm/src/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp @@ -12,8 +12,8 @@ /// //===----------------------------------------------------------------------===// -#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" #include "MCTargetDesc/WebAssemblyFixupKinds.h" +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCDirectives.h" @@ -37,8 +37,9 @@ class WebAssemblyAsmBackendELF final : public MCAsmBackend { : MCAsmBackend(), Is64Bit(Is64Bit) {} ~WebAssemblyAsmBackendELF() override {} - void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, - uint64_t Value, bool IsPCRel, MCContext &Ctx) const override; + void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target, MutableArrayRef Data, + uint64_t Value, bool IsPCRel) const override; MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override; @@ -77,8 +78,9 @@ class WebAssemblyAsmBackend final : public MCAsmBackend { const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override; - void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, - uint64_t Value, bool IsPCRel, MCContext &Ctx) const override; + void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target, MutableArrayRef Data, + uint64_t Value, bool IsPCRel) const override; MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override; @@ -105,9 +107,11 @@ bool WebAssemblyAsmBackendELF::writeNopData(uint64_t Count, return true; } -void WebAssemblyAsmBackendELF::applyFixup(const MCFixup &Fixup, char *Data, - unsigned DataSize, uint64_t Value, - bool IsPCRel, MCContext &Ctx) const { +void WebAssemblyAsmBackendELF::applyFixup(const MCAssembler &Asm, + const MCFixup &Fixup, + const MCValue &Target, + MutableArrayRef Data, + uint64_t Value, bool IsPCRel) const { const MCFixupKindInfo &Info = getFixupKindInfo(Fixup.getKind()); assert(Info.Flags == 0 && "WebAssembly does not use MCFixupKindInfo flags"); @@ -119,7 +123,7 @@ void WebAssemblyAsmBackendELF::applyFixup(const MCFixup &Fixup, char *Data, Value <<= Info.TargetOffset; unsigned Offset = Fixup.getOffset(); - assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!"); + assert(Offset + NumBytes <= Data.size() && "Invalid fixup offset!"); // For each byte of the fragment that the fixup touches, mask in the // bits from the fixup value. 
@@ -163,9 +167,11 @@ bool WebAssemblyAsmBackend::writeNopData(uint64_t Count, return true; } -void WebAssemblyAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, - unsigned DataSize, uint64_t Value, - bool IsPCRel, MCContext &Ctx) const { +void WebAssemblyAsmBackend::applyFixup(const MCAssembler &Asm, + const MCFixup &Fixup, + const MCValue &Target, + MutableArrayRef<char> Data, + uint64_t Value, bool IsPCRel) const { const MCFixupKindInfo &Info = getFixupKindInfo(Fixup.getKind()); assert(Info.Flags == 0 && "WebAssembly does not use MCFixupKindInfo flags"); @@ -177,7 +183,7 @@ void WebAssemblyAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, Value <<= Info.TargetOffset; unsigned Offset = Fixup.getOffset(); - assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!"); + assert(Offset + NumBytes <= Data.size() && "Invalid fixup offset!"); // For each byte of the fragment that the fixup touches, mask in the // bits from the fixup value. diff --git a/interpreter/llvm/src/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp b/interpreter/llvm/src/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp index 544cd653fd721..3e3b52fca5691 100644 --- a/interpreter/llvm/src/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp +++ b/interpreter/llvm/src/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp @@ -12,8 +12,8 @@ /// //===----------------------------------------------------------------------===// -#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" #include "MCTargetDesc/WebAssemblyFixupKinds.h" +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" #include "llvm/MC/MCCodeEmitter.h" @@ -36,7 +36,6 @@ STATISTIC(MCNumFixups, "Number of MC fixups created."); namespace { class WebAssemblyMCCodeEmitter final : public MCCodeEmitter { const MCInstrInfo &MCII; - MCContext &Ctx; // Implementation generated by tablegen.
uint64_t getBinaryCodeForInstr(const MCInst &MI, @@ -48,14 +47,12 @@ class WebAssemblyMCCodeEmitter final : public MCCodeEmitter { const MCSubtargetInfo &STI) const override; public: - WebAssemblyMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx) - : MCII(mcii), Ctx(ctx) {} + WebAssemblyMCCodeEmitter(const MCInstrInfo &mcii) : MCII(mcii) {} }; } // end anonymous namespace -MCCodeEmitter *llvm::createWebAssemblyMCCodeEmitter(const MCInstrInfo &MCII, - MCContext &Ctx) { - return new WebAssemblyMCCodeEmitter(MCII, Ctx); +MCCodeEmitter *llvm::createWebAssemblyMCCodeEmitter(const MCInstrInfo &MCII) { + return new WebAssemblyMCCodeEmitter(MCII); } void WebAssemblyMCCodeEmitter::encodeInstruction( @@ -89,11 +86,7 @@ void WebAssemblyMCCodeEmitter::encodeInstruction( } else if (Info.OperandType == WebAssembly::OPERAND_I64IMM) { encodeSLEB128(int64_t(MO.getImm()), OS); } else if (Info.OperandType == WebAssembly::OPERAND_GLOBAL) { - Fixups.push_back(MCFixup::create( - OS.tell() - Start, MCConstantExpr::create(MO.getImm(), Ctx), - MCFixupKind(WebAssembly::fixup_code_global_index), MI.getLoc())); - ++MCNumFixups; - encodeULEB128(uint64_t(MO.getImm()), OS); + llvm_unreachable("wasm globals should only be accessed symbolicly"); } else if (Info.OperandType == WebAssembly::OPERAND_SIGNATURE) { encodeSLEB128(int64_t(MO.getImm()), OS); } else { @@ -135,6 +128,9 @@ void WebAssemblyMCCodeEmitter::encodeInstruction( Info.OperandType == WebAssembly::OPERAND_TYPEINDEX) { FixupKind = MCFixupKind(WebAssembly::fixup_code_uleb128_i32); PaddedSize = 5; + } else if (Info.OperandType == WebAssembly::OPERAND_GLOBAL) { + FixupKind = MCFixupKind(WebAssembly::fixup_code_global_index); + PaddedSize = 5; } else { llvm_unreachable("unexpected symbolic operand kind"); } diff --git a/interpreter/llvm/src/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp b/interpreter/llvm/src/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp index 9fd3ec81c258f..9580eeaa33d73 100644 --- a/interpreter/llvm/src/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp +++ b/interpreter/llvm/src/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp @@ -74,7 +74,7 @@ static MCInstPrinter *createMCInstPrinter(const Triple & /*T*/, static MCCodeEmitter *createCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo & /*MRI*/, MCContext &Ctx) { - return createWebAssemblyMCCodeEmitter(MCII, Ctx); + return createWebAssemblyMCCodeEmitter(MCII); } static MCAsmBackend *createAsmBackend(const Target & /*T*/, diff --git a/interpreter/llvm/src/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/interpreter/llvm/src/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h index 795658ca96b4c..4d676c32a09c5 100644 --- a/interpreter/llvm/src/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h +++ b/interpreter/llvm/src/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h @@ -15,9 +15,9 @@ #ifndef LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYMCTARGETDESC_H #define LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYMCTARGETDESC_H +#include "llvm/BinaryFormat/Wasm.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/Support/DataTypes.h" -#include "llvm/Support/Wasm.h" namespace llvm { @@ -35,8 +35,7 @@ class raw_pwrite_stream; Target &getTheWebAssemblyTarget32(); Target &getTheWebAssemblyTarget64(); -MCCodeEmitter *createWebAssemblyMCCodeEmitter(const MCInstrInfo &MCII, - MCContext &Ctx); +MCCodeEmitter *createWebAssemblyMCCodeEmitter(const MCInstrInfo &MCII); MCAsmBackend 
*createWebAssemblyAsmBackend(const Triple &TT); diff --git a/interpreter/llvm/src/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp b/interpreter/llvm/src/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp index ad59f2f405879..00bf02469bdd9 100644 --- a/interpreter/llvm/src/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp +++ b/interpreter/llvm/src/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp @@ -115,8 +115,8 @@ void WebAssemblyTargetAsmStreamer::emitStackPointer(uint32_t Index) { void WebAssemblyTargetAsmStreamer::emitEndFunc() { OS << "\t.endfunc\n"; } void WebAssemblyTargetAsmStreamer::emitIndirectFunctionType( - StringRef name, SmallVectorImpl<MVT> &Params, SmallVectorImpl<MVT> &Results) { - OS << "\t.functype\t" << name; + MCSymbol *Symbol, SmallVectorImpl<MVT> &Params, SmallVectorImpl<MVT> &Results) { + OS << "\t.functype\t" << Symbol->getName(); if (Results.empty()) OS << ", void"; else { @@ -171,7 +171,7 @@ void WebAssemblyTargetELFStreamer::emitIndIdx(const MCExpr *Value) { } void WebAssemblyTargetELFStreamer::emitIndirectFunctionType( - StringRef name, SmallVectorImpl<MVT> &Params, SmallVectorImpl<MVT> &Results) { + MCSymbol *Symbol, SmallVectorImpl<MVT> &Params, SmallVectorImpl<MVT> &Results) { // Nothing to emit here. TODO: Re-design how linking works and re-evaluate // whether it's necessary for .o files to declare indirect function types. } @@ -255,9 +255,25 @@ void WebAssemblyTargetWasmStreamer::emitIndIdx(const MCExpr *Value) { } void WebAssemblyTargetWasmStreamer::emitIndirectFunctionType( - StringRef name, SmallVectorImpl<MVT> &Params, SmallVectorImpl<MVT> &Results) { - // Nothing to emit here. TODO: Re-design how linking works and re-evaluate - // whether it's necessary for .o files to declare indirect function types. + MCSymbol *Symbol, SmallVectorImpl<MVT> &Params, + SmallVectorImpl<MVT> &Results) { + MCSymbolWasm *WasmSym = cast<MCSymbolWasm>(Symbol); + if (WasmSym->isFunction()) { + // Symbol already has its arguments and result set. 
+ return; + } + + SmallVector<wasm::ValType, 4> ValParams; + for (MVT Ty : Params) + ValParams.push_back(WebAssembly::toValType(Ty)); + + SmallVector<wasm::ValType, 1> ValResults; + for (MVT Ty : Results) + ValResults.push_back(WebAssembly::toValType(Ty)); + + WasmSym->setParams(std::move(ValParams)); + WasmSym->setReturns(std::move(ValResults)); + WasmSym->setIsFunction(true); } void WebAssemblyTargetWasmStreamer::emitGlobalImport(StringRef name) { diff --git a/interpreter/llvm/src/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h b/interpreter/llvm/src/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h index 68d6747298dfc..102d7219a1e74 100644 --- a/interpreter/llvm/src/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h +++ b/interpreter/llvm/src/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h @@ -16,9 +16,9 @@ #ifndef LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYTARGETSTREAMER_H #define LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYTARGETSTREAMER_H +#include "llvm/BinaryFormat/Wasm.h" #include "llvm/CodeGen/MachineValueType.h" #include "llvm/MC/MCStreamer.h" -#include "llvm/Support/Wasm.h" namespace llvm { @@ -44,11 +44,9 @@ class WebAssemblyTargetStreamer : public MCTargetStreamer { /// .endfunc virtual void emitEndFunc() = 0; /// .functype - virtual void emitIndirectFunctionType(StringRef name, + virtual void emitIndirectFunctionType(MCSymbol *Symbol, SmallVectorImpl<MVT> &Params, - SmallVectorImpl<MVT> &Results) { - llvm_unreachable("emitIndirectFunctionType not implemented"); - } + SmallVectorImpl<MVT> &Results) = 0; /// .indidx virtual void emitIndIdx(const MCExpr *Value) = 0; /// .import_global @@ -71,7 +69,7 @@ class WebAssemblyTargetAsmStreamer final : public WebAssemblyTargetStreamer { void emitGlobal(ArrayRef<wasm::Global> Globals) override; void emitStackPointer(uint32_t Index) override; void emitEndFunc() override; - void emitIndirectFunctionType(StringRef name, + void emitIndirectFunctionType(MCSymbol *Symbol, SmallVectorImpl<MVT> &Params, SmallVectorImpl<MVT> &Results) override; void emitIndIdx(const MCExpr *Value) override; @@ -89,7 +87,7 @@ class WebAssemblyTargetELFStreamer final : public WebAssemblyTargetStreamer { void emitGlobal(ArrayRef<wasm::Global> Globals) override; void emitStackPointer(uint32_t Index) override; void emitEndFunc() override; - void emitIndirectFunctionType(StringRef name, + void emitIndirectFunctionType(MCSymbol *Symbol, SmallVectorImpl<MVT> &Params, SmallVectorImpl<MVT> &Results) override; void emitIndIdx(const MCExpr *Value) override; @@ -107,7 +105,7 @@ class WebAssemblyTargetWasmStreamer final : public WebAssemblyTargetStreamer { void emitGlobal(ArrayRef<wasm::Global> Globals) override; void emitStackPointer(uint32_t Index) override; void emitEndFunc() override; - void emitIndirectFunctionType(StringRef name, + void emitIndirectFunctionType(MCSymbol *Symbol, SmallVectorImpl<MVT> &Params, SmallVectorImpl<MVT> &Results) override; void emitIndIdx(const MCExpr *Value) override; diff --git a/interpreter/llvm/src/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp b/interpreter/llvm/src/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp index 2846ec5e93377..9cf77829f3bc2 100644 --- a/interpreter/llvm/src/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp +++ b/interpreter/llvm/src/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp @@ -13,14 +13,18 @@ /// //===----------------------------------------------------------------------===// -#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" #include "MCTargetDesc/WebAssemblyFixupKinds.h" 
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "llvm/BinaryFormat/Wasm.h" +#include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCFixup.h" +#include "llvm/MC/MCFixupKindInfo.h" #include "llvm/MC/MCSymbolWasm.h" #include "llvm/MC/MCWasmObjectWriter.h" +#include "llvm/MC/MCValue.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/Wasm.h" + using namespace llvm; namespace { @@ -29,8 +33,8 @@ class WebAssemblyWasmObjectWriter final : public MCWasmObjectTargetWriter { explicit WebAssemblyWasmObjectWriter(bool Is64Bit); private: - unsigned getRelocType(MCContext &Ctx, const MCValue &Target, - const MCFixup &Fixup, bool IsPCRel) const override; + unsigned getRelocType(const MCValue &Target, + const MCFixup &Fixup) const override; }; } // end anonymous namespace @@ -39,31 +43,33 @@ WebAssemblyWasmObjectWriter::WebAssemblyWasmObjectWriter(bool Is64Bit) // Test whether the given expression computes a function address. static bool IsFunctionExpr(const MCExpr *Expr) { - if (const MCSymbolRefExpr *SyExp = - dyn_cast(Expr)) + if (auto SyExp = dyn_cast(Expr)) return cast(SyExp->getSymbol()).isFunction(); - if (const MCBinaryExpr *BinOp = - dyn_cast(Expr)) + if (auto BinOp = dyn_cast(Expr)) return IsFunctionExpr(BinOp->getLHS()) != IsFunctionExpr(BinOp->getRHS()); - if (const MCUnaryExpr *UnOp = - dyn_cast(Expr)) + if (auto UnOp = dyn_cast(Expr)) return IsFunctionExpr(UnOp->getSubExpr()); return false; } -unsigned WebAssemblyWasmObjectWriter::getRelocType(MCContext &Ctx, - const MCValue &Target, - const MCFixup &Fixup, - bool IsPCRel) const { +static bool IsFunctionType(const MCValue &Target) { + const MCSymbolRefExpr *RefA = Target.getSymA(); + return RefA && RefA->getKind() == MCSymbolRefExpr::VK_WebAssembly_TYPEINDEX; +} + +unsigned +WebAssemblyWasmObjectWriter::getRelocType(const MCValue &Target, + const MCFixup &Fixup) const { // WebAssembly functions are not allocated in the data address space. To // resolve a pointer to a function, we must use a special relocation type. 
bool IsFunction = IsFunctionExpr(Fixup.getValue()); - assert(!IsPCRel); switch (unsigned(Fixup.getKind())) { + case WebAssembly::fixup_code_global_index: + return wasm::R_WEBASSEMBLY_GLOBAL_INDEX_LEB; case WebAssembly::fixup_code_sleb128_i32: if (IsFunction) return wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB; @@ -71,6 +77,8 @@ unsigned WebAssemblyWasmObjectWriter::getRelocType(MCContext &Ctx, case WebAssembly::fixup_code_sleb128_i64: llvm_unreachable("fixup_sleb128_i64 not implemented yet"); case WebAssembly::fixup_code_uleb128_i32: + if (IsFunctionType(Target)) + return wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB; if (IsFunction) return wasm::R_WEBASSEMBLY_FUNCTION_INDEX_LEB; return wasm::R_WEBASSEMBLY_GLOBAL_ADDR_LEB; diff --git a/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp b/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp index 4178ec0b28f05..211358ad66cd5 100644 --- a/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp +++ b/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp @@ -33,6 +33,8 @@ #include "llvm/MC/MCContext.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCSymbolWasm.h" +#include "llvm/MC/MCSymbolELF.h" #include "llvm/Support/Debug.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" @@ -82,7 +84,7 @@ void WebAssemblyAsmPrinter::EmitEndOfAsmFile(Module &M) { SmallVector<MVT, 4> Results; SmallVector<MVT, 4> Params; ComputeSignatureVTs(F, TM, Params, Results); - getTargetStreamer()->emitIndirectFunctionType(F.getName(), Params, + getTargetStreamer()->emitIndirectFunctionType(getSymbol(&F), Params, Results); } } @@ -94,13 +96,6 @@ void WebAssemblyAsmPrinter::EmitEndOfAsmFile(Module &M) { MCConstantExpr::create(Size, OutContext)); } } - - if (!TM.getTargetTriple().isOSBinFormatELF()) { - MachineModuleInfoWasm &MMIW = MMI->getObjFileInfo<MachineModuleInfoWasm>(); - getTargetStreamer()->emitGlobal(MMIW.getGlobals()); - if (MMIW.hasStackPointerGlobal()) - getTargetStreamer()->emitStackPointer(MMIW.getStackPointerGlobal()); - } } void WebAssemblyAsmPrinter::EmitConstantPool() { @@ -218,9 +213,10 @@ void WebAssemblyAsmPrinter::EmitInstruction(const MachineInstr *MI) { const MCExpr *WebAssemblyAsmPrinter::lowerConstant(const Constant *CV) { if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) - if (GV->getValueType()->isFunctionTy()) + if (GV->getValueType()->isFunctionTy()) { return MCSymbolRefExpr::create( getSymbol(GV), MCSymbolRefExpr::VK_WebAssembly_FUNCTION, OutContext); + } return AsmPrinter::lowerConstant(CV); } diff --git a/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp b/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp index 40e1928197bcf..700111743ee8e 100644 --- a/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp +++ b/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp @@ -17,8 +17,8 @@ /// ////===----------------------------------------------------------------------===// -#include "WebAssembly.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssembly.h" #include "WebAssemblySubtarget.h" #include "WebAssemblyUtilities.h" #include "llvm/ADT/PriorityQueue.h" @@ -132,7 +132,7 @@ static void SortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI, // no blocks not dominated by the loop header. // - It's desirable to preserve the original block order when possible. // We use two ready lists; Preferred and Ready. 
Preferred has recently - processed sucessors, to help preserve block sequences from the original + processed successors, to help preserve block sequences from the original order. Ready has the remaining ready blocks. PriorityQueue<MachineBasicBlock *, std::vector<MachineBasicBlock *>, CompareBlockNumbers> diff --git a/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp b/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp index bd11d1b469063..21e0f6b23777a 100644 --- a/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp +++ b/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp @@ -18,8 +18,8 @@ /// //===----------------------------------------------------------------------===// -#include "WebAssembly.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssembly.h" #include "WebAssemblyMachineFunctionInfo.h" #include "WebAssemblySubtarget.h" #include "WebAssemblyUtilities.h" diff --git a/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp b/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp index bc6360aafd61c..b2330a2320933 100644 --- a/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp +++ b/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp @@ -22,8 +22,8 @@ /// //===----------------------------------------------------------------------===// -#include "WebAssembly.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" // for WebAssembly::ARGUMENT_* +#include "WebAssembly.h" #include "WebAssemblyMachineFunctionInfo.h" #include "WebAssemblySubtarget.h" #include "llvm/Analysis/AliasAnalysis.h" diff --git a/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyFastISel.cpp b/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyFastISel.cpp index 53698ff09b100..c980f4b87f916 100644 --- a/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyFastISel.cpp +++ b/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyFastISel.cpp @@ -16,8 +16,8 @@ /// //===----------------------------------------------------------------------===// -#include "WebAssembly.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssembly.h" #include "WebAssemblyMachineFunctionInfo.h" #include "WebAssemblySubtarget.h" #include "WebAssemblyTargetMachine.h" @@ -63,12 +63,16 @@ class WebAssemblyFastISel final : public FastISel { public: // Innocuous defaults for our address. 
Address() : Kind(RegBase), Offset(0), GV(0) { Base.Reg = 0; } - void setKind(BaseKind K) { Kind = K; } + void setKind(BaseKind K) { + assert(!isSet() && "Can't change kind with non-zero base"); + Kind = K; + } BaseKind getKind() const { return Kind; } bool isRegBase() const { return Kind == RegBase; } bool isFIBase() const { return Kind == FrameIndexBase; } void setReg(unsigned Reg) { assert(isRegBase() && "Invalid base register access!"); + assert(Base.Reg == 0 && "Overwriting non-zero register"); Base.Reg = Reg; } unsigned getReg() const { @@ -77,6 +81,7 @@ class WebAssemblyFastISel final : public FastISel { } void setFI(unsigned FI) { assert(isFIBase() && "Invalid base frame index access!"); + assert(Base.FI == 0 && "Overwriting non-zero frame index"); Base.FI = FI; } unsigned getFI() const { @@ -91,6 +96,13 @@ class WebAssemblyFastISel final : public FastISel { int64_t getOffset() const { return Offset; } void setGlobalValue(const GlobalValue *G) { GV = G; } const GlobalValue *getGlobalValue() const { return GV; } + bool isSet() const { + if (isRegBase()) { + return Base.Reg != 0; + } else { + return Base.FI != 0; + } + } }; /// Keep a pointer to the WebAssemblySubtarget around so that we can make the @@ -297,6 +309,9 @@ bool WebAssemblyFastISel::computeAddress(const Value *Obj, Address &Addr) { DenseMap<const AllocaInst *, int>::iterator SI = FuncInfo.StaticAllocaMap.find(AI); if (SI != FuncInfo.StaticAllocaMap.end()) { + if (Addr.isSet()) { + return false; + } Addr.setKind(Address::FrameIndexBase); Addr.setFI(SI->second); return true; @@ -341,6 +356,9 @@ bool WebAssemblyFastISel::computeAddress(const Value *Obj, Address &Addr) { break; } } + if (Addr.isSet()) { + return false; + } Addr.setReg(getRegForValue(Obj)); return Addr.getReg() != 0; } diff --git a/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp b/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp index 2bbf7a2b42f9a..41f315c2825b6 100644 --- a/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp +++ b/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp @@ -26,8 +26,8 @@ /// //===----------------------------------------------------------------------===// -#include "WebAssembly.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssembly.h" #include "WebAssemblyMachineFunctionInfo.h" #include "WebAssemblySubtarget.h" #include "llvm/ADT/PriorityQueue.h" diff --git a/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp b/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp index 4209bc333f230..a37d6136e44ed 100644 --- a/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp +++ b/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp @@ -104,10 +104,10 @@ static void writeSPToMemory(unsigned SrcReg, MachineFunction &MF, const DebugLoc &DL) { const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); + const char *ES = "__stack_pointer"; + auto *SPSymbol = MF.createExternalSymbolName(ES); if (MF.getSubtarget<WebAssemblySubtarget>() .getTargetTriple().isOSBinFormatELF()) { - const char *ES = "__stack_pointer"; - auto *SPSymbol = MF.createExternalSymbolName(ES); MachineRegisterInfo &MRI = MF.getRegInfo(); const TargetRegisterClass *PtrRC = MRI.getTargetRegisterInfo()->getPointerRegClass(MF); @@ -125,10 +125,8 @@ static void writeSPToMemory(unsigned SrcReg, MachineFunction &MF, .addReg(SrcReg) .addMemOperand(MMO); } else { - MachineModuleInfoWasm &MMIW = - 
MF.getMMI().getObjFileInfo<MachineModuleInfoWasm>(); BuildMI(MBB, InsertStore, DL, TII->get(WebAssembly::SET_GLOBAL_I32)) - .addImm(MMIW.getStackPointerGlobal()) + .addExternalSymbol(SPSymbol) .addReg(SrcReg); } } @@ -171,10 +169,11 @@ void WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF, unsigned SPReg = WebAssembly::SP32; if (StackSize) SPReg = MRI.createVirtualRegister(PtrRC); + + const char *ES = "__stack_pointer"; + auto *SPSymbol = MF.createExternalSymbolName(ES); if (MF.getSubtarget<WebAssemblySubtarget>() .getTargetTriple().isOSBinFormatELF()) { - const char *ES = "__stack_pointer"; - auto *SPSymbol = MF.createExternalSymbolName(ES); unsigned Zero = MRI.createVirtualRegister(PtrRC); BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), Zero) @@ -189,22 +188,8 @@ void WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF, .addReg(Zero) // addr .addMemOperand(LoadMMO); } else { - auto &MMIW = MF.getMMI().getObjFileInfo<MachineModuleInfoWasm>(); - if (!MMIW.hasStackPointerGlobal()) { - MMIW.setStackPointerGlobal(MMIW.getGlobals().size()); - - // Create the stack-pointer global. For now, just use the - // Emscripten/Binaryen ABI names. - wasm::Global G; - G.Type = wasm::ValType::I32; - G.Mutable = true; - G.InitialValue = 0; - G.InitialModule = "env"; - G.InitialName = "STACKTOP"; - MMIW.addGlobal(G); - } BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::GET_GLOBAL_I32), SPReg) - .addImm(MMIW.getStackPointerGlobal()); + .addExternalSymbol(SPSymbol); } bool HasBP = hasBP(MF); diff --git a/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp b/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp index 257f1d110aa29..4f3ae57733e5b 100644 --- a/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp +++ b/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp @@ -12,8 +12,8 @@ /// //===----------------------------------------------------------------------===// -#include "WebAssembly.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssembly.h" #include "WebAssemblyTargetMachine.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/IR/Function.h" // To access function attributes. diff --git a/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index 31a5ca1f4cc27..814377003cbcc 100644 --- a/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -84,8 +84,8 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering( ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE}) setCondCodeAction(CC, T, Expand); // Expand floating-point library function operators. - for (auto Op : {ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOWI, ISD::FPOW, - ISD::FREM, ISD::FMA}) + for (auto Op : {ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM, + ISD::FMA}) setOperationAction(Op, T, Expand); // Note supported floating-point library function operators that otherwise // default to expand.
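The WebAssemblyFastISel hunks above harden the Address helper: a base register or frame index may only be populated once, and computeAddress() now returns false (letting FastISel fall back to the full SelectionDAG path) rather than silently overwriting a base that is already set. A minimal standalone sketch of that guard pattern, with hypothetical names; it mirrors the diff's logic but is not the LLVM class itself:

```cpp
#include <cassert>
#include <iostream>

// Sketch of a tagged-union address whose base may be set only once.
class Address {
public:
  enum BaseKind { RegBase, FrameIndexBase };

private:
  BaseKind Kind = RegBase;
  union {
    unsigned Reg;
    unsigned FI;
  } Base;

public:
  Address() { Base.Reg = 0; }
  bool isRegBase() const { return Kind == RegBase; }
  // An address counts as "set" once either base holds a non-zero value.
  bool isSet() const { return isRegBase() ? Base.Reg != 0 : Base.FI != 0; }
  void setKind(BaseKind K) {
    assert(!isSet() && "Can't change kind with non-zero base");
    Kind = K;
  }
  void setReg(unsigned Reg) {
    assert(isRegBase() && Base.Reg == 0 && "Overwriting non-zero register");
    Base.Reg = Reg;
  }
};

// A computeAddress-style caller refuses to fold a second base and instead
// reports failure so a more general lowering path can take over.
bool computeAddress(Address &Addr, unsigned CandidateReg) {
  if (Addr.isSet())
    return false;
  Addr.setReg(CandidateReg);
  return CandidateReg != 0;
}

int main() {
  Address A;
  std::cout << computeAddress(A, 7) << ' ' << computeAddress(A, 9) << '\n';
  // prints "1 0": the second base is rejected rather than overwritten
}
```

Bailing out conservatively is the point of the change: an address expression with two candidate bases cannot be encoded in a single wasm load/store operand, so refusing it is safer than clobbering the first base.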
diff --git a/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyInstrControl.td b/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyInstrControl.td index 39cb1ca336f2d..1297941714644 100644 --- a/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyInstrControl.td +++ b/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyInstrControl.td @@ -57,17 +57,19 @@ def BR_TABLE_I64 : I<(outs), (ins I64:$index, variable_ops), } } // isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 -// Placemarkers to indicate the start or end of a block or loop scope. These -// use/clobber VALUE_STACK to prevent them from being moved into the middle of -// an expression tree. +// Placemarkers to indicate the start or end of a block, loop, or try scope. +// These use/clobber VALUE_STACK to prevent them from being moved into the +// middle of an expression tree. let Uses = [VALUE_STACK], Defs = [VALUE_STACK] in { def BLOCK : I<(outs), (ins Signature:$sig), [], "block \t$sig", 0x02>; def LOOP : I<(outs), (ins Signature:$sig), [], "loop \t$sig", 0x03>; +def TRY : I<(outs), (ins Signature:$sig), [], "try \t$sig", 0x06>; -// END_BLOCK, END_LOOP, and END_FUNCTION are represented with the same opcode -// in wasm. +// END_BLOCK, END_LOOP, END_TRY, and END_FUNCTION are represented with the same +// opcode in wasm. def END_BLOCK : I<(outs), (ins), [], "end_block", 0x0b>; def END_LOOP : I<(outs), (ins), [], "end_loop", 0x0b>; +def END_TRY : I<(outs), (ins), [], "end_try", 0x0b>; let isTerminator = 1, isBarrier = 1 in def END_FUNCTION : I<(outs), (ins), [], "end_function", 0x0b>; } // Uses = [VALUE_STACK], Defs = [VALUE_STACK] @@ -112,6 +114,20 @@ let isReturn = 1 in { def UNREACHABLE : I<(outs), (ins), [(trap)], "unreachable", 0x00>; +def THROW_I32 : I<(outs), (ins i32imm:$tag, I32:$obj), + [(int_wasm_throw imm:$tag, I32:$obj)], "throw \t$tag, $obj", + 0x08>; +def THROW_I64 : I<(outs), (ins i32imm:$tag, I64:$obj), + [(int_wasm_throw imm:$tag, I64:$obj)], "throw \t$tag, $obj", + 0x08>; +def RETHROW : I<(outs), (ins i32imm:$rel_depth), [], "rethrow \t$rel_depth", + 0x09>; + } // isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 } // Defs = [ARGUMENTS] + +// rethrow takes a relative depth as an argument, for which currently only 0 is +// possible for C++. Once other languages need depths other than 0, depths will +// be computed in CFGStackify. 
+def : Pat<(int_wasm_rethrow), (RETHROW 0)>; diff --git a/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp b/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp index 744a3ed427af7..576b71dd79660 100644 --- a/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp +++ b/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp @@ -15,8 +15,8 @@ /// //===----------------------------------------------------------------------===// -#include "WebAssembly.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssembly.h" #include "WebAssemblyMachineFunctionInfo.h" #include "WebAssemblySubtarget.h" #include "llvm/CodeGen/MachineFunctionPass.h" diff --git a/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp b/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp index 947c0329bb6e8..f0b6a3e35dbad 100644 --- a/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp +++ b/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp @@ -897,7 +897,7 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runEHOnFunction(Function &F) { } } - // Look for orphan landingpads, can occur in blocks with no predecesors + // Look for orphan landingpads, can occur in blocks with no predecessors for (BasicBlock &BB : F) { Instruction *I = BB.getFirstNonPHI(); if (auto *LPI = dyn_cast<LandingPadInst>(I)) diff --git a/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp b/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp index ff186eb915039..8880539804cae 100644 --- a/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp +++ b/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp @@ -112,8 +112,6 @@ MCOperand WebAssemblyMCInstLower::LowerSymbolOperand(MCSymbol *Sym, MCSymbolRefExpr::VariantKind VK = IsFunc ? MCSymbolRefExpr::VK_WebAssembly_FUNCTION : MCSymbolRefExpr::VK_None; - if (!isa<MCSymbolELF>(Sym)) - cast<MCSymbolWasm>(Sym)->setIsFunction(IsFunc); const MCExpr *Expr = MCSymbolRefExpr::create(Sym, VK, Ctx); diff --git a/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp b/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp index 473dcb7a33fdc..1462c49aa9fd9 100644 --- a/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp +++ b/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp @@ -19,8 +19,8 @@ /// //===----------------------------------------------------------------------===// -#include "WebAssembly.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssembly.h" #include "WebAssemblyMachineFunctionInfo.h" #include "WebAssemblySubtarget.h" #include "WebAssemblyUtilities.h" diff --git a/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp b/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp index 5fd4a8d1949ef..ba39b6cdb5682 100644 --- a/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp +++ b/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp @@ -140,8 +140,7 @@ bool WebAssemblyRegColoring::runOnMachineFunction(MachineFunction &MF) { // Check if it's possible to reuse any of the used colors.
if (!MRI->isLiveIn(Old)) - for (int C(UsedColors.find_first()); C != -1; - C = UsedColors.find_next(C)) { + for (unsigned C : UsedColors.set_bits()) { if (MRI->getRegClass(SortedIntervals[C]->reg) != RC) continue; for (LiveInterval *OtherLI : Assignments[C]) diff --git a/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp b/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp index e3470825940c3..766ab456a8e6c 100644 --- a/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp +++ b/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp @@ -13,8 +13,8 @@ /// //===----------------------------------------------------------------------===// -#include "WebAssembly.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssembly.h" #include "WebAssemblyMachineFunctionInfo.h" #include "WebAssemblySubtarget.h" #include "WebAssemblyUtilities.h" diff --git a/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp b/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp index 57d454746b068..ea9e3fa862ce2 100644 --- a/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp +++ b/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp @@ -20,8 +20,8 @@ /// //===----------------------------------------------------------------------===// -#include "WebAssembly.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" // for WebAssembly::ARGUMENT_* +#include "WebAssembly.h" #include "WebAssemblyMachineFunctionInfo.h" #include "WebAssemblySubtarget.h" #include "WebAssemblyUtilities.h" @@ -170,28 +170,16 @@ static void Query(const MachineInstr &MI, AliasAnalysis &AA, bool &Read, if (MI.mayStore()) { Write = true; - const MachineFunction &MF = *MI.getParent()->getParent(); - if (MF.getSubtarget<WebAssemblySubtarget>() - .getTargetTriple().isOSBinFormatELF()) { - // Check for stores to __stack_pointer. - for (auto MMO : MI.memoperands()) { - const MachinePointerInfo &MPI = MMO->getPointerInfo(); - if (MPI.V.is<const PseudoSourceValue *>()) { - auto PSV = MPI.V.get<const PseudoSourceValue *>(); - if (const ExternalSymbolPseudoSourceValue *EPSV = - dyn_cast<ExternalSymbolPseudoSourceValue>(PSV)) - if (StringRef(EPSV->getSymbol()) == "__stack_pointer") - StackPointer = true; - } - } - } else { - // Check for sets of the stack pointer. - const MachineModuleInfoWasm &MMIW = - MF.getMMI().getObjFileInfo<MachineModuleInfoWasm>(); - if ((MI.getOpcode() == WebAssembly::SET_LOCAL_I32 || - MI.getOpcode() == WebAssembly::SET_LOCAL_I64) && - MI.getOperand(0).getImm() == MMIW.getStackPointerGlobal()) { - StackPointer = true; + // Check for stores to __stack_pointer.
+ for (auto MMO : MI.memoperands()) { + const MachinePointerInfo &MPI = MMO->getPointerInfo(); + if (MPI.V.is<const PseudoSourceValue *>()) { + auto PSV = MPI.V.get<const PseudoSourceValue *>(); + if (const ExternalSymbolPseudoSourceValue *EPSV = + dyn_cast<ExternalSymbolPseudoSourceValue>(PSV)) + if (StringRef(EPSV->getSymbol()) == "__stack_pointer") { + StackPointer = true; + } } } } else if (MI.hasOrderedMemoryRef()) { diff --git a/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp b/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp index 9e944df637d94..878ffd08d228a 100644 --- a/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp +++ b/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp @@ -19,8 +19,8 @@ /// //===----------------------------------------------------------------------===// -#include "WebAssembly.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssembly.h" #include "WebAssemblyMachineFunctionInfo.h" #include "WebAssemblySubtarget.h" #include "llvm/CodeGen/MachineFunctionPass.h" diff --git a/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp b/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp index c02ef4a1c399b..2599064334ee8 100644 --- a/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp +++ b/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp @@ -394,11 +394,22 @@ RuntimeLibcallSignatures[RTLIB::UNKNOWN_LIBCALL] = { /* MEMMOVE */ iPTR_func_iPTR_iPTR_iPTR, // ELEMENT-WISE ATOMIC MEMORY -/* MEMCPY_ELEMENT_ATOMIC_1 */ iPTR_func_iPTR_iPTR_iPTR, -/* MEMCPY_ELEMENT_ATOMIC_2 */ iPTR_func_iPTR_iPTR_iPTR, -/* MEMCPY_ELEMENT_ATOMIC_4 */ iPTR_func_iPTR_iPTR_iPTR, -/* MEMCPY_ELEMENT_ATOMIC_8 */ iPTR_func_iPTR_iPTR_iPTR, -/* MEMCPY_ELEMENT_ATOMIC_16 */ iPTR_func_iPTR_iPTR_iPTR, +/* MEMCPY_ELEMENT_UNORDERED_ATOMIC_1 */ unsupported, +/* MEMCPY_ELEMENT_UNORDERED_ATOMIC_2 */ unsupported, +/* MEMCPY_ELEMENT_UNORDERED_ATOMIC_4 */ unsupported, +/* MEMCPY_ELEMENT_UNORDERED_ATOMIC_8 */ unsupported, +/* MEMCPY_ELEMENT_UNORDERED_ATOMIC_16 */ unsupported, +/* MEMMOVE_ELEMENT_UNORDERED_ATOMIC_1 */ unsupported, +/* MEMMOVE_ELEMENT_UNORDERED_ATOMIC_2 */ unsupported, +/* MEMMOVE_ELEMENT_UNORDERED_ATOMIC_4 */ unsupported, +/* MEMMOVE_ELEMENT_UNORDERED_ATOMIC_8 */ unsupported, +/* MEMMOVE_ELEMENT_UNORDERED_ATOMIC_16 */ unsupported, + +/* MEMSET_ELEMENT_UNORDERED_ATOMIC_1 */ unsupported, +/* MEMSET_ELEMENT_UNORDERED_ATOMIC_2 */ unsupported, +/* MEMSET_ELEMENT_UNORDERED_ATOMIC_4 */ unsupported, +/* MEMSET_ELEMENT_UNORDERED_ATOMIC_8 */ unsupported, +/* MEMSET_ELEMENT_UNORDERED_ATOMIC_16 */ unsupported, // EXCEPTION HANDLING /* UNWIND_RESUME */ unsupported, @@ -839,11 +850,21 @@ RuntimeLibcallNames[RTLIB::UNKNOWN_LIBCALL] = { /* MEMCPY */ "memcpy", /* MEMMOVE */ "memset", /* MEMSET */ "memmove", -/* MEMCPY_ELEMENT_ATOMIC_1 */ "MEMCPY_ELEMENT_ATOMIC_1", -/* MEMCPY_ELEMENT_ATOMIC_2 */ "MEMCPY_ELEMENT_ATOMIC_2", -/* MEMCPY_ELEMENT_ATOMIC_4 */ "MEMCPY_ELEMENT_ATOMIC_4", -/* MEMCPY_ELEMENT_ATOMIC_8 */ "MEMCPY_ELEMENT_ATOMIC_8", -/* MEMCPY_ELEMENT_ATOMIC_16 */ "MEMCPY_ELEMENT_ATOMIC_16", +/* MEMCPY_ELEMENT_UNORDERED_ATOMIC_1 */ nullptr, +/* MEMCPY_ELEMENT_UNORDERED_ATOMIC_2 */ nullptr, +/* MEMCPY_ELEMENT_UNORDERED_ATOMIC_4 */ nullptr, +/* MEMCPY_ELEMENT_UNORDERED_ATOMIC_8 */ nullptr, +/* MEMCPY_ELEMENT_UNORDERED_ATOMIC_16 */ nullptr, +/* MEMMOVE_ELEMENT_UNORDERED_ATOMIC_1 */ nullptr, +/* MEMMOVE_ELEMENT_UNORDERED_ATOMIC_2 */ nullptr, +/*
MEMMOVE_ELEMENT_UNORDERED_ATOMIC_4 */ nullptr, +/* MEMMOVE_ELEMENT_UNORDERED_ATOMIC_8 */ nullptr, +/* MEMMOVE_ELEMENT_UNORDERED_ATOMIC_16 */ nullptr, +/* MEMSET_ELEMENT_UNORDERED_ATOMIC_1 */ nullptr, +/* MEMSET_ELEMENT_UNORDERED_ATOMIC_2 */ nullptr, +/* MEMSET_ELEMENT_UNORDERED_ATOMIC_4 */ nullptr, +/* MEMSET_ELEMENT_UNORDERED_ATOMIC_8 */ nullptr, +/* MEMSET_ELEMENT_UNORDERED_ATOMIC_16 */ nullptr, /* UNWIND_RESUME */ "_Unwind_Resume", /* SYNC_VAL_COMPARE_AND_SWAP_1 */ "__sync_val_compare_and_swap_1", /* SYNC_VAL_COMPARE_AND_SWAP_2 */ "__sync_val_compare_and_swap_2", diff --git a/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp b/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp index 2441ead7cb27c..b1385f409fd33 100644 --- a/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp +++ b/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp @@ -12,8 +12,8 @@ /// //===----------------------------------------------------------------------===// -#include "WebAssembly.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssembly.h" #include "WebAssemblyMachineFunctionInfo.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" diff --git a/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp b/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp index a9aa781610ce1..8173364fa8809 100644 --- a/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp +++ b/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp @@ -24,8 +24,8 @@ /// //===----------------------------------------------------------------------===// -#include "WebAssembly.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssembly.h" #include "WebAssemblyMachineFunctionInfo.h" #include "WebAssemblySubtarget.h" #include "llvm/Analysis/TargetLibraryInfo.h" diff --git a/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp index 44c794ef5da19..7b05f671bdcbf 100644 --- a/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp +++ b/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp @@ -12,9 +12,9 @@ /// //===----------------------------------------------------------------------===// -#include "WebAssembly.h" -#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" #include "WebAssemblyTargetMachine.h" +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssembly.h" #include "WebAssemblyTargetObjectFile.h" #include "WebAssemblyTargetTransformInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -129,7 +129,7 @@ namespace { /// WebAssembly Code Generator Pass Configuration Options. 
class WebAssemblyPassConfig final : public TargetPassConfig { public: - WebAssemblyPassConfig(WebAssemblyTargetMachine *TM, PassManagerBase &PM) + WebAssemblyPassConfig(WebAssemblyTargetMachine &TM, PassManagerBase &PM) : TargetPassConfig(TM, PM) {} WebAssemblyTargetMachine &getWebAssemblyTargetMachine() const { @@ -154,7 +154,7 @@ TargetIRAnalysis WebAssemblyTargetMachine::getTargetIRAnalysis() { TargetPassConfig * WebAssemblyTargetMachine::createPassConfig(PassManagerBase &PM) { - return new WebAssemblyPassConfig(this, PM); + return new WebAssemblyPassConfig(*this, PM); } FunctionPass *WebAssemblyPassConfig::createTargetRegisterAllocator(bool) { @@ -173,7 +173,7 @@ void WebAssemblyPassConfig::addIRPasses() { else // Expand some atomic operations. WebAssemblyTargetLowering has hooks which // control specifically what gets lowered. - addPass(createAtomicExpandPass(TM)); + addPass(createAtomicExpandPass()); // Fix function bitcasts, as WebAssembly requires caller and callee signatures // to match. diff --git a/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp b/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp index 47aadf99e8601..b3ce4bd274606 100644 --- a/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp +++ b/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp @@ -36,7 +36,7 @@ unsigned WebAssemblyTTIImpl::getNumberOfRegisters(bool Vector) { return Result; } -unsigned WebAssemblyTTIImpl::getRegisterBitWidth(bool Vector) { +unsigned WebAssemblyTTIImpl::getRegisterBitWidth(bool Vector) const { if (Vector && getST()->hasSIMD128()) return 128; diff --git a/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h b/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h index f658609f89300..7b35fc9161339 100644 --- a/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h +++ b/interpreter/llvm/src/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h @@ -55,7 +55,7 @@ class WebAssemblyTTIImpl final : public BasicTTIImplBase<WebAssemblyTTIImpl> { /// @{ unsigned getNumberOfRegisters(bool Vector); - unsigned getRegisterBitWidth(bool Vector); + unsigned getRegisterBitWidth(bool Vector) const; unsigned getArithmeticInstrCost( unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, diff --git a/interpreter/llvm/src/lib/Target/WebAssembly/known_gcc_test_failures.txt b/interpreter/llvm/src/lib/Target/WebAssembly/known_gcc_test_failures.txt index 8e8e5fd1eff1e..35a67134775a6 100644 --- a/interpreter/llvm/src/lib/Target/WebAssembly/known_gcc_test_failures.txt +++ b/interpreter/llvm/src/lib/Target/WebAssembly/known_gcc_test_failures.txt @@ -33,9 +33,6 @@ built-in-setjmp.c pr60003.c # Error in the program / unsupported by Clang.
-scal-to-vec1.c -scal-to-vec2.c -scal-to-vec3.c 20000822-1.c 20010209-1.c 20010605-1.c @@ -91,6 +88,3 @@ pr45695.c wasm-o pr49279.c wasm-o pr49390.c wasm-o pr52286.c wasm-o - -# fatal error: error in backend: data symbols must have a size set with .size -921110-1.c wasm-o diff --git a/interpreter/llvm/src/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp b/interpreter/llvm/src/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp index 788fac62626b7..f7e31de65f6d1 100644 --- a/interpreter/llvm/src/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp +++ b/interpreter/llvm/src/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp @@ -7,11 +7,11 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/X86MCTargetDesc.h" #include "X86AsmInstrumentation.h" +#include "MCTargetDesc/X86MCTargetDesc.h" #include "X86Operand.h" -#include "llvm/ADT/Twine.h" #include "llvm/ADT/Triple.h" +#include "llvm/ADT/Twine.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCExpr.h" diff --git a/interpreter/llvm/src/lib/Target/X86/AsmParser/X86AsmParser.cpp b/interpreter/llvm/src/lib/Target/X86/AsmParser/X86AsmParser.cpp index 32ab475f11867..c1d216c8b7af8 100644 --- a/interpreter/llvm/src/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/interpreter/llvm/src/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -49,8 +49,11 @@ static const char OpPrecedence[] = { 4, // IC_MINUS 5, // IC_MULTIPLY 5, // IC_DIVIDE - 6, // IC_RPAREN - 7, // IC_LPAREN + 5, // IC_MOD + 6, // IC_NOT + 7, // IC_NEG + 8, // IC_RPAREN + 9, // IC_LPAREN 0, // IC_IMM 0 // IC_REGISTER }; @@ -92,6 +95,9 @@ class X86AsmParser : public MCTargetAsmParser { IC_MINUS, IC_MULTIPLY, IC_DIVIDE, + IC_MOD, + IC_NOT, + IC_NEG, IC_RPAREN, IC_LPAREN, IC_IMM, @@ -111,6 +117,10 @@ class X86AsmParser : public MCTargetAsmParser { SmallVector<InfixCalculatorTok, 4> InfixOperatorStack; SmallVector<ICToken, 4> PostfixStack; + bool isUnaryOperator(const InfixCalculatorTok Op) { + return Op == IC_NEG || Op == IC_NOT; + } + public: int64_t popOperand() { assert (!PostfixStack.empty() && "Poped an empty stack!"); @@ -192,6 +202,22 @@ class X86AsmParser : public MCTargetAsmParser { ICToken Op = PostfixStack[i]; if (Op.first == IC_IMM || Op.first == IC_REGISTER) { OperandStack.push_back(Op); + } else if (isUnaryOperator(Op.first)) { + assert (OperandStack.size() > 0 && "Too few operands."); + ICToken Operand = OperandStack.pop_back_val(); + assert (Operand.first == IC_IMM && + "Unary operation with a register!"); + switch (Op.first) { + default: + report_fatal_error("Unexpected operator!"); + break; + case IC_NEG: + OperandStack.push_back(std::make_pair(IC_IMM, -Operand.second)); + break; + case IC_NOT: + OperandStack.push_back(std::make_pair(IC_IMM, ~Operand.second)); + break; + } } else { assert (OperandStack.size() > 1 && "Too few operands."); int64_t Val; @@ -222,6 +248,12 @@ class X86AsmParser : public MCTargetAsmParser { Val = Op1.second / Op2.second; OperandStack.push_back(std::make_pair(IC_IMM, Val)); break; + case IC_MOD: + assert (Op1.first == IC_IMM && Op2.first == IC_IMM && + "Modulo operation with an immediate and a register!"); + Val = Op1.second % Op2.second; + OperandStack.push_back(std::make_pair(IC_IMM, Val)); + break; case IC_OR: assert (Op1.first == IC_IMM && Op2.first == IC_IMM && "Or operation with an immediate and a register!"); @@ -271,6 +303,7 @@ class X86AsmParser : public MCTargetAsmParser { IES_NOT, IES_MULTIPLY, IES_DIVIDE, + IES_MOD, IES_LBRAC, IES_RBRAC, IES_LPAREN, @@ -421,10 +454,16 @@ class X86AsmParser
: public MCTargetAsmParser { default: State = IES_ERROR; break; + case IES_OR: + case IES_XOR: + case IES_AND: + case IES_LSHIFT: + case IES_RSHIFT: case IES_PLUS: case IES_NOT: case IES_MULTIPLY: case IES_DIVIDE: + case IES_MOD: case IES_LPAREN: case IES_RPAREN: case IES_LBRAC: @@ -432,11 +471,12 @@ class X86AsmParser : public MCTargetAsmParser { case IES_INTEGER: case IES_REGISTER: State = IES_MINUS; - // Only push the minus operator if it is not a unary operator. - if (!(CurrState == IES_PLUS || CurrState == IES_MINUS || - CurrState == IES_MULTIPLY || CurrState == IES_DIVIDE || - CurrState == IES_LPAREN || CurrState == IES_LBRAC)) + // push minus operator if it is not a negate operator + if (CurrState == IES_REGISTER || CurrState == IES_RPAREN || + CurrState == IES_INTEGER || CurrState == IES_RBRAC) IC.pushOperator(IC_MINUS); + else + IC.pushOperator(IC_NEG); if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) { // If we already have a BaseReg, then assume this is the IndexReg with // a scale of 1. @@ -458,9 +498,21 @@ class X86AsmParser : public MCTargetAsmParser { default: State = IES_ERROR; break; + case IES_OR: + case IES_XOR: + case IES_AND: + case IES_LSHIFT: + case IES_RSHIFT: case IES_PLUS: + case IES_MINUS: case IES_NOT: + case IES_MULTIPLY: + case IES_DIVIDE: + case IES_MOD: + case IES_LPAREN: + case IES_LBRAC: State = IES_NOT; + IC.pushOperator(IC_NOT); break; } PrevState = CurrState; @@ -525,6 +577,7 @@ class X86AsmParser : public MCTargetAsmParser { case IES_LSHIFT: case IES_RSHIFT: case IES_DIVIDE: + case IES_MOD: case IES_MULTIPLY: case IES_LPAREN: State = IES_INTEGER; @@ -539,26 +592,6 @@ class X86AsmParser : public MCTargetAsmParser { } // Get the scale and replace the 'Register * Scale' with '0'. IC.popOperator(); - } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS || - PrevState == IES_OR || PrevState == IES_AND || - PrevState == IES_LSHIFT || PrevState == IES_RSHIFT || - PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE || - PrevState == IES_LPAREN || PrevState == IES_LBRAC || - PrevState == IES_NOT || PrevState == IES_XOR) && - CurrState == IES_MINUS) { - // Unary minus. No need to pop the minus operand because it was never - // pushed. - IC.pushOperand(IC_IMM, -TmpInt); // Push -Imm. - } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS || - PrevState == IES_OR || PrevState == IES_AND || - PrevState == IES_LSHIFT || PrevState == IES_RSHIFT || - PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE || - PrevState == IES_LPAREN || PrevState == IES_LBRAC || - PrevState == IES_NOT || PrevState == IES_XOR) && - CurrState == IES_NOT) { - // Unary not. No need to pop the not operand because it was never - // pushed. - IC.pushOperand(IC_IMM, ~TmpInt); // Push ~Imm. } else { IC.pushOperand(IC_IMM, TmpInt); } @@ -594,6 +627,19 @@ class X86AsmParser : public MCTargetAsmParser { break; } } + void onMod() { + PrevState = State; + switch (State) { + default: + State = IES_ERROR; + break; + case IES_INTEGER: + case IES_RPAREN: + State = IES_MOD; + IC.pushOperator(IC_MOD); + break; + } + } void onLBrac() { PrevState = State; switch (State) { @@ -647,18 +693,8 @@ class X86AsmParser : public MCTargetAsmParser { case IES_RSHIFT: case IES_MULTIPLY: case IES_DIVIDE: + case IES_MOD: case IES_LPAREN: - // FIXME: We don't handle this type of unary minus or not, yet. 
- if ((PrevState == IES_PLUS || PrevState == IES_MINUS || - PrevState == IES_OR || PrevState == IES_AND || - PrevState == IES_LSHIFT || PrevState == IES_RSHIFT || - PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE || - PrevState == IES_LPAREN || PrevState == IES_LBRAC || - PrevState == IES_NOT || PrevState == IES_XOR) && - (CurrState == IES_MINUS || CurrState == IES_NOT)) { - State = IES_ERROR; - break; - } State = IES_LPAREN; IC.pushOperator(IC_LPAREN); break; @@ -1302,6 +1338,8 @@ bool X86AsmParser::ParseIntelNamedOperator(StringRef Name, IntelExprStateMachine SM.onXor(); else if (Name.equals_lower("and")) SM.onAnd(); + else if (Name.equals_lower("mod")) + SM.onMod(); else return false; return true; @@ -1316,16 +1354,17 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) { while (!Done) { bool UpdateLocLex = true; + AsmToken::TokenKind TK = getLexer().getKind(); // The period in the dot operator (e.g., [ebx].foo.bar) is parsed as an // identifier. Don't try an parse it as a register. - if (PrevTK != AsmToken::Error && Tok.getString().startswith(".")) + if (PrevTK != AsmToken::Error && Tok.getString().startswith(".") && + TK != AsmToken::Identifier) break; // If we're parsing an immediate expression, we don't expect a '['. if (SM.getStopOnLBrac() && getLexer().getKind() == AsmToken::LBrac) break; - AsmToken::TokenKind TK = getLexer().getKind(); switch (TK) { default: { if (SM.isValidEndState()) { @@ -1704,8 +1743,7 @@ bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp, if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) { SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data()); unsigned Len = DotDispStr.size(); - unsigned Val = OrigDispVal + DotDispVal; - InstInfo->AsmRewrites->emplace_back(AOK_DotOperator, Loc, Len, Val); + InstInfo->AsmRewrites->emplace_back(AOK_DotOperator, Loc, Len, DotDispVal); } NewDisp = MCConstantExpr::create(OrigDispVal + DotDispVal, getContext()); @@ -2415,8 +2453,8 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, break; } - // In MS inline asm curly braces mark the begining/end of a block, therefore - // they should be interepreted as end of statement + // In MS inline asm curly braces mark the beginning/end of a block, + // therefore they should be interepreted as end of statement CurlyAsEndOfStatement = isParsingIntelSyntax() && isParsingInlineAsm() && (getLexer().is(AsmToken::LCurly) || getLexer().is(AsmToken::RCurly)); diff --git a/interpreter/llvm/src/lib/Target/X86/AsmParser/X86Operand.h b/interpreter/llvm/src/lib/Target/X86/AsmParser/X86Operand.h index 33eff14b82157..0fba15cc692ca 100644 --- a/interpreter/llvm/src/lib/Target/X86/AsmParser/X86Operand.h +++ b/interpreter/llvm/src/lib/Target/X86/AsmParser/X86Operand.h @@ -15,8 +15,8 @@ #include "llvm/ADT/StringRef.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" -#include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/SMLoc.h" diff --git a/interpreter/llvm/src/lib/Target/X86/CMakeLists.txt b/interpreter/llvm/src/lib/Target/X86/CMakeLists.txt index fc4adddc149ba..6e08d4cff6eaf 100644 --- a/interpreter/llvm/src/lib/Target/X86/CMakeLists.txt +++ b/interpreter/llvm/src/lib/Target/X86/CMakeLists.txt @@ -37,6 +37,7 @@ endif() set(sources X86AsmPrinter.cpp X86CallFrameOptimization.cpp + X86CmovConversion.cpp X86ExpandPseudo.cpp X86FastISel.cpp X86FixupBWInsts.cpp 
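The X86AsmParser hunks above stop special-casing unary minus and NOT at integer-parse time (where the sign used to be folded into the next immediate) and instead push dedicated IC_NEG/IC_NOT operators that carry their own precedence and are applied during postfix evaluation, alongside the new MOD operator. A compact standalone sketch of that evaluation scheme; the token names follow the diff, but the evaluator itself is illustrative, not LLVM's InfixCalculator:

```cpp
#include <cassert>
#include <cstdint>
#include <iostream>
#include <utility>
#include <vector>

// Subset of the diff's operator tokens; IC_IMM carries a value.
enum Tok { IC_IMM, IC_PLUS, IC_MULTIPLY, IC_MOD, IC_NOT, IC_NEG };

static bool isUnary(Tok T) { return T == IC_NEG || T == IC_NOT; }

int64_t evaluatePostfix(const std::vector<std::pair<Tok, int64_t>> &Postfix) {
  std::vector<int64_t> Operands;
  for (const auto &Op : Postfix) {
    if (Op.first == IC_IMM) {
      Operands.push_back(Op.second);
    } else if (isUnary(Op.first)) {
      // Unary operators pop exactly one operand, mirroring the new
      // isUnaryOperator() branch in the diff.
      assert(!Operands.empty() && "Too few operands.");
      int64_t V = Operands.back();
      Operands.pop_back();
      Operands.push_back(Op.first == IC_NEG ? -V : ~V);
    } else {
      assert(Operands.size() > 1 && "Too few operands.");
      int64_t RHS = Operands.back(); Operands.pop_back();
      int64_t LHS = Operands.back(); Operands.pop_back();
      switch (Op.first) {
      case IC_PLUS:     Operands.push_back(LHS + RHS); break;
      case IC_MULTIPLY: Operands.push_back(LHS * RHS); break;
      case IC_MOD:      Operands.push_back(LHS % RHS); break;
      default:          assert(false && "Unexpected operator!"); break;
      }
    }
  }
  return Operands.back();
}

int main() {
  // "-3 * 4 mod 5"  ==>  postfix: 3 neg 4 * 5 mod
  std::vector<std::pair<Tok, int64_t>> P = {
      {IC_IMM, 3}, {IC_NEG, 0}, {IC_IMM, 4}, {IC_MULTIPLY, 0},
      {IC_IMM, 5}, {IC_MOD, 0}};
  std::cout << evaluatePostfix(P) << "\n"; // prints -2
}
```

Giving IC_NEG and IC_NOT a precedence above multiply/divide is what lets them live on the ordinary operator stack while still binding tightly to their operand, which is why the old ad-hoc "unary minus folds into the integer" paths could be deleted.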
diff --git a/interpreter/llvm/src/lib/Target/X86/Disassembler/X86Disassembler.cpp b/interpreter/llvm/src/lib/Target/X86/Disassembler/X86Disassembler.cpp index 36ad23bb41c05..4ce908b1da64e 100644 --- a/interpreter/llvm/src/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/interpreter/llvm/src/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -74,8 +74,8 @@ // //===----------------------------------------------------------------------===// -#include "X86DisassemblerDecoder.h" #include "MCTargetDesc/X86MCTargetDesc.h" +#include "X86DisassemblerDecoder.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCExpr.h" diff --git a/interpreter/llvm/src/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp b/interpreter/llvm/src/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp index b7f637e9a8cd7..577b7a776c6df 100644 --- a/interpreter/llvm/src/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp +++ b/interpreter/llvm/src/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp @@ -13,10 +13,10 @@ // //===----------------------------------------------------------------------===// -#include <stdarg.h> /* for va_*() */ -#include <stdio.h> /* for vsnprintf() */ -#include <stdlib.h> /* for exit() */ -#include <string.h> /* for memset() */ +#include <cstdarg> /* for va_*() */ +#include <cstdio> /* for vsnprintf() */ +#include <cstdlib> /* for exit() */ +#include <cstring> /* for memset() */ #include "X86DisassemblerDecoder.h" diff --git a/interpreter/llvm/src/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/interpreter/llvm/src/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp index 6aa7003067440..4d91300c7edec 100644 --- a/interpreter/llvm/src/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp +++ b/interpreter/llvm/src/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/X86BaseInfo.h" #include "X86ATTInstPrinter.h" +#include "MCTargetDesc/X86BaseInfo.h" #include "X86InstComments.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" diff --git a/interpreter/llvm/src/lib/Target/X86/InstPrinter/X86InstComments.cpp b/interpreter/llvm/src/lib/Target/X86/InstPrinter/X86InstComments.cpp index 6e062ec59347b..f5f3a4cc83dc9 100644 --- a/interpreter/llvm/src/lib/Target/X86/InstPrinter/X86InstComments.cpp +++ b/interpreter/llvm/src/lib/Target/X86/InstPrinter/X86InstComments.cpp @@ -15,8 +15,8 @@ #include "X86InstComments.h" #include "MCTargetDesc/X86MCTargetDesc.h" #include "Utils/X86ShuffleDecode.h" -#include "llvm/MC/MCInst.h" #include "llvm/CodeGen/MachineValueType.h" +#include "llvm/MC/MCInst.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -587,6 +587,7 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, case X86::VPSLLDQZ256rr: case X86::VPSLLDQZ512rr: Src1Name = getRegName(MI->getOperand(1).getReg()); + LLVM_FALLTHROUGH; case X86::VPSLLDQZ128rm: case X86::VPSLLDQZ256rm: case X86::VPSLLDQZ512rm: @@ -604,6 +605,7 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, case X86::VPSRLDQZ256rr: case X86::VPSRLDQZ512rr: Src1Name = getRegName(MI->getOperand(1).getReg()); + LLVM_FALLTHROUGH; case X86::VPSRLDQZ128rm: case X86::VPSRLDQZ256rm: case X86::VPSRLDQZ512rm: @@ -1036,7 +1038,7 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, case X86::EXTRQI: if (MI->getOperand(2).isImm() && MI->getOperand(3).isImm()) - DecodeEXTRQIMask(MI->getOperand(2).getImm(), + DecodeEXTRQIMask(MVT::v16i8, MI->getOperand(2).getImm(),
MI->getOperand(3).getImm(), ShuffleMask); @@ -1047,7 +1049,7 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, case X86::INSERTQI: if (MI->getOperand(3).isImm() && MI->getOperand(4).isImm()) - DecodeINSERTQIMask(MI->getOperand(3).getImm(), + DecodeINSERTQIMask(MVT::v16i8, MI->getOperand(3).getImm(), MI->getOperand(4).getImm(), ShuffleMask); @@ -1091,6 +1093,7 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, CASE_AVX512_INS_COMMON(BROADCASTF32X2, Z256, r) CASE_AVX512_INS_COMMON(BROADCASTI32X2, Z256, r) Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg()); + LLVM_FALLTHROUGH; CASE_AVX512_INS_COMMON(BROADCASTF32X2, Z256, m) CASE_AVX512_INS_COMMON(BROADCASTI32X2, Z256, m) DecodeSubVectorBroadcast(MVT::v8f32, MVT::v2f32, ShuffleMask); @@ -1099,6 +1102,7 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, CASE_AVX512_INS_COMMON(BROADCASTF32X2, Z, r) CASE_AVX512_INS_COMMON(BROADCASTI32X2, Z, r) Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg()); + LLVM_FALLTHROUGH; CASE_AVX512_INS_COMMON(BROADCASTF32X2, Z, m) CASE_AVX512_INS_COMMON(BROADCASTI32X2, Z, m) DecodeSubVectorBroadcast(MVT::v16f32, MVT::v2f32, ShuffleMask); diff --git a/interpreter/llvm/src/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/interpreter/llvm/src/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp index a8c631ae282f9..d6af6712d5a1c 100644 --- a/interpreter/llvm/src/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp +++ b/interpreter/llvm/src/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp @@ -12,9 +12,9 @@ // //===----------------------------------------------------------------------===// +#include "X86IntelInstPrinter.h" #include "MCTargetDesc/X86BaseInfo.h" #include "X86InstComments.h" -#include "X86IntelInstPrinter.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrDesc.h" diff --git a/interpreter/llvm/src/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/interpreter/llvm/src/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index a713af6aadb5a..733eac7c03212 100644 --- a/interpreter/llvm/src/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/interpreter/llvm/src/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -10,6 +10,8 @@ #include "MCTargetDesc/X86BaseInfo.h" #include "MCTargetDesc/X86FixupKinds.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/BinaryFormat/MachO.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCExpr.h" @@ -22,9 +24,7 @@ #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MachO.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -108,12 +108,12 @@ class X86AsmBackend : public MCAsmBackend { return Infos[Kind - FirstTargetFixupKind]; } - void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, - uint64_t Value, bool IsPCRel, MCContext &Ctx) const override { + void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target, MutableArrayRef<char> Data, + uint64_t Value, bool IsResolved) const override { unsigned Size = 1 << getFixupKindLog2Size(Fixup.getKind()); - assert(Fixup.getOffset() + Size <= DataSize && - "Invalid fixup offset!"); + assert(Fixup.getOffset() + Size <= Data.size() && "Invalid fixup offset!"); // Check that uppper bits are either all zeros or all
ones. // Specifically ignore overflow/underflow as long as the leakage is diff --git a/interpreter/llvm/src/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp b/interpreter/llvm/src/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp index 0b73df3a2ff8c..4da4eebec0386 100644 --- a/interpreter/llvm/src/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp +++ b/interpreter/llvm/src/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp @@ -9,13 +9,13 @@ #include "MCTargetDesc/X86FixupKinds.h" #include "MCTargetDesc/X86MCTargetDesc.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixup.h" #include "llvm/MC/MCValue.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include <cassert> #include <cstdint> diff --git a/interpreter/llvm/src/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/interpreter/llvm/src/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp index 9c35a251e480b..1538a515f4190 100644 --- a/interpreter/llvm/src/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp +++ b/interpreter/llvm/src/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp @@ -13,12 +13,12 @@ #include "X86MCAsmInfo.h" #include "llvm/ADT/Triple.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCStreamer.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/ELF.h" using namespace llvm; enum AsmWriterFlavorTy { diff --git a/interpreter/llvm/src/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp b/interpreter/llvm/src/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp index 297926ddcfdaf..8f2017e990c51 100644 --- a/interpreter/llvm/src/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp +++ b/interpreter/llvm/src/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp @@ -7,9 +7,10 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/X86MCTargetDesc.h" #include "MCTargetDesc/X86FixupKinds.h" +#include "MCTargetDesc/X86MCTargetDesc.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/MachO.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCAssembler.h" @@ -19,7 +20,6 @@ #include "llvm/MC/MCValue.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" -#include "llvm/Support/MachO.h" using namespace llvm; @@ -153,8 +153,7 @@ void X86MachObjectWriter::RecordX86_64Relocation( const MCSymbol *B_Base = Asm.getAtom(*B); // Neither symbol can be modified. - if (Target.getSymA()->getKind() != MCSymbolRefExpr::VK_None || - Target.getSymB()->getKind() != MCSymbolRefExpr::VK_None) { + if (Target.getSymA()->getKind() != MCSymbolRefExpr::VK_None) { Asm.getContext().reportError(Fixup.getLoc(), "unsupported relocation of modified symbol"); return; @@ -397,7 +396,7 @@ bool X86MachObjectWriter::recordScatteredRelocation(MachObjectWriter *Writer, if (!SB->getFragment()) { Asm.getContext().reportError( Fixup.getLoc(), - "symbol '" + B->getSymbol().getName() + + "symbol '" + SB->getName() + "' can not be undefined in a subtraction expression"); return false; } @@ -409,7 +408,7 @@ bool X86MachObjectWriter::recordScatteredRelocation(MachObjectWriter *Writer, // pedantic compatibility with 'as'. Type = A->isExternal() ?
(unsigned)MachO::GENERIC_RELOC_SECTDIFF : (unsigned)MachO::GENERIC_RELOC_LOCAL_SECTDIFF; - Value2 = Writer->getSymbolAddress(B->getSymbol(), Layout); + Value2 = Writer->getSymbolAddress(*SB, Layout); FixedValue -= Writer->getSectionAddress(SB->getFragment()->getParent()); } @@ -469,8 +468,8 @@ void X86MachObjectWriter::recordTLVPRelocation(MachObjectWriter *Writer, const MCFixup &Fixup, MCValue Target, uint64_t &FixedValue) { - assert(Target.getSymA()->getKind() == MCSymbolRefExpr::VK_TLVP && - !is64Bit() && + const MCSymbolRefExpr *SymA = Target.getSymA(); + assert(SymA->getKind() == MCSymbolRefExpr::VK_TLVP && !is64Bit() && "Should only be called with a 32-bit TLVP relocation!"); unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind()); @@ -481,15 +480,14 @@ void X86MachObjectWriter::recordTLVPRelocation(MachObjectWriter *Writer, // subtraction from the picbase. For 32-bit pic the addend is the difference // between the picbase and the next address. For 32-bit static the addend is // zero. - if (Target.getSymB()) { + if (auto *SymB = Target.getSymB()) { // If this is a subtraction then we're pcrel. uint32_t FixupAddress = Writer->getFragmentAddress(Fragment, Layout) + Fixup.getOffset(); IsPCRel = 1; - FixedValue = - FixupAddress - - Writer->getSymbolAddress(Target.getSymB()->getSymbol(), Layout) + - Target.getConstant(); + FixedValue = FixupAddress - + Writer->getSymbolAddress(SymB->getSymbol(), Layout) + + Target.getConstant(); FixedValue += 1ULL << Log2Size; } else { FixedValue = 0; @@ -500,8 +498,7 @@ void X86MachObjectWriter::recordTLVPRelocation(MachObjectWriter *Writer, MRE.r_word0 = Value; MRE.r_word1 = (IsPCRel << 24) | (Log2Size << 25) | (MachO::GENERIC_RELOC_TLV << 28); - Writer->addRelocation(&Target.getSymA()->getSymbol(), Fragment->getParent(), - MRE); + Writer->addRelocation(&SymA->getSymbol(), Fragment->getParent(), MRE); } void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer, diff --git a/interpreter/llvm/src/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp b/interpreter/llvm/src/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp index d6777fc8aa6ae..807f7a6ddb198 100644 --- a/interpreter/llvm/src/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp +++ b/interpreter/llvm/src/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp @@ -9,11 +9,12 @@ #include "MCTargetDesc/X86FixupKinds.h" #include "MCTargetDesc/X86MCTargetDesc.h" +#include "llvm/BinaryFormat/COFF.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixup.h" #include "llvm/MC/MCValue.h" #include "llvm/MC/MCWinCOFFObjectWriter.h" -#include "llvm/Support/COFF.h" #include "llvm/Support/ErrorHandling.h" using namespace llvm; @@ -25,8 +26,8 @@ class X86WinCOFFObjectWriter : public MCWinCOFFObjectTargetWriter { X86WinCOFFObjectWriter(bool Is64Bit); ~X86WinCOFFObjectWriter() override = default; - unsigned getRelocType(const MCValue &Target, const MCFixup &Fixup, - bool IsCrossSection, + unsigned getRelocType(MCContext &Ctx, const MCValue &Target, + const MCFixup &Fixup, bool IsCrossSection, const MCAsmBackend &MAB) const override; }; @@ -36,11 +37,19 @@ X86WinCOFFObjectWriter::X86WinCOFFObjectWriter(bool Is64Bit) : MCWinCOFFObjectTargetWriter(Is64Bit ? 
COFF::IMAGE_FILE_MACHINE_AMD64 : COFF::IMAGE_FILE_MACHINE_I386) {} -unsigned X86WinCOFFObjectWriter::getRelocType(const MCValue &Target, +unsigned X86WinCOFFObjectWriter::getRelocType(MCContext &Ctx, + const MCValue &Target, const MCFixup &Fixup, bool IsCrossSection, const MCAsmBackend &MAB) const { - unsigned FixupKind = IsCrossSection ? FK_PCRel_4 : Fixup.getKind(); + unsigned FixupKind = Fixup.getKind(); + if (IsCrossSection) { + if (FixupKind != FK_Data_4 && FixupKind != llvm::X86::reloc_signed_4byte) { + Ctx.reportError(Fixup.getLoc(), "Cannot represent this expression"); + return COFF::IMAGE_REL_AMD64_ADDR32; + } + FixupKind = FK_PCRel_4; + } MCSymbolRefExpr::VariantKind Modifier = Target.isAbsolute() ? MCSymbolRefExpr::VK_None : Target.getSymA()->getKind(); diff --git a/interpreter/llvm/src/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/interpreter/llvm/src/lib/Target/X86/Utils/X86ShuffleDecode.cpp index 1be5aec849fc6..8a0fbfb45b22d 100644 --- a/interpreter/llvm/src/lib/Target/X86/Utils/X86ShuffleDecode.cpp +++ b/interpreter/llvm/src/lib/Target/X86/Utils/X86ShuffleDecode.cpp @@ -452,15 +452,20 @@ void DecodeScalarMoveMask(MVT VT, bool IsLoad, SmallVectorImpl<int> &Mask) { Mask.push_back(IsLoad ? static_cast<int>(SM_SentinelZero) : i); } -void DecodeEXTRQIMask(int Len, int Idx, +void DecodeEXTRQIMask(MVT VT, int Len, int Idx, SmallVectorImpl<int> &ShuffleMask) { + assert(VT.is128BitVector() && "Expected 128-bit vector"); + unsigned NumElts = VT.getVectorNumElements(); + unsigned EltSize = VT.getScalarSizeInBits(); + unsigned HalfElts = NumElts / 2; + // Only the bottom 6 bits are valid for each immediate. Len &= 0x3F; Idx &= 0x3F; // We can only decode this bit extraction instruction as a shuffle if both the - // length and index work with whole bytes. + // length and index work with whole elements. - if (0 != (Len % 8) || 0 != (Idx % 8)) + if (0 != (Len % EltSize) || 0 != (Idx % EltSize)) return; // A length of zero is equivalent to a bit length of 64. @@ -469,33 +474,38 @@ void DecodeEXTRQIMask(int Len, int Idx, // If the length + index exceeds the bottom 64 bits the result is undefined. if ((Len + Idx) > 64) { - ShuffleMask.append(16, SM_SentinelUndef); + ShuffleMask.append(NumElts, SM_SentinelUndef); return; } - // Convert index and index to work with bytes. - Len /= 8; - Idx /= 8; + // Convert index and index to work with elements. + Len /= EltSize; + Idx /= EltSize; - // EXTRQ: Extract Len bytes starting from Idx. Zero pad the remaining bytes - // of the lower 64-bits. The upper 64-bits are undefined. + // EXTRQ: Extract Len elements starting from Idx. Zero pad the remaining + // elements of the lower 64-bits. The upper 64-bits are undefined. for (int i = 0; i != Len; ++i) ShuffleMask.push_back(i + Idx); - for (int i = Len; i != 8; ++i) + for (int i = Len; i != (int)HalfElts; ++i) ShuffleMask.push_back(SM_SentinelZero); - for (int i = 8; i != 16; ++i) + for (int i = HalfElts; i != (int)NumElts; ++i) ShuffleMask.push_back(SM_SentinelUndef); } -void DecodeINSERTQIMask(int Len, int Idx, +void DecodeINSERTQIMask(MVT VT, int Len, int Idx, SmallVectorImpl<int> &ShuffleMask) { + assert(VT.is128BitVector() && "Expected 128-bit vector"); + unsigned NumElts = VT.getVectorNumElements(); + unsigned EltSize = VT.getScalarSizeInBits(); + unsigned HalfElts = NumElts / 2; + // Only the bottom 6 bits are valid for each immediate. Len &= 0x3F; Idx &= 0x3F; // We can only decode this bit insertion instruction as a shuffle if both the - // length and index work with whole bytes.
- if (0 != (Len % 8) || 0 != (Idx % 8)) + // length and index work with whole elements. + if (0 != (Len % EltSize) || 0 != (Idx % EltSize)) return; // A length of zero is equivalent to a bit length of 64. @@ -504,24 +514,24 @@ void DecodeINSERTQIMask(int Len, int Idx, // If the length + index exceeds the bottom 64 bits the result is undefined. if ((Len + Idx) > 64) { - ShuffleMask.append(16, SM_SentinelUndef); + ShuffleMask.append(NumElts, SM_SentinelUndef); return; } - // Convert index and index to work with bytes. - Len /= 8; - Idx /= 8; + // Convert index and index to work with elements. + Len /= EltSize; + Idx /= EltSize; - // INSERTQ: Extract lowest Len bytes from lower half of second source and - // insert over first source starting at Idx byte. The upper 64-bits are + // INSERTQ: Extract lowest Len elements from lower half of second source and + // insert over first source starting at Idx element. The upper 64-bits are // undefined. for (int i = 0; i != Idx; ++i) ShuffleMask.push_back(i); for (int i = 0; i != Len; ++i) - ShuffleMask.push_back(i + 16); - for (int i = Idx + Len; i != 8; ++i) + ShuffleMask.push_back(i + NumElts); + for (int i = Idx + Len; i != (int)HalfElts; ++i) ShuffleMask.push_back(i); - for (int i = 8; i != 16; ++i) + for (int i = HalfElts; i != (int)NumElts; ++i) ShuffleMask.push_back(SM_SentinelUndef); } diff --git a/interpreter/llvm/src/lib/Target/X86/Utils/X86ShuffleDecode.h b/interpreter/llvm/src/lib/Target/X86/Utils/X86ShuffleDecode.h index 17619d09d0594..251c9f7558ec7 100644 --- a/interpreter/llvm/src/lib/Target/X86/Utils/X86ShuffleDecode.h +++ b/interpreter/llvm/src/lib/Target/X86/Utils/X86ShuffleDecode.h @@ -134,12 +134,12 @@ void DecodeZeroMoveLowMask(MVT VT, SmallVectorImpl<int> &ShuffleMask); void DecodeScalarMoveMask(MVT VT, bool IsLoad, SmallVectorImpl<int> &ShuffleMask); -/// Decode a SSE4A EXTRQ instruction as a v16i8 shuffle mask. -void DecodeEXTRQIMask(int Len, int Idx, +/// Decode a SSE4A EXTRQ instruction as a shuffle mask. +void DecodeEXTRQIMask(MVT VT, int Len, int Idx, SmallVectorImpl<int> &ShuffleMask); -/// Decode a SSE4A INSERTQ instruction as a v16i8 shuffle mask. -void DecodeINSERTQIMask(int Len, int Idx, +/// Decode a SSE4A INSERTQ instruction as a shuffle mask. +void DecodeINSERTQIMask(MVT VT, int Len, int Idx, SmallVectorImpl<int> &ShuffleMask); /// Decode a VPERMILPD/VPERMILPS variable mask from a raw array of constants. diff --git a/interpreter/llvm/src/lib/Target/X86/X86.h b/interpreter/llvm/src/lib/Target/X86/X86.h index 19c93cfff0fe9..91201d1fec85a 100644 --- a/interpreter/llvm/src/lib/Target/X86/X86.h +++ b/interpreter/llvm/src/lib/Target/X86/X86.h @@ -83,6 +83,9 @@ FunctionPass *createX86WinEHStatePass(); /// the MachineInstr to MC. FunctionPass *createX86ExpandPseudoPass(); +/// This pass converts X86 cmov instructions into branch when profitable.
+FunctionPass *createX86CmovConverterPass(); + /// Return a Machine IR pass that selectively replaces /// certain byte and word instructions by equivalent 32 bit instructions, /// in order to eliminate partial register usage, false dependences on diff --git a/interpreter/llvm/src/lib/Target/X86/X86.td b/interpreter/llvm/src/lib/Target/X86/X86.td index 784c3a6557ff0..54eabeac51264 100644 --- a/interpreter/llvm/src/lib/Target/X86/X86.td +++ b/interpreter/llvm/src/lib/Target/X86/X86.td @@ -127,6 +127,9 @@ def FeatureERI : SubtargetFeature<"avx512er", "HasERI", "true", def FeatureCDI : SubtargetFeature<"avx512cd", "HasCDI", "true", "Enable AVX-512 Conflict Detection Instructions", [FeatureAVX512]>; +def FeatureVPOPCNTDQ : SubtargetFeature<"avx512vpopcntdq", "HasVPOPCNTDQ", + "true", "Enable AVX-512 Population Count Instructions", + [FeatureAVX512]>; def FeaturePFI : SubtargetFeature<"avx512pf", "HasPFI", "true", "Enable AVX-512 PreFetch Instructions", [FeatureAVX512]>; @@ -235,6 +238,8 @@ def FeatureLEAUsesAG : SubtargetFeature<"lea-uses-ag", "LEAUsesAG", "true", "LEA instruction needs inputs at AG stage">; def FeatureSlowLEA : SubtargetFeature<"slow-lea", "SlowLEA", "true", "LEA instruction with certain arguments is slow">; +def FeatureSlow3OpsLEA : SubtargetFeature<"slow-3ops-lea", "Slow3OpsLEA", "true", + "LEA instruction with 3 ops or certain registers is slow">; def FeatureSlowIncDec : SubtargetFeature<"slow-incdec", "SlowIncDec", "true", "INC and DEC instructions are slower than ADD and SUB">; def FeatureSoftFloat @@ -295,6 +300,8 @@ def ProcIntelAtom : SubtargetFeature<"atom", "X86ProcFamily", "IntelAtom", "Intel Atom processors">; def ProcIntelSLM : SubtargetFeature<"slm", "X86ProcFamily", "IntelSLM", "Intel Silvermont processors">; +def ProcIntelGLM : SubtargetFeature<"glm", "X86ProcFamily", "IntelGLM", + "Intel Goldmont processors">; class Proc<string Name, list<SubtargetFeature> Features> : ProcessorModel<Name, GenericModel, Features>; @@ -425,6 +432,35 @@ class SilvermontProc : ProcessorModel; def : SilvermontProc<"slm">; // Legacy alias. +class GoldmontProc : ProcessorModel; +def : GoldmontProc<"goldmont">; + // "Arrandale" along with corei3 and corei5 class NehalemProc : ProcessorModel; @@ -777,10 +814,8 @@ def : Proc<"bdver4", [ FeatureMWAITX ]>; -// TODO: The scheduler model falls to BTVER2 model. -// The znver1 model has to be put in place.
-// Zen -def: ProcessorModel<"znver1", BtVer2Model, [ +// Znver1 +def: ProcessorModel<"znver1", Znver1Model, [ FeatureADX, FeatureAES, FeatureAVX2, diff --git a/interpreter/llvm/src/lib/Target/X86/X86AsmPrinter.cpp b/interpreter/llvm/src/lib/Target/X86/X86AsmPrinter.cpp index e1825ca1eda13..dc15aeadaa619 100644 --- a/interpreter/llvm/src/lib/Target/X86/X86AsmPrinter.cpp +++ b/interpreter/llvm/src/lib/Target/X86/X86AsmPrinter.cpp @@ -17,6 +17,7 @@ #include "MCTargetDesc/X86BaseInfo.h" #include "X86InstrInfo.h" #include "X86MachineFunctionInfo.h" +#include "llvm/BinaryFormat/COFF.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/CodeGen/MachineValueType.h" @@ -34,7 +35,6 @@ #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/Support/COFF.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" diff --git a/interpreter/llvm/src/lib/Target/X86/X86CallLowering.cpp b/interpreter/llvm/src/lib/Target/X86/X86CallLowering.cpp index 161bfa7b54748..99aeec67c3266 100644 --- a/interpreter/llvm/src/lib/Target/X86/X86CallLowering.cpp +++ b/interpreter/llvm/src/lib/Target/X86/X86CallLowering.cpp @@ -19,6 +19,7 @@ #include "X86InstrInfo.h" #include "X86TargetMachine.h" +#include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineValueType.h" @@ -35,7 +36,7 @@ using namespace llvm; X86CallLowering::X86CallLowering(const X86TargetLowering &TLI) : CallLowering(&TLI) {} -void X86CallLowering::splitToValueTypes(const ArgInfo &OrigArg, +bool X86CallLowering::splitToValueTypes(const ArgInfo &OrigArg, SmallVectorImpl<ArgInfo> &SplitArgs, const DataLayout &DL, MachineRegisterInfo &MRI, @@ -43,14 +44,24 @@ void X86CallLowering::splitToValueTypes(const ArgInfo &OrigArg, const X86TargetLowering &TLI = *getTLI<X86TargetLowering>(); LLVMContext &Context = OrigArg.Ty->getContext(); - EVT VT = TLI.getValueType(DL, OrigArg.Ty); + + SmallVector<EVT, 4> SplitVTs; + SmallVector<uint64_t, 4> Offsets; + ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs, &Offsets, 0); + + if (SplitVTs.size() != 1) { + // TODO: support struct/array split + return false; + } + + EVT VT = SplitVTs[0]; unsigned NumParts = TLI.getNumRegisters(Context, VT); if (NumParts == 1) { // replace the original type ( pointer -> GPR ). SplitArgs.emplace_back(OrigArg.Reg, VT.getTypeForEVT(Context), OrigArg.Flags, OrigArg.IsFixed); - return; + return true; } SmallVector<unsigned, 8> SplitRegs; @@ -67,6 +78,7 @@ void X86CallLowering::splitToValueTypes(const ArgInfo &OrigArg, } PerformArgSplit(SplitRegs); + return true; } namespace { @@ -113,9 +125,11 @@ bool X86CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder, setArgFlags(OrigArg, AttributeList::ReturnIndex, DL, F); SmallVector<ArgInfo, 8> SplitArgs; - splitToValueTypes( - OrigArg, SplitArgs, DL, MRI, - [&](ArrayRef<unsigned> Regs) { MIRBuilder.buildUnmerge(Regs, VReg); }); + if (!splitToValueTypes(OrigArg, SplitArgs, DL, MRI, + [&](ArrayRef<unsigned> Regs) { + MIRBuilder.buildUnmerge(Regs, VReg); + })) + return false; FuncReturnHandler Handler(MIRBuilder, MRI, MIB, RetCC_X86); if (!handleAssignments(MIRBuilder, SplitArgs, Handler)) @@ -181,12 +195,23 @@ bool X86CallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, SmallVector<ArgInfo, 8> SplitArgs; unsigned Idx = 0; for (auto &Arg : F.args()) { + + // TODO: handle not simple cases.
+ if (Arg.hasAttribute(Attribute::ByVal) || + Arg.hasAttribute(Attribute::InReg) || + Arg.hasAttribute(Attribute::StructRet) || + Arg.hasAttribute(Attribute::SwiftSelf) || + Arg.hasAttribute(Attribute::SwiftError) || + Arg.hasAttribute(Attribute::Nest)) + return false; + ArgInfo OrigArg(VRegs[Idx], Arg.getType()); - setArgFlags(OrigArg, Idx + 1, DL, F); - splitToValueTypes(OrigArg, SplitArgs, DL, MRI, - [&](ArrayRef Regs) { - MIRBuilder.buildMerge(VRegs[Idx], Regs); - }); + setArgFlags(OrigArg, Idx + AttributeList::FirstArgIndex, DL, F); + if (!splitToValueTypes(OrigArg, SplitArgs, DL, MRI, + [&](ArrayRef Regs) { + MIRBuilder.buildMerge(VRegs[Idx], Regs); + })) + return false; Idx++; } diff --git a/interpreter/llvm/src/lib/Target/X86/X86CallLowering.h b/interpreter/llvm/src/lib/Target/X86/X86CallLowering.h index 8a8afb5682982..6a5dabf33a0a0 100644 --- a/interpreter/llvm/src/lib/Target/X86/X86CallLowering.h +++ b/interpreter/llvm/src/lib/Target/X86/X86CallLowering.h @@ -39,7 +39,7 @@ class X86CallLowering : public CallLowering { /// A function of this type is used to perform value split action. typedef std::function)> SplitArgTy; - void splitToValueTypes(const ArgInfo &OrigArgInfo, + bool splitToValueTypes(const ArgInfo &OrigArgInfo, SmallVectorImpl &SplitArgs, const DataLayout &DL, MachineRegisterInfo &MRI, SplitArgTy SplitArg) const; diff --git a/interpreter/llvm/src/lib/Target/X86/X86CallingConv.td b/interpreter/llvm/src/lib/Target/X86/X86CallingConv.td index 6781d761a1c4f..26461986427d2 100644 --- a/interpreter/llvm/src/lib/Target/X86/X86CallingConv.td +++ b/interpreter/llvm/src/lib/Target/X86/X86CallingConv.td @@ -73,8 +73,8 @@ def CC_#NAME : CallingConv<[ CCIfSubtarget<"is64Bit()", CCIfByVal>>, CCIfByVal>, - // Promote i1/i8/i16 arguments to i32. - CCIfType<[i1, i8, i16], CCPromoteToType>, + // Promote i1/i8/i16/v1i1 arguments to i32. + CCIfType<[i1, i8, i16, v1i1], CCPromoteToType>, // Promote v8i1/v16i1/v32i1 arguments to i32. CCIfType<[v8i1, v16i1, v32i1], CCPromoteToType>, @@ -146,8 +146,8 @@ def CC_#NAME : CallingConv<[ ]>; def RetCC_#NAME : CallingConv<[ - // Promote i1, v8i1 arguments to i8. - CCIfType<[i1, v8i1], CCPromoteToType>, + // Promote i1, v1i1, v8i1 arguments to i8. + CCIfType<[i1, v1i1, v8i1], CCPromoteToType>, // Promote v16i1 arguments to i16. CCIfType<[v16i1], CCPromoteToType>, @@ -207,6 +207,7 @@ def RetCC_X86Common : CallingConv<[ // // For code that doesn't care about the ABI, we allow returning more than two // integer values in registers. + CCIfType<[v1i1], CCPromoteToType>, CCIfType<[i1], CCPromoteToType>, CCIfType<[i8] , CCAssignToReg<[AL, DL, CL]>>, CCIfType<[i16], CCAssignToReg<[AX, DX, CX]>>, @@ -375,6 +376,7 @@ def RetCC_X86_64_Swift : CallingConv<[ CCIfSwiftError>>, // For integers, ECX, R8D can be used as extra return registers. + CCIfType<[v1i1], CCPromoteToType>, CCIfType<[i1], CCPromoteToType>, CCIfType<[i8] , CCAssignToReg<[AL, DL, CL, R8B]>>, CCIfType<[i16], CCAssignToReg<[AX, DX, CX, R8W]>>, @@ -446,7 +448,7 @@ def RetCC_X86_64 : CallingConv<[ CCIfCC<"CallingConv::Swift", CCDelegateTo>, // Handle explicit CC selection - CCIfCC<"CallingConv::X86_64_Win64", CCDelegateTo>, + CCIfCC<"CallingConv::Win64", CCDelegateTo>, CCIfCC<"CallingConv::X86_64_SysV", CCDelegateTo>, // Handle Vectorcall CC @@ -485,8 +487,8 @@ def CC_X86_64_C : CallingConv<[ // Handles byval parameters. CCIfByVal>, - // Promote i1/i8/i16 arguments to i32. - CCIfType<[i1, i8, i16], CCPromoteToType>, + // Promote i1/i8/i16/v1i1 arguments to i32. 
+ CCIfType<[i1, i8, i16, v1i1], CCPromoteToType>, // The 'nest' parameter, if any, is passed in R10. CCIfNest>>, @@ -584,8 +586,8 @@ def CC_X86_Win64_C : CallingConv<[ // FIXME: Handle byval stuff. // FIXME: Handle varargs. - // Promote i1/i8/i16 arguments to i32. - CCIfType<[i1, i8, i16], CCPromoteToType>, + // Promote i1/i8/i16/v1i1 arguments to i32. + CCIfType<[i1, i8, i16, v1i1], CCPromoteToType>, // The 'nest' parameter, if any, is passed in R10. CCIfNest>, @@ -649,7 +651,15 @@ def CC_X86_64_GHC : CallingConv<[ // Pass in STG registers: F1, F2, F3, F4, D1, D2 CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCIfSubtarget<"hasSSE1()", - CCAssignToReg<[XMM1, XMM2, XMM3, XMM4, XMM5, XMM6]>>> + CCAssignToReg<[XMM1, XMM2, XMM3, XMM4, XMM5, XMM6]>>>, + // AVX + CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64], + CCIfSubtarget<"hasAVX()", + CCAssignToReg<[YMM1, YMM2, YMM3, YMM4, YMM5, YMM6]>>>, + // AVX-512 + CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64], + CCIfSubtarget<"hasAVX512()", + CCAssignToReg<[ZMM1, ZMM2, ZMM3, ZMM4, ZMM5, ZMM6]>>> ]>; def CC_X86_64_HiPE : CallingConv<[ @@ -796,8 +806,8 @@ def CC_X86_32_Common : CallingConv<[ ]>; def CC_X86_32_C : CallingConv<[ - // Promote i1/i8/i16 arguments to i32. - CCIfType<[i1, i8, i16], CCPromoteToType>, + // Promote i1/i8/i16/v1i1 arguments to i32. + CCIfType<[i1, i8, i16, v1i1], CCPromoteToType>, // The 'nest' parameter, if any, is passed in ECX. CCIfNest>, @@ -816,8 +826,8 @@ def CC_X86_32_MCU : CallingConv<[ // puts arguments in registers. CCIfByVal>, - // Promote i1/i8/i16 arguments to i32. - CCIfType<[i1, i8, i16], CCPromoteToType>, + // Promote i1/i8/i16/v1i1 arguments to i32. + CCIfType<[i1, i8, i16, v1i1], CCPromoteToType>, // If the call is not a vararg call, some arguments may be passed // in integer registers. @@ -828,8 +838,8 @@ def CC_X86_32_MCU : CallingConv<[ ]>; def CC_X86_32_FastCall : CallingConv<[ - // Promote i1/i8/i16 arguments to i32. - CCIfType<[i1, i8, i16], CCPromoteToType>, + // Promote i1/i8/i16/v1i1 arguments to i32. + CCIfType<[i1, i8, i16, v1i1], CCPromoteToType>, // The 'nest' parameter, if any, is passed in EAX. CCIfNest>, @@ -858,15 +868,15 @@ def CC_X86_32_ThisCall_Common : CallingConv<[ ]>; def CC_X86_32_ThisCall_Mingw : CallingConv<[ - // Promote i1/i8/i16 arguments to i32. - CCIfType<[i1, i8, i16], CCPromoteToType>, + // Promote i1/i8/i16/v1i1 arguments to i32. + CCIfType<[i1, i8, i16, v1i1], CCPromoteToType>, CCDelegateTo ]>; def CC_X86_32_ThisCall_Win : CallingConv<[ - // Promote i1/i8/i16 arguments to i32. - CCIfType<[i1, i8, i16], CCPromoteToType>, + // Promote i1/i8/i16/v1i1 arguments to i32. + CCIfType<[i1, i8, i16, v1i1], CCPromoteToType>, // Pass sret arguments indirectly through stack. CCIfSRet>, @@ -885,8 +895,8 @@ def CC_X86_32_FastCC : CallingConv<[ // puts arguments in registers. CCIfByVal>, - // Promote i1/i8/i16 arguments to i32. - CCIfType<[i1, i8, i16], CCPromoteToType>, + // Promote i1/i8/i16/v1i1 arguments to i32. + CCIfType<[i1, i8, i16, v1i1], CCPromoteToType>, // The 'nest' parameter, if any, is passed in EAX. 
CCIfNest>, @@ -994,7 +1004,7 @@ def CC_X86_64 : CallingConv<[ CCIfCC<"CallingConv::HiPE", CCDelegateTo>, CCIfCC<"CallingConv::WebKit_JS", CCDelegateTo>, CCIfCC<"CallingConv::AnyReg", CCDelegateTo>, - CCIfCC<"CallingConv::X86_64_Win64", CCDelegateTo>, + CCIfCC<"CallingConv::Win64", CCDelegateTo>, CCIfCC<"CallingConv::X86_64_SysV", CCDelegateTo>, CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo>, CCIfCC<"CallingConv::HHVM", CCDelegateTo>, diff --git a/interpreter/llvm/src/lib/Target/X86/X86CmovConversion.cpp b/interpreter/llvm/src/lib/Target/X86/X86CmovConversion.cpp new file mode 100644 index 0000000000000..bfc834435de55 --- /dev/null +++ b/interpreter/llvm/src/lib/Target/X86/X86CmovConversion.cpp @@ -0,0 +1,611 @@ +//====-- X86CmovConversion.cpp - Convert Cmov to Branch -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This file implements a pass that converts X86 cmov instructions into +/// branches when profitable. This pass is conservative, i.e., it applies the +/// transformation if and only if it can guarantee a gain with high confidence. +/// +/// Thus, the optimization applies under the following conditions: +/// 1. Consider as candidates only CMOVs in innermost loops, assuming that +/// most hotspots are represented by these loops. +/// 2. Given a group of CMOV instructions that use the same EFLAGS def +/// instruction: +/// a. Consider them as candidates only if all have the same condition code +/// or the opposite one, to prevent generating more than one conditional +/// jump per EFLAGS def instruction. +/// b. Consider them as candidates only if all are profitable to be +/// converted, assuming that one bad conversion may cause a degradation. +/// 3. Apply the conversion only for loops that are found profitable and only +/// for CMOV candidates that were found profitable. +/// a. A loop is considered profitable only if the conversion will reduce +/// its depth cost by some threshold. +/// b. A CMOV is considered profitable if the cost of its condition is +/// higher than the average cost of its true-value and false-value by +/// 25% of the branch-misprediction-penalty; this assures no degradation +/// even with 25% branch misprediction. +/// +/// Note: This pass is assumed to run on SSA machine code.
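The profitability rules in the header above can be restated as a small standalone sketch. This is not part of the patch; it only re-expresses, under illustrative names, the arithmetic the pass uses: the rounded-up-average depth of a converted CMOV, the 25%-of-misprediction-penalty margin (rule 3b), and the 12.5% loop-depth threshold (rule 3a, iteration-independent case).

```cpp
// Standalone sketch (not part of the patch) of the profitability rules
// described above; all names here are illustrative.

// Depth of a CMOV if it were converted to a branch: with no branch-weight
// information, each value operand is assumed to be taken 50% of the time,
// so the depth is the rounded-up average of the two operand depths.
static unsigned depthOfOptCmov(unsigned TrueOpDepth, unsigned FalseOpDepth) {
  return (TrueOpDepth + FalseOpDepth + 1) / 2;
}

// Rule 3b: convert only when the depth saved by a predicted branch covers
// at least 25% of the branch-misprediction penalty.
static bool worthConvertingCmov(unsigned CondDepth, unsigned TrueOpDepth,
                                unsigned FalseOpDepth,
                                unsigned MispredictPenalty) {
  unsigned ValCost = depthOfOptCmov(TrueOpDepth, FalseOpDepth);
  return ValCost <= CondDepth &&
         (CondDepth - ValCost) * 4 >= MispredictPenalty;
}

// Rule 3a, iteration-independent case: the loop is worth optimizing when
// the saved depth is at least 12.5% of the original loop depth
// (Diff * 8 >= Depth). Assumes OptLoopDepth <= LoopDepth.
static bool worthOptimizingLoop(unsigned LoopDepth, unsigned OptLoopDepth) {
  return (LoopDepth - OptLoopDepth) * 8 >= LoopDepth;
}
```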
+//===----------------------------------------------------------------------===// +// +// External interfaces: +// FunctionPass *llvm::createX86CmovConverterPass(); +// bool X86CmovConverterPass::runOnMachineFunction(MachineFunction &MF); +// + +#include "X86.h" +#include "X86InstrInfo.h" +#include "X86Subtarget.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetSchedule.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "x86-cmov-converter" + +STATISTIC(NumOfSkippedCmovGroups, "Number of unsupported CMOV-groups"); +STATISTIC(NumOfCmovGroupCandidate, "Number of CMOV-group candidates"); +STATISTIC(NumOfLoopCandidate, "Number of CMOV-conversion profitable loops"); +STATISTIC(NumOfOptimizedCmovGroups, "Number of optimized CMOV-groups"); + +namespace { +// This internal switch can be used to turn off the cmov/branch optimization. +static cl::opt + EnableCmovConverter("x86-cmov-converter", + cl::desc("Enable the X86 cmov-to-branch optimization."), + cl::init(true), cl::Hidden); + +/// Converts X86 cmov instructions into branches when profitable. +class X86CmovConverterPass : public MachineFunctionPass { +public: + X86CmovConverterPass() : MachineFunctionPass(ID) {} + ~X86CmovConverterPass() {} + + StringRef getPassName() const override { return "X86 cmov Conversion"; } + bool runOnMachineFunction(MachineFunction &MF) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; + +private: + /// Pass identification, replacement for typeid. + static char ID; + + const MachineRegisterInfo *MRI; + const TargetInstrInfo *TII; + TargetSchedModel TSchedModel; + + /// List of consecutive CMOV instructions. + typedef SmallVector CmovGroup; + typedef SmallVector CmovGroups; + + /// Collect all CMOV-group-candidates in \p CurrLoop and update \p + /// CmovInstGroups accordingly. + /// + /// \param CurrLoop Loop being processed. + /// \param CmovInstGroups List of consecutive CMOV instructions in CurrLoop. + /// \returns true iff it found any CMOV-group-candidate. + bool collectCmovCandidates(MachineLoop *CurrLoop, CmovGroups &CmovInstGroups); + + /// Check if it is profitable to transform each CMOV-group-candidate into a + /// branch. Remove all groups that are not profitable from \p CmovInstGroups. + /// + /// \param CurrLoop Loop being processed. + /// \param CmovInstGroups List of consecutive CMOV instructions in CurrLoop. + /// \returns true iff any CMOV-group-candidate remains. + bool checkForProfitableCmovCandidates(MachineLoop *CurrLoop, + CmovGroups &CmovInstGroups); + + /// Convert the given list of consecutive CMOV instructions into a branch. + /// + /// \param Group Consecutive CMOV instructions to be converted into a branch.
+ void convertCmovInstsToBranches(SmallVectorImpl &Group) const; +}; + +char X86CmovConverterPass::ID = 0; + +void X86CmovConverterPass::getAnalysisUsage(AnalysisUsage &AU) const { + MachineFunctionPass::getAnalysisUsage(AU); + AU.addRequired(); +} + +bool X86CmovConverterPass::runOnMachineFunction(MachineFunction &MF) { + if (skipFunction(*MF.getFunction())) + return false; + if (!EnableCmovConverter) + return false; + + DEBUG(dbgs() << "********** " << getPassName() << " : " << MF.getName() + << "**********\n"); + + bool Changed = false; + MachineLoopInfo &MLI = getAnalysis(); + const TargetSubtargetInfo &STI = MF.getSubtarget(); + MRI = &MF.getRegInfo(); + TII = STI.getInstrInfo(); + TSchedModel.init(STI.getSchedModel(), &STI, TII); + + //===--------------------------------------------------------------------===// + // Algorithm + // --------- + // For each innermost loop + // collectCmovCandidates() { + // Find all CMOV-group-candidates. + // } + // + // checkForProfitableCmovCandidates() { + // * Calculate both loop-depth and optimized-loop-depth. + // * Use these depths to check for loop transformation profitability. + // * Check for CMOV-group-candidate transformation profitability. + // } + // + // For each profitable CMOV-group-candidate + // convertCmovInstsToBranches() { + // * Create FalseBB, SinkBB, Conditional branch to SinkBB. + // * Replace each CMOV instruction with a PHI instruction in SinkBB. + // } + // + // Note: For more details, see each function description. + //===--------------------------------------------------------------------===// + for (MachineBasicBlock &MBB : MF) { + MachineLoop *CurrLoop = MLI.getLoopFor(&MBB); + + // Optimize only innermost loops. + if (!CurrLoop || CurrLoop->getHeader() != &MBB || + !CurrLoop->getSubLoops().empty()) + continue; + + // List of consecutive CMOV instructions to be processed. + CmovGroups CmovInstGroups; + + if (!collectCmovCandidates(CurrLoop, CmovInstGroups)) + continue; + + if (!checkForProfitableCmovCandidates(CurrLoop, CmovInstGroups)) + continue; + + Changed = true; + for (auto &Group : CmovInstGroups) + convertCmovInstsToBranches(Group); + } + return Changed; +} + +bool X86CmovConverterPass::collectCmovCandidates(MachineLoop *CurrLoop, + CmovGroups &CmovInstGroups) { + //===--------------------------------------------------------------------===// + // Collect all CMOV-group-candidates and add them into CmovInstGroups. + // + // CMOV-group: + // CMOV instructions, in the same MBB, that use the same EFLAGS def + // instruction. + // + // CMOV-group-candidate: + // CMOV-group where all the CMOV instructions are + // 1. consecutive. + // 2. have the same condition code or the opposite one. + // 3. use only register operands (X86::CMOVrr). + //===--------------------------------------------------------------------===// + // List of possible improvements (TODO's): + // -------------------------------------- + // TODO: Add support for X86::CMOVrm instructions. + // TODO: Add support for X86::SETcc instructions. + // TODO: Add support for CMOV-groups with non-consecutive CMOV instructions. + //===--------------------------------------------------------------------===// + + // Currently processed CMOV-group. + CmovGroup Group; + for (auto *MBB : CurrLoop->getBlocks()) { + Group.clear(); + // Condition code of the first CMOV instruction in the currently processed + // range, and its opposite condition code. + X86::CondCode FirstCC, FirstOppCC; + // Indicator of a non-CMOVrr instruction in the currently processed range.
+ bool FoundNonCMOVInst = false; + // Indicator for whether the currently processed CMOV-group should be skipped. + bool SkipGroup = false; + + for (auto &I : *MBB) { + X86::CondCode CC = X86::getCondFromCMovOpc(I.getOpcode()); + // Check if we found an X86::CMOVrr instruction. + if (CC != X86::COND_INVALID && !I.mayLoad()) { + if (Group.empty()) { + // We found the first CMOV in the range, reset flags. + FirstCC = CC; + FirstOppCC = X86::GetOppositeBranchCondition(CC); + FoundNonCMOVInst = false; + SkipGroup = false; + } + Group.push_back(&I); + // Check if it is a non-consecutive CMOV instruction or it has a different + // condition code than FirstCC or FirstOppCC. + if (FoundNonCMOVInst || (CC != FirstCC && CC != FirstOppCC)) + // Set the SkipGroup indicator to skip the currently processed CMOV-group. + SkipGroup = true; + continue; + } + // If Group is empty, keep looking for the first CMOV in the range. + if (Group.empty()) + continue; + + // We found a non-X86::CMOVrr instruction. + FoundNonCMOVInst = true; + // Check if this instruction defines EFLAGS, to determine the end of the + // processed range, as there will be no more instructions using the current + // EFLAGS def. + if (I.definesRegister(X86::EFLAGS)) { + // Check if the currently processed CMOV-group should not be skipped and + // add it as a CMOV-group-candidate. + if (!SkipGroup) + CmovInstGroups.push_back(Group); + else + ++NumOfSkippedCmovGroups; + Group.clear(); + } + } + // The end of the basic block is considered the end of the range; check if + // the currently processed CMOV-group should not be skipped and add it as a + // CMOV-group-candidate. + if (Group.empty()) + continue; + if (!SkipGroup) + CmovInstGroups.push_back(Group); + else + ++NumOfSkippedCmovGroups; + } + + NumOfCmovGroupCandidate += CmovInstGroups.size(); + return !CmovInstGroups.empty(); +} + +/// \returns Depth of the CMOV instruction as if it were converted into a branch. +/// \param TrueOpDepth depth cost of CMOV true value operand. +/// \param FalseOpDepth depth cost of CMOV false value operand. +static unsigned getDepthOfOptCmov(unsigned TrueOpDepth, unsigned FalseOpDepth) { + //===--------------------------------------------------------------------===// + // With no info about branch weight, we assume 50% for each value operand. + // Thus, the depth of the optimized CMOV instruction is the rounded-up + // average of its True-Operand-Value-Depth and False-Operand-Value-Depth. + //===--------------------------------------------------------------------===// + return (TrueOpDepth + FalseOpDepth + 1) / 2; +} + +bool X86CmovConverterPass::checkForProfitableCmovCandidates( + MachineLoop *CurrLoop, CmovGroups &CmovInstGroups) { + struct DepthInfo { + /// Depth of original loop. + unsigned Depth; + /// Depth of optimized loop. + unsigned OptDepth; + }; + /// Number of loop iterations to calculate depth for. + static const unsigned LoopIterations = 2; + DenseMap DepthMap; + DepthInfo LoopDepth[LoopIterations] = {{0, 0}, {0, 0}}; + enum { PhyRegType = 0, VirRegType = 1, RegTypeNum = 2 }; + /// For each register type maps the register to its last def instruction. + DenseMap RegDefMaps[RegTypeNum]; + /// Maps register operand to its def instruction, which can be nullptr if it + /// is unknown (e.g., operand is defined outside the loop). + DenseMap OperandToDefMap; + + // Set depth of unknown instruction (i.e., nullptr) to zero.
+ DepthMap[nullptr] = {0, 0}; + + SmallPtrSet CmovInstructions; + for (auto &Group : CmovInstGroups) + CmovInstructions.insert(Group.begin(), Group.end()); + + //===--------------------------------------------------------------------===// + // Step 1: Calculate instruction depth and loop depth. + // Optimized-Loop: + // loop with CMOV-group-candidates converted into branches. + // + // Instruction-Depth: + // instruction latency + max operand depth. + // * For a CMOV instruction in the optimized loop, the depth is calculated + // as: CMOV latency + getDepthOfOptCmov(True-Op-Depth, False-Op-Depth) + // TODO: Find a better way to estimate the latency of the branch instruction + // rather than using the CMOV latency. + // + // Loop-Depth: + // max instruction depth of all instructions in the loop. + // Note: instruction with max depth represents the critical-path in the loop. + // + // Loop-Depth[i]: + // Loop-Depth calculated for first `i` iterations. + // Note: it is enough to calculate depth for up to two iterations. + // + // Depth-Diff[i]: + // Number of cycles saved in first `i` iterations by optimizing the loop. + //===--------------------------------------------------------------------===// + for (unsigned I = 0; I < LoopIterations; ++I) { + DepthInfo &MaxDepth = LoopDepth[I]; + for (auto *MBB : CurrLoop->getBlocks()) { + // Clear physical registers Def map. + RegDefMaps[PhyRegType].clear(); + for (MachineInstr &MI : *MBB) { + unsigned MIDepth = 0; + unsigned MIDepthOpt = 0; + bool IsCMOV = CmovInstructions.count(&MI); + for (auto &MO : MI.uses()) { + // Check for "isUse()" because "uses()" also returns implicit definitions. + if (!MO.isReg() || !MO.isUse()) + continue; + unsigned Reg = MO.getReg(); + auto &RDM = RegDefMaps[TargetRegisterInfo::isVirtualRegister(Reg)]; + if (MachineInstr *DefMI = RDM.lookup(Reg)) { + OperandToDefMap[&MO] = DefMI; + DepthInfo Info = DepthMap.lookup(DefMI); + MIDepth = std::max(MIDepth, Info.Depth); + if (!IsCMOV) + MIDepthOpt = std::max(MIDepthOpt, Info.OptDepth); + } + } + + if (IsCMOV) + MIDepthOpt = getDepthOfOptCmov( + DepthMap[OperandToDefMap.lookup(&MI.getOperand(1))].OptDepth, + DepthMap[OperandToDefMap.lookup(&MI.getOperand(2))].OptDepth); + + // Iterate over all operands to handle implicit definitions as well. + for (auto &MO : MI.operands()) { + if (!MO.isReg() || !MO.isDef()) + continue; + unsigned Reg = MO.getReg(); + RegDefMaps[TargetRegisterInfo::isVirtualRegister(Reg)][Reg] = &MI; + } + + unsigned Latency = TSchedModel.computeInstrLatency(&MI); + DepthMap[&MI] = {MIDepth += Latency, MIDepthOpt += Latency}; + MaxDepth.Depth = std::max(MaxDepth.Depth, MIDepth); + MaxDepth.OptDepth = std::max(MaxDepth.OptDepth, MIDepthOpt); + } + } + } + + unsigned Diff[LoopIterations] = {LoopDepth[0].Depth - LoopDepth[0].OptDepth, + LoopDepth[1].Depth - LoopDepth[1].OptDepth}; + + //===--------------------------------------------------------------------===// + // Step 2: Check if the loop is worth optimizing. + // Worth-Optimize-Loop: + // case 1: Diff[1] == Diff[0] + // Critical-path is iteration independent - there is no dependency + // of critical-path instructions on critical-path instructions of + // the previous iteration. + // Thus, it is enough to check the gain percent of the 1st iteration - + // To be conservative, the optimized loop needs to have a depth + // 12.5% less than the original loop, per iteration.
+ // + // case 2: Diff[1] > Diff[0] + // Critical-path is iteration dependent - there is a dependency of + // critical-path instructions on critical-path instructions of + // the previous iteration. + // Thus, it is required to check the gradient of the gain - the + // change in Depth-Diff compared to the change in Loop-Depth between + // 1st and 2nd iterations. + // To be conservative, the gradient needs to be at least 50%. + // + // If the loop is not worth optimizing, remove all CMOV-group-candidates. + //===--------------------------------------------------------------------===// + bool WorthOptLoop = false; + if (Diff[1] == Diff[0]) + WorthOptLoop = Diff[0] * 8 >= LoopDepth[0].Depth; + else if (Diff[1] > Diff[0]) + WorthOptLoop = + (Diff[1] - Diff[0]) * 2 >= (LoopDepth[1].Depth - LoopDepth[0].Depth); + + if (!WorthOptLoop) + return false; + + ++NumOfLoopCandidate; + + //===--------------------------------------------------------------------===// + // Step 3: Check for each CMOV-group-candidate if it is worth optimizing. + // Worth-Optimize-Group: + // Iff it is worth optimizing all CMOV instructions in the group. + // + // Worth-Optimize-CMOV: + // Predicted branch is faster than CMOV by the difference between depth of + // condition operand and depth of taken (predicted) value operand. + // To be conservative, the gain of such CMOV transformation should cover + // at least 25% of the branch-misprediction-penalty. + //===--------------------------------------------------------------------===// + unsigned MispredictPenalty = TSchedModel.getMCSchedModel()->MispredictPenalty; + CmovGroups TempGroups; + std::swap(TempGroups, CmovInstGroups); + for (auto &Group : TempGroups) { + bool WorthOpGroup = true; + for (auto *MI : Group) { + // Avoid CMOV instructions whose value is used as a pointer to load from. + // This is another conservative check to avoid converting a CMOV instruction + // used with a tree-search-like algorithm, where the branch is unpredictable. + auto UIs = MRI->use_instructions(MI->defs().begin()->getReg()); + if (UIs.begin() != UIs.end() && ++UIs.begin() == UIs.end()) { + unsigned Op = UIs.begin()->getOpcode(); + if (Op == X86::MOV64rm || Op == X86::MOV32rm) { + WorthOpGroup = false; + break; + } + } + + unsigned CondCost = + DepthMap[OperandToDefMap.lookup(&MI->getOperand(3))].Depth; + unsigned ValCost = getDepthOfOptCmov( + DepthMap[OperandToDefMap.lookup(&MI->getOperand(1))].Depth, + DepthMap[OperandToDefMap.lookup(&MI->getOperand(2))].Depth); + if (ValCost > CondCost || (CondCost - ValCost) * 4 < MispredictPenalty) { + WorthOpGroup = false; + break; + } + } + + if (WorthOpGroup) + CmovInstGroups.push_back(Group); + } + + return !CmovInstGroups.empty(); +} + +static bool checkEFLAGSLive(MachineInstr *MI) { + if (MI->killsRegister(X86::EFLAGS)) + return false; + + // The EFLAGS operand of MI might be missing a kill marker. + // Figure out whether the EFLAGS operand should be live after the MI + // instruction. + MachineBasicBlock *BB = MI->getParent(); + MachineBasicBlock::iterator ItrMI = MI; + + // Scan forward through BB for a use/def of EFLAGS. + for (auto I = std::next(ItrMI), E = BB->end(); I != E; ++I) { + if (I->readsRegister(X86::EFLAGS)) + return true; + if (I->definesRegister(X86::EFLAGS)) + return false; + } + + // We hit the end of the block, check whether EFLAGS is live into a successor.
+ for (auto I = BB->succ_begin(), E = BB->succ_end(); I != E; ++I) { + if ((*I)->isLiveIn(X86::EFLAGS)) + return true; + } + + return false; +} + +void X86CmovConverterPass::convertCmovInstsToBranches( + SmallVectorImpl &Group) const { + assert(!Group.empty() && "No CMOV instructions to convert"); + ++NumOfOptimizedCmovGroups; + + // To convert a CMOVcc instruction, we actually have to insert the diamond + // control-flow pattern. The incoming instruction knows the destination vreg + // to set, the condition code register to branch on, the true/false values to + // select between, and a branch opcode to use. + + // Before + // ----- + // MBB: + // cond = cmp ... + // v1 = CMOVge t1, f1, cond + // v2 = CMOVlt t2, f2, cond + // v3 = CMOVge v1, f3, cond + // + // After + // ----- + // MBB: + // cond = cmp ... + // jge %SinkMBB + // + // FalseMBB: + // jmp %SinkMBB + // + // SinkMBB: + // %v1 = phi[%f1, %FalseMBB], [%t1, %MBB] + // %v2 = phi[%t2, %FalseMBB], [%f2, %MBB] ; For CMOV with OppCC switch + // ; true-value with false-value + // %v3 = phi[%f3, %FalseMBB], [%t1, %MBB] ; Phi instruction cannot use + // ; previous Phi instruction result + + MachineInstr &MI = *Group.front(); + MachineInstr *LastCMOV = Group.back(); + DebugLoc DL = MI.getDebugLoc(); + X86::CondCode CC = X86::CondCode(X86::getCondFromCMovOpc(MI.getOpcode())); + X86::CondCode OppCC = X86::GetOppositeBranchCondition(CC); + MachineBasicBlock *MBB = MI.getParent(); + MachineFunction::iterator It = ++MBB->getIterator(); + MachineFunction *F = MBB->getParent(); + const BasicBlock *BB = MBB->getBasicBlock(); + + MachineBasicBlock *FalseMBB = F->CreateMachineBasicBlock(BB); + MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(BB); + F->insert(It, FalseMBB); + F->insert(It, SinkMBB); + + // If the EFLAGS register isn't dead in the terminator, then claim that it's + // live into the sink and copy blocks. + if (checkEFLAGSLive(LastCMOV)) { + FalseMBB->addLiveIn(X86::EFLAGS); + SinkMBB->addLiveIn(X86::EFLAGS); + } + + // Transfer the remainder of BB and its successor edges to SinkMBB. + SinkMBB->splice(SinkMBB->begin(), MBB, + std::next(MachineBasicBlock::iterator(LastCMOV)), MBB->end()); + SinkMBB->transferSuccessorsAndUpdatePHIs(MBB); + + // Add the false and sink blocks as its successors. + MBB->addSuccessor(FalseMBB); + MBB->addSuccessor(SinkMBB); + + // Create the conditional branch instruction. + BuildMI(MBB, DL, TII->get(X86::GetCondBranchFromCond(CC))).addMBB(SinkMBB); + + // Add the sink block to the false block successors. + FalseMBB->addSuccessor(SinkMBB); + + MachineInstrBuilder MIB; + MachineBasicBlock::iterator MIItBegin = MachineBasicBlock::iterator(MI); + MachineBasicBlock::iterator MIItEnd = + std::next(MachineBasicBlock::iterator(LastCMOV)); + MachineBasicBlock::iterator SinkInsertionPoint = SinkMBB->begin(); + // As we are creating the PHIs, we have to be careful if there is more than + // one. Later CMOVs may reference the results of earlier CMOVs, but later + // PHIs have to reference the individual true/false inputs from earlier PHIs. + // That also means that PHI construction must work forward from earlier to + // later, and that the code must maintain a mapping from earlier PHI's + // destination registers, and the registers that went into the PHI. 
+ DenseMap> RegRewriteTable; + + for (MachineBasicBlock::iterator MIIt = MIItBegin; MIIt != MIItEnd; ++MIIt) { + unsigned DestReg = MIIt->getOperand(0).getReg(); + unsigned Op1Reg = MIIt->getOperand(1).getReg(); + unsigned Op2Reg = MIIt->getOperand(2).getReg(); + + // If this CMOV we are processing is the opposite condition from the jump we + // generated, then we have to swap the operands for the PHI that is going to + // be generated. + if (X86::getCondFromCMovOpc(MIIt->getOpcode()) == OppCC) + std::swap(Op1Reg, Op2Reg); + + auto Op1Itr = RegRewriteTable.find(Op1Reg); + if (Op1Itr != RegRewriteTable.end()) + Op1Reg = Op1Itr->second.first; + + auto Op2Itr = RegRewriteTable.find(Op2Reg); + if (Op2Itr != RegRewriteTable.end()) + Op2Reg = Op2Itr->second.second; + + // SinkMBB: + // %Result = phi [ %FalseValue, FalseMBB ], [ %TrueValue, MBB ] + // ... + MIB = BuildMI(*SinkMBB, SinkInsertionPoint, DL, TII->get(X86::PHI), DestReg) + .addReg(Op1Reg) + .addMBB(FalseMBB) + .addReg(Op2Reg) + .addMBB(MBB); + (void)MIB; + DEBUG(dbgs() << "\tFrom: "; MIIt->dump()); + DEBUG(dbgs() << "\tTo: "; MIB->dump()); + + // Add this PHI to the rewrite table. + RegRewriteTable[DestReg] = std::make_pair(Op1Reg, Op2Reg); + } + + // Now remove the CMOV(s). + MBB->erase(MIItBegin, MIItEnd); +} + +} // End anonymous namespace. + +FunctionPass *llvm::createX86CmovConverterPass() { + return new X86CmovConverterPass(); +} diff --git a/interpreter/llvm/src/lib/Target/X86/X86FastISel.cpp b/interpreter/llvm/src/lib/Target/X86/X86FastISel.cpp index fc3b4836c1789..527e5d568ac6f 100644 --- a/interpreter/llvm/src/lib/Target/X86/X86FastISel.cpp +++ b/interpreter/llvm/src/lib/Target/X86/X86FastISel.cpp @@ -414,6 +414,8 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM, assert(HasAVX); if (IsNonTemporal && Alignment >= 32 && HasAVX2) Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm; + else if (IsNonTemporal && Alignment >= 16) + return false; // Force split for X86::VMOVNTDQArm else if (Alignment >= 32) Opc = HasVLX ? X86::VMOVAPSZ256rm : X86::VMOVAPSYrm; else @@ -424,6 +426,8 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM, assert(HasAVX); if (IsNonTemporal && Alignment >= 32 && HasAVX2) Opc = X86::VMOVNTDQAYrm; + else if (IsNonTemporal && Alignment >= 16) + return false; // Force split for X86::VMOVNTDQArm else if (Alignment >= 32) Opc = HasVLX ? X86::VMOVAPDZ256rm : X86::VMOVAPDYrm; else @@ -437,6 +441,8 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM, assert(HasAVX); if (IsNonTemporal && Alignment >= 32 && HasAVX2) Opc = X86::VMOVNTDQAYrm; + else if (IsNonTemporal && Alignment >= 16) + return false; // Force split for X86::VMOVNTDQArm else if (Alignment >= 32) Opc = HasVLX ? X86::VMOVDQA64Z256rm : X86::VMOVDQAYrm; else @@ -1181,7 +1187,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) { CC != CallingConv::X86_StdCall && CC != CallingConv::X86_ThisCall && CC != CallingConv::X86_64_SysV && - CC != CallingConv::X86_64_Win64) + CC != CallingConv::Win64) return false; // Don't handle popping bytes if they don't fit the ret's immediate. @@ -3033,6 +3039,9 @@ bool X86FastISel::fastLowerArguments() { if (!Subtarget->is64Bit()) return false; + if (Subtarget->useSoftFloat()) + return false; + // Only handle simple cases. i.e. Up to 6 i32/i64 scalar arguments. 
unsigned GPRCnt = 0; unsigned FPRCnt = 0; @@ -3162,7 +3171,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) { case CallingConv::X86_FastCall: case CallingConv::X86_StdCall: case CallingConv::X86_ThisCall: - case CallingConv::X86_64_Win64: + case CallingConv::Win64: case CallingConv::X86_64_SysV: break; } @@ -3647,13 +3656,6 @@ unsigned X86FastISel::X86MaterializeInt(const ConstantInt *CI, MVT VT) { switch (VT.SimpleTy) { default: llvm_unreachable("Unexpected value type"); case MVT::i1: - if (Subtarget->hasAVX512()) { - // Need to copy to a VK1 register. - unsigned ResultReg = createResultReg(&X86::VK1RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::COPY), ResultReg).addReg(SrcReg); - return ResultReg; - } case MVT::i8: return fastEmitInst_extractsubreg(MVT::i8, SrcReg, /*Kill=*/true, X86::sub_8bit); diff --git a/interpreter/llvm/src/lib/Target/X86/X86FixupBWInsts.cpp b/interpreter/llvm/src/lib/Target/X86/X86FixupBWInsts.cpp index c28746f96439b..95c6f2a3fa342 100644 --- a/interpreter/llvm/src/lib/Target/X86/X86FixupBWInsts.cpp +++ b/interpreter/llvm/src/lib/Target/X86/X86FixupBWInsts.cpp @@ -22,7 +22,7 @@ /// instructions and register-to-register moves. It would /// seem like cmov(s) would also be affected, but because of the way cmov is /// really implemented by most machines as reading both the destination and -/// and source regsters, and then "merging" the two based on a condition, +/// and source registers, and then "merging" the two based on a condition, /// it really already should be considered as having a true dependence on the /// destination register as well. /// diff --git a/interpreter/llvm/src/lib/Target/X86/X86FixupLEAs.cpp b/interpreter/llvm/src/lib/Target/X86/X86FixupLEAs.cpp index 2cd4c1a3e7b36..9f649dad8bc07 100644 --- a/interpreter/llvm/src/lib/Target/X86/X86FixupLEAs.cpp +++ b/interpreter/llvm/src/lib/Target/X86/X86FixupLEAs.cpp @@ -27,20 +27,26 @@ #include "llvm/Target/TargetInstrInfo.h" using namespace llvm; -#define DEBUG_TYPE "x86-fixup-LEAs" +namespace llvm { +void initializeFixupLEAPassPass(PassRegistry &); +} + +#define FIXUPLEA_DESC "X86 LEA Fixup" +#define FIXUPLEA_NAME "x86-fixup-LEAs" + +#define DEBUG_TYPE FIXUPLEA_NAME STATISTIC(NumLEAs, "Number of LEA instructions created"); namespace { class FixupLEAPass : public MachineFunctionPass { enum RegUsageState { RU_NotUsed, RU_Write, RU_Read }; - static char ID; + /// \brief Loop over all of the instructions in the basic block /// replacing applicable instructions with LEA instructions, /// where appropriate. bool processBasicBlock(MachineFunction &MF, MachineFunction::iterator MFI); - StringRef getPassName() const override { return "X86 LEA Fixup"; } /// \brief Given a machine register, look for the instruction /// which writes it in the current basic block. If found, @@ -62,6 +68,22 @@ class FixupLEAPass : public MachineFunctionPass { void processInstructionForSLM(MachineBasicBlock::iterator &I, MachineFunction::iterator MFI); + + /// \brief Given a LEA instruction which is unprofitable + /// on SNB+ try to replace it with other instructions. 
+ /// According to Intel's Optimization Reference Manual: + /// " For LEA instructions with three source operands and some specific + /// situations, instruction latency has increased to 3 cycles, and must + /// dispatch via port 1: + /// - LEA that has all three source operands: base, index, and offset + /// - LEA that uses base and index registers where the base is EBP, RBP, + /// or R13 + /// - LEA that uses RIP relative addressing mode + /// - LEA that uses 16-bit addressing mode " + /// This function currently handles the first 2 cases only. + MachineInstr *processInstrForSlow3OpLEA(MachineInstr &MI, + MachineFunction::iterator MFI); + /// \brief Look for LEAs that add 1 to reg or subtract 1 from reg + /// and convert them to INC or DEC respectively. bool fixupIncDec(MachineBasicBlock::iterator &I, @@ -85,7 +107,13 @@ class FixupLEAPass : public MachineFunctionPass { MachineBasicBlock::iterator &MBBI) const; public: - FixupLEAPass() : MachineFunctionPass(ID) {} + static char ID; + + StringRef getPassName() const override { return FIXUPLEA_DESC; } + + FixupLEAPass() : MachineFunctionPass(ID) { + initializeFixupLEAPassPass(*PassRegistry::getPassRegistry()); + } /// \brief Loop over all of the basic blocks, /// replacing instructions by equivalent LEA instructions @@ -104,9 +132,12 @@ class FixupLEAPass : public MachineFunctionPass { bool OptIncDec; bool OptLEA; }; -char FixupLEAPass::ID = 0; } +char FixupLEAPass::ID = 0; + +INITIALIZE_PASS(FixupLEAPass, FIXUPLEA_NAME, FIXUPLEA_DESC, false, false) + MachineInstr * FixupLEAPass::postRAConvertToLEA(MachineFunction::iterator &MFI, MachineBasicBlock::iterator &MBBI) const { @@ -168,7 +199,7 @@ bool FixupLEAPass::runOnMachineFunction(MachineFunction &Func) { MF = &Func; const X86Subtarget &ST = Func.getSubtarget(); OptIncDec = !ST.slowIncDec() || Func.getFunction()->optForMinSize(); - OptLEA = ST.LEAusesAG() || ST.slowLEA(); + OptLEA = ST.LEAusesAG() || ST.slowLEA() || ST.slow3OpsLEA(); if (!OptLEA && !OptIncDec) return false; @@ -242,9 +273,64 @@ FixupLEAPass::searchBackwards(MachineOperand &p, MachineBasicBlock::iterator &I, return MachineBasicBlock::iterator(); } -static inline bool isLEA(const int opcode) { - return opcode == X86::LEA16r || opcode == X86::LEA32r || - opcode == X86::LEA64r || opcode == X86::LEA64_32r; +static inline bool isLEA(const int Opcode) { + return Opcode == X86::LEA16r || Opcode == X86::LEA32r || + Opcode == X86::LEA64r || Opcode == X86::LEA64_32r; +} + +static inline bool isInefficientLEAReg(unsigned int Reg) { + return Reg == X86::EBP || Reg == X86::RBP || Reg == X86::R13; +} + +static inline bool isRegOperand(const MachineOperand &Op) { + return Op.isReg() && Op.getReg() != X86::NoRegister; +} +/// hasInefficientLEABaseReg - LEA that uses base and index registers +/// where the base is EBP, RBP, or R13 +static inline bool hasInefficientLEABaseReg(const MachineOperand &Base, + const MachineOperand &Index) { + return Base.isReg() && isInefficientLEAReg(Base.getReg()) && + isRegOperand(Index); +} + +static inline bool hasLEAOffset(const MachineOperand &Offset) { + return (Offset.isImm() && Offset.getImm() != 0) || Offset.isGlobal(); +} + +// LEA instruction that has all three operands: offset, base and index +static inline bool isThreeOperandsLEA(const MachineOperand &Base, + const MachineOperand &Index, + const MachineOperand &Offset) { + return isRegOperand(Base) && isRegOperand(Index) && hasLEAOffset(Offset); +} + +static inline int getADDrrFromLEA(int LEAOpcode) { + switch (LEAOpcode) { + default: +
llvm_unreachable("Unexpected LEA instruction"); + case X86::LEA16r: + return X86::ADD16rr; + case X86::LEA32r: + return X86::ADD32rr; + case X86::LEA64_32r: + case X86::LEA64r: + return X86::ADD64rr; + } +} + +static inline int getADDriFromLEA(int LEAOpcode, const MachineOperand &Offset) { + bool IsInt8 = Offset.isImm() && isInt<8>(Offset.getImm()); + switch (LEAOpcode) { + default: + llvm_unreachable("Unexpected LEA instruction"); + case X86::LEA16r: + return IsInt8 ? X86::ADD16ri8 : X86::ADD16ri; + case X86::LEA32r: + case X86::LEA64_32r: + return IsInt8 ? X86::ADD32ri8 : X86::ADD32ri; + case X86::LEA64r: + return IsInt8 ? X86::ADD64ri8 : X86::ADD64ri32; + } } /// isLEASimpleIncOrDec - Does this LEA have one these forms: @@ -337,8 +423,8 @@ void FixupLEAPass::seekLEAFixup(MachineOperand &p, void FixupLEAPass::processInstructionForSLM(MachineBasicBlock::iterator &I, MachineFunction::iterator MFI) { MachineInstr &MI = *I; - const int opcode = MI.getOpcode(); - if (!isLEA(opcode)) + const int Opcode = MI.getOpcode(); + if (!isLEA(Opcode)) return; if (MI.getOperand(5).getReg() != 0 || !MI.getOperand(4).isImm() || !TII->isSafeToClobberEFLAGS(*MFI, I)) @@ -350,53 +436,142 @@ void FixupLEAPass::processInstructionForSLM(MachineBasicBlock::iterator &I, return; if (MI.getOperand(2).getImm() > 1) return; - int addrr_opcode, addri_opcode; - switch (opcode) { - default: - llvm_unreachable("Unexpected LEA instruction"); - case X86::LEA16r: - addrr_opcode = X86::ADD16rr; - addri_opcode = X86::ADD16ri; - break; - case X86::LEA32r: - addrr_opcode = X86::ADD32rr; - addri_opcode = X86::ADD32ri; - break; - case X86::LEA64_32r: - case X86::LEA64r: - addrr_opcode = X86::ADD64rr; - addri_opcode = X86::ADD64ri32; - break; - } DEBUG(dbgs() << "FixLEA: Candidate to replace:"; I->dump();); DEBUG(dbgs() << "FixLEA: Replaced by: ";); MachineInstr *NewMI = nullptr; - const MachineOperand &Dst = MI.getOperand(0); // Make ADD instruction for two registers writing to LEA's destination if (SrcR1 != 0 && SrcR2 != 0) { - const MachineOperand &Src1 = MI.getOperand(SrcR1 == DstR ? 1 : 3); - const MachineOperand &Src2 = MI.getOperand(SrcR1 == DstR ? 3 : 1); - NewMI = BuildMI(*MF, MI.getDebugLoc(), TII->get(addrr_opcode)) - .add(Dst) - .add(Src1) - .add(Src2); - MFI->insert(I, NewMI); + const MCInstrDesc &ADDrr = TII->get(getADDrrFromLEA(Opcode)); + const MachineOperand &Src = MI.getOperand(SrcR1 == DstR ? 3 : 1); + NewMI = + BuildMI(*MFI, I, MI.getDebugLoc(), ADDrr, DstR).addReg(DstR).add(Src); DEBUG(NewMI->dump();); } // Make ADD instruction for immediate if (MI.getOperand(4).getImm() != 0) { + const MCInstrDesc &ADDri = + TII->get(getADDriFromLEA(Opcode, MI.getOperand(4))); const MachineOperand &SrcR = MI.getOperand(SrcR1 == DstR ? 
1 : 3); - NewMI = BuildMI(*MF, MI.getDebugLoc(), TII->get(addri_opcode)) - .add(Dst) + NewMI = BuildMI(*MFI, I, MI.getDebugLoc(), ADDri, DstR) .add(SrcR) .addImm(MI.getOperand(4).getImm()); - MFI->insert(I, NewMI); DEBUG(NewMI->dump();); } if (NewMI) { MFI->erase(I); - I = static_cast(NewMI); + I = NewMI; + } +} + +MachineInstr * +FixupLEAPass::processInstrForSlow3OpLEA(MachineInstr &MI, + MachineFunction::iterator MFI) { + + const int LEAOpcode = MI.getOpcode(); + if (!isLEA(LEAOpcode)) + return nullptr; + + const MachineOperand &Dst = MI.getOperand(0); + const MachineOperand &Base = MI.getOperand(1); + const MachineOperand &Scale = MI.getOperand(2); + const MachineOperand &Index = MI.getOperand(3); + const MachineOperand &Offset = MI.getOperand(4); + const MachineOperand &Segment = MI.getOperand(5); + + if (!(isThreeOperandsLEA(Base, Index, Offset) || + hasInefficientLEABaseReg(Base, Index)) || + !TII->isSafeToClobberEFLAGS(*MFI, MI) || + Segment.getReg() != X86::NoRegister) + return nullptr; + + unsigned int DstR = Dst.getReg(); + unsigned int BaseR = Base.getReg(); + unsigned int IndexR = Index.getReg(); + unsigned SSDstR = + (LEAOpcode == X86::LEA64_32r) ? getX86SubSuperRegister(DstR, 64) : DstR; + bool IsScale1 = Scale.getImm() == 1; + bool IsInefficientBase = isInefficientLEAReg(BaseR); + bool IsInefficientIndex = isInefficientLEAReg(IndexR); + + // Skip these cases since it takes more than 2 instructions + // to replace the LEA instruction. + if (IsInefficientBase && SSDstR == BaseR && !IsScale1) + return nullptr; + if (LEAOpcode == X86::LEA64_32r && IsInefficientBase && + (IsInefficientIndex || !IsScale1)) + return nullptr; + + const DebugLoc DL = MI.getDebugLoc(); + const MCInstrDesc &ADDrr = TII->get(getADDrrFromLEA(LEAOpcode)); + const MCInstrDesc &ADDri = TII->get(getADDriFromLEA(LEAOpcode, Offset)); + + DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MI.dump();); + DEBUG(dbgs() << "FixLEA: Replaced by: ";); + + // First try to replace LEA with one or two (for the 3-op LEA case) + // add instructions: + // 1.lea (%base,%index,1), %base => add %index,%base + // 2.lea (%base,%index,1), %index => add %base,%index + if (IsScale1 && (DstR == BaseR || DstR == IndexR)) { + const MachineOperand &Src = DstR == BaseR ? Index : Base; + MachineInstr *NewMI = + BuildMI(*MFI, MI, DL, ADDrr, DstR).addReg(DstR).add(Src); + DEBUG(NewMI->dump();); + // Create ADD instruction for the Offset in case of 3-Ops LEA. + if (hasLEAOffset(Offset)) { + NewMI = BuildMI(*MFI, MI, DL, ADDri, DstR).addReg(DstR).add(Offset); + DEBUG(NewMI->dump();); + } + return NewMI; + } + // If the base is inefficient try switching the index and base operands, + // otherwise just break the 3-Ops LEA inst into 2-Ops LEA + ADD instruction: + // lea offset(%base,%index,scale),%dst => + // lea (%base,%index,scale); add offset,%dst + if (!IsInefficientBase || (!IsInefficientIndex && IsScale1)) { + MachineInstr *NewMI = BuildMI(*MFI, MI, DL, TII->get(LEAOpcode)) + .add(Dst) + .add(IsInefficientBase ? Index : Base) + .add(Scale) + .add(IsInefficientBase ? Base : Index) + .addImm(0) + .add(Segment); + DEBUG(NewMI->dump();); + // Create ADD instruction for the Offset in case of 3-Ops LEA. 
+ if (hasLEAOffset(Offset)) { + NewMI = BuildMI(*MFI, MI, DL, ADDri, DstR).addReg(DstR).add(Offset); + DEBUG(NewMI->dump();); + } + return NewMI; + } + // Handle the rest of the cases with inefficient base register: + assert(SSDstR != BaseR && "SSDstR == BaseR should be handled already!"); + assert(IsInefficientBase && "efficient base should be handled already!"); + + // lea (%base,%index,1), %dst => mov %base,%dst; add %index,%dst + if (IsScale1 && !hasLEAOffset(Offset)) { + TII->copyPhysReg(*MFI, MI, DL, DstR, BaseR, Base.isKill()); + DEBUG(MI.getPrevNode()->dump();); + + MachineInstr *NewMI = + BuildMI(*MFI, MI, DL, ADDrr, DstR).addReg(DstR).add(Index); + DEBUG(NewMI->dump();); + return NewMI; } + // lea offset(%base,%index,scale), %dst => + // lea offset( ,%index,scale), %dst; add %base,%dst + MachineInstr *NewMI = BuildMI(*MFI, MI, DL, TII->get(LEAOpcode)) + .add(Dst) + .addReg(0) + .add(Scale) + .add(Index) + .add(Offset) + .add(Segment); + DEBUG(NewMI->dump();); + + NewMI = BuildMI(*MFI, MI, DL, ADDrr, DstR).addReg(DstR).add(Base); + DEBUG(NewMI->dump();); + return NewMI; } bool FixupLEAPass::processBasicBlock(MachineFunction &MF, @@ -410,8 +585,16 @@ bool FixupLEAPass::processBasicBlock(MachineFunction &MF, if (OptLEA) { if (MF.getSubtarget().isSLM()) processInstructionForSLM(I, MFI); - else - processInstruction(I, MFI); + + else { + if (MF.getSubtarget().slow3OpsLEA()) { + if (auto *NewMI = processInstrForSlow3OpLEA(*I, MFI)) { + MFI->erase(I); + I = NewMI; + } + } else + processInstruction(I, MFI); + } } } return false; diff --git a/interpreter/llvm/src/lib/Target/X86/X86FloatingPoint.cpp b/interpreter/llvm/src/lib/Target/X86/X86FloatingPoint.cpp index a5489b9aa8b7e..5582526541bae 100644 --- a/interpreter/llvm/src/lib/Target/X86/X86FloatingPoint.cpp +++ b/interpreter/llvm/src/lib/Target/X86/X86FloatingPoint.cpp @@ -123,18 +123,26 @@ namespace { EdgeBundles *Bundles; // Return a bitmask of FP registers in block's live-in list. - static unsigned calcLiveInMask(MachineBasicBlock *MBB) { + static unsigned calcLiveInMask(MachineBasicBlock *MBB, bool RemoveFPs) { unsigned Mask = 0; - for (const auto &LI : MBB->liveins()) { - if (LI.PhysReg < X86::FP0 || LI.PhysReg > X86::FP6) - continue; - Mask |= 1 << (LI.PhysReg - X86::FP0); + for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(); + I != MBB->livein_end(); ) { + MCPhysReg Reg = I->PhysReg; + static_assert(X86::FP6 - X86::FP0 == 6, "sequential regnums"); + if (Reg >= X86::FP0 && Reg <= X86::FP6) { + Mask |= 1 << (Reg - X86::FP0); + if (RemoveFPs) { + I = MBB->removeLiveIn(I); + continue; + } + } + ++I; } return Mask; } // Partition all the CFG edges into LiveBundles. - void bundleCFG(MachineFunction &MF); + void bundleCFGRecomputeKillFlags(MachineFunction &MF); MachineBasicBlock *MBB; // Current basic block @@ -327,7 +335,7 @@ bool FPS::runOnMachineFunction(MachineFunction &MF) { TII = MF.getSubtarget().getInstrInfo(); // Prepare cross-MBB liveness. - bundleCFG(MF); + bundleCFGRecomputeKillFlags(MF); StackTop = 0; @@ -375,13 +383,15 @@ bool FPS::runOnMachineFunction(MachineFunction &MF) { /// registers live-out from a block is identical to the live-in set of all /// successors. This is not enforced by the normal live-in lists since /// registers may be implicitly defined, or not used by all successors. 
-void FPS::bundleCFG(MachineFunction &MF) { +void FPS::bundleCFGRecomputeKillFlags(MachineFunction &MF) { assert(LiveBundles.empty() && "Stale data in LiveBundles"); LiveBundles.resize(Bundles->getNumBundles()); // Gather the actual live-in masks for all MBBs. for (MachineBasicBlock &MBB : MF) { - const unsigned Mask = calcLiveInMask(&MBB); + setKillFlags(MBB); + + const unsigned Mask = calcLiveInMask(&MBB, false); if (!Mask) continue; // Update MBB ingoing bundle mask. @@ -396,7 +406,6 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) { bool Changed = false; MBB = &BB; - setKillFlags(BB); setupBlockStack(); for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) { @@ -453,6 +462,7 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) { unsigned Reg = DeadRegs[i]; // Check if Reg is live on the stack. An inline-asm register operand that // is in the clobber list and marked dead might not be live on the stack. + static_assert(X86::FP7 - X86::FP0 == 7, "sequential FP regnumbers"); if (Reg >= X86::FP0 && Reg <= X86::FP6 && isLive(Reg-X86::FP0)) { DEBUG(dbgs() << "Register FP#" << Reg-X86::FP0 << " is dead!\n"); freeStackSlotAfter(I, Reg-X86::FP0); @@ -506,7 +516,6 @@ void FPS::setupBlockStack() { // Push the fixed live-in registers. for (unsigned i = Bundle.FixCount; i > 0; --i) { - MBB->addLiveIn(X86::ST0+i-1); DEBUG(dbgs() << "Live-in st(" << (i-1) << "): %FP" << unsigned(Bundle.FixStack[i-1]) << '\n'); pushReg(Bundle.FixStack[i-1]); @@ -515,7 +524,8 @@ void FPS::setupBlockStack() { // Kill off unwanted live-ins. This can happen with a critical edge. // FIXME: We could keep these live registers around as zombies. They may need // to be revived at the end of a short block. It might save a few instrs. - adjustLiveRegs(calcLiveInMask(MBB), MBB->begin()); + unsigned Mask = calcLiveInMask(MBB, /*RemoveFPs=*/true); + adjustLiveRegs(Mask, MBB->begin()); DEBUG(MBB->dump()); } @@ -1655,8 +1665,8 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &Inst) { } void FPS::setKillFlags(MachineBasicBlock &MBB) const { - const TargetRegisterInfo *TRI = - MBB.getParent()->getSubtarget().getRegisterInfo(); + const TargetRegisterInfo &TRI = + *MBB.getParent()->getSubtarget().getRegisterInfo(); LivePhysRegs LPR(TRI); LPR.addLiveOuts(MBB); diff --git a/interpreter/llvm/src/lib/Target/X86/X86FrameLowering.cpp b/interpreter/llvm/src/lib/Target/X86/X86FrameLowering.cpp index 331e56976db7c..f294e819090bc 100644 --- a/interpreter/llvm/src/lib/Target/X86/X86FrameLowering.cpp +++ b/interpreter/llvm/src/lib/Target/X86/X86FrameLowering.cpp @@ -29,8 +29,8 @@ #include "llvm/IR/Function.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/Target/TargetOptions.h" #include "llvm/Support/Debug.h" +#include "llvm/Target/TargetOptions.h" #include using namespace llvm; @@ -748,17 +748,7 @@ void X86FrameLowering::emitStackProbeCall(MachineFunction &MF, else CallOp = X86::CALLpcrel32; - const char *Symbol; - if (Is64Bit) { - if (STI.isTargetCygMing()) { - Symbol = "___chkstk_ms"; - } else { - Symbol = "__chkstk"; - } - } else if (STI.isTargetCygMing()) - Symbol = "_alloca"; - else - Symbol = "_chkstk"; + StringRef Symbol = STI.getTargetLowering()->getStackProbeSymbolName(MF); MachineInstrBuilder CI; MachineBasicBlock::iterator ExpansionMBBI = std::prev(MBBI); @@ -769,10 +759,11 @@ void X86FrameLowering::emitStackProbeCall(MachineFunction &MF, // For the large code model, we have to call through a register. 
Use R11, as it is scratch in all supported calling conventions. BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::R11) - .addExternalSymbol(Symbol); + .addExternalSymbol(MF.createExternalSymbolName(Symbol)); CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp)).addReg(X86::R11); } else { - CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp)).addExternalSymbol(Symbol); + CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp)) + .addExternalSymbol(MF.createExternalSymbolName(Symbol)); } unsigned AX = Is64Bit ? X86::RAX : X86::EAX; @@ -783,13 +774,16 @@ void X86FrameLowering::emitStackProbeCall(MachineFunction &MF, .addReg(SP, RegState::Define | RegState::Implicit) .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit); - if (Is64Bit) { + if (STI.isTargetWin64() || !STI.isOSWindows()) { + // MSVC x32's _chkstk and cygwin/mingw's _alloca adjust %esp themselves. // MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp - // themselves. It also does not clobber %rax so we can reuse it when + // themselves. They also do not clobber %rax so we can reuse it when + // adjusting %rsp. - BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64rr), X86::RSP) - .addReg(X86::RSP) - .addReg(X86::RAX); + // All other platforms do not specify a particular ABI for the stack probe + // function, so we arbitrarily define it to not adjust %esp/%rsp itself. + BuildMI(MBB, MBBI, DL, TII.get(getSUBrrOpcode(Is64Bit)), SP) + .addReg(SP) + .addReg(AX); } if (InProlog) { @@ -978,7 +972,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, X86FI->setCalleeSavedFrameSize( X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta); - bool UseStackProbe = (STI.isOSWindows() && !STI.isTargetMachO()); + bool UseStackProbe = !STI.getTargetLowering()->getStackProbeSymbolName(MF).empty(); // The default stack probe size is 4096 if the function has no stackprobesize // attribute. @@ -1007,6 +1001,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, !TRI->needsStackRealignment(MF) && !MFI.hasVarSizedObjects() && // No dynamic alloca. !MFI.adjustsStack() && // No calls. + !UseStackProbe && // No stack probes. !IsWin64CC && // Win64 has no Red Zone !MFI.hasCopyImplyingStackAdjustment() && // Don't push and pop. !MF.shouldSplitStack()) { // Regular stack @@ -1062,6 +1057,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, } if (HasFP) { + assert(MF.getRegInfo().isReserved(MachineFramePtr) && "FP reserved"); + // Calculate required stack adjustment. uint64_t FrameSize = StackSize - SlotSize; // If required, include space for extra hidden slot for stashing base pointer. @@ -1124,13 +1121,6 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, nullptr, DwarfFramePtr)); } } - - // Mark the FramePtr as live-in in every block. Don't do this again for - // funclet prologues. - if (!IsFunclet) { - for (MachineBasicBlock &EveryMBB : MF) - EveryMBB.addLiveIn(MachineFramePtr); - } } else { assert(!IsFunclet && "funclets without FPs not yet implemented"); NumBytes = StackSize - X86FI->getCalleeSavedFrameSize(); @@ -1197,6 +1187,9 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, if (IsWin64Prologue && !IsFunclet && TRI->needsStackRealignment(MF)) AlignedNumBytes = alignTo(AlignedNumBytes, MaxAlign); if (AlignedNumBytes >= StackProbeSize && UseStackProbe) { + assert(!X86FI->getUsesRedZone() && + "The Red Zone is not accounted for in stack probes"); + + // Check whether EAX is livein for this block.
bool isEAXAlive = isEAXLiveIn(MBB); diff --git a/interpreter/llvm/src/lib/Target/X86/X86ISelDAGToDAG.cpp b/interpreter/llvm/src/lib/Target/X86/X86ISelDAGToDAG.cpp index c899f0fd5100e..8f24f98be681f 100644 --- a/interpreter/llvm/src/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/interpreter/llvm/src/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -204,6 +204,11 @@ namespace { bool selectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment); + template + bool selectAddrOfGatherScatterNode(GatherScatterSDNode *Parent, SDValue N, + SDValue &Base, SDValue &Scale, + SDValue &Index, SDValue &Disp, + SDValue &Segment); bool selectMOV64Imm32(SDValue N, SDValue &Imm); bool selectLEAAddr(SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, @@ -418,8 +423,6 @@ X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const { case X86ISD::XOR: case X86ISD::OR: case ISD::ADD: - case ISD::ADDC: - case ISD::ADDE: case ISD::ADDCARRY: case ISD::AND: case ISD::OR: @@ -1052,7 +1055,10 @@ static bool foldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N, // Scale the leading zero count down based on the actual size of the value. // Also scale it down based on the size of the shift. - MaskLZ -= (64 - X.getSimpleValueType().getSizeInBits()) + ShiftAmt; + unsigned ScaleDown = (64 - X.getSimpleValueType().getSizeInBits()) + ShiftAmt; + if (MaskLZ < ScaleDown) + return true; + MaskLZ -= ScaleDown; // The final check is to ensure that any masked out high bits of X are // already known to be zero. Otherwise, the mask has a semantic impact @@ -1417,13 +1423,10 @@ bool X86DAGToDAGISel::matchAddressBase(SDValue N, X86ISelAddressMode &AM) { return false; } -bool X86DAGToDAGISel::selectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base, - SDValue &Scale, SDValue &Index, - SDValue &Disp, SDValue &Segment) { - - MaskedGatherScatterSDNode *Mgs = dyn_cast(Parent); - if (!Mgs) - return false; +template +bool X86DAGToDAGISel::selectAddrOfGatherScatterNode( + GatherScatterSDNode *Mgs, SDValue N, SDValue &Base, SDValue &Scale, + SDValue &Index, SDValue &Disp, SDValue &Segment) { X86ISelAddressMode AM; unsigned AddrSpace = Mgs->getPointerInfo().getAddrSpace(); // AddrSpace 256 -> GS, 257 -> FS, 258 -> SS. @@ -1455,6 +1458,18 @@ bool X86DAGToDAGISel::selectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base, return true; } +bool X86DAGToDAGISel::selectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base, + SDValue &Scale, SDValue &Index, + SDValue &Disp, SDValue &Segment) { + if (auto Mgs = dyn_cast(Parent)) + return selectAddrOfGatherScatterNode( + Mgs, N, Base, Scale, Index, Disp, Segment); + if (auto X86Gather = dyn_cast(Parent)) + return selectAddrOfGatherScatterNode( + X86Gather, N, Base, Scale, Index, Disp, Segment); + return false; +} + /// Returns true if it is able to pattern match an addressing mode. /// It returns the operands which make up the maximal addressing mode it can /// match by reference. 
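One of the fixes in the X86ISelDAGToDAG.cpp hunk above guards an unsigned subtraction in `foldMaskAndShiftToScale` that could previously wrap. A minimal, self-contained illustration of the failure mode the new `if (MaskLZ < ScaleDown) return true;` check prevents (the concrete values here are hypothetical):

```cpp
#include <cstdio>

int main() {
  // Hypothetical operand: a 32-bit value shifted by 8, masked with a mask
  // that has only 3 leading zeros when counted in 64 bits.
  unsigned MaskLZ = 3;
  unsigned ScaleDown = (64 - 32) + 8; // 40, as computed in the patched code
  if (MaskLZ < ScaleDown) {
    // This is the case the new guard catches: the old unconditional
    // "MaskLZ -= ScaleDown" would wrap around on the unsigned type and
    // produce a nonsensical leading-zero count instead of giving up.
    std::puts("cannot fold; bail out as the patched code now does");
    return 0;
  }
  MaskLZ -= ScaleDown; // safe only once the guard above has passed
  std::printf("scaled MaskLZ = %u\n", MaskLZ);
  return 0;
}
```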
diff --git a/interpreter/llvm/src/lib/Target/X86/X86ISelLowering.cpp b/interpreter/llvm/src/lib/Target/X86/X86ISelLowering.cpp index 1f5b94ff8fa9d..957b46c40a6e8 100644 --- a/interpreter/llvm/src/lib/Target/X86/X86ISelLowering.cpp +++ b/interpreter/llvm/src/lib/Target/X86/X86ISelLowering.cpp @@ -80,6 +80,12 @@ static cl::opt ExperimentalPrefLoopAlignment( " of the loop header PC will be 0)."), cl::Hidden); +static cl::opt MulConstantOptimization( + "mul-constant-optimization", cl::init(true), + cl::desc("Replace 'mul x, Const' with more effective instructions like " + "SHIFT, LEA, etc."), + cl::Hidden); + /// Call this when the user attempts to do something unsupported, like /// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike /// report_fatal_error, so calling code should attempt to recover without @@ -311,16 +317,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::UREM, VT, Expand); } - for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) { - if (VT == MVT::i64 && !Subtarget.is64Bit()) - continue; - // Add/Sub overflow ops with MVT::Glues are lowered to EFLAGS dependences. - setOperationAction(ISD::ADDC, VT, Custom); - setOperationAction(ISD::ADDE, VT, Custom); - setOperationAction(ISD::SUBC, VT, Custom); - setOperationAction(ISD::SUBE, VT, Custom); - } - setOperationAction(ISD::BR_JT , MVT::Other, Expand); setOperationAction(ISD::BRCOND , MVT::Other, Custom); for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128, @@ -422,8 +418,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, continue; setOperationAction(ISD::SELECT, VT, Custom); setOperationAction(ISD::SETCC, VT, Custom); - setOperationAction(ISD::SETCCE, VT, Custom); } + + // Custom action for SELECT MMX and expand action for SELECT_CC MMX + setOperationAction(ISD::SELECT, MVT::x86mmx, Custom); + setOperationAction(ISD::SELECT_CC, MVT::x86mmx, Expand); + setOperationAction(ISD::EH_RETURN , MVT::Other, Custom); // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support // SjLj exception handling but a light-weight setjmp/longjmp replacement to @@ -670,7 +670,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FSINCOS, VT, Expand); setOperationAction(ISD::FCOS, VT, Expand); setOperationAction(ISD::FREM, VT, Expand); - setOperationAction(ISD::FPOWI, VT, Expand); setOperationAction(ISD::FCOPYSIGN, VT, Expand); setOperationAction(ISD::FPOW, VT, Expand); setOperationAction(ISD::FLOG, VT, Expand); @@ -1140,7 +1139,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, addRegisterClass(MVT::v8i64, &X86::VR512RegClass); addRegisterClass(MVT::v8f64, &X86::VR512RegClass); - addRegisterClass(MVT::i1, &X86::VK1RegClass); + addRegisterClass(MVT::v1i1, &X86::VK1RegClass); addRegisterClass(MVT::v8i1, &X86::VK8RegClass); addRegisterClass(MVT::v16i1, &X86::VK16RegClass); @@ -1155,16 +1154,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i16, Legal); setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i32, Legal); } - setOperationAction(ISD::BR_CC, MVT::i1, Expand); - setOperationAction(ISD::SETCC, MVT::i1, Custom); - setOperationAction(ISD::SETCCE, MVT::i1, Custom); - setOperationAction(ISD::SELECT_CC, MVT::i1, Expand); - setOperationAction(ISD::XOR, MVT::i1, Legal); - setOperationAction(ISD::OR, MVT::i1, Legal); - setOperationAction(ISD::AND, MVT::i1, Legal); - setOperationAction(ISD::SUB, MVT::i1, Custom); - 
setOperationAction(ISD::ADD, MVT::i1, Custom); - setOperationAction(ISD::MUL, MVT::i1, Custom); for (MVT VT : {MVT::v2i64, MVT::v4i32, MVT::v8i32, MVT::v4i64, MVT::v8i16, MVT::v16i8, MVT::v16i16, MVT::v32i8, MVT::v16i32, @@ -1233,7 +1222,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::MSTORE, VT, Custom); } } - setOperationAction(ISD::TRUNCATE, MVT::i1, Custom); setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom); setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom); @@ -1311,7 +1299,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::MUL, MVT::v8i64, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v1i1, Custom); setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v16i1, Custom); + setOperationAction(ISD::BUILD_VECTOR, MVT::v1i1, Custom); setOperationAction(ISD::SELECT, MVT::v8f64, Custom); setOperationAction(ISD::SELECT, MVT::v8i64, Custom); setOperationAction(ISD::SELECT, MVT::v16f32, Custom); @@ -1350,6 +1340,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::CTTZ, VT, Custom); } + // NonVLX sub-targets extend 128/256 vectors to use the 512 version. + for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v16i32, MVT::v2i64, MVT::v4i64, + MVT::v8i64}) { + setOperationAction(ISD::ROTL, VT, Custom); + setOperationAction(ISD::ROTR, VT, Custom); + } + // Need to promote to 64-bit even though we have 32-bit masked instructions // because the IR optimizers rearrange bitcasts around logic ops leaving // too many variations to handle if we don't promote them. @@ -1373,6 +1370,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::MUL, MVT::v8i64, Legal); } + if (Subtarget.hasVPOPCNTDQ()) { + // VPOPCNTDQ sub-targets extend 128/256 vectors to use the avx512 + // version of popcntd/q. + for (auto VT : {MVT::v16i32, MVT::v8i64, MVT::v8i32, MVT::v4i64, + MVT::v4i32, MVT::v2i64}) + setOperationAction(ISD::CTPOP, VT, Legal); + } + // Custom lower several nodes. for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64, MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) { @@ -1383,7 +1388,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, // (result) is 256-bit but the source is 512-bit wide. // 128-bit was made Custom under AVX1. for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, - MVT::v8f32, MVT::v4f64 }) + MVT::v8f32, MVT::v4f64, MVT::v1i1 }) setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); for (auto VT : { MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1, MVT::v32i1, MVT::v64i1 }) @@ -1579,6 +1584,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, // Support carry in as value rather than glue. setOperationAction(ISD::ADDCARRY, VT, Custom); setOperationAction(ISD::SUBCARRY, VT, Custom); + setOperationAction(ISD::SETCCCARRY, VT, Custom); } if (!Subtarget.is64Bit()) { @@ -1668,6 +1674,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, MaxStoresPerMemcpyOptSize = 4; MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores MaxStoresPerMemmoveOptSize = 4; + + // TODO: These control memcmp expansion in CGP and could be raised higher, but + // that needs to be benchmarked and balanced with the potential use of vector + // load/store types (PR33329, PR33914). + MaxLoadsPerMemcmp = 2; + MaxLoadsPerMemcmpOptSize = 2; + // Set loop alignment to 2^ExperimentalPrefLoopAlignment bytes (default: 2^4).
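To illustrate what MaxLoadsPerMemcmp = 2 in the hunk above permits: an equality memcmp of 16 bytes can be expanded into two 8-byte load/compare pairs instead of a library call. A standalone C++ sketch of the resulting semantics (illustrative only; memcpy-based loads keep it strict-aliasing safe):

#include <cassert>
#include <cstdint>
#include <cstring>

static uint64_t load64(const char *P) {
  uint64_t V;
  std::memcpy(&V, P, sizeof(V)); // unaligned-safe 8-byte load
  return V;
}

// memcmp(A, B, 16) == 0, expanded as two 8-byte load/compare pairs.
bool equal16(const char *A, const char *B) {
  return load64(A) == load64(B) && load64(A + 8) == load64(B + 8);
}

int main() {
  const char X[16] = "0123456789abcde";
  const char Y[16] = "0123456789abcde";
  assert(equal16(X, Y));
}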
setPrefLoopAlignment(ExperimentalPrefLoopAlignment); @@ -1699,7 +1712,7 @@ EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext& Context, EVT VT) const { if (!VT.isVector()) - return Subtarget.hasAVX512() ? MVT::i1: MVT::i8; + return MVT::i8; if (VT.isSimple()) { MVT VVT = VT.getSimpleVT(); @@ -2480,6 +2493,9 @@ static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT, SelectionDAG &DAG) { SDValue ValReturned = ValArg; + if (ValVT == MVT::v1i1) + return DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v1i1, ValReturned); + if (ValVT == MVT::v64i1) { // In 32 bit machine, this case is handled by getv64i1Argument assert(ValLoc == MVT::i64 && "Expecting only i64 locations"); @@ -2502,7 +2518,6 @@ static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT, ValReturned = DAG.getNode(ISD::TRUNCATE, Dl, maskLen, ValReturned); } - return DAG.getBitcast(ValVT, ValReturned); } @@ -2659,7 +2674,7 @@ static bool mayTailCallThisCC(CallingConv::ID CC) { switch (CC) { // C calling conventions: case CallingConv::C: - case CallingConv::X86_64_Win64: + case CallingConv::Win64: case CallingConv::X86_64_SysV: // Callee pop conventions: case CallingConv::X86_ThisCall: @@ -2809,8 +2824,11 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv, SDValue Val = DAG.getLoad( ValVT, dl, Chain, FIN, MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)); - return ExtendedInMem ? DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val) - : Val; + return ExtendedInMem + ? (VA.getValVT().isVector() + ? DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VA.getValVT(), Val) + : DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val)) + : Val; } // FIXME: Get this from tablegen. @@ -2960,7 +2978,7 @@ SDValue X86TargetLowering::LowerFormalArguments( RC = Subtarget.hasVLX() ? &X86::VR128XRegClass : &X86::VR128RegClass; else if (RegVT == MVT::x86mmx) RC = &X86::VR64RegClass; - else if (RegVT == MVT::i1) + else if (RegVT == MVT::v1i1) RC = &X86::VK1RegClass; else if (RegVT == MVT::v8i1) RC = &X86::VK8RegClass; @@ -3971,6 +3989,13 @@ bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, if (Offset != MFI.getObjectOffset(FI)) return false; + // If this is not byval, check that the argument stack object is immutable. + // inalloca and argument copy elision can create mutable argument stack + // objects. Byval objects can be mutated, but a byval call intends to pass the + // mutated memory. + if (!Flags.isByVal() && !MFI.isImmutableObjectIndex(FI)) + return false; + if (VA.getLocVT().getSizeInBits() > Arg.getValueSizeInBits()) { // If the argument location is wider than the argument type, check that any // extension flags match. 
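For context on the getSetCCResultType change above (scalar SETCC now always yields MVT::i8, even with AVX-512, rather than a mask-register i1): a scalar comparison materializes as a 0/1 byte via SETcc. A standalone sketch of the observable behavior, not LLVM code:

#include <cassert>

// On x86 this typically compiles to: cmp; setl %al; movzbl %al, %eax.
int lessThan(int A, int B) { return A < B; }

int main() { assert(lessThan(1, 2) == 1 && lessThan(2, 1) == 0); }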
@@ -4212,6 +4237,8 @@ static bool isTargetShuffle(unsigned Opcode) { case X86ISD::PSHUFLW: case X86ISD::SHUFP: case X86ISD::INSERTPS: + case X86ISD::EXTRQI: + case X86ISD::INSERTQI: case X86ISD::PALIGNR: case X86ISD::VSHLDQ: case X86ISD::VSRLDQ: @@ -4759,7 +4786,7 @@ static void scaleShuffleMask(int Scale, ArrayRef<int> Mask, SmallVectorImpl<int> &ScaledMask) { assert(0 < Scale && "Unexpected scaling factor"); int NumElts = Mask.size(); - ScaledMask.assign(NumElts * Scale, -1); + ScaledMask.assign(static_cast<size_t>(NumElts * Scale), -1); for (int i = 0; i != NumElts; ++i) { int M = Mask[i]; @@ -5060,6 +5087,20 @@ static SDValue insert256BitVector(SDValue Result, SDValue Vec, unsigned IdxVal, return insertSubVector(Result, Vec, IdxVal, DAG, dl, 256); } +// Return true if the instruction zeroes the unused upper part of the +// destination and accepts a mask. +static bool isMaskedZeroUpperBitsvXi1(unsigned int Opcode) { + switch (Opcode) { + default: + return false; + case X86ISD::PCMPEQM: + case X86ISD::PCMPGTM: + case X86ISD::CMPM: + case X86ISD::CMPMU: + return true; + } +} + /// Insert i1-subvector into i1-vector. static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG, const X86Subtarget &Subtarget) { @@ -5092,6 +5133,22 @@ static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG, // 3. Subvector should be inserted in the middle (for example v2i1 // to v16i1, index 2) + // If this node widens - by concatenating zeroes - the type of the result + // of a node with an instruction that zeroes all upper (irrelevant) bits of the + // output register, mark this node as legal to enable replacing it with + // the v8i1 version of the previous instruction during instruction selection. + // For example, VPCMPEQDZ128rr instruction stores its v4i1 result in a k-reg, + // while zeroing all the upper remaining 60 bits of the register. If the + // result of such an instruction is inserted into an allZeroVector, then we can + // safely remove insert_vector (in instruction selection) as the cmp instr + // already zeroed the rest of the register. if (ISD::isBuildVectorAllZeros(Vec.getNode()) && IdxVal == 0 && + (isMaskedZeroUpperBitsvXi1(SubVec.getOpcode()) || + (SubVec.getOpcode() == ISD::AND && + (isMaskedZeroUpperBitsvXi1(SubVec.getOperand(0).getOpcode()) || + isMaskedZeroUpperBitsvXi1(SubVec.getOperand(1).getOpcode()))))) + return Op; + // extend to natively supported kshift MVT MinVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1; MVT WideOpVT = OpVT; @@ -5314,20 +5371,37 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits, assert((SizeInBits % EltSizeInBits) == 0 && "Can't split constant!"); unsigned NumElts = SizeInBits / EltSizeInBits; - unsigned SrcEltSizeInBits = VT.getScalarSizeInBits(); - unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits; - - // Extract all the undef/constant element data and pack into single bitsets. - APInt UndefBits(SizeInBits, 0); - APInt MaskBits(SizeInBits, 0); + // Bitcast a source array of element bits to the target size. + auto CastBitData = [&](APInt &UndefSrcElts, ArrayRef<APInt> SrcEltBits) { + unsigned NumSrcElts = UndefSrcElts.getBitWidth(); + unsigned SrcEltSizeInBits = SrcEltBits[0].getBitWidth(); + assert((NumSrcElts * SrcEltSizeInBits) == SizeInBits && + "Constant bit sizes don't match"); - // Split the undef/constant single bitset data into the target elements. - auto SplitBitData = [&]() { // Don't split if we don't allow undef bits.
bool AllowUndefs = AllowWholeUndefs || AllowPartialUndefs; - if (UndefBits.getBoolValue() && !AllowUndefs) + if (UndefSrcElts.getBoolValue() && !AllowUndefs) return false; + // If we're already the right size, don't bother bitcasting. + if (NumSrcElts == NumElts) { + UndefElts = UndefSrcElts; + EltBits.assign(SrcEltBits.begin(), SrcEltBits.end()); + return true; + } + + // Extract all the undef/constant element data and pack into single bitsets. + APInt UndefBits(SizeInBits, 0); + APInt MaskBits(SizeInBits, 0); + + for (unsigned i = 0; i != NumSrcElts; ++i) { + unsigned BitOffset = i * SrcEltSizeInBits; + if (UndefSrcElts[i]) + UndefBits.setBits(BitOffset, BitOffset + SrcEltSizeInBits); + MaskBits.insertBits(SrcEltBits[i], BitOffset); + } + + // Split the undef/constant single bitset data into the target elements. UndefElts = APInt(NumElts, 0); EltBits.resize(NumElts, APInt(EltSizeInBits, 0)); @@ -5356,20 +5430,19 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits, // Collect constant bits and insert into mask/undef bit masks. auto CollectConstantBits = [](const Constant *Cst, APInt &Mask, APInt &Undefs, - unsigned BitOffset) { + unsigned UndefBitIndex) { if (!Cst) return false; if (isa(Cst)) { - unsigned CstSizeInBits = Cst->getType()->getPrimitiveSizeInBits(); - Undefs.setBits(BitOffset, BitOffset + CstSizeInBits); + Undefs.setBit(UndefBitIndex); return true; } if (auto *CInt = dyn_cast(Cst)) { - Mask.insertBits(CInt->getValue(), BitOffset); + Mask = CInt->getValue(); return true; } if (auto *CFP = dyn_cast(Cst)) { - Mask.insertBits(CFP->getValueAPF().bitcastToAPInt(), BitOffset); + Mask = CFP->getValueAPF().bitcastToAPInt(); return true; } return false; @@ -5377,18 +5450,21 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits, // Extract constant bits from build vector. if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) { + unsigned SrcEltSizeInBits = VT.getScalarSizeInBits(); + unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits; + + APInt UndefSrcElts(NumSrcElts, 0); + SmallVector SrcEltBits(NumSrcElts, APInt(SrcEltSizeInBits, 0)); for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) { const SDValue &Src = Op.getOperand(i); - unsigned BitOffset = i * SrcEltSizeInBits; if (Src.isUndef()) { - UndefBits.setBits(BitOffset, BitOffset + SrcEltSizeInBits); + UndefSrcElts.setBit(i); continue; } auto *Cst = cast(Src); - APInt Bits = Cst->getAPIntValue().zextOrTrunc(SrcEltSizeInBits); - MaskBits.insertBits(Bits, BitOffset); + SrcEltBits[i] = Cst->getAPIntValue().zextOrTrunc(SrcEltSizeInBits); } - return SplitBitData(); + return CastBitData(UndefSrcElts, SrcEltBits); } // Extract constant bits from constant pool vector. 
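The CastBitData lambda introduced in the hunk above repacks constant data by concatenating the source elements into one bit string and re-slicing it at the requested element width. A standalone C++ sketch of that repacking, with uint64_t standing in for the APInt bit accumulator (illustrative names, not from the patch):

#include <cassert>
#include <cstdint>
#include <vector>

std::vector<uint8_t> repackTo8(const std::vector<uint16_t> &Src) {
  uint64_t Bits = 0;
  for (size_t i = 0; i < Src.size(); ++i)
    Bits |= (uint64_t)Src[i] << (16 * i);      // pack source elements
  std::vector<uint8_t> Out(Src.size() * 2);
  for (size_t i = 0; i < Out.size(); ++i)
    Out[i] = (uint8_t)(Bits >> (8 * i));       // re-slice at target width
  return Out;
}

int main() {
  auto Out = repackTo8({0x1234, 0xABCD});
  assert(Out[0] == 0x34 && Out[1] == 0x12 && Out[2] == 0xCD && Out[3] == 0xAB);
}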
@@ -5397,27 +5473,33 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits, if (!CstTy->isVectorTy() || (SizeInBits != CstTy->getPrimitiveSizeInBits())) return false; - unsigned CstEltSizeInBits = CstTy->getScalarSizeInBits(); - for (unsigned i = 0, e = CstTy->getVectorNumElements(); i != e; ++i) - if (!CollectConstantBits(Cst->getAggregateElement(i), MaskBits, UndefBits, - i * CstEltSizeInBits)) + unsigned SrcEltSizeInBits = CstTy->getScalarSizeInBits(); + unsigned NumSrcElts = CstTy->getVectorNumElements(); + + APInt UndefSrcElts(NumSrcElts, 0); + SmallVector SrcEltBits(NumSrcElts, APInt(SrcEltSizeInBits, 0)); + for (unsigned i = 0; i != NumSrcElts; ++i) + if (!CollectConstantBits(Cst->getAggregateElement(i), SrcEltBits[i], + UndefSrcElts, i)) return false; - return SplitBitData(); + return CastBitData(UndefSrcElts, SrcEltBits); } // Extract constant bits from a broadcasted constant pool scalar. if (Op.getOpcode() == X86ISD::VBROADCAST && - EltSizeInBits <= SrcEltSizeInBits) { + EltSizeInBits <= VT.getScalarSizeInBits()) { if (auto *Broadcast = getTargetConstantFromNode(Op.getOperand(0))) { - APInt Bits(SizeInBits, 0); - APInt Undefs(SizeInBits, 0); - if (CollectConstantBits(Broadcast, Bits, Undefs, 0)) { - for (unsigned i = 0; i != NumSrcElts; ++i) { - MaskBits |= Bits.shl(i * SrcEltSizeInBits); - UndefBits |= Undefs.shl(i * SrcEltSizeInBits); - } - return SplitBitData(); + unsigned SrcEltSizeInBits = Broadcast->getType()->getScalarSizeInBits(); + unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits; + + APInt UndefSrcElts(NumSrcElts, 0); + SmallVector SrcEltBits(1, APInt(SrcEltSizeInBits, 0)); + if (CollectConstantBits(Broadcast, SrcEltBits[0], UndefSrcElts, 0)) { + if (UndefSrcElts[0]) + UndefSrcElts.setBits(0, NumSrcElts); + SrcEltBits.append(NumSrcElts - 1, SrcEltBits[0]); + return CastBitData(UndefSrcElts, SrcEltBits); } } } @@ -5426,10 +5508,15 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits, if (Op.getOpcode() == X86ISD::VZEXT_MOVL && Op.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR && isa(Op.getOperand(0).getOperand(0))) { + unsigned SrcEltSizeInBits = VT.getScalarSizeInBits(); + unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits; + + APInt UndefSrcElts(NumSrcElts, 0); + SmallVector SrcEltBits; auto *CN = cast(Op.getOperand(0).getOperand(0)); - MaskBits = CN->getAPIntValue().zextOrTrunc(SrcEltSizeInBits); - MaskBits = MaskBits.zext(SizeInBits); - return SplitBitData(); + SrcEltBits.push_back(CN->getAPIntValue().zextOrTrunc(SrcEltSizeInBits)); + SrcEltBits.append(NumSrcElts - 1, APInt(SrcEltSizeInBits, 0)); + return CastBitData(UndefSrcElts, SrcEltBits); } return false; @@ -5489,6 +5576,24 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero, DecodeINSERTPSMask(cast(ImmN)->getZExtValue(), Mask); IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); break; + case X86ISD::EXTRQI: + if (isa(N->getOperand(1)) && + isa(N->getOperand(2))) { + int BitLen = N->getConstantOperandVal(1); + int BitIdx = N->getConstantOperandVal(2); + DecodeEXTRQIMask(VT, BitLen, BitIdx, Mask); + IsUnary = true; + } + break; + case X86ISD::INSERTQI: + if (isa(N->getOperand(2)) && + isa(N->getOperand(3))) { + int BitLen = N->getConstantOperandVal(2); + int BitIdx = N->getConstantOperandVal(3); + DecodeINSERTQIMask(VT, BitLen, BitIdx, Mask); + IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); + } + break; case X86ISD::UNPCKH: DecodeUNPCKHMask(VT, Mask); IsUnary = IsFakeUnary = N->getOperand(0) == 
N->getOperand(1); @@ -5816,7 +5921,8 @@ static bool setTargetShuffleZeroElements(SDValue N, // The decoded shuffle mask may contain a different number of elements to the // destination value type. static bool getFauxShuffleMask(SDValue N, SmallVectorImpl &Mask, - SmallVectorImpl &Ops) { + SmallVectorImpl &Ops, + SelectionDAG &DAG) { Mask.clear(); Ops.clear(); @@ -5854,17 +5960,42 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl &Mask, return true; } case ISD::SCALAR_TO_VECTOR: { - // Match against a scalar_to_vector of an extract from a similar vector. + // Match against a scalar_to_vector of an extract from a vector, + // for PEXTRW/PEXTRB we must handle the implicit zext of the scalar. SDValue N0 = N.getOperand(0); - if (N0.getOpcode() != ISD::EXTRACT_VECTOR_ELT || - N0.getOperand(0).getValueType() != VT || - !isa(N0.getOperand(1)) || - NumElts <= N0.getConstantOperandVal(1) || - !N->isOnlyUserOf(N0.getNode())) + SDValue SrcExtract; + + if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && + N0.getOperand(0).getValueType() == VT) { + SrcExtract = N0; + } else if (N0.getOpcode() == ISD::AssertZext && + N0.getOperand(0).getOpcode() == X86ISD::PEXTRW && + cast(N0.getOperand(1))->getVT() == MVT::i16) { + SrcExtract = N0.getOperand(0); + assert(SrcExtract.getOperand(0).getValueType() == MVT::v8i16); + } else if (N0.getOpcode() == ISD::AssertZext && + N0.getOperand(0).getOpcode() == X86ISD::PEXTRB && + cast(N0.getOperand(1))->getVT() == MVT::i8) { + SrcExtract = N0.getOperand(0); + assert(SrcExtract.getOperand(0).getValueType() == MVT::v16i8); + } + + if (!SrcExtract || !isa(SrcExtract.getOperand(1))) + return false; + + SDValue SrcVec = SrcExtract.getOperand(0); + EVT SrcVT = SrcVec.getValueType(); + unsigned NumSrcElts = SrcVT.getVectorNumElements(); + unsigned NumZeros = (NumBitsPerElt / SrcVT.getScalarSizeInBits()) - 1; + + unsigned SrcIdx = SrcExtract.getConstantOperandVal(1); + if (NumSrcElts <= SrcIdx) return false; - Ops.push_back(N0.getOperand(0)); - Mask.push_back(N0.getConstantOperandVal(1)); - Mask.append(NumElts - 1, SM_SentinelUndef); + + Ops.push_back(SrcVec); + Mask.push_back(SrcIdx); + Mask.append(NumZeros, SM_SentinelZero); + Mask.append(NumSrcElts - Mask.size(), SM_SentinelUndef); return true; } case X86ISD::PINSRB: @@ -5899,6 +6030,19 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl &Mask, Mask.push_back(i == InIdx ? NumElts + ExIdx : i); return true; } + case X86ISD::PACKSS: { + // If we know input saturation won't happen we can treat this + // as a truncation shuffle. + if (DAG.ComputeNumSignBits(N.getOperand(0)) <= NumBitsPerElt || + DAG.ComputeNumSignBits(N.getOperand(1)) <= NumBitsPerElt) + return false; + + Ops.push_back(N.getOperand(0)); + Ops.push_back(N.getOperand(1)); + for (unsigned i = 0; i != NumElts; ++i) + Mask.push_back(i * 2); + return true; + } case X86ISD::VSHLI: case X86ISD::VSRLI: { uint64_t ShiftVal = N.getConstantOperandVal(1); @@ -5973,9 +6117,10 @@ static void resolveTargetShuffleInputsAndMask(SmallVectorImpl &Inputs, /// Returns true if the target shuffle mask was decoded. 
static bool resolveTargetShuffleInputs(SDValue Op, SmallVectorImpl<SDValue> &Inputs, - SmallVectorImpl<int> &Mask) { + SmallVectorImpl<int> &Mask, + SelectionDAG &DAG) { if (!setTargetShuffleZeroElements(Op, Mask, Inputs)) - if (!getFauxShuffleMask(Op, Mask, Inputs)) + if (!getFauxShuffleMask(Op, Mask, Inputs, DAG)) return false; resolveTargetShuffleInputsAndMask(Inputs, Mask); @@ -6375,6 +6520,7 @@ static SDValue LowerAsSplatVectorLoad(SDValue SrcOp, MVT VT, const SDLoc &dl, /// Example: <load i32 *a, load i32 *a+4, undef, undef> -> zextload a static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts, const SDLoc &DL, SelectionDAG &DAG, + const X86Subtarget &Subtarget, bool isAfterLegalize) { unsigned NumElems = Elts.size(); @@ -6450,16 +6596,7 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts, SDValue NewLd = DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(), LDBase->getPointerInfo(), LDBase->getAlignment(), MMOFlags); - - if (LDBase->hasAnyUseOfValue(1)) { - SDValue NewChain = - DAG.getNode(ISD::TokenFactor, DL, MVT::Other, SDValue(LDBase, 1), - SDValue(NewLd.getNode(), 1)); - DAG.ReplaceAllUsesOfValueWith(SDValue(LDBase, 1), NewChain); - DAG.UpdateNodeOperands(NewChain.getNode(), SDValue(LDBase, 1), - SDValue(NewLd.getNode(), 1)); - } - + DAG.makeEquivalentMemoryOrdering(LDBase, NewLd); return NewLd; }; @@ -6479,6 +6616,12 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts, if (isAfterLegalize && !TLI.isOperationLegal(ISD::LOAD, VT)) return SDValue(); + // Don't create 256-bit non-temporal aligned loads without AVX2 as these + // will lower to regular temporal loads and use the cache. + if (LDBase->isNonTemporal() && LDBase->getAlignment() >= 32 && + VT.is256BitVector() && !Subtarget.hasInt256()) + return SDValue(); + if (IsConsecutiveLoad) return CreateLoad(VT, LDBase); @@ -6518,19 +6661,7 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts, LDBase->getAlignment(), false/*isVolatile*/, true/*ReadMem*/, false/*WriteMem*/); - - // Make sure the newly-created LOAD is in the same position as LDBase in - // terms of dependency. We create a TokenFactor for LDBase and ResNode, - // and update uses of LDBase's output chain to use the TokenFactor.
- if (LDBase->hasAnyUseOfValue(1)) { - SDValue NewChain = - DAG.getNode(ISD::TokenFactor, DL, MVT::Other, SDValue(LDBase, 1), - SDValue(ResNode.getNode(), 1)); - DAG.ReplaceAllUsesOfValueWith(SDValue(LDBase, 1), NewChain); - DAG.UpdateNodeOperands(NewChain.getNode(), SDValue(LDBase, 1), - SDValue(ResNode.getNode(), 1)); - } - + DAG.makeEquivalentMemoryOrdering(LDBase, ResNode); return DAG.getBitcast(VT, ResNode); } } @@ -6548,12 +6679,12 @@ static Constant *getConstantVector(MVT VT, const APInt &SplatValue, APInt Val = SplatValue.extractBits(ScalarSize, ScalarSize * i); Constant *Const; if (VT.isFloatingPoint()) { - assert((ScalarSize == 32 || ScalarSize == 64) && - "Unsupported floating point scalar size"); - if (ScalarSize == 32) - Const = ConstantFP::get(Type::getFloatTy(C), Val.bitsToFloat()); - else - Const = ConstantFP::get(Type::getDoubleTy(C), Val.bitsToDouble()); + if (ScalarSize == 32) { + Const = ConstantFP::get(C, APFloat(APFloat::IEEEsingle(), Val)); + } else { + assert(ScalarSize == 64 && "Unsupported floating point scalar size"); + Const = ConstantFP::get(C, APFloat(APFloat::IEEEdouble(), Val)); + } } else Const = Constant::getIntegerValue(Type::getIntNTy(C, ScalarSize), Val); ConstantVec.push_back(Const); @@ -6639,11 +6770,13 @@ static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp, // AVX has support for 32 and 64 bit broadcast for floats only. // No 64bit integer in 32bit subtarget. MVT CVT = MVT::getFloatingPointVT(SplatBitSize); - Constant *C = SplatBitSize == 32 - ? ConstantFP::get(Type::getFloatTy(*Ctx), - SplatValue.bitsToFloat()) - : ConstantFP::get(Type::getDoubleTy(*Ctx), - SplatValue.bitsToDouble()); + // Lower the splat via APFloat directly, to avoid any conversion. + Constant *C = + SplatBitSize == 32 + ? ConstantFP::get(*Ctx, + APFloat(APFloat::IEEEsingle(), SplatValue)) + : ConstantFP::get(*Ctx, + APFloat(APFloat::IEEEdouble(), SplatValue)); SDValue CP = DAG.getConstantPool(C, PVT); unsigned Repeat = VT.getSizeInBits() / SplatBitSize; @@ -6871,7 +7004,7 @@ static SDValue ConvertI1VectorToInteger(SDValue Op, SelectionDAG &DAG) { for (unsigned idx = 0, e = Op.getNumOperands(); idx < e; ++idx) { SDValue In = Op.getOperand(idx); if (!In.isUndef()) - Immediate |= cast<ConstantSDNode>(In)->getZExtValue() << idx; + Immediate |= (cast<ConstantSDNode>(In)->getZExtValue() & 0x1) << idx; } SDLoc dl(Op); MVT VT = MVT::getIntegerVT(std::max((int)Op.getValueSizeInBits(), 8)); @@ -6914,7 +7047,7 @@ X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const { if (!isa<ConstantSDNode>(In)) NonConstIdx.push_back(idx); else { - Immediate |= cast<ConstantSDNode>(In)->getZExtValue() << idx; + Immediate |= (cast<ConstantSDNode>(In)->getZExtValue() & 0x1) << idx; HasConstElts = true; } if (SplatIdx < 0) @@ -6925,9 +7058,9 @@ X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const { // for splat use " (select i1 splat_elt, all-ones, all-zeroes)" if (IsSplat) - return DAG.getNode(ISD::SELECT, dl, VT, Op.getOperand(SplatIdx), - DAG.getConstant(1, dl, VT), - DAG.getConstant(0, dl, VT)); + return DAG.getSelect(dl, VT, Op.getOperand(SplatIdx), + DAG.getConstant(1, dl, VT), + DAG.getConstant(0, dl, VT)); // insert elements one by one SDValue DstVec; @@ -7683,7 +7816,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { // See if we can use a vector load to get all of the elements.
if (VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) { SmallVector Ops(Op->op_begin(), Op->op_begin() + NumElems); - if (SDValue LD = EltsFromConsecutiveLoads(VT, Ops, dl, DAG, false)) + if (SDValue LD = + EltsFromConsecutiveLoads(VT, Ops, dl, DAG, Subtarget, false)) return LD; } @@ -7807,24 +7941,20 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { } // Next, we iteratively mix elements, e.g. for v4f32: - // Step 1: unpcklps 0, 2 ==> X: - // : unpcklps 1, 3 ==> Y: - // Step 2: unpcklps X, Y ==> <3, 2, 1, 0> - unsigned EltStride = NumElems >> 1; - while (EltStride != 0) { - for (unsigned i = 0; i < EltStride; ++i) { - // If Ops[i+EltStride] is undef and this is the first round of mixing, - // then it is safe to just drop this shuffle: V[i] is already in the - // right place, the one element (since it's the first round) being - // inserted as undef can be dropped. This isn't safe for successive - // rounds because they will permute elements within both vectors. - if (Ops[i+EltStride].isUndef() && - EltStride == NumElems/2) - continue; - - Ops[i] = getUnpackl(DAG, dl, VT, Ops[i], Ops[i + EltStride]); - } - EltStride >>= 1; + // Step 1: unpcklps 0, 1 ==> X: + // : unpcklps 2, 3 ==> Y: + // Step 2: unpcklpd X, Y ==> <3, 2, 1, 0> + for (unsigned Scale = 1; Scale < NumElems; Scale *= 2) { + // Generate scaled UNPCKL shuffle mask. + SmallVector Mask; + for(unsigned i = 0; i != Scale; ++i) + Mask.push_back(i); + for (unsigned i = 0; i != Scale; ++i) + Mask.push_back(NumElems+i); + Mask.append(NumElems - Mask.size(), SM_SentinelUndef); + + for (unsigned i = 0, e = NumElems / (2 * Scale); i != e; ++i) + Ops[i] = DAG.getVectorShuffle(VT, dl, Ops[2*i], Ops[(2*i)+1], Mask); } return Ops[0]; } @@ -7859,6 +7989,60 @@ static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { return concat256BitVectors(V1, V2, ResVT, NumElems, DAG, dl); } +// Return true if all the operands of the given CONCAT_VECTORS node are zeros +// except for the first one. (CONCAT_VECTORS Op, 0, 0,...,0) +static bool isExpandWithZeros(const SDValue &Op) { + assert(Op.getOpcode() == ISD::CONCAT_VECTORS && + "Expand with zeros only possible in CONCAT_VECTORS nodes!"); + + for (unsigned i = 1; i < Op.getNumOperands(); i++) + if (!ISD::isBuildVectorAllZeros(Op.getOperand(i).getNode())) + return false; + + return true; +} + +// Returns true if the given node is a type promotion (by concatenating i1 +// zeros) of the result of a node that already zeros all upper bits of +// k-register. +static SDValue isTypePromotionOfi1ZeroUpBits(SDValue Op) { + unsigned Opc = Op.getOpcode(); + + assert(Opc == ISD::CONCAT_VECTORS && + Op.getSimpleValueType().getVectorElementType() == MVT::i1 && + "Unexpected node to check for type promotion!"); + + // As long as we are concatenating zeros to the upper part of a previous node + // result, climb up the tree until a node with different opcode is + // encountered + while (Opc == ISD::INSERT_SUBVECTOR || Opc == ISD::CONCAT_VECTORS) { + if (Opc == ISD::INSERT_SUBVECTOR) { + if (ISD::isBuildVectorAllZeros(Op.getOperand(0).getNode()) && + Op.getConstantOperandVal(2) == 0) + Op = Op.getOperand(1); + else + return SDValue(); + } else { // Opc == ISD::CONCAT_VECTORS + if (isExpandWithZeros(Op)) + Op = Op.getOperand(0); + else + return SDValue(); + } + Opc = Op.getOpcode(); + } + + // Check if the first inserted node zeroes the upper bits, or an 'and' result + // of a node that zeros the upper bits (its masked version). 
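The scaled-unpack rewrite in LowerBUILD_VECTOR above replaces the old EltStride halving loop with explicit shuffle masks. A standalone C++ sketch of the mask generation for NumElems = 4, which prints {0, 4, -1, -1} for the first round (unpcklps) and {0, 1, 4, 5} for the second (unpcklpd); -1 plays the role of SM_SentinelUndef:

#include <cstdio>
#include <vector>

int main() {
  const int NumElems = 4, Undef = -1;
  for (int Scale = 1; Scale < NumElems; Scale *= 2) {
    std::vector<int> Mask;
    for (int i = 0; i != Scale; ++i) Mask.push_back(i);            // lo elems
    for (int i = 0; i != Scale; ++i) Mask.push_back(NumElems + i); // hi elems
    while ((int)Mask.size() < NumElems) Mask.push_back(Undef);
    for (int M : Mask) std::printf("%d ", M);
    std::printf("\n");
  }
}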
+ if (isMaskedZeroUpperBitsvXi1(Op.getOpcode()) || + (Op.getOpcode() == ISD::AND && + (isMaskedZeroUpperBitsvXi1(Op.getOperand(0).getOpcode()) || + isMaskedZeroUpperBitsvXi1(Op.getOperand(1).getOpcode())))) { + return Op; + } + + return SDValue(); +} + static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG & DAG) { @@ -7869,6 +8053,17 @@ static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op, assert(isPowerOf2_32(NumOfOperands) && "Unexpected number of operands in CONCAT_VECTORS"); + // If this node promotes - by concatenating zeroes - the type of the result + // of a node with an instruction that zeroes all upper (irrelevant) bits of the + // output register, mark it as legal and catch the pattern in instruction + // selection to avoid emitting extra instructions (for zeroing upper bits). + if (SDValue Promoted = isTypePromotionOfi1ZeroUpBits(Op)) { + SDValue ZeroC = DAG.getConstant(0, dl, MVT::i64); + SDValue AllZeros = DAG.getSplatBuildVector(ResVT, dl, ZeroC); + return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, AllZeros, Promoted, + ZeroC); + } + SDValue Undef = DAG.getUNDEF(ResVT); if (NumOfOperands > 2) { // Specialize the cases when all, or all but one, of the operands are undef. @@ -8009,7 +8204,7 @@ static bool is128BitLaneCrossingShuffleMask(MVT VT, ArrayRef<int> Mask) { static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT, ArrayRef<int> Mask, SmallVectorImpl<int> &RepeatedMask) { - int LaneSize = LaneSizeInBits / VT.getScalarSizeInBits(); + auto LaneSize = LaneSizeInBits / VT.getScalarSizeInBits(); RepeatedMask.assign(LaneSize, -1); int Size = Mask.size(); for (int i = 0; i < Size; ++i) { @@ -8399,9 +8594,9 @@ static SDValue lowerVectorShuffleToEXPAND(const SDLoc &DL, MVT VT, Subtarget, DAG, DL); SDValue ZeroVector = getZeroVector(VT, Subtarget, DAG, DL); SDValue ExpandedVector = IsLeftZeroSide ? V2 : V1; - return DAG.getNode(ISD::VSELECT, DL, VT, VMask, - DAG.getNode(X86ISD::EXPAND, DL, VT, ExpandedVector), - ZeroVector); + return DAG.getSelect(DL, VT, VMask, + DAG.getNode(X86ISD::EXPAND, DL, VT, ExpandedVector), + ZeroVector); } static bool matchVectorShuffleWithUNPCK(MVT VT, SDValue &V1, SDValue &V2, @@ -8761,8 +8956,9 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1, V1 = DAG.getBitcast(BlendVT, V1); V2 = DAG.getBitcast(BlendVT, V2); return DAG.getBitcast( - VT, DAG.getNode(ISD::VSELECT, DL, BlendVT, - DAG.getBuildVector(BlendVT, DL, VSELECTMask), V1, V2)); + VT, + DAG.getSelect(DL, BlendVT, DAG.getBuildVector(BlendVT, DL, VSELECTMask), + V1, V2)); } case MVT::v16f32: case MVT::v8f64: @@ -9161,11 +9357,11 @@ static SDValue lowerVectorShuffleAsShift(const SDLoc &DL, MVT VT, SDValue V1, return DAG.getBitcast(VT, V); } -/// \brief Try to lower a vector shuffle using SSE4a EXTRQ/INSERTQ. -static SDValue lowerVectorShuffleWithSSE4A(const SDLoc &DL, MVT VT, SDValue V1, - SDValue V2, ArrayRef<int> Mask, - const APInt &Zeroable, - SelectionDAG &DAG) { +// EXTRQ: Extract Len elements from lower half of source, starting at Idx. +// Remainder of lower half result is zero and upper half is all undef. +static bool matchVectorShuffleAsEXTRQ(MVT VT, SDValue &V1, SDValue &V2, + ArrayRef<int> Mask, uint64_t &BitLen, + uint64_t &BitIdx, const APInt &Zeroable) { int Size = Mask.size(); int HalfSize = Size / 2; assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size"); @@ -9173,120 +9369,133 @@ static SDValue lowerVectorShuffleWithSSE4A(const SDLoc &DL, MVT VT, SDValue V1, // Upper half must be undefined.
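A bit-level illustration of the vXi1 widening that the insert1BitVector and LowerCONCAT_VECTORSvXi1 hunks above treat as free: an AVX-512 compare writing a v4i1 result into a k-register already zeroes the upper bits, so widening by inserting into an all-zero v16i1 at index 0 changes nothing. Standalone sketch, with a plain uint16_t standing in for the k-register:

#include <cassert>
#include <cstdint>

int main() {
  uint16_t K = 0x000B;                // v4i1 compare result; bits 4..15 are 0
  uint16_t Widened = (uint16_t)0 | K; // insert into all-zero v16i1 at index 0
  assert(Widened == K);               // the insert is a no-op
}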
if (!isUndefInRange(Mask, HalfSize, HalfSize)) - return SDValue(); + return false; - // EXTRQ: Extract Len elements from lower half of source, starting at Idx. - // Remainder of lower half result is zero and upper half is all undef. - auto LowerAsEXTRQ = [&]() { - // Determine the extraction length from the part of the - // lower half that isn't zeroable. - int Len = HalfSize; - for (; Len > 0; --Len) - if (!Zeroable[Len - 1]) - break; - assert(Len > 0 && "Zeroable shuffle mask"); + // Determine the extraction length from the part of the + // lower half that isn't zeroable. + int Len = HalfSize; + for (; Len > 0; --Len) + if (!Zeroable[Len - 1]) + break; + assert(Len > 0 && "Zeroable shuffle mask"); - // Attempt to match first Len sequential elements from the lower half. - SDValue Src; - int Idx = -1; - for (int i = 0; i != Len; ++i) { - int M = Mask[i]; - if (M < 0) - continue; - SDValue &V = (M < Size ? V1 : V2); - M = M % Size; + // Attempt to match first Len sequential elements from the lower half. + SDValue Src; + int Idx = -1; + for (int i = 0; i != Len; ++i) { + int M = Mask[i]; + if (M == SM_SentinelUndef) + continue; + SDValue &V = (M < Size ? V1 : V2); + M = M % Size; - // The extracted elements must start at a valid index and all mask - // elements must be in the lower half. - if (i > M || M >= HalfSize) - return SDValue(); + // The extracted elements must start at a valid index and all mask + // elements must be in the lower half. + if (i > M || M >= HalfSize) + return false; - if (Idx < 0 || (Src == V && Idx == (M - i))) { - Src = V; - Idx = M - i; - continue; - } - return SDValue(); + if (Idx < 0 || (Src == V && Idx == (M - i))) { + Src = V; + Idx = M - i; + continue; } + return false; + } - if (Idx < 0) - return SDValue(); + if (!Src || Idx < 0) + return false; - assert((Idx + Len) <= HalfSize && "Illegal extraction mask"); - int BitLen = (Len * VT.getScalarSizeInBits()) & 0x3f; - int BitIdx = (Idx * VT.getScalarSizeInBits()) & 0x3f; - return DAG.getNode(X86ISD::EXTRQI, DL, VT, Src, - DAG.getConstant(BitLen, DL, MVT::i8), - DAG.getConstant(BitIdx, DL, MVT::i8)); - }; + assert((Idx + Len) <= HalfSize && "Illegal extraction mask"); + BitLen = (Len * VT.getScalarSizeInBits()) & 0x3f; + BitIdx = (Idx * VT.getScalarSizeInBits()) & 0x3f; + V1 = Src; + return true; +} - if (SDValue ExtrQ = LowerAsEXTRQ()) - return ExtrQ; +// INSERTQ: Extract lowest Len elements from lower half of second source and +// insert over first source, starting at Idx. +// { A[0], .., A[Idx-1], B[0], .., B[Len-1], A[Idx+Len], .., UNDEF, ... } +static bool matchVectorShuffleAsINSERTQ(MVT VT, SDValue &V1, SDValue &V2, + ArrayRef Mask, uint64_t &BitLen, + uint64_t &BitIdx) { + int Size = Mask.size(); + int HalfSize = Size / 2; + assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size"); - // INSERTQ: Extract lowest Len elements from lower half of second source and - // insert over first source, starting at Idx. - // { A[0], .., A[Idx-1], B[0], .., B[Len-1], A[Idx+Len], .., UNDEF, ... } - auto LowerAsInsertQ = [&]() { - for (int Idx = 0; Idx != HalfSize; ++Idx) { - SDValue Base; + // Upper half must be undefined. + if (!isUndefInRange(Mask, HalfSize, HalfSize)) + return false; - // Attempt to match first source from mask before insertion point. - if (isUndefInRange(Mask, 0, Idx)) { + for (int Idx = 0; Idx != HalfSize; ++Idx) { + SDValue Base; + + // Attempt to match first source from mask before insertion point. 
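To make the EXTRQ immediate encoding computed above concrete: for a v8i16 shuffle that keeps Len = 2 elements starting at element Idx = 1, the bit-length and bit-index fields are element counts scaled to bits and reduced modulo 64 (the & 0x3f). A standalone sketch of that arithmetic:

#include <cassert>
#include <cstdint>

int main() {
  const uint64_t ScalarBits = 16; // v8i16
  uint64_t Len = 2, Idx = 1;
  uint64_t BitLen = (Len * ScalarBits) & 0x3f; // 32
  uint64_t BitIdx = (Idx * ScalarBits) & 0x3f; // 16
  assert(BitLen == 32 && BitIdx == 16);
}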
+ if (isUndefInRange(Mask, 0, Idx)) { + /* EMPTY */ + } else if (isSequentialOrUndefInRange(Mask, 0, Idx, 0)) { + Base = V1; + } else if (isSequentialOrUndefInRange(Mask, 0, Idx, Size)) { + Base = V2; + } else { + continue; + } + + // Extend the extraction length looking to match both the insertion of + // the second source and the remaining elements of the first. + for (int Hi = Idx + 1; Hi <= HalfSize; ++Hi) { + SDValue Insert; + int Len = Hi - Idx; + + // Match insertion. + if (isSequentialOrUndefInRange(Mask, Idx, Len, 0)) { + Insert = V1; + } else if (isSequentialOrUndefInRange(Mask, Idx, Len, Size)) { + Insert = V2; + } else { + continue; + } + + // Match the remaining elements of the lower half. + if (isUndefInRange(Mask, Hi, HalfSize - Hi)) { /* EMPTY */ - } else if (isSequentialOrUndefInRange(Mask, 0, Idx, 0)) { + } else if ((!Base || (Base == V1)) && + isSequentialOrUndefInRange(Mask, Hi, HalfSize - Hi, Hi)) { Base = V1; - } else if (isSequentialOrUndefInRange(Mask, 0, Idx, Size)) { + } else if ((!Base || (Base == V2)) && + isSequentialOrUndefInRange(Mask, Hi, HalfSize - Hi, + Size + Hi)) { Base = V2; } else { continue; } - // Extend the extraction length looking to match both the insertion of - // the second source and the remaining elements of the first. - for (int Hi = Idx + 1; Hi <= HalfSize; ++Hi) { - SDValue Insert; - int Len = Hi - Idx; - - // Match insertion. - if (isSequentialOrUndefInRange(Mask, Idx, Len, 0)) { - Insert = V1; - } else if (isSequentialOrUndefInRange(Mask, Idx, Len, Size)) { - Insert = V2; - } else { - continue; - } - - // Match the remaining elements of the lower half. - if (isUndefInRange(Mask, Hi, HalfSize - Hi)) { - /* EMPTY */ - } else if ((!Base || (Base == V1)) && - isSequentialOrUndefInRange(Mask, Hi, HalfSize - Hi, Hi)) { - Base = V1; - } else if ((!Base || (Base == V2)) && - isSequentialOrUndefInRange(Mask, Hi, HalfSize - Hi, - Size + Hi)) { - Base = V2; - } else { - continue; - } - - // We may not have a base (first source) - this can safely be undefined. - if (!Base) - Base = DAG.getUNDEF(VT); - - int BitLen = (Len * VT.getScalarSizeInBits()) & 0x3f; - int BitIdx = (Idx * VT.getScalarSizeInBits()) & 0x3f; - return DAG.getNode(X86ISD::INSERTQI, DL, VT, Base, Insert, - DAG.getConstant(BitLen, DL, MVT::i8), - DAG.getConstant(BitIdx, DL, MVT::i8)); - } + BitLen = (Len * VT.getScalarSizeInBits()) & 0x3f; + BitIdx = (Idx * VT.getScalarSizeInBits()) & 0x3f; + V1 = Base; + V2 = Insert; + return true; } + } - return SDValue(); - }; + return false; +} + +/// \brief Try to lower a vector shuffle using SSE4a EXTRQ/INSERTQ. +static SDValue lowerVectorShuffleWithSSE4A(const SDLoc &DL, MVT VT, SDValue V1, + SDValue V2, ArrayRef Mask, + const APInt &Zeroable, + SelectionDAG &DAG) { + uint64_t BitLen, BitIdx; + if (matchVectorShuffleAsEXTRQ(VT, V1, V2, Mask, BitLen, BitIdx, Zeroable)) + return DAG.getNode(X86ISD::EXTRQI, DL, VT, V1, + DAG.getConstant(BitLen, DL, MVT::i8), + DAG.getConstant(BitIdx, DL, MVT::i8)); - if (SDValue InsertQ = LowerAsInsertQ()) - return InsertQ; + if (matchVectorShuffleAsINSERTQ(VT, V1, V2, Mask, BitLen, BitIdx)) + return DAG.getNode(X86ISD::INSERTQI, DL, VT, V1 ? V1 : DAG.getUNDEF(VT), + V2 ? 
V2 : DAG.getUNDEF(VT), + DAG.getConstant(BitLen, DL, MVT::i8), + DAG.getConstant(BitIdx, DL, MVT::i8)); return SDValue(); } @@ -9883,17 +10092,7 @@ static SDValue lowerVectorShuffleAsBroadcast(const SDLoc &DL, MVT VT, V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr, DAG.getMachineFunction().getMachineMemOperand( Ld->getMemOperand(), Offset, SVT.getStoreSize())); - - // Make sure the newly-created LOAD is in the same position as Ld in - // terms of dependency. We create a TokenFactor for Ld and V, - // and update uses of Ld's output chain to use the TokenFactor. - if (Ld->hasAnyUseOfValue(1)) { - SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, - SDValue(Ld, 1), SDValue(V.getNode(), 1)); - DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), NewChain); - DAG.UpdateNodeOperands(NewChain.getNode(), SDValue(Ld, 1), - SDValue(V.getNode(), 1)); - } + DAG.makeEquivalentMemoryOrdering(Ld, V); } else if (!BroadcastFromReg) { // We can't broadcast from a vector register. return SDValue(); @@ -10844,9 +11043,10 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle( "We need to be changing the number of flipped inputs!"); int PSHUFHalfMask[] = {0, 1, 2, 3}; std::swap(PSHUFHalfMask[FixFreeIdx % 4], PSHUFHalfMask[FixIdx % 4]); - V = DAG.getNode(FixIdx < 4 ? X86ISD::PSHUFLW : X86ISD::PSHUFHW, DL, - MVT::v8i16, V, - getV4X86ShuffleImm8ForMask(PSHUFHalfMask, DL, DAG)); + V = DAG.getNode( + FixIdx < 4 ? X86ISD::PSHUFLW : X86ISD::PSHUFHW, DL, + MVT::getVectorVT(MVT::i16, V.getValueSizeInBits() / 16), V, + getV4X86ShuffleImm8ForMask(PSHUFHalfMask, DL, DAG)); for (int &M : Mask) if (M >= 0 && M == FixIdx) @@ -11960,18 +12160,22 @@ static SDValue lowerV2X128VectorShuffle(const SDLoc &DL, MVT VT, SDValue V1, // subvector. bool OnlyUsesV1 = isShuffleEquivalent(V1, V2, Mask, {0, 1, 0, 1}); if (OnlyUsesV1 || isShuffleEquivalent(V1, V2, Mask, {0, 1, 4, 5})) { - // With AVX2 we should use VPERMQ/VPERMPD to allow memory folding. + // With AVX2, use VPERMQ/VPERMPD to allow memory folding. if (Subtarget.hasAVX2() && V2.isUndef()) return SDValue(); - MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), - VT.getVectorNumElements() / 2); - SDValue LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V1, - DAG.getIntPtrConstant(0, DL)); - SDValue HiV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, - OnlyUsesV1 ? V1 : V2, - DAG.getIntPtrConstant(0, DL)); - return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LoV, HiV); + // With AVX1, use vperm2f128 (below) to allow load folding. Otherwise, + // this will likely become vinsertf128 which can't fold a 256-bit memop. + if (!isa(peekThroughBitcasts(V1))) { + MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), + VT.getVectorNumElements() / 2); + SDValue LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V1, + DAG.getIntPtrConstant(0, DL)); + SDValue HiV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, + OnlyUsesV1 ? V1 : V2, + DAG.getIntPtrConstant(0, DL)); + return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LoV, HiV); + } } } @@ -13861,8 +14065,7 @@ SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const { MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements()); SDValue Mask = DAG.getNode(X86ISD::TESTM, dl, MaskVT, Cond, Cond); // Now return a new VSELECT using the mask. - return DAG.getNode(ISD::VSELECT, dl, VT, Mask, Op.getOperand(1), - Op.getOperand(2)); + return DAG.getSelect(dl, VT, Mask, Op.getOperand(1), Op.getOperand(2)); } // Only some types will be legal on some subtargets. 
If we can emit a legal @@ -13946,7 +14149,6 @@ X86TargetLowering::ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG) const SDValue Idx = Op.getOperand(1); MVT EltVT = Op.getSimpleValueType(); - assert((EltVT == MVT::i1) && "Unexpected operands in ExtractBitFromMaskVector"); assert((VecVT.getVectorNumElements() <= 16 || Subtarget.hasBWI()) && "Unexpected vector type in ExtractBitFromMaskVector"); @@ -13980,8 +14182,8 @@ X86TargetLowering::ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG) const DAG.getConstant(MaxSift - IdxVal, dl, MVT::i8)); Vec = DAG.getNode(X86ISD::KSHIFTR, dl, VecVT, Vec, DAG.getConstant(MaxSift, dl, MVT::i8)); - return DAG.getNode(X86ISD::VEXTRACT, dl, MVT::i1, Vec, - DAG.getIntPtrConstant(0, dl)); + return DAG.getNode(X86ISD::VEXTRACT, dl, Op.getSimpleValueType(), Vec, + DAG.getIntPtrConstant(0, dl)); } SDValue @@ -13992,7 +14194,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, MVT VecVT = Vec.getSimpleValueType(); SDValue Idx = Op.getOperand(1); - if (Op.getSimpleValueType() == MVT::i1) + if (VecVT.getVectorElementType() == MVT::i1) return ExtractBitFromMaskVector(Op, DAG); if (!isa<ConstantSDNode>(Idx)) { @@ -14163,10 +14365,13 @@ X86TargetLowering::InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG) const { return EltInVec; } - // Insertion of one bit into first or last position - // can be done with two SHIFTs + OR. + // Insertion of one bit into first position if (IdxVal == 0 ) { - // EltInVec already at correct index and other bits are 0. + // Clean top bits of vector. + EltInVec = DAG.getNode(X86ISD::KSHIFTL, dl, VecVT, EltInVec, + DAG.getConstant(NumElems - 1, dl, MVT::i8)); + EltInVec = DAG.getNode(X86ISD::KSHIFTR, dl, VecVT, EltInVec, + DAG.getConstant(NumElems - 1, dl, MVT::i8)); // Clean the first bit in source vector. Vec = DAG.getNode(X86ISD::KSHIFTR, dl, VecVT, Vec, DAG.getConstant(1 , dl, MVT::i8)); @@ -14175,6 +14380,7 @@ X86TargetLowering::InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(ISD::OR, dl, VecVT, Vec, EltInVec); } + // Insertion of one bit into last position if (IdxVal == NumElems -1) { // Move the bit to the last position inside the vector. EltInVec = DAG.getNode(X86ISD::KSHIFTL, dl, VecVT, EltInVec, @@ -14220,9 +14426,8 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, // If we are inserting an element, see if we can do this more efficiently with // a blend shuffle with a rematerializable vector than a costly integer // insertion. - // TODO: pre-SSE41 targets will tend to use bit masking - this could still - // be beneficial if we are inserting several zeros and can combine the masks. - if ((IsZeroElt || IsAllOnesElt) && Subtarget.hasSSE41() && NumElts <= 8) { + if ((IsZeroElt || IsAllOnesElt) && Subtarget.hasSSE41() && + 16 <= EltVT.getSizeInBits()) { SmallVector<int, 8> BlendMask; for (unsigned i = 0; i != NumElts; ++i) BlendMask.push_back(i == IdxVal ? i + NumElts : i); @@ -14370,6 +14575,21 @@ static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, const X86Subtarget &Subtarget, unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); MVT ResVT = Op.getSimpleValueType(); + // When v1i1 is legal a scalarization of a vselect with a vXi1 Cond + // would result in: v1i1 = extract_subvector(vXi1, idx). + // Lower these into extract_vector_elt which is already selectable.
+ if (ResVT == MVT::v1i1) { + assert(Subtarget.hasAVX512() && + "Boolean EXTRACT_SUBVECTOR requires AVX512"); + + MVT EltVT = ResVT.getVectorElementType(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + MVT LegalVT = + (TLI.getTypeToTransformTo(*DAG.getContext(), EltVT)).getSimpleVT(); + SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, LegalVT, In, Idx); + return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, ResVT, Res); + } + assert((In.getSimpleValueType().is256BitVector() || In.getSimpleValueType().is512BitVector()) && "Can only extract from 256-bit or 512-bit vectors"); @@ -15387,8 +15607,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, // Get a pointer to FF if the sign bit was set, or to 0 otherwise. SDValue Zero = DAG.getIntPtrConstant(0, dl); SDValue Four = DAG.getIntPtrConstant(4, dl); - SDValue Offset = DAG.getNode(ISD::SELECT, dl, Zero.getValueType(), SignSet, - Zero, Four); + SDValue Offset = DAG.getSelect(dl, Zero.getValueType(), SignSet, Zero, Four); FudgePtr = DAG.getNode(ISD::ADD, dl, PtrVT, FudgePtr, Offset); // Load the value out, extending it from f32 to f80. @@ -15660,7 +15879,7 @@ static SDValue LowerZERO_EXTEND_AVX512(SDValue Op, SDValue Zero = DAG.getConstant(APInt::getNullValue(ExtVT.getScalarSizeInBits()), DL, ExtVT); - SDValue SelectedVal = DAG.getNode(ISD::VSELECT, DL, ExtVT, In, One, Zero); + SDValue SelectedVal = DAG.getSelect(DL, ExtVT, In, One, Zero); if (VT == ExtVT) return SelectedVal; return DAG.getNode(X86ISD::VTRUNC, DL, VT, SelectedVal); @@ -16293,6 +16512,7 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl, case ISD::SHL: if (Op.getNode()->getFlags().hasNoSignedWrap()) break; + LLVM_FALLTHROUGH; default: NeedOF = true; break; @@ -17000,7 +17220,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget, SDValue Op1 = Op.getOperand(1); SDValue CC = Op.getOperand(2); MVT VT = Op.getSimpleValueType(); - ISD::CondCode SetCCOpcode = cast(CC)->get(); + ISD::CondCode Cond = cast(CC)->get(); bool isFP = Op.getOperand(1).getSimpleValueType().isFloatingPoint(); SDLoc dl(Op); @@ -17027,18 +17247,18 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget, // TODO: This can be avoided if Intel (and only Intel as of 2016) AVX is // available. SDValue Cmp; - unsigned SSECC = translateX86FSETCC(SetCCOpcode, Op0, Op1); + unsigned SSECC = translateX86FSETCC(Cond, Op0, Op1); if (SSECC == 8) { // LLVM predicate is SETUEQ or SETONE. unsigned CC0, CC1; unsigned CombineOpc; - if (SetCCOpcode == ISD::SETUEQ) { + if (Cond == ISD::SETUEQ) { CC0 = 3; // UNORD CC1 = 0; // EQ CombineOpc = Opc == X86ISD::CMPP ? static_cast(X86ISD::FOR) : static_cast(ISD::OR); } else { - assert(SetCCOpcode == ISD::SETONE); + assert(Cond == ISD::SETONE); CC0 = 7; // ORD CC1 = 4; // NEQ CombineOpc = Opc == X86ISD::CMPP ? static_cast(X86ISD::FAND) : @@ -17085,7 +17305,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget, // 2. The original operand type has been promoted to a 256-bit vector. // // Note that condition 2. only applies for AVX targets. - SDValue NewOp = DAG.getSetCC(dl, VTOp0, Op0, Op1, SetCCOpcode); + SDValue NewOp = DAG.getSetCC(dl, VTOp0, Op0, Op1, Cond); return DAG.getZExtOrTrunc(NewOp, dl, VT); } @@ -17125,7 +17345,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget, VT == MVT::v4i32 || VT == MVT::v2i64) && Subtarget.hasXOP()) { // Translate compare code to XOP PCOM compare mode. 
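The SETUEQ lowering above is worth a concrete illustration: SSE has no single "unordered or equal" predicate, so it is assembled from an unordered compare (CC0 = 3, CMPUNORD) and an equality compare (CC1 = 0, CMPEQ) combined with OR. A standalone scalar sketch of the semantics, not LLVM code:

#include <cassert>
#include <cmath>

bool ueq(float A, float B) {
  bool Unord = std::isnan(A) || std::isnan(B); // cmpunordss
  bool Eq = A == B;                            // cmpeqss
  return Unord || Eq;                          // orps
}

int main() {
  assert(ueq(1.f, 1.f) && ueq(NAN, 1.f) && !ueq(1.f, 2.f));
}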
unsigned CmpMode = 0; - switch (SetCCOpcode) { + switch (Cond) { default: llvm_unreachable("Unexpected SETCC condition"); case ISD::SETULT: case ISD::SETLT: CmpMode = 0x00; break; @@ -17140,60 +17360,55 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget, } // Are we comparing unsigned or signed integers? - unsigned Opc = ISD::isUnsignedIntSetCC(SetCCOpcode) - ? X86ISD::VPCOMU : X86ISD::VPCOM; + unsigned Opc = + ISD::isUnsignedIntSetCC(Cond) ? X86ISD::VPCOMU : X86ISD::VPCOM; return DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(CmpMode, dl, MVT::i8)); } - // We are handling one of the integer comparisons here. Since SSE only has + // We are handling one of the integer comparisons here. Since SSE only has // GT and EQ comparisons for integer, swapping operands and multiple // operations may be required for some comparisons. - unsigned Opc; - bool Swap = false, Invert = false, FlipSigns = false, MinMax = false; - bool Subus = false; - - switch (SetCCOpcode) { - default: llvm_unreachable("Unexpected SETCC condition"); - case ISD::SETNE: Invert = true; - case ISD::SETEQ: Opc = X86ISD::PCMPEQ; break; - case ISD::SETLT: Swap = true; - case ISD::SETGT: Opc = X86ISD::PCMPGT; break; - case ISD::SETGE: Swap = true; - case ISD::SETLE: Opc = X86ISD::PCMPGT; - Invert = true; break; - case ISD::SETULT: Swap = true; - case ISD::SETUGT: Opc = X86ISD::PCMPGT; - FlipSigns = true; break; - case ISD::SETUGE: Swap = true; - case ISD::SETULE: Opc = X86ISD::PCMPGT; - FlipSigns = true; Invert = true; break; - } + unsigned Opc = (Cond == ISD::SETEQ || Cond == ISD::SETNE) ? X86ISD::PCMPEQ + : X86ISD::PCMPGT; + bool Swap = Cond == ISD::SETLT || Cond == ISD::SETULT || + Cond == ISD::SETGE || Cond == ISD::SETUGE; + bool Invert = Cond == ISD::SETNE || + (Cond != ISD::SETEQ && ISD::isTrueWhenEqual(Cond)); + + // If both operands are known non-negative, then an unsigned compare is the + // same as a signed compare and there's no need to flip signbits. + // TODO: We could check for more general simplifications here since we're + // computing known bits. + bool FlipSigns = ISD::isUnsignedIntSetCC(Cond) && + !(DAG.SignBitIsZero(Op0) && DAG.SignBitIsZero(Op1)); // Special case: Use min/max operations for SETULE/SETUGE MVT VET = VT.getVectorElementType(); - bool hasMinMax = - (Subtarget.hasSSE41() && (VET >= MVT::i8 && VET <= MVT::i32)) - || (Subtarget.hasSSE2() && (VET == MVT::i8)); - - if (hasMinMax) { - switch (SetCCOpcode) { + bool HasMinMax = + (Subtarget.hasSSE41() && (VET >= MVT::i8 && VET <= MVT::i32)) || + (Subtarget.hasSSE2() && (VET == MVT::i8)); + bool MinMax = false; + if (HasMinMax) { + switch (Cond) { default: break; case ISD::SETULE: Opc = ISD::UMIN; MinMax = true; break; case ISD::SETUGE: Opc = ISD::UMAX; MinMax = true; break; } - if (MinMax) { Swap = false; Invert = false; FlipSigns = false; } + if (MinMax) + Swap = Invert = FlipSigns = false; } - bool hasSubus = Subtarget.hasSSE2() && (VET == MVT::i8 || VET == MVT::i16); - if (!MinMax && hasSubus) { + bool HasSubus = Subtarget.hasSSE2() && (VET == MVT::i8 || VET == MVT::i16); + bool Subus = false; + if (!MinMax && HasSubus) { // As another special case, use PSUBUS[BW] when it's profitable. E.g. 
for // Op0 u<= Op1: // t = psubus Op0, Op1 // pcmpeq t, <0..0> - switch (SetCCOpcode) { + switch (Cond) { default: break; case ISD::SETULT: { // If the comparison is against a constant we can turn this into a @@ -17323,8 +17538,7 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { if (VT.isVector()) return LowerVSETCC(Op, Subtarget, DAG); - assert(((!Subtarget.hasAVX512() && VT == MVT::i8) || (VT == MVT::i1)) - && "SetCC type must be 8-bit or 1-bit integer"); + assert(VT == MVT::i8 && "SetCC type must be 8-bit integer"); SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); SDLoc dl(Op); @@ -17388,19 +17602,24 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { return SetCC; } -SDValue X86TargetLowering::LowerSETCCE(SDValue Op, SelectionDAG &DAG) const { +SDValue X86TargetLowering::LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const { SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); SDValue Carry = Op.getOperand(2); SDValue Cond = Op.getOperand(3); SDLoc DL(Op); - assert(LHS.getSimpleValueType().isInteger() && "SETCCE is integer only."); + assert(LHS.getSimpleValueType().isInteger() && "SETCCCARRY is integer only."); X86::CondCode CC = TranslateIntegerX86CC(cast(Cond)->get()); - assert(Carry.getOpcode() != ISD::CARRY_FALSE); + // Recreate the carry if needed. + EVT CarryVT = Carry.getValueType(); + APInt NegOne = APInt::getAllOnesValue(CarryVT.getScalarSizeInBits()); + Carry = DAG.getNode(X86ISD::ADD, DL, DAG.getVTList(CarryVT, MVT::i32), + Carry, DAG.getConstant(NegOne, DL, CarryVT)); + SDVTList VTs = DAG.getVTList(LHS.getValueType(), MVT::i32); - SDValue Cmp = DAG.getNode(X86ISD::SBB, DL, VTs, LHS, RHS, Carry); + SDValue Cmp = DAG.getNode(X86ISD::SBB, DL, VTs, LHS, RHS, Carry.getValue(1)); SDValue SetCC = getSETCC(CC, Cmp.getValue(1), DL, DAG); if (Op.getSimpleValueType() == MVT::i1) return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC); @@ -17458,7 +17677,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { if (SSECC != 8) { if (Subtarget.hasAVX512()) { - SDValue Cmp = DAG.getNode(X86ISD::FSETCCM, DL, MVT::i1, CondOp0, + SDValue Cmp = DAG.getNode(X86ISD::FSETCCM, DL, MVT::v1i1, CondOp0, CondOp1, DAG.getConstant(SSECC, DL, MVT::i8)); return DAG.getNode(VT.isVector() ? X86ISD::SELECT : X86ISD::SELECTS, DL, VT, Cmp, Op1, Op2); @@ -17494,7 +17713,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { MVT VCmpVT = VT == MVT::f32 ? MVT::v4i32 : MVT::v2i64; VCmp = DAG.getBitcast(VCmpVT, VCmp); - SDValue VSel = DAG.getNode(ISD::VSELECT, DL, VecVT, VCmp, VOp1, VOp2); + SDValue VSel = DAG.getSelect(DL, VecVT, VCmp, VOp1, VOp2); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, VSel, DAG.getIntPtrConstant(0, DL)); @@ -17506,9 +17725,10 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { } // AVX512 fallback is to lower selects of scalar floats to masked moves. 
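The PSUBUS[BW] special case in the hunk above relies on an identity that is easy to check in scalar form: with unsigned saturating subtraction, x <=u y exactly when (x -sat y) == 0, which is what "psubus; pcmpeq with zero" computes per element. A standalone sketch:

#include <cassert>
#include <cstdint>

uint8_t subus(uint8_t X, uint8_t Y) { return X > Y ? X - Y : 0; } // psubusb
bool ule_via_subus(uint8_t X, uint8_t Y) { return subus(X, Y) == 0; }

int main() {
  assert(ule_via_subus(3, 3) && ule_via_subus(2, 200));
  assert(!ule_via_subus(201, 200));
}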
- if (Cond.getValueType() == MVT::i1 && (VT == MVT::f64 || VT == MVT::f32) && - Subtarget.hasAVX512()) - return DAG.getNode(X86ISD::SELECTS, DL, VT, Cond, Op1, Op2); + if ((VT == MVT::f64 || VT == MVT::f32) && Subtarget.hasAVX512()) { + SDValue Cmp = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v1i1, Cond); + return DAG.getNode(X86ISD::SELECTS, DL, VT, Cmp, Op1, Op2); + } if (VT.isVector() && VT.getVectorElementType() == MVT::i1) { SDValue Op1Scalar; @@ -17522,9 +17742,8 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { else if (Op2.getOpcode() == ISD::BITCAST && Op2.getOperand(0)) Op2Scalar = Op2.getOperand(0); if (Op1Scalar.getNode() && Op2Scalar.getNode()) { - SDValue newSelect = DAG.getNode(ISD::SELECT, DL, - Op1Scalar.getValueType(), - Cond, Op1Scalar, Op2Scalar); + SDValue newSelect = DAG.getSelect(DL, Op1Scalar.getValueType(), Cond, + Op1Scalar, Op2Scalar); if (newSelect.getValueSizeInBits() == VT.getSizeInBits()) return DAG.getBitcast(VT, newSelect); SDValue ExtVec = DAG.getBitcast(MVT::v8i1, newSelect); @@ -17539,8 +17758,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { DAG.getUNDEF(MVT::v8i1), Op1, zeroConst); Op2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, MVT::v8i1, DAG.getUNDEF(MVT::v8i1), Op2, zeroConst); - SDValue newSelect = DAG.getNode(ISD::SELECT, DL, MVT::v8i1, - Cond, Op1, Op2); + SDValue newSelect = DAG.getSelect(DL, MVT::v8i1, Cond, Op1, Op2); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, newSelect, zeroConst); } @@ -17571,23 +17789,21 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { if ((isAllOnesConstant(Op1) || isAllOnesConstant(Op2)) && (CondCode == X86::COND_E || CondCode == X86::COND_NE)) { SDValue Y = isAllOnesConstant(Op2) ? 
Op1 : Op2; - SDValue CmpOp0 = Cmp.getOperand(0); + // Apply further optimizations for special cases // (select (x != 0), -1, 0) -> neg & sbb // (select (x == 0), 0, -1) -> neg & sbb if (isNullConstant(Y) && - (isAllOnesConstant(Op1) == (CondCode == X86::COND_NE))) { - SDVTList VTs = DAG.getVTList(CmpOp0.getValueType(), MVT::i32); - SDValue Neg = DAG.getNode(X86ISD::SUB, DL, VTs, - DAG.getConstant(0, DL, - CmpOp0.getValueType()), - CmpOp0); - SDValue Res = DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(), - DAG.getConstant(X86::COND_B, DL, MVT::i8), - SDValue(Neg.getNode(), 1)); - return Res; - } + (isAllOnesConstant(Op1) == (CondCode == X86::COND_NE))) { + SDVTList VTs = DAG.getVTList(CmpOp0.getValueType(), MVT::i32); + SDValue Zero = DAG.getConstant(0, DL, CmpOp0.getValueType()); + SDValue Neg = DAG.getNode(X86ISD::SUB, DL, VTs, Zero, CmpOp0); + SDValue Res = DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(), + DAG.getConstant(X86::COND_B, DL, MVT::i8), + SDValue(Neg.getNode(), 1)); + return Res; + } Cmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32, CmpOp0, DAG.getConstant(1, DL, CmpOp0.getValueType())); @@ -17809,7 +18025,7 @@ static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op, } else { SDValue NegOne = getOnesVector(ExtVT, DAG, dl); SDValue Zero = getZeroVector(ExtVT, Subtarget, DAG, dl); - V = DAG.getNode(ISD::VSELECT, dl, ExtVT, In, NegOne, Zero); + V = DAG.getSelect(dl, ExtVT, In, NegOne, Zero); if (ExtVT == VT) return V; } @@ -18598,8 +18814,9 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); bool SplitStack = MF.shouldSplitStack(); + bool EmitStackProbe = !getStackProbeSymbolName(MF).empty(); bool Lower = (Subtarget.isOSWindows() && !Subtarget.isTargetMachO()) || - SplitStack; + SplitStack || EmitStackProbe; SDLoc dl(Op); // Get the inputs. @@ -19051,8 +19268,8 @@ static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask, /// \brief Creates an SDNode for a predicated scalar operation. /// \returns (X86vselect \p Mask, \p Op, \p PreservedSrc). -/// The mask is coming as MVT::i8 and it should be truncated -/// to MVT::i1 while lowering masking intrinsics. +/// The mask is coming as MVT::i8 and it should be transformed +/// to MVT::v1i1 while lowering masking intrinsics. /// The main difference between ScalarMaskingNode and VectorMaskingNode is using /// "X86select" instead of "vselect". We just can't create the "vselect" node /// for a scalar instruction. @@ -19067,9 +19284,8 @@ static SDValue getScalarMaskingNode(SDValue Op, SDValue Mask, MVT VT = Op.getSimpleValueType(); SDLoc dl(Op); - // The mask should be of type MVT::i1 - SDValue IMask = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Mask); + SDValue IMask = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v1i1, Mask); if (Op.getOpcode() == X86ISD::FSETCCM || Op.getOpcode() == X86ISD::FSETCCM_RND) return DAG.getNode(ISD::AND, dl, VT, Op, IMask); @@ -19122,7 +19338,7 @@ static SDValue recoverFramePointer(SelectionDAG &DAG, const Function *Fn, // registration, or the .set_setframe offset. 
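// The neg & sbb special case above leans on two x86 facts: NEG x computes
// 0 - x and sets CF iff x != 0, and SBB r, r broadcasts CF into every bit
// of the register. A minimal scalar sketch (illustrative only):
#include <cassert>
#include <cstdint>

int64_t negSbbSelect(uint64_t x) {
  bool cf = (x != 0);                   // CF after "neg x"
  return cf ? int64_t(-1) : int64_t(0); // "sbb r, r": CF ? -1 : 0
}

int main() {
  assert(negSbbSelect(0) == 0);   // (select (0 != 0), -1, 0)
  assert(negSbbSelect(42) == -1); // (select (42 != 0), -1, 0)
  return 0;
}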
MCSymbol *OffsetSym = MF.getMMI().getContext().getOrCreateParentFrameOffsetSymbol( - GlobalValue::getRealLinkageName(Fn->getName())); + GlobalValue::dropLLVMManglingEscape(Fn->getName())); SDValue OffsetSymVal = DAG.getMCSymbol(OffsetSym, PtrVT); SDValue ParentFrameOffset = DAG.getNode(ISD::LOCAL_RECOVER, dl, PtrVT, OffsetSymVal); @@ -19510,10 +19726,11 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget SDValue Src1 = Op.getOperand(1); SDValue Imm = Op.getOperand(2); SDValue Mask = Op.getOperand(3); - SDValue FPclass = DAG.getNode(IntrData->Opc0, dl, MVT::i1, Src1, Imm); + SDValue FPclass = DAG.getNode(IntrData->Opc0, dl, MVT::v1i1, Src1, Imm); SDValue FPclassMask = getScalarMaskingNode(FPclass, Mask, DAG.getTargetConstant(0, dl, MVT::i1), Subtarget, DAG); - return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i8, FPclassMask); + return DAG.getNode(X86ISD::VEXTRACT, dl, MVT::i8, FPclassMask, + DAG.getIntPtrConstant(0, dl)); } case CMP_MASK: case CMP_MASK_CC: { @@ -19573,18 +19790,18 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget if (IntrData->Opc1 != 0) { SDValue Rnd = Op.getOperand(5); if (!isRoundModeCurDirection(Rnd)) - Cmp = DAG.getNode(IntrData->Opc1, dl, MVT::i1, Src1, Src2, CC, Rnd); + Cmp = DAG.getNode(IntrData->Opc1, dl, MVT::v1i1, Src1, Src2, CC, Rnd); } //default rounding mode if(!Cmp.getNode()) - Cmp = DAG.getNode(IntrData->Opc0, dl, MVT::i1, Src1, Src2, CC); + Cmp = DAG.getNode(IntrData->Opc0, dl, MVT::v1i1, Src1, Src2, CC); SDValue CmpMask = getScalarMaskingNode(Cmp, Mask, DAG.getTargetConstant(0, dl, MVT::i1), Subtarget, DAG); - - return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i8, CmpMask); + return DAG.getNode(X86ISD::VEXTRACT, dl, MVT::i8, CmpMask, + DAG.getIntPtrConstant(0, dl)); } case COMI: { // Comparison intrinsics ISD::CondCode CC = (ISD::CondCode)IntrData->Opc1; @@ -19632,13 +19849,13 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget SDValue FCmp; if (isRoundModeCurDirection(Sae)) - FCmp = DAG.getNode(X86ISD::FSETCCM, dl, MVT::i1, LHS, RHS, - DAG.getConstant(CondVal, dl, MVT::i8)); + FCmp = DAG.getNode(X86ISD::FSETCCM, dl, MVT::v1i1, LHS, RHS, + DAG.getConstant(CondVal, dl, MVT::i8)); else - FCmp = DAG.getNode(X86ISD::FSETCCM_RND, dl, MVT::i1, LHS, RHS, - DAG.getConstant(CondVal, dl, MVT::i8), Sae); - // AnyExt just uses KMOVW %kreg, %r32; ZeroExt emits "and $1, %reg" - return DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, FCmp); + FCmp = DAG.getNode(X86ISD::FSETCCM_RND, dl, MVT::v1i1, LHS, RHS, + DAG.getConstant(CondVal, dl, MVT::i8), Sae); + return DAG.getNode(X86ISD::VEXTRACT, dl, MVT::i32, FCmp, + DAG.getIntPtrConstant(0, dl)); } case VSHIFT: return getTargetVShiftNode(IntrData->Opc0, dl, Op.getSimpleValueType(), @@ -19724,12 +19941,6 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget DAG.getIntPtrConstant(0, dl)); return DAG.getBitcast(Op.getValueType(), Res); } - case CONVERT_MASK_TO_VEC: { - SDValue Mask = Op.getOperand(1); - MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements()); - SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl); - return DAG.getNode(IntrData->Opc0, dl, VT, VMask); - } case BRCST_SUBVEC_TO_VEC: { SDValue Src = Op.getOperand(1); SDValue Passthru = Op.getOperand(2); @@ -19973,7 +20184,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget SDValue Op1 = Op.getOperand(1); auto *Fn = cast(cast(Op1)->getGlobal()); MCSymbol *LSDASym = 
MF.getMMI().getContext().getOrCreateLSDASymbol( - GlobalValue::getRealLinkageName(Fn->getName())); + GlobalValue::dropLLVMManglingEscape(Fn->getName())); // Generate a simple absolute symbol reference. This intrinsic is only // supported on 32-bit Windows, which isn't PIC. @@ -20012,7 +20223,10 @@ static SDValue getAVX2GatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, SDValue Index, SDValue ScaleOp, SDValue Chain, const X86Subtarget &Subtarget) { SDLoc dl(Op); - auto *C = cast(ScaleOp); + auto *C = dyn_cast(ScaleOp); + // Scale must be constant. + if (!C) + return SDValue(); SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8); EVT MaskVT = Mask.getValueType(); SDVTList VTs = DAG.getVTList(Op.getValueType(), MaskVT, MVT::Other); @@ -20034,7 +20248,10 @@ static SDValue getGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, SDValue Index, SDValue ScaleOp, SDValue Chain, const X86Subtarget &Subtarget) { SDLoc dl(Op); - auto *C = cast(ScaleOp); + auto *C = dyn_cast(ScaleOp); + // Scale must be constant. + if (!C) + return SDValue(); SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8); MVT MaskVT = MVT::getVectorVT(MVT::i1, Index.getSimpleValueType().getVectorNumElements()); @@ -20059,7 +20276,10 @@ static SDValue getScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, SDValue Index, SDValue ScaleOp, SDValue Chain, const X86Subtarget &Subtarget) { SDLoc dl(Op); - auto *C = cast(ScaleOp); + auto *C = dyn_cast(ScaleOp); + // Scale must be constant. + if (!C) + return SDValue(); SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8); SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32); SDValue Segment = DAG.getRegister(0, MVT::i32); @@ -20078,7 +20298,10 @@ static SDValue getPrefetchNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, SDValue ScaleOp, SDValue Chain, const X86Subtarget &Subtarget) { SDLoc dl(Op); - auto *C = cast(ScaleOp); + auto *C = dyn_cast(ScaleOp); + // Scale must be constant. + if (!C) + return SDValue(); SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8); SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32); SDValue Segment = DAG.getRegister(0, MVT::i32); @@ -20448,8 +20671,8 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget, } // ADC/ADCX/SBB case ADX: { - SDVTList CFVTs = DAG.getVTList(Op->getValueType(0), MVT::Other); - SDVTList VTs = DAG.getVTList(Op.getOperand(3)->getValueType(0), MVT::Other); + SDVTList CFVTs = DAG.getVTList(Op->getValueType(0), MVT::i32); + SDVTList VTs = DAG.getVTList(Op.getOperand(3)->getValueType(0), MVT::i32); SDValue GenCF = DAG.getNode(X86ISD::ADD, dl, CFVTs, Op.getOperand(2), DAG.getConstant(-1, dl, MVT::i8)); SDValue Res = DAG.getNode(IntrData->Opc0, dl, VTs, Op.getOperand(3), @@ -21826,8 +22049,9 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG, return getTargetVShiftByConstNode(X86Opc, dl, VT, R, ShiftAmt, DAG); // i64 SRA needs to be performed as partial shifts. - if ((VT == MVT::v2i64 || (Subtarget.hasInt256() && VT == MVT::v4i64)) && - Op.getOpcode() == ISD::SRA && !Subtarget.hasXOP()) + if (((!Subtarget.hasXOP() && VT == MVT::v2i64) || + (Subtarget.hasInt256() && VT == MVT::v4i64)) && + Op.getOpcode() == ISD::SRA) return ArithmeticShiftRight64(ShiftAmt); if (VT == MVT::v16i8 || @@ -21888,10 +22112,19 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG, } // Special case in 32-bit mode, where i64 is expanded into high and low parts. 
+ // TODO: Replace constant extraction with getTargetConstantBitsFromNode. if (!Subtarget.is64Bit() && !Subtarget.hasXOP() && (VT == MVT::v2i64 || (Subtarget.hasInt256() && VT == MVT::v4i64) || (Subtarget.hasAVX512() && VT == MVT::v8i64))) { + // AVX1 targets may be extracting a 128-bit vector from a 256-bit constant. + unsigned SubVectorScale = 1; + if (Amt.getOpcode() == ISD::EXTRACT_SUBVECTOR) { + SubVectorScale = + Amt.getOperand(0).getValueSizeInBits() / Amt.getValueSizeInBits(); + Amt = Amt.getOperand(0); + } + // Peek through any splat that was introduced for i64 shift vectorization. int SplatIndex = -1; if (ShuffleVectorSDNode *SVN = dyn_cast<ShuffleVectorSDNode>(Amt.getNode())) @@ -21908,7 +22141,7 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG, Amt = Amt.getOperand(0); unsigned Ratio = Amt.getSimpleValueType().getVectorNumElements() / - VT.getVectorNumElements(); + (SubVectorScale * VT.getVectorNumElements()); unsigned RatioInLog2 = Log2_32_Ceil(Ratio); uint64_t ShiftAmt = 0; unsigned BaseOp = (SplatIndex < 0 ? 0 : SplatIndex * Ratio); @@ -22282,23 +22515,21 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget, V1 = DAG.getBitcast(VT, V1); Sel = DAG.getBitcast(VT, Sel); Sel = DAG.getNode(X86ISD::CVT2MASK, dl, MaskVT, Sel); - return DAG.getBitcast(SelVT, - DAG.getNode(ISD::VSELECT, dl, VT, Sel, V0, V1)); + return DAG.getBitcast(SelVT, DAG.getSelect(dl, VT, Sel, V0, V1)); } else if (Subtarget.hasSSE41()) { // On SSE41 targets we make use of the fact that VSELECT lowers // to PBLENDVB which selects bytes based just on the sign bit. V0 = DAG.getBitcast(VT, V0); V1 = DAG.getBitcast(VT, V1); Sel = DAG.getBitcast(VT, Sel); - return DAG.getBitcast(SelVT, - DAG.getNode(ISD::VSELECT, dl, VT, Sel, V0, V1)); + return DAG.getBitcast(SelVT, DAG.getSelect(dl, VT, Sel, V0, V1)); } // On pre-SSE41 targets we test for the sign bit by comparing to // zero - a negative value will set all bits of the lanes to true // and VSELECT uses that in its OR(AND(V0,C),AND(V1,~C)) lowering. SDValue Z = getZeroVector(SelVT, Subtarget, DAG, dl); SDValue C = DAG.getNode(X86ISD::PCMPGT, dl, SelVT, Z, Sel); - return DAG.getNode(ISD::VSELECT, dl, SelVT, C, V0, V1); + return DAG.getSelect(dl, SelVT, C, V0, V1); }; // Turn 'a' into a mask suitable for VSELECT: a = a << 5; @@ -22420,15 +22651,14 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget, V0 = DAG.getBitcast(ExtVT, V0); V1 = DAG.getBitcast(ExtVT, V1); Sel = DAG.getBitcast(ExtVT, Sel); - return DAG.getBitcast( - VT, DAG.getNode(ISD::VSELECT, dl, ExtVT, Sel, V0, V1)); + return DAG.getBitcast(VT, DAG.getSelect(dl, ExtVT, Sel, V0, V1)); } // On pre-SSE41 targets we splat the sign bit - a negative value will // set all bits of the lanes to true and VSELECT uses that in // its OR(AND(V0,C),AND(V1,~C)) lowering. SDValue C = DAG.getNode(ISD::SRA, dl, VT, Sel, DAG.getConstant(15, dl, VT)); - return DAG.getNode(ISD::VSELECT, dl, VT, C, V0, V1); + return DAG.getSelect(dl, VT, C, V0, V1); }; // Turn 'a' into a mask suitable for VSELECT: a = a << 12; @@ -22483,10 +22713,31 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget, SDLoc DL(Op); SDValue R = Op.getOperand(0); SDValue Amt = Op.getOperand(1); + unsigned Opcode = Op.getOpcode(); + unsigned EltSizeInBits = VT.getScalarSizeInBits(); + + if (Subtarget.hasAVX512()) { + // Attempt to rotate by immediate.
+ APInt UndefElts; + SmallVector EltBits; + if (getTargetConstantBitsFromNode(Amt, EltSizeInBits, UndefElts, EltBits)) { + if (!UndefElts && llvm::all_of(EltBits, [EltBits](APInt &V) { + return EltBits[0] == V; + })) { + unsigned Op = (Opcode == ISD::ROTL ? X86ISD::VROTLI : X86ISD::VROTRI); + uint64_t RotateAmt = EltBits[0].urem(EltSizeInBits); + return DAG.getNode(Op, DL, VT, R, + DAG.getConstant(RotateAmt, DL, MVT::i8)); + } + } + + // Else, fall-back on VPROLV/VPRORV. + return Op; + } assert(VT.isVector() && "Custom lowering only for vector rotates!"); assert(Subtarget.hasXOP() && "XOP support required for vector rotates!"); - assert((Op.getOpcode() == ISD::ROTL) && "Only ROTL supported"); + assert((Opcode == ISD::ROTL) && "Only ROTL supported"); // XOP has 128-bit vector variable + immediate rotates. // +ve/-ve Amt = rotate left/right. @@ -22501,7 +22752,7 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget, if (auto *BVAmt = dyn_cast(Amt)) { if (auto *RotateConst = BVAmt->getConstantSplatNode()) { uint64_t RotateAmt = RotateConst->getAPIntValue().getZExtValue(); - assert(RotateAmt < VT.getScalarSizeInBits() && "Rotation out of range"); + assert(RotateAmt < EltSizeInBits && "Rotation out of range"); return DAG.getNode(X86ISD::VPROTI, DL, VT, R, DAG.getConstant(RotateAmt, DL, MVT::i8)); } @@ -22668,7 +22919,7 @@ X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const { auto Builder = IRBuilder<>(AI); Module *M = Builder.GetInsertBlock()->getParent()->getParent(); - auto SynchScope = AI->getSynchScope(); + auto SSID = AI->getSyncScopeID(); // We must restrict the ordering to avoid generating loads with Release or // ReleaseAcquire orderings. auto Order = AtomicCmpXchgInst::getStrongestFailureOrdering(AI->getOrdering()); @@ -22690,7 +22941,7 @@ X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const { // otherwise, we might be able to be more aggressive on relaxed idempotent // rmw. In practice, they do not look useful, so we don't try to be // especially clever. - if (SynchScope == SingleThread) + if (SSID == SyncScope::SingleThread) // FIXME: we could just insert an X86ISD::MEMBARRIER here, except we are at // the IR level, so we must wrap it in an intrinsic. return nullptr; @@ -22709,7 +22960,7 @@ X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const { // Finally we can emit the atomic load. LoadInst *Loaded = Builder.CreateAlignedLoad(Ptr, AI->getType()->getPrimitiveSizeInBits()); - Loaded->setAtomic(Order, SynchScope); + Loaded->setAtomic(Order, SSID); AI->replaceAllUsesWith(Loaded); AI->eraseFromParent(); return Loaded; @@ -22720,13 +22971,13 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, const X86Subtarget &Subtarget, SDLoc dl(Op); AtomicOrdering FenceOrdering = static_cast( cast(Op.getOperand(1))->getZExtValue()); - SynchronizationScope FenceScope = static_cast( + SyncScope::ID FenceSSID = static_cast( cast(Op.getOperand(2))->getZExtValue()); // The only fence that needs an instruction is a sequentially-consistent // cross-thread fence. if (FenceOrdering == AtomicOrdering::SequentiallyConsistent && - FenceScope == CrossThread) { + FenceSSID == SyncScope::System) { if (Subtarget.hasMFence()) return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0)); @@ -23054,6 +23305,20 @@ static SDValue LowerVectorCTPOP(SDValue Op, const X86Subtarget &Subtarget, SDLoc DL(Op.getNode()); SDValue Op0 = Op.getOperand(0); + // TRUNC(CTPOP(ZEXT(X))) to make use of vXi32/vXi64 VPOPCNT instructions. 
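// The AVX-512 immediate path above reduces the splatted rotate amount with
// EltBits[0].urem(EltSizeInBits) before forming VROTLI/VROTRI, since
// rotation is periodic in the element width. A scalar sketch of that
// reduction (illustrative only):
#include <cassert>
#include <cstdint>

uint32_t rotl32(uint32_t v, unsigned amt) {
  amt %= 32; // mirrors EltBits[0].urem(EltSizeInBits)
  return amt == 0 ? v : (v << amt) | (v >> (32 - amt));
}

int main() {
  // Rotating a 32-bit lane by W + k is the same as rotating by k.
  assert(rotl32(0x80000001u, 33) == rotl32(0x80000001u, 1));
  assert(rotl32(0xDEADBEEFu, 32) == 0xDEADBEEFu);
  return 0;
}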
+ if (Subtarget.hasVPOPCNTDQ()) { + if (VT == MVT::v8i16) { + Op = DAG.getNode(X86ISD::VZEXT, DL, MVT::v8i64, Op0); + Op = DAG.getNode(ISD::CTPOP, DL, MVT::v8i64, Op); + return DAG.getNode(X86ISD::VTRUNC, DL, VT, Op); + } + if (VT == MVT::v16i8 || VT == MVT::v16i16) { + Op = DAG.getNode(X86ISD::VZEXT, DL, MVT::v16i32, Op0); + Op = DAG.getNode(ISD::CTPOP, DL, MVT::v16i32, Op); + return DAG.getNode(X86ISD::VTRUNC, DL, VT, Op); + } + } + if (!Subtarget.hasSSSE3()) { // We can't use the fast LUT approach, so fall back on vectorized bitmath. assert(VT.is128BitVector() && "Only 128-bit vectors supported in SSE!"); @@ -23260,32 +23525,6 @@ static SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) { return Op; } -static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) { - MVT VT = Op.getNode()->getSimpleValueType(0); - - // Let legalize expand this if it isn't a legal type yet. - if (!DAG.getTargetLoweringInfo().isTypeLegal(VT)) - return SDValue(); - - SDVTList VTs = DAG.getVTList(VT, MVT::i32); - - unsigned Opc; - bool ExtraOp = false; - switch (Op.getOpcode()) { - default: llvm_unreachable("Invalid code"); - case ISD::ADDC: Opc = X86ISD::ADD; break; - case ISD::ADDE: Opc = X86ISD::ADC; ExtraOp = true; break; - case ISD::SUBC: Opc = X86ISD::SUB; break; - case ISD::SUBE: Opc = X86ISD::SBB; ExtraOp = true; break; - } - - if (!ExtraOp) - return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), - Op.getOperand(1)); - return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), - Op.getOperand(1), Op.getOperand(2)); -} - static SDValue LowerADDSUBCARRY(SDValue Op, SelectionDAG &DAG) { SDNode *N = Op.getNode(); MVT VT = N->getSimpleValueType(0); @@ -23388,8 +23627,6 @@ static SDValue ExtendToType(SDValue InOp, MVT NVT, SelectionDAG &DAG, assert(WidenNumElts > InNumElts && WidenNumElts % InNumElts == 0 && "Unexpected request for vector widening"); - EVT EltVT = NVT.getVectorElementType(); - SDLoc dl(InOp); if (InOp.getOpcode() == ISD::CONCAT_VECTORS && InOp.getNumOperands() == 2) { @@ -23407,6 +23644,8 @@ static SDValue ExtendToType(SDValue InOp, MVT NVT, SelectionDAG &DAG, for (unsigned i = 0; i < InNumElts; ++i) Ops.push_back(InOp.getOperand(i)); + EVT EltVT = InOp.getOperand(0).getValueType(); + SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, EltVT) : DAG.getUNDEF(EltVT); for (unsigned i = 0; i < WidenNumElts - InNumElts; ++i) @@ -23681,6 +23920,57 @@ static SDValue LowerMGATHER(SDValue Op, const X86Subtarget &Subtarget, SDValue RetOps[] = {Exract, NewGather.getValue(1)}; return DAG.getMergeValues(RetOps, dl); } + if (N->getMemoryVT() == MVT::v2i32 && Subtarget.hasVLX()) { + // There is a special case when the return type v2i32 is illegal and + // the type legalizer extended it to v2i64. Without this conversion we end up + // with VPGATHERQQ (reading q-words from the memory) instead of VPGATHERQD. + // In order to avoid this situation, we'll build an X86-specific Gather node + // with index v2i64 and value type v4i32. assert(VT == MVT::v2i64 && Src0.getValueType() == MVT::v2i64 && "Unexpected type in masked gather"); + Src0 = DAG.getVectorShuffle(MVT::v4i32, dl, + DAG.getBitcast(MVT::v4i32, Src0), + DAG.getUNDEF(MVT::v4i32), { 0, 2, -1, -1 }); + // The mask should match the destination type. Extending the mask with zeroes + // is not necessary since the instruction itself reads only two values from + // memory.
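// The <0, 2, -1, -1> shuffle above keeps the low dword of each qword lane,
// which is how the v2i64-typed pass-through value becomes a v4i32 one for
// VPGATHERQD. A byte-level sketch, assuming a little-endian host as on x86
// (illustrative only):
#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  uint64_t src0[2] = {0x1111111122222222ull, 0x3333333344444444ull};
  uint32_t asI32[4];
  std::memcpy(asI32, src0, sizeof(asI32));   // bitcast v2i64 -> v4i32
  uint32_t packed[2] = {asI32[0], asI32[2]}; // shuffle <0, 2, -1, -1>
  assert(packed[0] == 0x22222222u && packed[1] == 0x44444444u);
  return 0;
}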
+ Mask = ExtendToType(Mask, MVT::v4i1, DAG, false); + SDValue Ops[] = { N->getChain(), Src0, Mask, N->getBasePtr(), Index }; + SDValue NewGather = DAG.getTargetMemSDNode( + DAG.getVTList(MVT::v4i32, MVT::Other), Ops, dl, N->getMemoryVT(), + N->getMemOperand()); + + SDValue Sext = getExtendInVec(X86ISD::VSEXT, dl, MVT::v2i64, + NewGather.getValue(0), DAG); + SDValue RetOps[] = { Sext, NewGather.getValue(1) }; + return DAG.getMergeValues(RetOps, dl); + } + if (N->getMemoryVT() == MVT::v2f32 && Subtarget.hasVLX()) { + // This transformation is for optimization only. + // The type legalizer extended the mask and index to a 4-element vector + // in order to match the requirements of the common gather node - same + // vector width of index and value. The X86 Gather node allows a mismatch + // of vector widths in order to select a more optimal instruction at the + // end. assert(VT == MVT::v4f32 && Src0.getValueType() == MVT::v4f32 && "Unexpected type in masked gather"); + if (Mask.getOpcode() == ISD::CONCAT_VECTORS && + ISD::isBuildVectorAllZeros(Mask.getOperand(1).getNode()) && + Index.getOpcode() == ISD::CONCAT_VECTORS && + Index.getOperand(1).isUndef()) { + Mask = ExtendToType(Mask.getOperand(0), MVT::v4i1, DAG, false); + Index = Index.getOperand(0); + } else + return Op; + SDValue Ops[] = { N->getChain(), Src0, Mask, N->getBasePtr(), Index }; + SDValue NewGather = DAG.getTargetMemSDNode( + DAG.getVTList(MVT::v4f32, MVT::Other), Ops, dl, N->getMemoryVT(), + N->getMemOperand()); + + SDValue RetOps[] = { NewGather.getValue(0), NewGather.getValue(1) }; + return DAG.getMergeValues(RetOps, dl); + + } return Op; } @@ -23776,7 +24066,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); case ISD::FGETSIGN: return LowerFGETSIGN(Op, DAG); case ISD::SETCC: return LowerSETCC(Op, DAG); - case ISD::SETCCE: return LowerSETCCE(Op, DAG); + case ISD::SETCCCARRY: return LowerSETCCCARRY(Op, DAG); case ISD::SELECT: return LowerSELECT(Op, DAG); case ISD::BRCOND: return LowerBRCOND(Op, DAG); case ISD::JumpTable: return LowerJumpTable(Op, DAG); @@ -23809,7 +24099,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::MULHU: return LowerMULH(Op, Subtarget, DAG); case ISD::UMUL_LOHI: case ISD::SMUL_LOHI: return LowerMUL_LOHI(Op, Subtarget, DAG); - case ISD::ROTL: return LowerRotate(Op, Subtarget, DAG); + case ISD::ROTL: + case ISD::ROTR: return LowerRotate(Op, Subtarget, DAG); case ISD::SRA: case ISD::SRL: case ISD::SHL: return LowerShift(Op, Subtarget, DAG); @@ -23821,10 +24112,6 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::UMULO: return LowerXALUO(Op, DAG); case ISD::READCYCLECOUNTER: return LowerREADCYCLECOUNTER(Op, Subtarget,DAG); case ISD::BITCAST: return LowerBITCAST(Op, Subtarget, DAG); - case ISD::ADDC: - case ISD::ADDE: - case ISD::SUBC: - case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG); case ISD::ADDCARRY: case ISD::SUBCARRY: return LowerADDSUBCARRY(Op, DAG); case ISD::ADD: @@ -24488,6 +24775,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::CVTS2SI_RND: return "X86ISD::CVTS2SI_RND"; case X86ISD::CVTS2UI_RND: return "X86ISD::CVTS2UI_RND"; case X86ISD::LWPINS: return "X86ISD::LWPINS"; + case X86ISD::MGATHER: return "X86ISD::MGATHER"; } return nullptr; } @@ -24712,16 +25000,22 @@ static MachineBasicBlock *emitXBegin(MachineInstr &MI, MachineBasicBlock *MBB, // xbegin sinkMBB // // mainMBB: - // eax =
-1 + // s0 = -1 + // + // fallBB: + // eax = # XABORT_DEF + // s1 = eax // // sinkMBB: - // v = eax + // v = phi(s0/mainBB, s1/fallBB) MachineBasicBlock *thisMBB = MBB; MachineFunction *MF = MBB->getParent(); MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB); + MachineBasicBlock *fallMBB = MF->CreateMachineBasicBlock(BB); MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB); MF->insert(I, mainMBB); + MF->insert(I, fallMBB); MF->insert(I, sinkMBB); // Transfer the remainder of BB and its successor edges to sinkMBB. @@ -24729,25 +25023,40 @@ static MachineBasicBlock *emitXBegin(MachineInstr &MI, MachineBasicBlock *MBB, std::next(MachineBasicBlock::iterator(MI)), MBB->end()); sinkMBB->transferSuccessorsAndUpdatePHIs(MBB); + MachineRegisterInfo &MRI = MF->getRegInfo(); + unsigned DstReg = MI.getOperand(0).getReg(); + const TargetRegisterClass *RC = MRI.getRegClass(DstReg); + unsigned mainDstReg = MRI.createVirtualRegister(RC); + unsigned fallDstReg = MRI.createVirtualRegister(RC); + // thisMBB: - // xbegin sinkMBB + // xbegin fallMBB // # fallthrough to mainMBB - // # abortion to sinkMBB - BuildMI(thisMBB, DL, TII->get(X86::XBEGIN_4)).addMBB(sinkMBB); + // # abortion to fallMBB + BuildMI(thisMBB, DL, TII->get(X86::XBEGIN_4)).addMBB(fallMBB); thisMBB->addSuccessor(mainMBB); - thisMBB->addSuccessor(sinkMBB); + thisMBB->addSuccessor(fallMBB); // mainMBB: - // EAX = -1 - BuildMI(mainMBB, DL, TII->get(X86::MOV32ri), X86::EAX).addImm(-1); + // mainDstReg := -1 + BuildMI(mainMBB, DL, TII->get(X86::MOV32ri), mainDstReg).addImm(-1); + BuildMI(mainMBB, DL, TII->get(X86::JMP_1)).addMBB(sinkMBB); mainMBB->addSuccessor(sinkMBB); - // sinkMBB: - // EAX is live into the sinkMBB - sinkMBB->addLiveIn(X86::EAX); - BuildMI(*sinkMBB, sinkMBB->begin(), DL, TII->get(TargetOpcode::COPY), - MI.getOperand(0).getReg()) + // fallMBB: + // ; pseudo instruction to model hardware's definition from XABORT + // EAX := XABORT_DEF + // fallDstReg := EAX + BuildMI(fallMBB, DL, TII->get(X86::XABORT_DEF)); + BuildMI(fallMBB, DL, TII->get(TargetOpcode::COPY), fallDstReg) .addReg(X86::EAX); + fallMBB->addSuccessor(sinkMBB); + + // sinkMBB: + // DstReg := phi(mainDstReg/mainBB, fallDstReg/fallBB) + BuildMI(*sinkMBB, sinkMBB->begin(), DL, TII->get(X86::PHI), DstReg) + .addReg(mainDstReg).addMBB(mainMBB) + .addReg(fallDstReg).addMBB(fallMBB); MI.eraseFromParent(); return sinkMBB; @@ -26737,6 +27046,17 @@ unsigned X86TargetLowering::ComputeNumSignBitsForTargetNode( return Tmp; } + case X86ISD::VSHLI: { + SDValue Src = Op.getOperand(0); + unsigned Tmp = DAG.ComputeNumSignBits(Src, Depth + 1); + APInt ShiftVal = cast(Op.getOperand(1))->getAPIntValue(); + if (ShiftVal.uge(VTBits)) + return VTBits; // Shifted all bits out --> zero. + if (ShiftVal.uge(Tmp)) + return 1; // Shifted all sign bits out --> unknown. + return Tmp - ShiftVal.getZExtValue(); + } + case X86ISD::VSRAI: { SDValue Src = Op.getOperand(0); unsigned Tmp = DAG.ComputeNumSignBits(Src, Depth + 1); @@ -26898,49 +27218,78 @@ static bool matchUnaryVectorShuffle(MVT MaskVT, ArrayRef Mask, // permute instructions. // TODO: Investigate sharing more of this with shuffle lowering. 
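// The VSHLI case added to ComputeNumSignBitsForTargetNode above uses the
// fact that shifting left by S discards S of the known sign bits (and
// shifting the whole element out leaves zero). An exact scalar check of
// that bound (illustrative only):
#include <cassert>
#include <cstdint>

// Leading bits equal to the sign bit, including the sign bit itself.
unsigned numSignBits(uint32_t v) {
  unsigned sign = v >> 31;
  unsigned n = 1;
  while (n < 32 && ((v >> (31 - n)) & 1) == sign)
    ++n;
  return n;
}

int main() {
  uint32_t x = 0xFFFFF000u; // 20 sign bits
  assert(numSignBits(x) == 20);
  uint32_t y = x << 4;      // VSHLI by 4: 0xFFFF0000
  assert(numSignBits(y) == 20 - 4);
  return 0;
}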
static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef Mask, + const APInt &Zeroable, bool AllowFloatDomain, bool AllowIntDomain, const X86Subtarget &Subtarget, unsigned &Shuffle, MVT &ShuffleVT, unsigned &PermuteImm) { unsigned NumMaskElts = Mask.size(); + unsigned InputSizeInBits = MaskVT.getSizeInBits(); + unsigned MaskScalarSizeInBits = InputSizeInBits / NumMaskElts; + MVT MaskEltVT = MVT::getIntegerVT(MaskScalarSizeInBits); - bool ContainsZeros = false; - APInt Zeroable(NumMaskElts, false); - for (unsigned i = 0; i != NumMaskElts; ++i) { - int M = Mask[i]; - if (isUndefOrZero(M)) - Zeroable.setBit(i); - ContainsZeros |= (M == SM_SentinelZero); - } + bool ContainsZeros = + llvm::any_of(Mask, [](int M) { return M == SM_SentinelZero; }); - // Attempt to match against byte/bit shifts. - // FIXME: Add 512-bit support. - if (AllowIntDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSE2()) || - (MaskVT.is256BitVector() && Subtarget.hasAVX2()))) { - int ShiftAmt = matchVectorShuffleAsShift(ShuffleVT, Shuffle, - MaskVT.getScalarSizeInBits(), Mask, - 0, Zeroable, Subtarget); - if (0 < ShiftAmt) { - PermuteImm = (unsigned)ShiftAmt; + // Handle VPERMI/VPERMILPD vXi64/vXi64 patterns. + if (!ContainsZeros && MaskScalarSizeInBits == 64) { + // Check for lane crossing permutes. + if (is128BitLaneCrossingShuffleMask(MaskEltVT, Mask)) { + // PERMPD/PERMQ permutes within a 256-bit vector (AVX2+). + if (Subtarget.hasAVX2() && MaskVT.is256BitVector()) { + Shuffle = X86ISD::VPERMI; + ShuffleVT = (AllowFloatDomain ? MVT::v4f64 : MVT::v4i64); + PermuteImm = getV4X86ShuffleImm(Mask); + return true; + } + if (Subtarget.hasAVX512() && MaskVT.is512BitVector()) { + SmallVector RepeatedMask; + if (is256BitLaneRepeatedShuffleMask(MVT::v8f64, Mask, RepeatedMask)) { + Shuffle = X86ISD::VPERMI; + ShuffleVT = (AllowFloatDomain ? MVT::v8f64 : MVT::v8i64); + PermuteImm = getV4X86ShuffleImm(RepeatedMask); + return true; + } + } + } else if (AllowFloatDomain && Subtarget.hasAVX()) { + // VPERMILPD can permute with a non-repeating shuffle. + Shuffle = X86ISD::VPERMILPI; + ShuffleVT = MVT::getVectorVT(MVT::f64, Mask.size()); + PermuteImm = 0; + for (int i = 0, e = Mask.size(); i != e; ++i) { + int M = Mask[i]; + if (M == SM_SentinelUndef) + continue; + assert(((M / 2) == (i / 2)) && "Out of range shuffle mask index"); + PermuteImm |= (M & 1) << i; + } return true; } } - // Ensure we don't contain any zero elements. - if (ContainsZeros) - return false; - - assert(llvm::all_of(Mask, [&](int M) { - return SM_SentinelUndef <= M && M < (int)NumMaskElts; - }) && "Expected unary shuffle"); - - unsigned InputSizeInBits = MaskVT.getSizeInBits(); - unsigned MaskScalarSizeInBits = InputSizeInBits / Mask.size(); - MVT MaskEltVT = MVT::getIntegerVT(MaskScalarSizeInBits); + // Handle PSHUFD/VPERMILPI vXi32/vXf32 repeated patterns. + // AVX introduced the VPERMILPD/VPERMILPS float permutes, before then we + // had to use 2-input SHUFPD/SHUFPS shuffles (not handled here). + if ((MaskScalarSizeInBits == 64 || MaskScalarSizeInBits == 32) && + !ContainsZeros && (AllowIntDomain || Subtarget.hasAVX())) { + SmallVector RepeatedMask; + if (is128BitLaneRepeatedShuffleMask(MaskEltVT, Mask, RepeatedMask)) { + // Narrow the repeated mask to create 32-bit element permutes. + SmallVector WordMask = RepeatedMask; + if (MaskScalarSizeInBits == 64) + scaleShuffleMask(2, RepeatedMask, WordMask); + + Shuffle = (AllowIntDomain ? X86ISD::PSHUFD : X86ISD::VPERMILPI); + ShuffleVT = (AllowIntDomain ? 
MVT::i32 : MVT::f32); + ShuffleVT = MVT::getVectorVT(ShuffleVT, InputSizeInBits / 32); + PermuteImm = getV4X86ShuffleImm(WordMask); + return true; + } + } - // Handle PSHUFLW/PSHUFHW repeated patterns. - if (MaskScalarSizeInBits == 16) { + // Handle PSHUFLW/PSHUFHW vXi16 repeated patterns. + if (!ContainsZeros && AllowIntDomain && MaskScalarSizeInBits == 16) { SmallVector RepeatedMask; if (is128BitLaneRepeatedShuffleMask(MaskEltVT, Mask, RepeatedMask)) { ArrayRef LoMask(Mask.data() + 0, 4); @@ -26968,78 +27317,23 @@ static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef Mask, PermuteImm = getV4X86ShuffleImm(OffsetHiMask); return true; } - - return false; } - return false; } - // We only support permutation of 32/64 bit elements after this. - if (MaskScalarSizeInBits != 32 && MaskScalarSizeInBits != 64) - return false; - - // AVX introduced the VPERMILPD/VPERMILPS float permutes, before then we - // had to use 2-input SHUFPD/SHUFPS shuffles (not handled here). - if ((AllowFloatDomain && !AllowIntDomain) && !Subtarget.hasAVX()) - return false; - - // Pre-AVX2 we must use float shuffles on 256-bit vectors. - if (MaskVT.is256BitVector() && !Subtarget.hasAVX2()) { - AllowFloatDomain = true; - AllowIntDomain = false; - } - - // Check for lane crossing permutes. - if (is128BitLaneCrossingShuffleMask(MaskEltVT, Mask)) { - // PERMPD/PERMQ permutes within a 256-bit vector (AVX2+). - if (Subtarget.hasAVX2() && MaskVT.is256BitVector() && Mask.size() == 4) { - Shuffle = X86ISD::VPERMI; - ShuffleVT = (AllowFloatDomain ? MVT::v4f64 : MVT::v4i64); - PermuteImm = getV4X86ShuffleImm(Mask); + // Attempt to match against byte/bit shifts. + // FIXME: Add 512-bit support. + if (AllowIntDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSE2()) || + (MaskVT.is256BitVector() && Subtarget.hasAVX2()))) { + int ShiftAmt = matchVectorShuffleAsShift(ShuffleVT, Shuffle, + MaskScalarSizeInBits, Mask, + 0, Zeroable, Subtarget); + if (0 < ShiftAmt) { + PermuteImm = (unsigned)ShiftAmt; return true; } - if (Subtarget.hasAVX512() && MaskVT.is512BitVector() && Mask.size() == 8) { - SmallVector RepeatedMask; - if (is256BitLaneRepeatedShuffleMask(MVT::v8f64, Mask, RepeatedMask)) { - Shuffle = X86ISD::VPERMI; - ShuffleVT = (AllowFloatDomain ? MVT::v8f64 : MVT::v8i64); - PermuteImm = getV4X86ShuffleImm(RepeatedMask); - return true; - } - } - return false; - } - - // VPERMILPD can permute with a non-repeating shuffle. - if (AllowFloatDomain && MaskScalarSizeInBits == 64) { - Shuffle = X86ISD::VPERMILPI; - ShuffleVT = MVT::getVectorVT(MVT::f64, Mask.size()); - PermuteImm = 0; - for (int i = 0, e = Mask.size(); i != e; ++i) { - int M = Mask[i]; - if (M == SM_SentinelUndef) - continue; - assert(((M / 2) == (i / 2)) && "Out of range shuffle mask index"); - PermuteImm |= (M & 1) << i; - } - return true; } - // We need a repeating shuffle mask for VPERMILPS/PSHUFD. - SmallVector RepeatedMask; - if (!is128BitLaneRepeatedShuffleMask(MaskEltVT, Mask, RepeatedMask)) - return false; - - // Narrow the repeated mask for 32-bit element permutes. - SmallVector WordMask = RepeatedMask; - if (MaskScalarSizeInBits == 64) - scaleShuffleMask(2, RepeatedMask, WordMask); - - Shuffle = (AllowFloatDomain ? X86ISD::VPERMILPI : X86ISD::PSHUFD); - ShuffleVT = (AllowFloatDomain ? 
MVT::f32 : MVT::i32); - ShuffleVT = MVT::getVectorVT(ShuffleVT, InputSizeInBits / 32); - PermuteImm = getV4X86ShuffleImm(WordMask); - return true; + return false; } // Attempt to match a combined unary shuffle mask against supported binary @@ -27101,6 +27395,7 @@ static bool matchBinaryVectorShuffle(MVT MaskVT, ArrayRef Mask, } static bool matchBinaryPermuteVectorShuffle(MVT MaskVT, ArrayRef Mask, + const APInt &Zeroable, bool AllowFloatDomain, bool AllowIntDomain, SDValue &V1, SDValue &V2, SDLoc &DL, @@ -27186,11 +27481,6 @@ static bool matchBinaryPermuteVectorShuffle(MVT MaskVT, ArrayRef Mask, // Attempt to combine to INSERTPS. if (AllowFloatDomain && EltSizeInBits == 32 && Subtarget.hasSSE41() && MaskVT.is128BitVector()) { - APInt Zeroable(4, 0); - for (unsigned i = 0; i != NumMaskElts; ++i) - if (Mask[i] < 0) - Zeroable.setBit(i); - if (Zeroable.getBoolValue() && matchVectorShuffleAsInsertPS(V1, V2, PermuteImm, Zeroable, Mask, DAG)) { Shuffle = X86ISD::INSERTPS; @@ -27376,7 +27666,14 @@ static bool combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, // Which shuffle domains are permitted? // Permit domain crossing at higher combine depths. bool AllowFloatDomain = FloatDomain || (Depth > 3); - bool AllowIntDomain = !FloatDomain || (Depth > 3); + bool AllowIntDomain = (!FloatDomain || (Depth > 3)) && + (!MaskVT.is256BitVector() || Subtarget.hasAVX2()); + + // Determine zeroable mask elements. + APInt Zeroable(NumMaskElts, 0); + for (unsigned i = 0; i != NumMaskElts; ++i) + if (isUndefOrZero(Mask[i])) + Zeroable.setBit(i); if (UnaryShuffle) { // If we are shuffling a X86ISD::VZEXT_LOAD then we can use the load @@ -27410,7 +27707,7 @@ static bool combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, return true; } - if (matchUnaryPermuteVectorShuffle(MaskVT, Mask, AllowFloatDomain, + if (matchUnaryPermuteVectorShuffle(MaskVT, Mask, Zeroable, AllowFloatDomain, AllowIntDomain, Subtarget, Shuffle, ShuffleVT, PermuteImm)) { if (Depth == 1 && Root.getOpcode() == Shuffle) @@ -27446,7 +27743,7 @@ static bool combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, return true; } - if (matchBinaryPermuteVectorShuffle(MaskVT, Mask, AllowFloatDomain, + if (matchBinaryPermuteVectorShuffle(MaskVT, Mask, Zeroable, AllowFloatDomain, AllowIntDomain, V1, V2, DL, DAG, Subtarget, Shuffle, ShuffleVT, PermuteImm)) { @@ -27466,6 +27763,45 @@ static bool combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, return true; } + // Typically from here on, we need an integer version of MaskVT. + MVT IntMaskVT = MVT::getIntegerVT(MaskEltSizeInBits); + IntMaskVT = MVT::getVectorVT(IntMaskVT, NumMaskElts); + + // Annoyingly, SSE4A instructions don't map into the above match helpers. + if (Subtarget.hasSSE4A() && AllowIntDomain && RootSizeInBits == 128) { + uint64_t BitLen, BitIdx; + if (matchVectorShuffleAsEXTRQ(IntMaskVT, V1, V2, Mask, BitLen, BitIdx, + Zeroable)) { + if (Depth == 1 && Root.getOpcode() == X86ISD::EXTRQI) + return false; // Nothing to do! + V1 = DAG.getBitcast(IntMaskVT, V1); + DCI.AddToWorklist(V1.getNode()); + Res = DAG.getNode(X86ISD::EXTRQI, DL, IntMaskVT, V1, + DAG.getConstant(BitLen, DL, MVT::i8), + DAG.getConstant(BitIdx, DL, MVT::i8)); + DCI.AddToWorklist(Res.getNode()); + DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res), + /*AddTo*/ true); + return true; + } + + if (matchVectorShuffleAsINSERTQ(IntMaskVT, V1, V2, Mask, BitLen, BitIdx)) { + if (Depth == 1 && Root.getOpcode() == X86ISD::INSERTQI) + return false; // Nothing to do! 
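// The EXTRQI node formed above is a 64-bit bit-field extract: BitLen bits
// taken starting at BitIdx of the low qword. A scalar sketch that sidesteps
// the ISA's special length-0 encoding (illustrative only):
#include <cassert>
#include <cstdint>

uint64_t extrq(uint64_t src, unsigned bitLen, unsigned bitIdx) {
  assert(bitLen >= 1 && bitLen <= 63 && bitIdx + bitLen <= 64);
  return (src >> bitIdx) & ((uint64_t{1} << bitLen) - 1);
}

int main() {
  // Extract 16 bits starting at bit 16.
  assert(extrq(0x00000000FFFF0000ull, 16, 16) == 0xFFFFull);
  return 0;
}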
+ V1 = DAG.getBitcast(IntMaskVT, V1); + DCI.AddToWorklist(V1.getNode()); + V2 = DAG.getBitcast(IntMaskVT, V2); + DCI.AddToWorklist(V2.getNode()); + Res = DAG.getNode(X86ISD::INSERTQI, DL, IntMaskVT, V1, V2, + DAG.getConstant(BitLen, DL, MVT::i8), + DAG.getConstant(BitIdx, DL, MVT::i8)); + DCI.AddToWorklist(Res.getNode()); + DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res), + /*AddTo*/ true); + return true; + } + } + // Don't try to re-form single instruction chains under any circumstances now // that we've done encoding canonicalization for them. if (Depth < 2) @@ -27486,9 +27822,7 @@ static bool combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, (Subtarget.hasBWI() && Subtarget.hasVLX() && MaskVT == MVT::v16i16) || (Subtarget.hasVBMI() && MaskVT == MVT::v64i8) || (Subtarget.hasVBMI() && Subtarget.hasVLX() && MaskVT == MVT::v32i8))) { - MVT VPermMaskSVT = MVT::getIntegerVT(MaskEltSizeInBits); - MVT VPermMaskVT = MVT::getVectorVT(VPermMaskSVT, NumMaskElts); - SDValue VPermMask = getConstVector(Mask, VPermMaskVT, DAG, DL, true); + SDValue VPermMask = getConstVector(Mask, IntMaskVT, DAG, DL, true); DCI.AddToWorklist(VPermMask.getNode()); Res = DAG.getBitcast(MaskVT, V1); DCI.AddToWorklist(Res.getNode()); @@ -27517,9 +27851,7 @@ static bool combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, if (Mask[i] == SM_SentinelZero) Mask[i] = NumMaskElts + i; - MVT VPermMaskSVT = MVT::getIntegerVT(MaskEltSizeInBits); - MVT VPermMaskVT = MVT::getVectorVT(VPermMaskSVT, NumMaskElts); - SDValue VPermMask = getConstVector(Mask, VPermMaskVT, DAG, DL, true); + SDValue VPermMask = getConstVector(Mask, IntMaskVT, DAG, DL, true); DCI.AddToWorklist(VPermMask.getNode()); Res = DAG.getBitcast(MaskVT, V1); DCI.AddToWorklist(Res.getNode()); @@ -27544,9 +27876,7 @@ static bool combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, (Subtarget.hasBWI() && Subtarget.hasVLX() && MaskVT == MVT::v16i16) || (Subtarget.hasVBMI() && MaskVT == MVT::v64i8) || (Subtarget.hasVBMI() && Subtarget.hasVLX() && MaskVT == MVT::v32i8))) { - MVT VPermMaskSVT = MVT::getIntegerVT(MaskEltSizeInBits); - MVT VPermMaskVT = MVT::getVectorVT(VPermMaskSVT, NumMaskElts); - SDValue VPermMask = getConstVector(Mask, VPermMaskVT, DAG, DL, true); + SDValue VPermMask = getConstVector(Mask, IntMaskVT, DAG, DL, true); DCI.AddToWorklist(VPermMask.getNode()); V1 = DAG.getBitcast(MaskVT, V1); DCI.AddToWorklist(V1.getNode()); @@ -27605,8 +27935,7 @@ static bool combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, M < 0 ? 
DAG.getUNDEF(MVT::i32) : DAG.getConstant(M % 4, DL, MVT::i32); VPermIdx.push_back(Idx); } - MVT VPermMaskVT = MVT::getVectorVT(MVT::i32, NumMaskElts); - SDValue VPermMask = DAG.getBuildVector(VPermMaskVT, DL, VPermIdx); + SDValue VPermMask = DAG.getBuildVector(IntMaskVT, DL, VPermIdx); DCI.AddToWorklist(VPermMask.getNode()); Res = DAG.getBitcast(MaskVT, V1); DCI.AddToWorklist(Res.getNode()); @@ -27629,8 +27958,6 @@ static bool combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, unsigned NumLanes = MaskVT.getSizeInBits() / 128; unsigned NumEltsPerLane = NumMaskElts / NumLanes; SmallVector VPerm2Idx; - MVT MaskIdxSVT = MVT::getIntegerVT(MaskVT.getScalarSizeInBits()); - MVT MaskIdxVT = MVT::getVectorVT(MaskIdxSVT, NumMaskElts); unsigned M2ZImm = 0; for (int M : Mask) { if (M == SM_SentinelUndef) { @@ -27650,7 +27977,7 @@ static bool combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, DCI.AddToWorklist(V1.getNode()); V2 = DAG.getBitcast(MaskVT, V2); DCI.AddToWorklist(V2.getNode()); - SDValue VPerm2MaskOp = getConstVector(VPerm2Idx, MaskIdxVT, DAG, DL, true); + SDValue VPerm2MaskOp = getConstVector(VPerm2Idx, IntMaskVT, DAG, DL, true); DCI.AddToWorklist(VPerm2MaskOp.getNode()); Res = DAG.getNode(X86ISD::VPERMIL2, DL, MaskVT, V1, V2, VPerm2MaskOp, DAG.getConstant(M2ZImm, DL, MVT::i8)); @@ -27885,7 +28212,7 @@ static bool combineX86ShufflesRecursively(ArrayRef SrcOps, // Extract target shuffle mask and resolve sentinels and inputs. SmallVector OpMask; SmallVector OpInputs; - if (!resolveTargetShuffleInputs(Op, OpInputs, OpMask)) + if (!resolveTargetShuffleInputs(Op, OpInputs, OpMask, DAG)) return false; assert(OpInputs.size() <= 2 && "Too many shuffle inputs"); @@ -27919,28 +28246,45 @@ static bool combineX86ShufflesRecursively(ArrayRef SrcOps, OpMask.size() % RootMask.size() == 0) || OpMask.size() == RootMask.size()) && "The smaller number of elements must divide the larger."); - int MaskWidth = std::max(OpMask.size(), RootMask.size()); - int RootRatio = std::max(1, OpMask.size() / RootMask.size()); - int OpRatio = std::max(1, RootMask.size() / OpMask.size()); - assert(((RootRatio == 1 && OpRatio == 1) || - (RootRatio == 1) != (OpRatio == 1)) && + + // This function can be performance-critical, so we rely on the power-of-2 + // knowledge that we have about the mask sizes to replace div/rem ops with + // bit-masks and shifts. + assert(isPowerOf2_32(RootMask.size()) && "Non-power-of-2 shuffle mask sizes"); + assert(isPowerOf2_32(OpMask.size()) && "Non-power-of-2 shuffle mask sizes"); + unsigned RootMaskSizeLog2 = countTrailingZeros(RootMask.size()); + unsigned OpMaskSizeLog2 = countTrailingZeros(OpMask.size()); + + unsigned MaskWidth = std::max(OpMask.size(), RootMask.size()); + unsigned RootRatio = std::max(1, OpMask.size() >> RootMaskSizeLog2); + unsigned OpRatio = std::max(1, RootMask.size() >> OpMaskSizeLog2); + assert((RootRatio == 1 || OpRatio == 1) && "Must not have a ratio for both incoming and op masks!"); - SmallVector Mask((unsigned)MaskWidth, SM_SentinelUndef); + assert(isPowerOf2_32(MaskWidth) && "Non-power-of-2 shuffle mask sizes"); + assert(isPowerOf2_32(RootRatio) && "Non-power-of-2 shuffle mask sizes"); + assert(isPowerOf2_32(OpRatio) && "Non-power-of-2 shuffle mask sizes"); + unsigned RootRatioLog2 = countTrailingZeros(RootRatio); + unsigned OpRatioLog2 = countTrailingZeros(OpRatio); + + SmallVector Mask(MaskWidth, SM_SentinelUndef); // Merge this shuffle operation's mask into our accumulated mask. 
Note that // this shuffle's mask will be the first applied to the input, followed by the // root mask to get us all the way to the root value arrangement. The reason // for this order is that we are recursing up the operation chain. - for (int i = 0; i < MaskWidth; ++i) { - int RootIdx = i / RootRatio; + for (unsigned i = 0; i < MaskWidth; ++i) { + unsigned RootIdx = i >> RootRatioLog2; if (RootMask[RootIdx] < 0) { // This is a zero or undef lane, we're done. Mask[i] = RootMask[RootIdx]; continue; } - int RootMaskedIdx = RootMask[RootIdx] * RootRatio + i % RootRatio; + unsigned RootMaskedIdx = + RootRatio == 1 + ? RootMask[RootIdx] + : (RootMask[RootIdx] << RootRatioLog2) + (i & (RootRatio - 1)); // Just insert the scaled root mask value if it references an input other // than the SrcOp we're currently inserting. @@ -27950,9 +28294,8 @@ static bool combineX86ShufflesRecursively(ArrayRef SrcOps, continue; } - RootMaskedIdx %= MaskWidth; - - int OpIdx = RootMaskedIdx / OpRatio; + RootMaskedIdx = RootMaskedIdx & (MaskWidth - 1); + unsigned OpIdx = RootMaskedIdx >> OpRatioLog2; if (OpMask[OpIdx] < 0) { // The incoming lanes are zero or undef, it doesn't matter which ones we // are using. @@ -27961,9 +28304,12 @@ static bool combineX86ShufflesRecursively(ArrayRef SrcOps, } // Ok, we have non-zero lanes, map them through to one of the Op's inputs. - int OpMaskedIdx = OpMask[OpIdx] * OpRatio + RootMaskedIdx % OpRatio; - OpMaskedIdx %= MaskWidth; + unsigned OpMaskedIdx = + OpRatio == 1 + ? OpMask[OpIdx] + : (OpMask[OpIdx] << OpRatioLog2) + (RootMaskedIdx & (OpRatio - 1)); + OpMaskedIdx = OpMaskedIdx & (MaskWidth - 1); if (OpMask[OpIdx] < (int)OpMask.size()) { assert(0 <= InputIdx0 && "Unknown target shuffle input"); OpMaskedIdx += InputIdx0 * MaskWidth; @@ -28784,7 +29130,8 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG, } if (Elts.size() == VT.getVectorNumElements()) - if (SDValue LD = EltsFromConsecutiveLoads(VT, Elts, dl, DAG, true)) + if (SDValue LD = + EltsFromConsecutiveLoads(VT, Elts, dl, DAG, Subtarget, true)) return LD; // For AVX2, we sometimes want to combine @@ -28916,12 +29263,118 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG, EltNo); } +// Try to match patterns such as +// (i16 bitcast (v16i1 x)) +// -> +// (i16 movmsk (v16i8 sext (v16i1 x))) +// before the illegal vector is scalarized on subtargets that don't have legal +// vxi1 types. +static SDValue combineBitcastvxi1(SelectionDAG &DAG, SDValue BitCast, + const X86Subtarget &Subtarget) { + EVT VT = BitCast.getValueType(); + SDValue N0 = BitCast.getOperand(0); + EVT VecVT = N0->getValueType(0); + + if (!VT.isScalarInteger() || !VecVT.isSimple()) + return SDValue(); + + // With AVX512 vxi1 types are legal and we prefer using k-regs. + // MOVMSK is supported in SSE2 or later. + if (Subtarget.hasAVX512() || !Subtarget.hasSSE2()) + return SDValue(); + + // There are MOVMSK flavors for types v16i8, v32i8, v4f32, v8f32, v4f64 and + // v8f64. So all legal 128-bit and 256-bit vectors are covered except for + // v8i16 and v16i16. + // For these two cases, we can shuffle the upper element bytes to a + // consecutive sequence at the start of the vector and treat the results as + // v16i8 or v32i8, and for v16i8 this is the preferable solution. However, + // for v16i16 this is not the case, because the shuffle is expensive, so we + // avoid sign-extending to this type entirely.
+ // For example, t0 := (v8i16 sext(v8i1 x)) needs to be shuffled as: + // (v16i8 shuffle <0,2,4,6,8,10,12,14,u,u,...,u> (v16i8 bitcast t0), undef) + MVT SExtVT; + MVT FPCastVT = MVT::INVALID_SIMPLE_VALUE_TYPE; + switch (VecVT.getSimpleVT().SimpleTy) { + default: + return SDValue(); + case MVT::v2i1: + SExtVT = MVT::v2i64; + FPCastVT = MVT::v2f64; + break; + case MVT::v4i1: + SExtVT = MVT::v4i32; + FPCastVT = MVT::v4f32; + // For cases such as (i4 bitcast (v4i1 setcc v4i64 v1, v2)) + // sign-extend to a 256-bit operation to avoid truncation. + if (N0->getOpcode() == ISD::SETCC && + N0->getOperand(0)->getValueType(0).is256BitVector() && + Subtarget.hasInt256()) { + SExtVT = MVT::v4i64; + FPCastVT = MVT::v4f64; + } + break; + case MVT::v8i1: + SExtVT = MVT::v8i16; + // For cases such as (i8 bitcast (v8i1 setcc v8i32 v1, v2)), + // sign-extend to a 256-bit operation to match the compare. + // If the setcc operand is 128-bit, prefer sign-extending to 128-bit over + // 256-bit because the shuffle is cheaper than sign-extending the result of + // the compare. + if (N0->getOpcode() == ISD::SETCC && + N0->getOperand(0)->getValueType(0).is256BitVector() && + Subtarget.hasInt256()) { + SExtVT = MVT::v8i32; + FPCastVT = MVT::v8f32; + } + break; + case MVT::v16i1: + SExtVT = MVT::v16i8; + // For the case (i16 bitcast (v16i1 setcc v16i16 v1, v2)), + // it is not profitable to sign-extend to 256-bit because this will + // require an extra cross-lane shuffle which is more expensive than + // truncating the result of the compare to 128-bits. + break; + case MVT::v32i1: + // TODO: Handle pre-AVX2 cases by splitting to two v16i1's. + if (!Subtarget.hasInt256()) + return SDValue(); + SExtVT = MVT::v32i8; + break; + } + + SDLoc DL(BitCast); + SDValue V = DAG.getSExtOrTrunc(N0, DL, SExtVT); + if (SExtVT == MVT::v8i16) { + V = DAG.getBitcast(MVT::v16i8, V); + V = DAG.getVectorShuffle( + MVT::v16i8, DL, V, DAG.getUNDEF(MVT::v16i8), + {0, 2, 4, 6, 8, 10, 12, 14, -1, -1, -1, -1, -1, -1, -1, -1}); + } else + assert(SExtVT.getScalarType() != MVT::i16 && + "Vectors of i16 must be shuffled"); + if (FPCastVT != MVT::INVALID_SIMPLE_VALUE_TYPE) + V = DAG.getBitcast(FPCastVT, V); + V = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, V); + return DAG.getZExtOrTrunc(V, DL, VT); +} + static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); EVT SrcVT = N0.getValueType(); + // Try to match patterns such as + // (i16 bitcast (v16i1 x)) + // -> + // (i16 movmsk (v16i8 sext (v16i1 x))) + // before the setcc result is scalarized on subtargets that don't have legal + // vxi1 types. + if (DCI.isBeforeLegalize()) + if (SDValue V = combineBitcastvxi1(DAG, SDValue(N, 0), Subtarget)) + return V; // Since MMX types are special and don't usually play with other vector types, // it's better to handle them early to be sure we emit efficient code by // avoiding store-load conversions. @@ -29087,8 +29540,9 @@ static bool detectZextAbsDiff(const SDValue &Select, SDValue &Op0, // In SetLT case, The second operand of the comparison can be either 1 or 0.
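// combineBitcastvxi1 above works because MOVMSK gathers the sign bit of
// each lane, so MOVMSK of a sign-extended vXi1 mask reproduces the scalar
// bitcast. A byte-level emulation of the v16i1 case (illustrative only):
#include <cassert>
#include <cstdint>

int main() {
  bool mask[16] = {1, 0, 1, 1}; // a v16i1 value, remaining lanes false
  uint8_t sext[16];
  for (int i = 0; i != 16; ++i)
    sext[i] = mask[i] ? 0xFF : 0x00; // (v16i8 sext (v16i1 x))
  unsigned movmsk = 0;
  for (int i = 0; i != 16; ++i)
    movmsk |= unsigned(sext[i] >> 7) << i; // PMOVMSKB: collect sign bits
  assert(movmsk == 0xDu); // == (i16 bitcast (v16i1 x))
  return 0;
}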
APInt SplatVal; if ((CC == ISD::SETLT) && - !((ISD::isConstantSplatVector(SetCC.getOperand(1).getNode(), SplatVal) && - SplatVal == 1) || + !((ISD::isConstantSplatVector(SetCC.getOperand(1).getNode(), SplatVal, + /*AllowShrink*/false) && + SplatVal.isOneValue()) || (ISD::isBuildVectorAllZeros(SetCC.getOperand(1).getNode())))) return false; @@ -29252,8 +29706,8 @@ static SDValue combineBasicSADPattern(SDNode *Extract, SelectionDAG &DAG, // (extends the sign bit which is zero). // So it is correct to skip the sign/zero extend instruction. if (Root && (Root.getOpcode() == ISD::SIGN_EXTEND || - Root.getOpcode() == ISD::ZERO_EXTEND || - Root.getOpcode() == ISD::ANY_EXTEND)) + Root.getOpcode() == ISD::ZERO_EXTEND || + Root.getOpcode() == ISD::ANY_EXTEND)) Root = Root.getOperand(0); // If there was a match, we want Root to be a select that is the root of an @@ -29320,7 +29774,7 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG, // Resolve the target shuffle inputs and mask. SmallVector Mask; SmallVector Ops; - if (!resolveTargetShuffleInputs(peekThroughBitcasts(Src), Ops, Mask)) + if (!resolveTargetShuffleInputs(peekThroughBitcasts(Src), Ops, Mask, DAG)) return SDValue(); // Attempt to narrow/widen the shuffle mask to the correct size. @@ -29577,10 +30031,10 @@ combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG, if (FValIsAllZeros && Subtarget.hasAVX512() && Cond.hasOneUse() && CondVT.getVectorElementType() == MVT::i1) { // Invert the cond to not(cond) : xor(op,allones)=not(op) - SDValue CondNew = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond, + SDValue CondNew = DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getAllOnesConstant(DL, CondVT)); // Vselect cond, op1, op2 = Vselect not(cond), op2, op1 - return DAG.getNode(ISD::VSELECT, DL, VT, CondNew, RHS, LHS); + return DAG.getSelect(DL, VT, CondNew, RHS, LHS); } // To use the condition operand as a bitwise mask, it must have elements that @@ -29690,7 +30144,7 @@ static SDValue combineSelectOfTwoConstants(SDNode *N, SelectionDAG &DAG) { if (N->getValueType(0) == MVT::i32) Diff = (unsigned)Diff; - bool isFastMultiplier = false; + bool IsFastMultiplier = false; if (Diff < 10) { switch ((unsigned char)Diff) { default: @@ -29702,12 +30156,12 @@ static SDValue combineSelectOfTwoConstants(SDNode *N, SelectionDAG &DAG) { case 5: // result = lea base(cond, cond*4) case 8: // result = lea base( , cond*8) case 9: // result = lea base(cond, cond*8) - isFastMultiplier = true; + IsFastMultiplier = true; break; } } - if (isFastMultiplier) { + if (IsFastMultiplier) { APInt Diff = TrueC->getAPIntValue() - FalseC->getAPIntValue(); if (NeedsCondInvert) // Invert the condition if needed. Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond, @@ -29914,6 +30368,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG, // Converting this to a min would handle both negative zeros and NaNs // incorrectly, but we can swap the operands to fix both. std::swap(LHS, RHS); + LLVM_FALLTHROUGH; case ISD::SETOLT: case ISD::SETLT: case ISD::SETLE: @@ -29944,6 +30399,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG, // Converting this to a max would handle both negative zeros and NaNs // incorrectly, but we can swap the operands to fix both. 
std::swap(LHS, RHS); + LLVM_FALLTHROUGH; case ISD::SETOGT: case ISD::SETGT: case ISD::SETGE: @@ -29978,6 +30434,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG, // Converting this to a min would handle both negative zeros and NaNs // incorrectly, but we can swap the operands to fix both. std::swap(LHS, RHS); + LLVM_FALLTHROUGH; case ISD::SETOGT: case ISD::SETGT: case ISD::SETGE: @@ -30006,6 +30463,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG, // Converting this to a max would handle both negative zeros and NaNs // incorrectly, but we can swap the operands to fix both. std::swap(LHS, RHS); + LLVM_FALLTHROUGH; case ISD::SETOLT: case ISD::SETLT: case ISD::SETLE: @@ -30063,7 +30521,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG, ISD::CondCode NewCC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGE; Cond = DAG.getSetCC(SDLoc(Cond), Cond.getValueType(), Cond.getOperand(0), Cond.getOperand(1), NewCC); - return DAG.getNode(ISD::SELECT, DL, VT, Cond, LHS, RHS); + return DAG.getSelect(DL, VT, Cond, LHS, RHS); } } } @@ -30171,12 +30629,15 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG, // Byte blends are only available in AVX2 if (VT == MVT::v32i8 && !Subtarget.hasAVX2()) return SDValue(); + // There are no 512-bit blend instructions that use sign bits. + if (VT.is512BitVector()) + return SDValue(); assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size"); APInt DemandedMask(APInt::getSignMask(BitWidth)); KnownBits Known; - TargetLowering::TargetLoweringOpt TLO(DAG, DCI.isBeforeLegalize(), - DCI.isBeforeLegalizeOps()); + TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), + !DCI.isBeforeLegalizeOps()); if (TLI.ShrinkDemandedConstant(Cond, DemandedMask, TLO) || TLI.SimplifyDemandedBits(Cond, DemandedMask, Known, TLO)) { // If we changed the computation somewhere in the DAG, this change will @@ -30226,6 +30687,14 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG, return SDValue(N, 0); } + // Custom action for SELECT MMX + if (VT == MVT::x86mmx) { + LHS = DAG.getBitcast(MVT::i64, LHS); + RHS = DAG.getBitcast(MVT::i64, RHS); + SDValue newSelect = DAG.getNode(ISD::SELECT, DL, MVT::i64, Cond, LHS, RHS); + return DAG.getBitcast(VT, newSelect); + } + return SDValue(); } @@ -30473,11 +30942,40 @@ static bool checkBoolTestAndOrSetCCCombine(SDValue Cond, X86::CondCode &CC0, return true; } +// When legalizing carry, we create carries via add X, -1 +// If that comes from an actual carry, via setcc, we use the +// carry directly. +static SDValue combineCarryThroughADD(SDValue EFLAGS) { + if (EFLAGS.getOpcode() == X86ISD::ADD) { + if (isAllOnesConstant(EFLAGS.getOperand(1))) { + SDValue Carry = EFLAGS.getOperand(0); + while (Carry.getOpcode() == ISD::TRUNCATE || + Carry.getOpcode() == ISD::ZERO_EXTEND || + Carry.getOpcode() == ISD::SIGN_EXTEND || + Carry.getOpcode() == ISD::ANY_EXTEND || + (Carry.getOpcode() == ISD::AND && + isOneConstant(Carry.getOperand(1)))) + Carry = Carry.getOperand(0); + if (Carry.getOpcode() == X86ISD::SETCC || + Carry.getOpcode() == X86ISD::SETCC_CARRY) { + if (Carry.getConstantOperandVal(0) == X86::COND_B) + return Carry.getOperand(1); + } + } + } + + return SDValue(); +} + /// Optimize an EFLAGS definition used according to the condition code \p CC /// into a simpler EFLAGS value, potentially returning a new \p CC and replacing /// uses of chain values. 
static SDValue combineSetCCEFLAGS(SDValue EFLAGS, X86::CondCode &CC, SelectionDAG &DAG) { + if (CC == X86::COND_B) + if (SDValue Flags = combineCarryThroughADD(EFLAGS)) + return Flags; + if (SDValue R = checkBoolTestSetCCCombine(EFLAGS, CC)) return R; return combineSetCCAtomicArith(EFLAGS, CC, DAG); @@ -30903,6 +31401,77 @@ static SDValue reduceVMULWidth(SDNode *N, SelectionDAG &DAG, } } +static SDValue combineMulSpecial(uint64_t MulAmt, SDNode *N, SelectionDAG &DAG, + EVT VT, SDLoc DL) { + + auto combineMulShlAddOrSub = [&](int Mult, int Shift, bool isAdd) { + SDValue Result = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0), + DAG.getConstant(Mult, DL, VT)); + Result = DAG.getNode(ISD::SHL, DL, VT, Result, + DAG.getConstant(Shift, DL, MVT::i8)); + Result = DAG.getNode(isAdd ? ISD::ADD : ISD::SUB, DL, VT, Result, + N->getOperand(0)); + return Result; + }; + + auto combineMulMulAddOrSub = [&](bool isAdd) { + SDValue Result = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0), + DAG.getConstant(9, DL, VT)); + Result = DAG.getNode(ISD::MUL, DL, VT, Result, DAG.getConstant(3, DL, VT)); + Result = DAG.getNode(isAdd ? ISD::ADD : ISD::SUB, DL, VT, Result, + N->getOperand(0)); + return Result; + }; + + switch (MulAmt) { + default: + break; + case 11: + // mul x, 11 => add ((shl (mul x, 5), 1), x) + return combineMulShlAddOrSub(5, 1, /*isAdd*/ true); + case 21: + // mul x, 21 => add ((shl (mul x, 5), 2), x) + return combineMulShlAddOrSub(5, 2, /*isAdd*/ true); + case 22: + // mul x, 22 => add (add ((shl (mul x, 5), 2), x), x) + return DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0), + combineMulShlAddOrSub(5, 2, /*isAdd*/ true)); + case 19: + // mul x, 19 => sub ((shl (mul x, 5), 2), x) + return combineMulShlAddOrSub(5, 2, /*isAdd*/ false); + case 13: + // mul x, 13 => add ((shl (mul x, 3), 2), x) + return combineMulShlAddOrSub(3, 2, /*isAdd*/ true); + case 23: + // mul x, 23 => sub ((shl (mul x, 3), 3), x) + return combineMulShlAddOrSub(3, 3, /*isAdd*/ false); + case 14: + // mul x, 14 => add (add ((shl (mul x, 3), 2), x), x) + return DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0), + combineMulShlAddOrSub(3, 2, /*isAdd*/ true)); + case 26: + // mul x, 26 => sub ((mul (mul x, 9), 3), x) + return combineMulMulAddOrSub(/*isAdd*/ false); + case 28: + // mul x, 28 => add ((mul (mul x, 9), 3), x) + return combineMulMulAddOrSub(/*isAdd*/ true); + case 29: + // mul x, 29 => add (add ((mul (mul x, 9), 3), x), x) + return DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0), + combineMulMulAddOrSub(/*isAdd*/ true)); + case 30: + // mul x, 30 => sub (sub ((shl x, 5), x), x) + return DAG.getNode( + ISD::SUB, DL, VT, + DAG.getNode(ISD::SUB, DL, VT, + DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), + DAG.getConstant(5, DL, MVT::i8)), + N->getOperand(0)), + N->getOperand(0)); + } + return SDValue(); +} + /// Optimize a single multiply with constant into two operations in order to /// implement it with two cheaper instructions, e.g. LEA + SHL, LEA + LEA. static SDValue combineMul(SDNode *N, SelectionDAG &DAG, @@ -30912,6 +31481,8 @@ static SDValue combineMul(SDNode *N, SelectionDAG &DAG, if (DCI.isBeforeLegalize() && VT.isVector()) return reduceVMULWidth(N, DAG, Subtarget); + if (!MulConstantOptimization) + return SDValue(); // An imul is usually smaller than the alternative sequence.
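The decompositions in combineMulSpecial above all reduce to one LEA-shaped multiply (x3, x5 or x9), an optional shift, and an add or sub. A throwaway check of the arithmetic for a few of the cases, not part of the patch:

  #include <cassert>
  #include <cstdint>
  static uint64_t mul11(uint64_t X) { return ((X * 5) << 1) + X; }  // 5*2 + 1
  static uint64_t mul23(uint64_t X) { return ((X * 3) << 3) - X; }  // 3*8 - 1
  static uint64_t mul29(uint64_t X) { return (X * 9) * 3 + X + X; } // 9*3 + 2
  int main() {
    assert(mul11(7) == 77 && mul23(7) == 161 && mul29(7) == 203);
  }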
if (DAG.getMachineFunction().getFunction()->optForMinSize()) return SDValue(); @@ -30967,7 +31538,8 @@ static SDValue combineMul(SDNode *N, SelectionDAG &DAG, else NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, NewMul, DAG.getConstant(MulAmt2, DL, VT)); - } + } else if (!Subtarget.slowLEA()) + NewMul = combineMulSpecial(MulAmt, N, DAG, VT, DL); if (!NewMul) { assert(MulAmt != 0 && @@ -31324,13 +31896,11 @@ static SDValue combineCompareEqual(SDNode *N, SelectionDAG &DAG, // See X86ATTInstPrinter.cpp:printSSECC(). unsigned x86cc = (cc0 == X86::COND_E) ? 0 : 4; if (Subtarget.hasAVX512()) { - SDValue FSetCC = DAG.getNode(X86ISD::FSETCCM, DL, MVT::i1, CMP00, - CMP01, - DAG.getConstant(x86cc, DL, MVT::i8)); - if (N->getValueType(0) != MVT::i1) - return DAG.getNode(ISD::ZERO_EXTEND, DL, N->getValueType(0), - FSetCC); - return FSetCC; + SDValue FSetCC = + DAG.getNode(X86ISD::FSETCCM, DL, MVT::v1i1, CMP00, CMP01, + DAG.getConstant(x86cc, DL, MVT::i8)); + return DAG.getNode(X86ISD::VEXTRACT, DL, N->getSimpleValueType(0), + FSetCC, DAG.getIntPtrConstant(0, DL)); } SDValue OnesOrZeroesF = DAG.getNode(X86ISD::FSETCC, DL, CMP00.getValueType(), CMP00, CMP01, @@ -31521,7 +32091,8 @@ static SDValue combineAndMaskToShift(SDNode *N, SelectionDAG &DAG, return SDValue(); APInt SplatVal; - if (!ISD::isConstantSplatVector(Op1.getNode(), SplatVal) || + if (!ISD::isConstantSplatVector(Op1.getNode(), SplatVal, + /*AllowShrink*/false) || !SplatVal.isMask()) return SDValue(); @@ -31713,7 +32284,7 @@ static SDValue combineLogicBlendIntoPBLENDV(SDNode *N, SelectionDAG &DAG, X = DAG.getBitcast(BlendVT, X); Y = DAG.getBitcast(BlendVT, Y); Mask = DAG.getBitcast(BlendVT, Mask); - Mask = DAG.getNode(ISD::VSELECT, DL, BlendVT, Mask, Y, X); + Mask = DAG.getSelect(DL, BlendVT, Mask, Y, X); return DAG.getBitcast(VT, Mask); } @@ -32105,7 +32676,8 @@ static SDValue detectUSatPattern(SDValue In, EVT VT) { "Unexpected types for truncate operation"); APInt C; - if (ISD::isConstantSplatVector(In.getOperand(1).getNode(), C)) { + if (ISD::isConstantSplatVector(In.getOperand(1).getNode(), C, + /*AllowShrink*/false)) { // C should be equal to UINT32_MAX / UINT16_MAX / UINT8_MAX according // the element size of the destination type. return C.isMask(VT.getScalarSizeInBits()) ? In.getOperand(0) : @@ -32269,15 +32841,17 @@ static SDValue combineLoad(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // For chips with slow 32-byte unaligned loads, break the 32-byte operation - // into two 16-byte operations. + // into two 16-byte operations. Also split non-temporal aligned loads on + // pre-AVX2 targets as 32-byte loads will lower to regular temporal loads. 
ISD::LoadExtType Ext = Ld->getExtensionType(); bool Fast; unsigned AddressSpace = Ld->getAddressSpace(); unsigned Alignment = Ld->getAlignment(); if (RegVT.is256BitVector() && !DCI.isBeforeLegalizeOps() && Ext == ISD::NON_EXTLOAD && - TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), RegVT, - AddressSpace, Alignment, &Fast) && !Fast) { + ((Ld->isNonTemporal() && !Subtarget.hasInt256() && Alignment >= 16) || + (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), RegVT, + AddressSpace, Alignment, &Fast) && !Fast))) { unsigned NumElems = RegVT.getVectorNumElements(); if (NumElems < 2) return SDValue(); @@ -32847,7 +33421,8 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG, SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(), Ld->getAlignment(), Ld->getMemOperand()->getFlags()); - SDValue NewChain = NewLd.getValue(1); + // Make sure new load is placed in same chain order. + SDValue NewChain = DAG.makeEquivalentMemoryOrdering(Ld, NewLd); if (TokenFactorIndex >= 0) { Ops.push_back(NewChain); NewChain = DAG.getNode(ISD::TokenFactor, LdDL, MVT::Other, Ops); @@ -32868,11 +33443,12 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG, Ld->getPointerInfo().getWithOffset(4), MinAlign(Ld->getAlignment(), 4), Ld->getMemOperand()->getFlags()); + // Make sure new loads are placed in same chain order. + SDValue NewChain = DAG.makeEquivalentMemoryOrdering(Ld, LoLd); + NewChain = DAG.makeEquivalentMemoryOrdering(Ld, HiLd); - SDValue NewChain = LoLd.getValue(1); if (TokenFactorIndex >= 0) { - Ops.push_back(LoLd); - Ops.push_back(HiLd); + Ops.push_back(NewChain); NewChain = DAG.getNode(ISD::TokenFactor, LdDL, MVT::Other, Ops); } @@ -33693,8 +34269,7 @@ static SDValue combineFMinNumFMaxNum(SDNode *N, SelectionDAG &DAG, // If Op0 is a NaN, select Op1. Otherwise, select the max. If both operands // are NaN, the NaN value of Op1 is the result. - auto SelectOpcode = VT.isVector() ? ISD::VSELECT : ISD::SELECT; - return DAG.getNode(SelectOpcode, DL, VT, IsOp0Nan, Op1, MinOrMax); + return DAG.getSelect(DL, VT, IsOp0Nan, Op1, MinOrMax); } /// Do target-specific dag combines on X86ISD::ANDNP nodes. @@ -33987,7 +34562,7 @@ static SDValue combineSext(SDNode *N, SelectionDAG &DAG, if (InVT == MVT::i1) { SDValue Zero = DAG.getConstant(0, DL, VT); SDValue AllOnes = DAG.getAllOnesConstant(DL, VT); - return DAG.getNode(ISD::SELECT, DL, VT, N0, AllOnes, Zero); + return DAG.getSelect(DL, VT, N0, AllOnes, Zero); } return SDValue(); } @@ -34443,28 +35018,13 @@ static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG, return SDValue(); } -// Optimize RES, EFLAGS = X86ISD::ADD LHS, RHS -static SDValue combineX86ADD(SDNode *N, SelectionDAG &DAG, - X86TargetLowering::DAGCombinerInfo &DCI) { - // When legalizing carry, we create carries via add X, -1 - // If that comes from an actual carry, via setcc, we use the - // carry directly. 
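For the non-temporal split added in the load combine above: pre-AVX2 hardware has no 32-byte MOVNTDQA, so keeping the access non-temporal means issuing two aligned 16-byte halves. A hedged intrinsics sketch of the resulting shape (illustrative of the lowering, not the DAG combine itself):

  #include <immintrin.h>
  // Two 16-byte MOVNTDQA halves, then concatenate into a 256-bit value.
  __m256i loadNonTemporal32(const void *P) {
    __m128i Lo = _mm_stream_load_si128((__m128i *)P);
    __m128i Hi = _mm_stream_load_si128((__m128i *)P + 1);
    return _mm256_set_m128i(Hi, Lo);
  }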
- if (isAllOnesConstant(N->getOperand(1)) && N->hasAnyUseOfValue(1)) { - SDValue Carry = N->getOperand(0); - while (Carry.getOpcode() == ISD::TRUNCATE || - Carry.getOpcode() == ISD::ZERO_EXTEND || - Carry.getOpcode() == ISD::SIGN_EXTEND || - Carry.getOpcode() == ISD::ANY_EXTEND || - (Carry.getOpcode() == ISD::AND && - isOneConstant(Carry.getOperand(1)))) - Carry = Carry.getOperand(0); - - if (Carry.getOpcode() == ISD::SETCC || - Carry.getOpcode() == X86ISD::SETCC || - Carry.getOpcode() == X86ISD::SETCC_CARRY) { - if (Carry.getConstantOperandVal(0) == X86::COND_B) - return DCI.CombineTo(N, SDValue(N, 0), Carry.getOperand(1)); - } +static SDValue combineSBB(SDNode *N, SelectionDAG &DAG) { + if (SDValue Flags = combineCarryThroughADD(N->getOperand(2))) { + MVT VT = N->getSimpleValueType(0); + SDVTList VTs = DAG.getVTList(VT, MVT::i32); + return DAG.getNode(X86ISD::SBB, SDLoc(N), VTs, + N->getOperand(0), N->getOperand(1), + Flags); } return SDValue(); @@ -34493,6 +35053,14 @@ static SDValue combineADC(SDNode *N, SelectionDAG &DAG, return DCI.CombineTo(N, Res1, CarryOut); } + if (SDValue Flags = combineCarryThroughADD(N->getOperand(2))) { + MVT VT = N->getSimpleValueType(0); + SDVTList VTs = DAG.getVTList(VT, MVT::i32); + return DAG.getNode(X86ISD::ADC, SDLoc(N), VTs, + N->getOperand(0), N->getOperand(1), + Flags); + } + return SDValue(); } @@ -34546,6 +35114,40 @@ static SDValue combineAddOrSubToADCOrSBB(SDNode *N, SelectionDAG &DAG) { EVT VT = N->getValueType(0); X86::CondCode CC = (X86::CondCode)Y.getConstantOperandVal(0); + // If X is -1 or 0, then we have an opportunity to avoid constants required in + // the general case below. + auto *ConstantX = dyn_cast<ConstantSDNode>(X); + if (ConstantX) { + if ((!IsSub && CC == X86::COND_AE && ConstantX->isAllOnesValue()) || + (IsSub && CC == X86::COND_B && ConstantX->isNullValue())) { + // This is a complicated way to get -1 or 0 from the carry flag: + // -1 + SETAE --> -1 + (!CF) --> CF ? -1 : 0 --> SBB %eax, %eax + // 0 - SETB --> 0 - (CF) --> CF ? -1 : 0 --> SBB %eax, %eax + return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT, + DAG.getConstant(X86::COND_B, DL, MVT::i8), + Y.getOperand(1)); + } + + if ((!IsSub && CC == X86::COND_BE && ConstantX->isAllOnesValue()) || + (IsSub && CC == X86::COND_A && ConstantX->isNullValue())) { + SDValue EFLAGS = Y->getOperand(1); + if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.hasOneUse() && + EFLAGS.getValueType().isInteger() && + !isa<ConstantSDNode>(EFLAGS.getOperand(1))) { + // Swap the operands of a SUB, and we have the same pattern as above. + // -1 + SETBE (SUB A, B) --> -1 + SETAE (SUB B, A) --> SUB + SBB + // 0 - SETA (SUB A, B) --> 0 - SETB (SUB B, A) --> SUB + SBB + SDValue NewSub = DAG.getNode( + X86ISD::SUB, SDLoc(EFLAGS), EFLAGS.getNode()->getVTList(), + EFLAGS.getOperand(1), EFLAGS.getOperand(0)); + SDValue NewEFLAGS = SDValue(NewSub.getNode(), EFLAGS.getResNo()); + return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT, + DAG.getConstant(X86::COND_B, DL, MVT::i8), + NewEFLAGS); + } + } + } + if (CC == X86::COND_B) { // X + SETB Z --> X + (mask SBB Z, Z) // X - SETB Z --> X - (mask SBB Z, Z) @@ -34587,23 +35189,56 @@ static SDValue combineAddOrSubToADCOrSBB(SDNode *N, SelectionDAG &DAG) { !Cmp.getOperand(0).getValueType().isInteger()) return SDValue(); - // (cmp Z, 1) sets the carry flag if Z is 0.
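A quick sanity check of the SBB identities relied on above (hedged and illustrative): SETAE materializes !CF and SETB materializes CF, so with X being -1 or 0 both forms collapse to the CF ? -1 : 0 value that sbb %eax, %eax produces.

  #include <cassert>
  int main() {
    for (int CF = 0; CF <= 1; ++CF) {
      int Setae = !CF, Setb = CF;
      int Sbb = CF ? -1 : 0;       // value of "sbb %eax, %eax"
      assert(-1 + Setae == Sbb);   // -1 + SETAE
      assert(0 - Setb == Sbb);     // 0 - SETB
    }
  }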
SDValue Z = Cmp.getOperand(0); - SDValue NewCmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32, Z, - DAG.getConstant(1, DL, Z.getValueType())); + EVT ZVT = Z.getValueType(); + + // If X is -1 or 0, then we have an opportunity to avoid constants required in + // the general case below. + if (ConstantX) { + // 'neg' sets the carry flag when Z != 0, so create 0 or -1 using 'sbb' with + // fake operands: + // 0 - (Z != 0) --> sbb %eax, %eax, (neg Z) + // -1 + (Z == 0) --> sbb %eax, %eax, (neg Z) + if ((IsSub && CC == X86::COND_NE && ConstantX->isNullValue()) || + (!IsSub && CC == X86::COND_E && ConstantX->isAllOnesValue())) { + SDValue Zero = DAG.getConstant(0, DL, ZVT); + SDVTList X86SubVTs = DAG.getVTList(ZVT, MVT::i32); + SDValue Neg = DAG.getNode(X86ISD::SUB, DL, X86SubVTs, Zero, Z); + return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT, + DAG.getConstant(X86::COND_B, DL, MVT::i8), + SDValue(Neg.getNode(), 1)); + } + + // cmp with 1 sets the carry flag when Z == 0, so create 0 or -1 using 'sbb' + // with fake operands: + // 0 - (Z == 0) --> sbb %eax, %eax, (cmp Z, 1) + // -1 + (Z != 0) --> sbb %eax, %eax, (cmp Z, 1) + if ((IsSub && CC == X86::COND_E && ConstantX->isNullValue()) || + (!IsSub && CC == X86::COND_NE && ConstantX->isAllOnesValue())) { + SDValue One = DAG.getConstant(1, DL, ZVT); + SDValue Cmp1 = DAG.getNode(X86ISD::CMP, DL, MVT::i32, Z, One); + return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT, + DAG.getConstant(X86::COND_B, DL, MVT::i8), Cmp1); + } + } - SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32); + // (cmp Z, 1) sets the carry flag if Z is 0. + SDValue One = DAG.getConstant(1, DL, ZVT); + SDValue Cmp1 = DAG.getNode(X86ISD::CMP, DL, MVT::i32, Z, One); + + // Add the flags type for ADC/SBB nodes. + SDVTList VTs = DAG.getVTList(VT, MVT::i32); // X - (Z != 0) --> sub X, (zext(setne Z, 0)) --> adc X, -1, (cmp Z, 1) // X + (Z != 0) --> add X, (zext(setne Z, 0)) --> sbb X, -1, (cmp Z, 1) if (CC == X86::COND_NE) return DAG.getNode(IsSub ? X86ISD::ADC : X86ISD::SBB, DL, VTs, X, - DAG.getConstant(-1ULL, DL, VT), NewCmp); + DAG.getConstant(-1ULL, DL, VT), Cmp1); // X - (Z == 0) --> sub X, (zext(sete Z, 0)) --> sbb X, 0, (cmp Z, 1) // X + (Z == 0) --> add X, (zext(sete Z, 0)) --> adc X, 0, (cmp Z, 1) return DAG.getNode(IsSub ? X86ISD::SBB : X86ISD::ADC, DL, VTs, X, - DAG.getConstant(0, DL, VT), NewCmp); + DAG.getConstant(0, DL, VT), Cmp1); } static SDValue combineLoopMAddPattern(SDNode *N, SelectionDAG &DAG, @@ -34722,6 +35357,33 @@ static SDValue combineLoopSADPattern(SDNode *N, SelectionDAG &DAG, return DAG.getNode(ISD::ADD, DL, VT, Sad, Phi); } +/// Convert vector increment or decrement to sub/add with an all-ones constant: +/// add X, <1, 1...> --> sub X, <-1, -1...> +/// sub X, <1, 1...> --> add X, <-1, -1...> +/// The all-ones vector constant can be materialized using a pcmpeq instruction +/// that is commonly recognized as an idiom (has no register dependency), so +/// that's better/smaller than loading a splat 1 constant. +static SDValue combineIncDecVector(SDNode *N, SelectionDAG &DAG) { + assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) && + "Unexpected opcode for increment/decrement transform"); + + // Pseudo-legality check: getOnesVector() expects one of these types, so bail + // out and wait for legalization if we have an unsupported vector length. 
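The fake-operand tricks above lean on two flag identities: "neg Z" sets CF exactly when Z != 0, and "cmp Z, 1" (an unsigned borrow) sets CF exactly when Z == 0. A disposable check, with the flags modeled as C booleans (an assumption of the sketch):

  #include <cassert>
  #include <cstdint>
  int main() {
    for (uint32_t Z : {0u, 1u, 2u, 0xFFFFFFFFu}) {
      bool NegCF = (Z != 0); // CF after "neg Z"
      bool CmpCF = (Z < 1);  // CF after "cmp Z, 1", i.e. Z == 0
      assert(NegCF != CmpCF); // the two encodings are complementary
    }
  }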
+ EVT VT = N->getValueType(0); + if (!VT.is128BitVector() && !VT.is256BitVector() && !VT.is512BitVector()) + return SDValue(); + + SDNode *N1 = N->getOperand(1).getNode(); + APInt SplatVal; + if (!ISD::isConstantSplatVector(N1, SplatVal, /*AllowShrink*/false) || + !SplatVal.isOneValue()) + return SDValue(); + + SDValue AllOnesVec = getOnesVector(VT, DAG, SDLoc(N)); + unsigned NewOpcode = N->getOpcode() == ISD::ADD ? ISD::SUB : ISD::ADD; + return DAG.getNode(NewOpcode, SDLoc(N), VT, N->getOperand(0), AllOnesVec); +} + static SDValue combineAdd(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { const SDNodeFlags Flags = N->getFlags(); @@ -34741,6 +35403,9 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG, isHorizontalBinOp(Op0, Op1, true)) return DAG.getNode(X86ISD::HADD, SDLoc(N), VT, Op0, Op1); + if (SDValue V = combineIncDecVector(N, DAG)) + return V; + return combineAddOrSubToADCOrSBB(N, DAG); } @@ -34774,6 +35439,9 @@ static SDValue combineSub(SDNode *N, SelectionDAG &DAG, isHorizontalBinOp(Op0, Op1, false)) return DAG.getNode(X86ISD::HSUB, SDLoc(N), VT, Op0, Op1); + if (SDValue V = combineIncDecVector(N, DAG)) + return V; + return combineAddOrSubToADCOrSBB(N, DAG); } @@ -34987,7 +35655,8 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG, if (TLI->allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), OpVT, AS, Alignment, &Fast) && Fast) { SDValue Ops[] = {SubVec2, SubVec}; - if (SDValue Ld = EltsFromConsecutiveLoads(OpVT, Ops, dl, DAG, false)) + if (SDValue Ld = EltsFromConsecutiveLoads(OpVT, Ops, dl, DAG, + Subtarget, false)) return Ld; } } @@ -35027,11 +35696,11 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case ISD::VSELECT: case ISD::SELECT: case X86ISD::SHRUNKBLEND: return combineSelect(N, DAG, DCI, Subtarget); - case ISD::BITCAST: return combineBitcast(N, DAG, Subtarget); + case ISD::BITCAST: return combineBitcast(N, DAG, DCI, Subtarget); case X86ISD::CMOV: return combineCMov(N, DAG, DCI, Subtarget); case ISD::ADD: return combineAdd(N, DAG, Subtarget); case ISD::SUB: return combineSub(N, DAG, Subtarget); - case X86ISD::ADD: return combineX86ADD(N, DAG, DCI); + case X86ISD::SBB: return combineSBB(N, DAG); case X86ISD::ADC: return combineADC(N, DAG, DCI); case ISD::MUL: return combineMul(N, DAG, DCI, Subtarget); case ISD::SHL: @@ -35079,6 +35748,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::PINSRW: return combineVectorInsert(N, DAG, DCI, Subtarget); case X86ISD::SHUFP: // Handle all target specific shuffles case X86ISD::INSERTPS: + case X86ISD::EXTRQI: + case X86ISD::INSERTQI: case X86ISD::PALIGNR: case X86ISD::VSHLDQ: case X86ISD::VSRLDQ: @@ -35411,6 +36082,7 @@ TargetLowering::ConstraintWeight switch (*constraint) { default: weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); + LLVM_FALLTHROUGH; case 'R': case 'q': case 'Q': @@ -35762,6 +36434,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, return std::make_pair(0U, &X86::GR64RegClass); break; } + LLVM_FALLTHROUGH; // 32-bit fallthrough case 'Q': // Q_REGS if (VT == MVT::i32 || VT == MVT::f32) @@ -36078,3 +36751,22 @@ void X86TargetLowering::insertCopiesSplitCSR( bool X86TargetLowering::supportSwiftError() const { return Subtarget.is64Bit(); } + +/// Returns the name of the symbol used to emit stack probes or the empty +/// string if not applicable. 
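The payoff of combineIncDecVector above is that the all-ones operand needs no constant-pool load: comparing a register with itself via PCMPEQ is the recognized no-dependency idiom for materializing -1 in every lane. A hedged SSE model of the rewrite:

  #include <immintrin.h>
  // X + 1 per lane rewritten as X - (-1), with the all-ones vector
  // materialized by PCMPEQD instead of loading a splat-1 constant.
  __m128i incrementLanes(__m128i X) {
    __m128i Zero = _mm_setzero_si128();
    __m128i AllOnes = _mm_cmpeq_epi32(Zero, Zero); // <-1,-1,-1,-1>
    return _mm_sub_epi32(X, AllOnes);              // X + <1,1,1,1>
  }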
+StringRef X86TargetLowering::getStackProbeSymbolName(MachineFunction &MF) const { + // If the function specifically requests stack probes, emit them. + if (MF.getFunction()->hasFnAttribute("probe-stack")) + return MF.getFunction()->getFnAttribute("probe-stack").getValueAsString(); + + // Generally, if we aren't on Windows, the platform ABI does not include + // support for stack probes, so don't emit them. + if (!Subtarget.isOSWindows() || Subtarget.isTargetMachO()) + return ""; + + // We need a stack probe to conform to the Windows ABI. Choose the right + // symbol. + if (Subtarget.is64Bit()) + return Subtarget.isTargetCygMing() ? "___chkstk_ms" : "__chkstk"; + return Subtarget.isTargetCygMing() ? "_alloca" : "_chkstk"; +} diff --git a/interpreter/llvm/src/lib/Target/X86/X86ISelLowering.h b/interpreter/llvm/src/lib/Target/X86/X86ISelLowering.h index 18106c2eb3941..dbbc2bbba6a4a 100644 --- a/interpreter/llvm/src/lib/Target/X86/X86ISelLowering.h +++ b/interpreter/llvm/src/lib/Target/X86/X86ISelLowering.h @@ -615,7 +615,10 @@ namespace llvm { // Vector truncating store with unsigned/signed saturation VTRUNCSTOREUS, VTRUNCSTORES, // Vector truncating masked store with unsigned/signed saturation - VMTRUNCSTOREUS, VMTRUNCSTORES + VMTRUNCSTOREUS, VMTRUNCSTORES, + + // X86 specific gather + MGATHER // WARNING: Do not add anything in the end unless you want the node to // have memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all @@ -764,6 +767,19 @@ namespace llvm { SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; + // Return true if it is profitable to combine a BUILD_VECTOR to a TRUNCATE + // for given operand and result types. + // Example of such a combine: + // v4i32 build_vector((extract_elt V, 0), + // (extract_elt V, 2), + // (extract_elt V, 4), + // (extract_elt V, 6)) + // --> + // v4i32 truncate (bitcast V to v4i64) + bool isDesirableToCombineBuildVectorToTruncate() const override { + return true; + } + /// Return true if the target has native support for /// the specified value type and it is 'desirable' to use the type for the /// given node type. e.g. On x86 i16 is legal, but undesirable since i16 @@ -1056,6 +1072,8 @@ namespace llvm { bool supportSwiftError() const override; + StringRef getStackProbeSymbolName(MachineFunction &MF) const override; + unsigned getMaxSupportedInterleaveFactor() const override { return 4; } /// \brief Lower interleaved load(s) into target specific @@ -1065,6 +1083,12 @@ namespace llvm { ArrayRef Indices, unsigned Factor) const override; + /// \brief Lower interleaved store(s) into target specific + /// instructions/intrinsics. + bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, + unsigned Factor) const override; + + void finalizeLowering(MachineFunction &MF) const override; protected: @@ -1163,7 +1187,7 @@ namespace llvm { SDValue LowerToBT(SDValue And, ISD::CondCode CC, const SDLoc &dl, SelectionDAG &DAG) const; SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerSETCCE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; @@ -1397,6 +1421,19 @@ namespace llvm { } }; + // X86 specific Gather node. 
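A hedged scalar model of the build_vector-to-truncate combine that the new isDesirableToCombineBuildVectorToTruncate hook opts into (little-endian layout assumed; the names are illustrative, not from the patch):

  #include <cstdint>
  #include <cstring>
  // Collecting the even elements of a v8i32 equals truncating each lane of
  // the same bits viewed as v4i64, which is what the hook's comment shows.
  void evenElements(const uint32_t In[8], uint32_t Out[4]) {
    uint64_t Lanes[4];
    std::memcpy(Lanes, In, sizeof(Lanes)); // "bitcast" v8i32 -> v4i64
    for (int I = 0; I < 4; ++I)
      Out[I] = (uint32_t)Lanes[I];         // truncate each 64-bit lane
  }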
+ class X86MaskedGatherSDNode : public MaskedGatherScatterSDNode { + public: + X86MaskedGatherSDNode(unsigned Order, + const DebugLoc &dl, SDVTList VTs, EVT MemVT, + MachineMemOperand *MMO) + : MaskedGatherScatterSDNode(X86ISD::MGATHER, Order, dl, VTs, MemVT, MMO) + {} + static bool classof(const SDNode *N) { + return N->getOpcode() == X86ISD::MGATHER; + } + }; + } // end namespace llvm #endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H diff --git a/interpreter/llvm/src/lib/Target/X86/X86InstrAVX512.td b/interpreter/llvm/src/lib/Target/X86/X86InstrAVX512.td index 71d395244b4ad..0ae960e7d566d 100644 --- a/interpreter/llvm/src/lib/Target/X86/X86InstrAVX512.td +++ b/interpreter/llvm/src/lib/Target/X86/X86InstrAVX512.td @@ -31,8 +31,7 @@ class X86VectorVTInfo("VK" # NumElts # "WM"); // The mask VT. - ValueType KVT = !cast(!if (!eq (NumElts, 1), "i1", - "v" # NumElts # "i1")); + ValueType KVT = !cast("v" # NumElts # "i1"); // Suffix used in the instruction mnemonic. string Suffix = suffix; @@ -186,6 +185,20 @@ def avx512vl_f32_info : AVX512VLVectorVTInfo; +class X86KVectorVTInfo { + RegisterClass KRC = _krc; + RegisterClass KRCWM = _krcwm; + ValueType KVT = _vt; +} + +def v2i1_info : X86KVectorVTInfo; +def v4i1_info : X86KVectorVTInfo; +def v8i1_info : X86KVectorVTInfo; +def v16i1_info : X86KVectorVTInfo; +def v32i1_info : X86KVectorVTInfo; +def v64i1_info : X86KVectorVTInfo; + // This multiclass generates the masking variants from the non-masking // variant. It only provides the assembly pieces for the masking variants. // It assumes custom ISel patterns for masking which can be provided as @@ -965,6 +978,44 @@ multiclass avx512_int_broadcast_reg opc, X86VectorVTInfo _, (_.VT (OpNode SrcRC:$src))>, T8PD, EVEX; } +multiclass avx512_int_broadcastbw_reg opc, string Name, + X86VectorVTInfo _, SDPatternOperator OpNode, + RegisterClass SrcRC, SubRegIndex Subreg> { + let ExeDomain = _.ExeDomain in + defm r : AVX512_maskable_custom, T8PD, EVEX; + + def : Pat <(_.VT (OpNode SrcRC:$src)), + (!cast(Name#r) + (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>; + + def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0), + (!cast(Name#rk) _.RC:$src0, _.KRCWM:$mask, + (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>; + + def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV), + (!cast(Name#rkz) _.KRCWM:$mask, + (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>; +} + +multiclass avx512_int_broadcastbw_reg_vl opc, string Name, + AVX512VLVectorVTInfo _, SDPatternOperator OpNode, + RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> { + let Predicates = [prd] in + defm Z : avx512_int_broadcastbw_reg, EVEX_V512; + let Predicates = [prd, HasVLX] in { + defm Z256 : avx512_int_broadcastbw_reg, EVEX_V256; + defm Z128 : avx512_int_broadcastbw_reg, EVEX_V128; + } +} + multiclass avx512_int_broadcast_reg_vl opc, AVX512VLVectorVTInfo _, SDPatternOperator OpNode, RegisterClass SrcRC, Predicate prd> { @@ -976,18 +1027,11 @@ multiclass avx512_int_broadcast_reg_vl opc, AVX512VLVectorVTInfo _, } } -let isCodeGenOnly = 1 in { -defm VPBROADCASTBr : avx512_int_broadcast_reg_vl<0x7A, avx512vl_i8_info, - X86VBroadcast, GR8, HasBWI>; -defm VPBROADCASTWr : avx512_int_broadcast_reg_vl<0x7B, avx512vl_i16_info, - X86VBroadcast, GR16, HasBWI>; -} -let isAsmParserOnly = 1 in { - defm VPBROADCASTBr_Alt : avx512_int_broadcast_reg_vl<0x7A, avx512vl_i8_info, - null_frag, GR32, HasBWI>; - defm VPBROADCASTWr_Alt : avx512_int_broadcast_reg_vl<0x7B, 
avx512vl_i16_info, - null_frag, GR32, HasBWI>; -} +defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr", + avx512vl_i8_info, X86VBroadcast, GR8, sub_8bit, HasBWI>; +defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr", + avx512vl_i16_info, X86VBroadcast, GR16, sub_16bit, + HasBWI>; defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info, X86VBroadcast, GR32, HasAVX512>; defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info, @@ -1632,6 +1676,7 @@ multiclass avx512_icmp_packed opc, string OpcodeStr, SDNode OpNode, [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), (_.VT (bitconvert (_.LdFrag addr:$src2)))))], IIC_SSE_ALU_F32P_RM>, EVEX_4V; + let isCommutable = IsCommutable in def rrk : AVX512BI, T8PD, VEX_W, EVEX_CD8<64, CD8VF>; -let Predicates = [HasAVX512, NoVLX] in { -def : Pat<(v8i1 (X86pcmpgtm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))), - (COPY_TO_REGCLASS (VPCMPGTDZrr - (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), - (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))), VK8)>; -def : Pat<(v8i1 (X86pcmpeqm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))), - (COPY_TO_REGCLASS (VPCMPEQDZrr - (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), - (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))), VK8)>; -} +multiclass avx512_icmp_packed_lowering Preds> { +let Predicates = Preds in { + def : Pat<(insert_subvector (NewInf.KVT immAllZerosV), + (_.KVT (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))), + (i64 0)), + (COPY_TO_REGCLASS (!cast(InstrStr##rr) _.RC:$src1, _.RC:$src2), + NewInf.KRC)>; + + def : Pat<(insert_subvector (NewInf.KVT immAllZerosV), + (_.KVT (OpNode (_.VT _.RC:$src1), + (_.VT (bitconvert (_.LdFrag addr:$src2))))), + (i64 0)), + (COPY_TO_REGCLASS (!cast(InstrStr##rm) _.RC:$src1, addr:$src2), + NewInf.KRC)>; + + def : Pat<(insert_subvector (NewInf.KVT immAllZerosV), + (_.KVT (and _.KRCWM:$mask, + (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)))), + (i64 0)), + (COPY_TO_REGCLASS (!cast(InstrStr##rrk) _.KRCWM:$mask, + _.RC:$src1, _.RC:$src2), + NewInf.KRC)>; + + def : Pat<(insert_subvector (NewInf.KVT immAllZerosV), + (_.KVT (and (_.KVT _.KRCWM:$mask), + (_.KVT (OpNode (_.VT _.RC:$src1), + (_.VT (bitconvert + (_.LdFrag addr:$src2))))))), + (i64 0)), + (COPY_TO_REGCLASS (!cast(InstrStr##rmk) _.KRCWM:$mask, + _.RC:$src1, addr:$src2), + NewInf.KRC)>; +} +} + +multiclass avx512_icmp_packed_rmb_lowering Preds> + : avx512_icmp_packed_lowering<_, NewInf, OpNode, InstrStr, Preds> { +let Predicates = Preds in { + def : Pat<(insert_subvector (NewInf.KVT immAllZerosV), + (_.KVT (OpNode (_.VT _.RC:$src1), + (X86VBroadcast (_.ScalarLdFrag addr:$src2)))), + (i64 0)), + (COPY_TO_REGCLASS (!cast(InstrStr##rmb) _.RC:$src1, addr:$src2), + NewInf.KRC)>; + + def : Pat<(insert_subvector (NewInf.KVT immAllZerosV), + (_.KVT (and (_.KVT _.KRCWM:$mask), + (_.KVT (OpNode (_.VT _.RC:$src1), + (X86VBroadcast + (_.ScalarLdFrag addr:$src2)))))), + (i64 0)), + (COPY_TO_REGCLASS (!cast(InstrStr##rmbk) _.KRCWM:$mask, + _.RC:$src1, addr:$src2), + NewInf.KRC)>; +} +} + +// VPCMPEQB - i8 +defm : avx512_icmp_packed_lowering; +defm : avx512_icmp_packed_lowering; + +defm : avx512_icmp_packed_lowering; + +// VPCMPEQW - i16 +defm : avx512_icmp_packed_lowering; +defm : avx512_icmp_packed_lowering; +defm : avx512_icmp_packed_lowering; + +defm : avx512_icmp_packed_lowering; +defm : avx512_icmp_packed_lowering; + +defm : avx512_icmp_packed_lowering; + +// VPCMPEQD - i32 +defm : 
avx512_icmp_packed_rmb_lowering; +defm : avx512_icmp_packed_rmb_lowering; +defm : avx512_icmp_packed_rmb_lowering; +defm : avx512_icmp_packed_rmb_lowering; + +defm : avx512_icmp_packed_rmb_lowering; +defm : avx512_icmp_packed_rmb_lowering; +defm : avx512_icmp_packed_rmb_lowering; + +defm : avx512_icmp_packed_rmb_lowering; +defm : avx512_icmp_packed_rmb_lowering; + +// VPCMPEQQ - i64 +defm : avx512_icmp_packed_rmb_lowering; +defm : avx512_icmp_packed_rmb_lowering; +defm : avx512_icmp_packed_rmb_lowering; +defm : avx512_icmp_packed_rmb_lowering; +defm : avx512_icmp_packed_rmb_lowering; + +defm : avx512_icmp_packed_rmb_lowering; +defm : avx512_icmp_packed_rmb_lowering; +defm : avx512_icmp_packed_rmb_lowering; +defm : avx512_icmp_packed_rmb_lowering; + +defm : avx512_icmp_packed_rmb_lowering; +defm : avx512_icmp_packed_rmb_lowering; +defm : avx512_icmp_packed_rmb_lowering; + +// VPCMPGTB - i8 +defm : avx512_icmp_packed_lowering; +defm : avx512_icmp_packed_lowering; + +defm : avx512_icmp_packed_lowering; + +// VPCMPGTW - i16 +defm : avx512_icmp_packed_lowering; +defm : avx512_icmp_packed_lowering; +defm : avx512_icmp_packed_lowering; + +defm : avx512_icmp_packed_lowering; +defm : avx512_icmp_packed_lowering; + +defm : avx512_icmp_packed_lowering; + +// VPCMPGTD - i32 +defm : avx512_icmp_packed_rmb_lowering; +defm : avx512_icmp_packed_rmb_lowering; +defm : avx512_icmp_packed_rmb_lowering; +defm : avx512_icmp_packed_rmb_lowering; + +defm : avx512_icmp_packed_rmb_lowering; +defm : avx512_icmp_packed_rmb_lowering; +defm : avx512_icmp_packed_rmb_lowering; + +defm : avx512_icmp_packed_rmb_lowering; +defm : avx512_icmp_packed_rmb_lowering; + +// VPCMPGTQ - i64 +defm : avx512_icmp_packed_rmb_lowering; +defm : avx512_icmp_packed_rmb_lowering; +defm : avx512_icmp_packed_rmb_lowering; +defm : avx512_icmp_packed_rmb_lowering; +defm : avx512_icmp_packed_rmb_lowering; + +defm : avx512_icmp_packed_rmb_lowering; +defm : avx512_icmp_packed_rmb_lowering; +defm : avx512_icmp_packed_rmb_lowering; +defm : avx512_icmp_packed_rmb_lowering; + +defm : avx512_icmp_packed_rmb_lowering; +defm : avx512_icmp_packed_rmb_lowering; +defm : avx512_icmp_packed_rmb_lowering; multiclass avx512_icmp_cc opc, string Suffix, SDNode OpNode, X86VectorVTInfo _> { @@ -1765,6 +2010,7 @@ multiclass avx512_icmp_cc opc, string Suffix, SDNode OpNode, (_.VT (bitconvert (_.LdFrag addr:$src2))), imm:$cc))], IIC_SSE_ALU_F32P_RM>, EVEX_4V; + let isCommutable = 1 in def rrik : AVX512AIi8, VEX_W, EVEX_CD8<64, CD8VF>; +multiclass avx512_icmp_cc_packed_lowering Preds> { +let Predicates = Preds in { + def : Pat<(insert_subvector (NewInf.KVT immAllZerosV), + (_.KVT (OpNode (_.VT _.RC:$src1), + (_.VT _.RC:$src2), + imm:$cc)), + (i64 0)), + (COPY_TO_REGCLASS (!cast(InstrStr##rri) _.RC:$src1, + _.RC:$src2, + imm:$cc), + NewInf.KRC)>; + + def : Pat<(insert_subvector (NewInf.KVT immAllZerosV), + (_.KVT (OpNode (_.VT _.RC:$src1), + (_.VT (bitconvert (_.LdFrag addr:$src2))), + imm:$cc)), + (i64 0)), + (COPY_TO_REGCLASS (!cast(InstrStr##rmi) _.RC:$src1, + addr:$src2, + imm:$cc), + NewInf.KRC)>; + + def : Pat<(insert_subvector (NewInf.KVT immAllZerosV), + (_.KVT (and _.KRCWM:$mask, + (OpNode (_.VT _.RC:$src1), + (_.VT _.RC:$src2), + imm:$cc))), + (i64 0)), + (COPY_TO_REGCLASS (!cast(InstrStr##rrik) _.KRCWM:$mask, + _.RC:$src1, + _.RC:$src2, + imm:$cc), + NewInf.KRC)>; + + def : Pat<(insert_subvector (NewInf.KVT immAllZerosV), + (_.KVT (and (_.KVT _.KRCWM:$mask), + (_.KVT (OpNode (_.VT _.RC:$src1), + (_.VT (bitconvert + (_.LdFrag addr:$src2))), + imm:$cc)))), + 
(i64 0)), + (COPY_TO_REGCLASS (!cast(InstrStr##rmik) _.KRCWM:$mask, + _.RC:$src1, + addr:$src2, + imm:$cc), + NewInf.KRC)>; +} +} + +multiclass avx512_icmp_cc_packed_rmb_lowering Preds> + : avx512_icmp_cc_packed_lowering<_, NewInf, OpNode, InstrStr, Preds> { +let Predicates = Preds in { + def : Pat<(insert_subvector (NewInf.KVT immAllZerosV), + (_.KVT (OpNode (_.VT _.RC:$src1), + (X86VBroadcast (_.ScalarLdFrag addr:$src2)), + imm:$cc)), + (i64 0)), + (COPY_TO_REGCLASS (!cast(InstrStr##rmib) _.RC:$src1, + addr:$src2, + imm:$cc), + NewInf.KRC)>; + + def : Pat<(insert_subvector (NewInf.KVT immAllZerosV), + (_.KVT (and (_.KVT _.KRCWM:$mask), + (_.KVT (OpNode (_.VT _.RC:$src1), + (X86VBroadcast + (_.ScalarLdFrag addr:$src2)), + imm:$cc)))), + (i64 0)), + (COPY_TO_REGCLASS (!cast(InstrStr##rmibk) _.KRCWM:$mask, + _.RC:$src1, + addr:$src2, + imm:$cc), + NewInf.KRC)>; +} +} + +// VPCMPB - i8 +defm : avx512_icmp_cc_packed_lowering; +defm : avx512_icmp_cc_packed_lowering; + +defm : avx512_icmp_cc_packed_lowering; + +// VPCMPW - i16 +defm : avx512_icmp_cc_packed_lowering; +defm : avx512_icmp_cc_packed_lowering; +defm : avx512_icmp_cc_packed_lowering; + +defm : avx512_icmp_cc_packed_lowering; +defm : avx512_icmp_cc_packed_lowering; + +defm : avx512_icmp_cc_packed_lowering; + +// VPCMPD - i32 +defm : avx512_icmp_cc_packed_rmb_lowering; +defm : avx512_icmp_cc_packed_rmb_lowering; +defm : avx512_icmp_cc_packed_rmb_lowering; +defm : avx512_icmp_cc_packed_rmb_lowering; + +defm : avx512_icmp_cc_packed_rmb_lowering; +defm : avx512_icmp_cc_packed_rmb_lowering; +defm : avx512_icmp_cc_packed_rmb_lowering; + +defm : avx512_icmp_cc_packed_rmb_lowering; +defm : avx512_icmp_cc_packed_rmb_lowering; + +// VPCMPQ - i64 +defm : avx512_icmp_cc_packed_rmb_lowering; +defm : avx512_icmp_cc_packed_rmb_lowering; +defm : avx512_icmp_cc_packed_rmb_lowering; +defm : avx512_icmp_cc_packed_rmb_lowering; +defm : avx512_icmp_cc_packed_rmb_lowering; + +defm : avx512_icmp_cc_packed_rmb_lowering; +defm : avx512_icmp_cc_packed_rmb_lowering; +defm : avx512_icmp_cc_packed_rmb_lowering; +defm : avx512_icmp_cc_packed_rmb_lowering; + +defm : avx512_icmp_cc_packed_rmb_lowering; +defm : avx512_icmp_cc_packed_rmb_lowering; +defm : avx512_icmp_cc_packed_rmb_lowering; + +// VPCMPUB - i8 +defm : avx512_icmp_cc_packed_lowering; +defm : avx512_icmp_cc_packed_lowering; + +defm : avx512_icmp_cc_packed_lowering; + +// VPCMPUW - i16 +defm : avx512_icmp_cc_packed_lowering; +defm : avx512_icmp_cc_packed_lowering; +defm : avx512_icmp_cc_packed_lowering; + +defm : avx512_icmp_cc_packed_lowering; +defm : avx512_icmp_cc_packed_lowering; + +defm : avx512_icmp_cc_packed_lowering; + +// VPCMPUD - i32 +defm : avx512_icmp_cc_packed_rmb_lowering; +defm : avx512_icmp_cc_packed_rmb_lowering; +defm : avx512_icmp_cc_packed_rmb_lowering; +defm : avx512_icmp_cc_packed_rmb_lowering; + +defm : avx512_icmp_cc_packed_rmb_lowering; +defm : avx512_icmp_cc_packed_rmb_lowering; +defm : avx512_icmp_cc_packed_rmb_lowering; + +defm : avx512_icmp_cc_packed_rmb_lowering; +defm : avx512_icmp_cc_packed_rmb_lowering; + +// VPCMPUQ - i64 +defm : avx512_icmp_cc_packed_rmb_lowering; +defm : avx512_icmp_cc_packed_rmb_lowering; +defm : avx512_icmp_cc_packed_rmb_lowering; +defm : avx512_icmp_cc_packed_rmb_lowering; +defm : avx512_icmp_cc_packed_rmb_lowering; + +defm : avx512_icmp_cc_packed_rmb_lowering; +defm : avx512_icmp_cc_packed_rmb_lowering; +defm : avx512_icmp_cc_packed_rmb_lowering; +defm : avx512_icmp_cc_packed_rmb_lowering; + +defm : avx512_icmp_cc_packed_rmb_lowering; +defm : 
avx512_icmp_cc_packed_rmb_lowering; +defm : avx512_icmp_cc_packed_rmb_lowering; + multiclass avx512_vcmp_common { defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _, @@ -1997,21 +2474,108 @@ defm VCMPPD : avx512_vcmp, defm VCMPPS : avx512_vcmp, AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; -def : Pat<(v8i1 (X86cmpm (v8f32 VR256X:$src1), (v8f32 VR256X:$src2), imm:$cc)), - (COPY_TO_REGCLASS (VCMPPSZrri - (v16f32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), - (v16f32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)), - imm:$cc), VK8)>; -def : Pat<(v8i1 (X86cmpm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)), - (COPY_TO_REGCLASS (VPCMPDZrri - (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), - (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)), - imm:$cc), VK8)>; -def : Pat<(v8i1 (X86cmpmu (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)), - (COPY_TO_REGCLASS (VPCMPUDZrri - (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), - (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)), - imm:$cc), VK8)>; +multiclass avx512_fcmp_cc_packed_lowering Preds> { +let Predicates = Preds in { + def : Pat<(insert_subvector (NewInf.KVT immAllZerosV), + (_.KVT (X86cmpm (_.VT _.RC:$src1), + (_.VT _.RC:$src2), + imm:$cc)), + (i64 0)), + (COPY_TO_REGCLASS (!cast(InstrStr##rri) _.RC:$src1, + _.RC:$src2, + imm:$cc), + NewInf.KRC)>; + + def : Pat<(insert_subvector (NewInf.KVT immAllZerosV), + (_.KVT (X86cmpm (_.VT _.RC:$src1), + (_.VT (bitconvert (_.LdFrag addr:$src2))), + imm:$cc)), + (i64 0)), + (COPY_TO_REGCLASS (!cast(InstrStr##rmi) _.RC:$src1, + addr:$src2, + imm:$cc), + NewInf.KRC)>; + + def : Pat<(insert_subvector (NewInf.KVT immAllZerosV), + (_.KVT (X86cmpm (_.VT _.RC:$src1), + (X86VBroadcast (_.ScalarLdFrag addr:$src2)), + imm:$cc)), + (i64 0)), + (COPY_TO_REGCLASS (!cast(InstrStr##rmbi) _.RC:$src1, + addr:$src2, + imm:$cc), + NewInf.KRC)>; +} +} + +multiclass avx512_fcmp_cc_packed_sae_lowering Preds> + : avx512_fcmp_cc_packed_lowering<_, NewInf, InstrStr, Preds> { + +let Predicates = Preds in + def : Pat<(insert_subvector (NewInf.KVT immAllZerosV), + (_.KVT (X86cmpmRnd (_.VT _.RC:$src1), + (_.VT _.RC:$src2), + imm:$cc, + (i32 FROUND_NO_EXC))), + (i64 0)), + (COPY_TO_REGCLASS (!cast(InstrStr##rrib) _.RC:$src1, + _.RC:$src2, + imm:$cc), + NewInf.KRC)>; +} + + +// VCMPPS - f32 +defm : avx512_fcmp_cc_packed_lowering; +defm : avx512_fcmp_cc_packed_lowering; +defm : avx512_fcmp_cc_packed_lowering; +defm : avx512_fcmp_cc_packed_lowering; + +defm : avx512_fcmp_cc_packed_lowering; +defm : avx512_fcmp_cc_packed_lowering; +defm : avx512_fcmp_cc_packed_lowering; + +defm : avx512_fcmp_cc_packed_sae_lowering; +defm : avx512_fcmp_cc_packed_sae_lowering; + +// VCMPPD - f64 +defm : avx512_fcmp_cc_packed_lowering; +defm : avx512_fcmp_cc_packed_lowering; +defm : avx512_fcmp_cc_packed_lowering; +defm : avx512_fcmp_cc_packed_lowering; +defm : avx512_fcmp_cc_packed_lowering; + +defm : avx512_fcmp_cc_packed_lowering; +defm : avx512_fcmp_cc_packed_lowering; +defm : avx512_fcmp_cc_packed_lowering; +defm : avx512_fcmp_cc_packed_lowering; + +defm : avx512_fcmp_cc_packed_sae_lowering; +defm : avx512_fcmp_cc_packed_sae_lowering; +defm : avx512_fcmp_cc_packed_sae_lowering; // ---------------------------------------------------------------- // FPClass @@ -2263,7 +2827,7 @@ let Predicates = [HasAVX512, NoDQI] in { let Predicates = [HasAVX512] in { def : Pat<(store (i16 (bitconvert (v16i1 VK16:$src))), addr:$dst), (KMOVWmk addr:$dst, VK16:$src)>; - def : Pat<(i1 
(load addr:$src)), + def : Pat<(v1i1 (load addr:$src)), (COPY_TO_REGCLASS (AND32ri8 (MOVZX32rm8 addr:$src), (i32 1)), VK1)>; def : Pat<(v16i1 (bitconvert (i16 (load addr:$src)))), (KMOVWkm addr:$src)>; @@ -2280,77 +2844,45 @@ let Predicates = [HasBWI] in { } let Predicates = [HasAVX512] in { - def : Pat<(i1 (trunc (i64 GR64:$src))), - (COPY_TO_REGCLASS (AND32ri8 (EXTRACT_SUBREG $src, sub_32bit), - (i32 1)), VK1)>; - - def : Pat<(i1 (trunc (i32 GR32:$src))), - (COPY_TO_REGCLASS (AND32ri8 $src, (i32 1)), VK1)>; - - def : Pat<(i1 (trunc (i32 (assertzext_i1 GR32:$src)))), - (COPY_TO_REGCLASS GR32:$src, VK1)>; + multiclass operation_gpr_mask_copy_lowering { + def : Pat<(maskVT (scalar_to_vector GR32:$src)), + (COPY_TO_REGCLASS GR32:$src, maskRC)>; - def : Pat<(i1 (trunc (i8 GR8:$src))), - (COPY_TO_REGCLASS - (AND32ri8 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), - GR8:$src, sub_8bit), (i32 1)), VK1)>; + def : Pat<(i32 (X86Vextract maskRC:$src, (iPTR 0))), + (COPY_TO_REGCLASS maskRC:$src, GR32)>; - def : Pat<(i1 (trunc (i16 GR16:$src))), - (COPY_TO_REGCLASS - (AND32ri8 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), - GR16:$src, sub_16bit), (i32 1)), VK1)>; + def : Pat<(maskVT (scalar_to_vector GR8:$src)), + (COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>; - def : Pat<(i32 (zext VK1:$src)), - (AND32ri8 (COPY_TO_REGCLASS VK1:$src, GR32), (i32 1))>; - - def : Pat<(i32 (anyext VK1:$src)), - (COPY_TO_REGCLASS VK1:$src, GR32)>; - - def : Pat<(i8 (zext VK1:$src)), - (EXTRACT_SUBREG - (AND32ri8 (COPY_TO_REGCLASS VK1:$src, GR32), (i32 1)), sub_8bit)>; + def : Pat<(i8 (X86Vextract maskRC:$src, (iPTR 0))), + (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS maskRC:$src, GR32)), sub_8bit)>; - def : Pat<(i8 (anyext VK1:$src)), - (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK1:$src, GR32)), sub_8bit)>; + def : Pat<(i32 (anyext (i8 (X86Vextract maskRC:$src, (iPTR 0))))), + (COPY_TO_REGCLASS maskRC:$src, GR32)>; + } - def : Pat<(i64 (zext VK1:$src)), - (SUBREG_TO_REG (i64 0), - (AND32ri8 (COPY_TO_REGCLASS VK1:$src, GR32), (i32 1)), sub_32bit)>; + defm : operation_gpr_mask_copy_lowering; + defm : operation_gpr_mask_copy_lowering; + defm : operation_gpr_mask_copy_lowering; + defm : operation_gpr_mask_copy_lowering; + defm : operation_gpr_mask_copy_lowering; + defm : operation_gpr_mask_copy_lowering; + defm : operation_gpr_mask_copy_lowering; - def : Pat<(i64 (anyext VK1:$src)), - (INSERT_SUBREG (i64 (IMPLICIT_DEF)), - (i32 (COPY_TO_REGCLASS VK1:$src, GR32)), sub_32bit)>; + def : Pat<(X86kshiftr (X86kshiftl (v1i1 (scalar_to_vector GR8:$src)), (i8 15)), (i8 15)) , + (COPY_TO_REGCLASS + (KMOVWkr (AND32ri8 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), + GR8:$src, sub_8bit), (i32 1))), VK1)>; + def : Pat<(X86kshiftr (X86kshiftl (v16i1 (scalar_to_vector GR8:$src)), (i8 15)), (i8 15)) , + (COPY_TO_REGCLASS + (KMOVWkr (AND32ri8 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), + GR8:$src, sub_8bit), (i32 1))), VK16)>; + def : Pat<(X86kshiftr (X86kshiftl (v8i1 (scalar_to_vector GR8:$src)), (i8 15)), (i8 15)) , + (COPY_TO_REGCLASS + (KMOVWkr (AND32ri8 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), + GR8:$src, sub_8bit), (i32 1))), VK8)>; - def : Pat<(i16 (zext VK1:$src)), - (EXTRACT_SUBREG - (AND32ri8 (COPY_TO_REGCLASS VK1:$src, GR32), (i32 1)), sub_16bit)>; - - def : Pat<(i16 (anyext VK1:$src)), - (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK1:$src, GR32)), sub_16bit)>; -} -def : Pat<(v16i1 (scalar_to_vector VK1:$src)), - (COPY_TO_REGCLASS VK1:$src, VK16)>; -def : Pat<(v8i1 (scalar_to_vector VK1:$src)), - (COPY_TO_REGCLASS VK1:$src, VK8)>; -def : 
Pat<(v4i1 (scalar_to_vector VK1:$src)), - (COPY_TO_REGCLASS VK1:$src, VK4)>; -def : Pat<(v2i1 (scalar_to_vector VK1:$src)), - (COPY_TO_REGCLASS VK1:$src, VK2)>; -def : Pat<(v32i1 (scalar_to_vector VK1:$src)), - (COPY_TO_REGCLASS VK1:$src, VK32)>; -def : Pat<(v64i1 (scalar_to_vector VK1:$src)), - (COPY_TO_REGCLASS VK1:$src, VK64)>; - -def : Pat<(store (i1 -1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>; -def : Pat<(store (i1 1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>; -def : Pat<(store (i1 0), addr:$dst), (MOV8mi addr:$dst, (i8 0))>; - -def : Pat<(i1 (X86Vextract VK64:$src, (iPTR 0))), (COPY_TO_REGCLASS VK64:$src, VK1)>; -def : Pat<(i1 (X86Vextract VK32:$src, (iPTR 0))), (COPY_TO_REGCLASS VK32:$src, VK1)>; -def : Pat<(i1 (X86Vextract VK16:$src, (iPTR 0))), (COPY_TO_REGCLASS VK16:$src, VK1)>; -def : Pat<(i1 (X86Vextract VK8:$src, (iPTR 0))), (COPY_TO_REGCLASS VK8:$src, VK1)>; -def : Pat<(i1 (X86Vextract VK4:$src, (iPTR 0))), (COPY_TO_REGCLASS VK4:$src, VK1)>; -def : Pat<(i1 (X86Vextract VK2:$src, (iPTR 0))), (COPY_TO_REGCLASS VK2:$src, VK1)>; +} // Mask unary operation // - KNOT @@ -2529,6 +3061,69 @@ multiclass avx512_mask_shiftop_w opc1, bits<8> opc2, string OpcodeStr, defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl>; defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr>; +multiclass axv512_icmp_packed_no_vlx_lowering { +def : Pat<(v8i1 (OpNode (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))), + (COPY_TO_REGCLASS (!cast(InstStr##Zrr) + (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), + (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))), VK8)>; + +def : Pat<(insert_subvector (v16i1 immAllZerosV), + (v8i1 (OpNode (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))), + (i64 0)), + (KSHIFTRWri (KSHIFTLWri (!cast(InstStr##Zrr) + (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), + (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))), + (i8 8)), (i8 8))>; + +def : Pat<(insert_subvector (v16i1 immAllZerosV), + (v8i1 (and VK8:$mask, + (OpNode (v8i32 VR256X:$src1), (v8i32 VR256X:$src2)))), + (i64 0)), + (KSHIFTRWri (KSHIFTLWri (!cast(InstStr##Zrrk) + (COPY_TO_REGCLASS VK8:$mask, VK16), + (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), + (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))), + (i8 8)), (i8 8))>; +} + +multiclass axv512_icmp_packed_cc_no_vlx_lowering { +def : Pat<(v8i1 (OpNode (_.info256.VT VR256X:$src1), (_.info256.VT VR256X:$src2), imm:$cc)), + (COPY_TO_REGCLASS (!cast(InstStr##Zrri) + (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), + (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)), + imm:$cc), VK8)>; + +def : Pat<(insert_subvector (v16i1 immAllZerosV), + (v8i1 (OpNode (_.info256.VT VR256X:$src1), (_.info256.VT VR256X:$src2), imm:$cc)), + (i64 0)), + (KSHIFTRWri (KSHIFTLWri (!cast(InstStr##Zrri) + (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), + (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)), + imm:$cc), + (i8 8)), (i8 8))>; + +def : Pat<(insert_subvector (v16i1 immAllZerosV), + (v8i1 (and VK8:$mask, + (OpNode (_.info256.VT VR256X:$src1), (_.info256.VT VR256X:$src2), imm:$cc))), + (i64 0)), + (KSHIFTRWri (KSHIFTLWri (!cast(InstStr##Zrrik) + (COPY_TO_REGCLASS VK8:$mask, VK16), + (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), + (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)), + imm:$cc), + (i8 8)), (i8 8))>; +} + +let Predicates = [HasAVX512, NoVLX] 
in { + defm : axv512_icmp_packed_no_vlx_lowering; + defm : axv512_icmp_packed_no_vlx_lowering; + + defm : axv512_icmp_packed_cc_no_vlx_lowering; + defm : axv512_icmp_packed_cc_no_vlx_lowering; + defm : axv512_icmp_packed_cc_no_vlx_lowering; +} + // Mask setting all 0s or 1s multiclass avx512_mask_setop { let Predicates = [HasAVX512] in @@ -2551,14 +3146,11 @@ let Predicates = [HasAVX512] in { def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>; def : Pat<(v4i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK4)>; def : Pat<(v2i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK2)>; + def : Pat<(v1i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK1)>; def : Pat<(v8i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK8)>; def : Pat<(v4i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK4)>; def : Pat<(v2i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK2)>; - let AddedComplexity = 10 in { // To optimize isel table. - def : Pat<(i1 0), (COPY_TO_REGCLASS (KSET0W), VK1)>; - def : Pat<(i1 1), (COPY_TO_REGCLASS (KSHIFTRWri (KSET1W), (i8 15)), VK1)>; - def : Pat<(i1 -1), (COPY_TO_REGCLASS (KSHIFTRWri (KSET1W), (i8 15)), VK1)>; - } + def : Pat<(v1i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK1)>; } // Patterns for kmask insert_subvector/extract_subvector to/from index=0 @@ -2570,6 +3162,12 @@ multiclass operation_subvector_mask_lowering; } +defm : operation_subvector_mask_lowering; +defm : operation_subvector_mask_lowering; +defm : operation_subvector_mask_lowering; +defm : operation_subvector_mask_lowering; +defm : operation_subvector_mask_lowering; +defm : operation_subvector_mask_lowering; defm : operation_subvector_mask_lowering; defm : operation_subvector_mask_lowering; @@ -2723,22 +3321,22 @@ multiclass avx512_load_vl opc, string OpcodeStr, } multiclass avx512_store opc, string OpcodeStr, X86VectorVTInfo _, - PatFrag st_frag, PatFrag mstore> { + PatFrag st_frag, PatFrag mstore, string Name> { let hasSideEffects = 0 in { def rr_REV : AVX512PI, EVEX; + [], _.ExeDomain>, EVEX, FoldGenData; def rrk_REV : AVX512PI, EVEX, EVEX_K; + [], _.ExeDomain>, EVEX, EVEX_K, FoldGenData; def rrkz_REV : AVX512PI, EVEX, EVEX_KZ; + [], _.ExeDomain>, EVEX, EVEX_KZ, FoldGenData; } def mr : AVX512PI opc, string OpcodeStr, X86VectorVTInfo _, multiclass avx512_store_vl< bits<8> opc, string OpcodeStr, - AVX512VLVectorVTInfo _, Predicate prd> { + AVX512VLVectorVTInfo _, Predicate prd, + string Name> { let Predicates = [prd] in defm Z : avx512_store, EVEX_V512; + masked_store_unaligned, Name#Z>, EVEX_V512; let Predicates = [prd, HasVLX] in { defm Z256 : avx512_store, EVEX_V256; + masked_store_unaligned, Name#Z256>, EVEX_V256; defm Z128 : avx512_store, EVEX_V128; + masked_store_unaligned, Name#Z128>, EVEX_V128; } } multiclass avx512_alignedstore_vl opc, string OpcodeStr, - AVX512VLVectorVTInfo _, Predicate prd> { + AVX512VLVectorVTInfo _, Predicate prd, + string Name> { let Predicates = [prd] in defm Z : avx512_store, EVEX_V512; + masked_store_aligned512, Name#Z>, EVEX_V512; let Predicates = [prd, HasVLX] in { defm Z256 : avx512_store, EVEX_V256; + masked_store_aligned256, Name#Z256>, EVEX_V256; defm Z128 : avx512_store, EVEX_V128; + masked_store_aligned128, Name#Z128>, EVEX_V128; } } defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info, HasAVX512>, avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info, - HasAVX512>, PS, EVEX_CD8<32, CD8VF>; + HasAVX512, "VMOVAPS">, + PS, EVEX_CD8<32, CD8VF>; defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info, HasAVX512>, avx512_alignedstore_vl<0x29, 
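Two mask-register idioms recur in the preceding hunks, modeled here on scalars (hedged, illustrative only): a v1i1 in memory occupies a byte of which only bit 0 matters, and a narrow compare result is zero-extended into a wider k-register by a KSHIFTL/KSHIFTR pair.

  #include <cstdint>
  // v1i1 load pattern: byte zero-extend, then keep only bit 0.
  uint32_t loadMaskBit(const uint8_t *P) { return (uint32_t)*P & 1u; }
  // KSHIFTLW/KSHIFTRW by 8: clears the upper half of a 16-bit mask, i.e.
  // zero-extends a v8i1 compare result into a v16i1 register.
  uint16_t zextMask8(uint16_t K) { return (uint16_t)(K << 8) >> 8; }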
"vmovapd", avx512vl_f64_info, - HasAVX512>, PD, VEX_W, EVEX_CD8<64, CD8VF>; + HasAVX512, "VMOVAPD">, + PD, VEX_W, EVEX_CD8<64, CD8VF>; defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512, null_frag>, - avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512>, + avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512, + "VMOVUPS">, PS, EVEX_CD8<32, CD8VF>; defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512, null_frag>, - avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512>, + avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512, + "VMOVUPD">, PD, VEX_W, EVEX_CD8<64, CD8VF>; defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info, HasAVX512>, avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info, - HasAVX512>, PD, EVEX_CD8<32, CD8VF>; + HasAVX512, "VMOVDQA32">, + PD, EVEX_CD8<32, CD8VF>; defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info, HasAVX512>, avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info, - HasAVX512>, PD, VEX_W, EVEX_CD8<64, CD8VF>; + HasAVX512, "VMOVDQA64">, + PD, VEX_W, EVEX_CD8<64, CD8VF>; defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI>, - avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, - HasBWI>, XD, EVEX_CD8<8, CD8VF>; + avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, + HasBWI, "VMOVDQU8">, + XD, EVEX_CD8<8, CD8VF>; defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI>, avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, - HasBWI>, XD, VEX_W, EVEX_CD8<16, CD8VF>; + HasBWI, "VMOVDQU16">, + XD, VEX_W, EVEX_CD8<16, CD8VF>; defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512, null_frag>, avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, - HasAVX512>, XS, EVEX_CD8<32, CD8VF>; + HasAVX512, "VMOVDQU32">, + XS, EVEX_CD8<32, CD8VF>; defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512, null_frag>, avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, - HasAVX512>, XS, VEX_W, EVEX_CD8<64, CD8VF>; + HasAVX512, "VMOVDQU64">, + XS, VEX_W, EVEX_CD8<64, CD8VF>; // Special instructions to help with spilling when we don't have VLX. We need // to load or store from a ZMM register instead. 
These are converted in @@ -3009,8 +3619,8 @@ let Predicates = [HasVLX] in { def : Pat<(alignedstore256 (v4f64 (extract_subvector (v8f64 VR512:$src), (iPTR 0))), addr:$dst), (VMOVAPDZ256mr addr:$dst, (v4f64 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>; - def : Pat<(alignedstore (v8f32 (extract_subvector - (v16f32 VR512:$src), (iPTR 0))), addr:$dst), + def : Pat<(alignedstore256 (v8f32 (extract_subvector + (v16f32 VR512:$src), (iPTR 0))), addr:$dst), (VMOVAPSZ256mr addr:$dst, (v8f32 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>; def : Pat<(alignedstore256 (v4i64 (extract_subvector (v8i64 VR512:$src), (iPTR 0))), addr:$dst), @@ -3249,7 +3859,7 @@ multiclass avx512_move_scalar_lowering(InstrStr#rrk) @@ -3260,7 +3870,7 @@ def : Pat<(_.VT (OpNode _.RC:$src0, def : Pat<(_.VT (OpNode _.RC:$src0, (_.VT (scalar_to_vector - (_.EltVT (X86selects (i1 (trunc GR32:$mask)), + (_.EltVT (X86selects (scalar_to_vector (and (i8 (trunc GR32:$mask)), (i8 1))), (_.EltVT _.FRC:$src1), (_.EltVT ZeroFP))))))), (COPY_TO_REGCLASS (!cast(InstrStr#rrkz) @@ -3279,7 +3889,7 @@ def : Pat<(masked_store addr:$dst, Mask, (iPTR 0))), (iPTR 0)))), (!cast(InstrStr#mrk) addr:$dst, - (i1 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM)), + (COPY_TO_REGCLASS MaskRC:$mask, VK1WM), (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>; } @@ -3296,7 +3906,7 @@ def : Pat<(masked_store addr:$dst, Mask, (iPTR 0))), (iPTR 0)))), (!cast(InstrStr#mrk) addr:$dst, - (i1 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM)), + (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>; } @@ -3310,7 +3920,7 @@ def : Pat<(_.info128.VT (extract_subvector (v16i32 immAllZerosV))))), (iPTR 0))), (!cast(InstrStr#rmkz) - (i1 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM)), + (COPY_TO_REGCLASS MaskRC:$mask, VK1WM), addr:$srcAddr)>; def : Pat<(_.info128.VT (extract_subvector @@ -3322,7 +3932,7 @@ def : Pat<(_.info128.VT (extract_subvector (iPTR 0))))), (iPTR 0))), (!cast(InstrStr#rmk) _.info128.RC:$src, - (i1 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM)), + (COPY_TO_REGCLASS MaskRC:$mask, VK1WM), addr:$srcAddr)>; } @@ -3338,7 +3948,7 @@ def : Pat<(_.info128.VT (extract_subvector (v16i32 immAllZerosV))))), (iPTR 0))), (!cast(InstrStr#rmkz) - (i1 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM)), + (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), addr:$srcAddr)>; def : Pat<(_.info128.VT (extract_subvector @@ -3350,7 +3960,7 @@ def : Pat<(_.info128.VT (extract_subvector (iPTR 0))))), (iPTR 0))), (!cast(InstrStr#rmk) _.info128.RC:$src, - (i1 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM)), + (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), addr:$srcAddr)>; } @@ -3381,20 +3991,55 @@ def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))), VK1WM:$mask, (v2f64 (IMPLICIT_DEF)), FR64X:$src1), FR64X)>; def : Pat<(int_x86_avx512_mask_store_ss addr:$dst, VR128X:$src, GR8:$mask), - (VMOVSSZmrk addr:$dst, (i1 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$mask, sub_8bit)), VK1WM)), + (VMOVSSZmrk addr:$dst, (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$mask, sub_8bit)), VK1WM), (COPY_TO_REGCLASS VR128X:$src, FR32X))>; -let hasSideEffects = 0 in -defm VMOVSSZrr_REV : AVX512_maskable_in_asm<0x11, MRMDestReg, f32x_info, - (outs VR128X:$dst), (ins VR128X:$src1, FR32X:$src2), - "vmovss.s", "$src2, $src1", 
"$src1, $src2", []>, - XS, EVEX_4V, VEX_LIG; - -let hasSideEffects = 0 in -defm VMOVSDZrr_REV : AVX512_maskable_in_asm<0x11, MRMDestReg, f64x_info, - (outs VR128X:$dst), (ins VR128X:$src1, FR64X:$src2), - "vmovsd.s", "$src2, $src1", "$src1, $src2", []>, - XD, EVEX_4V, VEX_LIG, VEX_W; +let hasSideEffects = 0 in { + def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), + (ins VR128X:$src1, FR32X:$src2), + "vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [], NoItinerary>, XS, EVEX_4V, VEX_LIG, + FoldGenData<"VMOVSSZrr">; + +let Constraints = "$src0 = $dst" in + def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), + (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask, + VR128X:$src1, FR32X:$src2), + "vmovss.s\t{$src2, $src1, $dst {${mask}}|"# + "$dst {${mask}}, $src1, $src2}", + [], NoItinerary>, EVEX_K, XS, EVEX_4V, VEX_LIG, + FoldGenData<"VMOVSSZrrk">; + + def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), + (ins f32x_info.KRCWM:$mask, VR128X:$src1, FR32X:$src2), + "vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"# + "$dst {${mask}} {z}, $src1, $src2}", + [], NoItinerary>, EVEX_KZ, XS, EVEX_4V, VEX_LIG, + FoldGenData<"VMOVSSZrrkz">; + + def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), + (ins VR128X:$src1, FR64X:$src2), + "vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [], NoItinerary>, XD, EVEX_4V, VEX_LIG, VEX_W, + FoldGenData<"VMOVSDZrr">; + +let Constraints = "$src0 = $dst" in + def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), + (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask, + VR128X:$src1, FR64X:$src2), + "vmovsd.s\t{$src2, $src1, $dst {${mask}}|"# + "$dst {${mask}}, $src1, $src2}", + [], NoItinerary>, EVEX_K, XD, EVEX_4V, VEX_LIG, + VEX_W, FoldGenData<"VMOVSDZrrk">; + + def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), + (ins f64x_info.KRCWM:$mask, VR128X:$src1, + FR64X:$src2), + "vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"# + "$dst {${mask}} {z}, $src1, $src2}", + [], NoItinerary>, EVEX_KZ, XD, EVEX_4V, VEX_LIG, + VEX_W, FoldGenData<"VMOVSDZrrkz">; +} let Predicates = [HasAVX512] in { let AddedComplexity = 15 in { @@ -5062,6 +5707,109 @@ defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v2i64x_info, [HasVLX]>; defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v4i64x_info, [HasVLX]>; defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v8i64_info, [HasAVX512]>; + +// Use 512bit VPROL/VPROLI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX. 
+let Predicates = [HasAVX512, NoVLX] in { + def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))), + (EXTRACT_SUBREG (v8i64 + (VPROLVQZrr + (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), + (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm))), + sub_xmm)>; + def : Pat<(v4i64 (rotl (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))), + (EXTRACT_SUBREG (v8i64 + (VPROLVQZrr + (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), + (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))), + sub_ymm)>; + + def : Pat<(v4i32 (rotl (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))), + (EXTRACT_SUBREG (v16i32 + (VPROLVDZrr + (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), + (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm))), + sub_xmm)>; + def : Pat<(v8i32 (rotl (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))), + (EXTRACT_SUBREG (v16i32 + (VPROLVDZrr + (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), + (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))), + sub_ymm)>; + + def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 imm:$src2))), + (EXTRACT_SUBREG (v8i64 + (VPROLQZri + (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), + imm:$src2)), sub_xmm)>; + def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 imm:$src2))), + (EXTRACT_SUBREG (v8i64 + (VPROLQZri + (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), + imm:$src2)), sub_ymm)>; + + def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 imm:$src2))), + (EXTRACT_SUBREG (v16i32 + (VPROLDZri + (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), + imm:$src2)), sub_xmm)>; + def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 imm:$src2))), + (EXTRACT_SUBREG (v16i32 + (VPROLDZri + (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), + imm:$src2)), sub_ymm)>; +} + +// Use 512bit VPROR/VPRORI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX. 
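The right-rotate block that follows is structurally identical to the left-rotate block above, which a one-line scalar identity makes plain (again an illustrative sketch, not patch code): rotating right by an amount is the same as rotating left by the complement of that amount modulo the lane width.

    #include <cstdint>

    static uint64_t rotl64(uint64_t V, uint64_t Amt) {
      Amt &= 63;
      return Amt == 0 ? V : (V << Amt) | (V >> (64 - Amt));
    }

    // rotr reduces to rotl with the complemented amount, which is why the
    // VPRORV/VPRORI patterns below mirror the VPROLV/VPROLI ones exactly.
    static uint64_t rotr64(uint64_t V, uint64_t Amt) {
      return rotl64(V, (64 - (Amt & 63)) & 63);
    }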
+let Predicates = [HasAVX512, NoVLX] in { + def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))), + (EXTRACT_SUBREG (v8i64 + (VPRORVQZrr + (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), + (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm))), + sub_xmm)>; + def : Pat<(v4i64 (rotr (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))), + (EXTRACT_SUBREG (v8i64 + (VPRORVQZrr + (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), + (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))), + sub_ymm)>; + + def : Pat<(v4i32 (rotr (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))), + (EXTRACT_SUBREG (v16i32 + (VPRORVDZrr + (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), + (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm))), + sub_xmm)>; + def : Pat<(v8i32 (rotr (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))), + (EXTRACT_SUBREG (v16i32 + (VPRORVDZrr + (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), + (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))), + sub_ymm)>; + + def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 imm:$src2))), + (EXTRACT_SUBREG (v8i64 + (VPRORQZri + (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), + imm:$src2)), sub_xmm)>; + def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 imm:$src2))), + (EXTRACT_SUBREG (v8i64 + (VPRORQZri + (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), + imm:$src2)), sub_ymm)>; + + def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 imm:$src2))), + (EXTRACT_SUBREG (v16i32 + (VPRORDZri + (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), + imm:$src2)), sub_xmm)>; + def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 imm:$src2))), + (EXTRACT_SUBREG (v16i32 + (VPRORDZri + (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), + imm:$src2)), sub_ymm)>; +} + //===-------------------------------------------------------------------===// // 1-src variable permutation VPERMW/D/Q //===-------------------------------------------------------------------===// @@ -7246,13 +7994,13 @@ avx512_rndscale_scalar opc, string OpcodeStr, X86VectorVTInfo _> { let Predicates = [HasAVX512] in { def : Pat<(ffloor _.FRC:$src), (COPY_TO_REGCLASS (_.VT (!cast(NAME##r) (_.VT (IMPLICIT_DEF)), - (_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0x1))), _.FRC)>; + (_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0x9))), _.FRC)>; def : Pat<(fceil _.FRC:$src), (COPY_TO_REGCLASS (_.VT (!cast(NAME##r) (_.VT (IMPLICIT_DEF)), - (_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0x2))), _.FRC)>; + (_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0xa))), _.FRC)>; def : Pat<(ftrunc _.FRC:$src), (COPY_TO_REGCLASS (_.VT (!cast(NAME##r) (_.VT (IMPLICIT_DEF)), - (_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0x3))), _.FRC)>; + (_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0xb))), _.FRC)>; def : Pat<(frint _.FRC:$src), (COPY_TO_REGCLASS (_.VT (!cast(NAME##r) (_.VT (IMPLICIT_DEF)), (_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0x4))), _.FRC)>; @@ -7262,13 +8010,13 @@ avx512_rndscale_scalar opc, string OpcodeStr, X86VectorVTInfo _> { def : Pat<(ffloor (_.ScalarLdFrag addr:$src)), (COPY_TO_REGCLASS (_.VT (!cast(NAME##m) (_.VT (IMPLICIT_DEF)), - addr:$src, (i32 0x1))), _.FRC)>; + addr:$src, (i32 0x9))), _.FRC)>; def : Pat<(fceil (_.ScalarLdFrag addr:$src)), (COPY_TO_REGCLASS (_.VT (!cast(NAME##m) (_.VT (IMPLICIT_DEF)), - addr:$src, (i32 0x2))), _.FRC)>; + addr:$src, (i32 0xa))), _.FRC)>; def : Pat<(ftrunc (_.ScalarLdFrag addr:$src)), (COPY_TO_REGCLASS (_.VT (!cast(NAME##m) 
(_.VT (IMPLICIT_DEF)), - addr:$src, (i32 0x3))), _.FRC)>; + addr:$src, (i32 0xb))), _.FRC)>; def : Pat<(frint (_.ScalarLdFrag addr:$src)), (COPY_TO_REGCLASS (_.VT (!cast(NAME##m) (_.VT (IMPLICIT_DEF)), addr:$src, (i32 0x4))), _.FRC)>; @@ -7850,7 +8598,7 @@ let Predicates = [HasVLX] in { defm NAME##D##SUFF##Z128: avx512_gather, EVEX_V128; defm NAME##Q##SUFF##Z128: avx512_gather, EVEX_V128; + vx64xmem, X86mgatherv2i64>, EVEX_V128; } } @@ -8452,26 +9200,26 @@ multiclass avx512_shuff_packed_128; + (VRNDSCALEPSZrri VR512:$src, (i32 0x9))>; def : Pat<(v16f32 (fnearbyint VR512:$src)), (VRNDSCALEPSZrri VR512:$src, (i32 0xC))>; def : Pat<(v16f32 (fceil VR512:$src)), - (VRNDSCALEPSZrri VR512:$src, (i32 0x2))>; + (VRNDSCALEPSZrri VR512:$src, (i32 0xA))>; def : Pat<(v16f32 (frint VR512:$src)), (VRNDSCALEPSZrri VR512:$src, (i32 0x4))>; def : Pat<(v16f32 (ftrunc VR512:$src)), - (VRNDSCALEPSZrri VR512:$src, (i32 0x3))>; + (VRNDSCALEPSZrri VR512:$src, (i32 0xB))>; def : Pat<(v8f64 (ffloor VR512:$src)), - (VRNDSCALEPDZrri VR512:$src, (i32 0x1))>; + (VRNDSCALEPDZrri VR512:$src, (i32 0x9))>; def : Pat<(v8f64 (fnearbyint VR512:$src)), (VRNDSCALEPDZrri VR512:$src, (i32 0xC))>; def : Pat<(v8f64 (fceil VR512:$src)), - (VRNDSCALEPDZrri VR512:$src, (i32 0x2))>; + (VRNDSCALEPDZrri VR512:$src, (i32 0xA))>; def : Pat<(v8f64 (frint VR512:$src)), (VRNDSCALEPDZrri VR512:$src, (i32 0x4))>; def : Pat<(v8f64 (ftrunc VR512:$src)), - (VRNDSCALEPDZrri VR512:$src, (i32 0x3))>; + (VRNDSCALEPDZrri VR512:$src, (i32 0xB))>; } defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4",avx512vl_f32_info, 0x23>, @@ -8678,6 +9426,41 @@ let Predicates = [HasCDI, NoVLX] in { sub_xmm)>; } +//===---------------------------------------------------------------------===// +// Counts number of ones - VPOPCNTD and VPOPCNTQ +//===---------------------------------------------------------------------===// + +multiclass avx512_unary_rmb_popcnt opc, string OpcodeStr, X86VectorVTInfo VTInfo> { + let Predicates = [HasVPOPCNTDQ] in + defm Z : avx512_unary_rmb, EVEX_V512; +} + +// Use 512bit version to implement 128/256 bit. 
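The multiclass that follows generates those widened patterns. Semantically there is nothing subtle to widen: VPOPCNTD/VPOPCNTQ count the set bits of each lane independently, so running the v16i32/v8i64 form on a value parked in the low lanes and extracting the low subregister afterwards yields exactly the 128/256-bit result. A scalar model (illustrative, not patch code):

    #include <bitset>
    #include <cstdint>

    // vpopcntq on one v2i64 value: each lane independently receives the
    // population count of its 64 bits.
    static void popcntv2i64(uint64_t Dst[2], const uint64_t Src[2]) {
      for (int I = 0; I != 2; ++I)
        Dst[I] = std::bitset<64>(Src[I]).count();
    }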
+multiclass avx512_unary_lowering { + let Predicates = [prd] in { + def Z256_Alt : Pat<(_.info256.VT(OpNode _.info256.RC:$src1)), + (EXTRACT_SUBREG + (!cast(NAME # "Zrr") + (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)), + _.info256.RC:$src1, + _.info256.SubRegIdx)), + _.info256.SubRegIdx)>; + + def Z128_Alt : Pat<(_.info128.VT(OpNode _.info128.RC:$src1)), + (EXTRACT_SUBREG + (!cast(NAME # "Zrr") + (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)), + _.info128.RC:$src1, + _.info128.SubRegIdx)), + _.info128.SubRegIdx)>; + } +} + +defm VPOPCNTD : avx512_unary_rmb_popcnt<0x55, "vpopcntd", v16i32_info>, + avx512_unary_lowering; +defm VPOPCNTQ : avx512_unary_rmb_popcnt<0x55, "vpopcntq", v8i64_info>, + avx512_unary_lowering, VEX_W; + //===---------------------------------------------------------------------===// // Replicate Single FP - MOVSHDUP and MOVSLDUP //===---------------------------------------------------------------------===// @@ -8825,7 +9608,7 @@ multiclass avx512_extract_elt_w { def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst), (ins _.RC:$src1, u8imm:$src2), OpcodeStr#".s\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, - EVEX, TAPD; + EVEX, TAPD, FoldGenData; defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD; } diff --git a/interpreter/llvm/src/lib/Target/X86/X86InstrArithmetic.td b/interpreter/llvm/src/lib/Target/X86/X86InstrArithmetic.td index 66382014f6e8c..e38bbc9b3d368 100644 --- a/interpreter/llvm/src/lib/Target/X86/X86InstrArithmetic.td +++ b/interpreter/llvm/src/lib/Target/X86/X86InstrArithmetic.td @@ -964,10 +964,10 @@ multiclass ArithBinOp_RF BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4, } // isConvertibleToThreeAddress } // isCommutable - def NAME#8rr_REV : BinOpRR_Rev; - def NAME#16rr_REV : BinOpRR_Rev; - def NAME#32rr_REV : BinOpRR_Rev; - def NAME#64rr_REV : BinOpRR_Rev; + def NAME#8rr_REV : BinOpRR_Rev, FoldGenData; + def NAME#16rr_REV : BinOpRR_Rev, FoldGenData; + def NAME#32rr_REV : BinOpRR_Rev, FoldGenData; + def NAME#64rr_REV : BinOpRR_Rev, FoldGenData; def NAME#8rm : BinOpRM_RF; def NAME#16rm : BinOpRM_RF; @@ -1049,10 +1049,10 @@ multiclass ArithBinOp_RFF BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4, } // isConvertibleToThreeAddress } // isCommutable - def NAME#8rr_REV : BinOpRR_RFF_Rev; - def NAME#16rr_REV : BinOpRR_RFF_Rev; - def NAME#32rr_REV : BinOpRR_RFF_Rev; - def NAME#64rr_REV : BinOpRR_RFF_Rev; + def NAME#8rr_REV : BinOpRR_RFF_Rev, FoldGenData; + def NAME#16rr_REV : BinOpRR_RFF_Rev, FoldGenData; + def NAME#32rr_REV : BinOpRR_RFF_Rev, FoldGenData; + def NAME#64rr_REV : BinOpRR_RFF_Rev, FoldGenData; def NAME#8rm : BinOpRM_RFF; def NAME#16rm : BinOpRM_RFF; @@ -1129,10 +1129,10 @@ multiclass ArithBinOp_F BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4, } } // isCommutable - def NAME#8rr_REV : BinOpRR_F_Rev; - def NAME#16rr_REV : BinOpRR_F_Rev; - def NAME#32rr_REV : BinOpRR_F_Rev; - def NAME#64rr_REV : BinOpRR_F_Rev; + def NAME#8rr_REV : BinOpRR_F_Rev, FoldGenData; + def NAME#16rr_REV : BinOpRR_F_Rev, FoldGenData; + def NAME#32rr_REV : BinOpRR_F_Rev, FoldGenData; + def NAME#64rr_REV : BinOpRR_F_Rev, FoldGenData; def NAME#8rm : BinOpRM_F; def NAME#16rm : BinOpRM_F; diff --git a/interpreter/llvm/src/lib/Target/X86/X86InstrFMA.td b/interpreter/llvm/src/lib/Target/X86/X86InstrFMA.td index 1941ae57f0f1f..3a3cdc9fa5742 100644 --- a/interpreter/llvm/src/lib/Target/X86/X86InstrFMA.td +++ b/interpreter/llvm/src/lib/Target/X86/X86InstrFMA.td @@ -297,7 +297,7 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in (ins RC:$src1, 
RC:$src2, RC:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>, - VEX_LIG; + VEX_LIG, FoldGenData; } multiclass fma4s_int opc, string OpcodeStr, Operand memop, @@ -321,6 +321,12 @@ let isCodeGenOnly = 1 in { "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set VR128:$dst, (Int VR128:$src1, mem_cpat:$src2, VR128:$src3))]>, VEX_LIG; +let hasSideEffects = 0 in + def rr_Int_REV : FMA4, VEX_LIG, FoldGenData; } // isCodeGenOnly = 1 } @@ -372,12 +378,13 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in { def rr_REV : FMA4; + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>, + FoldGenData; def Yrr_REV : FMA4, - VEX_L; + VEX_L, FoldGenData; } // isCodeGenOnly = 1 } diff --git a/interpreter/llvm/src/lib/Target/X86/X86InstrFormats.td b/interpreter/llvm/src/lib/Target/X86/X86InstrFormats.td index c2fe786732dcd..bfcbf71d252f3 100644 --- a/interpreter/llvm/src/lib/Target/X86/X86InstrFormats.td +++ b/interpreter/llvm/src/lib/Target/X86/X86InstrFormats.td @@ -225,6 +225,12 @@ class Has3DNow0F0FOpcode { bit has3DNow0F0FOpcode = 1; } class XOP { Encoding OpEnc = EncXOP; } class XOP_4V : XOP { bit hasVEX_4V = 1; } +// Specify the alternative register form instruction to replace the current +// instruction in case it was picked during generation of memory folding tables +class FoldGenData { + string FoldGenRegForm = _RegisterForm; +} + class X86Inst opcod, Format f, ImmType i, dag outs, dag ins, string AsmStr, InstrItinClass itin, @@ -304,6 +310,10 @@ class X86Inst opcod, Format f, ImmType i, dag outs, dag ins, CD8_EltSize, !srl(VectSize, CD8_Form{1-0}))), 0); + // Used in the memory folding generation (TableGen backend) to point to an alternative + // instruction to replace the current one in case it got picked during generation. + string FoldGenRegForm = ?; + // TSFlags layout should be kept in sync with X86BaseInfo.h. let TSFlags{6-0} = FormBits; let TSFlags{8-7} = OpSizeBits; diff --git a/interpreter/llvm/src/lib/Target/X86/X86InstrFragmentsSIMD.td b/interpreter/llvm/src/lib/Target/X86/X86InstrFragmentsSIMD.td index 9867ba84bb9ba..8b5bbf24f6f63 100644 --- a/interpreter/llvm/src/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/interpreter/llvm/src/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -274,7 +274,7 @@ def X86select : SDNode<"X86ISD::SELECT", SDTCisSameNumEltsAs<0, 1>]>>; def X86selects : SDNode<"X86ISD::SELECTS", - SDTypeProfile<1, 3, [SDTCisVT<1, i1>, + SDTypeProfile<1, 3, [SDTCisVT<1, v1i1>, SDTCisSameAs<0, 2>, SDTCisSameAs<2, 3>]>>; @@ -441,7 +441,7 @@ def X86Vfpclass : SDNode<"X86ISD::VFPCLASS", SDTCisSameNumEltsAs<0,1>, SDTCisVT<2, i32>]>, []>; def X86Vfpclasss : SDNode<"X86ISD::VFPCLASSS", - SDTypeProfile<1, 2, [SDTCisVT<0, i1>, + SDTypeProfile<1, 2, [SDTCisVT<0, v1i1>, SDTCisFP<1>, SDTCisVT<2, i32>]>,[]>; def X86SubVBroadcast : SDNode<"X86ISD::SUBV_BROADCAST", @@ -451,7 +451,7 @@ def X86SubVBroadcast : SDNode<"X86ISD::SUBV_BROADCAST", def X86VBroadcast : SDNode<"X86ISD::VBROADCAST", SDTVBroadcast>; def X86VBroadcastm : SDNode<"X86ISD::VBROADCASTM", SDTVBroadcastm>; def X86Vextract : SDNode<"X86ISD::VEXTRACT", SDTypeProfile<1, 2, - [SDTCisEltOfVec<0, 1>, SDTCisVec<1>, + [SDTCisVec<1>, SDTCisPtrTy<2>]>, []>; def X86Blendi : SDNode<"X86ISD::BLENDI", SDTBlend>; @@ -641,22 +641,37 @@ def sdmem : Operand { // SSE pattern fragments //===----------------------------------------------------------------------===// +// Vector load wrappers to prevent folding of non-temporal aligned loads on +// supporting targets. 
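The fragments below encode the following rule: folding a load into an instruction's memory operand is fine unless the load could instead be selected as a non-temporal vector load (MOVNTDQA needs SSE4.1 and 16-byte alignment for the 128-bit form; the 256-bit and 512-bit forms need AVX2 and AVX-512 with 32- and 64-byte alignment, respectively). Such a load must survive as a separate instruction, so the fragment refuses to match it. A sketch of the 128-bit check, with illustrative names:

    #include <cstdint>

    struct LoadProps {
      bool NonTemporal;
      uint64_t Alignment; // in bytes
    };

    // Mirrors the vec128load predicate: allow folding unless the target
    // could select this load as MOVNTDQA instead.
    static bool canFold128(bool HasSSE41, const LoadProps &LD) {
      return !HasSSE41 || !LD.NonTemporal || LD.Alignment < 16;
    }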
+def vec128load : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return !Subtarget->hasSSE41() || !cast(N)->isNonTemporal() || + cast(N)->getAlignment() < 16; +}]>; +def vec256load : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return !Subtarget->hasAVX2() || !cast(N)->isNonTemporal() || + cast(N)->getAlignment() < 32; +}]>; +def vec512load : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return !Subtarget->hasAVX512() || !cast(N)->isNonTemporal() || + cast(N)->getAlignment() < 64; +}]>; + // 128-bit load pattern fragments // NOTE: all 128-bit integer vector loads are promoted to v2i64 -def loadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (load node:$ptr))>; -def loadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (load node:$ptr))>; -def loadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (load node:$ptr))>; +def loadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (vec128load node:$ptr))>; +def loadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (vec128load node:$ptr))>; +def loadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (vec128load node:$ptr))>; // 256-bit load pattern fragments // NOTE: all 256-bit integer vector loads are promoted to v4i64 -def loadv8f32 : PatFrag<(ops node:$ptr), (v8f32 (load node:$ptr))>; -def loadv4f64 : PatFrag<(ops node:$ptr), (v4f64 (load node:$ptr))>; -def loadv4i64 : PatFrag<(ops node:$ptr), (v4i64 (load node:$ptr))>; +def loadv8f32 : PatFrag<(ops node:$ptr), (v8f32 (vec256load node:$ptr))>; +def loadv4f64 : PatFrag<(ops node:$ptr), (v4f64 (vec256load node:$ptr))>; +def loadv4i64 : PatFrag<(ops node:$ptr), (v4i64 (vec256load node:$ptr))>; // 512-bit load pattern fragments -def loadv16f32 : PatFrag<(ops node:$ptr), (v16f32 (load node:$ptr))>; -def loadv8f64 : PatFrag<(ops node:$ptr), (v8f64 (load node:$ptr))>; -def loadv8i64 : PatFrag<(ops node:$ptr), (v8i64 (load node:$ptr))>; +def loadv16f32 : PatFrag<(ops node:$ptr), (v16f32 (vec512load node:$ptr))>; +def loadv8f64 : PatFrag<(ops node:$ptr), (v8f64 (vec512load node:$ptr))>; +def loadv8i64 : PatFrag<(ops node:$ptr), (v8i64 (vec512load node:$ptr))>; // 128-/256-/512-bit extload pattern fragments def extloadv2f32 : PatFrag<(ops node:$ptr), (v2f64 (extloadvf32 node:$ptr))>; @@ -722,15 +737,15 @@ def alignedloadv8f64 : PatFrag<(ops node:$ptr), def alignedloadv8i64 : PatFrag<(ops node:$ptr), (v8i64 (alignedload512 node:$ptr))>; -// Like 'load', but uses special alignment checks suitable for use in +// Like 'vec128load', but uses special alignment checks suitable for use in // memory operands in most SSE instructions, which are required to // be naturally aligned on some targets but not on others. If the subtarget // allows unaligned accesses, match any load, though this may require // setting a feature bit in the processor (on startup, for example). // Opteron 10h and later implement such a feature. -def memop : PatFrag<(ops node:$ptr), (load node:$ptr), [{ - return Subtarget->hasSSEUnalignedMem() - || cast(N)->getAlignment() >= 16; +def memop : PatFrag<(ops node:$ptr), (vec128load node:$ptr), [{ + return Subtarget->hasSSEUnalignedMem() || + cast(N)->getAlignment() >= 16; }]>; // 128-bit memop pattern fragments @@ -739,16 +754,6 @@ def memopv4f32 : PatFrag<(ops node:$ptr), (v4f32 (memop node:$ptr))>; def memopv2f64 : PatFrag<(ops node:$ptr), (v2f64 (memop node:$ptr))>; def memopv2i64 : PatFrag<(ops node:$ptr), (v2i64 (memop node:$ptr))>; -// These are needed to match a scalar memop that is used in a vector-only -// math instruction such as the FP logical ops: andps, andnps, orps, xorps. 
-// The memory operand is required to be a 128-bit load, so it must be converted -// from a vector to a scalar. -def memopfsf32_128 : PatFrag<(ops node:$ptr), - (f32 (extractelt (memopv4f32 node:$ptr), (iPTR 0)))>; -def memopfsf64_128 : PatFrag<(ops node:$ptr), - (f64 (extractelt (memopv2f64 node:$ptr), (iPTR 0)))>; - - // SSSE3 uses MMX registers for some instructions. They aren't aligned on a // 16-byte boundary. // FIXME: 8 byte alignment for mmx reads is not required @@ -758,6 +763,9 @@ def memop64 : PatFrag<(ops node:$ptr), (load node:$ptr), [{ def memopmmx : PatFrag<(ops node:$ptr), (x86mmx (memop64 node:$ptr))>; +def X86masked_gather : SDNode<"X86ISD::MGATHER", SDTMaskedGather, + [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; + def mgatherv4i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3), (masked_gather node:$src1, node:$src2, node:$src3) , [{ if (MaskedGatherSDNode *Mgt = dyn_cast(N)) @@ -781,6 +789,15 @@ def mgatherv2i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3), Mgt->getBasePtr().getValueType() == MVT::v2i64); return false; }]>; +def X86mgatherv2i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3), + (X86masked_gather node:$src1, node:$src2, node:$src3) , [{ + if (X86MaskedGatherSDNode *Mgt = dyn_cast(N)) + return (Mgt->getIndex().getValueType() == MVT::v2i64 || + Mgt->getBasePtr().getValueType() == MVT::v2i64) && + (Mgt->getMemoryVT() == MVT::v2i32 || + Mgt->getMemoryVT() == MVT::v2f32); + return false; +}]>; def mgatherv4i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3), (masked_gather node:$src1, node:$src2, node:$src3) , [{ if (MaskedGatherSDNode *Mgt = dyn_cast(N)) diff --git a/interpreter/llvm/src/lib/Target/X86/X86InstrInfo.cpp b/interpreter/llvm/src/lib/Target/X86/X86InstrInfo.cpp index 092ceb207ada4..34d4816a25183 100644 --- a/interpreter/llvm/src/lib/Target/X86/X86InstrInfo.cpp +++ b/interpreter/llvm/src/lib/Target/X86/X86InstrInfo.cpp @@ -898,10 +898,14 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPABSDZrr, X86::VPABSDZrm, 0 }, { X86::VPABSQZrr, X86::VPABSQZrm, 0 }, { X86::VPABSWZrr, X86::VPABSWZrm, 0 }, + { X86::VPCONFLICTDZrr, X86::VPCONFLICTDZrm, 0 }, + { X86::VPCONFLICTQZrr, X86::VPCONFLICTQZrm, 0 }, { X86::VPERMILPDZri, X86::VPERMILPDZmi, 0 }, { X86::VPERMILPSZri, X86::VPERMILPSZmi, 0 }, { X86::VPERMPDZri, X86::VPERMPDZmi, 0 }, { X86::VPERMQZri, X86::VPERMQZmi, 0 }, + { X86::VPLZCNTDZrr, X86::VPLZCNTDZrm, 0 }, + { X86::VPLZCNTQZrr, X86::VPLZCNTQZrm, 0 }, { X86::VPMOVSXBDZrr, X86::VPMOVSXBDZrm, 0 }, { X86::VPMOVSXBQZrr, X86::VPMOVSXBQZrm, TB_NO_REVERSE }, { X86::VPMOVSXBWZrr, X86::VPMOVSXBWZrm, 0 }, @@ -914,6 +918,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPMOVZXDQZrr, X86::VPMOVZXDQZrm, 0 }, { X86::VPMOVZXWDZrr, X86::VPMOVZXWDZrm, 0 }, { X86::VPMOVZXWQZrr, X86::VPMOVZXWQZrm, 0 }, + { X86::VPOPCNTDZrr, X86::VPOPCNTDZrm, 0 }, + { X86::VPOPCNTQZrr, X86::VPOPCNTQZrm, 0 }, { X86::VPSHUFDZri, X86::VPSHUFDZmi, 0 }, { X86::VPSHUFHWZri, X86::VPSHUFHWZmi, 0 }, { X86::VPSHUFLWZri, X86::VPSHUFLWZmi, 0 }, @@ -946,10 +952,14 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPABSDZ256rr, X86::VPABSDZ256rm, 0 }, { X86::VPABSQZ256rr, X86::VPABSQZ256rm, 0 }, { X86::VPABSWZ256rr, X86::VPABSWZ256rm, 0 }, + { X86::VPCONFLICTDZ256rr, X86::VPCONFLICTDZ256rm, 0 }, + { X86::VPCONFLICTQZ256rr, X86::VPCONFLICTQZ256rm, 0 }, { X86::VPERMILPDZ256ri, X86::VPERMILPDZ256mi, 0 }, { X86::VPERMILPSZ256ri, X86::VPERMILPSZ256mi, 0 }, { X86::VPERMPDZ256ri, X86::VPERMPDZ256mi, 0 }, { X86::VPERMQZ256ri, X86::VPERMQZ256mi, 0 }, + { 
X86::VPLZCNTDZ256rr, X86::VPLZCNTDZ256rm, 0 }, + { X86::VPLZCNTQZ256rr, X86::VPLZCNTQZ256rm, 0 }, { X86::VPMOVSXBDZ256rr, X86::VPMOVSXBDZ256rm, TB_NO_REVERSE }, { X86::VPMOVSXBQZ256rr, X86::VPMOVSXBQZ256rm, TB_NO_REVERSE }, { X86::VPMOVSXBWZ256rr, X86::VPMOVSXBWZ256rm, 0 }, @@ -993,8 +1003,12 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPABSDZ128rr, X86::VPABSDZ128rm, 0 }, { X86::VPABSQZ128rr, X86::VPABSQZ128rm, 0 }, { X86::VPABSWZ128rr, X86::VPABSWZ128rm, 0 }, + { X86::VPCONFLICTDZ128rr, X86::VPCONFLICTDZ128rm, 0 }, + { X86::VPCONFLICTQZ128rr, X86::VPCONFLICTQZ128rm, 0 }, { X86::VPERMILPDZ128ri, X86::VPERMILPDZ128mi, 0 }, { X86::VPERMILPSZ128ri, X86::VPERMILPSZ128mi, 0 }, + { X86::VPLZCNTDZ128rr, X86::VPLZCNTDZ128rm, 0 }, + { X86::VPLZCNTQZ128rr, X86::VPLZCNTQZ128rm, 0 }, { X86::VPMOVSXBDZ128rr, X86::VPMOVSXBDZ128rm, TB_NO_REVERSE }, { X86::VPMOVSXBQZ128rr, X86::VPMOVSXBQZ128rm, TB_NO_REVERSE }, { X86::VPMOVSXBWZ128rr, X86::VPMOVSXBWZ128rm, TB_NO_REVERSE }, @@ -2310,10 +2324,14 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPABSDZrrkz, X86::VPABSDZrmkz, 0 }, { X86::VPABSQZrrkz, X86::VPABSQZrmkz, 0 }, { X86::VPABSWZrrkz, X86::VPABSWZrmkz, 0 }, + { X86::VPCONFLICTDZrrkz, X86::VPCONFLICTDZrmkz, 0 }, + { X86::VPCONFLICTQZrrkz, X86::VPCONFLICTQZrmkz, 0 }, { X86::VPERMILPDZrikz, X86::VPERMILPDZmikz, 0 }, { X86::VPERMILPSZrikz, X86::VPERMILPSZmikz, 0 }, { X86::VPERMPDZrikz, X86::VPERMPDZmikz, 0 }, { X86::VPERMQZrikz, X86::VPERMQZmikz, 0 }, + { X86::VPLZCNTDZrrkz, X86::VPLZCNTDZrmkz, 0 }, + { X86::VPLZCNTQZrrkz, X86::VPLZCNTQZrmkz, 0 }, { X86::VPMOVSXBDZrrkz, X86::VPMOVSXBDZrmkz, 0 }, { X86::VPMOVSXBQZrrkz, X86::VPMOVSXBQZrmkz, TB_NO_REVERSE }, { X86::VPMOVSXBWZrrkz, X86::VPMOVSXBWZrmkz, 0 }, @@ -2326,6 +2344,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPMOVZXDQZrrkz, X86::VPMOVZXDQZrmkz, 0 }, { X86::VPMOVZXWDZrrkz, X86::VPMOVZXWDZrmkz, 0 }, { X86::VPMOVZXWQZrrkz, X86::VPMOVZXWQZrmkz, 0 }, + { X86::VPOPCNTDZrrkz, X86::VPOPCNTDZrmkz, 0 }, + { X86::VPOPCNTQZrrkz, X86::VPOPCNTQZrmkz, 0 }, { X86::VPSHUFDZrikz, X86::VPSHUFDZmikz, 0 }, { X86::VPSHUFHWZrikz, X86::VPSHUFHWZmikz, 0 }, { X86::VPSHUFLWZrikz, X86::VPSHUFLWZmikz, 0 }, @@ -2346,10 +2366,14 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPABSDZ256rrkz, X86::VPABSDZ256rmkz, 0 }, { X86::VPABSQZ256rrkz, X86::VPABSQZ256rmkz, 0 }, { X86::VPABSWZ256rrkz, X86::VPABSWZ256rmkz, 0 }, + { X86::VPCONFLICTDZ256rrkz, X86::VPCONFLICTDZ256rmkz, 0 }, + { X86::VPCONFLICTQZ256rrkz, X86::VPCONFLICTQZ256rmkz, 0 }, { X86::VPERMILPDZ256rikz, X86::VPERMILPDZ256mikz, 0 }, { X86::VPERMILPSZ256rikz, X86::VPERMILPSZ256mikz, 0 }, { X86::VPERMPDZ256rikz, X86::VPERMPDZ256mikz, 0 }, { X86::VPERMQZ256rikz, X86::VPERMQZ256mikz, 0 }, + { X86::VPLZCNTDZ256rrkz, X86::VPLZCNTDZ256rmkz, 0 }, + { X86::VPLZCNTQZ256rrkz, X86::VPLZCNTQZ256rmkz, 0 }, { X86::VPMOVSXBDZ256rrkz, X86::VPMOVSXBDZ256rmkz, TB_NO_REVERSE }, { X86::VPMOVSXBQZ256rrkz, X86::VPMOVSXBQZ256rmkz, TB_NO_REVERSE }, { X86::VPMOVSXBWZ256rrkz, X86::VPMOVSXBWZ256rmkz, 0 }, @@ -2381,8 +2405,12 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPABSDZ128rrkz, X86::VPABSDZ128rmkz, 0 }, { X86::VPABSQZ128rrkz, X86::VPABSQZ128rmkz, 0 }, { X86::VPABSWZ128rrkz, X86::VPABSWZ128rmkz, 0 }, + { X86::VPCONFLICTDZ128rrkz, X86::VPCONFLICTDZ128rmkz, 0 }, + { X86::VPCONFLICTQZ128rrkz, X86::VPCONFLICTQZ128rmkz, 0 }, { X86::VPERMILPDZ128rikz, X86::VPERMILPDZ128mikz, 0 }, { X86::VPERMILPSZ128rikz, X86::VPERMILPSZ128mikz, 0 }, + { X86::VPLZCNTDZ128rrkz, X86::VPLZCNTDZ128rmkz, 0 }, + { 
X86::VPLZCNTQZ128rrkz, X86::VPLZCNTQZ128rmkz, 0 }, { X86::VPMOVSXBDZ128rrkz, X86::VPMOVSXBDZ128rmkz, TB_NO_REVERSE }, { X86::VPMOVSXBQZ128rrkz, X86::VPMOVSXBQZ128rmkz, TB_NO_REVERSE }, { X86::VPMOVSXBWZ128rrkz, X86::VPMOVSXBWZ128rmkz, TB_NO_REVERSE }, @@ -2931,10 +2959,14 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPABSDZrrk, X86::VPABSDZrmk, 0 }, { X86::VPABSQZrrk, X86::VPABSQZrmk, 0 }, { X86::VPABSWZrrk, X86::VPABSWZrmk, 0 }, + { X86::VPCONFLICTDZrrk, X86::VPCONFLICTDZrmk, 0 }, + { X86::VPCONFLICTQZrrk, X86::VPCONFLICTQZrmk, 0 }, { X86::VPERMILPDZrik, X86::VPERMILPDZmik, 0 }, { X86::VPERMILPSZrik, X86::VPERMILPSZmik, 0 }, { X86::VPERMPDZrik, X86::VPERMPDZmik, 0 }, { X86::VPERMQZrik, X86::VPERMQZmik, 0 }, + { X86::VPLZCNTDZrrk, X86::VPLZCNTDZrmk, 0 }, + { X86::VPLZCNTQZrrk, X86::VPLZCNTQZrmk, 0 }, { X86::VPMOVSXBDZrrk, X86::VPMOVSXBDZrmk, 0 }, { X86::VPMOVSXBQZrrk, X86::VPMOVSXBQZrmk, TB_NO_REVERSE }, { X86::VPMOVSXBWZrrk, X86::VPMOVSXBWZrmk, 0 }, @@ -2947,6 +2979,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPMOVZXDQZrrk, X86::VPMOVZXDQZrmk, 0 }, { X86::VPMOVZXWDZrrk, X86::VPMOVZXWDZrmk, 0 }, { X86::VPMOVZXWQZrrk, X86::VPMOVZXWQZrmk, 0 }, + { X86::VPOPCNTDZrrk, X86::VPOPCNTDZrmk, 0 }, + { X86::VPOPCNTQZrrk, X86::VPOPCNTQZrmk, 0 }, { X86::VPSHUFDZrik, X86::VPSHUFDZmik, 0 }, { X86::VPSHUFHWZrik, X86::VPSHUFHWZmik, 0 }, { X86::VPSHUFLWZrik, X86::VPSHUFLWZmik, 0 }, @@ -2967,10 +3001,14 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPABSDZ256rrk, X86::VPABSDZ256rmk, 0 }, { X86::VPABSQZ256rrk, X86::VPABSQZ256rmk, 0 }, { X86::VPABSWZ256rrk, X86::VPABSWZ256rmk, 0 }, + { X86::VPCONFLICTDZ256rrk, X86::VPCONFLICTDZ256rmk, 0 }, + { X86::VPCONFLICTQZ256rrk, X86::VPCONFLICTQZ256rmk, 0 }, { X86::VPERMILPDZ256rik, X86::VPERMILPDZ256mik, 0 }, { X86::VPERMILPSZ256rik, X86::VPERMILPSZ256mik, 0 }, { X86::VPERMPDZ256rik, X86::VPERMPDZ256mik, 0 }, { X86::VPERMQZ256rik, X86::VPERMQZ256mik, 0 }, + { X86::VPLZCNTDZ256rrk, X86::VPLZCNTDZ256rmk, 0 }, + { X86::VPLZCNTQZ256rrk, X86::VPLZCNTQZ256rmk, 0 }, { X86::VPMOVSXBDZ256rrk, X86::VPMOVSXBDZ256rmk, TB_NO_REVERSE }, { X86::VPMOVSXBQZ256rrk, X86::VPMOVSXBQZ256rmk, TB_NO_REVERSE }, { X86::VPMOVSXBWZ256rrk, X86::VPMOVSXBWZ256rmk, 0 }, @@ -3002,8 +3040,12 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPABSDZ128rrk, X86::VPABSDZ128rmk, 0 }, { X86::VPABSQZ128rrk, X86::VPABSQZ128rmk, 0 }, { X86::VPABSWZ128rrk, X86::VPABSWZ128rmk, 0 }, + { X86::VPCONFLICTDZ128rrk, X86::VPCONFLICTDZ128rmk, 0 }, + { X86::VPCONFLICTQZ128rrk, X86::VPCONFLICTQZ128rmk, 0 }, { X86::VPERMILPDZ128rik, X86::VPERMILPDZ128mik, 0 }, { X86::VPERMILPSZ128rik, X86::VPERMILPSZ128mik, 0 }, + { X86::VPLZCNTDZ128rrk, X86::VPLZCNTDZ128rmk, 0 }, + { X86::VPLZCNTQZ128rrk, X86::VPLZCNTQZ128rmk, 0 }, { X86::VPMOVSXBDZ128rrk, X86::VPMOVSXBDZ128rmk, TB_NO_REVERSE }, { X86::VPMOVSXBQZ128rrk, X86::VPMOVSXBQZ128rmk, TB_NO_REVERSE }, { X86::VPMOVSXBWZ128rrk, X86::VPMOVSXBWZ128rmk, TB_NO_REVERSE }, @@ -3028,6 +3070,64 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPSRLDZ128rik, X86::VPSRLDZ128mik, 0 }, { X86::VPSRLQZ128rik, X86::VPSRLQZ128mik, 0 }, { X86::VPSRLWZ128rik, X86::VPSRLWZ128mik, 0 }, + + // AVX-512 masked compare instructions + { X86::VCMPPDZ128rrik, X86::VCMPPDZ128rmik, 0 }, + { X86::VCMPPSZ128rrik, X86::VCMPPSZ128rmik, 0 }, + { X86::VCMPPDZ256rrik, X86::VCMPPDZ256rmik, 0 }, + { X86::VCMPPSZ256rrik, X86::VCMPPSZ256rmik, 0 }, + { X86::VCMPPDZrrik, X86::VCMPPDZrmik, 0 }, + { X86::VCMPPSZrrik, X86::VCMPPSZrmik, 0 }, + { X86::VCMPSDZrr_Intk, 
X86::VCMPSDZrm_Intk, TB_NO_REVERSE }, + { X86::VCMPSSZrr_Intk, X86::VCMPSSZrm_Intk, TB_NO_REVERSE }, + { X86::VPCMPBZ128rrik, X86::VPCMPBZ128rmik, 0 }, + { X86::VPCMPBZ256rrik, X86::VPCMPBZ256rmik, 0 }, + { X86::VPCMPBZrrik, X86::VPCMPBZrmik, 0 }, + { X86::VPCMPDZ128rrik, X86::VPCMPDZ128rmik, 0 }, + { X86::VPCMPDZ256rrik, X86::VPCMPDZ256rmik, 0 }, + { X86::VPCMPDZrrik, X86::VPCMPDZrmik, 0 }, + { X86::VPCMPEQBZ128rrk, X86::VPCMPEQBZ128rmk, 0 }, + { X86::VPCMPEQBZ256rrk, X86::VPCMPEQBZ256rmk, 0 }, + { X86::VPCMPEQBZrrk, X86::VPCMPEQBZrmk, 0 }, + { X86::VPCMPEQDZ128rrk, X86::VPCMPEQDZ128rmk, 0 }, + { X86::VPCMPEQDZ256rrk, X86::VPCMPEQDZ256rmk, 0 }, + { X86::VPCMPEQDZrrk, X86::VPCMPEQDZrmk, 0 }, + { X86::VPCMPEQQZ128rrk, X86::VPCMPEQQZ128rmk, 0 }, + { X86::VPCMPEQQZ256rrk, X86::VPCMPEQQZ256rmk, 0 }, + { X86::VPCMPEQQZrrk, X86::VPCMPEQQZrmk, 0 }, + { X86::VPCMPEQWZ128rrk, X86::VPCMPEQWZ128rmk, 0 }, + { X86::VPCMPEQWZ256rrk, X86::VPCMPEQWZ256rmk, 0 }, + { X86::VPCMPEQWZrrk, X86::VPCMPEQWZrmk, 0 }, + { X86::VPCMPGTBZ128rrk, X86::VPCMPGTBZ128rmk, 0 }, + { X86::VPCMPGTBZ256rrk, X86::VPCMPGTBZ256rmk, 0 }, + { X86::VPCMPGTBZrrk, X86::VPCMPGTBZrmk, 0 }, + { X86::VPCMPGTDZ128rrk, X86::VPCMPGTDZ128rmk, 0 }, + { X86::VPCMPGTDZ256rrk, X86::VPCMPGTDZ256rmk, 0 }, + { X86::VPCMPGTDZrrk, X86::VPCMPGTDZrmk, 0 }, + { X86::VPCMPGTQZ128rrk, X86::VPCMPGTQZ128rmk, 0 }, + { X86::VPCMPGTQZ256rrk, X86::VPCMPGTQZ256rmk, 0 }, + { X86::VPCMPGTQZrrk, X86::VPCMPGTQZrmk, 0 }, + { X86::VPCMPGTWZ128rrk, X86::VPCMPGTWZ128rmk, 0 }, + { X86::VPCMPGTWZ256rrk, X86::VPCMPGTWZ256rmk, 0 }, + { X86::VPCMPGTWZrrk, X86::VPCMPGTWZrmk, 0 }, + { X86::VPCMPQZ128rrik, X86::VPCMPQZ128rmik, 0 }, + { X86::VPCMPQZ256rrik, X86::VPCMPQZ256rmik, 0 }, + { X86::VPCMPQZrrik, X86::VPCMPQZrmik, 0 }, + { X86::VPCMPUBZ128rrik, X86::VPCMPUBZ128rmik, 0 }, + { X86::VPCMPUBZ256rrik, X86::VPCMPUBZ256rmik, 0 }, + { X86::VPCMPUBZrrik, X86::VPCMPUBZrmik, 0 }, + { X86::VPCMPUDZ128rrik, X86::VPCMPUDZ128rmik, 0 }, + { X86::VPCMPUDZ256rrik, X86::VPCMPUDZ256rmik, 0 }, + { X86::VPCMPUDZrrik, X86::VPCMPUDZrmik, 0 }, + { X86::VPCMPUQZ128rrik, X86::VPCMPUQZ128rmik, 0 }, + { X86::VPCMPUQZ256rrik, X86::VPCMPUQZ256rmik, 0 }, + { X86::VPCMPUQZrrik, X86::VPCMPUQZrmik, 0 }, + { X86::VPCMPUWZ128rrik, X86::VPCMPUWZ128rmik, 0 }, + { X86::VPCMPUWZ256rrik, X86::VPCMPUWZ256rmik, 0 }, + { X86::VPCMPUWZrrik, X86::VPCMPUWZrmik, 0 }, + { X86::VPCMPWZ128rrik, X86::VPCMPWZ128rmik, 0 }, + { X86::VPCMPWZ256rrik, X86::VPCMPWZ256rmik, 0 }, + { X86::VPCMPWZrrik, X86::VPCMPWZrmik, 0 }, }; for (X86MemoryFoldTableEntry Entry : MemoryFoldTable3) { @@ -3585,6 +3685,7 @@ X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI, // It's not always legal to reference the low 8-bit of the larger // register in 32-bit mode. 
return false; + LLVM_FALLTHROUGH; case X86::MOVSX32rr16: case X86::MOVZX32rr16: case X86::MOVSX64rr16: @@ -5129,20 +5230,32 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI, return nullptr; } } - case X86::VPCMPBZ128rri: case X86::VPCMPUBZ128rri: - case X86::VPCMPBZ256rri: case X86::VPCMPUBZ256rri: - case X86::VPCMPBZrri: case X86::VPCMPUBZrri: - case X86::VPCMPDZ128rri: case X86::VPCMPUDZ128rri: - case X86::VPCMPDZ256rri: case X86::VPCMPUDZ256rri: - case X86::VPCMPDZrri: case X86::VPCMPUDZrri: - case X86::VPCMPQZ128rri: case X86::VPCMPUQZ128rri: - case X86::VPCMPQZ256rri: case X86::VPCMPUQZ256rri: - case X86::VPCMPQZrri: case X86::VPCMPUQZrri: - case X86::VPCMPWZ128rri: case X86::VPCMPUWZ128rri: - case X86::VPCMPWZ256rri: case X86::VPCMPUWZ256rri: - case X86::VPCMPWZrri: case X86::VPCMPUWZrri: { + case X86::VPCMPBZ128rri: case X86::VPCMPUBZ128rri: + case X86::VPCMPBZ256rri: case X86::VPCMPUBZ256rri: + case X86::VPCMPBZrri: case X86::VPCMPUBZrri: + case X86::VPCMPDZ128rri: case X86::VPCMPUDZ128rri: + case X86::VPCMPDZ256rri: case X86::VPCMPUDZ256rri: + case X86::VPCMPDZrri: case X86::VPCMPUDZrri: + case X86::VPCMPQZ128rri: case X86::VPCMPUQZ128rri: + case X86::VPCMPQZ256rri: case X86::VPCMPUQZ256rri: + case X86::VPCMPQZrri: case X86::VPCMPUQZrri: + case X86::VPCMPWZ128rri: case X86::VPCMPUWZ128rri: + case X86::VPCMPWZ256rri: case X86::VPCMPUWZ256rri: + case X86::VPCMPWZrri: case X86::VPCMPUWZrri: + case X86::VPCMPBZ128rrik: case X86::VPCMPUBZ128rrik: + case X86::VPCMPBZ256rrik: case X86::VPCMPUBZ256rrik: + case X86::VPCMPBZrrik: case X86::VPCMPUBZrrik: + case X86::VPCMPDZ128rrik: case X86::VPCMPUDZ128rrik: + case X86::VPCMPDZ256rrik: case X86::VPCMPUDZ256rrik: + case X86::VPCMPDZrrik: case X86::VPCMPUDZrrik: + case X86::VPCMPQZ128rrik: case X86::VPCMPUQZ128rrik: + case X86::VPCMPQZ256rrik: case X86::VPCMPUQZ256rrik: + case X86::VPCMPQZrrik: case X86::VPCMPUQZrrik: + case X86::VPCMPWZ128rrik: case X86::VPCMPUWZ128rrik: + case X86::VPCMPWZ256rrik: case X86::VPCMPUWZ256rrik: + case X86::VPCMPWZrrik: case X86::VPCMPUWZrrik: { // Flip comparison mode immediate (if necessary). - unsigned Imm = MI.getOperand(3).getImm() & 0x7; + unsigned Imm = MI.getOperand(MI.getNumOperands() - 1).getImm() & 0x7; switch (Imm) { default: llvm_unreachable("Unreachable!"); case 0x01: Imm = 0x06; break; // LT -> NLE @@ -5156,7 +5269,7 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI, break; } auto &WorkingMI = cloneIfNew(MI); - WorkingMI.getOperand(3).setImm(Imm); + WorkingMI.getOperand(MI.getNumOperands() - 1).setImm(Imm); return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false, OpIdx1, OpIdx2); } @@ -5930,7 +6043,7 @@ void X86InstrInfo::replaceBranchWithTailCall( // Add implicit uses and defs of all live regs potentially clobbered by the // call. This way they still appear live across the call. - LivePhysRegs LiveRegs(&getRegisterInfo()); + LivePhysRegs LiveRegs(getRegisterInfo()); LiveRegs.addLiveOuts(MBB); SmallVector, 8> Clobbers; LiveRegs.stepForward(*MIB, Clobbers); @@ -6545,9 +6658,9 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, // first frame index. // See X86ISelLowering.cpp - X86::hasCopyImplyingStackAdjustment. 
- const TargetRegisterInfo *TRI = &getRegisterInfo(); + const TargetRegisterInfo &TRI = getRegisterInfo(); MachineBasicBlock::LivenessQueryResult LQR = - MBB.computeRegisterLiveness(TRI, AX, MI); + MBB.computeRegisterLiveness(&TRI, AX, MI); // We do not want to save and restore AX if we do not have to. // Moreover, if we do so whereas AX is dead, we would need to set // an undef flag on the use of AX, otherwise the verifier will @@ -6564,7 +6677,7 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, } // AX contains the top most register in the aliasing hierarchy. // It may not be live, but one of its aliases may be. - for (MCRegAliasIterator AI(AX, TRI, true); + for (MCRegAliasIterator AI(AX, &TRI, true); AI.isValid() && LQR != MachineBasicBlock::LQR_Live; ++AI) LQR = LPR.contains(*AI) ? MachineBasicBlock::LQR_Live : MachineBasicBlock::LQR_Dead; @@ -8374,7 +8487,7 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI, unsigned Opc = LoadMI.getOpcode(); unsigned UserOpc = UserMI.getOpcode(); const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); - const TargetRegisterClass *RC = + const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(LoadMI.getOperand(0).getReg()); unsigned RegSize = TRI.getRegSizeInBits(*RC); @@ -10375,7 +10488,7 @@ namespace { return Copy; } - // Create a virtal register in *TLSBaseAddrReg, and populate it by + // Create a virtual register in *TLSBaseAddrReg, and populate it by // inserting a copy instruction after I. Returns the new instruction. MachineInstr *SetRegister(MachineInstr &I, unsigned *TLSBaseAddrReg) { MachineFunction *MF = I.getParent()->getParent(); @@ -10473,7 +10586,7 @@ X86InstrInfo::getOutliningType(MachineInstr &MI) const { // catch it. if (MI.modifiesRegister(X86::RSP, &RI) || MI.readsRegister(X86::RSP, &RI) || MI.getDesc().hasImplicitUseOfPhysReg(X86::RSP) || - MI.getDesc().hasImplicitDefOfPhysReg(X86::RSP)) + MI.getDesc().hasImplicitDefOfPhysReg(X86::RSP)) return MachineOutlinerInstrType::Illegal; // Outlined calls change the instruction pointer, so don't read from it. 
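The RSP restrictions above exist because outlining rewrites a straight-line sequence into a call, and the call itself perturbs the stack: it pushes an 8-byte return address, so every RSP-relative offset inside the outlined body would be shifted relative to what the original code was compiled against. A tiny illustration of the shift (not patch code):

    #include <cstdint>

    constexpr int64_t SlotSize = 8; // x86-64 return-address slot

    // An access the original function made at [rsp + Off] would have to
    // become [rsp + Off + SlotSize] inside the outlined body, which the
    // outliner does not attempt, so RSP users are simply rejected.
    constexpr int64_t offsetInsideOutlinedBody(int64_t Off) {
      return Off + SlotSize;
    }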
@@ -10511,9 +10624,7 @@ void X86InstrInfo::insertOutlinerEpilogue(MachineBasicBlock &MBB, void X86InstrInfo::insertOutlinerPrologue(MachineBasicBlock &MBB, MachineFunction &MF, - bool IsTailCall) const { - return; -} + bool IsTailCall) const {} MachineBasicBlock::iterator X86InstrInfo::insertOutlinedCall(Module &M, MachineBasicBlock &MBB, diff --git a/interpreter/llvm/src/lib/Target/X86/X86InstrInfo.td b/interpreter/llvm/src/lib/Target/X86/X86InstrInfo.td index 4d7d8ece92d9a..fab70e918b8ad 100644 --- a/interpreter/llvm/src/lib/Target/X86/X86InstrInfo.td +++ b/interpreter/llvm/src/lib/Target/X86/X86InstrInfo.td @@ -813,6 +813,8 @@ def UseAVX2 : Predicate<"Subtarget->hasAVX2() && !Subtarget->hasAVX512()">; def NoAVX512 : Predicate<"!Subtarget->hasAVX512()">; def HasCDI : Predicate<"Subtarget->hasCDI()">, AssemblerPredicate<"FeatureCDI", "AVX-512 CD ISA">; +def HasVPOPCNTDQ : Predicate<"Subtarget->hasVPOPCNTDQ()">, + AssemblerPredicate<"FeatureVPOPCNTDQ", "AVX-512 VPOPCNTDQ ISA">; def HasPFI : Predicate<"Subtarget->hasPFI()">, AssemblerPredicate<"FeaturePFI", "AVX-512 PF ISA">; def HasERI : Predicate<"Subtarget->hasERI()">, @@ -896,9 +898,16 @@ def KernelCode : Predicate<"TM.getCodeModel() == CodeModel::Kernel">; def NearData : Predicate<"TM.getCodeModel() == CodeModel::Small ||" "TM.getCodeModel() == CodeModel::Kernel">; def IsNotPIC : Predicate<"!TM.isPositionIndependent()">; -def OptForSize : Predicate<"Subtarget->getOptForSize()">; -def OptForMinSize : Predicate<"Subtarget->getOptForMinSize()">; -def OptForSpeed : Predicate<"!Subtarget->getOptForSize()">; + +// We could compute these on a per-module basis but doing so requires accessing +// the Function object through the Subtarget and objections were raised +// to that (see post-commit review comments for r301750). +let RecomputePerFunction = 1 in { + def OptForSize : Predicate<"MF->getFunction()->optForSize()">; + def OptForMinSize : Predicate<"MF->getFunction()->optForMinSize()">; + def OptForSpeed : Predicate<"!MF->getFunction()->optForSize()">; +} + def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">; def CallImmAddr : Predicate<"Subtarget->isLegalToCallImmediateAddr()">; def FavorMemIndirectCall : Predicate<"!Subtarget->callRegIndirect()">; @@ -1429,11 +1438,14 @@ def MOV64ri : RIi64<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64imm:$src), // Longer forms that use a ModR/M byte. 
Needed for disassembler let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in { def MOV8ri_alt : Ii8 <0xC6, MRM0r, (outs GR8 :$dst), (ins i8imm :$src), - "mov{b}\t{$src, $dst|$dst, $src}", [], IIC_MOV>; + "mov{b}\t{$src, $dst|$dst, $src}", [], IIC_MOV>, + FoldGenData<"MOV8ri">; def MOV16ri_alt : Ii16<0xC7, MRM0r, (outs GR16:$dst), (ins i16imm:$src), - "mov{w}\t{$src, $dst|$dst, $src}", [], IIC_MOV>, OpSize16; + "mov{w}\t{$src, $dst|$dst, $src}", [], IIC_MOV>, OpSize16, + FoldGenData<"MOV16ri">; def MOV32ri_alt : Ii32<0xC7, MRM0r, (outs GR32:$dst), (ins i32imm:$src), - "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV>, OpSize32; + "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV>, OpSize32, + FoldGenData<"MOV32ri">; } } // SchedRW @@ -1556,13 +1568,17 @@ def MOV64o64a : RIi64<0xA3, RawFrmMemOffs, (outs), (ins offset64_64:$dst), let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, SchedRW = [WriteMove] in { def MOV8rr_REV : I<0x8A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src), - "mov{b}\t{$src, $dst|$dst, $src}", [], IIC_MOV>; + "mov{b}\t{$src, $dst|$dst, $src}", [], IIC_MOV>, + FoldGenData<"MOV8rr">; def MOV16rr_REV : I<0x8B, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), - "mov{w}\t{$src, $dst|$dst, $src}", [], IIC_MOV>, OpSize16; + "mov{w}\t{$src, $dst|$dst, $src}", [], IIC_MOV>, OpSize16, + FoldGenData<"MOV16rr">; def MOV32rr_REV : I<0x8B, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), - "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV>, OpSize32; + "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV>, OpSize32, + FoldGenData<"MOV32rr">; def MOV64rr_REV : RI<0x8B, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), - "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV>; + "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV>, + FoldGenData<"MOV64rr">; } let canFoldAsLoad = 1, isReMaterializable = 1, SchedRW = [WriteLoad] in { diff --git a/interpreter/llvm/src/lib/Target/X86/X86InstrMMX.td b/interpreter/llvm/src/lib/Target/X86/X86InstrMMX.td index dc3800ce381b0..2c047722db249 100644 --- a/interpreter/llvm/src/lib/Target/X86/X86InstrMMX.td +++ b/interpreter/llvm/src/lib/Target/X86/X86InstrMMX.td @@ -248,7 +248,8 @@ def MMX_MOVD64grr : MMXI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR64:$src), "movd\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (MMX_X86movd2w (x86mmx VR64:$src)))], - IIC_MMX_MOV_REG_MM>, Sched<[WriteMove]>; + IIC_MMX_MOV_REG_MM>, Sched<[WriteMove]>, + FoldGenData<"MMX_MOVD64rr">; let isBitcast = 1 in def MMX_MOVD64to64rr : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src), @@ -277,7 +278,7 @@ def MMX_MOVQ64rr : MMXI<0x6F, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src), let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in { def MMX_MOVQ64rr_REV : MMXI<0x7F, MRMDestReg, (outs VR64:$dst), (ins VR64:$src), "movq\t{$src, $dst|$dst, $src}", [], - IIC_MMX_MOVQ_RR>; + IIC_MMX_MOVQ_RR>, FoldGenData<"MMX_MOVQ64rr">; } } // SchedRW diff --git a/interpreter/llvm/src/lib/Target/X86/X86InstrSSE.td b/interpreter/llvm/src/lib/Target/X86/X86InstrSSE.td index f73d85e7e01b9..650e4fc8716cd 100644 --- a/interpreter/llvm/src/lib/Target/X86/X86InstrSSE.td +++ b/interpreter/llvm/src/lib/Target/X86/X86InstrSSE.td @@ -507,7 +507,8 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, multiclass sse12_move_rr { + string asm_opr, Domain d = GenericDomain, + string Name> { let isCommutable = 1 in def rr : SI<0x10, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, RC:$src2), @@ -521,15 +522,17 @@ multiclass sse12_move_rr, Sched<[WriteFShuffle]>; + [], IIC_SSE_MOV_S_RR>, 
Sched<[WriteFShuffle]>, + FoldGenData; } multiclass sse12_move { + Domain d = GenericDomain, string Name> { // AVX defm V#NAME : sse12_move_rr, + "\t{$src2, $src1, $dst|$dst, $src1, $src2}", d, + "V"#Name>, VEX_4V, VEX_LIG, VEX_WIG; def V#NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src), @@ -539,7 +542,7 @@ multiclass sse12_move; + "\t{$src2, $dst|$dst, $src2}", d, Name>; } def NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src), @@ -563,9 +566,9 @@ multiclass sse12_move_rm, XS; + SSEPackedSingle, "MOVSS">, XS; defm MOVSD : sse12_move, XD; + SSEPackedDouble, "MOVSD">, XD; let canFoldAsLoad = 1, isReMaterializable = 1 in { defm MOVSS : sse12_move_rm, VEX, VEX_WIG; + IIC_SSE_MOVA_P_RR>, VEX, VEX_WIG, + FoldGenData<"VMOVAPSrr">; def VMOVAPDrr_REV : VPDI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movapd\t{$src, $dst|$dst, $src}", [], - IIC_SSE_MOVA_P_RR>, VEX, VEX_WIG; + IIC_SSE_MOVA_P_RR>, VEX, VEX_WIG, + FoldGenData<"VMOVAPDrr">; def VMOVUPSrr_REV : VPSI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movups\t{$src, $dst|$dst, $src}", [], - IIC_SSE_MOVU_P_RR>, VEX, VEX_WIG; + IIC_SSE_MOVU_P_RR>, VEX, VEX_WIG, + FoldGenData<"VMOVUPSrr">; def VMOVUPDrr_REV : VPDI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movupd\t{$src, $dst|$dst, $src}", [], - IIC_SSE_MOVU_P_RR>, VEX, VEX_WIG; + IIC_SSE_MOVU_P_RR>, VEX, VEX_WIG, + FoldGenData<"VMOVUPDrr">; def VMOVAPSYrr_REV : VPSI<0x29, MRMDestReg, (outs VR256:$dst), (ins VR256:$src), "movaps\t{$src, $dst|$dst, $src}", [], - IIC_SSE_MOVA_P_RR>, VEX, VEX_L, VEX_WIG; + IIC_SSE_MOVA_P_RR>, VEX, VEX_L, VEX_WIG, + FoldGenData<"VMOVAPSYrr">; def VMOVAPDYrr_REV : VPDI<0x29, MRMDestReg, (outs VR256:$dst), (ins VR256:$src), "movapd\t{$src, $dst|$dst, $src}", [], - IIC_SSE_MOVA_P_RR>, VEX, VEX_L, VEX_WIG; + IIC_SSE_MOVA_P_RR>, VEX, VEX_L, VEX_WIG, + FoldGenData<"VMOVAPDYrr">; def VMOVUPSYrr_REV : VPSI<0x11, MRMDestReg, (outs VR256:$dst), (ins VR256:$src), "movups\t{$src, $dst|$dst, $src}", [], - IIC_SSE_MOVU_P_RR>, VEX, VEX_L, VEX_WIG; + IIC_SSE_MOVU_P_RR>, VEX, VEX_L, VEX_WIG, + FoldGenData<"VMOVUPSYrr">; def VMOVUPDYrr_REV : VPDI<0x11, MRMDestReg, (outs VR256:$dst), (ins VR256:$src), "movupd\t{$src, $dst|$dst, $src}", [], - IIC_SSE_MOVU_P_RR>, VEX, VEX_L, VEX_WIG; + IIC_SSE_MOVU_P_RR>, VEX, VEX_L, VEX_WIG, + FoldGenData<"VMOVUPDYrr">; } // Aliases to help the assembler pick two byte VEX encodings by swapping the @@ -938,16 +949,16 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, SchedRW = [WriteFShuffle] in { def MOVAPSrr_REV : PSI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movaps\t{$src, $dst|$dst, $src}", [], - IIC_SSE_MOVA_P_RR>; + IIC_SSE_MOVA_P_RR>, FoldGenData<"MOVAPSrr">; def MOVAPDrr_REV : PDI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movapd\t{$src, $dst|$dst, $src}", [], - IIC_SSE_MOVA_P_RR>; + IIC_SSE_MOVA_P_RR>, FoldGenData<"MOVAPDrr">; def MOVUPSrr_REV : PSI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movups\t{$src, $dst|$dst, $src}", [], - IIC_SSE_MOVU_P_RR>; + IIC_SSE_MOVU_P_RR>, FoldGenData<"MOVUPSrr">; def MOVUPDrr_REV : PDI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movupd\t{$src, $dst|$dst, $src}", [], - IIC_SSE_MOVU_P_RR>; + IIC_SSE_MOVU_P_RR>, FoldGenData<"MOVUPDrr">; } let Predicates = [HasAVX, NoVLX] in { @@ -1733,7 +1744,7 @@ def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), Sched<[WriteCvtF2FLd, ReadAfterLd]>, VEX_WIG; } -def : Pat<(f32 (fpround FR64:$src)), +def : Pat<(f32 (fpround FR64:$src)), 
(VCVTSD2SSrr (COPY_TO_REGCLASS FR64:$src, FR32), FR64:$src)>, Requires<[UseAVX]>; @@ -3686,8 +3697,7 @@ let SchedRW = [WriteNop] in { // Pause. This "instruction" is encoded as "rep; nop", so even though it // was introduced with SSE2, it's backward compatible. def PAUSE : I<0x90, RawFrm, (outs), (ins), - "pause", [(int_x86_sse2_pause)], IIC_SSE_PAUSE>, - OBXS, Requires<[HasSSE2]>; + "pause", [(int_x86_sse2_pause)], IIC_SSE_PAUSE>, OBXS; } let SchedRW = [WriteFence] in { @@ -3752,17 +3762,19 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, def VMOVDQArr_REV : VPDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>, - VEX, VEX_WIG; + VEX, VEX_WIG, FoldGenData<"VMOVDQArr">; def VMOVDQAYrr_REV : VPDI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src), "movdqa\t{$src, $dst|$dst, $src}", [], - IIC_SSE_MOVA_P_RR>, VEX, VEX_L, VEX_WIG; + IIC_SSE_MOVA_P_RR>, VEX, VEX_L, VEX_WIG, + FoldGenData<"VMOVDQAYrr">; def VMOVDQUrr_REV : VSSI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movdqu\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVU_P_RR>, - VEX, VEX_WIG; + VEX, VEX_WIG, FoldGenData<"VMOVDQUrr">; def VMOVDQUYrr_REV : VSSI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src), "movdqu\t{$src, $dst|$dst, $src}", [], - IIC_SSE_MOVU_P_RR>, VEX, VEX_L, VEX_WIG; + IIC_SSE_MOVU_P_RR>, VEX, VEX_L, VEX_WIG, + FoldGenData<"VMOVDQUYrr">; } let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1, @@ -3820,11 +3832,12 @@ def MOVDQUrr : I<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in { def MOVDQArr_REV : PDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movdqa\t{$src, $dst|$dst, $src}", [], - IIC_SSE_MOVA_P_RR>; + IIC_SSE_MOVA_P_RR>, FoldGenData<"MOVDQArr">; def MOVDQUrr_REV : I<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movdqu\t{$src, $dst|$dst, $src}", - [], IIC_SSE_MOVU_P_RR>, XS, Requires<[UseSSE2]>; + [], IIC_SSE_MOVU_P_RR>, XS, Requires<[UseSSE2]>, + FoldGenData<"MOVDQUrr">; } } // SchedRW @@ -5169,14 +5182,14 @@ multiclass S3D_Int o, string OpcodeStr, ValueType vt, RegisterClass RC, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>, - Sched<[WriteFAdd]>; + Sched<[WriteFHAdd]>; def rm : S3DI, Sched<[WriteFAddLd, ReadAfterLd]>; + IIC_SSE_HADDSUB_RM>, Sched<[WriteFHAddLd, ReadAfterLd]>; } multiclass S3_Int o, string OpcodeStr, ValueType vt, RegisterClass RC, X86MemOperand x86memop, SDNode OpNode, PatFrag ld_frag, @@ -5186,14 +5199,14 @@ multiclass S3_Int o, string OpcodeStr, ValueType vt, RegisterClass RC, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>, - Sched<[WriteFAdd]>; + Sched<[WriteFHAdd]>; def rm : S3I, Sched<[WriteFAddLd, ReadAfterLd]>; + IIC_SSE_HADDSUB_RM>, Sched<[WriteFHAddLd, ReadAfterLd]>; } let Predicates = [HasAVX] in { @@ -5296,7 +5309,7 @@ defm PABSD : SS3I_unop_rm<0x1E, "pabsd", v4i32, abs, memopv2i64>; // SSSE3 - Packed Binary Operator Instructions //===---------------------------------------------------------------------===// -let Sched = WriteVecALU in { +let Sched = WritePHAdd in { def SSE_PHADDSUBD : OpndItins< IIC_SSE_PHADDSUBD_RR, IIC_SSE_PHADDSUBD_RM >; @@ -5915,7 +5928,7 @@ multiclass SS41I_extract16 opc, 
string OpcodeStr> { (ins VR128:$src1, u8imm:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, Sched<[WriteShuffle]>; + []>, Sched<[WriteShuffle]>, FoldGenData; let hasSideEffects = 0, mayStore = 1, SchedRW = [WriteShuffleLd, WriteRMW] in diff --git a/interpreter/llvm/src/lib/Target/X86/X86InstrTSX.td b/interpreter/llvm/src/lib/Target/X86/X86InstrTSX.td index 38ac8be944832..61aac58a491f2 100644 --- a/interpreter/llvm/src/lib/Target/X86/X86InstrTSX.td +++ b/interpreter/llvm/src/lib/Target/X86/X86InstrTSX.td @@ -30,6 +30,11 @@ def XBEGIN_4 : Ii32PCRel<0xc7, MRM_F8, (outs), (ins brtarget32:$dst), "xbegin\t$dst", []>, OpSize32; } +// Pseudo instruction to fake the definition of EAX on the fallback code path. +let isPseudo = 1, Defs = [EAX] in { +def XABORT_DEF : I<0, Pseudo, (outs), (ins), "# XABORT DEF", []>; +} + def XEND : I<0x01, MRM_D5, (outs), (ins), "xend", [(int_x86_xend)]>, TB, Requires<[HasRTM]>; diff --git a/interpreter/llvm/src/lib/Target/X86/X86InstrXOP.td b/interpreter/llvm/src/lib/Target/X86/X86InstrXOP.td index 53224431c0e90..5dde2d07babeb 100644 --- a/interpreter/llvm/src/lib/Target/X86/X86InstrXOP.td +++ b/interpreter/llvm/src/lib/Target/X86/X86InstrXOP.td @@ -111,7 +111,7 @@ multiclass xop3op<bits<8> opc, string OpcodeStr, SDNode OpNode, (ins VR128:$src1, VR128:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, - XOP_4V, VEX_W, Sched<[WriteVarVecShift]>; + XOP_4V, VEX_W, Sched<[WriteVarVecShift]>, FoldGenData; } let ExeDomain = SSEPackedInt in { @@ -282,7 +282,7 @@ multiclass xop4op<bits<8> opc, string OpcodeStr, SDNode OpNode, (ins VR128:$src1, VR128:$src2, VR128:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - []>, XOP_4V, VEX_W; + []>, XOP_4V, VEX_W, FoldGenData; } let ExeDomain = SSEPackedInt in { @@ -318,7 +318,7 @@ multiclass xop4op_int<bits<8> opc, string OpcodeStr, RegisterClass RC, (ins RC:$src1, RC:$src2, RC:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - []>, XOP_4V, VEX_W; + []>, XOP_4V, VEX_W, FoldGenData; } let ExeDomain = SSEPackedInt in { @@ -357,7 +357,7 @@ multiclass xop_vpermil2<bits<8> Opc, string OpcodeStr, RegisterClass RC, (ins RC:$src1, RC:$src2, RC:$src3, u8imm:$src4), !strconcat(OpcodeStr, "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"), - []>, VEX_W; + []>, VEX_W, FoldGenData; } let ExeDomain = SSEPackedDouble in { diff --git a/interpreter/llvm/src/lib/Target/X86/X86InstructionSelector.cpp b/interpreter/llvm/src/lib/Target/X86/X86InstructionSelector.cpp index de58d719acb4b..859d3288db896 100644 --- a/interpreter/llvm/src/lib/Target/X86/X86InstructionSelector.cpp +++ b/interpreter/llvm/src/lib/Target/X86/X86InstructionSelector.cpp @@ -19,6 +19,7 @@ #include "X86Subtarget.h" #include "X86TargetMachine.h" #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" @@ -31,6 +32,8 @@ #define DEBUG_TYPE "X86-isel" +#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h" + using namespace llvm; #ifndef LLVM_BUILD_GLOBAL_ISEL @@ -55,7 +58,7 @@ class X86InstructionSelector : public InstructionSelector { /// the patterns that don't require complex C++. bool selectImpl(MachineInstr &I) const; - // TODO: remove after suported by Tablegen-erated instruction selection. + // TODO: remove after supported by Tablegen-erated instruction selection.
unsigned getLoadStoreOp(LLT &Ty, const RegisterBank &RB, unsigned Opc, uint64_t Alignment) const; @@ -63,6 +66,8 @@ class X86InstructionSelector : public InstructionSelector { MachineFunction &MF) const; bool selectFrameIndexOrGep(MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) const; + bool selectGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI, + MachineFunction &MF) const; bool selectConstant(MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) const; bool selectTrunc(MachineInstr &I, MachineRegisterInfo &MRI, @@ -71,6 +76,28 @@ class X86InstructionSelector : public InstructionSelector { MachineFunction &MF) const; bool selectCmp(MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) const; + bool selectUadde(MachineInstr &I, MachineRegisterInfo &MRI, + MachineFunction &MF) const; + bool selectCopy(MachineInstr &I, MachineRegisterInfo &MRI) const; + bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI, + MachineFunction &MF) const; + bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI, + MachineFunction &MF) const; + bool selectInsert(MachineInstr &I, MachineRegisterInfo &MRI, + MachineFunction &MF) const; + bool selectExtract(MachineInstr &I, MachineRegisterInfo &MRI, + MachineFunction &MF) const; + + // emit insert subreg instruction and insert it before MachineInstr &I + bool emitInsertSubreg(unsigned DstReg, unsigned SrcReg, MachineInstr &I, + MachineRegisterInfo &MRI, MachineFunction &MF) const; + // emit extract subreg instruction and insert it before MachineInstr &I + bool emitExtractSubreg(unsigned DstReg, unsigned SrcReg, MachineInstr &I, + MachineRegisterInfo &MRI, MachineFunction &MF) const; + + const TargetRegisterClass *getRegClass(LLT Ty, const RegisterBank &RB) const; + const TargetRegisterClass *getRegClass(LLT Ty, unsigned Reg, + MachineRegisterInfo &MRI) const; const X86TargetMachine &TM; const X86Subtarget &STI; @@ -109,8 +136,8 @@ X86InstructionSelector::X86InstructionSelector(const X86TargetMachine &TM, // FIXME: This should be target-independent, inferred from the types declared // for each class in the bank. -static const TargetRegisterClass * -getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB) { +const TargetRegisterClass * +X86InstructionSelector::getRegClass(LLT Ty, const RegisterBank &RB) const { if (RB.getID() == X86::GPRRegBankID) { if (Ty.getSizeInBits() <= 8) return &X86::GR8RegClass; @@ -123,13 +150,13 @@ getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB) { } if (RB.getID() == X86::VECRRegBankID) { if (Ty.getSizeInBits() == 32) - return &X86::FR32XRegClass; + return STI.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass; if (Ty.getSizeInBits() == 64) - return &X86::FR64XRegClass; + return STI.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass; if (Ty.getSizeInBits() == 128) - return &X86::VR128XRegClass; + return STI.hasAVX512() ? &X86::VR128XRegClass : &X86::VR128RegClass; if (Ty.getSizeInBits() == 256) - return &X86::VR256XRegClass; + return STI.hasAVX512() ? &X86::VR256XRegClass : &X86::VR256RegClass; if (Ty.getSizeInBits() == 512) return &X86::VR512RegClass; } @@ -137,10 +164,16 @@ getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB) { llvm_unreachable("Unknown RegBank!"); } +const TargetRegisterClass * +X86InstructionSelector::getRegClass(LLT Ty, unsigned Reg, + MachineRegisterInfo &MRI) const { + const RegisterBank &RegBank = *RBI.getRegBank(Reg, MRI, TRI); + return getRegClass(Ty, RegBank); +} + // Set X86 Opcode and constrain DestReg. 
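The hunk below folds the old free functions into X86InstructionSelector so that getRegClass can consult the subtarget. The point of the change is visible in the FR32/FR32X choices above: the EVEX-only classes include XMM16-XMM31, which cannot be encoded without AVX-512, so they must only be handed out when the feature is present. A reduced model of that decision, with illustrative names:

    // Reduced model of the vector-bank register-class choice; the real
    // selector returns TargetRegisterClass pointers instead of an enum.
    enum class VecClass { FR32, FR32X, FR64, FR64X, VR128, VR128X };

    static VecClass pickVecClass(unsigned SizeInBits, bool HasAVX512) {
      switch (SizeInBits) {
      case 32:  return HasAVX512 ? VecClass::FR32X : VecClass::FR32;
      case 64:  return HasAVX512 ? VecClass::FR64X : VecClass::FR64;
      case 128: return HasAVX512 ? VecClass::VR128X : VecClass::VR128;
      }
      return VecClass::VR128; // wider sizes elided in this sketch
    }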
-static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, - MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, - const RegisterBankInfo &RBI) { +bool X86InstructionSelector::selectCopy(MachineInstr &I, + MachineRegisterInfo &MRI) const { unsigned DstReg = I.getOperand(0).getReg(); if (TargetRegisterInfo::isPhysicalRegister(DstReg)) { @@ -167,7 +200,7 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, switch (RegBank.getID()) { case X86::GPRRegBankID: assert((DstSize <= 64) && "GPRs cannot get more than 64-bit width values."); - RC = getRegClassForTypeOnBank(MRI.getType(DstReg), RegBank); + RC = getRegClass(MRI.getType(DstReg), RegBank); // Change the physical register if (SrcSize > DstSize && TargetRegisterInfo::isPhysicalRegister(SrcReg)) { @@ -182,7 +215,7 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, } break; case X86::VECRRegBankID: - RC = getRegClassForTypeOnBank(MRI.getType(DstReg), RegBank); + RC = getRegClass(MRI.getType(DstReg), RegBank); break; default: llvm_unreachable("Unknown RegBank!"); @@ -216,7 +249,7 @@ bool X86InstructionSelector::select(MachineInstr &I) const { // Certain non-generic instructions also need some special handling. if (I.isCopy()) - return selectCopy(I, TII, MRI, TRI, RBI); + return selectCopy(I, MRI); // TODO: handle more cases - LOAD_STACK_GUARD, PHI return true; @@ -235,6 +268,8 @@ bool X86InstructionSelector::select(MachineInstr &I) const { return true; if (selectFrameIndexOrGep(I, MRI, MF)) return true; + if (selectGlobalValue(I, MRI, MF)) + return true; if (selectConstant(I, MRI, MF)) return true; if (selectTrunc(I, MRI, MF)) @@ -243,6 +278,16 @@ bool X86InstructionSelector::select(MachineInstr &I) const { return true; if (selectCmp(I, MRI, MF)) return true; + if (selectUadde(I, MRI, MF)) + return true; + if (selectUnmergeValues(I, MRI, MF)) + return true; + if (selectMergeValues(I, MRI, MF)) + return true; + if (selectExtract(I, MRI, MF)) + return true; + if (selectInsert(I, MRI, MF)) + return true; return false; } @@ -296,10 +341,58 @@ unsigned X86InstructionSelector::getLoadStoreOp(LLT &Ty, const RegisterBank &RB, : HasAVX512 ? X86::VMOVUPSZ128mr_NOVLX : HasAVX ? X86::VMOVUPSmr : X86::MOVUPSmr); + } else if (Ty.isVector() && Ty.getSizeInBits() == 256) { + if (Alignment >= 32) + return Isload ? (HasVLX ? X86::VMOVAPSZ256rm + : HasAVX512 ? X86::VMOVAPSZ256rm_NOVLX + : X86::VMOVAPSYrm) + : (HasVLX ? X86::VMOVAPSZ256mr + : HasAVX512 ? X86::VMOVAPSZ256mr_NOVLX + : X86::VMOVAPSYmr); + else + return Isload ? (HasVLX ? X86::VMOVUPSZ256rm + : HasAVX512 ? X86::VMOVUPSZ256rm_NOVLX + : X86::VMOVUPSYrm) + : (HasVLX ? X86::VMOVUPSZ256mr + : HasAVX512 ? X86::VMOVUPSZ256mr_NOVLX + : X86::VMOVUPSYmr); + } else if (Ty.isVector() && Ty.getSizeInBits() == 512) { + if (Alignment >= 64) + return Isload ? X86::VMOVAPSZrm : X86::VMOVAPSZmr; + else + return Isload ? X86::VMOVUPSZrm : X86::VMOVUPSZmr; } return Opc; } +// Fill in an address from the given instruction. +void X86SelectAddress(const MachineInstr &I, const MachineRegisterInfo &MRI, + X86AddressMode &AM) { + + assert(I.getOperand(0).isReg() && "unsupported operand."); + assert(MRI.getType(I.getOperand(0).getReg()).isPointer() && + "unsupported type."); + + if (I.getOpcode() == TargetOpcode::G_GEP) { + if (auto COff = getConstantVRegVal(I.getOperand(2).getReg(), MRI)) { + int64_t Imm = *COff; + if (isInt<32>(Imm)) { // Check for displacement overflow.
+ AM.Disp = static_cast<int32_t>(Imm); + AM.Base.Reg = I.getOperand(1).getReg(); + return; + } + } + } else if (I.getOpcode() == TargetOpcode::G_FRAME_INDEX) { + AM.Base.FrameIndex = I.getOperand(1).getIndex(); + AM.BaseType = X86AddressMode::FrameIndexBase; + return; + } + + // Default behavior. + AM.Base.Reg = I.getOperand(0).getReg(); + return; +} + bool X86InstructionSelector::selectLoadStoreOp(MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) const { @@ -314,22 +407,41 @@ bool X86InstructionSelector::selectLoadStoreOp(MachineInstr &I, const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI); auto &MemOp = **I.memoperands_begin(); + if (MemOp.getOrdering() != AtomicOrdering::NotAtomic) { + DEBUG(dbgs() << "Atomic load/store not supported yet\n"); + return false; + } + unsigned NewOpc = getLoadStoreOp(Ty, RB, Opc, MemOp.getAlignment()); if (NewOpc == Opc) return false; + X86AddressMode AM; + X86SelectAddress(*MRI.getVRegDef(I.getOperand(1).getReg()), MRI, AM); + I.setDesc(TII.get(NewOpc)); MachineInstrBuilder MIB(MF, I); - if (Opc == TargetOpcode::G_LOAD) - addOffset(MIB, 0); - else { + if (Opc == TargetOpcode::G_LOAD) { + I.RemoveOperand(1); + addFullAddress(MIB, AM); + } else { // G_STORE (VAL, Addr), X86Store instruction (Addr, VAL) + I.RemoveOperand(1); I.RemoveOperand(0); - addOffset(MIB, 0).addUse(DefReg); + addFullAddress(MIB, AM).addUse(DefReg); } return constrainSelectedInstRegOperands(I, TII, TRI, RBI); } +static unsigned getLeaOP(LLT Ty, const X86Subtarget &STI) { + if (Ty == LLT::pointer(0, 64)) + return X86::LEA64r; + else if (Ty == LLT::pointer(0, 32)) + return STI.isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r; + else + llvm_unreachable("Can't get LEA opcode. Unsupported type."); +} + bool X86InstructionSelector::selectFrameIndexOrGep(MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) const { @@ -342,14 +454,7 @@ bool X86InstructionSelector::selectFrameIndexOrGep(MachineInstr &I, LLT Ty = MRI.getType(DefReg); // Use LEA to calculate frame index and GEP - unsigned NewOpc; - if (Ty == LLT::pointer(0, 64)) - NewOpc = X86::LEA64r; - else if (Ty == LLT::pointer(0, 32)) - NewOpc = STI.isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r; - else - llvm_unreachable("Can't select G_FRAME_INDEX/G_GEP, unsupported type."); - + unsigned NewOpc = getLeaOP(Ty, STI); I.setDesc(TII.get(NewOpc)); MachineInstrBuilder MIB(MF, I); @@ -365,6 +470,54 @@ bool X86InstructionSelector::selectFrameIndexOrGep(MachineInstr &I, return constrainSelectedInstRegOperands(I, TII, TRI, RBI); } +bool X86InstructionSelector::selectGlobalValue(MachineInstr &I, + MachineRegisterInfo &MRI, + MachineFunction &MF) const { + unsigned Opc = I.getOpcode(); + + if (Opc != TargetOpcode::G_GLOBAL_VALUE) + return false; + + auto GV = I.getOperand(1).getGlobal(); + if (GV->isThreadLocal()) { + return false; // TODO: we don't support TLS yet. + } + + // Can't handle alternate code models yet. + if (TM.getCodeModel() != CodeModel::Small) + return false; + + X86AddressMode AM; + AM.GV = GV; + AM.GVOpFlags = STI.classifyGlobalReference(GV); + + // TODO: The ABI requires an extra load; not supported yet. + if (isGlobalStubReference(AM.GVOpFlags)) + return false; + + // TODO: This reference is relative to the pic base; not supported yet. + if (isGlobalRelativeToPICBase(AM.GVOpFlags)) + return false; + + if (STI.isPICStyleRIPRel()) { + // Use rip-relative addressing.
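// A standalone sketch (not the actual helper) of the folding rule
// X86SelectAddress applies above: a constant G_GEP offset may only become
// the addressing-mode displacement when it fits the 32-bit Disp field.
#include <cstdint>
#include <limits>
static bool fitsInDisp32(int64_t Off) {
  return Off >= std::numeric_limits<int32_t>::min() &&
         Off <= std::numeric_limits<int32_t>::max();
}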
+ assert(AM.Base.Reg == 0 && AM.IndexReg == 0); + AM.Base.Reg = X86::RIP; + } + + const unsigned DefReg = I.getOperand(0).getReg(); + LLT Ty = MRI.getType(DefReg); + unsigned NewOpc = getLeaOP(Ty, STI); + + I.setDesc(TII.get(NewOpc)); + MachineInstrBuilder MIB(MF, I); + + I.RemoveOperand(1); + addFullAddress(MIB, AM); + + return constrainSelectedInstRegOperands(I, TII, TRI, RBI); +} + bool X86InstructionSelector::selectConstant(MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) const { @@ -374,7 +527,8 @@ bool X86InstructionSelector::selectConstant(MachineInstr &I, const unsigned DefReg = I.getOperand(0).getReg(); LLT Ty = MRI.getType(DefReg); - assert(Ty.isScalar() && "invalid element type."); + if (RBI.getRegBank(DefReg, MRI, TRI)->getID() != X86::GPRRegBankID) + return false; uint64_t Val = 0; if (I.getOperand(1).isCImm()) { @@ -435,32 +589,38 @@ bool X86InstructionSelector::selectTrunc(MachineInstr &I, if (DstRB.getID() != X86::GPRRegBankID) return false; - const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB); + const TargetRegisterClass *DstRC = getRegClass(DstTy, DstRB); if (!DstRC) return false; - const TargetRegisterClass *SrcRC = getRegClassForTypeOnBank(SrcTy, SrcRB); + const TargetRegisterClass *SrcRC = getRegClass(SrcTy, SrcRB); if (!SrcRC) return false; - if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) || - !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) { - DEBUG(dbgs() << "Failed to constrain G_TRUNC\n"); - return false; - } - + unsigned SubIdx; if (DstRC == SrcRC) { // Nothing to be done + SubIdx = X86::NoSubRegister; } else if (DstRC == &X86::GR32RegClass) { - I.getOperand(1).setSubReg(X86::sub_32bit); + SubIdx = X86::sub_32bit; } else if (DstRC == &X86::GR16RegClass) { - I.getOperand(1).setSubReg(X86::sub_16bit); + SubIdx = X86::sub_16bit; } else if (DstRC == &X86::GR8RegClass) { - I.getOperand(1).setSubReg(X86::sub_8bit); + SubIdx = X86::sub_8bit; } else { return false; } + SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubIdx); + + if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) || + !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) { + DEBUG(dbgs() << "Failed to constrain G_TRUNC\n"); + return false; + } + + I.getOperand(1).setSubReg(SubIdx); + I.setDesc(TII.get(X86::COPY)); return true; } @@ -477,38 +637,40 @@ bool X86InstructionSelector::selectZext(MachineInstr &I, const LLT DstTy = MRI.getType(DstReg); const LLT SrcTy = MRI.getType(SrcReg); - if (SrcTy == LLT::scalar(1)) { - - unsigned AndOpc; - if (DstTy == LLT::scalar(32)) - AndOpc = X86::AND32ri8; - else if (DstTy == LLT::scalar(64)) - AndOpc = X86::AND64ri8; - else - return false; + if (SrcTy != LLT::scalar(1)) + return false; - const RegisterBank &RegBank = *RBI.getRegBank(DstReg, MRI, TRI); - unsigned DefReg = - MRI.createVirtualRegister(getRegClassForTypeOnBank(DstTy, RegBank)); + unsigned AndOpc; + if (DstTy == LLT::scalar(8)) + AndOpc = X86::AND8ri; + else if (DstTy == LLT::scalar(16)) + AndOpc = X86::AND16ri8; + else if (DstTy == LLT::scalar(32)) + AndOpc = X86::AND32ri8; + else if (DstTy == LLT::scalar(64)) + AndOpc = X86::AND64ri8; + else + return false; + unsigned DefReg = SrcReg; + if (DstTy != LLT::scalar(8)) { + DefReg = MRI.createVirtualRegister(getRegClass(DstTy, DstReg, MRI)); BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(TargetOpcode::SUBREG_TO_REG), DefReg) .addImm(0) .addReg(SrcReg) .addImm(X86::sub_8bit); + } - MachineInstr &AndInst = - *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AndOpc), DstReg) - .addReg(DefReg) - .addImm(1); + 
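// Scalar sketch (illustration, not selector code) of what the selected
// G_ZEXT(s1) sequence computes: SUBREG_TO_REG widens the byte holding the
// boolean, and the AND emitted below keeps only bit 0.
#include <cstdint>
static uint32_t zextFromI1(uint8_t SrcByte) {
  uint32_t Widened = SrcByte; // SUBREG_TO_REG: upper bits carry no meaning
  return Widened & 1u;        // ANDri with immediate 1 isolates the bool bit
}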
MachineInstr &AndInst = + *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AndOpc), DstReg) + .addReg(DefReg) + .addImm(1); - constrainSelectedInstRegOperands(AndInst, TII, TRI, RBI); + constrainSelectedInstRegOperands(AndInst, TII, TRI, RBI); - I.eraseFromParent(); - return true; - } - - return false; + I.eraseFromParent(); + return true; } bool X86InstructionSelector::selectCmp(MachineInstr &I, @@ -564,6 +726,338 @@ bool X86InstructionSelector::selectCmp(MachineInstr &I, return true; } +bool X86InstructionSelector::selectUadde(MachineInstr &I, + MachineRegisterInfo &MRI, + MachineFunction &MF) const { + if (I.getOpcode() != TargetOpcode::G_UADDE) + return false; + + const unsigned DstReg = I.getOperand(0).getReg(); + const unsigned CarryOutReg = I.getOperand(1).getReg(); + const unsigned Op0Reg = I.getOperand(2).getReg(); + const unsigned Op1Reg = I.getOperand(3).getReg(); + unsigned CarryInReg = I.getOperand(4).getReg(); + + const LLT DstTy = MRI.getType(DstReg); + + if (DstTy != LLT::scalar(32)) + return false; + + // Find the instruction that defines CarryIn. + MachineInstr *Def = MRI.getVRegDef(CarryInReg); + while (Def->getOpcode() == TargetOpcode::G_TRUNC) { + CarryInReg = Def->getOperand(1).getReg(); + Def = MRI.getVRegDef(CarryInReg); + } + + unsigned Opcode; + if (Def->getOpcode() == TargetOpcode::G_UADDE) { + // The carry was set by a previous ADD. + + BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::COPY), X86::EFLAGS) + .addReg(CarryInReg); + + if (!RBI.constrainGenericRegister(CarryInReg, X86::GR32RegClass, MRI)) + return false; + + Opcode = X86::ADC32rr; + } else if (auto val = getConstantVRegVal(CarryInReg, MRI)) { + // The carry is a constant; only 0 is supported. + if (*val != 0) + return false; + + Opcode = X86::ADD32rr; + } else + return false; + + MachineInstr &AddInst = + *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Opcode), DstReg) + .addReg(Op0Reg) + .addReg(Op1Reg); + + BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::COPY), CarryOutReg) + .addReg(X86::EFLAGS); + + if (!constrainSelectedInstRegOperands(AddInst, TII, TRI, RBI) || + !RBI.constrainGenericRegister(CarryOutReg, X86::GR32RegClass, MRI)) + return false; + + I.eraseFromParent(); + return true; +} + +bool X86InstructionSelector::selectExtract(MachineInstr &I, + MachineRegisterInfo &MRI, + MachineFunction &MF) const { + + if (I.getOpcode() != TargetOpcode::G_EXTRACT) + return false; + + const unsigned DstReg = I.getOperand(0).getReg(); + const unsigned SrcReg = I.getOperand(1).getReg(); + int64_t Index = I.getOperand(2).getImm(); + + const LLT DstTy = MRI.getType(DstReg); + const LLT SrcTy = MRI.getType(SrcReg); + + // For now, handle vector types only. + if (!DstTy.isVector()) + return false; + + if (Index % DstTy.getSizeInBits() != 0) + return false; // Not a subvector extract. + + if (Index == 0) { + // Replace by extract subreg copy.
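// Scalar model of the G_UADDE semantics selected above - ADD32rr when the
// incoming carry is a known zero, ADC32rr otherwise (sketch only):
#include <cstdint>
static uint32_t uadde32(uint32_t A, uint32_t B, bool CarryIn, bool &CarryOut) {
  uint64_t Wide = uint64_t(A) + B + (CarryIn ? 1 : 0); // ADC-style add
  CarryOut = Wide > UINT32_MAX;                        // models EFLAGS.CF
  return uint32_t(Wide);
}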
+ if (!emitExtractSubreg(DstReg, SrcReg, I, MRI, MF)) + return false; + + I.eraseFromParent(); + return true; + } + + bool HasAVX = STI.hasAVX(); + bool HasAVX512 = STI.hasAVX512(); + bool HasVLX = STI.hasVLX(); + + if (SrcTy.getSizeInBits() == 256 && DstTy.getSizeInBits() == 128) { + if (HasVLX) + I.setDesc(TII.get(X86::VEXTRACTF32x4Z256rr)); + else if (HasAVX) + I.setDesc(TII.get(X86::VEXTRACTF128rr)); + else + return false; + } else if (SrcTy.getSizeInBits() == 512 && HasAVX512) { + if (DstTy.getSizeInBits() == 128) + I.setDesc(TII.get(X86::VEXTRACTF32x4Zrr)); + else if (DstTy.getSizeInBits() == 256) + I.setDesc(TII.get(X86::VEXTRACTF64x4Zrr)); + else + return false; + } else + return false; + + // Convert to X86 VEXTRACT immediate. + Index = Index / DstTy.getSizeInBits(); + I.getOperand(2).setImm(Index); + + return constrainSelectedInstRegOperands(I, TII, TRI, RBI); +} + +bool X86InstructionSelector::emitExtractSubreg(unsigned DstReg, unsigned SrcReg, + MachineInstr &I, + MachineRegisterInfo &MRI, + MachineFunction &MF) const { + + const LLT DstTy = MRI.getType(DstReg); + const LLT SrcTy = MRI.getType(SrcReg); + unsigned SubIdx = X86::NoSubRegister; + + if (!DstTy.isVector() || !SrcTy.isVector()) + return false; + + assert(SrcTy.getSizeInBits() > DstTy.getSizeInBits() && + "Incorrect Src/Dst register size"); + + if (DstTy.getSizeInBits() == 128) + SubIdx = X86::sub_xmm; + else if (DstTy.getSizeInBits() == 256) + SubIdx = X86::sub_ymm; + else + return false; + + const TargetRegisterClass *DstRC = getRegClass(DstTy, DstReg, MRI); + const TargetRegisterClass *SrcRC = getRegClass(SrcTy, SrcReg, MRI); + + SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubIdx); + + if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) || + !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) { + DEBUG(dbgs() << "Failed to constrain EXTRACT_SUBREG\n"); + return false; + } + + BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::COPY), DstReg) + .addReg(SrcReg, 0, SubIdx); + + return true; +} + +bool X86InstructionSelector::emitInsertSubreg(unsigned DstReg, unsigned SrcReg, + MachineInstr &I, + MachineRegisterInfo &MRI, + MachineFunction &MF) const { + + const LLT DstTy = MRI.getType(DstReg); + const LLT SrcTy = MRI.getType(SrcReg); + unsigned SubIdx = X86::NoSubRegister; + + // TODO: support scalar types + if (!DstTy.isVector() || !SrcTy.isVector()) + return false; + + assert(SrcTy.getSizeInBits() < DstTy.getSizeInBits() && + "Incorrect Src/Dst register size"); + + if (SrcTy.getSizeInBits() == 128) + SubIdx = X86::sub_xmm; + else if (SrcTy.getSizeInBits() == 256) + SubIdx = X86::sub_ymm; + else + return false; + + const TargetRegisterClass *SrcRC = getRegClass(SrcTy, SrcReg, MRI); + const TargetRegisterClass *DstRC = getRegClass(DstTy, DstReg, MRI); + + if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) || + !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) { + DEBUG(dbgs() << "Failed to constrain INSERT_SUBREG\n"); + return false; + } + + BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::COPY)) + .addReg(DstReg, RegState::DefineNoRead, SubIdx) + .addReg(SrcReg); + + return true; +} + +bool X86InstructionSelector::selectInsert(MachineInstr &I, + MachineRegisterInfo &MRI, + MachineFunction &MF) const { + + if (I.getOpcode() != TargetOpcode::G_INSERT) + return false; + + const unsigned DstReg = I.getOperand(0).getReg(); + const unsigned SrcReg = I.getOperand(1).getReg(); + const unsigned InsertReg = I.getOperand(2).getReg(); + int64_t Index = I.getOperand(3).getImm(); + + const LLT DstTy =
MRI.getType(DstReg); + const LLT InsertRegTy = MRI.getType(InsertReg); + + // For now, handle vector types only. + if (!DstTy.isVector()) + return false; + + if (Index % InsertRegTy.getSizeInBits() != 0) + return false; // Not a subvector insert. + + if (Index == 0 && MRI.getVRegDef(SrcReg)->isImplicitDef()) { + // Replace by subreg copy. + if (!emitInsertSubreg(DstReg, InsertReg, I, MRI, MF)) + return false; + + I.eraseFromParent(); + return true; + } + + bool HasAVX = STI.hasAVX(); + bool HasAVX512 = STI.hasAVX512(); + bool HasVLX = STI.hasVLX(); + + if (DstTy.getSizeInBits() == 256 && InsertRegTy.getSizeInBits() == 128) { + if (HasVLX) + I.setDesc(TII.get(X86::VINSERTF32x4Z256rr)); + else if (HasAVX) + I.setDesc(TII.get(X86::VINSERTF128rr)); + else + return false; + } else if (DstTy.getSizeInBits() == 512 && HasAVX512) { + if (InsertRegTy.getSizeInBits() == 128) + I.setDesc(TII.get(X86::VINSERTF32x4Zrr)); + else if (InsertRegTy.getSizeInBits() == 256) + I.setDesc(TII.get(X86::VINSERTF64x4Zrr)); + else + return false; + } else + return false; + + // Convert to X86 VINSERT immediate. + Index = Index / InsertRegTy.getSizeInBits(); + + I.getOperand(3).setImm(Index); + + return constrainSelectedInstRegOperands(I, TII, TRI, RBI); +} + +bool X86InstructionSelector::selectUnmergeValues(MachineInstr &I, + MachineRegisterInfo &MRI, + MachineFunction &MF) const { + if (I.getOpcode() != TargetOpcode::G_UNMERGE_VALUES) + return false; + + // Split into extracts. + unsigned NumDefs = I.getNumOperands() - 1; + unsigned SrcReg = I.getOperand(NumDefs).getReg(); + unsigned DefSize = MRI.getType(I.getOperand(0).getReg()).getSizeInBits(); + + for (unsigned Idx = 0; Idx < NumDefs; ++Idx) { + + MachineInstr &ExtrInst = + *BuildMI(*I.getParent(), I, I.getDebugLoc(), + TII.get(TargetOpcode::G_EXTRACT), I.getOperand(Idx).getReg()) + .addReg(SrcReg) + .addImm(Idx * DefSize); + + if (!select(ExtrInst)) + return false; + } + + I.eraseFromParent(); + return true; +} + +bool X86InstructionSelector::selectMergeValues(MachineInstr &I, + MachineRegisterInfo &MRI, + MachineFunction &MF) const { + if (I.getOpcode() != TargetOpcode::G_MERGE_VALUES) + return false; + + // Split into inserts. + unsigned DstReg = I.getOperand(0).getReg(); + unsigned SrcReg0 = I.getOperand(1).getReg(); + + const LLT DstTy = MRI.getType(DstReg); + const LLT SrcTy = MRI.getType(SrcReg0); + unsigned SrcSize = SrcTy.getSizeInBits(); + + const RegisterBank &RegBank = *RBI.getRegBank(DstReg, MRI, TRI); + + // For the first source, use a subreg insert.
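// Toy integer model (not backend code) of the merge lowering that follows:
// part 0 is placed by a subreg insert, and each further operand Idx is
// inserted at bit offset (Idx - 1) * SrcSize of the growing wide value.
#include <cstdint>
#include <vector>
static uint64_t merge16To64(const std::vector<uint16_t> &Parts) {
  uint64_t Wide = 0;
  for (std::size_t I = 0; I < Parts.size(); ++I)
    Wide |= uint64_t(Parts[I]) << (I * 16); // part I at bit offset I * 16
  return Wide;
}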
+ unsigned DefReg = MRI.createGenericVirtualRegister(DstTy); + MRI.setRegBank(DefReg, RegBank); + if (!emitInsertSubreg(DefReg, I.getOperand(1).getReg(), I, MRI, MF)) + return false; + + for (unsigned Idx = 2; Idx < I.getNumOperands(); ++Idx) { + + unsigned Tmp = MRI.createGenericVirtualRegister(DstTy); + MRI.setRegBank(Tmp, RegBank); + + MachineInstr &InsertInst = *BuildMI(*I.getParent(), I, I.getDebugLoc(), + TII.get(TargetOpcode::G_INSERT), Tmp) + .addReg(DefReg) + .addReg(I.getOperand(Idx).getReg()) + .addImm((Idx - 1) * SrcSize); + + DefReg = Tmp; + + if (!select(InsertInst)) + return false; + } + + MachineInstr &CopyInst = *BuildMI(*I.getParent(), I, I.getDebugLoc(), + TII.get(TargetOpcode::COPY), DstReg) + .addReg(DefReg); + + if (!select(CopyInst)) + return false; + + I.eraseFromParent(); + return true; +} InstructionSelector * llvm::createX86InstructionSelector(const X86TargetMachine &TM, X86Subtarget &Subtarget, diff --git a/interpreter/llvm/src/lib/Target/X86/X86InterleavedAccess.cpp b/interpreter/llvm/src/lib/Target/X86/X86InterleavedAccess.cpp index 806d6cc888f0f..f0ed4bc16e2f9 100644 --- a/interpreter/llvm/src/lib/Target/X86/X86InterleavedAccess.cpp +++ b/interpreter/llvm/src/lib/Target/X86/X86InterleavedAccess.cpp @@ -16,6 +16,7 @@ #include "X86ISelLowering.h" #include "X86TargetMachine.h" +#include "llvm/Analysis/VectorUtils.h" using namespace llvm; @@ -50,9 +51,8 @@ class X86InterleavedAccessGroup { IRBuilder<> &Builder; /// \brief Breaks down a vector \p 'Inst' of N elements into \p NumSubVectors - /// sub vectors of type \p T. Returns true and the sub-vectors in - /// \p DecomposedVectors if it decomposes the Inst, returns false otherwise. - bool decompose(Instruction *Inst, unsigned NumSubVectors, VectorType *T, + /// sub vectors of type \p T. Returns the sub-vectors in \p DecomposedVectors. + void decompose(Instruction *Inst, unsigned NumSubVectors, VectorType *T, SmallVectorImpl<Instruction *> &DecomposedVectors); /// \brief Performs matrix transposition on a 4x4 matrix \p InputVectors and @@ -80,8 +80,7 @@ class X86InterleavedAccessGroup { /// target information \p STarget. explicit X86InterleavedAccessGroup(Instruction *I, ArrayRef<ShuffleVectorInst *> Shuffs, - ArrayRef<unsigned> Ind, - const unsigned F, + ArrayRef<unsigned> Ind, const unsigned F, const X86Subtarget &STarget, IRBuilder<> &B) : Inst(I), Shuffles(Shuffs), Indices(Ind), Factor(F), Subtarget(STarget), @@ -102,48 +101,61 @@ bool X86InterleavedAccessGroup::isSupported() const { uint64_t ShuffleVecSize = DL.getTypeSizeInBits(ShuffleVecTy); Type *ShuffleEltTy = ShuffleVecTy->getVectorElementType(); - if (DL.getTypeSizeInBits(Inst->getType()) < Factor * ShuffleVecSize) - return false; + // Currently, lowering is supported for 4-element vectors of 64 bits on AVX. + uint64_t ExpectedShuffleVecSize; + if (isa<LoadInst>(Inst)) + ExpectedShuffleVecSize = 256; + else + ExpectedShuffleVecSize = 1024; - // Currently, lowering is supported for 64 bits on AVX.
- if (!Subtarget.hasAVX() || ShuffleVecSize != 256 || + if (!Subtarget.hasAVX() || ShuffleVecSize != ExpectedShuffleVecSize || DL.getTypeSizeInBits(ShuffleEltTy) != 64 || Factor != 4) return false; return true; } -bool X86InterleavedAccessGroup::decompose( +void X86InterleavedAccessGroup::decompose( Instruction *VecInst, unsigned NumSubVectors, VectorType *SubVecTy, SmallVectorImpl<Instruction *> &DecomposedVectors) { + + assert((isa<LoadInst>(VecInst) || isa<ShuffleVectorInst>(VecInst)) && + "Expected Load or Shuffle"); + Type *VecTy = VecInst->getType(); (void)VecTy; assert(VecTy->isVectorTy() && DL.getTypeSizeInBits(VecTy) >= DL.getTypeSizeInBits(SubVecTy) * NumSubVectors && "Invalid Inst-size!!!"); - assert(VecTy->getVectorElementType() == SubVecTy->getVectorElementType() && - "Element type mismatched!!!"); - if (!isa<LoadInst>(VecInst)) - return false; + if (auto *SVI = dyn_cast<ShuffleVectorInst>(VecInst)) { + Value *Op0 = SVI->getOperand(0); + Value *Op1 = SVI->getOperand(1); + + // Generate N(= NumSubVectors) shuffles of T(= SubVecTy) type. + for (unsigned i = 0; i < NumSubVectors; ++i) + DecomposedVectors.push_back( + cast<Instruction>(Builder.CreateShuffleVector( + Op0, Op1, createSequentialMask(Builder, Indices[i], + SubVecTy->getVectorNumElements(), 0)))); + return; + } + + // Decompose the load instruction. LoadInst *LI = cast<LoadInst>(VecInst); Type *VecBasePtrTy = SubVecTy->getPointerTo(LI->getPointerAddressSpace()); - Value *VecBasePtr = Builder.CreateBitCast(LI->getPointerOperand(), VecBasePtrTy); - // Generate N loads of T type + // Generate N loads of T type. for (unsigned i = 0; i < NumSubVectors; i++) { - // TODO: Support inbounds GEP + // TODO: Support inbounds GEP. Value *NewBasePtr = Builder.CreateGEP(VecBasePtr, Builder.getInt32(i)); Instruction *NewLoad = Builder.CreateAlignedLoad(NewBasePtr, LI->getAlignment()); DecomposedVectors.push_back(NewLoad); } - - return true; } void X86InterleavedAccessGroup::transpose_4x4( @@ -181,21 +193,46 @@ void X86InterleavedAccessGroup::transpose_4x4( // instructions/intrinsics. bool X86InterleavedAccessGroup::lowerIntoOptimizedSequence() { SmallVector<Instruction *, 4> DecomposedVectors; - VectorType *VecTy = Shuffles[0]->getType(); - // Try to generate target-sized register(/instruction). - if (!decompose(Inst, Factor, VecTy, DecomposedVectors)) - return false; - SmallVector<Value *, 4> TransposedVectors; - // Perform matrix-transposition in order to compute interleaved - // results by generating some sort of (optimized) target-specific - // instructions. + VectorType *ShuffleTy = Shuffles[0]->getType(); + + if (isa<LoadInst>(Inst)) { + // Try to generate target-sized register(/instruction). + decompose(Inst, Factor, ShuffleTy, DecomposedVectors); + + // Perform matrix-transposition in order to compute interleaved + // results by generating some sort of (optimized) target-specific + // instructions. + transpose_4x4(DecomposedVectors, TransposedVectors); + + // Now replace the unoptimized-interleaved-vectors with the + // transposed-interleaved vectors. + for (unsigned i = 0, e = Shuffles.size(); i < e; ++i) + Shuffles[i]->replaceAllUsesWith(TransposedVectors[Indices[i]]); + + return true; + } + + Type *ShuffleEltTy = ShuffleTy->getVectorElementType(); + unsigned NumSubVecElems = ShuffleTy->getVectorNumElements() / Factor; + + // Lower the interleaved stores: + // 1. Decompose the interleaved wide shuffle into individual shuffle + // vectors. + decompose(Shuffles[0], Factor, + VectorType::get(ShuffleEltTy, NumSubVecElems), DecomposedVectors); + + // 2. Transpose the interleaved-vectors into vectors of contiguous + // elements.
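// Scalar sketch (illustration only) of why the transpose produces an
// interleaved store: writing the transposed rows back-to-back yields the
// AoS layout that the original stride-4 shuffle described element by element.
#include <array>
static std::array<double, 16>
interleaveBy4(const std::array<std::array<double, 4>, 4> &V) {
  std::array<double, 16> Wide{};
  for (int Elt = 0; Elt < 4; ++Elt)
    for (int Vec = 0; Vec < 4; ++Vec)
      Wide[Elt * 4 + Vec] = V[Vec][Elt]; // element Elt of vector Vec
  return Wide;
}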
transpose_4x4(DecomposedVectors, TransposedVectors); - // Now replace the unoptimized-interleaved-vectors with the - // transposed-interleaved vectors. - for (unsigned i = 0; i < Shuffles.size(); i++) - Shuffles[i]->replaceAllUsesWith(TransposedVectors[Indices[i]]); + // 3. Concatenate the contiguous-vectors back into a wide vector. + Value *WideVec = concatenateVectors(Builder, TransposedVectors); + + // 4. Generate a store instruction for wide-vec. + StoreInst *SI = cast<StoreInst>(Inst); + Builder.CreateAlignedStore(WideVec, SI->getPointerOperand(), + SI->getAlignment()); return true; } @@ -220,3 +257,29 @@ bool X86TargetLowering::lowerInterleavedLoad( return Grp.isSupported() && Grp.lowerIntoOptimizedSequence(); } + +bool X86TargetLowering::lowerInterleavedStore(StoreInst *SI, + ShuffleVectorInst *SVI, + unsigned Factor) const { + assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() && + "Invalid interleave factor"); + + assert(SVI->getType()->getVectorNumElements() % Factor == 0 && + "Invalid interleaved store"); + + // Holds the indices of SVI that correspond to the starting index of each + // interleaved shuffle. + SmallVector<unsigned, 4> Indices; + auto Mask = SVI->getShuffleMask(); + for (unsigned i = 0; i < Factor; i++) + Indices.push_back(Mask[i]); + + ArrayRef<ShuffleVectorInst *> Shuffles = makeArrayRef(SVI); + + // Create an interleaved access group. + IRBuilder<> Builder(SI); + X86InterleavedAccessGroup Grp(SI, Shuffles, Indices, Factor, Subtarget, + Builder); + + return Grp.isSupported() && Grp.lowerIntoOptimizedSequence(); +} diff --git a/interpreter/llvm/src/lib/Target/X86/X86IntrinsicsInfo.h b/interpreter/llvm/src/lib/Target/X86/X86IntrinsicsInfo.h index 2a40399ba5712..6b1add8ff8ed1 100644 --- a/interpreter/llvm/src/lib/Target/X86/X86IntrinsicsInfo.h +++ b/interpreter/llvm/src/lib/Target/X86/X86IntrinsicsInfo.h @@ -36,7 +36,7 @@ enum IntrinsicType : uint16_t { TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32, EXPAND_FROM_MEM, TERLOG_OP_MASK, TERLOG_OP_MASKZ, BROADCASTM, KUNPCK, FIXUPIMM, FIXUPIMM_MASKZ, FIXUPIMMS, - FIXUPIMMS_MASKZ, CONVERT_MASK_TO_VEC, CONVERT_TO_MASK, GATHER_AVX2, MASK_BINOP, + FIXUPIMMS_MASKZ, CONVERT_TO_MASK, GATHER_AVX2, MASK_BINOP, }; struct IntrinsicData { @@ -510,12 +510,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86ISD::SHUF128, 0), X86_INTRINSIC_DATA(avx512_mask_broadcasti64x4_512, BRCST_SUBVEC_TO_VEC, X86ISD::SHUF128, 0), - X86_INTRINSIC_DATA(avx512_mask_cmp_b_128, CMP_MASK_CC, X86ISD::CMPM, 0), - X86_INTRINSIC_DATA(avx512_mask_cmp_b_256, CMP_MASK_CC, X86ISD::CMPM, 0), - X86_INTRINSIC_DATA(avx512_mask_cmp_b_512, CMP_MASK_CC, X86ISD::CMPM, 0), - X86_INTRINSIC_DATA(avx512_mask_cmp_d_128, CMP_MASK_CC, X86ISD::CMPM, 0), - X86_INTRINSIC_DATA(avx512_mask_cmp_d_256, CMP_MASK_CC, X86ISD::CMPM, 0), - X86_INTRINSIC_DATA(avx512_mask_cmp_d_512, CMP_MASK_CC, X86ISD::CMPM, 0), X86_INTRINSIC_DATA(avx512_mask_cmp_pd_128, CMP_MASK_CC, X86ISD::CMPM, 0), X86_INTRINSIC_DATA(avx512_mask_cmp_pd_256, CMP_MASK_CC, X86ISD::CMPM, 0), X86_INTRINSIC_DATA(avx512_mask_cmp_pd_512, CMP_MASK_CC, X86ISD::CMPM, @@ -524,16 +518,10 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_mask_cmp_ps_256, CMP_MASK_CC, X86ISD::CMPM, 0), X86_INTRINSIC_DATA(avx512_mask_cmp_ps_512, CMP_MASK_CC, X86ISD::CMPM, X86ISD::CMPM_RND), - X86_INTRINSIC_DATA(avx512_mask_cmp_q_128, CMP_MASK_CC, X86ISD::CMPM, 0), - X86_INTRINSIC_DATA(avx512_mask_cmp_q_256, CMP_MASK_CC, X86ISD::CMPM, 0), - X86_INTRINSIC_DATA(avx512_mask_cmp_q_512, CMP_MASK_CC, X86ISD::CMPM, 0),
X86_INTRINSIC_DATA(avx512_mask_cmp_sd, CMP_MASK_SCALAR_CC, X86ISD::FSETCCM, X86ISD::FSETCCM_RND), X86_INTRINSIC_DATA(avx512_mask_cmp_ss, CMP_MASK_SCALAR_CC, X86ISD::FSETCCM, X86ISD::FSETCCM_RND), - X86_INTRINSIC_DATA(avx512_mask_cmp_w_128, CMP_MASK_CC, X86ISD::CMPM, 0), - X86_INTRINSIC_DATA(avx512_mask_cmp_w_256, CMP_MASK_CC, X86ISD::CMPM, 0), - X86_INTRINSIC_DATA(avx512_mask_cmp_w_512, CMP_MASK_CC, X86ISD::CMPM, 0), X86_INTRINSIC_DATA(avx512_mask_compress_d_128, COMPRESS_EXPAND_IN_REG, X86ISD::COMPRESS, 0), X86_INTRINSIC_DATA(avx512_mask_compress_d_256, COMPRESS_EXPAND_IN_REG, @@ -1171,18 +1159,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86ISD::FSUBS_RND, 0), X86_INTRINSIC_DATA(avx512_mask_sub_ss_round, INTR_TYPE_SCALAR_MASK_RM, X86ISD::FSUBS_RND, 0), - X86_INTRINSIC_DATA(avx512_mask_ucmp_b_128, CMP_MASK_CC, X86ISD::CMPMU, 0), - X86_INTRINSIC_DATA(avx512_mask_ucmp_b_256, CMP_MASK_CC, X86ISD::CMPMU, 0), - X86_INTRINSIC_DATA(avx512_mask_ucmp_b_512, CMP_MASK_CC, X86ISD::CMPMU, 0), - X86_INTRINSIC_DATA(avx512_mask_ucmp_d_128, CMP_MASK_CC, X86ISD::CMPMU, 0), - X86_INTRINSIC_DATA(avx512_mask_ucmp_d_256, CMP_MASK_CC, X86ISD::CMPMU, 0), - X86_INTRINSIC_DATA(avx512_mask_ucmp_d_512, CMP_MASK_CC, X86ISD::CMPMU, 0), - X86_INTRINSIC_DATA(avx512_mask_ucmp_q_128, CMP_MASK_CC, X86ISD::CMPMU, 0), - X86_INTRINSIC_DATA(avx512_mask_ucmp_q_256, CMP_MASK_CC, X86ISD::CMPMU, 0), - X86_INTRINSIC_DATA(avx512_mask_ucmp_q_512, CMP_MASK_CC, X86ISD::CMPMU, 0), - X86_INTRINSIC_DATA(avx512_mask_ucmp_w_128, CMP_MASK_CC, X86ISD::CMPMU, 0), - X86_INTRINSIC_DATA(avx512_mask_ucmp_w_256, CMP_MASK_CC, X86ISD::CMPMU, 0), - X86_INTRINSIC_DATA(avx512_mask_ucmp_w_512, CMP_MASK_CC, X86ISD::CMPMU, 0), X86_INTRINSIC_DATA(avx512_mask_vcvtph2ps_128, INTR_TYPE_1OP_MASK_RM, X86ISD::CVTPH2PS, 0), X86_INTRINSIC_DATA(avx512_mask_vcvtph2ps_256, INTR_TYPE_1OP_MASK_RM, diff --git a/interpreter/llvm/src/lib/Target/X86/X86LegalizerInfo.cpp b/interpreter/llvm/src/lib/Target/X86/X86LegalizerInfo.cpp index cf26238c02399..744ba21011af7 100644 --- a/interpreter/llvm/src/lib/Target/X86/X86LegalizerInfo.cpp +++ b/interpreter/llvm/src/lib/Target/X86/X86LegalizerInfo.cpp @@ -35,6 +35,7 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI, setLegalizerInfoSSE1(); setLegalizerInfoSSE2(); setLegalizerInfoSSE41(); + setLegalizerInfoAVX(); setLegalizerInfoAVX2(); setLegalizerInfoAVX512(); setLegalizerInfoAVX512DQ(); @@ -55,20 +56,27 @@ void X86LegalizerInfo::setLegalizerInfo32bit() { const LLT s32 = LLT::scalar(32); const LLT s64 = LLT::scalar(64); - for (unsigned BinOp : {G_ADD, G_SUB, G_MUL}) + for (unsigned BinOp : {G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR}) for (auto Ty : {s8, s16, s32}) setAction({BinOp, Ty}, Legal); + for (unsigned Op : {G_UADDE}) { + setAction({Op, s32}, Legal); + setAction({Op, 1, s1}, Legal); + } + for (unsigned MemOp : {G_LOAD, G_STORE}) { for (auto Ty : {s8, s16, s32, p0}) setAction({MemOp, Ty}, Legal); + setAction({MemOp, s1}, WidenScalar); // And everything's fine in addrspace 0. 
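// Note on the WidenScalar action registered above (sketch, not verbatim
// legalizer output): an s1 load/store is legalized by widening the memory
// access to the next legal scalar and truncating, roughly
//   %wide:_(s8) = G_LOAD %ptr(p0)
//   %val:_(s1)  = G_TRUNC %wide(s8)
// so only byte-or-wider scalars reach instruction selection for memory ops.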
setAction({MemOp, 1, p0}, Legal); } // Pointer-handling setAction({G_FRAME_INDEX, p0}, Legal); + setAction({G_GLOBAL_VALUE, p0}, Legal); setAction({G_GEP, p0}, Legal); setAction({G_GEP, 1, s32}, Legal); @@ -84,8 +92,10 @@ void X86LegalizerInfo::setLegalizerInfo32bit() { setAction({TargetOpcode::G_CONSTANT, s64}, NarrowScalar); // Extensions - setAction({G_ZEXT, s32}, Legal); - setAction({G_SEXT, s32}, Legal); + for (auto Ty : {s8, s16, s32}) { + setAction({G_ZEXT, Ty}, Legal); + setAction({G_SEXT, Ty}, Legal); + } for (auto Ty : {s1, s8, s16}) { setAction({G_ZEXT, 1, Ty}, Legal); @@ -111,7 +121,7 @@ void X86LegalizerInfo::setLegalizerInfo64bit() { const LLT s32 = LLT::scalar(32); const LLT s64 = LLT::scalar(64); - for (unsigned BinOp : {G_ADD, G_SUB, G_MUL}) + for (unsigned BinOp : {G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR}) for (auto Ty : {s8, s16, s32, s64}) setAction({BinOp, Ty}, Legal); @@ -119,12 +129,14 @@ void X86LegalizerInfo::setLegalizerInfo64bit() { for (auto Ty : {s8, s16, s32, s64, p0}) setAction({MemOp, Ty}, Legal); + setAction({MemOp, s1}, WidenScalar); // And everything's fine in addrspace 0. setAction({MemOp, 1, p0}, Legal); } // Pointer-handling setAction({G_FRAME_INDEX, p0}, Legal); + setAction({G_GLOBAL_VALUE, p0}, Legal); setAction({G_GEP, p0}, Legal); setAction({G_GEP, 1, s32}, Legal); @@ -140,7 +152,7 @@ void X86LegalizerInfo::setLegalizerInfo64bit() { setAction({TargetOpcode::G_CONSTANT, s1}, WidenScalar); // Extensions - for (auto Ty : {s32, s64}) { + for (auto Ty : {s8, s16, s32, s64}) { setAction({G_ZEXT, Ty}, Legal); setAction({G_SEXT, Ty}, Legal); } @@ -179,6 +191,7 @@ void X86LegalizerInfo::setLegalizerInfoSSE2() { return; const LLT s64 = LLT::scalar(64); + const LLT v16s8 = LLT::vector(16, 8); const LLT v8s16 = LLT::vector(8, 16); const LLT v4s32 = LLT::vector(4, 32); const LLT v2s64 = LLT::vector(2, 64); @@ -188,7 +201,7 @@ void X86LegalizerInfo::setLegalizerInfoSSE2() { setAction({BinOp, Ty}, Legal); for (unsigned BinOp : {G_ADD, G_SUB}) - for (auto Ty : {v4s32}) + for (auto Ty : {v16s8, v8s16, v4s32, v2s64}) setAction({BinOp, Ty}, Legal); setAction({G_MUL, v8s16}, Legal); @@ -203,12 +216,46 @@ void X86LegalizerInfo::setLegalizerInfoSSE41() { setAction({G_MUL, v4s32}, Legal); } +void X86LegalizerInfo::setLegalizerInfoAVX() { + if (!Subtarget.hasAVX()) + return; + + const LLT v16s8 = LLT::vector(16, 8); + const LLT v8s16 = LLT::vector(8, 16); + const LLT v4s32 = LLT::vector(4, 32); + const LLT v2s64 = LLT::vector(2, 64); + + const LLT v32s8 = LLT::vector(32, 8); + const LLT v16s16 = LLT::vector(16, 16); + const LLT v8s32 = LLT::vector(8, 32); + const LLT v4s64 = LLT::vector(4, 64); + + for (unsigned MemOp : {G_LOAD, G_STORE}) + for (auto Ty : {v8s32, v4s64}) + setAction({MemOp, Ty}, Legal); + + for (auto Ty : {v32s8, v16s16, v8s32, v4s64}) { + setAction({G_INSERT, Ty}, Legal); + setAction({G_EXTRACT, 1, Ty}, Legal); + } + for (auto Ty : {v16s8, v8s16, v4s32, v2s64}) { + setAction({G_INSERT, 1, Ty}, Legal); + setAction({G_EXTRACT, Ty}, Legal); + } +} + void X86LegalizerInfo::setLegalizerInfoAVX2() { if (!Subtarget.hasAVX2()) return; + const LLT v32s8 = LLT::vector(32, 8); const LLT v16s16 = LLT::vector(16, 16); const LLT v8s32 = LLT::vector(8, 32); + const LLT v4s64 = LLT::vector(4, 64); + + for (unsigned BinOp : {G_ADD, G_SUB}) + for (auto Ty : {v32s8, v16s16, v8s32, v4s64}) + setAction({BinOp, Ty}, Legal); for (auto Ty : {v16s16, v8s32}) setAction({G_MUL, Ty}, Legal); @@ -218,17 +265,44 @@ void X86LegalizerInfo::setLegalizerInfoAVX512() { if 
(!Subtarget.hasAVX512()) return; + const LLT v16s8 = LLT::vector(16, 8); + const LLT v8s16 = LLT::vector(8, 16); + const LLT v4s32 = LLT::vector(4, 32); + const LLT v2s64 = LLT::vector(2, 64); + + const LLT v32s8 = LLT::vector(32, 8); + const LLT v16s16 = LLT::vector(16, 16); + const LLT v8s32 = LLT::vector(8, 32); + const LLT v4s64 = LLT::vector(4, 64); + + const LLT v64s8 = LLT::vector(64, 8); + const LLT v32s16 = LLT::vector(32, 16); const LLT v16s32 = LLT::vector(16, 32); + const LLT v8s64 = LLT::vector(8, 64); + + for (unsigned BinOp : {G_ADD, G_SUB}) + for (auto Ty : {v16s32, v8s64}) + setAction({BinOp, Ty}, Legal); setAction({G_MUL, v16s32}, Legal); + for (unsigned MemOp : {G_LOAD, G_STORE}) + for (auto Ty : {v16s32, v8s64}) + setAction({MemOp, Ty}, Legal); + + for (auto Ty : {v64s8, v32s16, v16s32, v8s64}) { + setAction({G_INSERT, Ty}, Legal); + setAction({G_EXTRACT, 1, Ty}, Legal); + } + for (auto Ty : {v32s8, v16s16, v8s32, v4s64, v16s8, v8s16, v4s32, v2s64}) { + setAction({G_INSERT, 1, Ty}, Legal); + setAction({G_EXTRACT, Ty}, Legal); + } + /************ VLX *******************/ if (!Subtarget.hasVLX()) return; - const LLT v4s32 = LLT::vector(4, 32); - const LLT v8s32 = LLT::vector(8, 32); - for (auto Ty : {v4s32, v8s32}) setAction({G_MUL, Ty}, Legal); } @@ -256,8 +330,13 @@ void X86LegalizerInfo::setLegalizerInfoAVX512BW() { if (!(Subtarget.hasAVX512() && Subtarget.hasBWI())) return; + const LLT v64s8 = LLT::vector(64, 8); const LLT v32s16 = LLT::vector(32, 16); + for (unsigned BinOp : {G_ADD, G_SUB}) + for (auto Ty : {v64s8, v32s16}) + setAction({BinOp, Ty}, Legal); + setAction({G_MUL, v32s16}, Legal); /************ VLX *******************/ diff --git a/interpreter/llvm/src/lib/Target/X86/X86LegalizerInfo.h b/interpreter/llvm/src/lib/Target/X86/X86LegalizerInfo.h index ab5405a704273..135950a95f844 100644 --- a/interpreter/llvm/src/lib/Target/X86/X86LegalizerInfo.h +++ b/interpreter/llvm/src/lib/Target/X86/X86LegalizerInfo.h @@ -39,6 +39,7 @@ class X86LegalizerInfo : public LegalizerInfo { void setLegalizerInfoSSE1(); void setLegalizerInfoSSE2(); void setLegalizerInfoSSE41(); + void setLegalizerInfoAVX(); void setLegalizerInfoAVX2(); void setLegalizerInfoAVX512(); void setLegalizerInfoAVX512DQ(); diff --git a/interpreter/llvm/src/lib/Target/X86/X86MCInstLower.cpp b/interpreter/llvm/src/lib/Target/X86/X86MCInstLower.cpp index 598d88d8b9c3b..fd2837b79103e 100644 --- a/interpreter/llvm/src/lib/Target/X86/X86MCInstLower.cpp +++ b/interpreter/llvm/src/lib/Target/X86/X86MCInstLower.cpp @@ -12,20 +12,21 @@ // //===----------------------------------------------------------------------===// -#include "X86AsmPrinter.h" -#include "X86RegisterInfo.h" -#include "X86ShuffleDecodeConstantPool.h" #include "InstPrinter/X86ATTInstPrinter.h" #include "InstPrinter/X86InstComments.h" #include "MCTargetDesc/X86BaseInfo.h" #include "Utils/X86ShuffleDecode.h" +#include "X86AsmPrinter.h" +#include "X86RegisterInfo.h" +#include "X86ShuffleDecodeConstantPool.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/iterator_range.h" -#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/CodeGen/MachineConstantPool.h" -#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/StackMaps.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/GlobalValue.h" @@ -38,13 +39,12 @@ #include "llvm/MC/MCInst.h" 
#include "llvm/MC/MCInstBuilder.h" #include "llvm/MC/MCSection.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCSymbolELF.h" -#include "llvm/MC/MCSectionELF.h" -#include "llvm/MC/MCSectionMachO.h" #include "llvm/Support/TargetRegistry.h" -#include "llvm/Support/ELF.h" #include "llvm/Target/TargetLoweringObjectFile.h" using namespace llvm; @@ -1042,7 +1042,7 @@ void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI, void X86AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI, X86MCInstLower &MCIL) { - assert(Subtarget->is64Bit() && "XRay custom events only suports X86-64"); + assert(Subtarget->is64Bit() && "XRay custom events only supports X86-64"); // We want to emit the following pattern, which follows the x86 calling // convention to prepare for the trampoline call to be patched in. @@ -1332,6 +1332,32 @@ static std::string getShuffleComment(const MachineInstr *MI, return Comment; } +static void printConstant(const Constant *COp, raw_ostream &CS) { + if (isa(COp)) { + CS << "u"; + } else if (auto *CI = dyn_cast(COp)) { + if (CI->getBitWidth() <= 64) { + CS << CI->getZExtValue(); + } else { + // print multi-word constant as (w0,w1) + const auto &Val = CI->getValue(); + CS << "("; + for (int i = 0, N = Val.getNumWords(); i < N; ++i) { + if (i > 0) + CS << ","; + CS << Val.getRawData()[i]; + } + CS << ")"; + } + } else if (auto *CF = dyn_cast(COp)) { + SmallString<32> Str; + CF->getValueAPF().toString(Str); + CS << Str; + } else { + CS << "?"; + } +} + void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { X86MCInstLower MCInstLowering(*MF, *this); const X86RegisterInfo *RI = MF->getSubtarget().getRegisterInfo(); @@ -1766,59 +1792,73 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { // For loads from a constant pool to a vector register, print the constant // loaded. CASE_ALL_MOV_RM() + case X86::VBROADCASTF128: + case X86::VBROADCASTI128: + case X86::VBROADCASTF32X4Z256rm: + case X86::VBROADCASTF32X4rm: + case X86::VBROADCASTF32X8rm: + case X86::VBROADCASTF64X2Z128rm: + case X86::VBROADCASTF64X2rm: + case X86::VBROADCASTF64X4rm: + case X86::VBROADCASTI32X4Z256rm: + case X86::VBROADCASTI32X4rm: + case X86::VBROADCASTI32X8rm: + case X86::VBROADCASTI64X2Z128rm: + case X86::VBROADCASTI64X2rm: + case X86::VBROADCASTI64X4rm: if (!OutStreamer->isVerboseAsm()) break; if (MI->getNumOperands() <= 4) break; if (auto *C = getConstantFromPool(*MI, MI->getOperand(4))) { + int NumLanes = 1; + // Override NumLanes for the broadcast instructions. 
+ switch (MI->getOpcode()) { + case X86::VBROADCASTF128: NumLanes = 2; break; + case X86::VBROADCASTI128: NumLanes = 2; break; + case X86::VBROADCASTF32X4Z256rm: NumLanes = 2; break; + case X86::VBROADCASTF32X4rm: NumLanes = 4; break; + case X86::VBROADCASTF32X8rm: NumLanes = 2; break; + case X86::VBROADCASTF64X2Z128rm: NumLanes = 2; break; + case X86::VBROADCASTF64X2rm: NumLanes = 4; break; + case X86::VBROADCASTF64X4rm: NumLanes = 2; break; + case X86::VBROADCASTI32X4Z256rm: NumLanes = 2; break; + case X86::VBROADCASTI32X4rm: NumLanes = 4; break; + case X86::VBROADCASTI32X8rm: NumLanes = 2; break; + case X86::VBROADCASTI64X2Z128rm: NumLanes = 2; break; + case X86::VBROADCASTI64X2rm: NumLanes = 4; break; + case X86::VBROADCASTI64X4rm: NumLanes = 2; break; + } + std::string Comment; raw_string_ostream CS(Comment); const MachineOperand &DstOp = MI->getOperand(0); CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = "; if (auto *CDS = dyn_cast<ConstantDataSequential>(C)) { CS << "["; - for (int i = 0, NumElements = CDS->getNumElements(); i < NumElements; ++i) { - if (i != 0) - CS << ","; - if (CDS->getElementType()->isIntegerTy()) - CS << CDS->getElementAsInteger(i); - else if (CDS->getElementType()->isFloatTy()) - CS << CDS->getElementAsFloat(i); - else if (CDS->getElementType()->isDoubleTy()) - CS << CDS->getElementAsDouble(i); - else - CS << "?"; + for (int l = 0; l != NumLanes; ++l) { + for (int i = 0, NumElements = CDS->getNumElements(); i < NumElements; ++i) { + if (i != 0 || l != 0) + CS << ","; + if (CDS->getElementType()->isIntegerTy()) + CS << CDS->getElementAsInteger(i); + else if (CDS->getElementType()->isFloatTy()) + CS << CDS->getElementAsFloat(i); + else if (CDS->getElementType()->isDoubleTy()) + CS << CDS->getElementAsDouble(i); + else + CS << "?"; + } } CS << "]"; OutStreamer->AddComment(CS.str(), !EnablePrintSchedInfo); } else if (auto *CV = dyn_cast<ConstantVector>(C)) { CS << "<"; - for (int i = 0, NumOperands = CV->getNumOperands(); i < NumOperands; ++i) { - if (i != 0) - CS << ","; - Constant *COp = CV->getOperand(i); - if (isa<UndefValue>(COp)) { - CS << "u"; - } else if (auto *CI = dyn_cast<ConstantInt>(COp)) { - if (CI->getBitWidth() <= 64) { - CS << CI->getZExtValue(); - } else { - // print multi-word constant as (w0,w1) - const auto &Val = CI->getValue(); - CS << "("; - for (int i = 0, N = Val.getNumWords(); i < N; ++i) { - if (i > 0) - CS << ","; - CS << Val.getRawData()[i]; - } - CS << ")"; - } - } else if (auto *CF = dyn_cast<ConstantFP>(COp)) { - SmallString<32> Str; - CF->getValueAPF().toString(Str); - CS << Str; - } else { - CS << "?"; + for (int l = 0; l != NumLanes; ++l) { + for (int i = 0, NumOperands = CV->getNumOperands(); i < NumOperands; ++i) { + if (i != 0 || l != 0) + CS << ","; + printConstant(CV->getOperand(i), CS); } } CS << ">"; @@ -1826,6 +1866,85 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { } } break; + case X86::VBROADCASTSSrm: + case X86::VBROADCASTSSYrm: + case X86::VBROADCASTSSZ128m: + case X86::VBROADCASTSSZ256m: + case X86::VBROADCASTSSZm: + case X86::VBROADCASTSDYrm: + case X86::VBROADCASTSDZ256m: + case X86::VBROADCASTSDZm: + case X86::VPBROADCASTBrm: + case X86::VPBROADCASTBYrm: + case X86::VPBROADCASTBZ128m: + case X86::VPBROADCASTBZ256m: + case X86::VPBROADCASTBZm: + case X86::VPBROADCASTDrm: + case X86::VPBROADCASTDYrm: + case X86::VPBROADCASTDZ128m: + case X86::VPBROADCASTDZ256m: + case X86::VPBROADCASTDZm: + case X86::VPBROADCASTQrm: + case X86::VPBROADCASTQYrm: + case X86::VPBROADCASTQZ128m: + case X86::VPBROADCASTQZ256m: + case X86::VPBROADCASTQZm: + case
X86::VPBROADCASTWrm: + case X86::VPBROADCASTWYrm: + case X86::VPBROADCASTWZ128m: + case X86::VPBROADCASTWZ256m: + case X86::VPBROADCASTWZm: + if (!OutStreamer->isVerboseAsm()) + break; + if (MI->getNumOperands() <= 4) + break; + if (auto *C = getConstantFromPool(*MI, MI->getOperand(4))) { + int NumElts; + switch (MI->getOpcode()) { + default: llvm_unreachable("Invalid opcode"); + case X86::VBROADCASTSSrm: NumElts = 4; break; + case X86::VBROADCASTSSYrm: NumElts = 8; break; + case X86::VBROADCASTSSZ128m: NumElts = 4; break; + case X86::VBROADCASTSSZ256m: NumElts = 8; break; + case X86::VBROADCASTSSZm: NumElts = 16; break; + case X86::VBROADCASTSDYrm: NumElts = 4; break; + case X86::VBROADCASTSDZ256m: NumElts = 4; break; + case X86::VBROADCASTSDZm: NumElts = 8; break; + case X86::VPBROADCASTBrm: NumElts = 16; break; + case X86::VPBROADCASTBYrm: NumElts = 32; break; + case X86::VPBROADCASTBZ128m: NumElts = 16; break; + case X86::VPBROADCASTBZ256m: NumElts = 32; break; + case X86::VPBROADCASTBZm: NumElts = 64; break; + case X86::VPBROADCASTDrm: NumElts = 4; break; + case X86::VPBROADCASTDYrm: NumElts = 8; break; + case X86::VPBROADCASTDZ128m: NumElts = 4; break; + case X86::VPBROADCASTDZ256m: NumElts = 8; break; + case X86::VPBROADCASTDZm: NumElts = 16; break; + case X86::VPBROADCASTQrm: NumElts = 2; break; + case X86::VPBROADCASTQYrm: NumElts = 4; break; + case X86::VPBROADCASTQZ128m: NumElts = 2; break; + case X86::VPBROADCASTQZ256m: NumElts = 4; break; + case X86::VPBROADCASTQZm: NumElts = 8; break; + case X86::VPBROADCASTWrm: NumElts = 8; break; + case X86::VPBROADCASTWYrm: NumElts = 16; break; + case X86::VPBROADCASTWZ128m: NumElts = 8; break; + case X86::VPBROADCASTWZ256m: NumElts = 16; break; + case X86::VPBROADCASTWZm: NumElts = 32; break; + } + + std::string Comment; + raw_string_ostream CS(Comment); + const MachineOperand &DstOp = MI->getOperand(0); + CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = "; + CS << "["; + for (int i = 0; i != NumElts; ++i) { + if (i != 0) + CS << ","; + printConstant(C, CS); + } + CS << "]"; + OutStreamer->AddComment(CS.str(), !EnablePrintSchedInfo); + } } MCInst TmpInst; diff --git a/interpreter/llvm/src/lib/Target/X86/X86MacroFusion.cpp b/interpreter/llvm/src/lib/Target/X86/X86MacroFusion.cpp index dd21e2b7c4a13..8fdf10617059a 100644 --- a/interpreter/llvm/src/lib/Target/X86/X86MacroFusion.cpp +++ b/interpreter/llvm/src/lib/Target/X86/X86MacroFusion.cpp @@ -2,39 +2,31 @@ // // The LLVM Compiler Infrastructure // -// \file This file is distributed under the University of Illinois Open Source +// This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // -// This file contains the X86 implementation of the DAG scheduling mutation to -// pair instructions back to back. +/// \file This file contains the X86 implementation of the DAG scheduling +/// mutation to pair instructions back to back. 
// //===----------------------------------------------------------------------===// #include "X86MacroFusion.h" #include "X86Subtarget.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetInstrInfo.h" - -#define DEBUG_TYPE "misched" - -STATISTIC(NumFused, "Number of instr pairs fused"); +#include "llvm/CodeGen/MacroFusion.h" using namespace llvm; -static cl::opt<bool> EnableMacroFusion("x86-misched-fusion", cl::Hidden, - cl::desc("Enable scheduling for macro fusion."), cl::init(true)); - -namespace { - -/// \brief Verify that the instruction pair, First and Second, -/// should be scheduled back to back. If either instruction is unspecified, -/// then verify that the other instruction may be part of a pair at all. -static bool shouldScheduleAdjacent(const X86Subtarget &ST, - const MachineInstr *First, - const MachineInstr *Second) { +/// \brief Check if the instr pair, FirstMI and SecondMI, should be fused +/// together. When FirstMI is unspecified, check whether SecondMI may be +/// part of a fused pair at all. +static bool shouldScheduleAdjacent(const TargetInstrInfo &TII, + const TargetSubtargetInfo &TSI, + const MachineInstr *FirstMI, + const MachineInstr &SecondMI) { + const X86Subtarget &ST = static_cast<const X86Subtarget &>(TSI); // Check if this processor supports macro-fusion. Since this is a minor // heuristic, we haven't specifically reserved a feature. hasAVX is a decent // proxy for SandyBridge+. @@ -47,13 +39,10 @@ static bool shouldScheduleAdjacent(const X86Subtarget &ST, FuseInc } FuseKind; - assert((First || Second) && "At least one instr must be specified"); - unsigned FirstOpcode = First - ? First->getOpcode() + unsigned FirstOpcode = FirstMI + ? FirstMI->getOpcode() : static_cast<unsigned>(X86::INSTRUCTION_LIST_END); - unsigned SecondOpcode = Second - ? Second->getOpcode() - : static_cast<unsigned>(X86::INSTRUCTION_LIST_END); + unsigned SecondOpcode = SecondMI.getOpcode(); switch (SecondOpcode) { default: @@ -203,69 +192,11 @@ static bool shouldScheduleAdjacent(const X86Subtarget &ST, } } -/// \brief Post-process the DAG to create cluster edges between instructions -/// that may be fused by the processor into a single operation. -class X86MacroFusion : public ScheduleDAGMutation { -public: - X86MacroFusion() {} - - void apply(ScheduleDAGInstrs *DAGInstrs) override; -}; - -void X86MacroFusion::apply(ScheduleDAGInstrs *DAGInstrs) { - ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI *>(DAGInstrs); - const X86Subtarget &ST = DAG->MF.getSubtarget<X86Subtarget>(); - - // For now, assume targets can only fuse with the branch. - SUnit &ExitSU = DAG->ExitSU; - MachineInstr *Branch = ExitSU.getInstr(); - if (!Branch || !shouldScheduleAdjacent(ST, nullptr, Branch)) - return; - - for (SDep &PredDep : ExitSU.Preds) { - if (PredDep.isWeak()) - continue; - SUnit &SU = *PredDep.getSUnit(); - MachineInstr &Pred = *SU.getInstr(); - if (!shouldScheduleAdjacent(ST, &Pred, Branch)) - continue; - - // Create a single weak edge from SU to ExitSU. The only effect is to cause - // bottom-up scheduling to heavily prioritize the clustered SU. There is no - // need to copy predecessor edges from ExitSU to SU, since top-down - // scheduling cannot prioritize ExitSU anyway. To defer top-down scheduling - // of SU, we could create an artificial edge from the deepest root, but it - // hasn't been needed yet. - bool Success = DAG->addEdge(&ExitSU, SDep(&SU, SDep::Cluster)); - (void)Success; - assert(Success && "No DAG nodes should be reachable from ExitSU"); - - // Adjust latency of data deps between the nodes.
- for (SDep &PredDep : ExitSU.Preds) - if (PredDep.getSUnit() == &SU) - PredDep.setLatency(0); - for (SDep &SuccDep : SU.Succs) - if (SuccDep.getSUnit() == &ExitSU) - SuccDep.setLatency(0); - - ++NumFused; - DEBUG(dbgs() << DAG->MF.getName() << "(): Macro fuse "; - SU.print(dbgs(), DAG); - dbgs() << " - ExitSU" - << " / " << DAG->TII->getName(Pred.getOpcode()) << " - " - << DAG->TII->getName(Branch->getOpcode()) << '\n';); - - break; - } -} - -} // end namespace - namespace llvm { std::unique_ptr<ScheduleDAGMutation> createX86MacroFusionDAGMutation() { - return EnableMacroFusion ? make_unique<X86MacroFusion>() : nullptr; + return createBranchMacroFusionDAGMutation(shouldScheduleAdjacent); } } // end namespace llvm diff --git a/interpreter/llvm/src/lib/Target/X86/X86MacroFusion.h b/interpreter/llvm/src/lib/Target/X86/X86MacroFusion.h index e630f802e8e63..13fa2d78a0185 100644 --- a/interpreter/llvm/src/lib/Target/X86/X86MacroFusion.h +++ b/interpreter/llvm/src/lib/Target/X86/X86MacroFusion.h @@ -2,23 +2,18 @@ // // The LLVM Compiler Infrastructure // -// \file This file is distributed under the University of Illinois Open Source +// This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // -// This file contains the X86 definition of the DAG scheduling mutation to pair -// instructions back to back. +/// \file This file contains the X86 definition of the DAG scheduling mutation +/// to pair instructions back to back. // //===----------------------------------------------------------------------===// -#include "X86InstrInfo.h" #include "llvm/CodeGen/MachineScheduler.h" -//===----------------------------------------------------------------------===// -// X86MacroFusion - DAG post-processing to encourage fusion of macro ops.
-//===----------------------------------------------------------------------===// - namespace llvm { /// Note that you have to add: diff --git a/interpreter/llvm/src/lib/Target/X86/X86OptimizeLEAs.cpp b/interpreter/llvm/src/lib/Target/X86/X86OptimizeLEAs.cpp index aabbf67a16b62..e6756b975c10c 100644 --- a/interpreter/llvm/src/lib/Target/X86/X86OptimizeLEAs.cpp +++ b/interpreter/llvm/src/lib/Target/X86/X86OptimizeLEAs.cpp @@ -27,8 +27,8 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DIBuilder.h" +#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/Function.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" diff --git a/interpreter/llvm/src/lib/Target/X86/X86RegisterInfo.cpp b/interpreter/llvm/src/lib/Target/X86/X86RegisterInfo.cpp index 7e4cba1c8345f..343da2573b55c 100644 --- a/interpreter/llvm/src/lib/Target/X86/X86RegisterInfo.cpp +++ b/interpreter/llvm/src/lib/Target/X86/X86RegisterInfo.cpp @@ -224,7 +224,7 @@ X86RegisterInfo::getPointerRegClass(const MachineFunction &MF, const TargetRegisterClass * X86RegisterInfo::getGPRsForTailCall(const MachineFunction &MF) const { const Function *F = MF.getFunction(); - if (IsWin64 || (F && F->getCallingConv() == CallingConv::X86_64_Win64)) + if (IsWin64 || (F && F->getCallingConv() == CallingConv::Win64)) return &X86::GR64_TCW64RegClass; else if (Is64Bit) return &X86::GR64_TCRegClass; @@ -334,7 +334,7 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { if (Is64Bit) return CSR_64_MostRegs_SaveList; break; - case CallingConv::X86_64_Win64: + case CallingConv::Win64: if (!HasSSE) return CSR_Win64_NoSSE_SaveList; return CSR_Win64_SaveList; @@ -450,7 +450,7 @@ X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF, if (Is64Bit) return CSR_64_MostRegs_RegMask; break; - case CallingConv::X86_64_Win64: + case CallingConv::Win64: return CSR_Win64_RegMask; case CallingConv::X86_64_SysV: return CSR_64_RegMask; diff --git a/interpreter/llvm/src/lib/Target/X86/X86RegisterInfo.td b/interpreter/llvm/src/lib/Target/X86/X86RegisterInfo.td index d235d2b40b15a..3a61a7247c72c 100644 --- a/interpreter/llvm/src/lib/Target/X86/X86RegisterInfo.td +++ b/interpreter/llvm/src/lib/Target/X86/X86RegisterInfo.td @@ -511,7 +511,7 @@ def VR256X : RegisterClass<"X86", [v8f32, v4f64, v32i8, v16i16, v8i32, v4i64], 256, (sequence "YMM%u", 0, 31)>; // Mask registers -def VK1 : RegisterClass<"X86", [i1], 16, (sequence "K%u", 0, 7)> {let Size = 16;} +def VK1 : RegisterClass<"X86", [v1i1], 16, (sequence "K%u", 0, 7)> {let Size = 16;} def VK2 : RegisterClass<"X86", [v2i1], 16, (add VK1)> {let Size = 16;} def VK4 : RegisterClass<"X86", [v4i1], 16, (add VK2)> {let Size = 16;} def VK8 : RegisterClass<"X86", [v8i1], 16, (add VK4)> {let Size = 16;} @@ -519,7 +519,7 @@ def VK16 : RegisterClass<"X86", [v16i1], 16, (add VK8)> {let Size = 16;} def VK32 : RegisterClass<"X86", [v32i1], 32, (add VK16)> {let Size = 32;} def VK64 : RegisterClass<"X86", [v64i1], 64, (add VK32)> {let Size = 64;} -def VK1WM : RegisterClass<"X86", [i1], 16, (sub VK1, K0)> {let Size = 16;} +def VK1WM : RegisterClass<"X86", [v1i1], 16, (sub VK1, K0)> {let Size = 16;} def VK2WM : RegisterClass<"X86", [v2i1], 16, (sub VK2, K0)> {let Size = 16;} def VK4WM : RegisterClass<"X86", [v4i1], 16, (sub VK4, K0)> {let Size = 16;} def VK8WM : RegisterClass<"X86", [v8i1], 16, (sub VK8, K0)> {let Size = 16;} diff --git 
a/interpreter/llvm/src/lib/Target/X86/X86SchedHaswell.td b/interpreter/llvm/src/lib/Target/X86/X86SchedHaswell.td index 677e82459766d..03c8ccb53afeb 100644 --- a/interpreter/llvm/src/lib/Target/X86/X86SchedHaswell.td +++ b/interpreter/llvm/src/lib/Target/X86/X86SchedHaswell.td @@ -1488,6 +1488,39 @@ def : InstRW<[WriteVPGATHERQQ256, ReadAfterLd], (instregex "VPGATHERQQYrm")>; //-- Arithmetic instructions --// +//////////////////////////////////////////////////////////////////////////////// +// Horizontal add/sub instructions. +//////////////////////////////////////////////////////////////////////////////// + +// HADD, HSUB PS/PD +// x,x / v,v,v. +def : WriteRes { + let Latency = 5; + let NumMicroOps = 3; + let ResourceCycles = [1, 2]; +} + +// x,m / v,v,m. +def : WriteRes { + let Latency = 9; + let NumMicroOps = 4; + let ResourceCycles = [1, 2, 1]; +} + +// PHADD|PHSUB (S) W/D. +// v <- v,v. +def : WriteRes { + let Latency = 3; + let NumMicroOps = 3; + let ResourceCycles = [1, 2]; +} +// v <- v,m. +def : WriteRes { + let Latency = 6; + let NumMicroOps = 3; + let ResourceCycles = [1, 2, 1]; +} + // PHADD|PHSUB (S) W/D. // v <- v,v. def WritePHADDSUBr : SchedWriteRes<[HWPort1, HWPort5]> { diff --git a/interpreter/llvm/src/lib/Target/X86/X86SchedSandyBridge.td b/interpreter/llvm/src/lib/Target/X86/X86SchedSandyBridge.td index eca65c2892b7e..b8ec5883152c3 100644 --- a/interpreter/llvm/src/lib/Target/X86/X86SchedSandyBridge.td +++ b/interpreter/llvm/src/lib/Target/X86/X86SchedSandyBridge.td @@ -157,6 +157,31 @@ def : WriteRes { let ResourceCycles = [1, 1, 1, 1]; } +//////////////////////////////////////////////////////////////////////////////// +// Horizontal add/sub instructions. +//////////////////////////////////////////////////////////////////////////////// +// HADD, HSUB PS/PD +// x,x / v,v,v. +def : WriteRes { + let Latency = 3; +} + +// x,m / v,v,m. +def : WriteRes { + let Latency = 7; + let ResourceCycles = [1, 1]; +} + +// PHADD|PHSUB (S) W/D. +// v <- v,v. +def : WriteRes; + +// v <- v,m. +def : WriteRes { + let Latency = 5; + let ResourceCycles = [1, 1]; +} + // String instructions. // Packed Compare Implicit Length Strings, Return Mask def : WriteRes { diff --git a/interpreter/llvm/src/lib/Target/X86/X86Schedule.td b/interpreter/llvm/src/lib/Target/X86/X86Schedule.td index 4eae6ca7abe33..d831a7974359a 100644 --- a/interpreter/llvm/src/lib/Target/X86/X86Schedule.td +++ b/interpreter/llvm/src/lib/Target/X86/X86Schedule.td @@ -77,6 +77,10 @@ defm WriteFVarBlend : X86SchedWritePair; // Fp vector variable blends. // FMA Scheduling helper class. class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; } +// Horizontal Add/Sub (float and integer) +defm WriteFHAdd : X86SchedWritePair; +defm WritePHAdd : X86SchedWritePair; + // Vector integer operations. defm WriteVecALU : X86SchedWritePair; // Vector integer ALU op, no logicals. defm WriteVecShift : X86SchedWritePair; // Vector integer shifts. 
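The WriteFHAdd and WritePHAdd definitions added above use X86SchedWritePair, which expands into two scheduling classes: one for the register form and a folded-load variant that the per-CPU models (Haswell, Sandy Bridge, SLM, BtVer2 in the hunks around this one) fill in with latencies and resource cycles. A minimal sketch of the pairing arithmetic in plain C++ — the names SchedPair and FoldedLatency are hypothetical, not LLVM's TableGen API:

    #include <cstdio>

    // Illustrative model only: a register/folded-load scheduling pair.
    struct SchedPair {
      const char *Name;
      unsigned RegLatency;  // latency of the register-register form
      unsigned LoadCycles;  // extra cycles when an operand is folded from memory
      unsigned FoldedLatency() const { return RegLatency + LoadCycles; }
    };

    int main() {
      // Mirrors the Haswell WriteFHAdd entries in this patch: 5 cycles for
      // the register form, 9 cycles for the folded-load form (4-cycle load).
      SchedPair FHAdd{"WriteFHAdd", 5, 4};
      std::printf("%s: reg=%u folded=%u\n", FHAdd.Name, FHAdd.RegLatency,
                  FHAdd.FoldedLatency());
      return 0;
    }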
@@ -659,5 +663,6 @@ include "X86ScheduleAtom.td" include "X86SchedSandyBridge.td" include "X86SchedHaswell.td" include "X86ScheduleSLM.td" +include "X86ScheduleZnver1.td" include "X86ScheduleBtVer2.td" diff --git a/interpreter/llvm/src/lib/Target/X86/X86ScheduleBtVer2.td b/interpreter/llvm/src/lib/Target/X86/X86ScheduleBtVer2.td index ce1ece34e431a..9dcc968a1a7af 100644 --- a/interpreter/llvm/src/lib/Target/X86/X86ScheduleBtVer2.td +++ b/interpreter/llvm/src/lib/Target/X86/X86ScheduleBtVer2.td @@ -319,6 +319,38 @@ def : WriteRes { let ResourceCycles = [1, 1]; } +//////////////////////////////////////////////////////////////////////////////// +// Horizontal add/sub instructions. +//////////////////////////////////////////////////////////////////////////////// + +def : WriteRes { + let Latency = 3; +} + +def : WriteRes { + let Latency = 8; +} + +def : WriteRes { + let ResourceCycles = [1]; +} +def : WriteRes { + let Latency = 6; + let ResourceCycles = [1, 1]; +} + +def WriteFHAddY: SchedWriteRes<[JFPU0]> { + let Latency = 3; + let ResourceCycles = [2]; +} +def : InstRW<[WriteFHAddY], (instregex "VH(ADD|SUB)P(S|D)Yrr")>; + +def WriteFHAddYLd: SchedWriteRes<[JLAGU, JFPU0]> { + let Latency = 8; + let ResourceCycles = [1, 2]; +} +def : InstRW<[WriteFHAddYLd], (instregex "VH(ADD|SUB)P(S|D)Yrm")>; + //////////////////////////////////////////////////////////////////////////////// // Carry-less multiplication instructions. //////////////////////////////////////////////////////////////////////////////// @@ -337,5 +369,98 @@ def : WriteRes { let Latency = 100; } def : WriteRes { let Latency = 100; } def : WriteRes; def : WriteRes; + +//////////////////////////////////////////////////////////////////////////////// +// SSE4A instructions. +//////////////////////////////////////////////////////////////////////////////// + +def WriteEXTRQ: SchedWriteRes<[JFPU01]> { + let Latency = 1; + let ResourceCycles = [1]; +} +def : InstRW<[WriteEXTRQ], (instregex "EXTRQ")>; + +def WriteINSERTQ: SchedWriteRes<[JFPU01]> { + let Latency = 2; + let ResourceCycles = [4]; +} +def : InstRW<[WriteINSERTQ], (instregex "INSERTQ")>; + +//////////////////////////////////////////////////////////////////////////////// +// AVX instructions. 
+//////////////////////////////////////////////////////////////////////////////// + +def WriteFAddY: SchedWriteRes<[JFPU0]> { + let Latency = 3; + let ResourceCycles = [2]; +} +def : InstRW<[WriteFAddY], (instregex "VADD(SUB)?P(S|D)Yrr", "VSUBP(S|D)Yrr")>; + +def WriteFAddYLd: SchedWriteRes<[JLAGU, JFPU0]> { + let Latency = 8; + let ResourceCycles = [1, 2]; +} +def : InstRW<[WriteFAddYLd, ReadAfterLd], (instregex "VADD(SUB)?P(S|D)Yrm", "VSUBP(S|D)Yrm")>; + +def WriteFDivY: SchedWriteRes<[JFPU1]> { + let Latency = 38; + let ResourceCycles = [38]; +} +def : InstRW<[WriteFDivY], (instregex "VDIVP(D|S)Yrr")>; + +def WriteFDivYLd: SchedWriteRes<[JLAGU, JFPU1]> { + let Latency = 43; + let ResourceCycles = [1, 38]; +} +def : InstRW<[WriteFDivYLd, ReadAfterLd], (instregex "VDIVP(S|D)Yrm")>; + +def WriteVMULYPD: SchedWriteRes<[JFPU1]> { + let Latency = 4; + let ResourceCycles = [4]; +} +def : InstRW<[WriteVMULYPD], (instregex "VMULPDYrr")>; + +def WriteVMULYPDLd: SchedWriteRes<[JLAGU, JFPU1]> { + let Latency = 9; + let ResourceCycles = [1, 4]; +} +def : InstRW<[WriteVMULYPDLd, ReadAfterLd], (instregex "VMULPDYrm")>; + +def WriteVMULYPS: SchedWriteRes<[JFPU1]> { + let Latency = 2; + let ResourceCycles = [2]; +} +def : InstRW<[WriteVMULYPS], (instregex "VMULPSYrr", "VRCPPSYr", "VRSQRTPSYr")>; + +def WriteVMULYPSLd: SchedWriteRes<[JLAGU, JFPU1]> { + let Latency = 7; + let ResourceCycles = [1, 2]; +} +def : InstRW<[WriteVMULYPSLd, ReadAfterLd], (instregex "VMULPSYrm", "VRCPPSYm", "VRSQRTPSYm")>; + +def WriteVSQRTYPD: SchedWriteRes<[JFPU1]> { + let Latency = 54; + let ResourceCycles = [54]; +} +def : InstRW<[WriteVSQRTYPD], (instregex "VSQRTPDYr")>; + +def WriteVSQRTYPDLd: SchedWriteRes<[JLAGU, JFPU1]> { + let Latency = 59; + let ResourceCycles = [1, 54]; +} +def : InstRW<[WriteVSQRTYPDLd], (instregex "VSQRTPDYm")>; + +def WriteVSQRTYPS: SchedWriteRes<[JFPU1]> { + let Latency = 42; + let ResourceCycles = [42]; +} +def : InstRW<[WriteVSQRTYPS], (instregex "VSQRTPSYr")>; + +def WriteVSQRTYPSLd: SchedWriteRes<[JLAGU, JFPU1]> { + let Latency = 47; + let ResourceCycles = [1, 42]; +} +def : InstRW<[WriteVSQRTYPSLd], (instregex "VSQRTPSYm")>; + } // SchedModel diff --git a/interpreter/llvm/src/lib/Target/X86/X86ScheduleSLM.td b/interpreter/llvm/src/lib/Target/X86/X86ScheduleSLM.td index f95d4fa041774..03ed2db2350dd 100644 --- a/interpreter/llvm/src/lib/Target/X86/X86ScheduleSLM.td +++ b/interpreter/llvm/src/lib/Target/X86/X86ScheduleSLM.td @@ -137,6 +137,33 @@ defm : SMWriteResPair; defm : SMWriteResPair; defm : SMWriteResPair; +//////////////////////////////////////////////////////////////////////////////// +// Horizontal add/sub instructions. +//////////////////////////////////////////////////////////////////////////////// + +// HADD, HSUB PS/PD + +def : WriteRes { + let Latency = 3; + let ResourceCycles = [2]; +} + +def : WriteRes { + let Latency = 6; + let ResourceCycles = [2, 1]; +} + +// PHADD|PHSUB (S) W/D. +def : WriteRes { + let Latency = 1; + let ResourceCycles = [1]; +} + +def : WriteRes { + let Latency = 4; + let ResourceCycles = [1, 1]; +} + // String instructions. 
// Packed Compare Implicit Length Strings, Return Mask def : WriteRes { diff --git a/interpreter/llvm/src/lib/Target/X86/X86ScheduleZnver1.td b/interpreter/llvm/src/lib/Target/X86/X86ScheduleZnver1.td new file mode 100644 index 0000000000000..d5b4cfe2ddee0 --- /dev/null +++ b/interpreter/llvm/src/lib/Target/X86/X86ScheduleZnver1.td @@ -0,0 +1,223 @@ +//=- X86ScheduleZnver1.td - X86 Znver1 Scheduling -------------*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the machine model for Znver1 to support instruction +// scheduling and other instruction cost heuristics. +// +//===----------------------------------------------------------------------===// + +def Znver1Model : SchedMachineModel { + // Zen can decode 4 instructions per cycle. + let IssueWidth = 4; + // MicroOpBufferSize is based on the size of the reorder buffer. + let MicroOpBufferSize = 192; + let LoadLatency = 4; + let MispredictPenalty = 17; + let HighLatency = 25; + let PostRAScheduler = 1; + + // FIXME: Scheduling information has not been added for all instructions + // yet, so CompleteModel is cleared to mark the model as incomplete. + let CompleteModel = 0; +} + +let SchedModel = Znver1Model in { + +// Zen can issue micro-ops to 10 different units in one cycle. +// These are +// * Four integer ALU units (ZALU0, ZALU1, ZALU2, ZALU3) +// * Two AGU units (ZAGU0, ZAGU1) +// * Four FPU units (ZFPU0, ZFPU1, ZFPU2, ZFPU3) +// The AGUs feed the load/store queues at two loads and one store per cycle. + +// Four ALU units are defined below +def ZnALU0 : ProcResource<1>; +def ZnALU1 : ProcResource<1>; +def ZnALU2 : ProcResource<1>; +def ZnALU3 : ProcResource<1>; + +// Two AGU units are defined below +def ZnAGU0 : ProcResource<1>; +def ZnAGU1 : ProcResource<1>; + +// Four FPU units are defined below +def ZnFPU0 : ProcResource<1>; +def ZnFPU1 : ProcResource<1>; +def ZnFPU2 : ProcResource<1>; +def ZnFPU3 : ProcResource<1>; + +// FPU grouping +def ZnFPU : ProcResGroup<[ZnFPU0, ZnFPU1, ZnFPU2, ZnFPU3]>; +def ZnFPU013 : ProcResGroup<[ZnFPU0, ZnFPU1, ZnFPU3]>; +def ZnFPU01 : ProcResGroup<[ZnFPU0, ZnFPU1]>; +def ZnFPU12 : ProcResGroup<[ZnFPU1, ZnFPU2]>; +def ZnFPU13 : ProcResGroup<[ZnFPU1, ZnFPU3]>; +def ZnFPU23 : ProcResGroup<[ZnFPU2, ZnFPU3]>; +def ZnFPU02 : ProcResGroup<[ZnFPU0, ZnFPU2]>; +def ZnFPU03 : ProcResGroup<[ZnFPU0, ZnFPU3]>; + +// Below are the groupings of the units; micro-ops that can issue to +// multiple units are modelled this way. + +// ALU grouping +// ZnALU03 - 0,3 grouping +def ZnALU03: ProcResGroup<[ZnALU0, ZnALU3]>; + +// 56 Entry (14x4 entries) Int Scheduler +def ZnALU : ProcResGroup<[ZnALU0, ZnALU1, ZnALU2, ZnALU3]> { + let BufferSize=56; +} + +// 28 Entry (14x2) AGU group. AGUs can't be used for all ALU operations +// but are relevant for some instructions. +def ZnAGU : ProcResGroup<[ZnAGU0, ZnAGU1]> { + let BufferSize=28; +} + +// Integer Multiplication issued on ALU1. +def ZnMultiplier : ProcResource<1>; + +// Integer division issued on ALU2.
+def ZnDivider : ProcResource<1>; + +// The 4-cycle load-to-use latency is captured here. +def : ReadAdvance<ReadAfterLd, 4>; + +// (A folded load is an instruction that both loads and performs an operation.) +// E.g. ADDPD xmm, [mem] has two micro-ops: +// Instructions with folded loads are usually micro-fused, so they only appear +// as two micro-ops. +// a. the load and +// b. the addpd +// This multiclass is for folded loads for integer units. +multiclass ZnWriteResPair<X86FoldableSchedWrite SchedRW, ProcResourceKind ExePort, int Lat> { + // The register variant occupies the execution port for one cycle. + def : WriteRes<SchedRW, [ExePort]> { let Latency = Lat; } + + // The memory variant also uses a cycle on ZnAGU and + // adds 4 cycles to the latency. + def : WriteRes<SchedRW.Folded, [ZnAGU, ExePort]> { + let Latency = !add(Lat, 4); + } +} + +// This multiclass is for folded loads for floating point units. +multiclass ZnWriteResFpuPair<X86FoldableSchedWrite SchedRW, ProcResourceKind ExePort, int Lat> { + // The register variant occupies the execution port for one cycle. + def : WriteRes<SchedRW, [ExePort]> { let Latency = Lat; } + + // The memory variant also uses a cycle on ZnAGU and + // adds 7 cycles to the latency. + def : WriteRes<SchedRW.Folded, [ZnAGU, ExePort]> { + let Latency = !add(Lat, 7); + } +} + +// WriteRMW is set for instructions with a memory write +// operation in codegen. +def : WriteRes; + +def : WriteRes; +def : WriteRes; +def : WriteRes { let Latency = 8; } + +def : WriteRes; +def : WriteRes; +defm : ZnWriteResPair; +defm : ZnWriteResPair; +defm : ZnWriteResPair; + +// IDIV +def : WriteRes { + let Latency = 41; + let ResourceCycles = [1, 41]; +} + +def : WriteRes { + let Latency = 45; + let ResourceCycles = [1, 4, 41]; +} + +// IMUL +def : WriteRes{ + let Latency = 4; +} +def : WriteRes { + let Latency = 4; +} + +def : WriteRes { + let Latency = 8; +} + +// Floating point operations +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; + +// Vector integer operations which use the FPU units +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; + +// Vector Shift Operations +defm : ZnWriteResFpuPair; + +// AES Instructions. +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; + +def : WriteRes; +def : WriteRes; + +// The following instructions with latency=100 are microcoded. +// We set a long latency so as to block the entire pipeline.
+defm : ZnWriteResFpuPair; + +//Microcoded Instructions +let Latency = 100 in { + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + } +} diff --git a/interpreter/llvm/src/lib/Target/X86/X86SelectionDAGInfo.cpp b/interpreter/llvm/src/lib/Target/X86/X86SelectionDAGInfo.cpp index d4b2392eb1f52..c67aa04aebeab 100644 --- a/interpreter/llvm/src/lib/Target/X86/X86SelectionDAGInfo.cpp +++ b/interpreter/llvm/src/lib/Target/X86/X86SelectionDAGInfo.cpp @@ -11,11 +11,11 @@ // //===----------------------------------------------------------------------===// -#include "X86InstrInfo.h" +#include "X86SelectionDAGInfo.h" #include "X86ISelLowering.h" +#include "X86InstrInfo.h" #include "X86RegisterInfo.h" #include "X86Subtarget.h" -#include "X86SelectionDAGInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/Target/TargetLowering.h" diff --git a/interpreter/llvm/src/lib/Target/X86/X86Subtarget.cpp b/interpreter/llvm/src/lib/Target/X86/X86Subtarget.cpp index d66d39dcee174..24845beac22d1 100644 --- a/interpreter/llvm/src/lib/Target/X86/X86Subtarget.cpp +++ b/interpreter/llvm/src/lib/Target/X86/X86Subtarget.cpp @@ -11,10 +11,23 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/X86BaseInfo.h" +#include "X86.h" + +#ifdef LLVM_BUILD_GLOBAL_ISEL +#include "X86CallLowering.h" +#include "X86LegalizerInfo.h" +#include "X86RegisterBankInfo.h" +#endif #include "X86Subtarget.h" +#include "MCTargetDesc/X86BaseInfo.h" #include "X86TargetMachine.h" #include "llvm/ADT/Triple.h" +#ifdef LLVM_BUILD_GLOBAL_ISEL +#include "llvm/CodeGen/GlobalISel/CallLowering.h" +#include "llvm/CodeGen/GlobalISel/InstructionSelect.h" +#include "llvm/CodeGen/GlobalISel/Legalizer.h" +#include "llvm/CodeGen/GlobalISel/RegBankSelect.h" +#endif #include "llvm/IR/Attributes.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Function.h" @@ -286,6 +299,7 @@ void X86Subtarget::initializeEnvironment() { HasCDI = false; HasPFI = false; HasDQI = false; + HasVPOPCNTDQ = false; HasBWI = false; HasVLX = false; HasADX = false; @@ -320,6 +334,7 @@ void X86Subtarget::initializeEnvironment() { CallRegIndirect = false; LEAUsesAG = false; SlowLEA = false; + Slow3OpsLEA = false; SlowIncDec = false; stackAlignment = 4; // FIXME: this is a known good value for Yonah. How about others? 
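The X86Subtarget.cpp hunks below move the X86GISelActualAccessor struct out of X86TargetMachine.cpp and construct the GlobalISel objects in the subtarget constructor instead. The accessor indirection lets builds without LLVM_BUILD_GLOBAL_ISEL fall back to a base class whose getters all return null. A reduced sketch of that pattern in self-contained C++ (simplified types; only the CallLowering getter shown; the real classes live under llvm/CodeGen/GlobalISel):

    #include <memory>

    struct CallLowering {};

    // Base accessor: used when GlobalISel is not built; every getter is null.
    struct GISelAccessor {
      virtual ~GISelAccessor() = default;
      virtual const CallLowering *getCallLowering() const { return nullptr; }
    };

    // "Actual" accessor: owns the GlobalISel objects and hands out raw pointers.
    struct ActualAccessor : GISelAccessor {
      std::unique_ptr<CallLowering> CallLoweringInfo;
      const CallLowering *getCallLowering() const override {
        return CallLoweringInfo.get();
      }
    };

    int main() {
      ActualAccessor A;
      A.CallLoweringInfo = std::make_unique<CallLowering>();
      const GISelAccessor &GISel = A;  // callers only see the base interface
      return GISel.getCallLowering() ? 0 : 1;
    }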
@@ -334,10 +349,38 @@ X86Subtarget &X86Subtarget::initializeSubtargetDependencies(StringRef CPU, return *this; } +#ifdef LLVM_BUILD_GLOBAL_ISEL +namespace { + +struct X86GISelActualAccessor : public GISelAccessor { + std::unique_ptr<CallLowering> CallLoweringInfo; + std::unique_ptr<LegalizerInfo> Legalizer; + std::unique_ptr<RegisterBankInfo> RegBankInfo; + std::unique_ptr<InstructionSelector> InstSelector; + + const CallLowering *getCallLowering() const override { + return CallLoweringInfo.get(); + } + + const InstructionSelector *getInstructionSelector() const override { + return InstSelector.get(); + } + + const LegalizerInfo *getLegalizerInfo() const override { + return Legalizer.get(); + } + + const RegisterBankInfo *getRegBankInfo() const override { + return RegBankInfo.get(); + } +}; + +} // end anonymous namespace +#endif + X86Subtarget::X86Subtarget(const Triple &TT, StringRef CPU, StringRef FS, const X86TargetMachine &TM, - unsigned StackAlignOverride, bool OptForSize, - bool OptForMinSize) + unsigned StackAlignOverride) : X86GenSubtargetInfo(TT, CPU, FS), X86ProcFamily(Others), PICStyle(PICStyles::None), TM(TM), TargetTriple(TT), StackAlignOverride(StackAlignOverride), @@ -347,8 +390,7 @@ X86Subtarget::X86Subtarget(const Triple &TT, StringRef CPU, StringRef FS, In16BitMode(TargetTriple.getArch() == Triple::x86 && TargetTriple.getEnvironment() == Triple::CODE16), InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this), - FrameLowering(*this, getStackAlignment()), OptForSize(OptForSize), - OptForMinSize(OptForMinSize) { + FrameLowering(*this, getStackAlignment()) { // Determine the PICStyle based on the target selected. if (!isPositionIndependent()) setPICStyle(PICStyles::None); @@ -360,6 +402,19 @@ X86Subtarget::X86Subtarget(const Triple &TT, StringRef CPU, StringRef FS, setPICStyle(PICStyles::StubPIC); else if (isTargetELF()) setPICStyle(PICStyles::GOT); +#ifndef LLVM_BUILD_GLOBAL_ISEL + GISelAccessor *GISel = new GISelAccessor(); +#else + X86GISelActualAccessor *GISel = new X86GISelActualAccessor(); + + GISel->CallLoweringInfo.reset(new X86CallLowering(*getTargetLowering())); + GISel->Legalizer.reset(new X86LegalizerInfo(*this, TM)); + + auto *RBI = new X86RegisterBankInfo(*getRegisterInfo()); + GISel->RegBankInfo.reset(RBI); + GISel->InstSelector.reset(createX86InstructionSelector(TM, *this, *RBI)); +#endif + setGISelAccessor(*GISel); } const CallLowering *X86Subtarget::getCallLowering() const { diff --git a/interpreter/llvm/src/lib/Target/X86/X86Subtarget.h b/interpreter/llvm/src/lib/Target/X86/X86Subtarget.h index de1514243aebc..427a0001bef98 100644 --- a/interpreter/llvm/src/lib/Target/X86/X86Subtarget.h +++ b/interpreter/llvm/src/lib/Target/X86/X86Subtarget.h @@ -58,7 +58,7 @@ class X86Subtarget final : public X86GenSubtargetInfo { }; enum X86ProcFamilyEnum { - Others, IntelAtom, IntelSLM + Others, IntelAtom, IntelSLM, IntelGLM }; /// X86 processor family: Intel Atom, and others @@ -253,6 +253,11 @@ class X86Subtarget final : public X86GenSubtargetInfo { /// True if the LEA instruction with certain arguments is slow bool SlowLEA; + /// True if the LEA instruction has all three source operands: base, index, + /// and offset, or if it uses base and index registers where the base is + /// EBP, RBP, or R13 + bool Slow3OpsLEA; + /// True if INC and DEC instructions are slow when writing to flags bool SlowIncDec; @@ -265,6 +270,9 @@ class X86Subtarget final : public X86GenSubtargetInfo { /// Processor has AVX-512 Conflict Detection Instructions bool HasCDI; + /// Processor has AVX-512 population count Instructions + 
bool HasVPOPCNTDQ; + /// Processor has AVX-512 Doubleword and Quadword instructions bool HasDQI; @@ -331,16 +339,12 @@ class X86Subtarget final : public X86GenSubtargetInfo { X86TargetLowering TLInfo; X86FrameLowering FrameLowering; - bool OptForSize; - bool OptForMinSize; - public: /// This constructor initializes the data members to match that /// of the specified triple. /// X86Subtarget(const Triple &TT, StringRef CPU, StringRef FS, - const X86TargetMachine &TM, unsigned StackAlignOverride, - bool OptForSize, bool OptForMinSize); + const X86TargetMachine &TM, unsigned StackAlignOverride); /// This object will take ownership of \p GISelAccessor. void setGISelAccessor(GISelAccessor &GISel) { this->GISel.reset(&GISel); } @@ -490,8 +494,10 @@ class X86Subtarget final : public X86GenSubtargetInfo { bool callRegIndirect() const { return CallRegIndirect; } bool LEAusesAG() const { return LEAUsesAG; } bool slowLEA() const { return SlowLEA; } + bool slow3OpsLEA() const { return Slow3OpsLEA; } bool slowIncDec() const { return SlowIncDec; } bool hasCDI() const { return HasCDI; } + bool hasVPOPCNTDQ() const { return HasVPOPCNTDQ; } bool hasPFI() const { return HasPFI; } bool hasERI() const { return HasERI; } bool hasDQI() const { return HasDQI; } @@ -507,9 +513,6 @@ class X86Subtarget final : public X86GenSubtargetInfo { bool isSLM() const { return X86ProcFamily == IntelSLM; } bool useSoftFloat() const { return UseSoftFloat; } - bool getOptForSize() const { return OptForSize; } - bool getOptForMinSize() const { return OptForMinSize; } - /// Use mfence if we have SSE2 or we're on x86-64 (even if we asked for /// no-sse2). There isn't any reason to disable it if the target processor /// supports it. @@ -594,7 +597,7 @@ class X86Subtarget final : public X86GenSubtargetInfo { case CallingConv::Intel_OCL_BI: return isTargetWin64(); // This convention allows using the Win64 convention on other targets. - case CallingConv::X86_64_Win64: + case CallingConv::Win64: return true; // This convention allows using the SysV convention on Windows targets. 
case CallingConv::X86_64_SysV: diff --git a/interpreter/llvm/src/lib/Target/X86/X86TargetMachine.cpp b/interpreter/llvm/src/lib/Target/X86/X86TargetMachine.cpp index 9a82e6e504634..08c2cdaefe71d 100644 --- a/interpreter/llvm/src/lib/Target/X86/X86TargetMachine.cpp +++ b/interpreter/llvm/src/lib/Target/X86/X86TargetMachine.cpp @@ -15,23 +15,19 @@ #include "X86.h" #include "X86CallLowering.h" #include "X86LegalizerInfo.h" -#ifdef LLVM_BUILD_GLOBAL_ISEL -#include "X86RegisterBankInfo.h" -#endif #include "X86MacroFusion.h" #include "X86Subtarget.h" #include "X86TargetMachine.h" #include "X86TargetObjectFile.h" #include "X86TargetTransformInfo.h" #include "llvm/ADT/Optional.h" -#include "llvm/ADT/SmallString.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/ExecutionDepsFix.h" #include "llvm/CodeGen/GlobalISel/CallLowering.h" -#include "llvm/CodeGen/GlobalISel/GISelAccessor.h" #include "llvm/CodeGen/GlobalISel/IRTranslator.h" #include "llvm/CodeGen/GlobalISel/InstructionSelect.h" #include "llvm/CodeGen/GlobalISel/Legalizer.h" @@ -61,6 +57,7 @@ static cl::opt EnableMachineCombinerPass("x86-machine-combiner", namespace llvm { void initializeWinEHStatePassPass(PassRegistry &); +void initializeFixupLEAPassPass(PassRegistry &); void initializeX86ExecutionDepsFixPass(PassRegistry &); } // end namespace llvm @@ -75,6 +72,7 @@ extern "C" void LLVMInitializeX86Target() { initializeWinEHStatePassPass(PR); initializeFixupBWInstPassPass(PR); initializeEvexToVexInstPassPass(PR); + initializeFixupLEAPassPass(PR); initializeX86ExecutionDepsFixPass(PR); } @@ -89,6 +87,8 @@ static std::unique_ptr createTLOF(const Triple &TT) { return llvm::make_unique(); if (TT.isOSLinux() || TT.isOSNaCl() || TT.isOSIAMCU()) return llvm::make_unique(); + if (TT.isOSSolaris()) + return llvm::make_unique(); if (TT.isOSFuchsia()) return llvm::make_unique(); if (TT.isOSBinFormatELF()) @@ -208,35 +208,6 @@ X86TargetMachine::X86TargetMachine(const Target &T, const Triple &TT, X86TargetMachine::~X86TargetMachine() = default; -#ifdef LLVM_BUILD_GLOBAL_ISEL -namespace { - -struct X86GISelActualAccessor : public GISelAccessor { - std::unique_ptr CallLoweringInfo; - std::unique_ptr Legalizer; - std::unique_ptr RegBankInfo; - std::unique_ptr InstSelector; - - const CallLowering *getCallLowering() const override { - return CallLoweringInfo.get(); - } - - const InstructionSelector *getInstructionSelector() const override { - return InstSelector.get(); - } - - const LegalizerInfo *getLegalizerInfo() const override { - return Legalizer.get(); - } - - const RegisterBankInfo *getRegBankInfo() const override { - return RegBankInfo.get(); - } -}; - -} // end anonymous namespace -#endif - const X86Subtarget * X86TargetMachine::getSubtargetImpl(const Function &F) const { Attribute CPUAttr = F.getFnAttribute("target-cpu"); @@ -268,12 +239,6 @@ X86TargetMachine::getSubtargetImpl(const Function &F) const { FS = Key.substr(CPU.size()); - bool OptForSize = F.optForSize(); - bool OptForMinSize = F.optForMinSize(); - - Key += std::string(OptForSize ? "+" : "-") + "optforsize"; - Key += std::string(OptForMinSize ? "+" : "-") + "optforminsize"; - auto &I = SubtargetMap[Key]; if (!I) { // This needs to be done before we create a new subtarget since any @@ -281,22 +246,7 @@ X86TargetMachine::getSubtargetImpl(const Function &F) const { // function that reside in TargetOptions. 
resetTargetOptions(F); I = llvm::make_unique(TargetTriple, CPU, FS, *this, - Options.StackAlignmentOverride, - OptForSize, OptForMinSize); -#ifndef LLVM_BUILD_GLOBAL_ISEL - GISelAccessor *GISel = new GISelAccessor(); -#else - X86GISelActualAccessor *GISel = new X86GISelActualAccessor(); - - GISel->CallLoweringInfo.reset(new X86CallLowering(*I->getTargetLowering())); - GISel->Legalizer.reset(new X86LegalizerInfo(*I, *this)); - - auto *RBI = new X86RegisterBankInfo(*I->getRegisterInfo()); - GISel->RegBankInfo.reset(RBI); - GISel->InstSelector.reset(createX86InstructionSelector( - *this, *I, *RBI)); -#endif - I->setGISelAccessor(*GISel); + Options.StackAlignmentOverride); } return I.get(); } @@ -328,7 +278,7 @@ namespace { /// X86 Code Generator Pass Configuration Options. class X86PassConfig : public TargetPassConfig { public: - X86PassConfig(X86TargetMachine *TM, PassManagerBase &PM) + X86PassConfig(X86TargetMachine &TM, PassManagerBase &PM) : TargetPassConfig(TM, PM) {} X86TargetMachine &getX86TargetMachine() const { @@ -374,16 +324,16 @@ INITIALIZE_PASS(X86ExecutionDepsFix, "x86-execution-deps-fix", "X86 Execution Dependency Fix", false, false) TargetPassConfig *X86TargetMachine::createPassConfig(PassManagerBase &PM) { - return new X86PassConfig(this, PM); + return new X86PassConfig(*this, PM); } void X86PassConfig::addIRPasses() { - addPass(createAtomicExpandPass(&getX86TargetMachine())); + addPass(createAtomicExpandPass()); TargetPassConfig::addIRPasses(); if (TM->getOptLevel() != CodeGenOpt::None) - addPass(createInterleavedAccessPass(TM)); + addPass(createInterleavedAccessPass()); } bool X86PassConfig::addInstSelector() { @@ -425,6 +375,7 @@ bool X86PassConfig::addILPOpts() { addPass(&EarlyIfConverterID); if (EnableMachineCombinerPass) addPass(&MachineCombinerID); + addPass(createX86CmovConverterPass()); return true; } @@ -438,6 +389,7 @@ bool X86PassConfig::addPreISel() { void X86PassConfig::addPreRegAlloc() { if (getOptLevel() != CodeGenOpt::None) { + addPass(&LiveRangeShrinkID); addPass(createX86FixupSetCC()); addPass(createX86OptimizeLEAs()); addPass(createX86CallFrameOptimization()); diff --git a/interpreter/llvm/src/lib/Target/X86/X86TargetMachine.h b/interpreter/llvm/src/lib/Target/X86/X86TargetMachine.h index cf933f52604ef..c16207973b393 100644 --- a/interpreter/llvm/src/lib/Target/X86/X86TargetMachine.h +++ b/interpreter/llvm/src/lib/Target/X86/X86TargetMachine.h @@ -40,6 +40,9 @@ class X86TargetMachine final : public LLVMTargetMachine { ~X86TargetMachine() override; const X86Subtarget *getSubtargetImpl(const Function &F) const override; + // The no argument getSubtargetImpl, while it exists on some targets, is + // deprecated and should not be used. 
+ const X86Subtarget *getSubtargetImpl() const = delete; TargetIRAnalysis getTargetIRAnalysis() override; @@ -49,6 +52,10 @@ class X86TargetMachine final : public LLVMTargetMachine { TargetLoweringObjectFile *getObjFileLowering() const override { return TLOF.get(); } + + bool isMachineVerifierClean() const override { + return false; + } }; } // end namespace llvm diff --git a/interpreter/llvm/src/lib/Target/X86/X86TargetObjectFile.cpp b/interpreter/llvm/src/lib/Target/X86/X86TargetObjectFile.cpp index 7f70829cb6c60..8627c06d44313 100644 --- a/interpreter/llvm/src/lib/Target/X86/X86TargetObjectFile.cpp +++ b/interpreter/llvm/src/lib/Target/X86/X86TargetObjectFile.cpp @@ -9,6 +9,8 @@ #include "X86TargetObjectFile.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/BinaryFormat/COFF.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/IR/Mangler.h" #include "llvm/IR/Operator.h" #include "llvm/MC/MCContext.h" @@ -16,8 +18,6 @@ #include "llvm/MC/MCSectionCOFF.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCValue.h" -#include "llvm/Support/COFF.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Target/TargetLowering.h" using namespace llvm; @@ -86,6 +86,12 @@ X86LinuxNaClTargetObjectFile::Initialize(MCContext &Ctx, InitializeELF(TM.Options.UseInitArray); } +void X86SolarisTargetObjectFile::Initialize(MCContext &Ctx, + const TargetMachine &TM) { + TargetLoweringObjectFileELF::Initialize(Ctx, TM); + InitializeELF(TM.Options.UseInitArray); +} + const MCExpr *X86WindowsTargetObjectFile::lowerRelativeReference( const GlobalValue *LHS, const GlobalValue *RHS, const TargetMachine &TM) const { diff --git a/interpreter/llvm/src/lib/Target/X86/X86TargetObjectFile.h b/interpreter/llvm/src/lib/Target/X86/X86TargetObjectFile.h index 39d2e84e5ed77..f6aa570b6332a 100644 --- a/interpreter/llvm/src/lib/Target/X86/X86TargetObjectFile.h +++ b/interpreter/llvm/src/lib/Target/X86/X86TargetObjectFile.h @@ -66,6 +66,11 @@ namespace llvm { void Initialize(MCContext &Ctx, const TargetMachine &TM) override; }; + /// \brief This implementation is used for Solaris on x86/x86-64. + class X86SolarisTargetObjectFile : public X86ELFTargetObjectFile { + void Initialize(MCContext &Ctx, const TargetMachine &TM) override; + }; + /// \brief This implementation is used for Windows targets on x86 and x86-64. class X86WindowsTargetObjectFile : public TargetLoweringObjectFileCOFF { const MCExpr * diff --git a/interpreter/llvm/src/lib/Target/X86/X86TargetTransformInfo.cpp b/interpreter/llvm/src/lib/Target/X86/X86TargetTransformInfo.cpp index f3b619a2956a0..c9924f264939d 100644 --- a/interpreter/llvm/src/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/interpreter/llvm/src/lib/Target/X86/X86TargetTransformInfo.cpp @@ -142,10 +142,15 @@ int X86TTIImpl::getArithmeticInstrCost( { ISD::FDIV, MVT::v2f64, 69 }, // divpd { ISD::FADD, MVT::v2f64, 2 }, // addpd { ISD::FSUB, MVT::v2f64, 2 }, // subpd - // v2i64/v4i64 mul is custom lowered as a series of long - // multiplies(3), shifts(3) and adds(2). 
- // slm muldq version throughput is 2 - { ISD::MUL, MVT::v2i64, 11 }, + // v2i64/v4i64 mul is custom lowered as a series of long: + // multiplies(3), shifts(3) and adds(2) + // slm muldq version throughput is 2 and addq throughput 4 + // thus: 3X2 (muldq throughput) + 3X1 (shift throughput) + + 3X4 (addq throughput) = 17 + { ISD::MUL, MVT::v2i64, 17 }, + // slm addq/subq throughput is 4 + { ISD::ADD, MVT::v2i64, 4 }, + { ISD::SUB, MVT::v2i64, 4 }, }; if (ST->isSLM()) { @@ -247,35 +252,38 @@ } static const CostTblEntry SSE2UniformConstCostTable[] = { - { ISD::SHL, MVT::v16i8, 2 }, // psllw + pand. - { ISD::SRL, MVT::v16i8, 2 }, // psrlw + pand. - { ISD::SRA, MVT::v16i8, 4 }, // psrlw, pand, pxor, psubb. - - { ISD::SHL, MVT::v32i8, 4 }, // 2*(psllw + pand). - { ISD::SRL, MVT::v32i8, 4 }, // 2*(psrlw + pand). - { ISD::SRA, MVT::v32i8, 8 }, // 2*(psrlw, pand, pxor, psubb). - - { ISD::SDIV, MVT::v16i16, 12 }, // pmulhw sequence - { ISD::SDIV, MVT::v8i16, 6 }, // pmulhw sequence - { ISD::UDIV, MVT::v16i16, 12 }, // pmulhuw sequence - { ISD::UDIV, MVT::v8i16, 6 }, // pmulhuw sequence - { ISD::SDIV, MVT::v8i32, 38 }, // pmuludq sequence - { ISD::SDIV, MVT::v4i32, 19 }, // pmuludq sequence - { ISD::UDIV, MVT::v8i32, 30 }, // pmuludq sequence - { ISD::UDIV, MVT::v4i32, 15 }, // pmuludq sequence + { ISD::SHL, MVT::v16i8, 2 }, // psllw + pand. + { ISD::SRL, MVT::v16i8, 2 }, // psrlw + pand. + { ISD::SRA, MVT::v16i8, 4 }, // psrlw, pand, pxor, psubb. + + { ISD::SHL, MVT::v32i8, 4+2 }, // 2*(psllw + pand) + split. + { ISD::SRL, MVT::v32i8, 4+2 }, // 2*(psrlw + pand) + split. + { ISD::SRA, MVT::v32i8, 8+2 }, // 2*(psrlw, pand, pxor, psubb) + split. + + { ISD::SDIV, MVT::v16i16, 12+2 }, // 2*pmulhw sequence + split. + { ISD::SDIV, MVT::v8i16, 6 }, // pmulhw sequence + { ISD::UDIV, MVT::v16i16, 12+2 }, // 2*pmulhuw sequence + split. + { ISD::UDIV, MVT::v8i16, 6 }, // pmulhuw sequence + { ISD::SDIV, MVT::v8i32, 38+2 }, // 2*pmuludq sequence + split. + { ISD::SDIV, MVT::v4i32, 19 }, // pmuludq sequence + { ISD::UDIV, MVT::v8i32, 30+2 }, // 2*pmuludq sequence + split. + { ISD::UDIV, MVT::v4i32, 15 }, // pmuludq sequence }; if (Op2Info == TargetTransformInfo::OK_UniformConstantValue && ST->hasSSE2()) { // pmuldq sequence. if (ISD == ISD::SDIV && LT.second == MVT::v8i32 && ST->hasAVX()) - return LT.first * 30; + return LT.first * 32; if (ISD == ISD::SDIV && LT.second == MVT::v4i32 && ST->hasSSE41()) return LT.first * 15; - if (const auto *Entry = CostTableLookup(SSE2UniformConstCostTable, ISD, - LT.second)) - return LT.first * Entry->Cost; + // XOP has faster vXi8 shifts. + if ((ISD != ISD::SHL && ISD != ISD::SRL && ISD != ISD::SRA) || + !ST->hasXOP()) + if (const auto *Entry = + CostTableLookup(SSE2UniformConstCostTable, ISD, LT.second)) + return LT.first * Entry->Cost; } static const CostTblEntry AVX2UniformCostTable[] = { @@ -430,18 +438,18 @@ { ISD::SRL, MVT::v2i64, 2 }, { ISD::SRA, MVT::v2i64, 2 }, // 256bit shifts require splitting if AVX2 didn't catch them above. 
- { ISD::SHL, MVT::v32i8, 2 }, - { ISD::SRL, MVT::v32i8, 4 }, - { ISD::SRA, MVT::v32i8, 4 }, - { ISD::SHL, MVT::v16i16, 2 }, - { ISD::SRL, MVT::v16i16, 4 }, - { ISD::SRA, MVT::v16i16, 4 }, - { ISD::SHL, MVT::v8i32, 2 }, - { ISD::SRL, MVT::v8i32, 4 }, - { ISD::SRA, MVT::v8i32, 4 }, - { ISD::SHL, MVT::v4i64, 2 }, - { ISD::SRL, MVT::v4i64, 4 }, - { ISD::SRA, MVT::v4i64, 4 }, + { ISD::SHL, MVT::v32i8, 2+2 }, + { ISD::SRL, MVT::v32i8, 4+2 }, + { ISD::SRA, MVT::v32i8, 4+2 }, + { ISD::SHL, MVT::v16i16, 2+2 }, + { ISD::SRL, MVT::v16i16, 4+2 }, + { ISD::SRA, MVT::v16i16, 4+2 }, + { ISD::SHL, MVT::v8i32, 2+2 }, + { ISD::SRL, MVT::v8i32, 4+2 }, + { ISD::SRA, MVT::v8i32, 4+2 }, + { ISD::SHL, MVT::v4i64, 2+2 }, + { ISD::SRL, MVT::v4i64, 4+2 }, + { ISD::SRA, MVT::v4i64, 4+2 }, }; // Look for XOP lowering tricks. @@ -451,23 +459,28 @@ int X86TTIImpl::getArithmeticInstrCost( static const CostTblEntry SSE2UniformShiftCostTable[] = { // Uniform splats are cheaper for the following instructions. - { ISD::SHL, MVT::v16i16, 2 }, // psllw. - { ISD::SHL, MVT::v8i32, 2 }, // pslld - { ISD::SHL, MVT::v4i64, 2 }, // psllq. - - { ISD::SRL, MVT::v16i16, 2 }, // psrlw. - { ISD::SRL, MVT::v8i32, 2 }, // psrld. - { ISD::SRL, MVT::v4i64, 2 }, // psrlq. - - { ISD::SRA, MVT::v16i16, 2 }, // psraw. - { ISD::SRA, MVT::v8i32, 2 }, // psrad. - { ISD::SRA, MVT::v2i64, 4 }, // 2 x psrad + shuffle. - { ISD::SRA, MVT::v4i64, 8 }, // 2 x psrad + shuffle. + { ISD::SHL, MVT::v16i16, 2+2 }, // 2*psllw + split. + { ISD::SHL, MVT::v8i32, 2+2 }, // 2*pslld + split. + { ISD::SHL, MVT::v4i64, 2+2 }, // 2*psllq + split. + + { ISD::SRL, MVT::v16i16, 2+2 }, // 2*psrlw + split. + { ISD::SRL, MVT::v8i32, 2+2 }, // 2*psrld + split. + { ISD::SRL, MVT::v4i64, 2+2 }, // 2*psrlq + split. + + { ISD::SRA, MVT::v16i16, 2+2 }, // 2*psraw + split. + { ISD::SRA, MVT::v8i32, 2+2 }, // 2*psrad + split. + { ISD::SRA, MVT::v2i64, 4 }, // 2*psrad + shuffle. + { ISD::SRA, MVT::v4i64, 8+2 }, // 2*(2*psrad + shuffle) + split. }; if (ST->hasSSE2() && ((Op2Info == TargetTransformInfo::OK_UniformConstantValue) || (Op2Info == TargetTransformInfo::OK_UniformValue))) { + + // Handle AVX2 uniform v4i64 ISD::SRA, it's not worth a table. + if (ISD == ISD::SRA && LT.second == MVT::v4i64 && ST->hasAVX2()) + return LT.first * 4; // 2*psrad + shuffle. + if (const auto *Entry = CostTableLookup(SSE2UniformShiftCostTable, ISD, LT.second)) return LT.first * Entry->Cost; @@ -581,28 +594,28 @@ int X86TTIImpl::getArithmeticInstrCost( return LT.first * Entry->Cost; static const CostTblEntry SSE41CostTable[] = { - { ISD::SHL, MVT::v16i8, 11 }, // pblendvb sequence. - { ISD::SHL, MVT::v32i8, 2*11 }, // pblendvb sequence. - { ISD::SHL, MVT::v8i16, 14 }, // pblendvb sequence. - { ISD::SHL, MVT::v16i16, 2*14 }, // pblendvb sequence. - { ISD::SHL, MVT::v4i32, 4 }, // pslld/paddd/cvttps2dq/pmulld - { ISD::SHL, MVT::v8i32, 2*4 }, // pslld/paddd/cvttps2dq/pmulld - - { ISD::SRL, MVT::v16i8, 12 }, // pblendvb sequence. - { ISD::SRL, MVT::v32i8, 2*12 }, // pblendvb sequence. - { ISD::SRL, MVT::v8i16, 14 }, // pblendvb sequence. - { ISD::SRL, MVT::v16i16, 2*14 }, // pblendvb sequence. - { ISD::SRL, MVT::v4i32, 11 }, // Shift each lane + blend. - { ISD::SRL, MVT::v8i32, 2*11 }, // Shift each lane + blend. - - { ISD::SRA, MVT::v16i8, 24 }, // pblendvb sequence. - { ISD::SRA, MVT::v32i8, 2*24 }, // pblendvb sequence. - { ISD::SRA, MVT::v8i16, 14 }, // pblendvb sequence. - { ISD::SRA, MVT::v16i16, 2*14 }, // pblendvb sequence. - { ISD::SRA, MVT::v4i32, 12 }, // Shift each lane + blend. 
- { ISD::SRA, MVT::v8i32, 2*12 }, // Shift each lane + blend. - - { ISD::MUL, MVT::v4i32, 1 } // pmulld + { ISD::SHL, MVT::v16i8, 11 }, // pblendvb sequence. + { ISD::SHL, MVT::v32i8, 2*11+2 }, // pblendvb sequence + split. + { ISD::SHL, MVT::v8i16, 14 }, // pblendvb sequence. + { ISD::SHL, MVT::v16i16, 2*14+2 }, // pblendvb sequence + split. + { ISD::SHL, MVT::v4i32, 4 }, // pslld/paddd/cvttps2dq/pmulld + { ISD::SHL, MVT::v8i32, 2*4+2 }, // pslld/paddd/cvttps2dq/pmulld + split + + { ISD::SRL, MVT::v16i8, 12 }, // pblendvb sequence. + { ISD::SRL, MVT::v32i8, 2*12+2 }, // pblendvb sequence + split. + { ISD::SRL, MVT::v8i16, 14 }, // pblendvb sequence. + { ISD::SRL, MVT::v16i16, 2*14+2 }, // pblendvb sequence + split. + { ISD::SRL, MVT::v4i32, 11 }, // Shift each lane + blend. + { ISD::SRL, MVT::v8i32, 2*11+2 }, // Shift each lane + blend + split. + + { ISD::SRA, MVT::v16i8, 24 }, // pblendvb sequence. + { ISD::SRA, MVT::v32i8, 2*24+2 }, // pblendvb sequence + split. + { ISD::SRA, MVT::v8i16, 14 }, // pblendvb sequence. + { ISD::SRA, MVT::v16i16, 2*14+2 }, // pblendvb sequence + split. + { ISD::SRA, MVT::v4i32, 12 }, // Shift each lane + blend. + { ISD::SRA, MVT::v8i32, 2*12+2 }, // Shift each lane + blend + split. + + { ISD::MUL, MVT::v4i32, 1 } // pmulld }; if (ST->hasSSE41()) @@ -612,33 +625,33 @@ int X86TTIImpl::getArithmeticInstrCost( static const CostTblEntry SSE2CostTable[] = { // We don't correctly identify costs of casts because they are marked as // custom. - { ISD::SHL, MVT::v16i8, 26 }, // cmpgtb sequence. - { ISD::SHL, MVT::v8i16, 32 }, // cmpgtb sequence. - { ISD::SHL, MVT::v4i32, 2*5 }, // We optimized this using mul. - { ISD::SHL, MVT::v2i64, 4 }, // splat+shuffle sequence. - { ISD::SHL, MVT::v4i64, 2*4 }, // splat+shuffle sequence. - - { ISD::SRL, MVT::v16i8, 26 }, // cmpgtb sequence. - { ISD::SRL, MVT::v8i16, 32 }, // cmpgtb sequence. - { ISD::SRL, MVT::v4i32, 16 }, // Shift each lane + blend. - { ISD::SRL, MVT::v2i64, 4 }, // splat+shuffle sequence. - { ISD::SRL, MVT::v4i64, 2*4 }, // splat+shuffle sequence. - - { ISD::SRA, MVT::v16i8, 54 }, // unpacked cmpgtb sequence. - { ISD::SRA, MVT::v8i16, 32 }, // cmpgtb sequence. - { ISD::SRA, MVT::v4i32, 16 }, // Shift each lane + blend. - { ISD::SRA, MVT::v2i64, 12 }, // srl/xor/sub sequence. - { ISD::SRA, MVT::v4i64, 2*12 }, // srl/xor/sub sequence. - - { ISD::MUL, MVT::v16i8, 12 }, // extend/pmullw/trunc sequence. - { ISD::MUL, MVT::v8i16, 1 }, // pmullw - { ISD::MUL, MVT::v4i32, 6 }, // 3*pmuludq/4*shuffle - { ISD::MUL, MVT::v2i64, 8 }, // 3*pmuludq/3*shift/2*add - - { ISD::FDIV, MVT::f32, 23 }, // Pentium IV from http://www.agner.org/ - { ISD::FDIV, MVT::v4f32, 39 }, // Pentium IV from http://www.agner.org/ - { ISD::FDIV, MVT::f64, 38 }, // Pentium IV from http://www.agner.org/ - { ISD::FDIV, MVT::v2f64, 69 }, // Pentium IV from http://www.agner.org/ + { ISD::SHL, MVT::v16i8, 26 }, // cmpgtb sequence. + { ISD::SHL, MVT::v8i16, 32 }, // cmpgtb sequence. + { ISD::SHL, MVT::v4i32, 2*5 }, // We optimized this using mul. + { ISD::SHL, MVT::v2i64, 4 }, // splat+shuffle sequence. + { ISD::SHL, MVT::v4i64, 2*4+2 }, // splat+shuffle sequence + split. + + { ISD::SRL, MVT::v16i8, 26 }, // cmpgtb sequence. + { ISD::SRL, MVT::v8i16, 32 }, // cmpgtb sequence. + { ISD::SRL, MVT::v4i32, 16 }, // Shift each lane + blend. + { ISD::SRL, MVT::v2i64, 4 }, // splat+shuffle sequence. + { ISD::SRL, MVT::v4i64, 2*4+2 }, // splat+shuffle sequence + split. + + { ISD::SRA, MVT::v16i8, 54 }, // unpacked cmpgtb sequence. 
+ { ISD::SRA, MVT::v8i16, 32 }, // cmpgtb sequence. + { ISD::SRA, MVT::v4i32, 16 }, // Shift each lane + blend. + { ISD::SRA, MVT::v2i64, 12 }, // srl/xor/sub sequence. + { ISD::SRA, MVT::v4i64, 2*12+2 }, // srl/xor/sub sequence+split. + + { ISD::MUL, MVT::v16i8, 12 }, // extend/pmullw/trunc sequence. + { ISD::MUL, MVT::v8i16, 1 }, // pmullw + { ISD::MUL, MVT::v4i32, 6 }, // 3*pmuludq/4*shuffle + { ISD::MUL, MVT::v2i64, 8 }, // 3*pmuludq/3*shift/2*add + + { ISD::FDIV, MVT::f32, 23 }, // Pentium IV from http://www.agner.org/ + { ISD::FDIV, MVT::v4f32, 39 }, // Pentium IV from http://www.agner.org/ + { ISD::FDIV, MVT::f64, 38 }, // Pentium IV from http://www.agner.org/ + { ISD::FDIV, MVT::v2f64, 69 }, // Pentium IV from http://www.agner.org/ // It is not a good idea to vectorize division. We have to scalarize it and // in the process we will often end up having to spilling regular @@ -1375,6 +1388,8 @@ int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I); } +unsigned X86TTIImpl::getAtomicMemIntrinsicMaxElementSize() const { return 16; } + int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, ArrayRef Tys, FastMathFlags FMF, unsigned ScalarizationCostPassed) { @@ -1384,6 +1399,48 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, // CTLZ: llvm\test\CodeGen\X86\vector-lzcnt-*.ll // CTPOP: llvm\test\CodeGen\X86\vector-popcnt-*.ll // CTTZ: llvm\test\CodeGen\X86\vector-tzcnt-*.ll + static const CostTblEntry AVX512CDCostTbl[] = { + { ISD::CTLZ, MVT::v8i64, 1 }, + { ISD::CTLZ, MVT::v16i32, 1 }, + { ISD::CTLZ, MVT::v32i16, 8 }, + { ISD::CTLZ, MVT::v64i8, 20 }, + { ISD::CTLZ, MVT::v4i64, 1 }, + { ISD::CTLZ, MVT::v8i32, 1 }, + { ISD::CTLZ, MVT::v16i16, 4 }, + { ISD::CTLZ, MVT::v32i8, 10 }, + { ISD::CTLZ, MVT::v2i64, 1 }, + { ISD::CTLZ, MVT::v4i32, 1 }, + { ISD::CTLZ, MVT::v8i16, 4 }, + { ISD::CTLZ, MVT::v16i8, 4 }, + }; + static const CostTblEntry AVX512BWCostTbl[] = { + { ISD::BITREVERSE, MVT::v8i64, 5 }, + { ISD::BITREVERSE, MVT::v16i32, 5 }, + { ISD::BITREVERSE, MVT::v32i16, 5 }, + { ISD::BITREVERSE, MVT::v64i8, 5 }, + { ISD::CTLZ, MVT::v8i64, 23 }, + { ISD::CTLZ, MVT::v16i32, 22 }, + { ISD::CTLZ, MVT::v32i16, 18 }, + { ISD::CTLZ, MVT::v64i8, 17 }, + { ISD::CTPOP, MVT::v8i64, 7 }, + { ISD::CTPOP, MVT::v16i32, 11 }, + { ISD::CTPOP, MVT::v32i16, 9 }, + { ISD::CTPOP, MVT::v64i8, 6 }, + { ISD::CTTZ, MVT::v8i64, 10 }, + { ISD::CTTZ, MVT::v16i32, 14 }, + { ISD::CTTZ, MVT::v32i16, 12 }, + { ISD::CTTZ, MVT::v64i8, 9 }, + }; + static const CostTblEntry AVX512CostTbl[] = { + { ISD::BITREVERSE, MVT::v8i64, 36 }, + { ISD::BITREVERSE, MVT::v16i32, 24 }, + { ISD::CTLZ, MVT::v8i64, 29 }, + { ISD::CTLZ, MVT::v16i32, 35 }, + { ISD::CTPOP, MVT::v8i64, 16 }, + { ISD::CTPOP, MVT::v16i32, 24 }, + { ISD::CTTZ, MVT::v8i64, 20 }, + { ISD::CTTZ, MVT::v16i32, 28 }, + }; static const CostTblEntry XOPCostTbl[] = { { ISD::BITREVERSE, MVT::v4i64, 4 }, { ISD::BITREVERSE, MVT::v8i32, 4 }, @@ -1542,6 +1599,18 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, MVT MTy = LT.second; // Attempt to lookup cost. 
+ if (ST->hasCDI()) + if (const auto *Entry = CostTableLookup(AVX512CDCostTbl, ISD, MTy)) + return LT.first * Entry->Cost; + + if (ST->hasBWI()) + if (const auto *Entry = CostTableLookup(AVX512BWCostTbl, ISD, MTy)) + return LT.first * Entry->Cost; + + if (ST->hasAVX512()) + if (const auto *Entry = CostTableLookup(AVX512CostTbl, ISD, MTy)) + return LT.first * Entry->Cost; + if (ST->hasXOP()) if (const auto *Entry = CostTableLookup(XOPCostTbl, ISD, MTy)) return LT.first * Entry->Cost; @@ -2168,6 +2237,12 @@ bool X86TTIImpl::areInlineCompatible(const Function *Caller, return (CallerBits & CalleeBits) == CalleeBits; } +bool X86TTIImpl::expandMemCmp(Instruction *I, unsigned &MaxLoadSize) { + // TODO: We can increase these based on available vector ops. + MaxLoadSize = ST->is64Bit() ? 8 : 4; + return true; +} + bool X86TTIImpl::enableInterleavedAccessVectorization() { // TODO: We expect this to be beneficial regardless of arch, // but there are currently some unexplained performance artifacts on Atom. @@ -2175,6 +2250,114 @@ bool X86TTIImpl::enableInterleavedAccessVectorization() { return !(ST->isAtom()); } +// Get estimation for interleaved load/store operations for AVX2. +// \p Factor is the interleaved-access factor (stride) - number of +// (interleaved) elements in the group. +// \p Indices contains the indices for a strided load: when the +// interleaved load has gaps they indicate which elements are used. +// If Indices is empty (or if the number of indices is equal to the size +// of the interleaved-access as given in \p Factor) the access has no gaps. +// +// As opposed to AVX-512, AVX2 does not have generic shuffles that allow +// computing the cost using a generic formula as a function of generic +// shuffles. We therefore use a lookup table instead, filled according to +// the instruction sequences that codegen currently generates. +int X86TTIImpl::getInterleavedMemoryOpCostAVX2(unsigned Opcode, Type *VecTy, + unsigned Factor, + ArrayRef<unsigned> Indices, + unsigned Alignment, + unsigned AddressSpace) { + + // We currently support only fully-interleaved groups, with no gaps. + // TODO: Support also strided loads (interleaved-groups with gaps). + if (Indices.size() && Indices.size() != Factor) + return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, + Alignment, AddressSpace); + + // VecTy for interleave memop is <VF*Factor x Elt>. + // So, for VF=4, Interleave Factor = 3, Element type = i32 we have + // VecTy = <12 x i32>. + MVT LegalVT = getTLI()->getTypeLegalizationCost(DL, VecTy).second; + + // This function can be called with VecTy=<6xi128>, Factor=3, in which case + // the VF=2, while v2i128 is an unsupported MVT vector type + // (see MachineValueType.h::getVectorVT()). + if (!LegalVT.isVector()) + return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, + Alignment, AddressSpace); + + unsigned VF = VecTy->getVectorNumElements() / Factor; + Type *ScalarTy = VecTy->getVectorElementType(); + + // Calculate the number of memory operations (NumOfMemOps) required + // to load/store the VecTy. + unsigned VecTySize = DL.getTypeStoreSize(VecTy); + unsigned LegalVTSize = LegalVT.getStoreSize(); + unsigned NumOfMemOps = (VecTySize + LegalVTSize - 1) / LegalVTSize; + + // Get the cost of one memory operation. 
+ Type *SingleMemOpTy = VectorType::get(VecTy->getVectorElementType(), + LegalVT.getVectorNumElements()); + unsigned MemOpCost = + getMemoryOpCost(Opcode, SingleMemOpTy, Alignment, AddressSpace); + + VectorType *VT = VectorType::get(ScalarTy, VF); + EVT ETy = TLI->getValueType(DL, VT); + if (!ETy.isSimple()) + return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, + Alignment, AddressSpace); + + // TODO: Complete for other data-types and strides. + // Each combination of Stride, ElementTy and VF results in a different + // sequence; The cost tables are therefore accessed with: + // Factor (stride) and VectorType=VFxElemType. + // The Cost accounts only for the shuffle sequence; + // The cost of the loads/stores is accounted for separately. + // + static const CostTblEntry AVX2InterleavedLoadTbl[] = { + { 3, MVT::v2i8, 10 }, //(load 6i8 and) deinterleave into 3 x 2i8 + { 3, MVT::v4i8, 4 }, //(load 12i8 and) deinterleave into 3 x 4i8 + { 3, MVT::v8i8, 9 }, //(load 24i8 and) deinterleave into 3 x 8i8 + { 3, MVT::v16i8, 18}, //(load 48i8 and) deinterleave into 3 x 16i8 + { 3, MVT::v32i8, 42 }, //(load 96i8 and) deinterleave into 3 x 32i8 + + { 4, MVT::v2i8, 12 }, //(load 8i8 and) deinterleave into 4 x 2i8 + { 4, MVT::v4i8, 4 }, //(load 16i8 and) deinterleave into 4 x 4i8 + { 4, MVT::v8i8, 20 }, //(load 32i8 and) deinterleave into 4 x 8i8 + { 4, MVT::v16i8, 39 }, //(load 64i8 and) deinterleave into 4 x 16i8 + { 4, MVT::v32i8, 80 } //(load 128i8 and) deinterleave into 4 x 32i8 + }; + + static const CostTblEntry AVX2InterleavedStoreTbl[] = { + { 3, MVT::v2i8, 7 }, //interleave 3 x 2i8 into 6i8 (and store) + { 3, MVT::v4i8, 8 }, //interleave 3 x 4i8 into 12i8 (and store) + { 3, MVT::v8i8, 11 }, //interleave 3 x 8i8 into 24i8 (and store) + { 3, MVT::v16i8, 17 }, //interleave 3 x 16i8 into 48i8 (and store) + { 3, MVT::v32i8, 32 }, //interleave 3 x 32i8 into 96i8 (and store) + + { 4, MVT::v2i8, 12 }, //interleave 4 x 2i8 into 8i8 (and store) + { 4, MVT::v4i8, 9 }, //interleave 4 x 4i8 into 16i8 (and store) + { 4, MVT::v8i8, 16 }, //interleave 4 x 8i8 into 32i8 (and store) + { 4, MVT::v16i8, 20 }, //interleave 4 x 16i8 into 64i8 (and store) + { 4, MVT::v32i8, 40 } //interleave 4 x 32i8 into 128i8 (and store) + }; + + if (Opcode == Instruction::Load) { + if (const auto *Entry = + CostTableLookup(AVX2InterleavedLoadTbl, Factor, ETy.getSimpleVT())) + return NumOfMemOps * MemOpCost + Entry->Cost; + } else { + assert(Opcode == Instruction::Store && + "Expected Store Instruction at this point"); + if (const auto *Entry = + CostTableLookup(AVX2InterleavedStoreTbl, Factor, ETy.getSimpleVT())) + return NumOfMemOps * MemOpCost + Entry->Cost; + } + + return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, + Alignment, AddressSpace); +} + // Get estimation for interleaved load/store operations and strided load. // \p Indices contains indices for strided load. // \p Factor - the factor of interleaving. 
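The AVX2 interleaved-access cost above therefore comes out as the number of legal-width memory operations (a ceiling division of the wide vector's store size by the legal vector's size) times the per-operation cost, plus the shuffle-sequence cost looked up by (Factor, VF x ElemType). A standalone sketch of that formula, with a hypothetical helper name (interleavedCost) and an assumed shuffle cost of 4 purely for illustration:

    #include <cstdio>

    // Hypothetical standalone helper, not LLVM's API: models the cost
    // computed by getInterleavedMemoryOpCostAVX2 above.
    unsigned interleavedCost(unsigned VecTyBytes, unsigned LegalVTBytes,
                             unsigned MemOpCost, unsigned ShuffleTableCost) {
      // Ceiling division: how many legal-width memory ops cover the wide vector.
      unsigned NumOfMemOps = (VecTyBytes + LegalVTBytes - 1) / LegalVTBytes;
      return NumOfMemOps * MemOpCost + ShuffleTableCost;
    }

    int main() {
      // E.g. an interleaved <12 x i32> load (48 bytes) legalized to 32-byte
      // YMM ops: 2 memory ops, plus an assumed stride-3 shuffle cost of 4.
      std::printf("cost = %u\n", interleavedCost(48, 32, 1, 4));
      return 0;
    }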
@@ -2283,6 +2466,10 @@ int X86TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, if (ST->hasAVX512() && HasAVX512Solution && (!RequiresBW || ST->hasBWI())) return getInterleavedMemoryOpCostAVX512(Opcode, VecTy, Factor, Indices, Alignment, AddressSpace); + if (ST->hasAVX2()) + return getInterleavedMemoryOpCostAVX2(Opcode, VecTy, Factor, Indices, + Alignment, AddressSpace); + return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, Alignment, AddressSpace); } diff --git a/interpreter/llvm/src/lib/Target/X86/X86TargetTransformInfo.h b/interpreter/llvm/src/lib/Target/X86/X86TargetTransformInfo.h index 9bef9e80c395c..ad0a0a2113012 100644 --- a/interpreter/llvm/src/lib/Target/X86/X86TargetTransformInfo.h +++ b/interpreter/llvm/src/lib/Target/X86/X86TargetTransformInfo.h @@ -76,6 +76,8 @@ class X86TTIImpl : public BasicTTIImplBase { int getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE, const SCEV *Ptr); + unsigned getAtomicMemIntrinsicMaxElementSize() const; + int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, ArrayRef Tys, FastMathFlags FMF, unsigned ScalarizationCostPassed = UINT_MAX); @@ -91,6 +93,9 @@ class X86TTIImpl : public BasicTTIImplBase { int getInterleavedMemoryOpCostAVX512(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef Indices, unsigned Alignment, unsigned AddressSpace); + int getInterleavedMemoryOpCostAVX2(unsigned Opcode, Type *VecTy, + unsigned Factor, ArrayRef Indices, + unsigned Alignment, unsigned AddressSpace); int getIntImmCost(int64_t); @@ -105,7 +110,7 @@ class X86TTIImpl : public BasicTTIImplBase { bool isLegalMaskedScatter(Type *DataType); bool areInlineCompatible(const Function *Caller, const Function *Callee) const; - + bool expandMemCmp(Instruction *I, unsigned &MaxLoadSize); bool enableInterleavedAccessVectorization(); private: int getGSScalarCost(unsigned Opcode, Type *DataTy, bool VariableMask, diff --git a/interpreter/llvm/src/lib/Target/X86/X86WinEHState.cpp b/interpreter/llvm/src/lib/Target/X86/X86WinEHState.cpp index 500b26b3be176..0c3b343414760 100644 --- a/interpreter/llvm/src/lib/Target/X86/X86WinEHState.cpp +++ b/interpreter/llvm/src/lib/Target/X86/X86WinEHState.cpp @@ -22,9 +22,9 @@ #include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" @@ -398,7 +398,7 @@ Function *WinEHStatePass::generateLSDAInEAXThunk(Function *ParentFunc) { /*isVarArg=*/false); Function *Trampoline = Function::Create(TrampolineTy, GlobalValue::InternalLinkage, - Twine("__ehhandler$") + GlobalValue::getRealLinkageName( + Twine("__ehhandler$") + GlobalValue::dropLLVMManglingEscape( ParentFunc->getName()), TheModule); BasicBlock *EntryBB = BasicBlock::Create(Context, "entry", Trampoline); diff --git a/interpreter/llvm/src/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp b/interpreter/llvm/src/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp index 5fc58d8313194..dd27e7ca30aa1 100644 --- a/interpreter/llvm/src/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp +++ b/interpreter/llvm/src/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp @@ -11,9 +11,9 @@ // //===----------------------------------------------------------------------===// +#include "MCTargetDesc/XCoreMCTargetDesc.h" #include "InstPrinter/XCoreInstPrinter.h" #include "MCTargetDesc/XCoreMCAsmInfo.h" -#include 
"MCTargetDesc/XCoreMCTargetDesc.h" #include "XCoreTargetStreamer.h" #include "llvm/ADT/StringRef.h" #include "llvm/MC/MCDwarf.h" @@ -23,8 +23,8 @@ #include "llvm/Support/CodeGen.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/interpreter/llvm/src/lib/Target/XCore/XCoreAsmPrinter.cpp b/interpreter/llvm/src/lib/Target/XCore/XCoreAsmPrinter.cpp index b35aa0b95821a..8f7c8a82380a0 100644 --- a/interpreter/llvm/src/lib/Target/XCore/XCoreAsmPrinter.cpp +++ b/interpreter/llvm/src/lib/Target/XCore/XCoreAsmPrinter.cpp @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#include "XCore.h" #include "InstPrinter/XCoreInstPrinter.h" +#include "XCore.h" #include "XCoreInstrInfo.h" #include "XCoreMCInstLower.h" #include "XCoreSubtarget.h" diff --git a/interpreter/llvm/src/lib/Target/XCore/XCoreTargetMachine.cpp b/interpreter/llvm/src/lib/Target/XCore/XCoreTargetMachine.cpp index e28e05c7f6a82..cb23399995dad 100644 --- a/interpreter/llvm/src/lib/Target/XCore/XCoreTargetMachine.cpp +++ b/interpreter/llvm/src/lib/Target/XCore/XCoreTargetMachine.cpp @@ -10,9 +10,9 @@ // //===----------------------------------------------------------------------===// +#include "XCoreTargetMachine.h" #include "MCTargetDesc/XCoreMCTargetDesc.h" #include "XCore.h" -#include "XCoreTargetMachine.h" #include "XCoreTargetObjectFile.h" #include "XCoreTargetTransformInfo.h" #include "llvm/ADT/Optional.h" @@ -54,7 +54,7 @@ namespace { /// XCore Code Generator Pass Configuration Options. class XCorePassConfig : public TargetPassConfig { public: - XCorePassConfig(XCoreTargetMachine *TM, PassManagerBase &PM) + XCorePassConfig(XCoreTargetMachine &TM, PassManagerBase &PM) : TargetPassConfig(TM, PM) {} XCoreTargetMachine &getXCoreTargetMachine() const { @@ -70,11 +70,11 @@ class XCorePassConfig : public TargetPassConfig { } // end anonymous namespace TargetPassConfig *XCoreTargetMachine::createPassConfig(PassManagerBase &PM) { - return new XCorePassConfig(this, PM); + return new XCorePassConfig(*this, PM); } void XCorePassConfig::addIRPasses() { - addPass(createAtomicExpandPass(&getXCoreTargetMachine())); + addPass(createAtomicExpandPass()); TargetPassConfig::addIRPasses(); } diff --git a/interpreter/llvm/src/lib/Target/XCore/XCoreTargetMachine.h b/interpreter/llvm/src/lib/Target/XCore/XCoreTargetMachine.h index 2b53f01a996de..a047b3c9d9fc3 100644 --- a/interpreter/llvm/src/lib/Target/XCore/XCoreTargetMachine.h +++ b/interpreter/llvm/src/lib/Target/XCore/XCoreTargetMachine.h @@ -15,9 +15,9 @@ #define LLVM_LIB_TARGET_XCORE_XCORETARGETMACHINE_H #include "XCoreSubtarget.h" -#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Support/CodeGen.h" #include "llvm/Target/TargetMachine.h" #include diff --git a/interpreter/llvm/src/lib/Target/XCore/XCoreTargetObjectFile.cpp b/interpreter/llvm/src/lib/Target/XCore/XCoreTargetObjectFile.cpp index ad8693fd325e5..c60a262e719cb 100644 --- a/interpreter/llvm/src/lib/Target/XCore/XCoreTargetObjectFile.cpp +++ b/interpreter/llvm/src/lib/Target/XCore/XCoreTargetObjectFile.cpp @@ -9,10 +9,10 @@ #include "XCoreTargetObjectFile.h" #include "XCoreSubtarget.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/IR/DataLayout.h" #include "llvm/MC/MCContext.h" 
#include "llvm/MC/MCSectionELF.h" -#include "llvm/Support/ELF.h" #include "llvm/Target/TargetMachine.h" using namespace llvm; diff --git a/interpreter/llvm/src/lib/Testing/CMakeLists.txt b/interpreter/llvm/src/lib/Testing/CMakeLists.txt new file mode 100644 index 0000000000000..fc23e64eeb7a4 --- /dev/null +++ b/interpreter/llvm/src/lib/Testing/CMakeLists.txt @@ -0,0 +1 @@ +add_subdirectory(Support) diff --git a/interpreter/llvm/src/lib/Testing/LLVMBuild.txt b/interpreter/llvm/src/lib/Testing/LLVMBuild.txt new file mode 100644 index 0000000000000..cdf83736298e4 --- /dev/null +++ b/interpreter/llvm/src/lib/Testing/LLVMBuild.txt @@ -0,0 +1,19 @@ +;===- ./lib/Testing/LLVMBuild.txt ------------------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[common] +subdirectories = Support diff --git a/interpreter/llvm/src/lib/Testing/Support/CMakeLists.txt b/interpreter/llvm/src/lib/Testing/Support/CMakeLists.txt new file mode 100644 index 0000000000000..fa8dfe59c8bdb --- /dev/null +++ b/interpreter/llvm/src/lib/Testing/Support/CMakeLists.txt @@ -0,0 +1,12 @@ +add_llvm_library(LLVMTestingSupport + Error.cpp + + BUILDTREE_ONLY + + ADDITIONAL_HEADER_DIRS + ${LLVM_MAIN_INCLUDE_DIR}/llvm/Testing/Support + ) + +include_directories(${LLVM_MAIN_SRC_DIR}/utils/unittest/googletest/include) +include_directories(${LLVM_MAIN_SRC_DIR}/utils/unittest/googlemock/include) +target_link_libraries(LLVMTestingSupport PRIVATE gtest) \ No newline at end of file diff --git a/interpreter/llvm/src/lib/Testing/Support/Error.cpp b/interpreter/llvm/src/lib/Testing/Support/Error.cpp new file mode 100644 index 0000000000000..ce0da44da408b --- /dev/null +++ b/interpreter/llvm/src/lib/Testing/Support/Error.cpp @@ -0,0 +1,22 @@ +//===- llvm/Testing/Support/Error.cpp -------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Testing/Support/Error.h" + +#include "llvm/ADT/StringRef.h" + +using namespace llvm; + +llvm::detail::ErrorHolder llvm::detail::TakeError(llvm::Error Err) { + bool Succeeded = !static_cast(Err); + std::string Message; + if (!Succeeded) + Message = toString(std::move(Err)); + return {Succeeded, Message}; +} diff --git a/interpreter/llvm/src/lib/Testing/Support/LLVMBuild.txt b/interpreter/llvm/src/lib/Testing/Support/LLVMBuild.txt new file mode 100644 index 0000000000000..173cfb4a5587e --- /dev/null +++ b/interpreter/llvm/src/lib/Testing/Support/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===- ./Testing/Support/LLVMBuild.txt --------------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. 
+; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = TestingSupport +parent = Libraries +required_libraries = Support +installed = 0 diff --git a/interpreter/llvm/src/lib/ToolDrivers/CMakeLists.txt b/interpreter/llvm/src/lib/ToolDrivers/CMakeLists.txt new file mode 100644 index 0000000000000..28da36bba2091 --- /dev/null +++ b/interpreter/llvm/src/lib/ToolDrivers/CMakeLists.txt @@ -0,0 +1,2 @@ +add_subdirectory(llvm-dlltool) +add_subdirectory(llvm-lib) diff --git a/interpreter/llvm/src/lib/ToolDrivers/LLVMBuild.txt b/interpreter/llvm/src/lib/ToolDrivers/LLVMBuild.txt new file mode 100644 index 0000000000000..a49e04bdf3c1f --- /dev/null +++ b/interpreter/llvm/src/lib/ToolDrivers/LLVMBuild.txt @@ -0,0 +1,24 @@ +;===- ./lib/ToolDrivers/LLVMBuild.txt --------------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[common] +subdirectories = llvm-dlltool llvm-lib + +[component_0] +type = Group +name = ToolDrivers +parent = Libraries diff --git a/interpreter/llvm/src/lib/ToolDrivers/llvm-dlltool/CMakeLists.txt b/interpreter/llvm/src/lib/ToolDrivers/llvm-dlltool/CMakeLists.txt new file mode 100644 index 0000000000000..52bd5cba86f43 --- /dev/null +++ b/interpreter/llvm/src/lib/ToolDrivers/llvm-dlltool/CMakeLists.txt @@ -0,0 +1,9 @@ +set(LLVM_TARGET_DEFINITIONS Options.td) +tablegen(LLVM Options.inc -gen-opt-parser-defs) +add_public_tablegen_target(DllOptionsTableGen) + +add_llvm_library(LLVMDlltoolDriver + DlltoolDriver.cpp + ) + +add_dependencies(LLVMDlltoolDriver DllOptionsTableGen) diff --git a/interpreter/llvm/src/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp b/interpreter/llvm/src/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp new file mode 100644 index 0000000000000..4820b9f7de586 --- /dev/null +++ b/interpreter/llvm/src/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp @@ -0,0 +1,183 @@ +//===- DlltoolDriver.cpp - dlltool.exe-compatible driver ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Defines an interface to a dlltool.exe-compatible driver. 
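// A typical invocation of the resulting tool (illustrative only):
//
//   llvm-dlltool -m i386:x86-64 -d foo.def -l foo.lib
//
// This reads the module-definition file foo.def and writes the import
// library foo.lib for the x86-64 target, using the options declared below.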
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ToolDrivers/llvm-dlltool/DlltoolDriver.h" +#include "llvm/Object/ArchiveWriter.h" +#include "llvm/Object/COFF.h" +#include "llvm/Object/COFFImportFile.h" +#include "llvm/Object/COFFModuleDefinition.h" +#include "llvm/Option/Arg.h" +#include "llvm/Option/ArgList.h" +#include "llvm/Option/Option.h" +#include "llvm/Support/Path.h" + +#include +#include + +using namespace llvm; +using namespace llvm::object; +using namespace llvm::COFF; + +namespace { + +enum { + OPT_INVALID = 0, +#define OPTION(_1, _2, ID, _4, _5, _6, _7, _8, _9, _10, _11, _12) OPT_##ID, +#include "Options.inc" +#undef OPTION +}; + +#define PREFIX(NAME, VALUE) const char *const NAME[] = VALUE; +#include "Options.inc" +#undef PREFIX + +static const llvm::opt::OptTable::Info infoTable[] = { +#define OPTION(X1, X2, ID, KIND, GROUP, ALIAS, X7, X8, X9, X10, X11, X12) \ + {X1, X2, X10, X11, OPT_##ID, llvm::opt::Option::KIND##Class, \ + X9, X8, OPT_##GROUP, OPT_##ALIAS, X7, X12}, +#include "Options.inc" +#undef OPTION +}; + +class DllOptTable : public llvm::opt::OptTable { +public: + DllOptTable() : OptTable(infoTable, false) {} +}; + +} // namespace + +std::vector> OwningMBs; + +// Opens a file. Path has to be resolved already. +// Newly created memory buffers are owned by this driver. +Optional openFile(StringRef Path) { + ErrorOr> MB = MemoryBuffer::getFile(Path); + + if (std::error_code EC = MB.getError()) { + llvm::errs() << "fail openFile: " << EC.message() << "\n"; + return None; + } + + MemoryBufferRef MBRef = MB.get()->getMemBufferRef(); + OwningMBs.push_back(std::move(MB.get())); // take ownership + return MBRef; +} + +static MachineTypes getEmulation(StringRef S) { + return StringSwitch(S) + .Case("i386", IMAGE_FILE_MACHINE_I386) + .Case("i386:x86-64", IMAGE_FILE_MACHINE_AMD64) + .Case("arm", IMAGE_FILE_MACHINE_ARMNT) + .Default(IMAGE_FILE_MACHINE_UNKNOWN); +} + +static std::string getImplibPath(std::string Path) { + SmallString<128> Out = StringRef("lib"); + Out.append(Path); + sys::path::replace_extension(Out, ".a"); + return Out.str(); +} + +int llvm::dlltoolDriverMain(llvm::ArrayRef ArgsArr) { + DllOptTable Table; + unsigned MissingIndex; + unsigned MissingCount; + llvm::opt::InputArgList Args = + Table.ParseArgs(ArgsArr.slice(1), MissingIndex, MissingCount); + if (MissingCount) { + llvm::errs() << Args.getArgString(MissingIndex) << ": missing argument\n"; + return 1; + } + + // Handle when no input or output is specified + if (Args.hasArgNoClaim(OPT_INPUT) || + (!Args.hasArgNoClaim(OPT_d) && !Args.hasArgNoClaim(OPT_l))) { + Table.PrintHelp(outs(), ArgsArr[0], "dlltool", false); + llvm::outs() << "\nTARGETS: i386, i386:x86-64, arm\n"; + return 1; + } + + if (!Args.hasArgNoClaim(OPT_m) && Args.hasArgNoClaim(OPT_d)) { + llvm::errs() << "error: no target machine specified\n" + << "supported targets: i386, i386:x86-64, arm\n"; + return 1; + } + + for (auto *Arg : Args.filtered(OPT_UNKNOWN)) + llvm::errs() << "ignoring unknown argument: " << Arg->getSpelling() << "\n"; + + if (!Args.hasArg(OPT_d)) { + llvm::errs() << "no definition file specified\n"; + return 1; + } + + Optional MB = openFile(Args.getLastArg(OPT_d)->getValue()); + if (!MB) + return 1; + + if (!MB->getBufferSize()) { + llvm::errs() << "definition file empty\n"; + return 1; + } + + COFF::MachineTypes Machine = IMAGE_FILE_MACHINE_UNKNOWN; + if (auto *Arg = Args.getLastArg(OPT_m)) + Machine = getEmulation(Arg->getValue()); + + if (Machine == 
IMAGE_FILE_MACHINE_UNKNOWN) { + llvm::errs() << "unknown target\n"; + return 1; + } + + Expected Def = + parseCOFFModuleDefinition(*MB, Machine, true); + + if (!Def) { + llvm::errs() << "error parsing definition\n" + << errorToErrorCode(Def.takeError()).message(); + return 1; + } + + // Do this after the parser because parseCOFFModuleDefinition sets OutputFile. + if (auto *Arg = Args.getLastArg(OPT_D)) + Def->OutputFile = Arg->getValue(); + + if (Def->OutputFile.empty()) { + llvm::errs() << "no output file specified\n"; + return 1; + } + + std::string Path = Args.getLastArgValue(OPT_l); + if (Path.empty()) + Path = getImplibPath(Def->OutputFile); + + if (Machine == IMAGE_FILE_MACHINE_I386 && Args.getLastArg(OPT_k)) { + for (COFFShortExport& E : Def->Exports) { + if (E.isWeak() || (!E.Name.empty() && E.Name[0] == '?')) + continue; + E.SymbolName = E.Name; + // Trim off the trailing decoration. Symbols will always have a + // starting prefix here (either _ for cdecl/stdcall, @ for fastcall + // or ? for C++ functions). (Vectorcall functions also will end up having + // a prefix here, even if they shouldn't.) + E.Name = E.Name.substr(0, E.Name.find('@', 1)); + // By making sure E.SymbolName != E.Name for decorated symbols, + // writeImportLibrary writes these symbols with the type + // IMPORT_NAME_UNDECORATE. + } + } + + if (writeImportLibrary(Def->OutputFile, Path, Def->Exports, Machine, true)) + return 1; + return 0; +} diff --git a/interpreter/llvm/src/lib/ToolDrivers/llvm-dlltool/LLVMBuild.txt b/interpreter/llvm/src/lib/ToolDrivers/llvm-dlltool/LLVMBuild.txt new file mode 100644 index 0000000000000..11736eb47bcb8 --- /dev/null +++ b/interpreter/llvm/src/lib/ToolDrivers/llvm-dlltool/LLVMBuild.txt @@ -0,0 +1,22 @@ +;===- ./lib/ToolDrivers/llvm-dlltool/LLVMBuild.txt -------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = DlltoolDriver +parent = Libraries +required_libraries = Object Option Support diff --git a/interpreter/llvm/src/lib/ToolDrivers/llvm-dlltool/Options.td b/interpreter/llvm/src/lib/ToolDrivers/llvm-dlltool/Options.td new file mode 100644 index 0000000000000..e78182ab8130b --- /dev/null +++ b/interpreter/llvm/src/lib/ToolDrivers/llvm-dlltool/Options.td @@ -0,0 +1,26 @@ +include "llvm/Option/OptParser.td" + +def m: JoinedOrSeparate<["-"], "m">, HelpText<"Set target machine">; +def m_long : JoinedOrSeparate<["--"], "machine">, Alias; + +def l: JoinedOrSeparate<["-"], "l">, HelpText<"Generate an import lib">; +def l_long : JoinedOrSeparate<["--"], "output-lib">, Alias; + +def D: JoinedOrSeparate<["-"], "D">, HelpText<"Specify the input DLL Name">; +def D_long : JoinedOrSeparate<["--"], "dllname">, Alias; + +def d: JoinedOrSeparate<["-"], "d">, HelpText<"Input .def File">; +def d_long : JoinedOrSeparate<["--"], "input-def">, Alias; + +def k: Flag<["-"], "k">, HelpText<"Kill @n Symbol from export">; +def k_alias: Flag<["--"], "kill-at">, Alias; + +//============================================================================== +// The flags below do nothing. 
They are defined only for dlltool compatibility. +//============================================================================== + +def S: JoinedOrSeparate<["-"], "S">, HelpText<"Assembler">; +def S_alias: JoinedOrSeparate<["--"], "as">, Alias; + +def f: JoinedOrSeparate<["-"], "f">, HelpText<"Assembler Flags">; +def f_alias: JoinedOrSeparate<["--"], "as-flags">, Alias; diff --git a/interpreter/llvm/src/lib/ToolDrivers/llvm-lib/CMakeLists.txt b/interpreter/llvm/src/lib/ToolDrivers/llvm-lib/CMakeLists.txt new file mode 100644 index 0000000000000..ab53a6843446a --- /dev/null +++ b/interpreter/llvm/src/lib/ToolDrivers/llvm-lib/CMakeLists.txt @@ -0,0 +1,8 @@ +set(LLVM_TARGET_DEFINITIONS Options.td) +tablegen(LLVM Options.inc -gen-opt-parser-defs) +add_public_tablegen_target(LibOptionsTableGen) + +add_llvm_library(LLVMLibDriver + LibDriver.cpp + ) +add_dependencies(LLVMLibDriver LibOptionsTableGen) diff --git a/interpreter/llvm/src/lib/ToolDrivers/llvm-lib/LLVMBuild.txt b/interpreter/llvm/src/lib/ToolDrivers/llvm-lib/LLVMBuild.txt new file mode 100644 index 0000000000000..e4b32ec4af90d --- /dev/null +++ b/interpreter/llvm/src/lib/ToolDrivers/llvm-lib/LLVMBuild.txt @@ -0,0 +1,22 @@ +;===- ./lib/LibDriver/LLVMBuild.txt ----------------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = LibDriver +parent = Libraries +required_libraries = BinaryFormat Object Option Support diff --git a/interpreter/llvm/src/lib/ToolDrivers/llvm-lib/LibDriver.cpp b/interpreter/llvm/src/lib/ToolDrivers/llvm-lib/LibDriver.cpp new file mode 100644 index 0000000000000..f304b9c9a8dac --- /dev/null +++ b/interpreter/llvm/src/lib/ToolDrivers/llvm-lib/LibDriver.cpp @@ -0,0 +1,169 @@ +//===- LibDriver.cpp - lib.exe-compatible driver --------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Defines an interface to a lib.exe-compatible driver that also understands +// bitcode files. Used by llvm-lib and lld-link /lib. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ToolDrivers/llvm-lib/LibDriver.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/BinaryFormat/Magic.h" +#include "llvm/Object/ArchiveWriter.h" +#include "llvm/Option/Arg.h" +#include "llvm/Option/ArgList.h" +#include "llvm/Option/Option.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/Process.h" +#include "llvm/Support/StringSaver.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +namespace { + +enum { + OPT_INVALID = 0, +#define OPTION(_1, _2, ID, _4, _5, _6, _7, _8, _9, _10, _11, _12) OPT_##ID, +#include "Options.inc" +#undef OPTION +}; + +#define PREFIX(NAME, VALUE) const char *const NAME[] = VALUE; +#include "Options.inc" +#undef PREFIX + +static const llvm::opt::OptTable::Info infoTable[] = { +#define OPTION(X1, X2, ID, KIND, GROUP, ALIAS, X6, X7, X8, X9, X10, X11) \ + {X1, X2, X9, X10, OPT_##ID, llvm::opt::Option::KIND##Class, \ + X8, X7, OPT_##GROUP, OPT_##ALIAS, X6, X11}, +#include "Options.inc" +#undef OPTION +}; + +class LibOptTable : public llvm::opt::OptTable { +public: + LibOptTable() : OptTable(infoTable, true) {} +}; + +} + +static std::string getOutputPath(llvm::opt::InputArgList *Args, + const llvm::NewArchiveMember &FirstMember) { + if (auto *Arg = Args->getLastArg(OPT_out)) + return Arg->getValue(); + SmallString<128> Val = StringRef(FirstMember.Buf->getBufferIdentifier()); + llvm::sys::path::replace_extension(Val, ".lib"); + return Val.str(); +} + +static std::vector getSearchPaths(llvm::opt::InputArgList *Args, + StringSaver &Saver) { + std::vector Ret; + // Add current directory as first item of the search path. + Ret.push_back(""); + + // Add /libpath flags. + for (auto *Arg : Args->filtered(OPT_libpath)) + Ret.push_back(Arg->getValue()); + + // Add $LIB. + Optional EnvOpt = sys::Process::GetEnv("LIB"); + if (!EnvOpt.hasValue()) + return Ret; + StringRef Env = Saver.save(*EnvOpt); + while (!Env.empty()) { + StringRef Path; + std::tie(Path, Env) = Env.split(';'); + Ret.push_back(Path); + } + return Ret; +} + +static Optional findInputFile(StringRef File, + ArrayRef Paths) { + for (auto Dir : Paths) { + SmallString<128> Path = Dir; + sys::path::append(Path, File); + if (sys::fs::exists(Path)) + return Path.str().str(); + } + return Optional(); +} + +int llvm::libDriverMain(llvm::ArrayRef ArgsArr) { + SmallVector NewArgs(ArgsArr.begin(), ArgsArr.end()); + BumpPtrAllocator Alloc; + StringSaver Saver(Alloc); + cl::ExpandResponseFiles(Saver, cl::TokenizeWindowsCommandLine, NewArgs); + ArgsArr = NewArgs; + + LibOptTable Table; + unsigned MissingIndex; + unsigned MissingCount; + llvm::opt::InputArgList Args = + Table.ParseArgs(ArgsArr.slice(1), MissingIndex, MissingCount); + if (MissingCount) { + llvm::errs() << "missing arg value for \"" + << Args.getArgString(MissingIndex) << "\", expected " + << MissingCount + << (MissingCount == 1 ? " argument.\n" : " arguments.\n"); + return 1; + } + for (auto *Arg : Args.filtered(OPT_UNKNOWN)) + llvm::errs() << "ignoring unknown argument: " << Arg->getSpelling() << "\n"; + + if (!Args.hasArgNoClaim(OPT_INPUT)) { + // No input files. To match lib.exe, silently do nothing. 
+ return 0; + } + + std::vector SearchPaths = getSearchPaths(&Args, Saver); + + std::vector Members; + for (auto *Arg : Args.filtered(OPT_INPUT)) { + Optional Path = findInputFile(Arg->getValue(), SearchPaths); + if (!Path.hasValue()) { + llvm::errs() << Arg->getValue() << ": no such file or directory\n"; + return 1; + } + Expected MOrErr = + NewArchiveMember::getFile(Saver.save(*Path), /*Deterministic=*/true); + if (!MOrErr) { + handleAllErrors(MOrErr.takeError(), [&](const llvm::ErrorInfoBase &EIB) { + llvm::errs() << Arg->getValue() << ": " << EIB.message() << "\n"; + }); + return 1; + } + llvm::file_magic Magic = llvm::identify_magic(MOrErr->Buf->getBuffer()); + if (Magic != llvm::file_magic::coff_object && + Magic != llvm::file_magic::bitcode && + Magic != llvm::file_magic::windows_resource) { + llvm::errs() << Arg->getValue() + << ": not a COFF object, bitcode or resource file\n"; + return 1; + } + Members.emplace_back(std::move(*MOrErr)); + } + + std::pair Result = + llvm::writeArchive(getOutputPath(&Args, Members[0]), Members, + /*WriteSymtab=*/true, object::Archive::K_GNU, + /*Deterministic*/ true, Args.hasArg(OPT_llvmlibthin)); + + if (Result.second) { + if (Result.first.empty()) + Result.first = ArgsArr[0]; + llvm::errs() << Result.first << ": " << Result.second.message() << "\n"; + return 1; + } + + return 0; +} diff --git a/interpreter/llvm/src/lib/ToolDrivers/llvm-lib/Options.td b/interpreter/llvm/src/lib/ToolDrivers/llvm-lib/Options.td new file mode 100644 index 0000000000000..5a56ef7468d4a --- /dev/null +++ b/interpreter/llvm/src/lib/ToolDrivers/llvm-lib/Options.td @@ -0,0 +1,25 @@ +include "llvm/Option/OptParser.td" + +// lib.exe accepts options starting with either a dash or a slash. + +// Flag that takes no arguments. +class F : Flag<["/", "-", "-?"], name>; + +// Flag that takes one argument after ":". +class P : + Joined<["/", "-", "-?"], name#":">, HelpText; + +def libpath: P<"libpath", "Object file search path">; +def out : P<"out", "Path to file to write output">; + +def llvmlibthin : F<"llvmlibthin">; + +//============================================================================== +// The flags below do nothing. They are defined only for lib.exe compatibility. 
+//============================================================================== + +class QF : Joined<["/", "-", "-?"], name#":">; + +def ignore : QF<"ignore">; +def machine: QF<"machine">; +def nologo : F<"nologo">; diff --git a/interpreter/llvm/src/lib/Transforms/Coroutines/CoroCleanup.cpp b/interpreter/llvm/src/lib/Transforms/Coroutines/CoroCleanup.cpp index a97db6fde454e..359876627fce1 100644 --- a/interpreter/llvm/src/lib/Transforms/Coroutines/CoroCleanup.cpp +++ b/interpreter/llvm/src/lib/Transforms/Coroutines/CoroCleanup.cpp @@ -101,7 +101,9 @@ namespace { struct CoroCleanup : FunctionPass { static char ID; // Pass identification, replacement for typeid - CoroCleanup() : FunctionPass(ID) {} + CoroCleanup() : FunctionPass(ID) { + initializeCoroCleanupPass(*PassRegistry::getPassRegistry()); + } std::unique_ptr L; @@ -124,6 +126,7 @@ struct CoroCleanup : FunctionPass { if (!L) AU.setPreservesAll(); } + StringRef getPassName() const override { return "Coroutine Cleanup"; } }; } diff --git a/interpreter/llvm/src/lib/Transforms/Coroutines/CoroEarly.cpp b/interpreter/llvm/src/lib/Transforms/Coroutines/CoroEarly.cpp index e8bb0ca99d8a5..ba05896af150c 100644 --- a/interpreter/llvm/src/lib/Transforms/Coroutines/CoroEarly.cpp +++ b/interpreter/llvm/src/lib/Transforms/Coroutines/CoroEarly.cpp @@ -183,7 +183,9 @@ namespace { struct CoroEarly : public FunctionPass { static char ID; // Pass identification, replacement for typeid. - CoroEarly() : FunctionPass(ID) {} + CoroEarly() : FunctionPass(ID) { + initializeCoroEarlyPass(*PassRegistry::getPassRegistry()); + } std::unique_ptr L; @@ -208,6 +210,9 @@ struct CoroEarly : public FunctionPass { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); } + StringRef getPassName() const override { + return "Lower early coroutine intrinsics"; + } }; } diff --git a/interpreter/llvm/src/lib/Transforms/Coroutines/CoroElide.cpp b/interpreter/llvm/src/lib/Transforms/Coroutines/CoroElide.cpp index c6ac3f614ff7e..42fd6d7461459 100644 --- a/interpreter/llvm/src/lib/Transforms/Coroutines/CoroElide.cpp +++ b/interpreter/llvm/src/lib/Transforms/Coroutines/CoroElide.cpp @@ -258,7 +258,9 @@ static bool replaceDevirtTrigger(Function &F) { namespace { struct CoroElide : FunctionPass { static char ID; - CoroElide() : FunctionPass(ID) {} + CoroElide() : FunctionPass(ID) { + initializeCoroElidePass(*PassRegistry::getPassRegistry()); + } std::unique_ptr L; @@ -301,6 +303,7 @@ struct CoroElide : FunctionPass { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); } + StringRef getPassName() const override { return "Coroutine Elision"; } }; } diff --git a/interpreter/llvm/src/lib/Transforms/Coroutines/CoroFrame.cpp b/interpreter/llvm/src/lib/Transforms/Coroutines/CoroFrame.cpp index 19e6789dfa74a..85e9003ec3c56 100644 --- a/interpreter/llvm/src/lib/Transforms/Coroutines/CoroFrame.cpp +++ b/interpreter/llvm/src/lib/Transforms/Coroutines/CoroFrame.cpp @@ -177,7 +177,7 @@ SuspendCrossingInfo::SuspendCrossingInfo(Function &F, coro::Shape &Shape) // consume. Note, that crossing coro.save also requires a spill, as any code // between coro.save and coro.suspend may resume the coroutine and all of the // state needs to be saved by that time. 
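// Illustrative IR sketch (not part of this patch) of why a value that
// crosses coro.save must be spilled:
//
//   %save = call token @llvm.coro.save(i8* %hdl)
//   call void @bar()                          ; may resume the coroutine
//   %s = call i8 @llvm.coro.suspend(token %save, i1 false)
//
// A value defined above %save and used after the suspend must live in the
// coroutine frame, because @bar may already have resumed the coroutine
// before @llvm.coro.suspend itself executes.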
- auto markSuspendBlock = [&](IntrinsicInst* BarrierInst) { + auto markSuspendBlock = [&](IntrinsicInst *BarrierInst) { BasicBlock *SuspendBlock = BarrierInst->getParent(); auto &B = getBlockData(SuspendBlock); B.Suspend = true; @@ -347,6 +347,27 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape, return FrameTy; } +// We need to make room to insert a spill after the initial PHIs, but before +// the catchswitch instruction. Placing the spill before the catchswitch would +// violate the requirement that a catchswitch, like all other EH pads, must be +// the first non-PHI instruction in a block. +// +// Split away the catchswitch into a separate block and insert in its place: +// +// cleanuppad <InsertPt> cleanupret. +// +// The cleanupret instruction will act as an insert point for the spill. +static Instruction *splitBeforeCatchSwitch(CatchSwitchInst *CatchSwitch) { + BasicBlock *CurrentBlock = CatchSwitch->getParent(); + BasicBlock *NewBlock = CurrentBlock->splitBasicBlock(CatchSwitch); + CurrentBlock->getTerminator()->eraseFromParent(); + + auto *CleanupPad = + CleanupPadInst::Create(CatchSwitch->getParentPad(), {}, "", CurrentBlock); + auto *CleanupRet = + CleanupReturnInst::Create(CleanupPad, NewBlock, CurrentBlock); + return CleanupRet; +} + // Replace all alloca and SSA values that are accessed across suspend points // with GetElementPointer from coroutine frame + loads and stores. Create an // AllocaSpillBB that will become the new entry block for the resume parts of @@ -437,8 +458,11 @@ static Instruction *insertSpills(SpillInfo &Spills, coro::Shape &Shape) { InsertPt = NewBB->getTerminator(); } else if (dyn_cast<PHINode>(CurrentValue)) { // Skip the PHINode and EH pad instructions. - InsertPt = - &*cast<Instruction>(E.def())->getParent()->getFirstInsertionPt(); + BasicBlock *DefBlock = cast<Instruction>(E.def())->getParent(); + if (auto *CSI = dyn_cast<CatchSwitchInst>(DefBlock->getTerminator())) + InsertPt = splitBeforeCatchSwitch(CSI); + else + InsertPt = &*DefBlock->getFirstInsertionPt(); } else { // For all other values, the spill is placed immediately after // the definition. @@ -495,6 +519,78 @@ static Instruction *insertSpills(SpillInfo &Spills, coro::Shape &Shape) { return FramePtr; } +// Sets the unwind edge of an instruction to a particular successor. +static void setUnwindEdgeTo(TerminatorInst *TI, BasicBlock *Succ) { + if (auto *II = dyn_cast<InvokeInst>(TI)) + II->setUnwindDest(Succ); + else if (auto *CS = dyn_cast<CatchSwitchInst>(TI)) + CS->setUnwindDest(Succ); + else if (auto *CR = dyn_cast<CleanupReturnInst>(TI)) + CR->setUnwindDest(Succ); + else + llvm_unreachable("unexpected terminator instruction"); +} + +// Replaces all uses of OldPred with the NewPred block in all PHINodes in a +// block. +static void updatePhiNodes(BasicBlock *DestBB, BasicBlock *OldPred, + BasicBlock *NewPred, + PHINode *LandingPadReplacement) { + unsigned BBIdx = 0; + for (BasicBlock::iterator I = DestBB->begin(); isa<PHINode>(I); ++I) { + PHINode *PN = cast<PHINode>(I); + + // We manually update the LandingPadReplacement PHINode and it is the last + // PHI Node. So, if we find it, we are done. + if (LandingPadReplacement == PN) + break; + + // Reuse the previous value of BBIdx if it lines up. In cases where we + // have multiple phi nodes with *lots* of predecessors, this is a speed + // win because we don't have to scan the PHI looking for TIBB. This + // happens because the BB list of PHI nodes is usually in the same + // order.
+ if (PN->getIncomingBlock(BBIdx) != OldPred) + BBIdx = PN->getBasicBlockIndex(OldPred); + + assert(BBIdx != (unsigned)-1 && "Invalid PHI Index!"); + PN->setIncomingBlock(BBIdx, NewPred); + } +} + +// Uses SplitEdge unless the successor block is an EH pad, in which case do +// EH-specific handling. +static BasicBlock *ehAwareSplitEdge(BasicBlock *BB, BasicBlock *Succ, + LandingPadInst *OriginalPad, + PHINode *LandingPadReplacement) { + auto *PadInst = Succ->getFirstNonPHI(); + if (!LandingPadReplacement && !PadInst->isEHPad()) + return SplitEdge(BB, Succ); + + auto *NewBB = BasicBlock::Create(BB->getContext(), "", BB->getParent(), Succ); + setUnwindEdgeTo(BB->getTerminator(), NewBB); + updatePhiNodes(Succ, BB, NewBB, LandingPadReplacement); + + if (LandingPadReplacement) { + auto *NewLP = OriginalPad->clone(); + auto *Terminator = BranchInst::Create(Succ, NewBB); + NewLP->insertBefore(Terminator); + LandingPadReplacement->addIncoming(NewLP, NewBB); + return NewBB; + } + Value *ParentPad = nullptr; + if (auto *FuncletPad = dyn_cast<FuncletPadInst>(PadInst)) + ParentPad = FuncletPad->getParentPad(); + else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(PadInst)) + ParentPad = CatchSwitch->getParentPad(); + else + llvm_unreachable("handling for other EHPads not implemented yet"); + + auto *NewCleanupPad = CleanupPadInst::Create(ParentPad, {}, "", NewBB); + CleanupReturnInst::Create(NewCleanupPad, Succ, NewBB); + return NewBB; +} + static void rewritePHIs(BasicBlock &BB) { // For every incoming edge we will create a block holding all // incoming values in a single PHI node. @@ -502,7 +598,7 @@ // loop: // %n.val = phi i32[%n, %entry], [%inc, %loop] // - // It will create: + // It will create: // // loop.from.entry: // %n.loop.pre = phi i32 [%n, %entry] @@ -517,9 +613,22 @@ // TODO: Simplify PHINodes in the basic block to remove duplicate // predecessors. + LandingPadInst *LandingPad = nullptr; + PHINode *ReplPHI = nullptr; + if ((LandingPad = dyn_cast_or_null<LandingPadInst>(BB.getFirstNonPHI()))) { + // ehAwareSplitEdge will clone the LandingPad in all the edge blocks. + // We replace the original landing pad with a PHINode that will collect the + // results from all of them. + ReplPHI = PHINode::Create(LandingPad->getType(), 1, "", LandingPad); + ReplPHI->takeName(LandingPad); + LandingPad->replaceAllUsesWith(ReplPHI); + // We will erase the original landing pad at the end of this function after + // ehAwareSplitEdge cloned it in the transition blocks. + } + SmallVector<BasicBlock *, 8> Preds(pred_begin(&BB), pred_end(&BB)); for (BasicBlock *Pred : Preds) { - auto *IncomingBB = SplitEdge(Pred, &BB); + auto *IncomingBB = ehAwareSplitEdge(Pred, &BB, LandingPad, ReplPHI); IncomingBB->setName(BB.getName() + Twine(".from.") + Pred->getName()); auto *PN = cast<PHINode>(&BB.front()); do { @@ -531,7 +640,14 @@ InputV->addIncoming(V, Pred); PN->setIncomingValue(Index, InputV); PN = dyn_cast<PHINode>(PN->getNextNode()); - } while (PN); + } while (PN != ReplPHI); // ReplPHI is either null or the PHI that replaced + // the landing pad. + } + + if (LandingPad) { + // Calls to ehAwareSplitEdge cloned the original landing pad. + // No longer need it. + LandingPad->eraseFromParent(); } } @@ -683,9 +799,9 @@ void coro::buildCoroutineFrame(Function &F, Shape &Shape) { splitAround(CSI, "CoroSuspend"); } - // Put fallthrough CoroEnd into its own block. Note: Shape::buildFrom places - // the fallthrough coro.end as the first element of CoroEnds array.
- splitAround(Shape.CoroEnds.front(), "CoroEnd"); + // Put CoroEnds into their own blocks. + for (CoroEndInst *CE : Shape.CoroEnds) + splitAround(CE, "CoroEnd"); // Transforms multi-edge PHI Nodes, so that any value feeding into a PHI will // never has its definition separated from the PHI by the suspend point. @@ -697,19 +813,24 @@ void coro::buildCoroutineFrame(Function &F, Shape &Shape) { IRBuilder<> Builder(F.getContext()); SpillInfo Spills; - // See if there are materializable instructions across suspend points. - for (Instruction &I : instructions(F)) - if (materializable(I)) - for (User *U : I.users()) - if (Checker.isDefinitionAcrossSuspend(I, U)) - Spills.emplace_back(&I, U); - - // Rewrite materializable instructions to be materialized at the use point. - DEBUG(dump("Materializations", Spills)); - rewriteMaterializableInstructions(Builder, Spills); + for (int Repeat = 0; Repeat < 4; ++Repeat) { + // See if there are materializable instructions across suspend points. + for (Instruction &I : instructions(F)) + if (materializable(I)) + for (User *U : I.users()) + if (Checker.isDefinitionAcrossSuspend(I, U)) + Spills.emplace_back(&I, U); + + if (Spills.empty()) + break; + + // Rewrite materializable instructions to be materialized at the use point. + DEBUG(dump("Materializations", Spills)); + rewriteMaterializableInstructions(Builder, Spills); + Spills.clear(); + } // Collect the spills for arguments and other not-materializable values. - Spills.clear(); for (Argument &A : F.args()) for (User *U : A.users()) if (Checker.isDefinitionAcrossSuspend(A, U)) @@ -731,8 +852,6 @@ void coro::buildCoroutineFrame(Function &F, Shape &Shape) { if (I.getType()->isTokenTy()) report_fatal_error( "token definition is separated from the use by a suspend point"); - assert(!materializable(I) && - "rewriteMaterializable did not do its job"); Spills.emplace_back(&I, U); } } diff --git a/interpreter/llvm/src/lib/Transforms/Coroutines/CoroInstr.h b/interpreter/llvm/src/lib/Transforms/Coroutines/CoroInstr.h index 5c666bdfea1f6..9a8cc5a2591c9 100644 --- a/interpreter/llvm/src/lib/Transforms/Coroutines/CoroInstr.h +++ b/interpreter/llvm/src/lib/Transforms/Coroutines/CoroInstr.h @@ -58,10 +58,10 @@ class LLVM_LIBRARY_VISIBILITY CoroSubFnInst : public IntrinsicInst { } // Methods to support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const IntrinsicInst *I) { + static bool classof(const IntrinsicInst *I) { return I->getIntrinsicID() == Intrinsic::coro_subfn_addr; } - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return isa(V) && classof(cast(V)); } }; @@ -70,10 +70,10 @@ class LLVM_LIBRARY_VISIBILITY CoroSubFnInst : public IntrinsicInst { class LLVM_LIBRARY_VISIBILITY CoroAllocInst : public IntrinsicInst { public: // Methods to support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const IntrinsicInst *I) { + static bool classof(const IntrinsicInst *I) { return I->getIntrinsicID() == Intrinsic::coro_alloc; } - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return isa(V) && classof(cast(V)); } }; @@ -175,10 +175,10 @@ class LLVM_LIBRARY_VISIBILITY CoroIdInst : public IntrinsicInst { } // Methods to support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const IntrinsicInst *I) { + static bool classof(const IntrinsicInst *I) { return I->getIntrinsicID() == Intrinsic::coro_id; } - static inline bool classof(const Value *V) { + static bool classof(const 
Value *V) { return isa(V) && classof(cast(V)); } }; @@ -187,10 +187,10 @@ class LLVM_LIBRARY_VISIBILITY CoroIdInst : public IntrinsicInst { class LLVM_LIBRARY_VISIBILITY CoroFrameInst : public IntrinsicInst { public: // Methods to support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const IntrinsicInst *I) { + static bool classof(const IntrinsicInst *I) { return I->getIntrinsicID() == Intrinsic::coro_frame; } - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return isa(V) && classof(cast(V)); } }; @@ -203,10 +203,10 @@ class LLVM_LIBRARY_VISIBILITY CoroFreeInst : public IntrinsicInst { Value *getFrame() const { return getArgOperand(FrameArg); } // Methods to support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const IntrinsicInst *I) { + static bool classof(const IntrinsicInst *I) { return I->getIntrinsicID() == Intrinsic::coro_free; } - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return isa(V) && classof(cast(V)); } }; @@ -221,10 +221,10 @@ class LLVM_LIBRARY_VISIBILITY CoroBeginInst : public IntrinsicInst { Value *getMem() const { return getArgOperand(MemArg); } // Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const IntrinsicInst *I) { + static bool classof(const IntrinsicInst *I) { return I->getIntrinsicID() == Intrinsic::coro_begin; } - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return isa(V) && classof(cast(V)); } }; @@ -233,10 +233,10 @@ class LLVM_LIBRARY_VISIBILITY CoroBeginInst : public IntrinsicInst { class LLVM_LIBRARY_VISIBILITY CoroSaveInst : public IntrinsicInst { public: // Methods to support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const IntrinsicInst *I) { + static bool classof(const IntrinsicInst *I) { return I->getIntrinsicID() == Intrinsic::coro_save; } - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return isa(V) && classof(cast(V)); } }; @@ -254,10 +254,10 @@ class LLVM_LIBRARY_VISIBILITY CoroPromiseInst : public IntrinsicInst { } // Methods to support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const IntrinsicInst *I) { + static bool classof(const IntrinsicInst *I) { return I->getIntrinsicID() == Intrinsic::coro_promise; } - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return isa(V) && classof(cast(V)); } }; @@ -279,10 +279,10 @@ class LLVM_LIBRARY_VISIBILITY CoroSuspendInst : public IntrinsicInst { } // Methods to support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const IntrinsicInst *I) { + static bool classof(const IntrinsicInst *I) { return I->getIntrinsicID() == Intrinsic::coro_suspend; } - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return isa(V) && classof(cast(V)); } }; @@ -291,10 +291,10 @@ class LLVM_LIBRARY_VISIBILITY CoroSuspendInst : public IntrinsicInst { class LLVM_LIBRARY_VISIBILITY CoroSizeInst : public IntrinsicInst { public: // Methods to support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const IntrinsicInst *I) { + static bool classof(const IntrinsicInst *I) { return I->getIntrinsicID() == Intrinsic::coro_size; } - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return isa(V) && classof(cast(V)); } }; @@ -310,10 +310,10 @@ class 
LLVM_LIBRARY_VISIBILITY CoroEndInst : public IntrinsicInst { } // Methods to support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const IntrinsicInst *I) { + static bool classof(const IntrinsicInst *I) { return I->getIntrinsicID() == Intrinsic::coro_end; } - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V)); } }; diff --git a/interpreter/llvm/src/lib/Transforms/Coroutines/CoroSplit.cpp b/interpreter/llvm/src/lib/Transforms/Coroutines/CoroSplit.cpp index 12eb167898259..173dc05f05847 100644 --- a/interpreter/llvm/src/lib/Transforms/Coroutines/CoroSplit.cpp +++ b/interpreter/llvm/src/lib/Transforms/Coroutines/CoroSplit.cpp @@ -22,8 +22,8 @@ #include "CoroInternal.h" #include "llvm/Analysis/CallGraphSCCPass.h" #include "llvm/IR/DebugInfoMetadata.h" -#include "llvm/IR/InstIterator.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstIterator.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Verifier.h" #include "llvm/Transforms/Scalar.h" @@ -228,14 +228,6 @@ static Function *createClone(Function &F, Twine Suffix, coro::Shape &Shape, SmallVector<ReturnInst *, 4> Returns; - if (DISubprogram *SP = F.getSubprogram()) { - // If we have debug info, add mapping for the metadata nodes that should not - // be cloned by CloneFunctionInfo. - auto &MD = VMap.MD(); - MD[SP->getUnit()].reset(SP->getUnit()); - MD[SP->getType()].reset(SP->getType()); - MD[SP->getFile()].reset(SP->getFile()); - } CloneFunctionInto(NewF, &F, VMap, /*ModuleLevelChanges=*/true, Returns); // Remove old returns. @@ -509,12 +501,87 @@ static void simplifySuspendPoints(coro::Shape &Shape) { S.resize(N); } +static SmallPtrSet<BasicBlock *, 8> getCoroBeginPredBlocks(CoroBeginInst *CB) { + // Collect all blocks that we need to look for instructions to relocate. + SmallPtrSet<BasicBlock *, 8> RelocBlocks; + SmallVector<BasicBlock *, 8> Work; + Work.push_back(CB->getParent()); + + do { + BasicBlock *Current = Work.pop_back_val(); + for (BasicBlock *BB : predecessors(Current)) + if (RelocBlocks.count(BB) == 0) { + RelocBlocks.insert(BB); + Work.push_back(BB); + } + } while (!Work.empty()); + return RelocBlocks; +} + +static SmallPtrSet<Instruction *, 8> +getNotRelocatableInstructions(CoroBeginInst *CoroBegin, + SmallPtrSetImpl<BasicBlock *> &RelocBlocks) { + SmallPtrSet<Instruction *, 8> DoNotRelocate; + // Collect all instructions that we should not relocate + SmallVector<Instruction *, 8> Work; + + // Start with CoroBegin and terminators of all preceding blocks. + Work.push_back(CoroBegin); + BasicBlock *CoroBeginBB = CoroBegin->getParent(); + for (BasicBlock *BB : RelocBlocks) + if (BB != CoroBeginBB) + Work.push_back(BB->getTerminator()); + + // For every instruction in the Work list, place its operands in DoNotRelocate + // set. + do { + Instruction *Current = Work.pop_back_val(); + DoNotRelocate.insert(Current); + for (Value *U : Current->operands()) { + auto *I = dyn_cast<Instruction>(U); + if (!I) + continue; + if (isa<AllocaInst>(U)) + continue; + if (DoNotRelocate.count(I) == 0) { + Work.push_back(I); + DoNotRelocate.insert(I); + } + } + } while (!Work.empty()); + return DoNotRelocate; +} + +static void relocateInstructionBefore(CoroBeginInst *CoroBegin, Function &F) { + // Analyze which non-alloca instructions are needed for allocation and + // relocate the rest to after coro.begin. We need to do this, since some of + // the targets of those instructions may be placed into coroutine frame + // memory, which only becomes available after the coro.begin intrinsic.
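// Illustrative sketch (not from this patch): for entry-block code such as
//
//   %size = call i64 @llvm.coro.size.i64()    ; needed to allocate the frame
//   %mem = call i8* @malloc(i64 %size)
//   %tmp = add i32 %a, %b                     ; not needed for allocation
//   %hdl = call i8* @llvm.coro.begin(token %id, i8* %mem)
//
// %size and %mem must stay above coro.begin, while %tmp can be moved below
// it, so that a later spill of %tmp targets frame memory that is valid.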
+ + auto BlockSet = getCoroBeginPredBlocks(CoroBegin); + auto DoNotRelocateSet = getNotRelocatableInstructions(CoroBegin, BlockSet); + + Instruction *InsertPt = CoroBegin->getNextNode(); + BasicBlock &BB = F.getEntryBlock(); // TODO: Look at other blocks as well. + for (auto B = BB.begin(), E = BB.end(); B != E;) { + Instruction &I = *B++; + if (isa(&I)) + continue; + if (&I == CoroBegin) + break; + if (DoNotRelocateSet.count(&I)) + continue; + I.moveBefore(InsertPt); + } +} + static void splitCoroutine(Function &F, CallGraph &CG, CallGraphSCC &SCC) { coro::Shape Shape(F); if (!Shape.CoroBegin) return; simplifySuspendPoints(Shape); + relocateInstructionBefore(Shape.CoroBegin, F); buildCoroutineFrame(F, Shape); replaceFrameSize(Shape); @@ -614,7 +681,9 @@ namespace { struct CoroSplit : public CallGraphSCCPass { static char ID; // Pass identification, replacement for typeid - CoroSplit() : CallGraphSCCPass(ID) {} + CoroSplit() : CallGraphSCCPass(ID) { + initializeCoroSplitPass(*PassRegistry::getPassRegistry()); + } bool Run = false; @@ -660,6 +729,7 @@ struct CoroSplit : public CallGraphSCCPass { void getAnalysisUsage(AnalysisUsage &AU) const override { CallGraphSCCPass::getAnalysisUsage(AU); } + StringRef getPassName() const override { return "Coroutine Splitting"; } }; } diff --git a/interpreter/llvm/src/lib/Transforms/Coroutines/Coroutines.cpp b/interpreter/llvm/src/lib/Transforms/Coroutines/Coroutines.cpp index ea48043f9381f..44e1f9b404eda 100644 --- a/interpreter/llvm/src/lib/Transforms/Coroutines/Coroutines.cpp +++ b/interpreter/llvm/src/lib/Transforms/Coroutines/Coroutines.cpp @@ -218,6 +218,8 @@ void coro::Shape::buildFrom(Function &F) { size_t FinalSuspendIndex = 0; clear(*this); SmallVector CoroFrames; + SmallVector UnusedCoroSaves; + for (Instruction &I : instructions(F)) { if (auto II = dyn_cast(&I)) { switch (II->getIntrinsicID()) { @@ -229,6 +231,12 @@ void coro::Shape::buildFrom(Function &F) { case Intrinsic::coro_frame: CoroFrames.push_back(cast(II)); break; + case Intrinsic::coro_save: + // After optimizations, coro_suspends using this coro_save might have + // been removed, remember orphaned coro_saves to remove them later. + if (II->use_empty()) + UnusedCoroSaves.push_back(cast(II)); + break; case Intrinsic::coro_suspend: CoroSuspends.push_back(cast(II)); if (CoroSuspends.back()->isFinal()) { @@ -311,4 +319,8 @@ void coro::Shape::buildFrom(Function &F) { if (HasFinalSuspend && FinalSuspendIndex != CoroSuspends.size() - 1) std::swap(CoroSuspends[FinalSuspendIndex], CoroSuspends.back()); + + // Remove orphaned coro.saves. + for (CoroSaveInst *CoroSave : UnusedCoroSaves) + CoroSave->eraseFromParent(); } diff --git a/interpreter/llvm/src/lib/Transforms/IPO/ArgumentPromotion.cpp b/interpreter/llvm/src/lib/Transforms/IPO/ArgumentPromotion.cpp index d8cf8d3f5da21..72bae203ee94b 100644 --- a/interpreter/llvm/src/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/interpreter/llvm/src/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -124,6 +124,10 @@ doPromotion(Function *F, SmallPtrSetImpl &ArgsToPromote, } else if (I->use_empty()) { // Dead argument (which are always marked as promotable) ++NumArgumentsDead; + + // There may be remaining metadata uses of the argument for things like + // llvm.dbg.value. Replace them with undef. + I->replaceAllUsesWith(UndefValue::get(I->getType())); } else { // Okay, this is being promoted. 
This means that the only uses are loads // or GEPs which are only used by loads @@ -352,7 +356,7 @@ doPromotion(Function *F, SmallPtrSetImpl &ArgsToPromote, // Just add all the struct element types. Type *AgTy = cast(I->getType())->getElementType(); Value *TheAlloca = new AllocaInst(AgTy, DL.getAllocaAddrSpace(), nullptr, - "", InsertPt); + I->getParamAlignment(), "", InsertPt); StructType *STy = cast(AgTy); Value *Idxs[2] = {ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), nullptr}; diff --git a/interpreter/llvm/src/lib/Transforms/IPO/CrossDSOCFI.cpp b/interpreter/llvm/src/lib/Transforms/IPO/CrossDSOCFI.cpp index 1b111de061576..d94aa5da85601 100644 --- a/interpreter/llvm/src/lib/Transforms/IPO/CrossDSOCFI.cpp +++ b/interpreter/llvm/src/lib/Transforms/IPO/CrossDSOCFI.cpp @@ -95,6 +95,17 @@ void CrossDSOCFI::buildCFICheck(Module &M) { } } + NamedMDNode *CfiFunctionsMD = M.getNamedMetadata("cfi.functions"); + if (CfiFunctionsMD) { + for (auto Func : CfiFunctionsMD->operands()) { + assert(Func->getNumOperands() >= 2); + for (unsigned I = 2; I < Func->getNumOperands(); ++I) + if (ConstantInt *TypeId = + extractNumericTypeId(cast(Func->getOperand(I).get()))) + TypeIds.insert(TypeId->getZExtValue()); + } + } + LLVMContext &Ctx = M.getContext(); Constant *C = M.getOrInsertFunction( "__cfi_check", Type::getVoidTy(Ctx), Type::getInt64Ty(Ctx), diff --git a/interpreter/llvm/src/lib/Transforms/IPO/ElimAvailExtern.cpp b/interpreter/llvm/src/lib/Transforms/IPO/ElimAvailExtern.cpp index 98c4b1740306d..ecff88c88dcba 100644 --- a/interpreter/llvm/src/lib/Transforms/IPO/ElimAvailExtern.cpp +++ b/interpreter/llvm/src/lib/Transforms/IPO/ElimAvailExtern.cpp @@ -17,9 +17,9 @@ #include "llvm/ADT/Statistic.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Module.h" +#include "llvm/Pass.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/Utils/GlobalStatus.h" -#include "llvm/Pass.h" using namespace llvm; #define DEBUG_TYPE "elim-avail-extern" diff --git a/interpreter/llvm/src/lib/Transforms/IPO/ExtractGV.cpp b/interpreter/llvm/src/lib/Transforms/IPO/ExtractGV.cpp index 479fd182598a7..d1147f7d844b5 100644 --- a/interpreter/llvm/src/lib/Transforms/IPO/ExtractGV.cpp +++ b/interpreter/llvm/src/lib/Transforms/IPO/ExtractGV.cpp @@ -11,13 +11,13 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/IPO.h" #include "llvm/ADT/SetVector.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" +#include "llvm/Transforms/IPO.h" #include using namespace llvm; @@ -53,18 +53,18 @@ static void makeVisible(GlobalValue &GV, bool Delete) { } namespace { - /// @brief A pass to extract specific functions and their dependencies. + /// @brief A pass to extract specific global values and their dependencies. class GVExtractorPass : public ModulePass { SetVector Named; bool deleteStuff; public: static char ID; // Pass identification, replacement for typeid - /// FunctionExtractorPass - If deleteFn is true, this pass deletes as the - /// specified function. Otherwise, it deletes as much of the module as - /// possible, except for the function specified. - /// - explicit GVExtractorPass(std::vector& GVs, bool deleteS = true) + /// If deleteS is true, this pass deletes the specified global values. + /// Otherwise, it deletes as much of the module as possible, except for the + /// global values specified. 
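///
/// A hypothetical use via the public factory (illustrative only; M is a
/// Module and main is assumed to exist):
///   std::vector<GlobalValue *> Roots = {M.getFunction("main")};
///   legacy::PassManager PM;
///   PM.add(createGVExtractionPass(Roots, /*deleteS=*/false)); // keep only main
///   PM.run(M);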
+ explicit GVExtractorPass(std::vector &GVs, + bool deleteS = true) : ModulePass(ID), Named(GVs.begin(), GVs.end()), deleteStuff(deleteS) {} bool runOnModule(Module &M) override { diff --git a/interpreter/llvm/src/lib/Transforms/IPO/FunctionAttrs.cpp b/interpreter/llvm/src/lib/Transforms/IPO/FunctionAttrs.cpp index 28cc81c76d4fb..813a4b6e28319 100644 --- a/interpreter/llvm/src/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/interpreter/llvm/src/lib/Transforms/IPO/FunctionAttrs.cpp @@ -14,7 +14,6 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/IPO/FunctionAttrs.h" -#include "llvm/Transforms/IPO.h" #include "llvm/ADT/SCCIterator.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallSet.h" @@ -34,7 +33,7 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Transforms/IPO.h" using namespace llvm; #define DEBUG_TYPE "functionattrs" @@ -1188,6 +1187,10 @@ static bool runImpl(CallGraphSCC &SCC, AARGetterT AARGetter) { SCCNodes.insert(F); } + // Skip it if the SCC only contains optnone functions. + if (SCCNodes.empty()) + return Changed; + Changed |= addArgumentReturnedAttrs(SCCNodes); Changed |= addReadAttrs(SCCNodes, AARGetter); Changed |= addArgumentAttrs(SCCNodes); diff --git a/interpreter/llvm/src/lib/Transforms/IPO/FunctionImport.cpp b/interpreter/llvm/src/lib/Transforms/IPO/FunctionImport.cpp index 231487923fada..233a36d2bc543 100644 --- a/interpreter/llvm/src/lib/Transforms/IPO/FunctionImport.cpp +++ b/interpreter/llvm/src/lib/Transforms/IPO/FunctionImport.cpp @@ -64,6 +64,12 @@ static cl::opt ImportHotMultiplier( "import-hot-multiplier", cl::init(3.0), cl::Hidden, cl::value_desc("x"), cl::desc("Multiply the `import-instr-limit` threshold for hot callsites")); +static cl::opt ImportCriticalMultiplier( + "import-critical-multiplier", cl::init(100.0), cl::Hidden, + cl::value_desc("x"), + cl::desc( + "Multiply the `import-instr-limit` threshold for critical callsites")); + // FIXME: This multiplier was not really tuned up. static cl::opt ImportColdMultiplier( "import-cold-multiplier", cl::init(0), cl::Hidden, cl::value_desc("N"), @@ -207,6 +213,8 @@ static void computeImportForFunction( return ImportHotMultiplier; if (Hotness == CalleeInfo::HotnessType::Cold) return ImportColdMultiplier; + if (Hotness == CalleeInfo::HotnessType::Critical) + return ImportCriticalMultiplier; return 1.0; }; @@ -292,8 +300,7 @@ static void computeImportForFunction( static void ComputeImportForModule( const GVSummaryMapTy &DefinedGVSummaries, const ModuleSummaryIndex &Index, FunctionImporter::ImportMapTy &ImportList, - StringMap *ExportLists = nullptr, - const DenseSet *DeadSymbols = nullptr) { + StringMap *ExportLists = nullptr) { // Worklist contains the list of function imported in this module, for which // we will analyse the callees and may import further down the callgraph. 
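// Worked example (illustrative): with -import-instr-limit=100 the effective
// thresholds become 100 * 3.0 = 300 instructions for hot callsites,
// 100 * 100.0 = 10000 for critical callsites, and 100 * 0 = 0 (never import)
// for cold callsites, per the multipliers defined above.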
SmallVector Worklist; @@ -301,7 +308,7 @@ static void ComputeImportForModule( // Populate the worklist with the import for the functions in the current // module for (auto &GVSummary : DefinedGVSummaries) { - if (DeadSymbols && DeadSymbols->count(GVSummary.first)) { + if (!Index.isGlobalValueLive(GVSummary.second)) { DEBUG(dbgs() << "Ignores Dead GUID: " << GVSummary.first << "\n"); continue; } @@ -344,15 +351,14 @@ void llvm::ComputeCrossModuleImport( const ModuleSummaryIndex &Index, const StringMap &ModuleToDefinedGVSummaries, StringMap &ImportLists, - StringMap &ExportLists, - const DenseSet *DeadSymbols) { + StringMap &ExportLists) { // For each module that has function defined, compute the import/export lists. for (auto &DefinedGVSummaries : ModuleToDefinedGVSummaries) { auto &ImportList = ImportLists[DefinedGVSummaries.first()]; DEBUG(dbgs() << "Computing import for Module '" << DefinedGVSummaries.first() << "'\n"); ComputeImportForModule(DefinedGVSummaries.second, Index, ImportList, - &ExportLists, DeadSymbols); + &ExportLists); } // When computing imports we added all GUIDs referenced by anything @@ -414,82 +420,71 @@ void llvm::ComputeCrossModuleImportForModule( #endif } -DenseSet llvm::computeDeadSymbols( - const ModuleSummaryIndex &Index, +void llvm::computeDeadSymbols( + ModuleSummaryIndex &Index, const DenseSet &GUIDPreservedSymbols) { + assert(!Index.withGlobalValueDeadStripping()); if (!ComputeDead) - return DenseSet(); + return; if (GUIDPreservedSymbols.empty()) // Don't do anything when nothing is live, this is friendly with tests. - return DenseSet(); - DenseSet LiveSymbols; + return; + unsigned LiveSymbols = 0; SmallVector Worklist; Worklist.reserve(GUIDPreservedSymbols.size() * 2); for (auto GUID : GUIDPreservedSymbols) { ValueInfo VI = Index.getValueInfo(GUID); if (!VI) continue; - DEBUG(dbgs() << "Live root: " << VI.getGUID() << "\n"); - LiveSymbols.insert(VI); - Worklist.push_back(VI); + for (auto &S : VI.getSummaryList()) + S->setLive(true); } + // Add values flagged in the index as live roots to the worklist. - for (const auto &Entry : Index) { - bool IsLiveRoot = llvm::any_of( - Entry.second.SummaryList, - [&](const std::unique_ptr &Summary) { - return Summary->liveRoot(); - }); - if (!IsLiveRoot) - continue; - DEBUG(dbgs() << "Live root (summary): " << Entry.first << "\n"); - Worklist.push_back(ValueInfo(&Entry)); - } + for (const auto &Entry : Index) + for (auto &S : Entry.second.SummaryList) + if (S->isLive()) { + DEBUG(dbgs() << "Live root: " << Entry.first << "\n"); + Worklist.push_back(ValueInfo(&Entry)); + ++LiveSymbols; + break; + } + + // Make value live and add it to the worklist if it was not live before. 
+ // FIXME: we should only make the prevailing copy live here + auto visit = [&](ValueInfo VI) { + for (auto &S : VI.getSummaryList()) + if (S->isLive()) + return; + for (auto &S : VI.getSummaryList()) + S->setLive(true); + ++LiveSymbols; + Worklist.push_back(VI); + }; while (!Worklist.empty()) { auto VI = Worklist.pop_back_val(); - - // FIXME: we should only make the prevailing copy live here for (auto &Summary : VI.getSummaryList()) { - for (auto Ref : Summary->refs()) { - if (LiveSymbols.insert(Ref).second) { - DEBUG(dbgs() << "Marking live (ref): " << Ref.getGUID() << "\n"); - Worklist.push_back(Ref); - } - } - if (auto *FS = dyn_cast(Summary.get())) { - for (auto Call : FS->calls()) { - if (LiveSymbols.insert(Call.first).second) { - DEBUG(dbgs() << "Marking live (call): " << Call.first.getGUID() - << "\n"); - Worklist.push_back(Call.first); - } - } - } + for (auto Ref : Summary->refs()) + visit(Ref); + if (auto *FS = dyn_cast(Summary.get())) + for (auto Call : FS->calls()) + visit(Call.first); if (auto *AS = dyn_cast(Summary.get())) { auto AliaseeGUID = AS->getAliasee().getOriginalName(); ValueInfo AliaseeVI = Index.getValueInfo(AliaseeGUID); - if (AliaseeVI && LiveSymbols.insert(AliaseeVI).second) { - DEBUG(dbgs() << "Marking live (alias): " << AliaseeGUID << "\n"); - Worklist.push_back(AliaseeVI); - } + if (AliaseeVI) + visit(AliaseeVI); } } } - DenseSet DeadSymbols; - DeadSymbols.reserve( - std::min(Index.size(), Index.size() - LiveSymbols.size())); - for (auto &Entry : Index) { - if (!LiveSymbols.count(ValueInfo(&Entry))) { - DEBUG(dbgs() << "Marking dead: " << Entry.first << "\n"); - DeadSymbols.insert(Entry.first); - } - } - DEBUG(dbgs() << LiveSymbols.size() << " symbols Live, and " - << DeadSymbols.size() << " symbols Dead \n"); - NumDeadSymbols += DeadSymbols.size(); - NumLiveSymbols += LiveSymbols.size(); - return DeadSymbols; + Index.setWithGlobalValueDeadStripping(); + + unsigned DeadSymbols = Index.size() - LiveSymbols; + DEBUG(dbgs() << LiveSymbols << " symbols Live, and " << DeadSymbols + << " symbols Dead \n"); + NumDeadSymbols += DeadSymbols; + NumLiveSymbols += LiveSymbols; } /// Compute the set of summaries needed for a ThinLTO backend compilation of @@ -550,8 +545,6 @@ void llvm::thinLTOResolveWeakForLinkerModule( }; auto updateLinkage = [&](GlobalValue &GV) { - if (!GlobalValue::isWeakForLinker(GV.getLinkage())) - return; // See if the global summary analysis computed a new resolved linkage. const auto &GS = DefinedGlobals.find(GV.getGUID()); if (GS == DefinedGlobals.end()) @@ -559,6 +552,21 @@ void llvm::thinLTOResolveWeakForLinkerModule( auto NewLinkage = GS->second->linkage(); if (NewLinkage == GV.getLinkage()) return; + + // Switch the linkage to weakany if asked for, e.g. we do this for + // linker redefined symbols (via --wrap or --defsym). + // We record that the visibility should be changed here in `addThinLTO` + // as we need access to the resolution vectors for each input file in + // order to find which symbols have been redefined. + // We may consider reorganizing this code and moving the linkage recording + // somewhere else, e.g. in thinLTOResolveWeakForLinkerInIndex. + if (NewLinkage == GlobalValue::WeakAnyLinkage) { + GV.setLinkage(NewLinkage); + return; + } + + if (!GlobalValue::isWeakForLinker(GV.getLinkage())) + return; // Check for a non-prevailing def that has interposable linkage // (e.g. non-odr weak or linkonce). 
In that case we can't simply // convert to available_externally, since it would lose the diff --git a/interpreter/llvm/src/lib/Transforms/IPO/GlobalOpt.cpp b/interpreter/llvm/src/lib/Transforms/IPO/GlobalOpt.cpp index f277a51ae659a..93eab680ca6be 100644 --- a/interpreter/llvm/src/lib/Transforms/IPO/GlobalOpt.cpp +++ b/interpreter/llvm/src/lib/Transforms/IPO/GlobalOpt.cpp @@ -837,7 +837,7 @@ OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy, if (StoreInst *SI = dyn_cast(GV->user_back())) { // The global is initialized when the store to it occurs. new StoreInst(ConstantInt::getTrue(GV->getContext()), InitBool, false, 0, - SI->getOrdering(), SI->getSynchScope(), SI); + SI->getOrdering(), SI->getSyncScopeID(), SI); SI->eraseFromParent(); continue; } @@ -854,7 +854,7 @@ OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy, // Replace the cmp X, 0 with a use of the bool value. // Sink the load to where the compare was, if atomic rules allow us to. Value *LV = new LoadInst(InitBool, InitBool->getName()+".val", false, 0, - LI->getOrdering(), LI->getSynchScope(), + LI->getOrdering(), LI->getSyncScopeID(), LI->isUnordered() ? (Instruction*)ICI : LI); InitBoolUsed = true; switch (ICI->getPredicate()) { @@ -1605,7 +1605,7 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { assert(LI->getOperand(0) == GV && "Not a copy!"); // Insert a new load, to preserve the saved value. StoreVal = new LoadInst(NewGV, LI->getName()+".b", false, 0, - LI->getOrdering(), LI->getSynchScope(), LI); + LI->getOrdering(), LI->getSyncScopeID(), LI); } else { assert((isa(StoredVal) || isa(StoredVal)) && "This is not a form that we understand!"); @@ -1614,12 +1614,12 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { } } new StoreInst(StoreVal, NewGV, false, 0, - SI->getOrdering(), SI->getSynchScope(), SI); + SI->getOrdering(), SI->getSyncScopeID(), SI); } else { // Change the load into a load of bool then a select. LoadInst *LI = cast(UI); LoadInst *NLI = new LoadInst(NewGV, LI->getName()+".b", false, 0, - LI->getOrdering(), LI->getSynchScope(), LI); + LI->getOrdering(), LI->getSyncScopeID(), LI); Value *NSI; if (IsOneZero) NSI = new ZExtInst(NLI, LI->getType(), "", LI); @@ -2026,6 +2026,24 @@ OptimizeFunctions(Module &M, TargetLibraryInfo *TLI, continue; } + // LLVM's definition of dominance allows instructions that are cyclic + // in unreachable blocks, e.g.: + // %pat = select i1 %condition, @global, i16* %pat + // because any instruction dominates an instruction in a block that's + // not reachable from entry. + // So, remove unreachable blocks from the function, because a) there's + // no point in analyzing them and b) GlobalOpt should otherwise grow + // some more complicated logic to break these cycles. + // Removing unreachable blocks might invalidate the dominator so we + // recalculate it. 
+ if (!F->isDeclaration()) { + if (removeUnreachableBlocks(*F)) { + auto &DT = LookupDomTree(*F); + DT.recalculate(*F); + Changed = true; + } + } + Changed |= processGlobal(*F, TLI, LookupDomTree); if (!F->hasLocalLinkage()) diff --git a/interpreter/llvm/src/lib/Transforms/IPO/GlobalSplit.cpp b/interpreter/llvm/src/lib/Transforms/IPO/GlobalSplit.cpp index 4705ebe265ae1..e47d881d1127a 100644 --- a/interpreter/llvm/src/lib/Transforms/IPO/GlobalSplit.cpp +++ b/interpreter/llvm/src/lib/Transforms/IPO/GlobalSplit.cpp @@ -14,7 +14,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/GlobalSplit.h" #include "llvm/ADT/StringExtras.h" #include "llvm/IR/Constants.h" @@ -23,6 +22,7 @@ #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" #include "llvm/Pass.h" +#include "llvm/Transforms/IPO.h" #include diff --git a/interpreter/llvm/src/lib/Transforms/IPO/IPConstantPropagation.cpp b/interpreter/llvm/src/lib/Transforms/IPO/IPConstantPropagation.cpp index 349807496dc2c..f79b61037f1db 100644 --- a/interpreter/llvm/src/lib/Transforms/IPO/IPConstantPropagation.cpp +++ b/interpreter/llvm/src/lib/Transforms/IPO/IPConstantPropagation.cpp @@ -15,7 +15,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/IPO.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ValueTracking.h" @@ -24,6 +23,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" +#include "llvm/Transforms/IPO.h" using namespace llvm; #define DEBUG_TYPE "ipconstprop" diff --git a/interpreter/llvm/src/lib/Transforms/IPO/IPO.cpp b/interpreter/llvm/src/lib/Transforms/IPO/IPO.cpp index 89518f3c5faec..5bb305ca84d03 100644 --- a/interpreter/llvm/src/lib/Transforms/IPO/IPO.cpp +++ b/interpreter/llvm/src/lib/Transforms/IPO/IPO.cpp @@ -13,10 +13,10 @@ // //===----------------------------------------------------------------------===// -#include "llvm-c/Initialization.h" #include "llvm-c/Transforms/IPO.h" -#include "llvm/InitializePasses.h" +#include "llvm-c/Initialization.h" #include "llvm/IR/LegacyPassManager.h" +#include "llvm/InitializePasses.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/AlwaysInliner.h" #include "llvm/Transforms/IPO/FunctionAttrs.h" diff --git a/interpreter/llvm/src/lib/Transforms/IPO/InferFunctionAttrs.cpp b/interpreter/llvm/src/lib/Transforms/IPO/InferFunctionAttrs.cpp index 2ef299d9a2f0e..15d7515cc8425 100644 --- a/interpreter/llvm/src/lib/Transforms/IPO/InferFunctionAttrs.cpp +++ b/interpreter/llvm/src/lib/Transforms/IPO/InferFunctionAttrs.cpp @@ -8,8 +8,8 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/IPO/InferFunctionAttrs.h" -#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" diff --git a/interpreter/llvm/src/lib/Transforms/IPO/Inliner.cpp b/interpreter/llvm/src/lib/Transforms/IPO/Inliner.cpp index 673d3af0ab524..317770d133b3d 100644 --- a/interpreter/llvm/src/lib/Transforms/IPO/Inliner.cpp +++ b/interpreter/llvm/src/lib/Transforms/IPO/Inliner.cpp @@ -519,44 +519,51 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG, Function *Caller = CS.getCaller(); Function *Callee = CS.getCalledFunction(); - // If this call site is dead and it is to a 
readonly function, we should - // just delete the call instead of trying to inline it, regardless of - // size. This happens because IPSCCP propagates the result out of the - // call and then we're left with the dead call. - if (isInstructionTriviallyDead(CS.getInstruction(), &TLI)) { - DEBUG(dbgs() << " -> Deleting dead call: " << *CS.getInstruction() - << "\n"); - // Update the call graph by deleting the edge from Callee to Caller. - CG[Caller]->removeCallEdgeFor(CS); - CS.getInstruction()->eraseFromParent(); - ++NumCallsDeleted; - } else { - // We can only inline direct calls to non-declarations. - if (!Callee || Callee->isDeclaration()) - continue; + // We can only inline direct calls to non-declarations. + if (!Callee || Callee->isDeclaration()) + continue; + + Instruction *Instr = CS.getInstruction(); + bool IsTriviallyDead = isInstructionTriviallyDead(Instr, &TLI); + + int InlineHistoryID; + if (!IsTriviallyDead) { // If this call site was obtained by inlining another function, verify // that the include path for the function did not include the callee // itself. If so, we'd be recursively inlining the same function, // which would provide the same callsites, which would cause us to // infinitely inline. - int InlineHistoryID = CallSites[CSi].second; + InlineHistoryID = CallSites[CSi].second; if (InlineHistoryID != -1 && InlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory)) continue; + } + + // FIXME for new PM: because of the old PM we currently generate ORE and + // in turn BFI on demand. With the new PM, the ORE dependency should + // just become a regular analysis dependency. + OptimizationRemarkEmitter ORE(Caller); + // If the policy determines that we should inline this function, + // delete the call instead. + if (!shouldInline(CS, GetInlineCost, ORE)) + continue; + + // If this call site is dead and it is to a readonly function, we should + // just delete the call instead of trying to inline it, regardless of + // size. This happens because IPSCCP propagates the result out of the + // call and then we're left with the dead call. + if (IsTriviallyDead) { + DEBUG(dbgs() << " -> Deleting dead call: " << *Instr << "\n"); + // Update the call graph by deleting the edge from Callee to Caller. + CG[Caller]->removeCallEdgeFor(CS); + Instr->eraseFromParent(); + ++NumCallsDeleted; + } else { // Get DebugLoc to report. CS will be invalid after Inliner. - DebugLoc DLoc = CS.getInstruction()->getDebugLoc(); + DebugLoc DLoc = Instr->getDebugLoc(); BasicBlock *Block = CS.getParent(); - // FIXME for new PM: because of the old PM we currently generate ORE and - // in turn BFI on demand. With the new PM, the ORE dependency should - // just become a regular analysis dependency. - OptimizationRemarkEmitter ORE(Caller); - - // If the policy determines that we should inline this function, - // try to do so. - if (!shouldInline(CS, GetInlineCost, ORE)) - continue; // Attempt to inline the function. using namespace ore; @@ -902,7 +909,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, // To check this we also need to nuke any dead constant uses (perhaps // made dead by this operation on other functions). Callee.removeDeadConstantUsers(); - if (Callee.use_empty()) { + if (Callee.use_empty() && !CG.isLibFunction(Callee)) { Calls.erase( std::remove_if(Calls.begin() + i + 1, Calls.end(), [&Callee](const std::pair &Call) { @@ -982,5 +989,13 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, // And delete the actual function from the module. 
M.getFunctionList().erase(DeadF); } - return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all(); + + if (!Changed) + return PreservedAnalyses::all(); + + // Even if we change the IR, we update the core CGSCC data structures and so + // can preserve the proxy to the function analysis manager. + PreservedAnalyses PA; + PA.preserve(); + return PA; } diff --git a/interpreter/llvm/src/lib/Transforms/IPO/LoopExtractor.cpp b/interpreter/llvm/src/lib/Transforms/IPO/LoopExtractor.cpp index f898c3b5a9358..c74b0a35e296d 100644 --- a/interpreter/llvm/src/lib/Transforms/IPO/LoopExtractor.cpp +++ b/interpreter/llvm/src/lib/Transforms/IPO/LoopExtractor.cpp @@ -14,7 +14,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/IPO.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/IR/Dominators.h" @@ -22,6 +21,7 @@ #include "llvm/IR/Module.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Transforms/IPO.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/CodeExtractor.h" diff --git a/interpreter/llvm/src/lib/Transforms/IPO/LowerTypeTests.cpp b/interpreter/llvm/src/lib/Transforms/IPO/LowerTypeTests.cpp index ca4ee92f971a1..693df5e7ba925 100644 --- a/interpreter/llvm/src/lib/Transforms/IPO/LowerTypeTests.cpp +++ b/interpreter/llvm/src/lib/Transforms/IPO/LowerTypeTests.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/Triple.h" +#include "llvm/Analysis/TypeMetadataUtils.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" @@ -206,17 +207,26 @@ struct ByteArrayInfo { class GlobalTypeMember final : TrailingObjects { GlobalObject *GO; size_t NTypes; + // For functions: true if this is a definition (either in the merged module or + // in one of the thinlto modules). + bool IsDefinition; + // For functions: true if this function is either defined or used in a thinlto + // module and its jumptable entry needs to be exported to thinlto backends. 
+ bool IsExported; friend TrailingObjects; size_t numTrailingObjects(OverloadToken) const { return NTypes; } public: static GlobalTypeMember *create(BumpPtrAllocator &Alloc, GlobalObject *GO, + bool IsDefinition, bool IsExported, ArrayRef Types) { auto *GTM = static_cast(Alloc.Allocate( totalSizeToAlloc(Types.size()), alignof(GlobalTypeMember))); GTM->GO = GO; GTM->NTypes = Types.size(); + GTM->IsDefinition = IsDefinition; + GTM->IsExported = IsExported; std::uninitialized_copy(Types.begin(), Types.end(), GTM->getTrailingObjects()); return GTM; @@ -224,6 +234,12 @@ class GlobalTypeMember final : TrailingObjects { GlobalObject *getGlobal() const { return GO; } + bool isDefinition() const { + return IsDefinition; + } + bool isExported() const { + return IsExported; + } ArrayRef types() const { return makeArrayRef(getTrailingObjects(), NTypes); } @@ -235,7 +251,6 @@ class LowerTypeTestsModule { ModuleSummaryIndex *ExportSummary; const ModuleSummaryIndex *ImportSummary; - bool LinkerSubsectionsViaSymbols; Triple::ArchType Arch; Triple::OSType OS; Triple::ObjectFormatType ObjectFormat; @@ -295,6 +310,7 @@ class LowerTypeTestsModule { void exportTypeId(StringRef TypeId, const TypeIdLowering &TIL); TypeIdLowering importTypeId(StringRef TypeId); void importTypeTest(CallInst *CI); + void importFunction(Function *F, bool isDefinition); BitSetInfo buildBitSet(Metadata *TypeId, @@ -475,13 +491,9 @@ void LowerTypeTestsModule::allocateByteArrays() { // Create an alias instead of RAUW'ing the gep directly. On x86 this ensures // that the pc-relative displacement is folded into the lea instead of the // test instruction getting another displacement. - if (LinkerSubsectionsViaSymbols) { - BAI->ByteArray->replaceAllUsesWith(GEP); - } else { - GlobalAlias *Alias = GlobalAlias::create( - Int8Ty, 0, GlobalValue::PrivateLinkage, "bits", GEP, &M); - BAI->ByteArray->replaceAllUsesWith(Alias); - } + GlobalAlias *Alias = GlobalAlias::create( + Int8Ty, 0, GlobalValue::PrivateLinkage, "bits", GEP, &M); + BAI->ByteArray->replaceAllUsesWith(Alias); BAI->ByteArray->eraseFromParent(); } @@ -502,7 +514,7 @@ Value *LowerTypeTestsModule::createBitSetTest(IRBuilder<> &B, return createMaskedBitTest(B, TIL.InlineBits, BitOffset); } else { Constant *ByteArray = TIL.TheByteArray; - if (!LinkerSubsectionsViaSymbols && AvoidReuse && !ImportSummary) { + if (AvoidReuse && !ImportSummary) { // Each use of the byte array uses a different alias. This makes the // backend less likely to reuse previously computed byte array addresses, // improving the security of the CFI mechanism based on this pass. @@ -608,8 +620,25 @@ Value *LowerTypeTestsModule::lowerTypeTestCall(Metadata *TypeId, CallInst *CI, if (TIL.TheKind == TypeTestResolution::AllOnes) return OffsetInRange; - TerminatorInst *Term = SplitBlockAndInsertIfThen(OffsetInRange, CI, false); - IRBuilder<> ThenB(Term); + // See if the intrinsic is used in the following common pattern: + // br(llvm.type.test(...), thenbb, elsebb) + // where nothing happens between the type test and the br. + // If so, create slightly simpler IR. 
+ if (CI->hasOneUse()) + if (auto *Br = dyn_cast(*CI->user_begin())) + if (CI->getNextNode() == Br) { + BasicBlock *Then = InitialBB->splitBasicBlock(CI->getIterator()); + BasicBlock *Else = Br->getSuccessor(1); + BranchInst *NewBr = BranchInst::Create(Then, Else, OffsetInRange); + NewBr->setMetadata(LLVMContext::MD_prof, + Br->getMetadata(LLVMContext::MD_prof)); + ReplaceInstWithInst(InitialBB->getTerminator(), NewBr); + + IRBuilder<> ThenB(CI); + return createBitSetTest(ThenB, TIL, BitOffset); + } + + IRBuilder<> ThenB(SplitBlockAndInsertIfThen(OffsetInRange, CI, false)); // Now that we know that the offset is in range and aligned, load the // appropriate bit from the bitset. @@ -680,17 +709,13 @@ void LowerTypeTestsModule::buildBitSetsFromGlobalVariables( ConstantInt::get(Int32Ty, I * 2)}; Constant *CombinedGlobalElemPtr = ConstantExpr::getGetElementPtr( NewInit->getType(), CombinedGlobal, CombinedGlobalIdxs); - if (LinkerSubsectionsViaSymbols) { - GV->replaceAllUsesWith(CombinedGlobalElemPtr); - } else { - assert(GV->getType()->getAddressSpace() == 0); - GlobalAlias *GAlias = GlobalAlias::create(NewTy->getElementType(I * 2), 0, - GV->getLinkage(), "", - CombinedGlobalElemPtr, &M); - GAlias->setVisibility(GV->getVisibility()); - GAlias->takeName(GV); - GV->replaceAllUsesWith(GAlias); - } + assert(GV->getType()->getAddressSpace() == 0); + GlobalAlias *GAlias = + GlobalAlias::create(NewTy->getElementType(I * 2), 0, GV->getLinkage(), + "", CombinedGlobalElemPtr, &M); + GAlias->setVisibility(GV->getVisibility()); + GAlias->takeName(GV); + GV->replaceAllUsesWith(GAlias); GV->eraseFromParent(); } } @@ -812,6 +837,46 @@ void LowerTypeTestsModule::importTypeTest(CallInst *CI) { CI->eraseFromParent(); } +// ThinLTO backend: the function F has a jump table entry; update this module +// accordingly. isDefinition describes the type of the jump table entry. +void LowerTypeTestsModule::importFunction(Function *F, bool isDefinition) { + assert(F->getType()->getAddressSpace() == 0); + + // Declaration of a local function - nothing to do. + if (F->isDeclarationForLinker() && isDefinition) + return; + + GlobalValue::VisibilityTypes Visibility = F->getVisibility(); + std::string Name = F->getName(); + Function *FDecl; + + if (F->isDeclarationForLinker() && !isDefinition) { + // Declaration of an external function. + FDecl = Function::Create(F->getFunctionType(), GlobalValue::ExternalLinkage, + Name + ".cfi_jt", &M); + FDecl->setVisibility(GlobalValue::HiddenVisibility); + } else if (isDefinition) { + F->setName(Name + ".cfi"); + F->setLinkage(GlobalValue::ExternalLinkage); + F->setVisibility(GlobalValue::HiddenVisibility); + FDecl = Function::Create(F->getFunctionType(), GlobalValue::ExternalLinkage, + Name, &M); + FDecl->setVisibility(Visibility); + } else { + // Function definition without type metadata, where some other translation + // unit contained a declaration with type metadata. This normally happens + // during mixed CFI + non-CFI compilation. We do nothing with the function + // so that it is treated the same way as a function defined outside of the + // LTO unit. + return; + } + + if (F->isWeakForLinker()) + replaceWeakDeclarationWithJumpTablePtr(F, FDecl); + else + F->replaceAllUsesWith(FDecl); +} + void LowerTypeTestsModule::lowerTypeTestCalls( ArrayRef TypeIds, Constant *CombinedGlobalAddr, const DenseMap &GlobalLayout) { @@ -1135,7 +1200,6 @@ void LowerTypeTestsModule::buildBitSetsFromFunctionsNative( // arithmetic that we normally use for globals. 
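// Aside (illustrative sketch, not part of the patch): the symbol scheme that
// importFunction sets up in a ThinLTO backend, assuming a hypothetical
// CFI-checked function @f. For a definition, the body is renamed and the
// original name becomes a reference to the jump table entry:
//
//   define void @f() { ... }            ; before import
//
//   define hidden void @f.cfi() { ... } ; renamed definition
//   declare void @f()                   ; now resolves to the jump table
//
// For an external function, a hidden declaration @f.cfi_jt is created and
// used instead; the exporting module defines @f.cfi_jt as an alias into its
// combined jump table (see the .cfi_jt alias created in
// buildBitSetsFromFunctionsNative below).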
// FIXME: find a better way to represent the jumptable in the IR. - assert(!Functions.empty()); // Build a simple layout based on the regular layout of jump tables. @@ -1159,6 +1223,7 @@ void LowerTypeTestsModule::buildBitSetsFromFunctionsNative( // references to the original functions with references to the aliases. for (unsigned I = 0; I != Functions.size(); ++I) { Function *F = cast(Functions[I]->getGlobal()); + bool IsDefinition = Functions[I]->isDefinition(); Constant *CombinedGlobalElemPtr = ConstantExpr::getBitCast( ConstantExpr::getInBoundsGetElementPtr( @@ -1166,8 +1231,18 @@ void LowerTypeTestsModule::buildBitSetsFromFunctionsNative( ArrayRef{ConstantInt::get(IntPtrTy, 0), ConstantInt::get(IntPtrTy, I)}), F->getType()); - if (LinkerSubsectionsViaSymbols || F->isDeclarationForLinker()) { - + if (Functions[I]->isExported()) { + if (IsDefinition) { + ExportSummary->cfiFunctionDefs().insert(F->getName()); + } else { + GlobalAlias *JtAlias = GlobalAlias::create( + F->getValueType(), 0, GlobalValue::ExternalLinkage, + F->getName() + ".cfi_jt", CombinedGlobalElemPtr, &M); + JtAlias->setVisibility(GlobalValue::HiddenVisibility); + ExportSummary->cfiFunctionDecls().insert(F->getName()); + } + } + if (!IsDefinition) { if (F->isWeakForLinker()) replaceWeakDeclarationWithJumpTablePtr(F, CombinedGlobalElemPtr); else @@ -1175,9 +1250,8 @@ void LowerTypeTestsModule::buildBitSetsFromFunctionsNative( } else { assert(F->getType()->getAddressSpace() == 0); - GlobalAlias *FAlias = GlobalAlias::create(F->getValueType(), 0, - F->getLinkage(), "", - CombinedGlobalElemPtr, &M); + GlobalAlias *FAlias = GlobalAlias::create( + F->getValueType(), 0, F->getLinkage(), "", CombinedGlobalElemPtr, &M); FAlias->setVisibility(F->getVisibility()); FAlias->takeName(F); if (FAlias->hasName()) @@ -1302,7 +1376,6 @@ LowerTypeTestsModule::LowerTypeTestsModule( : M(M), ExportSummary(ExportSummary), ImportSummary(ImportSummary) { assert(!(ExportSummary && ImportSummary)); Triple TargetTriple(M.getTargetTriple()); - LinkerSubsectionsViaSymbols = TargetTriple.isMacOSX(); Arch = TargetTriple.getArch(); OS = TargetTriple.getOS(); ObjectFormat = TargetTriple.getObjectFormat(); @@ -1347,15 +1420,37 @@ bool LowerTypeTestsModule::runForTesting(Module &M) { bool LowerTypeTestsModule::lower() { Function *TypeTestFunc = M.getFunction(Intrinsic::getName(Intrinsic::type_test)); - if ((!TypeTestFunc || TypeTestFunc->use_empty()) && !ExportSummary) + if ((!TypeTestFunc || TypeTestFunc->use_empty()) && !ExportSummary && + !ImportSummary) return false; if (ImportSummary) { - for (auto UI = TypeTestFunc->use_begin(), UE = TypeTestFunc->use_end(); - UI != UE;) { - auto *CI = cast((*UI++).getUser()); - importTypeTest(CI); + if (TypeTestFunc) { + for (auto UI = TypeTestFunc->use_begin(), UE = TypeTestFunc->use_end(); + UI != UE;) { + auto *CI = cast((*UI++).getUser()); + importTypeTest(CI); + } + } + + SmallVector Defs; + SmallVector Decls; + for (auto &F : M) { + // CFI functions are either external, or promoted. A local function may + // have the same name, but it's not the one we are looking for. 
+ if (F.hasLocalLinkage()) + continue; + if (ImportSummary->cfiFunctionDefs().count(F.getName())) + Defs.push_back(&F); + else if (ImportSummary->cfiFunctionDecls().count(F.getName())) + Decls.push_back(&F); } + + for (auto F : Defs) + importFunction(F, /*isDefinition*/ true); + for (auto F : Decls) + importFunction(F, /*isDefinition*/ false); + return true; } @@ -1381,6 +1476,58 @@ bool LowerTypeTestsModule::lower() { llvm::DenseMap TypeIdInfo; unsigned I = 0; SmallVector Types; + + struct ExportedFunctionInfo { + CfiFunctionLinkage Linkage; + MDNode *FuncMD; // {name, linkage, type[, type...]} + }; + DenseMap ExportedFunctions; + if (ExportSummary) { + NamedMDNode *CfiFunctionsMD = M.getNamedMetadata("cfi.functions"); + if (CfiFunctionsMD) { + for (auto FuncMD : CfiFunctionsMD->operands()) { + assert(FuncMD->getNumOperands() >= 2); + StringRef FunctionName = + cast(FuncMD->getOperand(0))->getString(); + if (!ExportSummary->isGUIDLive(GlobalValue::getGUID( + GlobalValue::dropLLVMManglingEscape(FunctionName)))) + continue; + CfiFunctionLinkage Linkage = static_cast( + cast(FuncMD->getOperand(1)) + ->getValue() + ->getUniqueInteger() + .getZExtValue()); + auto P = ExportedFunctions.insert({FunctionName, {Linkage, FuncMD}}); + if (!P.second && P.first->second.Linkage != CFL_Definition) + P.first->second = {Linkage, FuncMD}; + } + + for (const auto &P : ExportedFunctions) { + StringRef FunctionName = P.first; + CfiFunctionLinkage Linkage = P.second.Linkage; + MDNode *FuncMD = P.second.FuncMD; + Function *F = M.getFunction(FunctionName); + if (!F) + F = Function::Create( + FunctionType::get(Type::getVoidTy(M.getContext()), false), + GlobalVariable::ExternalLinkage, FunctionName, &M); + + if (Linkage == CFL_Definition) + F->eraseMetadata(LLVMContext::MD_type); + + if (F->isDeclaration()) { + if (Linkage == CFL_WeakDeclaration) + F->setLinkage(GlobalValue::ExternalWeakLinkage); + + SmallVector Types; + for (unsigned I = 2; I < FuncMD->getNumOperands(); ++I) + F->addMetadata(LLVMContext::MD_type, + *cast(FuncMD->getOperand(I).get())); + } + } + } + } + for (GlobalObject &GO : M.global_objects()) { if (isa(GO) && GO.isDeclarationForLinker()) continue; @@ -1390,7 +1537,15 @@ bool LowerTypeTestsModule::lower() { if (Types.empty()) continue; - auto *GTM = GlobalTypeMember::create(Alloc, &GO, Types); + bool IsDefinition = !GO.isDeclarationForLinker(); + bool IsExported = false; + if (isa(GO) && ExportedFunctions.count(GO.getName())) { + IsDefinition |= ExportedFunctions[GO.getName()].Linkage == CFL_Definition; + IsExported = true; + } + + auto *GTM = + GlobalTypeMember::create(Alloc, &GO, IsDefinition, IsExported, Types); for (MDNode *Type : Types) { verifyTypeMDNode(&GO, Type); auto &Info = TypeIdInfo[cast(Type)->getOperand(1)]; @@ -1442,9 +1597,8 @@ bool LowerTypeTestsModule::lower() { for (auto &P : *ExportSummary) { for (auto &S : P.second.SummaryList) { auto *FS = dyn_cast(S.get()); - if (!FS) + if (!FS || !ExportSummary->isGlobalValueLive(FS)) continue; - // FIXME: Only add live functions. 
for (GlobalValue::GUID G : FS->type_tests()) for (Metadata *MD : MetadataByGUID[G]) AddTypeIdUse(MD).IsExported = true; diff --git a/interpreter/llvm/src/lib/Transforms/IPO/PartialInlining.cpp b/interpreter/llvm/src/lib/Transforms/IPO/PartialInlining.cpp index 5550376279f28..8840435af6421 100644 --- a/interpreter/llvm/src/lib/Transforms/IPO/PartialInlining.cpp +++ b/interpreter/llvm/src/lib/Transforms/IPO/PartialInlining.cpp @@ -68,6 +68,10 @@ static cl::opt cl::desc("Relative frequency of outline region to " "the entry block")); +static cl::opt ExtraOutliningPenalty( + "partial-inlining-extra-penalty", cl::init(0), cl::Hidden, + cl::desc("A debug option to add additional penalty to the computed one.")); + namespace { struct FunctionOutliningInfo { @@ -83,7 +87,7 @@ struct FunctionOutliningInfo { SmallVector Entries; // The return block that is not included in the outlined region. BasicBlock *ReturnBlock; - // The dominating block of the region ot be outlined. + // The dominating block of the region to be outlined. BasicBlock *NonReturnBlock; // The set of blocks in Entries that are predecessors to ReturnBlock SmallVector ReturnBlockPreds; @@ -99,6 +103,35 @@ struct PartialInlinerImpl { bool run(Module &M); Function *unswitchFunction(Function *F); + // This class speculatively clones the function to be partially inlined. + // At the end of partial inlining, the remaining callsites to the cloned + // function that are not partially inlined will be fixed up to reference + // the original function, and the cloned function will be erased. + struct FunctionCloner { + FunctionCloner(Function *F, FunctionOutliningInfo *OI); + ~FunctionCloner(); + + // Prepare for function outlining: making sure there is only + // one incoming edge from the extracted/outlined region to + // the return block. + void NormalizeReturnBlock(); + + // Do function outlining: + Function *doFunctionOutlining(); + + Function *OrigFunc = nullptr; + Function *ClonedFunc = nullptr; + Function *OutlinedFunc = nullptr; + BasicBlock *OutliningCallBB = nullptr; + // ClonedFunc is inlined in one of its callers after function + // outlining. + bool IsFunctionInlined = false; + // The cost of the region to be outlined. + int OutlinedRegionCost = 0; + std::unique_ptr ClonedOI = nullptr; + std::unique_ptr ClonedFuncBFI = nullptr; + }; + private: int NumPartialInlining = 0; std::function *GetAssumptionCache; @@ -110,27 +143,18 @@ struct PartialInlinerImpl { // The result is no larger than 1 and is represented using BP. // (Note that the outlined region's 'head' block can only have incoming // edges from the guarding entry blocks). - BranchProbability getOutliningCallBBRelativeFreq(Function *F, - FunctionOutliningInfo *OI, - Function *DuplicateFunction, - BlockFrequencyInfo *BFI, - BasicBlock *OutliningCallBB); + BranchProbability getOutliningCallBBRelativeFreq(FunctionCloner &Cloner); // Return true if the callee of CS should be partially inlined with // profit. - bool shouldPartialInline(CallSite CS, Function *F, FunctionOutliningInfo *OI, - BlockFrequencyInfo *CalleeBFI, - BasicBlock *OutliningCallBB, - int OutliningCallOverhead, + bool shouldPartialInline(CallSite CS, FunctionCloner &Cloner, + BlockFrequency WeightedOutliningRcost, OptimizationRemarkEmitter &ORE); // Try to inline DuplicateFunction (cloned from F with a call to // the OutlinedFunction) into its callers. Return true // if there is any successful inlining.
- bool tryPartialInline(Function *DuplicateFunction, - Function *F, /*orignal function */ - FunctionOutliningInfo *OI, Function *OutlinedFunction, - BlockFrequencyInfo *CalleeBFI); + bool tryPartialInline(FunctionCloner &Cloner); // Compute the mapping from use site of DuplicateFunction to the enclosing // BB's profile count. @@ -142,7 +166,7 @@ struct PartialInlinerImpl { NumPartialInlining >= MaxNumPartialInlining); } - CallSite getCallSite(User *U) { + static CallSite getCallSite(User *U) { CallSite CS; if (CallInst *CI = dyn_cast(U)) CS = CallSite(CI); @@ -153,7 +177,7 @@ struct PartialInlinerImpl { return CS; } - CallSite getOneCallSiteTo(Function *F) { + static CallSite getOneCallSiteTo(Function *F) { User *User = *F->user_begin(); return getCallSite(User); } @@ -167,20 +191,15 @@ struct PartialInlinerImpl { // Returns the costs associated with function outlining: // - The first value is the non-weighted runtime cost for making the call - // to the outlined function 'OutlinedFunction', including the addtional - // setup cost in the outlined function itself; + // to the outlined function, including the additional setup cost in the + // outlined function itself; // - The second value is the estimated size of the new call sequence in - // basic block 'OutliningCallBB'; - // - The third value is the estimated size of the original code from - // function 'F' that is extracted into the outlined function. - std::tuple - computeOutliningCosts(Function *F, const FunctionOutliningInfo *OutliningInfo, - Function *OutlinedFunction, - BasicBlock *OutliningCallBB); + // basic block Cloner.OutliningCallBB; + std::tuple computeOutliningCosts(FunctionCloner &Cloner); // Compute the 'InlineCost' of block BB. InlineCost is a proxy used to // approximate both the size and runtime cost (Note that in the current // inline cost analysis, there is no clear distinction there either). - int computeBBInlineCost(BasicBlock *BB); + static int computeBBInlineCost(BasicBlock *BB); std::unique_ptr computeOutliningInfo(Function *F); @@ -392,40 +411,54 @@ static bool hasProfileData(Function *F, FunctionOutliningInfo *OI) { return false; } -BranchProbability PartialInlinerImpl::getOutliningCallBBRelativeFreq( - Function *F, FunctionOutliningInfo *OI, Function *DuplicateFunction, - BlockFrequencyInfo *BFI, BasicBlock *OutliningCallBB) { +BranchProbability +PartialInlinerImpl::getOutliningCallBBRelativeFreq(FunctionCloner &Cloner) { auto EntryFreq = - BFI->getBlockFreq(&DuplicateFunction->getEntryBlock()); - auto OutliningCallFreq = BFI->getBlockFreq(OutliningCallBB); + Cloner.ClonedFuncBFI->getBlockFreq(&Cloner.ClonedFunc->getEntryBlock()); + auto OutliningCallFreq = + Cloner.ClonedFuncBFI->getBlockFreq(Cloner.OutliningCallBB); auto OutlineRegionRelFreq = BranchProbability::getBranchProbability(OutliningCallFreq.getFrequency(), EntryFreq.getFrequency()); - if (hasProfileData(F, OI)) + if (hasProfileData(Cloner.OrigFunc, Cloner.ClonedOI.get())) return OutlineRegionRelFreq; - // When profile data is not available, we need to be very - // conservative in estimating the overall savings. We need to make sure - // the outline region relative frequency is not below the threshold - // specified by the option. - OutlineRegionRelFreq = std::max(OutlineRegionRelFreq, BranchProbability(OutlineRegionFreqPercent, 100)); + // When profile data is not available, we need to be conservative in + // estimating the overall savings.
Static branch prediction can usually + // guess the branch direction right (taken/non-taken), but the guessed + // branch probability is usually not biased enough. When the + // outlined region is predicted to be likely, its probability needs + // to be made higher (more biased) to not under-estimate the cost of + // function outlining. On the other hand, if the outlined region + // is predicted to be less likely, the predicted probability is usually + // higher than the actual. For instance, the actual probability of the + // less likely target is only 5%, but the guessed probability can be + // 40%. In the latter case, there is no need for further adjustment. + // FIXME: add an option for this. + if (OutlineRegionRelFreq < BranchProbability(45, 100)) + return OutlineRegionRelFreq; + + OutlineRegionRelFreq = std::max( + OutlineRegionRelFreq, BranchProbability(OutlineRegionFreqPercent, 100)); return OutlineRegionRelFreq; } bool PartialInlinerImpl::shouldPartialInline( - CallSite CS, Function *F /* Original Callee */, FunctionOutliningInfo *OI, - BlockFrequencyInfo *CalleeBFI, BasicBlock *OutliningCallBB, - int NonWeightedOutliningRcost, OptimizationRemarkEmitter &ORE) { + CallSite CS, FunctionCloner &Cloner, BlockFrequency WeightedOutliningRcost, + OptimizationRemarkEmitter &ORE) { + using namespace ore; if (SkipCostAnalysis) return true; Instruction *Call = CS.getInstruction(); Function *Callee = CS.getCalledFunction(); + assert(Callee == Cloner.ClonedFunc); + Function *Caller = CS.getCaller(); auto &CalleeTTI = (*GetTTI)(*Callee); InlineCost IC = getInlineCost(CS, getInlineParams(), CalleeTTI, @@ -433,14 +466,14 @@ bool PartialInlinerImpl::shouldPartialInline( if (IC.isAlways()) { ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", Call) - << NV("Callee", F) + << NV("Callee", Cloner.OrigFunc) << " should always be fully inlined, not partially"); return false; } if (IC.isNever()) { ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call) - << NV("Callee", F) << " not partially inlined into " + << NV("Callee", Cloner.OrigFunc) << " not partially inlined into " << NV("Caller", Caller) << " because it should never be inlined (cost=never)"); return false; @@ -448,29 +481,25 @@ bool PartialInlinerImpl::shouldPartialInline( if (!IC) { ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly", Call) - << NV("Callee", F) << " not partially inlined into " + << NV("Callee", Cloner.OrigFunc) << " not partially inlined into " << NV("Caller", Caller) << " because too costly to inline (cost=" << NV("Cost", IC.getCost()) << ", threshold=" << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")"); return false; } const DataLayout &DL = Caller->getParent()->getDataLayout(); + // The savings of eliminating the call: int NonWeightedSavings = getCallsiteCost(CS, DL); BlockFrequency NormWeightedSavings(NonWeightedSavings); - auto RelativeFreq = - getOutliningCallBBRelativeFreq(F, OI, Callee, CalleeBFI, OutliningCallBB); - auto NormWeightedRcost = - BlockFrequency(NonWeightedOutliningRcost) * RelativeFreq; - // Weighted saving is smaller than weighted cost, return false - if (NormWeightedSavings < NormWeightedRcost) { + if (NormWeightedSavings < WeightedOutliningRcost) { ORE.emit( OptimizationRemarkAnalysis(DEBUG_TYPE, "OutliningCallcostTooHigh", Call) - << NV("Callee", F) << " not partially inlined into " + << NV("Callee", Cloner.OrigFunc) << " not partially inlined into " << NV("Caller", Caller) << " runtime overhead (overhead=" - << NV("Overhead",
(unsigned)NormWeightedRcost.getFrequency()) + << NV("Overhead", (unsigned)WeightedOutliningRcost.getFrequency()) << ", savings=" << NV("Savings", (unsigned)NormWeightedSavings.getFrequency()) << ")" << " of making the outlined call is too high"); @@ -479,7 +508,7 @@ bool PartialInlinerImpl::shouldPartialInline( } ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "CanBePartiallyInlined", Call) << NV("Callee", F) << " can be partially inlined into " + << NV("Callee", Cloner.OrigFunc) << " can be partially inlined into " << NV("Caller", Caller) << " with cost=" << NV("Cost", IC.getCost()) << " (threshold=" << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")"); @@ -496,6 +525,26 @@ int PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB) { if (isa(I)) continue; + switch (I->getOpcode()) { + case Instruction::BitCast: + case Instruction::PtrToInt: + case Instruction::IntToPtr: + case Instruction::Alloca: + continue; + case Instruction::GetElementPtr: + if (cast(I)->hasAllZeroIndices()) + continue; + default: + break; + } + + IntrinsicInst *IntrInst = dyn_cast(I); + if (IntrInst) { + if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start || + IntrInst->getIntrinsicID() == Intrinsic::lifetime_end) + continue; + } + if (CallInst *CI = dyn_cast(I)) { InlineCost += getCallsiteCost(CallSite(CI), DL); continue; @@ -515,38 +564,32 @@ int PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB) { return InlineCost; } -std::tuple PartialInlinerImpl::computeOutliningCosts( - Function *F, const FunctionOutliningInfo *OI, Function *OutlinedFunction, - BasicBlock *OutliningCallBB) { - // First compute the cost of the outlined region 'OI' in the original - // function 'F': - int OutlinedRegionCost = 0; - for (BasicBlock &BB : *F) { - if (&BB != OI->ReturnBlock && - // Assuming Entry set is small -- do a linear search here: - std::find(OI->Entries.begin(), OI->Entries.end(), &BB) == - OI->Entries.end()) { - OutlinedRegionCost += computeBBInlineCost(&BB); - } - } +std::tuple +PartialInlinerImpl::computeOutliningCosts(FunctionCloner &Cloner) { // Now compute the cost of the call sequence to the outlined function // 'OutlinedFunction' in BB 'OutliningCallBB': - int OutliningFuncCallCost = computeBBInlineCost(OutliningCallBB); + int OutliningFuncCallCost = computeBBInlineCost(Cloner.OutliningCallBB); // Now compute the cost of the extracted/outlined function itself: int OutlinedFunctionCost = 0; - for (BasicBlock &BB : *OutlinedFunction) { + for (BasicBlock &BB : *Cloner.OutlinedFunc) { OutlinedFunctionCost += computeBBInlineCost(&BB); } - assert(OutlinedFunctionCost >= OutlinedRegionCost && + assert(OutlinedFunctionCost >= Cloner.OutlinedRegionCost && "Outlined function cost should be no less than the outlined region"); + // The code extractor introduces new root and exit stub blocks with + // additional unconditional branches. Those branches will be eliminated + // later with bb layout.
The cost should be adjusted accordingly: + OutlinedFunctionCost -= 2 * InlineConstants::InstrCost; + int OutliningRuntimeOverhead = - OutliningFuncCallCost + (OutlinedFunctionCost - OutlinedRegionCost); + OutliningFuncCallCost + + (OutlinedFunctionCost - Cloner.OutlinedRegionCost) + + ExtraOutliningPenalty; - return std::make_tuple(OutliningFuncCallCost, OutliningRuntimeOverhead, - OutlinedRegionCost); + return std::make_tuple(OutliningFuncCallCost, OutliningRuntimeOverhead); } // Create the callsite to profile count map which is @@ -558,17 +601,17 @@ void PartialInlinerImpl::computeCallsiteToProfCountMap( std::vector Users(DuplicateFunction->user_begin(), DuplicateFunction->user_end()); Function *CurrentCaller = nullptr; + std::unique_ptr TempBFI; BlockFrequencyInfo *CurrentCallerBFI = nullptr; auto ComputeCurrBFI = [&,this](Function *Caller) { // For the old pass manager: if (!GetBFI) { - if (CurrentCallerBFI) - delete CurrentCallerBFI; DominatorTree DT(*Caller); LoopInfo LI(DT); BranchProbabilityInfo BPI(*Caller, LI); - CurrentCallerBFI = new BlockFrequencyInfo(*Caller, BPI, LI); + TempBFI.reset(new BlockFrequencyInfo(*Caller, BPI, LI)); + CurrentCallerBFI = TempBFI.get(); } else { // New pass manager: CurrentCallerBFI = &(*GetBFI)(*Caller); @@ -593,42 +636,30 @@ void PartialInlinerImpl::computeCallsiteToProfCountMap( } } -Function *PartialInlinerImpl::unswitchFunction(Function *F) { - - if (F->hasAddressTaken()) - return nullptr; - - // Let inliner handle it - if (F->hasFnAttribute(Attribute::AlwaysInline)) - return nullptr; - - if (F->hasFnAttribute(Attribute::NoInline)) - return nullptr; - - if (PSI->isFunctionEntryCold(F)) - return nullptr; - - if (F->user_begin() == F->user_end()) - return nullptr; - - std::unique_ptr OI = computeOutliningInfo(F); - - if (!OI) - return nullptr; +PartialInlinerImpl::FunctionCloner::FunctionCloner(Function *F, + FunctionOutliningInfo *OI) + : OrigFunc(F) { + ClonedOI = llvm::make_unique(); // Clone the function, so that we can hack away on it. ValueToValueMapTy VMap; - Function *DuplicateFunction = CloneFunction(F, VMap); - BasicBlock *NewReturnBlock = cast(VMap[OI->ReturnBlock]); - BasicBlock *NewNonReturnBlock = cast(VMap[OI->NonReturnBlock]); - DenseSet NewEntries; + ClonedFunc = CloneFunction(F, VMap); + + ClonedOI->ReturnBlock = cast(VMap[OI->ReturnBlock]); + ClonedOI->NonReturnBlock = cast(VMap[OI->NonReturnBlock]); for (BasicBlock *BB : OI->Entries) { - NewEntries.insert(cast(VMap[BB])); + ClonedOI->Entries.push_back(cast(VMap[BB])); + } + for (BasicBlock *E : OI->ReturnBlockPreds) { + BasicBlock *NewE = cast(VMap[E]); + ClonedOI->ReturnBlockPreds.push_back(NewE); } - // Go ahead and update all uses to the duplicate, so that we can just // use the inliner functionality when we're done hacking. - F->replaceAllUsesWith(DuplicateFunction); + F->replaceAllUsesWith(ClonedFunc); +} + +void PartialInlinerImpl::FunctionCloner::NormalizeReturnBlock() { auto getFirstPHI = [](BasicBlock *BB) { BasicBlock::iterator I = BB->begin(); @@ -644,130 +675,206 @@ Function *PartialInlinerImpl::unswitchFunction(Function *F) { } return FirstPhi; }; + // Special hackery is needed with PHI nodes that have inputs from more than // one extracted block. For simplicity, just split the PHIs into a two-level // sequence of PHIs, some of which will go in the extracted region, and some // of which will go outside. 
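// Aside (illustrative sketch, not part of the patch): the two-level PHI
// split that NormalizeReturnBlock performs, using hypothetical labels.
// Before, the return block merges values from the guarding entry blocks
// and from the region to be outlined:
//
//   ret:
//     %v = phi i32 [ 1, %entry1 ], [ 2, %entry2 ], [ %x, %outlined ]
//
// After splitting, the first-level phi stays with the code that will be
// extracted and the second-level phi stays inline with the callers:
//
//   ret:                                  ; will be outlined
//     %v.pre = phi i32 [ %x, %outlined ]
//     br label %ret.split
//   ret.split:                            ; stays inline
//     %v = phi i32 [ 1, %entry1 ], [ 2, %entry2 ], [ %v.pre, %ret ]
//
// A first-level phi whose remaining incoming values are all identical is
// trivial and gets folded away (see IsTrivialPhi below), so the outlined
// region is not left with a live-out definition that would cost an extra
// load/store or argument.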
- BasicBlock *PreReturn = NewReturnBlock; + BasicBlock *PreReturn = ClonedOI->ReturnBlock; // only split block when necessary: PHINode *FirstPhi = getFirstPHI(PreReturn); - unsigned NumPredsFromEntries = OI->ReturnBlockPreds.size(); - if (FirstPhi && FirstPhi->getNumIncomingValues() > NumPredsFromEntries + 1) { - - NewReturnBlock = NewReturnBlock->splitBasicBlock( - NewReturnBlock->getFirstNonPHI()->getIterator()); - BasicBlock::iterator I = PreReturn->begin(); - Instruction *Ins = &NewReturnBlock->front(); - while (I != PreReturn->end()) { - PHINode *OldPhi = dyn_cast(I); - if (!OldPhi) - break; + unsigned NumPredsFromEntries = ClonedOI->ReturnBlockPreds.size(); - PHINode *RetPhi = - PHINode::Create(OldPhi->getType(), NumPredsFromEntries + 1, "", Ins); - OldPhi->replaceAllUsesWith(RetPhi); - Ins = NewReturnBlock->getFirstNonPHI(); + if (!FirstPhi || FirstPhi->getNumIncomingValues() <= NumPredsFromEntries + 1) + return; - RetPhi->addIncoming(&*I, PreReturn); - for (BasicBlock *E : OI->ReturnBlockPreds) { - BasicBlock *NewE = cast(VMap[E]); - RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(NewE), NewE); - OldPhi->removeIncomingValue(NewE); - } - ++I; + auto IsTrivialPhi = [](PHINode *PN) -> Value * { + Value *CommonValue = PN->getIncomingValue(0); + if (all_of(PN->incoming_values(), + [&](Value *V) { return V == CommonValue; })) + return CommonValue; + return nullptr; + }; + + ClonedOI->ReturnBlock = ClonedOI->ReturnBlock->splitBasicBlock( + ClonedOI->ReturnBlock->getFirstNonPHI()->getIterator()); + BasicBlock::iterator I = PreReturn->begin(); + Instruction *Ins = &ClonedOI->ReturnBlock->front(); + SmallVector DeadPhis; + while (I != PreReturn->end()) { + PHINode *OldPhi = dyn_cast(I); + if (!OldPhi) + break; + + PHINode *RetPhi = + PHINode::Create(OldPhi->getType(), NumPredsFromEntries + 1, "", Ins); + OldPhi->replaceAllUsesWith(RetPhi); + Ins = ClonedOI->ReturnBlock->getFirstNonPHI(); + + RetPhi->addIncoming(&*I, PreReturn); + for (BasicBlock *E : ClonedOI->ReturnBlockPreds) { + RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(E), E); + OldPhi->removeIncomingValue(E); } - for (auto E : OI->ReturnBlockPreds) { - BasicBlock *NewE = cast(VMap[E]); - NewE->getTerminator()->replaceUsesOfWith(PreReturn, NewReturnBlock); + + // After splitting the incoming values, the old phi may become trivial. + // Keeping the trivial phi can introduce a definition inside the outline + // region which is live-out, causing unnecessary overhead (load, store + // arg passing etc). + if (auto *OldPhiVal = IsTrivialPhi(OldPhi)) { + OldPhi->replaceAllUsesWith(OldPhiVal); + DeadPhis.push_back(OldPhi); } - } + ++I; + } + for (auto *DP : DeadPhis) + DP->eraseFromParent(); + for (auto E : ClonedOI->ReturnBlockPreds) { + E->getTerminator()->replaceUsesOfWith(PreReturn, ClonedOI->ReturnBlock); + } +} + +Function *PartialInlinerImpl::FunctionCloner::doFunctionOutlining() { // Returns true if the block is to be partially inlined into the caller // (i.e. not to be extracted to the out of line function) - auto ToBeInlined = [&](BasicBlock *BB) { - return BB == NewReturnBlock || NewEntries.count(BB); + auto ToBeInlined = [&, this](BasicBlock *BB) { + return BB == ClonedOI->ReturnBlock || + (std::find(ClonedOI->Entries.begin(), ClonedOI->Entries.end(), BB) != + ClonedOI->Entries.end()); }; + // Gather up the blocks that we're going to extract.
std::vector ToExtract; - ToExtract.push_back(NewNonReturnBlock); - for (BasicBlock &BB : *DuplicateFunction) - if (!ToBeInlined(&BB) && &BB != NewNonReturnBlock) + ToExtract.push_back(ClonedOI->NonReturnBlock); + OutlinedRegionCost += + PartialInlinerImpl::computeBBInlineCost(ClonedOI->NonReturnBlock); + for (BasicBlock &BB : *ClonedFunc) + if (!ToBeInlined(&BB) && &BB != ClonedOI->NonReturnBlock) { ToExtract.push_back(&BB); + // FIXME: the code extractor may hoist/sink more code + // into the outlined function which may make the outlining + // overhead (the difference of the outlined function cost + // and OutliningRegionCost) look larger. + OutlinedRegionCost += computeBBInlineCost(&BB); + } // The CodeExtractor needs a dominator tree. DominatorTree DT; - DT.recalculate(*DuplicateFunction); + DT.recalculate(*ClonedFunc); // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo. LoopInfo LI(DT); - BranchProbabilityInfo BPI(*DuplicateFunction, LI); - BlockFrequencyInfo BFI(*DuplicateFunction, BPI, LI); + BranchProbabilityInfo BPI(*ClonedFunc, LI); + ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI)); // Extract the body of the if. - Function *OutlinedFunction = - CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false, &BFI, &BPI) - .extractCodeRegion(); + OutlinedFunc = CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false, + ClonedFuncBFI.get(), &BPI) + .extractCodeRegion(); + + if (OutlinedFunc) { + OutliningCallBB = PartialInlinerImpl::getOneCallSiteTo(OutlinedFunc) + .getInstruction() + ->getParent(); + assert(OutliningCallBB->getParent() == ClonedFunc); + } - bool AnyInline = - tryPartialInline(DuplicateFunction, F, OI.get(), OutlinedFunction, &BFI); + return OutlinedFunc; +} +PartialInlinerImpl::FunctionCloner::~FunctionCloner() { // Ditch the duplicate, since we're done with it, and rewrite all remaining // users (function pointers, etc.) back to the original function. - DuplicateFunction->replaceAllUsesWith(F); - DuplicateFunction->eraseFromParent(); - if (!AnyInline && OutlinedFunction) - OutlinedFunction->eraseFromParent(); - return OutlinedFunction; + ClonedFunc->replaceAllUsesWith(OrigFunc); + ClonedFunc->eraseFromParent(); + if (!IsFunctionInlined) { + // Remove the function that is speculatively created if there is no + // reference. 
+ if (OutlinedFunc) + OutlinedFunc->eraseFromParent(); + } } -bool PartialInlinerImpl::tryPartialInline(Function *DuplicateFunction, - Function *F, - FunctionOutliningInfo *OI, - Function *OutlinedFunction, - BlockFrequencyInfo *CalleeBFI) { - if (OutlinedFunction == nullptr) - return false; +Function *PartialInlinerImpl::unswitchFunction(Function *F) { + + if (F->hasAddressTaken()) + return nullptr; + // Let inliner handle it + if (F->hasFnAttribute(Attribute::AlwaysInline)) + return nullptr; + + if (F->hasFnAttribute(Attribute::NoInline)) + return nullptr; + + if (PSI->isFunctionEntryCold(F)) + return nullptr; + + if (F->user_begin() == F->user_end()) + return nullptr; + + std::unique_ptr OI = computeOutliningInfo(F); + + if (!OI) + return nullptr; + + FunctionCloner Cloner(F, OI.get()); + Cloner.NormalizeReturnBlock(); + Function *OutlinedFunction = Cloner.doFunctionOutlining(); + + bool AnyInline = tryPartialInline(Cloner); + + if (AnyInline) + return OutlinedFunction; + + return nullptr; +} + +bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) { int NonWeightedRcost; int SizeCost; - int OutlinedRegionSizeCost; - auto OutliningCallBB = - getOneCallSiteTo(OutlinedFunction).getInstruction()->getParent(); + if (Cloner.OutlinedFunc == nullptr) + return false; + + std::tie(SizeCost, NonWeightedRcost) = computeOutliningCosts(Cloner); - std::tie(SizeCost, NonWeightedRcost, OutlinedRegionSizeCost) = - computeOutliningCosts(F, OI, OutlinedFunction, OutliningCallBB); + auto RelativeToEntryFreq = getOutliningCallBBRelativeFreq(Cloner); + auto WeightedRcost = BlockFrequency(NonWeightedRcost) * RelativeToEntryFreq; // The call sequence to the outlined function is larger than the original // outlined region size, it does not increase the chances of inlining - // 'F' with outlining (The inliner usies the size increase to model the - // the cost of inlining a callee). + // the function with outlining (The inliner uses the size increase to + // model the cost of inlining a callee).
+ if (!SkipCostAnalysis && Cloner.OutlinedRegionCost < SizeCost) { + OptimizationRemarkEmitter ORE(Cloner.OrigFunc); DebugLoc DLoc; BasicBlock *Block; - std::tie(DLoc, Block) = getOneDebugLoc(DuplicateFunction); + std::tie(DLoc, Block) = getOneDebugLoc(Cloner.ClonedFunc); ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "OutlineRegionTooSmall", DLoc, Block) - << ore::NV("Function", F) + << ore::NV("Function", Cloner.OrigFunc) << " not partially inlined into callers (Original Size = " - << ore::NV("OutlinedRegionOriginalSize", OutlinedRegionSizeCost) + << ore::NV("OutlinedRegionOriginalSize", Cloner.OutlinedRegionCost) << ", Size of call sequence to outlined function = " << ore::NV("NewSize", SizeCost) << ")"); return false; } - assert(F->user_begin() == F->user_end() && + assert(Cloner.OrigFunc->user_begin() == Cloner.OrigFunc->user_end() && "F's users should all be replaced!"); - std::vector Users(DuplicateFunction->user_begin(), - DuplicateFunction->user_end()); + + std::vector Users(Cloner.ClonedFunc->user_begin(), + Cloner.ClonedFunc->user_end()); DenseMap CallSiteToProfCountMap; - if (F->getEntryCount()) - computeCallsiteToProfCountMap(DuplicateFunction, CallSiteToProfCountMap); + if (Cloner.OrigFunc->getEntryCount()) + computeCallsiteToProfCountMap(Cloner.ClonedFunc, CallSiteToProfCountMap); - auto CalleeEntryCount = F->getEntryCount(); + auto CalleeEntryCount = Cloner.OrigFunc->getEntryCount(); uint64_t CalleeEntryCountV = (CalleeEntryCount ? *CalleeEntryCount : 0); + bool AnyInline = false; for (User *User : Users) { CallSite CS = getCallSite(User); @@ -777,13 +884,12 @@ bool PartialInlinerImpl::tryPartialInline(Function *DuplicateFunction, OptimizationRemarkEmitter ORE(CS.getCaller()); - if (!shouldPartialInline(CS, F, OI, CalleeBFI, OutliningCallBB, - NonWeightedRcost, ORE)) + if (!shouldPartialInline(CS, Cloner, WeightedRcost, ORE)) continue; ORE.emit( OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", CS.getInstruction()) - << ore::NV("Callee", F) << " partially inlined into " + << ore::NV("Callee", Cloner.OrigFunc) << " partially inlined into " << ore::NV("Caller", CS.getCaller())); InlineFunctionInfo IFI(nullptr, GetAssumptionCache, PSI); @@ -801,8 +907,11 @@ bool PartialInlinerImpl::tryPartialInline(Function *DuplicateFunction, NumPartialInlined++; } - if (AnyInline && CalleeEntryCount) - F->setEntryCount(CalleeEntryCountV); + if (AnyInline) { + Cloner.IsFunctionInlined = true; + if (CalleeEntryCount) + Cloner.OrigFunc->setEntryCount(CalleeEntryCountV); + } return AnyInline; } diff --git a/interpreter/llvm/src/lib/Transforms/IPO/PassManagerBuilder.cpp b/interpreter/llvm/src/lib/Transforms/IPO/PassManagerBuilder.cpp index 3bd46fe257cc6..0b319f6a488b3 100644 --- a/interpreter/llvm/src/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/interpreter/llvm/src/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -44,16 +44,16 @@ using namespace llvm; static cl::opt -RunLoopVectorization("vectorize-loops", cl::Hidden, - cl::desc("Run the Loop vectorization passes")); + RunPartialInlining("enable-partial-inlining", cl::init(false), cl::Hidden, + cl::ZeroOrMore, cl::desc("Run Partial inlining pass")); static cl::opt -RunSLPVectorization("vectorize-slp", cl::Hidden, - cl::desc("Run the SLP vectorization passes")); + RunLoopVectorization("vectorize-loops", cl::Hidden, + cl::desc("Run the Loop vectorization passes")); static cl::opt -RunBBVectorization("vectorize-slp-aggressive", cl::Hidden, - cl::desc("Run the BB vectorization passes")); +RunSLPVectorization("vectorize-slp", cl::Hidden,
cl::desc("Run the SLP vectorization passes")); static cl::opt UseGVNAfterVectorization("use-gvn-after-vectorization", @@ -68,10 +68,6 @@ static cl::opt RunLoopRerolling("reroll-loops", cl::Hidden, cl::desc("Run the loop rerolling pass")); -static cl::opt RunLoadCombine("combine-loads", cl::init(false), - cl::Hidden, - cl::desc("Run the load combining pass")); - static cl::opt RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden, cl::desc("Run the NewGVN pass")); @@ -137,6 +133,10 @@ static cl::opt PreInlineThreshold( cl::desc("Control the amount of inlining in pre-instrumentation inliner " "(default = 75)")); +static cl::opt EnableEarlyCSEMemSSA( + "enable-earlycse-memssa", cl::init(true), cl::Hidden, + cl::desc("Enable the EarlyCSE w/ MemorySSA pass (default = on)")); + static cl::opt EnableGVNHoist( "enable-gvn-hoist", cl::init(false), cl::Hidden, cl::desc("Enable the GVN hoisting pass (default = off)")); @@ -151,6 +151,10 @@ static cl::opt cl::Hidden, cl::desc("Enable the simple loop unswitch pass.")); +static cl::opt EnableGVNSink( + "enable-gvn-sink", cl::init(false), cl::Hidden, + cl::desc("Enable the GVN sinking pass (default = off)")); + PassManagerBuilder::PassManagerBuilder() { OptLevel = 2; SizeLevel = 0; @@ -158,11 +162,9 @@ PassManagerBuilder::PassManagerBuilder() { Inliner = nullptr; DisableUnitAtATime = false; DisableUnrollLoops = false; - BBVectorize = RunBBVectorization; SLPVectorize = RunSLPVectorization; LoopVectorize = RunLoopVectorization; RerollLoops = RunLoopRerolling; - LoadCombine = RunLoadCombine; NewGVN = RunNewGVN; DisableGVNLoadPRE = false; VerifyInput = false; @@ -170,8 +172,8 @@ PassManagerBuilder::PassManagerBuilder() { MergeFunctions = false; PrepareForLTO = false; EnablePGOInstrGen = RunPGOInstrGen; - PGOInstrGen = RunPGOInstrGen.getValue(); - PGOInstrUse = RunPGOInstrUse.getValue(); + PGOInstrGen = PGOOutputFile; + PGOInstrUse = RunPGOInstrUse; PrepareForThinLTO = EnablePrepareForThinLTO; PerformThinLTO = false; DivergentTarget = false; @@ -186,6 +188,13 @@ PassManagerBuilder::~PassManagerBuilder() { static ManagedStatic, 8> > GlobalExtensions; +/// Check if GlobalExtensions is constructed and not empty. +/// Since GlobalExtensions is a managed static, calling 'empty()' will trigger +/// the construction of the object. +static bool GlobalExtensionsNotEmpty() { + return GlobalExtensions.isConstructed() && !GlobalExtensions->empty(); +} + void PassManagerBuilder::addGlobalExtension( PassManagerBuilder::ExtensionPointTy Ty, PassManagerBuilder::ExtensionFn Fn) { @@ -198,9 +207,12 @@ void PassManagerBuilder::addExtension(ExtensionPointTy Ty, ExtensionFn Fn) { void PassManagerBuilder::addExtensionsToPM(ExtensionPointTy ETy, legacy::PassManagerBase &PM) const { - for (unsigned i = 0, e = GlobalExtensions->size(); i != e; ++i) - if ((*GlobalExtensions)[i].first == ETy) - (*GlobalExtensions)[i].second(*this, PM); + if (GlobalExtensionsNotEmpty()) { + for (auto &Ext : *GlobalExtensions) { + if (Ext.first == ETy) + Ext.second(*this, PM); + } + } for (unsigned i = 0, e = Extensions.size(); i != e; ++i) if (Extensions[i].first == ETy) Extensions[i].second(*this, PM); @@ -256,11 +268,12 @@ void PassManagerBuilder::populateFunctionPassManager( // Do PGO instrumentation generation or use pass as the option specified. 
void PassManagerBuilder::addPGOInstrPasses(legacy::PassManagerBase &MPM) { - if (!EnablePGOInstrGen && PGOInstrUse.empty()) + if (!EnablePGOInstrGen && PGOInstrUse.empty() && PGOSampleUse.empty()) return; // Perform the preinline and cleanup passes for O1 and above. // And avoid doing them if optimizing for size. - if (OptLevel > 0 && SizeLevel == 0 && !DisablePreInliner) { + if (OptLevel > 0 && SizeLevel == 0 && !DisablePreInliner && + PGOSampleUse.empty()) { // Create preinline pass. We construct an InlineParams object and specify // the threshold here to avoid the command line options of the regular // inliner to influence pre-inlining. The only fields of InlineParams we @@ -284,6 +297,8 @@ void PassManagerBuilder::addPGOInstrPasses(legacy::PassManagerBase &MPM) { InstrProfOptions Options; if (!PGOInstrGen.empty()) Options.InstrProfileOutput = PGOInstrGen; + Options.DoCounterPromotion = true; + MPM.add(createLoopRotatePass()); MPM.add(createInstrProfilingLegacyPass(Options)); } if (!PGOInstrUse.empty()) @@ -300,9 +315,14 @@ void PassManagerBuilder::addFunctionSimplificationPasses( // Start of function pass. // Break up aggregate allocas, using SSAUpdater. MPM.add(createSROAPass()); - MPM.add(createEarlyCSEPass()); // Catch trivial redundancies + MPM.add(createEarlyCSEPass(EnableEarlyCSEMemSSA)); // Catch trivial redundancies if (EnableGVNHoist) MPM.add(createGVNHoistPass()); + if (EnableGVNSink) { + MPM.add(createGVNSinkPass()); + MPM.add(createCFGSimplificationPass()); + } + // Speculative execution if the target has divergent branches; otherwise nop. MPM.add(createSpeculativeExecutionIfHasBranchDivergencePass()); MPM.add(createJumpThreadingPass()); // Thread jumps. @@ -369,29 +389,8 @@ void PassManagerBuilder::addFunctionSimplificationPasses( if (RerollLoops) MPM.add(createLoopRerollPass()); - if (!RunSLPAfterLoopVectorization) { - if (SLPVectorize) - MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains. - - if (BBVectorize) { - MPM.add(createBBVectorizePass()); - addInstructionCombiningPass(MPM); - addExtensionsToPM(EP_Peephole, MPM); - if (OptLevel > 1 && UseGVNAfterVectorization) - MPM.add(NewGVN - ? createNewGVNPass() - : createGVNPass(DisableGVNLoadPRE)); // Remove redundancies - else - MPM.add(createEarlyCSEPass()); // Catch trivial redundancies - - // BBVectorize may have significantly shortened a loop body; unroll again. - if (!DisableUnrollLoops) - MPM.add(createLoopUnrollPass(OptLevel)); - } - } - - if (LoadCombine) - MPM.add(createLoadCombinePass()); + if (!RunSLPAfterLoopVectorization && SLPVectorize) + MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains. MPM.add(createAggressiveDCEPass()); // Delete dead instructions MPM.add(createCFGSimplificationPass()); // Merge & remove BBs @@ -426,7 +425,7 @@ void PassManagerBuilder::populateModulePassManager( // builds. The function merging pass is if (MergeFunctions) MPM.add(createMergeFunctionsPass()); - else if (!GlobalExtensions->empty() || !Extensions.empty()) + else if (GlobalExtensionsNotEmpty() || !Extensions.empty()) MPM.add(createBarrierNoopPass()); addExtensionsToPM(EP_EnabledOnOptLevel0, MPM); @@ -516,6 +515,8 @@ void PassManagerBuilder::populateModulePassManager( // pass manager that we are specifically trying to avoid. To prevent this // we must insert a no-op module pass to reset the pass manager. 
MPM.add(createBarrierNoopPass()); + if (RunPartialInlining) + MPM.add(createPartialInliningPass()); if (!DisableUnitAtATime && OptLevel > 1 && !PrepareForLTO && !PrepareForThinLTO) @@ -621,28 +622,10 @@ void PassManagerBuilder::populateModulePassManager( addInstructionCombiningPass(MPM); } - if (RunSLPAfterLoopVectorization) { - if (SLPVectorize) { - MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains. - if (OptLevel > 1 && ExtraVectorizerPasses) { - MPM.add(createEarlyCSEPass()); - } - } - - if (BBVectorize) { - MPM.add(createBBVectorizePass()); - addInstructionCombiningPass(MPM); - addExtensionsToPM(EP_Peephole, MPM); - if (OptLevel > 1 && UseGVNAfterVectorization) - MPM.add(NewGVN - ? createNewGVNPass() - : createGVNPass(DisableGVNLoadPRE)); // Remove redundancies - else - MPM.add(createEarlyCSEPass()); // Catch trivial redundancies - - // BBVectorize may have significantly shortened a loop body; unroll again. - if (!DisableUnrollLoops) - MPM.add(createLoopUnrollPass(OptLevel)); + if (RunSLPAfterLoopVectorization && SLPVectorize) { + MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains. + if (OptLevel > 1 && ExtraVectorizerPasses) { + MPM.add(createEarlyCSEPass()); } } @@ -831,9 +814,6 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { // alignments. PM.add(createAlignmentFromAssumptionsPass()); - if (LoadCombine) - PM.add(createLoadCombinePass()); - // Cleanup and simplify the code after the scalar optimizations. addInstructionCombiningPass(PM); addExtensionsToPM(EP_Peephole, PM); @@ -898,6 +878,12 @@ void PassManagerBuilder::populateLTOPassManager(legacy::PassManagerBase &PM) { if (OptLevel != 0) addLTOOptimizationPasses(PM); + else { + // The whole-program-devirt pass needs to run at -O0 because only it knows + // about the llvm.type.checked.load intrinsic: it needs to both lower the + // intrinsic itself and handle it in the summary. + PM.add(createWholeProgramDevirtPass(ExportSummary, nullptr)); + } // Create a function that performs CFI checks for cross-DSO calls with targets // in the current module. 
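Per the comment in the populateLTOPassManager() hunk above, whole-program devirtualization now runs even at -O0 because nothing else lowers the llvm.type.checked.load intrinsic. A minimal sketch of driving this builder from an LTO client (hedged: module setup and the ExportSummary wiring are elided, and the function name is illustrative):

    #include "llvm/IR/LegacyPassManager.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Transforms/IPO/PassManagerBuilder.h"

    static void runExampleLTOPipeline(llvm::Module &M, unsigned OptLevel) {
      llvm::legacy::PassManager PM;
      llvm::PassManagerBuilder PMB;
      PMB.OptLevel = OptLevel; // at 0, only the devirt/CFI lowering is added
      PMB.populateLTOPassManager(PM);
      PM.run(M);
    }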
diff --git a/interpreter/llvm/src/lib/Transforms/IPO/PruneEH.cpp b/interpreter/llvm/src/lib/Transforms/IPO/PruneEH.cpp
index d9acb9b1a743b..3fd59847a005f 100644
--- a/interpreter/llvm/src/lib/Transforms/IPO/PruneEH.cpp
+++ b/interpreter/llvm/src/lib/Transforms/IPO/PruneEH.cpp
@@ -14,10 +14,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Transforms/IPO.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Analysis/CallGraph.h"
 #include "llvm/Analysis/CallGraphSCCPass.h"
 #include "llvm/Analysis/EHPersonalities.h"
@@ -28,6 +26,8 @@
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/IPO.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include <algorithm>
 using namespace llvm;
diff --git a/interpreter/llvm/src/lib/Transforms/IPO/SampleProfile.cpp b/interpreter/llvm/src/lib/Transforms/IPO/SampleProfile.cpp
index e755e2bd8f260..6baada2c1ae1f 100644
--- a/interpreter/llvm/src/lib/Transforms/IPO/SampleProfile.cpp
+++ b/interpreter/llvm/src/lib/Transforms/IPO/SampleProfile.cpp
@@ -173,8 +173,10 @@ class SampleProfileLoader {
   void printBlockEquivalence(raw_ostream &OS, const BasicBlock *BB);
   bool computeBlockWeights(Function &F);
   void findEquivalenceClasses(Function &F);
+  template <bool IsPostDom>
   void findEquivalencesFor(BasicBlock *BB1, ArrayRef<BasicBlock *> Descendants,
-                           DominatorTreeBase<BasicBlock> *DomTree);
+                           DominatorTreeBase<BasicBlock, IsPostDom> *DomTree);
+
   void propagateWeights(Function &F);
   uint64_t visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge);
   void buildEdges(Function &F);
@@ -217,7 +219,7 @@ class SampleProfileLoader {
   /// \brief Dominance, post-dominance and loop information.
   std::unique_ptr<DominatorTree> DT;
-  std::unique_ptr<DominatorTreeBase<BasicBlock>> PDT;
+  std::unique_ptr<PostDomTreeBase<BasicBlock>> PDT;
   std::unique_ptr<LoopInfo> LI;
 
   AssumptionCacheTracker *ACT;
@@ -690,11 +692,21 @@ bool SampleProfileLoader::inlineHotFunctions(
   for (auto I : CIS) {
     InlineFunctionInfo IFI(nullptr, ACT ? &GetAssumptionCache : nullptr);
     Function *CalledFunction = CallSite(I).getCalledFunction();
+    // Do not inline recursive calls.
+    if (CalledFunction == &F)
+      continue;
     Instruction *DI = I;
     if (!CalledFunction && !PromotedInsns.count(I) &&
         CallSite(I).isIndirectCall())
       for (const auto *FS : findIndirectCallFunctionSamples(*I)) {
         auto CalleeFunctionName = FS->getName();
+        // If it is a recursive call, we do not inline it as it could bloat
+        // the code exponentially. There is way to better handle this, e.g.
+        // clone the caller first, and inline the cloned caller if it is
+        // recursive. As llvm does not inline recursive calls, we will simply
+        // ignore it instead of handling it explicitly.
+        if (CalleeFunctionName == F.getName())
+          continue;
         const char *Reason = "Callee function not available";
         auto R = SymbolMap.find(CalleeFunctionName);
         if (R == SymbolMap.end())
@@ -763,9 +775,10 @@ bool SampleProfileLoader::inlineHotFunctions(
 /// \param DomTree  Opposite dominator tree. If \p Descendants is filled
 ///                 with blocks from \p BB1's dominator tree, then
 ///                 this is the post-dominator tree, and vice versa.
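// Illustrative sketch (not from the patch): after the dominator-tree
// generalization used here, one template serves both directions --
// DominatorTreeBase<NodeT, IsPostDom> -- and PostDomTreeBase<NodeT> is the
// IsPostDom = true alias. Building a post-dominator tree directly:
#include "llvm/IR/Dominators.h"

static void buildExamplePostDomTree(llvm::Function &F) {
  llvm::PostDomTreeBase<llvm::BasicBlock> PDT; // DominatorTreeBase<BasicBlock, true>
  PDT.recalculate(F);
}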
+template void SampleProfileLoader::findEquivalencesFor( BasicBlock *BB1, ArrayRef Descendants, - DominatorTreeBase *DomTree) { + DominatorTreeBase *DomTree) { const BasicBlock *EC = EquivalenceClass[BB1]; uint64_t Weight = BlockWeights[EC]; for (const auto *BB2 : Descendants) { @@ -1273,7 +1286,7 @@ void SampleProfileLoader::computeDominanceAndLoopInfo(Function &F) { DT.reset(new DominatorTree); DT->recalculate(F); - PDT.reset(new DominatorTreeBase(true)); + PDT.reset(new PostDomTreeBase()); PDT->recalculate(F); LI.reset(new LoopInfo); @@ -1474,7 +1487,8 @@ bool SampleProfileLoader::runOnFunction(Function &F) { PreservedAnalyses SampleProfileLoaderPass::run(Module &M, ModuleAnalysisManager &AM) { - SampleProfileLoader SampleLoader(SampleProfileFile); + SampleProfileLoader SampleLoader( + ProfileFileName.empty() ? SampleProfileFile : ProfileFileName); SampleLoader.doInitialization(M); diff --git a/interpreter/llvm/src/lib/Transforms/IPO/StripSymbols.cpp b/interpreter/llvm/src/lib/Transforms/IPO/StripSymbols.cpp index fb64367eef917..de1b51e206ff3 100644 --- a/interpreter/llvm/src/lib/Transforms/IPO/StripSymbols.cpp +++ b/interpreter/llvm/src/lib/Transforms/IPO/StripSymbols.cpp @@ -20,7 +20,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/IPO.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfo.h" @@ -30,6 +29,7 @@ #include "llvm/IR/TypeFinder.h" #include "llvm/IR/ValueSymbolTable.h" #include "llvm/Pass.h" +#include "llvm/Transforms/IPO.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; diff --git a/interpreter/llvm/src/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/interpreter/llvm/src/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp index 659cb9df00a2c..8ef6bb6523093 100644 --- a/interpreter/llvm/src/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp +++ b/interpreter/llvm/src/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp @@ -6,14 +6,8 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -// -// This pass prepares a module containing type metadata for ThinLTO by splitting -// it into regular and thin LTO parts if possible, and writing both parts to -// a multi-module bitcode file. Modules that do not contain type metadata are -// written unmodified as a single module. -// -//===----------------------------------------------------------------------===// +#include "llvm/Transforms/IPO/ThinLTOBitcodeWriter.h" #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/ModuleSummaryAnalysis.h" #include "llvm/Analysis/ProfileSummaryInfo.h" @@ -38,7 +32,8 @@ namespace { // Promote each local-linkage entity defined by ExportM and used by ImportM by // changing visibility and appending the given ModuleId. 
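// Illustrative sketch (not from the patch): "promotion" here means turning a
// local-linkage symbol into a hidden external one with a module-unique name,
// so the split-off module can still reference it. In the spirit of the code
// below, with a hypothetical helper:
#include "llvm/ADT/Twine.h"
#include "llvm/IR/GlobalValue.h"

static void promoteExampleSymbol(llvm::GlobalValue &GV,
                                 llvm::StringRef ModuleId) {
  GV.setName(GV.getName() + ModuleId);               // append module id
  GV.setLinkage(llvm::GlobalValue::ExternalLinkage); // was internal/private
  GV.setVisibility(llvm::GlobalValue::HiddenVisibility);
}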
-void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId) { +void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId, + SetVector &PromoteExtra) { DenseMap RenamedComdats; for (auto &ExportGV : ExportM.global_values()) { if (!ExportGV.hasLocalLinkage()) @@ -46,7 +41,7 @@ void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId) { auto Name = ExportGV.getName(); GlobalValue *ImportGV = ImportM.getNamedValue(Name); - if (!ImportGV || ImportGV->use_empty()) + if ((!ImportGV || ImportGV->use_empty()) && !PromoteExtra.count(&ExportGV)) continue; std::string NewName = (Name + ModuleId).str(); @@ -59,8 +54,10 @@ void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId) { ExportGV.setLinkage(GlobalValue::ExternalLinkage); ExportGV.setVisibility(GlobalValue::HiddenVisibility); - ImportGV->setName(NewName); - ImportGV->setVisibility(GlobalValue::HiddenVisibility); + if (ImportGV) { + ImportGV->setName(NewName); + ImportGV->setVisibility(GlobalValue::HiddenVisibility); + } } if (!RenamedComdats.empty()) @@ -274,7 +271,8 @@ void splitAndWriteThinLTOBitcode( if (!ArgT || ArgT->getBitWidth() > 64) return; } - if (computeFunctionBodyMemoryAccess(*F, AARGetter(*F)) == MAK_ReadNone) + if (!F->isDeclaration() && + computeFunctionBodyMemoryAccess(*F, AARGetter(*F)) == MAK_ReadNone) EligibleVirtualFns.insert(F); }); } @@ -302,6 +300,11 @@ void splitAndWriteThinLTOBitcode( F.setComdat(nullptr); } + SetVector CfiFunctions; + for (auto &F : M) + if ((!F.hasLocalLinkage() || F.hasAddressTaken()) && HasTypeMetadata(&F)) + CfiFunctions.insert(&F); + // Remove all globals with type metadata, globals with comdats that live in // MergedM, and aliases pointing to such globals from the thin LTO module. filterModule(&M, [&](const GlobalValue *GV) { @@ -314,16 +317,50 @@ void splitAndWriteThinLTOBitcode( return true; }); - promoteInternals(*MergedM, M, ModuleId); - promoteInternals(M, *MergedM, ModuleId); + promoteInternals(*MergedM, M, ModuleId, CfiFunctions); + promoteInternals(M, *MergedM, ModuleId, CfiFunctions); + + SmallVector CfiFunctionMDs; + for (auto V : CfiFunctions) { + Function &F = *cast(V); + SmallVector Types; + F.getMetadata(LLVMContext::MD_type, Types); + + auto &Ctx = MergedM->getContext(); + SmallVector Elts; + Elts.push_back(MDString::get(Ctx, F.getName())); + CfiFunctionLinkage Linkage; + if (!F.isDeclarationForLinker()) + Linkage = CFL_Definition; + else if (F.isWeakForLinker()) + Linkage = CFL_WeakDeclaration; + else + Linkage = CFL_Declaration; + Elts.push_back(ConstantAsMetadata::get( + llvm::ConstantInt::get(Type::getInt8Ty(Ctx), Linkage))); + for (auto Type : Types) + Elts.push_back(Type); + CfiFunctionMDs.push_back(MDTuple::get(Ctx, Elts)); + } - simplifyExternals(*MergedM); + if(!CfiFunctionMDs.empty()) { + NamedMDNode *NMD = MergedM->getOrInsertNamedMetadata("cfi.functions"); + for (auto MD : CfiFunctionMDs) + NMD->addOperand(MD); + } + simplifyExternals(*MergedM); // FIXME: Try to re-use BSI and PFI from the original module here. ProfileSummaryInfo PSI(M); ModuleSummaryIndex Index = buildModuleSummaryIndex(M, nullptr, &PSI); + // Mark the merged module as requiring full LTO. We still want an index for + // it though, so that it can participate in summary-based dead stripping. 
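// Illustrative sketch (not from the patch): each "cfi.functions" entry built
// above is an MDTuple of (function name, CfiFunctionLinkage as i8, the
// function's !type metadata...). A hypothetical consumer walking that layout:
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"

static void dumpExampleCfiFunctions(llvm::Module &M) {
  if (llvm::NamedMDNode *NMD = M.getNamedMetadata("cfi.functions"))
    for (llvm::MDNode *Entry : NMD->operands()) {
      // Operand 0: MDString function name; operand 1: wrapped i8 linkage
      // code; operands 2..N: the function's !type annotations.
      auto *Name = llvm::cast<llvm::MDString>(Entry->getOperand(0));
      llvm::errs() << Name->getString() << "\n";
    }
}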
+  MergedM->addModuleFlag(Module::Error, "ThinLTO", uint32_t(0));
+  ModuleSummaryIndex MergedMIndex =
+      buildModuleSummaryIndex(*MergedM, nullptr, &PSI);
+
   SmallVector<char, 0> Buffer;
 
   BitcodeWriter W(Buffer);
@@ -333,7 +370,9 @@ void splitAndWriteThinLTOBitcode(
   ModuleHash ModHash = {{0}};
   W.writeModule(&M, /*ShouldPreserveUseListOrder=*/false, &Index,
                 /*GenerateHash=*/true, &ModHash);
-  W.writeModule(MergedM.get());
+  W.writeModule(MergedM.get(), /*ShouldPreserveUseListOrder=*/false,
+                &MergedMIndex);
+  W.writeSymtab();
   W.writeStrtab();
   OS << Buffer;
@@ -346,7 +385,9 @@ void splitAndWriteThinLTOBitcode(
     StripDebugInfo(M);
   W2.writeModule(&M, /*ShouldPreserveUseListOrder=*/false, &Index,
                  /*GenerateHash=*/false, &ModHash);
-  W2.writeModule(MergedM.get());
+  W2.writeModule(MergedM.get(), /*ShouldPreserveUseListOrder=*/false,
+                 &MergedMIndex);
+  W2.writeSymtab();
   W2.writeStrtab();
   *ThinLinkOS << Buffer;
 }
@@ -436,3 +477,15 @@ ModulePass *llvm::createWriteThinLTOBitcodePass(raw_ostream &Str,
                                                 raw_ostream *ThinLinkOS) {
   return new WriteThinLTOBitcode(Str, ThinLinkOS);
 }
+
+PreservedAnalyses
+llvm::ThinLTOBitcodeWriterPass::run(Module &M, ModuleAnalysisManager &AM) {
+  FunctionAnalysisManager &FAM =
+      AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+  writeThinLTOBitcode(OS, ThinLinkOS,
+                      [&FAM](Function &F) -> AAResults & {
+                        return FAM.getResult<AAManager>(F);
+                      },
+                      M, &AM.getResult<ModuleSummaryIndexAnalysis>(M));
+  return PreservedAnalyses::all();
+}
diff --git a/interpreter/llvm/src/lib/Transforms/IPO/WholeProgramDevirt.cpp b/interpreter/llvm/src/lib/Transforms/IPO/WholeProgramDevirt.cpp
index aae22c5457ba2..00769cd632292 100644
--- a/interpreter/llvm/src/lib/Transforms/IPO/WholeProgramDevirt.cpp
+++ b/interpreter/llvm/src/lib/Transforms/IPO/WholeProgramDevirt.cpp
@@ -46,9 +46,9 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseMapInfo.h"
 #include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/iterator_range.h"
 #include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/iterator_range.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/BasicAliasAnalysis.h"
 #include "llvm/Analysis/TypeMetadataUtils.h"
diff --git a/interpreter/llvm/src/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/interpreter/llvm/src/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 3709360226f50..809471cfd74f0 100644
--- a/interpreter/llvm/src/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/interpreter/llvm/src/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -164,7 +164,7 @@ namespace {
   ///
   class FAddCombine {
   public:
-    FAddCombine(InstCombiner::BuilderTy *B) : Builder(B), Instr(nullptr) {}
+    FAddCombine(InstCombiner::BuilderTy &B) : Builder(B), Instr(nullptr) {}
 
     Value *simplify(Instruction *FAdd);
 
   private:
@@ -187,7 +187,7 @@ namespace {
     Value *createNaryFAdd(const AddendVect& Opnds, unsigned InstrQuota);
     void createInstPostProc(Instruction *NewInst, bool NoNumber = false);
 
-    InstCombiner::BuilderTy *Builder;
+    InstCombiner::BuilderTy &Builder;
     Instruction *Instr;
 
     // Debugging stuff are clustered here.
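The new-PM entry point defined in the ThinLTOBitcodeWriter hunk above lets the writer sit in a ModulePassManager. A minimal sketch of scheduling it (hedged: analysis-manager registration and stream setup are elided, and the helper name is illustrative):

    #include "llvm/IR/PassManager.h"
    #include "llvm/Support/raw_ostream.h"
    #include "llvm/Transforms/IPO/ThinLTOBitcodeWriter.h"

    static void addExampleWriter(llvm::ModulePassManager &MPM,
                                 llvm::raw_ostream &OS) {
      // Passing nullptr: no separate thin-link (minimized) output stream.
      MPM.addPass(llvm::ThinLTOBitcodeWriterPass(OS, /*ThinLinkOS=*/nullptr));
    }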
@@ -735,7 +735,7 @@ Value *FAddCombine::createNaryFAdd } Value *FAddCombine::createFSub(Value *Opnd0, Value *Opnd1) { - Value *V = Builder->CreateFSub(Opnd0, Opnd1); + Value *V = Builder.CreateFSub(Opnd0, Opnd1); if (Instruction *I = dyn_cast(V)) createInstPostProc(I); return V; @@ -750,21 +750,21 @@ Value *FAddCombine::createFNeg(Value *V) { } Value *FAddCombine::createFAdd(Value *Opnd0, Value *Opnd1) { - Value *V = Builder->CreateFAdd(Opnd0, Opnd1); + Value *V = Builder.CreateFAdd(Opnd0, Opnd1); if (Instruction *I = dyn_cast(V)) createInstPostProc(I); return V; } Value *FAddCombine::createFMul(Value *Opnd0, Value *Opnd1) { - Value *V = Builder->CreateFMul(Opnd0, Opnd1); + Value *V = Builder.CreateFMul(Opnd0, Opnd1); if (Instruction *I = dyn_cast(V)) createInstPostProc(I); return V; } Value *FAddCombine::createFDiv(Value *Opnd0, Value *Opnd1) { - Value *V = Builder->CreateFDiv(Opnd0, Opnd1); + Value *V = Builder.CreateFDiv(Opnd0, Opnd1); if (Instruction *I = dyn_cast(V)) createInstPostProc(I); return V; @@ -847,116 +847,28 @@ Value *FAddCombine::createAddendVal(const FAddend &Opnd, bool &NeedNeg) { return createFMul(OpndVal, Coeff.getValue(Instr->getType())); } -/// \brief Return true if we can prove that adding the two values of the -/// knownbits will not overflow. -/// Otherwise return false. -static bool checkRippleForAdd(const KnownBits &LHSKnown, - const KnownBits &RHSKnown) { - // Addition of two 2's complement numbers having opposite signs will never - // overflow. - if ((LHSKnown.isNegative() && RHSKnown.isNonNegative()) || - (LHSKnown.isNonNegative() && RHSKnown.isNegative())) - return true; - - // If either of the values is known to be non-negative, adding them can only - // overflow if the second is also non-negative, so we can assume that. - // Two non-negative numbers will only overflow if there is a carry to the - // sign bit, so we can check if even when the values are as big as possible - // there is no overflow to the sign bit. - if (LHSKnown.isNonNegative() || RHSKnown.isNonNegative()) { - APInt MaxLHS = ~LHSKnown.Zero; - MaxLHS.clearSignBit(); - APInt MaxRHS = ~RHSKnown.Zero; - MaxRHS.clearSignBit(); - APInt Result = std::move(MaxLHS) + std::move(MaxRHS); - return Result.isSignBitClear(); - } - - // If either of the values is known to be negative, adding them can only - // overflow if the second is also negative, so we can assume that. - // Two negative number will only overflow if there is no carry to the sign - // bit, so we can check if even when the values are as small as possible - // there is overflow to the sign bit. - if (LHSKnown.isNegative() || RHSKnown.isNegative()) { - APInt MinLHS = LHSKnown.One; - MinLHS.clearSignBit(); - APInt MinRHS = RHSKnown.One; - MinRHS.clearSignBit(); - APInt Result = std::move(MinLHS) + std::move(MinRHS); - return Result.isSignBitSet(); - } - - // If we reached here it means that we know nothing about the sign bits. - // In this case we can't know if there will be an overflow, since by - // changing the sign bits any two values can be made to overflow. - return false; -} - -/// Return true if we can prove that: -/// (sext (add LHS, RHS)) === (add (sext LHS), (sext RHS)) -/// This basically requires proving that the add in the original type would not -/// overflow to change the sign bit or have a carry out. -bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS, - Instruction &CxtI) { - // There are different heuristics we can use for this. Here are some simple - // ones. 
- - // If LHS and RHS each have at least two sign bits, the addition will look - // like - // - // XX..... + - // YY..... - // - // If the carry into the most significant position is 0, X and Y can't both - // be 1 and therefore the carry out of the addition is also 0. - // - // If the carry into the most significant position is 1, X and Y can't both - // be 0 and therefore the carry out of the addition is also 1. - // - // Since the carry into the most significant position is always equal to - // the carry out of the addition, there is no signed overflow. - if (ComputeNumSignBits(LHS, 0, &CxtI) > 1 && - ComputeNumSignBits(RHS, 0, &CxtI) > 1) - return true; - - unsigned BitWidth = LHS->getType()->getScalarSizeInBits(); - KnownBits LHSKnown(BitWidth); - computeKnownBits(LHS, LHSKnown, 0, &CxtI); - - KnownBits RHSKnown(BitWidth); - computeKnownBits(RHS, RHSKnown, 0, &CxtI); - - // Check if carry bit of addition will not cause overflow. - if (checkRippleForAdd(LHSKnown, RHSKnown)) - return true; - - return false; -} - /// \brief Return true if we can prove that: /// (sub LHS, RHS) === (sub nsw LHS, RHS) /// This basically requires proving that the add in the original type would not /// overflow to change the sign bit or have a carry out. /// TODO: Handle this for Vectors. -bool InstCombiner::WillNotOverflowSignedSub(Value *LHS, Value *RHS, - Instruction &CxtI) { +bool InstCombiner::willNotOverflowSignedSub(const Value *LHS, + const Value *RHS, + const Instruction &CxtI) const { // If LHS and RHS each have at least two sign bits, the subtraction // cannot overflow. if (ComputeNumSignBits(LHS, 0, &CxtI) > 1 && ComputeNumSignBits(RHS, 0, &CxtI) > 1) return true; - unsigned BitWidth = LHS->getType()->getScalarSizeInBits(); - KnownBits LHSKnown(BitWidth); - computeKnownBits(LHS, LHSKnown, 0, &CxtI); + KnownBits LHSKnown = computeKnownBits(LHS, 0, &CxtI); - KnownBits RHSKnown(BitWidth); - computeKnownBits(RHS, RHSKnown, 0, &CxtI); + KnownBits RHSKnown = computeKnownBits(RHS, 0, &CxtI); // Subtraction of two 2's complement numbers having identical signs will // never overflow. - if ((LHSKnown.One[BitWidth - 1] && RHSKnown.One[BitWidth - 1]) || - (LHSKnown.Zero[BitWidth - 1] && RHSKnown.Zero[BitWidth - 1])) + if ((LHSKnown.isNegative() && RHSKnown.isNegative()) || + (LHSKnown.isNonNegative() && RHSKnown.isNonNegative())) return true; // TODO: implement logic similar to checkRippleForAdd @@ -965,16 +877,13 @@ bool InstCombiner::WillNotOverflowSignedSub(Value *LHS, Value *RHS, /// \brief Return true if we can prove that: /// (sub LHS, RHS) === (sub nuw LHS, RHS) -bool InstCombiner::WillNotOverflowUnsignedSub(Value *LHS, Value *RHS, - Instruction &CxtI) { +bool InstCombiner::willNotOverflowUnsignedSub(const Value *LHS, + const Value *RHS, + const Instruction &CxtI) const { // If the LHS is negative and the RHS is non-negative, no unsigned wrap. 
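// Illustrative sketch (not from the patch): the KnownBits interface that
// replaces the old ComputeSignBit() pair of bools throughout this file.
// "Known negative" means the sign bit is a known one; "known non-negative"
// means it is a known zero.
#include "llvm/Support/KnownBits.h"

static bool exampleSubCannotWrapUnsigned(const llvm::KnownBits &LHS,
                                         const llvm::KnownBits &RHS) {
  // Unsigned view: LHS >= 2^(n-1) while RHS < 2^(n-1), so LHS - RHS
  // cannot wrap below zero.
  return LHS.isNegative() && RHS.isNonNegative();
}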
- bool LHSKnownNonNegative, LHSKnownNegative; - bool RHSKnownNonNegative, RHSKnownNegative; - ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, /*Depth=*/0, - &CxtI); - ComputeSignBit(RHS, RHSKnownNonNegative, RHSKnownNegative, /*Depth=*/0, - &CxtI); - if (LHSKnownNegative && RHSKnownNonNegative) + KnownBits LHSKnown = computeKnownBits(LHS, /*Depth=*/0, &CxtI); + KnownBits RHSKnown = computeKnownBits(RHS, /*Depth=*/0, &CxtI); + if (LHSKnown.isNegative() && RHSKnown.isNonNegative()) return true; return false; @@ -986,7 +895,7 @@ bool InstCombiner::WillNotOverflowUnsignedSub(Value *LHS, Value *RHS, // ADD(XOR(AND(Z, C), C), 1) == NEG(OR(Z, ~C)) // XOR(AND(Z, C), (C + 1)) == NEG(OR(Z, ~C)) if C is even static Value *checkForNegativeOperand(BinaryOperator &I, - InstCombiner::BuilderTy *Builder) { + InstCombiner::BuilderTy &Builder) { Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); // This function creates 2 instructions to replace ADD, we need at least one @@ -1010,13 +919,13 @@ static Value *checkForNegativeOperand(BinaryOperator &I, // X = XOR(Y, C1), Y = OR(Z, C2), C2 = NOT(C1) ==> X == NOT(AND(Z, C1)) // ADD(ADD(X, 1), RHS) == ADD(X, ADD(RHS, 1)) == SUB(RHS, AND(Z, C1)) if (match(Y, m_Or(m_Value(Z), m_APInt(C2))) && (*C2 == ~(*C1))) { - Value *NewAnd = Builder->CreateAnd(Z, *C1); - return Builder->CreateSub(RHS, NewAnd, "sub"); + Value *NewAnd = Builder.CreateAnd(Z, *C1); + return Builder.CreateSub(RHS, NewAnd, "sub"); } else if (match(Y, m_And(m_Value(Z), m_APInt(C2))) && (*C1 == *C2)) { // X = XOR(Y, C1), Y = AND(Z, C2), C2 == C1 ==> X == NOT(OR(Z, ~C1)) // ADD(ADD(X, 1), RHS) == ADD(X, ADD(RHS, 1)) == SUB(RHS, OR(Z, ~C1)) - Value *NewOr = Builder->CreateOr(Z, ~(*C1)); - return Builder->CreateSub(RHS, NewOr, "sub"); + Value *NewOr = Builder.CreateOr(Z, ~(*C1)); + return Builder.CreateSub(RHS, NewOr, "sub"); } } } @@ -1035,8 +944,8 @@ static Value *checkForNegativeOperand(BinaryOperator &I, if (match(LHS, m_Xor(m_Value(Y), m_APInt(C1)))) if (C1->countTrailingZeros() == 0) if (match(Y, m_And(m_Value(Z), m_APInt(C2))) && *C1 == (*C2 + 1)) { - Value *NewOr = Builder->CreateOr(Z, ~(*C2)); - return Builder->CreateSub(RHS, NewOr, "sub"); + Value *NewOr = Builder.CreateOr(Z, ~(*C2)); + return Builder.CreateSub(RHS, NewOr, "sub"); } return nullptr; } @@ -1079,14 +988,24 @@ static Instruction *foldAddWithConstant(BinaryOperator &Add, return new ZExtInst(Builder.CreateNUWAdd(X, NewC), Ty); } - // Shifts and add used to flip and mask off the low bit: - // add (ashr (shl i32 X, 31), 31), 1 --> and (not X), 1 - const APInt *C3; - if (*C == 1 && match(Op0, m_OneUse(m_AShr(m_Shl(m_Value(X), m_APInt(C2)), - m_APInt(C3)))) && - C2 == C3 && *C2 == Ty->getScalarSizeInBits() - 1) { - Value *NotX = Builder.CreateNot(X); - return BinaryOperator::CreateAnd(NotX, ConstantInt::get(Ty, 1)); + if (C->isOneValue() && Op0->hasOneUse()) { + // add (sext i1 X), 1 --> zext (not X) + // TODO: The smallest IR representation is (select X, 0, 1), and that would + // not require the one-use check. But we need to remove a transform in + // visitSelect and make sure that IR value tracking for select is equal or + // better than for these ops. 
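// Illustrative sketch (not from the patch): an exhaustive check of the new
// fold  add (sext i1 X), 1 --> zext (not X)  over both values of X:
#include <cassert>
#include <cstdint>

static void checkExampleSExtAddFold() {
  for (bool X : {false, true}) {
    int32_t SExtPlusOne = (X ? -1 : 0) + 1; // add (sext i1 X to i32), 1
    int32_t ZExtNot = !X ? 1 : 0;           // zext (not X) to i32
    assert(SExtPlusOne == ZExtNot);
  }
}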
+ if (match(Op0, m_SExt(m_Value(X))) && + X->getType()->getScalarSizeInBits() == 1) + return new ZExtInst(Builder.CreateNot(X), Ty); + + // Shifts and add used to flip and mask off the low bit: + // add (ashr (shl i32 X, 31), 31), 1 --> and (not X), 1 + const APInt *C3; + if (match(Op0, m_AShr(m_Shl(m_Value(X), m_APInt(C2)), m_APInt(C3))) && + C2 == C3 && *C2 == Ty->getScalarSizeInBits() - 1) { + Value *NotX = Builder.CreateNot(X); + return BinaryOperator::CreateAnd(NotX, ConstantInt::get(Ty, 1)); + } } return nullptr; @@ -1099,15 +1018,16 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(), - I.hasNoUnsignedWrap(), SQ)) + if (Value *V = + SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(), I.hasNoUnsignedWrap(), + SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); // (A*B)+(A*C) -> A*(B+C) etc if (Value *V = SimplifyUsingDistributiveLaws(I)) return replaceInstUsesWith(I, V); - if (Instruction *X = foldAddWithConstant(I, *Builder)) + if (Instruction *X = foldAddWithConstant(I, Builder)) return X; // FIXME: This should be moved into the above helper function to allow these @@ -1140,16 +1060,14 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { if (ExtendAmt) { Constant *ShAmt = ConstantInt::get(I.getType(), ExtendAmt); - Value *NewShl = Builder->CreateShl(XorLHS, ShAmt, "sext"); + Value *NewShl = Builder.CreateShl(XorLHS, ShAmt, "sext"); return BinaryOperator::CreateAShr(NewShl, ShAmt); } // If this is a xor that was canonicalized from a sub, turn it back into // a sub and fuse this add with it. if (LHS->hasOneUse() && (XorRHS->getValue()+1).isPowerOf2()) { - IntegerType *IT = cast(I.getType()); - KnownBits LHSKnown(IT->getBitWidth()); - computeKnownBits(XorLHS, LHSKnown, 0, &I); + KnownBits LHSKnown = computeKnownBits(XorLHS, 0, &I); if ((XorRHS->getValue() | LHSKnown.Zero).isAllOnesValue()) return BinaryOperator::CreateSub(ConstantExpr::getAdd(XorRHS, CI), XorLHS); @@ -1166,7 +1084,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { if (Instruction *NV = foldOpWithConstantIntoOperand(I)) return NV; - if (I.getType()->getScalarType()->isIntegerTy(1)) + if (I.getType()->isIntOrIntVectorTy(1)) return BinaryOperator::CreateXor(LHS, RHS); // X + X --> X << 1 @@ -1183,7 +1101,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { if (Value *LHSV = dyn_castNegVal(LHS)) { if (!isa(RHS)) if (Value *RHSV = dyn_castNegVal(RHS)) { - Value *NewAdd = Builder->CreateAdd(LHSV, RHSV, "sum"); + Value *NewAdd = Builder.CreateAdd(LHSV, RHSV, "sum"); return BinaryOperator::CreateNeg(NewAdd); } @@ -1230,7 +1148,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { if (AddRHSHighBits == AddRHSHighBitsAnd) { // Okay, the xform is safe. Insert the new add pronto. - Value *NewAdd = Builder->CreateAdd(X, CRHS, LHS->getName()); + Value *NewAdd = Builder.CreateAdd(X, CRHS, LHS->getName()); return BinaryOperator::CreateAnd(NewAdd, C2); } } @@ -1270,10 +1188,10 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { Constant *CI = ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType()); if (ConstantExpr::getSExt(CI, I.getType()) == RHSC && - WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI, I)) { + willNotOverflowSignedAdd(LHSConv->getOperand(0), CI, I)) { // Insert the new, smaller add. 
Value *NewAdd = - Builder->CreateNSWAdd(LHSConv->getOperand(0), CI, "addconv"); + Builder.CreateNSWAdd(LHSConv->getOperand(0), CI, "addconv"); return new SExtInst(NewAdd, I.getType()); } } @@ -1287,10 +1205,10 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { if (LHSConv->getOperand(0)->getType() == RHSConv->getOperand(0)->getType() && (LHSConv->hasOneUse() || RHSConv->hasOneUse()) && - WillNotOverflowSignedAdd(LHSConv->getOperand(0), + willNotOverflowSignedAdd(LHSConv->getOperand(0), RHSConv->getOperand(0), I)) { // Insert the new integer add. - Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), + Value *NewAdd = Builder.CreateNSWAdd(LHSConv->getOperand(0), RHSConv->getOperand(0), "addconv"); return new SExtInst(NewAdd, I.getType()); } @@ -1306,11 +1224,10 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { Constant *CI = ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType()); if (ConstantExpr::getZExt(CI, I.getType()) == RHSC && - computeOverflowForUnsignedAdd(LHSConv->getOperand(0), CI, &I) == - OverflowResult::NeverOverflows) { + willNotOverflowUnsignedAdd(LHSConv->getOperand(0), CI, I)) { // Insert the new, smaller add. Value *NewAdd = - Builder->CreateNUWAdd(LHSConv->getOperand(0), CI, "addconv"); + Builder.CreateNUWAdd(LHSConv->getOperand(0), CI, "addconv"); return new ZExtInst(NewAdd, I.getType()); } } @@ -1324,11 +1241,10 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { if (LHSConv->getOperand(0)->getType() == RHSConv->getOperand(0)->getType() && (LHSConv->hasOneUse() || RHSConv->hasOneUse()) && - computeOverflowForUnsignedAdd(LHSConv->getOperand(0), - RHSConv->getOperand(0), - &I) == OverflowResult::NeverOverflows) { + willNotOverflowUnsignedAdd(LHSConv->getOperand(0), + RHSConv->getOperand(0), I)) { // Insert the new integer add. - Value *NewAdd = Builder->CreateNUWAdd( + Value *NewAdd = Builder.CreateNUWAdd( LHSConv->getOperand(0), RHSConv->getOperand(0), "addconv"); return new ZExtInst(NewAdd, I.getType()); } @@ -1367,16 +1283,14 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { } } - // TODO(jingyue): Consider WillNotOverflowSignedAdd and - // WillNotOverflowUnsignedAdd to reduce the number of invocations of + // TODO(jingyue): Consider willNotOverflowSignedAdd and + // willNotOverflowUnsignedAdd to reduce the number of invocations of // computeKnownBits. - if (!I.hasNoSignedWrap() && WillNotOverflowSignedAdd(LHS, RHS, I)) { + if (!I.hasNoSignedWrap() && willNotOverflowSignedAdd(LHS, RHS, I)) { Changed = true; I.setHasNoSignedWrap(true); } - if (!I.hasNoUnsignedWrap() && - computeOverflowForUnsignedAdd(LHS, RHS, &I) == - OverflowResult::NeverOverflows) { + if (!I.hasNoUnsignedWrap() && willNotOverflowUnsignedAdd(LHS, RHS, I)) { Changed = true; I.setHasNoUnsignedWrap(true); } @@ -1391,7 +1305,8 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifyFAddInst(LHS, RHS, I.getFastMathFlags(), SQ)) + if (Value *V = SimplifyFAddInst(LHS, RHS, I.getFastMathFlags(), + SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); if (isa(RHS)) @@ -1445,10 +1360,9 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { ConstantExpr::getFPToSI(CFP, LHSIntVal->getType()); if (LHSConv->hasOneUse() && ConstantExpr::getSIToFP(CI, I.getType()) == CFP && - WillNotOverflowSignedAdd(LHSIntVal, CI, I)) { + willNotOverflowSignedAdd(LHSIntVal, CI, I)) { // Insert the new integer add. 
- Value *NewAdd = Builder->CreateNSWAdd(LHSIntVal, - CI, "addconv"); + Value *NewAdd = Builder.CreateNSWAdd(LHSIntVal, CI, "addconv"); return new SIToFPInst(NewAdd, I.getType()); } } @@ -1464,10 +1378,9 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { // and if the integer add will not overflow. if (LHSIntVal->getType() == RHSIntVal->getType() && (LHSConv->hasOneUse() || RHSConv->hasOneUse()) && - WillNotOverflowSignedAdd(LHSIntVal, RHSIntVal, I)) { + willNotOverflowSignedAdd(LHSIntVal, RHSIntVal, I)) { // Insert the new integer add. - Value *NewAdd = Builder->CreateNSWAdd(LHSIntVal, - RHSIntVal, "addconv"); + Value *NewAdd = Builder.CreateNSWAdd(LHSIntVal, RHSIntVal, "addconv"); return new SIToFPInst(NewAdd, I.getType()); } } @@ -1565,14 +1478,14 @@ Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS, // pointer, subtract it from the offset we have. if (GEP2) { Value *Offset = EmitGEPOffset(GEP2); - Result = Builder->CreateSub(Result, Offset); + Result = Builder.CreateSub(Result, Offset); } // If we have p - gep(p, ...) then we have to negate the result. if (Swapped) - Result = Builder->CreateNeg(Result, "diff.neg"); + Result = Builder.CreateNeg(Result, "diff.neg"); - return Builder->CreateIntCast(Result, Ty, true); + return Builder.CreateIntCast(Result, Ty, true); } Instruction *InstCombiner::visitSub(BinaryOperator &I) { @@ -1581,8 +1494,9 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifySubInst(Op0, Op1, I.hasNoSignedWrap(), - I.hasNoUnsignedWrap(), SQ)) + if (Value *V = + SimplifySubInst(Op0, Op1, I.hasNoSignedWrap(), I.hasNoUnsignedWrap(), + SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); // (A*B)-(A*C) -> A*(B-C) etc @@ -1606,7 +1520,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { return Res; } - if (I.getType()->getScalarType()->isIntegerTy(1)) + if (I.getType()->isIntOrIntVectorTy(1)) return BinaryOperator::CreateXor(Op0, Op1); // Replace (-1 - A) with (~A). @@ -1636,12 +1550,12 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { // Fold (sub 0, (zext bool to B)) --> (sext bool to B) if (C->isNullValue() && match(Op1, m_ZExt(m_Value(X)))) - if (X->getType()->getScalarType()->isIntegerTy(1)) + if (X->getType()->isIntOrIntVectorTy(1)) return CastInst::CreateSExtOrBitCast(X, Op1->getType()); // Fold (sub 0, (sext bool to B)) --> (zext bool to B) if (C->isNullValue() && match(Op1, m_SExt(m_Value(X)))) - if (X->getType()->getScalarType()->isIntegerTy(1)) + if (X->getType()->isIntOrIntVectorTy(1)) return CastInst::CreateZExtOrBitCast(X, Op1->getType()); } @@ -1651,7 +1565,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { // -(X >>u 31) -> (X >>s 31) // -(X >>s 31) -> (X >>u 31) - if (*Op0C == 0) { + if (Op0C->isNullValue()) { Value *X; const APInt *ShAmt; if (match(Op1, m_LShr(m_Value(X), m_APInt(ShAmt))) && @@ -1669,8 +1583,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { // Turn this into a xor if LHS is 2^n-1 and the remaining bits are known // zero. 
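// Illustrative sketch (not from the patch): why the mask fold below is
// sound. If C is a low-bit mask (2^n - 1) and every other bit of the RHS is
// known zero, the subtraction borrows nowhere and just clears bits, i.e.
// C - X == C ^ X:
#include <cassert>
#include <cstdint>

static void checkExampleMaskSubFold() {
  const uint32_t C = 0x7F;          // 2^7 - 1, a mask in the isMask() sense
  for (uint32_t X = 0; X <= C; ++X) // every value whose high bits are zero
    assert(C - X == (C ^ X));
}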
if (Op0C->isMask()) { - KnownBits RHSKnown(BitWidth); - computeKnownBits(Op1, RHSKnown, 0, &I); + KnownBits RHSKnown = computeKnownBits(Op1, 0, &I); if ((*Op0C | RHSKnown.Zero).isAllOnesValue()) return BinaryOperator::CreateXor(Op1, Op0); } @@ -1700,7 +1613,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { // ((X | Y) - X) --> (~X & Y) if (match(Op0, m_OneUse(m_c_Or(m_Value(Y), m_Specific(Op1))))) return BinaryOperator::CreateAnd( - Y, Builder->CreateNot(Op1, Op1->getName() + ".not")); + Y, Builder.CreateNot(Op1, Op1->getName() + ".not")); } if (Op1->hasOneUse()) { @@ -1710,13 +1623,13 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { // (X - (Y - Z)) --> (X + (Z - Y)). if (match(Op1, m_Sub(m_Value(Y), m_Value(Z)))) return BinaryOperator::CreateAdd(Op0, - Builder->CreateSub(Z, Y, Op1->getName())); + Builder.CreateSub(Z, Y, Op1->getName())); // (X - (X & Y)) --> (X & ~Y) // if (match(Op1, m_c_And(m_Value(Y), m_Specific(Op0)))) return BinaryOperator::CreateAnd(Op0, - Builder->CreateNot(Y, Y->getName() + ".not")); + Builder.CreateNot(Y, Y->getName() + ".not")); // 0 - (X sdiv C) -> (X sdiv -C) provided the negation doesn't overflow. if (match(Op1, m_SDiv(m_Value(X), m_Constant(C))) && match(Op0, m_Zero()) && @@ -1733,7 +1646,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { // 'nuw' is dropped in favor of the canonical form. if (match(Op1, m_SExt(m_Value(Y))) && Y->getType()->getScalarSizeInBits() == 1) { - Value *Zext = Builder->CreateZExt(Y, I.getType()); + Value *Zext = Builder.CreateZExt(Y, I.getType()); BinaryOperator *Add = BinaryOperator::CreateAdd(Op0, Zext); Add->setHasNoSignedWrap(I.hasNoSignedWrap()); return Add; @@ -1744,13 +1657,13 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { Value *A, *B; Constant *CI; if (match(Op1, m_c_Mul(m_Value(A), m_Neg(m_Value(B))))) - return BinaryOperator::CreateAdd(Op0, Builder->CreateMul(A, B)); + return BinaryOperator::CreateAdd(Op0, Builder.CreateMul(A, B)); // X - A*CI -> X + A*-CI // No need to handle commuted multiply because multiply handling will // ensure constant will be move to the right hand side. 
if (match(Op1, m_Mul(m_Value(A), m_Constant(CI)))) { - Value *NewMul = Builder->CreateMul(A, ConstantExpr::getNeg(CI)); + Value *NewMul = Builder.CreateMul(A, ConstantExpr::getNeg(CI)); return BinaryOperator::CreateAdd(Op0, NewMul); } } @@ -1770,11 +1683,11 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { return replaceInstUsesWith(I, Res); bool Changed = false; - if (!I.hasNoSignedWrap() && WillNotOverflowSignedSub(Op0, Op1, I)) { + if (!I.hasNoSignedWrap() && willNotOverflowSignedSub(Op0, Op1, I)) { Changed = true; I.setHasNoSignedWrap(true); } - if (!I.hasNoUnsignedWrap() && WillNotOverflowUnsignedSub(Op0, Op1, I)) { + if (!I.hasNoUnsignedWrap() && willNotOverflowUnsignedSub(Op0, Op1, I)) { Changed = true; I.setHasNoUnsignedWrap(true); } @@ -1788,7 +1701,8 @@ Instruction *InstCombiner::visitFSub(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifyFSubInst(Op0, Op1, I.getFastMathFlags(), SQ)) + if (Value *V = SimplifyFSubInst(Op0, Op1, I.getFastMathFlags(), + SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); // fsub nsz 0, X ==> fsub nsz -0.0, X @@ -1813,14 +1727,14 @@ Instruction *InstCombiner::visitFSub(BinaryOperator &I) { } if (FPTruncInst *FPTI = dyn_cast(Op1)) { if (Value *V = dyn_castFNegVal(FPTI->getOperand(0))) { - Value *NewTrunc = Builder->CreateFPTrunc(V, I.getType()); + Value *NewTrunc = Builder.CreateFPTrunc(V, I.getType()); Instruction *NewI = BinaryOperator::CreateFAdd(Op0, NewTrunc); NewI->copyFastMathFlags(&I); return NewI; } } else if (FPExtInst *FPEI = dyn_cast(Op1)) { if (Value *V = dyn_castFNegVal(FPEI->getOperand(0))) { - Value *NewExt = Builder->CreateFPExt(V, I.getType()); + Value *NewExt = Builder.CreateFPExt(V, I.getType()); Instruction *NewI = BinaryOperator::CreateFAdd(Op0, NewExt); NewI->copyFastMathFlags(&I); return NewI; diff --git a/interpreter/llvm/src/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/interpreter/llvm/src/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 629f9be8a0aba..fdc9c373b95e6 100644 --- a/interpreter/llvm/src/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/interpreter/llvm/src/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -54,17 +54,17 @@ static unsigned getFCmpCode(FCmpInst::Predicate CC) { /// instruction. The sign is passed in to determine which kind of predicate to /// use in the new icmp instruction. static Value *getNewICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS, - InstCombiner::BuilderTy *Builder) { + InstCombiner::BuilderTy &Builder) { ICmpInst::Predicate NewPred; if (Value *NewConstant = getICmpValue(Sign, Code, LHS, RHS, NewPred)) return NewConstant; - return Builder->CreateICmp(NewPred, LHS, RHS); + return Builder.CreateICmp(NewPred, LHS, RHS); } /// This is the complement of getFCmpCode, which turns an opcode and two /// operands into either a FCmp instruction, or a true/false constant. 
static Value *getFCmpValue(unsigned Code, Value *LHS, Value *RHS, - InstCombiner::BuilderTy *Builder) { + InstCombiner::BuilderTy &Builder) { const auto Pred = static_cast(Code); assert(FCmpInst::FCMP_FALSE <= Pred && Pred <= FCmpInst::FCMP_TRUE && "Unexpected FCmp predicate!"); @@ -72,53 +72,45 @@ static Value *getFCmpValue(unsigned Code, Value *LHS, Value *RHS, return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0); if (Pred == FCmpInst::FCMP_TRUE) return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 1); - return Builder->CreateFCmp(Pred, LHS, RHS); + return Builder.CreateFCmp(Pred, LHS, RHS); } -/// \brief Transform BITWISE_OP(BSWAP(A),BSWAP(B)) to BSWAP(BITWISE_OP(A, B)) +/// \brief Transform BITWISE_OP(BSWAP(A),BSWAP(B)) or +/// BITWISE_OP(BSWAP(A), Constant) to BSWAP(BITWISE_OP(A, B)) /// \param I Binary operator to transform. /// \return Pointer to node that must replace the original binary operator, or /// null pointer if no transformation was made. -Value *InstCombiner::SimplifyBSwap(BinaryOperator &I) { - IntegerType *ITy = dyn_cast(I.getType()); - - // Can't do vectors. - if (I.getType()->isVectorTy()) - return nullptr; - - // Can only do bitwise ops. - if (!I.isBitwiseLogicOp()) - return nullptr; +static Value *SimplifyBSwap(BinaryOperator &I, + InstCombiner::BuilderTy &Builder) { + assert(I.isBitwiseLogicOp() && "Unexpected opcode for bswap simplifying"); Value *OldLHS = I.getOperand(0); Value *OldRHS = I.getOperand(1); - ConstantInt *ConstLHS = dyn_cast(OldLHS); - ConstantInt *ConstRHS = dyn_cast(OldRHS); - IntrinsicInst *IntrLHS = dyn_cast(OldLHS); - IntrinsicInst *IntrRHS = dyn_cast(OldRHS); - bool IsBswapLHS = (IntrLHS && IntrLHS->getIntrinsicID() == Intrinsic::bswap); - bool IsBswapRHS = (IntrRHS && IntrRHS->getIntrinsicID() == Intrinsic::bswap); - - if (!IsBswapLHS && !IsBswapRHS) - return nullptr; - - if (!IsBswapLHS && !ConstLHS) - return nullptr; - if (!IsBswapRHS && !ConstRHS) + Value *NewLHS; + if (!match(OldLHS, m_BSwap(m_Value(NewLHS)))) return nullptr; - /// OP( BSWAP(x), BSWAP(y) ) -> BSWAP( OP(x, y) ) - /// OP( BSWAP(x), CONSTANT ) -> BSWAP( OP(x, BSWAP(CONSTANT) ) ) - Value *NewLHS = IsBswapLHS ? IntrLHS->getOperand(0) : - Builder->getInt(ConstLHS->getValue().byteSwap()); + Value *NewRHS; + const APInt *C; - Value *NewRHS = IsBswapRHS ? IntrRHS->getOperand(0) : - Builder->getInt(ConstRHS->getValue().byteSwap()); + if (match(OldRHS, m_BSwap(m_Value(NewRHS)))) { + // OP( BSWAP(x), BSWAP(y) ) -> BSWAP( OP(x, y) ) + if (!OldLHS->hasOneUse() && !OldRHS->hasOneUse()) + return nullptr; + // NewRHS initialized by the matcher. + } else if (match(OldRHS, m_APInt(C))) { + // OP( BSWAP(x), CONSTANT ) -> BSWAP( OP(x, BSWAP(CONSTANT) ) ) + if (!OldLHS->hasOneUse()) + return nullptr; + NewRHS = ConstantInt::get(I.getType(), C->byteSwap()); + } else + return nullptr; - Value *BinOp = Builder->CreateBinOp(I.getOpcode(), NewLHS, NewRHS); - Function *F = Intrinsic::getDeclaration(I.getModule(), Intrinsic::bswap, ITy); - return Builder->CreateCall(F, BinOp); + Value *BinOp = Builder.CreateBinOp(I.getOpcode(), NewLHS, NewRHS); + Function *F = Intrinsic::getDeclaration(I.getModule(), Intrinsic::bswap, + I.getType()); + return Builder.CreateCall(F, BinOp); } /// This handles expressions of the form ((val OP C1) & C2). 
Where @@ -137,7 +129,7 @@ Instruction *InstCombiner::OptAndOp(BinaryOperator *Op, case Instruction::Xor: if (Op->hasOneUse()) { // (X ^ C1) & C2 --> (X & C2) ^ (C1&C2) - Value *And = Builder->CreateAnd(X, AndRHS); + Value *And = Builder.CreateAnd(X, AndRHS); And->takeName(Op); return BinaryOperator::CreateXor(And, Together); } @@ -150,7 +142,7 @@ Instruction *InstCombiner::OptAndOp(BinaryOperator *Op, // NOTE: This reduces the number of bits set in the & mask, which // can expose opportunities for store narrowing. Together = ConstantExpr::getXor(AndRHS, Together); - Value *And = Builder->CreateAnd(X, Together); + Value *And = Builder.CreateAnd(X, Together); And->takeName(Op); return BinaryOperator::CreateOr(And, OpRHS); } @@ -172,17 +164,17 @@ Instruction *InstCombiner::OptAndOp(BinaryOperator *Op, const APInt& AddRHS = OpRHS->getValue(); // Check to see if any bits below the one bit set in AndRHSV are set. - if ((AddRHS & (AndRHSV-1)) == 0) { + if ((AddRHS & (AndRHSV - 1)).isNullValue()) { // If not, the only thing that can effect the output of the AND is // the bit specified by AndRHSV. If that bit is set, the effect of // the XOR is to toggle the bit. If it is clear, then the ADD has // no effect. - if ((AddRHS & AndRHSV) == 0) { // Bit is not set, noop + if ((AddRHS & AndRHSV).isNullValue()) { // Bit is not set, noop TheAnd.setOperand(0, X); return &TheAnd; } else { // Pull the XOR out of the AND. - Value *NewAnd = Builder->CreateAnd(X, AndRHS); + Value *NewAnd = Builder.CreateAnd(X, AndRHS); NewAnd->takeName(Op); return BinaryOperator::CreateXor(NewAnd, AndRHS); } @@ -198,7 +190,7 @@ Instruction *InstCombiner::OptAndOp(BinaryOperator *Op, uint32_t BitWidth = AndRHS->getType()->getBitWidth(); uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth); APInt ShlMask(APInt::getHighBitsSet(BitWidth, BitWidth-OpRHSVal)); - ConstantInt *CI = Builder->getInt(AndRHS->getValue() & ShlMask); + ConstantInt *CI = Builder.getInt(AndRHS->getValue() & ShlMask); if (CI->getValue() == ShlMask) // Masking out bits that the shift already masks. @@ -218,7 +210,7 @@ Instruction *InstCombiner::OptAndOp(BinaryOperator *Op, uint32_t BitWidth = AndRHS->getType()->getBitWidth(); uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth); APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal)); - ConstantInt *CI = Builder->getInt(AndRHS->getValue() & ShrMask); + ConstantInt *CI = Builder.getInt(AndRHS->getValue() & ShrMask); if (CI->getValue() == ShrMask) // Masking out bits that the shift already masks. @@ -238,12 +230,12 @@ Instruction *InstCombiner::OptAndOp(BinaryOperator *Op, uint32_t BitWidth = AndRHS->getType()->getBitWidth(); uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth); APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal)); - Constant *C = Builder->getInt(AndRHS->getValue() & ShrMask); + Constant *C = Builder.getInt(AndRHS->getValue() & ShrMask); if (C == AndRHS) { // Masking out bits shifted in. // (Val ashr C1) & C2 -> (Val lshr C1) & C2 // Make the argument unsigned. Value *ShVal = Op->getOperand(0); - ShVal = Builder->CreateLShr(ShVal, OpRHS, Op->getName()); + ShVal = Builder.CreateLShr(ShVal, OpRHS, Op->getName()); return BinaryOperator::CreateAnd(ShVal, AndRHS, TheAnd.getName()); } } @@ -269,15 +261,15 @@ Value *InstCombiner::insertRangeTest(Value *V, const APInt &Lo, const APInt &Hi, ICmpInst::Predicate Pred = Inside ? ICmpInst::ICMP_ULT : ICmpInst::ICMP_UGE; if (isSigned ? Lo.isMinSignedValue() : Lo.isMinValue()) { Pred = isSigned ? 
ICmpInst::getSignedPredicate(Pred) : Pred; - return Builder->CreateICmp(Pred, V, ConstantInt::get(Ty, Hi)); + return Builder.CreateICmp(Pred, V, ConstantInt::get(Ty, Hi)); } // V >= Lo && V < Hi --> V - Lo u< Hi - Lo // V < Lo || V >= Hi --> V - Lo u>= Hi - Lo Value *VMinusLo = - Builder->CreateSub(V, ConstantInt::get(Ty, Lo), V->getName() + ".off"); + Builder.CreateSub(V, ConstantInt::get(Ty, Lo), V->getName() + ".off"); Constant *HiMinusLo = ConstantInt::get(Ty, Hi - Lo); - return Builder->CreateICmp(Pred, VMinusLo, HiMinusLo); + return Builder.CreateICmp(Pred, VMinusLo, HiMinusLo); } /// Classify (icmp eq (A & B), C) and (icmp ne (A & B), C) as matching patterns @@ -523,7 +515,7 @@ static unsigned getMaskedTypeForICmpPair(Value *&A, Value *&B, Value *&C, /// Try to fold (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E) /// into a single (icmp(A & X) ==/!= Y). static Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd, - llvm::InstCombiner::BuilderTy *Builder) { + llvm::InstCombiner::BuilderTy &Builder) { Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr, *E = nullptr; ICmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate(); unsigned Mask = @@ -556,27 +548,27 @@ static Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd, if (Mask & Mask_AllZeros) { // (icmp eq (A & B), 0) & (icmp eq (A & D), 0) // -> (icmp eq (A & (B|D)), 0) - Value *NewOr = Builder->CreateOr(B, D); - Value *NewAnd = Builder->CreateAnd(A, NewOr); + Value *NewOr = Builder.CreateOr(B, D); + Value *NewAnd = Builder.CreateAnd(A, NewOr); // We can't use C as zero because we might actually handle // (icmp ne (A & B), B) & (icmp ne (A & D), D) // with B and D, having a single bit set. Value *Zero = Constant::getNullValue(A->getType()); - return Builder->CreateICmp(NewCC, NewAnd, Zero); + return Builder.CreateICmp(NewCC, NewAnd, Zero); } if (Mask & BMask_AllOnes) { // (icmp eq (A & B), B) & (icmp eq (A & D), D) // -> (icmp eq (A & (B|D)), (B|D)) - Value *NewOr = Builder->CreateOr(B, D); - Value *NewAnd = Builder->CreateAnd(A, NewOr); - return Builder->CreateICmp(NewCC, NewAnd, NewOr); + Value *NewOr = Builder.CreateOr(B, D); + Value *NewAnd = Builder.CreateAnd(A, NewOr); + return Builder.CreateICmp(NewCC, NewAnd, NewOr); } if (Mask & AMask_AllOnes) { // (icmp eq (A & B), A) & (icmp eq (A & D), A) // -> (icmp eq (A & (B&D)), A) - Value *NewAnd1 = Builder->CreateAnd(B, D); - Value *NewAnd2 = Builder->CreateAnd(A, NewAnd1); - return Builder->CreateICmp(NewCC, NewAnd2, A); + Value *NewAnd1 = Builder.CreateAnd(B, D); + Value *NewAnd2 = Builder.CreateAnd(A, NewAnd1); + return Builder.CreateICmp(NewCC, NewAnd2, A); } // Remaining cases assume at least that B and D are constant, and depend on @@ -641,13 +633,13 @@ static Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd, // If there is a conflict, we should actually return a false for the // whole construct. 
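// Illustrative example (not from the patch) of the conflict test below:
// with B = D = 4, C = 4, E = 0, the left compare forces bit 2 of A to one
// while the right compare forces it to zero, so the 'and' of the two
// compares can never be true and the whole construct folds to false:
#include <cassert>

static void checkExampleMaskConflict() {
  unsigned B = 4, D = 4, C = 4, E = 0;
  // Nonzero means some shared bit is required to be both set and clear.
  assert(((B & D) & (C ^ E)) != 0);
}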
if (((BCst->getValue() & DCst->getValue()) & - (CCst->getValue() ^ ECst->getValue())) != 0) + (CCst->getValue() ^ ECst->getValue())).getBoolValue()) return ConstantInt::get(LHS->getType(), !IsAnd); - Value *NewOr1 = Builder->CreateOr(B, D); + Value *NewOr1 = Builder.CreateOr(B, D); Value *NewOr2 = ConstantExpr::getOr(CCst, ECst); - Value *NewAnd = Builder->CreateAnd(A, NewOr1); - return Builder->CreateICmp(NewCC, NewAnd, NewOr2); + Value *NewAnd = Builder.CreateAnd(A, NewOr1); + return Builder.CreateICmp(NewCC, NewAnd, NewOr2); } return nullptr; @@ -698,21 +690,20 @@ Value *InstCombiner::simplifyRangeCheck(ICmpInst *Cmp0, ICmpInst *Cmp1, } // This simplification is only valid if the upper range is not negative. - bool IsNegative, IsNotNegative; - ComputeSignBit(RangeEnd, IsNotNegative, IsNegative, /*Depth=*/0, Cmp1); - if (!IsNotNegative) + KnownBits Known = computeKnownBits(RangeEnd, /*Depth=*/0, Cmp1); + if (!Known.isNonNegative()) return nullptr; if (Inverted) NewPred = ICmpInst::getInversePredicate(NewPred); - return Builder->CreateICmp(NewPred, Input, RangeEnd); + return Builder.CreateICmp(NewPred, Input, RangeEnd); } static Value * foldAndOrOfEqualityCmpsWithConstants(ICmpInst *LHS, ICmpInst *RHS, bool JoinedByAnd, - InstCombiner::BuilderTy *Builder) { + InstCombiner::BuilderTy &Builder) { Value *X = LHS->getOperand(0); if (X != RHS->getOperand(0)) return nullptr; @@ -743,29 +734,75 @@ foldAndOrOfEqualityCmpsWithConstants(ICmpInst *LHS, ICmpInst *RHS, // (X != C1 && X != C2) --> (X | (C1 ^ C2)) != C2 // We choose an 'or' with a Pow2 constant rather than the inverse mask with // 'and' because that may lead to smaller codegen from a smaller constant. - Value *Or = Builder->CreateOr(X, ConstantInt::get(X->getType(), Xor)); - return Builder->CreateICmp(Pred, Or, ConstantInt::get(X->getType(), *C2)); + Value *Or = Builder.CreateOr(X, ConstantInt::get(X->getType(), Xor)); + return Builder.CreateICmp(Pred, Or, ConstantInt::get(X->getType(), *C2)); } // Special case: get the ordering right when the values wrap around zero. // Ie, we assumed the constants were unsigned when swapping earlier. - if (*C1 == 0 && C2->isAllOnesValue()) + if (C1->isNullValue() && C2->isAllOnesValue()) std::swap(C1, C2); if (*C1 == *C2 - 1) { // (X == 13 || X == 14) --> X - 13 <=u 1 // (X != 13 && X != 14) --> X - 13 >u 1 // An 'add' is the canonical IR form, so favor that over a 'sub'. - Value *Add = Builder->CreateAdd(X, ConstantInt::get(X->getType(), -(*C1))); + Value *Add = Builder.CreateAdd(X, ConstantInt::get(X->getType(), -(*C1))); auto NewPred = JoinedByAnd ? 
ICmpInst::ICMP_UGT : ICmpInst::ICMP_ULE; - return Builder->CreateICmp(NewPred, Add, ConstantInt::get(X->getType(), 1)); + return Builder.CreateICmp(NewPred, Add, ConstantInt::get(X->getType(), 1)); + } + + return nullptr; +} + +// Fold (iszero(A & K1) | iszero(A & K2)) -> (A & (K1 | K2)) != (K1 | K2) +// Fold (!iszero(A & K1) & !iszero(A & K2)) -> (A & (K1 | K2)) == (K1 | K2) +Value *InstCombiner::foldAndOrOfICmpsOfAndWithPow2(ICmpInst *LHS, ICmpInst *RHS, + bool JoinedByAnd, + Instruction &CxtI) { + ICmpInst::Predicate Pred = LHS->getPredicate(); + if (Pred != RHS->getPredicate()) + return nullptr; + if (JoinedByAnd && Pred != ICmpInst::ICMP_NE) + return nullptr; + if (!JoinedByAnd && Pred != ICmpInst::ICMP_EQ) + return nullptr; + + // TODO support vector splats + ConstantInt *LHSC = dyn_cast(LHS->getOperand(1)); + ConstantInt *RHSC = dyn_cast(RHS->getOperand(1)); + if (!LHSC || !RHSC || !LHSC->isZero() || !RHSC->isZero()) + return nullptr; + + Value *A, *B, *C, *D; + if (match(LHS->getOperand(0), m_And(m_Value(A), m_Value(B))) && + match(RHS->getOperand(0), m_And(m_Value(C), m_Value(D)))) { + if (A == D || B == D) + std::swap(C, D); + if (B == C) + std::swap(A, B); + + if (A == C && + isKnownToBeAPowerOfTwo(B, false, 0, &CxtI) && + isKnownToBeAPowerOfTwo(D, false, 0, &CxtI)) { + Value *Mask = Builder.CreateOr(B, D); + Value *Masked = Builder.CreateAnd(A, Mask); + auto NewPred = JoinedByAnd ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE; + return Builder.CreateICmp(NewPred, Masked, Mask); + } } return nullptr; } /// Fold (icmp)&(icmp) if possible. -Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { +Value *InstCombiner::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS, + Instruction &CxtI) { + // Fold (!iszero(A & K1) & !iszero(A & K2)) -> (A & (K1 | K2)) == (K1 | K2) + // if K1 and K2 are a one-bit mask. + if (Value *V = foldAndOrOfICmpsOfAndWithPow2(LHS, RHS, true, CxtI)) + return V; + ICmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate(); // (icmp1 A, B) & (icmp2 A, B) --> (icmp3 A, B) @@ -810,8 +847,8 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { // (icmp eq A, 0) & (icmp eq B, 0) --> (icmp eq (A|B), 0) if ((PredL == ICmpInst::ICMP_ULT && LHSC->getValue().isPowerOf2()) || (PredL == ICmpInst::ICMP_EQ && LHSC->isZero())) { - Value *NewOr = Builder->CreateOr(LHS0, RHS0); - return Builder->CreateICmp(PredL, NewOr, LHSC); + Value *NewOr = Builder.CreateOr(LHS0, RHS0); + return Builder.CreateICmp(PredL, NewOr, LHSC); } } @@ -841,11 +878,12 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { // Check that the low bits are zero. 
APInt Low = APInt::getLowBitsSet(BigBitSize, SmallBitSize); - if ((Low & AndC->getValue()) == 0 && (Low & BigC->getValue()) == 0) { - Value *NewAnd = Builder->CreateAnd(V, Low | AndC->getValue()); + if ((Low & AndC->getValue()).isNullValue() && + (Low & BigC->getValue()).isNullValue()) { + Value *NewAnd = Builder.CreateAnd(V, Low | AndC->getValue()); APInt N = SmallC->getValue().zext(BigBitSize) | BigC->getValue(); Value *NewVal = ConstantInt::get(AndC->getType()->getContext(), N); - return Builder->CreateICmp(PredL, NewAnd, NewVal); + return Builder.CreateICmp(PredL, NewAnd, NewVal); } } } @@ -897,14 +935,14 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { llvm_unreachable("Unknown integer condition code!"); case ICmpInst::ICMP_ULT: if (LHSC == SubOne(RHSC)) // (X != 13 & X u< 14) -> X < 13 - return Builder->CreateICmpULT(LHS0, LHSC); - if (LHSC->isNullValue()) // (X != 0 & X u< 14) -> X-1 u< 13 + return Builder.CreateICmpULT(LHS0, LHSC); + if (LHSC->isZero()) // (X != 0 & X u< 14) -> X-1 u< 13 return insertRangeTest(LHS0, LHSC->getValue() + 1, RHSC->getValue(), false, true); break; // (X != 13 & X u< 15) -> no change case ICmpInst::ICMP_SLT: if (LHSC == SubOne(RHSC)) // (X != 13 & X s< 14) -> X < 13 - return Builder->CreateICmpSLT(LHS0, LHSC); + return Builder.CreateICmpSLT(LHS0, LHSC); break; // (X != 13 & X s< 15) -> no change case ICmpInst::ICMP_NE: // Potential folds for this case should already be handled. @@ -917,7 +955,7 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { llvm_unreachable("Unknown integer condition code!"); case ICmpInst::ICMP_NE: if (RHSC == AddOne(LHSC)) // (X u> 13 & X != 14) -> X u> 14 - return Builder->CreateICmp(PredL, LHS0, RHSC); + return Builder.CreateICmp(PredL, LHS0, RHSC); break; // (X u> 13 & X != 15) -> no change case ICmpInst::ICMP_ULT: // (X u> 13 & X u< 15) -> (X-14) getValue() + 1, RHSC->getValue(), @@ -930,7 +968,7 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { llvm_unreachable("Unknown integer condition code!"); case ICmpInst::ICMP_NE: if (RHSC == AddOne(LHSC)) // (X s> 13 & X != 14) -> X s> 14 - return Builder->CreateICmp(PredL, LHS0, RHSC); + return Builder.CreateICmp(PredL, LHS0, RHSC); break; // (X s> 13 & X != 15) -> no change case ICmpInst::ICMP_SLT: // (X s> 13 & X s< 15) -> (X-14) s< 1 return insertRangeTest(LHS0, LHSC->getValue() + 1, RHSC->getValue(), true, @@ -944,7 +982,7 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { /// Optimize (fcmp)&(fcmp). NOTE: Unlike the rest of instcombine, this returns /// a Value which should already be inserted into the function. -Value *InstCombiner::FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) { +Value *InstCombiner::foldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) { Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1); Value *Op1LHS = RHS->getOperand(0), *Op1RHS = RHS->getOperand(1); FCmpInst::Predicate Op0CC = LHS->getPredicate(), Op1CC = RHS->getPredicate(); @@ -979,15 +1017,15 @@ Value *InstCombiner::FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) { // If either of the constants are nans, then the whole thing returns // false. if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN()) - return Builder->getFalse(); - return Builder->CreateFCmpORD(LHS->getOperand(0), RHS->getOperand(0)); + return Builder.getFalse(); + return Builder.CreateFCmpORD(LHS->getOperand(0), RHS->getOperand(0)); } // Handle vector zeros. This occurs because the canonical form of // "fcmp ord x,x" is "fcmp ord x, 0". 
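// Illustrative sketch (not from the patch): "ord" is true iff neither
// operand is NaN, so two ordered checks against non-NaN constants collapse
// into a single ordered check between the two variables, as the code below
// exploits:
#include <cassert>
#include <cmath>

static void checkExampleOrdFold() {
  const double QNaN = std::nan("");
  for (double X : {1.0, QNaN})
    for (double Y : {2.0, QNaN}) {
      bool Lhs = !std::isnan(X);                      // fcmp ord X, 1.0
      bool Rhs = !std::isnan(Y);                      // fcmp ord Y, 2.0
      bool Folded = !std::isnan(X) && !std::isnan(Y); // fcmp ord X, Y
      assert((Lhs && Rhs) == Folded);
    }
}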
if (isa(LHS->getOperand(1)) && isa(RHS->getOperand(1))) - return Builder->CreateFCmpORD(LHS->getOperand(0), RHS->getOperand(0)); + return Builder.CreateFCmpORD(LHS->getOperand(0), RHS->getOperand(0)); return nullptr; } @@ -1042,7 +1080,7 @@ bool InstCombiner::shouldOptimizeCast(CastInst *CI) { /// Fold {and,or,xor} (cast X), C. static Instruction *foldLogicCastConstant(BinaryOperator &Logic, CastInst *Cast, - InstCombiner::BuilderTy *Builder) { + InstCombiner::BuilderTy &Builder) { Constant *C; if (!match(Logic.getOperand(1), m_Constant(C))) return nullptr; @@ -1051,26 +1089,17 @@ static Instruction *foldLogicCastConstant(BinaryOperator &Logic, CastInst *Cast, Type *DestTy = Logic.getType(); Type *SrcTy = Cast->getSrcTy(); - // If the first operand is bitcast, move the logic operation ahead of the - // bitcast (do the logic operation in the original type). This can eliminate - // bitcasts and allow combines that would otherwise be impeded by the bitcast. + // Move the logic operation ahead of a zext if the constant is unchanged in + // the smaller source type. Performing the logic in a smaller type may provide + // more information to later folds, and the smaller logic instruction may be + // cheaper (particularly in the case of vectors). Value *X; - if (match(Cast, m_BitCast(m_Value(X)))) { - Value *NewConstant = ConstantExpr::getBitCast(C, SrcTy); - Value *NewOp = Builder->CreateBinOp(LogicOpc, X, NewConstant); - return CastInst::CreateBitOrPointerCast(NewOp, DestTy); - } - - // Similarly, move the logic operation ahead of a zext if the constant is - // unchanged in the smaller source type. Performing the logic in a smaller - // type may provide more information to later folds, and the smaller logic - // instruction may be cheaper (particularly in the case of vectors). if (match(Cast, m_OneUse(m_ZExt(m_Value(X))))) { Constant *TruncC = ConstantExpr::getTrunc(C, SrcTy); Constant *ZextTruncC = ConstantExpr::getZExt(TruncC, DestTy); if (ZextTruncC == C) { // LogicOpc (zext X), C --> zext (LogicOpc X, C) - Value *NewOp = Builder->CreateBinOp(LogicOpc, X, TruncC); + Value *NewOp = Builder.CreateBinOp(LogicOpc, X, TruncC); return new ZExtInst(NewOp, DestTy); } } @@ -1113,7 +1142,7 @@ Instruction *InstCombiner::foldCastedBitwiseLogic(BinaryOperator &I) { // fold logic(cast(A), cast(B)) -> cast(logic(A, B)) if (shouldOptimizeCast(Cast0) && shouldOptimizeCast(Cast1)) { - Value *NewOp = Builder->CreateBinOp(LogicOpc, Cast0Src, Cast1Src, + Value *NewOp = Builder.CreateBinOp(LogicOpc, Cast0Src, Cast1Src, I.getName()); return CastInst::Create(CastOpcode, NewOp, DestTy); } @@ -1127,8 +1156,8 @@ Instruction *InstCombiner::foldCastedBitwiseLogic(BinaryOperator &I) { ICmpInst *ICmp0 = dyn_cast(Cast0Src); ICmpInst *ICmp1 = dyn_cast(Cast1Src); if (ICmp0 && ICmp1) { - Value *Res = LogicOpc == Instruction::And ? FoldAndOfICmps(ICmp0, ICmp1) - : FoldOrOfICmps(ICmp0, ICmp1, &I); + Value *Res = LogicOpc == Instruction::And ? foldAndOfICmps(ICmp0, ICmp1, I) + : foldOrOfICmps(ICmp0, ICmp1, I); if (Res) return CastInst::Create(CastOpcode, Res, DestTy); return nullptr; @@ -1139,8 +1168,8 @@ Instruction *InstCombiner::foldCastedBitwiseLogic(BinaryOperator &I) { FCmpInst *FCmp0 = dyn_cast(Cast0Src); FCmpInst *FCmp1 = dyn_cast(Cast1Src); if (FCmp0 && FCmp1) { - Value *Res = LogicOpc == Instruction::And ? FoldAndOfFCmps(FCmp0, FCmp1) - : FoldOrOfFCmps(FCmp0, FCmp1); + Value *Res = LogicOpc == Instruction::And ? 
foldAndOfFCmps(FCmp0, FCmp1) + : foldOrOfFCmps(FCmp0, FCmp1); if (Res) return CastInst::Create(CastOpcode, Res, DestTy); return nullptr; @@ -1159,15 +1188,14 @@ static Instruction *foldBoolSextMaskToSelect(BinaryOperator &I) { // Fold (and (sext bool to A), B) --> (select bool, B, 0) Value *X = nullptr; - if (match(Op0, m_SExt(m_Value(X))) && - X->getType()->getScalarType()->isIntegerTy(1)) { + if (match(Op0, m_SExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1)) { Value *Zero = Constant::getNullValue(Op1->getType()); return SelectInst::Create(X, Op1, Zero); } // Fold (and ~(sext bool to A), B) --> (select bool, 0, B) if (match(Op0, m_Not(m_SExt(m_Value(X)))) && - X->getType()->getScalarType()->isIntegerTy(1)) { + X->getType()->isIntOrIntVectorTy(1)) { Value *Zero = Constant::getNullValue(Op0->getType()); return SelectInst::Create(X, Zero, Op1); } @@ -1193,9 +1221,10 @@ static Instruction *foldAndToXor(BinaryOperator &I, // (A | ~B) & (B | ~A) --> ~(A ^ B) // (~B | A) & (~A | B) --> ~(A ^ B) // (~B | A) & (B | ~A) --> ~(A ^ B) - if (match(Op0, m_c_Or(m_Value(A), m_Not(m_Value(B)))) && - match(Op1, m_c_Or(m_Not(m_Specific(A)), m_Specific(B)))) - return BinaryOperator::CreateNot(Builder.CreateXor(A, B)); + if (Op0->hasOneUse() || Op1->hasOneUse()) + if (match(Op0, m_c_Or(m_Value(A), m_Not(m_Value(B)))) && + match(Op1, m_c_Or(m_Not(m_Specific(A)), m_Specific(B)))) + return BinaryOperator::CreateNot(Builder.CreateXor(A, B)); return nullptr; } @@ -1210,9 +1239,10 @@ static Instruction *foldOrToXor(BinaryOperator &I, // Operand complexity canonicalization guarantees that the 'and' is Op0. // (A & B) | ~(A | B) --> ~(A ^ B) // (A & B) | ~(B | A) --> ~(A ^ B) - if (match(Op0, m_And(m_Value(A), m_Value(B))) && - match(Op1, m_Not(m_c_Or(m_Specific(A), m_Specific(B))))) - return BinaryOperator::CreateNot(Builder.CreateXor(A, B)); + if (Op0->hasOneUse() || Op1->hasOneUse()) + if (match(Op0, m_And(m_Value(A), m_Value(B))) && + match(Op1, m_Not(m_c_Or(m_Specific(A), m_Specific(B))))) + return BinaryOperator::CreateNot(Builder.CreateXor(A, B)); // (A & ~B) | (~A & B) --> A ^ B // (A & ~B) | (B & ~A) --> A ^ B @@ -1235,7 +1265,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifyAndInst(Op0, Op1, SQ)) + if (Value *V = SimplifyAndInst(Op0, Op1, SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); // See if we can simplify any instructions used by the instruction whose sole @@ -1244,16 +1274,26 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { return &I; // Do this before using distributive laws to catch simple and/or/not patterns. 
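The relocated zext comment in foldLogicCastConstant and the new foldAndToXor/foldOrToXor helpers both rest on small bit identities. A standalone check, with 0x00FF as an arbitrary example constant satisfying zext(trunc(C)) == C:

    #include <cassert>
    #include <cstdint>

    int main() {
      // LogicOpc (zext X), C --> zext (LogicOpc X, trunc C)
      const uint32_t C = 0x00FFu;
      assert((uint32_t)(uint16_t)C == C); // constant survives the round trip
      for (uint32_t v = 0; v < 0x10000u; ++v) {
        uint16_t X = (uint16_t)v;
        assert(((uint32_t)X & C) == (uint32_t)(uint16_t)(X & (uint16_t)C));
      }
      // foldAndToXor / foldOrToXor identities, exhaustive on 8-bit values.
      for (unsigned a = 0; a < 256; ++a)
        for (unsigned b = 0; b < 256; ++b) {
          uint8_t A = (uint8_t)a, B = (uint8_t)b;
          assert((uint8_t)((A | ~B) & (~A | B)) == (uint8_t)~(A ^ B));
          assert((uint8_t)((A & B) | ~(A | B)) == (uint8_t)~(A ^ B));
          assert((uint8_t)((A & ~B) | (~A & B)) == (uint8_t)(A ^ B));
        }
      return 0;
    }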
- if (Instruction *Xor = foldAndToXor(I, *Builder)) + if (Instruction *Xor = foldAndToXor(I, Builder)) return Xor; // (A|B)&(A|C) -> A|(B&C) etc if (Value *V = SimplifyUsingDistributiveLaws(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifyBSwap(I)) + if (Value *V = SimplifyBSwap(I, Builder)) return replaceInstUsesWith(I, V); + if (match(Op1, m_One())) { + // (1 << x) & 1 --> zext(x == 0) + // (1 >> x) & 1 --> zext(x == 0) + Value *X; + if (match(Op0, m_OneUse(m_LogicalShift(m_One(), m_Value(X))))) { + Value *IsZero = Builder.CreateICmpEQ(X, ConstantInt::get(I.getType(), 0)); + return new ZExtInst(IsZero, I.getType()); + } + } + if (ConstantInt *AndRHS = dyn_cast(Op1)) { const APInt &AndRHSMask = AndRHS->getValue(); @@ -1271,37 +1311,20 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { APInt NotAndRHS(~AndRHSMask); if (MaskedValueIsZero(Op0LHS, NotAndRHS, 0, &I)) { // Not masking anything out for the LHS, move to RHS. - Value *NewRHS = Builder->CreateAnd(Op0RHS, AndRHS, - Op0RHS->getName()+".masked"); + Value *NewRHS = Builder.CreateAnd(Op0RHS, AndRHS, + Op0RHS->getName()+".masked"); return BinaryOperator::Create(Op0I->getOpcode(), Op0LHS, NewRHS); } if (!isa(Op0RHS) && MaskedValueIsZero(Op0RHS, NotAndRHS, 0, &I)) { // Not masking anything out for the RHS, move to LHS. - Value *NewLHS = Builder->CreateAnd(Op0LHS, AndRHS, - Op0LHS->getName()+".masked"); + Value *NewLHS = Builder.CreateAnd(Op0LHS, AndRHS, + Op0LHS->getName()+".masked"); return BinaryOperator::Create(Op0I->getOpcode(), NewLHS, Op0RHS); } break; } - case Instruction::Sub: - // -x & 1 -> x & 1 - if (AndRHSMask == 1 && match(Op0LHS, m_Zero())) - return BinaryOperator::CreateAnd(Op0RHS, AndRHS); - - break; - - case Instruction::Shl: - case Instruction::LShr: - // (1 << x) & 1 --> zext(x == 0) - // (1 >> x) & 1 --> zext(x == 0) - if (AndRHSMask == 1 && Op0LHS == AndRHS) { - Value *NewICmp = - Builder->CreateICmpEQ(Op0RHS, Constant::getNullValue(I.getType())); - return new ZExtInst(NewICmp, I.getType()); - } - break; } // ((C1 OP zext(X)) & C2) -> zext((C1-X) & C2) if C2 fits in the bitwidth @@ -1321,11 +1344,11 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { auto *TruncC1 = ConstantExpr::getTrunc(C1, X->getType()); Value *BinOp; if (isa(Op0LHS)) - BinOp = Builder->CreateBinOp(Op0I->getOpcode(), X, TruncC1); + BinOp = Builder.CreateBinOp(Op0I->getOpcode(), X, TruncC1); else - BinOp = Builder->CreateBinOp(Op0I->getOpcode(), TruncC1, X); + BinOp = Builder.CreateBinOp(Op0I->getOpcode(), TruncC1, X); auto *TruncC2 = ConstantExpr::getTrunc(AndRHS, X->getType()); - auto *And = Builder->CreateAnd(BinOp, TruncC2); + auto *And = Builder.CreateAnd(BinOp, TruncC2); return new ZExtInst(And, I.getType()); } } @@ -1345,7 +1368,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { // into : and (trunc X to T), trunc(YC) & C2 // This will fold the two constants together, which may allow // other simplifications. 
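The shift fold hoisted out of the ConstantInt switch above admits a quick exhaustive check over the legal shift amounts:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t x = 0; x < 32; ++x) {
        // (1 << x) & 1 --> zext(x == 0)
        assert(((1u << x) & 1u) == (uint32_t)(x == 0));
        // (1 >> x) & 1 --> zext(x == 0)
        assert(((1u >> x) & 1u) == (uint32_t)(x == 0));
      }
      return 0;
    }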
- Value *NewCast = Builder->CreateTrunc(X, I.getType(), "and.shrunk"); + Value *NewCast = Builder.CreateTrunc(X, I.getType(), "and.shrunk"); Constant *C3 = ConstantExpr::getTrunc(YC, I.getType()); C3 = ConstantExpr::getAnd(C3, AndRHS); return BinaryOperator::CreateAnd(NewCast, C3); @@ -1357,7 +1380,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { if (Instruction *FoldedLogic = foldOpWithConstantIntoOperand(I)) return FoldedLogic; - if (Instruction *DeMorgan = matchDeMorgansLaws(I, *Builder)) + if (Instruction *DeMorgan = matchDeMorgansLaws(I, Builder)) return DeMorgan; { @@ -1383,27 +1406,21 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { // an endless loop. By checking that A is non-constant we ensure that // we will never get to the loop. if (A == tmpOp0 && !isa(A)) // A&(A^B) -> A & ~B - return BinaryOperator::CreateAnd(A, Builder->CreateNot(B)); + return BinaryOperator::CreateAnd(A, Builder.CreateNot(B)); } } - // (A&((~A)|B)) -> A&B - if (match(Op0, m_c_Or(m_Not(m_Specific(Op1)), m_Value(A)))) - return BinaryOperator::CreateAnd(A, Op1); - if (match(Op1, m_c_Or(m_Not(m_Specific(Op0)), m_Value(A)))) - return BinaryOperator::CreateAnd(A, Op0); - // (A ^ B) & ((B ^ C) ^ A) -> (A ^ B) & ~C if (match(Op0, m_Xor(m_Value(A), m_Value(B)))) if (match(Op1, m_Xor(m_Xor(m_Specific(B), m_Value(C)), m_Specific(A)))) - if (Op1->hasOneUse() || cast(Op1)->hasOneUse()) - return BinaryOperator::CreateAnd(Op0, Builder->CreateNot(C)); + if (Op1->hasOneUse() || IsFreeToInvert(C, C->hasOneUse())) + return BinaryOperator::CreateAnd(Op0, Builder.CreateNot(C)); // ((A ^ C) ^ B) & (B ^ A) -> (B ^ A) & ~C if (match(Op0, m_Xor(m_Xor(m_Value(A), m_Value(C)), m_Value(B)))) if (match(Op1, m_Xor(m_Specific(B), m_Specific(A)))) - if (Op0->hasOneUse() || cast(Op0)->hasOneUse()) - return BinaryOperator::CreateAnd(Op1, Builder->CreateNot(C)); + if (Op0->hasOneUse() || IsFreeToInvert(C, C->hasOneUse())) + return BinaryOperator::CreateAnd(Op1, Builder.CreateNot(C)); // (A | B) & ((~A) ^ B) -> (A & B) // (A | B) & (B ^ (~A)) -> (A & B) @@ -1426,7 +1443,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { ICmpInst *LHS = dyn_cast(Op0); ICmpInst *RHS = dyn_cast(Op1); if (LHS && RHS) - if (Value *Res = FoldAndOfICmps(LHS, RHS)) + if (Value *Res = foldAndOfICmps(LHS, RHS, I)) return replaceInstUsesWith(I, Res); // TODO: Make this recursive; it's a little tricky because an arbitrary @@ -1434,26 +1451,26 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { Value *X, *Y; if (LHS && match(Op1, m_OneUse(m_And(m_Value(X), m_Value(Y))))) { if (auto *Cmp = dyn_cast(X)) - if (Value *Res = FoldAndOfICmps(LHS, Cmp)) - return replaceInstUsesWith(I, Builder->CreateAnd(Res, Y)); + if (Value *Res = foldAndOfICmps(LHS, Cmp, I)) + return replaceInstUsesWith(I, Builder.CreateAnd(Res, Y)); if (auto *Cmp = dyn_cast(Y)) - if (Value *Res = FoldAndOfICmps(LHS, Cmp)) - return replaceInstUsesWith(I, Builder->CreateAnd(Res, X)); + if (Value *Res = foldAndOfICmps(LHS, Cmp, I)) + return replaceInstUsesWith(I, Builder.CreateAnd(Res, X)); } if (RHS && match(Op0, m_OneUse(m_And(m_Value(X), m_Value(Y))))) { if (auto *Cmp = dyn_cast(X)) - if (Value *Res = FoldAndOfICmps(Cmp, RHS)) - return replaceInstUsesWith(I, Builder->CreateAnd(Res, Y)); + if (Value *Res = foldAndOfICmps(Cmp, RHS, I)) + return replaceInstUsesWith(I, Builder.CreateAnd(Res, Y)); if (auto *Cmp = dyn_cast(Y)) - if (Value *Res = FoldAndOfICmps(Cmp, RHS)) - return replaceInstUsesWith(I, Builder->CreateAnd(Res, X)); + if (Value *Res = foldAndOfICmps(Cmp, 
RHS, I)) + return replaceInstUsesWith(I, Builder.CreateAnd(Res, X)); } } // If and'ing two fcmp, try combine them into one. if (FCmpInst *LHS = dyn_cast(I.getOperand(0))) if (FCmpInst *RHS = dyn_cast(I.getOperand(1))) - if (Value *Res = FoldAndOfFCmps(LHS, RHS)) + if (Value *Res = foldAndOfFCmps(LHS, RHS)) return replaceInstUsesWith(I, Res); if (Instruction *CastedAnd = foldCastedBitwiseLogic(I)) @@ -1528,16 +1545,19 @@ static Value *getSelectCondition(Value *A, Value *B, InstCombiner::BuilderTy &Builder) { // If these are scalars or vectors of i1, A can be used directly. Type *Ty = A->getType(); - if (match(A, m_Not(m_Specific(B))) && Ty->getScalarType()->isIntegerTy(1)) + if (match(A, m_Not(m_Specific(B))) && Ty->isIntOrIntVectorTy(1)) return A; // If A and B are sign-extended, look through the sexts to find the booleans. Value *Cond; + Value *NotB; if (match(A, m_SExt(m_Value(Cond))) && - Cond->getType()->getScalarType()->isIntegerTy(1) && - match(B, m_CombineOr(m_Not(m_SExt(m_Specific(Cond))), - m_SExt(m_Not(m_Specific(Cond)))))) - return Cond; + Cond->getType()->isIntOrIntVectorTy(1) && + match(B, m_OneUse(m_Not(m_Value(NotB))))) { + NotB = peekThroughBitcast(NotB, true); + if (match(NotB, m_SExt(m_Specific(Cond)))) + return Cond; + } // All scalar (and most vector) possibilities should be handled now. // Try more matches that only apply to non-splat constant vectors. @@ -1554,7 +1574,7 @@ static Value *getSelectCondition(Value *A, Value *B, // operand, see if the constants are inverse bitmasks. if (match(A, (m_Xor(m_SExt(m_Value(Cond)), m_Constant(AC)))) && match(B, (m_Xor(m_SExt(m_Specific(Cond)), m_Constant(BC)))) && - Cond->getType()->getScalarType()->isIntegerTy(1) && + Cond->getType()->isIntOrIntVectorTy(1) && areInverseVectorBitmasks(AC, BC)) { AC = ConstantExpr::getTrunc(AC, CmpInst::makeCmpResultType(Ty)); return Builder.CreateXor(Cond, AC); @@ -1569,12 +1589,8 @@ static Value *matchSelectFromAndOr(Value *A, Value *C, Value *B, Value *D, // The potential condition of the select may be bitcasted. In that case, look // through its bitcast and the corresponding bitcast of the 'not' condition. Type *OrigType = A->getType(); - Value *SrcA, *SrcB; - if (match(A, m_OneUse(m_BitCast(m_Value(SrcA)))) && - match(B, m_OneUse(m_BitCast(m_Value(SrcB))))) { - A = SrcA; - B = SrcB; - } + A = peekThroughBitcast(A, true); + B = peekThroughBitcast(B, true); if (Value *Cond = getSelectCondition(A, B, Builder)) { // ((bc Cond) & C) | ((bc ~Cond) & D) --> bc (select Cond, (bc C), (bc D)) @@ -1590,46 +1606,17 @@ static Value *matchSelectFromAndOr(Value *A, Value *C, Value *B, Value *D, } /// Fold (icmp)|(icmp) if possible. -Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, - Instruction *CxtI) { - ICmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate(); - +Value *InstCombiner::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, + Instruction &CxtI) { // Fold (iszero(A & K1) | iszero(A & K2)) -> (A & (K1 | K2)) != (K1 | K2) // if K1 and K2 are a one-bit mask. 
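The and/xor rewrites in the visitAnd hunks above are pure bit identities, so they can be verified exhaustively; 6-bit operands keep the loop small without loss of generality:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (unsigned a = 0; a < 64; ++a)
        for (unsigned b = 0; b < 64; ++b)
          for (unsigned c = 0; c < 64; ++c) {
            uint8_t A = (uint8_t)a, B = (uint8_t)b, C = (uint8_t)c;
            // A & (A ^ B) --> A & ~B
            assert((uint8_t)(A & (A ^ B)) == (uint8_t)(A & ~B));
            // (A ^ B) & ((B ^ C) ^ A) --> (A ^ B) & ~C
            assert((uint8_t)((A ^ B) & ((B ^ C) ^ A)) ==
                   (uint8_t)((A ^ B) & ~C));
            // (A | B) & (~A ^ B) --> A & B
            assert((uint8_t)((A | B) & (~A ^ B)) == (uint8_t)(A & B));
          }
      return 0;
    }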
- ConstantInt *LHSC = dyn_cast(LHS->getOperand(1)); - ConstantInt *RHSC = dyn_cast(RHS->getOperand(1)); + if (Value *V = foldAndOrOfICmpsOfAndWithPow2(LHS, RHS, false, CxtI)) + return V; - if (LHS->getPredicate() == ICmpInst::ICMP_EQ && LHSC && LHSC->isZero() && - RHS->getPredicate() == ICmpInst::ICMP_EQ && RHSC && RHSC->isZero()) { - - BinaryOperator *LAnd = dyn_cast(LHS->getOperand(0)); - BinaryOperator *RAnd = dyn_cast(RHS->getOperand(0)); - if (LAnd && RAnd && LAnd->hasOneUse() && RHS->hasOneUse() && - LAnd->getOpcode() == Instruction::And && - RAnd->getOpcode() == Instruction::And) { - - Value *Mask = nullptr; - Value *Masked = nullptr; - if (LAnd->getOperand(0) == RAnd->getOperand(0) && - isKnownToBeAPowerOfTwo(LAnd->getOperand(1), DL, false, 0, &AC, CxtI, - &DT) && - isKnownToBeAPowerOfTwo(RAnd->getOperand(1), DL, false, 0, &AC, CxtI, - &DT)) { - Mask = Builder->CreateOr(LAnd->getOperand(1), RAnd->getOperand(1)); - Masked = Builder->CreateAnd(LAnd->getOperand(0), Mask); - } else if (LAnd->getOperand(1) == RAnd->getOperand(1) && - isKnownToBeAPowerOfTwo(LAnd->getOperand(0), DL, false, 0, &AC, - CxtI, &DT) && - isKnownToBeAPowerOfTwo(RAnd->getOperand(0), DL, false, 0, &AC, - CxtI, &DT)) { - Mask = Builder->CreateOr(LAnd->getOperand(0), RAnd->getOperand(0)); - Masked = Builder->CreateAnd(LAnd->getOperand(1), Mask); - } + ICmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate(); - if (Masked) - return Builder->CreateICmp(ICmpInst::ICMP_NE, Masked, Mask); - } - } + ConstantInt *LHSC = dyn_cast(LHS->getOperand(1)); + ConstantInt *RHSC = dyn_cast(RHS->getOperand(1)); // Fold (icmp ult/ule (A + C1), C3) | (icmp ult/ule (A + C2), C3) // --> (icmp ult/ule ((A & ~(C1 ^ C2)) + max(C1, C2)), C3) @@ -1678,9 +1665,9 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, RangeDiff.ugt(LHSC->getValue())) { Value *MaskC = ConstantInt::get(LAddC->getType(), ~DiffC); - Value *NewAnd = Builder->CreateAnd(LAddOpnd, MaskC); - Value *NewAdd = Builder->CreateAdd(NewAnd, MaxAddC); - return (Builder->CreateICmp(LHS->getPredicate(), NewAdd, LHSC)); + Value *NewAnd = Builder.CreateAnd(LAddOpnd, MaskC); + Value *NewAdd = Builder.CreateAdd(NewAnd, MaxAddC); + return Builder.CreateICmp(LHS->getPredicate(), NewAdd, LHSC); } } } @@ -1727,9 +1714,9 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, A = LHS->getOperand(1); } if (A && B) - return Builder->CreateICmp( + return Builder.CreateICmp( ICmpInst::ICMP_UGE, - Builder->CreateAdd(B, ConstantInt::getSigned(B->getType(), -1)), A); + Builder.CreateAdd(B, ConstantInt::getSigned(B->getType(), -1)), A); } // E.g. 
(icmp slt x, 0) | (icmp sgt x, n) --> icmp ugt x, n @@ -1750,8 +1737,8 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, if (LHSC == RHSC && PredL == PredR) { // (icmp ne A, 0) | (icmp ne B, 0) --> (icmp ne (A|B), 0) if (PredL == ICmpInst::ICMP_NE && LHSC->isZero()) { - Value *NewOr = Builder->CreateOr(LHS0, RHS0); - return Builder->CreateICmp(PredL, NewOr, LHSC); + Value *NewOr = Builder.CreateOr(LHS0, RHS0); + return Builder.CreateICmp(PredL, NewOr, LHSC); } } @@ -1761,7 +1748,7 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, ConstantInt *AddC; if (match(LHS0, m_Add(m_Specific(RHS0), m_ConstantInt(AddC)))) if (RHSC->getValue() + AddC->getValue() == LHSC->getValue()) - return Builder->CreateICmpULE(LHS0, LHSC); + return Builder.CreateICmpULE(LHS0, LHSC); } // From here on, we only handle: @@ -1847,7 +1834,7 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, /// Optimize (fcmp)|(fcmp). NOTE: Unlike the rest of instcombine, this returns /// a Value which should already be inserted into the function. -Value *InstCombiner::FoldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS) { +Value *InstCombiner::foldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS) { Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1); Value *Op1LHS = RHS->getOperand(0), *Op1RHS = RHS->getOperand(1); FCmpInst::Predicate Op0CC = LHS->getPredicate(), Op1CC = RHS->getPredicate(); @@ -1877,18 +1864,18 @@ Value *InstCombiner::FoldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS) { // If either of the constants are nans, then the whole thing returns // true. if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN()) - return Builder->getTrue(); + return Builder.getTrue(); // Otherwise, no need to compare the two constants, compare the // rest. - return Builder->CreateFCmpUNO(LHS->getOperand(0), RHS->getOperand(0)); + return Builder.CreateFCmpUNO(LHS->getOperand(0), RHS->getOperand(0)); } // Handle vector zeros. This occurs because the canonical form of // "fcmp uno x,x" is "fcmp uno x, 0". if (isa(LHS->getOperand(1)) && isa(RHS->getOperand(1))) - return Builder->CreateFCmpUNO(LHS->getOperand(0), RHS->getOperand(0)); + return Builder.CreateFCmpUNO(LHS->getOperand(0), RHS->getOperand(0)); return nullptr; } @@ -1905,8 +1892,9 @@ Value *InstCombiner::FoldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS) { /// (A & C1) | B /// /// when the XOR of the two constants is "all ones" (-1). -Instruction *InstCombiner::FoldOrWithConstants(BinaryOperator &I, Value *Op, - Value *A, Value *B, Value *C) { +static Instruction *FoldOrWithConstants(BinaryOperator &I, Value *Op, + Value *A, Value *B, Value *C, + InstCombiner::BuilderTy &Builder) { ConstantInt *CI1 = dyn_cast(C); if (!CI1) return nullptr; @@ -1918,7 +1906,7 @@ Instruction *InstCombiner::FoldOrWithConstants(BinaryOperator &I, Value *Op, if (!Xor.isAllOnesValue()) return nullptr; if (V1 == A || V1 == B) { - Value *NewOp = Builder->CreateAnd((V1 == A) ? B : A, CI1); + Value *NewOp = Builder.CreateAnd((V1 == A) ? B : A, CI1); return BinaryOperator::CreateOr(NewOp, V1); } @@ -1927,15 +1915,16 @@ Instruction *InstCombiner::FoldOrWithConstants(BinaryOperator &I, Value *Op, /// \brief This helper function folds: /// -/// ((A | B) & C1) ^ (B & C2) +/// ((A ^ B) & C1) | (B & C2) /// /// into: /// /// (A & C1) ^ B /// /// when the XOR of the two constants is "all ones" (-1). 
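The add-of-minus-one followed by an unsigned-greater-or-equal compare in the hunk above appears to implement (icmp eq B, 0) | (icmp ult A, B) --> icmp uge (B - 1), A, relying on B - 1 wrapping to the maximum value when B is zero. A standalone check of that reading, together with the zero-compare merge and the fcmp uno fold from foldOrOfFCmps (42.0 and 7.0 are arbitrary non-NaN constants):

    #include <cassert>
    #include <cmath>
    #include <cstdint>
    #include <limits>

    int main() {
      for (unsigned a = 0; a < 256; ++a)
        for (unsigned b = 0; b < 256; ++b) {
          uint8_t A = (uint8_t)a, B = (uint8_t)b;
          // (icmp eq B, 0) | (icmp ult A, B) --> icmp uge (B - 1), A
          assert(((B == 0) || (A < B)) == ((uint8_t)(B - 1) >= A));
          // (icmp ne A, 0) | (icmp ne B, 0) --> icmp ne (A | B), 0
          assert(((A != 0) || (B != 0)) == ((A | B) != 0));
        }
      // (fcmp uno x, C1) | (fcmp uno y, C2) --> fcmp uno x, y, C1/C2 not NaN
      const double vals[] = {0.0, 1.5, -2.0,
                             std::numeric_limits<double>::infinity(),
                             std::numeric_limits<double>::quiet_NaN()};
      for (double x : vals)
        for (double y : vals)
          assert((std::isunordered(x, 42.0) || std::isunordered(y, 7.0)) ==
                 std::isunordered(x, y));
      return 0;
    }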
-Instruction *InstCombiner::FoldXorWithConstants(BinaryOperator &I, Value *Op, - Value *A, Value *B, Value *C) { +static Instruction *FoldXorWithConstants(BinaryOperator &I, Value *Op, + Value *A, Value *B, Value *C, + InstCombiner::BuilderTy &Builder) { ConstantInt *CI1 = dyn_cast(C); if (!CI1) return nullptr; @@ -1950,7 +1939,7 @@ Instruction *InstCombiner::FoldXorWithConstants(BinaryOperator &I, Value *Op, return nullptr; if (V1 == A || V1 == B) { - Value *NewOp = Builder->CreateAnd(V1 == A ? B : A, CI1); + Value *NewOp = Builder.CreateAnd(V1 == A ? B : A, CI1); return BinaryOperator::CreateXor(NewOp, V1); } @@ -1967,7 +1956,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifyOrInst(Op0, Op1, SQ)) + if (Value *V = SimplifyOrInst(Op0, Op1, SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); // See if we can simplify any instructions used by the instruction whose sole @@ -1976,14 +1965,14 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { return &I; // Do this before using distributive laws to catch simple and/or/not patterns. - if (Instruction *Xor = foldOrToXor(I, *Builder)) + if (Instruction *Xor = foldOrToXor(I, Builder)) return Xor; // (A&B)|(A&C) -> A&(B|C) etc if (Value *V = SimplifyUsingDistributiveLaws(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifyBSwap(I)) + if (Value *V = SimplifyBSwap(I, Builder)) return replaceInstUsesWith(I, V); if (isa(Op1)) @@ -2000,7 +1989,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { // (X^C)|Y -> (X|Y)^C iff Y&C == 0 if (match(Op0, m_OneUse(m_Xor(m_Value(A), m_APInt(C)))) && MaskedValueIsZero(Op1, *C, 0, &I)) { - Value *NOr = Builder->CreateOr(A, Op1); + Value *NOr = Builder.CreateOr(A, Op1); NOr->takeName(Op0); return BinaryOperator::CreateXor(NOr, ConstantInt::get(NOr->getType(), *C)); @@ -2009,7 +1998,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { // Y|(X^C) -> (X|Y)^C iff Y&C == 0 if (match(Op1, m_OneUse(m_Xor(m_Value(A), m_APInt(C)))) && MaskedValueIsZero(Op0, *C, 0, &I)) { - Value *NOr = Builder->CreateOr(A, Op0); + Value *NOr = Builder.CreateOr(A, Op0); NOr->takeName(Op0); return BinaryOperator::CreateXor(NOr, ConstantInt::get(NOr->getType(), *C)); @@ -2018,18 +2007,6 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { Value *A, *B; - // ((~A & B) | A) -> (A | B) - if (match(Op0, m_c_And(m_Not(m_Specific(Op1)), m_Value(A)))) - return BinaryOperator::CreateOr(A, Op1); - if (match(Op1, m_c_And(m_Not(m_Specific(Op0)), m_Value(A)))) - return BinaryOperator::CreateOr(Op0, A); - - // ((A & B) | ~A) -> (~A | B) - // The NOT is guaranteed to be in the RHS by complexity ordering. 
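The corrected doc comment for FoldXorWithConstants describes ((A ^ B) & C1) | (B & C2) --> (A & C1) ^ B when C1 ^ C2 is all ones; exhaustively checkable with example masks whose xor is 0xFF:

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint8_t C1 = 0x0F, C2 = 0xF0; // C1 ^ C2 == 0xFF ("all ones")
      for (unsigned a = 0; a < 256; ++a)
        for (unsigned b = 0; b < 256; ++b) {
          uint8_t A = (uint8_t)a, B = (uint8_t)b;
          uint8_t lhs = (uint8_t)(((A ^ B) & C1) | (B & C2));
          uint8_t rhs = (uint8_t)((A & C1) ^ B);
          assert(lhs == rhs);
        }
      return 0;
    }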
- if (match(Op1, m_Not(m_Value(A))) && - match(Op0, m_c_And(m_Specific(A), m_Value(B)))) - return BinaryOperator::CreateOr(Op1, B); - // (A & C)|(B & D) Value *C = nullptr, *D = nullptr; if (match(Op0, m_And(m_Value(A), m_Value(C))) && @@ -2038,7 +2015,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { ConstantInt *C1 = dyn_cast(C); ConstantInt *C2 = dyn_cast(D); if (C1 && C2) { // (A & C1)|(B & C2) - if ((C1->getValue() & C2->getValue()) == 0) { + if ((C1->getValue() & C2->getValue()).isNullValue()) { // ((V | N) & C1) | (V & C2) --> (V|N) & (C1|C2) // iff (C1&C2) == 0 and (N&~C1) == 0 if (match(A, m_Or(m_Value(V1), m_Value(V2))) && @@ -2047,7 +2024,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { (V2 == B && MaskedValueIsZero(V1, ~C1->getValue(), 0, &I)))) // (N|V) return BinaryOperator::CreateAnd(A, - Builder->getInt(C1->getValue()|C2->getValue())); + Builder.getInt(C1->getValue()|C2->getValue())); // Or commutes, try both ways. if (match(B, m_Or(m_Value(V1), m_Value(V2))) && ((V1 == A && @@ -2055,18 +2032,18 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { (V2 == A && MaskedValueIsZero(V1, ~C2->getValue(), 0, &I)))) // (N|V) return BinaryOperator::CreateAnd(B, - Builder->getInt(C1->getValue()|C2->getValue())); + Builder.getInt(C1->getValue()|C2->getValue())); // ((V|C3)&C1) | ((V|C4)&C2) --> (V|C3|C4)&(C1|C2) // iff (C1&C2) == 0 and (C3&~C1) == 0 and (C4&~C2) == 0. ConstantInt *C3 = nullptr, *C4 = nullptr; if (match(A, m_Or(m_Value(V1), m_ConstantInt(C3))) && - (C3->getValue() & ~C1->getValue()) == 0 && + (C3->getValue() & ~C1->getValue()).isNullValue() && match(B, m_Or(m_Specific(V1), m_ConstantInt(C4))) && - (C4->getValue() & ~C2->getValue()) == 0) { - V2 = Builder->CreateOr(V1, ConstantExpr::getOr(C3, C4), "bitfield"); + (C4->getValue() & ~C2->getValue()).isNullValue()) { + V2 = Builder.CreateOr(V1, ConstantExpr::getOr(C3, C4), "bitfield"); return BinaryOperator::CreateAnd(V2, - Builder->getInt(C1->getValue()|C2->getValue())); + Builder.getInt(C1->getValue()|C2->getValue())); } } } @@ -2076,67 +2053,61 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { // 'or' that it is replacing. if (Op0->hasOneUse() || Op1->hasOneUse()) { // (Cond & C) | (~Cond & D) -> Cond ? C : D, and commuted variants. 
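The matchSelectFromAndOr pattern noted above, (Cond & C) | (~Cond & D) --> Cond ? C : D, works because a sign-extended i1 is either an all-ones or an all-zero mask. A minimal illustration with arbitrary example payloads:

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint8_t C = 0x5A, D = 0xA3; // arbitrary example payloads
      for (int cond = 0; cond <= 1; ++cond) {
        uint8_t Mask = cond ? 0xFF : 0x00; // sext(i1 Cond) as an i8 bitmask
        uint8_t folded = (uint8_t)((Mask & C) | ((uint8_t)~Mask & D));
        assert(folded == (cond ? C : D)); // select Cond, C, D
      }
      return 0;
    }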
- if (Value *V = matchSelectFromAndOr(A, C, B, D, *Builder)) + if (Value *V = matchSelectFromAndOr(A, C, B, D, Builder)) return replaceInstUsesWith(I, V); - if (Value *V = matchSelectFromAndOr(A, C, D, B, *Builder)) + if (Value *V = matchSelectFromAndOr(A, C, D, B, Builder)) return replaceInstUsesWith(I, V); - if (Value *V = matchSelectFromAndOr(C, A, B, D, *Builder)) + if (Value *V = matchSelectFromAndOr(C, A, B, D, Builder)) return replaceInstUsesWith(I, V); - if (Value *V = matchSelectFromAndOr(C, A, D, B, *Builder)) + if (Value *V = matchSelectFromAndOr(C, A, D, B, Builder)) return replaceInstUsesWith(I, V); - if (Value *V = matchSelectFromAndOr(B, D, A, C, *Builder)) + if (Value *V = matchSelectFromAndOr(B, D, A, C, Builder)) return replaceInstUsesWith(I, V); - if (Value *V = matchSelectFromAndOr(B, D, C, A, *Builder)) + if (Value *V = matchSelectFromAndOr(B, D, C, A, Builder)) return replaceInstUsesWith(I, V); - if (Value *V = matchSelectFromAndOr(D, B, A, C, *Builder)) + if (Value *V = matchSelectFromAndOr(D, B, A, C, Builder)) return replaceInstUsesWith(I, V); - if (Value *V = matchSelectFromAndOr(D, B, C, A, *Builder)) + if (Value *V = matchSelectFromAndOr(D, B, C, A, Builder)) return replaceInstUsesWith(I, V); } // ((A|B)&1)|(B&-2) -> (A&1) | B - if (match(A, m_Or(m_Value(V1), m_Specific(B))) || - match(A, m_Or(m_Specific(B), m_Value(V1)))) { - Instruction *Ret = FoldOrWithConstants(I, Op1, V1, B, C); - if (Ret) return Ret; + if (match(A, m_c_Or(m_Value(V1), m_Specific(B)))) { + if (Instruction *Ret = FoldOrWithConstants(I, Op1, V1, B, C, Builder)) + return Ret; } // (B&-2)|((A|B)&1) -> (A&1) | B - if (match(B, m_Or(m_Specific(A), m_Value(V1))) || - match(B, m_Or(m_Value(V1), m_Specific(A)))) { - Instruction *Ret = FoldOrWithConstants(I, Op0, A, V1, D); - if (Ret) return Ret; + if (match(B, m_c_Or(m_Specific(A), m_Value(V1)))) { + if (Instruction *Ret = FoldOrWithConstants(I, Op0, A, V1, D, Builder)) + return Ret; } // ((A^B)&1)|(B&-2) -> (A&1) ^ B - if (match(A, m_Xor(m_Value(V1), m_Specific(B))) || - match(A, m_Xor(m_Specific(B), m_Value(V1)))) { - Instruction *Ret = FoldXorWithConstants(I, Op1, V1, B, C); - if (Ret) return Ret; + if (match(A, m_c_Xor(m_Value(V1), m_Specific(B)))) { + if (Instruction *Ret = FoldXorWithConstants(I, Op1, V1, B, C, Builder)) + return Ret; } // (B&-2)|((A^B)&1) -> (A&1) ^ B - if (match(B, m_Xor(m_Specific(A), m_Value(V1))) || - match(B, m_Xor(m_Value(V1), m_Specific(A)))) { - Instruction *Ret = FoldXorWithConstants(I, Op0, A, V1, D); - if (Ret) return Ret; + if (match(B, m_c_Xor(m_Specific(A), m_Value(V1)))) { + if (Instruction *Ret = FoldXorWithConstants(I, Op0, A, V1, D, Builder)) + return Ret; } } // (A ^ B) | ((B ^ C) ^ A) -> (A ^ B) | C if (match(Op0, m_Xor(m_Value(A), m_Value(B)))) if (match(Op1, m_Xor(m_Xor(m_Specific(B), m_Value(C)), m_Specific(A)))) - if (Op1->hasOneUse() || cast(Op1)->hasOneUse()) - return BinaryOperator::CreateOr(Op0, C); + return BinaryOperator::CreateOr(Op0, C); // ((A ^ C) ^ B) | (B ^ A) -> (B ^ A) | C if (match(Op0, m_Xor(m_Xor(m_Value(A), m_Value(C)), m_Value(B)))) if (match(Op1, m_Xor(m_Specific(B), m_Specific(A)))) - if (Op0->hasOneUse() || cast(Op0)->hasOneUse()) - return BinaryOperator::CreateOr(Op1, C); + return BinaryOperator::CreateOr(Op1, C); // ((B | C) & A) | B -> B | (A & C) if (match(Op0, m_And(m_Or(m_Specific(Op1), m_Value(C)), m_Value(A)))) - return BinaryOperator::CreateOr(Op1, Builder->CreateAnd(A, C)); + return BinaryOperator::CreateOr(Op1, Builder.CreateAnd(A, C)); - if (Instruction *DeMorgan = 
matchDeMorgansLaws(I, *Builder)) + if (Instruction *DeMorgan = matchDeMorgansLaws(I, Builder)) return DeMorgan; // Canonicalize xor to the RHS. @@ -2158,11 +2129,11 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { return BinaryOperator::CreateOr(A, B); if (Op1->hasOneUse() && match(A, m_Not(m_Specific(Op0)))) { - Value *Not = Builder->CreateNot(B, B->getName()+".not"); + Value *Not = Builder.CreateNot(B, B->getName() + ".not"); return BinaryOperator::CreateOr(Not, Op0); } if (Op1->hasOneUse() && match(B, m_Not(m_Specific(Op0)))) { - Value *Not = Builder->CreateNot(A, A->getName()+".not"); + Value *Not = Builder.CreateNot(A, A->getName() + ".not"); return BinaryOperator::CreateOr(Not, Op0); } } @@ -2176,21 +2147,10 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { B->getOpcode() == Instruction::Xor)) { Value *NotOp = Op0 == B->getOperand(0) ? B->getOperand(1) : B->getOperand(0); - Value *Not = Builder->CreateNot(NotOp, NotOp->getName()+".not"); + Value *Not = Builder.CreateNot(NotOp, NotOp->getName() + ".not"); return BinaryOperator::CreateOr(Not, Op0); } - // (A & B) | (~A ^ B) -> (~A ^ B) - // (A & B) | (B ^ ~A) -> (~A ^ B) - // (B & A) | (~A ^ B) -> (~A ^ B) - // (B & A) | (B ^ ~A) -> (~A ^ B) - // The match order is important: match the xor first because the 'not' - // operation defines 'A'. We do not need to match the xor as Op0 because the - // xor was canonicalized to Op1 above. - if (match(Op1, m_c_Xor(m_Not(m_Value(A)), m_Value(B))) && - match(Op0, m_c_And(m_Specific(A), m_Specific(B)))) - return BinaryOperator::CreateXor(Builder->CreateNot(A), B); - if (SwappedForXor) std::swap(Op0, Op1); @@ -2198,7 +2158,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { ICmpInst *LHS = dyn_cast(Op0); ICmpInst *RHS = dyn_cast(Op1); if (LHS && RHS) - if (Value *Res = FoldOrOfICmps(LHS, RHS, &I)) + if (Value *Res = foldOrOfICmps(LHS, RHS, I)) return replaceInstUsesWith(I, Res); // TODO: Make this recursive; it's a little tricky because an arbitrary @@ -2206,26 +2166,26 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { Value *X, *Y; if (LHS && match(Op1, m_OneUse(m_Or(m_Value(X), m_Value(Y))))) { if (auto *Cmp = dyn_cast(X)) - if (Value *Res = FoldOrOfICmps(LHS, Cmp, &I)) - return replaceInstUsesWith(I, Builder->CreateOr(Res, Y)); + if (Value *Res = foldOrOfICmps(LHS, Cmp, I)) + return replaceInstUsesWith(I, Builder.CreateOr(Res, Y)); if (auto *Cmp = dyn_cast(Y)) - if (Value *Res = FoldOrOfICmps(LHS, Cmp, &I)) - return replaceInstUsesWith(I, Builder->CreateOr(Res, X)); + if (Value *Res = foldOrOfICmps(LHS, Cmp, I)) + return replaceInstUsesWith(I, Builder.CreateOr(Res, X)); } if (RHS && match(Op0, m_OneUse(m_Or(m_Value(X), m_Value(Y))))) { if (auto *Cmp = dyn_cast(X)) - if (Value *Res = FoldOrOfICmps(Cmp, RHS, &I)) - return replaceInstUsesWith(I, Builder->CreateOr(Res, Y)); + if (Value *Res = foldOrOfICmps(Cmp, RHS, I)) + return replaceInstUsesWith(I, Builder.CreateOr(Res, Y)); if (auto *Cmp = dyn_cast(Y)) - if (Value *Res = FoldOrOfICmps(Cmp, RHS, &I)) - return replaceInstUsesWith(I, Builder->CreateOr(Res, X)); + if (Value *Res = foldOrOfICmps(Cmp, RHS, I)) + return replaceInstUsesWith(I, Builder.CreateOr(Res, X)); } } // (fcmp uno x, c) | (fcmp uno y, c) -> (fcmp uno x, y) if (FCmpInst *LHS = dyn_cast(I.getOperand(0))) if (FCmpInst *RHS = dyn_cast(I.getOperand(1))) - if (Value *Res = FoldOrOfFCmps(LHS, RHS)) + if (Value *Res = foldOrOfFCmps(LHS, RHS)) return replaceInstUsesWith(I, Res); if (Instruction *CastedOr = foldCastedBitwiseLogic(I)) @@ -2233,10 +2193,10 
@@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { // or(sext(A), B) / or(B, sext(A)) --> A ? -1 : B, where A is i1 or . if (match(Op0, m_OneUse(m_SExt(m_Value(A)))) && - A->getType()->getScalarType()->isIntegerTy(1)) + A->getType()->isIntOrIntVectorTy(1)) return SelectInst::Create(A, ConstantInt::getSigned(I.getType(), -1), Op1); if (match(Op1, m_OneUse(m_SExt(m_Value(A)))) && - A->getType()->getScalarType()->isIntegerTy(1)) + A->getType()->isIntOrIntVectorTy(1)) return SelectInst::Create(A, ConstantInt::getSigned(I.getType(), -1), Op0); // Note: If we've gotten to the point of visiting the outer OR, then the @@ -2247,7 +2207,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { ConstantInt *C1; if (Op0->hasOneUse() && !isa(Op1) && match(Op0, m_Or(m_Value(A), m_ConstantInt(C1)))) { - Value *Inner = Builder->CreateOr(A, Op1); + Value *Inner = Builder.CreateOr(A, Op1); Inner->takeName(Op0); return BinaryOperator::CreateOr(Inner, C1); } @@ -2260,8 +2220,8 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { if (Op0->hasOneUse() && Op1->hasOneUse() && match(Op0, m_Select(m_Value(X), m_Value(A), m_Value(B))) && match(Op1, m_Select(m_Value(Y), m_Value(C), m_Value(D))) && X == Y) { - Value *orTrue = Builder->CreateOr(A, C); - Value *orFalse = Builder->CreateOr(B, D); + Value *orTrue = Builder.CreateOr(A, C); + Value *orFalse = Builder.CreateOr(B, D); return SelectInst::Create(X, orTrue, orFalse); } } @@ -2271,7 +2231,8 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { /// A ^ B can be specified using other logic ops in a variety of patterns. We /// can fold these early and efficiently by morphing an existing instruction. -static Instruction *foldXorToXor(BinaryOperator &I) { +static Instruction *foldXorToXor(BinaryOperator &I, + InstCombiner::BuilderTy &Builder) { assert(I.getOpcode() == Instruction::Xor); Value *Op0 = I.getOperand(0); Value *Op1 = I.getOperand(1); @@ -2296,10 +2257,10 @@ static Instruction *foldXorToXor(BinaryOperator &I) { // (~B | A) ^ (~A | B) -> A ^ B // (~A | B) ^ (A | ~B) -> A ^ B // (B | ~A) ^ (A | ~B) -> A ^ B - if ((match(Op0, m_c_Or(m_Value(A), m_Not(m_Value(B)))) && - match(Op1, m_Or(m_Not(m_Specific(A)), m_Specific(B)))) || - (match(Op0, m_c_Or(m_Not(m_Value(A)), m_Value(B))) && - match(Op1, m_Or(m_Specific(A), m_Not(m_Specific(B)))))) { + if ((match(Op0, m_Or(m_Value(A), m_Not(m_Value(B)))) && + match(Op1, m_c_Or(m_Not(m_Specific(A)), m_Specific(B)))) || + (match(Op0, m_Or(m_Not(m_Value(A)), m_Value(B))) && + match(Op1, m_c_Or(m_Specific(A), m_Not(m_Specific(B)))))) { I.setOperand(0, A); I.setOperand(1, B); return &I; @@ -2309,15 +2270,72 @@ static Instruction *foldXorToXor(BinaryOperator &I) { // (~B & A) ^ (~A & B) -> A ^ B // (~A & B) ^ (A & ~B) -> A ^ B // (B & ~A) ^ (A & ~B) -> A ^ B - if ((match(Op0, m_c_And(m_Value(A), m_Not(m_Value(B)))) && - match(Op1, m_And(m_Not(m_Specific(A)), m_Specific(B)))) || - (match(Op0, m_c_And(m_Not(m_Value(A)), m_Value(B))) && - match(Op1, m_And(m_Specific(A), m_Not(m_Specific(B)))))) { + if ((match(Op0, m_And(m_Value(A), m_Not(m_Value(B)))) && + match(Op1, m_c_And(m_Not(m_Specific(A)), m_Specific(B)))) || + (match(Op0, m_And(m_Not(m_Value(A)), m_Value(B))) && + match(Op1, m_c_And(m_Specific(A), m_Not(m_Specific(B)))))) { I.setOperand(0, A); I.setOperand(1, B); return &I; } + // For the remaining cases we need to get rid of one of the operands. 
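Several identities from the preceding visitOr and foldXorToXor hunks, gathered into one exhaustive check; attribution of the A | (~A ^ B) form is inferred from the truncated context around the xor canonicalization:

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint8_t C1 = 0x01, C2 = 0xFE; // C1 ^ C2 == 0xFF ("all ones")
      for (unsigned a = 0; a < 256; ++a)
        for (unsigned b = 0; b < 256; ++b) {
          uint8_t A = (uint8_t)a, B = (uint8_t)b;
          // FoldOrWithConstants: ((A | B) & C1) | (B & C2) --> (A & C1) | B
          assert((uint8_t)(((A | B) & C1) | (B & C2)) ==
                 (uint8_t)((A & C1) | B));
          // visitOr: A | (~A ^ B) --> A | ~B
          assert((uint8_t)(A | (~A ^ B)) == (uint8_t)(A | ~B));
          // foldXorToXor: (A | ~B) ^ (~A | B) --> A ^ B
          assert((uint8_t)((A | ~B) ^ (~A | B)) == (uint8_t)(A ^ B));
        }
      // or(sext(A), B) --> A ? -1 : B for a boolean A
      for (int a = 0; a <= 1; ++a) {
        uint8_t SextA = a ? 0xFF : 0x00;
        uint8_t B = 0x2C; // arbitrary example value
        assert((uint8_t)(SextA | B) == (a ? (uint8_t)0xFF : B));
      }
      return 0;
    }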
+ if (!Op0->hasOneUse() && !Op1->hasOneUse()) + return nullptr; + + // (A | B) ^ ~(A & B) -> ~(A ^ B) + // (A | B) ^ ~(B & A) -> ~(A ^ B) + // (A & B) ^ ~(A | B) -> ~(A ^ B) + // (A & B) ^ ~(B | A) -> ~(A ^ B) + // Complexity sorting ensures the not will be on the right side. + if ((match(Op0, m_Or(m_Value(A), m_Value(B))) && + match(Op1, m_Not(m_c_And(m_Specific(A), m_Specific(B))))) || + (match(Op0, m_And(m_Value(A), m_Value(B))) && + match(Op1, m_Not(m_c_Or(m_Specific(A), m_Specific(B)))))) + return BinaryOperator::CreateNot(Builder.CreateXor(A, B)); + + return nullptr; +} + +Value *InstCombiner::foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS) { + if (PredicatesFoldable(LHS->getPredicate(), RHS->getPredicate())) { + if (LHS->getOperand(0) == RHS->getOperand(1) && + LHS->getOperand(1) == RHS->getOperand(0)) + LHS->swapOperands(); + if (LHS->getOperand(0) == RHS->getOperand(0) && + LHS->getOperand(1) == RHS->getOperand(1)) { + // (icmp1 A, B) ^ (icmp2 A, B) --> (icmp3 A, B) + Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1); + unsigned Code = getICmpCode(LHS) ^ getICmpCode(RHS); + bool isSigned = LHS->isSigned() || RHS->isSigned(); + return getNewICmpValue(isSigned, Code, Op0, Op1, Builder); + } + } + + // Instead of trying to imitate the folds for and/or, decompose this 'xor' + // into those logic ops. That is, try to turn this into an and-of-icmps + // because we have many folds for that pattern. + // + // This is based on a truth table definition of xor: + // X ^ Y --> (X | Y) & !(X & Y) + if (Value *OrICmp = SimplifyBinOp(Instruction::Or, LHS, RHS, SQ)) { + // TODO: If OrICmp is true, then the definition of xor simplifies to !(X&Y). + // TODO: If OrICmp is false, the whole thing is false (InstSimplify?). + if (Value *AndICmp = SimplifyBinOp(Instruction::And, LHS, RHS, SQ)) { + // TODO: Independently handle cases where the 'and' side is a constant. + if (OrICmp == LHS && AndICmp == RHS && RHS->hasOneUse()) { + // (LHS | RHS) & !(LHS & RHS) --> LHS & !RHS + RHS->setPredicate(RHS->getInversePredicate()); + return Builder.CreateAnd(LHS, RHS); + } + if (OrICmp == RHS && AndICmp == LHS && LHS->hasOneUse()) { + // !(LHS & RHS) & (LHS | RHS) --> !LHS & RHS + LHS->setPredicate(LHS->getInversePredicate()); + return Builder.CreateAnd(LHS, RHS); + } + } + } + return nullptr; } @@ -2331,10 +2349,10 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifyXorInst(Op0, Op1, SQ)) + if (Value *V = SimplifyXorInst(Op0, Op1, SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); - if (Instruction *NewXor = foldXorToXor(I)) + if (Instruction *NewXor = foldXorToXor(I, Builder)) return NewXor; // (A&B)^(A&C) -> A&(B^C) etc @@ -2346,7 +2364,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { if (SimplifyDemandedInstructionBits(I)) return &I; - if (Value *V = SimplifyBSwap(I)) + if (Value *V = SimplifyBSwap(I, Builder)) return replaceInstUsesWith(I, V); // Apply DeMorgan's Law for 'nand' / 'nor' logic with an inverted operand. 
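The truth-table decomposition behind the new foldXorOfICmps, plus the not-of-xor cases above and the xor constant rewrites in the visitXor hunks that follow, check out on all inputs:

    #include <cassert>
    #include <cstdint>

    int main() {
      // X ^ Y --> (X | Y) & !(X & Y), the stated definition of xor.
      for (int x = 0; x <= 1; ++x)
        for (int y = 0; y <= 1; ++y)
          assert(((x != 0) != (y != 0)) ==
                 (((x | y) != 0) && !((x & y) != 0)));
      const uint8_t c = 37; // arbitrary constant for the arithmetic rewrites
      for (unsigned a = 0; a < 256; ++a) {
        uint8_t A = (uint8_t)a;
        // ~(c - X) == X + (-c - 1) and ~(X + c) == (-c - 1) - X
        assert((uint8_t)~(uint8_t)(c - A) == (uint8_t)(A + (uint8_t)(-c - 1)));
        assert((uint8_t)~(uint8_t)(A + c) == (uint8_t)((uint8_t)(-c - 1) - A));
        for (unsigned b = 0; b < 256; ++b) {
          uint8_t B = (uint8_t)b;
          // (A | B) ^ ~(A & B) --> ~(A ^ B), a new foldXorToXor case
          assert((uint8_t)((A | B) ^ ~(A & B)) == (uint8_t)~(A ^ B));
          // (A | B) ^ B --> A & ~B, from the hunks further down
          assert((uint8_t)((A | B) ^ B) == (uint8_t)(A & ~B));
        }
      }
      return 0;
    }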
@@ -2357,13 +2375,13 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { // ~(~X & Y) --> (X | ~Y) // ~(Y & ~X) --> (X | ~Y) if (match(&I, m_Not(m_OneUse(m_c_And(m_Not(m_Value(X)), m_Value(Y)))))) { - Value *NotY = Builder->CreateNot(Y, Y->getName() + ".not"); + Value *NotY = Builder.CreateNot(Y, Y->getName() + ".not"); return BinaryOperator::CreateOr(X, NotY); } // ~(~X | Y) --> (X & ~Y) // ~(Y | ~X) --> (X & ~Y) if (match(&I, m_Not(m_OneUse(m_c_Or(m_Not(m_Value(X)), m_Value(Y)))))) { - Value *NotY = Builder->CreateNot(Y, Y->getName() + ".not"); + Value *NotY = Builder.CreateNot(Y, Y->getName() + ".not"); return BinaryOperator::CreateAnd(X, NotY); } @@ -2379,8 +2397,8 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { NotVal->getOperand(0)->hasOneUse()) && IsFreeToInvert(NotVal->getOperand(1), NotVal->getOperand(1)->hasOneUse())) { - Value *NotX = Builder->CreateNot(NotVal->getOperand(0), "notlhs"); - Value *NotY = Builder->CreateNot(NotVal->getOperand(1), "notrhs"); + Value *NotX = Builder.CreateNot(NotVal->getOperand(0), "notlhs"); + Value *NotY = Builder.CreateNot(NotVal->getOperand(1), "notrhs"); if (NotVal->getOpcode() == Instruction::And) return BinaryOperator::CreateOr(NotX, NotY); return BinaryOperator::CreateAnd(NotX, NotY); @@ -2409,10 +2427,9 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { } } - // xor (cmp A, B), true = not (cmp A, B) = !cmp A, B - ICmpInst::Predicate Pred; - if (match(Op0, m_OneUse(m_Cmp(Pred, m_Value(), m_Value()))) && - match(Op1, m_AllOnes())) { + // not (cmp A, B) = !cmp A, B + CmpInst::Predicate Pred; + if (match(&I, m_Not(m_OneUse(m_Cmp(Pred, m_Value(), m_Value()))))) { cast(Op0)->setPredicate(CmpInst::getInversePredicate(Pred)); return replaceInstUsesWith(I, Op0); } @@ -2424,8 +2441,8 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { if (CI->hasOneUse() && Op0C->hasOneUse()) { Instruction::CastOps Opcode = Op0C->getOpcode(); if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) && - (RHSC == ConstantExpr::getCast(Opcode, Builder->getTrue(), - Op0C->getDestTy()))) { + (RHSC == ConstantExpr::getCast(Opcode, Builder.getTrue(), + Op0C->getDestTy()))) { CI->setPredicate(CI->getInversePredicate()); return CastInst::Create(Opcode, CI, Op0C->getType()); } @@ -2435,7 +2452,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { if (BinaryOperator *Op0I = dyn_cast(Op0)) { // ~(c-X) == X-c-1 == X+(-c-1) - if (Op0I->getOpcode() == Instruction::Sub && RHSC->isAllOnesValue()) + if (Op0I->getOpcode() == Instruction::Sub && RHSC->isMinusOne()) if (Constant *Op0I0C = dyn_cast(Op0I->getOperand(0))) { Constant *NegOp0I0C = ConstantExpr::getNeg(Op0I0C); return BinaryOperator::CreateAdd(Op0I->getOperand(1), @@ -2445,13 +2462,13 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { if (ConstantInt *Op0CI = dyn_cast(Op0I->getOperand(1))) { if (Op0I->getOpcode() == Instruction::Add) { // ~(X-c) --> (-c-1)-X - if (RHSC->isAllOnesValue()) { + if (RHSC->isMinusOne()) { Constant *NegOp0CI = ConstantExpr::getNeg(Op0CI); return BinaryOperator::CreateSub(SubOne(NegOp0CI), Op0I->getOperand(0)); } else if (RHSC->getValue().isSignMask()) { // (X + C) ^ signmask -> (X + C + signmask) - Constant *C = Builder->getInt(RHSC->getValue() + Op0CI->getValue()); + Constant *C = Builder.getInt(RHSC->getValue() + Op0CI->getValue()); return BinaryOperator::CreateAdd(Op0I->getOperand(0), C); } @@ -2484,7 +2501,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { APInt FoldConst = C1->getValue().lshr(C2->getValue()); FoldConst ^= 
C3->getValue(); // Prepare the two operands. - Value *Opnd0 = Builder->CreateLShr(E1->getOperand(0), C2); + Value *Opnd0 = Builder.CreateLShr(E1->getOperand(0), C2); Opnd0->takeName(Op0I); cast(Opnd0)->setDebugLoc(I.getDebugLoc()); Value *FoldVal = ConstantInt::get(Opnd0->getType(), FoldConst); @@ -2529,14 +2546,14 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { if (A == Op1) // (B|A)^B == (A|B)^B std::swap(A, B); if (B == Op1) // (A|B)^B == A & ~B - return BinaryOperator::CreateAnd(A, Builder->CreateNot(Op1)); + return BinaryOperator::CreateAnd(A, Builder.CreateNot(Op1)); } else if (match(Op0, m_OneUse(m_And(m_Value(A), m_Value(B))))) { if (A == Op1) // (A&B)^A -> (B&A)^A std::swap(A, B); const APInt *C; if (B == Op1 && // (B&A)^A == ~B & A !match(Op1, m_APInt(C))) { // Canonical form is (B&C)^C - return BinaryOperator::CreateAnd(Builder->CreateNot(A), Op1); + return BinaryOperator::CreateAnd(Builder.CreateNot(A), Op1); } } } @@ -2548,20 +2565,20 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { match(Op1, m_Or(m_Value(A), m_Value(B)))) { if (D == A) return BinaryOperator::CreateXor( - Builder->CreateAnd(Builder->CreateNot(A), B), C); + Builder.CreateAnd(Builder.CreateNot(A), B), C); if (D == B) return BinaryOperator::CreateXor( - Builder->CreateAnd(Builder->CreateNot(B), A), C); + Builder.CreateAnd(Builder.CreateNot(B), A), C); } // (A | B)^(A ^ C) -> ((~A) & B) ^ C if (match(Op0, m_Or(m_Value(A), m_Value(B))) && match(Op1, m_Xor(m_Value(D), m_Value(C)))) { if (D == A) return BinaryOperator::CreateXor( - Builder->CreateAnd(Builder->CreateNot(A), B), C); + Builder.CreateAnd(Builder.CreateNot(A), B), C); if (D == B) return BinaryOperator::CreateXor( - Builder->CreateAnd(Builder->CreateNot(B), A), C); + Builder.CreateAnd(Builder.CreateNot(B), A), C); } // (A & B) ^ (A ^ B) -> (A | B) if (match(Op0, m_And(m_Value(A), m_Value(B))) && @@ -2578,25 +2595,12 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { Value *A, *B; if (match(Op0, m_c_And(m_Value(A), m_Not(m_Value(B)))) && match(Op1, m_Not(m_Specific(A)))) - return BinaryOperator::CreateNot(Builder->CreateAnd(A, B)); - - // (icmp1 A, B) ^ (icmp2 A, B) --> (icmp3 A, B) - if (ICmpInst *RHS = dyn_cast(I.getOperand(1))) - if (ICmpInst *LHS = dyn_cast(I.getOperand(0))) - if (PredicatesFoldable(LHS->getPredicate(), RHS->getPredicate())) { - if (LHS->getOperand(0) == RHS->getOperand(1) && - LHS->getOperand(1) == RHS->getOperand(0)) - LHS->swapOperands(); - if (LHS->getOperand(0) == RHS->getOperand(0) && - LHS->getOperand(1) == RHS->getOperand(1)) { - Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1); - unsigned Code = getICmpCode(LHS) ^ getICmpCode(RHS); - bool isSigned = LHS->isSigned() || RHS->isSigned(); - return replaceInstUsesWith(I, - getNewICmpValue(isSigned, Code, Op0, Op1, - Builder)); - } - } + return BinaryOperator::CreateNot(Builder.CreateAnd(A, B)); + + if (auto *LHS = dyn_cast(I.getOperand(0))) + if (auto *RHS = dyn_cast(I.getOperand(1))) + if (Value *V = foldXorOfICmps(LHS, RHS)) + return replaceInstUsesWith(I, V); if (Instruction *CastedXor = foldCastedBitwiseLogic(I)) return CastedXor; diff --git a/interpreter/llvm/src/lib/Transforms/InstCombine/InstCombineCalls.cpp b/interpreter/llvm/src/lib/Transforms/InstCombine/InstCombineCalls.cpp index face7abcc95f2..391c430dab75d 100644 --- a/interpreter/llvm/src/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/interpreter/llvm/src/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -16,9 +16,9 @@ #include "llvm/ADT/APInt.h" #include 
"llvm/ADT/ArrayRef.h" #include "llvm/ADT/None.h" -#include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" #include "llvm/ADT/Twine.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" @@ -94,75 +94,80 @@ static Constant *getNegativeIsTrueBoolVec(ConstantDataVector *V) { return ConstantVector::get(BoolVec); } -Instruction * -InstCombiner::SimplifyElementAtomicMemCpy(ElementAtomicMemCpyInst *AMI) { +Instruction *InstCombiner::SimplifyElementUnorderedAtomicMemCpy( + ElementUnorderedAtomicMemCpyInst *AMI) { // Try to unfold this intrinsic into sequence of explicit atomic loads and // stores. // First check that number of elements is compile time constant. - auto *NumElementsCI = dyn_cast(AMI->getNumElements()); - if (!NumElementsCI) + auto *LengthCI = dyn_cast(AMI->getLength()); + if (!LengthCI) return nullptr; // Check that there are not too many elements. - uint64_t NumElements = NumElementsCI->getZExtValue(); + uint64_t LengthInBytes = LengthCI->getZExtValue(); + uint32_t ElementSizeInBytes = AMI->getElementSizeInBytes(); + uint64_t NumElements = LengthInBytes / ElementSizeInBytes; if (NumElements >= UnfoldElementAtomicMemcpyMaxElements) return nullptr; - // Don't unfold into illegal integers - uint64_t ElementSizeInBytes = AMI->getElementSizeInBytes() * 8; - if (!getDataLayout().isLegalInteger(ElementSizeInBytes)) - return nullptr; + // Only expand if there are elements to copy. + if (NumElements > 0) { + // Don't unfold into illegal integers + uint64_t ElementSizeInBits = ElementSizeInBytes * 8; + if (!getDataLayout().isLegalInteger(ElementSizeInBits)) + return nullptr; - // Cast source and destination to the correct type. Intrinsic input arguments - // are usually represented as i8*. - // Often operands will be explicitly casted to i8* and we can just strip - // those casts instead of inserting new ones. However it's easier to rely on - // other InstCombine rules which will cover trivial cases anyway. - Value *Src = AMI->getRawSource(); - Value *Dst = AMI->getRawDest(); - Type *ElementPointerType = Type::getIntNPtrTy( - AMI->getContext(), ElementSizeInBytes, Src->getType()->getPointerAddressSpace()); - - Value *SrcCasted = Builder->CreatePointerCast(Src, ElementPointerType, - "memcpy_unfold.src_casted"); - Value *DstCasted = Builder->CreatePointerCast(Dst, ElementPointerType, - "memcpy_unfold.dst_casted"); - - for (uint64_t i = 0; i < NumElements; ++i) { - // Get current element addresses - ConstantInt *ElementIdxCI = - ConstantInt::get(AMI->getContext(), APInt(64, i)); - Value *SrcElementAddr = - Builder->CreateGEP(SrcCasted, ElementIdxCI, "memcpy_unfold.src_addr"); - Value *DstElementAddr = - Builder->CreateGEP(DstCasted, ElementIdxCI, "memcpy_unfold.dst_addr"); - - // Load from the source. Transfer alignment information and mark load as - // unordered atomic. - LoadInst *Load = Builder->CreateLoad(SrcElementAddr, "memcpy_unfold.val"); - Load->setOrdering(AtomicOrdering::Unordered); - // We know alignment of the first element. It is also guaranteed by the - // verifier that element size is less or equal than first element alignment - // and both of this values are powers of two. - // This means that all subsequent accesses are at least element size - // aligned. - // TODO: We can infer better alignment but there is no evidence that this - // will matter. - Load->setAlignment(i == 0 ? 
AMI->getSrcAlignment() - : AMI->getElementSizeInBytes()); - Load->setDebugLoc(AMI->getDebugLoc()); - - // Store loaded value via unordered atomic store. - StoreInst *Store = Builder->CreateStore(Load, DstElementAddr); - Store->setOrdering(AtomicOrdering::Unordered); - Store->setAlignment(i == 0 ? AMI->getDstAlignment() - : AMI->getElementSizeInBytes()); - Store->setDebugLoc(AMI->getDebugLoc()); + // Cast source and destination to the correct type. Intrinsic input + // arguments are usually represented as i8*. Often operands will be + // explicitly casted to i8* and we can just strip those casts instead of + // inserting new ones. However it's easier to rely on other InstCombine + // rules which will cover trivial cases anyway. + Value *Src = AMI->getRawSource(); + Value *Dst = AMI->getRawDest(); + Type *ElementPointerType = + Type::getIntNPtrTy(AMI->getContext(), ElementSizeInBits, + Src->getType()->getPointerAddressSpace()); + + Value *SrcCasted = Builder.CreatePointerCast(Src, ElementPointerType, + "memcpy_unfold.src_casted"); + Value *DstCasted = Builder.CreatePointerCast(Dst, ElementPointerType, + "memcpy_unfold.dst_casted"); + + for (uint64_t i = 0; i < NumElements; ++i) { + // Get current element addresses + ConstantInt *ElementIdxCI = + ConstantInt::get(AMI->getContext(), APInt(64, i)); + Value *SrcElementAddr = + Builder.CreateGEP(SrcCasted, ElementIdxCI, "memcpy_unfold.src_addr"); + Value *DstElementAddr = + Builder.CreateGEP(DstCasted, ElementIdxCI, "memcpy_unfold.dst_addr"); + + // Load from the source. Transfer alignment information and mark load as + // unordered atomic. + LoadInst *Load = Builder.CreateLoad(SrcElementAddr, "memcpy_unfold.val"); + Load->setOrdering(AtomicOrdering::Unordered); + // We know alignment of the first element. It is also guaranteed by the + // verifier that element size is less or equal than first element + // alignment and both of this values are powers of two. This means that + // all subsequent accesses are at least element size aligned. + // TODO: We can infer better alignment but there is no evidence that this + // will matter. + Load->setAlignment(i == 0 ? AMI->getParamAlignment(1) + : ElementSizeInBytes); + Load->setDebugLoc(AMI->getDebugLoc()); + + // Store loaded value via unordered atomic store. + StoreInst *Store = Builder.CreateStore(Load, DstElementAddr); + Store->setOrdering(AtomicOrdering::Unordered); + Store->setAlignment(i == 0 ? AMI->getParamAlignment(0) + : ElementSizeInBytes); + Store->setDebugLoc(AMI->getDebugLoc()); + } } // Set the number of elements of the copy to 0, it will be deleted on the // next iteration. 
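A rough C++ analogue of the unfolding performed above, under the assumption of 4-byte elements; names are illustrative, relaxed ordering stands in for LLVM's "unordered", and the real transform of course operates on IR rather than on std::atomic:

    #include <atomic>
    #include <cassert>
    #include <cstdint>

    static void unfoldedCopy(std::atomic<uint32_t> *Dst,
                             const std::atomic<uint32_t> *Src,
                             uint64_t LengthInBytes) {
      const uint32_t ElementSizeInBytes = 4; // cf. getElementSizeInBytes()
      uint64_t NumElements = LengthInBytes / ElementSizeInBytes;
      for (uint64_t i = 0; i < NumElements; ++i) {
        // One atomic load/store pair per element, as in the emitted IR.
        uint32_t V = Src[i].load(std::memory_order_relaxed);
        Dst[i].store(V, std::memory_order_relaxed);
      }
    }

    int main() {
      std::atomic<uint32_t> Src[4] = {{1}, {2}, {3}, {4}};
      std::atomic<uint32_t> Dst[4] = {{0}, {0}, {0}, {0}};
      unfoldedCopy(Dst, Src, 16);
      for (int i = 0; i < 4; ++i)
        assert(Dst[i].load() == Src[i].load());
      return 0;
    }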
- AMI->setNumElements(Constant::getNullValue(NumElementsCI->getType())); + AMI->setLength(Constant::getNullValue(LengthCI->getType())); return AMI; } @@ -208,7 +213,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { if (MDNode *M = MI->getMetadata(LLVMContext::MD_tbaa_struct)) { if (M->getNumOperands() == 3 && M->getOperand(0) && mdconst::hasa(M->getOperand(0)) && - mdconst::extract(M->getOperand(0))->isNullValue() && + mdconst::extract(M->getOperand(0))->isZero() && M->getOperand(1) && mdconst::hasa(M->getOperand(1)) && mdconst::extract(M->getOperand(1))->getValue() == @@ -222,9 +227,9 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { SrcAlign = std::max(SrcAlign, CopyAlign); DstAlign = std::max(DstAlign, CopyAlign); - Value *Src = Builder->CreateBitCast(MI->getArgOperand(1), NewSrcPtrTy); - Value *Dest = Builder->CreateBitCast(MI->getArgOperand(0), NewDstPtrTy); - LoadInst *L = Builder->CreateLoad(Src, MI->isVolatile()); + Value *Src = Builder.CreateBitCast(MI->getArgOperand(1), NewSrcPtrTy); + Value *Dest = Builder.CreateBitCast(MI->getArgOperand(0), NewDstPtrTy); + LoadInst *L = Builder.CreateLoad(Src, MI->isVolatile()); L->setAlignment(SrcAlign); if (CopyMD) L->setMetadata(LLVMContext::MD_tbaa, CopyMD); @@ -233,7 +238,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { if (LoopMemParallelMD) L->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD); - StoreInst *S = Builder->CreateStore(L, Dest, MI->isVolatile()); + StoreInst *S = Builder.CreateStore(L, Dest, MI->isVolatile()); S->setAlignment(DstAlign); if (CopyMD) S->setMetadata(LLVMContext::MD_tbaa, CopyMD); @@ -269,15 +274,15 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { Value *Dest = MI->getDest(); unsigned DstAddrSp = cast(Dest->getType())->getAddressSpace(); Type *NewDstPtrTy = PointerType::get(ITy, DstAddrSp); - Dest = Builder->CreateBitCast(Dest, NewDstPtrTy); + Dest = Builder.CreateBitCast(Dest, NewDstPtrTy); // Alignment 0 is identity for alignment 1 for memset, but not store. if (Alignment == 0) Alignment = 1; // Extract the fill value and store. uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL; - StoreInst *S = Builder->CreateStore(ConstantInt::get(ITy, Fill), Dest, - MI->isVolatile()); + StoreInst *S = Builder.CreateStore(ConstantInt::get(ITy, Fill), Dest, + MI->isVolatile()); S->setAlignment(Alignment); // Set the size of the copy to 0, it will be deleted on the next iteration. @@ -393,7 +398,7 @@ static Value *simplifyX86immShift(const IntrinsicInst &II, unsigned BitWidth = SVT->getPrimitiveSizeInBits(); // If shift-by-zero then just return the original value. - if (Count == 0) + if (Count.isNullValue()) return Vec; // Handle cases when Shift >= BitWidth. 
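The memset fold above builds its store value by multiplying the fill byte by 0x0101010101010101, which splats the byte across all eight lanes of an i64; 0xAB is an arbitrary example fill value:

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint64_t FillByte = 0xAB;
      const uint64_t Fill = FillByte * 0x0101010101010101ULL;
      assert(Fill == 0xABABABABABABABABULL);
      return 0;
    }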
@@ -595,8 +600,7 @@ static Value *simplifyX86muldq(const IntrinsicInst &II, return Builder.CreateMul(LHS, RHS); } -static Value *simplifyX86pack(IntrinsicInst &II, InstCombiner &IC, - InstCombiner::BuilderTy &Builder, bool IsSigned) { +static Value *simplifyX86pack(IntrinsicInst &II, bool IsSigned) { Value *Arg0 = II.getArgOperand(0); Value *Arg1 = II.getArgOperand(1); Type *ResTy = II.getType(); @@ -671,8 +675,7 @@ static Value *simplifyX86pack(IntrinsicInst &II, InstCombiner &IC, return ConstantVector::get(Vals); } -static Value *simplifyX86movmsk(const IntrinsicInst &II, - InstCombiner::BuilderTy &Builder) { +static Value *simplifyX86movmsk(const IntrinsicInst &II) { Value *Arg = II.getArgOperand(0); Type *ResTy = II.getType(); Type *ArgTy = Arg->getType(); @@ -855,7 +858,7 @@ static Value *simplifyX86extrq(IntrinsicInst &II, Value *Op0, } // Constant Fold - extraction from zero is always {zero, undef}. - if (CI0 && CI0->equalsInt(0)) + if (CI0 && CI0->isZero()) return LowConstantHighUndef(0); return nullptr; @@ -1373,14 +1376,8 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombiner &IC) { II.getIntrinsicID() == Intrinsic::ctlz) && "Expected cttz or ctlz intrinsic"); Value *Op0 = II.getArgOperand(0); - // FIXME: Try to simplify vectors of integers. - auto *IT = dyn_cast(Op0->getType()); - if (!IT) - return nullptr; - unsigned BitWidth = IT->getBitWidth(); - KnownBits Known(BitWidth); - IC.computeKnownBits(Op0, Known, 0, &II); + KnownBits Known = IC.computeKnownBits(Op0, 0, &II); // Create a mask for bits above (ctlz) or below (cttz) the first known one. bool IsTZ = II.getIntrinsicID() == Intrinsic::cttz; @@ -1394,20 +1391,63 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombiner &IC) { // FIXME: This should be in InstSimplify because we're replacing an // instruction with a constant. if (PossibleZeros == DefiniteZeros) { - auto *C = ConstantInt::get(IT, DefiniteZeros); + auto *C = ConstantInt::get(Op0->getType(), DefiniteZeros); return IC.replaceInstUsesWith(II, C); } // If the input to cttz/ctlz is known to be non-zero, // then change the 'ZeroIsUndef' parameter to 'true' // because we know the zero behavior can't affect the result. - if (Known.One != 0 || isKnownNonZero(Op0, IC.getDataLayout())) { + if (!Known.One.isNullValue() || + isKnownNonZero(Op0, IC.getDataLayout(), 0, &IC.getAssumptionCache(), &II, + &IC.getDominatorTree())) { if (!match(II.getArgOperand(1), m_One())) { - II.setOperand(1, IC.Builder->getTrue()); + II.setOperand(1, IC.Builder.getTrue()); return &II; } } + // Add range metadata since known bits can't completely reflect what we know. + // TODO: Handle splat vectors. + auto *IT = dyn_cast(Op0->getType()); + if (IT && IT->getBitWidth() != 1 && !II.getMetadata(LLVMContext::MD_range)) { + Metadata *LowAndHigh[] = { + ConstantAsMetadata::get(ConstantInt::get(IT, DefiniteZeros)), + ConstantAsMetadata::get(ConstantInt::get(IT, PossibleZeros + 1))}; + II.setMetadata(LLVMContext::MD_range, + MDNode::get(II.getContext(), LowAndHigh)); + return &II; + } + + return nullptr; +} + +static Instruction *foldCtpop(IntrinsicInst &II, InstCombiner &IC) { + assert(II.getIntrinsicID() == Intrinsic::ctpop && + "Expected ctpop intrinsic"); + Value *Op0 = II.getArgOperand(0); + // FIXME: Try to simplify vectors of integers. 
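The known-bits reasoning in foldCttzCtlz above bounds the result between DefiniteZeros and PossibleZeros, which is exactly what the attached !range metadata encodes. A small illustration, using the GCC/Clang builtin as a stand-in for llvm.cttz:

    #include <cassert>
    #include <cstdint>

    int main() {
      // Bit 4 known one, bits 0-3 unknown, rest known zero:
      // cttz must lie in [DefiniteZeros, PossibleZeros] = [0, 4].
      for (uint32_t low = 0; low < 16; ++low) {
        uint32_t x = 0x10u | low;
        int tz = __builtin_ctz(x); // stand-in for llvm.cttz
        assert(tz >= 0 && tz <= 4);
      }
      return 0;
    }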
+ auto *IT = dyn_cast(Op0->getType()); + if (!IT) + return nullptr; + + unsigned BitWidth = IT->getBitWidth(); + KnownBits Known(BitWidth); + IC.computeKnownBits(Op0, Known, 0, &II); + + unsigned MinCount = Known.countMinPopulation(); + unsigned MaxCount = Known.countMaxPopulation(); + + // Add range metadata since known bits can't completely reflect what we know. + if (IT->getBitWidth() != 1 && !II.getMetadata(LLVMContext::MD_range)) { + Metadata *LowAndHigh[] = { + ConstantAsMetadata::get(ConstantInt::get(IT, MinCount)), + ConstantAsMetadata::get(ConstantInt::get(IT, MaxCount + 1))}; + II.setMetadata(LLVMContext::MD_range, + MDNode::get(II.getContext(), LowAndHigh)); + return &II; + } + return nullptr; } @@ -1435,7 +1475,7 @@ static Instruction *simplifyX86MaskedLoad(IntrinsicInst &II, InstCombiner &IC) { // the LLVM intrinsic definition for the pointer argument. unsigned AddrSpace = cast(Ptr->getType())->getAddressSpace(); PointerType *VecPtrTy = PointerType::get(II.getType(), AddrSpace); - Value *PtrCast = IC.Builder->CreateBitCast(Ptr, VecPtrTy, "castvec"); + Value *PtrCast = IC.Builder.CreateBitCast(Ptr, VecPtrTy, "castvec"); // Second, convert the x86 XMM integer vector mask to a vector of bools based // on each element's most significant bit (the sign bit). @@ -1443,7 +1483,7 @@ static Instruction *simplifyX86MaskedLoad(IntrinsicInst &II, InstCombiner &IC) { // The pass-through vector for an x86 masked load is a zero vector. CallInst *NewMaskedLoad = - IC.Builder->CreateMaskedLoad(PtrCast, 1, BoolMask, ZeroVec); + IC.Builder.CreateMaskedLoad(PtrCast, 1, BoolMask, ZeroVec); return IC.replaceInstUsesWith(II, NewMaskedLoad); } @@ -1478,13 +1518,13 @@ static bool simplifyX86MaskedStore(IntrinsicInst &II, InstCombiner &IC) { // the LLVM intrinsic definition for the pointer argument. unsigned AddrSpace = cast(Ptr->getType())->getAddressSpace(); PointerType *VecPtrTy = PointerType::get(Vec->getType(), AddrSpace); - Value *PtrCast = IC.Builder->CreateBitCast(Ptr, VecPtrTy, "castvec"); + Value *PtrCast = IC.Builder.CreateBitCast(Ptr, VecPtrTy, "castvec"); // Second, convert the x86 XMM integer vector mask to a vector of bools based // on each element's most significant bit (the sign bit). Constant *BoolMask = getNegativeIsTrueBoolVec(ConstMask); - IC.Builder->CreateMaskedStore(Vec, PtrCast, 1, BoolMask); + IC.Builder.CreateMaskedStore(Vec, PtrCast, 1, BoolMask); // 'Replace uses' doesn't work for stores. Erase the original masked store. IC.eraseInstFromFunction(II); @@ -1722,7 +1762,7 @@ static Instruction *SimplifyNVVMIntrinsic(IntrinsicInst *II, InstCombiner &IC) { // The remainder of cases are NVVM intrinsics that map to LLVM idioms, but // need special handling. // - // We seem to be mising intrinsics for rcp.approx.{ftz.}f32, which is just + // We seem to be missing intrinsics for rcp.approx.{ftz.}f32, which is just // as well. case Intrinsic::nvvm_rcp_rn_d: return {SPC_Reciprocal, FTZ_Any}; @@ -1818,8 +1858,8 @@ Instruction *InstCombiner::visitVACopyInst(VACopyInst &I) { /// lifting. 
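
The new foldCtpop above derives a range for ctpop purely from known bits: every known-one bit must contribute to the count, and only the not-known-zero bits can. A plain C++ model of the two bounds (the Known32 struct is a hypothetical stand-in that mirrors llvm::KnownBits only in spirit):

    #include <bitset>
    #include <cassert>
    #include <cstdint>

    // Known32 models known bits of an i32: Zero marks bits known to be 0,
    // One marks bits known to be 1; the two masks never overlap.
    struct Known32 { uint32_t Zero, One; };

    unsigned countMinPopulation(Known32 K) { return std::bitset<32>(K.One).count(); }
    unsigned countMaxPopulation(Known32 K) { return 32 - std::bitset<32>(K.Zero).count(); }

    int main() {
      // Low nibble known to be 0xF, next nibble known zero, rest unknown.
      Known32 K{0x000000F0u, 0x0000000Fu};
      assert(countMinPopulation(K) == 4);   // the four known-one bits
      assert(countMaxPopulation(K) == 28);  // all bits except the known zeros
      // The fold attaches !range [MinCount, MaxCount + 1) to the ctpop call.
    }
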
Instruction *InstCombiner::visitCallInst(CallInst &CI) { auto Args = CI.arg_operands(); - if (Value *V = - SimplifyCall(CI.getCalledValue(), Args.begin(), Args.end(), SQ)) + if (Value *V = SimplifyCall(&CI, CI.getCalledValue(), Args.begin(), + Args.end(), SQ.getWithInstruction(&CI))) return replaceInstUsesWith(CI, V); if (isFreeCall(&CI, &TLI)) @@ -1892,12 +1932,12 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { if (Changed) return II; } - if (auto *AMI = dyn_cast(II)) { - if (Constant *C = dyn_cast(AMI->getNumElements())) + if (auto *AMI = dyn_cast(II)) { + if (Constant *C = dyn_cast(AMI->getLength())) if (C->isNullValue()) return eraseInstFromFunction(*AMI); - if (Instruction *I = SimplifyElementAtomicMemCpy(AMI)) + if (Instruction *I = SimplifyElementUnorderedAtomicMemCpy(AMI)) return I; } @@ -1923,16 +1963,17 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { Value *IIOperand = II->getArgOperand(0); Value *X = nullptr; + // TODO should this be in InstSimplify? // bswap(bswap(x)) -> x if (match(IIOperand, m_BSwap(m_Value(X)))) - return replaceInstUsesWith(CI, X); + return replaceInstUsesWith(CI, X); // bswap(trunc(bswap(x))) -> trunc(lshr(x, c)) if (match(IIOperand, m_Trunc(m_BSwap(m_Value(X))))) { unsigned C = X->getType()->getPrimitiveSizeInBits() - IIOperand->getType()->getPrimitiveSizeInBits(); Value *CV = ConstantInt::get(X->getType(), C); - Value *V = Builder->CreateLShr(X, CV); + Value *V = Builder.CreateLShr(X, CV); return new TruncInst(V, IIOperand->getType()); } break; @@ -1942,14 +1983,15 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { Value *IIOperand = II->getArgOperand(0); Value *X = nullptr; + // TODO should this be in InstSimplify? // bitreverse(bitreverse(x)) -> x - if (match(IIOperand, m_Intrinsic(m_Value(X)))) + if (match(IIOperand, m_BitReverse(m_Value(X)))) return replaceInstUsesWith(CI, X); break; } case Intrinsic::masked_load: - if (Value *SimplifiedMaskedOp = simplifyMaskedLoad(*II, *Builder)) + if (Value *SimplifiedMaskedOp = simplifyMaskedLoad(*II, Builder)) return replaceInstUsesWith(CI, SimplifiedMaskedOp); break; case Intrinsic::masked_store: @@ -1968,7 +2010,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { if (Power->isOne()) return replaceInstUsesWith(CI, II->getArgOperand(0)); // powi(x, -1) -> 1/x - if (Power->isAllOnesValue()) + if (Power->isMinusOne()) return BinaryOperator::CreateFDiv(ConstantFP::get(CI.getType(), 1.0), II->getArgOperand(0)); } @@ -1980,6 +2022,11 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { return I; break; + case Intrinsic::ctpop: + if (auto *I = foldCtpop(*II, *this)) + return I; + break; + case Intrinsic::uadd_with_overflow: case Intrinsic::sadd_with_overflow: case Intrinsic::umul_with_overflow: @@ -2026,11 +2073,11 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::fmuladd: { // Canonicalize fast fmuladd to the separate fmul + fadd. 
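
The bswap case above relies on byte swap being an involution, so the nested call cancels. A self-contained C++ check (bswap32 is written out by hand so no compiler builtin is assumed):

    #include <cassert>
    #include <cstdint>

    uint32_t bswap32(uint32_t x) {
      return (x >> 24) | ((x >> 8) & 0x0000FF00u) |
             ((x << 8) & 0x00FF0000u) | (x << 24);
    }

    int main() {
      // bswap(bswap(x)) -> x holds for every input.
      for (uint32_t x : {0u, 1u, 0xDEADBEEFu, 0xFFFFFFFFu})
        assert(bswap32(bswap32(x)) == x);
    }
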
if (II->hasUnsafeAlgebra()) { - BuilderTy::FastMathFlagGuard Guard(*Builder); - Builder->setFastMathFlags(II->getFastMathFlags()); - Value *Mul = Builder->CreateFMul(II->getArgOperand(0), - II->getArgOperand(1)); - Value *Add = Builder->CreateFAdd(Mul, II->getArgOperand(2)); + BuilderTy::FastMathFlagGuard Guard(Builder); + Builder.setFastMathFlags(II->getFastMathFlags()); + Value *Mul = Builder.CreateFMul(II->getArgOperand(0), + II->getArgOperand(1)); + Value *Add = Builder.CreateFAdd(Mul, II->getArgOperand(2)); Add->takeName(II); return replaceInstUsesWith(*II, Add); } @@ -2081,8 +2128,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { Constant *LHS, *RHS; if (match(II->getArgOperand(0), m_Select(m_Value(Cond), m_Constant(LHS), m_Constant(RHS)))) { - CallInst *Call0 = Builder->CreateCall(II->getCalledFunction(), {LHS}); - CallInst *Call1 = Builder->CreateCall(II->getCalledFunction(), {RHS}); + CallInst *Call0 = Builder.CreateCall(II->getCalledFunction(), {LHS}); + CallInst *Call1 = Builder.CreateCall(II->getCalledFunction(), {RHS}); return SelectInst::Create(Cond, Call0, Call1); } @@ -2100,7 +2147,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // fabs (fpext x) -> fpext (fabs x) Value *F = Intrinsic::getDeclaration(II->getModule(), II->getIntrinsicID(), { ExtSrc->getType() }); - CallInst *NewFabs = Builder->CreateCall(F, ExtSrc); + CallInst *NewFabs = Builder.CreateCall(F, ExtSrc); NewFabs->copyFastMathFlags(II); NewFabs->takeName(II); return new FPExtInst(NewFabs, II->getType()); @@ -2127,7 +2174,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // Turn PPC lvx -> load if the pointer is known aligned. if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, &AC, &DT) >= 16) { - Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), + Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0), PointerType::getUnqual(II->getType())); return new LoadInst(Ptr); } @@ -2135,8 +2182,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::ppc_vsx_lxvw4x: case Intrinsic::ppc_vsx_lxvd2x: { // Turn PPC VSX loads into normal loads. - Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), - PointerType::getUnqual(II->getType())); + Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0), + PointerType::getUnqual(II->getType())); return new LoadInst(Ptr, Twine(""), false, 1); } case Intrinsic::ppc_altivec_stvx: @@ -2146,7 +2193,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { &DT) >= 16) { Type *OpPtrTy = PointerType::getUnqual(II->getArgOperand(0)->getType()); - Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy); + Value *Ptr = Builder.CreateBitCast(II->getArgOperand(1), OpPtrTy); return new StoreInst(II->getArgOperand(0), Ptr); } break; @@ -2154,18 +2201,18 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::ppc_vsx_stxvd2x: { // Turn PPC VSX stores into normal stores. Type *OpPtrTy = PointerType::getUnqual(II->getArgOperand(0)->getType()); - Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy); + Value *Ptr = Builder.CreateBitCast(II->getArgOperand(1), OpPtrTy); return new StoreInst(II->getArgOperand(0), Ptr, false, 1); } case Intrinsic::ppc_qpx_qvlfs: // Turn PPC QPX qvlfs -> load if the pointer is known aligned. 
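
Splitting a fast-math fmuladd into fmul + fadd, as the hunk above does, is only value-safe because fmuladd leaves fusion optional; a true fused fma rounds once and can differ in the last bit from the two-rounding form. A small C++ demonstration of that gap (assumes standard IEEE double evaluation and C++17 hex float literals):

    #include <cassert>
    #include <cmath>

    int main() {
      // a*b = 1 + 2^-26 + 2^-54; the plain product rounds away the 2^-54
      // term, while fma keeps the product exact until after the add.
      double a = 1.0 + 0x1p-27, b = 1.0 + 0x1p-27, c = -1.0;
      double fused = std::fma(a, b, c); // one rounding
      double split = a * b + c;         // two roundings
      assert(fused == 0x1p-26 + 0x1p-54);
      assert(split == 0x1p-26);
      assert(fused != split); // why the split requires the fast-math flag
    }
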
if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, &AC, &DT) >= 16) { - Type *VTy = VectorType::get(Builder->getFloatTy(), + Type *VTy = VectorType::get(Builder.getFloatTy(), II->getType()->getVectorNumElements()); - Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), + Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0), PointerType::getUnqual(VTy)); - Value *Load = Builder->CreateLoad(Ptr); + Value *Load = Builder.CreateLoad(Ptr); return new FPExtInst(Load, II->getType()); } break; @@ -2173,7 +2220,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // Turn PPC QPX qvlfd -> load if the pointer is known aligned. if (getOrEnforceKnownAlignment(II->getArgOperand(0), 32, DL, II, &AC, &DT) >= 32) { - Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), + Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0), PointerType::getUnqual(II->getType())); return new LoadInst(Ptr); } @@ -2182,11 +2229,11 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // Turn PPC QPX qvstfs -> store if the pointer is known aligned. if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, II, &AC, &DT) >= 16) { - Type *VTy = VectorType::get(Builder->getFloatTy(), + Type *VTy = VectorType::get(Builder.getFloatTy(), II->getArgOperand(0)->getType()->getVectorNumElements()); - Value *TOp = Builder->CreateFPTrunc(II->getArgOperand(0), VTy); + Value *TOp = Builder.CreateFPTrunc(II->getArgOperand(0), VTy); Type *OpPtrTy = PointerType::getUnqual(VTy); - Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy); + Value *Ptr = Builder.CreateBitCast(II->getArgOperand(1), OpPtrTy); return new StoreInst(TOp, Ptr); } break; @@ -2196,7 +2243,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { &DT) >= 32) { Type *OpPtrTy = PointerType::getUnqual(II->getArgOperand(0)->getType()); - Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy); + Value *Ptr = Builder.CreateBitCast(II->getArgOperand(1), OpPtrTy); return new StoreInst(II->getArgOperand(0), Ptr); } break; @@ -2225,15 +2272,15 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { SmallVector SubVecMask; for (unsigned i = 0; i != RetWidth; ++i) SubVecMask.push_back((int)i); - VectorHalfAsShorts = Builder->CreateShuffleVector( + VectorHalfAsShorts = Builder.CreateShuffleVector( Arg, UndefValue::get(ArgType), SubVecMask); } auto VectorHalfType = VectorType::get(Type::getHalfTy(II->getContext()), RetWidth); auto VectorHalfs = - Builder->CreateBitCast(VectorHalfAsShorts, VectorHalfType); - auto VectorFloats = Builder->CreateFPExt(VectorHalfs, RetType); + Builder.CreateBitCast(VectorHalfAsShorts, VectorHalfType); + auto VectorFloats = Builder.CreateFPExt(VectorHalfs, RetType); return replaceInstUsesWith(*II, VectorFloats); } @@ -2287,7 +2334,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_avx_movmsk_pd_256: case Intrinsic::x86_avx_movmsk_ps_256: case Intrinsic::x86_avx2_pmovmskb: { - if (Value *V = simplifyX86movmsk(*II, *Builder)) + if (Value *V = simplifyX86movmsk(*II)) return replaceInstUsesWith(*II, V); break; } @@ -2390,25 +2437,25 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { default: llvm_unreachable("Case stmts out of sync!"); case Intrinsic::x86_avx512_mask_add_ps_512: case Intrinsic::x86_avx512_mask_add_pd_512: - V = Builder->CreateFAdd(Arg0, Arg1); + V = Builder.CreateFAdd(Arg0, Arg1); break; case Intrinsic::x86_avx512_mask_sub_ps_512: case Intrinsic::x86_avx512_mask_sub_pd_512: - V = Builder->CreateFSub(Arg0, Arg1); + V = 
Builder.CreateFSub(Arg0, Arg1); break; case Intrinsic::x86_avx512_mask_mul_ps_512: case Intrinsic::x86_avx512_mask_mul_pd_512: - V = Builder->CreateFMul(Arg0, Arg1); + V = Builder.CreateFMul(Arg0, Arg1); break; case Intrinsic::x86_avx512_mask_div_ps_512: case Intrinsic::x86_avx512_mask_div_pd_512: - V = Builder->CreateFDiv(Arg0, Arg1); + V = Builder.CreateFDiv(Arg0, Arg1); break; } // Create a select for the masking. V = emitX86MaskSelect(II->getArgOperand(3), V, II->getArgOperand(2), - *Builder); + Builder); return replaceInstUsesWith(*II, V); } } @@ -2429,27 +2476,27 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // Extract the element as scalars. Value *Arg0 = II->getArgOperand(0); Value *Arg1 = II->getArgOperand(1); - Value *LHS = Builder->CreateExtractElement(Arg0, (uint64_t)0); - Value *RHS = Builder->CreateExtractElement(Arg1, (uint64_t)0); + Value *LHS = Builder.CreateExtractElement(Arg0, (uint64_t)0); + Value *RHS = Builder.CreateExtractElement(Arg1, (uint64_t)0); Value *V; switch (II->getIntrinsicID()) { default: llvm_unreachable("Case stmts out of sync!"); case Intrinsic::x86_avx512_mask_add_ss_round: case Intrinsic::x86_avx512_mask_add_sd_round: - V = Builder->CreateFAdd(LHS, RHS); + V = Builder.CreateFAdd(LHS, RHS); break; case Intrinsic::x86_avx512_mask_sub_ss_round: case Intrinsic::x86_avx512_mask_sub_sd_round: - V = Builder->CreateFSub(LHS, RHS); + V = Builder.CreateFSub(LHS, RHS); break; case Intrinsic::x86_avx512_mask_mul_ss_round: case Intrinsic::x86_avx512_mask_mul_sd_round: - V = Builder->CreateFMul(LHS, RHS); + V = Builder.CreateFMul(LHS, RHS); break; case Intrinsic::x86_avx512_mask_div_ss_round: case Intrinsic::x86_avx512_mask_div_sd_round: - V = Builder->CreateFDiv(LHS, RHS); + V = Builder.CreateFDiv(LHS, RHS); break; } @@ -2459,18 +2506,18 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // We don't need a select if we know the mask bit is a 1. if (!C || !C->getValue()[0]) { // Cast the mask to an i1 vector and then extract the lowest element. - auto *MaskTy = VectorType::get(Builder->getInt1Ty(), + auto *MaskTy = VectorType::get(Builder.getInt1Ty(), cast(Mask->getType())->getBitWidth()); - Mask = Builder->CreateBitCast(Mask, MaskTy); - Mask = Builder->CreateExtractElement(Mask, (uint64_t)0); + Mask = Builder.CreateBitCast(Mask, MaskTy); + Mask = Builder.CreateExtractElement(Mask, (uint64_t)0); // Extract the lowest element from the passthru operand. - Value *Passthru = Builder->CreateExtractElement(II->getArgOperand(2), + Value *Passthru = Builder.CreateExtractElement(II->getArgOperand(2), (uint64_t)0); - V = Builder->CreateSelect(Mask, V, Passthru); + V = Builder.CreateSelect(Mask, V, Passthru); } // Insert the result back into the original argument 0. 
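
These AVX-512 arithmetic cases all funnel into the same shape: compute the full-width IR operation, then let emitX86MaskSelect blend it with the passthru lanes. A scalar C++ model of that rewrite (maskedAdd is an illustrative name, not an API):

    #include <array>
    #include <cassert>
    #include <cstdint>

    std::array<float, 4> maskedAdd(std::array<float, 4> a, std::array<float, 4> b,
                                   std::array<float, 4> passthru, uint8_t mask) {
      std::array<float, 4> r{};
      for (int i = 0; i < 4; ++i) {
        float full = a[i] + b[i];                       // the unmasked FAdd
        r[i] = ((mask >> i) & 1) ? full : passthru[i];  // the mask select
      }
      return r;
    }

    int main() {
      auto r = maskedAdd({1, 2, 3, 4}, {10, 20, 30, 40}, {-1, -1, -1, -1}, 0b0101);
      assert(r[0] == 11 && r[1] == -1 && r[2] == 33 && r[3] == -1);
    }
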
- V = Builder->CreateInsertElement(Arg0, V, (uint64_t)0); + V = Builder.CreateInsertElement(Arg0, V, (uint64_t)0); return replaceInstUsesWith(*II, V); } @@ -2551,7 +2598,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_avx512_pslli_d_512: case Intrinsic::x86_avx512_pslli_q_512: case Intrinsic::x86_avx512_pslli_w_512: - if (Value *V = simplifyX86immShift(*II, *Builder)) + if (Value *V = simplifyX86immShift(*II, Builder)) return replaceInstUsesWith(*II, V); break; @@ -2582,7 +2629,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_avx512_psll_d_512: case Intrinsic::x86_avx512_psll_q_512: case Intrinsic::x86_avx512_psll_w_512: { - if (Value *V = simplifyX86immShift(*II, *Builder)) + if (Value *V = simplifyX86immShift(*II, Builder)) return replaceInstUsesWith(*II, V); // SSE2/AVX2 uses only the first 64-bits of the 128-bit vector @@ -2626,7 +2673,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_avx512_psrlv_w_128: case Intrinsic::x86_avx512_psrlv_w_256: case Intrinsic::x86_avx512_psrlv_w_512: - if (Value *V = simplifyX86varShift(*II, *Builder)) + if (Value *V = simplifyX86varShift(*II, Builder)) return replaceInstUsesWith(*II, V); break; @@ -2636,7 +2683,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_avx2_pmulu_dq: case Intrinsic::x86_avx512_pmul_dq_512: case Intrinsic::x86_avx512_pmulu_dq_512: { - if (Value *V = simplifyX86muldq(*II, *Builder)) + if (Value *V = simplifyX86muldq(*II, Builder)) return replaceInstUsesWith(*II, V); unsigned VWidth = II->getType()->getVectorNumElements(); @@ -2656,7 +2703,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_avx2_packsswb: case Intrinsic::x86_avx512_packssdw_512: case Intrinsic::x86_avx512_packsswb_512: - if (Value *V = simplifyX86pack(*II, *this, *Builder, true)) + if (Value *V = simplifyX86pack(*II, true)) return replaceInstUsesWith(*II, V); break; @@ -2666,7 +2713,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_avx2_packuswb: case Intrinsic::x86_avx512_packusdw_512: case Intrinsic::x86_avx512_packuswb_512: - if (Value *V = simplifyX86pack(*II, *this, *Builder, false)) + if (Value *V = simplifyX86pack(*II, false)) return replaceInstUsesWith(*II, V); break; @@ -2709,7 +2756,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { } case Intrinsic::x86_sse41_insertps: - if (Value *V = simplifyX86insertps(*II, *Builder)) + if (Value *V = simplifyX86insertps(*II, Builder)) return replaceInstUsesWith(*II, V); break; @@ -2732,7 +2779,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { : nullptr; // Attempt to simplify to a constant, shuffle vector or EXTRQI call. - if (Value *V = simplifyX86extrq(*II, Op0, CILength, CIIndex, *Builder)) + if (Value *V = simplifyX86extrq(*II, Op0, CILength, CIIndex, Builder)) return replaceInstUsesWith(*II, V); // EXTRQ only uses the lowest 64-bits of the first 128-bit vector @@ -2764,7 +2811,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { ConstantInt *CIIndex = dyn_cast(II->getArgOperand(2)); // Attempt to simplify to a constant or shuffle vector. 
- if (Value *V = simplifyX86extrq(*II, Op0, CILength, CIIndex, *Builder)) + if (Value *V = simplifyX86extrq(*II, Op0, CILength, CIIndex, Builder)) return replaceInstUsesWith(*II, V); // EXTRQI only uses the lowest 64-bits of the first 128-bit vector @@ -2796,7 +2843,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { const APInt &V11 = CI11->getValue(); APInt Len = V11.zextOrTrunc(6); APInt Idx = V11.lshr(8).zextOrTrunc(6); - if (Value *V = simplifyX86insertq(*II, Op0, Op1, Len, Idx, *Builder)) + if (Value *V = simplifyX86insertq(*II, Op0, Op1, Len, Idx, Builder)) return replaceInstUsesWith(*II, V); } @@ -2829,7 +2876,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { if (CILength && CIIndex) { APInt Len = CILength->getValue().zextOrTrunc(6); APInt Idx = CIIndex->getValue().zextOrTrunc(6); - if (Value *V = simplifyX86insertq(*II, Op0, Op1, Len, Idx, *Builder)) + if (Value *V = simplifyX86insertq(*II, Op0, Op1, Len, Idx, Builder)) return replaceInstUsesWith(*II, V); } @@ -2883,7 +2930,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_ssse3_pshuf_b_128: case Intrinsic::x86_avx2_pshuf_b: case Intrinsic::x86_avx512_pshuf_b_512: - if (Value *V = simplifyX86pshufb(*II, *Builder)) + if (Value *V = simplifyX86pshufb(*II, Builder)) return replaceInstUsesWith(*II, V); break; @@ -2893,13 +2940,13 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_avx_vpermilvar_pd: case Intrinsic::x86_avx_vpermilvar_pd_256: case Intrinsic::x86_avx512_vpermilvar_pd_512: - if (Value *V = simplifyX86vpermilvar(*II, *Builder)) + if (Value *V = simplifyX86vpermilvar(*II, Builder)) return replaceInstUsesWith(*II, V); break; case Intrinsic::x86_avx2_permd: case Intrinsic::x86_avx2_permps: - if (Value *V = simplifyX86vpermv(*II, *Builder)) + if (Value *V = simplifyX86vpermv(*II, Builder)) return replaceInstUsesWith(*II, V); break; @@ -2917,10 +2964,10 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_avx512_mask_permvar_sf_512: case Intrinsic::x86_avx512_mask_permvar_si_256: case Intrinsic::x86_avx512_mask_permvar_si_512: - if (Value *V = simplifyX86vpermv(*II, *Builder)) { + if (Value *V = simplifyX86vpermv(*II, Builder)) { // We simplified the permuting, now create a select for the masking. 
V = emitX86MaskSelect(II->getArgOperand(3), V, II->getArgOperand(2), - *Builder); + Builder); return replaceInstUsesWith(*II, V); } break; @@ -2929,7 +2976,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_avx_vperm2f128_ps_256: case Intrinsic::x86_avx_vperm2f128_si_256: case Intrinsic::x86_avx2_vperm2i128: - if (Value *V = simplifyX86vperm2(*II, *Builder)) + if (Value *V = simplifyX86vperm2(*II, Builder)) return replaceInstUsesWith(*II, V); break; @@ -2962,7 +3009,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_xop_vpcomd: case Intrinsic::x86_xop_vpcomq: case Intrinsic::x86_xop_vpcomw: - if (Value *V = simplifyX86vpcom(*II, *Builder, true)) + if (Value *V = simplifyX86vpcom(*II, Builder, true)) return replaceInstUsesWith(*II, V); break; @@ -2970,7 +3017,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_xop_vpcomud: case Intrinsic::x86_xop_vpcomuq: case Intrinsic::x86_xop_vpcomuw: - if (Value *V = simplifyX86vpcom(*II, *Builder, false)) + if (Value *V = simplifyX86vpcom(*II, Builder, false)) return replaceInstUsesWith(*II, V); break; @@ -2997,10 +3044,10 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { if (AllEltsOk) { // Cast the input vectors to byte vectors. - Value *Op0 = Builder->CreateBitCast(II->getArgOperand(0), - Mask->getType()); - Value *Op1 = Builder->CreateBitCast(II->getArgOperand(1), - Mask->getType()); + Value *Op0 = Builder.CreateBitCast(II->getArgOperand(0), + Mask->getType()); + Value *Op1 = Builder.CreateBitCast(II->getArgOperand(1), + Mask->getType()); Value *Result = UndefValue::get(Op0->getType()); // Only extract each element once. @@ -3020,13 +3067,13 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { Value *Op0ToUse = (DL.isLittleEndian()) ? Op1 : Op0; Value *Op1ToUse = (DL.isLittleEndian()) ? Op0 : Op1; ExtractedElts[Idx] = - Builder->CreateExtractElement(Idx < 16 ? Op0ToUse : Op1ToUse, - Builder->getInt32(Idx&15)); + Builder.CreateExtractElement(Idx < 16 ? Op0ToUse : Op1ToUse, + Builder.getInt32(Idx&15)); } // Insert this value into the result vector. - Result = Builder->CreateInsertElement(Result, ExtractedElts[Idx], - Builder->getInt32(i)); + Result = Builder.CreateInsertElement(Result, ExtractedElts[Idx], + Builder.getInt32(i)); } return CastInst::Create(Instruction::BitCast, Result, CI.getType()); } @@ -3191,7 +3238,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { if (Mask == (S_NAN | Q_NAN)) { // Equivalent of isnan. Replace with standard fcmp. - Value *FCmp = Builder->CreateFCmpUNO(Src0, Src0); + Value *FCmp = Builder.CreateFCmpUNO(Src0, Src0); FCmp->takeName(II); return replaceInstUsesWith(*II, FCmp); } @@ -3203,7 +3250,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // Clamp mask to used bits if ((Mask & FullMask) != Mask) { - CallInst *NewCall = Builder->CreateCall(II->getCalledFunction(), + CallInst *NewCall = Builder.CreateCall(II->getCalledFunction(), { Src0, ConstantInt::get(Src1->getType(), Mask & FullMask) } ); @@ -3296,13 +3343,12 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // TODO: Also emit sub if only width is constant. 
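
The class-mask case above turns a pure NaN class test into fcmp uno x, x, i.e. the classic self-comparison trick: NaN is the only value that compares unordered with itself. In C++ terms:

    #include <cassert>
    #include <cmath>
    #include <limits>

    int main() {
      double nan = std::numeric_limits<double>::quiet_NaN();
      assert(nan != nan);                       // unordered self-compare
      assert(!(1.0 != 1.0));                    // ordinary values are ordered
      assert(std::isnan(nan) == (nan != nan));  // matches the class test
    }
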
if (!CWidth && COffset && Offset == 0) { Constant *KSize = ConstantInt::get(COffset->getType(), IntSize); - Value *ShiftVal = Builder->CreateSub(KSize, II->getArgOperand(2)); - ShiftVal = Builder->CreateZExt(ShiftVal, II->getType()); + Value *ShiftVal = Builder.CreateSub(KSize, II->getArgOperand(2)); + ShiftVal = Builder.CreateZExt(ShiftVal, II->getType()); - Value *Shl = Builder->CreateShl(Src, ShiftVal); - Value *RightShift = Signed ? - Builder->CreateAShr(Shl, ShiftVal) : - Builder->CreateLShr(Shl, ShiftVal); + Value *Shl = Builder.CreateShl(Src, ShiftVal); + Value *RightShift = Signed ? Builder.CreateAShr(Shl, ShiftVal) + : Builder.CreateLShr(Shl, ShiftVal); RightShift->takeName(II); return replaceInstUsesWith(*II, RightShift); } @@ -3313,17 +3359,15 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // TODO: This allows folding to undef when the hardware has specific // behavior? if (Offset + Width < IntSize) { - Value *Shl = Builder->CreateShl(Src, IntSize - Offset - Width); - Value *RightShift = Signed ? - Builder->CreateAShr(Shl, IntSize - Width) : - Builder->CreateLShr(Shl, IntSize - Width); + Value *Shl = Builder.CreateShl(Src, IntSize - Offset - Width); + Value *RightShift = Signed ? Builder.CreateAShr(Shl, IntSize - Width) + : Builder.CreateLShr(Shl, IntSize - Width); RightShift->takeName(II); return replaceInstUsesWith(*II, RightShift); } - Value *RightShift = Signed ? - Builder->CreateAShr(Src, Offset) : - Builder->CreateLShr(Src, Offset); + Value *RightShift = Signed ? Builder.CreateAShr(Src, Offset) + : Builder.CreateLShr(Src, Offset); RightShift->takeName(II); return replaceInstUsesWith(*II, RightShift); @@ -3392,7 +3436,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { } if (match(Src2, m_NaN()) || isa(Src2)) { - CallInst *NewCall = Builder->CreateMinNum(Src0, Src1); + CallInst *NewCall = Builder.CreateMinNum(Src0, Src1); NewCall->copyFastMathFlags(II); NewCall->takeName(II); return replaceInstUsesWith(*II, NewCall); @@ -3404,7 +3448,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { APFloat Result = fmed3AMDGCN(C0->getValueAPF(), C1->getValueAPF(), C2->getValueAPF()); return replaceInstUsesWith(*II, - ConstantFP::get(Builder->getContext(), Result)); + ConstantFP::get(Builder.getContext(), Result)); } } } @@ -3447,7 +3491,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { Metadata *MDArgs[] = {MDString::get(II->getContext(), "exec")}; MDNode *MD = MDNode::get(II->getContext(), MDArgs); Value *Args[] = {MetadataAsValue::get(II->getContext(), MD)}; - CallInst *NewCall = Builder->CreateCall(NewF, Args); + CallInst *NewCall = Builder.CreateCall(NewF, Args); NewCall->addAttribute(AttributeList::FunctionIndex, Attribute::Convergent); NewCall->takeName(II); @@ -3509,7 +3553,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { SrcLHS->getType()); Value *Args[] = { SrcLHS, SrcRHS, ConstantInt::get(CC->getType(), SrcPred) }; - CallInst *NewCall = Builder->CreateCall(NewF, Args); + CallInst *NewCall = Builder.CreateCall(NewF, Args); NewCall->takeName(II); return replaceInstUsesWith(*II, NewCall); } @@ -3586,16 +3630,14 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // the InstCombineIRInserter object. 
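
The bfe folds above materialize a bit-field extract as a shift to the top followed by an arithmetic (signed) or logical (unsigned) shift back down. A standalone C++ version of both lowerings (assumes arithmetic right shift on signed int, true on all mainstream targets and guaranteed since C++20; valid for 1 <= width and offset + width <= 32):

    #include <cassert>
    #include <cstdint>

    int32_t sbfe(int32_t src, unsigned offset, unsigned width) {
      return (int32_t)((uint32_t)src << (32 - offset - width)) >> (32 - width);
    }
    uint32_t ubfe(uint32_t src, unsigned offset, unsigned width) {
      return (src << (32 - offset - width)) >> (32 - width);
    }

    int main() {
      // Bits [4, 8) of 0xF5 are 0b1111: -1 sign-filled, 15 zero-filled.
      assert(sbfe(0xF5, 4, 4) == -1);
      assert(ubfe(0xF5u, 4, 4) == 15u);
      assert(sbfe(0x75, 4, 4) == 7); // sign bit of the field clear
    }
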
Value *AssumeIntrinsic = II->getCalledValue(), *A, *B; if (match(IIOperand, m_And(m_Value(A), m_Value(B)))) { - Builder->CreateCall(AssumeIntrinsic, A, II->getName()); - Builder->CreateCall(AssumeIntrinsic, B, II->getName()); + Builder.CreateCall(AssumeIntrinsic, A, II->getName()); + Builder.CreateCall(AssumeIntrinsic, B, II->getName()); return eraseInstFromFunction(*II); } // assume(!(a || b)) -> assume(!a); assume(!b); if (match(IIOperand, m_Not(m_Or(m_Value(A), m_Value(B))))) { - Builder->CreateCall(AssumeIntrinsic, Builder->CreateNot(A), - II->getName()); - Builder->CreateCall(AssumeIntrinsic, Builder->CreateNot(B), - II->getName()); + Builder.CreateCall(AssumeIntrinsic, Builder.CreateNot(A), II->getName()); + Builder.CreateCall(AssumeIntrinsic, Builder.CreateNot(B), II->getName()); return eraseInstFromFunction(*II); } @@ -3679,7 +3721,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { return eraseInstFromFunction(*NextInst); // Otherwise canonicalize guard(a); guard(b) -> guard(a & b). - II->setArgOperand(0, Builder->CreateAnd(CurrCond, NextCond)); + II->setArgOperand(0, Builder.CreateAnd(CurrCond, NextCond)); return eraseInstFromFunction(*NextInst); } break; @@ -3838,24 +3880,24 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) { // Mark any parameters that are known to be non-null with the nonnull // attribute. This is helpful for inlining calls to functions with null // checks on their arguments. - SmallVector Indices; + SmallVector ArgNos; unsigned ArgNo = 0; for (Value *V : CS.args()) { if (V->getType()->isPointerTy() && !CS.paramHasAttr(ArgNo, Attribute::NonNull) && isKnownNonNullAt(V, CS.getInstruction(), &DT)) - Indices.push_back(ArgNo + AttributeList::FirstArgIndex); + ArgNos.push_back(ArgNo); ArgNo++; } assert(ArgNo == CS.arg_size() && "sanity check"); - if (!Indices.empty()) { + if (!ArgNos.empty()) { AttributeList AS = CS.getAttributes(); LLVMContext &Ctx = CS.getInstruction()->getContext(); - AS = AS.addAttribute(Ctx, Indices, - Attribute::get(Ctx, Attribute::NonNull)); + AS = AS.addParamAttribute(Ctx, ArgNos, + Attribute::get(Ctx, Attribute::NonNull)); CS.setAttributes(AS); Changed = true; } @@ -4116,7 +4158,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { Value *NewArg = *AI; if ((*AI)->getType() != ParamTy) - NewArg = Builder->CreateBitOrPointerCast(*AI, ParamTy); + NewArg = Builder.CreateBitOrPointerCast(*AI, ParamTy); Args.push_back(NewArg); // Add any parameter attributes. @@ -4142,7 +4184,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { // Must promote to pass through va_arg area! Instruction::CastOps opcode = CastInst::getCastOpcode(*AI, false, PTy, false); - NewArg = Builder->CreateCast(opcode, *AI, PTy); + NewArg = Builder.CreateCast(opcode, *AI, PTy); } Args.push_back(NewArg); @@ -4168,10 +4210,10 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { CallSite NewCS; if (InvokeInst *II = dyn_cast(Caller)) { - NewCS = Builder->CreateInvoke(Callee, II->getNormalDest(), - II->getUnwindDest(), Args, OpBundles); + NewCS = Builder.CreateInvoke(Callee, II->getNormalDest(), + II->getUnwindDest(), Args, OpBundles); } else { - NewCS = Builder->CreateCall(Callee, Args, OpBundles); + NewCS = Builder.CreateCall(Callee, Args, OpBundles); cast(NewCS.getInstruction()) ->setTailCallKind(cast(Caller)->getTailCallKind()); } @@ -4281,7 +4323,7 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS, // Add the chain argument and attributes. 
Value *NestVal = Tramp->getArgOperand(2); if (NestVal->getType() != NestTy) - NestVal = Builder->CreateBitCast(NestVal, NestTy, "nest"); + NestVal = Builder.CreateBitCast(NestVal, NestTy, "nest"); NewArgs.push_back(NestVal); NewArgAttrs.push_back(NestAttr); } diff --git a/interpreter/llvm/src/lib/Transforms/InstCombine/InstCombineCasts.cpp b/interpreter/llvm/src/lib/Transforms/InstCombine/InstCombineCasts.cpp index 4939859470f65..dfdfd3e9da840 100644 --- a/interpreter/llvm/src/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/interpreter/llvm/src/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -84,7 +84,7 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, AllocaInst &AI) { PointerType *PTy = cast(CI.getType()); - BuilderTy AllocaBuilder(*Builder); + BuilderTy AllocaBuilder(Builder); AllocaBuilder.SetInsertPoint(&AI); // Get the type really allocated and the type casted to. @@ -406,8 +406,7 @@ static bool canEvaluateTruncated(Value *V, Type *Ty, InstCombiner &IC, /// trunc (lshr (bitcast <4 x i32> %X to i128), 32) to i32 /// ---> /// extractelement <4 x i32> %X, 1 -static Instruction *foldVecTruncToExtElt(TruncInst &Trunc, InstCombiner &IC, - const DataLayout &DL) { +static Instruction *foldVecTruncToExtElt(TruncInst &Trunc, InstCombiner &IC) { Value *TruncOp = Trunc.getOperand(0); Type *DestType = Trunc.getType(); if (!TruncOp->hasOneUse() || !isa(DestType)) @@ -434,14 +433,14 @@ static Instruction *foldVecTruncToExtElt(TruncInst &Trunc, InstCombiner &IC, unsigned NumVecElts = VecWidth / DestWidth; if (VecType->getElementType() != DestType) { VecType = VectorType::get(DestType, NumVecElts); - VecInput = IC.Builder->CreateBitCast(VecInput, VecType, "bc"); + VecInput = IC.Builder.CreateBitCast(VecInput, VecType, "bc"); } unsigned Elt = ShiftAmount / DestWidth; - if (DL.isBigEndian()) + if (IC.getDataLayout().isBigEndian()) Elt = NumVecElts - 1 - Elt; - return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt)); + return ExtractElementInst::Create(VecInput, IC.Builder.getInt32(Elt)); } /// Try to narrow the width of bitwise logic instructions with constants. @@ -460,7 +459,7 @@ Instruction *InstCombiner::shrinkBitwiseLogic(TruncInst &Trunc) { // trunc (logic X, C) --> logic (trunc X, C') Constant *NarrowC = ConstantExpr::getTrunc(C, DestTy); - Value *NarrowOp0 = Builder->CreateTrunc(LogicOp->getOperand(0), DestTy); + Value *NarrowOp0 = Builder.CreateTrunc(LogicOp->getOperand(0), DestTy); return BinaryOperator::Create(LogicOp->getOpcode(), NarrowOp0, NarrowC); } @@ -554,7 +553,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) { // Canonicalize trunc x to i1 -> (icmp ne (and x, 1), 0), likewise for vector. if (DestTy->getScalarSizeInBits() == 1) { Constant *One = ConstantInt::get(SrcTy, 1); - Src = Builder->CreateAnd(Src, One); + Src = Builder.CreateAnd(Src, One); Value *Zero = Constant::getNullValue(Src->getType()); return new ICmpInst(ICmpInst::ICMP_NE, Src, Zero); } @@ -580,11 +579,12 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) { // Since we're doing an lshr and a zero extend, and know that the shift // amount is smaller than ASize, it is always safe to do the shift in A's // type, then zero extend or truncate to the result. - Value *Shift = Builder->CreateLShr(A, Cst->getZExtValue()); + Value *Shift = Builder.CreateLShr(A, Cst->getZExtValue()); Shift->takeName(Src); return CastInst::CreateIntegerCast(Shift, DestTy, false); } + // FIXME: We should canonicalize to zext/trunc and remove this transform. 
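
foldVecTruncToExtElt, which now takes its DataLayout from the InstCombiner, rewrites a truncate of a shifted vector bitcast into a plain lane extract. The little-endian equivalence it exploits can be checked directly (assumes a little-endian host and a compiler providing __int128, e.g. GCC or Clang):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    int main() {
      // Lane i of a <4 x i32> occupies bits [32*i, 32*i + 32) of the i128
      // bitcast, so trunc(lshr(bits, 32)) == extractelement(v, 1).
      uint32_t v[4] = {0x11111111u, 0x22222222u, 0x33333333u, 0x44444444u};
      unsigned __int128 bits;
      std::memcpy(&bits, v, sizeof bits);          // the bitcast
      uint32_t viaShift = (uint32_t)(bits >> 32);  // lshr + trunc
      assert(viaShift == v[1]);                    // the extractelement
    }
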
// Transform trunc(lshr (sext A), Cst) to ashr A, Cst to eliminate type // conversion. // It works because bits coming from sign extension have the same value as @@ -595,28 +595,34 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) { Value *SExt = cast(Src)->getOperand(0); const unsigned SExtSize = SExt->getType()->getPrimitiveSizeInBits(); const unsigned ASize = A->getType()->getPrimitiveSizeInBits(); + const unsigned CISize = CI.getType()->getPrimitiveSizeInBits(); + const unsigned MaxAmt = SExtSize - std::max(CISize, ASize); unsigned ShiftAmt = Cst->getZExtValue(); + // This optimization can be only performed when zero bits generated by // the original lshr aren't pulled into the value after truncation, so we // can only shift by values no larger than the number of extension bits. // FIXME: Instead of bailing when the shift is too large, use and to clear // the extra bits. - if (SExt->hasOneUse() && ShiftAmt <= SExtSize - ASize) { - // If shifting by the size of the original value in bits or more, it is - // being filled with the sign bit, so shift by ASize-1 to avoid ub. - Value *Shift = Builder->CreateAShr(A, std::min(ShiftAmt, ASize-1)); - Shift->takeName(Src); - return CastInst::CreateIntegerCast(Shift, CI.getType(), true); + if (ShiftAmt <= MaxAmt) { + if (CISize == ASize) + return BinaryOperator::CreateAShr(A, ConstantInt::get(CI.getType(), + std::min(ShiftAmt, ASize - 1))); + if (SExt->hasOneUse()) { + Value *Shift = Builder.CreateAShr(A, std::min(ShiftAmt, ASize - 1)); + Shift->takeName(Src); + return CastInst::CreateIntegerCast(Shift, CI.getType(), true); + } } } if (Instruction *I = shrinkBitwiseLogic(CI)) return I; - if (Instruction *I = shrinkSplatShuffle(CI, *Builder)) + if (Instruction *I = shrinkSplatShuffle(CI, Builder)) return I; - if (Instruction *I = shrinkInsertElt(CI, *Builder)) + if (Instruction *I = shrinkInsertElt(CI, Builder)) return I; if (Src->hasOneUse() && isa(SrcTy) && @@ -629,7 +635,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) { // FoldShiftByConstant and is the extend in reg pattern. const unsigned DestSize = DestTy->getScalarSizeInBits(); if (Cst->getValue().ult(DestSize)) { - Value *NewTrunc = Builder->CreateTrunc(A, DestTy, A->getName() + ".tr"); + Value *NewTrunc = Builder.CreateTrunc(A, DestTy, A->getName() + ".tr"); return BinaryOperator::Create( Instruction::Shl, NewTrunc, @@ -638,7 +644,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) { } } - if (Instruction *I = foldVecTruncToExtElt(CI, *this, DL)) + if (Instruction *I = foldVecTruncToExtElt(CI, *this)) return I; return nullptr; @@ -654,20 +660,20 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, ZExtInst &CI, // zext (x x>>u31 true if signbit set. // zext (x >s -1) to i32 --> (x>>u31)^1 true if signbit clear. 
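
The widened bound introduced here, MaxAmt = SExtSize - max(CISize, ASize), is what makes the ashr rewrite safe: every bit the truncated lshr can observe is either an original bit or a sign copy. An exhaustive C++ check of the i8-through-i32 case (again assuming arithmetic right shift on signed types):

    #include <algorithm>
    #include <cassert>
    #include <cstdint>

    int main() {
      // trunc i8 (lshr (sext i8 a to i32), C) == ashr i8 a, min(C, 7)
      // for every C <= 32 - 8 = 24 (MaxAmt when CISize == ASize == 8).
      for (int a = -128; a <= 127; ++a) {
        for (unsigned c = 0; c <= 24; ++c) {
          int32_t wide = (int32_t)(int8_t)a;                         // sext
          uint8_t viaLshr = (uint8_t)((uint32_t)wide >> c);          // lshr + trunc
          uint8_t viaAshr = (uint8_t)((int8_t)a >> std::min(c, 7u)); // ashr
          assert(viaLshr == viaAshr);
        }
      }
    }
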
- if ((ICI->getPredicate() == ICmpInst::ICMP_SLT && Op1CV == 0) || + if ((ICI->getPredicate() == ICmpInst::ICMP_SLT && Op1CV.isNullValue()) || (ICI->getPredicate() == ICmpInst::ICMP_SGT && Op1CV.isAllOnesValue())) { if (!DoTransform) return ICI; Value *In = ICI->getOperand(0); Value *Sh = ConstantInt::get(In->getType(), In->getType()->getScalarSizeInBits() - 1); - In = Builder->CreateLShr(In, Sh, In->getName() + ".lobit"); + In = Builder.CreateLShr(In, Sh, In->getName() + ".lobit"); if (In->getType() != CI.getType()) - In = Builder->CreateIntCast(In, CI.getType(), false/*ZExt*/); + In = Builder.CreateIntCast(In, CI.getType(), false /*ZExt*/); if (ICI->getPredicate() == ICmpInst::ICMP_SGT) { Constant *One = ConstantInt::get(In->getType(), 1); - In = Builder->CreateXor(In, One, In->getName() + ".not"); + In = Builder.CreateXor(In, One, In->getName() + ".not"); } return replaceInstUsesWith(CI, In); @@ -681,19 +687,18 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, ZExtInst &CI, // zext (X != 0) to i32 --> X>>1 iff X has only the 2nd bit set. // zext (X != 1) to i32 --> X^1 iff X has only the low bit set. // zext (X != 2) to i32 --> (X>>1)^1 iff X has only the 2nd bit set. - if ((Op1CV == 0 || Op1CV.isPowerOf2()) && + if ((Op1CV.isNullValue() || Op1CV.isPowerOf2()) && // This only works for EQ and NE ICI->isEquality()) { // If Op1C some other power of two, convert: - KnownBits Known(Op1C->getType()->getBitWidth()); - computeKnownBits(ICI->getOperand(0), Known, 0, &CI); + KnownBits Known = computeKnownBits(ICI->getOperand(0), 0, &CI); APInt KnownZeroMask(~Known.Zero); if (KnownZeroMask.isPowerOf2()) { // Exactly 1 possible 1? if (!DoTransform) return ICI; bool isNE = ICI->getPredicate() == ICmpInst::ICMP_NE; - if (Op1CV != 0 && (Op1CV != KnownZeroMask)) { + if (!Op1CV.isNullValue() && (Op1CV != KnownZeroMask)) { // (X&4) == 2 --> false // (X&4) != 2 --> true Constant *Res = ConstantInt::get(Type::getInt1Ty(CI.getContext()), @@ -707,19 +712,19 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, ZExtInst &CI, if (ShAmt) { // Perform a logical shr by shiftamt. // Insert the shift to put the result in the low bit. - In = Builder->CreateLShr(In, ConstantInt::get(In->getType(), ShAmt), - In->getName() + ".lobit"); + In = Builder.CreateLShr(In, ConstantInt::get(In->getType(), ShAmt), + In->getName() + ".lobit"); } - if ((Op1CV != 0) == isNE) { // Toggle the low bit. + if (!Op1CV.isNullValue() == isNE) { // Toggle the low bit. Constant *One = ConstantInt::get(In->getType(), 1); - In = Builder->CreateXor(In, One); + In = Builder.CreateXor(In, One); } if (CI.getType() == In->getType()) return replaceInstUsesWith(CI, In); - Value *IntCast = Builder->CreateIntCast(In, CI.getType(), false); + Value *IntCast = Builder.CreateIntCast(In, CI.getType(), false); return replaceInstUsesWith(CI, IntCast); } } @@ -730,14 +735,11 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, ZExtInst &CI, // may lead to additional simplifications. 
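
The switch from Op1CV == 0 to Op1CV.isNullValue() is mechanical, but the fold it guards deserves a concrete check: a sign-bit test zero-extended to i32 is exactly the logical shift of the sign bit, with an xor for the inverted predicate. A tiny C++ verification:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (int32_t x : {INT32_MIN, -7, -1, 0, 1, INT32_MAX}) {
        uint32_t lobit = (uint32_t)x >> 31;         // x >>u 31
        assert(lobit == (uint32_t)(x < 0));         // zext (x <s 0)
        assert((lobit ^ 1u) == (uint32_t)(x > -1)); // zext (x >s -1)
      }
    }
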
if (ICI->isEquality() && CI.getType() == ICI->getOperand(0)->getType()) { if (IntegerType *ITy = dyn_cast(CI.getType())) { - uint32_t BitWidth = ITy->getBitWidth(); Value *LHS = ICI->getOperand(0); Value *RHS = ICI->getOperand(1); - KnownBits KnownLHS(BitWidth); - KnownBits KnownRHS(BitWidth); - computeKnownBits(LHS, KnownLHS, 0, &CI); - computeKnownBits(RHS, KnownRHS, 0, &CI); + KnownBits KnownLHS = computeKnownBits(LHS, 0, &CI); + KnownBits KnownRHS = computeKnownBits(RHS, 0, &CI); if (KnownLHS.Zero == KnownRHS.Zero && KnownLHS.One == KnownRHS.One) { APInt KnownBits = KnownLHS.Zero | KnownLHS.One; @@ -745,19 +747,19 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, ZExtInst &CI, if (UnknownBit.countPopulation() == 1) { if (!DoTransform) return ICI; - Value *Result = Builder->CreateXor(LHS, RHS); + Value *Result = Builder.CreateXor(LHS, RHS); // Mask off any bits that are set and won't be shifted away. if (KnownLHS.One.uge(UnknownBit)) - Result = Builder->CreateAnd(Result, + Result = Builder.CreateAnd(Result, ConstantInt::get(ITy, UnknownBit)); // Shift the bit we're testing down to the lsb. - Result = Builder->CreateLShr( + Result = Builder.CreateLShr( Result, ConstantInt::get(ITy, UnknownBit.countTrailingZeros())); if (ICI->getPredicate() == ICmpInst::ICMP_EQ) - Result = Builder->CreateXor(Result, ConstantInt::get(ITy, 1)); + Result = Builder.CreateXor(Result, ConstantInt::get(ITy, 1)); Result->takeName(ICI); return replaceInstUsesWith(CI, Result); } @@ -957,7 +959,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { if (SrcSize < DstSize) { APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize)); Constant *AndConst = ConstantInt::get(A->getType(), AndValue); - Value *And = Builder->CreateAnd(A, AndConst, CSrc->getName()+".mask"); + Value *And = Builder.CreateAnd(A, AndConst, CSrc->getName() + ".mask"); return new ZExtInst(And, CI.getType()); } @@ -967,7 +969,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { AndValue)); } if (SrcSize > DstSize) { - Value *Trunc = Builder->CreateTrunc(A, CI.getType()); + Value *Trunc = Builder.CreateTrunc(A, CI.getType()); APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize)); return BinaryOperator::CreateAnd(Trunc, ConstantInt::get(Trunc->getType(), @@ -989,8 +991,8 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { (transformZExtICmp(LHS, CI, false) || transformZExtICmp(RHS, CI, false))) { // zext (or icmp, icmp) -> or (zext icmp), (zext icmp) - Value *LCast = Builder->CreateZExt(LHS, CI.getType(), LHS->getName()); - Value *RCast = Builder->CreateZExt(RHS, CI.getType(), RHS->getName()); + Value *LCast = Builder.CreateZExt(LHS, CI.getType(), LHS->getName()); + Value *RCast = Builder.CreateZExt(RHS, CI.getType(), RHS->getName()); BinaryOperator *Or = BinaryOperator::Create(Instruction::Or, LCast, RCast); // Perform the elimination. 
@@ -1017,7 +1019,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { match(And, m_OneUse(m_And(m_Trunc(m_Value(X)), m_Specific(C)))) && X->getType() == CI.getType()) { Constant *ZC = ConstantExpr::getZExt(C, CI.getType()); - return BinaryOperator::CreateXor(Builder->CreateAnd(X, ZC), ZC); + return BinaryOperator::CreateXor(Builder.CreateAnd(X, ZC), ZC); } return nullptr; @@ -1040,12 +1042,12 @@ Instruction *InstCombiner::transformSExtICmp(ICmpInst *ICI, Instruction &CI) { Value *Sh = ConstantInt::get(Op0->getType(), Op0->getType()->getScalarSizeInBits()-1); - Value *In = Builder->CreateAShr(Op0, Sh, Op0->getName()+".lobit"); + Value *In = Builder.CreateAShr(Op0, Sh, Op0->getName() + ".lobit"); if (In->getType() != CI.getType()) - In = Builder->CreateIntCast(In, CI.getType(), true/*SExt*/); + In = Builder.CreateIntCast(In, CI.getType(), true /*SExt*/); if (Pred == ICmpInst::ICMP_SGT) - In = Builder->CreateNot(In, In->getName()+".not"); + In = Builder.CreateNot(In, In->getName() + ".not"); return replaceInstUsesWith(CI, In); } } @@ -1056,9 +1058,7 @@ Instruction *InstCombiner::transformSExtICmp(ICmpInst *ICI, Instruction &CI) { // the icmp and sext into bitwise/integer operations. if (ICI->hasOneUse() && ICI->isEquality() && (Op1C->isZero() || Op1C->getValue().isPowerOf2())){ - unsigned BitWidth = Op1C->getType()->getBitWidth(); - KnownBits Known(BitWidth); - computeKnownBits(Op0, Known, 0, &CI); + KnownBits Known = computeKnownBits(Op0, 0, &CI); APInt KnownZeroMask(~Known.Zero); if (KnownZeroMask.isPowerOf2()) { @@ -1078,26 +1078,26 @@ Instruction *InstCombiner::transformSExtICmp(ICmpInst *ICI, Instruction &CI) { unsigned ShiftAmt = KnownZeroMask.countTrailingZeros(); // Perform a right shift to place the desired bit in the LSB. if (ShiftAmt) - In = Builder->CreateLShr(In, - ConstantInt::get(In->getType(), ShiftAmt)); + In = Builder.CreateLShr(In, + ConstantInt::get(In->getType(), ShiftAmt)); // At this point "In" is either 1 or 0. Subtract 1 to turn // {1, 0} -> {0, -1}. - In = Builder->CreateAdd(In, - ConstantInt::getAllOnesValue(In->getType()), - "sext"); + In = Builder.CreateAdd(In, + ConstantInt::getAllOnesValue(In->getType()), + "sext"); } else { // sext ((x & 2^n) != 0) -> (x << bitwidth-n) a>> bitwidth-1 // sext ((x & 2^n) == 2^n) -> (x << bitwidth-n) a>> bitwidth-1 unsigned ShiftAmt = KnownZeroMask.countLeadingZeros(); // Perform a left shift to place the desired bit in the MSB. if (ShiftAmt) - In = Builder->CreateShl(In, - ConstantInt::get(In->getType(), ShiftAmt)); + In = Builder.CreateShl(In, + ConstantInt::get(In->getType(), ShiftAmt)); // Distribute the bit over the whole bit width. - In = Builder->CreateAShr(In, ConstantInt::get(In->getType(), - BitWidth - 1), "sext"); + In = Builder.CreateAShr(In, ConstantInt::get(In->getType(), + KnownZeroMask.getBitWidth() - 1), "sext"); } if (CI.getType() == In->getType()) @@ -1188,10 +1188,9 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) { // If we know that the value being extended is positive, we can use a zext // instead. - bool KnownZero, KnownOne; - ComputeSignBit(Src, KnownZero, KnownOne, 0, &CI); - if (KnownZero) { - Value *ZExt = Builder->CreateZExt(Src, DestTy); + KnownBits Known = computeKnownBits(Src, 0, &CI); + if (Known.isNonNegative()) { + Value *ZExt = Builder.CreateZExt(Src, DestTy); return replaceInstUsesWith(CI, ZExt); } @@ -1217,7 +1216,7 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) { // We need to emit a shl + ashr to do the sign extend. 
Value *ShAmt = ConstantInt::get(DestTy, DestBitSize-SrcBitSize); - return BinaryOperator::CreateAShr(Builder->CreateShl(Res, ShAmt, "sext"), + return BinaryOperator::CreateAShr(Builder.CreateShl(Res, ShAmt, "sext"), ShAmt); } @@ -1229,7 +1228,7 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) { unsigned SrcBitSize = SrcTy->getScalarSizeInBits(); unsigned DestBitSize = DestTy->getScalarSizeInBits(); Constant *ShAmt = ConstantInt::get(DestTy, DestBitSize - SrcBitSize); - return BinaryOperator::CreateAShr(Builder->CreateShl(X, ShAmt), ShAmt); + return BinaryOperator::CreateAShr(Builder.CreateShl(X, ShAmt), ShAmt); } if (ICmpInst *ICI = dyn_cast(Src)) @@ -1258,7 +1257,7 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) { unsigned SrcDstSize = CI.getType()->getScalarSizeInBits(); unsigned ShAmt = CA->getZExtValue()+SrcDstSize-MidSize; Constant *ShAmtV = ConstantInt::get(CI.getType(), ShAmt); - A = Builder->CreateShl(A, ShAmtV, CI.getName()); + A = Builder.CreateShl(A, ShAmtV, CI.getName()); return BinaryOperator::CreateAShr(A, ShAmtV); } @@ -1347,9 +1346,9 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { // case of interest here is (float)((double)float + float)). if (OpWidth >= 2*DstWidth+1 && DstWidth >= SrcWidth) { if (LHSOrig->getType() != CI.getType()) - LHSOrig = Builder->CreateFPExt(LHSOrig, CI.getType()); + LHSOrig = Builder.CreateFPExt(LHSOrig, CI.getType()); if (RHSOrig->getType() != CI.getType()) - RHSOrig = Builder->CreateFPExt(RHSOrig, CI.getType()); + RHSOrig = Builder.CreateFPExt(RHSOrig, CI.getType()); Instruction *RI = BinaryOperator::Create(OpI->getOpcode(), LHSOrig, RHSOrig); RI->copyFastMathFlags(OpI); @@ -1364,9 +1363,9 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { // in the destination format if it can represent both sources. if (OpWidth >= LHSWidth + RHSWidth && DstWidth >= SrcWidth) { if (LHSOrig->getType() != CI.getType()) - LHSOrig = Builder->CreateFPExt(LHSOrig, CI.getType()); + LHSOrig = Builder.CreateFPExt(LHSOrig, CI.getType()); if (RHSOrig->getType() != CI.getType()) - RHSOrig = Builder->CreateFPExt(RHSOrig, CI.getType()); + RHSOrig = Builder.CreateFPExt(RHSOrig, CI.getType()); Instruction *RI = BinaryOperator::CreateFMul(LHSOrig, RHSOrig); RI->copyFastMathFlags(OpI); @@ -1382,9 +1381,9 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { // TODO: Tighten bound via rigorous analysis of the unbalanced case. 
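
The shl + ashr pair visitSExt emits above is the classic sign-extend-in-register idiom: push the low SrcBitSize bits to the top, then arithmetic-shift them back. A standalone C++ form (assuming arithmetic right shift on signed int):

    #include <cassert>
    #include <cstdint>

    // Sign-extend the low n bits of x (1 <= n <= 31).
    int32_t signExtendInReg(uint32_t x, unsigned n) {
      return (int32_t)(x << (32 - n)) >> (32 - n);
    }

    int main() {
      assert(signExtendInReg(0xFFu, 8) == -1);        // low byte 0xFF -> -1
      assert(signExtendInReg(0x7Fu, 8) == 127);       // sign bit clear
      assert(signExtendInReg(0xABCDEF0Cu, 4) == -4);  // low nibble 0xC -> -4
    }
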
if (OpWidth >= 2*DstWidth && DstWidth >= SrcWidth) { if (LHSOrig->getType() != CI.getType()) - LHSOrig = Builder->CreateFPExt(LHSOrig, CI.getType()); + LHSOrig = Builder.CreateFPExt(LHSOrig, CI.getType()); if (RHSOrig->getType() != CI.getType()) - RHSOrig = Builder->CreateFPExt(RHSOrig, CI.getType()); + RHSOrig = Builder.CreateFPExt(RHSOrig, CI.getType()); Instruction *RI = BinaryOperator::CreateFDiv(LHSOrig, RHSOrig); RI->copyFastMathFlags(OpI); @@ -1399,11 +1398,11 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { if (SrcWidth == OpWidth) break; if (LHSWidth < SrcWidth) - LHSOrig = Builder->CreateFPExt(LHSOrig, RHSOrig->getType()); + LHSOrig = Builder.CreateFPExt(LHSOrig, RHSOrig->getType()); else if (RHSWidth <= SrcWidth) - RHSOrig = Builder->CreateFPExt(RHSOrig, LHSOrig->getType()); + RHSOrig = Builder.CreateFPExt(RHSOrig, LHSOrig->getType()); if (LHSOrig != OpI->getOperand(0) || RHSOrig != OpI->getOperand(1)) { - Value *ExactResult = Builder->CreateFRem(LHSOrig, RHSOrig); + Value *ExactResult = Builder.CreateFRem(LHSOrig, RHSOrig); if (Instruction *RI = dyn_cast(ExactResult)) RI->copyFastMathFlags(OpI); return CastInst::CreateFPCast(ExactResult, CI.getType()); @@ -1412,8 +1411,8 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { // (fptrunc (fneg x)) -> (fneg (fptrunc x)) if (BinaryOperator::isFNeg(OpI)) { - Value *InnerTrunc = Builder->CreateFPTrunc(OpI->getOperand(1), - CI.getType()); + Value *InnerTrunc = Builder.CreateFPTrunc(OpI->getOperand(1), + CI.getType()); Instruction *RI = BinaryOperator::CreateFNeg(InnerTrunc); RI->copyFastMathFlags(OpI); return RI; @@ -1432,10 +1431,8 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { (isa(SI->getOperand(1)) || isa(SI->getOperand(2))) && matchSelectPattern(SI, LHS, RHS).Flavor == SPF_UNKNOWN) { - Value *LHSTrunc = Builder->CreateFPTrunc(SI->getOperand(1), - CI.getType()); - Value *RHSTrunc = Builder->CreateFPTrunc(SI->getOperand(2), - CI.getType()); + Value *LHSTrunc = Builder.CreateFPTrunc(SI->getOperand(1), CI.getType()); + Value *RHSTrunc = Builder.CreateFPTrunc(SI->getOperand(2), CI.getType()); return SelectInst::Create(SI->getOperand(0), LHSTrunc, RHSTrunc); } @@ -1465,7 +1462,7 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { // Do unary FP operation on smaller type. // (fptrunc (fabs x)) -> (fabs (fptrunc x)) - Value *InnerTrunc = Builder->CreateFPTrunc(Src, CI.getType()); + Value *InnerTrunc = Builder.CreateFPTrunc(Src, CI.getType()); Type *IntrinsicType[] = { CI.getType() }; Function *Overload = Intrinsic::getDeclaration( CI.getModule(), II->getIntrinsicID(), IntrinsicType); @@ -1482,7 +1479,7 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { } } - if (Instruction *I = shrinkInsertElt(CI, *Builder)) + if (Instruction *I = shrinkInsertElt(CI, Builder)) return I; return nullptr; @@ -1577,7 +1574,7 @@ Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) { if (CI.getType()->isVectorTy()) // Handle vectors of pointers. Ty = VectorType::get(Ty, CI.getType()->getVectorNumElements()); - Value *P = Builder->CreateZExtOrTrunc(CI.getOperand(0), Ty); + Value *P = Builder.CreateZExtOrTrunc(CI.getOperand(0), Ty); return new IntToPtrInst(P, CI.getType()); } @@ -1627,7 +1624,7 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) { if (Ty->isVectorTy()) // Handle vectors of pointers. 
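
The visitFPTrunc guards in the preceding hunks (OpWidth >= 2*DstWidth + 1 for add/sub, with analogous bounds for mul and div) are precision conditions under which double rounding is harmless. For float addition carried out in double, 53 >= 2*24 + 1, so narrowing the operation is exact. A spot check (assumes IEEE-754 float/double with no excess precision):

    #include <cassert>
    #include <random>

    int main() {
      // fptrunc(fadd(fpext a, fpext b)) -> fadd(a, b)
      std::mt19937 rng(42);
      std::uniform_real_distribution<float> dist(-1e6f, 1e6f);
      for (int i = 0; i < 100000; ++i) {
        float a = dist(rng), b = dist(rng);
        assert((float)((double)a + (double)b) == a + b);
      }
    }
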
PtrTy = VectorType::get(PtrTy, Ty->getVectorNumElements()); - Value *P = Builder->CreatePtrToInt(CI.getOperand(0), PtrTy); + Value *P = Builder.CreatePtrToInt(CI.getOperand(0), PtrTy); return CastInst::CreateIntegerCast(P, Ty, /*isSigned=*/false); } @@ -1653,7 +1650,7 @@ static Instruction *optimizeVectorResize(Value *InVal, VectorType *DestTy, return nullptr; SrcTy = VectorType::get(DestTy->getElementType(), SrcTy->getNumElements()); - InVal = IC.Builder->CreateBitCast(InVal, SrcTy); + InVal = IC.Builder.CreateBitCast(InVal, SrcTy); } // Now that the element types match, get the shuffle mask and RHS of the @@ -1833,8 +1830,8 @@ static Value *optimizeIntegerToVectorInsertions(BitCastInst &CI, for (unsigned i = 0, e = Elements.size(); i != e; ++i) { if (!Elements[i]) continue; // Unset element. - Result = IC.Builder->CreateInsertElement(Result, Elements[i], - IC.Builder->getInt32(i)); + Result = IC.Builder.CreateInsertElement(Result, Elements[i], + IC.Builder.getInt32(i)); } return Result; @@ -1845,8 +1842,7 @@ static Value *optimizeIntegerToVectorInsertions(BitCastInst &CI, /// vectors better than bitcasts of scalars because vector registers are /// usually not type-specific like scalar integer or scalar floating-point. static Instruction *canonicalizeBitCastExtElt(BitCastInst &BitCast, - InstCombiner &IC, - const DataLayout &DL) { + InstCombiner &IC) { // TODO: Create and use a pattern matcher for ExtractElementInst. auto *ExtElt = dyn_cast(BitCast.getOperand(0)); if (!ExtElt || !ExtElt->hasOneUse()) @@ -1860,8 +1856,8 @@ static Instruction *canonicalizeBitCastExtElt(BitCastInst &BitCast, unsigned NumElts = ExtElt->getVectorOperandType()->getNumElements(); auto *NewVecType = VectorType::get(DestType, NumElts); - auto *NewBC = IC.Builder->CreateBitCast(ExtElt->getVectorOperand(), - NewVecType, "bc"); + auto *NewBC = IC.Builder.CreateBitCast(ExtElt->getVectorOperand(), + NewVecType, "bc"); return ExtractElementInst::Create(NewBC, ExtElt->getIndexOperand()); } @@ -1870,7 +1866,7 @@ static Instruction *foldBitCastBitwiseLogic(BitCastInst &BitCast, InstCombiner::BuilderTy &Builder) { Type *DestTy = BitCast.getType(); BinaryOperator *BO; - if (!DestTy->getScalarType()->isIntegerTy() || + if (!DestTy->isIntOrIntVectorTy() || !match(BitCast.getOperand(0), m_OneUse(m_BinOp(BO))) || !BO->isBitwiseLogicOp()) return nullptr; @@ -1896,6 +1892,18 @@ static Instruction *foldBitCastBitwiseLogic(BitCastInst &BitCast, return BinaryOperator::Create(BO->getOpcode(), CastedOp0, X); } + // Canonicalize vector bitcasts to come before vector bitwise logic with a + // constant. This eases recognition of special constants for later ops. + // Example: + // icmp u/s (a ^ signmask), (b ^ signmask) --> icmp s/u a, b + Constant *C; + if (match(BO->getOperand(1), m_Constant(C))) { + // bitcast (logic X, C) --> logic (bitcast X, C') + Value *CastedOp0 = Builder.CreateBitCast(BO->getOperand(0), DestTy); + Value *CastedC = ConstantExpr::getBitCast(C, DestTy); + return BinaryOperator::Create(BO->getOpcode(), CastedOp0, CastedC); + } + return nullptr; } @@ -2021,8 +2029,8 @@ Instruction *InstCombiner::optimizeBitCastFromPhi(CastInst &CI, PHINode *PN) { // For each old PHI node, create a corresponding new PHI node with a type A. 
SmallDenseMap NewPNodes; for (auto *OldPN : OldPhiNodes) { - Builder->SetInsertPoint(OldPN); - PHINode *NewPN = Builder->CreatePHI(DestTy, OldPN->getNumOperands()); + Builder.SetInsertPoint(OldPN); + PHINode *NewPN = Builder.CreatePHI(DestTy, OldPN->getNumOperands()); NewPNodes[OldPN] = NewPN; } @@ -2035,8 +2043,8 @@ Instruction *InstCombiner::optimizeBitCastFromPhi(CastInst &CI, PHINode *PN) { if (auto *C = dyn_cast(V)) { NewV = ConstantExpr::getBitCast(C, DestTy); } else if (auto *LI = dyn_cast(V)) { - Builder->SetInsertPoint(LI->getNextNode()); - NewV = Builder->CreateBitCast(LI, DestTy); + Builder.SetInsertPoint(LI->getNextNode()); + NewV = Builder.CreateBitCast(LI, DestTy); Worklist.Add(LI); } else if (auto *BCI = dyn_cast(V)) { NewV = BCI->getOperand(0); @@ -2052,9 +2060,9 @@ Instruction *InstCombiner::optimizeBitCastFromPhi(CastInst &CI, PHINode *PN) { for (User *U : PN->users()) { auto *SI = dyn_cast(U); if (SI && SI->isSimple() && SI->getOperand(0) == PN) { - Builder->SetInsertPoint(SI); + Builder.SetInsertPoint(SI); auto *NewBC = - cast(Builder->CreateBitCast(NewPNodes[PN], SrcTy)); + cast(Builder.CreateBitCast(NewPNodes[PN], SrcTy)); SI->setOperand(0, NewBC); Worklist.Add(SI); assert(hasStoreUsersOnly(*NewBC)); @@ -2109,14 +2117,14 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { // If we found a path from the src to dest, create the getelementptr now. if (SrcElTy == DstElTy) { - SmallVector Idxs(NumZeros + 1, Builder->getInt32(0)); + SmallVector Idxs(NumZeros + 1, Builder.getInt32(0)); return GetElementPtrInst::CreateInBounds(Src, Idxs); } } if (VectorType *DestVTy = dyn_cast(DestTy)) { if (DestVTy->getNumElements() == 1 && !SrcTy->isVectorTy()) { - Value *Elem = Builder->CreateBitCast(Src, DestVTy->getElementType()); + Value *Elem = Builder.CreateBitCast(Src, DestVTy->getElementType()); return InsertElementInst::Create(UndefValue::get(DestTy), Elem, Constant::getNullValue(Type::getInt32Ty(CI.getContext()))); // FIXME: Canonicalize bitcast(insertelement) -> insertelement(bitcast) @@ -2149,7 +2157,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { // scalar-scalar cast. if (!DestTy->isVectorTy()) { Value *Elem = - Builder->CreateExtractElement(Src, + Builder.CreateExtractElement(Src, Constant::getNullValue(Type::getInt32Ty(CI.getContext()))); return CastInst::Create(Instruction::BitCast, Elem, DestTy); } @@ -2178,8 +2186,8 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { Tmp->getOperand(0)->getType() == DestTy) || ((Tmp = dyn_cast(SVI->getOperand(1))) && Tmp->getOperand(0)->getType() == DestTy)) { - Value *LHS = Builder->CreateBitCast(SVI->getOperand(0), DestTy); - Value *RHS = Builder->CreateBitCast(SVI->getOperand(1), DestTy); + Value *LHS = Builder.CreateBitCast(SVI->getOperand(0), DestTy); + Value *RHS = Builder.CreateBitCast(SVI->getOperand(1), DestTy); // Return a new shuffle vector. Use the same element ID's, as we // know the vector types match #elts. 
return new ShuffleVectorInst(LHS, RHS, SVI->getOperand(2)); @@ -2192,13 +2200,13 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { if (Instruction *I = optimizeBitCastFromPhi(CI, PN)) return I; - if (Instruction *I = canonicalizeBitCastExtElt(CI, *this, DL)) + if (Instruction *I = canonicalizeBitCastExtElt(CI, *this)) return I; - if (Instruction *I = foldBitCastBitwiseLogic(CI, *Builder)) + if (Instruction *I = foldBitCastBitwiseLogic(CI, Builder)) return I; - if (Instruction *I = foldBitCastSelect(CI, *Builder)) + if (Instruction *I = foldBitCastSelect(CI, Builder)) return I; if (SrcTy->isPointerTy()) @@ -2222,7 +2230,7 @@ Instruction *InstCombiner::visitAddrSpaceCast(AddrSpaceCastInst &CI) { MidTy = VectorType::get(MidTy, VT->getNumElements()); } - Value *NewBitCast = Builder->CreateBitCast(Src, MidTy); + Value *NewBitCast = Builder.CreateBitCast(Src, MidTy); return new AddrSpaceCastInst(NewBitCast, CI.getType()); } diff --git a/interpreter/llvm/src/lib/Transforms/InstCombine/InstCombineCompares.cpp b/interpreter/llvm/src/lib/Transforms/InstCombine/InstCombineCompares.cpp index e3639db544a74..a8faaecb5c342 100644 --- a/interpreter/llvm/src/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/interpreter/llvm/src/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -112,10 +112,10 @@ static bool subWithOverflow(Constant *&Result, Constant *In1, /// Given an icmp instruction, return true if any use of this comparison is a /// branch on sign bit comparison. -static bool isBranchOnSignBitCheck(ICmpInst &I, bool isSignBit) { +static bool hasBranchUse(ICmpInst &I) { for (auto *U : I.users()) if (isa(U)) - return isSignBit; + return true; return false; } @@ -127,7 +127,7 @@ static bool isSignBitCheck(ICmpInst::Predicate Pred, const APInt &RHS, switch (Pred) { case ICmpInst::ICMP_SLT: // True if LHS s< 0 TrueIfSigned = true; - return RHS == 0; + return RHS.isNullValue(); case ICmpInst::ICMP_SLE: // True if LHS s<= RHS and RHS == -1 TrueIfSigned = true; return RHS.isAllOnesValue(); @@ -155,10 +155,10 @@ static bool isSignTest(ICmpInst::Predicate &Pred, const APInt &C) { if (!ICmpInst::isSigned(Pred)) return false; - if (C == 0) + if (C.isNullValue()) return ICmpInst::isRelational(Pred); - if (C == 1) { + if (C.isOneValue()) { if (Pred == ICmpInst::ICMP_SLT) { Pred = ICmpInst::ICMP_SLE; return true; @@ -392,7 +392,7 @@ Instruction *InstCombiner::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, Type *IntPtrTy = DL.getIntPtrType(GEP->getType()); unsigned PtrSize = IntPtrTy->getIntegerBitWidth(); if (Idx->getType()->getPrimitiveSizeInBits() > PtrSize) - Idx = Builder->CreateTrunc(Idx, IntPtrTy); + Idx = Builder.CreateTrunc(Idx, IntPtrTy); } // If the comparison is only true for one or two elements, emit direct @@ -400,7 +400,7 @@ Instruction *InstCombiner::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, if (SecondTrueElement != Overdefined) { // None true -> false. if (FirstTrueElement == Undefined) - return replaceInstUsesWith(ICI, Builder->getFalse()); + return replaceInstUsesWith(ICI, Builder.getFalse()); Value *FirstTrueIdx = ConstantInt::get(Idx->getType(), FirstTrueElement); @@ -409,9 +409,9 @@ Instruction *InstCombiner::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, return new ICmpInst(ICmpInst::ICMP_EQ, Idx, FirstTrueIdx); // True for two elements -> 'i == 47 | i == 72'. 
- Value *C1 = Builder->CreateICmpEQ(Idx, FirstTrueIdx); + Value *C1 = Builder.CreateICmpEQ(Idx, FirstTrueIdx); Value *SecondTrueIdx = ConstantInt::get(Idx->getType(), SecondTrueElement); - Value *C2 = Builder->CreateICmpEQ(Idx, SecondTrueIdx); + Value *C2 = Builder.CreateICmpEQ(Idx, SecondTrueIdx); return BinaryOperator::CreateOr(C1, C2); } @@ -420,7 +420,7 @@ Instruction *InstCombiner::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, if (SecondFalseElement != Overdefined) { // None false -> true. if (FirstFalseElement == Undefined) - return replaceInstUsesWith(ICI, Builder->getTrue()); + return replaceInstUsesWith(ICI, Builder.getTrue()); Value *FirstFalseIdx = ConstantInt::get(Idx->getType(), FirstFalseElement); @@ -429,9 +429,9 @@ Instruction *InstCombiner::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, return new ICmpInst(ICmpInst::ICMP_NE, Idx, FirstFalseIdx); // False for two elements -> 'i != 47 & i != 72'. - Value *C1 = Builder->CreateICmpNE(Idx, FirstFalseIdx); + Value *C1 = Builder.CreateICmpNE(Idx, FirstFalseIdx); Value *SecondFalseIdx = ConstantInt::get(Idx->getType(),SecondFalseElement); - Value *C2 = Builder->CreateICmpNE(Idx, SecondFalseIdx); + Value *C2 = Builder.CreateICmpNE(Idx, SecondFalseIdx); return BinaryOperator::CreateAnd(C1, C2); } @@ -443,7 +443,7 @@ Instruction *InstCombiner::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, // Generate (i-FirstTrue) <u (TrueRangeEnd-FirstTrue). if (FirstTrueElement) { Value *Offs = ConstantInt::get(Idx->getType(), -FirstTrueElement); - Idx = Builder->CreateAdd(Idx, Offs); + Idx = Builder.CreateAdd(Idx, Offs); } Value *End = ConstantInt::get(Idx->getType(), @@ -457,7 +457,7 @@ Instruction *InstCombiner::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, // Generate (i-FirstFalse) >u (FalseRangeEnd-FirstFalse). if (FirstFalseElement) { Value *Offs = ConstantInt::get(Idx->getType(), -FirstFalseElement); - Idx = Builder->CreateAdd(Idx, Offs); + Idx = Builder.CreateAdd(Idx, Offs); } Value *End = ConstantInt::get(Idx->getType(), @@ -481,9 +481,9 @@ Instruction *InstCombiner::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, Ty = DL.getSmallestLegalIntType(Init->getContext(), ArrayElementCount); if (Ty) { - Value *V = Builder->CreateIntCast(Idx, Ty, false); - V = Builder->CreateLShr(ConstantInt::get(Ty, MagicBitvector), V); - V = Builder->CreateAnd(ConstantInt::get(Ty, 1), V); + Value *V = Builder.CreateIntCast(Idx, Ty, false); + V = Builder.CreateLShr(ConstantInt::get(Ty, MagicBitvector), V); + V = Builder.CreateAnd(ConstantInt::get(Ty, 1), V); return new ICmpInst(ICmpInst::ICMP_NE, V, ConstantInt::get(Ty, 0)); } } @@ -566,7 +566,7 @@ static Value *evaluateGEPOffsetExpression(User *GEP, InstCombiner &IC, // we don't need to bother extending: the extension won't affect where the // computation crosses zero. if (VariableIdx->getType()->getPrimitiveSizeInBits() > IntPtrWidth) { - VariableIdx = IC.Builder->CreateTrunc(VariableIdx, IntPtrTy); + VariableIdx = IC.Builder.CreateTrunc(VariableIdx, IntPtrTy); } return VariableIdx; } @@ -588,10 +588,10 @@ // Okay, we can do this evaluation. Start by converting the index to intptr.
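// Not from the patch: a minimal standalone check of the "magic bitvector"
// trick used above -- encode, per index i, whether the compare holds for
// element i, then answer the load+icmp as ((Magic >> i) & 1) != 0. The table
// and predicate here are made up for illustration:
#include <cassert>
#include <cstdint>

int main() {
  const int Table[8] = {4, 9, 1, 7, 9, 2, 9, 0};
  uint8_t Magic = 0;
  for (int I = 0; I < 8; ++I)
    Magic |= (uint8_t)(Table[I] == 9) << I; // precomputed from the initializer
  for (int I = 0; I < 8; ++I)
    assert((((Magic >> I) & 1) != 0) == (Table[I] == 9)); // no load needed
  return 0;
}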
if (VariableIdx->getType() != IntPtrTy) - VariableIdx = IC.Builder->CreateIntCast(VariableIdx, IntPtrTy, + VariableIdx = IC.Builder.CreateIntCast(VariableIdx, IntPtrTy, true /*Signed*/); Constant *OffsetVal = ConstantInt::get(IntPtrTy, NewOffs); - return IC.Builder->CreateAdd(VariableIdx, OffsetVal, "offset"); + return IC.Builder.CreateAdd(VariableIdx, OffsetVal, "offset"); } /// Returns true if we can rewrite Start as a GEP with pointer Base @@ -981,13 +981,13 @@ Instruction *InstCombiner::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS, if (LHSIndexTy != RHSIndexTy) { if (LHSIndexTy->getPrimitiveSizeInBits() < RHSIndexTy->getPrimitiveSizeInBits()) { - ROffset = Builder->CreateTrunc(ROffset, LHSIndexTy); + ROffset = Builder.CreateTrunc(ROffset, LHSIndexTy); } else - LOffset = Builder->CreateTrunc(LOffset, RHSIndexTy); + LOffset = Builder.CreateTrunc(LOffset, RHSIndexTy); } - Value *Cmp = Builder->CreateICmp(ICmpInst::getSignedPredicate(Cond), - LOffset, ROffset); + Value *Cmp = Builder.CreateICmp(ICmpInst::getSignedPredicate(Cond), + LOffset, ROffset); return replaceInstUsesWith(I, Cmp); } @@ -1026,7 +1026,7 @@ Instruction *InstCombiner::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS, if (NumDifferences == 0) // SAME GEP? return replaceInstUsesWith(I, // No comparison is needed here. - Builder->getInt1(ICmpInst::isTrueWhenEqual(Cond))); + Builder.getInt1(ICmpInst::isTrueWhenEqual(Cond))); else if (NumDifferences == 1 && GEPsInBounds) { Value *LHSV = GEPLHS->getOperand(DiffOperand); @@ -1174,7 +1174,7 @@ Instruction *InstCombiner::foldICmpAddOpConst(Instruction &ICI, // (X+ -1) >s X --> X X == -128 assert(Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE); - Constant *C = Builder->getInt(CI->getValue()-1); + Constant *C = Builder.getInt(CI->getValue() - 1); return new ICmpInst(ICmpInst::ICMP_SLT, X, ConstantExpr::getSub(SMax, C)); } @@ -1193,7 +1193,7 @@ Instruction *InstCombiner::foldICmpShrConstConst(ICmpInst &I, Value *A, }; // Don't bother doing any work for cases which InstSimplify handles. - if (AP2 == 0) + if (AP2.isNullValue()) return nullptr; bool IsAShr = isa(I.getOperand(0)); @@ -1252,7 +1252,7 @@ Instruction *InstCombiner::foldICmpShlConstConst(ICmpInst &I, Value *A, }; // Don't bother doing any work for cases which InstSimplify handles. - if (AP2 == 0) + if (AP2.isNullValue()) return nullptr; unsigned AP2TrailingZeros = AP2.countTrailingZeros(); @@ -1347,17 +1347,17 @@ static Instruction *processUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B, Value *F = Intrinsic::getDeclaration(I.getModule(), Intrinsic::sadd_with_overflow, NewType); - InstCombiner::BuilderTy *Builder = IC.Builder; + InstCombiner::BuilderTy &Builder = IC.Builder; // Put the new code above the original add, in case there are any uses of the // add between the add and the compare. 
- Builder->SetInsertPoint(OrigAdd); + Builder.SetInsertPoint(OrigAdd); - Value *TruncA = Builder->CreateTrunc(A, NewType, A->getName() + ".trunc"); - Value *TruncB = Builder->CreateTrunc(B, NewType, B->getName() + ".trunc"); - CallInst *Call = Builder->CreateCall(F, {TruncA, TruncB}, "sadd"); - Value *Add = Builder->CreateExtractValue(Call, 0, "sadd.result"); - Value *ZExt = Builder->CreateZExt(Add, OrigAdd->getType()); + Value *TruncA = Builder.CreateTrunc(A, NewType, A->getName() + ".trunc"); + Value *TruncB = Builder.CreateTrunc(B, NewType, B->getName() + ".trunc"); + CallInst *Call = Builder.CreateCall(F, {TruncA, TruncB}, "sadd"); + Value *Add = Builder.CreateExtractValue(Call, 0, "sadd.result"); + Value *ZExt = Builder.CreateZExt(Add, OrigAdd->getType()); // The inner add was the result of the narrow add, zero extended to the // wider type. Replace it with the result computed by the intrinsic. @@ -1399,12 +1399,12 @@ Instruction *InstCombiner::foldICmpWithConstant(ICmpInst &Cmp) { } // (icmp sgt smin(PosA, B) 0) -> (icmp sgt B 0) - if (*C == 0 && Pred == ICmpInst::ICMP_SGT) { + if (C->isNullValue() && Pred == ICmpInst::ICMP_SGT) { SelectPatternResult SPR = matchSelectPattern(X, A, B); if (SPR.Flavor == SPF_SMIN) { - if (isKnownPositive(A, DL)) + if (isKnownPositive(A, DL, 0, &AC, &Cmp, &DT)) return new ICmpInst(Pred, B, Cmp.getOperand(1)); - if (isKnownPositive(B, DL)) + if (isKnownPositive(B, DL, 0, &AC, &Cmp, &DT)) return new ICmpInst(Pred, A, Cmp.getOperand(1)); } } @@ -1434,9 +1434,9 @@ Instruction *InstCombiner::foldICmpWithConstant(ICmpInst &Cmp) { ConstantRange Intersection = DominatingCR.intersectWith(CR); ConstantRange Difference = DominatingCR.difference(CR); if (Intersection.isEmptySet()) - return replaceInstUsesWith(Cmp, Builder->getFalse()); + return replaceInstUsesWith(Cmp, Builder.getFalse()); if (Difference.isEmptySet()) - return replaceInstUsesWith(Cmp, Builder->getTrue()); + return replaceInstUsesWith(Cmp, Builder.getTrue()); // If this is a normal comparison, it demands all bits. If it is a sign // bit comparison, it only demands the sign bit. @@ -1448,12 +1448,13 @@ Instruction *InstCombiner::foldICmpWithConstant(ICmpInst &Cmp) { // of a test and branch. So we avoid canonicalizing in such situations // because test and branch instruction has better branch displacement // than compare and branch instruction. - if (!isBranchOnSignBitCheck(Cmp, IsSignBit) && !Cmp.isEquality()) { - if (auto *AI = Intersection.getSingleElement()) - return new ICmpInst(ICmpInst::ICMP_EQ, X, Builder->getInt(*AI)); - if (auto *AD = Difference.getSingleElement()) - return new ICmpInst(ICmpInst::ICMP_NE, X, Builder->getInt(*AD)); - } + if (Cmp.isEquality() || (IsSignBit && hasBranchUse(Cmp))) + return nullptr; + + if (auto *AI = Intersection.getSingleElement()) + return new ICmpInst(ICmpInst::ICMP_EQ, X, Builder.getInt(*AI)); + if (auto *AD = Difference.getSingleElement()) + return new ICmpInst(ICmpInst::ICMP_NE, X, Builder.getInt(*AD)); } return nullptr; @@ -1465,7 +1466,7 @@ Instruction *InstCombiner::foldICmpTruncConstant(ICmpInst &Cmp, const APInt *C) { ICmpInst::Predicate Pred = Cmp.getPredicate(); Value *X = Trunc->getOperand(0); - if (*C == 1 && C->getBitWidth() > 1) { + if (C->isOneValue() && C->getBitWidth() > 1) { // icmp slt trunc(signum(V)) 1 --> icmp slt V, 1 Value *V = nullptr; if (Pred == ICmpInst::ICMP_SLT && match(X, m_Signum(m_Value(V)))) @@ -1478,8 +1479,7 @@ Instruction *InstCombiner::foldICmpTruncConstant(ICmpInst &Cmp, // of the high bits truncated out of x are known. 
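// Not from the patch: the sadd.with.overflow rewrite earlier in this hunk
// targets the idiom "((sext i8 a) + (sext i8 b) + 128) u> 255", which is true
// exactly when the narrow i8 signed add overflows. Exhaustive check using the
// GCC/Clang __builtin_add_overflow builtin:
#include <cassert>
#include <cstdint>

int main() {
  for (int A = -128; A <= 127; ++A)
    for (int B = -128; B <= 127; ++B) {
      uint32_t Biased = (uint32_t)(A + B + 128); // wide add, as in the IR
      int8_t Narrow;
      bool Overflows = __builtin_add_overflow((int8_t)A, (int8_t)B, &Narrow);
      assert((Biased > 255u) == Overflows);
    }
  return 0;
}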
unsigned DstBits = Trunc->getType()->getScalarSizeInBits(), SrcBits = X->getType()->getScalarSizeInBits(); - KnownBits Known(SrcBits); - computeKnownBits(X, Known, 0, &Cmp); + KnownBits Known = computeKnownBits(X, 0, &Cmp); // If all the high bits are known, we can do this xform. if ((Known.Zero | Known.One).countLeadingOnes() >= SrcBits - DstBits) { @@ -1506,7 +1506,7 @@ Instruction *InstCombiner::foldICmpXorConstant(ICmpInst &Cmp, // If this is a comparison that tests the signbit (X < 0) or (x > -1), // fold the xor. ICmpInst::Predicate Pred = Cmp.getPredicate(); - if ((Pred == ICmpInst::ICMP_SLT && *C == 0) || + if ((Pred == ICmpInst::ICMP_SLT && C->isNullValue()) || (Pred == ICmpInst::ICMP_SGT && C->isAllOnesValue())) { // If the sign bit of the XorCst is not set, there is no change to @@ -1624,15 +1624,15 @@ Instruction *InstCombiner::foldICmpAndShift(ICmpInst &Cmp, BinaryOperator *And, // Turn ((X >> Y) & C2) == 0 into (X & (C2 << Y)) == 0. The latter is // preferable because it allows the C2 << Y expression to be hoisted out of a // loop if Y is invariant and X is not. - if (Shift->hasOneUse() && *C1 == 0 && Cmp.isEquality() && + if (Shift->hasOneUse() && C1->isNullValue() && Cmp.isEquality() && !Shift->isArithmeticShift() && !isa(Shift->getOperand(0))) { // Compute C2 << Y. Value *NewShift = - IsShl ? Builder->CreateLShr(And->getOperand(1), Shift->getOperand(1)) - : Builder->CreateShl(And->getOperand(1), Shift->getOperand(1)); + IsShl ? Builder.CreateLShr(And->getOperand(1), Shift->getOperand(1)) + : Builder.CreateShl(And->getOperand(1), Shift->getOperand(1)); // Compute X & (C2 << Y). - Value *NewAnd = Builder->CreateAnd(Shift->getOperand(0), NewShift); + Value *NewAnd = Builder.CreateAnd(Shift->getOperand(0), NewShift); Cmp.setOperand(0, NewAnd); return &Cmp; } @@ -1670,7 +1670,7 @@ Instruction *InstCombiner::foldICmpAndConstConst(ICmpInst &Cmp, unsigned WideScalarBits = WideType->getScalarSizeInBits(); Constant *ZextC1 = ConstantInt::get(WideType, C1->zext(WideScalarBits)); Constant *ZextC2 = ConstantInt::get(WideType, C2->zext(WideScalarBits)); - Value *NewAnd = Builder->CreateAnd(W, ZextC2, And->getName()); + Value *NewAnd = Builder.CreateAnd(W, ZextC2, And->getName()); return new ICmpInst(Cmp.getPredicate(), NewAnd, ZextC1); } } @@ -1682,7 +1682,8 @@ Instruction *InstCombiner::foldICmpAndConstConst(ICmpInst &Cmp, // (icmp pred (and A, (or (shl 1, B), 1), 0)) // // iff pred isn't signed - if (!Cmp.isSigned() && *C1 == 0 && match(And->getOperand(1), m_One())) { + if (!Cmp.isSigned() && C1->isNullValue() && + match(And->getOperand(1), m_One())) { Constant *One = cast(And->getOperand(1)); Value *Or = And->getOperand(0); Value *A, *B, *LShr; @@ -1703,12 +1704,12 @@ Instruction *InstCombiner::foldICmpAndConstConst(ICmpInst &Cmp, NewOr = ConstantExpr::getOr(ConstantExpr::getNUWShl(One, C), One); } else { if (UsesRemoved >= 3) - NewOr = Builder->CreateOr(Builder->CreateShl(One, B, LShr->getName(), - /*HasNUW=*/true), - One, Or->getName()); + NewOr = Builder.CreateOr(Builder.CreateShl(One, B, LShr->getName(), + /*HasNUW=*/true), + One, Or->getName()); } if (NewOr) { - Value *NewAnd = Builder->CreateAnd(A, NewOr, And->getName()); + Value *NewAnd = Builder.CreateAnd(A, NewOr, And->getName()); Cmp.setOperand(0, NewAnd); return &Cmp; } @@ -1765,13 +1766,13 @@ Instruction *InstCombiner::foldICmpAndConstant(ICmpInst &Cmp, // (X & C2) != 0 -> (trunc X) < 0 // iff C2 is a power of 2 and it masks the sign bit of a legal integer type. 
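// Not from the patch: the ((X >> Y) & C2) == 0 --> (X & (C2 << Y)) == 0
// rewrite above is an identity for logical shifts, because bits of C2 shifted
// past the top correspond to X bits the right shift discarded anyway.
// Exhaustive u8 check:
#include <cassert>
#include <cstdint>

int main() {
  for (unsigned X = 0; X < 256; ++X)
    for (unsigned Y = 0; Y < 8; ++Y)
      for (unsigned C2 = 0; C2 < 256; ++C2) {
        bool Before = ((X >> Y) & C2) == 0;
        bool After = (X & (uint8_t)(C2 << Y)) == 0;
        assert(Before == After);
      }
  return 0;
}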
const APInt *C2; - if (And->hasOneUse() && *C == 0 && match(Y, m_APInt(C2))) { + if (And->hasOneUse() && C->isNullValue() && match(Y, m_APInt(C2))) { int32_t ExactLogBase2 = C2->exactLogBase2(); if (ExactLogBase2 != -1 && DL.isLegalInteger(ExactLogBase2 + 1)) { Type *NTy = IntegerType::get(Cmp.getContext(), ExactLogBase2 + 1); if (And->getType()->isVectorTy()) NTy = VectorType::get(NTy, And->getType()->getVectorNumElements()); - Value *Trunc = Builder->CreateTrunc(X, NTy); + Value *Trunc = Builder.CreateTrunc(X, NTy); auto NewPred = Cmp.getPredicate() == CmpInst::ICMP_EQ ? CmpInst::ICMP_SGE : CmpInst::ICMP_SLT; return new ICmpInst(NewPred, Trunc, Constant::getNullValue(NTy)); @@ -1785,7 +1786,7 @@ Instruction *InstCombiner::foldICmpAndConstant(ICmpInst &Cmp, Instruction *InstCombiner::foldICmpOrConstant(ICmpInst &Cmp, BinaryOperator *Or, const APInt *C) { ICmpInst::Predicate Pred = Cmp.getPredicate(); - if (*C == 1) { + if (C->isOneValue()) { // icmp slt signum(V) 1 --> icmp slt V, 1 Value *V = nullptr; if (Pred == ICmpInst::ICMP_SLT && match(Or, m_Signum(m_Value(V)))) @@ -1802,7 +1803,7 @@ Instruction *InstCombiner::foldICmpOrConstant(ICmpInst &Cmp, BinaryOperator *Or, return new ICmpInst(Pred, Or->getOperand(0), Or->getOperand(1)); } - if (!Cmp.isEquality() || *C != 0 || !Or->hasOneUse()) + if (!Cmp.isEquality() || !C->isNullValue() || !Or->hasOneUse()) return nullptr; Value *P, *Q; @@ -1810,12 +1811,24 @@ Instruction *InstCombiner::foldICmpOrConstant(ICmpInst &Cmp, BinaryOperator *Or, // Simplify icmp eq (or (ptrtoint P), (ptrtoint Q)), 0 // -> and (icmp eq P, null), (icmp eq Q, null). Value *CmpP = - Builder->CreateICmp(Pred, P, ConstantInt::getNullValue(P->getType())); + Builder.CreateICmp(Pred, P, ConstantInt::getNullValue(P->getType())); Value *CmpQ = - Builder->CreateICmp(Pred, Q, ConstantInt::getNullValue(Q->getType())); - auto LogicOpc = Pred == ICmpInst::Predicate::ICMP_EQ ? Instruction::And - : Instruction::Or; - return BinaryOperator::Create(LogicOpc, CmpP, CmpQ); + Builder.CreateICmp(Pred, Q, ConstantInt::getNullValue(Q->getType())); + auto BOpc = Pred == CmpInst::ICMP_EQ ? Instruction::And : Instruction::Or; + return BinaryOperator::Create(BOpc, CmpP, CmpQ); + } + + // Are we using xors to bitwise check for a pair of (in)equalities? Convert to + // a shorter form that has more potential to be folded even further. + Value *X1, *X2, *X3, *X4; + if (match(Or->getOperand(0), m_OneUse(m_Xor(m_Value(X1), m_Value(X2)))) && + match(Or->getOperand(1), m_OneUse(m_Xor(m_Value(X3), m_Value(X4))))) { + // ((X1 ^ X2) || (X3 ^ X4)) == 0 --> (X1 == X2) && (X3 == X4) + // ((X1 ^ X2) || (X3 ^ X4)) != 0 --> (X1 != X2) || (X3 != X4) + Value *Cmp12 = Builder.CreateICmp(Pred, X1, X2); + Value *Cmp34 = Builder.CreateICmp(Pred, X3, X4); + auto BOpc = Pred == CmpInst::ICMP_EQ ? 
Instruction::And : Instruction::Or; + return BinaryOperator::Create(BOpc, Cmp12, Cmp34); } return nullptr; @@ -1992,7 +2005,7 @@ Instruction *InstCombiner::foldICmpShlConstant(ICmpInst &Cmp, Constant *Mask = ConstantInt::get( ShType, APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt->getZExtValue())); - Value *And = Builder->CreateAnd(X, Mask, Shl->getName() + ".mask"); + Value *And = Builder.CreateAnd(X, Mask, Shl->getName() + ".mask"); Constant *LShrC = ConstantInt::get(ShType, C->lshr(*ShiftAmt)); return new ICmpInst(Pred, And, LShrC); } @@ -2004,7 +2017,7 @@ Instruction *InstCombiner::foldICmpShlConstant(ICmpInst &Cmp, Constant *Mask = ConstantInt::get( ShType, APInt::getOneBitSet(TypeBits, TypeBits - ShiftAmt->getZExtValue() - 1)); - Value *And = Builder->CreateAnd(X, Mask, Shl->getName() + ".mask"); + Value *And = Builder.CreateAnd(X, Mask, Shl->getName() + ".mask"); return new ICmpInst(TrueIfSigned ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ, And, Constant::getNullValue(ShType)); } @@ -2023,7 +2036,7 @@ Instruction *InstCombiner::foldICmpShlConstant(ICmpInst &Cmp, TruncTy = VectorType::get(TruncTy, ShType->getVectorNumElements()); Constant *NewC = ConstantInt::get(TruncTy, C->ashr(*ShiftAmt).trunc(TypeBits - Amt)); - return new ICmpInst(Pred, Builder->CreateTrunc(X, TruncTy), NewC); + return new ICmpInst(Pred, Builder.CreateTrunc(X, TruncTy), NewC); } return nullptr; @@ -2037,7 +2050,8 @@ Instruction *InstCombiner::foldICmpShrConstant(ICmpInst &Cmp, // icmp eq/ne (shr X, Y), 0 --> icmp eq/ne X, 0 Value *X = Shr->getOperand(0); CmpInst::Predicate Pred = Cmp.getPredicate(); - if (Cmp.isEquality() && Shr->isExact() && Shr->hasOneUse() && *C == 0) + if (Cmp.isEquality() && Shr->isExact() && Shr->hasOneUse() && + C->isNullValue()) return new ICmpInst(Pred, X, Cmp.getOperand(1)); const APInt *ShiftVal; @@ -2074,8 +2088,8 @@ Instruction *InstCombiner::foldICmpShrConstant(ICmpInst &Cmp, Constant *DivCst = ConstantInt::get( Shr->getType(), APInt::getOneBitSet(TypeBits, ShAmtVal)); - Value *Tmp = IsAShr ? Builder->CreateSDiv(X, DivCst, "", Shr->isExact()) - : Builder->CreateUDiv(X, DivCst, "", Shr->isExact()); + Value *Tmp = IsAShr ? Builder.CreateSDiv(X, DivCst, "", Shr->isExact()) + : Builder.CreateUDiv(X, DivCst, "", Shr->isExact()); Cmp.setOperand(0, Tmp); @@ -2113,7 +2127,7 @@ Instruction *InstCombiner::foldICmpShrConstant(ICmpInst &Cmp, // Otherwise strength reduce the shift into an 'and'. 
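// Not from the patch: the new xor-pair fold above rests on
// ((X1 ^ X2) | (X3 ^ X4)) == 0  <=>  (X1 == X2) && (X3 == X4) -- an OR of
// xors is zero exactly when every xor'd pair is equal. Exhaustive 4-bit check:
#include <cassert>

int main() {
  for (unsigned X1 = 0; X1 < 16; ++X1)
    for (unsigned X2 = 0; X2 < 16; ++X2)
      for (unsigned X3 = 0; X3 < 16; ++X3)
        for (unsigned X4 = 0; X4 < 16; ++X4)
          assert((((X1 ^ X2) | (X3 ^ X4)) == 0) ==
                 ((X1 == X2) && (X3 == X4)));
  return 0;
}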
APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal)); Constant *Mask = ConstantInt::get(Shr->getType(), Val); - Value *And = Builder->CreateAnd(X, Mask, Shr->getName() + ".mask"); + Value *And = Builder.CreateAnd(X, Mask, Shr->getName() + ".mask"); return new ICmpInst(Pred, And, ShiftedCmpRHS); } @@ -2128,7 +2142,7 @@ Instruction *InstCombiner::foldICmpUDivConstant(ICmpInst &Cmp, if (!match(UDiv->getOperand(0), m_APInt(C2))) return nullptr; - assert(C2 != 0 && "udiv 0, X should have been simplified already."); + assert(*C2 != 0 && "udiv 0, X should have been simplified already."); // (icmp ugt (udiv C2, Y), C) -> (icmp ule Y, C2/(C+1)) Value *Y = UDiv->getOperand(1); @@ -2141,7 +2155,7 @@ Instruction *InstCombiner::foldICmpUDivConstant(ICmpInst &Cmp, // (icmp ult (udiv C2, Y), C) -> (icmp ugt Y, C2/C) if (Cmp.getPredicate() == ICmpInst::ICMP_ULT) { - assert(C != 0 && "icmp ult X, 0 should have been simplified already."); + assert(*C != 0 && "icmp ult X, 0 should have been simplified already."); return new ICmpInst(ICmpInst::ICMP_UGT, Y, ConstantInt::get(Y->getType(), C2->udiv(*C))); } @@ -2179,7 +2193,8 @@ Instruction *InstCombiner::foldICmpDivConstant(ICmpInst &Cmp, // INT_MIN will also fail if the divisor is 1. Although folds of all these // division-by-constant cases should be present, we can not assert that they // have happened before we reach this icmp instruction. - if (*C2 == 0 || *C2 == 1 || (DivIsSigned && C2->isAllOnesValue())) + if (C2->isNullValue() || C2->isOneValue() || + (DivIsSigned && C2->isAllOnesValue())) return nullptr; // TODO: We could do all of the computations below using APInt. @@ -2225,7 +2240,7 @@ Instruction *InstCombiner::foldICmpDivConstant(ICmpInst &Cmp, HiOverflow = addWithOverflow(HiBound, LoBound, RangeSize, false); } } else if (C2->isStrictlyPositive()) { // Divisor is > 0. - if (*C == 0) { // (X / pos) op 0 + if (C->isNullValue()) { // (X / pos) op 0 // Can't overflow. e.g. X/2 op 0 --> [-1, 2) LoBound = ConstantExpr::getNeg(SubOne(RangeSize)); HiBound = RangeSize; @@ -2246,7 +2261,7 @@ Instruction *InstCombiner::foldICmpDivConstant(ICmpInst &Cmp, } else if (C2->isNegative()) { // Divisor is < 0. if (Div->isExact()) RangeSize = ConstantExpr::getNeg(RangeSize); - if (*C == 0) { // (X / neg) op 0 + if (C->isNullValue()) { // (X / neg) op 0 // e.g. X/-5 op 0 --> [-4, 5) LoBound = AddOne(RangeSize); HiBound = ConstantExpr::getNeg(RangeSize); @@ -2276,7 +2291,7 @@ Instruction *InstCombiner::foldICmpDivConstant(ICmpInst &Cmp, default: llvm_unreachable("Unhandled icmp opcode!"); case ICmpInst::ICMP_EQ: if (LoOverflow && HiOverflow) - return replaceInstUsesWith(Cmp, Builder->getFalse()); + return replaceInstUsesWith(Cmp, Builder.getFalse()); if (HiOverflow) return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE, X, LoBound); @@ -2288,7 +2303,7 @@ Instruction *InstCombiner::foldICmpDivConstant(ICmpInst &Cmp, HiBound->getUniqueInteger(), DivIsSigned, true)); case ICmpInst::ICMP_NE: if (LoOverflow && HiOverflow) - return replaceInstUsesWith(Cmp, Builder->getTrue()); + return replaceInstUsesWith(Cmp, Builder.getTrue()); if (HiOverflow) return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, X, LoBound); @@ -2302,16 +2317,16 @@ Instruction *InstCombiner::foldICmpDivConstant(ICmpInst &Cmp, case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_SLT: if (LoOverflow == +1) // Low bound is greater than input range. 
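// Not from the patch: a spot check of the divisor-range logic above. For a
// positive divisor, (X /s 5) == 0 holds exactly on [-4, 5), matching
// "LoBound = -(RangeSize - 1), HiBound = RangeSize" since sdiv truncates
// toward zero:
#include <cassert>

int main() {
  for (int X = -1000; X <= 1000; ++X)
    assert(((X / 5) == 0) == (X >= -4 && X < 5));
  return 0;
}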
- return replaceInstUsesWith(Cmp, Builder->getTrue()); + return replaceInstUsesWith(Cmp, Builder.getTrue()); if (LoOverflow == -1) // Low bound is less than input range. - return replaceInstUsesWith(Cmp, Builder->getFalse()); + return replaceInstUsesWith(Cmp, Builder.getFalse()); return new ICmpInst(Pred, X, LoBound); case ICmpInst::ICMP_UGT: case ICmpInst::ICMP_SGT: if (HiOverflow == +1) // High bound greater than input range. - return replaceInstUsesWith(Cmp, Builder->getFalse()); + return replaceInstUsesWith(Cmp, Builder.getFalse()); if (HiOverflow == -1) // High bound less than input range. - return replaceInstUsesWith(Cmp, Builder->getTrue()); + return replaceInstUsesWith(Cmp, Builder.getTrue()); if (Pred == ICmpInst::ICMP_UGT) return new ICmpInst(ICmpInst::ICMP_UGE, X, HiBound); return new ICmpInst(ICmpInst::ICMP_SGE, X, HiBound); @@ -2338,15 +2353,15 @@ Instruction *InstCombiner::foldICmpSubConstant(ICmpInst &Cmp, return new ICmpInst(ICmpInst::ICMP_SGE, X, Y); // (icmp sgt (sub nsw X, Y), 0) -> (icmp sgt X, Y) - if (Pred == ICmpInst::ICMP_SGT && *C == 0) + if (Pred == ICmpInst::ICMP_SGT && C->isNullValue()) return new ICmpInst(ICmpInst::ICMP_SGT, X, Y); // (icmp slt (sub nsw X, Y), 0) -> (icmp slt X, Y) - if (Pred == ICmpInst::ICMP_SLT && *C == 0) + if (Pred == ICmpInst::ICMP_SLT && C->isNullValue()) return new ICmpInst(ICmpInst::ICMP_SLT, X, Y); // (icmp slt (sub nsw X, Y), 1) -> (icmp sle X, Y) - if (Pred == ICmpInst::ICMP_SLT && *C == 1) + if (Pred == ICmpInst::ICMP_SLT && C->isOneValue()) return new ICmpInst(ICmpInst::ICMP_SLE, X, Y); } @@ -2358,12 +2373,12 @@ Instruction *InstCombiner::foldICmpSubConstant(ICmpInst &Cmp, // iff (C2 & (C - 1)) == C - 1 and C is a power of 2 if (Pred == ICmpInst::ICMP_ULT && C->isPowerOf2() && (*C2 & (*C - 1)) == (*C - 1)) - return new ICmpInst(ICmpInst::ICMP_EQ, Builder->CreateOr(Y, *C - 1), X); + return new ICmpInst(ICmpInst::ICMP_EQ, Builder.CreateOr(Y, *C - 1), X); // C2 - Y >u C -> (Y | C) != C2 // iff C2 & C == C and C + 1 is a power of 2 if (Pred == ICmpInst::ICMP_UGT && (*C + 1).isPowerOf2() && (*C2 & *C) == *C) - return new ICmpInst(ICmpInst::ICMP_NE, Builder->CreateOr(Y, *C), X); + return new ICmpInst(ICmpInst::ICMP_NE, Builder.CreateOr(Y, *C), X); return nullptr; } @@ -2419,19 +2434,90 @@ Instruction *InstCombiner::foldICmpAddConstant(ICmpInst &Cmp, // iff C & (C2-1) == 0 // C2 is a power of 2 if (Pred == ICmpInst::ICMP_ULT && C->isPowerOf2() && (*C2 & (*C - 1)) == 0) - return new ICmpInst(ICmpInst::ICMP_EQ, Builder->CreateAnd(X, -(*C)), + return new ICmpInst(ICmpInst::ICMP_EQ, Builder.CreateAnd(X, -(*C)), ConstantExpr::getNeg(cast(Y))); // X+C >u C2 -> (X & ~C2) != C // iff C & C2 == 0 // C2+1 is a power of 2 if (Pred == ICmpInst::ICMP_UGT && (*C + 1).isPowerOf2() && (*C2 & *C) == 0) - return new ICmpInst(ICmpInst::ICMP_NE, Builder->CreateAnd(X, ~(*C)), + return new ICmpInst(ICmpInst::ICMP_NE, Builder.CreateAnd(X, ~(*C)), ConstantExpr::getNeg(cast(Y))); return nullptr; } +bool InstCombiner::matchThreeWayIntCompare(SelectInst *SI, Value *&LHS, + Value *&RHS, ConstantInt *&Less, + ConstantInt *&Equal, + ConstantInt *&Greater) { + // TODO: Generalize this to work with other comparison idioms or ensure + // they get canonicalized into this form. + + // select i1 (a == b), i32 Equal, i32 (select i1 (a < b), i32 Less, i32 + // Greater), where Equal, Less and Greater are placeholders for any three + // constants. 
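// Not from the patch: one of the (sub C2, Y) folds above,
// "C2 - Y u> C --> (Y | C) != C2, iff C2 & C == C and C+1 is a power of 2",
// verified exhaustively for u8 with C = 7 and C2 = 0x5F (hypothetical
// constants chosen so both side conditions hold):
#include <cassert>
#include <cstdint>

int main() {
  const uint8_t C = 7, C2 = 0x5F;
  static_assert((C2 & C) == C && ((C + 1) & C) == 0, "side conditions");
  for (unsigned Y = 0; Y < 256; ++Y)
    assert(((uint8_t)(C2 - Y) > C) == ((uint8_t)(Y | C) != C2));
  return 0;
}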
+ ICmpInst::Predicate PredA, PredB; + if (match(SI->getTrueValue(), m_ConstantInt(Equal)) && + match(SI->getCondition(), m_ICmp(PredA, m_Value(LHS), m_Value(RHS))) && + PredA == ICmpInst::ICMP_EQ && + match(SI->getFalseValue(), + m_Select(m_ICmp(PredB, m_Specific(LHS), m_Specific(RHS)), + m_ConstantInt(Less), m_ConstantInt(Greater))) && + PredB == ICmpInst::ICMP_SLT) { + return true; + } + return false; +} + +Instruction *InstCombiner::foldICmpSelectConstant(ICmpInst &Cmp, + Instruction *Select, + ConstantInt *C) { + + assert(C && "Cmp RHS should be a constant int!"); + // If we're testing a constant value against the result of a three way + // comparison, the result can be expressed directly in terms of the + // original values being compared. Note: We could possibly be more + // aggressive here and remove the hasOneUse test. The original select is + // really likely to simplify or sink when we remove a test of the result. + Value *OrigLHS, *OrigRHS; + ConstantInt *C1LessThan, *C2Equal, *C3GreaterThan; + if (Cmp.hasOneUse() && + matchThreeWayIntCompare(cast(Select), OrigLHS, OrigRHS, + C1LessThan, C2Equal, C3GreaterThan)) { + assert(C1LessThan && C2Equal && C3GreaterThan); + + bool TrueWhenLessThan = + ConstantExpr::getCompare(Cmp.getPredicate(), C1LessThan, C) + ->isAllOnesValue(); + bool TrueWhenEqual = + ConstantExpr::getCompare(Cmp.getPredicate(), C2Equal, C) + ->isAllOnesValue(); + bool TrueWhenGreaterThan = + ConstantExpr::getCompare(Cmp.getPredicate(), C3GreaterThan, C) + ->isAllOnesValue(); + + // This generates the new instruction that will replace the original Cmp + // Instruction. Instead of enumerating the various combinations when + // TrueWhenLessThan, TrueWhenEqual and TrueWhenGreaterThan are true versus + // false, we rely on chaining of ORs and future passes of InstCombine to + // simplify the OR further (i.e. a s< b || a == b becomes a s<= b). + + // When none of the three constants satisfy the predicate for the RHS (C), + // the entire original Cmp can be simplified to a false. + Value *Cond = Builder.getFalse(); + if (TrueWhenLessThan) + Cond = Builder.CreateOr(Cond, Builder.CreateICmp(ICmpInst::ICMP_SLT, OrigLHS, OrigRHS)); + if (TrueWhenEqual) + Cond = Builder.CreateOr(Cond, Builder.CreateICmp(ICmpInst::ICMP_EQ, OrigLHS, OrigRHS)); + if (TrueWhenGreaterThan) + Cond = Builder.CreateOr(Cond, Builder.CreateICmp(ICmpInst::ICMP_SGT, OrigLHS, OrigRHS)); + + return replaceInstUsesWith(Cmp, Cond); + } + return nullptr; +} + /// Try to fold integer comparisons with a constant operand: icmp Pred X, C /// where X is some kind of instruction. Instruction *InstCombiner::foldICmpInstWithConstant(ICmpInst &Cmp) { @@ -2491,11 +2577,28 @@ Instruction *InstCombiner::foldICmpInstWithConstant(ICmpInst &Cmp) { return I; } + // Match against CmpInst LHS being instructions other than binary operators. Instruction *LHSI; - if (match(Cmp.getOperand(0), m_Instruction(LHSI)) && - LHSI->getOpcode() == Instruction::Trunc) - if (Instruction *I = foldICmpTruncConstant(Cmp, LHSI, C)) - return I; + if (match(Cmp.getOperand(0), m_Instruction(LHSI))) { + switch (LHSI->getOpcode()) { + case Instruction::Select: + { + // For now, we only support constant integers while folding the + // ICMP(SELECT)) pattern. We can extend this to support vector of integers + // similar to the cases handled by binary ops above. 
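// Not from the patch: a standalone model of the three-way-compare fold above.
// Cmp3 stands in for the matched select-of-select returning Less/Equal/Greater
// (-1/0/1 here); a compare of its result collapses to the OR of the direct
// compares whose constant outcome satisfies the predicate:
#include <cassert>

static int Cmp3(int A, int B) { return A == B ? 0 : (A < B ? -1 : 1); }

int main() {
  for (int A = -3; A <= 3; ++A)
    for (int B = -3; B <= 3; ++B) {
      // icmp slt (Cmp3 A, B), 1: of the constants {-1, 0, 1}, only -1 and 0
      // satisfy "x < 1", so the fold emits (A < B) || (A == B).
      assert((Cmp3(A, B) < 1) == ((A < B) || (A == B)));
    }
  return 0;
}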
+ if (ConstantInt *ConstRHS = dyn_cast(Cmp.getOperand(1))) + if (Instruction *I = foldICmpSelectConstant(Cmp, LHSI, ConstRHS)) + return I; + break; + } + case Instruction::Trunc: + if (Instruction *I = foldICmpTruncConstant(Cmp, LHSI, C)) + return I; + break; + default: + break; + } + } if (Instruction *I = foldICmpIntrinsicWithConstant(Cmp, C)) return I; @@ -2521,10 +2624,10 @@ Instruction *InstCombiner::foldICmpBinOpEqualityWithConstant(ICmpInst &Cmp, switch (BO->getOpcode()) { case Instruction::SRem: // If we have a signed (X % (2^c)) == 0, turn it into an unsigned one. - if (*C == 0 && BO->hasOneUse()) { + if (C->isNullValue() && BO->hasOneUse()) { const APInt *BOC; if (match(BOp1, m_APInt(BOC)) && BOC->sgt(1) && BOC->isPowerOf2()) { - Value *NewRem = Builder->CreateURem(BOp0, BOp1, BO->getName()); + Value *NewRem = Builder.CreateURem(BOp0, BOp1, BO->getName()); return new ICmpInst(Pred, NewRem, Constant::getNullValue(BO->getType())); } @@ -2538,7 +2641,7 @@ Instruction *InstCombiner::foldICmpBinOpEqualityWithConstant(ICmpInst &Cmp, Constant *SubC = ConstantExpr::getSub(RHS, cast(BOp1)); return new ICmpInst(Pred, BOp0, SubC); } - } else if (*C == 0) { + } else if (C->isNullValue()) { // Replace ((add A, B) != 0) with (A != -B) if A or B is // efficiently invertible, or if the add has just this one use. if (Value *NegVal = dyn_castNegVal(BOp1)) @@ -2546,7 +2649,7 @@ Instruction *InstCombiner::foldICmpBinOpEqualityWithConstant(ICmpInst &Cmp, if (Value *NegVal = dyn_castNegVal(BOp0)) return new ICmpInst(Pred, NegVal, BOp1); if (BO->hasOneUse()) { - Value *Neg = Builder->CreateNeg(BOp1); + Value *Neg = Builder.CreateNeg(BOp1); Neg->takeName(BO); return new ICmpInst(Pred, BOp0, Neg); } @@ -2559,7 +2662,7 @@ Instruction *InstCombiner::foldICmpBinOpEqualityWithConstant(ICmpInst &Cmp, // For the xor case, we can xor two constants together, eliminating // the explicit xor. return new ICmpInst(Pred, BOp0, ConstantExpr::getXor(RHS, BOC)); - } else if (*C == 0) { + } else if (C->isNullValue()) { // Replace ((xor A, B) != 0) with (A != B) return new ICmpInst(Pred, BOp0, BOp1); } @@ -2572,7 +2675,7 @@ Instruction *InstCombiner::foldICmpBinOpEqualityWithConstant(ICmpInst &Cmp, // Replace ((sub BOC, B) != C) with (B != BOC-C). Constant *SubC = ConstantExpr::getSub(cast(BOp0), RHS); return new ICmpInst(Pred, BOp1, SubC); - } else if (*C == 0) { + } else if (C->isNullValue()) { // Replace ((sub A, B) != 0) with (A != B). return new ICmpInst(Pred, BOp0, BOp1); } @@ -2585,7 +2688,7 @@ Instruction *InstCombiner::foldICmpBinOpEqualityWithConstant(ICmpInst &Cmp, // Replace (X | C) == -1 with (X & ~C) == ~C. // This removes the -1 constant. Constant *NotBOC = ConstantExpr::getNot(cast(BOp1)); - Value *And = Builder->CreateAnd(BOp0, NotBOC); + Value *And = Builder.CreateAnd(BOp0, NotBOC); return new ICmpInst(Pred, And, NotBOC); } break; @@ -2610,7 +2713,7 @@ Instruction *InstCombiner::foldICmpBinOpEqualityWithConstant(ICmpInst &Cmp, } // ((X & ~7) == 0) --> X < 8 - if (*C == 0 && (~(*BOC) + 1).isPowerOf2()) { + if (C->isNullValue() && (~(*BOC) + 1).isPowerOf2()) { Constant *NegBOC = ConstantExpr::getNeg(cast(BOp1)); auto NewPred = isICMP_NE ? 
ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT; return new ICmpInst(NewPred, BOp0, NegBOC); @@ -2619,9 +2722,9 @@ Instruction *InstCombiner::foldICmpBinOpEqualityWithConstant(ICmpInst &Cmp, break; } case Instruction::Mul: - if (*C == 0 && BO->hasNoSignedWrap()) { + if (C->isNullValue() && BO->hasNoSignedWrap()) { const APInt *BOC; - if (match(BOp1, m_APInt(BOC)) && *BOC != 0) { + if (match(BOp1, m_APInt(BOC)) && !BOC->isNullValue()) { // The trivial case (mul X, 0) is handled by InstSimplify. // General case : (mul X, C) != 0 iff X != 0 // (mul X, C) == 0 iff X == 0 @@ -2630,7 +2733,7 @@ Instruction *InstCombiner::foldICmpBinOpEqualityWithConstant(ICmpInst &Cmp, } break; case Instruction::UDiv: - if (*C == 0) { + if (C->isNullValue()) { // (icmp eq/ne (udiv A, B), 0) -> (icmp ugt/ule i32 B, A) auto NewPred = isICMP_NE ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_UGT; return new ICmpInst(NewPred, BOp1, BOp0); @@ -2649,32 +2752,35 @@ Instruction *InstCombiner::foldICmpIntrinsicWithConstant(ICmpInst &Cmp, if (!II || !Cmp.isEquality()) return nullptr; - // Handle icmp {eq|ne} , intcst. + // Handle icmp {eq|ne} , Constant. + Type *Ty = II->getType(); switch (II->getIntrinsicID()) { case Intrinsic::bswap: Worklist.Add(II); Cmp.setOperand(0, II->getArgOperand(0)); - Cmp.setOperand(1, Builder->getInt(C->byteSwap())); + Cmp.setOperand(1, ConstantInt::get(Ty, C->byteSwap())); return &Cmp; + case Intrinsic::ctlz: case Intrinsic::cttz: // ctz(A) == bitwidth(A) -> A == 0 and likewise for != if (*C == C->getBitWidth()) { Worklist.Add(II); Cmp.setOperand(0, II->getArgOperand(0)); - Cmp.setOperand(1, ConstantInt::getNullValue(II->getType())); + Cmp.setOperand(1, ConstantInt::getNullValue(Ty)); return &Cmp; } break; + case Intrinsic::ctpop: { // popcount(A) == 0 -> A == 0 and likewise for != // popcount(A) == bitwidth(A) -> A == -1 and likewise for != - bool IsZero = *C == 0; + bool IsZero = C->isNullValue(); if (IsZero || *C == C->getBitWidth()) { Worklist.Add(II); Cmp.setOperand(0, II->getArgOperand(0)); - auto *NewOp = IsZero ? Constant::getNullValue(II->getType()) - : Constant::getAllOnesValue(II->getType()); + auto *NewOp = + IsZero ? 
Constant::getNullValue(Ty) : Constant::getAllOnesValue(Ty); Cmp.setOperand(1, NewOp); return &Cmp; } @@ -2683,6 +2789,7 @@ Instruction *InstCombiner::foldICmpIntrinsicWithConstant(ICmpInst &Cmp, default: break; } + return nullptr; } @@ -2750,11 +2857,11 @@ Instruction *InstCombiner::foldICmpInstWithConstantNotInt(ICmpInst &I) { } if (Transform) { if (!Op1) - Op1 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(1), RHSC, - I.getName()); + Op1 = Builder.CreateICmp(I.getPredicate(), LHSI->getOperand(1), RHSC, + I.getName()); if (!Op2) - Op2 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(2), RHSC, - I.getName()); + Op2 = Builder.CreateICmp(I.getPredicate(), LHSI->getOperand(2), RHSC, + I.getName()); return SelectInst::Create(LHSI->getOperand(0), Op1, Op2); } break; @@ -2938,12 +3045,12 @@ Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I) { APInt AP1Abs = C1->getValue().abs(); APInt AP2Abs = C2->getValue().abs(); if (AP1Abs.uge(AP2Abs)) { - ConstantInt *C3 = Builder->getInt(AP1 - AP2); - Value *NewAdd = Builder->CreateNSWAdd(A, C3); + ConstantInt *C3 = Builder.getInt(AP1 - AP2); + Value *NewAdd = Builder.CreateNSWAdd(A, C3); return new ICmpInst(Pred, NewAdd, C); } else { - ConstantInt *C3 = Builder->getInt(AP2 - AP1); - Value *NewAdd = Builder->CreateNSWAdd(C, C3); + ConstantInt *C3 = Builder.getInt(AP2 - AP1); + Value *NewAdd = Builder.CreateNSWAdd(C, C3); return new ICmpInst(Pred, A, NewAdd); } } @@ -3030,18 +3137,21 @@ Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I) { break; case Instruction::Add: case Instruction::Sub: - case Instruction::Xor: + case Instruction::Xor: { if (I.isEquality()) // a+x icmp eq/ne b+x --> a icmp b return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0)); - // icmp u/s (a ^ signmask), (b ^ signmask) --> icmp s/u a, b - if (ConstantInt *CI = dyn_cast(BO0->getOperand(1))) { - if (CI->getValue().isSignMask()) { + + const APInt *C; + if (match(BO0->getOperand(1), m_APInt(C))) { + // icmp u/s (a ^ signmask), (b ^ signmask) --> icmp s/u a, b + if (C->isSignMask()) { ICmpInst::Predicate NewPred = I.isSigned() ? I.getUnsignedPredicate() : I.getSignedPredicate(); return new ICmpInst(NewPred, BO0->getOperand(0), BO1->getOperand(0)); } - if (BO0->getOpcode() == Instruction::Xor && CI->isMaxValue(true)) { + // icmp u/s (a ^ maxsignval), (b ^ maxsignval) --> icmp s/u' a, b + if (BO0->getOpcode() == Instruction::Xor && C->isMaxSignedValue()) { ICmpInst::Predicate NewPred = I.isSigned() ? I.getUnsignedPredicate() : I.getSignedPredicate(); NewPred = I.getSwappedPredicate(NewPred); @@ -3049,35 +3159,47 @@ Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I) { } } break; - case Instruction::Mul: + } + case Instruction::Mul: { if (!I.isEquality()) break; - if (ConstantInt *CI = dyn_cast(BO0->getOperand(1))) { - // a * Cst icmp eq/ne b * Cst --> a & Mask icmp b & Mask - // Mask = -1 >> count-trailing-zeros(Cst). - if (!CI->isZero() && !CI->isOne()) { - const APInt &AP = CI->getValue(); - ConstantInt *Mask = ConstantInt::get( - I.getContext(), - APInt::getLowBitsSet(AP.getBitWidth(), - AP.getBitWidth() - AP.countTrailingZeros())); - Value *And1 = Builder->CreateAnd(BO0->getOperand(0), Mask); - Value *And2 = Builder->CreateAnd(BO1->getOperand(0), Mask); + const APInt *C; + if (match(BO0->getOperand(1), m_APInt(C)) && !C->isNullValue() && + !C->isOneValue()) { + // icmp eq/ne (X * C), (Y * C) --> icmp (X & Mask), (Y & Mask) + // Mask = -1 >> count-trailing-zeros(C). 
+ if (unsigned TZs = C->countTrailingZeros()) { + Constant *Mask = ConstantInt::get( + BO0->getType(), + APInt::getLowBitsSet(C->getBitWidth(), C->getBitWidth() - TZs)); + Value *And1 = Builder.CreateAnd(BO0->getOperand(0), Mask); + Value *And2 = Builder.CreateAnd(BO1->getOperand(0), Mask); return new ICmpInst(Pred, And1, And2); } + // If there are no trailing zeros in the multiplier, just eliminate + // the multiplies (no masking is needed): + // icmp eq/ne (X * C), (Y * C) --> icmp eq/ne X, Y + return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0)); } break; + } case Instruction::UDiv: case Instruction::LShr: - if (I.isSigned()) + if (I.isSigned() || !BO0->isExact() || !BO1->isExact()) break; - LLVM_FALLTHROUGH; + return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0)); + case Instruction::SDiv: + if (!I.isEquality() || !BO0->isExact() || !BO1->isExact()) + break; + return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0)); + case Instruction::AShr: if (!BO0->isExact() || !BO1->isExact()) break; return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0)); + case Instruction::Shl: { bool NUW = BO0->hasNoUnsignedWrap() && BO1->hasNoUnsignedWrap(); bool NSW = BO0->hasNoSignedWrap() && BO1->hasNoSignedWrap(); @@ -3093,8 +3215,7 @@ Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I) { if (BO0) { // Transform A & (L - 1) `ult` L --> L != 0 auto LSubOne = m_Add(m_Specific(Op1), m_AllOnes()); - auto BitwiseAnd = - m_CombineOr(m_And(m_Value(), LSubOne), m_And(LSubOne, m_Value())); + auto BitwiseAnd = m_c_And(m_Value(), LSubOne); if (match(BO0, BitwiseAnd) && Pred == ICmpInst::ICMP_ULT) { auto *Zero = Constant::getNullValue(BO0->getType()); @@ -3197,12 +3318,12 @@ Instruction *InstCombiner::foldICmpEquality(ICmpInst &I) { return nullptr; Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + const CmpInst::Predicate Pred = I.getPredicate(); Value *A, *B, *C, *D; if (match(Op0, m_Xor(m_Value(A), m_Value(B)))) { if (A == Op1 || B == Op1) { // (A^B) == A -> B == 0 Value *OtherVal = A == Op1 ? B : A; - return new ICmpInst(I.getPredicate(), OtherVal, - Constant::getNullValue(A->getType())); + return new ICmpInst(Pred, OtherVal, Constant::getNullValue(A->getType())); } if (match(Op1, m_Xor(m_Value(C), m_Value(D)))) { @@ -3210,28 +3331,27 @@ Instruction *InstCombiner::foldICmpEquality(ICmpInst &I) { ConstantInt *C1, *C2; if (match(B, m_ConstantInt(C1)) && match(D, m_ConstantInt(C2)) && Op1->hasOneUse()) { - Constant *NC = Builder->getInt(C1->getValue() ^ C2->getValue()); - Value *Xor = Builder->CreateXor(C, NC); - return new ICmpInst(I.getPredicate(), A, Xor); + Constant *NC = Builder.getInt(C1->getValue() ^ C2->getValue()); + Value *Xor = Builder.CreateXor(C, NC); + return new ICmpInst(Pred, A, Xor); } // A^B == A^D -> B == D if (A == C) - return new ICmpInst(I.getPredicate(), B, D); + return new ICmpInst(Pred, B, D); if (A == D) - return new ICmpInst(I.getPredicate(), B, C); + return new ICmpInst(Pred, B, C); if (B == C) - return new ICmpInst(I.getPredicate(), A, D); + return new ICmpInst(Pred, A, D); if (B == D) - return new ICmpInst(I.getPredicate(), A, C); + return new ICmpInst(Pred, A, C); } } if (match(Op1, m_Xor(m_Value(A), m_Value(B))) && (A == Op0 || B == Op0)) { // A == (A^B) -> B == 0 Value *OtherVal = A == Op0 ? 
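// Not from the patch: the mul-compare fold above uses that the odd part of C
// is invertible mod 2^n, so X*C == Y*C (mod 2^n) iff X and Y agree on the low
// n - ctz(C) bits. Exhaustive u8 check with C = 12 (ctz = 2):
#include <cassert>
#include <cstdint>

int main() {
  const uint8_t C = 12;           // 0b1100, two trailing zeros
  const uint8_t Mask = 0xFF >> 2; // "-1 >> count-trailing-zeros(C)"
  for (unsigned X = 0; X < 256; ++X)
    for (unsigned Y = 0; Y < 256; ++Y)
      assert(((uint8_t)(X * C) == (uint8_t)(Y * C)) ==
             ((X & Mask) == (Y & Mask)));
  return 0;
}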
B : A; - return new ICmpInst(I.getPredicate(), OtherVal, - Constant::getNullValue(A->getType())); + return new ICmpInst(Pred, OtherVal, Constant::getNullValue(A->getType())); } // (X&Z) == (Y&Z) -> (X^Y) & Z == 0 @@ -3258,8 +3378,8 @@ Instruction *InstCombiner::foldICmpEquality(ICmpInst &I) { } if (X) { // Build (X^Y) & Z - Op1 = Builder->CreateXor(X, Y); - Op1 = Builder->CreateAnd(Op1, Z); + Op1 = Builder.CreateXor(X, Y); + Op1 = Builder.CreateAnd(Op1, Z); I.setOperand(0, Op1); I.setOperand(1, Constant::getNullValue(Op1->getType())); return &I; @@ -3276,8 +3396,7 @@ Instruction *InstCombiner::foldICmpEquality(ICmpInst &I) { APInt Pow2 = Cst1->getValue() + 1; if (Pow2.isPowerOf2() && isa(A->getType()) && Pow2.logBase2() == cast(A->getType())->getBitWidth()) - return new ICmpInst(I.getPredicate(), A, - Builder->CreateTrunc(B, A->getType())); + return new ICmpInst(Pred, A, Builder.CreateTrunc(B, A->getType())); } // (A >> C) == (B >> C) --> (A^B) u< (1 << C) @@ -3289,12 +3408,11 @@ Instruction *InstCombiner::foldICmpEquality(ICmpInst &I) { unsigned TypeBits = Cst1->getBitWidth(); unsigned ShAmt = (unsigned)Cst1->getLimitedValue(TypeBits); if (ShAmt < TypeBits && ShAmt != 0) { - ICmpInst::Predicate Pred = I.getPredicate() == ICmpInst::ICMP_NE - ? ICmpInst::ICMP_UGE - : ICmpInst::ICMP_ULT; - Value *Xor = Builder->CreateXor(A, B, I.getName() + ".unshifted"); + ICmpInst::Predicate NewPred = + Pred == ICmpInst::ICMP_NE ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT; + Value *Xor = Builder.CreateXor(A, B, I.getName() + ".unshifted"); APInt CmpVal = APInt::getOneBitSet(TypeBits, ShAmt); - return new ICmpInst(Pred, Xor, Builder->getInt(CmpVal)); + return new ICmpInst(NewPred, Xor, Builder.getInt(CmpVal)); } } @@ -3304,12 +3422,11 @@ Instruction *InstCombiner::foldICmpEquality(ICmpInst &I) { unsigned TypeBits = Cst1->getBitWidth(); unsigned ShAmt = (unsigned)Cst1->getLimitedValue(TypeBits); if (ShAmt < TypeBits && ShAmt != 0) { - Value *Xor = Builder->CreateXor(A, B, I.getName() + ".unshifted"); + Value *Xor = Builder.CreateXor(A, B, I.getName() + ".unshifted"); APInt AndVal = APInt::getLowBitsSet(TypeBits, TypeBits - ShAmt); - Value *And = Builder->CreateAnd(Xor, Builder->getInt(AndVal), + Value *And = Builder.CreateAnd(Xor, Builder.getInt(AndVal), I.getName() + ".mask"); - return new ICmpInst(I.getPredicate(), And, - Constant::getNullValue(Cst1->getType())); + return new ICmpInst(Pred, And, Constant::getNullValue(Cst1->getType())); } } @@ -3332,11 +3449,20 @@ Instruction *InstCombiner::foldICmpEquality(ICmpInst &I) { APInt CmpV = Cst1->getValue().zext(ASize); CmpV <<= ShAmt; - Value *Mask = Builder->CreateAnd(A, Builder->getInt(MaskV)); - return new ICmpInst(I.getPredicate(), Mask, Builder->getInt(CmpV)); + Value *Mask = Builder.CreateAnd(A, Builder.getInt(MaskV)); + return new ICmpInst(Pred, Mask, Builder.getInt(CmpV)); } } + // If both operands are byte-swapped or bit-reversed, just compare the + // original values. + // TODO: Move this to a function similar to foldICmpIntrinsicWithConstant() + // and handle more intrinsics. + if ((match(Op0, m_BSwap(m_Value(A))) && match(Op1, m_BSwap(m_Value(B)))) || + (match(Op0, m_BitReverse(m_Value(A))) && + match(Op1, m_BitReverse(m_Value(B))))) + return new ICmpInst(Pred, A, B); + return nullptr; } @@ -3361,7 +3487,7 @@ Instruction *InstCombiner::foldICmpWithCastAndCast(ICmpInst &ICmp) { RHSOp = RHSC->getOperand(0); // If the pointer types don't match, insert a bitcast. 
if (LHSCIOp->getType() != RHSOp->getType()) - RHSOp = Builder->CreateBitCast(RHSOp, LHSCIOp->getType()); + RHSOp = Builder.CreateBitCast(RHSOp, LHSCIOp->getType()); } } else if (auto *RHSC = dyn_cast(ICmp.getOperand(1))) { RHSOp = ConstantExpr::getIntToPtr(RHSC, SrcTy); @@ -3445,7 +3571,7 @@ Instruction *InstCombiner::foldICmpWithCastAndCast(ICmpInst &ICmp) { // We're performing an unsigned comp with a sign extended value. // This is true if the input is >= 0. [aka >s -1] Constant *NegOne = Constant::getAllOnesValue(SrcTy); - Value *Result = Builder->CreateICmpSGT(LHSCIOp, NegOne, ICmp.getName()); + Value *Result = Builder.CreateICmpSGT(LHSCIOp, NegOne, ICmp.getName()); // Finally, return the value computed. if (ICmp.getPredicate() == ICmpInst::ICMP_ULT) @@ -3473,7 +3599,7 @@ bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS, // may be pointing to the compare. We want to insert the new instructions // before the add in case there are uses of the add between the add and the // compare. - Builder->SetInsertPoint(&OrigI); + Builder.SetInsertPoint(&OrigI); switch (OCF) { case OCF_INVALID: @@ -3482,11 +3608,11 @@ bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS, case OCF_UNSIGNED_ADD: { OverflowResult OR = computeOverflowForUnsignedAdd(LHS, RHS, &OrigI); if (OR == OverflowResult::NeverOverflows) - return SetResult(Builder->CreateNUWAdd(LHS, RHS), Builder->getFalse(), + return SetResult(Builder.CreateNUWAdd(LHS, RHS), Builder.getFalse(), true); if (OR == OverflowResult::AlwaysOverflows) - return SetResult(Builder->CreateAdd(LHS, RHS), Builder->getTrue(), true); + return SetResult(Builder.CreateAdd(LHS, RHS), Builder.getTrue(), true); // Fall through uadd into sadd LLVM_FALLTHROUGH; @@ -3494,13 +3620,13 @@ bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS, case OCF_SIGNED_ADD: { // X + 0 -> {X, false} if (match(RHS, m_Zero())) - return SetResult(LHS, Builder->getFalse(), false); + return SetResult(LHS, Builder.getFalse(), false); // We can strength reduce this signed add into a regular add if we can prove // that it will never overflow. 
if (OCF == OCF_SIGNED_ADD) - if (WillNotOverflowSignedAdd(LHS, RHS, OrigI)) - return SetResult(Builder->CreateNSWAdd(LHS, RHS), Builder->getFalse(), + if (willNotOverflowSignedAdd(LHS, RHS, OrigI)) + return SetResult(Builder.CreateNSWAdd(LHS, RHS), Builder.getFalse(), true); break; } @@ -3509,15 +3635,15 @@ bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS, case OCF_SIGNED_SUB: { // X - 0 -> {X, false} if (match(RHS, m_Zero())) - return SetResult(LHS, Builder->getFalse(), false); + return SetResult(LHS, Builder.getFalse(), false); if (OCF == OCF_SIGNED_SUB) { - if (WillNotOverflowSignedSub(LHS, RHS, OrigI)) - return SetResult(Builder->CreateNSWSub(LHS, RHS), Builder->getFalse(), + if (willNotOverflowSignedSub(LHS, RHS, OrigI)) + return SetResult(Builder.CreateNSWSub(LHS, RHS), Builder.getFalse(), true); } else { - if (WillNotOverflowUnsignedSub(LHS, RHS, OrigI)) - return SetResult(Builder->CreateNUWSub(LHS, RHS), Builder->getFalse(), + if (willNotOverflowUnsignedSub(LHS, RHS, OrigI)) + return SetResult(Builder.CreateNUWSub(LHS, RHS), Builder.getFalse(), true); } break; @@ -3526,28 +3652,28 @@ bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS, case OCF_UNSIGNED_MUL: { OverflowResult OR = computeOverflowForUnsignedMul(LHS, RHS, &OrigI); if (OR == OverflowResult::NeverOverflows) - return SetResult(Builder->CreateNUWMul(LHS, RHS), Builder->getFalse(), + return SetResult(Builder.CreateNUWMul(LHS, RHS), Builder.getFalse(), true); if (OR == OverflowResult::AlwaysOverflows) - return SetResult(Builder->CreateMul(LHS, RHS), Builder->getTrue(), true); + return SetResult(Builder.CreateMul(LHS, RHS), Builder.getTrue(), true); LLVM_FALLTHROUGH; } case OCF_SIGNED_MUL: // X * undef -> undef if (isa(RHS)) - return SetResult(RHS, UndefValue::get(Builder->getInt1Ty()), false); + return SetResult(RHS, UndefValue::get(Builder.getInt1Ty()), false); // X * 0 -> {0, false} if (match(RHS, m_Zero())) - return SetResult(RHS, Builder->getFalse(), false); + return SetResult(RHS, Builder.getFalse(), false); // X * 1 -> {X, false} if (match(RHS, m_One())) - return SetResult(LHS, Builder->getFalse(), false); + return SetResult(LHS, Builder.getFalse(), false); if (OCF == OCF_SIGNED_MUL) - if (WillNotOverflowSignedMul(LHS, RHS, OrigI)) - return SetResult(Builder->CreateNSWMul(LHS, RHS), Builder->getFalse(), + if (willNotOverflowSignedMul(LHS, RHS, OrigI)) + return SetResult(Builder.CreateNSWMul(LHS, RHS), Builder.getFalse(), true); break; } @@ -3623,6 +3749,11 @@ static Instruction *processUMulZExtIdiom(ICmpInst &I, Value *MulVal, const APInt &CVal = CI->getValue(); if (CVal.getBitWidth() - CVal.countLeadingZeros() > MulWidth) return nullptr; + } else { + // In this case we could have the operand of the binary operation + // being defined in another block, and performing the replacement + // could break the dominance relation. + return nullptr; } } else { // Other uses prohibit this transformation. 
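// Not from the patch: the mul(zext A, zext B) --> umul.with.overflow(A, B)
// rewrite in the hunk just below relies on "wide product u> UMAX(narrow)"
// being exactly the narrow unsigned multiply's overflow bit. Spot check for
// u16 via the GCC/Clang __builtin_mul_overflow builtin:
#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t A : {0u, 1u, 255u, 256u, 65535u})
    for (uint32_t B : {0u, 2u, 257u, 65535u}) {
      uint32_t Wide = A * B; // product of two zext'd u16 values
      uint16_t Narrow;
      bool Overflows =
          __builtin_mul_overflow((uint16_t)A, (uint16_t)B, &Narrow);
      assert((Wide > 0xFFFFu) == Overflows);
      assert(Narrow == (uint16_t)Wide); // low half always matches
    }
  return 0;
}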
@@ -3712,25 +3843,25 @@ static Instruction *processUMulZExtIdiom(ICmpInst &I, Value *MulVal, return nullptr; } - InstCombiner::BuilderTy *Builder = IC.Builder; - Builder->SetInsertPoint(MulInstr); + InstCombiner::BuilderTy &Builder = IC.Builder; + Builder.SetInsertPoint(MulInstr); // Replace: mul(zext A, zext B) --> mul.with.overflow(A, B) Value *MulA = A, *MulB = B; if (WidthA < MulWidth) - MulA = Builder->CreateZExt(A, MulType); + MulA = Builder.CreateZExt(A, MulType); if (WidthB < MulWidth) - MulB = Builder->CreateZExt(B, MulType); + MulB = Builder.CreateZExt(B, MulType); Value *F = Intrinsic::getDeclaration(I.getModule(), Intrinsic::umul_with_overflow, MulType); - CallInst *Call = Builder->CreateCall(F, {MulA, MulB}, "umul"); + CallInst *Call = Builder.CreateCall(F, {MulA, MulB}, "umul"); IC.Worklist.Add(MulInstr); // If there are uses of mul result other than the comparison, we know that // they are truncation or binary AND. Change them to use the result of // mul.with.overflow and adjust the mask/size accordingly. if (MulVal->hasNUsesOrMore(2)) { - Value *Mul = Builder->CreateExtractValue(Call, 0, "umul.value"); + Value *Mul = Builder.CreateExtractValue(Call, 0, "umul.value"); for (User *U : MulVal->users()) { if (U == &I || U == OtherVal) continue; @@ -3744,9 +3875,9 @@ static Instruction *processUMulZExtIdiom(ICmpInst &I, Value *MulVal, // Replace (mul & mask) --> zext (mul.with.overflow & short_mask) ConstantInt *CI = cast<ConstantInt>(BO->getOperand(1)); APInt ShortMask = CI->getValue().trunc(MulWidth); - Value *ShortAnd = Builder->CreateAnd(Mul, ShortMask); + Value *ShortAnd = Builder.CreateAnd(Mul, ShortMask); Instruction *Zext = - cast<Instruction>(Builder->CreateZExt(ShortAnd, BO->getType())); + cast<Instruction>(Builder.CreateZExt(ShortAnd, BO->getType())); IC.Worklist.Add(Zext); IC.replaceInstUsesWith(*BO, Zext); } else { @@ -3783,7 +3914,7 @@ static Instruction *processUMulZExtIdiom(ICmpInst &I, Value *MulVal, llvm_unreachable("Unexpected predicate"); } if (Inverse) { - Value *Res = Builder->CreateExtractValue(Call, 1); + Value *Res = Builder.CreateExtractValue(Call, 1); return BinaryOperator::CreateNot(Res); } @@ -4080,7 +4211,7 @@ Instruction *InstCombiner::foldICmpUsingKnownBits(ICmpInst &I) { // Check if the LHS is 8 >>u x and the result is a power of 2 like 1. const APInt *CI; - if (Op0KnownZeroInverted == 1 && + if (Op0KnownZeroInverted.isOneValue() && match(LHS, m_LShr(m_Power2(CI), m_Value(X)))) { // ((8 >>u X) & 1) == 0 -> X != 3 // ((8 >>u X) & 1) != 0 -> X == 3 @@ -4138,7 +4269,7 @@ Instruction *InstCombiner::foldICmpUsingKnownBits(ICmpInst &I) { if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { if (Op1Max == Op0Min + 1) // A <u C -> A == C-1 if min(A)+1 == C return new ICmpInst(ICmpInst::ICMP_EQ, Op0, - Builder->getInt(CI->getValue() - 1)); + Builder.getInt(CI->getValue() - 1)); } break; case ICmpInst::ICMP_SGT: @@ -4152,7 +4283,7 @@ Instruction *InstCombiner::foldICmpUsingKnownBits(ICmpInst &I) { if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { if (Op1Min == Op0Max - 1) // A >s C -> A == C+1 if max(A)-1 == C return new ICmpInst(ICmpInst::ICMP_EQ, Op0, - Builder->getInt(CI->getValue() + 1)); + Builder.getInt(CI->getValue() + 1)); } break; case ICmpInst::ICMP_SGE: @@ -4253,6 +4384,80 @@ static ICmpInst *canonicalizeCmpWithConstant(ICmpInst &I) { return new ICmpInst(NewPred, Op0, ConstantExpr::getAdd(Op1C, OneOrNegOne)); } +/// Integer compare with boolean values can always be turned into bitwise ops.
+static Instruction *canonicalizeICmpBool(ICmpInst &I, + InstCombiner::BuilderTy &Builder) { + Value *A = I.getOperand(0), *B = I.getOperand(1); + assert(A->getType()->isIntOrIntVectorTy(1) && "Bools only"); + + // A boolean compared to true/false can be simplified to Op0/true/false in + // 14 out of the 20 (10 predicates * 2 constants) possible combinations. + // Cases not handled by InstSimplify are always 'not' of Op0. + if (match(B, m_Zero())) { + switch (I.getPredicate()) { + case CmpInst::ICMP_EQ: // A == 0 -> !A + case CmpInst::ICMP_ULE: // A <=u 0 -> !A + case CmpInst::ICMP_SGE: // A >=s 0 -> !A + return BinaryOperator::CreateNot(A); + default: + llvm_unreachable("ICmp i1 X, C not simplified as expected."); + } + } else if (match(B, m_One())) { + switch (I.getPredicate()) { + case CmpInst::ICMP_NE: // A != 1 -> !A + case CmpInst::ICMP_ULT: // A !A + case CmpInst::ICMP_SGT: // A >s -1 -> !A + return BinaryOperator::CreateNot(A); + default: + llvm_unreachable("ICmp i1 X, C not simplified as expected."); + } + } + + switch (I.getPredicate()) { + default: + llvm_unreachable("Invalid icmp instruction!"); + case ICmpInst::ICMP_EQ: + // icmp eq i1 A, B -> ~(A ^ B) + return BinaryOperator::CreateNot(Builder.CreateXor(A, B)); + + case ICmpInst::ICMP_NE: + // icmp ne i1 A, B -> A ^ B + return BinaryOperator::CreateXor(A, B); + + case ICmpInst::ICMP_UGT: + // icmp ugt -> icmp ult + std::swap(A, B); + LLVM_FALLTHROUGH; + case ICmpInst::ICMP_ULT: + // icmp ult i1 A, B -> ~A & B + return BinaryOperator::CreateAnd(Builder.CreateNot(A), B); + + case ICmpInst::ICMP_SGT: + // icmp sgt -> icmp slt + std::swap(A, B); + LLVM_FALLTHROUGH; + case ICmpInst::ICMP_SLT: + // icmp slt i1 A, B -> A & ~B + return BinaryOperator::CreateAnd(Builder.CreateNot(B), A); + + case ICmpInst::ICMP_UGE: + // icmp uge -> icmp ule + std::swap(A, B); + LLVM_FALLTHROUGH; + case ICmpInst::ICMP_ULE: + // icmp ule i1 A, B -> ~A | B + return BinaryOperator::CreateOr(Builder.CreateNot(A), B); + + case ICmpInst::ICMP_SGE: + // icmp sge -> icmp sle + std::swap(A, B); + LLVM_FALLTHROUGH; + case ICmpInst::ICMP_SLE: + // icmp sle i1 A, B -> A | ~B + return BinaryOperator::CreateOr(Builder.CreateNot(B), A); + } +} + Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { bool Changed = false; Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); @@ -4290,49 +4495,9 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { } } - Type *Ty = Op0->getType(); - - // icmp's with boolean values can always be turned into bitwise operations - if (Ty->getScalarType()->isIntegerTy(1)) { - switch (I.getPredicate()) { - default: llvm_unreachable("Invalid icmp instruction!"); - case ICmpInst::ICMP_EQ: { // icmp eq i1 A, B -> ~(A^B) - Value *Xor = Builder->CreateXor(Op0, Op1, I.getName() + "tmp"); - return BinaryOperator::CreateNot(Xor); - } - case ICmpInst::ICMP_NE: // icmp ne i1 A, B -> A^B - return BinaryOperator::CreateXor(Op0, Op1); - - case ICmpInst::ICMP_UGT: - std::swap(Op0, Op1); // Change icmp ugt -> icmp ult - LLVM_FALLTHROUGH; - case ICmpInst::ICMP_ULT:{ // icmp ult i1 A, B -> ~A & B - Value *Not = Builder->CreateNot(Op0, I.getName() + "tmp"); - return BinaryOperator::CreateAnd(Not, Op1); - } - case ICmpInst::ICMP_SGT: - std::swap(Op0, Op1); // Change icmp sgt -> icmp slt - LLVM_FALLTHROUGH; - case ICmpInst::ICMP_SLT: { // icmp slt i1 A, B -> A & ~B - Value *Not = Builder->CreateNot(Op1, I.getName() + "tmp"); - return BinaryOperator::CreateAnd(Not, Op0); - } - case ICmpInst::ICMP_UGE: - std::swap(Op0, Op1); // Change icmp uge -> icmp ule 
 Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
   bool Changed = false;
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
@@ -4290,49 +4495,9 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
     }
   }
 
-  Type *Ty = Op0->getType();
-
-  // icmp's with boolean values can always be turned into bitwise operations
-  if (Ty->getScalarType()->isIntegerTy(1)) {
-    switch (I.getPredicate()) {
-    default: llvm_unreachable("Invalid icmp instruction!");
-    case ICmpInst::ICMP_EQ: {               // icmp eq i1 A, B -> ~(A^B)
-      Value *Xor = Builder->CreateXor(Op0, Op1, I.getName() + "tmp");
-      return BinaryOperator::CreateNot(Xor);
-    }
-    case ICmpInst::ICMP_NE:                 // icmp ne i1 A, B -> A^B
-      return BinaryOperator::CreateXor(Op0, Op1);
-
-    case ICmpInst::ICMP_UGT:
-      std::swap(Op0, Op1);                  // Change icmp ugt -> icmp ult
-      LLVM_FALLTHROUGH;
-    case ICmpInst::ICMP_ULT:{               // icmp ult i1 A, B -> ~A & B
-      Value *Not = Builder->CreateNot(Op0, I.getName() + "tmp");
-      return BinaryOperator::CreateAnd(Not, Op1);
-    }
-    case ICmpInst::ICMP_SGT:
-      std::swap(Op0, Op1);                  // Change icmp sgt -> icmp slt
-      LLVM_FALLTHROUGH;
-    case ICmpInst::ICMP_SLT: {              // icmp slt i1 A, B -> A & ~B
-      Value *Not = Builder->CreateNot(Op1, I.getName() + "tmp");
-      return BinaryOperator::CreateAnd(Not, Op0);
-    }
-    case ICmpInst::ICMP_UGE:
-      std::swap(Op0, Op1);                  // Change icmp uge -> icmp ule
-      LLVM_FALLTHROUGH;
-    case ICmpInst::ICMP_ULE: {              // icmp ule i1 A, B -> ~A | B
-      Value *Not = Builder->CreateNot(Op0, I.getName() + "tmp");
-      return BinaryOperator::CreateOr(Not, Op1);
-    }
-    case ICmpInst::ICMP_SGE:
-      std::swap(Op0, Op1);                  // Change icmp sge -> icmp sle
-      LLVM_FALLTHROUGH;
-    case ICmpInst::ICMP_SLE: {              // icmp sle i1 A, B -> A | ~B
-      Value *Not = Builder->CreateNot(Op1, I.getName() + "tmp");
-      return BinaryOperator::CreateOr(Not, Op0);
-    }
-    }
-  }
+  if (Op0->getType()->isIntOrIntVectorTy(1))
+    if (Instruction *Res = canonicalizeICmpBool(I, Builder))
+      return Res;
 
   if (ICmpInst *NewICmp = canonicalizeCmpWithConstant(I))
     return NewICmp;
@@ -4424,7 +4589,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
         Op1 = ConstantExpr::getBitCast(Op1C, Op0->getType());
       } else {
         // Otherwise, cast the RHS right before the icmp
-        Op1 = Builder->CreateBitCast(Op1, Op0->getType());
+        Op1 = Builder.CreateBitCast(Op1, Op0->getType());
       }
     }
     return new ICmpInst(I.getPredicate(), Op0, Op1);
@@ -4456,18 +4621,20 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
    // if A is a power of 2.
    if (match(Op0, m_And(m_Value(A), m_Not(m_Value(B)))) &&
        match(Op1, m_Zero()) &&
-        isKnownToBeAPowerOfTwo(A, DL, false, 0, &AC, &I, &DT) && I.isEquality())
-      return new ICmpInst(I.getInversePredicate(),
-                          Builder->CreateAnd(A, B),
+        isKnownToBeAPowerOfTwo(A, false, 0, &I) && I.isEquality())
+      return new ICmpInst(I.getInversePredicate(), Builder.CreateAnd(A, B),
                          Op1);
 
-    // ~x < ~y --> y < x
-    // ~x < cst --> ~cst < x
+    // ~X < ~Y --> Y < X
+    // ~X < C -->  X > ~C
    if (match(Op0, m_Not(m_Value(A)))) {
      if (match(Op1, m_Not(m_Value(B))))
        return new ICmpInst(I.getPredicate(), B, A);
-      if (ConstantInt *RHSC = dyn_cast<ConstantInt>(Op1))
-        return new ICmpInst(I.getPredicate(), ConstantExpr::getNot(RHSC), A);
+
+      const APInt *C;
+      if (match(Op1, m_APInt(C)))
+        return new ICmpInst(I.getSwappedPredicate(), A,
+                            ConstantInt::get(Op1->getType(), ~(*C)));
    }
 
    Instruction *AddI = nullptr;
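Both of the rewritten folds rest on bitwise-not being an order-reversing bijection on the unsigned integers. A quick standalone check of the two comment lines above (uint32_t stands in for any width; the constant is an arbitrary choice for illustration):

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t C = 0xDEADBEEFu;        // arbitrary test constant
      for (uint32_t x = 0; x < 1000; ++x) {
        for (uint32_t y = 0; y < 1000; ++y)
          assert((~x < ~y) == (y < x));      // ~X < ~Y  -->  Y < X
        assert((~x < C) == (x > ~C));        // ~X < C   -->  X > ~C
      }
    }

Using the swapped predicate against ~C (rather than the old ConstantExpr::getNot form) keeps the constant on the right-hand side, the canonical position.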
@@ -4555,10 +4722,10 @@ Instruction *InstCombiner::foldFCmpIntToFPConst(FCmpInst &I, Instruction *LHSI,
       RHSRoundInt.roundToIntegral(APFloat::rmNearestTiesToEven);
       if (RHS.compare(RHSRoundInt) != APFloat::cmpEqual) {
         if (P == FCmpInst::FCMP_OEQ || P == FCmpInst::FCMP_UEQ)
-          return replaceInstUsesWith(I, Builder->getFalse());
+          return replaceInstUsesWith(I, Builder.getFalse());
 
         assert(P == FCmpInst::FCMP_ONE || P == FCmpInst::FCMP_UNE);
-        return replaceInstUsesWith(I, Builder->getTrue());
+        return replaceInstUsesWith(I, Builder.getTrue());
       }
     }
@@ -4624,9 +4791,9 @@ Instruction *InstCombiner::foldFCmpIntToFPConst(FCmpInst &I, Instruction *LHSI,
       Pred = ICmpInst::ICMP_NE;
       break;
     case FCmpInst::FCMP_ORD:
-      return replaceInstUsesWith(I, Builder->getTrue());
+      return replaceInstUsesWith(I, Builder.getTrue());
     case FCmpInst::FCMP_UNO:
-      return replaceInstUsesWith(I, Builder->getFalse());
+      return replaceInstUsesWith(I, Builder.getFalse());
     }
 
     // Now we know that the APFloat is a normal number, zero or inf.
@@ -4644,8 +4811,8 @@ Instruction *InstCombiner::foldFCmpIntToFPConst(FCmpInst &I, Instruction *LHSI,
       if (SMax.compare(RHS) == APFloat::cmpLessThan) {  // smax < 13123.0
         if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SLT ||
             Pred == ICmpInst::ICMP_SLE)
-          return replaceInstUsesWith(I, Builder->getTrue());
-        return replaceInstUsesWith(I, Builder->getFalse());
+          return replaceInstUsesWith(I, Builder.getTrue());
+        return replaceInstUsesWith(I, Builder.getFalse());
       }
     } else {
       // If the RHS value is > UnsignedMax, fold the comparison. This handles
@@ -4656,8 +4823,8 @@ Instruction *InstCombiner::foldFCmpIntToFPConst(FCmpInst &I, Instruction *LHSI,
       if (UMax.compare(RHS) == APFloat::cmpLessThan) {  // umax < 13123.0
         if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_ULT ||
             Pred == ICmpInst::ICMP_ULE)
-          return replaceInstUsesWith(I, Builder->getTrue());
-        return replaceInstUsesWith(I, Builder->getFalse());
+          return replaceInstUsesWith(I, Builder.getTrue());
+        return replaceInstUsesWith(I, Builder.getFalse());
       }
     }
 
@@ -4669,8 +4836,8 @@ Instruction *InstCombiner::foldFCmpIntToFPConst(FCmpInst &I, Instruction *LHSI,
       if (SMin.compare(RHS) == APFloat::cmpGreaterThan) { // smin > 12312.0
         if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SGT ||
             Pred == ICmpInst::ICMP_SGE)
-          return replaceInstUsesWith(I, Builder->getTrue());
-        return replaceInstUsesWith(I, Builder->getFalse());
+          return replaceInstUsesWith(I, Builder.getTrue());
+        return replaceInstUsesWith(I, Builder.getFalse());
       }
     } else {
       // See if the RHS value is < UnsignedMin.
@@ -4680,8 +4847,8 @@ Instruction *InstCombiner::foldFCmpIntToFPConst(FCmpInst &I, Instruction *LHSI,
       if (SMin.compare(RHS) == APFloat::cmpGreaterThan) { // umin > 12312.0
         if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_UGT ||
             Pred == ICmpInst::ICMP_UGE)
-          return replaceInstUsesWith(I, Builder->getTrue());
-        return replaceInstUsesWith(I, Builder->getFalse());
+          return replaceInstUsesWith(I, Builder.getTrue());
+        return replaceInstUsesWith(I, Builder.getFalse());
       }
     }
 
@@ -4703,14 +4870,14 @@ Instruction *InstCombiner::foldFCmpIntToFPConst(FCmpInst &I, Instruction *LHSI,
     switch (Pred) {
     default: llvm_unreachable("Unexpected integer comparison!");
     case ICmpInst::ICMP_NE:  // (float)int != 4.4   --> true
-      return replaceInstUsesWith(I, Builder->getTrue());
+      return replaceInstUsesWith(I, Builder.getTrue());
     case ICmpInst::ICMP_EQ:  // (float)int == 4.4   --> false
-      return replaceInstUsesWith(I, Builder->getFalse());
+      return replaceInstUsesWith(I, Builder.getFalse());
     case ICmpInst::ICMP_ULE:
       // (float)int <= 4.4   --> int <= 4
       // (float)int <= -4.4  --> false
       if (RHS.isNegative())
-        return replaceInstUsesWith(I, Builder->getFalse());
+        return replaceInstUsesWith(I, Builder.getFalse());
       break;
     case ICmpInst::ICMP_SLE:
       // (float)int <= 4.4   --> int <= 4
@@ -4722,7 +4889,7 @@ Instruction *InstCombiner::foldFCmpIntToFPConst(FCmpInst &I, Instruction *LHSI,
       // (float)int < -4.4   --> false
       // (float)int < 4.4    --> int <= 4
       if (RHS.isNegative())
-        return replaceInstUsesWith(I, Builder->getFalse());
+        return replaceInstUsesWith(I, Builder.getFalse());
       Pred = ICmpInst::ICMP_ULE;
       break;
     case ICmpInst::ICMP_SLT:
@@ -4735,7 +4902,7 @@ Instruction *InstCombiner::foldFCmpIntToFPConst(FCmpInst &I, Instruction *LHSI,
       // (float)int > 4.4    --> int > 4
       // (float)int > -4.4   --> true
       if (RHS.isNegative())
-        return replaceInstUsesWith(I, Builder->getTrue());
+        return replaceInstUsesWith(I, Builder.getTrue());
       break;
     case ICmpInst::ICMP_SGT:
       // (float)int > 4.4    --> int > 4
@@ -4747,7 +4914,7 @@ Instruction *InstCombiner::foldFCmpIntToFPConst(FCmpInst &I, Instruction *LHSI,
       // (float)int >= -4.4  --> true
       // (float)int >= 4.4   --> int > 4
       if (RHS.isNegative())
-        return replaceInstUsesWith(I, Builder->getTrue());
+        return replaceInstUsesWith(I, Builder.getTrue());
       Pred = ICmpInst::ICMP_UGT;
       break;
     case ICmpInst::ICMP_SGE:
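foldFCmpIntToFPConst reasons about compares between a widened integer and an FP constant. Two of the cases, restated as a standalone sketch (4.4 is the same example constant the comments use; a double holds any i32 exactly, while 4.4 has no exact binary representation, so equality is impossible and ordered-less becomes an integer compare):

    #include <cassert>

    int main() {
      for (int x = -1000; x <= 1000; ++x) {
        assert(!((double)x == 4.4));             // (float)int == 4.4 --> false
        assert(((double)x <= 4.4) == (x <= 4));  // (float)int <= 4.4 --> int <= 4
      }
    }

diff --git a/interpreter/llvm/src/lib/Transforms/InstCombine/InstCombineInternal.h b/interpreter/llvm/src/lib/Transforms/InstCombine/InstCombineInternal.h
index 3be6419a129a4..c38a4981bf1dc 100644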
--- a/interpreter/llvm/src/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/interpreter/llvm/src/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -21,7 +21,6 @@
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/TargetFolder.h"
 #include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/DIBuilder.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/InstVisitor.h"
@@ -29,7 +28,7 @@
 #include "llvm/IR/Operator.h"
 #include "llvm/IR/PatternMatch.h"
 #include "llvm/Pass.h"
-#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/KnownBits.h"
 #include "llvm/Transforms/InstCombine/InstCombineWorklist.h"
 #include "llvm/Transforms/Utils/Local.h"
@@ -73,6 +72,39 @@ static inline unsigned getComplexity(Value *V) {
   return isa<Constant>(V) ? (isa<GlobalValue>(V) ? 0 : 1) : 2;
 }
 
+/// Predicate canonicalization reduces the number of patterns that need to be
+/// matched by other transforms. For example, we may swap the operands of a
+/// conditional branch or select to create a compare with a canonical (inverted)
+/// predicate which is then more likely to be matched with other values.
+static inline bool isCanonicalPredicate(CmpInst::Predicate Pred) {
+  switch (Pred) {
+  case CmpInst::ICMP_NE:
+  case CmpInst::ICMP_ULE:
+  case CmpInst::ICMP_SLE:
+  case CmpInst::ICMP_UGE:
+  case CmpInst::ICMP_SGE:
+  // TODO: There are 16 FCMP predicates. Should others be (not) canonical?
+  case CmpInst::FCMP_ONE:
+  case CmpInst::FCMP_OLE:
+  case CmpInst::FCMP_OGE:
+    return false;
+  default:
+    return true;
+  }
+}
+
+/// Return the source operand of a potentially bitcasted value while optionally
+/// checking if it has one use. If there is no bitcast or the one use check is
+/// not met, return the input value itself.
+static inline Value *peekThroughBitcast(Value *V, bool OneUseOnly = false) {
+  if (auto *BitCast = dyn_cast<BitCastInst>(V))
+    if (!OneUseOnly || BitCast->hasOneUse())
+      return BitCast->getOperand(0);
+
+  // V is not a bitcast or V has more than one use and OneUseOnly is true.
+  return V;
+}
+
 /// \brief Add one to a Constant
 static inline Constant *AddOne(Constant *C) {
   return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1));
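isCanonicalPredicate drives rewrites that invert a non-canonical compare and swap the dependent select or branch arms, exactly as its comment describes. The underlying identity, spot-checked in plain C++ (illustrative only):

    #include <cassert>

    int main() {
      for (int a = -3; a <= 3; ++a)
        for (int b = -3; b <= 3; ++b)
          for (int t = 0; t <= 1; ++t)
            for (int f = 0; f <= 1; ++f)
              // "a <= b ? t : f" uses non-canonical SLE; inverting to SGT
              // and swapping the arms selects the same value.
              assert((a <= b ? t : f) == (a > b ? f : t));
    }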
@@ -97,11 +129,10 @@ static inline bool IsFreeToInvert(Value *V, bool WillInvertAllUses) {
     return true;
 
   // A vector of constant integers can be inverted easily.
-  Constant *CV;
-  if (V->getType()->isVectorTy() && match(V, PatternMatch::m_Constant(CV))) {
+  if (V->getType()->isVectorTy() && isa<Constant>(V)) {
     unsigned NumElts = V->getType()->getVectorNumElements();
     for (unsigned i = 0; i != NumElts; ++i) {
-      Constant *Elt = CV->getAggregateElement(i);
+      Constant *Elt = cast<Constant>(V)->getAggregateElement(i);
       if (!Elt)
         return false;
@@ -179,7 +210,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner
   /// \brief An IRBuilder that automatically inserts new instructions into the
   /// worklist.
   typedef IRBuilder<TargetFolder, IRBuilderCallbackInserter> BuilderTy;
-  BuilderTy *Builder;
+  BuilderTy &Builder;
 
 private:
   // Mode in which we are running the combiner.
@@ -202,7 +233,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner
   bool MadeIRChange;
 
 public:
-  InstCombiner(InstCombineWorklist &Worklist, BuilderTy *Builder,
+  InstCombiner(InstCombineWorklist &Worklist, BuilderTy &Builder,
                bool MinimizeSize, bool ExpensiveCombines, AliasAnalysis *AA,
                AssumptionCache &AC, TargetLibraryInfo &TLI, DominatorTree &DT,
                const DataLayout &DL, LoopInfo *LI)
@@ -253,15 +284,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner
   Instruction *visitSDiv(BinaryOperator &I);
   Instruction *visitFDiv(BinaryOperator &I);
   Value *simplifyRangeCheck(ICmpInst *Cmp0, ICmpInst *Cmp1, bool Inverted);
-  Value *FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS);
-  Value *FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS);
   Instruction *visitAnd(BinaryOperator &I);
-  Value *FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, Instruction *CxtI);
-  Value *FoldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS);
-  Instruction *FoldOrWithConstants(BinaryOperator &I, Value *Op, Value *A,
-                                   Value *B, Value *C);
-  Instruction *FoldXorWithConstants(BinaryOperator &I, Value *Op, Value *A,
-                                    Value *B, Value *C);
   Instruction *visitOr(BinaryOperator &I);
   Instruction *visitXor(BinaryOperator &I);
   Instruction *visitShl(BinaryOperator &I);
@@ -388,10 +411,27 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner
                                  bool DoTransform = true);
   Instruction *transformSExtICmp(ICmpInst *ICI, Instruction &CI);
-  bool WillNotOverflowSignedAdd(Value *LHS, Value *RHS, Instruction &CxtI);
-  bool WillNotOverflowSignedSub(Value *LHS, Value *RHS, Instruction &CxtI);
-  bool WillNotOverflowUnsignedSub(Value *LHS, Value *RHS, Instruction &CxtI);
-  bool WillNotOverflowSignedMul(Value *LHS, Value *RHS, Instruction &CxtI);
+  bool willNotOverflowSignedAdd(const Value *LHS, const Value *RHS,
+                                const Instruction &CxtI) const {
+    return computeOverflowForSignedAdd(LHS, RHS, &CxtI) ==
+           OverflowResult::NeverOverflows;
+  };
+  bool willNotOverflowUnsignedAdd(const Value *LHS, const Value *RHS,
+                                  const Instruction &CxtI) const {
+    return computeOverflowForUnsignedAdd(LHS, RHS, &CxtI) ==
+           OverflowResult::NeverOverflows;
+  };
+  bool willNotOverflowSignedSub(const Value *LHS, const Value *RHS,
+                                const Instruction &CxtI) const;
+  bool willNotOverflowUnsignedSub(const Value *LHS, const Value *RHS,
+                                  const Instruction &CxtI) const;
+  bool willNotOverflowSignedMul(const Value *LHS, const Value *RHS,
+                                const Instruction &CxtI) const;
+  bool willNotOverflowUnsignedMul(const Value *LHS, const Value *RHS,
+                                  const Instruction &CxtI) const {
+    return computeOverflowForUnsignedMul(LHS, RHS, &CxtI) ==
+           OverflowResult::NeverOverflows;
+  };
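The new willNotOverflow* wrappers simply ask ValueTracking whether overflow is provably impossible, so nsw/nuw flags can be set. One of the rules behind the signed-add case, restated for i8 as an illustrative sketch (not the LLVM implementation): if both operands carry at least two sign bits, i.e. lie in [-64, 63], the sum always fits the type:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (int l = -64; l <= 63; ++l)
        for (int r = -64; r <= 63; ++r) {
          int Sum = l + r;                        // exact result, no i8 wrap
          assert(Sum >= INT8_MIN && Sum <= INT8_MAX);
        }
    }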
   Value *EmitGEPOffset(User *GEP);
   Instruction *scalarizePHI(ExtractElementInst &EI, PHINode *PN);
   Value *EvaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask);
@@ -412,6 +452,14 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner
   Instruction::CastOps isEliminableCastPair(const CastInst *CI1,
                                             const CastInst *CI2);
 
+  Value *foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS, Instruction &CxtI);
+  Value *foldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS);
+  Value *foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, Instruction &CxtI);
+  Value *foldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS);
+  Value *foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS);
+
+  Value *foldAndOrOfICmpsOfAndWithPow2(ICmpInst *LHS, ICmpInst *RHS,
+                                       bool JoinedByAnd, Instruction &CxtI);
 
 public:
   /// \brief Inserts an instruction \p New before instruction \p Old
   ///
@@ -490,32 +538,44 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner
     return nullptr; // Don't do anything with FI
   }
 
-  void computeKnownBits(Value *V, KnownBits &Known,
-                        unsigned Depth, Instruction *CxtI) const {
-    return llvm::computeKnownBits(V, Known, DL, Depth, &AC, CxtI, &DT);
+  void computeKnownBits(const Value *V, KnownBits &Known,
+                        unsigned Depth, const Instruction *CxtI) const {
+    llvm::computeKnownBits(V, Known, DL, Depth, &AC, CxtI, &DT);
+  }
+  KnownBits computeKnownBits(const Value *V, unsigned Depth,
+                             const Instruction *CxtI) const {
+    return llvm::computeKnownBits(V, DL, Depth, &AC, CxtI, &DT);
   }
 
-  bool MaskedValueIsZero(Value *V, const APInt &Mask, unsigned Depth = 0,
-                         Instruction *CxtI = nullptr) const {
+  bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero = false,
+                              unsigned Depth = 0,
+                              const Instruction *CxtI = nullptr) {
+    return llvm::isKnownToBeAPowerOfTwo(V, DL, OrZero, Depth, &AC, CxtI, &DT);
+  }
+
+  bool MaskedValueIsZero(const Value *V, const APInt &Mask, unsigned Depth = 0,
+                         const Instruction *CxtI = nullptr) const {
     return llvm::MaskedValueIsZero(V, Mask, DL, Depth, &AC, CxtI, &DT);
   }
-  unsigned ComputeNumSignBits(Value *Op, unsigned Depth = 0,
-                              Instruction *CxtI = nullptr) const {
+  unsigned ComputeNumSignBits(const Value *Op, unsigned Depth = 0,
+                              const Instruction *CxtI = nullptr) const {
    return llvm::ComputeNumSignBits(Op, DL, Depth, &AC, CxtI, &DT);
   }
-  void ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne,
-                      unsigned Depth = 0, Instruction *CxtI = nullptr) const {
-    return llvm::ComputeSignBit(V, KnownZero, KnownOne, DL, Depth, &AC, CxtI,
-                                &DT);
-  }
-  OverflowResult computeOverflowForUnsignedMul(Value *LHS, Value *RHS,
-                                               const Instruction *CxtI) {
+  OverflowResult computeOverflowForUnsignedMul(const Value *LHS,
+                                               const Value *RHS,
+                                               const Instruction *CxtI) const {
     return llvm::computeOverflowForUnsignedMul(LHS, RHS, DL, &AC, CxtI, &DT);
   }
-  OverflowResult computeOverflowForUnsignedAdd(Value *LHS, Value *RHS,
-                                               const Instruction *CxtI) {
+  OverflowResult computeOverflowForUnsignedAdd(const Value *LHS,
+                                               const Value *RHS,
+                                               const Instruction *CxtI) const {
     return llvm::computeOverflowForUnsignedAdd(LHS, RHS, DL, &AC, CxtI, &DT);
   }
+  OverflowResult computeOverflowForSignedAdd(const Value *LHS,
+                                             const Value *RHS,
+                                             const Instruction *CxtI) const {
+    return llvm::computeOverflowForSignedAdd(LHS, RHS, DL, &AC, CxtI, &DT);
+  }
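These wrappers thread DataLayout, AssumptionCache and DominatorTree into the llvm:: helpers. The KnownBits structure they now return pairs a known-zero and a known-one mask; a miniature, self-contained model of the AND transfer function (assumptions: 4-bit values, inputs fully known, names invented for the sketch):

    #include <cassert>
    #include <cstdint>

    struct MiniKnownBits {
      uint8_t Zero, One;  // bit set => that bit of the value is known 0 / 1
    };

    int main() {
      for (uint8_t a = 0; a < 16; ++a)
        for (uint8_t b = 0; b < 16; ++b) {
          MiniKnownBits KA{uint8_t(~a & 0xF), a}, KB{uint8_t(~b & 0xF), b};
          // AND: known one where both are known one; known zero where
          // either is known zero.
          MiniKnownBits KR{uint8_t(KA.Zero | KB.Zero), uint8_t(KA.One & KB.One)};
          uint8_t R = a & b;
          assert((KR.One & ~R) == 0 && (KR.Zero & R) == 0); // facts are sound
        }
    }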
 
   /// Maximum size of array considered when transforming.
   uint64_t MaxArraySizeForCombine;
@@ -536,9 +596,17 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner
 
   /// This tries to simplify binary operations by factorizing out common terms
   /// (e. g. "(A*B)+(A*C)" -> "A*(B+C)").
-  Value *tryFactorization(InstCombiner::BuilderTy *, BinaryOperator &,
-                          Instruction::BinaryOps, Value *, Value *, Value *,
-                          Value *);
+  Value *tryFactorization(BinaryOperator &, Instruction::BinaryOps, Value *,
+                          Value *, Value *, Value *);
+
+  /// Match a select chain which produces one of three values based on whether
+  /// the LHS is less than, equal to, or greater than RHS respectively.
+  /// Return true if we matched a three way compare idiom. The LHS, RHS, Less,
+  /// Equal and Greater values are saved in the matching process and returned to
+  /// the caller.
+  bool matchThreeWayIntCompare(SelectInst *SI, Value *&LHS, Value *&RHS,
+                               ConstantInt *&Less, ConstantInt *&Equal,
+                               ConstantInt *&Greater);
 
   /// \brief Attempts to replace V with a simpler value based on the demanded
   /// bits.
@@ -568,7 +636,6 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner
                                     APInt &UndefElts, unsigned Depth = 0);
 
   Value *SimplifyVectorOp(BinaryOperator &Inst);
-  Value *SimplifyBSwap(BinaryOperator &Inst);
 
 
   /// Given a binary operator, cast instruction, or select which has a PHI node
@@ -617,6 +684,8 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner
   Instruction *foldICmpBinOp(ICmpInst &Cmp);
   Instruction *foldICmpEquality(ICmpInst &Cmp);
 
+  Instruction *foldICmpSelectConstant(ICmpInst &Cmp, Instruction *Select,
+                                      ConstantInt *C);
   Instruction *foldICmpTruncConstant(ICmpInst &Cmp, Instruction *Trunc,
                                      const APInt *C);
   Instruction *foldICmpAndConstant(ICmpInst &Cmp, BinaryOperator *And,
@@ -671,7 +740,8 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner
   Instruction *MatchBSwap(BinaryOperator &I);
   bool SimplifyStoreAtEndOfBlock(StoreInst &SI);
 
-  Instruction *SimplifyElementAtomicMemCpy(ElementAtomicMemCpyInst *AMI);
+  Instruction *
+  SimplifyElementUnorderedAtomicMemCpy(ElementUnorderedAtomicMemCpyInst *AMI);
   Instruction *SimplifyMemTransfer(MemIntrinsic *MI);
   Instruction *SimplifyMemSet(MemSetInst *MI);
 
diff --git a/interpreter/llvm/src/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/interpreter/llvm/src/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 675553017838b..451036545741a 100644
--- a/interpreter/llvm/src/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/interpreter/llvm/src/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -169,6 +169,18 @@ isOnlyCopiedFromConstantGlobal(AllocaInst *AI,
   return nullptr;
 }
 
+/// Returns true if V is dereferenceable for size of alloca.
+static bool isDereferenceableForAllocaSize(const Value *V, const AllocaInst *AI,
+                                           const DataLayout &DL) {
+  if (AI->isArrayAllocation())
+    return false;
+  uint64_t AllocaSize = DL.getTypeStoreSize(AI->getAllocatedType());
+  if (!AllocaSize)
+    return false;
+  return isDereferenceableAndAlignedPointer(V, AI->getAlignment(),
+                                            APInt(64, AllocaSize), DL);
+}
+
 static Instruction *simplifyAllocaArraySize(InstCombiner &IC, AllocaInst &AI) {
   // Check for array size of 1 (scalar allocation).
   if (!AI.isArrayAllocation()) {
@@ -177,7 +189,7 @@ static Instruction *simplifyAllocaArraySize(InstCombiner &IC, AllocaInst &AI) {
       return nullptr;
 
     // Canonicalize it.
-    Value *V = IC.Builder->getInt32(1);
+    Value *V = IC.Builder.getInt32(1);
     AI.setOperand(0, V);
     return &AI;
   }
@@ -185,7 +197,7 @@ static Instruction *simplifyAllocaArraySize(InstCombiner &IC, AllocaInst &AI) {
   // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1
   if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
     Type *NewTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
-    AllocaInst *New = IC.Builder->CreateAlloca(NewTy, nullptr, AI.getName());
+    AllocaInst *New = IC.Builder.CreateAlloca(NewTy, nullptr, AI.getName());
     New->setAlignment(AI.getAlignment());
 
     // Scan to the end of the allocation instructions, to skip over a block of
@@ -217,7 +229,7 @@ static Instruction *simplifyAllocaArraySize(InstCombiner &IC, AllocaInst &AI) {
   // any casting is exposed early.
   Type *IntPtrTy = IC.getDataLayout().getIntPtrType(AI.getType());
   if (AI.getArraySize()->getType() != IntPtrTy) {
-    Value *V = IC.Builder->CreateIntCast(AI.getArraySize(), IntPtrTy, false);
+    Value *V = IC.Builder.CreateIntCast(AI.getArraySize(), IntPtrTy, false);
     AI.setOperand(0, V);
     return &AI;
   }
 
@@ -390,7 +402,8 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
     if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) {
       unsigned SourceAlign = getOrEnforceKnownAlignment(
           Copy->getSource(), AI.getAlignment(), DL, &AI, &AC, &DT);
-      if (AI.getAlignment() <= SourceAlign) {
+      if (AI.getAlignment() <= SourceAlign &&
+          isDereferenceableForAllocaSize(Copy->getSource(), &AI, DL)) {
         DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n');
         DEBUG(dbgs() << "  memcpy = " << *Copy << '\n');
         for (unsigned i = 0, e = ToDelete.size(); i != e; ++i)
@@ -445,10 +458,10 @@ static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewT
   SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
   LI.getAllMetadata(MD);
 
-  LoadInst *NewLoad = IC.Builder->CreateAlignedLoad(
-      IC.Builder->CreateBitCast(Ptr, NewTy->getPointerTo(AS)),
+  LoadInst *NewLoad = IC.Builder.CreateAlignedLoad(
+      IC.Builder.CreateBitCast(Ptr, NewTy->getPointerTo(AS)),
       LI.getAlignment(), LI.isVolatile(), LI.getName() + Suffix);
-  NewLoad->setAtomic(LI.getOrdering(), LI.getSynchScope());
+  NewLoad->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
   MDBuilder MDB(NewLoad->getContext());
   for (const auto &MDPair : MD) {
     unsigned ID = MDPair.first;
@@ -476,21 +489,7 @@ static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewT
       break;
     case LLVMContext::MD_nonnull:
-      // This only directly applies if the new type is also a pointer.
-      if (NewTy->isPointerTy()) {
-        NewLoad->setMetadata(ID, N);
-        break;
-      }
-      // If it's integral now, translate it to !range metadata.
-      if (NewTy->isIntegerTy()) {
-        auto *ITy = cast<IntegerType>(NewTy);
-        auto *NullInt = ConstantExpr::getPtrToInt(
-            ConstantPointerNull::get(cast<PointerType>(Ptr->getType())), ITy);
-        auto *NonNullInt =
-            ConstantExpr::getAdd(NullInt, ConstantInt::get(ITy, 1));
-        NewLoad->setMetadata(LLVMContext::MD_range,
-                             MDB.createRange(NonNullInt, NullInt));
-      }
+      copyNonnullMetadata(LI, N, *NewLoad);
       break;
     case LLVMContext::MD_align:
     case LLVMContext::MD_dereferenceable:
@@ -500,17 +499,7 @@ static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewT
       NewLoad->setMetadata(ID, N);
       break;
     case LLVMContext::MD_range:
-      // FIXME: It would be nice to propagate this in some way, but the type
-      // conversions make it hard.
-
-      // If it's a pointer now and the range does not contain 0, make it !nonnull.
-      if (NewTy->isPointerTy()) {
-        unsigned BitWidth = IC.getDataLayout().getTypeSizeInBits(NewTy);
-        if (!getConstantRangeFromMetadata(*N).contains(APInt(BitWidth, 0))) {
-          MDNode *NN = MDNode::get(LI.getContext(), None);
-          NewLoad->setMetadata(LLVMContext::MD_nonnull, NN);
-        }
-      }
+      copyRangeMetadata(IC.getDataLayout(), LI, N, *NewLoad);
       break;
     }
   }
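copyNonnullMetadata/copyRangeMetadata now own the translation this block used to do inline: !nonnull on a pointer load becomes the wrapping !range [1, 0) once the load produces an integer, and a 0-free range becomes !nonnull going the other way. A tiny sketch of why the wrapped interval [1, 0) means exactly "non-zero" (8-bit model, helper name invented here):

    #include <cassert>
    #include <cstdint>

    // Membership in a half-open range [Lo, Hi) that may wrap around.
    static bool contains(uint8_t V, uint8_t Lo, uint8_t Hi) {
      return Lo <= Hi ? (V >= Lo && V < Hi) : (V >= Lo || V < Hi);
    }

    int main() {
      for (int v = 0; v <= 0xFF; ++v)
        assert(contains(uint8_t(v), 1, 0) == (v != 0)); // [1, 0) == "not 0"
    }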
@@ -529,10 +518,10 @@ static StoreInst *combineStoreToNewValue(InstCombiner &IC, StoreInst &SI, Value
   SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
   SI.getAllMetadata(MD);
 
-  StoreInst *NewStore = IC.Builder->CreateAlignedStore(
-      V, IC.Builder->CreateBitCast(Ptr, V->getType()->getPointerTo(AS)),
+  StoreInst *NewStore = IC.Builder.CreateAlignedStore(
+      V, IC.Builder.CreateBitCast(Ptr, V->getType()->getPointerTo(AS)),
      SI.getAlignment(), SI.isVolatile());
-  NewStore->setAtomic(SI.getOrdering(), SI.getSynchScope());
+  NewStore->setAtomic(SI.getOrdering(), SI.getSyncScopeID());
   for (const auto &MDPair : MD) {
     unsigned ID = MDPair.first;
     MDNode *N = MDPair.second;
@@ -624,7 +613,7 @@ static Instruction *combineLoadToOperationType(InstCombiner &IC, LoadInst &LI) {
       // Replace all the stores with stores of the newly loaded value.
       for (auto UI = LI.user_begin(), UE = LI.user_end(); UI != UE;) {
         auto *SI = cast<StoreInst>(*UI++);
-        IC.Builder->SetInsertPoint(SI);
+        IC.Builder.SetInsertPoint(SI);
         combineStoreToNewValue(IC, *SI, NewLoad);
         IC.eraseInstFromFunction(*SI);
       }
@@ -672,7 +661,10 @@ static Instruction *unpackLoadToAggregate(InstCombiner &IC, LoadInst &LI) {
     if (NumElements == 1) {
       LoadInst *NewLoad = combineLoadToNewType(IC, LI, ST->getTypeAtIndex(0U),
                                                ".unpack");
-      return IC.replaceInstUsesWith(LI, IC.Builder->CreateInsertValue(
+      AAMDNodes AAMD;
+      LI.getAAMetadata(AAMD);
+      NewLoad->setAAMetadata(AAMD);
+      return IC.replaceInstUsesWith(LI, IC.Builder.CreateInsertValue(
         UndefValue::get(T), NewLoad, 0, Name));
     }
 
@@ -697,11 +689,15 @@ static Instruction *unpackLoadToAggregate(InstCombiner &IC, LoadInst &LI) {
         Zero,
         ConstantInt::get(IdxType, i),
       };
-      auto *Ptr = IC.Builder->CreateInBoundsGEP(ST, Addr, makeArrayRef(Indices),
-                                                Name + ".elt");
+      auto *Ptr = IC.Builder.CreateInBoundsGEP(ST, Addr, makeArrayRef(Indices),
+                                               Name + ".elt");
       auto EltAlign = MinAlign(Align, SL->getElementOffset(i));
-      auto *L = IC.Builder->CreateAlignedLoad(Ptr, EltAlign, Name + ".unpack");
-      V = IC.Builder->CreateInsertValue(V, L, i);
+      auto *L = IC.Builder.CreateAlignedLoad(Ptr, EltAlign, Name + ".unpack");
+      // Propagate AA metadata. It'll still be valid on the narrowed load.
+      AAMDNodes AAMD;
+      LI.getAAMetadata(AAMD);
+      L->setAAMetadata(AAMD);
+      V = IC.Builder.CreateInsertValue(V, L, i);
     }
 
     V->setName(Name);
@@ -713,7 +709,10 @@ static Instruction *unpackLoadToAggregate(InstCombiner &IC, LoadInst &LI) {
     auto NumElements = AT->getNumElements();
     if (NumElements == 1) {
       LoadInst *NewLoad = combineLoadToNewType(IC, LI, ET, ".unpack");
-      return IC.replaceInstUsesWith(LI, IC.Builder->CreateInsertValue(
+      AAMDNodes AAMD;
+      LI.getAAMetadata(AAMD);
+      NewLoad->setAAMetadata(AAMD);
+      return IC.replaceInstUsesWith(LI, IC.Builder.CreateInsertValue(
         UndefValue::get(T), NewLoad, 0, Name));
     }
 
@@ -741,11 +740,14 @@ static Instruction *unpackLoadToAggregate(InstCombiner &IC, LoadInst &LI) {
         Zero,
         ConstantInt::get(IdxType, i),
       };
-      auto *Ptr = IC.Builder->CreateInBoundsGEP(AT, Addr, makeArrayRef(Indices),
-                                                Name + ".elt");
-      auto *L = IC.Builder->CreateAlignedLoad(Ptr, MinAlign(Align, Offset),
-                                              Name + ".unpack");
-      V = IC.Builder->CreateInsertValue(V, L, i);
+      auto *Ptr = IC.Builder.CreateInBoundsGEP(AT, Addr, makeArrayRef(Indices),
+                                               Name + ".elt");
+      auto *L = IC.Builder.CreateAlignedLoad(Ptr, MinAlign(Align, Offset),
+                                             Name + ".unpack");
+      AAMDNodes AAMD;
+      LI.getAAMetadata(AAMD);
+      L->setAAMetadata(AAMD);
+      V = IC.Builder.CreateInsertValue(V, L, i);
       Offset += EltSize;
     }
 
@@ -885,10 +887,8 @@ static bool canReplaceGEPIdxWithZero(InstCombiner &IC, GetElementPtrInst *GEPI,
   // first non-zero index.
   auto IsAllNonNegative = [&]() {
     for (unsigned i = Idx+1, e = GEPI->getNumOperands(); i != e; ++i) {
-      bool KnownNonNegative, KnownNegative;
-      IC.ComputeSignBit(GEPI->getOperand(i), KnownNonNegative,
-                        KnownNegative, 0, MemI);
-      if (KnownNonNegative)
+      KnownBits Known = IC.computeKnownBits(GEPI->getOperand(i), 0, MemI);
+      if (Known.isNonNegative())
         continue;
       return false;
     }
@@ -982,8 +982,8 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
       combineMetadataForCSE(cast<LoadInst>(AvailableVal), &LI);
       return replaceInstUsesWith(
-          LI, Builder->CreateBitOrPointerCast(AvailableVal, LI.getType(),
-                                              LI.getName() + ".cast"));
+          LI, Builder.CreateBitOrPointerCast(AvailableVal, LI.getType(),
+                                             LI.getName() + ".cast"));
     }
 
   // None of the following transforms are legal for volatile/ordered atomic
@@ -998,8 +998,9 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
       // that this code is not reachable.  We do this instead of inserting
       // an unreachable instruction directly because we cannot modify the
       // CFG.
-      new StoreInst(UndefValue::get(LI.getType()),
-                    Constant::getNullValue(Op->getType()), &LI);
+      StoreInst *SI = new StoreInst(UndefValue::get(LI.getType()),
+                                    Constant::getNullValue(Op->getType()), &LI);
+      SI->setDebugLoc(LI.getDebugLoc());
       return replaceInstUsesWith(LI, UndefValue::get(LI.getType()));
     }
 
@@ -1019,15 +1020,15 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
       unsigned Align = LI.getAlignment();
       if (isSafeToLoadUnconditionally(SI->getOperand(1), Align, DL, SI) &&
           isSafeToLoadUnconditionally(SI->getOperand(2), Align, DL, SI)) {
-        LoadInst *V1 = Builder->CreateLoad(SI->getOperand(1),
-                                           SI->getOperand(1)->getName()+".val");
-        LoadInst *V2 = Builder->CreateLoad(SI->getOperand(2),
-                                           SI->getOperand(2)->getName()+".val");
+        LoadInst *V1 = Builder.CreateLoad(SI->getOperand(1),
+                                          SI->getOperand(1)->getName()+".val");
+        LoadInst *V2 = Builder.CreateLoad(SI->getOperand(2),
+                                          SI->getOperand(2)->getName()+".val");
         assert(LI.isUnordered() && "implied by above");
         V1->setAlignment(Align);
-        V1->setAtomic(LI.getOrdering(), LI.getSynchScope());
+        V1->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
         V2->setAlignment(Align);
-        V2->setAtomic(LI.getOrdering(), LI.getSynchScope());
+        V2->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
         return SelectInst::Create(SI->getCondition(), V1, V2);
       }
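The select-of-loads rewrite just above, in source-level terms: a load through "c ? p : q" becomes two loads and a select of the values, legal only because both pointers were proven safe to load unconditionally. Illustrative C++ only; the pass itself also carries over alignment and atomicity, as the diff shows:

    #include <cassert>

    int main() {
      int a = 10, b = 20;
      for (bool c : {false, true}) {
        int loaded = *(c ? &a : &b);      // load (select c, &a, &b)
        int v1 = a, v2 = b;               // speculate both loads...
        assert(loaded == (c ? v1 : v2));  // ...then select the loaded values
      }
    }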
@@ -1172,7 +1173,7 @@ static bool unpackStoreToAggregate(InstCombiner &IC, StoreInst &SI) {
     // If the struct only have one element, we unpack.
     unsigned Count = ST->getNumElements();
     if (Count == 1) {
-      V = IC.Builder->CreateExtractValue(V, 0);
+      V = IC.Builder.CreateExtractValue(V, 0);
       combineStoreToNewValue(IC, SI, V);
       return true;
     }
@@ -1201,11 +1202,14 @@ static bool unpackStoreToAggregate(InstCombiner &IC, StoreInst &SI) {
         Zero,
         ConstantInt::get(IdxType, i),
       };
-      auto *Ptr = IC.Builder->CreateInBoundsGEP(ST, Addr, makeArrayRef(Indices),
-                                                AddrName);
-      auto *Val = IC.Builder->CreateExtractValue(V, i, EltName);
+      auto *Ptr = IC.Builder.CreateInBoundsGEP(ST, Addr, makeArrayRef(Indices),
+                                               AddrName);
+      auto *Val = IC.Builder.CreateExtractValue(V, i, EltName);
       auto EltAlign = MinAlign(Align, SL->getElementOffset(i));
-      IC.Builder->CreateAlignedStore(Val, Ptr, EltAlign);
+      llvm::Instruction *NS = IC.Builder.CreateAlignedStore(Val, Ptr, EltAlign);
+      AAMDNodes AAMD;
+      SI.getAAMetadata(AAMD);
+      NS->setAAMetadata(AAMD);
     }
 
     return true;
@@ -1215,7 +1219,7 @@ static bool unpackStoreToAggregate(InstCombiner &IC, StoreInst &SI) {
     // If the array only have one element, we unpack.
     auto NumElements = AT->getNumElements();
     if (NumElements == 1) {
-      V = IC.Builder->CreateExtractValue(V, 0);
+      V = IC.Builder.CreateExtractValue(V, 0);
       combineStoreToNewValue(IC, SI, V);
       return true;
     }
@@ -1248,11 +1252,14 @@ static bool unpackStoreToAggregate(InstCombiner &IC, StoreInst &SI) {
         Zero,
         ConstantInt::get(IdxType, i),
       };
-      auto *Ptr = IC.Builder->CreateInBoundsGEP(AT, Addr, makeArrayRef(Indices),
-                                                AddrName);
-      auto *Val = IC.Builder->CreateExtractValue(V, i, EltName);
+      auto *Ptr = IC.Builder.CreateInBoundsGEP(AT, Addr, makeArrayRef(Indices),
+                                               AddrName);
+      auto *Val = IC.Builder.CreateExtractValue(V, i, EltName);
       auto EltAlign = MinAlign(Align, Offset);
-      IC.Builder->CreateAlignedStore(Val, Ptr, EltAlign);
+      Instruction *NS = IC.Builder.CreateAlignedStore(Val, Ptr, EltAlign);
+      AAMDNodes AAMD;
+      SI.getAAMetadata(AAMD);
+      NS->setAAMetadata(AAMD);
       Offset += EltSize;
     }
 
@@ -1534,7 +1541,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
                                    SI.isVolatile(),
                                    SI.getAlignment(),
                                    SI.getOrdering(),
-                                   SI.getSynchScope());
+                                   SI.getSyncScopeID());
   InsertNewInstBefore(NewSI, *BBI);
   // The debug locations of the original instructions might differ; merge them.
   NewSI->setDebugLoc(DILocation::getMergedLocation(SI.getDebugLoc(),
diff --git a/interpreter/llvm/src/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/interpreter/llvm/src/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index face9d9237ae1..e3a50220f94e2 100644
--- a/interpreter/llvm/src/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/interpreter/llvm/src/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -39,17 +39,15 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC,
   Value *A = nullptr, *B = nullptr, *One = nullptr;
   if (match(V, m_LShr(m_OneUse(m_Shl(m_Value(One), m_Value(A))), m_Value(B))) &&
       match(One, m_One())) {
-    A = IC.Builder->CreateSub(A, B);
-    return IC.Builder->CreateShl(One, A);
+    A = IC.Builder.CreateSub(A, B);
+    return IC.Builder.CreateShl(One, A);
   }
 
   // (PowerOfTwo >>u B) --> isExact since shifting out the result would make it
   // inexact.  Similarly for <<.
   BinaryOperator *I = dyn_cast<BinaryOperator>(V);
   if (I && I->isLogicalShift() &&
-      isKnownToBeAPowerOfTwo(I->getOperand(0), IC.getDataLayout(), false, 0,
-                             &IC.getAssumptionCache(), &CxtI,
-                             &IC.getDominatorTree())) {
+      IC.isKnownToBeAPowerOfTwo(I->getOperand(0), false, 0, &CxtI)) {
     // We know that this is an exact/nuw shift and that the input is a
     // non-zero context as well.
     if (Value *V2 = simplifyValueKnownNonZero(I->getOperand(0), IC, CxtI)) {
@@ -132,8 +130,9 @@ static Constant *getLogBase2Vector(ConstantDataVector *CV) {
 
 /// \brief Return true if we can prove that:
 ///    (mul LHS, RHS) === (mul nsw LHS, RHS)
-bool InstCombiner::WillNotOverflowSignedMul(Value *LHS, Value *RHS,
-                                            Instruction &CxtI) {
+bool InstCombiner::willNotOverflowSignedMul(const Value *LHS,
+                                            const Value *RHS,
+                                            const Instruction &CxtI) const {
   // Multiplying n * m significant bits yields a result of n + m significant
   // bits. If the total number of significant bits does not exceed the
   // result bit width (minus 1), there is no overflow.
@@ -162,11 +161,9 @@ bool InstCombiner::WillNotOverflowSignedMul(Value *LHS, Value *RHS,
   // product is exactly the minimum negative number.
   // E.g. mul i16 with 17 sign bits: 0xff00 * 0xff80 = 0x8000
   // For simplicity we just check if at least one side is not negative.
-  bool LHSNonNegative, LHSNegative;
-  bool RHSNonNegative, RHSNegative;
-  ComputeSignBit(LHS, LHSNonNegative, LHSNegative, /*Depth=*/0, &CxtI);
-  ComputeSignBit(RHS, RHSNonNegative, RHSNegative, /*Depth=*/0, &CxtI);
-  if (LHSNonNegative || RHSNonNegative)
+  KnownBits LHSKnown = computeKnownBits(LHS, /*Depth=*/0, &CxtI);
+  KnownBits RHSKnown = computeKnownBits(RHS, /*Depth=*/0, &CxtI);
+  if (LHSKnown.isNonNegative() || RHSKnown.isNonNegative())
      return true;
   }
   return false;
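The i16 example from the comment above, worked out in a standalone check (numSignBits counts how many leading bits equal the sign bit, as ComputeNumSignBits does):

    #include <cassert>
    #include <cstdint>

    static unsigned numSignBitsI16(int16_t V) {
      unsigned N = 0;
      for (int Bit = 15; Bit >= 0 && ((V >> Bit) & 1) == ((V >> 15) & 1); --Bit)
        ++N;
      return N;
    }

    int main() {
      int16_t L = int16_t(0xff00), R = int16_t(0xff80);    // -256 and -128
      assert(numSignBitsI16(L) + numSignBitsI16(R) == 17);  // bitwidth + 1
      // The product 32768 overflows i16 by exactly one, landing on the
      // minimum negative number (0x8000) - the boundary case the code
      // guards against by requiring one provably non-negative operand.
      assert(int32_t(L) * int32_t(R) == 32768);
    }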
@@ -179,7 +176,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
   if (Value *V = SimplifyVectorOp(I))
     return replaceInstUsesWith(I, V);
 
-  if (Value *V = SimplifyMulInst(Op0, Op1, SQ))
+  if (Value *V = SimplifyMulInst(Op0, Op1, SQ.getWithInstruction(&I)))
     return replaceInstUsesWith(I, V);
 
   if (Value *V = SimplifyUsingDistributiveLaws(I))
@@ -230,8 +227,8 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
         if (I.hasNoUnsignedWrap())
           Shl->setHasNoUnsignedWrap();
         if (I.hasNoSignedWrap()) {
-          uint64_t V;
-          if (match(NewCst, m_ConstantInt(V)) && V != Width - 1)
+          const APInt *V;
+          if (match(NewCst, m_APInt(V)) && *V != Width - 1)
             Shl->setHasNoSignedWrap();
         }
 
@@ -253,9 +250,9 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
       ConstantInt *C1;
       Value *Sub = nullptr;
       if (match(Op0, m_Sub(m_Value(Y), m_Value(X))))
-        Sub = Builder->CreateSub(X, Y, "suba");
+        Sub = Builder.CreateSub(X, Y, "suba");
       else if (match(Op0, m_Add(m_Value(Y), m_ConstantInt(C1))))
-        Sub = Builder->CreateSub(Builder->CreateNeg(C1), Y, "subc");
+        Sub = Builder.CreateSub(Builder.CreateNeg(C1), Y, "subc");
       if (Sub)
         return
           BinaryOperator::CreateMul(Sub,
@@ -275,11 +272,11 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
       Value *X;
       Constant *C1;
       if (match(Op0, m_OneUse(m_Add(m_Value(X), m_Constant(C1))))) {
-        Value *Mul = Builder->CreateMul(C1, Op1);
+        Value *Mul = Builder.CreateMul(C1, Op1);
         // Only go forward with the transform if C1*CI simplifies to a tidier
         // constant.
         if (!match(Mul, m_Mul(m_Value(), m_Value())))
-          return BinaryOperator::CreateAdd(Builder->CreateMul(X, Op1), Mul);
+          return BinaryOperator::CreateAdd(Builder.CreateMul(X, Op1), Mul);
       }
     }
   }
@@ -321,7 +318,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
       auto RemOpc = Div->getOpcode() == Instruction::UDiv ? Instruction::URem
                                                           : Instruction::SRem;
-      Value *Rem = Builder->CreateBinOp(RemOpc, X, DivOp1);
+      Value *Rem = Builder.CreateBinOp(RemOpc, X, DivOp1);
       if (DivOp1 == Y)
         return BinaryOperator::CreateSub(X, Rem);
       return BinaryOperator::CreateSub(Rem, X);
@@ -329,7 +326,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
   }
 
   /// i1 mul -> i1 and.
-  if (I.getType()->getScalarType()->isIntegerTy(1))
+  if (I.getType()->isIntOrIntVectorTy(1))
     return BinaryOperator::CreateAnd(Op0, Op1);
 
   // X*(1 << Y) --> X << Y
@@ -371,7 +368,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
     }
 
     if (BoolCast) {
-      Value *V = Builder->CreateSub(Constant::getNullValue(I.getType()),
+      Value *V = Builder.CreateSub(Constant::getNullValue(I.getType()),
                                     BoolCast);
       return BinaryOperator::CreateAnd(V, OtherOp);
     }
@@ -386,10 +383,10 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
       Constant *CI =
           ConstantExpr::getTrunc(Op1C, Op0Conv->getOperand(0)->getType());
       if (ConstantExpr::getSExt(CI, I.getType()) == Op1C &&
-          WillNotOverflowSignedMul(Op0Conv->getOperand(0), CI, I)) {
+          willNotOverflowSignedMul(Op0Conv->getOperand(0), CI, I)) {
         // Insert the new, smaller mul.
         Value *NewMul =
-            Builder->CreateNSWMul(Op0Conv->getOperand(0), CI, "mulconv");
+            Builder.CreateNSWMul(Op0Conv->getOperand(0), CI, "mulconv");
         return new SExtInst(NewMul, I.getType());
       }
     }
@@ -403,10 +400,10 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
       if (Op0Conv->getOperand(0)->getType() ==
               Op1Conv->getOperand(0)->getType() &&
           (Op0Conv->hasOneUse() || Op1Conv->hasOneUse()) &&
-          WillNotOverflowSignedMul(Op0Conv->getOperand(0),
+          willNotOverflowSignedMul(Op0Conv->getOperand(0),
                                    Op1Conv->getOperand(0), I)) {
         // Insert the new integer mul.
-        Value *NewMul = Builder->CreateNSWMul(
+        Value *NewMul = Builder.CreateNSWMul(
             Op0Conv->getOperand(0), Op1Conv->getOperand(0), "mulconv");
         return new SExtInst(NewMul, I.getType());
       }
@@ -422,11 +419,10 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
       Constant *CI =
          ConstantExpr::getTrunc(Op1C, Op0Conv->getOperand(0)->getType());
      if (ConstantExpr::getZExt(CI, I.getType()) == Op1C &&
-          computeOverflowForUnsignedMul(Op0Conv->getOperand(0), CI, &I) ==
-              OverflowResult::NeverOverflows) {
+          willNotOverflowUnsignedMul(Op0Conv->getOperand(0), CI, I)) {
         // Insert the new, smaller mul.
         Value *NewMul =
-            Builder->CreateNUWMul(Op0Conv->getOperand(0), CI, "mulconv");
+            Builder.CreateNUWMul(Op0Conv->getOperand(0), CI, "mulconv");
         return new ZExtInst(NewMul, I.getType());
       }
     }
@@ -440,25 +436,22 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
       if (Op0Conv->getOperand(0)->getType() ==
              Op1Conv->getOperand(0)->getType() &&
          (Op0Conv->hasOneUse() || Op1Conv->hasOneUse()) &&
-          computeOverflowForUnsignedMul(Op0Conv->getOperand(0),
-                                        Op1Conv->getOperand(0),
-                                        &I) == OverflowResult::NeverOverflows) {
+          willNotOverflowUnsignedMul(Op0Conv->getOperand(0),
+                                     Op1Conv->getOperand(0), I)) {
         // Insert the new integer mul.
-        Value *NewMul = Builder->CreateNUWMul(
+        Value *NewMul = Builder.CreateNUWMul(
             Op0Conv->getOperand(0), Op1Conv->getOperand(0), "mulconv");
         return new ZExtInst(NewMul, I.getType());
       }
     }
   }
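A standalone illustration of the narrowing these hunks perform (i8/i16 stand in for the source and destination types): when the narrow multiply provably cannot overflow, multiplying after sign-extension and sign-extending the narrow product agree, so the wide multiply can be shrunk:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (int x = -11; x <= 11; ++x)
        for (int y = -11; y <= 11; ++y) {
          // |x * y| <= 121 < 128, so the i8 multiply carries nsw.
          int16_t WideMul = int16_t(x) * int16_t(y);  // sext, then mul
          int16_t MulSext = int8_t(x) * int8_t(y);    // mul, then sext
          assert(WideMul == MulSext);
        }
    }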
-  if (!I.hasNoSignedWrap() && WillNotOverflowSignedMul(Op0, Op1, I)) {
+  if (!I.hasNoSignedWrap() && willNotOverflowSignedMul(Op0, Op1, I)) {
     Changed = true;
     I.setHasNoSignedWrap(true);
   }
 
-  if (!I.hasNoUnsignedWrap() &&
-      computeOverflowForUnsignedMul(Op0, Op1, &I) ==
-          OverflowResult::NeverOverflows) {
+  if (!I.hasNoUnsignedWrap() && willNotOverflowUnsignedMul(Op0, Op1, I)) {
     Changed = true;
     I.setHasNoUnsignedWrap(true);
   }
@@ -606,7 +599,8 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
   if (isa<Constant>(Op0))
     std::swap(Op0, Op1);
 
-  if (Value *V = SimplifyFMulInst(Op0, Op1, I.getFastMathFlags(), SQ))
+  if (Value *V = SimplifyFMulInst(Op0, Op1, I.getFastMathFlags(),
+                                  SQ.getWithInstruction(&I)))
     return replaceInstUsesWith(I, V);
 
   bool AllowReassociate = I.hasUnsafeAlgebra();
@@ -704,11 +698,11 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
       }
 
       // if pattern detected emit alternate sequence
       if (OpX && OpY) {
-        BuilderTy::FastMathFlagGuard Guard(*Builder);
-        Builder->setFastMathFlags(Log2->getFastMathFlags());
+        BuilderTy::FastMathFlagGuard Guard(Builder);
+        Builder.setFastMathFlags(Log2->getFastMathFlags());
         Log2->setArgOperand(0, OpY);
-        Value *FMulVal = Builder->CreateFMul(OpX, Log2);
-        Value *FSub = Builder->CreateFSub(FMulVal, OpX);
+        Value *FMulVal = Builder.CreateFMul(OpX, Log2);
+        Value *FSub = Builder.CreateFSub(FMulVal, OpX);
         FSub->takeName(&I);
         return replaceInstUsesWith(I, FSub);
       }
@@ -720,23 +714,23 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
     for (int i = 0; i < 2; i++) {
       bool IgnoreZeroSign = I.hasNoSignedZeros();
       if (BinaryOperator::isFNeg(Opnd0, IgnoreZeroSign)) {
-        BuilderTy::FastMathFlagGuard Guard(*Builder);
-        Builder->setFastMathFlags(I.getFastMathFlags());
+        BuilderTy::FastMathFlagGuard Guard(Builder);
+        Builder.setFastMathFlags(I.getFastMathFlags());
 
         Value *N0 = dyn_castFNegVal(Opnd0, IgnoreZeroSign);
         Value *N1 = dyn_castFNegVal(Opnd1, IgnoreZeroSign);
 
         // -X * -Y => X*Y
         if (N1) {
-          Value *FMul = Builder->CreateFMul(N0, N1);
+          Value *FMul = Builder.CreateFMul(N0, N1);
           FMul->takeName(&I);
           return replaceInstUsesWith(I, FMul);
         }
 
         if (Opnd0->hasOneUse()) {
           // -X * Y => -(X*Y) (Promote negation as high as possible)
-          Value *T = Builder->CreateFMul(N0, Opnd1);
-          Value *Neg = Builder->CreateFNeg(T);
+          Value *T = Builder.CreateFMul(N0, Opnd1);
+          Value *Neg = Builder.CreateFNeg(T);
           Neg->takeName(&I);
           return replaceInstUsesWith(I, Neg);
         }
@@ -761,10 +755,10 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
           Y = Opnd0_0;
 
         if (Y) {
-          BuilderTy::FastMathFlagGuard Guard(*Builder);
-          Builder->setFastMathFlags(I.getFastMathFlags());
-          Value *T = Builder->CreateFMul(Opnd1, Opnd1);
-          Value *R = Builder->CreateFMul(T, Y);
+          BuilderTy::FastMathFlagGuard Guard(Builder);
+          Builder.setFastMathFlags(I.getFastMathFlags());
+          Value *T = Builder.CreateFMul(Opnd1, Opnd1);
+          Value *R = Builder.CreateFMul(T, Y);
           R->takeName(&I);
           return replaceInstUsesWith(I, R);
         }
@@ -830,7 +824,7 @@ bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) {
         *I = SI->getOperand(NonNullOperand);
         Worklist.Add(&*BBI);
       } else if (*I == SelectCond) {
-        *I = Builder->getInt1(NonNullOperand == 1);
+        *I = Builder.getInt1(NonNullOperand == 1);
         Worklist.Add(&*BBI);
       }
     }
@@ -937,27 +931,25 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
       }
     }
 
-    if (*C2 != 0) // avoid X udiv 0
+    if (!C2->isNullValue()) // avoid X udiv 0
       if (Instruction *FoldedDiv = foldOpWithConstantIntoOperand(I))
         return FoldedDiv;
   }
 
   if (match(Op0, m_One())) {
-    assert(!I.getType()->getScalarType()->isIntegerTy(1) &&
-           "i1 divide not removed?");
+    assert(!I.getType()->isIntOrIntVectorTy(1) && "i1 divide not removed?");
     if (I.getOpcode() == Instruction::SDiv) {
       // If Op1 is 0 then it's undefined behaviour, if Op1 is 1 then the
       // result is one, if Op1 is -1 then the result is minus one, otherwise
       // it's zero.
-      Value *Inc = Builder->CreateAdd(Op1, Op0);
-      Value *Cmp = Builder->CreateICmpULT(
-          Inc, ConstantInt::get(I.getType(), 3));
+      Value *Inc = Builder.CreateAdd(Op1, Op0);
+      Value *Cmp = Builder.CreateICmpULT(Inc, ConstantInt::get(I.getType(), 3));
       return SelectInst::Create(Cmp, Op1, ConstantInt::get(I.getType(), 0));
     } else {
       // If Op1 is 0 then it's undefined behaviour. If Op1 is 1 then the
       // result is one, otherwise it's zero.
-      return new ZExtInst(Builder->CreateICmpEQ(Op1, Op0), I.getType());
+      return new ZExtInst(Builder.CreateICmpEQ(Op1, Op0), I.getType());
     }
   }
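The "1 sdiv X" rewrite above, checked exhaustively for i8 (a sketch; X == 0 is skipped since the original division is UB there): the quotient is 1 for X == 1, -1 for X == -1, and 0 otherwise, and "X + 1 <u 3" is true exactly for X in {-1, 0, 1}.

    #include <cassert>
    #include <cstdint>

    int main() {
      for (int x = INT8_MIN; x <= INT8_MAX; ++x) {
        if (x == 0) continue;
        uint8_t Inc = uint8_t(x) + 1;                // add Op1, Op0 (wraps)
        int8_t Folded = Inc < 3 ? int8_t(x) : 0;     // select(Inc <u 3, X, 0)
        assert(Folded == 1 / x);                     // 1, -1 or 0
      }
    }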
@@ -1032,7 +1024,7 @@ static Instruction *foldUDivPow2Cst(Value *Op0, Value *Op1,
 // X udiv C, where C >= signbit
 static Instruction *foldUDivNegCst(Value *Op0, Value *Op1,
                                    const BinaryOperator &I, InstCombiner &IC) {
-  Value *ICI = IC.Builder->CreateICmpULT(Op0, cast<ConstantInt>(Op1));
+  Value *ICI = IC.Builder.CreateICmpULT(Op0, cast<ConstantInt>(Op1));
 
   return SelectInst::Create(ICI, Constant::getNullValue(I.getType()),
                             ConstantInt::get(I.getType(), 1));
@@ -1051,10 +1043,9 @@ static Instruction *foldUDivShl(Value *Op0, Value *Op1, const BinaryOperator &I,
   if (!match(ShiftLeft, m_Shl(m_APInt(CI), m_Value(N))))
     llvm_unreachable("match should never fail here!");
   if (*CI != 1)
-    N = IC.Builder->CreateAdd(N,
-                              ConstantInt::get(N->getType(), CI->logBase2()));
+    N = IC.Builder.CreateAdd(N, ConstantInt::get(N->getType(), CI->logBase2()));
   if (Op1 != ShiftLeft)
-    N = IC.Builder->CreateZExt(N, Op1->getType());
+    N = IC.Builder.CreateZExt(N, Op1->getType());
   BinaryOperator *LShr = BinaryOperator::CreateLShr(Op0, N);
   if (I.isExact())
     LShr->setIsExact();
@@ -1110,7 +1101,7 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
   if (Value *V = SimplifyVectorOp(I))
     return replaceInstUsesWith(I, V);
 
-  if (Value *V = SimplifyUDivInst(Op0, Op1, SQ))
+  if (Value *V = SimplifyUDivInst(Op0, Op1, SQ.getWithInstruction(&I)))
     return replaceInstUsesWith(I, V);
 
   // Handle the integer div common cases
@@ -1140,7 +1131,7 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
   if (ZExtInst *ZOp0 = dyn_cast<ZExtInst>(Op0))
     if (Value *ZOp1 = dyn_castZExtVal(Op1, ZOp0->getSrcTy()))
       return new ZExtInst(
-          Builder->CreateUDiv(ZOp0->getOperand(0), ZOp1, "div", I.isExact()),
+          Builder.CreateUDiv(ZOp0->getOperand(0), ZOp1, "div", I.isExact()),
          I.getType());
 
   // (LHS udiv (select (select (...)))) -> (LHS >> (select (select (...))))
@@ -1183,7 +1174,7 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
   if (Value *V = SimplifyVectorOp(I))
     return replaceInstUsesWith(I, V);
 
-  if (Value *V = SimplifySDivInst(Op0, Op1, SQ))
+  if (Value *V = SimplifySDivInst(Op0, Op1, SQ.getWithInstruction(&I)))
     return replaceInstUsesWith(I, V);
 
   // Handle the integer div common cases
@@ -1215,7 +1206,7 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
         Constant *NarrowDivisor =
             ConstantExpr::getTrunc(cast<Constant>(Op1), Op0Src->getType());
-        Value *NarrowOp = Builder->CreateSDiv(Op0Src, NarrowDivisor);
+        Value *NarrowOp = Builder.CreateSDiv(Op0Src, NarrowDivisor);
         return new SExtInst(NarrowOp, Op0->getType());
       }
     }
 
     if (Constant *RHS = dyn_cast<Constant>(Op1)) {
       // X/INT_MIN -> X == INT_MIN
       if (RHS->isMinSignedValue())
-        return new ZExtInst(Builder->CreateICmpEQ(Op0, Op1), I.getType());
+        return new ZExtInst(Builder.CreateICmpEQ(Op0, Op1), I.getType());
 
       // -X/C  -->  X/-C  provided the negation doesn't overflow.
       Value *X;
@@ -1245,7 +1236,7 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
       return BO;
     }
 
-    if (isKnownToBeAPowerOfTwo(Op1, DL, /*OrZero*/ true, 0, &AC, &I, &DT)) {
+    if (isKnownToBeAPowerOfTwo(Op1, /*OrZero*/ true, 0, &I)) {
       // X sdiv (1 << Y) -> X udiv (1 << Y) ( -> X u>> Y)
       // Safe because the only negative value (1 << Y) can take on is
       // INT_MIN, and X sdiv INT_MIN == X udiv INT_MIN == 0 if X doesn't have
@@ -1295,7 +1286,8 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {
   if (Value *V = SimplifyVectorOp(I))
     return replaceInstUsesWith(I, V);
 
-  if (Value *V = SimplifyFDivInst(Op0, Op1, I.getFastMathFlags(), SQ))
+  if (Value *V = SimplifyFDivInst(Op0, Op1, I.getFastMathFlags(),
+                                  SQ.getWithInstruction(&I)))
     return replaceInstUsesWith(I, V);
 
   if (isa<Constant>(Op0))
@@ -1385,7 +1377,7 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {
       //   (X/Y) / Z => X / (Y*Z)
       //
       if (!isa<Constant>(Y) || !isa<Constant>(Op1)) {
-        NewInst = Builder->CreateFMul(Y, Op1);
+        NewInst = Builder.CreateFMul(Y, Op1);
         if (Instruction *RI = dyn_cast<Instruction>(NewInst)) {
           FastMathFlags Flags = I.getFastMathFlags();
           Flags &= cast<Instruction>(Op0)->getFastMathFlags();
@@ -1397,7 +1389,7 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {
       //   Z / (X/Y) => Z*Y / X
       //
       if (!isa<Constant>(Y) || !isa<Constant>(Op0)) {
-        NewInst = Builder->CreateFMul(Op0, Y);
+        NewInst = Builder.CreateFMul(Op0, Y);
         if (Instruction *RI = dyn_cast<Instruction>(NewInst)) {
           FastMathFlags Flags = I.getFastMathFlags();
           Flags &= cast<Instruction>(Op1)->getFastMathFlags();
@@ -1479,7 +1471,7 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) {
   if (Value *V = SimplifyVectorOp(I))
     return replaceInstUsesWith(I, V);
 
-  if (Value *V = SimplifyURemInst(Op0, Op1, SQ))
+  if (Value *V = SimplifyURemInst(Op0, Op1, SQ.getWithInstruction(&I)))
     return replaceInstUsesWith(I, V);
 
   if (Instruction *common = commonIRemTransforms(I))
@@ -1488,28 +1480,28 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) {
   // (zext A) urem (zext B) --> zext (A urem B)
   if (ZExtInst *ZOp0 = dyn_cast<ZExtInst>(Op0))
     if (Value *ZOp1 = dyn_castZExtVal(Op1, ZOp0->getSrcTy()))
-      return new ZExtInst(Builder->CreateURem(ZOp0->getOperand(0), ZOp1),
+      return new ZExtInst(Builder.CreateURem(ZOp0->getOperand(0), ZOp1),
                           I.getType());
 
   // X urem Y -> X and Y-1, where Y is a power of 2,
-  if (isKnownToBeAPowerOfTwo(Op1, DL, /*OrZero*/ true, 0, &AC, &I, &DT)) {
+  if (isKnownToBeAPowerOfTwo(Op1, /*OrZero*/ true, 0, &I)) {
     Constant *N1 = Constant::getAllOnesValue(I.getType());
-    Value *Add = Builder->CreateAdd(Op1, N1);
+    Value *Add = Builder.CreateAdd(Op1, N1);
     return BinaryOperator::CreateAnd(Op0, Add);
   }
 
   // 1 urem X -> zext(X != 1)
   if (match(Op0, m_One())) {
-    Value *Cmp = Builder->CreateICmpNE(Op1, Op0);
-    Value *Ext = Builder->CreateZExt(Cmp, I.getType());
+    Value *Cmp = Builder.CreateICmpNE(Op1, Op0);
+    Value *Ext = Builder.CreateZExt(Cmp, I.getType());
     return replaceInstUsesWith(I, Ext);
   }
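The power-of-two remainder fold above, in its familiar scalar form (a standalone sketch):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t x = 0; x < 5000; ++x)
        for (uint32_t y = 1; y <= 1024; y <<= 1)  // Y a power of two
          assert(x % y == (x & (y - 1)));         // X urem Y -> X & (Y-1)
    }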
   // X urem C -> X < C ? X : X - C, where C >= signbit.
   const APInt *DivisorC;
   if (match(Op1, m_APInt(DivisorC)) && DivisorC->isNegative()) {
-    Value *Cmp = Builder->CreateICmpULT(Op0, Op1);
-    Value *Sub = Builder->CreateSub(Op0, Op1);
+    Value *Cmp = Builder.CreateICmpULT(Op0, Op1);
+    Value *Sub = Builder.CreateSub(Op0, Op1);
     return SelectInst::Create(Cmp, Op0, Sub);
   }
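When the divisor's sign bit is set, the unsigned quotient can only be 0 or 1, so a compare-and-subtract replaces the remainder, and (as foldUDivNegCst above does for division) the compare alone yields the quotient. An exhaustive i8 check of both folds:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (unsigned c = 0x80; c <= 0xFF; ++c)     // C >= signbit
        for (unsigned x = 0; x <= 0xFF; ++x) {
          assert(x % c == (x < c ? x : x - c));   // X urem C
          assert(x / c == (x < c ? 0u : 1u));     // X udiv C
        }
    }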
@@ -1522,7 +1514,7 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) {
   if (Value *V = SimplifyVectorOp(I))
     return replaceInstUsesWith(I, V);
 
-  if (Value *V = SimplifySRemInst(Op0, Op1, SQ))
+  if (Value *V = SimplifySRemInst(Op0, Op1, SQ.getWithInstruction(&I)))
     return replaceInstUsesWith(I, V);
 
   // Handle the integer rem common cases
@@ -1595,7 +1587,8 @@ Instruction *InstCombiner::visitFRem(BinaryOperator &I) {
   if (Value *V = SimplifyVectorOp(I))
     return replaceInstUsesWith(I, V);
 
-  if (Value *V = SimplifyFRemInst(Op0, Op1, I.getFastMathFlags(), SQ))
+  if (Value *V = SimplifyFRemInst(Op0, Op1, I.getFastMathFlags(),
+                                  SQ.getWithInstruction(&I)))
     return replaceInstUsesWith(I, V);
 
   // Handle cases involving: rem X, (select Cond, Y, Z)
diff --git a/interpreter/llvm/src/lib/Transforms/InstCombine/InstCombinePHI.cpp b/interpreter/llvm/src/lib/Transforms/InstCombine/InstCombinePHI.cpp
index 1117c11f4f51d..0011412c2bf47 100644
--- a/interpreter/llvm/src/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/interpreter/llvm/src/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -16,9 +16,9 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/DebugInfo.h"
 #include "llvm/IR/PatternMatch.h"
 #include "llvm/Transforms/Utils/Local.h"
-#include "llvm/IR/DebugInfo.h"
 
 using namespace llvm;
 using namespace llvm::PatternMatch;
@@ -636,10 +636,10 @@ static bool PHIsEqualValue(PHINode *PN, Value *NonPhiInVal,
 /// Return an existing non-zero constant if this phi node has one, otherwise
 /// return constant 1.
 static ConstantInt *GetAnyNonZeroConstInt(PHINode &PN) {
-  assert(isa<IntegerType>(PN.getType()) && "Expect only intger type phi");
+  assert(isa<IntegerType>(PN.getType()) && "Expect only integer type phi");
   for (Value *V : PN.operands())
     if (auto *ConstVA = dyn_cast<ConstantInt>(V))
-      if (!ConstVA->isZeroValue())
+      if (!ConstVA->isZero())
         return ConstVA;
   return ConstantInt::get(cast<IntegerType>(PN.getType()), 1);
 }
@@ -836,12 +836,12 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
       }
 
       // Otherwise, do an extract in the predecessor.
-      Builder->SetInsertPoint(Pred->getTerminator());
+      Builder.SetInsertPoint(Pred->getTerminator());
       Value *Res = InVal;
       if (Offset)
-        Res = Builder->CreateLShr(Res, ConstantInt::get(InVal->getType(),
+        Res = Builder.CreateLShr(Res, ConstantInt::get(InVal->getType(),
                                                        Offset), "extract");
-      Res = Builder->CreateTrunc(Res, Ty, "extract.t");
+      Res = Builder.CreateTrunc(Res, Ty, "extract.t");
       PredVal = Res;
       EltPHI->addIncoming(Res, Pred);
 
@@ -880,7 +880,7 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
 // PHINode simplification
 //
 Instruction *InstCombiner::visitPHINode(PHINode &PN) {
-  if (Value *V = SimplifyInstruction(&PN, SQ))
+  if (Value *V = SimplifyInstruction(&PN, SQ.getWithInstruction(&PN)))
     return replaceInstUsesWith(PN, V);
 
   if (Instruction *Result = FoldPHIArgZextsIntoPHI(PN))
diff --git a/interpreter/llvm/src/lib/Transforms/InstCombine/InstCombineSelect.cpp b/interpreter/llvm/src/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 7afb8814fe52d..4eebe8255998c 100644
--- a/interpreter/llvm/src/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/interpreter/llvm/src/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -61,12 +61,12 @@ static CmpInst::Predicate getCmpPredicateForMinMax(SelectPatternFlavor SPF,
   }
 }
 
-static Value *generateMinMaxSelectPattern(InstCombiner::BuilderTy *Builder,
+static Value *generateMinMaxSelectPattern(InstCombiner::BuilderTy &Builder,
                                           SelectPatternFlavor SPF, Value *A,
                                           Value *B) {
   CmpInst::Predicate Pred = getCmpPredicateForMinMax(SPF);
   assert(CmpInst::isIntPredicate(Pred));
-  return Builder->CreateSelect(Builder->CreateICmp(Pred, A, B), A, B);
+  return Builder.CreateSelect(Builder.CreateICmp(Pred, A, B), A, B);
 }
 
 /// We want to turn code that looks like this:
@@ -167,8 +167,8 @@ Instruction *InstCombiner::foldSelectOpOp(SelectInst &SI, Instruction *TI,
 
     // Fold this by inserting a select from the input values.
     Value *NewSI =
-        Builder->CreateSelect(SI.getCondition(), TI->getOperand(0),
-                              FI->getOperand(0), SI.getName() + ".v", &SI);
+        Builder.CreateSelect(SI.getCondition(), TI->getOperand(0),
+                             FI->getOperand(0), SI.getName() + ".v", &SI);
     return CastInst::Create(Instruction::CastOps(TI->getOpcode()), NewSI,
                             TI->getType());
   }
@@ -211,8 +211,8 @@ Instruction *InstCombiner::foldSelectOpOp(SelectInst &SI, Instruction *TI,
   }
 
   // If we reach here, they do have operations in common.
-  Value *NewSI = Builder->CreateSelect(SI.getCondition(), OtherOpT, OtherOpF,
-                                       SI.getName() + ".v", &SI);
+  Value *NewSI = Builder.CreateSelect(SI.getCondition(), OtherOpT, OtherOpF,
+                                      SI.getName() + ".v", &SI);
   Value *Op0 = MatchIsOpZero ? MatchOp : NewSI;
   Value *Op1 = MatchIsOpZero ? NewSI : MatchOp;
   return BinaryOperator::Create(BO->getOpcode(), Op0, Op1);
@@ -227,8 +227,8 @@ static bool isSelect01(Constant *C1, Constant *C2) {
     return false;
   if (!C1I->isZero() && !C2I->isZero()) // One side must be zero.
     return false;
-  return C1I->isOne() || C1I->isAllOnesValue() ||
-         C2I->isOne() || C2I->isAllOnesValue();
+  return C1I->isOne() || C1I->isMinusOne() ||
+         C2I->isOne() || C2I->isMinusOne();
 }
 
 /// Try to fold the select into one of the operands to allow further
@@ -254,7 +254,7 @@ Instruction *InstCombiner::foldSelectIntoOp(SelectInst &SI, Value *TrueVal,
         // Avoid creating select between 2 constants unless it's selecting
         // between 0, 1 and -1.
         if (!isa<Constant>(OOp) || isSelect01(C, cast<Constant>(OOp))) {
-          Value *NewSel = Builder->CreateSelect(SI.getCondition(), OOp, C);
+          Value *NewSel = Builder.CreateSelect(SI.getCondition(), OOp, C);
           NewSel->takeName(TVI);
           BinaryOperator *TVI_BO = cast<BinaryOperator>(TVI);
           BinaryOperator *BO = BinaryOperator::Create(TVI_BO->getOpcode(),
@@ -284,7 +284,7 @@ Instruction *InstCombiner::foldSelectIntoOp(SelectInst &SI, Value *TrueVal,
         // Avoid creating select between 2 constants unless it's selecting
         // between 0, 1 and -1.
         if (!isa<Constant>(OOp) || isSelect01(C, cast<Constant>(OOp))) {
-          Value *NewSel = Builder->CreateSelect(SI.getCondition(), C, OOp);
+          Value *NewSel = Builder.CreateSelect(SI.getCondition(), C, OOp);
           NewSel->takeName(FVI);
           BinaryOperator *FVI_BO = cast<BinaryOperator>(FVI);
           BinaryOperator *BO = BinaryOperator::Create(FVI_BO->getOpcode(),
@@ -303,7 +303,7 @@ Instruction *InstCombiner::foldSelectIntoOp(SelectInst &SI, Value *TrueVal,
 /// We want to turn:
 ///   (select (icmp eq (and X, C1), 0), Y, (or Y, C2))
 /// into:
-///   (or (shl (and X, C1), C3), y)
+///   (or (shl (and X, C1), C3), Y)
 /// iff:
 ///   C1 and C2 are both powers of 2
 /// where:
@@ -315,21 +315,46 @@ Instruction *InstCombiner::foldSelectIntoOp(SelectInst &SI, Value *TrueVal,
 ///   3. The magnitude of C2 and C1 are flipped
 static Value *foldSelectICmpAndOr(const SelectInst &SI, Value *TrueVal,
                                   Value *FalseVal,
-                                  InstCombiner::BuilderTy *Builder) {
+                                  InstCombiner::BuilderTy &Builder) {
   const ICmpInst *IC = dyn_cast<ICmpInst>(SI.getCondition());
-  if (!IC || !IC->isEquality() || !SI.getType()->isIntegerTy())
+  if (!IC || !SI.getType()->isIntegerTy())
     return nullptr;
 
   Value *CmpLHS = IC->getOperand(0);
   Value *CmpRHS = IC->getOperand(1);
 
-  if (!match(CmpRHS, m_Zero()))
-    return nullptr;
+  Value *V;
+  unsigned C1Log;
+  bool IsEqualZero;
+  bool NeedAnd = false;
+  if (IC->isEquality()) {
+    if (!match(CmpRHS, m_Zero()))
+      return nullptr;
+
+    const APInt *C1;
+    if (!match(CmpLHS, m_And(m_Value(), m_Power2(C1))))
+      return nullptr;
+
+    V = CmpLHS;
+    C1Log = C1->logBase2();
+    IsEqualZero = IC->getPredicate() == ICmpInst::ICMP_EQ;
+  } else if (IC->getPredicate() == ICmpInst::ICMP_SLT ||
+             IC->getPredicate() == ICmpInst::ICMP_SGT) {
+    // We also need to recognize (icmp slt (trunc (X)), 0) and
+    // (icmp sgt (trunc (X)), -1).
+    IsEqualZero = IC->getPredicate() == ICmpInst::ICMP_SGT;
+    if ((IsEqualZero && !match(CmpRHS, m_AllOnes())) ||
+        (!IsEqualZero && !match(CmpRHS, m_Zero())))
+      return nullptr;
+
+    if (!match(CmpLHS, m_OneUse(m_Trunc(m_Value(V)))))
+      return nullptr;
 
-  Value *X;
-  const APInt *C1;
-  if (!match(CmpLHS, m_And(m_Value(X), m_Power2(C1))))
+    C1Log = CmpLHS->getType()->getScalarSizeInBits() - 1;
+    NeedAnd = true;
+  } else {
     return nullptr;
+  }
 
   const APInt *C2;
   bool OrOnTrueVal = false;
@@ -340,26 +365,40 @@ static Value *foldSelectICmpAndOr(const SelectInst &SI, Value *TrueVal,
   if (!OrOnFalseVal && !OrOnTrueVal)
     return nullptr;
 
-  Value *V = CmpLHS;
   Value *Y = OrOnFalseVal ? TrueVal : FalseVal;
-  unsigned C1Log = C1->logBase2();
   unsigned C2Log = C2->logBase2();
+
+  bool NeedXor = (!IsEqualZero && OrOnFalseVal) || (IsEqualZero && OrOnTrueVal);
+  bool NeedShift = C1Log != C2Log;
+  bool NeedZExtTrunc = Y->getType()->getIntegerBitWidth() !=
+                       V->getType()->getIntegerBitWidth();
+
+  // Make sure we don't create more instructions than we save.
+  Value *Or = OrOnFalseVal ? FalseVal : TrueVal;
+  if ((NeedShift + NeedXor + NeedZExtTrunc) >
+      (IC->hasOneUse() + Or->hasOneUse()))
+    return nullptr;
+
+  if (NeedAnd) {
+    // Insert the AND instruction on the input to the truncate.
+    APInt C1 = APInt::getOneBitSet(V->getType()->getScalarSizeInBits(), C1Log);
+    V = Builder.CreateAnd(V, ConstantInt::get(V->getType(), C1));
+  }
+
   if (C2Log > C1Log) {
-    V = Builder->CreateZExtOrTrunc(V, Y->getType());
-    V = Builder->CreateShl(V, C2Log - C1Log);
+    V = Builder.CreateZExtOrTrunc(V, Y->getType());
+    V = Builder.CreateShl(V, C2Log - C1Log);
   } else if (C1Log > C2Log) {
-    V = Builder->CreateLShr(V, C1Log - C2Log);
-    V = Builder->CreateZExtOrTrunc(V, Y->getType());
+    V = Builder.CreateLShr(V, C1Log - C2Log);
+    V = Builder.CreateZExtOrTrunc(V, Y->getType());
   } else
-    V = Builder->CreateZExtOrTrunc(V, Y->getType());
+    V = Builder.CreateZExtOrTrunc(V, Y->getType());
 
-  ICmpInst::Predicate Pred = IC->getPredicate();
-  if ((Pred == ICmpInst::ICMP_NE && OrOnFalseVal) ||
-      (Pred == ICmpInst::ICMP_EQ && OrOnTrueVal))
-    V = Builder->CreateXor(V, *C2);
+  if (NeedXor)
+    V = Builder.CreateXor(V, *C2);
 
-  return Builder->CreateOr(V, Y);
+  return Builder.CreateOr(V, Y);
 }
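foldSelectICmpAndOr with everything made concrete (C1 = 0x04, C2 = 0x40, so C3 is a left shift by 4): the tested bit is moved into the or'd position instead of branching. A standalone check over all i8 inputs:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (unsigned x = 0; x <= 0xFF; ++x)
        for (unsigned y = 0; y <= 0xFF; ++y) {
          // select((X & 4) == 0, Y, (or Y, 0x40))
          unsigned Sel = (x & 0x04) == 0 ? y : (y | 0x40);
          // (or (shl (and X, 4), 4), Y)
          unsigned Fold = ((x & 0x04) << 4) | y;
          assert(Sel == Fold);
        }
    }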
+ APInt C1 = APInt::getOneBitSet(V->getType()->getScalarSizeInBits(), C1Log); + V = Builder.CreateAnd(V, ConstantInt::get(V->getType(), C1)); + } + if (C2Log > C1Log) { - V = Builder->CreateZExtOrTrunc(V, Y->getType()); - V = Builder->CreateShl(V, C2Log - C1Log); + V = Builder.CreateZExtOrTrunc(V, Y->getType()); + V = Builder.CreateShl(V, C2Log - C1Log); } else if (C1Log > C2Log) { - V = Builder->CreateLShr(V, C1Log - C2Log); - V = Builder->CreateZExtOrTrunc(V, Y->getType()); + V = Builder.CreateLShr(V, C1Log - C2Log); + V = Builder.CreateZExtOrTrunc(V, Y->getType()); } else - V = Builder->CreateZExtOrTrunc(V, Y->getType()); + V = Builder.CreateZExtOrTrunc(V, Y->getType()); - ICmpInst::Predicate Pred = IC->getPredicate(); - if ((Pred == ICmpInst::ICMP_NE && OrOnFalseVal) || - (Pred == ICmpInst::ICMP_EQ && OrOnTrueVal)) - V = Builder->CreateXor(V, *C2); + if (NeedXor) + V = Builder.CreateXor(V, *C2); - return Builder->CreateOr(V, Y); + return Builder.CreateOr(V, Y); } /// Attempt to fold a cttz/ctlz followed by a icmp plus select into a single @@ -375,7 +414,7 @@ static Value *foldSelectICmpAndOr(const SelectInst &SI, Value *TrueVal, /// into: /// %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 false) static Value *foldSelectCttzCtlz(ICmpInst *ICI, Value *TrueVal, Value *FalseVal, - InstCombiner::BuilderTy *Builder) { + InstCombiner::BuilderTy &Builder) { ICmpInst::Predicate Pred = ICI->getPredicate(); Value *CmpLHS = ICI->getOperand(0); Value *CmpRHS = ICI->getOperand(1); @@ -410,8 +449,8 @@ static Value *foldSelectCttzCtlz(ICmpInst *ICI, Value *TrueVal, Value *FalseVal, IntrinsicInst *NewI = cast(II->clone()); Type *Ty = NewI->getArgOperand(1)->getType(); NewI->setArgOperand(1, Constant::getNullValue(Ty)); - Builder->Insert(NewI); - return Builder->CreateZExtOrTrunc(NewI, ValueOnZero->getType()); + Builder.Insert(NewI); + return Builder.CreateZExtOrTrunc(NewI, ValueOnZero->getType()); } return nullptr; @@ -558,7 +597,7 @@ canonicalizeMinMaxWithConstant(SelectInst &Sel, ICmpInst &Cmp, /// Visit a SelectInst that has an ICmpInst as its first operand. Instruction *InstCombiner::foldSelectInstWithICmp(SelectInst &SI, ICmpInst *ICI) { - if (Instruction *NewSel = canonicalizeMinMaxWithConstant(SI, *ICI, *Builder)) + if (Instruction *NewSel = canonicalizeMinMaxWithConstant(SI, *ICI, Builder)) return NewSel; bool Changed = adjustMinMax(SI, *ICI); @@ -578,23 +617,23 @@ Instruction *InstCombiner::foldSelectInstWithICmp(SelectInst &SI, if (TrueVal->getType() == Ty) { if (ConstantInt *Cmp = dyn_cast(CmpRHS)) { ConstantInt *C1 = nullptr, *C2 = nullptr; - if (Pred == ICmpInst::ICMP_SGT && Cmp->isAllOnesValue()) { + if (Pred == ICmpInst::ICMP_SGT && Cmp->isMinusOne()) { C1 = dyn_cast(TrueVal); C2 = dyn_cast(FalseVal); - } else if (Pred == ICmpInst::ICMP_SLT && Cmp->isNullValue()) { + } else if (Pred == ICmpInst::ICMP_SLT && Cmp->isZero()) { C1 = dyn_cast(FalseVal); C2 = dyn_cast(TrueVal); } if (C1 && C2) { // This shift results in either -1 or 0. - Value *AShr = Builder->CreateAShr(CmpLHS, Ty->getBitWidth()-1); + Value *AShr = Builder.CreateAShr(CmpLHS, Ty->getBitWidth() - 1); // Check if we can express the operation with a single or. 
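// ---[ editor's aside, not part of the patch ]---------------------------------
// The "-1 or 0" shift trick just described, with made-up constants C1 = 5
// (taken when X > -1) and C2 = 9: "ashr X, 31" smears the sign bit, the 'and'
// keeps C2 - C1, and the add rebases to C1. When C2 is all-ones the and/add
// pair degenerates to the single 'or' checked for above. Assumes 32-bit int
// with arithmetic right shift of negative values.
static bool sign_bit_select_equiv(int X) {
  int A = (X > -1) ? 5 : 9;       // select (icmp sgt X, -1), C1, C2
  int AShr = X >> 31;             // 0 for non-negative X, -1 for negative X
  int B = (AShr & (9 - 5)) + 5;   // (add (and AShr, C2 - C1), C1)
  return A == B;                  // holds for every X
}
// ---[ end aside ]--------------------------------------------------------------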
- if (C2->isAllOnesValue()) - return replaceInstUsesWith(SI, Builder->CreateOr(AShr, C1)); + if (C2->isMinusOne()) + return replaceInstUsesWith(SI, Builder.CreateOr(AShr, C1)); - Value *And = Builder->CreateAnd(AShr, C2->getValue()-C1->getValue()); - return replaceInstUsesWith(SI, Builder->CreateAdd(And, C1)); + Value *And = Builder.CreateAnd(AShr, C2->getValue() - C1->getValue()); + return replaceInstUsesWith(SI, Builder.CreateAdd(And, C1)); } } } @@ -645,19 +684,19 @@ Instruction *InstCombiner::foldSelectInstWithICmp(SelectInst &SI, // (X & Y) == 0 ? X : X ^ Y --> X & ~Y if (TrueWhenUnset && TrueVal == X && match(FalseVal, m_Xor(m_Specific(X), m_APInt(C))) && *Y == *C) - V = Builder->CreateAnd(X, ~(*Y)); + V = Builder.CreateAnd(X, ~(*Y)); // (X & Y) != 0 ? X ^ Y : X --> X & ~Y else if (!TrueWhenUnset && FalseVal == X && match(TrueVal, m_Xor(m_Specific(X), m_APInt(C))) && *Y == *C) - V = Builder->CreateAnd(X, ~(*Y)); + V = Builder.CreateAnd(X, ~(*Y)); // (X & Y) == 0 ? X ^ Y : X --> X | Y else if (TrueWhenUnset && FalseVal == X && match(TrueVal, m_Xor(m_Specific(X), m_APInt(C))) && *Y == *C) - V = Builder->CreateOr(X, *Y); + V = Builder.CreateOr(X, *Y); // (X & Y) != 0 ? X : X ^ Y --> X | Y else if (!TrueWhenUnset && TrueVal == X && match(FalseVal, m_Xor(m_Specific(X), m_APInt(C))) && *Y == *C) - V = Builder->CreateOr(X, *Y); + V = Builder.CreateOr(X, *Y); if (V) return replaceInstUsesWith(SI, V); @@ -770,8 +809,8 @@ Instruction *InstCombiner::foldSPFofSPF(Instruction *Inner, (SPF1 == SPF_NABS && SPF2 == SPF_ABS)) { SelectInst *SI = cast(Inner); Value *NewSI = - Builder->CreateSelect(SI->getCondition(), SI->getFalseValue(), - SI->getTrueValue(), SI->getName(), SI); + Builder.CreateSelect(SI->getCondition(), SI->getFalseValue(), + SI->getTrueValue(), SI->getName(), SI); return replaceInstUsesWith(Outer, NewSI); } @@ -809,15 +848,15 @@ Instruction *InstCombiner::foldSPFofSPF(Instruction *Inner, IsFreeOrProfitableToInvert(B, NotB, ElidesXor) && IsFreeOrProfitableToInvert(C, NotC, ElidesXor) && ElidesXor) { if (!NotA) - NotA = Builder->CreateNot(A); + NotA = Builder.CreateNot(A); if (!NotB) - NotB = Builder->CreateNot(B); + NotB = Builder.CreateNot(B); if (!NotC) - NotC = Builder->CreateNot(C); + NotC = Builder.CreateNot(C); Value *NewInner = generateMinMaxSelectPattern( Builder, getInverseMinMaxSelectPattern(SPF1), NotA, NotB); - Value *NewOuter = Builder->CreateNot(generateMinMaxSelectPattern( + Value *NewOuter = Builder.CreateNot(generateMinMaxSelectPattern( Builder, getInverseMinMaxSelectPattern(SPF2), NewInner, NotC)); return replaceInstUsesWith(Outer, NewOuter); } @@ -829,9 +868,9 @@ Instruction *InstCombiner::foldSPFofSPF(Instruction *Inner, /// icmp instruction with zero, and we have an 'and' with the non-constant value /// and a power of two we can turn the select into a shift on the result of the /// 'and'. -static Value *foldSelectICmpAnd(const SelectInst &SI, ConstantInt *TrueVal, - ConstantInt *FalseVal, - InstCombiner::BuilderTy *Builder) { +static Value *foldSelectICmpAnd(const SelectInst &SI, APInt TrueVal, + APInt FalseVal, + InstCombiner::BuilderTy &Builder) { const ICmpInst *IC = dyn_cast(SI.getCondition()); if (!IC || !IC->isEquality() || !SI.getType()->isIntegerTy()) return nullptr; @@ -847,56 +886,53 @@ static Value *foldSelectICmpAnd(const SelectInst &SI, ConstantInt *TrueVal, // If both select arms are non-zero see if we have a select of the form // 'x ? 2^n + C : C'. Then we can offset both arms by C, use the logic // for 'x ? 2^n : 0' and fix the thing up at the end. 
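// ---[ editor's aside, not part of the patch ]---------------------------------
// Numeric instance of the offset trick described in the comment above, with
// made-up arms 7 and 3 whose difference is the power of two 4: offsetting both
// arms by 3 reduces the select to the 'x ? 4 : 0' shape, which the 'and'
// produces directly, and the offset is added back at the end.
static bool offset_select_equiv(unsigned X) {
  unsigned A = ((X & 4u) == 0) ? 3u : 7u; // both select arms non-zero
  unsigned B = (X & 4u) + 3u;             // (add (and X, 4), 3)
  return A == B;                          // holds for every X
}
// ---[ end aside ]--------------------------------------------------------------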
- ConstantInt *Offset = nullptr; - if (!TrueVal->isZero() && !FalseVal->isZero()) { - if ((TrueVal->getValue() - FalseVal->getValue()).isPowerOf2()) + APInt Offset(TrueVal.getBitWidth(), 0); + if (!TrueVal.isNullValue() && !FalseVal.isNullValue()) { + if ((TrueVal - FalseVal).isPowerOf2()) Offset = FalseVal; - else if ((FalseVal->getValue() - TrueVal->getValue()).isPowerOf2()) + else if ((FalseVal - TrueVal).isPowerOf2()) Offset = TrueVal; else return nullptr; // Adjust TrueVal and FalseVal to the offset. - TrueVal = ConstantInt::get(Builder->getContext(), - TrueVal->getValue() - Offset->getValue()); - FalseVal = ConstantInt::get(Builder->getContext(), - FalseVal->getValue() - Offset->getValue()); + TrueVal -= Offset; + FalseVal -= Offset; } // Make sure the mask in the 'and' and one of the select arms is a power of 2. if (!AndRHS->getValue().isPowerOf2() || - (!TrueVal->getValue().isPowerOf2() && - !FalseVal->getValue().isPowerOf2())) + (!TrueVal.isPowerOf2() && !FalseVal.isPowerOf2())) return nullptr; // Determine which shift is needed to transform result of the 'and' into the // desired result. - ConstantInt *ValC = !TrueVal->isZero() ? TrueVal : FalseVal; - unsigned ValZeros = ValC->getValue().logBase2(); + const APInt &ValC = !TrueVal.isNullValue() ? TrueVal : FalseVal; + unsigned ValZeros = ValC.logBase2(); unsigned AndZeros = AndRHS->getValue().logBase2(); // If types don't match we can still convert the select by introducing a zext // or a trunc of the 'and'. The trunc case requires that all of the truncated // bits are zero, we can figure that out by looking at the 'and' mask. - if (AndZeros >= ValC->getBitWidth()) + if (AndZeros >= ValC.getBitWidth()) return nullptr; - Value *V = Builder->CreateZExtOrTrunc(LHS, SI.getType()); + Value *V = Builder.CreateZExtOrTrunc(LHS, SI.getType()); if (ValZeros > AndZeros) - V = Builder->CreateShl(V, ValZeros - AndZeros); + V = Builder.CreateShl(V, ValZeros - AndZeros); else if (ValZeros < AndZeros) - V = Builder->CreateLShr(V, AndZeros - ValZeros); + V = Builder.CreateLShr(V, AndZeros - ValZeros); // Okay, now we know that everything is set up, we just don't know whether we // have a icmp_ne or icmp_eq and whether the true or false val is the zero. - bool ShouldNotVal = !TrueVal->isZero(); + bool ShouldNotVal = !TrueVal.isNullValue(); ShouldNotVal ^= IC->getPredicate() == ICmpInst::ICMP_NE; if (ShouldNotVal) - V = Builder->CreateXor(V, ValC); + V = Builder.CreateXor(V, ValC); // Apply an offset if needed. - if (Offset) - V = Builder->CreateAdd(V, Offset); + if (!Offset.isNullValue()) + V = Builder.CreateAdd(V, ConstantInt::get(V->getType(), Offset)); return V; } @@ -985,7 +1021,7 @@ Instruction *InstCombiner::foldSelectExtConst(SelectInst &Sel) { // TODO: Handle larger types? That requires adjusting FoldOpIntoSelect too. 
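// ---[ editor's aside, not part of the patch ]---------------------------------
// The no-offset shape of the same fold, with made-up constants chosen so the
// mask bit (AndZeros = 3) sits above the result bit (ValZeros = 1) and the
// 'and' result is therefore shifted right; the ne predicate with a non-zero
// true arm needs no final xor.
static bool and_to_shift_equiv(unsigned X) {
  unsigned A = ((X & 8u) != 0) ? 2u : 0u; // select (icmp ne (and X,8), 0), 2, 0
  unsigned B = (X & 8u) >> 2;             // (lshr (and X, 8), AndZeros - ValZeros)
  return A == B;                          // holds for every X
}
// ---[ end aside ]--------------------------------------------------------------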
Value *X = ExtInst->getOperand(0); Type *SmallType = X->getType(); - if (!SmallType->getScalarType()->isIntegerTy(1)) + if (!SmallType->isIntOrIntVectorTy(1)) return nullptr; Constant *C; @@ -1006,7 +1042,7 @@ Instruction *InstCombiner::foldSelectExtConst(SelectInst &Sel) { // select Cond, (ext X), C --> ext(select Cond, X, C') // select Cond, C, (ext X) --> ext(select Cond, C', X) - Value *NewSel = Builder->CreateSelect(Cond, X, TruncCVal, "narrow", &Sel); + Value *NewSel = Builder.CreateSelect(Cond, X, TruncCVal, "narrow", &Sel); return CastInst::Create(Instruction::CastOps(ExtOpcode), NewSel, SelType); } @@ -1121,13 +1157,31 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { Value *FalseVal = SI.getFalseValue(); Type *SelType = SI.getType(); - if (Value *V = SimplifySelectInst(CondVal, TrueVal, FalseVal, SQ)) + if (Value *V = SimplifySelectInst(CondVal, TrueVal, FalseVal, + SQ.getWithInstruction(&SI))) return replaceInstUsesWith(SI, V); if (Instruction *I = canonicalizeSelectToShuffle(SI)) return I; - if (SelType->getScalarType()->isIntegerTy(1) && + // Canonicalize a one-use integer compare with a non-canonical predicate by + // inverting the predicate and swapping the select operands. This matches a + // compare canonicalization for conditional branches. + // TODO: Should we do the same for FP compares? + CmpInst::Predicate Pred; + if (match(CondVal, m_OneUse(m_ICmp(Pred, m_Value(), m_Value()))) && + !isCanonicalPredicate(Pred)) { + // Swap true/false values and condition. + CmpInst *Cond = cast(CondVal); + Cond->setPredicate(CmpInst::getInversePredicate(Pred)); + SI.setOperand(1, FalseVal); + SI.setOperand(2, TrueVal); + SI.swapProfMetadata(); + Worklist.Add(Cond); + return &SI; + } + + if (SelType->isIntOrIntVectorTy(1) && TrueVal->getType() == CondVal->getType()) { if (match(TrueVal, m_One())) { // Change: A = select B, true, C --> A = or B, C @@ -1135,7 +1189,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { } if (match(TrueVal, m_Zero())) { // Change: A = select B, false, C --> A = and !B, C - Value *NotCond = Builder->CreateNot(CondVal, "not." + CondVal->getName()); + Value *NotCond = Builder.CreateNot(CondVal, "not." + CondVal->getName()); return BinaryOperator::CreateAnd(NotCond, FalseVal); } if (match(FalseVal, m_Zero())) { @@ -1144,7 +1198,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { } if (match(FalseVal, m_One())) { // Change: A = select B, C, true --> A = or !B, C - Value *NotCond = Builder->CreateNot(CondVal, "not." + CondVal->getName()); + Value *NotCond = Builder.CreateNot(CondVal, "not." + CondVal->getName()); return BinaryOperator::CreateOr(NotCond, TrueVal); } @@ -1169,7 +1223,8 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { // select i1 %c, <2 x i8> <1, 1>, <2 x i8> <0, 0> // because that may need 3 instructions to splat the condition value: // extend, insertelement, shufflevector. - if (CondVal->getType()->isVectorTy() == SelType->isVectorTy()) { + if (SelType->isIntOrIntVectorTy() && + CondVal->getType()->isVectorTy() == SelType->isVectorTy()) { // select C, 1, 0 -> zext C to int if (match(TrueVal, m_One()) && match(FalseVal, m_Zero())) return new ZExtInst(CondVal, SelType); @@ -1180,20 +1235,21 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { // select C, 0, 1 -> zext !C to int if (match(TrueVal, m_Zero()) && match(FalseVal, m_One())) { - Value *NotCond = Builder->CreateNot(CondVal, "not." + CondVal->getName()); + Value *NotCond = Builder.CreateNot(CondVal, "not." 
+ CondVal->getName()); return new ZExtInst(NotCond, SelType); } // select C, 0, -1 -> sext !C to int if (match(TrueVal, m_Zero()) && match(FalseVal, m_AllOnes())) { - Value *NotCond = Builder->CreateNot(CondVal, "not." + CondVal->getName()); + Value *NotCond = Builder.CreateNot(CondVal, "not." + CondVal->getName()); return new SExtInst(NotCond, SelType); } } if (ConstantInt *TrueValC = dyn_cast(TrueVal)) if (ConstantInt *FalseValC = dyn_cast(FalseVal)) - if (Value *V = foldSelectICmpAnd(SI, TrueValC, FalseValC, Builder)) + if (Value *V = foldSelectICmpAnd(SI, TrueValC->getValue(), + FalseValC->getValue(), Builder)) return replaceInstUsesWith(SI, V); // See if we are selecting two values based on a comparison of the two values. @@ -1231,10 +1287,10 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { // (X ugt Y) ? X : Y -> (X ole Y) ? Y : X if (FCI->hasOneUse() && FCmpInst::isUnordered(FCI->getPredicate())) { FCmpInst::Predicate InvPred = FCI->getInversePredicate(); - IRBuilder<>::FastMathFlagGuard FMFG(*Builder); - Builder->setFastMathFlags(FCI->getFastMathFlags()); - Value *NewCond = Builder->CreateFCmp(InvPred, TrueVal, FalseVal, - FCI->getName() + ".inv"); + IRBuilder<>::FastMathFlagGuard FMFG(Builder); + Builder.setFastMathFlags(FCI->getFastMathFlags()); + Value *NewCond = Builder.CreateFCmp(InvPred, TrueVal, FalseVal, + FCI->getName() + ".inv"); return SelectInst::Create(NewCond, FalseVal, TrueVal, SI.getName() + ".p"); @@ -1274,10 +1330,10 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { // (X ugt Y) ? X : Y -> (X ole Y) ? X : Y if (FCI->hasOneUse() && FCmpInst::isUnordered(FCI->getPredicate())) { FCmpInst::Predicate InvPred = FCI->getInversePredicate(); - IRBuilder<>::FastMathFlagGuard FMFG(*Builder); - Builder->setFastMathFlags(FCI->getFastMathFlags()); - Value *NewCond = Builder->CreateFCmp(InvPred, FalseVal, TrueVal, - FCI->getName() + ".inv"); + IRBuilder<>::FastMathFlagGuard FMFG(Builder); + Builder.setFastMathFlags(FCI->getFastMathFlags()); + Value *NewCond = Builder.CreateFCmp(InvPred, FalseVal, TrueVal, + FCI->getName() + ".inv"); return SelectInst::Create(NewCond, FalseVal, TrueVal, SI.getName() + ".p"); @@ -1293,7 +1349,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { if (Instruction *Result = foldSelectInstWithICmp(SI, ICI)) return Result; - if (Instruction *Add = foldAddSubSelect(SI, *Builder)) + if (Instruction *Add = foldAddSubSelect(SI, Builder)) return Add; // Turn (select C, (op X, Y), (op X, Z)) -> (op X, (select C, Y, Z)) @@ -1324,16 +1380,16 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { Value *Cmp; if (CmpInst::isIntPredicate(Pred)) { - Cmp = Builder->CreateICmp(Pred, LHS, RHS); + Cmp = Builder.CreateICmp(Pred, LHS, RHS); } else { - IRBuilder<>::FastMathFlagGuard FMFG(*Builder); + IRBuilder<>::FastMathFlagGuard FMFG(Builder); auto FMF = cast(SI.getCondition())->getFastMathFlags(); - Builder->setFastMathFlags(FMF); - Cmp = Builder->CreateFCmp(Pred, LHS, RHS); + Builder.setFastMathFlags(FMF); + Cmp = Builder.CreateFCmp(Pred, LHS, RHS); } - Value *NewSI = Builder->CreateCast( - CastOp, Builder->CreateSelect(Cmp, LHS, RHS, SI.getName(), &SI), + Value *NewSI = Builder.CreateCast( + CastOp, Builder.CreateSelect(Cmp, LHS, RHS, SI.getName(), &SI), SelType); return replaceInstUsesWith(SI, NewSI); } @@ -1368,13 +1424,12 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { (SI.hasOneUse() && match(*SI.user_begin(), m_Not(m_Value()))); if (NumberOfNots >= 2) { - Value *NewLHS = Builder->CreateNot(LHS); 
- Value *NewRHS = Builder->CreateNot(RHS); - Value *NewCmp = SPF == SPF_SMAX - ? Builder->CreateICmpSLT(NewLHS, NewRHS) - : Builder->CreateICmpULT(NewLHS, NewRHS); + Value *NewLHS = Builder.CreateNot(LHS); + Value *NewRHS = Builder.CreateNot(RHS); + Value *NewCmp = SPF == SPF_SMAX ? Builder.CreateICmpSLT(NewLHS, NewRHS) + : Builder.CreateICmpULT(NewLHS, NewRHS); Value *NewSI = - Builder->CreateNot(Builder->CreateSelect(NewCmp, NewLHS, NewRHS)); + Builder.CreateNot(Builder.CreateSelect(NewCmp, NewLHS, NewRHS)); return replaceInstUsesWith(SI, NewSI); } } @@ -1404,7 +1459,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { // We choose this as normal form to enable folding on the And and shortening // paths for the values (this helps GetUnderlyingObjects() for example). if (TrueSI->getFalseValue() == FalseVal && TrueSI->hasOneUse()) { - Value *And = Builder->CreateAnd(CondVal, TrueSI->getCondition()); + Value *And = Builder.CreateAnd(CondVal, TrueSI->getCondition()); SI.setOperand(0, And); SI.setOperand(1, TrueSI->getTrueValue()); return &SI; @@ -1422,7 +1477,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { } // select(C0, a, select(C1, a, b)) -> select(C0|C1, a, b) if (FalseSI->getTrueValue() == TrueVal && FalseSI->hasOneUse()) { - Value *Or = Builder->CreateOr(CondVal, FalseSI->getCondition()); + Value *Or = Builder.CreateOr(CondVal, FalseSI->getCondition()); SI.setOperand(0, Or); SI.setOperand(2, FalseSI->getFalseValue()); return &SI; @@ -1478,13 +1533,13 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { if (!CondVal->getType()->isVectorTy() && !AC.assumptions().empty()) { KnownBits Known(1); computeKnownBits(CondVal, Known, 0, &SI); - if (Known.One == 1) + if (Known.One.isOneValue()) return replaceInstUsesWith(SI, TrueVal); - if (Known.Zero == 1) + if (Known.Zero.isOneValue()) return replaceInstUsesWith(SI, FalseVal); } - if (Instruction *BitCastSel = foldSelectCmpBitcasts(SI, *Builder)) + if (Instruction *BitCastSel = foldSelectCmpBitcasts(SI, Builder)) return BitCastSel; return nullptr; diff --git a/interpreter/llvm/src/lib/Transforms/InstCombine/InstCombineShifts.cpp b/interpreter/llvm/src/lib/Transforms/InstCombine/InstCombineShifts.cpp index 219effce7ba56..7ed141c7fd79d 100644 --- a/interpreter/llvm/src/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/interpreter/llvm/src/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -44,9 +44,10 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) { Value *A; Constant *C; if (match(Op0, m_Constant()) && match(Op1, m_Add(m_Value(A), m_Constant(C)))) - if (isKnownNonNegative(A, DL) && isKnownNonNegative(C, DL)) + if (isKnownNonNegative(A, DL, 0, &AC, &I, &DT) && + isKnownNonNegative(C, DL, 0, &AC, &I, &DT)) return BinaryOperator::Create( - I.getOpcode(), Builder->CreateBinOp(I.getOpcode(), Op0, C), A); + I.getOpcode(), Builder.CreateBinOp(I.getOpcode(), Op0, C), A); // X shift (A srem B) -> X shift (A and B-1) iff B is a power of 2. // Because shifts by negative values (which could occur if A were negative) @@ -55,8 +56,8 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) { if (Op1->hasOneUse() && match(Op1, m_SRem(m_Value(A), m_Power2(B)))) { // FIXME: Should this get moved into SimplifyDemandedBits by saying we don't // demand the sign bit (and many others) here?? 
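// ---[ editor's aside, not part of the patch ]---------------------------------
// The arithmetic fact behind the srem fold above: for a non-negative A and a
// power-of-two B, "A srem B" equals "A and (B-1)". For negative A the two
// differ, but then the original shift amount would be negative, i.e. the shift
// is already undefined, so the combine may refine it. Made-up divisor below.
static bool srem_as_mask_equiv(int A) {
  const int B = 8;                          // power of two
  return A < 0 || (A % B) == (A & (B - 1)); // holds for every non-negative A
}
// ---[ end aside ]--------------------------------------------------------------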
- Value *Rem = Builder->CreateAnd(A, ConstantInt::get(I.getType(), *B-1), - Op1->getName()); + Value *Rem = Builder.CreateAnd(A, ConstantInt::get(I.getType(), *B - 1), + Op1->getName()); I.setOperand(1, Rem); return &I; } @@ -259,9 +260,9 @@ static Value *getShiftedValue(Value *V, unsigned NumBits, bool isLeftShift, // We can always evaluate constants shifted. if (Constant *C = dyn_cast(V)) { if (isLeftShift) - V = IC.Builder->CreateShl(C, NumBits); + V = IC.Builder.CreateShl(C, NumBits); else - V = IC.Builder->CreateLShr(C, NumBits); + V = IC.Builder.CreateLShr(C, NumBits); // If we got a constantexpr back, try to simplify it with TD info. if (auto *C = dyn_cast(V)) if (auto *FoldedC = @@ -288,7 +289,7 @@ static Value *getShiftedValue(Value *V, unsigned NumBits, bool isLeftShift, case Instruction::Shl: case Instruction::LShr: return foldShiftedShift(cast(I), NumBits, isLeftShift, - *(IC.Builder)); + IC.Builder); case Instruction::Select: I->setOperand( @@ -352,7 +353,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1, Constant *ShAmt = ConstantExpr::getZExt(cast(Op1), TrOp->getType()); // (shift2 (shift1 & 0x00FF), c2) - Value *NSh = Builder->CreateBinOp(I.getOpcode(), TrOp, ShAmt,I.getName()); + Value *NSh = Builder.CreateBinOp(I.getOpcode(), TrOp, ShAmt, I.getName()); // For logical shifts, the truncation has the effect of making the high // part of the register be zeros. Emulate this by inserting an AND to @@ -374,9 +375,9 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1, } // shift1 & 0x00FF - Value *And = Builder->CreateAnd(NSh, - ConstantInt::get(I.getContext(), MaskV), - TI->getName()); + Value *And = Builder.CreateAnd(NSh, + ConstantInt::get(I.getContext(), MaskV), + TI->getName()); // Return the value truncated to the interesting size. 
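// ---[ editor's aside, not part of the patch ]---------------------------------
// Shape of this trunc transform in plain C++ (i32 -> i8 truncation, logical
// shift right by 3; assumes 8-bit unsigned char and 32-bit unsigned): doing
// the shift in the wide type and masking with the shifted-down low-byte mask
// matches shifting after the truncate.
static bool shift_of_trunc_equiv(unsigned X) {
  unsigned char A = (unsigned char)((unsigned char)X >> 3);   // (lshr (trunc X), 3)
  unsigned char B = (unsigned char)((X >> 3) & (0xFFu >> 3)); // (trunc (and (lshr X, 3), 0xFF >> 3))
  return A == B;                                              // holds for every X
}
// ---[ end aside ]--------------------------------------------------------------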
return new TruncInst(And, I.getType()); @@ -400,10 +401,10 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1, match(Op0BO->getOperand(1), m_Shr(m_Value(V1), m_Specific(Op1)))) { Value *YS = // (Y << C) - Builder->CreateShl(Op0BO->getOperand(0), Op1, Op0BO->getName()); + Builder.CreateShl(Op0BO->getOperand(0), Op1, Op0BO->getName()); // (X + (Y << C)) - Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), YS, V1, - Op0BO->getOperand(1)->getName()); + Value *X = Builder.CreateBinOp(Op0BO->getOpcode(), YS, V1, + Op0BO->getOperand(1)->getName()); unsigned Op1Val = Op1C->getLimitedValue(TypeBits); APInt Bits = APInt::getHighBitsSet(TypeBits, TypeBits - Op1Val); @@ -420,11 +421,10 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1, m_And(m_OneUse(m_Shr(m_Value(V1), m_Specific(Op1))), m_ConstantInt(CC)))) { Value *YS = // (Y << C) - Builder->CreateShl(Op0BO->getOperand(0), Op1, - Op0BO->getName()); + Builder.CreateShl(Op0BO->getOperand(0), Op1, Op0BO->getName()); // X & (CC << C) - Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1), - V1->getName()+".mask"); + Value *XM = Builder.CreateAnd(V1, ConstantExpr::getShl(CC, Op1), + V1->getName()+".mask"); return BinaryOperator::Create(Op0BO->getOpcode(), YS, XM); } LLVM_FALLTHROUGH; @@ -436,10 +436,10 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1, match(Op0BO->getOperand(0), m_Shr(m_Value(V1), m_Specific(Op1)))) { Value *YS = // (Y << C) - Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName()); + Builder.CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName()); // (X + (Y << C)) - Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), V1, YS, - Op0BO->getOperand(0)->getName()); + Value *X = Builder.CreateBinOp(Op0BO->getOpcode(), V1, YS, + Op0BO->getOperand(0)->getName()); unsigned Op1Val = Op1C->getLimitedValue(TypeBits); APInt Bits = APInt::getHighBitsSet(TypeBits, TypeBits - Op1Val); @@ -455,10 +455,10 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1, m_And(m_OneUse(m_Shr(m_Value(V1), m_Value(V2))), m_ConstantInt(CC))) && V2 == Op1) { Value *YS = // (Y << C) - Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName()); + Builder.CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName()); // X & (CC << C) - Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1), - V1->getName()+".mask"); + Value *XM = Builder.CreateAnd(V1, ConstantExpr::getShl(CC, Op1), + V1->getName()+".mask"); return BinaryOperator::Create(Op0BO->getOpcode(), XM, YS); } @@ -501,7 +501,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1, Constant *NewRHS = ConstantExpr::get(I.getOpcode(), Op0C, Op1); Value *NewShift = - Builder->CreateBinOp(I.getOpcode(), Op0BO->getOperand(0), Op1); + Builder.CreateBinOp(I.getOpcode(), Op0BO->getOperand(0), Op1); NewShift->takeName(Op0BO); return BinaryOperator::Create(Op0BO->getOpcode(), NewShift, @@ -519,8 +519,9 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) { return replaceInstUsesWith(I, V); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - if (Value *V = SimplifyShlInst(Op0, Op1, I.hasNoSignedWrap(), - I.hasNoUnsignedWrap(), SQ)) + if (Value *V = + SimplifyShlInst(Op0, Op1, I.hasNoSignedWrap(), I.hasNoUnsignedWrap(), + SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); if (Instruction *V = commonShiftTransforms(I)) @@ -539,7 +540,7 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) { unsigned SrcWidth = X->getType()->getScalarSizeInBits(); if (ShAmt 
< SrcWidth && MaskedValueIsZero(X, APInt::getHighBitsSet(SrcWidth, ShAmt), 0, &I)) - return new ZExtInst(Builder->CreateShl(X, ShAmt), Ty); + return new ZExtInst(Builder.CreateShl(X, ShAmt), Ty); } // (X >>u C) << C --> X & (-1 << C) @@ -554,8 +555,7 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) { // The inexact versions are deferred to DAGCombine, so we don't hide shl // behind a bit mask. const APInt *ShOp1; - if (match(Op0, m_CombineOr(m_Exact(m_LShr(m_Value(X), m_APInt(ShOp1))), - m_Exact(m_AShr(m_Value(X), m_APInt(ShOp1)))))) { + if (match(Op0, m_Exact(m_Shr(m_Value(X), m_APInt(ShOp1))))) { unsigned ShrAmt = ShOp1->getZExtValue(); if (ShrAmt < ShAmt) { // If C1 < C2: (X >>?,exact C1) << C2 --> X << (C2 - C1) @@ -618,7 +618,8 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) { return replaceInstUsesWith(I, V); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - if (Value *V = SimplifyLShrInst(Op0, Op1, I.isExact(), SQ)) + if (Value *V = + SimplifyLShrInst(Op0, Op1, I.isExact(), SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); if (Instruction *R = commonShiftTransforms(I)) @@ -639,7 +640,7 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) { // ctpop.i32(x)>>5 --> zext(x == -1) bool IsPop = II->getIntrinsicID() == Intrinsic::ctpop; Constant *RHS = ConstantInt::getSigned(Ty, IsPop ? -1 : 0); - Value *Cmp = Builder->CreateICmpEQ(II->getArgOperand(0), RHS); + Value *Cmp = Builder.CreateICmpEQ(II->getArgOperand(0), RHS); return new ZExtInst(Cmp, Ty); } @@ -656,7 +657,7 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) { return NewLShr; } // (X << C1) >>u C2 --> (X >>u (C2 - C1)) & (-1 >> C2) - Value *NewLShr = Builder->CreateLShr(X, ShiftDiff, "", I.isExact()); + Value *NewLShr = Builder.CreateLShr(X, ShiftDiff, "", I.isExact()); APInt Mask(APInt::getLowBitsSet(BitWidth, BitWidth - ShAmt)); return BinaryOperator::CreateAnd(NewLShr, ConstantInt::get(Ty, Mask)); } @@ -669,7 +670,7 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) { return NewShl; } // (X << C1) >>u C2 --> X << (C1 - C2) & (-1 >> C2) - Value *NewShl = Builder->CreateShl(X, ShiftDiff); + Value *NewShl = Builder.CreateShl(X, ShiftDiff); APInt Mask(APInt::getLowBitsSet(BitWidth, BitWidth - ShAmt)); return BinaryOperator::CreateAnd(NewShl, ConstantInt::get(Ty, Mask)); } @@ -679,6 +680,31 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) { return BinaryOperator::CreateAnd(X, ConstantInt::get(Ty, Mask)); } + if (match(Op0, m_SExt(m_Value(X))) && + (!Ty->isIntegerTy() || shouldChangeType(Ty, X->getType()))) { + // Are we moving the sign bit to the low bit and widening with high zeros? + unsigned SrcTyBitWidth = X->getType()->getScalarSizeInBits(); + if (ShAmt == BitWidth - 1) { + // lshr (sext i1 X to iN), N-1 --> zext X to iN + if (SrcTyBitWidth == 1) + return new ZExtInst(X, Ty); + + // lshr (sext iM X to iN), N-1 --> zext (lshr X, M-1) to iN + if (Op0->hasOneUse()) { + Value *NewLShr = Builder.CreateLShr(X, SrcTyBitWidth - 1); + return new ZExtInst(NewLShr, Ty); + } + } + + // lshr (sext iM X to iN), N-M --> zext (ashr X, min(N-M, M-1)) to iN + if (ShAmt == BitWidth - SrcTyBitWidth && Op0->hasOneUse()) { + // The new shift amount can't be more than the narrow source type. 
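// ---[ editor's aside, not part of the patch ]---------------------------------
// Concrete instance of the new lshr-of-sext fold, with M = 8, N = 32 and a
// shift amount of N - M = 24, so the clamp below kicks in (min(24, 7) = 7).
// Assumes 32-bit int and arithmetic right shift of negative values.
static bool lshr_of_sext_equiv(signed char X) {
  unsigned A = (unsigned)(int)X >> 24;            // lshr (sext i8 X to i32), 24
  unsigned B = (unsigned)(unsigned char)(X >> 7); // zext (ashr X, 7) to i32
  return A == B;                                  // holds for every X
}
// ---[ end aside ]--------------------------------------------------------------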
+ unsigned NewShAmt = std::min(ShAmt, SrcTyBitWidth - 1); + Value *AShr = Builder.CreateAShr(X, NewShAmt); + return new ZExtInst(AShr, Ty); + } + } + if (match(Op0, m_LShr(m_Value(X), m_APInt(ShOp1)))) { unsigned AmtSum = ShAmt + ShOp1->getZExtValue(); // Oversized shifts are simplified to zero in InstSimplify. @@ -702,7 +728,8 @@ Instruction *InstCombiner::visitAShr(BinaryOperator &I) { return replaceInstUsesWith(I, V); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - if (Value *V = SimplifyAShrInst(Op0, Op1, I.isExact(), SQ)) + if (Value *V = + SimplifyAShrInst(Op0, Op1, I.isExact(), SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); if (Instruction *R = commonShiftTransforms(I)) diff --git a/interpreter/llvm/src/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/interpreter/llvm/src/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 4028a92771a49..a20f474cbf40d 100644 --- a/interpreter/llvm/src/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/interpreter/llvm/src/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -121,7 +121,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, } Known.resetAll(); - if (DemandedMask == 0) // Not demanding any bits from V. + if (DemandedMask.isNullValue()) // Not demanding any bits from V. return UndefValue::get(VTy); if (Depth == 6) // Limit search depth. @@ -158,8 +158,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, SimplifyDemandedBits(I, 0, DemandedMask & ~RHSKnown.Zero, LHSKnown, Depth + 1)) return I; - assert(!(RHSKnown.Zero & RHSKnown.One) && "Bits known to be one AND zero?"); - assert(!(LHSKnown.Zero & LHSKnown.One) && "Bits known to be one AND zero?"); + assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?"); + assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?"); // Output known-0 are known to be clear if zero in either the LHS | RHS. APInt IKnownZero = RHSKnown.Zero | LHSKnown.Zero; @@ -192,8 +192,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, SimplifyDemandedBits(I, 0, DemandedMask & ~RHSKnown.One, LHSKnown, Depth + 1)) return I; - assert(!(RHSKnown.Zero & RHSKnown.One) && "Bits known to be one AND zero?"); - assert(!(LHSKnown.Zero & LHSKnown.One) && "Bits known to be one AND zero?"); + assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?"); + assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?"); // Output known-0 bits are only known if clear in both the LHS & RHS. APInt IKnownZero = RHSKnown.Zero & LHSKnown.Zero; @@ -224,8 +224,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnown, Depth + 1) || SimplifyDemandedBits(I, 0, DemandedMask, LHSKnown, Depth + 1)) return I; - assert(!(RHSKnown.Zero & RHSKnown.One) && "Bits known to be one AND zero?"); - assert(!(LHSKnown.Zero & LHSKnown.One) && "Bits known to be one AND zero?"); + assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?"); + assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?"); // Output known-0 bits are known if clear or set in both the LHS & RHS. 
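// ---[ editor's aside, not part of the patch ]---------------------------------
// The xor rule from the comment above, simulated on small made-up masks, where
// Zero/One are the bits known to be 0/1: a result bit is known 0 where the
// operands are known to agree, and known 1 where they are known to disagree.
static bool xor_known_bits_demo() {
  unsigned LZ = 0x9, LO = 0x2;         // LHS is 0?10 in binary ('?' = unknown)
  unsigned RZ = 0x4, RO = 0x3;         // RHS is ?011 in binary
  unsigned KZ = (LZ & RZ) | (LO & RO); // known-0: both known 0 or both known 1
  unsigned KO = (LZ & RO) | (LO & RZ); // known-1: known to disagree
  return KZ == 0x2 && KO == 0x1;       // only bits 1 and 0 of the xor are known
}
// ---[ end aside ]--------------------------------------------------------------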
APInt IKnownZero = (RHSKnown.Zero & LHSKnown.Zero) | @@ -313,8 +313,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if (SimplifyDemandedBits(I, 2, DemandedMask, RHSKnown, Depth + 1) || SimplifyDemandedBits(I, 1, DemandedMask, LHSKnown, Depth + 1)) return I; - assert(!(RHSKnown.Zero & RHSKnown.One) && "Bits known to be one AND zero?"); - assert(!(LHSKnown.Zero & LHSKnown.One) && "Bits known to be one AND zero?"); + assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?"); + assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?"); // If the operands are constants, see if we can simplify them. if (ShrinkDemandedConstant(I, 1, DemandedMask) || @@ -325,15 +325,19 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, Known.One = RHSKnown.One & LHSKnown.One; Known.Zero = RHSKnown.Zero & LHSKnown.Zero; break; + case Instruction::ZExt: case Instruction::Trunc: { - unsigned truncBf = I->getOperand(0)->getType()->getScalarSizeInBits(); - DemandedMask = DemandedMask.zext(truncBf); - Known = Known.zext(truncBf); - if (SimplifyDemandedBits(I, 0, DemandedMask, Known, Depth + 1)) + unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits(); + + APInt InputDemandedMask = DemandedMask.zextOrTrunc(SrcBitWidth); + KnownBits InputKnown(SrcBitWidth); + if (SimplifyDemandedBits(I, 0, InputDemandedMask, InputKnown, Depth + 1)) return I; - DemandedMask = DemandedMask.trunc(BitWidth); - Known = Known.trunc(BitWidth); - assert(!(Known.Zero & Known.One) && "Bits known to be one AND zero?"); + Known = Known.zextOrTrunc(BitWidth); + // Any top bits are known to be zero. + if (BitWidth > SrcBitWidth) + Known.Zero.setBitsFrom(SrcBitWidth); + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); break; } case Instruction::BitCast: @@ -355,56 +359,36 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if (SimplifyDemandedBits(I, 0, DemandedMask, Known, Depth + 1)) return I; - assert(!(Known.Zero & Known.One) && "Bits known to be one AND zero?"); - break; - case Instruction::ZExt: { - // Compute the bits in the result that are not present in the input. - unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits(); - - DemandedMask = DemandedMask.trunc(SrcBitWidth); - Known = Known.trunc(SrcBitWidth); - if (SimplifyDemandedBits(I, 0, DemandedMask, Known, Depth + 1)) - return I; - DemandedMask = DemandedMask.zext(BitWidth); - Known = Known.zext(BitWidth); - assert(!(Known.Zero & Known.One) && "Bits known to be one AND zero?"); - // The top bits are known to be zero. - Known.Zero.setBitsFrom(SrcBitWidth); + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); break; - } case Instruction::SExt: { // Compute the bits in the result that are not present in the input. - unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits(); + unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits(); - APInt InputDemandedBits = DemandedMask & - APInt::getLowBitsSet(BitWidth, SrcBitWidth); + APInt InputDemandedBits = DemandedMask.trunc(SrcBitWidth); - APInt NewBits(APInt::getBitsSetFrom(BitWidth, SrcBitWidth)); // If any of the sign extended bits are demanded, we know that the sign // bit is demanded. 
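// ---[ editor's aside, not part of the patch ]---------------------------------
// Why only demanded bits above the source width force the sign bit to be
// demanded (i8 -> i32 here): every bit past bit 7 of the sext is a copy of
// bit 7, while on bits 0..7 sext and zext agree, which is also what justifies
// the sext-to-zext rewrite below. Assumes 8-bit signed char and 32-bit int.
static bool sext_low_bits_demo(signed char X) {
  unsigned S = (unsigned)(int)X;           // sext i8 X to i32
  unsigned Z = (unsigned)(unsigned char)X; // zext i8 X to i32
  return (S & 0xFFu) == (Z & 0xFFu);       // demanded low bits are identical
}
// ---[ end aside ]--------------------------------------------------------------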
- if ((NewBits & DemandedMask) != 0) + if (DemandedMask.getActiveBits() > SrcBitWidth) InputDemandedBits.setBit(SrcBitWidth-1); - InputDemandedBits = InputDemandedBits.trunc(SrcBitWidth); - Known = Known.trunc(SrcBitWidth); - if (SimplifyDemandedBits(I, 0, InputDemandedBits, Known, Depth + 1)) + KnownBits InputKnown(SrcBitWidth); + if (SimplifyDemandedBits(I, 0, InputDemandedBits, InputKnown, Depth + 1)) return I; - InputDemandedBits = InputDemandedBits.zext(BitWidth); - Known = Known.zext(BitWidth); - assert(!(Known.Zero & Known.One) && "Bits known to be one AND zero?"); - - // If the sign bit of the input is known set or clear, then we know the - // top bits of the result. // If the input sign bit is known zero, or if the NewBits are not demanded // convert this into a zero extension. - if (Known.Zero[SrcBitWidth-1] || (NewBits & ~DemandedMask) == NewBits) { - // Convert to ZExt cast + if (InputKnown.isNonNegative() || + DemandedMask.getActiveBits() <= SrcBitWidth) { + // Convert to ZExt cast. CastInst *NewCast = new ZExtInst(I->getOperand(0), VTy, I->getName()); return InsertNewInstWith(NewCast, *I); - } else if (Known.One[SrcBitWidth-1]) { // Input sign bit known set - Known.One |= NewBits; - } + } + + // If the sign bit of the input is known set or clear, then we know the + // top bits of the result. + Known = InputKnown.sext(BitWidth); + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); break; } case Instruction::Add: @@ -433,8 +417,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // the highest demanded bit, we just return the other side. if (DemandedFromOps.isSubsetOf(RHSKnown.Zero)) return I->getOperand(0); - // We can't do this with the LHS for subtraction. - if (I->getOpcode() == Instruction::Add && + // We can't do this with the LHS for subtraction, unless we are only + // demanding the LSB. + if ((I->getOpcode() == Instruction::Add || + DemandedFromOps.isOneValue()) && DemandedFromOps.isSubsetOf(LHSKnown.Zero)) return I->getOperand(1); } @@ -467,7 +453,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Depth + 1)) return I; - assert(!(Known.Zero & Known.One) && "Bits known to be one AND zero?"); + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); Known.Zero <<= ShiftAmt; Known.One <<= ShiftAmt; // low bits known zero. @@ -491,7 +477,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Depth + 1)) return I; - assert(!(Known.Zero & Known.One) && "Bits known to be one AND zero?"); + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); Known.Zero.lshrInPlace(ShiftAmt); Known.One.lshrInPlace(ShiftAmt); if (ShiftAmt) @@ -504,7 +490,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // always convert this into a logical shr, even if the shift amount is // variable. The low bit of the shift cannot be an input sign bit unless // the shift amount is >= the size of the datatype, which is undefined. - if (DemandedMask == 1) { + if (DemandedMask.isOneValue()) { // Perform the logical shift right. 
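// ---[ editor's aside, not part of the patch ]---------------------------------
// The new LSB case for sub above, numerically: modulo 2, subtraction and
// addition coincide, so when only bit 0 is demanded and the LHS's bit 0 is
// known zero, "sub LHS, RHS" may be replaced by RHS outright. Made-up even
// LHS below.
static bool sub_lsb_demo(unsigned RHS) {
  unsigned LHS = 0x10u;                    // bit 0 known zero
  return ((LHS - RHS) & 1u) == (RHS & 1u); // holds for every RHS
}
// ---[ end aside ]--------------------------------------------------------------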
Instruction *NewVal = BinaryOperator::CreateLShr( I->getOperand(0), I->getOperand(1), I->getName()); @@ -535,7 +521,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Depth + 1)) return I; - assert(!(Known.Zero & Known.One) && "Bits known to be one AND zero?"); + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); // Compute the new bits that are at the top now. APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt)); Known.Zero.lshrInPlace(ShiftAmt); @@ -564,7 +550,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if (ConstantInt *Rem = dyn_cast(I->getOperand(1))) { // X % -1 demands all the bits because we don't want to introduce // INT_MIN % -1 (== undef) by accident. - if (Rem->isAllOnesValue()) + if (Rem->isMinusOne()) break; APInt RA = Rem->getValue().abs(); if (RA.isPowerOf2()) { @@ -590,7 +576,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if (LHSKnown.isNegative() && LowBits.intersects(LHSKnown.One)) Known.One |= ~LowBits; - assert(!(Known.Zero & Known.One) && "Bits known to be one AND zero?"); + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); break; } } @@ -672,7 +658,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // If we don't need any of low bits then return zero, // we know that DemandedMask is non-zero already. APInt DemandedElts = DemandedMask.zextOrTrunc(ArgWidth); - if (DemandedElts == 0) + if (DemandedElts.isNullValue()) return ConstantInt::getNullValue(VTy); // We know that the upper bits are set to zero. @@ -924,7 +910,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, return nullptr; } - if (DemandedElts == 0) { // If nothing is demanded, provide undef. + if (DemandedElts.isNullValue()) { // If nothing is demanded, provide undef. 
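// ---[ editor's aside, not part of the patch ]---------------------------------
// The srem known-bits reasoning above, for a power-of-two divisor (made-up
// value 4): a non-negative dividend makes "X srem 4" equal to "X and 3", so
// every bit above the low two is known zero; a negative dividend instead
// copies its sign into the high bits, the isNegative case handled above.
static bool srem_low_bits_demo(int X) {
  return X < 0 || (X % 4) == (X & 3); // holds for every non-negative X
}
// ---[ end aside ]--------------------------------------------------------------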
UndefElts = EltMask; return UndefValue::get(V->getType()); } @@ -1643,10 +1629,10 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, for (unsigned I = 0, E = II->getNumArgOperands(); I != E; ++I) Args.push_back(II->getArgOperand(I)); - IRBuilderBase::InsertPointGuard Guard(*Builder); - Builder->SetInsertPoint(II); + IRBuilderBase::InsertPointGuard Guard(Builder); + Builder.SetInsertPoint(II); - CallInst *NewCall = Builder->CreateCall(NewIntrin, Args); + CallInst *NewCall = Builder.CreateCall(NewIntrin, Args); NewCall->takeName(II); NewCall->copyMetadata(*II); @@ -1673,15 +1659,15 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, if (NewNumElts == 1) { - return Builder->CreateInsertElement(UndefValue::get(V->getType()), - NewCall, static_cast(0)); + return Builder.CreateInsertElement(UndefValue::get(V->getType()), + NewCall, static_cast(0)); } SmallVector EltMask; for (unsigned I = 0; I < VWidth; ++I) EltMask.push_back(I); - Value *Shuffle = Builder->CreateShuffleVector( + Value *Shuffle = Builder.CreateShuffleVector( NewCall, UndefValue::get(NewTy), EltMask); MadeChange = true; diff --git a/interpreter/llvm/src/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/interpreter/llvm/src/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index 7fc6774f1849c..dd71a31b644b3 100644 --- a/interpreter/llvm/src/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/interpreter/llvm/src/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -145,7 +145,8 @@ Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) { Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { if (Value *V = SimplifyExtractElementInst(EI.getVectorOperand(), - EI.getIndexOperand(), SQ)) + EI.getIndexOperand(), + SQ.getWithInstruction(&EI))) return replaceInstUsesWith(EI, V); // If vector val is constant with all elements the same, replace EI with @@ -203,11 +204,11 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { if (I->hasOneUse() && cheapToScalarize(BO, isa(EI.getOperand(1)))) { Value *newEI0 = - Builder->CreateExtractElement(BO->getOperand(0), EI.getOperand(1), - EI.getName()+".lhs"); + Builder.CreateExtractElement(BO->getOperand(0), EI.getOperand(1), + EI.getName()+".lhs"); Value *newEI1 = - Builder->CreateExtractElement(BO->getOperand(1), EI.getOperand(1), - EI.getName()+".rhs"); + Builder.CreateExtractElement(BO->getOperand(1), EI.getOperand(1), + EI.getName()+".rhs"); return BinaryOperator::CreateWithCopiedFlags(BO->getOpcode(), newEI0, newEI1, BO); } @@ -249,8 +250,8 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { // Bitcasts can change the number of vector elements, and they cost // nothing. 
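// ---[ editor's aside, not part of the patch ]---------------------------------
// The extract-of-binop scalarization above, in scalar terms: extracting lane i
// of a vector binary op equals applying the op to the two lane-i scalars, so a
// single-use vector op feeding one extract can be rewritten as two cheap
// extracts and a scalar op. Made-up lanes below.
static bool extract_of_binop_demo() {
  int A[4] = {1, 2, 3, 4}, B[4] = {10, 20, 30, 40}, Sum[4];
  for (int I = 0; I != 4; ++I)
    Sum[I] = A[I] + B[I];                // the vector add
  const int Lane = 2;
  return Sum[Lane] == A[Lane] + B[Lane]; // extract(add) == add(extract, extract)
}
// ---[ end aside ]--------------------------------------------------------------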
if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) { - Value *EE = Builder->CreateExtractElement(CI->getOperand(0), - EI.getIndexOperand()); + Value *EE = Builder.CreateExtractElement(CI->getOperand(0), + EI.getIndexOperand()); Worklist.AddValue(EE); return CastInst::Create(CI->getOpcode(), EE, EI.getType()); } @@ -268,20 +269,20 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { Value *Cond = SI->getCondition(); if (Cond->getType()->isVectorTy()) { - Cond = Builder->CreateExtractElement(Cond, - EI.getIndexOperand(), - Cond->getName() + ".elt"); + Cond = Builder.CreateExtractElement(Cond, + EI.getIndexOperand(), + Cond->getName() + ".elt"); } Value *V1Elem - = Builder->CreateExtractElement(TrueVal, - EI.getIndexOperand(), - TrueVal->getName() + ".elt"); + = Builder.CreateExtractElement(TrueVal, + EI.getIndexOperand(), + TrueVal->getName() + ".elt"); Value *V2Elem - = Builder->CreateExtractElement(FalseVal, - EI.getIndexOperand(), - FalseVal->getName() + ".elt"); + = Builder.CreateExtractElement(FalseVal, + EI.getIndexOperand(), + FalseVal->getName() + ".elt"); return SelectInst::Create(Cond, V1Elem, V2Elem, @@ -440,7 +441,7 @@ static void replaceExtractElements(InsertElementInst *InsElt, if (!OldExt || OldExt->getParent() != WideVec->getParent()) continue; auto *NewExt = ExtractElementInst::Create(WideVec, OldExt->getOperand(1)); - NewExt->insertAfter(WideVec); + NewExt->insertAfter(OldExt); IC.replaceInstUsesWith(*OldExt, NewExt); } } @@ -836,7 +837,7 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) { if (Instruction *Shuf = foldConstantInsEltIntoShuffle(IE)) return Shuf; - if (Instruction *NewInsElt = hoistInsEltConst(IE, *Builder)) + if (Instruction *NewInsElt = hoistInsEltConst(IE, Builder)) return NewInsElt; // Turn a sequence of inserts that broadcasts a scalar into a single @@ -1019,9 +1020,9 @@ InstCombiner::EvaluateInDifferentElementOrder(Value *V, ArrayRef Mask) { SmallVector MaskValues; for (int i = 0, e = Mask.size(); i != e; ++i) { if (Mask[i] == -1) - MaskValues.push_back(UndefValue::get(Builder->getInt32Ty())); + MaskValues.push_back(UndefValue::get(Builder.getInt32Ty())); else - MaskValues.push_back(Builder->getInt32(Mask[i])); + MaskValues.push_back(Builder.getInt32(Mask[i])); } return ConstantExpr::getShuffleVector(C, UndefValue::get(C->getType()), ConstantVector::get(MaskValues)); @@ -1094,7 +1095,7 @@ InstCombiner::EvaluateInDifferentElementOrder(Value *V, ArrayRef Mask) { Value *V = EvaluateInDifferentElementOrder(I->getOperand(0), Mask); return InsertElementInst::Create(V, I->getOperand(1), - Builder->getInt32(Index), "", I); + Builder.getInt32(Index), "", I); } } llvm_unreachable("failed to reorder elements of vector instruction!"); @@ -1140,8 +1141,8 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { SmallVector Mask = SVI.getShuffleMask(); Type *Int32Ty = Type::getInt32Ty(SVI.getContext()); - if (auto *V = - SimplifyShuffleVectorInst(LHS, RHS, SVI.getMask(), SVI.getType(), SQ)) + if (auto *V = SimplifyShuffleVectorInst( + LHS, RHS, SVI.getMask(), SVI.getType(), SQ.getWithInstruction(&SVI))) return replaceInstUsesWith(SVI, V); bool MadeChange = false; @@ -1274,9 +1275,9 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { UndefValue::get(Int32Ty)); for (unsigned I = 0, E = MaskElems, Idx = BegIdx; I != E; ++Idx, ++I) ShuffleMask[I] = ConstantInt::get(Int32Ty, Idx); - V = Builder->CreateShuffleVector(V, UndefValue::get(V->getType()), - 
ConstantVector::get(ShuffleMask), - SVI.getName() + ".extract"); + V = Builder.CreateShuffleVector(V, UndefValue::get(V->getType()), + ConstantVector::get(ShuffleMask), + SVI.getName() + ".extract"); BegIdx = 0; } unsigned SrcElemsPerTgtElem = TgtElemBitWidth / SrcElemBitWidth; @@ -1286,10 +1287,10 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { auto *NewBC = BCAlreadyExists ? NewBCs[CastSrcTy] - : Builder->CreateBitCast(V, CastSrcTy, SVI.getName() + ".bc"); + : Builder.CreateBitCast(V, CastSrcTy, SVI.getName() + ".bc"); if (!BCAlreadyExists) NewBCs[CastSrcTy] = NewBC; - auto *Ext = Builder->CreateExtractElement( + auto *Ext = Builder.CreateExtractElement( NewBC, ConstantInt::get(Int32Ty, BegIdx), SVI.getName() + ".extract"); // The shufflevector isn't being replaced: the bitcast that used it // is. InstCombine will visit the newly-created instructions. diff --git a/interpreter/llvm/src/lib/Transforms/InstCombine/InstructionCombining.cpp b/interpreter/llvm/src/lib/Transforms/InstCombine/InstructionCombining.cpp index 92acad336dc15..c7766568fd9da 100644 --- a/interpreter/llvm/src/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/interpreter/llvm/src/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -33,7 +33,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/InstCombine/InstCombine.h" #include "InstCombineInternal.h" #include "llvm-c/Initialization.h" #include "llvm/ADT/SmallPtrSet.h" @@ -62,6 +61,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/KnownBits.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/InstCombine/InstCombine.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/Local.h" #include @@ -88,7 +88,7 @@ MaxArraySize("instcombine-maxarray-size", cl::init(1024), cl::desc("Maximum array size considered when doing a combine")); Value *InstCombiner::EmitGEPOffset(User *GEP) { - return llvm::EmitGEPOffset(Builder, DL, GEP); + return llvm::EmitGEPOffset(&Builder, DL, GEP); } /// Return true if it is desirable to convert an integer computation from a @@ -256,7 +256,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) { Value *C = I.getOperand(1); // Does "B op C" simplify? - if (Value *V = SimplifyBinOp(Opcode, B, C, SQ)) { + if (Value *V = SimplifyBinOp(Opcode, B, C, SQ.getWithInstruction(&I))) { // It simplifies to V. Form "A op V". I.setOperand(0, A); I.setOperand(1, V); @@ -285,7 +285,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) { Value *C = Op1->getOperand(1); // Does "A op B" simplify? - if (Value *V = SimplifyBinOp(Opcode, A, B, SQ)) { + if (Value *V = SimplifyBinOp(Opcode, A, B, SQ.getWithInstruction(&I))) { // It simplifies to V. Form "V op C". I.setOperand(0, V); I.setOperand(1, C); @@ -313,7 +313,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) { Value *C = I.getOperand(1); // Does "C op A" simplify? - if (Value *V = SimplifyBinOp(Opcode, C, A, SQ)) { + if (Value *V = SimplifyBinOp(Opcode, C, A, SQ.getWithInstruction(&I))) { // It simplifies to V. Form "V op B". I.setOperand(0, V); I.setOperand(1, B); @@ -333,7 +333,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) { Value *C = Op1->getOperand(1); // Does "C op A" simplify? - if (Value *V = SimplifyBinOp(Opcode, C, A, SQ)) { + if (Value *V = SimplifyBinOp(Opcode, C, A, SQ.getWithInstruction(&I))) { // It simplifies to V. Form "B op V". 
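// ---[ editor's aside, not part of the patch ]---------------------------------
// The reassociation step just taken, on a concrete instance: in
// "(X + 5) + (-5)" the inner "5 + (-5)" simplifies to 0, so the expression
// re-forms as "A op V" and collapses to X. Unsigned arithmetic is used so
// wraparound is well defined.
static bool reassociate_demo(unsigned X) {
  unsigned A = (X + 5u) + (0u - 5u); // (A op B) op C
  unsigned B = X + (5u + (0u - 5u)); // A op (B op C) --> X + 0
  return A == B && B == X;           // holds for every X
}
// ---[ end aside ]--------------------------------------------------------------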
I.setOperand(0, B); I.setOperand(1, V); @@ -498,8 +498,7 @@ getBinOpsForFactorization(Instruction::BinaryOps TopLevelOpcode, /// This tries to simplify binary operations by factorizing out common terms /// (e. g. "(A*B)+(A*C)" -> "A*(B+C)"). -Value *InstCombiner::tryFactorization(InstCombiner::BuilderTy *Builder, - BinaryOperator &I, +Value *InstCombiner::tryFactorization(BinaryOperator &I, Instruction::BinaryOps InnerOpcode, Value *A, Value *B, Value *C, Value *D) { assert(A && B && C && D && "All values must be provided"); @@ -521,13 +520,13 @@ Value *InstCombiner::tryFactorization(InstCombiner::BuilderTy *Builder, std::swap(C, D); // Consider forming "A op' (B op D)". // If "B op D" simplifies then it can be formed with no cost. - V = SimplifyBinOp(TopLevelOpcode, B, D, SQ); + V = SimplifyBinOp(TopLevelOpcode, B, D, SQ.getWithInstruction(&I)); // If "B op D" doesn't simplify then only go on if both of the existing // operations "A op' B" and "C op' D" will be zapped as no longer used. if (!V && LHS->hasOneUse() && RHS->hasOneUse()) - V = Builder->CreateBinOp(TopLevelOpcode, B, D, RHS->getName()); + V = Builder.CreateBinOp(TopLevelOpcode, B, D, RHS->getName()); if (V) { - SimplifiedInst = Builder->CreateBinOp(InnerOpcode, A, V); + SimplifiedInst = Builder.CreateBinOp(InnerOpcode, A, V); } } @@ -540,14 +539,14 @@ Value *InstCombiner::tryFactorization(InstCombiner::BuilderTy *Builder, std::swap(C, D); // Consider forming "(A op C) op' B". // If "A op C" simplifies then it can be formed with no cost. - V = SimplifyBinOp(TopLevelOpcode, A, C, SQ); + V = SimplifyBinOp(TopLevelOpcode, A, C, SQ.getWithInstruction(&I)); // If "A op C" doesn't simplify then only go on if both of the existing // operations "A op' B" and "C op' D" will be zapped as no longer used. if (!V && LHS->hasOneUse() && RHS->hasOneUse()) - V = Builder->CreateBinOp(TopLevelOpcode, A, C, LHS->getName()); + V = Builder.CreateBinOp(TopLevelOpcode, A, C, LHS->getName()); if (V) { - SimplifiedInst = Builder->CreateBinOp(InnerOpcode, V, B); + SimplifiedInst = Builder.CreateBinOp(InnerOpcode, V, B); } } @@ -610,7 +609,7 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) { // The instruction has the form "(A op' B) op (C op' D)". Try to factorize // a common term. if (Op0 && Op1 && LHSOpcode == RHSOpcode) - if (Value *V = tryFactorization(Builder, I, LHSOpcode, A, B, C, D)) + if (Value *V = tryFactorization(I, LHSOpcode, A, B, C, D)) return V; // The instruction has the form "(A op' B) op (C)". Try to factorize common @@ -618,7 +617,7 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) { if (Op0) if (Value *Ident = getIdentityValue(LHSOpcode, RHS)) if (Value *V = - tryFactorization(Builder, I, LHSOpcode, A, B, RHS, Ident)) + tryFactorization(I, LHSOpcode, A, B, RHS, Ident)) return V; // The instruction has the form "(B) op (C op' D)". 
Try to factorize common @@ -626,7 +625,7 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) { if (Op1) if (Value *Ident = getIdentityValue(RHSOpcode, LHS)) if (Value *V = - tryFactorization(Builder, I, RHSOpcode, LHS, Ident, C, D)) + tryFactorization(I, RHSOpcode, LHS, Ident, C, D)) return V; } @@ -637,15 +636,35 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) { Value *A = Op0->getOperand(0), *B = Op0->getOperand(1), *C = RHS; Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op' + Value *L = SimplifyBinOp(TopLevelOpcode, A, C, SQ.getWithInstruction(&I)); + Value *R = SimplifyBinOp(TopLevelOpcode, B, C, SQ.getWithInstruction(&I)); + // Do "A op C" and "B op C" both simplify? - if (Value *L = SimplifyBinOp(TopLevelOpcode, A, C, SQ)) - if (Value *R = SimplifyBinOp(TopLevelOpcode, B, C, SQ)) { - // They do! Return "L op' R". - ++NumExpand; - C = Builder->CreateBinOp(InnerOpcode, L, R); - C->takeName(&I); - return C; - } + if (L && R) { + // They do! Return "L op' R". + ++NumExpand; + C = Builder.CreateBinOp(InnerOpcode, L, R); + C->takeName(&I); + return C; + } + + // Does "A op C" simplify to the identity value for the inner opcode? + if (L && L == ConstantExpr::getBinOpIdentity(InnerOpcode, L->getType())) { + // They do! Return "B op C". + ++NumExpand; + C = Builder.CreateBinOp(TopLevelOpcode, B, C); + C->takeName(&I); + return C; + } + + // Does "B op C" simplify to the identity value for the inner opcode? + if (R && R == ConstantExpr::getBinOpIdentity(InnerOpcode, R->getType())) { + // They do! Return "A op C". + ++NumExpand; + C = Builder.CreateBinOp(TopLevelOpcode, A, C); + C->takeName(&I); + return C; + } } if (Op1 && LeftDistributesOverRight(TopLevelOpcode, Op1->getOpcode())) { @@ -654,15 +673,35 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) { Value *A = LHS, *B = Op1->getOperand(0), *C = Op1->getOperand(1); Instruction::BinaryOps InnerOpcode = Op1->getOpcode(); // op' + Value *L = SimplifyBinOp(TopLevelOpcode, A, B, SQ.getWithInstruction(&I)); + Value *R = SimplifyBinOp(TopLevelOpcode, A, C, SQ.getWithInstruction(&I)); + // Do "A op B" and "A op C" both simplify? - if (Value *L = SimplifyBinOp(TopLevelOpcode, A, B, SQ)) - if (Value *R = SimplifyBinOp(TopLevelOpcode, A, C, SQ)) { - // They do! Return "L op' R". - ++NumExpand; - A = Builder->CreateBinOp(InnerOpcode, L, R); - A->takeName(&I); - return A; - } + if (L && R) { + // They do! Return "L op' R". + ++NumExpand; + A = Builder.CreateBinOp(InnerOpcode, L, R); + A->takeName(&I); + return A; + } + + // Does "A op B" simplify to the identity value for the inner opcode? + if (L && L == ConstantExpr::getBinOpIdentity(InnerOpcode, L->getType())) { + // They do! Return "A op C". + ++NumExpand; + A = Builder.CreateBinOp(TopLevelOpcode, A, C); + A->takeName(&I); + return A; + } + + // Does "A op C" simplify to the identity value for the inner opcode? + if (R && R == ConstantExpr::getBinOpIdentity(InnerOpcode, R->getType())) { + // They do! Return "A op B". 
+ ++NumExpand; + A = Builder.CreateBinOp(TopLevelOpcode, A, B); + A->takeName(&I); + return A; + } } // (op (select (a, c, b)), (select (a, d, b))) -> (select (a, (op c, d), 0)) @@ -671,19 +710,21 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) { if (auto *SI1 = dyn_cast(RHS)) { if (SI0->getCondition() == SI1->getCondition()) { Value *SI = nullptr; - if (Value *V = SimplifyBinOp(TopLevelOpcode, SI0->getFalseValue(), - SI1->getFalseValue(), SQ)) - SI = Builder->CreateSelect(SI0->getCondition(), - Builder->CreateBinOp(TopLevelOpcode, - SI0->getTrueValue(), - SI1->getTrueValue()), - V); - if (Value *V = SimplifyBinOp(TopLevelOpcode, SI0->getTrueValue(), - SI1->getTrueValue(), SQ)) - SI = Builder->CreateSelect( + if (Value *V = + SimplifyBinOp(TopLevelOpcode, SI0->getFalseValue(), + SI1->getFalseValue(), SQ.getWithInstruction(&I))) + SI = Builder.CreateSelect(SI0->getCondition(), + Builder.CreateBinOp(TopLevelOpcode, + SI0->getTrueValue(), + SI1->getTrueValue()), + V); + if (Value *V = + SimplifyBinOp(TopLevelOpcode, SI0->getTrueValue(), + SI1->getTrueValue(), SQ.getWithInstruction(&I))) + SI = Builder.CreateSelect( SI0->getCondition(), V, - Builder->CreateBinOp(TopLevelOpcode, SI0->getFalseValue(), - SI1->getFalseValue())); + Builder.CreateBinOp(TopLevelOpcode, SI0->getFalseValue(), + SI1->getFalseValue())); if (SI) { SI->takeName(&I); return SI; @@ -745,9 +786,9 @@ Value *InstCombiner::dyn_castFNegVal(Value *V, bool IgnoreZeroSign) const { } static Value *foldOperationIntoSelectOperand(Instruction &I, Value *SO, - InstCombiner *IC) { + InstCombiner::BuilderTy &Builder) { if (auto *Cast = dyn_cast(&I)) - return IC->Builder->CreateCast(Cast->getOpcode(), SO, I.getType()); + return Builder.CreateCast(Cast->getOpcode(), SO, I.getType()); assert(I.isBinaryOp() && "Unexpected opcode for select folding"); @@ -766,8 +807,8 @@ static Value *foldOperationIntoSelectOperand(Instruction &I, Value *SO, std::swap(Op0, Op1); auto *BO = cast(&I); - Value *RI = IC->Builder->CreateBinOp(BO->getOpcode(), Op0, Op1, - SO->getName() + ".op"); + Value *RI = Builder.CreateBinOp(BO->getOpcode(), Op0, Op1, + SO->getName() + ".op"); auto *FPInst = dyn_cast(RI); if (FPInst && isa(FPInst)) FPInst->copyFastMathFlags(BO); @@ -785,7 +826,7 @@ Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) { return nullptr; // Bool selects with constant operands can be folded to logical ops. 
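// ---[ editor's aside, not part of the patch ]---------------------------------
// Instance of the new identity-value expansion above, with the outer op being
// 'and', the inner op being 'or', and made-up masks for which "A and C"
// simplifies to 0, the identity of 'or': "(A or B) and C" then reduces to
// "B and C" without materializing both distributed products.
static bool distribute_identity_demo(unsigned B) {
  const unsigned A = 0xF0u, C = 0x0Fu; // A & C == 0
  return ((A | B) & C) == (B & C);     // holds for every B
}
// ---[ end aside ]--------------------------------------------------------------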
- if (SI->getType()->getScalarType()->isIntegerTy(1)) + if (SI->getType()->isIntOrIntVectorTy(1)) return nullptr; // If it's a bitcast involving vectors, make sure it has the same number of @@ -819,13 +860,13 @@ Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) { } } - Value *NewTV = foldOperationIntoSelectOperand(Op, TV, this); - Value *NewFV = foldOperationIntoSelectOperand(Op, FV, this); + Value *NewTV = foldOperationIntoSelectOperand(Op, TV, Builder); + Value *NewFV = foldOperationIntoSelectOperand(Op, FV, Builder); return SelectInst::Create(SI->getCondition(), NewTV, NewFV, "", nullptr, SI); } static Value *foldOperationIntoPhiValue(BinaryOperator *I, Value *InV, - InstCombiner *IC) { + InstCombiner::BuilderTy &Builder) { bool ConstIsRHS = isa(I->getOperand(1)); Constant *C = cast(I->getOperand(ConstIsRHS)); @@ -839,7 +880,7 @@ static Value *foldOperationIntoPhiValue(BinaryOperator *I, Value *InV, if (!ConstIsRHS) std::swap(Op0, Op1); - Value *RI = IC->Builder->CreateBinOp(I->getOpcode(), Op0, Op1, "phitmp"); + Value *RI = Builder.CreateBinOp(I->getOpcode(), Op0, Op1, "phitmp"); auto *FPInst = dyn_cast(RI); if (FPInst && isa(FPInst)) FPInst->copyFastMathFlags(I); @@ -910,7 +951,7 @@ Instruction *InstCombiner::foldOpIntoPhi(Instruction &I, PHINode *PN) { // If we are going to have to insert a new computation, do so right before the // predecessor's terminator. if (NonConstBB) - Builder->SetInsertPoint(NonConstBB->getTerminator()); + Builder.SetInsertPoint(NonConstBB->getTerminator()); // Next, add all of the operands to the PHI. if (SelectInst *SI = dyn_cast(&I)) { @@ -933,9 +974,19 @@ Instruction *InstCombiner::foldOpIntoPhi(Instruction &I, PHINode *PN) { // `TrueVInPred`. if (InC && !isa(InC) && isa(InC)) InV = InC->isNullValue() ? FalseVInPred : TrueVInPred; - else - InV = Builder->CreateSelect(PN->getIncomingValue(i), - TrueVInPred, FalseVInPred, "phitmp"); + else { + // Generate the select in the same block as PN's current incoming block. + // Note: ThisBB need not be the NonConstBB because vector constants, + // which are constants by definition, are handled here. + // FIXME: This can lead to an increase in IR generation because we might + // generate selects for a vector constant phi operand that cannot be + // folded to TrueVInPred or FalseVInPred as done for ConstantInt. For + // non-vector phis, this transformation was always profitable because + // the select would be generated exactly once in the NonConstBB.
+ Builder.SetInsertPoint(ThisBB->getTerminator()); + InV = Builder.CreateSelect(PN->getIncomingValue(i), TrueVInPred, + FalseVInPred, "phitmp"); + } NewPN->addIncoming(InV, ThisBB); } } else if (CmpInst *CI = dyn_cast(&I)) { @@ -945,16 +996,17 @@ Instruction *InstCombiner::foldOpIntoPhi(Instruction &I, PHINode *PN) { if (Constant *InC = dyn_cast(PN->getIncomingValue(i))) InV = ConstantExpr::getCompare(CI->getPredicate(), InC, C); else if (isa(CI)) - InV = Builder->CreateICmp(CI->getPredicate(), PN->getIncomingValue(i), - C, "phitmp"); + InV = Builder.CreateICmp(CI->getPredicate(), PN->getIncomingValue(i), + C, "phitmp"); else - InV = Builder->CreateFCmp(CI->getPredicate(), PN->getIncomingValue(i), - C, "phitmp"); + InV = Builder.CreateFCmp(CI->getPredicate(), PN->getIncomingValue(i), + C, "phitmp"); NewPN->addIncoming(InV, PN->getIncomingBlock(i)); } } else if (auto *BO = dyn_cast(&I)) { for (unsigned i = 0; i != NumPHIValues; ++i) { - Value *InV = foldOperationIntoPhiValue(BO, PN->getIncomingValue(i), this); + Value *InV = foldOperationIntoPhiValue(BO, PN->getIncomingValue(i), + Builder); NewPN->addIncoming(InV, PN->getIncomingBlock(i)); } } else { @@ -965,8 +1017,8 @@ Instruction *InstCombiner::foldOpIntoPhi(Instruction &I, PHINode *PN) { if (Constant *InC = dyn_cast(PN->getIncomingValue(i))) InV = ConstantExpr::getCast(CI->getOpcode(), InC, RetTy); else - InV = Builder->CreateCast(CI->getOpcode(), - PN->getIncomingValue(i), I.getType(), "phitmp"); + InV = Builder.CreateCast(CI->getOpcode(), PN->getIncomingValue(i), + I.getType(), "phitmp"); NewPN->addIncoming(InV, PN->getIncomingBlock(i)); } } @@ -1312,8 +1364,8 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) { /// \brief Creates node of binary operation with the same attributes as the /// specified one but with other operands. static Value *CreateBinOpAsGiven(BinaryOperator &Inst, Value *LHS, Value *RHS, - InstCombiner::BuilderTy *B) { - Value *BO = B->CreateBinOp(Inst.getOpcode(), LHS, RHS); + InstCombiner::BuilderTy &B) { + Value *BO = B.CreateBinOp(Inst.getOpcode(), LHS, RHS); // If LHS and RHS are constant, BO won't be a binary operator. if (BinaryOperator *NewBO = dyn_cast(BO)) NewBO->copyIRFlags(&Inst); @@ -1349,7 +1401,7 @@ Value *InstCombiner::SimplifyVectorOp(BinaryOperator &Inst) { LShuf->getOperand(0)->getType() == RShuf->getOperand(0)->getType()) { Value *NewBO = CreateBinOpAsGiven(Inst, LShuf->getOperand(0), RShuf->getOperand(0), Builder); - return Builder->CreateShuffleVector( + return Builder.CreateShuffleVector( NewBO, UndefValue::get(NewBO->getType()), LShuf->getMask()); } @@ -1388,7 +1440,7 @@ Value *InstCombiner::SimplifyVectorOp(BinaryOperator &Inst) { Value *NewLHS = isa(LHS) ? C2 : Shuffle->getOperand(0); Value *NewRHS = isa(LHS) ? 
Shuffle->getOperand(0) : C2; Value *NewBO = CreateBinOpAsGiven(Inst, NewLHS, NewRHS, Builder); - return Builder->CreateShuffleVector(NewBO, + return Builder.CreateShuffleVector(NewBO, UndefValue::get(Inst.getType()), Shuffle->getMask()); } } @@ -1399,7 +1451,8 @@ Value *InstCombiner::SimplifyVectorOp(BinaryOperator &Inst) { Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { SmallVector Ops(GEP.op_begin(), GEP.op_end()); - if (Value *V = SimplifyGEPInst(GEP.getSourceElementType(), Ops, SQ)) + if (Value *V = SimplifyGEPInst(GEP.getSourceElementType(), Ops, + SQ.getWithInstruction(&GEP))) return replaceInstUsesWith(GEP, V); Value *PtrOp = GEP.getOperand(0); @@ -1435,7 +1488,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // If we are using a wider index than needed for this platform, shrink // it to what we need. If narrower, sign-extend it to what we need. // This explicit cast can make subsequent optimizations more obvious. - *I = Builder->CreateIntCast(*I, NewIndexType, true); + *I = Builder.CreateIntCast(*I, NewIndexType, true); MadeChange = true; } } @@ -1529,10 +1582,10 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // set that index. PHINode *NewPN; { - IRBuilderBase::InsertPointGuard Guard(*Builder); - Builder->SetInsertPoint(PN); - NewPN = Builder->CreatePHI(Op1->getOperand(DI)->getType(), - PN->getNumOperands()); + IRBuilderBase::InsertPointGuard Guard(Builder); + Builder.SetInsertPoint(PN); + NewPN = Builder.CreatePHI(Op1->getOperand(DI)->getType(), + PN->getNumOperands()); } for (auto &I : PN->operands()) @@ -1588,7 +1641,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { if (SO1->getType() != GO1->getType()) return nullptr; - Value *Sum = SimplifyAddInst(GO1, SO1, false, false, SQ); + Value *Sum = + SimplifyAddInst(GO1, SO1, false, false, SQ.getWithInstruction(&GEP)); // Only do the combine when we are sure the cost after the // merge is never more than that before the merge. if (Sum == nullptr) @@ -1651,8 +1705,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // pointer arithmetic. if (match(V, m_Neg(m_PtrToInt(m_Value())))) { Operator *Index = cast(V); - Value *PtrToInt = Builder->CreatePtrToInt(PtrOp, Index->getType()); - Value *NewSub = Builder->CreateSub(PtrToInt, Index->getOperand(1)); + Value *PtrToInt = Builder.CreatePtrToInt(PtrOp, Index->getType()); + Value *NewSub = Builder.CreateSub(PtrToInt, Index->getOperand(1)); return CastInst::Create(Instruction::IntToPtr, NewSub, GEP.getType()); } // Canonicalize (gep i8* X, (ptrtoint Y)-(ptrtoint X)) @@ -1705,7 +1759,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // -> // %0 = GEP i8 addrspace(1)* X, ... // addrspacecast i8 addrspace(1)* %0 to i8* - return new AddrSpaceCastInst(Builder->Insert(Res), GEP.getType()); + return new AddrSpaceCastInst(Builder.Insert(Res), GEP.getType()); } if (ArrayType *XATy = @@ -1733,10 +1787,10 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // addrspacecast i8 addrspace(1)* %0 to i8* SmallVector Idx(GEP.idx_begin(), GEP.idx_end()); Value *NewGEP = GEP.isInBounds() - ? Builder->CreateInBoundsGEP( + ? 
Builder.CreateInBoundsGEP( nullptr, StrippedPtr, Idx, GEP.getName()) - : Builder->CreateGEP(nullptr, StrippedPtr, Idx, - GEP.getName()); + : Builder.CreateGEP(nullptr, StrippedPtr, Idx, + GEP.getName()); return new AddrSpaceCastInst(NewGEP, GEP.getType()); } } @@ -1754,9 +1808,9 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { Value *Idx[2] = { Constant::getNullValue(IdxType), GEP.getOperand(1) }; Value *NewGEP = GEP.isInBounds() - ? Builder->CreateInBoundsGEP(nullptr, StrippedPtr, Idx, - GEP.getName()) - : Builder->CreateGEP(nullptr, StrippedPtr, Idx, GEP.getName()); + ? Builder.CreateInBoundsGEP(nullptr, StrippedPtr, Idx, + GEP.getName()) + : Builder.CreateGEP(nullptr, StrippedPtr, Idx, GEP.getName()); // V and GEP are both pointer types --> BitCast return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP, @@ -1789,10 +1843,10 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // GEP may not be "inbounds". Value *NewGEP = GEP.isInBounds() && NSW - ? Builder->CreateInBoundsGEP(nullptr, StrippedPtr, NewIdx, - GEP.getName()) - : Builder->CreateGEP(nullptr, StrippedPtr, NewIdx, - GEP.getName()); + ? Builder.CreateInBoundsGEP(nullptr, StrippedPtr, NewIdx, + GEP.getName()) + : Builder.CreateGEP(nullptr, StrippedPtr, NewIdx, + GEP.getName()); // The NewGEP must be pointer typed, so must the old one -> BitCast return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP, @@ -1831,10 +1885,10 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { NewIdx}; Value *NewGEP = GEP.isInBounds() && NSW - ? Builder->CreateInBoundsGEP( + ? Builder.CreateInBoundsGEP( SrcElTy, StrippedPtr, Off, GEP.getName()) - : Builder->CreateGEP(SrcElTy, StrippedPtr, Off, - GEP.getName()); + : Builder.CreateGEP(SrcElTy, StrippedPtr, Off, + GEP.getName()); // The NewGEP must be pointer typed, so must the old one -> BitCast return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP, GEP.getType()); @@ -1898,8 +1952,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { if (FindElementAtOffset(OpType, Offset.getSExtValue(), NewIndices)) { Value *NGEP = GEP.isInBounds() - ? Builder->CreateInBoundsGEP(nullptr, Operand, NewIndices) - : Builder->CreateGEP(nullptr, Operand, NewIndices); + ? Builder.CreateInBoundsGEP(nullptr, Operand, NewIndices) + : Builder.CreateGEP(nullptr, Operand, NewIndices); if (NGEP->getType() == GEP.getType()) return replaceInstUsesWith(GEP, NGEP); @@ -1963,6 +2017,7 @@ static bool isAllocSiteRemovable(Instruction *AI, // Give up the moment we see something we can't handle. return false; + case Instruction::AddrSpaceCast: case Instruction::BitCast: case Instruction::GetElementPtr: Users.emplace_back(I); @@ -2064,7 +2119,8 @@ Instruction *InstCombiner::visitAllocSite(Instruction &MI) { replaceInstUsesWith(*C, ConstantInt::get(Type::getInt1Ty(C->getContext()), C->isFalseWhenEqual())); - } else if (isa(I) || isa(I)) { + } else if (isa(I) || isa(I) || + isa(I)) { replaceInstUsesWith(*I, UndefValue::get(I->getType())); } eraseInstFromFunction(*I); @@ -2146,8 +2202,8 @@ Instruction *InstCombiner::visitFree(CallInst &FI) { // free undef -> unreachable. if (isa(Op)) { // Insert a new store to null because we cannot modify the CFG here. 
- Builder->CreateStore(ConstantInt::getTrue(FI.getContext()), - UndefValue::get(Type::getInt1PtrTy(FI.getContext()))); + Builder.CreateStore(ConstantInt::getTrue(FI.getContext()), + UndefValue::get(Type::getInt1PtrTy(FI.getContext()))); return eraseInstFromFunction(FI); } @@ -2180,8 +2236,7 @@ Instruction *InstCombiner::visitReturnInst(ReturnInst &RI) { // There might be assume intrinsics dominating this return that completely // determine the value. If so, constant fold it. - KnownBits Known(VTy->getPrimitiveSizeInBits()); - computeKnownBits(ResultOp, Known, 0, &RI); + KnownBits Known = computeKnownBits(ResultOp, 0, &RI); if (Known.isConstant()) RI.setOperand(0, Constant::getIntegerValue(VTy, Known.getConstant())); @@ -2210,37 +2265,18 @@ Instruction *InstCombiner::visitBranchInst(BranchInst &BI) { return &BI; } - // Canonicalize fcmp_one -> fcmp_oeq - FCmpInst::Predicate FPred; Value *Y; - if (match(&BI, m_Br(m_FCmp(FPred, m_Value(X), m_Value(Y)), - TrueDest, FalseDest)) && - BI.getCondition()->hasOneUse()) - if (FPred == FCmpInst::FCMP_ONE || FPred == FCmpInst::FCMP_OLE || - FPred == FCmpInst::FCMP_OGE) { - FCmpInst *Cond = cast(BI.getCondition()); - Cond->setPredicate(FCmpInst::getInversePredicate(FPred)); - - // Swap Destinations and condition. - BI.swapSuccessors(); - Worklist.Add(Cond); - return &BI; - } - - // Canonicalize icmp_ne -> icmp_eq - ICmpInst::Predicate IPred; - if (match(&BI, m_Br(m_ICmp(IPred, m_Value(X), m_Value(Y)), - TrueDest, FalseDest)) && - BI.getCondition()->hasOneUse()) - if (IPred == ICmpInst::ICMP_NE || IPred == ICmpInst::ICMP_ULE || - IPred == ICmpInst::ICMP_SLE || IPred == ICmpInst::ICMP_UGE || - IPred == ICmpInst::ICMP_SGE) { - ICmpInst *Cond = cast(BI.getCondition()); - Cond->setPredicate(ICmpInst::getInversePredicate(IPred)); - // Swap Destinations and condition. - BI.swapSuccessors(); - Worklist.Add(Cond); - return &BI; - } + // Canonicalize, for example, icmp_ne -> icmp_eq or fcmp_one -> fcmp_oeq. + CmpInst::Predicate Pred; + if (match(&BI, m_Br(m_OneUse(m_Cmp(Pred, m_Value(), m_Value())), TrueDest, + FalseDest)) && + !isCanonicalPredicate(Pred)) { + // Swap destinations and condition. + CmpInst *Cond = cast(BI.getCondition()); + Cond->setPredicate(CmpInst::getInversePredicate(Pred)); + BI.swapSuccessors(); + Worklist.Add(Cond); + return &BI; + } return nullptr; } @@ -2261,9 +2297,7 @@ Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) { return &SI; } - unsigned BitWidth = cast(Cond->getType())->getBitWidth(); - KnownBits Known(BitWidth); - computeKnownBits(Cond, Known, 0, &SI); + KnownBits Known = computeKnownBits(Cond, 0, &SI); unsigned LeadingKnownZeros = Known.countMinLeadingZeros(); unsigned LeadingKnownOnes = Known.countMinLeadingOnes(); @@ -2276,15 +2310,15 @@ Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) { LeadingKnownOnes, C.getCaseValue()->getValue().countLeadingOnes()); } - unsigned NewWidth = BitWidth - std::max(LeadingKnownZeros, LeadingKnownOnes); + unsigned NewWidth = Known.getBitWidth() - std::max(LeadingKnownZeros, LeadingKnownOnes); // Shrink the condition operand if the new type is smaller than the old type. // This may produce a non-standard type for the switch, but that's ok because // the backend should extend back to a legal type for the target. 
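A worked instance of the shrink described above, with invented numbers: if Cond is an i32 whose KnownBits report 24 leading zero bits, and every case constant likewise fits, NewWidth becomes 32 - 24 = 8 and the code below truncates both the condition and the case values:

  //   switch i32 %cond, label %default [ i32 3,   label %a
  //                                      i32 200, label %b ]
  // becomes
  //   %trunc = trunc i32 %cond to i8
  //   switch i8 %trunc, label %default [ i8 3,   label %a
  //                                      i8 200, label %b ]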
- if (NewWidth > 0 && NewWidth < BitWidth) { + if (NewWidth > 0 && NewWidth < Known.getBitWidth()) { IntegerType *Ty = IntegerType::get(SI.getContext(), NewWidth); - Builder->SetInsertPoint(&SI); - Value *NewCond = Builder->CreateTrunc(Cond, Ty, "trunc"); + Builder.SetInsertPoint(&SI); + Value *NewCond = Builder.CreateTrunc(Cond, Ty, "trunc"); SI.setCondition(NewCond); for (auto Case : SI.cases()) { @@ -2303,7 +2337,8 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { if (!EV.hasIndices()) return replaceInstUsesWith(EV, Agg); - if (Value *V = SimplifyExtractValueInst(Agg, EV.getIndices(), SQ)) + if (Value *V = SimplifyExtractValueInst(Agg, EV.getIndices(), + SQ.getWithInstruction(&EV))) return replaceInstUsesWith(EV, V); if (InsertValueInst *IV = dyn_cast(Agg)) { @@ -2340,8 +2375,8 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { // %E = insertvalue { i32 } %X, i32 42, 0 // by switching the order of the insert and extract (though the // insertvalue should be left in, since it may have other uses). - Value *NewEV = Builder->CreateExtractValue(IV->getAggregateOperand(), - EV.getIndices()); + Value *NewEV = Builder.CreateExtractValue(IV->getAggregateOperand(), + EV.getIndices()); return InsertValueInst::Create(NewEV, IV->getInsertedValueOperand(), makeArrayRef(insi, inse)); } @@ -2416,19 +2451,25 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { // extractvalue has integer indices, getelementptr has Value*s. Convert. SmallVector Indices; // Prefix an i32 0 since we need the first element. - Indices.push_back(Builder->getInt32(0)); + Indices.push_back(Builder.getInt32(0)); for (ExtractValueInst::idx_iterator I = EV.idx_begin(), E = EV.idx_end(); I != E; ++I) - Indices.push_back(Builder->getInt32(*I)); + Indices.push_back(Builder.getInt32(*I)); // We need to insert these at the location of the old load, not at that of // the extractvalue. - Builder->SetInsertPoint(L); - Value *GEP = Builder->CreateInBoundsGEP(L->getType(), - L->getPointerOperand(), Indices); + Builder.SetInsertPoint(L); + Value *GEP = Builder.CreateInBoundsGEP(L->getType(), + L->getPointerOperand(), Indices); + Instruction *NL = Builder.CreateLoad(GEP); + // Whatever aliasing information we had for the original load must also + // hold for the smaller load, so propagate the annotations. + AAMDNodes Nodes; + L->getAAMetadata(Nodes); + NL->setAAMetadata(Nodes); // Returning the load directly will cause the main loop to insert it in // the wrong spot, so use replaceInstUsesWith(). - return replaceInstUsesWith(EV, Builder->CreateLoad(GEP)); + return replaceInstUsesWith(EV, NL); } // We could simplify extracts from other values. Note that nested extracts may // already be simplified implicitly by the above: extract (extract (insert) ) @@ -2860,9 +2901,7 @@ bool InstCombiner::run() { // a value even when the operands are not all constants. Type *Ty = I->getType(); if (ExpensiveCombines && !I->use_empty() && Ty->isIntOrIntVectorTy()) { - unsigned BitWidth = Ty->getScalarSizeInBits(); - KnownBits Known(BitWidth); - computeKnownBits(I, Known, /*Depth*/0, I); + KnownBits Known = computeKnownBits(I, /*Depth*/0, I); if (Known.isConstant()) { Constant *C = ConstantInt::get(Ty, Known.getConstant()); DEBUG(dbgs() << "IC: ConstFold (all bits known) to: " << *C << @@ -2919,8 +2958,8 @@ } // Now that we have an instruction, try combining it to simplify it.
- Builder->SetInsertPoint(I); - Builder->SetCurrentDebugLocation(I->getDebugLoc()); + Builder.SetInsertPoint(I); + Builder.SetCurrentDebugLocation(I->getDebugLoc()); #ifndef NDEBUG std::string OrigI; @@ -3015,6 +3054,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, const DataLayout &DL, ++NumDeadInst; DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n'); Inst->eraseFromParent(); + MadeIRChange = true; continue; } @@ -3028,6 +3068,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, const DataLayout &DL, ++NumConstProp; if (isInstructionTriviallyDead(Inst, TLI)) Inst->eraseFromParent(); + MadeIRChange = true; continue; } @@ -3052,7 +3093,10 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, const DataLayout &DL, } } - InstrsForInstCombineWorklist.push_back(Inst); + // Skip processing debug intrinsics in InstCombine. Processing these call instructions + // consumes a non-trivial amount of time and provides no value for the optimization. + if (!isa(Inst)) + InstrsForInstCombineWorklist.push_back(Inst); } // Recursively visit successors. If this is a branch or switch on a @@ -3152,7 +3196,7 @@ combineInstructionsOverFunction(Function &F, InstCombineWorklist &Worklist, MadeIRChange |= prepareICWorklistFromFunction(F, DL, &TLI, Worklist); - InstCombiner IC(Worklist, &Builder, F.optForMinSize(), ExpensiveCombines, + InstCombiner IC(Worklist, Builder, F.optForMinSize(), ExpensiveCombines, AA, AC, TLI, DT, DL, LI); IC.MaxArraySizeForCombine = MaxArraySize; diff --git a/interpreter/llvm/src/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/interpreter/llvm/src/lib/Transforms/Instrumentation/AddressSanitizer.cpp index d718046adf741..f8d255273b2a8 100644 --- a/interpreter/llvm/src/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/interpreter/llvm/src/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -22,9 +22,11 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Triple.h" +#include "llvm/ADT/Twine.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Argument.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/DIBuilder.h" #include "llvm/IR/DataLayout.h" @@ -43,6 +45,7 @@ #include "llvm/Support/DataTypes.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Endian.h" +#include "llvm/Support/ScopedPrinter.h" #include "llvm/Support/SwapByteOrder.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Instrumentation.h" @@ -192,6 +195,11 @@ static cl::opt ClMaxInlinePoisoningSize( static cl::opt ClUseAfterReturn("asan-use-after-return", cl::desc("Check stack-use-after-return"), cl::Hidden, cl::init(true)); +static cl::opt ClRedzoneByvalArgs("asan-redzone-byval-args", + cl::desc("Create redzones for byval " + "arguments (extra copy " + "required)"), cl::Hidden, + cl::init(true)); static cl::opt ClUseAfterScope("asan-use-after-scope", cl::desc("Check stack-use-after-scope"), cl::Hidden, cl::init(false)); @@ -613,7 +621,15 @@ class AddressSanitizerModule : public ModulePass { bool UseGlobalsGC = true) : ModulePass(ID), CompileKernel(CompileKernel || ClEnableKasan), Recover(Recover || ClRecover), - UseGlobalsGC(UseGlobalsGC && ClUseGlobalsGC) {} + UseGlobalsGC(UseGlobalsGC && ClUseGlobalsGC), + // Not a typo: ClWithComdat is almost completely pointless without + // ClUseGlobalsGC (because then it only works on modules without + // globals, which are rare); it is a prerequisite for ClUseGlobalsGC; + // and both suffer from gold
PR19002 for which UseGlobalsGC constructor + // argument is designed as a workaround. Therefore, disable both + // ClWithComdat and ClUseGlobalsGC unless the frontend says it's ok to + // do globals-gc. + UseCtorComdat(UseGlobalsGC && ClWithComdat) {} bool runOnModule(Module &M) override; static char ID; // Pass identification, replacement for typeid StringRef getPassName() const override { return "AddressSanitizerModule"; } @@ -656,6 +672,7 @@ class AddressSanitizerModule : public ModulePass { bool CompileKernel; bool Recover; bool UseGlobalsGC; + bool UseCtorComdat; Type *IntptrTy; LLVMContext *C; Triple TargetTriple; @@ -738,6 +755,10 @@ struct FunctionStackPoisoner : public InstVisitor { bool runOnFunction() { if (!ClStack) return false; + + if (ClRedzoneByvalArgs && Mapping.Offset != kDynamicShadowSentinel) + copyArgsPassedByValToAllocas(); + // Collect alloca, ret, lifetime instructions etc. for (BasicBlock *BB : depth_first(&F.getEntryBlock())) visit(*BB); @@ -754,6 +775,11 @@ struct FunctionStackPoisoner : public InstVisitor { return true; } + // Arguments marked with the "byval" attribute are implicitly copied without + // using an alloca instruction. To produce redzones for those arguments, we + // copy them a second time into memory allocated with an alloca instruction. + void copyArgsPassedByValToAllocas(); + // Finds all Alloca instructions and puts // poisoned red zones around all of them. // Then unpoison everything back before the function returns. @@ -1221,7 +1247,7 @@ static void instrumentMaskedLoadOrStore(AddressSanitizer *Pass, if (auto *Vector = dyn_cast(Mask)) { // dyn_cast as we might get UndefValue if (auto *Masked = dyn_cast(Vector->getOperand(Idx))) { - if (Masked->isNullValue()) + if (Masked->isZero()) // Mask is constant false, so no instrumentation needed. continue; // If we have a true or undef value, fall through to doInstrumentAddress @@ -1677,7 +1703,7 @@ AddressSanitizerModule::CreateMetadataGlobal(Module &M, Constant *Initializer, : GlobalVariable::PrivateLinkage; GlobalVariable *Metadata = new GlobalVariable( M, Initializer->getType(), false, Linkage, Initializer, - Twine("__asan_global_") + GlobalValue::getRealLinkageName(OriginalName)); + Twine("__asan_global_") + GlobalValue::dropLLVMManglingEscape(OriginalName)); Metadata->setSection(getGlobalMetadataSection()); return Metadata; } @@ -2072,7 +2098,7 @@ bool AddressSanitizerModule::runOnModule(Module &M) { // Put the constructor and destructor in comdat if both // (1) global instrumentation is not TU-specific // (2) target is ELF. - if (ClWithComdat && TargetTriple.isOSBinFormatELF() && CtorComdat) { + if (UseCtorComdat && TargetTriple.isOSBinFormatELF() && CtorComdat) { AsanCtorFunction->setComdat(M.getOrInsertComdat(kAsanModuleCtorName)); appendToGlobalCtors(M, AsanCtorFunction, kAsanCtorAndDtorPriority, AsanCtorFunction); @@ -2519,6 +2545,28 @@ static int StackMallocSizeClass(uint64_t LocalStackSize) { llvm_unreachable("impossible LocalStackSize"); } +void FunctionStackPoisoner::copyArgsPassedByValToAllocas() { + BasicBlock &FirstBB = *F.begin(); + IRBuilder<> IRB(&FirstBB, FirstBB.getFirstInsertionPt()); + const DataLayout &DL = F.getParent()->getDataLayout(); + for (Argument &Arg : F.args()) { + if (Arg.hasByValAttr()) { + Type *Ty = Arg.getType()->getPointerElementType(); + unsigned Align = Arg.getParamAlignment(); + if (Align == 0) Align = DL.getABITypeAlignment(Ty); + + const std::string &Name = Arg.hasName() ?
Arg.getName().str() : + "Arg" + llvm::to_string(Arg.getArgNo()); + AllocaInst *AI = IRB.CreateAlloca(Ty, nullptr, Twine(Name) + ".byval"); + AI->setAlignment(Align); + Arg.replaceAllUsesWith(AI); + + uint64_t AllocSize = DL.getTypeAllocSize(Ty); + IRB.CreateMemCpy(AI, &Arg, AllocSize, Align); + } + } +} + PHINode *FunctionStackPoisoner::createPHI(IRBuilder<> &IRB, Value *Cond, Value *ValueIfTrue, Instruction *ThenTerm, diff --git a/interpreter/llvm/src/lib/Transforms/Instrumentation/BoundsChecking.cpp b/interpreter/llvm/src/lib/Transforms/Instrumentation/BoundsChecking.cpp index d4c8369fa9d3b..a193efe902cf5 100644 --- a/interpreter/llvm/src/lib/Transforms/Instrumentation/BoundsChecking.cpp +++ b/interpreter/llvm/src/lib/Transforms/Instrumentation/BoundsChecking.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Instrumentation.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/TargetFolder.h" @@ -25,6 +24,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Instrumentation.h" using namespace llvm; #define DEBUG_TYPE "bounds-checking" diff --git a/interpreter/llvm/src/lib/Transforms/Instrumentation/CFGMST.h b/interpreter/llvm/src/lib/Transforms/Instrumentation/CFGMST.h index 3802f9fbf7dbe..16e2e6b4e7304 100644 --- a/interpreter/llvm/src/lib/Transforms/Instrumentation/CFGMST.h +++ b/interpreter/llvm/src/lib/Transforms/Instrumentation/CFGMST.h @@ -12,6 +12,9 @@ // //===----------------------------------------------------------------------===// +#ifndef LLVM_LIB_TRANSFORMS_INSTRUMENTATION_CFGMST_H +#define LLVM_LIB_TRANSFORMS_INSTRUMENTATION_CFGMST_H + #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/BlockFrequencyInfo.h" @@ -24,10 +27,10 @@ #include #include -namespace llvm { - #define DEBUG_TYPE "cfgmst" +namespace llvm { + /// \brief An union-find based Minimum Spanning Tree for CFG /// /// Implements a Union-find algorithm to compute Minimum Spanning Tree @@ -220,5 +223,8 @@ template class CFGMST { } }; -#undef DEBUG_TYPE // "cfgmst" } // end namespace llvm + +#undef DEBUG_TYPE // "cfgmst" + +#endif // LLVM_LIB_TRANSFORMS_INSTRUMENTATION_CFGMST_H diff --git a/interpreter/llvm/src/lib/Transforms/Instrumentation/CMakeLists.txt b/interpreter/llvm/src/lib/Transforms/Instrumentation/CMakeLists.txt index 7ff69b9eb7f42..f2806e278e6e1 100644 --- a/interpreter/llvm/src/lib/Transforms/Instrumentation/CMakeLists.txt +++ b/interpreter/llvm/src/lib/Transforms/Instrumentation/CMakeLists.txt @@ -8,6 +8,7 @@ add_llvm_library(LLVMInstrumentation Instrumentation.cpp InstrProfiling.cpp PGOInstrumentation.cpp + PGOMemOPSizeOpt.cpp SanitizerCoverage.cpp ThreadSanitizer.cpp EfficiencySanitizer.cpp diff --git a/interpreter/llvm/src/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/interpreter/llvm/src/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp index e2e3cbdbc295b..ddc975cbed1a7 100644 --- a/interpreter/llvm/src/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp +++ b/interpreter/llvm/src/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp @@ -44,15 +44,14 @@ /// For more information, please refer to the design document: /// http://clang.llvm.org/docs/DataFlowSanitizerDesign.html -#include "llvm/Transforms/Instrumentation.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/DepthFirstIterator.h" #include 
"llvm/ADT/StringExtras.h" #include "llvm/ADT/Triple.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/IR/Dominators.h" #include "llvm/IR/DebugInfo.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/InstVisitor.h" @@ -63,6 +62,7 @@ #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/SpecialCaseList.h" +#include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include @@ -1470,6 +1470,7 @@ void DFSanVisitor::visitCallSite(CallSite CS) { } i = CS.arg_begin(); + const unsigned ShadowArgStart = Args.size(); for (unsigned n = FT->getNumParams(); n != 0; ++i, --n) Args.push_back(DFSF.getShadow(*i)); @@ -1505,6 +1506,15 @@ void DFSanVisitor::visitCallSite(CallSite CS) { CustomCI->setCallingConv(CI->getCallingConv()); CustomCI->setAttributes(CI->getAttributes()); + // Update the parameter attributes of the custom call instruction to + // zero extend the shadow parameters. This is required for targets + // which consider ShadowTy an illegal type. + for (unsigned n = 0; n < FT->getNumParams(); n++) { + const unsigned ArgNo = ShadowArgStart + n; + if (CustomCI->getArgOperand(ArgNo)->getType() == DFSF.DFS.ShadowTy) + CustomCI->addParamAttr(ArgNo, Attribute::ZExt); + } + if (!FT->getReturnType()->isVoidTy()) { LoadInst *LabelLoad = IRB.CreateLoad(DFSF.LabelReturnAlloca); DFSF.setShadow(CustomCI, LabelLoad); diff --git a/interpreter/llvm/src/lib/Transforms/Instrumentation/EfficiencySanitizer.cpp b/interpreter/llvm/src/lib/Transforms/Instrumentation/EfficiencySanitizer.cpp index e89384c559fe0..6864d295525c3 100644 --- a/interpreter/llvm/src/lib/Transforms/Instrumentation/EfficiencySanitizer.cpp +++ b/interpreter/llvm/src/lib/Transforms/Instrumentation/EfficiencySanitizer.cpp @@ -18,7 +18,6 @@ // The rest is handled by the run-time library. //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Instrumentation.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" @@ -32,6 +31,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/ModuleUtils.h" diff --git a/interpreter/llvm/src/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/interpreter/llvm/src/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp index 96027bc3d0a91..4089d81ea3e1b 100644 --- a/interpreter/llvm/src/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp +++ b/interpreter/llvm/src/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp @@ -56,8 +56,6 @@ using namespace llvm; STATISTIC(NumOfPGOICallPromotion, "Number of indirect call promotions."); STATISTIC(NumOfPGOICallsites, "Number of indirect call candidate sites."); -STATISTIC(NumOfPGOMemOPOpt, "Number of memop intrinsics optimized."); -STATISTIC(NumOfPGOMemOPAnnotate, "Number of memop intrinsics annotated."); // Command line option to disable indirect-call promotion with the default as // false. This is for debug purpose. @@ -111,44 +109,6 @@ static cl::opt ICPDUMPAFTER("icp-dumpafter", cl::init(false), cl::Hidden, cl::desc("Dump IR after transformation happens")); -// The minimum call count to optimize memory intrinsic calls. 
-static cl::opt - MemOPCountThreshold("pgo-memop-count-threshold", cl::Hidden, cl::ZeroOrMore, - cl::init(1000), - cl::desc("The minimum count to optimize memory " - "intrinsic calls")); - -// Command line option to disable memory intrinsic optimization. The default is -// false. This is for debug purpose. -static cl::opt DisableMemOPOPT("disable-memop-opt", cl::init(false), - cl::Hidden, cl::desc("Disable optimize")); - -// The percent threshold to optimize memory intrinsic calls. -static cl::opt - MemOPPercentThreshold("pgo-memop-percent-threshold", cl::init(40), - cl::Hidden, cl::ZeroOrMore, - cl::desc("The percentage threshold for the " - "memory intrinsic calls optimization")); - -// Maximum number of versions for optimizing memory intrinsic call. -static cl::opt - MemOPMaxVersion("pgo-memop-max-version", cl::init(3), cl::Hidden, - cl::ZeroOrMore, - cl::desc("The max version for the optimized memory " - " intrinsic calls")); - -// Scale the counts from the annotation using the BB count value. -static cl::opt - MemOPScaleCount("pgo-memop-scale-count", cl::init(true), cl::Hidden, - cl::desc("Scale the memop size counts using the basic " - " block count value")); - -// This option sets the rangge of precise profile memop sizes. -extern cl::opt MemOPSizeRange; - -// This option sets the value that groups large memop sizes -extern cl::opt MemOPSizeLarge; - namespace { class PGOIndirectCallPromotionLegacyPass : public ModulePass { public: @@ -173,24 +133,6 @@ class PGOIndirectCallPromotionLegacyPass : public ModulePass { // the promoted direct call. bool SamplePGO; }; - -class PGOMemOPSizeOptLegacyPass : public FunctionPass { -public: - static char ID; - - PGOMemOPSizeOptLegacyPass() : FunctionPass(ID) { - initializePGOMemOPSizeOptLegacyPassPass(*PassRegistry::getPassRegistry()); - } - - StringRef getPassName() const override { return "PGOMemOPSize"; } - -private: - bool runOnFunction(Function &F) override; - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired(); - AU.addPreserved(); - } -}; } // end anonymous namespace char PGOIndirectCallPromotionLegacyPass::ID = 0; @@ -204,19 +146,6 @@ ModulePass *llvm::createPGOIndirectCallPromotionLegacyPass(bool InLTO, return new PGOIndirectCallPromotionLegacyPass(InLTO, SamplePGO); } -char PGOMemOPSizeOptLegacyPass::ID = 0; -INITIALIZE_PASS_BEGIN(PGOMemOPSizeOptLegacyPass, "pgo-memop-opt", - "Optimize memory intrinsic using its size value profile", - false, false) -INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) -INITIALIZE_PASS_END(PGOMemOPSizeOptLegacyPass, "pgo-memop-opt", - "Optimize memory intrinsic using its size value profile", - false, false) - -FunctionPass *llvm::createPGOMemOPSizeOptLegacyPass() { - return new PGOMemOPSizeOptLegacyPass(); -} - namespace { // The class for main data structure to promote indirect calls to conditional // direct calls. 
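The next hunk stops discarding the llvm::Error returned by Symtab.create. A minimal sketch of the Error idiom it adopts, with doWork as a hypothetical stand-in rather than a real API:

  llvm::Error doWork();              // returns Error::success() on success
  if (llvm::Error E = doWork()) {    // boolean test is true only on failure
    // toString() renders and consumes the error; destroying an unconsumed
    // Error aborts in assertion-enabled builds.
    std::string Msg = llvm::toString(std::move(E));
    (void)Msg;                       // e.g. feed into DEBUG(...) logging
  }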
@@ -713,7 +642,12 @@ static bool promoteIndirectCalls(Module &M, bool InLTO, bool SamplePGO) { if (DisableICP) return false; InstrProfSymtab Symtab; - Symtab.create(M, InLTO); + if (Error E = Symtab.create(M, InLTO)) { + std::string SymtabFailure = toString(std::move(E)); + DEBUG(dbgs() << "Failed to create symtab: " << SymtabFailure << "\n"); + (void)SymtabFailure; + return false; + } bool Changed = false; for (auto &F : M) { if (F.isDeclaration()) @@ -749,285 +683,3 @@ PreservedAnalyses PGOIndirectCallPromotion::run(Module &M, return PreservedAnalyses::none(); } - -namespace { -class MemOPSizeOpt : public InstVisitor { -public: - MemOPSizeOpt(Function &Func, BlockFrequencyInfo &BFI) - : Func(Func), BFI(BFI), Changed(false) { - ValueDataArray = - llvm::make_unique(MemOPMaxVersion + 2); - // Get the MemOPSize range information from option MemOPSizeRange, - getMemOPSizeRangeFromOption(MemOPSizeRange, PreciseRangeStart, - PreciseRangeLast); - } - bool isChanged() const { return Changed; } - void perform() { - WorkList.clear(); - visit(Func); - - for (auto &MI : WorkList) { - ++NumOfPGOMemOPAnnotate; - if (perform(MI)) { - Changed = true; - ++NumOfPGOMemOPOpt; - DEBUG(dbgs() << "MemOP call: " << MI->getCalledFunction()->getName() - << "is Transformed.\n"); - } - } - } - - void visitMemIntrinsic(MemIntrinsic &MI) { - Value *Length = MI.getLength(); - // Not perform on constant length calls. - if (dyn_cast(Length)) - return; - WorkList.push_back(&MI); - } - -private: - Function &Func; - BlockFrequencyInfo &BFI; - bool Changed; - std::vector WorkList; - // Start of the previse range. - int64_t PreciseRangeStart; - // Last value of the previse range. - int64_t PreciseRangeLast; - // The space to read the profile annotation. - std::unique_ptr ValueDataArray; - bool perform(MemIntrinsic *MI); - - // This kind shows which group the value falls in. For PreciseValue, we have - // the profile count for that value. LargeGroup groups the values that are in - // range [LargeValue, +inf). NonLargeGroup groups the rest of values. 
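For example, assuming the precise range ends at PreciseRangeLast == 8 and MemOPSizeLarge == 4096 (illustrative values for the MemOPSizeRange and MemOPSizeLarge options):

  // getMemOPSizeKind(5)    == PreciseValue   -- inside the precise range
  // getMemOPSizeKind(9)    == NonLargeGroup  -- PreciseRangeLast + 1 buckets
  //                                             the mid-sized operations
  // getMemOPSizeKind(4096) == LargeGroup     -- matches MemOPSizeLarge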
- enum MemOPSizeKind { PreciseValue, NonLargeGroup, LargeGroup }; - - MemOPSizeKind getMemOPSizeKind(int64_t Value) const { - if (Value == MemOPSizeLarge && MemOPSizeLarge != 0) - return LargeGroup; - if (Value == PreciseRangeLast + 1) - return NonLargeGroup; - return PreciseValue; - } -}; - -static const char *getMIName(const MemIntrinsic *MI) { - switch (MI->getIntrinsicID()) { - case Intrinsic::memcpy: - return "memcpy"; - case Intrinsic::memmove: - return "memmove"; - case Intrinsic::memset: - return "memset"; - default: - return "unknown"; - } -} - -static bool isProfitable(uint64_t Count, uint64_t TotalCount) { - assert(Count <= TotalCount); - if (Count < MemOPCountThreshold) - return false; - if (Count < TotalCount * MemOPPercentThreshold / 100) - return false; - return true; -} - -static inline uint64_t getScaledCount(uint64_t Count, uint64_t Num, - uint64_t Denom) { - if (!MemOPScaleCount) - return Count; - bool Overflowed; - uint64_t ScaleCount = SaturatingMultiply(Count, Num, &Overflowed); - return ScaleCount / Denom; -} - -bool MemOPSizeOpt::perform(MemIntrinsic *MI) { - assert(MI); - if (MI->getIntrinsicID() == Intrinsic::memmove) - return false; - - uint32_t NumVals, MaxNumPromotions = MemOPMaxVersion + 2; - uint64_t TotalCount; - if (!getValueProfDataFromInst(*MI, IPVK_MemOPSize, MaxNumPromotions, - ValueDataArray.get(), NumVals, TotalCount)) - return false; - - uint64_t ActualCount = TotalCount; - uint64_t SavedTotalCount = TotalCount; - if (MemOPScaleCount) { - auto BBEdgeCount = BFI.getBlockProfileCount(MI->getParent()); - if (!BBEdgeCount) - return false; - ActualCount = *BBEdgeCount; - } - - ArrayRef VDs(ValueDataArray.get(), NumVals); - DEBUG(dbgs() << "Read one memory intrinsic profile with count " << ActualCount - << "\n"); - DEBUG( - for (auto &VD - : VDs) { dbgs() << " (" << VD.Value << "," << VD.Count << ")\n"; }); - - if (ActualCount < MemOPCountThreshold) - return false; - // Skip if the total value profiled count is 0, in which case we can't - // scale up the counts properly (and there is no profitable transformation). - if (TotalCount == 0) - return false; - - TotalCount = ActualCount; - if (MemOPScaleCount) - DEBUG(dbgs() << "Scale counts: numerator = " << ActualCount - << " denominator = " << SavedTotalCount << "\n"); - - // Keeping track of the count of the default case: - uint64_t RemainCount = TotalCount; - SmallVector SizeIds; - SmallVector CaseCounts; - uint64_t MaxCount = 0; - unsigned Version = 0; - // Default case is in the front -- save the slot here. - CaseCounts.push_back(0); - for (auto &VD : VDs) { - int64_t V = VD.Value; - uint64_t C = VD.Count; - if (MemOPScaleCount) - C = getScaledCount(C, ActualCount, SavedTotalCount); - - // Only care precise value here. - if (getMemOPSizeKind(V) != PreciseValue) - continue; - - // ValueCounts are sorted on the count. Break at the first un-profitable - // value. 
- if (!isProfitable(C, RemainCount)) - break; - - SizeIds.push_back(V); - CaseCounts.push_back(C); - if (C > MaxCount) - MaxCount = C; - - assert(RemainCount >= C); - RemainCount -= C; - - if (++Version > MemOPMaxVersion && MemOPMaxVersion != 0) - break; - } - - if (Version == 0) - return false; - - CaseCounts[0] = RemainCount; - if (RemainCount > MaxCount) - MaxCount = RemainCount; - - uint64_t SumForOpt = TotalCount - RemainCount; - - DEBUG(dbgs() << "Optimize one memory intrinsic call to " << Version - << " Versions (covering " << SumForOpt << " out of " - << TotalCount << ")\n"); - - // mem_op(..., size) - // ==> - // switch (size) { - // case s1: - // mem_op(..., s1); - // goto merge_bb; - // case s2: - // mem_op(..., s2); - // goto merge_bb; - // ... - // default: - // mem_op(..., size); - // goto merge_bb; - // } - // merge_bb: - - BasicBlock *BB = MI->getParent(); - DEBUG(dbgs() << "\n\n== Basic Block Before ==\n"); - DEBUG(dbgs() << *BB << "\n"); - auto OrigBBFreq = BFI.getBlockFreq(BB); - - BasicBlock *DefaultBB = SplitBlock(BB, MI); - BasicBlock::iterator It(*MI); - ++It; - assert(It != DefaultBB->end()); - BasicBlock *MergeBB = SplitBlock(DefaultBB, &(*It)); - MergeBB->setName("MemOP.Merge"); - BFI.setBlockFreq(MergeBB, OrigBBFreq.getFrequency()); - DefaultBB->setName("MemOP.Default"); - - auto &Ctx = Func.getContext(); - IRBuilder<> IRB(BB); - BB->getTerminator()->eraseFromParent(); - Value *SizeVar = MI->getLength(); - SwitchInst *SI = IRB.CreateSwitch(SizeVar, DefaultBB, SizeIds.size()); - - // Clear the value profile data. - MI->setMetadata(LLVMContext::MD_prof, nullptr); - - DEBUG(dbgs() << "\n\n== Basic Block After==\n"); - - for (uint64_t SizeId : SizeIds) { - ConstantInt *CaseSizeId = ConstantInt::get(Type::getInt64Ty(Ctx), SizeId); - BasicBlock *CaseBB = BasicBlock::Create( - Ctx, Twine("MemOP.Case.") + Twine(SizeId), &Func, DefaultBB); - Instruction *NewInst = MI->clone(); - // Fix the argument. 
- dyn_cast(NewInst)->setLength(CaseSizeId); - CaseBB->getInstList().push_back(NewInst); - IRBuilder<> IRBCase(CaseBB); - IRBCase.CreateBr(MergeBB); - SI->addCase(CaseSizeId, CaseBB); - DEBUG(dbgs() << *CaseBB << "\n"); - } - setProfMetadata(Func.getParent(), SI, CaseCounts, MaxCount); - - DEBUG(dbgs() << *BB << "\n"); - DEBUG(dbgs() << *DefaultBB << "\n"); - DEBUG(dbgs() << *MergeBB << "\n"); - - emitOptimizationRemark(Func.getContext(), "memop-opt", Func, - MI->getDebugLoc(), - Twine("optimize ") + getMIName(MI) + " with count " + - Twine(SumForOpt) + " out of " + Twine(TotalCount) + - " for " + Twine(Version) + " versions"); - - return true; -} -} // namespace - -static bool PGOMemOPSizeOptImpl(Function &F, BlockFrequencyInfo &BFI) { - if (DisableMemOPOPT) - return false; - - if (F.hasFnAttribute(Attribute::OptimizeForSize)) - return false; - MemOPSizeOpt MemOPSizeOpt(F, BFI); - MemOPSizeOpt.perform(); - return MemOPSizeOpt.isChanged(); -} - -bool PGOMemOPSizeOptLegacyPass::runOnFunction(Function &F) { - BlockFrequencyInfo &BFI = - getAnalysis().getBFI(); - return PGOMemOPSizeOptImpl(F, BFI); -} - -namespace llvm { -char &PGOMemOPSizeOptID = PGOMemOPSizeOptLegacyPass::ID; - -PreservedAnalyses PGOMemOPSizeOpt::run(Function &F, - FunctionAnalysisManager &FAM) { - auto &BFI = FAM.getResult(F); - bool Changed = PGOMemOPSizeOptImpl(F, BFI); - if (!Changed) - return PreservedAnalyses::all(); - auto PA = PreservedAnalyses(); - PA.preserve(); - return PA; -} -} // namespace llvm diff --git a/interpreter/llvm/src/lib/Transforms/Instrumentation/InstrProfiling.cpp b/interpreter/llvm/src/lib/Transforms/Instrumentation/InstrProfiling.cpp index 9a82532d7703f..db8fa89779479 100644 --- a/interpreter/llvm/src/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/interpreter/llvm/src/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -19,19 +19,21 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/Pass.h" @@ -40,7 +42,10 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/LoopSimplify.h" #include "llvm/Transforms/Utils/ModuleUtils.h" +#include "llvm/Transforms/Utils/SSAUpdater.h" #include #include #include @@ -92,6 +97,46 @@ cl::opt NumCountersPerValueSite( // is usually smaller than 2. cl::init(1.0)); +cl::opt AtomicCounterUpdatePromoted( + "atomic-counter-update-promoted", cl::ZeroOrMore, + cl::desc("Do counter update using atomic fetch add " + " for promoted counters only"), + cl::init(false)); + +// If the option is not specified, the default behavior about whether +// counter promotion is done depends on how the instrumentation lowering +// pipeline is set up, i.e., the default value of true of this option +// does not mean the promotion will be done by default.
Explicitly +// setting this option can override the default behavior. +cl::opt DoCounterPromotion("do-counter-promotion", cl::ZeroOrMore, + cl::desc("Do counter register promotion"), + cl::init(false)); +cl::opt MaxNumOfPromotionsPerLoop( + cl::ZeroOrMore, "max-counter-promotions-per-loop", cl::init(20), + cl::desc("Max number counter promotions per loop to avoid" + " increasing register pressure too much")); + +// A debug option +cl::opt + MaxNumOfPromotions(cl::ZeroOrMore, "max-counter-promotions", cl::init(-1), + cl::desc("Max number of allowed counter promotions")); + +cl::opt SpeculativeCounterPromotionMaxExiting( + cl::ZeroOrMore, "speculative-counter-promotion-max-exiting", cl::init(3), + cl::desc("The max number of exiting blocks of a loop to allow " + " speculative counter promotion")); + +cl::opt SpeculativeCounterPromotionToLoop( + cl::ZeroOrMore, "speculative-counter-promotion-to-loop", cl::init(false), + cl::desc("When the option is false, if the target block is in a loop, " + "the promotion will be disallowed unless the promoted counter " + " update can be further/iteratively promoted into an acyclic " + " region.")); + +cl::opt IterativeCounterPromotion( + cl::ZeroOrMore, "iterative-counter-promotion", cl::init(true), + cl::desc("Allow counter promotion across the whole loop nest.")); + class InstrProfilingLegacyPass : public ModulePass { InstrProfiling InstrProf; @@ -116,6 +161,183 @@ class InstrProfilingLegacyPass : public ModulePass { } }; +/// +/// A helper class to promote one counter RMW operation in the loop +/// into register update. +/// +/// RWM update for the counter will be sinked out of the loop after +/// the transformation. +/// +class PGOCounterPromoterHelper : public LoadAndStorePromoter { +public: + PGOCounterPromoterHelper( + Instruction *L, Instruction *S, SSAUpdater &SSA, Value *Init, + BasicBlock *PH, ArrayRef ExitBlocks, + ArrayRef InsertPts, + DenseMap> &LoopToCands, + LoopInfo &LI) + : LoadAndStorePromoter({L, S}, SSA), Store(S), ExitBlocks(ExitBlocks), + InsertPts(InsertPts), LoopToCandidates(LoopToCands), LI(LI) { + assert(isa(L)); + assert(isa(S)); + SSA.AddAvailableValue(PH, Init); + } + + void doExtraRewritesBeforeFinalDeletion() const override { + for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { + BasicBlock *ExitBlock = ExitBlocks[i]; + Instruction *InsertPos = InsertPts[i]; + // Get LiveIn value into the ExitBlock. If there are multiple + // predecessors, the value is defined by a PHI node in this + // block. + Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock); + Value *Addr = cast(Store)->getPointerOperand(); + IRBuilder<> Builder(InsertPos); + if (AtomicCounterUpdatePromoted) + // automic update currently can only be promoted across the current + // loop, not the whole loop nest. + Builder.CreateAtomicRMW(AtomicRMWInst::Add, Addr, LiveInValue, + AtomicOrdering::SequentiallyConsistent); + else { + LoadInst *OldVal = Builder.CreateLoad(Addr, "pgocount.promoted"); + auto *NewVal = Builder.CreateAdd(OldVal, LiveInValue); + auto *NewStore = Builder.CreateStore(NewVal, Addr); + + // Now update the parent loop's candidate list: + if (IterativeCounterPromotion) { + auto *TargetLoop = LI.getLoopFor(ExitBlock); + if (TargetLoop) + LoopToCandidates[TargetLoop].emplace_back(OldVal, NewStore); + } + } + } + } + +private: + Instruction *Store; + ArrayRef ExitBlocks; + ArrayRef InsertPts; + DenseMap> &LoopToCandidates; + LoopInfo &LI; +}; + +/// A helper class to do register promotion for all profile counter +/// updates in a loop. 
+/// +class PGOCounterPromoter { +public: + PGOCounterPromoter( + DenseMap> &LoopToCands, + Loop &CurLoop, LoopInfo &LI) + : LoopToCandidates(LoopToCands), ExitBlocks(), InsertPts(), L(CurLoop), + LI(LI) { + + SmallVector LoopExitBlocks; + SmallPtrSet BlockSet; + L.getExitBlocks(LoopExitBlocks); + + for (BasicBlock *ExitBlock : LoopExitBlocks) { + if (BlockSet.insert(ExitBlock).second) { + ExitBlocks.push_back(ExitBlock); + InsertPts.push_back(&*ExitBlock->getFirstInsertionPt()); + } + } + } + + bool run(int64_t *NumPromoted) { + unsigned MaxProm = getMaxNumOfPromotionsInLoop(&L); + if (MaxProm == 0) + return false; + + unsigned Promoted = 0; + for (auto &Cand : LoopToCandidates[&L]) { + + SmallVector NewPHIs; + SSAUpdater SSA(&NewPHIs); + Value *InitVal = ConstantInt::get(Cand.first->getType(), 0); + + PGOCounterPromoterHelper Promoter(Cand.first, Cand.second, SSA, InitVal, + L.getLoopPreheader(), ExitBlocks, + InsertPts, LoopToCandidates, LI); + Promoter.run(SmallVector({Cand.first, Cand.second})); + Promoted++; + if (Promoted >= MaxProm) + break; + + (*NumPromoted)++; + if (MaxNumOfPromotions != -1 && *NumPromoted >= MaxNumOfPromotions) + break; + } + + DEBUG(dbgs() << Promoted << " counters promoted for loop (depth=" + << L.getLoopDepth() << ")\n"); + return Promoted != 0; + } + +private: + bool allowSpeculativeCounterPromotion(Loop *LP) { + SmallVector ExitingBlocks; + L.getExitingBlocks(ExitingBlocks); + // Not considered speculative. + if (ExitingBlocks.size() == 1) + return true; + if (ExitingBlocks.size() > SpeculativeCounterPromotionMaxExiting) + return false; + return true; + } + + // Returns the max number of Counter Promotions for LP. + unsigned getMaxNumOfPromotionsInLoop(Loop *LP) { + // We can't insert into a catchswitch. + SmallVector LoopExitBlocks; + LP->getExitBlocks(LoopExitBlocks); + if (llvm::any_of(LoopExitBlocks, [](BasicBlock *Exit) { + return isa(Exit->getTerminator()); + })) + return 0; + + if (!LP->hasDedicatedExits()) + return 0; + + BasicBlock *PH = LP->getLoopPreheader(); + if (!PH) + return 0; + + SmallVector ExitingBlocks; + LP->getExitingBlocks(ExitingBlocks); + // Not considered speculative.
+ if (ExitingBlocks.size() == 1) + return MaxNumOfPromotionsPerLoop; + + if (ExitingBlocks.size() > SpeculativeCounterPromotionMaxExiting) + return 0; + + // Whether the target block is in a loop does not matter: + if (SpeculativeCounterPromotionToLoop) + return MaxNumOfPromotionsPerLoop; + + // Now check the target block: + unsigned MaxProm = MaxNumOfPromotionsPerLoop; + for (auto *TargetBlock : LoopExitBlocks) { + auto *TargetLoop = LI.getLoopFor(TargetBlock); + if (!TargetLoop) + continue; + unsigned MaxPromForTarget = getMaxNumOfPromotionsInLoop(TargetLoop); + unsigned PendingCandsInTarget = LoopToCandidates[TargetLoop].size(); + MaxProm = + std::min(MaxProm, std::max(MaxPromForTarget, PendingCandsInTarget) - + PendingCandsInTarget); + } + return MaxProm; + } + + DenseMap> &LoopToCandidates; + SmallVector ExitBlocks; + SmallVector InsertPts; + Loop &L; + LoopInfo &LI; +}; + } // end anonymous namespace PreservedAnalyses InstrProfiling::run(Module &M, ModuleAnalysisManager &AM) { @@ -147,6 +369,65 @@ static InstrProfIncrementInst *castToIncrementInst(Instruction *Instr) { return dyn_cast(Instr); } +bool InstrProfiling::lowerIntrinsics(Function *F) { + bool MadeChange = false; + PromotionCandidates.clear(); + for (BasicBlock &BB : *F) { + for (auto I = BB.begin(), E = BB.end(); I != E;) { + auto Instr = I++; + InstrProfIncrementInst *Inc = castToIncrementInst(&*Instr); + if (Inc) { + lowerIncrement(Inc); + MadeChange = true; + } else if (auto *Ind = dyn_cast(Instr)) { + lowerValueProfileInst(Ind); + MadeChange = true; + } + } + } + + if (!MadeChange) + return false; + + promoteCounterLoadStores(F); + return true; +} + +bool InstrProfiling::isCounterPromotionEnabled() const { + if (DoCounterPromotion.getNumOccurrences() > 0) + return DoCounterPromotion; + + return Options.DoCounterPromotion; +} + +void InstrProfiling::promoteCounterLoadStores(Function *F) { + if (!isCounterPromotionEnabled()) + return; + + DominatorTree DT(*F); + LoopInfo LI(DT); + DenseMap> LoopPromotionCandidates; + + for (const auto &LoadStore : PromotionCandidates) { + auto *CounterLoad = LoadStore.first; + auto *CounterStore = LoadStore.second; + BasicBlock *BB = CounterLoad->getParent(); + Loop *ParentLoop = LI.getLoopFor(BB); + if (!ParentLoop) + continue; + LoopPromotionCandidates[ParentLoop].emplace_back(CounterLoad, CounterStore); + } + + SmallVector Loops = LI.getLoopsInPreorder(); + + // Do a post-order traversal of the loops so that counter updates can be + // iteratively hoisted outside the loop nest. 
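In source-level terms, the promotion set up above has the following effect (a hedged C-style sketch; the real transform rewrites IR loads and stores of a profile counter slot, written here as counter):

  // Before: every iteration read-modify-writes the counter in memory.
  for (int i = 0; i < n; ++i)
    ++counter;
  // After: the delta accumulates in a register; each loop exit adds it back,
  // via load/add/store or, under -atomic-counter-update-promoted, an atomic
  // fetch-add.
  uint64_t delta = 0;
  for (int i = 0; i < n; ++i)
    ++delta;
  counter += delta;

Handling inner loops first lets the add-back stored at an inner exit become a promotion candidate of the enclosing loop (via LoopToCandidates in the helper above), which is what makes the iterative hoisting below work.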
+ for (auto *Loop : llvm::reverse(Loops)) { + PGOCounterPromoter Promoter(LoopPromotionCandidates, *Loop, LI); + Promoter.run(&TotalCountersPromoted); + } +} + bool InstrProfiling::run(Module &M, const TargetLibraryInfo &TLI) { bool MadeChange = false; @@ -179,18 +460,7 @@ bool InstrProfiling::run(Module &M, const TargetLibraryInfo &TLI) { } for (Function &F : M) - for (BasicBlock &BB : F) - for (auto I = BB.begin(), E = BB.end(); I != E;) { - auto Instr = I++; - InstrProfIncrementInst *Inc = castToIncrementInst(&*Instr); - if (Inc) { - lowerIncrement(Inc); - MadeChange = true; - } else if (auto *Ind = dyn_cast(Instr)) { - lowerValueProfileInst(Ind); - MadeChange = true; - } - } + MadeChange |= lowerIntrinsics(&F); if (GlobalVariable *CoverageNamesVar = M.getNamedGlobal(getCoverageUnusedNamesVarName())) { @@ -303,9 +573,12 @@ void InstrProfiling::lowerIncrement(InstrProfIncrementInst *Inc) { IRBuilder<> Builder(Inc); uint64_t Index = Inc->getIndex()->getZExtValue(); Value *Addr = Builder.CreateConstInBoundsGEP2_64(Counters, 0, Index); - Value *Count = Builder.CreateLoad(Addr, "pgocount"); - Count = Builder.CreateAdd(Count, Inc->getStep()); - Inc->replaceAllUsesWith(Builder.CreateStore(Count, Addr)); + Value *Load = Builder.CreateLoad(Addr, "pgocount"); + auto *Count = Builder.CreateAdd(Load, Inc->getStep()); + auto *Store = Builder.CreateStore(Count, Addr); + Inc->replaceAllUsesWith(Store); + if (isCounterPromotionEnabled()) + PromotionCandidates.emplace_back(cast(Load), Store); Inc->eraseFromParent(); } @@ -343,14 +616,24 @@ static std::string getVarName(InstrProfIncrementInst *Inc, StringRef Prefix) { static inline bool shouldRecordFunctionAddr(Function *F) { // Check the linkage + bool HasAvailableExternallyLinkage = F->hasAvailableExternallyLinkage(); if (!F->hasLinkOnceLinkage() && !F->hasLocalLinkage() && - !F->hasAvailableExternallyLinkage()) + !HasAvailableExternallyLinkage) return true; + + // A function marked 'alwaysinline' with available_externally linkage can't + // have its address taken. Doing so would create an undefined external ref to + // the function, which would fail to link. + if (HasAvailableExternallyLinkage && + F->hasFnAttribute(Attribute::AlwaysInline)) + return false; + // Prohibit function address recording if the function is both internal and // COMDAT. This avoids the profile data variable referencing internal symbols // in COMDAT. if (F->hasLocalLinkage() && F->hasComdat()) return false; + // Check uses of this function for other than direct calls or invokes to it. // Inline virtual functions have linkeOnceODR linkage. 
When a key method // exists, the vtable will only be emitted in the TU where the key method diff --git a/interpreter/llvm/src/lib/Transforms/Instrumentation/MaximumSpanningTree.h b/interpreter/llvm/src/lib/Transforms/Instrumentation/MaximumSpanningTree.h index 363539b2886f3..4eb758c69c581 100644 --- a/interpreter/llvm/src/lib/Transforms/Instrumentation/MaximumSpanningTree.h +++ b/interpreter/llvm/src/lib/Transforms/Instrumentation/MaximumSpanningTree.h @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_ANALYSIS_MAXIMUMSPANNINGTREE_H -#define LLVM_ANALYSIS_MAXIMUMSPANNINGTREE_H +#ifndef LLVM_LIB_TRANSFORMS_INSTRUMENTATION_MAXIMUMSPANNINGTREE_H +#define LLVM_LIB_TRANSFORMS_INSTRUMENTATION_MAXIMUMSPANNINGTREE_H #include "llvm/ADT/EquivalenceClasses.h" #include "llvm/IR/BasicBlock.h" @@ -108,4 +108,4 @@ namespace llvm { } // End llvm namespace -#endif +#endif // LLVM_LIB_TRANSFORMS_INSTRUMENTATION_MAXIMUMSPANNINGTREE_H diff --git a/interpreter/llvm/src/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/interpreter/llvm/src/lib/Transforms/Instrumentation/MemorySanitizer.cpp index ff753c20a94af..b7c6271869cd5 100644 --- a/interpreter/llvm/src/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/interpreter/llvm/src/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -2087,6 +2087,7 @@ struct MemorySanitizerVisitor : public InstVisitor { switch (I.getNumArgOperands()) { case 3: assert(isa(I.getArgOperand(2)) && "Invalid rounding mode"); + LLVM_FALLTHROUGH; case 2: CopyOp = I.getArgOperand(0); ConvertOp = I.getArgOperand(1); @@ -2917,8 +2918,11 @@ struct MemorySanitizerVisitor : public InstVisitor { if (ClDumpStrictInstructions) dumpInst(I); DEBUG(dbgs() << "DEFAULT: " << I << "\n"); - for (size_t i = 0, n = I.getNumOperands(); i < n; i++) - insertShadowCheck(I.getOperand(i), &I); + for (size_t i = 0, n = I.getNumOperands(); i < n; i++) { + Value *Operand = I.getOperand(i); + if (Operand->getType()->isSized()) + insertShadowCheck(Operand, &I); + } setShadow(&I, getCleanShadow(&I)); setOrigin(&I, getCleanOrigin()); } @@ -3035,7 +3039,7 @@ struct VarArgAMD64Helper : public VarArgHelper { } void visitVAStartInst(VAStartInst &I) override { - if (F.getCallingConv() == CallingConv::X86_64_Win64) + if (F.getCallingConv() == CallingConv::Win64) return; IRBuilder<> IRB(&I); VAStartInstrumentationList.push_back(&I); @@ -3049,7 +3053,7 @@ struct VarArgAMD64Helper : public VarArgHelper { } void visitVACopyInst(VACopyInst &I) override { - if (F.getCallingConv() == CallingConv::X86_64_Win64) + if (F.getCallingConv() == CallingConv::Win64) return; IRBuilder<> IRB(&I); Value *VAListTag = I.getArgOperand(0); diff --git a/interpreter/llvm/src/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/interpreter/llvm/src/lib/Transforms/Instrumentation/PGOInstrumentation.cpp index 1af40c3b78464..8e4bfc0b91bc5 100644 --- a/interpreter/llvm/src/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/interpreter/llvm/src/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -180,7 +180,15 @@ static cl::opt static cl::opt PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden, cl::desc("Use this option to turn on/off " - "memory instrinsic size profiling.")); + "memory intrinsic size profiling.")); + +// Emit branch probability as optimization remarks. 
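Further down, setProfMetadata uses this option to emit a remark pairing the compare shape produced by getBranchCondString with the measured probability. A hedged example of the resulting text for "if (x == 0)" on an i32 under a 50/50 profile with 2000 total executions (the probability rendering comes from BranchProbability's printer and may differ in detail):

  // eq_i32_Zero is true with probability : 0x40000000 / 0x80000000 = 50.00% (total count : 2000)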
+static cl::opt + EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden, + cl::desc("When this option is on, the annotated " + "branch probability will be emitted as " + " optimization remarks: -Rpass-analysis=" + "pgo-instr-use")); // Command line option to turn on CFG dot dump after profile annotation. // Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts @@ -192,6 +200,39 @@ extern cl::opt ViewBlockFreqFuncName; namespace { +// Return a string describing the branch condition that can be +// used in static branch probability heuristics: +std::string getBranchCondString(Instruction *TI) { + BranchInst *BI = dyn_cast(TI); + if (!BI || !BI->isConditional()) + return std::string(); + + Value *Cond = BI->getCondition(); + ICmpInst *CI = dyn_cast(Cond); + if (!CI) + return std::string(); + + std::string result; + raw_string_ostream OS(result); + OS << CmpInst::getPredicateName(CI->getPredicate()) << "_"; + CI->getOperand(0)->getType()->print(OS, true); + + Value *RHS = CI->getOperand(1); + ConstantInt *CV = dyn_cast(RHS); + if (CV) { + if (CV->isZero()) + OS << "_Zero"; + else if (CV->isOne()) + OS << "_One"; + else if (CV->isMinusOne()) + OS << "_MinusOne"; + else + OS << "_Const"; + } + OS.flush(); + return result; +} + /// The select instruction visitor plays three roles specified /// by the mode. In \c VM_counting mode, it simply counts the number of /// select instructions. In \c VM_instrument mode, it inserts code to count @@ -319,7 +360,7 @@ class PGOInstrumentationUseLegacyPass : public ModulePass { PGOInstrumentationUseLegacyPass(std::string Filename = "") : ModulePass(ID), ProfileFileName(std::move(Filename)) { if (!PGOTestProfileFile.empty()) - ProfileFileName = PGOTestProfileFile.getValue(); + ProfileFileName = PGOTestProfileFile; initializePGOInstrumentationUseLegacyPassPass( *PassRegistry::getPassRegistry()); } @@ -1136,7 +1177,7 @@ void MemIntrinsicVisitor::instrumentOneMemIntrinsic(MemIntrinsic &MI) { Builder.CreateCall( Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile), {llvm::ConstantExpr::getBitCast(FuncNameVar, I8PtrTy), - Builder.getInt64(FuncHash), Builder.CreatePtrToInt(Length, Int64Ty), + Builder.getInt64(FuncHash), Builder.CreateZExtOrTrunc(Length, Int64Ty), Builder.getInt32(IPVK_MemOPSize), Builder.getInt32(CurCtrId)}); ++CurCtrId; } @@ -1375,7 +1416,7 @@ static bool annotateAllFunctions( PGOInstrumentationUse::PGOInstrumentationUse(std::string Filename) : ProfileFileName(std::move(Filename)) { if (!PGOTestProfileFile.empty()) - ProfileFileName = PGOTestProfileFile.getValue(); + ProfileFileName = PGOTestProfileFile; } PreservedAnalyses PGOInstrumentationUse::run(Module &M, @@ -1424,6 +1465,29 @@ void setProfMetadata(Module *M, Instruction *TI, ArrayRef EdgeCounts, for (const auto &W : Weights) { dbgs() << W << " "; } dbgs() << "\n";); TI->setMetadata(llvm::LLVMContext::MD_prof, MDB.createBranchWeights(Weights)); + if (EmitBranchProbability) { + std::string BrCondStr = getBranchCondString(TI); + if (BrCondStr.empty()) + return; + + unsigned WSum = + std::accumulate(Weights.begin(), Weights.end(), 0, + [](unsigned w1, unsigned w2) { return w1 + w2; }); + uint64_t TotalCount = + std::accumulate(EdgeCounts.begin(), EdgeCounts.end(), 0, + [](uint64_t c1, uint64_t c2) { return c1 + c2; }); + BranchProbability BP(Weights[0], WSum); + std::string BranchProbStr; + raw_string_ostream OS(BranchProbStr); + OS << BP; + OS << " (total count : " << TotalCount << ")"; + OS.flush(); + Function *F = TI->getParent()->getParent(); + 
emitOptimizationRemarkAnalysis( + F->getContext(), "pgo-use-annot", *F, TI->getDebugLoc(), + Twine(BrCondStr) + + " is true with probability : " + Twine(BranchProbStr)); + } } template <> struct GraphTraits { diff --git a/interpreter/llvm/src/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp b/interpreter/llvm/src/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp new file mode 100644 index 0000000000000..0bc9ddfbe4d33 --- /dev/null +++ b/interpreter/llvm/src/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp @@ -0,0 +1,419 @@ +//===-- PGOMemOPSizeOpt.cpp - Optimizations based on value profiling ===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the transformation that optimizes memory intrinsics +// such as memcpy using the size value profile. When memory intrinsic size +// value profile metadata is available, a single memory intrinsic is expanded +// to a sequence of guarded specialized versions that are called with the +// hottest size(s), for later expansion into more optimal inline sequences. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/InstVisitor.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/PassManager.h" +#include "llvm/IR/Type.h" +#include "llvm/Pass.h" +#include "llvm/PassRegistry.h" +#include "llvm/PassSupport.h" +#include "llvm/ProfileData/InstrProf.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Transforms/Instrumentation.h" +#include "llvm/Transforms/PGOInstrumentation.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include +#include +#include + +using namespace llvm; + +#define DEBUG_TYPE "pgo-memop-opt" + +STATISTIC(NumOfPGOMemOPOpt, "Number of memop intrinsics optimized."); +STATISTIC(NumOfPGOMemOPAnnotate, "Number of memop intrinsics annotated."); + +// The minimum call count to optimize memory intrinsic calls. +static cl::opt + MemOPCountThreshold("pgo-memop-count-threshold", cl::Hidden, cl::ZeroOrMore, + cl::init(1000), + cl::desc("The minimum count to optimize memory " + "intrinsic calls")); + +// Command line option to disable memory intrinsic optimization. The default is +// false. This is for debugging purposes. +static cl::opt DisableMemOPOPT("disable-memop-opt", cl::init(false), + cl::Hidden, cl::desc("Disable optimize")); + +// The percent threshold to optimize memory intrinsic calls. +static cl::opt + MemOPPercentThreshold("pgo-memop-percent-threshold", cl::init(40), + cl::Hidden, cl::ZeroOrMore, + cl::desc("The percentage threshold for the " + "memory intrinsic calls optimization")); + +// Maximum number of versions for optimizing memory intrinsic calls.
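The file header above describes the whole transformation; at the source level it behaves roughly like the sketch below. This is a hand-written illustration, not output of the pass, and the hot sizes 4 and 8 with their counts are hypothetical. The number of specialized cases is capped by the pgo-memop-max-version option declared next.

  #include <cstddef>
  #include <cstring>

  // Before: one variable-size call. Suppose the size value profile says
  // n was 4 on 6000 of 10000 executions and 8 on another 3000.
  void copy_before(void *dst, const void *src, size_t n) {
    std::memcpy(dst, src, n);
  }

  // After: the call is versioned on the hottest sizes. The constant-size
  // copies can later be lowered to optimal inline sequences; the default
  // case keeps the original call (and the leftover profile counts).
  void copy_after(void *dst, const void *src, size_t n) {
    switch (n) {
    case 4: std::memcpy(dst, src, 4); break;
    case 8: std::memcpy(dst, src, 8); break;
    default: std::memcpy(dst, src, n); break;
    }
  }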
+static cl::opt + MemOPMaxVersion("pgo-memop-max-version", cl::init(3), cl::Hidden, + cl::ZeroOrMore, + cl::desc("The max version for the optimized memory " + " intrinsic calls")); + +// Scale the counts from the annotation using the BB count value. +static cl::opt + MemOPScaleCount("pgo-memop-scale-count", cl::init(true), cl::Hidden, + cl::desc("Scale the memop size counts using the basic " + " block count value")); + +// This option sets the range of precise profile memop sizes. +extern cl::opt MemOPSizeRange; + +// This option sets the value that groups large memop sizes. +extern cl::opt MemOPSizeLarge; + +namespace { +class PGOMemOPSizeOptLegacyPass : public FunctionPass { +public: + static char ID; + + PGOMemOPSizeOptLegacyPass() : FunctionPass(ID) { + initializePGOMemOPSizeOptLegacyPassPass(*PassRegistry::getPassRegistry()); + } + + StringRef getPassName() const override { return "PGOMemOPSize"; } + +private: + bool runOnFunction(Function &F) override; + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addPreserved(); + } +}; +} // end anonymous namespace + +char PGOMemOPSizeOptLegacyPass::ID = 0; +INITIALIZE_PASS_BEGIN(PGOMemOPSizeOptLegacyPass, "pgo-memop-opt", + "Optimize memory intrinsic using its size value profile", + false, false) +INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) +INITIALIZE_PASS_END(PGOMemOPSizeOptLegacyPass, "pgo-memop-opt", + "Optimize memory intrinsic using its size value profile", + false, false) + +FunctionPass *llvm::createPGOMemOPSizeOptLegacyPass() { + return new PGOMemOPSizeOptLegacyPass(); +} + +namespace { +class MemOPSizeOpt : public InstVisitor { +public: + MemOPSizeOpt(Function &Func, BlockFrequencyInfo &BFI) + : Func(Func), BFI(BFI), Changed(false) { + ValueDataArray = + llvm::make_unique(MemOPMaxVersion + 2); + // Get the MemOPSize range information from option MemOPSizeRange. + getMemOPSizeRangeFromOption(MemOPSizeRange, PreciseRangeStart, + PreciseRangeLast); + } + bool isChanged() const { return Changed; } + void perform() { + WorkList.clear(); + visit(Func); + + for (auto &MI : WorkList) { + ++NumOfPGOMemOPAnnotate; + if (perform(MI)) { + Changed = true; + ++NumOfPGOMemOPOpt; + DEBUG(dbgs() << "MemOP call: " << MI->getCalledFunction()->getName() + << "is Transformed.\n"); + } + } + } + + void visitMemIntrinsic(MemIntrinsic &MI) { + Value *Length = MI.getLength(); + // Don't perform the transformation on constant-length calls. + if (dyn_cast(Length)) + return; + WorkList.push_back(&MI); + } + +private: + Function &Func; + BlockFrequencyInfo &BFI; + bool Changed; + std::vector WorkList; + // Start of the precise range. + int64_t PreciseRangeStart; + // Last value of the precise range. + int64_t PreciseRangeLast; + // The space to read the profile annotation. + std::unique_ptr ValueDataArray; + bool perform(MemIntrinsic *MI); + + // This kind shows which group the value falls in. For PreciseValue, we have + // the profile count for that value. LargeGroup groups the values that are in + // range [LargeValue, +inf). NonLargeGroup groups the rest of values.
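The grouping described in the comment above can be exercised in isolation; the enum that follows encodes it. A minimal standalone sketch, assuming a precise range of [0, 8] (so 9 stands for the not-large remainder) and 4096 as the large-size sentinel; both numbers are illustrative, not the pass defaults.

  #include <cassert>
  #include <cstdint>

  enum MemOPSizeKind { PreciseValue, NonLargeGroup, LargeGroup };

  // Mirrors getMemOPSizeKind() below: the profile encodes one sentinel
  // value per group, so classification is two equality tests.
  static MemOPSizeKind classify(int64_t V, int64_t PreciseRangeLast,
                                int64_t SizeLarge) {
    if (SizeLarge != 0 && V == SizeLarge)
      return LargeGroup;    // stands for every size in [SizeLarge, +inf)
    if (V == PreciseRangeLast + 1)
      return NonLargeGroup; // everything between the two groups
    return PreciseValue;    // an exactly profiled size
  }

  int main() {
    assert(classify(4, 8, 4096) == PreciseValue);
    assert(classify(9, 8, 4096) == NonLargeGroup);
    assert(classify(4096, 8, 4096) == LargeGroup);
  }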
+ enum MemOPSizeKind { PreciseValue, NonLargeGroup, LargeGroup }; + + MemOPSizeKind getMemOPSizeKind(int64_t Value) const { + if (Value == MemOPSizeLarge && MemOPSizeLarge != 0) + return LargeGroup; + if (Value == PreciseRangeLast + 1) + return NonLargeGroup; + return PreciseValue; + } +}; + +static const char *getMIName(const MemIntrinsic *MI) { + switch (MI->getIntrinsicID()) { + case Intrinsic::memcpy: + return "memcpy"; + case Intrinsic::memmove: + return "memmove"; + case Intrinsic::memset: + return "memset"; + default: + return "unknown"; + } +} + +static bool isProfitable(uint64_t Count, uint64_t TotalCount) { + assert(Count <= TotalCount); + if (Count < MemOPCountThreshold) + return false; + if (Count < TotalCount * MemOPPercentThreshold / 100) + return false; + return true; +} + +static inline uint64_t getScaledCount(uint64_t Count, uint64_t Num, + uint64_t Denom) { + if (!MemOPScaleCount) + return Count; + bool Overflowed; + uint64_t ScaleCount = SaturatingMultiply(Count, Num, &Overflowed); + return ScaleCount / Denom; +} + +bool MemOPSizeOpt::perform(MemIntrinsic *MI) { + assert(MI); + if (MI->getIntrinsicID() == Intrinsic::memmove) + return false; + + uint32_t NumVals, MaxNumPromotions = MemOPMaxVersion + 2; + uint64_t TotalCount; + if (!getValueProfDataFromInst(*MI, IPVK_MemOPSize, MaxNumPromotions, + ValueDataArray.get(), NumVals, TotalCount)) + return false; + + uint64_t ActualCount = TotalCount; + uint64_t SavedTotalCount = TotalCount; + if (MemOPScaleCount) { + auto BBEdgeCount = BFI.getBlockProfileCount(MI->getParent()); + if (!BBEdgeCount) + return false; + ActualCount = *BBEdgeCount; + } + + ArrayRef VDs(ValueDataArray.get(), NumVals); + DEBUG(dbgs() << "Read one memory intrinsic profile with count " << ActualCount + << "\n"); + DEBUG( + for (auto &VD + : VDs) { dbgs() << " (" << VD.Value << "," << VD.Count << ")\n"; }); + + if (ActualCount < MemOPCountThreshold) + return false; + // Skip if the total value profiled count is 0, in which case we can't + // scale up the counts properly (and there is no profitable transformation). + if (TotalCount == 0) + return false; + + TotalCount = ActualCount; + if (MemOPScaleCount) + DEBUG(dbgs() << "Scale counts: numerator = " << ActualCount + << " denominator = " << SavedTotalCount << "\n"); + + // Keep track of the count of the default case: + uint64_t RemainCount = TotalCount; + uint64_t SavedRemainCount = SavedTotalCount; + SmallVector SizeIds; + SmallVector CaseCounts; + uint64_t MaxCount = 0; + unsigned Version = 0; + // Default case is in the front -- save the slot here. + CaseCounts.push_back(0); + for (auto &VD : VDs) { + int64_t V = VD.Value; + uint64_t C = VD.Count; + if (MemOPScaleCount) + C = getScaledCount(C, ActualCount, SavedTotalCount); + + // Only care about precise values here. + if (getMemOPSizeKind(V) != PreciseValue) + continue; + + // ValueCounts are sorted by count. Break at the first unprofitable // value.
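To make the break condition concrete, here is a worked example with hypothetical profile data and the default thresholds declared earlier in this file (count >= 1000 and >= 40% of the remaining total):

  // One memcpy with TotalCount = 9500 and value profile:
  //   size 8 -> 6000, size 4 -> 3000, size 16 -> 500
  //
  // Iteration 1: 6000 >= 1000 and 6000 >= 40% of 9500 (3800): version on
  //              size 8; RemainCount becomes 3500.
  // Iteration 2: 3000 >= 1000 and 3000 >= 40% of 3500 (1400): version on
  //              size 4; RemainCount becomes 500.
  // Iteration 3: 500 < 1000: unprofitable, the loop breaks; size 16 stays
  //              in the default case and keeps its value profile record.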
+ if (!isProfitable(C, RemainCount)) + break; + + SizeIds.push_back(V); + CaseCounts.push_back(C); + if (C > MaxCount) + MaxCount = C; + + assert(RemainCount >= C); + RemainCount -= C; + assert(SavedRemainCount >= VD.Count); + SavedRemainCount -= VD.Count; + + if (++Version > MemOPMaxVersion && MemOPMaxVersion != 0) + break; + } + + if (Version == 0) + return false; + + CaseCounts[0] = RemainCount; + if (RemainCount > MaxCount) + MaxCount = RemainCount; + + uint64_t SumForOpt = TotalCount - RemainCount; + + DEBUG(dbgs() << "Optimize one memory intrinsic call to " << Version + << " Versions (covering " << SumForOpt << " out of " + << TotalCount << ")\n"); + + // mem_op(..., size) + // ==> + // switch (size) { + // case s1: + // mem_op(..., s1); + // goto merge_bb; + // case s2: + // mem_op(..., s2); + // goto merge_bb; + // ... + // default: + // mem_op(..., size); + // goto merge_bb; + // } + // merge_bb: + + BasicBlock *BB = MI->getParent(); + DEBUG(dbgs() << "\n\n== Basic Block Before ==\n"); + DEBUG(dbgs() << *BB << "\n"); + auto OrigBBFreq = BFI.getBlockFreq(BB); + + BasicBlock *DefaultBB = SplitBlock(BB, MI); + BasicBlock::iterator It(*MI); + ++It; + assert(It != DefaultBB->end()); + BasicBlock *MergeBB = SplitBlock(DefaultBB, &(*It)); + MergeBB->setName("MemOP.Merge"); + BFI.setBlockFreq(MergeBB, OrigBBFreq.getFrequency()); + DefaultBB->setName("MemOP.Default"); + + auto &Ctx = Func.getContext(); + IRBuilder<> IRB(BB); + BB->getTerminator()->eraseFromParent(); + Value *SizeVar = MI->getLength(); + SwitchInst *SI = IRB.CreateSwitch(SizeVar, DefaultBB, SizeIds.size()); + + // Clear the value profile data. + MI->setMetadata(LLVMContext::MD_prof, nullptr); + // If all promoted, we don't need the MD.prof metadata. + if (SavedRemainCount > 0 || Version != NumVals) + // Otherwise we need update with the un-promoted records back. + annotateValueSite(*Func.getParent(), *MI, VDs.slice(Version), + SavedRemainCount, IPVK_MemOPSize, NumVals); + + DEBUG(dbgs() << "\n\n== Basic Block After==\n"); + + for (uint64_t SizeId : SizeIds) { + ConstantInt *CaseSizeId = ConstantInt::get(Type::getInt64Ty(Ctx), SizeId); + BasicBlock *CaseBB = BasicBlock::Create( + Ctx, Twine("MemOP.Case.") + Twine(SizeId), &Func, DefaultBB); + Instruction *NewInst = MI->clone(); + // Fix the argument. 
+ dyn_cast(NewInst)->setLength(CaseSizeId); + CaseBB->getInstList().push_back(NewInst); + IRBuilder<> IRBCase(CaseBB); + IRBCase.CreateBr(MergeBB); + SI->addCase(CaseSizeId, CaseBB); + DEBUG(dbgs() << *CaseBB << "\n"); + } + setProfMetadata(Func.getParent(), SI, CaseCounts, MaxCount); + + DEBUG(dbgs() << *BB << "\n"); + DEBUG(dbgs() << *DefaultBB << "\n"); + DEBUG(dbgs() << *MergeBB << "\n"); + + emitOptimizationRemark(Func.getContext(), "memop-opt", Func, + MI->getDebugLoc(), + Twine("optimize ") + getMIName(MI) + " with count " + + Twine(SumForOpt) + " out of " + Twine(TotalCount) + + " for " + Twine(Version) + " versions"); + + return true; +} +} // namespace + +static bool PGOMemOPSizeOptImpl(Function &F, BlockFrequencyInfo &BFI) { + if (DisableMemOPOPT) + return false; + + if (F.hasFnAttribute(Attribute::OptimizeForSize)) + return false; + MemOPSizeOpt MemOPSizeOpt(F, BFI); + MemOPSizeOpt.perform(); + return MemOPSizeOpt.isChanged(); +} + +bool PGOMemOPSizeOptLegacyPass::runOnFunction(Function &F) { + BlockFrequencyInfo &BFI = + getAnalysis().getBFI(); + return PGOMemOPSizeOptImpl(F, BFI); +} + +namespace llvm { +char &PGOMemOPSizeOptID = PGOMemOPSizeOptLegacyPass::ID; + +PreservedAnalyses PGOMemOPSizeOpt::run(Function &F, + FunctionAnalysisManager &FAM) { + auto &BFI = FAM.getResult(F); + bool Changed = PGOMemOPSizeOptImpl(F, BFI); + if (!Changed) + return PreservedAnalyses::all(); + auto PA = PreservedAnalyses(); + PA.preserve(); + return PA; +} +} // namespace llvm diff --git a/interpreter/llvm/src/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/interpreter/llvm/src/lib/Transforms/Instrumentation/SanitizerCoverage.cpp index 4bc0a71331187..06fe07598374b 100644 --- a/interpreter/llvm/src/lib/Transforms/Instrumentation/SanitizerCoverage.cpp +++ b/interpreter/llvm/src/lib/Transforms/Instrumentation/SanitizerCoverage.cpp @@ -7,24 +7,7 @@ // //===----------------------------------------------------------------------===// // -// Coverage instrumentation that works with AddressSanitizer -// and potentially with other Sanitizers. -// -// We create a Guard variable with the same linkage -// as the function and inject this code into the entry block (SCK_Function) -// or all blocks (SCK_BB): -// if (Guard < 0) { -// __sanitizer_cov(&Guard); -// } -// The accesses to Guard are atomic. The rest of the logic is -// in __sanitizer_cov (it's fine to call it more than once). -// -// With SCK_Edge we also split critical edges this effectively -// instrumenting all edges. -// -// This coverage implementation provides very limited data: -// it only tells if a given function (block) was ever executed. No counters. -// But for many use cases this is what we need and the added slowdown small. +// Coverage instrumentation done on LLVM IR level, works with Sanitizers. 
// //===----------------------------------------------------------------------===// @@ -56,9 +39,6 @@ using namespace llvm; #define DEBUG_TYPE "sancov" -static const char *const SanCovModuleInitName = "__sanitizer_cov_module_init"; -static const char *const SanCovName = "__sanitizer_cov"; -static const char *const SanCovWithCheckName = "__sanitizer_cov_with_check"; static const char *const SanCovTracePCIndirName = "__sanitizer_cov_trace_pc_indir"; static const char *const SanCovTracePCName = "__sanitizer_cov_trace_pc"; @@ -77,6 +57,11 @@ static const char *const SanCovTracePCGuardName = "__sanitizer_cov_trace_pc_guard"; static const char *const SanCovTracePCGuardInitName = "__sanitizer_cov_trace_pc_guard_init"; +static const char *const SanCov8bitCountersInitName = + "__sanitizer_cov_8bit_counters_init"; + +static const char *const SanCovGuardsSectionName = "sancov_guards"; +static const char *const SanCovCountersSectionName = "sancov_cntrs"; static cl::opt ClCoverageLevel( "sanitizer-coverage-level", @@ -84,20 +69,18 @@ static cl::opt ClCoverageLevel( "3: all blocks and critical edges"), cl::Hidden, cl::init(0)); -static cl::opt ClCoverageBlockThreshold( - "sanitizer-coverage-block-threshold", - cl::desc("Use a callback with a guard check inside it if there are" - " more than this number of blocks."), - cl::Hidden, cl::init(0)); - -static cl::opt ClExperimentalTracePC("sanitizer-coverage-trace-pc", - cl::desc("Experimental pc tracing"), - cl::Hidden, cl::init(false)); +static cl::opt ClTracePC("sanitizer-coverage-trace-pc", + cl::desc("Experimental pc tracing"), cl::Hidden, + cl::init(false)); static cl::opt ClTracePCGuard("sanitizer-coverage-trace-pc-guard", cl::desc("pc tracing with a guard"), cl::Hidden, cl::init(false)); +static cl::opt ClInline8bitCounters("sanitizer-coverage-inline-8bit-counters", + cl::desc("increments 8-bit counter for every edge"), + cl::Hidden, cl::init(false)); + static cl::opt ClCMPTracing("sanitizer-coverage-trace-compares", cl::desc("Tracing of CMP and similar instructions"), @@ -149,8 +132,11 @@ SanitizerCoverageOptions OverrideFromCL(SanitizerCoverageOptions Options) { Options.TraceCmp |= ClCMPTracing; Options.TraceDiv |= ClDIVTracing; Options.TraceGep |= ClGEPTracing; - Options.TracePC |= ClExperimentalTracePC; + Options.TracePC |= ClTracePC; Options.TracePCGuard |= ClTracePCGuard; + Options.Inline8bitCounters |= ClInline8bitCounters; + if (!Options.TracePCGuard && !Options.TracePC && !Options.Inline8bitCounters) + Options.TracePCGuard = true; // TracePCGuard is default. 
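Note that the three instrumentation modes chosen above are independent rather than mutually exclusive; the injection code later in this patch tests each one separately. What each mode adds at an instrumented block, sketched as C++ against the callback names declared above (the snippet links only where a sanitizer runtime, or stubs, provide the two callbacks):

  #include <cstdint>

  extern "C" void __sanitizer_cov_trace_pc();
  extern "C" void __sanitizer_cov_trace_pc_guard(uint32_t *guard);

  // Stand-ins for the per-function arrays the pass places in the
  // __sancov_guards / __sancov_cntrs sections, one slot per block.
  static uint32_t guards[1];
  static uint8_t counters[1];

  void block_entry_sketch() {
    // TracePC: a bare callback; the callee recovers the PC itself.
    __sanitizer_cov_trace_pc();
    // TracePCGuard: a callback carrying this block's guard slot.
    __sanitizer_cov_trace_pc_guard(&guards[0]);
    // Inline8bitCounters: no call at all, just load/increment/store.
    ++counters[0];
  }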
Options.NoPrune |= !ClPruneBlocks; return Options; } @@ -183,19 +169,22 @@ class SanitizerCoverageModule : public ModulePass { void InjectTraceForSwitch(Function &F, ArrayRef SwitchTraceTargets); bool InjectCoverage(Function &F, ArrayRef AllBlocks); - void CreateFunctionGuardArray(size_t NumGuards, Function &F); - void SetNoSanitizeMetadata(Instruction *I); - void InjectCoverageAtBlock(Function &F, BasicBlock &BB, size_t Idx, - bool UseCalls); - unsigned NumberOfInstrumentedBlocks() { - return SanCovFunction->getNumUses() + - SanCovWithCheckFunction->getNumUses(); + GlobalVariable *CreateFunctionLocalArrayInSection(size_t NumElements, + Function &F, Type *Ty, + const char *Section); + void CreateFunctionLocalArrays(size_t NumGuards, Function &F); + void InjectCoverageAtBlock(Function &F, BasicBlock &BB, size_t Idx); + void CreateInitCallForSection(Module &M, const char *InitFunctionName, + Type *Ty, const std::string &Section); + + void SetNoSanitizeMetadata(Instruction *I) { + I->setMetadata(I->getModule()->getMDKindID("nosanitize"), + MDNode::get(*C, None)); } - StringRef getSanCovTracePCGuardSection() const; - StringRef getSanCovTracePCGuardSectionStart() const; - StringRef getSanCovTracePCGuardSectionEnd() const; - Function *SanCovFunction; - Function *SanCovWithCheckFunction; + + std::string getSectionName(const std::string &Section) const; + std::string getSectionStart(const std::string &Section) const; + std::string getSectionEnd(const std::string &Section) const; Function *SanCovTracePCIndir; Function *SanCovTracePC, *SanCovTracePCGuard; Function *SanCovTraceCmpFunction[4]; @@ -203,21 +192,48 @@ class SanitizerCoverageModule : public ModulePass { Function *SanCovTraceGepFunction; Function *SanCovTraceSwitchFunction; InlineAsm *EmptyAsm; - Type *IntptrTy, *IntptrPtrTy, *Int64Ty, *Int64PtrTy, *Int32Ty, *Int32PtrTy; + Type *IntptrTy, *IntptrPtrTy, *Int64Ty, *Int64PtrTy, *Int32Ty, *Int32PtrTy, + *Int8Ty, *Int8PtrTy; Module *CurModule; Triple TargetTriple; LLVMContext *C; const DataLayout *DL; - GlobalVariable *GuardArray; GlobalVariable *FunctionGuardArray; // for trace-pc-guard. - bool HasSancovGuardsSection; + GlobalVariable *Function8bitCounterArray; // for inline-8bit-counters. SanitizerCoverageOptions Options; }; } // namespace +void SanitizerCoverageModule::CreateInitCallForSection( + Module &M, const char *InitFunctionName, Type *Ty, + const std::string &Section) { + IRBuilder<> IRB(M.getContext()); + Function *CtorFunc; + GlobalVariable *SecStart = + new GlobalVariable(M, Ty, false, GlobalVariable::ExternalLinkage, nullptr, + getSectionStart(Section)); + SecStart->setVisibility(GlobalValue::HiddenVisibility); + GlobalVariable *SecEnd = + new GlobalVariable(M, Ty, false, GlobalVariable::ExternalLinkage, + nullptr, getSectionEnd(Section)); + SecEnd->setVisibility(GlobalValue::HiddenVisibility); + + std::tie(CtorFunc, std::ignore) = createSanitizerCtorAndInitFunctions( + M, SanCovModuleCtorName, InitFunctionName, {Ty, Ty}, + {IRB.CreatePointerCast(SecStart, Ty), IRB.CreatePointerCast(SecEnd, Ty)}); + + if (TargetTriple.supportsCOMDAT()) { + // Use comdat to dedup CtorFunc. 
+ CtorFunc->setComdat(M.getOrInsertComdat(SanCovModuleCtorName)); + appendToGlobalCtors(M, CtorFunc, SanCtorAndDtorPriority, CtorFunc); + } else { + appendToGlobalCtors(M, CtorFunc, SanCtorAndDtorPriority); + } +} + bool SanitizerCoverageModule::runOnModule(Module &M) { if (Options.CoverageType == SanitizerCoverageOptions::SCK_None) return false; @@ -225,21 +241,19 @@ bool SanitizerCoverageModule::runOnModule(Module &M) { DL = &M.getDataLayout(); CurModule = &M; TargetTriple = Triple(M.getTargetTriple()); - HasSancovGuardsSection = false; + FunctionGuardArray = nullptr; + Function8bitCounterArray = nullptr; IntptrTy = Type::getIntNTy(*C, DL->getPointerSizeInBits()); IntptrPtrTy = PointerType::getUnqual(IntptrTy); Type *VoidTy = Type::getVoidTy(*C); IRBuilder<> IRB(*C); - Type *Int8PtrTy = PointerType::getUnqual(IRB.getInt8Ty()); Int64PtrTy = PointerType::getUnqual(IRB.getInt64Ty()); Int32PtrTy = PointerType::getUnqual(IRB.getInt32Ty()); + Int8PtrTy = PointerType::getUnqual(IRB.getInt8Ty()); Int64Ty = IRB.getInt64Ty(); Int32Ty = IRB.getInt32Ty(); + Int8Ty = IRB.getInt8Ty(); - SanCovFunction = checkSanitizerInterfaceFunction( - M.getOrInsertFunction(SanCovName, VoidTy, Int32PtrTy)); - SanCovWithCheckFunction = checkSanitizerInterfaceFunction( - M.getOrInsertFunction(SanCovWithCheckName, VoidTy, Int32PtrTy)); SanCovTracePCIndir = checkSanitizerInterfaceFunction( M.getOrInsertFunction(SanCovTracePCIndirName, VoidTy, IntptrTy)); SanCovTraceCmpFunction[0] = @@ -267,6 +281,16 @@ bool SanitizerCoverageModule::runOnModule(Module &M) { SanCovTraceSwitchFunction = checkSanitizerInterfaceFunction(M.getOrInsertFunction( SanCovTraceSwitchName, VoidTy, Int64Ty, Int64PtrTy)); + // Make sure smaller parameters are zero-extended to i64 as required by the + // x86_64 ABI. + if (TargetTriple.getArch() == Triple::x86_64) { + for (int i = 0; i < 3; i++) { + SanCovTraceCmpFunction[i]->addParamAttr(0, Attribute::ZExt); + SanCovTraceCmpFunction[i]->addParamAttr(1, Attribute::ZExt); + } + SanCovTraceDivFunction[0]->addParamAttr(0, Attribute::ZExt); + } + // We insert an empty inline asm after cov callbacks to avoid callback merge. EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false), @@ -278,78 +302,15 @@ bool SanitizerCoverageModule::runOnModule(Module &M) { SanCovTracePCGuard = checkSanitizerInterfaceFunction(M.getOrInsertFunction( SanCovTracePCGuardName, VoidTy, Int32PtrTy)); - // At this point we create a dummy array of guards because we don't - // know how many elements we will need. - Type *Int32Ty = IRB.getInt32Ty(); - - if (!Options.TracePCGuard) - GuardArray = - new GlobalVariable(M, Int32Ty, false, GlobalValue::ExternalLinkage, - nullptr, "__sancov_gen_cov_tmp"); - for (auto &F : M) runOnFunction(F); - auto N = NumberOfInstrumentedBlocks(); - - GlobalVariable *RealGuardArray = nullptr; - if (!Options.TracePCGuard) { - // Now we know how many elements we need. Create an array of guards - // with one extra element at the beginning for the size. - Type *Int32ArrayNTy = ArrayType::get(Int32Ty, N + 1); - RealGuardArray = new GlobalVariable( - M, Int32ArrayNTy, false, GlobalValue::PrivateLinkage, - Constant::getNullValue(Int32ArrayNTy), "__sancov_gen_cov"); - - // Replace the dummy array with the real one. 
- GuardArray->replaceAllUsesWith( - IRB.CreatePointerCast(RealGuardArray, Int32PtrTy)); - GuardArray->eraseFromParent(); - } - - // Create variable for module (compilation unit) name - Constant *ModNameStrConst = - ConstantDataArray::getString(M.getContext(), M.getName(), true); - GlobalVariable *ModuleName = new GlobalVariable( - M, ModNameStrConst->getType(), true, GlobalValue::PrivateLinkage, - ModNameStrConst, "__sancov_gen_modname"); - if (Options.TracePCGuard) { - if (HasSancovGuardsSection) { - Function *CtorFunc; - GlobalVariable *SecStart = new GlobalVariable( - M, Int32PtrTy, false, GlobalVariable::ExternalLinkage, nullptr, - getSanCovTracePCGuardSectionStart()); - SecStart->setVisibility(GlobalValue::HiddenVisibility); - GlobalVariable *SecEnd = new GlobalVariable( - M, Int32PtrTy, false, GlobalVariable::ExternalLinkage, nullptr, - getSanCovTracePCGuardSectionEnd()); - SecEnd->setVisibility(GlobalValue::HiddenVisibility); - - std::tie(CtorFunc, std::ignore) = createSanitizerCtorAndInitFunctions( - M, SanCovModuleCtorName, SanCovTracePCGuardInitName, - {Int32PtrTy, Int32PtrTy}, - {IRB.CreatePointerCast(SecStart, Int32PtrTy), - IRB.CreatePointerCast(SecEnd, Int32PtrTy)}); - - if (TargetTriple.supportsCOMDAT()) { - // Use comdat to dedup CtorFunc. - CtorFunc->setComdat(M.getOrInsertComdat(SanCovModuleCtorName)); - appendToGlobalCtors(M, CtorFunc, SanCtorAndDtorPriority, CtorFunc); - } else { - appendToGlobalCtors(M, CtorFunc, SanCtorAndDtorPriority); - } - } - } else if (!Options.TracePC) { - Function *CtorFunc; - std::tie(CtorFunc, std::ignore) = createSanitizerCtorAndInitFunctions( - M, SanCovModuleCtorName, SanCovModuleInitName, - {Int32PtrTy, IntptrTy, Int8PtrTy, Int8PtrTy}, - {IRB.CreatePointerCast(RealGuardArray, Int32PtrTy), - ConstantInt::get(IntptrTy, N), Constant::getNullValue(Int8PtrTy), - IRB.CreatePointerCast(ModuleName, Int8PtrTy)}); - - appendToGlobalCtors(M, CtorFunc, SanCtorAndDtorPriority); - } + if (FunctionGuardArray) + CreateInitCallForSection(M, SanCovTracePCGuardInitName, Int32PtrTy, + SanCovGuardsSectionName); + if (Function8bitCounterArray) + CreateInitCallForSection(M, SanCov8bitCountersInitName, Int8PtrTy, + SanCovCountersSectionName); return true; } @@ -401,7 +362,10 @@ static bool shouldInstrumentBlock(const Function &F, const BasicBlock *BB, if (Options.NoPrune || &F.getEntryBlock() == BB) return true; - return !(isFullDominator(BB, DT) || isFullPostDominator(BB, PDT)); + // Do not instrument full dominators, or full post-dominators with multiple + // predecessors. 
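A hand-drawn example of the pruning rule (not taken from the test suite): in the function below the entry block dominates every block and the return block post-dominates every block through two predecessors, so only the entry and the two branch arms need instrumentation. The return expression that follows encodes exactly this rule.

  int sketch(int x) {  // entry: always instrumented (checked above)
    int r;
    if (x > 0)
      r = 1;           // arm 1: instrumented
    else
      r = 2;           // arm 2: instrumented
    return r;          // full post-dominator with two predecessors:
  }                    // pruned, since reaching either arm implies it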
+ return !isFullDominator(BB, DT) + && !(isFullPostDominator(BB, PDT) && !BB->getSinglePredecessor()); } bool SanitizerCoverageModule::runOnFunction(Function &F) { @@ -470,17 +434,26 @@ bool SanitizerCoverageModule::runOnFunction(Function &F) { InjectTraceForGep(F, GepTraceTargets); return true; } -void SanitizerCoverageModule::CreateFunctionGuardArray(size_t NumGuards, - Function &F) { - if (!Options.TracePCGuard) return; - HasSancovGuardsSection = true; - ArrayType *ArrayOfInt32Ty = ArrayType::get(Int32Ty, NumGuards); - FunctionGuardArray = new GlobalVariable( - *CurModule, ArrayOfInt32Ty, false, GlobalVariable::PrivateLinkage, - Constant::getNullValue(ArrayOfInt32Ty), "__sancov_gen_"); + +GlobalVariable *SanitizerCoverageModule::CreateFunctionLocalArrayInSection( + size_t NumElements, Function &F, Type *Ty, const char *Section) { + ArrayType *ArrayTy = ArrayType::get(Ty, NumElements); + auto Array = new GlobalVariable( + *CurModule, ArrayTy, false, GlobalVariable::PrivateLinkage, + Constant::getNullValue(ArrayTy), "__sancov_gen_"); if (auto Comdat = F.getComdat()) - FunctionGuardArray->setComdat(Comdat); - FunctionGuardArray->setSection(getSanCovTracePCGuardSection()); + Array->setComdat(Comdat); + Array->setSection(getSectionName(Section)); + return Array; +} +void SanitizerCoverageModule::CreateFunctionLocalArrays(size_t NumGuards, + Function &F) { + if (Options.TracePCGuard) + FunctionGuardArray = CreateFunctionLocalArrayInSection( + NumGuards, F, Int32Ty, SanCovGuardsSectionName); + if (Options.Inline8bitCounters) + Function8bitCounterArray = CreateFunctionLocalArrayInSection( + NumGuards, F, Int8Ty, SanCovCountersSectionName); } bool SanitizerCoverageModule::InjectCoverage(Function &F, @@ -490,14 +463,13 @@ bool SanitizerCoverageModule::InjectCoverage(Function &F, case SanitizerCoverageOptions::SCK_None: return false; case SanitizerCoverageOptions::SCK_Function: - CreateFunctionGuardArray(1, F); - InjectCoverageAtBlock(F, F.getEntryBlock(), 0, false); + CreateFunctionLocalArrays(1, F); + InjectCoverageAtBlock(F, F.getEntryBlock(), 0); return true; default: { - bool UseCalls = ClCoverageBlockThreshold < AllBlocks.size(); - CreateFunctionGuardArray(AllBlocks.size(), F); + CreateFunctionLocalArrays(AllBlocks.size(), F); for (size_t i = 0, N = AllBlocks.size(); i < N; i++) - InjectCoverageAtBlock(F, *AllBlocks[i], i, UseCalls); + InjectCoverageAtBlock(F, *AllBlocks[i], i); return true; } } @@ -514,8 +486,7 @@ void SanitizerCoverageModule::InjectCoverageForIndirectCalls( Function &F, ArrayRef IndirCalls) { if (IndirCalls.empty()) return; - if (!Options.TracePC && !Options.TracePCGuard) - return; + assert(Options.TracePC || Options.TracePCGuard || Options.Inline8bitCounters); for (auto I : IndirCalls) { IRBuilder<> IRB(I); CallSite CS(I); @@ -622,13 +593,8 @@ void SanitizerCoverageModule::InjectTraceForCmp( } } -void SanitizerCoverageModule::SetNoSanitizeMetadata(Instruction *I) { - I->setMetadata(I->getModule()->getMDKindID("nosanitize"), - MDNode::get(*C, None)); -} - void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB, - size_t Idx, bool UseCalls) { + size_t Idx) { BasicBlock::iterator IP = BB.getFirstInsertionPt(); bool IsEntryBB = &BB == &F.getEntryBlock(); DebugLoc EntryLoc; @@ -648,68 +614,48 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB, if (Options.TracePC) { IRB.CreateCall(SanCovTracePC); // gets the PC using GET_CALLER_PC. IRB.CreateCall(EmptyAsm, {}); // Avoids callback merge. 
- } else if (Options.TracePCGuard) { + } + if (Options.TracePCGuard) { auto GuardPtr = IRB.CreateIntToPtr( IRB.CreateAdd(IRB.CreatePointerCast(FunctionGuardArray, IntptrTy), ConstantInt::get(IntptrTy, Idx * 4)), Int32PtrTy); - if (!UseCalls) { - auto GuardLoad = IRB.CreateLoad(GuardPtr); - GuardLoad->setAtomic(AtomicOrdering::Monotonic); - GuardLoad->setAlignment(8); - SetNoSanitizeMetadata(GuardLoad); // Don't instrument with e.g. asan. - auto Cmp = IRB.CreateICmpNE( - GuardLoad, Constant::getNullValue(GuardLoad->getType())); - auto Ins = SplitBlockAndInsertIfThen( - Cmp, &*IP, false, MDBuilder(*C).createBranchWeights(1, 100000)); - IRB.SetInsertPoint(Ins); - IRB.SetCurrentDebugLocation(EntryLoc); - } IRB.CreateCall(SanCovTracePCGuard, GuardPtr); IRB.CreateCall(EmptyAsm, {}); // Avoids callback merge. - } else { - Value *GuardP = IRB.CreateAdd( - IRB.CreatePointerCast(GuardArray, IntptrTy), - ConstantInt::get(IntptrTy, (1 + NumberOfInstrumentedBlocks()) * 4)); - GuardP = IRB.CreateIntToPtr(GuardP, Int32PtrTy); - if (UseCalls) { - IRB.CreateCall(SanCovWithCheckFunction, GuardP); - } else { - LoadInst *Load = IRB.CreateLoad(GuardP); - Load->setAtomic(AtomicOrdering::Monotonic); - Load->setAlignment(4); - SetNoSanitizeMetadata(Load); - Value *Cmp = - IRB.CreateICmpSGE(Constant::getNullValue(Load->getType()), Load); - Instruction *Ins = SplitBlockAndInsertIfThen( - Cmp, &*IP, false, MDBuilder(*C).createBranchWeights(1, 100000)); - IRB.SetInsertPoint(Ins); - IRB.SetCurrentDebugLocation(EntryLoc); - // __sanitizer_cov gets the PC of the instruction using GET_CALLER_PC. - IRB.CreateCall(SanCovFunction, GuardP); - IRB.CreateCall(EmptyAsm, {}); // Avoids callback merge. - } + } + if (Options.Inline8bitCounters) { + auto CounterPtr = IRB.CreateGEP( + Function8bitCounterArray, + {ConstantInt::get(IntptrTy, 0), ConstantInt::get(IntptrTy, Idx)}); + auto Load = IRB.CreateLoad(CounterPtr); + auto Inc = IRB.CreateAdd(Load, ConstantInt::get(Int8Ty, 1)); + auto Store = IRB.CreateStore(Inc, CounterPtr); + SetNoSanitizeMetadata(Load); + SetNoSanitizeMetadata(Store); } } -StringRef SanitizerCoverageModule::getSanCovTracePCGuardSection() const { +std::string +SanitizerCoverageModule::getSectionName(const std::string &Section) const { if (TargetTriple.getObjectFormat() == Triple::COFF) return ".SCOV$M"; if (TargetTriple.isOSBinFormatMachO()) - return "__DATA,__sancov_guards"; - return "__sancov_guards"; + return "__DATA,__" + Section; + return "__" + Section; } -StringRef SanitizerCoverageModule::getSanCovTracePCGuardSectionStart() const { +std::string +SanitizerCoverageModule::getSectionStart(const std::string &Section) const { if (TargetTriple.isOSBinFormatMachO()) - return "\1section$start$__DATA$__sancov_guards"; - return "__start___sancov_guards"; + return "\1section$start$__DATA$__" + Section; + return "__start___" + Section; } -StringRef SanitizerCoverageModule::getSanCovTracePCGuardSectionEnd() const { +std::string +SanitizerCoverageModule::getSectionEnd(const std::string &Section) const { if (TargetTriple.isOSBinFormatMachO()) - return "\1section$end$__DATA$__sancov_guards"; - return "__stop___sancov_guards"; + return "\1section$end$__DATA$__" + Section; + return "__stop___" + Section; } diff --git a/interpreter/llvm/src/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/interpreter/llvm/src/lib/Transforms/Instrumentation/ThreadSanitizer.cpp index 9260217bd5e62..ec6904486e109 100644 --- a/interpreter/llvm/src/lib/Transforms/Instrumentation/ThreadSanitizer.cpp +++ 
b/interpreter/llvm/src/lib/Transforms/Instrumentation/ThreadSanitizer.cpp @@ -19,7 +19,6 @@ // The rest is handled by the run-time library. //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Instrumentation.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" @@ -42,6 +41,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/EscapeEnumerator.h" #include "llvm/Transforms/Utils/Local.h" @@ -379,10 +379,11 @@ void ThreadSanitizer::chooseInstructionsToInstrument( } static bool isAtomic(Instruction *I) { + // TODO: Ask TTI whether synchronization scope is between threads. if (LoadInst *LI = dyn_cast(I)) - return LI->isAtomic() && LI->getSynchScope() == CrossThread; + return LI->isAtomic() && LI->getSyncScopeID() != SyncScope::SingleThread; if (StoreInst *SI = dyn_cast(I)) - return SI->isAtomic() && SI->getSynchScope() == CrossThread; + return SI->isAtomic() && SI->getSyncScopeID() != SyncScope::SingleThread; if (isa(I)) return true; if (isa(I)) @@ -676,7 +677,7 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I, const DataLayout &DL) { I->eraseFromParent(); } else if (FenceInst *FI = dyn_cast(I)) { Value *Args[] = {createOrdering(&IRB, FI->getOrdering())}; - Function *F = FI->getSynchScope() == SingleThread ? + Function *F = FI->getSyncScopeID() == SyncScope::SingleThread ? TsanAtomicSignalFence : TsanAtomicThreadFence; CallInst *C = CallInst::Create(F, Args); ReplaceInstWithInst(I, C); diff --git a/interpreter/llvm/src/lib/Transforms/ObjCARC/BlotMapVector.h b/interpreter/llvm/src/lib/Transforms/ObjCARC/BlotMapVector.h index ef075bdccbfed..9c5cf6f5f5ab0 100644 --- a/interpreter/llvm/src/lib/Transforms/ObjCARC/BlotMapVector.h +++ b/interpreter/llvm/src/lib/Transforms/ObjCARC/BlotMapVector.h @@ -8,8 +8,8 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/DenseMap.h" -#include #include +#include namespace llvm { /// \brief An associative container with fast insertion-order (deterministic) diff --git a/interpreter/llvm/src/lib/Transforms/ObjCARC/DependencyAnalysis.cpp b/interpreter/llvm/src/lib/Transforms/ObjCARC/DependencyAnalysis.cpp index 9d78e5ae3b9b6..464805051c65f 100644 --- a/interpreter/llvm/src/lib/Transforms/ObjCARC/DependencyAnalysis.cpp +++ b/interpreter/llvm/src/lib/Transforms/ObjCARC/DependencyAnalysis.cpp @@ -20,8 +20,8 @@ /// //===----------------------------------------------------------------------===// -#include "ObjCARC.h" #include "DependencyAnalysis.h" +#include "ObjCARC.h" #include "ProvenanceAnalysis.h" #include "llvm/IR/CFG.h" diff --git a/interpreter/llvm/src/lib/Transforms/ObjCARC/ObjCARCContract.cpp b/interpreter/llvm/src/lib/Transforms/ObjCARC/ObjCARCContract.cpp index a86eaaec76412..e70e7591f6a70 100644 --- a/interpreter/llvm/src/lib/Transforms/ObjCARC/ObjCARCContract.cpp +++ b/interpreter/llvm/src/lib/Transforms/ObjCARC/ObjCARCContract.cpp @@ -26,9 +26,9 @@ // TODO: ObjCARCContract could insert PHI nodes when uses aren't // dominated by single calls. 
-#include "ObjCARC.h" #include "ARCRuntimeEntryPoints.h" #include "DependencyAnalysis.h" +#include "ObjCARC.h" #include "ProvenanceAnalysis.h" #include "llvm/ADT/Statistic.h" #include "llvm/IR/Dominators.h" diff --git a/interpreter/llvm/src/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/interpreter/llvm/src/lib/Transforms/ObjCARC/ObjCARCOpts.cpp index 3c73376c99068..8f3a33f66c7f5 100644 --- a/interpreter/llvm/src/lib/Transforms/ObjCARC/ObjCARCOpts.cpp +++ b/interpreter/llvm/src/lib/Transforms/ObjCARC/ObjCARCOpts.cpp @@ -24,10 +24,10 @@ /// //===----------------------------------------------------------------------===// -#include "ObjCARC.h" #include "ARCRuntimeEntryPoints.h" #include "BlotMapVector.h" #include "DependencyAnalysis.h" +#include "ObjCARC.h" #include "ProvenanceAnalysis.h" #include "PtrState.h" #include "llvm/ADT/DenseMap.h" diff --git a/interpreter/llvm/src/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp b/interpreter/llvm/src/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp index 9ffdfb4f7f9c6..62fc52f6d091b 100644 --- a/interpreter/llvm/src/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp +++ b/interpreter/llvm/src/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp @@ -22,8 +22,8 @@ /// //===----------------------------------------------------------------------===// -#include "ObjCARC.h" #include "ProvenanceAnalysis.h" +#include "ObjCARC.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" diff --git a/interpreter/llvm/src/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp b/interpreter/llvm/src/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp index c274e8182fb5a..870a5f600fd84 100644 --- a/interpreter/llvm/src/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp +++ b/interpreter/llvm/src/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp @@ -8,13 +8,13 @@ //===----------------------------------------------------------------------===// #include "ProvenanceAnalysis.h" -#include "llvm/Pass.h" #include "llvm/ADT/SetVector.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/Passes.h" -#include "llvm/IR/InstIterator.h" #include "llvm/IR/Function.h" +#include "llvm/IR/InstIterator.h" #include "llvm/IR/Module.h" +#include "llvm/Pass.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/interpreter/llvm/src/lib/Transforms/ObjCARC/PtrState.h b/interpreter/llvm/src/lib/Transforms/ObjCARC/PtrState.h index 9749e44822b2d..87298fa59bfdc 100644 --- a/interpreter/llvm/src/lib/Transforms/ObjCARC/PtrState.h +++ b/interpreter/llvm/src/lib/Transforms/ObjCARC/PtrState.h @@ -21,8 +21,8 @@ #include "llvm/Analysis/ObjCARCInstKind.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Value.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" namespace llvm { namespace objcarc { diff --git a/interpreter/llvm/src/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/interpreter/llvm/src/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp index fd931c521c8f1..99480f12da9e3 100644 --- a/interpreter/llvm/src/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp +++ b/interpreter/llvm/src/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp @@ -19,12 +19,11 @@ #define AA_NAME "alignment-from-assumptions" #define DEBUG_TYPE AA_NAME #include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h" -#include "llvm/Transforms/Scalar.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/GlobalsModRef.h" #include 
"llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/ValueTracking.h" @@ -35,6 +34,7 @@ #include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" using namespace llvm; STATISTIC(NumLoadAlignChanged, diff --git a/interpreter/llvm/src/lib/Transforms/Scalar/BDCE.cpp b/interpreter/llvm/src/lib/Transforms/Scalar/BDCE.cpp index 61e8700f1cd67..2e5618686ec21 100644 --- a/interpreter/llvm/src/lib/Transforms/Scalar/BDCE.cpp +++ b/interpreter/llvm/src/lib/Transforms/Scalar/BDCE.cpp @@ -15,6 +15,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar/BDCE.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/DemandedBits.h" @@ -35,6 +36,46 @@ using namespace llvm; STATISTIC(NumRemoved, "Number of instructions removed (unused)"); STATISTIC(NumSimplified, "Number of instructions trivialized (dead bits)"); +/// If an instruction is trivialized (dead), then the chain of users of that +/// instruction may need to be cleared of assumptions that can no longer be +/// guaranteed correct. +static void clearAssumptionsOfUsers(Instruction *I, DemandedBits &DB) { + assert(I->getType()->isIntegerTy() && "Trivializing a non-integer value?"); + + // Initialize the worklist with eligible direct users. + SmallVector WorkList; + for (User *JU : I->users()) { + // If all bits of a user are demanded, then we know that nothing below that + // in the def-use chain needs to be changed. + auto *J = dyn_cast(JU); + if (J && !DB.getDemandedBits(J).isAllOnesValue()) + WorkList.push_back(J); + } + + // DFS through subsequent users while tracking visits to avoid cycles. + SmallPtrSet Visited; + while (!WorkList.empty()) { + Instruction *J = WorkList.pop_back_val(); + + // NSW, NUW, and exact are based on operands that might have changed. + J->dropPoisonGeneratingFlags(); + + // We do not have to worry about llvm.assume or range metadata: + // 1. llvm.assume demands its operand, so trivializing can't change it. + // 2. range metadata only applies to memory accesses which demand all bits. + + Visited.insert(J); + + for (User *KU : J->users()) { + // If all bits of a user are demanded, then we know that nothing below + // that in the def-use chain needs to be changed. + auto *K = dyn_cast(KU); + if (K && !Visited.count(K) && !DB.getDemandedBits(K).isAllOnesValue()) + WorkList.push_back(K); + } + } +} + static bool bitTrackingDCE(Function &F, DemandedBits &DB) { SmallVector Worklist; bool Changed = false; @@ -51,6 +92,9 @@ static bool bitTrackingDCE(Function &F, DemandedBits &DB) { // replacing all uses with something else. Then, if they don't need to // remain live (because they have side effects, etc.) we can remove them. DEBUG(dbgs() << "BDCE: Trivializing: " << I << " (all bits dead)\n"); + + clearAssumptionsOfUsers(&I, DB); + // FIXME: In theory we could substitute undef here instead of zero. // This should be reconsidered once we settle on the semantics of // undef, poison, etc. 
diff --git a/interpreter/llvm/src/lib/Transforms/Scalar/CMakeLists.txt b/interpreter/llvm/src/lib/Transforms/Scalar/CMakeLists.txt index 523390758769a..457c9427ab9ac 100644 --- a/interpreter/llvm/src/lib/Transforms/Scalar/CMakeLists.txt +++ b/interpreter/llvm/src/lib/Transforms/Scalar/CMakeLists.txt @@ -13,6 +13,7 @@ add_llvm_library(LLVMScalarOpts GuardWidening.cpp GVN.cpp GVNHoist.cpp + GVNSink.cpp IVUsersPrinter.cpp InductiveRangeCheckElimination.cpp IndVarSimplify.cpp @@ -21,7 +22,6 @@ add_llvm_library(LLVMScalarOpts LICM.cpp LoopAccessAnalysisPrinter.cpp LoopSink.cpp - LoadCombine.cpp LoopDeletion.cpp LoopDataPrefetch.cpp LoopDistribute.cpp diff --git a/interpreter/llvm/src/lib/Transforms/Scalar/ConstantHoisting.cpp b/interpreter/llvm/src/lib/Transforms/Scalar/ConstantHoisting.cpp index f62e111460ca0..122c9314e022a 100644 --- a/interpreter/llvm/src/lib/Transforms/Scalar/ConstantHoisting.cpp +++ b/interpreter/llvm/src/lib/Transforms/Scalar/ConstantHoisting.cpp @@ -38,11 +38,13 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/Local.h" #include using namespace llvm; @@ -54,7 +56,7 @@ STATISTIC(NumConstantsHoisted, "Number of constants hoisted"); STATISTIC(NumConstantsRebased, "Number of constants rebased"); static cl::opt ConstHoistWithBlockFrequency( - "consthoist-with-block-frequency", cl::init(false), cl::Hidden, + "consthoist-with-block-frequency", cl::init(true), cl::Hidden, cl::desc("Enable the use of the block frequency analysis to reduce the " "chance to execute const materialization more frequently than " "without hoisting.")); @@ -164,9 +166,9 @@ Instruction *ConstantHoistingPass::findMatInsertPt(Instruction *Inst, /// \brief Given \p BBs as input, find another set of BBs which collectively /// dominates \p BBs and has the minimal sum of frequencies. Return the BB /// set found in \p BBs. -void findBestInsertionSet(DominatorTree &DT, BlockFrequencyInfo &BFI, - BasicBlock *Entry, - SmallPtrSet &BBs) { +static void findBestInsertionSet(DominatorTree &DT, BlockFrequencyInfo &BFI, + BasicBlock *Entry, + SmallPtrSet &BBs) { assert(!BBs.count(Entry) && "Assume Entry is not in BBs"); // Nodes on the current path to the root. SmallPtrSet Path; @@ -230,7 +232,8 @@ void findBestInsertionSet(DominatorTree &DT, BlockFrequencyInfo &BFI, // Return the optimal insert points in BBs. if (Node == Entry) { BBs.clear(); - if (InsertPtsFreq > BFI.getBlockFreq(Node)) + if (InsertPtsFreq > BFI.getBlockFreq(Node) || + (InsertPtsFreq == BFI.getBlockFreq(Node) && InsertPts.size() > 1)) BBs.insert(Entry); else BBs.insert(InsertPts.begin(), InsertPts.end()); @@ -243,7 +246,15 @@ void findBestInsertionSet(DominatorTree &DT, BlockFrequencyInfo &BFI, SmallPtrSet &ParentInsertPts = InsertPtsMap[Parent].first; BlockFrequency &ParentPtsFreq = InsertPtsMap[Parent].second; // Choose to insert in Node or in subtree of Node. + // Don't hoist into an EHPad because we may not find a proper place to + // insert there. + // If the total frequency of InsertPts is the same as the frequency of the + // target Node, and InsertPts contains more than one node, choose hoisting + // to reduce code size.
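The equal-frequency tie-break added here is purely a code-size heuristic; a small worked example with hypothetical frequencies:

  // Node N has frequency 100; the candidate insertion points below it
  // are two blocks of frequency 50 each.
  //   Old test: 50 + 50 > 100 is false, so both points are kept and the
  //             constant is materialized twice.
  //   New test: the sums are equal and InsertPts.size() > 1, so N is
  //             chosen instead: the same expected execution count, but
  //             only one copy of the materialization code.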
+ if (NodeInBBs || + (!Node->isEHPad() && + (InsertPtsFreq > BFI.getBlockFreq(Node) || + (InsertPtsFreq == BFI.getBlockFreq(Node) && InsertPts.size() > 1)))) { ParentInsertPts.insert(Node); ParentPtsFreq += BFI.getBlockFreq(Node); } else { @@ -340,68 +351,65 @@ void ConstantHoistingPass::collectConstantCandidates( } } -/// \brief Scan the instruction for expensive integer constants and record them -/// in the constant candidate vector. -void ConstantHoistingPass::collectConstantCandidates( - ConstCandMapType &ConstCandMap, Instruction *Inst) { - // Skip all cast instructions. They are visited indirectly later on. - if (Inst->isCast()) - return; - - // Can't handle inline asm. Skip it. - if (auto Call = dyn_cast(Inst)) - if (isa(Call->getCalledValue())) - return; - // Switch cases must remain constant, and if the value being tested is - // constant the entire thing should disappear. - if (isa(Inst)) - return; +/// \brief Check the operand for instruction Inst at index Idx. +void ConstantHoistingPass::collectConstantCandidates( + ConstCandMapType &ConstCandMap, Instruction *Inst, unsigned Idx) { + Value *Opnd = Inst->getOperand(Idx); - // Static allocas (constant size in the entry block) are handled by - // prologue/epilogue insertion so they're free anyway. We definitely don't - // want to make them non-constant. - auto AI = dyn_cast(Inst); - if (AI && AI->isStaticAlloca()) + // Visit constant integers. + if (auto ConstInt = dyn_cast(Opnd)) { + collectConstantCandidates(ConstCandMap, Inst, Idx, ConstInt); return; + } - // Scan all operands. - for (unsigned Idx = 0, E = Inst->getNumOperands(); Idx != E; ++Idx) { - Value *Opnd = Inst->getOperand(Idx); + // Visit cast instructions that have constant integers. + if (auto CastInst = dyn_cast(Opnd)) { + // Only visit cast instructions, which have been skipped. All other + // instructions should have already been visited. + if (!CastInst->isCast()) + return; - // Visit constant integers. - if (auto ConstInt = dyn_cast(Opnd)) { + if (auto *ConstInt = dyn_cast(CastInst->getOperand(0))) { + // Pretend the constant is directly used by the instruction and ignore + // the cast instruction. collectConstantCandidates(ConstCandMap, Inst, Idx, ConstInt); - continue; + return; } + } - // Visit cast instructions that have constant integers. - if (auto CastInst = dyn_cast(Opnd)) { - // Only visit cast instructions, which have been skipped. All other - // instructions should have already been visited. - if (!CastInst->isCast()) - continue; + // Visit constant expressions that have constant integers. + if (auto ConstExpr = dyn_cast(Opnd)) { + // Only visit constant cast expressions. + if (!ConstExpr->isCast()) + return; - if (auto *ConstInt = dyn_cast(CastInst->getOperand(0))) { - // Pretend the constant is directly used by the instruction and ignore - // the cast instruction. - collectConstantCandidates(ConstCandMap, Inst, Idx, ConstInt); - continue; - } + if (auto ConstInt = dyn_cast(ConstExpr->getOperand(0))) { + // Pretend the constant is directly used by the instruction and ignore + // the constant expression. + collectConstantCandidates(ConstCandMap, Inst, Idx, ConstInt); + return; } + } +} - // Visit constant expressions that have constant integers. - if (auto ConstExpr = dyn_cast(Opnd)) { - // Only visit constant cast expressions. - if (!ConstExpr->isCast()) - continue; - if (auto ConstInt = dyn_cast(ConstExpr->getOperand(0))) { - // Pretend the constant is directly used by the instruction and ignore - // the constant expression. 
- collectConstantCandidates(ConstCandMap, Inst, Idx, ConstInt); - continue; - } +/// \brief Scan the instruction for expensive integer constants and record them +/// in the constant candidate vector. +void ConstantHoistingPass::collectConstantCandidates( + ConstCandMapType &ConstCandMap, Instruction *Inst) { + // Skip all cast instructions. They are visited indirectly later on. + if (Inst->isCast()) + return; + + // Scan all operands. + for (unsigned Idx = 0, E = Inst->getNumOperands(); Idx != E; ++Idx) { + // The cost of materializing the constants (defined in + // `TargetTransformInfo::getIntImmCost`) for instructions which only take + // constant variables is lower than `TargetTransformInfo::TCC_Basic`. So + // it's safe for us to collect constant candidates from all IntrinsicInsts. + if (canReplaceOperandWithVariable(Inst, Idx) || isa(Inst)) { + collectConstantCandidates(ConstCandMap, Inst, Idx); } } // end of for all operands } diff --git a/interpreter/llvm/src/lib/Transforms/Scalar/ConstantProp.cpp b/interpreter/llvm/src/lib/Transforms/Scalar/ConstantProp.cpp index 9e982194bac7e..4fa27891a9743 100644 --- a/interpreter/llvm/src/lib/Transforms/Scalar/ConstantProp.cpp +++ b/interpreter/llvm/src/lib/Transforms/Scalar/ConstantProp.cpp @@ -18,15 +18,15 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/Constant.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instruction.h" #include "llvm/Pass.h" -#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/Local.h" #include using namespace llvm; diff --git a/interpreter/llvm/src/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/interpreter/llvm/src/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp index 3f1a77b49a442..28157783daa7a 100644 --- a/interpreter/llvm/src/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ b/interpreter/llvm/src/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -232,8 +232,7 @@ static bool processSwitch(SwitchInst *SI, LazyValueInfo *LVI) { pred_iterator PB = pred_begin(BB), PE = pred_end(BB); if (PB == PE) return false; - // Analyse each switch case in turn. This is done in reverse order so that - // removing a case doesn't cause trouble for the iteration. + // Analyse each switch case in turn. bool Changed = false; for (auto CI = SI->case_begin(), CE = SI->case_end(); CI != CE;) { ConstantInt *Case = CI->getCaseValue(); @@ -291,7 +290,7 @@ static bool processSwitch(SwitchInst *SI, LazyValueInfo *LVI) { break; } - // Increment the case iterator sense we didn't delete it. + // Increment the case iterator since we didn't delete it. ++CI; } @@ -305,7 +304,7 @@ static bool processSwitch(SwitchInst *SI, LazyValueInfo *LVI) { /// Infer nonnull attributes for the arguments at the specified callsite. 
static bool processCallSite(CallSite CS, LazyValueInfo *LVI) { - SmallVector Indices; + SmallVector ArgNos; unsigned ArgNo = 0; for (Value *V : CS.args()) { @@ -318,18 +317,19 @@ static bool processCallSite(CallSite CS, LazyValueInfo *LVI) { LVI->getPredicateAt(ICmpInst::ICMP_EQ, V, ConstantPointerNull::get(Type), CS.getInstruction()) == LazyValueInfo::False) - Indices.push_back(ArgNo + AttributeList::FirstArgIndex); + ArgNos.push_back(ArgNo); ArgNo++; } assert(ArgNo == CS.arg_size() && "sanity check"); - if (Indices.empty()) + if (ArgNos.empty()) return false; AttributeList AS = CS.getAttributes(); LLVMContext &Ctx = CS.getInstruction()->getContext(); - AS = AS.addAttribute(Ctx, Indices, Attribute::get(Ctx, Attribute::NonNull)); + AS = AS.addParamAttribute(Ctx, ArgNos, + Attribute::get(Ctx, Attribute::NonNull)); CS.setAttributes(AS); return true; @@ -442,9 +442,8 @@ static bool processAdd(BinaryOperator *AddOp, LazyValueInfo *LVI) { bool Changed = false; if (!NUW) { - ConstantRange NUWRange = - LRange.makeGuaranteedNoWrapRegion(BinaryOperator::Add, LRange, - OBO::NoUnsignedWrap); + ConstantRange NUWRange = ConstantRange::makeGuaranteedNoWrapRegion( + BinaryOperator::Add, LRange, OBO::NoUnsignedWrap); if (!NUWRange.isEmptySet()) { bool NewNUW = NUWRange.contains(LazyRRange()); AddOp->setHasNoUnsignedWrap(NewNUW); @@ -452,9 +451,8 @@ static bool processAdd(BinaryOperator *AddOp, LazyValueInfo *LVI) { } } if (!NSW) { - ConstantRange NSWRange = - LRange.makeGuaranteedNoWrapRegion(BinaryOperator::Add, LRange, - OBO::NoSignedWrap); + ConstantRange NSWRange = ConstantRange::makeGuaranteedNoWrapRegion( + BinaryOperator::Add, LRange, OBO::NoSignedWrap); if (!NSWRange.isEmptySet()) { bool NewNSW = NSWRange.contains(LazyRRange()); AddOp->setHasNoSignedWrap(NewNSW); @@ -552,7 +550,7 @@ static bool runImpl(Function &F, LazyValueInfo *LVI, const SimplifyQuery &SQ) { BBChanged = true; } } - }; + } FnChanged |= BBChanged; } diff --git a/interpreter/llvm/src/lib/Transforms/Scalar/DCE.cpp b/interpreter/llvm/src/lib/Transforms/Scalar/DCE.cpp index 07a0ba9b12221..fa4806e884c30 100644 --- a/interpreter/llvm/src/lib/Transforms/Scalar/DCE.cpp +++ b/interpreter/llvm/src/lib/Transforms/Scalar/DCE.cpp @@ -19,10 +19,10 @@ #include "llvm/Transforms/Scalar/DCE.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instruction.h" #include "llvm/Pass.h" -#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; diff --git a/interpreter/llvm/src/lib/Transforms/Scalar/EarlyCSE.cpp b/interpreter/llvm/src/lib/Transforms/Scalar/EarlyCSE.cpp index d8f8a58a5fdfa..c5c9b2c185d63 100644 --- a/interpreter/llvm/src/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/interpreter/llvm/src/lib/Transforms/Scalar/EarlyCSE.cpp @@ -15,6 +15,7 @@ #include "llvm/Transforms/Scalar/EarlyCSE.h" #include "llvm/ADT/Hashing.h" #include "llvm/ADT/ScopedHashTable.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/GlobalsModRef.h" @@ -506,7 +507,7 @@ class EarlyCSE { if (MemoryAccess *MA = MSSA->getMemoryAccess(Inst)) { // Optimize MemoryPhi nodes that may become redundant by having all the // same input values once MA is removed. 
-      SmallVector<MemoryPhi *, 4> PhisToCheck;
+      SmallSetVector<MemoryPhi *, 4> PhisToCheck;
       SmallVector<MemoryAccess *, 8> WorkQueue;
       WorkQueue.push_back(MA);
       // Process MemoryPhi nodes in FIFO order using an ever-growing vector since
@@ -517,7 +518,7 @@ class EarlyCSE {
         for (auto *U : WI->users())
           if (MemoryPhi *MP = dyn_cast<MemoryPhi>(U))
-            PhisToCheck.push_back(MP);
+            PhisToCheck.insert(MP);
 
         MSSAUpdater->removeMemoryAccess(WI);
@@ -561,13 +562,27 @@ bool EarlyCSE::isSameMemGeneration(unsigned EarlierGeneration,
   if (!MSSA)
     return false;
 
+  // If MemorySSA has determined that one of EarlierInst or LaterInst does not
+  // read/write memory, then we can safely return true here.
+  // FIXME: We could be more aggressive when checking doesNotAccessMemory(),
+  // onlyReadsMemory(), mayReadFromMemory(), and mayWriteToMemory() in this pass
+  // by also checking the MemorySSA MemoryAccess on the instruction. Initial
+  // experiments suggest this isn't worthwhile, at least for C/C++ code compiled
+  // with the default optimization pipeline.
+  auto *EarlierMA = MSSA->getMemoryAccess(EarlierInst);
+  if (!EarlierMA)
+    return true;
+  auto *LaterMA = MSSA->getMemoryAccess(LaterInst);
+  if (!LaterMA)
+    return true;
+
   // Since we know LaterDef dominates LaterInst and EarlierInst dominates
   // LaterInst, if LaterDef dominates EarlierInst then it can't occur between
   // EarlierInst and LaterInst and neither can any other write that potentially
   // clobbers LaterInst.
   MemoryAccess *LaterDef =
       MSSA->getWalker()->getClobberingMemoryAccess(LaterInst);
-  return MSSA->dominates(LaterDef, MSSA->getMemoryAccess(EarlierInst));
+  return MSSA->dominates(LaterDef, EarlierMA);
 }
 
 bool EarlyCSE::processNode(DomTreeNode *Node) {
@@ -606,7 +621,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
         if (unsigned Count = replaceDominatedUsesWith(
                 CondInst, TorF, DT, BasicBlockEdge(Pred, BB))) {
           Changed = true;
-          NumCSECVP = NumCSECVP + Count;
+          NumCSECVP += Count;
         }
       }
     }
@@ -669,7 +684,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
       if (auto *KnownCond = AvailableValues.lookup(CondI)) {
         // Is the condition known to be true?
if (isa(KnownCond) && - cast(KnownCond)->isOneValue()) { + cast(KnownCond)->isOne()) { DEBUG(dbgs() << "EarlyCSE removing guard: " << *Inst << '\n'); removeMSSA(Inst); Inst->eraseFromParent(); diff --git a/interpreter/llvm/src/lib/Transforms/Scalar/FlattenCFGPass.cpp b/interpreter/llvm/src/lib/Transforms/Scalar/FlattenCFGPass.cpp index 185cdbdda3781..063df779a30bb 100644 --- a/interpreter/llvm/src/lib/Transforms/Scalar/FlattenCFGPass.cpp +++ b/interpreter/llvm/src/lib/Transforms/Scalar/FlattenCFGPass.cpp @@ -11,10 +11,10 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Scalar.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/IR/CFG.h" #include "llvm/Pass.h" +#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; diff --git a/interpreter/llvm/src/lib/Transforms/Scalar/GVN.cpp b/interpreter/llvm/src/lib/Transforms/Scalar/GVN.cpp index c04646eed49a8..ea28705e684dc 100644 --- a/interpreter/llvm/src/lib/Transforms/Scalar/GVN.cpp +++ b/interpreter/llvm/src/lib/Transforms/Scalar/GVN.cpp @@ -602,7 +602,7 @@ PreservedAnalyses GVN::run(Function &F, FunctionAnalysisManager &AM) { } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -LLVM_DUMP_METHOD void GVN::dump(DenseMap& d) { +LLVM_DUMP_METHOD void GVN::dump(DenseMap& d) const { errs() << "{\n"; for (DenseMap::iterator I = d.begin(), E = d.end(); I != E; ++I) { @@ -1166,8 +1166,9 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, auto *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", LI->isVolatile(), LI->getAlignment(), - LI->getOrdering(), LI->getSynchScope(), + LI->getOrdering(), LI->getSyncScopeID(), UnavailablePred->getTerminator()); + NewLoad->setDebugLoc(LI->getDebugLoc()); // Transfer the old load's AA tags to the new load. AAMDNodes Tags; @@ -1202,7 +1203,7 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, V->takeName(LI); if (Instruction *I = dyn_cast(V)) I->setDebugLoc(LI->getDebugLoc()); - if (V->getType()->getScalarType()->isPointerTy()) + if (V->getType()->isPtrOrPtrVectorTy()) MD->invalidateCachedPointerInfo(V); markInstructionForDeletion(LI); ORE->emit(OptimizationRemark(DEBUG_TYPE, "LoadPRE", LI) @@ -1289,7 +1290,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { // to propagate LI's DebugLoc because LI may not post-dominate I. if (LI->getDebugLoc() && LI->getParent() == I->getParent()) I->setDebugLoc(LI->getDebugLoc()); - if (V->getType()->getScalarType()->isPointerTy()) + if (V->getType()->isPtrOrPtrVectorTy()) MD->invalidateCachedPointerInfo(V); markInstructionForDeletion(LI); ++NumGVNLoad; @@ -1443,7 +1444,7 @@ bool GVN::processLoad(LoadInst *L) { reportLoadElim(L, AvailableValue, ORE); // Tell MDA to rexamine the reused pointer since we might have more // information after forwarding it. - if (MD && AvailableValue->getType()->getScalarType()->isPointerTy()) + if (MD && AvailableValue->getType()->isPtrOrPtrVectorTy()) MD->invalidateCachedPointerInfo(AvailableValue); return true; } @@ -1598,7 +1599,7 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root, // RHS neither 'true' nor 'false' - bail out. continue; // Whether RHS equals 'true'. Otherwise it equals 'false'. - bool isKnownTrue = CI->isAllOnesValue(); + bool isKnownTrue = CI->isMinusOne(); bool isKnownFalse = !isKnownTrue; // If "A && B" is known true then both A and B are known true. 
If "A || B" @@ -1698,7 +1699,7 @@ bool GVN::processInstruction(Instruction *I) { Changed = true; } if (Changed) { - if (MD && V->getType()->getScalarType()->isPointerTy()) + if (MD && V->getType()->isPtrOrPtrVectorTy()) MD->invalidateCachedPointerInfo(V); ++NumGVNSimpl; return true; @@ -1809,7 +1810,7 @@ bool GVN::processInstruction(Instruction *I) { // Remove it! patchAndReplaceAllUsesWith(I, Repl); - if (MD && Repl->getType()->getScalarType()->isPointerTy()) + if (MD && Repl->getType()->isPtrOrPtrVectorTy()) MD->invalidateCachedPointerInfo(Repl); markInstructionForDeletion(I); return true; @@ -2057,7 +2058,7 @@ bool GVN::performScalarPRE(Instruction *CurInst) { if (!performScalarPREInsertion(PREInstr, PREPred, ValNo)) { // If we failed insertion, make sure we remove the instruction. DEBUG(verifyRemoved(PREInstr)); - delete PREInstr; + PREInstr->deleteValue(); return false; } } @@ -2083,7 +2084,7 @@ bool GVN::performScalarPRE(Instruction *CurInst) { addToLeaderTable(ValNo, Phi, CurrentBlock); Phi->setDebugLoc(CurInst->getDebugLoc()); CurInst->replaceAllUsesWith(Phi); - if (MD && Phi->getType()->getScalarType()->isPointerTy()) + if (MD && Phi->getType()->isPtrOrPtrVectorTy()) MD->invalidateCachedPointerInfo(Phi); VN.erase(CurInst); removeFromLeaderTable(ValNo, CurInst, CurrentBlock); diff --git a/interpreter/llvm/src/lib/Transforms/Scalar/GVNHoist.cpp b/interpreter/llvm/src/lib/Transforms/Scalar/GVNHoist.cpp index b7514a6d57931..29de792bd248c 100644 --- a/interpreter/llvm/src/lib/Transforms/Scalar/GVNHoist.cpp +++ b/interpreter/llvm/src/lib/Transforms/Scalar/GVNHoist.cpp @@ -41,7 +41,6 @@ // ret void //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Scalar/GVN.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" @@ -50,6 +49,7 @@ #include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Scalar/GVN.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; diff --git a/interpreter/llvm/src/lib/Transforms/Scalar/GVNSink.cpp b/interpreter/llvm/src/lib/Transforms/Scalar/GVNSink.cpp new file mode 100644 index 0000000000000..5fd2dfc118b4b --- /dev/null +++ b/interpreter/llvm/src/lib/Transforms/Scalar/GVNSink.cpp @@ -0,0 +1,883 @@ +//===- GVNSink.cpp - sink expressions into successors -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file GVNSink.cpp +/// This pass attempts to sink instructions into successors, reducing static +/// instruction count and enabling if-conversion. +/// +/// We use a variant of global value numbering to decide what can be sunk. +/// Consider: +/// +/// [ %a1 = add i32 %b, 1 ] [ %c1 = add i32 %d, 1 ] +/// [ %a2 = xor i32 %a1, 1 ] [ %c2 = xor i32 %c1, 1 ] +/// \ / +/// [ %e = phi i32 %a2, %c2 ] +/// [ add i32 %e, 4 ] +/// +/// +/// GVN would number %a1 and %c1 differently because they compute different +/// results - the VN of an instruction is a function of its opcode and the +/// transitive closure of its operands. This is the key property for hoisting +/// and CSE. 
+/// +/// What we want when sinking however is for a numbering that is a function of +/// the *uses* of an instruction, which allows us to answer the question "if I +/// replace %a1 with %c1, will it contribute in an equivalent way to all +/// successive instructions?". The PostValueTable class in GVN provides this +/// mapping. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/Hashing.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/SCCIterator.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/MemorySSA.h" +#include "llvm/Analysis/PostDominators.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Verifier.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Scalar/GVN.h" +#include "llvm/Transforms/Scalar/GVNExpression.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" +#include +using namespace llvm; + +#define DEBUG_TYPE "gvn-sink" + +STATISTIC(NumRemoved, "Number of instructions removed"); + +namespace llvm { +namespace GVNExpression { + +LLVM_DUMP_METHOD void Expression::dump() const { + print(dbgs()); + dbgs() << "\n"; +} + +} +} + +namespace { + +static bool isMemoryInst(const Instruction *I) { + return isa(I) || isa(I) || + (isa(I) && !cast(I)->doesNotAccessMemory()) || + (isa(I) && !cast(I)->doesNotAccessMemory()); +} + +/// Iterates through instructions in a set of blocks in reverse order from the +/// first non-terminator. For example (assume all blocks have size n): +/// LockstepReverseIterator I([B1, B2, B3]); +/// *I-- = [B1[n], B2[n], B3[n]]; +/// *I-- = [B1[n-1], B2[n-1], B3[n-1]]; +/// *I-- = [B1[n-2], B2[n-2], B3[n-2]]; +/// ... +/// +/// It continues until all blocks have been exhausted. Use \c getActiveBlocks() +/// to +/// determine which blocks are still going and the order they appear in the +/// list returned by operator*. +class LockstepReverseIterator { + ArrayRef Blocks; + SmallPtrSet ActiveBlocks; + SmallVector Insts; + bool Fail; + +public: + LockstepReverseIterator(ArrayRef Blocks) : Blocks(Blocks) { + reset(); + } + + void reset() { + Fail = false; + ActiveBlocks.clear(); + for (BasicBlock *BB : Blocks) + ActiveBlocks.insert(BB); + Insts.clear(); + for (BasicBlock *BB : Blocks) { + if (BB->size() <= 1) { + // Block wasn't big enough - only contained a terminator. 
+ ActiveBlocks.erase(BB); + continue; + } + Insts.push_back(BB->getTerminator()->getPrevNode()); + } + if (Insts.empty()) + Fail = true; + } + + bool isValid() const { return !Fail; } + ArrayRef operator*() const { return Insts; } + SmallPtrSet &getActiveBlocks() { return ActiveBlocks; } + + void restrictToBlocks(SmallPtrSetImpl &Blocks) { + for (auto II = Insts.begin(); II != Insts.end();) { + if (std::find(Blocks.begin(), Blocks.end(), (*II)->getParent()) == + Blocks.end()) { + ActiveBlocks.erase((*II)->getParent()); + II = Insts.erase(II); + } else { + ++II; + } + } + } + + void operator--() { + if (Fail) + return; + SmallVector NewInsts; + for (auto *Inst : Insts) { + if (Inst == &Inst->getParent()->front()) + ActiveBlocks.erase(Inst->getParent()); + else + NewInsts.push_back(Inst->getPrevNode()); + } + if (NewInsts.empty()) { + Fail = true; + return; + } + Insts = NewInsts; + } +}; + +//===----------------------------------------------------------------------===// + +/// Candidate solution for sinking. There may be different ways to +/// sink instructions, differing in the number of instructions sunk, +/// the number of predecessors sunk from and the number of PHIs +/// required. +struct SinkingInstructionCandidate { + unsigned NumBlocks; + unsigned NumInstructions; + unsigned NumPHIs; + unsigned NumMemoryInsts; + int Cost = -1; + SmallVector Blocks; + + void calculateCost(unsigned NumOrigPHIs, unsigned NumOrigBlocks) { + unsigned NumExtraPHIs = NumPHIs - NumOrigPHIs; + unsigned SplitEdgeCost = (NumOrigBlocks > NumBlocks) ? 2 : 0; + Cost = (NumInstructions * (NumBlocks - 1)) - + (NumExtraPHIs * + NumExtraPHIs) // PHIs are expensive, so make sure they're worth it. + - SplitEdgeCost; + } + bool operator>(const SinkingInstructionCandidate &Other) const { + return Cost > Other.Cost; + } +}; + +#ifndef NDEBUG +llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, + const SinkingInstructionCandidate &C) { + OS << ""; + return OS; +} +#endif + +//===----------------------------------------------------------------------===// + +/// Describes a PHI node that may or may not exist. These track the PHIs +/// that must be created if we sunk a sequence of instructions. It provides +/// a hash function for efficient equality comparisons. +class ModelledPHI { + SmallVector Values; + SmallVector Blocks; + +public: + ModelledPHI() {} + ModelledPHI(const PHINode *PN) { + for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) + Blocks.push_back(PN->getIncomingBlock(I)); + std::sort(Blocks.begin(), Blocks.end()); + + // This assumes the PHI is already well-formed and there aren't conflicting + // incoming values for the same block. + for (auto *B : Blocks) + Values.push_back(PN->getIncomingValueForBlock(B)); + } + /// Create a dummy ModelledPHI that will compare unequal to any other ModelledPHI + /// without the same ID. + /// \note This is specifically for DenseMapInfo - do not use this! + static ModelledPHI createDummy(size_t ID) { + ModelledPHI M; + M.Values.push_back(reinterpret_cast(ID)); + return M; + } + + /// Create a PHI from an array of incoming values and incoming blocks. + template + ModelledPHI(const VArray &V, const BArray &B) { + std::copy(V.begin(), V.end(), std::back_inserter(Values)); + std::copy(B.begin(), B.end(), std::back_inserter(Blocks)); + } + + /// Create a PHI from [I[OpNum] for I in Insts]. 
+  template <typename BArray>
+  ModelledPHI(ArrayRef<Instruction *> Insts, unsigned OpNum, const BArray &B) {
+    std::copy(B.begin(), B.end(), std::back_inserter(Blocks));
+    for (auto *I : Insts)
+      Values.push_back(I->getOperand(OpNum));
+  }
+
+  /// Restrict the PHI's contents down to only \c NewBlocks.
+  /// \c NewBlocks must be a subset of \c this->Blocks.
+  void restrictToBlocks(const SmallPtrSetImpl<BasicBlock *> &NewBlocks) {
+    auto BI = Blocks.begin();
+    auto VI = Values.begin();
+    while (BI != Blocks.end()) {
+      assert(VI != Values.end());
+      if (std::find(NewBlocks.begin(), NewBlocks.end(), *BI) ==
+          NewBlocks.end()) {
+        BI = Blocks.erase(BI);
+        VI = Values.erase(VI);
+      } else {
+        ++BI;
+        ++VI;
+      }
+    }
+    assert(Blocks.size() == NewBlocks.size());
+  }
+
+  ArrayRef<Value *> getValues() const { return Values; }
+
+  bool areAllIncomingValuesSame() const {
+    return all_of(Values, [&](Value *V) { return V == Values[0]; });
+  }
+  bool areAllIncomingValuesSameType() const {
+    return all_of(
+        Values, [&](Value *V) { return V->getType() == Values[0]->getType(); });
+  }
+  bool areAnyIncomingValuesConstant() const {
+    return any_of(Values, [&](Value *V) { return isa<Constant>(V); });
+  }
+  // Hash functor
+  unsigned hash() const {
+    return (unsigned)hash_combine_range(Values.begin(), Values.end());
+  }
+  bool operator==(const ModelledPHI &Other) const {
+    return Values == Other.Values && Blocks == Other.Blocks;
+  }
+};
+
+template <> struct DenseMapInfo<ModelledPHI> {
+  static inline ModelledPHI &getEmptyKey() {
+    static ModelledPHI Dummy = ModelledPHI::createDummy(0);
+    return Dummy;
+  }
+  static inline ModelledPHI &getTombstoneKey() {
+    static ModelledPHI Dummy = ModelledPHI::createDummy(1);
+    return Dummy;
+  }
+  static unsigned getHashValue(const ModelledPHI &V) { return V.hash(); }
+  static bool isEqual(const ModelledPHI &LHS, const ModelledPHI &RHS) {
+    return LHS == RHS;
+  }
+};
+
+typedef DenseSet<ModelledPHI, DenseMapInfo<ModelledPHI>> ModelledPHISet;
+
+//===----------------------------------------------------------------------===//
+//                             ValueTable
+//===----------------------------------------------------------------------===//
+// This is a value number table where the value number is a function of the
+// *uses* of a value, rather than its operands. Thus, if VN(A) == VN(B) we know
+// that the program would be equivalent if we replaced A with PHI(A, B).
+//===----------------------------------------------------------------------===//
+
+/// A GVN expression describing how an instruction is used. The operands
+/// field of BasicExpression is used to store uses, not operands.
+///
+/// This class also contains fields for discriminators used when determining
+/// equivalence of instructions with side effects.
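The class that follows implements this on LLVM IR. As a standalone analogy (toy C++ with hypothetical names, not the LLVM types), use-based numbering boils down to hashing a value's opcode together with the ids of its users:

    #include <cstddef>
    #include <functional>
    #include <vector>

    // Toy analog of InstructionUseExpr: summarize a value by hashing the ids
    // of its *users* (plus its opcode) rather than its operands, so two
    // values that feed equivalent consumers hash the same.
    struct ToyValue {
      unsigned Opcode;
      std::vector<unsigned> UserIds; // kept sorted, as GVNSink sorts uses
    };

    std::size_t useBasedHash(const ToyValue &V) {
      std::size_t H = std::hash<unsigned>{}(V.Opcode);
      for (unsigned U : V.UserIds)
        H ^= std::hash<unsigned>{}(U) + 0x9e3779b9 + (H << 6) + (H >> 2);
      return H;
    }
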
+class InstructionUseExpr : public GVNExpression::BasicExpression { + unsigned MemoryUseOrder = -1; + bool Volatile = false; + +public: + InstructionUseExpr(Instruction *I, ArrayRecycler &R, + BumpPtrAllocator &A) + : GVNExpression::BasicExpression(I->getNumUses()) { + allocateOperands(R, A); + setOpcode(I->getOpcode()); + setType(I->getType()); + + for (auto &U : I->uses()) + op_push_back(U.getUser()); + std::sort(op_begin(), op_end()); + } + void setMemoryUseOrder(unsigned MUO) { MemoryUseOrder = MUO; } + void setVolatile(bool V) { Volatile = V; } + + virtual hash_code getHashValue() const { + return hash_combine(GVNExpression::BasicExpression::getHashValue(), + MemoryUseOrder, Volatile); + } + + template hash_code getHashValue(Function MapFn) { + hash_code H = + hash_combine(getOpcode(), getType(), MemoryUseOrder, Volatile); + for (auto *V : operands()) + H = hash_combine(H, MapFn(V)); + return H; + } +}; + +class ValueTable { + DenseMap ValueNumbering; + DenseMap ExpressionNumbering; + DenseMap HashNumbering; + BumpPtrAllocator Allocator; + ArrayRecycler Recycler; + uint32_t nextValueNumber; + + /// Create an expression for I based on its opcode and its uses. If I + /// touches or reads memory, the expression is also based upon its memory + /// order - see \c getMemoryUseOrder(). + InstructionUseExpr *createExpr(Instruction *I) { + InstructionUseExpr *E = + new (Allocator) InstructionUseExpr(I, Recycler, Allocator); + if (isMemoryInst(I)) + E->setMemoryUseOrder(getMemoryUseOrder(I)); + + if (CmpInst *C = dyn_cast(I)) { + CmpInst::Predicate Predicate = C->getPredicate(); + E->setOpcode((C->getOpcode() << 8) | Predicate); + } + return E; + } + + /// Helper to compute the value number for a memory instruction + /// (LoadInst/StoreInst), including checking the memory ordering and + /// volatility. + template InstructionUseExpr *createMemoryExpr(Inst *I) { + if (isStrongerThanUnordered(I->getOrdering()) || I->isAtomic()) + return nullptr; + InstructionUseExpr *E = createExpr(I); + E->setVolatile(I->isVolatile()); + return E; + } + +public: + /// Returns the value number for the specified value, assigning + /// it a new number if it did not have one before. 
+ uint32_t lookupOrAdd(Value *V) { + auto VI = ValueNumbering.find(V); + if (VI != ValueNumbering.end()) + return VI->second; + + if (!isa(V)) { + ValueNumbering[V] = nextValueNumber; + return nextValueNumber++; + } + + Instruction *I = cast(V); + InstructionUseExpr *exp = nullptr; + switch (I->getOpcode()) { + case Instruction::Load: + exp = createMemoryExpr(cast(I)); + break; + case Instruction::Store: + exp = createMemoryExpr(cast(I)); + break; + case Instruction::Call: + case Instruction::Invoke: + case Instruction::Add: + case Instruction::FAdd: + case Instruction::Sub: + case Instruction::FSub: + case Instruction::Mul: + case Instruction::FMul: + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::FDiv: + case Instruction::URem: + case Instruction::SRem: + case Instruction::FRem: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + case Instruction::ICmp: + case Instruction::FCmp: + case Instruction::Trunc: + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::UIToFP: + case Instruction::SIToFP: + case Instruction::FPTrunc: + case Instruction::FPExt: + case Instruction::PtrToInt: + case Instruction::IntToPtr: + case Instruction::BitCast: + case Instruction::Select: + case Instruction::ExtractElement: + case Instruction::InsertElement: + case Instruction::ShuffleVector: + case Instruction::InsertValue: + case Instruction::GetElementPtr: + exp = createExpr(I); + break; + default: + break; + } + + if (!exp) { + ValueNumbering[V] = nextValueNumber; + return nextValueNumber++; + } + + uint32_t e = ExpressionNumbering[exp]; + if (!e) { + hash_code H = exp->getHashValue([=](Value *V) { return lookupOrAdd(V); }); + auto I = HashNumbering.find(H); + if (I != HashNumbering.end()) { + e = I->second; + } else { + e = nextValueNumber++; + HashNumbering[H] = e; + ExpressionNumbering[exp] = e; + } + } + ValueNumbering[V] = e; + return e; + } + + /// Returns the value number of the specified value. Fails if the value has + /// not yet been numbered. + uint32_t lookup(Value *V) const { + auto VI = ValueNumbering.find(V); + assert(VI != ValueNumbering.end() && "Value not numbered?"); + return VI->second; + } + + /// Removes all value numberings and resets the value table. + void clear() { + ValueNumbering.clear(); + ExpressionNumbering.clear(); + HashNumbering.clear(); + Recycler.clear(Allocator); + nextValueNumber = 1; + } + + ValueTable() : nextValueNumber(1) {} + + /// \c Inst uses or touches memory. Return an ID describing the memory state + /// at \c Inst such that if getMemoryUseOrder(I1) == getMemoryUseOrder(I2), + /// the exact same memory operations happen after I1 and I2. + /// + /// This is a very hard problem in general, so we use domain-specific + /// knowledge that we only ever check for equivalence between blocks sharing a + /// single immediate successor that is common, and when determining if I1 == + /// I2 we will have already determined that next(I1) == next(I2). This + /// inductive property allows us to simply return the value number of the next + /// instruction that defines memory. 
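Before the definition that follows, a minimal standalone model of the inductive trick the comment describes (toy C++ under the assumption that candidate blocks are compared tail-first; this is not the LLVM API):

    #include <cstddef>
    #include <vector>

    struct ToyInst {
      bool WritesMemory; // models stores and clobbering calls
      std::size_t Id;    // models the instruction's value number
    };

    // Toy analog of getMemoryUseOrder(): the memory state at position I is
    // identified by the next memory-writing entry after it (0 if none),
    // which is sound here only because blocks are compared tail-first.
    std::size_t memoryUseOrder(const std::vector<ToyInst> &Block,
                               std::size_t I) {
      for (std::size_t J = I + 1; J < Block.size(); ++J)
        if (Block[J].WritesMemory)
          return Block[J].Id;
      return 0;
    }
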
+ uint32_t getMemoryUseOrder(Instruction *Inst) { + auto *BB = Inst->getParent(); + for (auto I = std::next(Inst->getIterator()), E = BB->end(); + I != E && !I->isTerminator(); ++I) { + if (!isMemoryInst(&*I)) + continue; + if (isa(&*I)) + continue; + CallInst *CI = dyn_cast(&*I); + if (CI && CI->onlyReadsMemory()) + continue; + InvokeInst *II = dyn_cast(&*I); + if (II && II->onlyReadsMemory()) + continue; + return lookupOrAdd(&*I); + } + return 0; + } +}; + +//===----------------------------------------------------------------------===// + +class GVNSink { +public: + GVNSink() : VN() {} + bool run(Function &F) { + DEBUG(dbgs() << "GVNSink: running on function @" << F.getName() << "\n"); + + unsigned NumSunk = 0; + ReversePostOrderTraversal RPOT(&F); + for (auto *N : RPOT) + NumSunk += sinkBB(N); + + return NumSunk > 0; + } + +private: + ValueTable VN; + + bool isInstructionBlacklisted(Instruction *I) { + // These instructions may change or break semantics if moved. + if (isa(I) || I->isEHPad() || isa(I) || + I->getType()->isTokenTy()) + return true; + return false; + } + + /// The main heuristic function. Analyze the set of instructions pointed to by + /// LRI and return a candidate solution if these instructions can be sunk, or + /// None otherwise. + Optional analyzeInstructionForSinking( + LockstepReverseIterator &LRI, unsigned &InstNum, unsigned &MemoryInstNum, + ModelledPHISet &NeededPHIs, SmallPtrSetImpl &PHIContents); + + /// Create a ModelledPHI for each PHI in BB, adding to PHIs. + void analyzeInitialPHIs(BasicBlock *BB, ModelledPHISet &PHIs, + SmallPtrSetImpl &PHIContents) { + for (auto &I : *BB) { + auto *PN = dyn_cast(&I); + if (!PN) + return; + + auto MPHI = ModelledPHI(PN); + PHIs.insert(MPHI); + for (auto *V : MPHI.getValues()) + PHIContents.insert(V); + } + } + + /// The main instruction sinking driver. Set up state and try and sink + /// instructions into BBEnd from its predecessors. + unsigned sinkBB(BasicBlock *BBEnd); + + /// Perform the actual mechanics of sinking an instruction from Blocks into + /// BBEnd, which is their only successor. + void sinkLastInstruction(ArrayRef Blocks, BasicBlock *BBEnd); + + /// Remove PHIs that all have the same incoming value. + void foldPointlessPHINodes(BasicBlock *BB) { + auto I = BB->begin(); + while (PHINode *PN = dyn_cast(I++)) { + if (!all_of(PN->incoming_values(), + [&](const Value *V) { return V == PN->getIncomingValue(0); })) + continue; + if (PN->getIncomingValue(0) != PN) + PN->replaceAllUsesWith(PN->getIncomingValue(0)); + else + PN->replaceAllUsesWith(UndefValue::get(PN->getType())); + PN->eraseFromParent(); + } + } +}; + +Optional GVNSink::analyzeInstructionForSinking( + LockstepReverseIterator &LRI, unsigned &InstNum, unsigned &MemoryInstNum, + ModelledPHISet &NeededPHIs, SmallPtrSetImpl &PHIContents) { + auto Insts = *LRI; + DEBUG(dbgs() << " -- Analyzing instruction set: [\n"; for (auto *I + : Insts) { + I->dump(); + } dbgs() << " ]\n";); + + DenseMap VNums; + for (auto *I : Insts) { + uint32_t N = VN.lookupOrAdd(I); + DEBUG(dbgs() << " VN=" << utohexstr(N) << " for" << *I << "\n"); + if (N == ~0U) + return None; + VNums[N]++; + } + unsigned VNumToSink = + std::max_element(VNums.begin(), VNums.end(), + [](const std::pair &I, + const std::pair &J) { + return I.second < J.second; + }) + ->first; + + if (VNums[VNumToSink] == 1) + // Can't sink anything! + return None; + + // Now restrict the number of incoming blocks down to only those with + // VNumToSink. 
+  auto &ActivePreds = LRI.getActiveBlocks();
+  unsigned InitialActivePredSize = ActivePreds.size();
+  SmallVector<Instruction *, 4> NewInsts;
+  for (auto *I : Insts) {
+    if (VN.lookup(I) != VNumToSink)
+      ActivePreds.erase(I->getParent());
+    else
+      NewInsts.push_back(I);
+  }
+  for (auto *I : NewInsts)
+    if (isInstructionBlacklisted(I))
+      return None;
+
+  // If we've restricted the incoming blocks, restrict all needed PHIs also
+  // to that set.
+  bool RecomputePHIContents = false;
+  if (ActivePreds.size() != InitialActivePredSize) {
+    ModelledPHISet NewNeededPHIs;
+    for (auto P : NeededPHIs) {
+      P.restrictToBlocks(ActivePreds);
+      NewNeededPHIs.insert(P);
+    }
+    NeededPHIs = NewNeededPHIs;
+    LRI.restrictToBlocks(ActivePreds);
+    RecomputePHIContents = true;
+  }
+
+  // The sunk instruction's results.
+  ModelledPHI NewPHI(NewInsts, ActivePreds);
+
+  // Does sinking this instruction render previous PHIs redundant?
+  if (NeededPHIs.find(NewPHI) != NeededPHIs.end()) {
+    NeededPHIs.erase(NewPHI);
+    RecomputePHIContents = true;
+  }
+
+  if (RecomputePHIContents) {
+    // The needed PHIs have changed, so recompute the set of all needed
+    // values.
+    PHIContents.clear();
+    for (auto &PHI : NeededPHIs)
+      PHIContents.insert(PHI.getValues().begin(), PHI.getValues().end());
+  }
+
+  // Is this instruction required by a later PHI that doesn't match this PHI?
+  // If so, we can't sink this instruction.
+  for (auto *V : NewPHI.getValues())
+    if (PHIContents.count(V))
+      // V exists in this PHI, but the whole PHI is different to NewPHI
+      // (else it would have been removed earlier). We cannot continue
+      // because this isn't representable.
+      return None;
+
+  // Which operands need PHIs?
+  // FIXME: If any of these fail, we should partition up the candidates to
+  // try and continue making progress.
+  Instruction *I0 = NewInsts[0];
+  for (unsigned OpNum = 0, E = I0->getNumOperands(); OpNum != E; ++OpNum) {
+    ModelledPHI PHI(NewInsts, OpNum, ActivePreds);
+    if (PHI.areAllIncomingValuesSame())
+      continue;
+    if (!canReplaceOperandWithVariable(I0, OpNum))
+      // We can't create a PHI from this instruction!
+      return None;
+    if (NeededPHIs.count(PHI))
+      continue;
+    if (!PHI.areAllIncomingValuesSameType())
+      return None;
+    // Don't create indirect calls! The called value is the final operand.
+    if ((isa<CallInst>(I0) || isa<InvokeInst>(I0)) && OpNum == E - 1 &&
+        PHI.areAnyIncomingValuesConstant())
+      return None;
+
+    NeededPHIs.reserve(NeededPHIs.size());
+    NeededPHIs.insert(PHI);
+    PHIContents.insert(PHI.getValues().begin(), PHI.getValues().end());
+  }
+
+  if (isMemoryInst(NewInsts[0]))
+    ++MemoryInstNum;
+
+  SinkingInstructionCandidate Cand;
+  Cand.NumInstructions = ++InstNum;
+  Cand.NumMemoryInsts = MemoryInstNum;
+  Cand.NumBlocks = ActivePreds.size();
+  Cand.NumPHIs = NeededPHIs.size();
+  for (auto *C : ActivePreds)
+    Cand.Blocks.push_back(C);
+
+  return Cand;
+}
+
+unsigned GVNSink::sinkBB(BasicBlock *BBEnd) {
+  DEBUG(dbgs() << "GVNSink: running on basic block ";
+        BBEnd->printAsOperand(dbgs()); dbgs() << "\n");
+  SmallVector<BasicBlock *, 4> Preds;
+  for (auto *B : predecessors(BBEnd)) {
+    auto *T = B->getTerminator();
+    if (isa<BranchInst>(T) || isa<SwitchInst>(T))
+      Preds.push_back(B);
+    else
+      return 0;
+  }
+  if (Preds.size() < 2)
+    return 0;
+  std::sort(Preds.begin(), Preds.end());
+
+  unsigned NumOrigPreds = Preds.size();
+  // We can only sink instructions through unconditional branches.
+  for (auto I = Preds.begin(); I != Preds.end();) {
+    if ((*I)->getTerminator()->getNumSuccessors() != 1)
+      I = Preds.erase(I);
+    else
+      ++I;
+  }
+
+  LockstepReverseIterator LRI(Preds);
+  SmallVector<SinkingInstructionCandidate, 4> Candidates;
+  unsigned InstNum = 0, MemoryInstNum = 0;
+  ModelledPHISet NeededPHIs;
+  SmallPtrSet<Value *, 4> PHIContents;
+  analyzeInitialPHIs(BBEnd, NeededPHIs, PHIContents);
+  unsigned NumOrigPHIs = NeededPHIs.size();
+
+  while (LRI.isValid()) {
+    auto Cand = analyzeInstructionForSinking(LRI, InstNum, MemoryInstNum,
+                                             NeededPHIs, PHIContents);
+    if (!Cand)
+      break;
+    Cand->calculateCost(NumOrigPHIs, Preds.size());
+    Candidates.emplace_back(*Cand);
+    --LRI;
+  }
+
+  std::stable_sort(
+      Candidates.begin(), Candidates.end(),
+      [](const SinkingInstructionCandidate &A,
+         const SinkingInstructionCandidate &B) { return A > B; });
+  DEBUG(dbgs() << " -- Sinking candidates:\n";
+        for (auto &C : Candidates) dbgs() << "  " << C << "\n";);
+
+  // Pick the top candidate, as long as it is positive!
+  if (Candidates.empty() || Candidates.front().Cost <= 0)
+    return 0;
+  auto C = Candidates.front();
+
+  DEBUG(dbgs() << " -- Sinking: " << C << "\n");
+  BasicBlock *InsertBB = BBEnd;
+  if (C.Blocks.size() < NumOrigPreds) {
+    DEBUG(dbgs() << " -- Splitting edge to "; BBEnd->printAsOperand(dbgs());
+          dbgs() << "\n");
+    InsertBB = SplitBlockPredecessors(BBEnd, C.Blocks, ".gvnsink.split");
+    if (!InsertBB) {
+      DEBUG(dbgs() << " -- FAILED to split edge!\n");
+      // Edge couldn't be split.
+      return 0;
+    }
+  }
+
+  for (unsigned I = 0; I < C.NumInstructions; ++I)
+    sinkLastInstruction(C.Blocks, InsertBB);
+
+  return C.NumInstructions;
+}
+
+void GVNSink::sinkLastInstruction(ArrayRef<BasicBlock *> Blocks,
+                                  BasicBlock *BBEnd) {
+  SmallVector<Instruction *, 4> Insts;
+  for (BasicBlock *BB : Blocks)
+    Insts.push_back(BB->getTerminator()->getPrevNode());
+  Instruction *I0 = Insts.front();
+
+  SmallVector<Value *, 4> NewOperands;
+  for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
+    bool NeedPHI = any_of(Insts, [&I0, O](const Instruction *I) {
+      return I->getOperand(O) != I0->getOperand(O);
+    });
+    if (!NeedPHI) {
+      NewOperands.push_back(I0->getOperand(O));
+      continue;
+    }
+
+    // Create a new PHI in the successor block and populate it.
+    auto *Op = I0->getOperand(O);
+    assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
+    auto *PN = PHINode::Create(Op->getType(), Insts.size(),
+                               Op->getName() + ".sink", &BBEnd->front());
+    for (auto *I : Insts)
+      PN->addIncoming(I->getOperand(O), I->getParent());
+    NewOperands.push_back(PN);
+  }
+
+  // Arbitrarily use I0 as the new "common" instruction; remap its operands
+  // and move it to the start of the successor block.
+  for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
+    I0->getOperandUse(O).set(NewOperands[O]);
+  I0->moveBefore(&*BBEnd->getFirstInsertionPt());
+
+  // Update metadata and IR flags.
+  for (auto *I : Insts)
+    if (I != I0) {
+      combineMetadataForCSE(I0, I);
+      I0->andIRFlags(I);
+    }
+
+  for (auto *I : Insts)
+    if (I != I0)
+      I->replaceAllUsesWith(I0);
+  foldPointlessPHINodes(BBEnd);
+
+  // Finally nuke all instructions apart from the common instruction.
+ for (auto *I : Insts) + if (I != I0) + I->eraseFromParent(); + + NumRemoved += Insts.size() - 1; +} + +//////////////////////////////////////////////////////////////////////////////// +// Pass machinery / boilerplate + +class GVNSinkLegacyPass : public FunctionPass { +public: + static char ID; + + GVNSinkLegacyPass() : FunctionPass(ID) { + initializeGVNSinkLegacyPassPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override { + if (skipFunction(F)) + return false; + GVNSink G; + return G.run(F); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addPreserved(); + } +}; +} // namespace + +PreservedAnalyses GVNSinkPass::run(Function &F, FunctionAnalysisManager &AM) { + GVNSink G; + if (!G.run(F)) + return PreservedAnalyses::all(); + + PreservedAnalyses PA; + PA.preserve(); + return PA; +} + +char GVNSinkLegacyPass::ID = 0; +INITIALIZE_PASS_BEGIN(GVNSinkLegacyPass, "gvn-sink", + "Early GVN sinking of Expressions", false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass) +INITIALIZE_PASS_END(GVNSinkLegacyPass, "gvn-sink", + "Early GVN sinking of Expressions", false, false) + +FunctionPass *llvm::createGVNSinkPass() { return new GVNSinkLegacyPass(); } diff --git a/interpreter/llvm/src/lib/Transforms/Scalar/GuardWidening.cpp b/interpreter/llvm/src/lib/Transforms/Scalar/GuardWidening.cpp index 198d2b2b024f9..fb7c6e15758d3 100644 --- a/interpreter/llvm/src/lib/Transforms/Scalar/GuardWidening.cpp +++ b/interpreter/llvm/src/lib/Transforms/Scalar/GuardWidening.cpp @@ -40,7 +40,6 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar/GuardWidening.h" -#include "llvm/Pass.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/Analysis/LoopInfo.h" @@ -50,6 +49,7 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/PatternMatch.h" +#include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/KnownBits.h" #include "llvm/Transforms/Scalar.h" @@ -537,9 +537,7 @@ bool GuardWideningImpl::parseRangeChecks( Changed = true; } else if (match(Check.getBase(), m_Or(m_Value(OpLHS), m_ConstantInt(OpRHS)))) { - unsigned BitWidth = OpLHS->getType()->getScalarSizeInBits(); - KnownBits Known(BitWidth); - computeKnownBits(OpLHS, Known, DL); + KnownBits Known = computeKnownBits(OpLHS, DL); if ((OpRHS->getValue() & Known.Zero) == OpRHS->getValue()) { Check.setBase(OpLHS); APInt NewOffset = Check.getOffsetValue() + OpRHS->getValue(); diff --git a/interpreter/llvm/src/lib/Transforms/Scalar/IndVarSimplify.cpp b/interpreter/llvm/src/lib/Transforms/Scalar/IndVarSimplify.cpp index 3953198fe6052..10782963177c6 100644 --- a/interpreter/llvm/src/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/interpreter/llvm/src/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -86,6 +86,10 @@ static cl::opt UsePostIncrementRanges( cl::desc("Use post increment control-dependent ranges in IndVarSimplify"), cl::init(true)); +static cl::opt +DisableLFTR("disable-lftr", cl::Hidden, cl::init(false), + cl::desc("Disable Linear Function Test Replace optimization")); + namespace { struct RewritePhi; @@ -1823,6 +1827,7 @@ static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) { // An IV counter must preserve its type. 
if (IncI->getNumOperands() == 2) break; + LLVM_FALLTHROUGH; default: return nullptr; } @@ -2412,7 +2417,8 @@ bool IndVarSimplify::run(Loop *L) { // If we have a trip count expression, rewrite the loop's exit condition // using it. We can currently only handle loops with a single exit. - if (canExpandBackedgeTakenCount(L, SE, Rewriter) && needsLFTR(L, DT)) { + if (!DisableLFTR && canExpandBackedgeTakenCount(L, SE, Rewriter) && + needsLFTR(L, DT)) { PHINode *IndVar = FindLoopCounter(L, BackedgeTakenCount, SE, DT); if (IndVar) { // Check preconditions for proper SCEVExpander operation. SCEV does not diff --git a/interpreter/llvm/src/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/interpreter/llvm/src/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp index 85db6e5e11052..99b4458ea0fa2 100644 --- a/interpreter/llvm/src/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp +++ b/interpreter/llvm/src/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp @@ -59,8 +59,8 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" -#include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/LoopSimplify.h" +#include "llvm/Transforms/Utils/LoopUtils.h" using namespace llvm; @@ -805,6 +805,25 @@ LoopStructure::parseLoopStructure(ScalarEvolution &SE, BranchProbabilityInfo &BP ConstantInt *One = ConstantInt::get(IndVarTy, 1); // TODO: generalize the predicates here to also match their unsigned variants. if (IsIncreasing) { + bool DecreasedRightValueByOne = false; + // Try to turn eq/ne predicates to those we can work with. + if (Pred == ICmpInst::ICMP_NE && LatchBrExitIdx == 1) + // while (++i != len) { while (++i < len) { + // ... ---> ... + // } } + Pred = ICmpInst::ICMP_SLT; + else if (Pred == ICmpInst::ICMP_EQ && LatchBrExitIdx == 0 && + !CanBeSMin(SE, RightSCEV)) { + // while (true) { while (true) { + // if (++i == len) ---> if (++i > len - 1) + // break; break; + // ... ... + // } } + Pred = ICmpInst::ICMP_SGT; + RightSCEV = SE.getMinusSCEV(RightSCEV, SE.getOne(RightSCEV->getType())); + DecreasedRightValueByOne = true; + } + bool FoundExpectedPred = (Pred == ICmpInst::ICMP_SLT && LatchBrExitIdx == 1) || (Pred == ICmpInst::ICMP_SGT && LatchBrExitIdx == 0); @@ -829,16 +848,41 @@ LoopStructure::parseLoopStructure(ScalarEvolution &SE, BranchProbabilityInfo &BP return None; } - IRBuilder<> B(Preheader->getTerminator()); - RightValue = B.CreateAdd(RightValue, One); + // We need to increase the right value unless we have already decreased + // it virtually when we replaced EQ with SGT. + if (!DecreasedRightValueByOne) { + IRBuilder<> B(Preheader->getTerminator()); + RightValue = B.CreateAdd(RightValue, One); + } } else { if (!SE.isLoopEntryGuardedByCond(&L, CmpInst::ICMP_SLT, IndVarStart, RightSCEV)) { FailureReason = "Induction variable start not bounded by upper limit"; return None; } + assert(!DecreasedRightValueByOne && + "Right value can be decreased only for LatchBrExitIdx == 0!"); } } else { + bool IncreasedRightValueByOne = false; + // Try to turn eq/ne predicates to those we can work with. + if (Pred == ICmpInst::ICMP_NE && LatchBrExitIdx == 1) + // while (--i != len) { while (--i > len) { + // ... ---> ... + // } } + Pred = ICmpInst::ICMP_SGT; + else if (Pred == ICmpInst::ICMP_EQ && LatchBrExitIdx == 0 && + !CanBeSMax(SE, RightSCEV)) { + // while (true) { while (true) { + // if (--i == len) ---> if (--i < len + 1) + // break; break; + // ... ... 
+ // } } + Pred = ICmpInst::ICMP_SLT; + RightSCEV = SE.getAddExpr(RightSCEV, SE.getOne(RightSCEV->getType())); + IncreasedRightValueByOne = true; + } + bool FoundExpectedPred = (Pred == ICmpInst::ICMP_SGT && LatchBrExitIdx == 1) || (Pred == ICmpInst::ICMP_SLT && LatchBrExitIdx == 0); @@ -863,14 +907,20 @@ LoopStructure::parseLoopStructure(ScalarEvolution &SE, BranchProbabilityInfo &BP return None; } - IRBuilder<> B(Preheader->getTerminator()); - RightValue = B.CreateSub(RightValue, One); + // We need to decrease the right value unless we have already increased + // it virtually when we replaced EQ with SLT. + if (!IncreasedRightValueByOne) { + IRBuilder<> B(Preheader->getTerminator()); + RightValue = B.CreateSub(RightValue, One); + } } else { if (!SE.isLoopEntryGuardedByCond(&L, CmpInst::ICMP_SGT, IndVarStart, RightSCEV)) { FailureReason = "Induction variable start not bounded by lower limit"; return None; } + assert(!IncreasedRightValueByOne && + "Right value can be increased only for LatchBrExitIdx == 0!"); } } @@ -917,20 +967,23 @@ LoopConstrainer::calculateSubRanges() const { // I think we can be more aggressive here and make this nuw / nsw if the // addition that feeds into the icmp for the latch's terminating branch is nuw // / nsw. In any case, a wrapping 2's complement addition is safe. - ConstantInt *One = ConstantInt::get(Ty, 1); const SCEV *Start = SE.getSCEV(MainLoopStructure.IndVarStart); const SCEV *End = SE.getSCEV(MainLoopStructure.LoopExitAt); bool Increasing = MainLoopStructure.IndVarIncreasing; - // We compute `Smallest` and `Greatest` such that [Smallest, Greatest) is the - // range of values the induction variable takes. + // We compute `Smallest` and `Greatest` such that [Smallest, Greatest), or + // [Smallest, GreatestSeen] is the range of values the induction variable + // takes. - const SCEV *Smallest = nullptr, *Greatest = nullptr; + const SCEV *Smallest = nullptr, *Greatest = nullptr, *GreatestSeen = nullptr; + const SCEV *One = SE.getOne(Ty); if (Increasing) { Smallest = Start; Greatest = End; + // No overflow, because the range [Smallest, GreatestSeen] is not empty. + GreatestSeen = SE.getMinusSCEV(End, One); } else { // These two computations may sign-overflow. Here is why that is okay: // @@ -948,8 +1001,9 @@ LoopConstrainer::calculateSubRanges() const { // will be an empty range. Returning an empty range is always safe. // - Smallest = SE.getAddExpr(End, SE.getSCEV(One)); - Greatest = SE.getAddExpr(Start, SE.getSCEV(One)); + Smallest = SE.getAddExpr(End, One); + Greatest = SE.getAddExpr(Start, One); + GreatestSeen = Start; } auto Clamp = [this, Smallest, Greatest](const SCEV *S) { @@ -964,7 +1018,7 @@ LoopConstrainer::calculateSubRanges() const { Result.LowLimit = Clamp(Range.getBegin()); bool ProvablyNoPostLoop = - SE.isKnownPredicate(ICmpInst::ICMP_SLE, Greatest, Range.getEnd()); + SE.isKnownPredicate(ICmpInst::ICMP_SLT, GreatestSeen, Range.getEnd()); if (!ProvablyNoPostLoop) Result.HighLimit = Clamp(Range.getEnd()); @@ -1228,7 +1282,12 @@ void LoopConstrainer::addToParentLoopIfNeeded(ArrayRef BBs) { Loop *LoopConstrainer::createClonedLoopStructure(Loop *Original, Loop *Parent, ValueToValueMapTy &VM) { - Loop &New = LPM.addLoop(Parent); + Loop &New = *new Loop(); + if (Parent) + Parent->addChildLoop(&New); + else + LI.addTopLevelLoop(&New); + LPM.addLoop(New); // Add all of the blocks in Original to the new loop. 
for (auto *BB : Original->blocks()) @@ -1366,28 +1425,35 @@ bool LoopConstrainer::run() { DT.recalculate(F); + // We need to first add all the pre and post loop blocks into the loop + // structures (as part of createClonedLoopStructure), and then update the + // LCSSA form and LoopSimplifyForm. This is necessary for correctly updating + // LI when LoopSimplifyForm is generated. + Loop *PreL = nullptr, *PostL = nullptr; if (!PreLoop.Blocks.empty()) { - auto *L = createClonedLoopStructure( + PreL = createClonedLoopStructure( &OriginalLoop, OriginalLoop.getParentLoop(), PreLoop.Map); - formLCSSARecursively(*L, DT, &LI, &SE); - simplifyLoop(L, &DT, &LI, &SE, nullptr, true); - // Pre loops are slow paths, we do not need to perform any loop - // optimizations on them. - DisableAllLoopOptsOnLoop(*L); } if (!PostLoop.Blocks.empty()) { - auto *L = createClonedLoopStructure( + PostL = createClonedLoopStructure( &OriginalLoop, OriginalLoop.getParentLoop(), PostLoop.Map); + } + + // This function canonicalizes the loop into Loop-Simplify and LCSSA forms. + auto CanonicalizeLoop = [&] (Loop *L, bool IsOriginalLoop) { formLCSSARecursively(*L, DT, &LI, &SE); simplifyLoop(L, &DT, &LI, &SE, nullptr, true); - // Post loops are slow paths, we do not need to perform any loop + // Pre/post loops are slow paths, we do not need to perform any loop // optimizations on them. - DisableAllLoopOptsOnLoop(*L); - } - - formLCSSARecursively(OriginalLoop, DT, &LI, &SE); - simplifyLoop(&OriginalLoop, &DT, &LI, &SE, nullptr, true); + if (!IsOriginalLoop) + DisableAllLoopOptsOnLoop(*L); + }; + if (PreL) + CanonicalizeLoop(PreL, false); + if (PostL) + CanonicalizeLoop(PostL, false); + CanonicalizeLoop(&OriginalLoop, true); return true; } diff --git a/interpreter/llvm/src/lib/Transforms/Scalar/InferAddressSpaces.cpp b/interpreter/llvm/src/lib/Transforms/Scalar/InferAddressSpaces.cpp index 5e116ef2fe75e..89b28f0aeee6b 100644 --- a/interpreter/llvm/src/lib/Transforms/Scalar/InferAddressSpaces.cpp +++ b/interpreter/llvm/src/lib/Transforms/Scalar/InferAddressSpaces.cpp @@ -89,7 +89,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Scalar.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SetVector.h" @@ -100,6 +99,7 @@ #include "llvm/IR/Operator.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/ValueMapper.h" @@ -232,7 +232,7 @@ bool InferAddressSpaces::rewriteIntrinsicOperands(IntrinsicInst *II, case Intrinsic::amdgcn_atomic_inc: case Intrinsic::amdgcn_atomic_dec:{ const ConstantInt *IsVolatile = dyn_cast(II->getArgOperand(4)); - if (!IsVolatile || !IsVolatile->isNullValue()) + if (!IsVolatile || !IsVolatile->isZero()) return false; LLVM_FALLTHROUGH; @@ -358,7 +358,8 @@ InferAddressSpaces::collectFlatAddressExpressions(Function &F) const { // If the operands of the expression on the top are already explored, // adds that expression to the resultant postorder. if (PostorderStack.back().second) { - Postorder.push_back(TopVal); + if (TopVal->getType()->getPointerAddressSpace() == FlatAddrSpace) + Postorder.push_back(TopVal); PostorderStack.pop_back(); continue; } @@ -500,6 +501,7 @@ static Value *cloneConstantExprWithNewAddressSpace( } // Computes the operands of the new constant expression. 
+ bool IsNew = false; SmallVector NewOperands; for (unsigned Index = 0; Index < CE->getNumOperands(); ++Index) { Constant *Operand = CE->getOperand(Index); @@ -509,6 +511,7 @@ static Value *cloneConstantExprWithNewAddressSpace( // bitcast, and getelementptr) do not incur cycles in the data flow graph // and (2) this function is called on constant expressions in postorder. if (Value *NewOperand = ValueWithNewAddrSpace.lookup(Operand)) { + IsNew = true; NewOperands.push_back(cast(NewOperand)); } else { // Otherwise, reuses the old operand. @@ -516,6 +519,11 @@ static Value *cloneConstantExprWithNewAddressSpace( } } + // If !IsNew, we will replace the Value with itself. However, replaced values + // are assumed to wrapped in a addrspace cast later so drop it now. + if (!IsNew) + return nullptr; + if (CE->getOpcode() == Instruction::GetElementPtr) { // Needs to specify the source type while constructing a getelementptr // constant expression. diff --git a/interpreter/llvm/src/lib/Transforms/Scalar/JumpThreading.cpp b/interpreter/llvm/src/lib/Transforms/Scalar/JumpThreading.cpp index ae353ea445957..dc9143bebc45e 100644 --- a/interpreter/llvm/src/lib/Transforms/Scalar/JumpThreading.cpp +++ b/interpreter/llvm/src/lib/Transforms/Scalar/JumpThreading.cpp @@ -12,20 +12,20 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar/JumpThreading.h" -#include "llvm/Transforms/Scalar.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/GlobalsModRef.h" -#include "llvm/Analysis/CFG.h" #include "llvm/Analysis/BlockFrequencyInfoImpl.h" +#include "llvm/Analysis/CFG.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/ConstantRange.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" @@ -36,6 +36,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" @@ -63,6 +64,11 @@ ImplicationSearchThreshold( "condition to use to thread over a weaker condition"), cl::init(3), cl::Hidden); +static cl::opt PrintLVIAfterJumpThreading( + "print-lvi-after-jump-threading", + cl::desc("Print the LazyValueInfo cache after JumpThreading"), cl::init(false), + cl::Hidden); + namespace { /// This pass performs 'jump threading', which looks at blocks that have /// multiple predecessors and multiple successors. 
If one or more of the @@ -92,9 +98,10 @@ namespace { bool runOnFunction(Function &F) override; void getAnalysisUsage(AnalysisUsage &AU) const override { + if (PrintLVIAfterJumpThreading) + AU.addRequired(); AU.addRequired(); AU.addRequired(); - AU.addPreserved(); AU.addPreserved(); AU.addRequired(); } @@ -132,12 +139,18 @@ bool JumpThreading::runOnFunction(Function &F) { bool HasProfileData = F.getEntryCount().hasValue(); if (HasProfileData) { LoopInfo LI{DominatorTree(F)}; - BPI.reset(new BranchProbabilityInfo(F, LI)); + BPI.reset(new BranchProbabilityInfo(F, LI, TLI)); BFI.reset(new BlockFrequencyInfo(F, *BPI, LI)); } - return Impl.runImpl(F, TLI, LVI, AA, HasProfileData, std::move(BFI), - std::move(BPI)); + bool Changed = Impl.runImpl(F, TLI, LVI, AA, HasProfileData, std::move(BFI), + std::move(BPI)); + if (PrintLVIAfterJumpThreading) { + dbgs() << "LVI for function '" << F.getName() << "':\n"; + LVI->printLVI(F, getAnalysis().getDomTree(), + dbgs()); + } + return Changed; } PreservedAnalyses JumpThreadingPass::run(Function &F, @@ -152,7 +165,7 @@ PreservedAnalyses JumpThreadingPass::run(Function &F, bool HasProfileData = F.getEntryCount().hasValue(); if (HasProfileData) { LoopInfo LI{DominatorTree(F)}; - BPI.reset(new BranchProbabilityInfo(F, LI)); + BPI.reset(new BranchProbabilityInfo(F, LI, &TLI)); BFI.reset(new BlockFrequencyInfo(F, *BPI, LI)); } @@ -230,13 +243,15 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_, // Can't thread an unconditional jump, but if the block is "almost // empty", we can replace uses of it with uses of the successor and make // this dead. - // We should not eliminate the loop header either, because eliminating - // a loop header might later prevent LoopSimplify from transforming nested - // loops into simplified form. + // We should not eliminate the loop header or latch either, because + // eliminating a loop header or latch might later prevent LoopSimplify + // from transforming nested loops into simplified form. We will rely on + // later passes in backend to clean up empty blocks. if (BI && BI->isUnconditional() && BB != &BB->getParent()->getEntryBlock() && // If the terminator is the only non-phi instruction, try to nuke it. - BB->getFirstNonPHIOrDbg()->isTerminator() && !LoopHeaders.count(BB)) { + BB->getFirstNonPHIOrDbg()->isTerminator() && !LoopHeaders.count(BB) && + !LoopHeaders.count(BI->getSuccessor(0))) { // FIXME: It is always conservatively correct to drop the info // for a block even if it doesn't get erased. This isn't totally // awesome, but it allows us to use AssertingVH to prevent nasty @@ -253,6 +268,35 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_, return EverChanged; } +// Replace uses of Cond with ToVal when safe to do so. If all uses are +// replaced, we can remove Cond. We cannot blindly replace all uses of Cond +// because we may incorrectly replace uses when guards/assumes are uses of +// of `Cond` and we used the guards/assume to reason about the `Cond` value +// at the end of block. RAUW unconditionally replaces all uses +// including the guards/assumes themselves and the uses before the +// guard/assume. +static void ReplaceFoldableUses(Instruction *Cond, Value *ToVal) { + assert(Cond->getType() == ToVal->getType()); + auto *BB = Cond->getParent(); + // We can unconditionally replace all uses in non-local blocks (i.e. uses + // strictly dominated by BB), since LVI information is true from the + // terminator of BB. 
+ replaceNonLocalUsesWith(Cond, ToVal); + for (Instruction &I : reverse(*BB)) { + // Reached the Cond whose uses we are trying to replace, so there are no + // more uses. + if (&I == Cond) + break; + // We only replace uses in instructions that are guaranteed to reach the end + // of BB, where we know Cond is ToVal. + if (!isGuaranteedToTransferExecutionToSuccessor(&I)) + break; + I.replaceUsesOfWith(Cond, ToVal); + } + if (Cond->use_empty() && !Cond->mayHaveSideEffects()) + Cond->eraseFromParent(); +} + /// Return the cost of duplicating a piece of this block from first non-phi /// and before StopAt instruction to thread across it. Stop scanning the block /// when exceeding the threshold. If duplication is impossible, returns ~0U. @@ -547,7 +591,12 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors( // Handle compare with phi operand, where the PHI is defined in this block. if (CmpInst *Cmp = dyn_cast(I)) { assert(Preference == WantInteger && "Compares only produce integers"); - PHINode *PN = dyn_cast(Cmp->getOperand(0)); + Type *CmpType = Cmp->getType(); + Value *CmpLHS = Cmp->getOperand(0); + Value *CmpRHS = Cmp->getOperand(1); + CmpInst::Predicate Pred = Cmp->getPredicate(); + + PHINode *PN = dyn_cast(CmpLHS); if (PN && PN->getParent() == BB) { const DataLayout &DL = PN->getModule()->getDataLayout(); // We can do this simplification if any comparisons fold to true or false. @@ -555,15 +604,15 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors( for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { BasicBlock *PredBB = PN->getIncomingBlock(i); Value *LHS = PN->getIncomingValue(i); - Value *RHS = Cmp->getOperand(1)->DoPHITranslation(BB, PredBB); + Value *RHS = CmpRHS->DoPHITranslation(BB, PredBB); - Value *Res = SimplifyCmpInst(Cmp->getPredicate(), LHS, RHS, {DL}); + Value *Res = SimplifyCmpInst(Pred, LHS, RHS, {DL}); if (!Res) { if (!isa(RHS)) continue; LazyValueInfo::Tristate - ResT = LVI->getPredicateOnEdge(Cmp->getPredicate(), LHS, + ResT = LVI->getPredicateOnEdge(Pred, LHS, cast(RHS), PredBB, BB, CxtI ? CxtI : Cmp); if (ResT == LazyValueInfo::Unknown) @@ -580,27 +629,67 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors( // If comparing a live-in value against a constant, see if we know the // live-in value on any predecessors. - if (isa(Cmp->getOperand(1)) && !Cmp->getType()->isVectorTy()) { - Constant *CmpConst = cast(Cmp->getOperand(1)); + if (isa(CmpRHS) && !CmpType->isVectorTy()) { + Constant *CmpConst = cast(CmpRHS); - if (!isa(Cmp->getOperand(0)) || - cast(Cmp->getOperand(0))->getParent() != BB) { + if (!isa(CmpLHS) || + cast(CmpLHS)->getParent() != BB) { for (BasicBlock *P : predecessors(BB)) { // If the value is known by LazyValueInfo to be a constant in a // predecessor, use that information to try to thread this block. LazyValueInfo::Tristate Res = - LVI->getPredicateOnEdge(Cmp->getPredicate(), Cmp->getOperand(0), + LVI->getPredicateOnEdge(Pred, CmpLHS, CmpConst, P, BB, CxtI ? CxtI : Cmp); if (Res == LazyValueInfo::Unknown) continue; - Constant *ResC = ConstantInt::get(Cmp->getType(), Res); + Constant *ResC = ConstantInt::get(CmpType, Res); Result.push_back(std::make_pair(ResC, P)); } return !Result.empty(); } + // InstCombine can fold some forms of constant range checks into + // (icmp (add (x, C1)), C2). See if we have we have such a thing with + // x as a live-in. 
+ { + using namespace PatternMatch; + Value *AddLHS; + ConstantInt *AddConst; + if (isa(CmpConst) && + match(CmpLHS, m_Add(m_Value(AddLHS), m_ConstantInt(AddConst)))) { + if (!isa(AddLHS) || + cast(AddLHS)->getParent() != BB) { + for (BasicBlock *P : predecessors(BB)) { + // If the value is known by LazyValueInfo to be a ConstantRange in + // a predecessor, use that information to try to thread this + // block. + ConstantRange CR = LVI->getConstantRangeOnEdge( + AddLHS, P, BB, CxtI ? CxtI : cast(CmpLHS)); + // Propagate the range through the addition. + CR = CR.add(AddConst->getValue()); + + // Get the range where the compare returns true. + ConstantRange CmpRange = ConstantRange::makeExactICmpRegion( + Pred, cast(CmpConst)->getValue()); + + Constant *ResC; + if (CmpRange.contains(CR)) + ResC = ConstantInt::getTrue(CmpType); + else if (CmpRange.inverse().contains(CR)) + ResC = ConstantInt::getFalse(CmpType); + else + continue; + + Result.push_back(std::make_pair(ResC, P)); + } + + return !Result.empty(); + } + } + } + // Try to find a constant value for the LHS of a comparison, // and evaluate it statically if we can. PredValueInfoTy LHSVals; @@ -609,8 +698,7 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors( for (const auto &LHSVal : LHSVals) { Constant *V = LHSVal.first; - Constant *Folded = ConstantExpr::getCompare(Cmp->getPredicate(), - V, CmpConst); + Constant *Folded = ConstantExpr::getCompare(Pred, V, CmpConst); if (Constant *KC = getKnownConstant(Folded, WantInteger)) Result.push_back(std::make_pair(KC, LHSVal.second)); } @@ -723,6 +811,37 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) { LVI->eraseBlock(SinglePred); MergeBasicBlockIntoOnlyPred(BB); + // Now that BB is merged into SinglePred (i.e. SinglePred Code followed by + // BB code within one basic block `BB`), we need to invalidate the LVI + // information associated with BB, because the LVI information need not be + // true for all of BB after the merge. For example, + // Before the merge, LVI info and code is as follows: + // SinglePred: + // %y = use of %p + // call @exit() // need not transfer execution to successor. + // assume(%p) // from this point on %p is true + // br label %BB + // BB: + // %x = use of %p + // br label exit + // + // Note that this LVI info for blocks BB and SinglPred is correct for %p + // (info2 and info1 respectively). After the merge and the deletion of the + // LVI info1 for SinglePred. We have the following code: + // BB: + // %y = use of %p + // call @exit() + // assume(%p) + // %x = use of %p <-- LVI info2 is correct from here onwards. + // br label exit + // LVI info2 for BB is incorrect at the beginning of BB. + + // Invalidate LVI information for BB if the LVI is not provably true for + // all of BB. + if (any_of(*BB, [](Instruction &I) { + return !isGuaranteedToTransferExecutionToSuccessor(&I); + })) + LVI->eraseBlock(BB); return true; } } @@ -833,14 +952,18 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) { CondBr->eraseFromParent(); if (CondCmp->use_empty()) CondCmp->eraseFromParent(); + // We can safely replace *some* uses of the CondInst if it has + // exactly one value as returned by LVI. RAUW is incorrect in the + // presence of guards and assumes, that have the `Cond` as the use. This + // is because we use the guards/assume to reason about the `Cond` value + // at the end of block, but RAUW unconditionally replaces all uses + // including the guards/assumes themselves and the uses before the + // guard/assume. 
else if (CondCmp->getParent() == BB) { - // If the fact we just learned is true for all uses of the - // condition, replace it with a constant value auto *CI = Ret == LazyValueInfo::True ? ConstantInt::getTrue(CondCmp->getType()) : ConstantInt::getFalse(CondCmp->getType()); - CondCmp->replaceAllUsesWith(CI); - CondCmp->eraseFromParent(); + ReplaceFoldableUses(CondCmp, CI); } return true; } @@ -1103,7 +1226,7 @@ bool JumpThreadingPass::SimplifyPartiallyRedundantLoad(LoadInst *LI) { LoadInst *NewVal = new LoadInst( LoadedPtr->DoPHITranslation(LoadBB, UnavailablePred), LI->getName() + ".pr", false, LI->getAlignment(), LI->getOrdering(), - LI->getSynchScope(), UnavailablePred->getTerminator()); + LI->getSyncScopeID(), UnavailablePred->getTerminator()); NewVal->setDebugLoc(LI->getDebugLoc()); if (AATags) NewVal->setAAMetadata(AATags); @@ -1327,14 +1450,16 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB, if (auto *CondInst = dyn_cast(Cond)) { if (CondInst->use_empty() && !CondInst->mayHaveSideEffects()) CondInst->eraseFromParent(); + // We can safely replace *some* uses of the CondInst if it has + // exactly one value as returned by LVI. RAUW is incorrect in the + // presence of guards and assumes, that have the `Cond` as the use. This + // is because we use the guards/assume to reason about the `Cond` value + // at the end of block, but RAUW unconditionally replaces all uses + // including the guards/assumes themselves and the uses before the + // guard/assume. else if (OnlyVal && OnlyVal != MultipleVal && - CondInst->getParent() == BB) { - // If we just learned Cond is the same value for all uses of the - // condition, replace it with a constant value - CondInst->replaceAllUsesWith(OnlyVal); - if (!CondInst->mayHaveSideEffects()) - CondInst->eraseFromParent(); - } + CondInst->getParent() == BB) + ReplaceFoldableUses(CondInst, OnlyVal); } return true; } @@ -1909,7 +2034,7 @@ bool JumpThreadingPass::DuplicateCondBranchOnPHIIntoPred( {BB->getModule()->getDataLayout(), TLI, nullptr, nullptr, New})) { ValueMapping[&*BI] = IV; if (!New->mayHaveSideEffects()) { - delete New; + New->deleteValue(); New = nullptr; } } else { @@ -2057,11 +2182,19 @@ bool JumpThreadingPass::TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB) { return false; } -/// TryToUnfoldSelectInCurrBB - Look for PHI/Select in the same BB of the form +/// TryToUnfoldSelectInCurrBB - Look for PHI/Select or PHI/CMP/Select in the +/// same BB in the form /// bb: /// %p = phi [false, %bb1], [true, %bb2], [false, %bb3], [true, %bb4], ... -/// %s = select p, trueval, falseval +/// %s = select %p, trueval, falseval +/// +/// or /// +/// bb: +/// %p = phi [0, %bb1], [1, %bb2], [0, %bb3], [1, %bb4], ... +/// %c = cmp %p, 0 +/// %s = select %c, trueval, falseval +// /// And expand the select into a branch structure. This later enables /// jump-threading over bb in this pass. /// @@ -2075,44 +2208,54 @@ bool JumpThreadingPass::TryToUnfoldSelectInCurrBB(BasicBlock *BB) { if (LoopHeaders.count(BB)) return false; - // Look for a Phi/Select pair in the same basic block. The Phi feeds the - // condition of the Select and at least one of the incoming values is a - // constant. for (BasicBlock::iterator BI = BB->begin(); PHINode *PN = dyn_cast(BI); ++BI) { - unsigned NumPHIValues = PN->getNumIncomingValues(); - if (NumPHIValues == 0 || !PN->hasOneUse()) + // Look for a Phi having at least one constant incoming value. 
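For reference, the two source-level shapes this unfolding targets look roughly like the following C++; this is purely illustrative (the exact IR a front end produces will vary), with the first function giving the existing phi/select pair and the second the newly supported phi/cmp/select chain. Both expand into an explicit branch so the block becomes threadable:

int unfold_phi_select(bool a, bool b, int t, int f) {
  bool p = a ? false : b;  // may become a phi of i1 with a constant incoming
  return p ? t : f;        // select on that phi
}

int unfold_phi_cmp_select(bool a, int t, int f) {
  int p = a ? 0 : 1;       // may become a phi of integer constants
  return p == 0 ? t : f;   // cmp of the phi feeding the select
}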
+    if (llvm::all_of(PN->incoming_values(),
+                     [](Value *V) { return !isa<ConstantInt>(V); }))
       continue;
 
-    SelectInst *SI = dyn_cast<SelectInst>(PN->user_back());
-    if (!SI || SI->getParent() != BB)
-      continue;
-
-    Value *Cond = SI->getCondition();
-    if (!Cond || Cond != PN || !Cond->getType()->isIntegerTy(1))
-      continue;
-
-    bool HasConst = false;
-    for (unsigned i = 0; i != NumPHIValues; ++i) {
-      if (PN->getIncomingBlock(i) == BB)
+    auto isUnfoldCandidate = [BB](SelectInst *SI, Value *V) {
+      // Check if SI is in BB and uses V as its condition.
       if (SI->getParent() != BB)
         return false;
-      if (isa<Constant>(PN->getIncomingValue(i)))
-        HasConst = true;
-    }
+      Value *Cond = SI->getCondition();
+      return (Cond && Cond == V && Cond->getType()->isIntegerTy(1));
+    };
 
-    if (HasConst) {
-      // Expand the select.
-      TerminatorInst *Term =
-          SplitBlockAndInsertIfThen(SI->getCondition(), SI, false);
-      PHINode *NewPN = PHINode::Create(SI->getType(), 2, "", SI);
-      NewPN->addIncoming(SI->getTrueValue(), Term->getParent());
-      NewPN->addIncoming(SI->getFalseValue(), BB);
-      SI->replaceAllUsesWith(NewPN);
-      SI->eraseFromParent();
-      return true;
+    SelectInst *SI = nullptr;
+    for (Use &U : PN->uses()) {
+      if (ICmpInst *Cmp = dyn_cast<ICmpInst>(U.getUser())) {
+        // Look for an ICmp in BB that compares PN with a constant and is the
+        // condition of a Select.
+        if (Cmp->getParent() == BB && Cmp->hasOneUse() &&
+            isa<ConstantInt>(Cmp->getOperand(1 - U.getOperandNo())))
+          if (SelectInst *SelectI = dyn_cast<SelectInst>(Cmp->user_back()))
+            if (isUnfoldCandidate(SelectI, Cmp->use_begin()->get())) {
+              SI = SelectI;
+              break;
+            }
+      } else if (SelectInst *SelectI = dyn_cast<SelectInst>(U.getUser())) {
+        // Look for a Select in BB that uses PN as its condition.
+        if (isUnfoldCandidate(SelectI, U.get())) {
+          SI = SelectI;
+          break;
+        }
+      }
     }
+
+    if (!SI)
+      continue;
+    // Expand the select.
+    TerminatorInst *Term =
+        SplitBlockAndInsertIfThen(SI->getCondition(), SI, false);
+    PHINode *NewPN = PHINode::Create(SI->getType(), 2, "", SI);
+    NewPN->addIncoming(SI->getTrueValue(), Term->getParent());
+    NewPN->addIncoming(SI->getFalseValue(), BB);
+    SI->replaceAllUsesWith(NewPN);
+    SI->eraseFromParent();
+    return true;
   }
-
   return false;
 }
diff --git a/interpreter/llvm/src/lib/Transforms/Scalar/LICM.cpp b/interpreter/llvm/src/lib/Transforms/Scalar/LICM.cpp
index 340c81fed0fda..37b9c4b1094e0 100644
--- a/interpreter/llvm/src/lib/Transforms/Scalar/LICM.cpp
+++ b/interpreter/llvm/src/lib/Transforms/Scalar/LICM.cpp
@@ -546,7 +546,7 @@ static bool isLoadInvariantInLoop(LoadInst *LI, DominatorTree *DT,
     // If there are escaping uses of invariant.start instruction, the load maybe
     // non-invariant.
     if (!II || II->getIntrinsicID() != Intrinsic::invariant_start ||
-        II->hasNUsesOrMore(1))
+        !II->use_empty())
       continue;
     unsigned InvariantSizeInBits =
         cast<ConstantInt>(II->getArgOperand(0))->getSExtValue() * 8;
diff --git a/interpreter/llvm/src/lib/Transforms/Scalar/LoadCombine.cpp b/interpreter/llvm/src/lib/Transforms/Scalar/LoadCombine.cpp
deleted file mode 100644
index 02215d3450c23..0000000000000
--- a/interpreter/llvm/src/lib/Transforms/Scalar/LoadCombine.cpp
+++ /dev/null
@@ -1,295 +0,0 @@
-//===- LoadCombine.cpp - Combine Adjacent Loads ---------------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-/// \file
-/// This transformation combines adjacent loads.
-/// -//===----------------------------------------------------------------------===// - -#include "llvm/Transforms/Scalar.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/AliasSetTracker.h" -#include "llvm/Analysis/GlobalsModRef.h" -#include "llvm/Analysis/TargetFolder.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/Dominators.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/Module.h" -#include "llvm/Pass.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/MathExtras.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; - -#define DEBUG_TYPE "load-combine" - -STATISTIC(NumLoadsAnalyzed, "Number of loads analyzed for combining"); -STATISTIC(NumLoadsCombined, "Number of loads combined"); - -#define LDCOMBINE_NAME "Combine Adjacent Loads" - -namespace { -struct PointerOffsetPair { - Value *Pointer; - APInt Offset; -}; - -struct LoadPOPPair { - LoadInst *Load; - PointerOffsetPair POP; - /// \brief The new load needs to be created before the first load in IR order. - unsigned InsertOrder; -}; - -class LoadCombine : public BasicBlockPass { - LLVMContext *C; - AliasAnalysis *AA; - DominatorTree *DT; - -public: - LoadCombine() : BasicBlockPass(ID), C(nullptr), AA(nullptr) { - initializeLoadCombinePass(*PassRegistry::getPassRegistry()); - } - - using llvm::Pass::doInitialization; - bool doInitialization(Function &) override; - bool runOnBasicBlock(BasicBlock &BB) override; - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesCFG(); - AU.addRequired(); - AU.addRequired(); - AU.addPreserved(); - } - - StringRef getPassName() const override { return LDCOMBINE_NAME; } - static char ID; - - typedef IRBuilder BuilderTy; - -private: - BuilderTy *Builder; - - PointerOffsetPair getPointerOffsetPair(LoadInst &); - bool combineLoads(DenseMap> &); - bool aggregateLoads(SmallVectorImpl &); - bool combineLoads(SmallVectorImpl &); -}; -} - -bool LoadCombine::doInitialization(Function &F) { - DEBUG(dbgs() << "LoadCombine function: " << F.getName() << "\n"); - C = &F.getContext(); - return true; -} - -PointerOffsetPair LoadCombine::getPointerOffsetPair(LoadInst &LI) { - auto &DL = LI.getModule()->getDataLayout(); - - PointerOffsetPair POP; - POP.Pointer = LI.getPointerOperand(); - unsigned BitWidth = DL.getPointerSizeInBits(LI.getPointerAddressSpace()); - POP.Offset = APInt(BitWidth, 0); - - while (isa(POP.Pointer) || isa(POP.Pointer)) { - if (auto *GEP = dyn_cast(POP.Pointer)) { - APInt LastOffset = POP.Offset; - if (!GEP->accumulateConstantOffset(DL, POP.Offset)) { - // Can't handle GEPs with variable indices. - POP.Offset = LastOffset; - return POP; - } - POP.Pointer = GEP->getPointerOperand(); - } else if (auto *BC = dyn_cast(POP.Pointer)) { - POP.Pointer = BC->getOperand(0); - } - } - return POP; -} - -bool LoadCombine::combineLoads( - DenseMap> &LoadMap) { - bool Combined = false; - for (auto &Loads : LoadMap) { - if (Loads.second.size() < 2) - continue; - std::sort(Loads.second.begin(), Loads.second.end(), - [](const LoadPOPPair &A, const LoadPOPPair &B) { - return A.POP.Offset.slt(B.POP.Offset); - }); - if (aggregateLoads(Loads.second)) - Combined = true; - } - return Combined; -} - -/// \brief Try to aggregate loads from a sorted list of loads to be combined. -/// -/// It is guaranteed that no writes occur between any of the loads. All loads -/// have the same base pointer. 
There are at least two loads. -bool LoadCombine::aggregateLoads(SmallVectorImpl &Loads) { - assert(Loads.size() >= 2 && "Insufficient loads!"); - LoadInst *BaseLoad = nullptr; - SmallVector AggregateLoads; - bool Combined = false; - bool ValidPrevOffset = false; - APInt PrevOffset; - uint64_t PrevSize = 0; - for (auto &L : Loads) { - if (ValidPrevOffset == false) { - BaseLoad = L.Load; - PrevOffset = L.POP.Offset; - PrevSize = L.Load->getModule()->getDataLayout().getTypeStoreSize( - L.Load->getType()); - AggregateLoads.push_back(L); - ValidPrevOffset = true; - continue; - } - if (L.Load->getAlignment() > BaseLoad->getAlignment()) - continue; - APInt PrevEnd = PrevOffset + PrevSize; - if (L.POP.Offset.sgt(PrevEnd)) { - // No other load will be combinable - if (combineLoads(AggregateLoads)) - Combined = true; - AggregateLoads.clear(); - ValidPrevOffset = false; - continue; - } - if (L.POP.Offset != PrevEnd) - // This load is offset less than the size of the last load. - // FIXME: We may want to handle this case. - continue; - PrevOffset = L.POP.Offset; - PrevSize = L.Load->getModule()->getDataLayout().getTypeStoreSize( - L.Load->getType()); - AggregateLoads.push_back(L); - } - if (combineLoads(AggregateLoads)) - Combined = true; - return Combined; -} - -/// \brief Given a list of combinable load. Combine the maximum number of them. -bool LoadCombine::combineLoads(SmallVectorImpl &Loads) { - // Remove loads from the end while the size is not a power of 2. - unsigned TotalSize = 0; - for (const auto &L : Loads) - TotalSize += L.Load->getType()->getPrimitiveSizeInBits(); - while (TotalSize != 0 && !isPowerOf2_32(TotalSize)) - TotalSize -= Loads.pop_back_val().Load->getType()->getPrimitiveSizeInBits(); - if (Loads.size() < 2) - return false; - - DEBUG({ - dbgs() << "***** Combining Loads ******\n"; - for (const auto &L : Loads) { - dbgs() << L.POP.Offset << ": " << *L.Load << "\n"; - } - }); - - // Find first load. This is where we put the new load. - LoadPOPPair FirstLP; - FirstLP.InsertOrder = -1u; - for (const auto &L : Loads) - if (L.InsertOrder < FirstLP.InsertOrder) - FirstLP = L; - - unsigned AddressSpace = - FirstLP.POP.Pointer->getType()->getPointerAddressSpace(); - - Builder->SetInsertPoint(FirstLP.Load); - Value *Ptr = Builder->CreateConstGEP1_64( - Builder->CreatePointerCast(Loads[0].POP.Pointer, - Builder->getInt8PtrTy(AddressSpace)), - Loads[0].POP.Offset.getSExtValue()); - LoadInst *NewLoad = new LoadInst( - Builder->CreatePointerCast( - Ptr, PointerType::get(IntegerType::get(Ptr->getContext(), TotalSize), - Ptr->getType()->getPointerAddressSpace())), - Twine(Loads[0].Load->getName()) + ".combined", false, - Loads[0].Load->getAlignment(), FirstLP.Load); - - for (const auto &L : Loads) { - Builder->SetInsertPoint(L.Load); - Value *V = Builder->CreateExtractInteger( - L.Load->getModule()->getDataLayout(), NewLoad, - cast(L.Load->getType()), - (L.POP.Offset - Loads[0].POP.Offset).getZExtValue(), "combine.extract"); - L.Load->replaceAllUsesWith(V); - } - - NumLoadsCombined = NumLoadsCombined + Loads.size(); - return true; -} - -bool LoadCombine::runOnBasicBlock(BasicBlock &BB) { - if (skipBasicBlock(BB)) - return false; - - AA = &getAnalysis().getAAResults(); - DT = &getAnalysis().getDomTree(); - - // Skip analysing dead blocks (not forward reachable from function entry). 
- if (!DT->isReachableFromEntry(&BB)) { - DEBUG(dbgs() << "LC: skipping unreachable " << BB.getName() << - " in " << BB.getParent()->getName() << "\n"); - return false; - } - - IRBuilder TheBuilder( - BB.getContext(), TargetFolder(BB.getModule()->getDataLayout())); - Builder = &TheBuilder; - - DenseMap> LoadMap; - AliasSetTracker AST(*AA); - - bool Combined = false; - unsigned Index = 0; - for (auto &I : BB) { - if (I.mayThrow() || AST.containsUnknown(&I)) { - if (combineLoads(LoadMap)) - Combined = true; - LoadMap.clear(); - AST.clear(); - continue; - } - if (I.mayWriteToMemory()) { - AST.add(&I); - continue; - } - LoadInst *LI = dyn_cast(&I); - if (!LI) - continue; - ++NumLoadsAnalyzed; - if (!LI->isSimple() || !LI->getType()->isIntegerTy()) - continue; - auto POP = getPointerOffsetPair(*LI); - if (!POP.Pointer) - continue; - LoadMap[POP.Pointer].push_back({LI, std::move(POP), Index++}); - AST.add(LI); - } - if (combineLoads(LoadMap)) - Combined = true; - return Combined; -} - -char LoadCombine::ID = 0; - -BasicBlockPass *llvm::createLoadCombinePass() { - return new LoadCombine(); -} - -INITIALIZE_PASS_BEGIN(LoadCombine, "load-combine", LDCOMBINE_NAME, false, false) -INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) -INITIALIZE_PASS_END(LoadCombine, "load-combine", LDCOMBINE_NAME, false, false) diff --git a/interpreter/llvm/src/lib/Transforms/Scalar/LoopDeletion.cpp b/interpreter/llvm/src/lib/Transforms/Scalar/LoopDeletion.cpp index 3151ccd279c41..ac4dd44a0e906 100644 --- a/interpreter/llvm/src/lib/Transforms/Scalar/LoopDeletion.cpp +++ b/interpreter/llvm/src/lib/Transforms/Scalar/LoopDeletion.cpp @@ -31,20 +31,19 @@ using namespace llvm; STATISTIC(NumDeleted, "Number of loops deleted"); /// This function deletes dead loops. The caller of this function needs to -/// guarantee that the loop is infact dead. Here we handle two kinds of dead +/// guarantee that the loop is infact dead. Here we handle two kinds of dead /// loop. The first kind (\p isLoopDead) is where only invariant values from /// within the loop are used outside of it. The second kind (\p /// isLoopNeverExecuted) is where the loop is provably never executed. We can -/// always remove never executed loops since they will not cause any -/// difference to program behaviour. +/// always remove never executed loops since they will not cause any difference +/// to program behaviour. /// /// This also updates the relevant analysis information in \p DT, \p SE, and \p /// LI. It also updates the loop PM if an updater struct is provided. // TODO: This function will be used by loop-simplifyCFG as well. So, move this // to LoopUtils.cpp static void deleteDeadLoop(Loop *L, DominatorTree &DT, ScalarEvolution &SE, - LoopInfo &LI, bool LoopIsNeverExecuted, - LPMUpdater *Updater = nullptr); + LoopInfo &LI, LPMUpdater *Updater = nullptr); /// Determines if a loop is dead. /// /// This assumes that we've already checked for unique exit and exiting blocks, @@ -149,26 +148,35 @@ static bool deleteLoopIfDead(Loop *L, DominatorTree &DT, ScalarEvolution &SE, LoopInfo &LI, LPMUpdater *Updater = nullptr) { assert(L->isLCSSAForm(DT) && "Expected LCSSA!"); - // We can only remove the loop if there is a preheader that we can - // branch from after removing it. + // We can only remove the loop if there is a preheader that we can branch from + // after removing it. Also, if LoopSimplify form is not available, stay out + // of trouble. 
BasicBlock *Preheader = L->getLoopPreheader(); - if (!Preheader) + if (!Preheader || !L->hasDedicatedExits()) { + DEBUG(dbgs() + << "Deletion requires Loop with preheader and dedicated exits.\n"); return false; - - // If LoopSimplify form is not available, stay out of trouble. - if (!L->hasDedicatedExits()) - return false; - + } // We can't remove loops that contain subloops. If the subloops were dead, // they would already have been removed in earlier executions of this pass. - if (L->begin() != L->end()) + if (L->begin() != L->end()) { + DEBUG(dbgs() << "Loop contains subloops.\n"); return false; + } BasicBlock *ExitBlock = L->getUniqueExitBlock(); if (ExitBlock && isLoopNeverExecuted(L)) { - deleteDeadLoop(L, DT, SE, LI, true /* LoopIsNeverExecuted */, Updater); + DEBUG(dbgs() << "Loop is proven to never execute, delete it!"); + // Set incoming value to undef for phi nodes in the exit block. + BasicBlock::iterator BI = ExitBlock->begin(); + while (PHINode *P = dyn_cast(BI)) { + for (unsigned i = 0; i < P->getNumIncomingValues(); i++) + P->setIncomingValue(i, UndefValue::get(P->getType())); + BI++; + } + deleteDeadLoop(L, DT, SE, LI, Updater); ++NumDeleted; return true; } @@ -182,29 +190,34 @@ static bool deleteLoopIfDead(Loop *L, DominatorTree &DT, ScalarEvolution &SE, // be in the situation of needing to be able to solve statically which exit // block will be branched to, or trying to preserve the branching logic in // a loop invariant manner. - if (!ExitBlock) + if (!ExitBlock) { + DEBUG(dbgs() << "Deletion requires single exit block\n"); return false; - + } // Finally, we have to check that the loop really is dead. bool Changed = false; - if (!isLoopDead(L, SE, ExitingBlocks, ExitBlock, Changed, Preheader)) + if (!isLoopDead(L, SE, ExitingBlocks, ExitBlock, Changed, Preheader)) { + DEBUG(dbgs() << "Loop is not invariant, cannot delete.\n"); return Changed; + } // Don't remove loops for which we can't solve the trip count. // They could be infinite, in which case we'd be changing program behavior. const SCEV *S = SE.getMaxBackedgeTakenCount(L); - if (isa(S)) + if (isa(S)) { + DEBUG(dbgs() << "Could not compute SCEV MaxBackedgeTakenCount.\n"); return Changed; + } - deleteDeadLoop(L, DT, SE, LI, false /* LoopIsNeverExecuted */, Updater); + DEBUG(dbgs() << "Loop is invariant, delete it!"); + deleteDeadLoop(L, DT, SE, LI, Updater); ++NumDeleted; return true; } static void deleteDeadLoop(Loop *L, DominatorTree &DT, ScalarEvolution &SE, - LoopInfo &LI, bool LoopIsNeverExecuted, - LPMUpdater *Updater) { + LoopInfo &LI, LPMUpdater *Updater) { assert(L->isLCSSAForm(DT) && "Expected LCSSA!"); auto *Preheader = L->getLoopPreheader(); assert(Preheader && "Preheader should exist!"); @@ -227,6 +240,8 @@ static void deleteDeadLoop(Loop *L, DominatorTree &DT, ScalarEvolution &SE, auto *ExitBlock = L->getUniqueExitBlock(); assert(ExitBlock && "Should have a unique exit block!"); + assert(L->hasDedicatedExits() && "Loop should have dedicated exits!"); + // Connect the preheader directly to the exit block. // Even when the loop is never executed, we cannot remove the edge from the // source block to the exit block. Consider the case where the unexecuted loop @@ -236,20 +251,28 @@ static void deleteDeadLoop(Loop *L, DominatorTree &DT, ScalarEvolution &SE, // non-loop, it will be deleted in a future iteration of loop deletion pass. 
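The exit-phi rewrite performed in the next hunk is subtle, because PHINode::removeIncomingValue renumbers the remaining operands as it deletes. A minimal standalone sketch of the same logic, with a hypothetical helper name, is:

static void retargetExitPHIToPreheader(llvm::PHINode *P,
                                       llvm::BasicBlock *Preheader) {
  // Entry 0 is kept and pointed at the preheader; with dedicated exits, every
  // other entry must come from an exiting block and can be dropped.
  P->setIncomingBlock(0, Preheader);
  // Delete from the back so the surviving indices stay valid:
  // removeIncomingValue shifts all later operands down by one.
  for (unsigned e = P->getNumIncomingValues() - 1; e > 0; --e)
    P->removeIncomingValue(e, /*DeletePHIIfEmpty=*/false);
}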
Preheader->getTerminator()->replaceUsesOfWith(L->getHeader(), ExitBlock); - SmallVector ExitingBlocks; - L->getExitingBlocks(ExitingBlocks); // Rewrite phis in the exit block to get their inputs from the Preheader // instead of the exiting block. - BasicBlock *ExitingBlock = ExitingBlocks[0]; BasicBlock::iterator BI = ExitBlock->begin(); while (PHINode *P = dyn_cast(BI)) { - int j = P->getBasicBlockIndex(ExitingBlock); - assert(j >= 0 && "Can't find exiting block in exit block's phi node!"); - if (LoopIsNeverExecuted) - P->setIncomingValue(j, UndefValue::get(P->getType())); - P->setIncomingBlock(j, Preheader); - for (unsigned i = 1; i < ExitingBlocks.size(); ++i) - P->removeIncomingValue(ExitingBlocks[i]); + // Set the zero'th element of Phi to be from the preheader and remove all + // other incoming values. Given the loop has dedicated exits, all other + // incoming values must be from the exiting blocks. + int PredIndex = 0; + P->setIncomingBlock(PredIndex, Preheader); + // Removes all incoming values from all other exiting blocks (including + // duplicate values from an exiting block). + // Nuke all entries except the zero'th entry which is the preheader entry. + // NOTE! We need to remove Incoming Values in the reverse order as done + // below, to keep the indices valid for deletion (removeIncomingValues + // updates getNumIncomingValues and shifts all values down into the operand + // being deleted). + for (unsigned i = 0, e = P->getNumIncomingValues() - 1; i != e; ++i) + P->removeIncomingValue(e-i, false); + + assert((P->getNumIncomingValues() == 1 && + P->getIncomingBlock(PredIndex) == Preheader) && + "Should have exactly one value and that's from the preheader!"); ++BI; } @@ -296,6 +319,9 @@ static void deleteDeadLoop(Loop *L, DominatorTree &DT, ScalarEvolution &SE, PreservedAnalyses LoopDeletionPass::run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &Updater) { + + DEBUG(dbgs() << "Analyzing Loop for deletion: "); + DEBUG(L.dump()); if (!deleteLoopIfDead(&L, AR.DT, AR.SE, AR.LI, &Updater)) return PreservedAnalyses::all(); @@ -335,5 +361,7 @@ bool LoopDeletionLegacyPass::runOnLoop(Loop *L, LPPassManager &) { ScalarEvolution &SE = getAnalysis().getSE(); LoopInfo &LI = getAnalysis().getLoopInfo(); + DEBUG(dbgs() << "Analyzing Loop for deletion: "); + DEBUG(L->dump()); return deleteLoopIfDead(L, DT, SE, LI); } diff --git a/interpreter/llvm/src/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/interpreter/llvm/src/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 48d5ae88cda91..4a6a35c0ab1b9 100644 --- a/interpreter/llvm/src/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/interpreter/llvm/src/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -110,6 +110,16 @@ class LoopIdiomRecognize { bool HasMemset; bool HasMemsetPattern; bool HasMemcpy; + /// Return code for isLegalStore() + enum LegalStoreKind { + None = 0, + Memset, + MemsetPattern, + Memcpy, + UnorderedAtomicMemcpy, + DontUse // Dummy retval never to be used. Allows catching errors in retval + // handling. 
+  };
 
   /// \name Countable Loop Idiom Handling
   /// @{
@@ -119,8 +129,7 @@ class LoopIdiomRecognize {
                       SmallVectorImpl<BasicBlock *> &ExitBlocks);
 
   void collectStores(BasicBlock *BB);
-  bool isLegalStore(StoreInst *SI, bool &ForMemset, bool &ForMemsetPattern,
-                    bool &ForMemcpy);
+  LegalStoreKind isLegalStore(StoreInst *SI);
   bool processLoopStores(SmallVectorImpl<StoreInst *> &SL, const SCEV *BECount,
                          bool ForMemset);
   bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount);
@@ -144,6 +153,10 @@ class LoopIdiomRecognize {
   bool recognizePopcount();
   void transformLoopToPopcount(BasicBlock *PreCondBB, Instruction *CntInst,
                                PHINode *CntPhi, Value *Var);
+  bool recognizeAndInsertCTLZ();
+  void transformLoopToCountable(BasicBlock *PreCondBB, Instruction *CntInst,
+                                PHINode *CntPhi, Value *Var, const DebugLoc DL,
+                                bool ZeroCheck, bool IsCntPhiUsedOutsideLoop);
 
   /// @}
 };
@@ -339,20 +352,24 @@ static Constant *getMemSetPatternValue(Value *V, const DataLayout *DL) {
   return ConstantArray::get(AT, std::vector<Constant *>(ArraySize, C));
 }
 
-bool LoopIdiomRecognize::isLegalStore(StoreInst *SI, bool &ForMemset,
-                                      bool &ForMemsetPattern, bool &ForMemcpy) {
+LoopIdiomRecognize::LegalStoreKind
+LoopIdiomRecognize::isLegalStore(StoreInst *SI) {
+
   // Don't touch volatile stores.
-  if (!SI->isSimple())
-    return false;
+  if (SI->isVolatile())
+    return LegalStoreKind::None;
+  // We only want simple or unordered-atomic stores.
+  if (!SI->isUnordered())
+    return LegalStoreKind::None;
 
   // Don't convert stores of non-integral pointer types to memsets (which stores
   // integers).
   if (DL->isNonIntegralPointerType(SI->getValueOperand()->getType()))
-    return false;
+    return LegalStoreKind::None;
 
   // Avoid merging nontemporal stores.
   if (SI->getMetadata(LLVMContext::MD_nontemporal))
-    return false;
+    return LegalStoreKind::None;
 
   Value *StoredVal = SI->getValueOperand();
   Value *StorePtr = SI->getPointerOperand();
@@ -360,7 +377,7 @@ bool LoopIdiomRecognize::isLegalStore(StoreInst *SI, bool &ForMemset,
   // Reject stores that are so large that they overflow an unsigned.
   uint64_t SizeInBits = DL->getTypeSizeInBits(StoredVal->getType());
   if ((SizeInBits & 7) || (SizeInBits >> 32) != 0)
-    return false;
+    return LegalStoreKind::None;
 
   // See if the pointer expression is an AddRec like {base,+,1} on the current
   // loop, which indicates a strided store. If we have something else, it's a
@@ -368,11 +385,11 @@ bool LoopIdiomRecognize::isLegalStore(StoreInst *SI, bool &ForMemset,
   const SCEVAddRecExpr *StoreEv =
       dyn_cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
   if (!StoreEv || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine())
-    return false;
+    return LegalStoreKind::None;
 
   // Check to see if we have a constant stride.
   if (!isa<SCEVConstant>(StoreEv->getOperand(1)))
-    return false;
+    return LegalStoreKind::None;
 
   // See if the store can be turned into a memset.
@@ -383,22 +400,23 @@ bool LoopIdiomRecognize::isLegalStore(StoreInst *SI, bool &ForMemset,
   Value *SplatValue = isBytewiseValue(StoredVal);
   Constant *PatternValue = nullptr;
 
+  // Note: memset and memset_pattern on unordered-atomic stores are not yet
+  // supported.
+  bool UnorderedAtomic = SI->isUnordered() && !SI->isSimple();
+
   // If we're allowed to form a memset, and the stored value would be
   // acceptable for memset, use it.
-  if (HasMemset && SplatValue &&
+  if (!UnorderedAtomic && HasMemset && SplatValue &&
       // Verify that the stored value is loop invariant.  If not, we can't
       // promote the memset.
       CurLoop->isLoopInvariant(SplatValue)) {
     // It looks like we can use SplatValue.
- ForMemset = true; - return true; - } else if (HasMemsetPattern && + return LegalStoreKind::Memset; + } else if (!UnorderedAtomic && HasMemsetPattern && // Don't create memset_pattern16s with address spaces. StorePtr->getType()->getPointerAddressSpace() == 0 && (PatternValue = getMemSetPatternValue(StoredVal, DL))) { // It looks like we can use PatternValue! - ForMemsetPattern = true; - return true; + return LegalStoreKind::MemsetPattern; } // Otherwise, see if the store can be turned into a memcpy. @@ -408,12 +426,17 @@ bool LoopIdiomRecognize::isLegalStore(StoreInst *SI, bool &ForMemset, APInt Stride = getStoreStride(StoreEv); unsigned StoreSize = getStoreSizeInBytes(SI, DL); if (StoreSize != Stride && StoreSize != -Stride) - return false; + return LegalStoreKind::None; // The store must be feeding a non-volatile load. LoadInst *LI = dyn_cast(SI->getValueOperand()); - if (!LI || !LI->isSimple()) - return false; + + // Only allow non-volatile loads + if (!LI || LI->isVolatile()) + return LegalStoreKind::None; + // Only allow simple or unordered-atomic loads + if (!LI->isUnordered()) + return LegalStoreKind::None; // See if the pointer expression is an AddRec like {base,+,1} on the current // loop, which indicates a strided load. If we have something else, it's a @@ -421,18 +444,19 @@ bool LoopIdiomRecognize::isLegalStore(StoreInst *SI, bool &ForMemset, const SCEVAddRecExpr *LoadEv = dyn_cast(SE->getSCEV(LI->getPointerOperand())); if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine()) - return false; + return LegalStoreKind::None; // The store and load must share the same stride. if (StoreEv->getOperand(1) != LoadEv->getOperand(1)) - return false; + return LegalStoreKind::None; // Success. This store can be converted into a memcpy. - ForMemcpy = true; - return true; + UnorderedAtomic = UnorderedAtomic || LI->isAtomic(); + return UnorderedAtomic ? LegalStoreKind::UnorderedAtomicMemcpy + : LegalStoreKind::Memcpy; } // This store can't be transformed into a memset/memcpy. - return false; + return LegalStoreKind::None; } void LoopIdiomRecognize::collectStores(BasicBlock *BB) { @@ -444,24 +468,29 @@ void LoopIdiomRecognize::collectStores(BasicBlock *BB) { if (!SI) continue; - bool ForMemset = false; - bool ForMemsetPattern = false; - bool ForMemcpy = false; // Make sure this is a strided store with a constant stride. - if (!isLegalStore(SI, ForMemset, ForMemsetPattern, ForMemcpy)) - continue; - - // Save the store locations. - if (ForMemset) { + switch (isLegalStore(SI)) { + case LegalStoreKind::None: + // Nothing to do + break; + case LegalStoreKind::Memset: { // Find the base pointer. Value *Ptr = GetUnderlyingObject(SI->getPointerOperand(), *DL); StoreRefsForMemset[Ptr].push_back(SI); - } else if (ForMemsetPattern) { + } break; + case LegalStoreKind::MemsetPattern: { // Find the base pointer. 
      Value *Ptr = GetUnderlyingObject(SI->getPointerOperand(), *DL);
      StoreRefsForMemsetPattern[Ptr].push_back(SI);
-    } else if (ForMemcpy)
+    } break;
+    case LegalStoreKind::Memcpy:
+    case LegalStoreKind::UnorderedAtomicMemcpy:
       StoreRefsForMemcpy.push_back(SI);
+      break;
+    default:
+      assert(false && "unhandled return value");
+      break;
+    }
   }
 }
@@ -869,7 +898,7 @@ bool LoopIdiomRecognize::processLoopStridedStore(
 /// for (i) A[i] = B[i];
 bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
                                                     const SCEV *BECount) {
-  assert(SI->isSimple() && "Expected only non-volatile stores.");
+  assert(SI->isUnordered() && "Expected only non-volatile non-ordered stores.");
 
   Value *StorePtr = SI->getPointerOperand();
   const SCEVAddRecExpr *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
@@ -879,7 +908,7 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
 
   // The store must be feeding a non-volatile load.
   LoadInst *LI = cast<LoadInst>(SI->getValueOperand());
-  assert(LI->isSimple() && "Expected only non-volatile stores.");
+  assert(LI->isUnordered() && "Expected only non-volatile non-ordered loads.");
 
   // See if the pointer expression is an AddRec like {base,+,1} on the current
   // loop, which indicates a strided load. If we have something else, it's a
@@ -953,6 +982,7 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
   const SCEV *NumBytesS =
       SE->getAddExpr(BECount, SE->getOne(IntPtrTy), SCEV::FlagNUW);
+
   if (StoreSize != 1)
     NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtrTy, StoreSize),
                                SCEV::FlagNUW);
@@ -960,9 +990,37 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
   Value *NumBytes =
       Expander.expandCodeFor(NumBytesS, IntPtrTy, Preheader->getTerminator());
 
-  CallInst *NewCall =
-      Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes,
-                           std::min(SI->getAlignment(), LI->getAlignment()));
+  unsigned Align = std::min(SI->getAlignment(), LI->getAlignment());
+  CallInst *NewCall = nullptr;
+  // Check whether to generate an unordered atomic memcpy:
+  //  If the load or store is atomic, then it must necessarily be unordered
+  //  by previous checks.
+  if (!SI->isAtomic() && !LI->isAtomic())
+    NewCall = Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes, Align);
+  else {
+    // We cannot allow unaligned ops for unordered load/store, so reject
+    // anything where the alignment isn't at least the element size.
+    if (Align < StoreSize)
+      return false;
+
+    // If the element.atomic memcpy is not lowered into explicit
+    // loads/stores later, then it will be lowered into an element-size
+    // specific lib call. If the lib call doesn't exist for our store size,
+    // then we shouldn't generate the memcpy.
+    if (StoreSize > TTI->getAtomicMemIntrinsicMaxElementSize())
+      return false;
+
+    NewCall = Builder.CreateElementUnorderedAtomicMemCpy(
+        StoreBasePtr, LoadBasePtr, NumBytes, StoreSize);
+
+    // Propagate alignment info onto the pointer args. Note that unordered
+    // atomic loads/stores are *required* by the spec to have an alignment
+    // but non-atomic loads/stores may not.
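Distilled, the codegen decision in this hunk (including the attribute propagation performed by the lines that follow) is: a plain memcpy when both sides are non-atomic, an element-wise unordered-atomic memcpy otherwise, with two bail-outs. A compact sketch under the same LLVM-5-era IRBuilder signatures used in this patch; emitCopy and its parameters are hypothetical names:

llvm::CallInst *emitCopy(llvm::IRBuilder<> &B, llvm::Value *Dst,
                         llvm::Value *Src, llvm::Value *Len, unsigned Align,
                         unsigned ElemSize, unsigned MaxElemSize, bool Atomic) {
  if (!Atomic)
    return B.CreateMemCpy(Dst, Src, Len, Align);
  // Unordered-atomic element copies must be element-aligned, and the element
  // size must have a lib-call lowering available.
  if (Align < ElemSize || ElemSize > MaxElemSize)
    return nullptr;
  llvm::CallInst *CI =
      B.CreateElementUnorderedAtomicMemCpy(Dst, Src, Len, ElemSize);
  // Atomic element copies require explicit alignment on both pointer args.
  CI->addParamAttr(0, llvm::Attribute::getWithAlignment(CI->getContext(), Align));
  CI->addParamAttr(1, llvm::Attribute::getWithAlignment(CI->getContext(), Align));
  return CI;
}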
+ NewCall->addParamAttr(0, Attribute::getWithAlignment(NewCall->getContext(), + SI->getAlignment())); + NewCall->addParamAttr(1, Attribute::getWithAlignment(NewCall->getContext(), + LI->getAlignment())); + } NewCall->setDebugLoc(SI->getDebugLoc()); DEBUG(dbgs() << " Formed memcpy: " << *NewCall << "\n" @@ -994,7 +1052,7 @@ bool LoopIdiomRecognize::avoidLIRForMultiBlockLoop(bool IsMemset, } bool LoopIdiomRecognize::runOnNoncountableLoop() { - return recognizePopcount(); + return recognizePopcount() || recognizeAndInsertCTLZ(); } /// Check if the given conditional branch is based on the comparison between @@ -1022,6 +1080,17 @@ static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry) { return nullptr; } +// Check if the recurrence variable `VarX` is in the right form to create +// the idiom. Returns the value coerced to a PHINode if so. +static PHINode *getRecurrenceVar(Value *VarX, Instruction *DefX, + BasicBlock *LoopEntry) { + auto *PhiX = dyn_cast(VarX); + if (PhiX && PhiX->getParent() == LoopEntry && + (PhiX->getOperand(0) == DefX || PhiX->getOperand(1) == DefX)) + return PhiX; + return nullptr; +} + /// Return true iff the idiom is detected in the loop. /// /// Additionally: @@ -1091,19 +1160,15 @@ static bool detectPopcountIdiom(Loop *CurLoop, BasicBlock *PreCondBB, if (!Dec || !((SubInst->getOpcode() == Instruction::Sub && Dec->isOne()) || (SubInst->getOpcode() == Instruction::Add && - Dec->isAllOnesValue()))) { + Dec->isMinusOne()))) { return false; } } // step 3: Check the recurrence of variable X - { - PhiX = dyn_cast(VarX1); - if (!PhiX || - (PhiX->getOperand(0) != DefX2 && PhiX->getOperand(1) != DefX2)) { - return false; - } - } + PhiX = getRecurrenceVar(VarX1, DefX2, LoopEntry); + if (!PhiX) + return false; // step 4: Find the instruction which count the population: cnt2 = cnt1 + 1 { @@ -1119,8 +1184,8 @@ static bool detectPopcountIdiom(Loop *CurLoop, BasicBlock *PreCondBB, if (!Inc || !Inc->isOne()) continue; - PHINode *Phi = dyn_cast(Inst->getOperand(0)); - if (!Phi || Phi->getParent() != LoopEntry) + PHINode *Phi = getRecurrenceVar(Inst->getOperand(0), Inst, LoopEntry); + if (!Phi) continue; // Check if the result of the instruction is live of the loop. @@ -1159,6 +1224,169 @@ static bool detectPopcountIdiom(Loop *CurLoop, BasicBlock *PreCondBB, return true; } +/// Return true if the idiom is detected in the loop. +/// +/// Additionally: +/// 1) \p CntInst is set to the instruction Counting Leading Zeros (CTLZ) +/// or nullptr if there is no such. +/// 2) \p CntPhi is set to the corresponding phi node +/// or nullptr if there is no such. +/// 3) \p Var is set to the value whose CTLZ could be used. +/// 4) \p DefX is set to the instruction calculating Loop exit condition. +/// +/// The core idiom we are trying to detect is: +/// \code +/// if (x0 == 0) +/// goto loop-exit // the precondition of the loop +/// cnt0 = init-val; +/// do { +/// x = phi (x0, x.next); //PhiX +/// cnt = phi(cnt0, cnt.next); +/// +/// cnt.next = cnt + 1; +/// ... +/// x.next = x >> 1; // DefX +/// ... +/// } while(x.next != 0); +/// +/// loop-exit: +/// \endcode +static bool detectCTLZIdiom(Loop *CurLoop, PHINode *&PhiX, + Instruction *&CntInst, PHINode *&CntPhi, + Instruction *&DefX) { + BasicBlock *LoopEntry; + Value *VarX = nullptr; + + DefX = nullptr; + PhiX = nullptr; + CntInst = nullptr; + CntPhi = nullptr; + LoopEntry = *(CurLoop->block_begin()); + + // step 1: Check if the loop-back branch is in desirable form. 
+  if (Value *T = matchCondition(
+          dyn_cast<BranchInst>(LoopEntry->getTerminator()), LoopEntry))
+    DefX = dyn_cast<Instruction>(T);
+  else
+    return false;
+
+  // step 2: detect instructions corresponding to "x.next = x >> 1"
+  if (!DefX || DefX->getOpcode() != Instruction::AShr)
+    return false;
+  ConstantInt *Shft = dyn_cast<ConstantInt>(DefX->getOperand(1));
+  if (!Shft || !Shft->isOne())
+    return false;
+  VarX = DefX->getOperand(0);
+
+  // step 3: Check the recurrence of variable X
+  PhiX = getRecurrenceVar(VarX, DefX, LoopEntry);
+  if (!PhiX)
+    return false;
+
+  // step 4: Find the instruction which counts the iterations:
+  //   cnt.next = cnt + 1
+  // TODO: We can skip the step. If the loop trip count is known (CTLZ),
+  // then all uses of "cnt.next" could be optimized to the trip count
+  // plus "cnt0". Currently it is not optimized.
+  // This step could be used to detect POPCNT instruction:
+  //   cnt.next = cnt + (x.next & 1)
+  for (BasicBlock::iterator Iter = LoopEntry->getFirstNonPHI()->getIterator(),
+                            IterE = LoopEntry->end();
+       Iter != IterE; Iter++) {
+    Instruction *Inst = &*Iter;
+    if (Inst->getOpcode() != Instruction::Add)
+      continue;
+
+    ConstantInt *Inc = dyn_cast<ConstantInt>(Inst->getOperand(1));
+    if (!Inc || !Inc->isOne())
+      continue;
+
+    PHINode *Phi = getRecurrenceVar(Inst->getOperand(0), Inst, LoopEntry);
+    if (!Phi)
+      continue;
+
+    CntInst = Inst;
+    CntPhi = Phi;
+    break;
+  }
+  if (!CntInst)
+    return false;
+
+  return true;
+}
+
+/// Recognize CTLZ idiom in a non-countable loop and convert the loop
+/// to countable (with CTLZ trip count).
+/// If a CTLZ trip count is inserted, returns true; otherwise, returns false.
+bool LoopIdiomRecognize::recognizeAndInsertCTLZ() {
+  // Give up if the loop has multiple blocks or multiple backedges.
+  if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1)
+    return false;
+
+  Instruction *CntInst, *DefX;
+  PHINode *CntPhi, *PhiX;
+  if (!detectCTLZIdiom(CurLoop, PhiX, CntInst, CntPhi, DefX))
+    return false;
+
+  bool IsCntPhiUsedOutsideLoop = false;
+  for (User *U : CntPhi->users())
+    if (!CurLoop->contains(dyn_cast<Instruction>(U))) {
+      IsCntPhiUsedOutsideLoop = true;
+      break;
+    }
+  bool IsCntInstUsedOutsideLoop = false;
+  for (User *U : CntInst->users())
+    if (!CurLoop->contains(dyn_cast<Instruction>(U))) {
+      IsCntInstUsedOutsideLoop = true;
+      break;
+    }
+  // If both CntInst and CntPhi are used outside the loop the profitability
+  // is questionable.
+  if (IsCntInstUsedOutsideLoop && IsCntPhiUsedOutsideLoop)
+    return false;
+
+  // For some CPUs the result of the CTLZ(X) intrinsic is undefined when X is
+  // 0. If we cannot guarantee X != 0, we need to check this when expanding.
+  bool ZeroCheck = false;
+  // It is safe to assume the Preheader exists, as it was checked in the
+  // parent function RunOnLoop.
+  BasicBlock *PH = CurLoop->getLoopPreheader();
+  Value *InitX = PhiX->getIncomingValueForBlock(PH);
+  // If we check X != 0 before entering the loop we don't need a zero check
+  // in the CTLZ intrinsic, but only if CntPhi is not used outside of the
+  // loop (if it is used, we count CTLZ(X >> 1)).
+  if (!IsCntPhiUsedOutsideLoop)
+    if (BasicBlock *PreCondBB = PH->getSinglePredecessor())
+      if (BranchInst *PreCondBr =
+              dyn_cast<BranchInst>(PreCondBB->getTerminator())) {
+        if (matchCondition(PreCondBr, PH) == InitX)
+          ZeroCheck = true;
+      }
+
+  // Check if the CTLZ intrinsic is profitable.
Assume it is always profitable + // if we delete the loop (the loop has only 6 instructions): + // %n.addr.0 = phi [ %n, %entry ], [ %shr, %while.cond ] + // %i.0 = phi [ %i0, %entry ], [ %inc, %while.cond ] + // %shr = ashr %n.addr.0, 1 + // %tobool = icmp eq %shr, 0 + // %inc = add nsw %i.0, 1 + // br i1 %tobool + + IRBuilder<> Builder(PH->getTerminator()); + SmallVector Ops = + {InitX, ZeroCheck ? Builder.getTrue() : Builder.getFalse()}; + ArrayRef Args(Ops); + if (CurLoop->getHeader()->size() != 6 && + TTI->getIntrinsicCost(Intrinsic::ctlz, InitX->getType(), Args) > + TargetTransformInfo::TCC_Basic) + return false; + + const DebugLoc DL = DefX->getDebugLoc(); + transformLoopToCountable(PH, CntInst, CntPhi, InitX, DL, ZeroCheck, + IsCntPhiUsedOutsideLoop); + return true; +} + /// Recognizes a population count idiom in a non-countable loop. /// /// If detected, transforms the relevant code to issue the popcount intrinsic @@ -1222,6 +1450,134 @@ static CallInst *createPopcntIntrinsic(IRBuilder<> &IRBuilder, Value *Val, return CI; } +static CallInst *createCTLZIntrinsic(IRBuilder<> &IRBuilder, Value *Val, + const DebugLoc &DL, bool ZeroCheck) { + Value *Ops[] = {Val, ZeroCheck ? IRBuilder.getTrue() : IRBuilder.getFalse()}; + Type *Tys[] = {Val->getType()}; + + Module *M = IRBuilder.GetInsertBlock()->getParent()->getParent(); + Value *Func = Intrinsic::getDeclaration(M, Intrinsic::ctlz, Tys); + CallInst *CI = IRBuilder.CreateCall(Func, Ops); + CI->setDebugLoc(DL); + + return CI; +} + +/// Transform the following loop: +/// loop: +/// CntPhi = PHI [Cnt0, CntInst] +/// PhiX = PHI [InitX, DefX] +/// CntInst = CntPhi + 1 +/// DefX = PhiX >> 1 +// LOOP_BODY +/// Br: loop if (DefX != 0) +/// Use(CntPhi) or Use(CntInst) +/// +/// Into: +/// If CntPhi used outside the loop: +/// CountPrev = BitWidth(InitX) - CTLZ(InitX >> 1) +/// Count = CountPrev + 1 +/// else +/// Count = BitWidth(InitX) - CTLZ(InitX) +/// loop: +/// CntPhi = PHI [Cnt0, CntInst] +/// PhiX = PHI [InitX, DefX] +/// PhiCount = PHI [Count, Dec] +/// CntInst = CntPhi + 1 +/// DefX = PhiX >> 1 +/// Dec = PhiCount - 1 +/// LOOP_BODY +/// Br: loop if (Dec != 0) +/// Use(CountPrev + Cnt0) // Use(CntPhi) +/// or +/// Use(Count + Cnt0) // Use(CntInst) +/// +/// If LOOP_BODY is empty the loop will be deleted. +/// If CntInst and DefX are not used in LOOP_BODY they will be removed. 
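Before the implementation, a scalar model of the equivalence the transformation relies on may help. This is illustrative C++, not the pass's code; it assumes a 32-bit input, the precondition x != 0 (the ZeroCheck case), and the GCC/Clang __builtin_clz builtin:

#include <cstdint>

// The shift-until-zero idiom takes exactly BitWidth - ctlz(x) iterations.
unsigned loopCount(uint32_t X) {   // the original non-countable loop
  unsigned Cnt = 0;
  do {
    ++Cnt;
    X >>= 1;
  } while (X != 0);
  return Cnt;
}

unsigned ctlzCount(uint32_t X) {   // the closed form the pass materializes
  return 32 - __builtin_clz(X);    // valid for X != 0 (ZeroCheck)
}

For example, loopCount(4) iterates three times (4 -> 2 -> 1 -> 0) and ctlzCount(4) = 32 - 29 = 3, which is what makes the rewritten loop countable.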
+void LoopIdiomRecognize::transformLoopToCountable( + BasicBlock *Preheader, Instruction *CntInst, PHINode *CntPhi, Value *InitX, + const DebugLoc DL, bool ZeroCheck, bool IsCntPhiUsedOutsideLoop) { + BranchInst *PreheaderBr = dyn_cast(Preheader->getTerminator()); + + // Step 1: Insert the CTLZ instruction at the end of the preheader block + // Count = BitWidth - CTLZ(InitX); + // If there are uses of CntPhi create: + // CountPrev = BitWidth - CTLZ(InitX >> 1); + IRBuilder<> Builder(PreheaderBr); + Builder.SetCurrentDebugLocation(DL); + Value *CTLZ, *Count, *CountPrev, *NewCount, *InitXNext; + + if (IsCntPhiUsedOutsideLoop) + InitXNext = Builder.CreateAShr(InitX, + ConstantInt::get(InitX->getType(), 1)); + else + InitXNext = InitX; + CTLZ = createCTLZIntrinsic(Builder, InitXNext, DL, ZeroCheck); + Count = Builder.CreateSub( + ConstantInt::get(CTLZ->getType(), + CTLZ->getType()->getIntegerBitWidth()), + CTLZ); + if (IsCntPhiUsedOutsideLoop) { + CountPrev = Count; + Count = Builder.CreateAdd( + CountPrev, + ConstantInt::get(CountPrev->getType(), 1)); + } + if (IsCntPhiUsedOutsideLoop) + NewCount = Builder.CreateZExtOrTrunc(CountPrev, + cast(CntInst->getType())); + else + NewCount = Builder.CreateZExtOrTrunc(Count, + cast(CntInst->getType())); + + // If the CTLZ counter's initial value is not zero, insert Add Inst. + Value *CntInitVal = CntPhi->getIncomingValueForBlock(Preheader); + ConstantInt *InitConst = dyn_cast(CntInitVal); + if (!InitConst || !InitConst->isZero()) + NewCount = Builder.CreateAdd(NewCount, CntInitVal); + + // Step 2: Insert new IV and loop condition: + // loop: + // ... + // PhiCount = PHI [Count, Dec] + // ... + // Dec = PhiCount - 1 + // ... + // Br: loop if (Dec != 0) + BasicBlock *Body = *(CurLoop->block_begin()); + auto *LbBr = dyn_cast(Body->getTerminator()); + ICmpInst *LbCond = cast(LbBr->getCondition()); + Type *Ty = Count->getType(); + + PHINode *TcPhi = PHINode::Create(Ty, 2, "tcphi", &Body->front()); + + Builder.SetInsertPoint(LbCond); + Instruction *TcDec = cast( + Builder.CreateSub(TcPhi, ConstantInt::get(Ty, 1), + "tcdec", false, true)); + + TcPhi->addIncoming(Count, Preheader); + TcPhi->addIncoming(TcDec, Body); + + CmpInst::Predicate Pred = + (LbBr->getSuccessor(0) == Body) ? CmpInst::ICMP_NE : CmpInst::ICMP_EQ; + LbCond->setPredicate(Pred); + LbCond->setOperand(0, TcDec); + LbCond->setOperand(1, ConstantInt::get(Ty, 0)); + + // Step 3: All the references to the original counter outside + // the loop are replaced with the NewCount -- the value returned from + // __builtin_ctlz(x). + if (IsCntPhiUsedOutsideLoop) + CntPhi->replaceUsesOutsideBlock(NewCount, Body); + else + CntInst->replaceUsesOutsideBlock(NewCount, Body); + + // step 4: Forget the "non-computable" trip-count SCEV associated with the + // loop. The loop would otherwise not be deleted even if it becomes empty. 
+ SE->forgetLoop(CurLoop); +} + void LoopIdiomRecognize::transformLoopToPopcount(BasicBlock *PreCondBB, Instruction *CntInst, PHINode *CntPhi, Value *Var) { diff --git a/interpreter/llvm/src/lib/Transforms/Scalar/LoopInterchange.cpp b/interpreter/llvm/src/lib/Transforms/Scalar/LoopInterchange.cpp index 9f3875a3027f4..2e0d8e0374c08 100644 --- a/interpreter/llvm/src/lib/Transforms/Scalar/LoopInterchange.cpp +++ b/interpreter/llvm/src/lib/Transforms/Scalar/LoopInterchange.cpp @@ -22,6 +22,7 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopIterator.h" #include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/OptimizationDiagnosticInfo.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" @@ -323,9 +324,10 @@ static PHINode *getInductionVariable(Loop *L, ScalarEvolution *SE) { class LoopInterchangeLegality { public: LoopInterchangeLegality(Loop *Outer, Loop *Inner, ScalarEvolution *SE, - LoopInfo *LI, DominatorTree *DT, bool PreserveLCSSA) + LoopInfo *LI, DominatorTree *DT, bool PreserveLCSSA, + OptimizationRemarkEmitter *ORE) : OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT), - PreserveLCSSA(PreserveLCSSA), InnerLoopHasReduction(false) {} + PreserveLCSSA(PreserveLCSSA), ORE(ORE), InnerLoopHasReduction(false) {} /// Check if the loops can be interchanged. bool canInterchangeLoops(unsigned InnerLoopId, unsigned OuterLoopId, @@ -353,6 +355,8 @@ class LoopInterchangeLegality { LoopInfo *LI; DominatorTree *DT; bool PreserveLCSSA; + /// Interface to emit optimization remarks. + OptimizationRemarkEmitter *ORE; bool InnerLoopHasReduction; }; @@ -361,8 +365,9 @@ class LoopInterchangeLegality { /// loop. class LoopInterchangeProfitability { public: - LoopInterchangeProfitability(Loop *Outer, Loop *Inner, ScalarEvolution *SE) - : OuterLoop(Outer), InnerLoop(Inner), SE(SE) {} + LoopInterchangeProfitability(Loop *Outer, Loop *Inner, ScalarEvolution *SE, + OptimizationRemarkEmitter *ORE) + : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {} /// Check if the loop interchange is profitable. bool isProfitable(unsigned InnerLoopId, unsigned OuterLoopId, @@ -376,6 +381,8 @@ class LoopInterchangeProfitability { /// Scev analysis. ScalarEvolution *SE; + /// Interface to emit optimization remarks. + OptimizationRemarkEmitter *ORE; }; /// LoopInterchangeTransform interchanges the loop. @@ -422,6 +429,9 @@ struct LoopInterchange : public FunctionPass { DependenceInfo *DI; DominatorTree *DT; bool PreserveLCSSA; + /// Interface to emit optimization remarks. + OptimizationRemarkEmitter *ORE; + LoopInterchange() : FunctionPass(ID), SE(nullptr), LI(nullptr), DI(nullptr), DT(nullptr) { initializeLoopInterchangePass(*PassRegistry::getPassRegistry()); @@ -435,6 +445,7 @@ struct LoopInterchange : public FunctionPass { AU.addRequired(); AU.addRequiredID(LoopSimplifyID); AU.addRequiredID(LCSSAID); + AU.addRequired(); } bool runOnFunction(Function &F) override { @@ -446,6 +457,7 @@ struct LoopInterchange : public FunctionPass { DI = &getAnalysis().getDI(); auto *DTWP = getAnalysisIfAvailable(); DT = DTWP ? &DTWP->getDomTree() : nullptr; + ORE = &getAnalysis().getORE(); PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); // Build up a worklist of loop pairs to analyze. 
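The pattern this change threads through LoopInterchange is the standard one for legacy passes that want user-visible diagnostics; a minimal sketch, with the remark name and helper illustrative rather than quoted from the patch:

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
using namespace llvm;

// Inside runOnFunction of a legacy pass that declared
//   AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
// the emitter is fetched once:
//   ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
// and each analysis bail-out then reports why the transform was skipped:
static void reportMissed(OptimizationRemarkEmitter *ORE, Loop *InnerLoop) {
  ORE->emit(OptimizationRemarkMissed("loop-interchange", "Dependence",
                                     InnerLoop->getStartLoc(),
                                     InnerLoop->getHeader())
            << "Cannot interchange loops due to dependences.");
}

Such remarks surface to users via -Rpass-missed=loop-interchange, or in the YAML optimization record produced by -fsave-optimization-record.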
@@ -575,18 +587,23 @@ struct LoopInterchange : public FunctionPass {
     Loop *OuterLoop = LoopList[OuterLoopId];
 
     LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, LI, DT,
-                                PreserveLCSSA);
+                                PreserveLCSSA, ORE);
     if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) {
       DEBUG(dbgs() << "Not interchanging Loops. Cannot prove legality\n");
       return false;
     }
     DEBUG(dbgs() << "Loops are legal to interchange\n");
-    LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE);
+    LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE);
     if (!LIP.isProfitable(InnerLoopId, OuterLoopId, DependencyMatrix)) {
       DEBUG(dbgs() << "Interchanging loops not profitable\n");
       return false;
     }
 
+    ORE->emit(OptimizationRemark(DEBUG_TYPE, "Interchanged",
+                                 InnerLoop->getStartLoc(),
+                                 InnerLoop->getHeader())
+              << "Loop interchanged with enclosing loop.");
+
     LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT,
                                  LoopNestExit, LIL.hasInnerLoopReduction());
     LIT.transform();
@@ -757,13 +774,28 @@ bool LoopInterchangeLegality::currentLimitations() {
   PHINode *InnerInductionVar;
   SmallVector<PHINode *, 8> Inductions;
   SmallVector<PHINode *, 8> Reductions;
-  if (!findInductionAndReductions(InnerLoop, Inductions, Reductions))
+  if (!findInductionAndReductions(InnerLoop, Inductions, Reductions)) {
+    DEBUG(dbgs() << "Only inner loops with induction or reduction PHI nodes "
+                 << "are supported currently.\n");
+    ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE,
+                                       "UnsupportedPHIInner",
+                                       InnerLoop->getStartLoc(),
+                                       InnerLoop->getHeader())
+              << "Only inner loops with induction or reduction PHI nodes can be"
+                 " interchanged currently.");
     return true;
+  }
 
   // TODO: Currently we handle only loops with 1 induction variable.
   if (Inductions.size() != 1) {
     DEBUG(dbgs() << "We currently only support loops with 1 induction variable."
                  << "Failed to interchange due to current limitation\n");
+    ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE,
+                                       "MultiInductionInner",
+                                       InnerLoop->getStartLoc(),
+                                       InnerLoop->getHeader())
+              << "Only inner loops with 1 induction variable can be "
+                 "interchanged currently.");
     return true;
   }
   if (Reductions.size() > 0)
@@ -771,32 +803,80 @@ bool LoopInterchangeLegality::currentLimitations() {
   InnerInductionVar = Inductions.pop_back_val();
   Reductions.clear();
-  if (!findInductionAndReductions(OuterLoop, Inductions, Reductions))
+  if (!findInductionAndReductions(OuterLoop, Inductions, Reductions)) {
+    DEBUG(dbgs() << "Only outer loops with induction or reduction PHI nodes "
+                 << "are supported currently.\n");
+    ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE,
+                                       "UnsupportedPHIOuter",
+                                       OuterLoop->getStartLoc(),
+                                       OuterLoop->getHeader())
+              << "Only outer loops with induction or reduction PHI nodes can be"
+                 " interchanged currently.");
     return true;
+  }
 
   // Outer loop cannot have reduction because then loops will not be tightly
   // nested.
-  if (!Reductions.empty())
+  if (!Reductions.empty()) {
+    DEBUG(dbgs() << "Outer loops with reductions are not supported "
+                 << "currently.\n");
+    ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE,
+                                       "ReductionsOuter",
+                                       OuterLoop->getStartLoc(),
+                                       OuterLoop->getHeader())
+              << "Outer loops with reductions cannot be interchanged "
+                 "currently.");
     return true;
+  }
   // TODO: Currently we handle only loops with 1 induction variable.
-  if (Inductions.size() != 1)
+  if (Inductions.size() != 1) {
+    DEBUG(dbgs() << "Loops with more than 1 induction variable are not "
+                 << "supported currently.\n");
+    ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE,
+                                       "MultiIndutionOuter",
+                                       OuterLoop->getStartLoc(),
+                                       OuterLoop->getHeader())
+              << "Only outer loops with 1 induction variable can be "
+                 "interchanged currently.");
     return true;
+  }
 
   // TODO: Triangular loops are not handled for now.
   if (!isLoopStructureUnderstood(InnerInductionVar)) {
     DEBUG(dbgs() << "Loop structure not understood by pass\n");
+    ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE,
+                                       "UnsupportedStructureInner",
+                                       InnerLoop->getStartLoc(),
+                                       InnerLoop->getHeader())
+              << "Inner loop structure not understood currently.");
     return true;
   }
 
   // TODO: We only handle LCSSA PHI's corresponding to reduction for now.
   BasicBlock *LoopExitBlock =
       getLoopLatchExitBlock(OuterLoopLatch, OuterLoopHeader);
-  if (!LoopExitBlock || !containsSafePHI(LoopExitBlock, true))
+  if (!LoopExitBlock || !containsSafePHI(LoopExitBlock, true)) {
+    DEBUG(dbgs() << "Can only handle LCSSA PHIs in outer loops currently.\n");
+    ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE,
+                                       "NoLCSSAPHIOuter",
+                                       OuterLoop->getStartLoc(),
+                                       OuterLoop->getHeader())
+              << "Only outer loops with LCSSA PHIs can be interchanged "
+                 "currently.");
     return true;
+  }
 
   LoopExitBlock = getLoopLatchExitBlock(InnerLoopLatch, InnerLoopHeader);
-  if (!LoopExitBlock || !containsSafePHI(LoopExitBlock, false))
+  if (!LoopExitBlock || !containsSafePHI(LoopExitBlock, false)) {
+    DEBUG(dbgs() << "Can only handle LCSSA PHIs in inner loops currently.\n");
+    ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE,
+                                       "NoLCSSAPHIOuterInner",
+                                       InnerLoop->getStartLoc(),
+                                       InnerLoop->getHeader())
+              << "Only inner loops with LCSSA PHIs can be interchanged "
+                 "currently.");
     return true;
+  }
 
   // TODO: Current limitation: Since we split the inner loop latch at the point
   // were induction variable is incremented (induction.next); We cannot have
@@ -816,8 +896,16 @@ bool LoopInterchangeLegality::currentLimitations() {
   InnerIndexVarInc =
       dyn_cast<Instruction>(InnerInductionVar->getIncomingValue(0));
-  if (!InnerIndexVarInc)
+  if (!InnerIndexVarInc) {
+    DEBUG(dbgs() << "Did not find an instruction to increment the induction "
+                 << "variable.\n");
+    ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE,
+                                       "NoIncrementInInner",
+                                       InnerLoop->getStartLoc(),
+                                       InnerLoop->getHeader())
+              << "The inner loop does not increment the induction variable.");
     return true;
+  }
 
   // Since we split the inner loop latch on this induction variable. Make sure
   // we do not have any instruction between the induction variable and branch
@@ -827,19 +915,35 @@ bool LoopInterchangeLegality::currentLimitations() {
   for (const Instruction &I : reverse(*InnerLoopLatch)) {
     if (isa<BranchInst>(I) || isa<CmpInst>(I) || isa<TruncInst>(I))
       continue;
+
     // We found an instruction. If this is not induction variable then it is not
     // safe to split this loop latch.
-    if (!I.isIdenticalTo(InnerIndexVarInc))
+    if (!I.isIdenticalTo(InnerIndexVarInc)) {
+      DEBUG(dbgs() << "Found unsupported instructions between induction "
+                   << "variable increment and branch.\n");
+      ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE,
+                                         "UnsupportedInsBetweenInduction",
+                                         InnerLoop->getStartLoc(),
+                                         InnerLoop->getHeader())
+                << "Found unsupported instruction between induction variable "
+                   "increment and branch.");
       return true;
+    }
     FoundInduction = true;
     break;
   }
   // The loop latch ended and we didn't find the induction variable return as
   // current limitation.
- if (!FoundInduction) + if (!FoundInduction) { + DEBUG(dbgs() << "Did not find the induction variable.\n"); + ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE, + "NoIndutionVariable", + InnerLoop->getStartLoc(), + InnerLoop->getHeader()) + << "Did not find the induction variable."); return true; - + } return false; } @@ -851,6 +955,11 @@ bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId, DEBUG(dbgs() << "Failed interchange InnerLoopId = " << InnerLoopId << " and OuterLoopId = " << OuterLoopId << " due to dependence\n"); + ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE, + "Dependence", + InnerLoop->getStartLoc(), + InnerLoop->getHeader()) + << "Cannot interchange loops due to dependences."); return false; } @@ -886,6 +995,12 @@ bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId, // Check if the loops are tightly nested. if (!tightlyNested(OuterLoop, InnerLoop)) { DEBUG(dbgs() << "Loops not tightly nested\n"); + ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE, + "NotTightlyNested", + InnerLoop->getStartLoc(), + InnerLoop->getHeader()) + << "Cannot interchange loops because they are not tightly " + "nested."); return false; } @@ -981,9 +1096,18 @@ bool LoopInterchangeProfitability::isProfitable(unsigned InnerLoopId, // It is not profitable as per current cache profitability model. But check if // we can move this loop outside to improve parallelism. - bool ImprovesPar = - isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix); - return ImprovesPar; + if (isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix)) + return true; + + ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE, + "InterchangeNotProfitable", + InnerLoop->getStartLoc(), + InnerLoop->getHeader()) + << "Interchanging loops is too costly (cost=" + << ore::NV("Cost", Cost) << ", threshold=" + << ore::NV("Threshold", LoopInterchangeCostThreshold) << + ") and it does not improve parallelism."); + return false; } void LoopInterchangeTransform::removeChildLoop(Loop *OuterLoop, @@ -1267,6 +1391,7 @@ INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopSimplify) INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass) INITIALIZE_PASS_END(LoopInterchange, "loop-interchange", "Interchanges loops for cache reuse", false, false) diff --git a/interpreter/llvm/src/lib/Transforms/Scalar/LoopPredication.cpp b/interpreter/llvm/src/lib/Transforms/Scalar/LoopPredication.cpp index 0ce6044293261..9b12ba180444b 100644 --- a/interpreter/llvm/src/lib/Transforms/Scalar/LoopPredication.cpp +++ b/interpreter/llvm/src/lib/Transforms/Scalar/LoopPredication.cpp @@ -37,7 +37,6 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar/LoopPredication.h" -#include "llvm/Pass.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" @@ -48,6 +47,7 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/IR/PatternMatch.h" +#include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/LoopUtils.h" @@ -58,12 +58,30 @@ using namespace llvm; namespace { class LoopPredication { + /// Represents an induction variable check: + /// icmp Pred, , + struct LoopICmp { + ICmpInst::Predicate Pred; + const SCEVAddRecExpr *IV; + const SCEV *Limit; + LoopICmp(ICmpInst::Predicate Pred, const 
SCEVAddRecExpr *IV, + const SCEV *Limit) + : Pred(Pred), IV(IV), Limit(Limit) {} + LoopICmp() {} + }; + ScalarEvolution *SE; Loop *L; const DataLayout *DL; BasicBlock *Preheader; + Optional parseLoopICmp(ICmpInst *ICI); + + Value *expandCheck(SCEVExpander &Expander, IRBuilder<> &Builder, + ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, + Instruction *InsertAt); + Optional widenICmpRangeCheck(ICmpInst *ICI, SCEVExpander &Expander, IRBuilder<> &Builder); bool widenGuardConditions(IntrinsicInst *II, SCEVExpander &Expander); @@ -116,16 +134,10 @@ PreservedAnalyses LoopPredicationPass::run(Loop &L, LoopAnalysisManager &AM, return getLoopPassPreservedAnalyses(); } -/// If ICI can be widened to a loop invariant condition emits the loop -/// invariant condition in the loop preheader and return it, otherwise -/// returns None. -Optional LoopPredication::widenICmpRangeCheck(ICmpInst *ICI, - SCEVExpander &Expander, - IRBuilder<> &Builder) { - DEBUG(dbgs() << "Analyzing ICmpInst condition:\n"); - DEBUG(ICI->dump()); - +Optional +LoopPredication::parseLoopICmp(ICmpInst *ICI) { ICmpInst::Predicate Pred = ICI->getPredicate(); + Value *LHS = ICI->getOperand(0); Value *RHS = ICI->getOperand(1); const SCEV *LHSS = SE->getSCEV(LHS); @@ -135,17 +147,54 @@ Optional LoopPredication::widenICmpRangeCheck(ICmpInst *ICI, if (isa(RHSS)) return None; - // Canonicalize RHS to be loop invariant bound, LHS - a loop computable index + // Canonicalize RHS to be loop invariant bound, LHS - a loop computable IV if (SE->isLoopInvariant(LHSS, L)) { std::swap(LHS, RHS); std::swap(LHSS, RHSS); Pred = ICmpInst::getSwappedPredicate(Pred); } - if (!SE->isLoopInvariant(RHSS, L) || !isSafeToExpand(RHSS, *SE)) + + const SCEVAddRecExpr *AR = dyn_cast(LHSS); + if (!AR || AR->getLoop() != L) return None; - const SCEVAddRecExpr *IndexAR = dyn_cast(LHSS); - if (!IndexAR || IndexAR->getLoop() != L) + return LoopICmp(Pred, AR, RHSS); +} + +Value *LoopPredication::expandCheck(SCEVExpander &Expander, + IRBuilder<> &Builder, + ICmpInst::Predicate Pred, const SCEV *LHS, + const SCEV *RHS, Instruction *InsertAt) { + Type *Ty = LHS->getType(); + assert(Ty == RHS->getType() && "expandCheck operands have different types?"); + Value *LHSV = Expander.expandCodeFor(LHS, Ty, InsertAt); + Value *RHSV = Expander.expandCodeFor(RHS, Ty, InsertAt); + return Builder.CreateICmp(Pred, LHSV, RHSV); +} + +/// If ICI can be widened to a loop invariant condition emits the loop +/// invariant condition in the loop preheader and return it, otherwise +/// returns None. +Optional LoopPredication::widenICmpRangeCheck(ICmpInst *ICI, + SCEVExpander &Expander, + IRBuilder<> &Builder) { + DEBUG(dbgs() << "Analyzing ICmpInst condition:\n"); + DEBUG(ICI->dump()); + + auto RangeCheck = parseLoopICmp(ICI); + if (!RangeCheck) { + DEBUG(dbgs() << "Failed to parse the loop latch condition!\n"); + return None; + } + + ICmpInst::Predicate Pred = RangeCheck->Pred; + const SCEVAddRecExpr *IndexAR = RangeCheck->IV; + const SCEV *RHSS = RangeCheck->Limit; + + auto CanExpand = [this](const SCEV *S) { + return SE->isLoopInvariant(S, L) && isSafeToExpand(S, *SE); + }; + if (!CanExpand(RHSS)) return None; DEBUG(dbgs() << "IndexAR: "); @@ -170,17 +219,13 @@ Optional LoopPredication::widenICmpRangeCheck(ICmpInst *ICI, DEBUG(dbgs() << "NewLHSS: "); DEBUG(NewLHSS->dump()); - if (!SE->isLoopInvariant(NewLHSS, L) || !isSafeToExpand(NewLHSS, *SE)) + if (!CanExpand(NewLHSS)) return None; DEBUG(dbgs() << "NewLHSS is loop invariant and safe to expand. 
Expand!\n"); - Type *Ty = LHS->getType(); Instruction *InsertAt = Preheader->getTerminator(); - assert(Ty == RHS->getType() && "icmp operands have different types?"); - Value *NewLHS = Expander.expandCodeFor(NewLHSS, Ty, InsertAt); - Value *NewRHS = Expander.expandCodeFor(RHSS, Ty, InsertAt); - return Builder.CreateICmp(Pred, NewLHS, NewRHS); + return expandCheck(Expander, Builder, Pred, NewLHSS, RHSS, InsertAt); } bool LoopPredication::widenGuardConditions(IntrinsicInst *Guard, @@ -272,6 +317,9 @@ bool LoopPredication::runOnLoop(Loop *Loop) { if (II->getIntrinsicID() == Intrinsic::experimental_guard) Guards.push_back(II); + if (Guards.empty()) + return false; + SCEVExpander Expander(*SE, *DL, "loop-predication"); bool Changed = false; diff --git a/interpreter/llvm/src/lib/Transforms/Scalar/LoopRerollPass.cpp b/interpreter/llvm/src/lib/Transforms/Scalar/LoopRerollPass.cpp index fd15a9014def5..fc0216e76a5bb 100644 --- a/interpreter/llvm/src/lib/Transforms/Scalar/LoopRerollPass.cpp +++ b/interpreter/llvm/src/lib/Transforms/Scalar/LoopRerollPass.cpp @@ -11,10 +11,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/BitVector.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" @@ -31,6 +30,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" diff --git a/interpreter/llvm/src/lib/Transforms/Scalar/LoopRotation.cpp b/interpreter/llvm/src/lib/Transforms/Scalar/LoopRotation.cpp index 2ba9265566a8c..3506ac343d594 100644 --- a/interpreter/llvm/src/lib/Transforms/Scalar/LoopRotation.cpp +++ b/interpreter/llvm/src/lib/Transforms/Scalar/LoopRotation.cpp @@ -347,7 +347,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { // in the map. ValueMap[Inst] = V; if (!C->mayHaveSideEffects()) { - delete C; + C->deleteValue(); C = nullptr; } } else { @@ -485,10 +485,22 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { DomTreeNode *Node = HeaderChildren[I]; BasicBlock *BB = Node->getBlock(); - pred_iterator PI = pred_begin(BB); - BasicBlock *NearestDom = *PI; - for (pred_iterator PE = pred_end(BB); PI != PE; ++PI) - NearestDom = DT->findNearestCommonDominator(NearestDom, *PI); + BasicBlock *NearestDom = nullptr; + for (BasicBlock *Pred : predecessors(BB)) { + // Consider only reachable basic blocks. + if (!DT->getNode(Pred)) + continue; + + if (!NearestDom) { + NearestDom = Pred; + continue; + } + + NearestDom = DT->findNearestCommonDominator(NearestDom, Pred); + assert(NearestDom && "No NearestCommonDominator found"); + } + + assert(NearestDom && "Nearest dominator not found"); // Remember if this changes the DomTree. 
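Stepping back to the LoopPredication refactor completed above: parseLoopICmp and expandCheck split the old monolithic widenICmpRangeCheck into a parse step and an emit step, but the net transform is unchanged. Roughly, for a unit-stride increasing IV, a per-iteration guard becomes one loop-invariant check in the preheader. A minimal sketch under those assumptions (helper names are ours, not the pass's):

```cpp
// Before: the guard condition i < len is re-checked on every iteration.
// After: one widened check covers all iterations, since for i in [0, n)
// the largest value the guard ever sees is n - 1.
void deoptimize() {} // stand-in for a guard's deoptimization path

void before(int *a, int n, int len) {
  for (int i = 0; i < n; ++i) {
    if (!(i < len)) deoptimize();  // guard(i < len)
    a[i] = 0;
  }
}

void after(int *a, int n, int len) {
  bool WideCond = (n - 1 < len);   // loop-invariant: max IV value vs. limit
  for (int i = 0; i < n; ++i) {
    if (!WideCond) deoptimize();   // guard(WideCond), now invariant
    a[i] = 0;
  }
}
```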
if (Node->getIDom()->getBlock() != NearestDom) { diff --git a/interpreter/llvm/src/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/interpreter/llvm/src/lib/Transforms/Scalar/LoopStrengthReduce.cpp index ccedb98d7fa15..3638da118cb7e 100644 --- a/interpreter/llvm/src/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/interpreter/llvm/src/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -140,6 +140,13 @@ static cl::opt LSRExpNarrow( cl::desc("Narrow LSR complex solution using" " expectation of registers number")); +// Flag to narrow search space by filtering non-optimal formulae with +// the same ScaledReg and Scale. +static cl::opt FilterSameScaledReg( + "lsr-filter-same-scaled-reg", cl::Hidden, cl::init(true), + cl::desc("Narrow LSR search space by filtering non-optimal formulae" + " with the same ScaledReg and Scale")); + #ifndef NDEBUG // Stress test IV chain generation. static cl::opt StressIVChain( @@ -950,39 +957,37 @@ namespace { /// This class is used to measure and compare candidate formulae. class Cost { - /// TODO: Some of these could be merged. Also, a lexical ordering - /// isn't always optimal. - unsigned Insns; - unsigned NumRegs; - unsigned AddRecCost; - unsigned NumIVMuls; - unsigned NumBaseAdds; - unsigned ImmCost; - unsigned SetupCost; - unsigned ScaleCost; + TargetTransformInfo::LSRCost C; public: - Cost() - : Insns(0), NumRegs(0), AddRecCost(0), NumIVMuls(0), NumBaseAdds(0), - ImmCost(0), SetupCost(0), ScaleCost(0) {} + Cost() { + C.Insns = 0; + C.NumRegs = 0; + C.AddRecCost = 0; + C.NumIVMuls = 0; + C.NumBaseAdds = 0; + C.ImmCost = 0; + C.SetupCost = 0; + C.ScaleCost = 0; + } - bool operator<(const Cost &Other) const; + bool isLess(Cost &Other, const TargetTransformInfo &TTI); void Lose(); #ifndef NDEBUG // Once any of the metrics loses, they must all remain losers. bool isValid() { - return ((Insns | NumRegs | AddRecCost | NumIVMuls | NumBaseAdds - | ImmCost | SetupCost | ScaleCost) != ~0u) - || ((Insns & NumRegs & AddRecCost & NumIVMuls & NumBaseAdds - & ImmCost & SetupCost & ScaleCost) == ~0u); + return ((C.Insns | C.NumRegs | C.AddRecCost | C.NumIVMuls | C.NumBaseAdds + | C.ImmCost | C.SetupCost | C.ScaleCost) != ~0u) + || ((C.Insns & C.NumRegs & C.AddRecCost & C.NumIVMuls & C.NumBaseAdds + & C.ImmCost & C.SetupCost & C.ScaleCost) == ~0u); } #endif bool isLoser() { assert(isValid() && "invalid cost"); - return NumRegs == ~0u; + return C.NumRegs == ~0u; } void RateFormula(const TargetTransformInfo &TTI, @@ -1170,10 +1175,10 @@ void Cost::RateRegister(const SCEV *Reg, } // Otherwise, it will be an invariant with respect to Loop L. - ++NumRegs; + ++C.NumRegs; return; } - AddRecCost += 1; /// TODO: This should be a function of the stride. + C.AddRecCost += 1; /// TODO: This should be a function of the stride. // Add the step value register, if it needs one. // TODO: The non-affine case isn't precisely modeled here. @@ -1185,7 +1190,7 @@ void Cost::RateRegister(const SCEV *Reg, } } } - ++NumRegs; + ++C.NumRegs; // Rough heuristic; favor registers which don't require extra setup // instructions in the preheader. 
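Wrapping the individual metrics in a TargetTransformInfo::LSRCost struct means the comparison policy is now owned by the target via TTI.isLSRCostLess (used by Cost::isLess in the hunks that follow). To a first approximation the default is a lexicographic comparison of the components; a standalone sketch of that idea, not the verbatim in-tree implementation:

```cpp
#include <tuple>

// Standalone analogue of TargetTransformInfo::LSRCost and a lexicographic
// "is less" comparison in the spirit of the default isLSRCostLess hook.
struct LSRCost {
  unsigned Insns, NumRegs, AddRecCost, NumIVMuls, NumBaseAdds,
           ImmCost, SetupCost, ScaleCost;
};

static bool isLSRCostLess(const LSRCost &C1, const LSRCost &C2) {
  // std::tie compares element by element, so earlier fields dominate.
  return std::tie(C1.NumRegs, C1.AddRecCost, C1.NumIVMuls, C1.NumBaseAdds,
                  C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
         std::tie(C2.NumRegs, C2.AddRecCost, C2.NumIVMuls, C2.NumBaseAdds,
                  C2.ScaleCost, C2.ImmCost, C2.SetupCost);
}
```

A target that cares more about, say, total instructions than register count can override the hook and reorder the tuple accordingly; that is the point of routing the comparison through TTI.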
@@ -1194,9 +1199,9 @@ void Cost::RateRegister(const SCEV *Reg, !(isa(Reg) && (isa(cast(Reg)->getStart()) || isa(cast(Reg)->getStart())))) - ++SetupCost; + ++C.SetupCost; - NumIVMuls += isa(Reg) && + C.NumIVMuls += isa(Reg) && SE.hasComputableLoopEvolution(Reg, L); } @@ -1229,9 +1234,9 @@ void Cost::RateFormula(const TargetTransformInfo &TTI, SmallPtrSetImpl *LoserRegs) { assert(F.isCanonical(*L) && "Cost is accurate only for canonical formula"); // Tally up the registers. - unsigned PrevAddRecCost = AddRecCost; - unsigned PrevNumRegs = NumRegs; - unsigned PrevNumBaseAdds = NumBaseAdds; + unsigned PrevAddRecCost = C.AddRecCost; + unsigned PrevNumRegs = C.NumRegs; + unsigned PrevNumBaseAdds = C.NumBaseAdds; if (const SCEV *ScaledReg = F.ScaledReg) { if (VisitedRegs.count(ScaledReg)) { Lose(); @@ -1251,45 +1256,51 @@ void Cost::RateFormula(const TargetTransformInfo &TTI, return; } - // Treat every new register that exceeds TTI.getNumberOfRegisters() - 1 as - // additional instruction (at least fill). - unsigned TTIRegNum = TTI.getNumberOfRegisters(false) - 1; - if (NumRegs > TTIRegNum) { - // Cost already exceeded TTIRegNum, then only newly added register can add - // new instructions. - if (PrevNumRegs > TTIRegNum) - Insns += (NumRegs - PrevNumRegs); - else - Insns += (NumRegs - TTIRegNum); - } - // Determine how many (unfolded) adds we'll need inside the loop. size_t NumBaseParts = F.getNumRegs(); if (NumBaseParts > 1) // Do not count the base and a possible second register if the target // allows to fold 2 registers. - NumBaseAdds += + C.NumBaseAdds += NumBaseParts - (1 + (F.Scale && isAMCompletelyFolded(TTI, LU, F))); - NumBaseAdds += (F.UnfoldedOffset != 0); + C.NumBaseAdds += (F.UnfoldedOffset != 0); // Accumulate non-free scaling amounts. - ScaleCost += getScalingFactorCost(TTI, LU, F, *L); + C.ScaleCost += getScalingFactorCost(TTI, LU, F, *L); // Tally up the non-zero immediates. for (const LSRFixup &Fixup : LU.Fixups) { int64_t O = Fixup.Offset; int64_t Offset = (uint64_t)O + F.BaseOffset; if (F.BaseGV) - ImmCost += 64; // Handle symbolic values conservatively. + C.ImmCost += 64; // Handle symbolic values conservatively. // TODO: This should probably be the pointer size. else if (Offset != 0) - ImmCost += APInt(64, Offset, true).getMinSignedBits(); + C.ImmCost += APInt(64, Offset, true).getMinSignedBits(); // Check with target if this offset with this instruction is // specifically not supported. if ((isa(Fixup.UserInst) || isa(Fixup.UserInst)) && !TTI.isFoldableMemAccessOffset(Fixup.UserInst, Offset)) - NumBaseAdds++; + C.NumBaseAdds++; + } + + // If we don't count instruction cost exit here. + if (!InsnsCost) { + assert(isValid() && "invalid cost"); + return; + } + + // Treat every new register that exceeds TTI.getNumberOfRegisters() - 1 as + // additional instruction (at least fill). + unsigned TTIRegNum = TTI.getNumberOfRegisters(false) - 1; + if (C.NumRegs > TTIRegNum) { + // Cost already exceeded TTIRegNum, then only newly added register can add + // new instructions. + if (PrevNumRegs > TTIRegNum) + C.Insns += (C.NumRegs - PrevNumRegs); + else + C.Insns += (C.NumRegs - TTIRegNum); } // If ICmpZero formula ends with not 0, it could not be replaced by @@ -1302,55 +1313,54 @@ void Cost::RateFormula(const TargetTransformInfo &TTI, // For {-10, +, 1}: // i = i + 1; if (LU.Kind == LSRUse::ICmpZero && !F.hasZeroEnd()) - Insns++; + C.Insns++; // Each new AddRec adds 1 instruction to calculation. 
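The register-pressure term relocated in the hunk above is easy to sanity-check with concrete numbers; a self-contained restatement (the numeric values in the comments are hypothetical):

```cpp
// Every register beyond getNumberOfRegisters(false) - 1 is treated as one
// extra (fill) instruction, counting only registers added since the
// previous formula rating once the budget is already exceeded.
unsigned extraSpillInsns(unsigned PrevNumRegs, unsigned NumRegs,
                         unsigned TTIRegNum) {
  if (NumRegs <= TTIRegNum)
    return 0;
  return (PrevNumRegs > TTIRegNum) ? NumRegs - PrevNumRegs
                                   : NumRegs - TTIRegNum;
}
// E.g. TTIRegNum = 15, PrevNumRegs = 14, NumRegs = 17 adds 17 - 15 = 2.
```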
- Insns += (AddRecCost - PrevAddRecCost); + C.Insns += (C.AddRecCost - PrevAddRecCost); // BaseAdds adds instructions for unfolded registers. if (LU.Kind != LSRUse::ICmpZero) - Insns += NumBaseAdds - PrevNumBaseAdds; + C.Insns += C.NumBaseAdds - PrevNumBaseAdds; assert(isValid() && "invalid cost"); } /// Set this cost to a losing value. void Cost::Lose() { - Insns = ~0u; - NumRegs = ~0u; - AddRecCost = ~0u; - NumIVMuls = ~0u; - NumBaseAdds = ~0u; - ImmCost = ~0u; - SetupCost = ~0u; - ScaleCost = ~0u; + C.Insns = ~0u; + C.NumRegs = ~0u; + C.AddRecCost = ~0u; + C.NumIVMuls = ~0u; + C.NumBaseAdds = ~0u; + C.ImmCost = ~0u; + C.SetupCost = ~0u; + C.ScaleCost = ~0u; } /// Choose the lower cost. -bool Cost::operator<(const Cost &Other) const { - if (InsnsCost && Insns != Other.Insns) - return Insns < Other.Insns; - return std::tie(NumRegs, AddRecCost, NumIVMuls, NumBaseAdds, ScaleCost, - ImmCost, SetupCost) < - std::tie(Other.NumRegs, Other.AddRecCost, Other.NumIVMuls, - Other.NumBaseAdds, Other.ScaleCost, Other.ImmCost, - Other.SetupCost); +bool Cost::isLess(Cost &Other, const TargetTransformInfo &TTI) { + if (InsnsCost.getNumOccurrences() > 0 && InsnsCost && + C.Insns != Other.C.Insns) + return C.Insns < Other.C.Insns; + return TTI.isLSRCostLess(C, Other.C); } void Cost::print(raw_ostream &OS) const { - OS << Insns << " instruction" << (Insns == 1 ? " " : "s "); - OS << NumRegs << " reg" << (NumRegs == 1 ? "" : "s"); - if (AddRecCost != 0) - OS << ", with addrec cost " << AddRecCost; - if (NumIVMuls != 0) - OS << ", plus " << NumIVMuls << " IV mul" << (NumIVMuls == 1 ? "" : "s"); - if (NumBaseAdds != 0) - OS << ", plus " << NumBaseAdds << " base add" - << (NumBaseAdds == 1 ? "" : "s"); - if (ScaleCost != 0) - OS << ", plus " << ScaleCost << " scale cost"; - if (ImmCost != 0) - OS << ", plus " << ImmCost << " imm cost"; - if (SetupCost != 0) - OS << ", plus " << SetupCost << " setup cost"; + if (InsnsCost) + OS << C.Insns << " instruction" << (C.Insns == 1 ? " " : "s "); + OS << C.NumRegs << " reg" << (C.NumRegs == 1 ? "" : "s"); + if (C.AddRecCost != 0) + OS << ", with addrec cost " << C.AddRecCost; + if (C.NumIVMuls != 0) + OS << ", plus " << C.NumIVMuls << " IV mul" + << (C.NumIVMuls == 1 ? "" : "s"); + if (C.NumBaseAdds != 0) + OS << ", plus " << C.NumBaseAdds << " base add" + << (C.NumBaseAdds == 1 ? "" : "s"); + if (C.ScaleCost != 0) + OS << ", plus " << C.ScaleCost << " scale cost"; + if (C.ImmCost != 0) + OS << ", plus " << C.ImmCost << " imm cost"; + if (C.SetupCost != 0) + OS << ", plus " << C.SetupCost << " setup cost"; } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -1899,6 +1909,7 @@ class LSRInstance { void NarrowSearchSpaceByDetectingSupersets(); void NarrowSearchSpaceByCollapsingUnrolledCode(); void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(); + void NarrowSearchSpaceByFilterFormulaWithSameScaledReg(); void NarrowSearchSpaceByDeletingCostlyFormulas(); void NarrowSearchSpaceByPickingWinnerRegs(); void NarrowSearchSpaceUsingHeuristics(); @@ -2315,7 +2326,7 @@ LSRInstance::OptimizeLoopTermCond() { dyn_cast_or_null(getExactSDiv(B, A, SE))) { const ConstantInt *C = D->getValue(); // Stride of one or negative one can have reuse with non-addresses. - if (C->isOne() || C->isAllOnesValue()) + if (C->isOne() || C->isMinusOne()) goto decline_post_inc; // Avoid weird situations. 
         if (C->getValue().getMinSignedBits() >= 64 ||
@@ -3805,6 +3816,7 @@ void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
       if (!F.hasRegsUsedByUsesOtherThan(LUIdx, RegUses))
         continue;
+      F.canonicalize(*L);
       (void)InsertFormula(LU, LUIdx, F);
     }
   }
@@ -3902,8 +3914,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
         // Compute the difference between the two.
         int64_t Imm = (uint64_t)JImm - M->first;
-        for (int LUIdx = UsedByIndices.find_first(); LUIdx != -1;
-             LUIdx = UsedByIndices.find_next(LUIdx))
+        for (unsigned LUIdx : UsedByIndices.set_bits())
           // Make a memo of this use, offset, and register tuple.
           if (UniqueItems.insert(std::make_pair(LUIdx, Imm)).second)
             WorkItems.push_back(WorkItem(LUIdx, Imm, OrigReg));
@@ -4105,7 +4116,7 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
         Cost CostBest;
         Regs.clear();
         CostBest.RateFormula(TTI, Best, Regs, VisitedRegs, L, SE, DT, LU);
-        if (CostF < CostBest)
+        if (CostF.isLess(CostBest, TTI))
           std::swap(F, Best);
         DEBUG(dbgs() << "  Filtering out formula "; F.print(dbgs());
               dbgs() << "\n"
@@ -4303,6 +4314,104 @@ void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){
   }
 }

+/// If an LSRUse has multiple formulae with the same ScaledReg and Scale,
+/// pick the best one and delete the others.
+/// This narrowing heuristic is to keep as many formulae with different
+/// Scale and ScaledReg pairs as possible while narrowing the search space.
+/// The benefit is that it is more likely to find a better solution from
+/// a formulae set with more Scale and ScaledReg variations than from a
+/// formulae set with the same Scale and ScaledReg. The picking-winner-reg
+/// heuristic will often keep the formulae with the same Scale and
+/// ScaledReg and filter the others, and we want to avoid that if possible.
+void LSRInstance::NarrowSearchSpaceByFilterFormulaWithSameScaledReg() {
+  if (EstimateSearchSpaceComplexity() < ComplexityLimit)
+    return;
+
+  DEBUG(dbgs() << "The search space is too complex.\n"
+                  "Narrowing the search space by choosing the best Formula "
+                  "from the Formulae with the same Scale and ScaledReg.\n");
+
+  // Map the "Scale * ScaledReg" pair to the best formula of current LSRUse.
+  typedef DenseMap, size_t> BestFormulaeTy;
+  BestFormulaeTy BestFormulae;
+#ifndef NDEBUG
+  bool ChangedFormulae = false;
+#endif
+  DenseSet VisitedRegs;
+  SmallPtrSet Regs;
+
+  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
+    LSRUse &LU = Uses[LUIdx];
+    DEBUG(dbgs() << "Filtering for use "; LU.print(dbgs()); dbgs() << '\n');
+
+    // Return true if Formula FA is better than Formula FB.
+    auto IsBetterThan = [&](Formula &FA, Formula &FB) {
+      // First we will try to choose the Formula with fewer new registers.
+      // For a register used by current Formula, the more the register is
+      // shared among LSRUses, the less we increase the register number
+      // counter of the formula.
+      size_t FARegNum = 0;
+      for (const SCEV *Reg : FA.BaseRegs) {
+        const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg);
+        FARegNum += (NumUses - UsedByIndices.count() + 1);
+      }
+      size_t FBRegNum = 0;
+      for (const SCEV *Reg : FB.BaseRegs) {
+        const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg);
+        FBRegNum += (NumUses - UsedByIndices.count() + 1);
+      }
+      if (FARegNum != FBRegNum)
+        return FARegNum < FBRegNum;
+
+      // If the new register numbers are the same, choose the Formula with
+      // less Cost.
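The first tie-break in IsBetterThan rewards formulae whose base registers are already shared by many uses; a small numeric restatement of the counting rule (all numbers hypothetical):

```cpp
#include <cstddef>

// Per-base-register contribution used by IsBetterThan above: the more
// LSRUses already share the register, the smaller the count it adds.
std::size_t newRegCount(std::size_t NumUses, std::size_t UsedByCount) {
  return NumUses - UsedByCount + 1;
}
// With NumUses = 8: a register private to one use contributes 8, while a
// register shared by four uses contributes 5, so the formula built on
// shared registers wins the FARegNum < FBRegNum comparison.
```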
+ Cost CostFA, CostFB; + Regs.clear(); + CostFA.RateFormula(TTI, FA, Regs, VisitedRegs, L, SE, DT, LU); + Regs.clear(); + CostFB.RateFormula(TTI, FB, Regs, VisitedRegs, L, SE, DT, LU); + return CostFA.isLess(CostFB, TTI); + }; + + bool Any = false; + for (size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms; + ++FIdx) { + Formula &F = LU.Formulae[FIdx]; + if (!F.ScaledReg) + continue; + auto P = BestFormulae.insert({{F.ScaledReg, F.Scale}, FIdx}); + if (P.second) + continue; + + Formula &Best = LU.Formulae[P.first->second]; + if (IsBetterThan(F, Best)) + std::swap(F, Best); + DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs()); + dbgs() << "\n" + " in favor of formula "; + Best.print(dbgs()); dbgs() << '\n'); +#ifndef NDEBUG + ChangedFormulae = true; +#endif + LU.DeleteFormula(F); + --FIdx; + --NumForms; + Any = true; + } + if (Any) + LU.RecomputeRegs(LUIdx, RegUses); + + // Reset this to prepare for the next use. + BestFormulae.clear(); + } + + DEBUG(if (ChangedFormulae) { + dbgs() << "\n" + "After filtering out undesirable candidates:\n"; + print_uses(dbgs()); + }); +} + /// The function delete formulas with high registers number expectation. /// Assuming we don't know the value of each formula (already delete /// all inefficient), generate probability of not selecting for each @@ -4513,6 +4622,8 @@ void LSRInstance::NarrowSearchSpaceUsingHeuristics() { NarrowSearchSpaceByDetectingSupersets(); NarrowSearchSpaceByCollapsingUnrolledCode(); NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(); + if (FilterSameScaledReg) + NarrowSearchSpaceByFilterFormulaWithSameScaledReg(); if (LSRExpNarrow) NarrowSearchSpaceByDeletingCostlyFormulas(); else @@ -4573,7 +4684,7 @@ void LSRInstance::SolveRecurse(SmallVectorImpl &Solution, NewCost = CurCost; NewRegs = CurRegs; NewCost.RateFormula(TTI, F, NewRegs, VisitedRegs, L, SE, DT, LU); - if (NewCost < SolutionCost) { + if (NewCost.isLess(SolutionCost, TTI)) { Workspace.push_back(&F); if (Workspace.size() != Uses.size()) { SolveRecurse(Solution, SolutionCost, Workspace, NewCost, diff --git a/interpreter/llvm/src/lib/Transforms/Scalar/LoopUnrollPass.cpp b/interpreter/llvm/src/lib/Transforms/Scalar/LoopUnrollPass.cpp index 62aa6ee48069d..530a68424d5cb 100644 --- a/interpreter/llvm/src/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/interpreter/llvm/src/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -131,7 +131,7 @@ static const unsigned NoThreshold = UINT_MAX; /// Gather the various unrolling parameters based on the defaults, compiler /// flags, TTI overrides and user specified parameters. static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences( - Loop *L, const TargetTransformInfo &TTI, int OptLevel, + Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, int OptLevel, Optional UserThreshold, Optional UserCount, Optional UserAllowPartial, Optional UserRuntime, Optional UserUpperBound) { @@ -158,7 +158,7 @@ static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences( UP.AllowPeeling = true; // Override with any target specific settings - TTI.getUnrollingPreferences(L, UP); + TTI.getUnrollingPreferences(L, SE, UP); // Apply size attributes if (L->getHeader()->getParent()->optForSize()) { @@ -699,7 +699,7 @@ static uint64_t getUnrolledLoopSize( // Calculates unroll count and writes it to UP.Count. 
static bool computeUnrollCount( Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, - ScalarEvolution *SE, OptimizationRemarkEmitter *ORE, unsigned &TripCount, + ScalarEvolution &SE, OptimizationRemarkEmitter *ORE, unsigned &TripCount, unsigned MaxTripCount, unsigned &TripMultiple, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP, bool &UseUpperBound) { // Check for explicit Count. @@ -770,7 +770,7 @@ static bool computeUnrollCount( // helps to remove a significant number of instructions. // To check that, run additional analysis on the loop. if (Optional Cost = analyzeLoopUnrollCost( - L, FullUnrollTripCount, DT, *SE, TTI, + L, FullUnrollTripCount, DT, SE, TTI, UP.Threshold * UP.MaxPercentThresholdBoost / 100)) { unsigned Boost = getFullUnrollBoostingFactor(*Cost, UP.MaxPercentThresholdBoost); @@ -836,6 +836,8 @@ static bool computeUnrollCount( } else { UP.Count = TripCount; } + if (UP.Count > UP.MaxCount) + UP.Count = UP.MaxCount; if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount && UP.Count != TripCount) ORE->emit( @@ -926,7 +928,7 @@ static bool computeUnrollCount( } static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, - ScalarEvolution *SE, const TargetTransformInfo &TTI, + ScalarEvolution &SE, const TargetTransformInfo &TTI, AssumptionCache &AC, OptimizationRemarkEmitter &ORE, bool PreserveLCSSA, int OptLevel, Optional ProvidedCount, @@ -948,8 +950,8 @@ static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, bool NotDuplicatable; bool Convergent; TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences( - L, TTI, OptLevel, ProvidedThreshold, ProvidedCount, ProvidedAllowPartial, - ProvidedRuntime, ProvidedUpperBound); + L, SE, TTI, OptLevel, ProvidedThreshold, ProvidedCount, + ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound); // Exit early if unrolling is disabled. if (UP.Threshold == 0 && (!UP.Partial || UP.PartialThreshold == 0)) return false; @@ -977,8 +979,8 @@ static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, if (!ExitingBlock || !L->isLoopExiting(ExitingBlock)) ExitingBlock = L->getExitingBlock(); if (ExitingBlock) { - TripCount = SE->getSmallConstantTripCount(L, ExitingBlock); - TripMultiple = SE->getSmallConstantTripMultiple(L, ExitingBlock); + TripCount = SE.getSmallConstantTripCount(L, ExitingBlock); + TripMultiple = SE.getSmallConstantTripMultiple(L, ExitingBlock); } // If the loop contains a convergent operation, the prelude we'd add @@ -1000,8 +1002,8 @@ static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, // count. bool MaxOrZero = false; if (!TripCount) { - MaxTripCount = SE->getSmallConstantMaxTripCount(L); - MaxOrZero = SE->isBackedgeTakenCountMaxOrZero(L); + MaxTripCount = SE.getSmallConstantMaxTripCount(L); + MaxOrZero = SE.isBackedgeTakenCountMaxOrZero(L); // We can unroll by the upper bound amount if it's generally allowed or if // we know that the loop is executed either the upper bound or zero times. // (MaxOrZero unrolling keeps only the first loop test, so the number of @@ -1030,7 +1032,7 @@ static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, // Unroll the loop. 
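The reason ScalarEvolution is now threaded by reference through gatherUnrollingPreferences, computeUnrollCount, and tryToUnrollLoop is that TTI::getUnrollingPreferences overrides can consult trip-count facts. A hedged sketch of such an override (the function and target are hypothetical, not in-tree code; only the SE and UP APIs shown are taken from this patch):

```cpp
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"
using namespace llvm;

// Hypothetical target hook illustrating why it now receives ScalarEvolution.
void getUnrollingPreferencesForMyTarget(
    Loop *L, ScalarEvolution &SE,
    TargetTransformInfo::UnrollingPreferences &UP) {
  // Fully unroll loops whose trip count is provably tiny.
  if (unsigned MaxTrip = SE.getSmallConstantMaxTripCount(L))
    if (MaxTrip <= 8)
      UP.Count = MaxTrip;
}
```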
if (!UnrollLoop(L, UP.Count, TripCount, UP.Force, UP.Runtime, UP.AllowExpensiveTripCount, UseUpperBound, MaxOrZero, - TripMultiple, UP.PeelCount, LI, SE, &DT, &AC, &ORE, + TripMultiple, UP.PeelCount, LI, &SE, &DT, &AC, &ORE, PreserveLCSSA)) return false; @@ -1073,7 +1075,7 @@ class LoopUnroll : public LoopPass { auto &DT = getAnalysis().getDomTree(); LoopInfo *LI = &getAnalysis().getLoopInfo(); - ScalarEvolution *SE = &getAnalysis().getSE(); + ScalarEvolution &SE = getAnalysis().getSE(); const TargetTransformInfo &TTI = getAnalysis().getTTI(F); auto &AC = getAnalysis().getAssumptionCache(F); @@ -1157,7 +1159,7 @@ PreservedAnalyses LoopUnrollPass::run(Loop &L, LoopAnalysisManager &AM, if (!AllowPartialUnrolling) AllowPartialParam = RuntimeParam = UpperBoundParam = false; bool Changed = tryToUnrollLoop( - &L, AR.DT, &AR.LI, &AR.SE, AR.TTI, AR.AC, *ORE, + &L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, *ORE, /*PreserveLCSSA*/ true, OptLevel, /*Count*/ None, /*Threshold*/ None, AllowPartialParam, RuntimeParam, UpperBoundParam); if (!Changed) diff --git a/interpreter/llvm/src/lib/Transforms/Scalar/LoopUnswitch.cpp b/interpreter/llvm/src/lib/Transforms/Scalar/LoopUnswitch.cpp index 6ef1464e9338e..d0c96fa627a47 100644 --- a/interpreter/llvm/src/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/interpreter/llvm/src/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -26,34 +26,34 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Scalar.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/BlockFrequencyInfoImpl.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/DivergenceAnalysis.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/Analysis/BlockFrequencyInfoImpl.h" -#include "llvm/Analysis/BlockFrequencyInfo.h" -#include "llvm/Analysis/BranchProbabilityInfo.h" -#include "llvm/Support/BranchProbability.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" -#include "llvm/IR/Instructions.h" #include "llvm/IR/InstrTypes.h" -#include "llvm/IR/Module.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/BranchProbability.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" @@ -831,7 +831,12 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val, /// mapping the blocks with the specified map. static Loop *CloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM, LoopInfo *LI, LPPassManager *LPM) { - Loop &New = LPM->addLoop(PL); + Loop &New = *new Loop(); + if (PL) + PL->addChildLoop(&New); + else + LI->addTopLevelLoop(&New); + LPM->addLoop(New); // Add all of the blocks in L to the new loop. 
   for (Loop::block_iterator I = L->block_begin(), E = L->block_end();

diff --git a/interpreter/llvm/src/lib/Transforms/Scalar/LowerAtomic.cpp b/interpreter/llvm/src/lib/Transforms/Scalar/LowerAtomic.cpp
index 08e60b16bedff..6f77c5bd0d079 100644
--- a/interpreter/llvm/src/lib/Transforms/Scalar/LowerAtomic.cpp
+++ b/interpreter/llvm/src/lib/Transforms/Scalar/LowerAtomic.cpp
@@ -155,8 +155,7 @@ class LowerAtomicLegacyPass : public FunctionPass {
   }

   bool runOnFunction(Function &F) override {
-    if (skipFunction(F))
-      return false;
+    // Don't skip optnone functions; atomics still need to be lowered.
     FunctionAnalysisManager DummyFAM;
     auto PA = Impl.run(F, DummyFAM);
     return !PA.areAllPreserved();

diff --git a/interpreter/llvm/src/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp b/interpreter/llvm/src/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
index a143b9a3c645f..46f8a35642656 100644
--- a/interpreter/llvm/src/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
+++ b/interpreter/llvm/src/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
@@ -14,6 +14,7 @@
 #include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/iterator_range.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/Function.h"
@@ -83,6 +84,151 @@ static bool handleSwitchExpect(SwitchInst &SI) {
   return true;
 }

+/// Handler for PHINodes that define the value argument to an
+/// @llvm.expect call.
+///
+/// If the operand of the phi has a constant value and it 'contradicts'
+/// the expected value of the phi def, then the corresponding incoming
+/// edge of the phi is unlikely to be taken. Using that information,
+/// the branch probability info for the originating branch can be inferred.
+static void handlePhiDef(CallInst *Expect) {
+  Value &Arg = *Expect->getArgOperand(0);
+  ConstantInt *ExpectedValue = dyn_cast<ConstantInt>(Expect->getArgOperand(1));
+  if (!ExpectedValue)
+    return;
+  const APInt &ExpectedPhiValue = ExpectedValue->getValue();
+
+  // Walk backward up the list of instructions that have 'copy' semantics,
+  // 'stripping' the copies, until a PHI node or an instruction of unknown
+  // kind is reached. Negation via xor is also handled.
+  //
+  //   C = PHI(...);
+  //   B = C;
+  //   A = B;
+  //   D = __builtin_expect(A, 0);
+  //
+  Value *V = &Arg;
+  SmallVector Operations;
+  while (!isa<PHINode>(V)) {
+    if (ZExtInst *ZExt = dyn_cast<ZExtInst>(V)) {
+      V = ZExt->getOperand(0);
+      Operations.push_back(ZExt);
+      continue;
+    }
+
+    if (SExtInst *SExt = dyn_cast<SExtInst>(V)) {
+      V = SExt->getOperand(0);
+      Operations.push_back(SExt);
+      continue;
+    }
+
+    BinaryOperator *BinOp = dyn_cast<BinaryOperator>(V);
+    if (!BinOp || BinOp->getOpcode() != Instruction::Xor)
+      return;
+
+    ConstantInt *CInt = dyn_cast<ConstantInt>(BinOp->getOperand(1));
+    if (!CInt)
+      return;
+
+    V = BinOp->getOperand(0);
+    Operations.push_back(BinOp);
+  }
+
+  // Executes the recorded operations on input 'Value'.
+  auto ApplyOperations = [&](const APInt &Value) {
+    APInt Result = Value;
+    for (auto Op : llvm::reverse(Operations)) {
+      switch (Op->getOpcode()) {
+      case Instruction::Xor:
+        Result ^= cast<ConstantInt>(Op->getOperand(1))->getValue();
+        break;
+      case Instruction::ZExt:
+        Result = Result.zext(Op->getType()->getIntegerBitWidth());
+        break;
+      case Instruction::SExt:
+        Result = Result.sext(Op->getType()->getIntegerBitWidth());
+        break;
+      default:
+        llvm_unreachable("Unexpected operation");
+      }
+    }
+    return Result;
+  };
+
+  auto *PhiDef = dyn_cast<PHINode>(V);
+
+  // Get the first dominating conditional branch of the operand
+  // i's incoming block.
+  auto GetDomConditional = [&](unsigned i) -> BranchInst * {
+    BasicBlock *BB = PhiDef->getIncomingBlock(i);
+    BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
+    if (BI && BI->isConditional())
+      return BI;
+    BB = BB->getSinglePredecessor();
+    if (!BB)
+      return nullptr;
+    BI = dyn_cast<BranchInst>(BB->getTerminator());
+    if (!BI || BI->isUnconditional())
+      return nullptr;
+    return BI;
+  };
+
+  // Now walk through all Phi operands to find phi operands with values
+  // conflicting with the expected phi output value. Any such operand
+  // indicates the incoming edge to that operand is unlikely.
+  for (unsigned i = 0, e = PhiDef->getNumIncomingValues(); i != e; ++i) {
+
+    Value *PhiOpnd = PhiDef->getIncomingValue(i);
+    ConstantInt *CI = dyn_cast<ConstantInt>(PhiOpnd);
+    if (!CI)
+      continue;
+
+    // Not an interesting case -- we cannot infer anything useful when the
+    // operand value matches the expected phi output.
+    if (ExpectedPhiValue == ApplyOperations(CI->getValue()))
+      continue;
+
+    BranchInst *BI = GetDomConditional(i);
+    if (!BI)
+      continue;
+
+    MDBuilder MDB(PhiDef->getContext());
+
+    // There are two situations in which an operand of the PhiDef comes
+    // from a given successor of a branch instruction BI.
+    // 1) When the incoming block of the operand is the successor block;
+    // 2) When the incoming block is BI's enclosing block and the
+    //    successor is the PhiDef's enclosing block.
+    //
+    // Returns true if the operand which comes from OpndIncomingBB
+    // comes from the outgoing edge of BI that leads to the Succ block.
+    auto *OpndIncomingBB = PhiDef->getIncomingBlock(i);
+    auto IsOpndComingFromSuccessor = [&](BasicBlock *Succ) {
+      if (OpndIncomingBB == Succ)
+        // If this successor is the incoming block for this
+        // Phi operand, then this successor does lead to the Phi.
+        return true;
+      if (OpndIncomingBB == BI->getParent() && Succ == PhiDef->getParent())
+        // Otherwise, if the edge is directly from the branch
+        // to the Phi, this successor is the one feeding this
+        // Phi operand.
+        return true;
+      return false;
+    };
+
+    if (IsOpndComingFromSuccessor(BI->getSuccessor(1)))
+      BI->setMetadata(
+          LLVMContext::MD_prof,
+          MDB.createBranchWeights(LikelyBranchWeight, UnlikelyBranchWeight));
+    else if (IsOpndComingFromSuccessor(BI->getSuccessor(0)))
+      BI->setMetadata(
+          LLVMContext::MD_prof,
+          MDB.createBranchWeights(UnlikelyBranchWeight, LikelyBranchWeight));
+  }
+}
+
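A concrete, hypothetical source pattern that handlePhiDef is aimed at, with the copy/negation chain spelled out:

```cpp
// The walk above strips the sign-extension 'copy' (a = b) and the xor
// negation (b = c ^ 1) to reach the PHI defining c. Re-applying the
// operations: incoming c == 1 maps to a == 0 and matches the expectation,
// while incoming c == 0 maps to a == 1 and contradicts it, so the edge
// that produces c == 0 is the one marked unlikely.
long g();

long f(int x) {
  int c = x ? 1 : 0;            // conceptually c = PHI(1, 0)
  int b = c ^ 1;                // negation via xor, stripped by the walk
  long a = b;                   // sext 'copy', stripped as well
  if (__builtin_expect(a, 0))   // expect a == 0, i.e. expect c == 1
    return g();
  return 0;
}
```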
 // Handle both BranchInst and SelectInst.
 template <class BrSelInst> static bool handleBrSelExpect(BrSelInst &BSI) {
@@ -98,10 +244,18 @@ template <class BrSelInst> static bool handleBrSelExpect(BrSelInst &BSI) {

   CallInst *CI;

   ICmpInst *CmpI = dyn_cast<ICmpInst>(BSI.getCondition());
+  CmpInst::Predicate Predicate;
+  ConstantInt *CmpConstOperand = nullptr;
   if (!CmpI) {
     CI = dyn_cast<CallInst>(BSI.getCondition());
+    Predicate = CmpInst::ICMP_NE;
   } else {
-    if (CmpI->getPredicate() != CmpInst::ICMP_NE)
+    Predicate = CmpI->getPredicate();
+    if (Predicate != CmpInst::ICMP_NE && Predicate != CmpInst::ICMP_EQ)
+      return false;
+
+    CmpConstOperand = dyn_cast<ConstantInt>(CmpI->getOperand(1));
+    if (!CmpConstOperand)
       return false;
     CI = dyn_cast<CallInst>(CmpI->getOperand(0));
   }
@@ -109,6 +263,13 @@ template <class BrSelInst> static bool handleBrSelExpect(BrSelInst &BSI) {
   if (!CI)
     return false;

+  uint64_t ValueComparedTo = 0;
+  if (CmpConstOperand) {
+    if (CmpConstOperand->getBitWidth() > 64)
+      return false;
+    ValueComparedTo = CmpConstOperand->getZExtValue();
+  }
+
   Function *Fn = CI->getCalledFunction();
   if (!Fn || Fn->getIntrinsicID() != Intrinsic::expect)
     return false;
@@ -121,9 +282,8 @@ template <class BrSelInst> static bool handleBrSelExpect(BrSelInst &BSI) {
   MDBuilder MDB(CI->getContext());
   MDNode *Node;

-  // If expect value is equal to 1 it means that we are more likely to take
-  // branch 0, in other case more likely is branch 1.
-  if (ExpectedValue->isOne())
+  if ((ExpectedValue->getZExtValue() == ValueComparedTo) ==
+      (Predicate == CmpInst::ICMP_EQ))
     Node = MDB.createBranchWeights(LikelyBranchWeight, UnlikelyBranchWeight);
   else
     Node = MDB.createBranchWeights(UnlikelyBranchWeight, LikelyBranchWeight);
@@ -173,6 +333,10 @@ static bool lowerExpectIntrinsic(Function &F) {
       Function *Fn = CI->getCalledFunction();

       if (Fn && Fn->getIntrinsicID() == Intrinsic::expect) {
+        // Before erasing the llvm.expect, walk backward to find the PHI
+        // that defines llvm.expect's first arg, and infer branch
+        // probability:
+        handlePhiDef(CI);
         Value *Exp = CI->getArgOperand(0);
         CI->replaceAllUsesWith(Exp);
         CI->eraseFromParent();

diff --git a/interpreter/llvm/src/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp b/interpreter/llvm/src/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp
index 4f413715ffe68..070114a84cc50 100644
--- a/interpreter/llvm/src/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp
+++ b/interpreter/llvm/src/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp
@@ -17,10 +17,10 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/InstIterator.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/MDBuilder.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Pass.h"

diff --git a/interpreter/llvm/src/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/interpreter/llvm/src/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 21a632073da7a..7896396f0898d 100644
--- a/interpreter/llvm/src/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/interpreter/llvm/src/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -12,11 +12,12 @@
 //
 //===----------------------------------------------------------------------===//

+#include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
 #include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/iterator_range.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/iterator_range.h"
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
@@ -31,12 +32,12 @@
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GetElementPtrTypeIterator.h"
 #include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Operator.h"
@@ -49,7 +50,6 @@
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include
 #include

diff --git a/interpreter/llvm/src/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp b/interpreter/llvm/src/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
index acd3ef6791bed..6727cf0179c18 100644
--- a/interpreter/llvm/src/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
+++ b/interpreter/llvm/src/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
@@ -238,7 +238,7 @@ PHINode *MergedLoadStoreMotion::getPHIOperand(BasicBlock *BB, StoreInst *S0,
                                 &BB->front());
   NewPN->addIncoming(Opd1, S0->getParent());
   NewPN->addIncoming(Opd2, S1->getParent());
-  if (MD && NewPN->getType()->getScalarType()->isPointerTy())
+  if (MD && NewPN->getType()->isPtrOrPtrVectorTy())
     MD->invalidateCachedPointerInfo(NewPN);
   return NewPN;
 }

diff --git a/interpreter/llvm/src/lib/Transforms/Scalar/NewGVN.cpp b/interpreter/llvm/src/lib/Transforms/Scalar/NewGVN.cpp
index a8f399d49cce2..9d018563618ea 100644
--- a/interpreter/llvm/src/lib/Transforms/Scalar/NewGVN.cpp
+++ b/interpreter/llvm/src/lib/Transforms/Scalar/NewGVN.cpp
@@ -30,9 +30,19 @@
 /// tracks what operations have a given value number (IE it also tracks the
 /// reverse mapping from value number -> operations with that value number), so
 /// that it only needs to reprocess the instructions that are affected when
-/// something's value number changes. The rest of the algorithm is devoted to
-/// performing symbolic evaluation, forward propagation, and simplification of
-/// operations based on the value numbers deduced so far.
+/// something's value number changes. The vast majority of complexity and code
+/// in this file is devoted to tracking what value numbers could change for
+/// what instructions when various things happen. The rest of the algorithm is
+/// devoted to performing symbolic evaluation, forward propagation, and
+/// simplification of operations based on the value numbers deduced so far.
+///
+/// In order to make the GVN mostly-complete, we use a technique derived from
+/// "Detection of Redundant Expressions: A Complete and Polynomial-time
+/// Algorithm in SSA" by R.R. Pai. The source of incompleteness in most
+/// SSA-based GVN algorithms is related to their inability to detect
+/// equivalence between phi of ops (IE phi(a+b, c+d)) and op of phis
+/// (phi(a,c) + phi(b, d)). We resolve this issue by generating the
+/// equivalent "phi of ops" form for each op of phis we see, in a way that
+/// only takes polynomial time to resolve.
 ///
 /// We also do not perform elimination by using any published algorithm. All
 /// published algorithms are O(Instructions).
Instead, we use a technique that @@ -51,7 +61,6 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/SparseBitVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/TinyPtrVector.h" #include "llvm/Analysis/AliasAnalysis.h" @@ -104,12 +113,14 @@ STATISTIC(NumGVNLeaderChanges, "Number of leader changes"); STATISTIC(NumGVNSortedLeaderChanges, "Number of sorted leader changes"); STATISTIC(NumGVNAvoidedSortedLeaderChanges, "Number of avoided sorted leader changes"); -STATISTIC(NumGVNNotMostDominatingLeader, - "Number of times a member dominated it's new classes' leader"); STATISTIC(NumGVNDeadStores, "Number of redundant/dead stores eliminated"); +STATISTIC(NumGVNPHIOfOpsCreated, "Number of PHI of ops created"); +STATISTIC(NumGVNPHIOfOpsEliminations, + "Number of things eliminated using PHI of ops"); DEBUG_COUNTER(VNCounter, "newgvn-vn", "Controls which instructions are value numbered") - +DEBUG_COUNTER(PHIOfOpsCounter, "newgvn-phi", + "Controls which instructions we create phi of ops for") // Currently store defining access refinement is too slow due to basicaa being // egregiously slow. This flag lets us keep it working while we work on this // issue. @@ -172,10 +183,9 @@ struct TarjanSCC { } } // See if we really were the root of a component, by seeing if we still have - // our DFSNumber. - // If we do, we are the root of the component, and we have completed a - // component. If we do not, - // we are not the root of a component, and belong on the component stack. + // our DFSNumber. If we do, we are the root of the component, and we have + // completed a component. If we do not, we are not the root of a component, + // and belong on the component stack. if (Root.lookup(I) == OurDFS) { unsigned ComponentID = Components.size(); Components.resize(Components.size() + 1); @@ -283,7 +293,6 @@ class CongruenceClass { // Forward propagation info const Expression *getDefiningExpr() const { return DefiningExpr; } - void setDefiningExpr(const Expression *E) { DefiningExpr = E; } // Value member set bool empty() const { return Members.empty(); } @@ -369,6 +378,15 @@ class CongruenceClass { }; namespace llvm { +struct ExactEqualsExpression { + const Expression &E; + explicit ExactEqualsExpression(const Expression &E) : E(E) {} + hash_code getComputedHash() const { return E.getComputedHash(); } + bool operator==(const Expression &Other) const { + return E.exactlyEquals(Other); + } +}; + template <> struct DenseMapInfo { static const Expression *getEmptyKey() { auto Val = static_cast(-1); @@ -380,15 +398,30 @@ template <> struct DenseMapInfo { Val <<= PointerLikeTypeTraits::NumLowBitsAvailable; return reinterpret_cast(Val); } - static unsigned getHashValue(const Expression *V) { - return static_cast(V->getHashValue()); + static unsigned getHashValue(const Expression *E) { + return E->getComputedHash(); + } + static unsigned getHashValue(const ExactEqualsExpression &E) { + return E.getComputedHash(); } + static bool isEqual(const ExactEqualsExpression &LHS, const Expression *RHS) { + if (RHS == getTombstoneKey() || RHS == getEmptyKey()) + return false; + return LHS == *RHS; + } + static bool isEqual(const Expression *LHS, const Expression *RHS) { if (LHS == RHS) return true; if (LHS == getTombstoneKey() || RHS == getTombstoneKey() || LHS == getEmptyKey() || RHS == getEmptyKey()) return false; + // Compare hashes before equality. 
This is *not* what the hashtable does, + // since it is computing it modulo the number of buckets, whereas we are + // using the full hash keyspace. Since the hashes are precomputed, this + // check is *much* faster than equality. + if (LHS->getComputedHash() != RHS->getComputedHash()) + return false; return *LHS == *RHS; } }; @@ -431,6 +464,33 @@ class NewGVN { // Value Mappings. DenseMap ValueToClass; DenseMap ValueToExpression; + // Value PHI handling, used to make equivalence between phi(op, op) and + // op(phi, phi). + // These mappings just store various data that would normally be part of the + // IR. + DenseSet PHINodeUses; + // Map a temporary instruction we created to a parent block. + DenseMap TempToBlock; + // Map between the temporary phis we created and the real instructions they + // are known equivalent to. + DenseMap RealToTemp; + // In order to know when we should re-process instructions that have + // phi-of-ops, we track the set of expressions that they needed as + // leaders. When we discover new leaders for those expressions, we process the + // associated phi-of-op instructions again in case they have changed. The + // other way they may change is if they had leaders, and those leaders + // disappear. However, at the point they have leaders, there are uses of the + // relevant operands in the created phi node, and so they will get reprocessed + // through the normal user marking we perform. + mutable DenseMap> AdditionalUsers; + DenseMap> + ExpressionToPhiOfOps; + // Map from basic block to the temporary operations we created + DenseMap> PHIOfOpsPHIs; + // Map from temporary operation to MemoryAccess. + DenseMap TempToMemory; + // Set of all temporary instructions we created. + DenseSet AllTempInstructions; // Mapping from predicate info we used to the instructions we used it with. // In order to correctly ensure propagation, we must keep track of what @@ -463,12 +523,19 @@ class NewGVN { enum MemoryPhiState { MPS_Invalid, MPS_TOP, MPS_Equivalent, MPS_Unique }; DenseMap MemoryPhiState; - enum PhiCycleState { PCS_Unknown, PCS_CycleFree, PCS_Cycle }; - mutable DenseMap PhiCycleState; + enum InstCycleState { ICS_Unknown, ICS_CycleFree, ICS_Cycle }; + mutable DenseMap InstCycleState; // Expression to class mapping. using ExpressionClassMap = DenseMap; ExpressionClassMap ExpressionToClass; + // We have a single expression that represents currently DeadExpressions. + // For dead expressions we can prove will stay dead, we mark them with + // DFS number zero. However, it's possible in the case of phi nodes + // for us to assume/prove all arguments are dead during fixpointing. + // We use DeadExpression for that case. + DeadExpression *SingletonDeadExpression = nullptr; + // Which values have changed as a result of leader changes. 
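The bookkeeping just declared (RealToTemp, ExpressionToPhiOfOps, PHIOfOpsPHIs, TempToBlock, and friends) exists to support the phi-of-ops equivalence described in the file comment. In source terms, the redundancy being targeted looks roughly like this hypothetical example:

```cpp
// 't' is a "phi of ops": it merges a+b and c+d. 'u' is the matching
// "op of phis". With the phi-of-ops technique, NewGVN aims to give both
// the same value number, so 't - u' should then fold to 0 downstream.
int phiOfOps(bool p, int a, int b, int c, int d) {
  int t;
  if (p)
    t = a + b;         // t = phi(a + b, c + d)
  else
    t = c + d;
  int x = p ? a : c;   // x = phi(a, c)
  int y = p ? b : d;   // y = phi(b, d)
  int u = x + y;       // op of phis, equivalent to t
  return t - u;
}
```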
SmallPtrSet LeaderChanges; @@ -522,7 +589,8 @@ class NewGVN { const Expression *createBinaryExpression(unsigned, Type *, Value *, Value *) const; PHIExpression *createPHIExpression(Instruction *, bool &HasBackEdge, - bool &AllConstant) const; + bool &OriginalOpsConstant) const; + const DeadExpression *createDeadExpression() const; const VariableExpression *createVariableExpression(Value *) const; const ConstantExpression *createConstantExpression(Constant *) const; const Expression *createVariableOrConstant(Value *V) const; @@ -563,6 +631,9 @@ class NewGVN { return CClass; } void initializeCongruenceClasses(Function &F); + const Expression *makePossiblePhiOfOps(Instruction *, + SmallPtrSetImpl &); + void addPhiOfOps(PHINode *Op, BasicBlock *BB, Instruction *ExistingValue); // Value number an Instruction or MemoryPhi. void valueNumberMemoryPhi(MemoryPhi *); @@ -571,7 +642,8 @@ class NewGVN { // Symbolic evaluation. const Expression *checkSimplificationResults(Expression *, Instruction *, Value *) const; - const Expression *performSymbolicEvaluation(Value *) const; + const Expression *performSymbolicEvaluation(Value *, + SmallPtrSetImpl &) const; const Expression *performSymbolicLoadCoercion(Type *, Value *, LoadInst *, Instruction *, MemoryAccess *) const; @@ -596,7 +668,7 @@ class NewGVN { bool setMemoryClass(const MemoryAccess *From, CongruenceClass *To); CongruenceClass *getMemoryClass(const MemoryAccess *MA) const; const MemoryAccess *lookupMemoryLeader(const MemoryAccess *) const; - bool isMemoryAccessTop(const MemoryAccess *) const; + bool isMemoryAccessTOP(const MemoryAccess *) const; // Ranking unsigned int getRank(const Value *) const; @@ -620,19 +692,26 @@ class NewGVN { void replaceInstruction(Instruction *, Value *); void markInstructionForDeletion(Instruction *); void deleteInstructionsInBlock(BasicBlock *); + Value *findPhiOfOpsLeader(const Expression *E, const BasicBlock *BB) const; // New instruction creation. void handleNewInstruction(Instruction *){}; // Various instruction touch utilities + template + void for_each_found(Map &, const KeyType &, Func); + template + void touchAndErase(Map &, const KeyType &); void markUsersTouched(Value *); void markMemoryUsersTouched(const MemoryAccess *); void markMemoryDefTouched(const MemoryAccess *); void markPredicateUsersTouched(Instruction *); void markValueLeaderChangeTouched(CongruenceClass *CC); void markMemoryLeaderChangeTouched(CongruenceClass *CC); + void markPhiOfOpsChanged(const Expression *E); void addPredicateUsers(const PredicateBase *, Instruction *) const; void addMemoryUsers(const MemoryAccess *To, MemoryAccess *U) const; + void addAdditionalUsers(Value *To, Value *User) const; // Main loop of value numbering void iterateTouchedInstructions(); @@ -640,12 +719,18 @@ class NewGVN { // Utilities. 
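Returning to the Expression DenseMapInfo specialization a little earlier: the hash-before-equality shortcut works because every Expression caches its full hash. A standalone analogue of the pattern, not the LLVM types themselves:

```cpp
#include <cstdint>
#include <functional>
#include <string>

// Cache the full hash once; equality can then reject mismatches in O(1)
// before falling back to the expensive structural comparison.
struct CachedHashKey {
  std::string Key;
  std::uint64_t Hash;
  explicit CachedHashKey(std::string K)
      : Key(std::move(K)), Hash(std::hash<std::string>{}(Key)) {}
  bool operator==(const CachedHashKey &O) const {
    if (Hash != O.Hash)   // cheap reject on the precomputed full hash
      return false;
    return Key == O.Key;  // expensive check only when hashes agree
  }
};
```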
void cleanupTables(); std::pair assignDFSNumbers(BasicBlock *, unsigned); - void updateProcessedCount(Value *V); + void updateProcessedCount(const Value *V); void verifyMemoryCongruency() const; void verifyIterationSettled(Function &F); - bool singleReachablePHIPath(const MemoryAccess *, const MemoryAccess *) const; + void verifyStoreExpressions() const; + bool singleReachablePHIPath(SmallPtrSet &, + const MemoryAccess *, const MemoryAccess *) const; BasicBlock *getBlockForValue(Value *V) const; void deleteExpression(const Expression *E) const; + MemoryUseOrDef *getMemoryAccess(const Instruction *) const; + MemoryAccess *getDefiningAccess(const MemoryAccess *) const; + MemoryPhi *getMemoryAccess(const BasicBlock *) const; + template T *getMinDFSOfRange(const Range &) const; unsigned InstrToDFSNum(const Value *V) const { assert(isa(V) && "This should not be used for MemoryAccesses"); return InstrDFS.lookup(V); @@ -665,8 +750,8 @@ class NewGVN { ? InstrToDFSNum(cast(MA)->getMemoryInst()) : InstrDFS.lookup(MA); } - bool isCycleFree(const PHINode *PN) const ; - template T *getMinDFSOfRange(const Range &) const; + bool isCycleFree(const Instruction *) const; + bool isBackedge(BasicBlock *From, BasicBlock *To) const; // Debug counter info. When verifying, we have to reset the value numbering // debug counter to the same state it started in to get the same results. std::pair StartingVNCounter; @@ -694,20 +779,46 @@ bool StoreExpression::equals(const Expression &Other) const { return true; } +// Determine if the edge From->To is a backedge +bool NewGVN::isBackedge(BasicBlock *From, BasicBlock *To) const { + if (From == To) + return true; + auto *FromDTN = DT->getNode(From); + auto *ToDTN = DT->getNode(To); + return RPOOrdering.lookup(FromDTN) >= RPOOrdering.lookup(ToDTN); +} + #ifndef NDEBUG static std::string getBlockName(const BasicBlock *B) { return DOTGraphTraits::getSimpleNodeLabel(B, nullptr); } #endif +// Get a MemoryAccess for an instruction, fake or real. +MemoryUseOrDef *NewGVN::getMemoryAccess(const Instruction *I) const { + auto *Result = MSSA->getMemoryAccess(I); + return Result ? Result : TempToMemory.lookup(I); +} + +// Get a MemoryPhi for a basic block. These are all real. +MemoryPhi *NewGVN::getMemoryAccess(const BasicBlock *BB) const { + return MSSA->getMemoryAccess(BB); +} + // Get the basic block from an instruction/memory value. 
BasicBlock *NewGVN::getBlockForValue(Value *V) const { - if (auto *I = dyn_cast(V)) - return I->getParent(); - else if (auto *MP = dyn_cast(V)) - return MP->getBlock(); - llvm_unreachable("Should have been able to figure out a block for our value"); - return nullptr; + if (auto *I = dyn_cast(V)) { + auto *Parent = I->getParent(); + if (Parent) + return Parent; + Parent = TempToBlock.lookup(V); + assert(Parent && "Every fake instruction should have a block"); + return Parent; + } + + auto *MP = dyn_cast(V); + assert(MP && "Should have been an instruction or a MemoryPhi"); + return MP->getBlock(); } // Delete a definitely dead expression, so it can be reused by the expression @@ -719,10 +830,9 @@ void NewGVN::deleteExpression(const Expression *E) const { const_cast(BE)->deallocateOperands(ArgRecycler); ExpressionAllocator.Deallocate(E); } - PHIExpression *NewGVN::createPHIExpression(Instruction *I, bool &HasBackedge, - bool &AllConstant) const { - BasicBlock *PHIBlock = I->getParent(); + bool &OriginalOpsConstant) const { + BasicBlock *PHIBlock = getBlockForValue(I); auto *PN = cast(I); auto *E = new (ExpressionAllocator) PHIExpression(PN->getNumOperands(), PHIBlock); @@ -731,8 +841,6 @@ PHIExpression *NewGVN::createPHIExpression(Instruction *I, bool &HasBackedge, E->setType(I->getType()); E->setOpcode(I->getOpcode()); - unsigned PHIRPO = RPOOrdering.lookup(DT->getNode(PHIBlock)); - // NewGVN assumes the operands of a PHI node are in a consistent order across // PHIs. LLVM doesn't seem to always guarantee this. While we need to fix // this in LLVM at some point we don't want GVN to find wrong congruences. @@ -751,20 +859,21 @@ PHIExpression *NewGVN::createPHIExpression(Instruction *I, bool &HasBackedge, // Filter out unreachable phi operands. auto Filtered = make_filter_range(PHIOperands, [&](const Use *U) { - return ReachableEdges.count({PN->getIncomingBlock(*U), PHIBlock}); + if (*U == PN) + return false; + if (!ReachableEdges.count({PN->getIncomingBlock(*U), PHIBlock})) + return false; + // Things in TOPClass are equivalent to everything. + if (ValueToClass.lookup(*U) == TOPClass) + return false; + return lookupOperandLeader(*U) != PN; }); - std::transform(Filtered.begin(), Filtered.end(), op_inserter(E), [&](const Use *U) -> Value * { auto *BB = PN->getIncomingBlock(*U); - auto *DTN = DT->getNode(BB); - if (RPOOrdering.lookup(DTN) >= PHIRPO) - HasBackedge = true; - AllConstant &= isa(*U) || isa(*U); - - // Don't try to transform self-defined phis. - if (*U == PN) - return PN; + HasBackedge = HasBackedge || isBackedge(BB, PHIBlock); + OriginalOpsConstant = + OriginalOpsConstant && isa(*U); return lookupOperandLeader(*U); }); return E; @@ -785,7 +894,7 @@ bool NewGVN::setBasicExpressionInfo(Instruction *I, BasicExpression *E) const { // whether all members are constant. std::transform(I->op_begin(), I->op_end(), op_inserter(E), [&](Value *O) { auto Operand = lookupOperandLeader(O); - AllConstant &= isa(Operand); + AllConstant = AllConstant && isa(Operand); return Operand; }); @@ -846,9 +955,17 @@ const Expression *NewGVN::checkSimplificationResults(Expression *E, CongruenceClass *CC = ValueToClass.lookup(V); if (CC && CC->getDefiningExpr()) { + // If we simplified to something else, we need to communicate + // that we're users of the value we simplified to. + if (I != V) { + // Don't add temporary instructions to the user lists. 
+ if (!AllTempInstructions.count(I)) + addAdditionalUsers(V, I); + } + if (I) DEBUG(dbgs() << "Simplified " << *I << " to " - << " expression " << *V << "\n"); + << " expression " << *CC->getDefiningExpr() << "\n"); NumGVNOpsSimplified++; deleteExpression(E); return CC->getDefiningExpr(); @@ -961,6 +1078,12 @@ NewGVN::createAggregateValueExpression(Instruction *I) const { llvm_unreachable("Unhandled type of aggregate value operation"); } +const DeadExpression *NewGVN::createDeadExpression() const { + // DeadExpression has no arguments and all DeadExpression's are the same, + // so we only need one of them. + return SingletonDeadExpression; +} + const VariableExpression *NewGVN::createVariableExpression(Value *V) const { auto *E = new (ExpressionAllocator) VariableExpression(V); E->setOpcode(V->getValueID()); @@ -1032,7 +1155,7 @@ bool NewGVN::someEquivalentDominates(const Instruction *Inst, Value *NewGVN::lookupOperandLeader(Value *V) const { CongruenceClass *CC = ValueToClass.lookup(V); if (CC) { - // Everything in TOP is represneted by undef, as it can be any value. + // Everything in TOP is represented by undef, as it can be any value. // We do have to make sure we get the type right though, so we can't set the // RepLeader to undef. if (CC == TOPClass) @@ -1054,7 +1177,7 @@ const MemoryAccess *NewGVN::lookupMemoryLeader(const MemoryAccess *MA) const { // Return true if the MemoryAccess is really equivalent to everything. This is // equivalent to the lattice value "TOP" in most lattices. This is the initial // state of all MemoryAccesses. -bool NewGVN::isMemoryAccessTop(const MemoryAccess *MA) const { +bool NewGVN::isMemoryAccessTOP(const MemoryAccess *MA) const { return getMemoryClass(MA) == TOPClass; } @@ -1100,7 +1223,7 @@ const Expression *NewGVN::performSymbolicStoreEvaluation(Instruction *I) const { // Unlike loads, we never try to eliminate stores, so we do not check if they // are simple and avoid value numbering them. auto *SI = cast(I); - auto *StoreAccess = MSSA->getMemoryAccess(SI); + auto *StoreAccess = getMemoryAccess(SI); // Get the expression, if any, for the RHS of the MemoryDef. const MemoryAccess *StoreRHS = StoreAccess->getDefiningAccess(); if (EnableStoreRefinement) @@ -1108,7 +1231,6 @@ const Expression *NewGVN::performSymbolicStoreEvaluation(Instruction *I) const { // If we bypassed the use-def chains, make sure we add a use. if (StoreRHS != StoreAccess->getDefiningAccess()) addMemoryUsers(StoreRHS, StoreAccess); - StoreRHS = lookupMemoryLeader(StoreRHS); // If we are defined by ourselves, use the live on entry def. if (StoreRHS == StoreAccess) @@ -1120,27 +1242,24 @@ const Expression *NewGVN::performSymbolicStoreEvaluation(Instruction *I) const { // only do this for simple stores, we should expand to cover memcpys, etc. const auto *LastStore = createStoreExpression(SI, StoreRHS); const auto *LastCC = ExpressionToClass.lookup(LastStore); - // Basically, check if the congruence class the store is in is defined by a - // store that isn't us, and has the same value. MemorySSA takes care of - // ensuring the store has the same memory state as us already. - // The RepStoredValue gets nulled if all the stores disappear in a class, so - // we don't need to check if the class contains a store besides us. - if (LastCC && - LastCC->getStoredValue() == lookupOperandLeader(SI->getValueOperand())) + // We really want to check whether the expression we matched was a store. No + // easy way to do that. 
However, we can check that the class we found has a + // store, which, assuming the value numbering state is not corrupt, is + // sufficient, because we must also be equivalent to that store's expression + // for it to be in the same class as the load. + if (LastCC && LastCC->getStoredValue() == LastStore->getStoredValue()) return LastStore; - deleteExpression(LastStore); // Also check if our value operand is defined by a load of the same memory // location, and the memory state is the same as it was then (otherwise, it // could have been overwritten later. See test32 in // transforms/DeadStoreElimination/simple.ll). - if (auto *LI = - dyn_cast(lookupOperandLeader(SI->getValueOperand()))) { + if (auto *LI = dyn_cast(LastStore->getStoredValue())) if ((lookupOperandLeader(LI->getPointerOperand()) == - lookupOperandLeader(SI->getPointerOperand())) && - (lookupMemoryLeader(MSSA->getMemoryAccess(LI)->getDefiningAccess()) == + LastStore->getOperand(0)) && + (lookupMemoryLeader(getMemoryAccess(LI)->getDefiningAccess()) == StoreRHS)) - return createVariableExpression(LI); - } + return LastStore; + deleteExpression(LastStore); } // If the store is not equivalent to anything, value number it as a store that @@ -1241,8 +1360,9 @@ const Expression *NewGVN::performSymbolicLoadEvaluation(Instruction *I) const { // Load of undef is undef. if (isa(LoadAddressLeader)) return createConstantExpression(UndefValue::get(LI->getType())); - - MemoryAccess *DefiningAccess = MSSAWalker->getClobberingMemoryAccess(I); + MemoryAccess *OriginalAccess = getMemoryAccess(I); + MemoryAccess *DefiningAccess = + MSSAWalker->getClobberingMemoryAccess(OriginalAccess); if (!MSSA->isLiveOnEntryDef(DefiningAccess)) { if (auto *MD = dyn_cast(DefiningAccess)) { @@ -1331,6 +1451,7 @@ NewGVN::performSymbolicPredicateInfoEvaluation(Instruction *I) const { // operands are equal, because assumes must always be true. if (CmpInst::isTrueWhenEqual(Predicate)) { addPredicateUsers(PI, I); + addAdditionalUsers(Cmp->getOperand(0), I); return createVariableOrConstant(FirstOp); } } @@ -1343,6 +1464,7 @@ NewGVN::performSymbolicPredicateInfoEvaluation(Instruction *I) const { if ((PBranch->TrueEdge && Predicate == CmpInst::ICMP_EQ) || (!PBranch->TrueEdge && Predicate == CmpInst::ICMP_NE)) { addPredicateUsers(PI, I); + addAdditionalUsers(Cmp->getOperand(0), I); return createVariableOrConstant(FirstOp); } // Handle the special case of floating point. @@ -1350,6 +1472,7 @@ NewGVN::performSymbolicPredicateInfoEvaluation(Instruction *I) const { (!PBranch->TrueEdge && Predicate == CmpInst::FCMP_UNE)) && isa(FirstOp) && !cast(FirstOp)->isZero()) { addPredicateUsers(PI, I); + addAdditionalUsers(Cmp->getOperand(0), I); return createConstantExpression(cast(FirstOp)); } } @@ -1430,34 +1553,33 @@ bool NewGVN::setMemoryClass(const MemoryAccess *From, return Changed; } -// Determine if a phi is cycle-free. That means the values in the phi don't -// depend on any expressions that can change value as a result of the phi. -// For example, a non-cycle free phi would be v = phi(0, v+1). -bool NewGVN::isCycleFree(const PHINode *PN) const { - // In order to compute cycle-freeness, we do SCC finding on the phi, and see - // what kind of SCC it ends up in. If it is a singleton, it is cycle-free. - // If it is not in a singleton, it is only cycle free if the other members are - // all phi nodes (as they do not compute anything, they are copies). 
TODO: - There are likely a few other intrinsics or expressions that could be - included here, but this happens so infrequently already that it is not - likely to be worth it. - auto PCS = PhiCycleState.lookup(PN); - if (PCS == PCS_Unknown) { - SCCFinder.Start(PN); - auto &SCC = SCCFinder.getComponentFor(PN); +// Determine if an instruction is cycle-free. That means the values in the +// instruction don't depend on any expressions that can change value as a +// result of the instruction. For example, a non-cycle-free instruction would +// be v = phi(0, v+1). +bool NewGVN::isCycleFree(const Instruction *I) const { + // In order to compute cycle-freeness, we do SCC finding on the instruction, + // and see what kind of SCC it ends up in. If it is a singleton, it is + // cycle-free. If it is not in a singleton, it is only cycle free if the + // other members are all phi nodes (as they do not compute anything, they are + // copies). + auto ICS = InstCycleState.lookup(I); + if (ICS == ICS_Unknown) { + SCCFinder.Start(I); + auto &SCC = SCCFinder.getComponentFor(I); // It's cycle free if its size is 1 or the SCC is *only* phi nodes. if (SCC.size() == 1) - PhiCycleState.insert({PN, PCS_CycleFree}); + InstCycleState.insert({I, ICS_CycleFree}); else { bool AllPhis = llvm::all_of(SCC, [](const Value *V) { return isa<PHINode>(V); }); - PCS = AllPhis ? PCS_CycleFree : PCS_Cycle; + ICS = AllPhis ? ICS_CycleFree : ICS_Cycle; for (auto *Member : SCC) if (auto *MemberPhi = dyn_cast<PHINode>(Member)) - PhiCycleState.insert({MemberPhi, PCS}); + InstCycleState.insert({MemberPhi, ICS}); } } - if (PCS == PCS_Cycle) + if (ICS == ICS_Cycle) return false; return true; } @@ -1467,39 +1589,44 @@ const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I) const { // True if one of the incoming phi edges is a backedge. bool HasBackedge = false; // All constant tracks the state of whether all the *original* phi operands - // were constant. This is really shorthand for "this phi cannot cycle due - // to forward propagation", as any change in value of the phi is guaranteed - // not to later change the value of the phi. - // IE it can't be v = phi(undef, v+1) + // This is really shorthand for "this phi cannot cycle due to forward + // propagation", as any change in value of the phi is guaranteed not to + // later change the value of the phi. IE it can't be v = phi(undef, v+1) bool AllConstant = true; - auto *E = cast<PHIExpression>(createPHIExpression(I, HasBackedge, AllConstant)); + auto *E = + cast<PHIExpression>(createPHIExpression(I, HasBackedge, AllConstant)); // We match the semantics of SimplifyPhiNode from InstructionSimplify here. // See if all arguments are the same. // We track if any were undef because they need special handling. bool HasUndef = false; - auto Filtered = make_filter_range(E->operands(), [&](const Value *Arg) { - if (Arg == I) - return false; + auto Filtered = make_filter_range(E->operands(), [&](Value *Arg) { if (isa<UndefValue>(Arg)) { HasUndef = true; return false; } return true; }); - // If we are left with no operands, it's undef + // If we are left with no operands, it's dead. if (Filtered.begin() == Filtered.end()) { - DEBUG(dbgs() << "Simplified PHI node " << *I << " to undef" - << "\n"); + // If it has undef at this point, it means there are no non-undef arguments, + // and thus, the value of the phi node must be undef.
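A standalone toy version of the SCC-based test in isCycleFree above: the phi in v = phi(0, v+1) and the add feeding it land in one non-singleton SCC containing a non-phi, so v is not cycle-free. The graph, node type, and the compact Tarjan implementation below are illustrative stand-ins for the pass's TarjanSCC helper, not its real code:

#include <algorithm>
#include <cassert>
#include <vector>

struct Node {
  bool IsPhi = false;
  std::vector<int> Operands; // values this node depends on
};

class SCCFinder {
  const std::vector<Node> &G;
  std::vector<int> Index, Low, Comp, Stk;
  std::vector<bool> OnStack;
  int Counter = 0, NumComps = 0;

  void dfs(int V) {
    Index[V] = Low[V] = Counter++;
    Stk.push_back(V);
    OnStack[V] = true;
    for (int W : G[V].Operands) {
      if (Index[W] < 0) {
        dfs(W);
        Low[V] = std::min(Low[V], Low[W]);
      } else if (OnStack[W]) {
        Low[V] = std::min(Low[V], Index[W]);
      }
    }
    if (Low[V] == Index[V]) { // V roots an SCC; pop its members.
      int W;
      do {
        W = Stk.back();
        Stk.pop_back();
        OnStack[W] = false;
        Comp[W] = NumComps;
      } while (W != V);
      ++NumComps;
    }
  }

public:
  explicit SCCFinder(const std::vector<Node> &Graph)
      : G(Graph), Index(Graph.size(), -1), Low(Graph.size()),
        Comp(Graph.size()), OnStack(Graph.size(), false) {
    for (int V = 0; V < (int)Graph.size(); ++V)
      if (Index[V] < 0)
        dfs(V);
  }

  // Cycle-free: the SCC is a singleton, or every member is a phi (phis are
  // copies). NewGVN filters self-references out of phi operands beforehand,
  // so a singleton here really is acyclic.
  bool isCycleFree(int V) const {
    int Size = 0;
    bool AllPhis = true;
    for (int W = 0; W < (int)G.size(); ++W)
      if (Comp[W] == Comp[V]) {
        ++Size;
        AllPhis = AllPhis && G[W].IsPhi;
      }
    return Size == 1 || AllPhis;
  }
};

int main() {
  // Node 0: v = phi(0, %inc);  Node 1: %inc = v + 1.
  std::vector<Node> G(2);
  G[0].IsPhi = true;
  G[0].Operands = {1};
  G[1].Operands = {0};
  SCCFinder S(G);
  assert(!S.isCycleFree(0)); // the cycle runs through a non-phi add
  return 0;
}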
+ if (HasUndef) { + DEBUG(dbgs() << "PHI Node " << *I + << " has no non-undef arguments, valuing it as undef\n"); + return createConstantExpression(UndefValue::get(I->getType())); + } + + DEBUG(dbgs() << "No arguments of PHI node " << *I << " are live\n"); deleteExpression(E); - return createConstantExpression(UndefValue::get(I->getType())); + return createDeadExpression(); } unsigned NumOps = 0; Value *AllSameValue = *(Filtered.begin()); ++Filtered.begin(); // Can't use std::equal here, sadly, because filter.begin moves. - if (llvm::all_of(Filtered, [AllSameValue, &NumOps](const Value *V) { + if (llvm::all_of(Filtered, [&](Value *Arg) { ++NumOps; - return V == AllSameValue; + return Arg == AllSameValue; })) { // In LLVM's non-standard representation of phi nodes, it's possible to have // phi nodes with cycles (IE dependent on other phis that are .... dependent @@ -1518,7 +1645,7 @@ const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I) const { // constants, or all operands are ignored but the undef, it also must be // cycle free. if (!AllConstant && HasBackedge && NumOps > 0 && - !isa(AllSameValue) && !isCycleFree(cast(I))) + !isa(AllSameValue) && !isCycleFree(I)) return E; // Only have to check for instructions @@ -1526,7 +1653,12 @@ const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I) const { if (!someEquivalentDominates(AllSameInst, I)) return E; } - + // Can't simplify to something that comes later in the iteration. + // Otherwise, when and if it changes congruence class, we will never catch + // up. We will always be a class behind it. + if (isa(AllSameValue) && + InstrToDFSNum(AllSameValue) > InstrToDFSNum(I)) + return E; NumGVNPhisAllSame++; DEBUG(dbgs() << "Simplified PHI node " << *I << " to " << *AllSameValue << "\n"); @@ -1689,8 +1821,18 @@ const Expression *NewGVN::performSymbolicCmpEvaluation(Instruction *I) const { return createExpression(I); } +// Return true if V is a value that will always be available (IE can +// be placed anywhere) in the function. We don't do globals here +// because they are often worse to put in place. +// TODO: Separate cost from availability +static bool alwaysAvailable(Value *V) { + return isa(V) || isa(V); +} + // Substitute and symbolize the value before value numbering. -const Expression *NewGVN::performSymbolicEvaluation(Value *V) const { +const Expression * +NewGVN::performSymbolicEvaluation(Value *V, + SmallPtrSetImpl &Visited) const { const Expression *E = nullptr; if (auto *C = dyn_cast(V)) E = createConstantExpression(C); @@ -1768,12 +1910,40 @@ const Expression *NewGVN::performSymbolicEvaluation(Value *V) const { return E; } +// Look up a container in a map, and then call a function for each thing in the +// found container. +template +void NewGVN::for_each_found(Map &M, const KeyType &Key, Func F) { + const auto Result = M.find_as(Key); + if (Result != M.end()) + for (typename Map::mapped_type::value_type Mapped : Result->second) + F(Mapped); +} + +// Look up a container of values/instructions in a map, and touch all the +// instructions in the container. Then erase value from the map. 
+template <typename Map, typename KeyType> +void NewGVN::touchAndErase(Map &M, const KeyType &Key) { + const auto Result = M.find_as(Key); + if (Result != M.end()) { + for (const typename Map::mapped_type::value_type Mapped : Result->second) + TouchedInstructions.set(InstrToDFSNum(Mapped)); + M.erase(Result); + } +} + +void NewGVN::addAdditionalUsers(Value *To, Value *User) const { + if (isa<Instruction>(To)) + AdditionalUsers[To].insert(User); +} + void NewGVN::markUsersTouched(Value *V) { // Now mark the users as touched. for (auto *User : V->users()) { assert(isa<Instruction>(User) && "Use of value not within an instruction?"); TouchedInstructions.set(InstrToDFSNum(User)); } + touchAndErase(AdditionalUsers, V); } void NewGVN::addMemoryUsers(const MemoryAccess *To, MemoryAccess *U) const { @@ -1790,16 +1960,15 @@ void NewGVN::markMemoryUsersTouched(const MemoryAccess *MA) { return; for (auto U : MA->users()) TouchedInstructions.set(MemoryToDFSNum(U)); - const auto Result = MemoryToUsers.find(MA); - if (Result != MemoryToUsers.end()) { - for (auto *User : Result->second) - TouchedInstructions.set(MemoryToDFSNum(User)); - MemoryToUsers.erase(Result); - } + touchAndErase(MemoryToUsers, MA); } // Add I to the set of users of a given predicate. void NewGVN::addPredicateUsers(const PredicateBase *PB, Instruction *I) const { + // Don't add temporary instructions to the user lists. + if (AllTempInstructions.count(I)) + return; + if (auto *PBranch = dyn_cast<PredicateBranch>(PB)) PredicateToUsers[PBranch->Condition].insert(I); else if (auto *PAssume = dyn_cast<PredicateAssume>(PB)) @@ -1808,12 +1977,7 @@ void NewGVN::addPredicateUsers(const PredicateBase *PB, Instruction *I) const { // Touch all the predicates that depend on this instruction. void NewGVN::markPredicateUsersTouched(Instruction *I) { - const auto Result = PredicateToUsers.find(I); - if (Result != PredicateToUsers.end()) { - for (auto *User : Result->second) - TouchedInstructions.set(InstrToDFSNum(User)); - PredicateToUsers.erase(Result); - } + touchAndErase(PredicateToUsers, I); } // Mark users affected by a memory leader change. @@ -1855,11 +2019,11 @@ const MemoryAccess *NewGVN::getNextMemoryLeader(CongruenceClass *CC) const { assert(!CC->definesNoMemory() && "Can't get next leader if there is none"); if (CC->getStoreCount() > 0) { if (auto *NL = dyn_cast_or_null<StoreInst>(CC->getNextLeader().first)) - return MSSA->getMemoryAccess(NL); + return getMemoryAccess(NL); // Find the store with the minimum DFS number. auto *V = getMinDFSOfRange<Value>(make_filter_range( *CC, [&](const Value *V) { return isa<StoreInst>(V); })); - return MSSA->getMemoryAccess(cast<StoreInst>(V)); + return getMemoryAccess(cast<StoreInst>(V)); } assert(CC->getStoreCount() == 0); @@ -1897,9 +2061,10 @@ Value *NewGVN::getNextValueLeader(CongruenceClass *CC) const { // // The invariants of this function are: // -// I must be moving to NewClass from OldClass The StoreCount of OldClass and -// NewClass is expected to have been updated for I already if it is is a store. -// The OldClass memory leader has not been updated yet if I was the leader. +// - I must be moving to NewClass from OldClass +// - The StoreCount of OldClass and NewClass is expected to have been updated +// for I already if it is a store. +// - The OldClass memory leader has not been updated yet if I was the leader. void NewGVN::moveMemoryToNewCongruenceClass(Instruction *I, MemoryAccess *InstMA, CongruenceClass *OldClass, @@ -1908,7 +2073,8 @@ void NewGVN::moveMemoryToNewCongruenceClass(Instruction *I, // be the MemoryAccess of OldClass.
assert((!InstMA || !OldClass->getMemoryLeader() || OldClass->getLeader() != I || - OldClass->getMemoryLeader() == InstMA) && + MemoryAccessToClass.lookup(OldClass->getMemoryLeader()) == + MemoryAccessToClass.lookup(InstMA)) && "Representative MemoryAccess mismatch"); // First, see what happens to the new class if (!NewClass->getMemoryLeader()) { @@ -1944,31 +2110,11 @@ void NewGVN::moveValueToNewCongruenceClass(Instruction *I, const Expression *E, if (I == OldClass->getNextLeader().first) OldClass->resetNextLeader(); - // It's possible, though unlikely, for us to discover equivalences such - // that the current leader does not dominate the old one. - // This statistic tracks how often this happens. - // We assert on phi nodes when this happens, currently, for debugging, because - // we want to make sure we name phi node cycles properly. - if (isa(NewClass->getLeader()) && NewClass->getLeader() && - I != NewClass->getLeader()) { - auto *IBB = I->getParent(); - auto *NCBB = cast(NewClass->getLeader())->getParent(); - bool Dominated = - IBB == NCBB && InstrToDFSNum(I) < InstrToDFSNum(NewClass->getLeader()); - Dominated = Dominated || DT->properlyDominates(IBB, NCBB); - if (Dominated) { - ++NumGVNNotMostDominatingLeader; - assert( - !isa(I) && - "New class for instruction should not be dominated by instruction"); - } - } + OldClass->erase(I); + NewClass->insert(I); if (NewClass->getLeader() != I) NewClass->addPossibleNextLeader({I, InstrToDFSNum(I)}); - - OldClass->erase(I); - NewClass->insert(I); // Handle our special casing of stores. if (auto *SI = dyn_cast(I)) { OldClass->decStoreCount(); @@ -1982,17 +2128,15 @@ void NewGVN::moveValueToNewCongruenceClass(Instruction *I, const Expression *E, if (NewClass->getStoreCount() == 0 && !NewClass->getStoredValue()) { // If it's a store expression we are using, it means we are not equivalent // to something earlier. - if (isa(E)) { - assert(lookupOperandLeader(SI->getValueOperand()) != - NewClass->getLeader()); - NewClass->setStoredValue(lookupOperandLeader(SI->getValueOperand())); + if (auto *SE = dyn_cast(E)) { + NewClass->setStoredValue(SE->getStoredValue()); markValueLeaderChangeTouched(NewClass); // Shift the new class leader to be the store DEBUG(dbgs() << "Changing leader of congruence class " << NewClass->getID() << " from " << *NewClass->getLeader() << " to " << *SI << " because store joined class\n"); // If we changed the leader, we have to mark it changed because we don't - // know what it will do to symbolic evlauation. + // know what it will do to symbolic evaluation. NewClass->setLeader(SI); } // We rely on the code below handling the MemoryAccess change. @@ -2003,17 +2147,26 @@ void NewGVN::moveValueToNewCongruenceClass(Instruction *I, const Expression *E, // instructions before. // If it's not a memory use, set the MemoryAccess equivalence - auto *InstMA = dyn_cast_or_null(MSSA->getMemoryAccess(I)); - bool InstWasMemoryLeader = InstMA && OldClass->getMemoryLeader() == InstMA; + auto *InstMA = dyn_cast_or_null(getMemoryAccess(I)); if (InstMA) moveMemoryToNewCongruenceClass(I, InstMA, OldClass, NewClass); ValueToClass[I] = NewClass; // See if we destroyed the class or need to swap leaders. 
if (OldClass->empty() && OldClass != TOPClass) { if (OldClass->getDefiningExpr()) { - DEBUG(dbgs() << "Erasing expression " << OldClass->getDefiningExpr() + DEBUG(dbgs() << "Erasing expression " << *OldClass->getDefiningExpr() << " from table\n"); - ExpressionToClass.erase(OldClass->getDefiningExpr()); + // We erase it as an exact expression to make sure we don't just erase an + // equivalent one. + auto Iter = ExpressionToClass.find_as( + ExactEqualsExpression(*OldClass->getDefiningExpr())); + if (Iter != ExpressionToClass.end()) + ExpressionToClass.erase(Iter); +#ifdef EXPENSIVE_CHECKS + assert( + (*OldClass->getDefiningExpr() != *E || ExpressionToClass.lookup(E)) && + "We erased the expression we just inserted, which should not happen"); +#endif } } else if (OldClass->getLeader() == I) { // When the leader changes, the value numbering of @@ -2030,52 +2183,35 @@ void NewGVN::moveValueToNewCongruenceClass(Instruction *I, const Expression *E, if (OldClass->getStoredValue()) OldClass->setStoredValue(nullptr); } - // If we destroy the old access leader and it's a store, we have to - // effectively destroy the congruence class. When it comes to scalars, - // anything with the same value is as good as any other. That means that - // one leader is as good as another, and as long as you have some leader for - // the value, you are good.. When it comes to *memory states*, only one - // particular thing really represents the definition of a given memory - // state. Once it goes away, we need to re-evaluate which pieces of memory - // are really still equivalent. The best way to do this is to re-value - // number things. The only way to really make that happen is to destroy the - // rest of the class. In order to effectively destroy the class, we reset - // ExpressionToClass for each by using the ValueToExpression mapping. The - // members later get marked as touched due to the leader change. We will - // create new congruence classes, and the pieces that are still equivalent - // will end back together in a new class. If this becomes too expensive, it - // is possible to use a versioning scheme for the congruence classes to - // avoid the expressions finding this old class. Note that the situation is - // different for memory phis, becuase they are evaluated anew each time, and - // they become equal not by hashing, but by seeing if all operands are the - // same (or only one is reachable). - if (OldClass->getStoreCount() > 0 && InstWasMemoryLeader) { - DEBUG(dbgs() << "Kicking everything out of class " << OldClass->getID() - << " because MemoryAccess leader changed"); - for (auto Member : *OldClass) - ExpressionToClass.erase(ValueToExpression.lookup(Member)); - } OldClass->setLeader(getNextValueLeader(OldClass)); OldClass->resetNextLeader(); markValueLeaderChangeTouched(OldClass); } } +// For a given expression, mark the phi of ops instructions that could have +// changed as a result. +void NewGVN::markPhiOfOpsChanged(const Expression *E) { + touchAndErase(ExpressionToPhiOfOps, ExactEqualsExpression(*E)); +} + // Perform congruence finding on a given value numbering expression. void NewGVN::performCongruenceFinding(Instruction *I, const Expression *E) { - ValueToExpression[I] = E; // This is guaranteed to return something, since it will at least find // TOP. - CongruenceClass *IClass = ValueToClass[I]; + CongruenceClass *IClass = ValueToClass.lookup(I); assert(IClass && "Should have found a IClass"); // Dead classes should have been eliminated from the mapping. 
assert(!IClass->isDead() && "Found a dead class"); - CongruenceClass *EClass; + CongruenceClass *EClass = nullptr; if (const auto *VE = dyn_cast(E)) { - EClass = ValueToClass[VE->getVariableValue()]; - } else { + EClass = ValueToClass.lookup(VE->getVariableValue()); + } else if (isa(E)) { + EClass = TOPClass; + } + if (!EClass) { auto lookupResult = ExpressionToClass.insert({E, nullptr}); // If it's not in the value table, create a new congruence class. @@ -2090,7 +2226,7 @@ void NewGVN::performCongruenceFinding(Instruction *I, const Expression *E) { } else if (const auto *SE = dyn_cast(E)) { StoreInst *SI = SE->getStoreInst(); NewClass->setLeader(SI); - NewClass->setStoredValue(lookupOperandLeader(SI->getValueOperand())); + NewClass->setStoredValue(SE->getStoredValue()); // The RepMemoryAccess field will be filled in properly by the // moveValueToNewCongruenceClass call. } else { @@ -2125,14 +2261,34 @@ void NewGVN::performCongruenceFinding(Instruction *I, const Expression *E) { if (ClassChanged || LeaderChanged) { DEBUG(dbgs() << "New class " << EClass->getID() << " for expression " << *E << "\n"); - if (ClassChanged) + if (ClassChanged) { moveValueToNewCongruenceClass(I, E, IClass, EClass); + markPhiOfOpsChanged(E); + } + markUsersTouched(I); - if (MemoryAccess *MA = MSSA->getMemoryAccess(I)) + if (MemoryAccess *MA = getMemoryAccess(I)) markMemoryUsersTouched(MA); if (auto *CI = dyn_cast(I)) markPredicateUsersTouched(CI); } + // If we changed the class of the store, we want to ensure nothing finds the + // old store expression. In particular, loads do not compare against stored + // value, so they will find old store expressions (and associated class + // mappings) if we leave them in the table. + if (ClassChanged && isa(I)) { + auto *OldE = ValueToExpression.lookup(I); + // It could just be that the old class died. We don't want to erase it if we + // just moved classes. + if (OldE && isa(OldE) && *E != *OldE) { + // Erase this as an exact expression to ensure we don't erase expressions + // equivalent to it. + auto Iter = ExpressionToClass.find_as(ExactEqualsExpression(*OldE)); + if (Iter != ExpressionToClass.end()) + ExpressionToClass.erase(Iter); + } + } + ValueToExpression[I] = E; } // Process the fact that Edge (from, to) is reachable, including marking @@ -2154,7 +2310,7 @@ void NewGVN::updateReachableEdge(BasicBlock *From, BasicBlock *To) { // impact predicates. Otherwise, only mark the phi nodes as touched, as // they are the only thing that depend on new edges. Anything using their // values will get propagated to if necessary. - if (MemoryAccess *MemPhi = MSSA->getMemoryAccess(To)) + if (MemoryAccess *MemPhi = getMemoryAccess(To)) TouchedInstructions.set(InstrToDFSNum(MemPhi)); auto BI = To->begin(); @@ -2162,6 +2318,9 @@ void NewGVN::updateReachableEdge(BasicBlock *From, BasicBlock *To) { TouchedInstructions.set(InstrToDFSNum(&*BI)); ++BI; } + for_each_found(PHIOfOpsPHIs, To, [&](const PHINode *I) { + TouchedInstructions.set(InstrToDFSNum(I)); + }); } } } @@ -2170,9 +2329,7 @@ void NewGVN::updateReachableEdge(BasicBlock *From, BasicBlock *To) { // see if we know some constant value for it already. Value *NewGVN::findConditionEquivalence(Value *Cond) const { auto Result = lookupOperandLeader(Cond); - if (isa(Result)) - return Result; - return nullptr; + return isa(Result) ? Result : nullptr; } // Process the outgoing edges of a block for reachability. 
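The touchAndErase helper introduced a few hunks above, and the way updateReachableEdge re-queues work, both follow one pattern: side tables (AdditionalUsers, MemoryToUsers, PredicateToUsers) record who depends on a value, and when the value's class changes those dependents are marked in the TouchedInstructions bitvector and the record is dropped so it fires at most once per change. A rough self-contained model of that pattern, with invented names and plain standard containers in place of the pass's data structures:

#include <iostream>
#include <set>
#include <unordered_map>
#include <vector>

using DFSNum = unsigned;

struct Worklist {
  std::vector<bool> Touched;                       // indexed by DFS number
  std::unordered_map<int, std::set<DFSNum>> Users; // value-id -> dependents

  // Equivalent of touchAndErase: mark every recorded dependent of Key as
  // needing reprocessing, then drop the record.
  void touchAndErase(int Key) {
    auto It = Users.find(Key);
    if (It == Users.end())
      return;
    for (DFSNum N : It->second)
      Touched[N] = true;
    Users.erase(It);
  }
};

int main() {
  Worklist W;
  W.Touched.assign(8, false);
  W.Users[42] = {3, 5};  // instructions 3 and 5 depend on value 42
  W.touchAndErase(42);   // value 42 changed congruence class
  for (DFSNum N = 0; N < W.Touched.size(); ++N)
    if (W.Touched[N])
      std::cout << "reprocess instruction " << N << "\n"; // prints 3 and 5
  return 0;
}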
@@ -2251,7 +2408,7 @@ void NewGVN::processOutgoingEdges(TerminatorInst *TI, BasicBlock *B) { // This also may be a memory defining terminator, in which case, set it // equivalent only to itself. // - auto *MA = MSSA->getMemoryAccess(TI); + auto *MA = getMemoryAccess(TI); if (MA && !isa<MemoryUse>(MA)) { auto *CC = ensureLeaderOfMemoryClass(MA); if (setMemoryClass(MA, CC)) @@ -2260,6 +2417,148 @@ void NewGVN::processOutgoingEdges(TerminatorInst *TI, BasicBlock *B) { } } +void NewGVN::addPhiOfOps(PHINode *Op, BasicBlock *BB, + Instruction *ExistingValue) { + InstrDFS[Op] = InstrToDFSNum(ExistingValue); + AllTempInstructions.insert(Op); + PHIOfOpsPHIs[BB].push_back(Op); + TempToBlock[Op] = BB; + RealToTemp[ExistingValue] = Op; +} + +static bool okayForPHIOfOps(const Instruction *I) { + return isa<BinaryOperator>(I) || isa<SelectInst>(I) || isa<CmpInst>(I) || + isa<LoadInst>(I); +} + +// When we see an instruction that is an op of phis, generate the equivalent phi +// of ops form. +const Expression * +NewGVN::makePossiblePhiOfOps(Instruction *I, + SmallPtrSetImpl<Value *> &Visited) { + if (!okayForPHIOfOps(I)) + return nullptr; + + if (!Visited.insert(I).second) + return nullptr; + // For now, we require the instruction be cycle free because we don't + // *always* create a phi of ops for instructions that could be done as phi + // of ops, we only do it if we think it is useful. If we did do it all the + // time, we could remove the cycle free check. + if (!isCycleFree(I)) + return nullptr; + + unsigned IDFSNum = InstrToDFSNum(I); + SmallPtrSet<const Value *, 8> ProcessedPHIs; + // TODO: We don't do phi translation on memory accesses because it's + // complicated. For a load, we'd need to be able to simulate a new memoryuse, + // which we don't have a good way of doing ATM. + auto *MemAccess = getMemoryAccess(I); + // If the memory operation is defined by a memory operation in this block + // that isn't a MemoryPhi, transforming the pointer backwards through a + // scalar phi can't help, as it would still be killed by that memory + // operation. + if (MemAccess && !isa<MemoryPhi>(MemAccess->getDefiningAccess()) && + MemAccess->getDefiningAccess()->getBlock() == I->getParent()) + return nullptr; + + // Convert op of phis to phi of ops + for (auto &Op : I->operands()) { + // TODO: We can't handle expressions that must be recursively translated + // IE + // a = phi (b, c) + // f = use a + // g = f + phi of something + // To properly make a phi of ops for g, we'd have to properly translate and + // use the instruction for f. We should add this by splitting out the + // instruction creation we do below. + if (isa<PHINode>(Op) && PHINodeUses.count(cast<PHINode>(Op))) + return nullptr; + if (!isa<PHINode>(Op)) + continue; + auto *OpPHI = cast<PHINode>(Op); + // No point in doing this for one-operand phis. + if (OpPHI->getNumOperands() == 1) + continue; + if (!DebugCounter::shouldExecute(PHIOfOpsCounter)) + return nullptr; + SmallVector<std::pair<Value *, BasicBlock *>, 4> Ops; + auto *PHIBlock = getBlockForValue(OpPHI); + for (auto PredBB : OpPHI->blocks()) { + Value *FoundVal = nullptr; + // We could just skip unreachable edges entirely but it's tricky to do + // with rewriting existing phi nodes. + if (ReachableEdges.count({PredBB, PHIBlock})) { + // Clone the instruction, create an expression from it, and see if we + // have a leader. + Instruction *ValueOp = I->clone(); + if (MemAccess) + TempToMemory.insert({ValueOp, MemAccess}); + + for (auto &Op : ValueOp->operands()) { + Op = Op->DoPHITranslation(PHIBlock, PredBB); + // When this operand changes, it could change whether there is a + // leader for us or not.
+ addAdditionalUsers(Op, I); + } + // Make sure it's marked as a temporary instruction. + AllTempInstructions.insert(ValueOp); + // and make sure anything that tries to add its DFS number is + // redirected to the instruction we are making a phi of ops + // for. + InstrDFS.insert({ValueOp, IDFSNum}); + const Expression *E = performSymbolicEvaluation(ValueOp, Visited); + InstrDFS.erase(ValueOp); + AllTempInstructions.erase(ValueOp); + ValueOp->deleteValue(); + if (MemAccess) + TempToMemory.erase(ValueOp); + if (!E) + return nullptr; + FoundVal = findPhiOfOpsLeader(E, PredBB); + if (!FoundVal) { + ExpressionToPhiOfOps[E].insert(I); + return nullptr; + } + if (auto *SI = dyn_cast<StoreInst>(FoundVal)) + FoundVal = SI->getValueOperand(); + } else { + DEBUG(dbgs() << "Skipping phi of ops operand for incoming block " + << getBlockName(PredBB) + << " because the block is unreachable\n"); + FoundVal = UndefValue::get(I->getType()); + } + + Ops.push_back({FoundVal, PredBB}); + DEBUG(dbgs() << "Found phi of ops operand " << *FoundVal << " in " + << getBlockName(PredBB) << "\n"); + } + auto *ValuePHI = RealToTemp.lookup(I); + bool NewPHI = false; + if (!ValuePHI) { + ValuePHI = PHINode::Create(I->getType(), OpPHI->getNumOperands()); + addPhiOfOps(ValuePHI, PHIBlock, I); + NewPHI = true; + NumGVNPHIOfOpsCreated++; + } + if (NewPHI) { + for (auto PHIOp : Ops) + ValuePHI->addIncoming(PHIOp.first, PHIOp.second); + } else { + unsigned int i = 0; + for (auto PHIOp : Ops) { + ValuePHI->setIncomingValue(i, PHIOp.first); + ValuePHI->setIncomingBlock(i, PHIOp.second); + ++i; + } + } + + DEBUG(dbgs() << "Created phi of ops " << *ValuePHI << " for " << *I + << "\n"); + return performSymbolicEvaluation(ValuePHI, Visited); + } + return nullptr; +} + // The algorithm initially places the values of the routine in the TOP // congruence class. The leader of TOP is the undetermined value `undef`. // When the algorithm has finished, values still in TOP are unreachable. @@ -2302,6 +2601,12 @@ void NewGVN::initializeCongruenceClasses(Function &F) { TOPClass->incStoreCount(); } for (auto &I : *BB) { + // TODO: Move to helper + if (isa<PHINode>(&I)) + for (auto *U : I.users()) + if (auto *UInst = dyn_cast<Instruction>(U)) + if (InstrToDFSNum(UInst) != 0 && okayForPHIOfOps(UInst)) + PHINodeUses.insert(UInst); // Don't insert void terminators into the class. We don't value number // them, and they just end up sitting in TOP. if (isa<TerminatorInst>(I) && I.getType()->isVoidTy()) @@ -2326,12 +2631,35 @@ void NewGVN::cleanupTables() { CongruenceClasses[i] = nullptr; } + // Destroy the value expressions + SmallVector<Instruction *, 8> TempInst(AllTempInstructions.begin(), + AllTempInstructions.end()); + AllTempInstructions.clear(); + + // We have to drop all references for everything first, so there are no uses + // left as we delete them. + for (auto *I : TempInst) { + I->dropAllReferences(); + } + + while (!TempInst.empty()) { + auto *I = TempInst.back(); + TempInst.pop_back(); + I->deleteValue(); + } + ValueToClass.clear(); ArgRecycler.clear(ExpressionAllocator); ExpressionAllocator.Reset(); CongruenceClasses.clear(); ExpressionToClass.clear(); ValueToExpression.clear(); + RealToTemp.clear(); + AdditionalUsers.clear(); + ExpressionToPhiOfOps.clear(); + TempToBlock.clear(); + TempToMemory.clear(); + PHIOfOpsPHIs.clear(); ReachableBlocks.clear(); ReachableEdges.clear(); #ifndef NDEBUG @@ -2347,14 +2675,17 @@ void NewGVN::cleanupTables() { MemoryToUsers.clear(); } +// Assign local DFS number mapping to instructions, and leave space for Value +// PHIs.
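The per-predecessor translation loop in makePossiblePhiOfOps above reduces to a simple idea: for r = X + phi(a, b), evaluate X + a and X + b as if placed in the respective predecessors, and only if every translated expression already has a leader does r become a phi of those leaders. A toy model of that flow, where the Leaders table and every name are invented stand-ins for the pass's congruence-class lookup:

#include <iostream>
#include <map>
#include <string>
#include <utility>
#include <vector>

int main() {
  // The operand phi in block "merge": phi [10, %left], [20, %right].
  std::map<std::string, int> PhiIncoming{{"left", 10}, {"right", 20}};
  const int X = 1; // the non-phi operand of r = X + phi(...)

  // Pretend the congruence classes already know a leader for some translated
  // expressions, keyed by (predecessor, value of the translated op).
  const std::map<std::pair<std::string, int>, std::string> Leaders{
      {{"left", 11}, "%add.left"}, {{"right", 21}, "%add.right"}};

  std::vector<std::pair<std::string, std::string>> PhiOps;
  bool AllFound = true;
  for (const auto &[Pred, Incoming] : PhiIncoming) {
    int Translated = X + Incoming; // clone the op with the operand translated
    auto It = Leaders.find({Pred, Translated});
    if (It == Leaders.end()) {     // no leader in this predecessor: give up
      AllFound = false;
      break;
    }
    PhiOps.emplace_back(Pred, It->second);
  }
  if (AllFound)
    for (const auto &[Pred, Leader] : PhiOps)
      std::cout << "phi-of-ops operand from " << Pred << ": " << Leader << "\n";
  return 0;
}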
std::pair NewGVN::assignDFSNumbers(BasicBlock *B, unsigned Start) { unsigned End = Start; - if (MemoryAccess *MemPhi = MSSA->getMemoryAccess(B)) { + if (MemoryAccess *MemPhi = getMemoryAccess(B)) { InstrDFS[MemPhi] = End++; DFSToInstr.emplace_back(MemPhi); } + // Then the real block goes next. for (auto &I : *B) { // There's no need to call isInstructionTriviallyDead more than once on // an instruction. Therefore, once we know that an instruction is dead @@ -2365,7 +2696,6 @@ std::pair NewGVN::assignDFSNumbers(BasicBlock *B, markInstructionForDeletion(&I); continue; } - InstrDFS[&I] = End++; DFSToInstr.emplace_back(&I); } @@ -2376,7 +2706,7 @@ std::pair NewGVN::assignDFSNumbers(BasicBlock *B, return std::make_pair(Start, End); } -void NewGVN::updateProcessedCount(Value *V) { +void NewGVN::updateProcessedCount(const Value *V) { #ifndef NDEBUG if (ProcessedCount.count(V) == 0) { ProcessedCount.insert({V, 1}); @@ -2390,12 +2720,13 @@ void NewGVN::updateProcessedCount(Value *V) { // Evaluate MemoryPhi nodes symbolically, just like PHI nodes void NewGVN::valueNumberMemoryPhi(MemoryPhi *MP) { // If all the arguments are the same, the MemoryPhi has the same value as the - // argument. - // Filter out unreachable blocks and self phis from our operands. + // argument. Filter out unreachable blocks and self phis from our operands. + // TODO: We could do cycle-checking on the memory phis to allow valueizing for + // self-phi checking. const BasicBlock *PHIBlock = MP->getBlock(); auto Filtered = make_filter_range(MP->operands(), [&](const Use &U) { - return lookupMemoryLeader(cast(U)) != MP && - !isMemoryAccessTop(cast(U)) && + return cast(U) != MP && + !isMemoryAccessTOP(cast(U)) && ReachableEdges.count({MP->getIncomingBlock(U), PHIBlock}); }); // If all that is left is nothing, our memoryphi is undef. We keep it as @@ -2448,18 +2779,25 @@ void NewGVN::valueNumberInstruction(Instruction *I) { DEBUG(dbgs() << "Processing instruction " << *I << "\n"); if (!I->isTerminator()) { const Expression *Symbolized = nullptr; + SmallPtrSet Visited; if (DebugCounter::shouldExecute(VNCounter)) { - Symbolized = performSymbolicEvaluation(I); + Symbolized = performSymbolicEvaluation(I, Visited); + // Make a phi of ops if necessary + if (Symbolized && !isa(Symbolized) && + !isa(Symbolized) && PHINodeUses.count(I)) { + auto *PHIE = makePossiblePhiOfOps(I, Visited); + if (PHIE) + Symbolized = PHIE; + } + } else { // Mark the instruction as unused so we don't value number it again. InstrDFS[I] = 0; } // If we couldn't come up with a symbolic expression, use the unknown // expression - if (Symbolized == nullptr) { + if (Symbolized == nullptr) Symbolized = createUnknownExpression(I); - } - performCongruenceFinding(I, Symbolized); } else { // Handle terminators that return values. All of them produce values we @@ -2475,13 +2813,23 @@ void NewGVN::valueNumberInstruction(Instruction *I) { // Check if there is a path, using single or equal argument phi nodes, from // First to Second. -bool NewGVN::singleReachablePHIPath(const MemoryAccess *First, - const MemoryAccess *Second) const { +bool NewGVN::singleReachablePHIPath( + SmallPtrSet &Visited, const MemoryAccess *First, + const MemoryAccess *Second) const { if (First == Second) return true; if (MSSA->isLiveOnEntryDef(First)) return false; + // This is not perfect, but as we're just verifying here, we can live with + // the loss of precision. 
The real solution would be to do strongly + connected component finding in this routine, and it's probably not worth + the complexity for the time being. So, we just keep a set of visited + MemoryAccesses and return true when we hit a cycle. + if (Visited.count(First)) + return true; + Visited.insert(First); + const auto *EndDef = First; for (auto *ChainDef : optimized_def_chain(First)) { if (ChainDef == Second) @@ -2504,7 +2852,8 @@ bool NewGVN::singleReachablePHIPath(const MemoryAccess *First, Okay = std::equal(OperandList.begin(), OperandList.end(), OperandList.begin()); if (Okay) - return singleReachablePHIPath(cast<MemoryAccess>(OperandList[0]), Second); + return singleReachablePHIPath(Visited, cast<MemoryAccess>(OperandList[0]), + Second); return false; } @@ -2549,22 +2898,35 @@ void NewGVN::verifyMemoryCongruency() const { return false; if (auto *MemDef = dyn_cast<MemoryDef>(Pair.first)) return !isInstructionTriviallyDead(MemDef->getMemoryInst()); + + // We could have phi nodes whose operands are all trivially dead, + // so we don't process them. + if (auto *MemPHI = dyn_cast<MemoryPhi>(Pair.first)) { + for (auto &U : MemPHI->incoming_values()) { + if (Instruction *I = dyn_cast<Instruction>(U.get())) { + if (!isInstructionTriviallyDead(I)) + return true; + } + } + return false; + } + return true; }; auto Filtered = make_filter_range(MemoryAccessToClass, ReachableAccessPred); for (auto KV : Filtered) { - assert(KV.second != TOPClass && - "Memory not unreachable but ended up in TOP"); if (auto *FirstMUD = dyn_cast<MemoryUseOrDef>(KV.first)) { auto *SecondMUD = dyn_cast<MemoryUseOrDef>(KV.second->getMemoryLeader()); - if (FirstMUD && SecondMUD) - assert((singleReachablePHIPath(FirstMUD, SecondMUD) || + if (FirstMUD && SecondMUD) { + SmallPtrSet<const MemoryAccess *, 8> VisitedMAS; + assert((singleReachablePHIPath(VisitedMAS, FirstMUD, SecondMUD) || ValueToClass.lookup(FirstMUD->getMemoryInst()) == ValueToClass.lookup(SecondMUD->getMemoryInst())) && "The instructions for these memory operations should have " "been in the same congruence class or reachable through " "a single argument phi"); + } } else if (auto *FirstMP = dyn_cast<MemoryPhi>(KV.first)) { // We can only sanely verify that MemoryDefs in the operand list all have // the same class. @@ -2639,6 +3001,43 @@ void NewGVN::verifyIterationSettled(Function &F) { #endif } +// Verify that for each store expression in the expression to class mapping, +// only the latest appears, and multiple ones do not appear. +// Because loads do not use the stored value when doing equality with stores, +// if we don't erase the old store expressions from the table, a load can find +// a no-longer valid StoreExpression. +void NewGVN::verifyStoreExpressions() const { +#ifndef NDEBUG + // This is the only use of this, and it's not worth defining a complicated + // densemapinfo hash/equality function for it. + std::set< + std::pair<const Value *, + std::tuple<const MemoryAccess *, const CongruenceClass *, + const Value *>>> + StoreExpressionSet; + for (const auto &KV : ExpressionToClass) { + if (auto *SE = dyn_cast<StoreExpression>(KV.first)) { + // Make sure a version that will conflict with loads is not already there + auto Res = StoreExpressionSet.insert( + {SE->getOperand(0), std::make_tuple(SE->getMemoryLeader(), KV.second, + SE->getStoredValue())}); + bool Okay = Res.second; + // It's okay to have the same expression already in there if it is + // identical in nature. + // This can happen when the leader of the stored value changes over time.
+ if (!Okay) + Okay = (std::get<1>(Res.first->second) == KV.second) && + (lookupOperandLeader(std::get<2>(Res.first->second)) == + lookupOperandLeader(SE->getStoredValue())); + assert(Okay && "Stored expression conflict exists in expression table"); + auto *ValueExpr = ValueToExpression.lookup(SE->getStoreInst()); + assert(ValueExpr && ValueExpr->equals(*SE) && + "StoreExpression in ExpressionToClass is not latest " + "StoreExpression for value"); + } + } +#endif +} + // This is the main value numbering loop, it iterates over the initial touched // instruction set, propagating value numbers, marking things touched, etc, // until the set of touched instructions is completely empty. @@ -2649,15 +3048,14 @@ void NewGVN::iterateTouchedInstructions() { // Nothing set, nothing to iterate, just return. if (FirstInstr == -1) return; - BasicBlock *LastBlock = getBlockForValue(InstrFromDFSNum(FirstInstr)); + const BasicBlock *LastBlock = getBlockForValue(InstrFromDFSNum(FirstInstr)); while (TouchedInstructions.any()) { ++Iterations; // Walk through all the instructions in all the blocks in RPO. // TODO: As we hit a new block, we should push and pop equalities into a // table lookupOperandLeader can use, to catch things PredicateInfo // might miss, like edge-only equivalences. - for (int InstrNum = TouchedInstructions.find_first(); InstrNum != -1; - InstrNum = TouchedInstructions.find_next(InstrNum)) { + for (unsigned InstrNum : TouchedInstructions.set_bits()) { // This instruction was found to be dead. We don't bother looking // at it again. @@ -2667,7 +3065,7 @@ void NewGVN::iterateTouchedInstructions() { } Value *V = InstrFromDFSNum(InstrNum); - BasicBlock *CurrBlock = getBlockForValue(V); + const BasicBlock *CurrBlock = getBlockForValue(V); // If we hit a new block, do reachability processing. if (CurrBlock != LastBlock) { @@ -2685,6 +3083,9 @@ void NewGVN::iterateTouchedInstructions() { } updateProcessedCount(CurrBlock); } + // Reset after processing (because we may mark ourselves as touched when + // we propagate equalities). + TouchedInstructions.reset(InstrNum); if (auto *MP = dyn_cast(V)) { DEBUG(dbgs() << "Processing MemoryPhi " << *MP << "\n"); @@ -2695,9 +3096,6 @@ void NewGVN::iterateTouchedInstructions() { llvm_unreachable("Should have been a MemoryPhi or Instruction"); } updateProcessedCount(V); - // Reset after processing (because we may mark ourselves as touched when - // we propagate equalities). - TouchedInstructions.reset(InstrNum); } } NumGVNMaxIterations = std::max(NumGVNMaxIterations.getValue(), Iterations); @@ -2710,6 +3108,7 @@ bool NewGVN::runGVN() { bool Changed = false; NumFuncArgs = F.arg_size(); MSSAWalker = MSSA->getWalker(); + SingletonDeadExpression = new (ExpressionAllocator) DeadExpression(); // Count number of instructions for sizing of hash tables, and come // up with a global dfs numbering for instructions. @@ -2748,6 +3147,7 @@ bool NewGVN::runGVN() { BlockInstRange.insert({B, BlockRange}); ICount += BlockRange.second - BlockRange.first; } + initializeCongruenceClasses(F); TouchedInstructions.resize(ICount); // Ensure we don't end up resizing the expressionToClass map, as @@ -2758,12 +3158,14 @@ bool NewGVN::runGVN() { // Initialize the touched instructions to include the entry block. 
const auto &InstRange = BlockInstRange.lookup(&F.getEntryBlock()); TouchedInstructions.set(InstRange.first, InstRange.second); + DEBUG(dbgs() << "Block " << getBlockName(&F.getEntryBlock()) + << " marked reachable\n"); ReachableBlocks.insert(&F.getEntryBlock()); - initializeCongruenceClasses(F); iterateTouchedInstructions(); verifyMemoryCongruency(); verifyIterationSettled(F); + verifyStoreExpressions(); Changed |= eliminateInstructions(F); @@ -2772,7 +3174,8 @@ bool NewGVN::runGVN() { if (!ToErase->use_empty()) ToErase->replaceAllUsesWith(UndefValue::get(ToErase->getType())); - ToErase->eraseFromParent(); + if (ToErase->getParent()) + ToErase->eraseFromParent(); } // Delete all unreachable blocks. @@ -2791,14 +3194,6 @@ bool NewGVN::runGVN() { return Changed; } -// Return true if V is a value that will always be available (IE can -// be placed anywhere) in the function. We don't do globals here -// because they are often worse to put in place. -// TODO: Separate cost from availability -static bool alwaysAvailable(Value *V) { - return isa(V) || isa(V); -} - struct NewGVN::ValueDFS { int DFSIn = 0; int DFSOut = 0; @@ -2888,9 +3283,21 @@ void NewGVN::convertClassToDFSOrdered( } assert(isa(D) && "The dense set member should always be an instruction"); - VDDef.LocalNum = InstrToDFSNum(D); - DFSOrderedSet.emplace_back(VDDef); Instruction *Def = cast(D); + VDDef.LocalNum = InstrToDFSNum(D); + DFSOrderedSet.push_back(VDDef); + // If there is a phi node equivalent, add it + if (auto *PN = RealToTemp.lookup(Def)) { + auto *PHIE = + dyn_cast_or_null(ValueToExpression.lookup(Def)); + if (PHIE) { + VDDef.Def.setInt(false); + VDDef.Def.setPointer(PN); + VDDef.LocalNum = 0; + DFSOrderedSet.push_back(VDDef); + } + } + unsigned int UseCount = 0; // Now add the uses. for (auto &U : Def->uses()) { @@ -2907,7 +3314,7 @@ void NewGVN::convertClassToDFSOrdered( // they are from. VDUse.LocalNum = InstrDFS.size() + 1; } else { - IBlock = I->getParent(); + IBlock = getBlockForValue(I); VDUse.LocalNum = InstrToDFSNum(I); } @@ -3077,6 +3484,37 @@ class ValueDFSStack { }; } +// Given a value and a basic block we are trying to see if it is available in, +// see if the value has a leader available in that block. +Value *NewGVN::findPhiOfOpsLeader(const Expression *E, + const BasicBlock *BB) const { + // It would already be constant if we could make it constant + if (auto *CE = dyn_cast(E)) + return CE->getConstantValue(); + if (auto *VE = dyn_cast(E)) + return VE->getVariableValue(); + + auto *CC = ExpressionToClass.lookup(E); + if (!CC) + return nullptr; + if (alwaysAvailable(CC->getLeader())) + return CC->getLeader(); + + for (auto Member : *CC) { + auto *MemberInst = dyn_cast(Member); + // Anything that isn't an instruction is always available. + if (!MemberInst) + return Member; + // If we are looking for something in the same block as the member, it must + // be a leader because this function is looking for operands for a phi node. + if (MemberInst->getParent() == BB || + DT->dominates(MemberInst->getParent(), BB)) { + return Member; + } + } + return nullptr; +} + bool NewGVN::eliminateInstructions(Function &F) { // This is a non-standard eliminator. The normal way to eliminate is // to walk the dominator tree in order, keeping track of available @@ -3107,25 +3545,43 @@ bool NewGVN::eliminateInstructions(Function &F) { // DFS numbers are updated, we compute some ourselves. 
DT->updateDFSNumbers(); - for (auto &B : F) { - if (!ReachableBlocks.count(&B)) { - for (const auto S : successors(&B)) { - for (auto II = S->begin(); isa(II); ++II) { - auto &Phi = cast(*II); - DEBUG(dbgs() << "Replacing incoming value of " << *II << " for block " - << getBlockName(&B) - << " with undef due to it being unreachable\n"); - for (auto &Operand : Phi.incoming_values()) - if (Phi.getIncomingBlock(Operand) == &B) - Operand.set(UndefValue::get(Phi.getType())); - } + // Go through all of our phi nodes, and kill the arguments associated with + // unreachable edges. + auto ReplaceUnreachablePHIArgs = [&](PHINode &PHI, BasicBlock *BB) { + for (auto &Operand : PHI.incoming_values()) + if (!ReachableEdges.count({PHI.getIncomingBlock(Operand), BB})) { + DEBUG(dbgs() << "Replacing incoming value of " << PHI << " for block " + << getBlockName(PHI.getIncomingBlock(Operand)) + << " with undef due to it being unreachable\n"); + Operand.set(UndefValue::get(PHI.getType())); + } + }; + SmallPtrSet BlocksWithPhis; + for (auto &B : F) + if ((!B.empty() && isa(*B.begin())) || + (PHIOfOpsPHIs.find(&B) != PHIOfOpsPHIs.end())) + BlocksWithPhis.insert(&B); + DenseMap ReachablePredCount; + for (auto KV : ReachableEdges) + ReachablePredCount[KV.getEnd()]++; + for (auto *BB : BlocksWithPhis) + // TODO: It would be faster to use getNumIncomingBlocks() on a phi node in + // the block and subtract the pred count, but it's more complicated. + if (ReachablePredCount.lookup(BB) != + unsigned(std::distance(pred_begin(BB), pred_end(BB)))) { + for (auto II = BB->begin(); isa(II); ++II) { + auto &PHI = cast(*II); + ReplaceUnreachablePHIArgs(PHI, BB); } + for_each_found(PHIOfOpsPHIs, BB, [&](PHINode *PHI) { + ReplaceUnreachablePHIArgs(*PHI, BB); + }); } - } // Map to store the use counts DenseMap UseCounts; - for (CongruenceClass *CC : reverse(CongruenceClasses)) { + for (auto *CC : reverse(CongruenceClasses)) { + DEBUG(dbgs() << "Eliminating in congruence class " << CC->getID() << "\n"); // Track the equivalent store info so we can decide whether to try // dead store elimination. SmallVector PossibleDeadStores; @@ -3134,13 +3590,15 @@ bool NewGVN::eliminateInstructions(Function &F) { continue; // Everything still in the TOP class is unreachable or dead. if (CC == TOPClass) { -#ifndef NDEBUG - for (auto M : *CC) + for (auto M : *CC) { + auto *VTE = ValueToExpression.lookup(M); + if (VTE && isa(VTE)) + markInstructionForDeletion(cast(M)); assert((!ReachableBlocks.count(cast(M)->getParent()) || InstructionsToErase.count(cast(M))) && "Everything in TOP should be unreachable or dead at this " "point"); -#endif + } continue; } @@ -3170,10 +3628,8 @@ bool NewGVN::eliminateInstructions(Function &F) { } CC->swap(MembersLeft); } else { - DEBUG(dbgs() << "Eliminating in congruence class " << CC->getID() - << "\n"); // If this is a singleton, we can skip it. - if (CC->size() != 1) { + if (CC->size() != 1 || RealToTemp.lookup(Leader)) { // This is a stack because equality replacement/etc may place // constants in the middle of the member list, and we want to use // those constant values in preference to the current leader, over @@ -3195,6 +3651,22 @@ bool NewGVN::eliminateInstructions(Function &F) { // We ignore void things because we can't get a value from them. 
if (Def && Def->getType()->isVoidTy()) continue; + auto *DefInst = dyn_cast_or_null<Instruction>(Def); + if (DefInst && AllTempInstructions.count(DefInst)) { + auto *PN = cast<PHINode>(DefInst); + + // If this is a value phi and that's the expression we used, insert it + // into the program and remove it from the temp instruction list. + AllTempInstructions.erase(PN); + auto *DefBlock = getBlockForValue(Def); + DEBUG(dbgs() << "Inserting fully real phi of ops " << *Def + << " into block " + << getBlockName(getBlockForValue(Def)) << "\n"); + PN->insertBefore(&DefBlock->front()); + Def = PN; + NumGVNPHIOfOpsEliminations++; + } if (EliminationStack.empty()) { DEBUG(dbgs() << "Elimination Stack is empty\n"); @@ -3279,6 +3751,10 @@ bool NewGVN::eliminateInstructions(Function &F) { Value *DominatingLeader = EliminationStack.back(); + auto *II = dyn_cast<IntrinsicInst>(DominatingLeader); + if (II && II->getIntrinsicID() == Intrinsic::ssa_copy) + DominatingLeader = II->getOperand(0); + // Don't replace our existing users with ourselves. if (U->get() == DominatingLeader) continue; @@ -3299,6 +3775,8 @@ bool NewGVN::eliminateInstructions(Function &F) { // It's about to be alive again. if (LeaderUseCount == 0 && isa<Instruction>(DominatingLeader)) ProbablyDead.erase(cast<Instruction>(DominatingLeader)); + if (LeaderUseCount == 0 && II) + ProbablyDead.insert(II); ++LeaderUseCount; AnythingReplaced = true; } @@ -3353,7 +3831,6 @@ bool NewGVN::eliminateInstructions(Function &F) { } } } - return AnythingReplaced; } @@ -3363,19 +3840,23 @@ bool NewGVN::eliminateInstructions(Function &F) { // we will simplify an operation with all constants so that it doesn't matter // what order they appear in. unsigned int NewGVN::getRank(const Value *V) const { - // Prefer undef to anything else + // Prefer constants to undef to anything else + // Undef is a constant, have to check it first. + // Prefer smaller constants to constantexprs + if (isa<ConstantExpr>(V)) + return 2; if (isa<UndefValue>(V)) - return 0; - if (isa<Constant>(V)) return 1; + if (isa<Constant>(V)) + return 0; else if (auto *A = dyn_cast<Argument>(V)) - return 2 + A->getArgNo(); + return 3 + A->getArgNo(); // Need to shift the instruction DFS by number of arguments + 3 to account for // the constant and argument ranking above. unsigned Result = InstrToDFSNum(V); if (Result > 0) - return 3 + NumFuncArgs + Result; + return 4 + NumFuncArgs + Result; // Unreachable or something else, just return a really large number. return ~0; } @@ -3389,6 +3870,7 @@ bool NewGVN::shouldSwapOperands(const Value *A, const Value *B) const { return std::make_pair(getRank(A), A) > std::make_pair(getRank(B), B); } +namespace { class NewGVNLegacyPass : public FunctionPass { public: static char ID; // Pass identification, replacement for typeid.
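The elimination walk above is scoped by dominator-tree DFS intervals: a leader defined at interval [DFSIn, DFSOut] covers exactly the uses whose own DFSIn falls inside that interval, which is why a stack of candidates can simply be popped once the current use leaves the top entry's scope. A minimal sketch of that containment check, assuming DFSIn/DFSOut pairs like those produced by DominatorTree::updateDFSNumbers() (the numbers below are invented):

#include <cassert>

struct Scope { int DFSIn, DFSOut; };

// Def dominates Use iff Use's DFS-in number lies within Def's interval.
static bool dominates(const Scope &Def, const Scope &Use) {
  return Def.DFSIn <= Use.DFSIn && Def.DFSOut >= Use.DFSOut;
}

int main() {
  Scope EntryDef{1, 10};    // leader defined in the entry block
  Scope LoopUse{3, 6};      // use inside a dominated loop body
  Scope SiblingUse{12, 13}; // use in an unrelated sibling subtree
  assert(dominates(EntryDef, LoopUse));
  assert(!dominates(EntryDef, SiblingUse)); // stack must be popped first
  return 0;
}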
@@ -3408,6 +3890,7 @@ class NewGVNLegacyPass : public FunctionPass { AU.addPreserved(); } }; +} // namespace bool NewGVNLegacyPass::runOnFunction(Function &F) { if (skipFunction(F)) diff --git a/interpreter/llvm/src/lib/Transforms/Scalar/Reassociate.cpp b/interpreter/llvm/src/lib/Transforms/Scalar/Reassociate.cpp index ef29d4141600a..e235e5eb1a06a 100644 --- a/interpreter/llvm/src/lib/Transforms/Scalar/Reassociate.cpp +++ b/interpreter/llvm/src/lib/Transforms/Scalar/Reassociate.cpp @@ -35,6 +35,7 @@ #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" @@ -106,11 +107,12 @@ XorOpnd::XorOpnd(Value *V) { I->getOpcode() == Instruction::And)) { Value *V0 = I->getOperand(0); Value *V1 = I->getOperand(1); - if (isa(V0)) + const APInt *C; + if (match(V0, PatternMatch::m_APInt(C))) std::swap(V0, V1); - if (ConstantInt *C = dyn_cast(V1)) { - ConstPart = C->getValue(); + if (match(V1, PatternMatch::m_APInt(C))) { + ConstPart = *C; SymbolicPart = V0; isOr = (I->getOpcode() == Instruction::Or); return; @@ -119,7 +121,7 @@ XorOpnd::XorOpnd(Value *V) { // view the operand as "V | 0" SymbolicPart = V; - ConstPart = APInt::getNullValue(V->getType()->getIntegerBitWidth()); + ConstPart = APInt::getNullValue(V->getType()->getScalarSizeInBits()); isOr = true; } @@ -955,8 +957,8 @@ static BinaryOperator *ConvertShiftToMul(Instruction *Shl) { /// Scan backwards and forwards among values with the same rank as element i /// to see if X exists. If X does not exist, return i. This is useful when /// scanning for 'x' when we see '-x' because they both get the same rank. -static unsigned FindInOperandList(SmallVectorImpl &Ops, unsigned i, - Value *X) { +static unsigned FindInOperandList(const SmallVectorImpl &Ops, + unsigned i, Value *X) { unsigned XRank = Ops[i].Rank; unsigned e = Ops.size(); for (unsigned j = i+1; j != e && Ops[j].Rank == XRank; ++j) { @@ -1134,20 +1136,19 @@ static Value *OptimizeAndOrXor(unsigned Opcode, /// instruction. There are two special cases: 1) if the constant operand is 0, /// it will return NULL. 2) if the constant is ~0, the symbolic operand will /// be returned. -static Value *createAndInstr(Instruction *InsertBefore, Value *Opnd, +static Value *createAndInstr(Instruction *InsertBefore, Value *Opnd, const APInt &ConstOpnd) { - if (ConstOpnd != 0) { - if (!ConstOpnd.isAllOnesValue()) { - LLVMContext &Ctx = Opnd->getType()->getContext(); - Instruction *I; - I = BinaryOperator::CreateAnd(Opnd, ConstantInt::get(Ctx, ConstOpnd), - "and.ra", InsertBefore); - I->setDebugLoc(InsertBefore->getDebugLoc()); - return I; - } + if (ConstOpnd.isNullValue()) + return nullptr; + + if (ConstOpnd.isAllOnesValue()) return Opnd; - } - return nullptr; + + Instruction *I = BinaryOperator::CreateAnd( + Opnd, ConstantInt::get(Opnd->getType(), ConstOpnd), "and.ra", + InsertBefore); + I->setDebugLoc(InsertBefore->getDebugLoc()); + return I; } // Helper function of OptimizeXor(). It tries to simplify "Opnd1 ^ ConstOpnd" @@ -1163,24 +1164,24 @@ bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, // = ((x | c1) ^ c1) ^ (c1 ^ c2) // = (x & ~c1) ^ (c1 ^ c2) // It is useful only when c1 == c2. 
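The algebraic rewrite derived in the comment just above can be checked mechanically: (x | c1) ^ c2 equals (x & ~c1) ^ (c1 ^ c2) for all bit patterns, and when c1 == c2 the right-hand side collapses to x & ~c1, which is what makes the transform profitable. A small self-contained check, with plain uint32_t standing in for APInt and arbitrary invented constants:

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t x : {0u, 1u, 0xDEADBEEFu, 0xFFFFFFFFu}) {
    const uint32_t c1 = 0x0F0F0F0Fu;
    for (uint32_t c2 : {c1, 0x12345678u}) {
      // The identity holds unconditionally...
      assert(((x | c1) ^ c2) == ((x & ~c1) ^ (c1 ^ c2)));
      // ...but only c1 == c2 removes an operation outright.
      if (c1 == c2)
        assert(((x | c1) ^ c2) == (x & ~c1));
    }
  }
  return 0;
}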
- if (Opnd1->isOrExpr() && Opnd1->getConstPart() != 0) { - if (!Opnd1->getValue()->hasOneUse()) - return false; + if (!Opnd1->isOrExpr() || Opnd1->getConstPart().isNullValue()) + return false; - const APInt &C1 = Opnd1->getConstPart(); - if (C1 != ConstOpnd) - return false; + if (!Opnd1->getValue()->hasOneUse()) + return false; - Value *X = Opnd1->getSymbolicPart(); - Res = createAndInstr(I, X, ~C1); - // ConstOpnd was C2, now C1 ^ C2. - ConstOpnd ^= C1; + const APInt &C1 = Opnd1->getConstPart(); + if (C1 != ConstOpnd) + return false; - if (Instruction *T = dyn_cast(Opnd1->getValue())) - RedoInsts.insert(T); - return true; - } - return false; + Value *X = Opnd1->getSymbolicPart(); + Res = createAndInstr(I, X, ~C1); + // ConstOpnd was C2, now C1 ^ C2. + ConstOpnd ^= C1; + + if (Instruction *T = dyn_cast(Opnd1->getValue())) + RedoInsts.insert(T); + return true; } @@ -1221,8 +1222,8 @@ bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, APInt C3((~C1) ^ C2); // Do not increase code size! - if (C3 != 0 && !C3.isAllOnesValue()) { - int NewInstNum = ConstOpnd != 0 ? 1 : 2; + if (!C3.isNullValue() && !C3.isAllOnesValue()) { + int NewInstNum = ConstOpnd.getBoolValue() ? 1 : 2; if (NewInstNum > DeadInstNum) return false; } @@ -1238,8 +1239,8 @@ bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, APInt C3 = C1 ^ C2; // Do not increase code size - if (C3 != 0 && !C3.isAllOnesValue()) { - int NewInstNum = ConstOpnd != 0 ? 1 : 2; + if (!C3.isNullValue() && !C3.isAllOnesValue()) { + int NewInstNum = ConstOpnd.getBoolValue() ? 1 : 2; if (NewInstNum > DeadInstNum) return false; } @@ -1279,17 +1280,20 @@ Value *ReassociatePass::OptimizeXor(Instruction *I, SmallVector Opnds; SmallVector OpndPtrs; Type *Ty = Ops[0].Op->getType(); - APInt ConstOpnd(Ty->getIntegerBitWidth(), 0); + APInt ConstOpnd(Ty->getScalarSizeInBits(), 0); // Step 1: Convert ValueEntry to XorOpnd for (unsigned i = 0, e = Ops.size(); i != e; ++i) { Value *V = Ops[i].Op; - if (!isa(V)) { + const APInt *C; + // TODO: Support non-splat vectors. + if (match(V, PatternMatch::m_APInt(C))) { + ConstOpnd ^= *C; + } else { XorOpnd O(V); O.setSymbolicRank(getRank(O.getSymbolicPart())); Opnds.push_back(O); - } else - ConstOpnd ^= cast(V)->getValue(); + } } // NOTE: From this point on, do *NOT* add/delete element to/from "Opnds". @@ -1327,7 +1331,8 @@ Value *ReassociatePass::OptimizeXor(Instruction *I, Value *CV; // Step 3.1: Try simplifying "CurrOpnd ^ ConstOpnd" - if (ConstOpnd != 0 && CombineXorOpnd(I, CurrOpnd, ConstOpnd, CV)) { + if (!ConstOpnd.isNullValue() && + CombineXorOpnd(I, CurrOpnd, ConstOpnd, CV)) { Changed = true; if (CV) *CurrOpnd = XorOpnd(CV); @@ -1369,17 +1374,17 @@ Value *ReassociatePass::OptimizeXor(Instruction *I, ValueEntry VE(getRank(O.getValue()), O.getValue()); Ops.push_back(VE); } - if (ConstOpnd != 0) { - Value *C = ConstantInt::get(Ty->getContext(), ConstOpnd); + if (!ConstOpnd.isNullValue()) { + Value *C = ConstantInt::get(Ty, ConstOpnd); ValueEntry VE(getRank(C), C); Ops.push_back(VE); } - int Sz = Ops.size(); + unsigned Sz = Ops.size(); if (Sz == 1) return Ops.back().Op; - else if (Sz == 0) { - assert(ConstOpnd == 0); - return ConstantInt::get(Ty->getContext(), ConstOpnd); + if (Sz == 0) { + assert(ConstOpnd.isNullValue()); + return ConstantInt::get(Ty, ConstOpnd); } } @@ -1582,7 +1587,7 @@ Value *ReassociatePass::OptimizeAdd(Instruction *I, } // No need for extra uses anymore. 
- delete DummyInst; + DummyInst->deleteValue(); unsigned NumAddedValues = NewMulOps.size(); Value *V = EmitAddTreeOfValues(I, NewMulOps); @@ -1627,8 +1632,8 @@ Value *ReassociatePass::OptimizeAdd(Instruction *I, /// ((((x*y)*x)*y)*x) -> [(x, 3), (y, 2)] /// /// \returns Whether any factors have a power greater than one. -bool ReassociatePass::collectMultiplyFactors(SmallVectorImpl &Ops, - SmallVectorImpl &Factors) { +static bool collectMultiplyFactors(SmallVectorImpl &Ops, + SmallVectorImpl &Factors) { // FIXME: Have Ops be (ValueEntry, Multiplicity) pairs, simplifying this. // Compute the sum of powers of simplifiable factors. unsigned FactorPowerSum = 0; @@ -1889,6 +1894,8 @@ void ReassociatePass::EraseInst(Instruction *I) { Op = Op->user_back(); RedoInsts.insert(Op); } + + MadeChange = true; } // Canonicalize expressions of the following form: @@ -1922,7 +1929,7 @@ Instruction *ReassociatePass::canonicalizeNegConstExpr(Instruction *I) { // User must be a binary operator with one or more uses. Instruction *User = I->user_back(); - if (!isa(User) || !User->hasNUsesOrMore(1)) + if (!isa(User) || User->use_empty()) return nullptr; unsigned UserOpcode = User->getOpcode(); @@ -1934,6 +1941,12 @@ Instruction *ReassociatePass::canonicalizeNegConstExpr(Instruction *I) { if (!User->isCommutative() && User->getOperand(1) != I) return nullptr; + // Don't canonicalize x + (-Constant * y) -> x - (Constant * y), if the + // resulting subtract will be broken up later. This can get us into an + // infinite loop during reassociation. + if (UserOpcode == Instruction::FAdd && ShouldBreakUpSubtract(User)) + return nullptr; + // Change the sign of the constant. APFloat Val = CF->getValueAPF(); Val.changeSign(); @@ -1999,11 +2012,6 @@ void ReassociatePass::OptimizeInst(Instruction *I) { if (I->isCommutative()) canonicalizeOperands(I); - // TODO: We should optimize vector Xor instructions, but they are - // currently unsupported. - if (I->getType()->isVectorTy() && I->getOpcode() == Instruction::Xor) - return; - // Don't optimize floating point instructions that don't have unsafe algebra. 
if (I->getType()->isFPOrFPVectorTy() && !I->hasUnsafeAlgebra()) return; @@ -2146,7 +2154,7 @@ void ReassociatePass::ReassociateExpression(BinaryOperator *I) { if (I->getOpcode() == Instruction::Mul && cast(I->user_back())->getOpcode() == Instruction::Add && isa(Ops.back().Op) && - cast(Ops.back().Op)->isAllOnesValue()) { + cast(Ops.back().Op)->isMinusOne()) { ValueEntry Tmp = Ops.pop_back_val(); Ops.insert(Ops.begin(), Tmp); } else if (I->getOpcode() == Instruction::FMul && diff --git a/interpreter/llvm/src/lib/Transforms/Scalar/Reg2Mem.cpp b/interpreter/llvm/src/lib/Transforms/Scalar/Reg2Mem.cpp index 615029dd161bb..96295683314cf 100644 --- a/interpreter/llvm/src/lib/Transforms/Scalar/Reg2Mem.cpp +++ b/interpreter/llvm/src/lib/Transforms/Scalar/Reg2Mem.cpp @@ -16,7 +16,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Scalar.h" #include "llvm/ADT/Statistic.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" @@ -25,6 +24,7 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" +#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/Local.h" #include using namespace llvm; diff --git a/interpreter/llvm/src/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/interpreter/llvm/src/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp index 77b2bd84f9b61..f19d45329d238 100644 --- a/interpreter/llvm/src/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ b/interpreter/llvm/src/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -7,20 +7,19 @@ // //===----------------------------------------------------------------------===// // -// Rewrite an existing set of gc.statepoints such that they make potential -// relocations performed by the garbage collector explicit in the IR. +// Rewrite call/invoke instructions so as to make potential relocations +// performed by the garbage collector explicit in the IR. 
// //===----------------------------------------------------------------------===// -#include "llvm/Pass.h" -#include "llvm/Analysis/CFG.h" -#include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/ADT/SetOperations.h" -#include "llvm/ADT/Statistic.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/SetOperations.h" #include "llvm/ADT/SetVector.h" +#include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringRef.h" -#include "llvm/ADT/MapVector.h" +#include "llvm/Analysis/CFG.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Dominators.h" @@ -28,15 +27,16 @@ #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Module.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Module.h" #include "llvm/IR/Statepoint.h" #include "llvm/IR/Value.h" #include "llvm/IR/Verifier.h" -#include "llvm/Support/Debug.h" +#include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" @@ -89,10 +89,10 @@ struct RewriteStatepointsForGC : public ModulePass { Changed |= runOnFunction(F); if (Changed) { - // stripNonValidAttributes asserts that shouldRewriteStatepointsIn + // stripNonValidAttributesAndMetadata asserts that shouldRewriteStatepointsIn // returns true for at least one function in the module. Since at least // one function changed, we know that the precondition is satisfied. - stripNonValidAttributes(M); + stripNonValidAttributesAndMetadata(M); } return Changed; @@ -105,20 +105,24 @@ struct RewriteStatepointsForGC : public ModulePass { AU.addRequired(); } - /// The IR fed into RewriteStatepointsForGC may have had attributes implying - /// dereferenceability that are no longer valid/correct after - /// RewriteStatepointsForGC has run. This is because semantically, after + /// The IR fed into RewriteStatepointsForGC may have had attributes and + /// metadata implying dereferenceability that are no longer valid/correct after + /// RewriteStatepointsForGC has run. This is because semantically, after /// RewriteStatepointsForGC runs, all calls to gc.statepoint "free" the entire - /// heap. stripNonValidAttributes (conservatively) restores correctness - /// by erasing all attributes in the module that externally imply - /// dereferenceability. - /// Similar reasoning also applies to the noalias attributes. gc.statepoint - /// can touch the entire heap including noalias objects. - void stripNonValidAttributes(Module &M); - - // Helpers for stripNonValidAttributes - void stripNonValidAttributesFromBody(Function &F); + /// heap. stripNonValidAttributesAndMetadata (conservatively) restores + /// correctness by erasing all attributes in the module that externally imply + /// dereferenceability. Similar reasoning also applies to the noalias + /// attributes and metadata. gc.statepoint can touch the entire heap including + /// noalias objects. + void stripNonValidAttributesAndMetadata(Module &M); + + // Helpers for stripNonValidAttributesAndMetadata + void stripNonValidAttributesAndMetadataFromBody(Function &F); void stripNonValidAttributesFromPrototype(Function &F); + // Certain metadata on instructions are invalid after running RS4GC. 
+ // Optimizations that run after RS4GC can incorrectly use this metadata to + // optimize functions. We drop such metadata on the instruction. + void stripInvalidMetadataFromInstruction(Instruction &I); }; } // namespace @@ -1950,7 +1954,7 @@ static void rematerializeLiveValues(CallSite CS, // to identify the newly generated AlternateRootPhi (.base version of phi) // and RootOfChain (the original phi node itself) are the same, so that we // can rematerialize the gep and casts. This is a workaround for the - // deficieny in the findBasePointer algorithm. + // deficiency in the findBasePointer algorithm. if (!AreEquivalentPhiNodes(*OrigRootPhi, *AlternateRootPhi)) continue; // Now that the phi nodes are proved to be the same, assert that @@ -1990,7 +1994,7 @@ static void rematerializeLiveValues(CallSite CS, Instruction *LastClonedValue = nullptr; Instruction *LastValue = nullptr; for (Instruction *Instr: ChainToBase) { - // Only GEP's and casts are suported as we need to be careful to not + // Only GEP's and casts are supported as we need to be careful to not // introduce any new uses of pointers not in the liveset. // Note that it's fine to introduce new uses of pointers which were // otherwise not used after this statepoint. @@ -2094,9 +2098,9 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, // live in the IR. We'll remove all of these when done. SmallVector Holders; - // Insert a dummy call with all of the arguments to the vm_state we'll need - // for the actual safepoint insertion. This ensures reference arguments in - // the deopt argument list are considered live through the safepoint (and + // Insert a dummy call with all of the deopt operands we'll need for the + // actual safepoint insertion as arguments. This ensures reference operands + // in the deopt argument list are considered live through the safepoint (and // thus makes sure they get relocated.) for (CallSite CS : ToUpdate) { SmallVector DeoptValues; @@ -2306,13 +2310,44 @@ RewriteStatepointsForGC::stripNonValidAttributesFromPrototype(Function &F) { RemoveNonValidAttrAtIndex(Ctx, F, AttributeList::ReturnIndex); } -void RewriteStatepointsForGC::stripNonValidAttributesFromBody(Function &F) { +void RewriteStatepointsForGC::stripInvalidMetadataFromInstruction(Instruction &I) { + + if (!isa(I) && !isa(I)) + return; + // These are the metadata kinds that remain valid on loads and stores after + // RS4GC. + // The metadata implying dereferenceability and noalias are (conservatively) + // dropped. This is because semantically, after RewriteStatepointsForGC runs, + // all calls to gc.statepoint "free" the entire heap. Also, gc.statepoint can + // touch the entire heap including noalias objects. Note: The reasoning is + // the same as for stripping the analogous dereferenceability and noalias + // attributes. + // We also drop the invariant.load metadata on the load because that metadata + // implies the address operand to the load points to memory that is never + // changed once it becomes dereferenceable. This is no longer true after RS4GC. + // Similar reasoning applies to invariant.group metadata, which applies to + // loads within a group. + unsigned ValidMetadataAfterRS4GC[] = {LLVMContext::MD_tbaa, + LLVMContext::MD_range, + LLVMContext::MD_alias_scope, + LLVMContext::MD_nontemporal, + LLVMContext::MD_nonnull, + LLVMContext::MD_align, + LLVMContext::MD_type}; + + // Drops all metadata on the instruction other than ValidMetadataAfterRS4GC.
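// [Editor's note: illustrative sketch, not part of the patch.] The stripping
// here is whitelist-based: any metadata kind not listed in
// ValidMetadataAfterRS4GC is removed by dropUnknownNonDebugMetadata. A
// stand-alone model of the same filtering, using hypothetical plain-C++
// types rather than LLVM's:
#include <map>
#include <set>
#include <string>

using MetadataMap = std::map<std::string, std::string>; // kind -> payload

void dropUnknownMetadata(MetadataMap &MD, const std::set<std::string> &Keep) {
  for (auto It = MD.begin(); It != MD.end();) {
    if (Keep.count(It->first))
      ++It;               // whitelisted kind (e.g. "tbaa", "range"): keep it
    else
      It = MD.erase(It);  // e.g. "invariant.load": no longer valid, drop it
  }
}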
+ I.dropUnknownNonDebugMetadata(ValidMetadataAfterRS4GC); + +} + +void RewriteStatepointsForGC::stripNonValidAttributesAndMetadataFromBody(Function &F) { if (F.empty()) return; LLVMContext &Ctx = F.getContext(); MDBuilder Builder(Ctx); + for (Instruction &I : instructions(F)) { if (const MDNode *MD = I.getMetadata(LLVMContext::MD_tbaa)) { assert(MD->getNumOperands() < 5 && "unrecognized metadata shape!"); @@ -2333,6 +2368,8 @@ void RewriteStatepointsForGC::stripNonValidAttributesFromBody(Function &F) { I.setMetadata(LLVMContext::MD_tbaa, MutableTBAA); } + stripInvalidMetadataFromInstruction(I); + if (CallSite CS = CallSite(&I)) { for (int i = 0, e = CS.arg_size(); i != e; i++) if (isa(CS.getArgument(i)->getType())) @@ -2357,7 +2394,7 @@ static bool shouldRewriteStatepointsIn(Function &F) { return false; } -void RewriteStatepointsForGC::stripNonValidAttributes(Module &M) { +void RewriteStatepointsForGC::stripNonValidAttributesAndMetadata(Module &M) { #ifndef NDEBUG assert(any_of(M, shouldRewriteStatepointsIn) && "precondition!"); #endif @@ -2366,7 +2403,7 @@ void RewriteStatepointsForGC::stripNonValidAttributes(Module &M) { stripNonValidAttributesFromPrototype(F); for (Function &F : M) - stripNonValidAttributesFromBody(F); + stripNonValidAttributesAndMetadataFromBody(F); } bool RewriteStatepointsForGC::runOnFunction(Function &F) { diff --git a/interpreter/llvm/src/lib/Transforms/Scalar/SCCP.cpp b/interpreter/llvm/src/lib/Transforms/Scalar/SCCP.cpp index 8908dae2f5459..4822cf7cce0fe 100644 --- a/interpreter/llvm/src/lib/Transforms/Scalar/SCCP.cpp +++ b/interpreter/llvm/src/lib/Transforms/Scalar/SCCP.cpp @@ -515,10 +515,6 @@ class SCCPSolver : public InstVisitor { void visitCmpInst(CmpInst &I); void visitExtractValueInst(ExtractValueInst &EVI); void visitInsertValueInst(InsertValueInst &IVI); - void visitLandingPadInst(LandingPadInst &I) { markOverdefined(&I); } - void visitFuncletPadInst(FuncletPadInst &FPI) { - markOverdefined(&FPI); - } void visitCatchSwitchInst(CatchSwitchInst &CPI) { markOverdefined(&CPI); visitTerminatorInst(CPI); @@ -539,17 +535,11 @@ class SCCPSolver : public InstVisitor { void visitResumeInst (TerminatorInst &I) { /*returns void*/ } void visitUnreachableInst(TerminatorInst &I) { /*returns void*/ } void visitFenceInst (FenceInst &I) { /*returns void*/ } - void visitAtomicCmpXchgInst(AtomicCmpXchgInst &I) { - markOverdefined(&I); - } - void visitAtomicRMWInst (AtomicRMWInst &I) { markOverdefined(&I); } - void visitAllocaInst (Instruction &I) { markOverdefined(&I); } - void visitVAArgInst (Instruction &I) { markOverdefined(&I); } - void visitInstruction(Instruction &I) { - // If a new instruction is added to LLVM that we don't handle. + // All the instructions we don't do any special handling for just + // go to overdefined. DEBUG(dbgs() << "SCCP: Don't know how to handle: " << I << '\n'); - markOverdefined(&I); // Just in case + markOverdefined(&I); } }; @@ -973,7 +963,7 @@ void SCCPSolver::visitBinaryOperator(Instruction &I) { } else { // X or -1 = -1 if (ConstantInt *CI = NonOverdefVal->getConstantInt()) - if (CI->isAllOnesValue()) + if (CI->isMinusOne()) return markConstant(IV, &I, NonOverdefVal->getConstant()); } } @@ -1117,7 +1107,7 @@ void SCCPSolver::visitCallSite(CallSite CS) { // Otherwise, if we have a single return value case, and if the function is // a declaration, maybe we can constant fold it. 
if (F && F->isDeclaration() && !I->getType()->isStructTy() && - canConstantFoldCallTo(F)) { + canConstantFoldCallTo(CS, F)) { SmallVector Operands; for (CallSite::arg_iterator AI = CS.arg_begin(), E = CS.arg_end(); @@ -1137,7 +1127,7 @@ void SCCPSolver::visitCallSite(CallSite CS) { // If we can constant fold this, mark the result of the call as a // constant. - if (Constant *C = ConstantFoldCall(F, Operands, TLI)) { + if (Constant *C = ConstantFoldCall(CS, F, Operands, TLI)) { // call -> undef. if (isa(C)) return; @@ -1779,8 +1769,9 @@ static bool runIPSCCP(Module &M, const DataLayout &DL, // arguments and return value aggressively, and can assume it is not called // unless we see evidence to the contrary. if (F.hasLocalLinkage()) { - if (AddressIsTaken(&F)) + if (F.hasAddressTaken()) { AddressTakenFunctions.insert(&F); + } else { Solver.AddArgumentTrackedFunction(&F); continue; @@ -1799,7 +1790,8 @@ static bool runIPSCCP(Module &M, const DataLayout &DL, // variables that do not have their 'addresses taken'. If they don't have // their addresses taken, we can propagate constants through them. for (GlobalVariable &G : M.globals()) - if (!G.isConstant() && G.hasLocalLinkage() && !AddressIsTaken(&G)) + if (!G.isConstant() && G.hasLocalLinkage() && + G.hasDefinitiveInitializer() && !AddressIsTaken(&G)) Solver.TrackValueOfGlobalVariable(&G); // Solve for constants. @@ -1824,15 +1816,11 @@ static bool runIPSCCP(Module &M, const DataLayout &DL, if (F.isDeclaration()) continue; - if (Solver.isBlockExecutable(&F.front())) { + if (Solver.isBlockExecutable(&F.front())) for (Function::arg_iterator AI = F.arg_begin(), E = F.arg_end(); AI != E; - ++AI) { - if (AI->use_empty()) - continue; - if (tryToReplaceWithConstant(Solver, &*AI)) + ++AI) + if (!AI->use_empty() && tryToReplaceWithConstant(Solver, &*AI)) ++IPNumArgsElimed; - } - } for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { if (!Solver.isBlockExecutable(&*BB)) { diff --git a/interpreter/llvm/src/lib/Transforms/Scalar/SROA.cpp b/interpreter/llvm/src/lib/Transforms/Scalar/SROA.cpp index 1d9beffaf06bf..b9cee5b2ba956 100644 --- a/interpreter/llvm/src/lib/Transforms/Scalar/SROA.cpp +++ b/interpreter/llvm/src/lib/Transforms/Scalar/SROA.cpp @@ -326,7 +326,7 @@ class llvm::sroa::Partition { /// partition. uint64_t BeginOffset, EndOffset; - /// \brief The start end end iterators of this partition. + /// \brief The start and end iterators of this partition. iterator SI, SJ; /// \brief A collection of split slice tails overlapping the partition. @@ -1252,7 +1252,7 @@ static bool isSafeSelectToSpeculate(SelectInst &SI) { if (!LI || !LI->isSimple()) return false; - // Both operands to the select need to be dereferencable, either + // Both operands to the select need to be dereferenceable, either // absolutely (e.g. allocas) or at this point because we can see other // accesses to it. if (!isSafeToLoadUnconditionally(TValue, LI->getAlignment(), DL, LI)) @@ -1637,8 +1637,17 @@ static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) { return cast(NewTy)->getPointerAddressSpace() == cast(OldTy)->getPointerAddressSpace(); } - if (NewTy->isIntegerTy() || OldTy->isIntegerTy()) - return true; + + // We can convert integers to integral pointers, but not to non-integral + // pointers. + if (OldTy->isIntegerTy()) + return !DL.isNonIntegralPointerType(NewTy); + + // We can convert integral pointers to integers, but non-integral pointers + // need to remain pointers. 
+ if (!DL.isNonIntegralPointerType(OldTy)) + return NewTy->isIntegerTy(); + return false; } @@ -1664,8 +1673,7 @@ static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V, // See if we need inttoptr for this type pair. A cast involving both scalars // and vectors requires and additional bitcast. - if (OldTy->getScalarType()->isIntegerTy() && - NewTy->getScalarType()->isPointerTy()) { + if (OldTy->isIntOrIntVectorTy() && NewTy->isPtrOrPtrVectorTy()) { // Expand <2 x i32> to i8* --> <2 x i32> to i64 to i8* if (OldTy->isVectorTy() && !NewTy->isVectorTy()) return IRB.CreateIntToPtr(IRB.CreateBitCast(V, DL.getIntPtrType(NewTy)), @@ -1681,8 +1689,7 @@ static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V, // See if we need ptrtoint for this type pair. A cast involving both scalars // and vectors requires and additional bitcast. - if (OldTy->getScalarType()->isPointerTy() && - NewTy->getScalarType()->isIntegerTy()) { + if (OldTy->isPtrOrPtrVectorTy() && NewTy->isIntOrIntVectorTy()) { // Expand <2 x i8*> to i128 --> <2 x i8*> to <2 x i64> to i128 if (OldTy->isVectorTy() && !NewTy->isVectorTy()) return IRB.CreateBitCast(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)), @@ -2391,11 +2398,22 @@ class llvm::sroa::AllocaSliceRewriter LoadInst *NewLI = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), LI.isVolatile(), LI.getName()); if (LI.isVolatile()) - NewLI->setAtomic(LI.getOrdering(), LI.getSynchScope()); + NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID()); + + // Any !nonnull metadata or !range metadata on the old load is also valid + // on the new load. This is true even in some cases when the loads + // are different types, for example by mapping !nonnull metadata to + // !range metadata by modeling the null pointer constant converted to the + // integer type. + // FIXME: Add support for range metadata here. Currently the utilities + // for this don't propagate range metadata in trivial cases from one + // integer load to another, don't handle non-addrspace-0 null pointers + // correctly, and don't have any support for mapping ranges as the + // integer type becomes wider or narrower.
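// [Editor's note: illustrative sketch, not part of the patch.] The mapping
// the comment above describes: on an integer load, !nonnull can become a
// wrapped !range [1, 0), i.e. every value except the null constant 0. A
// wrapped half-open range [lo, hi) with lo > hi covers v >= lo or v < hi.
// Minimal membership check under that (assumed) convention:
#include <cassert>
#include <cstdint>

bool inWrappedRange(uint64_t V, uint64_t Lo, uint64_t Hi) {
  if (Lo < Hi)
    return Lo <= V && V < Hi; // ordinary half-open interval
  return V >= Lo || V < Hi;   // wrapped interval
}

int main() {
  // [1, 0) over a 64-bit integer models "nonnull": everything but 0.
  assert(!inWrappedRange(0, 1, 0));
  assert(inWrappedRange(1, 1, 0) && inWrappedRange(~0ull, 1, 0));
  return 0;
}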
+ if (MDNode *N = LI.getMetadata(LLVMContext::MD_nonnull)) + copyNonnullMetadata(LI, N, *NewLI); // Try to preserve nonnull metadata - if (TargetTy->isPointerTy()) - NewLI->copyMetadata(LI, LLVMContext::MD_nonnull); V = NewLI; // If this is an integer load past the end of the slice (which means the @@ -2415,7 +2433,7 @@ class llvm::sroa::AllocaSliceRewriter getSliceAlign(TargetTy), LI.isVolatile(), LI.getName()); if (LI.isVolatile()) - NewLI->setAtomic(LI.getOrdering(), LI.getSynchScope()); + NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID()); V = NewLI; IsPtrAdjusted = true; @@ -2443,7 +2461,7 @@ class llvm::sroa::AllocaSliceRewriter "insert"); LI.replaceAllUsesWith(V); Placeholder->replaceAllUsesWith(&LI); - delete Placeholder; + Placeholder->deleteValue(); } else { LI.replaceAllUsesWith(V); } @@ -2558,7 +2576,7 @@ class llvm::sroa::AllocaSliceRewriter } NewSI->copyMetadata(SI, LLVMContext::MD_mem_parallel_loop_access); if (SI.isVolatile()) - NewSI->setAtomic(SI.getOrdering(), SI.getSynchScope()); + NewSI->setAtomic(SI.getOrdering(), SI.getSyncScopeID()); Pass.DeadInsts.insert(&SI); deleteIfTriviallyDead(OldOp); @@ -3571,10 +3589,11 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { int Idx = 0, Size = Offsets.Splits.size(); for (;;) { auto *PartTy = Type::getIntNTy(Ty->getContext(), PartSize * 8); - auto *PartPtrTy = PartTy->getPointerTo(LI->getPointerAddressSpace()); + auto AS = LI->getPointerAddressSpace(); + auto *PartPtrTy = PartTy->getPointerTo(AS); LoadInst *PLoad = IRB.CreateAlignedLoad( getAdjustedPtr(IRB, DL, BasePtr, - APInt(DL.getPointerSizeInBits(), PartOffset), + APInt(DL.getPointerSizeInBits(AS), PartOffset), PartPtrTy, BasePtr->getName() + "."), getAdjustedAlignment(LI, PartOffset, DL), /*IsVolatile*/ false, LI->getName()); @@ -3626,10 +3645,12 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { auto *PartPtrTy = PLoad->getType()->getPointerTo(SI->getPointerAddressSpace()); + auto AS = SI->getPointerAddressSpace(); StoreInst *PStore = IRB.CreateAlignedStore( - PLoad, getAdjustedPtr(IRB, DL, StoreBasePtr, - APInt(DL.getPointerSizeInBits(), PartOffset), - PartPtrTy, StoreBasePtr->getName() + "."), + PLoad, + getAdjustedPtr(IRB, DL, StoreBasePtr, + APInt(DL.getPointerSizeInBits(AS), PartOffset), + PartPtrTy, StoreBasePtr->getName() + "."), getAdjustedAlignment(SI, PartOffset, DL), /*IsVolatile*/ false); PStore->copyMetadata(*LI, LLVMContext::MD_mem_parallel_loop_access); DEBUG(dbgs() << " +" << PartOffset << ":" << *PStore << "\n"); @@ -3698,7 +3719,8 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { int Idx = 0, Size = Offsets.Splits.size(); for (;;) { auto *PartTy = Type::getIntNTy(Ty->getContext(), PartSize * 8); - auto *PartPtrTy = PartTy->getPointerTo(SI->getPointerAddressSpace()); + auto *LoadPartPtrTy = PartTy->getPointerTo(LI->getPointerAddressSpace()); + auto *StorePartPtrTy = PartTy->getPointerTo(SI->getPointerAddressSpace()); // Either lookup a split load or create one. 
LoadInst *PLoad; @@ -3706,20 +3728,23 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { PLoad = (*SplitLoads)[Idx]; } else { IRB.SetInsertPoint(LI); + auto AS = LI->getPointerAddressSpace(); PLoad = IRB.CreateAlignedLoad( getAdjustedPtr(IRB, DL, LoadBasePtr, - APInt(DL.getPointerSizeInBits(), PartOffset), - PartPtrTy, LoadBasePtr->getName() + "."), + APInt(DL.getPointerSizeInBits(AS), PartOffset), + LoadPartPtrTy, LoadBasePtr->getName() + "."), getAdjustedAlignment(LI, PartOffset, DL), /*IsVolatile*/ false, LI->getName()); } // And store this partition. IRB.SetInsertPoint(SI); + auto AS = SI->getPointerAddressSpace(); StoreInst *PStore = IRB.CreateAlignedStore( - PLoad, getAdjustedPtr(IRB, DL, StoreBasePtr, - APInt(DL.getPointerSizeInBits(), PartOffset), - PartPtrTy, StoreBasePtr->getName() + "."), + PLoad, + getAdjustedPtr(IRB, DL, StoreBasePtr, + APInt(DL.getPointerSizeInBits(AS), PartOffset), + StorePartPtrTy, StoreBasePtr->getName() + "."), getAdjustedAlignment(SI, PartOffset, DL), /*IsVolatile*/ false); // Now build a new slice for the alloca. @@ -3898,8 +3923,7 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS, } NumAllocaPartitionUses += NumUses; - MaxUsesPerAllocaPartition = - std::max(NumUses, MaxUsesPerAllocaPartition); + MaxUsesPerAllocaPartition.updateMax(NumUses); // Now that we've processed all the slices in the new partition, check if any // PHIs or Selects would block promotion. @@ -4016,8 +4040,7 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) { } NumAllocaPartitions += NumPartitions; - MaxPartitionsPerAlloca = - std::max(NumPartitions, MaxPartitionsPerAlloca); + MaxPartitionsPerAlloca.updateMax(NumPartitions); // Migrate debug information from the old alloca to the new alloca(s) // and the individual partitions. 
diff --git a/interpreter/llvm/src/lib/Transforms/Scalar/Scalar.cpp b/interpreter/llvm/src/lib/Transforms/Scalar/Scalar.cpp index 52201d8f3e51a..ce6f93eb0c15f 100644 --- a/interpreter/llvm/src/lib/Transforms/Scalar/Scalar.cpp +++ b/interpreter/llvm/src/lib/Transforms/Scalar/Scalar.cpp @@ -20,12 +20,12 @@ #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/ScopedNoAliasAA.h" #include "llvm/Analysis/TypeBasedAliasAnalysis.h" -#include "llvm/Transforms/Scalar/GVN.h" -#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Verifier.h" #include "llvm/InitializePasses.h" -#include "llvm/IR/LegacyPassManager.h" +#include "llvm/Transforms/Scalar/GVN.h" +#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h" using namespace llvm; @@ -48,6 +48,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeEarlyCSELegacyPassPass(Registry); initializeEarlyCSEMemSSALegacyPassPass(Registry); initializeGVNHoistLegacyPassPass(Registry); + initializeGVNSinkLegacyPassPass(Registry); initializeFlattenCFGPassPass(Registry); initializeInductiveRangeCheckEliminationPass(Registry); initializeIndVarSimplifyLegacyPassPass(Registry); @@ -90,7 +91,6 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeSeparateConstOffsetFromGEPPass(Registry); initializeSpeculativeExecutionLegacyPassPass(Registry); initializeStraightLineStrengthReducePass(Registry); - initializeLoadCombinePass(Registry); initializePlaceBackedgeSafepointsImplPass(Registry); initializePlaceSafepointsPass(Registry); initializeFloat2IntLegacyPassPass(Registry); diff --git a/interpreter/llvm/src/lib/Transforms/Scalar/Scalarizer.cpp b/interpreter/llvm/src/lib/Transforms/Scalar/Scalarizer.cpp index c0c09a7e43fe9..d11855f2f3a93 100644 --- a/interpreter/llvm/src/lib/Transforms/Scalar/Scalarizer.cpp +++ b/interpreter/llvm/src/lib/Transforms/Scalar/Scalarizer.cpp @@ -14,12 +14,12 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Scalar.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstVisitor.h" #include "llvm/Pass.h" +#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" using namespace llvm; diff --git a/interpreter/llvm/src/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/interpreter/llvm/src/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp index cde659b9d189f..84675f41cdd5e 100644 --- a/interpreter/llvm/src/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp +++ b/interpreter/llvm/src/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp @@ -156,27 +156,27 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" -#include "llvm/IR/PatternMatch.h" #include "llvm/IR/Operator.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" -#include 
"llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetSubtargetInfo.h" -#include "llvm/IR/IRBuilder.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/Local.h" using namespace llvm; using namespace llvm::PatternMatch; diff --git a/interpreter/llvm/src/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/interpreter/llvm/src/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp index 4f608c97147db..aaab5857e0f10 100644 --- a/interpreter/llvm/src/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp +++ b/interpreter/llvm/src/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -1,4 +1,4 @@ -//===-- SimpleLoopUnswitch.cpp - Hoist loop-invariant control flow --------===// +//===- SimpleLoopUnswitch.cpp - Hoist loop-invariant control flow ---------===// // // The LLVM Compiler Infrastructure // @@ -8,24 +8,40 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Sequence.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/Twine.h" #include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" -#include "llvm/Support/CommandLine.h" +#include "llvm/IR/Use.h" +#include "llvm/IR/Value.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/GenericDomTree.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Cloning.h" -#include "llvm/Transforms/Utils/Local.h" -#include "llvm/Transforms/Scalar/LoopPassManager.h" #include "llvm/Transforms/Utils/LoopUtils.h" +#include +#include +#include +#include #define DEBUG_TYPE "simple-loop-unswitch" @@ -107,11 +123,62 @@ static void updateDTAfterUnswitch(BasicBlock *UnswitchedBB, BasicBlock *OldPH, // exit block. DT.changeImmediateDominator(UnswitchedNode, OldPHNode); - // Blocks reachable from the unswitched block may need to change their IDom - // as well. + // For everything that moves up the dominator tree, we need to examine the + // dominator frontier to see if it additionally should move up the dominator + // tree. This lambda appends the dominator frontier for a node on the + // worklist. + // + // Note that we don't currently use the IDFCalculator here for two reasons: + // 1) It computes dominator tree levels for the entire function on each run + // of 'compute'. While this isn't terrible, given that we expect to update + // relatively small subtrees of the domtree, it isn't necessarily the right + // tradeoff. + // 2) The interface doesn't fit this usage well. It doesn't operate in + // append-only, and builds several sets that we don't need. + // + // FIXME: Neither of these issues are a big deal and could be addressed with + // some amount of refactoring of IDFCalculator. That would allow us to share + // the core logic here (which is solving the same core problem). 
SmallSetVector Worklist; - for (auto *SuccBB : successors(UnswitchedBB)) - Worklist.insert(SuccBB); + SmallVector DomNodes; + SmallPtrSet DomSet; + auto AppendDomFrontier = [&](DomTreeNode *Node) { + assert(DomNodes.empty() && "Must start with no dominator nodes."); + assert(DomSet.empty() && "Must start with an empty dominator set."); + + // First flatten this subtree into sequence of nodes by doing a pre-order + // walk. + DomNodes.push_back(Node); + // We intentionally re-evaluate the size as each node can add new children. + // Because this is a tree walk, this cannot add any duplicates. + for (int i = 0; i < (int)DomNodes.size(); ++i) + DomNodes.insert(DomNodes.end(), DomNodes[i]->begin(), DomNodes[i]->end()); + + // Now create a set of the basic blocks so we can quickly test for + // dominated successors. We could in theory use the DFS numbers of the + // dominator tree for this, but we want this to remain predictably fast + // even while we mutate the dominator tree in ways that would invalidate + // the DFS numbering. + for (DomTreeNode *InnerN : DomNodes) + DomSet.insert(InnerN->getBlock()); + + // Now re-walk the nodes, appending every successor of every node that isn't + // in the set. Note that we don't append the node itself, even though if it + // is a successor it does not strictly dominate itself and thus it would be + // part of the dominance frontier. The reason we don't append it is that + // the node passed in came *from* the worklist and so it has already been + // processed. + for (DomTreeNode *InnerN : DomNodes) + for (BasicBlock *SuccBB : successors(InnerN->getBlock())) + if (!DomSet.count(SuccBB)) + Worklist.insert(SuccBB); + + DomNodes.clear(); + DomSet.clear(); + }; + + // Append the initial dom frontier nodes. + AppendDomFrontier(UnswitchedNode); // Walk the worklist. We grow the list in the loop and so must recompute size. for (int i = 0; i < (int)Worklist.size(); ++i) { @@ -120,20 +187,17 @@ static void updateDTAfterUnswitch(BasicBlock *UnswitchedBB, BasicBlock *OldPH, DomTreeNode *Node = DT[BB]; assert(!DomChain.count(Node) && "Cannot be dominated by a block you can reach!"); - // If this block doesn't have an immediate dominator somewhere in the chain - // we hoisted over, then its position in the domtree hasn't changed. Either - // it is above the region hoisted and still valid, or it is below the - // hoisted block and so was trivially updated. This also applies to - // everything reachable from this block so we're completely done with the - // it. + + // If this block had an immediate dominator somewhere in the chain + // we hoisted over, then its position in the domtree needs to move as it is + // reachable from a node hoisted over this chain. if (!DomChain.count(Node->getIDom())) continue; - // We need to change the IDom for this node but also walk its successors - // which could have similar dominance position. DT.changeImmediateDominator(Node, OldPHNode); - for (auto *SuccBB : successors(BB)) - Worklist.insert(SuccBB); + + // Now add this node's dominator frontier to the worklist as well. + AppendDomFrontier(Node); } } @@ -174,7 +238,7 @@ static void rewritePHINodesForUnswitchedExitBlock(BasicBlock &UnswitchedBB, // When the loop exit is directly unswitched we just need to update the // incoming basic block. We loop to handle weird cases with repeated // incoming blocks, but expect to typically only have one operand here. 
- for (auto i : llvm::seq(0, PN->getNumOperands())) { + for (auto i : seq(0, PN->getNumOperands())) { assert(PN->getIncomingBlock(i) == &OldExitingBB && "Found incoming block different from unique predecessor!"); PN->setIncomingBlock(i, &OldPH); @@ -688,9 +752,11 @@ PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM, } namespace { + class SimpleLoopUnswitchLegacyPass : public LoopPass { public: static char ID; // Pass ID, replacement for typeid + explicit SimpleLoopUnswitchLegacyPass() : LoopPass(ID) { initializeSimpleLoopUnswitchLegacyPassPass( *PassRegistry::getPassRegistry()); @@ -703,7 +769,8 @@ class SimpleLoopUnswitchLegacyPass : public LoopPass { getLoopAnalysisUsage(AU); } }; -} // namespace + +} // end anonymous namespace bool SimpleLoopUnswitchLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) { if (skipLoop(L)) diff --git a/interpreter/llvm/src/lib/Transforms/Scalar/Sink.cpp b/interpreter/llvm/src/lib/Transforms/Scalar/Sink.cpp index 102e9eaeab772..5210f165b8742 100644 --- a/interpreter/llvm/src/lib/Transforms/Scalar/Sink.cpp +++ b/interpreter/llvm/src/lib/Transforms/Scalar/Sink.cpp @@ -114,7 +114,7 @@ static bool IsAcceptableTarget(Instruction *Inst, BasicBlock *SuccToSinkTo, if (SuccToSinkTo->getUniquePredecessor() != Inst->getParent()) { // We cannot sink a load across a critical edge - there may be stores in // other code paths. - if (!isSafeToSpeculativelyExecute(Inst)) + if (isa(Inst)) return false; // We don't want to sink across a critical edge if we don't dominate the diff --git a/interpreter/llvm/src/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp b/interpreter/llvm/src/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp index 2be3f5c533b9a..8b8d6590aa6a0 100644 --- a/interpreter/llvm/src/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp +++ b/interpreter/llvm/src/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp @@ -693,7 +693,7 @@ bool StraightLineStrengthReduce::runOnFunction(Function &F) { UnlinkedInst->setOperand(I, nullptr); RecursivelyDeleteTriviallyDeadInstructions(Op); } - delete UnlinkedInst; + UnlinkedInst->deleteValue(); } bool Ret = !UnlinkedInstructions.empty(); UnlinkedInstructions.clear(); diff --git a/interpreter/llvm/src/lib/Transforms/Scalar/StructurizeCFG.cpp b/interpreter/llvm/src/lib/Transforms/Scalar/StructurizeCFG.cpp index 49ce0262c97b0..0cccb415efdb1 100644 --- a/interpreter/llvm/src/lib/Transforms/Scalar/StructurizeCFG.cpp +++ b/interpreter/llvm/src/lib/Transforms/Scalar/StructurizeCFG.cpp @@ -7,7 +7,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Scalar.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SCCIterator.h" @@ -20,6 +19,7 @@ #include "llvm/IR/PatternMatch.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/SSAUpdater.h" using namespace llvm; @@ -329,7 +329,7 @@ void StructurizeCFG::analyzeLoops(RegionNode *N) { Loops[Exit] = N->getEntry(); } else { - // Test for sucessors as back edge + // Test for successors as back edge BasicBlock *BB = N->getNodeAs(); BranchInst *Term = cast(BB->getTerminator()); diff --git a/interpreter/llvm/src/lib/Transforms/Scalar/TailRecursionElimination.cpp b/interpreter/llvm/src/lib/Transforms/Scalar/TailRecursionElimination.cpp index bf54a51c7635e..90c5c243f4648 100644 --- a/interpreter/llvm/src/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ 
b/interpreter/llvm/src/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -51,13 +51,12 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar/TailRecursionElimination.h" -#include "llvm/Transforms/Scalar.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/CaptureTracking.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/Loads.h" @@ -69,6 +68,7 @@ #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Function.h" +#include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" @@ -76,6 +76,7 @@ #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; @@ -90,16 +91,10 @@ STATISTIC(NumAccumAdded, "Number of accumulators introduced"); /// If it contains any dynamic allocas, returns false. static bool canTRE(Function &F) { // Because of PR962, we don't TRE dynamic allocas. - for (auto &BB : F) { - for (auto &I : BB) { - if (AllocaInst *AI = dyn_cast(&I)) { - if (!AI->isStaticAlloca()) - return false; - } - } - } - - return true; + return llvm::all_of(instructions(F), [](Instruction &I) { + auto *AI = dyn_cast(&I); + return !AI || AI->isStaticAlloca(); + }); } namespace { @@ -321,7 +316,7 @@ static bool markTails(Function &F, bool &AllCallsAreTailCalls) { /// instruction from after the call to before the call, assuming that all /// instructions between the call and this instruction are movable. /// -static bool canMoveAboveCall(Instruction *I, CallInst *CI) { +static bool canMoveAboveCall(Instruction *I, CallInst *CI, AliasAnalysis *AA) { // FIXME: We can move load/store/call/free instructions above the call if the // call does not mod/ref the memory location being processed. if (I->mayHaveSideEffects()) // This also handles volatile loads. @@ -332,10 +327,10 @@ static bool canMoveAboveCall(Instruction *I, CallInst *CI) { if (CI->mayHaveSideEffects()) { // Non-volatile loads may be moved above a call with side effects if it // does not write to memory and the load provably won't trap. - // FIXME: Writes to memory only matter if they may alias the pointer + // Writes to memory only matter if they may alias the pointer // being loaded from. 
const DataLayout &DL = L->getModule()->getDataLayout(); - if (CI->mayWriteToMemory() || + if ((AA->getModRefInfo(CI, MemoryLocation::get(L)) & MRI_Mod) || !isSafeToLoadUnconditionally(L->getPointerOperand(), L->getAlignment(), DL, L)) return false; @@ -492,10 +487,11 @@ static CallInst *findTRECandidate(Instruction *TI, return CI; } -static bool -eliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret, BasicBlock *&OldEntry, - bool &TailCallsAreMarkedTail, - SmallVectorImpl &ArgumentPHIs) { +static bool eliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret, + BasicBlock *&OldEntry, + bool &TailCallsAreMarkedTail, + SmallVectorImpl &ArgumentPHIs, + AliasAnalysis *AA) { // If we are introducing accumulator recursion to eliminate operations after // the call instruction that are both associative and commutative, the initial // value for the accumulator is placed in this variable. If this value is set @@ -515,7 +511,8 @@ eliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret, BasicBlock *&OldEntry, // Check that this is the case now. BasicBlock::iterator BBI(CI); for (++BBI; &*BBI != Ret; ++BBI) { - if (canMoveAboveCall(&*BBI, CI)) continue; + if (canMoveAboveCall(&*BBI, CI, AA)) + continue; // If we can't move the instruction above the call, it might be because it // is an associative and commutative operation that could be transformed @@ -674,12 +671,17 @@ static bool foldReturnAndProcessPred(BasicBlock *BB, ReturnInst *Ret, bool &TailCallsAreMarkedTail, SmallVectorImpl &ArgumentPHIs, bool CannotTailCallElimCallsMarkedTail, - const TargetTransformInfo *TTI) { + const TargetTransformInfo *TTI, + AliasAnalysis *AA) { bool Change = false; + // Make sure this block is a trivial return block. + assert(BB->getFirstNonPHIOrDbg() == Ret && + "Trying to fold non-trivial return block"); + // If the return block contains nothing but the return and PHI's, // there might be an opportunity to duplicate the return in its - // predecessors and perform TRC there. Look for predecessors that end + // predecessors and perform TRE there. Look for predecessors that end // in unconditional branch and recursive call(s). 
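// [Editor's note: illustrative sketch, not part of the patch.] What
// accumulator introduction buys this pass: a call that is *almost* in tail
// position, followed only by an associative/commutative op, can still be
// eliminated. Source-level picture (hypothetical function, not from this
// patch):
unsigned factRecursive(unsigned N) {
  if (N == 0)
    return 1;
  return N * factRecursive(N - 1); // the multiply happens after the call
}

// After the transform: the pending multiplies are folded into an
// accumulator, and the recursion becomes a branch back to the entry.
unsigned factIterative(unsigned N) {
  unsigned Acc = 1; // identity of the accumulated operation
  while (N != 0) {
    Acc *= N; // associativity/commutativity make this reordering legal
    --N;
  }
  return Acc;
}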
SmallVector UncondBranchPreds; for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { @@ -706,7 +708,7 @@ static bool foldReturnAndProcessPred(BasicBlock *BB, ReturnInst *Ret, BB->eraseFromParent(); eliminateRecursiveTailCall(CI, RI, OldEntry, TailCallsAreMarkedTail, - ArgumentPHIs); + ArgumentPHIs, AA); ++NumRetDuped; Change = true; } @@ -719,16 +721,18 @@ static bool processReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry, bool &TailCallsAreMarkedTail, SmallVectorImpl &ArgumentPHIs, bool CannotTailCallElimCallsMarkedTail, - const TargetTransformInfo *TTI) { + const TargetTransformInfo *TTI, + AliasAnalysis *AA) { CallInst *CI = findTRECandidate(Ret, CannotTailCallElimCallsMarkedTail, TTI); if (!CI) return false; return eliminateRecursiveTailCall(CI, Ret, OldEntry, TailCallsAreMarkedTail, - ArgumentPHIs); + ArgumentPHIs, AA); } -static bool eliminateTailRecursion(Function &F, const TargetTransformInfo *TTI) { +static bool eliminateTailRecursion(Function &F, const TargetTransformInfo *TTI, + AliasAnalysis *AA) { if (F.getFnAttribute("disable-tail-calls").getValueAsString() == "true") return false; @@ -763,11 +767,11 @@ static bool eliminateTailRecursion(Function &F, const TargetTransformInfo *TTI) if (ReturnInst *Ret = dyn_cast(BB->getTerminator())) { bool Change = processReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail, - ArgumentPHIs, !CanTRETailMarkedCall, TTI); + ArgumentPHIs, !CanTRETailMarkedCall, TTI, AA); if (!Change && BB->getFirstNonPHIOrDbg() == Ret) - Change = - foldReturnAndProcessPred(BB, Ret, OldEntry, TailCallsAreMarkedTail, - ArgumentPHIs, !CanTRETailMarkedCall, TTI); + Change = foldReturnAndProcessPred(BB, Ret, OldEntry, + TailCallsAreMarkedTail, ArgumentPHIs, + !CanTRETailMarkedCall, TTI, AA); MadeChange |= Change; } } @@ -797,6 +801,7 @@ struct TailCallElim : public FunctionPass { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); + AU.addRequired(); AU.addPreserved(); } @@ -805,7 +810,8 @@ struct TailCallElim : public FunctionPass { return false; return eliminateTailRecursion( - F, &getAnalysis().getTTI(F)); + F, &getAnalysis().getTTI(F), + &getAnalysis().getAAResults()); } }; } @@ -826,8 +832,9 @@ PreservedAnalyses TailCallElimPass::run(Function &F, FunctionAnalysisManager &AM) { TargetTransformInfo &TTI = AM.getResult(F); + AliasAnalysis &AA = AM.getResult(F); - bool Changed = eliminateTailRecursion(F, &TTI); + bool Changed = eliminateTailRecursion(F, &TTI, &AA); if (!Changed) return PreservedAnalyses::all(); diff --git a/interpreter/llvm/src/lib/Transforms/Utils/BuildLibCalls.cpp b/interpreter/llvm/src/lib/Transforms/Utils/BuildLibCalls.cpp index ebde1f9a17dd6..b60dfb4f3541d 100644 --- a/interpreter/llvm/src/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/interpreter/llvm/src/lib/Transforms/Utils/BuildLibCalls.cpp @@ -116,6 +116,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { case LibFunc_wcslen: Changed |= setOnlyReadsMemory(F); Changed |= setDoesNotThrow(F); + Changed |= setOnlyAccessesArgMemory(F); Changed |= setDoesNotCapture(F, 0); return Changed; case LibFunc_strchr: diff --git a/interpreter/llvm/src/lib/Transforms/Utils/CMakeLists.txt b/interpreter/llvm/src/lib/Transforms/Utils/CMakeLists.txt index 7a21c03da221a..83bc05d0311ca 100644 --- a/interpreter/llvm/src/lib/Transforms/Utils/CMakeLists.txt +++ b/interpreter/llvm/src/lib/Transforms/Utils/CMakeLists.txt @@ -37,6 +37,7 @@ add_llvm_library(LLVMTransformUtils MetaRenamer.cpp ModuleUtils.cpp NameAnonGlobals.cpp + 
OrderedInstructions.cpp PredicateInfo.cpp PromoteMemoryToRegister.cpp StripGCRelocates.cpp diff --git a/interpreter/llvm/src/lib/Transforms/Utils/CloneFunction.cpp b/interpreter/llvm/src/lib/Transforms/Utils/CloneFunction.cpp index 4aa26fd14fee3..9c4e13903ed78 100644 --- a/interpreter/llvm/src/lib/Transforms/Utils/CloneFunction.cpp +++ b/interpreter/llvm/src/lib/Transforms/Utils/CloneFunction.cpp @@ -13,7 +13,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Utils/Cloning.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/ConstantFolding.h" @@ -31,29 +30,39 @@ #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/ValueMapper.h" #include using namespace llvm; /// See comments in Cloning.h. -BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, - ValueToValueMapTy &VMap, +BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix, Function *F, - ClonedCodeInfo *CodeInfo) { + ClonedCodeInfo *CodeInfo, + DebugInfoFinder *DIFinder) { DenseMap Cache; BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "", F); if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix); bool hasCalls = false, hasDynamicAllocas = false, hasStaticAllocas = false; - + Module *TheModule = F ? F->getParent() : nullptr; + // Loop over all instructions, and copy them over. for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end(); II != IE; ++II) { + + if (DIFinder && TheModule) { + if (auto *DDI = dyn_cast(II)) + DIFinder->processDeclare(*TheModule, DDI); + else if (auto *DVI = dyn_cast(II)) + DIFinder->processValue(*TheModule, DVI); + + if (auto DbgLoc = II->getDebugLoc()) + DIFinder->processLocation(*TheModule, DbgLoc.get()); + } + Instruction *NewInst = II->clone(); - if (F && F->getSubprogram()) - DebugLoc::reparentDebugInfo(*NewInst, BB->getParent()->getSubprogram(), - F->getSubprogram(), Cache); if (II->hasName()) NewInst->setName(II->getName()+NameSuffix); NewBB->getInstList().push_back(NewInst); @@ -122,31 +131,40 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, AttributeList::get(NewFunc->getContext(), OldAttrs.getFnAttributes(), OldAttrs.getRetAttributes(), NewArgAttrs)); + bool MustCloneSP = + OldFunc->getParent() && OldFunc->getParent() == NewFunc->getParent(); + DISubprogram *SP = OldFunc->getSubprogram(); + if (SP) { + assert(!MustCloneSP || ModuleLevelChanges); + // Add mappings for some DebugInfo nodes that we don't want duplicated + // even if they're distinct. 
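// [Editor's note: illustrative sketch, not part of the patch.] Seeding the
// value map with an identity entry is the standard way to pin a node while
// cloning: the mapper finds an existing mapping and reuses the original
// instead of duplicating it. Miniature model (hypothetical Node type, not
// LLVM's mapper):
#include <map>

struct Node { int Payload; };

Node *mapNode(Node *N, std::map<Node *, Node *> &VM) {
  auto It = VM.find(N);
  if (It != VM.end())
    return It->second; // pinned (VM[N] == N) or already cloned
  Node *Clone = new Node(*N);
  VM[N] = Clone;
  return Clone;
}
// Pinning usage: VM[SP] = SP; afterwards mapNode(SP, VM) yields SP itself.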
+ auto &MD = VMap.MD(); + MD[SP->getUnit()].reset(SP->getUnit()); + MD[SP->getType()].reset(SP->getType()); + MD[SP->getFile()].reset(SP->getFile()); + // If we're not cloning into the same module, no need to clone the + // subprogram + if (!MustCloneSP) + MD[SP].reset(SP); + } + SmallVector, 1> MDs; OldFunc->getAllMetadata(MDs); for (auto MD : MDs) { - MDNode *NewMD; - bool MustCloneSP = - (MD.first == LLVMContext::MD_dbg && OldFunc->getParent() && - OldFunc->getParent() == NewFunc->getParent()); - if (MustCloneSP) { - auto *SP = cast(MD.second); - NewMD = DISubprogram::getDistinct( - NewFunc->getContext(), SP->getScope(), SP->getName(), - NewFunc->getName(), SP->getFile(), SP->getLine(), SP->getType(), - SP->isLocalToUnit(), SP->isDefinition(), SP->getScopeLine(), - SP->getContainingType(), SP->getVirtuality(), SP->getVirtualIndex(), - SP->getThisAdjustment(), SP->getFlags(), SP->isOptimized(), - SP->getUnit(), SP->getTemplateParams(), SP->getDeclaration(), - SP->getVariables(), SP->getThrownTypes()); - } else - NewMD = - MapMetadata(MD.second, VMap, - ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges, - TypeMapper, Materializer); - NewFunc->addMetadata(MD.first, *NewMD); + NewFunc->addMetadata( + MD.first, + *MapMetadata(MD.second, VMap, + ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges, + TypeMapper, Materializer)); } + // When we remap instructions, we want to avoid duplicating inlined + // DISubprograms, so record all subprograms we find as we duplicate + // instructions and then freeze them in the MD map. + // We also record information about dbg.value and dbg.declare to avoid + // duplicating the types. + DebugInfoFinder DIFinder; + // Loop over all of the basic blocks in the function, cloning them as // appropriate. Note that we save BE this way in order to handle cloning of // recursive functions into themselves. @@ -156,7 +174,8 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, const BasicBlock &BB = *BI; // Create a new basic block and copy instructions into it! - BasicBlock *CBB = CloneBasicBlock(&BB, VMap, NameSuffix, NewFunc, CodeInfo); + BasicBlock *CBB = CloneBasicBlock(&BB, VMap, NameSuffix, NewFunc, CodeInfo, + SP ? &DIFinder : nullptr); // Add basic block mapping. VMap[&BB] = CBB; @@ -178,6 +197,16 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, Returns.push_back(RI); } + for (DISubprogram *ISP : DIFinder.subprograms()) { + if (ISP != SP) { + VMap.MD()[ISP].reset(ISP); + } + } + + for (auto *Type : DIFinder.types()) { + VMap.MD()[Type].reset(Type); + } + // Loop over all of the instructions in the function, fixing up operand // references as we go. This uses VMap to do all the hard work. for (Function::iterator BB = @@ -226,7 +255,7 @@ Function *llvm::CloneFunction(Function *F, ValueToValueMapTy &VMap, } SmallVector Returns; // Ignore returns cloned. - CloneFunctionInto(NewF, F, VMap, /*ModuleLevelChanges=*/false, Returns, "", + CloneFunctionInto(NewF, F, VMap, F->getSubprogram() != nullptr, Returns, "", CodeInfo); return NewF; @@ -312,12 +341,13 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, SimplifyInstruction(NewInst, BB->getModule()->getDataLayout())) { // On the off-chance that this simplifies to an instruction in the old // function, map it back into the new function. 
- if (Value *MappedV = VMap.lookup(V)) - V = MappedV; + if (NewFunc != OldFunc) + if (Value *MappedV = VMap.lookup(V)) + V = MappedV; if (!NewInst->mayHaveSideEffects()) { VMap[&*II] = V; - delete NewInst; + NewInst->deleteValue(); continue; } } diff --git a/interpreter/llvm/src/lib/Transforms/Utils/CloneModule.cpp b/interpreter/llvm/src/lib/Transforms/Utils/CloneModule.cpp index 5444b752de829..e5392b53050d3 100644 --- a/interpreter/llvm/src/lib/Transforms/Utils/CloneModule.cpp +++ b/interpreter/llvm/src/lib/Transforms/Utils/CloneModule.cpp @@ -12,12 +12,12 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm-c/Core.h" #include "llvm/IR/Constant.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Module.h" +#include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/ValueMapper.h" -#include "llvm-c/Core.h" using namespace llvm; static void copyComdat(GlobalObject *Dst, const GlobalObject *Src) { @@ -132,7 +132,8 @@ std::unique_ptr llvm::CloneModule( SmallVector, 1> MDs; I->getAllMetadata(MDs); for (auto MD : MDs) - GV->addMetadata(MD.first, *MapMetadata(MD.second, VMap)); + GV->addMetadata(MD.first, + *MapMetadata(MD.second, VMap, RF_MoveDistinctMDs)); copyComdat(GV, &*I); } diff --git a/interpreter/llvm/src/lib/Transforms/Utils/CmpInstAnalysis.cpp b/interpreter/llvm/src/lib/Transforms/Utils/CmpInstAnalysis.cpp index 9f4d9c7e39810..d9294c4993091 100644 --- a/interpreter/llvm/src/lib/Transforms/Utils/CmpInstAnalysis.cpp +++ b/interpreter/llvm/src/lib/Transforms/Utils/CmpInstAnalysis.cpp @@ -81,7 +81,7 @@ bool llvm::decomposeBitTestICmp(const ICmpInst *I, CmpInst::Predicate &Pred, break; case ICmpInst::ICMP_SGT: // X > -1 is equivalent to (X & SignMask) == 0. - if (!C->isAllOnesValue()) + if (!C->isMinusOne()) return false; Y = ConstantInt::get(I->getContext(), APInt::getSignMask(C->getBitWidth())); Pred = ICmpInst::ICMP_EQ; diff --git a/interpreter/llvm/src/lib/Transforms/Utils/CodeExtractor.cpp b/interpreter/llvm/src/lib/Transforms/Utils/CodeExtractor.cpp index ed72099ec3ed6..1189714dfab10 100644 --- a/interpreter/llvm/src/lib/Transforms/Utils/CodeExtractor.cpp +++ b/interpreter/llvm/src/lib/Transforms/Utils/CodeExtractor.cpp @@ -27,6 +27,7 @@ #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/MDBuilder.h" @@ -58,6 +59,33 @@ bool CodeExtractor::isBlockValidForExtraction(const BasicBlock &BB) { // Landing pads must be in the function where they were inserted for cleanup. if (BB.isEHPad()) return false; + // Taking the address of a basic block moved to another function is illegal. + if (BB.hasAddressTaken()) + return false; + + // Don't hoist code that uses another basic block's address, as it's likely + // to lead to unexpected behavior, like cross-function jumps. + SmallPtrSet Visited; + SmallVector ToVisit; + + for (Instruction const &Inst : BB) + ToVisit.push_back(&Inst); + + while (!ToVisit.empty()) { + User const *Curr = ToVisit.pop_back_val(); + if (!Visited.insert(Curr).second) + continue; + if (isa(Curr)) + return false; // even a reference to the block itself is likely to be incompatible + + if (isa(Curr) && cast(Curr)->getParent() != &BB) + continue; + + for (auto const &U : Curr->operands()) { + if (auto *UU = dyn_cast(U)) + ToVisit.push_back(UU); + } + } // Don't hoist code containing allocas, invokes, or vastarts.
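// [Editor's note: illustrative sketch, not part of the patch.] The scan in
// isBlockValidForExtraction above is a plain worklist walk over the operand
// graph with a visited set, so shared subexpressions are examined once and
// cycles terminate. The skeleton, over a hypothetical node type:
#include <set>
#include <vector>

struct User_ { std::vector<User_ *> Operands; bool IsBlockAddress = false; };

bool referencesBlockAddress(const User_ *Root) {
  std::set<const User_ *> Visited;
  std::vector<const User_ *> ToVisit{Root};
  while (!ToVisit.empty()) {
    const User_ *Curr = ToVisit.back();
    ToVisit.pop_back();
    if (!Visited.insert(Curr).second)
      continue; // already examined via another use
    if (Curr->IsBlockAddress)
      return true;
    for (const User_ *Op : Curr->Operands)
      ToVisit.push_back(Op);
  }
  return false;
}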
for (BasicBlock::const_iterator I = BB.begin(), E = BB.end(); I != E; ++I) { @@ -141,16 +169,255 @@ static bool definedInCaller(const SetVector<BasicBlock *> &Blocks, Value *V) { return false; } -void CodeExtractor::findInputsOutputs(ValueSet &Inputs, - ValueSet &Outputs) const { +static BasicBlock *getCommonExitBlock(const SetVector<BasicBlock *> &Blocks) { + BasicBlock *CommonExitBlock = nullptr; + auto hasNonCommonExitSucc = [&](BasicBlock *Block) { + for (auto *Succ : successors(Block)) { + // Internal edges, ok. + if (Blocks.count(Succ)) + continue; + if (!CommonExitBlock) { + CommonExitBlock = Succ; + continue; + } + if (CommonExitBlock == Succ) + continue; + + return true; + } + return false; + }; + + if (any_of(Blocks, hasNonCommonExitSucc)) + return nullptr; + + return CommonExitBlock; +} + +bool CodeExtractor::isLegalToShrinkwrapLifetimeMarkers( + Instruction *Addr) const { + AllocaInst *AI = cast<AllocaInst>(Addr->stripInBoundsConstantOffsets()); + Function *Func = (*Blocks.begin())->getParent(); + for (BasicBlock &BB : *Func) { + if (Blocks.count(&BB)) + continue; + for (Instruction &II : BB) { + + if (isa<DbgInfoIntrinsic>(II)) + continue; + + unsigned Opcode = II.getOpcode(); + Value *MemAddr = nullptr; + switch (Opcode) { + case Instruction::Store: + case Instruction::Load: { + if (Opcode == Instruction::Store) { + StoreInst *SI = cast<StoreInst>(&II); + MemAddr = SI->getPointerOperand(); + } else { + LoadInst *LI = cast<LoadInst>(&II); + MemAddr = LI->getPointerOperand(); + } + // Global variables cannot be aliased with locals. + if (dyn_cast<GlobalVariable>(MemAddr)) + break; + Value *Base = MemAddr->stripInBoundsConstantOffsets(); + if (!dyn_cast<AllocaInst>(Base) || Base == AI) + return false; + break; + } + default: { + IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(&II); + if (IntrInst) { + if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start || + IntrInst->getIntrinsicID() == Intrinsic::lifetime_end) + break; + return false; + } + // Treat all the other cases conservatively if they have side effects. + if (II.mayHaveSideEffects()) + return false; + } + } + } + } + + return true; +} + +BasicBlock * +CodeExtractor::findOrCreateBlockForHoisting(BasicBlock *CommonExitBlock) { + BasicBlock *SinglePredFromOutlineRegion = nullptr; + assert(!Blocks.count(CommonExitBlock) && + "Expect a block outside the region!"); + for (auto *Pred : predecessors(CommonExitBlock)) { + if (!Blocks.count(Pred)) + continue; + if (!SinglePredFromOutlineRegion) { + SinglePredFromOutlineRegion = Pred; + } else if (SinglePredFromOutlineRegion != Pred) { + SinglePredFromOutlineRegion = nullptr; + break; + } + } + + if (SinglePredFromOutlineRegion) + return SinglePredFromOutlineRegion; + +#ifndef NDEBUG + auto getFirstPHI = [](BasicBlock *BB) { + BasicBlock::iterator I = BB->begin(); + PHINode *FirstPhi = nullptr; + while (I != BB->end()) { + PHINode *Phi = dyn_cast<PHINode>(I); + if (!Phi) + break; + if (!FirstPhi) { + FirstPhi = Phi; + break; + } + } + return FirstPhi; + }; + // If there are any phi nodes, the single pred either exists or has already + // been created before code extraction. + assert(!getFirstPHI(CommonExitBlock) && "Phi not expected"); +#endif + + BasicBlock *NewExitBlock = CommonExitBlock->splitBasicBlock( + CommonExitBlock->getFirstNonPHI()->getIterator()); + + for (auto *Pred : predecessors(CommonExitBlock)) { + if (Blocks.count(Pred)) + continue; + Pred->getTerminator()->replaceUsesOfWith(CommonExitBlock, NewExitBlock); + } + // Now add the old exit block to the outline region.
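
getCommonExitBlock above boils down to: scan every edge leaving the region and demand that all such edges target one block. The same logic over a toy CFG of integer block ids (a sketch, not the LLVM API):

    #include <map>
    #include <set>
    #include <vector>

    // Returns the single successor outside the region, or -1 if the region's
    // out-edges target more than one distinct block. Every region block must
    // have an entry in succs.
    int commonExitBlock(const std::set<int> &region,
                        const std::map<int, std::vector<int>> &succs) {
      int commonExit = -1;
      for (int bb : region)
        for (int succ : succs.at(bb)) {
          if (region.count(succ))
            continue;                 // internal edge, ok
          if (commonExit == -1)
            commonExit = succ;        // first external successor seen
          else if (commonExit != succ)
            return -1;                // a second distinct exit: give up
        }
      return commonExit;
    }
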
+ Blocks.insert(CommonExitBlock); + return CommonExitBlock; +} + +void CodeExtractor::findAllocas(ValueSet &SinkCands, ValueSet &HoistCands, + BasicBlock *&ExitBlock) const { + Function *Func = (*Blocks.begin())->getParent(); + ExitBlock = getCommonExitBlock(Blocks); + + for (BasicBlock &BB : *Func) { + if (Blocks.count(&BB)) + continue; + for (Instruction &II : BB) { + auto *AI = dyn_cast<AllocaInst>(&II); + if (!AI) + continue; + + // Find the pair of lifetime markers for address 'Addr' that are either + // defined inside the outline region or can legally be shrinkwrapped into + // the outline region. If there are no other untracked uses of the + // address, return the pair of markers if found; otherwise return a pair + // of nullptr. + auto GetLifeTimeMarkers = + [&](Instruction *Addr, bool &SinkLifeStart, + bool &HoistLifeEnd) -> std::pair<Instruction *, Instruction *> { + Instruction *LifeStart = nullptr, *LifeEnd = nullptr; + + for (User *U : Addr->users()) { + IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(U); + if (IntrInst) { + if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start) { + // Do not handle the case where AI has multiple start markers. + if (LifeStart) + return std::make_pair(nullptr, nullptr); + LifeStart = IntrInst; + } + if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_end) { + if (LifeEnd) + return std::make_pair(nullptr, nullptr); + LifeEnd = IntrInst; + } + continue; + } + // Found an untracked use of the address; bail. + if (!definedInRegion(Blocks, U)) + return std::make_pair(nullptr, nullptr); + } + + if (!LifeStart || !LifeEnd) + return std::make_pair(nullptr, nullptr); + + SinkLifeStart = !definedInRegion(Blocks, LifeStart); + HoistLifeEnd = !definedInRegion(Blocks, LifeEnd); + // Do a legality check. + if ((SinkLifeStart || HoistLifeEnd) && + !isLegalToShrinkwrapLifetimeMarkers(Addr)) + return std::make_pair(nullptr, nullptr); + + // Check to see if we have a place to do hoisting; if not, bail. + if (HoistLifeEnd && !ExitBlock) + return std::make_pair(nullptr, nullptr); + + return std::make_pair(LifeStart, LifeEnd); + }; + + bool SinkLifeStart = false, HoistLifeEnd = false; + auto Markers = GetLifeTimeMarkers(AI, SinkLifeStart, HoistLifeEnd); + + if (Markers.first) { + if (SinkLifeStart) + SinkCands.insert(Markers.first); + SinkCands.insert(AI); + if (HoistLifeEnd) + HoistCands.insert(Markers.second); + continue; + } + + // Follow the bitcast. + Instruction *MarkerAddr = nullptr; + for (User *U : AI->users()) { + + if (U->stripInBoundsConstantOffsets() == AI) { + SinkLifeStart = false; + HoistLifeEnd = false; + Instruction *Bitcast = cast<Instruction>(U); + Markers = GetLifeTimeMarkers(Bitcast, SinkLifeStart, HoistLifeEnd); + if (Markers.first) { + MarkerAddr = Bitcast; + continue; + } + } + + // Found unknown use of AI. + if (!definedInRegion(Blocks, U)) { + MarkerAddr = nullptr; + break; + } + } + + if (MarkerAddr) { + if (SinkLifeStart) + SinkCands.insert(Markers.first); + if (!definedInRegion(Blocks, MarkerAddr)) + SinkCands.insert(MarkerAddr); + SinkCands.insert(AI); + if (HoistLifeEnd) + HoistCands.insert(Markers.second); + } + } + } +} + +void CodeExtractor::findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs, + const ValueSet &SinkCands) const { + for (BasicBlock *BB : Blocks) { + // If a used value is defined outside the region, it's an input. If an // instruction is used outside the region, it's an output.
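
The GetLifeTimeMarkers lambda above implements a "unique match or fail" scan: exactly one start marker and one end marker among the users, with any duplicate collapsing the whole query to a null pair. A standalone sketch of that shape (Kind and Use are hypothetical stand-ins for the intrinsic checks):

    #include <utility>
    #include <vector>

    enum class Kind { Start, End, Other };
    struct Use { Kind kind; };

    std::pair<const Use *, const Use *> findMarkers(const std::vector<Use> &users) {
      const Use *start = nullptr, *end = nullptr;
      for (const Use &u : users) {
        if (u.kind == Kind::Start) {
          if (start)
            return {nullptr, nullptr}; // multiple start markers: give up
          start = &u;
        } else if (u.kind == Kind::End) {
          if (end)
            return {nullptr, nullptr}; // multiple end markers: give up
          end = &u;
        }
        // Kind::Other corresponds to the untracked-use check, elided here.
      }
      if (!start || !end)
        return {nullptr, nullptr};     // need a complete pair
      return {start, end};
    }
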
for (Instruction &II : *BB) { for (User::op_iterator OI = II.op_begin(), OE = II.op_end(); OI != OE; - ++OI) - if (definedInCaller(Blocks, *OI)) - Inputs.insert(*OI); + ++OI) { + Value *V = *OI; + if (!SinkCands.count(V) && definedInCaller(Blocks, V)) + Inputs.insert(V); + } for (User *U : II.users()) if (!definedInRegion(Blocks, U)) { @@ -718,7 +985,8 @@ Function *CodeExtractor::extractCodeRegion() { if (!isEligible()) return nullptr; - ValueSet inputs, outputs; + ValueSet inputs, outputs, SinkingCands, HoistingCands; + BasicBlock *CommonExit = nullptr; // Assumption: this is a single-entry code region, and the header is the first // block in the region. @@ -757,8 +1025,23 @@ Function *CodeExtractor::extractCodeRegion() { "newFuncRoot"); newFuncRoot->getInstList().push_back(BranchInst::Create(header)); + findAllocas(SinkingCands, HoistingCands, CommonExit); + assert(HoistingCands.empty() || CommonExit); + // Find inputs to, outputs from the code region. - findInputsOutputs(inputs, outputs); + findInputsOutputs(inputs, outputs, SinkingCands); + + // Now sink all instructions which only have non-phi uses inside the region + for (auto *II : SinkingCands) + cast(II)->moveBefore(*newFuncRoot, + newFuncRoot->getFirstInsertionPt()); + + if (!HoistingCands.empty()) { + auto *HoistToBlock = findOrCreateBlockForHoisting(CommonExit); + Instruction *TI = HoistToBlock->getTerminator(); + for (auto *II : HoistingCands) + cast(II)->moveBefore(TI); + } // Calculate the exit blocks for the extracted region and the total exit // weights for each of those blocks. @@ -833,12 +1116,6 @@ Function *CodeExtractor::extractCodeRegion() { } } - //cerr << "NEW FUNCTION: " << *newFunction; - // verifyFunction(*newFunction); - - // cerr << "OLD FUNCTION: " << *oldFunction; - // verifyFunction(*oldFunction); - DEBUG(if (verifyFunction(*newFunction)) report_fatal_error("verifyFunction failed!")); return newFunction; diff --git a/interpreter/llvm/src/lib/Transforms/Utils/DemoteRegToStack.cpp b/interpreter/llvm/src/lib/Transforms/Utils/DemoteRegToStack.cpp index 0eee6e19efac6..6d3d287defdb2 100644 --- a/interpreter/llvm/src/lib/Transforms/Utils/DemoteRegToStack.cpp +++ b/interpreter/llvm/src/lib/Transforms/Utils/DemoteRegToStack.cpp @@ -7,12 +7,12 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/ADT/DenseMap.h" #include "llvm/Analysis/CFG.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Type.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; diff --git a/interpreter/llvm/src/lib/Transforms/Utils/Evaluator.cpp b/interpreter/llvm/src/lib/Transforms/Utils/Evaluator.cpp index 59f176e2f231d..1328f2f3ec012 100644 --- a/interpreter/llvm/src/lib/Transforms/Utils/Evaluator.cpp +++ b/interpreter/llvm/src/lib/Transforms/Utils/Evaluator.cpp @@ -20,8 +20,8 @@ #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/DiagnosticPrinter.h" #include "llvm/IR/GlobalVariable.h" -#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Operator.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -402,7 +402,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, Value *Ptr = PtrArg->stripPointerCasts(); if (GlobalVariable *GV = dyn_cast(Ptr)) { Type *ElemTy = GV->getValueType(); - if (!Size->isAllOnesValue() && + if (!Size->isMinusOne() && 
Size->getValue().getLimitedValue() >= DL.getTypeStoreSize(ElemTy)) { Invariants.insert(GV); @@ -439,7 +439,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, if (Callee->isDeclaration()) { // If this is a function we can constant fold, do it. - if (Constant *C = ConstantFoldCall(Callee, Formals, TLI)) { + if (Constant *C = ConstantFoldCall(CS, Callee, Formals, TLI)) { InstResult = C; DEBUG(dbgs() << "Constant folded function call. Result: " << *InstResult << "\n"); diff --git a/interpreter/llvm/src/lib/Transforms/Utils/FlattenCFG.cpp b/interpreter/llvm/src/lib/Transforms/Utils/FlattenCFG.cpp index 7b96fbb11a142..435eff3bef47e 100644 --- a/interpreter/llvm/src/lib/Transforms/Utils/FlattenCFG.cpp +++ b/interpreter/llvm/src/lib/Transforms/Utils/FlattenCFG.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Utils/Local.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/ValueTracking.h" @@ -19,6 +18,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" using namespace llvm; #define DEBUG_TYPE "flattencfg" diff --git a/interpreter/llvm/src/lib/Transforms/Utils/FunctionComparator.cpp b/interpreter/llvm/src/lib/Transforms/Utils/FunctionComparator.cpp index 73a0b2737e957..4a2be3a531767 100644 --- a/interpreter/llvm/src/lib/Transforms/Utils/FunctionComparator.cpp +++ b/interpreter/llvm/src/lib/Transforms/Utils/FunctionComparator.cpp @@ -15,8 +15,8 @@ #include "llvm/Transforms/Utils/FunctionComparator.h" #include "llvm/ADT/SmallSet.h" #include "llvm/IR/CallSite.h" -#include "llvm/IR/Instructions.h" #include "llvm/IR/InlineAsm.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -76,12 +76,14 @@ int FunctionComparator::cmpMem(StringRef L, StringRef R) const { int FunctionComparator::cmpAttrs(const AttributeList L, const AttributeList R) const { - if (int Res = cmpNumbers(L.getNumSlots(), R.getNumSlots())) + if (int Res = cmpNumbers(L.getNumAttrSets(), R.getNumAttrSets())) return Res; - for (unsigned i = 0, e = L.getNumSlots(); i != e; ++i) { - AttributeList::iterator LI = L.begin(i), LE = L.end(i), RI = R.begin(i), - RE = R.end(i); + for (unsigned i = L.index_begin(), e = L.index_end(); i != e; ++i) { + AttributeSet LAS = L.getAttributes(i); + AttributeSet RAS = R.getAttributes(i); + AttributeSet::iterator LI = LAS.begin(), LE = LAS.end(); + AttributeSet::iterator RI = RAS.begin(), RE = RAS.end(); for (; LI != LE && RI != RE; ++LI, ++RI) { Attribute LA = *LI; Attribute RA = *RI; @@ -511,8 +513,8 @@ int FunctionComparator::cmpOperations(const Instruction *L, if (int Res = cmpOrderings(LI->getOrdering(), cast(R)->getOrdering())) return Res; - if (int Res = - cmpNumbers(LI->getSynchScope(), cast(R)->getSynchScope())) + if (int Res = cmpNumbers(LI->getSyncScopeID(), + cast(R)->getSyncScopeID())) return Res; return cmpRangeMetadata(LI->getMetadata(LLVMContext::MD_range), cast(R)->getMetadata(LLVMContext::MD_range)); @@ -527,7 +529,8 @@ int FunctionComparator::cmpOperations(const Instruction *L, if (int Res = cmpOrderings(SI->getOrdering(), cast(R)->getOrdering())) return Res; - return cmpNumbers(SI->getSynchScope(), cast(R)->getSynchScope()); + return cmpNumbers(SI->getSyncScopeID(), + cast(R)->getSyncScopeID()); } if (const CmpInst *CI = dyn_cast(L)) return 
cmpNumbers(CI->getPredicate(), cast(R)->getPredicate()); @@ -582,7 +585,8 @@ int FunctionComparator::cmpOperations(const Instruction *L, if (int Res = cmpOrderings(FI->getOrdering(), cast(R)->getOrdering())) return Res; - return cmpNumbers(FI->getSynchScope(), cast(R)->getSynchScope()); + return cmpNumbers(FI->getSyncScopeID(), + cast(R)->getSyncScopeID()); } if (const AtomicCmpXchgInst *CXI = dyn_cast(L)) { if (int Res = cmpNumbers(CXI->isVolatile(), @@ -599,8 +603,8 @@ int FunctionComparator::cmpOperations(const Instruction *L, cmpOrderings(CXI->getFailureOrdering(), cast(R)->getFailureOrdering())) return Res; - return cmpNumbers(CXI->getSynchScope(), - cast(R)->getSynchScope()); + return cmpNumbers(CXI->getSyncScopeID(), + cast(R)->getSyncScopeID()); } if (const AtomicRMWInst *RMWI = dyn_cast(L)) { if (int Res = cmpNumbers(RMWI->getOperation(), @@ -612,8 +616,8 @@ int FunctionComparator::cmpOperations(const Instruction *L, if (int Res = cmpOrderings(RMWI->getOrdering(), cast(R)->getOrdering())) return Res; - return cmpNumbers(RMWI->getSynchScope(), - cast(R)->getSynchScope()); + return cmpNumbers(RMWI->getSyncScopeID(), + cast(R)->getSyncScopeID()); } if (const PHINode *PNL = dyn_cast(L)) { const PHINode *PNR = cast(R); diff --git a/interpreter/llvm/src/lib/Transforms/Utils/FunctionImportUtils.cpp b/interpreter/llvm/src/lib/Transforms/Utils/FunctionImportUtils.cpp index b00f4b14068a2..a98d07237b474 100644 --- a/interpreter/llvm/src/lib/Transforms/Utils/FunctionImportUtils.cpp +++ b/interpreter/llvm/src/lib/Transforms/Utils/FunctionImportUtils.cpp @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Analysis/ModuleSummaryAnalysis.h" #include "llvm/Transforms/Utils/FunctionImportUtils.h" +#include "llvm/Analysis/ModuleSummaryAnalysis.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" using namespace llvm; diff --git a/interpreter/llvm/src/lib/Transforms/Utils/GlobalStatus.cpp b/interpreter/llvm/src/lib/Transforms/Utils/GlobalStatus.cpp index ba4b78ac758a6..245fefb38ee88 100644 --- a/interpreter/llvm/src/lib/Transforms/Utils/GlobalStatus.cpp +++ b/interpreter/llvm/src/lib/Transforms/Utils/GlobalStatus.cpp @@ -7,6 +7,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Transforms/Utils/GlobalStatus.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CallSite.h" @@ -18,7 +19,6 @@ #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/Transforms/Utils/GlobalStatus.h" #include "llvm/IR/Use.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" diff --git a/interpreter/llvm/src/lib/Transforms/Utils/InlineFunction.cpp b/interpreter/llvm/src/lib/Transforms/Utils/InlineFunction.cpp index 9cb4762b683c4..2a18c140c7886 100644 --- a/interpreter/llvm/src/lib/Transforms/Utils/InlineFunction.cpp +++ b/interpreter/llvm/src/lib/Transforms/Utils/InlineFunction.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Utils/Cloning.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" @@ -28,13 +27,13 @@ #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Attributes.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/CFG.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" +#include 
"llvm/IR/DIBuilder.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/DIBuilder.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" @@ -43,6 +42,7 @@ #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Module.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" #include @@ -1397,11 +1397,12 @@ static void updateCallerBFI(BasicBlock *CallSiteBlock, static void updateCallProfile(Function *Callee, const ValueToValueMapTy &VMap, const Optional &CalleeEntryCount, const Instruction *TheCall, - ProfileSummaryInfo *PSI) { + ProfileSummaryInfo *PSI, + BlockFrequencyInfo *CallerBFI) { if (!CalleeEntryCount.hasValue() || CalleeEntryCount.getValue() < 1) return; Optional CallSiteCount = - PSI ? PSI->getProfileCount(TheCall, nullptr) : None; + PSI ? PSI->getProfileCount(TheCall, CallerBFI) : None; uint64_t CallCount = std::min(CallSiteCount.hasValue() ? CallSiteCount.getValue() : 0, CalleeEntryCount.getValue()); @@ -1637,7 +1638,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, CalledFunc->front()); updateCallProfile(CalledFunc, VMap, CalledFunc->getEntryCount(), TheCall, - IFI.PSI); + IFI.PSI, IFI.CallerBFI); // Update the profile count of callee. updateCalleeCount(IFI.CallerBFI, OrigBB, TheCall, CalledFunc, IFI.PSI); diff --git a/interpreter/llvm/src/lib/Transforms/Utils/InstructionNamer.cpp b/interpreter/llvm/src/lib/Transforms/Utils/InstructionNamer.cpp index 53b432fcafd4f..23ec45edb3efd 100644 --- a/interpreter/llvm/src/lib/Transforms/Utils/InstructionNamer.cpp +++ b/interpreter/llvm/src/lib/Transforms/Utils/InstructionNamer.cpp @@ -14,10 +14,10 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Scalar.h" #include "llvm/IR/Function.h" #include "llvm/IR/Type.h" #include "llvm/Pass.h" +#include "llvm/Transforms/Scalar.h" using namespace llvm; namespace { diff --git a/interpreter/llvm/src/lib/Transforms/Utils/Local.cpp b/interpreter/llvm/src/lib/Transforms/Utils/Local.cpp index 608834b06debe..74610613001c6 100644 --- a/interpreter/llvm/src/lib/Transforms/Utils/Local.cpp +++ b/interpreter/llvm/src/lib/Transforms/Utils/Local.cpp @@ -22,10 +22,11 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/LazyValueInfo.h" +#include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/CFG.h" +#include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DIBuilder.h" #include "llvm/IR/DataLayout.h" @@ -1037,17 +1038,15 @@ unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign, const DominatorTree *DT) { assert(V->getType()->isPointerTy() && "getOrEnforceKnownAlignment expects a pointer!"); - unsigned BitWidth = DL.getPointerTypeSizeInBits(V->getType()); - KnownBits Known(BitWidth); - computeKnownBits(V, Known, DL, 0, AC, CxtI, DT); + KnownBits Known = computeKnownBits(V, DL, 0, AC, CxtI, DT); unsigned TrailZ = Known.countMinTrailingZeros(); // Avoid trouble with ridiculously large TrailZ values, such as // those computed from a null pointer. 
TrailZ = std::min(TrailZ, unsigned(sizeof(unsigned) * CHAR_BIT - 1)); - unsigned Align = 1u << std::min(BitWidth - 1, TrailZ); + unsigned Align = 1u << std::min(Known.getBitWidth() - 1, TrailZ); // LLVM doesn't support alignments larger than this currently. Align = std::min(Align, +Value::MaximumAlignment); @@ -1083,7 +1082,7 @@ static bool LdStHasDebugValue(DILocalVariable *DIVar, DIExpression *DIExpr, } /// See if there is a dbg.value intrinsic for DIVar for the PHI node. -static bool PhiHasDebugValue(DILocalVariable *DIVar, +static bool PhiHasDebugValue(DILocalVariable *DIVar, DIExpression *DIExpr, PHINode *APN) { // Since we can't guarantee that the original dbg.declare intrinsic @@ -1105,8 +1104,9 @@ static bool PhiHasDebugValue(DILocalVariable *DIVar, void llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, StoreInst *SI, DIBuilder &Builder) { auto *DIVar = DDI->getVariable(); - auto *DIExpr = DDI->getExpression(); assert(DIVar && "Missing variable"); + auto *DIExpr = DDI->getExpression(); + Value *DV = SI->getOperand(0); // If an argument is zero extended then use argument directly. The ZExt // may be zapped by an optimization pass in future. if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0))) ExtendedArg = dyn_cast<Argument>(SExt->getOperand(0)); if (ExtendedArg) { - // We're now only describing a subset of the variable. The fragment we're - // describing will always be smaller than the variable size, because - // VariableSize == Size of Alloca described by DDI. Since SI stores - // to the alloca described by DDI, if its first operand is an extend, - // we're guaranteed that before extension, the value was narrower than - // the size of the alloca, hence the size of the described variable. - SmallVector Ops; - unsigned FragmentOffset = 0; - // If this already is a bit fragment, we drop the bit fragment from the - // expression and record the offset. - auto Fragment = DIExpr->getFragmentInfo(); - if (Fragment) { - Ops.append(DIExpr->elements_begin(), DIExpr->elements_end()-3); - FragmentOffset = Fragment->OffsetInBits; - } else { - Ops.append(DIExpr->elements_begin(), DIExpr->elements_end()); + // If this DDI was already describing only a fragment of a variable, ensure + // that fragment is appropriately narrowed here. + // But if a fragment wasn't used, describe the value as the original + // argument (rather than the zext or sext) so that it remains described even + // if the sext/zext is optimized away. This widens the variable description, + // leaving it up to the consumer to know how the smaller value may be + // represented in a larger register.
+ if (auto Fragment = DIExpr->getFragmentInfo()) { + unsigned FragmentOffset = Fragment->OffsetInBits; + SmallVector Ops(DIExpr->elements_begin(), + DIExpr->elements_end() - 3); + Ops.push_back(dwarf::DW_OP_LLVM_fragment); + Ops.push_back(FragmentOffset); + const DataLayout &DL = DDI->getModule()->getDataLayout(); + Ops.push_back(DL.getTypeSizeInBits(ExtendedArg->getType())); + DIExpr = Builder.createExpression(Ops); } - Ops.push_back(dwarf::DW_OP_LLVM_fragment); - Ops.push_back(FragmentOffset); - const DataLayout &DL = DDI->getModule()->getDataLayout(); - Ops.push_back(DL.getTypeSizeInBits(ExtendedArg->getType())); - auto NewDIExpr = Builder.createExpression(Ops); - if (!LdStHasDebugValue(DIVar, NewDIExpr, SI)) - Builder.insertDbgValueIntrinsic(ExtendedArg, 0, DIVar, NewDIExpr, - DDI->getDebugLoc(), SI); - } else if (!LdStHasDebugValue(DIVar, DIExpr, SI)) - Builder.insertDbgValueIntrinsic(SI->getOperand(0), 0, DIVar, DIExpr, - DDI->getDebugLoc(), SI); + DV = ExtendedArg; + } + if (!LdStHasDebugValue(DIVar, DIExpr, SI)) + Builder.insertDbgValueIntrinsic(DV, 0, DIVar, DIExpr, DDI->getDebugLoc(), + SI); } /// Inserts a llvm.dbg.value intrinsic before a load of an alloca'd value @@ -1166,7 +1160,7 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, DbgValue->insertAfter(LI); } -/// Inserts a llvm.dbg.value intrinsic after a phi +/// Inserts a llvm.dbg.value intrinsic after a phi /// that has an associated llvm.dbg.decl intrinsic. void llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, PHINode *APN, DIBuilder &Builder) { @@ -1668,9 +1662,10 @@ void llvm::removeUnwindEdge(BasicBlock *BB) { TI->eraseFromParent(); } -/// removeUnreachableBlocksFromFn - Remove blocks that are not reachable, even +/// removeUnreachableBlocks - Remove blocks that are not reachable, even /// if they are in a dead cycle. Return true if a change was made, false -/// otherwise. +/// otherwise. If `LVI` is passed, this function preserves LazyValueInfo +/// after modifying the CFG. bool llvm::removeUnreachableBlocks(Function &F, LazyValueInfo *LVI) { SmallPtrSet Reachable; bool Changed = markAliveBlocks(F, Reachable); @@ -1749,12 +1744,12 @@ void llvm::combineMetadata(Instruction *K, const Instruction *J, // Preserve !invariant.group in K. break; case LLVMContext::MD_align: - K->setMetadata(Kind, + K->setMetadata(Kind, MDNode::getMostGenericAlignmentOrDereferenceable(JMD, KMD)); break; case LLVMContext::MD_dereferenceable: case LLVMContext::MD_dereferenceable_or_null: - K->setMetadata(Kind, + K->setMetadata(Kind, MDNode::getMostGenericAlignmentOrDereferenceable(JMD, KMD)); break; } @@ -1801,6 +1796,23 @@ static unsigned replaceDominatedUsesWith(Value *From, Value *To, return Count; } +unsigned llvm::replaceNonLocalUsesWith(Instruction *From, Value *To) { + assert(From->getType() == To->getType()); + auto *BB = From->getParent(); + unsigned Count = 0; + + for (Value::use_iterator UI = From->use_begin(), UE = From->use_end(); + UI != UE;) { + Use &U = *UI++; + auto *I = cast(U.getUser()); + if (I->getParent() == BB) + continue; + U.set(To); + ++Count; + } + return Count; +} + unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To, DominatorTree &DT, const BasicBlockEdge &Root) { @@ -1837,6 +1849,49 @@ bool llvm::callsGCLeafFunction(ImmutableCallSite CS) { return false; } +void llvm::copyNonnullMetadata(const LoadInst &OldLI, MDNode *N, + LoadInst &NewLI) { + auto *NewTy = NewLI.getType(); + + // This only directly applies if the new type is also a pointer. 
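
Returning briefly to replaceNonLocalUsesWith, added just above in this Local.cpp hunk: it rewrites every use of a value except the ones in the value's own block, counting the rewrites. A standalone sketch over plain records instead of LLVM Use objects:

    #include <vector>

    struct UseSlot {
      int userBlock; // block id of the using instruction
      int value;     // value id currently referenced
    };

    // Rewrite uses outside defBlock to `to`, counting rewrites, mirroring
    // the use_iterator walk in replaceNonLocalUsesWith.
    unsigned replaceNonLocal(std::vector<UseSlot> &uses, int defBlock, int to) {
      unsigned count = 0;
      for (UseSlot &u : uses) {
        if (u.userBlock == defBlock)
          continue; // local uses are left untouched
        u.value = to;
        ++count;
      }
      return count;
    }
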
+ if (NewTy->isPointerTy()) { + NewLI.setMetadata(LLVMContext::MD_nonnull, N); + return; + } + + // The only other translation we can do is to integral loads with !range + // metadata. + if (!NewTy->isIntegerTy()) + return; + + MDBuilder MDB(NewLI.getContext()); + const Value *Ptr = OldLI.getPointerOperand(); + auto *ITy = cast<IntegerType>(NewTy); + auto *NullInt = ConstantExpr::getPtrToInt( + ConstantPointerNull::get(cast<PointerType>(Ptr->getType())), ITy); + auto *NonNullInt = ConstantExpr::getAdd(NullInt, ConstantInt::get(ITy, 1)); + NewLI.setMetadata(LLVMContext::MD_range, + MDB.createRange(NonNullInt, NullInt)); +} + +void llvm::copyRangeMetadata(const DataLayout &DL, const LoadInst &OldLI, + MDNode *N, LoadInst &NewLI) { + auto *NewTy = NewLI.getType(); + + // Give up unless it is converted to a pointer where there is a single very + // valuable mapping we can do reliably. + // FIXME: It would be nice to propagate this in more ways, but the type + // conversions make it hard. + if (!NewTy->isPointerTy()) + return; + + unsigned BitWidth = DL.getTypeSizeInBits(NewTy); + if (!getConstantRangeFromMetadata(*N).contains(APInt(BitWidth, 0))) { + MDNode *NN = MDNode::get(OldLI.getContext(), None); + NewLI.setMetadata(LLVMContext::MD_nonnull, NN); + } +} + namespace { /// A potential constituent of a bitreverse or bswap expression. See /// collectBitParts for a fuller explanation. @@ -1958,7 +2013,7 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals, unsigned NumMaskedBits = AndMask.countPopulation(); if (!MatchBitReversals && NumMaskedBits % 8 != 0) return Result; - + auto &Res = collectBitParts(I->getOperand(0), MatchBSwaps, MatchBitReversals, BPS); if (!Res) @@ -2099,3 +2154,57 @@ void llvm::maybeMarkSanitizerLibraryCallNoBuiltin( !F->doesNotAccessMemory()) CI->addAttribute(AttributeList::FunctionIndex, Attribute::NoBuiltin); } + +bool llvm::canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx) { + // We can't have a PHI with a metadata type. + if (I->getOperand(OpIdx)->getType()->isMetadataTy()) + return false; + + // Early exit. + if (!isa<Constant>(I->getOperand(OpIdx))) + return true; + + switch (I->getOpcode()) { + default: + return true; + case Instruction::Call: + case Instruction::Invoke: + // Can't handle inline asm. Skip it. + if (isa<InlineAsm>(ImmutableCallSite(I).getCalledValue())) + return false; + // Many arithmetic intrinsics have no issue taking a + // variable; however, it's hard to distinguish these from + // specials such as @llvm.frameaddress that require a constant. + if (isa<IntrinsicInst>(I)) + return false; + + // Constant bundle operands may need to retain their constant-ness for + // correctness. + if (ImmutableCallSite(I).isBundleOperand(OpIdx)) + return false; + return true; + case Instruction::ShuffleVector: + // Shufflevector masks are constant. + return OpIdx != 2; + case Instruction::Switch: + case Instruction::ExtractValue: + // All operands apart from the first are constant. + return OpIdx == 0; + case Instruction::InsertValue: + // All operands apart from the first and the second are constant. + return OpIdx < 2; + case Instruction::Alloca: + // Static allocas (constant size in the entry block) are handled by + // prologue/epilogue insertion so they're free anyway. We definitely don't + // want to make them non-constant.
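
The copyNonnullMetadata/copyRangeMetadata pair above relies on a duality: for an N-bit integer, the wrapped half-open range [1, 0) contains every value except zero, so it encodes exactly "non-null". A quick integer model of that containment test:

    #include <cstdint>

    // Half-open interval [lo, hi) with wrap-around when lo > hi, the same
    // convention as LLVM range metadata. With lo = 1, hi = 0 this admits
    // every value except 0: inWrappedRange(0, 1, 0) == false,
    // inWrappedRange(42, 1, 0) == true.
    bool inWrappedRange(uint64_t v, uint64_t lo, uint64_t hi) {
      return lo <= hi ? (v >= lo && v < hi) : (v >= lo || v < hi);
    }
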
+ return !dyn_cast(I)->isStaticAlloca(); + case Instruction::GetElementPtr: + if (OpIdx == 0) + return true; + gep_type_iterator It = gep_type_begin(I); + for (auto E = std::next(It, OpIdx); It != E; ++It) + if (It.isStruct()) + return false; + return true; + } +} diff --git a/interpreter/llvm/src/lib/Transforms/Utils/LoopSimplify.cpp b/interpreter/llvm/src/lib/Transforms/Utils/LoopSimplify.cpp index 72c06aef80370..e21e34df8ded0 100644 --- a/interpreter/llvm/src/lib/Transforms/Utils/LoopSimplify.cpp +++ b/interpreter/llvm/src/lib/Transforms/Utils/LoopSimplify.cpp @@ -38,15 +38,14 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/LoopSimplify.h" -#include "llvm/Transforms/Scalar.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SetOperations.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/DependenceAnalysis.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/InstructionSimplify.h" @@ -65,6 +64,7 @@ #include "llvm/IR/Type.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" @@ -72,7 +72,6 @@ using namespace llvm; #define DEBUG_TYPE "loop-simplify" -STATISTIC(NumInserted, "Number of pre-header or exit blocks inserted"); STATISTIC(NumNested , "Number of nested loops split out"); // If the block isn't already, move the new block to right after some 'outside @@ -152,37 +151,6 @@ BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, DominatorTree *DT, return PreheaderBB; } -/// \brief Ensure that the loop preheader dominates all exit blocks. -/// -/// This method is used to split exit blocks that have predecessors outside of -/// the loop. -static BasicBlock *rewriteLoopExitBlock(Loop *L, BasicBlock *Exit, - DominatorTree *DT, LoopInfo *LI, - bool PreserveLCSSA) { - SmallVector LoopBlocks; - for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit); I != E; ++I) { - BasicBlock *P = *I; - if (L->contains(P)) { - // Don't do this if the loop is exited via an indirect branch. - if (isa(P->getTerminator())) return nullptr; - - LoopBlocks.push_back(P); - } - } - - assert(!LoopBlocks.empty() && "No edges coming in from outside the loop?"); - BasicBlock *NewExitBB = nullptr; - - NewExitBB = SplitBlockPredecessors(Exit, LoopBlocks, ".loopexit", DT, LI, - PreserveLCSSA); - if (!NewExitBB) - return nullptr; - - DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block " - << NewExitBB->getName() << "\n"); - return NewExitBB; -} - /// Add the specified block, and all of its predecessors, to the specified set, /// if it's not already in there. Stop predecessor traversal when we reach /// StopBlock. @@ -346,16 +314,7 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader, // Split edges to exit blocks from the inner loop, if they emerged in the // process of separating the outer one. 
- SmallVector ExitBlocks; - L->getExitBlocks(ExitBlocks); - SmallSetVector ExitBlockSet(ExitBlocks.begin(), - ExitBlocks.end()); - for (BasicBlock *ExitBlock : ExitBlockSet) { - if (any_of(predecessors(ExitBlock), - [L](BasicBlock *BB) { return !L->contains(BB); })) { - rewriteLoopExitBlock(L, ExitBlock, DT, LI, PreserveLCSSA); - } - } + formDedicatedExitBlocks(L, DT, LI, PreserveLCSSA); if (PreserveLCSSA) { // Fix LCSSA form for L. Some values, which previously were only used inside @@ -563,29 +522,16 @@ static bool simplifyOneLoop(Loop *L, SmallVectorImpl &Worklist, BasicBlock *Preheader = L->getLoopPreheader(); if (!Preheader) { Preheader = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA); - if (Preheader) { - ++NumInserted; + if (Preheader) Changed = true; - } } // Next, check to make sure that all exit nodes of the loop only have // predecessors that are inside of the loop. This check guarantees that the // loop preheader/header will dominate the exit blocks. If the exit block has // predecessors from outside of the loop, split the edge now. - SmallVector ExitBlocks; - L->getExitBlocks(ExitBlocks); - - SmallSetVector ExitBlockSet(ExitBlocks.begin(), - ExitBlocks.end()); - for (BasicBlock *ExitBlock : ExitBlockSet) { - if (any_of(predecessors(ExitBlock), - [L](BasicBlock *BB) { return !L->contains(BB); })) { - rewriteLoopExitBlock(L, ExitBlock, DT, LI, PreserveLCSSA); - ++NumInserted; - Changed = true; - } - } + if (formDedicatedExitBlocks(L, DT, LI, PreserveLCSSA)) + Changed = true; // If the header has more than two predecessors at this point (from the // preheader and from multiple backedges), we must adjust the loop. @@ -614,10 +560,8 @@ static bool simplifyOneLoop(Loop *L, SmallVectorImpl &Worklist, // insert a new block that all backedges target, then make it jump to the // loop header. LoopLatch = insertUniqueBackedgeBlock(L, Preheader, DT, LI); - if (LoopLatch) { - ++NumInserted; + if (LoopLatch) Changed = true; - } } const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); @@ -645,7 +589,22 @@ static bool simplifyOneLoop(Loop *L, SmallVectorImpl &Worklist, // loop-invariant instructions out of the way to open up more // opportunities, and the disadvantage of having the responsibility // to preserve dominator information. 
- if (ExitBlockSet.size() == 1) { + auto HasUniqueExitBlock = [&]() { + BasicBlock *UniqueExit = nullptr; + for (auto *ExitingBB : ExitingBlocks) + for (auto *SuccBB : successors(ExitingBB)) { + if (L->contains(SuccBB)) + continue; + + if (!UniqueExit) + UniqueExit = SuccBB; + else if (UniqueExit != SuccBB) + return false; + } + + return true; + }; + if (HasUniqueExitBlock()) { for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) { BasicBlock *ExitingBlock = ExitingBlocks[i]; if (!ExitingBlock->getSinglePredecessor()) continue; diff --git a/interpreter/llvm/src/lib/Transforms/Utils/LoopUnroll.cpp b/interpreter/llvm/src/lib/Transforms/Utils/LoopUnroll.cpp index 4ab4d7949d233..f2527f89e83e5 100644 --- a/interpreter/llvm/src/lib/Transforms/Utils/LoopUnroll.cpp +++ b/interpreter/llvm/src/lib/Transforms/Utils/LoopUnroll.cpp @@ -16,7 +16,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Utils/UnrollLoop.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" @@ -39,6 +38,7 @@ #include "llvm/Transforms/Utils/LoopSimplify.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/SimplifyIndVar.h" +#include "llvm/Transforms/Utils/UnrollLoop.h" using namespace llvm; #define DEBUG_TYPE "loop-unroll" diff --git a/interpreter/llvm/src/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/interpreter/llvm/src/lib/Transforms/Utils/LoopUnrollRuntime.cpp index 391fde3b0b015..d43ce7abb7cd9 100644 --- a/interpreter/llvm/src/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ b/interpreter/llvm/src/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -21,8 +21,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Utils/UnrollLoop.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/LoopIterator.h" #include "llvm/Analysis/LoopPass.h" @@ -37,6 +37,8 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/LoopUtils.h" +#include "llvm/Transforms/Utils/UnrollLoop.h" #include using namespace llvm; @@ -45,6 +47,10 @@ using namespace llvm; STATISTIC(NumRuntimeUnrolled, "Number of loops unrolled with run-time trip counts"); +static cl::opt UnrollRuntimeMultiExit( + "unroll-runtime-multi-exit", cl::init(false), cl::Hidden, + cl::desc("Allow runtime unrolling for loops with multiple exits, when " + "epilog is generated")); /// Connect the unrolling prolog code to the original loop. /// The unrolling prolog code contains code to execute the @@ -60,9 +66,11 @@ STATISTIC(NumRuntimeUnrolled, /// than the unroll factor. /// static void ConnectProlog(Loop *L, Value *BECount, unsigned Count, - BasicBlock *PrologExit, BasicBlock *PreHeader, - BasicBlock *NewPreHeader, ValueToValueMapTy &VMap, - DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA) { + BasicBlock *PrologExit, + BasicBlock *OriginalLoopLatchExit, + BasicBlock *PreHeader, BasicBlock *NewPreHeader, + ValueToValueMapTy &VMap, DominatorTree *DT, + LoopInfo *LI, bool PreserveLCSSA) { BasicBlock *Latch = L->getLoopLatch(); assert(Latch && "Loop must have a latch"); BasicBlock *PrologLatch = cast(VMap[Latch]); @@ -137,17 +145,15 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count, // then (BECount + 1) cannot unsigned-overflow. 
Value *BrLoopExit = B.CreateICmpULT(BECount, ConstantInt::get(BECount->getType(), Count - 1)); - BasicBlock *Exit = L->getUniqueExitBlock(); - assert(Exit && "Loop must have a single exit block only"); // Split the exit to maintain loop canonicalization guarantees - SmallVector Preds(predecessors(Exit)); - SplitBlockPredecessors(Exit, Preds, ".unr-lcssa", DT, LI, + SmallVector Preds(predecessors(OriginalLoopLatchExit)); + SplitBlockPredecessors(OriginalLoopLatchExit, Preds, ".unr-lcssa", DT, LI, PreserveLCSSA); // Add the branch to the exit block (around the unrolled loop) - B.CreateCondBr(BrLoopExit, Exit, NewPreHeader); + B.CreateCondBr(BrLoopExit, OriginalLoopLatchExit, NewPreHeader); InsertPt->eraseFromParent(); if (DT) - DT->changeImmediateDominator(Exit, PrologExit); + DT->changeImmediateDominator(OriginalLoopLatchExit, PrologExit); } /// Connect the unrolling epilog code to the original loop. @@ -285,15 +291,13 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit, /// The cloned blocks should be inserted between InsertTop and InsertBot. /// If loop structure is cloned, InsertTop should be the new preheader, InsertBot /// the new loop exit. -/// -static void CloneLoopBlocks(Loop *L, Value *NewIter, - const bool CreateRemainderLoop, - const bool UseEpilogRemainder, - BasicBlock *InsertTop, BasicBlock *InsertBot, - BasicBlock *Preheader, - std::vector<BasicBlock *> &NewBlocks, - LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, - DominatorTree *DT, LoopInfo *LI) { +/// Return the new cloned loop that is created when CreateRemainderLoop is true. +static Loop * +CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop, + const bool UseEpilogRemainder, BasicBlock *InsertTop, + BasicBlock *InsertBot, BasicBlock *Preheader, + std::vector<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks, + ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI) { StringRef suffix = UseEpilogRemainder ? "epil" : "prol"; BasicBlock *Header = L->getHeader(); BasicBlock *Latch = L->getLoopLatch(); @@ -418,9 +422,56 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, // Set operand 0 to refer to the loop id itself. NewLoopID->replaceOperandWith(0, NewLoopID); NewLoop->setLoopID(NewLoopID); + return NewLoop; } + else + return nullptr; } +/// Returns true if we can safely unroll a multi-exit/exiting loop. OtherExits +/// is populated with all the loop exit blocks other than the LatchExit block. +static bool +canSafelyUnrollMultiExitLoop(Loop *L, SmallVectorImpl<BasicBlock *> &OtherExits, + BasicBlock *LatchExit, bool PreserveLCSSA, + bool UseEpilogRemainder) { + + // Support runtime unrolling for multiple exit blocks and multiple exiting + // blocks. + if (!UnrollRuntimeMultiExit) + return false; + // Even if runtime multi-exit is enabled, we currently have some correctness + // constraints in unrolling a multi-exit loop. + // We rely on LCSSA form being preserved when the exit blocks are transformed. + if (!PreserveLCSSA) + return false; + SmallVector Exits; + L->getUniqueExitBlocks(Exits); + for (auto *BB : Exits) + if (BB != LatchExit) + OtherExits.push_back(BB); + + // TODO: Support multiple exiting blocks jumping to the `LatchExit` when + // UnrollRuntimeMultiExit is true. This will require updating the logic in + // connectEpilog/connectProlog. + if (!LatchExit->getSinglePredecessor()) { + DEBUG(dbgs() << "Bailout for multi-exit handling when latch exit has >1 " + "predecessor.\n"); + return false; + } + // FIXME: We bail out of multi-exit unrolling when an epilog loop is generated + // and L is an inner loop.
This is because, in the presence of multiple exits, the + outer loop is incorrect: we do not add the EpilogPreheader and exit to the + outer loop. This is automatically handled in the prolog case, so we do not + have that bug in prolog generation. + if (UseEpilogRemainder && L->getParentLoop()) + return false; + + // All constraints have been satisfied. + return true; +} + + + /// Insert code in the prolog/epilog code when unrolling a loop with a /// run-time trip-count. /// @@ -464,18 +515,40 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, bool UseEpilogRemainder, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, bool PreserveLCSSA) { - // for now, only unroll loops that contain a single exit - if (!L->getExitingBlock()) - return false; + DEBUG(dbgs() << "Trying runtime unrolling on Loop: \n"); + DEBUG(L->dump()); - // Make sure the loop is in canonical form, and there is a single - // exit block only. - if (!L->isLoopSimplifyForm()) - return false; - BasicBlock *Exit = L->getUniqueExitBlock(); // successor out of loop - if (!Exit) + // Make sure the loop is in canonical form. + if (!L->isLoopSimplifyForm()) { + DEBUG(dbgs() << "Not in simplify form!\n"); return false; + } + // Guaranteed by LoopSimplifyForm. + BasicBlock *Latch = L->getLoopLatch(); + BasicBlock *Header = L->getHeader(); + + BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator()); + unsigned ExitIndex = LatchBR->getSuccessor(0) == Header ? 1 : 0; + BasicBlock *LatchExit = LatchBR->getSuccessor(ExitIndex); + // Cloning the loop basic blocks (`CloneLoopBlocks`) requires that one of the + // targets of the Latch be an exit block out of the loop. This needs + // to be guaranteed by the callers of UnrollRuntimeLoopRemainder. + assert(!L->contains(LatchExit) && + "one of the loop latch successors should be the exit block!"); + // These are exit blocks other than the target of the latch exiting block. + SmallVector OtherExits; + bool isMultiExitUnrollingEnabled = canSafelyUnrollMultiExitLoop( + L, OtherExits, LatchExit, PreserveLCSSA, UseEpilogRemainder); + // Support only a single exit and exiting block unless multi-exit loop unrolling is enabled. + if (!isMultiExitUnrollingEnabled && + (!L->getExitingBlock() || OtherExits.size())) { + DEBUG( + dbgs() + << "Multiple exit/exiting blocks in loop and multi-exit unrolling not " + "enabled!\n"); + return false; + } // Use Scalar Evolution to compute the trip count. This allows more loops to // be unrolled than relying on induction var simplification. if (!SE) @@ -483,52 +556,52 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, // Only unroll loops with a computable trip count, and the trip count needs // to be an int value (allowing a pointer type is a TODO item). - const SCEV *BECountSC = SE->getBackedgeTakenCount(L); + // We calculate the backedge count by using getExitCount on the Latch block, + // which is proven to be the only exiting block in this loop. This is the same + // as calculating getBackedgeTakenCount on the loop (which computes SCEV for + // all exiting blocks). + const SCEV *BECountSC = SE->getExitCount(L, Latch); if (isa<SCEVCouldNotCompute>(BECountSC) || - !BECountSC->getType()->isIntegerTy()) + !BECountSC->getType()->isIntegerTy()) { + DEBUG(dbgs() << "Could not compute exit block SCEV\n"); return false; + } unsigned BEWidth = cast<IntegerType>(BECountSC->getType())->getBitWidth(); // Add 1 since the backedge count doesn't include the first loop iteration.
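
The code that follows forms TripCountSC = BECountSC + 1 and, a few lines further down, bails when Log2_32(Count) > BEWidth. That constraint is what makes the remainder ("xtraiter") arithmetic safe even when BECount + 1 wraps in the narrow induction type. A sketch of the mask form, assuming (as the mask requires) a power-of-two unroll factor:

    #include <cassert>
    #include <cstdint>

    // Remainder iteration count for runtime unrolling. TripCount = BECount + 1
    // may wrap to zero; the mask still yields TripCount % Count.
    uint64_t remainderIterations(uint64_t beCount, uint64_t count) {
      assert(count != 0 && (count & (count - 1)) == 0 && "power-of-two factor");
      uint64_t tripCount = beCount + 1; // may wrap; modular arithmetic is fine
      return tripCount & (count - 1);   // e.g. beCount = 9, count = 4 -> 2
    }
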
const SCEV *TripCountSC = SE->getAddExpr(BECountSC, SE->getConstant(BECountSC->getType(), 1)); - if (isa(TripCountSC)) + if (isa(TripCountSC)) { + DEBUG(dbgs() << "Could not compute trip count SCEV.\n"); return false; + } - BasicBlock *Header = L->getHeader(); BasicBlock *PreHeader = L->getLoopPreheader(); BranchInst *PreHeaderBR = cast(PreHeader->getTerminator()); const DataLayout &DL = Header->getModule()->getDataLayout(); SCEVExpander Expander(*SE, DL, "loop-unroll"); if (!AllowExpensiveTripCount && - Expander.isHighCostExpansion(TripCountSC, L, PreHeaderBR)) + Expander.isHighCostExpansion(TripCountSC, L, PreHeaderBR)) { + DEBUG(dbgs() << "High cost for expanding trip count scev!\n"); return false; + } // This constraint lets us deal with an overflowing trip count easily; see the // comment on ModVal below. - if (Log2_32(Count) > BEWidth) + if (Log2_32(Count) > BEWidth) { + DEBUG(dbgs() + << "Count failed constraint on overflow trip count calculation.\n"); return false; + } - BasicBlock *Latch = L->getLoopLatch(); - - // Cloning the loop basic blocks (`CloneLoopBlocks`) requires that one of the - // targets of the Latch be the single exit block out of the loop. This needs - // to be guaranteed by the callers of UnrollRuntimeLoopRemainder. - BranchInst *LatchBR = cast(Latch->getTerminator()); - assert( - (LatchBR->getSuccessor(0) == Exit || LatchBR->getSuccessor(1) == Exit) && - "one of the loop latch successors should be " - "the exit block!"); - // Avoid warning of unused `LatchBR` variable in release builds. - (void)LatchBR; // Loop structure is the following: // // PreHeader // Header // ... // Latch - // Exit + // LatchExit BasicBlock *NewPreHeader; BasicBlock *NewExit = nullptr; @@ -541,9 +614,9 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, // Split PreHeader to insert a branch around loop for unrolling. NewPreHeader = SplitBlock(PreHeader, PreHeader->getTerminator(), DT, LI); NewPreHeader->setName(PreHeader->getName() + ".new"); - // Split Exit to create phi nodes from branch above. - SmallVector Preds(predecessors(Exit)); - NewExit = SplitBlockPredecessors(Exit, Preds, ".unr-lcssa", + // Split LatchExit to create phi nodes from branch above. + SmallVector Preds(predecessors(LatchExit)); + NewExit = SplitBlockPredecessors(LatchExit, Preds, ".unr-lcssa", DT, LI, PreserveLCSSA); // Split NewExit to insert epilog remainder loop. EpilogPreHeader = SplitBlock(NewExit, NewExit->getTerminator(), DT, LI); @@ -570,7 +643,7 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, // Latch Header // *NewExit ... // *EpilogPreHeader Latch - // Exit Exit + // LatchExit LatchExit // Calculate conditions for branch around loop for unrolling // in epilog case and around prolog remainder loop in prolog case. @@ -648,10 +721,11 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, // Clone all the basic blocks in the loop. If Count is 2, we don't clone // the loop, otherwise we create a cloned loop to execute the extra // iterations. This function adds the appropriate CFG connections. - BasicBlock *InsertBot = UseEpilogRemainder ? Exit : PrologExit; + BasicBlock *InsertBot = UseEpilogRemainder ? LatchExit : PrologExit; BasicBlock *InsertTop = UseEpilogRemainder ? 
EpilogPreHeader : PrologPreHeader; - CloneLoopBlocks(L, ModVal, CreateRemainderLoop, UseEpilogRemainder, InsertTop, - InsertBot, NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI); + Loop *remainderLoop = CloneLoopBlocks( + L, ModVal, CreateRemainderLoop, UseEpilogRemainder, InsertTop, InsertBot, + NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI); // Insert the cloned blocks into the function. F->getBasicBlockList().splice(InsertBot->getIterator(), @@ -659,6 +733,66 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, NewBlocks[0]->getIterator(), F->end()); + // Now the loop blocks are cloned and the other exiting blocks from the + // remainder are connected to the original Loop's exit blocks. The remaining + // work is to update the phi nodes in the original loop, and take in the + // values from the cloned region. Also update the dominator info for + // OtherExits and their immediate successors, since we have new edges into + // OtherExits. + SmallSet ImmediateSuccessorsOfExitBlocks; + for (auto *BB : OtherExits) { + for (auto &II : *BB) { + + // Given we preserve LCSSA form, we know that the values used outside the + // loop will be used through these phi nodes at the exit blocks that are + // transformed below. + if (!isa(II)) + break; + PHINode *Phi = cast(&II); + unsigned oldNumOperands = Phi->getNumIncomingValues(); + // Add the incoming values from the remainder code to the end of the phi + // node. + for (unsigned i =0; i < oldNumOperands; i++){ + Value *newVal = VMap[Phi->getIncomingValue(i)]; + // newVal can be a constant or derived from values outside the loop, and + // hence need not have a VMap value. + if (!newVal) + newVal = Phi->getIncomingValue(i); + Phi->addIncoming(newVal, + cast(VMap[Phi->getIncomingBlock(i)])); + } + } +#if defined(EXPENSIVE_CHECKS) && !defined(NDEBUG) + for (BasicBlock *SuccBB : successors(BB)) { + assert(!(any_of(OtherExits, + [SuccBB](BasicBlock *EB) { return EB == SuccBB; }) || + SuccBB == LatchExit) && + "Breaks the definition of dedicated exits!"); + } +#endif + // Update the dominator info because the immediate dominator is no longer the + // header of the original Loop. BB has edges both from L and remainder code. + // Since the preheader determines which loop is run (L or directly jump to + // the remainder code), we set the immediate dominator as the preheader. + if (DT) { + DT->changeImmediateDominator(BB, PreHeader); + // Also update the IDom for immediate successors of BB. If the current + // IDom is the header, update the IDom to be the preheader because that is + // the nearest common dominator of all predecessors of SuccBB. We need to + // check for IDom being the header because successors of exit blocks can + // have edges from outside the loop, and we should not incorrectly update + // the IDom in that case. + for (BasicBlock *SuccBB: successors(BB)) + if (ImmediateSuccessorsOfExitBlocks.insert(SuccBB).second) { + if (DT->getNode(SuccBB)->getIDom()->getBlock() == Header) { + assert(!SuccBB->getSinglePredecessor() && + "BB should be the IDom then!"); + DT->changeImmediateDominator(SuccBB, PreHeader); + } + } + } + } + // Loop structure should be the following: // Epilog Prolog // @@ -672,7 +806,7 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, // EpilogHeader Header // ... ... // EpilogLatch Latch - // Exit Exit + // LatchExit LatchExit // Rewrite the cloned instruction operands to use the values created when the // clone is created. 
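
The phi update above doubles each incoming list: every original (value, block) pair gets a sibling taken through the clone's value map, falling back to the original value when it was defined outside the cloned region and so has no mapping. The same bookkeeping over integer ids (a sketch; VMap lookups are modeled with a std::map):

    #include <map>
    #include <utility>
    #include <vector>

    struct Phi {
      std::vector<std::pair<int, int>> incoming; // (value id, block id)
    };

    void addRemainderIncoming(Phi &phi, const std::map<int, int> &vmap) {
      const size_t oldN = phi.incoming.size(); // snapshot before appending
      for (size_t i = 0; i < oldN; ++i) {
        int val = phi.incoming[i].first;
        int blk = phi.incoming[i].second;
        auto it = vmap.find(val);
        int newVal = (it == vmap.end()) ? val : it->second; // fallback
        // Incoming blocks are inside the cloned region, so they must map.
        phi.incoming.emplace_back(newVal, vmap.at(blk));
      }
    }
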
@@ -686,7 +820,7 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, if (UseEpilogRemainder) { // Connect the epilog code to the original loop and update the // PHI functions. - ConnectEpilog(L, ModVal, NewExit, Exit, PreHeader, + ConnectEpilog(L, ModVal, NewExit, LatchExit, PreHeader, EpilogPreHeader, NewPreHeader, VMap, DT, LI, PreserveLCSSA); @@ -712,8 +846,8 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, } else { // Connect the prolog code to the original loop and update the // PHI functions. - ConnectProlog(L, BECount, Count, PrologExit, PreHeader, NewPreHeader, - VMap, DT, LI, PreserveLCSSA); + ConnectProlog(L, BECount, Count, PrologExit, LatchExit, PreHeader, + NewPreHeader, VMap, DT, LI, PreserveLCSSA); } // If this loop is nested, then the loop unroller changes the code in the @@ -721,6 +855,19 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, if (Loop *ParentLoop = L->getParentLoop()) SE->forgetLoop(ParentLoop); + // Canonicalize to LoopSimplifyForm both original and remainder loops. We + // cannot rely on the LoopUnrollPass to do this because it only does + // canonicalization for parent/subloops and not the sibling loops. + if (OtherExits.size() > 0) { + // Generate dedicated exit blocks for the original loop, to preserve + // LoopSimplifyForm. + formDedicatedExitBlocks(L, DT, LI, PreserveLCSSA); + // Generate dedicated exit blocks for the remainder loop if one exists, to + // preserve LoopSimplifyForm. + if (remainderLoop) + formDedicatedExitBlocks(remainderLoop, DT, LI, PreserveLCSSA); + } + NumRuntimeUnrolled++; return true; } diff --git a/interpreter/llvm/src/lib/Transforms/Utils/LoopUtils.cpp b/interpreter/llvm/src/lib/Transforms/Utils/LoopUtils.cpp index 81f033e7d51a2..3c522786641a1 100644 --- a/interpreter/llvm/src/lib/Transforms/Utils/LoopUtils.cpp +++ b/interpreter/llvm/src/lib/Transforms/Utils/LoopUtils.cpp @@ -12,17 +12,17 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/LoopUtils.h" +#include "llvm/ADT/ScopeExit.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/GlobalsModRef.h" -#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" -#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" @@ -30,6 +30,7 @@ #include "llvm/IR/ValueHandle.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" using namespace llvm; using namespace llvm::PatternMatch; @@ -88,8 +89,7 @@ RecurrenceDescriptor::lookThroughAnd(PHINode *Phi, Type *&RT, // Matches either I & 2^x-1 or 2^x-1 & I. If we find a match, we update RT // with a new integer type of the corresponding bit width. 
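
The comment just above describes the mask test that the hunk below rewrites with m_c_And: an AND mask M selects the low bits of a value exactly when M + 1 is a power of two, and the recovered type width is log2(M + 1), which is what (*M + 1).exactLogBase2() computes on APInts. In plain C++:

    #include <cstdint>

    // Returns log2(m + 1) when m has the form 2^x - 1, else -1,
    // mirroring exactLogBase2 on M + 1.
    int maskedBitWidth(uint64_t m) {
      uint64_t p = m + 1;                 // wraps to 0 when m is all-ones
      if (p == 0 || (p & (p - 1)) != 0)
        return -1;                        // m is not a low-bit mask
      int bits = 0;
      while (p >>= 1)
        ++bits;
      return bits;                        // e.g. m = 0xFF -> 8
    }
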
- if (match(J, m_CombineOr(m_And(m_Instruction(I), m_APInt(M)), - m_And(m_APInt(M), m_Instruction(I))))) { + if (match(J, m_c_And(m_Instruction(I), m_APInt(M)))) { int32_t Bits = (*M + 1).exactLogBase2(); if (Bits > 0) { RT = IntegerType::get(Phi->getContext(), Bits); @@ -528,8 +528,9 @@ bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop, return false; } -bool RecurrenceDescriptor::isFirstOrderRecurrence(PHINode *Phi, Loop *TheLoop, - DominatorTree *DT) { +bool RecurrenceDescriptor::isFirstOrderRecurrence( + PHINode *Phi, Loop *TheLoop, + DenseMap<Instruction *, Instruction *> &SinkAfter, DominatorTree *DT) { // Ensure the phi node is in the loop header and has two incoming values. if (Phi->getParent() != TheLoop->getHeader() || @@ -551,12 +552,24 @@ bool RecurrenceDescriptor::isFirstOrderRecurrence(PHINode *Phi, Loop *TheLoop, // Get the previous value. The previous value comes from the latch edge while // the initial value comes from the preheader edge. auto *Previous = dyn_cast<Instruction>(Phi->getIncomingValueForBlock(Latch)); - if (!Previous || !TheLoop->contains(Previous) || isa<PHINode>(Previous)) + if (!Previous || !TheLoop->contains(Previous) || isa<PHINode>(Previous) || + SinkAfter.count(Previous)) // Cannot rely on dominance due to motion. return false; // Ensure every user of the phi node is dominated by the previous value. // The dominance requirement ensures the loop vectorizer will not need to // vectorize the initial value prior to the first iteration of the loop. + // TODO: Consider extending this sinking to handle other kinds of instructions + // and expressions, beyond sinking a single cast past Previous. + if (Phi->hasOneUse()) { + auto *I = Phi->user_back(); + if (I->isCast() && (I->getParent() == Phi->getParent()) && I->hasOneUse() && + DT->dominates(Previous, I->user_back())) { + SinkAfter[I] = Previous; + return true; + } + } + for (User *U : Phi->users()) if (auto *I = dyn_cast<Instruction>(U)) { if (!DT->dominates(Previous, I)) @@ -924,6 +937,69 @@ bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop, return true; } +bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, + bool PreserveLCSSA) { + bool Changed = false; + + // We re-use a vector for the in-loop predecessors. + SmallVector InLoopPredecessors; + + auto RewriteExit = [&](BasicBlock *BB) { + assert(InLoopPredecessors.empty() && + "Must start with an empty predecessors list!"); + auto Cleanup = make_scope_exit([&] { InLoopPredecessors.clear(); }); + + // See if there are any non-loop predecessors of this exit block and + // keep track of the in-loop predecessors. + bool IsDedicatedExit = true; + for (auto *PredBB : predecessors(BB)) + if (L->contains(PredBB)) { + if (isa<IndirectBrInst>(PredBB->getTerminator())) + // We cannot rewrite exiting edges from an indirectbr. + return false; + + InLoopPredecessors.push_back(PredBB); + } else { + IsDedicatedExit = false; + } + + assert(!InLoopPredecessors.empty() && "Must have *some* loop predecessor!"); + + // Nothing to do if this is already a dedicated exit. + if (IsDedicatedExit) + return false; + + auto *NewExitBB = SplitBlockPredecessors( + BB, InLoopPredecessors, ".loopexit", DT, LI, PreserveLCSSA); + + if (!NewExitBB) + DEBUG(dbgs() << "WARNING: Can't create a dedicated exit block for loop: " + << *L << "\n"); + else + DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block " + << NewExitBB->getName() << "\n"); + return true; + }; + + // Walk the exit blocks directly rather than building up a data structure for + // them, but only visit each one once.
+ SmallPtrSet Visited; + for (auto *BB : L->blocks()) + for (auto *SuccBB : successors(BB)) { + // We're looking for exit blocks so skip in-loop successors. + if (L->contains(SuccBB)) + continue; + + // Visit each exit block exactly once. + if (!Visited.insert(SuccBB).second) + continue; + + Changed |= RewriteExit(SuccBB); + } + + return Changed; +} + /// \brief Returns the instructions that use values defined in the loop. SmallVector llvm::findDefsUsedOutsideOfLoop(Loop *L) { SmallVector UsedOutside; @@ -1300,16 +1376,21 @@ Value *llvm::createTargetReduction(IRBuilder<> &Builder, } } -void llvm::propagateIRFlags(Value *I, ArrayRef VL) { - if (auto *VecOp = dyn_cast(I)) { - if (auto *I0 = dyn_cast(VL[0])) { - // VecOVp is initialized to the 0th scalar, so start counting from index - // '1'. - VecOp->copyIRFlags(I0); - for (int i = 1, e = VL.size(); i < e; ++i) { - if (auto *Scalar = dyn_cast(VL[i])) - VecOp->andIRFlags(Scalar); - } - } +void llvm::propagateIRFlags(Value *I, ArrayRef VL, Value *OpValue) { + auto *VecOp = dyn_cast(I); + if (!VecOp) + return; + auto *Intersection = (OpValue == nullptr) ? dyn_cast(VL[0]) + : dyn_cast(OpValue); + if (!Intersection) + return; + const unsigned Opcode = Intersection->getOpcode(); + VecOp->copyIRFlags(Intersection); + for (auto *V : VL) { + auto *Instr = dyn_cast(V); + if (!Instr) + continue; + if (OpValue == nullptr || Opcode == Instr->getOpcode()) + VecOp->andIRFlags(V); } } diff --git a/interpreter/llvm/src/lib/Transforms/Utils/LowerMemIntrinsics.cpp b/interpreter/llvm/src/lib/Transforms/Utils/LowerMemIntrinsics.cpp index c7cb561b5e21d..900450b400612 100644 --- a/interpreter/llvm/src/lib/Transforms/Utils/LowerMemIntrinsics.cpp +++ b/interpreter/llvm/src/lib/Transforms/Utils/LowerMemIntrinsics.cpp @@ -8,12 +8,256 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/LowerMemIntrinsics.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/IR/IntrinsicInst.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" using namespace llvm; +static unsigned getLoopOperandSizeInBytes(Type *Type) { + if (VectorType *VTy = dyn_cast(Type)) { + return VTy->getBitWidth() / 8; + } + + return Type->getPrimitiveSizeInBits() / 8; +} + +void llvm::createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr, + Value *DstAddr, ConstantInt *CopyLen, + unsigned SrcAlign, unsigned DestAlign, + bool SrcIsVolatile, bool DstIsVolatile, + const TargetTransformInfo &TTI) { + // No need to expand zero length copies. 
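Before reading the body of createMemCpyLoopKnownSize below, it may help to see its size arithmetic in isolation: the constant length is split into as many full loop-operand-sized chunks as possible, and the tail is covered by progressively smaller residual operations. A standalone sketch, assuming a hypothetical 8/4/2/1-byte residual sequence (the real pass obtains both the loop operand type and the residual types from TTI):

// Standalone model of the chunking arithmetic; numbers chosen for
// illustration only.
#include <cstdint>
#include <cstdio>

int main() {
  const uint64_t CopyLen = 31, LoopOpSize = 8;
  uint64_t LoopEndCount = CopyLen / LoopOpSize;       // 3 full iterations
  uint64_t BytesCopied  = LoopEndCount * LoopOpSize;  // 24 bytes in the loop
  // Residual: assume power-of-two operand sizes; the pass asks TTI for the
  // actual sequence via getMemcpyLoopResidualLoweringType.
  for (uint64_t OpSize : {8, 4, 2, 1})
    while (CopyLen - BytesCopied >= OpSize)
      BytesCopied += OpSize;  // copies a 4-, a 2- and a 1-byte chunk: 7 bytes
  std::printf("%llu of %llu bytes copied\n",
              (unsigned long long)BytesCopied, (unsigned long long)CopyLen);
  return 0;
}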
+ if (CopyLen->isZero()) + return; + + BasicBlock *PreLoopBB = InsertBefore->getParent(); + BasicBlock *PostLoopBB = nullptr; + Function *ParentFunc = PreLoopBB->getParent(); + LLVMContext &Ctx = PreLoopBB->getContext(); + + Type *TypeOfCopyLen = CopyLen->getType(); + Type *LoopOpType = + TTI.getMemcpyLoopLoweringType(Ctx, CopyLen, SrcAlign, DestAlign); + + unsigned LoopOpSize = getLoopOperandSizeInBytes(LoopOpType); + uint64_t LoopEndCount = CopyLen->getZExtValue() / LoopOpSize; + + unsigned SrcAS = cast(SrcAddr->getType())->getAddressSpace(); + unsigned DstAS = cast(DstAddr->getType())->getAddressSpace(); + + if (LoopEndCount != 0) { + // Split + PostLoopBB = PreLoopBB->splitBasicBlock(InsertBefore, "memcpy-split"); + BasicBlock *LoopBB = + BasicBlock::Create(Ctx, "load-store-loop", ParentFunc, PostLoopBB); + PreLoopBB->getTerminator()->setSuccessor(0, LoopBB); + + IRBuilder<> PLBuilder(PreLoopBB->getTerminator()); + + // Cast the Src and Dst pointers to pointers to the loop operand type (if + // needed). + PointerType *SrcOpType = PointerType::get(LoopOpType, SrcAS); + PointerType *DstOpType = PointerType::get(LoopOpType, DstAS); + if (SrcAddr->getType() != SrcOpType) { + SrcAddr = PLBuilder.CreateBitCast(SrcAddr, SrcOpType); + } + if (DstAddr->getType() != DstOpType) { + DstAddr = PLBuilder.CreateBitCast(DstAddr, DstOpType); + } + + IRBuilder<> LoopBuilder(LoopBB); + PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 2, "loop-index"); + LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0U), PreLoopBB); + // Loop Body + Value *SrcGEP = + LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex); + Value *Load = LoopBuilder.CreateLoad(SrcGEP, SrcIsVolatile); + Value *DstGEP = + LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex); + LoopBuilder.CreateStore(Load, DstGEP, DstIsVolatile); + + Value *NewIndex = + LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1U)); + LoopIndex->addIncoming(NewIndex, LoopBB); + + // Create the loop branch condition. + Constant *LoopEndCI = ConstantInt::get(TypeOfCopyLen, LoopEndCount); + LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, LoopEndCI), + LoopBB, PostLoopBB); + } + + uint64_t BytesCopied = LoopEndCount * LoopOpSize; + uint64_t RemainingBytes = CopyLen->getZExtValue() - BytesCopied; + if (RemainingBytes) { + IRBuilder<> RBuilder(PostLoopBB ? PostLoopBB->getFirstNonPHI() : InsertBefore); + + // Update the alignment based on the copy size used in the loop body. + SrcAlign = std::min(SrcAlign, LoopOpSize); + DestAlign = std::min(DestAlign, LoopOpSize); + + SmallVector RemainingOps; + TTI.getMemcpyLoopResidualLoweringType(RemainingOps, Ctx, RemainingBytes, + SrcAlign, DestAlign); + + for (auto OpTy : RemainingOps) { + // Calculate the new index. + unsigned OperandSize = getLoopOperandSizeInBytes(OpTy); + uint64_t GepIndex = BytesCopied / OperandSize; + assert(GepIndex * OperandSize == BytesCopied && + "Division should have no Remainder!"); + // Cast source to operand type and load + PointerType *SrcPtrType = PointerType::get(OpTy, SrcAS); + Value *CastedSrc = SrcAddr->getType() == SrcPtrType + ? SrcAddr + : RBuilder.CreateBitCast(SrcAddr, SrcPtrType); + Value *SrcGEP = RBuilder.CreateInBoundsGEP( + OpTy, CastedSrc, ConstantInt::get(TypeOfCopyLen, GepIndex)); + Value *Load = RBuilder.CreateLoad(SrcGEP, SrcIsVolatile); + + // Cast destination to operand type and store. + PointerType *DstPtrType = PointerType::get(OpTy, DstAS); + Value *CastedDst = DstAddr->getType() == DstPtrType + ?
DstAddr + : RBuilder.CreateBitCast(DstAddr, DstPtrType); + Value *DstGEP = RBuilder.CreateInBoundsGEP( + OpTy, CastedDst, ConstantInt::get(TypeOfCopyLen, GepIndex)); + RBuilder.CreateStore(Load, DstGEP, DstIsVolatile); + + BytesCopied += OperandSize; + } + } + assert(BytesCopied == CopyLen->getZExtValue() && + "Bytes copied should match size in the call!"); +} + +void llvm::createMemCpyLoopUnknownSize(Instruction *InsertBefore, + Value *SrcAddr, Value *DstAddr, + Value *CopyLen, unsigned SrcAlign, + unsigned DestAlign, bool SrcIsVolatile, + bool DstIsVolatile, + const TargetTransformInfo &TTI) { + BasicBlock *PreLoopBB = InsertBefore->getParent(); + BasicBlock *PostLoopBB = + PreLoopBB->splitBasicBlock(InsertBefore, "post-loop-memcpy-expansion"); + + Function *ParentFunc = PreLoopBB->getParent(); + LLVMContext &Ctx = PreLoopBB->getContext(); + + Type *LoopOpType = + TTI.getMemcpyLoopLoweringType(Ctx, CopyLen, SrcAlign, DestAlign); + unsigned LoopOpSize = getLoopOperandSizeInBytes(LoopOpType); + + IRBuilder<> PLBuilder(PreLoopBB->getTerminator()); + + unsigned SrcAS = cast(SrcAddr->getType())->getAddressSpace(); + unsigned DstAS = cast(DstAddr->getType())->getAddressSpace(); + PointerType *SrcOpType = PointerType::get(LoopOpType, SrcAS); + PointerType *DstOpType = PointerType::get(LoopOpType, DstAS); + if (SrcAddr->getType() != SrcOpType) { + SrcAddr = PLBuilder.CreateBitCast(SrcAddr, SrcOpType); + } + if (DstAddr->getType() != DstOpType) { + DstAddr = PLBuilder.CreateBitCast(DstAddr, DstOpType); + } + + // Calculate the loop trip count, and remaining bytes to copy after the loop. + Type *CopyLenType = CopyLen->getType(); + IntegerType *ILengthType = dyn_cast(CopyLenType); + assert(ILengthType && + "expected size argument to memcpy to be an integer type!"); + ConstantInt *CILoopOpSize = ConstantInt::get(ILengthType, LoopOpSize); + Value *RuntimeLoopCount = PLBuilder.CreateUDiv(CopyLen, CILoopOpSize); + Value *RuntimeResidual = PLBuilder.CreateURem(CopyLen, CILoopOpSize); + Value *RuntimeBytesCopied = PLBuilder.CreateSub(CopyLen, RuntimeResidual); + + BasicBlock *LoopBB = + BasicBlock::Create(Ctx, "loop-memcpy-expansion", ParentFunc, nullptr); + IRBuilder<> LoopBuilder(LoopBB); + + PHINode *LoopIndex = LoopBuilder.CreatePHI(CopyLenType, 2, "loop-index"); + LoopIndex->addIncoming(ConstantInt::get(CopyLenType, 0U), PreLoopBB); + + Value *SrcGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex); + Value *Load = LoopBuilder.CreateLoad(SrcGEP, SrcIsVolatile); + Value *DstGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex); + LoopBuilder.CreateStore(Load, DstGEP, DstIsVolatile); + + Value *NewIndex = + LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(CopyLenType, 1U)); + LoopIndex->addIncoming(NewIndex, LoopBB); + + Type *Int8Type = Type::getInt8Ty(Ctx); + if (LoopOpType != Int8Type) { + // Loop body for the residual copy. + BasicBlock *ResLoopBB = BasicBlock::Create(Ctx, "loop-memcpy-residual", + PreLoopBB->getParent(), nullptr); + // Residual loop header. + BasicBlock *ResHeaderBB = BasicBlock::Create( + Ctx, "loop-memcpy-residual-header", PreLoopBB->getParent(), nullptr); + + // Need to update the pre-loop basic block to branch to the correct place. + // Branch to the main loop if the count is non-zero, branch to the residual + // loop if the copy size is smaller than 1 iteration of the main loop but + // non-zero, and finally branch to after the residual loop if the memcpy + // size is zero.
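To make the comment above concrete: with a runtime length, the pre-loop block branches to the main loop when the wide trip count is non-zero, to the residual header when only a tail smaller than one wide operand remains, and past everything when the length is zero. The trip-count split itself is just the udiv/urem pair computed earlier; a standalone example:

// Standalone model of the runtime trip-count split, for CopyLen = 13 and a
// 4-byte loop operand (values chosen purely for illustration).
#include <cstdint>
#include <cstdio>

int main() {
  uint64_t CopyLen = 13, LoopOpSize = 4;
  uint64_t RuntimeLoopCount   = CopyLen / LoopOpSize;       // 3 main iterations
  uint64_t RuntimeResidual    = CopyLen % LoopOpSize;       // 1 byte left over
  uint64_t RuntimeBytesCopied = CopyLen - RuntimeResidual;  // tail starts at 12
  std::printf("%llu iterations, tail at byte %llu, %llu residual byte(s)\n",
              (unsigned long long)RuntimeLoopCount,
              (unsigned long long)RuntimeBytesCopied,
              (unsigned long long)RuntimeResidual);
  return 0;
}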
+ ConstantInt *Zero = ConstantInt::get(ILengthType, 0U); + PLBuilder.CreateCondBr(PLBuilder.CreateICmpNE(RuntimeLoopCount, Zero), + LoopBB, ResHeaderBB); + PreLoopBB->getTerminator()->eraseFromParent(); + + LoopBuilder.CreateCondBr( + LoopBuilder.CreateICmpULT(NewIndex, RuntimeLoopCount), LoopBB, + ResHeaderBB); + + // Determine if we need to branch to the residual loop or bypass it. + IRBuilder<> RHBuilder(ResHeaderBB); + RHBuilder.CreateCondBr(RHBuilder.CreateICmpNE(RuntimeResidual, Zero), + ResLoopBB, PostLoopBB); + + // Copy the residual with a single-byte load/store loop. + IRBuilder<> ResBuilder(ResLoopBB); + PHINode *ResidualIndex = + ResBuilder.CreatePHI(CopyLenType, 2, "residual-loop-index"); + ResidualIndex->addIncoming(Zero, ResHeaderBB); + + Value *SrcAsInt8 = + ResBuilder.CreateBitCast(SrcAddr, PointerType::get(Int8Type, SrcAS)); + Value *DstAsInt8 = + ResBuilder.CreateBitCast(DstAddr, PointerType::get(Int8Type, DstAS)); + Value *FullOffset = ResBuilder.CreateAdd(RuntimeBytesCopied, ResidualIndex); + Value *SrcGEP = + ResBuilder.CreateInBoundsGEP(Int8Type, SrcAsInt8, FullOffset); + Value *Load = ResBuilder.CreateLoad(SrcGEP, SrcIsVolatile); + Value *DstGEP = + ResBuilder.CreateInBoundsGEP(Int8Type, DstAsInt8, FullOffset); + ResBuilder.CreateStore(Load, DstGEP, DstIsVolatile); + + Value *ResNewIndex = + ResBuilder.CreateAdd(ResidualIndex, ConstantInt::get(CopyLenType, 1U)); + ResidualIndex->addIncoming(ResNewIndex, ResLoopBB); + + // Create the loop branch condition. + ResBuilder.CreateCondBr( + ResBuilder.CreateICmpULT(ResNewIndex, RuntimeResidual), ResLoopBB, + PostLoopBB); + } else { + // In this case the loop operand type was a byte, and there is no need for a + // residual loop to copy the remaining memory after the main loop. + // We do, however, need to patch up the control flow by creating the + // terminators for the preloop block and the memcpy loop.
+ ConstantInt *Zero = ConstantInt::get(ILengthType, 0U); + PLBuilder.CreateCondBr(PLBuilder.CreateICmpNE(RuntimeLoopCount, Zero), + LoopBB, PostLoopBB); + PreLoopBB->getTerminator()->eraseFromParent(); + LoopBuilder.CreateCondBr( + LoopBuilder.CreateICmpULT(NewIndex, RuntimeLoopCount), LoopBB, + PostLoopBB); + } +} + void llvm::createMemCpyLoop(Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, Value *CopyLen, unsigned SrcAlign, unsigned DestAlign, @@ -27,7 +271,6 @@ void llvm::createMemCpyLoop(Instruction *InsertBefore, BasicBlock *LoopBB = BasicBlock::Create(F->getContext(), "loadstoreloop", F, NewBB); - OrigBB->getTerminator()->setSuccessor(0, LoopBB); IRBuilder<> Builder(OrigBB->getTerminator()); // SrcAddr and DstAddr are expected to be pointer types, @@ -39,6 +282,11 @@ void llvm::createMemCpyLoop(Instruction *InsertBefore, SrcAddr = Builder.CreateBitCast(SrcAddr, Builder.getInt8PtrTy(SrcAS)); DstAddr = Builder.CreateBitCast(DstAddr, Builder.getInt8PtrTy(DstAS)); + Builder.CreateCondBr( + Builder.CreateICmpEQ(ConstantInt::get(TypeOfCopyLen, 0), CopyLen), NewBB, + LoopBB); + OrigBB->getTerminator()->eraseFromParent(); + IRBuilder<> LoopBuilder(LoopBB); PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 0); LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), OrigBB); @@ -167,6 +415,7 @@ static void createMemMoveLoop(Instruction *InsertBefore, static void createMemSetLoop(Instruction *InsertBefore, Value *DstAddr, Value *CopyLen, Value *SetValue, unsigned Align, bool IsVolatile) { + Type *TypeOfCopyLen = CopyLen->getType(); BasicBlock *OrigBB = InsertBefore->getParent(); Function *F = OrigBB->getParent(); BasicBlock *NewBB = @@ -174,7 +423,6 @@ static void createMemSetLoop(Instruction *InsertBefore, BasicBlock *LoopBB = BasicBlock::Create(F->getContext(), "loadstoreloop", F, NewBB); - OrigBB->getTerminator()->setSuccessor(0, LoopBB); IRBuilder<> Builder(OrigBB->getTerminator()); // Cast pointer to the type of value getting stored @@ -182,9 +430,14 @@ static void createMemSetLoop(Instruction *InsertBefore, DstAddr = Builder.CreateBitCast(DstAddr, PointerType::get(SetValue->getType(), dstAS)); + Builder.CreateCondBr( + Builder.CreateICmpEQ(ConstantInt::get(TypeOfCopyLen, 0), CopyLen), NewBB, + LoopBB); + OrigBB->getTerminator()->eraseFromParent(); + IRBuilder<> LoopBuilder(LoopBB); - PHINode *LoopIndex = LoopBuilder.CreatePHI(CopyLen->getType(), 0); - LoopIndex->addIncoming(ConstantInt::get(CopyLen->getType(), 0), OrigBB); + PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 0); + LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), OrigBB); LoopBuilder.CreateStore( SetValue, @@ -192,22 +445,48 @@ static void createMemSetLoop(Instruction *InsertBefore, IsVolatile); Value *NewIndex = - LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(CopyLen->getType(), 1)); + LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1)); LoopIndex->addIncoming(NewIndex, LoopBB); LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, CopyLen), LoopBB, NewBB); } -void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy) { - createMemCpyLoop(/* InsertBefore */ Memcpy, - /* SrcAddr */ Memcpy->getRawSource(), - /* DstAddr */ Memcpy->getRawDest(), - /* CopyLen */ Memcpy->getLength(), - /* SrcAlign */ Memcpy->getAlignment(), - /* DestAlign */ Memcpy->getAlignment(), - /* SrcIsVolatile */ Memcpy->isVolatile(), - /* DstIsVolatile */ Memcpy->isVolatile()); +void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy, + const TargetTransformInfo &TTI) { + // Original implementation + if 
(!TTI.useWideIRMemcpyLoopLowering()) { + createMemCpyLoop(/* InsertBefore */ Memcpy, + /* SrcAddr */ Memcpy->getRawSource(), + /* DstAddr */ Memcpy->getRawDest(), + /* CopyLen */ Memcpy->getLength(), + /* SrcAlign */ Memcpy->getAlignment(), + /* DestAlign */ Memcpy->getAlignment(), + /* SrcIsVolatile */ Memcpy->isVolatile(), + /* DstIsVolatile */ Memcpy->isVolatile()); + } else { + if (ConstantInt *CI = dyn_cast(Memcpy->getLength())) { + createMemCpyLoopKnownSize(/* InsertBefore */ Memcpy, + /* SrcAddr */ Memcpy->getRawSource(), + /* DstAddr */ Memcpy->getRawDest(), + /* CopyLen */ CI, + /* SrcAlign */ Memcpy->getAlignment(), + /* DestAlign */ Memcpy->getAlignment(), + /* SrcIsVolatile */ Memcpy->isVolatile(), + /* DstIsVolatile */ Memcpy->isVolatile(), + /* TargetTransformInfo */ TTI); + } else { + createMemCpyLoopUnknownSize(/* InsertBefore */ Memcpy, + /* SrcAddr */ Memcpy->getRawSource(), + /* DstAddr */ Memcpy->getRawDest(), + /* CopyLen */ Memcpy->getLength(), + /* SrcAlign */ Memcpy->getAlignment(), + /* DestAlign */ Memcpy->getAlignment(), + /* SrcIsVolatile */ Memcpy->isVolatile(), + /* DstIsVolatile */ Memcpy->isVolatile(), + /* TargetTransformInfo */ TTI); + } + } } void llvm::expandMemMoveAsLoop(MemMoveInst *Memmove) { diff --git a/interpreter/llvm/src/lib/Transforms/Utils/LowerSwitch.cpp b/interpreter/llvm/src/lib/Transforms/Utils/LowerSwitch.cpp index 8959e77438e99..890afbc46e636 100644 --- a/interpreter/llvm/src/lib/Transforms/Utils/LowerSwitch.cpp +++ b/interpreter/llvm/src/lib/Transforms/Utils/LowerSwitch.cpp @@ -13,7 +13,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Scalar.h" #include "llvm/ADT/STLExtras.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" @@ -24,6 +23,7 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h" #include diff --git a/interpreter/llvm/src/lib/Transforms/Utils/MetaRenamer.cpp b/interpreter/llvm/src/lib/Transforms/Utils/MetaRenamer.cpp index 481c6aa29c3a1..9f2ad540c83dd 100644 --- a/interpreter/llvm/src/lib/Transforms/Utils/MetaRenamer.cpp +++ b/interpreter/llvm/src/lib/Transforms/Utils/MetaRenamer.cpp @@ -13,7 +13,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/IPO.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/Analysis/TargetLibraryInfo.h" @@ -23,6 +22,7 @@ #include "llvm/IR/Type.h" #include "llvm/IR/TypeFinder.h" #include "llvm/Pass.h" +#include "llvm/Transforms/IPO.h" using namespace llvm; namespace { diff --git a/interpreter/llvm/src/lib/Transforms/Utils/OrderedInstructions.cpp b/interpreter/llvm/src/lib/Transforms/Utils/OrderedInstructions.cpp new file mode 100644 index 0000000000000..dc780542ce686 --- /dev/null +++ b/interpreter/llvm/src/lib/Transforms/Utils/OrderedInstructions.cpp @@ -0,0 +1,32 @@ +//===-- OrderedInstructions.cpp - Instruction dominance function ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines a utility to check the dominance relation of two instructions.
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/OrderedInstructions.h" +using namespace llvm; + +/// Given two instructions, use OrderedBasicBlock to check for the dominance +/// relation if the instructions are in the same basic block; otherwise, use +/// the dominator tree. +bool OrderedInstructions::dominates(const Instruction *InstA, + const Instruction *InstB) const { + const BasicBlock *IBB = InstA->getParent(); + // Use ordered basic block to do the dominance check in case the two + // instructions are in the same basic block. + if (IBB == InstB->getParent()) { + auto OBB = OBBMap.find(IBB); + if (OBB == OBBMap.end()) + OBB = OBBMap.insert({IBB, make_unique(IBB)}).first; + return OBB->second->dominates(InstA, InstB); + } + return DT->dominates(InstA->getParent(), InstB->getParent()); +} diff --git a/interpreter/llvm/src/lib/Transforms/Utils/PredicateInfo.cpp b/interpreter/llvm/src/lib/Transforms/Utils/PredicateInfo.cpp index 8877aeafecdec..d4cdaede6b86b 100644 --- a/interpreter/llvm/src/lib/Transforms/Utils/PredicateInfo.cpp +++ b/interpreter/llvm/src/lib/Transforms/Utils/PredicateInfo.cpp @@ -19,7 +19,6 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CFG.h" -#include "llvm/Analysis/OrderedBasicBlock.h" #include "llvm/IR/AssemblyAnnotationWriter.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" @@ -34,6 +33,7 @@ #include "llvm/Support/DebugCounter.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/OrderedInstructions.h" #include #define DEBUG_TYPE "predicateinfo" using namespace llvm; @@ -106,14 +106,27 @@ struct ValueDFS { bool EdgeOnly = false; }; +// Perform a strict weak ordering on instructions and arguments. +static bool valueComesBefore(OrderedInstructions &OI, const Value *A, + const Value *B) { + auto *ArgA = dyn_cast_or_null(A); + auto *ArgB = dyn_cast_or_null(B); + if (ArgA && !ArgB) + return true; + if (ArgB && !ArgA) + return false; + if (ArgA && ArgB) + return ArgA->getArgNo() < ArgB->getArgNo(); + return OI.dominates(cast(A), cast(B)); +} + // This compares ValueDFS structures, creating OrderedBasicBlocks where // necessary to compare uses/defs in the same block. Doing so allows us to walk // the minimum number of instructions necessary to compute our def/use ordering.
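The new OrderedInstructions::dominates above is cheap because it numbers a block's instructions at most once, on first query, and answers later same-block queries by comparing cached positions. A standalone model of that lazy caching scheme, with simplified stand-in types rather than the LLVM classes (illustration only):

#include <map>
#include <vector>

using Inst = int;                  // stand-in for llvm::Instruction
using Block = std::vector<Inst>;   // stand-in for llvm::BasicBlock

struct OrderedBlock {
  std::map<Inst, unsigned> Pos;
  explicit OrderedBlock(const Block &B) {
    unsigned N = 0;
    for (Inst I : B)
      Pos[I] = N++;                // number the block's instructions once
  }
  bool dominates(Inst A, Inst B) const { return Pos.at(A) < Pos.at(B); }
};

struct OrderedInsts {
  std::map<const Block *, OrderedBlock> Cache;
  bool dominates(const Block *BB, Inst A, Inst B) {
    auto It = Cache.find(BB);
    if (It == Cache.end())
      It = Cache.emplace(BB, OrderedBlock(*BB)).first;  // built lazily
    return It->second.dominates(A, B);
  }
};

int main() {
  Block BB = {10, 20, 30};
  OrderedInsts OI;
  return OI.dominates(&BB, 10, 30) ? 0 : 1;  // 10 precedes 30 in the block
}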
struct ValueDFS_Compare { - DenseMap> &OBBMap; - ValueDFS_Compare( - DenseMap> &OBBMap) - : OBBMap(OBBMap) {} + OrderedInstructions &OI; + ValueDFS_Compare(OrderedInstructions &OI) : OI(OI) {} + bool operator()(const ValueDFS &A, const ValueDFS &B) const { if (&A == &B) return false; @@ -196,23 +209,12 @@ struct ValueDFS_Compare { auto *ArgA = dyn_cast_or_null(ADef); auto *ArgB = dyn_cast_or_null(BDef); - if (ArgA && !ArgB) - return true; - if (ArgB && !ArgA) - return false; - if (ArgA && ArgB) - return ArgA->getArgNo() < ArgB->getArgNo(); + if (ArgA || ArgB) + return valueComesBefore(OI, ArgA, ArgB); auto *AInst = getDefOrUser(ADef, A.U); auto *BInst = getDefOrUser(BDef, B.U); - - auto *BB = AInst->getParent(); - auto LookupResult = OBBMap.find(BB); - if (LookupResult != OBBMap.end()) - return LookupResult->second->dominates(AInst, BInst); - - auto Result = OBBMap.insert({BB, make_unique(BB)}); - return Result.first->second->dominates(AInst, BInst); + return valueComesBefore(OI, AInst, BInst); } }; @@ -460,6 +462,9 @@ void PredicateInfo::buildPredicateInfo() { if (auto *BI = dyn_cast(BranchBB->getTerminator())) { if (!BI->isConditional()) continue; + // Can't insert conditional information if they all go to the same place. + if (BI->getSuccessor(0) == BI->getSuccessor(1)) + continue; processBranch(BI, BranchBB, OpsToRename); } else if (auto *SI = dyn_cast(BranchBB->getTerminator())) { processSwitch(SI, BranchBB, OpsToRename); @@ -541,8 +546,14 @@ Value *PredicateInfo::materializeStack(unsigned int &Counter, // // TODO: Use this algorithm to perform fast single-variable renaming in // promotememtoreg and memoryssa. -void PredicateInfo::renameUses(SmallPtrSetImpl &OpsToRename) { - ValueDFS_Compare Compare(OBBMap); +void PredicateInfo::renameUses(SmallPtrSetImpl &OpSet) { + // Sort OpsToRename since we are going to iterate it. + SmallVector OpsToRename(OpSet.begin(), OpSet.end()); + auto Comparator = [&](const Value *A, const Value *B) { + return valueComesBefore(OI, A, B); + }; + std::sort(OpsToRename.begin(), OpsToRename.end(), Comparator); + ValueDFS_Compare Compare(OI); // Compute liveness, and rename in O(uses) per Op. 
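The sort in renameUses relies on valueComesBefore being a strict weak ordering: arguments come before instructions and are ordered among themselves by argument number, while instructions are ordered by dominance. A simplified standalone model (DomPos stands in for the OrderedInstructions query; types are illustrative, not the LLVM ones):

#include <algorithm>
#include <vector>

struct Val {
  bool IsArg;
  unsigned ArgNo;   // meaningful when IsArg
  unsigned DomPos;  // stand-in for a dominance-based position
};

bool valueComesBefore(const Val &A, const Val &B) {
  if (A.IsArg && !B.IsArg) return true;    // arguments sort first
  if (B.IsArg && !A.IsArg) return false;
  if (A.IsArg && B.IsArg) return A.ArgNo < B.ArgNo;
  return A.DomPos < B.DomPos;              // OI.dominates(A, B) in the patch
}

int main() {
  // One instruction and two arguments; after sorting: arg0, arg1, inst.
  std::vector<Val> Ops = {{false, 0, 7}, {true, 1, 0}, {true, 0, 0}};
  std::sort(Ops.begin(), Ops.end(), valueComesBefore);
  return Ops.front().IsArg ? 0 : 1;
}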
for (auto *Op : OpsToRename) { unsigned Counter = 0; @@ -679,7 +690,7 @@ PredicateInfo::getValueInfo(Value *Operand) const { PredicateInfo::PredicateInfo(Function &F, DominatorTree &DT, AssumptionCache &AC) - : F(F), DT(DT), AC(AC) { + : F(F), DT(DT), AC(AC), OI(&DT) { // Push an empty operand info so that we can detect 0 as not finding one ValueInfos.resize(1); buildPredicateInfo(); diff --git a/interpreter/llvm/src/lib/Transforms/Utils/SSAUpdater.cpp b/interpreter/llvm/src/lib/Transforms/Utils/SSAUpdater.cpp index 8b6a2c3766d26..6ccf54e49dd31 100644 --- a/interpreter/llvm/src/lib/Transforms/Utils/SSAUpdater.cpp +++ b/interpreter/llvm/src/lib/Transforms/Utils/SSAUpdater.cpp @@ -11,9 +11,10 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Transforms/Utils/SSAUpdater.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/TinyPtrVector.h" #include "llvm/Analysis/InstructionSimplify.h" @@ -30,7 +31,6 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Utils/SSAUpdater.h" #include "llvm/Transforms/Utils/SSAUpdaterImpl.h" #include #include diff --git a/interpreter/llvm/src/lib/Transforms/Utils/SanitizerStats.cpp b/interpreter/llvm/src/lib/Transforms/Utils/SanitizerStats.cpp index 9afd175c10ed5..8c23957ac43e9 100644 --- a/interpreter/llvm/src/lib/Transforms/Utils/SanitizerStats.cpp +++ b/interpreter/llvm/src/lib/Transforms/Utils/SanitizerStats.cpp @@ -12,13 +12,13 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/SanitizerStats.h" -#include "llvm/Transforms/Utils/ModuleUtils.h" #include "llvm/ADT/Triple.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Module.h" +#include "llvm/Transforms/Utils/ModuleUtils.h" using namespace llvm; diff --git a/interpreter/llvm/src/lib/Transforms/Utils/SimplifyCFG.cpp b/interpreter/llvm/src/lib/Transforms/Utils/SimplifyCFG.cpp index b44bc74d6551e..8784b9702141a 100644 --- a/interpreter/llvm/src/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/interpreter/llvm/src/lib/Transforms/Utils/SimplifyCFG.cpp @@ -15,13 +15,13 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetOperations.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/EHPersonalities.h" @@ -29,8 +29,8 @@ #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/BasicBlock.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/CFG.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/Constant.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" @@ -55,7 +55,6 @@ #include "llvm/IR/Type.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" -#include "llvm/IR/DebugInfo.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -1376,53 +1375,6 @@ static bool HoistThenElseCodeToIf(BranchInst *BI, return true; } -// Is it 
legal to place a variable in operand \c OpIdx of \c I? -// FIXME: This should be promoted to Instruction. -static bool canReplaceOperandWithVariable(const Instruction *I, - unsigned OpIdx) { - // We can't have a PHI with a metadata type. - if (I->getOperand(OpIdx)->getType()->isMetadataTy()) - return false; - - // Early exit. - if (!isa(I->getOperand(OpIdx))) - return true; - - switch (I->getOpcode()) { - default: - return true; - case Instruction::Call: - case Instruction::Invoke: - // FIXME: many arithmetic intrinsics have no issue taking a - // variable, however it's hard to distingish these from - // specials such as @llvm.frameaddress that require a constant. - if (isa(I)) - return false; - - // Constant bundle operands may need to retain their constant-ness for - // correctness. - if (ImmutableCallSite(I).isBundleOperand(OpIdx)) - return false; - - return true; - - case Instruction::ShuffleVector: - // Shufflevector masks are constant. - return OpIdx != 2; - case Instruction::ExtractValue: - case Instruction::InsertValue: - // All operands apart from the first are constant. - return OpIdx == 0; - case Instruction::Alloca: - return false; - case Instruction::GetElementPtr: - if (OpIdx == 0) - return true; - gep_type_iterator It = std::next(gep_type_begin(I), OpIdx - 1); - return It.isSequential(); - } -} - // All instructions in Insts belong to different blocks that all unconditionally // branch to a common successor. Analyze each instruction and return true if it // would be possible to sink them into their successor, creating one common @@ -2235,7 +2187,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL, if (!BBI->use_empty()) TranslateMap[&*BBI] = V; if (!N->mayHaveSideEffects()) { - delete N; // Instruction folded away, don't need actual inst + N->deleteValue(); // Instruction folded away, don't need actual inst N = nullptr; } } else { @@ -4368,8 +4320,7 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC, const DataLayout &DL) { Value *Cond = SI->getCondition(); unsigned Bits = Cond->getType()->getIntegerBitWidth(); - KnownBits Known(Bits); - computeKnownBits(Cond, Known, DL, 0, AC, SI); + KnownBits Known = computeKnownBits(Cond, DL, 0, AC, SI); // We can also eliminate cases by determining that their values are outside of // the limited range of the condition based on how many significant (non-sign) @@ -4380,7 +4331,7 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC, // Gather dead cases. SmallVector DeadCases; for (auto &Case : SI->cases()) { - APInt CaseVal = Case.getCaseValue()->getValue(); + const APInt &CaseVal = Case.getCaseValue()->getValue(); if (Known.Zero.intersects(CaseVal) || !Known.One.isSubsetOf(CaseVal) || (CaseVal.getMinSignedBits() > MaxSignificantBitsInCond)) { DeadCases.push_back(Case.getCaseValue()); @@ -4830,7 +4781,7 @@ class SwitchLookupTable { SwitchLookupTable( Module &M, uint64_t TableSize, ConstantInt *Offset, const SmallVectorImpl> &Values, - Constant *DefaultValue, const DataLayout &DL); + Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName); /// Build instructions with Builder to retrieve the value at /// the position given by Index in the lookup table. 
@@ -4884,7 +4835,7 @@ class SwitchLookupTable { SwitchLookupTable::SwitchLookupTable( Module &M, uint64_t TableSize, ConstantInt *Offset, const SmallVectorImpl> &Values, - Constant *DefaultValue, const DataLayout &DL) + Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName) : SingleValue(nullptr), BitMap(nullptr), BitMapElementTy(nullptr), LinearOffset(nullptr), LinearMultiplier(nullptr), Array(nullptr) { assert(Values.size() && "Can't build lookup table without values!"); @@ -4946,7 +4897,7 @@ SwitchLookupTable::SwitchLookupTable( LinearMappingPossible = false; break; } - APInt Val = ConstVal->getValue(); + const APInt &Val = ConstVal->getValue(); if (I != 0) { APInt Dist = Val - PrevVal; if (I == 1) { @@ -4992,7 +4943,7 @@ SwitchLookupTable::SwitchLookupTable( Array = new GlobalVariable(M, ArrayTy, /*constant=*/true, GlobalVariable::PrivateLinkage, Initializer, - "switch.table"); + "switch.table." + FuncName); Array->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); Kind = ArrayKind; } @@ -5382,7 +5333,9 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, // If using a bitmask, use any value to fill the lookup table holes. Constant *DV = NeedMask ? ResultLists[PHI][0].second : DefaultResults[PHI]; - SwitchLookupTable Table(Mod, TableSize, MinCaseVal, ResultList, DV, DL); + StringRef FuncName = SI->getParent()->getParent()->getName(); + SwitchLookupTable Table(Mod, TableSize, MinCaseVal, ResultList, DV, DL, + FuncName); Value *Result = Table.BuildLookup(TableIndex, Builder); @@ -5703,20 +5656,22 @@ static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI, bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder) { BasicBlock *BB = BI->getParent(); + BasicBlock *Succ = BI->getSuccessor(0); if (SinkCommon && SinkThenElseCodeToEnd(BI)) return true; // If the Terminator is the only non-phi instruction, simplify the block. - // if LoopHeader is provided, check if the block is a loop header - // (This is for early invocations before loop simplify and vectorization - // to keep canonical loop forms for nested loops. - // These blocks can be eliminated when the pass is invoked later - // in the back-end.) + // if LoopHeader is provided, check if the block or its successor is a loop + // header (This is for early invocations before loop simplify and + // vectorization to keep canonical loop forms for nested loops. These blocks + // can be eliminated when the pass is invoked later in the back-end.) 
+ bool NeedCanonicalLoop = + !LateSimplifyCFG && + (LoopHeaders && (LoopHeaders->count(BB) || LoopHeaders->count(Succ))); BasicBlock::iterator I = BB->getFirstNonPHIOrDbg()->getIterator(); if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() && - (!LoopHeaders || !LoopHeaders->count(BB)) && - TryToSimplifyUncondBranchFromEmptyBlock(BB)) + !NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB)) return true; // If the only instruction in the block is a seteq/setne comparison @@ -5801,8 +5756,8 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { if (BasicBlock *Dom = BB->getSinglePredecessor()) { auto *PBI = dyn_cast_or_null(Dom->getTerminator()); if (PBI && PBI->isConditional() && - PBI->getSuccessor(0) != PBI->getSuccessor(1) && - (PBI->getSuccessor(0) == BB || PBI->getSuccessor(1) == BB)) { + PBI->getSuccessor(0) != PBI->getSuccessor(1)) { + assert(PBI->getSuccessor(0) == BB || PBI->getSuccessor(1) == BB); bool CondIsFalse = PBI->getSuccessor(1) == BB; Optional Implication = isImpliedCondition( PBI->getCondition(), BI->getCondition(), DL, CondIsFalse); diff --git a/interpreter/llvm/src/lib/Transforms/Utils/SimplifyIndVar.cpp b/interpreter/llvm/src/lib/Transforms/Utils/SimplifyIndVar.cpp index 02a5d3dbeadfb..6d90e6b48358a 100644 --- a/interpreter/llvm/src/lib/Transforms/Utils/SimplifyIndVar.cpp +++ b/interpreter/llvm/src/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -25,6 +25,7 @@ #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -80,6 +81,7 @@ namespace { bool IsSigned); bool eliminateSDiv(BinaryOperator *SDiv); bool strengthenOverflowingOperation(BinaryOperator *OBO, Value *IVOperand); + bool strengthenRightShift(BinaryOperator *BO, Value *IVOperand); }; } @@ -154,6 +156,7 @@ Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand) void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) { unsigned IVOperIdx = 0; ICmpInst::Predicate Pred = ICmp->getPredicate(); + ICmpInst::Predicate OriginalPred = Pred; if (IVOperand != ICmp->getOperand(0)) { // Swapped assert(IVOperand == ICmp->getOperand(1) && "Can't find IVOperand"); @@ -262,6 +265,16 @@ void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) { ICmp->setPredicate(InvariantPredicate); ICmp->setOperand(0, NewLHS); ICmp->setOperand(1, NewRHS); + } else if (ICmpInst::isSigned(OriginalPred) && + SE->isKnownNonNegative(S) && SE->isKnownNonNegative(X)) { + // If we were unable to make anything above, all we can do is canonicalize + // the comparison hoping that it will open the doors for other + // optimizations. If we find out that we compare two non-negative values, + // we turn the instruction's predicate to its unsigned version. Note that + // we cannot rely on Pred here unless we check if we have swapped it.
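The canonicalization described in the comment above is sound because the signed and unsigned orderings coincide on non-negative values, so slt/sle/sgt/sge may be replaced by ult/ule/ugt/uge once ScalarEvolution proves both operands non-negative. A quick standalone check of the underlying fact:

#include <cassert>
#include <cstdint>

int main() {
  // For non-negative operands, signed "<" agrees with unsigned "<"
  // (slt == ult); step sizes are arbitrary, chosen just to sample pairs.
  for (int32_t X = 0; X <= 1000; X += 37)
    for (int32_t Y = 0; Y <= 1000; Y += 41)
      assert((X < Y) == ((uint32_t)X < (uint32_t)Y));
  return 0;
}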
+ assert(ICmp->getPredicate() == OriginalPred && "Predicate changed?"); + DEBUG(dbgs() << "INDVARS: Turn to unsigned comparison: " << *ICmp << '\n'); + ICmp->setPredicate(ICmpInst::getUnsignedPredicate(OriginalPred)); } else return; @@ -352,9 +365,9 @@ bool SimplifyIndvar::eliminateOverflowIntrinsic(CallInst *CI) { return false; typedef const SCEV *(ScalarEvolution::*OperationFunctionTy)( - const SCEV *, const SCEV *, SCEV::NoWrapFlags); + const SCEV *, const SCEV *, SCEV::NoWrapFlags, unsigned); typedef const SCEV *(ScalarEvolution::*ExtensionFunctionTy)( - const SCEV *, Type *); + const SCEV *, Type *, unsigned); OperationFunctionTy Operation; ExtensionFunctionTy Extension; @@ -406,10 +419,11 @@ bool SimplifyIndvar::eliminateOverflowIntrinsic(CallInst *CI) { IntegerType::get(NarrowTy->getContext(), NarrowTy->getBitWidth() * 2); const SCEV *A = - (SE->*Extension)((SE->*Operation)(LHS, RHS, SCEV::FlagAnyWrap), WideTy); + (SE->*Extension)((SE->*Operation)(LHS, RHS, SCEV::FlagAnyWrap, 0), + WideTy, 0); const SCEV *B = - (SE->*Operation)((SE->*Extension)(LHS, WideTy), - (SE->*Extension)(RHS, WideTy), SCEV::FlagAnyWrap); + (SE->*Operation)((SE->*Extension)(LHS, WideTy, 0), + (SE->*Extension)(RHS, WideTy, 0), SCEV::FlagAnyWrap, 0); if (A != B) return false; @@ -530,8 +544,7 @@ bool SimplifyIndvar::strengthenOverflowingOperation(BinaryOperator *BO, return false; const SCEV *(ScalarEvolution::*GetExprForBO)(const SCEV *, const SCEV *, - SCEV::NoWrapFlags); - + SCEV::NoWrapFlags, unsigned); switch (BO->getOpcode()) { default: return false; @@ -560,7 +573,7 @@ bool SimplifyIndvar::strengthenOverflowingOperation(BinaryOperator *BO, const SCEV *ExtendAfterOp = SE->getZeroExtendExpr(SE->getSCEV(BO), WideTy); const SCEV *OpAfterExtend = (SE->*GetExprForBO)( SE->getZeroExtendExpr(LHS, WideTy), SE->getZeroExtendExpr(RHS, WideTy), - SCEV::FlagAnyWrap); + SCEV::FlagAnyWrap, 0u); if (ExtendAfterOp == OpAfterExtend) { BO->setHasNoUnsignedWrap(); SE->forgetValue(BO); @@ -572,7 +585,7 @@ bool SimplifyIndvar::strengthenOverflowingOperation(BinaryOperator *BO, const SCEV *ExtendAfterOp = SE->getSignExtendExpr(SE->getSCEV(BO), WideTy); const SCEV *OpAfterExtend = (SE->*GetExprForBO)( SE->getSignExtendExpr(LHS, WideTy), SE->getSignExtendExpr(RHS, WideTy), - SCEV::FlagAnyWrap); + SCEV::FlagAnyWrap, 0u); if (ExtendAfterOp == OpAfterExtend) { BO->setHasNoSignedWrap(); SE->forgetValue(BO); @@ -583,6 +596,35 @@ bool SimplifyIndvar::strengthenOverflowingOperation(BinaryOperator *BO, return Changed; } +/// Annotate the Shr in (X << IVOperand) >> C as exact using the +/// information from the IV's range. Returns true if anything changed, false +/// otherwise. +bool SimplifyIndvar::strengthenRightShift(BinaryOperator *BO, + Value *IVOperand) { + using namespace llvm::PatternMatch; + + if (BO->getOpcode() == Instruction::Shl) { + bool Changed = false; + ConstantRange IVRange = SE->getUnsignedRange(SE->getSCEV(IVOperand)); + for (auto *U : BO->users()) { + const APInt *C; + if (match(U, + m_AShr(m_Shl(m_Value(), m_Specific(IVOperand)), m_APInt(C))) || + match(U, + m_LShr(m_Shl(m_Value(), m_Specific(IVOperand)), m_APInt(C)))) { + BinaryOperator *Shr = cast(U); + if (!Shr->isExact() && IVRange.getUnsignedMin().uge(*C)) { + Shr->setIsExact(true); + Changed = true; + } + } + } + return Changed; + } + + return false; +} + /// Add all uses of Def to the current IV's worklist. 
static void pushIVUsers( Instruction *Def, @@ -675,8 +717,9 @@ void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) { } if (BinaryOperator *BO = dyn_cast(UseOper.first)) { - if (isa(BO) && - strengthenOverflowingOperation(BO, IVOperand)) { + if ((isa(BO) && + strengthenOverflowingOperation(BO, IVOperand)) || + (isa(BO) && strengthenRightShift(BO, IVOperand))) { // re-queue uses of the now modified binary operator and fall // through to the checks that remain. pushIVUsers(IVOperand, Simplified, SimpleIVUsers); diff --git a/interpreter/llvm/src/lib/Transforms/Utils/SimplifyInstructions.cpp b/interpreter/llvm/src/lib/Transforms/Utils/SimplifyInstructions.cpp index 2509b5f22046b..2ea15f65cef9a 100644 --- a/interpreter/llvm/src/lib/Transforms/Utils/SimplifyInstructions.cpp +++ b/interpreter/llvm/src/lib/Transforms/Utils/SimplifyInstructions.cpp @@ -27,8 +27,8 @@ #include "llvm/IR/Function.h" #include "llvm/IR/Type.h" #include "llvm/Pass.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/Local.h" using namespace llvm; #define DEBUG_TYPE "instsimplify" diff --git a/interpreter/llvm/src/lib/Transforms/Utils/SimplifyLibCalls.cpp b/interpreter/llvm/src/lib/Transforms/Utils/SimplifyLibCalls.cpp index 1de579ed41b09..77c0a41929ac7 100644 --- a/interpreter/llvm/src/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/interpreter/llvm/src/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -85,20 +85,6 @@ static bool isCallingConvCCompatible(CallInst *CI) { return false; } -/// Return true if it only matters that the value is equal or not-equal to zero. -static bool isOnlyUsedInZeroEqualityComparison(Value *V) { - for (User *U : V->users()) { - if (ICmpInst *IC = dyn_cast(U)) - if (IC->isEquality()) - if (Constant *C = dyn_cast(IC->getOperand(1))) - if (C->isNullValue()) - continue; - // Unknown instruction. - return false; - } - return true; -} - /// Return true if it is only used in equality comparisons with With. static bool isOnlyUsedInEqualityComparison(Value *V, Value *With) { for (User *U : V->users()) { @@ -426,57 +412,68 @@ Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilder<> &B) { return Dst; } -Value *LibCallSimplifier::optimizeStrLen(CallInst *CI, IRBuilder<> &B) { +Value *LibCallSimplifier::optimizeStringLength(CallInst *CI, IRBuilder<> &B, + unsigned CharSize) { Value *Src = CI->getArgOperand(0); // Constant folding: strlen("xyz") -> 3 - if (uint64_t Len = GetStringLength(Src)) + if (uint64_t Len = GetStringLength(Src, CharSize)) return ConstantInt::get(CI->getType(), Len - 1); // If s is a constant pointer pointing to a string literal, we can fold - // strlen(s + x) to strlen(s) - x, when x is known to be in the range + // strlen(s + x) to strlen(s) - x, when x is known to be in the range // [0, strlen(s)] or the string has a single null terminator '\0' at the end. - // We only try to simplify strlen when the pointer s points to an array + // We only try to simplify strlen when the pointer s points to an array // of i8. Otherwise, we would need to scale the offset x before doing the - // subtraction. This will make the optimization more complex, and it's not - // very useful because calling strlen for a pointer of other types is + // subtraction. This will make the optimization more complex, and it's not + // very useful because calling strlen for a pointer of other types is // very uncommon. 
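The fold motivated above is plain arithmetic: if the constant string's terminator sits at index NullTermIdx and the offset x is known to lie in [0, NullTermIdx], then strlen(s + x) equals NullTermIdx - x. A standalone demonstration:

#include <cassert>
#include <cstring>

int main() {
  const char S[] = "hello";       // terminator at index 5
  const size_t NullTermIdx = 5;
  // For every in-range offset, the library call matches the folded value.
  for (size_t X = 0; X <= NullTermIdx; ++X)
    assert(std::strlen(S + X) == NullTermIdx - X);
  return 0;
}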
if (GEPOperator *GEP = dyn_cast(Src)) { - if (!isGEPBasedOnPointerToString(GEP)) + if (!isGEPBasedOnPointerToString(GEP, CharSize)) return nullptr; - StringRef Str; - if (getConstantStringInfo(GEP->getOperand(0), Str, 0, false)) { - size_t NullTermIdx = Str.find('\0'); - - // If the string does not have '\0', leave it to strlen to compute - // its length. - if (NullTermIdx == StringRef::npos) - return nullptr; - + ConstantDataArraySlice Slice; + if (getConstantDataArrayInfo(GEP->getOperand(0), Slice, CharSize)) { + uint64_t NullTermIdx; + if (Slice.Array == nullptr) { + NullTermIdx = 0; + } else { + NullTermIdx = ~((uint64_t)0); + for (uint64_t I = 0, E = Slice.Length; I < E; ++I) { + if (Slice.Array->getElementAsInteger(I + Slice.Offset) == 0) { + NullTermIdx = I; + break; + } + } + // If the string does not have '\0', leave it to strlen to compute + // its length. + if (NullTermIdx == ~((uint64_t)0)) + return nullptr; + } + Value *Offset = GEP->getOperand(2); - unsigned BitWidth = Offset->getType()->getIntegerBitWidth(); - KnownBits Known(BitWidth); - computeKnownBits(Offset, Known, DL, 0, nullptr, CI, nullptr); + KnownBits Known = computeKnownBits(Offset, DL, 0, nullptr, CI, nullptr); Known.Zero.flipAllBits(); - size_t ArrSize = + uint64_t ArrSize = cast(GEP->getSourceElementType())->getNumElements(); - // KnownZero's bits are flipped, so zeros in KnownZero now represent - // bits known to be zeros in Offset, and ones in KnowZero represent + // KnownZero's bits are flipped, so zeros in KnownZero now represent + // bits known to be zeros in Offset, and ones in KnowZero represent // bits unknown in Offset. Therefore, Offset is known to be in range - // [0, NullTermIdx] when the flipped KnownZero is non-negative and + // [0, NullTermIdx] when the flipped KnownZero is non-negative and // unsigned-less-than NullTermIdx. // - // If Offset is not provably in the range [0, NullTermIdx], we can still - // optimize if we can prove that the program has undefined behavior when - // Offset is outside that range. That is the case when GEP->getOperand(0) + // If Offset is not provably in the range [0, NullTermIdx], we can still + // optimize if we can prove that the program has undefined behavior when + // Offset is outside that range. That is the case when GEP->getOperand(0) // is a pointer to an object whose memory extent is NullTermIdx+1. - if ((Known.Zero.isNonNegative() && Known.Zero.ule(NullTermIdx)) || + if ((Known.Zero.isNonNegative() && Known.Zero.ule(NullTermIdx)) || (GEP->isInBounds() && isa(GEP->getOperand(0)) && - NullTermIdx == ArrSize - 1)) - return B.CreateSub(ConstantInt::get(CI->getType(), NullTermIdx), + NullTermIdx == ArrSize - 1)) { + Offset = B.CreateSExtOrTrunc(Offset, CI->getType()); + return B.CreateSub(ConstantInt::get(CI->getType(), NullTermIdx), Offset); + } } return nullptr; @@ -484,8 +481,8 @@ Value *LibCallSimplifier::optimizeStrLen(CallInst *CI, IRBuilder<> &B) { // strlen(x?"foo":"bars") --> x ? 
3 : 4 if (SelectInst *SI = dyn_cast(Src)) { - uint64_t LenTrue = GetStringLength(SI->getTrueValue()); - uint64_t LenFalse = GetStringLength(SI->getFalseValue()); + uint64_t LenTrue = GetStringLength(SI->getTrueValue(), CharSize); + uint64_t LenFalse = GetStringLength(SI->getFalseValue(), CharSize); if (LenTrue && LenFalse) { Function *Caller = CI->getParent()->getParent(); emitOptimizationRemark(CI->getContext(), "simplify-libcalls", *Caller, @@ -505,6 +502,17 @@ Value *LibCallSimplifier::optimizeStrLen(CallInst *CI, IRBuilder<> &B) { return nullptr; } +Value *LibCallSimplifier::optimizeStrLen(CallInst *CI, IRBuilder<> &B) { + return optimizeStringLength(CI, B, 8); +} + +Value *LibCallSimplifier::optimizeWcslen(CallInst *CI, IRBuilder<> &B) { + Module &M = *CI->getParent()->getParent()->getParent(); + unsigned WCharSize = TLI->getWCharSize(M) * 8; + + return optimizeStringLength(CI, B, WCharSize); +} + Value *LibCallSimplifier::optimizeStrPBrk(CallInst *CI, IRBuilder<> &B) { StringRef S1, S2; bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1); @@ -648,7 +656,7 @@ Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilder<> &B) { ConstantInt *LenC = dyn_cast(CI->getArgOperand(2)); // memchr(x, y, 0) -> null - if (LenC && LenC->isNullValue()) + if (LenC && LenC->isZero()) return Constant::getNullValue(CI->getType()); // From now on we need at least constant length and string. @@ -730,8 +738,8 @@ Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) { ConstantInt *LenC = dyn_cast(CI->getArgOperand(2)); if (!LenC) return nullptr; - uint64_t Len = LenC->getZExtValue(); + uint64_t Len = LenC->getZExtValue(); if (Len == 0) // memcmp(s1,s2,0) -> 0 return Constant::getNullValue(CI->getType()); @@ -2026,6 +2034,8 @@ Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI, return optimizeMemMove(CI, Builder); case LibFunc_memset: return optimizeMemSet(CI, Builder); + case LibFunc_wcslen: + return optimizeWcslen(CI, Builder); default: break; } @@ -2270,7 +2280,7 @@ bool FortifiedLibCallSimplifier::isFortifiedCallFoldable(CallInst *CI, return true; if (ConstantInt *ObjSizeCI = dyn_cast(CI->getArgOperand(ObjSizeOp))) { - if (ObjSizeCI->isAllOnesValue()) + if (ObjSizeCI->isMinusOne()) return true; // If the object size wasn't -1 (unknown), bail out if we were asked to. 
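Stepping back to the string-length rework above: parameterizing optimizeStringLength by CharSize is what lets one terminator scan serve both strlen and wcslen; only the element width read from the constant initializer differs. A standalone model of that scan, with plain arrays standing in for ConstantDataArraySlice (illustration only):

#include <cstdint>
#include <cstdio>

// Return the index of the first zero element, or ~0 if there is none
// (in which case the call is left alone rather than constant-folded).
template <typename Elem>
uint64_t findNullTermIdx(const Elem *Data, uint64_t Length) {
  for (uint64_t I = 0; I != Length; ++I)
    if (Data[I] == 0)
      return I;
  return ~uint64_t(0);
}

int main() {
  const char Narrow[] = {'h', 'i', 0};      // strlen case: 8-bit elements
  const uint32_t Wide[] = {'h', 'i', 0};    // wcslen case: 32-bit wchar_t
  std::printf("%llu %llu\n",
              (unsigned long long)findNullTermIdx(Narrow, 3),
              (unsigned long long)findNullTermIdx(Wide, 3));
  return 0;
}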
if (OnlyLowerUnknownSize) diff --git a/interpreter/llvm/src/lib/Transforms/Utils/StripGCRelocates.cpp b/interpreter/llvm/src/lib/Transforms/Utils/StripGCRelocates.cpp index f3d3fadb51e93..49dc15cf5e7c0 100644 --- a/interpreter/llvm/src/lib/Transforms/Utils/StripGCRelocates.cpp +++ b/interpreter/llvm/src/lib/Transforms/Utils/StripGCRelocates.cpp @@ -20,8 +20,8 @@ #include "llvm/IR/Statepoint.h" #include "llvm/IR/Type.h" #include "llvm/Pass.h" -#include "llvm/Transforms/Scalar.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" using namespace llvm; diff --git a/interpreter/llvm/src/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp b/interpreter/llvm/src/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp index 66dbf335cb953..cd0378e0140cd 100644 --- a/interpreter/llvm/src/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp +++ b/interpreter/llvm/src/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp @@ -7,9 +7,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/IPO.h" #include "llvm/IR/DebugInfo.h" #include "llvm/Pass.h" +#include "llvm/Transforms/IPO.h" using namespace llvm; namespace { diff --git a/interpreter/llvm/src/lib/Transforms/Utils/SymbolRewriter.cpp b/interpreter/llvm/src/lib/Transforms/Utils/SymbolRewriter.cpp index 6d136636ce709..20107553665f6 100644 --- a/interpreter/llvm/src/lib/Transforms/Utils/SymbolRewriter.cpp +++ b/interpreter/llvm/src/lib/Transforms/Utils/SymbolRewriter.cpp @@ -59,9 +59,9 @@ #define DEBUG_TYPE "symbol-rewriter" #include "llvm/Transforms/Utils/SymbolRewriter.h" -#include "llvm/Pass.h" #include "llvm/ADT/SmallString.h" #include "llvm/IR/LegacyPassManager.h" +#include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MemoryBuffer.h" diff --git a/interpreter/llvm/src/lib/Transforms/Utils/Utils.cpp b/interpreter/llvm/src/lib/Transforms/Utils/Utils.cpp index 7106483c3bd2a..f6c7d1c4989eb 100644 --- a/interpreter/llvm/src/lib/Transforms/Utils/Utils.cpp +++ b/interpreter/llvm/src/lib/Transforms/Utils/Utils.cpp @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/InitializePasses.h" #include "llvm-c/Initialization.h" +#include "llvm/InitializePasses.h" #include "llvm/PassRegistry.h" using namespace llvm; diff --git a/interpreter/llvm/src/lib/Transforms/Utils/VNCoercion.cpp b/interpreter/llvm/src/lib/Transforms/Utils/VNCoercion.cpp index 60d9ede2c4871..c3feea6a0a414 100644 --- a/interpreter/llvm/src/lib/Transforms/Utils/VNCoercion.cpp +++ b/interpreter/llvm/src/lib/Transforms/Utils/VNCoercion.cpp @@ -51,25 +51,24 @@ static T *coerceAvailableValueToLoadTypeHelper(T *StoredVal, Type *LoadedTy, // If the store and reload are the same size, we can always reuse it. if (StoredValSize == LoadedValSize) { // Pointer to Pointer -> use bitcast. - if (StoredValTy->getScalarType()->isPointerTy() && - LoadedTy->getScalarType()->isPointerTy()) { + if (StoredValTy->isPtrOrPtrVectorTy() && LoadedTy->isPtrOrPtrVectorTy()) { StoredVal = Helper.CreateBitCast(StoredVal, LoadedTy); } else { // Convert source pointers to integers, which can be bitcast. 
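The same-size coercion strategy in this helper reduces to a small decision table: pointer-to-pointer reuses the value via a bitcast, and mixed pointer/non-pointer cases route through the target's integer pointer type with ptrtoint/inttoptr. A standalone sketch of that decision (illustration only, not the LLVM API):

#include <cstdio>

enum class Kind { Int, Ptr, Float };

// Describe the cast chain needed to reuse a stored value of kind From as a
// load of kind To, assuming equal bit widths (the case handled above).
const char *coercionCasts(Kind From, Kind To) {
  if (From == Kind::Ptr && To == Kind::Ptr)
    return "bitcast";
  if (From == Kind::Ptr)
    return "ptrtoint (+ bitcast)";
  if (To == Kind::Ptr)
    return "(bitcast +) inttoptr";
  return "bitcast";
}

int main() {
  std::printf("%s\n", coercionCasts(Kind::Ptr, Kind::Float));
  return 0;
}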
- if (StoredValTy->getScalarType()->isPointerTy()) { + if (StoredValTy->isPtrOrPtrVectorTy()) { StoredValTy = DL.getIntPtrType(StoredValTy); StoredVal = Helper.CreatePtrToInt(StoredVal, StoredValTy); } Type *TypeToCastTo = LoadedTy; - if (TypeToCastTo->getScalarType()->isPointerTy()) + if (TypeToCastTo->isPtrOrPtrVectorTy()) TypeToCastTo = DL.getIntPtrType(TypeToCastTo); if (StoredValTy != TypeToCastTo) StoredVal = Helper.CreateBitCast(StoredVal, TypeToCastTo); // Cast to pointer if the load needs a pointer type. - if (LoadedTy->getScalarType()->isPointerTy()) + if (LoadedTy->isPtrOrPtrVectorTy()) StoredVal = Helper.CreateIntToPtr(StoredVal, LoadedTy); } @@ -86,7 +85,7 @@ static T *coerceAvailableValueToLoadTypeHelper(T *StoredVal, Type *LoadedTy, "canCoerceMustAliasedValueToLoad fail"); // Convert source pointers to integers, which can be manipulated. - if (StoredValTy->getScalarType()->isPointerTy()) { + if (StoredValTy->isPtrOrPtrVectorTy()) { StoredValTy = DL.getIntPtrType(StoredValTy); StoredVal = Helper.CreatePtrToInt(StoredVal, StoredValTy); } @@ -112,7 +111,7 @@ static T *coerceAvailableValueToLoadTypeHelper(T *StoredVal, Type *LoadedTy, if (LoadedTy != NewIntTy) { // If the result is a pointer, inttoptr. - if (LoadedTy->getScalarType()->isPointerTy()) + if (LoadedTy->isPtrOrPtrVectorTy()) StoredVal = Helper.CreateIntToPtr(StoredVal, LoadedTy); else // Otherwise, bitcast. @@ -316,7 +315,7 @@ static T *getStoreValueForLoadHelper(T *SrcVal, unsigned Offset, Type *LoadTy, uint64_t LoadSize = (DL.getTypeSizeInBits(LoadTy) + 7) / 8; // Compute which bits of the stored value are being used by the load. Convert // to an integer type to start with. - if (SrcVal->getType()->getScalarType()->isPointerTy()) + if (SrcVal->getType()->isPtrOrPtrVectorTy()) SrcVal = Helper.CreatePtrToInt(SrcVal, DL.getIntPtrType(SrcVal->getType())); if (!SrcVal->getType()->isIntegerTy()) SrcVal = Helper.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize * 8)); diff --git a/interpreter/llvm/src/lib/Transforms/Vectorize/BBVectorize.cpp b/interpreter/llvm/src/lib/Transforms/Vectorize/BBVectorize.cpp deleted file mode 100644 index c83b3f7b225bc..0000000000000 --- a/interpreter/llvm/src/lib/Transforms/Vectorize/BBVectorize.cpp +++ /dev/null @@ -1,3282 +0,0 @@ -//===- BBVectorize.cpp - A Basic-Block Vectorizer -------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements a basic-block vectorization pass. The algorithm was -// inspired by that used by the Vienna MAP Vectorizor by Franchetti and Kral, -// et al. It works by looking for chains of pairable operations and then -// pairing them. 
-// -//===----------------------------------------------------------------------===// - -#define BBV_NAME "bb-vectorize" -#include "llvm/Transforms/Vectorize.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/AliasSetTracker.h" -#include "llvm/Analysis/GlobalsModRef.h" -#include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" -#include "llvm/Analysis/ScalarEvolutionExpressions.h" -#include "llvm/Analysis/TargetLibraryInfo.h" -#include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/Analysis/ValueTracking.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/Dominators.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Intrinsics.h" -#include "llvm/IR/LLVMContext.h" -#include "llvm/IR/Metadata.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/Type.h" -#include "llvm/IR/ValueHandle.h" -#include "llvm/Pass.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Utils/Local.h" -#include -using namespace llvm; - -#define DEBUG_TYPE BBV_NAME - -static cl::opt -IgnoreTargetInfo("bb-vectorize-ignore-target-info", cl::init(false), - cl::Hidden, cl::desc("Ignore target information")); - -static cl::opt -ReqChainDepth("bb-vectorize-req-chain-depth", cl::init(6), cl::Hidden, - cl::desc("The required chain depth for vectorization")); - -static cl::opt -UseChainDepthWithTI("bb-vectorize-use-chain-depth", cl::init(false), - cl::Hidden, cl::desc("Use the chain depth requirement with" - " target information")); - -static cl::opt -SearchLimit("bb-vectorize-search-limit", cl::init(400), cl::Hidden, - cl::desc("The maximum search distance for instruction pairs")); - -static cl::opt -SplatBreaksChain("bb-vectorize-splat-breaks-chain", cl::init(false), cl::Hidden, - cl::desc("Replicating one element to a pair breaks the chain")); - -static cl::opt -VectorBits("bb-vectorize-vector-bits", cl::init(128), cl::Hidden, - cl::desc("The size of the native vector registers")); - -static cl::opt -MaxIter("bb-vectorize-max-iter", cl::init(0), cl::Hidden, - cl::desc("The maximum number of pairing iterations")); - -static cl::opt -Pow2LenOnly("bb-vectorize-pow2-len-only", cl::init(false), cl::Hidden, - cl::desc("Don't try to form non-2^n-length vectors")); - -static cl::opt -MaxInsts("bb-vectorize-max-instr-per-group", cl::init(500), cl::Hidden, - cl::desc("The maximum number of pairable instructions per group")); - -static cl::opt -MaxPairs("bb-vectorize-max-pairs-per-group", cl::init(3000), cl::Hidden, - cl::desc("The maximum number of candidate instruction pairs per group")); - -static cl::opt -MaxCandPairsForCycleCheck("bb-vectorize-max-cycle-check-pairs", cl::init(200), - cl::Hidden, cl::desc("The maximum number of candidate pairs with which to use" - " a full cycle check")); - -static cl::opt -NoBools("bb-vectorize-no-bools", cl::init(false), cl::Hidden, - cl::desc("Don't try to vectorize boolean (i1) values")); - -static cl::opt -NoInts("bb-vectorize-no-ints", cl::init(false), cl::Hidden, - cl::desc("Don't try to vectorize integer values")); - -static cl::opt -NoFloats("bb-vectorize-no-floats", 
cl::init(false), cl::Hidden, - cl::desc("Don't try to vectorize floating-point values")); - -// FIXME: This should default to false once pointer vector support works. -static cl::opt -NoPointers("bb-vectorize-no-pointers", cl::init(/*false*/ true), cl::Hidden, - cl::desc("Don't try to vectorize pointer values")); - -static cl::opt -NoCasts("bb-vectorize-no-casts", cl::init(false), cl::Hidden, - cl::desc("Don't try to vectorize casting (conversion) operations")); - -static cl::opt -NoMath("bb-vectorize-no-math", cl::init(false), cl::Hidden, - cl::desc("Don't try to vectorize floating-point math intrinsics")); - -static cl::opt - NoBitManipulation("bb-vectorize-no-bitmanip", cl::init(false), cl::Hidden, - cl::desc("Don't try to vectorize BitManipulation intrinsics")); - -static cl::opt -NoFMA("bb-vectorize-no-fma", cl::init(false), cl::Hidden, - cl::desc("Don't try to vectorize the fused-multiply-add intrinsic")); - -static cl::opt -NoSelect("bb-vectorize-no-select", cl::init(false), cl::Hidden, - cl::desc("Don't try to vectorize select instructions")); - -static cl::opt -NoCmp("bb-vectorize-no-cmp", cl::init(false), cl::Hidden, - cl::desc("Don't try to vectorize comparison instructions")); - -static cl::opt -NoGEP("bb-vectorize-no-gep", cl::init(false), cl::Hidden, - cl::desc("Don't try to vectorize getelementptr instructions")); - -static cl::opt -NoMemOps("bb-vectorize-no-mem-ops", cl::init(false), cl::Hidden, - cl::desc("Don't try to vectorize loads and stores")); - -static cl::opt -AlignedOnly("bb-vectorize-aligned-only", cl::init(false), cl::Hidden, - cl::desc("Only generate aligned loads and stores")); - -static cl::opt -NoMemOpBoost("bb-vectorize-no-mem-op-boost", - cl::init(false), cl::Hidden, - cl::desc("Don't boost the chain-depth contribution of loads and stores")); - -static cl::opt -FastDep("bb-vectorize-fast-dep", cl::init(false), cl::Hidden, - cl::desc("Use a fast instruction dependency analysis")); - -#ifndef NDEBUG -static cl::opt -DebugInstructionExamination("bb-vectorize-debug-instruction-examination", - cl::init(false), cl::Hidden, - cl::desc("When debugging is enabled, output information on the" - " instruction-examination process")); -static cl::opt -DebugCandidateSelection("bb-vectorize-debug-candidate-selection", - cl::init(false), cl::Hidden, - cl::desc("When debugging is enabled, output information on the" - " candidate-selection process")); -static cl::opt -DebugPairSelection("bb-vectorize-debug-pair-selection", - cl::init(false), cl::Hidden, - cl::desc("When debugging is enabled, output information on the" - " pair-selection process")); -static cl::opt -DebugCycleCheck("bb-vectorize-debug-cycle-check", - cl::init(false), cl::Hidden, - cl::desc("When debugging is enabled, output information on the" - " cycle-checking process")); - -static cl::opt -PrintAfterEveryPair("bb-vectorize-debug-print-after-every-pair", - cl::init(false), cl::Hidden, - cl::desc("When debugging is enabled, dump the basic block after" - " every pair is fused")); -#endif - -STATISTIC(NumFusedOps, "Number of operations fused by bb-vectorize"); - -namespace { - struct BBVectorize : public BasicBlockPass { - static char ID; // Pass identification, replacement for typeid - - const VectorizeConfig Config; - - BBVectorize(const VectorizeConfig &C = VectorizeConfig()) - : BasicBlockPass(ID), Config(C) { - initializeBBVectorizePass(*PassRegistry::getPassRegistry()); - } - - BBVectorize(Pass *P, Function &F, const VectorizeConfig &C) - : BasicBlockPass(ID), Config(C) { - AA = 
&P->getAnalysis().getAAResults(); - DT = &P->getAnalysis().getDomTree(); - SE = &P->getAnalysis().getSE(); - TLI = &P->getAnalysis().getTLI(); - TTI = IgnoreTargetInfo - ? nullptr - : &P->getAnalysis().getTTI(F); - } - - typedef std::pair ValuePair; - typedef std::pair ValuePairWithCost; - typedef std::pair ValuePairWithDepth; - typedef std::pair VPPair; // A ValuePair pair - typedef std::pair VPPairWithType; - - AliasAnalysis *AA; - DominatorTree *DT; - ScalarEvolution *SE; - const TargetLibraryInfo *TLI; - const TargetTransformInfo *TTI; - - // FIXME: const correct? - - bool vectorizePairs(BasicBlock &BB, bool NonPow2Len = false); - - bool getCandidatePairs(BasicBlock &BB, - BasicBlock::iterator &Start, - DenseMap > &CandidatePairs, - DenseSet &FixedOrderPairs, - DenseMap &CandidatePairCostSavings, - std::vector &PairableInsts, bool NonPow2Len); - - // FIXME: The current implementation does not account for pairs that - // are connected in multiple ways. For example: - // C1 = A1 / A2; C2 = A2 / A1 (which may be both direct and a swap) - enum PairConnectionType { - PairConnectionDirect, - PairConnectionSwap, - PairConnectionSplat - }; - - void computeConnectedPairs( - DenseMap > &CandidatePairs, - DenseSet &CandidatePairsSet, - std::vector &PairableInsts, - DenseMap > &ConnectedPairs, - DenseMap &PairConnectionTypes); - - void buildDepMap(BasicBlock &BB, - DenseMap > &CandidatePairs, - std::vector &PairableInsts, - DenseSet &PairableInstUsers); - - void choosePairs(DenseMap > &CandidatePairs, - DenseSet &CandidatePairsSet, - DenseMap &CandidatePairCostSavings, - std::vector &PairableInsts, - DenseSet &FixedOrderPairs, - DenseMap &PairConnectionTypes, - DenseMap > &ConnectedPairs, - DenseMap > &ConnectedPairDeps, - DenseSet &PairableInstUsers, - DenseMap& ChosenPairs); - - void fuseChosenPairs(BasicBlock &BB, - std::vector &PairableInsts, - DenseMap& ChosenPairs, - DenseSet &FixedOrderPairs, - DenseMap &PairConnectionTypes, - DenseMap > &ConnectedPairs, - DenseMap > &ConnectedPairDeps); - - - bool isInstVectorizable(Instruction *I, bool &IsSimpleLoadStore); - - bool areInstsCompatible(Instruction *I, Instruction *J, - bool IsSimpleLoadStore, bool NonPow2Len, - int &CostSavings, int &FixedOrder); - - bool trackUsesOfI(DenseSet &Users, - AliasSetTracker &WriteSet, Instruction *I, - Instruction *J, bool UpdateUsers = true, - DenseSet *LoadMoveSetPairs = nullptr); - - void computePairsConnectedTo( - DenseMap > &CandidatePairs, - DenseSet &CandidatePairsSet, - std::vector &PairableInsts, - DenseMap > &ConnectedPairs, - DenseMap &PairConnectionTypes, - ValuePair P); - - bool pairsConflict(ValuePair P, ValuePair Q, - DenseSet &PairableInstUsers, - DenseMap > - *PairableInstUserMap = nullptr, - DenseSet *PairableInstUserPairSet = nullptr); - - bool pairWillFormCycle(ValuePair P, - DenseMap > &PairableInstUsers, - DenseSet &CurrentPairs); - - void pruneDAGFor( - DenseMap > &CandidatePairs, - std::vector &PairableInsts, - DenseMap > &ConnectedPairs, - DenseSet &PairableInstUsers, - DenseMap > &PairableInstUserMap, - DenseSet &PairableInstUserPairSet, - DenseMap &ChosenPairs, - DenseMap &DAG, - DenseSet &PrunedDAG, ValuePair J, - bool UseCycleCheck); - - void buildInitialDAGFor( - DenseMap > &CandidatePairs, - DenseSet &CandidatePairsSet, - std::vector &PairableInsts, - DenseMap > &ConnectedPairs, - DenseSet &PairableInstUsers, - DenseMap &ChosenPairs, - DenseMap &DAG, ValuePair J); - - void findBestDAGFor( - DenseMap > &CandidatePairs, - DenseSet &CandidatePairsSet, - DenseMap 
&CandidatePairCostSavings, - std::vector &PairableInsts, - DenseSet &FixedOrderPairs, - DenseMap &PairConnectionTypes, - DenseMap > &ConnectedPairs, - DenseMap > &ConnectedPairDeps, - DenseSet &PairableInstUsers, - DenseMap > &PairableInstUserMap, - DenseSet &PairableInstUserPairSet, - DenseMap &ChosenPairs, - DenseSet &BestDAG, size_t &BestMaxDepth, - int &BestEffSize, Value *II, std::vector&JJ, - bool UseCycleCheck); - - Value *getReplacementPointerInput(LLVMContext& Context, Instruction *I, - Instruction *J, unsigned o); - - void fillNewShuffleMask(LLVMContext& Context, Instruction *J, - unsigned MaskOffset, unsigned NumInElem, - unsigned NumInElem1, unsigned IdxOffset, - std::vector &Mask); - - Value *getReplacementShuffleMask(LLVMContext& Context, Instruction *I, - Instruction *J); - - bool expandIEChain(LLVMContext& Context, Instruction *I, Instruction *J, - unsigned o, Value *&LOp, unsigned numElemL, - Type *ArgTypeL, Type *ArgTypeR, bool IBeforeJ, - unsigned IdxOff = 0); - - Value *getReplacementInput(LLVMContext& Context, Instruction *I, - Instruction *J, unsigned o, bool IBeforeJ); - - void getReplacementInputsForPair(LLVMContext& Context, Instruction *I, - Instruction *J, SmallVectorImpl &ReplacedOperands, - bool IBeforeJ); - - void replaceOutputsOfPair(LLVMContext& Context, Instruction *I, - Instruction *J, Instruction *K, - Instruction *&InsertionPt, Instruction *&K1, - Instruction *&K2); - - void collectPairLoadMoveSet(BasicBlock &BB, - DenseMap &ChosenPairs, - DenseMap > &LoadMoveSet, - DenseSet &LoadMoveSetPairs, - Instruction *I); - - void collectLoadMoveSet(BasicBlock &BB, - std::vector &PairableInsts, - DenseMap &ChosenPairs, - DenseMap > &LoadMoveSet, - DenseSet &LoadMoveSetPairs); - - bool canMoveUsesOfIAfterJ(BasicBlock &BB, - DenseSet &LoadMoveSetPairs, - Instruction *I, Instruction *J); - - void moveUsesOfIAfterJ(BasicBlock &BB, - DenseSet &LoadMoveSetPairs, - Instruction *&InsertionPt, - Instruction *I, Instruction *J); - - bool vectorizeBB(BasicBlock &BB) { - if (skipBasicBlock(BB)) - return false; - if (!DT->isReachableFromEntry(&BB)) { - DEBUG(dbgs() << "BBV: skipping unreachable " << BB.getName() << - " in " << BB.getParent()->getName() << "\n"); - return false; - } - - DEBUG(if (TTI) dbgs() << "BBV: using target information\n"); - - bool changed = false; - // Iterate a sufficient number of times to merge types of size 1 bit, - // then 2 bits, then 4, etc. up to half of the target vector width of the - // target vector register. - unsigned n = 1; - for (unsigned v = 2; - (TTI || v <= Config.VectorBits) && - (!Config.MaxIter || n <= Config.MaxIter); - v *= 2, ++n) { - DEBUG(dbgs() << "BBV: fusing loop #" << n << - " for " << BB.getName() << " in " << - BB.getParent()->getName() << "...\n"); - if (vectorizePairs(BB)) - changed = true; - else - break; - } - - if (changed && !Pow2LenOnly) { - ++n; - for (; !Config.MaxIter || n <= Config.MaxIter; ++n) { - DEBUG(dbgs() << "BBV: fusing for non-2^n-length vectors loop #: " << - n << " for " << BB.getName() << " in " << - BB.getParent()->getName() << "...\n"); - if (!vectorizePairs(BB, true)) break; - } - } - - DEBUG(dbgs() << "BBV: done!\n"); - return changed; - } - - bool runOnBasicBlock(BasicBlock &BB) override { - // OptimizeNone check deferred to vectorizeBB(). - - AA = &getAnalysis().getAAResults(); - DT = &getAnalysis().getDomTree(); - SE = &getAnalysis().getSE(); - TLI = &getAnalysis().getTLI(); - TTI = IgnoreTargetInfo - ? 
nullptr - : &getAnalysis().getTTI( - *BB.getParent()); - - return vectorizeBB(BB); - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - BasicBlockPass::getAnalysisUsage(AU); - AU.addRequired(); - AU.addRequired(); - AU.addRequired(); - AU.addRequired(); - AU.addRequired(); - AU.addPreserved(); - AU.addPreserved(); - AU.addPreserved(); - AU.addPreserved(); - AU.setPreservesCFG(); - } - - static inline VectorType *getVecTypeForPair(Type *ElemTy, Type *Elem2Ty) { - assert(ElemTy->getScalarType() == Elem2Ty->getScalarType() && - "Cannot form vector from incompatible scalar types"); - Type *STy = ElemTy->getScalarType(); - - unsigned numElem; - if (VectorType *VTy = dyn_cast(ElemTy)) { - numElem = VTy->getNumElements(); - } else { - numElem = 1; - } - - if (VectorType *VTy = dyn_cast(Elem2Ty)) { - numElem += VTy->getNumElements(); - } else { - numElem += 1; - } - - return VectorType::get(STy, numElem); - } - - static inline void getInstructionTypes(Instruction *I, - Type *&T1, Type *&T2) { - if (StoreInst *SI = dyn_cast(I)) { - // For stores, it is the value type, not the pointer type that matters - // because the value is what will come from a vector register. - - Value *IVal = SI->getValueOperand(); - T1 = IVal->getType(); - } else { - T1 = I->getType(); - } - - if (CastInst *CI = dyn_cast(I)) - T2 = CI->getSrcTy(); - else - T2 = T1; - - if (SelectInst *SI = dyn_cast(I)) { - T2 = SI->getCondition()->getType(); - } else if (ShuffleVectorInst *SI = dyn_cast(I)) { - T2 = SI->getOperand(0)->getType(); - } else if (CmpInst *CI = dyn_cast(I)) { - T2 = CI->getOperand(0)->getType(); - } - } - - // Returns the weight associated with the provided value. A chain of - // candidate pairs has a length given by the sum of the weights of its - // members (one weight per pair; the weight of each member of the pair - // is assumed to be the same). This length is then compared to the - // chain-length threshold to determine if a given chain is significant - // enough to be vectorized. The length is also used in comparing - // candidate chains where longer chains are considered to be better. - // Note: when this function returns 0, the resulting instructions are - // not actually fused. - inline size_t getDepthFactor(Value *V) { - // InsertElement and ExtractElement have a depth factor of zero. This is - // for two reasons: First, they cannot be usefully fused. Second, because - // the pass generates a lot of these, they can confuse the simple metric - // used to compare the dags in the next iteration. Thus, giving them a - // weight of zero allows the pass to essentially ignore them in - // subsequent iterations when looking for vectorization opportunities - // while still tracking dependency chains that flow through those - // instructions. - if (isa(V) || isa(V)) - return 0; - - // Give a load or store half of the required depth so that load/store - // pairs will vectorize. - if (!Config.NoMemOpBoost && (isa(V) || isa(V))) - return Config.ReqChainDepth/2; - - return 1; - } - - // Returns the cost of the provided instruction using TTI. - // This does not handle loads and stores. 
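The `getDepthFactor()` logic above is the unit in which chain length is measured against `-bb-vectorize-req-chain-depth`. A stand-alone restatement under a toy opcode enum (illustrative, not the pass's API): insert/extractelement weigh nothing, memory operations weigh half the required depth so that a lone load/store pair can reach the threshold, and everything else weighs one.

#include <cassert>

enum class Op { InsertElement, ExtractElement, Load, Store, Add, Mul };

// Illustrative restatement of getDepthFactor(): chain length is the sum of
// these weights over the pairs in the chain.
static unsigned depthFactor(Op O, unsigned ReqChainDepth, bool NoMemOpBoost) {
  if (O == Op::InsertElement || O == Op::ExtractElement)
    return 0; // never usefully fused; ignored by the chain metric
  if (!NoMemOpBoost && (O == Op::Load || O == Op::Store))
    return ReqChainDepth / 2; // a load/store pair alone meets the threshold
  return 1;
}

int main() {
  const unsigned Req = 6; // default of -bb-vectorize-req-chain-depth
  // A paired load feeding a paired store already has depth 3 + 3 == Req.
  assert(depthFactor(Op::Load, Req, false) +
             depthFactor(Op::Store, Req, false) == Req);
  assert(depthFactor(Op::InsertElement, Req, false) == 0);
  assert(depthFactor(Op::Add, Req, false) == 1);
}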
- unsigned getInstrCost(unsigned Opcode, Type *T1, Type *T2, - TargetTransformInfo::OperandValueKind Op1VK = - TargetTransformInfo::OK_AnyValue, - TargetTransformInfo::OperandValueKind Op2VK = - TargetTransformInfo::OK_AnyValue, - const Instruction *I = nullptr) { - switch (Opcode) { - default: break; - case Instruction::GetElementPtr: - // We mark this instruction as zero-cost because scalar GEPs are usually - // lowered to the instruction addressing mode. At the moment we don't - // generate vector GEPs. - return 0; - case Instruction::Br: - return TTI->getCFInstrCost(Opcode); - case Instruction::PHI: - return 0; - case Instruction::Add: - case Instruction::FAdd: - case Instruction::Sub: - case Instruction::FSub: - case Instruction::Mul: - case Instruction::FMul: - case Instruction::UDiv: - case Instruction::SDiv: - case Instruction::FDiv: - case Instruction::URem: - case Instruction::SRem: - case Instruction::FRem: - case Instruction::Shl: - case Instruction::LShr: - case Instruction::AShr: - case Instruction::And: - case Instruction::Or: - case Instruction::Xor: - return TTI->getArithmeticInstrCost(Opcode, T1, Op1VK, Op2VK); - case Instruction::Select: - case Instruction::ICmp: - case Instruction::FCmp: - return TTI->getCmpSelInstrCost(Opcode, T1, T2, I); - case Instruction::ZExt: - case Instruction::SExt: - case Instruction::FPToUI: - case Instruction::FPToSI: - case Instruction::FPExt: - case Instruction::PtrToInt: - case Instruction::IntToPtr: - case Instruction::SIToFP: - case Instruction::UIToFP: - case Instruction::Trunc: - case Instruction::FPTrunc: - case Instruction::BitCast: - case Instruction::ShuffleVector: - return TTI->getCastInstrCost(Opcode, T1, T2, I); - } - - return 1; - } - - // This determines the relative offset of two loads or stores, returning - // true if the offset could be determined to be some constant value. - // For example, if OffsetInElmts == 1, then J accesses the memory directly - // after I; if OffsetInElmts == -1 then I accesses the memory - // directly after J. - bool getPairPtrInfo(Instruction *I, Instruction *J, - Value *&IPtr, Value *&JPtr, unsigned &IAlignment, unsigned &JAlignment, - unsigned &IAddressSpace, unsigned &JAddressSpace, - int64_t &OffsetInElmts, bool ComputeOffset = true) { - OffsetInElmts = 0; - if (LoadInst *LI = dyn_cast(I)) { - LoadInst *LJ = cast(J); - IPtr = LI->getPointerOperand(); - JPtr = LJ->getPointerOperand(); - IAlignment = LI->getAlignment(); - JAlignment = LJ->getAlignment(); - IAddressSpace = LI->getPointerAddressSpace(); - JAddressSpace = LJ->getPointerAddressSpace(); - } else { - StoreInst *SI = cast(I), *SJ = cast(J); - IPtr = SI->getPointerOperand(); - JPtr = SJ->getPointerOperand(); - IAlignment = SI->getAlignment(); - JAlignment = SJ->getAlignment(); - IAddressSpace = SI->getPointerAddressSpace(); - JAddressSpace = SJ->getPointerAddressSpace(); - } - - if (!ComputeOffset) - return true; - - const SCEV *IPtrSCEV = SE->getSCEV(IPtr); - const SCEV *JPtrSCEV = SE->getSCEV(JPtr); - - // If this is a trivial offset, then we'll get something like - // 1*sizeof(type). With target data, which we need anyway, this will get - // constant folded into a number. 
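`getPairPtrInfo()` finishes just below by collapsing the SCEV difference of the two pointers into an element-granular offset; pairing then only asks whether that offset is a whole number of elements equal to ±1. The divisibility step, restated over a plain byte offset (hypothetical helper, no SCEV involved):

#include <cassert>
#include <cstdint>
#include <cstdlib>

// Given the byte distance between two accessed addresses and the store size
// of the accessed type, recover the distance in whole elements. Returns false
// when the byte offset is not a multiple of the element size.
static bool offsetInElements(int64_t ByteOffset, int64_t TypeStoreSize,
                             int64_t &OffsetInElmts) {
  OffsetInElmts = ByteOffset / TypeStoreSize;
  return (std::abs(ByteOffset) % TypeStoreSize) == 0;
}

int main() {
  int64_t Elts = 0;
  // Two i32 accesses 4 bytes apart: adjacent, candidates for a <2 x i32> op.
  assert(offsetInElements(4, 4, Elts) && Elts == 1);
  // 4 bytes apart at i64 width: half an element off, so the pair is rejected.
  assert(!offsetInElements(4, 8, Elts));
}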
- const SCEV *OffsetSCEV = SE->getMinusSCEV(JPtrSCEV, IPtrSCEV); - if (const SCEVConstant *ConstOffSCEV = - dyn_cast(OffsetSCEV)) { - ConstantInt *IntOff = ConstOffSCEV->getValue(); - int64_t Offset = IntOff->getSExtValue(); - const DataLayout &DL = I->getModule()->getDataLayout(); - Type *VTy = IPtr->getType()->getPointerElementType(); - int64_t VTyTSS = (int64_t)DL.getTypeStoreSize(VTy); - - Type *VTy2 = JPtr->getType()->getPointerElementType(); - if (VTy != VTy2 && Offset < 0) { - int64_t VTy2TSS = (int64_t)DL.getTypeStoreSize(VTy2); - OffsetInElmts = Offset/VTy2TSS; - return (std::abs(Offset) % VTy2TSS) == 0; - } - - OffsetInElmts = Offset/VTyTSS; - return (std::abs(Offset) % VTyTSS) == 0; - } - - return false; - } - - // Returns true if the provided CallInst represents an intrinsic that can - // be vectorized. - bool isVectorizableIntrinsic(CallInst* I) { - Function *F = I->getCalledFunction(); - if (!F) return false; - - Intrinsic::ID IID = F->getIntrinsicID(); - if (!IID) return false; - - switch(IID) { - default: - return false; - case Intrinsic::sqrt: - case Intrinsic::powi: - case Intrinsic::sin: - case Intrinsic::cos: - case Intrinsic::log: - case Intrinsic::log2: - case Intrinsic::log10: - case Intrinsic::exp: - case Intrinsic::exp2: - case Intrinsic::pow: - case Intrinsic::round: - case Intrinsic::copysign: - case Intrinsic::ceil: - case Intrinsic::nearbyint: - case Intrinsic::rint: - case Intrinsic::trunc: - case Intrinsic::floor: - case Intrinsic::fabs: - case Intrinsic::minnum: - case Intrinsic::maxnum: - return Config.VectorizeMath; - case Intrinsic::bswap: - case Intrinsic::ctpop: - case Intrinsic::ctlz: - case Intrinsic::cttz: - return Config.VectorizeBitManipulations; - case Intrinsic::fma: - case Intrinsic::fmuladd: - return Config.VectorizeFMA; - } - } - - bool isPureIEChain(InsertElementInst *IE) { - InsertElementInst *IENext = IE; - do { - if (!isa(IENext->getOperand(0)) && - !isa(IENext->getOperand(0))) { - return false; - } - } while ((IENext = - dyn_cast(IENext->getOperand(0)))); - - return true; - } - }; - - // This function implements one vectorization iteration on the provided - // basic block. It returns true if the block is changed. - bool BBVectorize::vectorizePairs(BasicBlock &BB, bool NonPow2Len) { - bool ShouldContinue; - BasicBlock::iterator Start = BB.getFirstInsertionPt(); - - std::vector AllPairableInsts; - DenseMap AllChosenPairs; - DenseSet AllFixedOrderPairs; - DenseMap AllPairConnectionTypes; - DenseMap > AllConnectedPairs, - AllConnectedPairDeps; - - do { - std::vector PairableInsts; - DenseMap > CandidatePairs; - DenseSet FixedOrderPairs; - DenseMap CandidatePairCostSavings; - ShouldContinue = getCandidatePairs(BB, Start, CandidatePairs, - FixedOrderPairs, - CandidatePairCostSavings, - PairableInsts, NonPow2Len); - if (PairableInsts.empty()) continue; - - // Build the candidate pair set for faster lookups. - DenseSet CandidatePairsSet; - for (DenseMap >::iterator I = - CandidatePairs.begin(), E = CandidatePairs.end(); I != E; ++I) - for (std::vector::iterator J = I->second.begin(), - JE = I->second.end(); J != JE; ++J) - CandidatePairsSet.insert(ValuePair(I->first, *J)); - - // Now we have a map of all of the pairable instructions and we need to - // select the best possible pairing. A good pairing is one such that the - // users of the pair are also paired. This defines a (directed) forest - // over the pairs such that two pairs are connected iff the second pair - // uses the first. 
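As the loop above shows, candidate pairs live in two shapes at once: a map from each first member to all of its possible partners, and a flattened pair set for constant-time membership tests during the later graph walks. The same flattening, with standard containers standing in for `DenseMap`/`DenseSet`:

#include <cassert>
#include <map>
#include <set>
#include <string>
#include <utility>
#include <vector>

using Value = std::string; // stand-in for llvm::Value*
using ValuePair = std::pair<Value, Value>;

int main() {
  // CandidatePairs: first member -> every instruction it could pair with.
  std::map<Value, std::vector<Value>> CandidatePairs{
      {"a", {"b", "c"}}, {"x", {"y"}}};

  // CandidatePairsSet: the same information flattened for fast lookup.
  std::set<ValuePair> CandidatePairsSet;
  for (const auto &KV : CandidatePairs)
    for (const Value &J : KV.second)
      CandidatePairsSet.insert({KV.first, J});

  assert(CandidatePairsSet.count({"a", "c"}) == 1);
  assert(CandidatePairsSet.count({"c", "a"}) == 0); // ordered, not symmetric
}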
- - // Note that it only matters that both members of the second pair use some - // element of the first pair (to allow for splatting). - - DenseMap > ConnectedPairs, - ConnectedPairDeps; - DenseMap PairConnectionTypes; - computeConnectedPairs(CandidatePairs, CandidatePairsSet, - PairableInsts, ConnectedPairs, PairConnectionTypes); - if (ConnectedPairs.empty()) continue; - - for (DenseMap >::iterator - I = ConnectedPairs.begin(), IE = ConnectedPairs.end(); - I != IE; ++I) - for (std::vector::iterator J = I->second.begin(), - JE = I->second.end(); J != JE; ++J) - ConnectedPairDeps[*J].push_back(I->first); - - // Build the pairable-instruction dependency map - DenseSet PairableInstUsers; - buildDepMap(BB, CandidatePairs, PairableInsts, PairableInstUsers); - - // There is now a graph of the connected pairs. For each variable, pick - // the pairing with the largest dag meeting the depth requirement on at - // least one branch. Then select all pairings that are part of that dag - // and remove them from the list of available pairings and pairable - // variables. - - DenseMap ChosenPairs; - choosePairs(CandidatePairs, CandidatePairsSet, - CandidatePairCostSavings, - PairableInsts, FixedOrderPairs, PairConnectionTypes, - ConnectedPairs, ConnectedPairDeps, - PairableInstUsers, ChosenPairs); - - if (ChosenPairs.empty()) continue; - AllPairableInsts.insert(AllPairableInsts.end(), PairableInsts.begin(), - PairableInsts.end()); - AllChosenPairs.insert(ChosenPairs.begin(), ChosenPairs.end()); - - // Only for the chosen pairs, propagate information on fixed-order pairs, - // pair connections, and their types to the data structures used by the - // pair fusion procedures. - for (DenseMap::iterator I = ChosenPairs.begin(), - IE = ChosenPairs.end(); I != IE; ++I) { - if (FixedOrderPairs.count(*I)) - AllFixedOrderPairs.insert(*I); - else if (FixedOrderPairs.count(ValuePair(I->second, I->first))) - AllFixedOrderPairs.insert(ValuePair(I->second, I->first)); - - for (DenseMap::iterator J = ChosenPairs.begin(); - J != IE; ++J) { - DenseMap::iterator K = - PairConnectionTypes.find(VPPair(*I, *J)); - if (K != PairConnectionTypes.end()) { - AllPairConnectionTypes.insert(*K); - } else { - K = PairConnectionTypes.find(VPPair(*J, *I)); - if (K != PairConnectionTypes.end()) - AllPairConnectionTypes.insert(*K); - } - } - } - - for (DenseMap >::iterator - I = ConnectedPairs.begin(), IE = ConnectedPairs.end(); - I != IE; ++I) - for (std::vector::iterator J = I->second.begin(), - JE = I->second.end(); J != JE; ++J) - if (AllPairConnectionTypes.count(VPPair(I->first, *J))) { - AllConnectedPairs[I->first].push_back(*J); - AllConnectedPairDeps[*J].push_back(I->first); - } - } while (ShouldContinue); - - if (AllChosenPairs.empty()) return false; - NumFusedOps += AllChosenPairs.size(); - - // A set of pairs has now been selected. It is now necessary to replace the - // paired instructions with vector instructions. For this procedure each - // operand must be replaced with a vector operand. This vector is formed - // by using build_vector on the old operands. The replaced values are then - // replaced with a vector_extract on the result. Subsequent optimization - // passes should coalesce the build/extract combinations. - - fuseChosenPairs(BB, AllPairableInsts, AllChosenPairs, AllFixedOrderPairs, - AllPairConnectionTypes, - AllConnectedPairs, AllConnectedPairDeps); - - // It is important to cleanup here so that future iterations of this - // function have less work to do. 
- (void)SimplifyInstructionsInBlock(&BB, TLI); - return true; - } - - // This function returns true if the provided instruction is capable of being - // fused into a vector instruction. This determination is based only on the - // type and other attributes of the instruction. - bool BBVectorize::isInstVectorizable(Instruction *I, - bool &IsSimpleLoadStore) { - IsSimpleLoadStore = false; - - if (CallInst *C = dyn_cast(I)) { - if (!isVectorizableIntrinsic(C)) - return false; - } else if (LoadInst *L = dyn_cast(I)) { - // Vectorize simple loads if possbile: - IsSimpleLoadStore = L->isSimple(); - if (!IsSimpleLoadStore || !Config.VectorizeMemOps) - return false; - } else if (StoreInst *S = dyn_cast(I)) { - // Vectorize simple stores if possbile: - IsSimpleLoadStore = S->isSimple(); - if (!IsSimpleLoadStore || !Config.VectorizeMemOps) - return false; - } else if (CastInst *C = dyn_cast(I)) { - // We can vectorize casts, but not casts of pointer types, etc. - if (!Config.VectorizeCasts) - return false; - - Type *SrcTy = C->getSrcTy(); - if (!SrcTy->isSingleValueType()) - return false; - - Type *DestTy = C->getDestTy(); - if (!DestTy->isSingleValueType()) - return false; - } else if (SelectInst *SI = dyn_cast(I)) { - if (!Config.VectorizeSelect) - return false; - // We can vectorize a select if either all operands are scalars, - // or all operands are vectors. Trying to "widen" a select between - // vectors that has a scalar condition results in a malformed select. - // FIXME: We could probably be smarter about this by rewriting the select - // with different types instead. - return (SI->getCondition()->getType()->isVectorTy() == - SI->getTrueValue()->getType()->isVectorTy()); - } else if (isa(I)) { - if (!Config.VectorizeCmp) - return false; - } else if (GetElementPtrInst *G = dyn_cast(I)) { - if (!Config.VectorizeGEP) - return false; - - // Currently, vector GEPs exist only with one index. - if (G->getNumIndices() != 1) - return false; - } else if (!(I->isBinaryOp() || isa(I) || - isa(I) || isa(I))) { - return false; - } - - Type *T1, *T2; - getInstructionTypes(I, T1, T2); - - // Not every type can be vectorized... - if (!(VectorType::isValidElementType(T1) || T1->isVectorTy()) || - !(VectorType::isValidElementType(T2) || T2->isVectorTy())) - return false; - - if (T1->getScalarSizeInBits() == 1) { - if (!Config.VectorizeBools) - return false; - } else { - if (!Config.VectorizeInts && T1->isIntOrIntVectorTy()) - return false; - } - - if (T2->getScalarSizeInBits() == 1) { - if (!Config.VectorizeBools) - return false; - } else { - if (!Config.VectorizeInts && T2->isIntOrIntVectorTy()) - return false; - } - - if (!Config.VectorizeFloats - && (T1->isFPOrFPVectorTy() || T2->isFPOrFPVectorTy())) - return false; - - // Don't vectorize target-specific types. - if (T1->isX86_FP80Ty() || T1->isPPC_FP128Ty() || T1->isX86_MMXTy()) - return false; - if (T2->isX86_FP80Ty() || T2->isPPC_FP128Ty() || T2->isX86_MMXTy()) - return false; - - if (!Config.VectorizePointers && (T1->getScalarType()->isPointerTy() || - T2->getScalarType()->isPointerTy())) - return false; - - if (!TTI && (T1->getPrimitiveSizeInBits() >= Config.VectorBits || - T2->getPrimitiveSizeInBits() >= Config.VectorBits)) - return false; - - return true; - } - - // This function returns true if the two provided instructions are compatible - // (meaning that they can be fused into a vector instruction). This assumes - // that I has already been determined to be vectorizable and that J is not - // in the use dag of I. 
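`isInstVectorizable()` above gates on each type individually; `areInstsCompatible()` below additionally gates on the combined width, since fusing two values concatenates their element counts via `getVecTypeForPair()`. A small model of that arithmetic and the register-width check (toy structs, defaults taken from the flags above):

#include <cassert>

struct VecTy {
  unsigned ScalarBits; // width of the element type
  unsigned NumElts;    // 1 for scalars
};

// getVecTypeForPair(): fusing two values concatenates their element counts;
// the scalar types must already agree.
static VecTy vecTypeForPair(VecTy A, VecTy B) {
  assert(A.ScalarBits == B.ScalarBits &&
         "Cannot form vector from incompatible scalar types");
  return {A.ScalarBits, A.NumElts + B.NumElts};
}

int main() {
  const unsigned VectorBits = 128; // default of -bb-vectorize-vector-bits
  VecTy F32{32, 1}, F32x2{32, 2};

  // Two scalar floats fuse to <2 x float>: well inside a 128-bit register.
  VecTy P = vecTypeForPair(F32, F32);
  assert(P.NumElts == 2 && P.ScalarBits * P.NumElts <= VectorBits);

  // Two <2 x float> values fuse to <4 x float>: exactly the register width.
  VecTy Q = vecTypeForPair(F32x2, F32x2);
  assert(Q.ScalarBits * Q.NumElts == VectorBits);
}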
- bool BBVectorize::areInstsCompatible(Instruction *I, Instruction *J, - bool IsSimpleLoadStore, bool NonPow2Len, - int &CostSavings, int &FixedOrder) { - DEBUG(if (DebugInstructionExamination) dbgs() << "BBV: looking at " << *I << - " <-> " << *J << "\n"); - - CostSavings = 0; - FixedOrder = 0; - - // Loads and stores can be merged if they have different alignments, - // but are otherwise the same. - if (!J->isSameOperationAs(I, Instruction::CompareIgnoringAlignment | - (NonPow2Len ? Instruction::CompareUsingScalarTypes : 0))) - return false; - - Type *IT1, *IT2, *JT1, *JT2; - getInstructionTypes(I, IT1, IT2); - getInstructionTypes(J, JT1, JT2); - unsigned MaxTypeBits = std::max( - IT1->getPrimitiveSizeInBits() + JT1->getPrimitiveSizeInBits(), - IT2->getPrimitiveSizeInBits() + JT2->getPrimitiveSizeInBits()); - if (!TTI && MaxTypeBits > Config.VectorBits) - return false; - - // FIXME: handle addsub-type operations! - - if (IsSimpleLoadStore) { - Value *IPtr, *JPtr; - unsigned IAlignment, JAlignment, IAddressSpace, JAddressSpace; - int64_t OffsetInElmts = 0; - if (getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment, - IAddressSpace, JAddressSpace, OffsetInElmts) && - std::abs(OffsetInElmts) == 1) { - FixedOrder = (int) OffsetInElmts; - unsigned BottomAlignment = IAlignment; - if (OffsetInElmts < 0) BottomAlignment = JAlignment; - - Type *aTypeI = isa(I) ? - cast(I)->getValueOperand()->getType() : I->getType(); - Type *aTypeJ = isa(J) ? - cast(J)->getValueOperand()->getType() : J->getType(); - Type *VType = getVecTypeForPair(aTypeI, aTypeJ); - - if (Config.AlignedOnly) { - // An aligned load or store is possible only if the instruction - // with the lower offset has an alignment suitable for the - // vector type. - const DataLayout &DL = I->getModule()->getDataLayout(); - unsigned VecAlignment = DL.getPrefTypeAlignment(VType); - if (BottomAlignment < VecAlignment) - return false; - } - - if (TTI) { - unsigned ICost = TTI->getMemoryOpCost(I->getOpcode(), aTypeI, - IAlignment, IAddressSpace); - unsigned JCost = TTI->getMemoryOpCost(J->getOpcode(), aTypeJ, - JAlignment, JAddressSpace); - unsigned VCost = TTI->getMemoryOpCost(I->getOpcode(), VType, - BottomAlignment, - IAddressSpace); - - ICost += TTI->getAddressComputationCost(aTypeI); - JCost += TTI->getAddressComputationCost(aTypeJ); - VCost += TTI->getAddressComputationCost(VType); - - if (VCost > ICost + JCost) - return false; - - // We don't want to fuse to a type that will be split, even - // if the two input types will also be split and there is no other - // associated cost. - unsigned VParts = TTI->getNumberOfParts(VType); - if (VParts > 1) - return false; - else if (!VParts && VCost == ICost + JCost) - return false; - - CostSavings = ICost + JCost - VCost; - } - } else { - return false; - } - } else if (TTI) { - TargetTransformInfo::OperandValueKind Op1VK = - TargetTransformInfo::OK_AnyValue; - TargetTransformInfo::OperandValueKind Op2VK = - TargetTransformInfo::OK_AnyValue; - unsigned ICost = getInstrCost(I->getOpcode(), IT1, IT2, Op1VK, Op2VK, I); - unsigned JCost = getInstrCost(J->getOpcode(), JT1, JT2, Op1VK, Op2VK, J); - Type *VT1 = getVecTypeForPair(IT1, JT1), - *VT2 = getVecTypeForPair(IT2, JT2); - - // On some targets (example X86) the cost of a vector shift may vary - // depending on whether the second operand is a Uniform or - // NonUniform Constant. 
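The shift special case applied just below boils down to classifying the merged shift amount: equal scalar constants splat into a uniform vector constant, unequal ones into a non-uniform constant vector, and some targets (notably X86) price those differently. A compact restatement with `std::optional<int>` standing in for a possibly-constant operand:

#include <cassert>
#include <optional>

enum class OperandKind { AnyValue, UniformConstant, NonUniformConstant };

// Classify the merged second operand of two scalar shifts: both constant and
// equal -> splat (uniform), both constant but different -> non-uniform
// constant vector, anything else -> unknown at compile time.
static OperandKind classifyShiftAmounts(std::optional<int> CI,
                                        std::optional<int> CJ) {
  if (CI && CJ)
    return (*CI == *CJ) ? OperandKind::UniformConstant
                        : OperandKind::NonUniformConstant;
  return OperandKind::AnyValue;
}

int main() {
  assert(classifyShiftAmounts(3, 3) == OperandKind::UniformConstant);
  assert(classifyShiftAmounts(3, 5) == OperandKind::NonUniformConstant);
  assert(classifyShiftAmounts(std::nullopt, 5) == OperandKind::AnyValue);
}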
- switch (I->getOpcode()) { - default : break; - case Instruction::Shl: - case Instruction::LShr: - case Instruction::AShr: - - // If both I and J are scalar shifts by constant, then the - // merged vector shift count would be either a constant splat value - // or a non-uniform vector of constants. - if (ConstantInt *CII = dyn_cast(I->getOperand(1))) { - if (ConstantInt *CIJ = dyn_cast(J->getOperand(1))) - Op2VK = CII == CIJ ? TargetTransformInfo::OK_UniformConstantValue : - TargetTransformInfo::OK_NonUniformConstantValue; - } else { - // Check for a splat of a constant or for a non uniform vector - // of constants. - Value *IOp = I->getOperand(1); - Value *JOp = J->getOperand(1); - if ((isa(IOp) || isa(IOp)) && - (isa(JOp) || isa(JOp))) { - Op2VK = TargetTransformInfo::OK_NonUniformConstantValue; - Constant *SplatValue = cast(IOp)->getSplatValue(); - if (SplatValue != nullptr && - SplatValue == cast(JOp)->getSplatValue()) - Op2VK = TargetTransformInfo::OK_UniformConstantValue; - } - } - } - - // Note that this procedure is incorrect for insert and extract element - // instructions (because combining these often results in a shuffle), - // but this cost is ignored (because insert and extract element - // instructions are assigned a zero depth factor and are not really - // fused in general). - unsigned VCost = getInstrCost(I->getOpcode(), VT1, VT2, Op1VK, Op2VK, I); - - if (VCost > ICost + JCost) - return false; - - // We don't want to fuse to a type that will be split, even - // if the two input types will also be split and there is no other - // associated cost. - unsigned VParts1 = TTI->getNumberOfParts(VT1), - VParts2 = TTI->getNumberOfParts(VT2); - if (VParts1 > 1 || VParts2 > 1) - return false; - else if ((!VParts1 || !VParts2) && VCost == ICost + JCost) - return false; - - CostSavings = ICost + JCost - VCost; - } - - // The powi,ctlz,cttz intrinsics are special because only the first - // argument is vectorized, the second arguments must be equal. - CallInst *CI = dyn_cast(I); - Function *FI; - if (CI && (FI = CI->getCalledFunction())) { - Intrinsic::ID IID = FI->getIntrinsicID(); - if (IID == Intrinsic::powi || IID == Intrinsic::ctlz || - IID == Intrinsic::cttz) { - Value *A1I = CI->getArgOperand(1), - *A1J = cast(J)->getArgOperand(1); - const SCEV *A1ISCEV = SE->getSCEV(A1I), - *A1JSCEV = SE->getSCEV(A1J); - return (A1ISCEV == A1JSCEV); - } - - if (IID && TTI) { - FastMathFlags FMFCI; - if (auto *FPMOCI = dyn_cast(CI)) - FMFCI = FPMOCI->getFastMathFlags(); - SmallVector IArgs(CI->arg_operands()); - unsigned ICost = TTI->getIntrinsicInstrCost(IID, IT1, IArgs, FMFCI); - - CallInst *CJ = cast(J); - - FastMathFlags FMFCJ; - if (auto *FPMOCJ = dyn_cast(CJ)) - FMFCJ = FPMOCJ->getFastMathFlags(); - - SmallVector JArgs(CJ->arg_operands()); - unsigned JCost = TTI->getIntrinsicInstrCost(IID, JT1, JArgs, FMFCJ); - - assert(CI->getNumArgOperands() == CJ->getNumArgOperands() && - "Intrinsic argument counts differ"); - SmallVector Tys; - SmallVector VecArgs; - for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) { - if ((IID == Intrinsic::powi || IID == Intrinsic::ctlz || - IID == Intrinsic::cttz) && i == 1) { - Tys.push_back(CI->getArgOperand(i)->getType()); - VecArgs.push_back(CI->getArgOperand(i)); - } - else { - Tys.push_back(getVecTypeForPair(CI->getArgOperand(i)->getType(), - CJ->getArgOperand(i)->getType())); - // Add both operands, and then count their scalarization overhead - // with VF 1. 
- VecArgs.push_back(CI->getArgOperand(i)); - VecArgs.push_back(CJ->getArgOperand(i)); - } - } - - // Compute the scalarization cost here with the original operands (to - // check for uniqueness etc), and then call getIntrinsicInstrCost() - // with the constructed vector types. - Type *RetTy = getVecTypeForPair(IT1, JT1); - unsigned ScalarizationCost = 0; - if (!RetTy->isVoidTy()) - ScalarizationCost += TTI->getScalarizationOverhead(RetTy, true, false); - ScalarizationCost += TTI->getOperandsScalarizationOverhead(VecArgs, 1); - - FastMathFlags FMFV = FMFCI; - FMFV &= FMFCJ; - unsigned VCost = TTI->getIntrinsicInstrCost(IID, RetTy, Tys, FMFV, - ScalarizationCost); - - if (VCost > ICost + JCost) - return false; - - // We don't want to fuse to a type that will be split, even - // if the two input types will also be split and there is no other - // associated cost. - unsigned RetParts = TTI->getNumberOfParts(RetTy); - if (RetParts > 1) - return false; - else if (!RetParts && VCost == ICost + JCost) - return false; - - for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) { - if (!Tys[i]->isVectorTy()) - continue; - - unsigned NumParts = TTI->getNumberOfParts(Tys[i]); - if (NumParts > 1) - return false; - else if (!NumParts && VCost == ICost + JCost) - return false; - } - - CostSavings = ICost + JCost - VCost; - } - } - - return true; - } - - // Figure out whether or not J uses I and update the users and write-set - // structures associated with I. Specifically, Users represents the set of - // instructions that depend on I. WriteSet represents the set - // of memory locations that are dependent on I. If UpdateUsers is true, - // and J uses I, then Users is updated to contain J and WriteSet is updated - // to contain any memory locations to which J writes. The function returns - // true if J uses I. By default, alias analysis is used to determine - // whether J reads from memory that overlaps with a location in WriteSet. - // If LoadMoveSet is not null, then it is a previously-computed map - // where the key is the memory-based user instruction and the value is - // the instruction to be compared with I. So, if LoadMoveSet is provided, - // then the alias analysis is not used. This is necessary because this - // function is called during the process of moving instructions during - // vectorization and the results of the alias analysis are not stable during - // that process. - bool BBVectorize::trackUsesOfI(DenseSet &Users, - AliasSetTracker &WriteSet, Instruction *I, - Instruction *J, bool UpdateUsers, - DenseSet *LoadMoveSetPairs) { - bool UsesI = false; - - // This instruction may already be marked as a user due, for example, to - // being a member of a selected pair. - if (Users.count(J)) - UsesI = true; - - if (!UsesI) - for (User::op_iterator JU = J->op_begin(), JE = J->op_end(); - JU != JE; ++JU) { - Value *V = *JU; - if (I == V || Users.count(V)) { - UsesI = true; - break; - } - } - if (!UsesI && J->mayReadFromMemory()) { - if (LoadMoveSetPairs) { - UsesI = LoadMoveSetPairs->count(ValuePair(J, I)); - } else { - for (AliasSetTracker::iterator W = WriteSet.begin(), - WE = WriteSet.end(); W != WE; ++W) { - if (W->aliasesUnknownInst(J, *AA)) { - UsesI = true; - break; - } - } - } - } - - if (UsesI && UpdateUsers) { - if (J->mayWriteToMemory()) WriteSet.add(J); - Users.insert(J); - } - - return UsesI; - } - - // This function iterates over all instruction pairs in the provided - // basic block and collects all candidate pairs for vectorization. 
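`getCandidatePairs()` below is a bounded window scan driven by `trackUsesOfI()` above: for each instruction it examines at most `SearchLimit` successors, and under `-bb-vectorize-fast-dep` it abandons the window at the first user of I instead of merely skipping it. A skeletal version over abstract `Uses`/`Compatible` predicates (all names illustrative):

#include <cassert>
#include <functional>
#include <utility>
#include <vector>

using Pairs = std::vector<std::pair<int, int>>;

// Insts are instruction indices in block order; Uses(J, I) says whether J
// depends on I; Compatible(I, J) says whether the two could fuse.
static Pairs scan(const std::vector<int> &Insts, unsigned SearchLimit,
                  bool FastDep, std::function<bool(int, int)> Uses,
                  std::function<bool(int, int)> Compatible) {
  Pairs Candidates;
  for (size_t i = 0; i < Insts.size(); ++i) {
    for (size_t j = i + 1, ss = 0; j < Insts.size() && ss <= SearchLimit;
         ++j, ++ss) {
      if (Uses(Insts[j], Insts[i])) {
        if (FastDep)
          break;  // fast mode: stop the window at the first use of I
        continue; // precise mode: skip users but keep searching
      }
      if (Compatible(Insts[i], Insts[j]))
        Candidates.push_back({Insts[i], Insts[j]});
    }
  }
  return Candidates;
}

int main() {
  std::vector<int> B{0, 1, 2, 3};
  auto Uses = [](int J, int I) { return J == I + 1; }; // each uses its predecessor
  auto Compat = [](int, int) { return true; };
  // With FastDep every inner walk stops immediately, so nothing pairs.
  assert(scan(B, 400, true, Uses, Compat).empty());
  // Without it, independent non-adjacent instructions still pair, e.g. (0,2).
  assert(!scan(B, 400, false, Uses, Compat).empty());
}

The fast mode trades missed pairs for speed; as the comment above notes, it only pays off when independent operations are already intermixed, as after some kinds of grouped loop unrolling.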
- bool BBVectorize::getCandidatePairs(BasicBlock &BB, - BasicBlock::iterator &Start, - DenseMap > &CandidatePairs, - DenseSet &FixedOrderPairs, - DenseMap &CandidatePairCostSavings, - std::vector &PairableInsts, bool NonPow2Len) { - size_t TotalPairs = 0; - BasicBlock::iterator E = BB.end(); - if (Start == E) return false; - - bool ShouldContinue = false, IAfterStart = false; - for (BasicBlock::iterator I = Start++; I != E; ++I) { - if (I == Start) IAfterStart = true; - - bool IsSimpleLoadStore; - if (!isInstVectorizable(&*I, IsSimpleLoadStore)) - continue; - - // Look for an instruction with which to pair instruction *I... - DenseSet Users; - AliasSetTracker WriteSet(*AA); - if (I->mayWriteToMemory()) - WriteSet.add(&*I); - - bool JAfterStart = IAfterStart; - BasicBlock::iterator J = std::next(I); - for (unsigned ss = 0; J != E && ss <= Config.SearchLimit; ++J, ++ss) { - if (J == Start) - JAfterStart = true; - - // Determine if J uses I, if so, exit the loop. - bool UsesI = trackUsesOfI(Users, WriteSet, &*I, &*J, !Config.FastDep); - if (Config.FastDep) { - // Note: For this heuristic to be effective, independent operations - // must tend to be intermixed. This is likely to be true from some - // kinds of grouped loop unrolling (but not the generic LLVM pass), - // but otherwise may require some kind of reordering pass. - - // When using fast dependency analysis, - // stop searching after first use: - if (UsesI) break; - } else { - if (UsesI) continue; - } - - // J does not use I, and comes before the first use of I, so it can be - // merged with I if the instructions are compatible. - int CostSavings, FixedOrder; - if (!areInstsCompatible(&*I, &*J, IsSimpleLoadStore, NonPow2Len, - CostSavings, FixedOrder)) - continue; - - // J is a candidate for merging with I. - if (PairableInsts.empty() || - PairableInsts[PairableInsts.size() - 1] != &*I) { - PairableInsts.push_back(&*I); - } - - CandidatePairs[&*I].push_back(&*J); - ++TotalPairs; - if (TTI) - CandidatePairCostSavings.insert( - ValuePairWithCost(ValuePair(&*I, &*J), CostSavings)); - - if (FixedOrder == 1) - FixedOrderPairs.insert(ValuePair(&*I, &*J)); - else if (FixedOrder == -1) - FixedOrderPairs.insert(ValuePair(&*J, &*I)); - - // The next call to this function must start after the last instruction - // selected during this invocation. - if (JAfterStart) { - Start = std::next(J); - IAfterStart = JAfterStart = false; - } - - DEBUG(if (DebugCandidateSelection) dbgs() << "BBV: candidate pair " - << *I << " <-> " << *J << " (cost savings: " << - CostSavings << ")\n"); - - // If we have already found too many pairs, break here and this function - // will be called again starting after the last instruction selected - // during this invocation. - if (PairableInsts.size() >= Config.MaxInsts || - TotalPairs >= Config.MaxPairs) { - ShouldContinue = true; - break; - } - } - - if (ShouldContinue) - break; - } - - DEBUG(dbgs() << "BBV: found " << PairableInsts.size() - << " instructions with candidate pairs\n"); - - return ShouldContinue; - } - - // Finds candidate pairs connected to the pair P = . This means that - // it looks for pairs such that both members have an input which is an - // output of PI or PJ. - void BBVectorize::computePairsConnectedTo( - DenseMap > &CandidatePairs, - DenseSet &CandidatePairsSet, - std::vector &PairableInsts, - DenseMap > &ConnectedPairs, - DenseMap &PairConnectionTypes, - ValuePair P) { - StoreInst *SI, *SJ; - - // For each possible pairing for this variable, look at the uses of - // the first value... 
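The classification performed below distinguishes how a pair's users can themselves pair: in the same order (direct), in the opposite order (swap), or with both users consuming the same member (splat). The first two cases, restated over an ordered candidate set (strings standing in for instructions):

#include <cassert>
#include <optional>
#include <set>
#include <string>
#include <utility>

using Value = std::string;
using ValuePair = std::pair<Value, Value>;
enum class Connection { Direct, Swap };

// Given a user UI of P.first and a user UJ of P.second, classify how the
// pair <P.first, P.second> connects to a candidate pair of those users.
static std::optional<Connection>
classify(const std::set<ValuePair> &Candidates, const Value &UI,
         const Value &UJ) {
  if (Candidates.count({UI, UJ}))
    return Connection::Direct; // users pair in the same order
  if (Candidates.count({UJ, UI}))
    return Connection::Swap;   // users pair in the opposite order
  return std::nullopt;
}

int main() {
  std::set<ValuePair> Candidates{{"c", "d"}};
  assert(classify(Candidates, "c", "d") == Connection::Direct);
  assert(classify(Candidates, "d", "c") == Connection::Swap);
  assert(!classify(Candidates, "c", "x").has_value());
}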
- for (Value::user_iterator I = P.first->user_begin(), - E = P.first->user_end(); - I != E; ++I) { - User *UI = *I; - if (isa(UI)) { - // A pair cannot be connected to a load because the load only takes one - // operand (the address) and it is a scalar even after vectorization. - continue; - } else if ((SI = dyn_cast(UI)) && - P.first == SI->getPointerOperand()) { - // Similarly, a pair cannot be connected to a store through its - // pointer operand. - continue; - } - - // For each use of the first variable, look for uses of the second - // variable... - for (User *UJ : P.second->users()) { - if ((SJ = dyn_cast(UJ)) && - P.second == SJ->getPointerOperand()) - continue; - - // Look for : - if (CandidatePairsSet.count(ValuePair(UI, UJ))) { - VPPair VP(P, ValuePair(UI, UJ)); - ConnectedPairs[VP.first].push_back(VP.second); - PairConnectionTypes.insert(VPPairWithType(VP, PairConnectionDirect)); - } - - // Look for : - if (CandidatePairsSet.count(ValuePair(UJ, UI))) { - VPPair VP(P, ValuePair(UJ, UI)); - ConnectedPairs[VP.first].push_back(VP.second); - PairConnectionTypes.insert(VPPairWithType(VP, PairConnectionSwap)); - } - } - - if (Config.SplatBreaksChain) continue; - // Look for cases where just the first value in the pair is used by - // both members of another pair (splatting). - for (Value::user_iterator J = P.first->user_begin(); J != E; ++J) { - User *UJ = *J; - if ((SJ = dyn_cast(UJ)) && - P.first == SJ->getPointerOperand()) - continue; - - if (CandidatePairsSet.count(ValuePair(UI, UJ))) { - VPPair VP(P, ValuePair(UI, UJ)); - ConnectedPairs[VP.first].push_back(VP.second); - PairConnectionTypes.insert(VPPairWithType(VP, PairConnectionSplat)); - } - } - } - - if (Config.SplatBreaksChain) return; - // Look for cases where just the second value in the pair is used by - // both members of another pair (splatting). - for (Value::user_iterator I = P.second->user_begin(), - E = P.second->user_end(); - I != E; ++I) { - User *UI = *I; - if (isa(UI)) - continue; - else if ((SI = dyn_cast(UI)) && - P.second == SI->getPointerOperand()) - continue; - - for (Value::user_iterator J = P.second->user_begin(); J != E; ++J) { - User *UJ = *J; - if ((SJ = dyn_cast(UJ)) && - P.second == SJ->getPointerOperand()) - continue; - - if (CandidatePairsSet.count(ValuePair(UI, UJ))) { - VPPair VP(P, ValuePair(UI, UJ)); - ConnectedPairs[VP.first].push_back(VP.second); - PairConnectionTypes.insert(VPPairWithType(VP, PairConnectionSplat)); - } - } - } - } - - // This function figures out which pairs are connected. Two pairs are - // connected if some output of the first pair forms an input to both members - // of the second pair. 
- void BBVectorize::computeConnectedPairs( - DenseMap > &CandidatePairs, - DenseSet &CandidatePairsSet, - std::vector &PairableInsts, - DenseMap > &ConnectedPairs, - DenseMap &PairConnectionTypes) { - for (std::vector::iterator PI = PairableInsts.begin(), - PE = PairableInsts.end(); PI != PE; ++PI) { - DenseMap >::iterator PP = - CandidatePairs.find(*PI); - if (PP == CandidatePairs.end()) - continue; - - for (std::vector::iterator P = PP->second.begin(), - E = PP->second.end(); P != E; ++P) - computePairsConnectedTo(CandidatePairs, CandidatePairsSet, - PairableInsts, ConnectedPairs, - PairConnectionTypes, ValuePair(*PI, *P)); - } - - DEBUG(size_t TotalPairs = 0; - for (DenseMap >::iterator I = - ConnectedPairs.begin(), IE = ConnectedPairs.end(); I != IE; ++I) - TotalPairs += I->second.size(); - dbgs() << "BBV: found " << TotalPairs - << " pair connections.\n"); - } - - // This function builds a set of use tuples such that is in the set - // if B is in the use dag of A. If B is in the use dag of A, then B - // depends on the output of A. - void BBVectorize::buildDepMap( - BasicBlock &BB, - DenseMap > &CandidatePairs, - std::vector &PairableInsts, - DenseSet &PairableInstUsers) { - DenseSet IsInPair; - for (DenseMap >::iterator C = - CandidatePairs.begin(), E = CandidatePairs.end(); C != E; ++C) { - IsInPair.insert(C->first); - IsInPair.insert(C->second.begin(), C->second.end()); - } - - // Iterate through the basic block, recording all users of each - // pairable instruction. - - BasicBlock::iterator E = BB.end(), EL = - BasicBlock::iterator(cast(PairableInsts.back())); - for (BasicBlock::iterator I = BB.getFirstInsertionPt(); I != E; ++I) { - if (IsInPair.find(&*I) == IsInPair.end()) - continue; - - DenseSet Users; - AliasSetTracker WriteSet(*AA); - if (I->mayWriteToMemory()) - WriteSet.add(&*I); - - for (BasicBlock::iterator J = std::next(I); J != E; ++J) { - (void)trackUsesOfI(Users, WriteSet, &*I, &*J); - - if (J == EL) - break; - } - - for (DenseSet::iterator U = Users.begin(), E = Users.end(); - U != E; ++U) { - if (IsInPair.find(*U) == IsInPair.end()) continue; - PairableInstUsers.insert(ValuePair(&*I, *U)); - } - - if (I == EL) - break; - } - } - - // Returns true if an input to pair P is an output of pair Q and also an - // input of pair Q is an output of pair P. If this is the case, then these - // two pairs cannot be simultaneously fused. - bool BBVectorize::pairsConflict(ValuePair P, ValuePair Q, - DenseSet &PairableInstUsers, - DenseMap > *PairableInstUserMap, - DenseSet *PairableInstUserPairSet) { - // Two pairs are in conflict if they are mutual Users of eachother. - bool QUsesP = PairableInstUsers.count(ValuePair(P.first, Q.first)) || - PairableInstUsers.count(ValuePair(P.first, Q.second)) || - PairableInstUsers.count(ValuePair(P.second, Q.first)) || - PairableInstUsers.count(ValuePair(P.second, Q.second)); - bool PUsesQ = PairableInstUsers.count(ValuePair(Q.first, P.first)) || - PairableInstUsers.count(ValuePair(Q.first, P.second)) || - PairableInstUsers.count(ValuePair(Q.second, P.first)) || - PairableInstUsers.count(ValuePair(Q.second, P.second)); - if (PairableInstUserMap) { - // FIXME: The expensive part of the cycle check is not so much the cycle - // check itself but this edge insertion procedure. This needs some - // profiling and probably a different data structure. 
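The two boolean queries above (`QUsesP`, `PUsesQ`) are the whole conflict test: one-sided use merely records an edge for the later cycle check, while mutual use is a hard conflict, since fusing both pairs would require an instruction to move both before and after the other pair. A restatement over an explicit (def, user) set:

#include <cassert>
#include <set>
#include <string>
#include <utility>

using Value = std::string; // stand-in for llvm::Value*
using ValuePair = std::pair<Value, Value>;

// Users contains (A, B) when B is in the use dag of A (B depends on A).
// True when some member of Q depends on some member of P.
static bool qUsesP(const std::set<ValuePair> &Users, const ValuePair &P,
                   const ValuePair &Q) {
  return Users.count({P.first, Q.first}) || Users.count({P.first, Q.second}) ||
         Users.count({P.second, Q.first}) || Users.count({P.second, Q.second});
}

// Mutual use is the hard conflict; one-sided use is merely an edge.
static bool pairsConflict(const std::set<ValuePair> &Users, const ValuePair &P,
                          const ValuePair &Q) {
  return qUsesP(Users, P, Q) && qUsesP(Users, Q, P);
}

int main() {
  // x depends on a, and b depends on y.
  std::set<ValuePair> Users{{"a", "x"}, {"y", "b"}};
  assert(pairsConflict(Users, {"a", "b"}, {"x", "y"}));   // mutual -> conflict
  std::set<ValuePair> OneWay{{"a", "x"}};
  assert(!pairsConflict(OneWay, {"a", "b"}, {"x", "y"})); // one-sided -> ok
}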
- if (PUsesQ) { - if (PairableInstUserPairSet->insert(VPPair(Q, P)).second) - (*PairableInstUserMap)[Q].push_back(P); - } - if (QUsesP) { - if (PairableInstUserPairSet->insert(VPPair(P, Q)).second) - (*PairableInstUserMap)[P].push_back(Q); - } - } - - return (QUsesP && PUsesQ); - } - - // This function walks the use graph of current pairs to see if, starting - // from P, the walk returns to P. - bool BBVectorize::pairWillFormCycle(ValuePair P, - DenseMap > &PairableInstUserMap, - DenseSet &CurrentPairs) { - DEBUG(if (DebugCycleCheck) - dbgs() << "BBV: starting cycle check for : " << *P.first << " <-> " - << *P.second << "\n"); - // A lookup table of visisted pairs is kept because the PairableInstUserMap - // contains non-direct associations. - DenseSet Visited; - SmallVector Q; - // General depth-first post-order traversal: - Q.push_back(P); - do { - ValuePair QTop = Q.pop_back_val(); - Visited.insert(QTop); - - DEBUG(if (DebugCycleCheck) - dbgs() << "BBV: cycle check visiting: " << *QTop.first << " <-> " - << *QTop.second << "\n"); - DenseMap >::iterator QQ = - PairableInstUserMap.find(QTop); - if (QQ == PairableInstUserMap.end()) - continue; - - for (std::vector::iterator C = QQ->second.begin(), - CE = QQ->second.end(); C != CE; ++C) { - if (*C == P) { - DEBUG(dbgs() - << "BBV: rejected to prevent non-trivial cycle formation: " - << QTop.first << " <-> " << C->second << "\n"); - return true; - } - - if (CurrentPairs.count(*C) && !Visited.count(*C)) - Q.push_back(*C); - } - } while (!Q.empty()); - - return false; - } - - // This function builds the initial dag of connected pairs with the - // pair J at the root. - void BBVectorize::buildInitialDAGFor( - DenseMap > &CandidatePairs, - DenseSet &CandidatePairsSet, - std::vector &PairableInsts, - DenseMap > &ConnectedPairs, - DenseSet &PairableInstUsers, - DenseMap &ChosenPairs, - DenseMap &DAG, ValuePair J) { - // Each of these pairs is viewed as the root node of a DAG. The DAG - // is then walked (depth-first). As this happens, we keep track of - // the pairs that compose the DAG and the maximum depth of the DAG. - SmallVector Q; - // General depth-first post-order traversal: - Q.push_back(ValuePairWithDepth(J, getDepthFactor(J.first))); - do { - ValuePairWithDepth QTop = Q.back(); - - // Push each child onto the queue: - bool MoreChildren = false; - size_t MaxChildDepth = QTop.second; - DenseMap >::iterator QQ = - ConnectedPairs.find(QTop.first); - if (QQ != ConnectedPairs.end()) - for (std::vector::iterator k = QQ->second.begin(), - ke = QQ->second.end(); k != ke; ++k) { - // Make sure that this child pair is still a candidate: - if (CandidatePairsSet.count(*k)) { - DenseMap::iterator C = DAG.find(*k); - if (C == DAG.end()) { - size_t d = getDepthFactor(k->first); - Q.push_back(ValuePairWithDepth(*k, QTop.second+d)); - MoreChildren = true; - } else { - MaxChildDepth = std::max(MaxChildDepth, C->second); - } - } - } - - if (!MoreChildren) { - // Record the current pair as part of the DAG: - DAG.insert(ValuePairWithDepth(QTop.first, MaxChildDepth)); - Q.pop_back(); - } - } while (!Q.empty()); - } - - // Given some initial dag, prune it by removing conflicting pairs (pairs - // that cannot be simultaneously chosen for vectorization). 
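`pairWillFormCycle()` above is an ordinary worklist DFS with a visited set; a cycle exists exactly when the walk over user edges, restricted to currently relevant pairs, returns to its starting pair. The same shape without the pass plumbing (illustrative types):

#include <cassert>
#include <map>
#include <set>
#include <string>
#include <vector>

using Pair = std::string; // stand-in for a ValuePair

// Returns true when, starting from Start and walking user edges restricted
// to Active pairs, the walk reaches Start again (a pairing-induced cycle).
static bool willFormCycle(const std::map<Pair, std::vector<Pair>> &UserEdges,
                          const std::set<Pair> &Active, const Pair &Start) {
  std::set<Pair> Visited;
  std::vector<Pair> Worklist{Start};
  while (!Worklist.empty()) {
    Pair Top = Worklist.back();
    Worklist.pop_back();
    Visited.insert(Top);
    auto It = UserEdges.find(Top);
    if (It == UserEdges.end())
      continue;
    for (const Pair &Next : It->second) {
      if (Next == Start)
        return true; // walked back to where we started
      if (Active.count(Next) && !Visited.count(Next))
        Worklist.push_back(Next);
    }
  }
  return false;
}

int main() {
  std::map<Pair, std::vector<Pair>> Edges{{"P", {"Q"}}, {"Q", {"P"}}};
  std::set<Pair> Active{"P", "Q"};
  assert(willFormCycle(Edges, Active, "P"));
  assert(!willFormCycle({{"P", {"Q"}}}, Active, "P"));
}

A FIXME later in the file suggests a topological-ordering algorithm could make this cheaper; the worklist formulation is the straightforward fallback.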
- void BBVectorize::pruneDAGFor( - DenseMap > &CandidatePairs, - std::vector &PairableInsts, - DenseMap > &ConnectedPairs, - DenseSet &PairableInstUsers, - DenseMap > &PairableInstUserMap, - DenseSet &PairableInstUserPairSet, - DenseMap &ChosenPairs, - DenseMap &DAG, - DenseSet &PrunedDAG, ValuePair J, - bool UseCycleCheck) { - SmallVector Q; - // General depth-first post-order traversal: - Q.push_back(ValuePairWithDepth(J, getDepthFactor(J.first))); - do { - ValuePairWithDepth QTop = Q.pop_back_val(); - PrunedDAG.insert(QTop.first); - - // Visit each child, pruning as necessary... - SmallVector BestChildren; - DenseMap >::iterator QQ = - ConnectedPairs.find(QTop.first); - if (QQ == ConnectedPairs.end()) - continue; - - for (std::vector::iterator K = QQ->second.begin(), - KE = QQ->second.end(); K != KE; ++K) { - DenseMap::iterator C = DAG.find(*K); - if (C == DAG.end()) continue; - - // This child is in the DAG, now we need to make sure it is the - // best of any conflicting children. There could be multiple - // conflicting children, so first, determine if we're keeping - // this child, then delete conflicting children as necessary. - - // It is also necessary to guard against pairing-induced - // dependencies. Consider instructions a .. x .. y .. b - // such that (a,b) are to be fused and (x,y) are to be fused - // but a is an input to x and b is an output from y. This - // means that y cannot be moved after b but x must be moved - // after b for (a,b) to be fused. In other words, after - // fusing (a,b) we have y .. a/b .. x where y is an input - // to a/b and x is an output to a/b: x and y can no longer - // be legally fused. To prevent this condition, we must - // make sure that a child pair added to the DAG is not - // both an input and output of an already-selected pair. - - // Pairing-induced dependencies can also form from more complicated - // cycles. The pair vs. pair conflicts are easy to check, and so - // that is done explicitly for "fast rejection", and because for - // child vs. child conflicts, we may prefer to keep the current - // pair in preference to the already-selected child. - DenseSet CurrentPairs; - - bool CanAdd = true; - for (SmallVectorImpl::iterator C2 - = BestChildren.begin(), E2 = BestChildren.end(); - C2 != E2; ++C2) { - if (C2->first.first == C->first.first || - C2->first.first == C->first.second || - C2->first.second == C->first.first || - C2->first.second == C->first.second || - pairsConflict(C2->first, C->first, PairableInstUsers, - UseCycleCheck ? &PairableInstUserMap : nullptr, - UseCycleCheck ? &PairableInstUserPairSet - : nullptr)) { - if (C2->second >= C->second) { - CanAdd = false; - break; - } - - CurrentPairs.insert(C2->first); - } - } - if (!CanAdd) continue; - - // Even worse, this child could conflict with another node already - // selected for the DAG. If that is the case, ignore this child. - for (DenseSet::iterator T = PrunedDAG.begin(), - E2 = PrunedDAG.end(); T != E2; ++T) { - if (T->first == C->first.first || - T->first == C->first.second || - T->second == C->first.first || - T->second == C->first.second || - pairsConflict(*T, C->first, PairableInstUsers, - UseCycleCheck ? &PairableInstUserMap : nullptr, - UseCycleCheck ? &PairableInstUserPairSet - : nullptr)) { - CanAdd = false; - break; - } - - CurrentPairs.insert(*T); - } - if (!CanAdd) continue; - - // And check the queue too... 
- for (SmallVectorImpl::iterator C2 = Q.begin(), - E2 = Q.end(); C2 != E2; ++C2) { - if (C2->first.first == C->first.first || - C2->first.first == C->first.second || - C2->first.second == C->first.first || - C2->first.second == C->first.second || - pairsConflict(C2->first, C->first, PairableInstUsers, - UseCycleCheck ? &PairableInstUserMap : nullptr, - UseCycleCheck ? &PairableInstUserPairSet - : nullptr)) { - CanAdd = false; - break; - } - - CurrentPairs.insert(C2->first); - } - if (!CanAdd) continue; - - // Last but not least, check for a conflict with any of the - // already-chosen pairs. - for (DenseMap::iterator C2 = - ChosenPairs.begin(), E2 = ChosenPairs.end(); - C2 != E2; ++C2) { - if (pairsConflict(*C2, C->first, PairableInstUsers, - UseCycleCheck ? &PairableInstUserMap : nullptr, - UseCycleCheck ? &PairableInstUserPairSet - : nullptr)) { - CanAdd = false; - break; - } - - CurrentPairs.insert(*C2); - } - if (!CanAdd) continue; - - // To check for non-trivial cycles formed by the addition of the - // current pair we've formed a list of all relevant pairs, now use a - // graph walk to check for a cycle. We start from the current pair and - // walk the use dag to see if we again reach the current pair. If we - // do, then the current pair is rejected. - - // FIXME: It may be more efficient to use a topological-ordering - // algorithm to improve the cycle check. This should be investigated. - if (UseCycleCheck && - pairWillFormCycle(C->first, PairableInstUserMap, CurrentPairs)) - continue; - - // This child can be added, but we may have chosen it in preference - // to an already-selected child. Check for this here, and if a - // conflict is found, then remove the previously-selected child - // before adding this one in its place. - for (SmallVectorImpl::iterator C2 - = BestChildren.begin(); C2 != BestChildren.end();) { - if (C2->first.first == C->first.first || - C2->first.first == C->first.second || - C2->first.second == C->first.first || - C2->first.second == C->first.second || - pairsConflict(C2->first, C->first, PairableInstUsers)) - C2 = BestChildren.erase(C2); - else - ++C2; - } - - BestChildren.push_back(ValuePairWithDepth(C->first, C->second)); - } - - for (SmallVectorImpl::iterator C - = BestChildren.begin(), E2 = BestChildren.end(); - C != E2; ++C) { - size_t DepthF = getDepthFactor(C->first.first); - Q.push_back(ValuePairWithDepth(C->first, QTop.second+DepthF)); - } - } while (!Q.empty()); - } - - // This function finds the best dag of mututally-compatible connected - // pairs, given the choice of root pairs as an iterator range. - void BBVectorize::findBestDAGFor( - DenseMap > &CandidatePairs, - DenseSet &CandidatePairsSet, - DenseMap &CandidatePairCostSavings, - std::vector &PairableInsts, - DenseSet &FixedOrderPairs, - DenseMap &PairConnectionTypes, - DenseMap > &ConnectedPairs, - DenseMap > &ConnectedPairDeps, - DenseSet &PairableInstUsers, - DenseMap > &PairableInstUserMap, - DenseSet &PairableInstUserPairSet, - DenseMap &ChosenPairs, - DenseSet &BestDAG, size_t &BestMaxDepth, - int &BestEffSize, Value *II, std::vector&JJ, - bool UseCycleCheck) { - for (std::vector::iterator J = JJ.begin(), JE = JJ.end(); - J != JE; ++J) { - ValuePair IJ(II, *J); - if (!CandidatePairsSet.count(IJ)) - continue; - - // Before going any further, make sure that this pair does not - // conflict with any already-selected pairs (see comment below - // near the DAG pruning for more details). 
- DenseSet ChosenPairSet; - bool DoesConflict = false; - for (DenseMap::iterator C = ChosenPairs.begin(), - E = ChosenPairs.end(); C != E; ++C) { - if (pairsConflict(*C, IJ, PairableInstUsers, - UseCycleCheck ? &PairableInstUserMap : nullptr, - UseCycleCheck ? &PairableInstUserPairSet : nullptr)) { - DoesConflict = true; - break; - } - - ChosenPairSet.insert(*C); - } - if (DoesConflict) continue; - - if (UseCycleCheck && - pairWillFormCycle(IJ, PairableInstUserMap, ChosenPairSet)) - continue; - - DenseMap DAG; - buildInitialDAGFor(CandidatePairs, CandidatePairsSet, - PairableInsts, ConnectedPairs, - PairableInstUsers, ChosenPairs, DAG, IJ); - - // Because we'll keep the child with the largest depth, the largest - // depth is still the same in the unpruned DAG. - size_t MaxDepth = DAG.lookup(IJ); - - DEBUG(if (DebugPairSelection) dbgs() << "BBV: found DAG for pair {" - << *IJ.first << " <-> " << *IJ.second << "} of depth " << - MaxDepth << " and size " << DAG.size() << "\n"); - - // At this point the DAG has been constructed, but, may contain - // contradictory children (meaning that different children of - // some dag node may be attempting to fuse the same instruction). - // So now we walk the dag again, in the case of a conflict, - // keep only the child with the largest depth. To break a tie, - // favor the first child. - - DenseSet PrunedDAG; - pruneDAGFor(CandidatePairs, PairableInsts, ConnectedPairs, - PairableInstUsers, PairableInstUserMap, - PairableInstUserPairSet, - ChosenPairs, DAG, PrunedDAG, IJ, UseCycleCheck); - - int EffSize = 0; - if (TTI) { - DenseSet PrunedDAGInstrs; - for (DenseSet::iterator S = PrunedDAG.begin(), - E = PrunedDAG.end(); S != E; ++S) { - PrunedDAGInstrs.insert(S->first); - PrunedDAGInstrs.insert(S->second); - } - - // The set of pairs that have already contributed to the total cost. - DenseSet IncomingPairs; - - // If the cost model were perfect, this might not be necessary; but we - // need to make sure that we don't get stuck vectorizing our own - // shuffle chains. - bool HasNontrivialInsts = false; - - // The node weights represent the cost savings associated with - // fusing the pair of instructions. - for (DenseSet::iterator S = PrunedDAG.begin(), - E = PrunedDAG.end(); S != E; ++S) { - if (!isa(S->first) && - !isa(S->first) && - !isa(S->first)) - HasNontrivialInsts = true; - - bool FlipOrder = false; - - if (getDepthFactor(S->first)) { - int ESContrib = CandidatePairCostSavings.find(*S)->second; - DEBUG(if (DebugPairSelection) dbgs() << "\tweight {" - << *S->first << " <-> " << *S->second << "} = " << - ESContrib << "\n"); - EffSize += ESContrib; - } - - // The edge weights contribute in a negative sense: they represent - // the cost of shuffles. - DenseMap >::iterator SS = - ConnectedPairDeps.find(*S); - if (SS != ConnectedPairDeps.end()) { - unsigned NumDepsDirect = 0, NumDepsSwap = 0; - for (std::vector::iterator T = SS->second.begin(), - TE = SS->second.end(); T != TE; ++T) { - VPPair Q(*S, *T); - if (!PrunedDAG.count(Q.second)) - continue; - DenseMap::iterator R = - PairConnectionTypes.find(VPPair(Q.second, Q.first)); - assert(R != PairConnectionTypes.end() && - "Cannot find pair connection type"); - if (R->second == PairConnectionDirect) - ++NumDepsDirect; - else if (R->second == PairConnectionSwap) - ++NumDepsSwap; - } - - // If there are more swaps than direct connections, then - // the pair order will be flipped during fusion. So the real - // number of swaps is the minimum number. 
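The effective-size bookkeeping in this loop is a simple signed sum: every fused pair adds its cost savings (the node weight) and every shuffle needed to connect it subtracts its cost (the edge weight); the DAG is worth keeping only if the net result is positive. A toy model of that accounting in plain C++ (Contribution and effectiveSizeModel are illustrative names):

#include <cstdio>
#include <vector>

struct Contribution {
  int SavingsIfFused; // node weight: savings from fusing this pair
  int ShuffleCost;    // edge weights: shuffles needed to connect it
};

int effectiveSizeModel(const std::vector<Contribution> &DAG) {
  int EffSize = 0;
  for (const Contribution &C : DAG)
    EffSize += C.SavingsIfFused - C.ShuffleCost;
  return EffSize;
}

int main() {
  // Two pairs saving 4 each, one needing a connecting shuffle that costs 3:
  std::vector<Contribution> DAG = {{4, 0}, {4, 3}};
  int EffSize = effectiveSizeModel(DAG);
  std::printf("EffSize = %d -> %s\n", EffSize,
              EffSize > 0 ? "keep" : "reject"); // EffSize = 5 -> keep
}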
- FlipOrder = !FixedOrderPairs.count(*S) && - ((NumDepsSwap > NumDepsDirect) || - FixedOrderPairs.count(ValuePair(S->second, S->first))); - - for (std::vector::iterator T = SS->second.begin(), - TE = SS->second.end(); T != TE; ++T) { - VPPair Q(*S, *T); - if (!PrunedDAG.count(Q.second)) - continue; - DenseMap::iterator R = - PairConnectionTypes.find(VPPair(Q.second, Q.first)); - assert(R != PairConnectionTypes.end() && - "Cannot find pair connection type"); - Type *Ty1 = Q.second.first->getType(), - *Ty2 = Q.second.second->getType(); - Type *VTy = getVecTypeForPair(Ty1, Ty2); - if ((R->second == PairConnectionDirect && FlipOrder) || - (R->second == PairConnectionSwap && !FlipOrder) || - R->second == PairConnectionSplat) { - int ESContrib = (int) getInstrCost(Instruction::ShuffleVector, - VTy, VTy); - - if (VTy->getVectorNumElements() == 2) { - if (R->second == PairConnectionSplat) - ESContrib = std::min(ESContrib, (int) TTI->getShuffleCost( - TargetTransformInfo::SK_Broadcast, VTy)); - else - ESContrib = std::min(ESContrib, (int) TTI->getShuffleCost( - TargetTransformInfo::SK_Reverse, VTy)); - } - - DEBUG(if (DebugPairSelection) dbgs() << "\tcost {" << - *Q.second.first << " <-> " << *Q.second.second << - "} -> {" << - *S->first << " <-> " << *S->second << "} = " << - ESContrib << "\n"); - EffSize -= ESContrib; - } - } - } - - // Compute the cost of outgoing edges. We assume that edges outgoing - // to shuffles, inserts or extracts can be merged, and so contribute - // no additional cost. - if (!S->first->getType()->isVoidTy()) { - Type *Ty1 = S->first->getType(), - *Ty2 = S->second->getType(); - Type *VTy = getVecTypeForPair(Ty1, Ty2); - - bool NeedsExtraction = false; - for (User *U : S->first->users()) { - if (ShuffleVectorInst *SI = dyn_cast(U)) { - // Shuffle can be folded if it has no other input - if (isa(SI->getOperand(1))) - continue; - } - if (isa(U)) - continue; - if (PrunedDAGInstrs.count(U)) - continue; - NeedsExtraction = true; - break; - } - - if (NeedsExtraction) { - int ESContrib; - if (Ty1->isVectorTy()) { - ESContrib = (int) getInstrCost(Instruction::ShuffleVector, - Ty1, VTy); - ESContrib = std::min(ESContrib, (int) TTI->getShuffleCost( - TargetTransformInfo::SK_ExtractSubvector, VTy, 0, Ty1)); - } else - ESContrib = (int) TTI->getVectorInstrCost( - Instruction::ExtractElement, VTy, 0); - - DEBUG(if (DebugPairSelection) dbgs() << "\tcost {" << - *S->first << "} = " << ESContrib << "\n"); - EffSize -= ESContrib; - } - - NeedsExtraction = false; - for (User *U : S->second->users()) { - if (ShuffleVectorInst *SI = dyn_cast(U)) { - // Shuffle can be folded if it has no other input - if (isa(SI->getOperand(1))) - continue; - } - if (isa(U)) - continue; - if (PrunedDAGInstrs.count(U)) - continue; - NeedsExtraction = true; - break; - } - - if (NeedsExtraction) { - int ESContrib; - if (Ty2->isVectorTy()) { - ESContrib = (int) getInstrCost(Instruction::ShuffleVector, - Ty2, VTy); - ESContrib = std::min(ESContrib, (int) TTI->getShuffleCost( - TargetTransformInfo::SK_ExtractSubvector, VTy, - Ty1->isVectorTy() ? Ty1->getVectorNumElements() : 1, Ty2)); - } else - ESContrib = (int) TTI->getVectorInstrCost( - Instruction::ExtractElement, VTy, 1); - DEBUG(if (DebugPairSelection) dbgs() << "\tcost {" << - *S->second << "} = " << ESContrib << "\n"); - EffSize -= ESContrib; - } - } - - // Compute the cost of incoming edges. 
- if (!isa(S->first) && !isa(S->first)) { - Instruction *S1 = cast(S->first), - *S2 = cast(S->second); - for (unsigned o = 0; o < S1->getNumOperands(); ++o) { - Value *O1 = S1->getOperand(o), *O2 = S2->getOperand(o); - - // Combining constants into vector constants (or small vector - // constants into larger ones) is assumed free. - if (isa(O1) && isa(O2)) - continue; - - if (FlipOrder) - std::swap(O1, O2); - - ValuePair VP = ValuePair(O1, O2); - ValuePair VPR = ValuePair(O2, O1); - - // Internal edges are not handled here. - if (PrunedDAG.count(VP) || PrunedDAG.count(VPR)) - continue; - - Type *Ty1 = O1->getType(), - *Ty2 = O2->getType(); - Type *VTy = getVecTypeForPair(Ty1, Ty2); - - // Combining vector operations of the same type is also assumed - // folded with other operations. - if (Ty1 == Ty2) { - // If both are insert elements, then both can be widened. - InsertElementInst *IEO1 = dyn_cast(O1), - *IEO2 = dyn_cast(O2); - if (IEO1 && IEO2 && isPureIEChain(IEO1) && isPureIEChain(IEO2)) - continue; - // If both are extract elements, and both have the same input - // type, then they can be replaced with a shuffle - ExtractElementInst *EIO1 = dyn_cast(O1), - *EIO2 = dyn_cast(O2); - if (EIO1 && EIO2 && - EIO1->getOperand(0)->getType() == - EIO2->getOperand(0)->getType()) - continue; - // If both are a shuffle with equal operand types and only two - // unique operands, then they can be replaced with a single - // shuffle - ShuffleVectorInst *SIO1 = dyn_cast(O1), - *SIO2 = dyn_cast(O2); - if (SIO1 && SIO2 && - SIO1->getOperand(0)->getType() == - SIO2->getOperand(0)->getType()) { - SmallSet SIOps; - SIOps.insert(SIO1->getOperand(0)); - SIOps.insert(SIO1->getOperand(1)); - SIOps.insert(SIO2->getOperand(0)); - SIOps.insert(SIO2->getOperand(1)); - if (SIOps.size() <= 2) - continue; - } - } - - int ESContrib; - // This pair has already been formed. - if (IncomingPairs.count(VP)) { - continue; - } else if (IncomingPairs.count(VPR)) { - ESContrib = (int) getInstrCost(Instruction::ShuffleVector, - VTy, VTy); - - if (VTy->getVectorNumElements() == 2) - ESContrib = std::min(ESContrib, (int) TTI->getShuffleCost( - TargetTransformInfo::SK_Reverse, VTy)); - } else if (!Ty1->isVectorTy() && !Ty2->isVectorTy()) { - ESContrib = (int) TTI->getVectorInstrCost( - Instruction::InsertElement, VTy, 0); - ESContrib += (int) TTI->getVectorInstrCost( - Instruction::InsertElement, VTy, 1); - } else if (!Ty1->isVectorTy()) { - // O1 needs to be inserted into a vector of size O2, and then - // both need to be shuffled together. - ESContrib = (int) TTI->getVectorInstrCost( - Instruction::InsertElement, Ty2, 0); - ESContrib += (int) getInstrCost(Instruction::ShuffleVector, - VTy, Ty2); - } else if (!Ty2->isVectorTy()) { - // O2 needs to be inserted into a vector of size O1, and then - // both need to be shuffled together.
- ESContrib = (int) TTI->getVectorInstrCost( - Instruction::InsertElement, Ty1, 0); - ESContrib += (int) getInstrCost(Instruction::ShuffleVector, - VTy, Ty1); - } else { - Type *TyBig = Ty1, *TySmall = Ty2; - if (Ty2->getVectorNumElements() > Ty1->getVectorNumElements()) - std::swap(TyBig, TySmall); - - ESContrib = (int) getInstrCost(Instruction::ShuffleVector, - VTy, TyBig); - if (TyBig != TySmall) - ESContrib += (int) getInstrCost(Instruction::ShuffleVector, - TyBig, TySmall); - } - - DEBUG(if (DebugPairSelection) dbgs() << "\tcost {" - << *O1 << " <-> " << *O2 << "} = " << - ESContrib << "\n"); - EffSize -= ESContrib; - IncomingPairs.insert(VP); - } - } - } - - if (!HasNontrivialInsts) { - DEBUG(if (DebugPairSelection) dbgs() << - "\tNo non-trivial instructions in DAG;" - " override to zero effective size\n"); - EffSize = 0; - } - } else { - for (DenseSet::iterator S = PrunedDAG.begin(), - E = PrunedDAG.end(); S != E; ++S) - EffSize += (int) getDepthFactor(S->first); - } - - DEBUG(if (DebugPairSelection) - dbgs() << "BBV: found pruned DAG for pair {" - << *IJ.first << " <-> " << *IJ.second << "} of depth " << - MaxDepth << " and size " << PrunedDAG.size() << - " (effective size: " << EffSize << ")\n"); - if (((TTI && !UseChainDepthWithTI) || - MaxDepth >= Config.ReqChainDepth) && - EffSize > 0 && EffSize > BestEffSize) { - BestMaxDepth = MaxDepth; - BestEffSize = EffSize; - BestDAG = PrunedDAG; - } - } - } - - // Given the list of candidate pairs, this function selects those - // that will be fused into vector instructions. - void BBVectorize::choosePairs( - DenseMap > &CandidatePairs, - DenseSet &CandidatePairsSet, - DenseMap &CandidatePairCostSavings, - std::vector &PairableInsts, - DenseSet &FixedOrderPairs, - DenseMap &PairConnectionTypes, - DenseMap > &ConnectedPairs, - DenseMap > &ConnectedPairDeps, - DenseSet &PairableInstUsers, - DenseMap& ChosenPairs) { - bool UseCycleCheck = - CandidatePairsSet.size() <= Config.MaxCandPairsForCycleCheck; - - DenseMap > CandidatePairs2; - for (DenseSet::iterator I = CandidatePairsSet.begin(), - E = CandidatePairsSet.end(); I != E; ++I) { - std::vector &JJ = CandidatePairs2[I->second]; - if (JJ.empty()) JJ.reserve(32); - JJ.push_back(I->first); - } - - DenseMap > PairableInstUserMap; - DenseSet PairableInstUserPairSet; - for (std::vector::iterator I = PairableInsts.begin(), - E = PairableInsts.end(); I != E; ++I) { - // The number of possible pairings for this variable: - size_t NumChoices = CandidatePairs.lookup(*I).size(); - if (!NumChoices) continue; - - std::vector &JJ = CandidatePairs[*I]; - - // The best pair to choose and its dag: - size_t BestMaxDepth = 0; - int BestEffSize = 0; - DenseSet BestDAG; - findBestDAGFor(CandidatePairs, CandidatePairsSet, - CandidatePairCostSavings, - PairableInsts, FixedOrderPairs, PairConnectionTypes, - ConnectedPairs, ConnectedPairDeps, - PairableInstUsers, PairableInstUserMap, - PairableInstUserPairSet, ChosenPairs, - BestDAG, BestMaxDepth, BestEffSize, *I, JJ, - UseCycleCheck); - - if (BestDAG.empty()) - continue; - - // A dag has been chosen (or not) at this point. If no dag was - // chosen, then this instruction, I, cannot be paired (and is no longer - // considered). - - DEBUG(dbgs() << "BBV: selected pairs in the best DAG for: " - << *cast(*I) << "\n"); - - for (DenseSet::iterator S = BestDAG.begin(), - SE2 = BestDAG.end(); S != SE2; ++S) { - // Insert the members of this dag into the list of chosen pairs. 
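The erasure step that follows can be summarized as: once a pair is committed, every remaining candidate that shares a value with it becomes unformable and is dropped from the candidate set. A standalone sketch with integers standing in for Value pointers (the chosen pair itself is assumed to have already been moved into the chosen set):

#include <cstdio>
#include <set>
#include <utility>

using Pair = std::pair<int, int>;

// Drop every candidate that shares a value with the committed pair.
void eraseOverlapping(std::set<Pair> &Candidates, Pair Chosen) {
  for (auto It = Candidates.begin(); It != Candidates.end();) {
    bool Shares = It->first == Chosen.first || It->first == Chosen.second ||
                  It->second == Chosen.first || It->second == Chosen.second;
    if (Shares)
      It = Candidates.erase(It);
    else
      ++It;
  }
}

int main() {
  std::set<Pair> Candidates{{1, 2}, {2, 3}, {4, 5}};
  eraseOverlapping(Candidates, {1, 2}); // removes {1,2} and {2,3}
  std::printf("%zu candidate(s) left\n", Candidates.size()); // 1
}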
- ChosenPairs.insert(ValuePair(S->first, S->second)); - DEBUG(dbgs() << "BBV: selected pair: " << *S->first << " <-> " << - *S->second << "\n"); - - // Remove all candidate pairs that have values in the chosen dag. - std::vector &KK = CandidatePairs[S->first]; - for (std::vector::iterator K = KK.begin(), KE = KK.end(); - K != KE; ++K) { - if (*K == S->second) - continue; - - CandidatePairsSet.erase(ValuePair(S->first, *K)); - } - - std::vector &LL = CandidatePairs2[S->second]; - for (std::vector::iterator L = LL.begin(), LE = LL.end(); - L != LE; ++L) { - if (*L == S->first) - continue; - - CandidatePairsSet.erase(ValuePair(*L, S->second)); - } - - std::vector &MM = CandidatePairs[S->second]; - for (std::vector::iterator M = MM.begin(), ME = MM.end(); - M != ME; ++M) { - assert(*M != S->first && "Flipped pair in candidate list?"); - CandidatePairsSet.erase(ValuePair(S->second, *M)); - } - - std::vector &NN = CandidatePairs2[S->first]; - for (std::vector::iterator N = NN.begin(), NE = NN.end(); - N != NE; ++N) { - assert(*N != S->second && "Flipped pair in candidate list?"); - CandidatePairsSet.erase(ValuePair(*N, S->first)); - } - } - } - - DEBUG(dbgs() << "BBV: selected " << ChosenPairs.size() << " pairs.\n"); - } - - std::string getReplacementName(Instruction *I, bool IsInput, unsigned o, - unsigned n = 0) { - if (!I->hasName()) - return ""; - - return (I->getName() + (IsInput ? ".v.i" : ".v.r") + utostr(o) + - (n > 0 ? "." + utostr(n) : "")).str(); - } - - // Returns the value that is to be used as the pointer input to the vector - // instruction that fuses I with J. - Value *BBVectorize::getReplacementPointerInput(LLVMContext& Context, - Instruction *I, Instruction *J, unsigned o) { - Value *IPtr, *JPtr; - unsigned IAlignment, JAlignment, IAddressSpace, JAddressSpace; - int64_t OffsetInElmts; - - // Note: the analysis might fail here, that is why the pair order has - // been precomputed (OffsetInElmts must be unused here). - (void) getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment, - IAddressSpace, JAddressSpace, - OffsetInElmts, false); - - // The pointer value is taken to be the one with the lowest offset. - Value *VPtr = IPtr; - - Type *ArgTypeI = IPtr->getType()->getPointerElementType(); - Type *ArgTypeJ = JPtr->getType()->getPointerElementType(); - Type *VArgType = getVecTypeForPair(ArgTypeI, ArgTypeJ); - Type *VArgPtrType - = PointerType::get(VArgType, - IPtr->getType()->getPointerAddressSpace()); - return new BitCastInst(VPtr, VArgPtrType, getReplacementName(I, true, o), - /* insert before */ I); - } - - void BBVectorize::fillNewShuffleMask(LLVMContext& Context, Instruction *J, - unsigned MaskOffset, unsigned NumInElem, - unsigned NumInElem1, unsigned IdxOffset, - std::vector &Mask) { - unsigned NumElem1 = J->getType()->getVectorNumElements(); - for (unsigned v = 0; v < NumElem1; ++v) { - int m = cast(J)->getMaskValue(v); - if (m < 0) { - Mask[v+MaskOffset] = UndefValue::get(Type::getInt32Ty(Context)); - } else { - unsigned mm = m + (int) IdxOffset; - if (m >= (int) NumInElem1) - mm += (int) NumInElem; - - Mask[v+MaskOffset] = - ConstantInt::get(Type::getInt32Ty(Context), mm); - } - } - } - - // Returns the value that is to be used as the vector-shuffle mask to the - // vector instruction that fuses I with J. - Value *BBVectorize::getReplacementShuffleMask(LLVMContext& Context, - Instruction *I, Instruction *J) { - // This is the shuffle mask. We need to append the second - // mask to the first, and the numbers need to be adjusted. 
- - Type *ArgTypeI = I->getType(); - Type *ArgTypeJ = J->getType(); - Type *VArgType = getVecTypeForPair(ArgTypeI, ArgTypeJ); - - unsigned NumElemI = ArgTypeI->getVectorNumElements(); - - // Get the total number of elements in the fused vector type. - // By definition, this must equal the number of elements in - // the final mask. - unsigned NumElem = VArgType->getVectorNumElements(); - std::vector Mask(NumElem); - - Type *OpTypeI = I->getOperand(0)->getType(); - unsigned NumInElemI = OpTypeI->getVectorNumElements(); - Type *OpTypeJ = J->getOperand(0)->getType(); - unsigned NumInElemJ = OpTypeJ->getVectorNumElements(); - - // The fused vector will be: - // ----------------------------------------------------- - // | NumInElemI | NumInElemJ | NumInElemI | NumInElemJ | - // ----------------------------------------------------- - // from which we'll extract NumElem total elements (where the first NumElemI - // of them come from the mask in I and the remainder come from the mask - // in J). - - // For the mask from the first pair... - fillNewShuffleMask(Context, I, 0, NumInElemJ, NumInElemI, - 0, Mask); - - // For the mask from the second pair... - fillNewShuffleMask(Context, J, NumElemI, NumInElemI, NumInElemJ, - NumInElemI, Mask); - - return ConstantVector::get(Mask); - } - - bool BBVectorize::expandIEChain(LLVMContext& Context, Instruction *I, - Instruction *J, unsigned o, Value *&LOp, - unsigned numElemL, - Type *ArgTypeL, Type *ArgTypeH, - bool IBeforeJ, unsigned IdxOff) { - bool ExpandedIEChain = false; - if (InsertElementInst *LIE = dyn_cast(LOp)) { - // If we have a pure insertelement chain, then this can be rewritten - // into a chain that directly builds the larger type. - if (isPureIEChain(LIE)) { - SmallVector VectElemts(numElemL, - UndefValue::get(ArgTypeL->getScalarType())); - InsertElementInst *LIENext = LIE; - do { - unsigned Idx = - cast(LIENext->getOperand(2))->getSExtValue(); - VectElemts[Idx] = LIENext->getOperand(1); - } while ((LIENext = - dyn_cast(LIENext->getOperand(0)))); - - LIENext = nullptr; - Value *LIEPrev = UndefValue::get(ArgTypeH); - for (unsigned i = 0; i < numElemL; ++i) { - if (isa(VectElemts[i])) continue; - LIENext = InsertElementInst::Create(LIEPrev, VectElemts[i], - ConstantInt::get(Type::getInt32Ty(Context), - i + IdxOff), - getReplacementName(IBeforeJ ? I : J, - true, o, i+1)); - LIENext->insertBefore(IBeforeJ ? J : I); - LIEPrev = LIENext; - } - - LOp = LIENext ? (Value*) LIENext : UndefValue::get(ArgTypeH); - ExpandedIEChain = true; - } - } - - return ExpandedIEChain; - } - - static unsigned getNumScalarElements(Type *Ty) { - if (VectorType *VecTy = dyn_cast(Ty)) - return VecTy->getNumElements(); - return 1; - } - - // Returns the value to be used as the specified operand of the vector - // instruction that fuses I with J.
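The mask-adjustment arithmetic above is worth a concrete example. The fused operand vector is laid out as | I.op0 | J.op0 | I.op1 | J.op1 |, so a mask entry referring to an instruction's first input is shifted by that block's start offset, and an entry referring to the second input must additionally skip over the other instruction's first-input block. A self-contained model mirroring the logic of fillNewShuffleMask (fillFusedMask and the int masks are stand-ins; -1 plays the role of undef):

#include <cstdio>
#include <vector>

// Remap one original mask into the fused mask: NumInElem is the width of
// the *other* instruction's inputs, NumInElem1 the width of this
// instruction's inputs, and IdxOffset is where this instruction's first
// input block starts in the fused layout.
void fillFusedMask(const std::vector<int> &OrigMask, unsigned MaskOffset,
                   unsigned NumInElem, unsigned NumInElem1,
                   unsigned IdxOffset, std::vector<int> &Mask) {
  for (unsigned v = 0; v < OrigMask.size(); ++v) {
    int m = OrigMask[v];
    if (m < 0) {          // -1 plays the role of an undef mask element
      Mask[v + MaskOffset] = -1;
      continue;
    }
    unsigned mm = m + IdxOffset;
    if (m >= (int)NumInElem1)
      mm += NumInElem;    // skip the other pair's block to reach input 1
    Mask[v + MaskOffset] = (int)mm;
  }
}

int main() {
  // I and J are both 2-wide shuffles of two 2-wide inputs, so the fused
  // operand layout is | I.op0 | J.op0 | I.op1 | J.op1 | (widths 2,2,2,2).
  std::vector<int> MaskI = {0, 2}; // I.op0[0], I.op1[0]
  std::vector<int> MaskJ = {1, 3}; // J.op0[1], J.op1[1]
  std::vector<int> Fused(4);
  fillFusedMask(MaskI, /*MaskOffset=*/0, /*NumInElem=*/2, /*NumInElem1=*/2,
                /*IdxOffset=*/0, Fused);
  fillFusedMask(MaskJ, /*MaskOffset=*/2, /*NumInElem=*/2, /*NumInElem1=*/2,
                /*IdxOffset=*/2, Fused);
  for (int m : Fused)
    std::printf("%d ", m); // 0 4 3 7
  std::printf("\n");
}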
- Value *BBVectorize::getReplacementInput(LLVMContext& Context, Instruction *I, - Instruction *J, unsigned o, bool IBeforeJ) { - Value *CV0 = ConstantInt::get(Type::getInt32Ty(Context), 0); - Value *CV1 = ConstantInt::get(Type::getInt32Ty(Context), 1); - - // Compute the fused vector type for this operand - Type *ArgTypeI = I->getOperand(o)->getType(); - Type *ArgTypeJ = J->getOperand(o)->getType(); - VectorType *VArgType = getVecTypeForPair(ArgTypeI, ArgTypeJ); - - Instruction *L = I, *H = J; - Type *ArgTypeL = ArgTypeI, *ArgTypeH = ArgTypeJ; - - unsigned numElemL = getNumScalarElements(ArgTypeL); - unsigned numElemH = getNumScalarElements(ArgTypeH); - - Value *LOp = L->getOperand(o); - Value *HOp = H->getOperand(o); - unsigned numElem = VArgType->getNumElements(); - - // First, we check if we can reuse the "original" vector outputs (if these - // exist). We might need a shuffle. - ExtractElementInst *LEE = dyn_cast(LOp); - ExtractElementInst *HEE = dyn_cast(HOp); - ShuffleVectorInst *LSV = dyn_cast(LOp); - ShuffleVectorInst *HSV = dyn_cast(HOp); - - // FIXME: If we're fusing shuffle instructions, then we can't apply this - // optimization. The input vectors to the shuffle might be a different - // length from the shuffle outputs. Unfortunately, the replacement - // shuffle mask has already been formed, and the mask entries are sensitive - // to the sizes of the inputs. - bool IsSizeChangeShuffle = - isa(L) && - (LOp->getType() != L->getType() || HOp->getType() != H->getType()); - - if ((LEE || LSV) && (HEE || HSV) && !IsSizeChangeShuffle) { - // We can have at most two unique vector inputs. - bool CanUseInputs = true; - Value *I1, *I2 = nullptr; - if (LEE) { - I1 = LEE->getOperand(0); - } else { - I1 = LSV->getOperand(0); - I2 = LSV->getOperand(1); - if (I2 == I1 || isa(I2)) - I2 = nullptr; - } - - if (HEE) { - Value *I3 = HEE->getOperand(0); - if (!I2 && I3 != I1) - I2 = I3; - else if (I3 != I1 && I3 != I2) - CanUseInputs = false; - } else { - Value *I3 = HSV->getOperand(0); - if (!I2 && I3 != I1) - I2 = I3; - else if (I3 != I1 && I3 != I2) - CanUseInputs = false; - - if (CanUseInputs) { - Value *I4 = HSV->getOperand(1); - if (!isa(I4)) { - if (!I2 && I4 != I1) - I2 = I4; - else if (I4 != I1 && I4 != I2) - CanUseInputs = false; - } - } - } - - if (CanUseInputs) { - unsigned LOpElem = - cast(LOp)->getOperand(0)->getType() - ->getVectorNumElements(); - - unsigned HOpElem = - cast(HOp)->getOperand(0)->getType() - ->getVectorNumElements(); - - // We have one or two input vectors. We need to map each index of the - // operands to the index of the original vector. - SmallVector, 8> II(numElem); - for (unsigned i = 0; i < numElemL; ++i) { - int Idx, INum; - if (LEE) { - Idx = - cast(LEE->getOperand(1))->getSExtValue(); - INum = LEE->getOperand(0) == I1 ? 0 : 1; - } else { - Idx = LSV->getMaskValue(i); - if (Idx < (int) LOpElem) { - INum = LSV->getOperand(0) == I1 ? 0 : 1; - } else { - Idx -= LOpElem; - INum = LSV->getOperand(1) == I1 ? 0 : 1; - } - } - - II[i] = std::pair(Idx, INum); - } - for (unsigned i = 0; i < numElemH; ++i) { - int Idx, INum; - if (HEE) { - Idx = - cast(HEE->getOperand(1))->getSExtValue(); - INum = HEE->getOperand(0) == I1 ? 0 : 1; - } else { - Idx = HSV->getMaskValue(i); - if (Idx < (int) HOpElem) { - INum = HSV->getOperand(0) == I1 ? 0 : 1; - } else { - Idx -= HOpElem; - INum = HSV->getOperand(1) == I1 ? 
0 : 1; - } - } - - II[i + numElemL] = std::pair(Idx, INum); - } - - // We now have an array which tells us from which index of which - // input vector each element of the operand comes. - VectorType *I1T = cast(I1->getType()); - unsigned I1Elem = I1T->getNumElements(); - - if (!I2) { - // In this case there is only one underlying vector input. Check for - // the trivial case where we can use the input directly. - if (I1Elem == numElem) { - bool ElemInOrder = true; - for (unsigned i = 0; i < numElem; ++i) { - if (II[i].first != (int) i && II[i].first != -1) { - ElemInOrder = false; - break; - } - } - - if (ElemInOrder) - return I1; - } - - // A shuffle is needed. - std::vector Mask(numElem); - for (unsigned i = 0; i < numElem; ++i) { - int Idx = II[i].first; - if (Idx == -1) - Mask[i] = UndefValue::get(Type::getInt32Ty(Context)); - else - Mask[i] = ConstantInt::get(Type::getInt32Ty(Context), Idx); - } - - Instruction *S = - new ShuffleVectorInst(I1, UndefValue::get(I1T), - ConstantVector::get(Mask), - getReplacementName(IBeforeJ ? I : J, - true, o)); - S->insertBefore(IBeforeJ ? J : I); - return S; - } - - VectorType *I2T = cast(I2->getType()); - unsigned I2Elem = I2T->getNumElements(); - - // This input comes from two distinct vectors. The first step is to - // make sure that both vectors are the same length. If not, the - // smaller one will need to grow before they can be shuffled together. - if (I1Elem < I2Elem) { - std::vector Mask(I2Elem); - unsigned v = 0; - for (; v < I1Elem; ++v) - Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), v); - for (; v < I2Elem; ++v) - Mask[v] = UndefValue::get(Type::getInt32Ty(Context)); - - Instruction *NewI1 = - new ShuffleVectorInst(I1, UndefValue::get(I1T), - ConstantVector::get(Mask), - getReplacementName(IBeforeJ ? I : J, - true, o, 1)); - NewI1->insertBefore(IBeforeJ ? J : I); - I1 = NewI1; - I1Elem = I2Elem; - } else if (I1Elem > I2Elem) { - std::vector Mask(I1Elem); - unsigned v = 0; - for (; v < I2Elem; ++v) - Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), v); - for (; v < I1Elem; ++v) - Mask[v] = UndefValue::get(Type::getInt32Ty(Context)); - - Instruction *NewI2 = - new ShuffleVectorInst(I2, UndefValue::get(I2T), - ConstantVector::get(Mask), - getReplacementName(IBeforeJ ? I : J, - true, o, 1)); - NewI2->insertBefore(IBeforeJ ? J : I); - I2 = NewI2; - } - - // Now that both I1 and I2 are the same length we can shuffle them - // together (and use the result). - std::vector Mask(numElem); - for (unsigned v = 0; v < numElem; ++v) { - if (II[v].first == -1) { - Mask[v] = UndefValue::get(Type::getInt32Ty(Context)); - } else { - int Idx = II[v].first + II[v].second * I1Elem; - Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), Idx); - } - } - - Instruction *NewOp = - new ShuffleVectorInst(I1, I2, ConstantVector::get(Mask), - getReplacementName(IBeforeJ ? I : J, true, o)); - NewOp->insertBefore(IBeforeJ ? J : I); - return NewOp; - } - } - - Type *ArgType = ArgTypeL; - if (numElemL < numElemH) { - if (numElemL == 1 && expandIEChain(Context, I, J, o, HOp, numElemH, - ArgTypeL, VArgType, IBeforeJ, 1)) { - // This is another short-circuit case: we're combining a scalar into - // a vector that is formed by an IE chain. We've just expanded the IE - // chain, now insert the scalar and we're done. - - Instruction *S = InsertElementInst::Create(HOp, LOp, CV0, - getReplacementName(IBeforeJ ? I : J, true, o)); - S->insertBefore(IBeforeJ ? 
J : I); - return S; - } else if (!expandIEChain(Context, I, J, o, LOp, numElemL, ArgTypeL, - ArgTypeH, IBeforeJ)) { - // The two vector inputs to the shuffle must be the same length, - // so extend the smaller vector to be the same length as the larger one. - Instruction *NLOp; - if (numElemL > 1) { - - std::vector Mask(numElemH); - unsigned v = 0; - for (; v < numElemL; ++v) - Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), v); - for (; v < numElemH; ++v) - Mask[v] = UndefValue::get(Type::getInt32Ty(Context)); - - NLOp = new ShuffleVectorInst(LOp, UndefValue::get(ArgTypeL), - ConstantVector::get(Mask), - getReplacementName(IBeforeJ ? I : J, - true, o, 1)); - } else { - NLOp = InsertElementInst::Create(UndefValue::get(ArgTypeH), LOp, CV0, - getReplacementName(IBeforeJ ? I : J, - true, o, 1)); - } - - NLOp->insertBefore(IBeforeJ ? J : I); - LOp = NLOp; - } - - ArgType = ArgTypeH; - } else if (numElemL > numElemH) { - if (numElemH == 1 && expandIEChain(Context, I, J, o, LOp, numElemL, - ArgTypeH, VArgType, IBeforeJ)) { - Instruction *S = - InsertElementInst::Create(LOp, HOp, - ConstantInt::get(Type::getInt32Ty(Context), - numElemL), - getReplacementName(IBeforeJ ? I : J, - true, o)); - S->insertBefore(IBeforeJ ? J : I); - return S; - } else if (!expandIEChain(Context, I, J, o, HOp, numElemH, ArgTypeH, - ArgTypeL, IBeforeJ)) { - Instruction *NHOp; - if (numElemH > 1) { - std::vector Mask(numElemL); - unsigned v = 0; - for (; v < numElemH; ++v) - Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), v); - for (; v < numElemL; ++v) - Mask[v] = UndefValue::get(Type::getInt32Ty(Context)); - - NHOp = new ShuffleVectorInst(HOp, UndefValue::get(ArgTypeH), - ConstantVector::get(Mask), - getReplacementName(IBeforeJ ? I : J, - true, o, 1)); - } else { - NHOp = InsertElementInst::Create(UndefValue::get(ArgTypeL), HOp, CV0, - getReplacementName(IBeforeJ ? I : J, - true, o, 1)); - } - - NHOp->insertBefore(IBeforeJ ? J : I); - HOp = NHOp; - } - } - - if (ArgType->isVectorTy()) { - unsigned numElem = VArgType->getVectorNumElements(); - std::vector Mask(numElem); - for (unsigned v = 0; v < numElem; ++v) { - unsigned Idx = v; - // If the low vector was expanded, we need to skip the extra - // undefined entries. - if (v >= numElemL && numElemH > numElemL) - Idx += (numElemH - numElemL); - Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), Idx); - } - - Instruction *BV = new ShuffleVectorInst(LOp, HOp, - ConstantVector::get(Mask), - getReplacementName(IBeforeJ ? I : J, true, o)); - BV->insertBefore(IBeforeJ ? J : I); - return BV; - } - - Instruction *BV1 = InsertElementInst::Create( - UndefValue::get(VArgType), LOp, CV0, - getReplacementName(IBeforeJ ? I : J, - true, o, 1)); - BV1->insertBefore(IBeforeJ ? J : I); - Instruction *BV2 = InsertElementInst::Create(BV1, HOp, CV1, - getReplacementName(IBeforeJ ? I : J, - true, o, 2)); - BV2->insertBefore(IBeforeJ ? J : I); - return BV2; - } - - // This function creates an array of values that will be used as the inputs - // to the vector instruction that fuses I with J. - void BBVectorize::getReplacementInputsForPair(LLVMContext& Context, - Instruction *I, Instruction *J, - SmallVectorImpl &ReplacedOperands, - bool IBeforeJ) { - unsigned NumOperands = I->getNumOperands(); - - for (unsigned p = 0, o = NumOperands-1; p < NumOperands; ++p, --o) { - // Iterate backward so that we look at the store pointer - // first and know whether or not we need to flip the inputs. - - if (isa(I) || (o == 1 && isa(I))) { - // This is the pointer for a load/store instruction. 
- ReplacedOperands[o] = getReplacementPointerInput(Context, I, J, o); - continue; - } else if (isa(I)) { - Function *F = cast(I)->getCalledFunction(); - Intrinsic::ID IID = F->getIntrinsicID(); - if (o == NumOperands-1) { - BasicBlock &BB = *I->getParent(); - - Module *M = BB.getParent()->getParent(); - Type *ArgTypeI = I->getType(); - Type *ArgTypeJ = J->getType(); - Type *VArgType = getVecTypeForPair(ArgTypeI, ArgTypeJ); - - ReplacedOperands[o] = Intrinsic::getDeclaration(M, IID, VArgType); - continue; - } else if ((IID == Intrinsic::powi || IID == Intrinsic::ctlz || - IID == Intrinsic::cttz) && o == 1) { - // The second argument of powi/ctlz/cttz is a single integer/constant - // and we've already checked that both arguments are equal. - // As a result, we just keep I's second argument. - ReplacedOperands[o] = I->getOperand(o); - continue; - } - } else if (isa(I) && o == NumOperands-1) { - ReplacedOperands[o] = getReplacementShuffleMask(Context, I, J); - continue; - } - - ReplacedOperands[o] = getReplacementInput(Context, I, J, o, IBeforeJ); - } - } - - // This function creates two values that represent the outputs of the - // original I and J instructions. These are generally vector shuffles - // or extracts. In many cases, these will end up being unused and, thus, - // eliminated by later passes. - void BBVectorize::replaceOutputsOfPair(LLVMContext& Context, Instruction *I, - Instruction *J, Instruction *K, - Instruction *&InsertionPt, - Instruction *&K1, Instruction *&K2) { - if (isa(I)) - return; - - Type *IType = I->getType(); - Type *JType = J->getType(); - - VectorType *VType = getVecTypeForPair(IType, JType); - unsigned numElem = VType->getNumElements(); - - unsigned numElemI = getNumScalarElements(IType); - unsigned numElemJ = getNumScalarElements(JType); - - if (IType->isVectorTy()) { - std::vector Mask1(numElemI), Mask2(numElemI); - for (unsigned v = 0; v < numElemI; ++v) { - Mask1[v] = ConstantInt::get(Type::getInt32Ty(Context), v); - Mask2[v] = ConstantInt::get(Type::getInt32Ty(Context), numElemJ + v); - } - - K1 = new ShuffleVectorInst(K, UndefValue::get(VType), - ConstantVector::get(Mask1), - getReplacementName(K, false, 1)); - } else { - Value *CV0 = ConstantInt::get(Type::getInt32Ty(Context), 0); - K1 = ExtractElementInst::Create(K, CV0, getReplacementName(K, false, 1)); - } - - if (JType->isVectorTy()) { - std::vector Mask1(numElemJ), Mask2(numElemJ); - for (unsigned v = 0; v < numElemJ; ++v) { - Mask1[v] = ConstantInt::get(Type::getInt32Ty(Context), v); - Mask2[v] = ConstantInt::get(Type::getInt32Ty(Context), numElemI + v); - } - - K2 = new ShuffleVectorInst(K, UndefValue::get(VType), - ConstantVector::get(Mask2), - getReplacementName(K, false, 2)); - } else { - Value *CV1 = ConstantInt::get(Type::getInt32Ty(Context), numElem - 1); - K2 = ExtractElementInst::Create(K, CV1, getReplacementName(K, false, 2)); - } - - K1->insertAfter(K); - K2->insertAfter(K1); - InsertionPt = K2; - } - - // Determine whether all uses of instruction I (including pairing-induced - // uses) can be moved after J. - bool BBVectorize::canMoveUsesOfIAfterJ(BasicBlock &BB, - DenseSet &LoadMoveSetPairs, - Instruction *I, Instruction *J) { - // Skip to the first instruction past I.
- BasicBlock::iterator L = std::next(BasicBlock::iterator(I)); - - DenseSet Users; - AliasSetTracker WriteSet(*AA); - if (I->mayWriteToMemory()) WriteSet.add(I); - - for (; cast(L) != J; ++L) - (void)trackUsesOfI(Users, WriteSet, I, &*L, true, &LoadMoveSetPairs); - - assert(cast(L) == J && - "Tracking has not proceeded far enough to check for dependencies"); - // If J is now in the use set of I, then trackUsesOfI will return true - // and we have a dependency cycle (and the fusing operation must abort). - return !trackUsesOfI(Users, WriteSet, I, J, true, &LoadMoveSetPairs); - } - - // Move all uses of instruction I (including pairing-induced uses) after J. - void BBVectorize::moveUsesOfIAfterJ(BasicBlock &BB, - DenseSet &LoadMoveSetPairs, - Instruction *&InsertionPt, - Instruction *I, Instruction *J) { - // Skip to the first instruction past I. - BasicBlock::iterator L = std::next(BasicBlock::iterator(I)); - - DenseSet Users; - AliasSetTracker WriteSet(*AA); - if (I->mayWriteToMemory()) WriteSet.add(I); - - for (; cast(L) != J;) { - if (trackUsesOfI(Users, WriteSet, I, &*L, true, &LoadMoveSetPairs)) { - // Move this instruction - Instruction *InstToMove = &*L++; - - DEBUG(dbgs() << "BBV: moving: " << *InstToMove << - " to after " << *InsertionPt << "\n"); - InstToMove->removeFromParent(); - InstToMove->insertAfter(InsertionPt); - InsertionPt = InstToMove; - } else { - ++L; - } - } - } - - // Collect all load instructions that are in the move set of a given first - // pair member. These loads depend on the first instruction, I, and so need - // to be moved after J (the second instruction) when the pair is fused. - void BBVectorize::collectPairLoadMoveSet(BasicBlock &BB, - DenseMap &ChosenPairs, - DenseMap > &LoadMoveSet, - DenseSet &LoadMoveSetPairs, - Instruction *I) { - // Skip to the first instruction past I. - BasicBlock::iterator L = std::next(BasicBlock::iterator(I)); - - DenseSet Users; - AliasSetTracker WriteSet(*AA); - if (I->mayWriteToMemory()) WriteSet.add(I); - - // Note: We cannot end the loop when we reach J because J could be moved - // farther down the use chain by another instruction pairing. Also, J - // could be before I if this is an inverted input. - for (BasicBlock::iterator E = BB.end(); L != E; ++L) { - if (trackUsesOfI(Users, WriteSet, I, &*L)) { - if (L->mayReadFromMemory()) { - LoadMoveSet[&*L].push_back(I); - LoadMoveSetPairs.insert(ValuePair(&*L, I)); - } - } - } - } - - // In cases where both load/stores and the computation of their pointers - // are chosen for vectorization, we can end up in a situation where the - // aliasing analysis starts returning different query results as the - // process of fusing instruction pairs continues. Because the algorithm - // relies on finding the same use dags here as were found earlier, we'll - // need to precompute the necessary aliasing information here and then - // manually update it during the fusion process.
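The bookkeeping that collectPairLoadMoveSet performs amounts to: for each chosen first pair member I, record every later memory-reading instruction that depends on I, so the fusion step can move those loads without re-running (by then stale) alias queries. A simplified stand-in in plain C++ (DependsOnI abstracts the trackUsesOfI/alias-set machinery; all names here are illustrative):

#include <cstdio>
#include <map>
#include <set>
#include <utility>
#include <vector>

using LoadPair = std::pair<int, int>; // (load, first pair member)

void collectPairLoadMoveSetModel(int I, const std::vector<int> &LaterMemReads,
                                 bool (*DependsOnI)(int Load, int I),
                                 std::map<int, std::vector<int>> &LoadMoveSet,
                                 std::set<LoadPair> &LoadMoveSetPairs) {
  // Record each later memory read that depends on I, keyed by the load.
  for (int Load : LaterMemReads)
    if (DependsOnI(Load, I)) {
      LoadMoveSet[Load].push_back(I);
      LoadMoveSetPairs.insert({Load, I});
    }
}

int main() {
  auto Dep = [](int Load, int I) { return Load == 7 && I == 3; };
  std::map<int, std::vector<int>> LMS;
  std::set<LoadPair> LMSP;
  collectPairLoadMoveSetModel(/*I=*/3, /*LaterMemReads=*/{5, 7}, Dep, LMS, LMSP);
  std::printf("%zu load(s) recorded\n", LMS.size()); // 1
}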
- void BBVectorize::collectLoadMoveSet(BasicBlock &BB, - std::vector &PairableInsts, - DenseMap &ChosenPairs, - DenseMap > &LoadMoveSet, - DenseSet &LoadMoveSetPairs) { - for (std::vector::iterator PI = PairableInsts.begin(), - PIE = PairableInsts.end(); PI != PIE; ++PI) { - DenseMap::iterator P = ChosenPairs.find(*PI); - if (P == ChosenPairs.end()) continue; - - Instruction *I = cast(P->first); - collectPairLoadMoveSet(BB, ChosenPairs, LoadMoveSet, - LoadMoveSetPairs, I); - } - } - - // This function fuses the chosen instruction pairs into vector instructions, - // taking care to preserve any needed scalar outputs and, then, it reorders the - // remaining instructions as needed (users of the first member of the pair - // need to be moved to after the location of the second member of the pair - // because the vector instruction is inserted in the location of the pair's - // second member). - void BBVectorize::fuseChosenPairs(BasicBlock &BB, - std::vector &PairableInsts, - DenseMap &ChosenPairs, - DenseSet &FixedOrderPairs, - DenseMap &PairConnectionTypes, - DenseMap > &ConnectedPairs, - DenseMap > &ConnectedPairDeps) { - LLVMContext& Context = BB.getContext(); - - // During the vectorization process, the order of the pairs to be fused - // could be flipped. So we'll add each pair, flipped, into the ChosenPairs - // list. After a pair is fused, the flipped pair is removed from the list. - DenseSet FlippedPairs; - for (DenseMap::iterator P = ChosenPairs.begin(), - E = ChosenPairs.end(); P != E; ++P) - FlippedPairs.insert(ValuePair(P->second, P->first)); - for (DenseSet::iterator P = FlippedPairs.begin(), - E = FlippedPairs.end(); P != E; ++P) - ChosenPairs.insert(*P); - - DenseMap > LoadMoveSet; - DenseSet LoadMoveSetPairs; - collectLoadMoveSet(BB, PairableInsts, ChosenPairs, - LoadMoveSet, LoadMoveSetPairs); - - DEBUG(dbgs() << "BBV: initial: \n" << BB << "\n"); - - for (BasicBlock::iterator PI = BB.getFirstInsertionPt(); PI != BB.end();) { - DenseMap::iterator P = ChosenPairs.find(&*PI); - if (P == ChosenPairs.end()) { - ++PI; - continue; - } - - if (getDepthFactor(P->first) == 0) { - // These instructions are not really fused, but are tracked as though - // they are. Any case in which it would be interesting to fuse them - // will be taken care of by InstCombine. - --NumFusedOps; - ++PI; - continue; - } - - Instruction *I = cast(P->first), - *J = cast(P->second); - - DEBUG(dbgs() << "BBV: fusing: " << *I << - " <-> " << *J << "\n"); - - // Remove the pair and flipped pair from the list. - DenseMap::iterator FP = ChosenPairs.find(P->second); - assert(FP != ChosenPairs.end() && "Flipped pair not found in list"); - ChosenPairs.erase(FP); - ChosenPairs.erase(P); - - if (!canMoveUsesOfIAfterJ(BB, LoadMoveSetPairs, I, J)) { - DEBUG(dbgs() << "BBV: fusion of: " << *I << - " <-> " << *J << - " aborted because of non-trivial dependency cycle\n"); - --NumFusedOps; - ++PI; - continue; - } - - // If the pair must have the other order, then flip it. - bool FlipPairOrder = FixedOrderPairs.count(ValuePair(J, I)); - if (!FlipPairOrder && !FixedOrderPairs.count(ValuePair(I, J))) { - // This pair does not have a fixed order, and so we might want to - // flip it if that will yield fewer shuffles. We count the number - // of dependencies connected via swaps, and those directly connected, - // and flip the order if the number of swaps is greater.
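The flip heuristic just described counts how many dependencies connect with a swapped element order versus directly, and flips the pair when swapped connections dominate, since flipping then minimizes the shuffles inserted later. A small standalone model of that decision (shouldFlipModel is an illustrative name):

#include <cstdio>

enum ConnectionType { Direct, Swap, Splat };

// Flip the pair when more of its dependencies connect with swapped
// element order than directly.
bool shouldFlipModel(const ConnectionType *Deps, unsigned N) {
  unsigned NumDirect = 0, NumSwap = 0;
  for (unsigned i = 0; i < N; ++i) {
    if (Deps[i] == Direct)
      ++NumDirect;
    else if (Deps[i] == Swap)
      ++NumSwap;
  }
  return NumSwap > NumDirect;
}

int main() {
  ConnectionType Deps[] = {Swap, Swap, Direct};
  std::printf("flip: %d\n", shouldFlipModel(Deps, 3)); // 1
}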
- bool OrigOrder = true; - DenseMap >::iterator IJ = - ConnectedPairDeps.find(ValuePair(I, J)); - if (IJ == ConnectedPairDeps.end()) { - IJ = ConnectedPairDeps.find(ValuePair(J, I)); - OrigOrder = false; - } - - if (IJ != ConnectedPairDeps.end()) { - unsigned NumDepsDirect = 0, NumDepsSwap = 0; - for (std::vector::iterator T = IJ->second.begin(), - TE = IJ->second.end(); T != TE; ++T) { - VPPair Q(IJ->first, *T); - DenseMap::iterator R = - PairConnectionTypes.find(VPPair(Q.second, Q.first)); - assert(R != PairConnectionTypes.end() && - "Cannot find pair connection type"); - if (R->second == PairConnectionDirect) - ++NumDepsDirect; - else if (R->second == PairConnectionSwap) - ++NumDepsSwap; - } - - if (!OrigOrder) - std::swap(NumDepsDirect, NumDepsSwap); - - if (NumDepsSwap > NumDepsDirect) { - FlipPairOrder = true; - DEBUG(dbgs() << "BBV: reordering pair: " << *I << - " <-> " << *J << "\n"); - } - } - } - - Instruction *L = I, *H = J; - if (FlipPairOrder) - std::swap(H, L); - - // If the pair being fused uses the opposite order from that in the pair - // connection map, then we need to flip the types. - DenseMap >::iterator HL = - ConnectedPairs.find(ValuePair(H, L)); - if (HL != ConnectedPairs.end()) - for (std::vector::iterator T = HL->second.begin(), - TE = HL->second.end(); T != TE; ++T) { - VPPair Q(HL->first, *T); - DenseMap::iterator R = PairConnectionTypes.find(Q); - assert(R != PairConnectionTypes.end() && - "Cannot find pair connection type"); - if (R->second == PairConnectionDirect) - R->second = PairConnectionSwap; - else if (R->second == PairConnectionSwap) - R->second = PairConnectionDirect; - } - - bool LBeforeH = !FlipPairOrder; - unsigned NumOperands = I->getNumOperands(); - SmallVector ReplacedOperands(NumOperands); - getReplacementInputsForPair(Context, L, H, ReplacedOperands, - LBeforeH); - - // Make a copy of the original operation, change its type to the vector - // type and replace its operands with the vector operands. - Instruction *K = L->clone(); - if (L->hasName()) - K->takeName(L); - else if (H->hasName()) - K->takeName(H); - - if (auto CS = CallSite(K)) { - SmallVector Tys; - FunctionType *Old = CS.getFunctionType(); - unsigned NumOld = Old->getNumParams(); - assert(NumOld <= ReplacedOperands.size()); - for (unsigned i = 0; i != NumOld; ++i) - Tys.push_back(ReplacedOperands[i]->getType()); - CS.mutateFunctionType( - FunctionType::get(getVecTypeForPair(L->getType(), H->getType()), - Tys, Old->isVarArg())); - } else if (!isa(K)) - K->mutateType(getVecTypeForPair(L->getType(), H->getType())); - - unsigned KnownIDs[] = {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope, - LLVMContext::MD_noalias, LLVMContext::MD_fpmath, - LLVMContext::MD_invariant_group}; - combineMetadata(K, H, KnownIDs); - K->andIRFlags(H); - - for (unsigned o = 0; o < NumOperands; ++o) - K->setOperand(o, ReplacedOperands[o]); - - K->insertAfter(J); - - // Instruction insertion point: - Instruction *InsertionPt = K; - Instruction *K1 = nullptr, *K2 = nullptr; - replaceOutputsOfPair(Context, L, H, K, InsertionPt, K1, K2); - - // The use dag of the first original instruction must be moved to after - // the location of the second instruction. The entire use dag of the - // first instruction is disjoint from the input dag of the second - // (by definition), and so commutes with it. - - moveUsesOfIAfterJ(BB, LoadMoveSetPairs, InsertionPt, I, J); - - if (!isa(I)) { - L->replaceAllUsesWith(K1); - H->replaceAllUsesWith(K2); - } - - // Instructions that may read from memory may be in the load move set. 
- // Once an instruction is fused, we no longer need its move set, and so - // the values of the map never need to be updated. However, when a load - // is fused, we need to merge the entries from both instructions in the - // pair in case those instructions were in the move set of some other - // yet-to-be-fused pair. The loads in question are the keys of the map. - if (I->mayReadFromMemory()) { - std::vector NewSetMembers; - DenseMap >::iterator II = - LoadMoveSet.find(I); - if (II != LoadMoveSet.end()) - for (std::vector::iterator N = II->second.begin(), - NE = II->second.end(); N != NE; ++N) - NewSetMembers.push_back(ValuePair(K, *N)); - DenseMap >::iterator JJ = - LoadMoveSet.find(J); - if (JJ != LoadMoveSet.end()) - for (std::vector::iterator N = JJ->second.begin(), - NE = JJ->second.end(); N != NE; ++N) - NewSetMembers.push_back(ValuePair(K, *N)); - for (std::vector::iterator A = NewSetMembers.begin(), - AE = NewSetMembers.end(); A != AE; ++A) { - LoadMoveSet[A->first].push_back(A->second); - LoadMoveSetPairs.insert(*A); - } - } - - // Before removing I, set the iterator to the next instruction. - PI = std::next(BasicBlock::iterator(I)); - if (cast(PI) == J) - ++PI; - - SE->forgetValue(I); - SE->forgetValue(J); - I->eraseFromParent(); - J->eraseFromParent(); - - DEBUG(if (PrintAfterEveryPair) dbgs() << "BBV: block is now: \n" << - BB << "\n"); - } - - DEBUG(dbgs() << "BBV: final: \n" << BB << "\n"); - } -} - -char BBVectorize::ID = 0; -static const char bb_vectorize_name[] = "Basic-Block Vectorization"; -INITIALIZE_PASS_BEGIN(BBVectorize, BBV_NAME, bb_vectorize_name, false, false) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) -INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) -INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) -INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass) -INITIALIZE_PASS_END(BBVectorize, BBV_NAME, bb_vectorize_name, false, false) - -BasicBlockPass *llvm::createBBVectorizePass(const VectorizeConfig &C) { - return new BBVectorize(C); -} - -bool -llvm::vectorizeBasicBlock(Pass *P, BasicBlock &BB, const VectorizeConfig &C) { - BBVectorize BBVectorizer(P, *BB.getParent(), C); - return BBVectorizer.vectorizeBB(BB); -} - -//===----------------------------------------------------------------------===// -VectorizeConfig::VectorizeConfig() { - VectorBits = ::VectorBits; - VectorizeBools = !::NoBools; - VectorizeInts = !::NoInts; - VectorizeFloats = !::NoFloats; - VectorizePointers = !::NoPointers; - VectorizeCasts = !::NoCasts; - VectorizeMath = !::NoMath; - VectorizeBitManipulations = !::NoBitManipulation; - VectorizeFMA = !::NoFMA; - VectorizeSelect = !::NoSelect; - VectorizeCmp = !::NoCmp; - VectorizeGEP = !::NoGEP; - VectorizeMemOps = !::NoMemOps; - AlignedOnly = ::AlignedOnly; - ReqChainDepth= ::ReqChainDepth; - SearchLimit = ::SearchLimit; - MaxCandPairsForCycleCheck = ::MaxCandPairsForCycleCheck; - SplatBreaksChain = ::SplatBreaksChain; - MaxInsts = ::MaxInsts; - MaxPairs = ::MaxPairs; - MaxIter = ::MaxIter; - Pow2LenOnly = ::Pow2LenOnly; - NoMemOpBoost = ::NoMemOpBoost; - FastDep = ::FastDep; -} diff --git a/interpreter/llvm/src/lib/Transforms/Vectorize/CMakeLists.txt b/interpreter/llvm/src/lib/Transforms/Vectorize/CMakeLists.txt index 395f440bda470..1aea73cd4a329 100644 --- a/interpreter/llvm/src/lib/Transforms/Vectorize/CMakeLists.txt 
+++ b/interpreter/llvm/src/lib/Transforms/Vectorize/CMakeLists.txt @@ -1,5 +1,4 @@ add_llvm_library(LLVMVectorize - BBVectorize.cpp LoadStoreVectorizer.cpp LoopVectorize.cpp SLPVectorizer.cpp diff --git a/interpreter/llvm/src/lib/Transforms/Vectorize/LoopVectorize.cpp b/interpreter/llvm/src/lib/Transforms/Vectorize/LoopVectorize.cpp index 516ab7d03a88e..012b10c8a9b00 100644 --- a/interpreter/llvm/src/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/interpreter/llvm/src/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -114,12 +114,13 @@ static cl::opt EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden, cl::desc("Enable if-conversion during vectorization.")); -/// We don't vectorize loops with a known constant trip count below this number. +/// Loops with a known constant trip count below this number are vectorized only +/// if no scalar iteration overheads are incurred. static cl::opt TinyTripCountVectorThreshold( "vectorizer-min-trip-count", cl::init(16), cl::Hidden, - cl::desc("Don't vectorize loops with a constant " - "trip count that is smaller than this " - "value.")); + cl::desc("Loops with a constant trip count that is smaller than this " + "value are vectorized only if no scalar iteration overheads " + "are incurred.")); static cl::opt MaximizeBandwidth( "vectorizer-maximize-bandwidth", cl::init(false), cl::Hidden, @@ -485,8 +486,7 @@ class InnerLoopVectorizer { /// of scalars. If \p IfPredicateInstr is true we need to 'hide' each /// scalarized instruction behind an if block predicated on the control /// dependence of the instruction. - virtual void scalarizeInstruction(Instruction *Instr, - bool IfPredicateInstr = false); + void scalarizeInstruction(Instruction *Instr, bool IfPredicateInstr = false); /// Vectorize Load and Store instructions. virtual void vectorizeMemoryInstruction(Instruction *Instr); @@ -533,21 +533,34 @@ class InnerLoopVectorizer { /// Returns true if we should generate a scalar version of \p IV. bool needsScalarInduction(Instruction *IV) const; - /// Return a constant reference to the VectorParts corresponding to \p V from - /// the original loop. If the value has already been vectorized, the - /// corresponding vector entry in VectorLoopValueMap is returned. If, + /// getOrCreateVectorValue and getOrCreateScalarValue coordinate to generate a + /// vector or scalar value on-demand if one is not yet available. When + /// vectorizing a loop, we visit the definition of an instruction before its + /// uses. When visiting the definition, we either vectorize or scalarize the + /// instruction, creating an entry for it in the corresponding map. (In some + /// cases, such as induction variables, we will create both vector and scalar + /// entries.) Then, as we encounter uses of the definition, we derive values + /// for each scalar or vector use unless such a value is already available. + /// For example, if we scalarize a definition and one of its uses is vector, + /// we build the required vector on-demand with an insertelement sequence + /// when visiting the use. Otherwise, if the use is scalar, we can use the + /// existing scalar definition. + /// + /// Return a value in the new loop corresponding to \p V from the original + /// loop at unroll index \p Part. If the value has already been vectorized, + /// the corresponding vector entry in VectorLoopValueMap is returned.
If, /// however, the value has a scalar entry in VectorLoopValueMap, we construct - /// new vector values on-demand by inserting the scalar values into vectors + /// a new vector value on-demand by inserting the scalar values into a vector /// with an insertelement sequence. If the value has been neither vectorized /// nor scalarized, it must be loop invariant, so we simply broadcast the - /// value into vectors. - const VectorParts &getVectorValue(Value *V); + /// value into a vector. + Value *getOrCreateVectorValue(Value *V, unsigned Part); /// Return a value in the new loop corresponding to \p V from the original /// loop at unroll index \p Part and vector index \p Lane. If the value has /// been vectorized but not scalarized, the necessary extractelement /// instruction will be generated. - Value *getScalarValue(Value *V, unsigned Part, unsigned Lane); + Value *getOrCreateScalarValue(Value *V, unsigned Part, unsigned Lane); /// Try to vectorize the interleaved access group that \p Instr belongs to. void vectorizeInterleaveGroup(Instruction *Instr); @@ -561,11 +574,9 @@ class InnerLoopVectorizer { /// Returns (and creates if needed) the trip count of the widened loop. Value *getOrCreateVectorTripCount(Loop *NewLoop); - /// Emit a bypass check to see if the trip count would overflow, or we - /// wouldn't have enough iterations to execute one vector loop. + /// Emit a bypass check to see if the vector trip count is zero, including if + /// it overflows. void emitMinimumIterationCountCheck(Loop *L, BasicBlock *Bypass); - /// Emit a bypass check to see if the vector trip count is nonzero. - void emitVectorLoopEnteredCheck(Loop *L, BasicBlock *Bypass); /// Emit a bypass check to see if all of the SCEV assumptions we've /// had to make are correct. void emitSCEVChecks(Loop *L, BasicBlock *Bypass); @@ -602,90 +613,103 @@ class InnerLoopVectorizer { /// UF x VF scalar values in the new loop. UF and VF are the unroll and /// vectorization factors, respectively. /// - /// Entries can be added to either map with initVector and initScalar, which - /// initialize and return a constant reference to the new entry. If a - /// non-constant reference to a vector entry is required, getVector can be - /// used to retrieve a mutable entry. We currently directly modify the mapped - /// values during "fix-up" operations that occur once the first phase of - /// widening is complete. These operations include type truncation and the - /// second phase of recurrence widening. + /// Entries can be added to either map with setVectorValue and setScalarValue, + /// which assert that an entry was not already added before. If an entry is to + /// replace an existing one, call resetVectorValue. This is currently needed + /// to modify the mapped values during "fix-up" operations that occur once the + /// first phase of widening is complete. These operations include type + /// truncation and the second phase of recurrence widening. /// - /// Otherwise, entries from either map should be accessed using the - /// getVectorValue or getScalarValue functions from InnerLoopVectorizer. - /// getVectorValue and getScalarValue coordinate to generate a vector or - /// scalar value on-demand if one is not yet available. When vectorizing a - /// loop, we visit the definition of an instruction before its uses. When - /// visiting the definition, we either vectorize or scalarize the - /// instruction, creating an entry for it in the corresponding map. 
(In some - /// cases, such as induction variables, we will create both vector and scalar - /// entries.) Then, as we encounter uses of the definition, we derive values - /// for each scalar or vector use unless such a value is already available. - /// For example, if we scalarize a definition and one of its uses is vector, - /// we build the required vector on-demand with an insertelement sequence - /// when visiting the use. Otherwise, if the use is scalar, we can use the - /// existing scalar definition. + /// Entries from either map can be retrieved using the getVectorValue and + /// getScalarValue functions, which assert that the desired value exists. + struct ValueMap { /// Construct an empty map with the given unroll and vectorization factors. - ValueMap(unsigned UnrollFactor, unsigned VecWidth) - : UF(UnrollFactor), VF(VecWidth) { - // The unroll and vectorization factors are only used in asserts builds - // to verify map entries are sized appropriately. - (void)UF; - (void)VF; + ValueMap(unsigned UF, unsigned VF) : UF(UF), VF(VF) {} + + /// \return True if the map has any vector entry for \p Key. + bool hasAnyVectorValue(Value *Key) const { + return VectorMapStorage.count(Key); } - /// \return True if the map has a vector entry for \p Key. - bool hasVector(Value *Key) const { return VectorMapStorage.count(Key); } - - /// \return True if the map has a scalar entry for \p Key. - bool hasScalar(Value *Key) const { return ScalarMapStorage.count(Key); } - - /// \brief Map \p Key to the given VectorParts \p Entry, and return a - /// constant reference to the new vector map entry. The given key should - /// not already be in the map, and the given VectorParts should be - /// correctly sized for the current unroll factor. - const VectorParts &initVector(Value *Key, const VectorParts &Entry) { - assert(!hasVector(Key) && "Vector entry already initialized"); - assert(Entry.size() == UF && "VectorParts has wrong dimensions"); - VectorMapStorage[Key] = Entry; - return VectorMapStorage[Key]; + /// \return True if the map has a vector entry for \p Key and \p Part. + bool hasVectorValue(Value *Key, unsigned Part) const { + assert(Part < UF && "Queried Vector Part is too large."); + if (!hasAnyVectorValue(Key)) + return false; + const VectorParts &Entry = VectorMapStorage.find(Key)->second; + assert(Entry.size() == UF && "VectorParts has wrong dimensions."); + return Entry[Part] != nullptr; } - /// \brief Map \p Key to the given ScalarParts \p Entry, and return a - /// constant reference to the new scalar map entry. The given key should - /// not already be in the map, and the given ScalarParts should be - /// correctly sized for the current unroll and vectorization factors. - const ScalarParts &initScalar(Value *Key, const ScalarParts &Entry) { - assert(!hasScalar(Key) && "Scalar entry already initialized"); - assert(Entry.size() == UF && - all_of(make_range(Entry.begin(), Entry.end()), - [&](const SmallVectorImpl &Values) -> bool { - return Values.size() == VF; - }) && - "ScalarParts has wrong dimensions"); - ScalarMapStorage[Key] = Entry; - return ScalarMapStorage[Key]; + /// \return True if the map has any scalar entry for \p Key. + bool hasAnyScalarValue(Value *Key) const { + return ScalarMapStorage.count(Key); } - /// \return A reference to the vector map entry corresponding to \p Key. - /// The key should already be in the map. This function should only be used - /// when it's necessary to update values that have already been vectorized. 
- /// This is the case for "fix-up" operations including type truncation and - /// the second phase of recurrence vectorization. If a non-const reference - /// isn't required, getVectorValue should be used instead. - VectorParts &getVector(Value *Key) { - assert(hasVector(Key) && "Vector entry not initialized"); - return VectorMapStorage.find(Key)->second; + /// \return True if the map has a scalar entry for \p Key, \p Part and + /// \p Part. + bool hasScalarValue(Value *Key, unsigned Part, unsigned Lane) const { + assert(Part < UF && "Queried Scalar Part is too large."); + assert(Lane < VF && "Queried Scalar Lane is too large."); + if (!hasAnyScalarValue(Key)) + return false; + const ScalarParts &Entry = ScalarMapStorage.find(Key)->second; + assert(Entry.size() == UF && "ScalarParts has wrong dimensions."); + assert(Entry[Part].size() == VF && "ScalarParts has wrong dimensions."); + return Entry[Part][Lane] != nullptr; } - /// Retrieve an entry from the vector or scalar maps. The preferred way to - /// access an existing mapped entry is with getVectorValue or - /// getScalarValue from InnerLoopVectorizer. Until those functions can be - /// moved inside ValueMap, we have to declare them as friends. - friend const VectorParts &InnerLoopVectorizer::getVectorValue(Value *V); - friend Value *InnerLoopVectorizer::getScalarValue(Value *V, unsigned Part, - unsigned Lane); + /// Retrieve the existing vector value that corresponds to \p Key and + /// \p Part. + Value *getVectorValue(Value *Key, unsigned Part) { + assert(hasVectorValue(Key, Part) && "Getting non-existent value."); + return VectorMapStorage[Key][Part]; + } + + /// Retrieve the existing scalar value that corresponds to \p Key, \p Part + /// and \p Lane. + Value *getScalarValue(Value *Key, unsigned Part, unsigned Lane) { + assert(hasScalarValue(Key, Part, Lane) && "Getting non-existent value."); + return ScalarMapStorage[Key][Part][Lane]; + } + + /// Set a vector value associated with \p Key and \p Part. Assumes such a + /// value is not already set. If it is, use resetVectorValue() instead. + void setVectorValue(Value *Key, unsigned Part, Value *Vector) { + assert(!hasVectorValue(Key, Part) && "Vector value already set for part"); + if (!VectorMapStorage.count(Key)) { + VectorParts Entry(UF); + VectorMapStorage[Key] = Entry; + } + VectorMapStorage[Key][Part] = Vector; + } + + /// Set a scalar value associated with \p Key for \p Part and \p Lane. + /// Assumes such a value is not already set. + void setScalarValue(Value *Key, unsigned Part, unsigned Lane, + Value *Scalar) { + assert(!hasScalarValue(Key, Part, Lane) && "Scalar value already set"); + if (!ScalarMapStorage.count(Key)) { + ScalarParts Entry(UF); + for (unsigned Part = 0; Part < UF; ++Part) + Entry[Part].resize(VF, nullptr); + // TODO: Consider storing uniform values only per-part, as they occupy + // lane 0 only, keeping the other VF-1 redundant entries null. + ScalarMapStorage[Key] = Entry; + } + ScalarMapStorage[Key][Part][Lane] = Scalar; + } + + /// Reset the vector value associated with \p Key for the given \p Part. + /// This function can be used to update values that have already been + /// vectorized. This is the case for "fix-up" operations including type + /// truncation and the second phase of recurrence vectorization. + void resetVectorValue(Value *Key, unsigned Part, Value *Vector) { + assert(hasVectorValue(Key, Part) && "Vector value not set for part"); + VectorMapStorage[Key][Part] = Vector; + } private: /// The unroll factor. 
Each entry in the vector map contains UF vector @@ -1578,6 +1602,9 @@ class LoopVectorizationLegality { /// Return the first-order recurrences found in the loop. RecurrenceSet *getFirstOrderRecurrences() { return &FirstOrderRecurrences; } + /// Return the set of instructions to sink to handle first-order recurrences. + DenseMap &getSinkAfter() { return SinkAfter; } + /// Returns the widest induction type. Type *getWidestInductionType() { return WidestIndTy; } @@ -1780,6 +1807,9 @@ class LoopVectorizationLegality { InductionList Inductions; /// Holds the phi nodes that are first-order recurrences. RecurrenceSet FirstOrderRecurrences; + /// Holds instructions that need to sink past other instructions to handle + /// first-order recurrences. + DenseMap SinkAfter; /// Holds the widest induction type encountered. Type *WidestIndTy; @@ -2092,6 +2122,10 @@ class LoopVectorizationCostModel { /// The data is collected per VF. DenseMap> Scalars; + /// Holds the instructions (address computations) that are forced to be + /// scalarized. + DenseMap> ForcedScalars; + /// Returns the expected difference in cost from scalarizing the expression /// feeding a predicated instruction \p PredInst. The instructions to /// scalarize and their scalar costs are collected in \p ScalarCosts. A @@ -2414,15 +2448,13 @@ void InnerLoopVectorizer::createVectorIntOrFpInductionPHI( PHINode *VecInd = PHINode::Create(SteppedStart->getType(), 2, "vec.ind", &*LoopVectorBody->getFirstInsertionPt()); Instruction *LastInduction = VecInd; - VectorParts Entry(UF); for (unsigned Part = 0; Part < UF; ++Part) { - Entry[Part] = LastInduction; + VectorLoopValueMap.setVectorValue(EntryVal, Part, LastInduction); + if (isa(EntryVal)) + addMetadata(LastInduction, EntryVal); LastInduction = cast(addFastMathFlag( Builder.CreateBinOp(AddOp, LastInduction, SplatVF, "step.add"))); } - VectorLoopValueMap.initVector(EntryVal, Entry); - if (isa(EntryVal)) - addMetadata(Entry, EntryVal); // Move the last step to the end of the latch block. This ensures consistent // placement of all induction updates. @@ -2528,13 +2560,13 @@ void InnerLoopVectorizer::widenIntOrFpInduction(PHINode *IV, TruncInst *Trunc) { // induction variable, and build the necessary step vectors. if (!VectorizedIV) { Value *Broadcasted = getBroadcastInstrs(ScalarIV); - VectorParts Entry(UF); - for (unsigned Part = 0; Part < UF; ++Part) - Entry[Part] = + for (unsigned Part = 0; Part < UF; ++Part) { + Value *EntryPart = getStepVector(Broadcasted, VF * Part, Step, ID.getInductionOpcode()); - VectorLoopValueMap.initVector(EntryVal, Entry); - if (Trunc) - addMetadata(Entry, Trunc); + VectorLoopValueMap.setVectorValue(EntryVal, Part, EntryPart); + if (Trunc) + addMetadata(EntryPart, Trunc); + } } // If an induction variable is only used for counting loop iterations or @@ -2634,17 +2666,14 @@ void InnerLoopVectorizer::buildScalarSteps(Value *ScalarIV, Value *Step, Cost->isUniformAfterVectorization(cast(EntryVal), VF) ? 1 : VF; // Compute the scalar steps and save the results in VectorLoopValueMap. 
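(Illustrative sketch, not part of the patch: the per-part/per-lane bookkeeping behind VectorLoopValueMap, which the rewritten loop below fills via setScalarValue(), reduces to two lazily sized maps. A standalone C++ reduction of the idea, with int* standing in for llvm::Value* and all names invented:)

#include <cassert>
#include <map>
#include <vector>

struct MiniValueMap {
  unsigned UF, VF; // unroll factor, vectorization factor
  // Key -> one vector value per unroll part.
  std::map<int *, std::vector<int *>> VectorMap;
  // Key -> UF x VF scalar values, one per part and lane.
  std::map<int *, std::vector<std::vector<int *>>> ScalarMap;

  MiniValueMap(unsigned UF, unsigned VF) : UF(UF), VF(VF) {}

  void setVectorValue(int *Key, unsigned Part, int *V) {
    auto &Entry = VectorMap[Key]; // lazily allocate the UF slots
    if (Entry.empty())
      Entry.assign(UF, nullptr);
    assert(Part < UF && !Entry[Part] && "vector value already set");
    Entry[Part] = V;
  }

  void setScalarValue(int *Key, unsigned Part, unsigned Lane, int *V) {
    auto &Entry = ScalarMap[Key]; // lazily allocate the UF x VF grid
    if (Entry.empty())
      Entry.assign(UF, std::vector<int *>(VF, nullptr));
    assert(Part < UF && Lane < VF && !Entry[Part][Lane] && "already set");
    Entry[Part][Lane] = V;
  }
};
// (end of sketch)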
- ScalarParts Entry(UF); for (unsigned Part = 0; Part < UF; ++Part) { - Entry[Part].resize(VF); for (unsigned Lane = 0; Lane < Lanes; ++Lane) { auto *StartIdx = getSignedIntOrFpConstant(ScalarIVTy, VF * Part + Lane); auto *Mul = addFastMathFlag(Builder.CreateBinOp(MulOp, StartIdx, Step)); auto *Add = addFastMathFlag(Builder.CreateBinOp(AddOp, ScalarIV, Mul)); - Entry[Part][Lane] = Add; + VectorLoopValueMap.setScalarValue(EntryVal, Part, Lane, Add); } } - VectorLoopValueMap.initScalar(EntryVal, Entry); } int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { @@ -2662,8 +2691,7 @@ bool LoopVectorizationLegality::isUniform(Value *V) { return LAI->isUniform(V); } -const InnerLoopVectorizer::VectorParts & -InnerLoopVectorizer::getVectorValue(Value *V) { +Value *InnerLoopVectorizer::getOrCreateVectorValue(Value *V, unsigned Part) { assert(V != Induction && "The new induction variable should not be used."); assert(!V->getType()->isVectorTy() && "Can't widen a vector"); assert(!V->getType()->isVoidTy() && "Type does not produce a value"); @@ -2672,17 +2700,16 @@ InnerLoopVectorizer::getVectorValue(Value *V) { if (Legal->hasStride(V)) V = ConstantInt::get(V->getType(), 1); - // If we have this scalar in the map, return it. - if (VectorLoopValueMap.hasVector(V)) - return VectorLoopValueMap.VectorMapStorage[V]; + // If we have a vector mapped to this value, return it. + if (VectorLoopValueMap.hasVectorValue(V, Part)) + return VectorLoopValueMap.getVectorValue(V, Part); // If the value has not been vectorized, check if it has been scalarized // instead. If it has been scalarized, and we actually need the value in // vector form, we will construct the vector values on demand. - if (VectorLoopValueMap.hasScalar(V)) { + if (VectorLoopValueMap.hasAnyScalarValue(V)) { - // Initialize a new vector map entry. - VectorParts Entry(UF); + Value *ScalarValue = VectorLoopValueMap.getScalarValue(V, Part, 0); // If we've scalarized a value, that value should be an instruction. auto *I = cast(V); @@ -2690,17 +2717,17 @@ InnerLoopVectorizer::getVectorValue(Value *V) { // If we aren't vectorizing, we can just copy the scalar map values over to // the vector map. if (VF == 1) { - for (unsigned Part = 0; Part < UF; ++Part) - Entry[Part] = getScalarValue(V, Part, 0); - return VectorLoopValueMap.initVector(V, Entry); + VectorLoopValueMap.setVectorValue(V, Part, ScalarValue); + return ScalarValue; } - // Get the last scalar instruction we generated for V. If the value is - // known to be uniform after vectorization, this corresponds to lane zero - // of the last unroll iteration. Otherwise, the last instruction is the one - // we created for the last vector lane of the last unroll iteration. + // Get the last scalar instruction we generated for V and Part. If the value + // is known to be uniform after vectorization, this corresponds to lane zero + // of the Part unroll iteration. Otherwise, the last instruction is the one + // we created for the last vector lane of the Part unroll iteration. unsigned LastLane = Cost->isUniformAfterVectorization(I, VF) ? 0 : VF - 1; - auto *LastInst = cast(getScalarValue(V, UF - 1, LastLane)); + auto *LastInst = + cast(VectorLoopValueMap.getScalarValue(V, Part, LastLane)); // Set the insert point after the last scalarized instruction. This ensures // the insertelement sequence will directly follow the scalar definitions. @@ -2714,52 +2741,50 @@ InnerLoopVectorizer::getVectorValue(Value *V) { // iteration. Otherwise, we construct the vector values using insertelement // instructions. 
Since the resulting vectors are stored in // VectorLoopValueMap, we will only generate the insertelements once. - for (unsigned Part = 0; Part < UF; ++Part) { - Value *VectorValue = nullptr; - if (Cost->isUniformAfterVectorization(I, VF)) { - VectorValue = getBroadcastInstrs(getScalarValue(V, Part, 0)); - } else { - VectorValue = UndefValue::get(VectorType::get(V->getType(), VF)); - for (unsigned Lane = 0; Lane < VF; ++Lane) - VectorValue = Builder.CreateInsertElement( - VectorValue, getScalarValue(V, Part, Lane), - Builder.getInt32(Lane)); - } - Entry[Part] = VectorValue; + Value *VectorValue = nullptr; + if (Cost->isUniformAfterVectorization(I, VF)) { + VectorValue = getBroadcastInstrs(ScalarValue); + } else { + VectorValue = UndefValue::get(VectorType::get(V->getType(), VF)); + for (unsigned Lane = 0; Lane < VF; ++Lane) + VectorValue = Builder.CreateInsertElement( + VectorValue, getOrCreateScalarValue(V, Part, Lane), + Builder.getInt32(Lane)); } + VectorLoopValueMap.setVectorValue(V, Part, VectorValue); Builder.restoreIP(OldIP); - return VectorLoopValueMap.initVector(V, Entry); + return VectorValue; } // If this scalar is unknown, assume that it is a constant or that it is // loop invariant. Broadcast V and save the value for future uses. Value *B = getBroadcastInstrs(V); - return VectorLoopValueMap.initVector(V, VectorParts(UF, B)); + VectorLoopValueMap.setVectorValue(V, Part, B); + return B; } -Value *InnerLoopVectorizer::getScalarValue(Value *V, unsigned Part, - unsigned Lane) { +Value *InnerLoopVectorizer::getOrCreateScalarValue(Value *V, unsigned Part, + unsigned Lane) { // If the value is not an instruction contained in the loop, it should // already be scalar. if (OrigLoop->isLoopInvariant(V)) return V; - assert(Lane > 0 ? - !Cost->isUniformAfterVectorization(cast(V), VF) - : true && "Uniform values only have lane zero"); + assert(Lane > 0 ? !Cost->isUniformAfterVectorization(cast(V), VF) + : true && "Uniform values only have lane zero"); // If the value from the original loop has not been vectorized, it is // represented by UF x VF scalar values in the new loop. Return the requested // scalar value. - if (VectorLoopValueMap.hasScalar(V)) - return VectorLoopValueMap.ScalarMapStorage[V][Part][Lane]; + if (VectorLoopValueMap.hasScalarValue(V, Part, Lane)) + return VectorLoopValueMap.getScalarValue(V, Part, Lane); // If the value has not been scalarized, get its entry in VectorLoopValueMap // for the given unroll part. If this entry is not a vector type (i.e., the // vectorization factor is one), there is no need to generate an // extractelement instruction. - auto *U = getVectorValue(V)[Part]; + auto *U = getOrCreateVectorValue(V, Part); if (!U->getType()->isVectorTy()) { assert(VF == 1 && "Value not scalarized has non-vector type"); return U; @@ -2841,7 +2866,7 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr) { Index += (VF - 1) * Group->getFactor(); for (unsigned Part = 0; Part < UF; Part++) { - Value *NewPtr = getScalarValue(Ptr, Part, 0); + Value *NewPtr = getOrCreateScalarValue(Ptr, Part, 0); // Notice current instruction could be any index. Need to adjust the address // to the member of index 0. 
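(Illustrative sketch, not part of the patch: when getOrCreateVectorValue() finds only scalarized lanes, the vector form it builds on demand is a broadcast for uniform values or an insertelement chain otherwise. A self-contained helper against the LLVM-5-era IRBuilder API; the helper name and signature are invented:)

#include "llvm/ADT/ArrayRef.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"

// Pack one unroll part's VF scalar lanes into a vector value, mirroring
// the insertelement sequence getOrCreateVectorValue() emits on demand.
static llvm::Value *packScalarLanes(llvm::IRBuilder<> &Builder,
                                    llvm::ArrayRef<llvm::Value *> Lanes) {
  unsigned VF = Lanes.size();
  llvm::Type *ScalarTy = Lanes[0]->getType();
  llvm::Value *Vec =
      llvm::UndefValue::get(llvm::VectorType::get(ScalarTy, VF));
  for (unsigned Lane = 0; Lane < VF; ++Lane)
    Vec = Builder.CreateInsertElement(Vec, Lanes[Lane],
                                      Builder.getInt32(Lane));
  return Vec; // a VF-wide vector holding the given lanes
}
// (end of sketch)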
@@ -2884,7 +2909,6 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr) { if (!Member) continue; - VectorParts Entry(UF); Constant *StrideMask = createStrideMask(Builder, I, InterleaveFactor, VF); for (unsigned Part = 0; Part < UF; Part++) { Value *StridedVec = Builder.CreateShuffleVector( @@ -2896,10 +2920,11 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr) { StridedVec = Builder.CreateBitOrPointerCast(StridedVec, OtherVTy); } - Entry[Part] = - Group->isReverse() ? reverseVector(StridedVec) : StridedVec; + if (Group->isReverse()) + StridedVec = reverseVector(StridedVec); + + VectorLoopValueMap.setVectorValue(Member, Part, StridedVec); } - VectorLoopValueMap.initVector(Member, Entry); } return; } @@ -2916,8 +2941,8 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr) { Instruction *Member = Group->getMember(i); assert(Member && "Fail to get a member from an interleaved store group"); - Value *StoredVec = - getVectorValue(cast(Member)->getValueOperand())[Part]; + Value *StoredVec = getOrCreateVectorValue( + cast(Member)->getValueOperand(), Part); if (Group->isReverse()) StoredVec = reverseVector(StoredVec); @@ -2978,16 +3003,14 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) { bool CreateGatherScatter = (Decision == LoopVectorizationCostModel::CM_GatherScatter); - VectorParts VectorGep; + // Either Ptr feeds a vector load/store, or a vector GEP should feed a vector + // gather/scatter. Otherwise Decision should have been to Scalarize. + assert((ConsecutiveStride || CreateGatherScatter) && + "The instruction should be scalarized"); // Handle consecutive loads/stores. - if (ConsecutiveStride) { - Ptr = getScalarValue(Ptr, 0, 0); - } else { - // At this point we should vector version of GEP for Gather or Scatter - assert(CreateGatherScatter && "The instruction should be scalarized"); - VectorGep = getVectorValue(Ptr); - } + if (ConsecutiveStride) + Ptr = getOrCreateScalarValue(Ptr, 0, 0); VectorParts Mask = createBlockInMask(Instr->getParent()); // Handle Stores: @@ -2995,16 +3018,15 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) { assert(!Legal->isUniform(SI->getPointerOperand()) && "We do not allow storing to uniform addresses"); setDebugLocFromInst(Builder, SI); - // We don't want to update the value in the map as it might be used in - // another expression. So don't use a reference type for "StoredVal". - VectorParts StoredVal = getVectorValue(SI->getValueOperand()); for (unsigned Part = 0; Part < UF; ++Part) { Instruction *NewSI = nullptr; + Value *StoredVal = getOrCreateVectorValue(SI->getValueOperand(), Part); if (CreateGatherScatter) { Value *MaskPart = Legal->isMaskRequired(SI) ? Mask[Part] : nullptr; - NewSI = Builder.CreateMaskedScatter(StoredVal[Part], VectorGep[Part], - Alignment, MaskPart); + Value *VectorGep = getOrCreateVectorValue(Ptr, Part); + NewSI = Builder.CreateMaskedScatter(StoredVal, VectorGep, Alignment, + MaskPart); } else { // Calculate the pointer for the specific unroll-part. Value *PartPtr = @@ -3013,7 +3035,10 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) { if (Reverse) { // If we store to reverse consecutive memory locations, then we need // to reverse the order of elements in the stored value. - StoredVal[Part] = reverseVector(StoredVal[Part]); + StoredVal = reverseVector(StoredVal); + // We don't want to update the value in the map as it might be used in + // another expression. 
So don't call resetVectorValue(StoredVal). + // If the address is consecutive but reversed, then the // wide store needs to start at the last vector element. PartPtr = @@ -3027,11 +3052,10 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) { Builder.CreateBitCast(PartPtr, DataTy->getPointerTo(AddressSpace)); if (Legal->isMaskRequired(SI)) - NewSI = Builder.CreateMaskedStore(StoredVal[Part], VecPtr, Alignment, + NewSI = Builder.CreateMaskedStore(StoredVal, VecPtr, Alignment, Mask[Part]); else - NewSI = - Builder.CreateAlignedStore(StoredVal[Part], VecPtr, Alignment); + NewSI = Builder.CreateAlignedStore(StoredVal, VecPtr, Alignment); } addMetadata(NewSI, SI); } @@ -3041,14 +3065,14 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) { // Handle loads. assert(LI && "Must have a load instruction"); setDebugLocFromInst(Builder, LI); - VectorParts Entry(UF); for (unsigned Part = 0; Part < UF; ++Part) { - Instruction *NewLI; + Value *NewLI; if (CreateGatherScatter) { Value *MaskPart = Legal->isMaskRequired(LI) ? Mask[Part] : nullptr; - NewLI = Builder.CreateMaskedGather(VectorGep[Part], Alignment, MaskPart, - 0, "wide.masked.gather"); - Entry[Part] = NewLI; + Value *VectorGep = getOrCreateVectorValue(Ptr, Part); + NewLI = Builder.CreateMaskedGather(VectorGep, Alignment, MaskPart, + nullptr, "wide.masked.gather"); + addMetadata(NewLI, LI); } else { // Calculate the pointer for the specific unroll-part. Value *PartPtr = @@ -3070,11 +3094,14 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) { "wide.masked.load"); else NewLI = Builder.CreateAlignedLoad(VecPtr, Alignment, "wide.load"); - Entry[Part] = Reverse ? reverseVector(NewLI) : NewLI; + + // Add metadata to the load, but setVectorValue to the reverse shuffle. + addMetadata(NewLI, LI); + if (Reverse) + NewLI = reverseVector(NewLI); } - addMetadata(NewLI, LI); + VectorLoopValueMap.setVectorValue(Instr, Part, NewLI); } - VectorLoopValueMap.initVector(Instr, Entry); } void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, @@ -3091,9 +3118,6 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, // Does this instruction return a value ? bool IsVoidRetTy = Instr->getType()->isVoidTy(); - // Initialize a new scalar map entry. - ScalarParts Entry(UF); - VectorParts Cond; if (IfPredicateInstr) Cond = createBlockInMask(Instr->getParent()); @@ -3105,7 +3129,6 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, // For each vector unroll 'part': for (unsigned Part = 0; Part < UF; ++Part) { - Entry[Part].resize(VF); // For each scalar that we create: for (unsigned Lane = 0; Lane < Lanes; ++Lane) { @@ -3126,7 +3149,7 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, // Replace the operands of the cloned instructions with their scalar // equivalents in the new loop. for (unsigned op = 0, e = Instr->getNumOperands(); op != e; ++op) { - auto *NewOp = getScalarValue(Instr->getOperand(op), Part, Lane); + auto *NewOp = getOrCreateScalarValue(Instr->getOperand(op), Part, Lane); Cloned->setOperand(op, NewOp); } addNewMetadata(Cloned, Instr); @@ -3135,7 +3158,7 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, Builder.Insert(Cloned); // Add the cloned scalar to the scalar map entry. - Entry[Part][Lane] = Cloned; + VectorLoopValueMap.setScalarValue(Instr, Part, Lane, Cloned); // If we just cloned a new assumption, add it the assumption cache. 
if (auto *II = dyn_cast(Cloned)) @@ -3147,7 +3170,6 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, PredicatedInstructions.push_back(std::make_pair(Cloned, Cmp)); } } - VectorLoopValueMap.initScalar(Instr, Entry); } PHINode *InnerLoopVectorizer::createInductionVariable(Loop *L, Value *Start, @@ -3265,37 +3287,16 @@ void InnerLoopVectorizer::emitMinimumIterationCountCheck(Loop *L, BasicBlock *BB = L->getLoopPreheader(); IRBuilder<> Builder(BB->getTerminator()); - // Generate code to check that the loop's trip count that we computed by - // adding one to the backedge-taken count will not overflow. - Value *CheckMinIters = Builder.CreateICmpULT( - Count, ConstantInt::get(Count->getType(), VF * UF), "min.iters.check"); - - BasicBlock *NewBB = - BB->splitBasicBlock(BB->getTerminator(), "min.iters.checked"); - // Update dominator tree immediately if the generated block is a - // LoopBypassBlock because SCEV expansions to generate loop bypass - // checks may query it before the current function is finished. - DT->addNewBlock(NewBB, BB); - if (L->getParentLoop()) - L->getParentLoop()->addBasicBlockToLoop(NewBB, *LI); - ReplaceInstWithInst(BB->getTerminator(), - BranchInst::Create(Bypass, NewBB, CheckMinIters)); - LoopBypassBlocks.push_back(BB); -} - -void InnerLoopVectorizer::emitVectorLoopEnteredCheck(Loop *L, - BasicBlock *Bypass) { - Value *TC = getOrCreateVectorTripCount(L); - BasicBlock *BB = L->getLoopPreheader(); - IRBuilder<> Builder(BB->getTerminator()); - - // Now, compare the new count to zero. If it is zero skip the vector loop and - // jump to the scalar loop. - Value *Cmp = Builder.CreateICmpEQ(TC, Constant::getNullValue(TC->getType()), - "cmp.zero"); + // Generate code to check if the loop's trip count is less than VF * UF, or + // equal to it in case a scalar epilogue is required; this implies that the + // vector trip count is zero. This check also covers the case where adding one + // to the backedge-taken count overflowed leading to an incorrect trip count + // of zero. In this case we will also jump to the scalar loop. + auto P = Legal->requiresScalarEpilogue() ? ICmpInst::ICMP_ULE + : ICmpInst::ICMP_ULT; + Value *CheckMinIters = Builder.CreateICmp( + P, Count, ConstantInt::get(Count->getType(), VF * UF), "min.iters.check"); - // Generate code to check that the loop's trip count that we computed by - // adding one to the backedge-taken count will not overflow. BasicBlock *NewBB = BB->splitBasicBlock(BB->getTerminator(), "vector.ph"); // Update dominator tree immediately if the generated block is a // LoopBypassBlock because SCEV expansions to generate loop bypass @@ -3304,7 +3305,7 @@ void InnerLoopVectorizer::emitVectorLoopEnteredCheck(Loop *L, if (L->getParentLoop()) L->getParentLoop()->addBasicBlockToLoop(NewBB, *LI); ReplaceInstWithInst(BB->getTerminator(), - BranchInst::Create(Bypass, NewBB, Cmp)); + BranchInst::Create(Bypass, NewBB, CheckMinIters)); LoopBypassBlocks.push_back(BB); } @@ -3453,14 +3454,13 @@ void InnerLoopVectorizer::createVectorizedLoopSkeleton() { Value *StartIdx = ConstantInt::get(IdxTy, 0); - // We need to test whether the backedge-taken count is uint##_max. Adding one - // to it will cause overflow and an incorrect loop trip count in the vector - // body. In case of overflow we want to directly jump to the scalar remainder - // loop. - emitMinimumIterationCountCheck(Lp, ScalarPH); // Now, compare the new count to zero. If it is zero skip the vector loop and - // jump to the scalar loop. 
- emitVectorLoopEnteredCheck(Lp, ScalarPH); + // jump to the scalar loop. This check also covers the case where the + // backedge-taken count is uint##_max: adding one to it will overflow leading + // to an incorrect trip count of zero. In this (rare) case we will also jump + // to the scalar loop. + emitMinimumIterationCountCheck(Lp, ScalarPH); + // Generate the code to check any assumptions that we've made for SCEV // expressions. emitSCEVChecks(Lp, ScalarPH); @@ -3503,7 +3503,7 @@ void InnerLoopVectorizer::createVectorizedLoopSkeleton() { // We know what the end value is. EndValue = CountRoundDown; } else { - IRBuilder<> B(LoopBypassBlocks.back()->getTerminator()); + IRBuilder<> B(Lp->getLoopPreheader()->getTerminator()); Type *StepType = II.getStep()->getType(); Instruction::CastOps CastOp = CastInst::getCastOpcode(CountRoundDown, true, StepType, true); @@ -3783,10 +3783,10 @@ void InnerLoopVectorizer::truncateToMinimalBitwidths() { // If the value wasn't vectorized, we must maintain the original scalar // type. The absence of the value from VectorLoopValueMap indicates that it // wasn't vectorized. - if (!VectorLoopValueMap.hasVector(KV.first)) + if (!VectorLoopValueMap.hasAnyVectorValue(KV.first)) continue; - VectorParts &Parts = VectorLoopValueMap.getVector(KV.first); - for (Value *&I : Parts) { + for (unsigned Part = 0; Part < UF; ++Part) { + Value *I = getOrCreateVectorValue(KV.first, Part); if (Erased.count(I) || I->use_empty() || !isa(I)) continue; Type *OriginalTy = I->getType(); @@ -3811,7 +3811,11 @@ void InnerLoopVectorizer::truncateToMinimalBitwidths() { if (auto *BO = dyn_cast(I)) { NewI = B.CreateBinOp(BO->getOpcode(), ShrinkOperand(BO->getOperand(0)), ShrinkOperand(BO->getOperand(1))); - cast(NewI)->copyIRFlags(I); + + // Any wrapping introduced by shrinking this operation shouldn't be + // considered undefined behavior. So, we can't unconditionally copy + // arithmetic wrapping flags to NewI. + cast(NewI)->copyIRFlags(I, /*IncludeWrapFlags=*/false); } else if (auto *CI = dyn_cast(I)) { NewI = B.CreateICmp(CI->getPredicate(), ShrinkOperand(CI->getOperand(0)), @@ -3871,7 +3875,7 @@ void InnerLoopVectorizer::truncateToMinimalBitwidths() { I->replaceAllUsesWith(Res); cast(I)->eraseFromParent(); Erased.insert(I); - I = Res; + VectorLoopValueMap.resetVectorValue(KV.first, Part, Res); } } @@ -3880,15 +3884,15 @@ void InnerLoopVectorizer::truncateToMinimalBitwidths() { // If the value wasn't vectorized, we must maintain the original scalar // type. The absence of the value from VectorLoopValueMap indicates that it // wasn't vectorized. - if (!VectorLoopValueMap.hasVector(KV.first)) + if (!VectorLoopValueMap.hasAnyVectorValue(KV.first)) continue; - VectorParts &Parts = VectorLoopValueMap.getVector(KV.first); - for (Value *&I : Parts) { + for (unsigned Part = 0; Part < UF; ++Part) { + Value *I = getOrCreateVectorValue(KV.first, Part); ZExtInst *Inst = dyn_cast(I); if (Inst && Inst->use_empty()) { Value *NewI = Inst->getOperand(0); Inst->eraseFromParent(); - I = NewI; + VectorLoopValueMap.resetVectorValue(KV.first, Part, NewI); } } } @@ -4018,28 +4022,29 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(PHINode *Phi) { // We constructed a temporary phi node in the first phase of vectorization. // This phi node will eventually be deleted. - VectorParts &PhiParts = VectorLoopValueMap.getVector(Phi); - Builder.SetInsertPoint(cast(PhiParts[0])); + Builder.SetInsertPoint( + cast(VectorLoopValueMap.getVectorValue(Phi, 0))); // Create a phi node for the new recurrence. 
The current value will either be // the initial value inserted into a vector or loop-varying vector value. auto *VecPhi = Builder.CreatePHI(VectorInit->getType(), 2, "vector.recur"); VecPhi->addIncoming(VectorInit, LoopVectorPreHeader); - // Get the vectorized previous value. - auto &PreviousParts = getVectorValue(Previous); + // Get the vectorized previous value of the last part UF - 1. It appears last + // among all unrolled iterations, due to the order of their construction. + Value *PreviousLastPart = getOrCreateVectorValue(Previous, UF - 1); // Set the insertion point after the previous value if it is an instruction. // Note that the previous value may have been constant-folded so it is not // guaranteed to be an instruction in the vector loop. Also, if the previous // value is a phi node, we should insert after all the phi nodes to avoid // breaking basic block verification. - if (LI->getLoopFor(LoopVectorBody)->isLoopInvariant(PreviousParts[UF - 1]) || - isa(PreviousParts[UF - 1])) + if (LI->getLoopFor(LoopVectorBody)->isLoopInvariant(PreviousLastPart) || + isa(PreviousLastPart)) Builder.SetInsertPoint(&*LoopVectorBody->getFirstInsertionPt()); else Builder.SetInsertPoint( - &*++BasicBlock::iterator(cast(PreviousParts[UF - 1]))); + &*++BasicBlock::iterator(cast(PreviousLastPart))); // We will construct a vector for the recurrence by combining the values for // the current and previous iterations. This is the required shuffle mask. @@ -4054,15 +4059,16 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(PHINode *Phi) { // Shuffle the current and previous vector and update the vector parts. for (unsigned Part = 0; Part < UF; ++Part) { + Value *PreviousPart = getOrCreateVectorValue(Previous, Part); + Value *PhiPart = VectorLoopValueMap.getVectorValue(Phi, Part); auto *Shuffle = - VF > 1 - ? Builder.CreateShuffleVector(Incoming, PreviousParts[Part], - ConstantVector::get(ShuffleMask)) - : Incoming; - PhiParts[Part]->replaceAllUsesWith(Shuffle); - cast(PhiParts[Part])->eraseFromParent(); - PhiParts[Part] = Shuffle; - Incoming = PreviousParts[Part]; + VF > 1 ? Builder.CreateShuffleVector(Incoming, PreviousPart, + ConstantVector::get(ShuffleMask)) + : Incoming; + PhiPart->replaceAllUsesWith(Shuffle); + cast(PhiPart)->eraseFromParent(); + VectorLoopValueMap.resetVectorValue(Phi, Part, Shuffle); + Incoming = PreviousPart; } // Fix the latch value of the new recurrence in the vector loop. @@ -4090,7 +4096,7 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(PHINode *Phi) { // `Incoming`. This is analogous to the vectorized case above: extracting the // second last element when VF > 1. else if (UF > 1) - ExtractForPhiUsedOutsideLoop = PreviousParts[UF - 2]; + ExtractForPhiUsedOutsideLoop = getOrCreateVectorValue(Previous, UF - 2); // Fix the initial value of the original recurrence in the scalar loop. Builder.SetInsertPoint(&*LoopScalarPreHeader->begin()); @@ -4138,11 +4144,10 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi) { // To do so, we need to generate the 'identity' vector and override // one of the elements with the incoming scalar reduction. We need // to do it in the vector-loop preheader. - Builder.SetInsertPoint(LoopBypassBlocks[1]->getTerminator()); + Builder.SetInsertPoint(LoopVectorPreHeader->getTerminator()); // This is the vector-clone of the value that leaves the loop. 
- const VectorParts &VectorExit = getVectorValue(LoopExitInst); - Type *VecTy = VectorExit[0]->getType(); + Type *VecTy = getOrCreateVectorValue(LoopExitInst, 0)->getType(); // Find the reduction identity variable. Zero for addition, or, xor, // one for multiplication, -1 for And. @@ -4180,18 +4185,17 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi) { // Reductions do not have to start at zero. They can start with // any loop invariant values. - const VectorParts &VecRdxPhi = getVectorValue(Phi); BasicBlock *Latch = OrigLoop->getLoopLatch(); Value *LoopVal = Phi->getIncomingValueForBlock(Latch); - const VectorParts &Val = getVectorValue(LoopVal); - for (unsigned part = 0; part < UF; ++part) { + for (unsigned Part = 0; Part < UF; ++Part) { + Value *VecRdxPhi = getOrCreateVectorValue(Phi, Part); + Value *Val = getOrCreateVectorValue(LoopVal, Part); // Make sure to add the reduction stat value only to the // first unroll part. - Value *StartVal = (part == 0) ? VectorStart : Identity; - cast(VecRdxPhi[part]) - ->addIncoming(StartVal, LoopVectorPreHeader); - cast(VecRdxPhi[part]) - ->addIncoming(Val[part], LI->getLoopFor(LoopVectorBody)->getLoopLatch()); + Value *StartVal = (Part == 0) ? VectorStart : Identity; + cast(VecRdxPhi)->addIncoming(StartVal, LoopVectorPreHeader); + cast(VecRdxPhi) + ->addIncoming(Val, LI->getLoopFor(LoopVectorBody)->getLoopLatch()); } // Before each round, move the insertion point right between @@ -4200,7 +4204,6 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi) { // instructions. Builder.SetInsertPoint(&*LoopMiddleBlock->getFirstInsertionPt()); - VectorParts &RdxParts = VectorLoopValueMap.getVector(LoopExitInst); setDebugLocFromInst(Builder, LoopExitInst); // If the vector reduction can be performed in a smaller type, we truncate @@ -4209,37 +4212,42 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi) { if (VF > 1 && Phi->getType() != RdxDesc.getRecurrenceType()) { Type *RdxVecTy = VectorType::get(RdxDesc.getRecurrenceType(), VF); Builder.SetInsertPoint(LoopVectorBody->getTerminator()); - for (unsigned part = 0; part < UF; ++part) { - Value *Trunc = Builder.CreateTrunc(RdxParts[part], RdxVecTy); + VectorParts RdxParts(UF); + for (unsigned Part = 0; Part < UF; ++Part) { + RdxParts[Part] = VectorLoopValueMap.getVectorValue(LoopExitInst, Part); + Value *Trunc = Builder.CreateTrunc(RdxParts[Part], RdxVecTy); Value *Extnd = RdxDesc.isSigned() ? Builder.CreateSExt(Trunc, VecTy) - : Builder.CreateZExt(Trunc, VecTy); - for (Value::user_iterator UI = RdxParts[part]->user_begin(); - UI != RdxParts[part]->user_end();) + : Builder.CreateZExt(Trunc, VecTy); + for (Value::user_iterator UI = RdxParts[Part]->user_begin(); + UI != RdxParts[Part]->user_end();) if (*UI != Trunc) { - (*UI++)->replaceUsesOfWith(RdxParts[part], Extnd); - RdxParts[part] = Extnd; + (*UI++)->replaceUsesOfWith(RdxParts[Part], Extnd); + RdxParts[Part] = Extnd; } else { ++UI; } } Builder.SetInsertPoint(&*LoopMiddleBlock->getFirstInsertionPt()); - for (unsigned part = 0; part < UF; ++part) - RdxParts[part] = Builder.CreateTrunc(RdxParts[part], RdxVecTy); + for (unsigned Part = 0; Part < UF; ++Part) { + RdxParts[Part] = Builder.CreateTrunc(RdxParts[Part], RdxVecTy); + VectorLoopValueMap.resetVectorValue(LoopExitInst, Part, RdxParts[Part]); + } } // Reduce all of the unrolled parts into a single vector. 
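(Illustrative aside, not part of the patch: the loop below chains the UF per-part values with one "bin.rdx" operation each; when VF > 1, the code after it collapses the final vector to a scalar in log2(VF) shuffle-and-op steps. A standalone model of that log2 collapse, for VF = 4 and an integer add reduction:)

#include <cstdio>

int main() {
  int V[4] = {1, 2, 3, 4}; // stands in for the final VF-wide vector
  for (int Half = 4 / 2; Half >= 1; Half /= 2)
    for (int i = 0; i < Half; ++i)
      V[i] += V[i + Half]; // shuffle the high half down, then combine
  std::printf("%d\n", V[0]); // prints 10 == 1+2+3+4
}
// (end of aside)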
- Value *ReducedPartRdx = RdxParts[0]; + Value *ReducedPartRdx = VectorLoopValueMap.getVectorValue(LoopExitInst, 0); unsigned Op = RecurrenceDescriptor::getRecurrenceBinOp(RK); setDebugLocFromInst(Builder, ReducedPartRdx); - for (unsigned part = 1; part < UF; ++part) { + for (unsigned Part = 1; Part < UF; ++Part) { + Value *RdxPart = VectorLoopValueMap.getVectorValue(LoopExitInst, Part); if (Op != Instruction::ICmp && Op != Instruction::FCmp) // Floating point operations had to be 'fast' to enable the reduction. ReducedPartRdx = addFastMathFlag( - Builder.CreateBinOp((Instruction::BinaryOps)Op, RdxParts[part], + Builder.CreateBinOp((Instruction::BinaryOps)Op, RdxPart, ReducedPartRdx, "bin.rdx")); else ReducedPartRdx = RecurrenceDescriptor::createMinMaxOp( - Builder, MinMaxKind, ReducedPartRdx, RdxParts[part]); + Builder, MinMaxKind, ReducedPartRdx, RdxPart); } if (VF > 1) { @@ -4511,14 +4519,16 @@ InnerLoopVectorizer::createEdgeMask(BasicBlock *Src, BasicBlock *Dst) { assert(BI && "Unexpected terminator found"); if (BI->isConditional()) { - VectorParts EdgeMask = getVectorValue(BI->getCondition()); - if (BI->getSuccessor(0) != Dst) - for (unsigned part = 0; part < UF; ++part) - EdgeMask[part] = Builder.CreateNot(EdgeMask[part]); + VectorParts EdgeMask(UF); + for (unsigned Part = 0; Part < UF; ++Part) { + auto *EdgeMaskPart = getOrCreateVectorValue(BI->getCondition(), Part); + if (BI->getSuccessor(0) != Dst) + EdgeMaskPart = Builder.CreateNot(EdgeMaskPart); - for (unsigned part = 0; part < UF; ++part) - EdgeMask[part] = Builder.CreateAnd(EdgeMask[part], SrcMask[part]); + EdgeMaskPart = Builder.CreateAnd(EdgeMaskPart, SrcMask[Part]); + EdgeMask[Part] = EdgeMaskPart; + } EdgeMaskCache[Edge] = EdgeMask; return EdgeMask; @@ -4537,23 +4547,27 @@ InnerLoopVectorizer::createBlockInMask(BasicBlock *BB) { if (BCEntryIt != BlockMaskCache.end()) return BCEntryIt->second; + VectorParts BlockMask(UF); + // Loop incoming mask is all-one. if (OrigLoop->getHeader() == BB) { Value *C = ConstantInt::get(IntegerType::getInt1Ty(BB->getContext()), 1); - const VectorParts &BlockMask = getVectorValue(C); + for (unsigned Part = 0; Part < UF; ++Part) + BlockMask[Part] = getOrCreateVectorValue(C, Part); BlockMaskCache[BB] = BlockMask; return BlockMask; } // This is the block mask. We OR all incoming edges, and with zero. Value *Zero = ConstantInt::get(IntegerType::getInt1Ty(BB->getContext()), 0); - VectorParts BlockMask = getVectorValue(Zero); + for (unsigned Part = 0; Part < UF; ++Part) + BlockMask[Part] = getOrCreateVectorValue(Zero, Part); // For each pred: - for (pred_iterator it = pred_begin(BB), e = pred_end(BB); it != e; ++it) { - VectorParts EM = createEdgeMask(*it, BB); - for (unsigned part = 0; part < UF; ++part) - BlockMask[part] = Builder.CreateOr(BlockMask[part], EM[part]); + for (pred_iterator It = pred_begin(BB), E = pred_end(BB); It != E; ++It) { + VectorParts EM = createEdgeMask(*It, BB); + for (unsigned Part = 0; Part < UF; ++Part) + BlockMask[Part] = Builder.CreateOr(BlockMask[Part], EM[Part]); } BlockMaskCache[BB] = BlockMask; @@ -4568,15 +4582,14 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, unsigned UF, // stage #1: We create a new vector PHI node with no incoming edges. We'll use // this value when we vectorize all of the instructions that use the PHI. 
if (Legal->isReductionVariable(P) || Legal->isFirstOrderRecurrence(P)) { - VectorParts Entry(UF); - for (unsigned part = 0; part < UF; ++part) { + for (unsigned Part = 0; Part < UF; ++Part) { // This is phase one of vectorizing PHIs. Type *VecTy = (VF == 1) ? PN->getType() : VectorType::get(PN->getType(), VF); - Entry[part] = PHINode::Create( + Value *EntryPart = PHINode::Create( VecTy, 2, "vec.phi", &*LoopVectorBody->getFirstInsertionPt()); + VectorLoopValueMap.setVectorValue(P, Part, EntryPart); } - VectorLoopValueMap.initVector(P, Entry); return; } @@ -4600,21 +4613,22 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, unsigned UF, for (unsigned In = 0; In < NumIncoming; In++) { VectorParts Cond = createEdgeMask(P->getIncomingBlock(In), P->getParent()); - const VectorParts &In0 = getVectorValue(P->getIncomingValue(In)); - for (unsigned part = 0; part < UF; ++part) { + for (unsigned Part = 0; Part < UF; ++Part) { + Value *In0 = getOrCreateVectorValue(P->getIncomingValue(In), Part); // We might have single edge PHIs (blocks) - use an identity // 'select' for the first PHI operand. if (In == 0) - Entry[part] = Builder.CreateSelect(Cond[part], In0[part], In0[part]); + Entry[Part] = Builder.CreateSelect(Cond[Part], In0, In0); else // Select between the current value and the previous incoming edge // based on the incoming mask. - Entry[part] = Builder.CreateSelect(Cond[part], In0[part], Entry[part], + Entry[Part] = Builder.CreateSelect(Cond[Part], In0, Entry[Part], "predphi"); } } - VectorLoopValueMap.initVector(P, Entry); + for (unsigned Part = 0; Part < UF; ++Part) + VectorLoopValueMap.setVectorValue(P, Part, Entry[Part]); return; } @@ -4645,18 +4659,15 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, unsigned UF, unsigned Lanes = Cost->isUniformAfterVectorization(P, VF) ? 1 : VF; // These are the scalar results. Notice that we don't generate vector GEPs // because scalar GEPs result in better code. - ScalarParts Entry(UF); for (unsigned Part = 0; Part < UF; ++Part) { - Entry[Part].resize(VF); for (unsigned Lane = 0; Lane < Lanes; ++Lane) { Constant *Idx = ConstantInt::get(PtrInd->getType(), Lane + Part * VF); Value *GlobalIdx = Builder.CreateAdd(PtrInd, Idx); Value *SclrGep = II.transform(Builder, GlobalIdx, PSE.getSE(), DL); SclrGep->setName("next.gep"); - Entry[Part][Lane] = SclrGep; + VectorLoopValueMap.setScalarValue(P, Part, Lane, SclrGep); } } - VectorLoopValueMap.initScalar(P, Entry); return; } } @@ -4706,7 +4717,6 @@ void InnerLoopVectorizer::vectorizeInstruction(Instruction &I) { // is vector-typed. Thus, to keep the representation compact, we only use // vector-typed operands for loop-varying values. auto *GEP = cast(&I); - VectorParts Entry(UF); if (VF > 1 && OrigLoop->hasLoopInvariantOperands(GEP)) { // If we are vectorizing, but the GEP has only loop-invariant operands, @@ -4722,8 +4732,11 @@ void InnerLoopVectorizer::vectorizeInstruction(Instruction &I) { // collectLoopScalars() and teach getVectorValue() to broadcast // the lane-zero scalar value. auto *Clone = Builder.Insert(GEP->clone()); - for (unsigned Part = 0; Part < UF; ++Part) - Entry[Part] = Builder.CreateVectorSplat(VF, Clone); + for (unsigned Part = 0; Part < UF; ++Part) { + Value *EntryPart = Builder.CreateVectorSplat(VF, Clone); + VectorLoopValueMap.setVectorValue(&I, Part, EntryPart); + addMetadata(EntryPart, GEP); + } } else { // If the GEP has at least one loop-varying operand, we are sure to // produce a vector of pointers. 
But if we are only unrolling, we want @@ -4736,9 +4749,10 @@ void InnerLoopVectorizer::vectorizeInstruction(Instruction &I) { // The pointer operand of the new GEP. If it's loop-invariant, we // won't broadcast it. - auto *Ptr = OrigLoop->isLoopInvariant(GEP->getPointerOperand()) - ? GEP->getPointerOperand() - : getVectorValue(GEP->getPointerOperand())[Part]; + auto *Ptr = + OrigLoop->isLoopInvariant(GEP->getPointerOperand()) + ? GEP->getPointerOperand() + : getOrCreateVectorValue(GEP->getPointerOperand(), Part); // Collect all the indices for the new GEP. If any index is // loop-invariant, we won't broadcast it. @@ -4747,7 +4761,7 @@ void InnerLoopVectorizer::vectorizeInstruction(Instruction &I) { if (OrigLoop->isLoopInvariant(U.get())) Indices.push_back(U.get()); else - Indices.push_back(getVectorValue(U.get())[Part]); + Indices.push_back(getOrCreateVectorValue(U.get(), Part)); } // Create the new GEP. Note that this GEP may be a scalar if VF == 1, @@ -4757,12 +4771,11 @@ void InnerLoopVectorizer::vectorizeInstruction(Instruction &I) { : Builder.CreateGEP(Ptr, Indices); assert((VF == 1 || NewGEP->getType()->isVectorTy()) && "NewGEP is not a pointer vector"); - Entry[Part] = NewGEP; + VectorLoopValueMap.setVectorValue(&I, Part, NewGEP); + addMetadata(NewGEP, GEP); } } - VectorLoopValueMap.initVector(&I, Entry); - addMetadata(Entry, GEP); break; } case Instruction::UDiv: @@ -4775,6 +4788,7 @@ void InnerLoopVectorizer::vectorizeInstruction(Instruction &I) { scalarizeInstruction(&I, true); break; } + LLVM_FALLTHROUGH; case Instruction::Add: case Instruction::FAdd: case Instruction::Sub: @@ -4792,22 +4806,20 @@ void InnerLoopVectorizer::vectorizeInstruction(Instruction &I) { // Just widen binops. auto *BinOp = cast(&I); setDebugLocFromInst(Builder, BinOp); - const VectorParts &A = getVectorValue(BinOp->getOperand(0)); - const VectorParts &B = getVectorValue(BinOp->getOperand(1)); - // Use this vector value for all users of the original instruction. - VectorParts Entry(UF); for (unsigned Part = 0; Part < UF; ++Part) { - Value *V = Builder.CreateBinOp(BinOp->getOpcode(), A[Part], B[Part]); + Value *A = getOrCreateVectorValue(BinOp->getOperand(0), Part); + Value *B = getOrCreateVectorValue(BinOp->getOperand(1), Part); + Value *V = Builder.CreateBinOp(BinOp->getOpcode(), A, B); if (BinaryOperator *VecOp = dyn_cast(V)) VecOp->copyIRFlags(BinOp); - Entry[Part] = V; + // Use this vector value for all users of the original instruction. + VectorLoopValueMap.setVectorValue(&I, Part, V); + addMetadata(V, BinOp); } - VectorLoopValueMap.initVector(&I, Entry); - addMetadata(Entry, BinOp); break; } case Instruction::Select: { @@ -4823,20 +4835,19 @@ void InnerLoopVectorizer::vectorizeInstruction(Instruction &I) { // loop. This means that we can't just use the original 'cond' value. // We have to take the 'vectorized' value and pick the first lane. // Instcombine will make this a no-op. - const VectorParts &Cond = getVectorValue(I.getOperand(0)); - const VectorParts &Op0 = getVectorValue(I.getOperand(1)); - const VectorParts &Op1 = getVectorValue(I.getOperand(2)); - auto *ScalarCond = getScalarValue(I.getOperand(0), 0, 0); + auto *ScalarCond = getOrCreateScalarValue(I.getOperand(0), 0, 0); - VectorParts Entry(UF); for (unsigned Part = 0; Part < UF; ++Part) { - Entry[Part] = Builder.CreateSelect( - InvariantCond ? 
ScalarCond : Cond[Part], Op0[Part], Op1[Part]); + Value *Cond = getOrCreateVectorValue(I.getOperand(0), Part); + Value *Op0 = getOrCreateVectorValue(I.getOperand(1), Part); + Value *Op1 = getOrCreateVectorValue(I.getOperand(2), Part); + Value *Sel = + Builder.CreateSelect(InvariantCond ? ScalarCond : Cond, Op0, Op1); + VectorLoopValueMap.setVectorValue(&I, Part, Sel); + addMetadata(Sel, &I); } - VectorLoopValueMap.initVector(&I, Entry); - addMetadata(Entry, &I); break; } @@ -4846,22 +4857,20 @@ void InnerLoopVectorizer::vectorizeInstruction(Instruction &I) { bool FCmp = (I.getOpcode() == Instruction::FCmp); auto *Cmp = dyn_cast(&I); setDebugLocFromInst(Builder, Cmp); - const VectorParts &A = getVectorValue(Cmp->getOperand(0)); - const VectorParts &B = getVectorValue(Cmp->getOperand(1)); - VectorParts Entry(UF); for (unsigned Part = 0; Part < UF; ++Part) { + Value *A = getOrCreateVectorValue(Cmp->getOperand(0), Part); + Value *B = getOrCreateVectorValue(Cmp->getOperand(1), Part); Value *C = nullptr; if (FCmp) { - C = Builder.CreateFCmp(Cmp->getPredicate(), A[Part], B[Part]); + C = Builder.CreateFCmp(Cmp->getPredicate(), A, B); cast(C)->copyFastMathFlags(Cmp); } else { - C = Builder.CreateICmp(Cmp->getPredicate(), A[Part], B[Part]); + C = Builder.CreateICmp(Cmp->getPredicate(), A, B); } - Entry[Part] = C; + VectorLoopValueMap.setVectorValue(&I, Part, C); + addMetadata(C, &I); } - VectorLoopValueMap.initVector(&I, Entry); - addMetadata(Entry, &I); break; } @@ -4898,12 +4907,12 @@ void InnerLoopVectorizer::vectorizeInstruction(Instruction &I) { Type *DestTy = (VF == 1) ? CI->getType() : VectorType::get(CI->getType(), VF); - const VectorParts &A = getVectorValue(CI->getOperand(0)); - VectorParts Entry(UF); - for (unsigned Part = 0; Part < UF; ++Part) - Entry[Part] = Builder.CreateCast(CI->getOpcode(), A[Part], DestTy); - VectorLoopValueMap.initVector(&I, Entry); - addMetadata(Entry, &I); + for (unsigned Part = 0; Part < UF; ++Part) { + Value *A = getOrCreateVectorValue(CI->getOperand(0), Part); + Value *Cast = Builder.CreateCast(CI->getOpcode(), A, DestTy); + VectorLoopValueMap.setVectorValue(&I, Part, Cast); + addMetadata(Cast, &I); + } break; } @@ -4941,17 +4950,14 @@ void InnerLoopVectorizer::vectorizeInstruction(Instruction &I) { break; } - VectorParts Entry(UF); for (unsigned Part = 0; Part < UF; ++Part) { SmallVector Args; for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) { Value *Arg = CI->getArgOperand(i); // Some intrinsics have a scalar argument - don't replace it with a // vector. - if (!UseVectorIntrinsic || !hasVectorInstrinsicScalarOpd(ID, i)) { - const VectorParts &VectorArg = getVectorValue(CI->getArgOperand(i)); - Arg = VectorArg[Part]; - } + if (!UseVectorIntrinsic || !hasVectorInstrinsicScalarOpd(ID, i)) + Arg = getOrCreateVectorValue(CI->getArgOperand(i), Part); Args.push_back(Arg); } @@ -4984,11 +4990,10 @@ void InnerLoopVectorizer::vectorizeInstruction(Instruction &I) { if (isa(V)) V->copyFastMathFlags(CI); - Entry[Part] = V; + VectorLoopValueMap.setVectorValue(&I, Part, V); + addMetadata(V, &I); } - VectorLoopValueMap.initVector(&I, Entry); - addMetadata(Entry, &I); break; } @@ -5086,12 +5091,18 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() { } bool LoopVectorizationLegality::canVectorize() { + // Store the result and return it at the end instead of exiting early, in case + // allowExtraAnalysis is used to report multiple reasons for not vectorizing. + bool Result = true; // We must have a loop in canonical form. 
Loops with indirectbr in them cannot // be canonicalized. if (!TheLoop->getLoopPreheader()) { ORE->emit(createMissedAnalysis("CFGNotUnderstood") << "loop control flow is not understood by vectorizer"); - return false; + if (ORE->allowExtraAnalysis()) + Result = false; + else + return false; } // FIXME: The code is currently dead, since the loop gets sent to @@ -5101,21 +5112,30 @@ bool LoopVectorizationLegality::canVectorize() { if (!TheLoop->empty()) { ORE->emit(createMissedAnalysis("NotInnermostLoop") << "loop is not the innermost loop"); - return false; + if (ORE->allowExtraAnalysis()) + Result = false; + else + return false; } // We must have a single backedge. if (TheLoop->getNumBackEdges() != 1) { ORE->emit(createMissedAnalysis("CFGNotUnderstood") << "loop control flow is not understood by vectorizer"); - return false; + if (ORE->allowExtraAnalysis()) + Result = false; + else + return false; } // We must have a single exiting block. if (!TheLoop->getExitingBlock()) { ORE->emit(createMissedAnalysis("CFGNotUnderstood") << "loop control flow is not understood by vectorizer"); - return false; + if (ORE->allowExtraAnalysis()) + Result = false; + else + return false; } // We only handle bottom-tested loops, i.e. loop in which the condition is @@ -5124,7 +5144,10 @@ bool LoopVectorizationLegality::canVectorize() { if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) { ORE->emit(createMissedAnalysis("CFGNotUnderstood") << "loop control flow is not understood by vectorizer"); - return false; + if (ORE->allowExtraAnalysis()) + Result = false; + else + return false; } // We need to have a loop header. @@ -5135,28 +5158,28 @@ bool LoopVectorizationLegality::canVectorize() { unsigned NumBlocks = TheLoop->getNumBlocks(); if (NumBlocks != 1 && !canVectorizeWithIfConvert()) { DEBUG(dbgs() << "LV: Can't if-convert the loop.\n"); - return false; - } - - // ScalarEvolution needs to be able to find the exit count. - const SCEV *ExitCount = PSE.getBackedgeTakenCount(); - if (ExitCount == PSE.getSE()->getCouldNotCompute()) { - ORE->emit(createMissedAnalysis("CantComputeNumberOfIterations") - << "could not determine number of loop iterations"); - DEBUG(dbgs() << "LV: SCEV could not compute the loop exit count.\n"); - return false; + if (ORE->allowExtraAnalysis()) + Result = false; + else + return false; } // Check if we can vectorize the instructions and CFG in this loop. if (!canVectorizeInstrs()) { DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n"); - return false; + if (ORE->allowExtraAnalysis()) + Result = false; + else + return false; } // Go over each instruction and look at memory deps. if (!canVectorizeMemory()) { DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n"); - return false; + if (ORE->allowExtraAnalysis()) + Result = false; + else + return false; } DEBUG(dbgs() << "LV: We can vectorize this loop" @@ -5184,13 +5207,17 @@ bool LoopVectorizationLegality::canVectorize() { << "Too many SCEV assumptions need to be made and checked " << "at runtime"); DEBUG(dbgs() << "LV: Too many SCEV checks needed.\n"); - return false; + if (ORE->allowExtraAnalysis()) + Result = false; + else + return false; } - // Okay! We can vectorize. At this point we don't have any other mem analysis + // Okay! We've done all the tests. If any have failed, return false. Otherwise + // we can vectorize, and at this point we don't have any other mem analysis // which may limit our maximum vectorization factor, so just return true with // no restrictions. 
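(Illustrative sketch, not part of the patch: the rewritten canVectorize() uses a record-and-continue pattern, so that when extra analysis output is enabled every failed check is reported rather than just the first. The standalone shape of it, with invented checks:)

#include <cstdio>

static bool canDoTransform(bool AllowExtraAnalysis, bool CheckA,
                           bool CheckB) {
  bool Result = true;
  if (!CheckA) {
    std::fprintf(stderr, "blocked: reason A\n");
    if (!AllowExtraAnalysis)
      return false; // bail at the first failure when not diagnosing
    Result = false; // remember the failure and keep checking
  }
  if (!CheckB) {
    std::fprintf(stderr, "blocked: reason B\n");
    if (!AllowExtraAnalysis)
      return false;
    Result = false;
  }
  return Result; // false if any check failed
}
// (end of sketch)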
- return true; + return Result; } static Type *convertPointerToIntegerType(const DataLayout &DL, Type *Ty) { @@ -5264,8 +5291,13 @@ void LoopVectorizationLegality::addInductionPhi( // Both the PHI node itself, and the "post-increment" value feeding // back into the PHI node may have external users. - AllowedExit.insert(Phi); - AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch())); + // We can allow those uses, except if the SCEVs we have for them rely + // on predicates that only hold within the loop, since allowing the exit + // currently means re-using this SCEV outside the loop. + if (PSE.getUnionPredicate().isAlwaysTrue()) { + AllowedExit.insert(Phi); + AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch())); + } DEBUG(dbgs() << "LV: Found an induction variable.\n"); return; @@ -5333,7 +5365,8 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { continue; } - if (RecurrenceDescriptor::isFirstOrderRecurrence(Phi, TheLoop, DT)) { + if (RecurrenceDescriptor::isFirstOrderRecurrence(Phi, TheLoop, + SinkAfter, DT)) { FirstOrderRecurrences.insert(Phi); continue; } @@ -5554,6 +5587,13 @@ void LoopVectorizationCostModel::collectLoopScalars(unsigned VF) { DEBUG(dbgs() << "LV: Found scalar instruction: " << *IndUpdate << "\n"); } + // Insert the forced scalars. + // FIXME: Currently widenPHIInstruction() often creates a dead vector + // induction variable when the PHI user is scalarized. + if (ForcedScalars.count(VF)) + for (auto *I : ForcedScalars.find(VF)->second) + Worklist.insert(I); + // Expand the worklist by looking through any bitcasts and getelementptr // instructions we've already identified as scalar. This is similar to the // expansion step in collectLoopUniforms(); however, here we're only @@ -5665,14 +5705,14 @@ bool LoopVectorizationLegality::memoryInstructionCanBeWidened(Instruction *I, void LoopVectorizationCostModel::collectLoopUniforms(unsigned VF) { // We should not collect Uniforms more than once per VF. Right now, - // this function is called from collectUniformsAndScalars(), which + // this function is called from collectUniformsAndScalars(), which // already does this check. Collecting Uniforms for VF=1 does not make any // sense. assert(VF >= 2 && !Uniforms.count(VF) && "This function should not be visited twice for the same VF"); - // Visit the list of Uniforms. If we'll not find any uniform value, we'll + // Visit the list of Uniforms. If we'll not find any uniform value, we'll // not analyze again. Uniforms.count(VF) will return 1. Uniforms[VF].clear(); @@ -5951,10 +5991,10 @@ void InterleavedAccessInfo::collectConstStrideAccesses( continue; Value *Ptr = getPointerOperand(&I); - // We don't check wrapping here because we don't know yet if Ptr will be - // part of a full group or a group with gaps. Checking wrapping for all + // We don't check wrapping here because we don't know yet if Ptr will be + // part of a full group or a group with gaps. Checking wrapping for all // pointers (even those that end up in groups with no gaps) will be overly - // conservative. For full groups, wrapping should be ok since if we would + // conservative. For full groups, wrapping should be ok since if we would // wrap around the address space we would do a memory access at nullptr // even without the transformation. The wrapping checks are therefore // deferred until after we've formed the interleaved groups. 
@@ -6207,7 +6247,7 @@ void InterleavedAccessInfo::analyzeInterleaving( Instruction *LastMember = Group->getMember(Group->getFactor() - 1); if (LastMember) { Value *LastMemberPtr = getPointerOperand(LastMember); - if (!getPtrStride(PSE, LastMemberPtr, TheLoop, Strides, /*Assume=*/false, + if (!getPtrStride(PSE, LastMemberPtr, TheLoop, Strides, /*Assume=*/false, /*ShouldCheckWrap=*/true)) { DEBUG(dbgs() << "LV: Invalidate candidate interleaved group due to " "last group member potentially pointer-wrapping.\n"); @@ -6215,9 +6255,9 @@ void InterleavedAccessInfo::analyzeInterleaving( } } else { // Case 3: A non-reversed interleaved load group with gaps: We need - // to execute at least one scalar epilogue iteration. This will ensure + // to execute at least one scalar epilogue iteration. This will ensure we don't speculatively access memory out-of-bounds. We only need - // to look for a member at index factor - 1, since every group must have + // to look for a member at index factor - 1, since every group must have // a member at index zero. if (Group->isReverse()) { releaseGroup(Group); @@ -7129,11 +7169,16 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) { if (VF > 1 && isProfitableToScalarize(I, VF)) return VectorizationCostTy(InstsToScalarize[VF][I], false); + // Forced scalars do not have any scalarization overhead. + if (VF > 1 && ForcedScalars.count(VF) && + ForcedScalars.find(VF)->second.count(I)) + return VectorizationCostTy((getInstructionCost(I, 1).first * VF), false); + Type *VectorTy; unsigned C = getInstructionCost(I, VF, VectorTy); bool TypeNotScalarized = - VF > 1 && !VectorTy->isVoidTy() && TTI.getNumberOfParts(VectorTy) < VF; + VF > 1 && VectorTy->isVectorTy() && TTI.getNumberOfParts(VectorTy) < VF; return VectorizationCostTy(C, TypeNotScalarized); } @@ -7208,6 +7253,62 @@ void LoopVectorizationCostModel::setCostBasedWideningDecision(unsigned VF) { setWideningDecision(&I, VF, Decision, Cost); } } + + // Make sure that any load of address and any other address computation + // remains scalar unless there is gather/scatter support. This avoids + // inevitable extracts into address registers, and also has the benefit of + // activating LSR more, since that pass can't optimize vectorized + // addresses. + if (TTI.prefersVectorizedAddressing()) + return; + + // Start with all scalar pointer uses. + SmallPtrSet<Instruction *, 8> AddrDefs; + for (BasicBlock *BB : TheLoop->blocks()) + for (Instruction &I : *BB) { + Instruction *PtrDef = + dyn_cast_or_null<Instruction>(getPointerOperand(&I)); + if (PtrDef && TheLoop->contains(PtrDef) && + getWideningDecision(&I, VF) != CM_GatherScatter) + AddrDefs.insert(PtrDef); + } + + // Add all instructions used to generate the addresses. + SmallVector<Instruction *, 4> Worklist; + for (auto *I : AddrDefs) + Worklist.push_back(I); + while (!Worklist.empty()) { + Instruction *I = Worklist.pop_back_val(); + for (auto &Op : I->operands()) + if (auto *InstOp = dyn_cast<Instruction>(Op)) + if ((InstOp->getParent() == I->getParent()) && !isa<PHINode>(InstOp) && + AddrDefs.insert(InstOp).second == true) + Worklist.push_back(InstOp); + } + + for (auto *I : AddrDefs) { + if (isa<LoadInst>(I)) { + // Setting the desired widening decision should ideally be handled + // by cost functions, but since this involves the task of finding out + // if the loaded register is involved in an address computation, it is + // instead changed here when we know this is the case. + if (getWideningDecision(I, VF) == CM_Widen) + // Scalarize a widened load of address.
+ setWideningDecision(I, VF, CM_Scalarize, + (VF * getMemoryInstructionCost(I, 1))); + else if (auto Group = Legal->getInterleavedAccessGroup(I)) { + // Scalarize an interleave group of address loads. + for (unsigned I = 0; I < Group->getFactor(); ++I) { + if (Instruction *Member = Group->getMember(I)) + setWideningDecision(Member, VF, CM_Scalarize, + (VF * getMemoryInstructionCost(Member, 1))); + } + } + } else + // Make sure I gets scalarized and a cost estimate without + // scalarization overhead. + ForcedScalars[VF].insert(I); + } } unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I, @@ -7216,7 +7317,7 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I, Type *RetTy = I->getType(); if (canTruncateToMinimalBitwidth(I, VF)) RetTy = IntegerType::get(RetTy->getContext(), MinBWs[I]); - VectorTy = ToVectorTy(RetTy, VF); + VectorTy = isScalarAfterVectorization(I, VF) ? RetTy : ToVectorTy(RetTy, VF); auto SE = PSE.getSE(); // TODO: We need to estimate the cost of intrinsic calls. @@ -7302,6 +7403,7 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I, // likely. return Cost / getReciprocalPredBlockProb(); } + LLVM_FALLTHROUGH; case Instruction::Add: case Instruction::FAdd: case Instruction::Sub: @@ -7349,9 +7451,10 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I, } else if (Legal->isUniform(Op2)) { Op2VK = TargetTransformInfo::OK_UniformValue; } - SmallVector Operands(I->operand_values()); - return TTI.getArithmeticInstrCost(I->getOpcode(), VectorTy, Op1VK, - Op2VK, Op1VP, Op2VP, Operands); + SmallVector Operands(I->operand_values()); + unsigned N = isScalarAfterVectorization(I, VF) ? VF : 1; + return N * TTI.getArithmeticInstrCost(I->getOpcode(), VectorTy, Op1VK, + Op2VK, Op1VP, Op2VP, Operands); } case Instruction::Select: { SelectInst *SI = cast(I); @@ -7374,7 +7477,15 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I, } case Instruction::Store: case Instruction::Load: { - VectorTy = ToVectorTy(getMemInstValueType(I), VF); + unsigned Width = VF; + if (Width > 1) { + InstWidening Decision = getWideningDecision(I, Width); + assert(Decision != CM_Unknown && + "CM decision should be taken at this point"); + if (Decision == CM_Scalarize) + Width = 1; + } + VectorTy = ToVectorTy(getMemInstValueType(I), Width); return getMemoryInstructionCost(I, VF); } case Instruction::ZExt: @@ -7399,7 +7510,8 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I, } Type *SrcScalarTy = I->getOperand(0)->getType(); - Type *SrcVecTy = ToVectorTy(SrcScalarTy, VF); + Type *SrcVecTy = + VectorTy->isVectorTy() ? ToVectorTy(SrcScalarTy, VF) : SrcScalarTy; if (canTruncateToMinimalBitwidth(I, VF)) { // This cast is going to be shrunk. This may remove the cast or it might // turn it into slightly different cast. For example, if MinBW == 16, @@ -7419,7 +7531,8 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I, } } - return TTI.getCastInstrCost(I->getOpcode(), VectorTy, SrcVecTy, I); + unsigned N = isScalarAfterVectorization(I, VF) ? VF : 1; + return N * TTI.getCastInstrCost(I->getOpcode(), VectorTy, SrcVecTy, I); } case Instruction::Call: { bool NeedToScalarize; @@ -7526,6 +7639,15 @@ void LoopVectorizationPlanner::executePlan(InnerLoopVectorizer &ILV) { // 2. Copy and widen instructions from the old loop into the new loop. + // Move instructions to handle first-order recurrences. 
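(Illustrative aside, not part of the patch: a first-order recurrence reads the previous iteration's value, e.g. in plain C++:)

#include <cstddef>

// Each iteration uses in[i-1] via 'prev'. After vectorization 'prev'
// becomes a shuffle of the previous and current vector parts, so users of
// 'prev' may have to be moved below the definition producing the current
// part; legality records those moves as (instruction, new predecessor)
// pairs in SinkAfter, and the loop below replays them.
void firstOrderRecurrence(int *out, const int *in, std::size_t n, int init) {
  int prev = init;
  for (std::size_t i = 0; i < n; ++i) {
    out[i] = prev + in[i]; // user of the recurrence
    prev = in[i];          // definition feeding the recurrence phi
  }
}
// (end of aside)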
+ DenseMap SinkAfter = Legal->getSinkAfter(); + for (auto &Entry : SinkAfter) { + Entry.first->removeFromParent(); + Entry.first->insertAfter(Entry.second); + DEBUG(dbgs() << "Sinking" << *Entry.first << " after" << *Entry.second + << " to vectorize a 1st order recurrence.\n"); + } + // Collect instructions from the original loop that will become trivially dead // in the vectorized loop. We don't need to vectorize these instructions. For // example, original induction update instructions can become dead because we @@ -7677,24 +7799,6 @@ bool LoopVectorizePass::processLoop(Loop *L) { return false; } - // Check the loop for a trip count threshold: - // do not vectorize loops with a tiny trip count. - const unsigned MaxTC = SE->getSmallConstantMaxTripCount(L); - if (MaxTC > 0u && MaxTC < TinyTripCountVectorThreshold) { - DEBUG(dbgs() << "LV: Found a loop with a very small trip count. " - << "This loop is not worth vectorizing."); - if (Hints.getForce() == LoopVectorizeHints::FK_Enabled) - DEBUG(dbgs() << " But vectorizing was explicitly forced.\n"); - else { - DEBUG(dbgs() << "\n"); - ORE->emit(createMissedAnalysis(Hints.vectorizeAnalysisPassName(), - "NotBeneficial", L) - << "vectorization is not beneficial " - "and is not explicitly forced"); - return false; - } - } - PredicatedScalarEvolution PSE(*SE, *L); // Check if it is legal to vectorize the loop. @@ -7712,16 +7816,32 @@ bool LoopVectorizePass::processLoop(Loop *L) { bool OptForSize = Hints.getForce() != LoopVectorizeHints::FK_Enabled && F->optForSize(); - // Compute the weighted frequency of this loop being executed and see if it - // is less than 20% of the function entry baseline frequency. Note that we - // always have a canonical loop here because we think we *can* vectorize. - // FIXME: This is hidden behind a flag due to pervasive problems with - // exactly what block frequency models. - if (LoopVectorizeWithBlockFrequency) { - BlockFrequency LoopEntryFreq = BFI->getBlockFreq(L->getLoopPreheader()); - if (Hints.getForce() != LoopVectorizeHints::FK_Enabled && - LoopEntryFreq < ColdEntryFreq) + // Check the loop for a trip count threshold: vectorize loops with a tiny trip + // count by optimizing for size, to minimize overheads. + unsigned ExpectedTC = SE->getSmallConstantMaxTripCount(L); + bool HasExpectedTC = (ExpectedTC > 0); + + if (!HasExpectedTC && LoopVectorizeWithBlockFrequency) { + auto EstimatedTC = getLoopEstimatedTripCount(L); + if (EstimatedTC) { + ExpectedTC = *EstimatedTC; + HasExpectedTC = true; + } + } + + if (HasExpectedTC && ExpectedTC < TinyTripCountVectorThreshold) { + DEBUG(dbgs() << "LV: Found a loop with a very small trip count. " + << "This loop is worth vectorizing only if no scalar " + << "iteration overheads are incurred."); + if (Hints.getForce() == LoopVectorizeHints::FK_Enabled) + DEBUG(dbgs() << " But vectorizing was explicitly forced.\n"); + else { + DEBUG(dbgs() << "\n"); + // Loops with a very small trip count are considered for vectorization + // under OptForSize, thereby making sure the cost of their loop body is + // dominant, free of runtime guards and scalar iteration overheads. OptForSize = true; + } } // Check the function attributes to see if implicit floats are allowed. @@ -7905,11 +8025,6 @@ bool LoopVectorizePass::runImpl( DB = &DB_; ORE = &ORE_; - // Compute some weights outside of the loop over the loops. Compute this - // using a BranchProbability to re-use its scaling math. 
- const BranchProbability ColdProb(1, 5); // 20% - ColdEntryFreq = BlockFrequency(BFI->getEntryFreq()) * ColdProb; - // Don't attempt if // 1. the target claims to have no vector registers, and // 2. interleaving won't help ILP. diff --git a/interpreter/llvm/src/lib/Transforms/Vectorize/SLPVectorizer.cpp b/interpreter/llvm/src/lib/Transforms/Vectorize/SLPVectorizer.cpp index f6334eb141062..dcbcab459a6be 100644 --- a/interpreter/llvm/src/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/interpreter/llvm/src/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -40,6 +40,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/GraphWriter.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Vectorize.h" @@ -172,6 +173,11 @@ static unsigned getAltOpcode(unsigned Op) { } } +/// true if the \p Value is odd, false otherwise. +static bool isOdd(unsigned Value) { + return Value & 1; +} + ///\returns bool representing if Opcode \p Op can be part /// of an alternate sequence which can later be merged as /// a ShuffleVector instruction. @@ -189,7 +195,7 @@ static unsigned isAltInst(ArrayRef VL) { unsigned AltOpcode = getAltOpcode(Opcode); for (int i = 1, e = VL.size(); i < e; i++) { Instruction *I = dyn_cast(VL[i]); - if (!I || I->getOpcode() != ((i & 1) ? AltOpcode : Opcode)) + if (!I || I->getOpcode() != (isOdd(i) ? AltOpcode : Opcode)) return 0; } return Instruction::ShuffleVector; @@ -258,6 +264,7 @@ static bool InTreeUserNeedToExtract(Value *Scalar, Instruction *UserInst, if (hasVectorInstrinsicScalarOpd(ID, 1)) { return (CI->getArgOperand(1) == Scalar); } + LLVM_FALLTHROUGH; } default: return false; @@ -315,7 +322,10 @@ class BoUpSLP { else MaxVecRegSize = TTI->getRegisterBitWidth(true); - MinVecRegSize = MinVectorRegSizeOption; + if (MinVectorRegSizeOption.getNumOccurrences()) + MinVecRegSize = MinVectorRegSizeOption; + else + MinVecRegSize = TTI->getMinVectorRegisterBitWidth(); } /// \brief Vectorize the tree that starts with the elements in \p VL. @@ -424,7 +434,7 @@ class BoUpSLP { /// \returns the pointer to the vectorized value if \p VL is already /// vectorized, or NULL. They may happen in cycles. - Value *alreadyVectorized(ArrayRef VL) const; + Value *alreadyVectorized(ArrayRef VL, Value *OpValue) const; /// \returns the scalarization cost for this type. Scalarization in this /// context means the creation of vectors from a group of scalars. @@ -499,7 +509,7 @@ class BoUpSLP { Last->NeedToGather = !Vectorized; if (Vectorized) { for (int i = 0, e = VL.size(); i != e; ++i) { - assert(!ScalarToTreeEntry.count(VL[i]) && "Scalar already in tree!"); + assert(!getTreeEntry(VL[i]) && "Scalar already in tree!"); ScalarToTreeEntry[VL[i]] = idx; } } else { @@ -516,6 +526,20 @@ class BoUpSLP { /// Holds all of the tree entries. std::vector VectorizableTree; + TreeEntry *getTreeEntry(Value *V) { + auto I = ScalarToTreeEntry.find(V); + if (I != ScalarToTreeEntry.end()) + return &VectorizableTree[I->second]; + return nullptr; + } + + const TreeEntry *getTreeEntry(Value *V) const { + auto I = ScalarToTreeEntry.find(V); + if (I != ScalarToTreeEntry.end()) + return &VectorizableTree[I->second]; + return nullptr; + } + /// Maps a specific scalar to its tree entry. 
SmallDenseMap ScalarToTreeEntry; @@ -574,12 +598,12 @@ class BoUpSLP { void eraseInstruction(Instruction *I) { I->removeFromParent(); I->dropAllReferences(); - DeletedInstructions.push_back(std::unique_ptr(I)); + DeletedInstructions.emplace_back(I); } /// Temporary store for deleted instructions. Instructions will be deleted /// eventually when the BoUpSLP is destructed. - SmallVector, 8> DeletedInstructions; + SmallVector DeletedInstructions; /// A list of values that need to extracted out of the tree. /// This list holds pairs of (Internal Scalar : External User). External User @@ -833,10 +857,10 @@ class BoUpSLP { /// Checks if a bundle of instructions can be scheduled, i.e. has no /// cyclic dependencies. This is only a dry-run, no instructions are /// actually moved at this stage. - bool tryScheduleBundle(ArrayRef VL, BoUpSLP *SLP); + bool tryScheduleBundle(ArrayRef VL, BoUpSLP *SLP, Value *OpValue); /// Un-bundles a group of instructions. - void cancelScheduling(ArrayRef VL); + void cancelScheduling(ArrayRef VL, Value *OpValue); /// Extends the scheduling region so that V is inside the region. /// \returns true if the region size is within the limit. @@ -1043,14 +1067,14 @@ void BoUpSLP::buildTree(ArrayRef Roots, for (TreeEntry &EIdx : VectorizableTree) { TreeEntry *Entry = &EIdx; + // No need to handle users of gathered values. + if (Entry->NeedToGather) + continue; + // For each lane: for (int Lane = 0, LE = Entry->Scalars.size(); Lane != LE; ++Lane) { Value *Scalar = Entry->Scalars[Lane]; - // No need to handle users of gathered values. - if (Entry->NeedToGather) - continue; - // Check if the scalar is externally used as an extra arg. auto ExtI = ExternallyUsedValues.find(Scalar); if (ExtI != ExternallyUsedValues.end()) { @@ -1067,9 +1091,7 @@ void BoUpSLP::buildTree(ArrayRef Roots, continue; // Skip in-tree scalars that become vectors - if (ScalarToTreeEntry.count(U)) { - int Idx = ScalarToTreeEntry[U]; - TreeEntry *UseEntry = &VectorizableTree[Idx]; + if (TreeEntry *UseEntry = getTreeEntry(U)) { Value *UseScalar = UseEntry->Scalars[0]; // Some in-tree scalars will remain as scalar in vectorized // instructions. If that is the case, the one in Lane 0 will @@ -1078,7 +1100,7 @@ void BoUpSLP::buildTree(ArrayRef Roots, !InTreeUserNeedToExtract(Scalar, UserInst, TLI)) { DEBUG(dbgs() << "SLP: \tInternal user will be removed:" << *U << ".\n"); - assert(!VectorizableTree[Idx].NeedToGather && "Bad state"); + assert(!UseEntry->NeedToGather && "Bad state"); continue; } } @@ -1151,9 +1173,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, } // Check if this is a duplicate of another entry. - if (ScalarToTreeEntry.count(VL[0])) { - int Idx = ScalarToTreeEntry[VL[0]]; - TreeEntry *E = &VectorizableTree[Idx]; + if (TreeEntry *E = getTreeEntry(VL[0])) { for (unsigned i = 0, e = VL.size(); i != e; ++i) { DEBUG(dbgs() << "SLP: \tChecking bundle: " << *VL[i] << ".\n"); if (E->Scalars[i] != VL[i]) { @@ -1192,7 +1212,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, // Check that all of the users of the scalars that we want to vectorize are // schedulable. Instruction *VL0 = cast(VL[0]); - BasicBlock *BB = cast(VL0)->getParent(); + BasicBlock *BB = VL0->getParent(); if (!DT->isReachableFromEntry(BB)) { // Don't go into unreachable blocks. 
They may contain instructions with @@ -1217,7 +1237,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, } BlockScheduling &BS = *BSRef.get(); - if (!BS.tryScheduleBundle(VL, this)) { + if (!BS.tryScheduleBundle(VL, this, VL0)) { DEBUG(dbgs() << "SLP: We are not able to schedule this bundle!\n"); assert((!BS.getScheduleData(VL[0]) || !BS.getScheduleData(VL[0])->isPartOfBundle()) && @@ -1238,7 +1258,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, cast(VL[j])->getIncomingValueForBlock(PH->getIncomingBlock(i))); if (Term) { DEBUG(dbgs() << "SLP: Need to swizzle PHINodes (TerminatorInst use).\n"); - BS.cancelScheduling(VL); + BS.cancelScheduling(VL, VL0); newTreeEntry(VL, false, UserTreeIdx); return; } @@ -1264,7 +1284,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, if (Reuse) { DEBUG(dbgs() << "SLP: Reusing extract sequence.\n"); } else { - BS.cancelScheduling(VL); + BS.cancelScheduling(VL, VL0); } newTreeEntry(VL, Reuse, UserTreeIdx); return; @@ -1281,7 +1301,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, if (DL->getTypeSizeInBits(ScalarTy) != DL->getTypeAllocSizeInBits(ScalarTy)) { - BS.cancelScheduling(VL); + BS.cancelScheduling(VL, VL0); newTreeEntry(VL, false, UserTreeIdx); DEBUG(dbgs() << "SLP: Gathering loads of non-packed type.\n"); return; @@ -1292,7 +1312,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, for (unsigned i = 0, e = VL.size() - 1; i < e; ++i) { LoadInst *L = cast(VL[i]); if (!L->isSimple()) { - BS.cancelScheduling(VL); + BS.cancelScheduling(VL, VL0); newTreeEntry(VL, false, UserTreeIdx); DEBUG(dbgs() << "SLP: Gathering non-simple loads.\n"); return; @@ -1329,7 +1349,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, break; } - BS.cancelScheduling(VL); + BS.cancelScheduling(VL, VL0); newTreeEntry(VL, false, UserTreeIdx); if (ReverseConsecutive) { @@ -1356,7 +1376,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, for (unsigned i = 0; i < VL.size(); ++i) { Type *Ty = cast(VL[i])->getOperand(0)->getType(); if (Ty != SrcTy || !isValidElementType(Ty)) { - BS.cancelScheduling(VL); + BS.cancelScheduling(VL, VL0); newTreeEntry(VL, false, UserTreeIdx); DEBUG(dbgs() << "SLP: Gathering casts with different src types.\n"); return; @@ -1384,7 +1404,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, CmpInst *Cmp = cast(VL[i]); if (Cmp->getPredicate() != P0 || Cmp->getOperand(0)->getType() != ComparedTy) { - BS.cancelScheduling(VL); + BS.cancelScheduling(VL, VL0); newTreeEntry(VL, false, UserTreeIdx); DEBUG(dbgs() << "SLP: Gathering cmp with different predicate.\n"); return; @@ -1451,7 +1471,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, for (unsigned j = 0; j < VL.size(); ++j) { if (cast(VL[j])->getNumOperands() != 2) { DEBUG(dbgs() << "SLP: not-vectorizable GEP (nested indexes).\n"); - BS.cancelScheduling(VL); + BS.cancelScheduling(VL, VL0); newTreeEntry(VL, false, UserTreeIdx); return; } @@ -1464,7 +1484,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, Type *CurTy = cast(VL[j])->getOperand(0)->getType(); if (Ty0 != CurTy) { DEBUG(dbgs() << "SLP: not-vectorizable GEP (different types).\n"); - BS.cancelScheduling(VL); + BS.cancelScheduling(VL, VL0); newTreeEntry(VL, false, UserTreeIdx); return; } @@ -1476,7 +1496,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, if (!isa(Op)) { DEBUG( dbgs() << "SLP: not-vectorizable GEP (non-constant indexes).\n"); - BS.cancelScheduling(VL); + BS.cancelScheduling(VL, VL0); newTreeEntry(VL, false, UserTreeIdx); 
return; } @@ -1498,7 +1518,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, // Check if the stores are consecutive or of we need to swizzle them. for (unsigned i = 0, e = VL.size() - 1; i < e; ++i) if (!isConsecutiveAccess(VL[i], VL[i + 1], *DL, *SE)) { - BS.cancelScheduling(VL); + BS.cancelScheduling(VL, VL0); newTreeEntry(VL, false, UserTreeIdx); DEBUG(dbgs() << "SLP: Non-consecutive store.\n"); return; @@ -1521,7 +1541,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, // represented by an intrinsic call Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI); if (!isTriviallyVectorizable(ID)) { - BS.cancelScheduling(VL); + BS.cancelScheduling(VL, VL0); newTreeEntry(VL, false, UserTreeIdx); DEBUG(dbgs() << "SLP: Non-vectorizable call.\n"); return; @@ -1535,7 +1555,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, if (!CI2 || CI2->getCalledFunction() != Int || getVectorIntrinsicIDForCall(CI2, TLI) != ID || !CI->hasIdenticalOperandBundleSchema(*CI2)) { - BS.cancelScheduling(VL); + BS.cancelScheduling(VL, VL0); newTreeEntry(VL, false, UserTreeIdx); DEBUG(dbgs() << "SLP: mismatched calls:" << *CI << "!=" << *VL[i] << "\n"); @@ -1546,7 +1566,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, if (hasVectorInstrinsicScalarOpd(ID, 1)) { Value *A1J = CI2->getArgOperand(1); if (A1I != A1J) { - BS.cancelScheduling(VL); + BS.cancelScheduling(VL, VL0); newTreeEntry(VL, false, UserTreeIdx); DEBUG(dbgs() << "SLP: mismatched arguments in call:" << *CI << " argument "<< A1I<<"!=" << A1J @@ -1559,7 +1579,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, !std::equal(CI->op_begin() + CI->getBundleOperandsStartIndex(), CI->op_begin() + CI->getBundleOperandsEndIndex(), CI2->op_begin() + CI2->getBundleOperandsStartIndex())) { - BS.cancelScheduling(VL); + BS.cancelScheduling(VL, VL0); newTreeEntry(VL, false, UserTreeIdx); DEBUG(dbgs() << "SLP: mismatched bundle operands in calls:" << *CI << "!=" << *VL[i] << '\n'); @@ -1583,7 +1603,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, // If this is not an alternate sequence of opcode like add-sub // then do not vectorize this instruction. if (!isAltShuffle) { - BS.cancelScheduling(VL); + BS.cancelScheduling(VL, VL0); newTreeEntry(VL, false, UserTreeIdx); DEBUG(dbgs() << "SLP: ShuffleVector are not vectorized.\n"); return; @@ -1611,7 +1631,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, return; } default: - BS.cancelScheduling(VL); + BS.cancelScheduling(VL, VL0); newTreeEntry(VL, false, UserTreeIdx); DEBUG(dbgs() << "SLP: Gathering unknown instruction.\n"); return; @@ -1992,7 +2012,7 @@ int BoUpSLP::getSpillCost() { // Update LiveValues. LiveValues.erase(PrevInst); for (auto &J : PrevInst->operands()) { - if (isa(&*J) && ScalarToTreeEntry.count(&*J)) + if (isa(&*J) && getTreeEntry(&*J)) LiveValues.insert(cast(&*J)); } @@ -2388,9 +2408,7 @@ Value *BoUpSLP::Gather(ArrayRef VL, VectorType *Ty) { CSEBlocks.insert(Insrt->getParent()); // Add to our 'need-to-extract' list. - if (ScalarToTreeEntry.count(VL[i])) { - int Idx = ScalarToTreeEntry[VL[i]]; - TreeEntry *E = &VectorizableTree[Idx]; + if (TreeEntry *E = getTreeEntry(VL[i])) { // Find which lane we need to extract. 
int FoundLane = -1; for (unsigned Lane = 0, LE = VL.size(); Lane != LE; ++Lane) { @@ -2409,12 +2427,8 @@ Value *BoUpSLP::Gather(ArrayRef VL, VectorType *Ty) { return Vec; } -Value *BoUpSLP::alreadyVectorized(ArrayRef VL) const { - SmallDenseMap::const_iterator Entry - = ScalarToTreeEntry.find(VL[0]); - if (Entry != ScalarToTreeEntry.end()) { - int Idx = Entry->second; - const TreeEntry *En = &VectorizableTree[Idx]; +Value *BoUpSLP::alreadyVectorized(ArrayRef VL, Value *OpValue) const { + if (const TreeEntry *En = getTreeEntry(OpValue)) { if (En->isSame(VL) && En->VectorizedValue) return En->VectorizedValue; } @@ -2422,12 +2436,9 @@ Value *BoUpSLP::alreadyVectorized(ArrayRef VL) const { } Value *BoUpSLP::vectorizeTree(ArrayRef VL) { - if (ScalarToTreeEntry.count(VL[0])) { - int Idx = ScalarToTreeEntry[VL[0]]; - TreeEntry *E = &VectorizableTree[Idx]; + if (TreeEntry *E = getTreeEntry(VL[0])) if (E->isSame(VL)) return vectorizeTree(E); - } Type *ScalarTy = VL[0]->getType(); if (StoreInst *SI = dyn_cast(VL[0])) @@ -2542,7 +2553,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { Value *InVec = vectorizeTree(INVL); - if (Value *V = alreadyVectorized(E->Scalars)) + if (Value *V = alreadyVectorized(E->Scalars, VL0)) return V; CastInst *CI = dyn_cast(VL0); @@ -2564,7 +2575,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { Value *L = vectorizeTree(LHSV); Value *R = vectorizeTree(RHSV); - if (Value *V = alreadyVectorized(E->Scalars)) + if (Value *V = alreadyVectorized(E->Scalars, VL0)) return V; CmpInst::Predicate P0 = cast(VL0)->getPredicate(); @@ -2593,7 +2604,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { Value *True = vectorizeTree(TrueVec); Value *False = vectorizeTree(FalseVec); - if (Value *V = alreadyVectorized(E->Scalars)) + if (Value *V = alreadyVectorized(E->Scalars, VL0)) return V; Value *V = Builder.CreateSelect(Cond, True, False); @@ -2633,7 +2644,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { Value *LHS = vectorizeTree(LHSVL); Value *RHS = vectorizeTree(RHSVL); - if (Value *V = alreadyVectorized(E->Scalars)) + if (Value *V = alreadyVectorized(E->Scalars, VL0)) return V; BinaryOperator *BinOp = cast(VL0); @@ -2662,9 +2673,9 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { // The pointer operand uses an in-tree scalar so we add the new BitCast to // ExternalUses list to make sure that an extract will be generated in the // future. - if (ScalarToTreeEntry.count(LI->getPointerOperand())) - ExternalUses.push_back( - ExternalUser(LI->getPointerOperand(), cast(VecPtr), 0)); + Value *PO = LI->getPointerOperand(); + if (getTreeEntry(PO)) + ExternalUses.push_back(ExternalUser(PO, cast(VecPtr), 0)); unsigned Alignment = LI->getAlignment(); LI = Builder.CreateLoad(VecPtr); @@ -2695,9 +2706,9 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { // The pointer operand uses an in-tree scalar so we add the new BitCast to // ExternalUses list to make sure that an extract will be generated in the // future. - if (ScalarToTreeEntry.count(SI->getPointerOperand())) - ExternalUses.push_back( - ExternalUser(SI->getPointerOperand(), cast(VecPtr), 0)); + Value *PO = SI->getPointerOperand(); + if (getTreeEntry(PO)) + ExternalUses.push_back(ExternalUser(PO, cast(VecPtr), 0)); if (!Alignment) { Alignment = DL->getABITypeAlignment(SI->getValueOperand()->getType()); @@ -2778,7 +2789,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { // The scalar argument uses an in-tree scalar so we add the new vectorized // call to ExternalUses list to make sure that an extract will be // generated in the future. 
- if (ScalarArg && ScalarToTreeEntry.count(ScalarArg)) + if (ScalarArg && getTreeEntry(ScalarArg)) ExternalUses.push_back(ExternalUser(ScalarArg, cast(V), 0)); E->VectorizedValue = V; @@ -2795,7 +2806,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { Value *LHS = vectorizeTree(LHSVL); Value *RHS = vectorizeTree(RHSVL); - if (Value *V = alreadyVectorized(E->Scalars)) + if (Value *V = alreadyVectorized(E->Scalars, VL0)) return V; // Create a vector of LHS op1 RHS @@ -2814,7 +2825,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { unsigned e = E->Scalars.size(); SmallVector Mask(e); for (unsigned i = 0; i < e; ++i) { - if (i & 1) { + if (isOdd(i)) { Mask[i] = Builder.getInt32(e + i); OddScalars.push_back(E->Scalars[i]); } else { @@ -2892,10 +2903,8 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) { // has multiple uses of the same value. if (User && !is_contained(Scalar->users(), User)) continue; - assert(ScalarToTreeEntry.count(Scalar) && "Invalid scalar"); - - int Idx = ScalarToTreeEntry[Scalar]; - TreeEntry *E = &VectorizableTree[Idx]; + TreeEntry *E = getTreeEntry(Scalar); + assert(E && "Invalid scalar"); assert(!E->NeedToGather && "Extracting from a gather list"); Value *Vec = E->VectorizedValue; @@ -2981,7 +2990,7 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) { for (User *U : Scalar->users()) { DEBUG(dbgs() << "SLP: \tvalidating user:" << *U << ".\n"); - assert((ScalarToTreeEntry.count(U) || + assert((getTreeEntry(U) || // It is legal to replace users in the ignorelist by undef. is_contained(UserIgnoreList, U)) && "Replacing out-of-tree value with undef"); @@ -3088,8 +3097,8 @@ void BoUpSLP::optimizeGatherSequence() { // Groups the instructions to a bundle (which is then a single scheduling entity) // and schedules instructions until the bundle gets ready. bool BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef VL, - BoUpSLP *SLP) { - if (isa(VL[0])) + BoUpSLP *SLP, Value *OpValue) { + if (isa(OpValue)) return true; // Initialize the instruction bundle. @@ -3097,7 +3106,7 @@ bool BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef VL, ScheduleData *PrevInBundle = nullptr; ScheduleData *Bundle = nullptr; bool ReSchedule = false; - DEBUG(dbgs() << "SLP: bundle: " << *VL[0] << "\n"); + DEBUG(dbgs() << "SLP: bundle: " << *OpValue << "\n"); // Make sure that the scheduling region contains all // instructions of the bundle. 
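Threading an explicit OpValue (the bundle's representative scalar, in practice VL[0]) through tryScheduleBundle and cancelScheduling spares every callee from re-deriving it out of VL. Below is a condensed fragment of the resulting caller pattern in buildTree_rec, with the extraction-stripped template arguments restored; it assumes the surrounding BoUpSLP context and is not a standalone program:

```cpp
// VL is the candidate bundle of scalars; BS is the per-block scheduler.
Instruction *VL0 = cast<Instruction>(VL[0]);
if (!BS.tryScheduleBundle(VL, this, VL0)) {
  // The dry run found a dependency cycle; tryScheduleBundle already
  // un-bundled the group internally, so just record a gather and stop.
  newTreeEntry(VL, /*Vectorized=*/false, UserTreeIdx);
  return;
}
// Every later opcode-specific bail-out releases the bundle against the
// same representative value, e.g. for non-simple (volatile/atomic) loads:
for (unsigned i = 0, e = VL.size() - 1; i < e; ++i) {
  LoadInst *L = cast<LoadInst>(VL[i]);
  if (!L->isSimple()) {
    BS.cancelScheduling(VL, VL0);
    newTreeEntry(VL, /*Vectorized=*/false, UserTreeIdx);
    return;
  }
}
```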
@@ -3168,17 +3177,18 @@ bool BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef VL, } } if (!Bundle->isReady()) { - cancelScheduling(VL); + cancelScheduling(VL, OpValue); return false; } return true; } -void BoUpSLP::BlockScheduling::cancelScheduling(ArrayRef VL) { - if (isa(VL[0])) +void BoUpSLP::BlockScheduling::cancelScheduling(ArrayRef VL, + Value *OpValue) { + if (isa(OpValue)) return; - ScheduleData *Bundle = getScheduleData(VL[0]); + ScheduleData *Bundle = getScheduleData(OpValue); DEBUG(dbgs() << "SLP: cancel scheduling of " << *Bundle << "\n"); assert(!Bundle->IsScheduled && "Can't cancel bundle which is already scheduled"); @@ -3322,12 +3332,10 @@ void BoUpSLP::BlockScheduling::calculateDependencies(ScheduleData *SD, if (UseSD && isInSchedulingRegion(UseSD->FirstInBundle)) { BundleMember->Dependencies++; ScheduleData *DestBundle = UseSD->FirstInBundle; - if (!DestBundle->IsScheduled) { + if (!DestBundle->IsScheduled) BundleMember->incrementUnscheduledDeps(1); - } - if (!DestBundle->hasValidDependencies()) { + if (!DestBundle->hasValidDependencies()) WorkList.push_back(DestBundle); - } } } else { // I'm not sure if this can ever happen. But we need to be safe. @@ -3446,7 +3454,7 @@ void BoUpSLP::scheduleBlock(BlockScheduling *BS) { I = I->getNextNode()) { ScheduleData *SD = BS->getScheduleData(I); assert( - SD->isPartOfBundle() == (ScalarToTreeEntry.count(SD->Inst) != 0) && + SD->isPartOfBundle() == (getTreeEntry(SD->Inst) != nullptr) && "scheduler and vectorizer have different opinion on what is a bundle"); SD->FirstInBundle->SchedulingPriority = Idx++; if (SD->isSchedulingEntity()) { @@ -3695,10 +3703,8 @@ void BoUpSLP::computeMinimumValueSizes() { // Determine if the sign bit of all the roots is known to be zero. If not, // IsKnownPositive is set to False. IsKnownPositive = all_of(TreeRoot, [&](Value *R) { - bool KnownZero = false; - bool KnownOne = false; - ComputeSignBit(R, KnownZero, KnownOne, *DL); - return KnownZero; + KnownBits Known = computeKnownBits(R, *DL); + return Known.isNonNegative(); }); // Determine the maximum number of bits required to store the scalar @@ -4747,56 +4753,18 @@ static Value *getReductionValue(const DominatorTree *DT, PHINode *P, return nullptr; } -namespace { -/// Tracks instructons and its children. -class WeakTrackingVHWithLevel final : public CallbackVH { - /// Operand index of the instruction currently beeing analized. - unsigned Level = 0; - /// Is this the instruction that should be vectorized, or are we now - /// processing children (i.e. operands of this instruction) for potential - /// vectorization? - bool IsInitial = true; - -public: - explicit WeakTrackingVHWithLevel() = default; - WeakTrackingVHWithLevel(Value *V) : CallbackVH(V){}; - /// Restart children analysis each time it is repaced by the new instruction. - void allUsesReplacedWith(Value *New) override { - setValPtr(New); - Level = 0; - IsInitial = true; - } - /// Check if the instruction was not deleted during vectorization. - bool isValid() const { return !getValPtr(); } - /// Is the istruction itself must be vectorized? - bool isInitial() const { return IsInitial; } - /// Try to vectorize children. - void clearInitial() { IsInitial = false; } - /// Are all children processed already? - bool isFinal() const { - assert(getValPtr() && - (isa(getValPtr()) && - cast(getValPtr())->getNumOperands() >= Level)); - return getValPtr() && - cast(getValPtr())->getNumOperands() == Level; - } - /// Get next child operation. 
- Value *nextOperand() { - assert(getValPtr() && isa(getValPtr()) && - cast(getValPtr())->getNumOperands() > Level); - return cast(getValPtr())->getOperand(Level++); - } - virtual ~WeakTrackingVHWithLevel() = default; -}; -} // namespace - -/// \brief Attempt to reduce a horizontal reduction. -/// If it is legal to match a horizontal reduction feeding -/// the phi node P with reduction operators Root in a basic block BB, then check -/// if it can be done. -/// \returns true if a horizontal reduction was matched and reduced. -/// \returns false if a horizontal reduction was not matched. -static bool canBeVectorized( +/// Attempt to reduce a horizontal reduction. +/// If it is legal to match a horizontal reduction feeding the phi node \a P +/// with reduction operators \a Root (or one of its operands) in a basic block +/// \a BB, then check if it can be done. If horizontal reduction is not found +/// and root instruction is a binary operation, vectorization of the operands is +/// attempted. +/// \returns true if a horizontal reduction was matched and reduced or operands +/// of one of the binary instruction were vectorized. +/// \returns false if a horizontal reduction was not matched (or not possible) +/// or no vectorization of any binary operation feeding \a Root instruction was +/// performed. +static bool tryToVectorizeHorReductionOrInstOperands( PHINode *P, Instruction *Root, BasicBlock *BB, BoUpSLP &R, TargetTransformInfo *TTI, const function_ref Vectorize) { @@ -4808,56 +4776,62 @@ static bool canBeVectorized( if (Root->getParent() != BB) return false; - SmallVector Stack(1, Root); + // Start analysis starting from Root instruction. If horizontal reduction is + // found, try to vectorize it. If it is not a horizontal reduction or + // vectorization is not possible or not effective, and currently analyzed + // instruction is a binary operation, try to vectorize the operands, using + // pre-order DFS traversal order. If the operands were not vectorized, repeat + // the same procedure considering each operand as a possible root of the + // horizontal reduction. + // Interrupt the process if the Root instruction itself was vectorized or all + // sub-trees not higher that RecursionMaxDepth were analyzed/vectorized. + SmallVector, 8> Stack(1, {Root, 0}); SmallSet VisitedInstrs; bool Res = false; while (!Stack.empty()) { - Value *V = Stack.back(); - if (!V) { - Stack.pop_back(); + Value *V; + unsigned Level; + std::tie(V, Level) = Stack.pop_back_val(); + if (!V) continue; - } auto *Inst = dyn_cast(V); - if (!Inst || isa(Inst)) { - Stack.pop_back(); + if (!Inst || isa(Inst)) continue; - } - if (Stack.back().isInitial()) { - Stack.back().clearInitial(); - if (auto *BI = dyn_cast(Inst)) { - HorizontalReduction HorRdx; - if (HorRdx.matchAssociativeReduction(P, BI)) { - if (HorRdx.tryToReduce(R, TTI)) { - Res = true; - P = nullptr; - continue; - } - } - if (P) { - Inst = dyn_cast(BI->getOperand(0)); - if (Inst == P) - Inst = dyn_cast(BI->getOperand(1)); - if (!Inst) { - P = nullptr; - continue; - } + if (auto *BI = dyn_cast(Inst)) { + HorizontalReduction HorRdx; + if (HorRdx.matchAssociativeReduction(P, BI)) { + if (HorRdx.tryToReduce(R, TTI)) { + Res = true; + // Set P to nullptr to avoid re-analysis of phi node in + // matchAssociativeReduction function unless this is the root node. 
+ P = nullptr; + continue; } } - P = nullptr; - if (Vectorize(dyn_cast(Inst), R)) { - Res = true; - continue; + if (P) { + Inst = dyn_cast(BI->getOperand(0)); + if (Inst == P) + Inst = dyn_cast(BI->getOperand(1)); + if (!Inst) { + // Set P to nullptr to avoid re-analysis of phi node in + // matchAssociativeReduction function unless this is the root node. + P = nullptr; + continue; + } } } - if (Stack.back().isFinal()) { - Stack.pop_back(); + // Set P to nullptr to avoid re-analysis of phi node in + // matchAssociativeReduction function unless this is the root node. + P = nullptr; + if (Vectorize(dyn_cast(Inst), R)) { + Res = true; continue; } - if (auto *NextV = dyn_cast(Stack.back().nextOperand())) - if (NextV->getParent() == BB && VisitedInstrs.insert(NextV).second && - Stack.size() < RecursionMaxDepth) - Stack.push_back(NextV); + // Try to vectorize operands. + if (++Level < RecursionMaxDepth) + for (auto *Op : Inst->operand_values()) + Stack.emplace_back(Op, Level); } return Res; } @@ -4874,10 +4848,10 @@ bool SLPVectorizerPass::vectorizeRootInstruction(PHINode *P, Value *V, if (!isa(I)) P = nullptr; // Try to match and vectorize a horizontal reduction. - return canBeVectorized(P, I, BB, R, TTI, - [this](BinaryOperator *BI, BoUpSLP &R) -> bool { - return tryToVectorize(BI, R); - }); + return tryToVectorizeHorReductionOrInstOperands( + P, I, BB, R, TTI, [this](BinaryOperator *BI, BoUpSLP &R) -> bool { + return tryToVectorize(BI, R); + }); } bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { diff --git a/interpreter/llvm/src/lib/Transforms/Vectorize/Vectorize.cpp b/interpreter/llvm/src/lib/Transforms/Vectorize/Vectorize.cpp index 28e0b2eb98666..fb2f509dcbaa9 100644 --- a/interpreter/llvm/src/lib/Transforms/Vectorize/Vectorize.cpp +++ b/interpreter/llvm/src/lib/Transforms/Vectorize/Vectorize.cpp @@ -17,16 +17,15 @@ #include "llvm-c/Initialization.h" #include "llvm-c/Transforms/Vectorize.h" #include "llvm/Analysis/Passes.h" +#include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Verifier.h" #include "llvm/InitializePasses.h" -#include "llvm/IR/LegacyPassManager.h" using namespace llvm; /// initializeVectorizationPasses - Initialize all passes linked into the /// Vectorization library. void llvm::initializeVectorization(PassRegistry &Registry) { - initializeBBVectorizePass(Registry); initializeLoopVectorizePass(Registry); initializeSLPVectorizerPass(Registry); initializeLoadStoreVectorizerPass(Registry); @@ -36,8 +35,8 @@ void LLVMInitializeVectorization(LLVMPassRegistryRef R) { initializeVectorization(*unwrap(R)); } +// DEPRECATED: Remove after the LLVM 5 release. 
void LLVMAddBBVectorizePass(LLVMPassManagerRef PM) { - unwrap(PM)->add(createBBVectorizePass()); } void LLVMAddLoopVectorizePass(LLVMPassManagerRef PM) { diff --git a/interpreter/llvm/src/lib/XRay/InstrumentationMap.cpp b/interpreter/llvm/src/lib/XRay/InstrumentationMap.cpp index 431c251feb65e..d9ce255bc6887 100644 --- a/interpreter/llvm/src/lib/XRay/InstrumentationMap.cpp +++ b/interpreter/llvm/src/lib/XRay/InstrumentationMap.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/XRay/InstrumentationMap.h" #include "llvm/ADT/None.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" @@ -22,7 +23,6 @@ #include "llvm/Support/Error.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/YAMLTraits.h" -#include "llvm/XRay/InstrumentationMap.h" #include #include #include diff --git a/interpreter/llvm/src/runtimes/CMakeLists.txt b/interpreter/llvm/src/runtimes/CMakeLists.txt index ef56fa1b9367e..f475878e2f2a0 100644 --- a/interpreter/llvm/src/runtimes/CMakeLists.txt +++ b/interpreter/llvm/src/runtimes/CMakeLists.txt @@ -30,7 +30,7 @@ if(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR}) list(INSERT CMAKE_MODULE_PATH 0 "${CMAKE_CURRENT_SOURCE_DIR}/../cmake" "${CMAKE_CURRENT_SOURCE_DIR}/../cmake/modules" - "${LLVM_BINARY_DIR}/lib/cmake/llvm" + "${LLVM_LIBRARY_DIR}/cmake/llvm" ) # Some of the runtimes will conditionally use the compiler-rt sanitizers @@ -62,7 +62,7 @@ if(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR}) set(LLVM_MAIN_SRC_DIR ${LLVM_BUILD_MAIN_SRC_DIR}) if(APPLE) - set(LLVM_ENABLE_LIBCXX ON CACHE BOOL "") + set(LLVM_ENABLE_LIBCXX ON CACHE BOOL "") endif() set(SAFE_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS}) @@ -96,16 +96,34 @@ if(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR}) # The subdirectories need to treat this as standalone builds set(${canon_name}_STANDALONE_BUILD On) + if(LLVM_RUNTIMES_TARGET) + if(NOT "${entry}" MATCHES "compiler-rt") + set(${canon_name}_INSTALL_PREFIX "lib/${LLVM_RUNTIMES_TARGET}/" CACHE STRING "" FORCE) + endif() + endif() + # Setting a variable to let sub-projects detect which other projects # will be included under here. set(HAVE_${canon_name} On) endforeach() + set(SAFE_LLVM_BINARY_DIR ${LLVM_BINARY_DIR}) + set(SAFE_LLVM_LIBRARY_OUTPUT_INTDIR ${LLVM_LIBRARY_OUTPUT_INTDIR}) + set(SAFE_LLVM_RUNTIMES_OUTPUT_INTDIR ${LLVM_RUNTIME_OUTPUT_INTDIR}) + # We do this in two loops so that HAVE_* is set for each runtime before the # other runtimes are added. 
foreach(entry ${runtimes}) get_filename_component(projName ${entry} NAME) - + + if(LLVM_RUNTIMES_TARGET) + if(NOT "${entry}" MATCHES "compiler-rt") + set(LLVM_BINARY_DIR "${LLVM_LIBRARY_DIR}/${LLVM_RUNTIMES_TARGET}") + set(LLVM_LIBRARY_OUTPUT_INTDIR "${LLVM_LIBRARY_DIR}/${LLVM_RUNTIMES_TARGET}/lib") + set(LLVM_RUNTIME_OUTPUT_INTDIR "${LLVM_TOOLS_BINARY_DIR}/${LLVM_RUNTIMES_TARGET}") + endif() + endif() + # Between each sub-project we want to cache and clear the LIT properties set_property(GLOBAL PROPERTY LLVM_LIT_TESTSUITES) set_property(GLOBAL PROPERTY LLVM_LIT_PARAMS) @@ -123,12 +141,20 @@ if(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR}) list(APPEND RUNTIMES_LIT_PARAMS ${LLVM_LIT_PARAMS}) list(APPEND RUNTIMES_LIT_DEPENDS ${LLVM_LIT_DEPENDS}) list(APPEND RUNTIMES_LIT_EXTRA_ARGS ${LLVM_LIT_EXTRA_ARGS}) + + if(LLVM_RUNTIMES_TARGET) + if(NOT "${entry}" MATCHES "compiler-rt") + set(LLVM_BINARY_DIR "${SAFE_LLVM_BINARY_DIR}") + set(LLVM_LIBRARY_OUTPUT_INTDIR "${SAFE_LLVM_LIBRARY_OUTPUT_INTDIR}") + set(LLVM_RUNTIME_OUTPUT_INTDIR "${SAFE_LLVM_RUNTIME_OUTPUT_INTDIR}") + endif() + endif() endforeach() if(LLVM_INCLUDE_TESTS) # Add a global check rule now that all subdirectories have been traversed # and we know the total set of lit testsuites. - + add_lit_target(check-runtimes "Running all regression tests" ${RUNTIMES_LIT_TESTSUITES} @@ -147,9 +173,9 @@ if(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR}) message(SEND_ERROR "Missing target for runtime component ${component}!") continue() endif() - if(LLVM_INCLUDE_TESTS AND NOT TARGET check-${component}) - message(SEND_ERROR "Missing check target for runtime component ${component}!") - continue() + + if(TARGET check-${component}) + list(APPEND SUB_CHECK_TARGETS check-${component}) endif() if(TARGET install-${component}) @@ -157,14 +183,18 @@ if(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR}) endif() endforeach() - configure_file( - ${CMAKE_CURRENT_SOURCE_DIR}/Components.cmake.in - ${LLVM_BINARY_DIR}/runtimes/Components.cmake) + if(LLVM_RUNTIMES_TARGET) + configure_file( + ${CMAKE_CURRENT_SOURCE_DIR}/Components.cmake.in + ${LLVM_BINARY_DIR}/runtimes/${LLVM_RUNTIMES_TARGET}/Components.cmake) + else() + configure_file( + ${CMAKE_CURRENT_SOURCE_DIR}/Components.cmake.in + ${LLVM_BINARY_DIR}/runtimes/Components.cmake) + endif() endif() else() # if this is included from LLVM's CMake - include(${LLVM_BINARY_DIR}/runtimes/Components.cmake OPTIONAL) - set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS ${LLVM_BINARY_DIR}/runtimes/Components.cmake) include(LLVMExternalProjectUtils) if(NOT LLVM_BUILD_RUNTIMES) @@ -188,7 +218,12 @@ else() # if this is included from LLVM's CMake else() get_cmake_property(variableNames VARIABLES) add_custom_target(builtins) + add_custom_target(install-builtins) foreach(target ${LLVM_BUILTIN_TARGETS}) + if(target STREQUAL "default") + set(target ${LLVM_DEFAULT_TARGET_TRIPLE}) + endif() + string(REPLACE "-" ";" builtin_target_list ${target}) foreach(item ${builtin_target_list}) string(TOLOWER "${item}" item_lower) @@ -218,6 +253,7 @@ else() # if this is included from LLVM's CMake USE_TOOLCHAIN ${EXTRA_ARGS}) add_dependencies(builtins builtins-${target}) + add_dependencies(install-builtins install-builtins-${target}) endforeach() endif() set(deps builtins) @@ -244,40 +280,140 @@ else() # if this is included from LLVM's CMake list(APPEND runtime_names ${projName}) endforeach() - if(runtimes) + # runtime_register_target(target) + # Utility function to register external runtime target. 
+ function(runtime_register_target target) + if(target STREQUAL LLVM_DEFAULT_TARGET_TRIPLE) + include(${LLVM_BINARY_DIR}/runtimes/Components.cmake OPTIONAL) + set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS ${LLVM_BINARY_DIR}/runtimes/Components.cmake) + else() + include(${LLVM_BINARY_DIR}/runtimes/${target}/Components.cmake OPTIONAL) + set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS ${LLVM_BINARY_DIR}/runtimes/${target}/Components.cmake) + endif() foreach(runtime_name ${runtime_names}) - list(APPEND extra_targets - ${runtime_name} - install-${runtime_name} - check-${runtime_name}) + list(APPEND ${target}_extra_targets + ${runtime_name}-${target} + install-${runtime_name}-${target}) + if(LLVM_INCLUDE_TESTS) + list(APPEND ${target}_test_targets check-${runtime_name}-${target}) + endif() + endforeach() + + foreach(name IN LISTS SUB_COMPONENTS SUB_INSTALL_TARGETS) + list(APPEND ${target}_extra_targets "${name}:${name}-${target}") endforeach() if(LLVM_INCLUDE_TESTS) - set(test_targets runtimes-test-depends check-runtimes) - foreach(component ${SUB_COMPONENTS}) - list(APPEND SUB_COMPONENT_CHECK_TARGETS check-${component}) + list(APPEND ${target}_test_targets runtimes-test-depends-${target} check-runtimes-${target}) + foreach(name IN LISTS SUB_CHECK_TARGETS) + list(APPEND ${target}_test_targets "${name}:${name}-${target}") + list(APPEND test_targets ${name}-${target}) endforeach() + set(test_targets "${test_targets}" PARENT_SCOPE) endif() - # Create a runtimes target that uses this file as its top-level CMake file. - # The runtimes target is a configuration of all the runtime libraries - # together in a single CMake invocaiton. - llvm_ExternalProject_Add(runtimes + get_cmake_property(variableNames VARIABLES) + foreach(variableName ${variableNames}) + if(variableName MATCHES "^RUNTIMES_${target}") + string(REPLACE "RUNTIMES_${target}_" "" new_name ${variableName}) + list(APPEND ${target}_extra_args "-D${new_name}=${${variableName}}") + endif() + endforeach() + + if(NOT target STREQUAL LLVM_DEFAULT_TARGET_TRIPLE) + list(APPEND ${target}_extra_args "-DLLVM_RUNTIMES_TARGET=${target}") + endif() + + llvm_ExternalProject_Add(runtimes-${target} ${CMAKE_CURRENT_SOURCE_DIR} DEPENDS ${deps} # Builtins were built separately above CMAKE_ARGS -DCOMPILER_RT_BUILD_BUILTINS=Off -DLLVM_INCLUDE_TESTS=${LLVM_INCLUDE_TESTS} + -DLLVM_LIBRARY_DIR=${LLVM_LIBRARY_DIR} + -DCMAKE_C_COMPILER_TARGET=${target} + -DCMAKE_CXX_COMPILER_TARGET=${target} + -DCMAKE_ASM_COMPILER_TARGET=${target} + -DCMAKE_C_COMPILER_WORKS=ON + -DCMAKE_CXX_COMPILER_WORKS=ON + -DCMAKE_ASM_COMPILER_WORKS=ON + -DCOMPILER_RT_DEFAULT_TARGET_ONLY=ON + ${${target}_extra_args} + TOOLCHAIN_TOOLS clang lld llvm-ar llvm-ranlib PASSTHROUGH_PREFIXES ${prefixes} - EXTRA_TARGETS ${extra_targets} - ${test_targets} - ${SUB_COMPONENTS} - ${SUB_COMPONENT_CHECK_TARGETS} - ${SUB_INSTALL_TARGETS} + EXTRA_TARGETS ${${target}_extra_targets} + ${${target}_test_targets} USE_TOOLCHAIN ${EXTRA_ARGS}) - + endfunction() + + if(runtimes) + # Create a runtimes target that uses this file as its top-level CMake file. + # The runtimes target is a configuration of all the runtime libraries + # together in a single CMake invocation.
+ if(NOT LLVM_RUNTIME_TARGETS) + include(${LLVM_BINARY_DIR}/runtimes/Components.cmake OPTIONAL) + set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS ${LLVM_BINARY_DIR}/runtimes/Components.cmake) + + foreach(runtime_name ${runtime_names}) + list(APPEND extra_targets + ${runtime_name} + install-${runtime_name}) + if(LLVM_INCLUDE_TESTS) + list(APPEND test_targets check-${runtime_name}) + endif() + endforeach() + + if(LLVM_INCLUDE_TESTS) + list(APPEND test_targets runtimes-test-depends check-runtimes) + foreach(component ${SUB_COMPONENTS}) + list(APPEND SUB_CHECK_TARGETS check-${component}) + endforeach() + endif() + + llvm_ExternalProject_Add(runtimes + ${CMAKE_CURRENT_SOURCE_DIR} + DEPENDS ${deps} + # Builtins were built separately above + CMAKE_ARGS -DCOMPILER_RT_BUILD_BUILTINS=Off + -DLLVM_INCLUDE_TESTS=${LLVM_INCLUDE_TESTS} + -DLLVM_LIBRARY_DIR=${LLVM_LIBRARY_DIR} + PASSTHROUGH_PREFIXES ${prefixes} + EXTRA_TARGETS ${extra_targets} + ${test_targets} + ${SUB_COMPONENTS} + ${SUB_CHECK_TARGETS} + ${SUB_INSTALL_TARGETS} + USE_TOOLCHAIN + ${EXTRA_ARGS}) + else() + add_custom_target(runtimes) + add_custom_target(runtimes-configure) + add_custom_target(install-runtimes) + if(LLVM_INCLUDE_TESTS) + add_custom_target(check-runtimes) + add_custom_target(runtimes-test-depends) + set(test_targets "") + endif() + + foreach(target ${LLVM_RUNTIME_TARGETS}) + if(target STREQUAL "default") + set(target ${LLVM_DEFAULT_TARGET_TRIPLE}) + endif() + + runtime_register_target(${target}) + + add_dependencies(runtimes runtimes-${target}) + add_dependencies(runtimes-configure runtimes-${target}-configure) + add_dependencies(install-runtimes install-runtimes-${target}) + if(LLVM_INCLUDE_TESTS) + add_dependencies(check-runtimes check-runtimes-${target}) + add_dependencies(runtimes-test-depends runtimes-test-depends-${target}) + endif() + endforeach() + endif() + # TODO: This is a hack needed because the libcxx headers are copied into the # build directory during configuration. Without that step the clang in the # build directory cannot find the C++ headers in certain configurations. 
@@ -290,6 +426,21 @@ else() # if this is included from LLVM's CMake if(LLVM_INCLUDE_TESTS) set_property(GLOBAL APPEND PROPERTY LLVM_ADDITIONAL_TEST_DEPENDS runtimes-test-depends) set_property(GLOBAL APPEND PROPERTY LLVM_ADDITIONAL_TEST_TARGETS check-runtimes) + + set(RUNTIMES_TEST_DEPENDS + FileCheck + count + llvm-nm + llvm-objdump + llvm-xray + not + obj2yaml + sancov + sanstats + ) + foreach(target ${test_targets} ${SUB_CHECK_TARGETS}) + add_dependencies(${target} ${RUNTIMES_TEST_DEPENDS}) + endforeach() endif() endif() endif() diff --git a/interpreter/llvm/src/runtimes/Components.cmake.in b/interpreter/llvm/src/runtimes/Components.cmake.in index 6e24ac380d18b..1d8fb7ab174c2 100644 --- a/interpreter/llvm/src/runtimes/Components.cmake.in +++ b/interpreter/llvm/src/runtimes/Components.cmake.in @@ -1,2 +1,3 @@ set(SUB_COMPONENTS @SUB_COMPONENTS@) +set(SUB_CHECK_TARGETS @SUB_CHECK_TARGETS@) set(SUB_INSTALL_TARGETS @SUB_INSTALL_TARGETS@) diff --git a/interpreter/llvm/src/tools/LLVMBuild.txt b/interpreter/llvm/src/tools/LLVMBuild.txt index e3041a6d40d4e..bcf58842eac3b 100644 --- a/interpreter/llvm/src/tools/LLVMBuild.txt +++ b/interpreter/llvm/src/tools/LLVMBuild.txt @@ -40,7 +40,7 @@ subdirectories = llvm-modextract llvm-nm llvm-objdump - llvm-pdbdump + llvm-pdbutil llvm-profdata llvm-rtdyld llvm-size diff --git a/interpreter/llvm/src/tools/bugpoint/CMakeLists.txt b/interpreter/llvm/src/tools/bugpoint/CMakeLists.txt index 7598657427e8b..8975e67634340 100644 --- a/interpreter/llvm/src/tools/bugpoint/CMakeLists.txt +++ b/interpreter/llvm/src/tools/bugpoint/CMakeLists.txt @@ -1,4 +1,5 @@ set(LLVM_LINK_COMPONENTS + ${LLVM_TARGETS_TO_BUILD} Analysis BitWriter CodeGen diff --git a/interpreter/llvm/src/tools/bugpoint/LLVMBuild.txt b/interpreter/llvm/src/tools/bugpoint/LLVMBuild.txt index 37a605870548f..68ecb8c8f4f91 100644 --- a/interpreter/llvm/src/tools/bugpoint/LLVMBuild.txt +++ b/interpreter/llvm/src/tools/bugpoint/LLVMBuild.txt @@ -30,3 +30,4 @@ required_libraries = Linker ObjCARC Scalar + all-targets diff --git a/interpreter/llvm/src/tools/bugpoint/OptimizerDriver.cpp b/interpreter/llvm/src/tools/bugpoint/OptimizerDriver.cpp index 246580c8bdbe3..489e50b881010 100644 --- a/interpreter/llvm/src/tools/bugpoint/OptimizerDriver.cpp +++ b/interpreter/llvm/src/tools/bugpoint/OptimizerDriver.cpp @@ -47,14 +47,13 @@ static cl::opt PreserveBitcodeUseListOrder( cl::desc("Preserve use-list order when writing LLVM bitcode."), cl::init(true), cl::Hidden); -namespace { // ChildOutput - This option captures the name of the child output file that // is set up by the parent bugpoint process -cl::opt ChildOutput("child-output", cl::ReallyHidden); -cl::opt OptCmd("opt-command", cl::init(""), - cl::desc("Path to opt. (default: search path " - "for 'opt'.)")); -} +static cl::opt ChildOutput("child-output", cl::ReallyHidden); +static cl::opt + OptCmd("opt-command", cl::init(""), + cl::desc("Path to opt. (default: search path " + "for 'opt'.)")); /// writeProgramToFile - This writes the current "Program" to the named bitcode /// file. If an error occurs, true is returned. 
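The OptimizerDriver.cpp hunk above swaps an anonymous namespace around two cl::opt globals for plain static, the form LLVM's coding standards prefer for file-local variables (anonymous namespaces are reserved for type declarations). A minimal self-contained illustration of the pattern; the option name here is hypothetical, not one bugpoint defines:

```cpp
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"

// 'static' at file scope gives the option internal linkage, matching what
// the old anonymous namespace provided, with one less level of nesting.
static llvm::cl::opt<std::string>
    ToolPath("tool-path", llvm::cl::init(""),
             llvm::cl::desc("Path to the tool. (default: search PATH)"));

int main(int argc, char **argv) {
  llvm::cl::ParseCommandLineOptions(argc, argv, "cl::opt linkage demo\n");
  llvm::outs() << "tool-path = " << ToolPath << "\n";
  return 0;
}
```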
@@ -203,10 +202,11 @@ bool BugDriver::runPasses(Module *Program, } else Args.push_back(tool.c_str()); - Args.push_back("-o"); - Args.push_back(OutputFilename.c_str()); for (unsigned i = 0, e = OptArgs.size(); i != e; ++i) Args.push_back(OptArgs[i].c_str()); + Args.push_back("-disable-symbolication"); + Args.push_back("-o"); + Args.push_back(OutputFilename.c_str()); std::vector pass_args; for (unsigned i = 0, e = PluginLoader::getNumPlugins(); i != e; ++i) { pass_args.push_back(std::string("-load")); diff --git a/interpreter/llvm/src/tools/bugpoint/ToolRunner.cpp b/interpreter/llvm/src/tools/bugpoint/ToolRunner.cpp index 10532ef8395b8..70b18e3dbbf9e 100644 --- a/interpreter/llvm/src/tools/bugpoint/ToolRunner.cpp +++ b/interpreter/llvm/src/tools/bugpoint/ToolRunner.cpp @@ -861,7 +861,7 @@ Error CC::MakeSharedObject(const std::string &InputFile, FileType fileType, errs() << "\n";); if (RunProgramWithTimeout(CCPath, &CCArgs[0], "", "", "")) return ProcessFailure(CCPath, &CCArgs[0]); - return Error::success();; + return Error::success(); } /// create - Try to find the CC executable diff --git a/interpreter/llvm/src/tools/bugpoint/bugpoint.cpp b/interpreter/llvm/src/tools/bugpoint/bugpoint.cpp index 85c1ddd8277d9..4ddea8dbec190 100644 --- a/interpreter/llvm/src/tools/bugpoint/bugpoint.cpp +++ b/interpreter/llvm/src/tools/bugpoint/bugpoint.cpp @@ -26,6 +26,7 @@ #include "llvm/Support/PrettyStackTrace.h" #include "llvm/Support/Process.h" #include "llvm/Support/Signals.h" +#include "llvm/Support/TargetSelect.h" #include "llvm/Support/Valgrind.h" #include "llvm/Transforms/IPO/AlwaysInliner.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" @@ -138,6 +139,13 @@ int main(int argc, char **argv) { polly::initializePollyPasses(Registry); #endif + if (std::getenv("bar") == (char*) -1) { + InitializeAllTargets(); + InitializeAllTargetMCs(); + InitializeAllAsmPrinters(); + InitializeAllAsmParsers(); + } + cl::ParseCommandLineOptions(argc, argv, "LLVM automatic testcase reducer. See\nhttp://" "llvm.org/cmds/bugpoint.html" diff --git a/interpreter/llvm/src/tools/clang/CMakeLists.txt b/interpreter/llvm/src/tools/clang/CMakeLists.txt index 9e43a103b2bc5..2667b1d6892e6 100644 --- a/interpreter/llvm/src/tools/clang/CMakeLists.txt +++ b/interpreter/llvm/src/tools/clang/CMakeLists.txt @@ -359,6 +359,10 @@ if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY) PATTERN "*.inc" PATTERN "*.h" ) + + install(PROGRAMS utils/bash-autocomplete.sh + DESTINATION share/clang + ) endif() add_definitions( -D_GNU_SOURCE ) @@ -385,11 +389,7 @@ if(CLANG_ANALYZER_BUILD_Z3) endif() if(CLANG_ENABLE_ARCMT) - add_definitions(-DCLANG_ENABLE_ARCMT) - add_definitions(-DCLANG_ENABLE_OBJC_REWRITER) -endif() -if(CLANG_ENABLE_STATIC_ANALYZER) - add_definitions(-DCLANG_ENABLE_STATIC_ANALYZER) + set(CLANG_ENABLE_OBJC_REWRITER ON) endif() # Clang version information diff --git a/interpreter/llvm/src/tools/clang/bindings/python/clang/cindex.py b/interpreter/llvm/src/tools/clang/bindings/python/clang/cindex.py index 0cd5617e43a8a..236803a9ab9b1 100644 --- a/interpreter/llvm/src/tools/clang/bindings/python/clang/cindex.py +++ b/interpreter/llvm/src/tools/clang/bindings/python/clang/cindex.py @@ -782,7 +782,7 @@ def __repr__(self): # A C++ template type parameter CursorKind.TEMPLATE_TYPE_PARAMETER = CursorKind(27) -# A C++ non-type template paramater. +# A C++ non-type template parameter. CursorKind.TEMPLATE_NON_TYPE_PARAMETER = CursorKind(28) # A C++ template template parameter. 
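One note on the bugpoint.cpp hunk above: std::getenv never returns (char*)-1, so the guarded block is unreachable at run time; together with the new all-targets entry in bugpoint's LLVMBuild.txt, this looks intended simply to keep references to the target initializers alive so the backends stay linked into the binary. For contrast, the unconditional form of that initialization, as other LLVM tools typically use it, is sketched below:

```cpp
#include "llvm/Support/TargetSelect.h"

int main() {
  // Register every backend compiled into LLVM so the tool can codegen
  // for any of them (e.g. when reducing -run-llc style test cases).
  llvm::InitializeAllTargets();
  llvm::InitializeAllTargetMCs();
  llvm::InitializeAllAsmPrinters();
  llvm::InitializeAllAsmParsers();
  return 0;
}
```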
@@ -1367,6 +1367,30 @@ class TemplateArgumentKind(BaseEnumeration): TemplateArgumentKind.NULLPTR = TemplateArgumentKind(3) TemplateArgumentKind.INTEGRAL = TemplateArgumentKind(4) +### Exception Specification Kinds ### +class ExceptionSpecificationKind(BaseEnumeration): + """ + An ExceptionSpecificationKind describes the kind of exception specification + that a function has. + """ + + # The required BaseEnumeration declarations. + _kinds = [] + _name_map = None + + def __repr__(self): + return 'ExceptionSpecificationKind.{}'.format(self.name) + +ExceptionSpecificationKind.NONE = ExceptionSpecificationKind(0) +ExceptionSpecificationKind.DYNAMIC_NONE = ExceptionSpecificationKind(1) +ExceptionSpecificationKind.DYNAMIC = ExceptionSpecificationKind(2) +ExceptionSpecificationKind.MS_ANY = ExceptionSpecificationKind(3) +ExceptionSpecificationKind.BASIC_NOEXCEPT = ExceptionSpecificationKind(4) +ExceptionSpecificationKind.COMPUTED_NOEXCEPT = ExceptionSpecificationKind(5) +ExceptionSpecificationKind.UNEVALUATED = ExceptionSpecificationKind(6) +ExceptionSpecificationKind.UNINSTANTIATED = ExceptionSpecificationKind(7) +ExceptionSpecificationKind.UNPARSED = ExceptionSpecificationKind(8) + ### Cursors ### class Cursor(Structure): @@ -1454,6 +1478,11 @@ def is_virtual_method(self): """ return conf.lib.clang_CXXMethod_isVirtual(self) + def is_scoped_enum(self): + """Returns True if the cursor refers to a scoped enum declaration. + """ + return conf.lib.clang_EnumDecl_isScoped(self) + def get_definition(self): """ If the cursor is a reference to a declaration or a declaration of @@ -1586,6 +1615,18 @@ def result_type(self): return self._result_type + @property + def exception_specification_kind(self): + ''' + Retrieve the exception specification kind, which is one of the values + from the ExceptionSpecificationKind enumeration. + ''' + if not hasattr(self, '_exception_specification_kind'): + exc_kind = conf.lib.clang_getCursorExceptionSpecificationType(self) + self._exception_specification_kind = ExceptionSpecificationKind.from_id(exc_kind) + + return self._exception_specification_kind + @property def underlying_typedef_type(self): """Return the underlying type of a typedef declaration. 
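The ExceptionSpecificationKind enumeration, Cursor.is_scoped_enum, and Cursor.exception_specification_kind added above are thin ctypes wrappers over libclang entry points new in this release (clang_EnumDecl_isScoped and clang_getCursorExceptionSpecificationType). A small C++ caller of the same C API, sketched under the assumption that a matching libclang is installed (build with clang++ demo.cpp -lclang):

```cpp
#include <clang-c/Index.h>
#include <cstdio>
#include <cstring>

static CXChildVisitResult visit(CXCursor C, CXCursor, CXClientData) {
  CXString Name = clang_getCursorSpelling(C);
  if (clang_getCursorKind(C) == CXCursor_EnumDecl)
    std::printf("enum %s: scoped=%u\n", clang_getCString(Name),
                clang_EnumDecl_isScoped(C));
  else if (clang_getCursorKind(C) == CXCursor_FunctionDecl)
    std::printf("func %s: exception spec kind=%d\n", clang_getCString(Name),
                clang_getCursorExceptionSpecificationType(C));
  clang_disposeString(Name);
  return CXChildVisit_Recurse;
}

int main() {
  const char *Src = "enum class S {}; enum U {}; int f(int) noexcept;";
  CXUnsavedFile File = {"t.cpp", Src, (unsigned long)std::strlen(Src)};
  const char *Args[] = {"-x", "c++", "-std=c++11"};
  CXIndex Idx = clang_createIndex(0, 0);
  CXTranslationUnit TU = clang_parseTranslationUnit(
      Idx, "t.cpp", Args, 3, &File, 1, CXTranslationUnit_None);
  if (TU) {
    clang_visitChildren(clang_getTranslationUnitCursor(TU), visit, nullptr);
    clang_disposeTranslationUnit(TU);
  }
  clang_disposeIndex(Idx);
  return 0;
}
```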
@@ -1963,6 +2004,47 @@ def __repr__(self): TypeKind.MEMBERPOINTER = TypeKind(117) TypeKind.AUTO = TypeKind(118) TypeKind.ELABORATED = TypeKind(119) +TypeKind.PIPE = TypeKind(120) +TypeKind.OCLIMAGE1DRO = TypeKind(121) +TypeKind.OCLIMAGE1DARRAYRO = TypeKind(122) +TypeKind.OCLIMAGE1DBUFFERRO = TypeKind(123) +TypeKind.OCLIMAGE2DRO = TypeKind(124) +TypeKind.OCLIMAGE2DARRAYRO = TypeKind(125) +TypeKind.OCLIMAGE2DDEPTHRO = TypeKind(126) +TypeKind.OCLIMAGE2DARRAYDEPTHRO = TypeKind(127) +TypeKind.OCLIMAGE2DMSAARO = TypeKind(128) +TypeKind.OCLIMAGE2DARRAYMSAARO = TypeKind(129) +TypeKind.OCLIMAGE2DMSAADEPTHRO = TypeKind(130) +TypeKind.OCLIMAGE2DARRAYMSAADEPTHRO = TypeKind(131) +TypeKind.OCLIMAGE3DRO = TypeKind(132) +TypeKind.OCLIMAGE1DWO = TypeKind(133) +TypeKind.OCLIMAGE1DARRAYWO = TypeKind(134) +TypeKind.OCLIMAGE1DBUFFERWO = TypeKind(135) +TypeKind.OCLIMAGE2DWO = TypeKind(136) +TypeKind.OCLIMAGE2DARRAYWO = TypeKind(137) +TypeKind.OCLIMAGE2DDEPTHWO = TypeKind(138) +TypeKind.OCLIMAGE2DARRAYDEPTHWO = TypeKind(139) +TypeKind.OCLIMAGE2DMSAAWO = TypeKind(140) +TypeKind.OCLIMAGE2DARRAYMSAAWO = TypeKind(141) +TypeKind.OCLIMAGE2DMSAADEPTHWO = TypeKind(142) +TypeKind.OCLIMAGE2DARRAYMSAADEPTHWO = TypeKind(143) +TypeKind.OCLIMAGE3DWO = TypeKind(144) +TypeKind.OCLIMAGE1DRW = TypeKind(145) +TypeKind.OCLIMAGE1DARRAYRW = TypeKind(146) +TypeKind.OCLIMAGE1DBUFFERRW = TypeKind(147) +TypeKind.OCLIMAGE2DRW = TypeKind(148) +TypeKind.OCLIMAGE2DARRAYRW = TypeKind(149) +TypeKind.OCLIMAGE2DDEPTHRW = TypeKind(150) +TypeKind.OCLIMAGE2DARRAYDEPTHRW = TypeKind(151) +TypeKind.OCLIMAGE2DMSAARW = TypeKind(152) +TypeKind.OCLIMAGE2DARRAYMSAARW = TypeKind(153) +TypeKind.OCLIMAGE2DMSAADEPTHRW = TypeKind(154) +TypeKind.OCLIMAGE2DARRAYMSAADEPTHRW = TypeKind(155) +TypeKind.OCLIMAGE3DRW = TypeKind(156) +TypeKind.OCLSAMPLER = TypeKind(157) +TypeKind.OCLEVENT = TypeKind(158) +TypeKind.OCLQUEUE = TypeKind(159) +TypeKind.OCLRESERVEID = TypeKind(160) class RefQualifierKind(BaseEnumeration): """Describes a specific ref-qualifier of a type.""" @@ -2121,6 +2203,12 @@ def is_function_variadic(self): return conf.lib.clang_isFunctionTypeVariadic(self) + def get_address_space(self): + return conf.lib.clang_getAddressSpace(self) + + def get_typedef_name(self): + return conf.lib.clang_getTypedefName(self) + def is_pod(self): """Determine whether this Type represents plain old data (POD).""" return conf.lib.clang_isPODType(self) @@ -2207,6 +2295,14 @@ def visitor(field, children): callbacks['fields_visit'](visitor), fields) return iter(fields) + def get_exception_specification_kind(self): + """ + Return the kind of the exception specification; a value from + the ExceptionSpecificationKind enumeration. 
+ """ + return ExceptionSpecificationKind.from_id( + conf.lib.clang.getExceptionSpecificationType(self)) + @property def spelling(self): """Retrieve the spelling of this Type.""" @@ -3223,6 +3319,10 @@ def cursor(self): [Cursor], bool), + ("clang_EnumDecl_isScoped", + [Cursor], + bool), + ("clang_defaultDiagnosticDisplayOptions", [], c_uint), @@ -3624,6 +3724,11 @@ def cursor(self): Type, Type.from_result), + ("clang_getTypedefName", + [Type], + _CXString, + _CXString.from_result), + ("clang_getTypeKindSpelling", [c_uint], _CXString, diff --git a/interpreter/llvm/src/tools/clang/bindings/python/tests/cindex/test_cursor.py b/interpreter/llvm/src/tools/clang/bindings/python/tests/cindex/test_cursor.py index 8103e96df4f9e..4787ea931e13b 100644 --- a/interpreter/llvm/src/tools/clang/bindings/python/tests/cindex/test_cursor.py +++ b/interpreter/llvm/src/tools/clang/bindings/python/tests/cindex/test_cursor.py @@ -255,6 +255,22 @@ def test_is_virtual_method(): assert foo.is_virtual_method() assert not bar.is_virtual_method() +def test_is_scoped_enum(): + """Ensure Cursor.is_scoped_enum works.""" + source = 'class X {}; enum RegularEnum {}; enum class ScopedEnum {};' + tu = get_tu(source, lang='cpp') + + cls = get_cursor(tu, 'X') + regular_enum = get_cursor(tu, 'RegularEnum') + scoped_enum = get_cursor(tu, 'ScopedEnum') + assert cls is not None + assert regular_enum is not None + assert scoped_enum is not None + + assert not cls.is_scoped_enum() + assert not regular_enum.is_scoped_enum() + assert scoped_enum.is_scoped_enum() + def test_underlying_type(): tu = get_tu('typedef int foo;') typedef = get_cursor(tu, 'foo') diff --git a/interpreter/llvm/src/tools/clang/bindings/python/tests/cindex/test_type.py b/interpreter/llvm/src/tools/clang/bindings/python/tests/cindex/test_type.py index f2184338be4b4..6ee0773828ecd 100644 --- a/interpreter/llvm/src/tools/clang/bindings/python/tests/cindex/test_type.py +++ b/interpreter/llvm/src/tools/clang/bindings/python/tests/cindex/test_type.py @@ -37,37 +37,44 @@ def test_a_struct(): assert not fields[0].type.is_const_qualified() assert fields[0].type.kind == TypeKind.INT assert fields[0].type.get_canonical().kind == TypeKind.INT + assert fields[0].type.get_typedef_name() == '' assert fields[1].spelling == 'b' assert not fields[1].type.is_const_qualified() assert fields[1].type.kind == TypeKind.TYPEDEF assert fields[1].type.get_canonical().kind == TypeKind.INT assert fields[1].type.get_declaration().spelling == 'I' + assert fields[1].type.get_typedef_name() == 'I' assert fields[2].spelling == 'c' assert not fields[2].type.is_const_qualified() assert fields[2].type.kind == TypeKind.LONG assert fields[2].type.get_canonical().kind == TypeKind.LONG + assert fields[2].type.get_typedef_name() == '' assert fields[3].spelling == 'd' assert not fields[3].type.is_const_qualified() assert fields[3].type.kind == TypeKind.ULONG assert fields[3].type.get_canonical().kind == TypeKind.ULONG + assert fields[3].type.get_typedef_name() == '' assert fields[4].spelling == 'e' assert not fields[4].type.is_const_qualified() assert fields[4].type.kind == TypeKind.LONG assert fields[4].type.get_canonical().kind == TypeKind.LONG + assert fields[4].type.get_typedef_name() == '' assert fields[5].spelling == 'f' assert fields[5].type.is_const_qualified() assert fields[5].type.kind == TypeKind.INT assert fields[5].type.get_canonical().kind == TypeKind.INT + assert fields[5].type.get_typedef_name() == '' assert fields[6].spelling == 'g' assert not fields[6].type.is_const_qualified() assert 
diff --git a/interpreter/llvm/src/tools/clang/cmake/caches/BaremetalARM.cmake b/interpreter/llvm/src/tools/clang/cmake/caches/BaremetalARM.cmake new file mode 100644 index 0000000000000..73f797d9c4b15 --- /dev/null +++ b/interpreter/llvm/src/tools/clang/cmake/caches/BaremetalARM.cmake @@ -0,0 +1,50 @@ +set(LLVM_TARGETS_TO_BUILD ARM;X86 CACHE STRING "") + +# Builtins +set(LLVM_BUILTIN_TARGETS "armv7m-none-eabi;armv6m-none-eabi;armv7em-none-eabi" CACHE STRING "Builtin Targets") + +set(BUILTINS_armv6m-none-eabi_CMAKE_SYSROOT ${BAREMETAL_ARMV6M_SYSROOT} CACHE STRING "armv6m-none-eabi Sysroot") +set(BUILTINS_armv6m-none-eabi_CMAKE_SYSTEM_NAME Generic CACHE STRING "armv6m-none-eabi System Name") +set(BUILTINS_armv6m-none-eabi_COMPILER_RT_BAREMETAL_BUILD ON CACHE BOOL "armv6m-none-eabi Baremetal build") +set(BUILTINS_armv6m-none-eabi_COMPILER_RT_OS_DIR "baremetal" CACHE STRING "armv6m-none-eabi os dir") + +set(BUILTINS_armv7m-none-eabi_CMAKE_SYSROOT ${BAREMETAL_ARMV7M_SYSROOT} CACHE STRING "armv7m-none-eabi Sysroot") +set(BUILTINS_armv7m-none-eabi_CMAKE_SYSTEM_NAME Generic CACHE STRING "armv7m-none-eabi System Name")
+set(BUILTINS_armv7m-none-eabi_COMPILER_RT_BAREMETAL_BUILD ON CACHE BOOL "armv7m-none-eabi Baremetal build") +set(BUILTINS_armv7m-none-eabi_CMAKE_C_FLAGS "-mfpu=fp-armv8" CACHE STRING "armv7m-none-eabi C Flags") +set(BUILTINS_armv7m-none-eabi_CMAKE_ASM_FLAGS "-mfpu=fp-armv8" CACHE STRING "armv7m-none-eabi ASM Flags") +set(BUILTINS_armv7m-none-eabi_COMPILER_RT_OS_DIR "baremetal" CACHE STRING "armv7m-none-eabi os dir") + +set(BUILTINS_armv7em-none-eabi_CMAKE_SYSROOT ${BAREMETAL_ARMV7EM_SYSROOT} CACHE STRING "armv7em-none-eabi Sysroot") +set(BUILTINS_armv7em-none-eabi_CMAKE_SYSTEM_NAME Generic CACHE STRING "armv7em-none-eabi System Name") +set(BUILTINS_armv7em-none-eabi_COMPILER_RT_BAREMETAL_BUILD ON CACHE BOOL "armv7em-none-eabi Baremetal build") +set(BUILTINS_armv7em-none-eabi_CMAKE_C_FLAGS "-mfpu=fp-armv8" CACHE STRING "armv7em-none-eabi C Flags") +set(BUILTINS_armv7em-none-eabi_CMAKE_ASM_FLAGS "-mfpu=fp-armv8" CACHE STRING "armv7em-none-eabi ASM Flags") +set(BUILTINS_armv7em-none-eabi_COMPILER_RT_OS_DIR "baremetal" CACHE STRING "armv7em-none-eabi os dir") + +set(LLVM_INSTALL_TOOLCHAIN_ONLY ON CACHE BOOL "") +set(LLVM_TOOLCHAIN_TOOLS + llc + llvm-ar + llvm-cxxfilt + llvm-dwarfdump + llvm-dsymutil + llvm-nm + llvm-objdump + llvm-ranlib + llvm-readobj + llvm-size + llvm-symbolizer + opt + CACHE STRING "") + +set(LLVM_DISTRIBUTION_COMPONENTS + clang + lld + clang-headers + builtins-armv6m-none-eabi + builtins-armv7m-none-eabi + builtins-armv7em-none-eabi + runtimes + ${LLVM_TOOLCHAIN_TOOLS} + CACHE STRING "") diff --git a/interpreter/llvm/src/tools/clang/cmake/caches/Fuchsia-stage2.cmake b/interpreter/llvm/src/tools/clang/cmake/caches/Fuchsia-stage2.cmake index ca43e603a6310..1b7b636fef3e5 100644 --- a/interpreter/llvm/src/tools/clang/cmake/caches/Fuchsia-stage2.cmake +++ b/interpreter/llvm/src/tools/clang/cmake/caches/Fuchsia-stage2.cmake @@ -7,7 +7,6 @@ set(PACKAGE_VENDOR Fuchsia CACHE STRING "") set(LLVM_INCLUDE_EXAMPLES OFF CACHE BOOL "") set(LLVM_INCLUDE_DOCS OFF CACHE BOOL "") -set(LLVM_TOOL_CLANG_TOOLS_EXTRA_BUILD OFF CACHE BOOL "") set(LLVM_ENABLE_ZLIB ON CACHE BOOL "") set(LLVM_ENABLE_BACKTRACES OFF CACHE BOOL "") set(LLVM_EXTERNALIZE_DEBUGINFO ON CACHE BOOL "") @@ -27,11 +26,27 @@ set(CMAKE_BUILD_TYPE RelWithDebInfo CACHE STRING "") set(CMAKE_C_FLAGS_RELWITHDEBINFO "-O2 -gline-tables-only -DNDEBUG" CACHE STRING "") set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O2 -gline-tables-only -DNDEBUG" CACHE STRING "") -set(LLVM_BUILTIN_TARGETS "x86_64-fuchsia-none;aarch64-fuchsia-none" CACHE STRING "") -set(BUILTINS_x86_64-fuchsia-none_CMAKE_SYSROOT ${FUCHSIA_SYSROOT} CACHE STRING "") -set(BUILTINS_x86_64-fuchsia-none_CMAKE_SYSTEM_NAME Fuchsia CACHE STRING "") -set(BUILTINS_aarch64-fuchsia-none_CMAKE_SYSROOT ${FUCHSIA_SYSROOT} CACHE STRING "") -set(BUILTINS_aarch64-fuchsia-none_CMAKE_SYSTEM_NAME Fuchsia CACHE STRING "") +set(LLVM_BUILTIN_TARGETS "x86_64-fuchsia;aarch64-fuchsia" CACHE STRING "") +foreach(target x86_64;aarch64) + set(BUILTINS_${target}-fuchsia_CMAKE_SYSROOT ${FUCHSIA_${target}_SYSROOT} CACHE PATH "") + set(BUILTINS_${target}-fuchsia_CMAKE_SYSTEM_NAME Fuchsia CACHE STRING "") +endforeach() +if(NOT APPLE) + list(APPEND LLVM_BUILTIN_TARGETS "default") +endif() + +set(LLVM_RUNTIME_TARGETS "default;x86_64-fuchsia;aarch64-fuchsia" CACHE STRING "") +foreach(target x86_64;aarch64) + set(RUNTIMES_${target}-fuchsia_CMAKE_BUILD_WITH_INSTALL_RPATH ON CACHE BOOL "") + set(RUNTIMES_${target}-fuchsia_CMAKE_SYSROOT ${FUCHSIA_${target}_SYSROOT} CACHE PATH "") + 
set(RUNTIMES_${target}-fuchsia_CMAKE_SYSTEM_NAME Fuchsia CACHE STRING "") + set(RUNTIMES_${target}-fuchsia_UNIX 1 CACHE BOOL "") + set(RUNTIMES_${target}-fuchsia_LLVM_ENABLE_LIBCXX ON CACHE BOOL "") + set(RUNTIMES_${target}-fuchsia_LIBUNWIND_USE_COMPILER_RT ON CACHE BOOL "") + set(RUNTIMES_${target}-fuchsia_LIBCXXABI_USE_COMPILER_RT ON CACHE BOOL "") + set(RUNTIMES_${target}-fuchsia_LIBCXXABI_USE_LLVM_UNWINDER ON CACHE BOOL "") + set(RUNTIMES_${target}-fuchsia_LIBCXX_USE_COMPILER_RT ON CACHE BOOL "") +endforeach() # Setup toolchain. set(LLVM_INSTALL_TOOLCHAIN_ONLY ON CACHE BOOL "") @@ -47,6 +62,7 @@ set(LLVM_TOOLCHAIN_TOOLS llvm-objdump llvm-profdata llvm-ranlib + llvm-readelf llvm-readobj llvm-size llvm-symbolizer @@ -61,8 +77,9 @@ set(LLVM_DISTRIBUTION_COMPONENTS LTO clang-format clang-headers - builtins-x86_64-fuchsia-none - builtins-aarch64-fuchsia-none + clang-tidy + clangd + builtins runtimes ${LLVM_TOOLCHAIN_TOOLS} CACHE STRING "") diff --git a/interpreter/llvm/src/tools/clang/cmake/caches/Fuchsia.cmake b/interpreter/llvm/src/tools/clang/cmake/caches/Fuchsia.cmake index c8a8cf6d58b7e..0932c046f628a 100644 --- a/interpreter/llvm/src/tools/clang/cmake/caches/Fuchsia.cmake +++ b/interpreter/llvm/src/tools/clang/cmake/caches/Fuchsia.cmake @@ -38,9 +38,11 @@ set(CLANG_BOOTSTRAP_TARGETS install-distribution clang CACHE STRING "") -if(FUCHSIA_SYSROOT) - set(EXTRA_ARGS -DFUCHSIA_SYSROOT=${FUCHSIA_SYSROOT}) -endif() +foreach(target x86_64;aarch64) + if(FUCHSIA_${target}_SYSROOT) + list(APPEND EXTRA_ARGS -DFUCHSIA_${target}_SYSROOT=${FUCHSIA_${target}_SYSROOT}) + endif() +endforeach() # Setup the bootstrap build. set(CLANG_ENABLE_BOOTSTRAP ON CACHE BOOL "") diff --git a/interpreter/llvm/src/tools/clang/docs/AttributeReference.rst b/interpreter/llvm/src/tools/clang/docs/AttributeReference.rst index a763ddeaeb106..58004a3c0a325 100644 --- a/interpreter/llvm/src/tools/clang/docs/AttributeReference.rst +++ b/interpreter/llvm/src/tools/clang/docs/AttributeReference.rst @@ -1,13 +1,3471 @@ .. ------------------------------------------------------------------- NOTE: This file is automatically generated by running clang-tblgen - -gen-attr-docs. Do not edit this file by hand!! The contents for - this file are automatically generated by a server-side process. - - Please do not commit this file. The file exists for local testing - purposes only. + -gen-attr-docs. Do not edit this file by hand!! ------------------------------------------------------------------- =================== Attributes in Clang -=================== \ No newline at end of file +=================== +.. contents:: + :local: + +Introduction +============ + +This page lists the attributes currently supported by Clang. + +Function Attributes +=================== + + +interrupt +--------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute" + + "X","","","", "", "" + +Clang supports the GNU style ``__attribute__((interrupt("TYPE")))`` attribute on +ARM targets. This attribute may be attached to a function definition and +instructs the backend to generate appropriate function entry/exit code so that +it can be used directly as an interrupt service routine. + +The parameter passed to the interrupt attribute is optional, but if +provided it must be a string literal with one of the following values: "IRQ", +"FIQ", "SWI", "ABORT", "UNDEF". + +The semantics are as follows: + +- If the function is AAPCS, Clang instructs the backend to realign the stack to + 8 bytes on entry. 
This is a general requirement of the AAPCS at public + interfaces, but may not hold when an exception is taken. Doing this allows + other AAPCS functions to be called. +- If the CPU is M-class this is all that needs to be done since the architecture + itself is designed in such a way that functions obeying the normal AAPCS ABI + constraints are valid exception handlers. +- If the CPU is not M-class, the prologue and epilogue are modified to save all + non-banked registers that are used, so that upon return the user-mode state + will not be corrupted. Note that to avoid unnecessary overhead, only + general-purpose (integer) registers are saved in this way. If VFP operations + are needed, that state must be saved manually. + + Specifically, interrupt kinds other than "FIQ" will save all core registers + except "lr" and "sp". "FIQ" interrupts will save r0-r7. +- If the CPU is not M-class, the return instruction is changed to one of the + canonical sequences permitted by the architecture for exception return. Where + possible the function itself will make the necessary "lr" adjustments so that + the "preferred return address" is selected. + + Unfortunately the compiler is unable to make this guarantee for an "UNDEF" + handler, where the offset from "lr" to the preferred return address depends on + the execution state of the code which generated the exception. In this case + a sequence equivalent to "movs pc, lr" will be used. + + +interrupt +--------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute" + + "X","","","", "", "X" + +Clang supports the GNU style ``__attribute__((interrupt))`` attribute on +AVR targets. This attribute may be attached to a function definition and instructs +the backend to generate appropriate function entry/exit code so that it can be used +directly as an interrupt service routine. + +On the AVR, the hardware globally disables interrupts when an interrupt is executed. +The first instruction of an interrupt handler declared with this attribute is a SEI +instruction to re-enable interrupts. See also the signal attribute that +does not insert a SEI instruction. + + +signal +------ +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute" + + "X","","","", "", "X" + +Clang supports the GNU style ``__attribute__((signal))`` attribute on +AVR targets. This attribute may be attached to a function definition and instructs +the backend to generate appropriate function entry/exit code so that it can be used +directly as an interrupt service routine. + +Interrupt handler functions defined with the signal attribute do not re-enable interrupts. + + +abi_tag (gnu::abi_tag) +---------------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute" + + "X","X","","", "", "X" + +The ``abi_tag`` attribute can be applied to a function, variable, class or +inline namespace declaration to modify the mangled name of the entity. It gives +the ability to distinguish between different versions of the same entity but +with different ABI versions supported. For example, a newer version of a class +could have a different set of data members and thus have a different size. Using +the ``abi_tag`` attribute, it is possible to have different mangled names for +a global variable of the class type. Therefore, the old code could keep using +the old mangled name and the new code will use the new mangled name with tags.
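To make the mangling effect concrete, here is a hypothetical sketch (the tag, the function, and the Itanium-mangled names in the comments are invented for illustration, not taken from the patch):

.. code-block:: c++

  // Old release: int get_size(const char *);  mangles as _Z8get_sizePKc.
  // A new release tags the changed entity, so its mangled name differs and
  // both symbols can coexist in one binary:
  __attribute__((abi_tag("v2")))
  int get_size(const char *path);  // mangles as _Z8get_sizeB2v2PKc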
+ + +acquire_capability (acquire_shared_capability, clang::acquire_capability, clang::acquire_shared_capability) +----------------------------------------------------------------------------------------------------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute" + + "X","X","","", "", "" + +Marks a function as acquiring a capability. + + +alloc_align (gnu::alloc_align) +------------------------------ +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute" + + "X","X","","", "", "" + +Use ``__attribute__((alloc_align(<alignment>)))`` on a function +declaration to specify that the return value of the function (which must be a +pointer type) is at least as aligned as the value of the indicated parameter. The +parameter is given by its index in the list of formal parameters; the first +parameter has index 1 unless the function is a C++ non-static member function, +in which case the first parameter has index 2 to account for the implicit ``this`` +parameter. + +.. code-block:: c++ + + // The returned pointer has the alignment specified by the first parameter. + void *a(size_t align) __attribute__((alloc_align(1))); + + // The returned pointer has the alignment specified by the second parameter. + void *b(void *v, size_t align) __attribute__((alloc_align(2))); + + // The returned pointer has the alignment specified by the second visible + // parameter, however it must be adjusted for the implicit 'this' parameter. + void *Foo::b(void *v, size_t align) __attribute__((alloc_align(3))); + +Note that this attribute merely informs the compiler that a function always +returns a sufficiently aligned pointer. It does not cause the compiler to +emit code to enforce that alignment. The behavior is undefined if the returned +pointer is not sufficiently aligned. + + +alloc_size (gnu::alloc_size) +---------------------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute" + + "X","X","","", "", "X" + +The ``alloc_size`` attribute can be placed on functions that return pointers in +order to hint to the compiler how many bytes of memory will be available at the +returned pointer. ``alloc_size`` takes one or two arguments. + +- ``alloc_size(N)`` implies that argument number N equals the number of + available bytes at the returned pointer. +- ``alloc_size(N, M)`` implies that the product of argument number N and + argument number M equals the number of available bytes at the returned + pointer. + +Argument numbers are 1-based. + +An example of how to use ``alloc_size``: + +.. code-block:: c + + void *my_malloc(int a) __attribute__((alloc_size(1))); + void *my_calloc(int a, int b) __attribute__((alloc_size(1, 2))); + + int main() { + void *const p = my_malloc(100); + assert(__builtin_object_size(p, 0) == 100); + void *const a = my_calloc(20, 5); + assert(__builtin_object_size(a, 0) == 100); + } + +.. Note:: This attribute works differently in clang than it does in GCC. + Specifically, clang will only trace ``const`` pointers (as above); we give up + on pointers that are not marked as ``const``. In the vast majority of cases, + this is unimportant, because LLVM has support for the ``alloc_size`` + attribute.
However, this may cause mildly unintuitive behavior when used with + other attributes, such as ``enable_if``. + + +interrupt +--------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute" + + "X","","","", "", "" + +Clang supports the GNU style ``__attribute__((interrupt))`` attribute on +x86/x86-64 targets. The compiler generates function entry and exit sequences +suitable for use in an interrupt handler when this attribute is present. +The 'IRET' instruction, instead of the 'RET' instruction, is used to return +from interrupt or exception handlers. All registers, except for the EFLAGS +register which is restored by the 'IRET' instruction, are preserved by the +compiler. + +Any interruptible-without-stack-switch code must be compiled with +-mno-red-zone since interrupt handlers can and will, because of the +hardware design, touch the red zone. + +1. The interrupt handler must be declared with a mandatory pointer argument: + + .. code-block:: c + + struct interrupt_frame + { + uword_t ip; + uword_t cs; + uword_t flags; + uword_t sp; + uword_t ss; + }; + + __attribute__ ((interrupt)) + void f (struct interrupt_frame *frame) { + ... + } + +2. The exception handler: + + The exception handler is very similar to the interrupt handler with + a different mandatory function signature: + + .. code-block:: c + + __attribute__ ((interrupt)) + void f (struct interrupt_frame *frame, uword_t error_code) { + ... + } + + and the compiler pops 'ERROR_CODE' off the stack before the 'IRET' instruction. + + The exception handler should only be used for exceptions which push an + error code and all other exceptions must use the interrupt handler. + The system will crash if the wrong handler is used. + + +no_caller_saved_registers (gnu::no_caller_saved_registers) +---------------------------------------------------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute" + + "X","X","","", "", "" + +Use this attribute to indicate that the specified function has no +caller-saved registers. That is, all registers are callee-saved except for +registers used for passing parameters to the function or returning parameters +from the function. +The compiler saves and restores any modified registers that were not used for +passing or returning arguments to the function. + +The user can call functions specified with the 'no_caller_saved_registers' +attribute from an interrupt handler without saving and restoring all +call-clobbered registers. + +Note that the 'no_caller_saved_registers' attribute is not a calling convention. +In fact, it only overrides the decision of which registers should be saved by +the caller, but not how the parameters are passed from the caller to the callee. + +For example: + + .. code-block:: c + + __attribute__ ((no_caller_saved_registers, fastcall)) + void f (int arg1, int arg2) { + ... + } + + In this case, parameters 'arg1' and 'arg2' will be passed in registers: on + 32-bit x86 targets, the function 'f' will use ECX and EDX as + register parameters. However, it will not assume any scratch registers and + should save and restore any modified registers except for ECX and EDX. + + +assert_capability (assert_shared_capability, clang::assert_capability, clang::assert_shared_capability) +------------------------------------------------------------------------------------------------------- +..
csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute" + + "X","X","","", "", "" + +Marks a function that dynamically tests whether a capability is held, and halts +the program if it is not held. + + +assume_aligned (gnu::assume_aligned) +------------------------------------ +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute" + + "X","X","","", "", "X" + +Use ``__attribute__((assume_aligned(<alignment>[, <offset>])))`` on a function +declaration to specify that the return value of the function (which must be a +pointer type) has the specified offset, in bytes, from an address with the +specified alignment. The offset is taken to be zero if omitted. + +.. code-block:: c++ + + // The returned pointer value has 32-byte alignment. + void *a() __attribute__((assume_aligned (32))); + + // The returned pointer value is 4 bytes greater than an address having + // 32-byte alignment. + void *b() __attribute__((assume_aligned (32, 4))); + +Note that this attribute provides information to the compiler regarding a +condition that the code already ensures is true. It does not cause the compiler +to enforce the provided alignment assumption. + + +availability +------------ +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute" + + "X","","","", "", "X" + +The ``availability`` attribute can be placed on declarations to describe the +lifecycle of that declaration relative to operating system versions. Consider +the function declaration for a hypothetical function ``f``: + +.. code-block:: c++ + + void f(void) __attribute__((availability(macos,introduced=10.4,deprecated=10.6,obsoleted=10.7))); + +The availability attribute states that ``f`` was introduced in macOS 10.4, +deprecated in macOS 10.6, and obsoleted in macOS 10.7. This information +is used by Clang to determine when it is safe to use ``f``: for example, if +Clang is instructed to compile code for macOS 10.5, a call to ``f()`` +succeeds. If Clang is instructed to compile code for macOS 10.6, the call +succeeds but Clang emits a warning specifying that the function is deprecated. +Finally, if Clang is instructed to compile code for macOS 10.7, the call +fails because ``f()`` is no longer available. + +The availability attribute is a comma-separated list starting with the +platform name and then including clauses specifying important milestones in the +declaration's lifetime (in any order) along with additional information. Those +clauses can be: + +introduced=\ *version* + The first version in which this declaration was introduced. + +deprecated=\ *version* + The first version in which this declaration was deprecated, meaning that + users should migrate away from this API. + +obsoleted=\ *version* + The first version in which this declaration was obsoleted, meaning that it + was removed completely and can no longer be used. + +unavailable + This declaration is never available on this platform. + +message=\ *string-literal* + Additional message text that Clang will provide when emitting a warning or + error about use of a deprecated or obsoleted declaration. Useful to direct + users to replacement APIs. + +replacement=\ *string-literal* + Additional message text that Clang will use to provide a Fix-It when emitting + a warning about use of a deprecated declaration. The Fix-It will replace + the deprecated declaration with the new declaration specified.
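As an illustration of how these clauses combine (the declarations below are hypothetical; ``message`` feeds the warning text and ``replacement`` drives the Fix-It just described):

.. code-block:: c++

  // Calls to old_api when targeting macOS 10.12 or later produce a
  // deprecation warning with the message below, plus a Fix-It that
  // rewrites the call to new_api.
  void old_api(void)
      __attribute__((availability(macos, introduced=10.4, deprecated=10.12,
                                  message="use new_api instead",
                                  replacement="new_api")));
  void new_api(void)
      __attribute__((availability(macos, introduced=10.12)));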
+ +Multiple availability attributes can be placed on a declaration, which may +correspond to different platforms. Only the availability attribute with the +platform corresponding to the target platform will be used; any others will be +ignored. If no availability attribute specifies availability for the current +target platform, the availability attributes are ignored. Supported platforms +are: + +``ios`` + Apple's iOS operating system. The minimum deployment target is specified by + the ``-mios-version-min=*version*`` or ``-miphoneos-version-min=*version*`` + command-line arguments. + +``macos`` + Apple's macOS operating system. The minimum deployment target is + specified by the ``-mmacosx-version-min=*version*`` command-line argument. + ``macosx`` is supported for backward-compatibility reasons, but it is + deprecated. + +``tvos`` + Apple's tvOS operating system. The minimum deployment target is specified by + the ``-mtvos-version-min=*version*`` command-line argument. + +``watchos`` + Apple's watchOS operating system. The minimum deployment target is specified by + the ``-mwatchos-version-min=*version*`` command-line argument. + +A declaration can typically be used even when deploying back to a platform +version prior to when the declaration was introduced. When this happens, the +declaration is `weakly linked +`_, +as if the ``weak_import`` attribute were added to the declaration. A +weakly-linked declaration may or may not be present at run-time, and a program +can determine whether the declaration is present by checking whether the +address of that declaration is non-NULL. + +The flag ``strict`` disallows using API when deploying back to a +platform version prior to when the declaration was introduced. An +attempt to use such API before its introduction causes a hard error. +Weak linking is almost always a better API choice, since it allows +users to query availability at runtime. + +If there are multiple declarations of the same entity, the availability +attributes must either match on a per-platform basis or later +declarations must not have availability attributes for that +platform. For example: + +.. code-block:: c + + void g(void) __attribute__((availability(macos,introduced=10.4))); + void g(void) __attribute__((availability(macos,introduced=10.4))); // okay, matches + void g(void) __attribute__((availability(ios,introduced=4.0))); // okay, adds a new platform + void g(void); // okay, inherits both macos and ios availability from above. + void g(void) __attribute__((availability(macos,introduced=10.5))); // error: mismatch + +When one method overrides another, the overriding method can be more widely available than the overridden method, e.g.,: + +.. code-block:: objc + + @interface A + - (id)method __attribute__((availability(macos,introduced=10.4))); + - (id)method2 __attribute__((availability(macos,introduced=10.4))); + @end + + @interface B : A + - (id)method __attribute__((availability(macos,introduced=10.3))); // okay: method moved into base class later + - (id)method2 __attribute__((availability(macos,introduced=10.5))); // error: this method was available via the base class in 10.4 + @end + +Starting with the macOS 10.12 SDK, the ``API_AVAILABLE`` macro from +``<os/availability.h>`` can simplify the spelling: + +.. code-block:: objc + + @interface A + - (id)method API_AVAILABLE(macos(10.11)); + - (id)otherMethod API_AVAILABLE(macos(10.11), ios(11.0)); + @end + +Also see the documentation for `@available +`_ + + +_Noreturn +--------- +..
csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute" + + "","","","X", "", "" + +A function declared as ``_Noreturn`` shall not return to its caller. The +compiler will generate a diagnostic for a function declared as ``_Noreturn`` +that appears to be capable of returning to its caller. + + +noreturn +-------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute" + + "","X","","", "", "X" + +A function declared as ``[[noreturn]]`` shall not return to its caller. The +compiler will generate a diagnostic for a function declared as ``[[noreturn]]`` +that appears to be capable of returning to its caller. + + +carries_dependency +------------------ +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute" + + "X","X","","", "", "X" + +The ``carries_dependency`` attribute specifies dependency propagation into and +out of functions. + +When specified on a function or Objective-C method, the ``carries_dependency`` +attribute means that the return value carries a dependency out of the function, +so that the implementation need not constrain ordering upon return from that +function. Implementations of the function and its caller may choose to preserve +dependencies instead of emitting memory ordering instructions such as fences. + +Note, this attribute does not change the meaning of the program, but may result +in generation of more efficient code. + + +convergent (clang::convergent) +------------------------------ +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute" + + "X","X","","", "", "X" + +The ``convergent`` attribute can be placed on a function declaration. It is +translated into the LLVM ``convergent`` attribute, which indicates that the call +instructions of a function with this attribute cannot be made control-dependent +on any additional values. + +In languages designed for SPMD/SIMT programming model, e.g. OpenCL or CUDA, +the call instructions of a function with this attribute must be executed by +all work items or threads in a work group or sub group. + +This attribute is different from ``noduplicate`` because it allows duplicating +function calls if it can be proved that the duplicated function calls are +not made control-dependent on any additional values, e.g., unrolling a loop +executed by all work items. + +Sample usage: +.. code-block:: c + + void convfunc(void) __attribute__((convergent)); + // Setting it as a C++11 attribute is also valid in a C++ program. + // void convfunc(void) [[clang::convergent]]; + + +deprecated (gnu::deprecated) +---------------------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute" + + "X","X","X","", "", "" + +The ``deprecated`` attribute can be applied to a function, a variable, or a +type. This is useful when identifying functions, variables, or types that are +expected to be removed in a future version of a program. + +Consider the function declaration for a hypothetical function ``f``: + +.. code-block:: c++ + + void f(void) __attribute__((deprecated("message", "replacement"))); + +When spelled as `__attribute__((deprecated))`, the deprecated attribute can have +two optional string arguments. 
The first one is the message to display when +emitting the warning; the second one enables the compiler to provide a Fix-It +to replace the deprecated name with a new name. Otherwise, when spelled as +`[[gnu::deprecated]] or [[deprecated]]`, the attribute can have one optional +string argument which is the message to display when emitting the warning. + + +diagnose_if +----------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute" + + "X","","","", "", "" + +The ``diagnose_if`` attribute can be placed on function declarations to emit +warnings or errors at compile-time if calls to the attributed function meet +certain user-defined criteria. For example: + +.. code-block:: c + + void abs(int a) + __attribute__((diagnose_if(a >= 0, "Redundant abs call", "warning"))); + void must_abs(int a) + __attribute__((diagnose_if(a >= 0, "Redundant abs call", "error"))); + + int val = abs(1); // warning: Redundant abs call + int val2 = must_abs(1); // error: Redundant abs call + int val3 = abs(val); + int val4 = must_abs(val); // Because run-time checks are not emitted for + // diagnose_if attributes, this executes without + // issue. + + +``diagnose_if`` is closely related to ``enable_if``, with a few key differences: + +* Overload resolution is not aware of ``diagnose_if`` attributes: they're + considered only after we select the best candidate from a given candidate set. +* Function declarations that differ only in their ``diagnose_if`` attributes are + considered to be redeclarations of the same function (not overloads). +* If the condition provided to ``diagnose_if`` cannot be evaluated, no + diagnostic will be emitted. + +Otherwise, ``diagnose_if`` is essentially the logical negation of ``enable_if``. + +As a result of bullet number two, ``diagnose_if`` attributes will stack on the +same function. For example: + +.. code-block:: c + + int foo() __attribute__((diagnose_if(1, "diag1", "warning"))); + int foo() __attribute__((diagnose_if(1, "diag2", "warning"))); + + int bar = foo(); // warning: diag1 + // warning: diag2 + int (*fooptr)(void) = foo; // warning: diag1 + // warning: diag2 + + constexpr int supportsAPILevel(int N) { return N < 5; } + int baz(int a) + __attribute__((diagnose_if(!supportsAPILevel(10), + "Upgrade to API level 10 to use baz", "error"))); + int baz(int a) + __attribute__((diagnose_if(!a, "0 is not recommended.", "warning"))); + + int (*bazptr)(int) = baz; // error: Upgrade to API level 10 to use baz + int v = baz(0); // error: Upgrade to API level 10 to use baz + +Query for this feature with ``__has_attribute(diagnose_if)``. + + +disable_tail_calls (clang::disable_tail_calls) +---------------------------------------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute" + + "X","X","","", "", "X" + +The ``disable_tail_calls`` attribute instructs the backend to not perform tail call optimization inside the marked function. + +For example: + + .. code-block:: c + + int callee(int); + + int foo(int a) __attribute__((disable_tail_calls)) { + return callee(a); // This call is not tail-call optimized. + } + +Marking virtual functions as ``disable_tail_calls`` is legal. + + .. code-block:: c++ + + int callee(int); + + class Base { + public: + [[clang::disable_tail_calls]] virtual int foo1() { + return callee(); // This call is not tail-call optimized. 
+ } + }; + + class Derived1 : public Base { + public: + int foo1() override { + return callee(); // This call is tail-call optimized. + } + }; + + +enable_if +--------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute" + + "X","","","", "", "X" + +.. Note:: Some features of this attribute are experimental. The meaning of + multiple enable_if attributes on a single declaration is subject to change in + a future version of clang. Also, the ABI is not standardized and the name + mangling may change in future versions. To avoid that, use asm labels. + +The ``enable_if`` attribute can be placed on function declarations to control +which overload is selected based on the values of the function's arguments. +When combined with the ``overloadable`` attribute, this feature is also +available in C. + +.. code-block:: c++ + + int isdigit(int c); + int isdigit(int c) __attribute__((enable_if(c <= -1 || c > 255, "chosen when 'c' is out of range"))) __attribute__((unavailable("'c' must have the value of an unsigned char or EOF"))); + + void foo(char c) { + isdigit(c); + isdigit(10); + isdigit(-10); // results in a compile-time error. + } + +The enable_if attribute takes two arguments: the first is an expression written +in terms of the function parameters, the second is a string explaining why this +overload candidate could not be selected, to be displayed in diagnostics. The +expression is part of the function signature for the purposes of determining +whether it is a redeclaration (following the rules used when determining +whether a C++ template specialization is ODR-equivalent), but is not part of +the type. + +The enable_if expression is evaluated as if it were the body of a +bool-returning constexpr function declared with the arguments of the function +it is being applied to, then called with the parameters at the call site. If the +result is false or could not be determined through constant expression +evaluation, then this overload will not be chosen and the provided string may +be used in a diagnostic if the compilation fails as a result. + +Because the enable_if expression is an unevaluated context, there are no global +state changes, nor the ability to pass information from the enable_if +expression to the function body. For example, suppose we want calls to +strnlen(strbuf, maxlen) to resolve to strnlen_chk(strbuf, maxlen, size of +strbuf) only if the size of strbuf can be determined: + +.. code-block:: c++ + + __attribute__((always_inline)) + static inline size_t strnlen(const char *s, size_t maxlen) + __attribute__((overloadable)) + __attribute__((enable_if(__builtin_object_size(s, 0) != -1, + "chosen when the buffer size is known but 'maxlen' is not"))) + { + return strnlen_chk(s, maxlen, __builtin_object_size(s, 0)); + } + +Multiple enable_if attributes may be applied to a single declaration. In this +case, the enable_if expressions are evaluated from left to right in the +following manner. First, the candidates whose enable_if expressions evaluate to +false or cannot be evaluated are discarded. If the remaining candidates do not +share ODR-equivalent enable_if expressions, the overload resolution is +ambiguous. Otherwise, enable_if overload resolution continues with the next +enable_if attribute on the candidates that have not been discarded and have +remaining enable_if attributes. In this way, we pick the most specific +overload out of a number of viable overloads using enable_if. + +..
code-block:: c++ + + void f() __attribute__((enable_if(true, ""))); // #1 + void f() __attribute__((enable_if(true, ""))) __attribute__((enable_if(true, ""))); // #2 + + void g(int i, int j) __attribute__((enable_if(i, ""))); // #1 + void g(int i, int j) __attribute__((enable_if(j, ""))) __attribute__((enable_if(true, ""))); // #2 + +In this example, a call to f() is always resolved to #2, as the first enable_if +expression is ODR-equivalent for both declarations, but #1 does not have another +enable_if expression to continue evaluating, so the next round of evaluation has +only a single candidate. In a call to g(1, 1), the call is ambiguous even though +#2 has more enable_if attributes, because the first enable_if expressions are +not ODR-equivalent. + +Query for this feature with ``__has_attribute(enable_if)``. + +Note that functions with one or more ``enable_if`` attributes may not have +their address taken, unless all of the conditions specified by said +``enable_if`` are constants that evaluate to ``true``. For example: + +.. code-block:: c + + const int TrueConstant = 1; + const int FalseConstant = 0; + int f(int a) __attribute__((enable_if(a > 0, ""))); + int g(int a) __attribute__((enable_if(a == 0 || a != 0, ""))); + int h(int a) __attribute__((enable_if(1, ""))); + int i(int a) __attribute__((enable_if(TrueConstant, ""))); + int j(int a) __attribute__((enable_if(FalseConstant, ""))); + + void fn() { + int (*ptr)(int); + ptr = &f; // error: 'a > 0' is not always true + ptr = &g; // error: 'a == 0 || a != 0' is not a truthy constant + ptr = &h; // OK: 1 is a truthy constant + ptr = &i; // OK: 'TrueConstant' is a truthy constant + ptr = &j; // error: 'FalseConstant' is a constant, but not truthy + } + +Because ``enable_if`` evaluation happens during overload resolution, +``enable_if`` may give unintuitive results when used with templates, depending +on when overloads are resolved. In the example below, clang will emit a +diagnostic about no viable overloads for ``foo`` in ``bar``, but not in ``baz``: + +.. code-block:: c++ + + double foo(int i) __attribute__((enable_if(i > 0, ""))); + void *foo(int i) __attribute__((enable_if(i <= 0, ""))); + template <int I> + auto bar() { return foo(I); } + + template <typename T> + auto baz() { return foo(T::number); } + + struct WithNumber { constexpr static int number = 1; }; + void callThem() { + bar<sizeof(WithNumber)>(); + baz<WithNumber>(); + } + +This is because, in ``bar``, ``foo`` is resolved prior to template +instantiation, so the value for ``I`` isn't known (thus, both ``enable_if`` +conditions for ``foo`` fail). However, in ``baz``, ``foo`` is resolved during +template instantiation, so the value for ``T::number`` is known. + + +external_source_symbol (clang::external_source_symbol) +------------------------------------------------------ +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute" + + "X","X","","", "", "X" + +The ``external_source_symbol`` attribute specifies that a declaration originates +from an external source and describes the nature of that source. + +The fact that Clang is capable of recognizing declarations that were defined +externally can be used to provide better tooling support for mixed-language +projects or projects that rely on auto-generated code. For instance, an IDE that +uses Clang and that supports mixed-language projects can use this attribute to +provide a correct 'jump-to-definition' feature. For a concrete example, +consider a protocol that's defined in a Swift file: + +..
code-block:: swift + + @objc public protocol SwiftProtocol { + func method() + } + +This protocol can be used from Objective-C code by including a header file that +was generated by the Swift compiler. The declarations in that header can use +the ``external_source_symbol`` attribute to make Clang aware of the fact +that ``SwiftProtocol`` actually originates from a Swift module: + +.. code-block:: objc + + __attribute__((external_source_symbol(language="Swift",defined_in="module"))) + @protocol SwiftProtocol + @required + - (void) method; + @end + +Consequently, when 'jump-to-definition' is performed at a location that +references ``SwiftProtocol``, the IDE can jump to the original definition in +the Swift source file rather than jumping to the Objective-C declaration in the +auto-generated header file. + +The ``external_source_symbol`` attribute is a comma-separated list that includes +clauses that describe the origin and the nature of the particular declaration. +Those clauses can be: + +language=\ *string-literal* + The name of the source language in which this declaration was defined. + +defined_in=\ *string-literal* + The name of the source container in which the declaration was defined. The + exact definition of source container is language-specific, e.g. Swift's + source containers are modules, so ``defined_in`` should specify the Swift + module name. + +generated_declaration + This declaration was automatically generated by some tool. + +The clauses can be specified in any order. The clauses that are listed above are +all optional, but the attribute has to have at least one clause. + + +flatten (gnu::flatten) +---------------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute" + + "X","X","","", "", "X" + +The ``flatten`` attribute causes calls within the attributed function to +be inlined unless it is impossible to do so, for example if the body of the +callee is unavailable or if the callee has the ``noinline`` attribute. + + +format (gnu::format) +-------------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute" + + "X","X","","", "", "" + +Clang supports the ``format`` attribute, which indicates that the function +accepts a ``printf`` or ``scanf``-like format string and corresponding +arguments or a ``va_list`` that contains these arguments. + +Please see `GCC documentation about format attribute +`_ to find details +about attribute syntax. + +Clang implements two kinds of checks with this attribute. + +#. Clang checks that the function with the ``format`` attribute is called with + a format string that uses format specifiers that are allowed, and that + arguments match the format string. This is the ``-Wformat`` warning, it is + on by default. + +#. Clang checks that the format string argument is a literal string. This is + the ``-Wformat-nonliteral`` warning, it is off by default. + + Clang implements this mostly the same way as GCC, but there is a difference + for functions that accept a ``va_list`` argument (for example, ``vprintf``). + GCC does not emit ``-Wformat-nonliteral`` warning for calls to such + functions. Clang does not warn if the format string comes from a function + parameter, where the function is annotated with a compatible attribute, + otherwise it warns. For example: + + .. code-block:: c + + __attribute__((__format__ (__scanf__, 1, 3))) + void foo(const char* s, char *buf, ...) 
{ + va_list ap; + va_start(ap, buf); + + vprintf(s, ap); // warning: format string is not a string literal + } + + In this case we warn because ``s`` contains a format string for a + ``scanf``-like function, but it is passed to a ``printf``-like function. + + If the attribute is removed, clang still warns, because the format string is + not a string literal. + + Another example: + + .. code-block:: c + + __attribute__((__format__ (__printf__, 1, 3))) + void foo(const char* s, char *buf, ...) { + va_list ap; + va_start(ap, buf); + + vprintf(s, ap); // warning + } + + In this case Clang does not warn because the format string ``s`` and + the corresponding arguments are annotated. If the arguments are + incorrect, the caller of ``foo`` will receive a warning. + + +ifunc (gnu::ifunc) +------------------ +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute" + + "X","X","","", "", "X" + +``__attribute__((ifunc("resolver")))`` is used to mark that the address of a declaration should be resolved at runtime by calling a resolver function. + +The symbol name of the resolver function is given in quotes. A function with this name (after mangling) must be defined in the current translation unit; it may be ``static``. The resolver function should take no arguments and return a pointer. + +The ``ifunc`` attribute may only be used on a function declaration. A function declaration with an ``ifunc`` attribute is considered to be a definition of the declared entity. The entity must not have weak linkage; for example, in C++, it cannot be applied to a declaration if a definition at that location would be considered inline. + +Not all targets support this attribute. ELF targets support this attribute when using binutils v2.20.1 or higher and glibc v2.11.1 or higher. Non-ELF targets currently do not support this attribute. + + +internal_linkage (clang::internal_linkage) +------------------------------------------ +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute" + + "X","X","","", "", "X" + +The ``internal_linkage`` attribute changes the linkage type of the declaration to internal. +This is similar to C-style ``static``, but can be used on classes and class methods. When applied to a class definition, +this attribute affects all methods and static data members of that class. +This can be used to contain the ABI of a C++ library by excluding unwanted class methods from the export tables. + + +micromips (gnu::micromips) +-------------------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute" + + "X","X","","", "", "X" + +Clang supports the GNU style ``__attribute__((micromips))`` and +``__attribute__((nomicromips))`` attributes on MIPS targets. These attributes +may be attached to a function definition and instructs the backend to generate +or not to generate microMIPS code for that function. + +These attributes override the `-mmicromips` and `-mno-micromips` options +on the command line. + + +interrupt +--------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute" + + "X","","","", "", "X" + +Clang supports the GNU style ``__attribute__((interrupt("ARGUMENT")))`` attribute on +MIPS targets. 
This attribute may be attached to a function definition and instructs +the backend to generate appropriate function entry/exit code so that it can be used +directly as an interrupt service routine. + +By default, the compiler will produce a function prologue and epilogue suitable for +an interrupt service routine that handles an External Interrupt Controller (eic) +generated interrupt. This behaviour can be explicitly requested with the "eic" +argument. + +Otherwise, for use with vectored interrupt mode, the argument passed should be +of the form "vector=LEVEL" where LEVEL is one of the following values: +"sw0", "sw1", "hw0", "hw1", "hw2", "hw3", "hw4", "hw5". The compiler will +then set the interrupt mask to the corresponding level which will mask all +interrupts up to and including the argument. + +The semantics are as follows: + +- The prologue is modified so that the Exception Program Counter (EPC) and + Status coprocessor registers are saved to the stack. The interrupt mask is + set so that the function can only be interrupted by a higher priority + interrupt. The epilogue will restore the previous values of EPC and Status. + +- The prologue and epilogue are modified to save and restore all non-kernel + registers as necessary. + +- The FPU is disabled in the prologue, as the floating-point registers are not + spilled to the stack. + +- The function return sequence is changed to use an exception return instruction. + +- The parameter sets the interrupt mask for the function corresponding to the + interrupt level specified. If no mask is specified the interrupt mask + defaults to "eic". + + +noalias +------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute" + + "","","X","", "", "" + +The ``noalias`` attribute indicates that the only memory accesses inside the +function are loads and stores from objects pointed to by its pointer-typed +arguments, with arbitrary offsets. + + +noduplicate (clang::noduplicate) +-------------------------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute" + + "X","X","","", "", "X" + +The ``noduplicate`` attribute can be placed on function declarations to control +whether function calls to this function can be duplicated or not as a result of +optimizations. This is required for the implementation of functions with +certain special requirements, like the OpenCL "barrier" function, that might +need to be run concurrently by all the threads that are executing in lockstep +on the hardware. For example, this attribute applied on the function +"nodupfunc" in the code below avoids that: + +.. code-block:: c + + void nodupfunc() __attribute__((noduplicate)); + // Setting it as a C++11 attribute is also valid + // void nodupfunc() [[clang::noduplicate]]; + void foo(); + void bar(); + + nodupfunc(); + if (a > n) { + foo(); + } else { + bar(); + } + +gets possibly modified by some optimizations into code similar to this: + +.. code-block:: c + + if (a > n) { + nodupfunc(); + foo(); + } else { + nodupfunc(); + bar(); + } + +where the call to "nodupfunc" is duplicated and sunk into the two branches +of the condition. + + +nomicromips (gnu::nomicromips) +------------------------------ +..
csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute" + + "X","X","","", "", "X" + +Clang supports the GNU style ``__attribute__((micromips))`` and +``__attribute__((nomicromips))`` attributes on MIPS targets. These attributes +may be attached to a function definition and instructs the backend to generate +or not to generate microMIPS code for that function. + +These attributes override the `-mmicromips` and `-mno-micromips` options +on the command line. + + +no_sanitize (clang::no_sanitize) +-------------------------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute" + + "X","X","","", "", "X" + +Use the ``no_sanitize`` attribute on a function declaration to specify +that a particular instrumentation or set of instrumentations should not be +applied to that function. The attribute takes a list of string literals, +which have the same meaning as values accepted by the ``-fno-sanitize=`` +flag. For example, ``__attribute__((no_sanitize("address", "thread")))`` +specifies that AddressSanitizer and ThreadSanitizer should not be applied +to the function. + +See :ref:`Controlling Code Generation ` for a +full list of supported sanitizer flags. + + +no_sanitize_address (no_address_safety_analysis, gnu::no_address_safety_analysis, gnu::no_sanitize_address) +----------------------------------------------------------------------------------------------------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute" + + "X","X","","", "", "X" + +.. _langext-address_sanitizer: + +Use ``__attribute__((no_sanitize_address))`` on a function declaration to +specify that address safety instrumentation (e.g. AddressSanitizer) should +not be applied to that function. + + +no_sanitize_thread +------------------ +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute" + + "X","X","","", "", "X" + +.. _langext-thread_sanitizer: + +Use ``__attribute__((no_sanitize_thread))`` on a function declaration to +specify that checks for data races on plain (non-atomic) memory accesses should +not be inserted by ThreadSanitizer. The function is still instrumented by the +tool to avoid false positives and provide meaningful stack traces. + + +no_sanitize_memory +------------------ +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute" + + "X","X","","", "", "X" + +.. _langext-memory_sanitizer: + +Use ``__attribute__((no_sanitize_memory))`` on a function declaration to +specify that checks for uninitialized memory should not be inserted +(e.g. by MemorySanitizer). The function may still be instrumented by the tool +to avoid false positives in other places. + + +no_split_stack (gnu::no_split_stack) +------------------------------------ +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute" + + "X","X","","", "", "X" + +The ``no_split_stack`` attribute disables the emission of the split stack +preamble for a particular function. It has no effect if ``-fsplit-stack`` +is not specified. + + +not_tail_called (clang::not_tail_called) +---------------------------------------- +.. 
csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute" + + "X","X","","", "", "X" + +The ``not_tail_called`` attribute prevents tail-call optimization on statically bound calls. It has no effect on indirect calls. Virtual functions, objective-c methods, and functions marked as ``always_inline`` cannot be marked as ``not_tail_called``. + +For example, it prevents tail-call optimization in the following case: + + .. code-block:: c + + int __attribute__((not_tail_called)) foo1(int); + + int foo2(int a) { + return foo1(a); // No tail-call optimization on direct calls. + } + +However, it doesn't prevent tail-call optimization in this case: + + .. code-block:: c + + int __attribute__((not_tail_called)) foo1(int); + + int foo2(int a) { + int (*fn)(int) = &foo1; + + // not_tail_called has no effect on an indirect call even if the call can be + // resolved at compile time. + return (*fn)(a); + } + +Marking virtual functions as ``not_tail_called`` is an error: + + .. code-block:: c++ + + class Base { + public: + // not_tail_called on a virtual function is an error. + [[clang::not_tail_called]] virtual int foo1(); + + virtual int foo2(); + + // Non-virtual functions can be marked ``not_tail_called``. + [[clang::not_tail_called]] int foo3(); + }; + + class Derived1 : public Base { + public: + int foo1() override; + + // not_tail_called on a virtual function is an error. + [[clang::not_tail_called]] int foo2() override; + }; + + +#pragma omp declare simd +------------------------ +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute" + + "","","","", "X", "" + +The `declare simd` construct can be applied to a function to enable the creation +of one or more versions that can process multiple arguments using SIMD +instructions from a single invocation in a SIMD loop. The `declare simd` +directive is a declarative directive. There may be multiple `declare simd` +directives for a function. The use of a `declare simd` construct on a function +enables the creation of SIMD versions of the associated function that can be +used to process multiple arguments from a single invocation from a SIMD loop +concurrently. +The syntax of the `declare simd` construct is as follows: + + .. code-block:: c + + #pragma omp declare simd [clause[[,] clause] ...] new-line + [#pragma omp declare simd [clause[[,] clause] ...] new-line] + [...] + function definition or declaration + +where clause is one of the following: + + .. code-block:: c + + simdlen(length) + linear(argument-list[:constant-linear-step]) + aligned(argument-list[:alignment]) + uniform(argument-list) + inbranch + notinbranch + + +#pragma omp declare target +-------------------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute" + + "","","","", "X", "" + +The `declare target` directive specifies that variables and functions are mapped +to a device for OpenMP offload mechanism. + +The syntax of the declare target directive is as follows: + + .. code-block:: c + + #pragma omp declare target new-line + declarations-definition-seq + #pragma omp end declare target new-line + + +objc_boxable +------------ +.. 
+
+
+objc_boxable
+------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute"
+
+   "X","","","", "", "X"
+
+Structs and unions marked with the ``objc_boxable`` attribute can be used
+with the Objective-C boxed expression syntax, ``@(...)``.
+
+**Usage**: ``__attribute__((objc_boxable))``. This attribute
+can only be placed on a declaration of a trivially-copyable struct or union:
+
+.. code-block:: objc
+
+  struct __attribute__((objc_boxable)) some_struct {
+    int i;
+  };
+  union __attribute__((objc_boxable)) some_union {
+    int i;
+    float f;
+  };
+  typedef struct __attribute__((objc_boxable)) _some_struct some_struct;
+
+  // ...
+
+  some_struct ss;
+  NSValue *boxed = @(ss);
+
+
+objc_method_family
+------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute"
+
+   "X","","","", "", "X"
+
+Many methods in Objective-C have conventional meanings determined by their
+selectors. It is sometimes useful to be able to mark a method as having a
+particular conventional meaning despite not having the right selector, or as
+not having the conventional meaning that its selector would suggest. For these
+use cases, we provide an attribute to specifically describe the "method family"
+that a method belongs to.
+
+**Usage**: ``__attribute__((objc_method_family(X)))``, where ``X`` is one of
+``none``, ``alloc``, ``copy``, ``init``, ``mutableCopy``, or ``new``. This
+attribute can only be placed at the end of a method declaration:
+
+.. code-block:: objc
+
+  - (NSString *)initMyStringValue __attribute__((objc_method_family(none)));
+
+Users who do not wish to change the conventional meaning of a method, and who
+merely want to document its non-standard retain and release semantics, should
+use the retaining behavior attributes (``ns_returns_retained``,
+``ns_returns_not_retained``, etc).
+
+Query for this feature with ``__has_attribute(objc_method_family)``.
+
+
+objc_requires_super
+-------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute"
+
+   "X","","","", "", "X"
+
+Some Objective-C classes allow a subclass to override a particular method in a
+parent class but expect that the overriding method also calls the overridden
+method in the parent class. For these cases, we provide an attribute to
+designate that a method requires a "call to ``super``" in the overriding
+method in the subclass.
+
+**Usage**: ``__attribute__((objc_requires_super))``. This attribute can only
+be placed at the end of a method declaration:
+
+.. code-block:: objc
+
+  - (void)foo __attribute__((objc_requires_super));
+
+This attribute can only be applied to method declarations within a class, and
+not to a protocol. Currently this attribute does not enforce any placement of
+where the call occurs in the overriding method (such as in the case of
+``-dealloc`` where the call must appear at the end). It checks only that the
+call exists.
+
+Note that on both OS X and iOS, the Foundation framework provides a
+convenience macro ``NS_REQUIRES_SUPER`` that provides syntactic sugar for this
+attribute:
+
+.. code-block:: objc
+
+  - (void)foo NS_REQUIRES_SUPER;
+
+This macro is conditionally defined depending on the compiler's support for
+this attribute. If the compiler does not support the attribute the macro
+expands to nothing.
+
+Operationally, when a method has this annotation, the compiler will warn if the
+implementation of an override in a subclass does not call super. For example:
+
+.. code-block:: objc
+
+  warning: method possibly missing a [super AnnotMeth] call
+  - (void) AnnotMeth{};
+                      ^
+
+
+objc_runtime_name
+-----------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute"
+
+   "X","","","", "", "X"
+
+By default, the Objective-C interface or protocol identifier is used
+in the metadata name for that object. The `objc_runtime_name`
+attribute allows annotated interfaces or protocols to use the
+specified string argument in the object's metadata name instead of the
+default name.
+
+**Usage**: ``__attribute__((objc_runtime_name("MyLocalName")))``. This attribute
+can only be placed before an @protocol or @interface declaration:
+
+.. code-block:: objc
+
+  __attribute__((objc_runtime_name("MyLocalName")))
+  @interface Message
+  @end
+
+
+objc_runtime_visible
+--------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute"
+
+   "X","","","", "", "X"
+
+This attribute specifies that the Objective-C class to which it applies is
+visible to the Objective-C runtime but not to the linker. Classes annotated
+with this attribute cannot be subclassed and cannot have categories defined
+for them.
+
+
+optnone (clang::optnone)
+------------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute"
+
+   "X","X","","", "", "X"
+
+The ``optnone`` attribute suppresses essentially all optimizations
+on a function or method, regardless of the optimization level applied to
+the compilation unit as a whole. This is particularly useful when you
+need to debug a particular function, but it is infeasible to build the
+entire application without optimization. Avoiding optimization on the
+specified function can improve the quality of the debugging information
+for that function.
+
+This attribute is incompatible with the ``always_inline`` and ``minsize``
+attributes.
+
+
+overloadable
+------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute"
+
+   "X","","","", "", "X"
+
+Clang provides support for C++ function overloading in C. Function overloading
+in C is introduced using the ``overloadable`` attribute. For example, one
+might provide several overloaded versions of a ``tgsin`` function that invokes
+the appropriate standard function computing the sine of a value with ``float``,
+``double``, or ``long double`` precision:
+
+.. code-block:: c
+
+  #include <math.h>
+  float __attribute__((overloadable)) tgsin(float x) { return sinf(x); }
+  double __attribute__((overloadable)) tgsin(double x) { return sin(x); }
+  long double __attribute__((overloadable)) tgsin(long double x) { return sinl(x); }
+
+Given these declarations, one can call ``tgsin`` with a ``float`` value to
+receive a ``float`` result, with a ``double`` to receive a ``double`` result,
+etc. Function overloading in C follows the rules of C++ function overloading
+to pick the best overload given the call arguments, with a few C-specific
+semantics:
+
+* Conversion from ``float`` or ``double`` to ``long double`` is ranked as a
+  floating-point promotion (per C99) rather than as a floating-point conversion
+  (as in C++).
+
+* A conversion from a pointer of type ``T*`` to a pointer of type ``U*`` is
+  considered a pointer conversion (with conversion rank) if ``T`` and ``U`` are
+  compatible types.
+
+* A conversion from type ``T`` to a value of type ``U`` is permitted if ``T``
+  and ``U`` are compatible types. This conversion is given "conversion" rank.
+
+* If no viable candidates are otherwise available, we allow a conversion from a
+  pointer of type ``T*`` to a pointer of type ``U*``, where ``T`` and ``U`` are
+  incompatible. This conversion is ranked below all other types of conversions.
+  Please note: ``U`` lacking qualifiers that are present on ``T`` is sufficient
+  for ``T`` and ``U`` to be incompatible.
+
+The declaration of ``overloadable`` functions is restricted to function
+declarations and definitions. If a function is marked with the ``overloadable``
+attribute, then all declarations and definitions of functions with that name,
+except for at most one (see the note below about unmarked overloads), must have
+the ``overloadable`` attribute. In addition, redeclarations of a function with
+the ``overloadable`` attribute must have the ``overloadable`` attribute, and
+redeclarations of a function without the ``overloadable`` attribute must *not*
+have the ``overloadable`` attribute. e.g.,
+
+.. code-block:: c
+
+  int f(int) __attribute__((overloadable));
+  float f(float); // error: declaration of "f" must have the "overloadable" attribute
+  int f(int); // error: redeclaration of "f" must have the "overloadable" attribute
+
+  int g(int) __attribute__((overloadable));
+  int g(int) { } // error: redeclaration of "g" must also have the "overloadable" attribute
+
+  int h(int);
+  int h(int) __attribute__((overloadable)); // error: declaration of "h" must not
+                                            // have the "overloadable" attribute
+
+Functions marked ``overloadable`` must have prototypes. Therefore, the
+following code is ill-formed:
+
+.. code-block:: c
+
+  int h() __attribute__((overloadable)); // error: h does not have a prototype
+
+However, ``overloadable`` functions are allowed to use an ellipsis even if there
+are no named parameters (as is permitted in C++). This feature is particularly
+useful when combined with the ``unavailable`` attribute:
+
+.. code-block:: c++
+
+  void honeypot(...) __attribute__((overloadable, unavailable)); // calling me is an error
+
+Functions declared with the ``overloadable`` attribute have their names mangled
+according to the same rules as C++ function names. For example, the three
+``tgsin`` functions in our motivating example get the mangled names
+``_Z5tgsinf``, ``_Z5tgsind``, and ``_Z5tgsine``, respectively. There are two
+caveats to this use of name mangling:
+
+* Future versions of Clang may change the name mangling of functions overloaded
+  in C, so you should not depend on a specific mangling. To be completely
+  safe, we strongly urge the use of ``static inline`` with ``overloadable``
+  functions.
+
+* The ``overloadable`` attribute has almost no meaning when used in C++,
+  because names will already be mangled and functions are already overloadable.
+  However, when an ``overloadable`` function occurs within an ``extern "C"``
+  linkage specification, its name *will* be mangled in the same way as it
+  would in C.
+
+For the purpose of backwards compatibility, at most one function with the same
+name as other ``overloadable`` functions may omit the ``overloadable``
+attribute. In this case, the function without the ``overloadable`` attribute
+will not have its name mangled.
+
+For example:
+
+.. code-block:: c
+
+  // Notes with mangled names assume Itanium mangling.
+  int f(int);
+  int f(double) __attribute__((overloadable));
+  void foo() {
+    f(5); // Emits a call to f (not _Z1fi, as it would with an overload that
+          // was marked with overloadable).
+    f(1.0); // Emits a call to _Z1fd.
+  }
+
+Support for unmarked overloads is not present in some versions of Clang. You may
+query for it using ``__has_extension(overloadable_unmarked)``.
+
+Query for this attribute with ``__has_attribute(overloadable)``.
+
+
+release_capability (release_shared_capability, clang::release_capability, clang::release_shared_capability)
+-----------------------------------------------------------------------------------------------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute"
+
+   "X","X","","", "", ""
+
+Marks a function as releasing a capability.
+
+
+kernel
+------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute"
+
+   "X","","","", "", "X"
+
+``__attribute__((kernel))`` is used to mark a ``kernel`` function in
+RenderScript.
+
+In RenderScript, ``kernel`` functions are used to express data-parallel
+computations. The RenderScript runtime efficiently parallelizes ``kernel``
+functions to run on computational resources such as multi-core CPUs and GPUs.
+See the RenderScript_ documentation for more information.
+
+.. _RenderScript: https://developer.android.com/guide/topics/renderscript/compute.html
+
+
+target (gnu::target)
+--------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute"
+
+   "X","X","","", "", "X"
+
+Clang supports the GNU style ``__attribute__((target("OPTIONS")))`` attribute.
+This attribute may be attached to a function definition and instructs
+the backend to use different code generation options than were passed on the
+command line.
+
+The current set of options correspond to the existing "subtarget features" for
+the target, with or without a "-mno-" in front corresponding to the absence
+of the feature, as well as ``arch="CPU"`` which will change the default "CPU"
+for the function.
+
+Example "subtarget features" from the x86 backend include "mmx", "sse",
+"sse4.2", "avx", and "xop"; these largely correspond to the machine-specific
+options handled by the front end.
+
+
+try_acquire_capability (try_acquire_shared_capability, clang::try_acquire_capability, clang::try_acquire_shared_capability)
+---------------------------------------------------------------------------------------------------------------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute"
+
+   "X","X","","", "", ""
+
+Marks a function that attempts to acquire a capability. Such a function may
+fail to actually acquire the capability; it accepts a Boolean value determining
+whether acquiring the capability means success (true), or failing to acquire
+the capability means success (false).
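+
+A hedged sketch, assuming a ``Mutex`` type annotated with the ``capability``
+attribute (all names are hypothetical):
+
+.. code-block:: c++
+
+  struct __attribute__((capability("mutex"))) Mutex {
+    // The argument states that a return value of true means the lock was
+    // actually acquired.
+    bool TryLock() __attribute__((try_acquire_capability(true)));
+    void Unlock() __attribute__((release_capability));
+  };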
+
+
+nodiscard, warn_unused_result, clang::warn_unused_result, gnu::warn_unused_result
+---------------------------------------------------------------------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute"
+
+   "X","X","","", "", "X"
+
+Clang supports the ability to diagnose when the results of a function call
+expression are discarded under suspicious circumstances. A diagnostic is
+generated when a function or its return type is marked with ``[[nodiscard]]``
+(or ``__attribute__((warn_unused_result))``) and the function call appears as a
+potentially-evaluated discarded-value expression that is not explicitly cast to
+`void`.
+
+.. code-block:: c++
+
+  struct [[nodiscard]] error_info { /*...*/ };
+  error_info enable_missile_safety_mode();
+
+  void launch_missiles();
+  void test_missiles() {
+    enable_missile_safety_mode(); // diagnoses
+    launch_missiles();
+  }
+  error_info &foo();
+  void f() { foo(); } // Does not diagnose, error_info is a reference.
+
+
+xray_always_instrument (clang::xray_always_instrument), xray_never_instrument (clang::xray_never_instrument), xray_log_args (clang::xray_log_args)
+--------------------------------------------------------------------------------------------------------------------------------------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute"
+
+   "X","X","","", "", "X"
+
+``__attribute__((xray_always_instrument))`` or
+``[[clang::xray_always_instrument]]`` is used to mark member functions (in
+C++), methods (in Objective-C), and free functions (in C, C++, and Objective-C)
+to be instrumented with XRay. This will cause the function to always have space
+at the beginning and exit points to allow for runtime patching.
+
+Conversely, ``__attribute__((xray_never_instrument))`` or
+``[[clang::xray_never_instrument]]`` will inhibit the insertion of these
+instrumentation points.
+
+If a function has neither of these attributes, it becomes subject to the XRay
+heuristics used to determine whether the function should be instrumented.
+
+``__attribute__((xray_log_args(N)))`` or ``[[clang::xray_log_args(N)]]`` is
+used to preserve N function arguments for the logging function. Currently,
+only N==1 is supported.
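+
+For illustration, a minimal sketch (the function names are hypothetical):
+
+.. code-block:: c++
+
+  [[clang::xray_always_instrument]] void handle_request(); // always patchable
+  [[clang::xray_never_instrument]] void tiny_helper();     // never instrumented
+  [[clang::xray_always_instrument, clang::xray_log_args(1)]]
+  void log_entry(int id);                                  // preserve 1 argument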
+
+
+Variable Attributes
+===================
+
+
+dllexport (gnu::dllexport)
+--------------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute"
+
+   "X","X","X","", "", "X"
+
+The ``__declspec(dllexport)`` attribute declares a variable, function, or
+Objective-C interface to be exported from the module. It is available under the
+``-fdeclspec`` flag for compatibility with various compilers. The primary use
+is for COFF object files which explicitly specify what interfaces are available
+for external use. See the dllexport_ documentation on MSDN for more
+information.
+
+.. _dllexport: https://msdn.microsoft.com/en-us/library/3y1sfaz2.aspx
+
+
+dllimport (gnu::dllimport)
+--------------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute"
+
+   "X","X","X","", "", "X"
+
+The ``__declspec(dllimport)`` attribute declares a variable, function, or
+Objective-C interface to be imported from an external module. It is available
+under the ``-fdeclspec`` flag for compatibility with various compilers. The
+primary use is for COFF object files which explicitly specify what interfaces
+are imported from external modules. See the dllimport_ documentation on MSDN
+for more information.
+
+.. _dllimport: https://msdn.microsoft.com/en-us/library/3y1sfaz2.aspx
+
+
+init_seg
+--------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute"
+
+   "","","","", "X", ""
+
+The attribute applied by ``pragma init_seg()`` controls the section into
+which global initialization function pointers are emitted. It is only
+available with ``-fms-extensions``. Typically, this function pointer is
+emitted into ``.CRT$XCU`` on Windows. The user can change the order of
+initialization by using a different section name with the same
+``.CRT$XC`` prefix and a suffix that sorts lexicographically before or
+after the standard ``.CRT$XCU`` sections. See the init_seg_
+documentation on MSDN for more information.
+
+.. _init_seg: http://msdn.microsoft.com/en-us/library/7977wcck(v=vs.110).aspx
+
+
+nodebug (gnu::nodebug)
+----------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute"
+
+   "X","X","","", "", "X"
+
+The ``nodebug`` attribute allows you to suppress debugging information for a
+function or method, or for a variable that is not a parameter or a non-static
+data member.
+
+
+nosvm
+-----
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute"
+
+   "X","","","", "", "X"
+
+OpenCL 2.0 supports the optional ``__attribute__((nosvm))`` qualifier for a
+pointer variable. It informs the compiler that the pointer does not refer
+to a shared virtual memory region. See OpenCL v2.0 s6.7.2 for details.
+
+Since it is not widely used and has been removed from OpenCL 2.1, it is ignored
+by Clang.
+
+
+pass_object_size
+----------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute"
+
+   "X","","","", "", "X"
+
+.. Note:: The mangling of functions with parameters that are annotated with
+  ``pass_object_size`` is subject to change.
+  You can get around this by
+  using ``__asm__("foo")`` to explicitly name your functions, thus preserving
+  your ABI; also, non-overloadable C functions with ``pass_object_size`` are
+  not mangled.
+
+The ``pass_object_size(Type)`` attribute can be placed on function parameters to
+instruct clang to call ``__builtin_object_size(param, Type)`` at each callsite
+of said function, and implicitly pass the result of this call in as an invisible
+argument of type ``size_t`` directly after the parameter annotated with
+``pass_object_size``. Clang will also replace any calls to
+``__builtin_object_size(param, Type)`` in the function by said implicit
+parameter.
+
+Example usage:
+
+.. code-block:: c
+
+  int bzero1(char *const p __attribute__((pass_object_size(0))))
+      __attribute__((noinline)) {
+    int i = 0;
+    for (/**/; i < (int)__builtin_object_size(p, 0); ++i) {
+      p[i] = 0;
+    }
+    return i;
+  }
+
+  int main() {
+    char chars[100];
+    int n = bzero1(&chars[0]);
+    assert(n == sizeof(chars));
+    return 0;
+  }
+
+If successfully evaluating ``__builtin_object_size(param, Type)`` at the
+callsite is not possible, then the "failed" value is passed in. So, using the
+definition of ``bzero1`` from above, the following code would exit cleanly:
+
+.. code-block:: c
+
+  int main2(int argc, char *argv[]) {
+    int n = bzero1(argv);
+    assert(n == -1);
+    return 0;
+  }
+
+``pass_object_size`` plays a part in overload resolution. If two overload
+candidates are otherwise equally good, then the overload with one or more
+parameters with ``pass_object_size`` is preferred. This implies that the choice
+between two identical overloads both with ``pass_object_size`` on one or more
+parameters will always be ambiguous; for this reason, having two such overloads
+is illegal. For example:
+
+.. code-block:: c++
+
+  #define PS(N) __attribute__((pass_object_size(N)))
+  // OK
+  void Foo(char *a, char *b); // Overload A
+  // OK -- overload A has no parameters with pass_object_size.
+  void Foo(char *a PS(0), char *b PS(0)); // Overload B
+  // Error -- Same signature (sans pass_object_size) as overload B, and both
+  // overloads have one or more parameters with the pass_object_size attribute.
+  void Foo(void *a PS(0), void *b);
+
+  // OK
+  void Bar(void *a PS(0)); // Overload C
+  // OK
+  void Bar(char *c PS(1)); // Overload D
+
+  int main() {
+    char known[10], *unknown;
+    Foo(unknown, unknown); // Calls overload B
+    Foo(known, unknown);   // Calls overload B
+    Foo(unknown, known);   // Calls overload B
+    Foo(known, known);     // Calls overload B
+
+    Bar(known);   // Calls overload D
+    Bar(unknown); // Calls overload D
+  }
+
+Currently, ``pass_object_size`` is a bit restricted in terms of its usage:
+
+* Only one use of ``pass_object_size`` is allowed per parameter.
+
+* It is an error to take the address of a function with ``pass_object_size`` on
+  any of its parameters. If you wish to do this, you can create an overload
+  without ``pass_object_size`` on any parameters.
+
+* It is an error to apply the ``pass_object_size`` attribute to parameters that
+  are not pointers. Additionally, any parameter that ``pass_object_size`` is
+  applied to must be marked ``const`` at its function's definition.
+
+
+require_constant_initialization (clang::require_constant_initialization)
+------------------------------------------------------------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute"
+
+   "X","X","","", "", "X"
+
+This attribute specifies that the variable to which it is attached is intended
+to have a `constant initializer <http://en.cppreference.com/w/cpp/language/constant_initialization>`_
+according to the rules of [basic.start.static]. The variable is required to
+have static or thread storage duration. If the initialization of the variable
+is not a constant initializer, an error will be produced. This attribute may
+only be used in C++.
+
+Note that in C++03 strict constant expression checking is not done. Instead
+the attribute reports if Clang can emit the variable as a constant, even if it's
+not technically a 'constant initializer'. This behavior is non-portable.
+
+Static storage duration variables with constant initializers avoid hard-to-find
+bugs caused by the indeterminate order of dynamic initialization. They can also
+be safely used during dynamic initialization across translation units.
+
+This attribute acts as a compile time assertion that the requirements
+for constant initialization have been met. Since these requirements change
+between dialects and have subtle pitfalls it's important to fail fast instead
+of silently falling back on dynamic initialization.
+
+.. code-block:: c++
+
+  // -std=c++14
+  #define SAFE_STATIC [[clang::require_constant_initialization]]
+  struct T {
+    constexpr T(int) {}
+    ~T(); // non-trivial
+  };
+  SAFE_STATIC T x = {42}; // Initialization OK. Doesn't check destructor.
+  SAFE_STATIC T y = 42; // error: variable does not have a constant initializer
+  // copy initialization is not a constant expression on a non-literal type.
+
+
+section (gnu::section, __declspec(allocate))
+--------------------------------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute"
+
+   "X","X","X","", "", "X"
+
+The ``section`` attribute allows you to specify a specific section a
+global variable or function should be in after translation.
+
+
+swiftcall (gnu::swiftcall)
+--------------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute"
+
+   "X","X","","", "", ""
+
+The ``swiftcall`` attribute indicates that a function should be called
+using the Swift calling convention for a function or function pointer.
+
+The lowering for the Swift calling convention, as described by the Swift
+ABI documentation, occurs in multiple phases. The first, "high-level"
+phase breaks down the formal parameters and results into innately direct
+and indirect components, adds implicit parameters for the generic
+signature, and assigns the context and error ABI treatments to parameters
+where applicable. The second phase breaks down the direct parameters
+and results from the first phase and assigns them to registers or the
+stack. The ``swiftcall`` convention only handles this second phase of
+lowering; the C function type must accurately reflect the results
+of the first phase, as follows:
+
+- Results classified as indirect by high-level lowering should be
+  represented as parameters with the ``swift_indirect_result`` attribute.
+
+- Results classified as direct by high-level lowering should be represented
+  as follows:
+
+  - First, remove any empty direct results.
+
+  - If there are no direct results, the C result type should be ``void``.
+
+  - If there is one direct result, the C result type should be a type with
+    the exact layout of that result type.
+
+  - If there are multiple direct results, the C result type should be
+    a struct type with the exact layout of a tuple of those results.
+
+- Parameters classified as indirect by high-level lowering should be
+  represented as parameters of pointer type.
+
+- Parameters classified as direct by high-level lowering should be
+  omitted if they are empty types; otherwise, they should be represented
+  as a parameter type with a layout exactly matching the layout of the
+  Swift parameter type.
+
+- The context parameter, if present, should be represented as a trailing
+  parameter with the ``swift_context`` attribute.
+
+- The error result parameter, if present, should be represented as a
+  trailing parameter (always following a context parameter) with the
+  ``swift_error_result`` attribute.
+
+``swiftcall`` does not support variadic arguments or unprototyped functions.
+
+The parameter ABI treatment attributes are aspects of the function type.
+A function type that applies an ABI treatment attribute to a
+parameter is a different type from an otherwise-identical function type
+that does not. A single parameter may not have multiple ABI treatment
+attributes.
+
+Support for this feature is target-dependent, although it should be
+supported on every target that Swift supports. Query for this support
+with ``__has_attribute(swiftcall)``. This implies support for the
+``swift_context``, ``swift_error_result``, and ``swift_indirect_result``
+attributes.
+
+
+swift_context (gnu::swift_context)
+----------------------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute"
+
+   "X","X","","", "", "X"
+
+The ``swift_context`` attribute marks a parameter of a ``swiftcall``
+function as having the special context-parameter ABI treatment.
+
+This treatment generally passes the context value in a special register
+which is normally callee-preserved.
+
+A ``swift_context`` parameter must either be the last parameter or must be
+followed by a ``swift_error_result`` parameter (which itself must always be
+the last parameter).
+
+A context parameter must have pointer or reference type.
+
+
+swift_error_result (gnu::swift_error_result)
+--------------------------------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute"
+
+   "X","X","","", "", "X"
+
+The ``swift_error_result`` attribute marks a parameter of a ``swiftcall``
+function as having the special error-result ABI treatment.
+
+This treatment generally passes the underlying error value in and out of
+the function through a special register which is normally callee-preserved.
+This is modeled in C by pretending that the register is addressable memory:
+
+- The caller appears to pass the address of a variable of pointer type.
+  The current value of this variable is copied into the register before
+  the call; if the call returns normally, the value is copied back into the
+  variable.
+
+- The callee appears to receive the address of a variable. This address
+  is actually a hidden location in its own stack, initialized with the
+  value of the register upon entry. When the function returns normally,
+  the value in that hidden location is written back to the register.
+
+A ``swift_error_result`` parameter must be the last parameter, and it must be
+preceded by a ``swift_context`` parameter.
+
+A ``swift_error_result`` parameter must have type ``T**`` or ``T*&`` for some
+type T.
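+
+For illustration, a hedged sketch of the C-level shape such a signature might
+take (the ``Error`` type and function name are hypothetical):
+
+.. code-block:: c++
+
+  struct Error;
+  // The context parameter comes last but one; the error result is last.
+  extern "C" void perform(void *ctx __attribute__((swift_context)),
+                          Error **err __attribute__((swift_error_result)))
+      __attribute__((swiftcall));
+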
+Note that no qualifiers are permitted on the intermediate level.
+
+It is undefined behavior if the caller does not pass a pointer or
+reference to a valid object.
+
+The standard convention is that the error value itself (that is, the
+value stored in the apparent argument) will be null upon function entry,
+but this is not enforced by the ABI.
+
+
+swift_indirect_result (gnu::swift_indirect_result)
+--------------------------------------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute"
+
+   "X","X","","", "", "X"
+
+The ``swift_indirect_result`` attribute marks a parameter of a ``swiftcall``
+function as having the special indirect-result ABI treatment.
+
+This treatment gives the parameter the target's normal indirect-result
+ABI treatment, which may involve passing it differently from an ordinary
+parameter. However, only the first indirect result will receive this
+treatment. Furthermore, low-level lowering may decide that a direct result
+must be returned indirectly; if so, this will take priority over the
+``swift_indirect_result`` parameters.
+
+A ``swift_indirect_result`` parameter must either be the first parameter or
+follow another ``swift_indirect_result`` parameter.
+
+A ``swift_indirect_result`` parameter must have type ``T*`` or ``T&`` for
+some object type ``T``. If ``T`` is a complete type at the point of
+definition of a function, it is undefined behavior if the argument
+value does not point to storage of adequate size and alignment for a
+value of type ``T``.
+
+Making indirect results explicit in the signature allows C functions to
+directly construct objects into them without relying on language
+optimizations like C++'s named return value optimization (NRVO).
+
+
+tls_model (gnu::tls_model)
+--------------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute"
+
+   "X","X","","", "", "X"
+
+The ``tls_model`` attribute allows you to specify which thread-local storage
+model to use. It accepts the following strings:
+
+* global-dynamic
+* local-dynamic
+* initial-exec
+* local-exec
+
+TLS models are mutually exclusive.
+
+
+thread
+------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute"
+
+   "","","X","", "", ""
+
+The ``__declspec(thread)`` attribute declares a variable with thread local
+storage. It is available under the ``-fms-extensions`` flag for MSVC
+compatibility. See the documentation for `__declspec(thread)`_ on MSDN.
+
+.. _`__declspec(thread)`: http://msdn.microsoft.com/en-us/library/9w1sdazb.aspx
+
+In Clang, ``__declspec(thread)`` is generally equivalent in functionality to the
+GNU ``__thread`` keyword. The variable must not have a destructor and must have
+a constant initializer, if any. The attribute only applies to variables
+declared with static storage duration, such as globals, class static data
+members, and static locals.
+
+
+maybe_unused, unused, gnu::unused
+---------------------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute"
+
+   "X","X","","", "", ""
+
+When passing the ``-Wunused`` flag to Clang, entities that are unused by the
+program may be diagnosed. The ``[[maybe_unused]]`` (or
+``__attribute__((unused))``) attribute can be used to silence such diagnostics
+when the entity cannot be removed.
+For instance, a local variable may exist
+solely for use in an ``assert()`` statement, which makes the local variable
+unused when ``NDEBUG`` is defined.
+
+The attribute may be applied to the declaration of a class, a typedef, a
+variable, a function or method, a function parameter, an enumeration, an
+enumerator, a non-static data member, or a label.
+
+.. code-block:: c++
+
+  #include <cassert>
+
+  [[maybe_unused]] void f([[maybe_unused]] bool thing1,
+                          [[maybe_unused]] bool thing2) {
+    [[maybe_unused]] bool b = thing1 && thing2;
+    assert(b);
+  }
+
+
+Type Attributes
+===============
+
+
+align_value
+-----------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute"
+
+   "X","","","", "", "X"
+
+The align_value attribute can be added to the typedef of a pointer type or the
+declaration of a variable of pointer or reference type. It specifies that the
+pointer will point to, or the reference will bind to, only objects with at
+least the provided alignment. This alignment value must be some positive power
+of 2.
+
+  .. code-block:: c
+
+    typedef double * aligned_double_ptr __attribute__((align_value(64)));
+    void foo(double & x __attribute__((align_value(128))),
+             aligned_double_ptr y) { ... }
+
+If the pointer value does not have the specified alignment at runtime, the
+behavior of the program is undefined.
+
+
+empty_bases
+-----------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute"
+
+   "","","X","", "", ""
+
+The empty_bases attribute permits the compiler to utilize the
+empty-base-optimization more frequently.
+This attribute only applies to struct, class, and union types.
+It is only supported when using the Microsoft C++ ABI.
+
+
+enum_extensibility (clang::enum_extensibility)
+----------------------------------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute"
+
+   "X","X","","", "", "X"
+
+Attribute ``enum_extensibility`` is used to distinguish between enum definitions
+that are extensible and those that are not. The attribute can take either
+``closed`` or ``open`` as an argument. ``closed`` indicates a variable of the
+enum type takes a value that corresponds to one of the enumerators listed in the
+enum definition or, when the enum is annotated with ``flag_enum``, a value that
+can be constructed using values corresponding to the enumerators. ``open``
+indicates a variable of the enum type can take any values allowed by the
+standard and instructs clang to be more lenient when issuing warnings.
+
+.. code-block:: c
+
+  enum __attribute__((enum_extensibility(closed))) ClosedEnum {
+    A0, A1
+  };
+
+  enum __attribute__((enum_extensibility(open))) OpenEnum {
+    B0, B1
+  };
+
+  enum __attribute__((enum_extensibility(closed),flag_enum)) ClosedFlagEnum {
+    C0 = 1 << 0, C1 = 1 << 1
+  };
+
+  enum __attribute__((enum_extensibility(open),flag_enum)) OpenFlagEnum {
+    D0 = 1 << 0, D1 = 1 << 1
+  };
+
+  void foo1() {
+    enum ClosedEnum ce;
+    enum OpenEnum oe;
+    enum ClosedFlagEnum cfe;
+    enum OpenFlagEnum ofe;
+
+    ce = A1;           // no warnings
+    ce = 100;          // warning issued
+    oe = B1;           // no warnings
+    oe = 100;          // no warnings
+    cfe = C0 | C1;     // no warnings
+    cfe = C0 | C1 | 4; // warning issued
+    ofe = D0 | D1;     // no warnings
+    ofe = D0 | D1 | 4; // no warnings
+  }
+
+
+flag_enum
+---------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute"
+
+   "X","","","", "", "X"
+
+This attribute can be added to an enum declaration to signal to the compiler
+that it is intended to be used as a flag type. This will cause the compiler to
+assume that the range of the type includes all of the values that you can get
+by manipulating bits of its enumerators when issuing warnings.
+
+
+lto_visibility_public (clang::lto_visibility_public)
+----------------------------------------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute"
+
+   "","X","","", "", "X"
+
+See :doc:`LTOVisibility`.
+
+
+layout_version
+--------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute"
+
+   "","","X","", "", ""
+
+The layout_version attribute requests that the compiler utilize the class
+layout rules of a particular compiler version.
+This attribute only applies to struct, class, and union types.
+It is only supported when using the Microsoft C++ ABI.
+
+
+__single_inheritance, __multiple_inheritance, __virtual_inheritance
+--------------------------------------------------------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute"
+
+   "","","","X", "", ""
+
+This collection of keywords is enabled under ``-fms-extensions`` and controls
+the pointer-to-member representation used on ``*-*-win32`` targets.
+
+The ``*-*-win32`` targets utilize a pointer-to-member representation which
+varies in size and alignment depending on the definition of the underlying
+class.
+
+However, this is problematic when only a forward declaration is available and
+no definition has been made yet. In such cases, Clang is forced to utilize the
+most general representation that is available to it.
+
+These keywords make it possible to use a pointer-to-member representation other
+than the most general one regardless of whether or not the definition will ever
+be present in the current translation unit.
+
+This family of keywords belongs between the ``class-key`` and ``class-name``:
+
+.. code-block:: c++
+
+  struct __single_inheritance S;
+  int S::*i;
+  struct S {};
+
+This keyword can be applied to class templates but only has an effect when used
+on full specializations:
+
+.. code-block:: c++
+
+  template <typename T> struct __single_inheritance A; // warning: inheritance model ignored on primary template
+  template <typename T> struct __multiple_inheritance A<T *>; // warning: inheritance model ignored on partial specialization
+  template <> struct __single_inheritance A<int>;
+
+Note that choosing an inheritance model less general than strictly necessary is
+an error:
+
+.. code-block:: c++
+
+  struct __multiple_inheritance S; // error: inheritance model does not match definition
+  int S::*i;
+  struct S {};
+
+
+novtable
+--------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute"
+
+   "","","X","", "", ""
+
+This attribute can be added to a class declaration or definition to signal to
+the compiler that constructors and destructors will not reference the virtual
+function table. It is only supported when using the Microsoft C++ ABI.
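+
+A brief sketch (the type name is hypothetical; the attribute is only meaningful
+under the Microsoft C++ ABI):
+
+.. code-block:: c++
+
+  struct __declspec(novtable) IShape { // never instantiated directly, so no
+    virtual void draw() = 0;           // vtable needs to be referenced
+  };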
+
+
+objc_subclassing_restricted
+---------------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute"
+
+   "X","","","", "", "X"
+
+This attribute can be added to an Objective-C ``@interface`` declaration to
+ensure that this class cannot be subclassed.
+
+
+transparent_union (gnu::transparent_union)
+------------------------------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute"
+
+   "X","X","","", "", ""
+
+This attribute can be applied to a union to change the behaviour of calls to
+functions that have an argument with a transparent union type. The compiler
+behaviour is changed in the following manner:
+
+- A value whose type is any member of the transparent union can be passed as an
+  argument without the need to cast that value.
+
+- The argument is passed to the function using the calling convention of the
+  first member of the transparent union. Consequently, all the members of the
+  transparent union should have the same calling convention as its first member.
+
+Transparent unions are not supported in C++.
+
+
+Statement Attributes
+====================
+
+
+fallthrough, clang::fallthrough
+-------------------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute"
+
+   "","X","","", "", ""
+
+The ``fallthrough`` (or ``clang::fallthrough``) attribute is used
+to annotate intentional fall-through
+between switch labels. It can only be applied to a null statement placed at a
+point of execution between any statement and the next switch label. It is
+common to mark these places with a specific comment, but this attribute is
+meant to replace comments with a more strict annotation, which can be checked
+by the compiler. This attribute doesn't change the semantics of the code and
+can be used wherever an intended fall-through occurs. It is designed to mimic
+control-flow statements like ``break;``, so it can be placed in most places
+where ``break;`` can, but only if there are no statements on the execution path
+between it and the next switch label.
+
+By default, Clang does not warn on unannotated fallthrough from one ``switch``
+case to another. Diagnostics on fallthrough without a corresponding annotation
+can be enabled with the ``-Wimplicit-fallthrough`` argument.
+
+Here is an example:
+
+.. code-block:: c++
+
+  // compile with -Wimplicit-fallthrough
+  switch (n) {
+  case 22:
+  case 33:  // no warning: no statements between case labels
+    f();
+  case 44:  // warning: unannotated fall-through
+    g();
+    [[clang::fallthrough]];
+  case 55:  // no warning
+    if (x) {
+      h();
+      break;
+    }
+    else {
+      i();
+      [[clang::fallthrough]];
+    }
+  case 66:  // no warning
+    p();
+    [[clang::fallthrough]]; // warning: fallthrough annotation does not
+                            // directly precede case label
+    q();
+  case 77:  // warning: unannotated fall-through
+    r();
+  }
+
+
+#pragma clang loop
+------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute"
+
+   "","","","", "X", ""
+
+The ``#pragma clang loop`` directive allows loop optimization hints to be
+specified for the subsequent loop. The directive allows vectorization,
+interleaving, and unrolling to be enabled or disabled. Vector width as well
+as interleave and unrolling count can be manually specified. See
+`language extensions
+<http://clang.llvm.org/docs/LanguageExtensions.html#extensions-for-loop-hint-optimizations>`_
+for details.
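+
+A short sketch of the directive in use (``n``, ``out``, ``a``, and ``b`` are
+assumed to be in scope):
+
+.. code-block:: c++
+
+  // Request a vector width of 4 and an interleave factor of 2 for this loop.
+  #pragma clang loop vectorize_width(4) interleave_count(2)
+  for (int i = 0; i < n; ++i)
+    out[i] = a[i] + b[i];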
+
+
+#pragma unroll, #pragma nounroll
+--------------------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute"
+
+   "","","","", "X", ""
+
+Loop unrolling optimization hints can be specified with ``#pragma unroll`` and
+``#pragma nounroll``. The pragma is placed immediately before a for, while,
+do-while, or C++11 range-based for loop.
+
+Specifying ``#pragma unroll`` without a parameter directs the loop unroller to
+attempt to fully unroll the loop if the trip count is known at compile time and
+attempt to partially unroll the loop if the trip count is not known at compile
+time:
+
+.. code-block:: c++
+
+  #pragma unroll
+  for (...) {
+    ...
+  }
+
+Specifying the optional parameter, ``#pragma unroll _value_``, directs the
+unroller to unroll the loop ``_value_`` times. The parameter may optionally be
+enclosed in parentheses:
+
+.. code-block:: c++
+
+  #pragma unroll 16
+  for (...) {
+    ...
+  }
+
+  #pragma unroll(16)
+  for (...) {
+    ...
+  }
+
+Specifying ``#pragma nounroll`` indicates that the loop should not be unrolled:
+
+.. code-block:: c++
+
+  #pragma nounroll
+  for (...) {
+    ...
+  }
+
+``#pragma unroll`` and ``#pragma unroll _value_`` have identical semantics to
+``#pragma clang loop unroll(full)`` and
+``#pragma clang loop unroll_count(_value_)`` respectively. ``#pragma nounroll``
+is equivalent to ``#pragma clang loop unroll(disable)``. See
+`language extensions
+<http://clang.llvm.org/docs/LanguageExtensions.html#extensions-for-loop-hint-optimizations>`_
+for further details including limitations of the unroll hints.
+
+
+__read_only, __write_only, __read_write (read_only, write_only, read_write)
+---------------------------------------------------------------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute"
+
+   "","","","X", "", ""
+
+The access qualifiers must be used with image object arguments or pipe arguments
+to declare if they are being read or written by a kernel or function.
+
+The read_only/__read_only, write_only/__write_only and read_write/__read_write
+names are reserved for use as access qualifiers and shall not be used otherwise.
+
+.. code-block:: c
+
+  kernel void
+  foo (read_only image2d_t imageA,
+       write_only image2d_t imageB) {
+    ...
+  }
+
+In the above example, imageA is a read-only 2D image object, and imageB is a
+write-only 2D image object.
+
+The read_write (or __read_write) qualifier cannot be used with pipe.
+
+More details can be found in the OpenCL C language Spec v2.0, Section 6.6.
+
+
+__attribute__((intel_reqd_sub_group_size))
+------------------------------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute"
+
+   "X","","","", "", "X"
+
+The optional attribute intel_reqd_sub_group_size can be used to indicate that
+the kernel must be compiled and executed with the specified subgroup size. When
+this attribute is present, get_max_sub_group_size() is guaranteed to return the
+specified integer value. This is important for the correctness of many subgroup
+algorithms, and in some cases may be used by the compiler to generate more optimal
+code. See `cl_intel_required_subgroup_size
+<https://www.khronos.org/registry/OpenCL/extensions/intel/cl_intel_required_subgroup_size.txt>`_
+for details.
+
+
+__attribute__((opencl_unroll_hint))
+-----------------------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute"
+
+   "X","","","", "", ""
+
+The opencl_unroll_hint attribute qualifier can be used to specify that a loop
+(for, while and do loops) can be unrolled.
+This attribute qualifier can be
+used to specify full unrolling or partial unrolling by a specified amount.
+This is a compiler hint and the compiler may ignore this directive. See
+`OpenCL v2.0 <https://www.khronos.org/registry/cl/specs/opencl-2.0.pdf>`_
+s6.11.5 for details.
+
+
+suppress (gsl::suppress)
+------------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute"
+
+   "","X","","", "", ""
+
+The ``[[gsl::suppress]]`` attribute suppresses specific
+clang-tidy diagnostics for rules of the `C++ Core Guidelines`_ in a portable
+way. The attribute can be attached to declarations and statements, and can
+also appear at namespace scope.
+
+.. code-block:: c++
+
+  [[gsl::suppress("Rh-public")]]
+  void f_() {
+    int *p;
+    [[gsl::suppress("type")]] {
+      p = reinterpret_cast<int *>(7);
+    }
+  }
+  namespace N {
+    [[gsl::suppress("type", "bounds")]];
+    ...
+  }
+
+.. _`C++ Core Guidelines`: https://github.com/isocpp/CppCoreGuidelines/blob/master/CppCoreGuidelines.md#inforce-enforcement
+
+
+Consumed Annotation Checking
+============================
+Clang supports additional attributes for checking basic resource management
+properties, specifically for unique objects that have a single owning reference.
+The following attributes are currently supported, although **the implementation
+for these annotations is currently in development and is subject to change.**
+
+callable_when
+-------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute"
+
+   "X","","","", "", "X"
+
+Use ``__attribute__((callable_when(...)))`` to indicate what states a method
+may be called in. Valid states are unconsumed, consumed, or unknown. Each
+argument to this attribute must be a quoted string. E.g.:
+
+``__attribute__((callable_when("unconsumed", "unknown")))``
+
+
+consumable
+----------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute"
+
+   "X","","","", "", "X"
+
+Each ``class`` that uses any of the typestate annotations must first be marked
+using the ``consumable`` attribute. Failure to do so will result in a warning.
+
+This attribute accepts a single parameter that must be one of the following:
+``unknown``, ``consumed``, or ``unconsumed``.
+
+
+param_typestate
+---------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute"
+
+   "X","","","", "", "X"
+
+This attribute specifies expectations about function parameters. Calls to a
+function with annotated parameters will issue a warning if the corresponding
+argument isn't in the expected state. The attribute is also used to set the
+initial state of the parameter when analyzing the function's body.
+
+
+return_typestate
+----------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute"
+
+   "X","","","", "", "X"
+
+The ``return_typestate`` attribute can be applied to functions or parameters.
+When applied to a function the attribute specifies the state of the returned
+value. The function's body is checked to ensure that it always returns a value
+in the specified state. On the caller side, values returned by the annotated
+function are initialized to the given state.
+
+When applied to a function parameter it modifies the state of an argument after
+a call to the function returns.
+The function's body is checked to ensure that
+the parameter is in the expected state before returning.
+
+
+set_typestate
+-------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute"
+
+   "X","","","", "", "X"
+
+Annotate methods that transition an object into a new state with
+``__attribute__((set_typestate(new_state)))``. The new state must be
+unconsumed, consumed, or unknown.
+
+
+test_typestate
+--------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute"
+
+   "X","","","", "", "X"
+
+Use ``__attribute__((test_typestate(tested_state)))`` to indicate that a method
+returns true if the object is in the specified state.
+
+
+AMD GPU Attributes
+==================
+
+
+amdgpu_flat_work_group_size
+---------------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute"
+
+   "X","","","", "", "X"
+
+The flat work-group size is the number of work-items in the work-group size
+specified when the kernel is dispatched. It is the product of the sizes of the
+x, y, and z dimension of the work-group.
+
+Clang supports the
+``__attribute__((amdgpu_flat_work_group_size(<min>, <max>)))`` attribute for the
+AMDGPU target. This attribute may be attached to a kernel function definition
+and is an optimization hint.
+
+The ``<min>`` parameter specifies the minimum flat work-group size, and the
+``<max>`` parameter specifies the maximum flat work-group size (must be greater
+than ``<min>``) to which all dispatches of the kernel will conform. Passing
+``0, 0`` as ``<min>, <max>`` implies the default behavior (``128, 256``).
+
+If specified, the AMDGPU target backend might be able to produce better machine
+code for barriers and perform scratch promotion by estimating available group
+segment size.
+
+An error will be given if:
+  - Specified values violate subtarget specifications;
+  - Specified values are not compatible with values provided through other
+    attributes.
+
+
+amdgpu_num_sgpr
+---------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute"
+
+   "X","","","", "", "X"
+
+Clang supports the ``__attribute__((amdgpu_num_sgpr(<num_sgpr>)))`` and
+``__attribute__((amdgpu_num_vgpr(<num_vgpr>)))`` attributes for the AMDGPU
+target. These attributes may be attached to a kernel function definition and are
+an optimization hint.
+
+If these attributes are specified, then the AMDGPU target backend will attempt
+to limit the number of SGPRs and/or VGPRs used to the specified value(s). The
+number of used SGPRs and/or VGPRs may further be rounded up to satisfy the
+allocation requirements or constraints of the subtarget. Passing ``0`` as
+``num_sgpr`` and/or ``num_vgpr`` implies the default behavior (no limits).
+
+These attributes can be used to test the AMDGPU target backend. It is
+recommended that the ``amdgpu_waves_per_eu`` attribute be used to control
+resources such as SGPRs and VGPRs since it is aware of the limits for different
+subtargets.
+
+An error will be given if:
+  - Specified values violate subtarget specifications;
+  - Specified values are not compatible with values provided through other
+    attributes;
+  - The AMDGPU target backend is unable to create machine code that can meet the
+    request.
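+
+For illustration, a hedged sketch (the kernel name and the limits shown are
+arbitrary):
+
+.. code-block:: c++
+
+  // Ask the backend to stay within 32 SGPRs and 64 VGPRs for this kernel.
+  __attribute__((amdgpu_num_sgpr(32), amdgpu_num_vgpr(64)))
+  void reduce_kernel(const float *in, float *out);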
+
+
+amdgpu_num_vgpr
+---------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute"
+
+   "X","","","", "", "X"
+
+Clang supports the ``__attribute__((amdgpu_num_sgpr(<num_sgpr>)))`` and
+``__attribute__((amdgpu_num_vgpr(<num_vgpr>)))`` attributes for the AMDGPU
+target. These attributes may be attached to a kernel function definition and are
+an optimization hint.
+
+If these attributes are specified, then the AMDGPU target backend will attempt
+to limit the number of SGPRs and/or VGPRs used to the specified value(s). The
+number of used SGPRs and/or VGPRs may further be rounded up to satisfy the
+allocation requirements or constraints of the subtarget. Passing ``0`` as
+``num_sgpr`` and/or ``num_vgpr`` implies the default behavior (no limits).
+
+These attributes can be used to test the AMDGPU target backend. It is
+recommended that the ``amdgpu_waves_per_eu`` attribute be used to control
+resources such as SGPRs and VGPRs since it is aware of the limits for different
+subtargets.
+
+An error will be given if:
+  - Specified values violate subtarget specifications;
+  - Specified values are not compatible with values provided through other
+    attributes;
+  - The AMDGPU target backend is unable to create machine code that can meet the
+    request.
+
+
+amdgpu_waves_per_eu
+-------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute"
+
+   "X","","","", "", "X"
+
+A compute unit (CU) is responsible for executing the wavefronts of a work-group.
+It is composed of one or more execution units (EU), which are responsible for
+executing the wavefronts. An EU can have enough resources to maintain the state
+of more than one executing wavefront. This allows an EU to hide latency by
+switching between wavefronts in a similar way to symmetric multithreading on a
+CPU. In order to allow the state for multiple wavefronts to fit on an EU, the
+resources used by a single wavefront have to be limited; for example, the
+number of SGPRs and VGPRs. Limiting such resources can allow greater latency
+hiding, but can result in having to spill some register state to memory.
+
+Clang supports the ``__attribute__((amdgpu_waves_per_eu(<min>[, <max>])))``
+attribute for the AMDGPU target. This attribute may be attached to a kernel
+function definition and is an optimization hint.
+
+The ``<min>`` parameter specifies the requested minimum number of waves per EU,
+and the *optional* ``<max>`` parameter specifies the requested maximum number of
+waves per EU (must be greater than ``<min>`` if specified). If ``<max>`` is
+omitted, then there is no restriction on the maximum number of waves per EU
+other than the one dictated by the hardware for which the kernel is compiled.
+Passing ``0, 0`` as ``<min>, <max>`` implies the default behavior (no limits).
+
+If specified, this attribute allows an advanced developer to tune the number of
+wavefronts that are capable of fitting within the resources of an EU. The AMDGPU
+target backend can use this information to limit resources, such as number of
+SGPRs, number of VGPRs, size of available group and private memory segments, in
+such a way that guarantees that at least ``<min>`` wavefronts and at most
+``<max>`` wavefronts are able to fit within the resources of an EU. Requesting
+more wavefronts can hide memory latency but limits available registers which
+can result in spilling. Requesting fewer wavefronts can help reduce cache
+thrashing, but can reduce memory latency hiding.
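+
+For example, a sketch requesting between 2 and 4 waves per EU (the kernel name
+is hypothetical):
+
+.. code-block:: c++
+
+  __attribute__((amdgpu_waves_per_eu(2, 4)))
+  void stencil_kernel(const float *in, float *out);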
+
+This attribute controls the machine code generated by the AMDGPU target backend
+to ensure it is capable of meeting the requested values. However, when the
+kernel is executed, there may be other reasons that prevent meeting the request,
+for example, there may be wavefronts from other kernels executing on the EU.
+
+An error will be given if:
+ - Specified values violate subtarget specifications;
+ - Specified values are not compatible with values provided through other
+   attributes;
+ - The AMDGPU target backend is unable to create machine code that can meet the
+   request.
+
+
+Calling Conventions
+===================
+Clang supports several different calling conventions, depending on the target
+platform and architecture. The calling convention used for a function determines
+how parameters are passed, how results are returned to the caller, and other
+low-level details of calling a function.
+
+fastcall (gnu::fastcall, __fastcall, _fastcall)
+-----------------------------------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute"
+
+   "X","X","","X", "", ""
+
+On 32-bit x86 targets, this attribute changes the calling convention of a
+function to use ECX and EDX as register parameters and clear parameters off of
+the stack on return. This convention does not support variadic calls or
+unprototyped functions in C, and has no effect on x86_64 targets. This calling
+convention is supported primarily for compatibility with existing code. Users
+seeking register parameters should use the ``regparm`` attribute, which does
+not require callee-cleanup. See the documentation for `__fastcall`_ on MSDN.
+
+.. _`__fastcall`: http://msdn.microsoft.com/en-us/library/6xa169sk.aspx
+
+
+ms_abi (gnu::ms_abi)
+--------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute"
+
+   "X","X","","", "", ""
+
+On non-Windows x86_64 targets, this attribute changes the calling convention of
+a function to match the default convention used on Windows x86_64. This
+attribute has no effect on Windows targets or non-x86_64 targets.
+
+
+pcs (gnu::pcs)
+--------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute"
+
+   "X","X","","", "", ""
+
+On ARM targets, this attribute can be used to select calling conventions
+similar to ``stdcall`` on x86. Valid parameter values are "aapcs" and
+"aapcs-vfp".
+
+
+preserve_all
+------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute"
+
+   "X","","","", "", ""
+
+On X86-64 and AArch64 targets, this attribute changes the calling convention of
+a function. The ``preserve_all`` calling convention attempts to make the code
+in the caller even less intrusive than the ``preserve_most`` calling convention.
+This calling convention also behaves identically to the ``C`` calling convention
+in how arguments and return values are passed, but it uses a different set of
+caller/callee-saved registers. This removes the burden of saving and
+recovering a large register set before and after the call in the caller. If
+the arguments are passed in callee-saved registers, then they will be
+preserved by the callee across the call. This doesn't apply to values
+returned in callee-saved registers.
+
+- On X86-64 the callee preserves all general purpose registers, except for
+  R11. R11 can be used as a scratch register. Furthermore, it also preserves
+  all floating-point registers (XMMs/YMMs).
+
+The idea behind this convention is to support calls to runtime functions
+that don't need to call out to any other functions.
+
+This calling convention, like the ``preserve_most`` calling convention, will be
+used by a future version of the Objective-C runtime and should be considered
+experimental at this time.
+
+
+preserve_most
+-------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute"
+
+   "X","","","", "", ""
+
+On X86-64 and AArch64 targets, this attribute changes the calling convention of
+a function. The ``preserve_most`` calling convention attempts to make the code
+in the caller as unintrusive as possible. This convention behaves identically
+to the ``C`` calling convention in how arguments and return values are passed,
+but it uses a different set of caller/callee-saved registers. This alleviates
+the burden of saving and recovering a large register set before and after the
+call in the caller. If the arguments are passed in callee-saved registers,
+then they will be preserved by the callee across the call. This doesn't
+apply to values returned in callee-saved registers.
+
+- On X86-64 the callee preserves all general purpose registers, except for
+  R11. R11 can be used as a scratch register. Floating-point registers
+  (XMMs/YMMs) are not preserved and need to be saved by the caller.
+
+The idea behind this convention is to support calls to runtime functions
+that have a hot path and a cold path. The hot path is usually a small piece
+of code that doesn't use many registers. The cold path might need to call out to
+another function and therefore only needs to preserve the caller-saved
+registers, which haven't already been saved by the caller. The
+``preserve_most`` calling convention is very similar to the ``cold`` calling
+convention in terms of caller/callee-saved registers, but they are used for
+different types of function calls. ``coldcc`` is for function calls that are
+rarely executed, whereas ``preserve_most`` function calls are intended to be
+on the hot path and definitely executed a lot. Furthermore, ``preserve_most``
+doesn't prevent the inliner from inlining the function call.
+
+This calling convention will be used by a future version of the Objective-C
+runtime and should therefore still be considered experimental at this time.
+Although this convention was created to optimize certain runtime calls to
+the Objective-C runtime, it is not limited to this runtime and might be used
+by other runtimes in the future too. The current implementation only
+supports X86-64 and AArch64, but the intention is to support more architectures
+in the future.
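+
+As an illustrative sketch (the function names are hypothetical, not part of any
+runtime), a hot/cold split might look like:
+
+.. code-block:: c++
+
+  // Cold path: preserves most registers, so the hot-path caller has
+  // almost nothing to save around the call.
+  __attribute__((preserve_most)) void log_rare_event(void *state);
+
+  inline void fast_path(void *state, bool rare) {
+    if (rare)
+      log_rare_event(state); // cheap call: registers stay live across it
+  }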
+
+
+regcall (gnu::regcall, __regcall)
+---------------------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute"
+
+   "X","X","","X", "", ""
+
+On x86 targets, this attribute changes the calling convention to the
+`__regcall`_ convention. This convention aims to pass as many arguments
+as possible in registers. It also tries to utilize registers for the
+return value whenever it is possible.
+
+.. _`__regcall`: https://software.intel.com/en-us/node/693069
+
+
+regparm (gnu::regparm)
+----------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute"
+
+   "X","X","","", "", ""
+
+On 32-bit x86 targets, the regparm attribute causes the compiler to pass
+the first three integer parameters in EAX, EDX, and ECX instead of on the
+stack. This attribute has no effect on variadic functions; all parameters
+are passed via the stack as normal.
+
+
+stdcall (gnu::stdcall, __stdcall, _stdcall)
+-------------------------------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute"
+
+   "X","X","","X", "", ""
+
+On 32-bit x86 targets, this attribute changes the calling convention of a
+function to clear parameters off of the stack on return. This convention does
+not support variadic calls or unprototyped functions in C, and has no effect on
+x86_64 targets. This calling convention is used widely by the Windows API and
+COM applications. See the documentation for `__stdcall`_ on MSDN.
+
+.. _`__stdcall`: http://msdn.microsoft.com/en-us/library/zxk0tw93.aspx
+
+
+thiscall (gnu::thiscall, __thiscall, _thiscall)
+-----------------------------------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute"
+
+   "X","X","","X", "", ""
+
+On 32-bit x86 targets, this attribute changes the calling convention of a
+function to use ECX for the first parameter (typically the implicit ``this``
+parameter of C++ methods) and clear parameters off of the stack on return. This
+convention does not support variadic calls or unprototyped functions in C, and
+has no effect on x86_64 targets. See the documentation for `__thiscall`_ on
+MSDN.
+
+.. _`__thiscall`: http://msdn.microsoft.com/en-us/library/ek8tkfbw.aspx
+
+
+vectorcall (__vectorcall, _vectorcall)
+--------------------------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute"
+
+   "X","","","X", "", ""
+
+On 32-bit x86 *and* x86_64 targets, this attribute changes the calling
+convention of a function to pass vector parameters in SSE registers.
+
+On 32-bit x86 targets, this calling convention is similar to ``__fastcall``.
+The first two integer parameters are passed in ECX and EDX. Subsequent integer
+parameters are passed in memory, and the callee clears the stack. On x86_64
+targets, the callee does *not* clear the stack, and integer parameters are
+passed in RCX, RDX, R8, and R9 as is done for the default Windows x64 calling
+convention.
+
+On both 32-bit x86 and x86_64 targets, vector and floating point arguments are
+passed in XMM0-XMM5. Homogeneous vector aggregates of up to four elements are
+passed in sequential SSE registers if enough are available. If AVX is enabled,
+256-bit vectors are passed in YMM0-YMM5. Any vector or aggregate type that
+cannot be passed in registers for any reason is passed by reference, which
+allows the caller to align the parameter memory.
+
+See the documentation for `__vectorcall`_ on MSDN for more details.
+
+.. _`__vectorcall`: http://msdn.microsoft.com/en-us/library/dn375768.aspx
+
+
+Type Safety Checking
+====================
+Clang supports additional attributes to enable checking type safety properties
+that can't be enforced by the C type system. To see warnings produced by these
+checks, ensure that ``-Wtype-safety`` is enabled.
Use cases include: + +* MPI library implementations, where these attributes enable checking that + the buffer type matches the passed ``MPI_Datatype``; +* for HDF5 library there is a similar use case to MPI; +* checking types of variadic functions' arguments for functions like + ``fcntl()`` and ``ioctl()``. + +You can detect support for these attributes with ``__has_attribute()``. For +example: + +.. code-block:: c++ + + #if defined(__has_attribute) + # if __has_attribute(argument_with_type_tag) && \ + __has_attribute(pointer_with_type_tag) && \ + __has_attribute(type_tag_for_datatype) + # define ATTR_MPI_PWT(buffer_idx, type_idx) __attribute__((pointer_with_type_tag(mpi,buffer_idx,type_idx))) + /* ... other macros ... */ + # endif + #endif + + #if !defined(ATTR_MPI_PWT) + # define ATTR_MPI_PWT(buffer_idx, type_idx) + #endif + + int MPI_Send(void *buf, int count, MPI_Datatype datatype /*, other args omitted */) + ATTR_MPI_PWT(1,3); + +argument_with_type_tag +---------------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute" + + "X","","","", "", "" + +Use ``__attribute__((argument_with_type_tag(arg_kind, arg_idx, +type_tag_idx)))`` on a function declaration to specify that the function +accepts a type tag that determines the type of some other argument. + +This attribute is primarily useful for checking arguments of variadic functions +(``pointer_with_type_tag`` can be used in most non-variadic cases). + +In the attribute prototype above: + * ``arg_kind`` is an identifier that should be used when annotating all + applicable type tags. + * ``arg_idx`` provides the position of a function argument. The expected type of + this function argument will be determined by the function argument specified + by ``type_tag_idx``. In the code example below, "3" means that the type of the + function's third argument will be determined by ``type_tag_idx``. + * ``type_tag_idx`` provides the position of a function argument. This function + argument will be a type tag. The type tag will determine the expected type of + the argument specified by ``arg_idx``. In the code example below, "2" means + that the type tag associated with the function's second argument should agree + with the type of the argument specified by ``arg_idx``. + +For example: + +.. code-block:: c++ + + int fcntl(int fd, int cmd, ...) + __attribute__(( argument_with_type_tag(fcntl,3,2) )); + // The function's second argument will be a type tag; this type tag will + // determine the expected type of the function's third argument. + + +pointer_with_type_tag +--------------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute" + + "X","","","", "", "" + +Use ``__attribute__((pointer_with_type_tag(ptr_kind, ptr_idx, type_tag_idx)))`` +on a function declaration to specify that the function accepts a type tag that +determines the pointee type of some other pointer argument. + +In the attribute prototype above: + * ``ptr_kind`` is an identifier that should be used when annotating all + applicable type tags. + * ``ptr_idx`` provides the position of a function argument; this function + argument will have a pointer type. The expected pointee type of this pointer + type will be determined by the function argument specified by + ``type_tag_idx``. In the code example below, "1" means that the pointee type + of the function's first argument will be determined by ``type_tag_idx``. 
+ * ``type_tag_idx`` provides the position of a function argument; this function + argument will be a type tag. The type tag will determine the expected pointee + type of the pointer argument specified by ``ptr_idx``. In the code example + below, "3" means that the type tag associated with the function's third + argument should agree with the pointee type of the pointer argument specified + by ``ptr_idx``. + +For example: + +.. code-block:: c++ + + typedef int MPI_Datatype; + int MPI_Send(void *buf, int count, MPI_Datatype datatype /*, other args omitted */) + __attribute__(( pointer_with_type_tag(mpi,1,3) )); + // The function's 3rd argument will be a type tag; this type tag will + // determine the expected pointee type of the function's 1st argument. + + +type_tag_for_datatype +--------------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute" + + "X","","","", "", "" + +When declaring a variable, use +``__attribute__((type_tag_for_datatype(kind, type)))`` to create a type tag that +is tied to the ``type`` argument given to the attribute. + +In the attribute prototype above: + * ``kind`` is an identifier that should be used when annotating all applicable + type tags. + * ``type`` indicates the name of the type. + +Clang supports annotating type tags of two forms. + + * **Type tag that is a reference to a declared identifier.** + Use ``__attribute__((type_tag_for_datatype(kind, type)))`` when declaring that + identifier: + + .. code-block:: c++ + + typedef int MPI_Datatype; + extern struct mpi_datatype mpi_datatype_int + __attribute__(( type_tag_for_datatype(mpi,int) )); + #define MPI_INT ((MPI_Datatype) &mpi_datatype_int) + // &mpi_datatype_int is a type tag. It is tied to type "int". + + * **Type tag that is an integral literal.** + Declare a ``static const`` variable with an initializer value and attach + ``__attribute__((type_tag_for_datatype(kind, type)))`` on that declaration: + + .. code-block:: c++ + + typedef int MPI_Datatype; + static const MPI_Datatype mpi_datatype_int + __attribute__(( type_tag_for_datatype(mpi,int) )) = 42; + #define MPI_INT ((MPI_Datatype) 42) + // The number 42 is a type tag. It is tied to type "int". + + +The ``type_tag_for_datatype`` attribute also accepts an optional third argument +that determines how the type of the function argument specified by either +``arg_idx`` or ``ptr_idx`` is compared against the type associated with the type +tag. (Recall that for the ``argument_with_type_tag`` attribute, the type of the +function argument specified by ``arg_idx`` is compared against the type +associated with the type tag. Also recall that for the ``pointer_with_type_tag`` +attribute, the pointee type of the function argument specified by ``ptr_idx`` is +compared against the type associated with the type tag.) There are two supported +values for this optional third argument: + + * ``layout_compatible`` will cause types to be compared according to + layout-compatibility rules (In C++11 [class.mem] p 17, 18, see the + layout-compatibility rules for two standard-layout struct types and for two + standard-layout union types). This is useful when creating a type tag + associated with a struct or union type. For example: + + .. 
code-block:: c++
+
+      /* In mpi.h */
+      typedef int MPI_Datatype;
+      struct internal_mpi_double_int { double d; int i; };
+      extern struct mpi_datatype mpi_datatype_double_int
+          __attribute__(( type_tag_for_datatype(mpi,
+                          struct internal_mpi_double_int, layout_compatible) ));
+
+      #define MPI_DOUBLE_INT ((MPI_Datatype) &mpi_datatype_double_int)
+
+      int MPI_Send(void *buf, int count, MPI_Datatype datatype, ...)
+          __attribute__(( pointer_with_type_tag(mpi,1,3) ));
+
+      /* In user code */
+      struct my_pair { double a; int b; };
+      struct my_pair *buffer;
+      MPI_Send(buffer, 1, MPI_DOUBLE_INT /*, ... */); // no warning because the
+                                                      // layout of my_pair is
+                                                      // compatible with that of
+                                                      // internal_mpi_double_int
+
+      struct my_int_pair { int a; int b; };
+      struct my_int_pair *buffer2;
+      MPI_Send(buffer2, 1, MPI_DOUBLE_INT /*, ... */); // warning because the
+                                                       // layout of my_int_pair
+                                                       // does not match that of
+                                                       // internal_mpi_double_int
+
+ * ``must_be_null`` specifies that the function argument specified by either
+   ``arg_idx`` (for the ``argument_with_type_tag`` attribute) or ``ptr_idx`` (for
+   the ``pointer_with_type_tag`` attribute) should be a null pointer constant.
+   The second argument to the ``type_tag_for_datatype`` attribute is ignored. For
+   example:
+
+   .. code-block:: c++
+
+      /* In mpi.h */
+      typedef int MPI_Datatype;
+      extern struct mpi_datatype mpi_datatype_null
+          __attribute__(( type_tag_for_datatype(mpi, void, must_be_null) ));
+
+      #define MPI_DATATYPE_NULL ((MPI_Datatype) &mpi_datatype_null)
+      int MPI_Send(void *buf, int count, MPI_Datatype datatype, ...)
+          __attribute__(( pointer_with_type_tag(mpi,1,3) ));
+
+      /* In user code */
+      struct my_pair { double a; int b; };
+      struct my_pair *buffer;
+      MPI_Send(buffer, 1, MPI_DATATYPE_NULL /*, ... */); // warning: MPI_DATATYPE_NULL
+                                                         // was specified but buffer
+                                                         // is not a null pointer
+
+
+OpenCL Address Spaces
+=====================
+The address space qualifier may be used to specify the region of memory that is
+used to allocate the object. OpenCL supports the following address spaces:
+__generic(generic), __global(global), __local(local), __private(private),
+__constant(constant).
+
+  .. code-block:: c
+
+    __constant int c = ...;
+
+    __generic int* foo(global int* g) {
+      __local int* l;
+      private int p;
+      ...
+      return l;
+    }
+
+More details can be found in the OpenCL C language Spec v2.0, Section 6.5.
+
+constant (__constant)
+---------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute"
+
+   "","","","X", "", ""
+
+The constant address space attribute signals that an object is located in
+a constant (non-modifiable) memory region. It is available to all work items.
+Any type can be annotated with the constant address space attribute. Objects
+with the constant address space qualifier can be declared in any scope and must
+have an initializer.
+
+
+generic (__generic)
+-------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute"
+
+   "","","","X", "", ""
+
+The generic address space attribute is only available with OpenCL v2.0 and later.
+It can be used with pointer types. Variables in global and local scope and
+function parameters in non-kernel functions can have the generic address space
+type attribute. It is intended to be a placeholder for any other address space
+except for '__constant' in OpenCL code which can be used with multiple address
+spaces.
+
+
+global (__global)
+-----------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute"
+
+   "","","","X", "", ""
+
+The global address space attribute specifies that an object is allocated in
+global memory, which is accessible by all work items. The content stored in this
+memory area persists between kernel executions. Pointer types to the global
+address space are allowed as function parameters or local variables. Starting
+with OpenCL v2.0, the global address space can be used with global (program
+scope) variables and static local variables as well.
+
+
+local (__local)
+---------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute"
+
+   "","","","X", "", ""
+
+The local address space specifies that an object is allocated in the local (work
+group) memory area, which is accessible to all work items in the same work
+group. The content stored in this memory region is not accessible after
+the kernel execution ends. In a kernel function scope, any variable can be in
+the local address space. In other scopes, only pointer types to the local address
+space are allowed. Local address space variables cannot have an initializer.
+
+
+private (__private)
+-------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute"
+
+   "","","","X", "", ""
+
+The private address space specifies that an object is allocated in the private
+(work item) memory. Other work items cannot access the same memory area and its
+content is destroyed after work item execution ends. Local variables can be
+declared in the private address space. Function arguments are always in the
+private address space. Kernel function arguments of a pointer or an array type
+cannot point to the private address space.
+
+
+Nullability Attributes
+======================
+Whether a particular pointer may be "null" is an important concern when working
+with pointers in the C family of languages. The various nullability attributes
+indicate whether a particular pointer can be null or not, which makes APIs more
+expressive and can help static analysis tools identify bugs involving null
+pointers. Clang supports several kinds of nullability attributes: the
+``nonnull`` and ``returns_nonnull`` attributes indicate which function or
+method parameters and result types can never be null, while nullability type
+qualifiers indicate which pointer types can be null (``_Nullable``) or cannot
+be null (``_Nonnull``).
+
+The nullability (type) qualifiers express whether a value of a given pointer
+type can be null (the ``_Nullable`` qualifier), doesn't have a defined meaning
+for null (the ``_Nonnull`` qualifier), or for which the purpose of null is
+unclear (the ``_Null_unspecified`` qualifier). Because nullability qualifiers
+are expressed within the type system, they are more general than the
+``nonnull`` and ``returns_nonnull`` attributes, allowing one to express (for
+example) a nullable pointer to an array of nonnull pointers. Nullability
+qualifiers are written to the right of the pointer to which they apply. For
+example:
+
+  .. code-block:: c
+
+    // No meaningful result when 'ptr' is null (here, it happens to be undefined behavior).
+    int fetch(int * _Nonnull ptr) { return *ptr; }
+
+    // 'ptr' may be null.
+    int fetch_or_zero(int * _Nullable ptr) {
+      return ptr ? *ptr : 0;
+    }
+
+    // A nullable pointer to non-null pointers to const characters.
+    const char *join_strings(const char * _Nonnull * _Nullable strings, unsigned n);
+
+In Objective-C, there is an alternate spelling for the nullability qualifiers
+that can be used in Objective-C methods and properties using context-sensitive,
+non-underscored keywords. For example:
+
+  .. code-block:: objective-c
+
+    @interface NSView : NSResponder
+      - (nullable NSView *)ancestorSharedWithView:(nonnull NSView *)aView;
+      @property (assign, nullable) NSView *superview;
+      @property (readonly, nonnull) NSArray *subviews;
+    @end
+
+nonnull (gnu::nonnull)
+----------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute"
+
+   "X","X","","", "", ""
+
+The ``nonnull`` attribute indicates that some function parameters must not be
+null, and can be used in several different ways. Its original usage
+(`from GCC `_) is as a function (or Objective-C method) attribute that
+specifies which parameters of the function are nonnull in a comma-separated
+list. For example:
+
+  .. code-block:: c
+
+    extern void * my_memcpy (void *dest, const void *src, size_t len)
+        __attribute__((nonnull (1, 2)));
+
+Here, the ``nonnull`` attribute indicates that parameters 1 and 2
+cannot have a null value. Omitting the parenthesized list of parameter
+indices means that all parameters of pointer type cannot be null:
+
+  .. code-block:: c
+
+    extern void * my_memcpy (void *dest, const void *src, size_t len)
+        __attribute__((nonnull));
+
+Clang also allows the ``nonnull`` attribute to be placed directly on a function
+(or Objective-C method) parameter, eliminating the need to specify the
+parameter index ahead of type. For example:
+
+  .. code-block:: c
+
+    extern void * my_memcpy (void *dest __attribute__((nonnull)),
+                             const void *src __attribute__((nonnull)), size_t len);
+
+Note that the ``nonnull`` attribute indicates that passing null to a non-null
+parameter is undefined behavior, which the optimizer may take advantage of to,
+e.g., remove null checks. The ``_Nonnull`` type qualifier indicates that a
+pointer cannot be null in a more general manner (because it is part of the type
+system) and does not imply undefined behavior, making it more widely applicable.
+
+
+returns_nonnull (gnu::returns_nonnull)
+--------------------------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute"
+
+   "X","X","","", "", "X"
+
+The ``returns_nonnull`` attribute indicates that a particular function (or
+Objective-C method) always returns a non-null pointer. For example, a
+particular system ``malloc`` might be defined to terminate a process when
+memory is not available rather than returning a null pointer:
+
+  .. code-block:: c
+
+    extern void * malloc (size_t size) __attribute__((returns_nonnull));
+
+The ``returns_nonnull`` attribute implies that returning a null pointer is
+undefined behavior, which the optimizer may take advantage of. The ``_Nonnull``
+type qualifier indicates that a pointer cannot be null in a more general manner
+(because it is part of the type system) and does not imply undefined behavior,
+making it more widely applicable.
+
+
+_Nonnull
+--------
+..
csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute" + + "","","","X", "", "" + +The ``_Nonnull`` nullability qualifier indicates that null is not a meaningful value for a value of the ``_Nonnull`` pointer type. For example, given a declaration such as: + + .. code-block:: c + + int fetch(int * _Nonnull ptr); + +a caller of ``fetch`` should not provide a null value, and the compiler will produce a warning if it sees a literal null value passed to ``fetch``. Note that, unlike the declaration attribute ``nonnull``, the presence of ``_Nonnull`` does not imply that passing null is undefined behavior: ``fetch`` is free to consider null undefined behavior or (perhaps for backward-compatibility reasons) defensively handle null. + + +_Null_unspecified +----------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute" + + "","","","X", "", "" + +The ``_Null_unspecified`` nullability qualifier indicates that neither the ``_Nonnull`` nor ``_Nullable`` qualifiers make sense for a particular pointer type. It is used primarily to indicate that the role of null with specific pointers in a nullability-annotated header is unclear, e.g., due to overly-complex implementations or historical factors with a long-lived API. + + +_Nullable +--------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "__declspec", "Keyword", "Pragma", "Pragma clang attribute" + + "","","","X", "", "" + +The ``_Nullable`` nullability qualifier indicates that a value of the ``_Nullable`` pointer type can be null. For example, given: + + .. code-block:: c + + int fetch_or_zero(int * _Nullable ptr); + +a caller of ``fetch_or_zero`` can provide null. + + diff --git a/interpreter/llvm/src/tools/clang/docs/Block-ABI-Apple.rst b/interpreter/llvm/src/tools/clang/docs/Block-ABI-Apple.rst index 628e6f3d90ba5..7f49bbd40d71c 100644 --- a/interpreter/llvm/src/tools/clang/docs/Block-ABI-Apple.rst +++ b/interpreter/llvm/src/tools/clang/docs/Block-ABI-Apple.rst @@ -856,15 +856,15 @@ mentioned above, call: .. code-block:: c - _Block_object_assign(&dst->target, src->target, BLOCK_FIELD_); + _Block_object_assign(&dst->target, src->target, BLOCK_FIELD_); in the copy helper and: .. code-block:: c - _Block_object_dispose(->target, BLOCK_FIELD_); + _Block_object_dispose(->target, BLOCK_FIELD_); -in the dispose helper where ```` is: +in the dispose helper where ```` is: .. code-block:: c @@ -888,7 +888,7 @@ and functions are generated in the same manner. Under ObjC we allow ``__weak`` as an attribute on ``__block`` variables, and this causes the addition of ``BLOCK_FIELD_IS_WEAK`` orred onto the ``BLOCK_FIELD_IS_BYREF`` flag when copying the ``block_byref`` structure in the -``Block`` copy helper, and onto the ``BLOCK_FIELD_`` field within the +``Block`` copy helper, and onto the ``BLOCK_FIELD_`` field within the ``block_byref`` copy/dispose helper calls. The prototypes, and summary, of the helper functions are: diff --git a/interpreter/llvm/src/tools/clang/docs/ClangCommandLineReference.rst b/interpreter/llvm/src/tools/clang/docs/ClangCommandLineReference.rst index add168829e1ba..d964e34b98bf6 100644 --- a/interpreter/llvm/src/tools/clang/docs/ClangCommandLineReference.rst +++ b/interpreter/llvm/src/tools/clang/docs/ClangCommandLineReference.rst @@ -96,6 +96,8 @@ Emit ARC errors even if the migrator can fix them Output path for the plist report +.. option:: --autocomplete= + .. option:: -bind\_at\_load .. 
option:: -bundle @@ -292,7 +294,7 @@ Disable builtin #include directories .. option:: -nomultidefs -.. option:: -nopie +.. option:: -nopie, -no-pie .. option:: -noprebind @@ -654,6 +656,10 @@ Pass to the assembler Pass to the clang compiler +.. option:: -fclang-abi-compat= + +Attempt to match the ABI of Clang + .. option:: -fcomment-block-commands=,... Treat each comma separated argument in as a documentation comment block command @@ -704,6 +710,10 @@ Don't use blacklist file for sanitizers Level of field padding for AddressSanitizer +.. option:: -fsanitize-address-globals-dead-stripping + +Enable linker dead stripping of globals in AddressSanitizer + .. option:: -fsanitize-address-use-after-scope, -fno-sanitize-address-use-after-scope Enable use-after-scope detection in AddressSanitizer @@ -1071,6 +1081,10 @@ Target-independent compilation options Enable C++17 aligned allocation functions +.. option:: -fallow-editor-placeholders, -fno-allow-editor-placeholders + +Treat editor placeholders as valid source code + .. option:: -fallow-unsupported .. option:: -faltivec, -fno-altivec @@ -1205,6 +1219,10 @@ Print absolute paths in diagnostics .. option:: -fdiagnostics-color= .. program:: clang +.. option:: -fdiagnostics-hotness-threshold= + +Prevent optimization remarks from being output if they do not have at least this profile count + .. option:: -fdiagnostics-show-hotness, -fno-diagnostics-show-hotness Enable profile hotness information in diagnostic line @@ -1585,6 +1603,8 @@ Turn on loop reroller .. option:: -fretain-comments-from-system-headers +.. option:: -frewrite-imports, -fno-rewrite-imports + .. option:: -frewrite-includes, -fno-rewrite-includes .. option:: -frewrite-map-file @@ -1639,10 +1659,6 @@ Use SjLj style exceptions Enable the superword-level parallelism vectorization passes -.. option:: -fslp-vectorize-aggressive, -fno-slp-vectorize-aggressive - -Enable the BB vectorization passes - .. option:: -fspell-checking, -fno-spell-checking .. option:: -fspell-checking-limit= @@ -1911,6 +1927,8 @@ Link stack frames through backchain on System Z .. option:: -mcpu=, -mv4 (equivalent to -mcpu=hexagonv4), -mv5 (equivalent to -mcpu=hexagonv5), -mv55 (equivalent to -mcpu=hexagonv55), -mv60 (equivalent to -mcpu=hexagonv60), -mv62 (equivalent to -mcpu=hexagonv62) +.. option:: -mdefault-build-attributes, -mno-default-build-attributes + .. option:: -mdll .. option:: -mdouble-float @@ -1975,10 +1993,16 @@ Use Intel MCU ABI Generate branches with extended addressability, usually via indirect jumps. -.. option:: -mmacosx-version-min= +.. option:: -mmacosx-version-min=, -mmacos-version-min= Set Mac OS X deployment target +.. option:: -mmadd4, -mno-madd4 + +Enable the generation of 4-operand madd.s, madd.d and related instructions. + +.. option:: -mmcu= + .. option:: -mmicromips, -mno-micromips .. option:: -mms-bitfields, -mno-ms-bitfields @@ -1989,6 +2013,10 @@ Set the default structure layout to be compatible with the Microsoft compiler st Enable MSA ASE (MIPS only) +.. option:: -mmt, -mno-mt + +Enable MT ASE (MIPS only) + .. option:: -mnan= .. option:: -mno-mips16 @@ -2203,6 +2231,8 @@ X86 .. option:: -mavx512vl, -mno-avx512vl +.. option:: -mavx512vpopcntdq, -mno-avx512vpopcntdq + .. option:: -mbmi, -mno-bmi .. option:: -mbmi2, -mno-bmi2 @@ -2225,6 +2255,8 @@ X86 .. option:: -mfxsr, -mno-fxsr +.. option:: -mlwp, -mno-lwp + .. option:: -mlzcnt, -mno-lzcnt .. option:: -mmmx, -mno-mmx @@ -2372,6 +2404,16 @@ Debug information flags .. option:: -gstrict-dwarf, -gno-strict-dwarf +.. 
option:: -gz + +DWARF debug sections compression type + +.. program:: clang1 +.. option:: -gz= +.. program:: clang + +DWARF debug sections compression type + Static analyzer flags ===================== diff --git a/interpreter/llvm/src/tools/clang/docs/ClangFormat.rst b/interpreter/llvm/src/tools/clang/docs/ClangFormat.rst index ed0e58e452d30..902afcd08ed5f 100644 --- a/interpreter/llvm/src/tools/clang/docs/ClangFormat.rst +++ b/interpreter/llvm/src/tools/clang/docs/ClangFormat.rst @@ -120,6 +120,18 @@ entity. It operates on the current, potentially unsaved buffer and does not create or save any files. To revert a formatting, just undo. +An alternative option is to format changes when saving a file and thus to +have a zero-effort integration into the coding workflow. To do this, add this to +your `.vimrc`: + +.. code-block:: vim + + function! Formatonsave() + let l:formatdiff = 1 + pyf ~/llvm/tools/clang/tools/clang-format/clang-format.py + endfunction + autocmd BufWritePre *.h,*.cc,*.cpp call Formatonsave() + Emacs Integration ================= diff --git a/interpreter/llvm/src/tools/clang/docs/ClangFormatStyleOptions.rst b/interpreter/llvm/src/tools/clang/docs/ClangFormatStyleOptions.rst index f54acd9b81dd5..6133ca9900c97 100644 --- a/interpreter/llvm/src/tools/clang/docs/ClangFormatStyleOptions.rst +++ b/interpreter/llvm/src/tools/clang/docs/ClangFormatStyleOptions.rst @@ -309,12 +309,28 @@ the configuration (without a prefix: ``Auto``). * ``SFS_None`` (in configuration: ``None``) Never merge functions into a single line. + * ``SFS_InlineOnly`` (in configuration: ``InlineOnly``) + Only merge functions defined inside a class. Same as "inline", + except it does not implies "empty": i.e. top level empty functions + are not merged either. + + .. code-block:: c++ + + class Foo { + void f() { foo(); } + }; + void f() { + foo(); + } + void f() { + } + * ``SFS_Empty`` (in configuration: ``Empty``) Only merge empty functions. .. code-block:: c++ - void f() { bar(); } + void f() {} void f2() { bar2(); } @@ -327,6 +343,10 @@ the configuration (without a prefix: ``Auto``). class Foo { void f() { foo(); } }; + void f() { + foo(); + } + void f() {} * ``SFS_All`` (in configuration: ``All``) Merge all functions fitting on a single line. @@ -518,147 +538,159 @@ the configuration (without a prefix: ``Auto``). * ``bool AfterClass`` Wrap class definitions. - .. code-block:: c++ + .. code-block:: c++ - true: - class foo {}; + true: + class foo {}; - false: - class foo - {}; + false: + class foo + {}; * ``bool AfterControlStatement`` Wrap control statements (``if``/``for``/``while``/``switch``/..). - .. code-block:: c++ + .. code-block:: c++ - true: - if (foo()) - { - } else - {} - for (int i = 0; i < 10; ++i) - {} + true: + if (foo()) + { + } else + {} + for (int i = 0; i < 10; ++i) + {} - false: - if (foo()) { - } else { - } - for (int i = 0; i < 10; ++i) { - } + false: + if (foo()) { + } else { + } + for (int i = 0; i < 10; ++i) { + } * ``bool AfterEnum`` Wrap enum definitions. - .. code-block:: c++ + .. code-block:: c++ - true: - enum X : int - { - B - }; + true: + enum X : int + { + B + }; - false: - enum X : int { B }; + false: + enum X : int { B }; * ``bool AfterFunction`` Wrap function definitions. - .. code-block:: c++ + .. code-block:: c++ - true: - void foo() - { - bar(); - bar2(); - } + true: + void foo() + { + bar(); + bar2(); + } - false: - void foo() { - bar(); - bar2(); - } + false: + void foo() { + bar(); + bar2(); + } * ``bool AfterNamespace`` Wrap namespace definitions. - .. 
code-block:: c++ + .. code-block:: c++ - true: - namespace - { - int foo(); - int bar(); - } + true: + namespace + { + int foo(); + int bar(); + } - false: - namespace { - int foo(); - int bar(); - } + false: + namespace { + int foo(); + int bar(); + } * ``bool AfterObjCDeclaration`` Wrap ObjC definitions (``@autoreleasepool``, interfaces, ..). * ``bool AfterStruct`` Wrap struct definitions. - .. code-block:: c++ + .. code-block:: c++ - true: - struct foo - { - int x; - } + true: + struct foo + { + int x; + }; - false: - struct foo { - int x; - } + false: + struct foo { + int x; + }; * ``bool AfterUnion`` Wrap union definitions. - .. code-block:: c++ + .. code-block:: c++ - true: - union foo - { - int x; - } + true: + union foo + { + int x; + } - false: - union foo { - int x; - } + false: + union foo { + int x; + } * ``bool BeforeCatch`` Wrap before ``catch``. - .. code-block:: c++ + .. code-block:: c++ - true: - try { - foo(); - } - catch () { - } + true: + try { + foo(); + } + catch () { + } - false: - try { - foo(); - } catch () { - } + false: + try { + foo(); + } catch () { + } * ``bool BeforeElse`` Wrap before ``else``. - .. code-block:: c++ + .. code-block:: c++ - true: - if (foo()) { - } - else { - } + true: + if (foo()) { + } + else { + } - false: - if (foo()) { - } else { - } + false: + if (foo()) { + } else { + } * ``bool IndentBraces`` Indent the wrapped braces themselves. + * ``bool SplitEmptyFunctionBody`` If ``false``, empty function body can be put on a single line. + This option is used only if the opening brace of the function has + already been wrapped, i.e. the `AfterFunction` brace wrapping mode is + set, and the function could/should not be put on a single line (as per + `AllowShortFunctionsOnASingleLine` and constructor formatting options). + + .. code-block:: c++ + + int f() vs. inf f() + {} { + } + **BreakAfterJavaFieldAnnotations** (``bool``) Break after each annotation on a field in Java files. @@ -894,22 +926,45 @@ the configuration (without a prefix: ``Auto``). ? firstValue : SecondValueVeryVeryVeryVeryLong; - true: + false: veryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryLongDescription ? firstValue : SecondValueVeryVeryVeryVeryLong; -**BreakConstructorInitializersBeforeComma** (``bool``) - Always break constructor initializers before commas and align - the commas with the colon. +**BreakConstructorInitializers** (``BreakConstructorInitializersStyle``) + The constructor initializers style to use. + + Possible values: + + * ``BCIS_BeforeColon`` (in configuration: ``BeforeColon``) + Break constructor initializers before the colon and after the commas. + + .. code-block:: c++ + + Constructor() + : initializer1(), + initializer2() + + * ``BCIS_BeforeComma`` (in configuration: ``BeforeComma``) + Break constructor initializers before the colon and commas, and align + the commas with the colon. + + .. code-block:: c++ + + Constructor() + : initializer1() + , initializer2() + + * ``BCIS_AfterColon`` (in configuration: ``AfterColon``) + Break constructor initializers after the colon and commas. + + .. code-block:: c++ + + Constructor() : + initializer1(), + initializer2() - .. code-block:: c++ - true: false: - SomeClass::Constructor() vs. SomeClass::Constructor() : a(a), - : a(a) b(b), - , b(b) c(c) {} - , c(c) {} **BreakStringLiterals** (``bool``) Allow breaking string literals when formatting. @@ -931,6 +986,31 @@ the configuration (without a prefix: ``Auto``). 
// Will leave the following line unaffected #include // FOOBAR pragma: keep +**CompactNamespaces** (``bool``) + If ``true``, consecutive namespace declarations will be on the same + line. If ``false``, each namespace is declared on a new line. + + .. code-block:: c++ + + true: + namespace Foo { namespace Bar { + }} + + false: + namespace Foo { + namespace Bar { + } + } + + If it does not fit on a single line, the overflowing namespaces get + wrapped: + + .. code-block:: c++ + + namespace Foo { namespace Bar { + namespace Extra { + }}} + **ConstructorInitializerAllOnOneLineOrOnePerLine** (``bool``) If the constructor initializers don't fit on a line, put each initializer on its own line. @@ -1321,6 +1401,9 @@ the configuration (without a prefix: ``Auto``). Add a space in front of an Objective-C protocol list, i.e. use ``Foo `` instead of ``Foo``. +**PenaltyBreakAssignment** (``unsigned``) + The penalty for breaking around an assignment operator. + **PenaltyBreakBeforeFirstCallParameter** (``unsigned``) The penalty for breaking a function call after ``call(``. @@ -1392,6 +1475,15 @@ the configuration (without a prefix: ``Auto``). #include "b.h" vs. #include "a.h" #include "a.h" #include "b.h" +**SortUsingDeclarations** (``bool``) + If ``true``, clang-format will sort using declarations. + + .. code-block:: c++ + + false: true: + using std::cout; vs. using std::cin; + using std::cin; using std::cout; + **SpaceAfterCStyleCast** (``bool``) If ``true``, a space is inserted after C style casts. diff --git a/interpreter/llvm/src/tools/clang/docs/ControlFlowIntegrityDesign.rst b/interpreter/llvm/src/tools/clang/docs/ControlFlowIntegrityDesign.rst index 69b72f9ea5b2e..e4225b35476a9 100644 --- a/interpreter/llvm/src/tools/clang/docs/ControlFlowIntegrityDesign.rst +++ b/interpreter/llvm/src/tools/clang/docs/ControlFlowIntegrityDesign.rst @@ -437,12 +437,17 @@ export this information, every DSO implements .. code-block:: none - void __cfi_check(uint64 CallSiteTypeId, void *TargetAddr) - -This function provides external modules with access to CFI checks for the -targets inside this DSO. For each known ``CallSiteTypeId``, this function -performs an ``llvm.type.test`` with the corresponding type identifier. It -aborts if the type is unknown, or if the check fails. + void __cfi_check(uint64 CallSiteTypeId, void *TargetAddr, void *DiagData) + +This function provides external modules with access to CFI checks for +the targets inside this DSO. For each known ``CallSiteTypeId``, this +function performs an ``llvm.type.test`` with the corresponding type +identifier. It reports an error if the type is unknown, or if the +check fails. Depending on the values of compiler flags +``-fsanitize-trap`` and ``-fsanitize-recover``, this function may +print an error, abort and/or return to the caller. ``DiagData`` is an +opaque pointer to the diagnostic information about the error, or +``null`` if the caller does not provide this information. The basic implementation is a large switch statement over all values of CallSiteTypeId supported by this DSO, and each case is similar to @@ -452,11 +457,10 @@ CFI Shadow ---------- To route CFI checks to the target DSO's __cfi_check function, a -mapping from possible virtual / indirect call targets to -the corresponding __cfi_check functions is maintained. This mapping is +mapping from possible virtual / indirect call targets to the +corresponding __cfi_check functions is maintained. 
This mapping is
 implemented as a sparse array of 2 bytes for every possible page (4096
-bytes) of memory. The table is kept readonly (FIXME: not yet) most of
-the time.
+bytes) of memory. The table is kept readonly most of the time.
 
 There are 3 types of shadow values:
 
@@ -481,14 +485,24 @@ them.
 
 CFI_SlowPath
 ------------
 
-The slow path check is implemented in compiler-rt library as
+The slow path check is implemented in a runtime support library as
 
 .. code-block:: none
 
     void __cfi_slowpath(uint64 CallSiteTypeId, void *TargetAddr)
-
-This functions loads a shadow value for ``TargetAddr``, finds the
-address of __cfi_check as described above and calls that.
+    void __cfi_slowpath_diag(uint64 CallSiteTypeId, void *TargetAddr, void *DiagData)
+
+These functions load a shadow value for ``TargetAddr``, find the
+address of ``__cfi_check`` as described above and call
+that. ``DiagData`` is an opaque pointer to diagnostic data which is
+passed verbatim to ``__cfi_check``, and ``__cfi_slowpath`` passes
+``nullptr`` instead.
+
+The Compiler-RT library contains reference implementations of slowpath
+functions, but they have unresolvable issues with correctness and
+performance in the handling of dlopen(). It is recommended that
+platforms provide their own implementations, usually as part of libc
+or libdl.
 
 Position-independent executable requirement
 -------------------------------------------
 
diff --git a/interpreter/llvm/src/tools/clang/docs/LanguageExtensions.rst b/interpreter/llvm/src/tools/clang/docs/LanguageExtensions.rst
index f8dd1a39c6761..78f987c4a8e8b 100644
--- a/interpreter/llvm/src/tools/clang/docs/LanguageExtensions.rst
+++ b/interpreter/llvm/src/tools/clang/docs/LanguageExtensions.rst
@@ -1271,6 +1271,87 @@ Further examples of these attributes are available in the static analyzer's `lis
 Query for these features with ``__has_attribute(ns_consumed)``,
 ``__has_attribute(ns_returns_retained)``, etc.
 
+Objective-C @available
+----------------------
+
+It is possible to use the newest SDK but still build a program that can run on
+older versions of macOS and iOS by passing ``-mmacosx-version-min=`` /
+``-miphoneos-version-min=``.
+
+Before LLVM 5.0, when calling a function that exists only in an OS that's
+newer than the target OS (as determined by the minimum deployment version),
+programmers had to carefully check if the function exists at runtime, using
+null checks for weakly-linked C functions, ``+class`` for Objective-C classes,
+and ``-respondsToSelector:`` or ``+instancesRespondToSelector:`` for
+Objective-C methods. If such a check was missed, the program would compile
+fine, run fine on newer systems, but crash on older systems.
+
+As of LLVM 5.0, ``-Wunguarded-availability`` uses the `availability attributes
+<http://clang.llvm.org/docs/AttributeReference.html#availability>`_ together
+with the new ``@available()`` keyword to assist with this issue.
+When a method that's introduced in an OS newer than the target OS is called, a
+-Wunguarded-availability warning is emitted if that call is not guarded:
+
+.. code-block:: objc
+
+  void my_fun(NSSomeClass* var) {
+    // If fancyNewMethod was added in e.g. macOS 10.12, but the code is
+    // built with -mmacosx-version-min=10.11, then this unconditional call
+    // will emit a -Wunguarded-availability warning:
+    [var fancyNewMethod];
+  }
+
+To fix the warning and to avoid the crash on macOS 10.11, wrap it in
+``if(@available())``:
+
+.. code-block:: objc
+
+  void my_fun(NSSomeClass* var) {
+    if (@available(macOS 10.12, *)) {
+      [var fancyNewMethod];
+    } else {
+      // Put fallback behavior for old macOS versions (and for non-mac
+      // platforms) here.
+    }
+  }
+
+The ``*`` is required and means that platforms not explicitly listed will take
+the true branch, and the compiler will emit ``-Wunguarded-availability``
+warnings for unlisted platforms based on those platforms' deployment targets.
+More than one platform can be listed in ``@available()``:
+
+.. code-block:: objc
+
+  void my_fun(NSSomeClass* var) {
+    if (@available(macOS 10.12, iOS 10, *)) {
+      [var fancyNewMethod];
+    }
+  }
+
+If the caller of ``my_fun()`` already checks that ``my_fun()`` is only called
+on 10.12, then add an `availability attribute
+<http://clang.llvm.org/docs/AttributeReference.html#availability>`_ to it,
+which will also suppress the warning and require that calls to my_fun() are
+checked:
+
+.. code-block:: objc
+
+  API_AVAILABLE(macos(10.12)) void my_fun(NSSomeClass* var) {
+    [var fancyNewMethod];  // Now ok.
+  }
+
+``@available()`` is only available in Objective-C code. To use the feature
+in C and C++ code, use the ``__builtin_available()`` spelling instead.
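+
+For example, a minimal C++ sketch of the same guard (the function name is
+illustrative):
+
+.. code-block:: c++
+
+  void my_fun() {
+    if (__builtin_available(macOS 10.12, iOS 10, *)) {
+      // Call APIs introduced in macOS 10.12 / iOS 10 here.
+    } else {
+      // Fall back for older deployment targets.
+    }
+  }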
+
+If existing code uses null checks or ``-respondsToSelector:``, it should
+be changed to use ``@available()`` (or ``__builtin_available``) instead.
+
+``-Wunguarded-availability`` is disabled by default, but
+``-Wunguarded-availability-new``, which only emits this warning for APIs
+that have been introduced in macOS >= 10.13, iOS >= 11, watchOS >= 4 and
+tvOS >= 11, is enabled by default.
+
+.. _langext-overloading:
 
 Objective-C++ ABI: protocol-qualifier mangling of parameters
 ------------------------------------------------------------
 
@@ -1287,8 +1368,6 @@ parameters of protocol-qualified type.
 Query the presence of this new mangling with
 ``__has_feature(objc_protocol_qualifier_mangling)``.
 
-.. _langext-overloading:
-
 Initializer lists for complex numbers in C
 ==========================================
 
@@ -2521,3 +2600,45 @@ whether or not an attribute is supported by the pragma by referring to the
 The attributes are applied to all matching declarations individually,
 even when the attribute is semantically incorrect. The attributes that aren't
 applied to any declaration are not verified semantically.
+
+Specifying section names for global objects (#pragma clang section)
+===================================================================
+
+The ``#pragma clang section`` directive provides a means to assign section-names
+to global variables, functions and static variables.
+
+The section names can be specified as:
+
+.. code-block:: c++
+
+  #pragma clang section bss="myBSS" data="myData" rodata="myRodata" text="myText"
+
+The section names can be reverted back to their defaults by supplying an empty
+string to the section kind, for example:
+
+.. code-block:: c++
+
+  #pragma clang section bss="" data="" text="" rodata=""
+
+The ``#pragma clang section`` directive obeys the following rules:
+
+* The pragma applies to all global variable, static, and function declarations
+  from the pragma to the end of the translation unit.
+
+* The pragma is enabled automatically, without the need for any flags.
+
+* This feature is only defined to work sensibly for ELF targets.
+
+* If a section name is specified through ``__attribute__((section("myname")))``,
+  then the attribute name takes precedence.
+
+* Global variables that are initialized to zero will be placed in the named
+  bss section, if one is present.
+
+* The ``#pragma clang section`` directive does not try to infer the section
+  kind from the name. For example, naming a section "``.bss.mySec``" does NOT
+  mean it will be a bss section.
+
+* The decision about which section kind applies to each global is made in the
+  back end. Once the section kind is known, the section name specified by the
+  user via the ``#pragma clang section`` directive is applied to that global.
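+
+As an illustrative sketch (the variable names are hypothetical), globals
+declared while the pragma is active are placed according to these rules:
+
+.. code-block:: c++
+
+  #pragma clang section bss="myBSS" data="myData"
+  int zero_init;       // zero-initialized: placed in myBSS
+  int value_init = 42; // has a non-zero initializer: placed in myData
+  #pragma clang section bss="" data=""
+  int ordinary;        // back to the default sections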
diff --git a/interpreter/llvm/src/tools/clang/docs/LibASTMatchersReference.html b/interpreter/llvm/src/tools/clang/docs/LibASTMatchersReference.html
index c91b2a2490290..cb5020af49c61 100644
--- a/interpreter/llvm/src/tools/clang/docs/LibASTMatchersReference.html
+++ b/interpreter/llvm/src/tools/clang/docs/LibASTMatchersReference.html
@@ -1859,17 +1859,44 @@

Narrowing Matchers

-Matcher<CXXBoolLiteral>equalsValueT Value -
Matches literals that are equal to the given value.
+Matcher<CXXBoolLiteralExpr>equalsValueT  Value
+
Matches literals that are equal to the given value of type ValueT.
 
-Example matches true (matcher = cxxBoolLiteral(equals(true)))
-  true
+Given
+  f('\0', false, 3.14, 42);
+characterLiteral(equals(0))
+  matches '\0'
+cxxBoolLiteral(equals(false)) and cxxBoolLiteral(equals(0))
+  match false
+floatLiteral(equals(3.14)) and floatLiteral(equals(314e-2))
+  match 3.14
+integerLiteral(equals(42))
+  matches 42
+
+Note that you cannot directly match a negative numeric literal because the
+minus sign is not part of the literal: It is a unary operator whose operand
+is the positive numeric literal. Instead, you must use a unaryOperator()
+matcher to match the minus sign:
 
-Usable as: Matcher<CharacterLiteral>, Matcher<CXXBoolLiteral>,
+unaryOperator(hasOperatorName("-"),
+              hasUnaryOperand(integerLiteral(equals(13))))
+
+Usable as: Matcher<CharacterLiteral>, Matcher<CXXBoolLiteralExpr>,
            Matcher<FloatingLiteral>, Matcher<IntegerLiteral>
 
+Matcher<CXXBoolLiteralExpr>equalsbool Value +

+
+
+Matcher<CXXBoolLiteralExpr>equalsdouble Value
+

+
+
+Matcher<CXXBoolLiteralExpr>equalsunsigned Value
+

+
+
 Matcher<CXXCatchStmt>isCatchAll
 
Matches a C++ catch statement that has a catch-all handler.
 
@@ -2296,16 +2323,43 @@ 

Narrowing Matchers

Matcher<CharacterLiteral>equalsValueT Value -
Matches literals that are equal to the given value.
+
Matches literals that are equal to the given value of type ValueT.
 
-Example matches true (matcher = cxxBoolLiteral(equals(true)))
-  true
+Given
+  f('\0', false, 3.14, 42);
+characterLiteral(equals(0))
+  matches '\0'
+cxxBoolLiteral(equals(false)) and cxxBoolLiteral(equals(0))
+  match false
+floatLiteral(equals(3.14)) and floatLiteral(equals(314e-2))
+  match 3.14
+integerLiteral(equals(42))
+  matches 42
+
+Note that you cannot directly match a negative numeric literal because the
+minus sign is not part of the literal: It is a unary operator whose operand
+is the positive numeric literal. Instead, you must use a unaryOperator()
+matcher to match the minus sign:
+
+unaryOperator(hasOperatorName("-"),
+              hasUnaryOperand(integerLiteral(equals(13))))
 
-Usable as: Matcher<CharacterLiteral>, Matcher<CXXBoolLiteral>,
+Usable as: Matcher<CharacterLiteral>, Matcher<CXXBoolLiteralExpr>,
            Matcher<FloatingLiteral>, Matcher<IntegerLiteral>
 
+Matcher<CharacterLiteral>equalsbool Value +

+
+
+Matcher<CharacterLiteral>equalsdouble Value
+

+
+
+Matcher<CharacterLiteral>equalsunsigned Value
+

+
+
 Matcher<ClassTemplateSpecializationDecl>templateArgumentCountIsunsigned N
 
Matches if the number of template arguments equals N.
 
@@ -2533,16 +2587,35 @@ 

Narrowing Matchers

Matcher<FloatingLiteral>equalsValueT Value -
Matches literals that are equal to the given value.
+
Matches literals that are equal to the given value of type ValueT.
 
-Example matches true (matcher = cxxBoolLiteral(equals(true)))
-  true
+Given
+  f('\0', false, 3.14, 42);
+characterLiteral(equals(0))
+  matches '\0'
+cxxBoolLiteral(equals(false)) and cxxBoolLiteral(equals(0))
+  match false
+floatLiteral(equals(3.14)) and floatLiteral(equals(314e-2))
+  match 3.14
+integerLiteral(equals(42))
+  matches 42
+
+Note that you cannot directly match a negative numeric literal because the
+minus sign is not part of the literal: It is a unary operator whose operand
+is the positive numeric literal. Instead, you must use a unaryOperator()
+matcher to match the minus sign:
 
-Usable as: Matcher<CharacterLiteral>, Matcher<CXXBoolLiteral>,
+unaryOperator(hasOperatorName("-"),
+              hasUnaryOperand(integerLiteral(equals(13))))
+
+Usable as: Matcher<CharacterLiteral>, Matcher<CXXBoolLiteralExpr>,
            Matcher<FloatingLiteral>, Matcher<IntegerLiteral>
 
+Matcher<FloatingLiteral>equalsdouble Value +

+
+
 Matcher<FunctionDecl>hasDynamicExceptionSpec
 
Matches functions that have a dynamic exception specification.
 
@@ -2805,16 +2878,43 @@ 

Narrowing Matchers

Matcher<IntegerLiteral>equalsValueT Value
-Matches literals that are equal to the given value.
+Matches literals that are equal to the given value of type ValueT.
 
-Example matches true (matcher = cxxBoolLiteral(equals(true)))
-  true
+Given
+  f('\0', false, 3.14, 42);
+characterLiteral(equals(0))
+  matches '\0'
+cxxBoolLiteral(equals(false)) and cxxBoolLiteral(equals(0))
+  match false
+floatLiteral(equals(3.14)) and floatLiteral(equals(314e-2))
+  match 3.14
+integerLiteral(equals(42))
+  matches 42
+
+Note that you cannot directly match a negative numeric literal because the
+minus sign is not part of the literal: It is a unary operator whose operand
+is the positive numeric literal. Instead, you must use a unaryOperator()
+matcher to match the minus sign:
+
+unaryOperator(hasOperatorName("-"),
+              hasUnaryOperand(integerLiteral(equals(13))))
 
-Usable as: Matcher<CharacterLiteral>, Matcher<CXXBoolLiteral>,
+Usable as: Matcher<CharacterLiteral>, Matcher<CXXBoolLiteralExpr>,
            Matcher<FloatingLiteral>, Matcher<IntegerLiteral>
 
+Matcher<IntegerLiteral>equalsbool Value
+
+Matcher<IntegerLiteral>equalsdouble Value
+
+Matcher<IntegerLiteral>equalsunsigned Value
+
 Matcher<MemberExpr>isArrow
 
Matches member expressions that are called with '->' as opposed
 to '.'.
diff --git a/interpreter/llvm/src/tools/clang/docs/LibFormat.rst b/interpreter/llvm/src/tools/clang/docs/LibFormat.rst
index eacdc16145671..086a52827d8ce 100644
--- a/interpreter/llvm/src/tools/clang/docs/LibFormat.rst
+++ b/interpreter/llvm/src/tools/clang/docs/LibFormat.rst
@@ -28,7 +28,9 @@ The core routine of LibFormat is ``reformat()``:
 
 This reads a token stream out of the lexer ``Lex`` and reformats all the code
 ranges in ``Ranges``. The ``FormatStyle`` controls basic decisions made during
-formatting. A list of options can be found under :ref:`style-options`. 
+formatting. A list of options can be found under :ref:`style-options`.
+
+The style options are described in :doc:`ClangFormatStyleOptions`.
 
 
 .. _style-options:
diff --git a/interpreter/llvm/src/tools/clang/docs/MemorySanitizer.rst b/interpreter/llvm/src/tools/clang/docs/MemorySanitizer.rst
index 8088ecdf561fd..5bb19ed8a5091 100644
--- a/interpreter/llvm/src/tools/clang/docs/MemorySanitizer.rst
+++ b/interpreter/llvm/src/tools/clang/docs/MemorySanitizer.rst
@@ -27,7 +27,7 @@ executable, so make sure to use ``clang`` (not ``ld``) for the final
 link step. When linking shared libraries, the MemorySanitizer run-time
 is not linked, so ``-Wl,-z,defs`` may cause link errors (don't use it
 with MemorySanitizer). To get a reasonable performance add ``-O1`` or
-higher. To get meaninful stack traces in error messages add
+higher. To get meaningful stack traces in error messages add
 ``-fno-omit-frame-pointer``. To get perfect stack traces you may need
 to disable inlining (just use ``-O1``) and tail call elimination
 (``-fno-optimize-sibling-calls``).
diff --git a/interpreter/llvm/src/tools/clang/docs/Modules.rst b/interpreter/llvm/src/tools/clang/docs/Modules.rst
index 2b1bde2fedc10..ed6f81715731b 100644
--- a/interpreter/llvm/src/tools/clang/docs/Modules.rst
+++ b/interpreter/llvm/src/tools/clang/docs/Modules.rst
@@ -403,7 +403,7 @@ A *requires-declaration* specifies the requirements that an importing translatio
   *feature*:
     ``!``:sub:`opt` *identifier*
 
-The requirements clause allows specific modules or submodules to specify that they are only accessible with certain language dialects or on certain platforms. The feature list is a set of identifiers, defined below. If any of the features is not available in a given translation unit, that translation unit shall not import the module. The optional ``!`` indicates that a feature is incompatible with the module.
+The requirements clause allows specific modules or submodules to specify that they are only accessible with certain language dialects or on certain platforms. The feature list is a set of identifiers, defined below. If any of the features is not available in a given translation unit, that translation unit shall not import the module. When building a module for use by a compilation, submodules requiring unavailable features are ignored. The optional ``!`` indicates that a feature is incompatible with the module.
 
 The following features are defined:
 
@@ -413,6 +413,9 @@ altivec
 blocks
   The "blocks" language feature is available.
 
+coroutines
+  Support for the coroutines TS is available.
+
 cplusplus
   C++ support is available.
 
@@ -466,9 +469,16 @@ A header declaration specifies that a particular header is associated with the e
 .. parsed-literal::
 
   *header-declaration*:
-    ``private``:sub:`opt` ``textual``:sub:`opt` ``header`` *string-literal*
-    ``umbrella`` ``header`` *string-literal*
-    ``exclude`` ``header`` *string-literal*
+    ``private``:sub:`opt` ``textual``:sub:`opt` ``header`` *string-literal* *header-attrs*:sub:`opt`
+    ``umbrella`` ``header`` *string-literal* *header-attrs*:sub:`opt`
+    ``exclude`` ``header`` *string-literal* *header-attrs*:sub:`opt`
+
+  *header-attrs*:
+    '{' *header-attr** '}'
+
+  *header-attr*:
+    ``size`` *integer-literal*
+    ``mtime`` *integer-literal*
 
 A header declaration that does not contain ``exclude`` nor ``textual`` specifies a header that contributes to the enclosing module. Specifically, when the module is built, the named header will be parsed and its declarations will be (logically) placed into the enclosing submodule.
 
@@ -501,6 +511,18 @@ A header with the ``exclude`` specifier is excluded from the module. It will not
 
 A given header shall not be referenced by more than one *header-declaration*.
 
+Two *header-declaration*\s, or a *header-declaration* and a ``#include``, are
+considered to refer to the same file if the paths resolve to the same file
+and the specified *header-attr*\s (if any) match the attributes of that file,
+even if the file is named differently (for instance, by a relative path or
+via symlinks).
+
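+For illustration, a hypothetical machine-generated *header-declaration*
+carrying *header-attr*\s might look like:
+
+.. parsed-literal::
+
+  header "stdio.h" { size 29696 mtime 1402067594 }
+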
+.. note::
+    The use of *header-attr*\s avoids the need for Clang to speculatively
+    ``stat`` every header referenced by a module map. It is recommended that
+    *header-attr*\s only be used in machine-generated module maps, to avoid
+    mismatches between attribute values and the corresponding files.
+
 Umbrella directory declaration
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 An umbrella directory declaration specifies that all of the headers in the specified directory should be included within the module.
diff --git a/interpreter/llvm/src/tools/clang/docs/ReleaseNotes.rst b/interpreter/llvm/src/tools/clang/docs/ReleaseNotes.rst
index f7e31e5c98d53..6e8b005e7cb12 100644
--- a/interpreter/llvm/src/tools/clang/docs/ReleaseNotes.rst
+++ b/interpreter/llvm/src/tools/clang/docs/ReleaseNotes.rst
@@ -1,6 +1,6 @@
-=======================================
-Clang 5.0.0 (In-Progress) Release Notes
-=======================================
+=========================
+Clang 5.0.0 Release Notes
+=========================
 
 .. contents::
    :local:
@@ -8,12 +8,6 @@ Clang 5.0.0 (In-Progress) Release Notes
 
 Written by the `LLVM Team `_
 
-.. warning::
-
-   These are in-progress notes for the upcoming Clang 5 release.
-   Release notes for previous releases can be found on
-   `the Download Page `_.
-
 Introduction
 ============
 
@@ -26,15 +20,9 @@ documentation `_. All LLVM
 releases may be downloaded from the `LLVM releases web
 site `_.
 
-For more information about Clang or LLVM, including information about
-the latest release, please check out the main please see the `Clang Web
-Site `_ or the `LLVM Web
-Site `_.
-
-Note that if you are reading this file from a Subversion checkout or the
-main Clang web page, this document applies to the *next* release, not
-the current one. To see the release notes for a specific release, please
-see the `releases page `_.
+For more information about Clang or LLVM, including information about the
+latest release, please see the `Clang Web Site `_ or the
+`LLVM Web Site `_.
 
 What's New in Clang 5.0.0?
 ==========================
@@ -47,86 +35,196 @@ sections with improvements to Clang's support for those languages.
 Major New Features
 ------------------
 
--  ...
+C++ coroutines
+^^^^^^^^^^^^^^
+`C++ coroutines TS
+`_
+implementation has landed. Use ``-fcoroutines-ts -stdlib=libc++`` to enable
+coroutine support. Here is `an example
+`_ to get you started.
+
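+A minimal sketch of a coroutine under the TS (the ``task`` type and its
+``promise_type`` here are illustrative, not provided by the compiler):
+
+.. code-block:: c++
+
+  #include <experimental/coroutine>
+
+  struct task {
+    struct promise_type {
+      task get_return_object() { return {}; }
+      std::experimental::suspend_never initial_suspend() { return {}; }
+      std::experimental::suspend_never final_suspend() { return {}; }
+      void return_void() {}
+      void unhandled_exception() {}
+    };
+  };
+
+  task f() { co_return; }
+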
 
 Improvements to Clang's diagnostics
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
--  -Wunused-lambda-capture warns when a variable explicitly captured
+-  ``-Wcast-qual`` was implemented for C++. C-style casts are now properly
+   diagnosed.
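+
+   For example, a C-style cast that drops qualifiers is now diagnosed:
+
+   .. code-block:: c++
+
+     const char *s = "text";
+     char *p = (char *)s; // warning: the cast drops the 'const' qualifier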
+
+-  ``-Wunused-lambda-capture`` warns when a variable explicitly captured
    by a lambda is not used in the body of the lambda.
 
+-  ``-Wstrict-prototypes`` is a new warning that warns about non-prototype
+   function and block declarations and types in C and Objective-C.
+
+-  ``-Wunguarded-availability`` is a new warning that warns about uses of new
+   APIs that were introduced in a system whose version is newer than the
+   deployment target version. A new Objective-C expression ``@available`` has
+   been introduced to perform system version checking at runtime. This warning
+   is off by default to prevent unexpected warnings in existing projects.
+   However, its less strict sibling ``-Wunguarded-availability-new`` is on by
+   default. It warns about unguarded uses of APIs only when they were introduced
+   in or after macOS 10.13, iOS 11, tvOS 11 or watchOS 4.
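+
+   A sketch of a guarded use in C or C++, assuming the ``__builtin_available``
+   spelling of the new runtime check:
+
+   .. code-block:: c++
+
+     void use_new_api() {
+       if (__builtin_available(macOS 10.13, *)) {
+         // Guarded: this block only runs on macOS 10.13 or newer.
+       }
+     }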
+
+-  The ``-Wdocumentation`` warning now allows the use of ``\param`` and
+   ``\returns`` documentation directives in the documentation comments for
+   declarations with a function or a block pointer type.
+
+-  The compiler no longer warns about unreachable ``__builtin_unreachable``
+   statements.
+
 New Compiler Flags
 ------------------
 
-The option ....
+- ``--autocomplete`` was implemented to obtain a list of flags and their
+  arguments. This is used for shell autocompletion.
+
+Deprecated Compiler Flags
+-------------------------
+
+The following options are deprecated and ignored. They will be removed in
+future versions of Clang.
+
+- ``-fslp-vectorize-aggressive`` used to enable the BB vectorizing pass. It has
+  been superseded by the normal SLP vectorizer.
+- ``-fno-slp-vectorize-aggressive`` used to be the default behavior of clang.
 
 New Pragmas in Clang
 -----------------------
 
-Clang now supports the ...
+- Clang now supports the ``clang attribute`` pragma that allows users to apply
+  an attribute to multiple declarations.
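+
+  A sketch of the pragma's push/pop form; the ``annotate`` attribute here is
+  only an example:
+
+  .. code-block:: c++
+
+    #pragma clang attribute push (__attribute__((annotate("api"))), apply_to = function)
+    void f(); // annotate("api") is applied here
+    void g(); // and here
+    #pragma clang attribute pop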
 
+- ``pragma pack`` directives that are included in a precompiled header are now
+  applied correctly to the declarations in the compilation unit that includes
+  that precompiled header.
 
 Attribute Changes in Clang
 --------------------------
 
--  ...
+-  The ``overloadable`` attribute now allows at most one function with a given
+   name to lack the ``overloadable`` attribute. This unmarked function will not
+   have its name mangled.
+-  The ``ms_abi`` attribute and the ``__builtin_ms_va_list`` types and builtins
+   are now supported on AArch64.
 
-Windows Support
----------------
+C Language Changes in Clang
+---------------------------
 
-Clang's support for building native Windows programs ...
+- Added near-complete support for implicit scalar to vector conversion, a GNU
+  C/C++ language extension. With this extension, the following code is
+  considered valid:
 
+.. code-block:: c
 
-C Language Changes in Clang
----------------------------
+    typedef unsigned v4i32 __attribute__((vector_size(16)));
+
+    v4i32 foo(v4i32 a) {
+      // Here 5 is implicitly converted to an unsigned value and replicated
+      // into a vector with as many elements as 'a'.
+      return a + 5;
+    }
 
-- ...
+The implicit conversion of a scalar value to a vector value, in the context of
+a vector expression, occurs when:
 
-...
+- The type of the vector is that of a ``__attribute__((vector_size(size)))``
+  vector, not an OpenCL ``__attribute__((ext_vector_type(size)))`` vector type.
 
-C11 Feature Support
-^^^^^^^^^^^^^^^^^^^
+- The scalar value can be cast to the type of the vector's elements without
+  loss of precision, based on the type of the scalar and the type of the
+  vector's elements.
+
+- For compile-time constant values, the above rule is weakened to consider the
+  value of the scalar constant rather than the constant's type (see the sketch
+  below). However, for compatibility with GCC, floating point constants with
+  precise integral representations are not implicitly converted to integer
+  values.
+
+Currently the basic integer and floating point types with the following
+operators are supported: ``+``, ``/``, ``-``, ``*``, ``%``, ``>``, ``<``,
+``>=``, ``<=``, ``==``, ``!=``, ``&``, ``|``, ``^`` and the corresponding
+assignment operators where applicable.
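+
+As a sketch of the compile-time constant rule above (assuming the extension
+behaves as described):
+
+.. code-block:: c++
+
+  typedef char v16i8 __attribute__((vector_size(16)));
+
+  v16i8 add_small_constant(v16i8 a) {
+    // 5 has type 'int', which cannot in general be converted to 'char'
+    // without loss of precision; because 5 is a compile-time constant whose
+    // value fits in 'char', the conversion is allowed.
+    return a + 5;
+  }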
 
-...
 
 C++ Language Changes in Clang
 -----------------------------
 
-...
+- We expect this to be the last Clang release that defaults to ``-std=gnu++98``
+  when using the GCC-compatible ``clang++`` driver. From Clang 6 onwards we
+  expect to use ``-std=gnu++14`` or a later standard by default, to match the
+  behavior of recent GCC releases. Users are encouraged to change their build
+  files to explicitly specify their desired C++ standard.
+
+- Support for the C++17 standard has been completed. This mode can be enabled
+  using ``-std=c++17`` (the old flag ``-std=c++1z`` is still supported for
+  compatibility).
 
-C++1z Feature Support
-^^^^^^^^^^^^^^^^^^^^^
+- When targeting a platform that uses the Itanium C++ ABI, Clang implements a
+  `recent change to the ABI`__ that passes objects of class type indirectly if they
+  have a non-trivial move constructor. Previous versions of Clang only
+  considered the copy constructor, resulting in an ABI change in rare cases,
+  but GCC has already implemented this change for several releases.
+  This affects all targets other than Windows and PS4. You can opt out of this
+  ABI change with ``-fclang-abi-compat=4.0``.
 
-...
+- As mentioned in `C Language Changes in Clang`_, Clang's support for
+  implicit scalar to vector conversions also applies to C++. Additionally
+  the following operators are also supported: ``&&`` and ``||``.
+
+.. __: https://github.com/itanium-cxx-abi/cxx-abi/commit/7099637aba11fed6bdad7ee65bf4fd3f97fbf076
 
 Objective-C Language Changes in Clang
 -------------------------------------
 
-...
+- Clang now guarantees that a ``readwrite`` property is synthesized when an
+  ambiguous property (i.e. a property that's declared in multiple protocols)
+  is synthesized. The ``-Wprotocol-property-synthesis-ambiguity`` warning that
+  warns about incompatible property types is now promoted to an error when
+  there's an ambiguity between ``readwrite`` and ``readonly`` properties.
+
+- Clang now prohibits synthesis of ambiguous properties with incompatible
+  explicit property attributes. The following property attributes are
+  checked for differences: ``copy``, ``retain``/``strong``, ``atomic``,
+  ``getter`` and ``setter``.
 
 OpenCL C Language Changes in Clang
 ----------------------------------
 
-...
+Various bug fixes and improvements:
 
-OpenMP Support in Clang
-----------------------------------
+-  Extended OpenCL-related Clang tests.
 
-...
+-  Improved diagnostics across several areas: scoped address space
+   qualified variables, function pointers, atomics, type rank for overloading,
+   block captures, ``reserve_id_t``.
 
-Internal API Changes
---------------------
+-  Several address space related fixes for constant address space function scope variables,
+   IR generation, mangling of ``generic`` and alloca (post-fix from general Clang
+   refactoring of address spaces).
 
-These are major API changes that have happened since the 4.0.0 release of
-Clang. If upgrading an external codebase that uses Clang as a library,
-this section should help get you past the largest hurdles of upgrading.
+-  Several improvements in extensions: fixed OpenCL version for ``cl_khr_mipmap_image``,
+   added missing ``cl_khr_3d_image_writes``.
 
--  ...
+-  Improvements in ``enqueue_kernel``, especially the implementation of ``ndrange_t`` and blocks.
 
-AST Matchers
-------------
+-  OpenCL type related fixes: global samplers, the ``pipe_t`` size, internal type redefinition,
+   and type compatibility checking in ternary and other operations.
+
+-  The OpenCL header has been extended with missing extension guards, and direct mapping of ``as_type``
+   to ``__builtin_astype``.
+
+-  Fixed ``kernel_arg_type_qual`` and OpenCL/SPIR version in metadata.
+
+-  Added proper use of the kernel calling convention to various targets.
+
+The following new functionality has been added:
+
+-  Added documentation on OpenCL to Clang user manual.
 
-...
+-  Extended Clang builtins with required ``cl_khr_subgroups`` support.
+
+-  Added ``intel_reqd_sub_group_size`` attribute support.
+
+-  Added OpenCL types to ``CIndex``.
 
 
 clang-format
@@ -170,7 +268,7 @@ clang-format
   |   namespace A {     |   namespace A {     |
   |   int i;            |   int i;            |
   |   int j;            |   int j;            |
-  |   }                 |   }                 |
+  |   }                 |   } // namespace A  |
   +---------------------+---------------------+
 
 * Comment reflow support added. Overly long comment lines will now be reflown with the rest of
@@ -179,33 +277,85 @@ clang-format
 libclang
 --------
 
-...
+- Libclang now provides code-completion results for more C++ constructs
+  and keywords. The following keywords/identifiers are now included in the
+  code-completion results: ``static_assert``, ``alignas``, ``constexpr``,
+  ``final``, ``noexcept``, ``override`` and ``thread_local``.
+
+- Libclang now provides code-completion results for members from dependent
+  classes. For example:
+
+  .. code-block:: c++
+
+    template <typename T>
+    void appendValue(std::vector<T> &dest, const T &value) {
+        dest. // Relevant completion results are now shown after '.'
+    }
+
+  Note that code-completion results are still not provided when the member
+  expression includes a dependent base expression. For example:
 
+  .. code-block:: c++
+
+    template <typename T>
+    void appendValue(std::vector<std::vector<T>> &dest, const T &value) {
+        dest.at(0). // Libclang fails to provide completion results after '.'
+    }
 
 Static Analyzer
 ---------------
 
-...
+- The static analyzer now supports using the
+  `z3 theorem prover `_ from Microsoft Research
+  as an external constraint solver. This allows reasoning over more complex
+  queries, but performance is ~15x slower than the default range-based
+  constraint solver. To enable the z3 solver backend, clang must be built with
+  the ``CLANG_ANALYZER_BUILD_Z3=ON`` option, and the
+  ``-Xanalyzer -analyzer-constraints=z3`` arguments passed at runtime.
 
-Core Analysis Improvements
-==========================
+Undefined Behavior Sanitizer (UBSan)
+------------------------------------
+
+- The Undefined Behavior Sanitizer has a new check for pointer overflow. This
+  check is on by default. The flag to control this functionality is
+  ``-fsanitize=pointer-overflow``.
 
-- ...
+  Pointer overflow is an indicator of undefined behavior: when a pointer
+  indexing expression wraps around the address space, or produces other
+  unexpected results, its result may not point to a valid object.
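+  A contrived sketch of what this check flags at runtime, when built with
+  ``-fsanitize=pointer-overflow``:
+
+  .. code-block:: c++
+
+    #include <cstdint>
+
+    char *past_the_end(char *p) {
+      // Adding SIZE_MAX wraps the pointer value around the address space.
+      return p + SIZE_MAX;
+    }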
 
-New Issues Found
-================
+- UBSan has several new checks which detect violations of nullability
+  annotations. These checks are off by default. The flag to control this group
+  of checks is ``-fsanitize=nullability``. The checks can be individually
+  enabled by ``-fsanitize=nullability-arg`` (which checks calls),
+  ``-fsanitize=nullability-assign`` (which checks assignments), and
+  ``-fsanitize=nullability-return`` (which checks return statements).
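+
+  For instance, a sketch of the kind of violation that
+  ``-fsanitize=nullability-assign`` reports, assuming Clang's ``_Nonnull``
+  type annotation:
+
+  .. code-block:: c++
+
+    int some_int;
+    int *_Nonnull ptr = &some_int;
+
+    void reset() {
+      ptr = nullptr; // runtime error: null assigned to a _Nonnull pointer
+    }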
+
+- UBSan can now detect invalid loads from bitfields and from ObjC BOOLs.
+
+- UBSan can now avoid emitting unnecessary type checks in C++ class methods and
+  in several other cases where the result is known at compile-time. UBSan can
+  also avoid emitting unnecessary overflow checks in arithmetic expressions
+  with promoted integer operands.
 
-- ...
 
 Python Binding Changes
 ----------------------
 
+Python bindings now support both Python 2 and Python 3.
+
 The following methods have been added:
 
--  ...
+- ``is_scoped_enum`` has been added to ``Cursor``.
+
+- ``exception_specification_kind`` has been added to ``Cursor``.
+
+- ``get_address_space`` has been added to ``Type``.
+
+- ``get_typedef_name`` has been added to ``Type``.
+
+- ``get_exception_specification_kind`` has been added to ``Type``.
 
-Significant Known Problems
-==========================
 
 Additional Information
 ======================
diff --git a/interpreter/llvm/src/tools/clang/docs/SourceBasedCodeCoverage.rst b/interpreter/llvm/src/tools/clang/docs/SourceBasedCodeCoverage.rst
index 474af30ae30f7..805c98794804a 100644
--- a/interpreter/llvm/src/tools/clang/docs/SourceBasedCodeCoverage.rst
+++ b/interpreter/llvm/src/tools/clang/docs/SourceBasedCodeCoverage.rst
@@ -274,6 +274,11 @@ To specify an alternate directory for raw profiles, use
 Drawbacks and limitations
 =========================
 
+* Prior to version 2.26, the GNU binutils BFD linker is not able to link
+  programs compiled with ``-fcoverage-mapping`` in its ``--gc-sections`` mode.
+  Possible workarounds include disabling ``--gc-sections``, upgrading to a
+  newer version of BFD, or using the Gold linker.
+
 * Code coverage does not handle unpredictable changes in control flow or stack
   unwinding in the presence of exceptions precisely. Consider the following
   function:
diff --git a/interpreter/llvm/src/tools/clang/docs/ThinLTO.rst b/interpreter/llvm/src/tools/clang/docs/ThinLTO.rst
index 7c7d9513025a9..31fff51a61e9a 100644
--- a/interpreter/llvm/src/tools/clang/docs/ThinLTO.rst
+++ b/interpreter/llvm/src/tools/clang/docs/ThinLTO.rst
@@ -123,6 +123,52 @@ which currently must be enabled through a linker option.
   ``-Wl,-plugin-opt,cache-dir=/path/to/cache``
 - ld64 (support in clang 3.9 and Xcode 8):
   ``-Wl,-cache_path_lto,/path/to/cache``
+- lld (as of LLVM r296702):
+  ``-Wl,--thinlto-cache-dir=/path/to/cache``
+
+Cache Pruning
+-------------
+
+To help keep the size of the cache under control, ThinLTO supports cache
+pruning. Cache pruning is supported with ld64 and ELF lld, but currently only
+ELF lld allows you to control the policy with a policy string.  The cache
+policy must be specified with a linker option.
+
+- ELF lld (as of LLVM r298036):
+  ``-Wl,--thinlto-cache-policy,POLICY``
+
+A policy string is a series of key-value pairs separated by ``:`` characters.
+Possible key-value pairs are:
+
+- ``cache_size=X%``: The maximum size for the cache directory is ``X`` percent
+  of the available space on the disk. Set to 100 to indicate no limit, or to
+  50 to indicate that the cache size may not exceed half the available disk
+  space. A value over 100 is invalid. A value of 0 disables the percentage
+  size-based pruning. The default is 75%.
+
+- ``cache_size_bytes=X``, ``cache_size_bytes=Xk``, ``cache_size_bytes=Xm``,
+  ``cache_size_bytes=Xg``:
+  Sets the maximum size for the cache directory to ``X`` bytes (or KB, MB,
+  GB respectively). A value over the amount of available space on the disk
+  will be reduced to the amount of available space. A value of 0 disables
+  the byte size-based pruning. The default is no byte size-based pruning.
+
+  Note that ThinLTO will apply both size-based pruning policies simultaneously,
+  and changing one does not affect the other. For example, a policy of
+  ``cache_size_bytes=1g`` on its own will cause both the 1GB and default 75%
+  policies to be applied unless the default ``cache_size`` is overridden.
+
+- ``prune_after=Xs``, ``prune_after=Xm``, ``prune_after=Xh``: Sets the
+  expiration time for cache files to ``X`` seconds (or minutes, hours
+  respectively).  When a file hasn't been accessed for ``prune_after`` seconds,
+  it is removed from the cache. A value of 0 disables the expiration-based
+  pruning. The default is 1 week.
+
+- ``prune_interval=Xs``, ``prune_interval=Xm``, ``prune_interval=Xh``:
+  Sets the pruning interval to ``X`` seconds (or minutes, hours
+  respectively). This is intended to be used to avoid scanning the directory
+  too often. It does not impact the decision of which files to prune. A
+  value of 0 forces the scan to occur. The default is every 20 minutes.
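+
+For example, a hypothetical policy combining these keys with ELF lld:
+
+  ``-Wl,--thinlto-cache-policy,cache_size=50%:cache_size_bytes=10g:prune_after=3h``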
 
 Clang Bootstrap
 ---------------
diff --git a/interpreter/llvm/src/tools/clang/docs/UndefinedBehaviorSanitizer.rst b/interpreter/llvm/src/tools/clang/docs/UndefinedBehaviorSanitizer.rst
index d6fdad2a0c011..85dd549baaf80 100644
--- a/interpreter/llvm/src/tools/clang/docs/UndefinedBehaviorSanitizer.rst
+++ b/interpreter/llvm/src/tools/clang/docs/UndefinedBehaviorSanitizer.rst
@@ -106,6 +106,8 @@ Available checks are:
      invalid pointers. These checks are made in terms of
      ``__builtin_object_size``, and consequently may be able to detect more
      problems at higher optimization levels.
+  -  ``-fsanitize=pointer-overflow``: Performing pointer arithmetic which
+     overflows.
   -  ``-fsanitize=return``: In C++, reaching the end of a
      value-returning function without returning a value.
   -  ``-fsanitize=returns-nonnull-attribute``: Returning null pointer
@@ -146,6 +148,12 @@ You can also use the following check groups:
      nullability does not have undefined behavior, it is often unintentional,
      so UBSan offers to catch it.
 
+Volatile
+--------
+
+The ``null``, ``alignment``, ``object-size``, and ``vptr`` checks do not apply
+to pointers to types with the ``volatile`` qualifier.
+
 Stack traces and report symbolization
 =====================================
 If you want UBSan to print symbolized stack trace for each error report, you
diff --git a/interpreter/llvm/src/tools/clang/docs/UsersManual.rst b/interpreter/llvm/src/tools/clang/docs/UsersManual.rst
index 7362456202ba1..e5f33fc295694 100644
--- a/interpreter/llvm/src/tools/clang/docs/UsersManual.rst
+++ b/interpreter/llvm/src/tools/clang/docs/UsersManual.rst
@@ -322,18 +322,40 @@ output format of the diagnostics that it generates.
    by category, so it should be a high level category. We want dozens
    of these, not hundreds or thousands of them.
 
+.. _opt_fsave-optimization-record:
+
+**-fsave-optimization-record**
+   Write optimization remarks to a YAML file.
+
+   This option, which defaults to off, controls whether Clang writes
+   optimization reports to a YAML file. By recording diagnostics in a file,
+   using a structured YAML format, users can parse or sort the remarks in a
+   convenient way.
+
+.. _opt_foptimization-record-file:
+
+**-foptimization-record-file**
+   Control the file to which optimization reports are written.
+
+   When optimization reports are being output (see
+   :ref:`-fsave-optimization-record `), this
+   option controls the file to which those reports are written.
+
+   If this option is not used, optimization records are output to a file named
+   after the primary file being compiled. If that's "foo.c", for example,
+   optimization records are output to "foo.opt.yaml".
+
 .. _opt_fdiagnostics-show-hotness:
 
 **-f[no-]diagnostics-show-hotness**
    Enable profile hotness information in diagnostic line.
 
-   This option, which defaults to off, controls whether Clang prints the
-   profile hotness associated with a diagnostics in the presence of
-   profile-guided optimization information.  This is currently supported with
-   optimization remarks (see :ref:`Options to Emit Optimization Reports
-   `).  The hotness information allows users to focus on the hot
-   optimization remarks that are likely to be more relevant for run-time
-   performance.
+   This option controls whether Clang prints the profile hotness associated
+   with diagnostics in the presence of profile-guided optimization information.
+   This is currently supported with optimization remarks (see
+   :ref:`Options to Emit Optimization Reports `). The hotness information
+   allows users to focus on the hot optimization remarks that are likely to be
+   more relevant for run-time performance.
 
    For example, in this output, the block containing the callsite of `foo` was
    executed 3000 times according to the profile data:
@@ -344,6 +366,23 @@ output format of the diagnostics that it generates.
            sum += foo(x, x - 2);
                   ^
 
+   This option is implied when
+   :ref:`-fsave-optimization-record ` is used.
+   Otherwise, it defaults to off.
+
+.. _opt_fdiagnostics-hotness-threshold:
+
+**-fdiagnostics-hotness-threshold**
+   Prevent optimization remarks from being output if they do not have at least
+   this hotness value.
+
+   This option, which defaults to zero, controls the minimum hotness an
+   optimization remark would need in order to be output by Clang. This is
+   currently supported with optimization remarks (see :ref:`Options to Emit
+   Optimization Reports `) when profile hotness information in
+   diagnostics is enabled (see
+   :ref:`-fdiagnostics-show-hotness `).
+
 .. _opt_fdiagnostics-fixit-info:
 
 **-f[no-]diagnostics-fixit-info**
@@ -2583,6 +2622,10 @@ Execute ``clang-cl /?`` to see a list of supported options:
       /Brepro                 Emit an object file which can be reproduced over time
       /C                      Don't discard comments when preprocessing
       /c                      Compile only
+      /d1reportAllClassLayout Dump record layout information
+      /diagnostics:caret      Enable caret and column diagnostics (on by default)
+      /diagnostics:classic    Disable column and caret diagnostics
+      /diagnostics:column     Disable caret diagnostics but keep column info
       /D       Define macro
       /EH              Exception handling model
       /EP                     Disable linemarker output and preprocess to stdout
@@ -2677,6 +2720,8 @@ Execute ``clang-cl /?`` to see a list of supported options:
       /Zc:threadSafeInit      Enable thread-safe initialization of static variables
       /Zc:trigraphs-          Disable trigraphs (default)
       /Zc:trigraphs           Enable trigraphs
+      /Zc:twoPhase-           Disable two-phase name lookup in templates
+      /Zc:twoPhase            Enable two-phase name lookup in templates
       /Zd                     Emit debug line number tables only
       /Zi                     Alias for /Z7. Does not produce PDBs.
       /Zl                     Don't mention any default libraries in the object file
@@ -2689,12 +2734,14 @@ Execute ``clang-cl /?`` to see a list of supported options:
       --analyze               Run the static analyzer
       -fansi-escape-codes     Use ANSI escape codes for diagnostics
       -fcolor-diagnostics     Use colors in diagnostics
+      -fdebug-macro           Emit macro debug information
       -fdelayed-template-parsing
                               Parse templated function definitions at the end of the translation unit
       -fdiagnostics-absolute-paths
                               Print absolute paths in diagnostics
       -fdiagnostics-parseable-fixits
                               Print fix-its in machine parseable form
+      -flto=           Set LTO mode to either 'full' or 'thin'
       -flto                   Enable LTO in 'full' mode
       -fms-compatibility-version=
                               Dot-separated value representing the Microsoft compiler version
@@ -2703,12 +2750,27 @@ Execute ``clang-cl /?`` to see a list of supported options:
       -fms-extensions         Accept some non-standard constructs supported by the Microsoft compiler
       -fmsc-version=   Microsoft compiler version number to report in _MSC_VER
                               (0 = don't define it (default))
+      -fno-debug-macro        Do not emit macro debug information
       -fno-delayed-template-parsing
                               Disable delayed template parsing
+      -fno-sanitize-address-use-after-scope
+                              Disable use-after-scope detection in AddressSanitizer
+      -fno-sanitize-blacklist Don't use blacklist file for sanitizers
+      -fno-sanitize-cfi-cross-dso
+                              Disable control flow integrity (CFI) checks for cross-DSO calls.
       -fno-sanitize-coverage=
                               Disable specified features of coverage instrumentation for Sanitizers
+      -fno-sanitize-memory-track-origins
+                              Disable origins tracking in MemorySanitizer
       -fno-sanitize-recover=
                               Disable recovery for specified sanitizers
+      -fno-sanitize-stats     Disable sanitizer statistics gathering.
+      -fno-sanitize-thread-atomics
+                              Disable atomic operations instrumentation in ThreadSanitizer
+      -fno-sanitize-thread-func-entry-exit
+                              Disable function entry/exit instrumentation in ThreadSanitizer
+      -fno-sanitize-thread-memory-access
+                              Disable memory access instrumentation in ThreadSanitizer
       -fno-sanitize-trap=
                               Disable trapping for specified sanitizers
       -fno-standalone-debug   Limit debug information produced to reduce size of debug binary
@@ -2720,13 +2782,36 @@ Execute ``clang-cl /?`` to see a list of supported options:
                               (overridden by '=' form of option or LLVM_PROFILE_FILE env var)
       -fprofile-instr-use=
                               Use instrumentation data for profile-guided optimization
+      -fsanitize-address-field-padding=
+                              Level of field padding for AddressSanitizer
+      -fsanitize-address-globals-dead-stripping
+                              Enable linker dead stripping of globals in AddressSanitizer
+      -fsanitize-address-use-after-scope
+                              Enable use-after-scope detection in AddressSanitizer
       -fsanitize-blacklist=
                               Path to blacklist file for sanitizers
+      -fsanitize-cfi-cross-dso
+                              Enable control flow integrity (CFI) checks for cross-DSO calls.
       -fsanitize-coverage=
                               Specify the type of coverage instrumentation for Sanitizers
+      -fsanitize-memory-track-origins=
+                              Enable origins tracking in MemorySanitizer
+      -fsanitize-memory-track-origins
+                              Enable origins tracking in MemorySanitizer
+      -fsanitize-memory-use-after-dtor
+                              Enable use-after-destroy detection in MemorySanitizer
       -fsanitize-recover=
                               Enable recovery for specified sanitizers
+      -fsanitize-stats        Enable sanitizer statistics gathering.
+      -fsanitize-thread-atomics
+                              Enable atomic operations instrumentation in ThreadSanitizer (default)
+      -fsanitize-thread-func-entry-exit
+                              Enable function entry/exit instrumentation in ThreadSanitizer (default)
+      -fsanitize-thread-memory-access
+                              Enable memory access instrumentation in ThreadSanitizer (default)
       -fsanitize-trap= Enable trapping for specified sanitizers
+      -fsanitize-undefined-strip-path-components=
+                              Strip (or keep only, if negative) a given number of path components when emitting check metadata.
       -fsanitize=      Turn on runtime checks for various forms of undefined or suspicious
                               behavior. See user manual for available checks
       -fstandalone-debug      Emit full debug info for all types used by the program
@@ -2734,6 +2819,7 @@ Execute ``clang-cl /?`` to see a list of supported options:
       -gline-tables-only      Emit debug line number tables only
       -miamcu                 Use Intel MCU ABI
       -mllvm           Additional arguments to forward to LLVM's option processing
+      -nobuiltininc           Disable builtin #include directories
       -Qunused-arguments      Don't emit warning for unused driver arguments
       -R              Enable the specified remark
       --target=        Generate code for the given target
diff --git a/interpreter/llvm/src/tools/clang/docs/tools/dump_format_style.py b/interpreter/llvm/src/tools/clang/docs/tools/dump_format_style.py
index 81a5af6ef42bf..e2571f46448d2 100755
--- a/interpreter/llvm/src/tools/clang/docs/tools/dump_format_style.py
+++ b/interpreter/llvm/src/tools/clang/docs/tools/dump_format_style.py
@@ -24,10 +24,10 @@ def doxygen2rst(text):
   text = re.sub(r'\\\w+ ', '', text)
   return text
 
-def indent(text, columns):
+def indent(text, columns, indent_first_line=True):
   indent = ' ' * columns
   s = re.sub(r'\n([^\n])', '\n' + indent + '\\1', text, flags=re.S)
-  if s.startswith('\n'):
+  if not indent_first_line or s.startswith('\n'):
     return s
   return indent + s
 
@@ -64,7 +64,9 @@ def __init__(self, name, comment):
     self.comment = comment.strip()
 
   def __str__(self):
-    return '\n* ``%s`` %s' % (self.name, doxygen2rst(self.comment))
+    return '\n* ``%s`` %s' % (
+        self.name,
+        doxygen2rst(indent(self.comment, 2, indent_first_line=False)))
 
 class Enum:
   def __init__(self, name, comment):
@@ -179,7 +181,7 @@ class State:
       if enums.has_key(option.type):
         option.enum = enums[option.type]
       elif nested_structs.has_key(option.type):
-        option.nested_struct = nested_structs[option.type];
+        option.nested_struct = nested_structs[option.type]
       else:
         raise Exception('Unknown type: %s' % option.type)
   return options
@@ -195,4 +197,3 @@ class State:
 
 with open(DOC_FILE, 'wb') as output:
   output.write(contents)
-
diff --git a/interpreter/llvm/src/tools/clang/include/clang-c/CXCompilationDatabase.h b/interpreter/llvm/src/tools/clang/include/clang-c/CXCompilationDatabase.h
index 9359abfebfe07..29f89e52a6e3d 100644
--- a/interpreter/llvm/src/tools/clang/include/clang-c/CXCompilationDatabase.h
+++ b/interpreter/llvm/src/tools/clang/include/clang-c/CXCompilationDatabase.h
@@ -7,7 +7,7 @@
 |*                                                                            *|
 |*===----------------------------------------------------------------------===*|
 |*                                                                            *|
-|* This header provides a public inferface to use CompilationDatabase without *|
+|* This header provides a public interface to use CompilationDatabase without *|
 |* the full Clang C++ API.                                                    *|
 |*                                                                            *|
 \*===----------------------------------------------------------------------===*/
diff --git a/interpreter/llvm/src/tools/clang/include/clang-c/Index.h b/interpreter/llvm/src/tools/clang/include/clang-c/Index.h
index 462d9269f7ad2..3b5ea9fa539ba 100644
--- a/interpreter/llvm/src/tools/clang/include/clang-c/Index.h
+++ b/interpreter/llvm/src/tools/clang/include/clang-c/Index.h
@@ -7,7 +7,7 @@
 |*                                                                            *|
 |*===----------------------------------------------------------------------===*|
 |*                                                                            *|
-|* This header provides a public inferface to a Clang library for extracting  *|
+|* This header provides a public interface to a Clang library for extracting  *|
 |* high-level symbol information from source files without exposing the full  *|
 |* Clang C++ API.                                                             *|
 |*                                                                            *|
@@ -32,7 +32,7 @@
  * compatible, thus CINDEX_VERSION_MAJOR is expected to remain stable.
  */
 #define CINDEX_VERSION_MAJOR 0
-#define CINDEX_VERSION_MINOR 39
+#define CINDEX_VERSION_MINOR 43
 
 #define CINDEX_VERSION_ENCODE(major, minor) ( \
       ((major) * 10000)                       \
@@ -171,7 +171,60 @@ typedef struct CXVersion {
    */
   int Subminor;
 } CXVersion;
-  
+
+/**
+ * \brief Describes the exception specification of a cursor.
+ *
+ * A negative value indicates that the cursor is not a function declaration.
+ */
+enum CXCursor_ExceptionSpecificationKind {
+
+  /**
+   * \brief The cursor has no exception specification.
+   */
+  CXCursor_ExceptionSpecificationKind_None,
+
+  /**
+   * \brief The cursor has exception specification throw()
+   */
+  CXCursor_ExceptionSpecificationKind_DynamicNone,
+
+  /**
+   * \brief The cursor has exception specification throw(T1, T2)
+   */
+  CXCursor_ExceptionSpecificationKind_Dynamic,
+
+  /**
+   * \brief The cursor has exception specification throw(...).
+   */
+  CXCursor_ExceptionSpecificationKind_MSAny,
+
+  /**
+   * \brief The cursor has exception specification basic noexcept.
+   */
+  CXCursor_ExceptionSpecificationKind_BasicNoexcept,
+
+  /**
+   * \brief The cursor has exception specification computed noexcept.
+   */
+  CXCursor_ExceptionSpecificationKind_ComputedNoexcept,
+
+  /**
+   * \brief The exception specification has not yet been evaluated.
+   */
+  CXCursor_ExceptionSpecificationKind_Unevaluated,
+
+  /**
+   * \brief The exception specification has not yet been instantiated.
+   */
+  CXCursor_ExceptionSpecificationKind_Uninstantiated,
+
+  /**
+   * \brief The exception specification has not been parsed yet.
+   */
+  CXCursor_ExceptionSpecificationKind_Unparsed
+};
+
 /**
  * \brief Provides a shared context for creating translation units.
  *
@@ -1234,7 +1287,12 @@ enum CXTranslationUnit_Flags {
    * purposes of an IDE, this is undesirable behavior and as much information
    * as possible should be reported. Use this flag to enable this behavior.
    */
-  CXTranslationUnit_KeepGoing = 0x200
+  CXTranslationUnit_KeepGoing = 0x200,
+
+  /**
+   * \brief Sets the preprocessor in a mode for parsing a single file only.
+   */
+  CXTranslationUnit_SingleFileParse = 0x400
 };
 
 /**
@@ -1418,6 +1476,15 @@ CINDEX_LINKAGE int clang_saveTranslationUnit(CXTranslationUnit TU,
                                              const char *FileName,
                                              unsigned options);
 
+/**
+ * \brief Suspend a translation unit in order to free memory associated with it.
+ *
+ * A suspended translation unit uses significantly less memory but, on the
+ * other hand, does not support any calls other than
+ * \c clang_reparseTranslationUnit to resume it or
+ * \c clang_disposeTranslationUnit to dispose it completely.
+ */
+CINDEX_LINKAGE unsigned clang_suspendTranslationUnit(CXTranslationUnit);
+
 /**
  * \brief Destroy the specified CXTranslationUnit object.
  */
@@ -3076,7 +3143,52 @@ enum CXTypeKind {
    *
    * E.g., struct S, or via a qualified name, e.g., N::M::type, or both.
    */
-  CXType_Elaborated = 119
+  CXType_Elaborated = 119,
+
+  /* OpenCL PipeType. */
+  CXType_Pipe = 120,
+
+  /* OpenCL builtin types. */
+  CXType_OCLImage1dRO = 121,
+  CXType_OCLImage1dArrayRO = 122,
+  CXType_OCLImage1dBufferRO = 123,
+  CXType_OCLImage2dRO = 124,
+  CXType_OCLImage2dArrayRO = 125,
+  CXType_OCLImage2dDepthRO = 126,
+  CXType_OCLImage2dArrayDepthRO = 127,
+  CXType_OCLImage2dMSAARO = 128,
+  CXType_OCLImage2dArrayMSAARO = 129,
+  CXType_OCLImage2dMSAADepthRO = 130,
+  CXType_OCLImage2dArrayMSAADepthRO = 131,
+  CXType_OCLImage3dRO = 132,
+  CXType_OCLImage1dWO = 133,
+  CXType_OCLImage1dArrayWO = 134,
+  CXType_OCLImage1dBufferWO = 135,
+  CXType_OCLImage2dWO = 136,
+  CXType_OCLImage2dArrayWO = 137,
+  CXType_OCLImage2dDepthWO = 138,
+  CXType_OCLImage2dArrayDepthWO = 139,
+  CXType_OCLImage2dMSAAWO = 140,
+  CXType_OCLImage2dArrayMSAAWO = 141,
+  CXType_OCLImage2dMSAADepthWO = 142,
+  CXType_OCLImage2dArrayMSAADepthWO = 143,
+  CXType_OCLImage3dWO = 144,
+  CXType_OCLImage1dRW = 145,
+  CXType_OCLImage1dArrayRW = 146,
+  CXType_OCLImage1dBufferRW = 147,
+  CXType_OCLImage2dRW = 148,
+  CXType_OCLImage2dArrayRW = 149,
+  CXType_OCLImage2dDepthRW = 150,
+  CXType_OCLImage2dArrayDepthRW = 151,
+  CXType_OCLImage2dMSAARW = 152,
+  CXType_OCLImage2dArrayMSAARW = 153,
+  CXType_OCLImage2dMSAADepthRW = 154,
+  CXType_OCLImage2dArrayMSAADepthRW = 155,
+  CXType_OCLImage3dRW = 156,
+  CXType_OCLSampler = 157,
+  CXType_OCLEvent = 158,
+  CXType_OCLQueue = 159,
+  CXType_OCLReserveID = 160
 };
 
 /**
@@ -3093,7 +3205,9 @@ enum CXCallingConv {
   CXCallingConv_AAPCS_VFP = 7,
   CXCallingConv_X86RegCall = 8,
   CXCallingConv_IntelOclBicc = 9,
-  CXCallingConv_X86_64Win64 = 10,
+  CXCallingConv_Win64 = 10,
+  /* Alias for compatibility with older versions of the API. */
+  CXCallingConv_X86_64Win64 = CXCallingConv_Win64,
   CXCallingConv_X86_64SysV = 11,
   CXCallingConv_X86VectorCall = 12,
   CXCallingConv_Swift = 13,
@@ -3362,6 +3476,16 @@ CINDEX_LINKAGE unsigned clang_isVolatileQualifiedType(CXType T);
  */
 CINDEX_LINKAGE unsigned clang_isRestrictQualifiedType(CXType T);
 
+/**
+ * \brief Returns the address space of the given type.
+ */
+CINDEX_LINKAGE unsigned clang_getAddressSpace(CXType T);
+
+/**
+ * \brief Returns the typedef name of the given type.
+ */
+CINDEX_LINKAGE CXString clang_getTypedefName(CXType CT);
+
 /**
  * \brief For pointer types, returns the type of the pointee.
  */
@@ -3401,6 +3525,13 @@ CINDEX_LINKAGE enum CXCallingConv clang_getFunctionTypeCallingConv(CXType T);
  */
 CINDEX_LINKAGE CXType clang_getResultType(CXType T);
 
+/**
+ * \brief Retrieve the exception specification type associated with a function type.
+ *
+ * If a non-function type is passed in, an error code of -1 is returned.
+ */
+CINDEX_LINKAGE int clang_getExceptionSpecificationType(CXType T);
+
 /**
  * \brief Retrieve the number of non-variadic parameters associated with a
  * function type.
@@ -3429,6 +3560,13 @@ CINDEX_LINKAGE unsigned clang_isFunctionTypeVariadic(CXType T);
  */
 CINDEX_LINKAGE CXType clang_getCursorResultType(CXCursor C);
 
+/**
+ * \brief Retrieve the exception specification type associated with a given cursor.
+ *
+ * This only returns a valid result if the cursor refers to a function or method.
+ */
+CINDEX_LINKAGE int clang_getCursorExceptionSpecificationType(CXCursor C);
+
 /**
  * \brief Return 1 if the CXType is a POD (plain old data) type, and 0
  *  otherwise.
@@ -4280,6 +4418,11 @@ CINDEX_LINKAGE unsigned clang_CXXMethod_isStatic(CXCursor C);
  */
 CINDEX_LINKAGE unsigned clang_CXXMethod_isVirtual(CXCursor C);
 
+/**
+ * \brief Determine if an enum declaration refers to a scoped enum.
+ */
+CINDEX_LINKAGE unsigned clang_EnumDecl_isScoped(CXCursor C);
+
 /**
  * \brief Determine if a C++ member function or member function template is
  * declared 'const'.
diff --git a/interpreter/llvm/src/tools/clang/include/clang/AST/ASTContext.h b/interpreter/llvm/src/tools/clang/include/clang/AST/ASTContext.h
index 4fb8507098cc3..5d290fffa79a9 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/AST/ASTContext.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/AST/ASTContext.h
@@ -935,7 +935,7 @@ class ASTContext : public RefCountedBase {
 
   /// \brief Get the additional modules in which the definition \p Def has
   /// been merged.
-  ArrayRef getModulesWithMergedDefinition(NamedDecl *Def) {
+  ArrayRef getModulesWithMergedDefinition(const NamedDecl *Def) {
     auto MergedIt = MergedDefModules.find(Def);
     if (MergedIt == MergedDefModules.end())
       return None;
@@ -1442,6 +1442,10 @@ class ASTContext : public RefCountedBase {
   /// The sizeof operator requires this (C99 6.5.3.4p4).
   CanQualType getSizeType() const;
 
+  /// \brief Return the unique signed counterpart of the integer type
+  /// corresponding to size_t.
+  CanQualType getSignedSizeType() const;
+
   /// \brief Return the unique type for "intmax_t" (C99 7.18.1.5), defined in
   /// .
   CanQualType getIntMaxType() const;
@@ -2051,6 +2055,11 @@ class ASTContext : public RefCountedBase {
   /// Get the offset of a FieldDecl or IndirectFieldDecl, in bits.
   uint64_t getFieldOffset(const ValueDecl *FD) const;
 
+  /// Get the offset of an ObjCIvarDecl in bits.
+  uint64_t lookupFieldBitOffset(const ObjCInterfaceDecl *OID,
+                                const ObjCImplementationDecl *ID,
+                                const ObjCIvarDecl *Ivar) const;
+
   bool isNearlyEmpty(const CXXRecordDecl *RD) const;
 
   VTableContextBase *getVTableContext();
@@ -2325,8 +2334,7 @@ class ASTContext : public RefCountedBase {
   uint64_t getTargetNullPointerValue(QualType QT) const;
 
   bool addressSpaceMapManglingFor(unsigned AS) const {
-    return AddrSpaceMapMangling || 
-           AS >= LangAS::Count;
+    return AddrSpaceMapMangling || AS >= LangAS::FirstTargetAddressSpace;
   }
 
 private:
diff --git a/interpreter/llvm/src/tools/clang/include/clang/AST/ASTStructuralEquivalence.h b/interpreter/llvm/src/tools/clang/include/clang/AST/ASTStructuralEquivalence.h
index 770bb5763fbde..23674c65f332f 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/AST/ASTStructuralEquivalence.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/AST/ASTStructuralEquivalence.h
@@ -62,9 +62,11 @@ struct StructuralEquivalenceContext {
   StructuralEquivalenceContext(
       ASTContext &FromCtx, ASTContext &ToCtx,
       llvm::DenseSet> &NonEquivalentDecls,
-      bool StrictTypeSpelling = false, bool Complain = true)
+      bool StrictTypeSpelling = false, bool Complain = true,
+      bool ErrorOnTagTypeMismatch = false)
       : FromCtx(FromCtx), ToCtx(ToCtx), NonEquivalentDecls(NonEquivalentDecls),
-        StrictTypeSpelling(StrictTypeSpelling), Complain(Complain),
+        StrictTypeSpelling(StrictTypeSpelling),
+        ErrorOnTagTypeMismatch(ErrorOnTagTypeMismatch), Complain(Complain),
         LastDiagFromC2(false) {}
 
   DiagnosticBuilder Diag1(SourceLocation Loc, unsigned DiagID);
diff --git a/interpreter/llvm/src/tools/clang/include/clang/AST/CXXInheritance.h b/interpreter/llvm/src/tools/clang/include/clang/AST/CXXInheritance.h
index a7961ebe8ce6a..980608570fd68 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/AST/CXXInheritance.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/AST/CXXInheritance.h
@@ -127,7 +127,11 @@ class CXXBasePaths {
   /// class subobjects for that class type. The key of the map is
   /// the cv-unqualified canonical type of the base class subobject.
   llvm::SmallDenseMap, 8> ClassSubobjects;
-  
+
+  /// VisitedDependentRecords - Records the dependent records that have been
+  /// already visited.
+  llvm::SmallDenseSet VisitedDependentRecords;
+
   /// FindAmbiguities - Whether Sema::IsDerivedFrom should try find
   /// ambiguous paths while it is looking for a path from a derived
   /// type to a base type.
diff --git a/interpreter/llvm/src/tools/clang/include/clang/AST/Decl.h b/interpreter/llvm/src/tools/clang/include/clang/AST/Decl.h
index 0e7ded38f60c9..97f6a7256e648 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/AST/Decl.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/AST/Decl.h
@@ -853,6 +853,7 @@ class VarDecl : public DeclaratorDecl, public Redeclarable {
 
   class NonParmVarDeclBitfields {
     friend class VarDecl;
+    friend class ImplicitParamDecl;
     friend class ASTDeclReader;
 
     unsigned : NumVarDeclBits;
@@ -896,6 +897,10 @@ class VarDecl : public DeclaratorDecl, public Redeclarable {
     /// declared in the same block scope. This controls whether we should merge
     /// the type of this declaration with its previous declaration.
     unsigned PreviousDeclInSameBlockScope : 1;
+
+    /// Defines the kind of the ImplicitParamDecl: 'this', 'self', 'vtt',
+    /// '_cmd' or something else.
+    unsigned ImplicitParamKind : 3;
   };
 
   union {
@@ -958,9 +963,16 @@ class VarDecl : public DeclaratorDecl, public Redeclarable {
   /// hasLocalStorage - Returns true if a variable with function scope
   ///  is a non-static local variable.
   bool hasLocalStorage() const {
-    if (getStorageClass() == SC_None)
+    if (getStorageClass() == SC_None) {
+      // OpenCL v1.2 s6.5.3: The __constant or constant address space name is
+      // used to describe variables allocated in global memory and which are
+      // accessed inside a kernel(s) as read-only variables. As such, variables
+      // in constant address space cannot have local storage.
+      if (getType().getAddressSpace() == LangAS::opencl_constant)
+        return false;
       // Second check is for C++11 [dcl.stc]p4.
       return !isFileVarDecl() && getTSCSpec() == TSCS_unspecified;
+    }
 
     // Global Named Register (GNU extension)
     if (getStorageClass() == SC_Register && !isLocalVarDeclOrParm())
@@ -1371,20 +1383,50 @@ class VarDecl : public DeclaratorDecl, public Redeclarable {
 
 class ImplicitParamDecl : public VarDecl {
   void anchor() override;
+
 public:
+  /// Defines the kind of the implicit parameter: whether it is a parameter
+  /// for 'this', 'self', '_cmd', virtual table pointers, a captured context,
+  /// or something else.
+  enum ImplicitParamKind : unsigned {
+    ObjCSelf,        ///< Parameter for Objective-C 'self' argument
+    ObjCCmd,         ///< Parameter for Objective-C '_cmd' argument
+    CXXThis,         ///< Parameter for C++ 'this' argument
+    CXXVTT,          ///< Parameter for C++ virtual table pointers
+    CapturedContext, ///< Parameter for captured context
+    Other,           ///< Other implicit parameter
+  };
+
+  /// Create implicit parameter.
   static ImplicitParamDecl *Create(ASTContext &C, DeclContext *DC,
                                    SourceLocation IdLoc, IdentifierInfo *Id,
-                                   QualType T);
+                                   QualType T, ImplicitParamKind ParamKind);
+  static ImplicitParamDecl *Create(ASTContext &C, QualType T,
+                                   ImplicitParamKind ParamKind);
 
   static ImplicitParamDecl *CreateDeserialized(ASTContext &C, unsigned ID);
 
   ImplicitParamDecl(ASTContext &C, DeclContext *DC, SourceLocation IdLoc,
-                    IdentifierInfo *Id, QualType Type)
-    : VarDecl(ImplicitParam, C, DC, IdLoc, IdLoc, Id, Type,
-              /*tinfo*/ nullptr, SC_None) {
+                    IdentifierInfo *Id, QualType Type,
+                    ImplicitParamKind ParamKind)
+      : VarDecl(ImplicitParam, C, DC, IdLoc, IdLoc, Id, Type,
+                /*TInfo=*/nullptr, SC_None) {
+    NonParmVarDeclBits.ImplicitParamKind = ParamKind;
     setImplicit();
   }
 
+  ImplicitParamDecl(ASTContext &C, QualType Type, ImplicitParamKind ParamKind)
+      : VarDecl(ImplicitParam, C, /*DC=*/nullptr, SourceLocation(),
+                SourceLocation(), /*Id=*/nullptr, Type,
+                /*TInfo=*/nullptr, SC_None) {
+    NonParmVarDeclBits.ImplicitParamKind = ParamKind;
+    setImplicit();
+  }
+
+  /// Returns the implicit parameter kind.
+  ImplicitParamKind getParameterKind() const {
+    return static_cast<ImplicitParamKind>(NonParmVarDeclBits.ImplicitParamKind);
+  }
   // Implement isa/cast/dyncast/etc.
   static bool classof(const Decl *D) { return classofKind(D->getKind()); }
   static bool classofKind(Kind K) { return K == ImplicitParam; }
@@ -1616,6 +1658,7 @@ class FunctionDecl : public DeclaratorDecl, public DeclContext,
   unsigned HasImplicitReturnZero : 1;
   unsigned IsLateTemplateParsed : 1;
   unsigned IsConstexpr : 1;
+  unsigned InstantiationIsPending : 1;
 
   /// \brief Indicates if the function uses __try.
   unsigned UsesSEHTry : 1;
@@ -1625,8 +1668,7 @@ class FunctionDecl : public DeclaratorDecl, public DeclContext,
   unsigned HasSkippedBody : 1;
 
   /// Indicates if the function declaration will have a body, once we're done
-  /// parsing it.  (We don't set it to false when we're done parsing, in the
-  /// hopes this is simpler.)
+  /// parsing it.
   unsigned WillHaveBody : 1;
 
   /// \brief End part of this FunctionDecl's source range.
@@ -1711,6 +1753,7 @@ class FunctionDecl : public DeclaratorDecl, public DeclContext,
         IsDeleted(false), IsTrivial(false), IsDefaulted(false),
         IsExplicitlyDefaulted(false), HasImplicitReturnZero(false),
         IsLateTemplateParsed(false), IsConstexpr(isConstexprSpecified),
+        InstantiationIsPending(false),
         UsesSEHTry(false), HasSkippedBody(false), WillHaveBody(false),
         EndRangeLoc(NameInfo.getEndLoc()), TemplateOrSpecialization(),
         DNLoc(NameInfo.getInfo()) {}
@@ -1824,14 +1867,15 @@ class FunctionDecl : public DeclaratorDecl, public DeclContext,
     return getBody(Definition);
   }
 
-  /// isThisDeclarationADefinition - Returns whether this specific
-  /// declaration of the function is also a definition. This does not
-  /// determine whether the function has been defined (e.g., in a
-  /// previous definition); for that information, use isDefined. Note
-  /// that this returns false for a defaulted function unless that function
-  /// has been implicitly defined (possibly as deleted).
+  /// Returns whether this specific declaration of the function is also a
+  /// definition that does not contain uninstantiated body.
+  ///
+  /// This does not determine whether the function has been defined (e.g., in a
+  /// previous definition); for that information, use isDefined.
+  ///
   bool isThisDeclarationADefinition() const {
-    return IsDeleted || Body || IsLateTemplateParsed;
+    return IsDeleted || IsDefaulted || Body || IsLateTemplateParsed ||
+      WillHaveBody || hasDefiningAttr();
   }
 
   /// doesThisDeclarationHaveABody - Returns whether this specific
@@ -1902,6 +1946,15 @@ class FunctionDecl : public DeclaratorDecl, public DeclContext,
   bool isConstexpr() const { return IsConstexpr; }
   void setConstexpr(bool IC) { IsConstexpr = IC; }
 
+  /// \brief Whether the instantiation of this function is pending.
+  /// This bit is set when the decision to instantiate this function is made
+  /// and unset if and when the function body is created. That leaves out
+  /// cases where instantiation did not happen because the template definition
+  /// was not seen in this TU. This bit remains set in those cases, under the
+  /// assumption that the instantiation will happen in some other TU.
+  bool instantiationIsPending() const { return InstantiationIsPending; }
+  void setInstantiationIsPending(bool IC) { InstantiationIsPending = IC; }
+
   /// \brief Indicates the function uses __try.
   bool usesSEHTry() const { return UsesSEHTry; }
   void setUsesSEHTry(bool UST) { UsesSEHTry = UST; }
@@ -1967,7 +2020,10 @@ class FunctionDecl : public DeclaratorDecl, public DeclContext,
   /// These functions have special behavior under C++1y [expr.new]:
   ///    An implementation is allowed to omit a call to a replaceable global
   ///    allocation function. [...]
-  bool isReplaceableGlobalAllocationFunction() const;
+  ///
+  /// If this function is an aligned allocation/deallocation function, return
+  /// true through IsAligned.
+  bool isReplaceableGlobalAllocationFunction(bool *IsAligned = nullptr) const;
 
   /// Compute the language linkage.
   LanguageLinkage getLanguageLinkage() const;
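
The ImplicitParamDecl changes above let consumers ask what an implicit
argument stands for instead of inferring it from the identifier. A minimal
sketch of the extended API; Ctx, DC and SelfTy stand in for an ASTContext,
DeclContext and QualType obtained elsewhere:

    // Hypothetical caller; Ctx, DC and SelfTy are assumed inputs.
    ImplicitParamDecl *Self = ImplicitParamDecl::Create(
        Ctx, DC, SourceLocation(), &Ctx.Idents.get("self"), SelfTy,
        ImplicitParamDecl::ObjCSelf);
    if (Self->getParameterKind() == ImplicitParamDecl::ObjCSelf) {
      // The receiver is identified by its kind, not by comparing the
      // identifier against the string "self".
    }
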
diff --git a/interpreter/llvm/src/tools/clang/include/clang/AST/DeclBase.h b/interpreter/llvm/src/tools/clang/include/clang/AST/DeclBase.h
index 08879b36cce54..041f0fd484d45 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/AST/DeclBase.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/AST/DeclBase.h
@@ -34,6 +34,7 @@ class DeclarationName;
 class DependentDiagnostic;
 class EnumDecl;
 class ExportDecl;
+class ExternalSourceSymbolAttr;
 class FunctionDecl;
 class FunctionType;
 enum Linkage : unsigned char;
@@ -201,26 +202,33 @@ class LLVM_ALIGNAS(/*alignof(uint64_t)*/ 8) Decl {
     OBJC_TQ_CSNullability = 0x40
   };
 
-protected:
-  // Enumeration values used in the bits stored in NextInContextAndBits.
-  enum {
-    /// \brief Whether this declaration is a top-level declaration (function,
-    /// global variable, etc.) that is lexically inside an objc container
-    /// definition.
-    TopLevelDeclInObjCContainerFlag = 0x01,
-    
-    /// \brief Whether this declaration is private to the module in which it was
-    /// defined.
-    ModulePrivateFlag = 0x02
+  /// The kind of ownership a declaration has, for visibility purposes.
+  /// This enumeration is designed such that higher values represent higher
+  /// levels of name hiding.
+  enum class ModuleOwnershipKind : unsigned {
+    /// This declaration is not owned by a module.
+    Unowned,
+    /// This declaration has an owning module, but is globally visible
+    /// (typically because its owning module is visible and we know that
+    /// modules cannot later become hidden in this compilation).
+    /// After serialization and deserialization, this will be converted
+    /// to VisibleWhenImported.
+    Visible,
+    /// This declaration has an owning module, and is visible when that
+    /// module is imported.
+    VisibleWhenImported,
+    /// This declaration has an owning module, but is only visible to
+    /// lookups that occur within that module.
+    ModulePrivate
   };
-  
+
+protected:
   /// \brief The next declaration within the same lexical
   /// DeclContext. These pointers form the linked list that is
   /// traversed via DeclContext's decls_begin()/decls_end().
   ///
-  /// The extra two bits are used for the TopLevelDeclInObjCContainer and
-  /// ModulePrivate bits.
-  llvm::PointerIntPair<Decl *, 2, unsigned> NextInContextAndBits;
+  /// The extra two bits are used for the ModuleOwnershipKind.
+  llvm::PointerIntPair<Decl *, 2, ModuleOwnershipKind> NextInContextAndBits;
 
 private:
   friend class DeclContext;
@@ -281,6 +289,11 @@ class LLVM_ALIGNAS(/*alignof(uint64_t)*/ 8) Decl {
   /// are regarded as "referenced" but not "used".
   unsigned Referenced : 1;
 
+  /// \brief Whether this declaration is a top-level declaration (function,
+  /// global variable, etc.) that is lexically inside an objc container
+  /// definition.
+  unsigned TopLevelDeclInObjCContainer : 1;
+  
   /// \brief Whether statistic collection is enabled.
   static bool StatisticsEnabled;
 
@@ -293,11 +306,6 @@ class LLVM_ALIGNAS(/*alignof(uint64_t)*/ 8) Decl {
   /// \brief Whether this declaration was loaded from an AST file.
   unsigned FromASTFile : 1;
 
-  /// \brief Whether this declaration is hidden from normal name lookup, e.g.,
-  /// because it is was loaded from an AST file is either module-private or
-  /// because its submodule has not been made visible.
-  unsigned Hidden : 1;
-  
   /// IdentifierNamespace - This specifies what IDNS_* namespace this lives in.
   unsigned IdentifierNamespace : 13;
 
@@ -331,26 +339,38 @@ class LLVM_ALIGNAS(/*alignof(uint64_t)*/ 8) Decl {
 private:
   bool AccessDeclContextSanity() const;
 
-protected:
+  /// Get the module ownership kind to use for a local lexical child of \p DC,
+  /// which may be either a local or (rarely) an imported declaration.
+  static ModuleOwnershipKind getModuleOwnershipKindForChildOf(DeclContext *DC) {
+    if (DC) {
+      auto *D = cast<Decl>(DC);
+      auto MOK = D->getModuleOwnershipKind();
+      if (MOK != ModuleOwnershipKind::Unowned &&
+          (!D->isFromASTFile() || D->hasLocalOwningModuleStorage()))
+        return MOK;
+      // If D is not local and we have no local module storage, then we don't
+      // need to track module ownership at all.
+    }
+    return ModuleOwnershipKind::Unowned;
+  }
 
+protected:
   Decl(Kind DK, DeclContext *DC, SourceLocation L)
-    : NextInContextAndBits(), DeclCtx(DC),
-      Loc(L), DeclKind(DK), InvalidDecl(0),
-      HasAttrs(false), Implicit(false), Used(false), Referenced(false),
-      Access(AS_none), FromASTFile(0), Hidden(DC && cast(DC)->Hidden),
-      IdentifierNamespace(getIdentifierNamespaceForKind(DK)),
-      CacheValidAndLinkage(0)
-  {
+      : NextInContextAndBits(nullptr, getModuleOwnershipKindForChildOf(DC)),
+        DeclCtx(DC), Loc(L), DeclKind(DK), InvalidDecl(0), HasAttrs(false),
+        Implicit(false), Used(false), Referenced(false),
+        TopLevelDeclInObjCContainer(false), Access(AS_none), FromASTFile(0),
+        IdentifierNamespace(getIdentifierNamespaceForKind(DK)),
+        CacheValidAndLinkage(0) {
     if (StatisticsEnabled) add(DK);
   }
 
   Decl(Kind DK, EmptyShell Empty)
-    : NextInContextAndBits(), DeclKind(DK), InvalidDecl(0),
-      HasAttrs(false), Implicit(false), Used(false), Referenced(false),
-      Access(AS_none), FromASTFile(0), Hidden(0),
-      IdentifierNamespace(getIdentifierNamespaceForKind(DK)),
-      CacheValidAndLinkage(0)
-  {
+      : NextInContextAndBits(), DeclKind(DK), InvalidDecl(0), HasAttrs(false),
+        Implicit(false), Used(false), Referenced(false),
+        TopLevelDeclInObjCContainer(false), Access(AS_none), FromASTFile(0),
+        IdentifierNamespace(getIdentifierNamespaceForKind(DK)),
+        CacheValidAndLinkage(0) {
     if (StatisticsEnabled) add(DK);
   }
 
@@ -550,22 +570,21 @@ class LLVM_ALIGNAS(/*alignof(uint64_t)*/ 8) Decl {
   /// global variable, etc.) that is lexically inside an objc container
   /// definition.
   bool isTopLevelDeclInObjCContainer() const {
-    return NextInContextAndBits.getInt() & TopLevelDeclInObjCContainerFlag;
+    return TopLevelDeclInObjCContainer;
   }
 
   void setTopLevelDeclInObjCContainer(bool V = true) {
-    unsigned Bits = NextInContextAndBits.getInt();
-    if (V)
-      Bits |= TopLevelDeclInObjCContainerFlag;
-    else
-      Bits &= ~TopLevelDeclInObjCContainerFlag;
-    NextInContextAndBits.setInt(Bits);
+    TopLevelDeclInObjCContainer = V;
   }
 
+  /// \brief Looks on this and related declarations for an applicable
+  /// external source symbol attribute.
+  ExternalSourceSymbolAttr *getExternalSourceSymbolAttr() const;
+
   /// \brief Whether this declaration was marked as being private to the
   /// module in which it was defined.
   bool isModulePrivate() const {
-    return NextInContextAndBits.getInt() & ModulePrivateFlag;
+    return getModuleOwnershipKind() == ModuleOwnershipKind::ModulePrivate;
   }
 
   /// \brief Whether this declaration is exported (by virtue of being lexically
@@ -580,15 +599,14 @@ class LLVM_ALIGNAS(/*alignof(uint64_t)*/ 8) Decl {
   const Attr *getDefiningAttr() const;
 
 protected:
-  /// \brief Specify whether this declaration was marked as being private
+  /// \brief Specify that this declaration was marked as being private
   /// to the module in which it was defined.
-  void setModulePrivate(bool MP = true) {
-    unsigned Bits = NextInContextAndBits.getInt();
-    if (MP)
-      Bits |= ModulePrivateFlag;
-    else
-      Bits &= ~ModulePrivateFlag;
-    NextInContextAndBits.setInt(Bits);
+  void setModulePrivate() {
+    // The module-private specifier has no effect on unowned declarations.
+    // FIXME: We should track this in some way for source fidelity.
+    if (getModuleOwnershipKind() == ModuleOwnershipKind::Unowned)
+      return;
+    setModuleOwnershipKind(ModuleOwnershipKind::ModulePrivate);
   }
 
   /// \brief Set the owning module ID.
@@ -687,7 +705,7 @@ class LLVM_ALIGNAS(/*alignof(uint64_t)*/ 8) Decl {
   /// \brief Get the imported owning module, if this decl is from an imported
   /// (non-local) module.
   Module *getImportedOwningModule() const {
-    if (!isFromASTFile())
+    if (!isFromASTFile() || !hasOwningModule())
       return nullptr;
 
     return getOwningModuleSlow();
@@ -696,28 +714,57 @@ class LLVM_ALIGNAS(/*alignof(uint64_t)*/ 8) Decl {
   /// \brief Get the local owning module, if known. Returns nullptr if owner is
   /// not yet known or declaration is not from a module.
   Module *getLocalOwningModule() const {
-    if (isFromASTFile() || !Hidden)
+    if (isFromASTFile() || !hasOwningModule())
       return nullptr;
+
+    assert(hasLocalOwningModuleStorage() &&
+           "owned local decl but no local module storage");
     return reinterpret_cast<Module *const *>(this)[-1];
   }
   void setLocalOwningModule(Module *M) {
-    assert(!isFromASTFile() && Hidden && hasLocalOwningModuleStorage() &&
+    assert(!isFromASTFile() && hasOwningModule() &&
+           hasLocalOwningModuleStorage() &&
            "should not have a cached owning module");
     reinterpret_cast<Module **>(this)[-1] = M;
   }
 
+  /// Is this declaration owned by some module?
+  bool hasOwningModule() const {
+    return getModuleOwnershipKind() != ModuleOwnershipKind::Unowned;
+  }
+
+  /// Get the module that owns this declaration.
   Module *getOwningModule() const {
     return isFromASTFile() ? getImportedOwningModule() : getLocalOwningModule();
   }
 
-  /// \brief Determine whether this declaration is hidden from name lookup.
-  bool isHidden() const { return Hidden; }
+  /// \brief Determine whether this declaration might be hidden from name
+  /// lookup. Note that the declaration might be visible even if this returns
+  /// \c false, if the owning module is visible within the query context.
+  // FIXME: Rename this to make it clearer what it does.
+  bool isHidden() const {
+    return (int)getModuleOwnershipKind() > (int)ModuleOwnershipKind::Visible;
+  }
+
+  /// Set that this declaration is globally visible, even if it came from a
+  /// module that is not visible.
+  void setVisibleDespiteOwningModule() {
+    if (isHidden())
+      setModuleOwnershipKind(ModuleOwnershipKind::Visible);
+  }
+
+  /// \brief Get the kind of module ownership for this declaration.
+  ModuleOwnershipKind getModuleOwnershipKind() const {
+    return NextInContextAndBits.getInt();
+  }
 
   /// \brief Set whether this declaration is hidden from name lookup.
-  void setHidden(bool Hide) {
-    assert((!Hide || isFromASTFile() || hasLocalOwningModuleStorage()) &&
-           "declaration with no owning module can't be hidden");
-    Hidden = Hide;
+  void setModuleOwnershipKind(ModuleOwnershipKind MOK) {
+    assert(!(getModuleOwnershipKind() == ModuleOwnershipKind::Unowned &&
+             MOK != ModuleOwnershipKind::Unowned && !isFromASTFile() &&
+             !hasLocalOwningModuleStorage()) &&
+           "no storage available for owning module for this declaration");
+    NextInContextAndBits.setInt(MOK);
   }
 
   unsigned getIdentifierNamespace() const {
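
Net effect of the DeclBase.h changes: the separate Hidden bit and the two
flags packed into NextInContextAndBits are folded into one four-state
ModuleOwnershipKind. A sketch of how a lookup-side client reads the new
state; D stands in for an arbitrary clang::Decl pointer:

    // Sketch; D is assumed to be a valid clang::Decl *.
    if (D->isHidden()) {
      // Ownership is VisibleWhenImported or ModulePrivate: the name is
      // found only if the owning module is visible (and, for
      // ModulePrivate, only from within that module).
      Module *Owner = D->getOwningModule();
      (void)Owner; // a real lookup would consult Owner's visibility
    }
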
diff --git a/interpreter/llvm/src/tools/clang/include/clang/AST/DeclCXX.h b/interpreter/llvm/src/tools/clang/include/clang/AST/DeclCXX.h
index eeaed93045cb0..db90fa06464ac 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/AST/DeclCXX.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/AST/DeclCXX.h
@@ -375,6 +375,7 @@ class CXXRecordDecl : public RecordDecl {
     /// \brief These flags are \c true if a defaulted corresponding special
     /// member can't be fully analyzed without performing overload resolution.
     /// @{
+    unsigned NeedOverloadResolutionForCopyConstructor : 1;
     unsigned NeedOverloadResolutionForMoveConstructor : 1;
     unsigned NeedOverloadResolutionForMoveAssignment : 1;
     unsigned NeedOverloadResolutionForDestructor : 1;
@@ -383,6 +384,7 @@ class CXXRecordDecl : public RecordDecl {
     /// \brief These flags are \c true if an implicit defaulted corresponding
     /// special member would be defined as deleted.
     /// @{
+    unsigned DefaultedCopyConstructorIsDeleted : 1;
     unsigned DefaultedMoveConstructorIsDeleted : 1;
     unsigned DefaultedMoveAssignmentIsDeleted : 1;
     unsigned DefaultedDestructorIsDeleted : 1;
@@ -415,6 +417,12 @@ class CXXRecordDecl : public RecordDecl {
     /// constructor.
     unsigned HasDefaultedDefaultConstructor : 1;
 
+    /// \brief True if this class can be passed in a non-address-preserving
+    /// fashion (such as in registers) according to the C++ language rules.
+    /// This does not imply anything about how the ABI in use will actually
+    /// pass an object of this class.
+    unsigned CanPassInRegisters : 1;
+
     /// \brief True if a defaulted default constructor for this class would
     /// be constexpr.
     unsigned DefaultedDefaultConstructorIsConstexpr : 1;
@@ -811,18 +819,50 @@ class CXXRecordDecl : public RecordDecl {
     return data().FirstFriend.isValid();
   }
 
+  /// \brief \c true if a defaulted copy constructor for this class would be
+  /// deleted.
+  bool defaultedCopyConstructorIsDeleted() const {
+    assert((!needsOverloadResolutionForCopyConstructor() ||
+            (data().DeclaredSpecialMembers & SMF_CopyConstructor)) &&
+           "this property has not yet been computed by Sema");
+    return data().DefaultedCopyConstructorIsDeleted;
+  }
+
+  /// \brief \c true if a defaulted move constructor for this class would be
+  /// deleted.
+  bool defaultedMoveConstructorIsDeleted() const {
+    assert((!needsOverloadResolutionForMoveConstructor() ||
+            (data().DeclaredSpecialMembers & SMF_MoveConstructor)) &&
+           "this property has not yet been computed by Sema");
+    return data().DefaultedMoveConstructorIsDeleted;
+  }
+
+  /// \brief \c true if a defaulted destructor for this class would be deleted.
+  bool defaultedDestructorIsDeleted() const {
+    return data().DefaultedDestructorIsDeleted;
+  }
+
+  /// \brief \c true if we know for sure that this class has a single,
+  /// accessible, unambiguous copy constructor that is not deleted.
+  bool hasSimpleCopyConstructor() const {
+    return !hasUserDeclaredCopyConstructor() &&
+           !data().DefaultedCopyConstructorIsDeleted;
+  }
+
   /// \brief \c true if we know for sure that this class has a single,
   /// accessible, unambiguous move constructor that is not deleted.
   bool hasSimpleMoveConstructor() const {
     return !hasUserDeclaredMoveConstructor() && hasMoveConstructor() &&
            !data().DefaultedMoveConstructorIsDeleted;
   }
+
   /// \brief \c true if we know for sure that this class has a single,
   /// accessible, unambiguous move assignment operator that is not deleted.
   bool hasSimpleMoveAssignment() const {
     return !hasUserDeclaredMoveAssignment() && hasMoveAssignment() &&
            !data().DefaultedMoveAssignmentIsDeleted;
   }
+
   /// \brief \c true if we know for sure that this class has an accessible
   /// destructor that is not deleted.
   bool hasSimpleDestructor() const {
@@ -878,7 +918,16 @@ class CXXRecordDecl : public RecordDecl {
   /// \brief Determine whether we need to eagerly declare a defaulted copy
   /// constructor for this class.
   bool needsOverloadResolutionForCopyConstructor() const {
-    return data().HasMutableFields;
+    // C++17 [class.copy.ctor]p6:
+    //   If the class definition declares a move constructor or move assignment
+    //   operator, the implicitly declared copy constructor is defined as
+    //   deleted.
+    // In MSVC mode, sometimes a declared move assignment does not delete an
+    // implicit copy constructor, so defer this choice to Sema.
+    if (data().UserDeclaredSpecialMembers &
+        (SMF_MoveConstructor | SMF_MoveAssignment))
+      return true;
+    return data().NeedOverloadResolutionForCopyConstructor;
   }
 
   /// \brief Determine whether an implicit copy constructor for this type
@@ -919,7 +968,16 @@ class CXXRecordDecl : public RecordDecl {
            needsImplicitMoveConstructor();
   }
 
-  /// \brief Set that we attempted to declare an implicitly move
+  /// \brief Set that we attempted to declare an implicit copy
+  /// constructor, but overload resolution failed so we deleted it.
+  void setImplicitCopyConstructorIsDeleted() {
+    assert((data().DefaultedCopyConstructorIsDeleted ||
+            needsOverloadResolutionForCopyConstructor()) &&
+           "Copy constructor should not be deleted");
+    data().DefaultedCopyConstructorIsDeleted = true;
+  }
+
+  /// \brief Set that we attempted to declare an implicit move
   /// constructor, but overload resolution failed so we deleted it.
   void setImplicitMoveConstructorIsDeleted() {
     assert((data().DefaultedMoveConstructorIsDeleted ||
@@ -1316,6 +1374,18 @@ class CXXRecordDecl : public RecordDecl {
     return data().HasIrrelevantDestructor;
   }
 
+  /// \brief Determine whether this class has at least one trivial, non-deleted
+  /// copy or move constructor.
+  bool canPassInRegisters() const {
+    return data().CanPassInRegisters;
+  }
+
+  /// \brief Set that we can pass this RecordDecl in registers.
+  // FIXME: This should be set as part of completeDefinition.
+  void setCanPassInRegisters(bool CanPass) {
+    data().CanPassInRegisters = CanPass;
+  }
+
   /// \brief Determine whether this class has a non-literal or/ volatile type
   /// non-static data member or base class.
   bool hasNonLiteralTypeFieldsOrBases() const {
@@ -1887,6 +1957,19 @@ class CXXMethodDecl : public FunctionDecl {
     return (CD->begin_overridden_methods() != CD->end_overridden_methods());
   }
 
+  /// If it's possible to devirtualize a call to this method, return the called
+  /// function. Otherwise, return null.
+
+  /// \param Base The object on which this virtual function is called.
+  /// \param IsAppleKext True if we are compiling for Apple kext.
+  CXXMethodDecl *getDevirtualizedMethod(const Expr *Base, bool IsAppleKext);
+
+  const CXXMethodDecl *getDevirtualizedMethod(const Expr *Base,
+                                              bool IsAppleKext) const {
+    return const_cast<CXXMethodDecl *>(this)->getDevirtualizedMethod(
+        Base, IsAppleKext);
+  }
+
   /// \brief Determine whether this is a usual deallocation function
   /// (C++ [basic.stc.dynamic.deallocation]p2), which is an overloaded
   /// delete or delete[] operator with a particular signature.
@@ -1946,7 +2029,10 @@ class CXXMethodDecl : public FunctionDecl {
 
   /// \brief Returns the type of the \c this pointer.
   ///
-  /// Should only be called for instance (i.e., non-static) methods.
+  /// Should only be called for instance (i.e., non-static) methods. Note
+  /// that for the call operator of a lambda closure type, this returns the
+  /// desugared 'this' type (a pointer to the closure type), not the captured
+  /// 'this' type.
   QualType getThisType(ASTContext &C) const;
 
   unsigned getTypeQualifiers() const {
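
getDevirtualizedMethod lifts the devirtualization decision into the AST so
multiple clients can share it. A sketch of the intended use at a call site;
MD and Base are assumed to come from a CXXMemberCallExpr:

    // Sketch; MD is the CXXMethodDecl invoked on the expression Base.
    if (const CXXMethodDecl *Target =
            MD->getDevirtualizedMethod(Base, /*IsAppleKext=*/false)) {
      // The dynamic target is known statically: the call can be emitted
      // directly to Target, skipping the vtable load.
    }
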
diff --git a/interpreter/llvm/src/tools/clang/include/clang/AST/DeclObjC.h b/interpreter/llvm/src/tools/clang/include/clang/AST/DeclObjC.h
index 26c0cbe82d176..1cd6e004f751f 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/AST/DeclObjC.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/AST/DeclObjC.h
@@ -1039,10 +1039,9 @@ class ObjCContainerDecl : public NamedDecl, public DeclContext {
   typedef llvm::DenseMap<std::pair<IdentifierInfo*,
                                    unsigned/*isClassProperty*/>,
                          ObjCPropertyDecl*> PropertyMap;
-  
-  typedef llvm::DenseMap<const ObjCProtocolDecl *, ObjCPropertyDecl*>
-            ProtocolPropertyMap;
-  
+
+  typedef llvm::SmallDenseSet<const ObjCProtocolDecl *, 8> ProtocolPropertySet;
+
   typedef llvm::SmallVector<ObjCPropertyDecl*, 8> PropertyDeclOrder;
   
   /// This routine collects list of properties to be implemented in the class.
@@ -2159,7 +2158,8 @@ class ObjCProtocolDecl : public ObjCContainerDecl,
                                     PropertyDeclOrder &PO) const override;
 
   void collectInheritedProtocolProperties(const ObjCPropertyDecl *Property,
-                                          ProtocolPropertyMap &PM) const;
+                                          ProtocolPropertySet &PS,
+                                          PropertyDeclOrder &PO) const;
 
   static bool classof(const Decl *D) { return classofKind(D->getKind()); }
   static bool classofKind(Kind K) { return K == ObjCProtocol; }
diff --git a/interpreter/llvm/src/tools/clang/include/clang/AST/Expr.h b/interpreter/llvm/src/tools/clang/include/clang/AST/Expr.h
index 986145e62a529..0cdbd2a97ee4f 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/AST/Expr.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/AST/Expr.h
@@ -4284,6 +4284,9 @@ class DesignatedInitExpr final
   }
 
   Designator *getDesignator(unsigned Idx) { return &designators()[Idx]; }
+  const Designator *getDesignator(unsigned Idx) const {
+    return &designators()[Idx];
+  }
 
   void setDesignators(const ASTContext &C, const Designator *Desigs,
                       unsigned NumDesigs);
diff --git a/interpreter/llvm/src/tools/clang/include/clang/AST/ExternalASTMerger.h b/interpreter/llvm/src/tools/clang/include/clang/AST/ExternalASTMerger.h
index 55459df1fe6ba..51d0c30ad23bf 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/AST/ExternalASTMerger.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/AST/ExternalASTMerger.h
@@ -44,8 +44,6 @@ class ExternalASTMerger : public ExternalASTSource {
   FindExternalLexicalDecls(const DeclContext *DC,
                            llvm::function_ref<bool(Decl::Kind)> IsKindWeWant,
                            SmallVectorImpl<Decl *> &Result) override;
-
-   void CompleteType(TagDecl *Tag) override;
 };
 
 } // end namespace clang
diff --git a/interpreter/llvm/src/tools/clang/include/clang/AST/NSAPI.h b/interpreter/llvm/src/tools/clang/include/clang/AST/NSAPI.h
index 583f9d9f1deb9..3757116e7c70e 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/AST/NSAPI.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/AST/NSAPI.h
@@ -49,7 +49,7 @@ class NSAPI {
     NSStr_initWithString,
     NSStr_initWithUTF8String
   };
-  static const unsigned NumNSStringMethods = 5;
+  static const unsigned NumNSStringMethods = 6;
 
   IdentifierInfo *getNSClassId(NSClassIdKindKind K) const;
 
@@ -112,7 +112,7 @@ class NSAPI {
     NSMutableDict_setObjectForKeyedSubscript,
     NSMutableDict_setValueForKey
   };
-  static const unsigned NumNSDictionaryMethods = 14;
+  static const unsigned NumNSDictionaryMethods = 13;
   
   /// \brief The Objective-C NSDictionary selectors.
   Selector getNSDictionarySelector(NSDictionaryMethodKind MK) const;
diff --git a/interpreter/llvm/src/tools/clang/include/clang/AST/OpenMPClause.h b/interpreter/llvm/src/tools/clang/include/clang/AST/OpenMPClause.h
index f977e63e04f6a..a1cae8e18f84e 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/AST/OpenMPClause.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/AST/OpenMPClause.h
@@ -20,6 +20,7 @@
 #include "clang/AST/Stmt.h"
 #include "clang/Basic/OpenMPKinds.h"
 #include "clang/Basic/SourceLocation.h"
+#include "llvm/ADT/MapVector.h"
 
 namespace clang {
 
@@ -1889,6 +1890,217 @@ class OMPReductionClause final
   }
 };
 
+/// This represents clause 'task_reduction' in the '#pragma omp taskgroup'
+/// directives.
+///
+/// \code
+/// #pragma omp taskgroup task_reduction(+:a,b)
+/// \endcode
+/// In this example directive '#pragma omp taskgroup' has clause
+/// 'task_reduction' with operator '+' and the variables 'a' and 'b'.
+///
+class OMPTaskReductionClause final
+    : public OMPVarListClause<OMPTaskReductionClause>,
+      public OMPClauseWithPostUpdate,
+      private llvm::TrailingObjects<OMPTaskReductionClause, Expr *> {
+  friend TrailingObjects;
+  friend OMPVarListClause;
+  friend class OMPClauseReader;
+  /// Location of ':'.
+  SourceLocation ColonLoc;
+  /// Nested name specifier for C++.
+  NestedNameSpecifierLoc QualifierLoc;
+  /// Name of custom operator.
+  DeclarationNameInfo NameInfo;
+
+  /// Build clause with number of variables \a N.
+  ///
+  /// \param StartLoc Starting location of the clause.
+  /// \param LParenLoc Location of '('.
+  /// \param EndLoc Ending location of the clause.
+  /// \param ColonLoc Location of ':'.
+  /// \param N Number of the variables in the clause.
+  /// \param QualifierLoc The nested-name qualifier with location information
+  /// \param NameInfo The full name info for reduction identifier.
+  ///
+  OMPTaskReductionClause(SourceLocation StartLoc, SourceLocation LParenLoc,
+                         SourceLocation ColonLoc, SourceLocation EndLoc,
+                         unsigned N, NestedNameSpecifierLoc QualifierLoc,
+                         const DeclarationNameInfo &NameInfo)
+      : OMPVarListClause<OMPTaskReductionClause>(OMPC_task_reduction, StartLoc,
+                                                 LParenLoc, EndLoc, N),
+        OMPClauseWithPostUpdate(this), ColonLoc(ColonLoc),
+        QualifierLoc(QualifierLoc), NameInfo(NameInfo) {}
+
+  /// Build an empty clause.
+  ///
+  /// \param N Number of variables.
+  ///
+  explicit OMPTaskReductionClause(unsigned N)
+      : OMPVarListClause<OMPTaskReductionClause>(
+            OMPC_task_reduction, SourceLocation(), SourceLocation(),
+            SourceLocation(), N),
+        OMPClauseWithPostUpdate(this), ColonLoc(), QualifierLoc(), NameInfo() {}
+
+  /// Sets location of ':' symbol in clause.
+  void setColonLoc(SourceLocation CL) { ColonLoc = CL; }
+  /// Sets the name info for specified reduction identifier.
+  void setNameInfo(DeclarationNameInfo DNI) { NameInfo = DNI; }
+  /// Sets the nested name specifier.
+  void setQualifierLoc(NestedNameSpecifierLoc NSL) { QualifierLoc = NSL; }
+
+  /// Set list of helper expressions, required for proper codegen of the clause.
+  /// These expressions represent private copy of the reduction variable.
+  void setPrivates(ArrayRef<Expr *> Privates);
+
+  /// Get the list of helper privates.
+  MutableArrayRef<Expr *> getPrivates() {
+    return MutableArrayRef<Expr *>(varlist_end(), varlist_size());
+  }
+  ArrayRef<const Expr *> getPrivates() const {
+    return llvm::makeArrayRef(varlist_end(), varlist_size());
+  }
+
+  /// Set list of helper expressions, required for proper codegen of the clause.
+  /// These expressions represent LHS expression in the final reduction
+  /// expression performed by the reduction clause.
+  void setLHSExprs(ArrayRef<Expr *> LHSExprs);
+
+  /// Get the list of helper LHS expressions.
+  MutableArrayRef<Expr *> getLHSExprs() {
+    return MutableArrayRef<Expr *>(getPrivates().end(), varlist_size());
+  }
+  ArrayRef<const Expr *> getLHSExprs() const {
+    return llvm::makeArrayRef(getPrivates().end(), varlist_size());
+  }
+
+  /// Set list of helper expressions, required for proper codegen of the clause.
+  /// These expressions represent RHS expression in the final reduction
+  /// expression performed by the reduction clause. Also, variables in these
+  /// expressions are used for proper initialization of reduction copies.
+  void setRHSExprs(ArrayRef<Expr *> RHSExprs);
+
+  ///  Get the list of helper destination expressions.
+  MutableArrayRef<Expr *> getRHSExprs() {
+    return MutableArrayRef<Expr *>(getLHSExprs().end(), varlist_size());
+  }
+  ArrayRef<const Expr *> getRHSExprs() const {
+    return llvm::makeArrayRef(getLHSExprs().end(), varlist_size());
+  }
+
+  /// Set list of helper reduction expressions, required for proper
+  /// codegen of the clause. These expressions are binary expressions or
+  /// operator/custom reduction call that calculates new value from source
+  /// helper expressions to destination helper expressions.
+  void setReductionOps(ArrayRef<Expr *> ReductionOps);
+
+  ///  Get the list of helper reduction expressions.
+  MutableArrayRef<Expr *> getReductionOps() {
+    return MutableArrayRef<Expr *>(getRHSExprs().end(), varlist_size());
+  }
+  ArrayRef<const Expr *> getReductionOps() const {
+    return llvm::makeArrayRef(getRHSExprs().end(), varlist_size());
+  }
+
+public:
+  /// Creates clause with a list of variables \a VL.
+  ///
+  /// \param StartLoc Starting location of the clause.
+  /// \param LParenLoc Location of '('.
+  /// \param ColonLoc Location of ':'.
+  /// \param EndLoc Ending location of the clause.
+  /// \param VL The variables in the clause.
+  /// \param QualifierLoc The nested-name qualifier with location information
+  /// \param NameInfo The full name info for reduction identifier.
+  /// \param Privates List of helper expressions for proper generation of
+  /// private copies.
+  /// \param LHSExprs List of helper expressions for proper generation of
+  /// assignment operation required for copyprivate clause. This list represents
+  /// LHSs of the reduction expressions.
+  /// \param RHSExprs List of helper expressions for proper generation of
+  /// assignment operation required for copyprivate clause. This list represents
+  /// RHSs of the reduction expressions.
+  /// Also, variables in these expressions are used for proper initialization of
+  /// reduction copies.
+  /// \param ReductionOps List of helper expressions that represents reduction
+  /// expressions:
+  /// \code
+  /// LHSExprs binop RHSExprs;
+  /// operator binop(LHSExpr, RHSExpr);
+  /// (LHSExpr, RHSExpr);
+  /// \endcode
+  /// Required for proper codegen of final reduction operation performed by the
+  /// reduction clause.
+  /// \param PreInit Statement that must be executed before entering the OpenMP
+  /// region with this clause.
+  /// \param PostUpdate Expression that must be executed after exit from the
+  /// OpenMP region with this clause.
+  ///
+  static OMPTaskReductionClause *
+  Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc,
+         SourceLocation ColonLoc, SourceLocation EndLoc, ArrayRef<Expr *> VL,
+         NestedNameSpecifierLoc QualifierLoc,
+         const DeclarationNameInfo &NameInfo, ArrayRef<Expr *> Privates,
+         ArrayRef<Expr *> LHSExprs, ArrayRef<Expr *> RHSExprs,
+         ArrayRef<Expr *> ReductionOps, Stmt *PreInit, Expr *PostUpdate);
+
+  /// Creates an empty clause with the place for \a N variables.
+  ///
+  /// \param C AST context.
+  /// \param N The number of variables.
+  ///
+  static OMPTaskReductionClause *CreateEmpty(const ASTContext &C, unsigned N);
+
+  /// Gets location of ':' symbol in clause.
+  SourceLocation getColonLoc() const { return ColonLoc; }
+  /// Gets the name info for specified reduction identifier.
+  const DeclarationNameInfo &getNameInfo() const { return NameInfo; }
+  /// Gets the nested name specifier.
+  NestedNameSpecifierLoc getQualifierLoc() const { return QualifierLoc; }
+
+  typedef MutableArrayRef<Expr *>::iterator helper_expr_iterator;
+  typedef ArrayRef<const Expr *>::iterator helper_expr_const_iterator;
+  typedef llvm::iterator_range<helper_expr_iterator> helper_expr_range;
+  typedef llvm::iterator_range<helper_expr_const_iterator>
+      helper_expr_const_range;
+
+  helper_expr_const_range privates() const {
+    return helper_expr_const_range(getPrivates().begin(), getPrivates().end());
+  }
+  helper_expr_range privates() {
+    return helper_expr_range(getPrivates().begin(), getPrivates().end());
+  }
+  helper_expr_const_range lhs_exprs() const {
+    return helper_expr_const_range(getLHSExprs().begin(), getLHSExprs().end());
+  }
+  helper_expr_range lhs_exprs() {
+    return helper_expr_range(getLHSExprs().begin(), getLHSExprs().end());
+  }
+  helper_expr_const_range rhs_exprs() const {
+    return helper_expr_const_range(getRHSExprs().begin(), getRHSExprs().end());
+  }
+  helper_expr_range rhs_exprs() {
+    return helper_expr_range(getRHSExprs().begin(), getRHSExprs().end());
+  }
+  helper_expr_const_range reduction_ops() const {
+    return helper_expr_const_range(getReductionOps().begin(),
+                                   getReductionOps().end());
+  }
+  helper_expr_range reduction_ops() {
+    return helper_expr_range(getReductionOps().begin(),
+                             getReductionOps().end());
+  }
+
+  child_range children() {
+    return child_range(reinterpret_cast<Stmt **>(varlist_begin()),
+                       reinterpret_cast<Stmt **>(varlist_end()));
+  }
+
+  static bool classof(const OMPClause *T) {
+    return T->getClauseKind() == OMPC_task_reduction;
+  }
+};
+
 /// \brief This represents clause 'linear' in the '#pragma omp ...'
 /// directives.
 ///
@@ -3001,7 +3213,7 @@ class OMPMappableExprListClause : public OMPVarListClause<T>,
     // Organize the components by declaration and retrieve the original
     // expression. Original expressions are always the first component of the
     // mappable component list.
-    llvm::DenseMap<ValueDecl *, SmallVector<MappableComponent, 8>>
+    llvm::MapVector<ValueDecl *, SmallVector<MappableComponent, 8>>
         ComponentListMap;
     {
       auto CI = ComponentLists.begin();
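
For reference, the source-level construct that OMPTaskReductionClause
models, in the same form as the doc comment's example:

    double a = 0.0, b = 0.0;
    #pragma omp taskgroup task_reduction(+: a, b)
    {
      // Tasks spawned in this region may contribute partial results to
      // 'a' and 'b'; the combined values are available once the
      // taskgroup region completes.
    }
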
diff --git a/interpreter/llvm/src/tools/clang/include/clang/AST/RecursiveASTVisitor.h b/interpreter/llvm/src/tools/clang/include/clang/AST/RecursiveASTVisitor.h
index cd2a39449825b..e7f271cc0812d 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/AST/RecursiveASTVisitor.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/AST/RecursiveASTVisitor.h
@@ -593,6 +593,16 @@ bool RecursiveASTVisitor<Derived>::PostVisitStmt(Stmt *S) {
 #define STMT(CLASS, PARENT)                                                    \
   case Stmt::CLASS##Class:                                                     \
     TRY_TO(WalkUpFrom##CLASS(static_cast<CLASS*>(S))); break;
+#define INITLISTEXPR(CLASS, PARENT)                                            \
+  case Stmt::CLASS##Class:                                                     \
+    {                                                                          \
+      auto ILE = static_cast<CLASS *>(S);                                     \
+      if (auto Syn = ILE->isSemanticForm() ? ILE->getSyntacticForm() : ILE)    \
+        TRY_TO(WalkUpFrom##CLASS(Syn));                                        \
+      if (auto Sem = ILE->isSemanticForm() ? ILE : ILE->getSemanticForm())     \
+        TRY_TO(WalkUpFrom##CLASS(Sem));                                        \
+      break;                                                                   \
+    }
 #include "clang/AST/StmtNodes.inc"
   }
 
@@ -1021,8 +1031,12 @@ DEF_TRAVERSE_TYPE(DeducedTemplateSpecializationType, {
 DEF_TRAVERSE_TYPE(RecordType, {})
 DEF_TRAVERSE_TYPE(EnumType, {})
 DEF_TRAVERSE_TYPE(TemplateTypeParmType, {})
-DEF_TRAVERSE_TYPE(SubstTemplateTypeParmType, {})
-DEF_TRAVERSE_TYPE(SubstTemplateTypeParmPackType, {})
+DEF_TRAVERSE_TYPE(SubstTemplateTypeParmType, {
+  TRY_TO(TraverseType(T->getReplacementType()));
+})
+DEF_TRAVERSE_TYPE(SubstTemplateTypeParmPackType, {
+  TRY_TO(TraverseTemplateArgument(T->getArgumentPack()));
+})
 
 DEF_TRAVERSE_TYPE(TemplateSpecializationType, {
   TRY_TO(TraverseTemplateName(T->getTemplateName()));
@@ -1249,8 +1263,12 @@ DEF_TRAVERSE_TYPELOC(DeducedTemplateSpecializationType, {
 DEF_TRAVERSE_TYPELOC(RecordType, {})
 DEF_TRAVERSE_TYPELOC(EnumType, {})
 DEF_TRAVERSE_TYPELOC(TemplateTypeParmType, {})
-DEF_TRAVERSE_TYPELOC(SubstTemplateTypeParmType, {})
-DEF_TRAVERSE_TYPELOC(SubstTemplateTypeParmPackType, {})
+DEF_TRAVERSE_TYPELOC(SubstTemplateTypeParmType, {
+  TRY_TO(TraverseType(TL.getTypePtr()->getReplacementType()));
+})
+DEF_TRAVERSE_TYPELOC(SubstTemplateTypeParmPackType, {
+  TRY_TO(TraverseTemplateArgument(TL.getTypePtr()->getArgumentPack()));
+})
 
 // FIXME: use the loc for the template name?
 DEF_TRAVERSE_TYPELOC(TemplateSpecializationType, {
@@ -1781,6 +1799,7 @@ DEF_TRAVERSE_DECL(CXXRecordDecl, { TRY_TO(TraverseCXXRecordHelper(D)); })
     if (TypeSourceInfo *TSI = D->getTypeAsWritten())                           \
       TRY_TO(TraverseTypeLoc(TSI->getTypeLoc()));                              \
                                                                                \
+    TRY_TO(TraverseNestedNameSpecifierLoc(D->getQualifierLoc()));              \
     if (!getDerived().shouldVisitTemplateInstantiations() &&                   \
         D->getTemplateSpecializationKind() != TSK_ExplicitSpecialization)      \
       /* Returning from here skips traversing the                              \
@@ -2212,13 +2231,15 @@ bool RecursiveASTVisitor::TraverseSynOrSemInitListExpr(
 // the syntactic and the semantic form.
 //
 // There is no guarantee about which form \p S takes when this method is called.
-DEF_TRAVERSE_STMT(InitListExpr, {
+template <typename Derived>
+bool RecursiveASTVisitor<Derived>::TraverseInitListExpr(
+    InitListExpr *S, DataRecursionQueue *Queue) {
   TRY_TO(TraverseSynOrSemInitListExpr(
       S->isSemanticForm() ? S->getSyntacticForm() : S, Queue));
   TRY_TO(TraverseSynOrSemInitListExpr(
       S->isSemanticForm() ? S : S->getSemanticForm(), Queue));
-  ShouldVisitChildren = false;
-})
+  return true;
+}
 
 // GenericSelectionExpr is a special case because the types and expressions
 // are interleaved.  We also need to watch out for null types (default
@@ -2995,6 +3016,28 @@ RecursiveASTVisitor<Derived>::VisitOMPReductionClause(OMPReductionClause *C) {
   return true;
 }
 
+template <typename Derived>
+bool RecursiveASTVisitor<Derived>::VisitOMPTaskReductionClause(
+    OMPTaskReductionClause *C) {
+  TRY_TO(TraverseNestedNameSpecifierLoc(C->getQualifierLoc()));
+  TRY_TO(TraverseDeclarationNameInfo(C->getNameInfo()));
+  TRY_TO(VisitOMPClauseList(C));
+  TRY_TO(VisitOMPClauseWithPostUpdate(C));
+  for (auto *E : C->privates()) {
+    TRY_TO(TraverseStmt(E));
+  }
+  for (auto *E : C->lhs_exprs()) {
+    TRY_TO(TraverseStmt(E));
+  }
+  for (auto *E : C->rhs_exprs()) {
+    TRY_TO(TraverseStmt(E));
+  }
+  for (auto *E : C->reduction_ops()) {
+    TRY_TO(TraverseStmt(E));
+  }
+  return true;
+}
+
 template <typename Derived>
 bool RecursiveASTVisitor<Derived>::VisitOMPFlushClause(OMPFlushClause *C) {
   TRY_TO(VisitOMPClauseList(C));
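
Because the INITLISTEXPR walk-up now runs for both the syntactic and the
semantic form, a visitor's VisitInitListExpr fires once per form. A small
sketch of a client visitor (the relevant clang/AST headers are assumed to
be included):

    // Sketch built only on public RecursiveASTVisitor/InitListExpr APIs.
    class InitListFormLogger
        : public clang::RecursiveASTVisitor<InitListFormLogger> {
    public:
      bool VisitInitListExpr(clang::InitListExpr *E) {
        llvm::errs() << (E->isSemanticForm() ? "semantic" : "syntactic")
                     << " init list\n";
        return true; // keep traversing
      }
    };
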
diff --git a/interpreter/llvm/src/tools/clang/include/clang/AST/Redeclarable.h b/interpreter/llvm/src/tools/clang/include/clang/AST/Redeclarable.h
index cd5f186a2086c..89a9d3c4cc212 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/AST/Redeclarable.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/AST/Redeclarable.h
@@ -21,6 +21,60 @@
 namespace clang {
 class ASTContext;
 
+// Some notes on redeclarables:
+//
+//  - Every redeclarable is on a circular linked list.
+//
+//  - Every decl has a pointer to the first element of the chain _and_ a
+//    DeclLink that may point to one of 3 possible states:
+//      - the "previous" (temporal) element in the chain
+//      - the "latest" (temporal) element in the chain
+//      - an "uninitialized-latest" value (when newly-constructed)
+//
+//  - The first element is also often called the canonical element. Every
+//    element has a pointer to it so that "getCanonical" can be fast.
+//
+//  - Most links in the chain point to previous, except the link out of
+//    the first; it points to latest.
+//
+//  - Elements are called "first", "previous", "latest" or
+//    "most-recent" when referring to temporal order: order of addition
+//    to the chain.
+//
+//  - To make matters confusing, the DeclLink type uses the term "next"
+//    for its pointer-storage internally (thus functions like
+//    NextIsPrevious). It's easiest to just ignore the implementation of
+//    DeclLink when making sense of the redeclaration chain.
+//
+//  - There's also a "definition" link for several types of
+//    redeclarable, where only one definition should exist at any given
+//    time (and the defn pointer is stored in the decl's "data" which
+//    is copied to every element on the chain when it's changed).
+//
+//    Here is some ASCII art:
+//
+//      "first"                                     "latest"
+//      "canonical"                                 "most recent"
+//      +------------+         first                +--------------+
+//      |            | <--------------------------- |              |
+//      |            |                              |              |
+//      |            |                              |              |
+//      |            |       +--------------+       |              |
+//      |            | first |              |       |              |
+//      |            | <---- |              |       |              |
+//      |            |       |              |       |              |
+//      | @class A   |  link | @interface A |  link | @class A     |
+//      | seen first | <---- | seen second  | <---- | seen third   |
+//      |            |       |              |       |              |
+//      +------------+       +--------------+       +--------------+
+//      | data       | defn  | data         |  defn | data         |
+//      |            | ----> |              | <---- |              |
+//      +------------+       +--------------+       +--------------+
+//        |                     |     ^                  ^
+//        |                     |defn |                  |
+//        | link                +-----+                  |
+//        +-->-------------------------------------------+
+
 /// \brief Provides common interface for the Decls that can be redeclared.
 template<typename decl_type>
 class Redeclarable {
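
The chain terminology above maps directly onto the public accessors. A
sketch of walking a redeclaration chain; FD stands in for some
clang::FunctionDecl pointer:

    // Sketch; FD is assumed to be a valid clang::FunctionDecl *.
    const clang::FunctionDecl *First = FD->getCanonicalDecl();  // "first"
    const clang::FunctionDecl *Last = FD->getMostRecentDecl();  // "latest"
    for (const clang::FunctionDecl *R : FD->redecls())
      (void)R; // visits every element of the circular chain exactly once
    (void)First;
    (void)Last;
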
diff --git a/interpreter/llvm/src/tools/clang/include/clang/AST/StmtCXX.h b/interpreter/llvm/src/tools/clang/include/clang/AST/StmtCXX.h
index 56bfce987f5bd..77f81838e5eb6 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/AST/StmtCXX.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/AST/StmtCXX.h
@@ -308,13 +308,16 @@ class CoroutineBodyStmt final
     OnFallthrough, ///< Handler for control flow falling off the body.
     Allocate,      ///< Coroutine frame memory allocation.
     Deallocate,    ///< Coroutine frame memory deallocation.
-    ReturnValue,   ///< Return value for thunk function.
+    ReturnValue,   ///< Return value for thunk function: p.get_return_object().
+    ResultDecl,    ///< Declaration holding the result of get_return_object.
+    ReturnStmt,    ///< Return statement for the thunk function.
     ReturnStmtOnAllocFailure, ///< Return statement if allocation failed.
     FirstParamMove ///< First offset for move construction of parameter copies.
   };
   unsigned NumParams;
 
   friend class ASTStmtReader;
+  friend class ASTReader;
   friend TrailingObjects;
 
   Stmt **getStoredStmts() { return getTrailingObjects<Stmt *>(); }
@@ -332,7 +335,9 @@ class CoroutineBodyStmt final
     Stmt *OnFallthrough = nullptr;
     Expr *Allocate = nullptr;
     Expr *Deallocate = nullptr;
-    Stmt *ReturnValue = nullptr;
+    Expr *ReturnValue = nullptr;
+    Stmt *ResultDecl = nullptr;
+    Stmt *ReturnStmt = nullptr;
     Stmt *ReturnStmtOnAllocFailure = nullptr;
     ArrayRef<Expr *> ParamMoves;
   };
@@ -343,6 +348,8 @@ class CoroutineBodyStmt final
 
 public:
   static CoroutineBodyStmt *Create(const ASTContext &C, CtorArgs const &Args);
+  static CoroutineBodyStmt *Create(const ASTContext &C, EmptyShell,
+                                   unsigned NumParams);
 
   bool hasDependentPromiseType() const {
     return getPromiseDecl()->getType()->isDependentType();
@@ -381,10 +388,11 @@ class CoroutineBodyStmt final
   Expr *getDeallocate() const {
     return cast_or_null<Expr>(getStoredStmts()[SubStmt::Deallocate]);
   }
-
   Expr *getReturnValueInit() const {
-    return cast_or_null<Expr>(getStoredStmts()[SubStmt::ReturnValue]);
+    return cast<Expr>(getStoredStmts()[SubStmt::ReturnValue]);
   }
+  Stmt *getResultDecl() const { return getStoredStmts()[SubStmt::ResultDecl]; }
+  Stmt *getReturnStmt() const { return getStoredStmts()[SubStmt::ReturnStmt]; }
   Stmt *getReturnStmtOnAllocFailure() const {
     return getStoredStmts()[SubStmt::ReturnStmtOnAllocFailure];
   }
@@ -439,6 +447,8 @@ class CoreturnStmt : public Stmt {
     SubStmts[SubStmt::PromiseCall] = PromiseCall;
   }
 
+  CoreturnStmt(EmptyShell) : CoreturnStmt({}, {}, {}) {}
+
   SourceLocation getKeywordLoc() const { return CoreturnLoc; }
 
   /// \brief Retrieve the operand of the 'co_return' statement. Will be nullptr
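
The ResultDecl/ReturnStmt split mirrors how the coroutine thunk is
conceptually lowered. An illustrative, non-normative expansion, where
'task' is a hypothetical coroutine return type and 'p' the promise object:

    // task<int> f() { co_return 42; }  conceptually becomes:
    //
    //   promise_type p;                    // promise object
    //   auto gro = p.get_return_object();  // ReturnValue, stored by the
    //                                      // declaration in ResultDecl
    //   ... body; 'co_return 42' -> p.return_value(42) ...
    //   return gro;                        // ReturnStmt
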
diff --git a/interpreter/llvm/src/tools/clang/include/clang/AST/StmtOpenMP.h b/interpreter/llvm/src/tools/clang/include/clang/AST/StmtOpenMP.h
index 463af06fddaba..09dd87fdc8bc0 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/AST/StmtOpenMP.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/AST/StmtOpenMP.h
@@ -1895,7 +1895,7 @@ class OMPTaskwaitDirective : public OMPExecutableDirective {
   }
 };
 
-/// \brief This represents '#pragma omp taskgroup' directive.
+/// This represents '#pragma omp taskgroup' directive.
 ///
 /// \code
 /// #pragma omp taskgroup
@@ -1903,39 +1903,45 @@ class OMPTaskwaitDirective : public OMPExecutableDirective {
 ///
 class OMPTaskgroupDirective : public OMPExecutableDirective {
   friend class ASTStmtReader;
-  /// \brief Build directive with the given start and end location.
+  /// Build directive with the given start and end location.
   ///
   /// \param StartLoc Starting location of the directive kind.
   /// \param EndLoc Ending location of the directive.
+  /// \param NumClauses Number of clauses.
   ///
-  OMPTaskgroupDirective(SourceLocation StartLoc, SourceLocation EndLoc)
+  OMPTaskgroupDirective(SourceLocation StartLoc, SourceLocation EndLoc,
+                        unsigned NumClauses)
       : OMPExecutableDirective(this, OMPTaskgroupDirectiveClass, OMPD_taskgroup,
-                               StartLoc, EndLoc, 0, 1) {}
+                               StartLoc, EndLoc, NumClauses, 1) {}
 
-  /// \brief Build an empty directive.
+  /// Build an empty directive.
+  /// \param NumClauses Number of clauses.
   ///
-  explicit OMPTaskgroupDirective()
+  explicit OMPTaskgroupDirective(unsigned NumClauses)
       : OMPExecutableDirective(this, OMPTaskgroupDirectiveClass, OMPD_taskgroup,
-                               SourceLocation(), SourceLocation(), 0, 1) {}
+                               SourceLocation(), SourceLocation(), NumClauses,
+                               1) {}
 
 public:
-  /// \brief Creates directive.
+  /// Creates directive.
   ///
   /// \param C AST context.
   /// \param StartLoc Starting location of the directive kind.
   /// \param EndLoc Ending Location of the directive.
+  /// \param Clauses List of clauses.
   /// \param AssociatedStmt Statement, associated with the directive.
   ///
-  static OMPTaskgroupDirective *Create(const ASTContext &C,
-                                       SourceLocation StartLoc,
-                                       SourceLocation EndLoc,
-                                       Stmt *AssociatedStmt);
+  static OMPTaskgroupDirective *
+  Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
+         ArrayRef Clauses, Stmt *AssociatedStmt);
 
-  /// \brief Creates an empty directive.
+  /// Creates an empty directive.
   ///
   /// \param C AST context.
+  /// \param NumClauses Number of clauses.
   ///
-  static OMPTaskgroupDirective *CreateEmpty(const ASTContext &C, EmptyShell);
+  static OMPTaskgroupDirective *CreateEmpty(const ASTContext &C,
+                                            unsigned NumClauses, EmptyShell);
 
   static bool classof(const Stmt *T) {
     return T->getStmtClass() == OMPTaskgroupDirectiveClass;
diff --git a/interpreter/llvm/src/tools/clang/include/clang/AST/Type.h b/interpreter/llvm/src/tools/clang/include/clang/AST/Type.h
index 5cd12e07f27c0..1d394d111dbd1 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/AST/Type.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/AST/Type.h
@@ -333,15 +333,18 @@ class Qualifiers {
 
   bool hasAddressSpace() const { return Mask & AddressSpaceMask; }
   unsigned getAddressSpace() const { return Mask >> AddressSpaceShift; }
+  bool hasTargetSpecificAddressSpace() const {
+    return getAddressSpace() >= LangAS::FirstTargetAddressSpace;
+  }
   /// Get the address space attribute value to be printed by diagnostics.
   unsigned getAddressSpaceAttributePrintValue() const {
     auto Addr = getAddressSpace();
     // This function is not supposed to be used with language specific
     // address spaces. If that happens, the diagnostic message should consider
     // printing the QualType instead of the address space value.
-    assert(Addr == 0 || Addr >= LangAS::Count);
+    assert(Addr == 0 || hasTargetSpecificAddressSpace());
     if (Addr)
-      return Addr - LangAS::Count;
+      return Addr - LangAS::FirstTargetAddressSpace;
     // TODO: The diagnostic messages where Addr may be 0 should be fixed
     // since it cannot differentiate the situation where 0 denotes the default
     // address space or user specified __attribute__((address_space(0))).
@@ -1749,6 +1752,7 @@ class Type : public ExtQualsTypeCommonBase {
   bool isTemplateTypeParmType() const;          // C++ template type parameter
   bool isNullPtrType() const;                   // C++11 std::nullptr_t
   bool isAlignValT() const;                     // C++17 std::align_val_t
+  bool isStdByteType() const;                   // C++17 std::byte
   bool isAtomicType() const;                    // C11 _Atomic()
 
 #define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
@@ -2008,10 +2012,11 @@ class Type : public ExtQualsTypeCommonBase {
   Optional<NullabilityKind> getNullability(const ASTContext &context) const;
 
   /// Determine whether the given type can have a nullability
-  /// specifier applied to it, i.e., if it is any kind of pointer type
-  /// or a dependent type that could instantiate to any kind of
-  /// pointer type.
-  bool canHaveNullability() const;
+  /// specifier applied to it, i.e., if it is any kind of pointer type.
+  ///
+  /// \param ResultIfUnknown The value to return if we don't yet know whether
+  ///        this type can have nullability because it is dependent.
+  bool canHaveNullability(bool ResultIfUnknown = true) const;
 
   /// Retrieve the set of substitutions required when accessing a member
   /// of the Objective-C receiver type that is declared in the given context.
@@ -3874,6 +3879,7 @@ class AttributedType : public Type, public llvm::FoldingSetNode {
     attr_sptr,
     attr_uptr,
     attr_nonnull,
+    attr_ns_returns_retained,
     attr_nullable,
     attr_null_unspecified,
     attr_objc_kindof,
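
With LangAS::Count renamed to LangAS::FirstTargetAddressSpace, the offset
arithmetic above becomes self-describing. A sketch; Quals stands in for a
clang::Qualifiers value:

    // Sketch; Quals is an assumed clang::Qualifiers value.
    if (Quals.hasTargetSpecificAddressSpace()) {
      unsigned N = Quals.getAddressSpaceAttributePrintValue();
      // N is the value written in __attribute__((address_space(N))),
      // i.e. getAddressSpace() - LangAS::FirstTargetAddressSpace.
      (void)N;
    }
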
diff --git a/interpreter/llvm/src/tools/clang/include/clang/AST/VTableBuilder.h b/interpreter/llvm/src/tools/clang/include/clang/AST/VTableBuilder.h
index 5cbcf51dd69b2..b0b71e473516d 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/AST/VTableBuilder.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/AST/VTableBuilder.h
@@ -154,6 +154,28 @@ class VTableComponent {
 
   bool isRTTIKind() const { return isRTTIKind(getKind()); }
 
+  GlobalDecl getGlobalDecl() const {
+    assert(isUsedFunctionPointerKind() &&
+           "GlobalDecl can be created only from virtual function");
+
+    auto *DtorDecl = dyn_cast<CXXDestructorDecl>(getFunctionDecl());
+    switch (getKind()) {
+    case CK_FunctionPointer:
+      return GlobalDecl(getFunctionDecl());
+    case CK_CompleteDtorPointer:
+      return GlobalDecl(DtorDecl, CXXDtorType::Dtor_Complete);
+    case CK_DeletingDtorPointer:
+      return GlobalDecl(DtorDecl, CXXDtorType::Dtor_Deleting);
+    case CK_VCallOffset:
+    case CK_VBaseOffset:
+    case CK_OffsetToTop:
+    case CK_RTTI:
+    case CK_UnusedFunctionPointer:
+      llvm_unreachable("Only function pointers kinds");
+    }
+    llvm_unreachable("Should already return");
+  }
+
 private:
   static bool isFunctionPointerKind(Kind ComponentKind) {
     return isUsedFunctionPointerKind(ComponentKind) ||
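
getGlobalDecl gives vtable consumers a uniform handle on function-pointer
components, including both destructor variants. A sketch; Layout stands in
for a clang::VTableLayout, and vtable_components() is assumed to be
available at this revision:

    // Sketch; Layout is an assumed clang::VTableLayout.
    for (const clang::VTableComponent &VC : Layout.vtable_components()) {
      if (VC.isUsedFunctionPointerKind()) {
        clang::GlobalDecl GD = VC.getGlobalDecl();
        // For CK_CompleteDtorPointer / CK_DeletingDtorPointer, GD also
        // records which destructor variant the slot holds.
        (void)GD;
      }
    }
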
diff --git a/interpreter/llvm/src/tools/clang/include/clang/ASTMatchers/ASTMatchers.h b/interpreter/llvm/src/tools/clang/include/clang/ASTMatchers/ASTMatchers.h
index 0ab8d5fe4fc13..c9b496df33f7c 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/ASTMatchers/ASTMatchers.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/ASTMatchers/ASTMatchers.h
@@ -3806,14 +3806,30 @@ AST_MATCHER_P(CompoundStmt, statementCountIs, unsigned, N) {
   return Node.size() == N;
 }
 
-/// \brief Matches literals that are equal to the given value.
+/// \brief Matches literals that are equal to the given value of type ValueT.
 ///
-/// Example matches true (matcher = cxxBoolLiteral(equals(true)))
+/// Given
 /// \code
-///   true
+///   f('\0', false, 3.14, 42);
 /// \endcode
+/// characterLiteral(equals(0))
+///   matches '\0'
+/// cxxBoolLiteral(equals(false)) and cxxBoolLiteral(equals(0))
+///   match false
+/// floatLiteral(equals(3.14)) and floatLiteral(equals(314e-2))
+///   match 3.14
+/// integerLiteral(equals(42))
+///   matches 42
+///
+/// Note that you cannot directly match a negative numeric literal because the
+/// minus sign is not part of the literal: It is a unary operator whose operand
+/// is the positive numeric literal. Instead, you must use a unaryOperator()
+/// matcher to match the minus sign:
 ///
-/// Usable as: Matcher<CharacterLiteral>, Matcher<CXXBoolLiteral>,
+/// unaryOperator(hasOperatorName("-"),
+///               hasUnaryOperand(integerLiteral(equals(13))))
+///
+/// Usable as: Matcher<CharacterLiteral>, Matcher<CXXBoolLiteralExpr>,
 ///            Matcher<FloatingLiteral>, Matcher<IntegerLiteral>
 template <typename ValueT>
 internal::PolymorphicMatcherWithParam1<internal::ValueEqualsMatcher, ValueT>
 equals(const ValueT &Value) {
   return internal::PolymorphicMatcherWithParam1<
     internal::ValueEqualsMatcher,
     ValueT>(Value);
 }
 
+AST_POLYMORPHIC_MATCHER_P_OVERLOAD(equals,
+                          AST_POLYMORPHIC_SUPPORTED_TYPES(CharacterLiteral,
+                                                          CXXBoolLiteralExpr,
+                                                          IntegerLiteral),
+                          bool, Value, 0) {
+  return internal::ValueEqualsMatcher<NodeType, ParamT>(Value)
+    .matchesNode(Node);
+}
+
+AST_POLYMORPHIC_MATCHER_P_OVERLOAD(equals,
+                          AST_POLYMORPHIC_SUPPORTED_TYPES(CharacterLiteral,
+                                                          CXXBoolLiteralExpr,
+                                                          IntegerLiteral),
+                          unsigned, Value, 1) {
+  return internal::ValueEqualsMatcher<NodeType, ParamT>(Value)
+    .matchesNode(Node);
+}
+
+AST_POLYMORPHIC_MATCHER_P_OVERLOAD(equals,
+                          AST_POLYMORPHIC_SUPPORTED_TYPES(CharacterLiteral,
+                                                          CXXBoolLiteralExpr,
+                                                          FloatingLiteral,
+                                                          IntegerLiteral),
+                          double, Value, 2) {
+  return internal::ValueEqualsMatcher<NodeType, ParamT>(Value)
+    .matchesNode(Node);
+}
+
 /// \brief Matches the operator Name of operator expressions (binary or
 /// unary).
 ///
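
With the overloads above, bool and double arguments reach equals() without
going through the template version; for example (namespace
clang::ast_matchers assumed in scope):

    using namespace clang::ast_matchers;
    auto BoolM  = cxxBoolLiteral(equals(false));   // bool overload
    auto FloatM = floatLiteral(equals(3.14));      // double overload
    auto NegM   = unaryOperator(hasOperatorName("-"),
                                hasUnaryOperand(integerLiteral(equals(13))));
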
diff --git a/interpreter/llvm/src/tools/clang/include/clang/ASTMatchers/Dynamic/Diagnostics.h b/interpreter/llvm/src/tools/clang/include/clang/ASTMatchers/Dynamic/Diagnostics.h
index 2c76ddaa07d9e..908fa0db622d0 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/ASTMatchers/Dynamic/Diagnostics.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/ASTMatchers/Dynamic/Diagnostics.h
@@ -76,7 +76,7 @@ class Diagnostics {
     ET_ParserInvalidToken = 106,
     ET_ParserMalformedBindExpr = 107,
     ET_ParserTrailingCode = 108,
-    ET_ParserUnsignedError = 109,
+    ET_ParserNumberError = 109,
     ET_ParserOverloadedType = 110
   };
 
diff --git a/interpreter/llvm/src/tools/clang/include/clang/ASTMatchers/Dynamic/Parser.h b/interpreter/llvm/src/tools/clang/include/clang/ASTMatchers/Dynamic/Parser.h
index 76926f09dbcbd..5ec4a9abf4bff 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/ASTMatchers/Dynamic/Parser.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/ASTMatchers/Dynamic/Parser.h
@@ -19,8 +19,10 @@
 /// \code
 /// Grammar for the expressions supported:
 /// <Expression>        := <Literal> | <NamedValue> | <MatcherExpression>
-/// <Literal>           := <StringLiteral> | <Unsigned>
+/// <Literal>           := <StringLiteral> | <Boolean> | <Double> | <Unsigned>
 /// <StringLiteral>     := "quoted string"
+/// <Boolean>           := true | false
+/// <Double>            := [0-9]+.[0-9]* | [0-9]+.[0-9]*[eE][-+]?[0-9]+
 /// <Unsigned>          := [0-9]+
 /// <NamedValue>        := <Identifier>
 /// <MatcherExpression> := <Identifier>(<ArgumentList>) |
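
With the <Boolean> and <Double> productions added above, boolean and floating
literals now parse as matcher arguments. A sketch of driving the parser
directly (Err, M, and B are local placeholder names, not from the patch):

    #include "clang/ASTMatchers/Dynamic/Parser.h"
    using namespace clang::ast_matchers::dynamic;

    Diagnostics Err;
    auto M = Parser::parseMatcherExpression("floatLiteral(equals(3.14))", &Err);
    auto B = Parser::parseMatcherExpression("cxxBoolLiteral(equals(true))", &Err);
    // M and B are llvm::Optional<> matchers; empty on a parse error.
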
diff --git a/interpreter/llvm/src/tools/clang/include/clang/ASTMatchers/Dynamic/VariantValue.h b/interpreter/llvm/src/tools/clang/include/clang/ASTMatchers/Dynamic/VariantValue.h
index c5426dd75ef5c..f9efe0f16f437 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/ASTMatchers/Dynamic/VariantValue.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/ASTMatchers/Dynamic/VariantValue.h
@@ -35,6 +35,8 @@ class ArgKind {
  public:
   enum Kind {
     AK_Matcher,
+    AK_Boolean,
+    AK_Double,
     AK_Unsigned,
     AK_String
   };
@@ -241,6 +243,8 @@ struct VariantMatcher::TypedMatcherOps final : VariantMatcher::MatcherOps {
 /// copy/assignment.
 ///
 /// Supported types:
+///  - \c bool
+///  - \c double
 ///  - \c unsigned
 ///  - \c llvm::StringRef
 ///  - \c VariantMatcher (\c DynTypedMatcher / \c Matcher<T>)
@@ -253,14 +257,29 @@ class VariantValue {
   VariantValue &operator=(const VariantValue &Other);
 
   /// \brief Specific constructors for each supported type.
+  VariantValue(bool Boolean);
+  VariantValue(double Double);
   VariantValue(unsigned Unsigned);
   VariantValue(StringRef String);
   VariantValue(const VariantMatcher &Matchers);
 
+  /// \brief Constructs an \c unsigned value (disambiguation from bool).
+  VariantValue(int Signed) : VariantValue(static_cast<unsigned>(Signed)) {}
+
   /// \brief Returns true iff this is not an empty value.
   explicit operator bool() const { return hasValue(); }
   bool hasValue() const { return Type != VT_Nothing; }
 
+  /// \brief Boolean value functions.
+  bool isBoolean() const;
+  bool getBoolean() const;
+  void setBoolean(bool Boolean);
+
+  /// \brief Double value functions.
+  bool isDouble() const;
+  double getDouble() const;
+  void setDouble(double Double);
+
   /// \brief Unsigned value functions.
   bool isUnsigned() const;
   unsigned getUnsigned() const;
@@ -303,6 +322,8 @@ class VariantValue {
   /// \brief All supported value types.
   enum ValueType {
     VT_Nothing,
+    VT_Boolean,
+    VT_Double,
     VT_Unsigned,
     VT_String,
     VT_Matcher
@@ -311,6 +332,8 @@ class VariantValue {
   /// \brief All supported value types.
   union AllValues {
     unsigned Unsigned;
+    double Double;
+    bool Boolean;
     std::string *String;
     VariantMatcher *Matcher;
   };
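
A short sketch of the widened VariantValue API declared above (note how the
int constructor keeps integer literals from silently becoming booleans):

    #include "clang/ASTMatchers/Dynamic/VariantValue.h"
    #include <cassert>
    using clang::ast_matchers::dynamic::VariantValue;

    VariantValue B(true);   // VT_Boolean
    VariantValue D(3.14);   // VT_Double
    VariantValue U(42);     // int forwards to the unsigned constructor
    assert(B.isBoolean() && B.getBoolean());
    assert(D.isDouble() && D.getDouble() == 3.14);
    assert(U.isUnsigned() && U.getUnsigned() == 42);
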
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Analysis/Analyses/Dominators.h b/interpreter/llvm/src/tools/clang/include/clang/Analysis/Analyses/Dominators.h
index 1229f8a8efacc..38010e1ee1d8e 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Analysis/Analyses/Dominators.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/Analysis/Analyses/Dominators.h
@@ -38,15 +38,15 @@ typedef llvm::DomTreeNodeBase<CFGBlock> DomTreeNode;
 class DominatorTree : public ManagedAnalysis {
   virtual void anchor();
 public:
-  llvm::DominatorTreeBase<CFGBlock>* DT;
+  llvm::DomTreeBase<CFGBlock>* DT;
 
   DominatorTree() {
-    DT = new llvm::DominatorTreeBase<CFGBlock>(false);
+    DT = new llvm::DomTreeBase<CFGBlock>();
   }
 
   ~DominatorTree() override { delete DT; }
 
-  llvm::DominatorTreeBase<CFGBlock>& getBase() { return *DT; }
+  llvm::DomTreeBase<CFGBlock>& getBase() { return *DT; }
 
   /// \brief This method returns the root CFGBlock of the dominators tree.
   ///
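
Callers are unaffected by the DominatorTreeBase -> DomTreeBase switch; the
wrapper's public API stays the same. A sketch (AC, A, B stand in for an
AnalysisDeclContext and two CFG blocks; they are placeholders):

    clang::DominatorTree DT;
    DT.buildDominatorTree(AC);       // recalculates over AC.getCFG()
    if (DT.dominates(A, B))
      llvm::errs() << "A dominates B\n";
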
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Analysis/AnalysisContext.h b/interpreter/llvm/src/tools/clang/include/clang/Analysis/AnalysisContext.h
index f6a47d646d1dd..ec7549d4535cc 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Analysis/AnalysisContext.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/Analysis/AnalysisContext.h
@@ -426,6 +426,7 @@ class AnalysisDeclContextManager {
                              bool addImplicitDtors = false,
                              bool addInitializers = false,
                              bool addTemporaryDtors = false,
+                             bool addLifetime = false,
                              bool synthesizeBodies = false,
                              bool addStaticInitBranches = false,
                              bool addCXXNewAllocator = true,
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Analysis/CFG.h b/interpreter/llvm/src/tools/clang/include/clang/Analysis/CFG.h
index d23ed77ded13d..97639bbfade20 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Analysis/CFG.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/Analysis/CFG.h
@@ -58,6 +58,7 @@ class CFGElement {
     Statement,
     Initializer,
     NewAllocator,
+    LifetimeEnds,
     // dtor kind
     AutomaticObjectDtor,
     DeleteDtor,
@@ -167,6 +168,28 @@ class CFGNewAllocator : public CFGElement {
   }
 };
 
+/// Represents the point where the lifetime of an automatic object ends
+class CFGLifetimeEnds : public CFGElement {
+public:
+  explicit CFGLifetimeEnds(const VarDecl *var, const Stmt *stmt)
+      : CFGElement(LifetimeEnds, var, stmt) {}
+
+  const VarDecl *getVarDecl() const {
+    return static_cast<VarDecl *>(Data1.getPointer());
+  }
+
+  const Stmt *getTriggerStmt() const {
+    return static_cast<Stmt *>(Data2.getPointer());
+  }
+
+private:
+  friend class CFGElement;
+  CFGLifetimeEnds() {}
+  static bool isKind(const CFGElement &elem) {
+    return elem.getKind() == LifetimeEnds;
+  }
+};
+
 /// CFGImplicitDtor - Represents C++ object destructor implicitly generated
 /// by compiler on various occasions.
 class CFGImplicitDtor : public CFGElement {
@@ -701,6 +724,10 @@ class CFGBlock {
     Elements.push_back(CFGAutomaticObjDtor(VD, S), C);
   }
 
+  void appendLifetimeEnds(VarDecl *VD, Stmt *S, BumpVectorContext &C) {
+    Elements.push_back(CFGLifetimeEnds(VD, S), C);
+  }
+
   void appendDeleteDtor(CXXRecordDecl *RD, CXXDeleteExpr *DE, BumpVectorContext &C) {
     Elements.push_back(CFGDeleteDtor(RD, DE), C);
   }
@@ -717,6 +744,19 @@ class CFGBlock {
     *I = CFGAutomaticObjDtor(VD, S);
     return ++I;
   }
+
+  // Scope leaving must be performed in reversed order. So insertion is in two
+  // steps. First we prepare space for some number of elements, then we insert
+  // the elements beginning at the last position in prepared space.
+  iterator beginLifetimeEndsInsert(iterator I, size_t Cnt,
+                                   BumpVectorContext &C) {
+    return iterator(
+        Elements.insert(I.base(), Cnt, CFGLifetimeEnds(nullptr, nullptr), C));
+  }
+  iterator insertLifetimeEnds(iterator I, VarDecl *VD, Stmt *S) {
+    *I = CFGLifetimeEnds(VD, S);
+    return ++I;
+  }
 };
 
 /// \brief CFGCallback defines methods that should be called when a logical
@@ -753,6 +793,7 @@ class CFG {
     bool AddEHEdges;
     bool AddInitializers;
     bool AddImplicitDtors;
+    bool AddLifetime;
     bool AddTemporaryDtors;
     bool AddStaticInitBranches;
     bool AddCXXNewAllocator;
@@ -774,8 +815,10 @@ class CFG {
 
     BuildOptions()
       : forcedBlkExprs(nullptr), Observer(nullptr),
-        PruneTriviallyFalseEdges(true), AddEHEdges(false),
+        PruneTriviallyFalseEdges(true),
+        AddEHEdges(false),
         AddInitializers(false), AddImplicitDtors(false),
+        AddLifetime(false),
         AddTemporaryDtors(false), AddStaticInitBranches(false),
         AddCXXNewAllocator(false), AddCXXDefaultInitExprInCtors(false) {}
   };
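
A minimal sketch of consuming the new lifetime elements (FD and Ctx stand in
for a FunctionDecl* and its ASTContext; both are placeholders, not names from
the patch):

    #include "clang/Analysis/CFG.h"
    using namespace clang;

    CFG::BuildOptions BO;
    BO.AddLifetime = true;  // request the new LifetimeEnds elements
    std::unique_ptr<CFG> G = CFG::buildCFG(FD, FD->getBody(), &Ctx, BO);
    for (const CFGBlock *B : *G)
      for (const CFGElement &E : *B)
        if (Optional<CFGLifetimeEnds> LE = E.getAs<CFGLifetimeEnds>())
          llvm::errs() << LE->getVarDecl()->getName() << " ends here\n";
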
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Analysis/CloneDetection.h b/interpreter/llvm/src/tools/clang/include/clang/Analysis/CloneDetection.h
index 3b81735584081..6339deef41bde 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Analysis/CloneDetection.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/Analysis/CloneDetection.h
@@ -15,8 +15,12 @@
 #ifndef LLVM_CLANG_AST_CLONEDETECTION_H
 #define LLVM_CLANG_AST_CLONEDETECTION_H
 
+#include "clang/AST/DeclTemplate.h"
+#include "clang/AST/StmtVisitor.h"
 #include "clang/Basic/SourceLocation.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Regex.h"
 #include <vector>
 
 namespace clang {
@@ -27,6 +31,192 @@ class VarDecl;
 class ASTContext;
 class CompoundStmt;
 
+namespace clone_detection {
+
+/// Returns a string that represents all macro expansions that expanded into the
+/// given SourceLocation.
+///
+/// If 'getMacroStack(A) == getMacroStack(B)' is true, then the SourceLocations
+/// A and B are expanded from the same macros in the same order.
+std::string getMacroStack(SourceLocation Loc, ASTContext &Context);
+
+/// Collects the data of a single Stmt.
+///
+/// This class defines what a code clone is: If it collects for two statements
+/// the same data, then those two statements are considered to be clones of each
+/// other.
+///
+/// All collected data is forwarded to the given data consumer of the type T.
+/// The data consumer class needs to provide a member method with the signature:
+///   update(StringRef Str)
+template <typename T>
+class StmtDataCollector : public ConstStmtVisitor<StmtDataCollector<T>> {
+
+  ASTContext &Context;
+  /// The data sink to which all data is forwarded.
+  T &DataConsumer;
+
+public:
+  /// Collects data of the given Stmt.
+  /// \param S The given statement.
+  /// \param Context The ASTContext of S.
+  /// \param DataConsumer The data sink to which all data is forwarded.
+  StmtDataCollector(const Stmt *S, ASTContext &Context, T &DataConsumer)
+      : Context(Context), DataConsumer(DataConsumer) {
+    this->Visit(S);
+  }
+
+  typedef unsigned DataPiece;
+
+  // Below are utility methods for appending different data to the vector.
+
+  void addData(DataPiece Integer) {
+    DataConsumer.update(
+        StringRef(reinterpret_cast<char *>(&Integer), sizeof(Integer)));
+  }
+
+  void addData(llvm::StringRef Str) { DataConsumer.update(Str); }
+
+  void addData(const QualType &QT) { addData(QT.getAsString()); }
+
+// The functions below collect the class specific data of each Stmt subclass.
+
+// Utility macro for defining a visit method for a given class. This method
+// calls back to the ConstStmtVisitor to visit all parent classes.
+#define DEF_ADD_DATA(CLASS, CODE)                                              \
+  void Visit##CLASS(const CLASS *S) {                                          \
+    CODE;                                                                      \
+    ConstStmtVisitor<StmtDataCollector<T>>::Visit##CLASS(S);                  \
+  }
+
+  DEF_ADD_DATA(Stmt, {
+    addData(S->getStmtClass());
+    // This ensures that macro-generated code isn't identical to
+    // non-macro-generated code.
+    addData(getMacroStack(S->getLocStart(), Context));
+    addData(getMacroStack(S->getLocEnd(), Context));
+  })
+  DEF_ADD_DATA(Expr, { addData(S->getType()); })
+
+  //--- Builtin functionality ----------------------------------------------//
+  DEF_ADD_DATA(ArrayTypeTraitExpr, { addData(S->getTrait()); })
+  DEF_ADD_DATA(ExpressionTraitExpr, { addData(S->getTrait()); })
+  DEF_ADD_DATA(PredefinedExpr, { addData(S->getIdentType()); })
+  DEF_ADD_DATA(TypeTraitExpr, {
+    addData(S->getTrait());
+    for (unsigned i = 0; i < S->getNumArgs(); ++i)
+      addData(S->getArg(i)->getType());
+  })
+
+  //--- Calls --------------------------------------------------------------//
+  DEF_ADD_DATA(CallExpr, {
+    // Function pointers don't have a callee and we just skip hashing it.
+    if (const FunctionDecl *D = S->getDirectCallee()) {
+      // If the function is a template specialization, we also need to handle
+      // the template arguments as they are not included in the qualified name.
+      if (auto Args = D->getTemplateSpecializationArgs()) {
+        std::string ArgString;
+
+        // Print all template arguments into ArgString
+        llvm::raw_string_ostream OS(ArgString);
+        for (unsigned i = 0; i < Args->size(); ++i) {
+          Args->get(i).print(Context.getLangOpts(), OS);
+          // Add a padding character so that 'foo<X, XX>()' != 'foo<XX, X>()'.
+          OS << '\n';
+        }
+        OS.flush();
+
+        addData(ArgString);
+      }
+      addData(D->getQualifiedNameAsString());
+    }
+  })
+
+  //--- Exceptions ---------------------------------------------------------//
+  DEF_ADD_DATA(CXXCatchStmt, { addData(S->getCaughtType()); })
+
+  //--- C++ OOP Stmts ------------------------------------------------------//
+  DEF_ADD_DATA(CXXDeleteExpr, {
+    addData(S->isArrayFormAsWritten());
+    addData(S->isGlobalDelete());
+  })
+
+  //--- Casts --------------------------------------------------------------//
+  DEF_ADD_DATA(ObjCBridgedCastExpr, { addData(S->getBridgeKind()); })
+
+  //--- Miscellaneous Exprs ------------------------------------------------//
+  DEF_ADD_DATA(BinaryOperator, { addData(S->getOpcode()); })
+  DEF_ADD_DATA(UnaryOperator, { addData(S->getOpcode()); })
+
+  //--- Control flow -------------------------------------------------------//
+  DEF_ADD_DATA(GotoStmt, { addData(S->getLabel()->getName()); })
+  DEF_ADD_DATA(IndirectGotoStmt, {
+    if (S->getConstantTarget())
+      addData(S->getConstantTarget()->getName());
+  })
+  DEF_ADD_DATA(LabelStmt, { addData(S->getDecl()->getName()); })
+  DEF_ADD_DATA(MSDependentExistsStmt, { addData(S->isIfExists()); })
+  DEF_ADD_DATA(AddrLabelExpr, { addData(S->getLabel()->getName()); })
+
+  //--- Objective-C --------------------------------------------------------//
+  DEF_ADD_DATA(ObjCIndirectCopyRestoreExpr, { addData(S->shouldCopy()); })
+  DEF_ADD_DATA(ObjCPropertyRefExpr, {
+    addData(S->isSuperReceiver());
+    addData(S->isImplicitProperty());
+  })
+  DEF_ADD_DATA(ObjCAtCatchStmt, { addData(S->hasEllipsis()); })
+
+  //--- Miscellaneous Stmts ------------------------------------------------//
+  DEF_ADD_DATA(CXXFoldExpr, {
+    addData(S->isRightFold());
+    addData(S->getOperator());
+  })
+  DEF_ADD_DATA(GenericSelectionExpr, {
+    for (unsigned i = 0; i < S->getNumAssocs(); ++i) {
+      addData(S->getAssocType(i));
+    }
+  })
+  DEF_ADD_DATA(LambdaExpr, {
+    for (const LambdaCapture &C : S->captures()) {
+      addData(C.isPackExpansion());
+      addData(C.getCaptureKind());
+      if (C.capturesVariable())
+        addData(C.getCapturedVar()->getType());
+    }
+    addData(S->isGenericLambda());
+    addData(S->isMutable());
+  })
+  DEF_ADD_DATA(DeclStmt, {
+    auto numDecls = std::distance(S->decl_begin(), S->decl_end());
+    addData(static_cast(numDecls));
+    for (const Decl *D : S->decls()) {
+      if (const VarDecl *VD = dyn_cast<VarDecl>(D)) {
+        addData(VD->getType());
+      }
+    }
+  })
+  DEF_ADD_DATA(AsmStmt, {
+    addData(S->isSimple());
+    addData(S->isVolatile());
+    addData(S->generateAsmString(Context));
+    for (unsigned i = 0; i < S->getNumInputs(); ++i) {
+      addData(S->getInputConstraint(i));
+    }
+    for (unsigned i = 0; i < S->getNumOutputs(); ++i) {
+      addData(S->getOutputConstraint(i));
+    }
+    for (unsigned i = 0; i < S->getNumClobbers(); ++i) {
+      addData(S->getClobber(i));
+    }
+  })
+  DEF_ADD_DATA(AttributedStmt, {
+    for (const Attr *A : S->getAttrs()) {
+      addData(std::string(A->getSpelling()));
+    }
+  })
+};
+} // namespace clone_detection
+
 /// Identifies a list of statements.
 ///
 /// Can either identify a single arbitrary Stmt object, a continuous sequence of
@@ -319,6 +509,26 @@ struct OnlyLargestCloneConstraint {
   void constrain(std::vector<CloneDetector::CloneGroup> &Result);
 };
 
+struct FilenamePatternConstraint {
+  StringRef IgnoredFilesPattern;
+  std::shared_ptr<llvm::Regex> IgnoredFilesRegex;
+
+  FilenamePatternConstraint(StringRef IgnoredFilesPattern) 
+      : IgnoredFilesPattern(IgnoredFilesPattern) {
+    IgnoredFilesRegex = std::make_shared<llvm::Regex>("^(" +
+        IgnoredFilesPattern.str() + "$)");
+  }
+
+  bool isAutoGenerated(const CloneDetector::CloneGroup &Group);
+
+  void constrain(std::vector<CloneDetector::CloneGroup> &CloneGroups) {
+    CloneConstraint::filterGroups(
+        CloneGroups, [this](const CloneDetector::CloneGroup &Group) {
+          return isAutoGenerated(Group);
+        });
+  }
+};
+
 /// Analyzes the pattern of the referenced variables in a statement.
 class VariablePattern {
 
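
Because StmtDataCollector only requires its consumer to expose
update(StringRef), a hasher such as llvm::MD5 can be plugged in directly.
A sketch (S and Ctx are a placeholder Stmt* and its ASTContext):

    #include "clang/Analysis/CloneDetection.h"
    #include "llvm/Support/MD5.h"

    llvm::MD5 Hash;
    clang::clone_detection::StmtDataCollector<llvm::MD5> Collect(S, Ctx, Hash);
    llvm::MD5::MD5Result Digest;
    Hash.final(Digest);  // equal digests => clone candidates, per the class doc
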
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Basic/AddressSpaces.h b/interpreter/llvm/src/tools/clang/include/clang/Basic/AddressSpaces.h
index 0ec5aafd64b69..95b9b9c7d0b31 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Basic/AddressSpaces.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/Basic/AddressSpaces.h
@@ -45,13 +45,12 @@ enum ID {
   // This denotes the count of language-specific address spaces and also
   // the offset added to the target-specific address spaces, which are usually
   // specified by address space attributes __attribute__((address_space(n))).
-  Count
+  FirstTargetAddressSpace
 };
 
 /// The type of a lookup table which maps from language-specific address spaces
 /// to target-specific ones.
-typedef unsigned Map[Count];
-
+typedef unsigned Map[FirstTargetAddressSpace];
 }
 
 }
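
The rename makes the encoding explicit: language address spaces index the Map,
while __attribute__((address_space(n))) values are stored biased by
FirstTargetAddressSpace. A sketch of the lookup this enables, mirroring what
ASTContext::getTargetAddressSpace does:

    #include "clang/Basic/AddressSpaces.h"

    unsigned toTargetAS(unsigned AS, const clang::LangAS::Map &M) {
      return AS >= clang::LangAS::FirstTargetAddressSpace
                 ? AS - clang::LangAS::FirstTargetAddressSpace // attribute AS
                 : M[AS];                                      // language AS
    }
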
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Basic/AllDiagnostics.h b/interpreter/llvm/src/tools/clang/include/clang/Basic/AllDiagnostics.h
index 18a2b8a318713..fc861a1952a51 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Basic/AllDiagnostics.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/Basic/AllDiagnostics.h
@@ -28,7 +28,7 @@
 namespace clang {
 template <size_t SizeOfStr, typename FieldType>
 class StringSizerHelper {
-  char FIELD_TOO_SMALL[SizeOfStr <= FieldType(~0U) ? 1 : -1];
+  static_assert(SizeOfStr <= FieldType(~0U), "Field too small!");
 public:
   enum { Size = SizeOfStr };
 };
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Basic/Attr.td b/interpreter/llvm/src/tools/clang/include/clang/Basic/Attr.td
index 4eb958e3f4d56..f13e13b0107b0 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Basic/Attr.td
+++ b/interpreter/llvm/src/tools/clang/include/clang/Basic/Attr.td
@@ -149,6 +149,9 @@ class ExprArgument<string name, bit opt = 0> : Argument<name, opt>;
 class FunctionArgument<string name, bit opt = 0, bit fake = 0> : Argument<name, opt, fake>;
+class NamedArgument<string name, bit opt = 0, bit fake = 0> : Argument<name, opt, fake>;
 class TypeArgument<string name, bit opt = 0> : Argument<name, opt>;
 class UnsignedArgument<string name, bit opt = 0> : Argument<name, opt>;
 class VariadicUnsignedArgument<string name> : Argument<name, 1>;
@@ -1179,6 +1182,12 @@ def MipsInterrupt : InheritableAttr, TargetSpecificAttr<TargetMips> {
   let Documentation = [MipsInterruptDocs];
 }
 
+def MicroMips : InheritableAttr, TargetSpecificAttr<TargetMips> {
+  let Spellings = [GCC<"micromips">];
+  let Subjects = SubjectList<[Function], ErrorDiag>;
+  let Documentation = [MicroMipsDocs];
+}
+
 def Mode : Attr {
   let Spellings = [GCC<"mode">];
   let Subjects = SubjectList<[Var, Enum, TypedefName, Field], ErrorDiag,
@@ -1261,6 +1270,12 @@ def NoMips16 : InheritableAttr, TargetSpecificAttr<TargetMips> {
   let Documentation = [Undocumented];
 }
 
+def NoMicroMips : InheritableAttr, TargetSpecificAttr<TargetMips> {
+  let Spellings = [GCC<"nomicromips">];
+  let Subjects = SubjectList<[Function], ErrorDiag>;
+  let Documentation = [MicroMipsDocs];
+}
+
 // This is not a TargetSpecificAttr so that it is silently accepted and
 // ignored on other targets as encouraged by the OpenCL spec.
 //
@@ -1668,6 +1683,42 @@ def Section : InheritableAttr {
   let Documentation = [SectionDocs];
 }
 
+def PragmaClangBSSSection : InheritableAttr {
+  // This attribute has no spellings as it is only ever created implicitly.
+  let Spellings = [];
+  let Args = [StringArgument<"Name">];
+  let Subjects = SubjectList<[GlobalVar], ErrorDiag,
+                             "ExpectedFunctionMethodOrGlobalVar">;
+  let Documentation = [Undocumented];
+}
+
+def PragmaClangDataSection : InheritableAttr {
+  // This attribute has no spellings as it is only ever created implicitly.
+  let Spellings = [];
+  let Args = [StringArgument<"Name">];
+  let Subjects = SubjectList<[GlobalVar], ErrorDiag,
+                             "ExpectedFunctionMethodOrGlobalVar">;
+  let Documentation = [Undocumented];
+}
+
+def PragmaClangRodataSection : InheritableAttr {
+  // This attribute has no spellings as it is only ever created implicitly.
+  let Spellings = [];
+  let Args = [StringArgument<"Name">];
+  let Subjects = SubjectList<[GlobalVar], ErrorDiag,
+                             "ExpectedFunctionMethodOrGlobalVar">;
+  let Documentation = [Undocumented];
+}
+
+def PragmaClangTextSection : InheritableAttr {
+  // This attribute has no spellings as it is only ever created implicitly.
+  let Spellings = [];
+  let Args = [StringArgument<"Name">];
+  let Subjects = SubjectList<[Function], ErrorDiag,
+                             "ExpectedFunctionMethodOrGlobalVar">;
+  let Documentation = [Undocumented];
+}
+
 def Sentinel : InheritableAttr {
   let Spellings = [GCC<"sentinel">];
   let Args = [DefaultIntArgument<"Sentinel", 0>,
@@ -1751,11 +1802,18 @@ def Target : InheritableAttr {
   let Subjects = SubjectList<[Function], ErrorDiag>;
   let Documentation = [TargetDocs];
   let AdditionalMembers = [{
-    typedef std::pair<std::vector<std::string>, StringRef> ParsedTargetAttr;
+    struct ParsedTargetAttr {
+      std::vector<std::string> Features;
+      StringRef Architecture;
+      bool DuplicateArchitecture = false;
+    };
     ParsedTargetAttr parse() const {
+      return parse(getFeaturesStr());
+    }
+    static ParsedTargetAttr parse(StringRef Features) {
       ParsedTargetAttr Ret;
       SmallVector<StringRef, 1> AttrFeatures;
-      getFeaturesStr().split(AttrFeatures, ",");
+      Features.split(AttrFeatures, ",");
 
       // Grab the various features and prepend a "+" to turn on the feature to
       // the backend and add them to our existing set of features.
@@ -1772,12 +1830,15 @@ def Target : InheritableAttr {
 	  continue;
 
         // While we're here iterating check for a different target cpu.
-        if (Feature.startswith("arch="))
-          Ret.second = Feature.split("=").second.trim();
-        else if (Feature.startswith("no-"))
-          Ret.first.push_back("-" + Feature.split("-").second.str());
+        if (Feature.startswith("arch=")) {
+          if (!Ret.Architecture.empty())
+            Ret.DuplicateArchitecture = true;
+          else
+            Ret.Architecture = Feature.split("=").second.trim();
+        } else if (Feature.startswith("no-"))
+          Ret.Features.push_back("-" + Feature.split("-").second.str());
         else
-          Ret.first.push_back("+" + Feature.str());
+          Ret.Features.push_back("+" + Feature.str());
       }
       return Ret;
     }
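
A worked illustration of the reworked Target attribute parser above (expected
values derived from the code in this hunk, not text from the patch):

    // parse("arch=ivybridge,no-sse4.2,avx,arch=haswell") yields:
    //   Features              == {"-sse4.2", "+avx"}
    //   Architecture          == "ivybridge"  // the first arch= wins
    //   DuplicateArchitecture == true         // lets Sema diagnose the second
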
@@ -1807,14 +1868,14 @@ def Unavailable : InheritableAttr {
 
 def DiagnoseIf : InheritableAttr {
   let Spellings = [GNU<"diagnose_if">];
-  let Subjects = SubjectList<[Function]>;
+  let Subjects = SubjectList<[Function, ObjCMethod, ObjCProperty]>;
   let Args = [ExprArgument<"Cond">, StringArgument<"Message">,
               EnumArgument<"DiagnosticType",
                            "DiagnosticType",
                            ["error", "warning"],
                            ["DT_Error", "DT_Warning"]>,
               BoolArgument<"ArgDependent", 0, /*fake*/ 1>,
-              FunctionArgument<"Parent", 0, /*fake*/ 1>];
+              NamedArgument<"Parent", 0, /*fake*/ 1>];
   let DuplicatesAllowedWhileMerging = 1;
   let LateParsed = 1;
   let AdditionalMembers = [{
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Basic/AttrDocs.td b/interpreter/llvm/src/tools/clang/include/clang/Basic/AttrDocs.td
index 9e2fdf4834aa3..33ef3ea4cade7 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Basic/AttrDocs.td
+++ b/interpreter/llvm/src/tools/clang/include/clang/Basic/AttrDocs.td
@@ -605,20 +605,27 @@ semantics:
   for ``T`` and ``U`` to be incompatible.
 
 The declaration of ``overloadable`` functions is restricted to function
-declarations and definitions.  Most importantly, if any function with a given
-name is given the ``overloadable`` attribute, then all function declarations
-and definitions with that name (and in that scope) must have the
-``overloadable`` attribute.  This rule even applies to redeclarations of
-functions whose original declaration had the ``overloadable`` attribute, e.g.,
+declarations and definitions.  If a function is marked with the ``overloadable``
+attribute, then all declarations and definitions of functions with that name,
+except for at most one (see the note below about unmarked overloads), must have
+the ``overloadable`` attribute.  In addition, redeclarations of a function with
+the ``overloadable`` attribute must have the ``overloadable`` attribute, and
+redeclarations of a function without the ``overloadable`` attribute must *not*
+have the ``overloadable`` attribute. e.g.,
 
 .. code-block:: c
 
   int f(int) __attribute__((overloadable));
   float f(float); // error: declaration of "f" must have the "overloadable" attribute
+  int f(int); // error: redeclaration of "f" must have the "overloadable" attribute
 
   int g(int) __attribute__((overloadable));
   int g(int) { } // error: redeclaration of "g" must also have the "overloadable" attribute
 
+  int h(int);
+  int h(int) __attribute__((overloadable)); // error: declaration of "h" must not
+                                            // have the "overloadable" attribute
+
 Functions marked ``overloadable`` must have prototypes.  Therefore, the
 following code is ill-formed:
 
@@ -651,7 +658,28 @@ caveats to this use of name mangling:
   linkage specification, it's name *will* be mangled in the same way as it
   would in C.
 
-Query for this feature with ``__has_extension(attribute_overloadable)``.
+For the purpose of backwards compatibility, at most one function with the same
+name as other ``overloadable`` functions may omit the ``overloadable``
+attribute. In this case, the function without the ``overloadable`` attribute
+will not have its name mangled.
+
+For example:
+
+.. code-block:: c
+
+  // Notes with mangled names assume Itanium mangling.
+  int f(int);
+  int f(double) __attribute__((overloadable));
+  void foo() {
+    f(5); // Emits a call to f (not _Z1fi, as it would with an overload that
+          // was marked with overloadable).
+    f(1.0); // Emits a call to _Z1fd.
+  }
+
+Support for unmarked overloads is not present in some versions of clang. You may
+query for it using ``__has_extension(overloadable_unmarked)``.
+
+Query for this attribute with ``__has_attribute(overloadable)``.
   }];
 }
 
@@ -882,13 +910,13 @@ the function declaration for a hypothetical function ``f``:
 
   void f(void) __attribute__((availability(macos,introduced=10.4,deprecated=10.6,obsoleted=10.7)));
 
-The availability attribute states that ``f`` was introduced in Mac OS X 10.4,
-deprecated in Mac OS X 10.6, and obsoleted in Mac OS X 10.7.  This information
+The availability attribute states that ``f`` was introduced in macOS 10.4,
+deprecated in macOS 10.6, and obsoleted in macOS 10.7.  This information
 is used by Clang to determine when it is safe to use ``f``: for example, if
-Clang is instructed to compile code for Mac OS X 10.5, a call to ``f()``
-succeeds.  If Clang is instructed to compile code for Mac OS X 10.6, the call
+Clang is instructed to compile code for macOS 10.5, a call to ``f()``
+succeeds.  If Clang is instructed to compile code for macOS 10.6, the call
 succeeds but Clang emits a warning specifying that the function is deprecated.
-Finally, if Clang is instructed to compile code for Mac OS X 10.7, the call
+Finally, if Clang is instructed to compile code for macOS 10.7, the call
 fails because ``f()`` is no longer available.
 
 The availability attribute is a comma-separated list starting with the
@@ -933,7 +961,7 @@ are:
   command-line arguments.
 
 ``macos``
-  Apple's Mac OS X operating system.  The minimum deployment target is
+  Apple's macOS operating system.  The minimum deployment target is
   specified by the ``-mmacosx-version-min=*version*`` command-line argument.
   ``macosx`` is supported for backward-compatibility reasons, but it is
   deprecated.
@@ -987,6 +1015,19 @@ When one method overrides another, the overriding method can be more widely avai
   - (id)method __attribute__((availability(macos,introduced=10.3))); // okay: method moved into base class later
   - (id)method __attribute__((availability(macos,introduced=10.5))); // error: this method was available via the base class in 10.4
   @end
+
+Starting with the macOS 10.12 SDK, the ``API_AVAILABLE`` macro from
+``<os/availability.h>`` can simplify the spelling:
+
+.. code-block:: objc
+
+  @interface A
+  - (id)method API_AVAILABLE(macos(10.11));
+  - (id)otherMethod API_AVAILABLE(macos(10.11), ios(11.0));
+  @end
+
+Also see the documentation for `@available
+<http://clang.llvm.org/docs/LanguageExtensions.html#objective-c-available>`_
   }];
 }
 
@@ -1269,6 +1310,19 @@ The semantics are as follows:
   }];
 }
 
+def MicroMipsDocs : Documentation {
+  let Category = DocCatFunction;
+  let Content = [{
+Clang supports the GNU style ``__attribute__((micromips))`` and
+``__attribute__((nomicromips))`` attributes on MIPS targets. These attributes
+may be attached to a function definition and instruct the backend to generate
+or not to generate microMIPS code for that function.
+
+These attributes override the `-mmicromips` and `-mno-micromips` options
+on the command line.
+  }];
+}
+
 def AVRInterruptDocs : Documentation {
   let Category = DocCatFunction;
   let Content = [{
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Basic/Builtins.def b/interpreter/llvm/src/tools/clang/include/clang/Basic/Builtins.def
index a9ec172422ab0..1ddb9beaf9133 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Basic/Builtins.def
+++ b/interpreter/llvm/src/tools/clang/include/clang/Basic/Builtins.def
@@ -52,6 +52,7 @@
 //  LL  -> long long
 //  LLL -> __int128_t (e.g. LLLi)
 //  W   -> int64_t
+//  N   -> 'int' size if target is LP64, 'L' otherwise.
 //  S   -> signed
 //  U   -> unsigned
 //  I   -> Required to constant fold to an integer constant expression.
@@ -718,11 +719,11 @@ BUILTIN(__builtin_rindex, "c*cC*i", "Fn")
 LANGBUILTIN(_alloca,          "v*z", "n", ALL_MS_LANGUAGES)
 LANGBUILTIN(__assume,         "vb",  "n", ALL_MS_LANGUAGES)
 LIBBUILTIN(_byteswap_ushort, "UsUs",     "fnc", "stdlib.h", ALL_MS_LANGUAGES)
-LIBBUILTIN(_byteswap_ulong,  "ULiULi",   "fnc", "stdlib.h", ALL_MS_LANGUAGES)
+LIBBUILTIN(_byteswap_ulong,  "UNiUNi",   "fnc", "stdlib.h", ALL_MS_LANGUAGES)
 LIBBUILTIN(_byteswap_uint64, "ULLiULLi", "fnc", "stdlib.h", ALL_MS_LANGUAGES)
 LANGBUILTIN(__debugbreak,     "v",   "n", ALL_MS_LANGUAGES)
-LANGBUILTIN(__exception_code, "ULi", "n", ALL_MS_LANGUAGES)
-LANGBUILTIN(_exception_code,  "ULi", "n", ALL_MS_LANGUAGES)
+LANGBUILTIN(__exception_code, "UNi", "n", ALL_MS_LANGUAGES)
+LANGBUILTIN(_exception_code,  "UNi", "n", ALL_MS_LANGUAGES)
 LANGBUILTIN(__exception_info, "v*",  "n", ALL_MS_LANGUAGES)
 LANGBUILTIN(_exception_info,  "v*",  "n", ALL_MS_LANGUAGES)
 LANGBUILTIN(__abnormal_termination, "i", "n", ALL_MS_LANGUAGES)
@@ -730,33 +731,33 @@ LANGBUILTIN(_abnormal_termination,  "i", "n", ALL_MS_LANGUAGES)
 LANGBUILTIN(__GetExceptionInfo, "v*.", "ntu", ALL_MS_LANGUAGES)
 LANGBUILTIN(_InterlockedAnd8,   "ccD*c",        "n", ALL_MS_LANGUAGES)
 LANGBUILTIN(_InterlockedAnd16,  "ssD*s",        "n", ALL_MS_LANGUAGES)
-LANGBUILTIN(_InterlockedAnd,    "LiLiD*Li",     "n", ALL_MS_LANGUAGES)
+LANGBUILTIN(_InterlockedAnd,    "NiNiD*Ni",     "n", ALL_MS_LANGUAGES)
 LANGBUILTIN(_InterlockedCompareExchange8,   "ccD*cc",         "n", ALL_MS_LANGUAGES)
 LANGBUILTIN(_InterlockedCompareExchange16,  "ssD*ss",         "n", ALL_MS_LANGUAGES)
-LANGBUILTIN(_InterlockedCompareExchange,    "LiLiD*LiLi",     "n", ALL_MS_LANGUAGES)
+LANGBUILTIN(_InterlockedCompareExchange,    "NiNiD*NiNi",     "n", ALL_MS_LANGUAGES)
 LANGBUILTIN(_InterlockedCompareExchange64,  "LLiLLiD*LLiLLi", "n", ALL_MS_LANGUAGES)
 LANGBUILTIN(_InterlockedCompareExchangePointer, "v*v*D*v*v*", "n", ALL_MS_LANGUAGES)
 LANGBUILTIN(_InterlockedDecrement16,        "ssD*",     "n", ALL_MS_LANGUAGES)
-LANGBUILTIN(_InterlockedDecrement,          "LiLiD*",   "n", ALL_MS_LANGUAGES)
-LANGBUILTIN(_InterlockedExchange,           "LiLiD*Li",     "n", ALL_MS_LANGUAGES)
+LANGBUILTIN(_InterlockedDecrement,          "NiNiD*",   "n", ALL_MS_LANGUAGES)
+LANGBUILTIN(_InterlockedExchange,           "NiNiD*Ni",     "n", ALL_MS_LANGUAGES)
 LANGBUILTIN(_InterlockedExchange8,          "ccD*c",        "n", ALL_MS_LANGUAGES)
 LANGBUILTIN(_InterlockedExchange16,         "ssD*s",        "n", ALL_MS_LANGUAGES)
 LANGBUILTIN(_InterlockedExchangeAdd8,       "ccD*c",          "n", ALL_MS_LANGUAGES)
 LANGBUILTIN(_InterlockedExchangeAdd16,      "ssD*s",          "n", ALL_MS_LANGUAGES)
-LANGBUILTIN(_InterlockedExchangeAdd,        "LiLiD*Li",       "n", ALL_MS_LANGUAGES)
+LANGBUILTIN(_InterlockedExchangeAdd,        "NiNiD*Ni",       "n", ALL_MS_LANGUAGES)
 LANGBUILTIN(_InterlockedExchangePointer,    "v*v*D*v*",   "n", ALL_MS_LANGUAGES)
 LANGBUILTIN(_InterlockedExchangeSub8,   "ccD*c",        "n", ALL_MS_LANGUAGES)
 LANGBUILTIN(_InterlockedExchangeSub16,  "ssD*s",        "n", ALL_MS_LANGUAGES)
-LANGBUILTIN(_InterlockedExchangeSub,    "LiLiD*Li",     "n", ALL_MS_LANGUAGES)
+LANGBUILTIN(_InterlockedExchangeSub,    "NiNiD*Ni",     "n", ALL_MS_LANGUAGES)
 LANGBUILTIN(_InterlockedIncrement16,        "ssD*",     "n", ALL_MS_LANGUAGES)
-LANGBUILTIN(_InterlockedIncrement,          "LiLiD*",   "n", ALL_MS_LANGUAGES)
+LANGBUILTIN(_InterlockedIncrement,          "NiNiD*",   "n", ALL_MS_LANGUAGES)
 LANGBUILTIN(_InterlockedOr8,  "ccD*c",        "n", ALL_MS_LANGUAGES)
 LANGBUILTIN(_InterlockedOr16, "ssD*s",        "n", ALL_MS_LANGUAGES)
-LANGBUILTIN(_InterlockedOr,   "LiLiD*Li",     "n", ALL_MS_LANGUAGES)
+LANGBUILTIN(_InterlockedOr,   "NiNiD*Ni",     "n", ALL_MS_LANGUAGES)
 LANGBUILTIN(_InterlockedXor8,  "ccD*c",       "n", ALL_MS_LANGUAGES)
 LANGBUILTIN(_InterlockedXor16, "ssD*s",       "n", ALL_MS_LANGUAGES)
-LANGBUILTIN(_InterlockedXor,   "LiLiD*Li",    "n", ALL_MS_LANGUAGES)
-LANGBUILTIN(_interlockedbittestandset, "UcLiD*Li", "n", ALL_MS_LANGUAGES)
+LANGBUILTIN(_InterlockedXor,   "NiNiD*Ni",    "n", ALL_MS_LANGUAGES)
+LANGBUILTIN(_interlockedbittestandset, "UcNiD*Ni", "n", ALL_MS_LANGUAGES)
 LANGBUILTIN(__noop,           "i.",  "n", ALL_MS_LANGUAGES)
 LANGBUILTIN(__popcnt16, "UsUs",     "nc", ALL_MS_LANGUAGES)
 LANGBUILTIN(__popcnt,   "UiUi",     "nc", ALL_MS_LANGUAGES)
@@ -765,12 +766,12 @@ LANGBUILTIN(_ReturnAddress, "v*", "n", ALL_MS_LANGUAGES)
 LANGBUILTIN(_rotl8,  "UcUcUc",    "n", ALL_MS_LANGUAGES)
 LANGBUILTIN(_rotl16, "UsUsUc",    "n", ALL_MS_LANGUAGES)
 LANGBUILTIN(_rotl,   "UiUii",     "n", ALL_MS_LANGUAGES)
-LANGBUILTIN(_lrotl,  "ULiULii",   "n", ALL_MS_LANGUAGES)
+LANGBUILTIN(_lrotl,  "UNiUNii",   "n", ALL_MS_LANGUAGES)
 LANGBUILTIN(_rotl64, "ULLiULLii", "n", ALL_MS_LANGUAGES)
 LANGBUILTIN(_rotr8,  "UcUcUc",    "n", ALL_MS_LANGUAGES)
 LANGBUILTIN(_rotr16, "UsUsUc",    "n", ALL_MS_LANGUAGES)
 LANGBUILTIN(_rotr,   "UiUii",     "n", ALL_MS_LANGUAGES)
-LANGBUILTIN(_lrotr,  "ULiULii",   "n", ALL_MS_LANGUAGES)
+LANGBUILTIN(_lrotr,  "UNiUNii",   "n", ALL_MS_LANGUAGES)
 LANGBUILTIN(_rotr64, "ULLiULLii", "n", ALL_MS_LANGUAGES)
 LANGBUILTIN(__va_start,       "vc**.", "nt", ALL_MS_LANGUAGES)
 LANGBUILTIN(__fastfail, "vUi",    "nr", ALL_MS_LANGUAGES)
@@ -1412,6 +1413,11 @@ BUILTIN(__builtin_os_log_format, "v*v*cC*.", "p:0:nt")
 // Builtins for XRay
 BUILTIN(__xray_customevent, "vcC*z", "")
 
+// Win64-compatible va_list functions
+BUILTIN(__builtin_ms_va_start, "vc*&.", "nt")
+BUILTIN(__builtin_ms_va_end, "vc*&", "n")
+BUILTIN(__builtin_ms_va_copy, "vc*&c*&", "n")
+
 #undef BUILTIN
 #undef LIBBUILTIN
 #undef LANGBUILTIN
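
An illustration of what the new 'N' modifier buys: "NiNiD*Ni" keeps
_InterlockedExchange 32-bit on both data models (the prototypes below are a
sketch of the decoding, not text from the patch):

    // On Win64 (LLP64), 'N' behaves like 'L':
    //   long _InterlockedExchange(long volatile *Target, long Value);
    // On LP64 targets, 'N' is plain 'int':
    //   int  _InterlockedExchange(int  volatile *Target, int  Value);
    // Either way the operand stays 32 bits, matching MSVC's 32-bit long.
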
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Basic/BuiltinsAMDGPU.def b/interpreter/llvm/src/tools/clang/include/clang/Basic/BuiltinsAMDGPU.def
index a8ab657c379e4..6542acafe48a5 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/interpreter/llvm/src/tools/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -36,6 +36,7 @@ BUILTIN(__builtin_amdgcn_workitem_id_z, "Ui", "nc")
 // Instruction builtins.
 //===----------------------------------------------------------------------===//
 BUILTIN(__builtin_amdgcn_s_getreg, "UiIi", "n")
+BUILTIN(__builtin_amdgcn_s_getpc, "LUi", "n")
 BUILTIN(__builtin_amdgcn_s_waitcnt, "vIi", "n")
 BUILTIN(__builtin_amdgcn_s_sendmsg, "vIiUi", "n")
 BUILTIN(__builtin_amdgcn_s_sendmsghalt, "vIiUi", "n")
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Basic/BuiltinsARM.def b/interpreter/llvm/src/tools/clang/include/clang/Basic/BuiltinsARM.def
index e8db347d4be52..4e277f8a5a6b5 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Basic/BuiltinsARM.def
+++ b/interpreter/llvm/src/tools/clang/include/clang/Basic/BuiltinsARM.def
@@ -215,10 +215,10 @@ LANGBUILTIN(_MoveFromCoprocessor2, "UiIUiIUiIUiIUiIUi", "", ALL_MS_LANGUAGES)
 LANGBUILTIN(_MoveToCoprocessor, "vUiIUiIUiIUiIUiIUi", "", ALL_MS_LANGUAGES)
 LANGBUILTIN(_MoveToCoprocessor2, "vUiIUiIUiIUiIUiIUi", "", ALL_MS_LANGUAGES)
 
-TARGET_HEADER_BUILTIN(_BitScanForward, "UcULi*ULi", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
-TARGET_HEADER_BUILTIN(_BitScanReverse, "UcULi*ULi", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
-TARGET_HEADER_BUILTIN(_BitScanForward64, "UcULi*ULLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
-TARGET_HEADER_BUILTIN(_BitScanReverse64, "UcULi*ULLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
+TARGET_HEADER_BUILTIN(_BitScanForward, "UcUNi*UNi", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
+TARGET_HEADER_BUILTIN(_BitScanReverse, "UcUNi*UNi", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
+TARGET_HEADER_BUILTIN(_BitScanForward64, "UcUNi*ULLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
+TARGET_HEADER_BUILTIN(_BitScanReverse64, "UcUNi*ULLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
 
 TARGET_HEADER_BUILTIN(_InterlockedAnd64,         "LLiLLiD*LLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
 TARGET_HEADER_BUILTIN(_InterlockedDecrement64,   "LLiLLiD*",    "nh", "intrin.h", ALL_MS_LANGUAGES, "")
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Basic/BuiltinsHexagon.def b/interpreter/llvm/src/tools/clang/include/clang/Basic/BuiltinsHexagon.def
index 85936cbfc08ea..14fc4adc25bc0 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Basic/BuiltinsHexagon.def
+++ b/interpreter/llvm/src/tools/clang/include/clang/Basic/BuiltinsHexagon.def
@@ -882,6 +882,12 @@ BUILTIN(__builtin_HEXAGON_S2_ct0p,"iLLi","")
 BUILTIN(__builtin_HEXAGON_S2_ct1p,"iLLi","")
 BUILTIN(__builtin_HEXAGON_S2_interleave,"LLiLLi","")
 BUILTIN(__builtin_HEXAGON_S2_deinterleave,"LLiLLi","")
+BUILTIN(__builtin_HEXAGON_Y2_dccleana,"vv*","")
+BUILTIN(__builtin_HEXAGON_Y2_dccleaninva,"vv*","")
+BUILTIN(__builtin_HEXAGON_Y2_dcinva,"vv*","")
+BUILTIN(__builtin_HEXAGON_Y2_dczeroa,"vv*","")
+BUILTIN(__builtin_HEXAGON_Y4_l2fetch,"vv*Ui","")
+BUILTIN(__builtin_HEXAGON_Y5_l2fetch,"vv*LLUi","")
 
 BUILTIN(__builtin_HEXAGON_S6_rol_i_r,"iii","v:60:")
 BUILTIN(__builtin_HEXAGON_S6_rol_i_p,"LLiLLii","v:60:")
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Basic/BuiltinsNios2.def b/interpreter/llvm/src/tools/clang/include/clang/Basic/BuiltinsNios2.def
new file mode 100644
index 0000000000000..d9697e795c442
--- /dev/null
+++ b/interpreter/llvm/src/tools/clang/include/clang/Basic/BuiltinsNios2.def
@@ -0,0 +1,70 @@
+//===-- BuiltinsNios2.def - Nios2 Builtin function database --------*- C++ -*-==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the Nios2-specific builtin function database. Users of
+// this file must define the BUILTIN macro to make use of this information.
+//
+//===----------------------------------------------------------------------===//
+
+// The format of this database matches clang/Basic/Builtins.def.
+
+#if defined(BUILTIN) && !defined(TARGET_BUILTIN)
+#   define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BUILTIN(ID, TYPE, ATTRS)
+#endif
+
+// Nios2 R1 builtins:
+
+//int __builtin_ldbio(volatile const void *);
+BUILTIN(__builtin_ldbio, "ivDC*", "")
+//int __builtin_ldbuio(volatile const void *);
+BUILTIN(__builtin_ldbuio, "ivDC*", "")
+//int __builtin_ldhio(volatile const void *);
+BUILTIN(__builtin_ldhio, "ivDC*", "")
+//int __builtin_ldhuio(volatile const void *);
+BUILTIN(__builtin_ldhuio, "ivDC*", "")
+//int __builtin_ldwio(volatile const void *);
+BUILTIN(__builtin_ldwio, "ivDC*", "")
+//int __builtin_ldwuio(int);
+BUILTIN(__builtin_ldwuio, "ii", "")
+// int __builtin_rdctl(int);
+BUILTIN(__builtin_rdctl, "iIi", "")
+// void __builtin_wrctl(int, int);
+BUILTIN(__builtin_wrctl, "vIii", "")
+// int __builtin_rdprs(int, int);
+BUILTIN(__builtin_rdprs, "iii", "")
+//void __builtin_stbio(volatile void *, int);
+BUILTIN(__builtin_stbio, "vvD*i", "")
+//void __builtin_sthio(volatile void *, int);
+BUILTIN(__builtin_sthio, "vvD*i", "")
+//void __builtin_stwio(volatile void *, int);
+BUILTIN(__builtin_stwio, "vvD*i", "")
+//void __builtin_sync(void);
+BUILTIN(__builtin_sync, "v", "")
+// void __builtin_flushd(volatile void *);
+BUILTIN(__builtin_flushd, "vvD*", "")
+// void __builtin_flushda(volatile void *);
+BUILTIN(__builtin_flushda, "vvD*", "")
+
+// Nios2 R2 builtins:
+
+// int __builtin_wrpie(int);
+TARGET_BUILTIN(__builtin_wrpie, "ii", "", "nios2r2mandatory")
+// void __builtin_eni(int);
+TARGET_BUILTIN(__builtin_eni, "vi", "", "nios2r2mandatory")
+// int __builtin_ldex(volatile const void *);
+TARGET_BUILTIN(__builtin_ldex, "ivDC*", "", "nios2r2mandatory")
+// int __builtin_stex(volatile void *, int);
+TARGET_BUILTIN(__builtin_stex, "ivD*i", "", "nios2r2mandatory")
+// int __builtin_ldsex(volatile const void *);
+TARGET_BUILTIN(__builtin_ldsex, "ivDC*", "", "nios2r2mpx")
+// int __builtin_stsex(volatile void *, int);
+TARGET_BUILTIN(__builtin_stsex, "ivDC*i", "", "nios2r2mpx")
+
+#undef BUILTIN
+#undef TARGET_BUILTIN
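
A sketch of typical use of the R1 I/O builtins declared above; the register
address is a hypothetical memory-mapped device, not something defined by this
file:

    volatile unsigned *const LED_REG = (volatile unsigned *)0xFF200000;

    int  read_leds(void)       { return __builtin_ldwio(LED_REG); }
    void write_leds(int value) { __builtin_stwio(LED_REG, value); }
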
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Basic/BuiltinsPPC.def b/interpreter/llvm/src/tools/clang/include/clang/Basic/BuiltinsPPC.def
index f7cddc03131b4..faa70a48edc3c 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Basic/BuiltinsPPC.def
+++ b/interpreter/llvm/src/tools/clang/include/clang/Basic/BuiltinsPPC.def
@@ -51,10 +51,10 @@ BUILTIN(__builtin_altivec_vavguw, "V4UiV4UiV4Ui", "")
 
 BUILTIN(__builtin_altivec_vrfip, "V4fV4f", "")
 
-BUILTIN(__builtin_altivec_vcfsx, "V4fV4ii", "")
-BUILTIN(__builtin_altivec_vcfux, "V4fV4ii", "")
-BUILTIN(__builtin_altivec_vctsxs, "V4SiV4fi", "")
-BUILTIN(__builtin_altivec_vctuxs, "V4UiV4fi", "")
+BUILTIN(__builtin_altivec_vcfsx, "V4fV4iIi", "")
+BUILTIN(__builtin_altivec_vcfux, "V4fV4iIi", "")
+BUILTIN(__builtin_altivec_vctsxs, "V4SiV4fIi", "")
+BUILTIN(__builtin_altivec_vctuxs, "V4UiV4fIi", "")
 
 BUILTIN(__builtin_altivec_dss, "vUi", "")
 BUILTIN(__builtin_altivec_dssall, "v", "")
@@ -420,6 +420,9 @@ BUILTIN(__builtin_vsx_xvtstdcsp, "V4UiV4fIi", "")
 BUILTIN(__builtin_vsx_insertword, "V16UcV4UiV16UcIi", "")
 BUILTIN(__builtin_vsx_extractuword, "V2ULLiV16UcIi", "")
 
+BUILTIN(__builtin_vsx_xxpermdi, "v.", "t")
+BUILTIN(__builtin_vsx_xxsldwi, "v.", "t")
+
 // HTM builtins
 BUILTIN(__builtin_tbegin, "UiUIi", "")
 BUILTIN(__builtin_tend, "UiUIi", "")
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Basic/BuiltinsSystemZ.def b/interpreter/llvm/src/tools/clang/include/clang/Basic/BuiltinsSystemZ.def
index fa96e10b3990c..ac92286af0b5b 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Basic/BuiltinsSystemZ.def
+++ b/interpreter/llvm/src/tools/clang/include/clang/Basic/BuiltinsSystemZ.def
@@ -253,5 +253,29 @@ TARGET_BUILTIN(__builtin_s390_vfmsdb, "V2dV2dV2dV2d", "nc", "vector")
 TARGET_BUILTIN(__builtin_s390_vfsqdb, "V2dV2d", "nc", "vector")
 TARGET_BUILTIN(__builtin_s390_vftcidb, "V2SLLiV2dIii*", "nc", "vector")
 
+// Vector-enhancements facility 1 intrinsics.
+TARGET_BUILTIN(__builtin_s390_vlrl, "V16ScUivC*", "", "vector-enhancements-1")
+TARGET_BUILTIN(__builtin_s390_vstrl, "vV16ScUiv*", "", "vector-enhancements-1")
+TARGET_BUILTIN(__builtin_s390_vbperm, "V2ULLiV16UcV16Uc", "nc", "vector-enhancements-1")
+TARGET_BUILTIN(__builtin_s390_vmslg, "V16UcV2ULLiV2ULLiV16UcIi", "nc", "vector-enhancements-1")
+TARGET_BUILTIN(__builtin_s390_vfmaxdb, "V2dV2dV2dIi", "nc", "vector-enhancements-1")
+TARGET_BUILTIN(__builtin_s390_vfmindb, "V2dV2dV2dIi", "nc", "vector-enhancements-1")
+TARGET_BUILTIN(__builtin_s390_vfnmadb, "V2dV2dV2dV2d", "nc", "vector-enhancements-1")
+TARGET_BUILTIN(__builtin_s390_vfnmsdb, "V2dV2dV2dV2d", "nc", "vector-enhancements-1")
+TARGET_BUILTIN(__builtin_s390_vfcesbs, "V4SiV4fV4fi*", "nc", "vector-enhancements-1")
+TARGET_BUILTIN(__builtin_s390_vfchsbs, "V4SiV4fV4fi*", "nc", "vector-enhancements-1")
+TARGET_BUILTIN(__builtin_s390_vfchesbs, "V4SiV4fV4fi*", "nc", "vector-enhancements-1")
+TARGET_BUILTIN(__builtin_s390_vfisb, "V4fV4fIiIi", "nc", "vector-enhancements-1")
+TARGET_BUILTIN(__builtin_s390_vfmaxsb, "V4fV4fV4fIi", "nc", "vector-enhancements-1")
+TARGET_BUILTIN(__builtin_s390_vfminsb, "V4fV4fV4fIi", "nc", "vector-enhancements-1")
+TARGET_BUILTIN(__builtin_s390_vflnsb, "V4fV4f", "nc", "vector-enhancements-1")
+TARGET_BUILTIN(__builtin_s390_vflpsb, "V4fV4f", "nc", "vector-enhancements-1")
+TARGET_BUILTIN(__builtin_s390_vfmasb, "V4fV4fV4fV4f", "nc", "vector-enhancements-1")
+TARGET_BUILTIN(__builtin_s390_vfmssb, "V4fV4fV4fV4f", "nc", "vector-enhancements-1")
+TARGET_BUILTIN(__builtin_s390_vfnmasb, "V4fV4fV4fV4f", "nc", "vector-enhancements-1")
+TARGET_BUILTIN(__builtin_s390_vfnmssb, "V4fV4fV4fV4f", "nc", "vector-enhancements-1")
+TARGET_BUILTIN(__builtin_s390_vfsqsb, "V4fV4f", "nc", "vector-enhancements-1")
+TARGET_BUILTIN(__builtin_s390_vftcisb, "V4SiV4fIii*", "nc", "vector-enhancements-1")
+
 #undef BUILTIN
 #undef TARGET_BUILTIN
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Basic/BuiltinsWebAssembly.def b/interpreter/llvm/src/tools/clang/include/clang/Basic/BuiltinsWebAssembly.def
index de56908be83c6..19318dcebb9ec 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Basic/BuiltinsWebAssembly.def
+++ b/interpreter/llvm/src/tools/clang/include/clang/Basic/BuiltinsWebAssembly.def
@@ -21,4 +21,8 @@
 BUILTIN(__builtin_wasm_current_memory, "z", "n")
 BUILTIN(__builtin_wasm_grow_memory, "zz", "n")
 
+// Exception handling builtins.
+BUILTIN(__builtin_wasm_throw, "vUiv*", "r")
+BUILTIN(__builtin_wasm_rethrow, "v", "r")
+
 #undef BUILTIN
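
A sketch of the new exception-handling builtins ("vUiv*" takes a tag index
plus a payload pointer; "v" takes nothing):

    void throw_tag0(void *Exn) {
      __builtin_wasm_throw(0, Exn);  // throw Exn with tag index 0
    }
    void rethrow_current(void) {
      __builtin_wasm_rethrow();      // rethrow the in-flight exception
    }
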
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Basic/BuiltinsX86.def b/interpreter/llvm/src/tools/clang/include/clang/Basic/BuiltinsX86.def
index 68b868ce8e6ed..6d3a478ac360c 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Basic/BuiltinsX86.def
+++ b/interpreter/llvm/src/tools/clang/include/clang/Basic/BuiltinsX86.def
@@ -34,11 +34,6 @@
 // can use it?
 BUILTIN(__builtin_cpu_supports, "bcC*", "nc")
 
-// Win64-compatible va_list functions
-BUILTIN(__builtin_ms_va_start, "vc*&.", "nt")
-BUILTIN(__builtin_ms_va_end, "vc*&", "n")
-BUILTIN(__builtin_ms_va_copy, "vc*&c*&", "n")
-
 // Undefined Values
 //
 TARGET_BUILTIN(__builtin_ia32_undef128, "V2d", "nc", "")
@@ -343,8 +338,8 @@ TARGET_BUILTIN(__builtin_ia32_lfence, "v", "", "sse2")
 TARGET_HEADER_BUILTIN(_mm_lfence, "v", "h", "emmintrin.h", ALL_LANGUAGES, "sse2")
 TARGET_BUILTIN(__builtin_ia32_mfence, "v", "", "sse2")
 TARGET_HEADER_BUILTIN(_mm_mfence, "v", "h", "emmintrin.h", ALL_LANGUAGES, "sse2")
-TARGET_BUILTIN(__builtin_ia32_pause, "v", "", "sse2")
-TARGET_HEADER_BUILTIN(_mm_pause, "v", "h", "emmintrin.h", ALL_LANGUAGES, "sse2")
+TARGET_BUILTIN(__builtin_ia32_pause, "v", "", "")
+TARGET_HEADER_BUILTIN(_mm_pause, "v", "h", "emmintrin.h", ALL_LANGUAGES, "")
 TARGET_BUILTIN(__builtin_ia32_pmuludq128, "V2LLiV4iV4i", "", "sse2")
 TARGET_BUILTIN(__builtin_ia32_psraw128, "V8sV8sV8s", "", "sse2")
 TARGET_BUILTIN(__builtin_ia32_psrad128, "V4iV4iV4i", "", "sse2")
@@ -1107,6 +1102,9 @@ TARGET_BUILTIN(__builtin_ia32_vpconflictsi_512_mask, "V16iV16iV16iUs", "", "avx5
 TARGET_BUILTIN(__builtin_ia32_vplzcntd_512_mask, "V16iV16iV16iUs", "", "avx512cd")
 TARGET_BUILTIN(__builtin_ia32_vplzcntq_512_mask, "V8LLiV8LLiV8LLiUc", "", "avx512cd")
 
+TARGET_BUILTIN(__builtin_ia32_vpopcntd_512, "V16iV16i", "", "avx512vpopcntdq")
+TARGET_BUILTIN(__builtin_ia32_vpopcntq_512, "V8LLiV8LLi", "", "avx512vpopcntdq")
+
 TARGET_BUILTIN(__builtin_ia32_vpermi2varhi128_mask, "V8sV8sV8sV8sUc", "", "avx512vl,avx512bw")
 TARGET_BUILTIN(__builtin_ia32_vpermi2varhi256_mask, "V16sV16sV16sV16sUs", "", "avx512vl,avx512bw")
 TARGET_BUILTIN(__builtin_ia32_vpermt2varhi128_mask, "V8sV8sV8sV8sUc", "", "avx512vl,avx512bw")
@@ -1819,8 +1817,8 @@ TARGET_BUILTIN(__builtin_ia32_mwaitx, "vUiUiUi", "", "mwaitx")
 TARGET_BUILTIN(__builtin_ia32_clzero, "vv*", "", "clzero")
 
 // MSVC
-TARGET_HEADER_BUILTIN(_BitScanForward, "UcULi*ULi", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
-TARGET_HEADER_BUILTIN(_BitScanReverse, "UcULi*ULi", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
+TARGET_HEADER_BUILTIN(_BitScanForward, "UcUNi*UNi", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
+TARGET_HEADER_BUILTIN(_BitScanReverse, "UcUNi*UNi", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
 
 TARGET_HEADER_BUILTIN(_ReadWriteBarrier, "v", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
 TARGET_HEADER_BUILTIN(_ReadBarrier,      "v", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
@@ -1835,15 +1833,15 @@ TARGET_HEADER_BUILTIN(__stosb, "vUc*Ucz", "nh", "intrin.h", ALL_MS_LANGUAGES, ""
 TARGET_HEADER_BUILTIN(__int2c, "v",       "nr", "intrin.h", ALL_MS_LANGUAGES, "")
 TARGET_HEADER_BUILTIN(__ud2,   "v",       "nr", "intrin.h", ALL_MS_LANGUAGES, "")
 
-TARGET_HEADER_BUILTIN(__readfsbyte,  "UcULi", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
-TARGET_HEADER_BUILTIN(__readfsword,  "UsULi", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
-TARGET_HEADER_BUILTIN(__readfsdword, "ULiULi", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
-TARGET_HEADER_BUILTIN(__readfsqword, "ULLiULi", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
+TARGET_HEADER_BUILTIN(__readfsbyte,  "UcUNi", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
+TARGET_HEADER_BUILTIN(__readfsword,  "UsUNi", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
+TARGET_HEADER_BUILTIN(__readfsdword, "UNiUNi", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
+TARGET_HEADER_BUILTIN(__readfsqword, "ULLiUNi", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
 
-TARGET_HEADER_BUILTIN(__readgsbyte,  "UcULi", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
-TARGET_HEADER_BUILTIN(__readgsword,  "UsULi", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
-TARGET_HEADER_BUILTIN(__readgsdword, "ULiULi", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
-TARGET_HEADER_BUILTIN(__readgsqword, "ULLiULi", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
+TARGET_HEADER_BUILTIN(__readgsbyte,  "UcUNi", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
+TARGET_HEADER_BUILTIN(__readgsword,  "UsUNi", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
+TARGET_HEADER_BUILTIN(__readgsdword, "UNiUNi", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
+TARGET_HEADER_BUILTIN(__readgsqword, "ULLiUNi", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
 
 #undef BUILTIN
 #undef TARGET_BUILTIN
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Basic/BuiltinsX86_64.def b/interpreter/llvm/src/tools/clang/include/clang/Basic/BuiltinsX86_64.def
index 2851184c2c846..4cde153d83722 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Basic/BuiltinsX86_64.def
+++ b/interpreter/llvm/src/tools/clang/include/clang/Basic/BuiltinsX86_64.def
@@ -22,8 +22,8 @@
 #  define TARGET_HEADER_BUILTIN(ID, TYPE, ATTRS, HEADER, LANG, FEATURE) BUILTIN(ID, TYPE, ATTRS)
 #endif
 
-TARGET_HEADER_BUILTIN(_BitScanForward64, "UcULi*ULLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
-TARGET_HEADER_BUILTIN(_BitScanReverse64, "UcULi*ULLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
+TARGET_HEADER_BUILTIN(_BitScanForward64, "UcUNi*ULLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
+TARGET_HEADER_BUILTIN(_BitScanReverse64, "UcUNi*ULLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
 
 TARGET_HEADER_BUILTIN(__mulh,  "LLiLLiLLi",    "nch", "intrin.h", ALL_MS_LANGUAGES, "")
 TARGET_HEADER_BUILTIN(__umulh, "ULLiULLiULLi", "nch", "intrin.h", ALL_MS_LANGUAGES, "")
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Basic/DiagnosticASTKinds.td b/interpreter/llvm/src/tools/clang/include/clang/Basic/DiagnosticASTKinds.td
index 652d06278557c..b3cba2066edd8 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Basic/DiagnosticASTKinds.td
+++ b/interpreter/llvm/src/tools/clang/include/clang/Basic/DiagnosticASTKinds.td
@@ -200,12 +200,17 @@ def note_odr_defined_here : Note<"also defined here">;
 def err_odr_function_type_inconsistent : Error<
   "external function %0 declared with incompatible types in different "
   "translation units (%1 vs. %2)">;
-def warn_odr_tag_type_inconsistent : Warning<
-  "type %0 has incompatible definitions in different translation units">,
-  InGroup<DiagGroup<"odr">>;
+def warn_odr_tag_type_inconsistent
+    : Warning<"type %0 has incompatible definitions in different translation "
+              "units">,
+      InGroup<DiagGroup<"odr">>;
+def err_odr_tag_type_inconsistent
+    : Error<"type %0 has incompatible definitions in different translation "
+            "units">;
 def note_odr_tag_kind_here: Note<
   "%0 is a %select{struct|interface|union|class|enum}1 here">;
 def note_odr_field : Note<"field %0 has type %1 here">;
+def note_odr_field_name : Note<"field has name %0 here">;
 def note_odr_missing_field : Note<"no corresponding field here">;
 def note_odr_bit_field : Note<"bit-field %0 with type %1 and length %2 here">;
 def note_odr_not_bit_field : Note<"field %0 is not a bit-field">;
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Basic/DiagnosticDriverKinds.td b/interpreter/llvm/src/tools/clang/include/clang/Basic/DiagnosticDriverKinds.td
index 3833f0f28f05f..fcef881fa0aef 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Basic/DiagnosticDriverKinds.td
+++ b/interpreter/llvm/src/tools/clang/include/clang/Basic/DiagnosticDriverKinds.td
@@ -138,6 +138,10 @@ def err_drv_cc_print_options_failure : Error<
 def err_drv_lto_without_lld : Error<"LTO requires -fuse-ld=lld">;
 def err_drv_preamble_format : Error<
     "incorrect format for -preamble-bytes=N,END">;
+def warn_invalid_ios_deployment_target : Warning<
+  "invalid iOS deployment version '%0', iOS 10 is the maximum deployment "
+  "target for 32-bit targets">, InGroup,
+  DefaultError;
 def err_drv_conflicting_deployment_targets : Error<
   "conflicting deployment targets, both '%0' and '%1' are present in environment">;
 def err_arc_unsupported_on_runtime : Error<
@@ -175,6 +179,8 @@ def warn_drv_optimization_value : Warning<"optimization level '%0' is not suppor
   InGroup<InvalidCommandLineArgument>;
 def warn_ignored_gcc_optimization : Warning<"optimization flag '%0' is not supported">,
   InGroup<IgnoredOptimizationArgument>;
+def warn_ignored_clang_option : Warning<"the flag '%0' has been deprecated and will be ignored">,
+  InGroup<UnusedCommandLineArgument>;
 def warn_drv_unsupported_opt_for_target : Warning<
   "optimization flag '%0' is not supported for target '%1'">,
   InGroup<IgnoredOptimizationArgument>;
@@ -195,8 +201,8 @@ def warn_drv_unused_argument : Warning<
 def warn_drv_empty_joined_argument : Warning<
   "joined argument expects additional value: '%0'">,
   InGroup<UnusedCommandLineArgument>;
-def warn_drv_fdiagnostics_show_hotness_requires_pgo : Warning<
-  "argument '-fdiagnostics-show-hotness' requires profile-guided optimization information">,
+def warn_drv_diagnostics_hotness_requires_pgo : Warning<
+  "argument '%0' requires profile-guided optimization information">,
   InGroup<UnusedCommandLineArgument>;
 def warn_drv_clang_unsupported : Warning<
   "the clang compiler does not support '%0'">;
@@ -226,6 +232,9 @@ def warn_drv_enabling_rtti_with_exceptions : Warning<
 def warn_drv_disabling_vptr_no_rtti_default : Warning<
   "implicitly disabling vptr sanitizer because rtti wasn't enabled">,
   InGroup<DiagGroup<"auto-disable-vptr-sanitizer">>;
+def warn_drv_object_size_disabled_O0 : Warning<
+  "the object size sanitizer has no effect at -O0, but is explicitly enabled: %0">,
+  InGroup<InvalidCommandLineArgument>;
 
 def note_drv_command_failed_diag_msg : Note<
   "diagnostic msg: %0">;
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Basic/DiagnosticFrontendKinds.td b/interpreter/llvm/src/tools/clang/include/clang/Basic/DiagnosticFrontendKinds.td
index 1267f8d09f58a..57c24e9be73a7 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Basic/DiagnosticFrontendKinds.td
+++ b/interpreter/llvm/src/tools/clang/include/clang/Basic/DiagnosticFrontendKinds.td
@@ -179,6 +179,8 @@ def warn_incompatible_analyzer_plugin_api : Warning<
 def note_incompatible_analyzer_plugin_api : Note<
     "current API version is '%0', but plugin was compiled with version '%1'">;
 
+def err_module_build_requires_fmodules : Error<
+  "module compilation requires '-fmodules'">;
 def err_module_interface_requires_modules_ts : Error<
   "module interface compilation requires '-fmodules-ts'">;
 def warn_module_config_mismatch : Warning<
@@ -196,6 +198,7 @@ def err_no_submodule_suggest : Error<
   "no submodule named %0 in module '%1'; did you mean '%2'?">;
 def warn_missing_submodule : Warning<"missing submodule '%0'">,
   InGroup<IncompleteUmbrella>;
+def note_module_import_here : Note<"module imported here">;
 def err_module_cannot_create_includes : Error<
   "cannot create includes file for module %0: %1">;
 def warn_module_config_macro_undef : Warning<
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Basic/DiagnosticGroups.td b/interpreter/llvm/src/tools/clang/include/clang/Basic/DiagnosticGroups.td
index e1a41584023c4..23e4d4633ae2c 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Basic/DiagnosticGroups.td
+++ b/interpreter/llvm/src/tools/clang/include/clang/Basic/DiagnosticGroups.td
@@ -98,7 +98,9 @@ def CXX11CompatDeprecatedWritableStr :
 def DeprecatedAttributes : DiagGroup<"deprecated-attributes">;
 def DeprecatedDeclarations : DiagGroup<"deprecated-declarations">;
 def UnavailableDeclarations : DiagGroup<"unavailable-declarations">;
-def UnguardedAvailability : DiagGroup<"unguarded-availability">;
+def UnguardedAvailabilityNew : DiagGroup<"unguarded-availability-new">;
+def UnguardedAvailability : DiagGroup<"unguarded-availability",
+                                      [UnguardedAvailabilityNew]>;
 // partial-availability is an alias of unguarded-availability.
 def : DiagGroup<"partial-availability", [UnguardedAvailability]>;
 def DeprecatedDynamicExceptionSpec
@@ -150,6 +152,13 @@ def GNUFoldingConstant : DiagGroup<"gnu-folding-constant">;
 def FormatExtraArgs : DiagGroup<"format-extra-args">;
 def FormatZeroLength : DiagGroup<"format-zero-length">;
 
+def InvalidIOSDeploymentTarget : DiagGroup<"invalid-ios-deployment-target">;
+
+def CXX17CompatMangling : DiagGroup<"c++17-compat-mangling">;
+def : DiagGroup<"c++1z-compat-mangling", [CXX17CompatMangling]>;
+// Name of this warning in GCC.
+def NoexceptType : DiagGroup<"noexcept-type", [CXX17CompatMangling]>;
+
 // Warnings for C++1y code which is not compatible with prior C++ standards.
 def CXXPre14Compat : DiagGroup<"c++98-c++11-compat">;
 def CXXPre14CompatPedantic : DiagGroup<"c++98-c++11-compat-pedantic",
@@ -210,8 +219,10 @@ def CXX14Compat : DiagGroup<"c++14-compat", [CXXPre1zCompat]>;
 def CXX14CompatPedantic : DiagGroup<"c++14-compat-pedantic",
                                     [CXXPre1zCompatPedantic]>;
 
-def CXX1zCompat : DiagGroup<"c++1z-compat", [DeprecatedRegister,
-                                             DeprecatedIncrementBool]>;
+def CXX17Compat : DiagGroup<"c++17-compat", [DeprecatedRegister,
+                                             DeprecatedIncrementBool,
+                                             CXX17CompatMangling]>;
+def : DiagGroup<"c++1z-compat", [CXX17Compat]>;
 
 def ExitTimeDestructors : DiagGroup<"exit-time-destructors">;
 def FlexibleArrayExtensions : DiagGroup<"flexible-array-extensions">;
@@ -307,6 +318,7 @@ def : DiagGroup<"nonportable-cfstrings">;
 def NonVirtualDtor : DiagGroup<"non-virtual-dtor">;
 def : DiagGroup<"effc++", [NonVirtualDtor]>;
 def OveralignedType : DiagGroup<"over-aligned">;
+def AlignedAllocationUnavailable : DiagGroup<"aligned-allocation-unavailable">;
 def OldStyleCast : DiagGroup<"old-style-cast">;
 def : DiagGroup<"old-style-definition">;
 def OutOfLineDeclaration : DiagGroup<"out-of-line-declaration">;
@@ -733,6 +745,7 @@ def Pedantic : DiagGroup<"pedantic">;
 // Aliases.
 def : DiagGroup<"", [Extra]>;                   // -W = -Wextra
 def : DiagGroup<"endif-labels", [ExtraTokens]>; // -Wendif-labels=-Wextra-tokens
+def : DiagGroup<"cpp", [PoundWarning]>;         // -Wcpp = -W#warnings
 def : DiagGroup<"comments", [Comment]>;         // -Wcomments = -Wcomment
 def : DiagGroup<"conversion-null",
                 [NullConversion]>; // -Wconversion-null = -Wnull-conversion
@@ -761,10 +774,11 @@ def CXX14 : DiagGroup<"c++14-extensions", [CXX14BinaryLiteral]>;
 
 // A warning group for warnings about using C++1z features as extensions in
 // earlier C++ versions.
-def CXX1z : DiagGroup<"c++1z-extensions">;
+def CXX17 : DiagGroup<"c++17-extensions">;
 
 def : DiagGroup<"c++0x-extensions", [CXX11]>;
 def : DiagGroup<"c++1y-extensions", [CXX14]>;
+def : DiagGroup<"c++1z-extensions", [CXX17]>;
 
 def DelegatingCtorCycles :
   DiagGroup<"delegating-ctor-cycles">;
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Basic/DiagnosticIDs.h b/interpreter/llvm/src/tools/clang/include/clang/Basic/DiagnosticIDs.h
index 7646e33d2366e..cdd358542a0d3 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Basic/DiagnosticIDs.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/Basic/DiagnosticIDs.h
@@ -18,6 +18,7 @@
 #include "clang/Basic/LLVM.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
 #include "llvm/ADT/StringRef.h"
+#include <vector>
 
 namespace clang {
   class DiagnosticsEngine;
@@ -32,7 +33,7 @@ namespace clang {
       DIAG_START_FRONTEND      = DIAG_START_DRIVER          +  200,
       DIAG_START_SERIALIZATION = DIAG_START_FRONTEND        +  100,
       DIAG_START_LEX           = DIAG_START_SERIALIZATION   +  120,
-      DIAG_START_PARSE         = DIAG_START_LEX             +  300,
+      DIAG_START_PARSE         = DIAG_START_LEX             +  400,
       DIAG_START_AST           = DIAG_START_PARSE           +  500,
       DIAG_START_COMMENT       = DIAG_START_AST             +  110,
       DIAG_START_SEMA          = DIAG_START_COMMENT         +  100,
@@ -263,6 +264,13 @@ class DiagnosticIDs : public RefCountedBase<DiagnosticIDs> {
   /// are not SFINAE errors.
   static SFINAEResponse getDiagnosticSFINAEResponse(unsigned DiagID);
 
+  /// \brief Get the string of all diagnostic flags.
+  ///
+  /// \returns A list of all diagnostics flags as they would be written in a
+  /// command line invocation including their `no-` variants. For example:
+  /// `{"-Wempty-body", "-Wno-empty-body", ...}`
+  static std::vector<std::string> getDiagnosticFlags();
+
   /// \brief Get the set of all diagnostic IDs in the group with the given name.
   ///
   /// \param[out] Diags - On return, the diagnostics in the group.
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Basic/DiagnosticLexKinds.td b/interpreter/llvm/src/tools/clang/include/clang/Basic/DiagnosticLexKinds.td
index 77db8993f0185..706881bfdc5d7 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Basic/DiagnosticLexKinds.td
+++ b/interpreter/llvm/src/tools/clang/include/clang/Basic/DiagnosticLexKinds.td
@@ -181,10 +181,10 @@ def err_hex_constant_requires : Error<
 def ext_hex_constant_invalid : Extension<
   "hexadecimal floating constants are a C99 feature">, InGroup;
 def ext_hex_literal_invalid : Extension<
-  "hexadecimal floating literals are a C++1z feature">, InGroup;
+  "hexadecimal floating literals are a C++17 feature">, InGroup;
 def warn_cxx1z_hex_literal : Warning<
   "hexadecimal floating literals are incompatible with "
-  "C++ standards before C++1z">,
+  "C++ standards before C++17">,
   InGroup<CXXPre1zCompatPedantic>, DefaultIgnore;
 def ext_binary_literal : Extension<
   "binary integer literals are a GNU extension">, InGroup;
@@ -208,7 +208,7 @@ def warn_cxx98_compat_unicode_literal : Warning<
   "unicode literals are incompatible with C++98">,
   InGroup<CXX98Compat>, DefaultIgnore;
 def warn_cxx14_compat_u8_character_literal : Warning<
-  "unicode literals are incompatible with C++ standards before C++1z">,
+  "unicode literals are incompatible with C++ standards before C++17">,
   InGroup<CXXPre1zCompat>, DefaultIgnore;
 def warn_cxx11_compat_user_defined_literal : Warning<
   "identifier after literal will be treated as a user-defined literal suffix "
@@ -525,6 +525,12 @@ def err_pp_module_begin_without_module_end : Error<
 def err_pp_module_end_without_module_begin : Error<
   "no matching '#pragma clang module begin' for this "
   "'#pragma clang module end'">;
+def note_pp_module_begin_here : Note<
+  "entering module '%0' due to this pragma">;
+def err_pp_module_build_pth : Error<
+  "'#pragma clang module build' not supported in pretokenized header">;
+def err_pp_module_build_missing_end : Error<
+  "no matching '#pragma clang module endbuild' for this '#pragma clang module build'">;
 
 def err_defined_macro_name : Error<"'defined' cannot be used as a macro name">;
 def err_paste_at_start : Error<
@@ -662,6 +668,12 @@ def warn_mmap_mismatched_top_level_private : Warning<
   InGroup<PrivateModule>;
 def note_mmap_rename_top_level_private_as_submodule : Note<
   "make '%0' a submodule of '%1' to ensure it can be found by name">;
+def err_mmap_duplicate_header_attribute : Error<
+  "header attribute '%0' specified multiple times">;
+def err_mmap_invalid_header_attribute_value : Error<
+  "expected integer literal as value for header attribute '%0'">;
+def err_mmap_expected_header_attribute : Error<
+  "expected a header attribute name ('size' or 'mtime')">;
 
 def warn_auto_module_import : Warning<
   "treating #%select{include|import|include_next|__include_macros}0 as an "
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Basic/DiagnosticOptions.def b/interpreter/llvm/src/tools/clang/include/clang/Basic/DiagnosticOptions.def
index 0ab6724ed9ef1..2467b24fd909a 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Basic/DiagnosticOptions.def
+++ b/interpreter/llvm/src/tools/clang/include/clang/Basic/DiagnosticOptions.def
@@ -87,6 +87,8 @@ VALUE_DIAGOPT(TemplateBacktraceLimit, 32, DefaultTemplateBacktraceLimit)
 VALUE_DIAGOPT(ConstexprBacktraceLimit, 32, DefaultConstexprBacktraceLimit)
 /// Limit number of times to perform spell checking.
 VALUE_DIAGOPT(SpellCheckingLimit, 32, DefaultSpellCheckingLimit)
+/// Limit number of lines shown in a snippet.
+VALUE_DIAGOPT(SnippetLineLimit, 32, DefaultSnippetLineLimit)
 
 VALUE_DIAGOPT(TabStop, 32, DefaultTabStop) /// The distance between tab stops.
 /// Column limit for formatting message diagnostics, or 0 if unused.
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Basic/DiagnosticOptions.h b/interpreter/llvm/src/tools/clang/include/clang/Basic/DiagnosticOptions.h
index c9b0c5def9929..c195003de5c45 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Basic/DiagnosticOptions.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/Basic/DiagnosticOptions.h
@@ -63,11 +63,15 @@ class DiagnosticOptions : public RefCountedBase<DiagnosticOptions>{
   enum TextDiagnosticFormat { Clang, MSVC, Vi };
 
   // Default values.
-  enum { DefaultTabStop = 8, MaxTabStop = 100,
+  enum {
+    DefaultTabStop = 8,
+    MaxTabStop = 100,
     DefaultMacroBacktraceLimit = 6,
     DefaultTemplateBacktraceLimit = 10,
     DefaultConstexprBacktraceLimit = 10,
-    DefaultSpellCheckingLimit = 50 };
+    DefaultSpellCheckingLimit = 50,
+    DefaultSnippetLineLimit = 1,
+  };
 
   // Define simple diagnostic options (with no accessors).
 #define DIAGOPT(Name, Bits, Default) unsigned Name : Bits;
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Basic/DiagnosticParseKinds.td b/interpreter/llvm/src/tools/clang/include/clang/Basic/DiagnosticParseKinds.td
index f04ed8ed4ce6e..5170c07bf6668 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Basic/DiagnosticParseKinds.td
+++ b/interpreter/llvm/src/tools/clang/include/clang/Basic/DiagnosticParseKinds.td
@@ -211,10 +211,10 @@ def err_inline_namespace_alias : Error<"namespace alias cannot be inline">;
 def err_namespace_nonnamespace_scope : Error<
   "namespaces can only be defined in global or namespace scope">;
 def ext_nested_namespace_definition : ExtWarn<
-  "nested namespace definition is a C++1z extension; "
-  "define each namespace separately">, InGroup;
+  "nested namespace definition is a C++17 extension; "
+  "define each namespace separately">, InGroup;
 def warn_cxx14_compat_nested_namespace_definition : Warning<
-  "nested namespace definition is incompatible with C++ standards before C++1z">,
+  "nested namespace definition is incompatible with C++ standards before C++17">,
   InGroup<CXXPre1zCompat>, DefaultIgnore;
 def err_inline_nested_namespace_definition : Error<
   "nested namespace definition cannot be 'inline'">;
@@ -358,7 +358,7 @@ def err_expected_coloncolon_after_super : Error<
   "expected '::' after '__super'">;
 
 def ext_decomp_decl_empty : ExtWarn<
-  "ISO C++1z does not allow a decomposition group to be empty">,
+  "ISO C++17 does not allow a decomposition group to be empty">,
   InGroup<DiagGroup<"empty-decomposition">>;
 
 /// Objective-C parser diagnostics
@@ -522,16 +522,16 @@ def err_function_is_not_record : Error<
 def err_super_in_using_declaration : Error<
   "'__super' cannot be used with a using declaration">;
 def ext_constexpr_if : ExtWarn<
-  "constexpr if is a C++1z extension">, InGroup;
+  "constexpr if is a C++17 extension">, InGroup;
 def warn_cxx14_compat_constexpr_if : Warning<
-  "constexpr if is incompatible with C++ standards before C++1z">,
+  "constexpr if is incompatible with C++ standards before C++17">,
   DefaultIgnore, InGroup<CXXPre1zCompat>;
 def ext_init_statement : ExtWarn<
-  "'%select{if|switch}0' initialization statements are a C++1z extension">,
-  InGroup<CXX1z>;
+  "'%select{if|switch}0' initialization statements are a C++17 extension">,
+  InGroup<CXX17>;
 def warn_cxx14_compat_init_statement : Warning<
   "%select{if|switch}0 initialization statements are incompatible with "
-  "C++ standards before C++1z">, DefaultIgnore, InGroup;
+  "C++ standards before C++17">, DefaultIgnore, InGroup;
 
 // C++ derived classes
 def err_dup_virtual : Error<"duplicate 'virtual' in base specifier">;
@@ -560,7 +560,7 @@ def warn_cxx98_compat_nullptr : Warning<
 
 def warn_cxx14_compat_attribute : Warning<
   "attributes on %select{a namespace|an enumerator}0 declaration are "
-  "incompatible with C++ standards before C++1z">,
+  "incompatible with C++ standards before C++17">,
   InGroup<CXXPre1zCompat>, DefaultIgnore;
 def warn_cxx98_compat_alignas : Warning<"'alignas' is incompatible with C++98">,
   InGroup<CXX98Compat>, DefaultIgnore;
@@ -577,10 +577,10 @@ def err_cxx11_attribute_repeated : Error<
   "attribute %0 cannot appear multiple times in an attribute specifier">;
 def warn_cxx14_compat_using_attribute_ns : Warning<
   "default scope specifier for attributes is incompatible with C++ standards "
-  "before C++1z">, InGroup, DefaultIgnore;
+  "before C++17">, InGroup, DefaultIgnore;
 def ext_using_attribute_ns : ExtWarn<
-  "default scope specifier for attributes is a C++1z extension">,
-  InGroup<CXX1z>;
+  "default scope specifier for attributes is a C++17 extension">,
+  InGroup<CXX17>;
 def err_using_attribute_ns_conflict : Error<
   "attribute with scope specifier cannot follow default scope specifier">;
 def err_attributes_not_allowed : Error<"an attribute list cannot appear here">;
@@ -617,11 +617,11 @@ def err_expected_comma_greater : Error<
 def err_class_on_template_template_param : Error<
   "template template parameter requires 'class' after the parameter list">;
 def ext_template_template_param_typename : ExtWarn<
-  "template template parameter using 'typename' is a C++1z extension">,
-  InGroup<CXX1z>;
+  "template template parameter using 'typename' is a C++17 extension">,
+  InGroup<CXX17>;
 def warn_cxx14_compat_template_template_param_typename : Warning<
   "template template parameter using 'typename' is "
-  "incompatible with C++ standards before C++1z">,
+  "incompatible with C++ standards before C++17">,
   InGroup<CXXPre1zCompat>, DefaultIgnore;
 def err_template_spec_syntax_non_template : Error<
   "identifier followed by '<' indicates a class template specialization but "
@@ -695,10 +695,10 @@ def err_default_template_template_parameter_not_template : Error<
   "template">;
 
 def ext_fold_expression : ExtWarn<
-  "pack fold expression is a C++1z extension">,
-  InGroup<CXX1z>;
+  "pack fold expression is a C++17 extension">,
+  InGroup<CXX17>;
 def warn_cxx14_compat_fold_expression : Warning<
-  "pack fold expression is incompatible with C++ standards before C++1z">,
+  "pack fold expression is incompatible with C++ standards before C++17">,
   InGroup<CXXPre1zCompat>, DefaultIgnore;
 def err_expected_fold_operator : Error<
   "expected a foldable binary operator in fold expression">;
@@ -751,16 +751,16 @@ def err_alias_declaration_pack_expansion : Error<
 // C++1z using-declaration pack expansions
 def ext_multi_using_declaration : ExtWarn<
   "use of multiple declarators in a single using declaration is "
-  "a C++1z extension">, InGroup;
+  "a C++17 extension">, InGroup;
 def warn_cxx1z_compat_multi_using_declaration : Warning<
   "use of multiple declarators in a single using declaration is "
-  "incompatible with C++ standards before C++1z">,
+  "incompatible with C++ standards before C++17">,
   InGroup<CXXPre1zCompat>, DefaultIgnore;
 def ext_using_declaration_pack : ExtWarn<
-  "pack expansion of using declaration is a C++1z extension">, InGroup;
+  "pack expansion of using declaration is a C++17 extension">, InGroup;
 def warn_cxx1z_compat_using_declaration_pack : Warning<
   "pack expansion using declaration is incompatible with C++ standards "
-  "before C++1z">, InGroup, DefaultIgnore;
+  "before C++17">, InGroup, DefaultIgnore;
 
 // C++11 override control
 def ext_override_control_keyword : ExtWarn<
@@ -817,10 +817,10 @@ def err_expected_star_this_capture : Error<
 
 // C++1z constexpr lambda expressions
 def warn_cxx14_compat_constexpr_on_lambda : Warning<
-  "constexpr on lambda expressions is incompatible with C++ standards before C++1z">,
+  "constexpr on lambda expressions is incompatible with C++ standards before C++17">,
   InGroup<CXXPre1zCompat>, DefaultIgnore;
 def ext_constexpr_on_lambda_cxx1z : ExtWarn<
-  "'constexpr' on lambda expressions is a C++1z extension">, InGroup;
+  "'constexpr' on lambda expressions is a C++17 extension">, InGroup;
 
 // Availability attribute
 def err_expected_version : Error<
@@ -887,9 +887,18 @@ def warn_pragma_expected_rparen : Warning<
   "missing ')' after '#pragma %0' - ignoring">, InGroup;
 def warn_pragma_expected_identifier : Warning<
   "expected identifier in '#pragma %0' - ignored">, InGroup;
+
+// '#pragma clang section' related errors
+def err_pragma_expected_clang_section_name : Error<
+  "expected one of [bss|data|rodata|text] section kind in '#pragma %0'">;
+def err_pragma_clang_section_expected_equal : Error<
+  "expected '=' following '#pragma clang section %select{invalid|bss|data|rodata|text}0'">;
+def err_pragma_clang_section_expected_name_or_clear : Error<
+  "expected section name or '\"\"' following '#pragma clang section %select{invalid|bss|data|rodata|text}0'">;
 def warn_pragma_expected_section_name : Warning<
   "expected a string literal for the section name in '#pragma %0' - ignored">,
   InGroup<IgnoredPragmas>;
+
 def warn_pragma_expected_section_push_pop_or_name : Warning<
   "expected push, pop or a string literal for the section name in '#pragma %0' - ignored">,
   InGroup<IgnoredPragmas>;
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td b/interpreter/llvm/src/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 1db6704f6d1fb..6456913a15492 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/interpreter/llvm/src/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -211,9 +211,9 @@ def warn_auto_storage_class : Warning<
 
 def warn_deprecated_register : Warning<
   "'register' storage class specifier is deprecated "
-  "and incompatible with C++1z">, InGroup;
+  "and incompatible with C++17">, InGroup;
 def ext_register_storage_class : ExtWarn<
-  "ISO C++1z does not allow 'register' storage class specifier">,
+  "ISO C++17 does not allow 'register' storage class specifier">,
   DefaultError, InGroup<Register>;
 
 def err_invalid_decl_spec_combination : Error<
@@ -391,9 +391,9 @@ def err_decomp_decl_context : Error<
   "decomposition declaration not permitted in this context">;
 def warn_cxx14_compat_decomp_decl : Warning<
   "decomposition declarations are incompatible with "
-  "C++ standards before C++1z">, DefaultIgnore, InGroup;
+  "C++ standards before C++17">, DefaultIgnore, InGroup;
 def ext_decomp_decl : ExtWarn<
-  "decomposition declarations are a C++1z extension">, InGroup;
+  "decomposition declarations are a C++17 extension">, InGroup;
 def err_decomp_decl_spec : Error<
   "decomposition declaration cannot be declared "
   "%plural{1:'%1'|:with '%1' specifiers}0">;
@@ -494,7 +494,7 @@ def err_access_decl : Error<
   "ISO C++11 does not allow access declarations; "
   "use using declarations instead">;
 def ext_dynamic_exception_spec : ExtWarn<
-  "ISO C++1z does not allow dynamic exception specifications">,
+  "ISO C++17 does not allow dynamic exception specifications">,
   InGroup<DynamicExceptionSpec>, DefaultError;
 def warn_exception_spec_deprecated : Warning<
   "dynamic exception specifications are deprecated">,
@@ -507,7 +507,7 @@ def warn_deprecated_copy_operation : Warning<
   InGroup<Deprecated>, DefaultIgnore;
 def warn_cxx1z_compat_exception_spec_in_signature : Warning<
   "mangled name of %0 will change in C++17 due to non-throwing exception "
-  "specification in function signature">, InGroup;
+  "specification in function signature">, InGroup;
 
 def warn_global_constructor : Warning<
   "declaration requires a global constructor">,
@@ -537,10 +537,10 @@ def err_maybe_falloff_nonvoid_block : Error<
 def err_falloff_nonvoid_block : Error<
   "control reaches end of non-void block">;
 def warn_maybe_falloff_nonvoid_coroutine : Warning<
-  "control may reach end of non-void coroutine">,
+  "control may reach end of coroutine; which is undefined behavior because the promise type %0 does not declare 'return_void()'">,
   InGroup<ReturnType>;
 def warn_falloff_nonvoid_coroutine : Warning<
-  "control reaches end of non-void coroutine">,
+  "control reaches end of coroutine; which is undefined behavior because the promise type %0 does not declare 'return_void()'">,
   InGroup<ReturnType>;
 def warn_suggest_noreturn_function : Warning<
   "%select{function|method}0 %1 could be declared with attribute 'noreturn'">,
@@ -808,8 +808,10 @@ def warn_property_types_are_incompatible : Warning<
   "property type %0 is incompatible with type %1 inherited from %2">,
   InGroup<DiagGroup<"incompatible-property-type">>;
 def warn_protocol_property_mismatch : Warning<
-  "property of type %0 was selected for synthesis">,
+  "property %select{of type %1|with attribute '%1'|without attribute '%1'|with "
+  "getter %1|with setter %1}0 was selected for synthesis">,
   InGroup<DiagGroup<"protocol-property-synthesis-ambiguity">>;
+def err_protocol_property_mismatch: Error<warn_protocol_property_mismatch.Text>;
 def err_undef_interface : Error<"cannot find interface declaration for %0">;
 def err_category_forward_interface : Error<
   "cannot define %select{category|class extension}0 for undefined class %1">;
@@ -1029,6 +1031,8 @@ def warn_auto_synthesizing_protocol_property :Warning<
   "auto property synthesis will not synthesize property %0"
   " declared in protocol %1">,
   InGroup<DiagGroup<"objc-protocol-property-synthesis">>;
+def note_add_synthesize_directive : Note<
+  "add a '@synthesize' directive">;
 def warn_no_autosynthesis_shared_ivar_property : Warning <
   "auto property synthesis will not synthesize property "
   "%0 because it cannot share an ivar with another synthesized property">,
@@ -1086,7 +1090,9 @@ def err_category_property : Error<
 def note_property_declare : Note<
   "property declared here">;
 def note_protocol_property_declare : Note<
-  "it could also be property of type %0 declared here">;
+  "it could also be property "
+  "%select{of type %1|without attribute '%1'|with attribute '%1'|with getter "
+  "%1|with setter %1}0 declared here">;
 def note_property_synthesize : Note<
   "property synthesized here">;
 def err_synthesize_category_decl : Error<
@@ -1194,15 +1200,15 @@ def err_static_assert_expression_is_not_constant : Error<
   "static_assert expression is not an integral constant expression">;
 def err_static_assert_failed : Error<"static_assert failed%select{ %1|}0">;
 def ext_static_assert_no_message : ExtWarn<
-  "static_assert with no message is a C++1z extension">, InGroup;
+  "static_assert with no message is a C++17 extension">, InGroup;
 def warn_cxx14_compat_static_assert_no_message : Warning<
-  "static_assert with no message is incompatible with C++ standards before C++1z">,
+  "static_assert with no message is incompatible with C++ standards before C++17">,
   DefaultIgnore, InGroup<CXXPre1zCompat>;
 
 def ext_inline_variable : ExtWarn<
-  "inline variables are a C++1z extension">, InGroup;
+  "inline variables are a C++17 extension">, InGroup;
 def warn_cxx14_compat_inline_variable : Warning<
-  "inline variables are incompatible with C++ standards before C++1z">,
+  "inline variables are incompatible with C++ standards before C++17">,
   DefaultIgnore, InGroup<CXXPre1zCompat>;
 
 def warn_inline_namespace_reopened_noninline : Warning<
@@ -1554,11 +1560,9 @@ def note_ivar_decl : Note<"instance variable is declared here">;
 def note_bitfield_decl : Note<"bit-field is declared here">;
 def note_implicit_param_decl : Note<"%0 is an implicit parameter">;
 def note_member_synthesized_at : Note<
-  "implicit %select{default constructor|copy constructor|move constructor|copy "
-  "assignment operator|move assignment operator|destructor}0 for %1 first "
-  "required here">;
-def note_inhctor_synthesized_at : Note<
-  "inherited constructor for %0 first required here">;
+  "in implicit %select{default constructor|copy constructor|move constructor|"
+  "copy assignment operator|move assignment operator|destructor}0 for %1 "
+  "first required here">;
 def err_missing_default_ctor : Error<
   "%select{constructor for %1 must explicitly initialize the|"
   "implicit default constructor for %1 must explicitly initialize the|"
@@ -1918,7 +1922,7 @@ def err_auto_not_allowed : Error<
   "|in non-static struct member|in struct member"
   "|in non-static union member|in union member"
   "|in non-static class member|in interface member"
-  "|in exception declaration|in template parameter until C++1z|in block literal"
+  "|in exception declaration|in template parameter until C++17|in block literal"
   "|in template argument|in typedef|in type alias|in function return type"
   "|in conversion function type|here|in lambda parameter"
   "|in type allocated by 'new'|in K&R-style function parameter"
@@ -2143,11 +2147,11 @@ def err_for_range_iter_deduction_failure : Error<
 def err_for_range_member_begin_end_mismatch : Error<
   "range type %0 has '%select{begin|end}1' member but no '%select{end|begin}1' member">;
 def ext_for_range_begin_end_types_differ : ExtWarn<
-  "'begin' and 'end' returning different types (%0 and %1) is a C++1z extension">,
-  InGroup<CXX1z>;
+  "'begin' and 'end' returning different types (%0 and %1) is a C++17 extension">,
+  InGroup<CXX17>;
 def warn_for_range_begin_end_types_differ : Warning<
   "'begin' and 'end' returning different types (%0 and %1) is incompatible "
-  "with C++ standards before C++1z">, InGroup, DefaultIgnore;
+  "with C++ standards before C++17">, InGroup, DefaultIgnore;
 def note_in_for_range: Note<
   "when looking up '%select{begin|end}0' function for range expression "
   "of type %1">;
@@ -2490,7 +2494,7 @@ def err_attribute_address_multiple_qualifiers : Error<
 def err_attribute_address_function_type : Error<
   "function type may not be qualified with an address space">;
 def err_as_qualified_auto_decl : Error<
-  "automatic variable qualified with an address space">;
+  "automatic variable qualified with an%select{| invalid}0 address space">;
 def err_arg_with_address_space : Error<
   "parameter may not be qualified with an address space">;
 def err_field_with_address_space : Error<
@@ -2769,6 +2773,7 @@ def warn_attribute_wrong_decl_type : Warning<
   "|types and namespaces"
   "|Objective-C interfaces"
   "|methods and properties"
+  "|functions, methods, and properties"
   "|struct or union"
   "|struct, union or class"
   "|types"
@@ -2871,18 +2876,32 @@ def note_protocol_method : Note<
 def warn_unguarded_availability :
   Warning<"%0 is only available on %1 %2 or newer">,
   InGroup<UnguardedAvailability>, DefaultIgnore;
+def warn_unguarded_availability_new :
+  Warning<warn_unguarded_availability.Text>,
+  InGroup<UnguardedAvailabilityNew>;
 def warn_partial_availability : Warning<"%0 is only available conditionally">,
     InGroup<UnguardedAvailability>, DefaultIgnore;
+def warn_partial_availability_new : Warning<warn_partial_availability.Text>,
+  InGroup<UnguardedAvailabilityNew>;
 def note_partial_availability_silence : Note<
-  "explicitly redeclare %0 to silence this warning">;
+  "annotate %select{%1|anonymous %1}0 with an availability attribute to silence">;
 def note_unguarded_available_silence : Note<
   "enclose %0 in %select{an @available|a __builtin_available}1 check to silence"
   " this warning">;
 def warn_partial_message : Warning<"%0 is partial: %1">,
     InGroup<UnguardedAvailability>, DefaultIgnore;
+def warn_partial_message_new : Warning<warn_partial_message.Text>,
+  InGroup<UnguardedAvailabilityNew>;
 def warn_partial_fwdclass_message : Warning<
     "%0 may be partial because the receiver type is unknown">,
     InGroup<UnguardedAvailability>, DefaultIgnore;
+def warn_partial_fwdclass_message_new :
+  Warning<warn_partial_fwdclass_message.Text>,
+  InGroup<UnguardedAvailabilityNew>;
+def warn_at_available_unchecked_use : Warning<
+  "%select{@available|__builtin_available}0 does not guard availability here; "
+  "use if (%select{@available|__builtin_available}0) instead">,
+  InGroup<DiagGroup<"unsupported-availability-guard">>;
 
 // Thread Safety Attributes
 def warn_invalid_capability_name : Warning<
@@ -3281,13 +3300,15 @@ def warn_iboutletcollection_property_assign : Warning<
   "IBOutletCollection properties should be copy/strong and not assign">,
   InGroup<ObjCInvalidIBOutletProperty>;
   
-def err_attribute_overloadable_missing : Error<
-  "%select{overloaded function|redeclaration of}0 %1 must have the "
-  "'overloadable' attribute">;
+def err_attribute_overloadable_mismatch : Error<
+  "redeclaration of %0 must %select{not |}1have the 'overloadable' attribute">;
 def note_attribute_overloadable_prev_overload : Note<
-  "previous overload of function is here">;
+  "previous %select{unmarked |}0overload of function is here">;
 def err_attribute_overloadable_no_prototype : Error<
   "'overloadable' function %0 must have a prototype">;
+def err_attribute_overloadable_multiple_unmarked_overloads : Error<
+  "at most one overload for a given name may lack the 'overloadable' "
+  "attribute">;
 def warn_ns_attribute_wrong_return_type : Warning<
   "%0 attribute only applies to %select{functions|methods|properties}1 that "
   "return %select{an Objective-C object|a pointer|a non-retainable pointer}2">,
@@ -3501,6 +3522,8 @@ def note_ovl_candidate_substitution_failure : Note<
     "candidate template ignored: substitution failure%0%1">;
 def note_ovl_candidate_disabled_by_enable_if : Note<
     "candidate template ignored: disabled by %0%1">;
+def note_ovl_candidate_disabled_by_requirement : Note<
+    "candidate template ignored: requirement '%0' was not satisfied%1">;
 def note_ovl_candidate_has_pass_object_size_params: Note<
     "candidate address cannot be taken because parameter %0 has "
     "pass_object_size attribute">;
@@ -3882,7 +3905,7 @@ def err_template_nontype_parm_bad_type : Error<
   "a non-type template parameter cannot have type %0">;
 def warn_cxx14_compat_template_nontype_parm_auto_type : Warning<
   "non-type template parameters declared with %0 are incompatible with C++ "
-  "standards before C++1z">,
+  "standards before C++17">,
   DefaultIgnore, InGroup<CXXPre1zCompat>;
 def err_template_param_default_arg_redefinition : Error<
   "template parameter redefines default argument">;
@@ -4414,6 +4437,9 @@ def err_typename_nested_not_found : Error<"no type named %0 in %1">;
 def err_typename_nested_not_found_enable_if : Error<
   "no type named 'type' in %0; 'enable_if' cannot be used to disable "
   "this declaration">;
+def err_typename_nested_not_found_requirement : Error<
+  "failed requirement '%0'; 'enable_if' cannot be used to disable this "
+  "declaration">;
 def err_typename_nested_not_type : Error<
     "typename specifier refers to non-type member %0 in %1">;
 def note_typename_refers_here : Note<
@@ -4553,8 +4579,11 @@ def warn_deprecated_fwdclass_message : Warning<
     "%0 may be deprecated because the receiver type is unknown">,
     InGroup<DeprecatedDeclarations>;
 def warn_deprecated_def : Warning<
-    "Implementing deprecated %select{method|class|category}0">,
-    InGroup<DeprecatedImplementations>, DefaultIgnore;
+  "implementing deprecated %select{method|class|category}0">,
+  InGroup<DeprecatedImplementations>, DefaultIgnore;
+def warn_unavailable_def : Warning<
+  "implementing unavailable method">,
+  InGroup<DeprecatedImplementations>, DefaultIgnore;
 def err_unavailable : Error<"%0 is unavailable">;
 def err_property_method_unavailable :
     Error<"property access is using %0 method which is unavailable">;
@@ -4581,7 +4610,7 @@ def warn_missing_prototype : Warning<
 def note_declaration_not_a_prototype : Note<
   "this declaration is not a prototype; add 'void' to make it a prototype for a zero-parameter function">; 
 def warn_strict_prototypes : Warning<
-  "this %select{function declaration is not|"
+  "this %select{function declaration is not|block declaration is not|"
   "old-style function definition is not preceded by}0 a prototype">,
   InGroup<DiagGroup<"strict-prototypes">>, DefaultIgnore;
 def warn_missing_variable_declarations : Warning<
@@ -5610,6 +5639,11 @@ def err_enumerator_does_not_exist : Error<
 def note_enum_specialized_here : Note<
   "enum %0 was explicitly specialized here">;
 
+def err_specialization_not_primary_template : Error<
+  "cannot reference member of primary template because deduced class "
+  "template specialization %0 is %select{instantiated from a partial|"
+  "an explicit}1 specialization">;
+
 def err_member_redeclared : Error<"class member cannot be redeclared">;
 def ext_member_redeclared : ExtWarn<"class member cannot be redeclared">,
   InGroup<RedeclaredClassMember>;
@@ -6298,12 +6332,14 @@ def warn_ambiguous_suitable_delete_function_found : Warning<
   InGroup<DiagGroup<"ambiguous-delete">>;
 def note_member_declared_here : Note<
   "member %0 declared here">;
+def note_member_first_declared_here : Note<
+  "member %0 first declared here">;
 def err_decrement_bool : Error<"cannot decrement expression of type bool">;
 def warn_increment_bool : Warning<
   "incrementing expression of type bool is deprecated and "
-  "incompatible with C++1z">, InGroup;
+  "incompatible with C++17">, InGroup;
 def ext_increment_bool : ExtWarn<
-  "ISO C++1z does not allow incrementing expression of type bool">,
+  "ISO C++17 does not allow incrementing expression of type bool">,
   DefaultError, InGroup<IncrementBool>;
 def err_increment_decrement_enum : Error<
   "cannot %select{decrement|increment}0 expression of enum type %1">;
@@ -6331,6 +6367,13 @@ def err_exceptions_disabled : Error<
   "cannot use '%0' with exceptions disabled">;
 def err_objc_exceptions_disabled : Error<
   "cannot use '%0' with Objective-C exceptions disabled">;
+def warn_throw_in_noexcept_func : Warning<
+  "%0 has a non-throwing exception specification but can still throw">,
+  InGroup<Exceptions>;
+def note_throw_in_dtor : Note<
+  "%select{destructor|deallocator}0 has a %select{non-throwing|implicit "
+  "non-throwing}1 exception specification">;
+def note_throw_in_function : Note<"function declared non-throwing here">;
 def err_seh_try_outside_functions : Error<
   "cannot use SEH '__try' in blocks, captured regions, or Obj-C method decls">;
 def err_mixing_cxx_try_seh_try : Error<
@@ -6376,6 +6419,12 @@ def warn_overaligned_type : Warning<
   "type %0 requires %1 bytes of alignment and the default allocator only "
   "guarantees %2 bytes">,
   InGroup<OveralignedType>, DefaultIgnore;
+def warn_aligned_allocation_unavailable :Warning<
+  "aligned %select{allocation|deallocation}0 function of type '%1' possibly "
+  "unavailable on %2">, InGroup, DefaultError;
+def note_silence_unligned_allocation_unavailable : Note<
+  "if you supply your own aligned allocation functions, use "
+  "-Wno-aligned-allocation-unavailable to silence this diagnostic">;
 
 def err_conditional_void_nonvoid : Error<
   "%select{left|right}1 operand to ? is void, but %select{right|left}1 operand "
@@ -6479,10 +6528,10 @@ let CategoryName = "Lambda Issue" in {
 
   // C++1z '*this' captures.
   def warn_cxx14_compat_star_this_lambda_capture : Warning<
-    "by value capture of '*this' is incompatible with C++ standards before C++1z">,
+    "by value capture of '*this' is incompatible with C++ standards before C++17">,
      InGroup<CXXPre1zCompat>, DefaultIgnore;
   def ext_star_this_lambda_capture_cxx1z : ExtWarn<
-    "capture of '*this' by copy is a C++1z extension">, InGroup;
+    "capture of '*this' by copy is a C++17 extension">, InGroup;
 }
 
 def err_return_in_captured_stmt : Error<
@@ -7151,7 +7200,7 @@ def warn_unused_volatile : Warning<
 def ext_cxx14_attr : Extension<
   "use of the %0 attribute is a C++14 extension">, InGroup;
 def ext_cxx1z_attr : Extension<
-  "use of the %0 attribute is a C++1z extension">, InGroup;
+  "use of the %0 attribute is a C++17 extension">, InGroup;
 
 def warn_unused_comparison : Warning<
   "%select{%select{|in}1equality|relational}0 comparison result unused">,
@@ -7263,7 +7312,7 @@ def err_invalid_conversion_between_vector_and_integer : Error<
   "invalid conversion between vector type %0 and integer type %1 "
   "of different size">;
 
-def err_opencl_function_pointer_variable : Error<
+def err_opencl_function_pointer : Error<
   "pointers to functions are not allowed">;
 
 def err_opencl_taking_function_address : Error<
@@ -8009,10 +8058,13 @@ def err_block_on_nonlocal : Error<
 def err_block_on_vm : Error<
   "__block attribute not allowed on declaration with a variably modified type">;
 
-def err_shufflevector_non_vector : Error<
-  "first two arguments to __builtin_shufflevector must be vectors">;
-def err_shufflevector_incompatible_vector : Error<
-  "first two arguments to __builtin_shufflevector must have the same type">;
+def err_vec_builtin_non_vector : Error<
+ "first two arguments to %0 must be vectors">;
+def err_vec_builtin_incompatible_vector : Error<
+  "first two arguments to %0 must have the same type">;
+def err_vsx_builtin_nonconstant_argument : Error<
+  "argument %0 to %1 must be a 2-bit unsigned literal (i.e. 0, 1, 2 or 3)">;
+
 def err_shufflevector_nonconstant_argument : Error<
   "index for __builtin_shufflevector must be a constant integer">;
 def err_shufflevector_argument_too_large : Error<
@@ -8061,10 +8113,10 @@ def err_systemz_invalid_tabort_code : Error<
   "invalid transaction abort code">;
 def err_64_bit_builtin_32_bit_tgt : Error<
   "this builtin is only available on 64-bit targets">;
+def err_builtin_x64_aarch64_only : Error<
+  "this builtin is only available on x86-64 and aarch64 targets">;
 def err_ppc_builtin_only_on_pwr7 : Error<
   "this builtin is only valid on POWER7 or later CPUs">;
-def err_x86_builtin_64_only : Error<
-  "this builtin is only available on x86-64 targets">;
 def err_x86_builtin_invalid_rounding : Error<
   "invalid rounding argument">;
 def err_x86_builtin_invalid_scale : Error<
@@ -8298,14 +8350,22 @@ def err_opencl_ext_vector_component_invalid_length : Error<
   "vector component access has invalid length %0.  Supported: 1,2,3,4,8,16.">;
 def err_opencl_function_variable : Error<
   "%select{non-kernel function|function scope}0 variable cannot be declared in %1 address space">;
+def err_opencl_addrspace_scope : Error<
+  "variables in the %0 address space can only be declared in the outermost "
+  "scope of a kernel function">;
 def err_static_function_scope : Error<
   "variables in function scope cannot be declared static">;
 def err_opencl_bitfields : Error<
   "bit-fields are not supported in OpenCL">;
 def err_opencl_vla : Error<
   "variable length arrays are not supported in OpenCL">;
+def err_opencl_scalar_type_rank_greater_than_vector_type : Error<
+    "scalar operand type has greater rank than the type of the vector "
+    "element. (%0 and %1)">;
 def err_bad_kernel_param_type : Error<
   "%0 cannot be used as the type of a kernel parameter">;
+def err_opencl_implicit_function_decl : Error<
+  "implicit declaration of function %0 is invalid in OpenCL">;
 def err_record_with_pointers_kernel_param : Error<
   "%select{struct|union}0 kernel parameters may not contain pointers">;
 def note_within_field_of_type : Note<
@@ -8362,7 +8422,7 @@ def warn_opencl_attr_deprecated_ignored : Warning <
 def err_opencl_variadic_function : Error<
   "invalid prototype, variadic arguments are not allowed in OpenCL">;
 def err_opencl_requires_extension : Error<
-  "use of %select{type |declaration}0%1 requires %2 extension to be enabled">;
+  "use of %select{type|declaration}0 %1 requires %2 extension to be enabled">;
 
 // OpenCL v2.0 s6.13.6 -- Builtin Pipe Functions
 def err_opencl_builtin_pipe_first_arg : Error<
@@ -8407,8 +8467,6 @@ def err_opencl_builtin_to_addr_invalid_arg : Error<
 // OpenCL v2.0 s6.13.17 Enqueue kernel restrictions.
 def err_opencl_enqueue_kernel_incorrect_args : Error<
   "illegal call to enqueue_kernel, incorrect argument types">;
-def err_opencl_enqueue_kernel_expected_type : Error<
-  "illegal call to enqueue_kernel, expected %0 argument type">;
 def err_opencl_enqueue_kernel_local_size_args : Error<
   "mismatch in number of block parameters and local size arguments passed">;
 def err_opencl_enqueue_kernel_invalid_local_size_type : Error<
@@ -8418,6 +8476,9 @@ def err_opencl_enqueue_kernel_blocks_non_local_void_args : Error<
 def err_opencl_enqueue_kernel_blocks_no_args : Error<
   "blocks with parameters are not accepted in this prototype of enqueue_kernel call">;
 
+def err_opencl_builtin_expected_type : Error<
+  "illegal call to %0, expected %1 argument type">;
+
 // OpenCL v2.2 s2.1.2.3 - Vector Component Access
 def ext_opencl_ext_vector_type_rgba_selector: ExtWarn<
   "vector component name '%0' is an OpenCL version 2.2 feature">,
@@ -8594,11 +8655,11 @@ def err_omp_unknown_reduction_identifier : Error<
 def err_omp_not_resolved_reduction_identifier : Error<
   "unable to resolve declare reduction construct for type %0">;
 def err_omp_reduction_ref_type_arg : Error<
-  "argument of OpenMP clause 'reduction' must reference the same object in all threads">;
+  "argument of OpenMP clause '%0' must reference the same object in all threads">;
 def err_omp_clause_not_arithmetic_type_arg : Error<
-  "arguments of OpenMP clause 'reduction' for 'min' or 'max' must be of %select{scalar|arithmetic}0 type">;
+  "arguments of OpenMP clause '%0' for 'min' or 'max' must be of %select{scalar|arithmetic}1 type">;
 def err_omp_clause_floating_type_arg : Error<
-  "arguments of OpenMP clause 'reduction' with bitwise operators cannot be of floating type">;
+  "arguments of OpenMP clause '%0' with bitwise operators cannot be of floating type">;
 def err_omp_once_referenced : Error<
   "variable can appear only once in OpenMP '%0' clause">;
 def err_omp_once_referenced_in_target_update : Error<
@@ -8736,8 +8797,8 @@ def err_omp_not_mappable_type : Error<
   "type %0 is not mappable to target">;
 def err_omp_invalid_map_type_for_directive : Error<
   "%select{map type '%1' is not allowed|map type must be specified}0 for '#pragma omp %2'">;
-def err_omp_no_map_for_directive : Error<
-  "expected at least one map clause for '#pragma omp %0'">;
+def err_omp_no_clause_for_directive : Error<
+  "expected at least one %0 clause for '#pragma omp %1'">;
 def note_omp_polymorphic_in_target : Note<
   "mappable type cannot be polymorphic">;
 def note_omp_static_member_in_target : Note<
@@ -8806,6 +8867,10 @@ def warn_omp_nesting_simd : Warning<
 def err_omp_orphaned_device_directive : Error<
   "orphaned 'omp %0' directives are prohibited"
   "; perhaps you forget to enclose the directive into a %select{|||target |teams }1region?">;
+def err_omp_reduction_non_addressable_expression : Error<
+  "expected addressable reduction item for the task-based directives">;
+def err_omp_reduction_with_nogroup : Error<
+  "'reduction' clause cannot be used with 'nogroup' clause">;
 } // end of OpenMP category
 
 let CategoryName = "Related Result Type Issue" in {
@@ -8899,8 +8964,6 @@ def note_equivalent_internal_linkage_decl : Note<
 
 def note_redefinition_modules_same_file : Note<
 	"'%0' included multiple times, additional include site in header from module '%1'">;
-def note_redefinition_modules_same_file_modulemap : Note<
-	"consider adding '%0' as part of '%1' definition">;
 def note_redefinition_include_same_file : Note<
 	"'%0' included multiple times, additional include site here">;
 }
@@ -8944,8 +9007,10 @@ def err_coroutine_promise_type_incomplete : Error<
 def err_coroutine_type_missing_specialization : Error<
   "this function cannot be a coroutine: missing definition of "
   "specialization %q0">;
-def err_coroutine_promise_return_ill_formed : Error<
-  "%0 declares both 'return_value' and 'return_void'">;
+def err_coroutine_promise_incompatible_return_functions : Error<
+  "the coroutine promise type %0 declares both 'return_value' and 'return_void'">;
+def err_coroutine_promise_requires_return_function : Error<
+  "the coroutine promise type %0 must declare either 'return_value' or 'return_void'">;
 def note_coroutine_promise_implicit_await_transform_required_here : Note<
   "call to 'await_transform' implicitly required by 'co_await' here">;
 def note_coroutine_promise_suspend_implicitly_required : Note<
@@ -8958,11 +9023,19 @@ def warn_coroutine_promise_unhandled_exception_required_with_exceptions : Warnin
   InGroup<CoroutineMissingUnhandledException>;
 def err_coroutine_promise_get_return_object_on_allocation_failure : Error<
   "%0: 'get_return_object_on_allocation_failure()' must be a static member function">;
+def err_seh_in_a_coroutine_with_cxx_exceptions : Error<
+  "cannot use SEH '__try' in a coroutine when C++ exceptions are enabled">;
 def err_coroutine_promise_new_requires_nothrow : Error<
   "%0 is required to have a non-throwing noexcept specification when the promise "
    "type declares 'get_return_object_on_allocation_failure()'">;
 def note_coroutine_promise_call_implicitly_required : Note<
   "call to %0 implicitly required by coroutine function here">;
+def err_await_suspend_invalid_return_type : Error<
+  "return type of 'await_suspend' is required to be 'void' or 'bool' (have %0)"
+>;
+def note_await_ready_no_bool_conversion : Note<
+  "return type of 'await_ready' is required to be contextually convertible to 'bool'"
+>;
 }
 
 let CategoryName = "Documentation Issue" in {
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Basic/DiagnosticSerializationKinds.td b/interpreter/llvm/src/tools/clang/include/clang/Basic/DiagnosticSerializationKinds.td
index 35e2f67e24b6d..420ccebbfaf03 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Basic/DiagnosticSerializationKinds.td
+++ b/interpreter/llvm/src/tools/clang/include/clang/Basic/DiagnosticSerializationKinds.td
@@ -112,8 +112,13 @@ def note_module_odr_violation_possible_decl : Note<
 def err_module_odr_violation_different_definitions : Error<
   "%q0 has different definitions in different modules; "
   "%select{definition in module '%2' is here|defined here}1">;
+def note_first_module_difference : Note<
+  "in first definition, possible difference is here">;
 def note_module_odr_violation_different_definitions : Note<
   "definition in module '%0' is here">;
+def note_second_module_difference : Note<
+  "in second definition, possible difference is here">;
+
 def err_module_odr_violation_different_instantiations : Error<
   "instantiation of %q0 is different in different modules">;
 
@@ -121,10 +126,12 @@ def err_module_odr_violation_mismatch_decl : Error<
   "%q0 has different definitions in different modules; first difference is "
   "%select{definition in module '%2'|defined here}1 found "
   "%select{end of class|public access specifier|private access specifier|"
-  "protected access specifier|static assert|field|method}3">;
+  "protected access specifier|static assert|field|method|type alias|typedef|"
+  "data member|friend declaration}3">;
 def note_module_odr_violation_mismatch_decl : Note<"but in '%0' found "
   "%select{end of class|public access specifier|private access specifier|"
-  "protected access specifier|static assert|field|method}1">;
+  "protected access specifier|static assert|field|method|type alias|typedef|"
+  "data member|friend declaration}1">;
 
 def err_module_odr_violation_mismatch_decl_diff : Error<
   "%q0 has different definitions in different modules; first difference is "
@@ -140,16 +147,40 @@ def err_module_odr_violation_mismatch_decl_diff : Error<
   "%select{non-|}5mutable field %4|"
   "field %4 with %select{no|an}5 initalizer|"
   "field %4 with an initializer|"
-  "method %4|"
-  "method %4 is %select{not deleted|deleted}5|"
-  "method %4 is %select{|pure }5%select{not virtual|virtual}6|"
-  "method %4 is %select{not static|static}5|"
-  "method %4 is %select{not volatile|volatile}5|"
-  "method %4 is %select{not const|const}5|"
-  "method %4 is %select{not inline|inline}5|"
-  "method %4 that has %5 parameter%s5|"
-  "method %4 with %ordinal5 parameter of type %6%select{| decayed from %8}7|"
-  "method %4 with %ordinal5 parameter named %6}3">;
+  "%select{method %5|constructor|destructor}4|"
+  "%select{method %5|constructor|destructor}4 "
+    "is %select{not deleted|deleted}6|"
+  "%select{method %5|constructor|destructor}4 "
+    "is %select{|pure }6%select{not virtual|virtual}7|"
+  "%select{method %5|constructor|destructor}4 "
+    "is %select{not static|static}6|"
+  "%select{method %5|constructor|destructor}4 "
+    "is %select{not volatile|volatile}6|"
+  "%select{method %5|constructor|destructor}4 "
+    "is %select{not const|const}6|"
+  "%select{method %5|constructor|destructor}4 "
+    "is %select{not inline|inline}6|"
+  "%select{method %5|constructor|destructor}4 "
+    "that has %6 parameter%s6|"
+  "%select{method %5|constructor|destructor}4 "
+    "with %ordinal6 parameter of type %7%select{| decayed from %9}8|"
+  "%select{method %5|constructor|destructor}4 "
+    "with %ordinal6 parameter named %7|"
+  "%select{method %5|constructor|destructor}4 "
+    "with %ordinal6 parameter with%select{out|}7 a default argument|"
+  "%select{method %5|constructor|destructor}4 "
+    "with %ordinal6 parameter with a default argument|"
+  "%select{typedef|type alias}4 name %5|"
+  "%select{typedef|type alias}4 %5 with underlying type %6|"
+  "data member with name %4|"
+  "data member %4 with type %5|"
+  "data member %4 with%select{out|}5 an initializer|"
+  "data member %4 with an initializer|"
+  "data member %4 %select{is constexpr|is not constexpr}5|"
+  "friend %select{class|function}4|"
+  "friend %4|"
+  "friend function %4|"
+  "}3">;
 
 def note_module_odr_violation_mismatch_decl_diff : Note<"but in '%0' found "
   "%select{"
@@ -163,20 +194,51 @@ def note_module_odr_violation_mismatch_decl_diff : Note<"but in '%0' found "
   "%select{non-|}3mutable field %2|"
   "field %2 with %select{no|an}3 initializer|"
   "field %2 with a different initializer|"
-  "method %2|"
-  "method %2 is %select{not deleted|deleted}3|"
-  "method %2 is %select{|pure }3%select{not virtual|virtual}4|"
-  "method %2 is %select{not static|static}3|"
-  "method %2 is %select{not volatile|volatile}3|"
-  "method %2 is %select{not const|const}3|"
-  "method %2 is %select{not inline|inline}3|"
-  "method %2 that has %3 parameter%s3|"
-  "method %2 with %ordinal3 parameter of type %4%select{| decayed from %6}5|"
-  "method %2 with %ordinal3 parameter named %4}1">;
+  "%select{method %3|constructor|destructor}2|"
+  "%select{method %3|constructor|destructor}2 "
+    "is %select{not deleted|deleted}4|"
+  "%select{method %3|constructor|destructor}2 "
+    "is %select{|pure }4%select{not virtual|virtual}5|"
+  "%select{method %3|constructor|destructor}2 "
+    "is %select{not static|static}4|"
+  "%select{method %3|constructor|destructor}2 "
+    "is %select{not volatile|volatile}4|"
+  "%select{method %3|constructor|destructor}2 "
+    "is %select{not const|const}4|"
+  "%select{method %3|constructor|destructor}2 "
+    "is %select{not inline|inline}4|"
+  "%select{method %3|constructor|destructor}2 "
+    "that has %4 parameter%s4|"
+  "%select{method %3|constructor|destructor}2 "
+    "with %ordinal4 parameter of type %5%select{| decayed from %7}6|"
+  "%select{method %3|constructor|destructor}2 "
+    "with %ordinal4 parameter named %5|"
+  "%select{method %3|constructor|destructor}2 "
+    "with %ordinal4 parameter with%select{out|}5 a default argument|"
+  "%select{method %3|constructor|destructor}2 "
+    "with %ordinal4 parameter with a different default argument|"
+  "%select{typedef|type alias}2 name %3|"
+  "%select{typedef|type alias}2 %3 with different underlying type %4|"
+  "data member with name %2|"
+  "data member %2 with different type %3|"
+  "data member %2 with%select{out|}3 an initializer|"
+  "data member %2 with a different initializer|"
+  "data member %2 %select{is constexpr|is not constexpr}3|"
+  "friend %select{class|function}2|"
+  "friend %2|"
+  "friend function %2|"
+  "}1">;
 
-def warn_module_uses_date_time : Warning<
-  "%select{precompiled header|module}0 uses __DATE__ or __TIME__">,
-  InGroup<DiagGroup<"pch-date-time">>;
+def err_module_odr_violation_mismatch_decl_unknown : Error<
+  "%q0 %select{with definition in module '%2'|defined here}1 has different "
+  "definitions in different modules; first difference is this "
+  "%select{||||static assert|field|method|type alias|typedef|data member|"
+  "friend declaration|unexpected decl}3">;
+def note_module_odr_violation_mismatch_decl_unknown : Note<
+  "but in '%0' found "
+  "%select{||||different static assert|different field|different method|"
+  "different type alias|different typedef|different data member|"
+  "different friend declaration|another unexpected decl}1">;
 
 def warn_duplicate_module_file_extension : Warning<
   "duplicate module file extension block name '%0'">,
@@ -186,7 +248,15 @@ def warn_module_system_bit_conflict : Warning<
   "module file '%0' was validated as a system module and is now being imported "
   "as a non-system module; any difference in diagnostic options will be ignored">,
   InGroup<ModuleConflict>;
+} // let CategoryName
 
+let CategoryName = "AST Serialization Issue" in {
+def warn_module_uses_date_time : Warning<
+  "%select{precompiled header|module}0 uses __DATE__ or __TIME__">,
+  InGroup<DiagGroup<"pch-date-time">>;
+def err_module_no_size_mtime_for_header : Error<
+  "cannot emit module %0: %select{size|mtime}1 must be explicitly specified "
+  "for missing header file \"%2\"">;
 } // let CategoryName
 } // let Component
 
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Basic/IdentifierTable.h b/interpreter/llvm/src/tools/clang/include/clang/Basic/IdentifierTable.h
index 9b1ba4a98e6fd..f94b2c9b2f420 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Basic/IdentifierTable.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/Basic/IdentifierTable.h
@@ -272,10 +272,6 @@ class IdentifierInfo {
   /// this identifier is a C++ alternate representation of an operator.
   void setIsCPlusPlusOperatorKeyword(bool Val = true) {
     IsCPPOperatorKeyword = Val;
-    if (Val)
-      NeedsHandleIdentifier = true;
-    else
-      RecomputeNeedsHandleIdentifier();
   }
   bool isCPlusPlusOperatorKeyword() const { return IsCPPOperatorKeyword; }
 
@@ -381,10 +377,9 @@ class IdentifierInfo {
   /// This method is very tied to the definition of HandleIdentifier.  Any
   /// change to it should be reflected here.
   void RecomputeNeedsHandleIdentifier() {
-    NeedsHandleIdentifier =
-      (isPoisoned() | hasMacroDefinition() | isCPlusPlusOperatorKeyword() |
-       isExtensionToken() | isFutureCompatKeyword() || isOutOfDate() ||
-       isModulesImport());
+    NeedsHandleIdentifier = isPoisoned() || hasMacroDefinition() ||
+                            isExtensionToken() || isFutureCompatKeyword() ||
+                            isOutOfDate() || isModulesImport();
   }
 };
 
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Basic/LangOptions.def b/interpreter/llvm/src/tools/clang/include/clang/Basic/LangOptions.def
index 60c8a68cd2e92..c9230e0aaa6f3 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Basic/LangOptions.def
+++ b/interpreter/llvm/src/tools/clang/include/clang/Basic/LangOptions.def
@@ -90,6 +90,7 @@ LANGOPT(CPlusPlus         , 1, 0, "C++")
 LANGOPT(CPlusPlus11       , 1, 0, "C++11")
 LANGOPT(CPlusPlus14       , 1, 0, "C++14")
 LANGOPT(CPlusPlus1z       , 1, 0, "C++1z")
+LANGOPT(CPlusPlus2a       , 1, 0, "C++2a")
 LANGOPT(ObjC1             , 1, 0, "Objective-C 1")
 LANGOPT(ObjC2             , 1, 0, "Objective-C 2")
 BENIGN_LANGOPT(ObjCDefaultSynthProperties , 1, 0,
@@ -199,6 +200,7 @@ LANGOPT(CUDADeviceApproxTranscendentals, 1, 0, "using approximate transcendental
 
 LANGOPT(SizedDeallocation , 1, 0, "sized deallocation")
 LANGOPT(AlignedAllocation , 1, 0, "aligned allocation")
+LANGOPT(AlignedAllocationUnavailable, 1, 0, "aligned allocation functions are unavailable")
 LANGOPT(NewAlignOverride  , 32, 0, "maximum alignment guaranteed by '::operator new(size_t)'")
 LANGOPT(ConceptsTS , 1, 0, "enable C++ Extensions for Concepts")
 BENIGN_LANGOPT(ModulesCodegen , 1, 0, "Modules code generation")
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Basic/LangOptions.h b/interpreter/llvm/src/tools/clang/include/clang/Basic/LangOptions.h
index ceaedf58574fd..8488515d2b677 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Basic/LangOptions.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/Basic/LangOptions.h
@@ -58,6 +58,7 @@ class LangOptions : public LangOptionsBase {
     SOB_Trapping    // -ftrapv
   };
 
+  // FIXME: Unify with TUKind.
   enum CompilingModuleKind {
     CMK_None,           ///< Not compiling a module interface at all.
     CMK_ModuleMap,      ///< Compiling a module from a module map.
@@ -168,7 +169,7 @@ class LangOptions : public LangOptionsBase {
 
   /// Do we need to track the owning module for a local declaration?
   bool trackLocalOwningModule() const {
-    return ModulesLocalVisibility;
+    return isCompilingModule() || ModulesLocalVisibility || ModulesTS;
   }
 
   bool isSignedOverflowDefined() const {
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Basic/Linkage.h b/interpreter/llvm/src/tools/clang/include/clang/Basic/Linkage.h
index e96fb568c0099..6ec8763f24910 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Basic/Linkage.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/Basic/Linkage.h
@@ -45,6 +45,17 @@ enum Linkage : unsigned char {
   /// translation units because of types defined in a inline function.
   VisibleNoLinkage,
 
+  /// \brief Internal linkage according to the Modules TS, but can be referred
+  /// to from other translation units indirectly through inline functions and
+  /// templates in the module interface.
+  ModuleInternalLinkage,
+
+  /// \brief Module linkage, which indicates that the entity can be referred
+  /// to from other translation units within the same module, and indirectly
+  /// from arbitrary other translation units through inline functions and
+  /// templates in the module interface.
+  ModuleLinkage,
+
   /// \brief External linkage, which indicates that the entity can
   /// be referred to from other translation units.
   ExternalLinkage
@@ -74,15 +85,20 @@ inline bool isDiscardableGVALinkage(GVALinkage L) {
 }
 
 inline bool isExternallyVisible(Linkage L) {
-  return L == ExternalLinkage || L == VisibleNoLinkage;
+  return L >= VisibleNoLinkage;
 }
 
 inline Linkage getFormalLinkage(Linkage L) {
-  if (L == UniqueExternalLinkage)
+  switch (L) {
+  case UniqueExternalLinkage:
     return ExternalLinkage;
-  if (L == VisibleNoLinkage)
+  case VisibleNoLinkage:
     return NoLinkage;
-  return L;
+  case ModuleInternalLinkage:
+    return InternalLinkage;
+  default:
+    return L;
+  }
 }
 
 inline bool isExternalFormalLinkage(Linkage L) {
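Because the two new module linkage kinds are inserted between VisibleNoLinkage and ExternalLinkage, isExternallyVisible collapses to a single ordering comparison. A self-contained sketch of that ordering trick, using a trimmed copy of the enum rather than the full header:

```cpp
// Trimmed mirror of clang's Linkage enum; only the ordering matters here.
enum Linkage : unsigned char {
  NoLinkage,
  InternalLinkage,
  UniqueExternalLinkage,
  VisibleNoLinkage,
  ModuleInternalLinkage,
  ModuleLinkage,
  ExternalLinkage
};

// Everything from VisibleNoLinkage upward is visible outside its own
// translation unit, so one comparison replaces an explicit case list.
constexpr bool isExternallyVisible(Linkage L) { return L >= VisibleNoLinkage; }

static_assert(isExternallyVisible(ModuleLinkage),
              "module linkage is externally visible");
static_assert(isExternallyVisible(ModuleInternalLinkage),
              "module-internal entities are still reachable indirectly");
static_assert(!isExternallyVisible(InternalLinkage),
              "plain internal linkage stays invisible");
```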
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Basic/Module.h b/interpreter/llvm/src/tools/clang/include/clang/Basic/Module.h
index 28aa7db52992a..177175eae9650 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Basic/Module.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/Basic/Module.h
@@ -83,6 +83,10 @@ class Module {
   /// are found.
   const DirectoryEntry *Directory;
 
+  /// \brief The presumed file name for the module map defining this module.
+  /// Only non-empty when building from preprocessed source.
+  std::string PresumedModuleMapFile;
+
   /// \brief The umbrella header or directory.
   llvm::PointerUnion<const DirectoryEntry *, const FileEntry *> Umbrella;
 
@@ -150,11 +154,19 @@ class Module {
   /// \brief Stored information about a header directive that was found in the
   /// module map file but has not been resolved to a file.
   struct UnresolvedHeaderDirective {
+    HeaderKind Kind = HK_Normal;
     SourceLocation FileNameLoc;
     std::string FileName;
-    bool IsUmbrella;
+    bool IsUmbrella = false;
+    bool HasBuiltinHeader = false;
+    Optional<off_t> Size;
+    Optional<time_t> ModTime;
   };
 
+  /// Headers that are mentioned in the module map file but that we have not
+  /// yet attempted to resolve to a file on the file system.
+  SmallVector<UnresolvedHeaderDirective, 1> UnresolvedHeaders;
+
   /// \brief Headers that are mentioned in the module map file but could not be
   /// found on the file system.
   SmallVector<UnresolvedHeaderDirective, 1> MissingHeaders;
@@ -381,7 +393,9 @@ class Module {
 
   /// \brief Retrieve the full name of this module, including the path from
   /// its top-level module.
-  std::string getFullModuleName() const;
+  /// \param AllowStringLiterals If \c true, components that might not be
+  ///        lexically valid as identifiers will be emitted as string literals.
+  std::string getFullModuleName(bool AllowStringLiterals = false) const;
 
   /// \brief Whether the full name of this module is equal to joining
   /// \p nameParts with "."s.
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Basic/OpenMPKinds.def b/interpreter/llvm/src/tools/clang/include/clang/Basic/OpenMPKinds.def
index 74ec26f19ac20..645ed52b59cab 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Basic/OpenMPKinds.def
+++ b/interpreter/llvm/src/tools/clang/include/clang/Basic/OpenMPKinds.def
@@ -168,6 +168,9 @@
 #ifndef OPENMP_TARGET_TEAMS_DISTRIBUTE_SIMD_CLAUSE
 #define OPENMP_TARGET_TEAMS_DISTRIBUTE_SIMD_CLAUSE(Name)
 #endif
+#ifndef OPENMP_TASKGROUP_CLAUSE
+#define OPENMP_TASKGROUP_CLAUSE(Name)
+#endif
 
 // OpenMP directives.
 OPENMP_DIRECTIVE(threadprivate)
@@ -270,6 +273,7 @@ OPENMP_CLAUSE(to, OMPToClause)
 OPENMP_CLAUSE(from, OMPFromClause)
 OPENMP_CLAUSE(use_device_ptr, OMPUseDevicePtrClause)
 OPENMP_CLAUSE(is_device_ptr, OMPIsDevicePtrClause)
+OPENMP_CLAUSE(task_reduction,  OMPTaskReductionClause)
 
 // Clauses allowed for OpenMP directive 'parallel'.
 OPENMP_PARALLEL_CLAUSE(if)
@@ -552,6 +556,7 @@ OPENMP_TASKLOOP_CLAUSE(priority)
 OPENMP_TASKLOOP_CLAUSE(grainsize)
 OPENMP_TASKLOOP_CLAUSE(nogroup)
 OPENMP_TASKLOOP_CLAUSE(num_tasks)
+OPENMP_TASKLOOP_CLAUSE(reduction)
 
 // Clauses allowed for OpenMP directive 'taskloop simd'.
 OPENMP_TASKLOOP_SIMD_CLAUSE(if)
@@ -572,6 +577,7 @@ OPENMP_TASKLOOP_SIMD_CLAUSE(simdlen)
 OPENMP_TASKLOOP_SIMD_CLAUSE(grainsize)
 OPENMP_TASKLOOP_SIMD_CLAUSE(nogroup)
 OPENMP_TASKLOOP_SIMD_CLAUSE(num_tasks)
+OPENMP_TASKLOOP_SIMD_CLAUSE(reduction)
 
 // Clauses allowed for OpenMP directive 'critical'.
 OPENMP_CRITICAL_CLAUSE(hint)
@@ -846,6 +852,10 @@ OPENMP_TARGET_TEAMS_DISTRIBUTE_SIMD_CLAUSE(aligned)
 OPENMP_TARGET_TEAMS_DISTRIBUTE_SIMD_CLAUSE(safelen)
 OPENMP_TARGET_TEAMS_DISTRIBUTE_SIMD_CLAUSE(simdlen)
 
+// Clauses allowed for OpenMP directive 'taskgroup'.
+OPENMP_TASKGROUP_CLAUSE(task_reduction)
+
+#undef OPENMP_TASKGROUP_CLAUSE
 #undef OPENMP_TASKLOOP_SIMD_CLAUSE
 #undef OPENMP_TASKLOOP_CLAUSE
 #undef OPENMP_LINEAR_KIND
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Basic/PartialDiagnostic.h b/interpreter/llvm/src/tools/clang/include/clang/Basic/PartialDiagnostic.h
index 53ce95cab1b06..b2f14afe5695a 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Basic/PartialDiagnostic.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/Basic/PartialDiagnostic.h
@@ -329,6 +329,15 @@ class PartialDiagnostic {
 
   bool hasStorage() const { return DiagStorage != nullptr; }
 
+  /// Retrieve the string argument at the given index.
+  StringRef getStringArg(unsigned I) {
+    assert(DiagStorage && "No diagnostic storage?");
+    assert(I < DiagStorage->NumDiagArgs && "Not enough diagnostic args");
+    assert(DiagStorage->DiagArgumentsKind[I]
+             == DiagnosticsEngine::ak_std_string && "Not a string arg");
+    return DiagStorage->DiagArgumentsStr[I];
+  }
+
   friend const PartialDiagnostic &operator<<(const PartialDiagnostic &PD,
                                              unsigned I) {
     PD.AddTaggedVal(I, DiagnosticsEngine::ak_uint);
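getStringArg gives consumers checked access to a previously streamed std::string argument. A miniature model of the pattern (parallel kind/value arrays plus an asserting accessor); this is an illustration, not the actual PartialDiagnostic storage layout:

```cpp
#include <cassert>
#include <string>
#include <vector>

enum ArgKind { ak_uint, ak_std_string };

// Miniature diagnostic: arguments are recorded with a kind tag, and the
// accessor asserts both the index and the expected kind, like getStringArg.
struct MiniDiag {
  std::vector<ArgKind> Kinds;
  std::vector<std::string> Strings; // parallel to Kinds; empty for non-strings
  std::vector<unsigned> Uints;      // parallel to Kinds; 0 for non-uints

  MiniDiag &operator<<(unsigned V) {
    Kinds.push_back(ak_uint);
    Uints.push_back(V);
    Strings.emplace_back();
    return *this;
  }
  MiniDiag &operator<<(const std::string &S) {
    Kinds.push_back(ak_std_string);
    Uints.push_back(0);
    Strings.push_back(S);
    return *this;
  }
  const std::string &getStringArg(unsigned I) {
    assert(I < Kinds.size() && "Not enough diagnostic args");
    assert(Kinds[I] == ak_std_string && "Not a string arg");
    return Strings[I];
  }
};

int main() {
  MiniDiag D;
  D << 42u << std::string("flag");
  assert(D.getStringArg(1) == "flag");
  return 0;
}
```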
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Basic/Sanitizers.def b/interpreter/llvm/src/tools/clang/include/clang/Basic/Sanitizers.def
index f20d326e08f84..71b11974dbfde 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Basic/Sanitizers.def
+++ b/interpreter/llvm/src/tools/clang/include/clang/Basic/Sanitizers.def
@@ -73,6 +73,7 @@ SANITIZER("nullability-return", NullabilityReturn)
 SANITIZER_GROUP("nullability", Nullability,
                 NullabilityArg | NullabilityAssign | NullabilityReturn)
 SANITIZER("object-size", ObjectSize)
+SANITIZER("pointer-overflow", PointerOverflow)
 SANITIZER("return", Return)
 SANITIZER("returns-nonnull-attribute", ReturnsNonnullAttribute)
 SANITIZER("shift-base", ShiftBase)
@@ -108,9 +109,9 @@ SANITIZER("safe-stack", SafeStack)
 SANITIZER_GROUP("undefined", Undefined,
                 Alignment | Bool | ArrayBounds | Enum | FloatCastOverflow |
                     FloatDivideByZero | IntegerDivideByZero | NonnullAttribute |
-                    Null | ObjectSize | Return | ReturnsNonnullAttribute |
-                    Shift | SignedIntegerOverflow | Unreachable | VLABound |
-                    Function | Vptr)
+                    Null | ObjectSize | PointerOverflow | Return |
+                    ReturnsNonnullAttribute | Shift | SignedIntegerOverflow |
+                    Unreachable | VLABound | Function | Vptr)
 
 // -fsanitize=undefined-trap is an alias for -fsanitize=undefined.
 SANITIZER_GROUP("undefined-trap", UndefinedTrap, Undefined)
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Basic/Sanitizers.h b/interpreter/llvm/src/tools/clang/include/clang/Basic/Sanitizers.h
index bfa8e516edd3a..5317720095e06 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Basic/Sanitizers.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/Basic/Sanitizers.h
@@ -61,8 +61,8 @@ struct SanitizerSet {
     Mask = Value ? (Mask | K) : (Mask & ~K);
   }
 
-  /// \brief Disable all sanitizers.
-  void clear() { Mask = 0; }
+  /// Disable the sanitizers specified in \p K.
+  void clear(SanitizerMask K = SanitizerKind::All) { Mask &= ~K; }
 
   /// \brief Returns true if at least one sanitizer is enabled.
   bool empty() const { return Mask == 0; }
@@ -79,6 +79,12 @@ SanitizerMask parseSanitizerValue(StringRef Value, bool AllowGroups);
 /// this group enables.
 SanitizerMask expandSanitizerGroups(SanitizerMask Kinds);
 
+/// Return the sanitizers which do not affect preprocessing.
+static inline SanitizerMask getPPTransparentSanitizers() {
+  return SanitizerKind::CFI | SanitizerKind::Integer |
+         SanitizerKind::Nullability | SanitizerKind::Undefined;
+}
+
 }  // end namespace clang
 
 #endif
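The widened clear(K) keeps the old clear-everything behavior as its default while letting a caller strip just one subset, e.g. the preprocessing-transparent kinds returned above. A small self-contained sketch of the mask arithmetic, with simplified bit values standing in for clang's generated kind constants:

```cpp
#include <cassert>
#include <cstdint>

using SanitizerMask = uint64_t;

namespace SanitizerKind {
// Simplified stand-ins for clang's generated sanitizer kind bits.
const SanitizerMask Address = 1ULL << 0;
const SanitizerMask CFI = 1ULL << 1;
const SanitizerMask Nullability = 1ULL << 2;
const SanitizerMask All = ~SanitizerMask(0);
} // namespace SanitizerKind

struct SanitizerSet {
  SanitizerMask Mask = 0;
  // Disable the sanitizers in K; with the default, behaves like the old clear().
  void clear(SanitizerMask K = SanitizerKind::All) { Mask &= ~K; }
  bool empty() const { return Mask == 0; }
};

int main() {
  SanitizerSet S;
  S.Mask = SanitizerKind::Address | SanitizerKind::CFI;
  S.clear(SanitizerKind::CFI); // selective clear keeps Address enabled
  assert(S.Mask == SanitizerKind::Address);
  S.clear();                   // default argument clears everything
  assert(S.empty());
  return 0;
}
```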
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Basic/SourceLocation.h b/interpreter/llvm/src/tools/clang/include/clang/Basic/SourceLocation.h
index ee9ce4bc31a86..245872aefecda 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Basic/SourceLocation.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/Basic/SourceLocation.h
@@ -262,6 +262,65 @@ class CharSourceRange {
   bool isInvalid() const { return !isValid(); }
 };
 
+/// \brief Represents an unpacked "presumed" location which can be presented
+/// to the user.
+///
+/// A 'presumed' location can be modified by \#line and GNU line marker
+/// directives and is always the expansion point of a normal location.
+///
+/// You can get a PresumedLoc from a SourceLocation with SourceManager.
+class PresumedLoc {
+  const char *Filename;
+  unsigned Line, Col;
+  SourceLocation IncludeLoc;
+
+public:
+  PresumedLoc() : Filename(nullptr) {}
+  PresumedLoc(const char *FN, unsigned Ln, unsigned Co, SourceLocation IL)
+      : Filename(FN), Line(Ln), Col(Co), IncludeLoc(IL) {}
+
+  /// \brief Return true if this object is invalid or uninitialized.
+  ///
+  /// This occurs when created with invalid source locations or when walking
+  /// off the top of a \#include stack.
+  bool isInvalid() const { return Filename == nullptr; }
+  bool isValid() const { return Filename != nullptr; }
+
+  /// \brief Return the presumed filename of this location.
+  ///
+  /// This can be affected by \#line etc.
+  const char *getFilename() const {
+    assert(isValid());
+    return Filename;
+  }
+
+  /// \brief Return the presumed line number of this location.
+  ///
+  /// This can be affected by \#line etc.
+  unsigned getLine() const {
+    assert(isValid());
+    return Line;
+  }
+
+  /// \brief Return the presumed column number of this location.
+  ///
+  /// This cannot be affected by \#line, but is packaged here for convenience.
+  unsigned getColumn() const {
+    assert(isValid());
+    return Col;
+  }
+
+  /// \brief Return the presumed include location of this location.
+  ///
+  /// This can be affected by GNU linemarker directives.
+  SourceLocation getIncludeLoc() const {
+    assert(isValid());
+    return IncludeLoc;
+  }
+};
+
+class FileEntry;
+
 /// \brief A SourceLocation and its associated SourceManager.
 ///
 /// This is useful for argument passing to functions that expect both objects.
@@ -274,6 +333,12 @@ class FullSourceLoc : public SourceLocation {
   explicit FullSourceLoc(SourceLocation Loc, const SourceManager &SM)
     : SourceLocation(Loc), SrcMgr(&SM) {}
 
+  bool hasManager() const {
+    bool hasSrcMgr = SrcMgr != nullptr;
+    assert(hasSrcMgr == isValid() && "FullSourceLoc has location but no manager");
+    return hasSrcMgr;
+  }
+
   /// \pre This FullSourceLoc has an associated SourceManager.
   const SourceManager &getManager() const {
     assert(SrcMgr && "SourceManager is NULL.");
@@ -284,6 +349,13 @@ class FullSourceLoc : public SourceLocation {
 
   FullSourceLoc getExpansionLoc() const;
   FullSourceLoc getSpellingLoc() const;
+  FullSourceLoc getFileLoc() const;
+  std::pair<FullSourceLoc, FullSourceLoc> getImmediateExpansionRange() const;
+  PresumedLoc getPresumedLoc(bool UseLineDirectives = true) const;
+  bool isMacroArgExpansion(FullSourceLoc *StartLoc = nullptr) const;
+  FullSourceLoc getImmediateMacroCallerLoc() const;
+  std::pair<FullSourceLoc, StringRef> getModuleImportLoc() const;
+  unsigned getFileOffset() const;
 
   unsigned getExpansionLineNumber(bool *Invalid = nullptr) const;
   unsigned getExpansionColumnNumber(bool *Invalid = nullptr) const;
@@ -293,6 +365,12 @@ class FullSourceLoc : public SourceLocation {
 
   const char *getCharacterData(bool *Invalid = nullptr) const;
 
+  unsigned getLineNumber(bool *Invalid = nullptr) const;
+  unsigned getColumnNumber(bool *Invalid = nullptr) const;
+
+  std::pair<FullSourceLoc, FullSourceLoc> getExpansionRange() const;
+
+  const FileEntry *getFileEntry() const;
 
   /// \brief Return a StringRef to the source buffer data for the
   /// specified FileID.
@@ -346,50 +424,6 @@ class FullSourceLoc : public SourceLocation {
 
 };
 
-/// \brief Represents an unpacked "presumed" location which can be presented
-/// to the user.
-///
-/// A 'presumed' location can be modified by \#line and GNU line marker
-/// directives and is always the expansion point of a normal location.
-///
-/// You can get a PresumedLoc from a SourceLocation with SourceManager.
-class PresumedLoc {
-  const char *Filename;
-  unsigned Line, Col;
-  SourceLocation IncludeLoc;
-public:
-  PresumedLoc() : Filename(nullptr) {}
-  PresumedLoc(const char *FN, unsigned Ln, unsigned Co, SourceLocation IL)
-    : Filename(FN), Line(Ln), Col(Co), IncludeLoc(IL) {
-  }
-
-  /// \brief Return true if this object is invalid or uninitialized.
-  ///
-  /// This occurs when created with invalid source locations or when walking
-  /// off the top of a \#include stack.
-  bool isInvalid() const { return Filename == nullptr; }
-  bool isValid() const { return Filename != nullptr; }
-
-  /// \brief Return the presumed filename of this location.
-  ///
-  /// This can be affected by \#line etc.
-  const char *getFilename() const { assert(isValid()); return Filename; }
-
-  /// \brief Return the presumed line number of this location.
-  ///
-  /// This can be affected by \#line etc.
-  unsigned getLine() const { assert(isValid()); return Line; }
-
-  /// \brief Return the presumed column number of this location.
-  ///
-  /// This cannot be affected by \#line, but is packaged here for convenience.
-  unsigned getColumn() const { assert(isValid()); return Col; }
-
-  /// \brief Return the presumed include location of this location.
-  ///
-  /// This can be affected by GNU linemarker directives.
-  SourceLocation getIncludeLoc() const { assert(isValid()); return IncludeLoc; }
-};
 
 
 }  // end namespace clang
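Moving the PresumedLoc definition ahead of FullSourceLoc lets the new FullSourceLoc::getPresumedLoc declaration name the type directly. A small reminder of the C++ ordering rule involved, with hypothetical types (a bare forward declaration would also have sufficed for the declaration alone):

```cpp
// Hypothetical miniature of the ordering: a function declaration may use an
// incomplete class type, but the definition needs the complete type, so the
// full class definition is hoisted above its first user.
struct PresumedLocLike;                // forward declaration: still incomplete
struct FullLocLike {
  PresumedLocLike getPresumed() const; // OK while PresumedLocLike is incomplete
};
struct PresumedLocLike { int Line; };  // complete definition
inline PresumedLocLike FullLocLike::getPresumed() const {
  return PresumedLocLike{42};          // the definition needs the complete type
}
```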
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Basic/SourceManager.h b/interpreter/llvm/src/tools/clang/include/clang/Basic/SourceManager.h
index ce2ccb5283dec..0b0534406f4c0 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Basic/SourceManager.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/Basic/SourceManager.h
@@ -80,9 +80,19 @@ namespace SrcMgr {
   /// system_header is seen or in various other cases.
   ///
   enum CharacteristicKind {
-    C_User, C_System, C_ExternCSystem
+    C_User, C_System, C_ExternCSystem, C_User_ModuleMap, C_System_ModuleMap
   };
 
+  /// Determine whether a file / directory characteristic is for system code.
+  inline bool isSystem(CharacteristicKind CK) {
+    return CK != C_User && CK != C_User_ModuleMap;
+  }
+
+  /// Determine whether a file characteristic is for a module map.
+  inline bool isModuleMap(CharacteristicKind CK) {
+    return CK == C_User_ModuleMap || CK == C_System_ModuleMap;
+  }
+
   /// \brief One instance of this struct is kept for every file loaded or used.
   ///
   /// This object owns the MemoryBuffer object.
@@ -251,12 +261,14 @@ namespace SrcMgr {
     /// preprocessing of this \#include, including this SLocEntry.
     ///
     /// Zero means the preprocessor didn't provide such info for this SLocEntry.
-    unsigned NumCreatedFIDs;
+    unsigned NumCreatedFIDs : 31;
+
+    /// \brief Whether this FileInfo has any \#line directives.
+    unsigned HasLineDirectives : 1;
 
-    /// \brief Contains the ContentCache* and the bits indicating the
-    /// characteristic of the file and whether it has \#line info, all
-    /// bitmangled together.
-    uintptr_t Data;
+    /// \brief The content cache and the characteristic of the file.
+    llvm::PointerIntPair<const ContentCache*, 3, CharacteristicKind>
+        ContentAndKind;
 
     friend class clang::SourceManager;
     friend class clang::ASTWriter;
@@ -269,10 +281,9 @@ namespace SrcMgr {
       FileInfo X;
       X.IncludeLoc = IL.getRawEncoding();
       X.NumCreatedFIDs = 0;
-      X.Data = (uintptr_t)Con;
-      assert((X.Data & 7) == 0 &&"ContentCache pointer insufficiently aligned");
-      assert((unsigned)FileCharacter < 4 && "invalid file character");
-      X.Data |= (unsigned)FileCharacter;
+      X.HasLineDirectives = false;
+      X.ContentAndKind.setPointer(Con);
+      X.ContentAndKind.setInt(FileCharacter);
       return X;
     }
 
@@ -280,22 +291,22 @@ namespace SrcMgr {
       return SourceLocation::getFromRawEncoding(IncludeLoc);
     }
 
-    const ContentCache* getContentCache() const {
-      return reinterpret_cast<const ContentCache*>(Data & ~uintptr_t(7));
+    const ContentCache *getContentCache() const {
+      return ContentAndKind.getPointer();
     }
 
     /// \brief Return whether this is a system header or not.
     CharacteristicKind getFileCharacteristic() const {
-      return (CharacteristicKind)(Data & 3);
+      return ContentAndKind.getInt();
     }
 
     /// \brief Return true if this FileID has \#line directives in it.
-    bool hasLineDirectives() const { return (Data & 4) != 0; }
+    bool hasLineDirectives() const { return HasLineDirectives; }
 
     /// \brief Set the flag that indicates that this FileID has
     /// line table entries associated with it.
     void setHasLineDirectives() {
-      Data |= 4;
+      HasLineDirectives = true;
     }
   };
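The FileInfo rework above swaps hand-rolled bit-mangling for llvm::PointerIntPair: the now five-valued CharacteristicKind needs 3 low bits (the old code asserted it fit in 2), so the "has #line directives" flag moves into its own bitfield. A standalone sketch of the packing that PointerIntPair automates, in plain C++ with no LLVM dependency:

```cpp
#include <cassert>
#include <cstdint>

// What llvm::PointerIntPair<T*, 3, Kind> automates: an 8-byte-aligned
// pointer's low three bits are zero, so a small enum can be packed there.
enum Kind { C_User, C_System, C_ExternCSystem, C_User_ModuleMap,
            C_System_ModuleMap };

struct alignas(8) Cache { int Dummy = 0; };

int main() {
  Cache C;
  uintptr_t Data = reinterpret_cast<uintptr_t>(&C);
  assert((Data & 7) == 0 && "pointer insufficiently aligned");
  Data |= C_System_ModuleMap;                                 // pack the kind
  auto *P = reinterpret_cast<Cache *>(Data & ~uintptr_t(7));  // unpack pointer
  Kind K = static_cast<Kind>(Data & 7);                       // unpack kind
  assert(P == &C && K == C_System_ModuleMap);
  return 0;
}
```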
 
@@ -407,6 +418,8 @@ namespace SrcMgr {
     };
 
   public:
+    SLocEntry() : Offset(), IsExpansion(), File() {}
+
     unsigned getOffset() const { return Offset; }
 
     bool isExpansion() const { return IsExpansion; }
@@ -724,6 +737,10 @@ class SourceManager : public RefCountedBase<SourceManager> {
 
   void invalidateCache(FileID FID);
 
+  /// Initialize this source manager suitably to replay the compilation
+  /// described by \p Old. Requires that \p Old outlive \p *this.
+  void initializeForReplay(const SourceManager &Old);
+
   DiagnosticsEngine &getDiagnostics() const { return Diag; }
 
   FileManager &getFileManager() const { return FileMgr; }
@@ -787,9 +804,8 @@ class SourceManager : public RefCountedBase<SourceManager> {
   FileID createFileID(const FileEntry *SourceFile, SourceLocation IncludePos,
                       SrcMgr::CharacteristicKind FileCharacter,
                       int LoadedID = 0, unsigned LoadedOffset = 0) {
-    const SrcMgr::ContentCache *
-      IR = getOrCreateContentCache(SourceFile,
-                              /*isSystemFile=*/FileCharacter != SrcMgr::C_User);
+    const SrcMgr::ContentCache *IR =
+        getOrCreateContentCache(SourceFile, isSystem(FileCharacter));
     assert(IR && "getOrCreateContentCache() cannot return NULL");
     return createFileID(IR, IncludePos, FileCharacter, LoadedID, LoadedOffset);
   }
@@ -1358,7 +1374,7 @@ class SourceManager : public RefCountedBase<SourceManager> {
 
   /// \brief Returns if a SourceLocation is in a system header.
   bool isInSystemHeader(SourceLocation Loc) const {
-    return getFileCharacteristic(Loc) != SrcMgr::C_User;
+    return isSystem(getFileCharacteristic(Loc));
   }
 
   /// \brief Returns if a SourceLocation is in an "extern C" system header.
@@ -1401,10 +1417,9 @@ class SourceManager : public RefCountedBase<SourceManager> {
   /// specified by Loc.
   ///
   /// If FilenameID is -1, it is considered to be unspecified.
-  void AddLineNote(SourceLocation Loc, unsigned LineNo, int FilenameID);
   void AddLineNote(SourceLocation Loc, unsigned LineNo, int FilenameID,
                    bool IsFileEntry, bool IsFileExit,
-                   bool IsSystemHeader, bool IsExternCHeader);
+                   SrcMgr::CharacteristicKind FileKind);
 
   /// \brief Determine if the source manager has a line table.
   bool hasLineTable() const { return LineTable != nullptr; }
@@ -1476,6 +1491,17 @@ class SourceManager : public RefCountedBase<SourceManager> {
   /// \returns true if LHS source location comes before RHS, false otherwise.
   bool isBeforeInTranslationUnit(SourceLocation LHS, SourceLocation RHS) const;
 
+  /// \brief Determines whether the two decomposed source locations are in the
+  ///        same translation unit. As a byproduct, it also calculates the
+  ///        order of the source locations in case they are in the same TU.
+  ///
+  /// \returns A pair of bools: the first is true if the two locations are in
+  ///          the same TU. The second is true only if the first is true and
+  ///          \p LOffs is before \p ROffs.
+  std::pair<bool, bool>
+  isInTheSameTranslationUnit(std::pair<FileID, unsigned> &LOffs,
+                             std::pair<FileID, unsigned> &ROffs) const;
+
   /// \brief Determines the order of 2 source locations in the "source location
   /// address space".
   bool isBeforeInSLocAddrSpace(SourceLocation LHS, SourceLocation RHS) const {
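isInTheSameTranslationUnit reports its result as a pair of bools, where the second component is only meaningful when the first is true. A hypothetical standalone comparator showing how callers are expected to consume that protocol (not the clang implementation):

```cpp
#include <cassert>
#include <utility>

// Sketch of the pair-of-bools protocol: first = "comparable at all",
// second = "less-than", meaningful only when first is true.
std::pair<bool, bool> compareInSameTU(int LOffs, int ROffs, bool SameTU) {
  if (!SameTU)
    return std::make_pair(false, false); // order is undefined across TUs
  return std::make_pair(true, LOffs < ROffs);
}

int main() {
  auto R = compareInSameTU(10, 20, /*SameTU=*/true);
  assert(R.first && R.second);                  // same TU, 10 precedes 20
  assert(!compareInSameTU(1, 2, false).first);  // different TUs: no order
  return 0;
}
```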
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Basic/SourceManagerInternals.h b/interpreter/llvm/src/tools/clang/include/clang/Basic/SourceManagerInternals.h
index e65c97b0031d6..9403dea8889c2 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Basic/SourceManagerInternals.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/Basic/SourceManagerInternals.h
@@ -101,8 +101,6 @@ class LineTableInfo {
   }
   unsigned getNumFilenames() const { return FilenamesByID.size(); }
 
-  void AddLineNote(FileID FID, unsigned Offset,
-                   unsigned LineNo, int FilenameID);
   void AddLineNote(FileID FID, unsigned Offset,
                    unsigned LineNo, int FilenameID,
                    unsigned EntryExit, SrcMgr::CharacteristicKind FileKind);
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Basic/Specifiers.h b/interpreter/llvm/src/tools/clang/include/clang/Basic/Specifiers.h
index 33952f83ff23a..50fb936e01d1f 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Basic/Specifiers.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/Basic/Specifiers.h
@@ -236,7 +236,7 @@ namespace clang {
     CC_X86ThisCall, // __attribute__((thiscall))
     CC_X86VectorCall, // __attribute__((vectorcall))
     CC_X86Pascal,   // __attribute__((pascal))
-    CC_X86_64Win64, // __attribute__((ms_abi))
+    CC_Win64,       // __attribute__((ms_abi))
     CC_X86_64SysV,  // __attribute__((sysv_abi))
     CC_X86RegCall, // __attribute__((regcall))
     CC_AAPCS,       // __attribute__((pcs("aapcs")))
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Basic/TargetBuiltins.h b/interpreter/llvm/src/tools/clang/include/clang/Basic/TargetBuiltins.h
index 5d45e162d9f61..8f4f5e9a74dd8 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Basic/TargetBuiltins.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/Basic/TargetBuiltins.h
@@ -150,6 +150,16 @@ namespace clang {
     };
   }
 
+  /// \brief Nios2 builtins
+  namespace Nios2 {
+  enum {
+    LastTIBuiltin = clang::Builtin::FirstTSBuiltin - 1,
+#define BUILTIN(ID, TYPE, ATTRS) BI##ID,
+#include "clang/Basic/BuiltinsNios2.def"
+    LastTSBuiltin
+  };
+  }
+
   /// \brief MIPS builtins
   namespace Mips {
     enum {
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Basic/TargetInfo.h b/interpreter/llvm/src/tools/clang/include/clang/Basic/TargetInfo.h
index 9bdb288eef4fd..d1a9ea85dbe96 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Basic/TargetInfo.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/Basic/TargetInfo.h
@@ -23,6 +23,7 @@
 #include "clang/Basic/VersionTuple.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/ADT/Optional.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
@@ -225,6 +226,20 @@ class TargetInfo : public RefCountedBase<TargetInfo> {
 
 public:
   IntType getSizeType() const { return SizeType; }
+  IntType getSignedSizeType() const {
+    switch (SizeType) {
+    case UnsignedShort:
+      return SignedShort;
+    case UnsignedInt:
+      return SignedInt;
+    case UnsignedLong:
+      return SignedLong;
+    case UnsignedLongLong:
+      return SignedLongLong;
+    default:
+      llvm_unreachable("Invalid SizeType");
+    }
+  }
   IntType getIntMaxType() const { return IntMaxType; }
   IntType getUIntMaxType() const {
     return getCorrespondingUnsignedType(IntMaxType);
@@ -954,6 +969,14 @@ class TargetInfo : public RefCountedBase<TargetInfo> {
     return *AddrSpaceMap;
   }
 
+  /// \brief Return an AST address space which can be used opportunistically
+  /// for constant global memory. It must be possible to convert pointers into
+  /// this address space to LangAS::Default. If no such address space exists,
+  /// this may return None, and such optimizations will be disabled.
+  virtual llvm::Optional<unsigned> getConstantAddressSpace() const {
+    return LangAS::Default;
+  }
+
   /// \brief Retrieve the name of the platform as it is used in the
   /// availability attribute.
   StringRef getPlatformName() const { return PlatformName; }
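getSignedSizeType returns the signed counterpart of the target's size_t type (the type of %zd-style arguments), presumably so format-string checking can query it instead of guessing. A standalone mirror of the new mapping with a trimmed IntType enum:

```cpp
#include <cassert>

// Trimmed mirror of the new getSignedSizeType() switch: each unsigned
// SizeType candidate maps to its signed counterpart.
enum IntType {
  SignedShort, UnsignedShort,
  SignedInt, UnsignedInt,
  SignedLong, UnsignedLong,
  SignedLongLong, UnsignedLongLong
};

IntType getSignedSizeType(IntType SizeType) {
  switch (SizeType) {
  case UnsignedShort:    return SignedShort;
  case UnsignedInt:      return SignedInt;
  case UnsignedLong:     return SignedLong;
  case UnsignedLongLong: return SignedLongLong;
  default:               assert(false && "Invalid SizeType"); return SignedInt;
  }
}

int main() {
  // A typical 64-bit target defines size_t as unsigned long.
  assert(getSignedSizeType(UnsignedLong) == SignedLong);
  return 0;
}
```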
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Basic/TargetOptions.h b/interpreter/llvm/src/tools/clang/include/clang/Basic/TargetOptions.h
index 6ca1ba39c8fb6..9bb19c7b79df0 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Basic/TargetOptions.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/Basic/TargetOptions.h
@@ -18,6 +18,7 @@
 #include <string>
 #include <vector>
 #include "clang/Basic/OpenCLOptions.h"
+#include "llvm/Target/TargetOptions.h"
 
 namespace clang {
 
@@ -41,7 +42,7 @@ class TargetOptions {
   std::string ABI;
 
   /// The EABI version to use
-  std::string EABIVersion;
+  llvm::EABI EABIVersion;
 
   /// If given, the version string of the linker in use.
   std::string LinkerVersion;
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Basic/TemplateKinds.h b/interpreter/llvm/src/tools/clang/include/clang/Basic/TemplateKinds.h
index aed287b462284..ac99ad185f330 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Basic/TemplateKinds.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/Basic/TemplateKinds.h
@@ -26,13 +26,21 @@ enum TemplateNameKind {
   TNK_Function_template,
   /// The name refers to a template whose specialization produces a
   /// type. The template itself could be a class template, template
-  /// template parameter, or C++0x template alias.
+  /// template parameter, or template alias.
   TNK_Type_template,
   /// The name refers to a variable template whose specialization produces a
   /// variable.
   TNK_Var_template,
-  /// The name refers to a dependent template name. Whether the
-  /// template name is assumed to refer to a type template or a
+  /// The name refers to a dependent template name: 
+  /// \code
+  /// template<typename MetaFun, typename T1, typename T2> struct apply2 {
+  ///   typedef typename MetaFun::template apply<T1, T2>::type type;
+  /// };
+  /// \endcode
+  ///
+  /// Here, "apply" is a dependent template name within the typename
+  /// specifier in the typedef. "apply" is a nested template, and 
+  /// whether the template name is assumed to refer to a type template or a
   /// function template depends on the context in which the template
   /// name occurs.
   TNK_Dependent_template_name
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Basic/TokenKinds.def b/interpreter/llvm/src/tools/clang/include/clang/Basic/TokenKinds.def
index 968b203a38279..be67663a1015c 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Basic/TokenKinds.def
+++ b/interpreter/llvm/src/tools/clang/include/clang/Basic/TokenKinds.def
@@ -411,6 +411,7 @@ TYPE_TRAIT_1(__is_sealed, IsSealed, KEYMS)
 
 // MSVC12.0 / VS2013 Type Traits
 TYPE_TRAIT_1(__is_destructible, IsDestructible, KEYMS)
+TYPE_TRAIT_1(__is_trivially_destructible, IsTriviallyDestructible, KEYCXX)
 TYPE_TRAIT_1(__is_nothrow_destructible, IsNothrowDestructible, KEYMS)
 TYPE_TRAIT_2(__is_nothrow_assignable, IsNothrowAssignable, KEYCXX)
 TYPE_TRAIT_N(__is_constructible, IsConstructible, KEYCXX)
@@ -439,7 +440,6 @@ TYPE_TRAIT_2(__is_convertible_to, IsConvertibleTo, KEYCXX)
 TYPE_TRAIT_1(__is_empty, IsEmpty, KEYCXX)
 TYPE_TRAIT_1(__is_enum, IsEnum, KEYCXX)
 TYPE_TRAIT_1(__is_final, IsFinal, KEYCXX)
-// Tentative name - there's no implementation of std::is_literal_type yet.
 TYPE_TRAIT_1(__is_literal, IsLiteral, KEYCXX)
 // Name for GCC 4.6 compatibility - people have already written libraries using
 // this name unfortunately.
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Basic/TypeTraits.h b/interpreter/llvm/src/tools/clang/include/clang/Basic/TypeTraits.h
index ffe62559002ed..6aadf795d82e5 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Basic/TypeTraits.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/Basic/TypeTraits.h
@@ -65,6 +65,7 @@ namespace clang {
     UTT_IsStandardLayout,
     UTT_IsTrivial,
     UTT_IsTriviallyCopyable,
+    UTT_IsTriviallyDestructible,
     UTT_IsUnion,
     UTT_IsUnsigned,
     UTT_IsVoid,
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Basic/Visibility.h b/interpreter/llvm/src/tools/clang/include/clang/Basic/Visibility.h
index 6ac52ed6b5e1d..cc839d789e7fd 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Basic/Visibility.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/Basic/Visibility.h
@@ -75,6 +75,9 @@ class LinkageInfo {
   static LinkageInfo none() {
     return LinkageInfo(NoLinkage, DefaultVisibility, false);
   }
+  static LinkageInfo visible_none() {
+    return LinkageInfo(VisibleNoLinkage, DefaultVisibility, false);
+  }
 
   Linkage getLinkage() const { return (Linkage)linkage_; }
   Visibility getVisibility() const { return (Visibility)visibility_; }
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Basic/XRayLists.h b/interpreter/llvm/src/tools/clang/include/clang/Basic/XRayLists.h
index fe538289c3a60..8cfea70e280a9 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Basic/XRayLists.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/Basic/XRayLists.h
@@ -37,6 +37,7 @@ class XRayFunctionFilter {
     NONE,
     ALWAYS,
     NEVER,
+    ALWAYS_ARG1,
   };
 
   ImbueAttribute shouldImbueFunction(StringRef FunctionName) const;
diff --git a/interpreter/llvm/src/tools/clang/include/clang/CodeGen/CodeGenABITypes.h b/interpreter/llvm/src/tools/clang/include/clang/CodeGen/CodeGenABITypes.h
index 8ba769dfc3af3..615e55c8b69f8 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/CodeGen/CodeGenABITypes.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/CodeGen/CodeGenABITypes.h
@@ -31,6 +31,8 @@
 namespace llvm {
   class DataLayout;
   class Module;
+  class FunctionType;
+  class Type;
 }
 
 namespace clang {
@@ -70,6 +72,12 @@ const CGFunctionInfo &arrangeFreeFunctionCall(CodeGenModule &CGM,
                                               FunctionType::ExtInfo info,
                                               RequiredArgs args);
 
+// Returns null if the function type is incomplete and can't be lowered.
+llvm::FunctionType *convertFreeFunctionType(CodeGenModule &CGM,
+                                            const FunctionDecl *FD);
+
+llvm::Type *convertTypeForMemory(CodeGenModule &CGM, QualType T);
+
 }  // end namespace CodeGen
 }  // end namespace clang
 
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Config/config.h.cmake b/interpreter/llvm/src/tools/clang/include/clang/Config/config.h.cmake
index 6971b4e9f06d9..b138b5fcd8288 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Config/config.h.cmake
+++ b/interpreter/llvm/src/tools/clang/include/clang/Config/config.h.cmake
@@ -56,4 +56,9 @@
 /* enable x86 relax relocations by default */
 #cmakedefine01 ENABLE_X86_RELAX_RELOCATIONS
 
+/* Enable each functionality of modules */
+#cmakedefine CLANG_ENABLE_ARCMT
+#cmakedefine CLANG_ENABLE_OBJC_REWRITER
+#cmakedefine CLANG_ENABLE_STATIC_ANALYZER
+
 #endif
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Driver/CC1Options.td b/interpreter/llvm/src/tools/clang/include/clang/Driver/CC1Options.td
index bd2062d967b46..205f36b723c87 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Driver/CC1Options.td
+++ b/interpreter/llvm/src/tools/clang/include/clang/Driver/CC1Options.td
@@ -134,7 +134,6 @@ def migrator_no_finalize_removal : Flag<["-"], "no-finalize-removal">,
 //===----------------------------------------------------------------------===//
 
 let Flags = [CC1Option, CC1AsOption, NoDriverOption] in {
-
 def debug_info_kind_EQ : Joined<["-"], "debug-info-kind=">;
 def debug_info_macro : Flag<["-"], "debug-info-macro">,
   HelpText<"Emit macro debug information">;
@@ -144,20 +143,22 @@ def fdebug_compilation_dir : Separate<["-"], "fdebug-compilation-dir">,
   HelpText<"The compilation directory to embed in the debug info.">;
 def dwarf_debug_flags : Separate<["-"], "dwarf-debug-flags">,
   HelpText<"The string to embed in the Dwarf debug flags record.">;
+def compress_debug_sections : Flag<["-", "--"], "compress-debug-sections">,
+    HelpText<"DWARF debug sections compression">;
+def compress_debug_sections_EQ : Joined<["-"], "compress-debug-sections=">,
+    HelpText<"DWARF debug sections compression type">;
 def mno_exec_stack : Flag<["-"], "mnoexecstack">,
   HelpText<"Mark the file as not needing an executable stack">;
 def massembler_fatal_warnings : Flag<["-"], "massembler-fatal-warnings">,
   HelpText<"Make assembler warnings fatal">;
 def mrelax_relocations : Flag<["--"], "mrelax-relocations">,
     HelpText<"Use relaxable elf relocations">;
-def compress_debug_sections : Flag<["-"], "compress-debug-sections">,
-    HelpText<"Compress DWARF debug sections using zlib">;
 def msave_temp_labels : Flag<["-"], "msave-temp-labels">,
   HelpText<"Save temporary labels in the symbol table. "
            "Note this may change .s semantics and shouldn't generally be used "
            "on compiler-generated code.">;
 def mrelocation_model : Separate<["-"], "mrelocation-model">,
-  HelpText<"The relocation model to use">;
+  HelpText<"The relocation model to use">, Values<"static,pic,ropi,rwpi,ropi-rwpi,dynamic-no-pic">;
 def fno_math_builtin : Flag<["-"], "fno-math-builtin">,
   HelpText<"Disable implicit builtin knowledge of math functions">;
 }
@@ -172,6 +173,8 @@ def disable_llvm_optzns : Flag<["-"], "disable-llvm-optzns">,
 def disable_lifetimemarkers : Flag<["-"], "disable-lifetime-markers">,
   HelpText<"Disable lifetime-markers emission even when optimizations are "
            "enabled">;
+def disable_O0_optnone : Flag<["-"], "disable-O0-optnone">,
+  HelpText<"Disable adding the optnone attribute to functions at O0">;
 def disable_red_zone : Flag<["-"], "disable-red-zone">,
   HelpText<"Do not emit code that uses the red zone.">;
 def dwarf_column_info : Flag<["-"], "dwarf-column-info">,
@@ -226,7 +229,7 @@ def no_struct_path_tbaa : Flag<["-"], "no-struct-path-tbaa">,
 def masm_verbose : Flag<["-"], "masm-verbose">,
   HelpText<"Generate verbose assembly output">;
 def mcode_model : Separate<["-"], "mcode-model">,
-  HelpText<"The code model to use">;
+  HelpText<"The code model to use">, Values<"small,kernel,medium,large">;
 def mdebug_pass : Separate<["-"], "mdebug-pass">,
   HelpText<"Enable additional debug output">;
 def mdisable_fp_elim : Flag<["-"], "mdisable-fp-elim">,
@@ -265,8 +268,6 @@ def vectorize_loops : Flag<["-"], "vectorize-loops">,
   HelpText<"Run the Loop vectorization passes">;
 def vectorize_slp : Flag<["-"], "vectorize-slp">,
   HelpText<"Run the SLP vectorization passes">;
-def vectorize_slp_aggressive : Flag<["-"], "vectorize-slp-aggressive">,
-  HelpText<"Run the BB vectorization passes">;
 def dependent_lib : Joined<["--"], "dependent-lib=">,
   HelpText<"Add dependent library">;
 def linker_option : Joined<["--"], "linker-option=">,
@@ -291,6 +292,9 @@ def fsanitize_coverage_trace_gep
 def fsanitize_coverage_8bit_counters
     : Flag<["-"], "fsanitize-coverage-8bit-counters">,
       HelpText<"Enable frequency counters in sanitizer coverage">;
+def fsanitize_coverage_inline_8bit_counters
+    : Flag<["-"], "fsanitize-coverage-inline-8bit-counters">,
+      HelpText<"Enable inline 8-bit counters in sanitizer coverage">;
 def fsanitize_coverage_trace_pc
     : Flag<["-"], "fsanitize-coverage-trace-pc">,
       HelpText<"Enable PC tracing in sanitizer coverage">;
@@ -302,7 +306,7 @@ def fsanitize_coverage_no_prune
       HelpText<"Disable coverage pruning (i.e. instrument all blocks/edges)">;
 def fprofile_instrument_EQ : Joined<["-"], "fprofile-instrument=">,
     HelpText<"Enable PGO instrumentation. The accepted value is clang, llvm, "
-             "or none">;
+             "or none">, Values<"none,clang,llvm">;
 def fprofile_instrument_path_EQ : Joined<["-"], "fprofile-instrument-path=">,
     HelpText<"Generate instrumented code to collect execution counts into "
              " (overridden by LLVM_PROFILE_FILE env var)">;
@@ -317,6 +321,10 @@ def flto_unit: Flag<["-"], "flto-unit">,
 def fno_lto_unit: Flag<["-"], "fno-lto-unit">;
 def fthin_link_bitcode_EQ : Joined<["-"], "fthin-link-bitcode=">,
     HelpText<"Write minimized bitcode to  for the ThinLTO thin link only">;
+def fdebug_pass_manager : Flag<["-"], "fdebug-pass-manager">,
+    HelpText<"Prints debug information for the new pass manager">;
+def fno_debug_pass_manager : Flag<["-"], "fno-debug-pass-manager">,
+    HelpText<"Disables debug printing for the new pass manager">;
 
 //===----------------------------------------------------------------------===//
 // Dependency Output Options
@@ -342,9 +350,9 @@ def diagnostic_serialized_file : Separate<["-"], "serialize-diagnostic-file">,
   HelpText<"File for serializing diagnostics in a binary format">;
 
 def fdiagnostics_format : Separate<["-"], "fdiagnostics-format">,
-  HelpText<"Change diagnostic formatting to match IDE and command line tools">;
+  HelpText<"Change diagnostic formatting to match IDE and command line tools">, Values<"clang,msvc,msvc-fallback,vi">;
 def fdiagnostics_show_category : Separate<["-"], "fdiagnostics-show-category">,
-  HelpText<"Print diagnostic category">;
+  HelpText<"Print diagnostic category">, Values<"none,id,name">;
 def fno_diagnostics_use_presumed_location : Flag<["-"], "fno-diagnostics-use-presumed-location">,
   HelpText<"Ignore #line directives when displaying diagnostic locations">;
def ftabstop : Separate<["-"], "ftabstop">, MetaVarName<"<N>">,
@@ -359,6 +367,9 @@ def fconstexpr_backtrace_limit : Separate<["-"], "fconstexpr-backtrace-limit">,
   HelpText<"Set the maximum number of entries to print in a constexpr evaluation backtrace (0 = no limit).">;
def fspell_checking_limit : Separate<["-"], "fspell-checking-limit">, MetaVarName<"<N>">,
  HelpText<"Set the maximum number of times to perform spell checking on unrecognized identifiers (0 = no limit).">;
+def fcaret_diagnostics_max_lines :
+  Separate<["-"], "fcaret-diagnostics-max-lines">, MetaVarName<"<N>">,
+  HelpText<"Set the maximum number of source lines to show in a caret diagnostic">;
def fmessage_length : Separate<["-"], "fmessage-length">, MetaVarName<"<N>">,
   HelpText<"Format message diagnostics so that they fit within N columns or fewer, when possible.">;
 def verify : Flag<["-"], "verify">,
@@ -559,6 +570,9 @@ def find_pch_source_EQ : Joined<["-"], "find-pch-source=">,
 def fno_pch_timestamp : Flag<["-"], "fno-pch-timestamp">,
   HelpText<"Disable inclusion of timestamp in precompiled headers">;
   
+def aligned_alloc_unavailable : Flag<["-"], "faligned-alloc-unavailable">,
+  HelpText<"Aligned allocation/deallocation functions are unavailable">;
+
 //===----------------------------------------------------------------------===//
 // Language Options
 //===----------------------------------------------------------------------===//
@@ -586,11 +600,11 @@ def fconstant_string_class : Separate<["-"], "fconstant-string-class">,
   MetaVarName<"">,
   HelpText<"Specify the class to use for constant Objective-C string objects.">;
 def fobjc_arc_cxxlib_EQ : Joined<["-"], "fobjc-arc-cxxlib=">,
-  HelpText<"Objective-C++ Automatic Reference Counting standard library kind">;
+  HelpText<"Objective-C++ Automatic Reference Counting standard library kind">, Values<"libc++,libstdc++,none">;
 def fobjc_runtime_has_weak : Flag<["-"], "fobjc-runtime-has-weak">,
   HelpText<"The target Objective-C runtime supports ARC weak operations">;
 def fobjc_dispatch_method_EQ : Joined<["-"], "fobjc-dispatch-method=">,
-  HelpText<"Objective-C dispatch method to use">;
+  HelpText<"Objective-C dispatch method to use">, Values<"legacy,non-legacy,mixed">;
 def disable_objc_default_synthesize_properties : Flag<["-"], "disable-objc-default-synthesize-properties">,
   HelpText<"disable the default synthesis of Objective-C properties">;
 def fencode_extended_block_signature : Flag<["-"], "fencode-extended-block-signature">,
@@ -664,7 +678,7 @@ def fnative_half_arguments_and_returns : Flag<["-"], "fnative-half-arguments-and
 def fallow_half_arguments_and_returns : Flag<["-"], "fallow-half-arguments-and-returns">,
   HelpText<"Allow function arguments and returns of type half">;
 def fdefault_calling_conv_EQ : Joined<["-"], "fdefault-calling-conv=">,
-  HelpText<"Set default MS calling convention">;
+  HelpText<"Set default MS calling convention">, Values<"cdecl,fastcall,stdcall,vectorcall">;
 def finclude_default_header : Flag<["-"], "finclude-default-header">,
   HelpText<"Include the default header file for OpenCL">;
 def fpreserve_vec3_type : Flag<["-"], "fpreserve-vec3-type">,
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Driver/CLCompatOptions.td b/interpreter/llvm/src/tools/clang/include/clang/Driver/CLCompatOptions.td
index d0d9c679de7a3..aebb36ed0e2b6 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Driver/CLCompatOptions.td
+++ b/interpreter/llvm/src/tools/clang/include/clang/Driver/CLCompatOptions.td
@@ -63,6 +63,12 @@ def _SLASH_C : CLFlag<"C">,
def _SLASH_c : CLFlag<"c">, HelpText<"Compile only">, Alias<c>;
def _SLASH_d1reportAllClassLayout : CLFlag<"d1reportAllClassLayout">,
  HelpText<"Dump record layout information">, Alias<fdump_record_layouts>;
+def _SLASH_diagnostics_caret : CLFlag<"diagnostics:caret">,
+  HelpText<"Enable caret and column diagnostics (on by default)">;
+def _SLASH_diagnostics_column : CLFlag<"diagnostics:column">,
+  HelpText<"Disable caret diagnostics but keep column info">;
+def _SLASH_diagnostics_classic : CLFlag<"diagnostics:classic">,
+  HelpText<"Disable column and caret diagnostics">;
 def _SLASH_D : CLJoinedOrSeparate<"D">, HelpText<"Define macro">,
   MetaVarName<"">, Alias;
 def _SLASH_E : CLFlag<"E">, HelpText<"Preprocess to stdout">, Alias;
@@ -324,6 +330,7 @@ def _SLASH_Zc_forScope : CLIgnoredFlag<"Zc:forScope">;
 def _SLASH_Zc_inline : CLIgnoredFlag<"Zc:inline">;
 def _SLASH_Zc_rvalueCast : CLIgnoredFlag<"Zc:rvalueCast">;
 def _SLASH_Zc_wchar_t : CLIgnoredFlag<"Zc:wchar_t">;
+def _SLASH_Zc_ternary : CLIgnoredFlag<"Zc:ternary">;
 def _SLASH_Zm : CLIgnoredJoined<"Zm">;
 def _SLASH_Zo : CLIgnoredFlag<"Zo">;
 def _SLASH_Zo_ : CLIgnoredFlag<"Zo-">;
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Driver/Compilation.h b/interpreter/llvm/src/tools/clang/include/clang/Driver/Compilation.h
index 114e0b33c75ab..036b04605ac70 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Driver/Compilation.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/Driver/Compilation.h
@@ -105,10 +105,13 @@ class Compilation {
   /// Whether we're compiling for diagnostic purposes.
   bool ForDiagnostics;
 
+  /// Whether an error occurred during the parsing of the input args.
+  bool ContainsError;
+
 public:
   Compilation(const Driver &D, const ToolChain &DefaultToolChain,
               llvm::opt::InputArgList *Args,
-              llvm::opt::DerivedArgList *TranslatedArgs);
+              llvm::opt::DerivedArgList *TranslatedArgs, bool ContainsError);
   ~Compilation();
 
   const Driver &getDriver() const { return TheDriver; }
@@ -275,6 +278,9 @@ class Compilation {
   /// Return true if we're compiling for diagnostics.
   bool isForDiagnostics() const { return ForDiagnostics; }
 
+  /// Return whether an error occurred during the parsing of the input args.
+  bool containsError() const { return ContainsError; }
+
   /// Redirect - Redirect output of this compilation. Can only be done once.
   ///
   /// \param Redirects - array of pointers to paths. The array
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Driver/Driver.h b/interpreter/llvm/src/tools/clang/include/clang/Driver/Driver.h
index 1009754a15d52..5a087eea1b4ec 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Driver/Driver.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/Driver/Driver.h
@@ -341,7 +341,8 @@ class Driver {
 
   /// ParseArgStrings - Parse the given list of strings into an
   /// ArgList.
-  llvm::opt::InputArgList ParseArgStrings(ArrayRef Args);
+  llvm::opt::InputArgList ParseArgStrings(ArrayRef Args,
+                                          bool &ContainsError);
 
   /// BuildInputs - Construct the list of inputs and their types from 
   /// the given arguments.
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Driver/Options.h b/interpreter/llvm/src/tools/clang/include/clang/Driver/Options.h
index 57e4452f3e8cc..2da3cb4828c8e 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Driver/Options.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/Driver/Options.h
@@ -39,8 +39,9 @@ enum ClangFlags {
 
 enum ID {
     OPT_INVALID = 0, // This is not an option ID.
-#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \
-               HELPTEXT, METAVAR) OPT_##ID,
+#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM,  \
+               HELPTEXT, METAVAR, VALUES)                                      \
+  OPT_##ID,
 #include "clang/Driver/Options.inc"
     LastOption
 #undef OPTION
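LLVM 5.0's option tablegen emits an extra VALUES column, so every OPTION consumer must widen its macro even when, as here, it only wants the IDs. A minimal X-macro sketch of the same pattern, with hypothetical inline records standing in for the generated Options.inc:

```cpp
#include <iostream>

// Hypothetical stand-in records for the tablegen-generated Options.inc.
#define OPTIONS_LIST(OPTION)                                                   \
  OPTION(prefix_1, "o", output, Separate, INVALID, INVALID, 0, 0, 0,           \
         "Write output to file", "<file>", 0)                                  \
  OPTION(prefix_1, "flto=", flto_EQ, Joined, INVALID, INVALID, 0, 0, 0,        \
         "Set LTO mode", 0, "thin,full")

enum ID {
  OPT_INVALID = 0,
// The widened signature: VALUES must be declared even by consumers that
// ignore it, exactly as in the Options.h hunk above.
#define OPTION(PREFIX, NAME, IDENT, KIND, GROUP, ALIAS, ALIASARGS, FLAGS,      \
               PARAM, HELPTEXT, METAVAR, VALUES)                               \
  OPT_##IDENT,
  OPTIONS_LIST(OPTION)
#undef OPTION
  LastOption
};

int main() {
  std::cout << "number of options: " << (LastOption - 1) << '\n'; // prints 2
  return 0;
}
```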
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Driver/Options.td b/interpreter/llvm/src/tools/clang/include/clang/Driver/Options.td
index d812bd8ec0326..05dc9d7eb3ad9 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Driver/Options.td
+++ b/interpreter/llvm/src/tools/clang/include/clang/Driver/Options.td
@@ -194,6 +204,16 @@ def clang_ignored_f_Group : OptionGroup<"<clang ignored f group>">,
def clang_ignored_m_Group : OptionGroup<"<clang ignored m group>">,
  Group<m_Group>, Flags<[Ignored]>;
 
+// Group for clang options in the process of deprecation.
+// Please include the version that deprecated the flag as comment to allow
+// easier garbage collection.
+def clang_ignored_legacy_options_Group : OptionGroup<"<clang legacy flags>">,
+  Group<f_Group>, Flags<[Ignored]>;
+
+// Retired with clang-5.0
+def : Flag<["-"], "fslp-vectorize-aggressive">, Group;
+def : Flag<["-"], "fno-slp-vectorize-aggressive">, Group;
+
 // Group that ignores all gcc optimizations that won't be implemented
 def clang_ignored_gcc_optimization_f_Group : OptionGroup<
   "">, Group, Flags<[Ignored]>;
@@ -469,6 +479,7 @@ def arch__errors__fatal : Flag<["-"], "arch_errors_fatal">;
 def arch : Separate<["-"], "arch">, Flags<[DriverOption]>;
 def arch__only : Separate<["-"], "arch_only">;
 def a : Joined<["-"], "a">;
+def autocomplete : Joined<["--"], "autocomplete=">;
 def bind__at__load : Flag<["-"], "bind_at_load">;
 def bundle__loader : Separate<["-"], "bundle_loader">;
 def bundle : Flag<["-"], "bundle">;
@@ -492,7 +503,7 @@ def cl_mad_enable : Flag<["-"], "cl-mad-enable">, Group<opencl_Group>, Flags<[CC
def cl_no_signed_zeros : Flag<["-"], "cl-no-signed-zeros">, Group<opencl_Group>, Flags<[CC1Option]>,
   HelpText<"OpenCL only. Allow use of less precise no signed zeros computations in the generated binary.">;
def cl_std_EQ : Joined<["-"], "cl-std=">, Group<opencl_Group>, Flags<[CC1Option]>,
-  HelpText<"OpenCL language standard to compile for.">;
+  HelpText<"OpenCL language standard to compile for.">, Values<"cl,CL,cl1.1,CL1.1,cl1.2,CL1.2,cl2.0,CL2.0">;
def cl_denorms_are_zero : Flag<["-"], "cl-denorms-are-zero">, Group<opencl_Group>, Flags<[CC1Option]>,
   HelpText<"OpenCL only. Allow denormals to be flushed to zero.">;
def cl_fp32_correctly_rounded_divide_sqrt : Flag<["-"], "cl-fp32-correctly-rounded-divide-sqrt">, Group<opencl_Group>, Flags<[CC1Option]>,
@@ -683,6 +694,9 @@ def fbuiltin : Flag<["-"], "fbuiltin">, Group<f_Group>;
def fbuiltin_module_map : Flag <["-"], "fbuiltin-module-map">, Group<f_Group>,
  Flags<[DriverOption]>, HelpText<"Load the clang builtins module map file.">;
def fcaret_diagnostics : Flag<["-"], "fcaret-diagnostics">, Group<f_Group>;
+def fclang_abi_compat_EQ : Joined<["-"], "fclang-abi-compat=">, Group<f_clang_Group>,
+  Flags<[CC1Option]>, MetaVarName<"<version>">, Values<"<major>.<minor>,latest">,
+  HelpText<"Attempt to match the ABI of Clang <version>">;
def fclasspath_EQ : Joined<["-"], "fclasspath=">, Group<f_Group>;
def fcolor_diagnostics : Flag<["-"], "fcolor-diagnostics">, Group<f_Group>,
   Flags<[CoreOption, CC1Option]>, HelpText<"Use colors in diagnostics">;
@@ -722,6 +736,9 @@ def fdiagnostics_print_source_range_info : Flag<["-"], "fdiagnostics-print-sourc
     HelpText<"Print source range spans in numeric form">;
 def fdiagnostics_show_hotness : Flag<["-"], "fdiagnostics-show-hotness">, Group,
     Flags<[CC1Option]>, HelpText<"Enable profile hotness information in diagnostic line">;
+def fdiagnostics_hotness_threshold_EQ : Joined<["-"], "fdiagnostics-hotness-threshold=">,
+    Group<f_Group>, Flags<[CC1Option]>, MetaVarName<"<number>">,
+    HelpText<"Prevent optimization remarks from being output if they do not have at least this profile count">;
def fdiagnostics_show_option : Flag<["-"], "fdiagnostics-show-option">, Group<f_Group>,
     Flags<[CC1Option]>, HelpText<"Print option name with mappable diagnostics">;
 def fdiagnostics_show_note_include_stack : Flag<["-"], "fdiagnostics-show-note-include-stack">,
@@ -803,7 +820,7 @@ def fno_sanitize_coverage
     : CommaJoined<["-"], "fno-sanitize-coverage=">,
      Group<f_clang_Group>, Flags<[CoreOption, DriverOption]>,
       HelpText<"Disable specified features of coverage instrumentation for "
-               "Sanitizers">;
+               "Sanitizers">, Values<"func,bb,edge,indirect-calls,trace-bb,trace-cmp,trace-div,trace-gep,8bit-counters,trace-pc,trace-pc-guard,no-prune,inline-8bit-counters">;
 def fsanitize_memory_track_origins_EQ : Joined<["-"], "fsanitize-memory-track-origins=">,
                                        Group<f_clang_Group>,
                                         HelpText<"Enable origins tracking in MemorySanitizer">;
@@ -922,7 +939,7 @@ def ftrapping_math : Flag<["-"], "ftrapping-math">, Group<f_Group>, Flags<[CC1Op
def fno_trapping_math : Flag<["-"], "fno-trapping-math">, Group<f_Group>, Flags<[CC1Option]>;
def ffp_contract : Joined<["-"], "ffp-contract=">, Group<f_Group>,
   Flags<[CC1Option]>, HelpText<"Form fused FP ops (e.g. FMAs): fast (everywhere)"
-  " | on (according to FP_CONTRACT pragma, default) | off (never fuse)">;
+  " | on (according to FP_CONTRACT pragma, default) | off (never fuse)">, Values<"fast,on,off">;
 
def ffor_scope : Flag<["-"], "ffor-scope">, Group<f_Group>;
def fno_for_scope : Flag<["-"], "fno-for-scope">, Group<f_Group>;
@@ -931,6 +948,10 @@ def frewrite_includes : Flag<["-"], "frewrite-includes">, Group<f_Group>,
  Flags<[CC1Option]>;
def fno_rewrite_includes : Flag<["-"], "fno-rewrite-includes">, Group<f_Group>;
 
+def frewrite_imports : Flag<["-"], "frewrite-imports">, Group<f_Group>,
+  Flags<[CC1Option]>;
+def fno_rewrite_imports : Flag<["-"], "fno-rewrite-imports">, Group<f_Group>;
+
def frewrite_map_file : Separate<["-"], "frewrite-map-file">,
                        Group<f_Group>,
                         Flags<[ DriverOption, CC1Option ]>;
@@ -995,7 +1016,7 @@ def flat__namespace : Flag<["-"], "flat_namespace">;
def flax_vector_conversions : Flag<["-"], "flax-vector-conversions">, Group<f_Group>;
def flimited_precision_EQ : Joined<["-"], "flimited-precision=">, Group<f_Group>;
def flto_EQ : Joined<["-"], "flto=">, Flags<[CoreOption, CC1Option]>, Group<f_Group>,
-  HelpText<"Set LTO mode to either 'full' or 'thin'">;
+  HelpText<"Set LTO mode to either 'full' or 'thin'">, Values<"thin,full">;
def flto : Flag<["-"], "flto">, Flags<[CoreOption, CC1Option]>, Group<f_Group>,
  HelpText<"Enable LTO in 'full' mode">;
def fno_lto : Flag<["-"], "fno-lto">, Group<f_Group>,
@@ -1153,7 +1174,7 @@ def fno_experimental_new_pass_manager : Flag<["-"], "fno-experimental-new-pass-m
  Group<f_clang_Group>, Flags<[CC1Option]>,
   HelpText<"Disables an experimental new pass manager in LLVM.">;
def fveclib : Joined<["-"], "fveclib=">, Group<f_Group>, Flags<[CC1Option]>,
-    HelpText<"Use the given vector functions library">;
+    HelpText<"Use the given vector functions library">, Values<"Accelerate,SVML,none">;
def fno_lax_vector_conversions : Flag<["-"], "fno-lax-vector-conversions">, Group<f_Group>,
   HelpText<"Disallow implicit conversions between vectors with a different number of elements or different element types">, Flags<[CC1Option]>;
def fno_merge_all_constants : Flag<["-"], "fno-merge-all-constants">, Group<f_Group>,
@@ -1337,7 +1358,7 @@ def fno_short_wchar : Flag<["-"], "fno-short-wchar">, Group<f_Group>, Flags<[CC1
   HelpText<"Force wchar_t to be an unsigned int">;
def fshow_overloads_EQ : Joined<["-"], "fshow-overloads=">, Group<f_Group>, Flags<[CC1Option]>,
   HelpText<"Which overload candidates to show when overload resolution fails: "
-           "best|all; defaults to all">;
+           "best|all; defaults to all">, Values<"best,all">;
def fshow_column : Flag<["-"], "fshow-column">, Group<f_Group>, Flags<[CC1Option]>;
def fshow_source_location : Flag<["-"], "fshow-source-location">, Group<f_Group>;
def fspell_checking : Flag<["-"], "fspell-checking">, Group<f_Group>;
@@ -1400,9 +1421,6 @@ def : Flag<["-"], "fno-tree-vectorize">, Alias<fno_vectorize>;
def fslp_vectorize : Flag<["-"], "fslp-vectorize">, Group<f_Group>,
  HelpText<"Enable the superword-level parallelism vectorization passes">;
def fno_slp_vectorize : Flag<["-"], "fno-slp-vectorize">, Group<f_Group>;
-def fslp_vectorize_aggressive : Flag<["-"], "fslp-vectorize-aggressive">, Group<f_Group>,
-  HelpText<"Enable the BB vectorization passes">;
-def fno_slp_vectorize_aggressive : Flag<["-"], "fno-slp-vectorize-aggressive">, Group<f_Group>;
def : Flag<["-"], "ftree-slp-vectorize">, Alias<fslp_vectorize>;
def : Flag<["-"], "fno-tree-slp-vectorize">, Alias<fno_slp_vectorize>;
 def Wlarge_by_value_copy_def : Flag<["-"], "Wlarge-by-value-copy">,
@@ -1451,7 +1469,7 @@ def fuse_init_array : Flag<["-"], "fuse-init-array">, Group<f_Group>, Flags<[CC1
def fno_var_tracking : Flag<["-"], "fno-var-tracking">, Group<clang_ignored_f_Group>;
def fverbose_asm : Flag<["-"], "fverbose-asm">, Group<f_Group>;
def fvisibility_EQ : Joined<["-"], "fvisibility=">, Group<f_Group>,
-  HelpText<"Set the default symbol visibility for all global declarations">;
+  HelpText<"Set the default symbol visibility for all global declarations">, Values<"hidden,default">;
 def fvisibility_inlines_hidden : Flag<["-"], "fvisibility-inlines-hidden">, Group<f_Group>,
   HelpText<"Give inline C++ member functions default visibility by default">,
   Flags<[CC1Option]>;
@@ -1558,6 +1576,10 @@ def gdwarf_aranges : Flag<["-"], "gdwarf-aranges">, Group<g_flags_Group>;
 def gmodules : Flag <["-"], "gmodules">, Group<gN_Group>,
   HelpText<"Generate debug info with external references to clang modules"
            " or precompiled headers">;
+def gz : Flag<["-"], "gz">, Group<g_flags_Group>,
+    HelpText<"DWARF debug sections compression type">;
+def gz_EQ : Joined<["-"], "gz=">, Group<g_flags_Group>,
+    HelpText<"DWARF debug sections compression type">;
 def headerpad__max__install__names : Joined<["-"], "headerpad_max_install_names">;
 def help : Flag<["-", "--"], "help">, Flags<[CC1Option,CC1AsOption]>,
   HelpText<"Display available options">;
@@ -1672,7 +1694,7 @@ def minline_all_stringops : Flag<["-"], "minline-all-stringops">, Group<clang_ignored_m_Group>;
 def mno_inline_all_stringops : Flag<["-"], "mno-inline-all-stringops">, Group<clang_ignored_m_Group>;
 def malign_double : Flag<["-"], "malign-double">, Group<m_Group>, Flags<[CC1Option]>,
   HelpText<"Align doubles to two words in structs (x86 only)">;
-def mfloat_abi_EQ : Joined<["-"], "mfloat-abi=">, Group<m_Group>;
+def mfloat_abi_EQ : Joined<["-"], "mfloat-abi=">, Group<m_Group>, Values<"soft,softfp,hard">;
 def mfpmath_EQ : Joined<["-"], "mfpmath=">, Group<m_Group>;
 def mfpu_EQ : Joined<["-"], "mfpu=">, Group<m_Group>;
 def mhwdiv_EQ : Joined<["-"], "mhwdiv=">, Group<m_Group>;
@@ -1704,9 +1726,9 @@ def mstack_alignment : Joined<["-"], "mstack-alignment=">, Group<m_Group>, Flags
 def mstack_probe_size : Joined<["-"], "mstack-probe-size=">, Group<m_Group>, Flags<[CC1Option]>,
   HelpText<"Set the stack probe size">;
 def mthread_model : Separate<["-"], "mthread-model">, Group<m_Group>, Flags<[CC1Option]>,
-  HelpText<"The thread model to use, e.g. posix, single (posix by default)">;
+  HelpText<"The thread model to use, e.g. posix, single (posix by default)">, Values<"posix,single">;
 def meabi : Separate<["-"], "meabi">, Group<m_Group>, Flags<[CC1Option]>,
-  HelpText<"Set EABI type, e.g. 4, 5 or gnu (default depends on triple)">;
+  HelpText<"Set EABI type, e.g. 4, 5 or gnu (default depends on triple)">, Values<"default,4,5,gnu">;
 
 def mmmx : Flag<["-"], "mmmx">, Group<m_x86_Features_Group>;
 def mno_3dnowa : Flag<["-"], "mno-3dnowa">, Group<m_x86_Features_Group>;
@@ -1740,6 +1762,7 @@ def mno_avx : Flag<["-"], "mno-avx">, Group<m_x86_Features_Group>;
 def mno_avx2 : Flag<["-"], "mno-avx2">, Group<m_x86_Features_Group>;
 def mno_avx512f : Flag<["-"], "mno-avx512f">, Group<m_x86_Features_Group>;
 def mno_avx512cd : Flag<["-"], "mno-avx512cd">, Group<m_x86_Features_Group>;
+def mno_avx512vpopcntdq : Flag<["-"], "mno-avx512vpopcntdq">, Group<m_x86_Features_Group>;
 def mno_avx512er : Flag<["-"], "mno-avx512er">, Group<m_x86_Features_Group>;
 def mno_avx512pf : Flag<["-"], "mno-avx512pf">, Group<m_x86_Features_Group>;
 def mno_avx512dq : Flag<["-"], "mno-avx512dq">, Group<m_x86_Features_Group>;
@@ -1940,6 +1963,7 @@ def mavx : Flag<["-"], "mavx">, Group<m_x86_Features_Group>;
 def mavx2 : Flag<["-"], "mavx2">, Group<m_x86_Features_Group>;
 def mavx512f : Flag<["-"], "mavx512f">, Group<m_x86_Features_Group>;
 def mavx512cd : Flag<["-"], "mavx512cd">, Group<m_x86_Features_Group>;
+def mavx512vpopcntdq : Flag<["-"], "mavx512vpopcntdq">, Group<m_x86_Features_Group>;
 def mavx512er : Flag<["-"], "mavx512er">, Group<m_x86_Features_Group>;
 def mavx512pf : Flag<["-"], "mavx512pf">, Group<m_x86_Features_Group>;
 def mavx512dq : Flag<["-"], "mavx512dq">, Group<m_x86_Features_Group>;
@@ -1998,10 +2022,18 @@ def mdspr2 : Flag<["-"], "mdspr2">, Group<m_Group>;
 def mno_dspr2 : Flag<["-"], "mno-dspr2">, Group<m_Group>;
 def msingle_float : Flag<["-"], "msingle-float">, Group<m_Group>;
 def mdouble_float : Flag<["-"], "mdouble-float">, Group<m_Group>;
+def mmadd4 : Flag<["-"], "mmadd4">, Group<m_Group>,
+  HelpText<"Enable the generation of 4-operand madd.s, madd.d and related instructions.">;
+def mno_madd4 : Flag<["-"], "mno-madd4">, Group<m_Group>,
+  HelpText<"Disable the generation of 4-operand madd.s, madd.d and related instructions.">;
 def mmsa : Flag<["-"], "mmsa">, Group<m_Group>,
   HelpText<"Enable MSA ASE (MIPS only)">;
 def mno_msa : Flag<["-"], "mno-msa">, Group<m_Group>,
   HelpText<"Disable MSA ASE (MIPS only)">;
+def mmt : Flag<["-"], "mmt">, Group<m_Group>,
+  HelpText<"Enable MT ASE (MIPS only)">;
+def mno_mt : Flag<["-"], "mno-mt">, Group<m_Group>,
+  HelpText<"Disable MT ASE (MIPS only)">;
 def mfp64 : Flag<["-"], "mfp64">, Group<m_Group>,
   HelpText<"Use 64-bit floating point registers (MIPS only)">;
 def mfp32 : Flag<["-"], "mfp32">, Group<m_Group>,
@@ -2081,7 +2113,7 @@ def no_cpp_precomp : Flag<["-"], "no-cpp-precomp">, Group<clang_ignored_f_Group>;
 def no_integrated_cpp : Flag<["-", "--"], "no-integrated-cpp">, Flags<[DriverOption]>;
 def no_pedantic : Flag<["-", "--"], "no-pedantic">, Group<pedantic_Group>;
 def no__dead__strip__inits__and__terms : Flag<["-"], "no_dead_strip_inits_and_terms">;
-def nobuiltininc : Flag<["-"], "nobuiltininc">, Flags<[CC1Option]>,
+def nobuiltininc : Flag<["-"], "nobuiltininc">, Flags<[CC1Option, CoreOption]>,
   HelpText<"Disable builtin #include directories">;
 def nocudainc : Flag<["-"], "nocudainc">;
 def nocudalib : Flag<["-"], "nocudalib">;
@@ -2090,10 +2122,11 @@ def nofixprebinding : Flag<["-"], "nofixprebinding">;
 def nolibc : Flag<["-"], "nolibc">;
 def nomultidefs : Flag<["-"], "nomultidefs">;
 def nopie : Flag<["-"], "nopie">;
+def no_pie : Flag<["-"], "no-pie">, Alias<nopie>;
 def noprebind : Flag<["-"], "noprebind">;
 def noseglinkedit : Flag<["-"], "noseglinkedit">;
 def nostartfiles : Flag<["-"], "nostartfiles">;
-def nostdinc : Flag<["-"], "nostdinc">;
+def nostdinc : Flag<["-"], "nostdinc">, Flags<[CoreOption]>;
 def nostdlibinc : Flag<["-"], "nostdlibinc">;
 def nostdincxx : Flag<["-"], "nostdinc++">, Flags<[CC1Option]>,
   HelpText<"Disable standard #include directories for the C++ standard library">;
@@ -2194,7 +2227,7 @@ def std_default_EQ : Joined<["-"], "std-default=">;
 def std_EQ : Joined<["-", "--"], "std=">, Flags<[CC1Option]>,
   Group<CompileOnly_Group>, HelpText<"Language standard to compile for">;
 def stdlib_EQ : Joined<["-", "--"], "stdlib=">, Flags<[CC1Option]>,
-  HelpText<"C++ standard library to use">;
+  HelpText<"C++ standard library to use">, Values<"libc++,libstdc++,platform">;
 def sub__library : JoinedOrSeparate<["-"], "sub_library">;
 def sub__umbrella : JoinedOrSeparate<["-"], "sub_umbrella">;
 def system_header_prefix : Joined<["--"], "system-header-prefix=">,
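
A note on the recurring Values<"..."> additions in the Options.td hunks above: the new field records the legal argument spellings for a flag in machine-readable form, so the driver can surface them, e.g. for shell autocompletion. A standalone sketch of consuming such a comma-separated spec; splitValues is illustrative, not a Clang API:

#include <sstream>
#include <string>
#include <vector>

// Split a Values<"thin,full"> spec into the candidates a completer would offer.
static std::vector<std::string> splitValues(const std::string &Spec) {
  std::vector<std::string> Candidates;
  std::istringstream SS(Spec);
  std::string Item;
  while (std::getline(SS, Item, ','))   // entries are comma-separated
    Candidates.push_back(Item);
  return Candidates;
}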
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Driver/ToolChain.h b/interpreter/llvm/src/tools/clang/include/clang/Driver/ToolChain.h
index 105d0f338ac6f..6651491e5b27c 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Driver/ToolChain.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/Driver/ToolChain.h
@@ -315,7 +315,7 @@ class ToolChain {
 
   /// IsUnwindTablesDefault - Does this tool chain use -funwind-tables
   /// by default.
-  virtual bool IsUnwindTablesDefault() const;
+  virtual bool IsUnwindTablesDefault(const llvm::opt::ArgList &Args) const;
 
   /// \brief Test whether this toolchain defaults to PIC.
   virtual bool isPICDefault() const = 0;
@@ -411,7 +411,8 @@ class ToolChain {
 
   /// \brief Add options that need to be passed to cc1 for this target.
   virtual void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs,
-                                     llvm::opt::ArgStringList &CC1Args) const;
+                                     llvm::opt::ArgStringList &CC1Args,
+                                     Action::OffloadKind DeviceOffloadKind) const;
 
   /// \brief Add warning options that need to be passed to cc1 for this target.
   virtual void addClangWarningOptions(llvm::opt::ArgStringList &CC1Args) const;
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Edit/EditedSource.h b/interpreter/llvm/src/tools/clang/include/clang/Edit/EditedSource.h
index b082e4e0a3dfb..d95a0c2be805d 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Edit/EditedSource.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/Edit/EditedSource.h
@@ -17,6 +17,7 @@
 #include "llvm/ADT/TinyPtrVector.h"
 #include "llvm/Support/Allocator.h"
 #include <map>
+#include <tuple>
 
 namespace clang {
   class LangOptions;
@@ -41,9 +42,21 @@ class EditedSource {
   typedef std::map<FileOffset, FileEdit> FileEditsTy;
   FileEditsTy FileEdits;
 
-  llvm::DenseMap<unsigned, SmallVector<IdentifierInfo*, 2>>
-    ExpansionToArgMap;
-  SmallVector<std::pair<SourceLocation, IdentifierInfo*>, 2>
+  struct MacroArgUse {
+    IdentifierInfo *Identifier;
+    SourceLocation ImmediateExpansionLoc;
+    // Location of argument use inside the top-level macro
+    SourceLocation UseLoc;
+
+    bool operator==(const MacroArgUse &Other) const {
+      return std::tie(Identifier, ImmediateExpansionLoc, UseLoc) ==
+             std::tie(Other.Identifier, Other.ImmediateExpansionLoc,
+                      Other.UseLoc);
+    }
+  };
+
+  llvm::DenseMap<unsigned, SmallVector<MacroArgUse, 2>> ExpansionToArgMap;
+  SmallVector<std::pair<SourceLocation, MacroArgUse>, 2>
     CurrCommitMacroArgExps;
 
   IdentifierTable IdentTable;
@@ -84,7 +97,7 @@ class EditedSource {
   FileEditsTy::iterator getActionForOffset(FileOffset Offs);
   void deconstructMacroArgLoc(SourceLocation Loc,
                               SourceLocation &ExpansionLoc,
-                              IdentifierInfo *&II);
+                              MacroArgUse &ArgUse);
 
   void startingCommit();
   void finishedCommit();
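
The MacroArgUse::operator== added above uses the std::tie idiom (hence the new <tuple> include): members are bundled into tuples of references and compared lexicographically by std::tuple. A minimal standalone illustration of the same pattern:

#include <tuple>

struct Point {
  int X, Y;
  bool operator==(const Point &Other) const {
    // std::tie builds a tuple of references; std::tuple compares memberwise.
    return std::tie(X, Y) == std::tie(Other.X, Other.Y);
  }
};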
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Format/Format.h b/interpreter/llvm/src/tools/clang/include/clang/Format/Format.h
index a963c6369aa9c..99d54e55e8285 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Format/Format.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/Format/Format.h
@@ -184,9 +184,23 @@ struct FormatStyle {
   enum ShortFunctionStyle {
     /// \brief Never merge functions into a single line.
     SFS_None,
+    /// \brief Only merge functions defined inside a class. Same as "inline",
+    /// except it does not imply "empty": i.e. top-level empty functions
+    /// are not merged either.
+    /// \code
+    ///   class Foo {
+    ///     void f() { foo(); }
+    ///   };
+    ///   void f() {
+    ///     foo();
+    ///   }
+    ///   void f() {
+    ///   }
+    /// \endcode
+    SFS_InlineOnly,
     /// \brief Only merge empty functions.
     /// \code
-    ///   void f() { bar(); }
+    ///   void f() {}
     ///   void f2() {
     ///     bar2();
     ///   }
@@ -197,6 +211,10 @@ struct FormatStyle {
     ///   class Foo {
     ///     void f() { foo(); }
     ///   };
+    ///   void f() {
+    ///     foo();
+    ///   }
+    ///   void f() {}
     /// \endcode
     SFS_Inline,
     /// \brief Merge all functions fitting on a single line.
@@ -634,12 +652,12 @@ struct FormatStyle {
     ///   struct foo
     ///   {
     ///     int x;
-    ///   }
+    ///   };
     ///
     ///   false:
     ///   struct foo {
     ///     int x;
-    ///   }
+    ///   };
     /// \endcode
     bool AfterStruct;
     /// \brief Wrap union definitions.
@@ -688,6 +706,40 @@ struct FormatStyle {
     bool BeforeElse;
     /// \brief Indent the wrapped braces themselves.
     bool IndentBraces;
+    /// \brief If ``false``, an empty function body can be put on a single line.
+    /// This option is used only if the opening brace of the function has
+    /// already been wrapped, i.e. the `AfterFunction` brace wrapping mode is
+    /// set, and the function could/should not be put on a single line (as per
+    /// `AllowShortFunctionsOnASingleLine` and constructor formatting options).
+    /// \code
+    ///   int f()   vs.   int f()
+    ///   {}              {
+    ///                   }
+    /// \endcode
+    ///
+    bool SplitEmptyFunction;
+    /// \brief If ``false``, an empty record (e.g. class, struct or union) body
+    /// can be put on a single line. This option is used only if the opening
+    /// brace of the record has already been wrapped, i.e. the `AfterClass`
+    /// (for classes) brace wrapping mode is set.
+    /// \code
+    ///   class Foo   vs.  class Foo
+    ///   {}               {
+    ///                    }
+    /// \endcode
+    ///
+    bool SplitEmptyRecord;
+    /// \brief If ``false``, an empty namespace body can be put on a single line.
+    /// This option is used only if the opening brace of the namespace has
+    /// already been wrapped, i.e. the `AfterNamespace` brace wrapping mode is
+    /// set.
+    /// \code
+    ///   namespace Foo   vs.  namespace Foo
+    ///   {}                   {
+    ///                        }
+    /// \endcode
+    ///
+    bool SplitEmptyNamespace;
   };
 
   /// \brief Control of individual brace wrapping cases.
@@ -703,23 +755,41 @@ struct FormatStyle {
   ///        ? firstValue
   ///        : SecondValueVeryVeryVeryVeryLong;
   ///
-  ///    true:
+  ///    false:
   ///    veryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryLongDescription ?
   ///        firstValue :
   ///        SecondValueVeryVeryVeryVeryLong;
   /// \endcode
   bool BreakBeforeTernaryOperators;
 
-  /// \brief Always break constructor initializers before commas and align
-  /// the commas with the colon.
-  /// \code
-  ///    true:                                  false:
-  ///    SomeClass::Constructor()       vs.     SomeClass::Constructor() : a(a),
-  ///        : a(a)                                                   b(b),
-  ///        , b(b)                                                   c(c) {}
-  ///        , c(c) {}
-  /// \endcode
-  bool BreakConstructorInitializersBeforeComma;
+  /// \brief Different ways to break initializers.
+  enum BreakConstructorInitializersStyle {
+    /// Break constructor initializers before the colon and after the commas.
+    /// \code
+    /// Constructor()
+    ///     : initializer1(),
+    ///       initializer2()
+    /// \endcode
+    BCIS_BeforeColon,
+    /// Break constructor initializers before the colon and commas, and align
+    /// the commas with the colon.
+    /// \code
+    /// Constructor()
+    ///     : initializer1()
+    ///     , initializer2()
+    /// \endcode
+    BCIS_BeforeComma,
+    /// Break constructor initializers after the colon and commas.
+    /// \code
+    /// Constructor() :
+    ///     initializer1(),
+    ///     initializer2()
+    /// \endcode
+    BCIS_AfterColon
+  };
+
+  /// \brief The constructor initializers style to use.
+  BreakConstructorInitializersStyle BreakConstructorInitializers;
 
   /// \brief Break after each annotation on a field in Java files.
   /// \code{.java}
@@ -760,6 +830,29 @@ struct FormatStyle {
   /// \endcode
   bool BreakBeforeInheritanceComma;
 
+  /// \brief If ``true``, consecutive namespace declarations will be on the same
+  /// line. If ``false``, each namespace is declared on a new line.
+  /// \code
+  ///   true:
+  ///   namespace Foo { namespace Bar {
+  ///   }}
+  ///
+  ///   false:
+  ///   namespace Foo {
+  ///   namespace Bar {
+  ///   }
+  ///   }
+  /// \endcode
+  ///
+  /// If it does not fit on a single line, the overflowing namespaces get
+  /// wrapped:
+  /// \code
+  ///   namespace Foo { namespace Bar {
+  ///   namespace Extra {
+  ///   }}}
+  /// \endcode
+  bool CompactNamespaces;
+
   /// \brief If the constructor initializers don't fit on a line, put each
   /// initializer on its own line.
   /// \code
@@ -895,7 +988,7 @@ struct FormatStyle {
   ///   IncludeCategories:
   ///     - Regex:           '^"(llvm|llvm-c|clang|clang-c)/'
   ///       Priority:        2
-  ///     - Regex:           '^(<|"(gtest|isl|json)/)'
+  ///     - Regex:           '^(<|"(gtest|gmock|isl|json)/)'
   ///       Priority:        3
   ///     - Regex:           '.*'
   ///       Priority:        1
@@ -1027,7 +1120,10 @@ struct FormatStyle {
     /// (https://developers.google.com/protocol-buffers/).
     LK_Proto,
     /// Should be used for TableGen code.
-    LK_TableGen
+    LK_TableGen,
+    /// Should be used for Protocol Buffer messages in text format
+    /// (https://developers.google.com/protocol-buffers/).
+    LK_TextProto
   };
   bool isCpp() const { return Language == LK_Cpp || Language == LK_ObjC; }
 
@@ -1133,6 +1229,9 @@ struct FormatStyle {
   /// ``Foo <Protocol>`` instead of ``Foo<Protocol>``.
   bool ObjCSpaceBeforeProtocolList;
 
+  /// \brief The penalty for breaking around an assignment operator.
+  unsigned PenaltyBreakAssignment;
+
   /// \brief The penalty for breaking a function call after ``call(``.
   unsigned PenaltyBreakBeforeFirstCallParameter;
 
@@ -1196,6 +1295,14 @@ struct FormatStyle {
   /// \endcode
   bool SortIncludes;
 
+  /// \brief If ``true``, clang-format will sort using declarations.
+  /// \code
+  ///    false:                                 true:
+  ///    using std::cout;               vs.     using std::cin;
+  ///    using std::cin;                        using std::cout;
+  /// \endcode
+  bool SortUsingDeclarations;
+
   /// \brief If ``true``, a space is inserted after C style casts.
   /// \code
   ///    true:                                  false:
@@ -1387,8 +1494,8 @@ struct FormatStyle {
            BreakBeforeBinaryOperators == R.BreakBeforeBinaryOperators &&
            BreakBeforeBraces == R.BreakBeforeBraces &&
            BreakBeforeTernaryOperators == R.BreakBeforeTernaryOperators &&
-           BreakConstructorInitializersBeforeComma ==
-               R.BreakConstructorInitializersBeforeComma &&
+           BreakConstructorInitializers == R.BreakConstructorInitializers &&
+           CompactNamespaces == R.CompactNamespaces &&
            BreakAfterJavaFieldAnnotations == R.BreakAfterJavaFieldAnnotations &&
            BreakStringLiterals == R.BreakStringLiterals &&
            ColumnLimit == R.ColumnLimit && CommentPragmas == R.CommentPragmas &&
@@ -1420,6 +1527,8 @@ struct FormatStyle {
            ObjCBlockIndentWidth == R.ObjCBlockIndentWidth &&
            ObjCSpaceAfterProperty == R.ObjCSpaceAfterProperty &&
            ObjCSpaceBeforeProtocolList == R.ObjCSpaceBeforeProtocolList &&
+           PenaltyBreakAssignment ==
+               R.PenaltyBreakAssignment &&
            PenaltyBreakBeforeFirstCallParameter ==
                R.PenaltyBreakBeforeFirstCallParameter &&
            PenaltyBreakComment == R.PenaltyBreakComment &&
@@ -1582,6 +1691,16 @@ tooling::Replacements fixNamespaceEndComments(const FormatStyle &Style,
                                               ArrayRef<tooling::Range> Ranges,
                                               StringRef FileName = "");
 
+/// \brief Sort consecutive using declarations in the given \p Ranges in
+/// \p Code.
+///
+/// Returns the ``Replacements`` that sort the using declarations in all
+/// \p Ranges in \p Code.
+tooling::Replacements sortUsingDeclarations(const FormatStyle &Style,
+                                            StringRef Code,
+                                            ArrayRef<tooling::Range> Ranges,
+                                            StringRef FileName = "");
+
 /// \brief Returns the ``LangOpts`` that the formatter expects you to set.
 ///
 /// \param Style determines specific settings for lexing mode.
@@ -1634,6 +1753,8 @@ inline StringRef getLanguageName(FormatStyle::LanguageKind Language) {
     return "JavaScript";
   case FormatStyle::LK_Proto:
     return "Proto";
+  case FormatStyle::LK_TextProto:
+    return "TextProto";
   default:
     return "Unknown";
   }
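
For orientation, a hedged sketch of how a clang-format client could opt into the options introduced in this header; the field names come straight from the declarations above, while the style base and chosen values are just an example:

#include "clang/Format/Format.h"

clang::format::FormatStyle makeStyle() {
  clang::format::FormatStyle Style = clang::format::getLLVMStyle();
  Style.BreakConstructorInitializers =
      clang::format::FormatStyle::BCIS_BeforeComma;
  Style.CompactNamespaces = true;       // merge nested namespace declarations
  Style.SortUsingDeclarations = true;   // pairs with sortUsingDeclarations()
  return Style;
}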
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Frontend/ASTUnit.h b/interpreter/llvm/src/tools/clang/include/clang/Frontend/ASTUnit.h
index 2a8df1b7b9aeb..1ac4f07a3549a 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Frontend/ASTUnit.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/Frontend/ASTUnit.h
@@ -25,6 +25,7 @@
 #include "clang/Lex/PreprocessingRecord.h"
 #include "clang/Sema/CodeCompleteConsumer.h"
 #include "clang/Serialization/ASTBitCodes.h"
+#include "clang/Frontend/PrecompiledPreamble.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringMap.h"
@@ -51,17 +52,23 @@ class DiagnosticsEngine;
 class FileEntry;
 class FileManager;
 class HeaderSearch;
+class InputKind;
 class MemoryBufferCache;
 class Preprocessor;
+class PreprocessorOptions;
 class PCHContainerOperations;
 class PCHContainerReader;
 class TargetInfo;
 class FrontendAction;
 class ASTDeserializationListener;
 
+namespace vfs {
+class FileSystem;
+}
+
 /// \brief Utility class for loading an ASTContext from an AST file.
 ///
-class ASTUnit : public ModuleLoader {
+class ASTUnit {
 public:
   struct StandaloneFixIt {
     std::pair<unsigned, unsigned> RemoveRange;
@@ -92,6 +99,7 @@ class ASTUnit : public ModuleLoader {
   IntrusiveRefCntPtr<ASTContext>          Ctx;
   std::shared_ptr<TargetOptions>          TargetOpts;
   std::shared_ptr<HeaderSearchOptions>    HSOpts;
+  std::shared_ptr<PreprocessorOptions>    PPOpts;
   IntrusiveRefCntPtr<ASTReader> Reader;
   bool HadModuleLoaderFatalFailure;
 
@@ -112,10 +120,13 @@ class ASTUnit : public ModuleLoader {
   /// LoadFromCommandLine available.
   std::shared_ptr<CompilerInvocation> Invocation;
 
+  /// Fake module loader: the AST unit doesn't need to load any modules.
+  TrivialModuleLoader ModuleLoader;
+
   // OnlyLocalDecls - when true, walking this AST should only visit declarations
   // that come from the AST itself, not from included precompiled headers.
   // FIXME: This is temporary; eventually, CIndex will always do this.
-  bool                              OnlyLocalDecls;
+  bool OnlyLocalDecls;
 
   /// \brief Whether to capture any diagnostics produced.
   bool CaptureDiagnostics;
@@ -181,103 +192,23 @@ class ASTUnit : public ModuleLoader {
   /// some number of calls.
   unsigned PreambleRebuildCounter;
 
-public:
-  class PreambleData {
-    const FileEntry *File;
-    std::vector<char> Buffer;
-    mutable unsigned NumLines;
-    
-  public:
-    PreambleData() : File(nullptr), NumLines(0) { }
-    
-    void assign(const FileEntry *F, const char *begin, const char *end) {
-      File = F;
-      Buffer.assign(begin, end);
-      NumLines = 0;
-    }
-
-    void clear() { Buffer.clear(); File = nullptr; NumLines = 0; }
-
-    size_t size() const { return Buffer.size(); }
-    bool empty() const { return Buffer.empty(); }
-
-    const char *getBufferStart() const { return &Buffer[0]; }
-
-    unsigned getNumLines() const {
-      if (NumLines)
-        return NumLines;
-      countLines();
-      return NumLines;
-    }
-
-    SourceRange getSourceRange(const SourceManager &SM) const {
-      SourceLocation FileLoc = SM.getLocForStartOfFile(SM.getPreambleFileID());
-      return SourceRange(FileLoc, FileLoc.getLocWithOffset(size()-1));
-    }
-
-  private:
-    void countLines() const;
-  };
-
-  const PreambleData &getPreambleData() const {
-    return Preamble;
-  }
-
-  /// Data used to determine if a file used in the preamble has been changed.
-  struct PreambleFileHash {
-    /// All files have size set.
-    off_t Size;
-
-    /// Modification time is set for files that are on disk.  For memory
-    /// buffers it is zero.
-    time_t ModTime;
-
-    /// Memory buffers have MD5 instead of modification time.  We don't
-    /// compute MD5 for on-disk files because we hope that modification time is
-    /// enough to tell if the file was changed.
-    llvm::MD5::MD5Result MD5;
-
-    static PreambleFileHash createForFile(off_t Size, time_t ModTime);
-    static PreambleFileHash
-    createForMemoryBuffer(const llvm::MemoryBuffer *Buffer);
-
-    friend bool operator==(const PreambleFileHash &LHS,
-                           const PreambleFileHash &RHS);
-
-    friend bool operator!=(const PreambleFileHash &LHS,
-                           const PreambleFileHash &RHS) {
-      return !(LHS == RHS);
-    }
-  };
+  /// \brief Cache pairs "filename - source location"
+  ///
+  /// The cache contains only source locations from the preamble, so it is
+  /// guaranteed that they stay valid when the SourceManager is recreated.
+  /// This cache is used when loading the preamble to speed up that loading.
+  /// It must be cleared when the preamble is recreated.
+  llvm::StringMap<SourceLocation> PreambleSrcLocCache;
 
 private:
-  /// \brief The contents of the preamble that has been precompiled to
-  /// \c PreambleFile.
-  PreambleData Preamble;
-
-  /// \brief Whether the preamble ends at the start of a new line.
-  /// 
-  /// Used to inform the lexer as to whether it's starting at the beginning of
-  /// a line after skipping the preamble.
-  bool PreambleEndsAtStartOfLine;
-
-  /// \brief Keeps track of the files that were used when computing the 
-  /// preamble, with both their buffer size and their modification time.
-  ///
-  /// If any of the files have changed from one compile to the next,
-  /// the preamble must be thrown away.
-  llvm::StringMap<PreambleFileHash> FilesInPreamble;
+  /// The contents of the preamble.
+  llvm::Optional<PrecompiledPreamble> Preamble;
 
   /// \brief When non-NULL, this is the buffer used to store the contents of
   /// the main file when it has been padded for use with the precompiled
   /// preamble.
   std::unique_ptr<llvm::MemoryBuffer> SavedMainFileBuffer;
 
-  /// \brief When non-NULL, this is the buffer used to store the
-  /// contents of the preamble when it has been padded to build the
-  /// precompiled preamble.
-  std::unique_ptr<llvm::MemoryBuffer> PreambleBuffer;
-
   /// \brief The number of warnings that occurred while parsing the preamble.
   ///
   /// This value will be used to restore the state of the \c DiagnosticsEngine
@@ -301,9 +232,6 @@ class ASTUnit : public ModuleLoader {
   /// (likely to change while trying to use them).
   bool UserFilesAreVolatile : 1;
  
-  /// \brief The language options used when we load an AST file.
-  LangOptions ASTFileLangOpts;
-
   static void ConfigureDiags(IntrusiveRefCntPtr<DiagnosticsEngine> Diags,
                              ASTUnit &AST, bool CaptureDiagnostics);
 
@@ -419,27 +347,14 @@ class ASTUnit : public ModuleLoader {
   
   explicit ASTUnit(bool MainFileIsAST);
 
-  void CleanTemporaryFiles();
   bool Parse(std::shared_ptr<PCHContainerOperations> PCHContainerOps,
-             std::unique_ptr<llvm::MemoryBuffer> OverrideMainBuffer);
-
-  struct ComputedPreamble {
-    llvm::MemoryBuffer *Buffer;
-    std::unique_ptr<llvm::MemoryBuffer> Owner;
-    unsigned Size;
-    bool PreambleEndsAtStartOfLine;
-    ComputedPreamble(llvm::MemoryBuffer *Buffer,
-                     std::unique_ptr<llvm::MemoryBuffer> Owner, unsigned Size,
-                     bool PreambleEndsAtStartOfLine)
-        : Buffer(Buffer), Owner(std::move(Owner)), Size(Size),
-          PreambleEndsAtStartOfLine(PreambleEndsAtStartOfLine) {}
-  };
-  ComputedPreamble ComputePreamble(CompilerInvocation &Invocation,
-                                   unsigned MaxLines);
+             std::unique_ptr<llvm::MemoryBuffer> OverrideMainBuffer,
+             IntrusiveRefCntPtr<vfs::FileSystem> VFS);
 
   std::unique_ptr<llvm::MemoryBuffer> getMainBufferWithPrecompiledPreamble(
       std::shared_ptr<PCHContainerOperations> PCHContainerOps,
-      const CompilerInvocation &PreambleInvocationIn, bool AllowRebuild = true,
+      const CompilerInvocation &PreambleInvocationIn,
+      IntrusiveRefCntPtr<vfs::FileSystem> VFS, bool AllowRebuild = true,
       unsigned MaxLines = 0);
   void RealizeTopLevelDeclsFromPreamble();
 
@@ -482,7 +397,7 @@ class ASTUnit : public ModuleLoader {
   };
   friend class ConcurrencyCheck;
 
-  ~ASTUnit() override;
+  ~ASTUnit();
 
   bool isMainFileAST() const { return MainFileIsAST; }
 
@@ -512,9 +427,19 @@ class ASTUnit : public ModuleLoader {
   }
 
   const LangOptions &getLangOpts() const {
-    assert(LangOpts && " ASTUnit does not have language options");
+    assert(LangOpts && "ASTUnit does not have language options");
     return *LangOpts;
   }
+
+  const HeaderSearchOptions &getHeaderSearchOpts() const {
+    assert(HSOpts && "ASTUnit does not have header search options");
+    return *HSOpts;
+  }
+  
+  const PreprocessorOptions &getPreprocessorOpts() const {
+    assert(PPOpts && "ASTUnit does not have preprocessor options");
+    return *PPOpts;
+  }
   
   const FileManager &getFileManager() const { return *FileMgr; }
         FileManager &getFileManager()       { return *FileMgr; }
@@ -530,11 +455,6 @@ class ASTUnit : public ModuleLoader {
   ASTMutationListener *getASTMutationListener();
   ASTDeserializationListener *getDeserializationListener();
 
-  /// \brief Add a temporary file that the ASTUnit depends on.
-  ///
-  /// This file will be erased when the ASTUnit is destroyed.
-  void addTemporaryFile(StringRef TempFile);
-
   bool getOnlyLocalDecls() const { return OnlyLocalDecls; }
 
   bool getOwnsRemappedFileBuffers() const { return OwnsRemappedFileBuffers; }
@@ -585,12 +505,6 @@ class ASTUnit : public ModuleLoader {
   void findFileRegionDecls(FileID File, unsigned Offset, unsigned Length,
                            SmallVectorImpl<Decl *> &Decls);
 
-  /// \brief Add a new top-level declaration, identified by its ID in
-  /// the precompiled preamble.
-  void addTopLevelDeclFromPreamble(serialization::DeclID D) {
-    TopLevelDeclsInPreamble.push_back(D);
-  }
-
   /// \brief Retrieve a reference to the current top-level name hash value.
   ///
   /// Note: This is used internally by the top-level tracking action
@@ -701,6 +615,9 @@ class ASTUnit : public ModuleLoader {
   /// \brief Determine what kind of translation unit this AST represents.
   TranslationUnitKind getTranslationUnitKind() const { return TUKind; }
 
+  /// \brief Determine the input kind this AST unit represents.
+  InputKind getInputKind() const;
+
   /// \brief A mapping from a file name to the memory buffer that stores the
   /// remapped contents of that file.
   typedef std::pair<std::string, llvm::MemoryBuffer *> RemappedFile;
@@ -711,6 +628,15 @@ class ASTUnit : public ModuleLoader {
          IntrusiveRefCntPtr<DiagnosticsEngine> Diags, bool CaptureDiagnostics,
          bool UserFilesAreVolatile);
 
+  enum WhatToLoad {
+    /// Load options and the preprocessor state.
+    LoadPreprocessorOnly,
+    /// Load the AST, but do not restore Sema state.
+    LoadASTOnly,
+    /// Load everything, including Sema.
+    LoadEverything
+  };
+
   /// \brief Create a ASTUnit from an AST file.
   ///
   /// \param Filename - The AST file to load.
@@ -723,7 +649,7 @@ class ASTUnit : public ModuleLoader {
   /// \returns - The initialized ASTUnit or null if the AST failed to load.
   static std::unique_ptr<ASTUnit> LoadFromASTFile(
       const std::string &Filename, const PCHContainerReader &PCHContainerRdr,
-      IntrusiveRefCntPtr<DiagnosticsEngine> Diags,
+      WhatToLoad ToLoad, IntrusiveRefCntPtr<DiagnosticsEngine> Diags,
       const FileSystemOptions &FileSystemOpts, bool UseDebugInfo = false,
       bool OnlyLocalDecls = false, ArrayRef<RemappedFile> RemappedFiles = None,
       bool CaptureDiagnostics = false, bool AllowPCHWithCompilerErrors = false,
@@ -737,11 +663,17 @@ class ASTUnit : public ModuleLoader {
   /// of this translation unit should be precompiled, to improve the performance
   /// of reparsing. Set to zero to disable preambles.
   ///
+  /// \param VFS - A vfs::FileSystem to be used for all file accesses. Note that
+  /// the preamble is saved to a temporary directory on a RealFileSystem, so in
+  /// order for it to be loaded correctly, VFS should have access to it (i.e., be
+  /// an overlay over RealFileSystem).
+  ///
   /// \returns \c true if a catastrophic failure occurred (which means that the
   /// \c ASTUnit itself is invalid), or \c false otherwise.
   bool LoadFromCompilerInvocation(
       std::shared_ptr<PCHContainerOperations> PCHContainerOps,
-      unsigned PrecompilePreambleAfterNParses);
+      unsigned PrecompilePreambleAfterNParses,
+      IntrusiveRefCntPtr<vfs::FileSystem> VFS);
 
 public:
   
@@ -832,6 +764,11 @@ class ASTUnit : public ModuleLoader {
   /// (e.g. because the PCH could not be loaded), this accepts the ASTUnit
   /// mainly to allow the caller to see the diagnostics.
   ///
+  /// \param VFS - A vfs::FileSystem to be used for all file accesses. Note that
+  /// the preamble is saved to a temporary directory on a RealFileSystem, so in
+  /// order for it to be loaded correctly, VFS should have access to it (i.e., be
+  /// an overlay over RealFileSystem). RealFileSystem will be used if \p VFS is
+  /// nullptr.
+  ///
   // FIXME: Move OnlyLocalDecls, UseBumpAllocator to setters on the ASTUnit, we
   // shouldn't need to specify them at construction time.
   static ASTUnit *LoadFromCommandLine(
@@ -846,17 +783,31 @@ class ASTUnit : public ModuleLoader {
       bool CacheCodeCompletionResults = false,
       bool IncludeBriefCommentsInCodeCompletion = false,
       bool AllowPCHWithCompilerErrors = false, bool SkipFunctionBodies = false,
+      bool SingleFileParse = false,
       bool UserFilesAreVolatile = false, bool ForSerialization = false,
       llvm::Optional<StringRef> ModuleFormat = llvm::None,
-      std::unique_ptr<ASTUnit> *ErrAST = nullptr);
+      std::unique_ptr<ASTUnit> *ErrAST = nullptr,
+      IntrusiveRefCntPtr<vfs::FileSystem> VFS = nullptr);
 
   /// \brief Reparse the source files using the same command-line options that
   /// were originally used to produce this translation unit.
   ///
+  /// \param VFS - A vfs::FileSystem to be used for all file accesses. Note that
+  /// the preamble is saved to a temporary directory on a RealFileSystem, so in
+  /// order for it to be loaded correctly, VFS should give access to it (i.e., be
+  /// an overlay over RealFileSystem). FileMgr->getVirtualFileSystem() will be
+  /// used if \p VFS is nullptr.
+  ///
   /// \returns True if a failure occurred that causes the ASTUnit not to
   /// contain any translation-unit information, false otherwise.
   bool Reparse(std::shared_ptr<PCHContainerOperations> PCHContainerOps,
-               ArrayRef<RemappedFile> RemappedFiles = None);
+               ArrayRef<RemappedFile> RemappedFiles = None,
+               IntrusiveRefCntPtr<vfs::FileSystem> VFS = nullptr);
+
+  /// \brief Free data that will be re-generated on the next parse.
+  ///
+  /// Preamble-related data is not affected.
+  void ResetForParse();
 
   /// \brief Perform code completion at the given file, line, and
   /// column within this translation unit.
@@ -898,21 +849,6 @@ class ASTUnit : public ModuleLoader {
   ///
   /// \returns True if an error occurred, false otherwise.
   bool serialize(raw_ostream &OS);
-
-  ModuleLoadResult loadModule(SourceLocation ImportLoc, ModuleIdPath Path,
-                              Module::NameVisibilityKind Visibility,
-                              bool IsInclusionDirective) override {
-    // ASTUnit doesn't know how to load modules (not that this matters).
-    return ModuleLoadResult();
-  }
-
-  void makeModuleVisible(Module *Mod, Module::NameVisibilityKind Visibility,
-                         SourceLocation ImportLoc) override {}
-
-  GlobalModuleIndex *loadGlobalModuleIndex(SourceLocation TriggerLoc) override
-    { return nullptr; }
-  bool lookupMissingImports(StringRef Name, SourceLocation TriggerLoc) override
-    { return 0; }
 };
 
 } // namespace clang
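
The recurring VFS parameters above come with the documented constraint that the filesystem must overlay the real one so the preamble PCH written to disk stays visible. A sketch of constructing such an overlay with the clang 5-era VFS API from clang/Basic/VirtualFileSystem.h; the helper name is illustrative:

#include "clang/Basic/VirtualFileSystem.h"
#include "llvm/ADT/IntrusiveRefCntPtr.h"

llvm::IntrusiveRefCntPtr<clang::vfs::FileSystem> makeOverlayFS() {
  // Start from the real filesystem so on-disk preamble PCHs remain reachable.
  llvm::IntrusiveRefCntPtr<clang::vfs::OverlayFileSystem> OverlayFS(
      new clang::vfs::OverlayFileSystem(clang::vfs::getRealFileSystem()));
  // Remapped or unsaved buffers can live in an in-memory layer on top.
  llvm::IntrusiveRefCntPtr<clang::vfs::InMemoryFileSystem> InMemFS(
      new clang::vfs::InMemoryFileSystem());
  OverlayFS->pushOverlay(InMemFS);
  return OverlayFS;
}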
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Frontend/CodeGenOptions.def b/interpreter/llvm/src/tools/clang/include/clang/Frontend/CodeGenOptions.def
index 251441d38ff82..4002415adc459 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Frontend/CodeGenOptions.def
+++ b/interpreter/llvm/src/tools/clang/include/clang/Frontend/CodeGenOptions.def
@@ -29,7 +29,8 @@ CODEGENOPT(Name, Bits, Default)
 #endif
 
 CODEGENOPT(DisableIntegratedAS, 1, 0) ///< -no-integrated-as
-CODEGENOPT(CompressDebugSections, 1, 0) ///< -Wa,-compress-debug-sections
+ENUM_CODEGENOPT(CompressDebugSections, llvm::DebugCompressionType, 2,
+                llvm::DebugCompressionType::None)
 CODEGENOPT(RelaxELFRelocations, 1, 0) ///< -Wa,--mrelax-relocations
 CODEGENOPT(AsmVerbose        , 1, 0) ///< -dA, -fverbose-asm.
 CODEGENOPT(PreserveAsmComments, 1, 1) ///< -dA, -fno-preserve-as-comments.
@@ -53,8 +54,11 @@ CODEGENOPT(DisableLLVMPasses , 1, 0) ///< Don't run any LLVM IR passes to get
                                      ///< the pristine IR generated by the
                                      ///< frontend.
 CODEGENOPT(DisableLifetimeMarkers, 1, 0) ///< Don't emit any lifetime markers
+CODEGENOPT(DisableO0ImplyOptNone , 1, 0) ///< Don't annotate functions with optnone at O0
 CODEGENOPT(ExperimentalNewPassManager, 1, 0) ///< Enables the new, experimental
                                              ///< pass manager.
+CODEGENOPT(DebugPassManager, 1, 0) ///< Prints debug information for the new
+                                   ///< pass manager.
 CODEGENOPT(DisableRedZone    , 1, 0) ///< Set when -mno-red-zone is enabled.
 CODEGENOPT(DisableTailCalls  , 1, 0) ///< Do not emit tail calls.
 CODEGENOPT(EmitDeclMetadata  , 1, 0) ///< Emit special metadata indicating what
@@ -116,6 +120,10 @@ CODEGENOPT(NoZeroInitializedInBSS , 1, 0) ///< -fno-zero-initialized-in-bss.
 ENUM_CODEGENOPT(ObjCDispatchMethod, ObjCDispatchMethodKind, 2, Legacy)
 CODEGENOPT(OmitLeafFramePointer , 1, 0) ///< Set when -momit-leaf-frame-pointer is
                                         ///< enabled.
+
+/// A version of Clang that we should attempt to be ABI-compatible with.
+ENUM_CODEGENOPT(ClangABICompat, ClangABI, 4, ClangABI::Latest)
+
 VALUE_CODEGENOPT(OptimizationLevel, 2, 0) ///< The -O[0-3] option specified.
 VALUE_CODEGENOPT(OptimizeSize, 2, 0) ///< If -Os (==1) or -Oz (==2) is specified.
 
@@ -162,6 +170,7 @@ CODEGENOPT(SanitizeCoverageTracePC, 1, 0) ///< Enable PC tracing
                                           ///< in sanitizer coverage.
 CODEGENOPT(SanitizeCoverageTracePCGuard, 1, 0) ///< Enable PC tracing with guard
                                                ///< in sanitizer coverage.
+CODEGENOPT(SanitizeCoverageInline8bitCounters, 1, 0) ///< Use inline 8bit counters.
 CODEGENOPT(SanitizeCoverageNoPrune, 1, 0) ///< Disable coverage pruning.
 CODEGENOPT(SanitizeStats     , 1, 0) ///< Collect statistics for sanitizers.
 CODEGENOPT(SimplifyLibCalls  , 1, 1) ///< Set when -fbuiltin is enabled.
@@ -174,7 +183,6 @@ CODEGENOPT(RerollLoops       , 1, 0) ///< Control whether loops are rerolled.
 CODEGENOPT(NoUseJumpTables   , 1, 0) ///< Set when -fno-jump-tables is enabled.
 CODEGENOPT(UnsafeFPMath      , 1, 0) ///< Allow unsafe floating point optzns.
 CODEGENOPT(UnwindTables      , 1, 0) ///< Emit unwind tables.
-CODEGENOPT(VectorizeBB       , 1, 0) ///< Run basic block vectorizer.
 CODEGENOPT(VectorizeLoop     , 1, 0) ///< Run loop vectorizer.
 CODEGENOPT(VectorizeSLP      , 1, 0) ///< Run SLP vectorizer.
 
@@ -257,6 +265,10 @@ VALUE_CODEGENOPT(EmitCheckPathComponentsToStrip, 32, 0)
 /// Whether to report the hotness of the code region for optimization remarks.
 CODEGENOPT(DiagnosticsWithHotness, 1, 0)
 
+/// The minimum hotness value a diagnostic needs in order to be included in
+/// optimization diagnostics.
+VALUE_CODEGENOPT(DiagnosticsHotnessThreshold, 32, 0)
+
 /// Whether copy relocations support is available when building as PIE.
 CODEGENOPT(PIECopyRelocations, 1, 0)
 
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Frontend/CodeGenOptions.h b/interpreter/llvm/src/tools/clang/include/clang/Frontend/CodeGenOptions.h
index 22d5d3d16ee29..71730a21dbe24 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Frontend/CodeGenOptions.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/Frontend/CodeGenOptions.h
@@ -69,6 +69,23 @@ class CodeGenOptions : public CodeGenOptionsBase {
     LocalExecTLSModel
   };
 
+  /// Clang versions with different platform ABI conformance.
+  enum class ClangABI {
+    /// Attempt to be ABI-compatible with code generated by Clang 3.8.x
+    /// (SVN r257626). This causes <1 x long long> to be passed in an
+    /// integer register instead of an SSE register on x86_64.
+    Ver3_8,
+
+    /// Attempt to be ABI-compatible with code generated by Clang 4.0.x
+    /// (SVN r291814). This causes move operations to be ignored when
+    /// determining whether a class type can be passed or returned directly.
+    Ver4,
+
+    /// Conform to the underlying platform's C and C++ ABIs as closely
+    /// as we can.
+    Latest
+  };
+
   enum StructReturnConventionKind {
     SRCK_Default,  // No special option was passed.
     SRCK_OnStack,  // Small structs on the stack (-fpcc-struct-return).
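
A sketch of how codegen-side logic might consult the new option (ENUM_CODEGENOPT generates the getClangABICompat() accessor on CodeGenOptionsBase); the predicate below is illustrative, not a function from the patch:

#include "clang/Frontend/CodeGenOptions.h"

// Pre-3.9 compatibility: <1 x long long> is passed in an integer register
// on x86_64, so callers pick the old convention when compat <= Ver3_8.
bool useClang38VectorABI(const clang::CodeGenOptions &Opts) {
  return Opts.getClangABICompat() <=
         clang::CodeGenOptions::ClangABI::Ver3_8;
}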
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Frontend/CompilerInstance.h b/interpreter/llvm/src/tools/clang/include/clang/Frontend/CompilerInstance.h
index 0dba5a18c95ac..5b5c75298a313 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Frontend/CompilerInstance.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/Frontend/CompilerInstance.h
@@ -136,6 +136,13 @@ class CompilerInstance : public ModuleLoader {
   /// along with the module map
   llvm::DenseMap<const IdentifierInfo *, Module *> KnownModules;
 
+  /// \brief The set of top-level modules that have already been built on the
+  /// fly as part of this overall compilation action.
+  std::map<std::string, std::string> BuiltModules;
+
+  /// Should we delete the BuiltModules when we're done?
+  bool DeleteBuiltModules = true;
+
   /// \brief The location of the module-import keyword for the last module
   /// import. 
   SourceLocation LastModuleImportLoc;
@@ -633,9 +640,7 @@ class CompilerInstance : public ModuleLoader {
                     const CodeGenOptions *CodeGenOpts = nullptr);
 
   /// Create the file manager and replace any existing one with it.
-  ///
-  /// \return The new file manager on success, or null on failure.
-  FileManager *createFileManager();
+  void createFileManager();
 
   /// Create the source manager and replace any existing one with it.
   void createSourceManager(FileManager &FileMgr);
@@ -775,6 +780,9 @@ class CompilerInstance : public ModuleLoader {
                               Module::NameVisibilityKind Visibility,
                               bool IsInclusionDirective) override;
 
+  void loadModuleFromSource(SourceLocation ImportLoc, StringRef ModuleName,
+                            StringRef Source) override;
+
   void makeModuleVisible(Module *Mod, Module::NameVisibilityKind Visibility,
                          SourceLocation ImportLoc) override;
 
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Frontend/CompilerInvocation.h b/interpreter/llvm/src/tools/clang/include/clang/Frontend/CompilerInvocation.h
index cef7f73ecaa0f..8c4c932190bc3 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Frontend/CompilerInvocation.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/Frontend/CompilerInvocation.h
@@ -225,6 +225,11 @@ IntrusiveRefCntPtr<vfs::FileSystem>
 createVFSFromCompilerInvocation(const CompilerInvocation &CI,
                                 DiagnosticsEngine &Diags);
 
+IntrusiveRefCntPtr<vfs::FileSystem>
+createVFSFromCompilerInvocation(const CompilerInvocation &CI,
+                                DiagnosticsEngine &Diags,
+                                IntrusiveRefCntPtr<vfs::FileSystem> BaseFS);
+
 } // end namespace clang
 
 #endif
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Frontend/DiagnosticRenderer.h b/interpreter/llvm/src/tools/clang/include/clang/Frontend/DiagnosticRenderer.h
index 2588feb2b87dd..e453d7db624c2 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Frontend/DiagnosticRenderer.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/Frontend/DiagnosticRenderer.h
@@ -70,33 +70,27 @@ class DiagnosticRenderer {
                      DiagnosticOptions *DiagOpts);
   
   virtual ~DiagnosticRenderer();
-  
-  virtual void emitDiagnosticMessage(SourceLocation Loc, PresumedLoc PLoc,
+
+  virtual void emitDiagnosticMessage(FullSourceLoc Loc, PresumedLoc PLoc,
                                      DiagnosticsEngine::Level Level,
                                      StringRef Message,
                                      ArrayRef<CharSourceRange> Ranges,
-                                     const SourceManager *SM,
                                      DiagOrStoredDiag Info) = 0;
-  
-  virtual void emitDiagnosticLoc(SourceLocation Loc, PresumedLoc PLoc,
+
+  virtual void emitDiagnosticLoc(FullSourceLoc Loc, PresumedLoc PLoc,
                                  DiagnosticsEngine::Level Level,
-                                 ArrayRef<CharSourceRange> Ranges,
-                                 const SourceManager &SM) = 0;
+                                 ArrayRef<CharSourceRange> Ranges) = 0;
 
-  virtual void emitCodeContext(SourceLocation Loc,
+  virtual void emitCodeContext(FullSourceLoc Loc,
                                DiagnosticsEngine::Level Level,
-                               SmallVectorImpl<CharSourceRange>& Ranges,
-                               ArrayRef<FixItHint> Hints,
-                               const SourceManager &SM) = 0;
-  
-  virtual void emitIncludeLocation(SourceLocation Loc, PresumedLoc PLoc,
-                                   const SourceManager &SM) = 0;
-  virtual void emitImportLocation(SourceLocation Loc, PresumedLoc PLoc,
-                                  StringRef ModuleName,
-                                  const SourceManager &SM) = 0;
-  virtual void emitBuildingModuleLocation(SourceLocation Loc, PresumedLoc PLoc,
-                                          StringRef ModuleName,
-                                          const SourceManager &SM) = 0;
+                               SmallVectorImpl<CharSourceRange> &Ranges,
+                               ArrayRef<FixItHint> Hints) = 0;
+
+  virtual void emitIncludeLocation(FullSourceLoc Loc, PresumedLoc PLoc) = 0;
+  virtual void emitImportLocation(FullSourceLoc Loc, PresumedLoc PLoc,
+                                  StringRef ModuleName) = 0;
+  virtual void emitBuildingModuleLocation(FullSourceLoc Loc, PresumedLoc PLoc,
+                                          StringRef ModuleName) = 0;
 
   virtual void beginDiagnostic(DiagOrStoredDiag D,
                                DiagnosticsEngine::Level Level) {}
@@ -106,25 +100,21 @@ class DiagnosticRenderer {
   
 private:
   void emitBasicNote(StringRef Message);
-  void emitIncludeStack(SourceLocation Loc, PresumedLoc PLoc,
-                        DiagnosticsEngine::Level Level, const SourceManager &SM);
-  void emitIncludeStackRecursively(SourceLocation Loc, const SourceManager &SM);
-  void emitImportStack(SourceLocation Loc, const SourceManager &SM);
-  void emitImportStackRecursively(SourceLocation Loc, StringRef ModuleName,
-                                  const SourceManager &SM);
+  void emitIncludeStack(FullSourceLoc Loc, PresumedLoc PLoc,
+                        DiagnosticsEngine::Level Level);
+  void emitIncludeStackRecursively(FullSourceLoc Loc);
+  void emitImportStack(FullSourceLoc Loc);
+  void emitImportStackRecursively(FullSourceLoc Loc, StringRef ModuleName);
   void emitModuleBuildStack(const SourceManager &SM);
-  void emitCaret(SourceLocation Loc, DiagnosticsEngine::Level Level,
-                 ArrayRef<CharSourceRange> Ranges, ArrayRef<FixItHint> Hints,
-                 const SourceManager &SM);
-  void emitSingleMacroExpansion(SourceLocation Loc,
+  void emitCaret(FullSourceLoc Loc, DiagnosticsEngine::Level Level,
+                 ArrayRef<CharSourceRange> Ranges, ArrayRef<FixItHint> Hints);
+  void emitSingleMacroExpansion(FullSourceLoc Loc,
                                 DiagnosticsEngine::Level Level,
-                                ArrayRef<CharSourceRange> Ranges,
-                                const SourceManager &SM);
-  void emitMacroExpansions(SourceLocation Loc,
-                           DiagnosticsEngine::Level Level,
+                                ArrayRef<CharSourceRange> Ranges);
+  void emitMacroExpansions(FullSourceLoc Loc, DiagnosticsEngine::Level Level,
                            ArrayRef<CharSourceRange> Ranges,
-                           ArrayRef<FixItHint> Hints,
-                           const SourceManager &SM);
+                           ArrayRef<FixItHint> Hints);
+
 public:
   /// \brief Emit a diagnostic.
   ///
@@ -138,12 +128,9 @@ class DiagnosticRenderer {
   /// \param Message The diagnostic message to emit.
   /// \param Ranges The underlined ranges for this code snippet.
   /// \param FixItHints The FixIt hints active for this diagnostic.
-  /// \param SM The SourceManager; will be null if the diagnostic came from the
-  ///        frontend, thus \p Loc will be invalid.
-  void emitDiagnostic(SourceLocation Loc, DiagnosticsEngine::Level Level,
+  void emitDiagnostic(FullSourceLoc Loc, DiagnosticsEngine::Level Level,
                       StringRef Message, ArrayRef<CharSourceRange> Ranges,
                       ArrayRef<FixItHint> FixItHints,
-                      const SourceManager *SM,
                       DiagOrStoredDiag D = (Diagnostic *)nullptr);
 
   void emitStoredDiagnostic(StoredDiagnostic &Diag);
@@ -159,19 +146,15 @@ class DiagnosticNoteRenderer : public DiagnosticRenderer {
 
   ~DiagnosticNoteRenderer() override;
 
-  void emitIncludeLocation(SourceLocation Loc, PresumedLoc PLoc,
-                           const SourceManager &SM) override;
+  void emitIncludeLocation(FullSourceLoc Loc, PresumedLoc PLoc) override;
 
-  void emitImportLocation(SourceLocation Loc, PresumedLoc PLoc,
-                          StringRef ModuleName,
-                          const SourceManager &SM) override;
+  void emitImportLocation(FullSourceLoc Loc, PresumedLoc PLoc,
+                          StringRef ModuleName) override;
 
-  void emitBuildingModuleLocation(SourceLocation Loc, PresumedLoc PLoc,
-                                  StringRef ModuleName,
-                                  const SourceManager &SM) override;
+  void emitBuildingModuleLocation(FullSourceLoc Loc, PresumedLoc PLoc,
+                                  StringRef ModuleName) override;
 
-  virtual void emitNote(SourceLocation Loc, StringRef Message,
-                        const SourceManager *SM) = 0;
+  virtual void emitNote(FullSourceLoc Loc, StringRef Message) = 0;
 };
 } // end clang namespace
 #endif
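
The rewrite above drops the separate SourceManager parameters because FullSourceLoc carries its SourceManager with it. A minimal illustration of the pairing:

#include "clang/Basic/SourceManager.h"

void describe(clang::SourceLocation Loc, const clang::SourceManager &SM) {
  clang::FullSourceLoc Full(Loc, SM); // bundle location + manager once
  if (Full.isValid())
    (void)Full.getSpellingLineNumber(); // no SourceManager argument needed
}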
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Frontend/FrontendAction.h b/interpreter/llvm/src/tools/clang/include/clang/Frontend/FrontendAction.h
index 8d690a448f85c..7ae6173512a68 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Frontend/FrontendAction.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/Frontend/FrontendAction.h
@@ -76,8 +76,7 @@ class FrontendAction {
   ///
   /// \return True on success; on failure ExecutionAction() and
   /// EndSourceFileAction() will not be called.
-  virtual bool BeginSourceFileAction(CompilerInstance &CI,
-                                     StringRef Filename) {
+  virtual bool BeginSourceFileAction(CompilerInstance &CI) {
     return true;
   }
 
@@ -176,10 +175,10 @@ class FrontendAction {
   virtual TranslationUnitKind getTranslationUnitKind() { return TU_Complete; }
 
   /// \brief Does this action support use with PCH?
-  virtual bool hasPCHSupport() const { return !usesPreprocessorOnly(); }
+  virtual bool hasPCHSupport() const { return true; }
 
   /// \brief Does this action support use with AST files?
-  virtual bool hasASTFileSupport() const { return !usesPreprocessorOnly(); }
+  virtual bool hasASTFileSupport() const { return true; }
 
   /// \brief Does this action support use with IR files?
   virtual bool hasIRSupport() const { return false; }
@@ -291,7 +290,7 @@ class WrapperFrontendAction : public FrontendAction {
   std::unique_ptr<ASTConsumer> CreateASTConsumer(CompilerInstance &CI,
                                                  StringRef InFile) override;
   bool BeginInvocation(CompilerInstance &CI) override;
-  bool BeginSourceFileAction(CompilerInstance &CI, StringRef Filename) override;
+  bool BeginSourceFileAction(CompilerInstance &CI) override;
   void ExecuteAction() override;
   void EndSourceFileAction() override;
 
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Frontend/FrontendActions.h b/interpreter/llvm/src/tools/clang/include/clang/Frontend/FrontendActions.h
index cb44985149553..c45aeaa208c8c 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Frontend/FrontendActions.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/Frontend/FrontendActions.h
@@ -91,7 +91,7 @@ class GeneratePCHAction : public ASTFrontendAction {
   ComputeASTConsumerArguments(CompilerInstance &CI, StringRef InFile,
                               std::string &Sysroot, std::string &OutputFile);
 
-  bool BeginSourceFileAction(CompilerInstance &CI, StringRef Filename) override;
+  bool BeginSourceFileAction(CompilerInstance &CI) override;
 };
 
 class GenerateModuleAction : public ASTFrontendAction {
@@ -111,7 +111,7 @@ class GenerateModuleAction : public ASTFrontendAction {
 
 class GenerateModuleFromModuleMapAction : public GenerateModuleAction {
 private:
-  bool BeginSourceFileAction(CompilerInstance &CI, StringRef Filename) override;
+  bool BeginSourceFileAction(CompilerInstance &CI) override;
 
   std::unique_ptr<raw_pwrite_stream>
   CreateOutputFile(CompilerInstance &CI, StringRef InFile) override;
@@ -119,7 +119,7 @@ class GenerateModuleFromModuleMapAction : public GenerateModuleAction {
 
 class GenerateModuleInterfaceAction : public GenerateModuleAction {
 private:
-  bool BeginSourceFileAction(CompilerInstance &CI, StringRef Filename) override;
+  bool BeginSourceFileAction(CompilerInstance &CI) override;
 
   std::unique_ptr<raw_pwrite_stream>
   CreateOutputFile(CompilerInstance &CI, StringRef InFile) override;
@@ -181,8 +181,7 @@ class ASTMergeAction : public FrontendAction {
   std::unique_ptr<ASTConsumer> CreateASTConsumer(CompilerInstance &CI,
                                                  StringRef InFile) override;
 
-  bool BeginSourceFileAction(CompilerInstance &CI,
-                             StringRef Filename) override;
+  bool BeginSourceFileAction(CompilerInstance &CI) override;
 
   void ExecuteAction() override;
   void EndSourceFileAction() override;
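
Since BeginSourceFileAction no longer receives the file name, an override now asks the action itself. A hypothetical action showing the new signature (PrintFileAction is not from the patch):

#include "clang/AST/ASTConsumer.h"
#include "clang/Frontend/CompilerInstance.h"
#include "clang/Frontend/FrontendAction.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/raw_ostream.h"

class PrintFileAction : public clang::ASTFrontendAction {
  bool BeginSourceFileAction(clang::CompilerInstance &CI) override {
    // The current file is available from FrontendAction itself.
    llvm::errs() << "processing " << getCurrentFile() << "\n";
    return true;
  }
  std::unique_ptr<clang::ASTConsumer>
  CreateASTConsumer(clang::CompilerInstance &CI, llvm::StringRef) override {
    return llvm::make_unique<clang::ASTConsumer>(); // no-op consumer
  }
};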
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Frontend/FrontendOptions.h b/interpreter/llvm/src/tools/clang/include/clang/Frontend/FrontendOptions.h
index 36c046891b478..e757a7e397e35 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Frontend/FrontendOptions.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/Frontend/FrontendOptions.h
@@ -317,8 +317,8 @@ class FrontendOptions {
   /// \brief Auxiliary triple for CUDA compilation.
   std::string AuxTriple;
 
-  /// \brief If non-empty, search the pch input file as it was a header
-  // included by this file.
+  /// \brief If non-empty, search the pch input file as if it was a header
+  /// included by this file.
   std::string FindPchSource;
 
   /// Filename to write statistics to.
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Frontend/LangStandard.h b/interpreter/llvm/src/tools/clang/include/clang/Frontend/LangStandard.h
index ec32aa8d161f9..6731e08bcae8f 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Frontend/LangStandard.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/Frontend/LangStandard.h
@@ -26,11 +26,12 @@ enum LangFeatures {
   CPlusPlus11 = (1 << 4),
   CPlusPlus14 = (1 << 5),
   CPlusPlus1z = (1 << 6),
-  Digraphs = (1 << 7),
-  GNUMode = (1 << 8),
-  HexFloat = (1 << 9),
-  ImplicitInt = (1 << 10),
-  OpenCL = (1 << 11)
+  CPlusPlus2a = (1 << 7),
+  Digraphs = (1 << 8),
+  GNUMode = (1 << 9),
+  HexFloat = (1 << 10),
+  ImplicitInt = (1 << 11),
+  OpenCL = (1 << 12)
 };
 
 }
@@ -81,6 +82,10 @@ struct LangStandard {
   /// isCPlusPlus1z - Language is a C++17 variant (or later).
   bool isCPlusPlus1z() const { return Flags & frontend::CPlusPlus1z; }
 
+  /// isCPlusPlus2a - Language is a post-C++17 variant (or later).
+  bool isCPlusPlus2a() const { return Flags & frontend::CPlusPlus2a; }
+
+
   /// hasDigraphs - Language supports digraphs.
   bool hasDigraphs() const { return Flags & frontend::Digraphs; }
 
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Frontend/LangStandards.def b/interpreter/llvm/src/tools/clang/include/clang/Frontend/LangStandards.def
index 1d214fd2a2bec..a019d63922140 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Frontend/LangStandards.def
+++ b/interpreter/llvm/src/tools/clang/include/clang/Frontend/LangStandards.def
@@ -109,15 +109,27 @@ LANGSTANDARD(gnucxx14, "gnu++14",
              GNUMode)
 LANGSTANDARD_ALIAS_DEPR(gnucxx14, "gnu++1y")
 
-LANGSTANDARD(cxx1z, "c++1z",
-             CXX, "Working draft for ISO C++ 2017",
+LANGSTANDARD(cxx17, "c++17",
+             CXX, "ISO C++ 2017 with amendments",
              LineComment | CPlusPlus | CPlusPlus11 | CPlusPlus14 | CPlusPlus1z |
              Digraphs | HexFloat)
+LANGSTANDARD_ALIAS_DEPR(cxx17, "c++1z")
 
-LANGSTANDARD(gnucxx1z, "gnu++1z",
-             CXX, "Working draft for ISO C++ 2017 with GNU extensions",
+LANGSTANDARD(gnucxx17, "gnu++17",
+             CXX, "ISO C++ 2017 with amendments and GNU extensions",
              LineComment | CPlusPlus | CPlusPlus11 | CPlusPlus14 | CPlusPlus1z |
              Digraphs | HexFloat | GNUMode)
+LANGSTANDARD_ALIAS_DEPR(gnucxx17, "gnu++1z")
+
+LANGSTANDARD(cxx2a, "c++2a",
+             CXX, "Working draft for ISO C++ 2020",
+             LineComment | CPlusPlus | CPlusPlus11 | CPlusPlus14 | CPlusPlus1z |
+             CPlusPlus2a | Digraphs | HexFloat)
+
+LANGSTANDARD(gnucxx2a, "gnu++2a",
+             CXX, "Working draft for ISO C++ 2020 with GNU extensions",
+             LineComment | CPlusPlus | CPlusPlus11 | CPlusPlus14 | CPlusPlus1z |
+             CPlusPlus2a | Digraphs | HexFloat | GNUMode)
 
 // OpenCL
 LANGSTANDARD(opencl10, "cl1.0",
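
With the c++2a entries above, code can detect the post-C++17 mode through the usual macro; under -std=c++17 __cplusplus is 201703L, and the 2a draft mode reports a later value:

#if __cplusplus > 201703L
// Post-C++17 (-std=c++2a) draft features may be used here.
#endif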
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Frontend/PrecompiledPreamble.h b/interpreter/llvm/src/tools/clang/include/clang/Frontend/PrecompiledPreamble.h
new file mode 100644
index 0000000000000..8307392e7febd
--- /dev/null
+++ b/interpreter/llvm/src/tools/clang/include/clang/Frontend/PrecompiledPreamble.h
@@ -0,0 +1,248 @@
+//===--- PrecompiledPreamble.h - Build precompiled preambles ----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Helper class to build precompiled preamble.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_FRONTEND_PRECOMPILED_PREAMBLE_H
+#define LLVM_CLANG_FRONTEND_PRECOMPILED_PREAMBLE_H
+
+#include "clang/Lex/Lexer.h"
+#include "clang/Lex/Preprocessor.h"
+#include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/Support/MD5.h"
+#include <memory>
+#include <system_error>
+#include <type_traits>
+
+namespace llvm {
+class MemoryBuffer;
+}
+
+namespace clang {
+namespace vfs {
+class FileSystem;
+}
+
+class CompilerInstance;
+class CompilerInvocation;
+class DeclGroupRef;
+class PCHContainerOperations;
+
+/// A size of the preamble and a flag required by
+/// PreprocessorOptions::PrecompiledPreambleBytes.
+struct PreambleBounds {
+  PreambleBounds(unsigned Size, bool PreambleEndsAtStartOfLine)
+      : Size(Size), PreambleEndsAtStartOfLine(PreambleEndsAtStartOfLine) {}
+
+  /// \brief Size of the preamble in bytes.
+  unsigned Size;
+  /// \brief Whether the preamble ends at the start of a new line.
+  ///
+  /// Used to inform the lexer as to whether it's starting at the beginning of
+  /// a line after skipping the preamble.
+  bool PreambleEndsAtStartOfLine;
+};
+
+/// \brief Runs lexer to compute suggested preamble bounds.
+PreambleBounds ComputePreambleBounds(const LangOptions &LangOpts,
+                                     llvm::MemoryBuffer *Buffer,
+                                     unsigned MaxLines);
+
+class PreambleCallbacks;
+
+/// A class holding a PCH and all information to check whether it is valid to
+/// reuse the PCH for the subsequent runs. Use BuildPreamble to create PCH and
+/// CanReusePreamble + AddImplicitPreamble to make use of it.
+class PrecompiledPreamble {
+  class TempPCHFile;
+  struct PreambleFileHash;
+
+public:
+  /// \brief Try to build PrecompiledPreamble for \p Invocation. See
+  /// BuildPreambleError for possible error codes.
+  ///
+  /// \param Invocation Original CompilerInvocation with options to compile the
+  /// file.
+  ///
+  /// \param MainFileBuffer Buffer with the contents of the main file.
+  ///
+  /// \param Bounds Bounds of the preamble, result of calling
+  /// ComputePreambleBounds.
+  ///
+  /// \param Diagnostics Diagnostics engine to be used while building the
+  /// preamble.
+  ///
+  /// \param VFS An instance of vfs::FileSystem to be used for file
+  /// accesses.
+  ///
+  /// \param PCHContainerOps An instance of PCHContainerOperations.
+  ///
+  /// \param Callbacks A set of callbacks to be executed when building
+  /// the preamble.
+  static llvm::ErrorOr<PrecompiledPreamble>
+  Build(const CompilerInvocation &Invocation,
+        const llvm::MemoryBuffer *MainFileBuffer, PreambleBounds Bounds,
+        DiagnosticsEngine &Diagnostics, IntrusiveRefCntPtr<vfs::FileSystem> VFS,
+        std::shared_ptr<PCHContainerOperations> PCHContainerOps,
+        PreambleCallbacks &Callbacks);
+
+  PrecompiledPreamble(PrecompiledPreamble &&) = default;
+  PrecompiledPreamble &operator=(PrecompiledPreamble &&) = default;
+
+  /// PreambleBounds used to build the preamble
+  PreambleBounds getBounds() const;
+
+  /// Check whether PrecompiledPreamble can be reused for the new contents(\p
+  /// MainFileBuffer) of the main file.
+  bool CanReuse(const CompilerInvocation &Invocation,
+                const llvm::MemoryBuffer *MainFileBuffer, PreambleBounds Bounds,
+                vfs::FileSystem *VFS) const;
+
+  /// Changes options inside \p CI to use PCH from this preamble. Also remaps
+  /// main file to \p MainFileBuffer.
+  void AddImplicitPreamble(CompilerInvocation &CI,
+                           llvm::MemoryBuffer *MainFileBuffer) const;
+
+private:
+  PrecompiledPreamble(TempPCHFile PCHFile, std::vector<char> PreambleBytes,
+                      bool PreambleEndsAtStartOfLine,
+                      llvm::StringMap<PreambleFileHash> FilesInPreamble);
+
+  /// A temp file that would be deleted on destructor call. If destructor is not
+  /// called for any reason, the file will be deleted at static objects'
+  /// destruction.
+  /// An assertion will fire if two TempPCHFiles are created with the same name,
+  /// so it's not intended to be used outside preamble-handling.
+  class TempPCHFile {
+  public:
+    // A main method used to construct TempPCHFile.
+    static llvm::ErrorOr<TempPCHFile> CreateNewPreamblePCHFile();
+
+    /// Call llvm::sys::fs::createTemporaryFile to create a new temporary file.
+    static llvm::ErrorOr<TempPCHFile> createInSystemTempDir(const Twine &Prefix,
+                                                            StringRef Suffix);
+    /// Create a new instance of TemporaryFile for file at \p Path. Use with
+    /// extreme caution, there's an assertion checking that there's only a
+    /// single instance of TempPCHFile alive for each path.
+    static llvm::ErrorOr<TempPCHFile> createFromCustomPath(const Twine &Path);
+
+  private:
+    TempPCHFile(std::string FilePath);
+
+  public:
+    TempPCHFile(TempPCHFile &&Other);
+    TempPCHFile &operator=(TempPCHFile &&Other);
+
+    TempPCHFile(const TempPCHFile &) = delete;
+    ~TempPCHFile();
+
+    /// A path where temporary file is stored.
+    llvm::StringRef getFilePath() const;
+
+  private:
+    void RemoveFileIfPresent();
+
+  private:
+    llvm::Optional<std::string> FilePath;
+  };
+
+  /// Data used to determine if a file used in the preamble has been changed.
+  struct PreambleFileHash {
+    /// All files have size set.
+    off_t Size = 0;
+
+    /// Modification time is set for files that are on disk.  For memory
+    /// buffers it is zero.
+    time_t ModTime = 0;
+
+    /// Memory buffers have MD5 instead of modification time.  We don't
+    /// compute MD5 for on-disk files because we hope that modification time is
+    /// enough to tell if the file was changed.
+    llvm::MD5::MD5Result MD5 = {};
+
+    static PreambleFileHash createForFile(off_t Size, time_t ModTime);
+    static PreambleFileHash
+    createForMemoryBuffer(const llvm::MemoryBuffer *Buffer);
+
+    friend bool operator==(const PreambleFileHash &LHS,
+                           const PreambleFileHash &RHS) {
+      return LHS.Size == RHS.Size && LHS.ModTime == RHS.ModTime &&
+             LHS.MD5 == RHS.MD5;
+    }
+    friend bool operator!=(const PreambleFileHash &LHS,
+                           const PreambleFileHash &RHS) {
+      return !(LHS == RHS);
+    }
+  };
+
+  /// Manages the lifetime of temporary file that stores a PCH.
+  TempPCHFile PCHFile;
+  /// Keeps track of the files that were used when computing the
+  /// preamble, with both their buffer size and their modification time.
+  ///
+  /// If any of the files have changed from one compile to the next,
+  /// the preamble must be thrown away.
+  llvm::StringMap<PreambleFileHash> FilesInPreamble;
+  /// The contents of the file that was used to precompile the preamble. Only
+  /// contains first PreambleBounds::Size bytes. Used to compare if the relevant
+  /// part of the file has not changed, so that preamble can be reused.
+  std::vector<char> PreambleBytes;
+  /// See PreambleBounds::PreambleEndsAtStartOfLine
+  bool PreambleEndsAtStartOfLine;
+};
+
+/// A set of callbacks to gather useful information while building a preamble.
+class PreambleCallbacks {
+public:
+  virtual ~PreambleCallbacks() = default;
+
+  /// Called after FrontendAction::Execute(), but before
+  /// FrontendAction::EndSourceFile(). Can be used to transfer ownership of
+  /// various CompilerInstance fields before they are destroyed.
+  virtual void AfterExecute(CompilerInstance &CI);
+  /// Called after PCH has been emitted. \p Writer may be used to retrieve
+  /// information about AST, serialized in PCH.
+  virtual void AfterPCHEmitted(ASTWriter &Writer);
+  /// Called for each TopLevelDecl.
+  /// NOTE: To allow more flexibility a custom ASTConsumer could probably be
+  /// used instead, but having only this method allows a simpler API.
+  virtual void HandleTopLevelDecl(DeclGroupRef DG);
+  /// Called for each macro defined in the Preamble.
+  /// NOTE: To allow more flexibility a custom PPCallbacks could probably be
+  /// used instead, but having only this method allows a simpler API.
+  virtual void HandleMacroDefined(const Token &MacroNameTok,
+                                  const MacroDirective *MD);
+};
+
+enum class BuildPreambleError {
+  PreambleIsEmpty = 1,
+  CouldntCreateTempFile,
+  CouldntCreateTargetInfo,
+  CouldntCreateVFSOverlay,
+  BeginSourceFileFailed,
+  CouldntEmitPCH
+};
+
+class BuildPreambleErrorCategory final : public std::error_category {
+public:
+  const char *name() const noexcept override;
+  std::string message(int condition) const override;
+};
+
+std::error_code make_error_code(BuildPreambleError Error);
+} // namespace clang
+
+namespace std {
+template <>
+struct is_error_code_enum<clang::BuildPreambleError> : std::true_type {};
+} // namespace std
+
+#endif
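
// BuildPreambleError above follows the standard <system_error> registration
// recipe. A runnable, self-contained version of that recipe (the category
// name and message strings here are illustrative, not the ones clang emits):
#include <string>
#include <system_error>

enum class BuildPreambleError { PreambleIsEmpty = 1, CouldntCreateTempFile };

class BuildPreambleErrorCategory final : public std::error_category {
public:
  const char *name() const noexcept override { return "build-preamble.error"; }
  std::string message(int condition) const override {
    switch (static_cast<BuildPreambleError>(condition)) {
    case BuildPreambleError::PreambleIsEmpty:
      return "preamble is empty";
    case BuildPreambleError::CouldntCreateTempFile:
      return "couldn't create temporary file";
    }
    return "unknown error";
  }
};

std::error_code make_error_code(BuildPreambleError E) {
  static BuildPreambleErrorCategory Category;
  return std::error_code(static_cast<int>(E), Category);
}

namespace std {
template <> struct is_error_code_enum<BuildPreambleError> : std::true_type {};
} // namespace std

int main() {
  // The specialization lets the enum convert implicitly, via ADL-found
  // make_error_code -- exactly what callers of Build() rely on.
  std::error_code EC = BuildPreambleError::PreambleIsEmpty;
  return EC.value() == 1 ? 0 : 1;
}
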
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Frontend/PreprocessorOutputOptions.h b/interpreter/llvm/src/tools/clang/include/clang/Frontend/PreprocessorOutputOptions.h
index 3261b66538094..94afcd06a3989 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Frontend/PreprocessorOutputOptions.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/Frontend/PreprocessorOutputOptions.h
@@ -24,6 +24,7 @@ class PreprocessorOutputOptions {
   unsigned ShowMacros : 1;         ///< Print macro definitions.
   unsigned ShowIncludeDirectives : 1;  ///< Print includes, imports etc. within preprocessed output.
   unsigned RewriteIncludes : 1;    ///< Preprocess include directives only.
+  unsigned RewriteImports  : 1;    ///< Include contents of transitively-imported modules.
 
 public:
   PreprocessorOutputOptions() {
@@ -35,6 +36,7 @@ class PreprocessorOutputOptions {
     ShowMacros = 0;
     ShowIncludeDirectives = 0;
     RewriteIncludes = 0;
+    RewriteImports = 0;
   }
 };
 
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Frontend/TextDiagnostic.h b/interpreter/llvm/src/tools/clang/include/clang/Frontend/TextDiagnostic.h
index 9b108c28bd1c4..1bbfe9fa02e32 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Frontend/TextDiagnostic.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/Frontend/TextDiagnostic.h
@@ -75,44 +75,35 @@ class TextDiagnostic : public DiagnosticRenderer {
                                      unsigned Columns, bool ShowColors);
 
 protected:
-  void emitDiagnosticMessage(SourceLocation Loc,PresumedLoc PLoc,
-                             DiagnosticsEngine::Level Level,
-                             StringRef Message,
+  void emitDiagnosticMessage(FullSourceLoc Loc, PresumedLoc PLoc,
+                             DiagnosticsEngine::Level Level, StringRef Message,
                             ArrayRef<CharSourceRange> Ranges,
-                             const SourceManager *SM,
                              DiagOrStoredDiag D) override;
 
-  void emitDiagnosticLoc(SourceLocation Loc, PresumedLoc PLoc,
+  void emitDiagnosticLoc(FullSourceLoc Loc, PresumedLoc PLoc,
                          DiagnosticsEngine::Level Level,
-                         ArrayRef<CharSourceRange> Ranges,
-                         const SourceManager &SM) override;
-
-  void emitCodeContext(SourceLocation Loc,
-                       DiagnosticsEngine::Level Level,
-                       SmallVectorImpl<CharSourceRange>& Ranges,
-                       ArrayRef<FixItHint> Hints,
-                       const SourceManager &SM) override {
-    emitSnippetAndCaret(Loc, Level, Ranges, Hints, SM);
+                         ArrayRef<CharSourceRange> Ranges) override;
+
+  void emitCodeContext(FullSourceLoc Loc, DiagnosticsEngine::Level Level,
+                       SmallVectorImpl<CharSourceRange> &Ranges,
+                       ArrayRef<FixItHint> Hints) override {
+    emitSnippetAndCaret(Loc, Level, Ranges, Hints);
   }
 
-  void emitIncludeLocation(SourceLocation Loc, PresumedLoc PLoc,
-                           const SourceManager &SM) override;
+  void emitIncludeLocation(FullSourceLoc Loc, PresumedLoc PLoc) override;
 
-  void emitImportLocation(SourceLocation Loc, PresumedLoc PLoc,
-                          StringRef ModuleName,
-                          const SourceManager &SM) override;
+  void emitImportLocation(FullSourceLoc Loc, PresumedLoc PLoc,
+                          StringRef ModuleName) override;
 
-  void emitBuildingModuleLocation(SourceLocation Loc, PresumedLoc PLoc,
-                                  StringRef ModuleName,
-                                  const SourceManager &SM) override;
+  void emitBuildingModuleLocation(FullSourceLoc Loc, PresumedLoc PLoc,
+                                  StringRef ModuleName) override;
 
 private:
   void emitFilename(StringRef Filename, const SourceManager &SM);
 
-  void emitSnippetAndCaret(SourceLocation Loc, DiagnosticsEngine::Level Level,
-                           SmallVectorImpl<CharSourceRange>& Ranges,
-                           ArrayRef<FixItHint> Hints,
-                           const SourceManager &SM);
+  void emitSnippetAndCaret(FullSourceLoc Loc, DiagnosticsEngine::Level Level,
+                           SmallVectorImpl<CharSourceRange> &Ranges,
+                           ArrayRef<FixItHint> Hints);
 
   void emitSnippet(StringRef SourceLine);
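
// The TextDiagnostic change above is mechanical: every hook used to take a
// SourceLocation plus a separate SourceManager, and now takes a FullSourceLoc
// that bundles the two. A tiny standalone model (hypothetical names, same
// shape) of why that is safer:
#include <cassert>

struct SourceManager {};
struct SourceLocation {};

struct FullSourceLoc {
  SourceLocation Loc;
  const SourceManager *SM = nullptr; // the manager needed to decode Loc
};

// Before: emitIncludeLocation(SourceLocation, const SourceManager &) left
// room to pair a location with the wrong manager. After: one argument.
void emitIncludeLocation(FullSourceLoc Loc) {
  assert(Loc.SM && "location must carry its SourceManager");
}

int main() {
  SourceManager SM;
  emitIncludeLocation(FullSourceLoc{SourceLocation{}, &SM});
}
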
 
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Frontend/Utils.h b/interpreter/llvm/src/tools/clang/include/clang/Frontend/Utils.h
index 0ee46846c804e..8ccc31982dabd 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Frontend/Utils.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/Frontend/Utils.h
@@ -184,10 +184,11 @@ createChainedIncludesSource(CompilerInstance &CI,
 ///
 /// \return A CompilerInvocation, or 0 if none was built for the given
 /// argument vector.
-std::unique_ptr<CompilerInvocation>
-createInvocationFromCommandLine(ArrayRef<const char *> Args,
-                                IntrusiveRefCntPtr<DiagnosticsEngine> Diags =
-                                    IntrusiveRefCntPtr<DiagnosticsEngine>());
+std::unique_ptr<CompilerInvocation> createInvocationFromCommandLine(
+    ArrayRef<const char *> Args,
+    IntrusiveRefCntPtr<DiagnosticsEngine> Diags =
+        IntrusiveRefCntPtr<DiagnosticsEngine>(),
+    IntrusiveRefCntPtr<vfs::FileSystem> VFS = nullptr);
 
 /// Return the value of the last argument as an integer, or a default. If Diags
 /// is non-null, emits an error if the argument is given, but non-integral.
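
// A hedged usage sketch (not from the patch) of the new VFS parameter.
// Assumes a working clang 5 tree: the only signature relied on is the
// declaration above, plus clang::vfs::getRealFileSystem() from
// "clang/Basic/VirtualFileSystem.h" standing in for a custom overlay.
#include "clang/Basic/VirtualFileSystem.h"
#include "clang/Frontend/Utils.h"
#include <vector>

std::unique_ptr<clang::CompilerInvocation> makeInvocation() {
  std::vector<const char *> Args = {"clang", "-std=c++17", "input.cpp"};
  // Passing a FileSystem lets invocation construction consult the same VFS
  // the rest of the compilation will use; null Diags keeps the default.
  return clang::createInvocationFromCommandLine(
      Args, /*Diags=*/nullptr, clang::vfs::getRealFileSystem());
}
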
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Index/IndexingAction.h b/interpreter/llvm/src/tools/clang/include/clang/Index/IndexingAction.h
index 8eed33c612275..fb703be4e5f53 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Index/IndexingAction.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/Index/IndexingAction.h
@@ -11,11 +11,14 @@
 #define LLVM_CLANG_INDEX_INDEXINGACTION_H
 
 #include "clang/Basic/LLVM.h"
+#include "llvm/ADT/ArrayRef.h"
 #include <memory>
 
 namespace clang {
+  class ASTContext;
   class ASTReader;
   class ASTUnit;
+  class Decl;
   class FrontendAction;
 
 namespace serialization {
@@ -47,8 +50,11 @@ void indexASTUnit(ASTUnit &Unit,
                  std::shared_ptr<IndexDataConsumer> DataConsumer,
                   IndexingOptions Opts);
 
-void indexModuleFile(serialization::ModuleFile &Mod,
-                     ASTReader &Reader,
+void indexTopLevelDecls(ASTContext &Ctx, ArrayRef<const Decl *> Decls,
+                        std::shared_ptr<IndexDataConsumer> DataConsumer,
+                        IndexingOptions Opts);
+
+void indexModuleFile(serialization::ModuleFile &Mod, ASTReader &Reader,
                      std::shared_ptr DataConsumer,
                      IndexingOptions Opts);
 
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Lex/HeaderSearch.h b/interpreter/llvm/src/tools/clang/include/clang/Lex/HeaderSearch.h
index 3a233f4d588a3..fdc658086aeb8 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Lex/HeaderSearch.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/Lex/HeaderSearch.h
@@ -47,7 +47,7 @@ struct HeaderFileInfo {
   /// whether it is C++ clean or not.  This can be set by the include paths or
   /// by \#pragma gcc system_header.  This is an instance of
   /// SrcMgr::CharacteristicKind.
-  unsigned DirInfo : 2;
+  unsigned DirInfo : 3;
 
   /// \brief Whether this header file info was supplied by an external source,
   /// and has not changed since.
@@ -544,10 +544,13 @@ class HeaderSearch {
   /// \param Offset [inout] An offset within ID to start parsing. On exit,
   ///        filled by the end of the parsed contents (either EOF or the
   ///        location of an end-of-module-map pragma).
-  ///
+  /// \param OriginalModuleMapFile The original path to the module map file,
+  ///        used to resolve paths within the module (this is required when
+  ///        building the module from preprocessed source).
   /// \returns true if an error occurred, false otherwise.
   bool loadModuleMapFile(const FileEntry *File, bool IsSystem,
-                         FileID ID = FileID(), unsigned *Offset = nullptr);
+                         FileID ID = FileID(), unsigned *Offset = nullptr,
+                         StringRef OriginalModuleMapFile = StringRef());
 
   /// \brief Collect the set of all known, top-level modules.
   ///
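
// Why DirInfo grew from 2 to 3 bits (a standalone illustration, not from the
// patch; presumably SrcMgr::CharacteristicKind gains module-map variants in
// this release, pushing it past four values): an unsigned bitfield stores
// values modulo 2^width, so a 2-bit field would silently wrap larger kinds.
#include <cstdio>

struct Narrow { unsigned DirInfo : 2; }; // holds 0..3 only
struct Wide   { unsigned DirInfo : 3; }; // holds 0..7

int main() {
  Narrow N; N.DirInfo = 4; // wraps to 0 -- the bug the widening prevents
  Wide W;   W.DirInfo = 4; // stored faithfully
  std::printf("%u %u\n", unsigned(N.DirInfo), unsigned(W.DirInfo)); // "0 4"
}
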
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Lex/MacroArgs.h b/interpreter/llvm/src/tools/clang/include/clang/Lex/MacroArgs.h
index 7b2a48561ff63..cfe46ceb09797 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Lex/MacroArgs.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/Lex/MacroArgs.h
@@ -53,9 +53,12 @@ class MacroArgs {
   /// Preprocessor owns which we use to avoid thrashing malloc/free.
   MacroArgs *ArgCache;
 
-  MacroArgs(unsigned NumToks, bool varargsElided)
-    : NumUnexpArgTokens(NumToks), VarargsElided(varargsElided),
-      ArgCache(nullptr) {}
+  /// MacroArgs - The number of arguments the invoked macro expects.
+  unsigned NumMacroArgs;
+
+  MacroArgs(unsigned NumToks, bool varargsElided, unsigned MacroArgs)
+      : NumUnexpArgTokens(NumToks), VarargsElided(varargsElided),
+        ArgCache(nullptr), NumMacroArgs(MacroArgs) {}
   ~MacroArgs() = default;
 
 public:
@@ -94,10 +97,9 @@ class MacroArgs {
                                       SourceLocation ExpansionLocStart,
                                       SourceLocation ExpansionLocEnd);
 
-  /// getNumArguments - Return the number of arguments passed into this macro
-  /// invocation.
-  unsigned getNumArguments() const { return NumUnexpArgTokens; }
-
+  /// getNumMacroArguments - Return the number of arguments the invoked macro
+  /// expects.
+  unsigned getNumMacroArguments() const { return NumMacroArgs; }
 
   /// isVarargsElidedUse - Return true if this is a C99 style varargs macro
   /// invocation and there was no argument specified for the "..." argument.  If
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Lex/MacroInfo.h b/interpreter/llvm/src/tools/clang/include/clang/Lex/MacroInfo.h
index 7da1e7b41ab8d..d25431b55fdc3 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Lex/MacroInfo.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/Lex/MacroInfo.h
@@ -42,14 +42,14 @@ class MacroInfo {
 
   /// \brief The list of arguments for a function-like macro.
   ///
-  /// ArgumentList points to the first of NumArguments pointers.
+  /// ParameterList points to the first of NumParameters pointers.
   ///
   /// This can be empty, for, e.g. "#define X()".  In a C99-style variadic
   /// macro, this includes the \c __VA_ARGS__ identifier on the list.
-  IdentifierInfo **ArgumentList;
+  IdentifierInfo **ParameterList;
 
-  /// \see ArgumentList
-  unsigned NumArguments;
+  /// \see ParameterList
+  unsigned NumParameters;
 
   /// \brief This is the list of tokens that the macro is defined to.
   SmallVector<Token, 8> ReplacementTokens;
@@ -153,37 +153,37 @@ class MacroInfo {
   /// \brief Set the value of the IsWarnIfUnused flag.
   void setIsWarnIfUnused(bool val) { IsWarnIfUnused = val; }
 
-  /// \brief Set the specified list of identifiers as the argument list for
+  /// \brief Set the specified list of identifiers as the parameter list for
   /// this macro.
-  void setArgumentList(ArrayRef<IdentifierInfo *> List,
+  void setParameterList(ArrayRef<IdentifierInfo *> List,
                        llvm::BumpPtrAllocator &PPAllocator) {
-    assert(ArgumentList == nullptr && NumArguments == 0 &&
-           "Argument list already set!");
+    assert(ParameterList == nullptr && NumParameters == 0 &&
+           "Parameter list already set!");
     if (List.empty())
       return;
 
-    NumArguments = List.size();
-    ArgumentList = PPAllocator.Allocate<IdentifierInfo *>(List.size());
-    std::copy(List.begin(), List.end(), ArgumentList);
+    NumParameters = List.size();
+    ParameterList = PPAllocator.Allocate<IdentifierInfo *>(List.size());
+    std::copy(List.begin(), List.end(), ParameterList);
   }
 
-  /// Arguments - The list of arguments for a function-like macro.  This can be
-  /// empty, for, e.g. "#define X()".
-  typedef IdentifierInfo *const *arg_iterator;
-  bool arg_empty() const { return NumArguments == 0; }
-  arg_iterator arg_begin() const { return ArgumentList; }
-  arg_iterator arg_end() const { return ArgumentList + NumArguments; }
-  unsigned getNumArgs() const { return NumArguments; }
-  ArrayRef<const IdentifierInfo *> args() const {
-    return ArrayRef<const IdentifierInfo *>(ArgumentList, NumArguments);
+  /// Parameters - The list of parameters for a function-like macro.  This can 
+  /// be empty, for, e.g. "#define X()".
+  typedef IdentifierInfo *const *param_iterator;
+  bool param_empty() const { return NumParameters == 0; }
+  param_iterator param_begin() const { return ParameterList; }
+  param_iterator param_end() const { return ParameterList + NumParameters; }
+  unsigned getNumParams() const { return NumParameters; }
+  ArrayRef<const IdentifierInfo *> params() const {
+    return ArrayRef<const IdentifierInfo *>(ParameterList, NumParameters);
   }
 
-  /// \brief Return the argument number of the specified identifier,
-  /// or -1 if the identifier is not a formal argument identifier.
-  int getArgumentNum(const IdentifierInfo *Arg) const {
-    for (arg_iterator I = arg_begin(), E = arg_end(); I != E; ++I)
+  /// \brief Return the parameter number of the specified identifier,
+  /// or -1 if the identifier is not a formal parameter identifier.
+  int getParameterNum(const IdentifierInfo *Arg) const {
+    for (param_iterator I = param_begin(), E = param_end(); I != E; ++I)
       if (*I == Arg)
-        return I - arg_begin();
+        return I - param_begin();
     return -1;
   }
 
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Lex/ModuleLoader.h b/interpreter/llvm/src/tools/clang/include/clang/Lex/ModuleLoader.h
index 70770d17e9ffa..ee0638b57f872 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Lex/ModuleLoader.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/Lex/ModuleLoader.h
@@ -109,6 +109,16 @@ class ModuleLoader {
                                       Module::NameVisibilityKind Visibility,
                                       bool IsInclusionDirective) = 0;
 
+  /// Attempt to load the given module from the specified source buffer. Does
+  /// not make any submodule visible; for that, use loadModule or
+  /// makeModuleVisible.
+  ///
+  /// \param Loc The location at which the module was loaded.
+  /// \param ModuleName The name of the module to build.
+  /// \param Source The source of the module: a (preprocessed) module map.
+  virtual void loadModuleFromSource(SourceLocation Loc, StringRef ModuleName,
+                                    StringRef Source) = 0;
+
   /// \brief Make the given module visible.
   virtual void makeModuleVisible(Module *Mod,
                                  Module::NameVisibilityKind Visibility,
@@ -136,6 +146,30 @@ class ModuleLoader {
 
   bool HadFatalFailure;
 };
+
+/// A module loader that doesn't know how to load modules.
+class TrivialModuleLoader : public ModuleLoader {
+public:
+  ModuleLoadResult loadModule(SourceLocation ImportLoc, ModuleIdPath Path,
+                              Module::NameVisibilityKind Visibility,
+                              bool IsInclusionDirective) override {
+    return ModuleLoadResult();
+  }
+
+  void loadModuleFromSource(SourceLocation ImportLoc, StringRef ModuleName,
+                            StringRef Source) override {}
+
+  void makeModuleVisible(Module *Mod, Module::NameVisibilityKind Visibility,
+                         SourceLocation ImportLoc) override {}
+
+  GlobalModuleIndex *loadGlobalModuleIndex(SourceLocation TriggerLoc) override {
+    return nullptr;
+  }
+  bool lookupMissingImports(StringRef Name,
+                            SourceLocation TriggerLoc) override {
+    return 0;
+  }
+};
   
 }
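
// TrivialModuleLoader above is a null-object implementation: every hook is a
// harmless no-op, so code paths that require *some* ModuleLoader (e.g. a bare
// Preprocessor in a tool or test) can run without real module support.
// A standalone model of the pattern (hypothetical names):
struct Module;

struct Loader {
  virtual ~Loader() = default;
  virtual Module *loadModule(const char *Name) = 0;
};

// The "doesn't know how to load modules" loader: never fails, never loads.
struct TrivialLoader : Loader {
  Module *loadModule(const char *) override { return nullptr; }
};

int main() {
  TrivialLoader TL;
  Loader &L = TL; // callers only ever see the interface
  return L.loadModule("std") == nullptr ? 0 : 1;
}
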
 
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Lex/ModuleMap.h b/interpreter/llvm/src/tools/clang/include/clang/Lex/ModuleMap.h
index 0fd6abe2f7d0f..11506939f9b18 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Lex/ModuleMap.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/Lex/ModuleMap.h
@@ -26,6 +26,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/TinyPtrVector.h"
 #include "llvm/ADT/Twine.h"
 #include 
 #include 
@@ -116,6 +117,11 @@ class ModuleMap {
     // Adjust ModuleMap::addHeader.
   };
 
+  /// Convert a header kind to a role. Requires Kind to not be HK_Excluded.
+  static ModuleHeaderRole headerKindToRole(Module::HeaderKind Kind);
+  /// Convert a header role to a kind.
+  static Module::HeaderKind headerRoleToKind(ModuleHeaderRole Role);
+
   /// \brief A header that is known to reside within a given module,
   /// whether it was included or excluded.
   class KnownHeader {
@@ -165,7 +171,13 @@ class ModuleMap {
   /// \brief Mapping from each header to the module that owns the contents of
   /// that header.
   HeadersMap Headers;
-  
+
+  /// Map from file sizes to modules with lazy header directives of that size.
+  mutable llvm::DenseMap<off_t, llvm::TinyPtrVector<Module*>> LazyHeadersBySize;
+  /// Map from mtimes to modules with lazy header directives with those mtimes.
+  mutable llvm::DenseMap<time_t, llvm::TinyPtrVector<Module*>>
+              LazyHeadersByModTime;
+
   /// \brief Mapping from directories with umbrella headers to the module
   /// that is generated from the umbrella header.
   ///
@@ -257,6 +269,31 @@ class ModuleMap {
   /// resolved.
   Module *resolveModuleId(const ModuleId &Id, Module *Mod, bool Complain) const;
 
+  /// Add an unresolved header to a module.
+  void addUnresolvedHeader(Module *Mod,
+                           Module::UnresolvedHeaderDirective Header);
+
+  /// Look up the given header directive to find an actual header file.
+  ///
+  /// \param M The module in which we're resolving the header directive.
+  /// \param Header The header directive to resolve.
+  /// \param RelativePathName Filled in with the relative path name from the
+  ///        module to the resolved header.
+  /// \return The resolved file, if any.
+  const FileEntry *findHeader(Module *M,
+                              const Module::UnresolvedHeaderDirective &Header,
+                              SmallVectorImpl<char> &RelativePathName);
+
+  /// Resolve the given header directive.
+  void resolveHeader(Module *M,
+                     const Module::UnresolvedHeaderDirective &Header);
+
+  /// Attempt to resolve the specified header directive as naming a builtin
+  /// header.
+  /// \return \c true if a corresponding builtin header was found.
+  bool resolveAsBuiltinHeader(Module *M,
+                              const Module::UnresolvedHeaderDirective &Header);
+
   /// \brief Looks up the modules that \p File corresponds to.
   ///
   /// If \p File represents a builtin header within Clang's builtin include
@@ -351,6 +388,15 @@ class ModuleMap {
   /// the preferred module for the header.
   ArrayRef findAllModulesForHeader(const FileEntry *File) const;
 
+  /// Resolve all lazy header directives for the specified file.
+  ///
+  /// This ensures that the HeaderFileInfo on HeaderSearch is up to date. This
+  /// is effectively internal, but is exposed so HeaderSearch can call it.
+  void resolveHeaderDirectives(const FileEntry *File) const;
+
+  /// Resolve all lazy header directives for the specified module.
+  void resolveHeaderDirectives(Module *Mod) const;
+
   /// \brief Reports errors if a module must not include a specific file.
   ///
   /// \param RequestingModule The module including a file.
@@ -507,16 +553,6 @@ class ModuleMap {
   /// false otherwise.
   bool resolveConflicts(Module *Mod, bool Complain);
 
-  /// \brief Infers the (sub)module based on the given source location and
-  /// source manager.
-  ///
-  /// \param Loc The location within the source that we are querying, along
-  /// with its source manager.
-  ///
-  /// \returns The module that owns this source location, or null if no
-  /// module owns this source location.
-  Module *inferModuleFromLocation(FullSourceLoc Loc);
-  
   /// \brief Sets the umbrella header of the given module to the given
   /// header.
   void setUmbrellaHeader(Module *Mod, const FileEntry *UmbrellaHeader,
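
// Sketch (standard containers standing in for llvm::DenseMap and
// llvm::TinyPtrVector) of the lazy-header index added above: header
// directives carrying a declared size/mtime are bucketed by those cheap keys,
// and a directive is only resolved to a real file when one with a matching
// size or mtime shows up, instead of stat'ing every declared header eagerly.
#include <ctime>
#include <map>
#include <vector>

struct Module;

std::map<long, std::vector<Module *>> LazyHeadersBySize;
std::map<std::time_t, std::vector<Module *>> LazyHeadersByModTime;

void resolveHeaderDirectives(long FileSize, std::time_t ModTime) {
  auto BySize = LazyHeadersBySize.find(FileSize);
  if (BySize != LazyHeadersBySize.end()) {
    // ... resolve each bucketed module's matching directives here ...
    LazyHeadersBySize.erase(BySize); // resolved: drop the lazy bucket
  }
  auto ByTime = LazyHeadersByModTime.find(ModTime);
  if (ByTime != LazyHeadersByModTime.end())
    LazyHeadersByModTime.erase(ByTime);
}

int main() { resolveHeaderDirectives(/*FileSize=*/4096, /*ModTime=*/0); }
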
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Lex/PTHLexer.h b/interpreter/llvm/src/tools/clang/include/clang/Lex/PTHLexer.h
index 904be792b2a98..f96af665b1574 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Lex/PTHLexer.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/Lex/PTHLexer.h
@@ -36,7 +36,7 @@ class PTHLexer : public PreprocessorLexer {
   const unsigned char* LastHashTokPtr;
 
   /// PPCond - Pointer to a side table in the PTH file that provides a
-  ///  a consise summary of the preproccessor conditional block structure.
+  ///  a concise summary of the preprocessor conditional block structure.
   ///  This is used to perform quick skipping of conditional blocks.
   const unsigned char* PPCond;
 
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Lex/Preprocessor.h b/interpreter/llvm/src/tools/clang/include/clang/Lex/Preprocessor.h
index 2acc65fef935d..fe1212456cc53 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Lex/Preprocessor.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/Lex/Preprocessor.h
@@ -283,6 +283,44 @@ class Preprocessor {
   /// This is used when loading a precompiled preamble.
   std::pair<unsigned, bool> SkipMainFilePreamble;
 
+  class PreambleConditionalStackStore {
+    enum State {
+      Off = 0,
+      Recording = 1,
+      Replaying = 2,
+    };
+
+  public:
+    PreambleConditionalStackStore() : ConditionalStackState(Off) {}
+
+    void startRecording() { ConditionalStackState = Recording; }
+    void startReplaying() { ConditionalStackState = Replaying; }
+    bool isRecording() const { return ConditionalStackState == Recording; }
+    bool isReplaying() const { return ConditionalStackState == Replaying; }
+
+    ArrayRef<PPConditionalInfo> getStack() const {
+      return ConditionalStack;
+    }
+
+    void doneReplaying() {
+      ConditionalStack.clear();
+      ConditionalStackState = Off;
+    }
+
+    void setStack(ArrayRef<PPConditionalInfo> s) {
+      if (!isRecording() && !isReplaying())
+        return;
+      ConditionalStack.clear();
+      ConditionalStack.append(s.begin(), s.end());
+    }
+
+    bool hasRecordedPreamble() const { return !ConditionalStack.empty(); }
+
+  private:
+    SmallVector<PPConditionalInfo, 4> ConditionalStack;
+    State ConditionalStackState;
+  } PreambleConditionalStack;
+
   /// \brief The current top of the stack that we're lexing from if
   /// not expanding a macro and we are lexing directly from source code.
   ///
@@ -1854,11 +1892,24 @@ class Preprocessor {
   void ReadMacroName(Token &MacroNameTok, MacroUse IsDefineUndef = MU_Other,
                      bool *ShadowFlag = nullptr);
 
+  /// ReadOptionalMacroParameterListAndBody - This consumes all (i.e. the
+  /// entire line) of the macro's tokens and adds them to MacroInfo, and while
+  /// doing so performs certain validity checks including (but not limited to):
+  ///   - # (stringization) is followed by a macro parameter
+  /// \param MacroNameTok - Token that represents the macro name
+  /// \param ImmediatelyAfterHeaderGuard - Macro follows an #ifdef header guard
+  /// 
+  ///  Either returns a pointer to a MacroInfo object OR emits a diagnostic and
+  ///  returns a nullptr if an invalid sequence of tokens is encountered.
+
+  MacroInfo *ReadOptionalMacroParameterListAndBody(
+      const Token &MacroNameTok, bool ImmediatelyAfterHeaderGuard);
+
   /// The ( starting an argument list of a macro definition has just been read.
-  /// Lex the rest of the arguments and the closing ), updating \p MI with
+  /// Lex the rest of the parameters and the closing ), updating \p MI with
   /// what we learn and saving in \p LastTok the last token read.
   /// Return true if an error occurs parsing the arg list.
-  bool ReadMacroDefinitionArgList(MacroInfo *MI, Token& LastTok);
+  bool ReadMacroParameterList(MacroInfo *MI, Token& LastTok);
 
   /// We just read a \#if or related directive and decided that the
   /// subsequent tokens are in the \#if'd out portion of the
@@ -1875,11 +1926,20 @@ class Preprocessor {
   /// \brief A fast PTH version of SkipExcludedConditionalBlock.
   void PTHSkipExcludedConditionalBlock();
 
+  /// Information about the result for evaluating an expression for a
+  /// preprocessor directive.
+  struct DirectiveEvalResult {
+    /// Whether the expression was evaluated as true or not.
+    bool Conditional;
+    /// True if the expression contained identifiers that were undefined.
+    bool IncludedUndefinedIds;
+  };
+
   /// \brief Evaluate an integer constant expression that may occur after a
-  /// \#if or \#elif directive and return it as a bool.
+  /// \#if or \#elif directive and return a \p DirectiveEvalResult object.
   ///
   /// If the expression is equivalent to "!defined(X)" return X in IfNDefMacro.
-  bool EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro);
+  DirectiveEvalResult EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro);
 
   /// \brief Install the standard preprocessor pragmas:
   /// \#pragma GCC poison/system_header/dependency and \#pragma once.
@@ -1910,7 +1970,7 @@ class Preprocessor {
 
   /// After reading "MACRO(", this method is invoked to read all of the formal
   /// arguments specified for the macro invocation.  Returns null on error.
-  MacroArgs *ReadFunctionLikeMacroArgs(Token &MacroName, MacroInfo *MI,
+  MacroArgs *ReadMacroCallArgumentList(Token &MacroName, MacroInfo *MI,
                                        SourceLocation &ExpansionEnd);
 
   /// \brief If an identifier token is read that is to be expanded
@@ -1994,20 +2054,24 @@ class Preprocessor {
   void HandleMicrosoftImportDirective(Token &Tok);
 
 public:
+  /// Check that the given module is available, producing a diagnostic if not.
+  /// \return \c true if the check failed (because the module is not available).
+  ///         \c false if the module appears to be usable.
+  static bool checkModuleIsAvailable(const LangOptions &LangOpts,
+                                     const TargetInfo &TargetInfo,
+                                     DiagnosticsEngine &Diags, Module *M);
+
   // Module inclusion testing.
   /// \brief Find the module that owns the source or header file that
   /// \p Loc points to. If the location is in a file that was included
   /// into a module, or is outside any module, returns nullptr.
   Module *getModuleForLocation(SourceLocation Loc);
 
-  /// \brief Find the module that contains the specified location, either
-  /// directly or indirectly.
-  Module *getModuleContainingLocation(SourceLocation Loc);
-
   /// \brief We want to produce a diagnostic at location IncLoc concerning a
   /// missing module import.
   ///
   /// \param IncLoc The location at which the missing import was detected.
+  /// \param M The desired module.
   /// \param MLoc A location within the desired module at which some desired
   ///        effect occurred (eg, where a desired entity was declared).
   ///
@@ -2015,9 +2079,35 @@ class Preprocessor {
   ///         Null if no such file could be determined or if a #include is not
   ///         appropriate.
   const FileEntry *getModuleHeaderToIncludeForDiagnostics(SourceLocation IncLoc,
+                                                          Module *M,
                                                           SourceLocation MLoc);
 
+  bool isRecordingPreamble() const {
+    return PreambleConditionalStack.isRecording();
+  }
+
+  bool hasRecordedPreamble() const {
+    return PreambleConditionalStack.hasRecordedPreamble();
+  }
+
+  ArrayRef<PPConditionalInfo> getPreambleConditionalStack() const {
+      return PreambleConditionalStack.getStack();
+  }
+
+  void setRecordedPreambleConditionalStack(ArrayRef<PPConditionalInfo> s) {
+    PreambleConditionalStack.setStack(s);
+  }
+
+  void setReplayablePreambleConditionalStack(ArrayRef<PPConditionalInfo> s) {
+    PreambleConditionalStack.startReplaying();
+    PreambleConditionalStack.setStack(s);
+  }
+
 private:
+  /// \brief After processing predefined file, initialize the conditional stack from
+  /// the preamble.
+  void replayPreambleConditionalStack();
+
   // Macro handling.
   void HandleDefineDirective(Token &Tok, bool ImmediatelyAfterTopLevelIfndef);
   void HandleUndefDirective();
@@ -2042,6 +2132,7 @@ class Preprocessor {
   void HandlePragmaPushMacro(Token &Tok);
   void HandlePragmaPopMacro(Token &Tok);
   void HandlePragmaIncludeAlias(Token &Tok);
+  void HandlePragmaModuleBuild(Token &Tok);
   IdentifierInfo *ParsePragmaPushOrPopMacro(Token &Tok);
 
   // Return true and store the first token only if any CommentHandler
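
// The preamble #if-stack machinery above in one runnable picture: while
// generating a preamble the store *records* the conditionals still open at
// the cut point; when the preamble PCH is reused, the same data is *replayed*
// into the lexer before it resumes mid-file. A trimmed standalone copy:
#include <cassert>
#include <vector>

struct PPConditionalInfo { bool WasSkipping; };

class PreambleConditionalStackStore {
  enum State { Off, Recording, Replaying } St = Off;
  std::vector<PPConditionalInfo> Stack;

public:
  void startRecording() { St = Recording; }
  void startReplaying() { St = Replaying; }
  bool isRecording() const { return St == Recording; }
  bool isReplaying() const { return St == Replaying; }
  void setStack(const std::vector<PPConditionalInfo> &S) {
    if (!isRecording() && !isReplaying()) return; // ignored when Off
    Stack = S;
  }
  bool hasRecordedPreamble() const { return !Stack.empty(); }
  void doneReplaying() { Stack.clear(); St = Off; }
};

int main() {
  PreambleConditionalStackStore S;
  S.setStack({{false}});            // Off: silently dropped
  assert(!S.hasRecordedPreamble());
  S.startRecording();
  S.setStack({{false}});            // the #if open at the preamble boundary
  assert(S.hasRecordedPreamble());
  S.startReplaying();               // later run: restore, then lex onward
  S.doneReplaying();
}
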
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Lex/PreprocessorLexer.h b/interpreter/llvm/src/tools/clang/include/clang/Lex/PreprocessorLexer.h
index 6d6cf05a96c45..5c2e4d41454b6 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Lex/PreprocessorLexer.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/Lex/PreprocessorLexer.h
@@ -17,6 +17,7 @@
 
 #include "clang/Lex/MultipleIncludeOpt.h"
 #include "clang/Lex/Token.h"
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/SmallVector.h"
 
 namespace clang {
@@ -176,6 +177,11 @@ class PreprocessorLexer {
   conditional_iterator conditional_end() const { 
     return ConditionalStack.end(); 
   }
+
+  void setConditionalLevels(ArrayRef<PPConditionalInfo> CL) {
+    ConditionalStack.clear();
+    ConditionalStack.append(CL.begin(), CL.end());
+  }
 };
 
 }  // end namespace clang
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Lex/PreprocessorOptions.h b/interpreter/llvm/src/tools/clang/include/clang/Lex/PreprocessorOptions.h
index 58d79f7ff81a6..d91c665cf1dd5 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Lex/PreprocessorOptions.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/Lex/PreprocessorOptions.h
@@ -80,7 +80,14 @@ class PreprocessorOptions {
   /// The boolean indicates whether the preamble ends at the start of a new
   /// line.
   std::pair PrecompiledPreambleBytes;
-  
+
+  /// \brief True indicates that a preamble is being generated.
+  ///
+  /// When the lexer is done, one of the things that need to be preserved is the
+  /// conditional #if stack, so the ASTWriter/ASTReader can save/restore it when
+  /// processing the rest of the file.
+  bool GeneratePreamble;
+
   /// The implicit PTH input included at the start of the translation unit, or
   /// empty.
   std::string ImplicitPTHInclude;
@@ -88,6 +95,16 @@ class PreprocessorOptions {
   /// If given, a PTH cache file to use for speeding up header parsing.
   std::string TokenCache;
 
+  /// When enabled, preprocessor is in a mode for parsing a single file only.
+  ///
+  /// Disables #includes of other files and if there are unresolved identifiers
+  /// in preprocessor directive conditions it causes all blocks to be parsed so
+  /// that the client can get the maximum amount of information from the parser.
+  bool SingleFileParseMode = false;
+
+  /// When enabled, the preprocessor will construct editor placeholder tokens.
+  bool LexEditorPlaceholders = true;
+
   /// \brief True if the SourceManager should report the original file name for
   /// contents of files that were remapped to other files. Defaults to true.
   bool RemappedFilesKeepOriginalName;
@@ -144,6 +161,7 @@ class PreprocessorOptions {
                           AllowPCHWithCompilerErrors(false),
                           DumpDeserializedPCHDecls(false),
                           PrecompiledPreambleBytes(0, true),
+                          GeneratePreamble(false),
                           RemappedFilesKeepOriginalName(true),
                           RetainRemappedFileBuffers(false),
                           ObjCXXARCStandardLibrary(ARCXX_nolib) { }
@@ -173,6 +191,8 @@ class PreprocessorOptions {
     ImplicitPCHInclude.clear();
     ImplicitPTHInclude.clear();
     TokenCache.clear();
+    SingleFileParseMode = false;
+    LexEditorPlaceholders = true;
     RetainRemappedFileBuffers = true;
     PrecompiledPreambleBytes.first = 0;
     PrecompiledPreambleBytes.second = 0;
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Parse/Parser.h b/interpreter/llvm/src/tools/clang/include/clang/Parse/Parser.h
index 482573a660220..5410c9ca7f932 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Parse/Parser.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/Parse/Parser.h
@@ -171,6 +171,7 @@ class Parser : public CodeCompletionHandler {
   std::unique_ptr<PragmaHandler> FPContractHandler;
   std::unique_ptr<PragmaHandler> OpenCLExtensionHandler;
   std::unique_ptr<PragmaHandler> OpenMPHandler;
+  std::unique_ptr<PragmaHandler> PCSectionHandler;
   std::unique_ptr<PragmaHandler> MSCommentHandler;
   std::unique_ptr<PragmaHandler> MSDetectMismatchHandler;
   std::unique_ptr<PragmaHandler> MSPointersToMembers;
@@ -339,8 +340,9 @@ class Parser : public CodeCompletionHandler {
   }
 
   /// ConsumeToken - Consume the current 'peek token' and lex the next one.
-  /// This does not work with special tokens: string literals, code completion
-  /// and balanced tokens must be handled using the specific consume methods.
+  /// This does not work with special tokens: string literals, code completion,
+  /// annotation tokens and balanced tokens must be handled using the specific
+  /// consume methods.
   /// Returns the location of the consumed token.
   SourceLocation ConsumeToken() {
     assert(!isTokenSpecial() &&
@@ -367,6 +369,27 @@ class Parser : public CodeCompletionHandler {
     return true;
   }
 
+  /// ConsumeAnyToken - Dispatch to the right Consume* method based on the
+  /// current token type.  This should only be used in cases where the type of
+  /// the token really isn't known, e.g. in error recovery.
+  SourceLocation ConsumeAnyToken(bool ConsumeCodeCompletionTok = false) {
+    if (isTokenParen())
+      return ConsumeParen();
+    if (isTokenBracket())
+      return ConsumeBracket();
+    if (isTokenBrace())
+      return ConsumeBrace();
+    if (isTokenStringLiteral())
+      return ConsumeStringToken();
+    if (Tok.is(tok::code_completion))
+      return ConsumeCodeCompletionTok ? ConsumeCodeCompletionToken()
+                                      : handleUnexpectedCodeCompletionToken();
+    if (Tok.isAnnotation())
+      return ConsumeAnnotationToken();
+    return ConsumeToken();
+  }
+
   SourceLocation getEndOfPreviousToken() {
     return PP.getLocForEndOfToken(PrevTokLocation);
   }
@@ -401,7 +424,7 @@ class Parser : public CodeCompletionHandler {
   /// isTokenSpecial - True if this token requires special consumption methods.
   bool isTokenSpecial() const {
     return isTokenStringLiteral() || isTokenParen() || isTokenBracket() ||
-           isTokenBrace() || Tok.is(tok::code_completion);
+           isTokenBrace() || Tok.is(tok::code_completion) || Tok.isAnnotation();
   }
 
   /// \brief Returns true if the current token is '=' or is a type of '='.
@@ -417,22 +440,12 @@ class Parser : public CodeCompletionHandler {
       PP.EnterToken(Next);
   }
 
-  /// ConsumeAnyToken - Dispatch to the right Consume* method based on the
-  /// current token type.  This should only be used in cases where the type of
-  /// the token really isn't known, e.g. in error recovery.
-  SourceLocation ConsumeAnyToken(bool ConsumeCodeCompletionTok = false) {
-    if (isTokenParen())
-      return ConsumeParen();
-    if (isTokenBracket())
-      return ConsumeBracket();
-    if (isTokenBrace())
-      return ConsumeBrace();
-    if (isTokenStringLiteral())
-      return ConsumeStringToken();
-    if (Tok.is(tok::code_completion))
-      return ConsumeCodeCompletionTok ? ConsumeCodeCompletionToken()
-                                      : handleUnexpectedCodeCompletionToken();
-    return ConsumeToken();
+  SourceLocation ConsumeAnnotationToken() {
+    assert(Tok.isAnnotation() && "wrong consume method");
+    SourceLocation Loc = Tok.getLocation();
+    PrevTokLocation = Tok.getAnnotationEndLoc();
+    PP.Lex(Tok);
+    return Loc;
   }
 
   /// ConsumeParen - This consume method keeps the paren count up-to-date.
@@ -627,7 +640,7 @@ class Parser : public CodeCompletionHandler {
   }
 
   /// getTypeAnnotation - Read a parsed type out of an annotation token.
-  static ParsedType getTypeAnnotation(Token &Tok) {
+  static ParsedType getTypeAnnotation(const Token &Tok) {
     return ParsedType::getFromOpaquePtr(Tok.getAnnotationValue());
   }
 
@@ -638,7 +651,7 @@ class Parser : public CodeCompletionHandler {
 
   /// \brief Read an already-translated primary expression out of an annotation
   /// token.
-  static ExprResult getExprAnnotation(Token &Tok) {
+  static ExprResult getExprAnnotation(const Token &Tok) {
     return ExprResult::getFromOpaquePointer(Tok.getAnnotationValue());
   }
 
@@ -1484,6 +1497,8 @@ class Parser : public CodeCompletionHandler {
   };
 
   ExprResult ParseExpression(TypeCastState isTypeCast = NotTypeCast);
+  ExprResult ParseConstantExpressionInExprEvalContext(
+      TypeCastState isTypeCast = NotTypeCast);
   ExprResult ParseConstantExpression(TypeCastState isTypeCast = NotTypeCast);
   ExprResult ParseConstraintExpression();
   // Expr that doesn't include commas.
@@ -1873,6 +1888,7 @@ class Parser : public CodeCompletionHandler {
     DSC_trailing, // C++11 trailing-type-specifier in a trailing return type
     DSC_alias_declaration, // C++11 type-specifier-seq in an alias-declaration
     DSC_top_level, // top-level/namespace declaration context
+    DSC_template_param, // template parameter context
     DSC_template_type_arg, // template type argument context
     DSC_objc_method_result, // ObjC method result context, enables 'instancetype'
     DSC_condition // condition declaration context
@@ -1883,6 +1899,7 @@ class Parser : public CodeCompletionHandler {
   static bool isTypeSpecifier(DeclSpecContext DSC) {
     switch (DSC) {
     case DSC_normal:
+    case DSC_template_param:
     case DSC_class:
     case DSC_top_level:
     case DSC_objc_method_result:
@@ -1903,6 +1920,7 @@ class Parser : public CodeCompletionHandler {
   static bool isClassTemplateDeductionContext(DeclSpecContext DSC) {
     switch (DSC) {
     case DSC_normal:
+    case DSC_template_param:
     case DSC_class:
     case DSC_top_level:
     case DSC_condition:
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Rewrite/Frontend/FrontendActions.h b/interpreter/llvm/src/tools/clang/include/clang/Rewrite/Frontend/FrontendActions.h
index 27976eac4ed23..5f83ac16fedf6 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Rewrite/Frontend/FrontendActions.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/Rewrite/Frontend/FrontendActions.h
@@ -11,6 +11,7 @@
 #define LLVM_CLANG_REWRITE_FRONTEND_FRONTENDACTIONS_H
 
 #include "clang/Frontend/FrontendAction.h"
+#include "llvm/Support/raw_ostream.h"
 
 namespace clang {
 class FixItRewriter;
@@ -34,8 +35,7 @@ class FixItAction : public ASTFrontendAction {
   std::unique_ptr<ASTConsumer> CreateASTConsumer(CompilerInstance &CI,
                                                  StringRef InFile) override;
 
-  bool BeginSourceFileAction(CompilerInstance &CI,
-                             StringRef Filename) override;
+  bool BeginSourceFileAction(CompilerInstance &CI) override;
 
   void EndSourceFileAction() override;
 
@@ -74,7 +74,10 @@ class RewriteTestAction : public PreprocessorFrontendAction {
 };
 
 class RewriteIncludesAction : public PreprocessorFrontendAction {
+  std::shared_ptr<llvm::raw_ostream> OutputStream;
+  class RewriteImportsListener;
 protected:
+  bool BeginSourceFileAction(CompilerInstance &CI) override;
   void ExecuteAction() override;
 };
 
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Sema/AttributeList.h b/interpreter/llvm/src/tools/clang/include/clang/Sema/AttributeList.h
index f3b042c9ce791..6bdd9d5fcdb6f 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Sema/AttributeList.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/Sema/AttributeList.h
@@ -915,6 +915,7 @@ enum AttributeDeclKind {
   ExpectedTypeOrNamespace,
   ExpectedObjectiveCInterface,
   ExpectedMethodOrProperty,
+  ExpectedFunctionOrMethodOrProperty,
   ExpectedStructOrUnion,
   ExpectedStructOrUnionOrClass,
   ExpectedType,
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Sema/DeclSpec.h b/interpreter/llvm/src/tools/clang/include/clang/Sema/DeclSpec.h
index df5e1050367e7..bc817150ab82c 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Sema/DeclSpec.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/Sema/DeclSpec.h
@@ -1999,41 +1999,6 @@ class Declarator {
     llvm_unreachable("unknown context kind!");
   }
 
-  /// diagnoseIdentifier - Return true if the identifier is prohibited and
-  /// should be diagnosed (because it cannot be anything else).
-  bool diagnoseIdentifier() const {
-    switch (Context) {
-    case FileContext:
-    case KNRTypeListContext:
-    case MemberContext:
-    case BlockContext:
-    case ForContext:
-    case InitStmtContext:
-    case ConditionContext:
-    case PrototypeContext:
-    case LambdaExprParameterContext:
-    case TemplateParamContext:
-    case CXXCatchContext:
-    case ObjCCatchContext:
-    case TypeNameContext:
-    case FunctionalCastContext:
-    case ConversionIdContext:
-    case ObjCParameterContext:
-    case ObjCResultContext:
-    case BlockLiteralContext:
-    case CXXNewContext:
-    case LambdaExprContext:
-      return false;
-
-    case AliasDeclContext:
-    case AliasTemplateContext:
-    case TemplateTypeArgContext:
-    case TrailingReturnContext:
-      return true;
-    }
-    llvm_unreachable("unknown context kind!");
-  }
-
   /// Return true if the context permits a C++17 decomposition declarator.
   bool mayHaveDecompositionDeclarator() const {
     switch (Context) {
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Sema/DelayedDiagnostic.h b/interpreter/llvm/src/tools/clang/include/clang/Sema/DelayedDiagnostic.h
index b73ec0868f52f..d65dbf0cd34e0 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Sema/DelayedDiagnostic.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/Sema/DelayedDiagnostic.h
@@ -124,7 +124,8 @@ class DelayedDiagnostic {
 
   static DelayedDiagnostic makeAvailability(AvailabilityResult AR,
                                             SourceLocation Loc,
-                                            const NamedDecl *D,
+                                            const NamedDecl *ReferringDecl,
+                                            const NamedDecl *OffendingDecl,
                                             const ObjCInterfaceDecl *UnknownObjCClass,
                                             const ObjCPropertyDecl  *ObjCProperty,
                                             StringRef Msg,
@@ -164,9 +165,13 @@ class DelayedDiagnostic {
     return *reinterpret_cast(AccessData);
   }
 
-  const NamedDecl *getAvailabilityDecl() const {
+  const NamedDecl *getAvailabilityReferringDecl() const {
     assert(Kind == Availability && "Not an availability diagnostic.");
-    return AvailabilityData.Decl;
+    return AvailabilityData.ReferringDecl;
+  }
+
+  const NamedDecl *getAvailabilityOffendingDecl() const {
+    return AvailabilityData.OffendingDecl;
   }
 
   StringRef getAvailabilityMessage() const {
@@ -213,7 +218,8 @@ class DelayedDiagnostic {
 private:
 
   struct AD {
-    const NamedDecl *Decl;
+    const NamedDecl *ReferringDecl;
+    const NamedDecl *OffendingDecl;
     const ObjCInterfaceDecl *UnknownObjCClass;
     const ObjCPropertyDecl  *ObjCProperty;
     const char *Message;
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Sema/Lookup.h b/interpreter/llvm/src/tools/clang/include/clang/Sema/Lookup.h
index 145355c5ec3d1..ea32997d40667 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Sema/Lookup.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/Sema/Lookup.h
@@ -18,6 +18,8 @@
 #include "clang/AST/DeclCXX.h"
 #include "clang/Sema/Sema.h"
 
+#include "llvm/ADT/Optional.h"
+
 namespace clang {
 
 /// @brief Represents the results of name lookup.
@@ -273,7 +275,7 @@ class LookupResult {
   /// declarations, such as those in modules that have not yet been imported.
   bool isHiddenDeclarationVisible(NamedDecl *ND) const {
     return AllowHidden ||
-           (isForRedeclaration() && ND->isExternallyVisible());
+           (isForRedeclaration() && ND->hasExternalFormalLinkage());
   }
 
   /// Sets whether tag declarations should be hidden by non-tag
@@ -465,9 +467,10 @@ class LookupResult {
         Paths = nullptr;
       }
     } else {
-      AmbiguityKind SavedAK = Ambiguity;
+      llvm::Optional<AmbiguityKind> SavedAK;
       bool WasAmbiguous = false;
       if (ResultKind == Ambiguous) {
+        SavedAK = Ambiguity;
         WasAmbiguous = true;
       }
       ResultKind = Found;
@@ -478,7 +481,7 @@ class LookupResult {
       if (ResultKind == Ambiguous) {
         (void)WasAmbiguous;
         assert(WasAmbiguous);
-        Ambiguity = SavedAK;
+        Ambiguity = SavedAK.getValue();
       } else if (Paths) {
         deletePaths(Paths);
         Paths = nullptr;
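
// The Lookup.h hunk above fixes a read of effectively-uninitialized state:
// SavedAK was copied from Ambiguity even when the result was not ambiguous.
// Wrapping it in llvm::Optional makes "nothing was saved" explicit. The same
// idea with std::optional (C++17), standalone:
#include <cassert>
#include <optional>

enum AmbiguityKind { AmbiguousBaseSubobjectTypes, AmbiguousReference };

struct Result {
  bool Ambiguous = false;
  AmbiguityKind Kind; // meaningful only while Ambiguous is true
};

int main() {
  Result R; // not ambiguous, so R.Kind holds indeterminate garbage
  std::optional<AmbiguityKind> SavedAK;
  if (R.Ambiguous)
    SavedAK = R.Kind;           // read the field only under the guard
  assert(!SavedAK.has_value()); // and restore only what was saved
}
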
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Sema/Overload.h b/interpreter/llvm/src/tools/clang/include/clang/Sema/Overload.h
index 941b772b78801..ffdf011d1dcbc 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Sema/Overload.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/Sema/Overload.h
@@ -633,12 +633,9 @@ namespace clang {
     /// Might be a UsingShadowDecl or a FunctionTemplateDecl.
     DeclAccessPair FoundDecl;
 
-    // BuiltinTypes - Provides the return and parameter types of a
-    // built-in overload candidate. Only valid when Function is NULL.
-    struct {
-      QualType ResultTy;
-      QualType ParamTypes[3];
-    } BuiltinTypes;
+    /// BuiltinParamTypes - Provides the parameter types of a built-in overload
+    /// candidate. Only valid when Function is NULL.
+    QualType BuiltinParamTypes[3];
 
     /// Surrogate - The conversion function for which this candidate
     /// is a surrogate, but only if IsSurrogate is true.
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Sema/ParsedTemplate.h b/interpreter/llvm/src/tools/clang/include/clang/Sema/ParsedTemplate.h
index 03de9ff6ae44f..01a4ab3f37a51 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Sema/ParsedTemplate.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/Sema/ParsedTemplate.h
@@ -145,12 +145,15 @@ namespace clang {
   /// expressions, or template names, and the source locations for important 
   /// tokens. All of the information about template arguments is allocated 
   /// directly after this structure.
-  struct TemplateIdAnnotation {
+  struct TemplateIdAnnotation final
+      : private llvm::TrailingObjects<TemplateIdAnnotation,
+                                      ParsedTemplateArgument> {
+    friend TrailingObjects;
     /// \brief The nested-name-specifier that precedes the template name.
     CXXScopeSpec SS;
 
-    /// TemplateKWLoc - The location of the template keyword within the
-    /// source.
+    /// TemplateKWLoc - The location of the template keyword.
+    /// For e.g. typename T::template Y<U>
     SourceLocation TemplateKWLoc;
 
     /// TemplateNameLoc - The location of the template name within the
@@ -183,34 +186,56 @@ namespace clang {
     
     /// \brief Retrieves a pointer to the template arguments
     ParsedTemplateArgument *getTemplateArgs() { 
-      return reinterpret_cast<ParsedTemplateArgument *>(this + 1);
+      return getTrailingObjects<ParsedTemplateArgument>();
     }
 
     /// \brief Creates a new TemplateIdAnnotation with NumArgs arguments and
     /// appends it to List.
     static TemplateIdAnnotation *
-    Allocate(unsigned NumArgs, SmallVectorImpl<TemplateIdAnnotation *> &List) {
-      TemplateIdAnnotation *TemplateId
-        = (TemplateIdAnnotation *)std::malloc(sizeof(TemplateIdAnnotation) +
-                                      sizeof(ParsedTemplateArgument) * NumArgs);
-      TemplateId->NumArgs = NumArgs;
-      
-      // Default-construct nested-name-specifier.
-      new (&TemplateId->SS) CXXScopeSpec();
-      
-      // Default-construct parsed template arguments.
-      ParsedTemplateArgument *TemplateArgs = TemplateId->getTemplateArgs();
-      for (unsigned I = 0; I != NumArgs; ++I)
-        new (TemplateArgs + I) ParsedTemplateArgument();
-      
-      List.push_back(TemplateId);
+    Create(CXXScopeSpec SS, SourceLocation TemplateKWLoc,
+           SourceLocation TemplateNameLoc, IdentifierInfo *Name,
+           OverloadedOperatorKind OperatorKind,
+           ParsedTemplateTy OpaqueTemplateName, TemplateNameKind TemplateKind,
+           SourceLocation LAngleLoc, SourceLocation RAngleLoc,
+           ArrayRef<ParsedTemplateArgument> TemplateArgs,
+           SmallVectorImpl<TemplateIdAnnotation *> &CleanupList) {
+
+      TemplateIdAnnotation *TemplateId = new (std::malloc(
+          totalSizeToAlloc<ParsedTemplateArgument>(TemplateArgs.size())))
+          TemplateIdAnnotation(SS, TemplateKWLoc, TemplateNameLoc, Name,
+                               OperatorKind, OpaqueTemplateName, TemplateKind,
+                               LAngleLoc, RAngleLoc, TemplateArgs);
+      CleanupList.push_back(TemplateId);
       return TemplateId;
     }
-    
-    void Destroy() { 
-      SS.~CXXScopeSpec();
+
+    void Destroy() {
+      std::for_each(
+          getTemplateArgs(), getTemplateArgs() + NumArgs,
+          [](ParsedTemplateArgument &A) { A.~ParsedTemplateArgument(); });
+      this->~TemplateIdAnnotation();
       free(this); 
     }
+  private:
+    TemplateIdAnnotation(const TemplateIdAnnotation &) = delete;
+
+    TemplateIdAnnotation(CXXScopeSpec SS, SourceLocation TemplateKWLoc,
+                         SourceLocation TemplateNameLoc, IdentifierInfo *Name,
+                         OverloadedOperatorKind OperatorKind,
+                         ParsedTemplateTy OpaqueTemplateName,
+                         TemplateNameKind TemplateKind,
+                         SourceLocation LAngleLoc, SourceLocation RAngleLoc,
+                         ArrayRef<ParsedTemplateArgument> TemplateArgs) noexcept
+        : SS(SS), TemplateKWLoc(TemplateKWLoc),
+          TemplateNameLoc(TemplateNameLoc), Name(Name), Operator(OperatorKind),
+          Template(OpaqueTemplateName), Kind(TemplateKind),
+          LAngleLoc(LAngleLoc), RAngleLoc(RAngleLoc),
+          NumArgs(TemplateArgs.size()) {
+
+      std::uninitialized_copy(TemplateArgs.begin(), TemplateArgs.end(),
+                              getTemplateArgs());
+    }
+    ~TemplateIdAnnotation() = default;
   };
 
   /// Retrieves the range of the given template parameter lists.
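
The `TemplateIdAnnotation` rework above trades a hand-rolled `malloc` layout (`this + 1` pointer arithmetic) for `llvm::TrailingObjects`, which computes the combined allocation size and hands out correctly typed pointers to the trailing array. A reduced sketch of the idiom with invented types (`llvm::TrailingObjects` and its `totalSizeToAlloc`/`getTrailingObjects` helpers are real):

#include "llvm/ADT/ArrayRef.h"
#include "llvm/Support/TrailingObjects.h"
#include <cstdlib>
#include <memory>
#include <new>

struct Arg { int Value; };

class Annotation final : private llvm::TrailingObjects<Annotation, Arg> {
  friend TrailingObjects;
  unsigned NumArgs;

  explicit Annotation(llvm::ArrayRef<Arg> Args) : NumArgs(Args.size()) {
    std::uninitialized_copy(Args.begin(), Args.end(), getTrailingObjects<Arg>());
  }

public:
  Arg *getArgs() { return getTrailingObjects<Arg>(); }

  static Annotation *create(llvm::ArrayRef<Arg> Args) {
    // Header plus NumArgs trailing Args, with correct alignment.
    void *Mem = std::malloc(totalSizeToAlloc<Arg>(Args.size()));
    return new (Mem) Annotation(Args);
  }

  void destroy() {
    this->~Annotation();
    std::free(this);
  }
};
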
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Sema/ScopeInfo.h b/interpreter/llvm/src/tools/clang/include/clang/Sema/ScopeInfo.h
index 4487c7c2ccb6c..4251fa649a82c 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Sema/ScopeInfo.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/Sema/ScopeInfo.h
@@ -388,6 +388,8 @@ class FunctionScopeInfo {
           (HasBranchProtectedScope && HasBranchIntoScope));
   }
 
+  bool isCoroutine() const { return !FirstCoroutineStmtLoc.isInvalid(); }
+
   void setFirstCoroutineStmt(SourceLocation Loc, StringRef Keyword) {
     assert(FirstCoroutineStmtLoc.isInvalid() &&
                    "first coroutine statement location already set");
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Sema/Sema.h b/interpreter/llvm/src/tools/clang/include/clang/Sema/Sema.h
index eb3b87c689427..8349382109bee 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Sema/Sema.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/Sema/Sema.h
@@ -338,6 +338,35 @@ class Sema {
   /// \brief Source location for newly created implicit MSInheritanceAttrs
   SourceLocation ImplicitMSInheritanceAttrLoc;
 
+  /// \brief pragma clang section kind
+  enum PragmaClangSectionKind {
+    PCSK_Invalid      = 0,
+    PCSK_BSS          = 1,
+    PCSK_Data         = 2,
+    PCSK_Rodata       = 3,
+    PCSK_Text         = 4
+   };
+
+  enum PragmaClangSectionAction {
+    PCSA_Set     = 0,
+    PCSA_Clear   = 1
+  };
+
+  struct PragmaClangSection {
+    std::string SectionName;
+    bool Valid = false;
+    SourceLocation PragmaLocation;
+
+    void Act(SourceLocation PragmaLocation,
+             PragmaClangSectionAction Action,
+             StringLiteral* Name);
+   };
+
+   PragmaClangSection PragmaClangBSSSection;
+   PragmaClangSection PragmaClangDataSection;
+   PragmaClangSection PragmaClangRodataSection;
+   PragmaClangSection PragmaClangTextSection;
+
   enum PragmaMsStackAction {
     PSK_Reset     = 0x0,                // #pragma ()
     PSK_Set       = 0x1,                // #pragma (value)
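
The `PragmaClangSection` state added above backs Clang's `#pragma clang section` directive, which routes subsequent global definitions into user-named sections until the name is reset. A small usage example (the section names are arbitrary):

// Route the following globals into custom sections.
#pragma clang section bss = "my_bss" data = "my_data" rodata = "my_rodata"

int Zeroed;             // emitted into "my_bss"
int Initialized = 42;   // emitted into "my_data"
const int Constant = 7; // emitted into "my_rodata"

// An empty name restores the default section for that kind.
#pragma clang section bss = "" data = "" rodata = ""
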
@@ -726,17 +755,37 @@ class Sema {
   class SynthesizedFunctionScope {
     Sema &S;
     Sema::ContextRAII SavedContext;
+    bool PushedCodeSynthesisContext = false;
 
   public:
     SynthesizedFunctionScope(Sema &S, DeclContext *DC)
-      : S(S), SavedContext(S, DC)
-    {
+        : S(S), SavedContext(S, DC) {
       S.PushFunctionScope();
       S.PushExpressionEvaluationContext(
           Sema::ExpressionEvaluationContext::PotentiallyEvaluated);
+      if (auto *FD = dyn_cast<FunctionDecl>(DC))
+        FD->setWillHaveBody(true);
+      else
+        assert(isa<ObjCMethodDecl>(DC));
+    }
+
+    void addContextNote(SourceLocation UseLoc) {
+      assert(!PushedCodeSynthesisContext);
+
+      Sema::CodeSynthesisContext Ctx;
+      Ctx.Kind = Sema::CodeSynthesisContext::DefiningSynthesizedFunction;
+      Ctx.PointOfInstantiation = UseLoc;
+      Ctx.Entity = cast<Decl>(S.CurContext);
+      S.pushCodeSynthesisContext(Ctx);
+
+      PushedCodeSynthesisContext = true;
     }
 
     ~SynthesizedFunctionScope() {
+      if (PushedCodeSynthesisContext)
+        S.popCodeSynthesisContext();
+      if (auto *FD = dyn_cast<FunctionDecl>(S.CurContext))
+        FD->setWillHaveBody(false);
       S.PopExpressionEvaluationContext();
       S.PopFunctionScopeInfo();
     }
@@ -1130,6 +1179,10 @@ class Sema {
   /// correctly named definition after the renamed definition.
   llvm::SmallPtrSet<const NamedDecl *, 4> TypoCorrectedFunctionDefinitions;
 
+  /// Stack of types that correspond to the parameter entities that are
+  /// currently being copy-initialized. Can be empty.
+  llvm::SmallVector<QualType, 2> CurrentParameterCopyTypes;
+
   void ReadMethodPool(Selector Sel);
   void updateOutOfDateSelector(Selector Sel);
 
@@ -1269,6 +1322,7 @@ class Sema {
 
   void emitAndClearUnusedLocalTypedefWarnings();
 
+  void ActOnStartOfTranslationUnit();
   void ActOnEndOfTranslationUnit();
 
   void CheckDelegatingCtorCycles();
@@ -1544,6 +1598,11 @@ class Sema {
                                  llvm::SmallVectorImpl<Module *> *Modules);
 
   bool hasVisibleMergedDefinition(NamedDecl *Def);
+  bool hasMergedDefinitionInCurrentModule(NamedDecl *Def);
+
+  /// Determine if \p D and \p Suggested have a structurally compatible
+  /// layout as described in C11 6.2.7/1.
+  bool hasStructuralCompatLayout(Decl *D, Decl *Suggested);
 
   /// Determine if \p D has a visible definition. If not, suggest a declaration
   /// that should be made visible to expose the definition.
@@ -1559,6 +1618,12 @@ class Sema {
   hasVisibleDefaultArgument(const NamedDecl *D,
                             llvm::SmallVectorImpl<Module *> *Modules = nullptr);
 
+  /// Determine if there is a visible declaration of \p D that is an explicit
+  /// specialization declaration for a specialization of a template. (For a
+  /// member specialization, use hasVisibleMemberSpecialization.)
+  bool hasVisibleExplicitSpecialization(
+      const NamedDecl *D, llvm::SmallVectorImpl<Module *> *Modules = nullptr);
+
   /// Determine if there is a visible declaration of \p D that is a member
   /// specialization declaration (as opposed to an instantiated declaration).
   bool hasVisibleMemberSpecialization(
@@ -1626,9 +1691,13 @@ class Sema {
   //
 
   struct SkipBodyInfo {
-    SkipBodyInfo() : ShouldSkip(false), Previous(nullptr) {}
+    SkipBodyInfo()
+        : ShouldSkip(false), CheckSameAsPrevious(false), Previous(nullptr),
+          New(nullptr) {}
     bool ShouldSkip;
+    bool CheckSameAsPrevious;
     NamedDecl *Previous;
+    NamedDecl *New;
   };
 
   DeclGroupPtrTy ConvertDeclToDeclGroup(Decl *Ptr, Decl *OwnedType = nullptr);
@@ -2142,15 +2211,14 @@ class Sema {
   };
 
   Decl *ActOnTag(Scope *S, unsigned TagSpec, TagUseKind TUK,
-                 SourceLocation KWLoc, CXXScopeSpec &SS,
-                 IdentifierInfo *Name, SourceLocation NameLoc,
-                 AttributeList *Attr, AccessSpecifier AS,
-                 SourceLocation ModulePrivateLoc,
-                 MultiTemplateParamsArg TemplateParameterLists,
-                 bool &OwnedDecl, bool &IsDependent,
-                 SourceLocation ScopedEnumKWLoc,
+                 SourceLocation KWLoc, CXXScopeSpec &SS, IdentifierInfo *Name,
+                 SourceLocation NameLoc, AttributeList *Attr,
+                 AccessSpecifier AS, SourceLocation ModulePrivateLoc,
+                 MultiTemplateParamsArg TemplateParameterLists, bool &OwnedDecl,
+                 bool &IsDependent, SourceLocation ScopedEnumKWLoc,
                  bool ScopedEnumUsesClassTag, TypeResult UnderlyingType,
-                 bool IsTypeSpecifier, SkipBodyInfo *SkipBody = nullptr);
+                 bool IsTypeSpecifier, bool IsTemplateParamOrArg,
+                 SkipBodyInfo *SkipBody = nullptr);
 
   Decl *ActOnTemplatedFriendTag(Scope *S, SourceLocation FriendLoc,
                                 unsigned TagSpec, SourceLocation TagLoc,
@@ -2215,6 +2283,12 @@ class Sema {
   /// struct, or union).
   void ActOnTagStartDefinition(Scope *S, Decl *TagDecl);
 
+  /// Perform ODR-like check for C/ObjC when merging tag types from modules.
+  /// Differently from C++, actually parse the body and reject / error out
+  /// in case of a structural mismatch.
+  bool ActOnDuplicateDefinition(DeclSpec &DS, Decl *Prev,
+                                SkipBodyInfo &SkipBody);
+
   typedef void *SkippedDefinitionContext;
 
   /// \brief Invoked when we enter a tag definition that we're skipping.
@@ -2268,8 +2342,8 @@ class Sema {
 
   Decl *ActOnEnumConstant(Scope *S, Decl *EnumDecl, Decl *LastEnumConstant,
                           SourceLocation IdLoc, IdentifierInfo *Id,
-                          AttributeList *Attrs,
-                          SourceLocation EqualLoc, Expr *Val);
+                          AttributeList *Attrs, SourceLocation EqualLoc,
+                          Expr *Val);
   void ActOnEnumBody(SourceLocation EnumLoc, SourceRange BraceRange,
                      Decl *EnumDecl,
                      ArrayRef<Decl *> Elements,
@@ -2412,7 +2486,7 @@ class Sema {
   void MergeVarDeclTypes(VarDecl *New, VarDecl *Old, bool MergeTypeWithOld);
   void MergeVarDeclExceptionSpecs(VarDecl *New, VarDecl *Old);
   bool checkVarDeclRedefinition(VarDecl *OldDefn, VarDecl *NewDefn);
-  void notePreviousDefinition(SourceLocation Old, SourceLocation New);
+  void notePreviousDefinition(const NamedDecl *Old, SourceLocation New);
   bool MergeCXXFunctionDecl(FunctionDecl *New, FunctionDecl *Old, Scope *S);
 
   // AssignmentAction - This is used by all the assignment diagnostic functions
@@ -2724,8 +2798,7 @@ class Sema {
                                    SourceLocation OpLoc, ArrayRef<Expr *> Args,
                                    OverloadCandidateSet& CandidateSet,
                                    SourceRange OpRange = SourceRange());
-  void AddBuiltinCandidate(QualType ResultTy, QualType *ParamTys,
-                           ArrayRef<Expr *> Args,
+  void AddBuiltinCandidate(QualType *ParamTys, ArrayRef<Expr *> Args,
                            OverloadCandidateSet& CandidateSet,
                            bool IsAssignmentOperator = false,
                            unsigned NumContextualBoolArguments = 0);
@@ -2773,7 +2846,7 @@ class Sema {
   /// of a function.
   ///
   /// Returns true if any errors were emitted.
-  bool diagnoseArgIndependentDiagnoseIfAttrs(const FunctionDecl *Function,
+  bool diagnoseArgIndependentDiagnoseIfAttrs(const NamedDecl *ND,
                                              SourceLocation Loc);
 
   /// Returns whether the given function's address can be taken or not,
@@ -3230,7 +3303,7 @@ class Sema {
   void ProcessPragmaWeak(Scope *S, Decl *D);
   // Decl attributes - this routine is the top level dispatcher.
   void ProcessDeclAttributes(Scope *S, Decl *D, const Declarator &PD);
-  // Helper for delayed proccessing of attributes.
+  // Helper for delayed processing of attributes.
   void ProcessDeclAttributeDelayed(Decl *D, const AttributeList *AttrList);
   void ProcessDeclAttributeList(Scope *S, Decl *D, const AttributeList *AL,
                                 bool IncludeCXX11Attributes = true);
@@ -3343,9 +3416,10 @@ class Sema {
 
   /// DefaultSynthesizeProperties - This routine default synthesizes all
   /// properties which must be synthesized in the class's \@implementation.
-  void DefaultSynthesizeProperties (Scope *S, ObjCImplDecl* IMPDecl,
-                                    ObjCInterfaceDecl *IDecl);
-  void DefaultSynthesizeProperties(Scope *S, Decl *D);
+  void DefaultSynthesizeProperties(Scope *S, ObjCImplDecl *IMPDecl,
+                                   ObjCInterfaceDecl *IDecl,
+                                   SourceLocation AtEnd);
+  void DefaultSynthesizeProperties(Scope *S, Decl *D, SourceLocation AtEnd);
 
   /// IvarBacksCurrentMethodAccessor - This routine returns 'true' if 'IV' is
   /// an ivar synthesized for 'Method' and 'Method' is a property accessor
@@ -3863,11 +3937,10 @@ class Sema {
 
   void redelayDiagnostics(sema::DelayedDiagnosticPool &pool);
 
-  void EmitAvailabilityWarning(AvailabilityResult AR, NamedDecl *D,
-                               StringRef Message, SourceLocation Loc,
-                               const ObjCInterfaceDecl *UnknownObjCClass,
-                               const ObjCPropertyDecl *ObjCProperty,
-                               bool ObjCPropertyAccess);
+  void DiagnoseAvailabilityOfDecl(NamedDecl *D, SourceLocation Loc,
+                                  const ObjCInterfaceDecl *UnknownObjCClass,
+                                  bool ObjCPropertyAccess,
+                                  bool AvoidPartialAvailabilityChecks = false);
 
   bool makeUnavailableInSystemHeader(SourceLocation loc,
                                      UnavailableAttr::ImplicitReason reason);
@@ -3880,8 +3953,9 @@ class Sema {
 
   bool CanUseDecl(NamedDecl *D, bool TreatUnavailableAsInvalid);
   bool DiagnoseUseOfDecl(NamedDecl *D, SourceLocation Loc,
-                         const ObjCInterfaceDecl *UnknownObjCClass=nullptr,
-                         bool ObjCPropertyAccess=false);
+                         const ObjCInterfaceDecl *UnknownObjCClass = nullptr,
+                         bool ObjCPropertyAccess = false,
+                         bool AvoidPartialAvailabilityChecks = false);
   void NoteDeletedFunction(FunctionDecl *FD);
   void NoteDeletedInheritingConstructor(CXXConstructorDecl *CD);
   std::string getDeletedOrUnavailableSuffix(const FunctionDecl *FD);
@@ -3923,7 +3997,7 @@ class Sema {
   void MarkFunctionReferenced(SourceLocation Loc, FunctionDecl *Func,
                               bool MightBeOdrUse = true);
   void MarkVariableReferenced(SourceLocation Loc, VarDecl *Var);
-  void MarkDeclRefReferenced(DeclRefExpr *E);
+  void MarkDeclRefReferenced(DeclRefExpr *E, const Expr *Base = nullptr);
   void MarkMemberReferenced(MemberExpr *E);
 
   void UpdateMarkingForLValueToRValue(Expr *E);
@@ -7020,6 +7094,10 @@ class Sema {
 
       /// We are declaring an implicit special member function.
       DeclaringSpecialMember,
+
+      /// We are defining a synthesized function (such as a defaulted special
+      /// member).
+      DefiningSynthesizedFunction,
     } Kind;
 
     /// \brief Was the enclosing context a non-instantiation SFINAE context?
@@ -7429,9 +7507,9 @@ class Sema {
   /// but have not yet been performed.
   std::deque<PendingImplicitInstantiation> PendingInstantiations;
 
-  class SavePendingInstantiationsAndVTableUsesRAII {
+  class GlobalEagerInstantiationScope {
   public:
-    SavePendingInstantiationsAndVTableUsesRAII(Sema &S, bool Enabled)
+    GlobalEagerInstantiationScope(Sema &S, bool Enabled)
         : S(S), Enabled(Enabled) {
       if (!Enabled) return;
 
@@ -7439,7 +7517,14 @@ class Sema {
       SavedVTableUses.swap(S.VTableUses);
     }
 
-    ~SavePendingInstantiationsAndVTableUsesRAII() {
+    void perform() {
+      if (Enabled) {
+        S.DefineUsedVTables();
+        S.PerformPendingInstantiations();
+      }
+    }
+
+    ~GlobalEagerInstantiationScope() {
       if (!Enabled) return;
 
       // Restore the set of pending vtables.
@@ -7469,14 +7554,16 @@ class Sema {
   /// types, static variables, enumerators, etc.
   std::deque<PendingImplicitInstantiation> PendingLocalImplicitInstantiations;
 
-  class SavePendingLocalImplicitInstantiationsRAII {
+  class LocalEagerInstantiationScope {
   public:
-    SavePendingLocalImplicitInstantiationsRAII(Sema &S): S(S) {
+    LocalEagerInstantiationScope(Sema &S) : S(S) {
       SavedPendingLocalImplicitInstantiations.swap(
           S.PendingLocalImplicitInstantiations);
     }
 
-    ~SavePendingLocalImplicitInstantiationsRAII() {
+    void perform() { S.PerformPendingInstantiations(/*LocalOnly=*/true); }
+
+    ~LocalEagerInstantiationScope() {
       assert(S.PendingLocalImplicitInstantiations.empty() &&
              "there shouldn't be any pending local implicit instantiations");
       SavedPendingLocalImplicitInstantiations.swap(
@@ -7486,7 +7573,7 @@ class Sema {
   private:
     Sema &S;
     std::deque<PendingImplicitInstantiation>
-    SavedPendingLocalImplicitInstantiations;
+        SavedPendingLocalImplicitInstantiations;
   };
 
   /// A helper class for building up ExtParameterInfos.
@@ -7529,8 +7616,7 @@ class Sema {
       SavedPendingInstantiations.swap(S.PendingInstantiations);
     }
   private:
-    SavePendingLocalImplicitInstantiationsRAII
-      SavedPendingLocalImplicitInstantiations;
+    LocalEagerInstantiationScope SavedPendingLocalImplicitInstantiations;
     Sema &S;
     std::deque<PendingImplicitInstantiation> SavedPendingInstantiations;
   };
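
The renames above also change the contract: besides restoring the saved queues on destruction, the new scopes expose a `perform()` member so the caller can flush pending instantiations at a well-defined point rather than implicitly in a destructor. A generic sketch of this RAII-with-explicit-step pattern (all names invented):

#include <deque>

struct Engine {
  std::deque<int> Pending;
  void performPending() { /* drain the queue */ Pending.clear(); }
};

class EagerScope {
  Engine &E;
  std::deque<int> Saved;

public:
  explicit EagerScope(Engine &E) : E(E) { Saved.swap(E.Pending); }

  // Flush work queued inside this scope, at a point the caller chooses.
  void perform() { E.performPending(); }

  // On exit, restore the enclosing scope's queue regardless of what happened.
  ~EagerScope() { Saved.swap(E.Pending); }
};
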
@@ -7558,6 +7644,10 @@ class Sema {
                                         unsigned ThisTypeQuals);
   void SubstExceptionSpec(FunctionDecl *New, const FunctionProtoType *Proto,
                           const MultiLevelTemplateArgumentList &Args);
+  bool SubstExceptionSpec(SourceLocation Loc,
+                          FunctionProtoType::ExceptionSpecInfo &ESI,
+                          SmallVectorImpl<QualType> &ExceptionStorage,
+                          const MultiLevelTemplateArgumentList &Args);
   ParmVarDecl *SubstParmVarDecl(ParmVarDecl *D,
                             const MultiLevelTemplateArgumentList &TemplateArgs,
                                 int indexAdjustment,
@@ -7643,6 +7733,9 @@ class Sema {
                           LateInstantiatedAttrVec *LateAttrs = nullptr,
                           LocalInstantiationScope *OuterMostScope = nullptr);
 
+  bool usesPartialOrExplicitSpecialization(
+      SourceLocation Loc, ClassTemplateSpecializationDecl *ClassTemplateSpec);
+
   bool
   InstantiateClassTemplateSpecialization(SourceLocation PointOfInstantiation,
                            ClassTemplateSpecializationDecl *ClassTemplateSpec,
@@ -7717,7 +7810,8 @@ class Sema {
                             const MultiLevelTemplateArgumentList &TemplateArgs);
 
   NamedDecl *FindInstantiatedDecl(SourceLocation Loc, NamedDecl *D,
-                          const MultiLevelTemplateArgumentList &TemplateArgs);
+                          const MultiLevelTemplateArgumentList &TemplateArgs,
+                          bool FindingInstantiatedContext = false);
   DeclContext *FindInstantiatedContext(SourceLocation Loc, DeclContext *DC,
                           const MultiLevelTemplateArgumentList &TemplateArgs);
 
@@ -8149,6 +8243,11 @@ class Sema {
     POAK_Reset    // #pragma options align=reset
   };
 
+  /// ActOnPragmaClangSection - Called on well formed \#pragma clang section
+  void ActOnPragmaClangSection(SourceLocation PragmaLoc,
+                               PragmaClangSectionAction Action,
+                               PragmaClangSectionKind SecKind, StringRef SecName);
+
   /// ActOnPragmaOptionsAlign - Called on well formed \#pragma options align.
   void ActOnPragmaOptionsAlign(PragmaOptionsAlignKind Kind,
                                SourceLocation PragmaLoc);
@@ -8352,9 +8451,13 @@ class Sema {
                          unsigned SpellingListIndex, bool isNSConsumed,
                          bool isTemplateInstantiation);
 
+  bool checkNSReturnsRetainedReturnType(SourceLocation loc, QualType type);
+
   //===--------------------------------------------------------------------===//
   // C++ Coroutines TS
   //
+  bool ActOnCoroutineBodyStart(Scope *S, SourceLocation KwLoc,
+                               StringRef Keyword);
   ExprResult ActOnCoawaitExpr(Scope *S, SourceLocation KwLoc, Expr *E);
   ExprResult ActOnCoyieldExpr(Scope *S, SourceLocation KwLoc, Expr *E);
   StmtResult ActOnCoreturnStmt(Scope *S, SourceLocation KwLoc, Expr *E);
@@ -8420,7 +8523,7 @@ class Sema {
   /// is disabled due to required OpenCL extensions being disabled. If so,
   /// emit diagnostics.
   /// \return true if type is disabled.
-  bool checkOpenCLDisabledDecl(const Decl &D, const Expr &E);
+  bool checkOpenCLDisabledDecl(const NamedDecl &D, const Expr &E);
 
   //===--------------------------------------------------------------------===//
   // OpenMP directives and clauses.
@@ -8650,7 +8753,8 @@ class Sema {
   StmtResult ActOnOpenMPTaskwaitDirective(SourceLocation StartLoc,
                                           SourceLocation EndLoc);
   /// \brief Called on well-formed '\#pragma omp taskgroup'.
-  StmtResult ActOnOpenMPTaskgroupDirective(Stmt *AStmt, SourceLocation StartLoc,
+  StmtResult ActOnOpenMPTaskgroupDirective(ArrayRef<OMPClause *> Clauses,
+                                           Stmt *AStmt, SourceLocation StartLoc,
                                            SourceLocation EndLoc);
   /// \brief Called on well-formed '\#pragma omp flush'.
   StmtResult ActOnOpenMPFlushDirective(ArrayRef<OMPClause *> Clauses,
@@ -8992,6 +9096,13 @@ class Sema {
       CXXScopeSpec &ReductionIdScopeSpec,
       const DeclarationNameInfo &ReductionId,
       ArrayRef<Expr *> UnresolvedReductions = llvm::None);
+  /// Called on well-formed 'task_reduction' clause.
+  OMPClause *ActOnOpenMPTaskReductionClause(
+      ArrayRef<Expr *> VarList, SourceLocation StartLoc,
+      SourceLocation LParenLoc, SourceLocation ColonLoc, SourceLocation EndLoc,
+      CXXScopeSpec &ReductionIdScopeSpec,
+      const DeclarationNameInfo &ReductionId,
+      ArrayRef<Expr *> UnresolvedReductions = llvm::None);
   /// \brief Called on well-formed 'linear' clause.
   OMPClause *
   ActOnOpenMPLinearClause(ArrayRef<Expr *> VarList, Expr *Step,
@@ -10177,6 +10288,7 @@ class Sema {
   bool SemaBuiltinVAStartARM(CallExpr *Call);
   bool SemaBuiltinUnorderedCompare(CallExpr *TheCall);
   bool SemaBuiltinFPClassification(CallExpr *TheCall, unsigned NumArgs);
+  bool SemaBuiltinVSX(CallExpr *TheCall);
   bool SemaBuiltinOSLogFormat(CallExpr *TheCall);
 
 public:
@@ -10263,6 +10375,7 @@ class Sema {
   void CheckFloatComparison(SourceLocation Loc, Expr* LHS, Expr* RHS);
   void CheckImplicitConversions(Expr *E, SourceLocation CC = SourceLocation());
   void CheckBoolLikeConversion(Expr *E, SourceLocation CC);
+  void CheckForIntOverflow(Expr *E);
   void CheckUnsequencedOperations(Expr *E);
 
   /// \brief Perform semantic checks on a completed expression. This will either
@@ -10385,17 +10498,6 @@ class Sema {
     return OriginalLexicalContext ? OriginalLexicalContext : CurContext;
   }
 
-  /// \brief The diagnostic we should emit for \c D, or \c AR_Available.
-  ///
-  /// \param D The declaration to check. Note that this may be altered to point
-  /// to another declaration that \c D gets it's availability from. i.e., we
-  /// walk the list of typedefs to find an availability attribute.
-  ///
-  /// \param Message If non-null, this will be populated with the message from
-  /// the availability attribute that is selected.
-  AvailabilityResult ShouldDiagnoseAvailabilityOfDecl(NamedDecl *&D,
-                                                      std::string *Message);
-
   const DeclContext *getCurObjCLexicalContext() const {
     const DeclContext *DC = getCurLexicalContext();
     // A category implicitly has the attribute of the interface.
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Sema/TemplateDeduction.h b/interpreter/llvm/src/tools/clang/include/clang/Sema/TemplateDeduction.h
index d92cbab4fbcf8..cd9ed6abfaf9d 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Sema/TemplateDeduction.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/Sema/TemplateDeduction.h
@@ -88,6 +88,12 @@ class TemplateDeductionInfo {
     HasSFINAEDiagnostic = false;
   }
 
+  /// Peek at the SFINAE diagnostic.
+  const PartialDiagnosticAt &peekSFINAEDiagnostic() const {
+    assert(HasSFINAEDiagnostic);
+    return SuppressedDiagnostics.front();
+  }
+
   /// \brief Provide a new template argument list that contains the
   /// results of template argument deduction.
   void reset(TemplateArgumentList *NewDeduced) {
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Serialization/ASTBitCodes.h b/interpreter/llvm/src/tools/clang/include/clang/Serialization/ASTBitCodes.h
index 823440b197137..9227b33d2c53c 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Serialization/ASTBitCodes.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/Serialization/ASTBitCodes.h
@@ -607,6 +607,9 @@ namespace clang {
 
       /// \brief Record code for \#pragma pack options.
       PACK_PRAGMA_OPTIONS = 61,
+
+      /// \brief The stack of open #ifs/#ifdefs recorded in a preamble.
+      PP_CONDITIONAL_STACK = 62,
     };
 
     /// \brief Record types used within a source manager block.
@@ -1542,9 +1545,14 @@ namespace clang {
 
       // ARC
       EXPR_OBJC_BRIDGED_CAST,     // ObjCBridgedCastExpr
-      
+
       STMT_MS_DEPENDENT_EXISTS,   // MSDependentExistsStmt
-      EXPR_LAMBDA                 // LambdaExpr
+      EXPR_LAMBDA,                // LambdaExpr
+      STMT_COROUTINE_BODY,
+      STMT_CORETURN,
+      EXPR_COAWAIT,
+      EXPR_COYIELD,
+      EXPR_DEPENDENT_COAWAIT,
     };
 
     /// \brief The kinds of designators that can occur in a
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Serialization/ASTReader.h b/interpreter/llvm/src/tools/clang/include/clang/Serialization/ASTReader.h
index 3d1a6a71d6c84..0a8b52c9250b2 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Serialization/ASTReader.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/Serialization/ASTReader.h
@@ -400,7 +400,7 @@ class ASTReader
   Preprocessor &PP;
 
   /// \brief The AST context into which we'll read the AST files.
-  ASTContext &Context;
+  ASTContext *ContextObj = nullptr;
 
   /// \brief The AST consumer.
   ASTConsumer *Consumer = nullptr;
@@ -478,10 +478,18 @@ class ASTReader
   /// in the chain.
   DeclUpdateOffsetsMap DeclUpdateOffsets;
 
+  struct PendingUpdateRecord {
+    Decl *D;
+    serialization::GlobalDeclID ID;
+    // Whether the declaration was just deserialized.
+    bool JustLoaded;
+    PendingUpdateRecord(serialization::GlobalDeclID ID, Decl *D,
+                        bool JustLoaded)
+        : D(D), ID(ID), JustLoaded(JustLoaded) {}
+  };
   /// \brief Declaration updates for already-loaded declarations that we need
   /// to apply once we finish processing an import.
-  llvm::SmallVector<std::pair<serialization::GlobalDeclID, Decl *>, 16>
-      PendingUpdateRecords;
+  llvm::SmallVector<PendingUpdateRecord, 16> PendingUpdateRecords;
 
   enum class PendingFakeDefinitionKind { NotFake, Fake, FakeLoaded };
 
@@ -1141,6 +1149,7 @@ class ASTReader
     time_t StoredTime;
     bool Overridden;
     bool Transient;
+    bool TopLevelModuleMap;
   };
 
   /// \brief Reads the stored information about an input file.
@@ -1282,7 +1291,7 @@ class ASTReader
 
   RecordLocation DeclCursorForID(serialization::DeclID ID,
                                  SourceLocation &Location);
-  void loadDeclUpdateRecords(serialization::DeclID ID, Decl *D);
+  void loadDeclUpdateRecords(PendingUpdateRecord &Record);
   void loadPendingDeclChain(Decl *D, uint64_t LocalOffset);
   void loadObjCCategories(serialization::GlobalDeclID ID, ObjCInterfaceDecl *D,
                           unsigned PreviousGeneration = 0);
@@ -1381,7 +1390,7 @@ class ASTReader
   /// precompiled header will be loaded.
   ///
   /// \param Context the AST context that this precompiled header will be
-  /// loaded into.
+  /// loaded into, if any.
   ///
   /// \param PCHContainerRdr the PCHContainerOperations to use for loading and
   /// creating modules.
@@ -1413,7 +1422,7 @@ class ASTReader
   ///
   /// \param ReadTimer If non-null, a timer used to track the time spent
   /// deserializing.
-  ASTReader(Preprocessor &PP, ASTContext &Context,
+  ASTReader(Preprocessor &PP, ASTContext *Context,
             const PCHContainerReader &PCHContainerRdr,
             ArrayRef<std::shared_ptr<ModuleFileExtension>> Extensions,
             StringRef isysroot = "", bool DisableValidation = false,
@@ -2202,7 +2211,10 @@ class ASTReader
   void completeVisibleDeclsMap(const DeclContext *DC) override;
 
   /// \brief Retrieve the AST context that this AST reader supplements.
-  ASTContext &getContext() { return Context; }
+  ASTContext &getContext() {
+    assert(ContextObj && "requested AST context when not loading AST");
+    return *ContextObj;
+  }
 
   // \brief Contains the IDs for declarations that were requested before we have
   // access to a Sema object.
@@ -2244,6 +2256,12 @@ class ASTReader
           llvm::function_ref Visitor);
 
+  /// Visit all the top-level module maps loaded when building the given module
+  /// file.
+  void visitTopLevelModuleMaps(serialization::ModuleFile &MF,
+                               llvm::function_ref<
+                                   void(const FileEntry *)> Visitor);
+
   bool isProcessingUpdateRecords() { return ProcessingUpdateRecords; }
 };
 
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Serialization/ASTWriter.h b/interpreter/llvm/src/tools/clang/include/clang/Serialization/ASTWriter.h
index 17cf726e4d6bb..f14dfc73baa98 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Serialization/ASTWriter.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/Serialization/ASTWriter.h
@@ -627,10 +627,6 @@ class ASTWriter : public ASTDeserializationListener,
   /// \brief Add a version tuple to the given record
   void AddVersionTuple(const VersionTuple &Version, RecordDataImpl &Record);
 
-  /// \brief Infer the submodule ID that contains an entity at the given
-  /// source location.
-  serialization::SubmoduleID inferSubmoduleIDFromLocation(SourceLocation Loc);
-
   /// \brief Retrieve or create a submodule ID for this module, or return 0 if
   /// the submodule is neither local (a submodule of the currently-written module)
   /// nor from an imported module.
diff --git a/interpreter/llvm/src/tools/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td b/interpreter/llvm/src/tools/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
index 790ba5c121c95..82ab720af8dc7 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
+++ b/interpreter/llvm/src/tools/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
@@ -38,6 +38,11 @@ def CoreAlpha : Package<"core">, InPackage<Alpha>, Hidden;
 // default. Such checkers belong in the alpha package.
 def OptIn : Package<"optin">;
 
+// In the Portability package reside checkers for finding code that relies on
+// implementation-defined behavior. Such checks are wanted for cross-platform
+// development, but unwanted for developers who target only a single platform.
+def PortabilityOptIn : Package<"portability">, InPackage<OptIn>;
+
 def Nullability : Package<"nullability">;
 
 def Cplusplus : Package<"cplusplus">;
@@ -279,15 +284,15 @@ def VirtualCallChecker : Checker<"VirtualCall">,
 
 let ParentPackage = CplusplusAlpha in {
 
+def IteratorRangeChecker : Checker<"IteratorRange">,
+  HelpText<"Check for iterators used outside their valid ranges">,
+  DescFile<"IteratorChecker.cpp">;
+
 def MisusedMovedObjectChecker: Checker<"MisusedMovedObject">,
      HelpText<"Method calls on a moved-from object and copying a moved-from "
               "object will be reported">,
      DescFile<"MisusedMovedObjectChecker.cpp">;
 
-def IteratorPastEndChecker : Checker<"IteratorPastEnd">,
-  HelpText<"Check iterators used past end">,
-  DescFile<"IteratorPastEndChecker.cpp">;
-
 } // end: "alpha.cplusplus"
 
 
@@ -416,7 +421,7 @@ def GenericTaintChecker : Checker<"TaintPropagation">,
 
 let ParentPackage = Unix in {
 
-def UnixAPIChecker : Checker<"API">,
+def UnixAPIMisuseChecker : Checker<"API">,
   HelpText<"Check calls to various UNIX/Posix functions">,
   DescFile<"UnixAPIChecker.cpp">;
 
@@ -754,3 +759,14 @@ def CloneChecker : Checker<"CloneChecker">,
 
 } // end "clone"
 
+//===----------------------------------------------------------------------===//
+// Portability checkers.
+//===----------------------------------------------------------------------===//
+
+let ParentPackage = PortabilityOptIn in {
+
+def UnixAPIPortabilityChecker : Checker<"UnixAPI">,
+  HelpText<"Finds implementation-defined behavior in UNIX/Posix functions">,
+  DescFile<"UnixAPIChecker.cpp">;
+
+} // end optin.portability
diff --git a/interpreter/llvm/src/tools/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.h b/interpreter/llvm/src/tools/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.h
index 4fb50deb0f6b8..5dd6bdf384968 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.h
@@ -205,9 +205,15 @@ class AnalyzerOptions : public RefCountedBase<AnalyzerOptions> {
   /// Controls which C++ member functions will be considered for inlining.
   CXXInlineableMemberKind CXXMemberInliningMode;
   
+  /// \sa includeImplicitDtorsInCFG
+  Optional<bool> IncludeImplicitDtorsInCFG;
+
   /// \sa includeTemporaryDtorsInCFG
   Optional<bool> IncludeTemporaryDtorsInCFG;
-  
+
+  /// \sa IncludeLifetimeInCFG
+  Optional<bool> IncludeLifetimeInCFG;
+
   /// \sa mayInlineCXXStandardLibrary
   Optional<bool> InlineCXXStandardLibrary;
   
@@ -395,6 +401,20 @@ class AnalyzerOptions : public RefCountedBase {
   /// accepts the values "true" and "false".
   bool includeTemporaryDtorsInCFG();
 
+  /// Returns whether or not implicit destructors for C++ objects should
+  /// be included in the CFG.
+  ///
+  /// This is controlled by the 'cfg-implicit-dtors' config option, which
+  /// accepts the values "true" and "false".
+  bool includeImplicitDtorsInCFG();
+
+  /// Returns whether or not end-of-lifetime information should be included in
+  /// the CFG.
+  ///
+  /// This is controlled by the 'cfg-lifetime' config option, which accepts
+  /// the values "true" and "false".
+  bool includeLifetimeInCFG();
+
   /// Returns whether or not C++ standard library functions may be considered
   /// for inlining.
   ///
diff --git a/interpreter/llvm/src/tools/clang/include/clang/StaticAnalyzer/Core/BugReporter/PathDiagnostic.h b/interpreter/llvm/src/tools/clang/include/clang/StaticAnalyzer/Core/BugReporter/PathDiagnostic.h
index dc6e54a33206e..a07cd88950d85 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/StaticAnalyzer/Core/BugReporter/PathDiagnostic.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/StaticAnalyzer/Core/BugReporter/PathDiagnostic.h
@@ -550,13 +550,15 @@ class PathDiagnosticEventPiece : public PathDiagnosticSpotPiece {
 class PathDiagnosticCallPiece : public PathDiagnosticPiece {
   PathDiagnosticCallPiece(const Decl *callerD,
                           const PathDiagnosticLocation &callReturnPos)
-    : PathDiagnosticPiece(Call), Caller(callerD), Callee(nullptr),
-      NoExit(false), callReturn(callReturnPos) {}
+      : PathDiagnosticPiece(Call), Caller(callerD), Callee(nullptr),
+        NoExit(false), IsCalleeAnAutosynthesizedPropertyAccessor(false),
+        callReturn(callReturnPos) {}
 
   PathDiagnosticCallPiece(PathPieces &oldPath, const Decl *caller)
-    : PathDiagnosticPiece(Call), Caller(caller), Callee(nullptr),
-      NoExit(true), path(oldPath) {}
-  
+      : PathDiagnosticPiece(Call), Caller(caller), Callee(nullptr),
+        NoExit(true), IsCalleeAnAutosynthesizedPropertyAccessor(false),
+        path(oldPath) {}
+
   const Decl *Caller;
   const Decl *Callee;
 
@@ -564,6 +566,10 @@ class PathDiagnosticCallPiece : public PathDiagnosticPiece {
   // call exit.
   bool NoExit;
 
+  // Flag signifying that the callee function is an Objective-C autosynthesized
+  // property getter or setter.
+  bool IsCalleeAnAutosynthesizedPropertyAccessor;
+
   // The custom string, which should appear after the call Return Diagnostic.
   // TODO: Should we allow multiple diagnostics?
   std::string CallStackMessage;
diff --git a/interpreter/llvm/src/tools/clang/include/clang/StaticAnalyzer/Core/CheckerManager.h b/interpreter/llvm/src/tools/clang/include/clang/StaticAnalyzer/Core/CheckerManager.h
index 52ed260346bf3..88cb08a4b647c 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/StaticAnalyzer/Core/CheckerManager.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/StaticAnalyzer/Core/CheckerManager.h
@@ -286,7 +286,7 @@ class CheckerManager {
   void runCheckersForEndAnalysis(ExplodedGraph &G, BugReporter &BR,
                                  ExprEngine &Eng);
 
-  /// \brief Run checkers on begining of function.
+  /// \brief Run checkers on beginning of function.
   void runCheckersForBeginFunction(ExplodedNodeSet &Dst,
                                    const BlockEdge &L,
                                    ExplodedNode *Pred,
diff --git a/interpreter/llvm/src/tools/clang/include/clang/StaticAnalyzer/Core/PathSensitive/BasicValueFactory.h b/interpreter/llvm/src/tools/clang/include/clang/StaticAnalyzer/Core/PathSensitive/BasicValueFactory.h
index fb427f6185759..4aa87443e4c22 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/StaticAnalyzer/Core/PathSensitive/BasicValueFactory.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/StaticAnalyzer/Core/PathSensitive/BasicValueFactory.h
@@ -124,7 +124,7 @@ class BasicValueFactory {
   /// Returns the type of the APSInt used to store values of the given QualType.
   APSIntType getAPSIntType(QualType T) const {
     assert(T->isIntegralOrEnumerationType() || Loc::isLocType(T));
-    return APSIntType(Ctx.getTypeSize(T),
+    return APSIntType(Ctx.getIntWidth(T),
                       !T->isSignedIntegerOrEnumerationType());
   }
 
@@ -180,6 +180,11 @@ class BasicValueFactory {
     return getValue(X);
   }
 
+  inline const llvm::APSInt& getZeroWithTypeSize(QualType T) {
+    assert(T->isScalarType());
+    return getValue(0, Ctx.getTypeSize(T), true);
+  }
+
   inline const llvm::APSInt& getZeroWithPtrWidth(bool isUnsigned = true) {
     return getValue(0, Ctx.getTypeSize(Ctx.VoidPtrTy), isUnsigned);
   }
diff --git a/interpreter/llvm/src/tools/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h b/interpreter/llvm/src/tools/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h
index 2910ef4212ccd..e3a2164b11ff0 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h
@@ -43,6 +43,7 @@ typedef std::unique_ptr<ConstraintManager>(*ConstraintManagerCreator)(
     ProgramStateManager &, SubEngine *);
 typedef std::unique_ptr<StoreManager>(*StoreManagerCreator)(
     ProgramStateManager &);
+typedef llvm::ImmutableMap<const SubRegion *, TaintTagType> TaintedSubRegions;
 
 //===----------------------------------------------------------------------===//
 // ProgramStateTrait - Traits used by the Generic Data Map of a ProgramState.
@@ -343,6 +344,9 @@ class ProgramState : public llvm::FoldingSetNode {
   ProgramStateRef addTaint(const Stmt *S, const LocationContext *LCtx,
                                TaintTagType Kind = TaintTagGeneric) const;
 
+  /// Create a new state in which the value is marked as tainted.
+  ProgramStateRef addTaint(SVal V, TaintTagType Kind = TaintTagGeneric) const;
+
   /// Create a new state in which the symbol is marked as tainted.
   ProgramStateRef addTaint(SymbolRef S,
                                TaintTagType Kind = TaintTagGeneric) const;
@@ -351,6 +355,14 @@ class ProgramState : public llvm::FoldingSetNode {
   ProgramStateRef addTaint(const MemRegion *R,
                                TaintTagType Kind = TaintTagGeneric) const;
 
+  /// Create a new state in which a sub-region of a given symbol is tainted.
+  /// This might be necessary when referring to regions that cannot have an
+  /// individual symbol, e.g. if they are represented by the default binding of
+  /// a LazyCompoundVal.
+  ProgramStateRef addPartialTaint(SymbolRef ParentSym,
+                                  const SubRegion *SubRegion,
+                                  TaintTagType Kind = TaintTagGeneric) const;
+
   /// Check if the statement is tainted in the current state.
   bool isTainted(const Stmt *S, const LocationContext *LCtx,
                  TaintTagType Kind = TaintTagGeneric) const;
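
`TaintedSubRegions` above is an `llvm::ImmutableMap`, the persistent map used throughout `ProgramState`: `add()` returns a fresh map that shares structure with the old one instead of mutating it, which is what makes analyzer states cheap to fork. A minimal sketch of the API with invented key/value types (`ImmutableMap`, its `Factory`, `add`, and `lookup` are real):

#include "llvm/ADT/ImmutableMap.h"

using Taints = llvm::ImmutableMap<int, unsigned>;

void demo() {
  Taints::Factory F;
  Taints Empty = F.getEmptyMap();

  // add() leaves Empty untouched and returns a new map sharing its nodes.
  Taints One = F.add(Empty, /*Key=*/1, /*Data=*/0);

  if (const unsigned *K = One.lookup(1))
    (void)*K; // found in One; Empty still contains nothing
}
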
@@ -453,6 +465,7 @@ class ProgramStateManager {
   std::unique_ptr<ConstraintManager>   ConstraintMgr;
 
   ProgramState::GenericDataMap::Factory     GDMFactory;
+  TaintedSubRegions::Factory TSRFactory;
 
   typedef llvm::DenseMap<void*, std::pair<void*, void (*)(void*)> > GDMContextsTy;
   GDMContextsTy GDMContexts;
diff --git a/interpreter/llvm/src/tools/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h b/interpreter/llvm/src/tools/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h
index 14aa3af37620a..d58d0a690c88e 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h
@@ -315,6 +315,13 @@ class SValBuilder {
     return nonloc::ConcreteInt(BasicVals.getTruthValue(b));
   }
 
+  /// Create NULL pointer, with proper pointer bit-width for given address
+  /// space.
+  /// \param type pointer type.
+  Loc makeNullWithType(QualType type) {
+    return loc::ConcreteInt(BasicVals.getZeroWithTypeSize(type));
+  }
+
   Loc makeNull() {
     return loc::ConcreteInt(BasicVals.getZeroWithPtrWidth());
   }
diff --git a/interpreter/llvm/src/tools/clang/include/clang/StaticAnalyzer/Core/PathSensitive/TaintManager.h b/interpreter/llvm/src/tools/clang/include/clang/StaticAnalyzer/Core/PathSensitive/TaintManager.h
index d39b5017d312d..7b76263f040c9 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/StaticAnalyzer/Core/PathSensitive/TaintManager.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/StaticAnalyzer/Core/PathSensitive/TaintManager.h
@@ -35,6 +35,16 @@ template<> struct ProgramStateTrait<TaintMap>
   static void *GDMIndex() { static int index = 0; return &index; }
 };
 
+/// The GDM component mapping derived symbols' parent symbols to their
+/// underlying regions. This is used to efficiently check whether a symbol is
+/// tainted when it represents a sub-region of a tainted symbol.
+struct DerivedSymTaint {};
+typedef llvm::ImmutableMap<SymbolRef, TaintedSubRegions> DerivedSymTaintImpl;
+template<> struct ProgramStateTrait<DerivedSymTaint>
+    :  public ProgramStatePartialTrait<DerivedSymTaintImpl> {
+  static void *GDMIndex() { static int index; return &index; }
+};
+
 class TaintManager {
 
   TaintManager() {}
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Tooling/ArgumentsAdjusters.h b/interpreter/llvm/src/tools/clang/include/clang/Tooling/ArgumentsAdjusters.h
index 1fd7be688761a..4eb02251a7758 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Tooling/ArgumentsAdjusters.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/Tooling/ArgumentsAdjusters.h
@@ -44,6 +44,10 @@ ArgumentsAdjuster getClangSyntaxOnlyAdjuster();
 /// arguments.
 ArgumentsAdjuster getClangStripOutputAdjuster();
 
+/// \brief Gets an argument adjuster which removes dependency-file
+/// related command line arguments.
+ArgumentsAdjuster getClangStripDependencyFileAdjuster();
+
 enum class ArgumentInsertPosition { BEGIN, END };
 
 /// \brief Gets an argument adjuster which inserts \p Extra arguments in the
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Tooling/CompilationDatabase.h b/interpreter/llvm/src/tools/clang/include/clang/Tooling/CompilationDatabase.h
index 4611d3cdae5ed..e988b84b6eaea 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Tooling/CompilationDatabase.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/Tooling/CompilationDatabase.h
@@ -60,16 +60,6 @@ struct CompileCommand {
 
   /// The output file associated with the command.
   std::string Output;
-
-  /// \brief An optional mapping from each file's path to its content for all
-  /// files needed for the compilation that are not available via the file
-  /// system.
-  ///
-  /// Note that a tool implementation is required to fall back to the file
-  /// system if a source file is not provided in the mapped sources, as
-  /// compilation databases will usually not provide all files in mapped sources
-  /// for performance reasons.
-  std::vector<std::pair<std::string, std::string> > MappedSources;
 };
 
 /// \brief Interface for compilation databases.
@@ -186,10 +176,11 @@ class FixedCompilationDatabase : public CompilationDatabase {
   /// the number of arguments before "--", if "--" was found in the argument
   /// list.
   /// \param Argv Points to the command line arguments.
+  /// \param ErrorMsg Contains error text if the function returns null pointer.
   /// \param Directory The base directory used in the FixedCompilationDatabase.
-  static FixedCompilationDatabase *loadFromCommandLine(int &Argc,
-                                                       const char *const *Argv,
-                                                       Twine Directory = ".");
+  static std::unique_ptr<FixedCompilationDatabase> loadFromCommandLine(
+      int &Argc, const char *const *Argv, std::string &ErrorMsg,
+      Twine Directory = ".");
 
   /// \brief Constructs a compilation data base from a specified directory
   /// and command line.
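
With the new signature, a failed parse is explained through `ErrorMsg` instead of a bare null return. A hedged usage sketch, following the contract documented above (`Argc`/`Argv` come from `main`):

#include "clang/Tooling/CompilationDatabase.h"
#include "llvm/Support/raw_ostream.h"
#include <memory>
#include <string>

int run(int Argc, const char **Argv) {
  std::string ErrorMsg;
  std::unique_ptr<clang::tooling::FixedCompilationDatabase> DB =
      clang::tooling::FixedCompilationDatabase::loadFromCommandLine(
          Argc, Argv, ErrorMsg);
  if (!DB) {
    llvm::errs() << ErrorMsg << "\n"; // filled in when null is returned
    return 1;
  }
  // ... hand *DB to a ClangTool ...
  return 0;
}
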
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Tooling/Core/Diagnostic.h b/interpreter/llvm/src/tools/clang/include/clang/Tooling/Core/Diagnostic.h
index d657f16df1837..b4920d4fe456d 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Tooling/Core/Diagnostic.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/Tooling/Core/Diagnostic.h
@@ -58,9 +58,9 @@ struct Diagnostic {
   Diagnostic(llvm::StringRef DiagnosticName, Level DiagLevel,
              StringRef BuildDirectory);
 
-  Diagnostic(llvm::StringRef DiagnosticName, DiagnosticMessage &Message,
-             llvm::StringMap<Replacements> &Fix,
-             SmallVector<DiagnosticMessage, 1> &Notes, Level DiagLevel,
+  Diagnostic(llvm::StringRef DiagnosticName, const DiagnosticMessage &Message,
+             const llvm::StringMap<Replacements> &Fix,
+             const SmallVector<DiagnosticMessage, 1> &Notes, Level DiagLevel,
              llvm::StringRef BuildDirectory);
 
   /// \brief Name identifying the Diagnostic.
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Tooling/DiagnosticsYaml.h b/interpreter/llvm/src/tools/clang/include/clang/Tooling/DiagnosticsYaml.h
index f32b9fa9c94bf..4d6ff063641b9 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Tooling/DiagnosticsYaml.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/Tooling/DiagnosticsYaml.h
@@ -56,6 +56,9 @@ template <> struct MappingTraits<clang::tooling::Diagnostic> {
     MappingNormalization<NormalizedDiagnostic, clang::tooling::Diagnostic> Keys(
         Io, D);
     Io.mapRequired("DiagnosticName", Keys->DiagnosticName);
+    Io.mapRequired("Message", Keys->Message.Message);
+    Io.mapRequired("FileOffset", Keys->Message.FileOffset);
+    Io.mapRequired("FilePath", Keys->Message.FilePath);
 
     // FIXME: Export properly all the different fields.
 
@@ -82,17 +85,7 @@ template <> struct MappingTraits {
 template <> struct MappingTraits<clang::tooling::TranslationUnitDiagnostics> {
   static void mapping(IO &Io, clang::tooling::TranslationUnitDiagnostics &Doc) {
     Io.mapRequired("MainSourceFile", Doc.MainSourceFile);
-
-    std::vector<clang::tooling::Diagnostic> Diagnostics;
-    for (auto &Diagnostic : Doc.Diagnostics) {
-      // FIXME: Export all diagnostics, not just the ones with fixes.
-      // Update MappingTraits::mapping.
-      if (Diagnostic.Fix.size() > 0) {
-        Diagnostics.push_back(Diagnostic);
-      }
-    }
-    Io.mapRequired("Diagnostics", Diagnostics);
-    Doc.Diagnostics = Diagnostics;
+    Io.mapRequired("Diagnostics", Doc.Diagnostics);
   }
 };
 } // end namespace yaml
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Tooling/Refactoring/RecursiveSymbolVisitor.h b/interpreter/llvm/src/tools/clang/include/clang/Tooling/Refactoring/RecursiveSymbolVisitor.h
new file mode 100644
index 0000000000000..8b01a61256f6b
--- /dev/null
+++ b/interpreter/llvm/src/tools/clang/include/clang/Tooling/Refactoring/RecursiveSymbolVisitor.h
@@ -0,0 +1,122 @@
+//===--- RecursiveSymbolVisitor.h - Clang refactoring library -------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief A wrapper class around \c RecursiveASTVisitor that visits each
+/// occurrence of a named symbol.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLING_REFACTOR_RECURSIVE_SYMBOL_VISITOR_H
+#define LLVM_CLANG_TOOLING_REFACTOR_RECURSIVE_SYMBOL_VISITOR_H
+
+#include "clang/AST/AST.h"
+#include "clang/AST/RecursiveASTVisitor.h"
+#include "clang/Lex/Lexer.h"
+
+namespace clang {
+namespace tooling {
+
+/// Traverses the AST and visits the occurrence of each named symbol in the
+/// given nodes.
+template <typename T>
+class RecursiveSymbolVisitor
+    : public RecursiveASTVisitor<RecursiveSymbolVisitor<T>> {
+  using BaseType = RecursiveASTVisitor<RecursiveSymbolVisitor<T>>;
+
+public:
+  RecursiveSymbolVisitor(const SourceManager &SM, const LangOptions &LangOpts)
+      : SM(SM), LangOpts(LangOpts) {}
+
+  bool visitSymbolOccurrence(const NamedDecl *ND,
+                             ArrayRef<SourceRange> NameRanges) {
+    return true;
+  }
+
+  // Declaration visitors:
+
+  bool VisitNamedDecl(const NamedDecl *D) {
+    return isa<CXXConversionDecl>(D) ? true : visit(D, D->getLocation());
+  }
+
+  bool VisitCXXConstructorDecl(const CXXConstructorDecl *CD) {
+    for (const auto *Initializer : CD->inits()) {
+      // Ignore implicit initializers.
+      if (!Initializer->isWritten())
+        continue;
+      if (const FieldDecl *FD = Initializer->getMember()) {
+        if (!visit(FD, Initializer->getSourceLocation(),
+                   Lexer::getLocForEndOfToken(Initializer->getSourceLocation(),
+                                              0, SM, LangOpts)))
+          return false;
+      }
+    }
+    return true;
+  }
+
+  // Expression visitors:
+
+  bool VisitDeclRefExpr(const DeclRefExpr *Expr) {
+    return visit(Expr->getFoundDecl(), Expr->getLocation());
+  }
+
+  bool VisitMemberExpr(const MemberExpr *Expr) {
+    return visit(Expr->getFoundDecl().getDecl(), Expr->getMemberLoc());
+  }
+
+  // Other visitors:
+
+  bool VisitTypeLoc(const TypeLoc Loc) {
+    const SourceLocation TypeBeginLoc = Loc.getBeginLoc();
+    const SourceLocation TypeEndLoc =
+        Lexer::getLocForEndOfToken(TypeBeginLoc, 0, SM, LangOpts);
+    if (const auto *TemplateTypeParm =
+            dyn_cast<TemplateTypeParmType>(Loc.getType())) {
+      if (!visit(TemplateTypeParm->getDecl(), TypeBeginLoc, TypeEndLoc))
+        return false;
+    }
+    if (const auto *TemplateSpecType =
+            dyn_cast<TemplateSpecializationType>(Loc.getType())) {
+      if (!visit(TemplateSpecType->getTemplateName().getAsTemplateDecl(),
+                 TypeBeginLoc, TypeEndLoc))
+        return false;
+    }
+    return visit(Loc.getType()->getAsCXXRecordDecl(), TypeBeginLoc, TypeEndLoc);
+  }
+
+  bool TraverseNestedNameSpecifierLoc(NestedNameSpecifierLoc NNS) {
+    // The base visitor will visit NNSL prefixes, so we should only look at
+    // the current NNS.
+    if (NNS) {
+      const NamespaceDecl *ND = NNS.getNestedNameSpecifier()->getAsNamespace();
+      if (!visit(ND, NNS.getLocalBeginLoc(), NNS.getLocalEndLoc()))
+        return false;
+    }
+    return BaseType::TraverseNestedNameSpecifierLoc(NNS);
+  }
+
+private:
+  const SourceManager &SM;
+  const LangOptions &LangOpts;
+
+  bool visit(const NamedDecl *ND, SourceLocation BeginLoc,
+             SourceLocation EndLoc) {
+    return static_cast<T *>(this)->visitSymbolOccurrence(
+        ND, SourceRange(BeginLoc, EndLoc));
+  }
+  bool visit(const NamedDecl *ND, SourceLocation Loc) {
+    return visit(ND, Loc,
+                 Loc.getLocWithOffset(ND->getNameAsString().length() - 1));
+  }
+};
+
+} // end namespace tooling
+} // end namespace clang
+
+#endif // LLVM_CLANG_TOOLING_REFACTOR_RECURSIVE_SYMBOL_VISITOR_H
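
Because `RecursiveSymbolVisitor` is a CRTP wrapper, a client derives from it with itself as the template argument and shadows `visitSymbolOccurrence` to receive every occurrence. A hedged sketch of such a client (the collector class is invented; the base API is the one declared above):

#include "clang/Tooling/Refactoring/RecursiveSymbolVisitor.h"
#include <vector>

class OccurrenceCollector
    : public clang::tooling::RecursiveSymbolVisitor<OccurrenceCollector> {
  std::vector<clang::SourceRange> Ranges;

public:
  OccurrenceCollector(const clang::SourceManager &SM,
                      const clang::LangOptions &LangOpts)
      : RecursiveSymbolVisitor(SM, LangOpts) {}

  // Shadows the base no-op; called once per named-symbol occurrence.
  bool visitSymbolOccurrence(const clang::NamedDecl *ND,
                             llvm::ArrayRef<clang::SourceRange> NameRanges) {
    Ranges.insert(Ranges.end(), NameRanges.begin(), NameRanges.end());
    return true; // keep traversing
  }
};

// Usage: OccurrenceCollector(SM, LO).TraverseDecl(Ctx.getTranslationUnitDecl());
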
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Tooling/Refactoring/Rename/RenamingAction.h b/interpreter/llvm/src/tools/clang/include/clang/Tooling/Refactoring/Rename/RenamingAction.h
new file mode 100644
index 0000000000000..099eaca6c42ae
--- /dev/null
+++ b/interpreter/llvm/src/tools/clang/include/clang/Tooling/Refactoring/Rename/RenamingAction.h
@@ -0,0 +1,70 @@
+//===--- RenamingAction.h - Clang refactoring library ---------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief Provides an action to rename every symbol at a point.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLING_REFACTOR_RENAME_RENAMING_ACTION_H
+#define LLVM_CLANG_TOOLING_REFACTOR_RENAME_RENAMING_ACTION_H
+
+#include "clang/Tooling/Refactoring.h"
+
+namespace clang {
+class ASTConsumer;
+class CompilerInstance;
+
+namespace tooling {
+
+class RenamingAction {
+public:
+  RenamingAction(const std::vector<std::string> &NewNames,
+                 const std::vector<std::string> &PrevNames,
+                 const std::vector<std::vector<std::string>> &USRList,
+                 std::map<std::string, tooling::Replacements> &FileToReplaces,
+                 bool PrintLocations = false)
+      : NewNames(NewNames), PrevNames(PrevNames), USRList(USRList),
+        FileToReplaces(FileToReplaces), PrintLocations(PrintLocations) {}
+
+  std::unique_ptr<ASTConsumer> newASTConsumer();
+
+private:
+  const std::vector<std::string> &NewNames, &PrevNames;
+  const std::vector<std::vector<std::string>> &USRList;
+  std::map<std::string, tooling::Replacements> &FileToReplaces;
+  bool PrintLocations;
+};
+
+/// Rename all symbols identified by the given USRs.
+class QualifiedRenamingAction {
+public:
+  QualifiedRenamingAction(
+      const std::vector<std::string> &NewNames,
+      const std::vector<std::vector<std::string>> &USRList,
+      std::map<std::string, tooling::Replacements> &FileToReplaces)
+      : NewNames(NewNames), USRList(USRList), FileToReplaces(FileToReplaces) {}
+
+  std::unique_ptr<ASTConsumer> newASTConsumer();
+
+private:
+  /// New symbol names.
+  const std::vector<std::string> &NewNames;
+
+  /// A list of USRs. Each element represents USRs of a symbol being renamed.
+  const std::vector<std::vector<std::string>> &USRList;
+
+  /// A file path to replacements map.
+  std::map<std::string, tooling::Replacements> &FileToReplaces;
+};
+
+} // end namespace tooling
+} // end namespace clang
+
+#endif // LLVM_CLANG_TOOLING_REFACTOR_RENAME_RENAMING_ACTION_H
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Tooling/Refactoring/Rename/USRFinder.h b/interpreter/llvm/src/tools/clang/include/clang/Tooling/Refactoring/Rename/USRFinder.h
new file mode 100644
index 0000000000000..b74a5d7f70af3
--- /dev/null
+++ b/interpreter/llvm/src/tools/clang/include/clang/Tooling/Refactoring/Rename/USRFinder.h
@@ -0,0 +1,50 @@
+//===--- USRFinder.h - Clang refactoring library --------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief Methods for determining the USR of a symbol at a location in source
+/// code.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLING_REFACTOR_RENAME_USR_FINDER_H
+#define LLVM_CLANG_TOOLING_REFACTOR_RENAME_USR_FINDER_H
+
+#include "clang/AST/AST.h"
+#include "clang/AST/ASTContext.h"
+#include <string>
+#include <vector>
+
+namespace clang {
+
+class ASTContext;
+class Decl;
+class SourceLocation;
+class NamedDecl;
+
+namespace tooling {
+
+// Given an AST context and a point, returns a NamedDecl identifying the symbol
+// at the point. Returns null if nothing is found at the point.
+const NamedDecl *getNamedDeclAt(const ASTContext &Context,
+                                const SourceLocation Point);
+
+// Given an AST context and a fully qualified name, returns a NamedDecl
+// identifying the symbol with a matching name. Returns null if nothing is
+// found for the name.
+const NamedDecl *getNamedDeclFor(const ASTContext &Context,
+                                 const std::string &Name);
+
+// Converts a Decl into a USR.
+std::string getUSRForDecl(const Decl *Decl);
+
+} // end namespace tooling
+} // end namespace clang
+
+#endif // LLVM_CLANG_TOOLING_REFACTOR_RENAME_USR_FINDER_H
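Note: a sketch of how the three entry points compose; the consumer and the qualified name are illustrative, not part of the patch:

```cpp
#include "clang/AST/ASTConsumer.h"
#include "clang/AST/ASTContext.h"
#include "clang/Tooling/Refactoring/Rename/USRFinder.h"
#include "llvm/Support/raw_ostream.h"

using namespace clang;

class USRProbe : public ASTConsumer {
  void HandleTranslationUnit(ASTContext &Ctx) override {
    // Resolve a (hypothetical) qualified name to its declaration, then map
    // the declaration to its USR.
    if (const NamedDecl *ND = tooling::getNamedDeclFor(Ctx, "foo::Bar"))
      llvm::outs() << "USR: " << tooling::getUSRForDecl(ND) << "\n";
    // tooling::getNamedDeclAt(Ctx, Loc) does the same starting from a
    // SourceLocation instead of a name.
  }
};
```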
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Tooling/Refactoring/Rename/USRFindingAction.h b/interpreter/llvm/src/tools/clang/include/clang/Tooling/Refactoring/Rename/USRFindingAction.h
new file mode 100644
index 0000000000000..8aafee95bc097
--- /dev/null
+++ b/interpreter/llvm/src/tools/clang/include/clang/Tooling/Refactoring/Rename/USRFindingAction.h
@@ -0,0 +1,54 @@
+//===--- USRFindingAction.h - Clang refactoring library -------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief Provides an action to find all relevant USRs at a point.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLING_REFACTOR_RENAME_USR_FINDING_ACTION_H
+#define LLVM_CLANG_TOOLING_REFACTOR_RENAME_USR_FINDING_ACTION_H
+
+#include "clang/Basic/LLVM.h"
+#include "llvm/ADT/ArrayRef.h"
+
+#include <string>
+#include <vector>
+
+namespace clang {
+class ASTConsumer;
+class CompilerInstance;
+class NamedDecl;
+
+namespace tooling {
+
+struct USRFindingAction {
+  USRFindingAction(ArrayRef<unsigned> SymbolOffsets,
+                   ArrayRef<std::string> QualifiedNames, bool Force)
+      : SymbolOffsets(SymbolOffsets), QualifiedNames(QualifiedNames),
+        ErrorOccurred(false), Force(Force) {}
+  std::unique_ptr<ASTConsumer> newASTConsumer();
+
+  ArrayRef<std::string> getUSRSpellings() { return SpellingNames; }
+  ArrayRef<std::vector<std::string>> getUSRList() { return USRList; }
+  bool errorOccurred() { return ErrorOccurred; }
+
+private:
+  std::vector<unsigned> SymbolOffsets;
+  std::vector<std::string> QualifiedNames;
+  std::vector<std::string> SpellingNames;
+  std::vector<std::vector<std::string>> USRList;
+  bool ErrorOccurred;
+  bool Force;
+};
+
+} // end namespace tooling
+} // end namespace clang
+
+#endif // LLVM_CLANG_TOOLING_REFACTOR_RENAME_USR_FINDING_ACTION_H
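Note: together with RenamingAction above, this gives the two-phase pipeline clang-rename uses. A condensed sketch (error handling trimmed, symbol names hypothetical):

```cpp
#include "clang/Tooling/Refactoring.h"
#include "clang/Tooling/Refactoring/Rename/RenamingAction.h"
#include "clang/Tooling/Refactoring/Rename/USRFindingAction.h"

using namespace clang::tooling;

bool renameSymbol(RefactoringTool &Tool) {
  // Phase 1: collect every USR that identifies the symbol 'foo::Bar'.
  USRFindingAction Finder(/*SymbolOffsets=*/{},
                          /*QualifiedNames=*/{"foo::Bar"}, /*Force=*/false);
  Tool.run(newFrontendActionFactory(&Finder).get());
  if (Finder.errorOccurred())
    return false;

  // Phase 2: rewrite every reference reached by those USRs.
  std::vector<std::string> NewNames = {"Baz"};
  std::vector<std::string> PrevNames(Finder.getUSRSpellings().begin(),
                                     Finder.getUSRSpellings().end());
  std::vector<std::vector<std::string>> USRList(Finder.getUSRList().begin(),
                                                Finder.getUSRList().end());
  RenamingAction Rename(NewNames, PrevNames, USRList, Tool.getReplacements());
  return Tool.runAndSave(newFrontendActionFactory(&Rename).get()) == 0;
}
```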
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Tooling/Refactoring/Rename/USRLocFinder.h b/interpreter/llvm/src/tools/clang/include/clang/Tooling/Refactoring/Rename/USRLocFinder.h
new file mode 100644
index 0000000000000..733ea1a6ac9ed
--- /dev/null
+++ b/interpreter/llvm/src/tools/clang/include/clang/Tooling/Refactoring/Rename/USRLocFinder.h
@@ -0,0 +1,49 @@
+//===--- USRLocFinder.h - Clang refactoring library -----------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief Provides functionality for finding all instances of a USR in a given
+/// AST.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLING_REFACTOR_RENAME_USR_LOC_FINDER_H
+#define LLVM_CLANG_TOOLING_REFACTOR_RENAME_USR_LOC_FINDER_H
+
+#include "clang/AST/AST.h"
+#include "clang/Tooling/Core/Replacement.h"
+#include "clang/Tooling/Refactoring/AtomicChange.h"
+#include "llvm/ADT/StringRef.h"
+#include <string>
+#include <vector>
+
+namespace clang {
+namespace tooling {
+
+/// Create atomic changes for renaming all symbol references which are
+/// identified by the USRs set to a given new name.
+///
+/// \param USRs The set containing USRs of a particular old symbol.
+/// \param NewName The new name to replace old symbol name.
+/// \param TranslationUnitDecl The translation unit declaration.
+///
+/// \return Atomic changes for renaming.
+std::vector<tooling::AtomicChange>
+createRenameAtomicChanges(llvm::ArrayRef<std::string> USRs,
+                          llvm::StringRef NewName, Decl *TranslationUnitDecl);
+
+// FIXME: make this an AST matcher. Wouldn't that be awesome??? I agree!
+std::vector<SourceLocation>
+getLocationsOfUSRs(const std::vector<std::string> &USRs,
+                   llvm::StringRef PrevName, Decl *Decl);
+
+} // end namespace tooling
+} // end namespace clang
+
+#endif // LLVM_CLANG_TOOLING_REFACTOR_RENAME_USR_LOC_FINDER_H
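Note: the AtomicChange entry point is the newer interface. A sketch, assuming Ctx comes from a parsed TU and USRs from a USRFindingAction run:

```cpp
#include "clang/AST/ASTContext.h"
#include "clang/Tooling/Refactoring/Rename/USRLocFinder.h"

std::vector<clang::tooling::AtomicChange>
renameAllTo(clang::ASTContext &Ctx, llvm::ArrayRef<std::string> USRs,
            llvm::StringRef NewName) {
  // Traverses the whole translation unit and records one change per
  // reference that resolves to any of the given USRs.
  return clang::tooling::createRenameAtomicChanges(
      USRs, NewName, Ctx.getTranslationUnitDecl());
}
```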
diff --git a/interpreter/llvm/src/tools/clang/include/clang/Tooling/Tooling.h b/interpreter/llvm/src/tools/clang/include/clang/Tooling/Tooling.h
index 10e26ac25d174..6f9bc9e1a1504 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/Tooling/Tooling.h
+++ b/interpreter/llvm/src/tools/clang/include/clang/Tooling/Tooling.h
@@ -116,7 +116,7 @@ class SourceFileCallbacks {
 
   /// \brief Called before a source file is processed by a FrontEndAction.
   /// \see clang::FrontendAction::BeginSourceFileAction
-  virtual bool handleBeginSource(CompilerInstance &CI, StringRef Filename) {
+  virtual bool handleBeginSource(CompilerInstance &CI) {
     return true;
   }
 
@@ -202,12 +202,15 @@ buildASTFromCode(const Twine &Code, const Twine &FileName = "input.cc",
 /// \param PCHContainerOps The PCHContainerOperations for loading and creating
 /// clang modules.
 ///
+/// \param Adjuster A function to filter the command line arguments as specified.
+///
 /// \return The resulting AST or null if an error occurred.
 std::unique_ptr<ASTUnit> buildASTFromCodeWithArgs(
     const Twine &Code, const std::vector<std::string> &Args,
     const Twine &FileName = "input.cc", const Twine &ToolName = "clang-tool",
     std::shared_ptr<PCHContainerOperations> PCHContainerOps =
-        std::make_shared<PCHContainerOperations>());
+      std::make_shared<PCHContainerOperations>(),
+    ArgumentsAdjuster Adjuster = getClangStripDependencyFileAdjuster());
 
 /// \brief Utility to run a FrontendAction in a single clang invocation.
 class ToolInvocation {
@@ -388,12 +391,11 @@ inline std::unique_ptr<FrontendActionFactory> newFrontendActionFactory(
       }
 
     protected:
-      bool BeginSourceFileAction(CompilerInstance &CI,
-                                 StringRef Filename) override {
-        if (!clang::ASTFrontendAction::BeginSourceFileAction(CI, Filename))
+      bool BeginSourceFileAction(CompilerInstance &CI) override {
+        if (!clang::ASTFrontendAction::BeginSourceFileAction(CI))
           return false;
         if (Callbacks)
-          return Callbacks->handleBeginSource(CI, Filename);
+          return Callbacks->handleBeginSource(CI);
         return true;
       }
       void EndSourceFileAction() override {
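Note: the new defaulted Adjuster parameter keeps the old behavior (dependency-file flags such as -MD are stripped) while letting callers substitute their own filter. A sketch:

```cpp
#include "clang/Frontend/ASTUnit.h"
#include "clang/Tooling/ArgumentsAdjusters.h"
#include "clang/Tooling/Tooling.h"

std::unique_ptr<clang::ASTUnit> parseSnippet() {
  // Explicitly passing the default adjuster; a custom ArgumentsAdjuster
  // (a std::function over the argument vector) could go here instead.
  return clang::tooling::buildASTFromCodeWithArgs(
      "int f() { return 42; }", {"-std=c++11", "-MD"}, "snippet.cc",
      "clang-tool", std::make_shared<clang::PCHContainerOperations>(),
      clang::tooling::getClangStripDependencyFileAdjuster());
}
```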
diff --git a/interpreter/llvm/src/tools/clang/include/clang/module.modulemap b/interpreter/llvm/src/tools/clang/include/clang/module.modulemap
index 3b42381100260..d850bd552e1fd 100644
--- a/interpreter/llvm/src/tools/clang/include/clang/module.modulemap
+++ b/interpreter/llvm/src/tools/clang/include/clang/module.modulemap
@@ -33,6 +33,7 @@ module Clang_Basic {
   textual header "Basic/BuiltinsLe64.def"
   textual header "Basic/BuiltinsMips.def"
   textual header "Basic/BuiltinsNEON.def"
+  textual header "Basic/BuiltinsNios2.def"
   textual header "Basic/BuiltinsNVPTX.def"
   textual header "Basic/BuiltinsPPC.def"
   textual header "Basic/BuiltinsSystemZ.def"
@@ -132,7 +133,7 @@ module Clang_StaticAnalyzer_Frontend {
 
 module Clang_Tooling {
   requires cplusplus umbrella "Tooling" module * { export * }
-  // FIXME: Exclude this header to avoid pulling all of the AST matchers
+  // FIXME: Exclude these headers to avoid pulling all of the AST matchers
   // library into clang-format. Due to inline key functions in the headers,
   // importing the AST matchers library gives a link dependency on the AST
   // matchers (and thus the AST), which clang-format should not have.
diff --git a/interpreter/llvm/src/tools/clang/lib/ARCMigrate/TransRetainReleaseDealloc.cpp b/interpreter/llvm/src/tools/clang/lib/ARCMigrate/TransRetainReleaseDealloc.cpp
index f81133f3aad37..389f3655aa520 100644
--- a/interpreter/llvm/src/tools/clang/lib/ARCMigrate/TransRetainReleaseDealloc.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/ARCMigrate/TransRetainReleaseDealloc.cpp
@@ -78,6 +78,7 @@ class RetainReleaseDeallocRemover :
         }
       }
       // Pass through.
+      LLVM_FALLTHROUGH;
     case OMF_retain:
     case OMF_release:
       if (E->getReceiverKind() == ObjCMessageExpr::Instance)
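Note: this and several hunks below only annotate intentional case fall-through with LLVM_FALLTHROUGH (from llvm/Support/Compiler.h), which expands to [[fallthrough]] or a compiler-specific equivalent where supported and to nothing otherwise, keeping -Wimplicit-fallthrough quiet. A generic illustration, not taken from the patch:

```cpp
#include "llvm/Support/Compiler.h"

int classify(int Kind) {
  switch (Kind) {
  case 0:
    // Kind 0 shares the handling of kind 1; the fall-through is intended.
    LLVM_FALLTHROUGH;
  case 1:
    return 1;
  default:
    return 0;
  }
}
```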
diff --git a/interpreter/llvm/src/tools/clang/lib/ARCMigrate/TransformActions.cpp b/interpreter/llvm/src/tools/clang/lib/ARCMigrate/TransformActions.cpp
index c628b54ed414d..4f3fb58459257 100644
--- a/interpreter/llvm/src/tools/clang/lib/ARCMigrate/TransformActions.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/ARCMigrate/TransformActions.cpp
@@ -539,6 +539,7 @@ void TransformActionsImpl::addRemoval(CharSourceRange range) {
       return;
     case Range_Contains:
       RI->End = newRange.End;
+      LLVM_FALLTHROUGH;
     case Range_ExtendsBegin:
       newRange.End = RI->End;
       Removals.erase(RI);
diff --git a/interpreter/llvm/src/tools/clang/lib/AST/ASTContext.cpp b/interpreter/llvm/src/tools/clang/lib/AST/ASTContext.cpp
index 9a54faab10432..49ca5f2824ec6 100644
--- a/interpreter/llvm/src/tools/clang/lib/AST/ASTContext.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/AST/ASTContext.cpp
@@ -894,7 +894,7 @@ void ASTContext::mergeDefinitionIntoModule(NamedDecl *ND, Module *M,
   if (getLangOpts().ModulesLocalVisibility)
     MergedDefModules[ND].push_back(M);
   else
-    ND->setHidden(false);
+    ND->setVisibleDespiteOwningModule();
 }
 
 void ASTContext::deduplicateMergedDefinitonsFor(NamedDecl *ND) {
@@ -1939,9 +1939,8 @@ TypeInfo ASTContext::getTypeInfoImpl(const Type *T) const {
   break;
 
   case Type::Pipe: {
-    TypeInfo Info = getTypeInfo(cast<PipeType>(T)->getElementType());
-    Width = Info.Width;
-    Align = Info.Align;
+    Width = Target->getPointerWidth(getTargetAddressSpace(LangAS::opencl_global));
+    Align = Target->getPointerAlign(getTargetAddressSpace(LangAS::opencl_global));
   }
 
   }
@@ -3571,7 +3570,7 @@ QualType ASTContext::getSubstTemplateTypeParmPackType(
     = new (*this, TypeAlignment) SubstTemplateTypeParmPackType(Parm, Canon,
                                                                ArgPack);
   Types.push_back(SubstParm);
-  SubstTemplateTypeParmTypes.InsertNode(SubstParm, InsertPos);
+  SubstTemplateTypeParmPackTypes.InsertNode(SubstParm, InsertPos);
   return QualType(SubstParm, 0);  
 }
 
@@ -4531,6 +4530,12 @@ CanQualType ASTContext::getSizeType() const {
   return getFromTargetType(Target->getSizeType());
 }
 
+/// Return the unique signed counterpart of the integer type 
+/// corresponding to size_t.
+CanQualType ASTContext::getSignedSizeType() const {
+  return getFromTargetType(Target->getSignedSizeType());
+}
+
 /// getIntMaxType - Return the unique type for "intmax_t" (C99 7.18.1.5).
 CanQualType ASTContext::getIntMaxType() const {
   return getFromTargetType(Target->getIntMaxType());
@@ -5996,9 +6001,19 @@ static void EncodeBitField(const ASTContext *Ctx, std::string& S,
   // compatibility with GCC, although providing it breaks anything that
   // actually uses runtime introspection and wants to work on both runtimes...
   if (Ctx->getLangOpts().ObjCRuntime.isGNUFamily()) {
-    const RecordDecl *RD = FD->getParent();
-    const ASTRecordLayout &RL = Ctx->getASTRecordLayout(RD);
-    S += llvm::utostr(RL.getFieldOffset(FD->getFieldIndex()));
+    uint64_t Offset;
+
+    if (const auto *IVD = dyn_cast<ObjCIvarDecl>(FD)) {
+      Offset = Ctx->lookupFieldBitOffset(IVD->getContainingInterface(), nullptr,
+                                         IVD);
+    } else {
+      const RecordDecl *RD = FD->getParent();
+      const ASTRecordLayout &RL = Ctx->getASTRecordLayout(RD);
+      Offset = RL.getFieldOffset(FD->getFieldIndex());
+    }
+
+    S += llvm::utostr(Offset);
+
     if (const EnumType *ET = T->getAs<EnumType>())
       S += ObjCEncodingForEnumType(Ctx, ET);
     else {
@@ -6245,6 +6260,8 @@ void ASTContext::getObjCEncodingForTypeImpl(QualType T, std::string& S,
       S += "{objc_class=}";
       return;
     }
+    // TODO: Double check to make sure this intentionally falls through.
+    LLVM_FALLTHROUGH;
   }
   
   case Type::ObjCInterface: {
@@ -8518,6 +8535,9 @@ static QualType DecodeTypeFromStr(const char *&Str, const ASTContext &Context,
   
   // Read the prefixed modifiers first.
   bool Done = false;
+  #ifndef NDEBUG
+  bool IsSpecialLong = false;
+  #endif
   while (!Done) {
     switch (*Str++) {
     default: Done = true; --Str; break;
@@ -8535,12 +8555,28 @@ static QualType DecodeTypeFromStr(const char *&Str, const ASTContext &Context,
       Unsigned = true;
       break;
     case 'L':
+      assert(!IsSpecialLong && "Can't use 'L' with 'W' or 'N' modifiers");
       assert(HowLong <= 2 && "Can't have LLLL modifier");
       ++HowLong;
       break;
+    case 'N': {
+      // 'N' behaves like 'L' for all non LP64 targets and 'int' otherwise.
+      assert(!IsSpecialLong && "Can't use two 'N' or 'W' modifiers!");
+      assert(HowLong == 0 && "Can't use both 'L' and 'N' modifiers!");
+      #ifndef NDEBUG
+      IsSpecialLong = true;
+      #endif
+      if (Context.getTargetInfo().getLongWidth() == 32)
+        ++HowLong;
+      break;
+    }
     case 'W':
       // This modifier represents int64 type.
+      assert(!IsSpecialLong && "Can't use two 'N' or 'W' modifiers!");
       assert(HowLong == 0 && "Can't use both 'L' and 'W' modifiers!");
+      #ifndef NDEBUG
+      IsSpecialLong = true;
+      #endif
       switch (Context.getTargetInfo().getInt64Type()) {
       default:
         llvm_unreachable("Unexpected integer type");
@@ -8551,6 +8587,7 @@ static QualType DecodeTypeFromStr(const char *&Str, const ASTContext &Context,
         HowLong = 2;
         break;
       }
+      break;
     }
   }
 
@@ -8735,8 +8772,8 @@ static QualType DecodeTypeFromStr(const char *&Str, const ASTContext &Context,
       char *End;
       unsigned AddrSpace = strtoul(Str, &End, 10);
       if (End != Str && AddrSpace != 0) {
-        Type = Context.getAddrSpaceQualType(Type, AddrSpace +
-            LangAS::Count);
+        Type = Context.getAddrSpaceQualType(
+            Type, AddrSpace + LangAS::FirstTargetAddressSpace);
         Str = End;
       }
       if (c == '*')
@@ -9551,13 +9588,8 @@ uint64_t ASTContext::getTargetNullPointerValue(QualType QT) const {
 }
 
 unsigned ASTContext::getTargetAddressSpace(unsigned AS) const {
-  // For OpenCL, only function local variables are not explicitly marked with
-  // an address space in the AST, and these need to be the address space of
-  // alloca.
-  if (!AS && LangOpts.OpenCL)
-    return getTargetInfo().getDataLayout().getAllocaAddrSpace();
-  if (AS >= LangAS::Count)
-    return AS - LangAS::Count;
+  if (AS >= LangAS::FirstTargetAddressSpace)
+    return AS - LangAS::FirstTargetAddressSpace;
   else
     return (*AddrSpaceMap)[AS];
 }
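Note: the address-space rework replaces the LangAS::Count bias with an explicit LangAS::FirstTargetAddressSpace. Language address spaces live below the bias and are mapped through the target's table; raw target address spaces sit above it and are decoded by subtraction. A simplified model of the scheme (constants illustrative, not Clang's):

```cpp
#include <cassert>

constexpr unsigned FirstTargetAS = 8; // stand-in for LangAS::FirstTargetAddressSpace

// Encoding a raw target address space into the AST representation.
unsigned encodeTargetAS(unsigned TargetAS) { return TargetAS + FirstTargetAS; }

// Decoding mirrors ASTContext::getTargetAddressSpace above.
unsigned decodeAS(unsigned AS, const unsigned (&LangASMap)[FirstTargetAS]) {
  return AS >= FirstTargetAS ? AS - FirstTargetAS : LangASMap[AS];
}

int main() {
  const unsigned Map[FirstTargetAS] = {0, 1, 3, 0, 0, 0, 0, 0};
  assert(decodeAS(encodeTargetAS(5), Map) == 5); // target AS round-trips
  assert(decodeAS(2, Map) == 3);                 // language AS goes via map
}
```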
diff --git a/interpreter/llvm/src/tools/clang/lib/AST/ASTDiagnostic.cpp b/interpreter/llvm/src/tools/clang/lib/AST/ASTDiagnostic.cpp
index 03e6115a0dba1..b43c28deb3620 100644
--- a/interpreter/llvm/src/tools/clang/lib/AST/ASTDiagnostic.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/AST/ASTDiagnostic.cpp
@@ -360,6 +360,7 @@ void clang::FormatASTNodeDiagnosticArgument(
       Modifier = StringRef();
       Argument = StringRef();
       // Fall through
+      LLVM_FALLTHROUGH;
     }
     case DiagnosticsEngine::ak_qualtype: {
       assert(Modifier.empty() && Argument.empty() &&
diff --git a/interpreter/llvm/src/tools/clang/lib/AST/ASTDumper.cpp b/interpreter/llvm/src/tools/clang/lib/AST/ASTDumper.cpp
index d89be0d9e6fa4..92ed7da94d8e8 100644
--- a/interpreter/llvm/src/tools/clang/lib/AST/ASTDumper.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/AST/ASTDumper.cpp
@@ -1184,6 +1184,27 @@ void ASTDumper::VisitFunctionDecl(const FunctionDecl *D) {
          I != E; ++I)
       dumpCXXCtorInitializer(*I);
 
+  if (const CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(D))
+    if (MD->size_overridden_methods() != 0) {
+      auto dumpOverride =
+        [=](const CXXMethodDecl *D) {
+          SplitQualType T_split = D->getType().split();
+          OS << D << " " << D->getParent()->getName() << "::"
+             << D->getNameAsString() << " '" << QualType::getAsString(T_split) << "'";
+        };
+
+      dumpChild([=] {
+        auto FirstOverrideItr = MD->begin_overridden_methods();
+        OS << "Overrides: [ ";
+        dumpOverride(*FirstOverrideItr);
+        for (const auto *Override :
+               llvm::make_range(FirstOverrideItr + 1,
+                                MD->end_overridden_methods()))
+          dumpOverride(Override);
+        OS << " ]";
+      });
+    }
+
   if (D->doesThisDeclarationHaveABody())
     dumpStmt(D->getBody());
 }
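Note: with this dumper change, -ast-dump output for an overriding method gains an "Overrides:" child. Input that exercises it (the output shape below is inferred from the code above; pointer values vary per run):

```cpp
// Dump with: clang -Xclang -ast-dump -fsyntax-only overrides.cpp
struct Base {
  virtual void f(int);
};
struct Derived : Base {
  // The CXXMethodDecl dump for this node now carries a child like:
  //   Overrides: [ 0x... Base::f 'void (int)' ]
  void f(int) override;
};
```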
diff --git a/interpreter/llvm/src/tools/clang/lib/AST/ASTImporter.cpp b/interpreter/llvm/src/tools/clang/lib/AST/ASTImporter.cpp
index 847638b7bbeb8..2c0bb11cc4bc3 100644
--- a/interpreter/llvm/src/tools/clang/lib/AST/ASTImporter.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/AST/ASTImporter.cpp
@@ -319,6 +319,9 @@ namespace clang {
     bool ImportArrayChecked(const InContainerTy &InContainer, OIter Obegin) {
       return ImportArrayChecked(InContainer.begin(), InContainer.end(), Obegin);
     }
+
+    // Importing overrides.
+    void ImportOverrides(CXXMethodDecl *ToMethod, CXXMethodDecl *FromMethod);
   };
 }
 
@@ -953,12 +956,16 @@ bool ASTNodeImporter::ImportDefinition(RecordDecl *From, RecordDecl *To,
     ToData.HasUninitializedFields = FromData.HasUninitializedFields;
     ToData.HasInheritedConstructor = FromData.HasInheritedConstructor;
     ToData.HasInheritedAssignment = FromData.HasInheritedAssignment;
+    ToData.NeedOverloadResolutionForCopyConstructor
+      = FromData.NeedOverloadResolutionForCopyConstructor;
     ToData.NeedOverloadResolutionForMoveConstructor
       = FromData.NeedOverloadResolutionForMoveConstructor;
     ToData.NeedOverloadResolutionForMoveAssignment
       = FromData.NeedOverloadResolutionForMoveAssignment;
     ToData.NeedOverloadResolutionForDestructor
       = FromData.NeedOverloadResolutionForDestructor;
+    ToData.DefaultedCopyConstructorIsDeleted
+      = FromData.DefaultedCopyConstructorIsDeleted;
     ToData.DefaultedMoveConstructorIsDeleted
       = FromData.DefaultedMoveConstructorIsDeleted;
     ToData.DefaultedMoveAssignmentIsDeleted
@@ -970,6 +977,7 @@ bool ASTNodeImporter::ImportDefinition(RecordDecl *From, RecordDecl *To,
       = FromData.HasConstexprNonCopyMoveConstructor;
     ToData.HasDefaultedDefaultConstructor
       = FromData.HasDefaultedDefaultConstructor;
+    ToData.CanPassInRegisters = FromData.CanPassInRegisters;
     ToData.DefaultedDefaultConstructorIsConstexpr
       = FromData.DefaultedDefaultConstructorIsConstexpr;
     ToData.HasConstexprDefaultConstructor
@@ -2025,6 +2033,9 @@ Decl *ASTNodeImporter::VisitFunctionDecl(FunctionDecl *D) {
   // Add this function to the lexical context.
   LexicalDC->addDeclInternal(ToFunction);
 
+  if (auto *FromCXXMethod = dyn_cast<CXXMethodDecl>(D))
+    ImportOverrides(cast<CXXMethodDecl>(ToFunction), FromCXXMethod);
+
   return ToFunction;
 }
 
@@ -2462,10 +2473,9 @@ Decl *ASTNodeImporter::VisitImplicitParamDecl(ImplicitParamDecl *D) {
     return nullptr;
 
   // Create the imported parameter.
-  ImplicitParamDecl *ToParm
-    = ImplicitParamDecl::Create(Importer.getToContext(), DC,
-                                Loc, Name.getAsIdentifierInfo(),
-                                T);
+  auto *ToParm = ImplicitParamDecl::Create(Importer.getToContext(), DC, Loc,
+                                           Name.getAsIdentifierInfo(), T,
+                                           D->getParameterKind());
   return Importer.Imported(D, ToParm);
 }
 
@@ -5500,6 +5510,14 @@ Expr *ASTNodeImporter::VisitSubstNonTypeTemplateParmExpr(
         Replacement);
 }
 
+void ASTNodeImporter::ImportOverrides(CXXMethodDecl *ToMethod,
+                                      CXXMethodDecl *FromMethod) {
+  for (auto *FromOverriddenMethod : FromMethod->overridden_methods())
+    ToMethod->addOverriddenMethod(
+      cast<CXXMethodDecl>(Importer.Import(const_cast<CXXMethodDecl*>(
+                                            FromOverriddenMethod))));
+}
+
 ASTImporter::ASTImporter(ASTContext &ToContext, FileManager &ToFileManager,
                          ASTContext &FromContext, FileManager &FromFileManager,
                          bool MinimalImport)
diff --git a/interpreter/llvm/src/tools/clang/lib/AST/ASTStructuralEquivalence.cpp b/interpreter/llvm/src/tools/clang/lib/AST/ASTStructuralEquivalence.cpp
index 9376ee1d4ee49..ea7faab767edf 100644
--- a/interpreter/llvm/src/tools/clang/lib/AST/ASTStructuralEquivalence.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/AST/ASTStructuralEquivalence.cpp
@@ -424,6 +424,7 @@ static bool IsStructurallyEquivalent(StructuralEquivalenceContext &Context,
       return false;
 
     // Fall through to check the bits common with FunctionNoProtoType.
+    LLVM_FALLTHROUGH;
   }
 
   case Type::FunctionNoProto: {
@@ -734,13 +735,28 @@ static bool IsStructurallyEquivalent(StructuralEquivalenceContext &Context,
   // Check for equivalent field names.
   IdentifierInfo *Name1 = Field1->getIdentifier();
   IdentifierInfo *Name2 = Field2->getIdentifier();
-  if (!::IsStructurallyEquivalent(Name1, Name2))
+  if (!::IsStructurallyEquivalent(Name1, Name2)) {
+    if (Context.Complain) {
+      Context.Diag2(Owner2->getLocation(),
+                    Context.ErrorOnTagTypeMismatch
+                        ? diag::err_odr_tag_type_inconsistent
+                        : diag::warn_odr_tag_type_inconsistent)
+          << Context.ToCtx.getTypeDeclType(Owner2);
+      Context.Diag2(Field2->getLocation(), diag::note_odr_field_name)
+          << Field2->getDeclName();
+      Context.Diag1(Field1->getLocation(), diag::note_odr_field_name)
+          << Field1->getDeclName();
+    }
     return false;
+  }
 
   if (!IsStructurallyEquivalent(Context, Field1->getType(),
                                 Field2->getType())) {
     if (Context.Complain) {
-      Context.Diag2(Owner2->getLocation(), diag::warn_odr_tag_type_inconsistent)
+      Context.Diag2(Owner2->getLocation(),
+                    Context.ErrorOnTagTypeMismatch
+                        ? diag::err_odr_tag_type_inconsistent
+                        : diag::warn_odr_tag_type_inconsistent)
           << Context.ToCtx.getTypeDeclType(Owner2);
       Context.Diag2(Field2->getLocation(), diag::note_odr_field)
           << Field2->getDeclName() << Field2->getType();
@@ -752,7 +768,10 @@ static bool IsStructurallyEquivalent(StructuralEquivalenceContext &Context,
 
   if (Field1->isBitField() != Field2->isBitField()) {
     if (Context.Complain) {
-      Context.Diag2(Owner2->getLocation(), diag::warn_odr_tag_type_inconsistent)
+      Context.Diag2(Owner2->getLocation(),
+                    Context.ErrorOnTagTypeMismatch
+                        ? diag::err_odr_tag_type_inconsistent
+                        : diag::warn_odr_tag_type_inconsistent)
           << Context.ToCtx.getTypeDeclType(Owner2);
       if (Field1->isBitField()) {
         Context.Diag1(Field1->getLocation(), diag::note_odr_bit_field)
@@ -779,7 +798,9 @@ static bool IsStructurallyEquivalent(StructuralEquivalenceContext &Context,
     if (Bits1 != Bits2) {
       if (Context.Complain) {
         Context.Diag2(Owner2->getLocation(),
-                      diag::warn_odr_tag_type_inconsistent)
+                      Context.ErrorOnTagTypeMismatch
+                          ? diag::err_odr_tag_type_inconsistent
+                          : diag::warn_odr_tag_type_inconsistent)
             << Context.ToCtx.getTypeDeclType(Owner2);
         Context.Diag2(Field2->getLocation(), diag::note_odr_bit_field)
             << Field2->getDeclName() << Field2->getType() << Bits2;
@@ -798,7 +819,10 @@ static bool IsStructurallyEquivalent(StructuralEquivalenceContext &Context,
                                      RecordDecl *D1, RecordDecl *D2) {
   if (D1->isUnion() != D2->isUnion()) {
     if (Context.Complain) {
-      Context.Diag2(D2->getLocation(), diag::warn_odr_tag_type_inconsistent)
+      Context.Diag2(D2->getLocation(),
+                    Context.ErrorOnTagTypeMismatch
+                        ? diag::err_odr_tag_type_inconsistent
+                        : diag::warn_odr_tag_type_inconsistent)
           << Context.ToCtx.getTypeDeclType(D2);
       Context.Diag1(D1->getLocation(), diag::note_odr_tag_kind_here)
           << D1->getDeclName() << (unsigned)D1->getTagKind();
@@ -926,7 +950,10 @@ static bool IsStructurallyEquivalent(StructuralEquivalenceContext &Context,
        Field1 != Field1End; ++Field1, ++Field2) {
     if (Field2 == Field2End) {
       if (Context.Complain) {
-        Context.Diag2(D2->getLocation(), diag::warn_odr_tag_type_inconsistent)
+        Context.Diag2(D2->getLocation(),
+                      Context.ErrorOnTagTypeMismatch
+                          ? diag::err_odr_tag_type_inconsistent
+                          : diag::warn_odr_tag_type_inconsistent)
             << Context.ToCtx.getTypeDeclType(D2);
         Context.Diag1(Field1->getLocation(), diag::note_odr_field)
             << Field1->getDeclName() << Field1->getType();
@@ -941,7 +968,10 @@ static bool IsStructurallyEquivalent(StructuralEquivalenceContext &Context,
 
   if (Field2 != Field2End) {
     if (Context.Complain) {
-      Context.Diag2(D2->getLocation(), diag::warn_odr_tag_type_inconsistent)
+      Context.Diag2(D2->getLocation(),
+                    Context.ErrorOnTagTypeMismatch
+                        ? diag::err_odr_tag_type_inconsistent
+                        : diag::warn_odr_tag_type_inconsistent)
           << Context.ToCtx.getTypeDeclType(D2);
       Context.Diag2(Field2->getLocation(), diag::note_odr_field)
           << Field2->getDeclName() << Field2->getType();
@@ -963,7 +993,10 @@ static bool IsStructurallyEquivalent(StructuralEquivalenceContext &Context,
        EC1 != EC1End; ++EC1, ++EC2) {
     if (EC2 == EC2End) {
       if (Context.Complain) {
-        Context.Diag2(D2->getLocation(), diag::warn_odr_tag_type_inconsistent)
+        Context.Diag2(D2->getLocation(),
+                      Context.ErrorOnTagTypeMismatch
+                          ? diag::err_odr_tag_type_inconsistent
+                          : diag::warn_odr_tag_type_inconsistent)
             << Context.ToCtx.getTypeDeclType(D2);
         Context.Diag1(EC1->getLocation(), diag::note_odr_enumerator)
             << EC1->getDeclName() << EC1->getInitVal().toString(10);
@@ -977,7 +1010,10 @@ static bool IsStructurallyEquivalent(StructuralEquivalenceContext &Context,
     if (!llvm::APSInt::isSameValue(Val1, Val2) ||
         !IsStructurallyEquivalent(EC1->getIdentifier(), EC2->getIdentifier())) {
       if (Context.Complain) {
-        Context.Diag2(D2->getLocation(), diag::warn_odr_tag_type_inconsistent)
+        Context.Diag2(D2->getLocation(),
+                      Context.ErrorOnTagTypeMismatch
+                          ? diag::err_odr_tag_type_inconsistent
+                          : diag::warn_odr_tag_type_inconsistent)
             << Context.ToCtx.getTypeDeclType(D2);
         Context.Diag2(EC2->getLocation(), diag::note_odr_enumerator)
             << EC2->getDeclName() << EC2->getInitVal().toString(10);
@@ -990,7 +1026,10 @@ static bool IsStructurallyEquivalent(StructuralEquivalenceContext &Context,
 
   if (EC2 != EC2End) {
     if (Context.Complain) {
-      Context.Diag2(D2->getLocation(), diag::warn_odr_tag_type_inconsistent)
+      Context.Diag2(D2->getLocation(),
+                    Context.ErrorOnTagTypeMismatch
+                        ? diag::err_odr_tag_type_inconsistent
+                        : diag::warn_odr_tag_type_inconsistent)
           << Context.ToCtx.getTypeDeclType(D2);
       Context.Diag2(EC2->getLocation(), diag::note_odr_enumerator)
           << EC2->getDeclName() << EC2->getInitVal().toString(10);
diff --git a/interpreter/llvm/src/tools/clang/lib/AST/CMakeLists.txt b/interpreter/llvm/src/tools/clang/lib/AST/CMakeLists.txt
index bbebf758212e2..ab9b59184294b 100644
--- a/interpreter/llvm/src/tools/clang/lib/AST/CMakeLists.txt
+++ b/interpreter/llvm/src/tools/clang/lib/AST/CMakeLists.txt
@@ -1,4 +1,7 @@
-set(LLVM_LINK_COMPONENTS support)
+set(LLVM_LINK_COMPONENTS
+  BinaryFormat
+  Support
+  )
 
 add_clang_library(clangAST
   APValue.cpp
diff --git a/interpreter/llvm/src/tools/clang/lib/AST/CXXInheritance.cpp b/interpreter/llvm/src/tools/clang/lib/AST/CXXInheritance.cpp
index 4499ec70366fa..fc4d8b137337f 100644
--- a/interpreter/llvm/src/tools/clang/lib/AST/CXXInheritance.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/AST/CXXInheritance.cpp
@@ -57,6 +57,7 @@ bool CXXBasePaths::isAmbiguous(CanQualType BaseType) {
 void CXXBasePaths::clear() {
   Paths.clear();
   ClassSubobjects.clear();
+  VisitedDependentRecords.clear();
   ScratchPath.clear();
   DetectedVirtual = nullptr;
 }
@@ -67,6 +68,7 @@ void CXXBasePaths::swap(CXXBasePaths &Other) {
   std::swap(Origin, Other.Origin);
   Paths.swap(Other.Paths);
   ClassSubobjects.swap(Other.ClassSubobjects);
+  VisitedDependentRecords.swap(Other.VisitedDependentRecords);
   std::swap(FindAmbiguities, Other.FindAmbiguities);
   std::swap(RecordPaths, Other.RecordPaths);
   std::swap(DetectVirtual, Other.DetectVirtual);
@@ -278,6 +280,14 @@ bool CXXBasePaths::lookupInBases(ASTContext &Context,
                   dyn_cast_or_null<ClassTemplateDecl>(TN.getAsTemplateDecl()))
             BaseRecord = TD->getTemplatedDecl();
         }
+        if (BaseRecord) {
+          if (!BaseRecord->hasDefinition() ||
+              VisitedDependentRecords.count(BaseRecord)) {
+            BaseRecord = nullptr;
+          } else {
+            VisitedDependentRecords.insert(BaseRecord);
+          }
+        }
       } else {
         BaseRecord = cast<CXXRecordDecl>(
             BaseSpec.getType()->castAs<RecordType>()->getDecl());
diff --git a/interpreter/llvm/src/tools/clang/lib/AST/Decl.cpp b/interpreter/llvm/src/tools/clang/lib/AST/Decl.cpp
index a1342f477b681..573a98efe980b 100644
--- a/interpreter/llvm/src/tools/clang/lib/AST/Decl.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/AST/Decl.cpp
@@ -573,6 +573,44 @@ static bool isSingleLineLanguageLinkage(const Decl &D) {
   return false;
 }
 
+static bool isExportedFromModuleIntefaceUnit(const NamedDecl *D) {
+  switch (D->getModuleOwnershipKind()) {
+  case Decl::ModuleOwnershipKind::Unowned:
+  case Decl::ModuleOwnershipKind::ModulePrivate:
+    return false;
+  case Decl::ModuleOwnershipKind::Visible:
+  case Decl::ModuleOwnershipKind::VisibleWhenImported:
+    if (auto *M = D->getOwningModule())
+      return M->Kind == Module::ModuleInterfaceUnit;
+  }
+  llvm_unreachable("unexpected module ownership kind");
+}
+
+static LinkageInfo getInternalLinkageFor(const NamedDecl *D) {
+  // Internal linkage declarations within a module interface unit are modeled
+  // as "module-internal linkage", which means that they have internal linkage
+  // formally but can be indirectly accessed from outside the module via inline
+  // functions and templates defined within the module.
+  if (auto *M = D->getOwningModule())
+    if (M->Kind == Module::ModuleInterfaceUnit)
+      return LinkageInfo(ModuleInternalLinkage, DefaultVisibility, false);
+
+  return LinkageInfo::internal();
+}
+
+static LinkageInfo getExternalLinkageFor(const NamedDecl *D) {
+  // C++ Modules TS [basic.link]/6.8:
+  //   - A name declared at namespace scope that does not have internal linkage
+  //     by the previous rules and that is introduced by a non-exported
+  //     declaration has module linkage.
+  if (auto *M = D->getOwningModule())
+    if (M->Kind == Module::ModuleInterfaceUnit)
+      if (!isExportedFromModuleIntefaceUnit(D))
+        return LinkageInfo(ModuleLinkage, DefaultVisibility, false);
+
+  return LinkageInfo::external();
+}
+
 static LinkageInfo getLVForNamespaceScopeDecl(const NamedDecl *D,
                                               LVComputationKind computation) {
   assert(D->getDeclContext()->getRedeclContext()->isFileContext() &&
@@ -588,16 +626,18 @@ static LinkageInfo getLVForNamespaceScopeDecl(const NamedDecl *D,
   if (const auto *Var = dyn_cast<VarDecl>(D)) {
     // Explicitly declared static.
     if (Var->getStorageClass() == SC_Static)
-      return LinkageInfo::internal();
+      return getInternalLinkageFor(Var);
 
     // - a non-inline, non-volatile object or reference that is explicitly
     //   declared const or constexpr and neither explicitly declared extern
     //   nor previously declared to have external linkage; or (there is no
     //   equivalent in C99)
+    // The C++ modules TS adds "non-exported" to this list.
     if (Context.getLangOpts().CPlusPlus &&
         Var->getType().isConstQualified() && 
         !Var->getType().isVolatileQualified() &&
-        !Var->isInline()) {
+        !Var->isInline() &&
+        !isExportedFromModuleIntefaceUnit(Var)) {
       const VarDecl *PrevVar = Var->getPreviousDecl();
       if (PrevVar)
         return getLVForDecl(PrevVar, computation);
@@ -605,7 +645,7 @@ static LinkageInfo getLVForNamespaceScopeDecl(const NamedDecl *D,
       if (Var->getStorageClass() != SC_Extern &&
           Var->getStorageClass() != SC_PrivateExtern &&
           !isSingleLineLanguageLinkage(*Var))
-        return LinkageInfo::internal();
+        return getInternalLinkageFor(Var);
     }
 
     for (const VarDecl *PrevVar = Var->getPreviousDecl(); PrevVar;
@@ -615,7 +655,7 @@ static LinkageInfo getLVForNamespaceScopeDecl(const NamedDecl *D,
         return PrevVar->getLinkageAndVisibility();
       // Explicitly declared static.
       if (PrevVar->getStorageClass() == SC_Static)
-        return LinkageInfo::internal();
+        return getInternalLinkageFor(Var);
     }
   } else if (const FunctionDecl *Function = D->getAsFunction()) {
     // C++ [temp]p4:
@@ -624,7 +664,7 @@ static LinkageInfo getLVForNamespaceScopeDecl(const NamedDecl *D,
 
     // Explicitly declared static.
     if (Function->getCanonicalDecl()->getStorageClass() == SC_Static)
-      return LinkageInfo(InternalLinkage, DefaultVisibility, false);
+      return getInternalLinkageFor(Function);
   } else if (const auto *IFD = dyn_cast<IndirectFieldDecl>(D)) {
     //   - a data member of an anonymous union.
     const VarDecl *VD = IFD->getVarDecl();
@@ -637,7 +677,12 @@ static LinkageInfo getLVForNamespaceScopeDecl(const NamedDecl *D,
     const auto *Var = dyn_cast<VarDecl>(D);
     const auto *Func = dyn_cast<FunctionDecl>(D);
     // FIXME: In C++11 onwards, anonymous namespaces should give decls
-    // within them internal linkage, not unique external linkage.
+    // within them (including those inside extern "C" contexts) internal
+    // linkage, not unique external linkage:
+    //
+    // C++11 [basic.link]p4:
+    //   An unnamed namespace or a namespace declared directly or indirectly
+    //   within an unnamed namespace has internal linkage.
     if ((!Var || !isFirstInExternCContext(Var)) &&
         (!Func || !isFirstInExternCContext(Func)))
       return LinkageInfo::uniqueExternal();
@@ -718,7 +763,8 @@ static LinkageInfo getLVForNamespaceScopeDecl(const NamedDecl *D,
     // because of this, but unique-external linkage suits us.
     if (Context.getLangOpts().CPlusPlus && !isFirstInExternCContext(Var)) {
       LinkageInfo TypeLV = getLVForType(*Var->getType(), computation);
-      if (TypeLV.getLinkage() != ExternalLinkage)
+      if (TypeLV.getLinkage() != ExternalLinkage &&
+          TypeLV.getLinkage() != ModuleLinkage)
         return LinkageInfo::uniqueExternal();
       if (!LV.isVisibilityExplicit())
         LV.mergeVisibility(TypeLV);
@@ -816,7 +862,9 @@ static LinkageInfo getLVForNamespaceScopeDecl(const NamedDecl *D,
 
   //     - a namespace (7.3), unless it is declared within an unnamed
   //       namespace.
-  } else if (isa<NamespaceDecl>(D) && !D->isInAnonymousNamespace()) {
+  //
+  // We handled names in anonymous namespaces above.
+  } else if (isa<NamespaceDecl>(D)) {
     return LV;
 
   // By extension, we assign external linkage to Objective-C
@@ -1125,6 +1173,8 @@ static LinkageInfo getLVForClosure(const DeclContext *DC, Decl *ContextDecl,
   if (const auto *ND = dyn_cast<NamedDecl>(DC))
     return getLVForDecl(ND, computation);
 
+  // FIXME: We have a closure at TU scope with no context declaration. This
+  // should probably have no linkage.
   return LinkageInfo::external();
 }
 
@@ -1137,7 +1187,7 @@ static LinkageInfo getLVForLocalDecl(const NamedDecl *D,
 
     // This is a "void f();" which got merged with a file static.
     if (Function->getCanonicalDecl()->getStorageClass() == SC_Static)
-      return LinkageInfo::internal();
+      return getInternalLinkageFor(Function);
 
     LinkageInfo LV;
     if (!hasExplicitVisibilityAlready(computation)) {
@@ -1226,7 +1276,7 @@ static LinkageInfo computeLVForDecl(const NamedDecl *D,
                                     LVComputationKind computation) {
   // Internal_linkage attribute overrides other considerations.
   if (D->hasAttr<InternalLinkageAttr>())
-    return LinkageInfo::internal();
+    return getInternalLinkageFor(D);
 
   // Objective-C: treat all Objective-C declarations as having external
   // linkage.
@@ -1251,14 +1301,15 @@ static LinkageInfo computeLVForDecl(const NamedDecl *D,
 
     case Decl::EnumConstant:
       // C++ [basic.link]p4: an enumerator has the linkage of its enumeration.
-      return getLVForDecl(cast<EnumDecl>(D->getDeclContext()), computation);
+      if (D->getASTContext().getLangOpts().CPlusPlus)
+        return getLVForDecl(cast<EnumDecl>(D->getDeclContext()), computation);
+      return LinkageInfo::visible_none();
 
     case Decl::Typedef:
     case Decl::TypeAlias:
       // A typedef declaration has linkage if it gives a type a name for
       // linkage purposes.
-      if (!D->getASTContext().getLangOpts().CPlusPlus ||
-          !cast<TypedefNameDecl>(D)
+      if (!cast<TypedefNameDecl>(D)
                ->getAnonDeclWithTypedefName(/*AnyRedecl*/true))
         return LinkageInfo::none();
       break;
@@ -1274,14 +1325,14 @@ static LinkageInfo computeLVForDecl(const NamedDecl *D,
     case Decl::ObjCProperty:
     case Decl::ObjCPropertyImpl:
     case Decl::ObjCProtocol:
-      return LinkageInfo::external();
+      return getExternalLinkageFor(D);
       
     case Decl::CXXRecord: {
       const auto *Record = cast<CXXRecordDecl>(D);
       if (Record->isLambda()) {
         if (!Record->getLambdaManglingNumber()) {
           // This lambda has no mangling number, so it's internal.
-          return LinkageInfo::internal();
+          return getInternalLinkageFor(D);
         }
 
         // This lambda has its linkage/visibility determined:
@@ -1297,7 +1348,7 @@ static LinkageInfo computeLVForDecl(const NamedDecl *D,
         const CXXRecordDecl *OuterMostLambda = 
             getOutermostEnclosingLambda(Record);
         if (!OuterMostLambda->getLambdaManglingNumber())
-          return LinkageInfo::internal();
+          return getInternalLinkageFor(D);
         
         return getLVForClosure(
                   OuterMostLambda->getDeclContext()->getRedeclContext(),
@@ -1348,7 +1399,7 @@ class LinkageComputer {
                                   LVComputationKind computation) {
     // Internal_linkage attribute overrides other considerations.
     if (D->hasAttr<InternalLinkageAttr>())
-      return LinkageInfo::internal();
+      return getInternalLinkageFor(D);
 
     if (computation == LVForLinkageOnly && D->hasCachedLinkage())
       return LinkageInfo(D->getCachedLinkage(), DefaultVisibility, false);
@@ -2534,9 +2585,8 @@ bool FunctionDecl::hasTrivialBody() const
 
 bool FunctionDecl::isDefined(const FunctionDecl *&Definition) const {
   for (auto I : redecls()) {
-    if (I->IsDeleted || I->IsDefaulted || I->Body || I->IsLateTemplateParsed ||
-        I->hasDefiningAttr()) {
-      Definition = I->IsDeleted ? I->getCanonicalDecl() : I;
+    if (I->isThisDeclarationADefinition()) {
+      Definition = I;
       return true;
     }
   }
@@ -2631,7 +2681,7 @@ bool FunctionDecl::isReservedGlobalPlacementOperator() const {
   return (proto->getParamType(1).getCanonicalType() == Context.VoidPtrTy);
 }
 
-bool FunctionDecl::isReplaceableGlobalAllocationFunction() const {
+bool FunctionDecl::isReplaceableGlobalAllocationFunction(bool *IsAligned) const {
   if (getDeclName().getNameKind() != DeclarationName::CXXOperatorName)
     return false;
   if (getDeclName().getCXXOverloadedOperator() != OO_New &&
@@ -2677,8 +2727,11 @@ bool FunctionDecl::isReplaceableGlobalAllocationFunction() const {
 
   // In C++17, the next parameter can be a 'std::align_val_t' for aligned
   // new/delete.
-  if (Ctx.getLangOpts().AlignedAllocation && !Ty.isNull() && Ty->isAlignValT())
+  if (Ctx.getLangOpts().AlignedAllocation && !Ty.isNull() && Ty->isAlignValT()) {
+    if (IsAligned)
+      *IsAligned = true;
     Consume();
+  }
 
   // Finally, if this is not a sized delete, the final parameter can
   // be a 'const std::nothrow_t&'.
@@ -4107,15 +4160,19 @@ void ImplicitParamDecl::anchor() { }
 
 ImplicitParamDecl *ImplicitParamDecl::Create(ASTContext &C, DeclContext *DC,
                                              SourceLocation IdLoc,
-                                             IdentifierInfo *Id,
-                                             QualType Type) {
-  return new (C, DC) ImplicitParamDecl(C, DC, IdLoc, Id, Type);
+                                             IdentifierInfo *Id, QualType Type,
+                                             ImplicitParamKind ParamKind) {
+  return new (C, DC) ImplicitParamDecl(C, DC, IdLoc, Id, Type, ParamKind);
+}
+
+ImplicitParamDecl *ImplicitParamDecl::Create(ASTContext &C, QualType Type,
+                                             ImplicitParamKind ParamKind) {
+  return new (C, nullptr) ImplicitParamDecl(C, Type, ParamKind);
 }
 
 ImplicitParamDecl *ImplicitParamDecl::CreateDeserialized(ASTContext &C,
                                                          unsigned ID) {
-  return new (C, ID) ImplicitParamDecl(C, nullptr, SourceLocation(), nullptr,
-                                       QualType());
+  return new (C, ID) ImplicitParamDecl(C, QualType(), ImplicitParamKind::Other);
 }
 
 FunctionDecl *FunctionDecl::Create(ASTContext &C, DeclContext *DC,
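Note: the new linkage helpers implement the C++ Modules TS rules: in a module interface unit, non-exported namespace-scope names get module linkage, and internal-linkage names get "module-internal" linkage (formally internal, but reachable through the module's own inline code). A sketch of the three cases, using Modules TS syntax (compiled with -fmodules-ts):

```cpp
export module M;

export int exported_fn();  // external linkage: usable by importers of M
int module_fn();           // module linkage: only reachable inside M
static int internal_fn();  // module-internal linkage: internal, but may be
                           // referenced indirectly via M's inline functions
```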
diff --git a/interpreter/llvm/src/tools/clang/lib/AST/DeclBase.cpp b/interpreter/llvm/src/tools/clang/lib/AST/DeclBase.cpp
index b300c851840c0..da1bc124b544e 100644
--- a/interpreter/llvm/src/tools/clang/lib/AST/DeclBase.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/AST/DeclBase.cpp
@@ -274,12 +274,19 @@ void Decl::setLexicalDeclContext(DeclContext *DC) {
   } else {
     getMultipleDC()->LexicalDC = DC;
   }
-  Hidden = cast<Decl>(DC)->Hidden;
-  if (Hidden && !isFromASTFile()) {
-    assert(hasLocalOwningModuleStorage() &&
-           "hidden local declaration without local submodule visibility?");
-    setLocalOwningModule(cast<Decl>(DC)->getOwningModule());
+
+  // FIXME: We shouldn't be changing the lexical context of declarations
+  // imported from AST files.
+  if (!isFromASTFile()) {
+    setModuleOwnershipKind(getModuleOwnershipKindForChildOf(DC));
+    if (hasOwningModule())
+      setLocalOwningModule(cast<Decl>(DC)->getOwningModule());
   }
+
+  assert(
+      (getModuleOwnershipKind() != ModuleOwnershipKind::VisibleWhenImported ||
+       getOwningModule()) &&
+      "hidden declaration has no owning module");
 }
 
 void Decl::setDeclContextsImpl(DeclContext *SemaDC, DeclContext *LexicalDC,
@@ -410,6 +417,27 @@ bool Decl::isExported() const {
   return false;
 }
 
+ExternalSourceSymbolAttr *Decl::getExternalSourceSymbolAttr() const {
+  const Decl *Definition = nullptr;
+  if (auto ID = dyn_cast<ObjCInterfaceDecl>(this)) {
+    Definition = ID->getDefinition();
+  } else if (auto PD = dyn_cast<ObjCProtocolDecl>(this)) {
+    Definition = PD->getDefinition();
+  } else if (auto TD = dyn_cast<TagDecl>(this)) {
+    Definition = TD->getDefinition();
+  }
+  if (!Definition)
+    Definition = this;
+
+  if (auto *attr = Definition->getAttr<ExternalSourceSymbolAttr>())
+    return attr;
+  if (auto *dcd = dyn_cast<Decl>(getDeclContext())) {
+    return dcd->getAttr<ExternalSourceSymbolAttr>();
+  }
+
+  return nullptr;
+}
+
 bool Decl::hasDefiningAttr() const {
   return hasAttr<AliasAttr>() || hasAttr<IFuncAttr>();
 }
@@ -422,8 +450,8 @@ const Attr *Decl::getDefiningAttr() const {
   return nullptr;
 }
 
-StringRef getRealizedPlatform(const AvailabilityAttr *A,
-                              const ASTContext &Context) {
+static StringRef getRealizedPlatform(const AvailabilityAttr *A,
+                                     const ASTContext &Context) {
   // Check if this is an App Extension "platform", and if so chop off
   // the suffix for matching with the actual platform.
   StringRef RealizedPlatform = A->getPlatform()->getName();
@@ -1323,8 +1351,8 @@ void DeclContext::removeDecl(Decl *D) {
   if (isa<NamedDecl>(D)) {
     NamedDecl *ND = cast<NamedDecl>(D);
 
-    // Remove only decls that have a name
-    if (!ND->getDeclName()) return;
+    // Remove only decls that have a name and are registered in the lookup.
+    if (!ND->getDeclName() || ND->isHidden()) return;
 
     auto *DC = D->getDeclContext();
     do {
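Note: the new Decl::getExternalSourceSymbolAttr prefers an attribute placed on the definition of an interface, protocol, or tag before falling back to the declaration itself and then to its enclosing declaration context, so forward declarations still find it. The attribute it surfaces looks like this (a sketch; such attributes are typically emitted by generated headers):

```cpp
__attribute__((external_source_symbol(language = "Swift",
                                      defined_in = "MyModule",
                                      generated_declaration)))
void generatedEntryPoint();
```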
diff --git a/interpreter/llvm/src/tools/clang/lib/AST/DeclCXX.cpp b/interpreter/llvm/src/tools/clang/lib/AST/DeclCXX.cpp
index 9f87fe12a9cd4..5782b7b56c963 100644
--- a/interpreter/llvm/src/tools/clang/lib/AST/DeclCXX.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/AST/DeclCXX.cpp
@@ -55,15 +55,18 @@ CXXRecordDecl::DefinitionData::DefinitionData(CXXRecordDecl *D)
       HasOnlyCMembers(true), HasInClassInitializer(false),
       HasUninitializedReferenceMember(false), HasUninitializedFields(false),
       HasInheritedConstructor(false), HasInheritedAssignment(false),
+      NeedOverloadResolutionForCopyConstructor(false),
       NeedOverloadResolutionForMoveConstructor(false),
       NeedOverloadResolutionForMoveAssignment(false),
       NeedOverloadResolutionForDestructor(false),
+      DefaultedCopyConstructorIsDeleted(false),
       DefaultedMoveConstructorIsDeleted(false),
       DefaultedMoveAssignmentIsDeleted(false),
       DefaultedDestructorIsDeleted(false), HasTrivialSpecialMembers(SMF_All),
       DeclaredNonTrivialSpecialMembers(0), HasIrrelevantDestructor(true),
       HasConstexprNonCopyMoveConstructor(false),
       HasDefaultedDefaultConstructor(false),
+      CanPassInRegisters(true),
       DefaultedDefaultConstructorIsConstexpr(true),
       HasConstexprDefaultConstructor(false),
       HasNonLiteralTypeFieldsOrBases(false), ComputedVisibleConversions(false),
@@ -352,8 +355,10 @@ CXXRecordDecl::setBases(CXXBaseSpecifier const * const *Bases,
       setHasVolatileMember(true);
 
     // Keep track of the presence of mutable fields.
-    if (BaseClassDecl->hasMutableFields())
+    if (BaseClassDecl->hasMutableFields()) {
       data().HasMutableFields = true;
+      data().NeedOverloadResolutionForCopyConstructor = true;
+    }
 
     if (BaseClassDecl->hasUninitializedReferenceMember())
       data().HasUninitializedReferenceMember = true;
@@ -406,6 +411,8 @@ void CXXRecordDecl::addedClassSubobject(CXXRecordDecl *Subobj) {
   //    -- a direct or virtual base class B that cannot be copied/moved [...]
   //    -- a non-static data member of class type M (or array thereof)
   //       that cannot be copied or moved [...]
+  if (!Subobj->hasSimpleCopyConstructor())
+    data().NeedOverloadResolutionForCopyConstructor = true;
   if (!Subobj->hasSimpleMoveConstructor())
     data().NeedOverloadResolutionForMoveConstructor = true;
 
@@ -426,6 +433,7 @@ void CXXRecordDecl::addedClassSubobject(CXXRecordDecl *Subobj) {
   //    -- any non-static data member has a type with a destructor
   //       that is deleted or inaccessible from the defaulted [ctor or dtor].
   if (!Subobj->hasSimpleDestructor()) {
+    data().NeedOverloadResolutionForCopyConstructor = true;
     data().NeedOverloadResolutionForMoveConstructor = true;
     data().NeedOverloadResolutionForDestructor = true;
   }
@@ -711,8 +719,10 @@ void CXXRecordDecl::addedMember(Decl *D) {
       data().IsStandardLayout = false;
 
     // Keep track of the presence of mutable fields.
-    if (Field->isMutable())
+    if (Field->isMutable()) {
       data().HasMutableFields = true;
+      data().NeedOverloadResolutionForCopyConstructor = true;
+    }
 
     // C++11 [class.union]p8, DR1460:
     //   If X is a union, a non-static data member of X that is not an anonymous
@@ -756,6 +766,12 @@ void CXXRecordDecl::addedMember(Decl *D) {
       //   A standard-layout class is a class that:
       //    -- has no non-static data members of type [...] reference,
       data().IsStandardLayout = false;
+
+      // C++1z [class.copy.ctor]p10:
+      //   A defaulted copy constructor for a class X is defined as deleted if X has:
+      //    -- a non-static data member of rvalue reference type
+      if (T->isRValueReferenceType())
+        data().DefaultedCopyConstructorIsDeleted = true;
     }
 
     if (!Field->hasInClassInitializer() && !Field->isMutable()) {
@@ -809,6 +825,10 @@ void CXXRecordDecl::addedMember(Decl *D) {
         // We may need to perform overload resolution to determine whether a
         // field can be moved if it's const or volatile qualified.
         if (T.getCVRQualifiers() & (Qualifiers::Const | Qualifiers::Volatile)) {
+          // We need to care about 'const' for the copy constructor because an
+          // implicit copy constructor might be declared with a non-const
+          // parameter.
+          data().NeedOverloadResolutionForCopyConstructor = true;
           data().NeedOverloadResolutionForMoveConstructor = true;
           data().NeedOverloadResolutionForMoveAssignment = true;
         }
@@ -819,6 +839,8 @@ void CXXRecordDecl::addedMember(Decl *D) {
         //    -- X is a union-like class that has a variant member with a
         //       non-trivial [corresponding special member]
         if (isUnion()) {
+          if (FieldRec->hasNonTrivialCopyConstructor())
+            data().DefaultedCopyConstructorIsDeleted = true;
           if (FieldRec->hasNonTrivialMoveConstructor())
             data().DefaultedMoveConstructorIsDeleted = true;
           if (FieldRec->hasNonTrivialMoveAssignment())
@@ -830,6 +852,8 @@ void CXXRecordDecl::addedMember(Decl *D) {
         // For an anonymous union member, our overload resolution will perform
         // overload resolution for its members.
         if (Field->isAnonymousStructOrUnion()) {
+          data().NeedOverloadResolutionForCopyConstructor |=
+              FieldRec->data().NeedOverloadResolutionForCopyConstructor;
           data().NeedOverloadResolutionForMoveConstructor |=
               FieldRec->data().NeedOverloadResolutionForMoveConstructor;
           data().NeedOverloadResolutionForMoveAssignment |=
@@ -915,8 +939,10 @@ void CXXRecordDecl::addedMember(Decl *D) {
         }
         
         // Keep track of the presence of mutable fields.
-        if (FieldRec->hasMutableFields())
+        if (FieldRec->hasMutableFields()) {
           data().HasMutableFields = true;
+          data().NeedOverloadResolutionForCopyConstructor = true;
+        }
 
         // C++11 [class.copy]p13:
         //   If the implicitly-defined constructor would satisfy the
@@ -1417,11 +1443,8 @@ CXXDestructorDecl *CXXRecordDecl::getDestructor() const {
                                           Context.getCanonicalType(ClassType));
 
   DeclContext::lookup_result R = lookup(Name);
-  if (R.empty())
-    return nullptr;
 
-  CXXDestructorDecl *Dtor = cast<CXXDestructorDecl>(R.front());
-  return Dtor;
+  return R.empty() ? nullptr : dyn_cast<CXXDestructorDecl>(R.front());
 }
 
 bool CXXRecordDecl::isAnyDestructorNoReturn() const {
@@ -1453,7 +1476,7 @@ void CXXRecordDecl::completeDefinition() {
 
 void CXXRecordDecl::completeDefinition(CXXFinalOverriderMap *FinalOverriders) {
   RecordDecl::completeDefinition();
-  
+
   // If the class may be abstract (but hasn't been marked as such), check for
   // any pure final overriders.
   if (mayBeAbstract()) {
@@ -1608,6 +1631,84 @@ CXXMethodDecl *CXXMethodDecl::CreateDeserialized(ASTContext &C, unsigned ID) {
                                    SC_None, false, false, SourceLocation());
 }
 
+CXXMethodDecl *CXXMethodDecl::getDevirtualizedMethod(const Expr *Base,
+                                                     bool IsAppleKext) {
+  assert(isVirtual() && "this method is expected to be virtual");
+
+  // When building with -fapple-kext, all calls must go through the vtable since
+  // the kernel linker can do runtime patching of vtables.
+  if (IsAppleKext)
+    return nullptr;
+
+  // If the member function is marked 'final', we know that it can't be
+  // overridden and can therefore devirtualize it unless it's pure virtual.
+  if (hasAttr())
+    return isPure() ? nullptr : this;
+
+  // If Base is unknown, we cannot devirtualize.
+  if (!Base)
+    return nullptr;
+
+  // If the base expression (after skipping derived-to-base conversions) is a
+  // class prvalue, then we can devirtualize.
+  Base = Base->getBestDynamicClassTypeExpr();
+  if (Base->isRValue() && Base->getType()->isRecordType())
+    return this;
+
+  // If we don't even know what we would call, we can't devirtualize.
+  const CXXRecordDecl *BestDynamicDecl = Base->getBestDynamicClassType();
+  if (!BestDynamicDecl)
+    return nullptr;
+
+  // There may be a method corresponding to MD in a derived class.
+  CXXMethodDecl *DevirtualizedMethod =
+      getCorrespondingMethodInClass(BestDynamicDecl);
+
+  // If that method is pure virtual, we can't devirtualize. If this code is
+  // reached, the result would be UB, not a direct call to the derived class
+  // function, and we can't assume the derived class function is defined.
+  if (DevirtualizedMethod->isPure())
+    return nullptr;
+
+  // If that method is marked final, we can devirtualize it.
+  if (DevirtualizedMethod->hasAttr<FinalAttr>())
+    return DevirtualizedMethod;
+
+  // Similarly, if the class itself is marked 'final' it can't be overridden
+  // and we can therefore devirtualize the member function call.
+  if (BestDynamicDecl->hasAttr<FinalAttr>())
+    return DevirtualizedMethod;
+
+  if (const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(Base)) {
+    if (const VarDecl *VD = dyn_cast<VarDecl>(DRE->getDecl()))
+      if (VD->getType()->isRecordType())
+        // This is a record decl. We know the type and can devirtualize it.
+        return DevirtualizedMethod;
+
+    return nullptr;
+  }
+
+  // We can devirtualize calls on an object accessed by a class member access
+  // expression, since by C++11 [basic.life]p6 we know that it can't refer to
+  // a derived class object constructed in the same location.
+  if (const MemberExpr *ME = dyn_cast<MemberExpr>(Base))
+    if (const ValueDecl *VD = dyn_cast<ValueDecl>(ME->getMemberDecl()))
+      return VD->getType()->isRecordType() ? DevirtualizedMethod : nullptr;
+
+  // Likewise for calls on an object accessed by a (non-reference) pointer to
+  // member access.
+  if (auto *BO = dyn_cast<BinaryOperator>(Base)) {
+    if (BO->isPtrMemOp()) {
+      auto *MPT = BO->getRHS()->getType()->castAs<MemberPointerType>();
+      if (MPT->getPointeeType()->isRecordType())
+        return DevirtualizedMethod;
+    }
+  }
+
+  // We can't devirtualize the call.
+  return nullptr;
+}
+
 bool CXXMethodDecl::isUsualDeallocationFunction() const {
   if (getOverloadedOperator() != OO_Delete &&
       getOverloadedOperator() != OO_Array_Delete)
@@ -1762,9 +1863,10 @@ bool CXXMethodDecl::hasInlineBody() const {
   const FunctionDecl *CheckFn = getTemplateInstantiationPattern();
   if (!CheckFn)
     CheckFn = this;
-  
+
   const FunctionDecl *fn;
-  return CheckFn->hasBody(fn) && !fn->isOutOfLine();
+  return CheckFn->isDefined(fn) && !fn->isOutOfLine() &&
+         (fn->doesThisDeclarationHaveABody() || fn->willHaveBody());
 }
 
 bool CXXMethodDecl::isLambdaStaticInvoker() const {
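Note: getDevirtualizedMethod centralizes the devirtualization rules CodeGen applies. A sketch of calls it can and cannot resolve to direct calls:

```cpp
struct B { virtual int f() { return 0; } };
struct D final : B { int f() override { return 1; } };

int calls(D &d, B &stack_obj, B *unknown) {
  int r = d.f();           // devirtualizable: D is marked 'final'
  B local;
  r += local.f();          // devirtualizable: DeclRefExpr to a non-reference
                           // variable of record type, so the dynamic type
                           // is known to be exactly B
  r += stack_obj.f();      // not devirtualizable: the reference may bind to
                           // an object of a derived class
  return r + unknown->f(); // not devirtualizable: dynamic type unknown
}
```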
diff --git a/interpreter/llvm/src/tools/clang/lib/AST/DeclObjC.cpp b/interpreter/llvm/src/tools/clang/lib/AST/DeclObjC.cpp
index a12a38033c4af..d8bdb6369e947 100644
--- a/interpreter/llvm/src/tools/clang/lib/AST/DeclObjC.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/AST/DeclObjC.cpp
@@ -1070,20 +1070,20 @@ void ObjCMethodDecl::createImplicitParams(ASTContext &Context,
   bool selfIsPseudoStrong, selfIsConsumed;
   QualType selfTy =
     getSelfType(Context, OID, selfIsPseudoStrong, selfIsConsumed);
-  ImplicitParamDecl *self
-    = ImplicitParamDecl::Create(Context, this, SourceLocation(),
-                                &Context.Idents.get("self"), selfTy);
-  setSelfDecl(self);
+  auto *Self = ImplicitParamDecl::Create(Context, this, SourceLocation(),
+                                         &Context.Idents.get("self"), selfTy,
+                                         ImplicitParamDecl::ObjCSelf);
+  setSelfDecl(Self);
 
   if (selfIsConsumed)
-    self->addAttr(NSConsumedAttr::CreateImplicit(Context));
+    Self->addAttr(NSConsumedAttr::CreateImplicit(Context));
 
   if (selfIsPseudoStrong)
-    self->setARCPseudoStrong(true);
+    Self->setARCPseudoStrong(true);
 
-  setCmdDecl(ImplicitParamDecl::Create(Context, this, SourceLocation(),
-                                       &Context.Idents.get("_cmd"),
-                                       Context.getObjCSelType()));
+  setCmdDecl(ImplicitParamDecl::Create(
+      Context, this, SourceLocation(), &Context.Idents.get("_cmd"),
+      Context.getObjCSelType(), ImplicitParamDecl::ObjCCmd));
 }
 
 ObjCInterfaceDecl *ObjCMethodDecl::getClassInterface() {
@@ -1889,25 +1889,23 @@ void ObjCProtocolDecl::collectPropertiesToImplement(PropertyMap &PM,
   }
 }
 
-    
 void ObjCProtocolDecl::collectInheritedProtocolProperties(
-                                                const ObjCPropertyDecl *Property,
-                                                ProtocolPropertyMap &PM) const {
+    const ObjCPropertyDecl *Property, ProtocolPropertySet &PS,
+    PropertyDeclOrder &PO) const {
   if (const ObjCProtocolDecl *PDecl = getDefinition()) {
-    bool MatchFound = false;
+    if (!PS.insert(PDecl).second)
+      return;
     for (auto *Prop : PDecl->properties()) {
       if (Prop == Property)
         continue;
       if (Prop->getIdentifier() == Property->getIdentifier()) {
-        PM[PDecl] = Prop;
-        MatchFound = true;
-        break;
+        PO.push_back(Prop);
+        return;
       }
     }
     // Scan through protocol's protocols which did not have a matching property.
-    if (!MatchFound)
-      for (const auto *PI : PDecl->protocols())
-        PI->collectInheritedProtocolProperties(Property, PM);
+    for (const auto *PI : PDecl->protocols())
+      PI->collectInheritedProtocolProperties(Property, PS, PO);
   }
 }
 
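
The collectInheritedProtocolProperties change replaces a result map with a visited set plus an ordered output vector, so each protocol in the inheritance graph is walked at most once even when it is reachable through several paths. A hedged, generic sketch of that traversal pattern (names hypothetical):

    #include <set>
    #include <vector>

    struct Proto { std::vector<const Proto *> parents; };

    // Walk the protocol DAG once per node: the visited set guards against
    // diamond-shaped inheritance, the vector preserves discovery order.
    void collect(const Proto *P, std::set<const Proto *> &Visited,
                 std::vector<const Proto *> &Order) {
      if (!Visited.insert(P).second)
        return; // already handled via another path
      Order.push_back(P);
      for (const Proto *Parent : P->parents)
        collect(Parent, Visited, Order);
    }
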
diff --git a/interpreter/llvm/src/tools/clang/lib/AST/DeclPrinter.cpp b/interpreter/llvm/src/tools/clang/lib/AST/DeclPrinter.cpp
index bc8a34c936536..6eeba88e40336 100644
--- a/interpreter/llvm/src/tools/clang/lib/AST/DeclPrinter.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/AST/DeclPrinter.cpp
@@ -1189,7 +1189,9 @@ void DeclPrinter::VisitObjCMethodDecl(ObjCMethodDecl *OMD) {
   for (const auto *PI : OMD->parameters()) {
     // FIXME: selector is missing here!
     pos = name.find_first_of(':', lastPos);
-    Out << " " << name.substr(lastPos, pos - lastPos) << ':';
+    if (lastPos != 0)
+      Out << " ";
+    Out << name.substr(lastPos, pos - lastPos) << ':';
     PrintObjCMethodType(OMD->getASTContext(), 
                         PI->getObjCDeclQualifier(),
                         PI->getType());
@@ -1198,7 +1200,7 @@ void DeclPrinter::VisitObjCMethodDecl(ObjCMethodDecl *OMD) {
   }
 
   if (OMD->param_begin() == OMD->param_end())
-    Out << " " << name;
+    Out << name;
 
   if (OMD->isVariadic())
       Out << ", ...";
diff --git a/interpreter/llvm/src/tools/clang/lib/AST/Expr.cpp b/interpreter/llvm/src/tools/clang/lib/AST/Expr.cpp
index d523a0f93cf6b..afc7fa8ea0949 100644
--- a/interpreter/llvm/src/tools/clang/lib/AST/Expr.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/AST/Expr.cpp
@@ -1576,6 +1576,7 @@ bool CastExpr::CastConsistency() const {
            getSubExpr()->getType()->isBlockPointerType());
     assert(getType()->getPointeeType().getAddressSpace() !=
            getSubExpr()->getType()->getPointeeType().getAddressSpace());
+    LLVM_FALLTHROUGH;
   // These should not have an inheritance path.
   case CK_Dynamic:
   case CK_ToUnion:
@@ -1640,25 +1641,32 @@ const char *CastExpr::getCastKindName() const {
   llvm_unreachable("Unhandled cast kind!");
 }
 
+namespace {
+  Expr *skipImplicitTemporary(Expr *expr) {
+    // Skip through reference binding to temporary.
+    if (MaterializeTemporaryExpr *Materialize
+                                  = dyn_cast<MaterializeTemporaryExpr>(expr))
+      expr = Materialize->GetTemporaryExpr();
+
+    // Skip any temporary bindings; they're implicit.
+    if (CXXBindTemporaryExpr *Binder = dyn_cast<CXXBindTemporaryExpr>(expr))
+      expr = Binder->getSubExpr();
+
+    return expr;
+  }
+}
+
 Expr *CastExpr::getSubExprAsWritten() {
   Expr *SubExpr = nullptr;
   CastExpr *E = this;
   do {
-    SubExpr = E->getSubExpr();
+    SubExpr = skipImplicitTemporary(E->getSubExpr());
 
-    // Skip through reference binding to temporary.
-    if (MaterializeTemporaryExpr *Materialize
-                                  = dyn_cast<MaterializeTemporaryExpr>(SubExpr))
-      SubExpr = Materialize->GetTemporaryExpr();
-        
-    // Skip any temporary bindings; they're implicit.
-    if (CXXBindTemporaryExpr *Binder = dyn_cast<CXXBindTemporaryExpr>(SubExpr))
-      SubExpr = Binder->getSubExpr();
-    
     // Conversions by constructor and conversion functions have a
     // subexpression describing the call; strip it off.
     if (E->getCastKind() == CK_ConstructorConversion)
-      SubExpr = cast<CXXConstructExpr>(SubExpr)->getArg(0);
+      SubExpr =
+        skipImplicitTemporary(cast<CXXConstructExpr>(SubExpr)->getArg(0));
     else if (E->getCastKind() == CK_UserDefinedConversion) {
       assert((isa<CXXMemberCallExpr>(SubExpr) ||
               isa<BlockExpr>(SubExpr)) &&
@@ -2102,6 +2110,7 @@ bool Expr::isUnusedResultAWarning(const Expr *&WarnE, SourceLocation &Loc,
     }
 
     // Fallthrough for generic call handling.
+    LLVM_FALLTHROUGH;
   }
   case CallExprClass:
   case CXXMemberCallExprClass:
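
getSubExprAsWritten now strips MaterializeTemporaryExpr and CXXBindTemporaryExpr wrappers on the constructor-conversion path too. For intuition, a snippet like the following (hypothetical type) produces exactly that wrapper stack around the user-written argument:

    struct Wrapper {
      Wrapper(int v);  // converting constructor
      ~Wrapper();      // non-trivial destructor forces CXXBindTemporaryExpr
    };

    const Wrapper &r = static_cast<Wrapper>(42);
    // AST, roughly:  MaterializeTemporaryExpr
    //                  `-CXXBindTemporaryExpr
    //                      `-CXXConstructExpr (constructor conversion)
    //                          `-IntegerLiteral 42   <- "as written"
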
diff --git a/interpreter/llvm/src/tools/clang/lib/AST/ExprCXX.cpp b/interpreter/llvm/src/tools/clang/lib/AST/ExprCXX.cpp
index 6713fca04571d..fe45b5e47f36d 100644
--- a/interpreter/llvm/src/tools/clang/lib/AST/ExprCXX.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/AST/ExprCXX.cpp
@@ -1052,7 +1052,9 @@ CXXUnresolvedConstructExpr::CXXUnresolvedConstructExpr(TypeSourceInfo *Type,
           :Type->getType()->isRValueReferenceType()? VK_XValue
           :VK_RValue),
          OK_Ordinary,
-         Type->getType()->isDependentType(), true, true,
+         Type->getType()->isDependentType() ||
+             Type->getType()->getContainedDeducedType(),
+         true, true,
          Type->getType()->containsUnexpandedParameterPack()),
     Type(Type),
     LParenLoc(LParenLoc),
diff --git a/interpreter/llvm/src/tools/clang/lib/AST/ExprClassification.cpp b/interpreter/llvm/src/tools/clang/lib/AST/ExprClassification.cpp
index c035a42439a39..d149bdd0cdf97 100644
--- a/interpreter/llvm/src/tools/clang/lib/AST/ExprClassification.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/AST/ExprClassification.cpp
@@ -190,7 +190,6 @@ static Cl::Kinds ClassifyInternal(ASTContext &Ctx, const Expr *E) {
   case Expr::ArrayInitIndexExprClass:
   case Expr::NoInitExprClass:
   case Expr::DesignatedInitUpdateExprClass:
-  case Expr::CoyieldExprClass:
     return Cl::CL_PRValue;
 
     // Next come the complicated cases.
@@ -414,7 +413,8 @@ static Cl::Kinds ClassifyInternal(ASTContext &Ctx, const Expr *E) {
     return ClassifyInternal(Ctx, cast<InitListExpr>(E)->getInit(0));
 
   case Expr::CoawaitExprClass:
-    return ClassifyInternal(Ctx, cast<CoawaitExpr>(E)->getResumeExpr());
+  case Expr::CoyieldExprClass:
+    return ClassifyInternal(Ctx, cast<CoroutineSuspendExpr>(E)->getResumeExpr());
   }
 
   llvm_unreachable("unhandled expression kind in classification");
diff --git a/interpreter/llvm/src/tools/clang/lib/AST/ExprConstant.cpp b/interpreter/llvm/src/tools/clang/lib/AST/ExprConstant.cpp
index 75bb0cac51b83..a26b608082f52 100644
--- a/interpreter/llvm/src/tools/clang/lib/AST/ExprConstant.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/AST/ExprConstant.cpp
@@ -736,6 +736,7 @@ namespace {
             if (!HasFoldFailureDiagnostic)
               break;
             // We've already failed to fold something. Keep that diagnostic.
+            LLVM_FALLTHROUGH;
           case EM_ConstantExpression:
           case EM_PotentialConstantExpression:
           case EM_ConstantExpressionUnevaluated:
@@ -1230,8 +1231,7 @@ namespace {
       IsNullPtr = V.isNullPointer();
     }
 
-    void set(APValue::LValueBase B, unsigned I = 0, bool BInvalid = false,
-             bool IsNullPtr_ = false, uint64_t Offset_ = 0) {
+    void set(APValue::LValueBase B, unsigned I = 0, bool BInvalid = false) {
 #ifndef NDEBUG
       // We only allow a few types of invalid bases. Enforce that here.
       if (BInvalid) {
@@ -1242,11 +1242,20 @@ namespace {
 #endif
 
       Base = B;
-      Offset = CharUnits::fromQuantity(Offset_);
+      Offset = CharUnits::fromQuantity(0);
       InvalidBase = BInvalid;
       CallIndex = I;
       Designator = SubobjectDesignator(getType(B));
-      IsNullPtr = IsNullPtr_;
+      IsNullPtr = false;
+    }
+
+    void setNull(QualType PointerTy, uint64_t TargetVal) {
+      Base = (Expr *)nullptr;
+      Offset = CharUnits::fromQuantity(TargetVal);
+      InvalidBase = false;
+      CallIndex = 0;
+      Designator = SubobjectDesignator(PointerTy->getPointeeType());
+      IsNullPtr = true;
     }
 
     void setInvalid(APValue::LValueBase B, unsigned I = 0) {
@@ -1656,6 +1665,19 @@ static bool CheckLValueConstantExpression(EvalInfo &Info, SourceLocation Loc,
   return true;
 }
 
+/// Member pointers are constant expressions unless they point to a
+/// non-virtual dllimport member function.
+static bool CheckMemberPointerConstantExpression(EvalInfo &Info,
+                                                 SourceLocation Loc,
+                                                 QualType Type,
+                                                 const APValue &Value) {
+  const ValueDecl *Member = Value.getMemberPointerDecl();
+  const auto *FD = dyn_cast_or_null<CXXMethodDecl>(Member);
+  if (!FD)
+    return true;
+  return FD->isVirtual() || !FD->hasAttr<DLLImportAttr>();
+}
+
 /// Check that this core constant expression is of literal type, and if not,
 /// produce an appropriate diagnostic.
 static bool CheckLiteralType(EvalInfo &Info, const Expr *E,
@@ -1748,6 +1770,9 @@ static bool CheckConstantExpression(EvalInfo &Info, SourceLocation DiagLoc,
     return CheckLValueConstantExpression(Info, DiagLoc, Type, LVal);
   }
 
+  if (Value.isMemberPointer())
+    return CheckMemberPointerConstantExpression(Info, DiagLoc, Type, Value);
+
   // Everything else is fine.
   return true;
 }
@@ -5471,8 +5496,8 @@ class PointerExprEvaluator
     return true;
   }
   bool ZeroInitialization(const Expr *E) {
-    auto Offset = Info.Ctx.getTargetNullPointerValue(E->getType());
-    Result.set((Expr*)nullptr, 0, false, true, Offset);
+    auto TargetVal = Info.Ctx.getTargetNullPointerValue(E->getType());
+    Result.setNull(E->getType(), TargetVal);
     return true;
   }
 
@@ -6217,10 +6242,6 @@ bool RecordExprEvaluator::VisitInitListExpr(const InitListExpr *E) {
     // the initializer list.
     ImplicitValueInitExpr VIE(HaveInit ? Info.Ctx.IntTy : Field->getType());
     const Expr *Init = HaveInit ? E->getInit(ElementNo++) : &VIE;
-    if (Init->isValueDependent()) {
-      Success = false;
-      continue;
-    }
 
     // Temporarily override This, in case there's a CXXDefaultInitExpr in here.
     ThisOverrideRAII ThisOverride(*Info.CurrentCall, &This,
@@ -9503,7 +9524,7 @@ bool ComplexExprEvaluator::VisitBinaryOperator(const BinaryOperator *E) {
   case BO_Mul:
     if (Result.isComplexFloat()) {
       // This is an implementation of complex multiplication according to the
-      // constraints laid out in C11 Annex G. The implemantion uses the
+      // constraints laid out in C11 Annex G. The implementation uses the
       // following naming scheme:
       //   (a + ib) * (c + id)
       ComplexValue LHS = Result;
@@ -9584,7 +9605,7 @@ bool ComplexExprEvaluator::VisitBinaryOperator(const BinaryOperator *E) {
   case BO_Div:
     if (Result.isComplexFloat()) {
       // This is an implementation of complex division according to the
-      // constraints laid out in C11 Annex G. The implemantion uses the
+      // constraints laid out in C11 Annex G. The implementation uses the
       // following naming scheme:
       //   (a + ib) / (c + id)
       ComplexValue LHS = Result;
@@ -9767,6 +9788,8 @@ class VoidExprEvaluator
 
   bool Success(const APValue &V, const Expr *e) { return true; }
 
+  bool ZeroInitialization(const Expr *E) { return true; }
+
   bool VisitCastExpr(const CastExpr *E) {
     switch (E->getCastKind()) {
     default:
@@ -9931,8 +9954,7 @@ static bool EvaluateAsRValue(EvalInfo &Info, const Expr *E, APValue &Result) {
 }
 
 static bool FastEvaluateAsRValue(const Expr *Exp, Expr::EvalResult &Result,
-                                 const ASTContext &Ctx, bool &IsConst,
-                                 bool IsCheckingForOverflow) {
+                                 const ASTContext &Ctx, bool &IsConst) {
   // Fast-path evaluations of integer literals, since we sometimes see files
   // containing vast quantities of these.
   if (const IntegerLiteral *L = dyn_cast<IntegerLiteral>(Exp)) {
@@ -9953,7 +9975,7 @@ static bool FastEvaluateAsRValue(const Expr *Exp, Expr::EvalResult &Result,
   // performance problems. Only do so in C++11 for now.
   if (Exp->isRValue() && (Exp->getType()->isArrayType() ||
                           Exp->getType()->isRecordType()) &&
-      !Ctx.getLangOpts().CPlusPlus11 && !IsCheckingForOverflow) {
+      !Ctx.getLangOpts().CPlusPlus11) {
     IsConst = false;
     return true;
   }
@@ -9968,7 +9990,7 @@ static bool FastEvaluateAsRValue(const Expr *Exp, Expr::EvalResult &Result,
 /// will be applied to the result.
 bool Expr::EvaluateAsRValue(EvalResult &Result, const ASTContext &Ctx) const {
   bool IsConst;
-  if (FastEvaluateAsRValue(this, Result, Ctx, IsConst, false))
+  if (FastEvaluateAsRValue(this, Result, Ctx, IsConst))
     return IsConst;
   
   EvalInfo Info(Ctx, Result, EvalInfo::EM_IgnoreSideEffects);
@@ -10093,7 +10115,7 @@ APSInt Expr::EvaluateKnownConstInt(const ASTContext &Ctx,
 void Expr::EvaluateForOverflow(const ASTContext &Ctx) const {
   bool IsConst;
   EvalResult EvalResult;
-  if (!FastEvaluateAsRValue(this, EvalResult, Ctx, IsConst, true)) {
+  if (!FastEvaluateAsRValue(this, EvalResult, Ctx, IsConst)) {
     EvalInfo Info(Ctx, EvalResult, EvalInfo::EM_EvaluateForOverflow);
     (void)::EvaluateAsRValue(Info, this, EvalResult.Val);
   }
@@ -10343,6 +10365,7 @@ static ICEDiag CheckICE(const Expr* E, const ASTContext &Ctx) {
     }
 
     // OffsetOf falls through here.
+    LLVM_FALLTHROUGH;
   }
   case Expr::OffsetOfExprClass: {
     // Note that per C99, offsetof must be an ICE. And AFAIK, using
@@ -10445,6 +10468,7 @@ static ICEDiag CheckICE(const Expr* E, const ASTContext &Ctx) {
       return Worst(LHSResult, RHSResult);
     }
     }
+    LLVM_FALLTHROUGH;
   }
   case Expr::ImplicitCastExprClass:
   case Expr::CStyleCastExprClass:
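
The setNull refactor stores the target's numeric null representation in Offset instead of threading extra defaulted parameters through set(). On most targets that value is 0, but a target may define a different bit pattern for null in some address space. A hedged illustration; the address-space convention below is an assumption, not from this patch:

    #include <cstdint>

    // Hypothetical: address space 1 represents null as all-ones.
    constexpr std::uint64_t NullValueAS1 = ~std::uint64_t(0);

    bool isNullRepresentation(std::uint64_t PtrBits, unsigned AddrSpace) {
      return PtrBits == (AddrSpace == 1 ? NullValueAS1 : 0);
    }
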
diff --git a/interpreter/llvm/src/tools/clang/lib/AST/ExternalASTMerger.cpp b/interpreter/llvm/src/tools/clang/lib/AST/ExternalASTMerger.cpp
index 1dc472a5f7534..4f4a99794c5b0 100644
--- a/interpreter/llvm/src/tools/clang/lib/AST/ExternalASTMerger.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/AST/ExternalASTMerger.cpp
@@ -41,6 +41,7 @@ class LazyASTImporter : public ASTImporter {
   Decl *Imported(Decl *From, Decl *To) override {
     if (auto ToTag = dyn_cast<TagDecl>(To)) {
       ToTag->setHasExternalLexicalStorage();
+      ToTag->setMustBuildLookupTable();
     } else if (auto ToNamespace = dyn_cast<NamespaceDecl>(To)) {
       ToNamespace->setHasExternalVisibleStorage();
     }
@@ -179,8 +180,3 @@ void ExternalASTMerger::FindExternalLexicalDecls(
       });
 }
 
-void ExternalASTMerger::CompleteType(TagDecl *Tag) {
-  SmallVector<Decl *, 0> Result;
-  FindExternalLexicalDecls(Tag, [](Decl::Kind) { return true; }, Result);
-  Tag->setHasExternalLexicalStorage(false);
-}
diff --git a/interpreter/llvm/src/tools/clang/lib/AST/ItaniumMangle.cpp b/interpreter/llvm/src/tools/clang/lib/AST/ItaniumMangle.cpp
index 7db0b4d8e4ff7..4e7c6c4edf370 100644
--- a/interpreter/llvm/src/tools/clang/lib/AST/ItaniumMangle.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/AST/ItaniumMangle.cpp
@@ -1459,8 +1459,6 @@ void CXXNameMangler::mangleNestedName(const NamedDecl *ND,
     // We do not consider restrict a distinguishing attribute for overloading
     // purposes so we must not mangle it.
     MethodQuals.removeRestrict();
-    // __unaligned is not currently mangled in any way, so remove it.
-    MethodQuals.removeUnaligned();
     mangleQualifiers(MethodQuals);
     mangleRefQualifier(Method->getRefQualifier());
   }
@@ -2140,7 +2138,8 @@ CXXNameMangler::mangleOperatorName(OverloadedOperatorKind OO, unsigned Arity) {
 }
 
 void CXXNameMangler::mangleQualifiers(Qualifiers Quals) {
-  // Vendor qualifiers come first.
+  // Vendor qualifiers come first and if they are order-insensitive they must
+  // be emitted in reversed alphabetical order, see Itanium ABI 5.1.5.
 
   // Address space qualifiers start with an ordinary letter.
   if (Quals.hasAddressSpace()) {
@@ -2176,17 +2175,28 @@ void CXXNameMangler::mangleQualifiers(Qualifiers Quals) {
   }
 
   // The ARC ownership qualifiers start with underscores.
-  switch (Quals.getObjCLifetime()) {
   // Objective-C ARC Extension:
   //
   //   <type> ::= U "__strong"
   //   <type> ::= U "__weak"
   //   <type> ::= U "__autoreleasing"
+  //
+  // Note: we emit __weak first to preserve the order as
+  // required by the Itanium ABI.
+  if (Quals.getObjCLifetime() == Qualifiers::OCL_Weak)
+    mangleVendorQualifier("__weak");
+
+  // __unaligned (from -fms-extensions)
+  if (Quals.hasUnaligned())
+    mangleVendorQualifier("__unaligned");
+
+  // Remaining ARC ownership qualifiers.
+  switch (Quals.getObjCLifetime()) {
   case Qualifiers::OCL_None:
     break;
     
   case Qualifiers::OCL_Weak:
-    mangleVendorQualifier("__weak");
+    // Do nothing as we already handled this case above.
     break;
     
   case Qualifiers::OCL_Strong:
@@ -2519,7 +2529,7 @@ StringRef CXXNameMangler::getCallingConvQualifierName(CallingConv CC) {
   case CC_X86ThisCall:
   case CC_X86VectorCall:
   case CC_X86Pascal:
-  case CC_X86_64Win64:
+  case CC_Win64:
   case CC_X86_64SysV:
   case CC_X86RegCall:
   case CC_AAPCS:
@@ -3775,6 +3785,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) {
     Out << "v1U" << Kind.size() << Kind;
   }
   // Fall through to mangle the cast itself.
+  LLVM_FALLTHROUGH;
       
   case Expr::CStyleCastExprClass:
     mangleCastExpression(E, "cv");
@@ -4327,7 +4338,7 @@ bool CXXNameMangler::mangleSubstitution(const NamedDecl *ND) {
 /// substitutions.
 static bool hasMangledSubstitutionQualifiers(QualType T) {
   Qualifiers Qs = T.getQualifiers();
-  return Qs.getCVRQualifiers() || Qs.hasAddressSpace();
+  return Qs.getCVRQualifiers() || Qs.hasAddressSpace() || Qs.hasUnaligned();
 }
 
 bool CXXNameMangler::mangleSubstitution(QualType T) {
@@ -4539,9 +4550,11 @@ CXXNameMangler::makeFunctionReturnTypeTags(const FunctionDecl *FD) {
 
   const FunctionProtoType *Proto =
       cast<FunctionProtoType>(FD->getType()->getAs<FunctionType>());
+  FunctionTypeDepthState saved = TrackReturnTypeTags.FunctionTypeDepth.push();
   TrackReturnTypeTags.FunctionTypeDepth.enterResultType();
   TrackReturnTypeTags.mangleType(Proto->getReturnType());
   TrackReturnTypeTags.FunctionTypeDepth.leaveResultType();
+  TrackReturnTypeTags.FunctionTypeDepth.pop(saved);
 
   return TrackReturnTypeTags.AbiTagsRoot.getSortedUniqueUsedAbiTags();
 }
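
The makeFunctionReturnTypeTags fix brackets the return-type walk with an explicit push/pop of the FunctionTypeDepth state so nested manglings cannot leak depth changes. The same save/restore idiom, sketched generically (type and member names hypothetical):

    struct DepthState { unsigned Depth = 0; bool InResultType = false; };

    struct DepthTracker {
      DepthState State;
      DepthState push() {          // save the current state on entry to a
        DepthState Saved = State;  // nested function type
        ++State.Depth;
        return Saved;
      }
      void pop(DepthState Saved) { // restore on the way out
        State = Saved;
      }
    };
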
diff --git a/interpreter/llvm/src/tools/clang/lib/AST/MicrosoftMangle.cpp b/interpreter/llvm/src/tools/clang/lib/AST/MicrosoftMangle.cpp
index 6e14dd055cf8e..24b16f892e7a9 100644
--- a/interpreter/llvm/src/tools/clang/lib/AST/MicrosoftMangle.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/AST/MicrosoftMangle.cpp
@@ -966,16 +966,71 @@ void MicrosoftCXXNameMangler::mangleNestedName(const NamedDecl *ND) {
     }
 
     if (const BlockDecl *BD = dyn_cast<BlockDecl>(DC)) {
-      DiagnosticsEngine &Diags = Context.getDiags();
-      unsigned DiagID =
-          Diags.getCustomDiagID(DiagnosticsEngine::Error,
-                                "cannot mangle a local inside this block yet");
-      Diags.Report(BD->getLocation(), DiagID);
-
-      // FIXME: This is completely, utterly, wrong; see ItaniumMangle
-      // for how this should be done.
-      Out << "__block_invoke" << Context.getBlockId(BD, false);
-      Out << '@';
+      auto Discriminate =
+          [](StringRef Name, const unsigned Discriminator,
+             const unsigned ParameterDiscriminator) -> std::string {
+        std::string Buffer;
+        llvm::raw_string_ostream Stream(Buffer);
+        Stream << Name;
+        if (Discriminator)
+          Stream << '_' << Discriminator;
+        if (ParameterDiscriminator)
+          Stream << '_' << ParameterDiscriminator;
+        return Stream.str();
+      };
+
+      unsigned Discriminator = BD->getBlockManglingNumber();
+      if (!Discriminator)
+        Discriminator = Context.getBlockId(BD, /*Local=*/false);
+
+      // Mangle the parameter position as a discriminator to deal with unnamed
+      // parameters.  Rather than mangling the unqualified parameter name,
+      // always use the position to give a uniform mangling.
+      unsigned ParameterDiscriminator = 0;
+      if (const auto *MC = BD->getBlockManglingContextDecl())
+        if (const auto *P = dyn_cast<ParmVarDecl>(MC))
+          if (const auto *F = dyn_cast<FunctionDecl>(P->getDeclContext()))
+            ParameterDiscriminator =
+                F->getNumParams() - P->getFunctionScopeIndex();
+
+      DC = getEffectiveDeclContext(BD);
+
+      Out << '?';
+      mangleSourceName(Discriminate("_block_invoke", Discriminator,
+                                    ParameterDiscriminator));
+      // If we have a block mangling context, encode that now.  This allows us
+      // to discriminate between named static data initializers in the same
+      // scope.  This is handled differently from parameters, which use
+      // positions to discriminate between multiple instances.
+      if (const auto *MC = BD->getBlockManglingContextDecl())
+        if (!isa<ParmVarDecl>(MC))
+          if (const auto *ND = dyn_cast<NamedDecl>(MC))
+            mangleUnqualifiedName(ND);
+      // MS ABI and Itanium manglings are in inverted scopes.  In the case of a
+      // RecordDecl, mangle the entire scope hierarchy at this point rather than
+      // just the unqualified name to get the ordering correct.
+      if (const auto *RD = dyn_cast<RecordDecl>(DC))
+        mangleName(RD);
+      else
+        Out << '@';
+      // void __cdecl
+      Out << "YAX";
+      // struct __block_literal *
+      Out << 'P';
+      // __ptr64
+      if (PointersAre64Bit)
+        Out << 'E';
+      Out << 'A';
+      mangleArtificalTagType(TTK_Struct,
+                             Discriminate("__block_literal", Discriminator,
+                                          ParameterDiscriminator));
+      Out << "@Z";
+
+      // If the effective context was a Record, we have fully mangled the
+      // qualified name and do not need to continue.
+      if (isa<RecordDecl>(DC))
+        break;
+      continue;
     } else if (const ObjCMethodDecl *Method = dyn_cast<ObjCMethodDecl>(DC)) {
       mangleObjCMethodName(Method);
     } else if (isa<NamedDecl>(DC)) {
@@ -1689,6 +1744,8 @@ void MicrosoftCXXNameMangler::mangleType(const BuiltinType *T, Qualifiers,
   //                 ::= _N # bool
   //                     _O # <array in parameter>
   //                 ::= _T # __float80 (Intel)
+  //                 ::= _S # char16_t
+  //                 ::= _U # char32_t
   //                 ::= _W # wchar_t
   //                 ::= _Z # __float80 (Digital Mars)
   switch (T->getKind()) {
@@ -2065,7 +2122,7 @@ void MicrosoftCXXNameMangler::mangleCallingConvention(CallingConv CC) {
   switch (CC) {
     default:
       llvm_unreachable("Unsupported CC for mangling");
-    case CC_X86_64Win64:
+    case CC_Win64:
     case CC_X86_64SysV:
     case CC_C: Out << 'A'; break;
     case CC_X86Pascal: Out << 'C'; break;
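
The Discriminate helper above appends up to two numeric suffixes to a base name so that distinct blocks, and unnamed parameters within them, get distinct manglings. A stand-alone restatement using std::string instead of llvm::raw_string_ostream:

    #include <string>

    std::string discriminate(std::string Name, unsigned Discriminator,
                             unsigned ParameterDiscriminator) {
      if (Discriminator)
        Name += '_' + std::to_string(Discriminator);
      if (ParameterDiscriminator)
        Name += '_' + std::to_string(ParameterDiscriminator);
      return Name;
    }
    // discriminate("_block_invoke", 2, 1) == "_block_invoke_2_1"
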
diff --git a/interpreter/llvm/src/tools/clang/lib/AST/NestedNameSpecifier.cpp b/interpreter/llvm/src/tools/clang/lib/AST/NestedNameSpecifier.cpp
index 514c7c9f5b338..e2e0dbeec0dd3 100644
--- a/interpreter/llvm/src/tools/clang/lib/AST/NestedNameSpecifier.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/AST/NestedNameSpecifier.cpp
@@ -290,6 +290,7 @@ NestedNameSpecifier::print(raw_ostream &OS,
   case TypeSpecWithTemplate:
     OS << "template ";
     // Fall through to print the type.
+    LLVM_FALLTHROUGH;
 
   case TypeSpec: {
     const Type *T = getAsType();
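
This patch adds LLVM_FALLTHROUGH at many deliberate case fall-throughs; under -Wimplicit-fallthrough the annotation documents intent and silences the warning. In C++17 the same effect is spelled with the standard attribute:

    // The fall-through from case 0 to case 1 is intentional and annotated.
    int classify(int Kind) {
      int Score = 0;
      switch (Kind) {
      case 0:
        Score += 1;
        [[fallthrough]];
      case 1:
        Score += 1;
        break;
      default:
        break;
      }
      return Score;
    }
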
diff --git a/interpreter/llvm/src/tools/clang/lib/AST/ODRHash.cpp b/interpreter/llvm/src/tools/clang/lib/AST/ODRHash.cpp
index f4d314a6dd0d2..121724a731526 100644
--- a/interpreter/llvm/src/tools/clang/lib/AST/ODRHash.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/AST/ODRHash.cpp
@@ -81,9 +81,83 @@ void ODRHash::AddDeclarationName(DeclarationName Name) {
   }
 }
 
-void ODRHash::AddNestedNameSpecifier(const NestedNameSpecifier *NNS) {}
-void ODRHash::AddTemplateName(TemplateName Name) {}
-void ODRHash::AddTemplateArgument(TemplateArgument TA) {}
+void ODRHash::AddNestedNameSpecifier(const NestedNameSpecifier *NNS) {
+  assert(NNS && "Expecting non-null pointer.");
+  const auto *Prefix = NNS->getPrefix();
+  AddBoolean(Prefix);
+  if (Prefix) {
+    AddNestedNameSpecifier(Prefix);
+  }
+  auto Kind = NNS->getKind();
+  ID.AddInteger(Kind);
+  switch (Kind) {
+  case NestedNameSpecifier::Identifier:
+    AddIdentifierInfo(NNS->getAsIdentifier());
+    break;
+  case NestedNameSpecifier::Namespace:
+    AddDecl(NNS->getAsNamespace());
+    break;
+  case NestedNameSpecifier::NamespaceAlias:
+    AddDecl(NNS->getAsNamespaceAlias());
+    break;
+  case NestedNameSpecifier::TypeSpec:
+  case NestedNameSpecifier::TypeSpecWithTemplate:
+    AddType(NNS->getAsType());
+    break;
+  case NestedNameSpecifier::Global:
+  case NestedNameSpecifier::Super:
+    break;
+  }
+}
+
+void ODRHash::AddTemplateName(TemplateName Name) {
+  auto Kind = Name.getKind();
+  ID.AddInteger(Kind);
+
+  switch (Kind) {
+  case TemplateName::Template:
+    AddDecl(Name.getAsTemplateDecl());
+    break;
+  // TODO: Support these cases.
+  case TemplateName::OverloadedTemplate:
+  case TemplateName::QualifiedTemplate:
+  case TemplateName::DependentTemplate:
+  case TemplateName::SubstTemplateTemplateParm:
+  case TemplateName::SubstTemplateTemplateParmPack:
+    break;
+  }
+}
+
+void ODRHash::AddTemplateArgument(TemplateArgument TA) {
+  const auto Kind = TA.getKind();
+  ID.AddInteger(Kind);
+
+  switch (Kind) {
+    case TemplateArgument::Null:
+      llvm_unreachable("Expected valid TemplateArgument");
+    case TemplateArgument::Type:
+      AddQualType(TA.getAsType());
+      break;
+    case TemplateArgument::Declaration:
+    case TemplateArgument::NullPtr:
+    case TemplateArgument::Integral:
+      break;
+    case TemplateArgument::Template:
+    case TemplateArgument::TemplateExpansion:
+      AddTemplateName(TA.getAsTemplateOrTemplatePattern());
+      break;
+    case TemplateArgument::Expression:
+      AddStmt(TA.getAsExpr());
+      break;
+    case TemplateArgument::Pack:
+      ID.AddInteger(TA.pack_size());
+      for (auto SubTA : TA.pack_elements()) {
+        AddTemplateArgument(SubTA);
+      }
+      break;
+  }
+}
+
 void ODRHash::AddTemplateParameterList(const TemplateParameterList *TPL) {}
 
 void ODRHash::clear() {
@@ -154,6 +228,13 @@ class ODRDeclVisitor : public ConstDeclVisitor<ODRDeclVisitor> {
     Hash.AddQualType(T);
   }
 
+  void AddDecl(const Decl *D) {
+    Hash.AddBoolean(D);
+    if (D) {
+      Hash.AddDecl(D);
+    }
+  }
+
   void Visit(const Decl *D) {
     ID.AddInteger(D->getKind());
     Inherited::Visit(D);
@@ -165,10 +246,23 @@ class ODRDeclVisitor : public ConstDeclVisitor<ODRDeclVisitor> {
   }
 
   void VisitValueDecl(const ValueDecl *D) {
-    AddQualType(D->getType());
+    if (!isa<FunctionDecl>(D)) {
+      AddQualType(D->getType());
+    }
     Inherited::VisitValueDecl(D);
   }
 
+  void VisitVarDecl(const VarDecl *D) {
+    Hash.AddBoolean(D->isStaticLocal());
+    Hash.AddBoolean(D->isConstexpr());
+    const bool HasInit = D->hasInit();
+    Hash.AddBoolean(HasInit);
+    if (HasInit) {
+      AddStmt(D->getInit());
+    }
+    Inherited::VisitVarDecl(D);
+  }
+
   void VisitParmVarDecl(const ParmVarDecl *D) {
     // TODO: Handle default arguments.
     Inherited::VisitParmVarDecl(D);
@@ -213,6 +307,8 @@ class ODRDeclVisitor : public ConstDeclVisitor<ODRDeclVisitor> {
       Hash.AddSubDecl(Param);
     }
 
+    AddQualType(D->getReturnType());
+
     Inherited::VisitFunctionDecl(D);
   }
 
@@ -236,6 +332,16 @@ class ODRDeclVisitor : public ConstDeclVisitor<ODRDeclVisitor> {
   void VisitTypeAliasDecl(const TypeAliasDecl *D) {
     Inherited::VisitTypeAliasDecl(D);
   }
+
+  void VisitFriendDecl(const FriendDecl *D) {
+    TypeSourceInfo *TSI = D->getFriendType();
+    Hash.AddBoolean(TSI);
+    if (TSI) {
+      AddQualType(TSI->getType());
+    } else {
+      AddDecl(D->getFriendDecl());
+    }
+  }
 };
 
 // Only allow a small portion of Decl's to be processed.  Remove this once
@@ -248,11 +354,15 @@ bool ODRHash::isWhitelistedDecl(const Decl *D, const CXXRecordDecl *Parent) {
     default:
       return false;
     case Decl::AccessSpec:
+    case Decl::CXXConstructor:
+    case Decl::CXXDestructor:
     case Decl::CXXMethod:
     case Decl::Field:
+    case Decl::Friend:
     case Decl::StaticAssert:
     case Decl::TypeAlias:
     case Decl::Typedef:
+    case Decl::Var:
       return true;
   }
 }
@@ -268,8 +378,12 @@ void ODRHash::AddCXXRecordDecl(const CXXRecordDecl *Record) {
   assert(Record && Record->hasDefinition() &&
          "Expected non-null record to be a definition.");
 
-  if (isa<ClassTemplateSpecializationDecl>(Record)) {
-    return;
+  const DeclContext *DC = Record;
+  while (DC) {
+    if (isa<ClassTemplateSpecializationDecl>(DC)) {
+      return;
+    }
+    DC = DC->getParent();
   }
 
   AddDecl(Record);
@@ -335,6 +449,27 @@ class ODRTypeVisitor : public TypeVisitor<ODRTypeVisitor> {
     Hash.AddQualType(T);
   }
 
+  void AddType(const Type *T) {
+    Hash.AddBoolean(T);
+    if (T) {
+      Hash.AddType(T);
+    }
+  }
+
+  void AddNestedNameSpecifier(const NestedNameSpecifier *NNS) {
+    Hash.AddBoolean(NNS);
+    if (NNS) {
+      Hash.AddNestedNameSpecifier(NNS);
+    }
+  }
+
+  void AddIdentifierInfo(const IdentifierInfo *II) {
+    Hash.AddBoolean(II);
+    if (II) {
+      Hash.AddIdentifierInfo(II);
+    }
+  }
+
   void VisitQualifiers(Qualifiers Quals) {
     ID.AddInteger(Quals.getAsOpaqueValue());
   }
@@ -411,9 +546,67 @@ class ODRTypeVisitor : public TypeVisitor<ODRTypeVisitor> {
 
   void VisitTypedefType(const TypedefType *T) {
     AddDecl(T->getDecl());
-    AddQualType(T->getDecl()->getUnderlyingType().getCanonicalType());
+    QualType UnderlyingType = T->getDecl()->getUnderlyingType();
+    VisitQualifiers(UnderlyingType.getQualifiers());
+    while (const TypedefType *Underlying =
+               dyn_cast<TypedefType>(UnderlyingType.getTypePtr())) {
+      UnderlyingType = Underlying->getDecl()->getUnderlyingType();
+    }
+    AddType(UnderlyingType.getTypePtr());
     VisitType(T);
   }
+
+  void VisitTagType(const TagType *T) {
+    AddDecl(T->getDecl());
+    VisitType(T);
+  }
+
+  void VisitRecordType(const RecordType *T) { VisitTagType(T); }
+  void VisitEnumType(const EnumType *T) { VisitTagType(T); }
+
+  void VisitTypeWithKeyword(const TypeWithKeyword *T) {
+    ID.AddInteger(T->getKeyword());
+    VisitType(T);
+  };
+
+  void VisitDependentNameType(const DependentNameType *T) {
+    AddNestedNameSpecifier(T->getQualifier());
+    AddIdentifierInfo(T->getIdentifier());
+    VisitTypeWithKeyword(T);
+  }
+
+  void VisitDependentTemplateSpecializationType(
+      const DependentTemplateSpecializationType *T) {
+    AddIdentifierInfo(T->getIdentifier());
+    AddNestedNameSpecifier(T->getQualifier());
+    ID.AddInteger(T->getNumArgs());
+    for (const auto &TA : T->template_arguments()) {
+      Hash.AddTemplateArgument(TA);
+    }
+    VisitTypeWithKeyword(T);
+  }
+
+  void VisitElaboratedType(const ElaboratedType *T) {
+    AddNestedNameSpecifier(T->getQualifier());
+    AddQualType(T->getNamedType());
+    VisitTypeWithKeyword(T);
+  }
+
+  void VisitTemplateSpecializationType(const TemplateSpecializationType *T) {
+    ID.AddInteger(T->getNumArgs());
+    for (const auto &TA : T->template_arguments()) {
+      Hash.AddTemplateArgument(TA);
+    }
+    Hash.AddTemplateName(T->getTemplateName());
+    VisitType(T);
+  }
+
+  void VisitTemplateTypeParmType(const TemplateTypeParmType *T) {
+    ID.AddInteger(T->getDepth());
+    ID.AddInteger(T->getIndex());
+    Hash.AddBoolean(T->isParameterPack());
+    AddDecl(T->getDecl());
+  }
 };
 
 void ODRHash::AddType(const Type *T) {
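
Several ODRHash helpers above hash a presence boolean before an optional payload, so "field absent" and "field present" can never collide on the same hash input. A generic sketch of the idiom (the mixing function is simplified):

    #include <cstddef>
    #include <functional>

    template <typename T>
    void addOptional(std::size_t &Seed, const T *Ptr) {
      Seed = Seed * 131 + (Ptr != nullptr);       // presence bit first
      if (Ptr)
        Seed = Seed * 131 + std::hash<T>{}(*Ptr); // then the payload
    }
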
diff --git a/interpreter/llvm/src/tools/clang/lib/AST/OpenMPClause.cpp b/interpreter/llvm/src/tools/clang/lib/AST/OpenMPClause.cpp
index 77470a9b76d07..2c4d159a1bc82 100644
--- a/interpreter/llvm/src/tools/clang/lib/AST/OpenMPClause.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/AST/OpenMPClause.cpp
@@ -46,6 +46,8 @@ const OMPClauseWithPreInit *OMPClauseWithPreInit::get(const OMPClause *C) {
     return static_cast<const OMPLastprivateClause *>(C);
   case OMPC_reduction:
     return static_cast<const OMPReductionClause *>(C);
+  case OMPC_task_reduction:
+    return static_cast<const OMPTaskReductionClause *>(C);
   case OMPC_linear:
     return static_cast<const OMPLinearClause *>(C);
   case OMPC_if:
@@ -112,6 +114,8 @@ const OMPClauseWithPostUpdate *OMPClauseWithPostUpdate::get(const OMPClause *C)
     return static_cast<const OMPLastprivateClause *>(C);
   case OMPC_reduction:
     return static_cast<const OMPReductionClause *>(C);
+  case OMPC_task_reduction:
+    return static_cast<const OMPTaskReductionClause *>(C);
   case OMPC_linear:
     return static_cast<const OMPLinearClause *>(C);
   case OMPC_schedule:
@@ -505,6 +509,59 @@ OMPReductionClause *OMPReductionClause::CreateEmpty(const ASTContext &C,
   return new (Mem) OMPReductionClause(N);
 }
 
+void OMPTaskReductionClause::setPrivates(ArrayRef<Expr *> Privates) {
+  assert(Privates.size() == varlist_size() &&
+         "Number of private copies is not the same as the preallocated buffer");
+  std::copy(Privates.begin(), Privates.end(), varlist_end());
+}
+
+void OMPTaskReductionClause::setLHSExprs(ArrayRef<Expr *> LHSExprs) {
+  assert(
+      LHSExprs.size() == varlist_size() &&
+      "Number of LHS expressions is not the same as the preallocated buffer");
+  std::copy(LHSExprs.begin(), LHSExprs.end(), getPrivates().end());
+}
+
+void OMPTaskReductionClause::setRHSExprs(ArrayRef<Expr *> RHSExprs) {
+  assert(
+      RHSExprs.size() == varlist_size() &&
+      "Number of RHS expressions is not the same as the preallocated buffer");
+  std::copy(RHSExprs.begin(), RHSExprs.end(), getLHSExprs().end());
+}
+
+void OMPTaskReductionClause::setReductionOps(ArrayRef<Expr *> ReductionOps) {
+  assert(ReductionOps.size() == varlist_size() && "Number of task reduction "
+                                                  "expressions is not the same "
+                                                  "as the preallocated buffer");
+  std::copy(ReductionOps.begin(), ReductionOps.end(), getRHSExprs().end());
+}
+
+OMPTaskReductionClause *OMPTaskReductionClause::Create(
+    const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc,
+    SourceLocation EndLoc, SourceLocation ColonLoc, ArrayRef<Expr *> VL,
+    NestedNameSpecifierLoc QualifierLoc, const DeclarationNameInfo &NameInfo,
+    ArrayRef<Expr *> Privates, ArrayRef<Expr *> LHSExprs,
+    ArrayRef<Expr *> RHSExprs, ArrayRef<Expr *> ReductionOps, Stmt *PreInit,
+    Expr *PostUpdate) {
+  void *Mem = C.Allocate(totalSizeToAlloc<Expr *>(5 * VL.size()));
+  OMPTaskReductionClause *Clause = new (Mem) OMPTaskReductionClause(
+      StartLoc, LParenLoc, EndLoc, ColonLoc, VL.size(), QualifierLoc, NameInfo);
+  Clause->setVarRefs(VL);
+  Clause->setPrivates(Privates);
+  Clause->setLHSExprs(LHSExprs);
+  Clause->setRHSExprs(RHSExprs);
+  Clause->setReductionOps(ReductionOps);
+  Clause->setPreInitStmt(PreInit);
+  Clause->setPostUpdateExpr(PostUpdate);
+  return Clause;
+}
+
+OMPTaskReductionClause *OMPTaskReductionClause::CreateEmpty(const ASTContext &C,
+                                                            unsigned N) {
+  void *Mem = C.Allocate(totalSizeToAlloc<Expr *>(5 * N));
+  return new (Mem) OMPTaskReductionClause(N);
+}
+
 OMPFlushClause *OMPFlushClause::Create(const ASTContext &C,
                                        SourceLocation StartLoc,
                                        SourceLocation LParenLoc,
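
OMPTaskReductionClause packs five equally sized Expr* arrays (variable refs, privates, LHS, RHS, reduction ops) into one trailing allocation; each setter copies its list immediately after the previous array, which is why Create asks totalSizeToAlloc for 5 * VL.size() pointers. A simplified model of that layout scheme (not the LLVM TrailingObjects API):

    #include <cstdlib>
    #include <cstring>
    #include <new>

    struct alignas(alignof(void *)) Header {
      unsigned N;                    // length of each trailing array
      void **array(unsigned Which) { // 0 = varrefs, 1 = privates, ...
        return reinterpret_cast<void **>(this + 1) + Which * N;
      }
    };

    Header *createClause(unsigned N) {
      void *Mem = std::malloc(sizeof(Header) + 5 * N * sizeof(void *));
      auto *H = new (Mem) Header{N};
      std::memset(H->array(0), 0, 5 * N * sizeof(void *));
      return H;
    }
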
diff --git a/interpreter/llvm/src/tools/clang/lib/AST/RecordLayoutBuilder.cpp b/interpreter/llvm/src/tools/clang/lib/AST/RecordLayoutBuilder.cpp
index cf981be0a4dde..c0b9cadca4227 100644
--- a/interpreter/llvm/src/tools/clang/lib/AST/RecordLayoutBuilder.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/AST/RecordLayoutBuilder.cpp
@@ -3073,6 +3073,41 @@ uint64_t ASTContext::getFieldOffset(const ValueDecl *VD) const {
   return OffsetInBits;
 }
 
+uint64_t ASTContext::lookupFieldBitOffset(const ObjCInterfaceDecl *OID,
+                                          const ObjCImplementationDecl *ID,
+                                          const ObjCIvarDecl *Ivar) const {
+  const ObjCInterfaceDecl *Container = Ivar->getContainingInterface();
+
+  // FIXME: We should eliminate the need to have ObjCImplementationDecl passed
+  // in here; it should never be necessary because that should be the lexical
+  // decl context for the ivar.
+
+  // If we now have an implementation (and the ivar is in it) then
+  // look up in the implementation layout.
+  const ASTRecordLayout *RL;
+  if (ID && declaresSameEntity(ID->getClassInterface(), Container))
+    RL = &getASTObjCImplementationLayout(ID);
+  else
+    RL = &getASTObjCInterfaceLayout(Container);
+
+  // Compute field index.
+  //
+  // FIXME: The index here is closely tied to how ASTContext::getObjCLayout is
+  // implemented. This should be fixed to get the information from the layout
+  // directly.
+  unsigned Index = 0;
+
+  for (const ObjCIvarDecl *IVD = Container->all_declared_ivar_begin();
+       IVD; IVD = IVD->getNextIvar()) {
+    if (Ivar == IVD)
+      break;
+    ++Index;
+  }
+  assert(Index < RL->getFieldCount() && "Ivar is not inside record layout!");
+
+  return RL->getFieldOffset(Index);
+}
+
 /// getObjCLayout - Get or compute information about the layout of the
 /// given interface.
 ///
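
lookupFieldBitOffset recovers the ivar's field index by walking the class's declared-ivar chain and counting, then reads the bit offset from the record layout at that index. Reduced to its generic shape, that is an index scan over an intrusively linked list:

    // Count predecessors of Target in a singly linked list; the count is
    // then usable as an index into a parallel layout table.
    struct Node { Node *Next; };

    unsigned indexOf(const Node *Head, const Node *Target) {
      unsigned Index = 0;
      for (const Node *N = Head; N; N = N->Next) {
        if (N == Target)
          return Index;
        ++Index;
      }
      return ~0u; // not found; the real code asserts instead
    }
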
diff --git a/interpreter/llvm/src/tools/clang/lib/AST/Stmt.cpp b/interpreter/llvm/src/tools/clang/lib/AST/Stmt.cpp
index 69e65f558f899..2367cadf645c6 100644
--- a/interpreter/llvm/src/tools/clang/lib/AST/Stmt.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/AST/Stmt.cpp
@@ -1112,7 +1112,7 @@ void CapturedStmt::setCapturedRegionKind(CapturedRegionKind Kind) {
 
 bool CapturedStmt::capturesVariable(const VarDecl *Var) const {
   for (const auto &I : captures()) {
-    if (!I.capturesVariable())
+    if (!I.capturesVariable() && !I.capturesVariableByCopy())
       continue;
 
     // This does not handle variable redeclarations. This should be
diff --git a/interpreter/llvm/src/tools/clang/lib/AST/StmtCXX.cpp b/interpreter/llvm/src/tools/clang/lib/AST/StmtCXX.cpp
index aade13ed3bd4a..666f5dcc9d978 100644
--- a/interpreter/llvm/src/tools/clang/lib/AST/StmtCXX.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/AST/StmtCXX.cpp
@@ -88,7 +88,7 @@ const VarDecl *CXXForRangeStmt::getLoopVariable() const {
 }
 
 CoroutineBodyStmt *CoroutineBodyStmt::Create(
-    const ASTContext &C, CoroutineBodyStmt::CtorArgs const& Args) {
+    const ASTContext &C, CoroutineBodyStmt::CtorArgs const &Args) {
   std::size_t Size = totalSizeToAlloc<Stmt *>(
       CoroutineBodyStmt::FirstParamMove + Args.ParamMoves.size());
 
@@ -96,6 +96,20 @@ CoroutineBodyStmt *CoroutineBodyStmt::Create(
   return new (Mem) CoroutineBodyStmt(Args);
 }
 
+CoroutineBodyStmt *CoroutineBodyStmt::Create(const ASTContext &C, EmptyShell,
+                                             unsigned NumParams) {
+  std::size_t Size = totalSizeToAlloc<Stmt *>(
+      CoroutineBodyStmt::FirstParamMove + NumParams);
+
+  void *Mem = C.Allocate(Size, alignof(CoroutineBodyStmt));
+  auto *Result = new (Mem) CoroutineBodyStmt(CtorArgs());
+  Result->NumParams = NumParams;
+  auto *ParamBegin = Result->getStoredStmts() + SubStmt::FirstParamMove;
+  std::uninitialized_fill(ParamBegin, ParamBegin + NumParams,
+                          static_cast<Stmt *>(nullptr));
+  return Result;
+}
+
 CoroutineBodyStmt::CoroutineBodyStmt(CoroutineBodyStmt::CtorArgs const &Args)
     : Stmt(CoroutineBodyStmtClass), NumParams(Args.ParamMoves.size()) {
   Stmt **SubStmts = getStoredStmts();
@@ -108,6 +122,8 @@ CoroutineBodyStmt::CoroutineBodyStmt(CoroutineBodyStmt::CtorArgs const &Args)
   SubStmts[CoroutineBodyStmt::Allocate] = Args.Allocate;
   SubStmts[CoroutineBodyStmt::Deallocate] = Args.Deallocate;
   SubStmts[CoroutineBodyStmt::ReturnValue] = Args.ReturnValue;
+  SubStmts[CoroutineBodyStmt::ResultDecl] = Args.ResultDecl;
+  SubStmts[CoroutineBodyStmt::ReturnStmt] = Args.ReturnStmt;
   SubStmts[CoroutineBodyStmt::ReturnStmtOnAllocFailure] =
       Args.ReturnStmtOnAllocFailure;
   std::copy(Args.ParamMoves.begin(), Args.ParamMoves.end(),
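
The new EmptyShell overload supports two-phase construction for AST deserialization: allocate at the final size, null-fill the trailing statement slots, and let the reader patch real statements in afterwards. A hedged sketch of the pattern (types hypothetical):

    #include <memory>
    #include <new>

    struct alignas(alignof(void *)) Shell {
      unsigned NumSlots;
      void **slots() { return reinterpret_cast<void **>(this + 1); }
    };

    Shell *createEmpty(unsigned NumSlots) {
      void *Mem = ::operator new(sizeof(Shell) + NumSlots * sizeof(void *));
      auto *S = new (Mem) Shell{NumSlots};
      std::uninitialized_fill(S->slots(), S->slots() + NumSlots,
                              static_cast<void *>(nullptr));
      return S;
    }
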
diff --git a/interpreter/llvm/src/tools/clang/lib/AST/StmtOpenMP.cpp b/interpreter/llvm/src/tools/clang/lib/AST/StmtOpenMP.cpp
index cccb2f075b657..1dcb4fd5196be 100644
--- a/interpreter/llvm/src/tools/clang/lib/AST/StmtOpenMP.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/AST/StmtOpenMP.cpp
@@ -522,23 +522,28 @@ OMPTaskwaitDirective *OMPTaskwaitDirective::CreateEmpty(const ASTContext &C,
   return new (Mem) OMPTaskwaitDirective();
 }
 
-OMPTaskgroupDirective *OMPTaskgroupDirective::Create(const ASTContext &C,
-                                                     SourceLocation StartLoc,
-                                                     SourceLocation EndLoc,
-                                                     Stmt *AssociatedStmt) {
-  unsigned Size = llvm::alignTo(sizeof(OMPTaskgroupDirective), alignof(Stmt *));
+OMPTaskgroupDirective *OMPTaskgroupDirective::Create(
+    const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
+    ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt) {
+  unsigned Size = llvm::alignTo(sizeof(OMPTaskgroupDirective) +
+                                    sizeof(OMPClause *) * Clauses.size(),
+                                alignof(Stmt *));
   void *Mem = C.Allocate(Size + sizeof(Stmt *));
   OMPTaskgroupDirective *Dir =
-      new (Mem) OMPTaskgroupDirective(StartLoc, EndLoc);
+      new (Mem) OMPTaskgroupDirective(StartLoc, EndLoc, Clauses.size());
   Dir->setAssociatedStmt(AssociatedStmt);
+  Dir->setClauses(Clauses);
   return Dir;
 }
 
 OMPTaskgroupDirective *OMPTaskgroupDirective::CreateEmpty(const ASTContext &C,
+                                                          unsigned NumClauses,
                                                           EmptyShell) {
-  unsigned Size = llvm::alignTo(sizeof(OMPTaskgroupDirective), alignof(Stmt *));
+  unsigned Size = llvm::alignTo(sizeof(OMPTaskgroupDirective) +
+                                    sizeof(OMPClause *) * NumClauses,
+                                alignof(Stmt *));
   void *Mem = C.Allocate(Size + sizeof(Stmt *));
-  return new (Mem) OMPTaskgroupDirective();
+  return new (Mem) OMPTaskgroupDirective(NumClauses);
 }
 
 OMPCancellationPointDirective *OMPCancellationPointDirective::Create(
diff --git a/interpreter/llvm/src/tools/clang/lib/AST/StmtPrinter.cpp b/interpreter/llvm/src/tools/clang/lib/AST/StmtPrinter.cpp
index 21f5259c3ca89..5ebaa32b49c80 100644
--- a/interpreter/llvm/src/tools/clang/lib/AST/StmtPrinter.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/AST/StmtPrinter.cpp
@@ -836,6 +836,29 @@ void OMPClausePrinter::VisitOMPReductionClause(OMPReductionClause *Node) {
   }
 }
 
+void OMPClausePrinter::VisitOMPTaskReductionClause(
+    OMPTaskReductionClause *Node) {
+  if (!Node->varlist_empty()) {
+    OS << "task_reduction(";
+    NestedNameSpecifier *QualifierLoc =
+        Node->getQualifierLoc().getNestedNameSpecifier();
+    OverloadedOperatorKind OOK =
+        Node->getNameInfo().getName().getCXXOverloadedOperator();
+    if (QualifierLoc == nullptr && OOK != OO_None) {
+      // Print reduction identifier in C format
+      OS << getOperatorSpelling(OOK);
+    } else {
+      // Use C++ format
+      if (QualifierLoc != nullptr)
+        QualifierLoc->print(OS, Policy);
+      OS << Node->getNameInfo();
+    }
+    OS << ":";
+    VisitOMPClauseList(Node, ' ');
+    OS << ")";
+  }
+}
+
 void OMPClausePrinter::VisitOMPLinearClause(OMPLinearClause *Node) {
   if (!Node->varlist_empty()) {
     OS << "linear";
@@ -1081,7 +1104,7 @@ void StmtPrinter::VisitOMPTaskwaitDirective(OMPTaskwaitDirective *Node) {
 }
 
 void StmtPrinter::VisitOMPTaskgroupDirective(OMPTaskgroupDirective *Node) {
-  Indent() << "#pragma omp taskgroup";
+  Indent() << "#pragma omp taskgroup ";
   PrintOMPExecutableDirective(Node);
 }
 
diff --git a/interpreter/llvm/src/tools/clang/lib/AST/StmtProfile.cpp b/interpreter/llvm/src/tools/clang/lib/AST/StmtProfile.cpp
index f1fbe2806b5d0..7ec0d1d5e0176 100644
--- a/interpreter/llvm/src/tools/clang/lib/AST/StmtProfile.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/AST/StmtProfile.cpp
@@ -549,6 +549,30 @@ void OMPClauseProfiler::VisitOMPReductionClause(
       Profiler->VisitStmt(E);
   }
 }
+void OMPClauseProfiler::VisitOMPTaskReductionClause(
+    const OMPTaskReductionClause *C) {
+  Profiler->VisitNestedNameSpecifier(
+      C->getQualifierLoc().getNestedNameSpecifier());
+  Profiler->VisitName(C->getNameInfo().getName());
+  VisitOMPClauseList(C);
+  VistOMPClauseWithPostUpdate(C);
+  for (auto *E : C->privates()) {
+    if (E)
+      Profiler->VisitStmt(E);
+  }
+  for (auto *E : C->lhs_exprs()) {
+    if (E)
+      Profiler->VisitStmt(E);
+  }
+  for (auto *E : C->rhs_exprs()) {
+    if (E)
+      Profiler->VisitStmt(E);
+  }
+  for (auto *E : C->reduction_ops()) {
+    if (E)
+      Profiler->VisitStmt(E);
+  }
+}
 void OMPClauseProfiler::VisitOMPLinearClause(const OMPLinearClause *C) {
   VisitOMPClauseList(C);
   VistOMPClauseWithPostUpdate(C);
@@ -1364,6 +1388,15 @@ static Stmt::StmtClass DecodeOperatorCall(const CXXOperatorCallExpr *S,
   llvm_unreachable("Invalid overloaded operator expression");
 }
 
+#if defined(_MSC_VER)
+#if _MSC_VER == 1911
+// Work around https://developercommunity.visualstudio.com/content/problem/84002/clang-cl-when-built-with-vc-2017-crashes-cause-vc.html
+// MSVC 2017 update 3 miscompiles this function, and a clang built with it
+// will crash in stage 2 of a bootstrap build.
+#pragma optimize("", off)
+#endif
+#endif
+
 void StmtProfiler::VisitCXXOperatorCallExpr(const CXXOperatorCallExpr *S) {
   if (S->isTypeDependent()) {
     // Type-dependent operator calls are profiled like their underlying
@@ -1396,6 +1429,12 @@ void StmtProfiler::VisitCXXOperatorCallExpr(const CXXOperatorCallExpr *S) {
   ID.AddInteger(S->getOperator());
 }
 
+#if defined(_MSC_VER)
+#if _MSC_VER == 1911
+#pragma optimize("", on)
+#endif
+#endif
+
 void StmtProfiler::VisitCXXMemberCallExpr(const CXXMemberCallExpr *S) {
   VisitCallExpr(S);
 }
diff --git a/interpreter/llvm/src/tools/clang/lib/AST/Type.cpp b/interpreter/llvm/src/tools/clang/lib/AST/Type.cpp
index 22d52bcd3f315..d21781dc38992 100644
--- a/interpreter/llvm/src/tools/clang/lib/AST/Type.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/AST/Type.cpp
@@ -1344,7 +1344,7 @@ Optional<ArrayRef<QualType>> Type::getObjCSubstitutions(
   } else if (getAs<BlockPointerType>()) {
     ASTContext &ctx = dc->getParentASTContext();
     objectType = ctx.getObjCObjectType(ctx.ObjCBuiltinIdTy, { }, { })
-                   ->castAs<ObjCObjectType>();;
+                   ->castAs<ObjCObjectType>();
   } else {
     objectType = getAs<ObjCObjectType>();
   }
@@ -2313,6 +2313,15 @@ bool Type::isAlignValT() const {
   return false;
 }
 
+bool Type::isStdByteType() const {
+  if (auto *ET = getAs<EnumType>()) {
+    auto *II = ET->getDecl()->getIdentifier();
+    if (II && II->isStr("byte") && ET->getDecl()->isInStdNamespace())
+      return true;
+  }
+  return false;
+}
+
 bool Type::isPromotableIntegerType() const {
   if (const BuiltinType *BT = getAs<BuiltinType>())
     switch (BT->getKind()) {
@@ -2630,7 +2639,7 @@ StringRef FunctionType::getNameForCallConv(CallingConv CC) {
   case CC_X86ThisCall: return "thiscall";
   case CC_X86Pascal: return "pascal";
   case CC_X86VectorCall: return "vectorcall";
-  case CC_X86_64Win64: return "ms_abi";
+  case CC_Win64: return "ms_abi";
   case CC_X86_64SysV: return "sysv_abi";
   case CC_X86RegCall : return "regcall";
   case CC_AAPCS: return "aapcs";
@@ -3023,6 +3032,7 @@ bool AttributedType::isQualifier() const {
   case AttributedType::attr_sptr:
   case AttributedType::attr_uptr:
   case AttributedType::attr_objc_kindof:
+  case AttributedType::attr_ns_returns_retained:
     return false;
   }
   llvm_unreachable("bad attributed type kind");
@@ -3056,6 +3066,7 @@ bool AttributedType::isCallingConv() const {
   case attr_objc_inert_unsafe_unretained:
   case attr_noreturn:
   case attr_nonnull:
+  case attr_ns_returns_retained:
   case attr_nullable:
   case attr_null_unspecified:
   case attr_objc_kindof:
@@ -3531,7 +3542,7 @@ Optional<NullabilityKind> Type::getNullability(const ASTContext &context) const
   } while (true);
 }
 
-bool Type::canHaveNullability() const {
+bool Type::canHaveNullability(bool ResultIfUnknown) const {
   QualType type = getCanonicalTypeInternal();
   
   switch (type->getTypeClass()) {
@@ -3559,7 +3570,8 @@ bool Type::canHaveNullability() const {
   case Type::SubstTemplateTypeParmPack:
   case Type::DependentName:
   case Type::DependentTemplateSpecialization:
-    return true;
+  case Type::Auto:
+    return ResultIfUnknown;
 
   // Dependent template specializations can instantiate to pointer
   // types unless they're known to be specializations of a class
@@ -3571,12 +3583,7 @@ bool Type::canHaveNullability() const {
       if (isa<ClassTemplateDecl>(templateDecl))
         return false;
     }
-    return true;
-
-  // auto is considered dependent when it isn't deduced.
-  case Type::Auto:
-  case Type::DeducedTemplateSpecialization:
-    return !cast<DeducedType>(type.getTypePtr())->isDeduced();
+    return ResultIfUnknown;
 
   case Type::Builtin:
     switch (cast<BuiltinType>(type.getTypePtr())->getKind()) {
@@ -3595,7 +3602,7 @@ bool Type::canHaveNullability() const {
     case BuiltinType::PseudoObject:
     case BuiltinType::UnknownAny:
     case BuiltinType::ARCUnbridgedCast:
-      return true;
+      return ResultIfUnknown;
 
     case BuiltinType::Void:
     case BuiltinType::ObjCId:
@@ -3614,6 +3621,7 @@ bool Type::canHaveNullability() const {
     case BuiltinType::OMPArraySection:
       return false;
     }
+    llvm_unreachable("unknown builtin type");
 
   // Non-pointer types.
   case Type::Complex:
@@ -3629,6 +3637,7 @@ bool Type::canHaveNullability() const {
   case Type::FunctionProto:
   case Type::FunctionNoProto:
   case Type::Record:
+  case Type::DeducedTemplateSpecialization:
   case Type::Enum:
   case Type::InjectedClassName:
   case Type::PackExpansion:
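
canHaveNullability now takes ResultIfUnknown, so each caller decides how dependent or not-yet-deduced types are treated instead of the function hard-coding 'true'. A generic sketch of the new shape (enum and names hypothetical):

    enum class Known { No, Yes, Unknown };

    bool canHaveNullability(Known K, bool ResultIfUnknown) {
      switch (K) {
      case Known::Yes:     return true;            // pointer-like types
      case Known::No:      return false;           // records, enums, ...
      case Known::Unknown: return ResultIfUnknown; // dependent, undeduced
      }
      return false; // unreachable; keeps -Wreturn-type quiet
    }
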
diff --git a/interpreter/llvm/src/tools/clang/lib/AST/TypePrinter.cpp b/interpreter/llvm/src/tools/clang/lib/AST/TypePrinter.cpp
index 2be14ab621230..15c63bf4ed988 100644
--- a/interpreter/llvm/src/tools/clang/lib/AST/TypePrinter.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/AST/TypePrinter.cpp
@@ -104,6 +104,7 @@ namespace {
     void printAfter(QualType T, raw_ostream &OS);
     void AppendScope(DeclContext *DC, raw_ostream &OS);
     void printTag(TagDecl *T, raw_ostream &OS);
+    void printFunctionAfter(const FunctionType::ExtInfo &Info, raw_ostream &OS);
 #define ABSTRACT_TYPE(CLASS, PARENT)
 #define TYPE(CLASS, PARENT) \
     void print##CLASS##Before(const CLASS##Type *T, raw_ostream &OS); \
@@ -685,6 +686,36 @@ void TypePrinter::printFunctionProtoAfter(const FunctionProtoType *T,
 
   FunctionType::ExtInfo Info = T->getExtInfo();
 
+  printFunctionAfter(Info, OS);
+
+  if (unsigned quals = T->getTypeQuals()) {
+    OS << ' ';
+    AppendTypeQualList(OS, quals, Policy.Restrict);
+  }
+
+  switch (T->getRefQualifier()) {
+  case RQ_None:
+    break;
+
+  case RQ_LValue:
+    OS << " &";
+    break;
+
+  case RQ_RValue:
+    OS << " &&";
+    break;
+  }
+  T->printExceptionSpecification(OS, Policy);
+
+  if (T->hasTrailingReturn()) {
+    OS << " -> ";
+    print(T->getReturnType(), OS, StringRef());
+  } else
+    printAfter(T->getReturnType(), OS);
+}
+
+void TypePrinter::printFunctionAfter(const FunctionType::ExtInfo &Info,
+                                     raw_ostream &OS) {
   if (!InsideCCAttribute) {
     switch (Info.getCC()) {
     case CC_C:
@@ -720,7 +751,7 @@ void TypePrinter::printFunctionProtoAfter(const FunctionProtoType *T,
     case CC_IntelOclBicc:
       OS << " __attribute__((intel_ocl_bicc))";
       break;
-    case CC_X86_64Win64:
+    case CC_Win64:
       OS << " __attribute__((ms_abi))";
       break;
     case CC_X86_64SysV:
@@ -747,36 +778,13 @@ void TypePrinter::printFunctionProtoAfter(const FunctionProtoType *T,
 
   if (Info.getNoReturn())
     OS << " __attribute__((noreturn))";
+  if (Info.getProducesResult())
+    OS << " __attribute__((ns_returns_retained))";
   if (Info.getRegParm())
     OS << " __attribute__((regparm ("
        << Info.getRegParm() << ")))";
   if (Info.getNoCallerSavedRegs())
-    OS << "__attribute__((no_caller_saved_registers))";
-
-  if (unsigned quals = T->getTypeQuals()) {
-    OS << ' ';
-    AppendTypeQualList(OS, quals, Policy.Restrict);
-  }
-
-  switch (T->getRefQualifier()) {
-  case RQ_None:
-    break;
-    
-  case RQ_LValue:
-    OS << " &";
-    break;
-    
-  case RQ_RValue:
-    OS << " &&";
-    break;
-  }
-  T->printExceptionSpecification(OS, Policy);
-
-  if (T->hasTrailingReturn()) {
-    OS << " -> ";
-    print(T->getReturnType(), OS, StringRef());
-  } else
-    printAfter(T->getReturnType(), OS);
+    OS << " __attribute__((no_caller_saved_registers))";
 }
 
 void TypePrinter::printFunctionNoProtoBefore(const FunctionNoProtoType *T, 
@@ -795,8 +803,7 @@ void TypePrinter::printFunctionNoProtoAfter(const FunctionNoProtoType *T,
   SaveAndRestore<bool> NonEmptyPH(HasEmptyPlaceHolder, false);
   
   OS << "()";
-  if (T->getNoReturnAttr())
-    OS << " __attribute__((noreturn))";
+  printFunctionAfter(T->getExtInfo(), OS);
   printAfter(T->getReturnType(), OS);
 }
 
@@ -1270,6 +1277,12 @@ void TypePrinter::printAttributedAfter(const AttributedType *T,
   if (T->getAttrKind() == AttributedType::attr_objc_inert_unsafe_unretained)
     return;
 
+  // Don't print ns_returns_retained unless it had an effect.
+  if (T->getAttrKind() == AttributedType::attr_ns_returns_retained &&
+      !T->getEquivalentType()->castAs<FunctionType>()
+                             ->getExtInfo().getProducesResult())
+    return;
+
   // Print nullability type specifiers that occur after
   if (T->getAttrKind() == AttributedType::attr_nonnull ||
       T->getAttrKind() == AttributedType::attr_nullable ||
@@ -1361,6 +1374,10 @@ void TypePrinter::printAttributedAfter(const AttributedType *T,
     OS << ')';
     break;
 
+  case AttributedType::attr_ns_returns_retained:
+    OS << "ns_returns_retained";
+    break;
+
   // FIXME: When Sema learns to form this AttributedType, avoid printing the
   // attribute again in printFunctionProtoAfter.
   case AttributedType::attr_noreturn: OS << "noreturn"; break;
@@ -1668,9 +1685,9 @@ void Qualifiers::print(raw_ostream &OS, const PrintingPolicy& Policy,
         OS << "__shared";
         break;
       default:
-        assert(addrspace >= LangAS::Count);
+        assert(addrspace >= LangAS::FirstTargetAddressSpace);
         OS << "__attribute__((address_space(";
-        OS << addrspace - LangAS::Count;
+        OS << addrspace - LangAS::FirstTargetAddressSpace;
         OS << ")))";
     }
   }
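
The address-space printing fix maps the internal qualifier value back to the target's number by subtracting LangAS::FirstTargetAddressSpace rather than LangAS::Count. Under the assumption that target address space N is stored as FirstTargetAddressSpace + N, the round trip looks like this (the constant below is illustrative only):

    constexpr unsigned FirstTargetAddressSpace = 8; // illustrative value

    unsigned toTargetAS(unsigned Internal) {        // printed as
      return Internal - FirstTargetAddressSpace;    // address_space(N)
    }
    unsigned fromTargetAS(unsigned Target) {
      return Target + FirstTargetAddressSpace;
    }
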
diff --git a/interpreter/llvm/src/tools/clang/lib/ASTMatchers/Dynamic/Diagnostics.cpp b/interpreter/llvm/src/tools/clang/lib/ASTMatchers/Dynamic/Diagnostics.cpp
index 787b780c4243a..9cddcf93caef5 100644
--- a/interpreter/llvm/src/tools/clang/lib/ASTMatchers/Dynamic/Diagnostics.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/ASTMatchers/Dynamic/Diagnostics.cpp
@@ -118,8 +118,8 @@ static StringRef errorTypeToFormatString(Diagnostics::ErrorType Type) {
     return "Malformed bind() expression.";
   case Diagnostics::ET_ParserTrailingCode:
     return "Expected end of code.";
-  case Diagnostics::ET_ParserUnsignedError:
-    return "Error parsing unsigned token: <$0>";
+  case Diagnostics::ET_ParserNumberError:
+    return "Error parsing numeric literal: <$0>";
   case Diagnostics::ET_ParserOverloadedType:
     return "Input value has unresolved overloaded type: $0";
 
diff --git a/interpreter/llvm/src/tools/clang/lib/ASTMatchers/Dynamic/Marshallers.h b/interpreter/llvm/src/tools/clang/lib/ASTMatchers/Dynamic/Marshallers.h
index fb6b349a811c7..c557ff162691a 100644
--- a/interpreter/llvm/src/tools/clang/lib/ASTMatchers/Dynamic/Marshallers.h
+++ b/interpreter/llvm/src/tools/clang/lib/ASTMatchers/Dynamic/Marshallers.h
@@ -65,6 +65,26 @@ template <class T> struct ArgTypeTraits<ast_matchers::internal::Matcher<T> > {
   }
 };
 
+template <> struct ArgTypeTraits<bool> {
+  static bool is(const VariantValue &Value) { return Value.isBoolean(); }
+  static bool get(const VariantValue &Value) {
+    return Value.getBoolean();
+  }
+  static ArgKind getKind() {
+    return ArgKind(ArgKind::AK_Boolean);
+  }
+};
+
+template <> struct ArgTypeTraits<double> {
+  static bool is(const VariantValue &Value) { return Value.isDouble(); }
+  static double get(const VariantValue &Value) {
+    return Value.getDouble();
+  }
+  static ArgKind getKind() {
+    return ArgKind(ArgKind::AK_Double);
+  }
+};
+
 template <> struct ArgTypeTraits<unsigned> {
   static bool is(const VariantValue &Value) { return Value.isUnsigned(); }
   static unsigned get(const VariantValue &Value) {
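
The two specializations above follow the ArgTypeTraits recipe used throughout Marshallers.h: an is() predicate, a get() accessor, and a getKind() tag. Below is a minimal standalone sketch of the same traits idiom in plain C++ without the clang headers; Value, ArgTraits, and expectArg are invented stand-ins, not clang API.

#include <cassert>

// Simplified stand-in for VariantValue (invented for this sketch); the real
// class lives in clang/ASTMatchers/Dynamic/VariantValue.h.
struct Value {
  enum ValueKind { VK_Bool, VK_Double } Kind;
  bool B;
  double D;
};

// Primary template left undefined, exactly like ArgTypeTraits: using an
// unsupported argument type is a compile-time error.
template <class T> struct ArgTraits;

template <> struct ArgTraits<bool> {      // mirrors ArgTypeTraits<bool>
  static bool is(const Value &V) { return V.Kind == Value::VK_Bool; }
  static bool get(const Value &V) { return V.B; }
};

template <> struct ArgTraits<double> {    // mirrors ArgTypeTraits<double>
  static bool is(const Value &V) { return V.Kind == Value::VK_Double; }
  static double get(const Value &V) { return V.D; }
};

// Generic marshalling code needs only the traits, not the concrete type.
template <class T> T expectArg(const Value &V) {
  assert(ArgTraits<T>::is(V) && "argument has the wrong dynamic kind");
  return ArgTraits<T>::get(V);
}

int main() {
  Value V{Value::VK_Double, false, 0.5};
  return expectArg<double>(V) == 0.5 ? 0 : 1;
}
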
diff --git a/interpreter/llvm/src/tools/clang/lib/ASTMatchers/Dynamic/Parser.cpp b/interpreter/llvm/src/tools/clang/lib/ASTMatchers/Dynamic/Parser.cpp
index ce8d0a9a02062..f5bd296689951 100644
--- a/interpreter/llvm/src/tools/clang/lib/ASTMatchers/Dynamic/Parser.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/ASTMatchers/Dynamic/Parser.cpp
@@ -130,8 +130,8 @@ class Parser::CodeTokenizer {
 
     case '0': case '1': case '2': case '3': case '4':
     case '5': case '6': case '7': case '8': case '9':
-      // Parse an unsigned literal.
-      consumeUnsignedLiteral(&Result);
+      // Parse an unsigned and float literal.
+      consumeNumberLiteral(&Result);
       break;
 
     default:
@@ -153,8 +153,16 @@ class Parser::CodeTokenizer {
             break;
           ++TokenLength;
         }
-        Result.Kind = TokenInfo::TK_Ident;
-        Result.Text = Code.substr(0, TokenLength);
+        if (TokenLength == 4 && Code.startswith("true")) {
+          Result.Kind = TokenInfo::TK_Literal;
+          Result.Value = true;
+        } else if (TokenLength == 5 && Code.startswith("false")) {
+          Result.Kind = TokenInfo::TK_Literal;
+          Result.Value = false;
+        } else {
+          Result.Kind = TokenInfo::TK_Ident;
+          Result.Text = Code.substr(0, TokenLength);
+        }
         Code = Code.drop_front(TokenLength);
       } else {
         Result.Kind = TokenInfo::TK_InvalidChar;
@@ -168,8 +176,9 @@ class Parser::CodeTokenizer {
     return Result;
   }
 
-  /// \brief Consume an unsigned literal.
-  void consumeUnsignedLiteral(TokenInfo *Result) {
+  /// \brief Consume an unsigned and float literal.
+  void consumeNumberLiteral(TokenInfo *Result) {
+    bool isFloatingLiteral = false;
     unsigned Length = 1;
     if (Code.size() > 1) {
       // Consume the 'x' or 'b' radix modifier, if present.
@@ -180,20 +189,44 @@ class Parser::CodeTokenizer {
     while (Length < Code.size() && isHexDigit(Code[Length]))
       ++Length;
 
+    // Try to recognize a floating point literal.
+    while (Length < Code.size()) {
+      char c = Code[Length];
+      if (c == '-' || c == '+' || c == '.' || isHexDigit(c)) {
+        isFloatingLiteral = true;
+        Length++;
+      } else {
+        break;
+      }
+    }
+
     Result->Text = Code.substr(0, Length);
     Code = Code.drop_front(Length);
 
-    unsigned Value;
-    if (!Result->Text.getAsInteger(0, Value)) {
-      Result->Kind = TokenInfo::TK_Literal;
-      Result->Value = Value;
+    if (isFloatingLiteral) {
+      char *end;
+      errno = 0;
+      std::string Text = Result->Text.str();
+      double doubleValue = strtod(Text.c_str(), &end);
+      if (*end == 0 && errno == 0) {
+        Result->Kind = TokenInfo::TK_Literal;
+        Result->Value = doubleValue;
+        return;
+      }
     } else {
-      SourceRange Range;
-      Range.Start = Result->Range.Start;
-      Range.End = currentLocation();
-      Error->addError(Range, Error->ET_ParserUnsignedError) << Result->Text;
-      Result->Kind = TokenInfo::TK_Error;
+      unsigned Value;
+      if (!Result->Text.getAsInteger(0, Value)) {
+        Result->Kind = TokenInfo::TK_Literal;
+        Result->Value = Value;
+        return;
+      }
     }
+
+    SourceRange Range;
+    Range.Start = Result->Range.Start;
+    Range.End = currentLocation();
+    Error->addError(Range, Error->ET_ParserNumberError) << Result->Text;
+    Result->Kind = TokenInfo::TK_Error;
   }
 
   /// \brief Consume a string literal.
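
consumeNumberLiteral() keeps the tokenizer's consume-then-classify flow: grab the maximal run of literal characters first, then validate. For floats the validation is strtod() with both an end-pointer check and an errno check, since strtod() reports range errors only through errno. A standalone sketch of just that validation step (parseDouble is a hypothetical helper mirroring the checks above):

#include <cerrno>
#include <cstdlib>
#include <iostream>
#include <string>

// Returns true and sets Out only if Text is a complete floating-point
// literal: the end pointer must reach the terminating NUL (no trailing
// garbage) and errno must stay clear (no overflow/underflow).
static bool parseDouble(const std::string &Text, double &Out) {
  errno = 0;
  char *End = nullptr;
  double V = std::strtod(Text.c_str(), &End);
  if (End == Text.c_str() || *End != '\0' || errno != 0)
    return false;
  Out = V;
  return true;
}

int main() {
  double D = 0;
  std::cout << parseDouble("1e-4", D) << ' ' << D << '\n'; // 1 0.0001
  std::cout << parseDouble("1e-", D) << '\n';              // 0 (incomplete)
}
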
diff --git a/interpreter/llvm/src/tools/clang/lib/ASTMatchers/Dynamic/Registry.cpp b/interpreter/llvm/src/tools/clang/lib/ASTMatchers/Dynamic/Registry.cpp
index 26743d86f5e73..031ceb320306d 100644
--- a/interpreter/llvm/src/tools/clang/lib/ASTMatchers/Dynamic/Registry.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/ASTMatchers/Dynamic/Registry.cpp
@@ -56,20 +56,24 @@ void RegistryMaps::registerMatcher(
   registerMatcher(#name, internal::makeMatcherAutoMarshall(                    \
                              ::clang::ast_matchers::name, #name));
 
+#define REGISTER_MATCHER_OVERLOAD(name)                                        \
+  registerMatcher(#name,                                                       \
+      llvm::make_unique<internal::OverloadedMatcherDescriptor>(name##Callbacks))
+
 #define SPECIFIC_MATCHER_OVERLOAD(name, Id)                                    \
   static_cast<::clang::ast_matchers::name##_Type##Id>(                         \
       ::clang::ast_matchers::name)
 
+#define MATCHER_OVERLOAD_ENTRY(name, Id)                                       \
+        internal::makeMatcherAutoMarshall(SPECIFIC_MATCHER_OVERLOAD(name, Id), \
+                                          #name)
+
 #define REGISTER_OVERLOADED_2(name)                                            \
   do {                                                                         \
-    std::unique_ptr<MatcherDescriptor> Callbacks[] = {                        \
-        internal::makeMatcherAutoMarshall(SPECIFIC_MATCHER_OVERLOAD(name, 0),  \
-                                          #name),                              \
-        internal::makeMatcherAutoMarshall(SPECIFIC_MATCHER_OVERLOAD(name, 1),  \
-                                          #name)};                             \
-    registerMatcher(                                                           \
-        #name,                                                                 \
-        llvm::make_unique<internal::OverloadedMatcherDescriptor>(Callbacks));  \
+    std::unique_ptr<MatcherDescriptor> name##Callbacks[] = {                  \
+        MATCHER_OVERLOAD_ENTRY(name, 0),                                       \
+        MATCHER_OVERLOAD_ENTRY(name, 1)};                                      \
+    REGISTER_MATCHER_OVERLOAD(name);                                           \
   } while (0)
 
 /// \brief Generate a registry map with all the known matchers.
@@ -83,7 +87,6 @@ RegistryMaps::RegistryMaps() {
   // findAll
   //
   // Other:
-  // equals
   // equalsNode
 
   REGISTER_OVERLOADED_2(callee);
@@ -96,6 +99,13 @@ RegistryMaps::RegistryMaps() {
   REGISTER_OVERLOADED_2(references);
   REGISTER_OVERLOADED_2(thisPointerType);
 
+  std::unique_ptr<MatcherDescriptor> equalsCallbacks[] = {
+      MATCHER_OVERLOAD_ENTRY(equals, 0),
+      MATCHER_OVERLOAD_ENTRY(equals, 1),
+      MATCHER_OVERLOAD_ENTRY(equals, 2),
+  };
+  REGISTER_MATCHER_OVERLOAD(equals);
+
   REGISTER_MATCHER(accessSpecDecl);
   REGISTER_MATCHER(addrLabelExpr);
   REGISTER_MATCHER(alignOfExpr);
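
The macro refactoring above separates building the callback array from registering it, so 'equals' can register three overloads without a REGISTER_OVERLOADED_3 macro: one overloaded descriptor owns all per-overload callbacks and is registered under a single name. A standalone sketch of that one-name, many-callbacks shape (Descriptor, Simple, and Overloaded are invented stand-ins for the real marshaller classes):

#include <iostream>
#include <map>
#include <memory>
#include <string>
#include <vector>

// Invented stand-ins for MatcherDescriptor / OverloadedMatcherDescriptor.
struct Descriptor {
  virtual ~Descriptor() = default;
  virtual void run() const = 0;
};

struct Simple : Descriptor {
  std::string Name;
  explicit Simple(std::string N) : Name(std::move(N)) {}
  void run() const override { std::cout << "  trying " << Name << "\n"; }
};

// One descriptor owning every overload callback, registered under one name.
struct Overloaded : Descriptor {
  std::vector<std::unique_ptr<Descriptor>> Callbacks;
  void run() const override {
    for (const auto &C : Callbacks)
      C->run();   // the real class selects the overload whose kinds match
  }
};

int main() {
  std::map<std::string, std::unique_ptr<Descriptor>> Registry;

  auto O = std::make_unique<Overloaded>();
  O->Callbacks.push_back(std::make_unique<Simple>("equals/overload0"));
  O->Callbacks.push_back(std::make_unique<Simple>("equals/overload1"));
  O->Callbacks.push_back(std::make_unique<Simple>("equals/overload2"));
  Registry["equals"] = std::move(O);   // single name, three callbacks

  Registry["equals"]->run();
}
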
diff --git a/interpreter/llvm/src/tools/clang/lib/ASTMatchers/Dynamic/VariantValue.cpp b/interpreter/llvm/src/tools/clang/lib/ASTMatchers/Dynamic/VariantValue.cpp
index f0339ed479cd6..57858d00acb45 100644
--- a/interpreter/llvm/src/tools/clang/lib/ASTMatchers/Dynamic/VariantValue.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/ASTMatchers/Dynamic/VariantValue.cpp
@@ -24,6 +24,10 @@ std::string ArgKind::asString() const {
   switch (getArgKind()) {
   case AK_Matcher:
     return (Twine("Matcher<") + MatcherKind.asStringRef() + ">").str();
+  case AK_Boolean:
+    return "boolean";
+  case AK_Double:
+    return "double";
   case AK_Unsigned:
     return "unsigned";
   case AK_String:
@@ -247,6 +251,14 @@ VariantValue::VariantValue(const VariantValue &Other) : Type(VT_Nothing) {
   *this = Other;
 }
 
+VariantValue::VariantValue(bool Boolean) : Type(VT_Nothing) {
+  setBoolean(Boolean);
+}
+
+VariantValue::VariantValue(double Double) : Type(VT_Nothing) {
+  setDouble(Double);
+}
+
 VariantValue::VariantValue(unsigned Unsigned) : Type(VT_Nothing) {
   setUnsigned(Unsigned);
 }
@@ -265,6 +277,12 @@ VariantValue &VariantValue::operator=(const VariantValue &Other) {
   if (this == &Other) return *this;
   reset();
   switch (Other.Type) {
+  case VT_Boolean:
+    setBoolean(Other.getBoolean());
+    break;
+  case VT_Double:
+    setDouble(Other.getDouble());
+    break;
   case VT_Unsigned:
     setUnsigned(Other.getUnsigned());
     break;
@@ -290,6 +308,8 @@ void VariantValue::reset() {
     delete Value.Matcher;
     break;
   // Cases that do nothing.
+  case VT_Boolean:
+  case VT_Double:
   case VT_Unsigned:
   case VT_Nothing:
     break;
@@ -297,6 +317,36 @@ void VariantValue::reset() {
   Type = VT_Nothing;
 }
 
+bool VariantValue::isBoolean() const {
+  return Type == VT_Boolean;
+}
+
+bool VariantValue::getBoolean() const {
+  assert(isBoolean());
+  return Value.Boolean;
+}
+
+void VariantValue::setBoolean(bool NewValue) {
+  reset();
+  Type = VT_Boolean;
+  Value.Boolean = NewValue;
+}
+
+bool VariantValue::isDouble() const {
+  return Type == VT_Double;
+}
+
+double VariantValue::getDouble() const {
+  assert(isDouble());
+  return Value.Double;
+}
+
+void VariantValue::setDouble(double NewValue) {
+  reset();
+  Type = VT_Double;
+  Value.Double = NewValue;
+}
+
 bool VariantValue::isUnsigned() const {
   return Type == VT_Unsigned;
 }
@@ -344,6 +394,18 @@ void VariantValue::setMatcher(const VariantMatcher &NewValue) {
 
 bool VariantValue::isConvertibleTo(ArgKind Kind, unsigned *Specificity) const {
   switch (Kind.getArgKind()) {
+  case ArgKind::AK_Boolean:
+    if (!isBoolean())
+      return false;
+    *Specificity = 1;
+    return true;
+
+  case ArgKind::AK_Double:
+    if (!isDouble())
+      return false;
+    *Specificity = 1;
+    return true;
+
   case ArgKind::AK_Unsigned:
     if (!isUnsigned())
       return false;
@@ -383,6 +445,8 @@ std::string VariantValue::getTypeAsString() const {
   switch (Type) {
   case VT_String: return "String";
   case VT_Matcher: return getMatcher().getTypeAsString();
+  case VT_Boolean: return "Boolean";
+  case VT_Double: return "Double";
   case VT_Unsigned: return "Unsigned";
   case VT_Nothing: return "Nothing";
   }
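
VariantValue follows a strict tagged-union discipline that the new arms extend: each setter calls reset() before flipping the tag, each getter asserts its tag, and copy assignment dispatches on the source's tag. A minimal standalone sketch of the same pattern, reduced to the two trivially-copyable arms added here:

#include <cassert>

// Minimal tagged union in the VariantValue style: setters reset() first so
// a previously held value is released, getters assert the tag before use.
class TaggedValue {
  enum { VT_Nothing, VT_Boolean, VT_Double } Type = VT_Nothing;
  union { bool Boolean; double Double; } Value;

public:
  bool isBoolean() const { return Type == VT_Boolean; }
  bool getBoolean() const { assert(isBoolean()); return Value.Boolean; }
  void setBoolean(bool B) { reset(); Type = VT_Boolean; Value.Boolean = B; }

  bool isDouble() const { return Type == VT_Double; }
  double getDouble() const { assert(isDouble()); return Value.Double; }
  void setDouble(double D) { reset(); Type = VT_Double; Value.Double = D; }

  void reset() {
    // Trivial arms need no cleanup; owning arms (like VT_Matcher in the
    // real class) would delete their payload here before the tag changes.
    Type = VT_Nothing;
  }
};

int main() {
  TaggedValue V;
  V.setBoolean(true);
  assert(V.getBoolean());
  V.setDouble(2.5);                 // reset() runs before the tag flips
  assert(V.isDouble() && !V.isBoolean());
  return 0;
}
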
diff --git a/interpreter/llvm/src/tools/clang/lib/Analysis/AnalysisDeclContext.cpp b/interpreter/llvm/src/tools/clang/lib/Analysis/AnalysisDeclContext.cpp
index 6b58916162f63..ec15f34fb231d 100644
--- a/interpreter/llvm/src/tools/clang/lib/Analysis/AnalysisDeclContext.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Analysis/AnalysisDeclContext.cpp
@@ -67,6 +67,7 @@ AnalysisDeclContextManager::AnalysisDeclContextManager(bool useUnoptimizedCFG,
                                                        bool addImplicitDtors,
                                                        bool addInitializers,
                                                        bool addTemporaryDtors,
+                                                       bool addLifetime,
                                                        bool synthesizeBodies,
                                                        bool addStaticInitBranch,
                                                        bool addCXXNewAllocator,
@@ -77,6 +78,7 @@ AnalysisDeclContextManager::AnalysisDeclContextManager(bool useUnoptimizedCFG,
   cfgBuildOptions.AddImplicitDtors = addImplicitDtors;
   cfgBuildOptions.AddInitializers = addInitializers;
   cfgBuildOptions.AddTemporaryDtors = addTemporaryDtors;
+  cfgBuildOptions.AddLifetime = addLifetime;
   cfgBuildOptions.AddStaticInitBranches = addStaticInitBranch;
   cfgBuildOptions.AddCXXNewAllocator = addCXXNewAllocator;
 }
@@ -92,6 +94,8 @@ Stmt *AnalysisDeclContext::getBody(bool &IsAutosynthesized) const {
   IsAutosynthesized = false;
   if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) {
     Stmt *Body = FD->getBody();
+    if (auto *CoroBody = dyn_cast_or_null<CoroutineBodyStmt>(Body))
+      Body = CoroBody->getBody();
     if (Manager && Manager->synthesizeBodies()) {
       Stmt *SynthesizedBody =
           getBodyFarm(getASTContext(), Manager->Injector.get()).getBody(FD);
diff --git a/interpreter/llvm/src/tools/clang/lib/Analysis/CFG.cpp b/interpreter/llvm/src/tools/clang/lib/Analysis/CFG.cpp
index 2a2b3d73b5caf..6a77455edeef6 100644
--- a/interpreter/llvm/src/tools/clang/lib/Analysis/CFG.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Analysis/CFG.cpp
@@ -233,6 +233,7 @@ class LocalScope {
     }
 
     int distance(const_iterator L);
+    const_iterator shared_parent(const_iterator L);
   };
 
   friend class const_iterator;
@@ -275,6 +276,30 @@ int LocalScope::const_iterator::distance(LocalScope::const_iterator L) {
   return D;
 }
 
+/// Calculates the closest parent of this iterator
+/// that is in a scope reachable through the parents of L.
+/// I.e. when using 'goto' from this to L, the lifetime of all variables
+/// between this and shared_parent(L) end.
+LocalScope::const_iterator
+LocalScope::const_iterator::shared_parent(LocalScope::const_iterator L) {
+  llvm::SmallPtrSet<const LocalScope *, 4> ScopesOfL;
+  while (true) {
+    ScopesOfL.insert(L.Scope);
+    if (L == const_iterator())
+      break;
+    L = L.Scope->Prev;
+  }
+
+  const_iterator F = *this;
+  while (true) {
+    if (ScopesOfL.count(F.Scope))
+      return F;
+    assert(F != const_iterator() &&
+           "L iterator is not reachable from F iterator.");
+    F = F.Scope->Prev;
+  }
+}
+
 /// Structure for specifying position in CFG during its build process. It
 /// consists of CFGBlock that specifies position in CFG and
 /// LocalScope::const_iterator that specifies position in LocalScope graph.
@@ -579,6 +604,10 @@ class CFGBuilder {
   CFGBlock *addInitializer(CXXCtorInitializer *I);
   void addAutomaticObjDtors(LocalScope::const_iterator B,
                             LocalScope::const_iterator E, Stmt *S);
+  void addLifetimeEnds(LocalScope::const_iterator B,
+                       LocalScope::const_iterator E, Stmt *S);
+  void addAutomaticObjHandling(LocalScope::const_iterator B,
+                               LocalScope::const_iterator E, Stmt *S);
   void addImplicitDtorsForDestructor(const CXXDestructorDecl *DD);
 
   // Local scopes creation.
@@ -619,6 +648,10 @@ class CFGBuilder {
     B->appendAutomaticObjDtor(VD, S, cfg->getBumpVectorContext());
   }
 
+  void appendLifetimeEnds(CFGBlock *B, VarDecl *VD, Stmt *S) {
+    B->appendLifetimeEnds(VD, S, cfg->getBumpVectorContext());
+  }
+
   void appendDeleteDtor(CFGBlock *B, CXXRecordDecl *RD, CXXDeleteExpr *DE) {
     B->appendDeleteDtor(RD, DE, cfg->getBumpVectorContext());
   }
@@ -626,6 +659,10 @@ class CFGBuilder {
   void prependAutomaticObjDtorsWithTerminator(CFGBlock *Blk,
       LocalScope::const_iterator B, LocalScope::const_iterator E);
 
+  void prependAutomaticObjLifetimeWithTerminator(CFGBlock *Blk,
+                                                 LocalScope::const_iterator B,
+                                                 LocalScope::const_iterator E);
+
   void addSuccessor(CFGBlock *B, CFGBlock *S, bool IsReachable = true) {
     B->addSuccessor(CFGBlock::AdjacentBlock(S, IsReachable),
                     cfg->getBumpVectorContext());
@@ -957,7 +994,8 @@ class CFGBuilder {
 
     return TryResult();
   }
-  
+
+  bool hasTrivialDestructor(VarDecl *VD);
 };
 
 inline bool AddStmtChoice::alwaysAdd(CFGBuilder &builder,
@@ -1031,6 +1069,9 @@ std::unique_ptr<CFG> CFGBuilder::buildCFG(const Decl *D, Stmt *Statement) {
   assert(Succ == &cfg->getExit());
   Block = nullptr;  // the EXIT block is empty.  Create all other blocks lazily.
 
+  assert(!(BuildOpts.AddImplicitDtors && BuildOpts.AddLifetime) &&
+         "AddImplicitDtors and AddLifetime cannot be used at the same time");
+
   if (BuildOpts.AddImplicitDtors)
     if (const CXXDestructorDecl *DD = dyn_cast_or_null<CXXDestructorDecl>(D))
       addImplicitDtorsForDestructor(DD);
@@ -1067,6 +1108,8 @@ std::unique_ptr CFGBuilder::buildCFG(const Decl *D, Stmt *Statement) {
     if (LI == LabelMap.end()) continue;
 
     JumpTarget JT = LI->second;
+    prependAutomaticObjLifetimeWithTerminator(B, I->scopePosition,
+                                              JT.scopePosition);
     prependAutomaticObjDtorsWithTerminator(B, I->scopePosition,
                                            JT.scopePosition);
     addSuccessor(B, JT.block);
@@ -1209,7 +1252,61 @@ static QualType getReferenceInitTemporaryType(ASTContext &Context,
 
   return Init->getType();
 }
-  
+
+void CFGBuilder::addAutomaticObjHandling(LocalScope::const_iterator B,
+                                         LocalScope::const_iterator E,
+                                         Stmt *S) {
+  if (BuildOpts.AddImplicitDtors)
+    addAutomaticObjDtors(B, E, S);
+  if (BuildOpts.AddLifetime)
+    addLifetimeEnds(B, E, S);
+}
+
+/// Add to current block lifetime-end markers for all automatic objects that
+/// leave the scope.
+void CFGBuilder::addLifetimeEnds(LocalScope::const_iterator B,
+                                 LocalScope::const_iterator E, Stmt *S) {
+  if (!BuildOpts.AddLifetime)
+    return;
+
+  if (B == E)
+    return;
+
+  // To go from B to E, one first goes up the scopes from B to P
+  // then sideways in one scope from P to P' and then down
+  // the scopes from P' to E.
+  // The lifetime of all objects between B and P end.
+  LocalScope::const_iterator P = B.shared_parent(E);
+  int dist = B.distance(P);
+  if (dist <= 0)
+    return;
+
+  // We need to perform the scope leaving in reverse order
+  SmallVector<VarDecl *, 10> DeclsTrivial;
+  SmallVector<VarDecl *, 10> DeclsNonTrivial;
+  DeclsTrivial.reserve(dist);
+  DeclsNonTrivial.reserve(dist);
+
+  for (LocalScope::const_iterator I = B; I != P; ++I)
+    if (hasTrivialDestructor(*I))
+      DeclsTrivial.push_back(*I);
+    else
+      DeclsNonTrivial.push_back(*I);
+
+  autoCreateBlock();
+  // Objects with trivial destructors end their lifetime last (when storage
+  // duration ends).
+  for (SmallVectorImpl<VarDecl *>::reverse_iterator I = DeclsTrivial.rbegin(),
+                                                    E = DeclsTrivial.rend();
+       I != E; ++I)
+    appendLifetimeEnds(Block, *I, S);
+
+  for (SmallVectorImpl<VarDecl *>::reverse_iterator
+           I = DeclsNonTrivial.rbegin(),
+           E = DeclsNonTrivial.rend();
+       I != E; ++I)
+    appendLifetimeEnds(Block, *I, S);
+}
+
 /// addAutomaticObjDtors - Add to current block automatic objects destructors
 /// for objects in range of local scope positions. Use S as trigger statement
 /// for destructors.
@@ -1309,7 +1406,7 @@ LocalScope* CFGBuilder::createOrReuseLocalScope(LocalScope* Scope) {
 /// addLocalScopeForStmt - Add LocalScope to local scopes tree for statement
 /// that should create implicit scope (e.g. if/else substatements). 
 void CFGBuilder::addLocalScopeForStmt(Stmt *S) {
-  if (!BuildOpts.AddImplicitDtors)
+  if (!BuildOpts.AddImplicitDtors && !BuildOpts.AddLifetime)
     return;
 
   LocalScope *Scope = nullptr;
@@ -1334,7 +1431,7 @@ void CFGBuilder::addLocalScopeForStmt(Stmt *S) {
 /// reuse Scope if not NULL.
 LocalScope* CFGBuilder::addLocalScopeForDeclStmt(DeclStmt *DS,
                                                  LocalScope* Scope) {
-  if (!BuildOpts.AddImplicitDtors)
+  if (!BuildOpts.AddImplicitDtors && !BuildOpts.AddLifetime)
     return Scope;
 
   for (auto *DI : DS->decls())
@@ -1343,23 +1440,7 @@ LocalScope* CFGBuilder::addLocalScopeForDeclStmt(DeclStmt *DS,
   return Scope;
 }
 
-/// addLocalScopeForVarDecl - Add LocalScope for variable declaration. It will
-/// create add scope for automatic objects and temporary objects bound to
-/// const reference. Will reuse Scope if not NULL.
-LocalScope* CFGBuilder::addLocalScopeForVarDecl(VarDecl *VD,
-                                                LocalScope* Scope) {
-  if (!BuildOpts.AddImplicitDtors)
-    return Scope;
-
-  // Check if variable is local.
-  switch (VD->getStorageClass()) {
-  case SC_None:
-  case SC_Auto:
-  case SC_Register:
-    break;
-  default: return Scope;
-  }
-
+bool CFGBuilder::hasTrivialDestructor(VarDecl *VD) {
   // Check for const references bound to temporary. Set type to pointee.
   QualType QT = VD->getType();
   if (QT.getTypePtr()->isReferenceType()) {
@@ -1370,44 +1451,74 @@ LocalScope* CFGBuilder::addLocalScopeForVarDecl(VarDecl *VD,
     // temporaries, and a single declaration can extend multiple temporaries.
     // We should look at the storage duration on each nested
     // MaterializeTemporaryExpr instead.
+
     const Expr *Init = VD->getInit();
     if (!Init)
-      return Scope;
+      return true;
 
     // Lifetime-extending a temporary.
     bool FoundMTE = false;
     QT = getReferenceInitTemporaryType(*Context, Init, &FoundMTE);
     if (!FoundMTE)
-      return Scope;
+      return true;
   }
 
   // Check for constant size array. Set type to array element type.
   while (const ConstantArrayType *AT = Context->getAsConstantArrayType(QT)) {
     if (AT->getSize() == 0)
-      return Scope;
+      return true;
     QT = AT->getElementType();
   }
 
   // Check if type is a C++ class with non-trivial destructor.
   if (const CXXRecordDecl *CD = QT->getAsCXXRecordDecl())
-    if (CD->hasDefinition() && !CD->hasTrivialDestructor()) {
+    return !CD->hasDefinition() || CD->hasTrivialDestructor();
+  return true;
+}
+
+/// addLocalScopeForVarDecl - Add LocalScope for variable declaration. It will
+/// create add scope for automatic objects and temporary objects bound to
+/// const reference. Will reuse Scope if not NULL.
+LocalScope* CFGBuilder::addLocalScopeForVarDecl(VarDecl *VD,
+                                                LocalScope* Scope) {
+  assert(!(BuildOpts.AddImplicitDtors && BuildOpts.AddLifetime) &&
+         "AddImplicitDtors and AddLifetime cannot be used at the same time");
+  if (!BuildOpts.AddImplicitDtors && !BuildOpts.AddLifetime)
+    return Scope;
+
+  // Check if variable is local.
+  switch (VD->getStorageClass()) {
+  case SC_None:
+  case SC_Auto:
+  case SC_Register:
+    break;
+  default: return Scope;
+  }
+
+  if (BuildOpts.AddImplicitDtors) {
+    if (!hasTrivialDestructor(VD)) {
       // Add the variable to scope
       Scope = createOrReuseLocalScope(Scope);
       Scope->addVar(VD);
       ScopePos = Scope->begin();
     }
+    return Scope;
+  }
+
+  assert(BuildOpts.AddLifetime);
+  // Add the variable to scope
+  Scope = createOrReuseLocalScope(Scope);
+  Scope->addVar(VD);
+  ScopePos = Scope->begin();
   return Scope;
 }
 
 /// addLocalScopeAndDtors - For given statement add local scope for it and
 /// add destructors that will cleanup the scope. Will reuse Scope if not NULL.
 void CFGBuilder::addLocalScopeAndDtors(Stmt *S) {
-  if (!BuildOpts.AddImplicitDtors)
-    return;
-
   LocalScope::const_iterator scopeBeginPos = ScopePos;
   addLocalScopeForStmt(S);
-  addAutomaticObjDtors(ScopePos, scopeBeginPos, S);
+  addAutomaticObjHandling(ScopePos, scopeBeginPos, S);
 }
 
 /// prependAutomaticObjDtorsWithTerminator - Prepend destructor CFGElements for
@@ -1419,6 +1530,8 @@ void CFGBuilder::addLocalScopeAndDtors(Stmt *S) {
 /// no-return destructors properly.
 void CFGBuilder::prependAutomaticObjDtorsWithTerminator(CFGBlock *Blk,
     LocalScope::const_iterator B, LocalScope::const_iterator E) {
+  if (!BuildOpts.AddImplicitDtors)
+    return;
   BumpVectorContext &C = cfg->getBumpVectorContext();
   CFGBlock::iterator InsertPos
     = Blk->beginAutomaticObjDtorsInsert(Blk->end(), B.distance(E), C);
@@ -1427,6 +1540,21 @@ void CFGBuilder::prependAutomaticObjDtorsWithTerminator(CFGBlock *Blk,
                                             Blk->getTerminator());
 }
 
+/// prependAutomaticObjLifetimeWithTerminator - Prepend lifetime CFGElements for
+/// variables with automatic storage duration to CFGBlock's elements vector.
+/// Elements will be prepended to physical beginning of the vector which
+/// happens to be the logical end. Use the block's terminator as the statement
+/// that specifies where each lifetime ends.
+void CFGBuilder::prependAutomaticObjLifetimeWithTerminator(
+    CFGBlock *Blk, LocalScope::const_iterator B, LocalScope::const_iterator E) {
+  if (!BuildOpts.AddLifetime)
+    return;
+  BumpVectorContext &C = cfg->getBumpVectorContext();
+  CFGBlock::iterator InsertPos =
+      Blk->beginLifetimeEndsInsert(Blk->end(), B.distance(E), C);
+  for (LocalScope::const_iterator I = B; I != E; ++I)
+    InsertPos = Blk->insertLifetimeEnds(InsertPos, *I, Blk->getTerminator());
+}
 /// Visit - Walk the subtree of a statement and add extra
 ///   blocks for ternary operators, &&, and ||.  We also process "," and
 ///   DeclStmts (which may contain nested control-flow).
@@ -1815,7 +1943,7 @@ CFGBlock *CFGBuilder::VisitBreakStmt(BreakStmt *B) {
   // If there is no target for the break, then we are looking at an incomplete
   // AST.  This means that the CFG cannot be constructed.
   if (BreakJumpTarget.block) {
-    addAutomaticObjDtors(ScopePos, BreakJumpTarget.scopePosition, B);
+    addAutomaticObjHandling(ScopePos, BreakJumpTarget.scopePosition, B);
     addSuccessor(Block, BreakJumpTarget.block);
   } else
     badCFG = true;
@@ -1947,13 +2075,12 @@ CFGBlock *CFGBuilder::VisitChooseExpr(ChooseExpr *C,
 
 CFGBlock *CFGBuilder::VisitCompoundStmt(CompoundStmt *C) {
   LocalScope::const_iterator scopeBeginPos = ScopePos;
-  if (BuildOpts.AddImplicitDtors) {
-    addLocalScopeForStmt(C);
-  }
+  addLocalScopeForStmt(C);
+
   if (!C->body_empty() && !isa<ReturnStmt>(*C->body_rbegin())) {
     // If the body ends with a ReturnStmt, the dtors will be added in
     // VisitReturnStmt.
-    addAutomaticObjDtors(ScopePos, scopeBeginPos, C);
+    addAutomaticObjHandling(ScopePos, scopeBeginPos, C);
   }
 
   CFGBlock *LastBlock = Block;
@@ -2183,7 +2310,7 @@ CFGBlock *CFGBuilder::VisitIfStmt(IfStmt *I) {
   if (VarDecl *VD = I->getConditionVariable())
     addLocalScopeForVarDecl(VD);
 
-  addAutomaticObjDtors(ScopePos, save_scope_pos.get(), I);
+  addAutomaticObjHandling(ScopePos, save_scope_pos.get(), I);
 
   // The block we were processing is now finished.  Make it the successor
   // block.
@@ -2308,7 +2435,7 @@ CFGBlock *CFGBuilder::VisitReturnStmt(ReturnStmt *R) {
   // Create the new block.
   Block = createBlock(false);
 
-  addAutomaticObjDtors(ScopePos, LocalScope::const_iterator(), R);
+  addAutomaticObjHandling(ScopePos, LocalScope::const_iterator(), R);
 
   // If the one of the destructors does not return, we already have the Exit
   // block as a successor.
@@ -2389,7 +2516,7 @@ CFGBlock *CFGBuilder::VisitGotoStmt(GotoStmt *G) {
     BackpatchBlocks.push_back(JumpSource(Block, ScopePos));
   else {
     JumpTarget JT = I->second;
-    addAutomaticObjDtors(ScopePos, JT.scopePosition, G);
+    addAutomaticObjHandling(ScopePos, JT.scopePosition, G);
     addSuccessor(Block, JT.block);
   }
 
@@ -2414,7 +2541,7 @@ CFGBlock *CFGBuilder::VisitForStmt(ForStmt *F) {
     addLocalScopeForVarDecl(VD);
   LocalScope::const_iterator ContinueScopePos = ScopePos;
 
-  addAutomaticObjDtors(ScopePos, save_scope_pos.get(), F);
+  addAutomaticObjHandling(ScopePos, save_scope_pos.get(), F);
 
   // "for" is a control-flow statement.  Thus we stop processing the current
   // block.
@@ -2466,7 +2593,7 @@ CFGBlock *CFGBuilder::VisitForStmt(ForStmt *F) {
    ContinueJumpTarget.block->setLoopTarget(F);
 
     // Loop body should end with destructor of Condition variable (if any).
-    addAutomaticObjDtors(ScopePos, LoopBeginScopePos, F);
+    addAutomaticObjHandling(ScopePos, LoopBeginScopePos, F);
 
     // If body is not a compound statement create implicit scope
     // and add destructors.
@@ -2753,7 +2880,7 @@ CFGBlock *CFGBuilder::VisitWhileStmt(WhileStmt *W) {
   LocalScope::const_iterator LoopBeginScopePos = ScopePos;
   if (VarDecl *VD = W->getConditionVariable()) {
     addLocalScopeForVarDecl(VD);
-    addAutomaticObjDtors(ScopePos, LoopBeginScopePos, W);
+    addAutomaticObjHandling(ScopePos, LoopBeginScopePos, W);
   }
 
   // "while" is a control-flow statement.  Thus we stop processing the current
@@ -2788,7 +2915,7 @@ CFGBlock *CFGBuilder::VisitWhileStmt(WhileStmt *W) {
     BreakJumpTarget = JumpTarget(LoopSuccessor, ScopePos);
 
     // Loop body should end with destructor of Condition variable (if any).
-    addAutomaticObjDtors(ScopePos, LoopBeginScopePos, W);
+    addAutomaticObjHandling(ScopePos, LoopBeginScopePos, W);
 
     // If body is not a compound statement create implicit scope
     // and add destructors.
@@ -3030,7 +3157,7 @@ CFGBlock *CFGBuilder::VisitContinueStmt(ContinueStmt *C) {
   // If there is no target for the continue, then we are looking at an
   // incomplete AST.  This means the CFG cannot be constructed.
   if (ContinueJumpTarget.block) {
-    addAutomaticObjDtors(ScopePos, ContinueJumpTarget.scopePosition, C);
+    addAutomaticObjHandling(ScopePos, ContinueJumpTarget.scopePosition, C);
     addSuccessor(Block, ContinueJumpTarget.block);
   } else
     badCFG = true;
@@ -3085,7 +3212,7 @@ CFGBlock *CFGBuilder::VisitSwitchStmt(SwitchStmt *Terminator) {
   if (VarDecl *VD = Terminator->getConditionVariable())
     addLocalScopeForVarDecl(VD);
 
-  addAutomaticObjDtors(ScopePos, save_scope_pos.get(), Terminator);
+  addAutomaticObjHandling(ScopePos, save_scope_pos.get(), Terminator);
 
   if (Block) {
     if (badCFG)
@@ -3373,7 +3500,7 @@ CFGBlock *CFGBuilder::VisitCXXCatchStmt(CXXCatchStmt *CS) {
   if (VarDecl *VD = CS->getExceptionDecl()) {
     LocalScope::const_iterator BeginScopePos = ScopePos;
     addLocalScopeForVarDecl(VD);
-    addAutomaticObjDtors(ScopePos, BeginScopePos, CS);
+    addAutomaticObjHandling(ScopePos, BeginScopePos, CS);
   }
 
   if (CS->getHandlerBlock())
@@ -3427,7 +3554,7 @@ CFGBlock *CFGBuilder::VisitCXXForRangeStmt(CXXForRangeStmt *S) {
     addLocalScopeForStmt(Begin);
   if (Stmt *End = S->getEndStmt())
     addLocalScopeForStmt(End);
-  addAutomaticObjDtors(ScopePos, save_scope_pos.get(), S);
+  addAutomaticObjHandling(ScopePos, save_scope_pos.get(), S);
 
   LocalScope::const_iterator ContinueScopePos = ScopePos;
 
@@ -3898,6 +4025,7 @@ CFGImplicitDtor::getDestructorDecl(ASTContext &astContext) const {
     case CFGElement::Statement:
     case CFGElement::Initializer:
     case CFGElement::NewAllocator:
+    case CFGElement::LifetimeEnds:
       llvm_unreachable("getDestructorDecl should only be used with "
                        "ImplicitDtors");
     case CFGElement::AutomaticObjectDtor: {
@@ -4308,6 +4436,12 @@ static void print_elem(raw_ostream &OS, StmtPrinterHelper &Helper,
     OS << ".~" << T->getAsCXXRecordDecl()->getName().str() << "()";
     OS << " (Implicit destructor)\n";
 
+  } else if (Optional<CFGLifetimeEnds> DE = E.getAs<CFGLifetimeEnds>()) {
+    const VarDecl *VD = DE->getVarDecl();
+    Helper.handleDecl(VD, OS);
+
+    OS << " (Lifetime ends)\n";
+
   } else if (Optional<CFGNewAllocator> NE = E.getAs<CFGNewAllocator>()) {
     OS << "CFGNewAllocator(";
     if (const CXXNewExpr *AllocExpr = NE->getAllocatorExpr())
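
shared_parent() is the new primitive worth spelling out: it collects every scope reachable from L (following Prev links up to the null outermost position) into a set, then walks up from this iterator until it lands in that set, the usual lowest-common-ancestor trick for parent-linked chains. A standalone sketch with a plain parent-pointer struct standing in for LocalScope:

#include <cassert>
#include <unordered_set>

struct Scope { const Scope *Prev; };   // stand-in for LocalScope's Prev chain

// Mirrors LocalScope::const_iterator::shared_parent(): record every scope
// reachable from L (including the terminating null position), then walk up
// from F until we land in that set.
static const Scope *sharedParent(const Scope *F, const Scope *L) {
  std::unordered_set<const Scope *> ScopesOfL;
  for (const Scope *S = L;; S = S->Prev) {
    ScopesOfL.insert(S);
    if (!S)                            // null marks the outermost position
      break;
  }
  while (!ScopesOfL.count(F)) {
    assert(F && "L is not reachable from F");
    F = F->Prev;                       // lifetimes of everything we pass end
  }
  return F;
}

int main() {
  Scope Root{nullptr}, A{&Root}, B{&A}, C{&Root};
  assert(sharedParent(&B, &C) == &Root); // goto from B to C ends B and A
  assert(sharedParent(&B, &A) == &A);
  return 0;
}
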
diff --git a/interpreter/llvm/src/tools/clang/lib/Analysis/CloneDetection.cpp b/interpreter/llvm/src/tools/clang/lib/Analysis/CloneDetection.cpp
index 5bbcbe4e5722b..5ea74989a7ec9 100644
--- a/interpreter/llvm/src/tools/clang/lib/Analysis/CloneDetection.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Analysis/CloneDetection.cpp
@@ -16,13 +16,13 @@
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/RecursiveASTVisitor.h"
 #include "clang/AST/Stmt.h"
-#include "clang/AST/StmtVisitor.h"
 #include "clang/Lex/Lexer.h"
-#include "llvm/ADT/StringRef.h"
 #include "llvm/Support/MD5.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Path.h"
 
 using namespace clang;
+using namespace clang::clone_detection;
 
 StmtSequence::StmtSequence(const CompoundStmt *Stmt, const Decl *D,
                            unsigned StartIndex, unsigned EndIndex)
@@ -103,12 +103,8 @@ static void printMacroName(llvm::raw_string_ostream &MacroStack,
   MacroStack << " ";
 }
 
-/// Returns a string that represents all macro expansions that expanded into the
-/// given SourceLocation.
-///
-/// If 'getMacroStack(A) == getMacroStack(B)' is true, then the SourceLocations
-/// A and B are expanded from the same macros in the same order.
-static std::string getMacroStack(SourceLocation Loc, ASTContext &Context) {
+std::string clone_detection::getMacroStack(SourceLocation Loc,
+                                           ASTContext &Context) {
   std::string MacroStack;
   llvm::raw_string_ostream MacroStackStream(MacroStack);
   SourceManager &SM = Context.getSourceManager();
@@ -123,184 +119,6 @@ static std::string getMacroStack(SourceLocation Loc, ASTContext &Context) {
   return MacroStack;
 }
 
-namespace {
-typedef unsigned DataPiece;
-
-/// Collects the data of a single Stmt.
-///
-/// This class defines what a code clone is: If it collects for two statements
-/// the same data, then those two statements are considered to be clones of each
-/// other.
-///
-/// All collected data is forwarded to the given data consumer of the type T.
-/// The data consumer class needs to provide a member method with the signature:
-///   update(StringRef Str)
-template <typename T>
-class StmtDataCollector : public ConstStmtVisitor<StmtDataCollector<T>> {
-
-  ASTContext &Context;
-  /// The data sink to which all data is forwarded.
-  T &DataConsumer;
-
-public:
-  /// Collects data of the given Stmt.
-  /// \param S The given statement.
-  /// \param Context The ASTContext of S.
-  /// \param DataConsumer The data sink to which all data is forwarded.
-  StmtDataCollector(const Stmt *S, ASTContext &Context, T &DataConsumer)
-      : Context(Context), DataConsumer(DataConsumer) {
-    this->Visit(S);
-  }
-
-  // Below are utility methods for appending different data to the vector.
-
-  void addData(DataPiece Integer) {
-    DataConsumer.update(
-        StringRef(reinterpret_cast<char *>(&Integer), sizeof(Integer)));
-  }
-
-  void addData(llvm::StringRef Str) { DataConsumer.update(Str); }
-
-  void addData(const QualType &QT) { addData(QT.getAsString()); }
-
-// The functions below collect the class specific data of each Stmt subclass.
-
-// Utility macro for defining a visit method for a given class. This method
-// calls back to the ConstStmtVisitor to visit all parent classes.
-#define DEF_ADD_DATA(CLASS, CODE)                                              \
-  void Visit##CLASS(const CLASS *S) {                                          \
-    CODE;                                                                      \
-    ConstStmtVisitor<StmtDataCollector<T>>::Visit##CLASS(S);                  \
-  }
-
-  DEF_ADD_DATA(Stmt, {
-    addData(S->getStmtClass());
-    // This ensures that macro generated code isn't identical to macro-generated
-    // code.
-    addData(getMacroStack(S->getLocStart(), Context));
-    addData(getMacroStack(S->getLocEnd(), Context));
-  })
-  DEF_ADD_DATA(Expr, { addData(S->getType()); })
-
-  //--- Builtin functionality ----------------------------------------------//
-  DEF_ADD_DATA(ArrayTypeTraitExpr, { addData(S->getTrait()); })
-  DEF_ADD_DATA(ExpressionTraitExpr, { addData(S->getTrait()); })
-  DEF_ADD_DATA(PredefinedExpr, { addData(S->getIdentType()); })
-  DEF_ADD_DATA(TypeTraitExpr, {
-    addData(S->getTrait());
-    for (unsigned i = 0; i < S->getNumArgs(); ++i)
-      addData(S->getArg(i)->getType());
-  })
-
-  //--- Calls --------------------------------------------------------------//
-  DEF_ADD_DATA(CallExpr, {
-    // Function pointers don't have a callee and we just skip hashing it.
-    if (const FunctionDecl *D = S->getDirectCallee()) {
-      // If the function is a template specialization, we also need to handle
-      // the template arguments as they are not included in the qualified name.
-      if (auto Args = D->getTemplateSpecializationArgs()) {
-        std::string ArgString;
-
-        // Print all template arguments into ArgString
-        llvm::raw_string_ostream OS(ArgString);
-        for (unsigned i = 0; i < Args->size(); ++i) {
-          Args->get(i).print(Context.getLangOpts(), OS);
-          // Add a padding character so that 'foo<X, XX>()' != 'foo<XX, X>()'.
-          OS << '\n';
-        }
-        OS.flush();
-
-        addData(ArgString);
-      }
-      addData(D->getQualifiedNameAsString());
-    }
-  })
-
-  //--- Exceptions ---------------------------------------------------------//
-  DEF_ADD_DATA(CXXCatchStmt, { addData(S->getCaughtType()); })
-
-  //--- C++ OOP Stmts ------------------------------------------------------//
-  DEF_ADD_DATA(CXXDeleteExpr, {
-    addData(S->isArrayFormAsWritten());
-    addData(S->isGlobalDelete());
-  })
-
-  //--- Casts --------------------------------------------------------------//
-  DEF_ADD_DATA(ObjCBridgedCastExpr, { addData(S->getBridgeKind()); })
-
-  //--- Miscellaneous Exprs ------------------------------------------------//
-  DEF_ADD_DATA(BinaryOperator, { addData(S->getOpcode()); })
-  DEF_ADD_DATA(UnaryOperator, { addData(S->getOpcode()); })
-
-  //--- Control flow -------------------------------------------------------//
-  DEF_ADD_DATA(GotoStmt, { addData(S->getLabel()->getName()); })
-  DEF_ADD_DATA(IndirectGotoStmt, {
-    if (S->getConstantTarget())
-      addData(S->getConstantTarget()->getName());
-  })
-  DEF_ADD_DATA(LabelStmt, { addData(S->getDecl()->getName()); })
-  DEF_ADD_DATA(MSDependentExistsStmt, { addData(S->isIfExists()); })
-  DEF_ADD_DATA(AddrLabelExpr, { addData(S->getLabel()->getName()); })
-
-  //--- Objective-C --------------------------------------------------------//
-  DEF_ADD_DATA(ObjCIndirectCopyRestoreExpr, { addData(S->shouldCopy()); })
-  DEF_ADD_DATA(ObjCPropertyRefExpr, {
-    addData(S->isSuperReceiver());
-    addData(S->isImplicitProperty());
-  })
-  DEF_ADD_DATA(ObjCAtCatchStmt, { addData(S->hasEllipsis()); })
-
-  //--- Miscellaneous Stmts ------------------------------------------------//
-  DEF_ADD_DATA(CXXFoldExpr, {
-    addData(S->isRightFold());
-    addData(S->getOperator());
-  })
-  DEF_ADD_DATA(GenericSelectionExpr, {
-    for (unsigned i = 0; i < S->getNumAssocs(); ++i) {
-      addData(S->getAssocType(i));
-    }
-  })
-  DEF_ADD_DATA(LambdaExpr, {
-    for (const LambdaCapture &C : S->captures()) {
-      addData(C.isPackExpansion());
-      addData(C.getCaptureKind());
-      if (C.capturesVariable())
-        addData(C.getCapturedVar()->getType());
-    }
-    addData(S->isGenericLambda());
-    addData(S->isMutable());
-  })
-  DEF_ADD_DATA(DeclStmt, {
-    auto numDecls = std::distance(S->decl_begin(), S->decl_end());
-      addData(static_cast<DataPiece>(numDecls));
-    for (const Decl *D : S->decls()) {
-      if (const VarDecl *VD = dyn_cast<VarDecl>(D)) {
-        addData(VD->getType());
-      }
-    }
-  })
-  DEF_ADD_DATA(AsmStmt, {
-    addData(S->isSimple());
-    addData(S->isVolatile());
-    addData(S->generateAsmString(Context));
-    for (unsigned i = 0; i < S->getNumInputs(); ++i) {
-      addData(S->getInputConstraint(i));
-    }
-    for (unsigned i = 0; i < S->getNumOutputs(); ++i) {
-      addData(S->getOutputConstraint(i));
-    }
-    for (unsigned i = 0; i < S->getNumClobbers(); ++i) {
-      addData(S->getClobber(i));
-    }
-  })
-  DEF_ADD_DATA(AttributedStmt, {
-    for (const Attr *A : S->getAttrs()) {
-      addData(std::string(A->getSpelling()));
-    }
-  })
-};
-} // end anonymous namespace
-
 void CloneDetector::analyzeCodeBody(const Decl *D) {
   assert(D);
   assert(D->hasBody());
@@ -366,6 +184,23 @@ void OnlyLargestCloneConstraint::constrain(
   }
 }
 
+bool FilenamePatternConstraint::isAutoGenerated(const CloneDetector::CloneGroup &Group) {
+  std::string Error;
+  if (IgnoredFilesPattern.empty() || Group.empty() || 
+      !IgnoredFilesRegex->isValid(Error))
+    return false;
+
+  for (const StmtSequence &S : Group) {
+    const SourceManager &SM = S.getASTContext().getSourceManager();
+    StringRef Filename = llvm::sys::path::filename(SM.getFilename(
+        S.getContainingDecl()->getLocation()));
+    if (IgnoredFilesRegex->match(Filename))
+      return true;
+  }
+
+  return false;
+}
+
 static size_t createHash(llvm::MD5 &Hash) {
   size_t HashCode;
 
@@ -404,16 +239,27 @@ size_t RecursiveCloneTypeIIConstraint::saveHash(
   }
 
   if (CS) {
-    for (unsigned Length = 2; Length <= CS->size(); ++Length) {
-      for (unsigned Pos = 0; Pos <= CS->size() - Length; ++Pos) {
-        llvm::MD5 Hash;
-        for (unsigned i = Pos; i < Pos + Length; ++i) {
-          size_t ChildHash = ChildHashes[i];
-          Hash.update(StringRef(reinterpret_cast<char *>(&ChildHash),
-                                sizeof(ChildHash)));
+    // If we're in a CompoundStmt, we hash all possible combinations of child
+    // statements to find clones in those subsequences.
+    // We first go through every possible starting position of a subsequence.
+    for (unsigned Pos = 0; Pos < CS->size(); ++Pos) {
+      // Then we try all possible lengths this subsequence could have and
+      // reuse the same hash object to make sure we only hash every child
+      // hash exactly once.
+      llvm::MD5 Hash;
+      for (unsigned Length = 1; Length <= CS->size() - Pos; ++Length) {
+        // Grab the current child hash and put it into our hash. We do
+        // -1 on the index because we start counting the length at 1.
+        size_t ChildHash = ChildHashes[Pos + Length - 1];
+        Hash.update(
+            StringRef(reinterpret_cast<char *>(&ChildHash), sizeof(ChildHash)));
+        // If we have at least two elements in our subsequence, we can start
+        // saving it.
+        if (Length > 1) {
+          llvm::MD5 SubHash = Hash;
+          StmtsByHash.push_back(std::make_pair(
+              createHash(SubHash), StmtSequence(CS, D, Pos, Pos + Length)));
         }
-        StmtsByHash.push_back(std::make_pair(
-            createHash(Hash), StmtSequence(CS, D, Pos, Pos + Length)));
       }
     }
   }
@@ -618,8 +464,7 @@ void CloneConstraint::splitCloneGroups(
         if (Indexes[j])
           continue;
 
-        // If a following StmtSequence belongs to our CloneGroup, we add it to
-        // it.
+        // If a following StmtSequence belongs to our CloneGroup, we add it.
         const StmtSequence &Candidate = HashGroup[j];
 
         if (!Compare(Prototype, Candidate))
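
The rewritten hashing loop is an asymptotic improvement: for each start position it keeps one running MD5 and snapshots a copy at every length, so each child hash is mixed in once per start position rather than once per (start, length) pair. A standalone sketch of the same incremental-snapshot idea, with a toy polynomial hash standing in for llvm::MD5:

#include <cstddef>
#include <iostream>
#include <utility>
#include <vector>

int main() {
  // Pretend these are the hashes of a CompoundStmt's child statements.
  std::vector<std::size_t> ChildHashes = {11, 22, 33, 44};
  std::vector<std::pair<std::size_t, std::pair<unsigned, unsigned>>>
      StmtsByHash;

  for (unsigned Pos = 0; Pos < ChildHashes.size(); ++Pos) {
    std::size_t Running = 0;           // one running "MD5" per start position
    for (unsigned Length = 1; Pos + Length <= ChildHashes.size(); ++Length) {
      // Mix in exactly one new child hash per iteration...
      Running = Running * 1099511628211ULL + ChildHashes[Pos + Length - 1];
      // ...and snapshot the state for every subsequence of length >= 2,
      // just like copying the MD5 object before finalizing it.
      if (Length > 1)
        StmtsByHash.push_back({Running, {Pos, Pos + Length}});
    }
  }

  for (const auto &E : StmtsByHash)
    std::cout << "[" << E.second.first << ", " << E.second.second
              << ") -> " << E.first << "\n";
}
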
diff --git a/interpreter/llvm/src/tools/clang/lib/Analysis/PrintfFormatString.cpp b/interpreter/llvm/src/tools/clang/lib/Analysis/PrintfFormatString.cpp
index ed7193ecb4379..50a3aa20bd195 100644
--- a/interpreter/llvm/src/tools/clang/lib/Analysis/PrintfFormatString.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Analysis/PrintfFormatString.cpp
@@ -441,6 +441,7 @@ ArgType PrintfSpecifier::getArgType(ASTContext &Ctx,
       case LengthModifier::AsShort:
         if (Ctx.getTargetInfo().getTriple().isOSMSVCRT())
           return Ctx.IntTy;
+        LLVM_FALLTHROUGH;
       default:
         return ArgType::Invalid();
     }
@@ -465,8 +466,7 @@ ArgType PrintfSpecifier::getArgType(ASTContext &Ctx,
       case LengthModifier::AsIntMax:
         return ArgType(Ctx.getIntMaxType(), "intmax_t");
       case LengthModifier::AsSizeT:
-        // FIXME: How to get the corresponding signed version of size_t?
-        return ArgType();
+        return ArgType(Ctx.getSignedSizeType(), "ssize_t");
       case LengthModifier::AsInt3264:
         return Ctx.getTargetInfo().getTriple().isArch64Bit()
                    ? ArgType(Ctx.LongLongTy, "__int64")
@@ -536,7 +536,7 @@ ArgType PrintfSpecifier::getArgType(ASTContext &Ctx,
       case LengthModifier::AsIntMax:
         return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t"));
       case LengthModifier::AsSizeT:
-        return ArgType(); // FIXME: ssize_t
+        return ArgType::PtrTo(ArgType(Ctx.getSignedSizeType(), "ssize_t"));
       case LengthModifier::AsPtrDiff:
         return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
       case LengthModifier::AsLongDouble:
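
With getSignedSizeType() available, '%zd' and '%zn' are now checked against the signed counterpart of size_t instead of being skipped. On POSIX systems that type is spelled ssize_t, so the checker now validates code like the following (an illustration only; the analyzer queries the target's signed size type rather than the host's):

#include <cstddef>
#include <cstdio>
#include <sys/types.h>   // ssize_t (POSIX); not part of standard C++

int main() {
  std::size_t U = 42;
  ssize_t N = -1;              // signed counterpart of size_t
  std::printf("%zu\n", U);     // was already checked against size_t
  std::printf("%zd\n", N);     // now checked against ssize_t, not skipped
  return 0;
}
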
diff --git a/interpreter/llvm/src/tools/clang/lib/Analysis/PseudoConstantAnalysis.cpp b/interpreter/llvm/src/tools/clang/lib/Analysis/PseudoConstantAnalysis.cpp
index 614f676fb1934..83b545a7be838 100644
--- a/interpreter/llvm/src/tools/clang/lib/Analysis/PseudoConstantAnalysis.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Analysis/PseudoConstantAnalysis.cpp
@@ -109,6 +109,7 @@ void PseudoConstantAnalysis::RunAnalysis() {
           // Do not visit the children
           continue;
 
+        LLVM_FALLTHROUGH;
       }
       case BO_AddAssign:
       case BO_SubAssign:
diff --git a/interpreter/llvm/src/tools/clang/lib/Analysis/ScanfFormatString.cpp b/interpreter/llvm/src/tools/clang/lib/Analysis/ScanfFormatString.cpp
index 3b93f1a57f1ff..534225985460b 100644
--- a/interpreter/llvm/src/tools/clang/lib/Analysis/ScanfFormatString.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Analysis/ScanfFormatString.cpp
@@ -341,6 +341,7 @@ ArgType ScanfSpecifier::getArgType(ASTContext &Ctx) const {
         case LengthModifier::AsShort:
           if (Ctx.getTargetInfo().getTriple().isOSMSVCRT())
             return ArgType::PtrTo(ArgType::AnyCharTy);
+          LLVM_FALLTHROUGH;
         default:
           return ArgType::Invalid();
       }
@@ -357,6 +358,7 @@ ArgType ScanfSpecifier::getArgType(ASTContext &Ctx) const {
         case LengthModifier::AsShort:
           if (Ctx.getTargetInfo().getTriple().isOSMSVCRT())
             return ArgType::PtrTo(ArgType::AnyCharTy);
+          LLVM_FALLTHROUGH;
         default:
           return ArgType::Invalid();
       }
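
The LLVM_FALLTHROUGH additions mark fall-throughs that are intentional: the MSVCRT special case returns early, and every other length modifier is meant to drop into the invalid default arm. A standalone sketch of that shape, compiled as C++17 where LLVM_FALLTHROUGH expands to [[fallthrough]] (the case values are invented stand-ins):

#include <iostream>

// Same shape as the AsShort cases above: a guarded early return, then an
// annotated fall-through into the default arm; without the attribute,
// -Wimplicit-fallthrough would warn here.
static const char *classify(int LengthMod, bool IsMSVCRT) {
  switch (LengthMod) {
  case 1:                      // stand-in for LengthModifier::AsShort
    if (IsMSVCRT)
      return "int";            // the MSVCRT special case
    [[fallthrough]];           // intentional: otherwise invalid
  default:
    return "invalid";
  }
}

int main() {
  std::cout << classify(1, true) << ' ' << classify(1, false) << '\n';
}
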
diff --git a/interpreter/llvm/src/tools/clang/lib/Basic/CMakeLists.txt b/interpreter/llvm/src/tools/clang/lib/Basic/CMakeLists.txt
index 2feb31851cfed..e971b55e8585d 100644
--- a/interpreter/llvm/src/tools/clang/lib/Basic/CMakeLists.txt
+++ b/interpreter/llvm/src/tools/clang/lib/Basic/CMakeLists.txt
@@ -15,8 +15,23 @@ function(find_first_existing_file out_var)
 endfunction()
 
 macro(find_first_existing_vc_file out_var path)
+  set(git_path "${path}/.git")
+
+  # Normally '.git' is a directory that contains a 'logs/HEAD' file that
+  # is updated as modifications are made to the repository. In case the
+  # repository is a Git submodule, '.git' is a file that contains text that
+  # indicates where the repository's Git directory exists.
+  if (EXISTS "${git_path}" AND NOT IS_DIRECTORY "${git_path}")
+    FILE(READ "${git_path}" file_contents)
+    if("${file_contents}" MATCHES "^gitdir: ([^\n]+)")
+      # '.git' is indeed a link to the submodule's Git directory.
+      # Use the path to that Git directory.
+      set(git_path "${path}/${CMAKE_MATCH_1}")
+    endif()
+  endif()
+
   find_first_existing_file(${out_var}
-    "${path}/.git/logs/HEAD" # Git
+    "${git_path}/logs/HEAD"  # Git or Git submodule
     "${path}/.svn/wc.db"     # SVN 1.7
     "${path}/.svn/entries"   # SVN 1.6
     )
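
The CMake change handles Git submodules, where '.git' is a file containing a 'gitdir: <path>' pointer rather than a directory. Since the patch itself is CMake, here is the same resolution logic as a small C++ sketch for illustration; resolveGitDir is a hypothetical helper, and the relative-path join mirrors the CMake string interpolation above:

#include <fstream>
#include <iostream>
#include <string>

// In a submodule, '.git' is a file whose first line reads
// "gitdir: <path>"; in a normal checkout it is the Git directory itself.
static std::string resolveGitDir(const std::string &RepoPath) {
  const std::string Prefix = "gitdir: ";
  std::ifstream In(RepoPath + "/.git");  // reading fails if it's a directory
  std::string Line;
  if (In && std::getline(In, Line) &&
      Line.compare(0, Prefix.size(), Prefix) == 0)
    return RepoPath + "/" + Line.substr(Prefix.size()); // follow the link
  return RepoPath + "/.git";                            // normal checkout
}

int main(int argc, char **argv) {
  std::string Repo = argc > 1 ? argv[1] : ".";
  std::cout << resolveGitDir(Repo) << "/logs/HEAD\n";
}
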
diff --git a/interpreter/llvm/src/tools/clang/lib/Basic/DiagnosticIDs.cpp b/interpreter/llvm/src/tools/clang/lib/Basic/DiagnosticIDs.cpp
index ce493c1e5caba..0cdaf8e03643f 100644
--- a/interpreter/llvm/src/tools/clang/lib/Basic/DiagnosticIDs.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Basic/DiagnosticIDs.cpp
@@ -510,6 +510,18 @@ StringRef DiagnosticIDs::getWarningOptionForDiag(unsigned DiagID) {
   return StringRef();
 }
 
+std::vector<std::string> DiagnosticIDs::getDiagnosticFlags() {
+  std::vector<std::string> Res;
+  for (size_t I = 1; DiagGroupNames[I] != '\0';) {
+    std::string Diag(DiagGroupNames + I + 1, DiagGroupNames[I]);
+    I += DiagGroupNames[I] + 1;
+    Res.push_back("-W" + Diag);
+    Res.push_back("-Wno-" + Diag);
+  }
+
+  return Res;
+}
+
 /// Return \c true if any diagnostics were found in this group, even if they
 /// were filtered out due to having the wrong flavor.
 static bool getDiagnosticsInGroup(diag::Flavor Flavor,
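
getDiagnosticFlags() decodes DiagGroupNames, a single char array of length-prefixed (Pascal-style) strings: the byte at index I holds a length, the name's bytes follow, and a zero length terminates the table. A standalone sketch of that decoding against a hand-built table (the two group names are invented examples):

#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

// Same layout as DiagGroupNames: a placeholder byte at index 0, then
// [len][bytes]... entries, terminated by a zero length.
static const char Names[] = {
    0,                                // placeholder slot 0
    6, 'u', 'n', 'u', 's', 'e', 'd',  // "unused"
    5, 'e', 'x', 't', 'r', 'a',       // "extra"
    0};                               // terminator

int main() {
  std::vector<std::string> Res;
  for (std::size_t I = 1; Names[I] != '\0';) {
    std::string Diag(Names + I + 1, Names[I]); // (pointer, count) constructor
    I += Names[I] + 1;                         // skip length byte plus name
    Res.push_back("-W" + Diag);
    Res.push_back("-Wno-" + Diag);
  }
  for (const auto &S : Res)
    std::cout << S << "\n"; // -Wunused -Wno-unused -Wextra -Wno-extra
}
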
diff --git a/interpreter/llvm/src/tools/clang/lib/Basic/IdentifierTable.cpp b/interpreter/llvm/src/tools/clang/lib/Basic/IdentifierTable.cpp
index 74c85376c7db7..372e0c417fd44 100644
--- a/interpreter/llvm/src/tools/clang/lib/Basic/IdentifierTable.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Basic/IdentifierTable.cpp
@@ -551,6 +551,7 @@ ObjCInstanceTypeFamily Selector::getInstTypeMethodFamily(Selector sel) {
     case 's':
       if (startsWithWord(name, "shared")) return OIT_ReturnsSelf;
       if (startsWithWord(name, "standard")) return OIT_Singleton;
+      break;
     case 'i':
       if (startsWithWord(name, "init")) return OIT_Init;
     default:
diff --git a/interpreter/llvm/src/tools/clang/lib/Basic/LangOptions.cpp b/interpreter/llvm/src/tools/clang/lib/Basic/LangOptions.cpp
index c8a774311efe1..db81507aa209b 100644
--- a/interpreter/llvm/src/tools/clang/lib/Basic/LangOptions.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Basic/LangOptions.cpp
@@ -29,9 +29,7 @@ void LangOptions::resetNonModularOptions() {
   Name = Default;
 #include "clang/Basic/LangOptions.def"
 
-  // FIXME: This should not be reset; modules can be different with different
-  // sanitizer options (this affects __has_feature(address_sanitizer) etc).
-  Sanitize.clear();
+  // These options do not affect AST generation.
   SanitizerBlacklistFiles.clear();
   XRayAlwaysInstrumentFiles.clear();
   XRayNeverInstrumentFiles.clear();
diff --git a/interpreter/llvm/src/tools/clang/lib/Basic/Module.cpp b/interpreter/llvm/src/tools/clang/lib/Basic/Module.cpp
index a6fd931cb174c..1d96afd476ef4 100644
--- a/interpreter/llvm/src/tools/clang/lib/Basic/Module.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Basic/Module.cpp
@@ -13,6 +13,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang/Basic/Module.h"
+#include "clang/Basic/CharInfo.h"
 #include "clang/Basic/FileManager.h"
 #include "clang/Basic/LangOptions.h"
 #include "clang/Basic/TargetInfo.h"
@@ -64,6 +65,7 @@ static bool hasFeature(StringRef Feature, const LangOptions &LangOpts,
   bool HasFeature = llvm::StringSwitch<bool>(Feature)
                         .Case("altivec", LangOpts.AltiVec)
                         .Case("blocks", LangOpts.Blocks)
+                        .Case("coroutines", LangOpts.CoroutinesTS)
                         .Case("cplusplus", LangOpts.CPlusPlus)
                         .Case("cplusplus11", LangOpts.CPlusPlus11)
                         .Case("freestanding", LangOpts.Freestanding)
@@ -124,7 +126,36 @@ const Module *Module::getTopLevelModule() const {
   return Result;
 }
 
-std::string Module::getFullModuleName() const {
+static StringRef getModuleNameFromComponent(
+    const std::pair<std::string, SourceLocation> &IdComponent) {
+  return IdComponent.first;
+}
+static StringRef getModuleNameFromComponent(StringRef R) { return R; }
+
+template<typename InputIter>
+static void printModuleId(raw_ostream &OS, InputIter Begin, InputIter End,
+                          bool AllowStringLiterals = true) {
+  for (InputIter It = Begin; It != End; ++It) {
+    if (It != Begin)
+      OS << ".";
+
+    StringRef Name = getModuleNameFromComponent(*It);
+    if (!AllowStringLiterals || isValidIdentifier(Name))
+      OS << Name;
+    else {
+      OS << '"';
+      OS.write_escaped(Name);
+      OS << '"';
+    }
+  }
+}
+
+template<typename Container>
+static void printModuleId(raw_ostream &OS, const Container &C) {
+  return printModuleId(OS, C.begin(), C.end());
+}
+
+std::string Module::getFullModuleName(bool AllowStringLiterals) const {
   SmallVector<StringRef, 2> Names;
   
   // Build up the set of module names (from innermost to outermost).
@@ -132,15 +163,11 @@ std::string Module::getFullModuleName() const {
     Names.push_back(M->Name);
   
   std::string Result;
-  for (SmallVectorImpl<StringRef>::reverse_iterator I = Names.rbegin(),
-                                                 IEnd = Names.rend();
-       I != IEnd; ++I) {
-    if (!Result.empty())
-      Result += '.';
-    
-    Result += *I;
-  }
-  
+
+  llvm::raw_string_ostream Out(Result);
+  printModuleId(Out, Names.rbegin(), Names.rend(), AllowStringLiterals);
+  Out.flush(); 
+
   return Result;
 }
 
@@ -239,14 +266,6 @@ Module *Module::findSubmodule(StringRef Name) const {
   return SubModules[Pos->getValue()];
 }
 
-static void printModuleId(raw_ostream &OS, const ModuleId &Id) {
-  for (unsigned I = 0, N = Id.size(); I != N; ++I) {
-    if (I)
-      OS << ".";
-    OS << Id[I].first;
-  }
-}
-
 void Module::getExportedModules(SmallVectorImpl<Module *> &Exported) const {
   // All non-explicit submodules are exported.
   for (std::vector<Module *>::const_iterator I = SubModules.begin(),
@@ -333,7 +352,8 @@ void Module::print(raw_ostream &OS, unsigned Indent) const {
     OS << "framework ";
   if (IsExplicit)
     OS << "explicit ";
-  OS << "module " << Name;
+  OS << "module ";
+  printModuleId(OS, &Name, &Name + 1);
 
   if (IsSystem || IsExternC) {
     OS.indent(Indent + 2);
@@ -393,11 +413,30 @@ void Module::print(raw_ostream &OS, unsigned Indent) const {
                {"exclude ", HK_Excluded}};
 
   for (auto &K : Kinds) {
+    assert(&K == &Kinds[K.Kind] && "kinds in wrong order");
     for (auto &H : Headers[K.Kind]) {
       OS.indent(Indent + 2);
       OS << K.Prefix << "header \"";
       OS.write_escaped(H.NameAsWritten);
-      OS << "\"\n";
+      OS << "\" { size " << H.Entry->getSize()
+         << " mtime " << H.Entry->getModificationTime() << " }\n";
+    }
+  }
+  for (auto *Unresolved : {&UnresolvedHeaders, &MissingHeaders}) {
+    for (auto &U : *Unresolved) {
+      OS.indent(Indent + 2);
+      OS << Kinds[U.Kind].Prefix << "header \"";
+      OS.write_escaped(U.FileName);
+      OS << "\"";
+      if (U.Size || U.ModTime) {
+        OS << " {";
+        if (U.Size)
+          OS << " size " << *U.Size;
+        if (U.ModTime)
+          OS << " mtime " << *U.ModTime;
+        OS << " }";
+      }
+      OS << "\n";
     }
   }
 
@@ -414,7 +453,7 @@ void Module::print(raw_ostream &OS, unsigned Indent) const {
     OS.indent(Indent + 2);
     OS << "export ";
     if (Module *Restriction = Exports[I].getPointer()) {
-      OS << Restriction->getFullModuleName();
+      OS << Restriction->getFullModuleName(true);
       if (Exports[I].getInt())
         OS << ".*";
     } else {
@@ -435,7 +474,7 @@ void Module::print(raw_ostream &OS, unsigned Indent) const {
   for (unsigned I = 0, N = DirectUses.size(); I != N; ++I) {
     OS.indent(Indent + 2);
     OS << "use ";
-    OS << DirectUses[I]->getFullModuleName();
+    OS << DirectUses[I]->getFullModuleName(true);
     OS << "\n";
   }
 
@@ -468,7 +507,7 @@ void Module::print(raw_ostream &OS, unsigned Indent) const {
   for (unsigned I = 0, N = Conflicts.size(); I != N; ++I) {
     OS.indent(Indent + 2);
     OS << "conflict ";
-    OS << Conflicts[I].Other->getFullModuleName();
+    OS << Conflicts[I].Other->getFullModuleName(true);
     OS << ", \"";
     OS.write_escaped(Conflicts[I].Message);
     OS << "\"\n";
diff --git a/interpreter/llvm/src/tools/clang/lib/Basic/OpenMPKinds.cpp b/interpreter/llvm/src/tools/clang/lib/Basic/OpenMPKinds.cpp
index 76a0e18c2d73f..050c0cc466db8 100644
--- a/interpreter/llvm/src/tools/clang/lib/Basic/OpenMPKinds.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Basic/OpenMPKinds.cpp
@@ -138,6 +138,7 @@ unsigned clang::getOpenMPSimpleClauseType(OpenMPClauseKind Kind,
   case OMPC_lastprivate:
   case OMPC_shared:
   case OMPC_reduction:
+  case OMPC_task_reduction:
   case OMPC_aligned:
   case OMPC_copyin:
   case OMPC_copyprivate:
@@ -277,6 +278,7 @@ const char *clang::getOpenMPSimpleClauseTypeName(OpenMPClauseKind Kind,
   case OMPC_lastprivate:
   case OMPC_shared:
   case OMPC_reduction:
+  case OMPC_task_reduction:
   case OMPC_aligned:
   case OMPC_copyin:
   case OMPC_copyprivate:
@@ -705,6 +707,16 @@ bool clang::isAllowedClauseForDirective(OpenMPDirectiveKind DKind,
 #define OPENMP_TARGET_TEAMS_DISTRIBUTE_SIMD_CLAUSE(Name)                       \
   case OMPC_##Name:                                                            \
     return true;
+#include "clang/Basic/OpenMPKinds.def"
+    default:
+      break;
+    }
+    break;
+  case OMPD_taskgroup:
+    switch (CKind) {
+#define OPENMP_TASKGROUP_CLAUSE(Name)                                          \
+  case OMPC_##Name:                                                            \
+    return true;
 #include "clang/Basic/OpenMPKinds.def"
     default:
       break;
@@ -719,7 +731,6 @@ bool clang::isAllowedClauseForDirective(OpenMPDirectiveKind DKind,
   case OMPD_taskyield:
   case OMPD_barrier:
   case OMPD_taskwait:
-  case OMPD_taskgroup:
   case OMPD_cancellation_point:
   case OMPD_declare_reduction:
     break;
@@ -840,7 +851,8 @@ bool clang::isOpenMPDistributeDirective(OpenMPDirectiveKind Kind) {
 bool clang::isOpenMPPrivate(OpenMPClauseKind Kind) {
   return Kind == OMPC_private || Kind == OMPC_firstprivate ||
          Kind == OMPC_lastprivate || Kind == OMPC_linear ||
-         Kind == OMPC_reduction; // TODO add next clauses like 'reduction'.
+         Kind == OMPC_reduction ||
+         Kind == OMPC_task_reduction; // TODO add next clauses like 'reduction'.
 }
 
 bool clang::isOpenMPThreadPrivate(OpenMPClauseKind Kind) {
diff --git a/interpreter/llvm/src/tools/clang/lib/Basic/SourceLocation.cpp b/interpreter/llvm/src/tools/clang/lib/Basic/SourceLocation.cpp
index a58d0465a6f42..89ddbc946a490 100644
--- a/interpreter/llvm/src/tools/clang/lib/Basic/SourceLocation.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Basic/SourceLocation.cpp
@@ -92,6 +92,76 @@ FullSourceLoc FullSourceLoc::getSpellingLoc() const {
   return FullSourceLoc(SrcMgr->getSpellingLoc(*this), *SrcMgr);
 }
 
+FullSourceLoc FullSourceLoc::getFileLoc() const {
+  assert(isValid());
+  return FullSourceLoc(SrcMgr->getFileLoc(*this), *SrcMgr);
+}
+
+std::pair<FullSourceLoc, FullSourceLoc>
+FullSourceLoc::getImmediateExpansionRange() const {
+  assert(isValid());
+  std::pair<SourceLocation, SourceLocation> Range =
+      SrcMgr->getImmediateExpansionRange(*this);
+  return std::make_pair(FullSourceLoc(Range.first, *SrcMgr),
+                        FullSourceLoc(Range.second, *SrcMgr));
+}
+
+PresumedLoc FullSourceLoc::getPresumedLoc(bool UseLineDirectives) const {
+  if (!isValid())
+    return PresumedLoc();
+
+  return SrcMgr->getPresumedLoc(*this, UseLineDirectives);
+}
+
+bool FullSourceLoc::isMacroArgExpansion(FullSourceLoc *StartLoc) const {
+  assert(isValid());
+  return SrcMgr->isMacroArgExpansion(*this, StartLoc);
+}
+
+FullSourceLoc FullSourceLoc::getImmediateMacroCallerLoc() const {
+  assert(isValid());
+  return FullSourceLoc(SrcMgr->getImmediateMacroCallerLoc(*this), *SrcMgr);
+}
+
+std::pair<FullSourceLoc, StringRef> FullSourceLoc::getModuleImportLoc() const {
+  if (!isValid())
+    return std::make_pair(FullSourceLoc(), StringRef());
+
+  std::pair<SourceLocation, StringRef> ImportLoc =
+      SrcMgr->getModuleImportLoc(*this);
+  return std::make_pair(FullSourceLoc(ImportLoc.first, *SrcMgr),
+                        ImportLoc.second);
+}
+
+unsigned FullSourceLoc::getFileOffset() const {
+  assert(isValid());
+  return SrcMgr->getFileOffset(*this);
+}
+
+unsigned FullSourceLoc::getLineNumber(bool *Invalid) const {
+  assert(isValid());
+  return SrcMgr->getLineNumber(getFileID(), getFileOffset(), Invalid);
+}
+
+unsigned FullSourceLoc::getColumnNumber(bool *Invalid) const {
+  assert(isValid());
+  return SrcMgr->getColumnNumber(getFileID(), getFileOffset(), Invalid);
+}
+
+std::pair<FullSourceLoc, FullSourceLoc>
+FullSourceLoc::getExpansionRange() const {
+  assert(isValid());
+  std::pair<SourceLocation, SourceLocation> Range =
+      SrcMgr->getExpansionRange(*this);
+  return std::make_pair(FullSourceLoc(Range.first, *SrcMgr),
+                        FullSourceLoc(Range.second, *SrcMgr));
+}
+
+const FileEntry *FullSourceLoc::getFileEntry() const {
+  assert(isValid());
+  return SrcMgr->getFileEntryForID(getFileID());
+}
+
 unsigned FullSourceLoc::getExpansionLineNumber(bool *Invalid) const {
   assert(isValid());
   return SrcMgr->getExpansionLineNumber(*this, Invalid);
diff --git a/interpreter/llvm/src/tools/clang/lib/Basic/SourceManager.cpp b/interpreter/llvm/src/tools/clang/lib/Basic/SourceManager.cpp
index 7ddbd2a5af04d..b8e46abb53717 100644
--- a/interpreter/llvm/src/tools/clang/lib/Basic/SourceManager.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Basic/SourceManager.cpp
@@ -73,11 +73,11 @@ void ContentCache::replaceBuffer(llvm::MemoryBuffer *B, bool DoNotFree) {
     Buffer.setInt(DoNotFree? DoNotFreeFlag : 0);
     return;
   }
-  
+
   if (shouldFreeBuffer())
     delete Buffer.getPointer();
   Buffer.setPointer(B);
-  Buffer.setInt(DoNotFree? DoNotFreeFlag : 0);
+  Buffer.setInt((B && DoNotFree) ? DoNotFreeFlag : 0);
 }
 
 llvm::MemoryBuffer *ContentCache::getBuffer(DiagnosticsEngine &Diag,
@@ -183,48 +183,22 @@ unsigned LineTableInfo::getLineTableFilenameID(StringRef Name) {
   return IterBool.first->second;
 }
 
-/// AddLineNote - Add a line note to the line table that indicates that there
-/// is a \#line at the specified FID/Offset location which changes the presumed
-/// location to LineNo/FilenameID.
-void LineTableInfo::AddLineNote(FileID FID, unsigned Offset,
-                                unsigned LineNo, int FilenameID) {
-  std::vector<LineEntry> &Entries = LineEntries[FID];
-
-  assert((Entries.empty() || Entries.back().FileOffset < Offset) &&
-         "Adding line entries out of order!");
-
-  SrcMgr::CharacteristicKind Kind = SrcMgr::C_User;
-  unsigned IncludeOffset = 0;
-
-  if (!Entries.empty()) {
-    // If this is a '#line 4' after '#line 42 "foo.h"', make sure to remember
-    // that we are still in "foo.h".
-    if (FilenameID == -1)
-      FilenameID = Entries.back().FilenameID;
-
-    // If we are after a line marker that switched us to system header mode, or
-    // that set #include information, preserve it.
-    Kind = Entries.back().FileKind;
-    IncludeOffset = Entries.back().IncludeOffset;
-  }
-
-  Entries.push_back(LineEntry::get(Offset, LineNo, FilenameID, Kind,
-                                   IncludeOffset));
-}
-
-/// AddLineNote This is the same as the previous version of AddLineNote, but is
-/// used for GNU line markers.  If EntryExit is 0, then this doesn't change the
-/// presumed \#include stack.  If it is 1, this is a file entry, if it is 2 then
-/// this is a file exit.  FileKind specifies whether this is a system header or
-/// extern C system header.
-void LineTableInfo::AddLineNote(FileID FID, unsigned Offset,
-                                unsigned LineNo, int FilenameID,
-                                unsigned EntryExit,
+/// Add a line note to the line table that indicates that there is a \#line or
+/// GNU line marker at the specified FID/Offset location which changes the
+/// presumed location to LineNo/FilenameID. If EntryExit is 0, then this doesn't
+/// change the presumed \#include stack.  If it is 1, this is a file entry, if
+/// it is 2 then this is a file exit. FileKind specifies whether this is a
+/// system header or extern C system header.
+void LineTableInfo::AddLineNote(FileID FID, unsigned Offset, unsigned LineNo,
+                                int FilenameID, unsigned EntryExit,
                                 SrcMgr::CharacteristicKind FileKind) {
-  assert(FilenameID != -1 && "Unspecified filename should use other accessor");
-
   std::vector<LineEntry> &Entries = LineEntries[FID];
 
+  // An unspecified FilenameID means use the last filename if available, or the
+  // main source file otherwise.
+  if (FilenameID == -1 && !Entries.empty())
+    FilenameID = Entries.back().FilenameID;
+
   assert((Entries.empty() || Entries.back().FileOffset < Offset) &&
          "Adding line entries out of order!");
 
@@ -281,47 +255,20 @@ unsigned SourceManager::getLineTableFilenameID(StringRef Name) {
   return getLineTable().getLineTableFilenameID(Name);
 }
 
-
 /// AddLineNote - Add a line note to the line table for the FileID and offset
 /// specified by Loc.  If FilenameID is -1, it is considered to be
 /// unspecified.
-void SourceManager::AddLineNote(SourceLocation Loc, unsigned LineNo,
-                                int FilenameID) {
-  std::pair<FileID, unsigned> LocInfo = getDecomposedExpansionLoc(Loc);
-
-  bool Invalid = false;
-  const SLocEntry &Entry = getSLocEntry(LocInfo.first, &Invalid);
-  if (!Entry.isFile() || Invalid)
-    return;
-  
-  const SrcMgr::FileInfo &FileInfo = Entry.getFile();
-
-  // Remember that this file has #line directives now if it doesn't already.
-  const_cast<SrcMgr::FileInfo&>(FileInfo).setHasLineDirectives();
-
-  getLineTable().AddLineNote(LocInfo.first, LocInfo.second, LineNo, FilenameID);
-}
-
-/// AddLineNote - Add a GNU line marker to the line table.
 void SourceManager::AddLineNote(SourceLocation Loc, unsigned LineNo,
                                 int FilenameID, bool IsFileEntry,
-                                bool IsFileExit, bool IsSystemHeader,
-                                bool IsExternCHeader) {
-  // If there is no filename and no flags, this is treated just like a #line,
-  // which does not change the flags of the previous line marker.
-  if (FilenameID == -1) {
-    assert(!IsFileEntry && !IsFileExit && !IsSystemHeader && !IsExternCHeader &&
-           "Can't set flags without setting the filename!");
-    return AddLineNote(Loc, LineNo, FilenameID);
-  }
-
+                                bool IsFileExit,
+                                SrcMgr::CharacteristicKind FileKind) {
   std::pair<FileID, unsigned> LocInfo = getDecomposedExpansionLoc(Loc);
 
   bool Invalid = false;
   const SLocEntry &Entry = getSLocEntry(LocInfo.first, &Invalid);
   if (!Entry.isFile() || Invalid)
     return;
-  
+
   const SrcMgr::FileInfo &FileInfo = Entry.getFile();
 
   // Remember that this file has #line directives now if it doesn't already.
@@ -329,14 +276,6 @@ void SourceManager::AddLineNote(SourceLocation Loc, unsigned LineNo,
 
   (void) getLineTable();
 
-  SrcMgr::CharacteristicKind FileKind;
-  if (IsExternCHeader)
-    FileKind = SrcMgr::C_ExternCSystem;
-  else if (IsSystemHeader)
-    FileKind = SrcMgr::C_System;
-  else
-    FileKind = SrcMgr::C_User;
-
   unsigned EntryExit = 0;
   if (IsFileEntry)
     EntryExit = 1;
@@ -425,6 +364,34 @@ void SourceManager::invalidateCache(FileID FID) {
   getFileManager().invalidateCache(const_cast<FileEntry *>(Entry));
 }
 
+void SourceManager::initializeForReplay(const SourceManager &Old) {
+  assert(MainFileID.isInvalid() && "expected uninitialized SourceManager");
+
+  auto CloneContentCache = [&](const ContentCache *Cache) -> ContentCache * {
+    auto *Clone = new (ContentCacheAlloc.Allocate<ContentCache>()) ContentCache;
+    Clone->OrigEntry = Cache->OrigEntry;
+    Clone->ContentsEntry = Cache->ContentsEntry;
+    Clone->BufferOverridden = Cache->BufferOverridden;
+    Clone->IsSystemFile = Cache->IsSystemFile;
+    Clone->IsTransient = Cache->IsTransient;
+    Clone->replaceBuffer(Cache->getRawBuffer(), /*DoNotFree*/true);
+    return Clone;
+  };
+
+  // Ensure all SLocEntries are loaded from the external source.
+  for (unsigned I = 0, N = Old.LoadedSLocEntryTable.size(); I != N; ++I)
+    if (!Old.SLocEntryLoaded[I])
+      Old.loadSLocEntry(I, nullptr);
+
+  // Inherit any content cache data from the old source manager.
+  for (auto &FileInfo : Old.FileInfos) {
+    SrcMgr::ContentCache *&Slot = FileInfos[FileInfo.first];
+    if (Slot)
+      continue;
+    Slot = CloneContentCache(FileInfo.second);
+  }
+}
+
 /// getOrCreateContentCache - Create or return a cached ContentCache for the
 /// specified file.
 const ContentCache *
@@ -2085,9 +2052,54 @@ bool SourceManager::isBeforeInTranslationUnit(SourceLocation LHS,
   if (LOffs.first.isInvalid() || ROffs.first.isInvalid())
     return LOffs.first.isInvalid() && !ROffs.first.isInvalid();
 
+  std::pair<bool, bool> InSameTU = isInTheSameTranslationUnit(LOffs, ROffs);
+  if (InSameTU.first)
+    return InSameTU.second;
+
+  // If we arrived here, the location is either in a built-ins buffer or
+  // associated with global inline asm. PR5662 and PR22576 are examples.
+
+  StringRef LB = getBuffer(LOffs.first)->getBufferIdentifier();
+  StringRef RB = getBuffer(ROffs.first)->getBufferIdentifier();
+  bool LIsBuiltins = LB == "<built-in>";
+  bool RIsBuiltins = RB == "<built-in>";
+  // Sort built-in before non-built-in.
+  if (LIsBuiltins || RIsBuiltins) {
+    if (LIsBuiltins != RIsBuiltins)
+      return LIsBuiltins;
+    // Both are in built-in buffers, but from different files. We just claim that
+    // lower IDs come first.
+    return LOffs.first < ROffs.first;
+  }
+  bool LIsAsm = LB == "<inline asm>";
+  bool RIsAsm = RB == "<inline asm>";
+  // Sort assembler after built-ins, but before the rest.
+  if (LIsAsm || RIsAsm) {
+    if (LIsAsm != RIsAsm)
+      return RIsAsm;
+    assert(LOffs.first == ROffs.first);
+    return false;
+  }
+  bool LIsScratch = LB == "<scratch space>";
+  bool RIsScratch = RB == "<scratch space>";
+  // Sort scratch after inline asm, but before the rest.
+  if (LIsScratch || RIsScratch) {
+    if (LIsScratch != RIsScratch)
+      return LIsScratch;
+    return LOffs.second < ROffs.second;
+  }
+  //AXEL: Work around diags from include chains not rooted in main file.
+  //AXEL: llvm_unreachable("Unsortable locations found");
+  assert(0 && "Unsortable locations found");
+  return LOffs.first < ROffs.first;
+}
+
+std::pair<bool, bool> SourceManager::isInTheSameTranslationUnit(
+    std::pair<FileID, unsigned> &LOffs,
+    std::pair<FileID, unsigned> &ROffs) const {
   // If the source locations are in the same file, just compare offsets.
   if (LOffs.first == ROffs.first)
-    return LOffs.second < ROffs.second;
+    return std::make_pair(true, LOffs.second < ROffs.second);
 
   // If we are comparing a source location with multiple locations in the same
   // file, we get a big win by caching the result.
@@ -2097,7 +2109,8 @@ bool SourceManager::isBeforeInTranslationUnit(SourceLocation LHS,
   // If we are comparing a source location with multiple locations in the same
   // file, we get a big win by caching the result.
   if (IsBeforeInTUCache.isCacheValid(LOffs.first, ROffs.first))
-    return IsBeforeInTUCache.getCachedResult(LOffs.second, ROffs.second);
+    return std::make_pair(
+        true, IsBeforeInTUCache.getCachedResult(LOffs.second, ROffs.second));
 
   // Okay, we missed in the cache, start updating the cache for this query.
   IsBeforeInTUCache.setQueryFIDs(LOffs.first, ROffs.first,
@@ -2127,47 +2140,12 @@ bool SourceManager::isBeforeInTranslationUnit(SourceLocation LHS,
   // locations within the common file and cache them.
   if (LOffs.first == ROffs.first) {
     IsBeforeInTUCache.setCommonLoc(LOffs.first, LOffs.second, ROffs.second);
-    return IsBeforeInTUCache.getCachedResult(LOffs.second, ROffs.second);
+    return std::make_pair(
+        true, IsBeforeInTUCache.getCachedResult(LOffs.second, ROffs.second));
   }
-
-  // If we arrived here, the location is either in a built-ins buffer or
-  // associated with global inline asm. PR5662 and PR22576 are examples.
-
   // Clear the lookup cache, it depends on a common location.
   IsBeforeInTUCache.clear();
-  StringRef LB = getBuffer(LOffs.first)->getBufferIdentifier();
-  StringRef RB = getBuffer(ROffs.first)->getBufferIdentifier();
-  bool LIsBuiltins = LB == "<built-in>";
-  bool RIsBuiltins = RB == "<built-in>";
-  // Sort built-in before non-built-in.
-  if (LIsBuiltins || RIsBuiltins) {
-    if (LIsBuiltins != RIsBuiltins)
-      return LIsBuiltins;
-    // Both are in built-in buffers, but from different files. We just claim that
-    // lower IDs come first.
-    return LOffs.first < ROffs.first;
-  }
-  bool LIsAsm = LB == "<inline asm>";
-  bool RIsAsm = RB == "<inline asm>";
-  // Sort assembler after built-ins, but before the rest.
-  if (LIsAsm || RIsAsm) {
-    if (LIsAsm != RIsAsm)
-      return RIsAsm;
-    assert(LOffs.first == ROffs.first);
-    return false;
-  }
-  bool LIsScratch = LB == "<scratch space>";
-  bool RIsScratch = RB == "<scratch space>";
-  // Sort scratch after inline asm, but before the rest.
-  if (LIsScratch || RIsScratch) {
-    if (LIsScratch != RIsScratch)
-      return LIsScratch;
-    return LOffs.second < ROffs.second;
-  }
-  //AXEL: Work around diags from include chains not rooted in main file.
-  //AXEL: llvm_unreachable("Unsortable locations found");
-  assert(0 && "Unsortable locations found");
-  return LOffs.first < ROffs.first;
+  return std::make_pair(false, false);
 }
 
 void SourceManager::PrintStats() const {
diff --git a/interpreter/llvm/src/tools/clang/lib/Basic/TargetInfo.cpp b/interpreter/llvm/src/tools/clang/lib/Basic/TargetInfo.cpp
index e19404dc54cba..4bcebadf458f6 100644
--- a/interpreter/llvm/src/tools/clang/lib/Basic/TargetInfo.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Basic/TargetInfo.cpp
@@ -143,9 +143,11 @@ const char *TargetInfo::getTypeConstantSuffix(IntType T) const {
   case UnsignedChar:
     if (getCharWidth() < getIntWidth())
       return "";
+    LLVM_FALLTHROUGH;
   case UnsignedShort:
     if (getShortWidth() < getIntWidth())
       return "";
+    LLVM_FALLTHROUGH;
   case UnsignedInt:      return "U";
   case UnsignedLong:     return "UL";
   case UnsignedLongLong: return "ULL";
@@ -505,6 +507,11 @@ bool TargetInfo::validateOutputConstraint(ConstraintInfo &Info) const {
     case '?': // Disparage slightly code.
     case '!': // Disparage severely.
     case '*': // Ignore for choosing register preferences.
+    case 'i': // Ignore i,n,E,F as output constraints (match from the other
+              // chars)
+    case 'n':
+    case 'E':
+    case 'F':
       break;  // Pass them.
     }
 
diff --git a/interpreter/llvm/src/tools/clang/lib/Basic/Targets.cpp b/interpreter/llvm/src/tools/clang/lib/Basic/Targets.cpp
index 92c561aa94134..5d75aa5a75288 100644
--- a/interpreter/llvm/src/tools/clang/lib/Basic/Targets.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Basic/Targets.cpp
@@ -111,6 +111,21 @@ class CloudABITargetInfo : public OSTargetInfo<Target> {
      : OSTargetInfo<Target>(Triple, Opts) {}
 };
 
+// Ananas target
+template<typename Target>
+class AnanasTargetInfo : public OSTargetInfo<Target> {
+protected:
+  void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
+                    MacroBuilder &Builder) const override {
+    // Ananas defines
+    Builder.defineMacro("__Ananas__");
+    Builder.defineMacro("__ELF__");
+  }
+public:
+  AnanasTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
+      : OSTargetInfo<Target>(Triple, Opts) {}
+};
+
 static void getDarwinDefines(MacroBuilder &Builder, const LangOptions &Opts,
                              const llvm::Triple &Triple,
                              StringRef &PlatformName,
@@ -484,6 +499,10 @@ class LinuxTargetInfo : public OSTargetInfo<Target> {
     switch (Triple.getArch()) {
     default:
       break;
+    case llvm::Triple::mips:
+    case llvm::Triple::mipsel:
+    case llvm::Triple::mips64:
+    case llvm::Triple::mips64el:
     case llvm::Triple::ppc:
     case llvm::Triple::ppc64:
     case llvm::Triple::ppc64le:
@@ -552,8 +571,6 @@ class OpenBSDTargetInfo : public OSTargetInfo<Target> {
 public:
   OpenBSDTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
       : OSTargetInfo(Triple, Opts) {
-    this->TLSSupported = false;
-
       switch (Triple.getArch()) {
         case llvm::Triple::x86:
         case llvm::Triple::x86_64:
@@ -2034,25 +2051,45 @@ ArrayRef<const char *> NVPTXTargetInfo::getGCCRegNames() const {
   return llvm::makeArrayRef(GCCRegNames);
 }
 
-static const LangAS::Map AMDGPUPrivateIsZeroMap = {
-    4,  // Default
-    1,  // opencl_global
-    3,  // opencl_local
-    2,  // opencl_constant
-    4,  // opencl_generic
-    1,  // cuda_device
-    2,  // cuda_constant
-    3   // cuda_shared
-};
-static const LangAS::Map AMDGPUGenericIsZeroMap = {
-    0,  // Default
-    1,  // opencl_global
-    3,  // opencl_local
-    2,  // opencl_constant
-    0,  // opencl_generic
-    1,  // cuda_device
-    2,  // cuda_constant
-    3   // cuda_shared
+static const LangAS::Map AMDGPUPrivIsZeroDefIsGenMap = {
+    4, // Default
+    1, // opencl_global
+    3, // opencl_local
+    2, // opencl_constant
+    4, // opencl_generic
+    1, // cuda_device
+    2, // cuda_constant
+    3  // cuda_shared
+};
+static const LangAS::Map AMDGPUGenIsZeroDefIsGenMap = {
+    0, // Default
+    1, // opencl_global
+    3, // opencl_local
+    2, // opencl_constant
+    0, // opencl_generic
+    1, // cuda_device
+    2, // cuda_constant
+    3  // cuda_shared
+};
+static const LangAS::Map AMDGPUPrivIsZeroDefIsPrivMap = {
+    0, // Default
+    1, // opencl_global
+    3, // opencl_local
+    2, // opencl_constant
+    4, // opencl_generic
+    1, // cuda_device
+    2, // cuda_constant
+    3  // cuda_shared
+};
+static const LangAS::Map AMDGPUGenIsZeroDefIsPrivMap = {
+    5, // Default
+    1, // opencl_global
+    3, // opencl_local
+    2, // opencl_constant
+    0, // opencl_generic
+    1, // cuda_device
+    2, // cuda_constant
+    3  // cuda_shared
 };
 
 // If you edit the description strings, make sure you update
@@ -2149,9 +2186,35 @@ class AMDGPUTargetInfo final : public TargetInfo {
                     : DataLayoutStringR600);
     assert(DataLayout->getAllocaAddrSpace() == AS.Private);
 
-    AddrSpaceMap = IsGenericZero ? &AMDGPUGenericIsZeroMap :
-        &AMDGPUPrivateIsZeroMap;
+    setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
+                       Triple.getEnvironment() == llvm::Triple::OpenCL ||
+                       Triple.getEnvironmentName() == "amdgizcl" ||
+                       !isAMDGCN(Triple));
     UseAddrSpaceMapMangling = true;
+
+    // Set pointer width and alignment for target address space 0.
+    PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits();
+    if (getMaxPointerWidth() == 64) {
+      LongWidth = LongAlign = 64;
+      SizeType = UnsignedLong;
+      PtrDiffType = SignedLong;
+      IntPtrType = SignedLong;
+    }
+  }
+
+  void setAddressSpaceMap(bool DefaultIsPrivate) {
+    if (isGenericZero(getTriple())) {
+      AddrSpaceMap = DefaultIsPrivate ? &AMDGPUGenIsZeroDefIsPrivMap
+                                      : &AMDGPUGenIsZeroDefIsGenMap;
+    } else {
+      AddrSpaceMap = DefaultIsPrivate ? &AMDGPUPrivIsZeroDefIsPrivMap
+                                      : &AMDGPUPrivIsZeroDefIsGenMap;
+    }
+  }
+
+  void adjust(LangOptions &Opts) override {
+    TargetInfo::adjust(Opts);
+    setAddressSpaceMap(Opts.OpenCL || !isAMDGCN(getTriple()));
   }
 
   uint64_t getPointerWidthV(unsigned AddrSpace) const override {
@@ -2164,6 +2227,10 @@ class AMDGPUTargetInfo final : public TargetInfo {
     return 64;
   }
 
+  uint64_t getPointerAlignV(unsigned AddrSpace) const override {
+    return getPointerWidthV(AddrSpace);
+  }
+
   uint64_t getMaxPointerWidth() const override {
     return getTriple().getArch() == llvm::Triple::amdgcn ? 64 : 32;
   }
@@ -2339,14 +2406,13 @@ class AMDGPUTargetInfo final : public TargetInfo {
     return LangAS::opencl_constant;
   }
 
-  /// \returns Target specific vtbl ptr address space.
-  unsigned getVtblPtrAddressSpace() const override {
-    // \todo: We currently have address spaces defined in AMDGPU Backend. It
-    // would be nice if we could use it here instead of using bare numbers (same
-    // applies to getDWARFAddressSpace).
-    return 2; // constant.
+  llvm::Optional<unsigned> getConstantAddressSpace() const override {
+    return LangAS::FirstTargetAddressSpace + AS.Constant;
   }
 
+  /// \returns Target specific vtbl ptr address space.
+  unsigned getVtblPtrAddressSpace() const override { return AS.Constant; }
+
   /// \returns If a target requires an address within a target specific address
   /// space \p AddressSpace to be converted in order to be used, then return the
   /// corresponding target specific DWARF address space.
@@ -2595,6 +2661,7 @@ class X86TargetInfo : public TargetInfo {
   bool HasFMA = false;
   bool HasF16C = false;
   bool HasAVX512CD = false;
+  bool HasAVX512VPOPCNTDQ = false;
   bool HasAVX512ER = false;
   bool HasAVX512PF = false;
   bool HasAVX512DQ = false;
@@ -2661,7 +2728,7 @@ class X86TargetInfo : public TargetInfo {
     CK_C3_2,
 
     /// This enumerator is a bit odd, as GCC no longer accepts -march=yonah.
-    /// Clang however has some logic to suport this.
+    /// Clang however has some logic to support this.
     // FIXME: Warn, deprecate, and potentially remove this.
     CK_Yonah,
     //@}
@@ -2692,6 +2759,7 @@ class X86TargetInfo : public TargetInfo {
     //@{
     CK_Bonnell,
     CK_Silvermont,
+    CK_Goldmont,
     //@}
 
     /// \name Nehalem
@@ -2833,6 +2901,7 @@ class X86TargetInfo : public TargetInfo {
         .Case("atom", CK_Bonnell) // Legacy name.
         .Case("silvermont", CK_Silvermont)
         .Case("slm", CK_Silvermont) // Legacy name.
+        .Case("goldmont", CK_Goldmont)
         .Case("nehalem", CK_Nehalem)
         .Case("corei7", CK_Nehalem) // Legacy name.
         .Case("westmere", CK_Westmere)
@@ -3048,6 +3117,7 @@ class X86TargetInfo : public TargetInfo {
     case CK_Penryn:
     case CK_Bonnell:
     case CK_Silvermont:
+    case CK_Goldmont:
     case CK_Nehalem:
     case CK_Westmere:
     case CK_SandyBridge:
@@ -3093,6 +3163,7 @@ class X86TargetInfo : public TargetInfo {
     case CC_Swift:
     case CC_X86Pascal:
     case CC_IntelOclBicc:
+    case CC_OpenCLKernel:
       return CCCR_OK;
     default:
       return CCCR_Warning;
@@ -3175,7 +3246,6 @@ bool X86TargetInfo::initFeatureMap(
     setFeatureEnabledImpl(Features, "cx16", true);
     break;
   case CK_Core2:
-  case CK_Bonnell:
     setFeatureEnabledImpl(Features, "ssse3", true);
     setFeatureEnabledImpl(Features, "fxsr", true);
     setFeatureEnabledImpl(Features, "cx16", true);
@@ -3230,7 +3300,6 @@ bool X86TargetInfo::initFeatureMap(
     setFeatureEnabledImpl(Features, "xsaveopt", true);
     LLVM_FALLTHROUGH;
   case CK_Westmere:
-  case CK_Silvermont:
     setFeatureEnabledImpl(Features, "aes", true);
     setFeatureEnabledImpl(Features, "pclmul", true);
     LLVM_FALLTHROUGH;
@@ -3239,6 +3308,28 @@ bool X86TargetInfo::initFeatureMap(
     setFeatureEnabledImpl(Features, "fxsr", true);
     setFeatureEnabledImpl(Features, "cx16", true);
     break;
+  case CK_Goldmont:
+    setFeatureEnabledImpl(Features, "sha", true);
+    setFeatureEnabledImpl(Features, "rdrnd", true);
+    setFeatureEnabledImpl(Features, "rdseed", true);
+    setFeatureEnabledImpl(Features, "xsave", true);
+    setFeatureEnabledImpl(Features, "xsaveopt", true);
+    setFeatureEnabledImpl(Features, "xsavec", true);
+    setFeatureEnabledImpl(Features, "xsaves", true);
+    setFeatureEnabledImpl(Features, "clflushopt", true);
+    setFeatureEnabledImpl(Features, "mpx", true);
+    LLVM_FALLTHROUGH;
+  case CK_Silvermont:
+    setFeatureEnabledImpl(Features, "aes", true);
+    setFeatureEnabledImpl(Features, "pclmul", true);
+    setFeatureEnabledImpl(Features, "sse4.2", true);
+    LLVM_FALLTHROUGH;
+  case CK_Bonnell:
+    setFeatureEnabledImpl(Features, "movbe", true);
+    setFeatureEnabledImpl(Features, "ssse3", true);
+    setFeatureEnabledImpl(Features, "fxsr", true);
+    setFeatureEnabledImpl(Features, "cx16", true);
+    break;
   case CK_KNL:
     setFeatureEnabledImpl(Features, "avx512f", true);
     setFeatureEnabledImpl(Features, "avx512cd", true);
@@ -3308,6 +3399,7 @@ bool X86TargetInfo::initFeatureMap(
     setFeatureEnabledImpl(Features, "bmi", true);
     setFeatureEnabledImpl(Features, "f16c", true);
     setFeatureEnabledImpl(Features, "xsaveopt", true);
+    setFeatureEnabledImpl(Features, "movbe", true);
     LLVM_FALLTHROUGH;
   case CK_BTVER1:
     setFeatureEnabledImpl(Features, "ssse3", true);
@@ -3411,23 +3503,32 @@ void X86TargetInfo::setSSELevel(llvm::StringMap<bool> &Features,
     switch (Level) {
     case AVX512F:
       Features["avx512f"] = true;
+      LLVM_FALLTHROUGH;
     case AVX2:
       Features["avx2"] = true;
+      LLVM_FALLTHROUGH;
     case AVX:
       Features["avx"] = true;
       Features["xsave"] = true;
+      LLVM_FALLTHROUGH;
     case SSE42:
       Features["sse4.2"] = true;
+      LLVM_FALLTHROUGH;
     case SSE41:
       Features["sse4.1"] = true;
+      LLVM_FALLTHROUGH;
     case SSSE3:
       Features["ssse3"] = true;
+      LLVM_FALLTHROUGH;
     case SSE3:
       Features["sse3"] = true;
+      LLVM_FALLTHROUGH;
     case SSE2:
       Features["sse2"] = true;
+      LLVM_FALLTHROUGH;
     case SSE1:
       Features["sse"] = true;
+      LLVM_FALLTHROUGH;
     case NoSSE:
       break;
     }
@@ -3438,29 +3539,38 @@ void X86TargetInfo::setSSELevel(llvm::StringMap<bool> &Features,
   case NoSSE:
   case SSE1:
     Features["sse"] = false;
+    LLVM_FALLTHROUGH;
   case SSE2:
     Features["sse2"] = Features["pclmul"] = Features["aes"] =
       Features["sha"] = false;
+    LLVM_FALLTHROUGH;
   case SSE3:
     Features["sse3"] = false;
     setXOPLevel(Features, NoXOP, false);
+    LLVM_FALLTHROUGH;
   case SSSE3:
     Features["ssse3"] = false;
+    LLVM_FALLTHROUGH;
   case SSE41:
     Features["sse4.1"] = false;
+    LLVM_FALLTHROUGH;
   case SSE42:
     Features["sse4.2"] = false;
+    LLVM_FALLTHROUGH;
   case AVX:
     Features["fma"] = Features["avx"] = Features["f16c"] = Features["xsave"] =
       Features["xsaveopt"] = false;
     setXOPLevel(Features, FMA4, false);
+    LLVM_FALLTHROUGH;
   case AVX2:
     Features["avx2"] = false;
+    LLVM_FALLTHROUGH;
   case AVX512F:
     Features["avx512f"] = Features["avx512cd"] = Features["avx512er"] =
-      Features["avx512pf"] = Features["avx512dq"] = Features["avx512bw"] =
-      Features["avx512vl"] = Features["avx512vbmi"] =
-      Features["avx512ifma"] = false;
+        Features["avx512pf"] = Features["avx512dq"] = Features["avx512bw"] =
+            Features["avx512vl"] = Features["avx512vbmi"] =
+                Features["avx512ifma"] = Features["avx512vpopcntdq"] = false;
+    break;
   }
 }
 
@@ -3470,10 +3580,13 @@ void X86TargetInfo::setMMXLevel(llvm::StringMap<bool> &Features,
     switch (Level) {
     case AMD3DNowAthlon:
       Features["3dnowa"] = true;
+      LLVM_FALLTHROUGH;
     case AMD3DNow:
       Features["3dnow"] = true;
+      LLVM_FALLTHROUGH;
     case MMX:
       Features["mmx"] = true;
+      LLVM_FALLTHROUGH;
     case NoMMX3DNow:
       break;
     }
@@ -3484,10 +3597,13 @@ void X86TargetInfo::setMMXLevel(llvm::StringMap<bool> &Features,
   case NoMMX3DNow:
   case MMX:
     Features["mmx"] = false;
+    LLVM_FALLTHROUGH;
   case AMD3DNow:
     Features["3dnow"] = false;
+    LLVM_FALLTHROUGH;
   case AMD3DNowAthlon:
     Features["3dnowa"] = false;
+    break;
   }
 }
 
@@ -3497,12 +3613,15 @@ void X86TargetInfo::setXOPLevel(llvm::StringMap<bool> &Features, XOPEnum Level,
     switch (Level) {
     case XOP:
       Features["xop"] = true;
+      LLVM_FALLTHROUGH;
     case FMA4:
       Features["fma4"] = true;
       setSSELevel(Features, AVX, true);
+      LLVM_FALLTHROUGH;
     case SSE4A:
       Features["sse4a"] = true;
       setSSELevel(Features, SSE3, true);
+      LLVM_FALLTHROUGH;
     case NoXOP:
       break;
     }
@@ -3513,10 +3632,13 @@ void X86TargetInfo::setXOPLevel(llvm::StringMap<bool> &Features, XOPEnum Level,
   case NoXOP:
   case SSE4A:
     Features["sse4a"] = false;
+    LLVM_FALLTHROUGH;
   case FMA4:
     Features["fma4"] = false;
+    LLVM_FALLTHROUGH;
   case XOP:
     Features["xop"] = false;
+    break;
   }
 }
 
@@ -3560,7 +3682,8 @@ void X86TargetInfo::setFeatureEnabledImpl(llvm::StringMap<bool> &Features,
     setSSELevel(Features, AVX512F, Enabled);
   } else if (Name == "avx512cd" || Name == "avx512er" || Name == "avx512pf" ||
              Name == "avx512dq" || Name == "avx512bw" || Name == "avx512vl" ||
-             Name == "avx512vbmi" || Name == "avx512ifma") {
+             Name == "avx512vbmi" || Name == "avx512ifma" ||
+             Name == "avx512vpopcntdq") {
     if (Enabled)
       setSSELevel(Features, AVX512F, Enabled);
     // Enable BWI instruction if VBMI is being enabled.
@@ -3644,6 +3767,8 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
       HasF16C = true;
     } else if (Feature == "+avx512cd") {
       HasAVX512CD = true;
+    } else if (Feature == "+avx512vpopcntdq") {
+      HasAVX512VPOPCNTDQ = true;
     } else if (Feature == "+avx512er") {
       HasAVX512ER = true;
     } else if (Feature == "+avx512pf") {
@@ -3772,7 +3897,7 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
   case CK_PentiumMMX:
     Builder.defineMacro("__pentium_mmx__");
     Builder.defineMacro("__tune_pentium_mmx__");
-    // Fallthrough
+    LLVM_FALLTHROUGH;
   case CK_i586:
   case CK_Pentium:
     defineCPUMacros(Builder, "i586");
@@ -3782,15 +3907,15 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
   case CK_Pentium3M:
   case CK_PentiumM:
     Builder.defineMacro("__tune_pentium3__");
-    // Fallthrough
+    LLVM_FALLTHROUGH;
   case CK_Pentium2:
   case CK_C3_2:
     Builder.defineMacro("__tune_pentium2__");
-    // Fallthrough
+    LLVM_FALLTHROUGH;
   case CK_PentiumPro:
     Builder.defineMacro("__tune_i686__");
     Builder.defineMacro("__tune_pentiumpro__");
-    // Fallthrough
+    LLVM_FALLTHROUGH;
   case CK_i686:
     Builder.defineMacro("__i686");
     Builder.defineMacro("__i686__");
@@ -3817,6 +3942,9 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
   case CK_Silvermont:
     defineCPUMacros(Builder, "slm");
     break;
+  case CK_Goldmont:
+    defineCPUMacros(Builder, "goldmont");
+    break;
   case CK_Nehalem:
   case CK_Westmere:
   case CK_SandyBridge:
@@ -3843,7 +3971,7 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
   case CK_K6_2:
     Builder.defineMacro("__k6_2__");
     Builder.defineMacro("__tune_k6_2__");
-    // Fallthrough
+    LLVM_FALLTHROUGH;
   case CK_K6_3:
     if (CPU != CK_K6_2) {  // In case of fallthrough
       // FIXME: GCC may be enabling these in cases where some other k6
@@ -3852,7 +3980,7 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
       Builder.defineMacro("__k6_3__");
       Builder.defineMacro("__tune_k6_3__");
     }
-    // Fallthrough
+    LLVM_FALLTHROUGH;
   case CK_K6:
     defineCPUMacros(Builder, "k6");
     break;
@@ -3962,10 +4090,13 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
   switch (XOPLevel) {
   case XOP:
     Builder.defineMacro("__XOP__");
+    LLVM_FALLTHROUGH;
   case FMA4:
     Builder.defineMacro("__FMA4__");
+    LLVM_FALLTHROUGH;
   case SSE4A:
     Builder.defineMacro("__SSE4A__");
+    LLVM_FALLTHROUGH;
   case NoXOP:
     break;
   }
@@ -3978,6 +4109,8 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
 
   if (HasAVX512CD)
     Builder.defineMacro("__AVX512CD__");
+  if (HasAVX512VPOPCNTDQ)
+    Builder.defineMacro("__AVX512VPOPCNTDQ__");
   if (HasAVX512ER)
     Builder.defineMacro("__AVX512ER__");
   if (HasAVX512PF)
@@ -4027,24 +4160,33 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
   switch (SSELevel) {
   case AVX512F:
     Builder.defineMacro("__AVX512F__");
+    LLVM_FALLTHROUGH;
   case AVX2:
     Builder.defineMacro("__AVX2__");
+    LLVM_FALLTHROUGH;
   case AVX:
     Builder.defineMacro("__AVX__");
+    LLVM_FALLTHROUGH;
   case SSE42:
     Builder.defineMacro("__SSE4_2__");
+    LLVM_FALLTHROUGH;
   case SSE41:
     Builder.defineMacro("__SSE4_1__");
+    LLVM_FALLTHROUGH;
   case SSSE3:
     Builder.defineMacro("__SSSE3__");
+    LLVM_FALLTHROUGH;
   case SSE3:
     Builder.defineMacro("__SSE3__");
+    LLVM_FALLTHROUGH;
   case SSE2:
     Builder.defineMacro("__SSE2__");
     Builder.defineMacro("__SSE2_MATH__");  // -mfp-math=sse always implied.
+    LLVM_FALLTHROUGH;
   case SSE1:
     Builder.defineMacro("__SSE__");
     Builder.defineMacro("__SSE_MATH__");   // -mfp-math=sse always implied.
+    LLVM_FALLTHROUGH;
   case NoSSE:
     break;
   }
@@ -4066,6 +4208,7 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
       break;
     default:
       Builder.defineMacro("_M_IX86_FP", Twine(0));
+      break;
     }
   }
 
@@ -4073,10 +4216,13 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
   switch (MMX3DNowLevel) {
   case AMD3DNowAthlon:
     Builder.defineMacro("__3dNOW_A__");
+    LLVM_FALLTHROUGH;
   case AMD3DNow:
     Builder.defineMacro("__3dNOW__");
+    LLVM_FALLTHROUGH;
   case MMX:
     Builder.defineMacro("__MMX__");
+    LLVM_FALLTHROUGH;
   case NoMMX3DNow:
     break;
   }
@@ -4088,6 +4234,9 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
   }
   if (CPU >= CK_i586)
     Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8");
+
+  if (HasFloat128)
+    Builder.defineMacro("__SIZEOF_FLOAT128__", "16");
 }
 
 bool X86TargetInfo::hasFeature(StringRef Feature) const {
@@ -4097,6 +4246,7 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
       .Case("avx2", SSELevel >= AVX2)
       .Case("avx512f", SSELevel >= AVX512F)
       .Case("avx512cd", HasAVX512CD)
+      .Case("avx512vpopcntdq", HasAVX512VPOPCNTDQ)
       .Case("avx512er", HasAVX512ER)
       .Case("avx512pf", HasAVX512PF)
       .Case("avx512dq", HasAVX512DQ)
@@ -4182,6 +4332,7 @@ bool X86TargetInfo::validateCpuSupports(StringRef FeatureStr) const {
       .Case("avx512bw", true)
       .Case("avx512dq", true)
       .Case("avx512cd", true)
+      .Case("avx512vpopcntdq", true)
       .Case("avx512er", true)
       .Case("avx512pf", true)
       .Case("avx512vbmi", true)
@@ -4565,7 +4716,9 @@ static void addMinGWDefines(const LangOptions &Opts, MacroBuilder &Builder) {
 class MinGWX86_32TargetInfo : public WindowsX86_32TargetInfo {
 public:
   MinGWX86_32TargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
-      : WindowsX86_32TargetInfo(Triple, Opts) {}
+      : WindowsX86_32TargetInfo(Triple, Opts) {
+    HasFloat128 = true;
+  }
   void getTargetDefines(const LangOptions &Opts,
                         MacroBuilder &Builder) const override {
     WindowsX86_32TargetInfo::getTargetDefines(Opts, Builder);
@@ -4746,10 +4899,11 @@ class X86_64TargetInfo : public X86TargetInfo {
     case CC_Swift:
     case CC_X86VectorCall:
     case CC_IntelOclBicc:
-    case CC_X86_64Win64:
+    case CC_Win64:
     case CC_PreserveMost:
     case CC_PreserveAll:
     case CC_X86RegCall:
+    case CC_OpenCLKernel:
       return CCCR_OK;
     default:
       return CCCR_Warning;
@@ -4823,6 +4977,7 @@ class WindowsX86_64TargetInfo : public WindowsTargetInfo<X86_64TargetInfo> {
     case CC_X86_64SysV:
     case CC_Swift:
     case CC_X86RegCall:
+    case CC_OpenCLKernel:
       return CCCR_OK;
     default:
       return CCCR_Warning;
@@ -4857,6 +5012,7 @@ class MinGWX86_64TargetInfo : public WindowsX86_64TargetInfo {
     // with x86 FP ops. Weird.
     LongDoubleWidth = LongDoubleAlign = 128;
     LongDoubleFormat = &llvm::APFloat::x87DoubleExtended();
+    HasFloat128 = true;
   }
 
   void getTargetDefines(const LangOptions &Opts,
@@ -5297,6 +5453,10 @@ class ARMTargetInfo : public TargetInfo {
     // ARM has atomics up to 8 bytes
     setAtomic();
 
+    // Maximum alignment for ARM NEON data types should be 64-bits (AAPCS)
+    if (IsAAPCS && (Triple.getEnvironment() != llvm::Triple::Android))
+       MaxVectorAlign = 64;
+
     // Do force alignment of members that follow zero length bitfields.  If
     // the alignment of the zero-length bitfield is greater than the member
     // that follows it, `bar', `bar' will be aligned as the  type of the
@@ -5306,7 +5466,7 @@ class ARMTargetInfo : public TargetInfo {
     if (Triple.getOS() == llvm::Triple::Linux ||
         Triple.getOS() == llvm::Triple::UnknownOS)
       this->MCountName =
-          Opts.EABIVersion == "gnu" ? "\01__gnu_mcount_nc" : "\01mcount";
+          Opts.EABIVersion == llvm::EABI::GNU ? "\01__gnu_mcount_nc" : "\01mcount";
   }
 
   StringRef getABI() const override { return ABI; }
@@ -5350,7 +5510,24 @@ class ARMTargetInfo : public TargetInfo {
       if (Feature[0] == '+')
         Features[Feature.drop_front(1)] = true;
 
-    return TargetInfo::initFeatureMap(Features, Diags, CPU, FeaturesVec);
+    // Enable or disable thumb-mode explicitly per function to enable mixed
+    // ARM and Thumb code generation.
+    if (isThumb())
+      Features["thumb-mode"] = true;
+    else
+      Features["thumb-mode"] = false;
+
+    // Convert user-provided arm and thumb GNU target attributes to
+    // [-|+]thumb-mode target features respectively.
+    std::vector<std::string> UpdatedFeaturesVec(FeaturesVec);
+    for (auto &Feature : UpdatedFeaturesVec) {
+      if (Feature.compare("+arm") == 0)
+        Feature = "-thumb-mode";
+      else if (Feature.compare("+thumb") == 0)
+        Feature = "+thumb-mode";
+    }
+
+    return TargetInfo::initFeatureMap(Features, Diags, CPU, UpdatedFeaturesVec);
   }
 
   bool handleTargetFeatures(std::vector<std::string> &Features,
@@ -5470,6 +5647,17 @@ class ARMTargetInfo : public TargetInfo {
 
   bool setFPMath(StringRef Name) override;
 
+  void getTargetDefinesARMV81A(const LangOptions &Opts,
+                               MacroBuilder &Builder) const {
+    Builder.defineMacro("__ARM_FEATURE_QRDMX", "1");
+  }
+
+  void getTargetDefinesARMV82A(const LangOptions &Opts,
+                               MacroBuilder &Builder) const {
+    // Also include the ARMv8.1-A defines
+    getTargetDefinesARMV81A(Opts, Builder);
+  }
+
   void getTargetDefines(const LangOptions &Opts,
                         MacroBuilder &Builder) const override {
     // Target identification.
@@ -5668,8 +5856,15 @@ class ARMTargetInfo : public TargetInfo {
     if (Opts.UnsafeFPMath)
       Builder.defineMacro("__ARM_FP_FAST", "1");
 
-    if (ArchKind == llvm::ARM::AK_ARMV8_1A)
-      Builder.defineMacro("__ARM_FEATURE_QRDMX", "1");
+    switch(ArchKind) {
+    default: break;
+    case llvm::ARM::AK_ARMV8_1A:
+      getTargetDefinesARMV81A(Opts, Builder);
+      break;
+    case llvm::ARM::AK_ARMV8_2A:
+      getTargetDefinesARMV82A(Opts, Builder);
+      break;
+    }
   }
 
   ArrayRef<Builtin::Info> getTargetBuiltins() const override {
@@ -5775,6 +5970,7 @@ class ARMTargetInfo : public TargetInfo {
     case CC_AAPCS:
     case CC_AAPCS_VFP:
     case CC_Swift:
+    case CC_OpenCLKernel:
       return CCCR_OK;
     default:
       return CCCR_Warning;
@@ -5934,6 +6130,7 @@ class WindowsARMTargetInfo : public WindowsTargetInfo<ARMleTargetInfo> {
     case CC_X86VectorCall:
       return CCCR_Ignore;
     case CC_C:
+    case CC_OpenCLKernel:
       return CCCR_OK;
     default:
       return CCCR_Warning;
@@ -6053,14 +6250,16 @@ class AArch64TargetInfo : public TargetInfo {
 
   enum FPUModeEnum {
     FPUMode,
-    NeonMode
+    NeonMode = (1 << 0),
+    SveMode = (1 << 1)
   };
 
   unsigned FPU;
   unsigned CRC;
   unsigned Crypto;
   unsigned Unaligned;
-  unsigned V8_1A;
+  unsigned HasFullFP16;
+  llvm::AArch64::ArchKind ArchKind;
 
   static const Builtin::Info BuiltinInfo[];
 
@@ -6091,6 +6290,9 @@ class AArch64TargetInfo : public TargetInfo {
     LongDoubleWidth = LongDoubleAlign = SuitableAlign = 128;
     LongDoubleFormat = &llvm::APFloat::IEEEquad();
 
+    // Make __builtin_ms_va_list available.
+    HasBuiltinMSVaList = true;
+
     // {} in inline assembly are neon specifiers, not assembly variant
     // specifiers.
     NoAsmVariants = true;
@@ -6108,7 +6310,7 @@ class AArch64TargetInfo : public TargetInfo {
     if (Triple.getOS() == llvm::Triple::Linux)
       this->MCountName = "\01_mcount";
     else if (Triple.getOS() == llvm::Triple::UnknownOS)
-      this->MCountName = Opts.EABIVersion == "gnu" ? "\01_mcount" : "mcount";
+      this->MCountName = Opts.EABIVersion == llvm::EABI::GNU ? "\01_mcount" : "mcount";
   }
 
   StringRef getABI() const override { return ABI; }
@@ -6126,6 +6328,17 @@ class AArch64TargetInfo : public TargetInfo {
           static_cast<unsigned>(llvm::AArch64::ArchKind::AK_INVALID);
   }
 
+  void getTargetDefinesARMV81A(const LangOptions &Opts,
+                        MacroBuilder &Builder) const {
+    Builder.defineMacro("__ARM_FEATURE_QRDMX", "1");
+  }
+
+  void getTargetDefinesARMV82A(const LangOptions &Opts,
+                        MacroBuilder &Builder) const {
+    // Also include the ARMv8.1 defines
+    getTargetDefinesARMV81A(Opts, Builder);
+  }
+
   void getTargetDefines(const LangOptions &Opts,
                         MacroBuilder &Builder) const override {
     // Target identification.
@@ -6175,12 +6388,15 @@ class AArch64TargetInfo : public TargetInfo {
     Builder.defineMacro("__ARM_SIZEOF_MINIMAL_ENUM",
                         Opts.ShortEnums ? "1" : "4");
 
-    if (FPU == NeonMode) {
+    if (FPU & NeonMode) {
       Builder.defineMacro("__ARM_NEON", "1");
       // 64-bit NEON supports half, single and double precision operations.
       Builder.defineMacro("__ARM_NEON_FP", "0xE");
     }
 
+    if (FPU & SveMode)
+      Builder.defineMacro("__ARM_FEATURE_SVE", "1");
+
     if (CRC)
       Builder.defineMacro("__ARM_FEATURE_CRC32", "1");
 
@@ -6190,8 +6406,15 @@ class AArch64TargetInfo : public TargetInfo {
     if (Unaligned)
       Builder.defineMacro("__ARM_FEATURE_UNALIGNED", "1");
 
-    if (V8_1A)
-      Builder.defineMacro("__ARM_FEATURE_QRDMX", "1");
+    switch(ArchKind) {
+    default: break;
+    case llvm::AArch64::ArchKind::AK_ARMV8_1A:
+      getTargetDefinesARMV81A(Opts, Builder);
+      break;
+    case llvm::AArch64::ArchKind::AK_ARMV8_2A:
+      getTargetDefinesARMV82A(Opts, Builder);
+      break;
+    }
 
     // All of the __sync_(bool|val)_compare_and_swap_(1|2|4|8) builtins work.
     Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1");
@@ -6209,7 +6432,8 @@ class AArch64TargetInfo : public TargetInfo {
     return Feature == "aarch64" ||
       Feature == "arm64" ||
       Feature == "arm" ||
-      (Feature == "neon" && FPU == NeonMode);
+      (Feature == "neon" && (FPU & NeonMode)) ||
+      (Feature == "sve" && (FPU & SveMode));
   }
 
   bool handleTargetFeatures(std::vector<std::string> &Features,
@@ -6218,11 +6442,14 @@ class AArch64TargetInfo : public TargetInfo {
     CRC = 0;
     Crypto = 0;
     Unaligned = 1;
-    V8_1A = 0;
+    HasFullFP16 = 0;
+    ArchKind = llvm::AArch64::ArchKind::AK_ARMV8A;
 
     for (const auto &Feature : Features) {
       if (Feature == "+neon")
-        FPU = NeonMode;
+        FPU |= NeonMode;
+      if (Feature == "+sve")
+        FPU |= SveMode;
       if (Feature == "+crc")
         CRC = 1;
       if (Feature == "+crypto")
@@ -6230,7 +6457,11 @@ class AArch64TargetInfo : public TargetInfo {
       if (Feature == "+strict-align")
         Unaligned = 0;
       if (Feature == "+v8.1a")
-        V8_1A = 1;
+        ArchKind = llvm::AArch64::ArchKind::AK_ARMV8_1A;
+      if (Feature == "+v8.2a")
+        ArchKind = llvm::AArch64::ArchKind::AK_ARMV8_2A;
+      if (Feature == "+fullfp16")
+        HasFullFP16 = 1;
     }
 
     setDataLayout();
@@ -6244,6 +6475,8 @@ class AArch64TargetInfo : public TargetInfo {
     case CC_Swift:
     case CC_PreserveMost:
     case CC_PreserveAll:
+    case CC_OpenCLKernel:
+    case CC_Win64:
       return CCCR_OK;
     default:
       return CCCR_Warning;
@@ -6413,6 +6646,56 @@ class AArch64leTargetInfo : public AArch64TargetInfo {
   }
 };
 
+class MicrosoftARM64TargetInfo
+    : public WindowsTargetInfo<AArch64leTargetInfo> {
+  const llvm::Triple Triple;
+
+public:
+  MicrosoftARM64TargetInfo(const llvm::Triple &Triple,
+                             const TargetOptions &Opts)
+      : WindowsTargetInfo<AArch64leTargetInfo>(Triple, Opts), Triple(Triple) {
+
+    // This is an LLP64 platform.
+    // int:4, long:4, long long:8, long double:8.
+    WCharType = UnsignedShort;
+    IntWidth = IntAlign = 32;
+    LongWidth = LongAlign = 32;
+    DoubleAlign = LongLongAlign = 64;
+    LongDoubleWidth = LongDoubleAlign = 64;
+    LongDoubleFormat = &llvm::APFloat::IEEEdouble();
+    IntMaxType = SignedLongLong;
+    Int64Type = SignedLongLong;
+    SizeType = UnsignedLongLong;
+    PtrDiffType = SignedLongLong;
+    IntPtrType = SignedLongLong;
+
+    TheCXXABI.set(TargetCXXABI::Microsoft);
+  }
+
+  void setDataLayout() override {
+    resetDataLayout("e-m:w-p:64:64-i32:32-i64:64-i128:128-n32:64-S128");
+  }
+
+  void getVisualStudioDefines(const LangOptions &Opts,
+                              MacroBuilder &Builder) const {
+    WindowsTargetInfo<AArch64leTargetInfo>::getVisualStudioDefines(Opts,
+                                                                   Builder);
+    Builder.defineMacro("_WIN32", "1");
+    Builder.defineMacro("_WIN64", "1");
+    Builder.defineMacro("_M_ARM64", "1");
+  }
+
+  void getTargetDefines(const LangOptions &Opts,
+                        MacroBuilder &Builder) const override {
+    WindowsTargetInfo<AArch64leTargetInfo>::getTargetDefines(Opts, Builder);
+    getVisualStudioDefines(Opts, Builder);
+  }
+
+  BuiltinVaListKind getBuiltinVaListKind() const override {
+    return TargetInfo::CharPtrBuiltinVaList;
+  }
+};
+
 class AArch64beTargetInfo : public AArch64TargetInfo {
   void setDataLayout() override {
     assert(!getTriple().isOSBinFormatMachO());
@@ -7169,13 +7452,14 @@ class SystemZTargetInfo : public TargetInfo {
   static const Builtin::Info BuiltinInfo[];
   static const char *const GCCRegNames[];
   std::string CPU;
+  int ISARevision;
   bool HasTransactionalExecution;
   bool HasVector;
 
 public:
   SystemZTargetInfo(const llvm::Triple &Triple, const TargetOptions &)
-      : TargetInfo(Triple), CPU("z10"), HasTransactionalExecution(false),
-        HasVector(false) {
+      : TargetInfo(Triple), CPU("z10"), ISARevision(8),
+        HasTransactionalExecution(false), HasVector(false) {
     IntMaxType = SignedLong;
     Int64Type = SignedLong;
     TLSSupported = true;
@@ -7197,14 +7481,7 @@ class SystemZTargetInfo : public TargetInfo {
     Builder.defineMacro("__zarch__");
     Builder.defineMacro("__LONG_DOUBLE_128__");
 
-    const std::string ISARev = llvm::StringSwitch<std::string>(CPU)
-                                   .Cases("arch8", "z10", "8")
-                                   .Cases("arch9", "z196", "9")
-                                   .Cases("arch10", "zEC12", "10")
-                                   .Cases("arch11", "z13", "11")
-                                   .Default("");
-    if (!ISARev.empty())
-      Builder.defineMacro("__ARCH__", ISARev);
+    Builder.defineMacro("__ARCH__", Twine(ISARevision));
 
     Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1");
     Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2");
@@ -7216,7 +7493,7 @@ class SystemZTargetInfo : public TargetInfo {
     if (HasVector)
       Builder.defineMacro("__VX__");
     if (Opts.ZVector)
-      Builder.defineMacro("__VEC__", "10301");
+      Builder.defineMacro("__VEC__", "10302");
   }
   ArrayRef<Builtin::Info> getTargetBuiltins() const override {
     return llvm::makeArrayRef(BuiltinInfo,
@@ -7237,37 +7514,38 @@ class SystemZTargetInfo : public TargetInfo {
   BuiltinVaListKind getBuiltinVaListKind() const override {
     return TargetInfo::SystemZBuiltinVaList;
   }
+  int getISARevision(const StringRef &Name) const {
+    return llvm::StringSwitch<int>(Name)
+      .Cases("arch8", "z10", 8)
+      .Cases("arch9", "z196", 9)
+      .Cases("arch10", "zEC12", 10)
+      .Cases("arch11", "z13", 11)
+      .Cases("arch12", "z14", 12)
+      .Default(-1);
+  }
   bool setCPU(const std::string &Name) override {
     CPU = Name;
-    bool CPUKnown = llvm::StringSwitch<bool>(Name)
-      .Case("z10", true)
-      .Case("arch8", true)
-      .Case("z196", true)
-      .Case("arch9", true)
-      .Case("zEC12", true)
-      .Case("arch10", true)
-      .Case("z13", true)
-      .Case("arch11", true)
-      .Default(false);
-
-    return CPUKnown;
+    ISARevision = getISARevision(CPU);
+    return ISARevision != -1;
   }
   bool
   initFeatureMap(llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags,
                  StringRef CPU,
                 const std::vector<std::string> &FeaturesVec) const override {
-    if (CPU == "zEC12" || CPU == "arch10")
-      Features["transactional-execution"] = true;
-    if (CPU == "z13" || CPU == "arch11") {
+    int ISARevision = getISARevision(CPU);
+    if (ISARevision >= 10)
       Features["transactional-execution"] = true;
+    if (ISARevision >= 11)
       Features["vector"] = true;
-    }
+    if (ISARevision >= 12)
+      Features["vector-enhancements-1"] = true;
     return TargetInfo::initFeatureMap(Features, Diags, CPU, FeaturesVec);
   }
 
   bool handleTargetFeatures(std::vector<std::string> &Features,
                             DiagnosticsEngine &Diags) override {
     HasTransactionalExecution = false;
+    HasVector = false;
     for (const auto &Feature : Features) {
       if (Feature == "+transactional-execution")
         HasTransactionalExecution = true;
@@ -7286,6 +7564,11 @@ class SystemZTargetInfo : public TargetInfo {
   bool hasFeature(StringRef Feature) const override {
     return llvm::StringSwitch<bool>(Feature)
         .Case("systemz", true)
+        .Case("arch8", ISARevision >= 8)
+        .Case("arch9", ISARevision >= 9)
+        .Case("arch10", ISARevision >= 10)
+        .Case("arch11", ISARevision >= 11)
+        .Case("arch12", ISARevision >= 12)
         .Case("htm", HasTransactionalExecution)
         .Case("vx", HasVector)
         .Default(false);
@@ -7295,6 +7578,7 @@ class SystemZTargetInfo : public TargetInfo {
     switch (CC) {
     case CC_C:
     case CC_Swift:
+    case CC_OpenCLKernel:
       return CCCR_OK;
     default:
       return CCCR_Warning;
@@ -7380,7 +7664,7 @@ class MSP430TargetInfo : public TargetInfo {
     IntPtrType = SignedInt;
     PtrDiffType = SignedInt;
     SigAtomicType = SignedLong;
-    resetDataLayout("e-m:e-p:16:16-i32:16:32-a:16-n8:16");
+    resetDataLayout("e-m:e-p:16:16-i32:16-i64:16-f32:16-f64:16-a:8-n8:16-S16");
   }
   void getTargetDefines(const LangOptions &Opts,
                         MacroBuilder &Builder) const override {
@@ -7578,6 +7862,157 @@ class BPFTargetInfo : public TargetInfo {
   ArrayRef<TargetInfo::GCCRegAlias> getGCCRegAliases() const override {
     return None;
   }
+  CallingConvCheckResult checkCallingConvention(CallingConv CC) const override {
+    switch (CC) {
+      default:
+        return CCCR_Warning;
+      case CC_C:
+      case CC_OpenCLKernel:
+        return CCCR_OK;
+    }
+  }
+};
+
+class Nios2TargetInfo : public TargetInfo {
+  void setDataLayout() {
+    if (BigEndian)
+      resetDataLayout("E-p:32:32:32-i8:8:32-i16:16:32-n32");
+    else
+      resetDataLayout("e-p:32:32:32-i8:8:32-i16:16:32-n32");
+  }
+
+  static const Builtin::Info BuiltinInfo[];
+  std::string CPU;
+  std::string ABI;
+
+public:
+  Nios2TargetInfo(const llvm::Triple &triple, const TargetOptions &opts)
+      : TargetInfo(triple), CPU(opts.CPU), ABI(opts.ABI) {
+    SizeType = UnsignedInt;
+    PtrDiffType = SignedInt;
+    MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 32;
+    setDataLayout();
+  }
+
+  StringRef getABI() const override { return ABI; }
+  bool setABI(const std::string &Name) override {
+    if (Name == "o32" || Name == "eabi") {
+      ABI = Name;
+      return true;
+    }
+    return false;
+  }
+
+  bool setCPU(const std::string &Name) override {
+    if (Name == "nios2r1" || Name == "nios2r2") {
+      CPU = Name;
+      return true;
+    }
+    return false;
+  }
+
+  void getTargetDefines(const LangOptions &Opts,
+                        MacroBuilder &Builder) const override {
+    DefineStd(Builder, "nios2", Opts);
+    DefineStd(Builder, "NIOS2", Opts);
+
+    Builder.defineMacro("__nios2");
+    Builder.defineMacro("__NIOS2");
+    Builder.defineMacro("__nios2__");
+    Builder.defineMacro("__NIOS2__");
+  }
+
+  ArrayRef<Builtin::Info> getTargetBuiltins() const override {
+    return llvm::makeArrayRef(BuiltinInfo, clang::Nios2::LastTSBuiltin -
+                                               Builtin::FirstTSBuiltin);
+  }
+
+  bool isFeatureSupportedByCPU(StringRef Feature, StringRef CPU) const {
+    const bool isR2 = CPU == "nios2r2";
+    return llvm::StringSwitch<bool>(Feature)
+        .Case("nios2r2mandatory", isR2)
+        .Case("nios2r2bmx", isR2)
+        .Case("nios2r2mpx", isR2)
+        .Case("nios2r2cdx", isR2)
+        .Default(false);
+  }
+
+  bool initFeatureMap(llvm::StringMap<bool> &Features,
+                      DiagnosticsEngine &Diags, StringRef CPU,
+                      const std::vector<std::string> &FeatureVec) const override {
+    static const char *allFeatures[] = {
+      "nios2r2mandatory", "nios2r2bmx", "nios2r2mpx", "nios2r2cdx"
+    };
+    for (const char *feature : allFeatures) {
+        Features[feature] = isFeatureSupportedByCPU(feature, CPU);
+    }
+    return true;
+  }
+
+  bool hasFeature(StringRef Feature) const override {
+    return isFeatureSupportedByCPU(Feature, CPU);
+  }
+
+  BuiltinVaListKind getBuiltinVaListKind() const override {
+    return TargetInfo::VoidPtrBuiltinVaList;
+  }
+
+  ArrayRef<const char *> getGCCRegNames() const override {
+    static const char *const GCCRegNames[] = {
+      // CPU register names
+      // Must match second column of GCCRegAliases
+      "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",
+      "r11", "r12", "r13", "r14", "r15", "r16", "r17", "r18", "r19", "r20",
+      "r21", "r22", "r23", "r24", "r25", "r26", "r27", "r28", "r29", "r30",
+      "r31",
+      // Floating point register names
+      "ctl0", "ctl1", "ctl2", "ctl3", "ctl4", "ctl5", "ctl6", "ctl7", "ctl8",
+      "ctl9", "ctl10", "ctl11", "ctl12", "ctl13", "ctl14", "ctl15"
+    };
+    return llvm::makeArrayRef(GCCRegNames);
+  }
+
+  bool validateAsmConstraint(const char *&Name,
+                             TargetInfo::ConstraintInfo &Info) const override {
+    switch (*Name) {
+    default:
+      return false;
+
+    case 'r': // CPU registers.
+    case 'd': // Equivalent to "r" unless generating MIPS16 code.
+    case 'y': // Equivalent to "r", backwards compatibility only.
+    case 'f': // floating-point registers.
+    case 'c': // $25 for indirect jumps
+    case 'l': // lo register
+    case 'x': // hilo register pair
+      Info.setAllowsRegister();
+      return true;
+    }
+  }
+
+  const char *getClobbers() const override { return ""; }
+
+  ArrayRef<TargetInfo::GCCRegAlias> getGCCRegAliases() const override {
+    static const TargetInfo::GCCRegAlias aliases[] = {
+        {{"zero"}, "r0"},      {{"at"}, "r1"},          {{"et"}, "r24"},
+        {{"bt"}, "r25"},       {{"gp"}, "r26"},         {{"sp"}, "r27"},
+        {{"fp"}, "r28"},       {{"ea"}, "r29"},         {{"ba"}, "r30"},
+        {{"ra"}, "r31"},       {{"status"}, "ctl0"},    {{"estatus"}, "ctl1"},
+        {{"bstatus"}, "ctl2"}, {{"ienable"}, "ctl3"},   {{"ipending"}, "ctl4"},
+        {{"cpuid"}, "ctl5"},   {{"exception"}, "ctl7"}, {{"pteaddr"}, "ctl8"},
+        {{"tlbacc"}, "ctl9"},  {{"tlbmisc"}, "ctl10"},  {{"badaddr"}, "ctl12"},
+        {{"config"}, "ctl13"}, {{"mpubase"}, "ctl14"},  {{"mpuacc"}, "ctl15"},
+    };
+    return llvm::makeArrayRef(aliases);
+  }
+};
+
+const Builtin::Info Nios2TargetInfo::BuiltinInfo[] = {
+#define BUILTIN(ID, TYPE, ATTRS)                                               \
+  {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
+#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
+  {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
+#include "clang/Basic/BuiltinsNios2.def"
 };
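
With the target above in place, the predefined macros give user code a stable
detection idiom; a minimal illustrative sketch (not part of the patch):

    #if defined(__nios2__)
    static const int kIsNios2 = 1;  // Nios II specific code path
    #else
    static const int kIsNios2 = 0;
    #endif
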
 
 class MipsTargetInfo : public TargetInfo {
@@ -7615,6 +8050,7 @@ class MipsTargetInfo : public TargetInfo {
     NoDSP, DSP1, DSP2
   } DspRev;
   bool HasMSA;
+  bool DisableMadd4;
 
 protected:
   bool HasFP64;
@@ -7625,7 +8061,7 @@ class MipsTargetInfo : public TargetInfo {
       : TargetInfo(Triple), IsMips16(false), IsMicromips(false),
         IsNan2008(false), IsSingleFloat(false), IsNoABICalls(false),
         CanUseBSDABICalls(false), FloatABI(HardFloat), DspRev(NoDSP),
-        HasMSA(false), HasFP64(false) {
+        HasMSA(false), DisableMadd4(false), HasFP64(false) {
     TheCXXABI.set(TargetCXXABI::GenericMIPS);
 
     setABI((getTriple().getArch() == llvm::Triple::mips ||
@@ -7871,6 +8307,9 @@ class MipsTargetInfo : public TargetInfo {
     if (HasMSA)
       Builder.defineMacro("__mips_msa", Twine(1));
 
+    if (DisableMadd4)
+      Builder.defineMacro("__mips_no_madd4", Twine(1));
+
     Builder.defineMacro("_MIPS_SZPTR", Twine(getPointerWidth(0)));
     Builder.defineMacro("_MIPS_SZINT", Twine(getIntWidth()));
     Builder.defineMacro("_MIPS_SZLONG", Twine(getLongWidth()));
@@ -8033,6 +8472,8 @@ class MipsTargetInfo : public TargetInfo {
         DspRev = std::max(DspRev, DSP2);
       else if (Feature == "+msa")
         HasMSA = true;
+      else if (Feature == "+nomadd4")
+        DisableMadd4 = true;
       else if (Feature == "+fp64")
         HasFP64 = true;
       else if (Feature == "-fp64")
@@ -8365,7 +8806,7 @@ class WebAssembly32TargetInfo : public WebAssemblyTargetInfo {
   explicit WebAssembly32TargetInfo(const llvm::Triple &T,
                                    const TargetOptions &Opts)
       : WebAssemblyTargetInfo(T, Opts) {
-    MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 32;
+    MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
     resetDataLayout("e-m:e-p:32:32-i64:64-n32:64-S128");
   }
 
@@ -9111,6 +9552,8 @@ static TargetInfo *AllocateTarget(const llvm::Triple &Triple,
       return new NetBSDTargetInfo<AArch64leTargetInfo>(Triple, Opts);
     case llvm::Triple::OpenBSD:
       return new OpenBSDTargetInfo<AArch64leTargetInfo>(Triple, Opts);
+    case llvm::Triple::Win32:
+      return new MicrosoftARM64TargetInfo(Triple, Opts);
     default:
       return new AArch64leTargetInfo(Triple, Opts);
     }
@@ -9200,6 +9643,9 @@ static TargetInfo *AllocateTarget(const llvm::Triple &Triple,
   case llvm::Triple::msp430:
     return new MSP430TargetInfo(Triple, Opts);
 
+  case llvm::Triple::nios2:
+    return new LinuxTargetInfo<Nios2TargetInfo>(Triple, Opts);
+
   case llvm::Triple::mips:
     switch (os) {
     case llvm::Triple::Linux:
@@ -9392,6 +9838,8 @@ static TargetInfo *AllocateTarget(const llvm::Triple &Triple,
       return new DarwinI386TargetInfo(Triple, Opts);
 
     switch (os) {
+    case llvm::Triple::Ananas:
+      return new AnanasTargetInfo<X86_32TargetInfo>(Triple, Opts);
     case llvm::Triple::CloudABI:
       return new CloudABITargetInfo<X86_32TargetInfo>(Triple, Opts);
     case llvm::Triple::Linux: {
@@ -9447,6 +9895,8 @@ static TargetInfo *AllocateTarget(const llvm::Triple &Triple,
       return new DarwinX86_64TargetInfo(Triple, Opts);
 
     switch (os) {
+    case llvm::Triple::Ananas:
+      return new AnanasTargetInfo<X86_64TargetInfo>(Triple, Opts);
     case llvm::Triple::CloudABI:
       return new CloudABITargetInfo<X86_64TargetInfo>(Triple, Opts);
     case llvm::Triple::Linux: {
diff --git a/interpreter/llvm/src/tools/clang/lib/Basic/XRayLists.cpp b/interpreter/llvm/src/tools/clang/lib/Basic/XRayLists.cpp
index dccf3baa75e26..0a439c7af90df 100644
--- a/interpreter/llvm/src/tools/clang/lib/Basic/XRayLists.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Basic/XRayLists.cpp
@@ -26,6 +26,8 @@ XRayFunctionFilter::ImbueAttribute
 XRayFunctionFilter::shouldImbueFunction(StringRef FunctionName) const {
   // First apply the always instrument list, then if it isn't an "always" see
   // whether it's treated as a "never" instrument function.
+  if (AlwaysInstrument->inSection("fun", FunctionName, "arg1"))
+    return ImbueAttribute::ALWAYS_ARG1;
   if (AlwaysInstrument->inSection("fun", FunctionName))
     return ImbueAttribute::ALWAYS;
   if (NeverInstrument->inSection("fun", FunctionName))
diff --git a/interpreter/llvm/src/tools/clang/lib/CodeGen/ABIInfo.h b/interpreter/llvm/src/tools/clang/lib/CodeGen/ABIInfo.h
index c0be60ef53bc5..575506da84d46 100644
--- a/interpreter/llvm/src/tools/clang/lib/CodeGen/ABIInfo.h
+++ b/interpreter/llvm/src/tools/clang/lib/CodeGen/ABIInfo.h
@@ -24,6 +24,7 @@ namespace llvm {
 
 namespace clang {
   class ASTContext;
+  class CodeGenOptions;
   class TargetInfo;
 
 namespace CodeGen {
@@ -68,6 +69,7 @@ namespace swiftcall {
     llvm::LLVMContext &getVMContext() const;
     const llvm::DataLayout &getDataLayout() const;
     const TargetInfo &getTarget() const;
+    const CodeGenOptions &getCodeGenOpts() const;
 
     /// Return the calling convention to use for system runtime
     /// functions.
@@ -149,7 +151,6 @@ namespace swiftcall {
       return info->supportsSwift();
     }
   };
-
 }  // end namespace CodeGen
 }  // end namespace clang
 
diff --git a/interpreter/llvm/src/tools/clang/lib/CodeGen/BackendUtil.cpp b/interpreter/llvm/src/tools/clang/lib/CodeGen/BackendUtil.cpp
index 0f07169ac8b07..513896d986345 100644
--- a/interpreter/llvm/src/tools/clang/lib/CodeGen/BackendUtil.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/CodeGen/BackendUtil.cpp
@@ -49,10 +49,12 @@
 #include "llvm/Transforms/IPO.h"
 #include "llvm/Transforms/IPO/AlwaysInliner.h"
 #include "llvm/Transforms/IPO/PassManagerBuilder.h"
+#include "llvm/Transforms/IPO/ThinLTOBitcodeWriter.h"
 #include "llvm/Transforms/Instrumentation.h"
 #include "llvm/Transforms/ObjCARC.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Scalar/GVN.h"
+#include "llvm/Transforms/Utils/NameAnonGlobals.h"
 #include "llvm/Transforms/Utils/SymbolRewriter.h"
 #include <memory>
 using namespace clang;
@@ -186,6 +188,7 @@ static void addSanitizerCoveragePass(const PassManagerBuilder &Builder,
   Opts.TracePC = CGOpts.SanitizeCoverageTracePC;
   Opts.TracePCGuard = CGOpts.SanitizeCoverageTracePCGuard;
   Opts.NoPrune = CGOpts.SanitizeCoverageNoPrune;
+  Opts.Inline8bitCounters = CGOpts.SanitizeCoverageInline8bitCounters;
   PM.add(createSanitizerCoverageModulePass(Opts));
 }
 
@@ -408,15 +411,11 @@ static void initTargetOptions(llvm::TargetOptions &Options,
 
   Options.UseInitArray = CodeGenOpts.UseInitArray;
   Options.DisableIntegratedAS = CodeGenOpts.DisableIntegratedAS;
-  Options.CompressDebugSections = CodeGenOpts.CompressDebugSections;
+  Options.CompressDebugSections = CodeGenOpts.getCompressDebugSections();
   Options.RelaxELFRelocations = CodeGenOpts.RelaxELFRelocations;
 
   // Set EABI version.
-  Options.EABIVersion = llvm::StringSwitch(TargetOpts.EABIVersion)
-                            .Case("4", llvm::EABI::EABI4)
-                            .Case("5", llvm::EABI::EABI5)
-                            .Case("gnu", llvm::EABI::GNU)
-                            .Default(llvm::EABI::Default);
+  Options.EABIVersion = TargetOpts.EABIVersion;
 
   if (LangOpts.SjLjExceptions)
     Options.ExceptionModel = llvm::ExceptionHandling::SjLj;
@@ -489,7 +488,6 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM,
 
   PMBuilder.OptLevel = CodeGenOpts.OptimizationLevel;
   PMBuilder.SizeLevel = CodeGenOpts.OptimizeSize;
-  PMBuilder.BBVectorize = CodeGenOpts.VectorizeBB;
   PMBuilder.SLPVectorize = CodeGenOpts.VectorizeSLP;
   PMBuilder.LoopVectorize = CodeGenOpts.VectorizeLoop;
 
@@ -854,11 +852,15 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
   if (CodeGenOpts.hasProfileIRUse())
     PGOOpt.ProfileUseFile = CodeGenOpts.ProfileInstrumentUsePath;
 
+  if (!CodeGenOpts.SampleProfileFile.empty())
+    PGOOpt.SampleProfileFile = CodeGenOpts.SampleProfileFile;
+
   // Only pass a PGO options struct if -fprofile-generate or
   // -fprofile-use were passed on the cmdline.
   PassBuilder PB(TM.get(),
     (PGOOpt.RunProfileGen ||
-      !PGOOpt.ProfileUseFile.empty()) ?
+      !PGOOpt.ProfileUseFile.empty() ||
+      !PGOOpt.SampleProfileFile.empty()) ?
         Optional<PGOOptions>(PGOOpt) : None);
 
   LoopAnalysisManager LAM;
@@ -876,20 +878,34 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
   PB.registerLoopAnalyses(LAM);
   PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
 
-  ModulePassManager MPM;
+  ModulePassManager MPM(CodeGenOpts.DebugPassManager);
 
   if (!CodeGenOpts.DisableLLVMPasses) {
+    bool IsThinLTO = CodeGenOpts.EmitSummaryIndex;
+    bool IsLTO = CodeGenOpts.PrepareForLTO;
+
     if (CodeGenOpts.OptimizationLevel == 0) {
       // Build a minimal pipeline based on the semantics required by Clang,
       // which is just that always inlining occurs.
       MPM.addPass(AlwaysInlinerPass());
+      if (IsThinLTO)
+        MPM.addPass(NameAnonGlobalPass());
     } else {
-      // Otherwise, use the default pass pipeline. We also have to map our
-      // optimization levels into one of the distinct levels used to configure
-      // the pipeline.
+      // Map our optimization levels into one of the distinct levels used to
+      // configure the pipeline.
       PassBuilder::OptimizationLevel Level = mapToLevel(CodeGenOpts);
 
-      MPM = PB.buildPerModuleDefaultPipeline(Level);
+      if (IsThinLTO) {
+        MPM = PB.buildThinLTOPreLinkDefaultPipeline(
+            Level, CodeGenOpts.DebugPassManager);
+        MPM.addPass(NameAnonGlobalPass());
+      } else if (IsLTO) {
+        MPM = PB.buildLTOPreLinkDefaultPipeline(Level,
+                                                CodeGenOpts.DebugPassManager);
+      } else {
+        MPM = PB.buildPerModuleDefaultPipeline(Level,
+                                               CodeGenOpts.DebugPassManager);
+      }
     }
   }
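
The same entry points can be driven outside of clang; a minimal standalone
sketch assuming the LLVM 5.0 new-pass-manager API (TM, M and DebugPM are
placeholders supplied by the caller):

    #include "llvm/IR/Module.h"
    #include "llvm/Passes/PassBuilder.h"
    using namespace llvm;

    void runDefaultPipeline(TargetMachine *TM, Module &M, bool DebugPM) {
      PassBuilder PB(TM);
      LoopAnalysisManager LAM(DebugPM);
      FunctionAnalysisManager FAM(DebugPM);
      CGSCCAnalysisManager CGAM(DebugPM);
      ModuleAnalysisManager MAM(DebugPM);
      PB.registerModuleAnalyses(MAM);
      PB.registerCGSCCAnalyses(CGAM);
      PB.registerFunctionAnalyses(FAM);
      PB.registerLoopAnalyses(LAM);
      PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
      // Non-LTO case of the selection above; the (Thin)LTO pre-link pipelines
      // are chosen the same way via buildThinLTOPreLinkDefaultPipeline and
      // buildLTOPreLinkDefaultPipeline.
      ModulePassManager MPM =
          PB.buildPerModuleDefaultPipeline(PassBuilder::O2, DebugPM);
      MPM.run(M, MAM);
    }
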
 
@@ -897,6 +913,7 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
   // create that pass manager here and use it as needed below.
   legacy::PassManager CodeGenPasses;
   bool NeedCodeGen = false;
+  Optional<raw_fd_ostream> ThinLinkOS;
 
   // Append any output we need to the pass manager.
   switch (Action) {
@@ -904,9 +921,24 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
     break;
 
   case Backend_EmitBC:
-    MPM.addPass(BitcodeWriterPass(*OS, CodeGenOpts.EmitLLVMUseLists,
-                                  CodeGenOpts.EmitSummaryIndex,
-                                  CodeGenOpts.EmitSummaryIndex));
+    if (CodeGenOpts.EmitSummaryIndex) {
+      if (!CodeGenOpts.ThinLinkBitcodeFile.empty()) {
+        std::error_code EC;
+        ThinLinkOS.emplace(CodeGenOpts.ThinLinkBitcodeFile, EC,
+                           llvm::sys::fs::F_None);
+        if (EC) {
+          Diags.Report(diag::err_fe_unable_to_open_output)
+              << CodeGenOpts.ThinLinkBitcodeFile << EC.message();
+          return;
+        }
+      }
+      MPM.addPass(
+          ThinLTOBitcodeWriterPass(*OS, ThinLinkOS ? &*ThinLinkOS : nullptr));
+    } else {
+      MPM.addPass(BitcodeWriterPass(*OS, CodeGenOpts.EmitLLVMUseLists,
+                                    CodeGenOpts.EmitSummaryIndex,
+                                    CodeGenOpts.EmitSummaryIndex));
+    }
     break;
 
   case Backend_EmitLL:
@@ -946,11 +978,11 @@ Expected<BitcodeModule> clang::FindThinLTOModule(MemoryBufferRef MBRef) {
   if (!BMsOrErr)
     return BMsOrErr.takeError();
 
-  // The bitcode file may contain multiple modules, we want the one with a
-  // summary.
+  // The bitcode file may contain multiple modules; we want the one that is
+  // marked as being the ThinLTO module.
   for (BitcodeModule &BM : *BMsOrErr) {
-    Expected<bool> HasSummary = BM.hasSummary();
-    if (HasSummary && *HasSummary)
+    Expected<BitcodeLTOInfo> LTOInfo = BM.getLTOInfo();
+    if (LTOInfo && LTOInfo->IsThinLTO)
       return BM;
   }
 
@@ -966,7 +998,7 @@ static void runThinLTOBackend(ModuleSummaryIndex *CombinedIndex, Module *M,
                               std::unique_ptr<raw_pwrite_stream> OS,
                               std::string SampleProfile,
                               BackendAction Action) {
-  StringMap<std::map<GlobalValue::GUID, GlobalValueSummary *>>
+  StringMap<DenseMap<GlobalValue::GUID, GlobalValueSummary *>>
       ModuleToDefinedGVSummaries;
   CombinedIndex->collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries);
 
@@ -1029,6 +1061,7 @@ static void runThinLTOBackend(ModuleSummaryIndex *CombinedIndex, Module *M,
   Conf.CGOptLevel = getCGOptLevel(CGOpts);
   initTargetOptions(Conf.Options, CGOpts, TOpts, LOpts, HeaderOpts);
   Conf.SampleProfile = std::move(SampleProfile);
+  Conf.UseNewPM = CGOpts.ExperimentalNewPassManager;
   switch (Action) {
   case Backend_EmitNothing:
     Conf.PreCodeGenModuleHook = [](size_t Task, const Module &Mod) {
diff --git a/interpreter/llvm/src/tools/clang/lib/CodeGen/CGAtomic.cpp b/interpreter/llvm/src/tools/clang/lib/CodeGen/CGAtomic.cpp
index 28e20b53d6562..a6e6fec206d57 100644
--- a/interpreter/llvm/src/tools/clang/lib/CodeGen/CGAtomic.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/CodeGen/CGAtomic.cpp
@@ -95,7 +95,7 @@ namespace {
         BFI.StorageOffset += OffsetInChars;
         LVal = LValue::MakeBitfield(Address(Addr, lvalue.getAlignment()),
                                     BFI, lvalue.getType(),
-                                    lvalue.getAlignmentSource());
+                                    lvalue.getBaseInfo());
         LVal.setTBAAInfo(lvalue.getTBAAInfo());
         AtomicTy = C.getIntTypeForBitwidth(AtomicSizeInBits, OrigBFI.IsSigned);
         if (AtomicTy.isNull()) {
@@ -203,7 +203,7 @@ namespace {
         addr = CGF.Builder.CreateStructGEP(addr, 0, CharUnits());
 
       return LValue::MakeAddr(addr, getValueType(), CGF.getContext(),
-                              LVal.getAlignmentSource(), LVal.getTBAAInfo());
+                              LVal.getBaseInfo(), LVal.getTBAAInfo());
     }
 
     /// \brief Emits atomic load.
@@ -1181,15 +1181,15 @@ RValue AtomicInfo::convertAtomicTempToRValue(Address addr,
   if (LVal.isBitField())
     return CGF.EmitLoadOfBitfieldLValue(
         LValue::MakeBitfield(addr, LVal.getBitFieldInfo(), LVal.getType(),
-                             LVal.getAlignmentSource()), loc);
+                             LVal.getBaseInfo()), loc);
   if (LVal.isVectorElt())
     return CGF.EmitLoadOfLValue(
         LValue::MakeVectorElt(addr, LVal.getVectorIdx(), LVal.getType(),
-                              LVal.getAlignmentSource()), loc);
+                              LVal.getBaseInfo()), loc);
   assert(LVal.isExtVectorElt());
   return CGF.EmitLoadOfExtVectorElementLValue(LValue::MakeExtVectorElt(
       addr, LVal.getExtVectorElts(), LVal.getType(),
-      LVal.getAlignmentSource()));
+      LVal.getBaseInfo()));
 }
 
 RValue AtomicInfo::ConvertIntToValueOrAtomic(llvm::Value *IntVal,
@@ -1506,26 +1506,26 @@ EmitAtomicUpdateValue(CodeGenFunction &CGF, AtomicInfo &Atomics, RValue OldRVal,
       UpdateLVal =
           LValue::MakeBitfield(Ptr, AtomicLVal.getBitFieldInfo(),
                                AtomicLVal.getType(),
-                               AtomicLVal.getAlignmentSource());
+                               AtomicLVal.getBaseInfo());
       DesiredLVal =
           LValue::MakeBitfield(DesiredAddr, AtomicLVal.getBitFieldInfo(),
                                AtomicLVal.getType(),
-                               AtomicLVal.getAlignmentSource());
+                               AtomicLVal.getBaseInfo());
     } else if (AtomicLVal.isVectorElt()) {
       UpdateLVal = LValue::MakeVectorElt(Ptr, AtomicLVal.getVectorIdx(),
                                          AtomicLVal.getType(),
-                                         AtomicLVal.getAlignmentSource());
+                                         AtomicLVal.getBaseInfo());
       DesiredLVal = LValue::MakeVectorElt(
           DesiredAddr, AtomicLVal.getVectorIdx(), AtomicLVal.getType(),
-          AtomicLVal.getAlignmentSource());
+          AtomicLVal.getBaseInfo());
     } else {
       assert(AtomicLVal.isExtVectorElt());
       UpdateLVal = LValue::MakeExtVectorElt(Ptr, AtomicLVal.getExtVectorElts(),
                                             AtomicLVal.getType(),
-                                            AtomicLVal.getAlignmentSource());
+                                            AtomicLVal.getBaseInfo());
       DesiredLVal = LValue::MakeExtVectorElt(
           DesiredAddr, AtomicLVal.getExtVectorElts(), AtomicLVal.getType(),
-          AtomicLVal.getAlignmentSource());
+          AtomicLVal.getBaseInfo());
     }
     UpdateLVal.setTBAAInfo(AtomicLVal.getTBAAInfo());
     DesiredLVal.setTBAAInfo(AtomicLVal.getTBAAInfo());
@@ -1612,17 +1612,17 @@ static void EmitAtomicUpdateValue(CodeGenFunction &CGF, AtomicInfo &Atomics,
     DesiredLVal =
         LValue::MakeBitfield(DesiredAddr, AtomicLVal.getBitFieldInfo(),
                              AtomicLVal.getType(),
-                             AtomicLVal.getAlignmentSource());
+                             AtomicLVal.getBaseInfo());
   } else if (AtomicLVal.isVectorElt()) {
     DesiredLVal =
         LValue::MakeVectorElt(DesiredAddr, AtomicLVal.getVectorIdx(),
                               AtomicLVal.getType(),
-                              AtomicLVal.getAlignmentSource());
+                              AtomicLVal.getBaseInfo());
   } else {
     assert(AtomicLVal.isExtVectorElt());
     DesiredLVal = LValue::MakeExtVectorElt(
         DesiredAddr, AtomicLVal.getExtVectorElts(), AtomicLVal.getType(),
-        AtomicLVal.getAlignmentSource());
+        AtomicLVal.getBaseInfo());
   }
   DesiredLVal.setTBAAInfo(AtomicLVal.getTBAAInfo());
   // Store new value in the corresponding memory area
diff --git a/interpreter/llvm/src/tools/clang/lib/CodeGen/CGBlocks.cpp b/interpreter/llvm/src/tools/clang/lib/CodeGen/CGBlocks.cpp
index f1c20e9df1f30..1810489578798 100644
--- a/interpreter/llvm/src/tools/clang/lib/CodeGen/CGBlocks.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/CodeGen/CGBlocks.cpp
@@ -736,9 +736,9 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) {
   llvm::Constant *isa =
       (!CGM.getContext().getLangOpts().OpenCL)
           ? CGM.getNSConcreteStackBlock()
-          : CGM.getNullPointer(cast(
-                                   CGM.getNSConcreteStackBlock()->getType()),
-                               QualType(getContext().VoidPtrTy));
+          : CGM.getNullPointer(VoidPtrPtrTy,
+                               CGM.getContext().getPointerType(
+                                   QualType(CGM.getContext().VoidPtrTy)));
   isa = llvm::ConstantExpr::getBitCast(isa, VoidPtrTy);
 
   // Build the block descriptor.
@@ -903,9 +903,8 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) {
     } else {
       // Fake up a new variable so that EmitScalarInit doesn't think
       // we're referring to the variable in its own initializer.
-      ImplicitParamDecl blockFieldPseudoVar(getContext(), /*DC*/ nullptr,
-                                            SourceLocation(), /*name*/ nullptr,
-                                            type);
+      ImplicitParamDecl BlockFieldPseudoVar(getContext(), type,
+                                            ImplicitParamDecl::Other);
 
       // We use one of these or the other depending on whether the
       // reference is nested.
@@ -918,8 +917,9 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) {
       // FIXME: Pass a specific location for the expr init so that the store is
       // attributed to a reasonable location - otherwise it may be attributed to
       // locations of subexpressions in the initialization.
-      EmitExprAsInit(&l2r, &blockFieldPseudoVar,
-                     MakeAddrLValue(blockField, type, AlignmentSource::Decl),
+      LValueBaseInfo BaseInfo(AlignmentSource::Decl, false);
+      EmitExprAsInit(&l2r, &BlockFieldPseudoVar,
+                     MakeAddrLValue(blockField, type, BaseInfo),
                      /*captured by init*/ false);
     }
 
@@ -1141,12 +1141,11 @@ static llvm::Constant *buildGlobalBlock(CodeGenModule &CGM,
   auto fields = builder.beginStruct();
 
   // isa
-  fields.add(
-      (!CGM.getContext().getLangOpts().OpenCL)
-          ? CGM.getNSConcreteGlobalBlock()
-          : CGM.getNullPointer(cast<llvm::PointerType>(
-                                   CGM.getNSConcreteGlobalBlock()->getType()),
-                               QualType(CGM.getContext().VoidPtrTy)));
+  fields.add((!CGM.getContext().getLangOpts().OpenCL)
+                 ? CGM.getNSConcreteGlobalBlock()
+                 : CGM.getNullPointer(CGM.VoidPtrPtrTy,
+                                      CGM.getContext().getPointerType(QualType(
+                                          CGM.getContext().VoidPtrTy))));
 
   // __flags
   BlockFlags flags = BLOCK_IS_GLOBAL | BLOCK_HAS_SIGNATURE;
@@ -1255,7 +1254,7 @@ CodeGenFunction::GenerateBlockFunction(GlobalDecl GD,
 
   // For OpenCL passed block pointer can be private AS local variable or
   // global AS program scope variable (for the case with and without captures).
-  // Generic AS is used therefore to be able to accomodate both private and
+  // Generic AS is used therefore to be able to accommodate both private and
   // generic AS in one implementation.
   if (getLangOpts().OpenCL)
     selfTy = getContext().getPointerType(getContext().getAddrSpaceQualType(
@@ -1263,9 +1262,10 @@ CodeGenFunction::GenerateBlockFunction(GlobalDecl GD,
 
   IdentifierInfo *II = &CGM.getContext().Idents.get(".block_descriptor");
 
-  ImplicitParamDecl selfDecl(getContext(), const_cast<BlockDecl *>(blockDecl),
-                             SourceLocation(), II, selfTy);
-  args.push_back(&selfDecl);
+  ImplicitParamDecl SelfDecl(getContext(), const_cast<BlockDecl *>(blockDecl),
+                             SourceLocation(), II, selfTy,
+                             ImplicitParamDecl::ObjCSelf);
+  args.push_back(&SelfDecl);
 
   // Now add the rest of the parameters.
   args.append(blockDecl->param_begin(), blockDecl->param_end());
@@ -1498,12 +1498,12 @@ CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) {
   ASTContext &C = getContext();
 
   FunctionArgList args;
-  ImplicitParamDecl dstDecl(getContext(), nullptr, SourceLocation(), nullptr,
-                            C.VoidPtrTy);
-  args.push_back(&dstDecl);
-  ImplicitParamDecl srcDecl(getContext(), nullptr, SourceLocation(), nullptr,
-                            C.VoidPtrTy);
-  args.push_back(&srcDecl);
+  ImplicitParamDecl DstDecl(getContext(), C.VoidPtrTy,
+                            ImplicitParamDecl::Other);
+  args.push_back(&DstDecl);
+  ImplicitParamDecl SrcDecl(getContext(), C.VoidPtrTy,
+                            ImplicitParamDecl::Other);
+  args.push_back(&SrcDecl);
 
   const CGFunctionInfo &FI =
     CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, args);
@@ -1535,11 +1535,11 @@ CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) {
   auto AL = ApplyDebugLocation::CreateArtificial(*this);
   llvm::Type *structPtrTy = blockInfo.StructureType->getPointerTo();
 
-  Address src = GetAddrOfLocalVar(&srcDecl);
+  Address src = GetAddrOfLocalVar(&SrcDecl);
   src = Address(Builder.CreateLoad(src), blockInfo.BlockAlign);
   src = Builder.CreateBitCast(src, structPtrTy, "block.source");
 
-  Address dst = GetAddrOfLocalVar(&dstDecl);
+  Address dst = GetAddrOfLocalVar(&DstDecl);
   dst = Address(Builder.CreateLoad(dst), blockInfo.BlockAlign);
   dst = Builder.CreateBitCast(dst, structPtrTy, "block.dest");
 
@@ -1675,9 +1675,9 @@ CodeGenFunction::GenerateDestroyHelperFunction(const CGBlockInfo &blockInfo) {
   ASTContext &C = getContext();
 
   FunctionArgList args;
-  ImplicitParamDecl srcDecl(getContext(), nullptr, SourceLocation(), nullptr,
-                            C.VoidPtrTy);
-  args.push_back(&srcDecl);
+  ImplicitParamDecl SrcDecl(getContext(), C.VoidPtrTy,
+                            ImplicitParamDecl::Other);
+  args.push_back(&SrcDecl);
 
   const CGFunctionInfo &FI =
     CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, args);
@@ -1708,7 +1708,7 @@ CodeGenFunction::GenerateDestroyHelperFunction(const CGBlockInfo &blockInfo) {
 
   llvm::Type *structPtrTy = blockInfo.StructureType->getPointerTo();
 
-  Address src = GetAddrOfLocalVar(&srcDecl);
+  Address src = GetAddrOfLocalVar(&SrcDecl);
   src = Address(Builder.CreateLoad(src), blockInfo.BlockAlign);
   src = Builder.CreateBitCast(src, structPtrTy, "block");
 
@@ -1917,13 +1917,13 @@ generateByrefCopyHelper(CodeGenFunction &CGF, const BlockByrefInfo &byrefInfo,
   QualType R = Context.VoidTy;
 
   FunctionArgList args;
-  ImplicitParamDecl dst(CGF.getContext(), nullptr, SourceLocation(), nullptr,
-                        Context.VoidPtrTy);
-  args.push_back(&dst);
+  ImplicitParamDecl Dst(CGF.getContext(), Context.VoidPtrTy,
+                        ImplicitParamDecl::Other);
+  args.push_back(&Dst);
 
-  ImplicitParamDecl src(CGF.getContext(), nullptr, SourceLocation(), nullptr,
-                        Context.VoidPtrTy);
-  args.push_back(&src);
+  ImplicitParamDecl Src(CGF.getContext(), Context.VoidPtrTy,
+                        ImplicitParamDecl::Other);
+  args.push_back(&Src);
 
   const CGFunctionInfo &FI =
     CGF.CGM.getTypes().arrangeBuiltinFunctionDeclaration(R, args);
@@ -1954,7 +1954,7 @@ generateByrefCopyHelper(CodeGenFunction &CGF, const BlockByrefInfo &byrefInfo,
     llvm::Type *byrefPtrType = byrefInfo.Type->getPointerTo(0);
 
     // dst->x
-    Address destField = CGF.GetAddrOfLocalVar(&dst);
+    Address destField = CGF.GetAddrOfLocalVar(&Dst);
     destField = Address(CGF.Builder.CreateLoad(destField),
                         byrefInfo.ByrefAlignment);
     destField = CGF.Builder.CreateBitCast(destField, byrefPtrType);
@@ -1962,7 +1962,7 @@ generateByrefCopyHelper(CodeGenFunction &CGF, const BlockByrefInfo &byrefInfo,
                                           "dest-object");
 
     // src->x
-    Address srcField = CGF.GetAddrOfLocalVar(&src);
+    Address srcField = CGF.GetAddrOfLocalVar(&Src);
     srcField = Address(CGF.Builder.CreateLoad(srcField),
                        byrefInfo.ByrefAlignment);
     srcField = CGF.Builder.CreateBitCast(srcField, byrefPtrType);
@@ -1994,9 +1994,9 @@ generateByrefDisposeHelper(CodeGenFunction &CGF,
   QualType R = Context.VoidTy;
 
   FunctionArgList args;
-  ImplicitParamDecl src(CGF.getContext(), nullptr, SourceLocation(), nullptr,
-                        Context.VoidPtrTy);
-  args.push_back(&src);
+  ImplicitParamDecl Src(CGF.getContext(), Context.VoidPtrTy,
+                        ImplicitParamDecl::Other);
+  args.push_back(&Src);
 
   const CGFunctionInfo &FI =
     CGF.CGM.getTypes().arrangeBuiltinFunctionDeclaration(R, args);
@@ -2025,7 +2025,7 @@ generateByrefDisposeHelper(CodeGenFunction &CGF,
   CGF.StartFunction(FD, R, Fn, FI, args);
 
   if (generator.needsDispose()) {
-    Address addr = CGF.GetAddrOfLocalVar(&src);
+    Address addr = CGF.GetAddrOfLocalVar(&Src);
     addr = Address(CGF.Builder.CreateLoad(addr), byrefInfo.ByrefAlignment);
     auto byrefPtrType = byrefInfo.Type->getPointerTo(0);
     addr = CGF.Builder.CreateBitCast(addr, byrefPtrType);
diff --git a/interpreter/llvm/src/tools/clang/lib/CodeGen/CGBuiltin.cpp b/interpreter/llvm/src/tools/clang/lib/CodeGen/CGBuiltin.cpp
index 50c9e22801c78..f3527b0f39d13 100644
--- a/interpreter/llvm/src/tools/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/CodeGen/CGBuiltin.cpp
@@ -1810,12 +1810,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
   case Builtin::BI__atomic_signal_fence:
   case Builtin::BI__c11_atomic_thread_fence:
   case Builtin::BI__c11_atomic_signal_fence: {
-    llvm::SynchronizationScope Scope;
+    llvm::SyncScope::ID SSID;
     if (BuiltinID == Builtin::BI__atomic_signal_fence ||
         BuiltinID == Builtin::BI__c11_atomic_signal_fence)
-      Scope = llvm::SingleThread;
+      SSID = llvm::SyncScope::SingleThread;
     else
-      Scope = llvm::CrossThread;
+      SSID = llvm::SyncScope::System;
     Value *Order = EmitScalarExpr(E->getArg(0));
     if (isa<llvm::ConstantInt>(Order)) {
       int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
@@ -1825,17 +1825,16 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
         break;
       case 1:  // memory_order_consume
       case 2:  // memory_order_acquire
-        Builder.CreateFence(llvm::AtomicOrdering::Acquire, Scope);
+        Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
         break;
       case 3:  // memory_order_release
-        Builder.CreateFence(llvm::AtomicOrdering::Release, Scope);
+        Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
         break;
       case 4:  // memory_order_acq_rel
-        Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, Scope);
+        Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
         break;
       case 5:  // memory_order_seq_cst
-        Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
-                            Scope);
+        Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
         break;
       }
       return RValue::get(nullptr);
@@ -1852,23 +1851,23 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
     llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
 
     Builder.SetInsertPoint(AcquireBB);
-    Builder.CreateFence(llvm::AtomicOrdering::Acquire, Scope);
+    Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
     Builder.CreateBr(ContBB);
     SI->addCase(Builder.getInt32(1), AcquireBB);
     SI->addCase(Builder.getInt32(2), AcquireBB);
 
     Builder.SetInsertPoint(ReleaseBB);
-    Builder.CreateFence(llvm::AtomicOrdering::Release, Scope);
+    Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
     Builder.CreateBr(ContBB);
     SI->addCase(Builder.getInt32(3), ReleaseBB);
 
     Builder.SetInsertPoint(AcqRelBB);
-    Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, Scope);
+    Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
     Builder.CreateBr(ContBB);
     SI->addCase(Builder.getInt32(4), AcqRelBB);
 
     Builder.SetInsertPoint(SeqCstBB);
-    Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, Scope);
+    Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
     Builder.CreateBr(ContBB);
     SI->addCase(Builder.getInt32(5), SeqCstBB);
 
@@ -2659,6 +2658,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
           Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
                              llvm::ArrayRef<llvm::Value *>(Args)));
     }
+    LLVM_FALLTHROUGH;
   }
   // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block
   // parameter.
@@ -2795,6 +2795,33 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
       Arg1 = Builder.CreateTruncOrBitCast(Arg1, PTy1);
     return RValue::get(Builder.CreateCall(F, {Arg0Val, Arg1}));
   }
+
+  case Builtin::BI__builtin_ms_va_start:
+  case Builtin::BI__builtin_ms_va_end:
+    return RValue::get(
+        EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).getPointer(),
+                       BuiltinID == Builtin::BI__builtin_ms_va_start));
+
+  case Builtin::BI__builtin_ms_va_copy: {
+    // Lower this manually. We can't reliably determine whether or not any
+    // given va_copy() is for a Win64 va_list from the calling convention
+    // alone, because it's legal to do this from a System V ABI function.
+    // With opaque pointer types, we won't have enough information in LLVM
+    // IR to determine this from the argument types, either. Best to do it
+    // now, while we have enough information.
+    Address DestAddr = EmitMSVAListRef(E->getArg(0));
+    Address SrcAddr = EmitMSVAListRef(E->getArg(1));
+
+    llvm::Type *BPP = Int8PtrPtrTy;
+
+    DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), BPP, "cp"),
+                       DestAddr.getAlignment());
+    SrcAddr = Address(Builder.CreateBitCast(SrcAddr.getPointer(), BPP, "ap"),
+                      SrcAddr.getAlignment());
+
+    Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val");
+    return RValue::get(Builder.CreateStore(ArgPtr, DestAddr));
+  }
   }
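
Hoisting these cases out of the X86-only handler makes the Win64 va_list
builtins usable on any target that defines an ms_abi calling convention
(notably ARM64 Windows); an illustrative use:

    __attribute__((ms_abi)) void log_args(int n, ...) {
      __builtin_ms_va_list ap, ap2;
      __builtin_ms_va_start(ap, n);
      __builtin_ms_va_copy(ap2, ap);  // the case lowered manually above
      /* ... walk ap2 ... */
      __builtin_ms_va_end(ap2);
      __builtin_ms_va_end(ap);
    }
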
 
   // If this is an alias for a lib function (e.g. __builtin_sin), emit
@@ -3813,6 +3840,7 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
   case NEON::BI__builtin_neon_vcalt_v:
   case NEON::BI__builtin_neon_vcaltq_v:
     std::swap(Ops[0], Ops[1]);
+    LLVM_FALLTHROUGH;
   case NEON::BI__builtin_neon_vcage_v:
   case NEON::BI__builtin_neon_vcageq_v:
   case NEON::BI__builtin_neon_vcagt_v:
@@ -5056,6 +5084,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
   case NEON::BI__builtin_neon_vsri_n_v:
   case NEON::BI__builtin_neon_vsriq_n_v:
     rightShift = true;
+    LLVM_FALLTHROUGH;
   case NEON::BI__builtin_neon_vsli_n_v:
   case NEON::BI__builtin_neon_vsliq_n_v:
     Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
@@ -7221,31 +7250,6 @@ static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op,
 
 Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
                                            const CallExpr *E) {
-  if (BuiltinID == X86::BI__builtin_ms_va_start ||
-      BuiltinID == X86::BI__builtin_ms_va_end)
-    return EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).getPointer(),
-                          BuiltinID == X86::BI__builtin_ms_va_start);
-  if (BuiltinID == X86::BI__builtin_ms_va_copy) {
-    // Lower this manually. We can't reliably determine whether or not any
-    // given va_copy() is for a Win64 va_list from the calling convention
-    // alone, because it's legal to do this from a System V ABI function.
-    // With opaque pointer types, we won't have enough information in LLVM
-    // IR to determine this from the argument types, either. Best to do it
-    // now, while we have enough information.
-    Address DestAddr = EmitMSVAListRef(E->getArg(0));
-    Address SrcAddr = EmitMSVAListRef(E->getArg(1));
-
-    llvm::Type *BPP = Int8PtrPtrTy;
-
-    DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), BPP, "cp"),
-                       DestAddr.getAlignment());
-    SrcAddr = Address(Builder.CreateBitCast(SrcAddr.getPointer(), BPP, "ap"),
-                      SrcAddr.getAlignment());
-
-    Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val");
-    return Builder.CreateStore(ArgPtr, DestAddr);
-  }
-
   SmallVector<Value*, 4> Ops;
 
   // Find out if any arguments are required to be integer constant expressions.
@@ -7332,39 +7336,44 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
       AVX512PF,
       AVX512VBMI,
       AVX512IFMA,
+      AVX5124VNNIW, // TODO implement this fully
+      AVX5124FMAPS, // TODO implement this fully
+      AVX512VPOPCNTDQ,
       MAX
     };
 
-    X86Features Feature = StringSwitch<X86Features>(FeatureStr)
-                              .Case("cmov", X86Features::CMOV)
-                              .Case("mmx", X86Features::MMX)
-                              .Case("popcnt", X86Features::POPCNT)
-                              .Case("sse", X86Features::SSE)
-                              .Case("sse2", X86Features::SSE2)
-                              .Case("sse3", X86Features::SSE3)
-                              .Case("ssse3", X86Features::SSSE3)
-                              .Case("sse4.1", X86Features::SSE4_1)
-                              .Case("sse4.2", X86Features::SSE4_2)
-                              .Case("avx", X86Features::AVX)
-                              .Case("avx2", X86Features::AVX2)
-                              .Case("sse4a", X86Features::SSE4_A)
-                              .Case("fma4", X86Features::FMA4)
-                              .Case("xop", X86Features::XOP)
-                              .Case("fma", X86Features::FMA)
-                              .Case("avx512f", X86Features::AVX512F)
-                              .Case("bmi", X86Features::BMI)
-                              .Case("bmi2", X86Features::BMI2)
-                              .Case("aes", X86Features::AES)
-                              .Case("pclmul", X86Features::PCLMUL)
-                              .Case("avx512vl", X86Features::AVX512VL)
-                              .Case("avx512bw", X86Features::AVX512BW)
-                              .Case("avx512dq", X86Features::AVX512DQ)
-                              .Case("avx512cd", X86Features::AVX512CD)
-                              .Case("avx512er", X86Features::AVX512ER)
-                              .Case("avx512pf", X86Features::AVX512PF)
-                              .Case("avx512vbmi", X86Features::AVX512VBMI)
-                              .Case("avx512ifma", X86Features::AVX512IFMA)
-                              .Default(X86Features::MAX);
+    X86Features Feature =
+        StringSwitch<X86Features>(FeatureStr)
+            .Case("cmov", X86Features::CMOV)
+            .Case("mmx", X86Features::MMX)
+            .Case("popcnt", X86Features::POPCNT)
+            .Case("sse", X86Features::SSE)
+            .Case("sse2", X86Features::SSE2)
+            .Case("sse3", X86Features::SSE3)
+            .Case("ssse3", X86Features::SSSE3)
+            .Case("sse4.1", X86Features::SSE4_1)
+            .Case("sse4.2", X86Features::SSE4_2)
+            .Case("avx", X86Features::AVX)
+            .Case("avx2", X86Features::AVX2)
+            .Case("sse4a", X86Features::SSE4_A)
+            .Case("fma4", X86Features::FMA4)
+            .Case("xop", X86Features::XOP)
+            .Case("fma", X86Features::FMA)
+            .Case("avx512f", X86Features::AVX512F)
+            .Case("bmi", X86Features::BMI)
+            .Case("bmi2", X86Features::BMI2)
+            .Case("aes", X86Features::AES)
+            .Case("pclmul", X86Features::PCLMUL)
+            .Case("avx512vl", X86Features::AVX512VL)
+            .Case("avx512bw", X86Features::AVX512BW)
+            .Case("avx512dq", X86Features::AVX512DQ)
+            .Case("avx512cd", X86Features::AVX512CD)
+            .Case("avx512er", X86Features::AVX512ER)
+            .Case("avx512pf", X86Features::AVX512PF)
+            .Case("avx512vbmi", X86Features::AVX512VBMI)
+            .Case("avx512ifma", X86Features::AVX512IFMA)
+            .Case("avx512vpopcntdq", X86Features::AVX512VPOPCNTDQ)
+            .Default(X86Features::MAX);
     assert(Feature != X86Features::MAX && "Invalid feature!");
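
The new enumerator is what lets the corresponding string through
__builtin_cpu_supports; an illustrative dispatch:

    int have_vpopcntdq(void) {
      // Tests the compiler-rt/libgcc __cpu_model bit matched above.
      return __builtin_cpu_supports("avx512vpopcntdq");
    }
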
 
     // Matching the struct layout from the compiler-rt/libgcc structure that is
@@ -7517,7 +7526,12 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
   case X86::BI__builtin_ia32_storesd128_mask: {
     return EmitX86MaskedStore(*this, Ops, 16);
   }
-
+  case X86::BI__builtin_ia32_vpopcntd_512:
+  case X86::BI__builtin_ia32_vpopcntq_512: {
+    llvm::Type *ResultType = ConvertType(E->getType());
+    llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
+    return Builder.CreateCall(F, Ops);
+  }
   case X86::BI__builtin_ia32_cvtmask2b128:
   case X86::BI__builtin_ia32_cvtmask2b256:
   case X86::BI__builtin_ia32_cvtmask2b512:
@@ -7912,6 +7926,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
     }
 
     // We can't handle 8-31 immediates with native IR, use the intrinsic.
+    // Except for predicates that create constants.
     Intrinsic::ID ID;
     switch (BuiltinID) {
     default: llvm_unreachable("Unsupported intrinsic!");
@@ -7919,12 +7934,32 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
       ID = Intrinsic::x86_sse_cmp_ps;
       break;
     case X86::BI__builtin_ia32_cmpps256:
+      // _CMP_TRUE_UQ, _CMP_TRUE_US produce -1,-1... vector
+      // on any input and _CMP_FALSE_OQ, _CMP_FALSE_OS produce 0, 0...
+      if (CC == 0xf || CC == 0xb || CC == 0x1b || CC == 0x1f) {
+         Value *Constant = (CC == 0xf || CC == 0x1f) ?
+                llvm::Constant::getAllOnesValue(Builder.getInt32Ty()) :
+                llvm::Constant::getNullValue(Builder.getInt32Ty());
+         Value *Vec = Builder.CreateVectorSplat(
+                        Ops[0]->getType()->getVectorNumElements(), Constant);
+         return Builder.CreateBitCast(Vec, Ops[0]->getType());
+      }
       ID = Intrinsic::x86_avx_cmp_ps_256;
       break;
     case X86::BI__builtin_ia32_cmppd:
       ID = Intrinsic::x86_sse2_cmp_pd;
       break;
     case X86::BI__builtin_ia32_cmppd256:
+      // _CMP_TRUE_UQ, _CMP_TRUE_US produce -1,-1... vector
+      // on any input and _CMP_FALSE_OQ, _CMP_FALSE_OS produce 0, 0...
+      if (CC == 0xf || CC == 0xb || CC == 0x1b || CC == 0x1f) {
+         Value *Constant = (CC == 0xf || CC == 0x1f) ?
+                llvm::Constant::getAllOnesValue(Builder.getInt64Ty()) :
+                llvm::Constant::getNullValue(Builder.getInt64Ty());
+         Value *Vec = Builder.CreateVectorSplat(
+                        Ops[0]->getType()->getVectorNumElements(), Constant);
+         return Builder.CreateBitCast(Vec, Ops[0]->getType());
+      }
       ID = Intrinsic::x86_avx_cmp_pd_256;
       break;
     }
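
An illustrative pair of calls that now fold to constants instead of emitting
vcmpps/vcmppd (assuming immintrin.h and -mavx):

    #include <immintrin.h>
    __m256 always_true(__m256 a, __m256 b) {
      return _mm256_cmp_ps(a, b, _CMP_TRUE_UQ);   // CC == 0x0f: all-ones splat
    }
    __m256d always_false(__m256d a, __m256d b) {
      return _mm256_cmp_pd(a, b, _CMP_FALSE_OQ);  // CC == 0x0b: zero splat
    }
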
@@ -8005,13 +8040,13 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
 
   case X86::BI__faststorefence: {
     return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
-                               llvm::CrossThread);
+                               llvm::SyncScope::System);
   }
   case X86::BI_ReadWriteBarrier:
   case X86::BI_ReadBarrier:
   case X86::BI_WriteBarrier: {
     return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
-                               llvm::SingleThread);
+                               llvm::SyncScope::SingleThread);
   }
   case X86::BI_BitScanForward:
   case X86::BI_BitScanForward64:
@@ -8442,6 +8477,80 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
       return Builder.CreateCall(F, Ops);
     }
   }
+
+  case PPC::BI__builtin_vsx_xxpermdi: {
+    ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
+    assert(ArgCI && "Third arg must be constant integer!");
+
+    unsigned Index = ArgCI->getZExtValue();
+    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
+    Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2));
+
+    // Element zero comes from the first input vector and element one comes from
+    // the second. The element indices within each vector are numbered in big
+    // endian order so the shuffle mask must be adjusted for this on little
+    // endian platforms (i.e. index is complemented and source vector reversed).
+    unsigned ElemIdx0;
+    unsigned ElemIdx1;
+    if (getTarget().isLittleEndian()) {
+      ElemIdx0 = (~Index & 1) + 2;
+      ElemIdx1 = (~Index & 2) >> 1;
+    } else { // BigEndian
+      ElemIdx0 = (Index & 2) >> 1;
+      ElemIdx1 = 2 + (Index & 1);
+    }
+
+    Constant *ShuffleElts[2] = {ConstantInt::get(Int32Ty, ElemIdx0),
+                                ConstantInt::get(Int32Ty, ElemIdx1)};
+    Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
+
+    Value *ShuffleCall =
+        Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleMask);
+    QualType BIRetType = E->getType();
+    auto RetTy = ConvertType(BIRetType);
+    return Builder.CreateBitCast(ShuffleCall, RetTy);
+  }
+
+  case PPC::BI__builtin_vsx_xxsldwi: {
+    ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
+    assert(ArgCI && "Third argument must be a compile time constant");
+    unsigned Index = ArgCI->getZExtValue() & 0x3;
+    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
+    Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int32Ty, 4));
+
+    // Create a shuffle mask
+    unsigned ElemIdx0;
+    unsigned ElemIdx1;
+    unsigned ElemIdx2;
+    unsigned ElemIdx3;
+    if (getTarget().isLittleEndian()) {
+      // Little endian element N comes from element 8+N-Index of the
+      // concatenated wide vector (of course, using modulo arithmetic on
+      // the total number of elements).
+      ElemIdx0 = (8 - Index) % 8;
+      ElemIdx1 = (9 - Index) % 8;
+      ElemIdx2 = (10 - Index) % 8;
+      ElemIdx3 = (11 - Index) % 8;
+    } else {
+      // Big endian ElemIdx = Index + N
+      ElemIdx0 = Index;
+      ElemIdx1 = Index + 1;
+      ElemIdx2 = Index + 2;
+      ElemIdx3 = Index + 3;
+    }
+
+    Constant *ShuffleElts[4] = {ConstantInt::get(Int32Ty, ElemIdx0),
+                                ConstantInt::get(Int32Ty, ElemIdx1),
+                                ConstantInt::get(Int32Ty, ElemIdx2),
+                                ConstantInt::get(Int32Ty, ElemIdx3)};
+
+    Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
+    Value *ShuffleCall =
+        Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleMask);
+    QualType BIRetType = E->getType();
+    auto RetTy = ConvertType(BIRetType);
+    return Builder.CreateBitCast(ShuffleCall, RetTy);
+  }
   }
 }
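
A worked instance of the endian remapping in the xxpermdi case, for Index = 1:

    // Big endian:    ElemIdx0 = (1 & 2) >> 1 = 0,   ElemIdx1 = 2 + (1 & 1) = 3
    //                -> shuffle mask {0, 3}
    // Little endian: ElemIdx0 = (~1 & 1) + 2 = 2,   ElemIdx1 = (~1 & 2) >> 1 = 1
    //                -> shuffle mask {2, 1}, selecting the same machine
    //                   doublewords once the reversed numbering is applied.
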
 
@@ -8683,12 +8792,14 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
     return Builder.CreateCall(F, {X, Undef});
   }
 
+  case SystemZ::BI__builtin_s390_vfsqsb:
   case SystemZ::BI__builtin_s390_vfsqdb: {
     llvm::Type *ResultType = ConvertType(E->getType());
     Value *X = EmitScalarExpr(E->getArg(0));
     Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
     return Builder.CreateCall(F, X);
   }
+  case SystemZ::BI__builtin_s390_vfmasb:
   case SystemZ::BI__builtin_s390_vfmadb: {
     llvm::Type *ResultType = ConvertType(E->getType());
     Value *X = EmitScalarExpr(E->getArg(0));
@@ -8697,6 +8808,7 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
     Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
     return Builder.CreateCall(F, {X, Y, Z});
   }
+  case SystemZ::BI__builtin_s390_vfmssb:
   case SystemZ::BI__builtin_s390_vfmsdb: {
     llvm::Type *ResultType = ConvertType(E->getType());
     Value *X = EmitScalarExpr(E->getArg(0));
@@ -8706,12 +8818,35 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
     Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
     return Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
   }
+  case SystemZ::BI__builtin_s390_vfnmasb:
+  case SystemZ::BI__builtin_s390_vfnmadb: {
+    llvm::Type *ResultType = ConvertType(E->getType());
+    Value *X = EmitScalarExpr(E->getArg(0));
+    Value *Y = EmitScalarExpr(E->getArg(1));
+    Value *Z = EmitScalarExpr(E->getArg(2));
+    Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
+    Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
+    return Builder.CreateFSub(Zero, Builder.CreateCall(F, {X, Y, Z}), "sub");
+  }
+  case SystemZ::BI__builtin_s390_vfnmssb:
+  case SystemZ::BI__builtin_s390_vfnmsdb: {
+    llvm::Type *ResultType = ConvertType(E->getType());
+    Value *X = EmitScalarExpr(E->getArg(0));
+    Value *Y = EmitScalarExpr(E->getArg(1));
+    Value *Z = EmitScalarExpr(E->getArg(2));
+    Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
+    Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
+    Value *NegZ = Builder.CreateFSub(Zero, Z, "sub");
+    return Builder.CreateFSub(Zero, Builder.CreateCall(F, {X, Y, NegZ}));
+  }
+  case SystemZ::BI__builtin_s390_vflpsb:
   case SystemZ::BI__builtin_s390_vflpdb: {
     llvm::Type *ResultType = ConvertType(E->getType());
     Value *X = EmitScalarExpr(E->getArg(0));
     Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
     return Builder.CreateCall(F, X);
   }
+  case SystemZ::BI__builtin_s390_vflnsb:
   case SystemZ::BI__builtin_s390_vflndb: {
     llvm::Type *ResultType = ConvertType(E->getType());
     Value *X = EmitScalarExpr(E->getArg(0));
@@ -8719,6 +8854,7 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
     Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
     return Builder.CreateFSub(Zero, Builder.CreateCall(F, X), "sub");
   }
+  case SystemZ::BI__builtin_s390_vfisb:
   case SystemZ::BI__builtin_s390_vfidb: {
     llvm::Type *ResultType = ConvertType(E->getType());
     Value *X = EmitScalarExpr(E->getArg(0));
@@ -8728,8 +8864,8 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
     bool IsConstM5 = E->getArg(2)->isIntegerConstantExpr(M5, getContext());
     assert(IsConstM4 && IsConstM5 && "Constant arg isn't actually constant?");
     (void)IsConstM4; (void)IsConstM5;
-    // Check whether this instance of vfidb can be represented via a LLVM
-    // standard intrinsic.  We only support some combinations of M4 and M5.
+    // Check whether this instance can be represented via a LLVM standard
+    // intrinsic.  We only support some combinations of M4 and M5.
     Intrinsic::ID ID = Intrinsic::not_intrinsic;
     switch (M4.getZExtValue()) {
     default: break;
@@ -8754,11 +8890,76 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
       Function *F = CGM.getIntrinsic(ID, ResultType);
       return Builder.CreateCall(F, X);
     }
-    Function *F = CGM.getIntrinsic(Intrinsic::s390_vfidb);
+    switch (BuiltinID) {
+      case SystemZ::BI__builtin_s390_vfisb: ID = Intrinsic::s390_vfisb; break;
+      case SystemZ::BI__builtin_s390_vfidb: ID = Intrinsic::s390_vfidb; break;
+      default: llvm_unreachable("Unknown BuiltinID");
+    }
+    Function *F = CGM.getIntrinsic(ID);
     Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
     Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5);
     return Builder.CreateCall(F, {X, M4Value, M5Value});
   }
+  case SystemZ::BI__builtin_s390_vfmaxsb:
+  case SystemZ::BI__builtin_s390_vfmaxdb: {
+    llvm::Type *ResultType = ConvertType(E->getType());
+    Value *X = EmitScalarExpr(E->getArg(0));
+    Value *Y = EmitScalarExpr(E->getArg(1));
+    // Constant-fold the M4 mask argument.
+    llvm::APSInt M4;
+    bool IsConstM4 = E->getArg(2)->isIntegerConstantExpr(M4, getContext());
+    assert(IsConstM4 && "Constant arg isn't actually constant?");
+    (void)IsConstM4;
+    // Check whether this instance can be represented via a LLVM standard
+    // intrinsic.  We only support some values of M4.
+    Intrinsic::ID ID = Intrinsic::not_intrinsic;
+    switch (M4.getZExtValue()) {
+    default: break;
+    case 4: ID = Intrinsic::maxnum; break;
+    }
+    if (ID != Intrinsic::not_intrinsic) {
+      Function *F = CGM.getIntrinsic(ID, ResultType);
+      return Builder.CreateCall(F, {X, Y});
+    }
+    switch (BuiltinID) {
+      case SystemZ::BI__builtin_s390_vfmaxsb: ID = Intrinsic::s390_vfmaxsb; break;
+      case SystemZ::BI__builtin_s390_vfmaxdb: ID = Intrinsic::s390_vfmaxdb; break;
+      default: llvm_unreachable("Unknown BuiltinID");
+    }
+    Function *F = CGM.getIntrinsic(ID);
+    Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
+    return Builder.CreateCall(F, {X, Y, M4Value});
+  }
+  case SystemZ::BI__builtin_s390_vfminsb:
+  case SystemZ::BI__builtin_s390_vfmindb: {
+    llvm::Type *ResultType = ConvertType(E->getType());
+    Value *X = EmitScalarExpr(E->getArg(0));
+    Value *Y = EmitScalarExpr(E->getArg(1));
+    // Constant-fold the M4 mask argument.
+    llvm::APSInt M4;
+    bool IsConstM4 = E->getArg(2)->isIntegerConstantExpr(M4, getContext());
+    assert(IsConstM4 && "Constant arg isn't actually constant?");
+    (void)IsConstM4;
+    // Check whether this instance can be represented via a LLVM standard
+    // intrinsic.  We only support some values of M4.
+    Intrinsic::ID ID = Intrinsic::not_intrinsic;
+    switch (M4.getZExtValue()) {
+    default: break;
+    case 4: ID = Intrinsic::minnum; break;
+    }
+    if (ID != Intrinsic::not_intrinsic) {
+      Function *F = CGM.getIntrinsic(ID, ResultType);
+      return Builder.CreateCall(F, {X, Y});
+    }
+    switch (BuiltinID) {
+      case SystemZ::BI__builtin_s390_vfminsb: ID = Intrinsic::s390_vfminsb; break;
+      case SystemZ::BI__builtin_s390_vfmindb: ID = Intrinsic::s390_vfmindb; break;
+      default: llvm_unreachable("Unknown BuiltinID");
+    }
+    Function *F = CGM.getIntrinsic(ID);
+    Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
+    return Builder.CreateCall(F, {X, Y, M4Value});
+  }
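
In summary, the min/max cases above implement this mapping (illustrative):

    // __builtin_s390_vfmaxdb(x, y, 4) -> llvm.maxnum.v2f64(x, y)
    // __builtin_s390_vfmindb(x, y, 4) -> llvm.minnum.v2f64(x, y)
    // any other M4 value              -> llvm.s390.vfmaxdb / llvm.s390.vfmindb
    //                                    with M4 passed as an immediate.
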
 
   // Vector intrinsics that output the post-instruction CC value.
 
@@ -8825,10 +9026,14 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
   INTRINSIC_WITH_CC(s390_vstrczhs);
   INTRINSIC_WITH_CC(s390_vstrczfs);
 
+  INTRINSIC_WITH_CC(s390_vfcesbs);
   INTRINSIC_WITH_CC(s390_vfcedbs);
+  INTRINSIC_WITH_CC(s390_vfchsbs);
   INTRINSIC_WITH_CC(s390_vfchdbs);
+  INTRINSIC_WITH_CC(s390_vfchesbs);
   INTRINSIC_WITH_CC(s390_vfchedbs);
 
+  INTRINSIC_WITH_CC(s390_vftcisb);
   INTRINSIC_WITH_CC(s390_vftcidb);
 
 #undef INTRINSIC_WITH_CC
@@ -8842,9 +9047,8 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
                                              const CallExpr *E) {
   auto MakeLdg = [&](unsigned IntrinsicID) {
     Value *Ptr = EmitScalarExpr(E->getArg(0));
-    AlignmentSource AlignSource;
     clang::CharUnits Align =
-        getNaturalPointeeTypeAlignment(E->getArg(0)->getType(), &AlignSource);
+        getNaturalPointeeTypeAlignment(E->getArg(0)->getType());
     return Builder.CreateCall(
         CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(),
                                        Ptr->getType()}),
@@ -9096,6 +9300,16 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
     Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_grow_memory, X->getType());
     return Builder.CreateCall(Callee, X);
   }
+  case WebAssembly::BI__builtin_wasm_throw: {
+    Value *Tag = EmitScalarExpr(E->getArg(0));
+    Value *Obj = EmitScalarExpr(E->getArg(1));
+    Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_throw);
+    return Builder.CreateCall(Callee, {Tag, Obj});
+  }
+  case WebAssembly::BI__builtin_wasm_rethrow: {
+    Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_rethrow);
+    return Builder.CreateCall(Callee);
+  }
 
   default:
     return nullptr;
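
Illustrative source for the two new builtins, part of the experimental
WebAssembly exception-handling work (the tag value 0 is a placeholder):

    void raise(void *obj) {
      __builtin_wasm_throw(0, obj);  // lowers to llvm.wasm.throw
    }
    void reraise(void) {
      __builtin_wasm_rethrow();      // lowers to llvm.wasm.rethrow
    }
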
diff --git a/interpreter/llvm/src/tools/clang/lib/CodeGen/CGCXXABI.cpp b/interpreter/llvm/src/tools/clang/lib/CodeGen/CGCXXABI.cpp
index 8702bc65f058c..033258643ddf9 100644
--- a/interpreter/llvm/src/tools/clang/lib/CodeGen/CGCXXABI.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/CodeGen/CGCXXABI.cpp
@@ -30,49 +30,9 @@ void CGCXXABI::ErrorUnsupportedABI(CodeGenFunction &CGF, StringRef S) {
 }
 
 bool CGCXXABI::canCopyArgument(const CXXRecordDecl *RD) const {
-  // See also Sema::ShouldDeleteSpecialMember. These two functions
-  // should be kept consistent.
-
-  // If RD has a non-trivial move or copy constructor, we cannot copy the
-  // argument.
-  if (RD->hasNonTrivialCopyConstructor() || RD->hasNonTrivialMoveConstructor())
-    return false;
-
-  // If RD has a non-trivial destructor, we cannot copy the argument.
-  if (RD->hasNonTrivialDestructor())
-    return false;
-
   // We can only copy the argument if there exists at least one trivial,
   // non-deleted copy or move constructor.
-  bool CopyOrMoveDeleted = false;
-  for (const CXXConstructorDecl *CD : RD->ctors()) {
-    if (CD->isCopyConstructor() || CD->isMoveConstructor()) {
-      assert(CD->isTrivial());
-      // We had at least one undeleted trivial copy or move ctor.  Return
-      // directly.
-      if (!CD->isDeleted())
-        return true;
-      CopyOrMoveDeleted = true;
-    }
-  }
-#if __clang_major__ < 5
-  // If a move constructor or move assignment operator was declared, the
-  // default copy constructors are implicitly deleted, except in one case
-  // related to compatibility with MSVC pre-2015.
-  if (RD->hasUserDeclaredMoveConstructor())
-    return false;
-  if (RD->hasUserDeclaredMoveAssignment()) {
-    const LangOptions &opts = CGM.getLangOpts();
-    bool DeletesOnlyMatchingCopy =
-      opts.MSVCCompat && !opts.isCompatibleWithMSVC(LangOptions::MSVC2015);
-    if (!DeletesOnlyMatchingCopy)
-      return false;
-  }
-#endif
-
-  // If all trivial copy and move constructors are deleted, we cannot copy the
-  // argument.
-  return !CopyOrMoveDeleted;
+  return RD->canPassInRegisters();
 }
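
The single query now captures distinctions of this kind (illustrative):

    struct PlainPair { int a, b; };  // trivial copy/move: may be copied and
                                     // passed in registers
    struct Pinned {
      ~Pinned();                     // non-trivial destructor: must be passed
      int fd;                        // indirectly; the ABI must not copy it
    };
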
 
 llvm::Constant *CGCXXABI::GetBogusMemberPointer(QualType T) {
@@ -170,10 +130,10 @@ void CGCXXABI::buildThisParam(CodeGenFunction &CGF, FunctionArgList ¶ms) {
 
   // FIXME: I'm not entirely sure I like using a fake decl just for code
   // generation. Maybe we can come up with a better way?
-  ImplicitParamDecl *ThisDecl
-    = ImplicitParamDecl::Create(CGM.getContext(), nullptr, MD->getLocation(),
-                                &CGM.getContext().Idents.get("this"),
-                                MD->getThisType(CGM.getContext()));
+  auto *ThisDecl = ImplicitParamDecl::Create(
+      CGM.getContext(), nullptr, MD->getLocation(),
+      &CGM.getContext().Idents.get("this"), MD->getThisType(CGM.getContext()),
+      ImplicitParamDecl::CXXThis);
   params.push_back(ThisDecl);
   CGF.CXXABIThisDecl = ThisDecl;
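
The canCopyArgument simplification earlier in this file delegates the hand-rolled triviality walk to RD->canPassInRegisters(). As a rough approximation only (the real AST predicate also covers MSVC-compatibility quirks), a record can be copied for argument passing when it is trivially copyable:

// Minimal sketch of the distinction, assuming the trivially-copyable
// approximation; not the exact AST logic.
#include <type_traits>

struct Plain  { int x; };              // trivial copy ctor and dtor
struct Pinned {
  Pinned(const Pinned &) = delete;     // no trivial, non-deleted copy/move
  Pinned(Pinned &&) = delete;
};

static_assert(std::is_trivially_copyable<Plain>::value,
              "can be copied for argument passing");
static_assert(!std::is_trivially_copyable<Pinned>::value,
              "must be passed indirectly");
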
 
diff --git a/interpreter/llvm/src/tools/clang/lib/CodeGen/CGCall.cpp b/interpreter/llvm/src/tools/clang/lib/CodeGen/CGCall.cpp
index c677d9887accc..316bf44cb1c3d 100644
--- a/interpreter/llvm/src/tools/clang/lib/CodeGen/CGCall.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/CodeGen/CGCall.cpp
@@ -50,7 +50,7 @@ unsigned CodeGenTypes::ClangCallConvToLLVMCallConv(CallingConv CC) {
   case CC_X86FastCall: return llvm::CallingConv::X86_FastCall;
   case CC_X86RegCall: return llvm::CallingConv::X86_RegCall;
   case CC_X86ThisCall: return llvm::CallingConv::X86_ThisCall;
-  case CC_X86_64Win64: return llvm::CallingConv::X86_64_Win64;
+  case CC_Win64: return llvm::CallingConv::Win64;
   case CC_X86_64SysV: return llvm::CallingConv::X86_64_SysV;
   case CC_AAPCS: return llvm::CallingConv::ARM_AAPCS;
   case CC_AAPCS_VFP: return llvm::CallingConv::ARM_AAPCS_VFP;
@@ -129,7 +129,7 @@ static void addExtParameterInfosForCall(
   paramInfos.resize(totalArgs);
 }
 
-/// Adds the formal paramaters in FPT to the given prefix. If any parameter in
+/// Adds the formal parameters in FPT to the given prefix. If any parameter in
 /// FPT has pass_object_size attrs, then we'll add parameters for those, too.
 static void appendParameterTypes(const CodeGenTypes &CGT,
                                 SmallVectorImpl<CanQualType> &prefix,
@@ -218,7 +218,7 @@ static CallingConv getCallingConventionForDecl(const Decl *D, bool IsWindows) {
     return CC_IntelOclBicc;
 
   if (D->hasAttr<MSABIAttr>())
-    return IsWindows ? CC_C : CC_X86_64Win64;
+    return IsWindows ? CC_C : CC_Win64;
 
   if (D->hasAttr<SysVABIAttr>())
     return IsWindows ? CC_X86_64SysV : CC_C;
@@ -707,6 +707,12 @@ CodeGenTypes::arrangeCall(const CGFunctionInfo &signature,
                                  signature.getRequiredArgs());
 }
 
+namespace clang {
+namespace CodeGen {
+void computeSPIRKernelABIInfo(CodeGenModule &CGM, CGFunctionInfo &FI);
+}
+}
+
 /// Arrange the argument and result information for an abstract value
 /// of a given function type.  This is the method which all of the
 /// above functions ultimately defer to.
@@ -741,12 +747,16 @@ CodeGenTypes::arrangeLLVMFunctionInfo(CanQualType resultType,
   bool inserted = FunctionsBeingProcessed.insert(FI).second;
   (void)inserted;
   assert(inserted && "Recursively being processed?");
-  
+
   // Compute ABI information.
-  if (info.getCC() != CC_Swift) {
-    getABIInfo().computeInfo(*FI);
-  } else {
+  if (CC == llvm::CallingConv::SPIR_KERNEL) {
+    // Force target independent argument handling for the host visible
+    // kernel functions.
+    computeSPIRKernelABIInfo(CGM, *FI);
+  } else if (info.getCC() == CC_Swift) {
     swiftcall::computeABIInfo(CGM, *FI);
+  } else {
+    getABIInfo().computeInfo(*FI);
   }
 
   // Loop over all of the computed argument and return value info.  If any of
@@ -1287,7 +1297,7 @@ static void CreateCoercedStore(llvm::Value *Src,
 
   // If store is legal, just bitcast the src pointer.
   if (SrcSize <= DstSize) {
-    Dst = CGF.Builder.CreateBitCast(Dst, llvm::PointerType::getUnqual(SrcTy));
+    Dst = CGF.Builder.CreateElementBitCast(Dst, SrcTy);
     BuildAggStore(CGF, Src, Dst, DstIsVolatile);
   } else {
     // Otherwise do coercion through memory. This is stupid, but
@@ -1785,6 +1795,8 @@ void CodeGenModule::ConstructAttributeList(
       FuncAttrs.addAttribute(llvm::Attribute::NoUnwind);
     if (TargetDecl->hasAttr<NoReturnAttr>())
       FuncAttrs.addAttribute(llvm::Attribute::NoReturn);
+    if (TargetDecl->hasAttr<ColdAttr>())
+      FuncAttrs.addAttribute(llvm::Attribute::Cold);
     if (TargetDecl->hasAttr<NoDuplicateAttr>())
       FuncAttrs.addAttribute(llvm::Attribute::NoDuplicate);
     if (TargetDecl->hasAttr<ConvergentAttr>())
@@ -1865,8 +1877,8 @@ void CodeGenModule::ConstructAttributeList(
       // the function.
       const auto *TD = FD->getAttr<TargetAttr>();
       TargetAttr::ParsedTargetAttr ParsedAttr = TD->parse();
-      if (ParsedAttr.second != "")
-        TargetCPU = ParsedAttr.second;
+      if (ParsedAttr.Architecture != "")
+        TargetCPU = ParsedAttr.Architecture;
       if (TargetCPU != "")
         FuncAttrs.addAttribute("target-cpu", TargetCPU);
       if (!Features.empty()) {
@@ -2400,8 +2412,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
 
         Address AddrToStoreInto = Address::invalid();
         if (SrcSize <= DstSize) {
-          AddrToStoreInto =
-            Builder.CreateBitCast(Ptr, llvm::PointerType::getUnqual(STy));
+          AddrToStoreInto = Builder.CreateElementBitCast(Ptr, STy);
         } else {
           AddrToStoreInto =
             CreateTempAlloca(STy, Alloca.getAlignment(), "coerce");
@@ -2894,7 +2905,7 @@ void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI,
 
   llvm::Instruction *Ret;
   if (RV) {
-    EmitReturnValueCheck(RV, EndLoc);
+    EmitReturnValueCheck(RV);
     Ret = Builder.CreateRet(RV);
   } else {
     Ret = Builder.CreateRetVoid();
@@ -2904,8 +2915,7 @@ void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI,
     Ret->setDebugLoc(std::move(RetDbgLoc));
 }
 
-void CodeGenFunction::EmitReturnValueCheck(llvm::Value *RV,
-                                           SourceLocation EndLoc) {
+void CodeGenFunction::EmitReturnValueCheck(llvm::Value *RV) {
   // A current decl may not be available when emitting vtable thunks.
   if (!CurCodeDecl)
     return;
@@ -2938,27 +2948,30 @@ void CodeGenFunction::EmitReturnValueCheck(llvm::Value *RV,
 
   SanitizerScope SanScope(this);
 
-  llvm::BasicBlock *Check = nullptr;
-  llvm::BasicBlock *NoCheck = nullptr;
-  if (requiresReturnValueNullabilityCheck()) {
-    // Before doing the nullability check, make sure that the preconditions for
-    // the check are met.
-    Check = createBasicBlock("nullcheck");
-    NoCheck = createBasicBlock("no.nullcheck");
-    Builder.CreateCondBr(RetValNullabilityPrecondition, Check, NoCheck);
-    EmitBlock(Check);
-  }
+  // Make sure the "return" source location is valid. If we're checking a
+  // nullability annotation, make sure the preconditions for the check are met.
+  llvm::BasicBlock *Check = createBasicBlock("nullcheck");
+  llvm::BasicBlock *NoCheck = createBasicBlock("no.nullcheck");
+  llvm::Value *SLocPtr = Builder.CreateLoad(ReturnLocation, "return.sloc.load");
+  llvm::Value *CanNullCheck = Builder.CreateIsNotNull(SLocPtr);
+  if (requiresReturnValueNullabilityCheck())
+    CanNullCheck =
+        Builder.CreateAnd(CanNullCheck, RetValNullabilityPrecondition);
+  Builder.CreateCondBr(CanNullCheck, Check, NoCheck);
+  EmitBlock(Check);
 
-  // Now do the null check. If the returns_nonnull attribute is present, this
-  // is done unconditionally.
+  // Now do the null check.
   llvm::Value *Cond = Builder.CreateIsNotNull(RV);
-  llvm::Constant *StaticData[] = {
-      EmitCheckSourceLocation(EndLoc), EmitCheckSourceLocation(AttrLoc),
-  };
-  EmitCheck(std::make_pair(Cond, CheckKind), Handler, StaticData, None);
+  llvm::Constant *StaticData[] = {EmitCheckSourceLocation(AttrLoc)};
+  llvm::Value *DynamicData[] = {SLocPtr};
+  EmitCheck(std::make_pair(Cond, CheckKind), Handler, StaticData, DynamicData);
 
-  if (requiresReturnValueNullabilityCheck())
-    EmitBlock(NoCheck);
+  EmitBlock(NoCheck);
+
+#ifndef NDEBUG
+  // The return location should not be used after the check has been emitted.
+  ReturnLocation = Address::invalid();
+#endif
 }
 
 static bool isInAllocaArgument(CGCXXABI &ABI, QualType type) {
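
The reworked EmitReturnValueCheck above loads the return statement's source location from ReturnLocation at run time and fires the null check only when that location is valid, folding the returns_nonnull and nullability paths into one. A minimal function this instrumentation guards might look as follows; the example is hypothetical, and exercising the handler assumes building with -fsanitize=nullability-return:

// 'cache' is a stand-in global; returning nullptr violates the _Nonnull
// annotation and reaches the emitted null check.
int *cache;

int *_Nonnull lookup(bool hit) {
  return hit ? cache : nullptr;
}
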
@@ -3375,6 +3388,14 @@ void CodeGenFunction::EmitCallArgs(
     unsigned Idx = LeftToRight ? I : E - I - 1;
     CallExpr::const_arg_iterator Arg = ArgRange.begin() + Idx;
     unsigned InitialArgSize = Args.size();
+    // If *Arg is an ObjCIndirectCopyRestoreExpr, check that either the types of
+    // the argument and parameter match or the objc method is parameterized.
+    assert((!isa<ObjCIndirectCopyRestoreExpr>(*Arg) ||
+            getContext().hasSameUnqualifiedType((*Arg)->getType(),
+                                                ArgTypes[Idx]) ||
+            (isa<ObjCMethodDecl>(AC.getDecl()) &&
+             isObjCMethodWithTypeParams(cast<ObjCMethodDecl>(AC.getDecl())))) &&
+           "Argument and parameter types don't match");
     EmitCallArg(Args, *Arg, ArgTypes[Idx]);
     // In particular, we depend on it being the last arg in Args, and the
     // objectsize bits depend on there only being one arg if !LeftToRight.
@@ -3435,7 +3456,6 @@ void CodeGenFunction::EmitCallArg(CallArgList &args, const Expr *E,
   if (const ObjCIndirectCopyRestoreExpr *CRE
         = dyn_cast<ObjCIndirectCopyRestoreExpr>(E)) {
     assert(getLangOpts().ObjCAutoRefCount);
-    assert(getContext().hasSameUnqualifiedType(E->getType(), type));
     return emitWritebackArg(*this, args, CRE);
   }
 
@@ -3801,7 +3821,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
       assert(NumIRArgs == 1);
       if (RV.isScalar() || RV.isComplex()) {
         // Make a temporary alloca to pass the argument.
-        Address Addr = CreateMemTemp(I->Ty, ArgInfo.getIndirectAlign());
+        Address Addr = CreateMemTemp(I->Ty, ArgInfo.getIndirectAlign(),
+                                     "indirect-arg-temp", false);
         IRCallArgs[FirstIRArg] = Addr.getPointer();
 
         LValue argLV = MakeAddrLValue(Addr, I->Ty);
@@ -3830,7 +3851,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
                < Align.getQuantity()) ||
             (ArgInfo.getIndirectByVal() && (RVAddrSpace != ArgAddrSpace))) {
           // Create an aligned temporary, and copy to it.
-          Address AI = CreateMemTemp(I->Ty, ArgInfo.getIndirectAlign());
+          Address AI = CreateMemTemp(I->Ty, ArgInfo.getIndirectAlign(),
+                                     "byval-temp", false);
           IRCallArgs[FirstIRArg] = AI.getPointer();
           EmitAggregateCopy(AI, Addr, I->Ty, RV.isVolatileQualified());
         } else {
@@ -4249,6 +4271,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
         Builder.CreateStore(elt, eltAddr);
       }
       // FALLTHROUGH
+      LLVM_FALLTHROUGH;
     }
 
     case ABIArgInfo::InAlloca:
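
Among the CGCall.cpp changes above, ConstructAttributeList now forwards the source-level cold attribute to LLVM's cold function attribute, so the inliner and block placement treat such calls as unlikely. A small sketch of code that benefits (report_failure is an illustrative name):

// The 'cold' hint now reaches the IR as the LLVM 'cold' attribute.
__attribute__((cold)) void report_failure(const char *msg);

int safe_div(int a, int b) {
  if (b == 0) {
    report_failure("division by zero");  // laid out as a cold path
    return 0;
  }
  return a / b;
}
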
diff --git a/interpreter/llvm/src/tools/clang/lib/CodeGen/CGClass.cpp b/interpreter/llvm/src/tools/clang/lib/CodeGen/CGClass.cpp
index 7ba03a0d42dd4..50d702c622688 100644
--- a/interpreter/llvm/src/tools/clang/lib/CodeGen/CGClass.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/CodeGen/CGClass.cpp
@@ -129,14 +129,14 @@ Address
 CodeGenFunction::EmitCXXMemberDataPointerAddress(const Expr *E, Address base,
                                                  llvm::Value *memberPtr,
                                       const MemberPointerType *memberPtrType,
-                                                 AlignmentSource *alignSource) {
+                                                 LValueBaseInfo *BaseInfo) {
   // Ask the ABI to compute the actual address.
   llvm::Value *ptr =
     CGM.getCXXABI().EmitMemberDataPointerAddress(*this, E, base,
                                                  memberPtr, memberPtrType);
 
   QualType memberType = memberPtrType->getPointeeType();
-  CharUnits memberAlign = getNaturalTypeAlignment(memberType, alignSource);
+  CharUnits memberAlign = getNaturalTypeAlignment(memberType, BaseInfo);
   memberAlign =
     CGM.getDynamicOffsetAlignment(base.getAlignment(),
                             memberPtrType->getClass()->getAsCXXRecordDecl(),
@@ -2716,79 +2716,6 @@ llvm::Value *CodeGenFunction::EmitVTableTypeCheckedLoad(
       cast<llvm::PointerType>(VTable->getType())->getElementType());
 }
 
-bool
-CodeGenFunction::CanDevirtualizeMemberFunctionCall(const Expr *Base,
-                                                   const CXXMethodDecl *MD) {
-  // When building with -fapple-kext, all calls must go through the vtable since
-  // the kernel linker can do runtime patching of vtables.
-  if (getLangOpts().AppleKext)
-    return false;
-
-  // If the member function is marked 'final', we know that it can't be
-  // overridden and can therefore devirtualize it unless it's pure virtual.
-  if (MD->hasAttr<FinalAttr>())
-    return !MD->isPure();
-
-  // If the base expression (after skipping derived-to-base conversions) is a
-  // class prvalue, then we can devirtualize.
-  Base = Base->getBestDynamicClassTypeExpr();
-  if (Base->isRValue() && Base->getType()->isRecordType())
-    return true;
-
-  // If we don't even know what we would call, we can't devirtualize.
-  const CXXRecordDecl *BestDynamicDecl = Base->getBestDynamicClassType();
-  if (!BestDynamicDecl)
-    return false;
-
-  // There may be a method corresponding to MD in a derived class.
-  const CXXMethodDecl *DevirtualizedMethod =
-      MD->getCorrespondingMethodInClass(BestDynamicDecl);
-
-  // If that method is pure virtual, we can't devirtualize. If this code is
-  // reached, the result would be UB, not a direct call to the derived class
-  // function, and we can't assume the derived class function is defined.
-  if (DevirtualizedMethod->isPure())
-    return false;
-
-  // If that method is marked final, we can devirtualize it.
-  if (DevirtualizedMethod->hasAttr<FinalAttr>())
-    return true;
-
-  // Similarly, if the class itself is marked 'final' it can't be overridden
-  // and we can therefore devirtualize the member function call.
-  if (BestDynamicDecl->hasAttr<FinalAttr>())
-    return true;
-
-  if (const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(Base)) {
-    if (const VarDecl *VD = dyn_cast<VarDecl>(DRE->getDecl())) {
-      // This is a record decl. We know the type and can devirtualize it.
-      return VD->getType()->isRecordType();
-    }
-
-    return false;
-  }
-
-  // We can devirtualize calls on an object accessed by a class member access
-  // expression, since by C++11 [basic.life]p6 we know that it can't refer to
-  // a derived class object constructed in the same location.
-  if (const MemberExpr *ME = dyn_cast<MemberExpr>(Base))
-    if (const ValueDecl *VD = dyn_cast<ValueDecl>(ME->getMemberDecl()))
-      return VD->getType()->isRecordType();
-
-  // Likewise for calls on an object accessed by a (non-reference) pointer to
-  // member access.
-  if (auto *BO = dyn_cast<BinaryOperator>(Base)) {
-    if (BO->isPtrMemOp()) {
-      auto *MPT = BO->getRHS()->getType()->castAs<MemberPointerType>();
-      if (MPT->getPointeeType()->isRecordType())
-        return true;
-    }
-  }
-
-  // We can't devirtualize the call.
-  return false;
-}
-
 void CodeGenFunction::EmitForwardingCallToLambda(
                                       const CXXMethodDecl *callOperator,
                                       CallArgList &callArgs) {
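
CanDevirtualizeMemberFunctionCall is deleted from CodeGen above; the equivalent reasoning now lives elsewhere in the tree (this hunk only shows the removal). The cases the predicate recognized are easy to reproduce at the source level:

// Illustrative sketch of calls the removed predicate could devirtualize.
struct Base { virtual int f() { return 1; } };
struct Leaf final : Base { int f() override { return 2; } };

int direct(Leaf &l)  { return l.f(); }      // 'final' class: direct call
int prvalue()        { return Leaf().f(); } // known dynamic type: direct call
int dynamic(Base &b) { return b.f(); }      // still a true virtual dispatch
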
diff --git a/interpreter/llvm/src/tools/clang/lib/CodeGen/CGCleanup.cpp b/interpreter/llvm/src/tools/clang/lib/CodeGen/CGCleanup.cpp
index e8bcf0a3ac564..b5453bc11e305 100644
--- a/interpreter/llvm/src/tools/clang/lib/CodeGen/CGCleanup.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/CodeGen/CGCleanup.cpp
@@ -448,6 +448,13 @@ void CodeGenFunction::PopCleanupBlocks(
     auto *Inst = dyn_cast_or_null<llvm::Instruction>(*ReloadedValue);
     if (!Inst)
       continue;
+
+    // Don't spill static allocas; they dominate all cleanups. These are
+    // created by binding a reference to a local variable or temporary.
+    auto *AI = dyn_cast<llvm::AllocaInst>(Inst);
+    if (AI && AI->isStaticAlloca())
+      continue;
+
     Address Tmp =
         CreateDefaultAlignTempAlloca(Inst->getType(), "tmp.exprcleanup");
 
diff --git a/interpreter/llvm/src/tools/clang/lib/CodeGen/CGCoroutine.cpp b/interpreter/llvm/src/tools/clang/lib/CodeGen/CGCoroutine.cpp
index 0ef680ef66092..a65faa602b331 100644
--- a/interpreter/llvm/src/tools/clang/lib/CodeGen/CGCoroutine.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/CodeGen/CGCoroutine.cpp
@@ -11,9 +11,11 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "CGCleanup.h"
 #include "CodeGenFunction.h"
 #include "llvm/ADT/ScopeExit.h"
 #include "clang/AST/StmtCXX.h"
+#include "clang/AST/StmtVisitor.h"
 
 using namespace clang;
 using namespace CodeGen;
@@ -57,6 +59,15 @@ struct clang::CodeGen::CGCoroData {
   // builtin.
   llvm::CallInst *CoroId = nullptr;
 
+  // Stores the llvm.coro.begin emitted in the function so that we can replace
+  // all coro.frame intrinsics with the direct SSA value of coro.begin, which
+  // returns the address of the current coroutine's frame.
+  llvm::CallInst *CoroBegin = nullptr;
+
+  // Stores the last emitted coro.free for the deallocate expressions; we use
+  // it to wrap the dealloc code with "if (auto *mem = coro.free) dealloc(mem)".
+  llvm::CallInst *LastCoroFree = nullptr;
+
   // If coro.id came from the builtin, remember the expression to give better
   // diagnostic. If CoroIdExpr is nullptr, the coro.id was created by
   // EmitCoroutineBody.
@@ -137,11 +148,18 @@ static SmallString<32> buildSuspendPrefixStr(CGCoroData &Coro, AwaitKind Kind) {
 //
 //  See llvm's docs/Coroutines.rst for more details.
 //
-static RValue emitSuspendExpression(CodeGenFunction &CGF, CGCoroData &Coro,
+namespace {
+  struct LValueOrRValue {
+    LValue LV;
+    RValue RV;
+  };
+}
+static LValueOrRValue emitSuspendExpression(CodeGenFunction &CGF, CGCoroData &Coro,
                                     CoroutineSuspendExpr const &S,
                                     AwaitKind Kind, AggValueSlot aggSlot,
-                                    bool ignoreResult) {
+                                    bool ignoreResult, bool forLValue) {
   auto *E = S.getCommonExpr();
+
   auto Binder =
       CodeGenFunction::OpaqueValueMappingData::bind(CGF, S.getOpaqueValue(), E);
   auto UnbindOnExit = llvm::make_scope_exit([&] { Binder.unbind(CGF); });
@@ -192,7 +210,12 @@ static RValue emitSuspendExpression(CodeGenFunction &CGF, CGCoroData &Coro,
 
   // Emit await_resume expression.
   CGF.EmitBlock(ReadyBlock);
-  return CGF.EmitAnyExpr(S.getResumeExpr(), aggSlot, ignoreResult);
+  LValueOrRValue Res;
+  if (forLValue)
+    Res.LV = CGF.EmitLValue(S.getResumeExpr());
+  else
+    Res.RV = CGF.EmitAnyExpr(S.getResumeExpr(), aggSlot, ignoreResult);
+  return Res;
 }
 
 RValue CodeGenFunction::EmitCoawaitExpr(const CoawaitExpr &E,
@@ -200,13 +223,13 @@ RValue CodeGenFunction::EmitCoawaitExpr(const CoawaitExpr &E,
                                         bool ignoreResult) {
   return emitSuspendExpression(*this, *CurCoro.Data, E,
                                CurCoro.Data->CurrentAwaitKind, aggSlot,
-                               ignoreResult);
+                               ignoreResult, /*forLValue*/false).RV;
 }
 RValue CodeGenFunction::EmitCoyieldExpr(const CoyieldExpr &E,
                                         AggValueSlot aggSlot,
                                         bool ignoreResult) {
   return emitSuspendExpression(*this, *CurCoro.Data, E, AwaitKind::Yield,
-                               aggSlot, ignoreResult);
+                               aggSlot, ignoreResult, /*forLValue*/false).RV;
 }
 
 void CodeGenFunction::EmitCoreturnStmt(CoreturnStmt const &S) {
@@ -215,7 +238,99 @@ void CodeGenFunction::EmitCoreturnStmt(CoreturnStmt const &S) {
   EmitBranchThroughCleanup(CurCoro.Data->FinalJD);
 }
 
-// For WinEH exception representation backend need to know what funclet coro.end
+
+#ifndef NDEBUG
+static QualType getCoroutineSuspendExprReturnType(const ASTContext &Ctx,
+  const CoroutineSuspendExpr *E) {
+  const auto *RE = E->getResumeExpr();
+  // Is it possible for RE to be a CXXBindTemporaryExpr wrapping
+  // a MemberCallExpr?
+  assert(isa<CallExpr>(RE) && "unexpected suspend expression type");
+  return cast<CallExpr>(RE)->getCallReturnType(Ctx);
+}
+#endif
+
+LValue
+CodeGenFunction::EmitCoawaitLValue(const CoawaitExpr *E) {
+  assert(getCoroutineSuspendExprReturnType(getContext(), E)->isReferenceType() &&
+         "Can't have a scalar return unless the return type is a "
+         "reference type!");
+  return emitSuspendExpression(*this, *CurCoro.Data, *E,
+                               CurCoro.Data->CurrentAwaitKind, AggValueSlot::ignored(),
+                               /*ignoreResult*/false, /*forLValue*/true).LV;
+}
+
+LValue
+CodeGenFunction::EmitCoyieldLValue(const CoyieldExpr *E) {
+  assert(getCoroutineSuspendExprReturnType(getContext(), E)->isReferenceType() &&
+         "Can't have a scalar return unless the return type is a "
+         "reference type!");
+  return emitSuspendExpression(*this, *CurCoro.Data, *E,
+                               AwaitKind::Yield, AggValueSlot::ignored(),
+                               /*ignoreResult*/false, /*forLValue*/true).LV;
+}
+
+// Hunts for the parameter reference in the parameter copy/move declaration.
+namespace {
+struct GetParamRef : public StmtVisitor<GetParamRef> {
+public:
+  DeclRefExpr *Expr = nullptr;
+  GetParamRef() {}
+  void VisitDeclRefExpr(DeclRefExpr *E) {
+    assert(Expr == nullptr && "multilple declref in param move");
+    Expr = E;
+  }
+  void VisitStmt(Stmt *S) {
+    for (auto *C : S->children()) {
+      if (C)
+        Visit(C);
+    }
+  }
+};
+}
+
+// This class replaces references to parameters with references to their
+// copies by changing the addresses in CGF.LocalDeclMap, and restores the
+// original values in its destructor.
+
+namespace {
+  struct ParamReferenceReplacerRAII {
+    CodeGenFunction::DeclMapTy SavedLocals;
+    CodeGenFunction::DeclMapTy& LocalDeclMap;
+
+    ParamReferenceReplacerRAII(CodeGenFunction::DeclMapTy &LocalDeclMap)
+        : LocalDeclMap(LocalDeclMap) {}
+
+    void addCopy(DeclStmt const *PM) {
+      // Figure out what param it refers to.
+
+      assert(PM->isSingleDecl());
+      VarDecl const *VD = static_cast<VarDecl const *>(PM->getSingleDecl());
+      Expr const *InitExpr = VD->getInit();
+      GetParamRef Visitor;
+      Visitor.Visit(const_cast(InitExpr));
+      assert(Visitor.Expr);
+      auto *DREOrig = cast<DeclRefExpr>(Visitor.Expr);
+      auto *PD = DREOrig->getDecl();
+
+      auto it = LocalDeclMap.find(PD);
+      assert(it != LocalDeclMap.end() && "parameter is not found");
+      SavedLocals.insert({ PD, it->second });
+
+      auto copyIt = LocalDeclMap.find(VD);
+      assert(copyIt != LocalDeclMap.end() && "parameter copy is not found");
+      it->second = copyIt->getSecond();
+    }
+
+    ~ParamReferenceReplacerRAII() {
+      for (auto&& SavedLocal : SavedLocals) {
+        LocalDeclMap.insert({SavedLocal.first, SavedLocal.second});
+      }
+    }
+  };
+}
+
+// For WinEH exception representation backend needs to know what funclet coro.end
 // belongs to. That information is passed in a funclet bundle.
 static SmallVector<llvm::OperandBundleDef, 1>
 getBundlesForCoroEnd(CodeGenFunction &CGF) {
@@ -257,24 +372,135 @@ namespace {
 struct CallCoroDelete final : public EHScopeStack::Cleanup {
   Stmt *Deallocate;
 
-  // TODO: Wrap deallocate in if(coro.free(...)) Deallocate.
+  // Emit "if (coro.free(CoroId, CoroBegin)) Deallocate;"
+
+  // Note: That deallocation will be emitted twice: once for a normal exit and
+  // once for exceptional exit. This usage is safe because Deallocate does not
+  // contain any declarations. The SubStmtBuilder::makeNewAndDeleteExpr()
+  // builds a single call to a deallocation function which is safe to emit
+  // multiple times.
   void Emit(CodeGenFunction &CGF, Flags) override {
-    // Note: That deallocation will be emitted twice: once for a normal exit and
-    // once for exceptional exit. This usage is safe because Deallocate does not
-    // contain any declarations. The SubStmtBuilder::makeNewAndDeleteExpr()
-    // builds a single call to a deallocation function which is safe to emit
-    // multiple times.
+    // Remember the current point, as we are going to emit deallocation code
+    // first to get to the coro.free instruction that is an argument to the
+    // delete call.
+    BasicBlock *SaveInsertBlock = CGF.Builder.GetInsertBlock();
+
+    auto *FreeBB = CGF.createBasicBlock("coro.free");
+    CGF.EmitBlock(FreeBB);
     CGF.EmitStmt(Deallocate);
+
+    auto *AfterFreeBB = CGF.createBasicBlock("after.coro.free");
+    CGF.EmitBlock(AfterFreeBB);
+
+    // We should have captured coro.free from the emission of deallocate.
+    auto *CoroFree = CGF.CurCoro.Data->LastCoroFree;
+    if (!CoroFree) {
+      CGF.CGM.Error(Deallocate->getLocStart(),
+                    "Deallocation expression does not refer to coro.free");
+      return;
+    }
+
+    // Get back to the block we were in originally and move coro.free there.
+    auto *InsertPt = SaveInsertBlock->getTerminator();
+    CoroFree->moveBefore(InsertPt);
+    CGF.Builder.SetInsertPoint(InsertPt);
+
+    // Add if (auto *mem = coro.free) Deallocate;
+    auto *NullPtr = llvm::ConstantPointerNull::get(CGF.Int8PtrTy);
+    auto *Cond = CGF.Builder.CreateICmpNE(CoroFree, NullPtr);
+    CGF.Builder.CreateCondBr(Cond, FreeBB, AfterFreeBB);
+
+    // No longer need old terminator.
+    InsertPt->eraseFromParent();
+    CGF.Builder.SetInsertPoint(AfterFreeBB);
   }
   explicit CallCoroDelete(Stmt *DeallocStmt) : Deallocate(DeallocStmt) {}
 };
 }
 
+namespace {
+struct GetReturnObjectManager {
+  CodeGenFunction &CGF;
+  CGBuilderTy &Builder;
+  const CoroutineBodyStmt &S;
+
+  Address GroActiveFlag;
+  CodeGenFunction::AutoVarEmission GroEmission;
+
+  GetReturnObjectManager(CodeGenFunction &CGF, const CoroutineBodyStmt &S)
+      : CGF(CGF), Builder(CGF.Builder), S(S), GroActiveFlag(Address::invalid()),
+        GroEmission(CodeGenFunction::AutoVarEmission::invalid()) {}
+
+  // The gro variable has to outlive the coroutine frame and coroutine promise,
+  // but it can only be initialized after the coroutine promise was created;
+  // thus, we split its emission into two parts. EmitGroAlloca emits an alloca
+  // and sets up the cleanups. Later, when the coroutine promise is available,
+  // we initialize the gro and set the flag that the cleanup is now active.
+
+  void EmitGroAlloca() {
+    auto *GroDeclStmt = dyn_cast<DeclStmt>(S.getResultDecl());
+    if (!GroDeclStmt) {
+      // If get_return_object returns void, no need to do an alloca.
+      return;
+    }
+
+    auto *GroVarDecl = cast<VarDecl>(GroDeclStmt->getSingleDecl());
+
+    // Set the GRO flag to indicate that it is not initialized yet.
+    GroActiveFlag =
+      CGF.CreateTempAlloca(Builder.getInt1Ty(), CharUnits::One(), "gro.active");
+    Builder.CreateStore(Builder.getFalse(), GroActiveFlag);
+
+    GroEmission = CGF.EmitAutoVarAlloca(*GroVarDecl);
+
+    // Remember the top of EHStack before emitting the cleanup.
+    auto old_top = CGF.EHStack.stable_begin();
+    CGF.EmitAutoVarCleanups(GroEmission);
+    auto top = CGF.EHStack.stable_begin();
+
+    // Make the cleanup conditional on gro.active
+    for (auto b = CGF.EHStack.find(top), e = CGF.EHStack.find(old_top);
+      b != e; b++) {
+      if (auto *Cleanup = dyn_cast<EHCleanupScope>(&*b)) {
+        assert(!Cleanup->hasActiveFlag() && "cleanup already has active flag?");
+        Cleanup->setActiveFlag(GroActiveFlag);
+        Cleanup->setTestFlagInEHCleanup();
+        Cleanup->setTestFlagInNormalCleanup();
+      }
+    }
+  }
+
+  void EmitGroInit() {
+    if (!GroActiveFlag.isValid()) {
+      // No Gro variable was allocated. Simply emit the call to
+      // get_return_object.
+      CGF.EmitStmt(S.getResultDecl());
+      return;
+    }
+
+    CGF.EmitAutoVarInit(GroEmission);
+    Builder.CreateStore(Builder.getTrue(), GroActiveFlag);
+  }
+};
+}
+
+static void emitBodyAndFallthrough(CodeGenFunction &CGF,
+                                   const CoroutineBodyStmt &S, Stmt *Body) {
+  CGF.EmitStmt(Body);
+  const bool CanFallthrough = CGF.Builder.GetInsertBlock();
+  if (CanFallthrough)
+    if (Stmt *OnFallthrough = S.getFallthroughHandler())
+      CGF.EmitStmt(OnFallthrough);
+}
+
 void CodeGenFunction::EmitCoroutineBody(const CoroutineBodyStmt &S) {
   auto *NullPtr = llvm::ConstantPointerNull::get(Builder.getInt8PtrTy());
   auto &TI = CGM.getContext().getTargetInfo();
   unsigned NewAlign = TI.getNewAlign() / TI.getCharWidth();
 
+  auto *EntryBB = Builder.GetInsertBlock();
+  auto *AllocBB = createBasicBlock("coro.alloc");
+  auto *InitBB = createBasicBlock("coro.init");
   auto *FinalBB = createBasicBlock("coro.final");
   auto *RetBB = createBasicBlock("coro.ret");
 
@@ -284,12 +510,20 @@ void CodeGenFunction::EmitCoroutineBody(const CoroutineBodyStmt &S) {
   createCoroData(*this, CurCoro, CoroId);
   CurCoro.Data->SuspendBB = RetBB;
 
+  // The backend is allowed to elide memory allocations; to help it, emit
+  // "auto mem = coro.alloc() ? 0 : ... allocation code ...;".
+  auto *CoroAlloc = Builder.CreateCall(
+      CGM.getIntrinsic(llvm::Intrinsic::coro_alloc), {CoroId});
+
+  Builder.CreateCondBr(CoroAlloc, AllocBB, InitBB);
+
+  EmitBlock(AllocBB);
   auto *AllocateCall = EmitScalarExpr(S.getAllocate());
+  auto *AllocOrInvokeContBB = Builder.GetInsertBlock();
 
   // Handle allocation failure if 'ReturnStmtOnAllocFailure' was provided.
   if (auto *RetOnAllocFailure = S.getReturnStmtOnAllocFailure()) {
     auto *RetOnFailureBB = createBasicBlock("coro.ret.on.failure");
-    auto *InitBB = createBasicBlock("coro.init");
 
     // See if allocation was successful.
     auto *NullPtr = llvm::ConstantPointerNull::get(Int8PtrTy);
@@ -299,40 +533,95 @@ void CodeGenFunction::EmitCoroutineBody(const CoroutineBodyStmt &S) {
     // If not, return OnAllocFailure object.
     EmitBlock(RetOnFailureBB);
     EmitStmt(RetOnAllocFailure);
-
-    EmitBlock(InitBB);
   }
+  else {
+    Builder.CreateBr(InitBB);
+  }
+
+  EmitBlock(InitBB);
+
+  // Pass the result of the allocation to coro.begin.
+  auto *Phi = Builder.CreatePHI(VoidPtrTy, 2);
+  Phi->addIncoming(NullPtr, EntryBB);
+  Phi->addIncoming(AllocateCall, AllocOrInvokeContBB);
+  auto *CoroBegin = Builder.CreateCall(
+      CGM.getIntrinsic(llvm::Intrinsic::coro_begin), {CoroId, Phi});
+  CurCoro.Data->CoroBegin = CoroBegin;
+
+  GetReturnObjectManager GroManager(*this, S);
+  GroManager.EmitGroAlloca();
 
   CurCoro.Data->CleanupJD = getJumpDestInCurrentScope(RetBB);
   {
+    ParamReferenceReplacerRAII ParamReplacer(LocalDeclMap);
     CodeGenFunction::RunCleanupsScope ResumeScope(*this);
     EHStack.pushCleanup<CallCoroDelete>(NormalAndEHCleanup, S.getDeallocate());
 
+    // Create parameter copies. We do it before creating a promise, since an
+    // evolution of the Coroutines TS may allow the promise constructor to
+    // observe parameter copies.
+    for (auto *PM : S.getParamMoves()) {
+      EmitStmt(PM);
+      ParamReplacer.addCopy(cast<DeclStmt>(PM));
+      // TODO: if(CoroParam(...)) need to surround ctor and dtor
+      // for the copy, so that llvm can elide it if the copy is
+      // not needed.
+    }
+
     EmitStmt(S.getPromiseDeclStmt());
 
+    Address PromiseAddr = GetAddrOfLocalVar(S.getPromiseDecl());
+    auto *PromiseAddrVoidPtr =
+        new llvm::BitCastInst(PromiseAddr.getPointer(), VoidPtrTy, "", CoroId);
+    // Update CoroId to refer to the promise. We could not do it earlier because
+    // the promise local variable was not emitted yet.
+    CoroId->setArgOperand(1, PromiseAddrVoidPtr);
+
+    // Now that we have the promise, initialize the GRO.
+    GroManager.EmitGroInit();
+
     EHStack.pushCleanup<CallCoroEnd>(EHCleanup);
 
+    CurCoro.Data->CurrentAwaitKind = AwaitKind::Init;
+    EmitStmt(S.getInitSuspendStmt());
     CurCoro.Data->FinalJD = getJumpDestInCurrentScope(FinalBB);
 
-    // FIXME: Emit initial suspend and more before the body.
-
     CurCoro.Data->CurrentAwaitKind = AwaitKind::Normal;
-    EmitStmt(S.getBody());
+
+    if (auto *OnException = S.getExceptionHandler()) {
+      auto Loc = S.getLocStart();
+      CXXCatchStmt Catch(Loc, /*exDecl=*/nullptr, OnException);
+      auto *TryStmt = CXXTryStmt::Create(getContext(), Loc, S.getBody(), &Catch);
+
+      EnterCXXTryStmt(*TryStmt);
+      emitBodyAndFallthrough(*this, S, TryStmt->getTryBlock());
+      ExitCXXTryStmt(*TryStmt);
+    }
+    else {
+      emitBodyAndFallthrough(*this, S, S.getBody());
+    }
 
     // See if we need to generate final suspend.
     const bool CanFallthrough = Builder.GetInsertBlock();
     const bool HasCoreturns = CurCoro.Data->CoreturnCount > 0;
     if (CanFallthrough || HasCoreturns) {
       EmitBlock(FinalBB);
-      // FIXME: Emit final suspend.
+      CurCoro.Data->CurrentAwaitKind = AwaitKind::Final;
+      EmitStmt(S.getFinalSuspendStmt());
+    } else {
+      // We don't need FinalBB. Emit it to make sure the block is deleted.
+      EmitBlock(FinalBB, /*IsFinished=*/true);
     }
   }
 
   EmitBlock(RetBB);
+  // Emit coro.end before getReturnStmt (and parameter destructors), since
+  // the resume and destroy parts of the coroutine should not include them.
   llvm::Function *CoroEnd = CGM.getIntrinsic(llvm::Intrinsic::coro_end);
   Builder.CreateCall(CoroEnd, {NullPtr, Builder.getFalse()});
 
-  // FIXME: Emit return for the coroutine return object.
+  if (Stmt *Ret = S.getReturnStmt())
+    EmitStmt(Ret);
 }
 
 // Emit coroutine intrinsic and patch up arguments of the token type.
@@ -342,6 +631,17 @@ RValue CodeGenFunction::EmitCoroutineIntrinsic(const CallExpr *E,
   switch (IID) {
   default:
     break;
+  // The coro.frame builtin is replaced with an SSA value of the coro.begin
+  // intrinsic.
+  case llvm::Intrinsic::coro_frame: {
+    if (CurCoro.Data && CurCoro.Data->CoroBegin) {
+      return RValue::get(CurCoro.Data->CoroBegin);
+    }
+    CGM.Error(E->getLocStart(), "this builtin expects that __builtin_coro_begin "
+      "has been used earlier in this function");
+    auto NullPtr = llvm::ConstantPointerNull::get(Builder.getInt8PtrTy());
+    return RValue::get(NullPtr);
+  }
   // The following three intrinsics take a token parameter referring to a token
   // returned by earlier call to @llvm.coro.id. Since we cannot represent it in
   // builtins, we patch it up here.
@@ -355,6 +655,7 @@ RValue CodeGenFunction::EmitCoroutineIntrinsic(const CallExpr *E,
     CGM.Error(E->getLocStart(), "this builtin expects that __builtin_coro_id has"
                                 " been used earlier in this function");
     // Fallthrough to the next case to add TokenNone as the first argument.
+    LLVM_FALLTHROUGH;
   }
   // @llvm.coro.suspend takes a token parameter. Add token 'none' as the first
   // argument.
@@ -368,10 +669,22 @@ RValue CodeGenFunction::EmitCoroutineIntrinsic(const CallExpr *E,
   llvm::Value *F = CGM.getIntrinsic(IID);
   llvm::CallInst *Call = Builder.CreateCall(F, Args);
 
+  // Note: The following code exists to enable emitting coro.id and coro.begin
+  // by hand, to experiment with coroutines in C.
   // If we see @llvm.coro.id remember it in the CoroData. We will update
   // coro.alloc, coro.begin and coro.free intrinsics to refer to it.
   if (IID == llvm::Intrinsic::coro_id) {
     createCoroData(*this, CurCoro, Call, E);
   }
+  else if (IID == llvm::Intrinsic::coro_begin) {
+    if (CurCoro.Data)
+      CurCoro.Data->CoroBegin = Call;
+  }
+  else if (IID == llvm::Intrinsic::coro_free) {
+    // Remember the last coro_free as we need it to build the conditional
+    // deletion of the coroutine frame.
+    if (CurCoro.Data)
+      CurCoro.Data->LastCoroFree = Call;
+  }
   return RValue::get(Call);
 }
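
With the CGCoroutine.cpp changes above, the emitted skeleton now covers coro.alloc/coro.begin, parameter copies, promise construction, get_return_object (via GetReturnObjectManager), the initial and final suspends, and the conditional coro.free. A minimal Coroutines-TS generator exercising those pieces is sketched below; it assumes the era-appropriate -fcoroutines-ts flag and <experimental/coroutine> header, and it is deliberately not copy-safe:

#include <experimental/coroutine>

struct IntGen {
  struct promise_type {
    int current = 0;
    IntGen get_return_object() {        // handled by GetReturnObjectManager
      return {std::experimental::coroutine_handle<promise_type>::from_promise(*this)};
    }
    auto initial_suspend() { return std::experimental::suspend_always{}; }
    auto final_suspend()   { return std::experimental::suspend_always{}; }
    auto yield_value(int v) {
      current = v;
      return std::experimental::suspend_always{};
    }
    void return_void() {}
    void unhandled_exception() {}       // body is wrapped in the implicit try
  };
  std::experimental::coroutine_handle<promise_type> h;
  ~IntGen() { if (h) h.destroy(); }     // reaches the conditional coro.free
};

IntGen counter(int n) {                 // 'n' is copied into the frame
  for (int i = 0; i < n; ++i)
    co_yield i;
}
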
diff --git a/interpreter/llvm/src/tools/clang/lib/CodeGen/CGDebugInfo.cpp b/interpreter/llvm/src/tools/clang/lib/CodeGen/CGDebugInfo.cpp
index 9d77c61bd52cb..18b1d10a921d1 100644
--- a/interpreter/llvm/src/tools/clang/lib/CodeGen/CGDebugInfo.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/CodeGen/CGDebugInfo.cpp
@@ -956,7 +956,7 @@ static unsigned getDwarfCC(CallingConv CC) {
     return llvm::dwarf::DW_CC_BORLAND_pascal;
 
   // FIXME: Create new DW_CC_ codes for these calling conventions.
-  case CC_X86_64Win64:
+  case CC_Win64:
   case CC_X86_64SysV:
   case CC_AAPCS:
   case CC_AAPCS_VFP:
@@ -1041,7 +1041,13 @@ llvm::DIType *CGDebugInfo::createBitFieldType(const FieldDecl *BitFieldDecl,
   assert(SizeInBits > 0 && "found named 0-width bitfield");
   uint64_t StorageOffsetInBits =
       CGM.getContext().toBits(BitFieldInfo.StorageOffset);
-  uint64_t OffsetInBits = StorageOffsetInBits + BitFieldInfo.Offset;
+  uint64_t Offset = BitFieldInfo.Offset;
+  // The bit offsets are reversed on big-endian targets; compensate for that,
+  // as DIDerivedType requires un-reversed offsets.
+  if (CGM.getDataLayout().isBigEndian())
+    Offset = BitFieldInfo.StorageSize - BitFieldInfo.Size - Offset;
+  uint64_t OffsetInBits = StorageOffsetInBits + Offset;
   llvm::DINode::DIFlags Flags = getAccessFlag(BitFieldDecl->getAccess(), RD);
   return DBuilder.createBitFieldMemberType(
       RecordTy, Name, File, Line, SizeInBits, OffsetInBits, StorageOffsetInBits,
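
The big-endian compensation above is plain arithmetic. A worked instance, assuming a 3-bit field at little-endian bit offset 5 within a 32-bit storage unit:

// Mirrors the isBigEndian() branch: DWARF wants the un-reversed offset.
constexpr unsigned unreversedOffset(unsigned StorageSize, unsigned Size,
                                    unsigned Offset) {
  return StorageSize - Size - Offset;
}
static_assert(unreversedOffset(32, 3, 5) == 24, "32 - 3 - 5 == 24");
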
@@ -2613,7 +2619,7 @@ llvm::DIModule *CGDebugInfo::getParentModuleOrNull(const Decl *D) {
     // best to make this behavior a command line or debugger tuning
     // option.
     FullSourceLoc Loc(D->getLocation(), CGM.getContext().getSourceManager());
-    if (Module *M = ClangModuleMap->inferModuleFromLocation(Loc)) {
+    if (Module *M = D->getOwningModule()) {
       // This is a (sub-)module.
       auto Info = ExternalASTSource::ASTSourceDescriptor(*M);
       return getOrCreateModuleRef(Info, /*SkeletonCU=*/false);
@@ -2781,6 +2787,7 @@ llvm::DICompositeType *CGDebugInfo::CreateLimitedType(const RecordType *Ty) {
     // them distinct if they are ODR-uniqued.
     if (FullName.empty())
       break;
+    LLVM_FALLTHROUGH;
 
   case llvm::dwarf::DW_TAG_structure_type:
   case llvm::dwarf::DW_TAG_union_type:
@@ -2860,7 +2867,7 @@ void CGDebugInfo::collectFunctionDeclProps(GlobalDecl GD, llvm::DIFile *Unit,
 
   if (DebugKind >= codegenoptions::LimitedDebugInfo) {
     if (const NamespaceDecl *NSDecl =
-        dyn_cast_or_null<NamespaceDecl>(FD->getLexicalDeclContext()))
+        dyn_cast_or_null<NamespaceDecl>(FD->getDeclContext()))
       FDContext = getOrCreateNamespace(NSDecl);
     else if (const RecordDecl *RDecl =
              dyn_cast_or_null<RecordDecl>(FD->getDeclContext())) {
@@ -3253,7 +3260,7 @@ void CGDebugInfo::EmitInlineFunctionStart(CGBuilderTy &Builder, GlobalDecl GD) {
   llvm::DISubprogram *SP = nullptr;
   if (FI != SPCache.end())
     SP = dyn_cast_or_null<llvm::DISubprogram>(FI->second);
-  if (!SP)
+  if (!SP || !SP->isDefinition())
     SP = getFunctionStub(GD);
   FnBeginRegionCount.push_back(LexicalBlockStack.size());
   LexicalBlockStack.emplace_back(SP);
@@ -3263,7 +3270,7 @@ void CGDebugInfo::EmitInlineFunctionStart(CGBuilderTy &Builder, GlobalDecl GD) {
 
 void CGDebugInfo::EmitInlineFunctionEnd(CGBuilderTy &Builder) {
   assert(CurInlinedAt && "unbalanced inline scope stack");
-  EmitFunctionEnd(Builder);
+  EmitFunctionEnd(Builder, nullptr);
   setInlinedAt(llvm::DebugLoc(CurInlinedAt).getInlinedAt());
 }
 
@@ -3332,7 +3339,7 @@ void CGDebugInfo::EmitLexicalBlockEnd(CGBuilderTy &Builder,
   LexicalBlockStack.pop_back();
 }
 
-void CGDebugInfo::EmitFunctionEnd(CGBuilderTy &Builder) {
+void CGDebugInfo::EmitFunctionEnd(CGBuilderTy &Builder, llvm::Function *Fn) {
   assert(!LexicalBlockStack.empty() && "Region stack mismatch, stack empty!");
   unsigned RCount = FnBeginRegionCount.back();
   assert(RCount <= LexicalBlockStack.size() && "Region stack mismatch");
@@ -3344,6 +3351,9 @@ void CGDebugInfo::EmitFunctionEnd(CGBuilderTy &Builder) {
     LexicalBlockStack.pop_back();
   }
   FnBeginRegionCount.pop_back();
+
+  if (Fn && Fn->getSubprogram())
+    DBuilder.finalizeSubprogram(Fn->getSubprogram());
 }
 
 llvm::DIType *CGDebugInfo::EmitTypeForVarWithBlocksAttr(const VarDecl *VD,
@@ -3462,13 +3472,13 @@ void CGDebugInfo::EmitDeclare(const VarDecl *VD, llvm::Value *Storage,
   unsigned AddressSpace = CGM.getContext().getTargetAddressSpace(VD->getType());
   AppendAddressSpaceXDeref(AddressSpace, Expr);
 
-  // If this is the first argument and it is implicit then
-  // give it an object pointer flag.
-  // FIXME: There has to be a better way to do this, but for static
-  // functions there won't be an implicit param at arg1 and
-  // otherwise it is 'self' or 'this'.
-  if (isa<ImplicitParamDecl>(VD) && ArgNo && *ArgNo == 1)
-  Flags |= llvm::DINode::FlagObjectPointer;
+  // If this is implicit parameter of CXXThis or ObjCSelf kind, then give it an
+  // object pointer flag.
+  if (const auto *IPD = dyn_cast<ImplicitParamDecl>(VD)) {
+    if (IPD->getParameterKind() == ImplicitParamDecl::CXXThis ||
+        IPD->getParameterKind() == ImplicitParamDecl::ObjCSelf)
+      Flags |= llvm::DINode::FlagObjectPointer;
+  }
 
   // Note: Older versions of clang used to emit byval references with an extra
   // DW_OP_deref, because they referenced the IR arg directly instead of
@@ -3480,13 +3490,13 @@ void CGDebugInfo::EmitDeclare(const VarDecl *VD, llvm::Value *Storage,
     if (VD->hasAttr<BlocksAttr>()) {
       // Here, we need an offset *into* the alloca.
       CharUnits offset = CharUnits::fromQuantity(32);
-      Expr.push_back(llvm::dwarf::DW_OP_plus);
+      Expr.push_back(llvm::dwarf::DW_OP_plus_uconst);
       // offset of __forwarding field
       offset = CGM.getContext().toCharUnitsFromBits(
           CGM.getTarget().getPointerWidth(0));
       Expr.push_back(offset.getQuantity());
       Expr.push_back(llvm::dwarf::DW_OP_deref);
-      Expr.push_back(llvm::dwarf::DW_OP_plus);
+      Expr.push_back(llvm::dwarf::DW_OP_plus_uconst);
       // offset of x field
       offset = CGM.getContext().toCharUnitsFromBits(XOffset);
       Expr.push_back(offset.getQuantity());
@@ -3579,8 +3589,9 @@ void CGDebugInfo::EmitDeclareOfBlockDeclRefVariable(
 
   // Self is passed along as an implicit non-arg variable in a
   // block. Mark it as the object pointer.
-  if (isa<ImplicitParamDecl>(VD) && VD->getName() == "self")
-    Ty = CreateSelfType(VD->getType(), Ty);
+  if (const auto *IPD = dyn_cast<ImplicitParamDecl>(VD))
+    if (IPD->getParameterKind() == ImplicitParamDecl::ObjCSelf)
+      Ty = CreateSelfType(VD->getType(), Ty);
 
   // Get location information.
   unsigned Line = getLineNumber(VD->getLocation());
@@ -3594,17 +3605,17 @@ void CGDebugInfo::EmitDeclareOfBlockDeclRefVariable(
 
   SmallVector<uint64_t, 9> addr;
   addr.push_back(llvm::dwarf::DW_OP_deref);
-  addr.push_back(llvm::dwarf::DW_OP_plus);
+  addr.push_back(llvm::dwarf::DW_OP_plus_uconst);
   addr.push_back(offset.getQuantity());
   if (isByRef) {
     addr.push_back(llvm::dwarf::DW_OP_deref);
-    addr.push_back(llvm::dwarf::DW_OP_plus);
+    addr.push_back(llvm::dwarf::DW_OP_plus_uconst);
     // offset of __forwarding field
     offset =
         CGM.getContext().toCharUnitsFromBits(target.getPointerSizeInBits(0));
     addr.push_back(offset.getQuantity());
     addr.push_back(llvm::dwarf::DW_OP_deref);
-    addr.push_back(llvm::dwarf::DW_OP_plus);
+    addr.push_back(llvm::dwarf::DW_OP_plus_uconst);
     // offset of x field
     offset = CGM.getContext().toCharUnitsFromBits(XOffset);
     addr.push_back(offset.getQuantity());
@@ -3959,10 +3970,10 @@ void CGDebugInfo::EmitUsingDirective(const UsingDirectiveDecl &UD) {
   const NamespaceDecl *NSDecl = UD.getNominatedNamespace();
   if (!NSDecl->isAnonymousNamespace() ||
       CGM.getCodeGenOpts().DebugExplicitImport) {
+    auto Loc = UD.getLocation();
     DBuilder.createImportedModule(
         getCurrentContextDescriptor(cast<Decl>(UD.getDeclContext())),
-        getOrCreateNamespace(NSDecl),
-        getLineNumber(UD.getLocation()));
+        getOrCreateNamespace(NSDecl), getOrCreateFile(Loc), getLineNumber(Loc));
   }
 }
 
@@ -3985,10 +3996,12 @@ void CGDebugInfo::EmitUsingDecl(const UsingDecl &UD) {
       if (AT->getDeducedType().isNull())
         return;
   if (llvm::DINode *Target =
-          getDeclarationOrDefinition(USD.getUnderlyingDecl()))
+          getDeclarationOrDefinition(USD.getUnderlyingDecl())) {
+    auto Loc = USD.getLocation();
     DBuilder.createImportedDeclaration(
         getCurrentContextDescriptor(cast<Decl>(USD.getDeclContext())), Target,
-        getLineNumber(USD.getLocation()));
+        getOrCreateFile(Loc), getLineNumber(Loc));
+  }
 }
 
 void CGDebugInfo::EmitImportDecl(const ImportDecl &ID) {
@@ -3996,10 +4009,11 @@ void CGDebugInfo::EmitImportDecl(const ImportDecl &ID) {
     return;
   if (Module *M = ID.getImportedModule()) {
     auto Info = ExternalASTSource::ASTSourceDescriptor(*M);
+    auto Loc = ID.getLocation();
     DBuilder.createImportedDeclaration(
         getCurrentContextDescriptor(cast<Decl>(ID.getDeclContext())),
-        getOrCreateModuleRef(Info, DebugTypeExtRefs),
-        getLineNumber(ID.getLocation()));
+        getOrCreateModuleRef(Info, DebugTypeExtRefs), getOrCreateFile(Loc),
+        getLineNumber(Loc));
   }
 }
 
@@ -4011,18 +4025,19 @@ CGDebugInfo::EmitNamespaceAlias(const NamespaceAliasDecl &NA) {
   if (VH)
     return cast<llvm::DIImportedEntity>(VH);
   llvm::DIImportedEntity *R;
+  auto Loc = NA.getLocation();
   if (const auto *Underlying =
           dyn_cast<NamespaceAliasDecl>(NA.getAliasedNamespace()))
     // This could cache & dedup here rather than relying on metadata deduping.
     R = DBuilder.createImportedDeclaration(
         getCurrentContextDescriptor(cast<Decl>(NA.getDeclContext())),
-        EmitNamespaceAlias(*Underlying), getLineNumber(NA.getLocation()),
-        NA.getName());
+        EmitNamespaceAlias(*Underlying), getOrCreateFile(Loc),
+        getLineNumber(Loc), NA.getName());
   else
     R = DBuilder.createImportedDeclaration(
         getCurrentContextDescriptor(cast<Decl>(NA.getDeclContext())),
         getOrCreateNamespace(cast<NamespaceDecl>(NA.getAliasedNamespace())),
-        getLineNumber(NA.getLocation()), NA.getName());
+        getOrCreateFile(Loc), getLineNumber(Loc), NA.getName());
   VH.reset(R);
   return R;
 }
diff --git a/interpreter/llvm/src/tools/clang/lib/CodeGen/CGDebugInfo.h b/interpreter/llvm/src/tools/clang/lib/CodeGen/CGDebugInfo.h
index 7de48f2789945..39249c7cf4da2 100644
--- a/interpreter/llvm/src/tools/clang/lib/CodeGen/CGDebugInfo.h
+++ b/interpreter/llvm/src/tools/clang/lib/CodeGen/CGDebugInfo.h
@@ -367,7 +367,7 @@ class CGDebugInfo {
   void EmitFunctionDecl(GlobalDecl GD, SourceLocation Loc, QualType FnType);
 
   /// Constructs the debug code for exiting a function.
-  void EmitFunctionEnd(CGBuilderTy &Builder);
+  void EmitFunctionEnd(CGBuilderTy &Builder, llvm::Function *Fn);
 
   /// Emit metadata to indicate the beginning of a new lexical block
   /// and push the block onto the stack.
diff --git a/interpreter/llvm/src/tools/clang/lib/CodeGen/CGDecl.cpp b/interpreter/llvm/src/tools/clang/lib/CodeGen/CGDecl.cpp
index 10a0b46d9028a..23517867437c7 100644
--- a/interpreter/llvm/src/tools/clang/lib/CodeGen/CGDecl.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/CodeGen/CGDecl.cpp
@@ -11,14 +11,15 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "CodeGenFunction.h"
 #include "CGBlocks.h"
 #include "CGCXXABI.h"
 #include "CGCleanup.h"
 #include "CGDebugInfo.h"
 #include "CGOpenCLRuntime.h"
 #include "CGOpenMPRuntime.h"
+#include "CodeGenFunction.h"
 #include "CodeGenModule.h"
+#include "TargetInfo.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/CharUnits.h"
 #include "clang/AST/Decl.h"
@@ -152,7 +153,14 @@ void CodeGenFunction::EmitDecl(const Decl &D) {
 /// EmitVarDecl - This method handles emission of any variable declaration
 /// inside a function, including static vars etc.
 void CodeGenFunction::EmitVarDecl(const VarDecl &D) {
-  if (D.isStaticLocal()) {
+  if (D.hasExternalStorage())
+    // Don't emit it now, allow it to be emitted lazily on its first use.
+    return;
+
+  // Some function-scope variables do not have static storage but still need
+  // to be emitted like a static variable, e.g. a function-scope variable in
+  // the constant address space in OpenCL.
+  if (D.getStorageDuration() != SD_Automatic) {
     llvm::GlobalValue::LinkageTypes Linkage =
         CGM.getLLVMLinkageVarDefinition(&D, /*isConstant=*/false);
 
@@ -163,10 +171,6 @@ void CodeGenFunction::EmitVarDecl(const VarDecl &D) {
     return EmitStaticVarDecl(D, Linkage);
   }
 
-  if (D.hasExternalStorage())
-    // Don't emit it now, allow it to be emitted lazily on its first use.
-    return;
-
   if (D.getType().getAddressSpace() == LangAS::opencl_local)
     return CGM.getOpenCLRuntime().EmitWorkGroupLocalVarDecl(*this, D);
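
The reordering in EmitVarDecl above lets extern locals return early before the storage-duration test, while anything with non-automatic storage duration (not just isStaticLocal) goes through EmitStaticVarDecl. A sketch of which path each kind of local takes:

void paths() {
  extern int g;      // external storage: skipped here, emitted lazily on use
  static int s = 1;  // non-automatic duration: EmitStaticVarDecl
  int a = s + g;     // automatic: falls through to EmitAutoVarDecl
  (void)a;
}
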
 
@@ -217,8 +221,8 @@ llvm::Constant *CodeGenModule::getOrCreateStaticVarDecl(
     Name = getStaticDeclName(*this, D);
 
   llvm::Type *LTy = getTypes().ConvertTypeForMem(Ty);
-  unsigned AddrSpace =
-      GetGlobalVarAddressSpace(&D, getContext().getTargetAddressSpace(Ty));
+  unsigned AS = GetGlobalVarAddressSpace(&D);
+  unsigned TargetAS = getContext().getTargetAddressSpace(AS);
 
   // Local address space cannot have an initializer.
   llvm::Constant *Init = nullptr;
@@ -227,12 +231,9 @@ llvm::Constant *CodeGenModule::getOrCreateStaticVarDecl(
   else
     Init = llvm::UndefValue::get(LTy);
 
-  llvm::GlobalVariable *GV =
-    new llvm::GlobalVariable(getModule(), LTy,
-                             Ty.isConstant(getContext()), Linkage,
-                             Init, Name, nullptr,
-                             llvm::GlobalVariable::NotThreadLocal,
-                             AddrSpace);
+  llvm::GlobalVariable *GV = new llvm::GlobalVariable(
+      getModule(), LTy, Ty.isConstant(getContext()), Linkage, Init, Name,
+      nullptr, llvm::GlobalVariable::NotThreadLocal, TargetAS);
   GV->setAlignment(getContext().getDeclAlign(&D).getQuantity());
   setGlobalVisibility(GV, &D);
 
@@ -250,11 +251,12 @@ llvm::Constant *CodeGenModule::getOrCreateStaticVarDecl(
   }
 
   // Make sure the result is of the correct type.
-  unsigned ExpectedAddrSpace = getContext().getTargetAddressSpace(Ty);
+  unsigned ExpectedAS = Ty.getAddressSpace();
   llvm::Constant *Addr = GV;
-  if (AddrSpace != ExpectedAddrSpace) {
-    llvm::PointerType *PTy = llvm::PointerType::get(LTy, ExpectedAddrSpace);
-    Addr = llvm::ConstantExpr::getAddrSpaceCast(GV, PTy);
+  if (AS != ExpectedAS) {
+    Addr = getTargetCodeGenInfo().performAddrSpaceCast(
+        *this, GV, AS, ExpectedAS,
+        LTy->getPointerTo(getContext().getTargetAddressSpace(ExpectedAS)));
   }
 
   setStaticLocalDeclAddress(&D, Addr);
@@ -402,6 +404,13 @@ void CodeGenFunction::EmitStaticVarDecl(const VarDecl &D,
   if (D.hasAttr<AnnotateAttr>())
     CGM.AddGlobalAnnotations(&D, var);
 
+  if (auto *SA = D.getAttr<PragmaClangBSSSectionAttr>())
+    var->addAttribute("bss-section", SA->getName());
+  if (auto *SA = D.getAttr<PragmaClangDataSectionAttr>())
+    var->addAttribute("data-section", SA->getName());
+  if (auto *SA = D.getAttr<PragmaClangRodataSectionAttr>())
+    var->addAttribute("rodata-section", SA->getName());
+
   if (const SectionAttr *SA = D.getAttr<SectionAttr>())
     var->setSection(SA->getName());
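
The three attribute checks added above carry #pragma clang section names onto the variable as the bss-section/data-section/rodata-section IR attributes. A source-level sketch, assuming a target (such as ELF) where the pragma is honored; the section names are placeholders:

#pragma clang section bss = ".fast.bss" data = ".fast.data" rodata = ".fast.ro"
void counters() {
  static int zeroed;            // tagged "bss-section" = .fast.bss
  static int initialized = 42;  // tagged "data-section" = .fast.data
  static const int limit = 7;   // tagged "rodata-section" = .fast.ro
  (void)zeroed; (void)initialized; (void)limit;
}
#pragma clang section bss = "" data = "" rodata = ""
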
 
@@ -943,6 +952,7 @@ void CodeGenFunction::EmitLifetimeEnd(llvm::Value *Size, llvm::Value *Addr) {
 CodeGenFunction::AutoVarEmission
 CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) {
   QualType Ty = D.getType();
+  assert(Ty.getAddressSpace() == LangAS::Default);
 
   AutoVarEmission emission(D);
 
@@ -1035,8 +1045,7 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) {
       // Create the alloca.  Note that we set the name separately from
       // building the instruction so that it's there even in no-asserts
       // builds.
-      address = CreateTempAlloca(allocaTy, allocaAlignment);
-      address.getPointer()->setName(D.getName());
+      address = CreateTempAlloca(allocaTy, allocaAlignment, D.getName());
 
       // Don't emit lifetime markers for MSVC catch parameters. The lifetime of
       // the catch parameter starts in the catchpad instruction, and we can't
@@ -1096,10 +1105,7 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) {
     llvm::Type *llvmTy = ConvertTypeForMem(elementType);
 
     // Allocate memory for the array.
-    llvm::AllocaInst *vla = Builder.CreateAlloca(llvmTy, elementCount, "vla");
-    vla->setAlignment(alignment.getQuantity());
-
-    address = Address(vla, alignment);
+    address = CreateTempAlloca(llvmTy, alignment, "vla", elementCount);
   }
 
   setAddrOfLocalVar(&D, address);
@@ -1852,6 +1858,10 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, ParamValue Arg,
         lt = Qualifiers::OCL_ExplicitNone;
       }
 
+      // Load objects passed indirectly.
+      if (Arg.isIndirect() && !ArgVal)
+        ArgVal = Builder.CreateLoad(DeclPtr);
+
       if (lt == Qualifiers::OCL_Strong) {
         if (!isConsumed) {
           if (CGM.getCodeGenOpts().OptimizationLevel == 0) {
diff --git a/interpreter/llvm/src/tools/clang/lib/CodeGen/CGDeclCXX.cpp b/interpreter/llvm/src/tools/clang/lib/CodeGen/CGDeclCXX.cpp
index 00e833c5e8312..fe387b9f4b97d 100644
--- a/interpreter/llvm/src/tools/clang/lib/CodeGen/CGDeclCXX.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/CodeGen/CGDeclCXX.cpp
@@ -609,9 +609,9 @@ llvm::Function *CodeGenFunction::generateDestroyHelper(
     Address addr, QualType type, Destroyer *destroyer,
     bool useEHCleanupForArray, const VarDecl *VD) {
   FunctionArgList args;
-  ImplicitParamDecl dst(getContext(), nullptr, SourceLocation(), nullptr,
-                        getContext().VoidPtrTy);
-  args.push_back(&dst);
+  ImplicitParamDecl Dst(getContext(), getContext().VoidPtrTy,
+                        ImplicitParamDecl::Other);
+  args.push_back(&Dst);
 
   const CGFunctionInfo &FI =
     CGM.getTypes().arrangeBuiltinFunctionDeclaration(getContext().VoidTy, args);
diff --git a/interpreter/llvm/src/tools/clang/lib/CodeGen/CGException.cpp b/interpreter/llvm/src/tools/clang/lib/CodeGen/CGException.cpp
index e65fa863fe31d..40ae0921098cb 100644
--- a/interpreter/llvm/src/tools/clang/lib/CodeGen/CGException.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/CodeGen/CGException.cpp
@@ -1649,18 +1649,19 @@ void CodeGenFunction::startOutlinedSEHHelper(CodeGenFunction &ParentCGF,
     // parameters. Win32 filters take no parameters.
     if (IsFilter) {
       Args.push_back(ImplicitParamDecl::Create(
-          getContext(), nullptr, StartLoc,
+          getContext(), /*DC=*/nullptr, StartLoc,
           &getContext().Idents.get("exception_pointers"),
-          getContext().VoidPtrTy));
+          getContext().VoidPtrTy, ImplicitParamDecl::Other));
     } else {
       Args.push_back(ImplicitParamDecl::Create(
-          getContext(), nullptr, StartLoc,
+          getContext(), /*DC=*/nullptr, StartLoc,
           &getContext().Idents.get("abnormal_termination"),
-          getContext().UnsignedCharTy));
+          getContext().UnsignedCharTy, ImplicitParamDecl::Other));
     }
     Args.push_back(ImplicitParamDecl::Create(
-        getContext(), nullptr, StartLoc,
-        &getContext().Idents.get("frame_pointer"), getContext().VoidPtrTy));
+        getContext(), /*DC=*/nullptr, StartLoc,
+        &getContext().Idents.get("frame_pointer"), getContext().VoidPtrTy,
+        ImplicitParamDecl::Other));
   }
 
   QualType RetTy = IsFilter ? getContext().LongTy : getContext().VoidTy;
diff --git a/interpreter/llvm/src/tools/clang/lib/CodeGen/CGExpr.cpp b/interpreter/llvm/src/tools/clang/lib/CodeGen/CGExpr.cpp
index cef6292c0e4d9..9572bd3543bd7 100644
--- a/interpreter/llvm/src/tools/clang/lib/CodeGen/CGExpr.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/CodeGen/CGExpr.cpp
@@ -61,18 +61,39 @@ llvm::Value *CodeGenFunction::EmitCastToVoidPtr(llvm::Value *value) {
 /// CreateTempAlloca - This creates an alloca and inserts it into the entry
 /// block.
 Address CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, CharUnits Align,
-                                          const Twine &Name) {
-  auto Alloca = CreateTempAlloca(Ty, Name);
+                                          const Twine &Name,
+                                          llvm::Value *ArraySize,
+                                          bool CastToDefaultAddrSpace) {
+  auto Alloca = CreateTempAlloca(Ty, Name, ArraySize);
   Alloca->setAlignment(Align.getQuantity());
-  return Address(Alloca, Align);
-}
-
-/// CreateTempAlloca - This creates a alloca and inserts it into the entry
-/// block.
+  llvm::Value *V = Alloca;
+  // Alloca always returns a pointer in the alloca address space, which may
+  // be different from the address space the language expects. For example,
+  // in C++ the auto variables are in the default address space. Therefore,
+  // cast the alloca to the default address space when necessary.
+  if (CastToDefaultAddrSpace && getASTAllocaAddressSpace() != LangAS::Default) {
+    auto DestAddrSpace = getContext().getTargetAddressSpace(LangAS::Default);
+    auto CurIP = Builder.saveIP();
+    Builder.SetInsertPoint(AllocaInsertPt);
+    V = getTargetHooks().performAddrSpaceCast(
+        *this, V, getASTAllocaAddressSpace(), LangAS::Default,
+        Ty->getPointerTo(DestAddrSpace), /*non-null*/ true);
+    Builder.restoreIP(CurIP);
+  }
+
+  return Address(V, Align);
+}
+
+/// CreateTempAlloca - This creates an alloca and inserts it into the entry
+/// block if \p ArraySize is nullptr, otherwise inserts it at the current
+/// insertion point of the builder.
 llvm::AllocaInst *CodeGenFunction::CreateTempAlloca(llvm::Type *Ty,
-                                                    const Twine &Name) {
+                                                    const Twine &Name,
+                                                    llvm::Value *ArraySize) {
+  if (ArraySize)
+    return Builder.CreateAlloca(Ty, ArraySize, Name);
   return new llvm::AllocaInst(Ty, CGM.getDataLayout().getAllocaAddrSpace(),
-                              nullptr, Name, AllocaInsertPt);
+                              ArraySize, Name, AllocaInsertPt);
 }
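
NOTE: the ArraySize path exists because a runtime-sized alloca cannot be
hoisted to the entry block. A standalone analogue of the two placements,
illustrative only (alloca() is non-standard but ubiquitous):

    #include <alloca.h>
    #include <cstring>

    void demo(unsigned n) {
      char fixed[16];                    // size known up front: entry-block alloca
      char *dynamic = (char *)alloca(n); // runtime size: alloca at point of use
      std::memset(fixed, 0, sizeof fixed);
      std::memset(dynamic, 0, n);
    }
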
 
 /// CreateDefaultAlignTempAlloca - This creates an alloca with the
@@ -99,14 +120,18 @@ Address CodeGenFunction::CreateIRTemp(QualType Ty, const Twine &Name) {
   return CreateTempAlloca(ConvertType(Ty), Align, Name);
 }
 
-Address CodeGenFunction::CreateMemTemp(QualType Ty, const Twine &Name) {
+Address CodeGenFunction::CreateMemTemp(QualType Ty, const Twine &Name,
+                                       bool CastToDefaultAddrSpace) {
   // FIXME: Should we prefer the preferred type alignment here?
-  return CreateMemTemp(Ty, getContext().getTypeAlignInChars(Ty), Name);
+  return CreateMemTemp(Ty, getContext().getTypeAlignInChars(Ty), Name,
+                       CastToDefaultAddrSpace);
 }
 
 Address CodeGenFunction::CreateMemTemp(QualType Ty, CharUnits Align,
-                                       const Twine &Name) {
-  return CreateTempAlloca(ConvertTypeForMem(Ty), Align, Name);
+                                       const Twine &Name,
+                                       bool CastToDefaultAddrSpace) {
+  return CreateTempAlloca(ConvertTypeForMem(Ty), Align, Name, nullptr,
+                          CastToDefaultAddrSpace);
 }
 
 /// EvaluateExprAsBool - Perform the usual unary conversions on the specified
@@ -316,9 +341,10 @@ pushTemporaryCleanup(CodeGenFunction &CGF, const MaterializeTemporaryExpr *M,
   }
 }
 
-static Address
-createReferenceTemporary(CodeGenFunction &CGF,
-                         const MaterializeTemporaryExpr *M, const Expr *Inner) {
+static Address createReferenceTemporary(CodeGenFunction &CGF,
+                                        const MaterializeTemporaryExpr *M,
+                                        const Expr *Inner) {
+  auto &TCG = CGF.getTargetHooks();
   switch (M->getStorageDuration()) {
   case SD_FullExpression:
   case SD_Automatic: {
@@ -331,13 +357,24 @@ createReferenceTemporary(CodeGenFunction &CGF,
         (Ty->isArrayType() || Ty->isRecordType()) &&
         CGF.CGM.isTypeConstant(Ty, true))
       if (llvm::Constant *Init = CGF.CGM.EmitConstantExpr(Inner, Ty, &CGF)) {
-        auto *GV = new llvm::GlobalVariable(
-            CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
-            llvm::GlobalValue::PrivateLinkage, Init, ".ref.tmp");
-        CharUnits alignment = CGF.getContext().getTypeAlignInChars(Ty);
-        GV->setAlignment(alignment.getQuantity());
-        // FIXME: Should we put the new global into a COMDAT?
-        return Address(GV, alignment);
+        if (auto AddrSpace = CGF.getTarget().getConstantAddressSpace()) {
+          auto AS = AddrSpace.getValue();
+          auto *GV = new llvm::GlobalVariable(
+              CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
+              llvm::GlobalValue::PrivateLinkage, Init, ".ref.tmp", nullptr,
+              llvm::GlobalValue::NotThreadLocal,
+              CGF.getContext().getTargetAddressSpace(AS));
+          CharUnits alignment = CGF.getContext().getTypeAlignInChars(Ty);
+          GV->setAlignment(alignment.getQuantity());
+          llvm::Constant *C = GV;
+          if (AS != LangAS::Default)
+            C = TCG.performAddrSpaceCast(
+                CGF.CGM, GV, AS, LangAS::Default,
+                GV->getValueType()->getPointerTo(
+                    CGF.getContext().getTargetAddressSpace(LangAS::Default)));
+          // FIXME: Should we put the new global into a COMDAT?
+          return Address(C, alignment);
+        }
       }
     return CGF.CreateMemTemp(Ty, "ref.tmp");
   }
@@ -374,12 +411,14 @@ EmitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *M) {
       // dynamic initialization or a cleanup and we can just return the address
       // of the temporary.
       if (Var->hasInitializer())
-        return MakeAddrLValue(Object, M->getType(), AlignmentSource::Decl);
+        return MakeAddrLValue(Object, M->getType(),
+                              LValueBaseInfo(AlignmentSource::Decl, false));
 
       Var->setInitializer(CGM.EmitNullConstant(E->getType()));
     }
     LValue RefTempDst = MakeAddrLValue(Object, M->getType(),
-                                       AlignmentSource::Decl);
+                                       LValueBaseInfo(AlignmentSource::Decl,
+                                                      false));
 
     switch (getEvaluationKind(E->getType())) {
     default: llvm_unreachable("expected scalar or aggregate expression");
@@ -416,9 +455,11 @@ EmitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *M) {
 
   // Create and initialize the reference temporary.
   Address Object = createReferenceTemporary(*this, M, E);
-  if (auto *Var = dyn_cast<llvm::GlobalVariable>(Object.getPointer())) {
+  if (auto *Var = dyn_cast<llvm::GlobalVariable>(
+          Object.getPointer()->stripPointerCasts())) {
     Object = Address(llvm::ConstantExpr::getBitCast(
-        Var, ConvertTypeForMem(E->getType())->getPointerTo()),
+                         cast<llvm::Constant>(Object.getPointer()),
+                         ConvertTypeForMem(E->getType())->getPointerTo()),
                      Object.getAlignment());
     // If the temporary is a global and has a constant initializer or is a
     // constant temporary that we promoted to a global, we may have already
@@ -465,7 +506,7 @@ EmitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *M) {
 
     case SubobjectAdjustment::FieldAdjustment: {
       LValue LV = MakeAddrLValue(Object, E->getType(),
-                                 AlignmentSource::Decl);
+                                 LValueBaseInfo(AlignmentSource::Decl, false));
       LV = EmitLValueForField(LV, Adjustment.Field);
       assert(LV.isSimple() &&
              "materialized temporary field is not a simple lvalue");
@@ -482,7 +523,8 @@ EmitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *M) {
     }
   }
 
-  return MakeAddrLValue(Object, M->getType(), AlignmentSource::Decl);
+  return MakeAddrLValue(Object, M->getType(),
+                        LValueBaseInfo(AlignmentSource::Decl, false));
 }
 
 RValue
@@ -546,6 +588,11 @@ void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc,
   if (Ptr->getType()->getPointerAddressSpace())
     return;
 
+  // Don't check pointers to volatile data. The behavior here is implementation-
+  // defined.
+  if (Ty.isVolatileQualified())
+    return;
+
   SanitizerScope SanScope(this);
 
   SmallVector<std::pair<llvm::Value *, SanitizerMask>, 3> Checks;
@@ -847,7 +894,7 @@ void CodeGenModule::EmitExplicitCastExprType(const ExplicitCastExpr *E,
 /// EmitPointerWithAlignment - Given an expression of pointer type, try to
 /// derive a more accurate bound on the alignment of the pointer.
 Address CodeGenFunction::EmitPointerWithAlignment(const Expr *E,
-                                                  AlignmentSource  *Source) {
+                                                  LValueBaseInfo *BaseInfo) {
   // We allow this with ObjC object pointers because of fragile ABIs.
   assert(E->getType()->isPointerType() ||
          E->getType()->isObjCObjectPointerType());
@@ -866,16 +913,20 @@ Address CodeGenFunction::EmitPointerWithAlignment(const Expr *E,
         if (PtrTy->getPointeeType()->isVoidType())
           break;
 
-        AlignmentSource InnerSource;
-        Address Addr = EmitPointerWithAlignment(CE->getSubExpr(), &InnerSource);
-        if (Source) *Source = InnerSource;
+        LValueBaseInfo InnerInfo;
+        Address Addr = EmitPointerWithAlignment(CE->getSubExpr(), &InnerInfo);
+        if (BaseInfo) *BaseInfo = InnerInfo;
 
         // If this is an explicit bitcast, and the source l-value is
         // opaque, honor the alignment of the casted-to type.
         if (isa<ExplicitCastExpr>(CE) &&
-            InnerSource != AlignmentSource::Decl) {
-          Addr = Address(Addr.getPointer(),
-                         getNaturalPointeeTypeAlignment(E->getType(), Source));
+            InnerInfo.getAlignmentSource() != AlignmentSource::Decl) {
+          LValueBaseInfo ExpInfo;
+          CharUnits Align = getNaturalPointeeTypeAlignment(E->getType(),
+                                                           &ExpInfo);
+          if (BaseInfo)
+            BaseInfo->mergeForCast(ExpInfo);
+          Addr = Address(Addr.getPointer(), Align);
         }
 
         if (SanOpts.has(SanitizerKind::CFIUnrelatedCast) &&
@@ -893,12 +944,12 @@ Address CodeGenFunction::EmitPointerWithAlignment(const Expr *E,
 
     // Array-to-pointer decay.
     case CK_ArrayToPointerDecay:
-      return EmitArrayToPointerDecay(CE->getSubExpr(), Source);
+      return EmitArrayToPointerDecay(CE->getSubExpr(), BaseInfo);
 
     // Derived-to-base conversions.
     case CK_UncheckedDerivedToBase:
     case CK_DerivedToBase: {
-      Address Addr = EmitPointerWithAlignment(CE->getSubExpr(), Source);
+      Address Addr = EmitPointerWithAlignment(CE->getSubExpr(), BaseInfo);
       auto Derived = CE->getSubExpr()->getType()->getPointeeCXXRecordDecl();
       return GetAddressOfBaseClass(Addr, Derived,
                                    CE->path_begin(), CE->path_end(),
@@ -917,7 +968,7 @@ Address CodeGenFunction::EmitPointerWithAlignment(const Expr *E,
   if (const UnaryOperator *UO = dyn_cast<UnaryOperator>(E)) {
     if (UO->getOpcode() == UO_AddrOf) {
       LValue LV = EmitLValue(UO->getSubExpr());
-      if (Source) *Source = LV.getAlignmentSource();
+      if (BaseInfo) *BaseInfo = LV.getBaseInfo();
       return LV.getAddress();
     }
   }
@@ -925,7 +976,7 @@ Address CodeGenFunction::EmitPointerWithAlignment(const Expr *E,
   // TODO: conditional operators, comma.
 
   // Otherwise, use the alignment of the type.
-  CharUnits Align = getNaturalPointeeTypeAlignment(E->getType(), Source);
+  CharUnits Align = getNaturalPointeeTypeAlignment(E->getType(), BaseInfo);
   return Address(EmitScalarExpr(E), Align);
 }
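
NOTE: most of the churn in this file is mechanical: the bare AlignmentSource
that used to be threaded through these helpers becomes an LValueBaseInfo
pair of (alignment source, may-alias flag). A hedged reconstruction of the
class, inferred from the calls in this diff (the real definition lives in
CGValue.h; the constructor defaults and merge policy are assumptions):

    class LValueBaseInfo {
      AlignmentSource AlignSource;
      bool MayAlias;
    public:
      explicit LValueBaseInfo(AlignmentSource Source = AlignmentSource::Type,
                              bool Alias = false)
          : AlignSource(Source), MayAlias(Alias) {}
      AlignmentSource getAlignmentSource() const { return AlignSource; }
      void setAlignmentSource(AlignmentSource Source) { AlignSource = Source; }
      bool getMayAlias() const { return MayAlias; }
      void setMayAlias(bool Alias) { MayAlias = Alias; }
      // Adopt the cast's alignment source; alias if either side may alias.
      void mergeForCast(const LValueBaseInfo &Info) {
        setAlignmentSource(Info.getAlignmentSource());
        setMayAlias(getMayAlias() || Info.getMayAlias());
      }
    };
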
 
@@ -1094,7 +1145,7 @@ LValue CodeGenFunction::EmitLValue(const Expr *E) {
       llvm::Value *V = LV.getPointer();
       Scope.ForceCleanup({&V});
       return LValue::MakeAddr(Address(V, LV.getAlignment()), LV.getType(),
-                              getContext(), LV.getAlignmentSource(),
+                              getContext(), LV.getBaseInfo(),
                               LV.getTBAAInfo());
     }
     // FIXME: Is it possible to create an ExprWithCleanups that produces a
@@ -1151,6 +1202,11 @@ LValue CodeGenFunction::EmitLValue(const Expr *E) {
 
   case Expr::MaterializeTemporaryExprClass:
     return EmitMaterializeTemporaryExpr(cast<MaterializeTemporaryExpr>(E));
+
+  case Expr::CoawaitExprClass:
+    return EmitCoawaitLValue(cast<CoawaitExpr>(E));
+  case Expr::CoyieldExprClass:
+    return EmitCoyieldLValue(cast<CoyieldExpr>(E));
   }
 }
 
@@ -1269,7 +1325,7 @@ CodeGenFunction::tryEmitAsConstant(DeclRefExpr *refExpr) {
 llvm::Value *CodeGenFunction::EmitLoadOfScalar(LValue lvalue,
                                                SourceLocation Loc) {
   return EmitLoadOfScalar(lvalue.getAddress(), lvalue.isVolatile(),
-                          lvalue.getType(), Loc, lvalue.getAlignmentSource(),
+                          lvalue.getType(), Loc, lvalue.getBaseInfo(),
                           lvalue.getTBAAInfo(),
                           lvalue.getTBAABaseType(), lvalue.getTBAAOffset(),
                           lvalue.isNontemporal());
@@ -1381,7 +1437,7 @@ bool CodeGenFunction::EmitScalarRangeCheck(llvm::Value *Value, QualType Ty,
 llvm::Value *CodeGenFunction::EmitLoadOfScalar(Address Addr, bool Volatile,
                                                QualType Ty,
                                                SourceLocation Loc,
-                                               AlignmentSource AlignSource,
+                                               LValueBaseInfo BaseInfo,
                                                llvm::MDNode *TBAAInfo,
                                                QualType TBAABaseType,
                                                uint64_t TBAAOffset,
@@ -1413,7 +1469,7 @@ llvm::Value *CodeGenFunction::EmitLoadOfScalar(Address Addr, bool Volatile,
 
   // Atomic operations have to be done on integral types.
   LValue AtomicLValue =
-      LValue::MakeAddr(Addr, Ty, getContext(), AlignSource, TBAAInfo);
+      LValue::MakeAddr(Addr, Ty, getContext(), BaseInfo, TBAAInfo);
   if (Ty->isAtomicType() || LValueIsSuitableForInlineAtomic(AtomicLValue)) {
     return EmitAtomicLoad(AtomicLValue, Loc).getScalarVal();
   }
@@ -1425,11 +1481,12 @@ llvm::Value *CodeGenFunction::EmitLoadOfScalar(Address Addr, bool Volatile,
     Load->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
   }
   if (TBAAInfo) {
-    llvm::MDNode *TBAAPath = CGM.getTBAAStructTagInfo(TBAABaseType, TBAAInfo,
-                                                      TBAAOffset);
-    if (TBAAPath)
-      CGM.DecorateInstructionWithTBAA(Load, TBAAPath,
-                                      false /*ConvertTypeToTag*/);
+    bool MayAlias = BaseInfo.getMayAlias();
+    llvm::MDNode *TBAA = MayAlias
+        ? CGM.getTBAAInfo(getContext().CharTy)
+        : CGM.getTBAAStructTagInfo(TBAABaseType, TBAAInfo, TBAAOffset);
+    if (TBAA)
+      CGM.DecorateInstructionWithTBAA(Load, TBAA, MayAlias);
   }
 
   if (EmitScalarRangeCheck(Load, Ty, Loc)) {
@@ -1469,7 +1526,7 @@ llvm::Value *CodeGenFunction::EmitFromMemory(llvm::Value *Value, QualType Ty) {
 
 void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, Address Addr,
                                         bool Volatile, QualType Ty,
-                                        AlignmentSource AlignSource,
+                                        LValueBaseInfo BaseInfo,
                                         llvm::MDNode *TBAAInfo,
                                         bool isInit, QualType TBAABaseType,
                                         uint64_t TBAAOffset,
@@ -1479,9 +1536,9 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, Address Addr,
     // Handle vectors differently to get better performance.
     if (Ty->isVectorType()) {
       llvm::Type *SrcTy = Value->getType();
-      auto *VecTy = cast<llvm::VectorType>(SrcTy);
+      auto *VecTy = dyn_cast<llvm::VectorType>(SrcTy);
       // Handle vec3 special.
-      if (VecTy->getNumElements() == 3) {
+      if (VecTy && VecTy->getNumElements() == 3) {
         // Our source is a vec3, do a shuffle vector to make it a vec4.
         llvm::Constant *Mask[] = {Builder.getInt32(0), Builder.getInt32(1),
                                   Builder.getInt32(2),
@@ -1500,7 +1557,7 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, Address Addr,
   Value = EmitToMemory(Value, Ty);
 
   LValue AtomicLValue =
-      LValue::MakeAddr(Addr, Ty, getContext(), AlignSource, TBAAInfo);
+      LValue::MakeAddr(Addr, Ty, getContext(), BaseInfo, TBAAInfo);
   if (Ty->isAtomicType() ||
       (!isInit && LValueIsSuitableForInlineAtomic(AtomicLValue))) {
     EmitAtomicStore(RValue::get(Value), AtomicLValue, isInit);
@@ -1515,18 +1572,19 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, Address Addr,
     Store->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
   }
   if (TBAAInfo) {
-    llvm::MDNode *TBAAPath = CGM.getTBAAStructTagInfo(TBAABaseType, TBAAInfo,
-                                                      TBAAOffset);
-    if (TBAAPath)
-      CGM.DecorateInstructionWithTBAA(Store, TBAAPath,
-                                      false /*ConvertTypeToTag*/);
+    bool MayAlias = BaseInfo.getMayAlias();
+    llvm::MDNode *TBAA = MayAlias
+        ? CGM.getTBAAInfo(getContext().CharTy)
+        : CGM.getTBAAStructTagInfo(TBAABaseType, TBAAInfo, TBAAOffset);
+    if (TBAA)
+      CGM.DecorateInstructionWithTBAA(Store, TBAA, MayAlias);
   }
 }
 
 void CodeGenFunction::EmitStoreOfScalar(llvm::Value *value, LValue lvalue,
                                         bool isInit) {
   EmitStoreOfScalar(value, lvalue.getAddress(), lvalue.isVolatile(),
-                    lvalue.getType(), lvalue.getAlignmentSource(),
+                    lvalue.getType(), lvalue.getBaseInfo(),
                     lvalue.getTBAAInfo(), isInit, lvalue.getTBAABaseType(),
                     lvalue.getTBAAOffset(), lvalue.isNontemporal());
 }
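
NOTE: this is the functional payoff of the MayAlias bit: loads and stores
whose base may alias are decorated with the most general "char" TBAA tag
instead of a struct-path tag. What that models at the source level (my
example, not from the patch):

    // Accesses through a may_alias type must be assumed to alias anything,
    // exactly like a char access under strict aliasing.
    typedef unsigned __attribute__((may_alias)) aliasing_u32;

    float update(float *f, const unsigned *u) {
      *(aliasing_u32 *)f = *u; // may-alias store: generic "char" TBAA tag
      return *f;               // so this load cannot be hoisted past it
    }
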
@@ -2058,38 +2116,39 @@ static LValue EmitThreadPrivateVarDeclLValue(
     llvm::Type *RealVarTy, SourceLocation Loc) {
   Addr = CGF.CGM.getOpenMPRuntime().getAddrOfThreadPrivate(CGF, VD, Addr, Loc);
   Addr = CGF.Builder.CreateElementBitCast(Addr, RealVarTy);
-  return CGF.MakeAddrLValue(Addr, T, AlignmentSource::Decl);
+  LValueBaseInfo BaseInfo(AlignmentSource::Decl, false);
+  return CGF.MakeAddrLValue(Addr, T, BaseInfo);
 }
 
 Address CodeGenFunction::EmitLoadOfReference(Address Addr,
                                              const ReferenceType *RefTy,
-                                             AlignmentSource *Source) {
+                                             LValueBaseInfo *BaseInfo) {
   llvm::Value *Ptr = Builder.CreateLoad(Addr);
   return Address(Ptr, getNaturalTypeAlignment(RefTy->getPointeeType(),
-                                              Source, /*forPointee*/ true));
-  
+                                              BaseInfo, /*forPointee*/ true));
 }
 
 LValue CodeGenFunction::EmitLoadOfReferenceLValue(Address RefAddr,
                                                   const ReferenceType *RefTy) {
-  AlignmentSource Source;
-  Address Addr = EmitLoadOfReference(RefAddr, RefTy, &Source);
-  return MakeAddrLValue(Addr, RefTy->getPointeeType(), Source);
+  LValueBaseInfo BaseInfo;
+  Address Addr = EmitLoadOfReference(RefAddr, RefTy, &BaseInfo);
+  return MakeAddrLValue(Addr, RefTy->getPointeeType(), BaseInfo);
 }
 
 Address CodeGenFunction::EmitLoadOfPointer(Address Ptr,
                                            const PointerType *PtrTy,
-                                           AlignmentSource *Source) {
+                                           LValueBaseInfo *BaseInfo) {
   llvm::Value *Addr = Builder.CreateLoad(Ptr);
-  return Address(Addr, getNaturalTypeAlignment(PtrTy->getPointeeType(), Source,
+  return Address(Addr, getNaturalTypeAlignment(PtrTy->getPointeeType(),
+                                               BaseInfo,
                                                /*forPointeeType=*/true));
 }
 
 LValue CodeGenFunction::EmitLoadOfPointerLValue(Address PtrAddr,
                                                 const PointerType *PtrTy) {
-  AlignmentSource Source;
-  Address Addr = EmitLoadOfPointer(PtrAddr, PtrTy, &Source);
-  return MakeAddrLValue(Addr, PtrTy->getPointeeType(), Source);
+  LValueBaseInfo BaseInfo;
+  Address Addr = EmitLoadOfPointer(PtrAddr, PtrTy, &BaseInfo);
+  return MakeAddrLValue(Addr, PtrTy->getPointeeType(), BaseInfo);
 }
 
 static LValue EmitGlobalVarDeclLValue(CodeGenFunction &CGF,
@@ -2115,7 +2174,8 @@ static LValue EmitGlobalVarDeclLValue(CodeGenFunction &CGF,
   if (auto RefTy = VD->getType()->getAs<ReferenceType>()) {
     LV = CGF.EmitLoadOfReferenceLValue(Addr, RefTy);
   } else {
-    LV = CGF.MakeAddrLValue(Addr, T, AlignmentSource::Decl);
+    LValueBaseInfo BaseInfo(AlignmentSource::Decl, false);
+    LV = CGF.MakeAddrLValue(Addr, T, BaseInfo);
   }
   setObjCGCLValueClass(CGF.getContext(), E, LV);
   return LV;
@@ -2149,7 +2209,8 @@ static LValue EmitFunctionDeclLValue(CodeGenFunction &CGF,
                                      const Expr *E, const FunctionDecl *FD) {
   llvm::Value *V = EmitFunctionDeclPointer(CGF.CGM, FD);
   CharUnits Alignment = CGF.getContext().getDeclAlign(FD);
-  return CGF.MakeAddrLValue(V, E->getType(), Alignment, AlignmentSource::Decl);
+  LValueBaseInfo BaseInfo(AlignmentSource::Decl, false);
+  return CGF.MakeAddrLValue(V, E->getType(), Alignment, BaseInfo);
 }
 
 static LValue EmitCapturedFieldLValue(CodeGenFunction &CGF, const FieldDecl *FD,
@@ -2214,8 +2275,8 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) {
       // Should we be using the alignment of the constant pointer we emitted?
       CharUnits Alignment = getNaturalTypeAlignment(E->getType(), nullptr,
                                                     /*pointee*/ true);
-
-      return MakeAddrLValue(Address(Val, Alignment), T, AlignmentSource::Decl);
+      LValueBaseInfo BaseInfo(AlignmentSource::Decl, false);
+      return MakeAddrLValue(Address(Val, Alignment), T, BaseInfo);
     }
 
     // Check for captured variables.
@@ -2232,14 +2293,16 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) {
         LValue CapLVal =
             EmitCapturedFieldLValue(*this, CapturedStmtInfo->lookup(VD),
                                     CapturedStmtInfo->getContextValue());
+        bool MayAlias = CapLVal.getBaseInfo().getMayAlias();
         return MakeAddrLValue(
             Address(CapLVal.getPointer(), getContext().getDeclAlign(VD)),
-            CapLVal.getType(), AlignmentSource::Decl);
+            CapLVal.getType(), LValueBaseInfo(AlignmentSource::Decl, MayAlias));
       }
 
       assert(isa<BlockDecl>(CurCodeDecl));
       Address addr = GetAddrOfBlockDecl(VD, VD->hasAttr<BlocksAttr>());
-      return MakeAddrLValue(addr, T, AlignmentSource::Decl);
+      LValueBaseInfo BaseInfo(AlignmentSource::Decl, false);
+      return MakeAddrLValue(addr, T, BaseInfo);
     }
   }
 
@@ -2253,7 +2316,8 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) {
   if (ND->hasAttr<WeakRefAttr>()) {
     const auto *VD = cast<ValueDecl>(ND);
     ConstantAddress Aliasee = CGM.GetWeakRefReference(VD);
-    return MakeAddrLValue(Aliasee, T, AlignmentSource::Decl);
+    return MakeAddrLValue(Aliasee, T,
+                          LValueBaseInfo(AlignmentSource::Decl, false));
   }
 
   if (const auto *VD = dyn_cast<VarDecl>(ND)) {
@@ -2299,7 +2363,8 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) {
     if (auto RefTy = VD->getType()->getAs<ReferenceType>()) {
       LV = EmitLoadOfReferenceLValue(addr, RefTy);
     } else {
-      LV = MakeAddrLValue(addr, T, AlignmentSource::Decl);
+      LValueBaseInfo BaseInfo(AlignmentSource::Decl, false);
+      LV = MakeAddrLValue(addr, T, BaseInfo);
     }
 
     bool isLocalStorage = VD->hasLocalStorage();
@@ -2344,9 +2409,9 @@ LValue CodeGenFunction::EmitUnaryOpLValue(const UnaryOperator *E) {
     QualType T = E->getSubExpr()->getType()->getPointeeType();
     assert(!T.isNull() && "CodeGenFunction::EmitUnaryOpLValue: Illegal type");
 
-    AlignmentSource AlignSource;
-    Address Addr = EmitPointerWithAlignment(E->getSubExpr(), &AlignSource);
-    LValue LV = MakeAddrLValue(Addr, T, AlignSource);
+    LValueBaseInfo BaseInfo;
+    Address Addr = EmitPointerWithAlignment(E->getSubExpr(), &BaseInfo);
+    LValue LV = MakeAddrLValue(Addr, T, BaseInfo);
     LV.getQuals().setAddressSpace(ExprTy.getAddressSpace());
 
     // We should not generate __weak write barrier on indirect reference
@@ -2378,7 +2443,7 @@ LValue CodeGenFunction::EmitUnaryOpLValue(const UnaryOperator *E) {
       (E->getOpcode() == UO_Real
          ? emitAddrOfRealComponent(LV.getAddress(), LV.getType())
          : emitAddrOfImagComponent(LV.getAddress(), LV.getType()));
-    LValue ElemLV = MakeAddrLValue(Component, T, LV.getAlignmentSource());
+    LValue ElemLV = MakeAddrLValue(Component, T, LV.getBaseInfo());
     ElemLV.getQuals().addQualifiers(LV.getQuals());
     return ElemLV;
   }
@@ -2398,12 +2463,14 @@ LValue CodeGenFunction::EmitUnaryOpLValue(const UnaryOperator *E) {
 
 LValue CodeGenFunction::EmitStringLiteralLValue(const StringLiteral *E) {
   return MakeAddrLValue(CGM.GetAddrOfConstantStringFromLiteral(E),
-                        E->getType(), AlignmentSource::Decl);
+                        E->getType(),
+                        LValueBaseInfo(AlignmentSource::Decl, false));
 }
 
 LValue CodeGenFunction::EmitObjCEncodeExprLValue(const ObjCEncodeExpr *E) {
   return MakeAddrLValue(CGM.GetAddrOfConstantStringFromObjCEncode(E),
-                        E->getType(), AlignmentSource::Decl);
+                        E->getType(),
+                        LValueBaseInfo(AlignmentSource::Decl, false));
 }
 
 LValue CodeGenFunction::EmitPredefinedLValue(const PredefinedExpr *E) {
@@ -2415,6 +2482,7 @@ LValue CodeGenFunction::EmitPredefinedLValue(const PredefinedExpr *E) {
   StringRef NameItems[] = {
       PredefinedExpr::getIdentTypeName(E->getIdentType()), FnName};
   std::string GVName = llvm::join(NameItems, NameItems + 2, ".");
+  LValueBaseInfo BaseInfo(AlignmentSource::Decl, false);
   if (auto *BD = dyn_cast<BlockDecl>(CurCodeDecl)) {
     std::string Name = SL->getString();
     if (!Name.empty()) {
@@ -2423,14 +2491,14 @@ LValue CodeGenFunction::EmitPredefinedLValue(const PredefinedExpr *E) {
       if (Discriminator)
         Name += "_" + Twine(Discriminator + 1).str();
       auto C = CGM.GetAddrOfConstantCString(Name, GVName.c_str());
-      return MakeAddrLValue(C, E->getType(), AlignmentSource::Decl);
+      return MakeAddrLValue(C, E->getType(), BaseInfo);
     } else {
       auto C = CGM.GetAddrOfConstantCString(FnName, GVName.c_str());
-      return MakeAddrLValue(C, E->getType(), AlignmentSource::Decl);
+      return MakeAddrLValue(C, E->getType(), BaseInfo);
     }
   }
   auto C = CGM.GetAddrOfConstantStringFromLiteral(SL, GVName);
-  return MakeAddrLValue(C, E->getType(), AlignmentSource::Decl);
+  return MakeAddrLValue(C, E->getType(), BaseInfo);
 }
 
 /// Emit a type description suitable for use by a runtime sanitizer library. The
@@ -2828,10 +2896,10 @@ void CodeGenFunction::EmitCfiCheckStub() {
 void CodeGenFunction::EmitCfiCheckFail() {
   SanitizerScope SanScope(this);
   FunctionArgList Args;
-  ImplicitParamDecl ArgData(getContext(), nullptr, SourceLocation(), nullptr,
-                            getContext().VoidPtrTy);
-  ImplicitParamDecl ArgAddr(getContext(), nullptr, SourceLocation(), nullptr,
-                            getContext().VoidPtrTy);
+  ImplicitParamDecl ArgData(getContext(), getContext().VoidPtrTy,
+                            ImplicitParamDecl::Other);
+  ImplicitParamDecl ArgAddr(getContext(), getContext().VoidPtrTy,
+                            ImplicitParamDecl::Other);
   Args.push_back(&ArgData);
   Args.push_back(&ArgAddr);
 
@@ -2937,14 +3005,14 @@ llvm::CallInst *CodeGenFunction::EmitTrapCall(llvm::Intrinsic::ID IntrID) {
 }
 
 Address CodeGenFunction::EmitArrayToPointerDecay(const Expr *E,
-                                                 AlignmentSource *AlignSource) {
+                                                 LValueBaseInfo *BaseInfo) {
   assert(E->getType()->isArrayType() &&
          "Array to pointer decay must have array source type!");
 
   // Expressions of array type can't be bitfields or vector elements.
   LValue LV = EmitLValue(E);
   Address Addr = LV.getAddress();
-  if (AlignSource) *AlignSource = LV.getAlignmentSource();
+  if (BaseInfo) *BaseInfo = LV.getBaseInfo();
 
   // If the array type was an incomplete type, we need to make sure
   // the decay ends up being the right type.
@@ -2983,9 +3051,13 @@ static llvm::Value *emitArraySubscriptGEP(CodeGenFunction &CGF,
                                           llvm::Value *ptr,
                                           ArrayRef<llvm::Value *> indices,
                                           bool inbounds,
+                                          bool signedIndices,
+                                          SourceLocation loc,
                                     const llvm::Twine &name = "arrayidx") {
   if (inbounds) {
-    return CGF.Builder.CreateInBoundsGEP(ptr, indices, name);
+    return CGF.EmitCheckedInBoundsGEP(ptr, indices, signedIndices,
+                                      CodeGenFunction::NotSubtraction, loc,
+                                      name);
   } else {
     return CGF.Builder.CreateGEP(ptr, indices, name);
   }
@@ -3016,8 +3088,9 @@ static QualType getFixedSizeElementType(const ASTContext &ctx,
 }
 
 static Address emitArraySubscriptGEP(CodeGenFunction &CGF, Address addr,
-                                     ArrayRef<llvm::Value *> indices,
+                                     ArrayRef<llvm::Value *> indices,
                                      QualType eltType, bool inbounds,
+                                     bool signedIndices, SourceLocation loc,
                                      const llvm::Twine &name = "arrayidx") {
   // All the indices except that last must be zero.
 #ifndef NDEBUG
@@ -3037,8 +3110,8 @@ static Address emitArraySubscriptGEP(CodeGenFunction &CGF, Address addr,
   CharUnits eltAlign =
     getArrayElementAlign(addr.getAlignment(), indices.back(), eltSize);
 
-  llvm::Value *eltPtr =
-    emitArraySubscriptGEP(CGF, addr.getPointer(), indices, inbounds, name);
+  llvm::Value *eltPtr = emitArraySubscriptGEP(
+      CGF, addr.getPointer(), indices, inbounds, signedIndices, loc, name);
   return Address(eltPtr, eltAlign);
 }
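
NOTE: in-bounds subscript GEPs now route through EmitCheckedInBoundsGEP,
which under -fsanitize=pointer-overflow verifies that the address
arithmetic itself does not wrap. The class of bug it flags (illustrative):

    #include <climits>

    int *element(int *base, long i) {
      return &base[i]; // instrumented: base + sizeof(int)*i must not wrap
    }
    // element(p, LONG_MAX / (long)sizeof(int)) is UB even though the result
    // is never dereferenced; the new check reports it at runtime.
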
 
@@ -3048,6 +3121,7 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E,
   // in lexical order (this complexity is, sadly, required by C++17).
   llvm::Value *IdxPre =
       (E->getLHS() == E->getIdx()) ? EmitScalarExpr(E->getIdx()) : nullptr;
+  bool SignedIndices = false;
   auto EmitIdxAfterBase = [&, IdxPre](bool Promote) -> llvm::Value * {
     auto *Idx = IdxPre;
     if (E->getLHS() != E->getIdx()) {
@@ -3057,6 +3131,7 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E,
 
     QualType IdxTy = E->getIdx()->getType();
     bool IdxSigned = IdxTy->isSignedIntegerOrEnumerationType();
+    SignedIndices |= IdxSigned;
 
     if (SanOpts.has(SanitizerKind::ArrayBounds))
       EmitBoundsCheck(E, E->getBase(), Idx, IdxTy, Accessed);
@@ -3079,7 +3154,7 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E,
     assert(LHS.isSimple() && "Can only subscript lvalue vectors here!");
     return LValue::MakeVectorElt(LHS.getAddress(), Idx,
                                  E->getBase()->getType(),
-                                 LHS.getAlignmentSource());
+                                 LHS.getBaseInfo());
   }
 
   // All the other cases basically behave like simple offsetting.
@@ -3091,18 +3166,19 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E,
     Address Addr = EmitExtVectorElementLValue(LV);
 
     QualType EltType = LV.getType()->castAs<VectorType>()->getElementType();
-    Addr = emitArraySubscriptGEP(*this, Addr, Idx, EltType, /*inbounds*/ true);
-    return MakeAddrLValue(Addr, EltType, LV.getAlignmentSource());
+    Addr = emitArraySubscriptGEP(*this, Addr, Idx, EltType, /*inbounds*/ true,
+                                 SignedIndices, E->getExprLoc());
+    return MakeAddrLValue(Addr, EltType, LV.getBaseInfo());
   }
 
-  AlignmentSource AlignSource;
+  LValueBaseInfo BaseInfo;
   Address Addr = Address::invalid();
   if (const VariableArrayType *vla =
            getContext().getAsVariableArrayType(E->getType())) {
     // The base must be a pointer, which is not an aggregate.  Emit
     // it.  It needs to be emitted first in case it's what captures
     // the VLA bounds.
-    Addr = EmitPointerWithAlignment(E->getBase(), &AlignSource);
+    Addr = EmitPointerWithAlignment(E->getBase(), &BaseInfo);
     auto *Idx = EmitIdxAfterBase(/*Promote*/true);
 
     // The element count here is the total number of non-VLA elements.
@@ -3119,13 +3195,14 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E,
     }
 
     Addr = emitArraySubscriptGEP(*this, Addr, Idx, vla->getElementType(),
-                                 !getLangOpts().isSignedOverflowDefined());
+                                 !getLangOpts().isSignedOverflowDefined(),
+                                 SignedIndices, E->getExprLoc());
 
   } else if (const ObjCObjectType *OIT = E->getType()->getAs<ObjCObjectType>()){
     // Indexing over an interface, as in "NSString *P; P[4];"
 
     // Emit the base pointer.
-    Addr = EmitPointerWithAlignment(E->getBase(), &AlignSource);
+    Addr = EmitPointerWithAlignment(E->getBase(), &BaseInfo);
     auto *Idx = EmitIdxAfterBase(/*Promote*/true);
 
     CharUnits InterfaceSize = getContext().getTypeSizeInChars(OIT);
@@ -3145,7 +3222,8 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E,
     CharUnits EltAlign =
       getArrayElementAlign(Addr.getAlignment(), Idx, InterfaceSize);
     llvm::Value *EltPtr =
-      emitArraySubscriptGEP(*this, Addr.getPointer(), ScaledIdx, false);
+        emitArraySubscriptGEP(*this, Addr.getPointer(), ScaledIdx, false,
+                              SignedIndices, E->getExprLoc());
     Addr = Address(EltPtr, EltAlign);
 
     // Cast back.
@@ -3167,20 +3245,21 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E,
     auto *Idx = EmitIdxAfterBase(/*Promote*/true);
 
     // Propagate the alignment from the array itself to the result.
-    Addr = emitArraySubscriptGEP(*this, ArrayLV.getAddress(),
-                                 {CGM.getSize(CharUnits::Zero()), Idx},
-                                 E->getType(),
-                                 !getLangOpts().isSignedOverflowDefined());
-    AlignSource = ArrayLV.getAlignmentSource();
+    Addr = emitArraySubscriptGEP(
+        *this, ArrayLV.getAddress(), {CGM.getSize(CharUnits::Zero()), Idx},
+        E->getType(), !getLangOpts().isSignedOverflowDefined(), SignedIndices,
+        E->getExprLoc());
+    BaseInfo = ArrayLV.getBaseInfo();
   } else {
     // The base must be a pointer; emit it with an estimate of its alignment.
-    Addr = EmitPointerWithAlignment(E->getBase(), &AlignSource);
+    Addr = EmitPointerWithAlignment(E->getBase(), &BaseInfo);
     auto *Idx = EmitIdxAfterBase(/*Promote*/true);
     Addr = emitArraySubscriptGEP(*this, Addr, Idx, E->getType(),
-                                 !getLangOpts().isSignedOverflowDefined());
+                                 !getLangOpts().isSignedOverflowDefined(),
+                                 SignedIndices, E->getExprLoc());
   }
 
-  LValue LV = MakeAddrLValue(Addr, E->getType(), AlignSource);
+  LValue LV = MakeAddrLValue(Addr, E->getType(), BaseInfo);
 
   // TODO: Preserve/extend path TBAA metadata?
 
@@ -3193,7 +3272,7 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E,
 }
 
 static Address emitOMPArraySectionBase(CodeGenFunction &CGF, const Expr *Base,
-                                       AlignmentSource &AlignSource,
+                                       LValueBaseInfo &BaseInfo,
                                        QualType BaseTy, QualType ElTy,
                                        bool IsLowerBound) {
   LValue BaseLVal;
@@ -3201,7 +3280,7 @@ static Address emitOMPArraySectionBase(CodeGenFunction &CGF, const Expr *Base,
     BaseLVal = CGF.EmitOMPArraySectionExpr(ASE, IsLowerBound);
     if (BaseTy->isArrayType()) {
       Address Addr = BaseLVal.getAddress();
-      AlignSource = BaseLVal.getAlignmentSource();
+      BaseInfo = BaseLVal.getBaseInfo();
 
       // If the array type was an incomplete type, we need to make sure
       // the decay ends up being the right type.
@@ -3220,10 +3299,12 @@ static Address emitOMPArraySectionBase(CodeGenFunction &CGF, const Expr *Base,
       return CGF.Builder.CreateElementBitCast(Addr,
                                               CGF.ConvertTypeForMem(ElTy));
     }
-    CharUnits Align = CGF.getNaturalTypeAlignment(ElTy, &AlignSource);
+    LValueBaseInfo TypeInfo;
+    CharUnits Align = CGF.getNaturalTypeAlignment(ElTy, &TypeInfo);
+    BaseInfo.mergeForCast(TypeInfo);
     return Address(CGF.Builder.CreateLoad(BaseLVal.getAddress()), Align);
   }
-  return CGF.EmitPointerWithAlignment(Base, &AlignSource);
+  return CGF.EmitPointerWithAlignment(Base, &BaseInfo);
 }
 
 LValue CodeGenFunction::EmitOMPArraySectionExpr(const OMPArraySectionExpr *E,
@@ -3327,13 +3408,13 @@ LValue CodeGenFunction::EmitOMPArraySectionExpr(const OMPArraySectionExpr *E,
   assert(Idx);
 
   Address EltPtr = Address::invalid();
-  AlignmentSource AlignSource;
+  LValueBaseInfo BaseInfo;
   if (auto *VLA = getContext().getAsVariableArrayType(ResultExprTy)) {
     // The base must be a pointer, which is not an aggregate.  Emit
     // it.  It needs to be emitted first in case it's what captures
     // the VLA bounds.
     Address Base =
-        emitOMPArraySectionBase(*this, E->getBase(), AlignSource, BaseTy,
+        emitOMPArraySectionBase(*this, E->getBase(), BaseInfo, BaseTy,
                                 VLA->getElementType(), IsLowerBound);
     // The element count here is the total number of non-VLA elements.
     llvm::Value *NumElements = getVLASize(VLA).first;
@@ -3347,7 +3428,8 @@ LValue CodeGenFunction::EmitOMPArraySectionExpr(const OMPArraySectionExpr *E,
     else
       Idx = Builder.CreateNSWMul(Idx, NumElements);
     EltPtr = emitArraySubscriptGEP(*this, Base, Idx, VLA->getElementType(),
-                                   !getLangOpts().isSignedOverflowDefined());
+                                   !getLangOpts().isSignedOverflowDefined(),
+                                   /*SignedIndices=*/false, E->getExprLoc());
   } else if (const Expr *Array = isSimpleArrayDecayOperand(E->getBase())) {
     // If this is A[i] where A is an array, the frontend will have decayed the
     // base to be a ArrayToPointerDecay implicit cast.  While correct, it is
@@ -3366,16 +3448,18 @@ LValue CodeGenFunction::EmitOMPArraySectionExpr(const OMPArraySectionExpr *E,
     // Propagate the alignment from the array itself to the result.
     EltPtr = emitArraySubscriptGEP(
         *this, ArrayLV.getAddress(), {CGM.getSize(CharUnits::Zero()), Idx},
-        ResultExprTy, !getLangOpts().isSignedOverflowDefined());
-    AlignSource = ArrayLV.getAlignmentSource();
+        ResultExprTy, !getLangOpts().isSignedOverflowDefined(),
+        /*SignedIndices=*/false, E->getExprLoc());
+    BaseInfo = ArrayLV.getBaseInfo();
   } else {
-    Address Base = emitOMPArraySectionBase(*this, E->getBase(), AlignSource,
+    Address Base = emitOMPArraySectionBase(*this, E->getBase(), BaseInfo,
                                            BaseTy, ResultExprTy, IsLowerBound);
     EltPtr = emitArraySubscriptGEP(*this, Base, Idx, ResultExprTy,
-                                   !getLangOpts().isSignedOverflowDefined());
+                                   !getLangOpts().isSignedOverflowDefined(),
+                                   /*SignedIndices=*/false, E->getExprLoc());
   }
 
-  return MakeAddrLValue(EltPtr, ResultExprTy, AlignSource);
+  return MakeAddrLValue(EltPtr, ResultExprTy, BaseInfo);
 }
 
 LValue CodeGenFunction::
@@ -3387,10 +3471,10 @@ EmitExtVectorElementExpr(const ExtVectorElementExpr *E) {
   if (E->isArrow()) {
     // If it is a pointer to a vector, emit the address and form an lvalue with
     // it.
-    AlignmentSource AlignSource;
-    Address Ptr = EmitPointerWithAlignment(E->getBase(), &AlignSource);
+    LValueBaseInfo BaseInfo;
+    Address Ptr = EmitPointerWithAlignment(E->getBase(), &BaseInfo);
     const PointerType *PT = E->getBase()->getType()->getAs<PointerType>();
-    Base = MakeAddrLValue(Ptr, PT->getPointeeType(), AlignSource);
+    Base = MakeAddrLValue(Ptr, PT->getPointeeType(), BaseInfo);
     Base.getQuals().removeObjCGCAttr();
   } else if (E->getBase()->isGLValue()) {
     // Otherwise, if the base is an lvalue ( as in the case of foo.x.x),
@@ -3407,7 +3491,7 @@ EmitExtVectorElementExpr(const ExtVectorElementExpr *E) {
     Address VecMem = CreateMemTemp(E->getBase()->getType());
     Builder.CreateStore(Vec, VecMem);
     Base = MakeAddrLValue(VecMem, E->getBase()->getType(),
-                          AlignmentSource::Decl);
+                          LValueBaseInfo(AlignmentSource::Decl, false));
   }
 
   QualType type =
@@ -3421,7 +3505,7 @@ EmitExtVectorElementExpr(const ExtVectorElementExpr *E) {
     llvm::Constant *CV =
         llvm::ConstantDataVector::get(getLLVMContext(), Indices);
     return LValue::MakeExtVectorElt(Base.getAddress(), CV, type,
-                                    Base.getAlignmentSource());
+                                    Base.getBaseInfo());
   }
   assert(Base.isExtVectorElt() && "Can only subscript lvalue vec elts here!");
 
@@ -3432,17 +3516,16 @@ EmitExtVectorElementExpr(const ExtVectorElementExpr *E) {
     CElts.push_back(BaseElts->getAggregateElement(Indices[i]));
   llvm::Constant *CV = llvm::ConstantVector::get(CElts);
   return LValue::MakeExtVectorElt(Base.getExtVectorAddress(), CV, type,
-                                  Base.getAlignmentSource());
+                                  Base.getBaseInfo());
 }
 
 LValue CodeGenFunction::EmitMemberExpr(const MemberExpr *E) {
   Expr *BaseExpr = E->getBase();
-
   // If this is s.x, emit s as an lvalue.  If it is s->x, emit s as a scalar.
   LValue BaseLV;
   if (E->isArrow()) {
-    AlignmentSource AlignSource;
-    Address Addr = EmitPointerWithAlignment(BaseExpr, &AlignSource);
+    LValueBaseInfo BaseInfo;
+    Address Addr = EmitPointerWithAlignment(BaseExpr, &BaseInfo);
     QualType PtrTy = BaseExpr->getType()->getPointeeType();
     SanitizerSet SkippedChecks;
     bool IsBaseCXXThis = IsWrappedCXXThis(BaseExpr);
@@ -3452,7 +3535,7 @@ LValue CodeGenFunction::EmitMemberExpr(const MemberExpr *E) {
       SkippedChecks.set(SanitizerKind::Null, true);
     EmitTypeCheck(TCK_MemberAccess, E->getExprLoc(), Addr.getPointer(), PtrTy,
                   /*Alignment=*/CharUnits::Zero(), SkippedChecks);
-    BaseLV = MakeAddrLValue(Addr, PtrTy, AlignSource);
+    BaseLV = MakeAddrLValue(Addr, PtrTy, BaseInfo);
   } else
     BaseLV = EmitCheckedLValue(BaseExpr, TCK_MemberAccess);
 
@@ -3510,10 +3593,36 @@ static Address emitAddrOfFieldStorage(CodeGenFunction &CGF, Address base,
   return CGF.Builder.CreateStructGEP(base, idx, offset, field->getName());
 }
 
+static bool hasAnyVptr(const QualType Type, const ASTContext &Context) {
+  const auto *RD = Type.getTypePtr()->getAsCXXRecordDecl();
+  if (!RD)
+    return false;
+
+  if (RD->isDynamicClass())
+    return true;
+
+  for (const auto &Base : RD->bases())
+    if (hasAnyVptr(Base.getType(), Context))
+      return true;
+
+  for (const FieldDecl *Field : RD->fields())
+    if (hasAnyVptr(Field->getType(), Context))
+      return true;
+
+  return false;
+}
+
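
NOTE: hasAnyVptr() walks the record recursively, so a union member that
merely embeds a dynamic class (directly, via a base, or via a field) also
gets the barrier under -fstrict-vtable-pointers. The storage-reuse pattern
being defended against (my sketch, not from the patch):

    #include <new>

    struct A { virtual int f() { return 1; } };
    struct B { virtual int f() { return 2; } };

    union U {
      A a;
      B b;
      U() : a() {}
      ~U() {}
    };

    int demo() {
      U u;
      int r = u.a.f();    // vptr of A loaded through the union
      u.a.~A();
      new (&u.b) B;       // same storage, new dynamic type
      return r + u.b.f(); // vptr must be reloaded: hence the barrier
    }
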
 LValue CodeGenFunction::EmitLValueForField(LValue base,
                                            const FieldDecl *field) {
+  LValueBaseInfo BaseInfo = base.getBaseInfo();
   AlignmentSource fieldAlignSource =
-    getFieldAlignmentSource(base.getAlignmentSource());
+    getFieldAlignmentSource(BaseInfo.getAlignmentSource());
+  LValueBaseInfo FieldBaseInfo(fieldAlignSource, BaseInfo.getMayAlias());
+
+  const RecordDecl *rec = field->getParent();
+  if (rec->isUnion() || rec->hasAttr<MayAliasAttr>())
+    FieldBaseInfo.setMayAlias(true);
+  bool mayAlias = FieldBaseInfo.getMayAlias();
 
   if (field->isBitField()) {
     const CGRecordLayout &RL =
@@ -3533,14 +3642,10 @@ LValue CodeGenFunction::EmitLValueForField(LValue base,
 
     QualType fieldType =
       field->getType().withCVRQualifiers(base.getVRQualifiers());
-    return LValue::MakeBitfield(Addr, Info, fieldType, fieldAlignSource);
+    return LValue::MakeBitfield(Addr, Info, fieldType, FieldBaseInfo);
   }
 
-  const RecordDecl *rec = field->getParent();
   QualType type = field->getType();
-
-  bool mayAlias = rec->hasAttr<MayAliasAttr>();
-
   Address addr = base.getAddress();
   unsigned cvr = base.getVRQualifiers();
   bool TBAAPath = CGM.getCodeGenOpts().StructPathTBAA;
@@ -3549,6 +3654,14 @@ LValue CodeGenFunction::EmitLValueForField(LValue base,
     assert(!type->isReferenceType() && "union has reference member");
     // TODO: handle path-aware TBAA for union.
     TBAAPath = false;
+
+    const auto FieldType = field->getType();
+    if (CGM.getCodeGenOpts().StrictVTablePointers &&
+        hasAnyVptr(FieldType, getContext()))
+      // Because unions can easily skip invariant.barriers, we need to add
+      // a barrier every time CXXRecord field with vptr is referenced.
+      addr = Address(Builder.CreateInvariantGroupBarrier(addr.getPointer()),
+                     addr.getAlignment());
   } else {
     // For structs, we GEP to the field that the record layout suggests.
     addr = emitAddrOfFieldStorage(*this, addr, field);
@@ -3574,7 +3687,8 @@ LValue CodeGenFunction::EmitLValueForField(LValue base,
       type = refType->getPointeeType();
 
       CharUnits alignment =
-        getNaturalTypeAlignment(type, &fieldAlignSource, /*pointee*/ true);
+        getNaturalTypeAlignment(type, &FieldBaseInfo, /*pointee*/ true);
+      FieldBaseInfo.setMayAlias(false);
       addr = Address(load, alignment);
 
       // Qualifiers on the struct don't apply to the referencee, and
@@ -3595,7 +3709,7 @@ LValue CodeGenFunction::EmitLValueForField(LValue base,
   if (field->hasAttr<AnnotateAttr>())
     addr = EmitFieldAnnotations(field, addr);
 
-  LValue LV = MakeAddrLValue(addr, type, fieldAlignSource);
+  LValue LV = MakeAddrLValue(addr, type, FieldBaseInfo);
   LV.getQuals().addCVRQualifiers(cvr);
   if (TBAAPath) {
     const ASTRecordLayout &Layout =
@@ -3636,14 +3750,18 @@ CodeGenFunction::EmitLValueForFieldInitialization(LValue Base,
   V = Builder.CreateElementBitCast(V, llvmType, Field->getName());
 
   // TODO: access-path TBAA?
-  auto FieldAlignSource = getFieldAlignmentSource(Base.getAlignmentSource());
-  return MakeAddrLValue(V, FieldType, FieldAlignSource);
+  LValueBaseInfo BaseInfo = Base.getBaseInfo();
+  LValueBaseInfo FieldBaseInfo(
+      getFieldAlignmentSource(BaseInfo.getAlignmentSource()),
+      BaseInfo.getMayAlias());
+  return MakeAddrLValue(V, FieldType, FieldBaseInfo);
 }
 
 LValue CodeGenFunction::EmitCompoundLiteralLValue(const CompoundLiteralExpr *E){
+  LValueBaseInfo BaseInfo(AlignmentSource::Decl, false);
   if (E->isFileScope()) {
     ConstantAddress GlobalPtr = CGM.GetAddrOfConstantCompoundLiteral(E);
-    return MakeAddrLValue(GlobalPtr, E->getType(), AlignmentSource::Decl);
+    return MakeAddrLValue(GlobalPtr, E->getType(), BaseInfo);
   }
   if (E->getType()->isVariablyModifiedType())
     // make sure to emit the VLA size.
@@ -3651,7 +3769,7 @@ LValue CodeGenFunction::EmitCompoundLiteralLValue(const CompoundLiteralExpr *E){
 
   Address DeclPtr = CreateMemTemp(E->getType(), ".compoundliteral");
   const Expr *InitExpr = E->getInitializer();
-  LValue Result = MakeAddrLValue(DeclPtr, E->getType(), AlignmentSource::Decl);
+  LValue Result = MakeAddrLValue(DeclPtr, E->getType(), BaseInfo);
 
   EmitAnyExprToMem(InitExpr, DeclPtr, E->getType().getQualifiers(),
                    /*Init*/ true);
@@ -3748,8 +3866,12 @@ EmitConditionalOperatorLValue(const AbstractConditionalOperator *expr) {
     phi->addIncoming(rhs->getPointer(), rhsBlock);
     Address result(phi, std::min(lhs->getAlignment(), rhs->getAlignment()));
     AlignmentSource alignSource =
-      std::max(lhs->getAlignmentSource(), rhs->getAlignmentSource());
-    return MakeAddrLValue(result, expr->getType(), alignSource);
+      std::max(lhs->getBaseInfo().getAlignmentSource(),
+               rhs->getBaseInfo().getAlignmentSource());
+    bool MayAlias = lhs->getBaseInfo().getMayAlias() ||
+                    rhs->getBaseInfo().getMayAlias();
+    return MakeAddrLValue(result, expr->getType(),
+                          LValueBaseInfo(alignSource, MayAlias));
   } else {
     assert((lhs || rhs) &&
            "both operands of glvalue conditional are throw-expressions?");
@@ -3847,7 +3969,7 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) {
         This, DerivedClassDecl, E->path_begin(), E->path_end(),
         /*NullCheckValue=*/false, E->getExprLoc());
 
-    return MakeAddrLValue(Base, E->getType(), LV.getAlignmentSource());
+    return MakeAddrLValue(Base, E->getType(), LV.getBaseInfo());
   }
   case CK_ToUnion:
     return EmitAggExprToLValue(E);
@@ -3874,7 +3996,7 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) {
                                 /*MayBeNull=*/false,
                                 CFITCK_DerivedCast, E->getLocStart());
 
-    return MakeAddrLValue(Derived, E->getType(), LV.getAlignmentSource());
+    return MakeAddrLValue(Derived, E->getType(), LV.getBaseInfo());
   }
   case CK_LValueBitCast: {
     // This must be a reinterpret_cast (or c-style equivalent).
@@ -3890,13 +4012,13 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) {
                                 /*MayBeNull=*/false,
                                 CFITCK_UnrelatedCast, E->getLocStart());
 
-    return MakeAddrLValue(V, E->getType(), LV.getAlignmentSource());
+    return MakeAddrLValue(V, E->getType(), LV.getBaseInfo());
   }
   case CK_ObjCObjectLValueCast: {
     LValue LV = EmitLValue(E->getSubExpr());
     Address V = Builder.CreateElementBitCast(LV.getAddress(),
                                              ConvertType(E->getType()));
-    return MakeAddrLValue(V, E->getType(), LV.getAlignmentSource());
+    return MakeAddrLValue(V, E->getType(), LV.getBaseInfo());
   }
   case CK_ZeroToOCLQueue:
     llvm_unreachable("NULL to OpenCL queue lvalue cast is not valid");
@@ -4085,7 +4207,7 @@ LValue CodeGenFunction::EmitCallExprLValue(const CallExpr *E) {
 
   if (!RV.isScalar())
     return MakeAddrLValue(RV.getAggregateAddress(), E->getType(),
-                          AlignmentSource::Decl);
+                          LValueBaseInfo(AlignmentSource::Decl, false));
 
   assert(E->getCallReturnType(getContext())->isReferenceType() &&
          "Can't have a scalar return unless the return type is a "
@@ -4105,7 +4227,7 @@ LValue CodeGenFunction::EmitCXXConstructLValue(const CXXConstructExpr *E) {
   AggValueSlot Slot = CreateAggTemp(E->getType());
   EmitCXXConstructExpr(E, Slot);
   return MakeAddrLValue(Slot.getAddress(), E->getType(),
-                        AlignmentSource::Decl);
+                        LValueBaseInfo(AlignmentSource::Decl, false));
 }
 
 LValue
@@ -4120,7 +4242,7 @@ Address CodeGenFunction::EmitCXXUuidofExpr(const CXXUuidofExpr *E) {
 
 LValue CodeGenFunction::EmitCXXUuidofLValue(const CXXUuidofExpr *E) {
   return MakeAddrLValue(EmitCXXUuidofExpr(E), E->getType(),
-                        AlignmentSource::Decl);
+                        LValueBaseInfo(AlignmentSource::Decl, false));
 }
 
 LValue
@@ -4130,7 +4252,7 @@ CodeGenFunction::EmitCXXBindTemporaryLValue(const CXXBindTemporaryExpr *E) {
   EmitAggExpr(E->getSubExpr(), Slot);
   EmitCXXTemporary(E->getTemporary(), E->getType(), Slot.getAddress());
   return MakeAddrLValue(Slot.getAddress(), E->getType(),
-                        AlignmentSource::Decl);
+                        LValueBaseInfo(AlignmentSource::Decl, false));
 }
 
 LValue
@@ -4138,7 +4260,7 @@ CodeGenFunction::EmitLambdaLValue(const LambdaExpr *E) {
   AggValueSlot Slot = CreateAggTemp(E->getType(), "temp.lvalue");
   EmitLambdaExpr(E, Slot);
   return MakeAddrLValue(Slot.getAddress(), E->getType(),
-                        AlignmentSource::Decl);
+                        LValueBaseInfo(AlignmentSource::Decl, false));
 }
 
 LValue CodeGenFunction::EmitObjCMessageExprLValue(const ObjCMessageExpr *E) {
@@ -4146,7 +4268,7 @@ LValue CodeGenFunction::EmitObjCMessageExprLValue(const ObjCMessageExpr *E) {
 
   if (!RV.isScalar())
     return MakeAddrLValue(RV.getAggregateAddress(), E->getType(),
-                          AlignmentSource::Decl);
+                          LValueBaseInfo(AlignmentSource::Decl, false));
 
   assert(E->getMethodDecl()->getReturnType()->isReferenceType() &&
          "Can't have a scalar return unless the return type is a "
@@ -4158,7 +4280,8 @@ LValue CodeGenFunction::EmitObjCMessageExprLValue(const ObjCMessageExpr *E) {
 LValue CodeGenFunction::EmitObjCSelectorLValue(const ObjCSelectorExpr *E) {
   Address V =
     CGM.getObjCRuntime().GetAddrOfSelector(*this, E->getSelector());
-  return MakeAddrLValue(V, E->getType(), AlignmentSource::Decl);
+  return MakeAddrLValue(V, E->getType(),
+                        LValueBaseInfo(AlignmentSource::Decl, false));
 }
 
 llvm::Value *CodeGenFunction::EmitIvarOffset(const ObjCInterfaceDecl *Interface,
@@ -4202,7 +4325,7 @@ LValue CodeGenFunction::EmitStmtExprLValue(const StmtExpr *E) {
   // Can only get l-value for message expression returning aggregate type
   RValue RV = EmitAnyExprToTemp(E);
   return MakeAddrLValue(RV.getAggregateAddress(), E->getType(),
-                        AlignmentSource::Decl);
+                        LValueBaseInfo(AlignmentSource::Decl, false));
 }
 
 RValue CodeGenFunction::EmitCall(QualType CalleeType, const CGCallee &OrigCallee,
@@ -4394,12 +4517,11 @@ EmitPointerToDataMemberBinaryExpr(const BinaryOperator *E) {
   const MemberPointerType *MPT
     = E->getRHS()->getType()->getAs<MemberPointerType>();
 
-  AlignmentSource AlignSource;
+  LValueBaseInfo BaseInfo;
   Address MemberAddr =
-    EmitCXXMemberDataPointerAddress(E, BaseAddr, OffsetV, MPT,
-                                    &AlignSource);
+    EmitCXXMemberDataPointerAddress(E, BaseAddr, OffsetV, MPT, &BaseInfo);
 
-  return MakeAddrLValue(MemberAddr, MPT->getPointeeType(), AlignSource);
+  return MakeAddrLValue(MemberAddr, MPT->getPointeeType(), BaseInfo);
 }
 
 /// Given the address of a temporary variable, produce an r-value of
@@ -4407,7 +4529,8 @@ EmitPointerToDataMemberBinaryExpr(const BinaryOperator *E) {
 RValue CodeGenFunction::convertTempToRValue(Address addr,
                                             QualType type,
                                             SourceLocation loc) {
-  LValue lvalue = MakeAddrLValue(addr, type, AlignmentSource::Decl);
+  LValue lvalue = MakeAddrLValue(addr, type,
+                                 LValueBaseInfo(AlignmentSource::Decl, false));
   switch (getEvaluationKind(type)) {
   case TEK_Complex:
     return RValue::getComplex(EmitLoadOfComplex(lvalue, loc));
@@ -4462,9 +4585,9 @@ static LValueOrRValue emitPseudoObjectExpr(CodeGenFunction &CGF,
       if (ov == resultExpr && ov->isRValue() && !forLValue &&
           CodeGenFunction::hasAggregateEvaluationKind(ov->getType())) {
         CGF.EmitAggExpr(ov->getSourceExpr(), slot);
-
+        LValueBaseInfo BaseInfo(AlignmentSource::Decl, false);
         LValue LV = CGF.MakeAddrLValue(slot.getAddress(), ov->getType(),
-                                       AlignmentSource::Decl);
+                                       BaseInfo);
         opaqueData = OVMA::bind(CGF, ov, LV);
         result.RV = slot.asRValue();
 
diff --git a/interpreter/llvm/src/tools/clang/lib/CodeGen/CGExprCXX.cpp b/interpreter/llvm/src/tools/clang/lib/CodeGen/CGExprCXX.cpp
index d02f074dd605f..ab170245284ce 100644
--- a/interpreter/llvm/src/tools/clang/lib/CodeGen/CGExprCXX.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/CodeGen/CGExprCXX.cpp
@@ -199,7 +199,8 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr(
   bool CanUseVirtualCall = MD->isVirtual() && !HasQualifier;
 
   const CXXMethodDecl *DevirtualizedMethod = nullptr;
-  if (CanUseVirtualCall && CanDevirtualizeMemberFunctionCall(Base, MD)) {
+  if (CanUseVirtualCall &&
+      MD->getDevirtualizedMethod(Base, getLangOpts().AppleKext)) {
     const CXXRecordDecl *BestDynamicDecl = Base->getBestDynamicClassType();
     DevirtualizedMethod = MD->getCorrespondingMethodInClass(BestDynamicDecl);
     assert(DevirtualizedMethod);
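
NOTE: the ad-hoc CanDevirtualizeMemberFunctionCall() helper is replaced by
the CXXMethodDecl::getDevirtualizedMethod() API, with AppleKext passed
through since kexts must keep the indirect call. The transformation it
licenses (illustrative):

    struct Base { virtual int f() { return 1; } };
    struct Leaf final : Base { int f() override { return 2; } };

    int call(Leaf &x) {
      return x.f(); // dynamic type is known: emitted as a direct call
    }
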
@@ -1531,13 +1532,13 @@ llvm::Value *CodeGenFunction::EmitCXXNewExpr(const CXXNewExpr *E) {
     assert(E->getNumPlacementArgs() == 1);
     const Expr *arg = *E->placement_arguments().begin();
 
-    AlignmentSource alignSource;
-    allocation = EmitPointerWithAlignment(arg, &alignSource);
+    LValueBaseInfo BaseInfo;
+    allocation = EmitPointerWithAlignment(arg, &BaseInfo);
 
     // The pointer expression will, in many cases, be an opaque void*.
     // In these cases, discard the computed alignment and use the
     // formal alignment of the allocated type.
-    if (alignSource != AlignmentSource::Decl)
+    if (BaseInfo.getAlignmentSource() != AlignmentSource::Decl)
       allocation = Address(allocation.getPointer(), allocAlign);
 
     // Set up allocatorArgs for the call to operator delete if it's not
@@ -1658,8 +1659,9 @@ llvm::Value *CodeGenFunction::EmitCXXNewExpr(const CXXNewExpr *E) {
 
   // Passing pointer through invariant.group.barrier to avoid propagation of
   // vptrs information which may be included in previous type.
+  // To avoid breaking LTO builds that mix optimization levels, we do this
+  // regardless of the optimization level.
   if (CGM.getCodeGenOpts().StrictVTablePointers &&
-      CGM.getCodeGenOpts().OptimizationLevel > 0 &&
       allocator->isReservedGlobalPlacementOperator())
     result = Address(Builder.CreateInvariantGroupBarrier(result.getPointer()),
                      result.getAlignment());
diff --git a/interpreter/llvm/src/tools/clang/lib/CodeGen/CGExprScalar.cpp b/interpreter/llvm/src/tools/clang/lib/CodeGen/CGExprScalar.cpp
index 30f65e8b9ffa6..d997e06031acb 100644
--- a/interpreter/llvm/src/tools/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/CodeGen/CGExprScalar.cpp
@@ -30,6 +30,7 @@
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Function.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
 #include "llvm/IR/GlobalVariable.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/Module.h"
@@ -44,6 +45,43 @@ using llvm::Value;
 //===----------------------------------------------------------------------===//
 
 namespace {
+
+/// Determine whether the given binary operation may overflow.
+/// Sets \p Result to the value of the operation for BO_Add, BO_Sub, BO_Mul,
+/// and signed BO_{Div,Rem}. For these opcodes, and for unsigned BO_{Div,Rem},
+/// the returned overflow check is precise. The returned value is 'true' for
+/// all other opcodes, to be conservative.
+bool mayHaveIntegerOverflow(llvm::ConstantInt *LHS, llvm::ConstantInt *RHS,
+                             BinaryOperator::Opcode Opcode, bool Signed,
+                             llvm::APInt &Result) {
+  // Assume overflow is possible, unless we can prove otherwise.
+  bool Overflow = true;
+  const auto &LHSAP = LHS->getValue();
+  const auto &RHSAP = RHS->getValue();
+  if (Opcode == BO_Add) {
+    if (Signed)
+      Result = LHSAP.sadd_ov(RHSAP, Overflow);
+    else
+      Result = LHSAP.uadd_ov(RHSAP, Overflow);
+  } else if (Opcode == BO_Sub) {
+    if (Signed)
+      Result = LHSAP.ssub_ov(RHSAP, Overflow);
+    else
+      Result = LHSAP.usub_ov(RHSAP, Overflow);
+  } else if (Opcode == BO_Mul) {
+    if (Signed)
+      Result = LHSAP.smul_ov(RHSAP, Overflow);
+    else
+      Result = LHSAP.umul_ov(RHSAP, Overflow);
+  } else if (Opcode == BO_Div || Opcode == BO_Rem) {
+    if (Signed && !RHS->isZero())
+      Result = LHSAP.sdiv_ov(RHSAP, Overflow);
+    else
+      return false;
+  }
+  return Overflow;
+}
+
 struct BinOpInfo {
   Value *LHS;
   Value *RHS;
@@ -55,37 +93,14 @@ struct BinOpInfo {
   /// Check if the binop can result in integer overflow.
   bool mayHaveIntegerOverflow() const {
     // Without constant input, we can't rule out overflow.
-    const auto *LHSCI = dyn_cast<llvm::ConstantInt>(LHS);
-    const auto *RHSCI = dyn_cast<llvm::ConstantInt>(RHS);
+    auto *LHSCI = dyn_cast<llvm::ConstantInt>(LHS);
+    auto *RHSCI = dyn_cast<llvm::ConstantInt>(RHS);
     if (!LHSCI || !RHSCI)
       return true;
 
-    // Assume overflow is possible, unless we can prove otherwise.
-    bool Overflow = true;
-    const auto &LHSAP = LHSCI->getValue();
-    const auto &RHSAP = RHSCI->getValue();
-    if (Opcode == BO_Add) {
-      if (Ty->hasSignedIntegerRepresentation())
-        (void)LHSAP.sadd_ov(RHSAP, Overflow);
-      else
-        (void)LHSAP.uadd_ov(RHSAP, Overflow);
-    } else if (Opcode == BO_Sub) {
-      if (Ty->hasSignedIntegerRepresentation())
-        (void)LHSAP.ssub_ov(RHSAP, Overflow);
-      else
-        (void)LHSAP.usub_ov(RHSAP, Overflow);
-    } else if (Opcode == BO_Mul) {
-      if (Ty->hasSignedIntegerRepresentation())
-        (void)LHSAP.smul_ov(RHSAP, Overflow);
-      else
-        (void)LHSAP.umul_ov(RHSAP, Overflow);
-    } else if (Opcode == BO_Div || Opcode == BO_Rem) {
-      if (Ty->hasSignedIntegerRepresentation() && !RHSCI->isZero())
-        (void)LHSAP.sdiv_ov(RHSAP, Overflow);
-      else
-        return false;
-    }
-    return Overflow;
+    llvm::APInt Result;
+    return ::mayHaveIntegerOverflow(
+        LHSCI, RHSCI, Opcode, Ty->hasSignedIntegerRepresentation(), Result);
   }
 
   /// Check if the binop computes a division or a remainder.
@@ -1579,10 +1594,9 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
     }
     // Since target may map different address spaces in AST to the same address
     // space, an address space conversion may end up as a bitcast.
-    auto *Src = Visit(E);
-    return CGF.CGM.getTargetCodeGenInfo().performAddrSpaceCast(CGF, Src,
-                                                               E->getType(),
-                                                               DestTy);
+    return CGF.CGM.getTargetCodeGenInfo().performAddrSpaceCast(
+        CGF, Visit(E), E->getType()->getPointeeType().getAddressSpace(),
+        DestTy->getPointeeType().getAddressSpace(), ConvertType(DestTy));
   }
   case CK_AtomicToNonAtomic:
   case CK_NonAtomicToAtomic:
@@ -1837,6 +1851,7 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
   llvm::Value *input;
 
   int amount = (isInc ? 1 : -1);
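+  // A decrement counts as a subtraction for the pointer-overflow check below.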
+  bool isSubtraction = !isInc;
 
   if (const AtomicType *atomicTy = type->getAs<AtomicType>()) {
     type = atomicTy->getValueType();
@@ -1926,7 +1941,9 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
       if (CGF.getLangOpts().isSignedOverflowDefined())
         value = Builder.CreateGEP(value, numElts, "vla.inc");
       else
-        value = Builder.CreateInBoundsGEP(value, numElts, "vla.inc");
+        value = CGF.EmitCheckedInBoundsGEP(
+            value, numElts, /*SignedIndices=*/false, isSubtraction,
+            E->getExprLoc(), "vla.inc");
 
     // Arithmetic on function pointers (!) is just +-1.
     } else if (type->isFunctionType()) {
@@ -1936,7 +1953,9 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
       if (CGF.getLangOpts().isSignedOverflowDefined())
         value = Builder.CreateGEP(value, amt, "incdec.funcptr");
       else
-        value = Builder.CreateInBoundsGEP(value, amt, "incdec.funcptr");
+        value = CGF.EmitCheckedInBoundsGEP(value, amt, /*SignedIndices=*/false,
+                                           isSubtraction, E->getExprLoc(),
+                                           "incdec.funcptr");
       value = Builder.CreateBitCast(value, input->getType());
 
     // For everything else, we can just do a simple increment.
@@ -1945,7 +1964,9 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
       if (CGF.getLangOpts().isSignedOverflowDefined())
         value = Builder.CreateGEP(value, amt, "incdec.ptr");
       else
-        value = Builder.CreateInBoundsGEP(value, amt, "incdec.ptr");
+        value = CGF.EmitCheckedInBoundsGEP(value, amt, /*SignedIndices=*/false,
+                                           isSubtraction, E->getExprLoc(),
+                                           "incdec.ptr");
     }
 
   // Vector increment/decrement.
@@ -2026,7 +2047,9 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
     if (CGF.getLangOpts().isSignedOverflowDefined())
       value = Builder.CreateGEP(value, sizeValue, "incdec.objptr");
     else
-      value = Builder.CreateInBoundsGEP(value, sizeValue, "incdec.objptr");
+      value = CGF.EmitCheckedInBoundsGEP(value, sizeValue,
+                                         /*SignedIndices=*/false, isSubtraction,
+                                         E->getExprLoc(), "incdec.objptr");
     value = Builder.CreateBitCast(value, input->getType());
   }
 
@@ -2643,13 +2666,14 @@ static Value *emitPointerArithmetic(CodeGenFunction &CGF,
     std::swap(pointerOperand, indexOperand);
   }
 
+  bool isSigned = indexOperand->getType()->isSignedIntegerOrEnumerationType();
+
   unsigned width = cast<llvm::IntegerType>(index->getType())->getBitWidth();
   auto &DL = CGF.CGM.getDataLayout();
   auto PtrTy = cast<llvm::PointerType>(pointer->getType());
   if (width != DL.getTypeSizeInBits(PtrTy)) {
     // Zero-extend or sign-extend the pointer value according to
     // whether the index is signed or not.
-    bool isSigned = indexOperand->getType()->isSignedIntegerOrEnumerationType();
     index = CGF.Builder.CreateIntCast(index, DL.getIntPtrType(PtrTy), isSigned,
                                       "idx.ext");
   }
@@ -2693,7 +2717,9 @@ static Value *emitPointerArithmetic(CodeGenFunction &CGF,
       pointer = CGF.Builder.CreateGEP(pointer, index, "add.ptr");
     } else {
       index = CGF.Builder.CreateNSWMul(index, numElements, "vla.index");
-      pointer = CGF.Builder.CreateInBoundsGEP(pointer, index, "add.ptr");
+      pointer =
+          CGF.EmitCheckedInBoundsGEP(pointer, index, isSigned, isSubtraction,
+                                     op.E->getExprLoc(), "add.ptr");
     }
     return pointer;
   }
@@ -2710,7 +2736,8 @@ static Value *emitPointerArithmetic(CodeGenFunction &CGF,
   if (CGF.getLangOpts().isSignedOverflowDefined())
     return CGF.Builder.CreateGEP(pointer, index, "add.ptr");
 
-  return CGF.Builder.CreateInBoundsGEP(pointer, index, "add.ptr");
+  return CGF.EmitCheckedInBoundsGEP(pointer, index, isSigned, isSubtraction,
+                                    op.E->getExprLoc(), "add.ptr");
 }
 
 // Construct an fmuladd intrinsic to represent a fused mul-add of MulOp and
@@ -2783,7 +2810,7 @@ static Value* tryEmitFMulAdd(const BinOpInfo &op,
 Value *ScalarExprEmitter::EmitAdd(const BinOpInfo &op) {
   if (op.LHS->getType()->isPointerTy() ||
       op.RHS->getType()->isPointerTy())
-    return emitPointerArithmetic(CGF, op, /*subtraction*/ false);
+    return emitPointerArithmetic(CGF, op, CodeGenFunction::NotSubtraction);
 
   if (op.Ty->isSignedIntegerOrEnumerationType()) {
     switch (CGF.getLangOpts().getSignedOverflowBehavior()) {
@@ -2854,7 +2881,7 @@ Value *ScalarExprEmitter::EmitSub(const BinOpInfo &op) {
   // If the RHS is not a pointer, then we have normal pointer
   // arithmetic.
   if (!op.RHS->getType()->isPointerTy())
-    return emitPointerArithmetic(CGF, op, /*subtraction*/ true);
+    return emitPointerArithmetic(CGF, op, CodeGenFunction::IsSubtraction);
 
   // Otherwise, this is a pointer subtraction.
 
@@ -3825,3 +3852,136 @@ LValue CodeGenFunction::EmitCompoundAssignmentLValue(
 
   llvm_unreachable("Unhandled compound assignment operator");
 }
+
+Value *CodeGenFunction::EmitCheckedInBoundsGEP(Value *Ptr,
+                                               ArrayRef<Value *> IdxList,
+                                               bool SignedIndices,
+                                               bool IsSubtraction,
+                                               SourceLocation Loc,
+                                               const Twine &Name) {
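+  // The inbounds GEP is always emitted; everything below only adds the check.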
+  Value *GEPVal = Builder.CreateInBoundsGEP(Ptr, IdxList, Name);
+
+  // If the pointer overflow sanitizer isn't enabled, do nothing.
+  if (!SanOpts.has(SanitizerKind::PointerOverflow))
+    return GEPVal;
+
+  // If the GEP has already been reduced to a constant, leave it be.
+  if (isa<llvm::Constant>(GEPVal))
+    return GEPVal;
+
+  // Only check for overflows in the default address space.
+  if (GEPVal->getType()->getPointerAddressSpace())
+    return GEPVal;
+
+  auto *GEP = cast<llvm::GEPOperator>(GEPVal);
+  assert(GEP->isInBounds() && "Expected inbounds GEP");
+
+  SanitizerScope SanScope(this);
+  auto &VMContext = getLLVMContext();
+  const auto &DL = CGM.getDataLayout();
+  auto *IntPtrTy = DL.getIntPtrType(GEP->getPointerOperandType());
+
+  // Grab references to the signed add/mul overflow intrinsics for intptr_t.
+  auto *Zero = llvm::ConstantInt::getNullValue(IntPtrTy);
+  auto *SAddIntrinsic =
+      CGM.getIntrinsic(llvm::Intrinsic::sadd_with_overflow, IntPtrTy);
+  auto *SMulIntrinsic =
+      CGM.getIntrinsic(llvm::Intrinsic::smul_with_overflow, IntPtrTy);
+
+  // The total (signed) byte offset for the GEP.
+  llvm::Value *TotalOffset = nullptr;
+  // The offset overflow flag - true if the total offset overflows.
+  llvm::Value *OffsetOverflows = Builder.getFalse();
+
+  /// Return the result of the given binary operation.
+  auto eval = [&](BinaryOperator::Opcode Opcode, llvm::Value *LHS,
+                  llvm::Value *RHS) -> llvm::Value * {
+    assert((Opcode == BO_Add || Opcode == BO_Mul) && "Can't eval binop");
+
+    // If the operands are constants, return a constant result.
+    if (auto *LHSCI = dyn_cast<llvm::ConstantInt>(LHS)) {
+      if (auto *RHSCI = dyn_cast<llvm::ConstantInt>(RHS)) {
+        llvm::APInt N;
+        bool HasOverflow = mayHaveIntegerOverflow(LHSCI, RHSCI, Opcode,
+                                                  /*Signed=*/true, N);
+        if (HasOverflow)
+          OffsetOverflows = Builder.getTrue();
+        return llvm::ConstantInt::get(VMContext, N);
+      }
+    }
+
+    // Otherwise, compute the result with checked arithmetic.
+    auto *ResultAndOverflow = Builder.CreateCall(
+        (Opcode == BO_Add) ? SAddIntrinsic : SMulIntrinsic, {LHS, RHS});
+    OffsetOverflows = Builder.CreateOr(
+        Builder.CreateExtractValue(ResultAndOverflow, 1), OffsetOverflows);
+    return Builder.CreateExtractValue(ResultAndOverflow, 0);
+  };
+
+  // Determine the total byte offset by looking at each GEP operand.
+  for (auto GTI = llvm::gep_type_begin(GEP), GTE = llvm::gep_type_end(GEP);
+       GTI != GTE; ++GTI) {
+    llvm::Value *LocalOffset;
+    auto *Index = GTI.getOperand();
+    // Compute the local offset contributed by this indexing step:
+    if (auto *STy = GTI.getStructTypeOrNull()) {
+      // For struct indexing, the local offset is the byte position of the
+      // specified field.
+      unsigned FieldNo = cast<llvm::ConstantInt>(Index)->getZExtValue();
+      LocalOffset = llvm::ConstantInt::get(
+          IntPtrTy, DL.getStructLayout(STy)->getElementOffset(FieldNo));
+    } else {
+      // Otherwise this is array-like indexing. The local offset is the index
+      // multiplied by the element size.
+      auto *ElementSize = llvm::ConstantInt::get(
+          IntPtrTy, DL.getTypeAllocSize(GTI.getIndexedType()));
+      auto *IndexS = Builder.CreateIntCast(Index, IntPtrTy, /*isSigned=*/true);
+      LocalOffset = eval(BO_Mul, ElementSize, IndexS);
+    }
+
+    // If this is the first offset, set it as the total offset. Otherwise, add
+    // the local offset into the running total.
+    if (!TotalOffset || TotalOffset == Zero)
+      TotalOffset = LocalOffset;
+    else
+      TotalOffset = eval(BO_Add, TotalOffset, LocalOffset);
+  }
+
+  // Common case: if the total offset is zero, don't emit a check.
+  if (TotalOffset == Zero)
+    return GEPVal;
+
+  // Now that we've computed the total offset, add it to the base pointer (with
+  // wrapping semantics).
+  auto *IntPtr = Builder.CreatePtrToInt(GEP->getPointerOperand(), IntPtrTy);
+  auto *ComputedGEP = Builder.CreateAdd(IntPtr, TotalOffset);
+
+  // The GEP is valid if:
+  // 1) The total offset doesn't overflow, and
+  // 2) The sign of the difference between the computed address and the base
+  // pointer matches the sign of the total offset.
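+  // Sketch of the predicate (all comparisons on unsigned intptr values):
+  //   signed indices:        offset >= 0 ? result >= base : result < base
+  //   unsigned, addition:    result >= base
+  //   unsigned, subtraction: result <= base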
+  llvm::Value *ValidGEP;
+  auto *NoOffsetOverflow = Builder.CreateNot(OffsetOverflows);
+  if (SignedIndices) {
+    auto *PosOrZeroValid = Builder.CreateICmpUGE(ComputedGEP, IntPtr);
+    auto *PosOrZeroOffset = Builder.CreateICmpSGE(TotalOffset, Zero);
+    llvm::Value *NegValid = Builder.CreateICmpULT(ComputedGEP, IntPtr);
+    ValidGEP = Builder.CreateAnd(
+        Builder.CreateSelect(PosOrZeroOffset, PosOrZeroValid, NegValid),
+        NoOffsetOverflow);
+  } else if (!SignedIndices && !IsSubtraction) {
+    auto *PosOrZeroValid = Builder.CreateICmpUGE(ComputedGEP, IntPtr);
+    ValidGEP = Builder.CreateAnd(PosOrZeroValid, NoOffsetOverflow);
+  } else {
+    auto *NegOrZeroValid = Builder.CreateICmpULE(ComputedGEP, IntPtr);
+    ValidGEP = Builder.CreateAnd(NegOrZeroValid, NoOffsetOverflow);
+  }
+
+  llvm::Constant *StaticArgs[] = {EmitCheckSourceLocation(Loc)};
+  // Pass the computed GEP to the runtime to avoid emitting poisoned arguments.
+  llvm::Value *DynamicArgs[] = {IntPtr, ComputedGEP};
+  EmitCheck(std::make_pair(ValidGEP, SanitizerKind::PointerOverflow),
+            SanitizerHandler::PointerOverflow, StaticArgs, DynamicArgs);
+
+  return GEPVal;
+}
diff --git a/interpreter/llvm/src/tools/clang/lib/CodeGen/CGObjC.cpp b/interpreter/llvm/src/tools/clang/lib/CodeGen/CGObjC.cpp
index f4fbab3c2b832..90fcad2614154 100644
--- a/interpreter/llvm/src/tools/clang/lib/CodeGen/CGObjC.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/CodeGen/CGObjC.cpp
@@ -162,7 +162,7 @@ llvm::Value *CodeGenFunction::EmitObjCCollectionLiteral(const Expr *E,
       const Expr *Rhs = ALE->getElement(i);
       LValue LV = MakeAddrLValue(
           Builder.CreateConstArrayGEP(Objects, i, getPointerSize()),
-          ElementType, AlignmentSource::Decl);
+          ElementType, LValueBaseInfo(AlignmentSource::Decl, false));
 
       llvm::Value *value = EmitScalarExpr(Rhs);
       EmitStoreThroughLValue(RValue::get(value), LV, true);
@@ -174,7 +174,7 @@ llvm::Value *CodeGenFunction::EmitObjCCollectionLiteral(const Expr *E,
       const Expr *Key = DLE->getKeyValueElement(i).Key;
       LValue KeyLV = MakeAddrLValue(
           Builder.CreateConstArrayGEP(Keys, i, getPointerSize()),
-          ElementType, AlignmentSource::Decl);
+          ElementType, LValueBaseInfo(AlignmentSource::Decl, false));
       llvm::Value *keyValue = EmitScalarExpr(Key);
       EmitStoreThroughLValue(RValue::get(keyValue), KeyLV, /*isInit=*/true);
 
@@ -182,7 +182,7 @@ llvm::Value *CodeGenFunction::EmitObjCCollectionLiteral(const Expr *E,
       const Expr *Value = DLE->getKeyValueElement(i).Value;
       LValue ValueLV = MakeAddrLValue(
           Builder.CreateConstArrayGEP(Objects, i, getPointerSize()),
-          ElementType, AlignmentSource::Decl);
+          ElementType, LValueBaseInfo(AlignmentSource::Decl, false));
       llvm::Value *valueValue = EmitScalarExpr(Value);
       EmitStoreThroughLValue(RValue::get(valueValue), ValueLV, /*isInit=*/true);
       if (TrackNeededObjects) {
@@ -3246,10 +3246,12 @@ CodeGenFunction::GenerateObjCAtomicSetterCopyHelperFunction(
   SrcTy = C.getPointerType(SrcTy);
   
   FunctionArgList args;
-  ImplicitParamDecl dstDecl(getContext(), FD, SourceLocation(), nullptr,DestTy);
-  args.push_back(&dstDecl);
-  ImplicitParamDecl srcDecl(getContext(), FD, SourceLocation(), nullptr, SrcTy);
-  args.push_back(&srcDecl);
+  ImplicitParamDecl DstDecl(getContext(), FD, SourceLocation(), /*Id=*/nullptr,
+                            DestTy, ImplicitParamDecl::Other);
+  args.push_back(&DstDecl);
+  ImplicitParamDecl SrcDecl(getContext(), FD, SourceLocation(), /*Id=*/nullptr,
+                            SrcTy, ImplicitParamDecl::Other);
+  args.push_back(&SrcDecl);
 
   const CGFunctionInfo &FI =
     CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, args);
@@ -3265,12 +3267,12 @@ CodeGenFunction::GenerateObjCAtomicSetterCopyHelperFunction(
 
   StartFunction(FD, C.VoidTy, Fn, FI, args);
   
-  DeclRefExpr DstExpr(&dstDecl, false, DestTy,
+  DeclRefExpr DstExpr(&DstDecl, false, DestTy,
                       VK_RValue, SourceLocation());
   UnaryOperator DST(&DstExpr, UO_Deref, DestTy->getPointeeType(),
                     VK_LValue, OK_Ordinary, SourceLocation());
   
-  DeclRefExpr SrcExpr(&srcDecl, false, SrcTy,
+  DeclRefExpr SrcExpr(&SrcDecl, false, SrcTy,
                       VK_RValue, SourceLocation());
   UnaryOperator SRC(&SrcExpr, UO_Deref, SrcTy->getPointeeType(),
                     VK_LValue, OK_Ordinary, SourceLocation());
@@ -3327,10 +3329,12 @@ CodeGenFunction::GenerateObjCAtomicGetterCopyHelperFunction(
   SrcTy = C.getPointerType(SrcTy);
   
   FunctionArgList args;
-  ImplicitParamDecl dstDecl(getContext(), FD, SourceLocation(), nullptr,DestTy);
-  args.push_back(&dstDecl);
-  ImplicitParamDecl srcDecl(getContext(), FD, SourceLocation(), nullptr, SrcTy);
-  args.push_back(&srcDecl);
+  ImplicitParamDecl DstDecl(getContext(), FD, SourceLocation(), /*Id=*/nullptr,
+                            DestTy, ImplicitParamDecl::Other);
+  args.push_back(&DstDecl);
+  ImplicitParamDecl SrcDecl(getContext(), FD, SourceLocation(), /*Id=*/nullptr,
+                            SrcTy, ImplicitParamDecl::Other);
+  args.push_back(&SrcDecl);
 
   const CGFunctionInfo &FI =
     CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, args);
@@ -3345,7 +3349,7 @@ CodeGenFunction::GenerateObjCAtomicGetterCopyHelperFunction(
 
   StartFunction(FD, C.VoidTy, Fn, FI, args);
   
-  DeclRefExpr SrcExpr(&srcDecl, false, SrcTy,
+  DeclRefExpr SrcExpr(&SrcDecl, false, SrcTy,
                       VK_RValue, SourceLocation());
   
   UnaryOperator SRC(&SrcExpr, UO_Deref, SrcTy->getPointeeType(),
@@ -3371,7 +3375,7 @@ CodeGenFunction::GenerateObjCAtomicGetterCopyHelperFunction(
                              CXXConstExpr->getConstructionKind(),
                              SourceRange());
   
-  DeclRefExpr DstExpr(&dstDecl, false, DestTy,
+  DeclRefExpr DstExpr(&DstDecl, false, DestTy,
                       VK_RValue, SourceLocation());
   
   RValue DV = EmitAnyExpr(&DstExpr);
diff --git a/interpreter/llvm/src/tools/clang/lib/CodeGen/CGObjCMac.cpp b/interpreter/llvm/src/tools/clang/lib/CodeGen/CGObjCMac.cpp
index 9c048423285b1..98435fefbd2ea 100644
--- a/interpreter/llvm/src/tools/clang/lib/CodeGen/CGObjCMac.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/CodeGen/CGObjCMac.cpp
@@ -308,7 +308,7 @@ class ObjCCommonTypesHelper {
     SmallVector<CanQualType, 5> Params;
     Params.push_back(Ctx.VoidPtrTy);
     Params.push_back(Ctx.VoidPtrTy);
-    Params.push_back(Ctx.LongTy);
+    Params.push_back(Ctx.getSizeType());
     Params.push_back(Ctx.BoolTy);
     Params.push_back(Ctx.BoolTy);
     llvm::FunctionType *FTy =
@@ -1004,6 +1004,8 @@ class CGObjCCommonMac : public CodeGen::CGObjCRuntime {
                                       const ObjCInterfaceDecl *ID,
                                       ObjCCommonTypesHelper &ObjCTypes);
 
+  std::string GetSectionName(StringRef Section, StringRef MachOAttributes);
+
 public:
   /// CreateMetadataVar - Create a global variable with internal
   /// linkage for use by the Objective-C runtime.
@@ -1676,7 +1678,10 @@ struct NullReturnState {
 
   /// Complete the null-return operation.  It is valid to call this
   /// regardless of whether 'init' has been called.
-  RValue complete(CodeGenFunction &CGF, RValue result, QualType resultType,
+  RValue complete(CodeGenFunction &CGF,
+                  ReturnValueSlot returnSlot,
+                  RValue result,
+                  QualType resultType,
                   const CallArgList &CallArgs,
                   const ObjCMethodDecl *Method) {
     // If we never had to do a null-check, just use the raw result.
@@ -1743,7 +1748,8 @@ struct NullReturnState {
     // memory or (2) agg values in registers.
     if (result.isAggregate()) {
       assert(result.isAggregate() && "null init of non-aggregate result?");
-      CGF.EmitNullInitialization(result.getAggregateAddress(), resultType);
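+      // Only zero-initialize the indirect result if the caller actually reads it.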
+      if (!returnSlot.isUnused())
+        CGF.EmitNullInitialization(result.getAggregateAddress(), resultType);
       if (contBB) CGF.EmitBlock(contBB);
       return result;
     }
@@ -2115,11 +2121,11 @@ CGObjCCommonMac::EmitMessageSend(CodeGen::CodeGenFunction &CGF,
     }
   }
 
-  NullReturnState nullReturn;
+  bool RequiresNullCheck = false;
 
   llvm::Constant *Fn = nullptr;
   if (CGM.ReturnSlotInterferesWithArgs(MSI.CallInfo)) {
-    if (ReceiverCanBeNull) nullReturn.init(CGF, Arg0);
+    if (ReceiverCanBeNull) RequiresNullCheck = true;
     Fn = (ObjCABI == 2) ?  ObjCTypes.getSendStretFn2(IsSuper)
       : ObjCTypes.getSendStretFn(IsSuper);
   } else if (CGM.ReturnTypeUsesFPRet(ResultType)) {
@@ -2132,23 +2138,30 @@ CGObjCCommonMac::EmitMessageSend(CodeGen::CodeGenFunction &CGF,
     // arm64 uses objc_msgSend for stret methods and yet null receiver check
     // must be made for it.
     if (ReceiverCanBeNull && CGM.ReturnTypeUsesSRet(MSI.CallInfo))
-      nullReturn.init(CGF, Arg0);
+      RequiresNullCheck = true;
     Fn = (ObjCABI == 2) ? ObjCTypes.getSendFn2(IsSuper)
       : ObjCTypes.getSendFn(IsSuper);
   }
 
+  // We don't need to emit a null check to zero out an indirect result if the
+  // result is ignored.
+  if (Return.isUnused())
+    RequiresNullCheck = false;
+
   // Emit a null-check if there's a consumed argument other than the receiver.
-  bool RequiresNullCheck = false;
-  if (ReceiverCanBeNull && CGM.getLangOpts().ObjCAutoRefCount && Method) {
+  if (!RequiresNullCheck && CGM.getLangOpts().ObjCAutoRefCount && Method) {
     for (const auto *ParamDecl : Method->parameters()) {
       if (ParamDecl->hasAttr<NSConsumedAttr>()) {
-        if (!nullReturn.NullBB)
-          nullReturn.init(CGF, Arg0);
         RequiresNullCheck = true;
         break;
       }
     }
   }
+
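+  // All of the conditions above funnel into this single null-check init.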
+  NullReturnState nullReturn;
+  if (RequiresNullCheck) {
+    nullReturn.init(CGF, Arg0);
+  }
   
   llvm::Instruction *CallSite;
   Fn = llvm::ConstantExpr::getBitCast(Fn, MSI.MessengerType);
@@ -2162,7 +2175,7 @@ CGObjCCommonMac::EmitMessageSend(CodeGen::CodeGenFunction &CGF,
     llvm::CallSite(CallSite).setDoesNotReturn();
   }
 
-  return nullReturn.complete(CGF, rvalue, ResultType, CallArgs,
+  return nullReturn.complete(CGF, Return, rvalue, ResultType, CallArgs,
                              RequiresNullCheck ? Method : nullptr);
 }
 
@@ -4786,6 +4799,27 @@ llvm::Value *CGObjCMac::EmitIvarOffset(CodeGen::CodeGenFunction &CGF,
 
 /* *** Private Interface *** */
 
+std::string CGObjCCommonMac::GetSectionName(StringRef Section,
+                                            StringRef MachOAttributes) {
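+  // Translate the canonical Mach-O section name into the form the target
+  // object format expects.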
+  switch (CGM.getTriple().getObjectFormat()) {
+  default:
+    llvm_unreachable("unexpected object file format");
+  case llvm::Triple::MachO: {
+    if (MachOAttributes.empty())
+      return ("__DATA," + Section).str();
+    return ("__DATA," + Section + "," + MachOAttributes).str();
+  }
+  case llvm::Triple::ELF:
+    assert(Section.substr(0, 2) == "__" &&
+           "expected the name to begin with __");
+    return Section.substr(2).str();
+  case llvm::Triple::COFF:
+    assert(Section.substr(0, 2) == "__" &&
+           "expected the name to begin with __");
+    return ("." + Section.substr(2) + "$B").str();
+  }
+}
+
 /// EmitImageInfo - Emit the image info marker used to encode some module
 /// level information.
 ///
@@ -4809,9 +4843,10 @@ enum ImageInfoFlags {
 
 void CGObjCCommonMac::EmitImageInfo() {
   unsigned version = 0; // Version is unused?
-  const char *Section = (ObjCABI == 1) ?
-    "__OBJC, __image_info,regular" :
-    "__DATA, __objc_imageinfo, regular, no_dead_strip";
+  std::string Section =
+      (ObjCABI == 1)
+          ? "__OBJC,__image_info,regular"
+          : GetSectionName("__objc_imageinfo", "regular,no_dead_strip");
 
   // Generate module-level named metadata to convey this information to the
   // linker and code-gen.
@@ -4822,7 +4857,7 @@ void CGObjCCommonMac::EmitImageInfo() {
   Mod.addModuleFlag(llvm::Module::Error, "Objective-C Image Info Version",
                     version);
   Mod.addModuleFlag(llvm::Module::Error, "Objective-C Image Info Section",
-                    llvm::MDString::get(VMContext,Section));
+                    llvm::MDString::get(VMContext, Section));
 
   if (CGM.getLangOpts().getGC() == LangOptions::NonGC) {
     // Non-GC overrides those files which specify GC.
@@ -5930,17 +5965,21 @@ void CGObjCNonFragileABIMac::FinishNonFragileABIModule() {
   }
 
   AddModuleClassList(DefinedClasses, "OBJC_LABEL_CLASS_$",
-                     "__DATA, __objc_classlist, regular, no_dead_strip");
+                     GetSectionName("__objc_classlist",
+                                    "regular,no_dead_strip"));
 
   AddModuleClassList(DefinedNonLazyClasses, "OBJC_LABEL_NONLAZY_CLASS_$",
-                     "__DATA, __objc_nlclslist, regular, no_dead_strip");
+                     GetSectionName("__objc_nlclslist",
+                                    "regular,no_dead_strip"));
 
   // Build list of all implemented category addresses in array
   // L_OBJC_LABEL_CATEGORY_$.
   AddModuleClassList(DefinedCategories, "OBJC_LABEL_CATEGORY_$",
-                     "__DATA, __objc_catlist, regular, no_dead_strip");
+                     GetSectionName("__objc_catlist",
+                                    "regular,no_dead_strip"));
   AddModuleClassList(DefinedNonLazyCategories, "OBJC_LABEL_NONLAZY_CATEGORY_$",
-                     "__DATA, __objc_nlcatlist, regular, no_dead_strip");
+                     GetSectionName("__objc_nlcatlist",
+                                    "regular,no_dead_strip"));
 
   EmitImageInfo();
 }
@@ -6353,15 +6392,15 @@ llvm::Value *CGObjCNonFragileABIMac::GenerateProtocolRef(CodeGenFunction &CGF,
   llvm::GlobalVariable *PTGV = CGM.getModule().getGlobalVariable(ProtocolName);
   if (PTGV)
     return CGF.Builder.CreateAlignedLoad(PTGV, Align);
-  PTGV = new llvm::GlobalVariable(
-    CGM.getModule(),
-    Init->getType(), false,
-    llvm::GlobalValue::WeakAnyLinkage,
-    Init,
-    ProtocolName);
-  PTGV->setSection("__DATA, __objc_protorefs, coalesced, no_dead_strip");
+  PTGV = new llvm::GlobalVariable(CGM.getModule(), Init->getType(), false,
+                                  llvm::GlobalValue::WeakAnyLinkage, Init,
+                                  ProtocolName);
+  PTGV->setSection(GetSectionName("__objc_protorefs",
+                                  "coalesced,no_dead_strip"));
   PTGV->setVisibility(llvm::GlobalValue::HiddenVisibility);
   PTGV->setAlignment(Align.getQuantity());
+  if (!CGM.getTriple().isOSBinFormatMachO())
+    PTGV->setComdat(CGM.getModule().getOrInsertComdat(ProtocolName));
   CGM.addCompilerUsedGlobal(PTGV);
   return CGF.Builder.CreateAlignedLoad(PTGV, Align);
 }
@@ -6818,8 +6857,8 @@ llvm::Constant *CGObjCNonFragileABIMac::GetOrEmitProtocol(
     PTGV->setComdat(CGM.getModule().getOrInsertComdat(ProtocolRef));
   PTGV->setAlignment(
     CGM.getDataLayout().getABITypeAlignment(ObjCTypes.ProtocolnfABIPtrTy));
-  if (CGM.getTriple().isOSBinFormatMachO())
-    PTGV->setSection("__DATA, __objc_protolist, coalesced, no_dead_strip");
+  PTGV->setSection(GetSectionName("__objc_protolist",
+                                  "coalesced,no_dead_strip"));
   PTGV->setVisibility(llvm::GlobalValue::HiddenVisibility);
   CGM.addCompilerUsedGlobal(PTGV);
   return Entry;
@@ -7015,7 +7054,7 @@ CGObjCNonFragileABIMac::EmitVTableMessageSend(CodeGenFunction &CGF,
                                               /*constant*/ false,
                                         llvm::GlobalValue::WeakAnyLinkage);
     messageRef->setVisibility(llvm::GlobalValue::HiddenVisibility);
-    messageRef->setSection("__DATA, __objc_msgrefs, coalesced");
+    messageRef->setSection(GetSectionName("__objc_msgrefs", "coalesced"));
   }
   
   bool requiresnullCheck = false;
@@ -7045,7 +7084,7 @@ CGObjCNonFragileABIMac::EmitVTableMessageSend(CodeGenFunction &CGF,
   CGCallee callee(CGCalleeInfo(), calleePtr);
 
   RValue result = CGF.EmitCall(MSI.CallInfo, callee, returnSlot, args);
-  return nullReturn.complete(CGF, result, resultType, formalArgs,
+  return nullReturn.complete(CGF, returnSlot, result, resultType, formalArgs,
                              requiresnullCheck ? method : nullptr);
 }
 
@@ -7126,7 +7165,8 @@ CGObjCNonFragileABIMac::EmitClassRefFromId(CodeGenFunction &CGF,
                                      false, llvm::GlobalValue::PrivateLinkage,
                                      ClassGV, "OBJC_CLASSLIST_REFERENCES_$_");
     Entry->setAlignment(Align.getQuantity());
-    Entry->setSection("__DATA, __objc_classrefs, regular, no_dead_strip");
+    Entry->setSection(GetSectionName("__objc_classrefs",
+                                     "regular,no_dead_strip"));
     CGM.addCompilerUsedGlobal(Entry);
   }
   return CGF.Builder.CreateAlignedLoad(Entry, Align);
@@ -7160,7 +7200,8 @@ CGObjCNonFragileABIMac::EmitSuperClassRef(CodeGenFunction &CGF,
                                      false, llvm::GlobalValue::PrivateLinkage,
                                      ClassGV, "OBJC_CLASSLIST_SUP_REFS_$_");
     Entry->setAlignment(Align.getQuantity());
-    Entry->setSection("__DATA, __objc_superrefs, regular, no_dead_strip");
+    Entry->setSection(GetSectionName("__objc_superrefs",
+                                     "regular,no_dead_strip"));
     CGM.addCompilerUsedGlobal(Entry);
   }
   return CGF.Builder.CreateAlignedLoad(Entry, Align);
@@ -7182,7 +7223,8 @@ llvm::Value *CGObjCNonFragileABIMac::EmitMetaClassRef(CodeGenFunction &CGF,
                                      MetaClassGV, "OBJC_CLASSLIST_SUP_REFS_$_");
     Entry->setAlignment(Align.getQuantity());
 
-    Entry->setSection("__DATA, __objc_superrefs, regular, no_dead_strip");
+    Entry->setSection(GetSectionName("__objc_superrefs",
+                                     "regular,no_dead_strip"));
     CGM.addCompilerUsedGlobal(Entry);
   }
 
@@ -7278,7 +7320,8 @@ Address CGObjCNonFragileABIMac::EmitSelectorAddr(CodeGenFunction &CGF,
                                      false, llvm::GlobalValue::PrivateLinkage,
                                      Casted, "OBJC_SELECTOR_REFERENCES_");
     Entry->setExternallyInitialized(true);
-    Entry->setSection("__DATA, __objc_selrefs, literal_pointers, no_dead_strip");
+    Entry->setSection(GetSectionName("__objc_selrefs",
+                                     "literal_pointers,no_dead_strip"));
     Entry->setAlignment(Align.getQuantity());
     CGM.addCompilerUsedGlobal(Entry);
   }
diff --git a/interpreter/llvm/src/tools/clang/lib/CodeGen/CGObjCRuntime.cpp b/interpreter/llvm/src/tools/clang/lib/CodeGen/CGObjCRuntime.cpp
index 3da7ed230eddb..4cfddcb107cb3 100644
--- a/interpreter/llvm/src/tools/clang/lib/CodeGen/CGObjCRuntime.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/CodeGen/CGObjCRuntime.cpp
@@ -26,61 +26,27 @@
 using namespace clang;
 using namespace CodeGen;
 
-static uint64_t LookupFieldBitOffset(CodeGen::CodeGenModule &CGM,
-                                     const ObjCInterfaceDecl *OID,
-                                     const ObjCImplementationDecl *ID,
-                                     const ObjCIvarDecl *Ivar) {
-  const ObjCInterfaceDecl *Container = Ivar->getContainingInterface();
-
-  // FIXME: We should eliminate the need to have ObjCImplementationDecl passed
-  // in here; it should never be necessary because that should be the lexical
-  // decl context for the ivar.
-
-  // If we know have an implementation (and the ivar is in it) then
-  // look up in the implementation layout.
-  const ASTRecordLayout *RL;
-  if (ID && declaresSameEntity(ID->getClassInterface(), Container))
-    RL = &CGM.getContext().getASTObjCImplementationLayout(ID);
-  else
-    RL = &CGM.getContext().getASTObjCInterfaceLayout(Container);
-
-  // Compute field index.
-  //
-  // FIXME: The index here is closely tied to how ASTContext::getObjCLayout is
-  // implemented. This should be fixed to get the information from the layout
-  // directly.
-  unsigned Index = 0;
-
-  for (const ObjCIvarDecl *IVD = Container->all_declared_ivar_begin(); 
-       IVD; IVD = IVD->getNextIvar()) {
-    if (Ivar == IVD)
-      break;
-    ++Index;
-  }
-  assert(Index < RL->getFieldCount() && "Ivar is not inside record layout!");
-
-  return RL->getFieldOffset(Index);
-}
-
 uint64_t CGObjCRuntime::ComputeIvarBaseOffset(CodeGen::CodeGenModule &CGM,
                                               const ObjCInterfaceDecl *OID,
                                               const ObjCIvarDecl *Ivar) {
-  return LookupFieldBitOffset(CGM, OID, nullptr, Ivar) /
-    CGM.getContext().getCharWidth();
+  return CGM.getContext().lookupFieldBitOffset(OID, nullptr, Ivar) /
+         CGM.getContext().getCharWidth();
 }
 
 uint64_t CGObjCRuntime::ComputeIvarBaseOffset(CodeGen::CodeGenModule &CGM,
                                               const ObjCImplementationDecl *OID,
                                               const ObjCIvarDecl *Ivar) {
-  return LookupFieldBitOffset(CGM, OID->getClassInterface(), OID, Ivar) / 
-    CGM.getContext().getCharWidth();
+  return CGM.getContext().lookupFieldBitOffset(OID->getClassInterface(), OID,
+                                               Ivar) /
+         CGM.getContext().getCharWidth();
 }
 
 unsigned CGObjCRuntime::ComputeBitfieldBitOffset(
     CodeGen::CodeGenModule &CGM,
     const ObjCInterfaceDecl *ID,
     const ObjCIvarDecl *Ivar) {
-  return LookupFieldBitOffset(CGM, ID, ID->getImplementation(), Ivar);
+  return CGM.getContext().lookupFieldBitOffset(ID, ID->getImplementation(),
+                                               Ivar);
 }
 
 LValue CGObjCRuntime::EmitValueForIvarAtOffset(CodeGen::CodeGenFunction &CGF,
@@ -90,7 +56,11 @@ LValue CGObjCRuntime::EmitValueForIvarAtOffset(CodeGen::CodeGenFunction &CGF,
                                                unsigned CVRQualifiers,
                                                llvm::Value *Offset) {
   // Compute (type*) ( (char *) BaseValue + Offset)
-  QualType IvarTy = Ivar->getType().withCVRQualifiers(CVRQualifiers);
+  QualType InterfaceTy{OID->getTypeForDecl(), 0};
+  QualType ObjectPtrTy =
+      CGF.CGM.getContext().getObjCObjectPointerType(InterfaceTy);
+  QualType IvarTy =
+      Ivar->getUsageType(ObjectPtrTy).withCVRQualifiers(CVRQualifiers);
   llvm::Type *LTy = CGF.CGM.getTypes().ConvertTypeForMem(IvarTy);
   llvm::Value *V = CGF.Builder.CreateBitCast(BaseValue, CGF.Int8PtrTy);
   V = CGF.Builder.CreateInBoundsGEP(V, Offset, "add.ptr");
@@ -115,7 +85,8 @@ LValue CGObjCRuntime::EmitValueForIvarAtOffset(CodeGen::CodeGenFunction &CGF,
   // Note, there is a subtle invariant here: we can only call this routine on
   // non-synthesized ivars but we may be called for synthesized ivars.  However,
   // a synthesized ivar can never be a bit-field, so this is safe.
-  uint64_t FieldBitOffset = LookupFieldBitOffset(CGF.CGM, OID, nullptr, Ivar);
+  uint64_t FieldBitOffset =
+      CGF.CGM.getContext().lookupFieldBitOffset(OID, nullptr, Ivar);
   uint64_t BitOffset = FieldBitOffset % CGF.CGM.getContext().getCharWidth();
   uint64_t AlignmentBits = CGF.CGM.getTarget().getCharAlign();
   uint64_t BitFieldSize = Ivar->getBitWidthValue(CGF.getContext());
@@ -138,7 +109,8 @@ LValue CGObjCRuntime::EmitValueForIvarAtOffset(CodeGen::CodeGenFunction &CGF,
   Addr = CGF.Builder.CreateElementBitCast(Addr,
                                    llvm::Type::getIntNTy(CGF.getLLVMContext(),
                                                          Info->StorageSize));
-  return LValue::MakeBitfield(Addr, *Info, IvarTy, AlignmentSource::Decl);
+  return LValue::MakeBitfield(Addr, *Info, IvarTy,
+                              LValueBaseInfo(AlignmentSource::Decl, false));
 }
 
 namespace {
diff --git a/interpreter/llvm/src/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/interpreter/llvm/src/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index b256a88c47adb..d488bd4b30bf8 100644
--- a/interpreter/llvm/src/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -643,6 +643,12 @@ enum OpenMPRTLFunction {
   // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
   // *vec);
   OMPRTL__kmpc_doacross_wait,
+  // Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void
+  // *data);
+  OMPRTL__kmpc_task_reduction_init,
+  // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
+  // *d);
+  OMPRTL__kmpc_task_reduction_get_th_data,
 
   //
   // Offloading related calls
@@ -697,6 +703,414 @@ void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
   }
 }
 
+/// Check whether the combiner is a call to a UDR combiner; if so, return the
+/// UDR decl used for the reduction.
+static const OMPDeclareReductionDecl *
+getReductionInit(const Expr *ReductionOp) {
+  if (auto *CE = dyn_cast<CallExpr>(ReductionOp))
+    if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
+      if (auto *DRE =
+              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
+        if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
+          return DRD;
+  return nullptr;
+}
+
+static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
+                                             const OMPDeclareReductionDecl *DRD,
+                                             const Expr *InitOp,
+                                             Address Private, Address Original,
+                                             QualType Ty) {
+  if (DRD->getInitializer()) {
+    std::pair<llvm::Function *, llvm::Function *> Reduction =
+        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
+    auto *CE = cast<CallExpr>(InitOp);
+    auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
+    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
+    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
+    auto *LHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
+    auto *RHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
+    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
+    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
+                            [=]() -> Address { return Private; });
+    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
+                            [=]() -> Address { return Original; });
+    (void)PrivateScope.Privatize();
+    RValue Func = RValue::get(Reduction.second);
+    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
+    CGF.EmitIgnoredExpr(InitOp);
+  } else {
+    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
+    auto *GV = new llvm::GlobalVariable(
+        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
+        llvm::GlobalValue::PrivateLinkage, Init, ".init");
+    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
+    RValue InitRVal;
+    switch (CGF.getEvaluationKind(Ty)) {
+    case TEK_Scalar:
+      InitRVal = CGF.EmitLoadOfLValue(LV, SourceLocation());
+      break;
+    case TEK_Complex:
+      InitRVal =
+          RValue::getComplex(CGF.EmitLoadOfComplex(LV, SourceLocation()));
+      break;
+    case TEK_Aggregate:
+      InitRVal = RValue::getAggregate(LV.getAddress());
+      break;
+    }
+    OpaqueValueExpr OVE(SourceLocation(), Ty, VK_RValue);
+    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
+    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
+                         /*IsInitializer=*/false);
+  }
+}
+
+/// \brief Emit initialization of arrays of complex types.
+/// \param DestAddr Address of the array.
+/// \param Type Type of array.
+/// \param Init Initial expression of array.
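+/// \param DRD Declare reduction construct used for the reduction, if any.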
+/// \param SrcAddr Address of the original array.
+static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
+                                 QualType Type, const Expr *Init,
+                                 const OMPDeclareReductionDecl *DRD,
+                                 Address SrcAddr = Address::invalid()) {
+  // Perform element-by-element initialization.
+  QualType ElementTy;
+
+  // Drill down to the base element type on both arrays.
+  auto ArrayTy = Type->getAsArrayTypeUnsafe();
+  auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
+  DestAddr =
+      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
+  if (DRD)
+    SrcAddr =
+        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
+
+  llvm::Value *SrcBegin = nullptr;
+  if (DRD)
+    SrcBegin = SrcAddr.getPointer();
+  auto DestBegin = DestAddr.getPointer();
+  // Compute the address one element past the end of the array.
+  auto DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
+  // The basic structure here is a while-do loop.
+  auto BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
+  auto DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
+  auto IsEmpty =
+      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
+  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
+
+  // Enter the loop body, making that address the current address.
+  auto EntryBB = CGF.Builder.GetInsertBlock();
+  CGF.EmitBlock(BodyBB);
+
+  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
+
+  llvm::PHINode *SrcElementPHI = nullptr;
+  Address SrcElementCurrent = Address::invalid();
+  if (DRD) {
+    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
+                                          "omp.arraycpy.srcElementPast");
+    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
+    SrcElementCurrent =
+        Address(SrcElementPHI,
+                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
+  }
+  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
+      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
+  DestElementPHI->addIncoming(DestBegin, EntryBB);
+  Address DestElementCurrent =
+      Address(DestElementPHI,
+              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
+
+  // Emit copy.
+  {
+    CodeGenFunction::RunCleanupsScope InitScope(CGF);
+    if (DRD && (DRD->getInitializer() || !Init)) {
+      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
+                                       SrcElementCurrent, ElementTy);
+    } else
+      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
+                           /*IsInitializer=*/false);
+  }
+
+  if (DRD) {
+    // Shift the address forward by one element.
+    auto SrcElementNext = CGF.Builder.CreateConstGEP1_32(
+        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
+    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
+  }
+
+  // Shift the address forward by one element.
+  auto DestElementNext = CGF.Builder.CreateConstGEP1_32(
+      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
+  // Check whether we've reached the end.
+  auto Done =
+      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
+  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
+  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());
+
+  // Done.
+  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
+}
+
+LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
+  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
+    return CGF.EmitOMPArraySectionExpr(OASE);
+  if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(E))
+    return CGF.EmitLValue(ASE);
+  auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
+  DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
+                  CGF.CapturedStmtInfo &&
+                      CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr,
+                  E->getType(), VK_LValue, E->getExprLoc());
+  // Store the address of the original variable associated with the LHS
+  // implicit variable.
+  return CGF.EmitLValue(&DRE);
+}
+
+LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
+                                            const Expr *E) {
+  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
+    return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
+  return LValue();
+}
+
+void ReductionCodeGen::emitAggregateInitialization(
+    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
+    const OMPDeclareReductionDecl *DRD) {
+  // Emit VarDecl with copy init for arrays.
+  // Get the address of the original variable captured in current
+  // captured region.
+  auto *PrivateVD =
+      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
+  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
+                       DRD ? ClausesData[N].ReductionOp : PrivateVD->getInit(),
+                       DRD, SharedLVal.getAddress());
+}
+
+ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
+                                   ArrayRef<const Expr *> Privates,
+                                   ArrayRef<const Expr *> ReductionOps) {
+  ClausesData.reserve(Shareds.size());
+  SharedAddresses.reserve(Shareds.size());
+  Sizes.reserve(Shareds.size());
+  BaseDecls.reserve(Shareds.size());
+  auto IPriv = Privates.begin();
+  auto IRed = ReductionOps.begin();
+  for (const auto *Ref : Shareds) {
+    ClausesData.emplace_back(Ref, *IPriv, *IRed);
+    std::advance(IPriv, 1);
+    std::advance(IRed, 1);
+  }
+}
+
+void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) {
+  assert(SharedAddresses.size() == N &&
+         "Number of generated lvalues must be exactly N.");
+  SharedAddresses.emplace_back(emitSharedLValue(CGF, ClausesData[N].Ref),
+                               emitSharedLValueUB(CGF, ClausesData[N].Ref));
+}
+
+void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
+  auto *PrivateVD =
+      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
+  QualType PrivateType = PrivateVD->getType();
+  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
+  if (!AsArraySection && !PrivateType->isVariablyModifiedType()) {
+    Sizes.emplace_back(
+        CGF.getTypeSize(
+            SharedAddresses[N].first.getType().getNonReferenceType()),
+        nullptr);
+    return;
+  }
+  llvm::Value *Size;
+  llvm::Value *SizeInChars;
+  llvm::Type *ElemType =
+      cast<llvm::PointerType>(SharedAddresses[N].first.getPointer()->getType())
+          ->getElementType();
+  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
+  if (AsArraySection) {
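+    // The section spans (UB - LB) + 1 elements; scale by the element size.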
+    Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(),
+                                     SharedAddresses[N].first.getPointer());
+    Size = CGF.Builder.CreateNUWAdd(
+        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
+    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
+  } else {
+    SizeInChars = CGF.getTypeSize(
+        SharedAddresses[N].first.getType().getNonReferenceType());
+    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
+  }
+  Sizes.emplace_back(SizeInChars, Size);
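+  // Bind the VLA size expression to the computed element count so that
+  // EmitVariablyModifiedType can evaluate the private type.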
+  CodeGenFunction::OpaqueValueMapping OpaqueMap(
+      CGF,
+      cast<OpaqueValueExpr>(
+          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
+      RValue::get(Size));
+  CGF.EmitVariablyModifiedType(PrivateType);
+}
+
+void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
+                                         llvm::Value *Size) {
+  auto *PrivateVD =
+      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
+  QualType PrivateType = PrivateVD->getType();
+  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
+  if (!AsArraySection && !PrivateType->isVariablyModifiedType()) {
+    assert(!Size && !Sizes[N].second &&
+           "Size should be nullptr for non-variably modified redution "
+           "items.");
+    return;
+  }
+  CodeGenFunction::OpaqueValueMapping OpaqueMap(
+      CGF,
+      cast<OpaqueValueExpr>(
+          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
+      RValue::get(Size));
+  CGF.EmitVariablyModifiedType(PrivateType);
+}
+
+void ReductionCodeGen::emitInitialization(
+    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
+    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
+  assert(SharedAddresses.size() > N && "No variable was generated");
+  auto *PrivateVD =
+      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
+  auto *DRD = getReductionInit(ClausesData[N].ReductionOp);
+  QualType PrivateType = PrivateVD->getType();
+  PrivateAddr = CGF.Builder.CreateElementBitCast(
+      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
+  QualType SharedType = SharedAddresses[N].first.getType();
+  SharedLVal = CGF.MakeAddrLValue(
+      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(),
+                                       CGF.ConvertTypeForMem(SharedType)),
+      SharedType, SharedAddresses[N].first.getBaseInfo());
+  if (isa<OMPArraySectionExpr>(ClausesData[N].Ref) ||
+      CGF.getContext().getAsArrayType(PrivateVD->getType())) {
+    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
+  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
+    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
+                                     PrivateAddr, SharedLVal.getAddress(),
+                                     SharedLVal.getType());
+  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
+             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
+    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
+                         PrivateVD->getType().getQualifiers(),
+                         /*IsInitializer=*/false);
+  }
+}
+
+bool ReductionCodeGen::needCleanups(unsigned N) {
+  auto *PrivateVD =
+      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
+  QualType PrivateType = PrivateVD->getType();
+  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
+  return DTorKind != QualType::DK_none;
+}
+
+void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
+                                    Address PrivateAddr) {
+  auto *PrivateVD =
+      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
+  QualType PrivateType = PrivateVD->getType();
+  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
+  if (needCleanups(N)) {
+    PrivateAddr = CGF.Builder.CreateElementBitCast(
+        PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
+    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
+  }
+}
+
+static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
+                          LValue BaseLV) {
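+  // Load through each pointer/reference level until BaseTy matches ElTy.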
+  BaseTy = BaseTy.getNonReferenceType();
+  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
+         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
+    if (auto *PtrTy = BaseTy->getAs<PointerType>())
+      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
+    else {
+      BaseLV = CGF.EmitLoadOfReferenceLValue(BaseLV.getAddress(),
+                                             BaseTy->castAs<ReferenceType>());
+    }
+    BaseTy = BaseTy->getPointeeType();
+  }
+  return CGF.MakeAddrLValue(
+      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(),
+                                       CGF.ConvertTypeForMem(ElTy)),
+      BaseLV.getType(), BaseLV.getBaseInfo());
+}
+
+static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
+                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
+                          llvm::Value *Addr) {
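+  // Mirror the original indirection chain: one temporary per pointer or
+  // reference level, each holding the address of the next; Addr is stored
+  // into the innermost one.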
+  Address Tmp = Address::invalid();
+  Address TopTmp = Address::invalid();
+  Address MostTopTmp = Address::invalid();
+  BaseTy = BaseTy.getNonReferenceType();
+  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
+         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
+    Tmp = CGF.CreateMemTemp(BaseTy);
+    if (TopTmp.isValid())
+      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
+    else
+      MostTopTmp = Tmp;
+    TopTmp = Tmp;
+    BaseTy = BaseTy->getPointeeType();
+  }
+  llvm::Type *Ty = BaseLVType;
+  if (Tmp.isValid())
+    Ty = Tmp.getElementType();
+  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
+  if (Tmp.isValid()) {
+    CGF.Builder.CreateStore(Addr, Tmp);
+    return MostTopTmp;
+  }
+  return Address(Addr, BaseLVAlignment);
+}
+
+Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
+                                               Address PrivateAddr) {
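+  // Locate the base variable under any array-section/subscript layers, then
+  // rebase the private buffer by the distance between that base and the
+  // section start so it can be addressed like the original.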
+  const DeclRefExpr *DE;
+  const VarDecl *OrigVD = nullptr;
+  if (auto *OASE = dyn_cast<OMPArraySectionExpr>(ClausesData[N].Ref)) {
+    auto *Base = OASE->getBase()->IgnoreParenImpCasts();
+    while (auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
+      Base = TempOASE->getBase()->IgnoreParenImpCasts();
+    while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
+      Base = TempASE->getBase()->IgnoreParenImpCasts();
+    DE = cast<DeclRefExpr>(Base);
+    OrigVD = cast<VarDecl>(DE->getDecl());
+  } else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(ClausesData[N].Ref)) {
+    auto *Base = ASE->getBase()->IgnoreParenImpCasts();
+    while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
+      Base = TempASE->getBase()->IgnoreParenImpCasts();
+    DE = cast<DeclRefExpr>(Base);
+    OrigVD = cast<VarDecl>(DE->getDecl());
+  }
+  if (OrigVD) {
+    BaseDecls.emplace_back(OrigVD);
+    auto OriginalBaseLValue = CGF.EmitLValue(DE);
+    LValue BaseLValue =
+        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
+                    OriginalBaseLValue);
+    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
+        BaseLValue.getPointer(), SharedAddresses[N].first.getPointer());
+    llvm::Value *Ptr =
+        CGF.Builder.CreateGEP(PrivateAddr.getPointer(), Adjustment);
+    return castToBase(CGF, OrigVD->getType(),
+                      SharedAddresses[N].first.getType(),
+                      OriginalBaseLValue.getPointer()->getType(),
+                      OriginalBaseLValue.getAlignment(), Ptr);
+  }
+  BaseDecls.emplace_back(
+      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
+  return PrivateAddr;
+}
+
+bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
+  auto *DRD = getReductionInit(ClausesData[N].ReductionOp);
+  return DRD && DRD->getInitializer();
+}
+
 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
   return CGF.EmitLoadOfPointerLValue(
       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
@@ -720,7 +1134,7 @@ LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
     CodeGenFunction &CGF) {
   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                             getThreadIDVariable()->getType(),
-                            AlignmentSource::Decl);
+                            LValueBaseInfo(AlignmentSource::Decl, false));
 }
 
 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
@@ -747,9 +1161,9 @@ emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
   QualType PtrTy = C.getPointerType(Ty).withRestrict();
   FunctionArgList Args;
   ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
-                               /*Id=*/nullptr, PtrTy);
+                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
   ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
-                              /*Id=*/nullptr, PtrTy);
+                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
   Args.push_back(&OmpOutParm);
   Args.push_back(&OmpInParm);
   auto &FnInfo =
@@ -760,6 +1174,7 @@ emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
       IsCombiner ? ".omp_combiner." : ".omp_initializer.", &CGM.getModule());
   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo);
   Fn->removeFnAttr(llvm::Attribute::NoInline);
+  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
   Fn->addFnAttr(llvm::Attribute::AlwaysInline);
   CodeGenFunction CGF(CGM);
   // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
@@ -1553,6 +1968,26 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait");
     break;
   }
+  case OMPRTL__kmpc_task_reduction_init: {
+    // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void
+    // *data);
+    llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy};
+    llvm::FunctionType *FnTy =
+        llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
+    RTLFn =
+        CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init");
+    break;
+  }
+  case OMPRTL__kmpc_task_reduction_get_th_data: {
+    // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
+    // *d);
+    llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
+    llvm::FunctionType *FnTy =
+        llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
+    RTLFn = CGM.CreateRuntimeFunction(
+        FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data");
+    break;
+  }
   case OMPRTL__tgt_target: {
     // Build int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t
     // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t
@@ -1807,8 +2242,8 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
       // threadprivate copy of the variable VD
       CodeGenFunction CtorCGF(CGM);
       FunctionArgList Args;
-      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
-                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
+      ImplicitParamDecl Dst(CGM.getContext(), CGM.getContext().VoidPtrTy,
+                            ImplicitParamDecl::Other);
       Args.push_back(&Dst);
 
       auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
@@ -1838,8 +2273,8 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
       // of the variable VD
       CodeGenFunction DtorCGF(CGM);
       FunctionArgList Args;
-      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
-                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
+      ImplicitParamDecl Dst(CGM.getContext(), CGM.getContext().VoidPtrTy,
+                            ImplicitParamDecl::Other);
       Args.push_back(&Dst);
 
       auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
@@ -1903,6 +2338,27 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
   return nullptr;
 }
 
+Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
+                                                          QualType VarType,
+                                                          StringRef Name) {
+  llvm::Twine VarName(Name, ".artificial.");
+  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
+  llvm::Value *GAddr = getOrCreateInternalVariable(VarLVType, VarName);
+  llvm::Value *Args[] = {
+      emitUpdateLocation(CGF, SourceLocation()),
+      getThreadID(CGF, SourceLocation()),
+      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
+      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
+                                /*IsSigned=*/false),
+      getOrCreateInternalVariable(CGM.VoidPtrPtrTy, VarName + ".cache.")};
+  return Address(
+      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+          CGF.EmitRuntimeCall(
+              createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
+          VarLVType->getPointerTo(/*AddrSpace=*/0)),
+      CGM.getPointerAlign());
+}
+
 /// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen
 /// function. Here is the logic:
 /// if (Cond) {
@@ -2190,10 +2646,8 @@ static llvm::Value *emitCopyprivateCopyFunction(
   auto &C = CGM.getContext();
   // void copy_func(void *LHSArg, void *RHSArg);
   FunctionArgList Args;
-  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
-                           C.VoidPtrTy);
-  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
-                           C.VoidPtrTy);
+  ImplicitParamDecl LHSArg(C, C.VoidPtrTy, ImplicitParamDecl::Other);
+  ImplicitParamDecl RHSArg(C, C.VoidPtrTy, ImplicitParamDecl::Other);
   Args.push_back(&LHSArg);
   Args.push_back(&RHSArg);
   auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
@@ -2700,6 +3154,8 @@ enum KmpTaskTFields {
   KmpTaskTStride,
   /// (Taskloops only) Is last iteration flag.
   KmpTaskTLastIter,
+  /// (Taskloops only) Reduction data.
+  KmpTaskTReductions,
 };
 } // anonymous namespace
 
@@ -2784,8 +3240,7 @@ createOffloadingBinaryDescriptorFunction(CodeGenModule &CGM, StringRef Name,
                                          const RegionCodeGenTy &Codegen) {
   auto &C = CGM.getContext();
   FunctionArgList Args;
-  ImplicitParamDecl DummyPtr(C, /*DC=*/nullptr, SourceLocation(),
-                             /*Id=*/nullptr, C.VoidPtrTy);
+  ImplicitParamDecl DummyPtr(C, C.VoidPtrTy, ImplicitParamDecl::Other);
   Args.push_back(&DummyPtr);
 
   CodeGenFunction CGF(CGM);
@@ -2888,7 +3343,7 @@ CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {
   // descriptor, so we can reuse the logic that emits Ctors and Dtors.
   auto *IdentInfo = &C.Idents.get(".omp_offloading.reg_unreg_var");
   ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(), SourceLocation(),
-                                IdentInfo, C.CharTy);
+                                IdentInfo, C.CharTy, ImplicitParamDecl::Other);
 
   auto *UnRegFn = createOffloadingBinaryDescriptorFunction(
       CGM, ".omp_offloading.descriptor_unreg",
@@ -2903,6 +3358,19 @@ CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {
                              Desc);
         CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc);
       });
+  if (CGM.supportsCOMDAT()) {
+    // It is sufficient to call the registration function only once, so create
+    // a COMDAT group for the registration/unregistration functions and the
+    // associated data. This reduces both startup time and code size. The
+    // registration function serves as the COMDAT group key.
+    auto ComdatKey = M.getOrInsertComdat(RegFn->getName());
+    RegFn->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
+    RegFn->setVisibility(llvm::GlobalValue::HiddenVisibility);
+    RegFn->setComdat(ComdatKey);
+    UnRegFn->setComdat(ComdatKey);
+    DeviceImages->setComdat(ComdatKey);
+    Desc->setComdat(ComdatKey);
+  }
   return RegFn;
 }
 
@@ -3239,6 +3707,7 @@ createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
   //         kmp_uint64          ub;
   //         kmp_int64           st;
   //         kmp_int32           liter;
+  //         void *              reductions;
   //       };
   auto *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
   UD->startDefinition();
@@ -3262,6 +3731,7 @@ createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
     addFieldToRecordDecl(C, RD, KmpInt64Ty);
     addFieldToRecordDecl(C, RD, KmpInt32Ty);
+    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
   }
   RD->completeDefinition();
   return RD;
@@ -3292,7 +3762,7 @@ createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
 ///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
 ///   For taskloops:
 ///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
-///   tt->shareds);
+///   tt->reductions, tt->shareds);
 ///   return 0;
 /// }
 /// \endcode
@@ -3305,10 +3775,11 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                       llvm::Value *TaskPrivatesMap) {
   auto &C = CGM.getContext();
   FunctionArgList Args;
-  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
-  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc,
-                                /*Id=*/nullptr,
-                                KmpTaskTWithPrivatesPtrQTy.withRestrict());
+  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
+                            ImplicitParamDecl::Other);
+  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
+                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
+                                ImplicitParamDecl::Other);
   Args.push_back(&GtidArg);
   Args.push_back(&TaskTypeArg);
   auto &TaskEntryFnInfo =
@@ -3377,10 +3848,14 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
     auto LILVal = CGF.EmitLValueForField(Base, *LIFI);
     auto *LIParam = CGF.EmitLoadOfLValue(LILVal, Loc).getScalarVal();
+    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
+    auto RLVal = CGF.EmitLValueForField(Base, *RFI);
+    auto *RParam = CGF.EmitLoadOfLValue(RLVal, Loc).getScalarVal();
     CallArgs.push_back(LBParam);
     CallArgs.push_back(UBParam);
     CallArgs.push_back(StParam);
     CallArgs.push_back(LIParam);
+    CallArgs.push_back(RParam);
   }
   CallArgs.push_back(SharedsParam);
 
@@ -3399,10 +3874,11 @@ static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                             QualType KmpTaskTWithPrivatesQTy) {
   auto &C = CGM.getContext();
   FunctionArgList Args;
-  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
-  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc,
-                                /*Id=*/nullptr,
-                                KmpTaskTWithPrivatesPtrQTy.withRestrict());
+  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
+                            ImplicitParamDecl::Other);
+  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
+                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
+                                ImplicitParamDecl::Other);
   Args.push_back(&GtidArg);
   Args.push_back(&TaskTypeArg);
   FunctionType::ExtInfo Info;
@@ -3458,36 +3934,40 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
   FunctionArgList Args;
   ImplicitParamDecl TaskPrivatesArg(
       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
-      C.getPointerType(PrivatesQTy).withConst().withRestrict());
+      C.getPointerType(PrivatesQTy).withConst().withRestrict(),
+      ImplicitParamDecl::Other);
   Args.push_back(&TaskPrivatesArg);
   llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
   unsigned Counter = 1;
   for (auto *E: PrivateVars) {
     Args.push_back(ImplicitParamDecl::Create(
-        C, /*DC=*/nullptr, Loc,
-        /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
-                            .withConst()
-                            .withRestrict()));
+        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
+        C.getPointerType(C.getPointerType(E->getType()))
+            .withConst()
+            .withRestrict(),
+        ImplicitParamDecl::Other));
     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
     PrivateVarsPos[VD] = Counter;
     ++Counter;
   }
   for (auto *E : FirstprivateVars) {
     Args.push_back(ImplicitParamDecl::Create(
-        C, /*DC=*/nullptr, Loc,
-        /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
-                            .withConst()
-                            .withRestrict()));
+        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
+        C.getPointerType(C.getPointerType(E->getType()))
+            .withConst()
+            .withRestrict(),
+        ImplicitParamDecl::Other));
     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
     PrivateVarsPos[VD] = Counter;
     ++Counter;
   }
   for (auto *E: LastprivateVars) {
     Args.push_back(ImplicitParamDecl::Create(
-        C, /*DC=*/nullptr, Loc,
-        /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
-                            .withConst()
-                            .withRestrict()));
+        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
+        C.getPointerType(C.getPointerType(E->getType()))
+            .withConst()
+            .withRestrict(),
+        ImplicitParamDecl::Other));
     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
     PrivateVarsPos[VD] = Counter;
     ++Counter;
@@ -3502,6 +3982,7 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskPrivatesMap,
                                     TaskPrivatesMapFnInfo);
   TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
+  TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
   TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
   CodeGenFunction CGF(CGM);
   CGF.disableDebugInfo();
@@ -3565,7 +4046,9 @@ static void emitPrivatesInit(CodeGenFunction &CGF,
         auto SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
         SharedRefLValue = CGF.MakeAddrLValue(
             Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
-            SharedRefLValue.getType(), AlignmentSource::Decl);
+            SharedRefLValue.getType(),
+            LValueBaseInfo(AlignmentSource::Decl,
+                           SharedRefLValue.getBaseInfo().getMayAlias()));
         QualType Type = OriginalVD->getType();
         if (Type->isArrayType()) {
           // Initialize firstprivate array.
@@ -3644,12 +4127,14 @@ emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                     ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
   auto &C = CGM.getContext();
   FunctionArgList Args;
-  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc,
-                           /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy);
-  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc,
-                           /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy);
-  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc,
-                                /*Id=*/nullptr, C.IntTy);
+  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
+                           KmpTaskTWithPrivatesPtrQTy,
+                           ImplicitParamDecl::Other);
+  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
+                           KmpTaskTWithPrivatesPtrQTy,
+                           ImplicitParamDecl::Other);
+  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
+                                ImplicitParamDecl::Other);
   Args.push_back(&DstArg);
   Args.push_back(&SrcArg);
   Args.push_back(&LastprivArg);
@@ -4133,11 +4618,27 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
       cast(cast(D.getStrideVariable())->getDecl());
   CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
                        /*IsInitializer=*/true);
+  // Store reductions address.
+  LValue RedLVal = CGF.EmitLValueForField(
+      Result.TDBase,
+      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
+  if (Data.Reductions)
+    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
+  else {
+    CGF.EmitNullInitialization(RedLVal.getAddress(),
+                               CGF.getContext().VoidPtrTy);
+  }
   enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
   llvm::Value *TaskArgs[] = {
-      UpLoc, ThreadID, Result.NewTask, IfVal, LBLVal.getPointer(),
-      UBLVal.getPointer(), CGF.EmitLoadOfScalar(StLVal, SourceLocation()),
-      llvm::ConstantInt::getSigned(CGF.IntTy, Data.Nogroup ? 1 : 0),
+      UpLoc,
+      ThreadID,
+      Result.NewTask,
+      IfVal,
+      LBLVal.getPointer(),
+      UBLVal.getPointer(),
+      CGF.EmitLoadOfScalar(StLVal, SourceLocation()),
+      llvm::ConstantInt::getNullValue(
+          CGF.IntTy), // Always 0; the compiler emits the enclosing taskgroup
       llvm::ConstantInt::getSigned(
           CGF.IntTy, Data.Schedule.getPointer()
                          ? Data.Schedule.getInt() ? NumTasks : Grainsize
@@ -4146,10 +4647,9 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
           ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                       /*isSigned=*/false)
           : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
-      Result.TaskDupFn
-          ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Result.TaskDupFn,
-                                                            CGF.VoidPtrTy)
-          : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
+      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+                             Result.TaskDupFn, CGF.VoidPtrTy)
+                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
 }
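The argument bundle assembled above maps one-to-one onto libomp's taskloop entry point. The declaration below is reconstructed from the call site; the kmp.h typedefs are approximated, so treat it as an illustration rather than the authoritative runtime header.

extern "C" {
typedef struct ident ident_t;     // opaque source-location record (UpLoc)
typedef struct kmp_task kmp_task_t;

// Order mirrors TaskArgs: location, thread id, task, if-clause value,
// lower/upper bound pointers, stride, the now always-zero nogroup flag,
// schedule kind (0 = none, 1 = grainsize, 2 = num_tasks), schedule value,
// and the optional task-duplication thunk.
void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
                     unsigned long long *lb, unsigned long long *ub,
                     long long st, int nogroup, int sched,
                     unsigned long long grainsize, void *task_dup);
}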
 
@@ -4261,10 +4761,8 @@ llvm::Value *CGOpenMPRuntime::emitReductionFunction(
 
   // void reduction_func(void *LHSArg, void *RHSArg);
   FunctionArgList Args;
-  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
-                           C.VoidPtrTy);
-  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
-                           C.VoidPtrTy);
+  ImplicitParamDecl LHSArg(C, C.VoidPtrTy, ImplicitParamDecl::Other);
+  ImplicitParamDecl RHSArg(C, C.VoidPtrTy, ImplicitParamDecl::Other);
   Args.push_back(&LHSArg);
   Args.push_back(&RHSArg);
   auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
@@ -4655,6 +5153,353 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
   CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
 }
 
+/// Generates unique name for artificial threadprivate variables.
+/// Format is: <Prefix> "." <Loc_raw_encoding> "_" <N>
+static std::string generateUniqueName(StringRef Prefix, SourceLocation Loc,
+                                      unsigned N) {
+  SmallString<256> Buffer;
+  llvm::raw_svector_ostream Out(Buffer);
+  Out << Prefix << "." << Loc.getRawEncoding() << "_" << N;
+  return Out.str();
+}
+
+/// Emits reduction initializer function:
+/// \code
+/// void @.red_init(void* %arg) {
+/// %0 = bitcast void* %arg to <type>*
+/// store <type> <init>, <type>* %0
+/// ret void
+/// }
+/// \endcode
+static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
+                                           SourceLocation Loc,
+                                           ReductionCodeGen &RCG, unsigned N) {
+  auto &C = CGM.getContext();
+  FunctionArgList Args;
+  ImplicitParamDecl Param(C, C.VoidPtrTy, ImplicitParamDecl::Other);
+  Args.emplace_back(&Param);
+  auto &FnInfo =
+      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
+  auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
+  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
+                                    ".red_init.", &CGM.getModule());
+  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo);
+  CodeGenFunction CGF(CGM);
+  CGF.disableDebugInfo();
+  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args);
+  Address PrivateAddr = CGF.EmitLoadOfPointer(
+      CGF.GetAddrOfLocalVar(&Param),
+      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
+  llvm::Value *Size = nullptr;
+  // If the size of the reduction item is non-constant, load it from global
+  // threadprivate variable.
+  if (RCG.getSizes(N).second) {
+    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
+        CGF, CGM.getContext().getSizeType(),
+        generateUniqueName("reduction_size", Loc, N));
+    Size =
+        CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
+                             CGM.getContext().getSizeType(), SourceLocation());
+  }
+  RCG.emitAggregateType(CGF, N, Size);
+  LValue SharedLVal;
+  // If the initializer uses the initializer from the 'declare reduction'
+  // construct, emit a pointer to the address of the original reduction item
+  // (required by the reduction initializer).
+  if (RCG.usesReductionInitializer(N)) {
+    Address SharedAddr =
+        CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
+            CGF, CGM.getContext().VoidPtrTy,
+            generateUniqueName("reduction", Loc, N));
+    SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
+  } else {
+    SharedLVal = CGF.MakeNaturalAlignAddrLValue(
+        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
+        CGM.getContext().VoidPtrTy);
+  }
+  // Emit the initializer:
+  // %0 = bitcast void* %arg to <type>*
+  // store <type> <init>, <type>* %0
+  RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal,
+                         [](CodeGenFunction &) { return false; });
+  CGF.FinishFunction();
+  return Fn;
+}
+
+/// Emits reduction combiner function:
+/// \code
+/// void @.red_comb(void* %arg0, void* %arg1) {
+/// %lhs = bitcast void* %arg0 to <type>*
+/// %rhs = bitcast void* %arg1 to <type>*
+/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
+/// store <type> %2, <type>* %lhs
+/// ret void
+/// }
+/// \endcode
+static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
+                                           SourceLocation Loc,
+                                           ReductionCodeGen &RCG, unsigned N,
+                                           const Expr *ReductionOp,
+                                           const Expr *LHS, const Expr *RHS,
+                                           const Expr *PrivateRef) {
+  auto &C = CGM.getContext();
+  auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
+  auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
+  FunctionArgList Args;
+  ImplicitParamDecl ParamInOut(C, C.VoidPtrTy, ImplicitParamDecl::Other);
+  ImplicitParamDecl ParamIn(C, C.VoidPtrTy, ImplicitParamDecl::Other);
+  Args.emplace_back(&ParamInOut);
+  Args.emplace_back(&ParamIn);
+  auto &FnInfo =
+      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
+  auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
+  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
+                                    ".red_comb.", &CGM.getModule());
+  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo);
+  CodeGenFunction CGF(CGM);
+  CGF.disableDebugInfo();
+  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args);
+  llvm::Value *Size = nullptr;
+  // If the size of the reduction item is non-constant, load it from global
+  // threadprivate variable.
+  if (RCG.getSizes(N).second) {
+    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
+        CGF, CGM.getContext().getSizeType(),
+        generateUniqueName("reduction_size", Loc, N));
+    Size =
+        CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
+                             CGM.getContext().getSizeType(), SourceLocation());
+  }
+  RCG.emitAggregateType(CGF, N, Size);
+  // Remap lhs and rhs variables to the addresses of the function arguments.
+  // %lhs = bitcast void* %arg0 to <type>*
+  // %rhs = bitcast void* %arg1 to <type>*
+  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
+  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() -> Address {
+    // Pull out the pointer to the variable.
+    Address PtrAddr = CGF.EmitLoadOfPointer(
+        CGF.GetAddrOfLocalVar(&ParamInOut),
+        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
+    return CGF.Builder.CreateElementBitCast(
+        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
+  });
+  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() -> Address {
+    // Pull out the pointer to the variable.
+    Address PtrAddr = CGF.EmitLoadOfPointer(
+        CGF.GetAddrOfLocalVar(&ParamIn),
+        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
+    return CGF.Builder.CreateElementBitCast(
+        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
+  });
+  PrivateScope.Privatize();
+  // Emit the combiner body:
+  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
+  // store <type> %2, <type>* %lhs
+  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
+      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
+      cast<DeclRefExpr>(RHS));
+  CGF.FinishFunction();
+  return Fn;
+}
+
+/// Emits reduction finalizer function:
+/// \code
+/// void @.red_fini(void* %arg) {
+/// %0 = bitcast void* %arg to <type>*
+/// <destructor>(<type>* %0)
+/// ret void
+/// }
+/// \endcode
+static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
+                                           SourceLocation Loc,
+                                           ReductionCodeGen &RCG, unsigned N) {
+  if (!RCG.needCleanups(N))
+    return nullptr;
+  auto &C = CGM.getContext();
+  FunctionArgList Args;
+  ImplicitParamDecl Param(C, C.VoidPtrTy, ImplicitParamDecl::Other);
+  Args.emplace_back(&Param);
+  auto &FnInfo =
+      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
+  auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
+  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
+                                    ".red_fini.", &CGM.getModule());
+  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo);
+  CodeGenFunction CGF(CGM);
+  CGF.disableDebugInfo();
+  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args);
+  Address PrivateAddr = CGF.EmitLoadOfPointer(
+      CGF.GetAddrOfLocalVar(&Param),
+      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
+  llvm::Value *Size = nullptr;
+  // If the size of the reduction item is non-constant, load it from global
+  // threadprivate variable.
+  if (RCG.getSizes(N).second) {
+    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
+        CGF, CGM.getContext().getSizeType(),
+        generateUniqueName("reduction_size", Loc, N));
+    Size =
+        CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
+                             CGM.getContext().getSizeType(), SourceLocation());
+  }
+  RCG.emitAggregateType(CGF, N, Size);
+  // Emit the finalizer body:
+  // <destructor>(<type>* %0)
+  RCG.emitCleanups(CGF, N, PrivateAddr);
+  CGF.FinishFunction();
+  return Fn;
+}
+
+llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
+    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
+    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
+  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
+    return nullptr;
+
+  // Build typedef struct:
+  // kmp_task_red_input {
+  //   void *reduce_shar; // shared reduction item
+  //   size_t reduce_size; // size of data item
+  //   void *reduce_init; // data initialization routine
+  //   void *reduce_fini; // data finalization routine
+  //   void *reduce_comb; // data combiner routine
+  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
+  // } kmp_task_red_input_t;
+  ASTContext &C = CGM.getContext();
+  auto *RD = C.buildImplicitRecord("kmp_task_red_input_t");
+  RD->startDefinition();
+  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
+  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
+  const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
+  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
+  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
+  const FieldDecl *FlagsFD = addFieldToRecordDecl(
+      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
+  RD->completeDefinition();
+  QualType RDType = C.getRecordType(RD);
+  unsigned Size = Data.ReductionVars.size();
+  llvm::APInt ArraySize(/*numBits=*/64, Size);
+  QualType ArrayRDType = C.getConstantArrayType(
+      RDType, ArraySize, ArrayType::Normal, /*IndexTypeQuals=*/0);
+  // kmp_task_red_input_t .rd_input.[Size];
+  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
+  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies,
+                       Data.ReductionOps);
+  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
+    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
+    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
+                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
+    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
+        TaskRedInput.getPointer(), Idxs,
+        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
+        ".rd_input.gep.");
+    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
+    // ElemLVal.reduce_shar = &Shareds[Cnt];
+    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
+    RCG.emitSharedLValue(CGF, Cnt);
+    llvm::Value *CastedShared =
+        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer());
+    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
+    RCG.emitAggregateType(CGF, Cnt);
+    llvm::Value *SizeValInChars;
+    llvm::Value *SizeVal;
+    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
+    // We use delayed creation/initialization for VLAs, array sections and
+    // custom reduction initializations. This is required because the runtime
+    // does not provide a way to pass the sizes of VLAs/array sections to the
+    // initializer/combiner/finalizer functions and does not pass the pointer
+    // to the original reduction item to the initializer. Instead,
+    // threadprivate global variables are used to store these values, which
+    // are then read back inside those functions.
+    bool DelayedCreation = !!SizeVal;
+    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
+                                               /*isSigned=*/false);
+    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
+    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
+    // ElemLVal.reduce_init = init;
+    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
+    llvm::Value *InitAddr =
+        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
+    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
+    DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt);
+    // ElemLVal.reduce_fini = fini;
+    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
+    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
+    llvm::Value *FiniAddr = Fini
+                                ? CGF.EmitCastToVoidPtr(Fini)
+                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
+    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
+    // ElemLVal.reduce_comb = comb;
+    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
+    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
+        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
+        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
+    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
+    // ElemLVal.flags = 0;
+    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
+    if (DelayedCreation) {
+      CGF.EmitStoreOfScalar(
+          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*IsSigned=*/true),
+          FlagsLVal);
+    } else
+      CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());
+  }
+  // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void
+  // *data);
+  llvm::Value *Args[] = {
+      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
+                                /*isSigned=*/true),
+      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
+      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
+                                                      CGM.VoidPtrTy)};
+  return CGF.EmitRuntimeCall(
+      createRuntimeFunction(OMPRTL__kmpc_task_reduction_init), Args);
+}
+
+void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
+                                              SourceLocation Loc,
+                                              ReductionCodeGen &RCG,
+                                              unsigned N) {
+  auto Sizes = RCG.getSizes(N);
+  // Emit a threadprivate global variable if the size of the reduction item is
+  // non-constant (Sizes.second != nullptr).
+  if (Sizes.second) {
+    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
+                                                     /*isSigned=*/false);
+    Address SizeAddr = getAddrOfArtificialThreadPrivate(
+        CGF, CGM.getContext().getSizeType(),
+        generateUniqueName("reduction_size", Loc, N));
+    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
+  }
+  // Store address of the original reduction item if custom initializer is used.
+  if (RCG.usesReductionInitializer(N)) {
+    Address SharedAddr = getAddrOfArtificialThreadPrivate(
+        CGF, CGM.getContext().VoidPtrTy,
+        generateUniqueName("reduction", Loc, N));
+    CGF.Builder.CreateStore(
+        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+            RCG.getSharedLValue(N).getPointer(), CGM.VoidPtrTy),
+        SharedAddr, /*IsVolatile=*/false);
+  }
+}
+
+Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
+                                              SourceLocation Loc,
+                                              llvm::Value *ReductionsPtr,
+                                              LValue SharedLVal) {
+  // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
+  // *d);
+  llvm::Value *Args[] = {
+      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
+                                /*isSigned=*/true),
+      ReductionsPtr,
+      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(SharedLVal.getPointer(),
+                                                      CGM.VoidPtrTy)};
+  return Address(
+      CGF.EmitRuntimeCall(
+          createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args),
+      SharedLVal.getAlignment());
+}
+
 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
   if (!CGF.HaveInsertPoint())
@@ -5583,7 +6428,7 @@ class MappableExprsHandler {
     // We have to process the component lists that relate with the same
     // declaration in a single chunk so that we can generate the map flags
     // correctly. Therefore, we organize all lists in a map.
-    llvm::DenseMap<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
+    llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
 
     // Helper function to fill the information map for the different supported
     // clauses.
@@ -5907,16 +6752,11 @@ emitOffloadingArrays(CodeGenFunction &CGF,
 
     for (unsigned i = 0; i < Info.NumberOfPtrs; ++i) {
       llvm::Value *BPVal = *BasePointers[i];
-      if (BPVal->getType()->isPointerTy())
-        BPVal = CGF.Builder.CreateBitCast(BPVal, CGM.VoidPtrTy);
-      else {
-        assert(BPVal->getType()->isIntegerTy() &&
-               "If not a pointer, the value type must be an integer.");
-        BPVal = CGF.Builder.CreateIntToPtr(BPVal, CGM.VoidPtrTy);
-      }
       llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
           Info.BasePointersArray, 0, i);
+      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
       Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
       CGF.Builder.CreateStore(BPVal, BPAddr);
 
@@ -5925,16 +6765,11 @@ emitOffloadingArrays(CodeGenFunction &CGF,
           Info.CaptureDeviceAddrMap.insert(std::make_pair(DevVD, BPAddr));
 
       llvm::Value *PVal = Pointers[i];
-      if (PVal->getType()->isPointerTy())
-        PVal = CGF.Builder.CreateBitCast(PVal, CGM.VoidPtrTy);
-      else {
-        assert(PVal->getType()->isIntegerTy() &&
-               "If not a pointer, the value type must be an integer.");
-        PVal = CGF.Builder.CreateIntToPtr(PVal, CGM.VoidPtrTy);
-      }
       llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
           Info.PointersArray, 0, i);
+      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
       Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
       CGF.Builder.CreateStore(PVal, PAddr);
 
@@ -6307,7 +7142,7 @@ bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
     }
   }
 
-  // If we are in target mode we do not emit any global (declare target is not
+  // If we are in target mode, we do not emit any global (declare target is not
   // implemented yet). Therefore we signal that GD was processed in this case.
   return true;
 }
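For reference while reading emitTaskReductionInit above: the record it materializes as .rd_input.[Size] has the layout sketched below. The struct mirrors the comment block in the patch; it is an illustration, not libomp's authoritative declaration, and the helper pair shows what the generated .red_init./.red_comb. thunks amount to for a plain int '+' reduction.

#include <cstddef>
#include <cstdint>

// Illustrative mirror of the per-item record (field names taken from the
// comment block in the patch; not the authoritative libomp definition).
struct kmp_task_red_input_t {
  void *reduce_shar;       // shared reduction item, &origs[i]
  std::size_t reduce_size; // size of the data item in chars
  void *reduce_init;       // initializer thunk, emitted as .red_init.
  void *reduce_fini;       // finalizer thunk, .red_fini. (null: no cleanups)
  void *reduce_comb;       // combiner thunk, .red_comb.
  std::uint32_t flags;     // 1 requests delayed creation (VLAs, array
                           // sections, custom initializers)
};

// What the generated thunks boil down to for a plain 'reduction(+ : int)':
void red_init(void *arg) { *static_cast<int *>(arg) = 0; }
void red_comb(void *lhs, void *rhs) {
  *static_cast<int *>(lhs) += *static_cast<int *>(rhs);
}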
diff --git a/interpreter/llvm/src/tools/clang/lib/CodeGen/CGOpenMPRuntime.h b/interpreter/llvm/src/tools/clang/lib/CodeGen/CGOpenMPRuntime.h
index 6f460f121791e..5dcf999bea376 100644
--- a/interpreter/llvm/src/tools/clang/lib/CodeGen/CGOpenMPRuntime.h
+++ b/interpreter/llvm/src/tools/clang/lib/CodeGen/CGOpenMPRuntime.h
@@ -96,15 +96,106 @@ struct OMPTaskDataTy final {
   SmallVector<const Expr *, 4> FirstprivateInits;
   SmallVector<const Expr *, 4> LastprivateVars;
   SmallVector<const Expr *, 4> LastprivateCopies;
+  SmallVector<const Expr *, 4> ReductionVars;
+  SmallVector<const Expr *, 4> ReductionCopies;
+  SmallVector<const Expr *, 4> ReductionOps;
   SmallVector<std::pair<OpenMPDependClauseKind, const Expr *>, 4> Dependences;
   llvm::PointerIntPair<const Expr *, 1, bool> Final;
   llvm::PointerIntPair<const Expr *, 1, bool> Schedule;
   llvm::PointerIntPair<const Expr *, 1, bool> Priority;
+  llvm::Value *Reductions = nullptr;
   unsigned NumberOfParts = 0;
   bool Tied = true;
   bool Nogroup = false;
 };
 
+/// Class intended to support codegen of all kinds of reduction clauses.
+class ReductionCodeGen {
+private:
+  /// Data required for codegen of reduction clauses.
+  struct ReductionData {
+    /// Reference to the original shared item.
+    const Expr *Ref = nullptr;
+    /// Helper expression for generation of private copy.
+    const Expr *Private = nullptr;
+    /// Helper expression for generation reduction operation.
+    const Expr *ReductionOp = nullptr;
+    ReductionData(const Expr *Ref, const Expr *Private, const Expr *ReductionOp)
+        : Ref(Ref), Private(Private), ReductionOp(ReductionOp) {}
+  };
+  /// List of reduction-based clauses.
+  SmallVector<ReductionData, 4> ClausesData;
+
+  /// List of addresses of original shared variables/expressions.
+  SmallVector<std::pair<LValue, LValue>, 4> SharedAddresses;
+  /// Sizes of the reduction items in chars.
+  SmallVector<std::pair<llvm::Value *, llvm::Value *>, 4> Sizes;
+  /// Base declarations for the reduction items.
+  SmallVector<const VarDecl *, 4> BaseDecls;
+
+  /// Emits lvalue for shared expression.
+  LValue emitSharedLValue(CodeGenFunction &CGF, const Expr *E);
+  /// Emits upper bound for shared expression (if array section).
+  LValue emitSharedLValueUB(CodeGenFunction &CGF, const Expr *E);
+  /// Performs aggregate initialization.
+  /// \param N Number of reduction item in the common list.
+  /// \param PrivateAddr Address of the corresponding private item.
+  /// \param SharedLVal Address of the original shared variable.
+  /// \param DRD Declare reduction construct used for reduction item.
+  void emitAggregateInitialization(CodeGenFunction &CGF, unsigned N,
+                                   Address PrivateAddr, LValue SharedLVal,
+                                   const OMPDeclareReductionDecl *DRD);
+
+public:
+  ReductionCodeGen(ArrayRef<const Expr *> Shareds,
+                   ArrayRef<const Expr *> Privates,
+                   ArrayRef<const Expr *> ReductionOps);
+  /// Emits lvalue for a reduction item.
+  /// \param N Number of the reduction item.
+  void emitSharedLValue(CodeGenFunction &CGF, unsigned N);
+  /// Emits the code for the variable-modified type, if required.
+  /// \param N Number of the reduction item.
+  void emitAggregateType(CodeGenFunction &CGF, unsigned N);
+  /// Emits the code for the variable-modified type, if required.
+  /// \param N Number of the reduction item.
+  /// \param Size Size of the type in chars.
+  void emitAggregateType(CodeGenFunction &CGF, unsigned N, llvm::Value *Size);
+  /// Performs initialization of the private copy for the reduction item.
+  /// \param N Number of the reduction item.
+  /// \param PrivateAddr Address of the corresponding private item.
+  /// \param DefaultInit Default initialization sequence that should be
+  /// performed if no reduction specific initialization is found.
+  /// \param SharedLVal Address of the original shared variable.
+  void
+  emitInitialization(CodeGenFunction &CGF, unsigned N, Address PrivateAddr,
+                     LValue SharedLVal,
+                     llvm::function_ref<bool(CodeGenFunction &)> DefaultInit);
+  /// Returns true if the private copy requires cleanups.
+  bool needCleanups(unsigned N);
+  /// Emits cleanup code for the reduction item.
+  /// \param N Number of the reduction item.
+  /// \param PrivateAddr Address of the corresponding private item.
+  void emitCleanups(CodeGenFunction &CGF, unsigned N, Address PrivateAddr);
+  /// Adjusts \p PrivateAddr to be used instead of the original variable
+  /// address in normal operations.
+  /// \param N Number of the reduction item.
+  /// \param PrivateAddr Address of the corresponding private item.
+  Address adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
+                               Address PrivateAddr);
+  /// Returns LValue for the reduction item.
+  LValue getSharedLValue(unsigned N) const { return SharedAddresses[N].first; }
+  /// Returns the size of the reduction item (in chars and total number of
+  /// elements in the item), or nullptr, if the size is a constant.
+  std::pair<llvm::Value *, llvm::Value *> getSizes(unsigned N) const {
+    return Sizes[N];
+  }
+  /// Returns the base declaration of the reduction item.
+  const VarDecl *getBaseDecl(unsigned N) const { return BaseDecls[N]; }
+  /// Returns true if the initialization of the reduction item uses initializer
+  /// from declare reduction construct.
+  bool usesReductionInitializer(unsigned N) const;
+};
+
 class CGOpenMPRuntime {
 protected:
   CodeGenModule &CGM;
@@ -121,7 +212,7 @@ class CGOpenMPRuntime {
   /// \param OutlinedFnID Outlined function ID value to be defined by this call.
   /// \param IsOffloadEntry True if the outlined function is an offload entry.
   /// \param CodeGen Lambda codegen specific to an accelerator device.
-  /// An oulined function may not be an entry if, e.g. the if clause always
+  /// An outlined function may not be an entry if, e.g. the if clause always
   /// evaluates to false.
   virtual void emitTargetOutlinedFunctionHelper(const OMPExecutableDirective &D,
                                                 StringRef ParentName,
@@ -699,7 +790,7 @@ class CGOpenMPRuntime {
   /// \param Loc Clang source location.
   /// \param ScheduleKind Schedule kind, specified by the 'schedule' clause.
   /// \param IVSize Size of the iteration variable in bits.
-  /// \param IVSigned Sign of the interation variable.
+  /// \param IVSigned Sign of the iteration variable.
   /// \param Ordered true if loop is ordered, false otherwise.
   /// \param DispatchValues struct containing llvm values for lower bound, upper
   /// bound, and chunk expression.
@@ -723,7 +814,7 @@ class CGOpenMPRuntime {
   /// \param Loc Clang source location.
   /// \param ScheduleKind Schedule kind, specified by the 'schedule' clause.
   /// \param IVSize Size of the iteration variable in bits.
-  /// \param IVSigned Sign of the interation variable.
+  /// \param IVSigned Sign of the iteration variable.
   /// \param Ordered true if loop is ordered, false otherwise.
   /// \param IL Address of the output variable in which the flag of the
   /// last iteration is returned.
@@ -732,7 +823,7 @@ class CGOpenMPRuntime {
   /// \param UB Address of the output variable in which the upper iteration
   /// number is returned.
   /// \param ST Address of the output variable in which the stride value is
-  /// returned nesessary to generated the static_chunked scheduled loop.
+  /// returned, necessary to generate the static_chunked scheduled loop.
   /// \param Chunk Value of the chunk for the static_chunked scheduled loop.
   /// For the default (nullptr) value, the chunk 1 will be used.
   ///
@@ -747,7 +838,7 @@ class CGOpenMPRuntime {
   /// \param Loc Clang source location.
   /// \param SchedKind Schedule kind, specified by the 'dist_schedule' clause.
   /// \param IVSize Size of the iteration variable in bits.
-  /// \param IVSigned Sign of the interation variable.
+  /// \param IVSigned Sign of the iteration variable.
   /// \param Ordered true if loop is ordered, false otherwise.
   /// \param IL Address of the output variable in which the flag of the
   /// last iteration is returned.
@@ -756,7 +847,7 @@ class CGOpenMPRuntime {
   /// \param UB Address of the output variable in which the upper iteration
   /// number is returned.
   /// \param ST Address of the output variable in which the stride value is
-  /// returned nesessary to generated the static_chunked scheduled loop.
+  /// returned, necessary to generate the static_chunked scheduled loop.
   /// \param Chunk Value of the chunk for the static_chunked scheduled loop.
   /// For the default (nullptr) value, the chunk 1 will be used.
   ///
@@ -773,7 +864,7 @@ class CGOpenMPRuntime {
   /// \param CGF Reference to current CodeGenFunction.
   /// \param Loc Clang source location.
   /// \param IVSize Size of the iteration variable in bits.
-  /// \param IVSigned Sign of the interation variable.
+  /// \param IVSigned Sign of the iteration variable.
   ///
   virtual void emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                           SourceLocation Loc, unsigned IVSize,
@@ -792,7 +883,7 @@ class CGOpenMPRuntime {
   ///          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
   ///          kmp_int[32|64] *p_stride);
   /// \param IVSize Size of the iteration variable in bits.
-  /// \param IVSigned Sign of the interation variable.
+  /// \param IVSigned Sign of the iteration variable.
   /// \param IL Address of the output variable in which the flag of the
   /// last iteration is returned.
   /// \param LB Address of the output variable in which the lower iteration
@@ -844,6 +935,14 @@ class CGOpenMPRuntime {
                                  SourceLocation Loc, bool PerformInit,
                                  CodeGenFunction *CGF = nullptr);
 
+  /// Creates an artificial threadprivate variable with name \p Name and type
+  /// \p VarType.
+  /// \param VarType Type of the artificial threadprivate variable.
+  /// \param Name Name of the artificial threadprivate variable.
+  virtual Address getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
+                                                   QualType VarType,
+                                                   StringRef Name);
+
   /// \brief Emit flush of the variables specified in 'omp flush' directive.
   /// \param Vars List of variables to flush.
   virtual void emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *> Vars,
@@ -1002,6 +1101,51 @@ class CGOpenMPRuntime {
                              ArrayRef<const Expr *> ReductionOps,
                              ReductionOptionsTy Options);
 
+  /// Emit code for the initialization of a task reduction clause. The
+  /// following code should be emitted for the reduction:
+  /// \code
+  ///
+  /// _task_red_item_t red_data[n];
+  /// ...
+  /// red_data[i].shar = &origs[i];
+  /// red_data[i].size = sizeof(origs[i]);
+  /// red_data[i].f_init = (void*)RedInit<i>;
+  /// red_data[i].f_fini = (void*)RedDest<i>;
+  /// red_data[i].f_comb = (void*)RedOp<i>;
+  /// red_data[i].flags = <Flag_i>;
+  /// ...
+  /// void* tg1 = __kmpc_task_reduction_init(gtid, n, red_data);
+  /// \endcode
+  ///
+  /// \param LHSExprs List of LHS in \a Data.ReductionOps reduction operations.
+  /// \param RHSExprs List of RHS in \a Data.ReductionOps reduction operations.
+  /// \param Data Additional data for task generation like tiedness, final
+  /// state, list of privates, reductions etc.
+  virtual llvm::Value *emitTaskReductionInit(CodeGenFunction &CGF,
+                                             SourceLocation Loc,
+                                             ArrayRef<const Expr *> LHSExprs,
+                                             ArrayRef<const Expr *> RHSExprs,
+                                             const OMPTaskDataTy &Data);
+
+  /// Required to resolve existing problems in the runtime. Emits threadprivate
+  /// variables to store the sizes of the VLAs/array sections for the
+  /// initializer/combiner/finalizer functions, and emits a threadprivate
+  /// variable to store the pointer to the original reduction item for the
+  /// custom initializer defined by a 'declare reduction' construct.
+  /// \param RCG Allows reuse of existing data for the reductions.
+  /// \param N Reduction item for which fixups must be emitted.
+  virtual void emitTaskReductionFixups(CodeGenFunction &CGF, SourceLocation Loc,
+                                       ReductionCodeGen &RCG, unsigned N);
+
+  /// Get the address of the `void *` typed private copy of the reduction
+  /// item specified by the \p SharedLVal.
+  /// \param ReductionsPtr Pointer to the reduction data returned by the
+  /// emitTaskReductionInit function.
+  /// \param SharedLVal Address of the original reduction item.
+  virtual Address getTaskReductionItem(CodeGenFunction &CGF, SourceLocation Loc,
+                                       llvm::Value *ReductionsPtr,
+                                       LValue SharedLVal);
+
   /// \brief Emit code for 'taskwait' directive.
   virtual void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc);
 
@@ -1029,7 +1173,7 @@ class CGOpenMPRuntime {
   /// \param OutlinedFnID Outlined function ID value to be defined by this call.
   /// \param IsOffloadEntry True if the outlined function is an offload entry.
   /// \param CodeGen Code generation sequence for the \a D directive.
-  /// An oulined function may not be an entry if, e.g. the if clause always
+  /// An outlined function may not be an entry if, e.g. the if clause always
   /// evaluates to false.
   virtual void emitTargetOutlinedFunction(const OMPExecutableDirective &D,
                                           StringRef ParentName,
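At the source level, the ReductionCodeGen/emitTaskReductionInit machinery declared above serves constructs like the one sketched below (taskloop reductions were only later standardized in OpenMP 5.0, so the spelling is forward-looking for this era of clang):

#include <cstdio>

int main() {
  int sum = 0;
#pragma omp parallel
#pragma omp single
#pragma omp taskloop reduction(+ : sum)
  for (int i = 0; i < 100; ++i)
    sum += i;
  std::printf("sum = %d\n", sum); // 4950
  return 0;
}

Codegen wraps the taskloop in an implicit taskgroup (hence the hard-coded zero nogroup argument in emitTaskLoopCall), registers every reduction item once via __kmpc_task_reduction_init, and each generated task looks up its private copy through __kmpc_task_reduction_get_th_data.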
diff --git a/interpreter/llvm/src/tools/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/interpreter/llvm/src/tools/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
index c3391d087b759..3ced05d08a47c 100644
--- a/interpreter/llvm/src/tools/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -861,6 +861,7 @@ llvm::Value *CGOpenMPRuntimeNVPTX::emitTeamsOutlinedFunction(
       D, ThreadIDVar, InnermostKind, CodeGen);
   llvm::Function *OutlinedFun = cast(OutlinedFunVal);
   OutlinedFun->removeFnAttr(llvm::Attribute::NoInline);
+  OutlinedFun->removeFnAttr(llvm::Attribute::OptimizeNone);
   OutlinedFun->addFnAttr(llvm::Attribute::AlwaysInline);
 
   return OutlinedFun;
@@ -1243,32 +1244,27 @@ static void emitReductionListCopy(
 ///    local = local @ remote
 ///  else
 ///    local = remote
-llvm::Value *emitReduceScratchpadFunction(CodeGenModule &CGM,
-                                          ArrayRef<const Expr *> Privates,
-                                          QualType ReductionArrayTy,
-                                          llvm::Value *ReduceFn) {
+static llvm::Value *
+emitReduceScratchpadFunction(CodeGenModule &CGM,
+                             ArrayRef<const Expr *> Privates,
+                             QualType ReductionArrayTy, llvm::Value *ReduceFn) {
   auto &C = CGM.getContext();
   auto Int32Ty = C.getIntTypeForBitwidth(32, /* Signed */ true);
 
   // Destination of the copy.
-  ImplicitParamDecl ReduceListArg(C, /*DC=*/nullptr, SourceLocation(),
-                                  /*Id=*/nullptr, C.VoidPtrTy);
+  ImplicitParamDecl ReduceListArg(C, C.VoidPtrTy, ImplicitParamDecl::Other);
   // Base address of the scratchpad array, with each element storing a
   // Reduce list per team.
-  ImplicitParamDecl ScratchPadArg(C, /*DC=*/nullptr, SourceLocation(),
-                                  /*Id=*/nullptr, C.VoidPtrTy);
+  ImplicitParamDecl ScratchPadArg(C, C.VoidPtrTy, ImplicitParamDecl::Other);
   // A source index into the scratchpad array.
-  ImplicitParamDecl IndexArg(C, /*DC=*/nullptr, SourceLocation(),
-                             /*Id=*/nullptr, Int32Ty);
+  ImplicitParamDecl IndexArg(C, Int32Ty, ImplicitParamDecl::Other);
   // Row width of an element in the scratchpad array, typically
   // the number of teams.
-  ImplicitParamDecl WidthArg(C, /*DC=*/nullptr, SourceLocation(),
-                             /*Id=*/nullptr, Int32Ty);
+  ImplicitParamDecl WidthArg(C, Int32Ty, ImplicitParamDecl::Other);
   // If should_reduce == 1, then it's load AND reduce,
   // If should_reduce == 0 (or otherwise), then it only loads (+ copy).
   // The latter case is used for initialization.
-  ImplicitParamDecl ShouldReduceArg(C, /*DC=*/nullptr, SourceLocation(),
-                                    /*Id=*/nullptr, Int32Ty);
+  ImplicitParamDecl ShouldReduceArg(C, Int32Ty, ImplicitParamDecl::Other);
 
   FunctionArgList Args;
   Args.push_back(&ReduceListArg);
@@ -1372,28 +1368,24 @@ llvm::Value *emitReduceScratchpadFunction(CodeGenModule &CGM,
 ///  for elem in Reduce List:
 ///    scratchpad[elem_id][index] = elem
 ///
-llvm::Value *emitCopyToScratchpad(CodeGenModule &CGM,
-                                  ArrayRef<const Expr *> Privates,
-                                  QualType ReductionArrayTy) {
+static llvm::Value *emitCopyToScratchpad(CodeGenModule &CGM,
+                                         ArrayRef<const Expr *> Privates,
+                                         QualType ReductionArrayTy) {
 
   auto &C = CGM.getContext();
   auto Int32Ty = C.getIntTypeForBitwidth(32, /* Signed */ true);
 
   // Source of the copy.
-  ImplicitParamDecl ReduceListArg(C, /*DC=*/nullptr, SourceLocation(),
-                                  /*Id=*/nullptr, C.VoidPtrTy);
+  ImplicitParamDecl ReduceListArg(C, C.VoidPtrTy, ImplicitParamDecl::Other);
   // Base address of the scratchpad array, with each element storing a
   // Reduce list per team.
-  ImplicitParamDecl ScratchPadArg(C, /*DC=*/nullptr, SourceLocation(),
-                                  /*Id=*/nullptr, C.VoidPtrTy);
+  ImplicitParamDecl ScratchPadArg(C, C.VoidPtrTy, ImplicitParamDecl::Other);
   // A destination index into the scratchpad array, typically the team
   // identifier.
-  ImplicitParamDecl IndexArg(C, /*DC=*/nullptr, SourceLocation(),
-                             /*Id=*/nullptr, Int32Ty);
+  ImplicitParamDecl IndexArg(C, Int32Ty, ImplicitParamDecl::Other);
   // Row width of an element in the scratchpad array, typically
   // the number of teams.
-  ImplicitParamDecl WidthArg(C, /*DC=*/nullptr, SourceLocation(),
-                             /*Id=*/nullptr, Int32Ty);
+  ImplicitParamDecl WidthArg(C, Int32Ty, ImplicitParamDecl::Other);
 
   FunctionArgList Args;
   Args.push_back(&ReduceListArg);
@@ -1474,13 +1466,12 @@ static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM,
   // ReduceList: thread local Reduce list.
   // At the stage of the computation when this function is called, partially
   // aggregated values reside in the first lane of every active warp.
-  ImplicitParamDecl ReduceListArg(C, /*DC=*/nullptr, SourceLocation(),
-                                  /*Id=*/nullptr, C.VoidPtrTy);
+  ImplicitParamDecl ReduceListArg(C, C.VoidPtrTy, ImplicitParamDecl::Other);
   // NumWarps: number of warps active in the parallel region.  This could
   // be smaller than 32 (max warps in a CTA) for partial block reduction.
-  ImplicitParamDecl NumWarpsArg(C, /*DC=*/nullptr, SourceLocation(),
-                                /*Id=*/nullptr,
-                                C.getIntTypeForBitwidth(32, /* Signed */ true));
+  ImplicitParamDecl NumWarpsArg(C,
+                                C.getIntTypeForBitwidth(32, /* Signed */ true),
+                                ImplicitParamDecl::Other);
   FunctionArgList Args;
   Args.push_back(&ReduceListArg);
   Args.push_back(&NumWarpsArg);
@@ -1722,17 +1713,14 @@ emitShuffleAndReduceFunction(CodeGenModule &CGM,
   auto &C = CGM.getContext();
 
   // Thread local Reduce list used to host the values of data to be reduced.
-  ImplicitParamDecl ReduceListArg(C, /*DC=*/nullptr, SourceLocation(),
-                                  /*Id=*/nullptr, C.VoidPtrTy);
+  ImplicitParamDecl ReduceListArg(C, C.VoidPtrTy, ImplicitParamDecl::Other);
   // Current lane id; could be logical.
-  ImplicitParamDecl LaneIDArg(C, /*DC=*/nullptr, SourceLocation(),
-                              /*Id=*/nullptr, C.ShortTy);
+  ImplicitParamDecl LaneIDArg(C, C.ShortTy, ImplicitParamDecl::Other);
   // Offset of the remote source lane relative to the current lane.
-  ImplicitParamDecl RemoteLaneOffsetArg(C, /*DC=*/nullptr, SourceLocation(),
-                                        /*Id=*/nullptr, C.ShortTy);
+  ImplicitParamDecl RemoteLaneOffsetArg(C, C.ShortTy,
+                                        ImplicitParamDecl::Other);
   // Algorithm version.  This is expected to be known at compile time.
-  ImplicitParamDecl AlgoVerArg(C, /*DC=*/nullptr, SourceLocation(),
-                               /*Id=*/nullptr, C.ShortTy);
+  ImplicitParamDecl AlgoVerArg(C, C.ShortTy, ImplicitParamDecl::Other);
   FunctionArgList Args;
   Args.push_back(&ReduceListArg);
   Args.push_back(&LaneIDArg);
diff --git a/interpreter/llvm/src/tools/clang/lib/CodeGen/CGStmt.cpp b/interpreter/llvm/src/tools/clang/lib/CodeGen/CGStmt.cpp
index 683f366ebe455..a13c386461647 100644
--- a/interpreter/llvm/src/tools/clang/lib/CodeGen/CGStmt.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/CodeGen/CGStmt.cpp
@@ -1024,6 +1024,18 @@ void CodeGenFunction::EmitReturnOfRValue(RValue RV, QualType Ty) {
 /// if the function returns void, or may be missing one if the function returns
 /// non-void.  Fun stuff :).
 void CodeGenFunction::EmitReturnStmt(const ReturnStmt &S) {
+  if (requiresReturnValueCheck()) {
+    llvm::Constant *SLoc = EmitCheckSourceLocation(S.getLocStart());
+    auto *SLocPtr =
+        new llvm::GlobalVariable(CGM.getModule(), SLoc->getType(), false,
+                                 llvm::GlobalVariable::PrivateLinkage, SLoc);
+    SLocPtr->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
+    CGM.getSanitizerMetadata()->disableSanitizerForGlobal(SLocPtr);
+    assert(ReturnLocation.isValid() && "No valid return location");
+    Builder.CreateStore(Builder.CreateBitCast(SLocPtr, Int8PtrTy),
+                        ReturnLocation);
+  }
+
   // Returning from an outlined SEH helper is UB, and we already warn on it.
   if (IsOutlinedSEHHelper) {
     Builder.CreateUnreachable();
diff --git a/interpreter/llvm/src/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp b/interpreter/llvm/src/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 19b6cbab66c9b..6135cf31d176b 100644
--- a/interpreter/llvm/src/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -239,21 +239,47 @@ static QualType getCanonicalParamType(ASTContext &C, QualType T) {
   return C.getCanonicalParamType(T);
 }
 
-llvm::Function *
-CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
-  assert(
-      CapturedStmtInfo &&
-      "CapturedStmtInfo should be set when generating the captured function");
-  const CapturedDecl *CD = S.getCapturedDecl();
-  const RecordDecl *RD = S.getCapturedRecordDecl();
+namespace {
+  /// Contains required data for proper outlined function codegen.
+  struct FunctionOptions {
+    /// Captured statement for which the function is generated.
+    const CapturedStmt *S = nullptr;
+    /// true if cast to/from UIntPtr is required for variables captured by
+    /// value.
+    bool UIntPtrCastRequired = true;
+    /// true if only casted arguments must be registered as local args or VLA
+    /// sizes.
+    bool RegisterCastedArgsOnly = false;
+    /// Name of the generated function.
+    StringRef FunctionName;
+    explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
+                             bool RegisterCastedArgsOnly,
+                             StringRef FunctionName)
+        : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
+          RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
+          FunctionName(FunctionName) {}
+  };
+}
+
+static std::pair<llvm::Function *, bool> emitOutlinedFunctionPrologue(
+    CodeGenFunction &CGF, FunctionArgList &Args,
+    llvm::DenseMap<const Decl *, std::pair<const VarDecl *, Address>>
+        &LocalAddrs,
+    llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
+        &VLASizes,
+    llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
+  const CapturedDecl *CD = FO.S->getCapturedDecl();
+  const RecordDecl *RD = FO.S->getCapturedRecordDecl();
   assert(CD->hasBody() && "missing CapturedDecl body");
 
+  CXXThisValue = nullptr;
   // Build the argument list.
+  CodeGenModule &CGM = CGF.CGM;
   ASTContext &Ctx = CGM.getContext();
-  FunctionArgList Args;
+  bool HasUIntPtrArgs = false;
   Args.append(CD->param_begin(),
               std::next(CD->param_begin(), CD->getContextParamPosition()));
-  auto I = S.captures().begin();
+  auto I = FO.S->captures().begin();
   for (auto *FD : RD->fields()) {
     QualType ArgType = FD->getType();
     IdentifierInfo *II = nullptr;
@@ -265,24 +291,26 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
     // deal with pointers. We can pass in the same way the VLA type sizes to the
     // outlined function.
     if ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
-        I->capturesVariableArrayType())
-      ArgType = Ctx.getUIntPtrType();
+        I->capturesVariableArrayType()) {
+      HasUIntPtrArgs = true;
+      if (FO.UIntPtrCastRequired)
+        ArgType = Ctx.getUIntPtrType();
+    }
 
     if (I->capturesVariable() || I->capturesVariableByCopy()) {
       CapVar = I->getCapturedVar();
       II = CapVar->getIdentifier();
     } else if (I->capturesThis())
-      II = &getContext().Idents.get("this");
+      II = &Ctx.Idents.get("this");
     else {
       assert(I->capturesVariableArrayType());
-      II = &getContext().Idents.get("vla");
+      II = &Ctx.Idents.get("vla");
     }
-    if (ArgType->isVariablyModifiedType()) {
-      ArgType =
-          getCanonicalParamType(getContext(), ArgType.getNonReferenceType());
-    }
-    Args.push_back(ImplicitParamDecl::Create(getContext(), nullptr,
-                                             FD->getLocation(), II, ArgType));
+    if (ArgType->isVariablyModifiedType())
+      ArgType = getCanonicalParamType(Ctx, ArgType.getNonReferenceType());
+    Args.push_back(ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr,
+                                             FD->getLocation(), II, ArgType,
+                                             ImplicitParamDecl::Other));
     ++I;
   }
   Args.append(
@@ -295,89 +323,166 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
       CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args);
   llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);
 
-  llvm::Function *F = llvm::Function::Create(
-      FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
-      CapturedStmtInfo->getHelperName(), &CGM.getModule());
+  llvm::Function *F =
+      llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
+                             FO.FunctionName, &CGM.getModule());
   CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
   if (CD->isNothrow())
     F->addFnAttr(llvm::Attribute::NoUnwind);
 
   // Generate the function.
-  StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, CD->getLocation(),
-                CD->getBody()->getLocStart());
+  CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, CD->getLocation(),
+                    CD->getBody()->getLocStart());
   unsigned Cnt = CD->getContextParamPosition();
-  I = S.captures().begin();
+  I = FO.S->captures().begin();
   for (auto *FD : RD->fields()) {
     // If we are capturing a pointer by copy we don't need to do anything, just
     // use the value that we get from the arguments.
     if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
       const VarDecl *CurVD = I->getCapturedVar();
-      Address LocalAddr = GetAddrOfLocalVar(Args[Cnt]);
+      Address LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
       // If the variable is a reference we need to materialize it here.
       if (CurVD->getType()->isReferenceType()) {
-        Address RefAddr = CreateMemTemp(CurVD->getType(), getPointerAlign(),
-                                        ".materialized_ref");
-        EmitStoreOfScalar(LocalAddr.getPointer(), RefAddr, /*Volatile=*/false,
-                          CurVD->getType());
+        Address RefAddr = CGF.CreateMemTemp(
+            CurVD->getType(), CGM.getPointerAlign(), ".materialized_ref");
+        CGF.EmitStoreOfScalar(LocalAddr.getPointer(), RefAddr,
+                              /*Volatile=*/false, CurVD->getType());
         LocalAddr = RefAddr;
       }
-      setAddrOfLocalVar(CurVD, LocalAddr);
+      if (!FO.RegisterCastedArgsOnly)
+        LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}});
       ++Cnt;
       ++I;
       continue;
     }
 
-    LValue ArgLVal =
-        MakeAddrLValue(GetAddrOfLocalVar(Args[Cnt]), Args[Cnt]->getType(),
-                       AlignmentSource::Decl);
+    LValueBaseInfo BaseInfo(AlignmentSource::Decl, false);
+    LValue ArgLVal = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(Args[Cnt]),
+                                        Args[Cnt]->getType(), BaseInfo);
     if (FD->hasCapturedVLAType()) {
-      LValue CastedArgLVal =
-          MakeAddrLValue(castValueFromUintptr(*this, FD->getType(),
-                                              Args[Cnt]->getName(), ArgLVal),
-                         FD->getType(), AlignmentSource::Decl);
+      if (FO.UIntPtrCastRequired) {
+        ArgLVal = CGF.MakeAddrLValue(castValueFromUintptr(CGF, FD->getType(),
+                                                          Args[Cnt]->getName(),
+                                                          ArgLVal),
+                                     FD->getType(), BaseInfo);
+      }
       auto *ExprArg =
-          EmitLoadOfLValue(CastedArgLVal, SourceLocation()).getScalarVal();
+          CGF.EmitLoadOfLValue(ArgLVal, SourceLocation()).getScalarVal();
       auto VAT = FD->getCapturedVLAType();
-      VLASizeMap[VAT->getSizeExpr()] = ExprArg;
+      VLASizes.insert({Args[Cnt], {VAT->getSizeExpr(), ExprArg}});
     } else if (I->capturesVariable()) {
       auto *Var = I->getCapturedVar();
       QualType VarTy = Var->getType();
       Address ArgAddr = ArgLVal.getAddress();
       if (!VarTy->isReferenceType()) {
         if (ArgLVal.getType()->isLValueReferenceType()) {
-          ArgAddr = EmitLoadOfReference(
+          ArgAddr = CGF.EmitLoadOfReference(
               ArgAddr, ArgLVal.getType()->castAs<ReferenceType>());
         } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
           assert(ArgLVal.getType()->isPointerType());
-          ArgAddr = EmitLoadOfPointer(
+          ArgAddr = CGF.EmitLoadOfPointer(
               ArgAddr, ArgLVal.getType()->castAs<PointerType>());
         }
       }
-      setAddrOfLocalVar(
-          Var, Address(ArgAddr.getPointer(), getContext().getDeclAlign(Var)));
+      if (!FO.RegisterCastedArgsOnly) {
+        LocalAddrs.insert(
+            {Args[Cnt],
+             {Var, Address(ArgAddr.getPointer(), Ctx.getDeclAlign(Var))}});
+      }
     } else if (I->capturesVariableByCopy()) {
       assert(!FD->getType()->isAnyPointerType() &&
              "Not expecting a captured pointer.");
       auto *Var = I->getCapturedVar();
       QualType VarTy = Var->getType();
-      setAddrOfLocalVar(Var, castValueFromUintptr(*this, FD->getType(),
-                                                  Args[Cnt]->getName(), ArgLVal,
-                                                  VarTy->isReferenceType()));
+      LocalAddrs.insert(
+          {Args[Cnt],
+           {Var,
+            FO.UIntPtrCastRequired
+                ? castValueFromUintptr(CGF, FD->getType(), Args[Cnt]->getName(),
+                                       ArgLVal, VarTy->isReferenceType())
+                : ArgLVal.getAddress()}});
     } else {
       // If 'this' is captured, load it into CXXThisValue.
       assert(I->capturesThis());
-      CXXThisValue =
-          EmitLoadOfLValue(ArgLVal, Args[Cnt]->getLocation()).getScalarVal();
+      CXXThisValue = CGF.EmitLoadOfLValue(ArgLVal, Args[Cnt]->getLocation())
+                         .getScalarVal();
+      LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress()}});
     }
     ++Cnt;
     ++I;
   }
 
+  return {F, HasUIntPtrArgs};
+}
+
+llvm::Function *
+CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
+  assert(
+      CapturedStmtInfo &&
+      "CapturedStmtInfo should be set when generating the captured function");
+  const CapturedDecl *CD = S.getCapturedDecl();
+  // Build the argument list.
+  bool NeedWrapperFunction =
+      getDebugInfo() &&
+      CGM.getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo;
+  FunctionArgList Args;
+  llvm::DenseMap<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
+  llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
+  FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
+                     CapturedStmtInfo->getHelperName());
+  llvm::Function *F;
+  bool HasUIntPtrArgs;
+  std::tie(F, HasUIntPtrArgs) = emitOutlinedFunctionPrologue(
+      *this, Args, LocalAddrs, VLASizes, CXXThisValue, FO);
+  for (const auto &LocalAddrPair : LocalAddrs) {
+    if (LocalAddrPair.second.first) {
+      setAddrOfLocalVar(LocalAddrPair.second.first,
+                        LocalAddrPair.second.second);
+    }
+  }
+  for (const auto &VLASizePair : VLASizes)
+    VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
   PGO.assignRegionCounters(GlobalDecl(CD), F);
   CapturedStmtInfo->EmitBody(*this, CD->getBody());
   FinishFunction(CD->getBodyRBrace());
-
-  return F;
+  if (!NeedWrapperFunction || !HasUIntPtrArgs)
+    return F;
+
+  FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
+                            /*RegisterCastedArgsOnly=*/true,
+                            ".nondebug_wrapper.");
+  CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
+  WrapperCGF.disableDebugInfo();
+  Args.clear();
+  LocalAddrs.clear();
+  VLASizes.clear();
+  llvm::Function *WrapperF =
+      emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
+                                   WrapperCGF.CXXThisValue, WrapperFO).first;
+  LValueBaseInfo BaseInfo(AlignmentSource::Decl, false);
+  llvm::SmallVector<llvm::Value *, 4> CallArgs;
+  for (const auto *Arg : Args) {
+    llvm::Value *CallArg;
+    auto I = LocalAddrs.find(Arg);
+    if (I != LocalAddrs.end()) {
+      LValue LV =
+          WrapperCGF.MakeAddrLValue(I->second.second, Arg->getType(), BaseInfo);
+      CallArg = WrapperCGF.EmitLoadOfScalar(LV, SourceLocation());
+    } else {
+      auto EI = VLASizes.find(Arg);
+      if (EI != VLASizes.end())
+        CallArg = EI->second.second;
+      else {
+        LValue LV = WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
+                                              Arg->getType(), BaseInfo);
+        CallArg = WrapperCGF.EmitLoadOfScalar(LV, SourceLocation());
+      }
+    }
+    CallArgs.emplace_back(CallArg);
+  }
+  WrapperCGF.Builder.CreateCall(F, CallArgs);
+  WrapperCGF.FinishFunction();
+  return WrapperF;
 }
 
 //===----------------------------------------------------------------------===//
@@ -444,156 +549,6 @@ void CodeGenFunction::EmitOMPAggregateAssign(
   EmitBlock(DoneBB, /*IsFinished=*/true);
 }
 
-/// Check if the combiner is a call to UDR combiner and if it is so return the
-/// UDR decl used for reduction.
-static const OMPDeclareReductionDecl *
-getReductionInit(const Expr *ReductionOp) {
-  if (auto *CE = dyn_cast<CallExpr>(ReductionOp))
-    if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
-      if (auto *DRE =
-              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
-        if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
-          return DRD;
-  return nullptr;
-}
-
-static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
-                                             const OMPDeclareReductionDecl *DRD,
-                                             const Expr *InitOp,
-                                             Address Private, Address Original,
-                                             QualType Ty) {
-  if (DRD->getInitializer()) {
-    std::pair<llvm::Function *, llvm::Function *> Reduction =
-        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
-    auto *CE = cast<CallExpr>(InitOp);
-    auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
-    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
-    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
-    auto *LHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
-    auto *RHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
-    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
-    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
-                            [=]() -> Address { return Private; });
-    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
-                            [=]() -> Address { return Original; });
-    (void)PrivateScope.Privatize();
-    RValue Func = RValue::get(Reduction.second);
-    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
-    CGF.EmitIgnoredExpr(InitOp);
-  } else {
-    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
-    auto *GV = new llvm::GlobalVariable(
-        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
-        llvm::GlobalValue::PrivateLinkage, Init, ".init");
-    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
-    RValue InitRVal;
-    switch (CGF.getEvaluationKind(Ty)) {
-    case TEK_Scalar:
-      InitRVal = CGF.EmitLoadOfLValue(LV, SourceLocation());
-      break;
-    case TEK_Complex:
-      InitRVal =
-          RValue::getComplex(CGF.EmitLoadOfComplex(LV, SourceLocation()));
-      break;
-    case TEK_Aggregate:
-      InitRVal = RValue::getAggregate(LV.getAddress());
-      break;
-    }
-    OpaqueValueExpr OVE(SourceLocation(), Ty, VK_RValue);
-    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
-    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
-                         /*IsInitializer=*/false);
-  }
-}
-
-/// \brief Emit initialization of arrays of complex types.
-/// \param DestAddr Address of the array.
-/// \param Type Type of array.
-/// \param Init Initial expression of array.
-/// \param SrcAddr Address of the original array.
-static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
-                                 QualType Type, const Expr *Init,
-                                 Address SrcAddr = Address::invalid()) {
-  auto *DRD = getReductionInit(Init);
-  // Perform element-by-element initialization.
-  QualType ElementTy;
-
-  // Drill down to the base element type on both arrays.
-  auto ArrayTy = Type->getAsArrayTypeUnsafe();
-  auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
-  DestAddr =
-      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
-  if (DRD)
-    SrcAddr =
-        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
-
-  llvm::Value *SrcBegin = nullptr;
-  if (DRD)
-    SrcBegin = SrcAddr.getPointer();
-  auto DestBegin = DestAddr.getPointer();
-  // Cast from pointer to array type to pointer to single element.
-  auto DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
-  // The basic structure here is a while-do loop.
-  auto BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
-  auto DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
-  auto IsEmpty =
-      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
-  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
-
-  // Enter the loop body, making that address the current address.
-  auto EntryBB = CGF.Builder.GetInsertBlock();
-  CGF.EmitBlock(BodyBB);
-
-  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
-
-  llvm::PHINode *SrcElementPHI = nullptr;
-  Address SrcElementCurrent = Address::invalid();
-  if (DRD) {
-    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
-                                          "omp.arraycpy.srcElementPast");
-    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
-    SrcElementCurrent =
-        Address(SrcElementPHI,
-                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
-  }
-  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
-      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
-  DestElementPHI->addIncoming(DestBegin, EntryBB);
-  Address DestElementCurrent =
-      Address(DestElementPHI,
-              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
-
-  // Emit copy.
-  {
-    CodeGenFunction::RunCleanupsScope InitScope(CGF);
-    if (DRD && (DRD->getInitializer() || !Init)) {
-      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
-                                       SrcElementCurrent, ElementTy);
-    } else
-      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
-                           /*IsInitializer=*/false);
-  }
-
-  if (DRD) {
-    // Shift the address forward by one element.
-    auto SrcElementNext = CGF.Builder.CreateConstGEP1_32(
-        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
-    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
-  }
-
-  // Shift the address forward by one element.
-  auto DestElementNext = CGF.Builder.CreateConstGEP1_32(
-      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
-  // Check whether we've reached the end.
-  auto Done =
-      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
-  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
-  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());
-
-  // Done.
-  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
-}
-
 void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
                                   Address SrcAddr, const VarDecl *DestVD,
                                   const VarDecl *SrcVD, const Expr *Copy) {
@@ -946,253 +901,106 @@ void CodeGenFunction::EmitOMPLastprivateClauseFinal(
     EmitBlock(DoneBB, /*IsFinished=*/true);
 }
 
-static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
-                          LValue BaseLV, llvm::Value *Addr) {
-  Address Tmp = Address::invalid();
-  Address TopTmp = Address::invalid();
-  Address MostTopTmp = Address::invalid();
-  BaseTy = BaseTy.getNonReferenceType();
-  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
-         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
-    Tmp = CGF.CreateMemTemp(BaseTy);
-    if (TopTmp.isValid())
-      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
-    else
-      MostTopTmp = Tmp;
-    TopTmp = Tmp;
-    BaseTy = BaseTy->getPointeeType();
-  }
-  llvm::Type *Ty = BaseLV.getPointer()->getType();
-  if (Tmp.isValid())
-    Ty = Tmp.getElementType();
-  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
-  if (Tmp.isValid()) {
-    CGF.Builder.CreateStore(Addr, Tmp);
-    return MostTopTmp;
-  }
-  return Address(Addr, BaseLV.getAlignment());
-}
-
-static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
-                          LValue BaseLV) {
-  BaseTy = BaseTy.getNonReferenceType();
-  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
-         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
-    if (auto *PtrTy = BaseTy->getAs<PointerType>())
-      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
-    else {
-      BaseLV = CGF.EmitLoadOfReferenceLValue(BaseLV.getAddress(),
-                                             BaseTy->castAs<ReferenceType>());
-    }
-    BaseTy = BaseTy->getPointeeType();
-  }
-  return CGF.MakeAddrLValue(
-      Address(
-          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
-              BaseLV.getPointer(), CGF.ConvertTypeForMem(ElTy)->getPointerTo()),
-          BaseLV.getAlignment()),
-      BaseLV.getType(), BaseLV.getAlignmentSource());
-}
-
 void CodeGenFunction::EmitOMPReductionClauseInit(
     const OMPExecutableDirective &D,
     CodeGenFunction::OMPPrivateScope &PrivateScope) {
   if (!HaveInsertPoint())
     return;
+  SmallVector<const Expr *, 4> Shareds;
+  SmallVector<const Expr *, 4> Privates;
+  SmallVector<const Expr *, 4> ReductionOps;
+  SmallVector<const Expr *, 4> LHSs;
+  SmallVector<const Expr *, 4> RHSs;
   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
-    auto ILHS = C->lhs_exprs().begin();
-    auto IRHS = C->rhs_exprs().begin();
     auto IPriv = C->privates().begin();
     auto IRed = C->reduction_ops().begin();
-    for (auto IRef : C->varlists()) {
-      auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
-      auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
-      auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
-      auto *DRD = getReductionInit(*IRed);
-      if (auto *OASE = dyn_cast<OMPArraySectionExpr>(IRef)) {
-        auto *Base = OASE->getBase()->IgnoreParenImpCasts();
-        while (auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
-          Base = TempOASE->getBase()->IgnoreParenImpCasts();
-        while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
-          Base = TempASE->getBase()->IgnoreParenImpCasts();
-        auto *DE = cast<DeclRefExpr>(Base);
-        auto *OrigVD = cast<VarDecl>(DE->getDecl());
-        auto OASELValueLB = EmitOMPArraySectionExpr(OASE);
-        auto OASELValueUB =
-            EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
-        auto OriginalBaseLValue = EmitLValue(DE);
-        LValue BaseLValue =
-            loadToBegin(*this, OrigVD->getType(), OASELValueLB.getType(),
-                        OriginalBaseLValue);
-        // Store the address of the original variable associated with the LHS
-        // implicit variable.
-        PrivateScope.addPrivate(LHSVD, [OASELValueLB]() -> Address {
-          return OASELValueLB.getAddress();
-        });
-        // Emit reduction copy.
-        bool IsRegistered = PrivateScope.addPrivate(
-            OrigVD, [this, OrigVD, PrivateVD, BaseLValue, OASELValueLB,
-                     OASELValueUB, OriginalBaseLValue, DRD, IRed]() -> Address {
-              // Emit VarDecl with copy init for arrays.
-              // Get the address of the original variable captured in current
-              // captured region.
-              auto *Size = Builder.CreatePtrDiff(OASELValueUB.getPointer(),
-                                                 OASELValueLB.getPointer());
-              Size = Builder.CreateNUWAdd(
-                  Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
-              CodeGenFunction::OpaqueValueMapping OpaqueMap(
-                  *this, cast<OpaqueValueExpr>(
-                             getContext()
-                                 .getAsVariableArrayType(PrivateVD->getType())
-                                 ->getSizeExpr()),
-                  RValue::get(Size));
-              EmitVariablyModifiedType(PrivateVD->getType());
-              auto Emission = EmitAutoVarAlloca(*PrivateVD);
-              auto Addr = Emission.getAllocatedAddress();
-              auto *Init = PrivateVD->getInit();
-              EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(),
-                                   DRD ? *IRed : Init,
-                                   OASELValueLB.getAddress());
-              EmitAutoVarCleanups(Emission);
-              // Emit private VarDecl with reduction init.
-              auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(),
-                                                   OASELValueLB.getPointer());
-              auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset);
-              return castToBase(*this, OrigVD->getType(),
-                                OASELValueLB.getType(), OriginalBaseLValue,
-                                Ptr);
-            });
-        assert(IsRegistered && "private var already registered as private");
-        // Silence the warning about unused variable.
-        (void)IsRegistered;
-        PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
-          return GetAddrOfLocalVar(PrivateVD);
-        });
-      } else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(IRef)) {
-        auto *Base = ASE->getBase()->IgnoreParenImpCasts();
-        while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
-          Base = TempASE->getBase()->IgnoreParenImpCasts();
-        auto *DE = cast<DeclRefExpr>(Base);
-        auto *OrigVD = cast<VarDecl>(DE->getDecl());
-        auto ASELValue = EmitLValue(ASE);
-        auto OriginalBaseLValue = EmitLValue(DE);
-        LValue BaseLValue = loadToBegin(
-            *this, OrigVD->getType(), ASELValue.getType(), OriginalBaseLValue);
-        // Store the address of the original variable associated with the LHS
-        // implicit variable.
-        PrivateScope.addPrivate(
-            LHSVD, [ASELValue]() -> Address { return ASELValue.getAddress(); });
-        // Emit reduction copy.
-        bool IsRegistered = PrivateScope.addPrivate(
-            OrigVD, [this, OrigVD, PrivateVD, BaseLValue, ASELValue,
-                     OriginalBaseLValue, DRD, IRed]() -> Address {
-              // Emit private VarDecl with reduction init.
-              AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
-              auto Addr = Emission.getAllocatedAddress();
-              if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
-                emitInitWithReductionInitializer(*this, DRD, *IRed, Addr,
-                                                 ASELValue.getAddress(),
-                                                 ASELValue.getType());
-              } else
-                EmitAutoVarInit(Emission);
-              EmitAutoVarCleanups(Emission);
-              auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(),
-                                                   ASELValue.getPointer());
-              auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset);
-              return castToBase(*this, OrigVD->getType(), ASELValue.getType(),
-                                OriginalBaseLValue, Ptr);
-            });
-        assert(IsRegistered && "private var already registered as private");
-        // Silence the warning about unused variable.
-        (void)IsRegistered;
-        PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
-          return Builder.CreateElementBitCast(
-              GetAddrOfLocalVar(PrivateVD), ConvertTypeForMem(RHSVD->getType()),
-              "rhs.begin");
-        });
-      } else {
-        auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
-        QualType Type = PrivateVD->getType();
-        if (getContext().getAsArrayType(Type)) {
-          // Store the address of the original variable associated with the LHS
-          // implicit variable.
-          DeclRefExpr DRE(const_cast(OrigVD),
-                          CapturedStmtInfo->lookup(OrigVD) != nullptr,
-                          IRef->getType(), VK_LValue, IRef->getExprLoc());
-          Address OriginalAddr = EmitLValue(&DRE).getAddress();
-          PrivateScope.addPrivate(LHSVD, [this, &OriginalAddr,
-                                          LHSVD]() -> Address {
-            OriginalAddr = Builder.CreateElementBitCast(
-                OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
-            return OriginalAddr;
-          });
-          bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
-            if (Type->isVariablyModifiedType()) {
-              CodeGenFunction::OpaqueValueMapping OpaqueMap(
-                  *this, cast<OpaqueValueExpr>(
-                             getContext()
-                                 .getAsVariableArrayType(PrivateVD->getType())
-                                 ->getSizeExpr()),
-                  RValue::get(
-                      getTypeSize(OrigVD->getType().getNonReferenceType())));
-              EmitVariablyModifiedType(Type);
-            }
-            auto Emission = EmitAutoVarAlloca(*PrivateVD);
-            auto Addr = Emission.getAllocatedAddress();
-            auto *Init = PrivateVD->getInit();
-            EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(),
-                                 DRD ? *IRed : Init, OriginalAddr);
-            EmitAutoVarCleanups(Emission);
-            return Emission.getAllocatedAddress();
-          });
-          assert(IsRegistered && "private var already registered as private");
-          // Silence the warning about unused variable.
-          (void)IsRegistered;
-          PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
-            return Builder.CreateElementBitCast(
-                GetAddrOfLocalVar(PrivateVD),
-                ConvertTypeForMem(RHSVD->getType()), "rhs.begin");
-          });
-        } else {
-          // Store the address of the original variable associated with the LHS
-          // implicit variable.
-          Address OriginalAddr = Address::invalid();
-          PrivateScope.addPrivate(LHSVD, [this, OrigVD, IRef,
-                                          &OriginalAddr]() -> Address {
-            DeclRefExpr DRE(const_cast(OrigVD),
-                            CapturedStmtInfo->lookup(OrigVD) != nullptr,
-                            IRef->getType(), VK_LValue, IRef->getExprLoc());
-            OriginalAddr = EmitLValue(&DRE).getAddress();
-            return OriginalAddr;
-          });
-          // Emit reduction copy.
-          bool IsRegistered = PrivateScope.addPrivate(
-              OrigVD, [this, PrivateVD, OriginalAddr, DRD, IRed]() -> Address {
-                // Emit private VarDecl with reduction init.
-                AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
-                auto Addr = Emission.getAllocatedAddress();
-                if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
-                  emitInitWithReductionInitializer(*this, DRD, *IRed, Addr,
-                                                   OriginalAddr,
-                                                   PrivateVD->getType());
-                } else
-                  EmitAutoVarInit(Emission);
-                EmitAutoVarCleanups(Emission);
-                return Addr;
-              });
-          assert(IsRegistered && "private var already registered as private");
-          // Silence the warning about unused variable.
-          (void)IsRegistered;
-          PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
-            return GetAddrOfLocalVar(PrivateVD);
-          });
-        }
+    auto ILHS = C->lhs_exprs().begin();
+    auto IRHS = C->rhs_exprs().begin();
+    for (const auto *Ref : C->varlists()) {
+      Shareds.emplace_back(Ref);
+      Privates.emplace_back(*IPriv);
+      ReductionOps.emplace_back(*IRed);
+      LHSs.emplace_back(*ILHS);
+      RHSs.emplace_back(*IRHS);
+      std::advance(IPriv, 1);
+      std::advance(IRed, 1);
+      std::advance(ILHS, 1);
+      std::advance(IRHS, 1);
+    }
+  }
+  ReductionCodeGen RedCG(Shareds, Privates, ReductionOps);
+  unsigned Count = 0;
+  auto ILHS = LHSs.begin();
+  auto IRHS = RHSs.begin();
+  auto IPriv = Privates.begin();
+  for (const auto *IRef : Shareds) {
+    auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
+    // Emit private VarDecl with reduction init.
+    RedCG.emitSharedLValue(*this, Count);
+    RedCG.emitAggregateType(*this, Count);
+    auto Emission = EmitAutoVarAlloca(*PrivateVD);
+    RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(),
+                             RedCG.getSharedLValue(Count),
+                             [&Emission](CodeGenFunction &CGF) {
+                               CGF.EmitAutoVarInit(Emission);
+                               return true;
+                             });
+    EmitAutoVarCleanups(Emission);
+    Address BaseAddr = RedCG.adjustPrivateAddress(
+        *this, Count, Emission.getAllocatedAddress());
+    bool IsRegistered = PrivateScope.addPrivate(
+        RedCG.getBaseDecl(Count), [BaseAddr]() -> Address { return BaseAddr; });
+    assert(IsRegistered && "private var already registered as private");
+    // Silence the warning about unused variable.
+    (void)IsRegistered;
+
+    auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
+    auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
+    if (isa<OMPArraySectionExpr>(IRef)) {
+      // Store the address of the original variable associated with the LHS
+      // implicit variable.
+      PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address {
+        return RedCG.getSharedLValue(Count).getAddress();
+      });
+      PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
+        return GetAddrOfLocalVar(PrivateVD);
+      });
+    } else if (isa<ArraySubscriptExpr>(IRef)) {
+      // Store the address of the original variable associated with the LHS
+      // implicit variable.
+      PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address {
+        return RedCG.getSharedLValue(Count).getAddress();
+      });
+      PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
+        return Builder.CreateElementBitCast(GetAddrOfLocalVar(PrivateVD),
+                                            ConvertTypeForMem(RHSVD->getType()),
+                                            "rhs.begin");
+      });
+    } else {
+      QualType Type = PrivateVD->getType();
+      bool IsArray = getContext().getAsArrayType(Type) != nullptr;
+      Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress();
+      // Store the address of the original variable associated with the LHS
+      // implicit variable.
+      if (IsArray) {
+        OriginalAddr = Builder.CreateElementBitCast(
+            OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
       }
-      ++ILHS;
-      ++IRHS;
-      ++IPriv;
-      ++IRed;
+      PrivateScope.addPrivate(
+          LHSVD, [OriginalAddr]() -> Address { return OriginalAddr; });
+      PrivateScope.addPrivate(
+          RHSVD, [this, PrivateVD, RHSVD, IsArray]() -> Address {
+            return IsArray
+                       ? Builder.CreateElementBitCast(
+                             GetAddrOfLocalVar(PrivateVD),
+                             ConvertTypeForMem(RHSVD->getType()), "rhs.begin")
+                       : GetAddrOfLocalVar(PrivateVD);
+          });
     }
+    ++ILHS;
+    ++IRHS;
+    ++IPriv;
+    ++Count;
   }
 }
 
@@ -2889,11 +2697,32 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S,
       ++ID;
     }
   }
+  SmallVector<const Expr *, 4> LHSs;
+  SmallVector<const Expr *, 4> RHSs;
+  for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) {
+    auto IPriv = C->privates().begin();
+    auto IRed = C->reduction_ops().begin();
+    auto ILHS = C->lhs_exprs().begin();
+    auto IRHS = C->rhs_exprs().begin();
+    for (const auto *Ref : C->varlists()) {
+      Data.ReductionVars.emplace_back(Ref);
+      Data.ReductionCopies.emplace_back(*IPriv);
+      Data.ReductionOps.emplace_back(*IRed);
+      LHSs.emplace_back(*ILHS);
+      RHSs.emplace_back(*IRHS);
+      std::advance(IPriv, 1);
+      std::advance(IRed, 1);
+      std::advance(ILHS, 1);
+      std::advance(IRHS, 1);
+    }
+  }
+  Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit(
+      *this, S.getLocStart(), LHSs, RHSs, Data);
   // Build list of dependences.
   for (const auto *C : S.getClausesOfKind<OMPDependClause>())
     for (auto *IRef : C->varlists())
       Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef));
-  auto &&CodeGen = [&Data, CS, &BodyGen, &LastprivateDstsOrigs](
+  auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs](
       CodeGenFunction &CGF, PrePostActionTy &Action) {
     // Set proper addresses for generated private copies.
     OMPPrivateScope Scope(CGF);
@@ -2948,6 +2777,34 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S,
         Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
       }
     }
+    if (Data.Reductions) {
+      OMPLexicalScope LexScope(CGF, S, /*AsInlined=*/true);
+      ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionCopies,
+                             Data.ReductionOps);
+      llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
+          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9)));
+      for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
+        RedCG.emitSharedLValue(CGF, Cnt);
+        RedCG.emitAggregateType(CGF, Cnt);
+        Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
+            CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
+        Replacement =
+            Address(CGF.EmitScalarConversion(
+                        Replacement.getPointer(), CGF.getContext().VoidPtrTy,
+                        CGF.getContext().getPointerType(
+                            Data.ReductionCopies[Cnt]->getType()),
+                        SourceLocation()),
+                    Replacement.getAlignment());
+        Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
+        Scope.addPrivate(RedCG.getBaseDecl(Cnt),
+                         [Replacement]() { return Replacement; });
+        // FIXME: This must be removed once the runtime library is fixed.
+        // Emit required threadprivate variables for
+        // initializer/combiner/finalizer.
+        CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(),
+                                                           RedCG, Cnt);
+      }
+    }
     (void)Scope.Privatize();
 
     Action.Enter(CGF);
@@ -3609,6 +3466,7 @@ static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
   case OMPC_firstprivate:
   case OMPC_lastprivate:
   case OMPC_reduction:
+  case OMPC_task_reduction:
   case OMPC_safelen:
   case OMPC_simdlen:
   case OMPC_collapse:
@@ -4258,7 +4116,18 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop,
                                                     CodeGen);
   };
-  EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
+  if (Data.Nogroup)
+    EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
+  else {
+    CGM.getOpenMPRuntime().emitTaskgroupRegion(
+        *this,
+        [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF,
+                                        PrePostActionTy &Action) {
+          Action.Enter(CGF);
+          CGF.EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
+        },
+        S.getLocStart());
+  }
 }
 
 void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
diff --git a/interpreter/llvm/src/tools/clang/lib/CodeGen/CGVTables.cpp b/interpreter/llvm/src/tools/clang/lib/CodeGen/CGVTables.cpp
index 1869c0e809dfd..64b6d0d3fe9f5 100644
--- a/interpreter/llvm/src/tools/clang/lib/CodeGen/CGVTables.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/CodeGen/CGVTables.cpp
@@ -901,6 +901,8 @@ void CodeGenModule::EmitDeferredVTables() {
   for (const CXXRecordDecl *RD : DeferredVTables)
     if (shouldEmitVTableAtEndOfTranslationUnit(*this, RD))
       VTables.GenerateClassData(RD);
+    else if (shouldOpportunisticallyEmitVTables())
+      OpportunisticVTables.push_back(RD);
 
   assert(savedSize == DeferredVTables.size() &&
          "deferred extra vtables during vtable emission?");
diff --git a/interpreter/llvm/src/tools/clang/lib/CodeGen/CGValue.h b/interpreter/llvm/src/tools/clang/lib/CodeGen/CGValue.h
index 53a376df6457b..b768eb86367be 100644
--- a/interpreter/llvm/src/tools/clang/lib/CodeGen/CGValue.h
+++ b/interpreter/llvm/src/tools/clang/lib/CodeGen/CGValue.h
@@ -146,6 +146,25 @@ static inline AlignmentSource getFieldAlignmentSource(AlignmentSource Source) {
   return AlignmentSource::Decl;
 }
 
+class LValueBaseInfo {
+  AlignmentSource AlignSource;
+  bool MayAlias;
+
+public:
+  explicit LValueBaseInfo(AlignmentSource Source = AlignmentSource::Type,
+                          bool Alias = false)
+    : AlignSource(Source), MayAlias(Alias) {}
+  AlignmentSource getAlignmentSource() const { return AlignSource; }
+  void setAlignmentSource(AlignmentSource Source) { AlignSource = Source; }
+  bool getMayAlias() const { return MayAlias; }
+  void setMayAlias(bool Alias) { MayAlias = Alias; }
+
+  void mergeForCast(const LValueBaseInfo &Info) {
+    setAlignmentSource(Info.getAlignmentSource());
+    setMayAlias(getMayAlias() || Info.getMayAlias());
+  }
+};
+
 /// LValue - This represents an lvalue reference.  Because C/C++ allow
 /// bitfields, this is not a simple LLVM pointer, it may be a pointer plus a
 /// bitrange.
@@ -200,7 +219,7 @@ class LValue {
   // to make the default bitfield pattern all-zeroes.
   bool ImpreciseLifetime : 1;
 
-  unsigned AlignSource : 2;
+  LValueBaseInfo BaseInfo;
 
   // This flag shows if a nontemporal load/stores should be used when accessing
   // this lvalue.
@@ -218,7 +237,7 @@ class LValue {
 
 private:
   void Initialize(QualType Type, Qualifiers Quals,
-                  CharUnits Alignment, AlignmentSource AlignSource,
+                  CharUnits Alignment, LValueBaseInfo BaseInfo,
                   llvm::MDNode *TBAAInfo = nullptr) {
     assert((!Alignment.isZero() || Type->isIncompleteType()) &&
            "initializing l-value with zero alignment!");
@@ -227,7 +246,7 @@ class LValue {
     this->Alignment = Alignment.getQuantity();
     assert(this->Alignment == Alignment.getQuantity() &&
            "Alignment exceeds allowed max!");
-    this->AlignSource = unsigned(AlignSource);
+    this->BaseInfo = BaseInfo;
 
     // Initialize Objective-C flags.
     this->Ivar = this->ObjIsArray = this->NonGC = this->GlobalObjCRef = false;
@@ -316,12 +335,8 @@ class LValue {
   CharUnits getAlignment() const { return CharUnits::fromQuantity(Alignment); }
   void setAlignment(CharUnits A) { Alignment = A.getQuantity(); }
 
-  AlignmentSource getAlignmentSource() const {
-    return AlignmentSource(AlignSource);
-  }
-  void setAlignmentSource(AlignmentSource Source) {
-    AlignSource = unsigned(Source);
-  }
+  LValueBaseInfo getBaseInfo() const { return BaseInfo; }
+  void setBaseInfo(LValueBaseInfo Info) { BaseInfo = Info; }
 
   // simple lvalue
   llvm::Value *getPointer() const {
@@ -370,7 +385,7 @@ class LValue {
 
   static LValue MakeAddr(Address address, QualType type,
                          ASTContext &Context,
-                         AlignmentSource alignSource,
+                         LValueBaseInfo BaseInfo,
                          llvm::MDNode *TBAAInfo = nullptr) {
     Qualifiers qs = type.getQualifiers();
     qs.setObjCGCAttr(Context.getObjCGCAttrKind(type));
@@ -379,29 +394,29 @@ class LValue {
     R.LVType = Simple;
     assert(address.getPointer()->getType()->isPointerTy());
     R.V = address.getPointer();
-    R.Initialize(type, qs, address.getAlignment(), alignSource, TBAAInfo);
+    R.Initialize(type, qs, address.getAlignment(), BaseInfo, TBAAInfo);
     return R;
   }
 
   static LValue MakeVectorElt(Address vecAddress, llvm::Value *Idx,
-                              QualType type, AlignmentSource alignSource) {
+                              QualType type, LValueBaseInfo BaseInfo) {
     LValue R;
     R.LVType = VectorElt;
     R.V = vecAddress.getPointer();
     R.VectorIdx = Idx;
     R.Initialize(type, type.getQualifiers(), vecAddress.getAlignment(),
-                 alignSource);
+                 BaseInfo);
     return R;
   }
 
   static LValue MakeExtVectorElt(Address vecAddress, llvm::Constant *Elts,
-                                 QualType type, AlignmentSource alignSource) {
+                                 QualType type, LValueBaseInfo BaseInfo) {
     LValue R;
     R.LVType = ExtVectorElt;
     R.V = vecAddress.getPointer();
     R.VectorElts = Elts;
     R.Initialize(type, type.getQualifiers(), vecAddress.getAlignment(),
-                 alignSource);
+                 BaseInfo);
     return R;
   }
 
@@ -414,12 +429,12 @@ class LValue {
   static LValue MakeBitfield(Address Addr,
                              const CGBitFieldInfo &Info,
                              QualType type,
-                             AlignmentSource alignSource) {
+                             LValueBaseInfo BaseInfo) {
     LValue R;
     R.LVType = BitField;
     R.V = Addr.getPointer();
     R.BitFieldInfo = &Info;
-    R.Initialize(type, type.getQualifiers(), Addr.getAlignment(), alignSource);
+    R.Initialize(type, type.getQualifiers(), Addr.getAlignment(), BaseInfo);
     return R;
   }
 
@@ -428,7 +443,7 @@ class LValue {
     R.LVType = GlobalReg;
     R.V = Reg.getPointer();
     R.Initialize(type, type.getQualifiers(), Reg.getAlignment(),
-                 AlignmentSource::Decl);
+                 LValueBaseInfo(AlignmentSource::Decl, false));
     return R;
   }
 
diff --git a/interpreter/llvm/src/tools/clang/lib/CodeGen/CodeGenABITypes.cpp b/interpreter/llvm/src/tools/clang/lib/CodeGen/CodeGenABITypes.cpp
index 166f44f816f31..0735a9c3dfbc1 100644
--- a/interpreter/llvm/src/tools/clang/lib/CodeGen/CodeGenABITypes.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/CodeGen/CodeGenABITypes.cpp
@@ -64,3 +64,19 @@ CodeGen::arrangeFreeFunctionCall(CodeGenModule &CGM,
       returnType, /*IsInstanceMethod=*/false, /*IsChainCall=*/false, argTypes,
       info, {}, args);
 }
+
+llvm::FunctionType *
+CodeGen::convertFreeFunctionType(CodeGenModule &CGM, const FunctionDecl *FD) {
+  assert(FD != nullptr && "Expected a non-null function declaration!");
+  llvm::Type *T = CGM.getTypes().ConvertFunctionType(FD->getType(), FD);
+
+  if (auto FT = dyn_cast<llvm::FunctionType>(T))
+    return FT;
+
+  return nullptr;
+}
+
+llvm::Type *
+CodeGen::convertTypeForMemory(CodeGenModule &CGM, QualType T) {
+  return CGM.getTypes().ConvertTypeForMem(T);
+}
diff --git a/interpreter/llvm/src/tools/clang/lib/CodeGen/CodeGenAction.cpp b/interpreter/llvm/src/tools/clang/lib/CodeGen/CodeGenAction.cpp
index c7e30fad7575d..4f03de55149b8 100644
--- a/interpreter/llvm/src/tools/clang/lib/CodeGen/CodeGenAction.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/CodeGen/CodeGenAction.cpp
@@ -228,7 +228,10 @@ namespace clang {
           Ctx.getDiagnosticHandler();
       void *OldDiagnosticContext = Ctx.getDiagnosticContext();
       Ctx.setDiagnosticHandler(DiagnosticHandler, this);
-      Ctx.setDiagnosticHotnessRequested(CodeGenOpts.DiagnosticsWithHotness);
+      Ctx.setDiagnosticsHotnessRequested(CodeGenOpts.DiagnosticsWithHotness);
+      if (CodeGenOpts.DiagnosticsHotnessThreshold != 0)
+        Ctx.setDiagnosticsHotnessThreshold(
+            CodeGenOpts.DiagnosticsHotnessThreshold);
 
       std::unique_ptr<llvm::tool_output_file> OptRecordFile;
       if (!CodeGenOpts.OptRecordFile.empty()) {
@@ -246,7 +249,7 @@ namespace clang {
             llvm::make_unique<yaml::Output>(OptRecordFile->os()));
 
         if (CodeGenOpts.getProfileUse() != CodeGenOptions::ProfileNone)
-          Ctx.setDiagnosticHotnessRequested(true);
+          Ctx.setDiagnosticsHotnessRequested(true);
       }
 
       // Link each LinkModule into our module.
diff --git a/interpreter/llvm/src/tools/clang/lib/CodeGen/CodeGenFunction.cpp b/interpreter/llvm/src/tools/clang/lib/CodeGen/CodeGenFunction.cpp
index 7b42850df968c..c23b25ea461fe 100644
--- a/interpreter/llvm/src/tools/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -22,6 +22,7 @@
 #include "CodeGenPGO.h"
 #include "TargetInfo.h"
 #include "clang/AST/ASTContext.h"
+#include "clang/AST/ASTLambda.h"
 #include "clang/AST/Decl.h"
 #include "clang/AST/DeclCXX.h"
 #include "clang/AST/StmtCXX.h"
@@ -117,25 +118,27 @@ CodeGenFunction::~CodeGenFunction() {
 }
 
 CharUnits CodeGenFunction::getNaturalPointeeTypeAlignment(QualType T,
-                                                     AlignmentSource *Source) {
-  return getNaturalTypeAlignment(T->getPointeeType(), Source,
+                                                    LValueBaseInfo *BaseInfo) {
+  return getNaturalTypeAlignment(T->getPointeeType(), BaseInfo,
                                  /*forPointee*/ true);
 }
 
 CharUnits CodeGenFunction::getNaturalTypeAlignment(QualType T,
-                                                   AlignmentSource *Source,
+                                                   LValueBaseInfo *BaseInfo,
                                                    bool forPointeeType) {
   // Honor alignment typedef attributes even on incomplete types.
   // We also honor them straight for C++ class types, even as pointees;
   // there's an expressivity gap here.
   if (auto TT = T->getAs<TypedefType>()) {
     if (auto Align = TT->getDecl()->getMaxAlignment()) {
-      if (Source) *Source = AlignmentSource::AttributedType;
+      if (BaseInfo)
+        *BaseInfo = LValueBaseInfo(AlignmentSource::AttributedType, false);
       return getContext().toCharUnitsFromBits(Align);
     }
   }
 
-  if (Source) *Source = AlignmentSource::Type;
+  if (BaseInfo)
+    *BaseInfo = LValueBaseInfo(AlignmentSource::Type, false);
 
   CharUnits Alignment;
   if (T->isIncompleteType()) {
@@ -165,9 +168,9 @@ CharUnits CodeGenFunction::getNaturalTypeAlignment(QualType T,
 }
 
 LValue CodeGenFunction::MakeNaturalAlignAddrLValue(llvm::Value *V, QualType T) {
-  AlignmentSource AlignSource;
-  CharUnits Alignment = getNaturalTypeAlignment(T, &AlignSource);
-  return LValue::MakeAddr(Address(V, Alignment), T, getContext(), AlignSource,
+  LValueBaseInfo BaseInfo;
+  CharUnits Alignment = getNaturalTypeAlignment(T, &BaseInfo);
+  return LValue::MakeAddr(Address(V, Alignment), T, getContext(), BaseInfo,
                           CGM.getTBAAInfo(T));
 }
 
@@ -175,9 +178,9 @@ LValue CodeGenFunction::MakeNaturalAlignAddrLValue(llvm::Value *V, QualType T) {
 /// construct an l-value with the natural pointee alignment of T.
 LValue
 CodeGenFunction::MakeNaturalAlignPointeeAddrLValue(llvm::Value *V, QualType T) {
-  AlignmentSource AlignSource;
-  CharUnits Align = getNaturalTypeAlignment(T, &AlignSource, /*pointee*/ true);
-  return MakeAddrLValue(Address(V, Align), T, AlignSource);
+  LValueBaseInfo BaseInfo;
+  CharUnits Align = getNaturalTypeAlignment(T, &BaseInfo, /*pointee*/ true);
+  return MakeAddrLValue(Address(V, Align), T, BaseInfo);
 }
 
 
@@ -346,7 +349,7 @@ void CodeGenFunction::FinishFunction(SourceLocation EndLoc) {
 
   // Emit debug descriptor for function end.
   if (CGDebugInfo *DI = getDebugInfo())
-    DI->EmitFunctionEnd(Builder);
+    DI->EmitFunctionEnd(Builder, CurFn);
 
   // Reset the debug location to that of the simple 'return' expression, if any
   // rather than that of the end of the function's scope '}'.
@@ -858,6 +861,13 @@ void CodeGenFunction::StartFunction(GlobalDecl GD,
 
   Builder.SetInsertPoint(EntryBB);
 
+  // If we're checking the return value, allocate space for a pointer to a
+  // precise source location of the checked return statement.
+  if (requiresReturnValueCheck()) {
+    ReturnLocation = CreateDefaultAlignTempAlloca(Int8PtrTy, "return.sloc.ptr");
+    InitTempAlloca(ReturnLocation, llvm::ConstantPointerNull::get(Int8PtrTy));
+  }
+
   // Emit subprogram debug descriptor.
   if (CGDebugInfo *DI = getDebugInfo()) {
     // Reconstruct the type from the argument list so that implicit parameters,
@@ -885,8 +895,10 @@ void CodeGenFunction::StartFunction(GlobalDecl GD,
   if (CGM.getCodeGenOpts().InstrumentForProfiling) {
     if (CGM.getCodeGenOpts().CallFEntry)
       Fn->addFnAttr("fentry-call", "true");
-    else
-      Fn->addFnAttr("counting-function", getTarget().getMCountName());
+    else {
+      if (!CurFuncDecl || !CurFuncDecl->hasAttr<NoInstrumentFunctionAttr>())
+        Fn->addFnAttr("counting-function", getTarget().getMCountName());
+    }
   }
 
   if (RetTy->isVoidType()) {
@@ -972,11 +984,22 @@ void CodeGenFunction::StartFunction(GlobalDecl GD,
     }
 
     // Check the 'this' pointer once per function, if it's available.
-    if (CXXThisValue) {
+    if (CXXABIThisValue) {
       SanitizerSet SkippedChecks;
       SkippedChecks.set(SanitizerKind::ObjectSize, true);
       QualType ThisTy = MD->getThisType(getContext());
-      EmitTypeCheck(TCK_Load, Loc, CXXThisValue, ThisTy,
+
+      // If this is the call operator of a lambda with no capture-default, it
+      // may have a static invoker function, which may call this operator with
+      // a null 'this' pointer.
+      if (isLambdaCallOperator(MD) &&
+          cast<CXXRecordDecl>(MD->getParent())->getLambdaCaptureDefault() ==
+              LCD_None)
+        SkippedChecks.set(SanitizerKind::Null, true);
+
+      EmitTypeCheck(isa<CXXConstructorDecl>(MD) ? TCK_ConstructorCall
+                                                : TCK_MemberCall,
+                    Loc, CXXABIThisValue, ThisTy,
                     getContext().getTypeAlignInChars(ThisTy->getPointeeType()),
                     SkippedChecks);
     }
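
The lambda case above exists because a lambda with no capture-default is convertible to a plain function pointer; a minimal sketch of the conversion that ends up invoking operator() with a null 'this' (illustrative, not part of the patch):

  #include <cassert>

  int main() {
    // The conversion goes through the lambda's static invoker, which forwards
    // to operator() with a null 'this'; -fsanitize=null must not flag that.
    int (*fp)(int) = [](int x) { return x + 1; };
    assert(fp(41) == 42);
  }
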
@@ -1081,10 +1104,9 @@ QualType CodeGenFunction::BuildFunctionArgList(GlobalDecl GD,
       if (!Param->hasAttr<PassObjectSizeAttr>())
         continue;
 
-      IdentifierInfo *NoID = nullptr;
       auto *Implicit = ImplicitParamDecl::Create(
-          getContext(), Param->getDeclContext(), Param->getLocation(), NoID,
-          getContext().getSizeType());
+          getContext(), Param->getDeclContext(), Param->getLocation(),
+          /*Id=*/nullptr, getContext().getSizeType(), ImplicitParamDecl::Other);
       SizeArguments[Param] = Implicit;
       Args.push_back(Implicit);
     }
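
The implicit parameter created above carries the hidden size argument of pass_object_size; a short usage sketch (illustrative example, not part of the patch):

  #include <cstddef>
  #include <cstring>

  // Each caller evaluates __builtin_object_size(dst, 0) at the call site and
  // passes it through the hidden parameter modeled by the ImplicitParamDecl.
  static int checked_copy(char *dst __attribute__((pass_object_size(0))),
                          const char *src, std::size_t len) {
    if (len > __builtin_object_size(dst, 0))
      return -1;
    std::memcpy(dst, src, len);
    return 0;
  }
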
diff --git a/interpreter/llvm/src/tools/clang/lib/CodeGen/CodeGenFunction.h b/interpreter/llvm/src/tools/clang/lib/CodeGen/CodeGenFunction.h
index 459841aee5a21..753dd92f3071a 100644
--- a/interpreter/llvm/src/tools/clang/lib/CodeGen/CodeGenFunction.h
+++ b/interpreter/llvm/src/tools/clang/lib/CodeGen/CodeGenFunction.h
@@ -116,10 +116,11 @@ enum TypeEvaluationKind {
   SANITIZER_CHECK(MulOverflow, mul_overflow, 0)                                \
   SANITIZER_CHECK(NegateOverflow, negate_overflow, 0)                          \
   SANITIZER_CHECK(NullabilityArg, nullability_arg, 0)                          \
-  SANITIZER_CHECK(NullabilityReturn, nullability_return, 0)                    \
+  SANITIZER_CHECK(NullabilityReturn, nullability_return, 1)                    \
   SANITIZER_CHECK(NonnullArg, nonnull_arg, 0)                                  \
-  SANITIZER_CHECK(NonnullReturn, nonnull_return, 0)                            \
+  SANITIZER_CHECK(NonnullReturn, nonnull_return, 1)                            \
   SANITIZER_CHECK(OutOfBounds, out_of_bounds, 0)                               \
+  SANITIZER_CHECK(PointerOverflow, pointer_overflow, 0)                        \
   SANITIZER_CHECK(ShiftOutOfBounds, shift_out_of_bounds, 0)                    \
   SANITIZER_CHECK(SubOverflow, sub_overflow, 0)                                \
   SANITIZER_CHECK(TypeMismatch, type_mismatch, 1)                              \
@@ -1406,6 +1407,17 @@ class CodeGenFunction : public CodeGenTypeCache {
     return RetValNullabilityPrecondition;
   }
 
+  /// Used to store precise source locations for return statements by the
+  /// runtime return value checks.
+  Address ReturnLocation = Address::invalid();
+
+  /// Check if the return value of this function requires sanitization.
+  bool requiresReturnValueCheck() const {
+    return requiresReturnValueNullabilityCheck() ||
+           (SanOpts.has(SanitizerKind::ReturnsNonnullAttribute) &&
+            CurCodeDecl && CurCodeDecl->getAttr<ReturnsNonNullAttr>());
+  }
+
   llvm::BasicBlock *TerminateLandingPad;
   llvm::BasicBlock *TerminateHandler;
   llvm::BasicBlock *TrapBB;
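
What requiresReturnValueCheck() guards, at the source level (illustrative example, not part of the patch):

  // With -fsanitize=returns-nonnull-attribute, a null return value is
  // reported at the offending return statement; ReturnLocation is what lets
  // the runtime point at that precise statement rather than the closing '}'.
  __attribute__((returns_nonnull)) char *must_not_return_null(char *p) {
    return p;
  }
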
@@ -1467,6 +1479,9 @@ class CodeGenFunction : public CodeGenTypeCache {
 
   const TargetInfo &getTarget() const { return Target; }
   llvm::LLVMContext &getLLVMContext() { return CGM.getLLVMContext(); }
+  const TargetCodeGenInfo &getTargetHooks() const {
+    return CGM.getTargetCodeGenInfo();
+  }
 
   //===--------------------------------------------------------------------===//
   //                                  Cleanups
@@ -1737,11 +1752,6 @@ class CodeGenFunction : public CodeGenTypeCache {
   llvm::Value *EmitVTableTypeCheckedLoad(const CXXRecordDecl *RD, llvm::Value *VTable,
                                          uint64_t VTableByteOffset);
 
-  /// CanDevirtualizeMemberFunctionCalls - Checks whether virtual calls on given
-  /// expr can be devirtualized.
-  bool CanDevirtualizeMemberFunctionCall(const Expr *Base,
-                                         const CXXMethodDecl *MD);
-
   /// EnterDtorCleanups - Enter the cleanups necessary to complete the
   /// given phase of destruction for a destructor.  The end result
   /// should call destructors on members and base classes in reverse
@@ -1777,7 +1787,7 @@ class CodeGenFunction : public CodeGenTypeCache {
                           SourceLocation EndLoc);
 
   /// Emit a test that checks if the return value \p RV is nonnull.
-  void EmitReturnValueCheck(llvm::Value *RV, SourceLocation EndLoc);
+  void EmitReturnValueCheck(llvm::Value *RV);
 
   /// EmitStartEHSpec - Emit the start of the exception spec.
   void EmitStartEHSpec(const Decl *D);
@@ -1886,40 +1896,65 @@ class CodeGenFunction : public CodeGenTypeCache {
   //===--------------------------------------------------------------------===//
 
   LValue MakeAddrLValue(Address Addr, QualType T,
-                        AlignmentSource AlignSource = AlignmentSource::Type) {
-    return LValue::MakeAddr(Addr, T, getContext(), AlignSource,
+                        LValueBaseInfo BaseInfo =
+                            LValueBaseInfo(AlignmentSource::Type)) {
+    return LValue::MakeAddr(Addr, T, getContext(), BaseInfo,
                             CGM.getTBAAInfo(T));
   }
 
   LValue MakeAddrLValue(llvm::Value *V, QualType T, CharUnits Alignment,
-                        AlignmentSource AlignSource = AlignmentSource::Type) {
+                        LValueBaseInfo BaseInfo =
+                            LValueBaseInfo(AlignmentSource::Type)) {
     return LValue::MakeAddr(Address(V, Alignment), T, getContext(),
-                            AlignSource, CGM.getTBAAInfo(T));
+                            BaseInfo, CGM.getTBAAInfo(T));
   }
 
   LValue MakeNaturalAlignPointeeAddrLValue(llvm::Value *V, QualType T);
   LValue MakeNaturalAlignAddrLValue(llvm::Value *V, QualType T);
   CharUnits getNaturalTypeAlignment(QualType T,
-                                    AlignmentSource *Source = nullptr,
+                                    LValueBaseInfo *BaseInfo = nullptr,
                                     bool forPointeeType = false);
   CharUnits getNaturalPointeeTypeAlignment(QualType T,
-                                           AlignmentSource *Source = nullptr);
+                                           LValueBaseInfo *BaseInfo = nullptr);
 
   Address EmitLoadOfReference(Address Ref, const ReferenceType *RefTy,
-                              AlignmentSource *Source = nullptr);
+                              LValueBaseInfo *BaseInfo = nullptr);
   LValue EmitLoadOfReferenceLValue(Address Ref, const ReferenceType *RefTy);
 
   Address EmitLoadOfPointer(Address Ptr, const PointerType *PtrTy,
-                            AlignmentSource *Source = nullptr);
+                            LValueBaseInfo *BaseInfo = nullptr);
   LValue EmitLoadOfPointerLValue(Address Ptr, const PointerType *PtrTy);
 
-  /// CreateTempAlloca - This creates a alloca and inserts it into the entry
-  /// block. The caller is responsible for setting an appropriate alignment on
+  /// CreateTempAlloca - This creates an alloca and inserts it into the entry
+  /// block if \p ArraySize is nullptr, otherwise inserts it at the current
+  /// insertion point of the builder. The caller is responsible for setting an
+  /// appropriate alignment on
   /// the alloca.
-  llvm::AllocaInst *CreateTempAlloca(llvm::Type *Ty,
-                                     const Twine &Name = "tmp");
+  ///
+  /// \p ArraySize is the number of array elements to be allocated if it
+  ///    is not nullptr.
+  ///
+  /// LangAS::Default is the address space of pointers to local variables and
+  /// temporaries, as exposed in the source language. In certain
+  /// configurations, this is not the same as the alloca address space, and a
+  /// cast is needed to lift the pointer from the alloca AS into
+  /// LangAS::Default. This can happen when the target uses a restricted
+  /// address space for the stack but the source language requires
+  /// LangAS::Default to be a generic address space. The latter condition is
+  /// common for most programming languages; OpenCL is an exception in that
+  /// LangAS::Default is the private address space, which naturally maps
+  /// to the stack.
+  ///
+  /// Because the address of a temporary is often exposed to the program in
+  /// various ways, this function will perform the cast by default. The cast
+  /// may be avoided by passing false as \p CastToDefaultAddrSpace; this is
+  /// more efficient if the caller knows that the address will not be exposed.
+  llvm::AllocaInst *CreateTempAlloca(llvm::Type *Ty, const Twine &Name = "tmp",
+                                     llvm::Value *ArraySize = nullptr);
   Address CreateTempAlloca(llvm::Type *Ty, CharUnits align,
-                           const Twine &Name = "tmp");
+                           const Twine &Name = "tmp",
+                           llvm::Value *ArraySize = nullptr,
+                           bool CastToDefaultAddrSpace = true);
 
   /// CreateDefaultAlignedTempAlloca - This creates an alloca with the
   /// default ABI alignment of the given LLVM type.
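
The CastToDefaultAddrSpace parameter matters on targets whose alloca address space differs from LangAS::Default; a sketch of the source-level effect (illustrative, IR in the comments is approximate):

  // On e.g. amdgcn triples the stack lives in a private address space:
  void escape(int **out) {
    int x = 0;   // %x = alloca i32, addrspace(5)
    *out = &x;   // addrspacecast to the generic AS, i.e. LangAS::Default
  }
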
@@ -1954,9 +1989,12 @@ class CodeGenFunction : public CodeGenTypeCache {
   Address CreateIRTemp(QualType T, const Twine &Name = "tmp");
 
   /// CreateMemTemp - Create a temporary memory object of the given type, with
-  /// appropriate alignment.
-  Address CreateMemTemp(QualType T, const Twine &Name = "tmp");
-  Address CreateMemTemp(QualType T, CharUnits Align, const Twine &Name = "tmp");
+  /// appropriate alignment. Cast it to the default address space if
+  /// \p CastToDefaultAddrSpace is true.
+  Address CreateMemTemp(QualType T, const Twine &Name = "tmp",
+                        bool CastToDefaultAddrSpace = true);
+  Address CreateMemTemp(QualType T, CharUnits Align, const Twine &Name = "tmp",
+                        bool CastToDefaultAddrSpace = true);
 
   /// CreateAggTemp - Create a temporary memory object for the given
   /// aggregate type.
@@ -2547,9 +2585,11 @@ class CodeGenFunction : public CodeGenTypeCache {
   RValue EmitCoawaitExpr(const CoawaitExpr &E,
                          AggValueSlot aggSlot = AggValueSlot::ignored(),
                          bool ignoreResult = false);
+  LValue EmitCoawaitLValue(const CoawaitExpr *E);
   RValue EmitCoyieldExpr(const CoyieldExpr &E,
                          AggValueSlot aggSlot = AggValueSlot::ignored(),
                          bool ignoreResult = false);
+  LValue EmitCoyieldLValue(const CoyieldExpr *E);
   RValue EmitCoroutineIntrinsic(const CallExpr *E, unsigned int IID);
 
   void EnterCXXTryStmt(const CXXTryStmt &S, bool IsFnTryBlock = false);
@@ -2992,8 +3032,8 @@ class CodeGenFunction : public CodeGenTypeCache {
   /// the LLVM value representation.
   llvm::Value *EmitLoadOfScalar(Address Addr, bool Volatile, QualType Ty,
                                 SourceLocation Loc,
-                                AlignmentSource AlignSource =
-                                  AlignmentSource::Type,
+                                LValueBaseInfo BaseInfo =
+                                    LValueBaseInfo(AlignmentSource::Type),
                                 llvm::MDNode *TBAAInfo = nullptr,
                                 QualType TBAABaseTy = QualType(),
                                 uint64_t TBAAOffset = 0,
@@ -3010,7 +3050,8 @@ class CodeGenFunction : public CodeGenTypeCache {
   /// the LLVM value representation.
   void EmitStoreOfScalar(llvm::Value *Value, Address Addr,
                          bool Volatile, QualType Ty,
-                         AlignmentSource AlignSource = AlignmentSource::Type,
+                         LValueBaseInfo BaseInfo =
+                             LValueBaseInfo(AlignmentSource::Type),
                          llvm::MDNode *TBAAInfo = nullptr, bool isInit = false,
                          QualType TBAABaseTy = QualType(),
                          uint64_t TBAAOffset = 0, bool isNontemporal = false);
@@ -3083,7 +3124,7 @@ class CodeGenFunction : public CodeGenTypeCache {
   RValue EmitRValueForField(LValue LV, const FieldDecl *FD, SourceLocation Loc);
 
   Address EmitArrayToPointerDecay(const Expr *Array,
-                                  AlignmentSource *AlignSource = nullptr);
+                                  LValueBaseInfo *BaseInfo = nullptr);
 
   class ConstantEmission {
     llvm::PointerIntPair<llvm::Constant*, 1, bool> ValueAndIsReference;
@@ -3224,7 +3265,7 @@ class CodeGenFunction : public CodeGenTypeCache {
   Address EmitCXXMemberDataPointerAddress(const Expr *E, Address base,
                                           llvm::Value *memberPtr,
                                           const MemberPointerType *memberPtrType,
-                                          AlignmentSource *AlignSource = nullptr);
+                                          LValueBaseInfo *BaseInfo = nullptr);
   RValue EmitCXXMemberPointerCallExpr(const CXXMemberCallExpr *E,
                                       ReturnValueSlot ReturnValue);
 
@@ -3548,6 +3589,22 @@ class CodeGenFunction : public CodeGenTypeCache {
   /// nonnull, if \p LHS is marked _Nonnull.
   void EmitNullabilityCheck(LValue LHS, llvm::Value *RHS, SourceLocation Loc);
 
+  /// An enumeration which makes it easier to specify whether or not an
+  /// operation is a subtraction.
+  enum { NotSubtraction = false, IsSubtraction = true };
+
+  /// Same as IRBuilder::CreateInBoundsGEP, but additionally emits a check to
+  /// detect undefined behavior when the pointer overflow sanitizer is enabled.
+  /// \p SignedIndices indicates whether any of the GEP indices are signed.
+  /// \p IsSubtraction indicates whether the expression used to form the GEP
+  /// is a subtraction.
+  llvm::Value *EmitCheckedInBoundsGEP(llvm::Value *Ptr,
+                                      ArrayRef<llvm::Value *> IdxList,
+                                      bool SignedIndices,
+                                      bool IsSubtraction,
+                                      SourceLocation Loc,
+                                      const Twine &Name = "");
+
   /// \brief Emit a description of a type in a format suitable for passing to
   /// a runtime sanitizer handler.
   llvm::Constant *EmitCheckTypeDescriptor(QualType T);
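
What EmitCheckedInBoundsGEP instruments, at the source level (illustrative example, not part of the patch):

  #include <cstddef>

  char *advance(char *p, std::size_t n) {
    // With -fsanitize=pointer-overflow this pointer arithmetic is emitted via
    // EmitCheckedInBoundsGEP and diagnosed if p + n wraps the address space,
    // even when the result is never dereferenced.
    return p + n;
  }
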
@@ -3743,39 +3800,31 @@ class CodeGenFunction : public CodeGenTypeCache {
                     unsigned ParamsToSkip = 0,
                     EvaluationOrder Order = EvaluationOrder::Default);
 
-  /// EmitPointerWithAlignment - Given an expression with a pointer
-  /// type, emit the value and compute our best estimate of the
-  /// alignment of the pointee.
+  /// EmitPointerWithAlignment - Given an expression with a pointer type,
+  /// emit the value and compute our best estimate of the alignment of the
+  /// pointee.
   ///
-  /// Note that this function will conservatively fall back on the type
-  /// when it doesn't recognize the expression.
+  /// \param BaseInfo - If non-null, this will be initialized with
+  /// information about the source of the alignment and the may-alias
+  /// attribute.  Note that this function will conservatively fall back on
+  /// the type when it doesn't recognize the expression and may-alias will
+  /// be set to false.
   ///
-  /// \param Source - If non-null, this will be initialized with
-  ///   information about the source of the alignment.  Note that this
-  ///   function will conservatively fall back on the type when it
-  ///   doesn't recognize the expression, which means that sometimes
-  ///   the source will conservatively be treated as
-  ///   a worst-case.  One
-  ///   reasonable way to use this information is when there's a
-  ///   language guarantee that the pointer must be aligned to some
-  ///   stricter value, and we're simply trying to ensure that
-  ///   sufficiently obvious uses of under-aligned objects don't get
-  ///   miscompiled; for example, a placement new into the address of
-  ///   a local variable.  In such a case, it's quite reasonable to
-  ///   just ignore the returned alignment when it isn't from an
-  ///   explicit source.
+  /// One reasonable way to use this information is when there's a language
+  /// guarantee that the pointer must be aligned to some stricter value, and
+  /// we're simply trying to ensure that sufficiently obvious uses of under-
+  /// aligned objects don't get miscompiled; for example, a placement new
+  /// into the address of a local variable.  In such a case, it's quite
+  /// reasonable to just ignore the returned alignment when it isn't from an
+  /// explicit source.
   Address EmitPointerWithAlignment(const Expr *Addr,
-                                   AlignmentSource *Source = nullptr);
+                                   LValueBaseInfo *BaseInfo = nullptr);
 
   void EmitSanitizerStatReport(llvm::SanitizerStatKind SSK);
 
 private:
   QualType getVarArgType(const Expr *Arg);
 
-  const TargetCodeGenInfo &getTargetHooks() const {
-    return CGM.getTargetCodeGenInfo();
-  }
-
   void EmitDeclMetadata();
 
   BlockByrefHelpers *buildByrefHelpers(llvm::StructType &byrefType,
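
A sketch of the placement-new scenario the comment above describes (illustrative example, not part of the patch):

  #include <new>

  struct Vec { float x, y, z, w; };
  alignas(Vec) char buf[sizeof(Vec)];

  Vec *make() {
    // EmitPointerWithAlignment only sees a 'char*' here and conservatively
    // falls back to alignof(char); the alignas guarantee on 'buf' is the
    // language-level reason it is safe to ignore that estimate.
    return new (buf) Vec{};
  }
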
diff --git a/interpreter/llvm/src/tools/clang/lib/CodeGen/CodeGenModule.cpp b/interpreter/llvm/src/tools/clang/lib/CodeGen/CodeGenModule.cpp
index d31d20efc15ff..13e30819641fd 100644
--- a/interpreter/llvm/src/tools/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/CodeGen/CodeGenModule.cpp
@@ -45,6 +45,7 @@
 #include "clang/Frontend/CodeGenOptions.h"
 #include "clang/Sema/SemaDiagnostic.h"
 #include "llvm/ADT/Triple.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/IR/CallSite.h"
 #include "llvm/IR/CallingConv.h"
 #include "llvm/IR/DataLayout.h"
@@ -113,6 +114,7 @@ CodeGenModule::CodeGenModule(ASTContext &C, const HeaderSearchOptions &HSO,
   Int8PtrPtrTy = Int8PtrTy->getPointerTo(0);
   AllocaInt8PtrTy = Int8Ty->getPointerTo(
       M.getDataLayout().getAllocaAddrSpace());
+  ASTAllocaAddressSpace = getTargetCodeGenInfo().getASTAllocaAddressSpace();
 
   RuntimeCC = getTargetCodeGenInfo().getABIInfo().getRuntimeCC();
   BuiltinCC = getTargetCodeGenInfo().getABIInfo().getBuiltinCC();
@@ -384,6 +386,7 @@ void CodeGenModule::Release() {
   DeferredDecls.insert(EmittedDeferredDecls.begin(),
                        EmittedDeferredDecls.end());
   EmittedDeferredDecls.clear();
+  EmitVTablesOpportunistically();
   applyGlobalValReplacements();
   applyReplacements();
   checkAliases();
@@ -402,8 +405,11 @@ void CodeGenModule::Release() {
   }
   if (OpenMPRuntime)
     if (llvm::Function *OpenMPRegistrationFunction =
-            OpenMPRuntime->emitRegistrationFunction())
-      AddGlobalCtor(OpenMPRegistrationFunction, 0);
+            OpenMPRuntime->emitRegistrationFunction()) {
+      auto ComdatKey = OpenMPRegistrationFunction->hasComdat() ?
+        OpenMPRegistrationFunction : nullptr;
+      AddGlobalCtor(OpenMPRegistrationFunction, 0, ComdatKey);
+    }
   if (PGOReader) {
     getModule().setProfileSummary(PGOReader->getSummary().getMD(VMContext));
     if (PGOStats.hasDiagnostics())
@@ -468,18 +474,24 @@ void CodeGenModule::Release() {
     getModule().addModuleFlag(llvm::Module::Warning, "Debug Info Version",
                               llvm::DEBUG_METADATA_VERSION);
 
+  // Width of wchar_t in bytes
+  uint64_t WCharWidth =
+      Context.getTypeSizeInChars(Context.getWideCharType()).getQuantity();
+  assert((LangOpts.ShortWChar ||
+          llvm::TargetLibraryInfoImpl::getTargetWCharSize(Target.getTriple()) ==
+              Target.getWCharWidth() / 8) &&
+         "LLVM wchar_t size out of sync");
+
   // We need to record the widths of enums and wchar_t, so that we can generate
-  // the correct build attributes in the ARM backend.
+  // the correct build attributes in the ARM backend. wchar_size is also used by
+  // TargetLibraryInfo.
+  getModule().addModuleFlag(llvm::Module::Error, "wchar_size", WCharWidth);
+
   llvm::Triple::ArchType Arch = Context.getTargetInfo().getTriple().getArch();
   if (   Arch == llvm::Triple::arm
       || Arch == llvm::Triple::armeb
       || Arch == llvm::Triple::thumb
       || Arch == llvm::Triple::thumbeb) {
-    // Width of wchar_t in bytes
-    uint64_t WCharWidth =
-        Context.getTypeSizeInChars(Context.getWideCharType()).getQuantity();
-    getModule().addModuleFlag(llvm::Module::Error, "wchar_size", WCharWidth);
-
     // The minimum width of an enum in bytes
     uint64_t EnumWidth = Context.getLangOpts().ShortEnums ? 1 : 4;
     getModule().addModuleFlag(llvm::Module::Error, "min_enum_size", EnumWidth);
@@ -498,6 +510,26 @@ void CodeGenModule::Release() {
                               LangOpts.CUDADeviceFlushDenormalsToZero ? 1 : 0);
   }
 
+  // Emit OpenCL-specific module metadata: the OpenCL/SPIR version.
+  if (LangOpts.OpenCL) {
+    EmitOpenCLMetadata();
+    // Emit SPIR version.
+    if (getTriple().getArch() == llvm::Triple::spir ||
+        getTriple().getArch() == llvm::Triple::spir64) {
+      // SPIR v2.0 s2.12 - The SPIR version used by the module is stored in the
+      // opencl.spir.version named metadata.
+      llvm::Metadata *SPIRVerElts[] = {
+          llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
+              Int32Ty, LangOpts.OpenCLVersion / 100)),
+          llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
+              Int32Ty, (LangOpts.OpenCLVersion / 100 > 1) ? 0 : 2))};
+      llvm::NamedMDNode *SPIRVerMD =
+          TheModule.getOrInsertNamedMetadata("opencl.spir.version");
+      llvm::LLVMContext &Ctx = TheModule.getContext();
+      SPIRVerMD->addOperand(llvm::MDNode::get(Ctx, SPIRVerElts));
+    }
+  }
+
   if (uint32_t PLevel = Context.getLangOpts().PICLevel) {
     assert(PLevel < 3 && "Invalid PIC Level");
     getModule().setPICLevel(static_cast<llvm::PICLevel::Level>(PLevel));
@@ -521,6 +553,20 @@ void CodeGenModule::Release() {
   EmitTargetMetadata();
 }
 
+void CodeGenModule::EmitOpenCLMetadata() {
+  // SPIR v2.0 s2.13 - The OpenCL version used by the module is stored in the
+  // opencl.ocl.version named metadata node.
+  llvm::Metadata *OCLVerElts[] = {
+      llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
+          Int32Ty, LangOpts.OpenCLVersion / 100)),
+      llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
+          Int32Ty, (LangOpts.OpenCLVersion % 100) / 10))};
+  llvm::NamedMDNode *OCLVerMD =
+      TheModule.getOrInsertNamedMetadata("opencl.ocl.version");
+  llvm::LLVMContext &Ctx = TheModule.getContext();
+  OCLVerMD->addOperand(llvm::MDNode::get(Ctx, OCLVerElts));
+}
+
 void CodeGenModule::UpdateCompletedType(const TagDecl *TD) {
   // Make sure that this type is translated.
   Types.UpdateCompletedType(TD);
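
A worked example of the encoding in EmitOpenCLMetadata (illustrative): for -cl-std=CL2.0, LangOpts.OpenCLVersion is 200, so major = 200 / 100 = 2 and minor = (200 % 100) / 10 = 0, producing module metadata roughly like:

  !opencl.ocl.version = !{!0}
  !0 = !{i32 2, i32 0}
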
@@ -916,7 +962,16 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D,
     return;
   }
 
-  if (D->hasAttr<OptimizeNoneAttr>()) {
+  // Track whether we need to add the optnone LLVM attribute,
+  // starting with the default for this optimization level.
+  bool ShouldAddOptNone =
+      !CodeGenOpts.DisableO0ImplyOptNone && CodeGenOpts.OptimizationLevel == 0;
+  // We can't add optnone in the following cases; it won't pass the verifier.
+  ShouldAddOptNone &= !D->hasAttr<MinSizeAttr>();
+  ShouldAddOptNone &= !F->hasFnAttribute(llvm::Attribute::AlwaysInline);
+  ShouldAddOptNone &= !D->hasAttr<AlwaysInlineAttr>();
+
+  if (ShouldAddOptNone || D->hasAttr<OptimizeNoneAttr>()) {
     B.addAttribute(llvm::Attribute::OptimizeNone);
 
     // OptimizeNone implies noinline; we should not be inlining such functions.
@@ -970,7 +1025,8 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D,
   // function.
   if (!D->hasAttr<OptimizeNoneAttr>()) {
     if (D->hasAttr<ColdAttr>()) {
-      B.addAttribute(llvm::Attribute::OptimizeForSize);
+      if (!ShouldAddOptNone)
+        B.addAttribute(llvm::Attribute::OptimizeForSize);
       B.addAttribute(llvm::Attribute::Cold);
     }
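
The interplay being handled above, at the source level (illustrative example, not part of the patch):

  // At -O0 every function now becomes optnone by default, but the IR verifier
  // rejects optnone combined with alwaysinline, so always_inline wins:
  __attribute__((always_inline)) inline int twice(int x) { return 2 * x; }
  int user(int x) { return twice(x); }  // 'user' gets optnone; 'twice' does not
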
 
@@ -1024,9 +1080,25 @@ void CodeGenModule::setNonAliasAttributes(const Decl *D,
                                           llvm::GlobalObject *GO) {
   SetCommonAttributes(D, GO);
 
-  if (D)
+  if (D) {
+    if (auto *GV = dyn_cast<llvm::GlobalVariable>(GO)) {
+      if (auto *SA = D->getAttr<PragmaClangBSSSectionAttr>())
+        GV->addAttribute("bss-section", SA->getName());
+      if (auto *SA = D->getAttr<PragmaClangDataSectionAttr>())
+        GV->addAttribute("data-section", SA->getName());
+      if (auto *SA = D->getAttr<PragmaClangRodataSectionAttr>())
+        GV->addAttribute("rodata-section", SA->getName());
+    }
+
+    if (auto *F = dyn_cast<llvm::Function>(GO)) {
+      if (auto *SA = D->getAttr<PragmaClangTextSectionAttr>())
+       if (!D->getAttr<SectionAttr>())
+         F->addFnAttr("implicit-section-name", SA->getName());
+    }
+
     if (const SectionAttr *SA = D->getAttr<SectionAttr>())
       GO->setSection(SA->getName());
+  }
 
   getTargetCodeGenInfo().setTargetAttributes(D, GO, *this);
 }
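
The bss/data/rodata attributes consumed above originate from '#pragma clang section'; a usage sketch (illustrative, not part of the patch):

  #pragma clang section bss = ".fast_bss" data = ".fast_data"
  int zeroed;      // carries GV attribute "bss-section" = ".fast_bss"
  int filled = 1;  // carries GV attribute "data-section" = ".fast_data"
  #pragma clang section bss = "" data = ""  // restore defaults
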
@@ -1046,7 +1118,7 @@ static void setLinkageAndVisibilityForGV(llvm::GlobalValue *GV,
                                          const NamedDecl *ND) {
   // Set linkage and visibility in case we never see a definition.
   LinkageInfo LV = ND->getLinkageAndVisibility();
-  if (LV.getLinkage() != ExternalLinkage) {
+  if (!isExternallyVisible(LV.getLinkage())) {
     // Don't set internal linkage on declarations.
   } else {
     if (ND->hasAttr<DLLImportAttr>()) {
@@ -1125,6 +1197,10 @@ void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F,
 
   setLinkageAndVisibilityForGV(F, FD);
 
+  if (FD->getAttr<PragmaClangTextSectionAttr>()) {
+    F->addFnAttr("implicit-section-name");
+  }
+
   if (const SectionAttr *SA = FD->getAttr<SectionAttr>())
     F->setSection(SA->getName());
 
@@ -1223,7 +1299,7 @@ void CodeGenModule::AddDependentLib(StringRef Lib) {
 /// \brief Add link options implied by the given module, including modules
 /// it depends on, using a postorder walk.
 static void addLinkOptionsPostorder(CodeGenModule &CGM, Module *Mod,
-                                    SmallVectorImpl<llvm::Metadata *> &Metadata,
+                                    SmallVectorImpl<llvm::MDNode *> &Metadata,
                                     llvm::SmallPtrSet<Module *, 16> &Visited) {
   // Import this module's parent.
   if (Mod->Parent && Visited.insert(Mod->Parent).second) {
@@ -1311,7 +1387,7 @@ void CodeGenModule::EmitModuleLinkOptions() {
   // Add link options for all of the imported modules in reverse topological
   // order.  We don't do anything to try to order import link flags with respect
   // to linker options inserted by things like #pragma comment().
-  SmallVector<llvm::Metadata *, 16> MetadataArgs;
+  SmallVector<llvm::MDNode *, 16> MetadataArgs;
   Visited.clear();
   for (Module *M : LinkModules)
     if (Visited.insert(M).second)
@@ -1320,9 +1396,9 @@ void CodeGenModule::EmitModuleLinkOptions() {
   LinkerOptionsMetadata.append(MetadataArgs.begin(), MetadataArgs.end());
 
   // Add the linker options metadata flag.
-  getModule().addModuleFlag(llvm::Module::AppendUnique, "Linker Options",
-                            llvm::MDNode::get(getLLVMContext(),
-                                              LinkerOptionsMetadata));
+  auto *NMD = getModule().getOrInsertNamedMetadata("llvm.linker.options");
+  for (auto *MD : LinkerOptionsMetadata)
+    NMD->addOperand(MD);
 }
 
 void CodeGenModule::EmitDeferred() {
@@ -1388,6 +1464,24 @@ void CodeGenModule::EmitDeferred() {
   }
 }
 
+void CodeGenModule::EmitVTablesOpportunistically() {
+  // Try to emit external vtables as available_externally if all of their
+  // inline virtual functions have been emitted.  This runs after
+  // EmitDeferred() and therefore is not allowed to create new references to
+  // things that need to be emitted lazily. Note that it also relies on the
+  // fact that we emit RTTI eagerly.
+
+  assert((OpportunisticVTables.empty() || shouldOpportunisticallyEmitVTables())
+         && "Only emit opportunistic vtables with optimizations");
+
+  for (const CXXRecordDecl *RD : OpportunisticVTables) {
+    assert(getVTables().isVTableExternal(RD) &&
+           "This queue should only contain external vtables");
+    if (getCXXABI().canSpeculativelyEmitVTable(RD))
+      VTables.GenerateClassData(RD);
+  }
+  OpportunisticVTables.clear();
+}
+
 void CodeGenModule::EmitGlobalAnnotations() {
   if (Annotations.empty())
     return;
@@ -1526,6 +1620,10 @@ bool CodeGenModule::imbueXRayAttrs(llvm::Function *Fn, SourceLocation Loc,
   case ImbueAttr::ALWAYS:
     Fn->addFnAttr("function-instrument", "xray-always");
     break;
+  case ImbueAttr::ALWAYS_ARG1:
+    Fn->addFnAttr("function-instrument", "xray-always");
+    Fn->addFnAttr("xray-log-args", "1");
+    break;
   case ImbueAttr::NEVER:
     Fn->addFnAttr("function-instrument", "xray-never");
     break;
@@ -1908,6 +2006,10 @@ bool CodeGenModule::shouldEmitFunction(GlobalDecl GD) {
   return !isTriviallyRecursive(F);
 }
 
+bool CodeGenModule::shouldOpportunisticallyEmitVTables() {
+  return CodeGenOpts.OptimizationLevel > 0;
+}
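
The class of cases EmitVTablesOpportunistically targets (illustrative example, not part of the patch):

  struct S {
    virtual void key();        // key function defined in another TU, so the
    virtual void inlined() {}  // vtable is external in this TU
  };
  // Once 'inlined' has been emitted here, the vtable can be emitted
  // available_externally at -O1 and above, enabling devirtualization.
  void use(S &s) { s.inlined(); }
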
+
 void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD, llvm::GlobalValue *GV) {
   const auto *D = cast<ValueDecl>(GD.getDecl());
 
@@ -2292,11 +2394,13 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName,
       return llvm::ConstantExpr::getBitCast(Entry, Ty);
   }
 
-  unsigned AddrSpace = GetGlobalVarAddressSpace(D, Ty->getAddressSpace());
+  auto AddrSpace = GetGlobalVarAddressSpace(D);
+  auto TargetAddrSpace = getContext().getTargetAddressSpace(AddrSpace);
+
   auto *GV = new llvm::GlobalVariable(
       getModule(), Ty->getElementType(), false,
       llvm::GlobalValue::ExternalLinkage, nullptr, MangledName, nullptr,
-      llvm::GlobalVariable::NotThreadLocal, AddrSpace);
+      llvm::GlobalVariable::NotThreadLocal, TargetAddrSpace);
 
   // If we already created a global with the same mangled name (but different
   // type) before, take its name and remove it from its parent.
@@ -2353,8 +2457,15 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName,
       GV->setSection(".cp.rodata");
   }
 
-  if (AddrSpace != Ty->getAddressSpace())
-    return llvm::ConstantExpr::getAddrSpaceCast(GV, Ty);
+  auto ExpectedAS =
+      D ? D->getType().getAddressSpace()
+        : static_cast<unsigned>(LangOpts.OpenCL ? LangAS::opencl_global
+                                                : LangAS::Default);
+  assert(getContext().getTargetAddressSpace(ExpectedAS) ==
+         Ty->getPointerAddressSpace());
+  if (AddrSpace != ExpectedAS)
+    return getTargetCodeGenInfo().performAddrSpaceCast(*this, GV, AddrSpace,
+                                                       ExpectedAS, Ty);
 
   return GV;
 }
@@ -2488,18 +2599,28 @@ CharUnits CodeGenModule::GetTargetTypeStoreSize(llvm::Type *Ty) const {
       getDataLayout().getTypeStoreSizeInBits(Ty));
 }
 
-unsigned CodeGenModule::GetGlobalVarAddressSpace(const VarDecl *D,
-                                                 unsigned AddrSpace) {
-  if (D && LangOpts.CUDA && LangOpts.CUDAIsDevice) {
-    if (D->hasAttr<CUDAConstantAttr>())
-      AddrSpace = getContext().getTargetAddressSpace(LangAS::cuda_constant);
-    else if (D->hasAttr<CUDASharedAttr>())
-      AddrSpace = getContext().getTargetAddressSpace(LangAS::cuda_shared);
+unsigned CodeGenModule::GetGlobalVarAddressSpace(const VarDecl *D) {
+  unsigned AddrSpace;
+  if (LangOpts.OpenCL) {
+    AddrSpace = D ? D->getType().getAddressSpace()
+                  : static_cast<unsigned>(LangAS::opencl_global);
+    assert(AddrSpace == LangAS::opencl_global ||
+           AddrSpace == LangAS::opencl_constant ||
+           AddrSpace == LangAS::opencl_local ||
+           AddrSpace >= LangAS::FirstTargetAddressSpace);
+    return AddrSpace;
+  }
+
+  if (LangOpts.CUDA && LangOpts.CUDAIsDevice) {
+    if (D && D->hasAttr<CUDAConstantAttr>())
+      return LangAS::cuda_constant;
+    else if (D && D->hasAttr<CUDASharedAttr>())
+      return LangAS::cuda_shared;
     else
-      AddrSpace = getContext().getTargetAddressSpace(LangAS::cuda_device);
+      return LangAS::cuda_device;
   }
 
-  return AddrSpace;
+  return getTargetCodeGenInfo().getGlobalVarAddressSpace(*this, D);
 }
 
 template <typename SomeDecl>
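
The CUDA branch above maps declaration attributes, not types, to address spaces; a sketch (illustrative CUDA source, not part of the patch):

  __constant__ float lut[256];  // -> LangAS::cuda_constant
  __device__ int counter;       // -> LangAS::cuda_device (also the fallback)
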
@@ -2652,10 +2773,9 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D,
   // "extern int x[];") and then a definition of a different type (e.g.
   // "int x[10];"). This also happens when an initializer has a different type
   // from the type of the global (this happens with unions).
-  if (!GV ||
-      GV->getType()->getElementType() != InitType ||
+  if (!GV || GV->getType()->getElementType() != InitType ||
       GV->getType()->getAddressSpace() !=
-       GetGlobalVarAddressSpace(D, getContext().getTargetAddressSpace(ASTTy))) {
+          getContext().getTargetAddressSpace(GetGlobalVarAddressSpace(D))) {
 
     // Move the old entry aside so that we'll create a new one.
     Entry->setName(StringRef());
@@ -2807,6 +2927,14 @@ static bool isVarDeclStrongDefinition(const ASTContext &Context,
   if (D->hasAttr<SectionAttr>())
     return true;
 
+  // A variable cannot be both common and exist in a section.
+  // We don't try to determine which is the right section in the front-end.
+  // If no specialized section name is applicable, it will resort to the
+  // default.
+  if (D->hasAttr<PragmaClangBSSSectionAttr>() ||
+      D->hasAttr<PragmaClangDataSectionAttr>() ||
+      D->hasAttr<PragmaClangRodataSectionAttr>())
+    return true;
+
   // Thread local vars aren't considered common linkage.
   if (D->getTLSKind())
     return true;
@@ -3656,20 +3784,26 @@ ConstantAddress CodeGenModule::GetAddrOfGlobalTemporary(
       Linkage = llvm::GlobalVariable::InternalLinkage;
     }
   }
-  unsigned AddrSpace = GetGlobalVarAddressSpace(
-      VD, getContext().getTargetAddressSpace(MaterializedType));
+  unsigned AddrSpace =
+      VD ? GetGlobalVarAddressSpace(VD) : MaterializedType.getAddressSpace();
+  auto TargetAS = getContext().getTargetAddressSpace(AddrSpace);
   auto *GV = new llvm::GlobalVariable(
       getModule(), Type, Constant, Linkage, InitialValue, Name.c_str(),
-      /*InsertBefore=*/nullptr, llvm::GlobalVariable::NotThreadLocal,
-      AddrSpace);
+      /*InsertBefore=*/nullptr, llvm::GlobalVariable::NotThreadLocal, TargetAS);
   setGlobalVisibility(GV, VD);
   GV->setAlignment(Align.getQuantity());
   if (supportsCOMDAT() && GV->isWeakForLinker())
     GV->setComdat(TheModule.getOrInsertComdat(GV->getName()));
   if (VD->getTLSKind())
     setTLSMode(GV, *VD);
-  MaterializedGlobalTemporaryMap[E] = GV;
-  return ConstantAddress(GV, Align);
+  llvm::Constant *CV = GV;
+  if (AddrSpace != LangAS::Default)
+    CV = getTargetCodeGenInfo().performAddrSpaceCast(
+        *this, GV, AddrSpace, LangAS::Default,
+        Type->getPointerTo(
+            getContext().getTargetAddressSpace(LangAS::Default)));
+  MaterializedGlobalTemporaryMap[E] = CV;
+  return ConstantAddress(CV, Align);
 }
 
 /// EmitObjCPropertyImplementations - Emit information for synthesized
@@ -3820,6 +3954,7 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) {
     // Skip variable templates
     if (cast<VarDecl>(D)->getDescribedVarTemplate())
       return;
+    LLVM_FALLTHROUGH;
   case Decl::VarTemplateSpecialization:
     EmitGlobal(cast<VarDecl>(D));
     if (auto *DD = dyn_cast<DecompositionDecl>(D))
@@ -4394,18 +4529,19 @@ void CodeGenModule::getFunctionFeatureMap(llvm::StringMap<bool> &FeatureMap,
 
     // Make a copy of the features as passed on the command line into the
     // beginning of the additional features from the function to override.
-    ParsedAttr.first.insert(ParsedAttr.first.begin(),
+    ParsedAttr.Features.insert(ParsedAttr.Features.begin(),
                             Target.getTargetOpts().FeaturesAsWritten.begin(),
                             Target.getTargetOpts().FeaturesAsWritten.end());
 
-    if (ParsedAttr.second != "")
-      TargetCPU = ParsedAttr.second;
+    if (ParsedAttr.Architecture != "")
+      TargetCPU = ParsedAttr.Architecture;
 
     // Now populate the feature map, first with the TargetCPU which is either
     // the default or a new one from the target attribute string. Then we'll use
     // the passed in features (FeaturesAsWritten) along with the new ones from
     // the attribute.
-    Target.initFeatureMap(FeatureMap, getDiags(), TargetCPU, ParsedAttr.first);
+    Target.initFeatureMap(FeatureMap, getDiags(), TargetCPU,
+                          ParsedAttr.Features);
   } else {
     Target.initFeatureMap(FeatureMap, getDiags(), TargetCPU,
                           Target.getTargetOpts().Features);
diff --git a/interpreter/llvm/src/tools/clang/lib/CodeGen/CodeGenModule.h b/interpreter/llvm/src/tools/clang/lib/CodeGen/CodeGenModule.h
index 291ee09ab5423..33dd0c4e9e828 100644
--- a/interpreter/llvm/src/tools/clang/lib/CodeGen/CodeGenModule.h
+++ b/interpreter/llvm/src/tools/clang/lib/CodeGen/CodeGenModule.h
@@ -370,6 +370,9 @@ class CodeGenModule : public CodeGenTypeCache {
   /// A queue of (optional) vtables to consider emitting.
   std::vector<const CXXRecordDecl *> DeferredVTables;
 
+  /// A queue of (optional) vtables that may be emitted opportunistically.
+  std::vector<const CXXRecordDecl *> OpportunisticVTables;
+
   /// List of global values which are required to be present in the object file;
   /// bitcast to i8*. This is used for forcing visibility of symbols which may
   /// otherwise be optimized out.
@@ -455,7 +458,7 @@ class CodeGenModule : public CodeGenTypeCache {
   llvm::SmallPtrSet<clang::Module *, 16> EmittedModuleInitializers;
 
   /// \brief A vector of metadata strings.
-  SmallVector<llvm::Metadata *, 16> LinkerOptionsMetadata;
+  SmallVector<llvm::MDNode *, 16> LinkerOptionsMetadata;
 
   /// @name Cache for Objective-C runtime types
   /// @{
@@ -479,7 +482,7 @@ class CodeGenModule : public CodeGenTypeCache {
 
   bool isTriviallyRecursive(const FunctionDecl *F);
   bool shouldEmitFunction(GlobalDecl GD);
-
+  bool shouldOpportunisticallyEmitVTables();
   /// Map used to be sure we don't emit the same CompoundLiteral twice.
   llvm::DenseMap<const CompoundLiteralExpr *, llvm::GlobalVariable *>
       EmittedCompoundLiterals;
@@ -736,11 +739,15 @@ class CodeGenModule : public CodeGenTypeCache {
                                      SourceLocation Loc = SourceLocation(),
                                      bool TLS = false);
 
-  /// Return the address space of the underlying global variable for D, as
+  /// Return the AST address space of the underlying global variable for D, as
   /// determined by its declaration. Normally this is the same as the address
   /// space of D's type, but in CUDA, address spaces are associated with
-  /// declarations, not types.
-  unsigned GetGlobalVarAddressSpace(const VarDecl *D, unsigned AddrSpace);
+  /// declarations, not types. If D is nullptr, return the default address
+  /// space for a global variable.
+  ///
+  /// For languages without explicit address spaces, if D has the default
+  /// address space, a target-specific global or constant address space may be
+  /// returned.
+  unsigned GetGlobalVarAddressSpace(const VarDecl *D);
 
   /// Return the llvm::Constant for the address of the given global variable.
   /// If Ty is non-null and if the global doesn't exist, then it will be created
@@ -1084,13 +1091,14 @@ class CodeGenModule : public CodeGenTypeCache {
 
   void RefreshTypeCacheForClass(const CXXRecordDecl *Class);
 
-  /// \brief Appends Opts to the "Linker Options" metadata value.
+  /// \brief Appends Opts to the "llvm.linker.options" metadata value.
   void AppendLinkerOptions(StringRef Opts);
 
   /// \brief Appends a detect mismatch command to the linker options.
   void AddDetectMismatch(StringRef Name, StringRef Value);
 
-  /// \brief Appends a dependent lib to the "Linker Options" metadata value.
+  /// \brief Appends a dependent lib to the "llvm.linker.options" metadata
+  /// value.
   void AddDependentLib(StringRef Lib);
 
   llvm::GlobalVariable::LinkageTypes getFunctionLinkage(GlobalDecl GD);
@@ -1307,6 +1315,12 @@ class CodeGenModule : public CodeGenTypeCache {
   /// Emit any needed decls for which code generation was deferred.
   void EmitDeferred();
 
+  /// Try to emit external vtables as available_externally if all of their
+  /// inline virtual functions have been emitted.  This runs after
+  /// EmitDeferred() and therefore is not allowed to create new references to
+  /// things that need to be emitted lazily.
+  void EmitVTablesOpportunistically();
+
   /// Call replaceAllUsesWith on all pairs in Replacements.
   void applyReplacements();
 
@@ -1340,6 +1354,9 @@ class CodeGenModule : public CodeGenTypeCache {
   /// Emits target specific Metadata for global declarations.
   void EmitTargetMetadata();
 
+  /// Emits OpenCL-specific metadata, e.g. the OpenCL version.
+  void EmitOpenCLMetadata();
+
   /// Emit the llvm.gcov metadata used to tell LLVM where to emit the .gcno and
   /// .gcda files in a way that persists in .bc files.
   void EmitCoverageFile();
diff --git a/interpreter/llvm/src/tools/clang/lib/CodeGen/CodeGenPGO.cpp b/interpreter/llvm/src/tools/clang/lib/CodeGen/CodeGenPGO.cpp
index 9e193531d0f60..c3d66c1dabc5e 100644
--- a/interpreter/llvm/src/tools/clang/lib/CodeGen/CodeGenPGO.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/CodeGen/CodeGenPGO.cpp
@@ -617,6 +617,9 @@ uint64_t PGOHash::finalize() {
 
 void CodeGenPGO::assignRegionCounters(GlobalDecl GD, llvm::Function *Fn) {
   const Decl *D = GD.getDecl();
+  if (!D->hasBody())
+    return;
+
   bool InstrumentRegions = CGM.getCodeGenOpts().hasProfileClangInstr();
   llvm::IndexedInstrProfReader *PGOReader = CGM.getPGOReader();
   if (!InstrumentRegions && !PGOReader)
diff --git a/interpreter/llvm/src/tools/clang/lib/CodeGen/CodeGenTBAA.cpp b/interpreter/llvm/src/tools/clang/lib/CodeGen/CodeGenTBAA.cpp
index 04224e726797a..8a75a552d9faa 100644
--- a/interpreter/llvm/src/tools/clang/lib/CodeGen/CodeGenTBAA.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/CodeGen/CodeGenTBAA.cpp
@@ -139,6 +139,12 @@ CodeGenTBAA::getTBAAInfo(QualType QTy) {
     }
   }
 
+  // C++1z [basic.lval]p10: "If a program attempts to access the stored value of
+  // an object through a glvalue of other than one of the following types the
+  // behavior is undefined: [...] a char, unsigned char, or std::byte type."
+  if (Ty->isStdByteType())
+    return MetadataCache[Ty] = getChar();
+
   // Handle pointers.
   // TODO: Implement C++'s type "similarity" and consider dis-"similar"
   // pointers distinct.
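
A consequence of giving std::byte the "char" TBAA node (illustrative example, not part of the patch):

  #include <cstddef>

  float reread(float *f, std::byte *b) {
    *f = 1.0f;
    *b = std::byte{42};  // may alias *f per [basic.lval]p10, like unsigned char
    return *f;           // TBAA must not fold this load to 1.0f
  }
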
diff --git a/interpreter/llvm/src/tools/clang/lib/CodeGen/CodeGenTypeCache.h b/interpreter/llvm/src/tools/clang/lib/CodeGen/CodeGenTypeCache.h
index 8ce9860cc638f..450eab48a3b41 100644
--- a/interpreter/llvm/src/tools/clang/lib/CodeGen/CodeGenTypeCache.h
+++ b/interpreter/llvm/src/tools/clang/lib/CodeGen/CodeGenTypeCache.h
@@ -94,6 +94,8 @@ struct CodeGenTypeCache {
     unsigned char SizeAlignInBytes;
   };
 
+  unsigned ASTAllocaAddressSpace;
+
   CharUnits getSizeSize() const {
     return CharUnits::fromQuantity(SizeSizeInBytes);
   }
@@ -111,6 +113,8 @@ struct CodeGenTypeCache {
   llvm::CallingConv::ID getRuntimeCC() const { return RuntimeCC; }
   llvm::CallingConv::ID BuiltinCC;
   llvm::CallingConv::ID getBuiltinCC() const { return BuiltinCC; }
+
+  unsigned getASTAllocaAddressSpace() const { return ASTAllocaAddressSpace; }
 };
 
 }  // end namespace CodeGen
diff --git a/interpreter/llvm/src/tools/clang/lib/CodeGen/CodeGenTypes.cpp b/interpreter/llvm/src/tools/clang/lib/CodeGen/CodeGenTypes.cpp
index 5ed9291358805..9306c4fbaff80 100644
--- a/interpreter/llvm/src/tools/clang/lib/CodeGen/CodeGenTypes.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/CodeGen/CodeGenTypes.cpp
@@ -44,6 +44,10 @@ CodeGenTypes::~CodeGenTypes() {
     delete &*I++;
 }
 
+const CodeGenOptions &CodeGenTypes::getCodeGenOpts() const {
+  return CGM.getCodeGenOpts();
+}
+
 void CodeGenTypes::addRecordTypeName(const RecordDecl *RD,
                                      llvm::StructType *Ty,
                                      StringRef suffix) {
diff --git a/interpreter/llvm/src/tools/clang/lib/CodeGen/CodeGenTypes.h b/interpreter/llvm/src/tools/clang/lib/CodeGen/CodeGenTypes.h
index f0b97ebde1c21..9d0e3ded23e47 100644
--- a/interpreter/llvm/src/tools/clang/lib/CodeGen/CodeGenTypes.h
+++ b/interpreter/llvm/src/tools/clang/lib/CodeGen/CodeGenTypes.h
@@ -178,6 +178,7 @@ class CodeGenTypes {
   const TargetInfo &getTarget() const { return Target; }
   CGCXXABI &getCXXABI() const { return TheCXXABI; }
   llvm::LLVMContext &getLLVMContext() { return TheModule.getContext(); }
+  const CodeGenOptions &getCodeGenOpts() const;
 
   /// ConvertType - Convert type T into a llvm::Type.
   llvm::Type *ConvertType(QualType T);
diff --git a/interpreter/llvm/src/tools/clang/lib/CodeGen/ItaniumCXXABI.cpp b/interpreter/llvm/src/tools/clang/lib/CodeGen/ItaniumCXXABI.cpp
index 88d401307d50d..bd4cb9a3667b2 100644
--- a/interpreter/llvm/src/tools/clang/lib/CodeGen/ItaniumCXXABI.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/CodeGen/ItaniumCXXABI.cpp
@@ -62,13 +62,20 @@ class ItaniumCXXABI : public CodeGen::CGCXXABI {
 
   bool classifyReturnType(CGFunctionInfo &FI) const override;
 
+  bool passClassIndirect(const CXXRecordDecl *RD) const {
+    // Clang <= 4 used the pre-C++11 rule, which ignores move operations.
+    // The PS4 platform ABI follows the behavior of Clang 3.2.
+    if (CGM.getCodeGenOpts().getClangABICompat() <=
+            CodeGenOptions::ClangABI::Ver4 ||
+        CGM.getTriple().getOS() == llvm::Triple::PS4)
+      return RD->hasNonTrivialDestructor() ||
+             RD->hasNonTrivialCopyConstructor();
+    return !canCopyArgument(RD);
+  }
+
   RecordArgABI getRecordArgABI(const CXXRecordDecl *RD) const override {
     // If C++ prohibits us from making a copy, pass by address.
-#if __clang_major__ < 5
-    if (!canCopyArgument(RD))
-#else
-    if (RD->hasNonTrivialDestructor() || RD->hasNonTrivialCopyConstructor())
-#endif
+    if (passClassIndirect(RD))
       return RAA_Indirect;
     return RAA_Default;
   }
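
An example of the ABI difference passClassIndirect now models (illustrative, not part of the patch):

  struct S {
    S(const S &) = default;  // trivial copy constructor
    S(S &&);                 // user-provided, non-trivial move constructor
    int v;
  };
  // Clang <= 4 and PS4: passed directly, since the old rule only looks at the
  // copy constructor and destructor. Clang 5: passed indirectly, because
  // canCopyArgument() also rejects classes with a non-trivial move
  // constructor.
  void callee(S);
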
@@ -367,20 +374,30 @@ class ItaniumCXXABI : public CodeGen::CGCXXABI {
   void emitCXXStructor(const CXXMethodDecl *MD, StructorType Type) override;
 
  private:
-   bool hasAnyVirtualInlineFunction(const CXXRecordDecl *RD) const {
-    const auto &VtableLayout =
-        CGM.getItaniumVTableContext().getVTableLayout(RD);
-
-    for (const auto &VtableComponent : VtableLayout.vtable_components()) {
-      // Skip empty slot.
-      if (!VtableComponent.isUsedFunctionPointerKind())
-        continue;
-
-      const CXXMethodDecl *Method = VtableComponent.getFunctionDecl();
-      if (Method->getCanonicalDecl()->isInlined())
-        return true;
-    }
-    return false;
+   bool hasAnyUnusedVirtualInlineFunction(const CXXRecordDecl *RD) const {
+     const auto &VtableLayout =
+         CGM.getItaniumVTableContext().getVTableLayout(RD);
+
+     for (const auto &VtableComponent : VtableLayout.vtable_components()) {
+       // Skip empty slot.
+       if (!VtableComponent.isUsedFunctionPointerKind())
+         continue;
+
+       const CXXMethodDecl *Method = VtableComponent.getFunctionDecl();
+       if (!Method->getCanonicalDecl()->isInlined())
+         continue;
+
+       StringRef Name = CGM.getMangledName(VtableComponent.getGlobalDecl());
+       auto *Entry = CGM.GetGlobalValue(Name);
+       // This checks whether the virtual inline function has already been
+       // emitted. Note that the inline function may still be emitted after we
+       // try to emit the vtable speculatively; because of this we do an extra
+       // pass after emitting all deferred vtables to find and emit these
+       // vtables opportunistically.
+       if (!Entry || Entry->isDeclaration())
+         return true;
+     }
+     return false;
   }
 
   bool isVTableHidden(const CXXRecordDecl *RD) const {
@@ -990,11 +1007,7 @@ bool ItaniumCXXABI::classifyReturnType(CGFunctionInfo &FI) const {
     return false;
 
   // If C++ prohibits us from making a copy, return by address.
-#if __clang_major__ < 5
-  if (!canCopyArgument(RD)) {
-#else
-  if (RD->hasNonTrivialDestructor() || RD->hasNonTrivialCopyConstructor()) {
-#endif
+  if (passClassIndirect(RD)) {
     auto Align = CGM.getContext().getTypeAlignInChars(FI.getReturnType());
     FI.getReturnInfo() = ABIArgInfo::getIndirect(Align, /*ByVal=*/false);
     return true;
@@ -1401,9 +1414,9 @@ void ItaniumCXXABI::addImplicitStructorParams(CodeGenFunction &CGF,
 
     // FIXME: avoid the fake decl
     QualType T = Context.getPointerType(Context.VoidPtrTy);
-    ImplicitParamDecl *VTTDecl
-      = ImplicitParamDecl::Create(Context, nullptr, MD->getLocation(),
-                                  &Context.Idents.get("vtt"), T);
+    auto *VTTDecl = ImplicitParamDecl::Create(
+        Context, /*DC=*/nullptr, MD->getLocation(), &Context.Idents.get("vtt"),
+        T, ImplicitParamDecl::CXXVTT);
     Params.insert(Params.begin() + 1, VTTDecl);
     getStructorImplicitParamDecl(CGF) = VTTDecl;
   }
@@ -1690,11 +1703,11 @@ bool ItaniumCXXABI::canSpeculativelyEmitVTable(const CXXRecordDecl *RD) const {
   if (CGM.getLangOpts().AppleKext)
     return false;
 
-  // If we don't have any inline virtual functions, and if vtable is not hidden,
-  // then we are safe to emit available_externally copy of vtable.
+  // If every inline virtual function has already been emitted, and the vtable
+  // is not hidden, then it is safe to emit an available_externally copy of
+  // the vtable.
   // FIXME we can still emit a copy of the vtable if we
   // can emit definition of the inline functions.
-  return !hasAnyVirtualInlineFunction(RD) && !isVTableHidden(RD);
+  return !hasAnyUnusedVirtualInlineFunction(RD) && !isVTableHidden(RD);
 }
 static llvm::Value *performTypeAdjustment(CodeGenFunction &CGF,
                                           Address InitialPtr,
@@ -2579,6 +2592,9 @@ ItaniumRTTIBuilder::GetAddrOfExternalRTTIDescriptor(QualType Ty) {
 
   if (!GV) {
     // Create a new global variable.
+    // Note for the future: If we ever want to do deferred emission of RTTI,
+    // check if emitting vtables opportunistically needs any adjustment.
+
     GV = new llvm::GlobalVariable(CGM.getModule(), CGM.Int8PtrTy,
                                   /*Constant=*/true,
                                   llvm::GlobalValue::ExternalLinkage, nullptr,
@@ -2722,7 +2738,9 @@ static bool ShouldUseExternalRTTIDescriptor(CodeGenModule &CGM,
     // function.
     bool IsDLLImport = RD->hasAttr<DLLImportAttr>();
     if (CGM.getVTables().isVTableExternal(RD))
-      return IsDLLImport ? false : true;
+      return IsDLLImport && !CGM.getTriple().isWindowsItaniumEnvironment()
+                 ? false
+                 : true;
 
     if (IsDLLImport)
       return true;
@@ -2947,6 +2965,8 @@ static llvm::GlobalVariable::LinkageTypes getTypeInfoLinkage(CodeGenModule &CGM,
     return llvm::GlobalValue::InternalLinkage;
 
   case VisibleNoLinkage:
+  case ModuleInternalLinkage:
+  case ModuleLinkage:
   case ExternalLinkage:
     // RTTI is not enabled, which means that this type info struct is going
     // to be used for exception handling. Give it linkonce_odr linkage.
@@ -2958,7 +2978,8 @@ static llvm::GlobalVariable::LinkageTypes getTypeInfoLinkage(CodeGenModule &CGM,
       if (RD->hasAttr<WeakAttr>())
         return llvm::GlobalValue::WeakODRLinkage;
       if (CGM.getTriple().isWindowsItaniumEnvironment())
-        if (RD->hasAttr<DLLImportAttr>())
+        if (RD->hasAttr<DLLImportAttr>() &&
+            ShouldUseExternalRTTIDescriptor(CGM, Ty))
           return llvm::GlobalValue::ExternalLinkage;
       if (RD->isDynamicClass()) {
         llvm::GlobalValue::LinkageTypes LT = CGM.getVTableLinkage(RD);
@@ -3171,7 +3192,8 @@ llvm::Constant *ItaniumRTTIBuilder::BuildTypeInfo(QualType Ty, bool Force,
     if (DLLExport || (RD && RD->hasAttr<DLLExportAttr>())) {
       TypeName->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass);
       GV->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass);
-    } else if (CGM.getLangOpts().RTTI && RD && RD->hasAttr<DLLImportAttr>()) {
+    } else if (RD && RD->hasAttr<DLLImportAttr>() &&
+               ShouldUseExternalRTTIDescriptor(CGM, Ty)) {
       TypeName->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass);
       GV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass);
 
diff --git a/interpreter/llvm/src/tools/clang/lib/CodeGen/MacroPPCallbacks.cpp b/interpreter/llvm/src/tools/clang/lib/CodeGen/MacroPPCallbacks.cpp
index 6a31dfe53d644..a6f21d8ddcfb7 100644
--- a/interpreter/llvm/src/tools/clang/lib/CodeGen/MacroPPCallbacks.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/CodeGen/MacroPPCallbacks.cpp
@@ -26,8 +26,8 @@ void MacroPPCallbacks::writeMacroDefinition(const IdentifierInfo &II,
 
   if (MI.isFunctionLike()) {
     Name << '(';
-    if (!MI.arg_empty()) {
-      MacroInfo::arg_iterator AI = MI.arg_begin(), E = MI.arg_end();
+    if (!MI.param_empty()) {
+      MacroInfo::param_iterator AI = MI.param_begin(), E = MI.param_end();
       for (; AI + 1 != E; ++AI) {
         Name << (*AI)->getName();
         Name << ',';
diff --git a/interpreter/llvm/src/tools/clang/lib/CodeGen/MicrosoftCXXABI.cpp b/interpreter/llvm/src/tools/clang/lib/CodeGen/MicrosoftCXXABI.cpp
index 4cacf494e6941..1bd2937e47477 100644
--- a/interpreter/llvm/src/tools/clang/lib/CodeGen/MicrosoftCXXABI.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/CodeGen/MicrosoftCXXABI.cpp
@@ -819,46 +819,44 @@ MicrosoftCXXABI::getRecordArgABI(const CXXRecordDecl *RD) const {
     return RAA_Default;
 
   case llvm::Triple::x86_64:
-    // Win64 passes objects with non-trivial copy ctors indirectly.
-    if (RD->hasNonTrivialCopyConstructor())
-      return RAA_Indirect;
-
-    // If an object has a destructor, we'd really like to pass it indirectly
+    // If a class has a destructor, we'd really like to pass it indirectly
     // because it allows us to elide copies.  Unfortunately, MSVC makes that
     // impossible for small types, which it will pass in a single register or
     // stack slot. Most objects with dtors are large-ish, so handle that early.
     // We can't call out all large objects as being indirect because there are
     // multiple x64 calling conventions and the C++ ABI code shouldn't dictate
     // how we pass large POD types.
+    //
+    // Note: This permits small classes with nontrivial destructors to be
+    // passed in registers, which is non-conforming.
     if (RD->hasNonTrivialDestructor() &&
         getContext().getTypeSize(RD->getTypeForDecl()) > 64)
       return RAA_Indirect;
 
-    // If this is true, the implicit copy constructor that Sema would have
-    // created would not be deleted. FIXME: We should provide a more direct way
-    // for CodeGen to ask whether the constructor was deleted.
-    if (!RD->hasUserDeclaredCopyConstructor() &&
-        !RD->hasUserDeclaredMoveConstructor() &&
-        !RD->needsOverloadResolutionForMoveConstructor() &&
-        !RD->hasUserDeclaredMoveAssignment() &&
-        !RD->needsOverloadResolutionForMoveAssignment())
-      return RAA_Default;
-
-    // Otherwise, Sema should have created an implicit copy constructor if
-    // needed.
-    assert(!RD->needsImplicitCopyConstructor());
-
-    // We have to make sure the trivial copy constructor isn't deleted.
-    for (const CXXConstructorDecl *CD : RD->ctors()) {
-      if (CD->isCopyConstructor()) {
-        assert(CD->isTrivial());
-        // We had at least one undeleted trivial copy ctor.  Return directly.
-        if (!CD->isDeleted())
-          return RAA_Default;
+    // If a class has at least one non-deleted, trivial copy constructor, it
+    // is passed according to the C ABI. Otherwise, it is passed indirectly.
+    //
+    // Note: This permits classes with non-trivial copy or move ctors to be
+    // passed in registers, so long as they *also* have a trivial copy ctor,
+    // which is non-conforming.
+    if (RD->needsImplicitCopyConstructor()) {
+      // If the copy ctor has not yet been declared, we can read its triviality
+      // off the AST.
+      if (!RD->defaultedCopyConstructorIsDeleted() &&
+          RD->hasTrivialCopyConstructor())
+        return RAA_Default;
+    } else {
+      // Otherwise, we need to find the copy constructor(s) and ask.
+      for (const CXXConstructorDecl *CD : RD->ctors()) {
+        if (CD->isCopyConstructor()) {
+          // We had at least one nondeleted trivial copy ctor.  Return directly.
+          if (!CD->isDeleted() && CD->isTrivial())
+            return RAA_Default;
+        }
       }
     }
 
-    // The trivial copy constructor was deleted.  Return indirectly.
+    // We have no trivial, non-deleted copy constructor.
     return RAA_Indirect;
   }
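
The x86_64 rule above can be summarized with a small example (illustrative, not part of the patch):

  struct M {
    M(const M &) = default;  // trivial, non-deleted copy constructor
    M(M &&);                 // non-trivial move constructor
    int v;
  };
  // Targeting x86_64-windows-msvc this is RAA_Default: the trivial copy ctor
  // lets it be passed in a register, matching MSVC, even though the class
  // also has a non-trivial move constructor.
  void callee(M);
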
 
@@ -1413,11 +1411,10 @@ void MicrosoftCXXABI::addImplicitStructorParams(CodeGenFunction &CGF,
   const CXXMethodDecl *MD = cast(CGF.CurGD.getDecl());
   assert(isa<CXXConstructorDecl>(MD) || isa<CXXDestructorDecl>(MD));
   if (isa<CXXConstructorDecl>(MD) && MD->getParent()->getNumVBases()) {
-    ImplicitParamDecl *IsMostDerived
-      = ImplicitParamDecl::Create(Context, nullptr,
-                                  CGF.CurGD.getDecl()->getLocation(),
-                                  &Context.Idents.get("is_most_derived"),
-                                  Context.IntTy);
+    auto *IsMostDerived = ImplicitParamDecl::Create(
+        Context, /*DC=*/nullptr, CGF.CurGD.getDecl()->getLocation(),
+        &Context.Idents.get("is_most_derived"), Context.IntTy,
+        ImplicitParamDecl::Other);
     // The 'most_derived' parameter goes second if the ctor is variadic and last
     // if it's not.  Dtors can't be variadic.
     const FunctionProtoType *FPT = MD->getType()->castAs<FunctionProtoType>();
@@ -1427,11 +1424,10 @@ void MicrosoftCXXABI::addImplicitStructorParams(CodeGenFunction &CGF,
       Params.push_back(IsMostDerived);
     getStructorImplicitParamDecl(CGF) = IsMostDerived;
   } else if (isDeletingDtor(CGF.CurGD)) {
-    ImplicitParamDecl *ShouldDelete
-      = ImplicitParamDecl::Create(Context, nullptr,
-                                  CGF.CurGD.getDecl()->getLocation(),
-                                  &Context.Idents.get("should_call_delete"),
-                                  Context.IntTy);
+    auto *ShouldDelete = ImplicitParamDecl::Create(
+        Context, /*DC=*/nullptr, CGF.CurGD.getDecl()->getLocation(),
+        &Context.Idents.get("should_call_delete"), Context.IntTy,
+        ImplicitParamDecl::Other);
     Params.push_back(ShouldDelete);
     getStructorImplicitParamDecl(CGF) = ShouldDelete;
   }
@@ -3427,6 +3423,8 @@ static llvm::GlobalValue::LinkageTypes getLinkageForRTTI(QualType Ty) {
     return llvm::GlobalValue::InternalLinkage;
 
   case VisibleNoLinkage:
+  case ModuleInternalLinkage:
+  case ModuleLinkage:
   case ExternalLinkage:
     return llvm::GlobalValue::LinkOnceODRLinkage;
   }
@@ -3756,6 +3754,9 @@ llvm::Constant *MicrosoftCXXABI::getAddrOfRTTIDescriptor(QualType Type) {
   if (llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(MangledName))
     return llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy);
 
+  // Note for the future: If we ever want to do deferred emission of RTTI,
+  // check whether emitting vtables opportunistically needs any adjustment.
+
   // Compute the fields for the TypeDescriptor.
   SmallString<256> TypeInfoString;
   {
@@ -3872,18 +3873,21 @@ MicrosoftCXXABI::getAddrOfCXXCtorClosure(const CXXConstructorDecl *CD,
   // Following the 'this' pointer is a reference to the source object that we
   // are copying from.
   ImplicitParamDecl SrcParam(
-      getContext(), nullptr, SourceLocation(), &getContext().Idents.get("src"),
+      getContext(), /*DC=*/nullptr, SourceLocation(),
+      &getContext().Idents.get("src"),
       getContext().getLValueReferenceType(RecordTy,
-                                          /*SpelledAsLValue=*/true));
+                                          /*SpelledAsLValue=*/true),
+      ImplicitParamDecl::Other);
   if (IsCopy)
     FunctionArgs.push_back(&SrcParam);
 
   // Constructors for classes which utilize virtual bases have an additional
   // parameter which indicates whether or not it is being delegated to by a more
   // derived constructor.
-  ImplicitParamDecl IsMostDerived(getContext(), nullptr, SourceLocation(),
+  ImplicitParamDecl IsMostDerived(getContext(), /*DC=*/nullptr,
+                                  SourceLocation(),
                                   &getContext().Idents.get("is_most_derived"),
-                                  getContext().IntTy);
+                                  getContext().IntTy, ImplicitParamDecl::Other);
   // Only add the parameter to the list if the class has virtual bases.
   if (RD->getNumVBases() > 0)
     FunctionArgs.push_back(&IsMostDerived);
diff --git a/interpreter/llvm/src/tools/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp b/interpreter/llvm/src/tools/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
index 37ecc05aa1eee..d0760b9cc2a60 100644
--- a/interpreter/llvm/src/tools/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
@@ -152,6 +152,9 @@ class PCHContainerGenerator : public ASTConsumer {
     CodeGenOpts.CodeModel = "default";
     CodeGenOpts.ThreadModel = "single";
     CodeGenOpts.DebugTypeExtRefs = true;
+    // When building a module, MainFileName is the name of the modulemap file.
+    CodeGenOpts.MainFileName =
+        LangOpts.CurrentModule.empty() ? MainFileName : LangOpts.CurrentModule;
     CodeGenOpts.setDebugInfo(codegenoptions::FullDebugInfo);
     CodeGenOpts.setDebuggerTuning(CI.getCodeGenOpts().getDebuggerTuning());
   }
diff --git a/interpreter/llvm/src/tools/clang/lib/CodeGen/SwiftCallingConv.cpp b/interpreter/llvm/src/tools/clang/lib/CodeGen/SwiftCallingConv.cpp
index 0bfe30a32c806..fc8e36d2c5990 100644
--- a/interpreter/llvm/src/tools/clang/lib/CodeGen/SwiftCallingConv.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/CodeGen/SwiftCallingConv.cpp
@@ -57,6 +57,10 @@ static CharUnits getTypeStoreSize(CodeGenModule &CGM, llvm::Type *type) {
   return CharUnits::fromQuantity(CGM.getDataLayout().getTypeStoreSize(type));
 }
 
+static CharUnits getTypeAllocSize(CodeGenModule &CGM, llvm::Type *type) {
+  return CharUnits::fromQuantity(CGM.getDataLayout().getTypeAllocSize(type));
+}
+
 void SwiftAggLowering::addTypedData(QualType type, CharUnits begin) {
   // Deal with various aggregate types as special cases:
 
@@ -542,7 +546,9 @@ SwiftAggLowering::getCoerceAndExpandTypes() const {
       packed = true;
 
     elts.push_back(entry.Type);
-    lastEnd = entry.End;
+
+    lastEnd = entry.Begin + getTypeAllocSize(CGM, entry.Type);
+    assert(entry.End <= lastEnd);
   }
 
   // We don't need to adjust 'packed' to deal with possible tail padding
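
[Editor's illustration, not part of the patch] Why the alloc size rather than entry.End: the alloc size includes ABI tail padding that the next entry's offset must account for, while the store size does not. A hedged sketch (Ctx is an assumed llvm::LLVMContext; includes llvm/IR/DataLayout.h):

    llvm::DataLayout DL("e");                      // default little-endian layout
    llvm::Type *I24 = llvm::Type::getIntNTy(Ctx, 24);
    uint64_t Store = DL.getTypeStoreSize(I24);     // 3 bytes: what a load/store touches
    uint64_t Alloc = DL.getTypeAllocSize(I24);     // 4 bytes: rounded up to the ABI alignment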
diff --git a/interpreter/llvm/src/tools/clang/lib/CodeGen/TargetInfo.cpp b/interpreter/llvm/src/tools/clang/lib/CodeGen/TargetInfo.cpp
index 18367d1602ba4..ece3a407eae3b 100644
--- a/interpreter/llvm/src/tools/clang/lib/CodeGen/TargetInfo.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/CodeGen/TargetInfo.cpp
@@ -183,7 +183,11 @@ const TargetInfo &ABIInfo::getTarget() const {
   return CGT.getTarget();
 }
 
-bool ABIInfo:: isAndroid() const { return getTarget().getTriple().isAndroid(); }
+const CodeGenOptions &ABIInfo::getCodeGenOpts() const {
+  return CGT.getCodeGenOpts();
+}
+
+bool ABIInfo::isAndroid() const { return getTarget().getTriple().isAndroid(); }
 
 bool ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const {
   return false;
@@ -398,7 +402,17 @@ TargetCodeGenInfo::getDependentLibraryOption(llvm::StringRef Lib,
 }
 
 unsigned TargetCodeGenInfo::getOpenCLKernelCallingConv() const {
-  return llvm::CallingConv::C;
+  // OpenCL kernels are called via an explicit runtime API with arguments
+  // set with clSetKernelArg(), not as normal sub-functions.
+  // Return SPIR_KERNEL by default as the kernel calling convention to
+  // ensure the fingerprint is fixed such way that each OpenCL argument
+  // gets one matching argument in the produced kernel function argument
+  // list to enable feasible implementation of clSetKernelArg() with
+  // aggregates etc. In case we would use the default C calling conv here,
+  // clSetKernelArg() might break depending on the target-specific
+  // conventions; different targets might split structs passed as values
+  // to multiple function arguments etc.
+  return llvm::CallingConv::SPIR_KERNEL;
 }
 
 llvm::Constant *TargetCodeGenInfo::getNullPointer(const CodeGen::CodeGenModule &CGM,
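
[Editor's illustration, not part of the patch] Host-side view of the invariant the comment above relies on: with one IR argument per source-level kernel argument, each clSetKernelArg index maps cleanly onto one kernel parameter (standard OpenCL host API; variable names hypothetical):

    clSetKernelArg(Kernel, 0, sizeof(cl_mem), &InputBuf);   // pointer argument
    clSetKernelArg(Kernel, 1, sizeof(Params), &ParamsVal);  // struct passed by value stays
                                                            // one argument, never split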
@@ -406,13 +420,32 @@ llvm::Constant *TargetCodeGenInfo::getNullPointer(const CodeGen::CodeGenModule &
   return llvm::ConstantPointerNull::get(T);
 }
 
+unsigned TargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM,
+                                                     const VarDecl *D) const {
+  assert(!CGM.getLangOpts().OpenCL &&
+         !(CGM.getLangOpts().CUDA && CGM.getLangOpts().CUDAIsDevice) &&
+         "Address space agnostic languages only");
+  return D ? D->getType().getAddressSpace()
+           : static_cast<unsigned>(LangAS::Default);
+}
+
 llvm::Value *TargetCodeGenInfo::performAddrSpaceCast(
-    CodeGen::CodeGenFunction &CGF, llvm::Value *Src, QualType SrcTy,
-    QualType DestTy) const {
+    CodeGen::CodeGenFunction &CGF, llvm::Value *Src, unsigned SrcAddr,
+    unsigned DestAddr, llvm::Type *DestTy, bool isNonNull) const {
   // Since target may map different address spaces in AST to the same address
   // space, an address space conversion may end up as a bitcast.
-  return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Src,
-             CGF.ConvertType(DestTy));
+  if (auto *C = dyn_cast<llvm::Constant>(Src))
+    return performAddrSpaceCast(CGF.CGM, C, SrcAddr, DestAddr, DestTy);
+  return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Src, DestTy);
+}
+
+llvm::Constant *
+TargetCodeGenInfo::performAddrSpaceCast(CodeGenModule &CGM, llvm::Constant *Src,
+                                        unsigned SrcAddr, unsigned DestAddr,
+                                        llvm::Type *DestTy) const {
+  // Since target may map different address spaces in AST to the same address
+  // space, an address space conversion may end up as a bitcast.
+  return llvm::ConstantExpr::getPointerCast(Src, DestTy);
 }
 
 static bool isEmptyRecord(ASTContext &Context, QualType T, bool AllowArrays);
@@ -942,8 +975,7 @@ class X86_32ABIInfo : public SwiftABIInfo {
   Class classify(QualType Ty) const;
   ABIArgInfo classifyReturnType(QualType RetTy, CCState &State) const;
   ABIArgInfo classifyArgumentType(QualType RetTy, CCState &State) const;
-  ABIArgInfo reclassifyHvaArgType(QualType RetTy, CCState &State, 
-                                  const ABIArgInfo& current) const;
+
   /// \brief Updates the number of available free registers, returns 
   /// true if any registers were allocated.
   bool updateFreeRegs(QualType Ty, CCState &State) const;
@@ -1527,27 +1559,6 @@ bool X86_32ABIInfo::shouldPrimitiveUseInReg(QualType Ty, CCState &State) const {
   return true;
 }
 
-ABIArgInfo
-X86_32ABIInfo::reclassifyHvaArgType(QualType Ty, CCState &State,
-                                    const ABIArgInfo &current) const {
-  // Assumes vectorCall calling convention.
-  const Type *Base = nullptr;
-  uint64_t NumElts = 0;
-
-  if (!Ty->isBuiltinType() && !Ty->isVectorType() &&
-      isHomogeneousAggregate(Ty, Base, NumElts)) {
-    if (State.FreeSSERegs >= NumElts) {
-      // HVA types get passed directly in registers if there is room.
-      State.FreeSSERegs -= NumElts;
-      return getDirectX86Hva();
-    }
-    // If there's no room, the HVA gets passed as normal indirect
-    // structure.
-    return getIndirectResult(Ty, /*ByVal=*/false, State);
-  } 
-  return current;
-}
-
 ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
                                                CCState &State) const {
   // FIXME: Set alignment on indirect arguments.
@@ -1566,35 +1577,20 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
     }
   }
 
-  // vectorcall adds the concept of a homogenous vector aggregate, similar
-  // to other targets, regcall uses some of the HVA rules.
+  // Regcall uses the concept of a homogeneous vector aggregate, similar
+  // to other targets.
   const Type *Base = nullptr;
   uint64_t NumElts = 0;
-  if ((State.CC == llvm::CallingConv::X86_VectorCall ||
-       State.CC == llvm::CallingConv::X86_RegCall) &&
+  if (State.CC == llvm::CallingConv::X86_RegCall &&
       isHomogeneousAggregate(Ty, Base, NumElts)) {
 
-    if (State.CC == llvm::CallingConv::X86_RegCall) {
-      if (State.FreeSSERegs >= NumElts) {
-        State.FreeSSERegs -= NumElts;
-        if (Ty->isBuiltinType() || Ty->isVectorType())
-          return ABIArgInfo::getDirect();
-        return ABIArgInfo::getExpand();
-
-      }
-      return getIndirectResult(Ty, /*ByVal=*/false, State);
-    } else if (State.CC == llvm::CallingConv::X86_VectorCall) {
-      if (State.FreeSSERegs >= NumElts && (Ty->isBuiltinType() || Ty->isVectorType())) {
-        // Actual floating-point types get registers first time through if
-        // there is registers available
-        State.FreeSSERegs -= NumElts;
+    if (State.FreeSSERegs >= NumElts) {
+      State.FreeSSERegs -= NumElts;
+      if (Ty->isBuiltinType() || Ty->isVectorType())
         return ABIArgInfo::getDirect();
-      }  else if (!Ty->isBuiltinType() && !Ty->isVectorType()) {
-        // HVA Types only get registers after everything else has been
-        // set, so it gets set as indirect for now.
-        return ABIArgInfo::getIndirect(getContext().getTypeAlignInChars(Ty));
-      }
+      return ABIArgInfo::getExpand();
     }
+    return getIndirectResult(Ty, /*ByVal=*/false, State);
   }
 
   if (isAggregateTypeForABI(Ty)) {
@@ -1675,31 +1671,53 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
 
 void X86_32ABIInfo::computeVectorCallArgs(CGFunctionInfo &FI, CCState &State,
                                           bool &UsedInAlloca) const {
-  // Vectorcall only allows the first 6 parameters to be passed in registers,
-  // and homogeneous vector aggregates are only put into registers as a second
-  // priority.
-  unsigned Count = 0;
-  CCState ZeroState = State;
-  ZeroState.FreeRegs = ZeroState.FreeSSERegs = 0;
-  // HVAs must be done as a second priority for registers, so the deferred
-  // items are dealt with by going through the pattern a second time.
+  // Vectorcall on x86 works subtly differently than on x64, so the format is
+  // a bit different than the x64 version.  First, all vector types (not HVAs)
+  // are assigned, with the first 6 ending up in the YMM0-5 or XMM0-5 registers.
+  // This differs from the x64 implementation, where the first 6 by INDEX get
+  // registers.
+  // After that, integers AND HVAs are assigned Left to Right in the same pass.
+  // Integers are passed as ECX/EDX if one is available (in order).  HVAs will
+  // first take up the remaining YMM/XMM registers. If insufficient registers
+  // remain but an integer register (ECX/EDX) is available, it will be passed
+  // in that, else, on the stack.
   for (auto &I : FI.arguments()) {
-    if (Count < VectorcallMaxParamNumAsReg)
-      I.info = classifyArgumentType(I.type, State);
-    else
-      // Parameters after the 6th cannot be passed in registers,
-      // so pretend there are no registers left for them.
-      I.info = classifyArgumentType(I.type, ZeroState);
-    UsedInAlloca |= (I.info.getKind() == ABIArgInfo::InAlloca);
-    ++Count;
+    // First pass do all the vector types.
+    const Type *Base = nullptr;
+    uint64_t NumElts = 0;
+    const QualType& Ty = I.type;
+    if ((Ty->isVectorType() || Ty->isBuiltinType()) &&
+        isHomogeneousAggregate(Ty, Base, NumElts)) {
+      if (State.FreeSSERegs >= NumElts) {
+        State.FreeSSERegs -= NumElts;
+        I.info = ABIArgInfo::getDirect();
+      } else {
+        I.info = classifyArgumentType(Ty, State);
+      }
+      UsedInAlloca |= (I.info.getKind() == ABIArgInfo::InAlloca);
+    }
   }
-  Count = 0;
-  // Go through the arguments a second time to get HVAs registers if there
-  // are still some available.
+
   for (auto &I : FI.arguments()) {
-    if (Count < VectorcallMaxParamNumAsReg)
-      I.info = reclassifyHvaArgType(I.type, State, I.info);
-    ++Count;
+    // Second pass, do the rest!
+    const Type *Base = nullptr;
+    uint64_t NumElts = 0;
+    const QualType& Ty = I.type;
+    bool IsHva = isHomogeneousAggregate(Ty, Base, NumElts);
+
+    if (IsHva && !Ty->isVectorType() && !Ty->isBuiltinType()) {
+      // Assign true HVAs (non vector/native FP types).
+      if (State.FreeSSERegs >= NumElts) {
+        State.FreeSSERegs -= NumElts;
+        I.info = getDirectX86Hva();
+      } else {
+        I.info = getIndirectResult(Ty, /*ByVal=*/false, State);
+      }
+    } else if (!IsHva) {
+      // Assign all Non-HVAs, so this will exclude Vector/FP args.
+      I.info = classifyArgumentType(Ty, State);
+      UsedInAlloca |= (I.info.getKind() == ABIArgInfo::InAlloca);
+    }
   }
 }
 
@@ -2081,9 +2099,14 @@ class X86_64ABIInfo : public SwiftABIInfo {
     return !getTarget().getTriple().isOSDarwin();
   }
 
-  /// GCC classifies <1 x long long> as SSE but compatibility with older clang
-  // compilers require us to classify it as INTEGER.
+  /// GCC classifies <1 x long long> as SSE but some platform ABIs choose to
+  /// classify it as INTEGER (for compatibility with older clang compilers).
   bool classifyIntegerMMXAsSSE() const {
+    // Clang <= 3.8 did not do this.
+    if (getCodeGenOpts().getClangABICompat() <=
+        CodeGenOptions::ClangABI::Ver3_8)
+      return false;
+
     const llvm::Triple &Triple = getTarget().getTriple();
     if (Triple.isOSDarwin() || Triple.getOS() == llvm::Triple::PS4)
       return false;
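
[Editor's note, not part of the patch] The compatibility check above is surfaced through the driver as -fclang-abi-compat= (flag spelling as shipped in this release); e.g. `clang -fclang-abi-compat=3.8 ...` keeps <1 x long long> classified as INTEGER on the affected platforms.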
@@ -3892,6 +3915,8 @@ void WinX86_64ABIInfo::computeVectorCallArgs(CGFunctionInfo &FI,
                                              bool IsRegCall) const {
   unsigned Count = 0;
   for (auto &I : FI.arguments()) {
+    // Vectorcall in x64 only permits the first 6 arguments to be passed
+    // as XMM/YMM registers.
     if (Count < VectorcallMaxParamNumAsReg)
       I.info = classify(I.type, FreeSSERegs, false, IsVectorCall, IsRegCall);
     else {
@@ -3904,11 +3929,8 @@ void WinX86_64ABIInfo::computeVectorCallArgs(CGFunctionInfo &FI,
     ++Count;
   }
 
-  Count = 0;
   for (auto &I : FI.arguments()) {
-    if (Count < VectorcallMaxParamNumAsReg)
-      I.info = reclassifyHvaArgType(I.type, FreeSSERegs, I.info);
-    ++Count;
+    I.info = reclassifyHvaArgType(I.type, FreeSSERegs, I.info);
   }
 }
 
@@ -4772,7 +4794,8 @@ class AArch64ABIInfo : public SwiftABIInfo {
 public:
   enum ABIKind {
     AAPCS = 0,
-    DarwinPCS
+    DarwinPCS,
+    Win64
   };
 
 private:
@@ -4810,10 +4833,14 @@ class AArch64ABIInfo : public SwiftABIInfo {
 
   Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                     QualType Ty) const override {
-    return isDarwinPCS() ? EmitDarwinVAArg(VAListAddr, Ty, CGF)
-                         : EmitAAPCSVAArg(VAListAddr, Ty, CGF);
+    return Kind == Win64 ? EmitMSVAArg(CGF, VAListAddr, Ty)
+                         : isDarwinPCS() ? EmitDarwinVAArg(VAListAddr, Ty, CGF)
+                                         : EmitAAPCSVAArg(VAListAddr, Ty, CGF);
   }
 
+  Address EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr,
+                      QualType Ty) const override;
+
   bool shouldPassIndirectlyForSwift(CharUnits totalSize,
                                     ArrayRef<llvm::Type*> scalars,
                                     bool asReturnValue) const override {
@@ -4822,6 +4849,9 @@ class AArch64ABIInfo : public SwiftABIInfo {
   bool isSwiftErrorInRegister() const override {
     return true;
   }
+
+  bool isLegalVectorTypeForSwift(CharUnits totalSize, llvm::Type *eltTy,
+                                 unsigned elts) const override;
 };
 
 class AArch64TargetCodeGenInfo : public TargetCodeGenInfo {
@@ -4995,6 +5025,17 @@ bool AArch64ABIInfo::isIllegalVectorType(QualType Ty) const {
   return false;
 }
 
+bool AArch64ABIInfo::isLegalVectorTypeForSwift(CharUnits totalSize,
+                                               llvm::Type *eltTy,
+                                               unsigned elts) const {
+  if (!llvm::isPowerOf2_32(elts))
+    return false;
+  if (totalSize.getQuantity() != 8 &&
+      (totalSize.getQuantity() != 16 || elts == 1))
+    return false;
+  return true;
+}
+
 bool AArch64ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const {
   // Homogeneous aggregates for AAPCS64 must have base types of a floating
   // point type or a short-vector type. This is the same as the 32-bit ABI,
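
[Editor's illustration, not part of the patch] Cases the Swift vector-legality predicate above accepts and rejects: an 8-byte 2 x i32 vector and a 16-byte 4 x float vector are legal; a 12-byte 3 x float vector fails the power-of-two element count, and a 16-byte single-element vector is rejected by the elts == 1 exclusion.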
@@ -5305,6 +5346,14 @@ Address AArch64ABIInfo::EmitDarwinVAArg(Address VAListAddr, QualType Ty,
                           TyInfo, SlotSize, /*AllowHigherAlign*/ true);
 }
 
+Address AArch64ABIInfo::EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr,
+                                    QualType Ty) const {
+  return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*indirect*/ false,
+                          CGF.getContext().getTypeInfoInChars(Ty),
+                          CharUnits::fromQuantity(8),
+                          /*allowHigherAlign*/ false);
+}
+
 //===----------------------------------------------------------------------===//
 // ARM ABI Implementation
 //===----------------------------------------------------------------------===//
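
[Editor's illustration, not part of the patch] A C-level model of what EmitMSVAArg above amounts to: every variadic slot is 8 bytes and no realignment to higher alignments is performed (names hypothetical; llvm::alignTo from llvm/Support/MathExtras.h):

    char *Slot = VAListPtr;                       // current va_list position
    VAListPtr += llvm::alignTo(sizeof(T), 8);     // advance by the size rounded to 8-byte slots
    T Value = *reinterpret_cast<T *>(Slot);       // /*indirect*/ false: the slot holds the value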
@@ -5383,6 +5432,8 @@ class ARMABIInfo : public SwiftABIInfo {
   bool isSwiftErrorInRegister() const override {
     return true;
   }
+  bool isLegalVectorTypeForSwift(CharUnits totalSize, llvm::Type *eltTy,
+                                 unsigned elts) const override;
 };
 
 class ARMTargetCodeGenInfo : public TargetCodeGenInfo {
@@ -5531,17 +5582,14 @@ void ARMABIInfo::setCCs() {
   // AAPCS apparently requires runtime support functions to be soft-float, but
   // that's almost certainly for historic reasons (Thumb1 not supporting VFP
   // most likely). It's more convenient for AAPCS16_VFP to be hard-float.
-  switch (getABIKind()) {
-  case APCS:
-  case AAPCS16_VFP:
-    if (abiCC != getLLVMDefaultCC())
+
+  // The Run-time ABI for the ARM Architecture section 4.1.2 requires
+  // AEABI-complying FP helper functions to use the base AAPCS.
+  // These AEABI functions are expanded in the ARM llvm backend, all the builtin
+  // support functions emitted by clang such as the _Complex helpers follow the
+  // abiCC.
+  if (abiCC != getLLVMDefaultCC())
       BuiltinCC = abiCC;
-    break;
-  case AAPCS:
-  case AAPCS_VFP:
-    BuiltinCC = llvm::CallingConv::ARM_AAPCS;
-    break;
-  }
 }
 
 ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty,
@@ -5895,6 +5943,20 @@ bool ARMABIInfo::isIllegalVectorType(QualType Ty) const {
   return false;
 }
 
+bool ARMABIInfo::isLegalVectorTypeForSwift(CharUnits vectorSize,
+                                           llvm::Type *eltTy,
+                                           unsigned numElts) const {
+  if (!llvm::isPowerOf2_32(numElts))
+    return false;
+  unsigned size = getDataLayout().getTypeStoreSizeInBits(eltTy);
+  if (size > 64)
+    return false;
+  if (vectorSize.getQuantity() != 8 &&
+      (vectorSize.getQuantity() != 16 || numElts == 1))
+    return false;
+  return true;
+}
+
 bool ARMABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const {
   // Homogeneous aggregates for AAPCS-VFP must have base types of float,
   // double, or 64-bit or 128-bit vectors.
@@ -6558,6 +6620,11 @@ class MIPSTargetCodeGenInfo : public TargetCodeGenInfo {
       Fn->addFnAttr("nomips16");
     }
 
+    if (FD->hasAttr<MicroMipsAttr>())
+      Fn->addFnAttr("micromips");
+    else if (FD->hasAttr<NoMicroMipsAttr>())
+      Fn->addFnAttr("nomicromips");
+
     const MipsInterruptAttr *Attr = FD->getAttr<MipsInterruptAttr>();
     if (!Attr)
       return;
@@ -6693,14 +6760,6 @@ MipsABIInfo::classifyArgumentType(QualType Ty, uint64_t &Offset) const {
       return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);
     }
 
-    // Use indirect if the aggregate cannot fit into registers for
-    // passing arguments according to the ABI
-    unsigned Threshold = IsO32 ? 16 : 64;
-
-    if(getContext().getTypeSizeInChars(Ty) > CharUnits::fromQuantity(Threshold))
-      return ABIArgInfo::getIndirect(CharUnits::fromQuantity(Align), true,
-                                     getContext().getTypeAlign(Ty) / 8 > Align);
-
     // If we have reached here, aggregates are passed directly by coercing to
     // another structure type. Padding is inserted if the offset of the
     // aggregate is unaligned.
@@ -7292,11 +7351,16 @@ class AMDGPUTargetCodeGenInfo : public TargetCodeGenInfo {
 
   llvm::Constant *getNullPointer(const CodeGen::CodeGenModule &CGM,
       llvm::PointerType *T, QualType QT) const override;
+
+  unsigned getASTAllocaAddressSpace() const override {
+    return LangAS::FirstTargetAddressSpace +
+           getABIInfo().getDataLayout().getAllocaAddrSpace();
+  }
+  unsigned getGlobalVarAddressSpace(CodeGenModule &CGM,
+                                    const VarDecl *D) const override;
 };
 }
 
-static void appendOpenCLVersionMD (CodeGen::CodeGenModule &CGM);
-
 void AMDGPUTargetCodeGenInfo::setTargetAttributes(
     const Decl *D,
     llvm::GlobalValue *GV,
@@ -7353,8 +7417,6 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes(
     if (NumVGPR != 0)
       F->addFnAttr("amdgpu-num-vgpr", llvm::utostr(NumVGPR));
   }
-
-  appendOpenCLVersionMD(M);
 }
 
 unsigned AMDGPUTargetCodeGenInfo::getOpenCLKernelCallingConv() const {
@@ -7379,6 +7441,31 @@ llvm::Constant *AMDGPUTargetCodeGenInfo::getNullPointer(
       llvm::ConstantPointerNull::get(NPT), PT);
 }
 
+unsigned
+AMDGPUTargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM,
+                                                  const VarDecl *D) const {
+  assert(!CGM.getLangOpts().OpenCL &&
+         !(CGM.getLangOpts().CUDA && CGM.getLangOpts().CUDAIsDevice) &&
+         "Address space agnostic languages only");
+  unsigned DefaultGlobalAS =
+      LangAS::FirstTargetAddressSpace +
+      CGM.getContext().getTargetAddressSpace(LangAS::opencl_global);
+  if (!D)
+    return DefaultGlobalAS;
+
+  unsigned AddrSpace = D->getType().getAddressSpace();
+  assert(AddrSpace == LangAS::Default ||
+         AddrSpace >= LangAS::FirstTargetAddressSpace);
+  if (AddrSpace != LangAS::Default)
+    return AddrSpace;
+
+  if (CGM.isTypeConstant(D->getType(), false)) {
+    if (auto ConstAS = CGM.getTarget().getConstantAddressSpace())
+      return ConstAS.getValue();
+  }
+  return DefaultGlobalAS;
+}
+
 //===----------------------------------------------------------------------===//
 // SPARC v8 ABI Implementation.
 // Based on the SPARC Compliance Definition version 2.4.1.
@@ -8025,45 +8112,18 @@ class SPIRTargetCodeGenInfo : public TargetCodeGenInfo {
 public:
   SPIRTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT)
     : TargetCodeGenInfo(new DefaultABIInfo(CGT)) {}
-  void emitTargetMD(const Decl *D, llvm::GlobalValue *GV,
-                    CodeGen::CodeGenModule &M) const override;
   unsigned getOpenCLKernelCallingConv() const override;
 };
+
 } // End anonymous namespace.
 
-/// Emit SPIR specific metadata: OpenCL and SPIR version.
-void SPIRTargetCodeGenInfo::emitTargetMD(const Decl *D, llvm::GlobalValue *GV,
-                                         CodeGen::CodeGenModule &CGM) const {
-  llvm::LLVMContext &Ctx = CGM.getModule().getContext();
-  llvm::Type *Int32Ty = llvm::Type::getInt32Ty(Ctx);
-  llvm::Module &M = CGM.getModule();
-  // SPIR v2.0 s2.12 - The SPIR version used by the module is stored in the
-  // opencl.spir.version named metadata.
-  llvm::Metadata *SPIRVerElts[] = {
-      llvm::ConstantAsMetadata::get(
-          llvm::ConstantInt::get(Int32Ty, CGM.getLangOpts().OpenCLVersion / 100)),
-      llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
-          Int32Ty, (CGM.getLangOpts().OpenCLVersion / 100 > 1) ? 0 : 2))};
-  llvm::NamedMDNode *SPIRVerMD =
-      M.getOrInsertNamedMetadata("opencl.spir.version");
-  SPIRVerMD->addOperand(llvm::MDNode::get(Ctx, SPIRVerElts));
-  appendOpenCLVersionMD(CGM);
-}
-
-static void appendOpenCLVersionMD(CodeGen::CodeGenModule &CGM) {
-  llvm::LLVMContext &Ctx = CGM.getModule().getContext();
-  llvm::Type *Int32Ty = llvm::Type::getInt32Ty(Ctx);
-  llvm::Module &M = CGM.getModule();
-  // SPIR v2.0 s2.13 - The OpenCL version used by the module is stored in the
-  // opencl.ocl.version named metadata node.
-  llvm::Metadata *OCLVerElts[] = {
-      llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
-          Int32Ty, CGM.getLangOpts().OpenCLVersion / 100)),
-      llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
-          Int32Ty, (CGM.getLangOpts().OpenCLVersion % 100) / 10))};
-  llvm::NamedMDNode *OCLVerMD =
-      M.getOrInsertNamedMetadata("opencl.ocl.version");
-  OCLVerMD->addOperand(llvm::MDNode::get(Ctx, OCLVerElts));
+namespace clang {
+namespace CodeGen {
+void computeSPIRKernelABIInfo(CodeGenModule &CGM, CGFunctionInfo &FI) {
+  DefaultABIInfo SPIRABI(CGM.getTypes());
+  SPIRABI.computeInfo(FI);
+}
+}
 }
 
 unsigned SPIRTargetCodeGenInfo::getOpenCLKernelCallingConv() const {
@@ -8445,6 +8505,8 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() {
     AArch64ABIInfo::ABIKind Kind = AArch64ABIInfo::AAPCS;
     if (getTarget().getABI() == "darwinpcs")
       Kind = AArch64ABIInfo::DarwinPCS;
+    else if (Triple.isOSWindows())
+      Kind = AArch64ABIInfo::Win64;
 
     return SetCGInfo(new AArch64TargetCodeGenInfo(Types, Kind));
   }
diff --git a/interpreter/llvm/src/tools/clang/lib/CodeGen/TargetInfo.h b/interpreter/llvm/src/tools/clang/lib/CodeGen/TargetInfo.h
index 223d6d047af71..952ef96c4aef7 100644
--- a/interpreter/llvm/src/tools/clang/lib/CodeGen/TargetInfo.h
+++ b/interpreter/llvm/src/tools/clang/lib/CodeGen/TargetInfo.h
@@ -229,13 +229,36 @@ class TargetCodeGenInfo {
   virtual llvm::Constant *getNullPointer(const CodeGen::CodeGenModule &CGM,
       llvm::PointerType *T, QualType QT) const;
 
+  /// Get the target-favored AST address space of a global variable for
+  /// languages other than OpenCL and CUDA.
+  /// If \p D is nullptr, returns the default target-favored address space
+  /// for a global variable.
+  virtual unsigned getGlobalVarAddressSpace(CodeGenModule &CGM,
+                                            const VarDecl *D) const;
+
+  /// Get the AST address space for alloca.
+  virtual unsigned getASTAllocaAddressSpace() const { return LangAS::Default; }
+
   /// Perform address space cast of an expression of pointer type.
   /// \param V is the LLVM value to be casted to another address space.
-  /// \param SrcTy is the QualType of \p V.
-  /// \param DestTy is the destination QualType.
+  /// \param SrcAddr is the language address space of \p V.
+  /// \param DestAddr is the targeted language address space.
+  /// \param DestTy is the destination LLVM pointer type.
+  /// \param IsNonNull is a flag indicating that \p V is known to be non-null.
   virtual llvm::Value *performAddrSpaceCast(CodeGen::CodeGenFunction &CGF,
-      llvm::Value *V, QualType SrcTy, QualType DestTy) const;
+                                            llvm::Value *V, unsigned SrcAddr,
+                                            unsigned DestAddr,
+                                            llvm::Type *DestTy,
+                                            bool IsNonNull = false) const;
 
+  /// Perform address space cast of a constant expression of pointer type.
+  /// \param V is the LLVM constant to be casted to another address space.
+  /// \param SrcAddr is the language address space of \p V.
+  /// \param DestAddr is the targeted language address space.
+  /// \param DestTy is the destination LLVM pointer type.
+  virtual llvm::Constant *
+  performAddrSpaceCast(CodeGenModule &CGM, llvm::Constant *V, unsigned SrcAddr,
+                       unsigned DestAddr, llvm::Type *DestTy) const;
 };
 
 } // namespace CodeGen
diff --git a/interpreter/llvm/src/tools/clang/lib/Driver/CMakeLists.txt b/interpreter/llvm/src/tools/clang/lib/Driver/CMakeLists.txt
index 43d6aa8f99b8e..c7ca698d95a08 100644
--- a/interpreter/llvm/src/tools/clang/lib/Driver/CMakeLists.txt
+++ b/interpreter/llvm/src/tools/clang/lib/Driver/CMakeLists.txt
@@ -1,4 +1,5 @@
 set(LLVM_LINK_COMPONENTS
+  BinaryFormat
   Option
   Support
   )
@@ -27,9 +28,11 @@ add_clang_library(clangDriver
   ToolChains/Arch/Sparc.cpp
   ToolChains/Arch/SystemZ.cpp
   ToolChains/Arch/X86.cpp
+  ToolChains/Ananas.cpp
   ToolChains/AMDGPU.cpp
   ToolChains/AVR.cpp
   ToolChains/Bitrig.cpp
+  ToolChains/BareMetal.cpp
   ToolChains/Clang.cpp
   ToolChains/CloudABI.cpp
   ToolChains/CommonArgs.cpp
diff --git a/interpreter/llvm/src/tools/clang/lib/Driver/Compilation.cpp b/interpreter/llvm/src/tools/clang/lib/Driver/Compilation.cpp
index 5c13e59a0d73b..cf86644fb8cdf 100644
--- a/interpreter/llvm/src/tools/clang/lib/Driver/Compilation.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Driver/Compilation.cpp
@@ -23,10 +23,11 @@ using namespace clang;
 using namespace llvm::opt;
 
 Compilation::Compilation(const Driver &D, const ToolChain &_DefaultToolChain,
-                         InputArgList *_Args, DerivedArgList *_TranslatedArgs)
+                         InputArgList *_Args, DerivedArgList *_TranslatedArgs,
+                         bool ContainsError)
     : TheDriver(D), DefaultToolChain(_DefaultToolChain), ActiveOffloadMask(0u),
       Args(_Args), TranslatedArgs(_TranslatedArgs), Redirects(nullptr),
-      ForDiagnostics(false) {
+      ForDiagnostics(false), ContainsError(ContainsError) {
   // The offloading host toolchain is the default tool chain.
   OrderedOffloadingToolchains.insert(
       std::make_pair(Action::OFK_Host, &DefaultToolChain));
diff --git a/interpreter/llvm/src/tools/clang/lib/Driver/Driver.cpp b/interpreter/llvm/src/tools/clang/lib/Driver/Driver.cpp
index f36deff5d7342..ba4d0e836b44e 100644
--- a/interpreter/llvm/src/tools/clang/lib/Driver/Driver.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Driver/Driver.cpp
@@ -11,6 +11,7 @@
 #include "InputInfo.h"
 #include "ToolChains/AMDGPU.h"
 #include "ToolChains/AVR.h"
+#include "ToolChains/Ananas.h"
 #include "ToolChains/Bitrig.h"
 #include "ToolChains/Clang.h"
 #include "ToolChains/CloudABI.h"
@@ -22,6 +23,7 @@
 #include "ToolChains/FreeBSD.h"
 #include "ToolChains/Fuchsia.h"
 #include "ToolChains/Gnu.h"
+#include "ToolChains/BareMetal.h"
 #include "ToolChains/Haiku.h"
 #include "ToolChains/Hexagon.h"
 #include "ToolChains/Lanai.h"
@@ -150,8 +152,10 @@ void Driver::setDriverModeFromOption(StringRef Opt) {
     Diag(diag::err_drv_unsupported_option_argument) << OptName << Value;
 }
 
-InputArgList Driver::ParseArgStrings(ArrayRef<const char *> ArgStrings) {
+InputArgList Driver::ParseArgStrings(ArrayRef<const char *> ArgStrings,
+                                     bool &ContainsError) {
   llvm::PrettyStackTraceString CrashInfo("Command line argument parsing");
+  ContainsError = false;
 
   unsigned IncludedFlagsBitmask;
   unsigned ExcludedFlagsBitmask;
@@ -164,27 +168,41 @@ InputArgList Driver::ParseArgStrings(ArrayRef ArgStrings) {
                           IncludedFlagsBitmask, ExcludedFlagsBitmask);
 
   // Check for missing argument error.
-  if (MissingArgCount)
-    Diag(clang::diag::err_drv_missing_argument)
+  if (MissingArgCount) {
+    Diag(diag::err_drv_missing_argument)
         << Args.getArgString(MissingArgIndex) << MissingArgCount;
+    ContainsError |=
+        Diags.getDiagnosticLevel(diag::err_drv_missing_argument,
+                                 SourceLocation()) > DiagnosticsEngine::Warning;
+  }
 
   // Check for unsupported options.
   for (const Arg *A : Args) {
     if (A->getOption().hasFlag(options::Unsupported)) {
-      Diag(clang::diag::err_drv_unsupported_opt) << A->getAsString(Args);
+      Diag(diag::err_drv_unsupported_opt) << A->getAsString(Args);
+      ContainsError |= Diags.getDiagnosticLevel(diag::err_drv_unsupported_opt,
+                                                SourceLocation()) >
+                       DiagnosticsEngine::Warning;
       continue;
     }
 
     // Warn about -mcpu= without an argument.
     if (A->getOption().matches(options::OPT_mcpu_EQ) && A->containsValue("")) {
-      Diag(clang::diag::warn_drv_empty_joined_argument) << A->getAsString(Args);
+      Diag(diag::warn_drv_empty_joined_argument) << A->getAsString(Args);
+      ContainsError |= Diags.getDiagnosticLevel(
+                           diag::warn_drv_empty_joined_argument,
+                           SourceLocation()) > DiagnosticsEngine::Warning;
     }
   }
 
-  for (const Arg *A : Args.filtered(options::OPT_UNKNOWN))
-    Diags.Report(IsCLMode() ? diag::warn_drv_unknown_argument_clang_cl :
-                              diag::err_drv_unknown_argument)
-      << A->getAsString(Args);
+  for (const Arg *A : Args.filtered(options::OPT_UNKNOWN)) {
+    auto ID = IsCLMode() ? diag::warn_drv_unknown_argument_clang_cl
+                         : diag::err_drv_unknown_argument;
+
+    Diags.Report(ID) << A->getAsString(Args);
+    ContainsError |= Diags.getDiagnosticLevel(ID, SourceLocation()) >
+                     DiagnosticsEngine::Warning;
+  }
 
   return Args;
 }
@@ -553,8 +571,22 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
           if (TT.getArch() == llvm::Triple::UnknownArch)
             Diag(clang::diag::err_drv_invalid_omp_target) << Val;
           else {
-            const ToolChain &TC = getToolChain(C.getInputArgs(), TT);
-            C.addOffloadDeviceToolChain(&TC, Action::OFK_OpenMP);
+            const ToolChain *TC;
+            // CUDA toolchains have to be selected differently. They pair host
+            // and device in their implementation.
+            if (TT.isNVPTX()) {
+              const ToolChain *HostTC =
+                  C.getSingleOffloadToolChain<Action::OFK_Host>();
+              assert(HostTC && "Host toolchain should be always defined.");
+              auto &CudaTC =
+                  ToolChains[TT.str() + "/" + HostTC->getTriple().str()];
+              if (!CudaTC)
+                CudaTC = llvm::make_unique<toolchains::CudaToolChain>(
+                    *this, TT, *HostTC, C.getInputArgs());
+              TC = CudaTC.get();
+            } else
+              TC = &getToolChain(C.getInputArgs(), TT);
+            C.addOffloadDeviceToolChain(TC, Action::OFK_OpenMP);
           }
         }
       } else
@@ -597,7 +629,8 @@ Compilation *Driver::BuildCompilation(ArrayRef<const char *> ArgList) {
   // FIXME: This stuff needs to go into the Compilation, not the driver.
   bool CCCPrintPhases;
 
-  InputArgList Args = ParseArgStrings(ArgList.slice(1));
+  bool ContainsError;
+  InputArgList Args = ParseArgStrings(ArgList.slice(1), ContainsError);
 
   // Silence driver warnings if requested
   Diags.setIgnoreAllWarnings(Args.hasArg(options::OPT_w));
@@ -687,7 +720,8 @@ Compilation *Driver::BuildCompilation(ArrayRef ArgList) {
       *UArgs, computeTargetTriple(*this, DefaultTargetTriple, *UArgs));
 
   // The compilation takes ownership of Args.
-  Compilation *C = new Compilation(*this, TC, UArgs.release(), TranslatedArgs);
+  Compilation *C = new Compilation(*this, TC, UArgs.release(), TranslatedArgs,
+                                   ContainsError);
 
   if (!HandleImmediateArgs(*C))
     return C;
@@ -1216,6 +1250,54 @@ bool Driver::HandleImmediateArgs(const Compilation &C) {
     return false;
   }
 
+  if (Arg *A = C.getArgs().getLastArg(options::OPT_autocomplete)) {
+    // Print out all options that start with a given argument. This is used for
+    // shell autocompletion.
+    StringRef PassedFlags = A->getValue();
+    std::vector<std::string> SuggestedCompletions;
+
+    unsigned short DisableFlags =
+        options::NoDriverOption | options::Unsupported | options::Ignored;
+    // We want to show cc1-only options only when clang is invoked as
+    // "clang -cc1". When clang is invoked as "clang -cc1", we add "#" to the
+    // beginning of an --autocomplete option so that the clang driver can
+    // distinguish whether it is requested to show cc1-only options or not.
+    if (PassedFlags[0] == '#') {
+      DisableFlags &= ~options::NoDriverOption;
+      PassedFlags = PassedFlags.substr(1);
+    }
+
+    if (PassedFlags.find(',') == StringRef::npos) {
+      // If the flag is in the form of "--autocomplete=-foo",
+      // we were requested to print out all option names that start with "-foo".
+      // For example, "--autocomplete=-fsyn" is expanded to "-fsyntax-only".
+      SuggestedCompletions = Opts->findByPrefix(PassedFlags, DisableFlags);
+
+      // We have to query the -W flags manually as they're not in the OptTable.
+      // TODO: Find a good way to add them to OptTable instead and then remove
+      // this code.
+      for (StringRef S : DiagnosticIDs::getDiagnosticFlags())
+        if (S.startswith(PassedFlags))
+          SuggestedCompletions.push_back(S);
+    } else {
+      // If the flag is in the form of "--autocomplete=foo,bar", we were
+      // requested to print out all option values for "-foo" that start with
+      // "bar". For example,
+      // "--autocomplete=-stdlib=,l" is expanded to "libc++" and "libstdc++".
+      StringRef Option, Arg;
+      std::tie(Option, Arg) = PassedFlags.split(',');
+      SuggestedCompletions = Opts->suggestValueCompletions(Option, Arg);
+    }
+
+    // Sort the autocomplete candidates so that shells print them out in a
+    // deterministic order. We could sort in any way, but we chose
+    // case-insensitive sorting for consistency with the -help option
+    // which prints out options in the case-insensitive alphabetical order.
+    std::sort(SuggestedCompletions.begin(), SuggestedCompletions.end(),
+              [](StringRef A, StringRef B) { return A.compare_lower(B) < 0; });
+
+    llvm::outs() << llvm::join(SuggestedCompletions, "\n") << '\n';
+    return false;
+  }
+
   if (C.getArgs().hasArg(options::OPT_print_libgcc_file_name)) {
     ToolChain::RuntimeLibType RLT = TC.GetRuntimeLibType(C.getArgs());
     switch (RLT) {
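
[Editor's illustration, not part of the patch] Concrete invocations of the behavior implemented above, taken from its own comments: "clang --autocomplete=-fsyn" prints -fsyntax-only, and "clang --autocomplete=-stdlib=,l" prints libc++ and libstdc++, sorted case-insensitively.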
@@ -2657,6 +2739,8 @@ Action *Driver::ConstructPhaseAction(Compilation &C, const ArgList &Args,
       OutputTy = Input->getType();
       if (!Args.hasFlag(options::OPT_frewrite_includes,
                         options::OPT_fno_rewrite_includes, false) &&
+          !Args.hasFlag(options::OPT_frewrite_imports,
+                        options::OPT_fno_rewrite_imports, false) &&
           !CCGenDiagnostics)
         OutputTy = types::getPreprocessedType(OutputTy);
       assert(OutputTy != types::TY_INVALID &&
@@ -3707,6 +3791,9 @@ const ToolChain &Driver::getToolChain(const ArgList &Args,
     case llvm::Triple::Haiku:
       TC = llvm::make_unique<toolchains::Haiku>(*this, Target, Args);
       break;
+    case llvm::Triple::Ananas:
+      TC = llvm::make_unique<toolchains::Ananas>(*this, Target, Args);
+      break;
     case llvm::Triple::CloudABI:
       TC = llvm::make_unique<toolchains::CloudABI>(*this, Target, Args);
       break;
@@ -3819,6 +3906,8 @@ const ToolChain &Driver::getToolChain(const ArgList &Args,
         if (Target.getVendor() == llvm::Triple::Myriad)
           TC = llvm::make_unique<toolchains::MyriadToolChain>(*this, Target,
                                                               Args);
+        else if (toolchains::BareMetal::handlesTarget(Target))
+          TC = llvm::make_unique<toolchains::BareMetal>(*this, Target, Args);
         else if (Target.isOSBinFormatELF())
           TC = llvm::make_unique<toolchains::Generic_ELF>(*this, Target, Args);
         else if (Target.isOSBinFormatMachO())
diff --git a/interpreter/llvm/src/tools/clang/lib/Driver/DriverOptions.cpp b/interpreter/llvm/src/tools/clang/lib/Driver/DriverOptions.cpp
index 6a7410901d254..ac63b96cf96d1 100644
--- a/interpreter/llvm/src/tools/clang/lib/Driver/DriverOptions.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Driver/DriverOptions.cpp
@@ -21,10 +21,10 @@ using namespace llvm::opt;
 #undef PREFIX
 
 static const OptTable::Info InfoTable[] = {
-#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \
-               HELPTEXT, METAVAR)   \
-  { PREFIX, NAME, HELPTEXT, METAVAR, OPT_##ID, Option::KIND##Class, PARAM, \
-    FLAGS, OPT_##GROUP, OPT_##ALIAS, ALIASARGS },
+#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM,  \
+               HELPTEXT, METAVAR, VALUES)                                      \
+  {PREFIX, NAME,  HELPTEXT,    METAVAR,     OPT_##ID,  Option::KIND##Class,    \
+   PARAM,  FLAGS, OPT_##GROUP, OPT_##ALIAS, ALIASARGS, VALUES},
 #include "clang/Driver/Options.inc"
 #undef OPTION
 };
diff --git a/interpreter/llvm/src/tools/clang/lib/Driver/SanitizerArgs.cpp b/interpreter/llvm/src/tools/clang/lib/Driver/SanitizerArgs.cpp
index 9ab2e176845cf..7a442c83e1588 100644
--- a/interpreter/llvm/src/tools/clang/lib/Driver/SanitizerArgs.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Driver/SanitizerArgs.cpp
@@ -31,8 +31,8 @@ enum : SanitizerMask {
   NotAllowedWithTrap = Vptr,
   RequiresPIE = DataFlow,
   NeedsUnwindTables = Address | Thread | Memory | DataFlow,
-  SupportsCoverage =
-      Address | Memory | Leak | Undefined | Integer | Nullability | DataFlow,
+  SupportsCoverage = Address | KernelAddress | Memory | Leak | Undefined |
+                     Integer | Nullability | DataFlow | Fuzzer,
   RecoverableByDefault = Undefined | Integer | Nullability,
   Unrecoverable = Unreachable | Return,
   LegacyFsanitizeRecoverMask = Undefined | Integer,
@@ -48,13 +48,14 @@ enum CoverageFeature {
   CoverageBB = 1 << 1,
   CoverageEdge = 1 << 2,
   CoverageIndirCall = 1 << 3,
-  CoverageTraceBB = 1 << 4,
+  CoverageTraceBB = 1 << 4,  // Deprecated.
   CoverageTraceCmp = 1 << 5,
   CoverageTraceDiv = 1 << 6,
   CoverageTraceGep = 1 << 7,
-  Coverage8bitCounters = 1 << 8,
+  Coverage8bitCounters = 1 << 8,  // Deprecated.
   CoverageTracePC = 1 << 9,
   CoverageTracePCGuard = 1 << 10,
+  CoverageInline8bitCounters = 1 << 12,
   CoverageNoPrune = 1 << 11,
 };
 
@@ -207,12 +208,28 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC,
   SanitizerMask TrappingKinds = parseSanitizeTrapArgs(D, Args);
   SanitizerMask InvalidTrappingKinds = TrappingKinds & NotAllowedWithTrap;
 
+  // The object size sanitizer should not be enabled at -O0.
+  Arg *OptLevel = Args.getLastArg(options::OPT_O_Group);
+  bool RemoveObjectSizeAtO0 =
+      !OptLevel || OptLevel->getOption().matches(options::OPT_O0);
+
   for (ArgList::const_reverse_iterator I = Args.rbegin(), E = Args.rend();
        I != E; ++I) {
     const auto *Arg = *I;
     if (Arg->getOption().matches(options::OPT_fsanitize_EQ)) {
       Arg->claim();
-      SanitizerMask Add = parseArgValues(D, Arg, true);
+      SanitizerMask Add = parseArgValues(D, Arg, /*AllowGroups=*/true);
+
+      if (RemoveObjectSizeAtO0) {
+        AllRemove |= SanitizerKind::ObjectSize;
+
+        // The user explicitly enabled the object size sanitizer. Warn
+        // that it does nothing at -O0.
+        if (Add & SanitizerKind::ObjectSize)
+          D.Diag(diag::warn_drv_object_size_disabled_O0)
+              << Arg->getAsString(Args);
+      }
+
       AllAddedKinds |= expandSanitizerGroups(Add);
 
       // Avoid diagnosing any sanitizer which is disabled later.
@@ -530,7 +547,8 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC,
   }
 
   // trace-pc w/o func/bb/edge implies edge.
-  if ((CoverageFeatures & (CoverageTracePC | CoverageTracePCGuard)) &&
+  if ((CoverageFeatures &
+       (CoverageTracePC | CoverageTracePCGuard | CoverageInline8bitCounters)) &&
       !(CoverageFeatures & InsertionPointTypes))
     CoverageFeatures |= CoverageEdge;
 
@@ -637,6 +655,7 @@ void SanitizerArgs::addArgs(const ToolChain &TC, const llvm::opt::ArgList &Args,
     std::make_pair(Coverage8bitCounters, "-fsanitize-coverage-8bit-counters"),
     std::make_pair(CoverageTracePC, "-fsanitize-coverage-trace-pc"),
     std::make_pair(CoverageTracePCGuard, "-fsanitize-coverage-trace-pc-guard"),
+    std::make_pair(CoverageInline8bitCounters, "-fsanitize-coverage-inline-8bit-counters"),
     std::make_pair(CoverageNoPrune, "-fsanitize-coverage-no-prune")};
   for (auto F : CoverageFlags) {
     if (CoverageFeatures & F.first)
@@ -798,6 +817,7 @@ int parseCoverageFeatures(const Driver &D, const llvm::opt::Arg *A) {
         .Case("trace-pc", CoverageTracePC)
         .Case("trace-pc-guard", CoverageTracePCGuard)
         .Case("no-prune", CoverageNoPrune)
+        .Case("inline-8bit-counters", CoverageInline8bitCounters)
         .Default(0);
     if (F == 0)
       D.Diag(clang::diag::err_drv_unsupported_option_argument)
diff --git a/interpreter/llvm/src/tools/clang/lib/Driver/ToolChain.cpp b/interpreter/llvm/src/tools/clang/lib/Driver/ToolChain.cpp
index 4f82503276f4b..9a858df8ab2dd 100644
--- a/interpreter/llvm/src/tools/clang/lib/Driver/ToolChain.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Driver/ToolChain.cpp
@@ -217,7 +217,7 @@ StringRef ToolChain::getDefaultUniversalArchName() const {
   }
 }
 
-bool ToolChain::IsUnwindTablesDefault() const {
+bool ToolChain::IsUnwindTablesDefault(const ArgList &Args) const {
   return false;
 }
 
@@ -544,9 +544,9 @@ void ToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
   // Each toolchain should provide the appropriate include flags.
 }
 
-void ToolChain::addClangTargetOptions(const ArgList &DriverArgs,
-                                      ArgStringList &CC1Args) const {
-}
+void ToolChain::addClangTargetOptions(
+    const ArgList &DriverArgs, ArgStringList &CC1Args,
+    Action::OffloadKind DeviceOffloadKind) const {}
 
 void ToolChain::addClangWarningOptions(ArgStringList &CC1Args) const {}
 
diff --git a/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/Ananas.cpp b/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/Ananas.cpp
new file mode 100644
index 0000000000000..a67e1d2378f5d
--- /dev/null
+++ b/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/Ananas.cpp
@@ -0,0 +1,120 @@
+//===--- Ananas.cpp - Ananas ToolChain Implementations ------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Ananas.h"
+#include "InputInfo.h"
+#include "CommonArgs.h"
+#include "clang/Config/config.h"
+#include "clang/Driver/Compilation.h"
+#include "clang/Driver/Driver.h"
+#include "clang/Driver/Options.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Option/ArgList.h"
+#include "llvm/Support/Path.h"
+
+using namespace clang::driver;
+using namespace clang::driver::tools;
+using namespace clang::driver::toolchains;
+using namespace clang;
+using namespace llvm::opt;
+
+void ananas::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
+                                     const InputInfo &Output,
+                                     const InputInfoList &Inputs,
+                                     const ArgList &Args,
+                                     const char *LinkingOutput) const {
+  claimNoWarnArgs(Args);
+  ArgStringList CmdArgs;
+
+  Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler);
+
+  CmdArgs.push_back("-o");
+  CmdArgs.push_back(Output.getFilename());
+
+  for (const auto &II : Inputs)
+    CmdArgs.push_back(II.getFilename());
+
+  const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("as"));
+  C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
+}
+
+void ananas::Linker::ConstructJob(Compilation &C, const JobAction &JA,
+                                  const InputInfo &Output,
+                                  const InputInfoList &Inputs,
+                                  const ArgList &Args,
+                                  const char *LinkingOutput) const {
+  const ToolChain &ToolChain = getToolChain();
+  const Driver &D = ToolChain.getDriver();
+  ArgStringList CmdArgs;
+
+  // Silence warning for "clang -g foo.o -o foo"
+  Args.ClaimAllArgs(options::OPT_g_Group);
+  // and "clang -emit-llvm foo.o -o foo"
+  Args.ClaimAllArgs(options::OPT_emit_llvm);
+  // and for "clang -w foo.o -o foo". Other warning options are already
+  // handled somewhere else.
+  Args.ClaimAllArgs(options::OPT_w);
+
+  if (!D.SysRoot.empty())
+    CmdArgs.push_back(Args.MakeArgString("--sysroot=" + D.SysRoot));
+
+  // Ananas only supports static linkage for now.
+  CmdArgs.push_back("-Bstatic");
+
+  if (Output.isFilename()) {
+    CmdArgs.push_back("-o");
+    CmdArgs.push_back(Output.getFilename());
+  } else {
+    assert(Output.isNothing() && "Invalid output.");
+  }
+
+  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) {
+    CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crt0.o")));
+    CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crti.o")));
+    CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crtbegin.o")));
+  }
+
+  Args.AddAllArgs(CmdArgs, options::OPT_L);
+  ToolChain.AddFilePathLibArgs(Args, CmdArgs);
+  Args.AddAllArgs(CmdArgs,
+                  {options::OPT_T_Group, options::OPT_e, options::OPT_s,
+                   options::OPT_t, options::OPT_Z_Flag, options::OPT_r});
+
+  if (D.isUsingLTO())
+    AddGoldPlugin(ToolChain, Args, CmdArgs, D.getLTOMode() == LTOK_Thin, D);
+
+  AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs, JA);
+
+  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) {
+    if (D.CCCIsCXX())
+      ToolChain.AddCXXStdlibLibArgs(Args, CmdArgs);
+    CmdArgs.push_back("-lc");
+  }
+
+  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) {
+    CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crtend.o")));
+    CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crtn.o")));
+  }
+
+  const char *Exec = Args.MakeArgString(ToolChain.GetLinkerPath());
+  C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
+}
+
+// Ananas - Ananas tool chain which can call as(1) and ld(1) directly.
+
+Ananas::Ananas(const Driver &D, const llvm::Triple &Triple, const ArgList &Args)
+    : Generic_ELF(D, Triple, Args) {
+  getFilePaths().push_back(getDriver().SysRoot + "/usr/lib");
+}
+
+Tool *Ananas::buildAssembler() const {
+  return new tools::ananas::Assembler(*this);
+}
+
+Tool *Ananas::buildLinker() const { return new tools::ananas::Linker(*this); }
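
[Editor's illustration, not part of the patch] The linker job constructed above amounts to a link line of roughly this shape (paths hypothetical, static-only as noted in the code):

    ld --sysroot=<sysroot> -Bstatic -o prog crt0.o crti.o crtbegin.o prog.o -lc crtend.o crtn.o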
diff --git a/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/Ananas.h b/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/Ananas.h
new file mode 100644
index 0000000000000..2563dd2d49a9f
--- /dev/null
+++ b/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/Ananas.h
@@ -0,0 +1,67 @@
+//===--- Ananas.h - Ananas ToolChain Implementations --------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_ANANAS_H
+#define LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_ANANAS_H
+
+#include "Gnu.h"
+#include "clang/Driver/Tool.h"
+#include "clang/Driver/ToolChain.h"
+
+namespace clang {
+namespace driver {
+namespace tools {
+
+/// ananas -- Directly call GNU Binutils assembler and linker
+namespace ananas {
+class LLVM_LIBRARY_VISIBILITY Assembler : public GnuTool {
+public:
+  Assembler(const ToolChain &TC)
+      : GnuTool("ananas::Assembler", "assembler", TC) {}
+
+  bool hasIntegratedCPP() const override { return false; }
+
+  void ConstructJob(Compilation &C, const JobAction &JA,
+                    const InputInfo &Output, const InputInfoList &Inputs,
+                    const llvm::opt::ArgList &TCArgs,
+                    const char *LinkingOutput) const override;
+};
+
+class LLVM_LIBRARY_VISIBILITY Linker : public GnuTool {
+public:
+  Linker(const ToolChain &TC) : GnuTool("ananas::Linker", "linker", TC) {}
+
+  bool hasIntegratedCPP() const override { return false; }
+  bool isLinkJob() const override { return true; }
+
+  void ConstructJob(Compilation &C, const JobAction &JA,
+                    const InputInfo &Output, const InputInfoList &Inputs,
+                    const llvm::opt::ArgList &TCArgs,
+                    const char *LinkingOutput) const override;
+};
+} // end namespace ananas
+} // end namespace tools
+
+namespace toolchains {
+
+class LLVM_LIBRARY_VISIBILITY Ananas : public Generic_ELF {
+public:
+  Ananas(const Driver &D, const llvm::Triple &Triple,
+         const llvm::opt::ArgList &Args);
+
+protected:
+  Tool *buildAssembler() const override;
+  Tool *buildLinker() const override;
+};
+
+} // end namespace toolchains
+} // end namespace driver
+} // end namespace clang
+
+#endif // LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_ANANAS_H
diff --git a/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/Arch/ARM.cpp b/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/Arch/ARM.cpp
index 4eac976201145..95b86f784f914 100644
--- a/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/Arch/ARM.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/Arch/ARM.cpp
@@ -179,6 +179,18 @@ arm::FloatABI arm::getARMFloatABI(const ToolChain &TC, const ArgList &Args) {
       ABI = FloatABI::Hard;
       break;
 
+    case llvm::Triple::NetBSD:
+      switch (Triple.getEnvironment()) {
+      case llvm::Triple::EABIHF:
+      case llvm::Triple::GNUEABIHF:
+        ABI = FloatABI::Hard;
+        break;
+      default:
+        ABI = FloatABI::Soft;
+        break;
+      }
+      break;
+
     case llvm::Triple::FreeBSD:
       switch (Triple.getEnvironment()) {
       case llvm::Triple::GNUEABIHF:
@@ -392,9 +404,7 @@ void arm::getARMTargetFeatures(const ToolChain &TC,
           if (B->getOption().matches(options::OPT_mlong_calls))
             D.Diag(diag::err_opt_not_valid_with_opt) << A->getAsString(Args) << B->getAsString(Args);
         }
-
-        CmdArgs.push_back("-backend-option");
-        CmdArgs.push_back("-arm-execute-only");
+        Features.push_back("+execute-only");
       }
     }
   }
diff --git a/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/Arch/Mips.cpp b/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/Arch/Mips.cpp
index f33542477fb5d..b45dcd6db6780 100644
--- a/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/Arch/Mips.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/Arch/Mips.cpp
@@ -227,12 +227,11 @@ void mips::getMIPSTargetFeatures(const Driver &D, const llvm::Triple &Triple,
          O.matches(options::OPT_fno_PIE) || O.matches(options::OPT_fno_pie));
   }
 
-  if (IsN64 && NonPIC) {
+  if (IsN64 && NonPIC)
     Features.push_back("+noabicalls");
-  } else {
+  else
     AddTargetFeature(Args, Features, options::OPT_mno_abicalls,
                      options::OPT_mabicalls, "noabicalls");
-  }
 
   mips::FloatABI FloatABI = mips::getMipsFloatABI(D, Args);
   if (FloatABI == mips::FloatABI::Soft) {
@@ -298,6 +297,11 @@ void mips::getMIPSTargetFeatures(const Driver &D, const llvm::Triple &Triple,
 
   AddTargetFeature(Args, Features, options::OPT_mno_odd_spreg,
                    options::OPT_modd_spreg, "nooddspreg");
+  AddTargetFeature(Args, Features, options::OPT_mno_madd4, options::OPT_mmadd4,
+                   "nomadd4");
+  AddTargetFeature(Args, Features, options::OPT_mlong_calls,
+                   options::OPT_mno_long_calls, "long-calls");
+  AddTargetFeature(Args, Features, options::OPT_mmt, options::OPT_mno_mt,
+                   "mt");
 }
 
 mips::NanEncoding mips::getSupportedNanEncoding(StringRef &CPU) {
diff --git a/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/BareMetal.cpp b/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/BareMetal.cpp
new file mode 100644
index 0000000000000..5dc6dfad927b6
--- /dev/null
+++ b/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/BareMetal.cpp
@@ -0,0 +1,211 @@
+//===--- BareMetal.cpp - Bare Metal ToolChain -------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "BareMetal.h"
+
+#include "CommonArgs.h"
+#include "InputInfo.h"
+#include "Gnu.h"
+
+#include "clang/Basic/VirtualFileSystem.h"
+#include "clang/Driver/Compilation.h"
+#include "clang/Driver/Driver.h"
+#include "clang/Driver/DriverDiagnostic.h"
+#include "clang/Driver/Options.h"
+#include "llvm/Option/ArgList.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm::opt;
+using namespace clang;
+using namespace clang::driver;
+using namespace clang::driver::tools;
+using namespace clang::driver::toolchains;
+
+BareMetal::BareMetal(const Driver &D, const llvm::Triple &Triple,
+                           const ArgList &Args)
+    : ToolChain(D, Triple, Args) {
+  getProgramPaths().push_back(getDriver().getInstalledDir());
+  if (getDriver().getInstalledDir() != getDriver().Dir)
+    getProgramPaths().push_back(getDriver().Dir);
+}
+
+BareMetal::~BareMetal() {}
+
+/// Is the triple {arm,thumb}-none-none-{eabi,eabihf} ?
+static bool isARMBareMetal(const llvm::Triple &Triple) {
+  if (Triple.getArch() != llvm::Triple::arm &&
+      Triple.getArch() != llvm::Triple::thumb)
+    return false;
+
+  if (Triple.getVendor() != llvm::Triple::UnknownVendor)
+    return false;
+
+  if (Triple.getOS() != llvm::Triple::UnknownOS)
+    return false;
+
+  if (Triple.getEnvironment() != llvm::Triple::EABI &&
+      Triple.getEnvironment() != llvm::Triple::EABIHF)
+    return false;
+
+  return true;
+}
+
+bool BareMetal::handlesTarget(const llvm::Triple &Triple) {
+  return isARMBareMetal(Triple);
+}
+
+Tool *BareMetal::buildLinker() const {
+  return new tools::baremetal::Linker(*this);
+}
+
+std::string BareMetal::getThreadModel() const {
+  return "single";
+}
+
+bool BareMetal::isThreadModelSupported(const StringRef Model) const {
+  return Model == "single";
+}
+
+std::string BareMetal::getRuntimesDir() const {
+  SmallString<128> Dir(getDriver().ResourceDir);
+  llvm::sys::path::append(Dir, "lib", "baremetal");
+  return Dir.str();
+}
+
+void BareMetal::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
+                                          ArgStringList &CC1Args) const {
+  if (DriverArgs.hasArg(options::OPT_nostdinc))
+    return;
+
+  if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) {
+    SmallString<128> Dir(getDriver().ResourceDir);
+    llvm::sys::path::append(Dir, "include");
+    addSystemInclude(DriverArgs, CC1Args, Dir.str());
+  }
+
+  if (!DriverArgs.hasArg(options::OPT_nostdlibinc)) {
+    SmallString<128> Dir(getDriver().SysRoot);
+    llvm::sys::path::append(Dir, "include");
+    addSystemInclude(DriverArgs, CC1Args, Dir.str());
+  }
+}
+
+void BareMetal::addClangTargetOptions(const ArgList &DriverArgs,
+                                      ArgStringList &CC1Args,
+                                      Action::OffloadKind) const {
+  CC1Args.push_back("-nostdsysteminc");
+}
+
+std::string BareMetal::findLibCxxIncludePath(CXXStdlibType LibType) const {
+  StringRef SysRoot = getDriver().SysRoot;
+  if (SysRoot.empty())
+    return "";
+
+  switch (LibType) {
+  case ToolChain::CST_Libcxx: {
+    SmallString<128> Dir(SysRoot);
+    llvm::sys::path::append(Dir, "include", "c++", "v1");
+    return Dir.str();
+  }
+  case ToolChain::CST_Libstdcxx: {
+    SmallString<128> Dir(SysRoot);
+    llvm::sys::path::append(Dir, "include", "c++");
+    std::error_code EC;
+    Generic_GCC::GCCVersion Version = {"", -1, -1, -1, "", "", ""};
+    // Walk the subdirs, and find the one with the newest gcc version:
+    for (vfs::directory_iterator LI =
+           getDriver().getVFS().dir_begin(Dir.str(), EC), LE;
+         !EC && LI != LE; LI = LI.increment(EC)) {
+      StringRef VersionText = llvm::sys::path::filename(LI->getName());
+      auto CandidateVersion = Generic_GCC::GCCVersion::Parse(VersionText);
+      if (CandidateVersion.Major == -1)
+        continue;
+      if (CandidateVersion <= Version)
+        continue;
+      Version = CandidateVersion;
+    }
+    if (Version.Major == -1)
+      return "";
+    llvm::sys::path::append(Dir, Version.Text);
+    return Dir.str();
+  }
+  }
+  llvm_unreachable("unhandled LibType");
+}
+
+void BareMetal::AddClangCXXStdlibIncludeArgs(
+    const ArgList &DriverArgs, ArgStringList &CC1Args) const {
+  if (DriverArgs.hasArg(options::OPT_nostdinc) ||
+      DriverArgs.hasArg(options::OPT_nostdlibinc) ||
+      DriverArgs.hasArg(options::OPT_nostdincxx))
+    return;
+
+  std::string Path = findLibCxxIncludePath(GetCXXStdlibType(DriverArgs));
+  if (!Path.empty())
+    addSystemInclude(DriverArgs, CC1Args, Path);
+}
+
+void BareMetal::AddCXXStdlibLibArgs(const ArgList &Args,
+                                    ArgStringList &CmdArgs) const {
+  switch (GetCXXStdlibType(Args)) {
+  case ToolChain::CST_Libcxx:
+    CmdArgs.push_back("-lc++");
+    CmdArgs.push_back("-lc++abi");
+    break;
+  case ToolChain::CST_Libstdcxx:
+    CmdArgs.push_back("-lstdc++");
+    CmdArgs.push_back("-lsupc++");
+    break;
+  }
+  CmdArgs.push_back("-lunwind");
+}
+
+void BareMetal::AddLinkRuntimeLib(const ArgList &Args,
+                                  ArgStringList &CmdArgs) const {
+  CmdArgs.push_back(Args.MakeArgString("-lclang_rt.builtins-" +
+                                       getTriple().getArchName() + ".a"));
+}
+
+void baremetal::Linker::ConstructJob(Compilation &C, const JobAction &JA,
+                                     const InputInfo &Output,
+                                     const InputInfoList &Inputs,
+                                     const ArgList &Args,
+                                     const char *LinkingOutput) const {
+  ArgStringList CmdArgs;
+
+  auto &TC = static_cast<const toolchains::BareMetal &>(getToolChain());
+
+  AddLinkerInputs(TC, Inputs, Args, CmdArgs, JA);
+
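+  // Bare-metal images are linked statically; there is no dynamic linker on
+  // the target to resolve shared objects at run time.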
+  CmdArgs.push_back("-Bstatic");
+
+  CmdArgs.push_back(Args.MakeArgString("-L" + TC.getRuntimesDir()));
+
+  Args.AddAllArgs(CmdArgs, {options::OPT_L, options::OPT_T_Group,
+                            options::OPT_e, options::OPT_s, options::OPT_t,
+                            options::OPT_Z_Flag, options::OPT_r});
+
+  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) {
+    if (C.getDriver().CCCIsCXX())
+      TC.AddCXXStdlibLibArgs(Args, CmdArgs);
+
+    CmdArgs.push_back("-lc");
+    CmdArgs.push_back("-lm");
+
+    TC.AddLinkRuntimeLib(Args, CmdArgs);
+  }
+
+  CmdArgs.push_back("-o");
+  CmdArgs.push_back(Output.getFilename());
+
+  C.addCommand(llvm::make_unique<Command>(JA, *this,
+                                          Args.MakeArgString(TC.GetLinkerPath()),
+                                          CmdArgs, Inputs));
+}
diff --git a/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/BareMetal.h b/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/BareMetal.h
new file mode 100644
index 0000000000000..4b74899fa53e7
--- /dev/null
+++ b/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/BareMetal.h
@@ -0,0 +1,91 @@
+//===--- BareMetal.h - Bare Metal Tool and ToolChain -------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_BAREMETAL_H
+#define LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_BAREMETAL_H
+
+#include "clang/Driver/Tool.h"
+#include "clang/Driver/ToolChain.h"
+
+#include <string>
+
+namespace clang {
+namespace driver {
+
+namespace toolchains {
+
+class LLVM_LIBRARY_VISIBILITY BareMetal : public ToolChain {
+public:
+  BareMetal(const Driver &D, const llvm::Triple &Triple,
+            const llvm::opt::ArgList &Args);
+  ~BareMetal() override;
+
+  static bool handlesTarget(const llvm::Triple &Triple);
+protected:
+  Tool *buildLinker() const override;
+
+public:
+  bool useIntegratedAs() const override { return true; }
+  bool isCrossCompiling() const override { return true; }
+  bool isPICDefault() const override { return false; }
+  bool isPIEDefault() const override { return false; }
+  bool isPICDefaultForced() const override { return false; }
+  bool SupportsProfiling() const override { return false; }
+  bool SupportsObjCGC() const override { return false; }
+  std::string getThreadModel() const override;
+  bool isThreadModelSupported(const StringRef Model) const override;
+
+  RuntimeLibType GetDefaultRuntimeLibType() const override {
+    return ToolChain::RLT_CompilerRT;
+  }
+  CXXStdlibType GetDefaultCXXStdlibType() const override {
+    return ToolChain::CST_Libcxx;
+  }
+
+  const char *getDefaultLinker() const override { return "ld.lld"; }
+
+  std::string getRuntimesDir() const;
+  void AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs,
+                                 llvm::opt::ArgStringList &CC1Args) const override;
+  void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs,
+                             llvm::opt::ArgStringList &CC1Args,
+                             Action::OffloadKind DeviceOffloadKind) const override;
+  std::string findLibCxxIncludePath(ToolChain::CXXStdlibType LibType) const;
+  void AddClangCXXStdlibIncludeArgs(
+      const llvm::opt::ArgList &DriverArgs,
+      llvm::opt::ArgStringList &CC1Args) const override;
+  void AddCXXStdlibLibArgs(const llvm::opt::ArgList &Args,
+                           llvm::opt::ArgStringList &CmdArgs) const override;
+  void AddLinkRuntimeLib(const llvm::opt::ArgList &Args,
+                         llvm::opt::ArgStringList &CmdArgs) const;
+};
+
+} // namespace toolchains
+
+namespace tools {
+namespace baremetal {
+
+class LLVM_LIBRARY_VISIBILITY Linker : public Tool {
+public:
+  Linker(const ToolChain &TC) : Tool("baremetal::Linker", "ld.lld", TC) {}
+  bool isLinkJob() const override { return true; }
+  bool hasIntegratedCPP() const override { return false; }
+  void ConstructJob(Compilation &C, const JobAction &JA,
+                    const InputInfo &Output, const InputInfoList &Inputs,
+                    const llvm::opt::ArgList &TCArgs,
+                    const char *LinkingOutput) const override;
+};
+
+} // namespace baremetal
+} // namespace tools
+
+} // namespace driver
+} // namespace clang
+
+#endif
diff --git a/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/Clang.cpp b/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/Clang.cpp
index 555847aeeb23d..6a6b90f868250 100644
--- a/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/Clang.cpp
@@ -35,6 +35,7 @@
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/Process.h"
+#include "llvm/Support/TargetParser.h"
 #include "llvm/Support/YAMLParser.h"
 
 #ifdef LLVM_ON_UNIX
@@ -129,6 +130,13 @@ forAllAssociatedToolChains(Compilation &C, const JobAction &JA,
   else if (JA.isDeviceOffloading(Action::OFK_Cuda))
     Work(*C.getSingleOffloadToolChain<Action::OFK_Cuda>());
 
+  if (JA.isHostOffloading(Action::OFK_OpenMP)) {
+    auto TCs = C.getOffloadToolChains<Action::OFK_OpenMP>();
+    for (auto II = TCs.first, IE = TCs.second; II != IE; ++II)
+      Work(*II->second);
+  } else if (JA.isDeviceOffloading(Action::OFK_OpenMP))
+    Work(*C.getSingleOffloadToolChain<Action::OFK_OpenMP>());
+
   //
   // TODO: Add support for other offloading programming models here.
   //
@@ -781,15 +789,14 @@ static void addPGOAndCoverageFlags(Compilation &C, const Driver &D,
     CmdArgs.push_back("-femit-coverage-data");
 
   if (Args.hasFlag(options::OPT_fcoverage_mapping,
-                   options::OPT_fno_coverage_mapping, false) &&
-      !ProfileGenerateArg)
-    D.Diag(clang::diag::err_drv_argument_only_allowed_with)
-        << "-fcoverage-mapping"
-        << "-fprofile-instr-generate";
+                   options::OPT_fno_coverage_mapping, false)) {
+    if (!ProfileGenerateArg)
+      D.Diag(clang::diag::err_drv_argument_only_allowed_with)
+          << "-fcoverage-mapping"
+          << "-fprofile-instr-generate";
 
-  if (Args.hasFlag(options::OPT_fcoverage_mapping,
-                   options::OPT_fno_coverage_mapping, false))
     CmdArgs.push_back("-fcoverage-mapping");
+  }
 
   if (C.getArgs().hasArg(options::OPT_c) ||
       C.getArgs().hasArg(options::OPT_S)) {
@@ -910,6 +917,37 @@ static void RenderDebugEnablingArgs(const ArgList &Args, ArgStringList &CmdArgs,
   }
 }
 
+static void RenderDebugInfoCompressionArgs(const ArgList &Args,
+                                           ArgStringList &CmdArgs,
+                                           const Driver &D) {
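+  // Translate -gz / -gz=<value> into the cc1/cc1as -compress-debug-sections
+  // flags, diagnosing requests that need zlib when LLVM was built without it.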
+  const Arg *A = Args.getLastArg(options::OPT_gz, options::OPT_gz_EQ);
+  if (!A)
+    return;
+
+  if (A->getOption().getID() == options::OPT_gz) {
+    if (llvm::zlib::isAvailable())
+      CmdArgs.push_back("-compress-debug-sections");
+    else
+      D.Diag(diag::warn_debug_compression_unavailable);
+    return;
+  }
+
+  StringRef Value = A->getValue();
+  if (Value == "none") {
+    CmdArgs.push_back("-compress-debug-sections=none");
+  } else if (Value == "zlib" || Value == "zlib-gnu") {
+    if (llvm::zlib::isAvailable()) {
+      CmdArgs.push_back(
+          Args.MakeArgString("-compress-debug-sections=" + Twine(Value)));
+    } else {
+      D.Diag(diag::warn_debug_compression_unavailable);
+    }
+  } else {
+    D.Diag(diag::err_drv_unsupported_option_argument)
+        << A->getOption().getName() << Value;
+  }
+}
+
 static const char *RelocationModelName(llvm::Reloc::Model Model) {
   switch (Model) {
   case llvm::Reloc::Static:
@@ -980,6 +1018,9 @@ void Clang::AddPreprocessingOptions(Compilation &C, const JobAction &JA,
         DepTarget = Args.MakeArgString(llvm::sys::path::filename(P));
       }
 
+      if (!A->getOption().matches(options::OPT_MD) &&
+          !A->getOption().matches(options::OPT_MMD)) {
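+        // This -w appears to be a local addition to the vendored clang (not
+        // upstream): dependency-only jobs (-M/-MM) run without diagnostics.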
+        CmdArgs.push_back("-w");
+      }
       CmdArgs.push_back("-MT");
       SmallString<128> Quoted;
       QuoteTarget(DepTarget, Quoted);
@@ -1275,43 +1316,13 @@ void Clang::AddARMTargetArgs(const llvm::Triple &Triple, const ArgList &Args,
   // FIXME: Support -meabi.
   // FIXME: Parts of this are duplicated in the backend, unify this somehow.
   const char *ABIName = nullptr;
-  if (Arg *A = Args.getLastArg(options::OPT_mabi_EQ)) {
+  if (Arg *A = Args.getLastArg(options::OPT_mabi_EQ))
     ABIName = A->getValue();
-  } else if (Triple.isOSBinFormatMachO()) {
-    if (arm::useAAPCSForMachO(Triple)) {
-      ABIName = "aapcs";
-    } else if (Triple.isWatchABI()) {
-      ABIName = "aapcs16";
-    } else {
-      ABIName = "apcs-gnu";
-    }
-  } else if (Triple.isOSWindows()) {
-    // FIXME: this is invalid for WindowsCE
-    ABIName = "aapcs";
-  } else {
-    // Select the default based on the platform.
-    switch (Triple.getEnvironment()) {
-    case llvm::Triple::Android:
-    case llvm::Triple::GNUEABI:
-    case llvm::Triple::GNUEABIHF:
-    case llvm::Triple::MuslEABI:
-    case llvm::Triple::MuslEABIHF:
-      ABIName = "aapcs-linux";
-      break;
-    case llvm::Triple::EABIHF:
-    case llvm::Triple::EABI:
-      ABIName = "aapcs";
-      break;
-    default:
-      if (Triple.getOS() == llvm::Triple::NetBSD)
-        ABIName = "apcs-gnu";
-      else if (Triple.getOS() == llvm::Triple::OpenBSD)
-        ABIName = "aapcs-linux";
-      else
-        ABIName = "aapcs";
-      break;
-    }
+  else {
+    std::string CPU = getCPUName(Args, Triple, /*FromAs*/ false);
+    ABIName = llvm::ARM::computeDefaultTargetABI(Triple, CPU).data();
   }
+
   CmdArgs.push_back("-target-abi");
   CmdArgs.push_back(ABIName);
 
@@ -1744,10 +1755,6 @@ static void CollectArgsForIntegratedAssembler(Compilation &C,
   // arg after parsing the '-I' arg.
   bool TakeNextArg = false;
 
-  // When using an integrated assembler, translate -Wa, and -Xassembler
-  // options.
-  bool CompressDebugSections = false;
-
   bool UseRelaxRelocations = ENABLE_X86_RELAX_RELOCATIONS;
   const char *MipsTargetFeature = nullptr;
   for (const Arg *A :
@@ -1822,12 +1829,11 @@ static void CollectArgsForIntegratedAssembler(Compilation &C,
         CmdArgs.push_back("-massembler-fatal-warnings");
       } else if (Value == "--noexecstack") {
         CmdArgs.push_back("-mnoexecstack");
-      } else if (Value == "-compress-debug-sections" ||
-                 Value == "--compress-debug-sections") {
-        CompressDebugSections = true;
-      } else if (Value == "-nocompress-debug-sections" ||
+      } else if (Value.startswith("-compress-debug-sections") ||
+                 Value.startswith("--compress-debug-sections") ||
+                 Value == "-nocompress-debug-sections" ||
                  Value == "--nocompress-debug-sections") {
-        CompressDebugSections = false;
+        CmdArgs.push_back(Value.data());
       } else if (Value == "-mrelax-relocations=yes" ||
                  Value == "--mrelax-relocations=yes") {
         UseRelaxRelocations = true;
@@ -1880,12 +1886,6 @@ static void CollectArgsForIntegratedAssembler(Compilation &C,
       }
     }
   }
-  if (CompressDebugSections) {
-    if (llvm::zlib::isAvailable())
-      CmdArgs.push_back("-compress-debug-sections");
-    else
-      D.Diag(diag::warn_debug_compression_unavailable);
-  }
   if (UseRelaxRelocations)
     CmdArgs.push_back("--mrelax-relocations");
   if (MipsTargetFeature != nullptr) {
@@ -1970,6 +1970,16 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
     CmdArgs.push_back(Args.MakeArgString(NormalizedTriple));
   }
 
+  if (IsOpenMPDevice) {
+    // We have to pass the triple of the host if compiling for an OpenMP device.
+    std::string NormalizedTriple =
+        C.getSingleOffloadToolChain<Action::OFK_Host>()
+            ->getTriple()
+            .normalize();
+    CmdArgs.push_back("-aux-triple");
+    CmdArgs.push_back(Args.MakeArgString(NormalizedTriple));
+  }
+
   if (Triple.isOSWindows() && (Triple.getArch() == llvm::Triple::arm ||
                                Triple.getArch() == llvm::Triple::thumb)) {
     unsigned Offset = Triple.getArch() == llvm::Triple::arm ? 4 : 6;
@@ -2060,10 +2070,11 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
     if (D.isUsingLTO()) {
       Args.AddLastArg(CmdArgs, options::OPT_flto, options::OPT_flto_EQ);
 
-      // The Darwin linker currently uses the legacy LTO API, which does not
-      // support LTO unit features (CFI, whole program vtable opt) under
-      // ThinLTO.
-      if (!getToolChain().getTriple().isOSDarwin() ||
+      // The Darwin and PS4 linkers currently use the legacy LTO API, which
+      // does not support LTO unit features (CFI, whole program vtable opt)
+      // under ThinLTO.
+      if (!(getToolChain().getTriple().isOSDarwin() ||
+            getToolChain().getTriple().isPS4()) ||
           D.getLTOMode() == LTOK_Full)
         CmdArgs.push_back("-flto-unit");
     }
@@ -2527,14 +2538,15 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
   bool AsynchronousUnwindTables =
       Args.hasFlag(options::OPT_fasynchronous_unwind_tables,
                    options::OPT_fno_asynchronous_unwind_tables,
-                   (getToolChain().IsUnwindTablesDefault() ||
+                   (getToolChain().IsUnwindTablesDefault(Args) ||
                     getToolChain().getSanitizerArgs().needsUnwindTables()) &&
                        !KernelOrKext);
   if (Args.hasFlag(options::OPT_funwind_tables, options::OPT_fno_unwind_tables,
                    AsynchronousUnwindTables))
     CmdArgs.push_back("-munwind-tables");
 
-  getToolChain().addClangTargetOptions(Args, CmdArgs);
+  getToolChain().addClangTargetOptions(Args, CmdArgs,
+                                       JA.getOffloadingDeviceKind());
 
   if (Arg *A = Args.getLastArg(options::OPT_flimited_precision_EQ)) {
     CmdArgs.push_back("-mlimit-float-precision");
@@ -2821,6 +2833,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
     CmdArgs.push_back("-generate-type-units");
   }
 
+  RenderDebugInfoCompressionArgs(Args, CmdArgs, D);
+
   bool UseSeparateSections = isUseSeparateSections(Triple);
 
   if (Args.hasFlag(options::OPT_ffunction_sections,
@@ -2841,6 +2855,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
 
   addPGOAndCoverageFlags(C, D, Output, Args, CmdArgs);
 
+  if (auto *ABICompatArg = Args.getLastArg(options::OPT_fclang_abi_compat_EQ))
+    ABICompatArg->render(Args, CmdArgs);
+
   // Add runtime flag for PS4 when PGO or Coverage are enabled.
   if (getToolChain().getTriple().isPS4CPU())
     PS4cpu::addProfileRTArgs(getToolChain(), Args, CmdArgs);
@@ -2962,6 +2979,12 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
     A->claim();
   }
 
+  for (const Arg *A :
+       Args.filtered(options::OPT_clang_ignored_legacy_options_Group)) {
+    D.Diag(diag::warn_ignored_clang_option) << A->getAsString(Args);
+    A->claim();
+  }
+
   claimNoWarnArgs(Args);
 
   Args.AddAllArgs(CmdArgs, options::OPT_R_Group);
@@ -3181,9 +3204,10 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
   Args.AddLastArg(CmdArgs, options::OPT_femit_all_decls);
   Args.AddLastArg(CmdArgs, options::OPT_fheinous_gnu_extensions);
   Args.AddLastArg(CmdArgs, options::OPT_fno_operator_names);
-  // Emulated TLS is enabled by default on Android, and can be enabled manually
-  // with -femulated-tls.
-  bool EmulatedTLSDefault = Triple.isAndroid() || Triple.isWindowsCygwinEnvironment();
+  // Emulated TLS is enabled by default on Android and OpenBSD, and can be enabled
+  // manually with -femulated-tls.
+  bool EmulatedTLSDefault = Triple.isAndroid() || Triple.isOSOpenBSD() ||
+                            Triple.isWindowsCygwinEnvironment();
   if (Args.hasFlag(options::OPT_femulated_tls, options::OPT_fno_emulated_tls,
                    EmulatedTLSDefault))
     CmdArgs.push_back("-femulated-tls");
@@ -3985,9 +4009,30 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
                                           << value;
   }
 
+  bool CaretDefault = true;
+  bool ColumnDefault = true;
+  if (Arg *DiagArg = Args.getLastArg(options::OPT__SLASH_diagnostics_classic,
+                                     options::OPT__SLASH_diagnostics_column,
+                                     options::OPT__SLASH_diagnostics_caret)) {
+    switch (DiagArg->getOption().getID()) {
+    case options::OPT__SLASH_diagnostics_caret:
+      CaretDefault = true;
+      ColumnDefault = true;
+      break;
+    case options::OPT__SLASH_diagnostics_column:
+      CaretDefault = false;
+      ColumnDefault = true;
+      break;
+    case options::OPT__SLASH_diagnostics_classic:
+      CaretDefault = false;
+      ColumnDefault = false;
+      break;
+    }
+  }
+
   // -fcaret-diagnostics is default.
   if (!Args.hasFlag(options::OPT_fcaret_diagnostics,
-                    options::OPT_fno_caret_diagnostics, true))
+                    options::OPT_fno_caret_diagnostics, CaretDefault))
     CmdArgs.push_back("-fno-caret-diagnostics");
 
   // -fdiagnostics-fixit-info is default, only pass non-default.
@@ -4010,6 +4055,12 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
                    options::OPT_fno_diagnostics_show_hotness, false))
     CmdArgs.push_back("-fdiagnostics-show-hotness");
 
+  if (const Arg *A =
+          Args.getLastArg(options::OPT_fdiagnostics_hotness_threshold_EQ)) {
+    std::string Opt =
+        std::string("-fdiagnostics-hotness-threshold=") + A->getValue();
+    CmdArgs.push_back(Args.MakeArgString(Opt));
+  }
+
   if (const Arg *A = Args.getLastArg(options::OPT_fdiagnostics_format_EQ)) {
     CmdArgs.push_back("-fdiagnostics-format");
     CmdArgs.push_back(A->getValue());
@@ -4059,7 +4110,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
     CmdArgs.push_back("-fdiagnostics-absolute-paths");
 
   if (!Args.hasFlag(options::OPT_fshow_column, options::OPT_fno_show_column,
-                    true))
+                    ColumnDefault))
     CmdArgs.push_back("-fno-show-column");
 
   if (!Args.hasFlag(options::OPT_fspell_checking,
@@ -4094,11 +4145,6 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
                    options::OPT_fno_slp_vectorize, EnableSLPVec))
     CmdArgs.push_back("-vectorize-slp");
 
-  // -fno-slp-vectorize-aggressive is default.
-  if (Args.hasFlag(options::OPT_fslp_vectorize_aggressive,
-                   options::OPT_fno_slp_vectorize_aggressive, false))
-    CmdArgs.push_back("-vectorize-slp-aggressive");
-
   if (Arg *A = Args.getLastArg(options::OPT_fshow_overloads_EQ))
     A->render(Args, CmdArgs);
 
@@ -4183,13 +4229,18 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
   }
 #endif
 
+  bool RewriteImports = Args.hasFlag(options::OPT_frewrite_imports,
+                                     options::OPT_fno_rewrite_imports, false);
+  if (RewriteImports)
+    CmdArgs.push_back("-frewrite-imports");
+
   // Enable rewrite includes if the user's asked for it or if we're generating
   // diagnostics.
   // TODO: Once -module-dependency-dir works with -frewrite-includes it'd be
   // nice to enable this when doing a crashdump for modules as well.
   if (Args.hasFlag(options::OPT_frewrite_includes,
                    options::OPT_fno_rewrite_includes, false) ||
-      (C.isForDiagnostics() && !HaveAnyModules))
+      (C.isForDiagnostics() && (RewriteImports || !HaveAnyModules)))
     CmdArgs.push_back("-frewrite-includes");
 
   // Only allow -traditional or -traditional-cpp outside in preprocessing modes.
@@ -4362,10 +4413,12 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
   // device declarations can be identified. Also, -fopenmp-is-device is passed
   // along to tell the frontend that it is generating code for a device, so that
   // only the relevant declarations are emitted.
-  if (IsOpenMPDevice && Inputs.size() == 2) {
+  if (IsOpenMPDevice) {
     CmdArgs.push_back("-fopenmp-is-device");
-    CmdArgs.push_back("-fopenmp-host-ir-file-path");
-    CmdArgs.push_back(Args.MakeArgString(Inputs.back().getFilename()));
+    if (Inputs.size() == 2) {
+      CmdArgs.push_back("-fopenmp-host-ir-file-path");
+      CmdArgs.push_back(Args.MakeArgString(Inputs.back().getFilename()));
+    }
   }
 
   // For all the host OpenMP offloading compile jobs we need to pass the targets
@@ -4781,14 +4834,36 @@ void Clang::AddClangCLArgs(const ArgList &Args, types::ID InputType,
       CmdArgs.push_back("-fms-memptr-rep=virtual");
   }
 
-  if (Args.getLastArg(options::OPT__SLASH_Gd))
-     CmdArgs.push_back("-fdefault-calling-conv=cdecl");
-  else if (Args.getLastArg(options::OPT__SLASH_Gr))
-     CmdArgs.push_back("-fdefault-calling-conv=fastcall");
-  else if (Args.getLastArg(options::OPT__SLASH_Gz))
-     CmdArgs.push_back("-fdefault-calling-conv=stdcall");
-  else if (Args.getLastArg(options::OPT__SLASH_Gv))
-     CmdArgs.push_back("-fdefault-calling-conv=vectorcall");
+  // Parse the default calling convention options.
+  if (Arg *CCArg =
+          Args.getLastArg(options::OPT__SLASH_Gd, options::OPT__SLASH_Gr,
+                          options::OPT__SLASH_Gz, options::OPT__SLASH_Gv)) {
+    unsigned DCCOptId = CCArg->getOption().getID();
+    const char *DCCFlag = nullptr;
+    bool ArchSupported = true;
+    llvm::Triple::ArchType Arch = getToolChain().getArch();
+    switch (DCCOptId) {
+    case options::OPT__SLASH_Gd:
+      DCCFlag = "-fdefault-calling-conv=cdecl";
+      break;
+    case options::OPT__SLASH_Gr:
+      ArchSupported = Arch == llvm::Triple::x86;
+      DCCFlag = "-fdefault-calling-conv=fastcall";
+      break;
+    case options::OPT__SLASH_Gz:
+      ArchSupported = Arch == llvm::Triple::x86;
+      DCCFlag = "-fdefault-calling-conv=stdcall";
+      break;
+    case options::OPT__SLASH_Gv:
+      ArchSupported = Arch == llvm::Triple::x86 || Arch == llvm::Triple::x86_64;
+      DCCFlag = "-fdefault-calling-conv=vectorcall";
+      break;
+    }
+
+    // MSVC doesn't warn if /Gr or /Gz is used on x64, so we don't either.
+    if (ArchSupported && DCCFlag)
+      CmdArgs.push_back(DCCFlag);
+  }
 
   if (Arg *A = Args.getLastArg(options::OPT_vtordisp_mode_EQ))
     A->render(Args, CmdArgs);
@@ -4876,6 +4951,7 @@ void ClangAs::ConstructJob(Compilation &C, const JobAction &JA,
 
   const llvm::Triple &Triple = getToolChain().getEffectiveTriple();
   const std::string &TripleStr = Triple.getTriple();
+  const auto &D = getToolChain().getDriver();
 
   // Don't warn about "clang -w -c foo.s"
   Args.ClaimAllArgs(options::OPT_w);
@@ -4963,6 +5039,8 @@ void ClangAs::ConstructJob(Compilation &C, const JobAction &JA,
   }
   RenderDebugEnablingArgs(Args, CmdArgs, DebugInfoKind, DwarfVersion,
                           llvm::DebuggerKind::Default);
+  RenderDebugInfoCompressionArgs(Args, CmdArgs, D);
 
   // Handle -fPIC et al -- the relocation-model affects the assembler
   // for some targets.
diff --git a/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/CommonArgs.cpp b/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/CommonArgs.cpp
index 5e360f62e21ab..00bd60bc24bba 100644
--- a/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -215,6 +215,21 @@ static std::string getR600TargetGPU(const ArgList &Args) {
   return "";
 }
 
+static std::string getNios2TargetCPU(const ArgList &Args) {
+  Arg *A = Args.getLastArg(options::OPT_mcpu_EQ);
+  if (!A)
+    A = Args.getLastArg(options::OPT_march_EQ);
+
+  if (!A)
+    return "";
+
+  const char *name = A->getValue();
+  return llvm::StringSwitch<const char *>(name)
+      .Case("r1", "nios2r1")
+      .Case("r2", "nios2r2")
+      .Default(name);
+}
+
 static std::string getLanaiTargetCPU(const ArgList &Args) {
   if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) {
     return A->getValue();
@@ -267,6 +282,10 @@ std::string tools::getCPUName(const ArgList &Args, const llvm::Triple &T,
       return A->getValue();
     return "";
 
+  case llvm::Triple::nios2: {
+    return getNios2TargetCPU(Args);
+  }
+
   case llvm::Triple::mips:
   case llvm::Triple::mipsel:
   case llvm::Triple::mips64:
@@ -505,6 +524,7 @@ void tools::linkSanitizerRuntimeDeps(const ToolChain &TC,
   CmdArgs.push_back("-lm");
   // There's no libdl on FreeBSD or RTEMS.
   if (TC.getTriple().getOS() != llvm::Triple::FreeBSD &&
+      TC.getTriple().getOS() != llvm::Triple::NetBSD &&
       TC.getTriple().getOS() != llvm::Triple::RTEMS)
     CmdArgs.push_back("-ldl");
 }
@@ -598,7 +618,8 @@ bool tools::addSanitizerRuntimes(const ToolChain &TC, const ArgList &Args,
                            NonWholeStaticRuntimes, HelperStaticRuntimes,
                            RequiredSymbols);
   // Inject libfuzzer dependencies.
-  if (TC.getSanitizerArgs().needsFuzzer()) {
+  if (TC.getSanitizerArgs().needsFuzzer()
+      && !Args.hasArg(options::OPT_shared)) {
     addLibFuzzerRuntime(TC, Args, CmdArgs);
   }
 
diff --git a/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/CrossWindows.cpp b/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/CrossWindows.cpp
index d290c62a056ae..04b71c48cd4cd 100644
--- a/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/CrossWindows.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/CrossWindows.cpp
@@ -214,7 +214,7 @@ CrossWindowsToolChain::CrossWindowsToolChain(const Driver &D,
   }
 }
 
-bool CrossWindowsToolChain::IsUnwindTablesDefault() const {
+bool CrossWindowsToolChain::IsUnwindTablesDefault(const ArgList &Args) const {
   // FIXME: all non-x86 targets need unwind tables, however, LLVM currently does
   // not know how to emit them.
   return getArch() == llvm::Triple::x86_64;
@@ -238,8 +238,15 @@ AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs,
   const Driver &D = getDriver();
   const std::string &SysRoot = D.SysRoot;
 
-  if (DriverArgs.hasArg(options::OPT_nostdlibinc))
+  auto AddSystemAfterIncludes = [&]() {
+    for (const auto &P : DriverArgs.getAllArgValues(options::OPT_isystem_after))
+      addSystemInclude(DriverArgs, CC1Args, P);
+  };
+
+  if (DriverArgs.hasArg(options::OPT_nostdinc)) {
+    AddSystemAfterIncludes();
     return;
+  }
 
   addSystemInclude(DriverArgs, CC1Args, SysRoot + "/usr/local/include");
   if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) {
@@ -247,8 +254,7 @@ AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs,
     llvm::sys::path::append(ResourceDir, "include");
     addSystemInclude(DriverArgs, CC1Args, ResourceDir);
   }
-  for (const auto &P : DriverArgs.getAllArgValues(options::OPT_isystem_after))
-    addSystemInclude(DriverArgs, CC1Args, P);
+  AddSystemAfterIncludes();
   addExternCSystemInclude(DriverArgs, CC1Args, SysRoot + "/usr/include");
 }
 
@@ -258,7 +264,7 @@ AddClangCXXStdlibIncludeArgs(const llvm::opt::ArgList &DriverArgs,
   const llvm::Triple &Triple = getTriple();
   const std::string &SysRoot = getDriver().SysRoot;
 
-  if (DriverArgs.hasArg(options::OPT_nostdlibinc) ||
+  if (DriverArgs.hasArg(options::OPT_nostdinc) ||
       DriverArgs.hasArg(options::OPT_nostdincxx))
     return;
 
diff --git a/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/CrossWindows.h b/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/CrossWindows.h
index 5375a6324a3f6..2f66446ec732c 100644
--- a/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/CrossWindows.h
+++ b/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/CrossWindows.h
@@ -56,7 +56,7 @@ class LLVM_LIBRARY_VISIBILITY CrossWindowsToolChain : public Generic_GCC {
                         const llvm::opt::ArgList &Args);
 
   bool IsIntegratedAssemblerDefault() const override { return true; }
-  bool IsUnwindTablesDefault() const override;
+  bool IsUnwindTablesDefault(const llvm::opt::ArgList &Args) const override;
   bool isPICDefault() const override;
   bool isPIEDefault() const override;
   bool isPICDefaultForced() const override;
diff --git a/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/Cuda.cpp b/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/Cuda.cpp
index 42bf164f1b3fe..935a5a37ada52 100644
--- a/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/Cuda.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/Cuda.cpp
@@ -338,24 +338,31 @@ CudaToolChain::CudaToolChain(const Driver &D, const llvm::Triple &Triple,
 
 void CudaToolChain::addClangTargetOptions(
     const llvm::opt::ArgList &DriverArgs,
-    llvm::opt::ArgStringList &CC1Args) const {
-  HostTC.addClangTargetOptions(DriverArgs, CC1Args);
+    llvm::opt::ArgStringList &CC1Args,
+    Action::OffloadKind DeviceOffloadingKind) const {
+  HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind);
 
-  CC1Args.push_back("-fcuda-is-device");
+  StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
+  assert(!GpuArch.empty() && "Must have an explicit GPU arch.");
+  assert((DeviceOffloadingKind == Action::OFK_OpenMP ||
+          DeviceOffloadingKind == Action::OFK_Cuda) &&
+         "Only OpenMP or CUDA offloading kinds are supported for NVIDIA GPUs.");
 
-  if (DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero,
-                         options::OPT_fno_cuda_flush_denormals_to_zero, false))
-    CC1Args.push_back("-fcuda-flush-denormals-to-zero");
+  if (DeviceOffloadingKind == Action::OFK_Cuda) {
+    CC1Args.push_back("-fcuda-is-device");
 
-  if (DriverArgs.hasFlag(options::OPT_fcuda_approx_transcendentals,
-                         options::OPT_fno_cuda_approx_transcendentals, false))
-    CC1Args.push_back("-fcuda-approx-transcendentals");
+    if (DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero,
+                           options::OPT_fno_cuda_flush_denormals_to_zero, false))
+      CC1Args.push_back("-fcuda-flush-denormals-to-zero");
 
-  if (DriverArgs.hasArg(options::OPT_nocudalib))
-    return;
+    if (DriverArgs.hasFlag(options::OPT_fcuda_approx_transcendentals,
+                           options::OPT_fno_cuda_approx_transcendentals, false))
+      CC1Args.push_back("-fcuda-approx-transcendentals");
+
+    if (DriverArgs.hasArg(options::OPT_nocudalib))
+      return;
+  }
 
-  StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
-  assert(!GpuArch.empty() && "Must have an explicit GPU arch.");
   std::string LibDeviceFile = CudaInstallation.getLibDeviceFile(GpuArch);
 
   if (LibDeviceFile.empty()) {
@@ -396,6 +403,24 @@ CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
 
   const OptTable &Opts = getDriver().getOpts();
 
+  // For OpenMP device offloading, append derived arguments. Make sure
+  // flags are not duplicated.
+  // TODO: Append the compute capability.
+  if (DeviceOffloadKind == Action::OFK_OpenMP) {
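+    // Arg objects are shared with the base argument list, so pointer
+    // equality suffices to spot entries the host toolchain already copied.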
+    for (Arg *A : Args) {
+      bool IsDuplicate = false;
+      for (Arg *DALArg : *DAL) {
+        if (A == DALArg) {
+          IsDuplicate = true;
+          break;
+        }
+      }
+      if (!IsDuplicate)
+        DAL->append(A);
+    }
+    return DAL;
+  }
+
   for (Arg *A : Args) {
     if (A->getOption().matches(options::OPT_Xarch__)) {
       // Skip this argument unless the architecture matches BoundArch
diff --git a/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/Cuda.h b/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/Cuda.h
index acdb4c4efd6d7..e66fc23d82f35 100644
--- a/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/Cuda.h
+++ b/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/Cuda.h
@@ -130,7 +130,8 @@ class LLVM_LIBRARY_VISIBILITY CudaToolChain : public ToolChain {
   TranslateArgs(const llvm::opt::DerivedArgList &Args, StringRef BoundArch,
                 Action::OffloadKind DeviceOffloadKind) const override;
   void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs,
-                             llvm::opt::ArgStringList &CC1Args) const override;
+                             llvm::opt::ArgStringList &CC1Args,
+                             Action::OffloadKind DeviceOffloadKind) const override;
 
   // Never try to use the integrated assembler with CUDA; always fork out to
   // ptxas.
diff --git a/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/Darwin.cpp b/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/Darwin.cpp
index e41b50c40b289..32103a6120d40 100644
--- a/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/Darwin.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/Darwin.cpp
@@ -1053,7 +1053,7 @@ void DarwinClang::AddLinkRuntimeLibArgs(const ArgList &Args,
     AddLinkSanitizerLibArgs(Args, CmdArgs, "ubsan");
   if (Sanitize.needsTsanRt())
     AddLinkSanitizerLibArgs(Args, CmdArgs, "tsan");
-  if (Sanitize.needsFuzzer())
+  if (Sanitize.needsFuzzer() && !Args.hasArg(options::OPT_dynamiclib))
     AddFuzzerLinkArgs(Args, CmdArgs);
   if (Sanitize.needsStatsRt()) {
     StringRef OS = isTargetMacOS() ? "osx" : "iossim";
@@ -1118,6 +1118,27 @@ void DarwinClang::AddLinkRuntimeLibArgs(const ArgList &Args,
   }
 }
 
+/// Returns the most appropriate macOS target version for the current process.
+///
+/// If the macOS SDK version is the same or earlier than the system version,
+/// then the SDK version is returned. Otherwise the system version is returned.
+static std::string getSystemOrSDKMacOSVersion(StringRef MacOSSDKVersion) {
+  unsigned Major, Minor, Micro;
+  llvm::Triple SystemTriple(llvm::sys::getProcessTriple());
+  if (!SystemTriple.isMacOSX())
+    return MacOSSDKVersion;
+  SystemTriple.getMacOSXVersion(Major, Minor, Micro);
+  VersionTuple SystemVersion(Major, Minor, Micro);
+  bool HadExtra;
+  if (!Driver::GetReleaseVersion(MacOSSDKVersion, Major, Minor, Micro,
+                                 HadExtra))
+    return MacOSSDKVersion;
+  VersionTuple SDKVersion(Major, Minor, Micro);
+  if (SDKVersion > SystemVersion)
+    return SystemVersion.getAsString();
+  return MacOSSDKVersion;
+}
+
 void Darwin::AddDeploymentTarget(DerivedArgList &Args) const {
   const OptTable &Opts = getDriver().getOpts();
 
@@ -1150,6 +1171,16 @@ void Darwin::AddDeploymentTarget(DerivedArgList &Args) const {
       Args.getLastArg(options::OPT_mwatchos_version_min_EQ,
                       options::OPT_mwatchos_simulator_version_min_EQ);
 
+  unsigned Major, Minor, Micro;
+  bool HadExtra;
+
+  // The iOS deployment target that is explicitly specified via a command line
+  // option or an environment variable.
+  std::string ExplicitIOSDeploymentTargetStr;
+
+  if (iOSVersion)
+    ExplicitIOSDeploymentTargetStr = iOSVersion->getAsString(Args);
+
   // Add a macro to differentiate between m(iphone|tv|watch)os-version-min=X.Y and
   // -m(iphone|tv|watch)simulator-version-min=X.Y.
   if (Args.hasArg(options::OPT_mios_simulator_version_min_EQ) ||
@@ -1191,6 +1222,10 @@ void Darwin::AddDeploymentTarget(DerivedArgList &Args) const {
     if (char *env = ::getenv("WATCHOS_DEPLOYMENT_TARGET"))
       WatchOSTarget = env;
 
+    if (!iOSTarget.empty())
+      ExplicitIOSDeploymentTargetStr =
+          std::string("IPHONEOS_DEPLOYMENT_TARGET=") + iOSTarget;
+
     // If there is no command-line argument to specify the Target version and
     // no environment variable defined, see if we can set the default based
     // on -isysroot.
@@ -1210,7 +1245,7 @@ void Darwin::AddDeploymentTarget(DerivedArgList &Args) const {
                 SDK.startswith("iPhoneSimulator"))
               iOSTarget = Version;
             else if (SDK.startswith("MacOSX"))
-              OSXTarget = Version;
+              OSXTarget = getSystemOrSDKMacOSVersion(Version);
             else if (SDK.startswith("WatchOS") ||
                      SDK.startswith("WatchSimulator"))
               WatchOSTarget = Version;
@@ -1222,28 +1257,58 @@ void Darwin::AddDeploymentTarget(DerivedArgList &Args) const {
       }
     }
 
-    // If no OSX or iOS target has been specified, try to guess platform
-    // from arch name and compute the version from the triple.
+    // If no OS targets have been specified, try to guess platform from -target
+    // or arch name and compute the version from the triple.
     if (OSXTarget.empty() && iOSTarget.empty() && TvOSTarget.empty() &&
         WatchOSTarget.empty()) {
-      StringRef MachOArchName = getMachOArchName(Args);
-      unsigned Major, Minor, Micro;
-      if (MachOArchName == "armv7" || MachOArchName == "armv7s" ||
-          MachOArchName == "arm64") {
-        getTriple().getiOSVersion(Major, Minor, Micro);
-        llvm::raw_string_ostream(iOSTarget) << Major << '.' << Minor << '.'
-                                            << Micro;
-      } else if (MachOArchName == "armv7k") {
-        getTriple().getWatchOSVersion(Major, Minor, Micro);
-        llvm::raw_string_ostream(WatchOSTarget) << Major << '.' << Minor << '.'
-                                                << Micro;
-      } else if (MachOArchName != "armv6m" && MachOArchName != "armv7m" &&
-                 MachOArchName != "armv7em") {
-        if (!getTriple().getMacOSXVersion(Major, Minor, Micro)) {
-          getDriver().Diag(diag::err_drv_invalid_darwin_version)
-              << getTriple().getOSName();
+      llvm::Triple::OSType OSTy = llvm::Triple::UnknownOS;
+
+      // Set the OSTy based on -target if -arch isn't present.
+      if (Args.hasArg(options::OPT_target) && !Args.hasArg(options::OPT_arch)) {
+        OSTy = getTriple().getOS();
+      } else {
+        StringRef MachOArchName = getMachOArchName(Args);
+        if (MachOArchName == "armv7" || MachOArchName == "armv7s" ||
+            MachOArchName == "arm64")
+          OSTy = llvm::Triple::IOS;
+        else if (MachOArchName == "armv7k")
+          OSTy = llvm::Triple::WatchOS;
+        else if (MachOArchName != "armv6m" && MachOArchName != "armv7m" &&
+                 MachOArchName != "armv7em")
+          OSTy = llvm::Triple::MacOSX;
+      }
+
+      if (OSTy != llvm::Triple::UnknownOS) {
+        unsigned Major, Minor, Micro;
+        std::string *OSTarget;
+
+        switch (OSTy) {
+        case llvm::Triple::Darwin:
+        case llvm::Triple::MacOSX:
+          if (!getTriple().getMacOSXVersion(Major, Minor, Micro))
+            getDriver().Diag(diag::err_drv_invalid_darwin_version)
+                << getTriple().getOSName();
+          OSTarget = &OSXTarget;
+          break;
+        case llvm::Triple::IOS:
+          getTriple().getiOSVersion(Major, Minor, Micro);
+          OSTarget = &iOSTarget;
+          break;
+        case llvm::Triple::TvOS:
+          getTriple().getOSVersion(Major, Minor, Micro);
+          OSTarget = &TvOSTarget;
+          break;
+        case llvm::Triple::WatchOS:
+          getTriple().getWatchOSVersion(Major, Minor, Micro);
+          OSTarget = &WatchOSTarget;
+          break;
+        default:
+          llvm_unreachable("Unexpected OS type");
+          break;
         }
-        llvm::raw_string_ostream(OSXTarget) << Major << '.' << Minor << '.'
+
+        llvm::raw_string_ostream(*OSTarget) << Major << '.' << Minor << '.'
                                             << Micro;
       }
     }
@@ -1308,8 +1373,6 @@ void Darwin::AddDeploymentTarget(DerivedArgList &Args) const {
     llvm_unreachable("Unable to infer Darwin variant");
 
   // Set the tool chain target information.
-  unsigned Major, Minor, Micro;
-  bool HadExtra;
   if (Platform == MacOS) {
     assert((!iOSVersion && !TvOSVersion && !WatchOSVersion) &&
            "Unknown target platform!");
@@ -1325,6 +1388,20 @@ void Darwin::AddDeploymentTarget(DerivedArgList &Args) const {
         HadExtra || Major >= 100 || Minor >= 100 || Micro >= 100)
       getDriver().Diag(diag::err_drv_invalid_version_number)
           << iOSVersion->getAsString(Args);
+    // For 32-bit targets, the deployment target for iOS has to be earlier than
+    // iOS 11.
+    if (getTriple().isArch32Bit() && Major >= 11) {
+      // If the deployment target is explicitly specified, print a diagnostic.
+      if (!ExplicitIOSDeploymentTargetStr.empty()) {
+        getDriver().Diag(diag::warn_invalid_ios_deployment_target)
+            << ExplicitIOSDeploymentTargetStr;
+      // Otherwise, set it to 10.99.99.
+      } else {
+        Major = 10;
+        Minor = 99;
+        Micro = 99;
+      }
+    }
   } else if (Platform == TvOS) {
     if (!Driver::GetReleaseVersion(TvOSVersion->getValue(), Major, Minor,
                                    Micro, HadExtra) || HadExtra ||
@@ -1667,6 +1744,29 @@ void MachO::AddLinkRuntimeLibArgs(const ArgList &Args,
   AddLinkRuntimeLib(Args, CmdArgs, CompilerRT, false, true);
 }
 
+bool Darwin::isAlignedAllocationUnavailable() const {
+  switch (TargetPlatform) {
+  case MacOS: // Earlier than 10.13.
+    return TargetVersion < VersionTuple(10U, 13U, 0U);
+  case IPhoneOS:
+  case IPhoneOSSimulator:
+  case TvOS:
+  case TvOSSimulator: // Earlier than 11.0.
+    return TargetVersion < VersionTuple(11U, 0U, 0U);
+  case WatchOS:
+  case WatchOSSimulator: // Earlier than 4.0.
+    return TargetVersion < VersionTuple(4U, 0U, 0U);
+  }
+  llvm_unreachable("Unsupported platform");
+}
+
+void Darwin::addClangTargetOptions(const llvm::opt::ArgList &DriverArgs,
+                                   llvm::opt::ArgStringList &CC1Args,
+                                   Action::OffloadKind DeviceOffloadKind) const {
+  if (isAlignedAllocationUnavailable())
+    CC1Args.push_back("-faligned-alloc-unavailable");
+}
+
 DerivedArgList *
 Darwin::TranslateArgs(const DerivedArgList &Args, StringRef BoundArch,
                       Action::OffloadKind DeviceOffloadKind) const {
@@ -1736,8 +1836,13 @@ Darwin::TranslateArgs(const DerivedArgList &Args, StringRef BoundArch,
   return DAL;
 }
 
-bool MachO::IsUnwindTablesDefault() const {
-  return getArch() == llvm::Triple::x86_64;
+bool MachO::IsUnwindTablesDefault(const ArgList &Args) const {
+  // Unwind tables are not emitted if -fno-exceptions is supplied (except when
+  // targeting x86_64).
+  return getArch() == llvm::Triple::x86_64 ||
+         (!UseSjLjExceptions(Args) &&
+          Args.hasFlag(options::OPT_fexceptions, options::OPT_fno_exceptions,
+                       true));
 }
 
 bool MachO::UseDwarfDebugFlags() const {
diff --git a/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/Darwin.h b/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/Darwin.h
index 16ed04286ac06..77c569e8f865d 100644
--- a/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/Darwin.h
+++ b/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/Darwin.h
@@ -216,7 +216,7 @@ class LLVM_LIBRARY_VISIBILITY MachO : public ToolChain {
 
   bool UseObjCMixedDispatch() const override { return true; }
 
-  bool IsUnwindTablesDefault() const override;
+  bool IsUnwindTablesDefault(const llvm::opt::ArgList &Args) const override;
 
   RuntimeLibType GetDefaultRuntimeLibType() const override {
     return ToolChain::RLT_CompilerRT;
@@ -384,6 +384,15 @@ class LLVM_LIBRARY_VISIBILITY Darwin : public MachO {
     return TargetVersion < VersionTuple(V0, V1, V2);
   }
 
+  /// Return true if c++17 aligned allocation/deallocation functions are not
+  /// implemented in the c++ standard library of the deployment target we are
+  /// targeting.
+  bool isAlignedAllocationUnavailable() const;
+
+  void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs,
+                             llvm::opt::ArgStringList &CC1Args,
+                             Action::OffloadKind DeviceOffloadKind) const override;
+
   StringRef getPlatformFamily() const;
   static StringRef getSDKName(StringRef isysroot);
   StringRef getOSLibraryNameSuffix() const;
diff --git a/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/Fuchsia.cpp b/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/Fuchsia.cpp
index b8757cf4aa73d..78053aafd16e5 100644
--- a/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/Fuchsia.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/Fuchsia.cpp
@@ -46,6 +46,9 @@ void fuchsia::Linker::ConstructJob(Compilation &C, const JobAction &JA,
   if (llvm::sys::path::stem(Exec).equals_lower("lld")) {
     CmdArgs.push_back("-flavor");
     CmdArgs.push_back("gnu");
+
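+    // Fuchsia wants the .dynamic section mapped read-only; -z rodynamic
+    // tells lld not to emit a writable one (rationale assumed from the
+    // flag's meaning in lld).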
+    CmdArgs.push_back("-z");
+    CmdArgs.push_back("rodynamic");
   }
 
   if (!D.SysRoot.empty())
@@ -131,16 +134,44 @@ void fuchsia::Linker::ConstructJob(Compilation &C, const JobAction &JA,
 
 /// Fuchsia - Fuchsia tool chain which can call as(1) and ld(1) directly.
 
+static std::string normalizeTriple(llvm::Triple Triple) {
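+  // Reduce the triple to "<arch>-<os>", dropping the vendor and environment
+  // components.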
+  SmallString<64> T;
+  T += Triple.getArchName();
+  T += "-";
+  T += Triple.getOSName();
+  return T.str();
+}
+
+static std::string getTargetDir(const Driver &D,
+                                llvm::Triple Triple) {
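+  // Resolve <install prefix>/lib/<arch>-<os>, the directory holding the
+  // per-target runtime libraries and C++ headers.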
+  SmallString<128> P(llvm::sys::path::parent_path(D.Dir));
+  llvm::sys::path::append(P, "lib", normalizeTriple(Triple));
+  return P.str();
+}
+
 Fuchsia::Fuchsia(const Driver &D, const llvm::Triple &Triple,
                  const ArgList &Args)
-    : Generic_ELF(D, Triple, Args) {
-
-  getFilePaths().push_back(D.SysRoot + "/lib");
-  getFilePaths().push_back(D.ResourceDir + "/lib/fuchsia");
+    : ToolChain(D, Triple, Args) {
+  getProgramPaths().push_back(getDriver().getInstalledDir());
+  if (getDriver().getInstalledDir() != D.Dir)
+    getProgramPaths().push_back(D.Dir);
+
+  SmallString<128> P(getTargetDir(D, getTriple()));
+  llvm::sys::path::append(P, "lib");
+  getFilePaths().push_back(P.str());
+
+  if (!D.SysRoot.empty()) {
+    SmallString<128> P(D.SysRoot);
+    llvm::sys::path::append(P, "lib");
+    getFilePaths().push_back(P.str());
+  }
 }
 
-Tool *Fuchsia::buildAssembler() const {
-  return new tools::gnutools::Assembler(*this);
+std::string Fuchsia::ComputeEffectiveClangTriple(const ArgList &Args,
+                                                 types::ID InputType) const {
+  llvm::Triple Triple(ComputeLLVMTriple(Args, InputType));
+  Triple.setTriple(normalizeTriple(Triple));
+  return Triple.getTriple();
 }
 
 Tool *Fuchsia::buildLinker() const {
@@ -172,7 +203,8 @@ Fuchsia::GetCXXStdlibType(const ArgList &Args) const {
 }
 
 void Fuchsia::addClangTargetOptions(const ArgList &DriverArgs,
-                                    ArgStringList &CC1Args) const {
+                                    ArgStringList &CC1Args,
+                                    Action::OffloadKind) const {
   if (DriverArgs.hasFlag(options::OPT_fuse_init_array,
                          options::OPT_fno_use_init_array, true))
     CC1Args.push_back("-fuse-init-array");
@@ -207,19 +239,44 @@ void Fuchsia::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
     return;
   }
 
-  addExternCSystemInclude(DriverArgs, CC1Args, D.SysRoot + "/include");
+  if (!D.SysRoot.empty()) {
+    SmallString<128> P(D.SysRoot);
+    llvm::sys::path::append(P, "include");
+    addExternCSystemInclude(DriverArgs, CC1Args, P.str());
+  }
 }
 
-std::string Fuchsia::findLibCxxIncludePath() const {
-  return getDriver().SysRoot + "/include/c++/v1";
+void Fuchsia::AddClangCXXStdlibIncludeArgs(const ArgList &DriverArgs,
+                                           ArgStringList &CC1Args) const {
+  if (DriverArgs.hasArg(options::OPT_nostdlibinc) ||
+      DriverArgs.hasArg(options::OPT_nostdincxx))
+    return;
+
+  switch (GetCXXStdlibType(DriverArgs)) {
+  case ToolChain::CST_Libcxx: {
+    SmallString<128> P(getTargetDir(getDriver(), getTriple()));
+    llvm::sys::path::append(P, "include", "c++", "v1");
+    addSystemInclude(DriverArgs, CC1Args, P.str());
+    break;
+  }
+
+  default:
+    llvm_unreachable("invalid stdlib name");
+  }
 }
 
 void Fuchsia::AddCXXStdlibLibArgs(const ArgList &Args,
                                   ArgStringList &CmdArgs) const {
-  (void) GetCXXStdlibType(Args);
-  CmdArgs.push_back("-lc++");
-  CmdArgs.push_back("-lc++abi");
-  CmdArgs.push_back("-lunwind");
+  switch (GetCXXStdlibType(Args)) {
+  case ToolChain::CST_Libcxx:
+    CmdArgs.push_back("-lc++");
+    CmdArgs.push_back("-lc++abi");
+    CmdArgs.push_back("-lunwind");
+    break;
+
+  case ToolChain::CST_Libstdcxx:
+    llvm_unreachable("invalid stdlib name");
+  }
 }
 
 SanitizerMask Fuchsia::getSupportedSanitizers() const {
diff --git a/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/Fuchsia.h b/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/Fuchsia.h
index 1a8c9903fe4d5..a723a99dfa3bf 100644
--- a/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/Fuchsia.h
+++ b/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/Fuchsia.h
@@ -35,18 +35,29 @@ class LLVM_LIBRARY_VISIBILITY Linker : public GnuTool {
 
 namespace toolchains {
 
-class LLVM_LIBRARY_VISIBILITY Fuchsia : public Generic_ELF {
+class LLVM_LIBRARY_VISIBILITY Fuchsia : public ToolChain {
 public:
   Fuchsia(const Driver &D, const llvm::Triple &Triple,
           const llvm::opt::ArgList &Args);
 
-  bool isPIEDefault() const override { return true; }
   bool HasNativeLLVMSupport() const override { return true; }
   bool IsIntegratedAssemblerDefault() const override { return true; }
+  RuntimeLibType GetDefaultRuntimeLibType() const override {
+    return ToolChain::RLT_CompilerRT;
+  }
+  CXXStdlibType GetDefaultCXXStdlibType() const override {
+    return ToolChain::CST_Libcxx;
+  }
+  bool isPICDefault() const override { return false; }
+  bool isPIEDefault() const override { return true; }
+  bool isPICDefaultForced() const override { return false; }
   llvm::DebuggerKind getDefaultDebuggerTuning() const override {
     return llvm::DebuggerKind::GDB;
   }
 
+  std::string ComputeEffectiveClangTriple(const llvm::opt::ArgList &Args,
+                                          types::ID InputType) const override;
+
   SanitizerMask getSupportedSanitizers() const override;
 
   RuntimeLibType
@@ -55,11 +66,14 @@ class LLVM_LIBRARY_VISIBILITY Fuchsia : public Generic_ELF {
   GetCXXStdlibType(const llvm::opt::ArgList &Args) const override;
 
   void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs,
-                             llvm::opt::ArgStringList &CC1Args) const override;
+                             llvm::opt::ArgStringList &CC1Args,
+                             Action::OffloadKind DeviceOffloadKind) const override;
   void
   AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs,
                             llvm::opt::ArgStringList &CC1Args) const override;
-  std::string findLibCxxIncludePath() const override;
+  void
+  AddClangCXXStdlibIncludeArgs(const llvm::opt::ArgList &DriverArgs,
+                               llvm::opt::ArgStringList &CC1Args) const override;
   void AddCXXStdlibLibArgs(const llvm::opt::ArgList &Args,
                            llvm::opt::ArgStringList &CmdArgs) const override;
 
@@ -68,7 +82,6 @@ class LLVM_LIBRARY_VISIBILITY Fuchsia : public Generic_ELF {
   }
 
 protected:
-  Tool *buildAssembler() const override;
   Tool *buildLinker() const override;
 };
 
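The rewritten Fuchsia toolchain above derives all of its search paths from a single per-target directory instead of the sysroot. A hedged sketch of the layout this implies -- the base of getTargetDir() is truncated above, and normalizeTriple() is assumed to canonicalize triples such as "x86_64-fuchsia":

    // Search layout implied by the hunks above (sketch, not verbatim source):
    //   getTargetDir(D, T)/lib              -- target libraries (constructor)
    //   getTargetDir(D, T)/include/c++/v1   -- libc++ headers
    //   <sysroot>/lib, <sysroot>/include    -- only consulted when a sysroot
    //                                          is explicitly given
    SmallString<128> P(getTargetDir(D, getTriple()));
    llvm::sys::path::append(P, "include", "c++", "v1");
    addSystemInclude(DriverArgs, CC1Args, P.str()); // as in AddClangCXXStdlibIncludeArgs
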
diff --git a/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/Gnu.cpp b/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/Gnu.cpp
index f1015e62eec8b..72a9f85ba389a 100644
--- a/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/Gnu.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/Gnu.cpp
@@ -278,20 +278,20 @@ static void AddOpenMPLinkerScript(const ToolChain &TC, Compilation &C,
 
   LksStream << "SECTIONS\n";
   LksStream << "{\n";
-  LksStream << "  .omp_offloading :\n";
-  LksStream << "  ALIGN(0x10)\n";
-  LksStream << "  {\n";
 
-  for (auto &BI : InputBinaryInfo) {
-    LksStream << "    . = ALIGN(0x10);\n";
+  // Put each target binary into a separate section.
+  for (const auto &BI : InputBinaryInfo) {
+    LksStream << "  .omp_offloading." << BI.first << " :\n";
+    LksStream << "  ALIGN(0x10)\n";
+    LksStream << "  {\n";
     LksStream << "    PROVIDE_HIDDEN(.omp_offloading.img_start." << BI.first
               << " = .);\n";
     LksStream << "    " << BI.second << "\n";
     LksStream << "    PROVIDE_HIDDEN(.omp_offloading.img_end." << BI.first
               << " = .);\n";
+    LksStream << "  }\n";
   }
 
-  LksStream << "  }\n";
   // Add commands to define host entries begin and end. We use 1-byte subalign
   // so that the linker does not add any padding and the elements in this
   // section form an array.
@@ -650,6 +650,8 @@ void tools::gnutools::Assembler::ConstructJob(Compilation &C,
                                               const InputInfoList &Inputs,
                                               const ArgList &Args,
                                               const char *LinkingOutput) const {
+  const auto &D = getToolChain().getDriver();
+
   claimNoWarnArgs(Args);
 
   ArgStringList CmdArgs;
@@ -660,6 +662,23 @@ void tools::gnutools::Assembler::ConstructJob(Compilation &C,
   std::tie(RelocationModel, PICLevel, IsPIE) =
       ParsePICArgs(getToolChain(), Args);
 
+  if (const Arg *A = Args.getLastArg(options::OPT_gz, options::OPT_gz_EQ)) {
+    if (A->getOption().getID() == options::OPT_gz) {
+      CmdArgs.push_back("-compress-debug-sections");
+    } else {
+      StringRef Value = A->getValue();
+      if (Value == "none") {
+        CmdArgs.push_back("-compress-debug-sections=none");
+      } else if (Value == "zlib" || Value == "zlib-gnu") {
+        CmdArgs.push_back(
+            Args.MakeArgString("-compress-debug-sections=" + Twine(Value)));
+      } else {
+        D.Diag(diag::err_drv_unsupported_option_argument)
+            << A->getOption().getName() << Value;
+      }
+    }
+  }
+
   switch (getToolChain().getArch()) {
   default:
     break;
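The -gz handling added above is the driver-side half of assembler debug-section compression. A small standalone sketch of the mapping (translateGz is a hypothetical helper; the real code pushes the result onto CmdArgs and raises err_drv_unsupported_option_argument for bad values):

    #include "llvm/ADT/StringRef.h"
    #include "llvm/ADT/Twine.h"
    #include <string>

    // Hypothetical helper mirroring the -gz logic above. A bare -gz turns
    // compression on; -gz=<value> forwards the supported spellings; anything
    // else yields an empty string so the caller can diagnose.
    std::string translateGz(bool HasValue, llvm::StringRef Value) {
      if (!HasValue)
        return "-compress-debug-sections";
      if (Value == "none" || Value == "zlib" || Value == "zlib-gnu")
        return ("-compress-debug-sections=" + Value).str();
      return std::string(); // unsupported value: emit a diagnostic instead
    }
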
@@ -1598,6 +1617,49 @@ bool Generic_GCC::GCCVersion::isOlderThan(int RHSMajor, int RHSMinor,
   return false;
 }
 
+/// \brief Parse a GCCVersion object out of a string of text.
+///
+/// This is the primary means of forming GCCVersion objects.
+/*static*/
+Generic_GCC::GCCVersion Generic_GCC::GCCVersion::Parse(StringRef VersionText) {
+  const GCCVersion BadVersion = {VersionText.str(), -1, -1, -1, "", "", ""};
+  std::pair<StringRef, StringRef> First = VersionText.split('.');
+  std::pair<StringRef, StringRef> Second = First.second.split('.');
+
+  GCCVersion GoodVersion = {VersionText.str(), -1, -1, -1, "", "", ""};
+  if (First.first.getAsInteger(10, GoodVersion.Major) || GoodVersion.Major < 0)
+    return BadVersion;
+  GoodVersion.MajorStr = First.first.str();
+  if (First.second.empty())
+    return GoodVersion;
+  if (Second.first.getAsInteger(10, GoodVersion.Minor) || GoodVersion.Minor < 0)
+    return BadVersion;
+  GoodVersion.MinorStr = Second.first.str();
+
+  // First look for a number prefix and parse that if present. Otherwise just
+  // stash the entire patch string in the suffix, and leave the number
+  // unspecified. This covers version strings such as:
+  //   5        (handled above)
+  //   4.4
+  //   4.4.0
+  //   4.4.x
+  //   4.4.2-rc4
+  //   4.4.x-patched
+  // And retains any patch number it finds.
+  StringRef PatchText = GoodVersion.PatchSuffix = Second.second.str();
+  if (!PatchText.empty()) {
+    if (size_t EndNumber = PatchText.find_first_not_of("0123456789")) {
+      // Try to parse the number and any suffix.
+      if (PatchText.slice(0, EndNumber).getAsInteger(10, GoodVersion.Patch) ||
+          GoodVersion.Patch < 0)
+        return BadVersion;
+      GoodVersion.PatchSuffix = PatchText.substr(EndNumber);
+    }
+  }
+
+  return GoodVersion;
+}
+
 static llvm::StringRef getGCCToolchainDir(const ArgList &Args) {
   const Arg *A = Args.getLastArg(clang::driver::options::OPT_gcc_toolchain);
   if (A)
@@ -2229,7 +2291,7 @@ void Generic_GCC::printVerboseInfo(raw_ostream &OS) const {
   CudaInstallation.print(OS);
 }
 
-bool Generic_GCC::IsUnwindTablesDefault() const {
+bool Generic_GCC::IsUnwindTablesDefault(const ArgList &Args) const {
   return getArch() == llvm::Triple::x86_64;
 }
 
@@ -2276,9 +2338,11 @@ bool Generic_GCC::IsIntegratedAssemblerDefault() const {
     return true;
   case llvm::Triple::mips64:
   case llvm::Triple::mips64el:
-    // Enabled for Debian mips64/mips64el only. Other targets are unable to
-    // distinguish N32 from N64.
-    if (getTriple().getEnvironment() == llvm::Triple::GNUABI64)
+    // Enabled for Debian and Android mips64/mipsel, as they can precisely
+    // identify the ABI in use (Debian) or only use N64 for MIPS64 (Android).
+    // Other targets are unable to distinguish N32 from N64.
+    if (getTriple().getEnvironment() == llvm::Triple::GNUABI64 ||
+        getTriple().isAndroid())
       return true;
     return false;
   default:
@@ -2397,7 +2461,8 @@ Generic_GCC::TranslateArgs(const llvm::opt::DerivedArgList &Args, StringRef,
 void Generic_ELF::anchor() {}
 
 void Generic_ELF::addClangTargetOptions(const ArgList &DriverArgs,
-                                        ArgStringList &CC1Args) const {
+                                        ArgStringList &CC1Args,
+                                        Action::OffloadKind) const {
   const Generic_GCC::GCCVersion &V = GCCInstallation.getVersion();
   bool UseInitArrayDefault =
       getTriple().getArch() == llvm::Triple::aarch64 ||
@@ -2406,7 +2471,8 @@ void Generic_ELF::addClangTargetOptions(const ArgList &DriverArgs,
        (!V.isOlderThan(4, 7, 0) || getTriple().isAndroid())) ||
       getTriple().getOS() == llvm::Triple::NaCl ||
       (getTriple().getVendor() == llvm::Triple::MipsTechnologies &&
-       !getTriple().hasEnvironment());
+       !getTriple().hasEnvironment()) ||
+      getTriple().getOS() == llvm::Triple::Solaris;
 
   if (DriverArgs.hasFlag(options::OPT_fuse_init_array,
                          options::OPT_fno_use_init_array, UseInitArrayDefault))
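GCCVersion::Parse, hoisted into Generic_GCC above so that toolchains other than Linux can reuse it, splits on dots and then peels a numeric prefix off the patch component. A self-contained illustration of the decomposition it performs (a simplified std::string sketch, not the real StringRef implementation; error handling omitted):

    #include <cassert>
    #include <cstdlib>
    #include <string>

    // Illustration of GCCVersion::Parse's decomposition.
    struct V { int Major = -1, Minor = -1, Patch = -1; std::string PatchSuffix; };

    V parse(const std::string &Text) {
      V Out;
      size_t Dot1 = Text.find('.');
      Out.Major = std::atoi(Text.substr(0, Dot1).c_str());
      if (Dot1 == std::string::npos) return Out;            // e.g. "5"
      size_t Dot2 = Text.find('.', Dot1 + 1);
      Out.Minor = std::atoi(Text.substr(Dot1 + 1, Dot2 - Dot1 - 1).c_str());
      if (Dot2 == std::string::npos) return Out;            // e.g. "4.4"
      std::string PatchText = Text.substr(Dot2 + 1);
      size_t End = PatchText.find_first_not_of("0123456789");
      if (End != 0)                                         // leading digits present
        Out.Patch = std::atoi(PatchText.substr(0, End).c_str());
      Out.PatchSuffix = (End == std::string::npos) ? "" : PatchText.substr(End);
      return Out;
    }

    int main() {
      V R = parse("4.4.2-rc4");
      assert(R.Major == 4 && R.Minor == 4 && R.Patch == 2 && R.PatchSuffix == "-rc4");
      assert(parse("4.4.x").Patch == -1);                   // no numeric prefix
      return 0;
    }
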
diff --git a/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/Gnu.h b/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/Gnu.h
index 1dc1ad49e3053..f29342b95a07e 100644
--- a/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/Gnu.h
+++ b/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/Gnu.h
@@ -284,7 +284,7 @@ class LLVM_LIBRARY_VISIBILITY Generic_GCC : public ToolChain {
 
   void printVerboseInfo(raw_ostream &OS) const override;
 
-  bool IsUnwindTablesDefault() const override;
+  bool IsUnwindTablesDefault(const llvm::opt::ArgList &Args) const override;
   bool isPICDefault() const override;
   bool isPIEDefault() const override;
   bool isPICDefaultForced() const override;
@@ -341,7 +341,8 @@ class LLVM_LIBRARY_VISIBILITY Generic_ELF : public Generic_GCC {
       : Generic_GCC(D, Triple, Args) {}
 
   void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs,
-                             llvm::opt::ArgStringList &CC1Args) const override;
+                             llvm::opt::ArgStringList &CC1Args,
+                             Action::OffloadKind DeviceOffloadKind) const override;
 };
 
 } // end namespace toolchains
diff --git a/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/Hexagon.cpp b/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/Hexagon.cpp
index 1d7bcf8e4df00..9bf1590e6a37e 100644
--- a/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/Hexagon.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/Hexagon.cpp
@@ -428,7 +428,8 @@ unsigned HexagonToolChain::getOptimizationLevel(
 }
 
 void HexagonToolChain::addClangTargetOptions(const ArgList &DriverArgs,
-                                             ArgStringList &CC1Args) const {
+                                             ArgStringList &CC1Args,
+                                             Action::OffloadKind) const {
   if (DriverArgs.hasArg(options::OPT_ffp_contract))
     return;
   unsigned OptLevel = getOptimizationLevel(DriverArgs);
diff --git a/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/Hexagon.h b/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/Hexagon.h
index 78f97a3d59fdc..7f9a6b2f34b92 100644
--- a/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/Hexagon.h
+++ b/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/Hexagon.h
@@ -69,7 +69,8 @@ class LLVM_LIBRARY_VISIBILITY HexagonToolChain : public Linux {
   ~HexagonToolChain() override;
 
   void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs,
-                             llvm::opt::ArgStringList &CC1Args) const override;
+                             llvm::opt::ArgStringList &CC1Args,
+                             Action::OffloadKind DeviceOffloadKind) const override;
   void
   AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs,
                             llvm::opt::ArgStringList &CC1Args) const override;
diff --git a/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/Linux.cpp b/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/Linux.cpp
index 50443a1252444..08a27fa7fed10 100644
--- a/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/Linux.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/Linux.cpp
@@ -372,49 +372,6 @@ Linux::Linux(const Driver &D, const llvm::Triple &Triple, const ArgList &Args)
   addPathIfExists(D, SysRoot + "/usr/lib", Paths);
 }
 
-/// \brief Parse a GCCVersion object out of a string of text.
-///
-/// This is the primary means of forming GCCVersion objects.
-/*static*/
-Generic_GCC::GCCVersion Linux::GCCVersion::Parse(StringRef VersionText) {
-  const GCCVersion BadVersion = {VersionText.str(), -1, -1, -1, "", "", ""};
-  std::pair<StringRef, StringRef> First = VersionText.split('.');
-  std::pair<StringRef, StringRef> Second = First.second.split('.');
-
-  GCCVersion GoodVersion = {VersionText.str(), -1, -1, -1, "", "", ""};
-  if (First.first.getAsInteger(10, GoodVersion.Major) || GoodVersion.Major < 0)
-    return BadVersion;
-  GoodVersion.MajorStr = First.first.str();
-  if (First.second.empty())
-    return GoodVersion;
-  if (Second.first.getAsInteger(10, GoodVersion.Minor) || GoodVersion.Minor < 0)
-    return BadVersion;
-  GoodVersion.MinorStr = Second.first.str();
-
-  // First look for a number prefix and parse that if present. Otherwise just
-  // stash the entire patch string in the suffix, and leave the number
-  // unspecified. This covers versions strings such as:
-  //   5        (handled above)
-  //   4.4
-  //   4.4.0
-  //   4.4.x
-  //   4.4.2-rc4
-  //   4.4.x-patched
-  // And retains any patch number it finds.
-  StringRef PatchText = GoodVersion.PatchSuffix = Second.second.str();
-  if (!PatchText.empty()) {
-    if (size_t EndNumber = PatchText.find_first_not_of("0123456789")) {
-      // Try to parse the number and any suffix.
-      if (PatchText.slice(0, EndNumber).getAsInteger(10, GoodVersion.Patch) ||
-          GoodVersion.Patch < 0)
-        return BadVersion;
-      GoodVersion.PatchSuffix = PatchText.substr(EndNumber);
-    }
-  }
-
-  return GoodVersion;
-}
-
 bool Linux::HasNativeLLVMSupport() const { return true; }
 
 Tool *Linux::buildLinker() const { return new tools::gnutools::Linker(*this); }
@@ -865,8 +822,9 @@ SanitizerMask Linux::getSupportedSanitizers() const {
   const bool IsAArch64 = getTriple().getArch() == llvm::Triple::aarch64 ||
                          getTriple().getArch() == llvm::Triple::aarch64_be;
   const bool IsArmArch = getTriple().getArch() == llvm::Triple::arm ||
-                         llvm::Triple::thumb || llvm::Triple::armeb ||
-                         llvm::Triple::thumbeb;
+                         getTriple().getArch() == llvm::Triple::thumb ||
+                         getTriple().getArch() == llvm::Triple::armeb ||
+                         getTriple().getArch() == llvm::Triple::thumbeb;
   SanitizerMask Res = ToolChain::getSupportedSanitizers();
   Res |= SanitizerKind::Address;
   Res |= SanitizerKind::Fuzzer;
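The Linux::getSupportedSanitizers change above fixes a classic short-circuit typo: only the first operand compared against getArch(), while the remaining bare enum constants are non-zero and therefore always truthy, so IsArmArch was true on every target. A standalone illustration (the enum values are placeholders, not the real llvm::Triple constants):

    enum ArchType { x86_64 = 1, arm = 2, thumb = 3 };

    constexpr bool buggy(ArchType A) { return A == arm || thumb; }   // "|| thumb" is always true
    constexpr bool fixed(ArchType A) { return A == arm || A == thumb; }

    static_assert(buggy(x86_64), "the old predicate classifies every arch as ARM");
    static_assert(!fixed(x86_64), "the corrected predicate rejects x86_64");

    int main() { return 0; }
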
diff --git a/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/MSVC.cpp b/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/MSVC.cpp
index a09304814ca69..7978a6941cb85 100644
--- a/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/MSVC.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/MSVC.cpp
@@ -76,7 +76,7 @@ static bool getSystemRegistryString(const char *keyPath, const char *valueName,
 
 // Check various environment variables to try and find a toolchain.
 static bool findVCToolChainViaEnvironment(std::string &Path,
-                                          bool &IsVS2017OrNewer) {
+                                          MSVCToolChain::ToolsetLayout &VSLayout) {
   // These variables are typically set by vcvarsall.bat
   // when launching a developer command prompt.
   if (llvm::Optional<std::string> VCToolsInstallDir =
@@ -84,7 +84,7 @@ static bool findVCToolChainViaEnvironment(std::string &Path,
     // This is only set by newer Visual Studios, and it leads straight to
     // the toolchain directory.
     Path = std::move(*VCToolsInstallDir);
-    IsVS2017OrNewer = true;
+    VSLayout = MSVCToolChain::ToolsetLayout::VS2017OrNewer;
     return true;
   }
   if (llvm::Optional<std::string> VCInstallDir =
@@ -94,7 +94,7 @@ static bool findVCToolChainViaEnvironment(std::string &Path,
     // so this check has to appear second.
     // In older Visual Studios, the VC directory is the toolchain.
     Path = std::move(*VCInstallDir);
-    IsVS2017OrNewer = false;
+    VSLayout = MSVCToolChain::ToolsetLayout::OlderVS;
     return true;
   }
 
@@ -125,11 +125,25 @@ static bool findVCToolChainViaEnvironment(std::string &Path,
         continue;
 
       // whatever/VC/bin --> old toolchain, VC dir is toolchain dir.
-      if (llvm::sys::path::filename(PathEntry) == "bin") {
-        llvm::StringRef ParentPath = llvm::sys::path::parent_path(PathEntry);
-        if (llvm::sys::path::filename(ParentPath) == "VC") {
+      llvm::StringRef TestPath = PathEntry;
+      bool IsBin = llvm::sys::path::filename(TestPath).equals_lower("bin");
+      if (!IsBin) {
+        // Strip any architecture subdir like "amd64".
+        TestPath = llvm::sys::path::parent_path(TestPath);
+        IsBin = llvm::sys::path::filename(TestPath).equals_lower("bin");
+      }
+      if (IsBin) {
+        llvm::StringRef ParentPath = llvm::sys::path::parent_path(TestPath);
+        llvm::StringRef ParentFilename = llvm::sys::path::filename(ParentPath);
+        if (ParentFilename == "VC") {
+          Path = ParentPath;
+          VSLayout = MSVCToolChain::ToolsetLayout::OlderVS;
+          return true;
+        }
+        if (ParentFilename == "x86ret" || ParentFilename == "x86chk"
+          || ParentFilename == "amd64ret" || ParentFilename == "amd64chk") {
           Path = ParentPath;
-          IsVS2017OrNewer = false;
+          VSLayout = MSVCToolChain::ToolsetLayout::DevDivInternal;
           return true;
         }
 
@@ -158,7 +172,7 @@ static bool findVCToolChainViaEnvironment(std::string &Path,
           ToolChainPath = llvm::sys::path::parent_path(ToolChainPath);
 
         Path = ToolChainPath;
-        IsVS2017OrNewer = true;
+        VSLayout = MSVCToolChain::ToolsetLayout::VS2017OrNewer;
         return true;
       }
 
@@ -174,7 +188,7 @@ static bool findVCToolChainViaEnvironment(std::string &Path,
 // This is the preferred way to discover new Visual Studios, as they're no
 // longer listed in the registry.
 static bool findVCToolChainViaSetupConfig(std::string &Path,
-                                          bool &IsVS2017OrNewer) {
+                                          MSVCToolChain::ToolsetLayout &VSLayout) {
 #if !defined(USE_MSVC_SETUP_API)
   return false;
 #else
@@ -256,7 +270,7 @@ static bool findVCToolChainViaSetupConfig(std::string &Path,
     return false;
 
   Path = ToolchainPath.str();
-  IsVS2017OrNewer = true;
+  VSLayout = MSVCToolChain::ToolsetLayout::VS2017OrNewer;
   return true;
 #endif
 }
@@ -265,7 +279,7 @@ static bool findVCToolChainViaSetupConfig(std::string &Path,
 // a toolchain path. VS2017 and newer don't get added to the registry.
 // So if we find something here, we know that it's an older version.
 static bool findVCToolChainViaRegistry(std::string &Path,
-                                       bool &IsVS2017OrNewer) {
+                                       MSVCToolChain::ToolsetLayout &VSLayout) {
   std::string VSInstallPath;
   if (getSystemRegistryString(R"(SOFTWARE\Microsoft\VisualStudio\$VERSION)",
                               "InstallDir", VSInstallPath, nullptr) ||
@@ -277,7 +291,7 @@ static bool findVCToolChainViaRegistry(std::string &Path,
       llvm::sys::path::append(VCPath, "VC");
 
       Path = VCPath.str();
-      IsVS2017OrNewer = false;
+      VSLayout = MSVCToolChain::ToolsetLayout::OlderVS;
       return true;
     }
   }
@@ -468,6 +482,7 @@ void visualstudio::Linker::ConstructJob(Compilation &C, const JobAction &JA,
     // native target bin directory.
     // e.g. when compiling for x86 on an x64 host, PATH should start with:
     // /bin/HostX64/x86;/bin/HostX64/x64
+    // This doesn't attempt to handle ToolsetLayout::DevDivInternal.
     if (TC.getIsVS2017OrNewer() &&
         llvm::Triple(llvm::sys::getProcessTriple()).getArch() != TC.getArch()) {
       auto HostArch = llvm::Triple(llvm::sys::getProcessTriple()).getArch();
@@ -522,9 +537,7 @@ void visualstudio::Linker::ConstructJob(Compilation &C, const JobAction &JA,
   SkipSettingEnvironment:;
 #endif
   } else {
-    linkPath = Linker;
-    llvm::sys::path::replace_extension(linkPath, "exe");
-    linkPath = TC.GetProgramPath(linkPath.c_str());
+    linkPath = TC.GetProgramPath(Linker.str().c_str());
   }
 
   auto LinkCmd = llvm::make_unique<Command>(
@@ -672,9 +685,9 @@ MSVCToolChain::MSVCToolChain(const Driver &D, const llvm::Triple &Triple,
   // what they want to use.
   // Failing that, just try to find the newest Visual Studio version we can
   // and use its default VC toolchain.
-  findVCToolChainViaEnvironment(VCToolChainPath, IsVS2017OrNewer) ||
-      findVCToolChainViaSetupConfig(VCToolChainPath, IsVS2017OrNewer) ||
-      findVCToolChainViaRegistry(VCToolChainPath, IsVS2017OrNewer);
+  findVCToolChainViaEnvironment(VCToolChainPath, VSLayout) ||
+      findVCToolChainViaSetupConfig(VCToolChainPath, VSLayout) ||
+      findVCToolChainViaRegistry(VCToolChainPath, VSLayout);
 }
 
 Tool *MSVCToolChain::buildLinker() const {
@@ -694,7 +707,7 @@ bool MSVCToolChain::IsIntegratedAssemblerDefault() const {
   return true;
 }
 
-bool MSVCToolChain::IsUnwindTablesDefault() const {
+bool MSVCToolChain::IsUnwindTablesDefault(const ArgList &Args) const {
   // Emit unwind tables by default on Win64. All non-x86_32 Windows platforms
   // such as ARM and PPC actually require unwind tables, but LLVM doesn't know
   // how to generate them yet.
@@ -761,6 +774,21 @@ static const char *llvmArchToLegacyVCArch(llvm::Triple::ArchType Arch) {
   }
 }
 
+// Similar to the above function, but for DevDiv internal builds.
+static const char *llvmArchToDevDivInternalArch(llvm::Triple::ArchType Arch) {
+  using ArchType = llvm::Triple::ArchType;
+  switch (Arch) {
+  case ArchType::x86:
+    return "i386";
+  case ArchType::x86_64:
+    return "amd64";
+  case ArchType::arm:
+    return "arm";
+  default:
+    return "";
+  }
+}
+
 // Get the path to a specific subdirectory in the current toolchain for
 // a given target architecture.
 // VS2017 changed the VC toolchain layout, so this should be used instead
@@ -768,26 +796,40 @@ static const char *llvmArchToLegacyVCArch(llvm::Triple::ArchType Arch) {
 std::string
 MSVCToolChain::getSubDirectoryPath(SubDirectoryType Type,
                                    llvm::Triple::ArchType TargetArch) const {
+  const char *SubdirName;
+  const char *IncludeName;
+  switch (VSLayout) {
+  case ToolsetLayout::OlderVS:
+    SubdirName = llvmArchToLegacyVCArch(TargetArch);
+    IncludeName = "include";
+    break;
+  case ToolsetLayout::VS2017OrNewer:
+    SubdirName = llvmArchToWindowsSDKArch(TargetArch);
+    IncludeName = "include";
+    break;
+  case ToolsetLayout::DevDivInternal:
+    SubdirName = llvmArchToDevDivInternalArch(TargetArch);
+    IncludeName = "inc";
+    break;
+  }
+
   llvm::SmallString<256> Path(VCToolChainPath);
   switch (Type) {
   case SubDirectoryType::Bin:
-    if (IsVS2017OrNewer) {
-      bool HostIsX64 =
+    if (VSLayout == ToolsetLayout::VS2017OrNewer) {
+      const bool HostIsX64 =
           llvm::Triple(llvm::sys::getProcessTriple()).isArch64Bit();
-      llvm::sys::path::append(Path, "bin", (HostIsX64 ? "HostX64" : "HostX86"),
-                              llvmArchToWindowsSDKArch(TargetArch));
-
-    } else {
-      llvm::sys::path::append(Path, "bin", llvmArchToLegacyVCArch(TargetArch));
+      const char *const HostName = HostIsX64 ? "HostX64" : "HostX86";
+      llvm::sys::path::append(Path, "bin", HostName, SubdirName);
+    } else { // OlderVS or DevDivInternal
+      llvm::sys::path::append(Path, "bin", SubdirName);
     }
     break;
   case SubDirectoryType::Include:
-    llvm::sys::path::append(Path, "include");
+    llvm::sys::path::append(Path, IncludeName);
     break;
   case SubDirectoryType::Lib:
-    llvm::sys::path::append(
-        Path, "lib", IsVS2017OrNewer ? llvmArchToWindowsSDKArch(TargetArch)
-                                     : llvmArchToLegacyVCArch(TargetArch));
+    llvm::sys::path::append(Path, "lib", SubdirName);
     break;
   }
   return Path.str();
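With getSubDirectoryPath above folding the three layouts into one switch, the per-layout paths for an x86_64 target come out as follows (a sketch; the "x64" and "amd64" names are assumed from llvmArchToWindowsSDKArch and llvmArchToLegacyVCArch, which are defined outside this hunk):

    // Subdirectory selection for TargetArch == x86_64, rooted at
    // VCToolChainPath (sketch):
    //   ToolsetLayout::OlderVS          bin/amd64         include   lib/amd64
    //   ToolsetLayout::VS2017OrNewer    bin/HostX64/x64   include   lib/x64
    //   ToolsetLayout::DevDivInternal   bin/amd64         inc       lib/amd64
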
diff --git a/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/MSVC.h b/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/MSVC.h
index 055830c52e0da..854f88a36fd2f 100644
--- a/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/MSVC.h
+++ b/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/MSVC.h
@@ -73,7 +73,7 @@ class LLVM_LIBRARY_VISIBILITY MSVCToolChain : public ToolChain {
                 Action::OffloadKind DeviceOffloadKind) const override;
 
   bool IsIntegratedAssemblerDefault() const override;
-  bool IsUnwindTablesDefault() const override;
+  bool IsUnwindTablesDefault(const llvm::opt::ArgList &Args) const override;
   bool isPICDefault() const override;
   bool isPIEDefault() const override;
   bool isPICDefaultForced() const override;
@@ -92,7 +92,12 @@ class LLVM_LIBRARY_VISIBILITY MSVCToolChain : public ToolChain {
     return getSubDirectoryPath(Type, getArch());
   }
 
-  bool getIsVS2017OrNewer() const { return IsVS2017OrNewer; }
+  enum class ToolsetLayout {
+    OlderVS,
+    VS2017OrNewer,
+    DevDivInternal,
+  };
+  bool getIsVS2017OrNewer() const { return VSLayout == ToolsetLayout::VS2017OrNewer; }
 
   void
   AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs,
@@ -130,7 +135,7 @@ class LLVM_LIBRARY_VISIBILITY MSVCToolChain : public ToolChain {
   Tool *buildAssembler() const override;
 private:
   std::string VCToolChainPath;
-  bool IsVS2017OrNewer = false;
+  ToolsetLayout VSLayout = ToolsetLayout::OlderVS;
   CudaInstallationDetector CudaInstallation;
 };
 
diff --git a/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/MinGW.cpp b/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/MinGW.cpp
index 7550bab486f1b..632b76d92bddf 100644
--- a/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/MinGW.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/MinGW.cpp
@@ -347,7 +347,7 @@ Tool *toolchains::MinGW::buildLinker() const {
   return new tools::MinGW::Linker(*this);
 }
 
-bool toolchains::MinGW::IsUnwindTablesDefault() const {
+bool toolchains::MinGW::IsUnwindTablesDefault(const ArgList &Args) const {
   return getArch() == llvm::Triple::x86_64;
 }
 
diff --git a/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/MinGW.h b/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/MinGW.h
index cf1628a4ccdd9..9b3d7c553f1d8 100644
--- a/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/MinGW.h
+++ b/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/MinGW.h
@@ -60,7 +60,7 @@ class LLVM_LIBRARY_VISIBILITY MinGW : public ToolChain {
         const llvm::opt::ArgList &Args);
 
   bool IsIntegratedAssemblerDefault() const override;
-  bool IsUnwindTablesDefault() const override;
+  bool IsUnwindTablesDefault(const llvm::opt::ArgList &Args) const override;
   bool isPICDefault() const override;
   bool isPIEDefault() const override;
   bool isPICDefaultForced() const override;
diff --git a/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/MipsLinux.cpp b/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/MipsLinux.cpp
index 709c396a64b79..b394208336edb 100644
--- a/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/MipsLinux.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/MipsLinux.cpp
@@ -109,7 +109,7 @@ std::string MipsLLVMToolChain::findLibCxxIncludePath() const {
 void MipsLLVMToolChain::AddCXXStdlibLibArgs(const ArgList &Args,
                                             ArgStringList &CmdArgs) const {
   assert((GetCXXStdlibType(Args) == ToolChain::CST_Libcxx) &&
-         "Only -lc++ (aka libxx) is suported in this toolchain.");
+         "Only -lc++ (aka libxx) is supported in this toolchain.");
 
   CmdArgs.push_back("-lc++");
   CmdArgs.push_back("-lc++abi");
diff --git a/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/Myriad.cpp b/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/Myriad.cpp
index f70ce93c45ce5..6fdb5a2248dd1 100644
--- a/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/Myriad.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/Myriad.cpp
@@ -217,6 +217,7 @@ MyriadToolChain::MyriadToolChain(const Driver &D, const llvm::Triple &Triple,
   default:
     D.Diag(clang::diag::err_target_unsupported_arch)
         << Triple.getArchName() << "myriad";
+    LLVM_FALLTHROUGH;
   case llvm::Triple::sparc:
   case llvm::Triple::sparcel:
   case llvm::Triple::shave:
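The LLVM_FALLTHROUGH added above (and in the RewriteObjCFoundationAPI hunks later in this patch) marks a deliberate case fall-through so -Wimplicit-fallthrough does not fire once the diagnostic has been emitted. A minimal usage sketch with hypothetical helpers:

    // After reporting the unsupported arch, control intentionally continues
    // into the supported-arch handling, exactly as MyriadToolChain does above.
    switch (Arch) {
    default:
      reportUnsupportedArch();  // hypothetical diagnostic helper
      LLVM_FALLTHROUGH;         // expands to [[clang::fallthrough]] where available
    case llvm::Triple::sparc:
      configureSparc();         // hypothetical
      break;
    }
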
diff --git a/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/NetBSD.cpp b/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/NetBSD.cpp
index d7d3ad61df420..a1a3108cb28d7 100644
--- a/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/NetBSD.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/NetBSD.cpp
@@ -15,6 +15,7 @@
 #include "clang/Driver/Compilation.h"
 #include "clang/Driver/Driver.h"
 #include "clang/Driver/Options.h"
+#include "clang/Driver/SanitizerArgs.h"
 #include "llvm/Option/ArgList.h"
 
 using namespace clang::driver;
@@ -246,6 +247,7 @@ void netbsd::Linker::ConstructJob(Compilation &C, const JobAction &JA,
   Args.AddAllArgs(CmdArgs, options::OPT_Z_Flag);
   Args.AddAllArgs(CmdArgs, options::OPT_r);
 
+  bool NeedsSanitizerDeps = addSanitizerRuntimes(getToolChain(), Args, CmdArgs);
   AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA);
 
   unsigned Major, Minor, Micro;
@@ -279,6 +281,8 @@ void netbsd::Linker::ConstructJob(Compilation &C, const JobAction &JA,
       getToolChain().AddCXXStdlibLibArgs(Args, CmdArgs);
       CmdArgs.push_back("-lm");
     }
+    if (NeedsSanitizerDeps)
+      linkSanitizerRuntimeDeps(getToolChain(), CmdArgs);
     if (Args.hasArg(options::OPT_pthread))
       CmdArgs.push_back("-lpthread");
     CmdArgs.push_back("-lc");
@@ -410,3 +414,12 @@ void NetBSD::addLibStdCxxIncludePaths(const llvm::opt::ArgList &DriverArgs,
   addLibStdCXXIncludePaths(getDriver().SysRoot, "/usr/include/g++", "", "", "",
                            "", DriverArgs, CC1Args);
 }
+
+SanitizerMask NetBSD::getSupportedSanitizers() const {
+  const bool IsX86_64 = getTriple().getArch() == llvm::Triple::x86_64;
+  SanitizerMask Res = ToolChain::getSupportedSanitizers();
+  if (IsX86_64) {
+    Res |= SanitizerKind::Address;
+  }
+  return Res;
+}
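Two coordinated pieces above enable ASan on NetBSD/x86_64: getSupportedSanitizers() advertises the feature, and the linker job threads the sanitizer runtimes into the link line in the conventional order:

    // Link-line ordering per the ConstructJob hunk above (sketch):
    //   1. sanitizer runtimes        addSanitizerRuntimes(), before the inputs
    //   2. user objects              AddLinkerInputs()
    //   3. C++ stdlib and -lm        only when linking a C++ program
    //   4. runtime dependencies      linkSanitizerRuntimeDeps(), e.g. -lpthread
    //   5. -lc
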
diff --git a/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/NetBSD.h b/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/NetBSD.h
index d53aa6867872e..5163ff72d81bb 100644
--- a/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/NetBSD.h
+++ b/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/NetBSD.h
@@ -65,7 +65,11 @@ class LLVM_LIBRARY_VISIBILITY NetBSD : public Generic_ELF {
       const llvm::opt::ArgList &DriverArgs,
       llvm::opt::ArgStringList &CC1Args) const override;
 
-  bool IsUnwindTablesDefault() const override { return true; }
+  bool IsUnwindTablesDefault(const llvm::opt::ArgList &Args) const override {
+    return true;
+  }
+
+  SanitizerMask getSupportedSanitizers() const override;
 
 protected:
   Tool *buildAssembler() const override;
diff --git a/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/OpenBSD.cpp b/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/OpenBSD.cpp
index c5f266ec8fdca..1d54a1e9cbb05 100644
--- a/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/OpenBSD.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/OpenBSD.cpp
@@ -133,6 +133,8 @@ void openbsd::Linker::ConstructJob(Compilation &C, const JobAction &JA,
     }
   }
 
+  if (Args.hasArg(options::OPT_pie))
+    CmdArgs.push_back("-pie");
   if (Args.hasArg(options::OPT_nopie))
     CmdArgs.push_back("-nopie");
 
diff --git a/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/Solaris.cpp b/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/Solaris.cpp
index 78797c49d7b6f..de98d11b2dc7b 100644
--- a/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/Solaris.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/Solaris.cpp
@@ -126,7 +126,7 @@ void solaris::Linker::ConstructJob(Compilation &C, const JobAction &JA,
 
 Solaris::Solaris(const Driver &D, const llvm::Triple &Triple,
                  const ArgList &Args)
-    : Generic_GCC(D, Triple, Args) {
+    : Generic_ELF(D, Triple, Args) {
 
   GCCInstallation.init(Triple, Args);
 
diff --git a/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/Solaris.h b/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/Solaris.h
index edb44373b31db..787917afab6ea 100644
--- a/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/Solaris.h
+++ b/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/Solaris.h
@@ -50,7 +50,7 @@ class LLVM_LIBRARY_VISIBILITY Linker : public Tool {
 
 namespace toolchains {
 
-class LLVM_LIBRARY_VISIBILITY Solaris : public Generic_GCC {
+class LLVM_LIBRARY_VISIBILITY Solaris : public Generic_ELF {
 public:
   Solaris(const Driver &D, const llvm::Triple &Triple,
           const llvm::opt::ArgList &Args);
diff --git a/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/WebAssembly.cpp b/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/WebAssembly.cpp
index 3471569b68849..058bc42323e2f 100644
--- a/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/WebAssembly.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/WebAssembly.cpp
@@ -83,6 +83,8 @@ void wasm::Linker::ConstructJob(Compilation &C, const JobAction &JA,
     if (Args.hasArg(options::OPT_pthread))
       CmdArgs.push_back("-lpthread");
 
+    CmdArgs.push_back("-allow-undefined-file");
+    CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("wasm.syms")));
     CmdArgs.push_back("-lc");
     CmdArgs.push_back("-lcompiler_rt");
   }
@@ -104,8 +106,7 @@ WebAssembly::WebAssembly(const Driver &D, const llvm::Triple &Triple,
 
   getProgramPaths().push_back(getDriver().getInstalledDir());
 
-  getFilePaths().push_back(
-      getDriver().SysRoot + "/lib" + (Triple.isArch32Bit() ? "32" : "64"));
+  getFilePaths().push_back(getDriver().SysRoot + "/lib");
 }
 
 bool WebAssembly::IsMathErrnoDefault() const { return false; }
@@ -133,7 +134,8 @@ bool WebAssembly::SupportsProfiling() const { return false; }
 bool WebAssembly::HasNativeLLVMSupport() const { return true; }
 
 void WebAssembly::addClangTargetOptions(const ArgList &DriverArgs,
-                                        ArgStringList &CC1Args) const {
+                                        ArgStringList &CC1Args,
+                                        Action::OffloadKind) const {
   if (DriverArgs.hasFlag(clang::driver::options::OPT_fuse_init_array,
                          options::OPT_fno_use_init_array, true))
     CC1Args.push_back("-fuse-init-array");
diff --git a/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/WebAssembly.h b/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/WebAssembly.h
index ca42fc651a6d2..2999db477f799 100644
--- a/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/WebAssembly.h
+++ b/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/WebAssembly.h
@@ -53,7 +53,8 @@ class LLVM_LIBRARY_VISIBILITY WebAssembly final : public ToolChain {
   bool SupportsProfiling() const override;
   bool HasNativeLLVMSupport() const override;
   void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs,
-                             llvm::opt::ArgStringList &CC1Args) const override;
+                             llvm::opt::ArgStringList &CC1Args,
+                             Action::OffloadKind DeviceOffloadKind) const override;
   RuntimeLibType GetDefaultRuntimeLibType() const override;
   CXXStdlibType GetCXXStdlibType(const llvm::opt::ArgList &Args) const override;
   void AddClangSystemIncludeArgs(
diff --git a/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/XCore.cpp b/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/XCore.cpp
index c3ae9582124fc..43175ad7d6320 100644
--- a/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/XCore.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/XCore.cpp
@@ -124,7 +124,8 @@ void XCoreToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
 }
 
 void XCoreToolChain::addClangTargetOptions(const ArgList &DriverArgs,
-                                           ArgStringList &CC1Args) const {
+                                           ArgStringList &CC1Args,
+                                           Action::OffloadKind) const {
   CC1Args.push_back("-nostdsysteminc");
 }
 
diff --git a/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/XCore.h b/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/XCore.h
index 4084b1cdec136..00c89bd15f787 100644
--- a/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/XCore.h
+++ b/interpreter/llvm/src/tools/clang/lib/Driver/ToolChains/XCore.h
@@ -67,7 +67,8 @@ class LLVM_LIBRARY_VISIBILITY XCoreToolChain : public ToolChain {
   AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs,
                             llvm::opt::ArgStringList &CC1Args) const override;
   void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs,
-                             llvm::opt::ArgStringList &CC1Args) const override;
+                             llvm::opt::ArgStringList &CC1Args,
+                             Action::OffloadKind DeviceOffloadKind) const override;
   void AddClangCXXStdlibIncludeArgs(
       const llvm::opt::ArgList &DriverArgs,
       llvm::opt::ArgStringList &CC1Args) const override;
diff --git a/interpreter/llvm/src/tools/clang/lib/Edit/EditedSource.cpp b/interpreter/llvm/src/tools/clang/lib/Edit/EditedSource.cpp
index 1a7a68cffb625..444d0393cccd4 100644
--- a/interpreter/llvm/src/tools/clang/lib/Edit/EditedSource.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Edit/EditedSource.cpp
@@ -25,17 +25,21 @@ void EditsReceiver::remove(CharSourceRange range) {
 
 void EditedSource::deconstructMacroArgLoc(SourceLocation Loc,
                                           SourceLocation &ExpansionLoc,
-                                          IdentifierInfo *&II) {
+                                          MacroArgUse &ArgUse) {
   assert(SourceMgr.isMacroArgExpansion(Loc));
   SourceLocation DefArgLoc = SourceMgr.getImmediateExpansionRange(Loc).first;
-  ExpansionLoc = SourceMgr.getImmediateExpansionRange(DefArgLoc).first;
+  SourceLocation ImmediateExpansionLoc =
+      SourceMgr.getImmediateExpansionRange(DefArgLoc).first;
+  ExpansionLoc = ImmediateExpansionLoc;
+  while (SourceMgr.isMacroBodyExpansion(ExpansionLoc))
+    ExpansionLoc = SourceMgr.getImmediateExpansionRange(ExpansionLoc).first;
   SmallString<20> Buf;
   StringRef ArgName = Lexer::getSpelling(SourceMgr.getSpellingLoc(DefArgLoc),
                                          Buf, SourceMgr, LangOpts);
-  II = nullptr;
-  if (!ArgName.empty()) {
-    II = &IdentTable.get(ArgName);
-  }
+  ArgUse = MacroArgUse{nullptr, SourceLocation(), SourceLocation()};
+  if (!ArgName.empty())
+    ArgUse = {&IdentTable.get(ArgName), ImmediateExpansionLoc,
+              SourceMgr.getSpellingLoc(DefArgLoc)};
 }
 
 void EditedSource::startingCommit() {}
@@ -43,12 +47,11 @@ void EditedSource::startingCommit() {}
 void EditedSource::finishedCommit() {
   for (auto &ExpArg : CurrCommitMacroArgExps) {
     SourceLocation ExpLoc;
-    IdentifierInfo *II;
-    std::tie(ExpLoc, II) = ExpArg;
-    auto &ArgNames = ExpansionToArgMap[ExpLoc.getRawEncoding()];
-    if (std::find(ArgNames.begin(), ArgNames.end(), II) == ArgNames.end()) {
-      ArgNames.push_back(II);
-    }
+    MacroArgUse ArgUse;
+    std::tie(ExpLoc, ArgUse) = ExpArg;
+    auto &ArgUses = ExpansionToArgMap[ExpLoc.getRawEncoding()];
+    if (std::find(ArgUses.begin(), ArgUses.end(), ArgUse) == ArgUses.end())
+      ArgUses.push_back(ArgUse);
   }
   CurrCommitMacroArgExps.clear();
 }
@@ -66,12 +69,16 @@ bool EditedSource::canInsertInOffset(SourceLocation OrigLoc, FileOffset Offs) {
   }
 
   if (SourceMgr.isMacroArgExpansion(OrigLoc)) {
-    IdentifierInfo *II;
     SourceLocation ExpLoc;
-    deconstructMacroArgLoc(OrigLoc, ExpLoc, II);
+    MacroArgUse ArgUse;
+    deconstructMacroArgLoc(OrigLoc, ExpLoc, ArgUse);
     auto I = ExpansionToArgMap.find(ExpLoc.getRawEncoding());
     if (I != ExpansionToArgMap.end() &&
-        std::find(I->second.begin(), I->second.end(), II) != I->second.end()) {
+        find_if(I->second, [&](const MacroArgUse &U) {
+          return ArgUse.Identifier == U.Identifier &&
+                 std::tie(ArgUse.ImmediateExpansionLoc, ArgUse.UseLoc) !=
+                     std::tie(U.ImmediateExpansionLoc, U.UseLoc);
+        }) != I->second.end()) {
       // Trying to write in a macro argument input that has already been
       // written by a previous commit for another expansion of the same macro
       // argument name. For example:
@@ -88,7 +95,6 @@ bool EditedSource::canInsertInOffset(SourceLocation OrigLoc, FileOffset Offs) {
       return false;
     }
   }
-
   return true;
 }
 
@@ -101,13 +107,13 @@ bool EditedSource::commitInsert(SourceLocation OrigLoc,
     return true;
 
   if (SourceMgr.isMacroArgExpansion(OrigLoc)) {
-    IdentifierInfo *II;
+    MacroArgUse ArgUse;
     SourceLocation ExpLoc;
-    deconstructMacroArgLoc(OrigLoc, ExpLoc, II);
-    if (II)
-      CurrCommitMacroArgExps.emplace_back(ExpLoc, II);
+    deconstructMacroArgLoc(OrigLoc, ExpLoc, ArgUse);
+    if (ArgUse.Identifier)
+      CurrCommitMacroArgExps.emplace_back(ExpLoc, ArgUse);
   }
-  
+
   FileEdit &FA = FileEdits[Offs];
   if (FA.Text.empty()) {
     FA.Text = copyString(text);
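The EditedSource hunks above replace the bare IdentifierInfo* with a MacroArgUse record so that edits to two distinct uses of the same macro argument no longer collide merely because the argument names match. The struct itself is declared in EditedSource.h; its shape, inferred here from the use sites in this patch:

    // Inferred sketch of MacroArgUse (declared in EditedSource.h, not shown
    // in this diff); field names match the use sites above.
    struct MacroArgUse {
      IdentifierInfo *Identifier;
      SourceLocation ImmediateExpansionLoc;
      SourceLocation UseLoc;  // spelling location of the argument token

      bool operator==(const MacroArgUse &Other) const {
        return std::tie(Identifier, ImmediateExpansionLoc, UseLoc) ==
               std::tie(Other.Identifier, Other.ImmediateExpansionLoc,
                        Other.UseLoc);
      }
    };
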
diff --git a/interpreter/llvm/src/tools/clang/lib/Edit/RewriteObjCFoundationAPI.cpp b/interpreter/llvm/src/tools/clang/lib/Edit/RewriteObjCFoundationAPI.cpp
index 2148316532dec..dc501b564eea0 100644
--- a/interpreter/llvm/src/tools/clang/lib/Edit/RewriteObjCFoundationAPI.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Edit/RewriteObjCFoundationAPI.cpp
@@ -798,24 +798,28 @@ static bool rewriteToNumberLiteral(const ObjCMessageExpr *Msg,
   case NSAPI::NSNumberWithUnsignedInt:
   case NSAPI::NSNumberWithUnsignedInteger:
     CallIsUnsigned = true;
+    LLVM_FALLTHROUGH;
   case NSAPI::NSNumberWithInt:
   case NSAPI::NSNumberWithInteger:
     break;
 
   case NSAPI::NSNumberWithUnsignedLong:
     CallIsUnsigned = true;
+    LLVM_FALLTHROUGH;
   case NSAPI::NSNumberWithLong:
     CallIsLong = true;
     break;
 
   case NSAPI::NSNumberWithUnsignedLongLong:
     CallIsUnsigned = true;
+    LLVM_FALLTHROUGH;
   case NSAPI::NSNumberWithLongLong:
     CallIsLongLong = true;
     break;
 
   case NSAPI::NSNumberWithDouble:
     CallIsDouble = true;
+    LLVM_FALLTHROUGH;
   case NSAPI::NSNumberWithFloat:
     CallIsFloating = true;
     break;
diff --git a/interpreter/llvm/src/tools/clang/lib/Format/BreakableToken.cpp b/interpreter/llvm/src/tools/clang/lib/Format/BreakableToken.cpp
index c97486e4e4a79..3c9df62f80dca 100644
--- a/interpreter/llvm/src/tools/clang/lib/Format/BreakableToken.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Format/BreakableToken.cpp
@@ -41,7 +41,8 @@ static bool IsBlank(char C) {
 }
 
 static StringRef getLineCommentIndentPrefix(StringRef Comment) {
-  static const char *const KnownPrefixes[] = {"///", "//", "//!"};
+  static const char *const KnownPrefixes[] = {
+      "///<", "//!<", "///", "//", "//!"};
   StringRef LongestPrefix;
   for (StringRef KnownPrefix : KnownPrefixes) {
     if (Comment.startswith(KnownPrefix)) {
@@ -77,6 +78,14 @@ static BreakableToken::Split getCommentSplit(StringRef Text,
   }
 
   StringRef::size_type SpaceOffset = Text.find_last_of(Blanks, MaxSplitBytes);
+
+  // Do not split before a number followed by a dot: this would be interpreted
+  // as a numbered list, which would prevent re-flowing in subsequent passes.
+  static llvm::Regex kNumberedListRegexp = llvm::Regex("^[1-9][0-9]?\\.");
+  if (SpaceOffset != StringRef::npos &&
+      kNumberedListRegexp.match(Text.substr(SpaceOffset).ltrim(Blanks)))
+    SpaceOffset = Text.find_last_of(Blanks, SpaceOffset);
+
   if (SpaceOffset == StringRef::npos ||
       // Don't break at leading whitespace.
       Text.find_last_not_of(Blanks, SpaceOffset) == StringRef::npos) {
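The guard added above prevents a comment split from stranding a token like "2." at the start of the next line, which the reflow logic further down would then mistake for a numbered-list item. What the pattern accepts and rejects:

    #include "llvm/Support/Regex.h"
    #include <cassert>

    int main() {
      llvm::Regex NumberedList("^[1-9][0-9]?\\.");     // same pattern as above
      assert(NumberedList.match("2. second step"));    // looks like a list item
      assert(NumberedList.match("42. answer"));        // two digits still match
      assert(!NumberedList.match("2017. not a list")); // >2 digits: no match
      assert(!NumberedList.match("step 2."));          // anchored at line start
      return 0;
    }
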
@@ -298,8 +307,9 @@ const FormatToken &BreakableComment::tokenAt(unsigned LineIndex) const {
 static bool mayReflowContent(StringRef Content) {
   Content = Content.trim(Blanks);
   // Lines starting with '@' commonly have special meaning.
-  static const SmallVector<StringRef, 4> kSpecialMeaningPrefixes = {
-      "@", "TODO", "FIXME", "XXX"};
+  // Lines starting with '-', '-#', '+' or '*' are bulleted/numbered lists.
+  static const SmallVector<StringRef, 8> kSpecialMeaningPrefixes = {
+      "@", "TODO", "FIXME", "XXX", "-# ", "- ", "+ ", "* " };
   bool hasSpecialMeaningPrefix = false;
   for (StringRef Prefix : kSpecialMeaningPrefixes) {
     if (Content.startswith(Prefix)) {
@@ -307,6 +317,14 @@ static bool mayReflowContent(StringRef Content) {
       break;
     }
   }
+
+  // Numbered lists may also start with a number followed by '.'
+  // To avoid issues if a line starts with a number which is actually the end
+  // of a previous line, we only consider numbers with up to 2 digits.
+  static llvm::Regex kNumberedListRegexp = llvm::Regex("^[1-9][0-9]?\\. ");
+  hasSpecialMeaningPrefix = hasSpecialMeaningPrefix ||
+                            kNumberedListRegexp.match(Content);
+
   // Simple heuristic for what to reflow: content should contain at least two
   // characters and either the first or second character must be
   // non-punctuation.
@@ -692,6 +710,10 @@ BreakableLineCommentSection::BreakableLineCommentSection(
           Prefix[i] = "/// ";
         else if (Prefix[i] == "//!")
           Prefix[i] = "//! ";
+        else if (Prefix[i] == "///<")
+          Prefix[i] = "///< ";
+        else if (Prefix[i] == "//!<")
+          Prefix[i] = "//!< ";
       }
 
       Tokens[i] = LineTok;
diff --git a/interpreter/llvm/src/tools/clang/lib/Format/CMakeLists.txt b/interpreter/llvm/src/tools/clang/lib/Format/CMakeLists.txt
index 0c7511c1bb07e..42e6d53d9fe6e 100644
--- a/interpreter/llvm/src/tools/clang/lib/Format/CMakeLists.txt
+++ b/interpreter/llvm/src/tools/clang/lib/Format/CMakeLists.txt
@@ -13,6 +13,7 @@ add_clang_library(clangFormat
   TokenAnnotator.cpp
   UnwrappedLineFormatter.cpp
   UnwrappedLineParser.cpp
+  UsingDeclarationsSorter.cpp
   WhitespaceManager.cpp
 
   LINK_LIBS
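The ContinuationIndenter changes below dispatch on the new three-valued BreakConstructorInitializers style that replaces the old BreakConstructorInitializersBeforeComma boolean. For reference, the renderings the three values produce (a sketch based on the style names; BCIS_BeforeColon matches the previous default):

    // BCIS_BeforeColon (previous default):
    Constructor()
        : initializer1(), initializer2() {}

    // BCIS_BeforeComma:
    Constructor()
        : initializer1()
        , initializer2() {}

    // BCIS_AfterColon:
    Constructor() :
        initializer1(), initializer2() {}
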
diff --git a/interpreter/llvm/src/tools/clang/lib/Format/ContinuationIndenter.cpp b/interpreter/llvm/src/tools/clang/lib/Format/ContinuationIndenter.cpp
index 709eeb1539ac6..3bf1cd8f7c131 100644
--- a/interpreter/llvm/src/tools/clang/lib/Format/ContinuationIndenter.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Format/ContinuationIndenter.cpp
@@ -54,13 +54,26 @@ static bool startsNextParameter(const FormatToken &Current,
                                 const FormatStyle &Style) {
   const FormatToken &Previous = *Current.Previous;
   if (Current.is(TT_CtorInitializerComma) &&
-      Style.BreakConstructorInitializersBeforeComma)
+      Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma)
+    return true;
+  if (Style.Language == FormatStyle::LK_Proto && Current.is(TT_SelectorName))
     return true;
   return Previous.is(tok::comma) && !Current.isTrailingComment() &&
          ((Previous.isNot(TT_CtorInitializerComma) ||
-          !Style.BreakConstructorInitializersBeforeComma) &&
+           Style.BreakConstructorInitializers !=
+               FormatStyle::BCIS_BeforeComma) &&
           (Previous.isNot(TT_InheritanceComma) ||
-          !Style.BreakBeforeInheritanceComma));
+           !Style.BreakBeforeInheritanceComma));
+}
+
+static bool opensProtoMessageField(const FormatToken &LessTok,
+                                   const FormatStyle &Style) {
+  if (LessTok.isNot(tok::less))
+    return false;
+  return Style.Language == FormatStyle::LK_TextProto ||
+         (Style.Language == FormatStyle::LK_Proto &&
+          (LessTok.NestingLevel > 0 ||
+           (LessTok.Previous && LessTok.Previous->is(tok::equal))));
 }
 
 ContinuationIndenter::ContinuationIndenter(const FormatStyle &Style,
@@ -91,6 +104,13 @@ LineState ContinuationIndenter::getInitialState(unsigned FirstIndent,
   State.LowestLevelOnLine = 0;
   State.IgnoreStackForComparison = false;
 
+  if (Style.Language == FormatStyle::LK_TextProto) {
+    // We need this in order to deal with the bin packing of text fields at
+    // global scope.
+    State.Stack.back().AvoidBinPacking = true;
+    State.Stack.back().BreakBeforeParameter = true;
+  }
+
   // The first token has already been indented and thus consumed.
   moveStateToNextToken(State, DryRun, /*Newline=*/false);
   return State;
@@ -173,18 +193,26 @@ bool ContinuationIndenter::mustBreak(const LineState &State) {
     return true;
   if (((Previous.is(TT_DictLiteral) && Previous.is(tok::l_brace)) ||
        (Previous.is(TT_ArrayInitializerLSquare) &&
-        Previous.ParameterCount > 1)) &&
+        Previous.ParameterCount > 1) ||
+       opensProtoMessageField(Previous, Style)) &&
       Style.ColumnLimit > 0 &&
       getLengthToMatchingParen(Previous) + State.Column - 1 >
           getColumnLimit(State))
     return true;
-  if (Current.is(TT_CtorInitializerColon) &&
-      (State.Column + State.Line->Last->TotalLength - Current.TotalLength + 2 >
+
+  const FormatToken &BreakConstructorInitializersToken =
+      Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon
+          ? Previous
+          : Current;
+  if (BreakConstructorInitializersToken.is(TT_CtorInitializerColon) &&
+      (State.Column + State.Line->Last->TotalLength - Previous.TotalLength >
            getColumnLimit(State) ||
        State.Stack.back().BreakBeforeParameter) &&
-      ((Style.AllowShortFunctionsOnASingleLine != FormatStyle::SFS_All) ||
-       Style.BreakConstructorInitializersBeforeComma || Style.ColumnLimit != 0))
+      (Style.AllowShortFunctionsOnASingleLine != FormatStyle::SFS_All ||
+       Style.BreakConstructorInitializers != FormatStyle::BCIS_BeforeColon ||
+       Style.ColumnLimit != 0))
     return true;
+
   if (Current.is(TT_ObjCMethodExpr) && !Previous.is(TT_SelectorName) &&
       State.Line->startsWith(TT_ObjCMethodSpecifier))
     return true;
@@ -445,7 +473,8 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun,
   State.Column += Spaces;
   if (Current.isNot(tok::comment) && Previous.is(tok::l_paren) &&
       Previous.Previous &&
-      Previous.Previous->isOneOf(tok::kw_if, tok::kw_for)) {
+      (Previous.Previous->isOneOf(tok::kw_if, tok::kw_for) ||
+       Previous.Previous->endsSequence(tok::kw_constexpr, tok::kw_if))) {
     // Treat the condition inside an if as if it was a second function
     // parameter, i.e. let nested calls have a continuation indent.
     State.Stack.back().LastSpace = State.Column;
@@ -455,6 +484,11 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun,
                !Previous.is(TT_OverloadedOperator)) ||
               (Previous.is(tok::colon) && Previous.is(TT_ObjCMethodExpr)))) {
     State.Stack.back().LastSpace = State.Column;
+  } else if (Previous.is(TT_CtorInitializerColon) &&
+             Style.BreakConstructorInitializers ==
+                 FormatStyle::BCIS_AfterColon) {
+    State.Stack.back().Indent = State.Column;
+    State.Stack.back().LastSpace = State.Column;
   } else if ((Previous.isOneOf(TT_BinaryOperator, TT_ConditionalExpr,
                                TT_CtorInitializerColon)) &&
              ((Previous.getPrecedence() != prec::Assignment &&
@@ -587,8 +621,10 @@ unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State,
   if (!DryRun) {
     unsigned Newlines = std::max(
         1u, std::min(Current.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1));
+    bool ContinuePPDirective =
+        State.Line->InPPDirective && State.Line->Type != LT_ImportStatement;
     Whitespaces.replaceWhitespace(Current, Newlines, State.Column, State.Column,
-                                  State.Line->InPPDirective);
+                                  ContinuePPDirective);
   }
 
   if (!Current.isTrailingComment())
@@ -612,7 +648,7 @@ unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State,
       State.Stack[i].BreakBeforeParameter = true;
 
   if (PreviousNonComment &&
-      !PreviousNonComment->isOneOf(tok::comma, tok::semi) &&
+      !PreviousNonComment->isOneOf(tok::comma, tok::colon, tok::semi) &&
       (PreviousNonComment->isNot(TT_TemplateCloser) ||
        Current.NestingLevel != 0) &&
       !PreviousNonComment->isOneOf(
@@ -625,6 +661,7 @@ unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State,
   // before the corresponding } or ].
   if (PreviousNonComment &&
       (PreviousNonComment->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) ||
+       opensProtoMessageField(*PreviousNonComment, Style) ||
        (PreviousNonComment->is(TT_TemplateString) &&
         PreviousNonComment->opensScope())))
     State.Stack.back().BreakBeforeClosingBrace = true;
@@ -666,7 +703,11 @@ unsigned ContinuationIndenter::getNewLineColumn(const LineState &State) {
   if (NextNonComment->is(tok::l_brace) && NextNonComment->BlockKind == BK_Block)
     return Current.NestingLevel == 0 ? State.FirstIndent
                                      : State.Stack.back().Indent;
-  if (Current.isOneOf(tok::r_brace, tok::r_square) && State.Stack.size() > 1) {
+  if ((Current.isOneOf(tok::r_brace, tok::r_square) ||
+       (Current.is(tok::greater) &&
+        (Style.Language == FormatStyle::LK_Proto ||
+         Style.Language == FormatStyle::LK_TextProto))) &&
+      State.Stack.size() > 1) {
     if (Current.closesBlockOrBlockTypeList(Style))
       return State.Stack[State.Stack.size() - 2].NestedBlockIndent;
     if (Current.MatchingParen &&
@@ -674,6 +715,19 @@ unsigned ContinuationIndenter::getNewLineColumn(const LineState &State) {
       return State.Stack[State.Stack.size() - 2].LastSpace;
     return State.FirstIndent;
   }
+  // Indent a closing parenthesis at the previous level if followed by a semi or
+  // opening brace. This allows indentations such as:
+  //     foo(
+  //       a,
+  //     );
+  //     function foo(
+  //       a,
+  //     ) {
+  //       code(); //
+  //     }
+  if (Current.is(tok::r_paren) && State.Stack.size() > 1 &&
+      (!Current.Next || Current.Next->isOneOf(tok::semi, tok::l_brace)))
+    return State.Stack[State.Stack.size() - 2].LastSpace;
   if (NextNonComment->is(TT_TemplateString) && NextNonComment->closesScope())
     return State.Stack[State.Stack.size() - 2].LastSpace;
   if (Current.is(tok::identifier) && Current.Next &&
@@ -746,6 +800,9 @@ unsigned ContinuationIndenter::getNewLineColumn(const LineState &State) {
     return ContinuationIndent;
   if (NextNonComment->is(TT_CtorInitializerComma))
     return State.Stack.back().Indent;
+  if (PreviousNonComment && PreviousNonComment->is(TT_CtorInitializerColon) &&
+      Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon)
+    return State.Stack.back().Indent;
   if (NextNonComment->isOneOf(TT_CtorInitializerColon, TT_InheritanceColon,
                               TT_InheritanceComma))
     return State.FirstIndent + Style.ConstructorInitializerIndentWidth;
@@ -806,19 +863,29 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State,
           State.FirstIndent + Style.ContinuationIndentWidth;
     }
   }
-  if (Current.is(TT_CtorInitializerColon)) {
+  if (Current.is(TT_CtorInitializerColon) &&
+      Style.BreakConstructorInitializers != FormatStyle::BCIS_AfterColon) {
     // Indent 2 from the column, so:
     // SomeClass::SomeClass()
     //     : First(...), ...
     //       Next(...)
     //       ^ line up here.
     State.Stack.back().Indent =
-        State.Column + (Style.BreakConstructorInitializersBeforeComma ? 0 : 2);
+        State.Column + (Style.BreakConstructorInitializers ==
+                            FormatStyle::BCIS_BeforeComma ? 0 : 2);
     State.Stack.back().NestedBlockIndent = State.Stack.back().Indent;
     if (Style.ConstructorInitializerAllOnOneLineOrOnePerLine)
       State.Stack.back().AvoidBinPacking = true;
     State.Stack.back().BreakBeforeParameter = false;
   }
+  if (Current.is(TT_CtorInitializerColon) &&
+      Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon) {
+    State.Stack.back().Indent =
+        State.FirstIndent + Style.ConstructorInitializerIndentWidth;
+    State.Stack.back().NestedBlockIndent = State.Stack.back().Indent;
+    if (Style.ConstructorInitializerAllOnOneLineOrOnePerLine)
+      State.Stack.back().AvoidBinPacking = true;
+  }
   if (Current.is(TT_InheritanceColon))
     State.Stack.back().Indent =
         State.FirstIndent + Style.ContinuationIndentWidth;
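A minimal sketch of the layout the new BCIS_AfterColon branch indents to, assuming the default ConstructorInitializerIndentWidth of 4 and an initializer list that does not fit on one line with its signature:

    SomeClass::SomeClass() :
        First(Value), Second(Value) {}

Unlike the BeforeColon/BeforeComma path above, the continuation no longer lines up relative to the colon's column but at FirstIndent plus ConstructorInitializerIndentWidth.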
@@ -993,10 +1060,11 @@ void ContinuationIndenter::moveStatePastScopeOpener(LineState &State,
   bool BreakBeforeParameter = false;
   unsigned NestedBlockIndent = std::max(State.Stack.back().StartOfFunctionCall,
                                         State.Stack.back().NestedBlockIndent);
-  if (Current.isOneOf(tok::l_brace, TT_ArrayInitializerLSquare)) {
+  if (Current.isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) ||
+      opensProtoMessageField(Current, Style)) {
     if (Current.opensBlockOrBlockTypeList(Style)) {
-      NewIndent = State.Stack.back().NestedBlockIndent + Style.IndentWidth;
-      NewIndent = std::min(State.Column + 2, NewIndent);
+      NewIndent = Style.IndentWidth +
+                  std::min(State.Column, State.Stack.back().NestedBlockIndent);
     } else {
       NewIndent = State.Stack.back().LastSpace + Style.ContinuationIndentWidth;
     }
@@ -1005,10 +1073,14 @@ void ContinuationIndenter::moveStatePastScopeOpener(LineState &State,
                        Current.MatchingParen->Previous &&
                        Current.MatchingParen->Previous->is(tok::comma);
     AvoidBinPacking =
-        (Current.is(TT_ArrayInitializerLSquare) && EndsInComma) ||
-        Current.is(TT_DictLiteral) ||
-        Style.Language == FormatStyle::LK_Proto || !Style.BinPackArguments ||
-        (NextNoComment && NextNoComment->is(TT_DesignatedInitializerPeriod));
+        EndsInComma || Current.is(TT_DictLiteral) ||
+        Style.Language == FormatStyle::LK_Proto ||
+        Style.Language == FormatStyle::LK_TextProto ||
+        !Style.BinPackArguments ||
+        (NextNoComment &&
+         NextNoComment->isOneOf(TT_DesignatedInitializerPeriod,
+                                TT_DesignatedInitializerLSquare));
+    BreakBeforeParameter = EndsInComma;
     if (Current.ParameterCount > 1)
       NestedBlockIndent = std::max(NestedBlockIndent, State.Column + 1);
   } else {
@@ -1038,13 +1110,20 @@ void ContinuationIndenter::moveStatePastScopeOpener(LineState &State,
       NestedBlockIndent = Column;
     }
 
+    bool EndsInComma =
+        Current.MatchingParen &&
+        Current.MatchingParen->getPreviousNonComment() &&
+        Current.MatchingParen->getPreviousNonComment()->is(tok::comma);
+
     AvoidBinPacking =
+        (Style.Language == FormatStyle::LK_JavaScript && EndsInComma) ||
         (State.Line->MustBeDeclaration && !Style.BinPackParameters) ||
         (!State.Line->MustBeDeclaration && !Style.BinPackArguments) ||
         (Style.ExperimentalAutoDetectBinPacking &&
          (Current.PackingKind == PPK_OnePerLine ||
           (!BinPackInconclusiveFunctions &&
            Current.PackingKind == PPK_Inconclusive)));
+
     if (Current.is(TT_ObjCMethodExpr) && Current.MatchingParen) {
       if (Style.ColumnLimit) {
         // If this '[' opens an ObjC call, determine whether all parameters fit
@@ -1065,6 +1144,9 @@ void ContinuationIndenter::moveStatePastScopeOpener(LineState &State,
         }
       }
     }
+
+    if (Style.Language == FormatStyle::LK_JavaScript && EndsInComma)
+      BreakBeforeParameter = true;
   }
   // Generally inherit NoLineBreak from the current scope to nested scope.
   // However, don't do this for non-empty nested blocks, dict literals and
diff --git a/interpreter/llvm/src/tools/clang/lib/Format/Format.cpp b/interpreter/llvm/src/tools/clang/lib/Format/Format.cpp
index c8677e805179c..aa4ed8c42a700 100644
--- a/interpreter/llvm/src/tools/clang/lib/Format/Format.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Format/Format.cpp
@@ -23,6 +23,7 @@
 #include "TokenAnnotator.h"
 #include "UnwrappedLineFormatter.h"
 #include "UnwrappedLineParser.h"
+#include "UsingDeclarationsSorter.h"
 #include "WhitespaceManager.h"
 #include "clang/Basic/Diagnostic.h"
 #include "clang/Basic/DiagnosticOptions.h"
@@ -43,7 +44,6 @@
 
 using clang::format::FormatStyle;
 
-LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(std::string)
 LLVM_YAML_IS_SEQUENCE_VECTOR(clang::format::FormatStyle::IncludeCategory)
 
 namespace llvm {
@@ -56,6 +56,7 @@ template <> struct ScalarEnumerationTraits<FormatStyle::LanguageKind> {
     IO.enumCase(Value, "ObjC", FormatStyle::LK_ObjC);
     IO.enumCase(Value, "Proto", FormatStyle::LK_Proto);
     IO.enumCase(Value, "TableGen", FormatStyle::LK_TableGen);
+    IO.enumCase(Value, "TextProto", FormatStyle::LK_TextProto);
   }
 };
 
@@ -96,6 +97,7 @@ template <> struct ScalarEnumerationTraits<FormatStyle::ShortFunctionStyle> {
     IO.enumCase(Value, "All", FormatStyle::SFS_All);
     IO.enumCase(Value, "true", FormatStyle::SFS_All);
     IO.enumCase(Value, "Inline", FormatStyle::SFS_Inline);
+    IO.enumCase(Value, "InlineOnly", FormatStyle::SFS_InlineOnly);
     IO.enumCase(Value, "Empty", FormatStyle::SFS_Empty);
   }
 };
@@ -123,6 +125,14 @@ template <> struct ScalarEnumerationTraits<FormatStyle::BraceBreakingStyle> {
   }
 };
 
+template <> struct ScalarEnumerationTraits<
+    FormatStyle::BreakConstructorInitializersStyle> {
+  static void
+  enumeration(IO &IO, FormatStyle::BreakConstructorInitializersStyle &Value) {
+    IO.enumCase(Value, "BeforeColon", FormatStyle::BCIS_BeforeColon);
+    IO.enumCase(Value, "BeforeComma", FormatStyle::BCIS_BeforeComma);
+    IO.enumCase(Value, "AfterColon", FormatStyle::BCIS_AfterColon);
+  }
+};
+
 template <>
 struct ScalarEnumerationTraits<FormatStyle::ReturnTypeBreakingStyle> {
   static void enumeration(IO &IO, FormatStyle::ReturnTypeBreakingStyle &Value) {
@@ -302,17 +312,29 @@ template <> struct MappingTraits<FormatStyle> {
     IO.mapOptional("BreakBeforeBinaryOperators",
                    Style.BreakBeforeBinaryOperators);
     IO.mapOptional("BreakBeforeBraces", Style.BreakBeforeBraces);
+    IO.mapOptional("BreakBeforeInheritanceComma",
+                   Style.BreakBeforeInheritanceComma);
     IO.mapOptional("BreakBeforeTernaryOperators",
                    Style.BreakBeforeTernaryOperators);
+
+    bool BreakConstructorInitializersBeforeComma = false;
     IO.mapOptional("BreakConstructorInitializersBeforeComma",
-                   Style.BreakConstructorInitializersBeforeComma);
+                   BreakConstructorInitializersBeforeComma);
+    IO.mapOptional("BreakConstructorInitializers",
+                   Style.BreakConstructorInitializers);
+    // If BreakConstructorInitializersBeforeComma was specified but
+    // BreakConstructorInitializers was not, initialize the latter from the
+    // former for backwards compatibility.
+    if (BreakConstructorInitializersBeforeComma &&
+        Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeColon)
+      Style.BreakConstructorInitializers = FormatStyle::BCIS_BeforeComma;
+
     IO.mapOptional("BreakAfterJavaFieldAnnotations",
                    Style.BreakAfterJavaFieldAnnotations);
     IO.mapOptional("BreakStringLiterals", Style.BreakStringLiterals);
     IO.mapOptional("ColumnLimit", Style.ColumnLimit);
     IO.mapOptional("CommentPragmas", Style.CommentPragmas);
-    IO.mapOptional("BreakBeforeInheritanceComma",
-                   Style.BreakBeforeInheritanceComma);
+    IO.mapOptional("CompactNamespaces", Style.CompactNamespaces);
     IO.mapOptional("ConstructorInitializerAllOnOneLineOrOnePerLine",
                    Style.ConstructorInitializerAllOnOneLineOrOnePerLine);
     IO.mapOptional("ConstructorInitializerIndentWidth",
@@ -343,6 +365,8 @@ template <> struct MappingTraits<FormatStyle> {
     IO.mapOptional("ObjCSpaceAfterProperty", Style.ObjCSpaceAfterProperty);
     IO.mapOptional("ObjCSpaceBeforeProtocolList",
                    Style.ObjCSpaceBeforeProtocolList);
+    IO.mapOptional("PenaltyBreakAssignment",
+                   Style.PenaltyBreakAssignment);
     IO.mapOptional("PenaltyBreakBeforeFirstCallParameter",
                    Style.PenaltyBreakBeforeFirstCallParameter);
     IO.mapOptional("PenaltyBreakComment", Style.PenaltyBreakComment);
@@ -355,6 +379,7 @@ template <> struct MappingTraits<FormatStyle> {
     IO.mapOptional("PointerAlignment", Style.PointerAlignment);
     IO.mapOptional("ReflowComments", Style.ReflowComments);
     IO.mapOptional("SortIncludes", Style.SortIncludes);
+    IO.mapOptional("SortUsingDeclarations", Style.SortUsingDeclarations);
     IO.mapOptional("SpaceAfterCStyleCast", Style.SpaceAfterCStyleCast);
     IO.mapOptional("SpaceAfterTemplateKeyword", Style.SpaceAfterTemplateKeyword);
     IO.mapOptional("SpaceBeforeAssignmentOperators",
@@ -389,6 +414,9 @@ template <> struct MappingTraits<FormatStyle::BraceWrappingFlags> {
     IO.mapOptional("BeforeCatch", Wrapping.BeforeCatch);
     IO.mapOptional("BeforeElse", Wrapping.BeforeElse);
     IO.mapOptional("IndentBraces", Wrapping.IndentBraces);
+    IO.mapOptional("SplitEmptyFunction", Wrapping.SplitEmptyFunction);
+    IO.mapOptional("SplitEmptyRecord", Wrapping.SplitEmptyRecord);
+    IO.mapOptional("SplitEmptyNamespace", Wrapping.SplitEmptyNamespace);
   }
 };
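The compatibility shim above makes the deprecated boolean key and the new enum key interchangeable; a sketch of the equivalence (placeholder class and members):

    // Old key:  BreakConstructorInitializersBeforeComma: true
    // New key:  BreakConstructorInitializers: BeforeComma
    // Both should produce:
    Foo::Foo()
        : First(Value)
        , Second(Value) {}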
 
@@ -464,7 +492,8 @@ static FormatStyle expandPresets(const FormatStyle &Style) {
     return Style;
   FormatStyle Expanded = Style;
   Expanded.BraceWrapping = {false, false, false, false, false, false,
-                            false, false, false, false, false};
+                            false, false, false, false, false, true,
+                            true,  true};
   switch (Style.BreakBeforeBraces) {
   case FormatStyle::BS_Linux:
     Expanded.BraceWrapping.AfterClass = true;
@@ -477,6 +506,8 @@ static FormatStyle expandPresets(const FormatStyle &Style) {
     Expanded.BraceWrapping.AfterFunction = true;
     Expanded.BraceWrapping.AfterStruct = true;
     Expanded.BraceWrapping.AfterUnion = true;
+    Expanded.BraceWrapping.SplitEmptyFunction = false;
+    Expanded.BraceWrapping.SplitEmptyRecord = false;
     break;
   case FormatStyle::BS_Stroustrup:
     Expanded.BraceWrapping.AfterFunction = true;
@@ -496,7 +527,8 @@ static FormatStyle expandPresets(const FormatStyle &Style) {
     break;
   case FormatStyle::BS_GNU:
     Expanded.BraceWrapping = {true, true, true, true, true, true,
-                              true, true, true, true, true};
+                              true, true, true, true, true, true,
+                              true, true};
     break;
   case FormatStyle::BS_WebKit:
     Expanded.BraceWrapping.AfterFunction = true;
@@ -527,19 +559,21 @@ FormatStyle getLLVMStyle() {
   LLVMStyle.AlwaysBreakAfterDefinitionReturnType = FormatStyle::DRTBS_None;
   LLVMStyle.AlwaysBreakBeforeMultilineStrings = false;
   LLVMStyle.AlwaysBreakTemplateDeclarations = false;
-  LLVMStyle.BinPackParameters = true;
   LLVMStyle.BinPackArguments = true;
+  LLVMStyle.BinPackParameters = true;
   LLVMStyle.BreakBeforeBinaryOperators = FormatStyle::BOS_None;
   LLVMStyle.BreakBeforeTernaryOperators = true;
   LLVMStyle.BreakBeforeBraces = FormatStyle::BS_Attach;
   LLVMStyle.BraceWrapping = {false, false, false, false, false, false,
-                             false, false, false, false, false};
+                             false, false, false, false, false, true,
+                             true,  true};
   LLVMStyle.BreakAfterJavaFieldAnnotations = false;
-  LLVMStyle.BreakConstructorInitializersBeforeComma = false;
+  LLVMStyle.BreakConstructorInitializers = FormatStyle::BCIS_BeforeColon;
   LLVMStyle.BreakBeforeInheritanceComma = false;
   LLVMStyle.BreakStringLiterals = true;
   LLVMStyle.ColumnLimit = 80;
   LLVMStyle.CommentPragmas = "^ IWYU pragma:";
+  LLVMStyle.CompactNamespaces = false;
   LLVMStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = false;
   LLVMStyle.ConstructorInitializerIndentWidth = 4;
   LLVMStyle.ContinuationIndentWidth = 4;
@@ -551,9 +585,9 @@ FormatStyle getLLVMStyle() {
   LLVMStyle.ForEachMacros.push_back("Q_FOREACH");
   LLVMStyle.ForEachMacros.push_back("BOOST_FOREACH");
   LLVMStyle.IncludeCategories = {{"^\"(llvm|llvm-c|clang|clang-c)/", 2},
-                                 {"^(<|\"(gtest|isl|json)/)", 3},
+                                 {"^(<|\"(gtest|gmock|isl|json)/)", 3},
                                  {".*", 1}};
-  LLVMStyle.IncludeIsMainRegex = "$";
+  LLVMStyle.IncludeIsMainRegex = "(Test)?$";
   LLVMStyle.IndentCaseLabels = false;
   LLVMStyle.IndentWrappedFunctionNames = false;
   LLVMStyle.IndentWidth = 2;
@@ -582,6 +616,7 @@ FormatStyle getLLVMStyle() {
   LLVMStyle.SpaceBeforeAssignmentOperators = true;
   LLVMStyle.SpacesInAngles = false;
 
+  LLVMStyle.PenaltyBreakAssignment = prec::Assignment;
   LLVMStyle.PenaltyBreakComment = 300;
   LLVMStyle.PenaltyBreakFirstLessLess = 120;
   LLVMStyle.PenaltyBreakString = 1000;
@@ -591,11 +626,18 @@ FormatStyle getLLVMStyle() {
 
   LLVMStyle.DisableFormat = false;
   LLVMStyle.SortIncludes = true;
+  LLVMStyle.SortUsingDeclarations = true;
 
   return LLVMStyle;
 }
 
 FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) {
+  if (Language == FormatStyle::LK_TextProto) {
+    FormatStyle GoogleStyle = getGoogleStyle(FormatStyle::LK_Proto);
+    GoogleStyle.Language = FormatStyle::LK_TextProto;
+    return GoogleStyle;
+  }
+
   FormatStyle GoogleStyle = getLLVMStyle();
   GoogleStyle.Language = Language;
 
@@ -691,7 +733,7 @@ FormatStyle getMozillaStyle() {
   MozillaStyle.BinPackParameters = false;
   MozillaStyle.BinPackArguments = false;
   MozillaStyle.BreakBeforeBraces = FormatStyle::BS_Mozilla;
-  MozillaStyle.BreakConstructorInitializersBeforeComma = true;
+  MozillaStyle.BreakConstructorInitializers = FormatStyle::BCIS_BeforeComma;
   MozillaStyle.BreakBeforeInheritanceComma = true;
   MozillaStyle.ConstructorInitializerIndentWidth = 2;
   MozillaStyle.ContinuationIndentWidth = 2;
@@ -714,7 +756,7 @@ FormatStyle getWebKitStyle() {
   Style.AlignTrailingComments = false;
   Style.BreakBeforeBinaryOperators = FormatStyle::BOS_All;
   Style.BreakBeforeBraces = FormatStyle::BS_WebKit;
-  Style.BreakConstructorInitializersBeforeComma = true;
+  Style.BreakConstructorInitializers = FormatStyle::BCIS_BeforeComma;
   Style.Cpp11BracedListStyle = false;
   Style.ColumnLimit = 0;
   Style.FixNamespaceComments = false;
@@ -745,6 +787,7 @@ FormatStyle getNoStyle() {
   FormatStyle NoStyle = getLLVMStyle();
   NoStyle.DisableFormat = true;
   NoStyle.SortIncludes = false;
+  NoStyle.SortUsingDeclarations = false;
   return NoStyle;
 }
 
@@ -1378,7 +1421,7 @@ class IncludeCategoryManager {
       : Style(Style), FileName(FileName) {
     FileStem = llvm::sys::path::stem(FileName);
     for (const auto &Category : Style.IncludeCategories)
-      CategoryRegexs.emplace_back(Category.Regex);
+      CategoryRegexs.emplace_back(Category.Regex, llvm::Regex::IgnoreCase);
     IsMainFile = FileName.endswith(".c") || FileName.endswith(".cc") ||
                  FileName.endswith(".cpp") || FileName.endswith(".c++") ||
                  FileName.endswith(".cxx") || FileName.endswith(".m") ||
@@ -1406,9 +1449,11 @@ class IncludeCategoryManager {
       return false;
     StringRef HeaderStem =
         llvm::sys::path::stem(IncludeName.drop_front(1).drop_back(1));
-    if (FileStem.startswith(HeaderStem)) {
+    if (FileStem.startswith(HeaderStem) ||
+        FileStem.startswith_lower(HeaderStem)) {
       llvm::Regex MainIncludeRegex(
-          (HeaderStem + Style.IncludeIsMainRegex).str());
+          (HeaderStem + Style.IncludeIsMainRegex).str(),
+          llvm::Regex::IgnoreCase);
       if (MainIncludeRegex.match(FileStem))
         return true;
     }
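With IncludeIsMainRegex now defaulting to "(Test)?$" and the category regexes built with llvm::Regex::IgnoreCase, a test file can claim the header it tests as its main include; a sketch with hypothetical file names:

    // In FooTest.cpp, include sorting now treats "Foo.h" as the main
    // include (priority 0) and keeps it first:
    #include "Foo.h"

    #include "other/Bar.h"
    #include <vector>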
@@ -1851,43 +1896,61 @@ tooling::Replacements reformat(const FormatStyle &Style, StringRef Code,
     return tooling::Replacements();
   if (Expanded.Language == FormatStyle::LK_JavaScript && isMpegTS(Code))
     return tooling::Replacements();
-  auto Env = Environment::CreateVirtualEnvironment(Code, FileName, Ranges);
-
-  auto reformatAfterApplying = [&] (TokenAnalyzer& Fixer) {
-    tooling::Replacements Fixes = Fixer.process();
-    if (!Fixes.empty()) {
-      auto NewCode = applyAllReplacements(Code, Fixes);
-      if (NewCode) {
-        auto NewEnv = Environment::CreateVirtualEnvironment(
-            *NewCode, FileName,
-            tooling::calculateRangesAfterReplacements(Fixes, Ranges));
-        Formatter Format(*NewEnv, Expanded, Status);
-        return Fixes.merge(Format.process());
-      }
-    }
-    Formatter Format(*Env, Expanded, Status);
-    return Format.process();
-  };
 
-  if (Style.Language == FormatStyle::LK_Cpp &&
-      Style.FixNamespaceComments) {
-    NamespaceEndCommentsFixer CommentsFixer(*Env, Expanded);
-    return reformatAfterApplying(CommentsFixer);
+  typedef std::function<tooling::Replacements(const Environment &)>
+      AnalyzerPass;
+  SmallVector<AnalyzerPass, 4> Passes;
+
+  if (Style.Language == FormatStyle::LK_Cpp) {
+    if (Style.FixNamespaceComments)
+      Passes.emplace_back([&](const Environment &Env) {
+        return NamespaceEndCommentsFixer(Env, Expanded).process();
+      });
+
+    if (Style.SortUsingDeclarations)
+      Passes.emplace_back([&](const Environment &Env) {
+        return UsingDeclarationsSorter(Env, Expanded).process();
+      });
   }
 
   if (Style.Language == FormatStyle::LK_JavaScript &&
-      Style.JavaScriptQuotes != FormatStyle::JSQS_Leave) {
-    JavaScriptRequoter Requoter(*Env, Expanded);
-    return reformatAfterApplying(Requoter);
+      Style.JavaScriptQuotes != FormatStyle::JSQS_Leave)
+    Passes.emplace_back([&](const Environment &Env) {
+      return JavaScriptRequoter(Env, Expanded).process();
+    });
+
+  Passes.emplace_back([&](const Environment &Env) {
+    return Formatter(Env, Expanded, Status).process();
+  });
+
+  std::unique_ptr<Environment> Env =
+      Environment::CreateVirtualEnvironment(Code, FileName, Ranges);
+  llvm::Optional<std::string> CurrentCode = None;
+  tooling::Replacements Fixes;
+  for (size_t I = 0, E = Passes.size(); I < E; ++I) {
+    tooling::Replacements PassFixes = Passes[I](*Env);
+    auto NewCode = applyAllReplacements(
+        CurrentCode ? StringRef(*CurrentCode) : Code, PassFixes);
+    if (NewCode) {
+      Fixes = Fixes.merge(PassFixes);
+      if (I + 1 < E) {
+        CurrentCode = std::move(*NewCode);
+        Env = Environment::CreateVirtualEnvironment(
+            *CurrentCode, FileName,
+            tooling::calculateRangesAfterReplacements(Fixes, Ranges));
+      }
+    }
   }
 
-  Formatter Format(*Env, Expanded, Status);
-  return Format.process();
+  return Fixes;
 }
 
 tooling::Replacements cleanup(const FormatStyle &Style, StringRef Code,
                               ArrayRef<tooling::Range> Ranges,
                               StringRef FileName) {
+  // Cleanups only apply to C++ code (they mostly concern redundant commas
+  // around constructor initializers etc.).
+  if (Style.Language != FormatStyle::LK_Cpp)
+    return tooling::Replacements();
   std::unique_ptr<Environment> Env =
       Environment::CreateVirtualEnvironment(Code, FileName, Ranges);
   Cleaner Clean(*Env, Style);
@@ -1914,6 +1977,16 @@ tooling::Replacements fixNamespaceEndComments(const FormatStyle &Style,
   return Fix.process();
 }
 
+tooling::Replacements sortUsingDeclarations(const FormatStyle &Style,
+                                            StringRef Code,
+                                            ArrayRef<tooling::Range> Ranges,
+                                            StringRef FileName) {
+  std::unique_ptr<Environment> Env =
+      Environment::CreateVirtualEnvironment(Code, FileName, Ranges);
+  UsingDeclarationsSorter Sorter(*Env, Style);
+  return Sorter.process();
+}
+
 LangOptions getFormattingLangOpts(const FormatStyle &Style) {
   LangOptions LangOpts;
   LangOpts.CPlusPlus = 1;
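A minimal usage sketch of the new sortUsingDeclarations() entry point (variable names and input are illustrative):

    #include "clang/Format/Format.h"
    using namespace clang;
    using namespace clang::format;

    FormatStyle Style = getLLVMStyle();  // has SortUsingDeclarations = true
    StringRef Code = "using b;\nusing a;\n";
    // Sort the whole buffer; applying the result should yield
    // "using a;\nusing b;\n".
    tooling::Replacements Fixes = sortUsingDeclarations(
        Style, Code, {tooling::Range(0, Code.size())}, "<stdin>");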
diff --git a/interpreter/llvm/src/tools/clang/lib/Format/FormatToken.h b/interpreter/llvm/src/tools/clang/lib/Format/FormatToken.h
index 0c5a5284627c7..a60361a8e5fa9 100644
--- a/interpreter/llvm/src/tools/clang/lib/Format/FormatToken.h
+++ b/interpreter/llvm/src/tools/clang/lib/Format/FormatToken.h
@@ -21,6 +21,7 @@
 #include "clang/Format/Format.h"
 #include "clang/Lex/Lexer.h"
 #include <memory>
+#include <unordered_set>
 
 namespace clang {
 namespace format {
@@ -39,6 +40,7 @@ namespace format {
   TYPE(ConflictStart) \
   TYPE(CtorInitializerColon) \
   TYPE(CtorInitializerComma) \
+  TYPE(DesignatedInitializerLSquare) \
   TYPE(DesignatedInitializerPeriod) \
   TYPE(DictLiteral) \
   TYPE(ForEachMacro) \
@@ -464,7 +466,9 @@ struct FormatToken {
     return is(TT_ArrayInitializerLSquare) ||
            (is(tok::l_brace) &&
             (BlockKind == BK_Block || is(TT_DictLiteral) ||
-             (!Style.Cpp11BracedListStyle && NestingLevel == 0)));
+             (!Style.Cpp11BracedListStyle && NestingLevel == 0))) ||
+           (is(tok::less) && (Style.Language == FormatStyle::LK_Proto ||
+                              Style.Language == FormatStyle::LK_TextProto));
   }
 
   /// \brief Same as opensBlockOrBlockTypeList, but for the closing token.
@@ -474,6 +478,19 @@ struct FormatToken {
     return MatchingParen && MatchingParen->opensBlockOrBlockTypeList(Style);
   }
 
+  /// \brief Return the actual namespace token, if this token starts a namespace
+  /// block.
+  const FormatToken *getNamespaceToken() const {
+    const FormatToken *NamespaceTok = this;
+    if (is(tok::comment))
+      NamespaceTok = NamespaceTok->getNextNonComment();
+    // Detect "(inline)? namespace" at the beginning of a line.
+    if (NamespaceTok && NamespaceTok->is(tok::kw_inline))
+      NamespaceTok = NamespaceTok->getNextNonComment();
+    return NamespaceTok && NamespaceTok->is(tok::kw_namespace) ? NamespaceTok
+                                                               : nullptr;
+  }
+
 private:
   // Disallow copying.
   FormatToken(const FormatToken &) = delete;
@@ -624,6 +641,7 @@ struct AdditionalKeywords {
     kw_is = &IdentTable.get("is");
     kw_let = &IdentTable.get("let");
     kw_module = &IdentTable.get("module");
+    kw_readonly = &IdentTable.get("readonly");
     kw_set = &IdentTable.get("set");
     kw_type = &IdentTable.get("type");
     kw_var = &IdentTable.get("var");
@@ -656,6 +674,15 @@ struct AdditionalKeywords {
     kw_qsignals = &IdentTable.get("Q_SIGNALS");
     kw_slots = &IdentTable.get("slots");
     kw_qslots = &IdentTable.get("Q_SLOTS");
+
+    // Keep this at the end of the constructor to make sure everything here is
+    // already initialized.
+    JsExtraKeywords = std::unordered_set<IdentifierInfo *>(
+        {kw_as, kw_async, kw_await, kw_declare, kw_finally, kw_from,
+         kw_function, kw_get, kw_import, kw_is, kw_let, kw_module, kw_readonly,
+         kw_set, kw_type, kw_var, kw_yield,
+         // Keywords from the Java section.
+         kw_abstract, kw_extends, kw_implements, kw_instanceof, kw_interface});
   }
 
   // Context sensitive keywords.
@@ -684,6 +711,7 @@ struct AdditionalKeywords {
   IdentifierInfo *kw_is;
   IdentifierInfo *kw_let;
   IdentifierInfo *kw_module;
+  IdentifierInfo *kw_readonly;
   IdentifierInfo *kw_set;
   IdentifierInfo *kw_type;
   IdentifierInfo *kw_var;
@@ -717,6 +745,18 @@ struct AdditionalKeywords {
   IdentifierInfo *kw_qsignals;
   IdentifierInfo *kw_slots;
   IdentifierInfo *kw_qslots;
+
+  /// \brief Returns \c true if \p Tok is a true JavaScript identifier, returns
+  /// \c false if it is a keyword or a pseudo keyword.
+  bool IsJavaScriptIdentifier(const FormatToken &Tok) const {
+    return Tok.is(tok::identifier) &&
+           JsExtraKeywords.find(Tok.Tok.getIdentifierInfo()) ==
+               JsExtraKeywords.end();
+  }
+
+private:
+  /// \brief The JavaScript keywords beyond the C++ keyword set.
+  std::unordered_set<IdentifierInfo *> JsExtraKeywords;
 };
 
 } // namespace format
diff --git a/interpreter/llvm/src/tools/clang/lib/Format/NamespaceEndCommentsFixer.cpp b/interpreter/llvm/src/tools/clang/lib/Format/NamespaceEndCommentsFixer.cpp
index 88cf123c18990..85b70b8c0a768 100644
--- a/interpreter/llvm/src/tools/clang/lib/Format/NamespaceEndCommentsFixer.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Format/NamespaceEndCommentsFixer.cpp
@@ -107,6 +107,24 @@ void updateEndComment(const FormatToken *RBraceTok, StringRef EndCommentText,
                  << llvm::toString(std::move(Err)) << "\n";
   }
 }
+
+const FormatToken *
+getNamespaceToken(const AnnotatedLine *line,
+                  const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) {
+  if (!line->Affected || line->InPPDirective || !line->startsWith(tok::r_brace))
+    return nullptr;
+  size_t StartLineIndex = line->MatchingOpeningBlockLineIndex;
+  if (StartLineIndex == UnwrappedLine::kInvalidIndex)
+    return nullptr;
+  assert(StartLineIndex < AnnotatedLines.size());
+  const FormatToken *NamespaceTok = AnnotatedLines[StartLineIndex]->First;
+  // Detect "(inline)? namespace" at the beginning of a line.
+  if (NamespaceTok->is(tok::kw_inline))
+    NamespaceTok = NamespaceTok->getNextNonComment();
+  if (!NamespaceTok || NamespaceTok->isNot(tok::kw_namespace))
+    return nullptr;
+  return NamespaceTok;
+}
 } // namespace
 
 NamespaceEndCommentsFixer::NamespaceEndCommentsFixer(const Environment &Env,
@@ -120,20 +138,14 @@ tooling::Replacements NamespaceEndCommentsFixer::analyze(
   AffectedRangeMgr.computeAffectedLines(AnnotatedLines.begin(),
                                         AnnotatedLines.end());
   tooling::Replacements Fixes;
+  std::string AllNamespaceNames = "";
+  size_t StartLineIndex = SIZE_MAX;
+  unsigned int CompactedNamespacesCount = 0;
   for (size_t I = 0, E = AnnotatedLines.size(); I != E; ++I) {
-    if (!AnnotatedLines[I]->Affected || AnnotatedLines[I]->InPPDirective ||
-        !AnnotatedLines[I]->startsWith(tok::r_brace))
-      continue;
     const AnnotatedLine *EndLine = AnnotatedLines[I];
-    size_t StartLineIndex = EndLine->MatchingOpeningBlockLineIndex;
-    if (StartLineIndex == UnwrappedLine::kInvalidIndex)
-      continue;
-    assert(StartLineIndex < E);
-    const FormatToken *NamespaceTok = AnnotatedLines[StartLineIndex]->First;
-    // Detect "(inline)? namespace" in the beginning of a line.
-    if (NamespaceTok->is(tok::kw_inline))
-      NamespaceTok = NamespaceTok->getNextNonComment();
-    if (!NamespaceTok || NamespaceTok->isNot(tok::kw_namespace))
+    const FormatToken *NamespaceTok =
+        getNamespaceToken(EndLine, AnnotatedLines);
+    if (!NamespaceTok)
       continue;
     FormatToken *RBraceTok = EndLine->First;
     if (RBraceTok->Finalized)
@@ -145,6 +157,27 @@ tooling::Replacements NamespaceEndCommentsFixer::analyze(
     if (RBraceTok->Next && RBraceTok->Next->is(tok::semi)) {
       EndCommentPrevTok = RBraceTok->Next;
     }
+    if (StartLineIndex == SIZE_MAX)
+      StartLineIndex = EndLine->MatchingOpeningBlockLineIndex;
+    std::string NamespaceName = computeName(NamespaceTok);
+    if (Style.CompactNamespaces) {
+      if ((I + 1 < E) &&
+          getNamespaceToken(AnnotatedLines[I + 1], AnnotatedLines) &&
+          StartLineIndex - CompactedNamespacesCount - 1 ==
+              AnnotatedLines[I + 1]->MatchingOpeningBlockLineIndex &&
+          !AnnotatedLines[I + 1]->First->Finalized) {
+        if (hasEndComment(EndCommentPrevTok)) {
+          // Remove the end comment; it will be merged into the next one.
+          updateEndComment(EndCommentPrevTok, std::string(), SourceMgr, &Fixes);
+        }
+        CompactedNamespacesCount++;
+        AllNamespaceNames = "::" + NamespaceName + AllNamespaceNames;
+        continue;
+      }
+      NamespaceName += AllNamespaceNames;
+      CompactedNamespacesCount = 0;
+      AllNamespaceNames = std::string();
+    }
     // The next token in the token stream after the place where the end comment
     // token must be. This is either the next token on the current line or the
     // first token on the next line.
@@ -156,17 +189,16 @@ tooling::Replacements NamespaceEndCommentsFixer::analyze(
     bool AddNewline = EndCommentNextTok &&
                       EndCommentNextTok->NewlinesBefore == 0 &&
                       EndCommentNextTok->isNot(tok::eof);
-    const std::string NamespaceName = computeName(NamespaceTok);
     const std::string EndCommentText =
         computeEndCommentText(NamespaceName, AddNewline);
     if (!hasEndComment(EndCommentPrevTok)) {
       bool isShort = I - StartLineIndex <= kShortNamespaceMaxLines + 1;
       if (!isShort)
         addEndComment(EndCommentPrevTok, EndCommentText, SourceMgr, &Fixes);
-      continue;
-    }
-    if (!validEndComment(EndCommentPrevTok, NamespaceName))
+    } else if (!validEndComment(EndCommentPrevTok, NamespaceName)) {
       updateEndComment(EndCommentPrevTok, EndCommentText, SourceMgr, &Fixes);
+    }
+    StartLineIndex = SIZE_MAX;
   }
   return Fixes;
 }
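A sketch of the end comments the fixer now emits with CompactNamespaces enabled, assuming the option's documented behavior (placeholder namespaces):

    namespace A { namespace B {
    int Value;
    }} // namespace A::B

The per-namespace end comments are removed while walking inward and merged into a single "A::B" comment on the outermost closing line.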
diff --git a/interpreter/llvm/src/tools/clang/lib/Format/TokenAnnotator.cpp b/interpreter/llvm/src/tools/clang/lib/Format/TokenAnnotator.cpp
index d3b2cf4e84c24..46ea06b880ed2 100644
--- a/interpreter/llvm/src/tools/clang/lib/Format/TokenAnnotator.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Format/TokenAnnotator.cpp
@@ -89,7 +89,9 @@ class AnnotatingParser {
         continue;
       }
       if (CurrentToken->isOneOf(tok::r_paren, tok::r_square, tok::r_brace) ||
-          (CurrentToken->isOneOf(tok::colon, tok::question) && InExprContext))
+          (CurrentToken->isOneOf(tok::colon, tok::question) && InExprContext &&
+           Style.Language != FormatStyle::LK_Proto &&
+           Style.Language != FormatStyle::LK_TextProto))
         return false;
       // If a && or || is found and interpreted as a binary operator, this set
       // of angles is likely part of something like "a < b && c > d". If the
@@ -103,6 +105,14 @@ class AnnotatingParser {
           !Line.startsWith(tok::kw_template))
         return false;
       updateParameterCount(Left, CurrentToken);
+      if (Style.Language == FormatStyle::LK_Proto) {
+        if (FormatToken *Previous = CurrentToken->getPreviousNonComment()) {
+          if (CurrentToken->is(tok::colon) ||
+              (CurrentToken->isOneOf(tok::l_brace, tok::less) &&
+               Previous->isNot(tok::colon)))
+            Previous->Type = TT_SelectorName;
+        }
+      }
       if (!consumeToken())
         return false;
     }
@@ -135,13 +145,17 @@ class AnnotatingParser {
     if (Left->is(TT_OverloadedOperatorLParen)) {
       Contexts.back().IsExpression = false;
     } else if (Style.Language == FormatStyle::LK_JavaScript &&
-               Line.startsWith(Keywords.kw_type, tok::identifier)) {
+               (Line.startsWith(Keywords.kw_type, tok::identifier) ||
+                Line.startsWith(tok::kw_export, Keywords.kw_type,
+                                tok::identifier))) {
       // type X = (...);
+      // export type X = (...);
       Contexts.back().IsExpression = false;
     } else if (Left->Previous &&
         (Left->Previous->isOneOf(tok::kw_static_assert, tok::kw_decltype,
                                  tok::kw_if, tok::kw_while, tok::l_paren,
                                  tok::comma) ||
+         Left->Previous->endsSequence(tok::kw_constexpr, tok::kw_if) ||
          Left->Previous->is(TT_BinaryOperator))) {
       // static_assert, if and while usually contain expressions.
       Contexts.back().IsExpression = true;
@@ -336,6 +350,9 @@ class AnnotatingParser {
                  Contexts.back().ContextKind == tok::l_brace &&
                  Parent->isOneOf(tok::l_brace, tok::comma)) {
         Left->Type = TT_JsComputedPropertyName;
+      } else if (Style.isCpp() && Contexts.back().ContextKind == tok::l_brace &&
+                 Parent && Parent->isOneOf(tok::l_brace, tok::comma)) {
+        Left->Type = TT_DesignatedInitializerLSquare;
       } else if (CurrentToken->is(tok::r_square) && Parent &&
                  Parent->is(TT_TemplateCloser)) {
         Left->Type = TT_ArraySubscriptLSquare;
@@ -388,7 +405,8 @@ class AnnotatingParser {
       if (CurrentToken->isOneOf(tok::r_paren, tok::r_brace))
         return false;
       if (CurrentToken->is(tok::colon)) {
-        if (Left->is(TT_ArraySubscriptLSquare)) {
+        if (Left->isOneOf(TT_ArraySubscriptLSquare,
+                          TT_DesignatedInitializerLSquare)) {
           Left->Type = TT_ObjCMethodExpr;
           StartsObjCMethodExpr = true;
           Contexts.back().ColonIsObjCMethodExpr = true;
@@ -432,11 +450,12 @@ class AnnotatingParser {
         if (CurrentToken->isOneOf(tok::r_paren, tok::r_square))
           return false;
         updateParameterCount(Left, CurrentToken);
-        if (CurrentToken->isOneOf(tok::colon, tok::l_brace)) {
+        if (CurrentToken->isOneOf(tok::colon, tok::l_brace, tok::less)) {
           FormatToken *Previous = CurrentToken->getPreviousNonComment();
           if (((CurrentToken->is(tok::colon) &&
                 (!Contexts.back().ColonIsDictLiteral || !Style.isCpp())) ||
-               Style.Language == FormatStyle::LK_Proto) &&
+               Style.Language == FormatStyle::LK_Proto ||
+               Style.Language == FormatStyle::LK_TextProto) &&
               (Previous->Tok.getIdentifierInfo() ||
                Previous->is(tok::string_literal)))
             Previous->Type = TT_SelectorName;
@@ -519,8 +538,13 @@ class AnnotatingParser {
         }
       }
       if (Contexts.back().ColonIsDictLiteral ||
-          Style.Language == FormatStyle::LK_Proto) {
+          Style.Language == FormatStyle::LK_Proto ||
+          Style.Language == FormatStyle::LK_TextProto) {
         Tok->Type = TT_DictLiteral;
+        if (Style.Language == FormatStyle::LK_TextProto) {
+          if (FormatToken *Previous = Tok->getPreviousNonComment())
+            Previous->Type = TT_SelectorName;
+        }
       } else if (Contexts.back().ColonIsObjCMethodExpr ||
                  Line.startsWith(TT_ObjCMethodSpecifier)) {
         Tok->Type = TT_ObjCMethodExpr;
@@ -569,6 +593,8 @@ class AnnotatingParser {
       break;
     case tok::kw_if:
     case tok::kw_while:
+      if (Tok->is(tok::kw_if) && CurrentToken &&
+          CurrentToken->is(tok::kw_constexpr))
+        next();
       if (CurrentToken && CurrentToken->is(tok::l_paren)) {
         next();
         if (!parseParens(/*LookForDecls=*/true))
@@ -576,9 +602,13 @@ class AnnotatingParser {
       }
       break;
     case tok::kw_for:
-      if (Style.Language == FormatStyle::LK_JavaScript && Tok->Previous &&
-          Tok->Previous->is(tok::period))
-        break;
+      if (Style.Language == FormatStyle::LK_JavaScript) {
+        if (Tok->Previous && Tok->Previous->is(tok::period))
+          break;
+        // JavaScript's "for await (... of ...)": skip the await token.
+        if (CurrentToken && CurrentToken->is(Keywords.kw_await))
+          next();
+      }
       Contexts.back().ColonIsForRangeExpr = true;
       next();
       if (!parseParens())
@@ -612,12 +642,22 @@ class AnnotatingParser {
         return false;
       break;
     case tok::l_brace:
+      if (Style.Language == FormatStyle::LK_TextProto) {
+        FormatToken *Previous = Tok->getPreviousNonComment();
+        if (Previous && Previous->Type != TT_DictLiteral)
+          Previous->Type = TT_SelectorName;
+      }
       if (!parseBrace())
         return false;
       break;
     case tok::less:
       if (parseAngle()) {
         Tok->Type = TT_TemplateOpener;
+        if (Style.Language == FormatStyle::LK_TextProto) {
+          FormatToken *Previous = Tok->getPreviousNonComment();
+          if (Previous && Previous->Type != TT_DictLiteral)
+            Previous->Type = TT_SelectorName;
+        }
       } else {
         Tok->Type = TT_BinaryOperator;
         NonTemplateLess.insert(Tok);
@@ -700,9 +740,12 @@ class AnnotatingParser {
 
   void parseIncludeDirective() {
     if (CurrentToken && CurrentToken->is(tok::less)) {
-      next();
-      while (CurrentToken) {
-        if (CurrentToken->isNot(tok::comment) || CurrentToken->Next)
+      next();
+      while (CurrentToken) {
+        // Mark tokens up to the trailing line comments as implicit string
+        // literals.
+        if (CurrentToken->isNot(tok::comment) &&
+            !CurrentToken->TokenText.startswith("//"))
           CurrentToken->Type = TT_ImplicitStringLiteral;
         next();
       }
@@ -972,9 +1015,12 @@ class AnnotatingParser {
   void modifyContext(const FormatToken &Current) {
     if (Current.getPrecedence() == prec::Assignment &&
         !Line.First->isOneOf(tok::kw_template, tok::kw_using, tok::kw_return) &&
-        // Type aliases use `type X = ...;` in TypeScript.
+        // Type aliases use `type X = ...;` in TypeScript and can be exported
+        // using `export type ...`.
         !(Style.Language == FormatStyle::LK_JavaScript &&
-          Line.startsWith(Keywords.kw_type, tok::identifier)) &&
+          (Line.startsWith(Keywords.kw_type, tok::identifier) ||
+           Line.startsWith(tok::kw_export, Keywords.kw_type,
+                           tok::identifier))) &&
         (!Current.Previous || Current.Previous->isNot(tok::kw_operator))) {
       Contexts.back().IsExpression = true;
       if (!Line.startsWith(TT_UnaryOperator)) {
@@ -1034,9 +1080,9 @@ class AnnotatingParser {
     if (Style.Language == FormatStyle::LK_JavaScript) {
       if (Current.is(tok::exclaim)) {
         if (Current.Previous &&
-            (Current.Previous->Tok.getIdentifierInfo() ||
-             Current.Previous->isOneOf(tok::identifier, tok::r_paren,
-                                       tok::r_square, tok::r_brace) ||
+            (Current.Previous->isOneOf(tok::identifier, tok::kw_namespace,
+                                       tok::r_paren, tok::r_square,
+                                       tok::r_brace) ||
              Current.Previous->Tok.isLiteral())) {
           Current.Type = TT_JsNonNullAssertion;
           return;
@@ -1337,7 +1383,8 @@ class AnnotatingParser {
 
     if (PrevToken->isOneOf(tok::l_paren, tok::l_square, tok::l_brace,
                            tok::comma, tok::semi, tok::kw_return, tok::colon,
-                           tok::equal, tok::kw_delete, tok::kw_sizeof) ||
+                           tok::equal, tok::kw_delete, tok::kw_sizeof,
+                           tok::kw_throw) ||
         PrevToken->isOneOf(TT_BinaryOperator, TT_ConditionalExpr,
                            TT_UnaryOperator, TT_CastRParen))
       return TT_UnaryOperator;
@@ -1541,8 +1588,11 @@ class ExpressionParser {
       const FormatToken *NextNonComment = Current->getNextNonComment();
       if (Current->is(TT_ConditionalExpr))
         return prec::Conditional;
-      if (NextNonComment && NextNonComment->is(tok::colon) &&
-          NextNonComment->is(TT_DictLiteral))
+      if (NextNonComment && Current->is(TT_SelectorName) &&
+          (NextNonComment->is(TT_DictLiteral) ||
+           ((Style.Language == FormatStyle::LK_Proto ||
+             Style.Language == FormatStyle::LK_TextProto) &&
+            NextNonComment->is(tok::less))))
         return prec::Assignment;
       if (Current->is(TT_JsComputedPropertyName))
         return prec::Assignment;
@@ -1645,17 +1695,26 @@ void TokenAnnotator::setCommentLineLevels(
   for (SmallVectorImpl<AnnotatedLine *>::reverse_iterator I = Lines.rbegin(),
                                                           E = Lines.rend();
        I != E; ++I) {
-    bool CommentLine = (*I)->First;
+    bool CommentLine = true;
     for (const FormatToken *Tok = (*I)->First; Tok; Tok = Tok->Next) {
       if (!Tok->is(tok::comment)) {
         CommentLine = false;
         break;
       }
     }
-    if (NextNonCommentLine && CommentLine)
-      (*I)->Level = NextNonCommentLine->Level;
-    else
+
+    if (NextNonCommentLine && CommentLine) {
+      // If the comment is currently aligned with the line immediately following
+      // it, that's probably intentional and we should keep it.
+      bool AlignedWithNextLine =
+          NextNonCommentLine->First->NewlinesBefore <= 1 &&
+          NextNonCommentLine->First->OriginalColumn ==
+              (*I)->First->OriginalColumn;
+      if (AlignedWithNextLine)
+        (*I)->Level = NextNonCommentLine->Level;
+    } else {
       NextNonCommentLine = (*I)->First->isNot(tok::r_brace) ? (*I) : nullptr;
+    }
 
     setCommentLineLevels((*I)->Children);
   }
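A sketch of the intent of the alignment check above: only a comment whose original column matches the following line inherits that line's indentation level.

    void f() {
      int Value;
      // aligned with Call() below: re-leveled along with it
      Call();
    // deliberately at column 0: no longer re-leveled to match Call()
      Call();
    }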
@@ -1959,7 +2018,8 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
     if (Right.is(TT_LambdaLSquare) && Left.is(tok::equal))
       return 35;
     if (!Right.isOneOf(TT_ObjCMethodExpr, TT_LambdaLSquare,
-                       TT_ArrayInitializerLSquare))
+                       TT_ArrayInitializerLSquare,
+                       TT_DesignatedInitializerLSquare))
       return 500;
   }
 
@@ -1989,7 +2049,7 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
   if (Left.is(tok::comment))
     return 1000;
 
-  if (Left.isOneOf(TT_RangeBasedForLoopColon, TT_InheritanceColon))
+  if (Left.isOneOf(TT_RangeBasedForLoopColon, TT_InheritanceColon,
+                   TT_CtorInitializerColon))
     return 2;
 
   if (Right.isMemberAccess()) {
@@ -2047,7 +2107,8 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
       Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign)
     return 100;
   if (Left.is(tok::l_paren) && Left.Previous &&
-      Left.Previous->isOneOf(tok::kw_if, tok::kw_for))
+      (Left.Previous->isOneOf(tok::kw_if, tok::kw_for) ||
+       Left.Previous->endsSequence(tok::kw_constexpr, tok::kw_if)))
     return 1000;
   if (Left.is(tok::equal) && InFunctionDecl)
     return 110;
@@ -2086,9 +2147,10 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
   if (Left.is(TT_ConditionalExpr))
     return prec::Conditional;
   prec::Level Level = Left.getPrecedence();
-  if (Level != prec::Unknown)
-    return Level;
-  Level = Right.getPrecedence();
+  if (Level == prec::Unknown)
+    Level = Right.getPrecedence();
+  if (Level == prec::Assignment)
+    return Style.PenaltyBreakAssignment;
   if (Level != prec::Unknown)
     return Level;
 
@@ -2178,7 +2240,8 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
             (Style.SpacesInSquareBrackets &&
              Right.MatchingParen->is(TT_ArraySubscriptLSquare)));
   if (Right.is(tok::l_square) &&
-      !Right.isOneOf(TT_ObjCMethodExpr, TT_LambdaLSquare) &&
+      !Right.isOneOf(TT_ObjCMethodExpr, TT_LambdaLSquare,
+                     TT_DesignatedInitializerLSquare) &&
       !Left.isOneOf(tok::numeric_constant, TT_DictLiteral))
     return false;
   if (Left.is(tok::l_brace) && Right.is(tok::r_brace))
@@ -2197,6 +2260,7 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
             (Left.isOneOf(tok::kw_if, tok::pp_elif, tok::kw_for, tok::kw_while,
                           tok::kw_switch, tok::kw_case, TT_ForEachMacro,
                           TT_ObjCForIn) ||
+             Left.endsSequence(tok::kw_constexpr, tok::kw_if) ||
              (Left.isOneOf(tok::kw_try, Keywords.kw___except, tok::kw_catch,
                            tok::kw_new, tok::kw_delete) &&
               (!Left.Previous || Left.Previous->isNot(tok::period))))) ||
@@ -2238,7 +2302,8 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
   if (Style.isCpp()) {
     if (Left.is(tok::kw_operator))
       return Right.is(tok::coloncolon);
-  } else if (Style.Language == FormatStyle::LK_Proto) {
+  } else if (Style.Language == FormatStyle::LK_Proto ||
+             Style.Language == FormatStyle::LK_TextProto) {
     if (Right.is(tok::period) &&
         Left.isOneOf(Keywords.kw_optional, Keywords.kw_required,
                      Keywords.kw_repeated, Keywords.kw_extend))
@@ -2246,9 +2311,15 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
     if (Right.is(tok::l_paren) &&
         Left.isOneOf(Keywords.kw_returns, Keywords.kw_option))
       return true;
+    if (Right.isOneOf(tok::l_brace, tok::less) && Left.is(TT_SelectorName))
+      return true;
   } else if (Style.Language == FormatStyle::LK_JavaScript) {
     if (Left.is(TT_JsFatArrow))
       return true;
+    // JavaScript's "for await (...)" needs a space before the paren.
+    if (Right.is(tok::l_paren) && Left.is(Keywords.kw_await) &&
+        Left.Previous && Left.Previous->is(tok::kw_for))
+      return true;
     if (Left.is(Keywords.kw_async) && Right.is(tok::l_paren) &&
         Right.MatchingParen) {
       const FormatToken *Next = Right.MatchingParen->getNextNonComment();
@@ -2260,7 +2331,11 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
     if ((Left.is(TT_TemplateString) && Left.TokenText.endswith("${")) ||
         (Right.is(TT_TemplateString) && Right.TokenText.startswith("}")))
       return false;
-    if (Left.is(tok::identifier) && Right.is(TT_TemplateString))
+    // In tagged template literals ("html`bar baz`"), there is no space between
+    // the tag identifier and the template string. IsJavaScriptIdentifier makes
+    // sure that the tag is a plain identifier, not a pseudo keyword like
+    // `yield`.
+    if (Left.is(tok::identifier) && Keywords.IsJavaScriptIdentifier(Left) &&
+        Right.is(TT_TemplateString))
       return false;
     if (Right.is(tok::star) &&
         Left.isOneOf(Keywords.kw_function, Keywords.kw_yield))
@@ -2373,8 +2448,9 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
   if (Left.is(tok::greater) && Right.is(tok::greater))
     return Right.is(TT_TemplateCloser) && Left.is(TT_TemplateCloser) &&
            (Style.Standard != FormatStyle::LS_Cpp11 || Style.SpacesInAngles);
-  if (Right.isOneOf(tok::arrow, tok::period, tok::arrowstar, tok::periodstar) ||
-      Left.isOneOf(tok::arrow, tok::period, tok::arrowstar, tok::periodstar))
+  if (Right.isOneOf(tok::arrow, tok::arrowstar, tok::periodstar) ||
+      Left.isOneOf(tok::arrow, tok::period, tok::arrowstar, tok::periodstar) ||
+      (Right.is(tok::period) && Right.isNot(TT_DesignatedInitializerPeriod)))
     return false;
   if (!Style.SpaceBeforeAssignmentOperators &&
       Right.getPrecedence() == prec::Assignment)
@@ -2450,8 +2526,8 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line,
       return Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_None ||
              Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_Empty ||
              (Left.NestingLevel == 0 && Line.Level == 0 &&
-              Style.AllowShortFunctionsOnASingleLine ==
-                  FormatStyle::SFS_Inline);
+              Style.AllowShortFunctionsOnASingleLine &
+                  FormatStyle::SFS_InlineOnly);
   } else if (Style.Language == FormatStyle::LK_Java) {
     if (Right.is(tok::plus) && Left.is(tok::string_literal) && Right.Next &&
         Right.Next->is(tok::string_literal))
@@ -2463,20 +2539,27 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line,
       return true;
   }
 
-  // If the last token before a '}' is a comma or a trailing comment, the
-  // intention is to insert a line break after it in order to make shuffling
-  // around entries easier.
-  const FormatToken *BeforeClosingBrace = nullptr;
-  if (Left.isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) &&
-      Left.BlockKind != BK_Block && Left.MatchingParen)
-    BeforeClosingBrace = Left.MatchingParen->Previous;
-  else if (Right.MatchingParen &&
-           Right.MatchingParen->isOneOf(tok::l_brace,
-                                        TT_ArrayInitializerLSquare))
-    BeforeClosingBrace = &Left;
-  if (BeforeClosingBrace && (BeforeClosingBrace->is(tok::comma) ||
-                             BeforeClosingBrace->isTrailingComment()))
-    return true;
+  // If the last token before a '}', ']', or ')' is a comma or a trailing
+  // comment, the intention is to insert a line break after it in order to make
+  // shuffling around entries easier. Import statements, especially in
+  // JavaScript, can be an exception to this rule.
+  if (Style.JavaScriptWrapImports || Line.Type != LT_ImportStatement) {
+    const FormatToken *BeforeClosingBrace = nullptr;
+    if ((Left.isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) ||
+         (Style.Language == FormatStyle::LK_JavaScript &&
+          Left.is(tok::l_paren))) &&
+        Left.BlockKind != BK_Block && Left.MatchingParen)
+      BeforeClosingBrace = Left.MatchingParen->Previous;
+    else if (Right.MatchingParen &&
+             (Right.MatchingParen->isOneOf(tok::l_brace,
+                                           TT_ArrayInitializerLSquare) ||
+              (Style.Language == FormatStyle::LK_JavaScript &&
+               Right.MatchingParen->is(tok::l_paren))))
+      BeforeClosingBrace = &Left;
+    if (BeforeClosingBrace && (BeforeClosingBrace->is(tok::comma) ||
+                               BeforeClosingBrace->isTrailingComment()))
+      return true;
+  }
 
   if (Right.is(tok::comment))
     return Left.BlockKind != BK_BracedInit &&
@@ -2495,8 +2578,12 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line,
       Right.Previous->MatchingParen->NestingLevel == 0 &&
       Style.AlwaysBreakTemplateDeclarations)
     return true;
-  if ((Right.isOneOf(TT_CtorInitializerComma, TT_CtorInitializerColon)) &&
-      Style.BreakConstructorInitializersBeforeComma &&
+  if (Right.is(TT_CtorInitializerComma) &&
+      Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma &&
+      !Style.ConstructorInitializerAllOnOneLineOrOnePerLine)
+    return true;
+  if (Right.is(TT_CtorInitializerColon) &&
+      Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma &&
       !Style.ConstructorInitializerAllOnOneLineOrOnePerLine)
     return true;
   // Break only if we have multiple inheritance.
@@ -2508,10 +2595,16 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line,
     // deliberate choice and might have aligned the contents of the string
     // literal accordingly. Thus, we try keep existing line breaks.
     return Right.NewlinesBefore > 0;
-  if (Right.Previous->is(tok::l_brace) && Right.NestingLevel == 1 &&
-      Style.Language == FormatStyle::LK_Proto)
-    // Don't put enums onto single lines in protocol buffers.
+  if ((Right.Previous->is(tok::l_brace) ||
+       (Right.Previous->is(tok::less) && Right.Previous->Previous &&
+        Right.Previous->Previous->is(tok::equal))) &&
+      Right.NestingLevel == 1 && Style.Language == FormatStyle::LK_Proto) {
+    // Don't put enums or option definitions onto single lines in protocol
+    // buffers.
     return true;
+  }
   if (Right.is(TT_InlineASMBrace))
     return Right.HasUnescapedNewline;
   if (isAllmanBrace(Left) || isAllmanBrace(Right))
@@ -2546,12 +2639,13 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
   } else if (Style.Language == FormatStyle::LK_JavaScript) {
     const FormatToken *NonComment = Right.getPreviousNonComment();
     if (NonComment &&
-        NonComment->isOneOf(
-            tok::kw_return, tok::kw_continue, tok::kw_break, tok::kw_throw,
-            Keywords.kw_interface, Keywords.kw_type, tok::kw_static,
-            tok::kw_public, tok::kw_private, tok::kw_protected,
-            Keywords.kw_abstract, Keywords.kw_get, Keywords.kw_set))
-      return false; // Otherwise a semicolon is inserted.
+        NonComment->isOneOf(tok::kw_return, tok::kw_continue, tok::kw_break,
+                            tok::kw_throw, Keywords.kw_interface,
+                            Keywords.kw_type, tok::kw_static, tok::kw_public,
+                            tok::kw_private, tok::kw_protected,
+                            Keywords.kw_readonly, Keywords.kw_abstract,
+                            Keywords.kw_get, Keywords.kw_set))
+      return false; // Otherwise automatic semicolon insertion would trigger.
     if (Left.is(TT_JsFatArrow) && Right.is(tok::l_brace))
       return false;
     if (Left.is(TT_JsTypeColon))
@@ -2606,7 +2700,10 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
     // The first comment in a braced lists is always interpreted as belonging to
     // the first list element. Otherwise, it should be placed outside of the
     // list.
-    return Left.BlockKind == BK_BracedInit;
+    return Left.BlockKind == BK_BracedInit ||
+           (Left.is(TT_CtorInitializerColon) &&
+            Style.BreakConstructorInitializers ==
+                FormatStyle::BCIS_AfterColon);
   if (Left.is(tok::question) && Right.is(tok::colon))
     return false;
   if (Right.is(TT_ConditionalExpr) || Right.is(tok::question))
@@ -2679,11 +2776,15 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
   if (Right.is(tok::identifier) && Right.Next && Right.Next->is(TT_DictLiteral))
     return true;
 
+  if (Left.is(TT_CtorInitializerColon))
+    return Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon;
+  if (Right.is(TT_CtorInitializerColon))
+    return Style.BreakConstructorInitializers != FormatStyle::BCIS_AfterColon;
   if (Left.is(TT_CtorInitializerComma) &&
-      Style.BreakConstructorInitializersBeforeComma)
+      Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma)
     return false;
   if (Right.is(TT_CtorInitializerComma) &&
-      Style.BreakConstructorInitializersBeforeComma)
+      Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma)
     return true;
   if (Left.is(TT_InheritanceComma) && Style.BreakBeforeInheritanceComma)
     return false;
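Several hunks above teach the annotator about C++17 constexpr if via endsSequence(tok::kw_constexpr, tok::kw_if); a sketch of the construct they target (placeholder template):

    template <typename T> unsigned width(T Value) {
      // Spaced and penalized like a regular if condition:
      if constexpr (sizeof(T) == 4)
        return 32;
      else
        return 8 * sizeof(Value);
    }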
diff --git a/interpreter/llvm/src/tools/clang/lib/Format/UnwrappedLineFormatter.cpp b/interpreter/llvm/src/tools/clang/lib/Format/UnwrappedLineFormatter.cpp
index 8ff893426e255..2005a2822924f 100644
--- a/interpreter/llvm/src/tools/clang/lib/Format/UnwrappedLineFormatter.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Format/UnwrappedLineFormatter.cpp
@@ -66,6 +66,13 @@ class LevelIndentTracker {
       Indent += Offset;
   }
 
+  /// \brief Update the indent state given that \p Line indent should be
+  /// skipped.
+  void skipLine(const AnnotatedLine &Line) {
+    while (IndentForLevel.size() <= Line.Level)
+      IndentForLevel.push_back(Indent);
+  }
+
   /// \brief Update the level indent to adapt to the given \p Line.
   ///
   /// When a line is not formatted, we move the subsequent lines on the same
@@ -127,12 +134,28 @@ class LevelIndentTracker {
   unsigned Indent = 0;
 };
 
+bool isNamespaceDeclaration(const AnnotatedLine *Line) {
+  const FormatToken *NamespaceTok = Line->First;
+  return NamespaceTok && NamespaceTok->getNamespaceToken();
+}
+
+bool isEndOfNamespace(const AnnotatedLine *Line,
+                      const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) {
+  if (!Line->startsWith(tok::r_brace))
+    return false;
+  size_t StartLineIndex = Line->MatchingOpeningBlockLineIndex;
+  if (StartLineIndex == UnwrappedLine::kInvalidIndex)
+    return false;
+  assert(StartLineIndex < AnnotatedLines.size());
+  return isNamespaceDeclaration(AnnotatedLines[StartLineIndex]);
+}
+
 class LineJoiner {
 public:
   LineJoiner(const FormatStyle &Style, const AdditionalKeywords &Keywords,
              const SmallVectorImpl<AnnotatedLine *> &Lines)
-      : Style(Style), Keywords(Keywords), End(Lines.end()),
-        Next(Lines.begin()) {}
+      : Style(Style), Keywords(Keywords), End(Lines.end()), Next(Lines.begin()),
+        AnnotatedLines(Lines) {}
 
   /// \brief Returns the next line, merging multiple lines into one if possible.
   const AnnotatedLine *getNextMergedLine(bool DryRun,
@@ -142,7 +165,7 @@ class LineJoiner {
     const AnnotatedLine *Current = *Next;
     IndentTracker.nextLine(*Current);
     unsigned MergedLines =
-        tryFitMultipleLinesInOne(IndentTracker.getIndent(), Next, End);
+        tryFitMultipleLinesInOne(IndentTracker, Next, End);
     if (MergedLines > 0 && Style.ColumnLimit == 0)
       // Disallow line merging if there is a break at the start of one of the
       // input lines.
@@ -159,9 +182,11 @@ class LineJoiner {
 private:
   /// \brief Calculates how many lines can be merged into 1 starting at \p I.
   unsigned
-  tryFitMultipleLinesInOne(unsigned Indent,
+  tryFitMultipleLinesInOne(LevelIndentTracker &IndentTracker,
                           SmallVectorImpl<AnnotatedLine *>::const_iterator I,
                           SmallVectorImpl<AnnotatedLine *>::const_iterator E) {
+    const unsigned Indent = IndentTracker.getIndent();
+
     // Can't join the last line with anything.
     if (I + 1 == E)
       return 0;
@@ -186,15 +211,74 @@ class LineJoiner {
                 ? 0
                 : Limit - TheLine->Last->TotalLength;
 
+    if (TheLine->Last->is(TT_FunctionLBrace) &&
+        TheLine->First == TheLine->Last &&
+        !Style.BraceWrapping.SplitEmptyFunction &&
+        I[1]->First->is(tok::r_brace))
+      return tryMergeSimpleBlock(I, E, Limit);
+
+    // Handle empty record blocks where the brace has already been wrapped
+    if (TheLine->Last->is(tok::l_brace) && TheLine->First == TheLine->Last &&
+        I != AnnotatedLines.begin()) {
+      bool EmptyBlock = I[1]->First->is(tok::r_brace);
+
+      const FormatToken *Tok = I[-1]->First;
+      if (Tok && Tok->is(tok::comment))
+        Tok = Tok->getNextNonComment();
+
+      if (Tok && Tok->getNamespaceToken())
+        return !Style.BraceWrapping.SplitEmptyNamespace && EmptyBlock
+            ? tryMergeSimpleBlock(I, E, Limit) : 0;
+
+      if (Tok && Tok->is(tok::kw_typedef))
+        Tok = Tok->getNextNonComment();
+      if (Tok && Tok->isOneOf(tok::kw_class, tok::kw_struct, tok::kw_union,
+                              Keywords.kw_interface))
+        return !Style.BraceWrapping.SplitEmptyRecord && EmptyBlock
+            ? tryMergeSimpleBlock(I, E, Limit) : 0;
+    }
+
     // FIXME: TheLine->Level != 0 might or might not be the right check to do.
     // If necessary, change to something smarter.
     bool MergeShortFunctions =
         Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_All ||
         (Style.AllowShortFunctionsOnASingleLine >= FormatStyle::SFS_Empty &&
          I[1]->First->is(tok::r_brace)) ||
-        (Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_Inline &&
+        (Style.AllowShortFunctionsOnASingleLine & FormatStyle::SFS_InlineOnly &&
          TheLine->Level != 0);
 
+    if (Style.CompactNamespaces) {
+      if (isNamespaceDeclaration(TheLine)) {
+        int i = 0;
+        unsigned closingLine = TheLine->MatchingOpeningBlockLineIndex - 1;
+        for (; I + 1 + i != E && isNamespaceDeclaration(I[i + 1]) &&
+               closingLine == I[i + 1]->MatchingOpeningBlockLineIndex &&
+               I[i + 1]->Last->TotalLength < Limit;
+             i++, closingLine--) {
+          // No extra indent for compacted namespaces
+          IndentTracker.skipLine(*I[i + 1]);
+
+          Limit -= I[i + 1]->Last->TotalLength;
+        }
+        return i;
+      }
+
+      if (isEndOfNamespace(TheLine, AnnotatedLines)) {
+        int i = 0;
+        unsigned openingLine = TheLine->MatchingOpeningBlockLineIndex - 1;
+        for (; I + 1 + i != E && isEndOfNamespace(I[i + 1], AnnotatedLines) &&
+               openingLine == I[i + 1]->MatchingOpeningBlockLineIndex;
+             i++, openingLine--) {
+          // No space between consecutive braces
+          I[i + 1]->First->SpacesRequiredBefore = !I[i]->Last->is(tok::r_brace);
+
+          // Indent like the outer-most namespace
+          IndentTracker.nextLine(*I[i + 1]);
+        }
+        return i;
+      }
+    }
+
     if (TheLine->Last->is(TT_FunctionLBrace) &&
         TheLine->First != TheLine->Last) {
       return MergeShortFunctions ? tryMergeSimpleBlock(I, E, Limit) : 0;
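
Note: the two additions above implement new merging behaviors. A minimal before/after sketch of their effect (assuming a style with CompactNamespaces: true and BraceWrapping.SplitEmptyRecord: false; both options exist in FormatStyle at this revision):

    // CompactNamespaces merges consecutive namespace lines whose opening and
    // closing braces pair up, with no extra indent for the inner ones:
    namespace out { namespace in {
    int i;
    }} // namespace out::in

    // SplitEmptyRecord: false keeps an empty body merged even when the
    // opening brace was already wrapped onto its own line:
    class Empty
    {};
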
@@ -215,7 +299,10 @@ class LineJoiner {
       Limit -= 2;
 
       unsigned MergedLines = 0;
-      if (MergeShortFunctions) {
+      if (MergeShortFunctions ||
+          (Style.AllowShortFunctionsOnASingleLine >= FormatStyle::SFS_Empty &&
+           I[1]->First == I[1]->Last && I + 2 != E &&
+           I[2]->First->is(tok::r_brace))) {
         MergedLines = tryMergeSimpleBlock(I + 1, E, Limit);
         // If we managed to merge the block, count the function header, which is
         // on a separate line.
@@ -365,8 +452,11 @@ class LineJoiner {
     } else if (Limit != 0 && !Line.startsWith(tok::kw_namespace) &&
                !startsExternCBlock(Line)) {
       // We don't merge short records.
-      if (Line.First->isOneOf(tok::kw_class, tok::kw_union, tok::kw_struct,
-                              Keywords.kw_interface))
+      FormatToken *RecordTok =
+          Line.First->is(tok::kw_typedef) ? Line.First->Next : Line.First;
+      if (RecordTok &&
+          RecordTok->isOneOf(tok::kw_class, tok::kw_union, tok::kw_struct,
+                             Keywords.kw_interface))
         return 0;
 
       // Check that we still have three lines and they fit into the limit.
@@ -449,6 +539,7 @@ class LineJoiner {
   const SmallVectorImpl<AnnotatedLine *>::const_iterator End;
 
   SmallVectorImpl<AnnotatedLine *>::const_iterator Next;
+  const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines;
 };
 
 static void markFinalized(FormatToken *Tok) {
@@ -611,7 +702,8 @@ class NoLineBreakFormatter : public LineFormatter {
     LineState State = Indenter->getInitialState(FirstIndent, &Line, DryRun);
     while (State.NextToken) {
       formatChildren(State, /*Newline=*/false, DryRun, Penalty);
-      Indenter->addTokenToState(State, /*Newline=*/false, DryRun);
+      Indenter->addTokenToState(
+          State, /*Newline=*/State.NextToken->MustBreakBefore, DryRun);
     }
     return Penalty;
   }
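
Note: the NoLineBreakFormatter fix above keeps mandatory breaks even when a line is otherwise formatted without any; a token whose MustBreakBefore flag is set (for example, any token following a trailing line comment) still starts a new line:

    int f() { // after this comment the next token must break
      return 0;
    }
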
diff --git a/interpreter/llvm/src/tools/clang/lib/Format/UnwrappedLineParser.cpp b/interpreter/llvm/src/tools/clang/lib/Format/UnwrappedLineParser.cpp
index c28565ddd7370..faac5a371c260 100644
--- a/interpreter/llvm/src/tools/clang/lib/Format/UnwrappedLineParser.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Format/UnwrappedLineParser.cpp
@@ -55,13 +55,33 @@ class ScopedDeclarationState {
   std::vector<bool> &Stack;
 };
 
+static bool isLineComment(const FormatToken &FormatTok) {
+  return FormatTok.is(tok::comment) &&
+         FormatTok.TokenText.startswith("//");
+}
+
+// Checks if \p FormatTok is a line comment that continues the line comment
+// \p Previous. The original column of \p MinColumnToken is used to determine
+// whether \p FormatTok is indented enough to the right to continue \p Previous.
+static bool continuesLineComment(const FormatToken &FormatTok,
+                                 const FormatToken *Previous,
+                                 const FormatToken *MinColumnToken) {
+  if (!Previous || !MinColumnToken)
+    return false;
+  unsigned MinContinueColumn =
+      MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
+  return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
+         isLineComment(*Previous) &&
+         FormatTok.OriginalColumn >= MinContinueColumn;
+}
+
 class ScopedMacroState : public FormatTokenSource {
 public:
   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
                    FormatToken *&ResetToken)
       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
-        Token(nullptr) {
+        Token(nullptr), PreviousToken(nullptr) {
     TokenSource = this;
     Line.Level = 0;
     Line.InPPDirective = true;
@@ -78,6 +98,7 @@ class ScopedMacroState : public FormatTokenSource {
     // The \c UnwrappedLineParser guards against this by never calling
     // \c getNextToken() after it has encountered the first eof token.
     assert(!eof());
+    PreviousToken = Token;
     Token = PreviousTokenSource->getNextToken();
     if (eof())
       return getFakeEOF();
@@ -87,12 +108,17 @@ class ScopedMacroState : public FormatTokenSource {
   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
 
   FormatToken *setPosition(unsigned Position) override {
+    PreviousToken = nullptr;
     Token = PreviousTokenSource->setPosition(Position);
     return Token;
   }
 
 private:
-  bool eof() { return Token && Token->HasUnescapedNewline; }
+  bool eof() {
+    return Token && Token->HasUnescapedNewline &&
+           !continuesLineComment(*Token, PreviousToken,
+                                 /*MinColumnToken=*/PreviousToken);
+  }
 
   FormatToken *getFakeEOF() {
     static bool EOFInitialized = false;
@@ -112,6 +138,7 @@ class ScopedMacroState : public FormatTokenSource {
   FormatTokenSource *PreviousTokenSource;
 
   FormatToken *Token;
+  FormatToken *PreviousToken;
 };
 
 } // end anonymous namespace
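
Note: continuesLineComment and the relaxed ScopedMacroState::eof above keep aligned line-comment continuations attached to a preprocessor directive instead of ending it at the first unescaped newline. The original column decides, roughly:

    #define M(x) f(x)  // trailing comment on the directive
                       // aligned at least as far right: continues the section
    // back at column 0: starts a new unwrapped line after the directive
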
@@ -259,7 +286,10 @@ void UnwrappedLineParser::parseFile() {
       !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript;
   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
                                           MustBeDeclaration);
-  parseLevel(/*HasOpeningBrace=*/false);
+  if (Style.Language == FormatStyle::LK_TextProto)
+    parseBracedList();
+  else
+    parseLevel(/*HasOpeningBrace=*/false);
   // Make sure to format the remaining tokens.
   flushComments(true);
   addUnwrappedLine();
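
Note: for Style.Language == LK_TextProto the whole buffer is a single message body with no surrounding braces, so parseFile() feeds it straight to parseBracedList(). A hedged test-style sketch (verifyFormat as in clang's format unit tests; the string is text-format protobuf input):

    verifyFormat("field_a: 42\n"
                 "msg_field { nested: \"value\" }");
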
@@ -333,16 +363,21 @@ void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
 
     switch (Tok->Tok.getKind()) {
     case tok::l_brace:
-      if (Style.Language == FormatStyle::LK_JavaScript && PrevTok &&
-          PrevTok->is(tok::colon))
-        // A colon indicates this code is in a type, or a braced list following
-        // a label in an object literal ({a: {b: 1}}).
-        // The code below could be confused by semicolons between the individual
-        // members in a type member list, which would normally trigger BK_Block.
-        // In both cases, this must be parsed as an inline braced init.
-        Tok->BlockKind = BK_BracedInit;
-      else
+      if (Style.Language == FormatStyle::LK_JavaScript && PrevTok) {
+        if (PrevTok->is(tok::colon))
+          // A colon indicates this code is in a type, or a braced list
+          // following a label in an object literal ({a: {b: 1}}). The code
+          // below could be confused by semicolons between the individual
+          // members in a type member list, which would normally trigger
+          // BK_Block. In both cases, this must be parsed as an inline braced
+          // init.
+          Tok->BlockKind = BK_BracedInit;
+        else if (PrevTok->is(tok::r_paren))
+          // `) { }` can only occur in function or method declarations in JS.
+          Tok->BlockKind = BK_Block;
+      } else {
         Tok->BlockKind = BK_Unknown;
+      }
       LBraceStack.push_back(Tok);
       break;
     case tok::r_brace:
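
Note: the JavaScript branch above now classifies two brace contexts eagerly instead of leaving them BK_Unknown. A hedged test-style sketch (verifyFormat as in clang's FormatTestJS):

    verifyFormat("var t = {a: {b: 1}};"); // '{' after ':' is a braced init
    verifyFormat("function f() {}");      // '{' after ')' is always a block
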
@@ -364,6 +399,8 @@ void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
           // BlockKind later if we parse a braced list (where all blocks
           // inside are by default braced lists), or when we explicitly detect
           // blocks (for example while parsing lambdas).
+          // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
+          // braced list in JS.
           ProbablyBracedList =
               (Style.Language == FormatStyle::LK_JavaScript &&
                NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
@@ -429,8 +466,9 @@ void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
     parseParens();
 
   addUnwrappedLine();
-  size_t OpeningLineIndex =
-      Lines.empty() ? (UnwrappedLine::kInvalidIndex) : (Lines.size() - 1);
+  size_t OpeningLineIndex = CurrentLines->empty()
+                                ? (UnwrappedLine::kInvalidIndex)
+                                : (CurrentLines->size() - 1);
 
   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
                                           MustBeDeclaration);
@@ -457,6 +495,11 @@ void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
     nextToken();
   Line->Level = InitialLevel;
   Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
+  if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
+    // Update the opening line to add the forward reference as well
+    (*CurrentLines)[OpeningLineIndex].MatchingOpeningBlockLineIndex =
+            CurrentLines->size() - 1;
+  }
 }
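
Note: with the addition above, the opening and closing lines of a block now cross-reference each other through MatchingOpeningBlockLineIndex; the CompactNamespaces logic in UnwrappedLineFormatter.cpp walks the new forward link. A runnable sketch of the invariant with a stand-in type:

    #include <cassert>
    #include <vector>

    struct LineRef { size_t MatchingOpeningBlockLineIndex; };

    int main() {
      std::vector<LineRef> Lines(4);                     // "namespace X {" ... "}"
      size_t Open = 0, Close = 3;
      Lines[Close].MatchingOpeningBlockLineIndex = Open; // backward link (existing)
      Lines[Open].MatchingOpeningBlockLineIndex = Close; // forward link (new)
      assert(Lines[Lines[Close].MatchingOpeningBlockLineIndex]
                 .MatchingOpeningBlockLineIndex == Close);
    }
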
 
 static bool isGoogScope(const UnwrappedLine &Line) {
@@ -704,7 +747,7 @@ static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
               Keywords.kw_let, Keywords.kw_var, tok::kw_const,
               Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
               Keywords.kw_instanceof, Keywords.kw_interface,
-              Keywords.kw_throws));
+              Keywords.kw_throws, Keywords.kw_from));
 }
 
 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
@@ -792,6 +835,7 @@ void UnwrappedLineParser::parseStructuralElement() {
   case tok::at:
     nextToken();
     if (FormatTok->Tok.is(tok::l_brace)) {
+      nextToken();
       parseBracedList();
       break;
     }
@@ -956,8 +1000,10 @@ void UnwrappedLineParser::parseStructuralElement() {
     switch (FormatTok->Tok.getKind()) {
     case tok::at:
       nextToken();
-      if (FormatTok->Tok.is(tok::l_brace))
+      if (FormatTok->Tok.is(tok::l_brace)) {
+        nextToken();
         parseBracedList();
+      }
       break;
     case tok::kw_enum:
       // Ignore if this is part of "template Tok.is(tok::l_brace)) {
+        nextToken();
         parseBracedList();
+      } else if (Style.Language == FormatStyle::LK_Proto &&
+               FormatTok->Tok.is(tok::less)) {
+        nextToken();
+        parseBracedList(/*ContinueOnSemicolons=*/false,
+                        /*ClosingBraceKind=*/tok::greater);
       }
       break;
     case tok::l_square:
@@ -1302,13 +1354,14 @@ bool UnwrappedLineParser::tryToParseBracedList() {
   assert(FormatTok->BlockKind != BK_Unknown);
   if (FormatTok->BlockKind == BK_Block)
     return false;
+  nextToken();
   parseBracedList();
   return true;
 }
 
-bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons) {
+bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
+                                          tok::TokenKind ClosingBraceKind) {
   bool HasError = false;
-  nextToken();
 
   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
   // replace this by using parseAssigmentExpression() inside.
@@ -1335,6 +1388,10 @@ bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons) {
         parseChildBlock();
       }
     }
+    if (FormatTok->Tok.getKind() == ClosingBraceKind) {
+      nextToken();
+      return !HasError;
+    }
     switch (FormatTok->Tok.getKind()) {
     case tok::caret:
       nextToken();
@@ -1359,11 +1416,9 @@ bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons) {
       // Assume there are no blocks inside a braced init list apart
       // from the ones we explicitly parse out (like lambdas).
       FormatTok->BlockKind = BK_BracedInit;
+      nextToken();
       parseBracedList();
       break;
-    case tok::r_brace:
-      nextToken();
-      return !HasError;
     case tok::semi:
       // JavaScript (or more precisely TypeScript) can have semicolons in braced
       // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
@@ -1414,8 +1469,10 @@ void UnwrappedLineParser::parseParens() {
       break;
     case tok::at:
       nextToken();
-      if (FormatTok->Tok.is(tok::l_brace))
+      if (FormatTok->Tok.is(tok::l_brace)) {
+        nextToken();
         parseBracedList();
+      }
       break;
     case tok::kw_class:
       if (Style.Language == FormatStyle::LK_JavaScript)
@@ -1463,8 +1520,10 @@ void UnwrappedLineParser::parseSquare() {
     }
     case tok::at:
       nextToken();
-      if (FormatTok->Tok.is(tok::l_brace))
+      if (FormatTok->Tok.is(tok::l_brace)) {
+        nextToken();
         parseBracedList();
+      }
       break;
     default:
       nextToken();
@@ -1476,6 +1535,8 @@ void UnwrappedLineParser::parseSquare() {
 void UnwrappedLineParser::parseIfThenElse() {
   assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
   nextToken();
+  if (FormatTok->Tok.is(tok::kw_constexpr))
+    nextToken();
   if (FormatTok->Tok.is(tok::l_paren))
     parseParens();
   bool NeedsUnwrappedLine = false;
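
Note: the added skip makes C++17 constexpr-if parse like a plain if statement, e.g.:

    template <typename T> int width() {
      if constexpr (sizeof(T) > 4) // 'constexpr' is consumed before '('
        return 8;
      else
        return 4;
    }
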
@@ -1635,6 +1696,10 @@ void UnwrappedLineParser::parseForOrWhileLoop() {
   assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
          "'for', 'while' or foreach macro expected");
   nextToken();
+  // JS' for await ( ...
+  if (Style.Language == FormatStyle::LK_JavaScript &&
+      FormatTok->is(Keywords.kw_await))
+    nextToken();
   if (FormatTok->Tok.is(tok::l_paren))
     parseParens();
   if (FormatTok->Tok.is(tok::l_brace)) {
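
Note: consuming 'await' keeps JavaScript's asynchronous iteration on the regular for-loop path. A hedged test-style sketch (verifyFormat as in FormatTestJS):

    verifyFormat("async function f(xs) {\n"
                 "  for await (const x of xs) {\n"
                 "    use(x);\n"
                 "  }\n"
                 "}");
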
@@ -1785,6 +1850,7 @@ bool UnwrappedLineParser::parseEnum() {
   }
 
   // Parse enum body.
+  nextToken();
   bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true);
   if (HasError) {
     if (FormatTok->is(tok::semi))
@@ -1819,6 +1885,7 @@ void UnwrappedLineParser::parseJavaEnumBody() {
   FormatTok = Tokens->setPosition(StoredPosition);
 
   if (IsSimple) {
+    nextToken();
     parseBracedList();
     addUnwrappedLine();
     return;
@@ -2030,6 +2097,7 @@ void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
     }
     if (FormatTok->is(tok::l_brace)) {
       FormatTok->BlockKind = BK_Block;
+      nextToken();
       parseBracedList();
     } else {
       nextToken();
@@ -2087,16 +2155,11 @@ bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
          FormatTok.NewlinesBefore > 0;
 }
 
-static bool isLineComment(const FormatToken &FormatTok) {
-  return FormatTok.is(tok::comment) &&
-         FormatTok.TokenText.startswith("//");
-}
-
 // Checks if \p FormatTok is a line comment that continues the line comment
 // section on \p Line.
-static bool continuesLineComment(const FormatToken &FormatTok,
-                                 const UnwrappedLine &Line,
-                                 llvm::Regex &CommentPragmasRegex) {
+static bool continuesLineCommentSection(const FormatToken &FormatTok,
+                                        const UnwrappedLine &Line,
+                                        llvm::Regex &CommentPragmasRegex) {
   if (Line.Tokens.empty())
     return false;
 
@@ -2195,12 +2258,8 @@ static bool continuesLineComment(const FormatToken &FormatTok,
     MinColumnToken = PreviousToken;
   }
 
-  unsigned MinContinueColumn =
-      MinColumnToken->OriginalColumn +
-      (isLineComment(*MinColumnToken) ? 0 : 1);
-  return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
-         isLineComment(*(Line.Tokens.back().Tok)) &&
-         FormatTok.OriginalColumn >= MinContinueColumn;
+  return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
+                              MinColumnToken);
 }
 
 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
@@ -2218,7 +2277,7 @@ void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
     // FIXME: Consider putting separate line comment sections as children to the
     // unwrapped line instead.
     (*I)->ContinuesLineCommentSection =
-        continuesLineComment(**I, *Line, CommentPragmasRegex);
+        continuesLineCommentSection(**I, *Line, CommentPragmasRegex);
     if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection)
       addUnwrappedLine();
     pushToken(*I);
@@ -2251,7 +2310,7 @@ void UnwrappedLineParser::distributeComments(
     const SmallVectorImpl<FormatToken *> &Comments,
     const FormatToken *NextTok) {
   // Whether or not a line comment token continues a line is controlled by
-  // the method continuesLineComment, with the following caveat:
+  // the method continuesLineCommentSection, with the following caveat:
   //
   // Define a trail of Comments to be a nonempty proper postfix of Comments such
   // that each comment line from the trail is aligned with the next token, if
@@ -2289,7 +2348,7 @@ void UnwrappedLineParser::distributeComments(
       FormatTok->ContinuesLineCommentSection = false;
     } else {
       FormatTok->ContinuesLineCommentSection =
-          continuesLineComment(*FormatTok, *Line, CommentPragmasRegex);
+          continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
     }
     if (!FormatTok->ContinuesLineCommentSection &&
         (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
diff --git a/interpreter/llvm/src/tools/clang/lib/Format/UnwrappedLineParser.h b/interpreter/llvm/src/tools/clang/lib/Format/UnwrappedLineParser.h
index 15d1d9cda7a28..a2aa2f006728e 100644
--- a/interpreter/llvm/src/tools/clang/lib/Format/UnwrappedLineParser.h
+++ b/interpreter/llvm/src/tools/clang/lib/Format/UnwrappedLineParser.h
@@ -93,7 +93,8 @@ class UnwrappedLineParser {
   void readTokenWithJavaScriptASI();
   void parseStructuralElement();
   bool tryToParseBracedList();
-  bool parseBracedList(bool ContinueOnSemicolons = false);
+  bool parseBracedList(bool ContinueOnSemicolons = false,
+                       tok::TokenKind ClosingBraceKind = tok::r_brace);
   void parseParens();
   void parseSquare();
   void parseIfThenElse();
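
Note: the new ClosingBraceKind parameter lets protobuf message literals delimited by angle brackets reuse the braced-list parser; callers now consume the opening delimiter themselves, as in the parseStructuralElement hunk above:

    // Proto input whose '<...>' value is parsed as a braced list:
    //   optional Type f = 1 [default = <key: "value">];
    nextToken(); // consume '<'
    parseBracedList(/*ContinueOnSemicolons=*/false,
                    /*ClosingBraceKind=*/tok::greater);
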
diff --git a/interpreter/llvm/src/tools/clang/lib/Format/UsingDeclarationsSorter.cpp b/interpreter/llvm/src/tools/clang/lib/Format/UsingDeclarationsSorter.cpp
new file mode 100644
index 0000000000000..fb4f59fbc9bcf
--- /dev/null
+++ b/interpreter/llvm/src/tools/clang/lib/Format/UsingDeclarationsSorter.cpp
@@ -0,0 +1,144 @@
+//===--- UsingDeclarationsSorter.cpp ----------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements UsingDeclarationsSorter, a TokenAnalyzer that
+/// sorts consecutive using declarations.
+///
+//===----------------------------------------------------------------------===//
+
+#include "UsingDeclarationsSorter.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Regex.h"
+
+#include <algorithm>
+
+#define DEBUG_TYPE "using-declarations-sorter"
+
+namespace clang {
+namespace format {
+
+namespace {
+
+struct UsingDeclaration {
+  const AnnotatedLine *Line;
+  std::string Label;
+
+  UsingDeclaration(const AnnotatedLine *Line, const std::string &Label)
+      : Line(Line), Label(Label) {}
+
+  bool operator<(const UsingDeclaration &Other) const {
+    return Label < Other.Label;
+  }
+};
+
+/// Computes the label of a using declaration starting at the using token
+/// \p UsingTok.
+/// If \p UsingTok doesn't begin a using declaration, returns the empty string.
+/// Note that this detects specifically using declarations, as in:
+/// using A::B::C;
+/// and not type aliases, as in:
+/// using A = B::C;
+/// Type aliases are in general not safe to permute.
+std::string computeUsingDeclarationLabel(const FormatToken *UsingTok) {
+  assert(UsingTok && UsingTok->is(tok::kw_using) && "Expecting a using token");
+  std::string Label;
+  const FormatToken *Tok = UsingTok->Next;
+  if (Tok && Tok->is(tok::kw_typename)) {
+    Label.append("typename ");
+    Tok = Tok->Next;
+  }
+  if (Tok && Tok->is(tok::coloncolon)) {
+    Label.append("::");
+    Tok = Tok->Next;
+  }
+  bool HasIdentifier = false;
+  while (Tok && Tok->is(tok::identifier)) {
+    HasIdentifier = true;
+    Label.append(Tok->TokenText.str());
+    Tok = Tok->Next;
+    if (!Tok || Tok->isNot(tok::coloncolon))
+      break;
+    Label.append("::");
+    Tok = Tok->Next;
+  }
+  if (HasIdentifier && Tok && Tok->isOneOf(tok::semi, tok::comma))
+    return Label;
+  return "";
+}
+
+void endUsingDeclarationBlock(
+    SmallVectorImpl<UsingDeclaration> *UsingDeclarations,
+    const SourceManager &SourceMgr, tooling::Replacements *Fixes) {
+  SmallVector<UsingDeclaration, 4> SortedUsingDeclarations(
+      UsingDeclarations->begin(), UsingDeclarations->end());
+  std::sort(SortedUsingDeclarations.begin(), SortedUsingDeclarations.end());
+  for (size_t I = 0, E = UsingDeclarations->size(); I < E; ++I) {
+    if ((*UsingDeclarations)[I].Line == SortedUsingDeclarations[I].Line)
+      continue;
+    auto Begin = (*UsingDeclarations)[I].Line->First->Tok.getLocation();
+    auto End = (*UsingDeclarations)[I].Line->Last->Tok.getEndLoc();
+    auto SortedBegin =
+        SortedUsingDeclarations[I].Line->First->Tok.getLocation();
+    auto SortedEnd = SortedUsingDeclarations[I].Line->Last->Tok.getEndLoc();
+    StringRef Text(SourceMgr.getCharacterData(SortedBegin),
+                   SourceMgr.getCharacterData(SortedEnd) -
+                       SourceMgr.getCharacterData(SortedBegin));
+    DEBUG({
+      StringRef OldText(SourceMgr.getCharacterData(Begin),
+                        SourceMgr.getCharacterData(End) -
+                            SourceMgr.getCharacterData(Begin));
+      llvm::dbgs() << "Replacing '" << OldText << "' with '" << Text << "'\n";
+    });
+    auto Range = CharSourceRange::getCharRange(Begin, End);
+    auto Err = Fixes->add(tooling::Replacement(SourceMgr, Range, Text));
+    if (Err) {
+      llvm::errs() << "Error while sorting using declarations: "
+                   << llvm::toString(std::move(Err)) << "\n";
+    }
+  }
+  UsingDeclarations->clear();
+}
+
+} // namespace
+
+UsingDeclarationsSorter::UsingDeclarationsSorter(const Environment &Env,
+                                                 const FormatStyle &Style)
+    : TokenAnalyzer(Env, Style) {}
+
+tooling::Replacements UsingDeclarationsSorter::analyze(
+    TokenAnnotator &Annotator, SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
+    FormatTokenLexer &Tokens) {
+  const SourceManager &SourceMgr = Env.getSourceManager();
+  AffectedRangeMgr.computeAffectedLines(AnnotatedLines.begin(),
+                                        AnnotatedLines.end());
+  tooling::Replacements Fixes;
+  SmallVector<UsingDeclaration, 4> UsingDeclarations;
+  for (size_t I = 0, E = AnnotatedLines.size(); I != E; ++I) {
+    if (!AnnotatedLines[I]->Affected || AnnotatedLines[I]->InPPDirective ||
+        !AnnotatedLines[I]->startsWith(tok::kw_using) ||
+        AnnotatedLines[I]->First->Finalized) {
+      endUsingDeclarationBlock(&UsingDeclarations, SourceMgr, &Fixes);
+      continue;
+    }
+    if (AnnotatedLines[I]->First->NewlinesBefore > 1)
+      endUsingDeclarationBlock(&UsingDeclarations, SourceMgr, &Fixes);
+    std::string Label = computeUsingDeclarationLabel(AnnotatedLines[I]->First);
+    if (Label.empty()) {
+      endUsingDeclarationBlock(&UsingDeclarations, SourceMgr, &Fixes);
+      continue;
+    }
+    UsingDeclarations.push_back(UsingDeclaration(AnnotatedLines[I], Label));
+  }
+  endUsingDeclarationBlock(&UsingDeclarations, SourceMgr, &Fixes);
+  return Fixes;
+}
+
+} // namespace format
+} // namespace clang
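
Note: a before/after sketch of the sorter. A block ends at a blank line or at any line that is not a plain using declaration; labels compare lexicographically, and type aliases ('using T = ...;') produce an empty label and are left untouched:

    // Before:           // After sorting:
    using std::sort;     using A::B;
    using A::B;          using std::cout;
    using std::cout;     using std::sort;
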
diff --git a/interpreter/llvm/src/tools/clang/lib/Format/UsingDeclarationsSorter.h b/interpreter/llvm/src/tools/clang/lib/Format/UsingDeclarationsSorter.h
new file mode 100644
index 0000000000000..f7d5f97e3a2af
--- /dev/null
+++ b/interpreter/llvm/src/tools/clang/lib/Format/UsingDeclarationsSorter.h
@@ -0,0 +1,37 @@
+//===--- UsingDeclarationsSorter.h ------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file declares UsingDeclarationsSorter, a TokenAnalyzer that
+/// sorts consecutive using declarations.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LIB_FORMAT_USINGDECLARATIONSSORTER_H
+#define LLVM_CLANG_LIB_FORMAT_USINGDECLARATIONSSORTER_H
+
+#include "TokenAnalyzer.h"
+
+namespace clang {
+namespace format {
+
+class UsingDeclarationsSorter : public TokenAnalyzer {
+public:
+  UsingDeclarationsSorter(const Environment &Env, const FormatStyle &Style);
+
+  tooling::Replacements
+  analyze(TokenAnnotator &Annotator,
+          SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
+          FormatTokenLexer &Tokens) override;
+};
+
+} // end namespace format
+} // end namespace clang
+
+#endif
diff --git a/interpreter/llvm/src/tools/clang/lib/Format/WhitespaceManager.cpp b/interpreter/llvm/src/tools/clang/lib/Format/WhitespaceManager.cpp
index 3b6311d15487f..377ec3a681b63 100644
--- a/interpreter/llvm/src/tools/clang/lib/Format/WhitespaceManager.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Format/WhitespaceManager.cpp
@@ -100,18 +100,56 @@ void WhitespaceManager::calculateLineBreakInformation() {
   Changes[0].PreviousEndOfTokenColumn = 0;
   Change *LastOutsideTokenChange = &Changes[0];
   for (unsigned i = 1, e = Changes.size(); i != e; ++i) {
-    unsigned OriginalWhitespaceStart =
-        SourceMgr.getFileOffset(Changes[i].OriginalWhitespaceRange.getBegin());
-    unsigned PreviousOriginalWhitespaceEnd = SourceMgr.getFileOffset(
-        Changes[i - 1].OriginalWhitespaceRange.getEnd());
-    Changes[i - 1].TokenLength = OriginalWhitespaceStart -
-                                 PreviousOriginalWhitespaceEnd +
-                                 Changes[i].PreviousLinePostfix.size() +
-                                 Changes[i - 1].CurrentLinePrefix.size();
+    SourceLocation OriginalWhitespaceStart =
+        Changes[i].OriginalWhitespaceRange.getBegin();
+    SourceLocation PreviousOriginalWhitespaceEnd =
+        Changes[i - 1].OriginalWhitespaceRange.getEnd();
+    unsigned OriginalWhitespaceStartOffset =
+        SourceMgr.getFileOffset(OriginalWhitespaceStart);
+    unsigned PreviousOriginalWhitespaceEndOffset =
+        SourceMgr.getFileOffset(PreviousOriginalWhitespaceEnd);
+    assert(PreviousOriginalWhitespaceEndOffset <=
+           OriginalWhitespaceStartOffset);
+    const char *const PreviousOriginalWhitespaceEndData =
+        SourceMgr.getCharacterData(PreviousOriginalWhitespaceEnd);
+    StringRef Text(PreviousOriginalWhitespaceEndData,
+                   SourceMgr.getCharacterData(OriginalWhitespaceStart) -
+                       PreviousOriginalWhitespaceEndData);
+    // Usually consecutive changes would occur in consecutive tokens. This is
+    // not the case however when analyzing some preprocessor runs of the
+    // annotated lines. For example, in this code:
+    //
+    // #if A // line 1
+    // int i = 1;
+    // #else B // line 2
+    // int i = 2;
+    // #endif // line 3
+    //
+    // one of the runs will produce the sequence of lines marked with line 1, 2
+    // and 3. So the two consecutive whitespace changes just before '// line 2'
+    // and before '#endif // line 3' span multiple lines and tokens:
+    //
+    // #else B{change X}[// line 2
+    // int i = 2;
+    // ]{change Y}#endif // line 3
+    //
+    // For this reason, if the text between consecutive changes spans multiple
+    // newlines, the token length must be adjusted to the end of the original
+    // line of the token.
+    auto NewlinePos = Text.find_first_of('\n');
+    if (NewlinePos == StringRef::npos) {
+      Changes[i - 1].TokenLength = OriginalWhitespaceStartOffset -
+                                   PreviousOriginalWhitespaceEndOffset +
+                                   Changes[i].PreviousLinePostfix.size() +
+                                   Changes[i - 1].CurrentLinePrefix.size();
+    } else {
+      Changes[i - 1].TokenLength =
+          NewlinePos + Changes[i - 1].CurrentLinePrefix.size();
+    }
 
     // If there are multiple changes in this token, sum up all the changes until
     // the end of the line.
-    if (Changes[i - 1].IsInsideToken)
+    if (Changes[i - 1].IsInsideToken && Changes[i - 1].NewlinesBefore == 0)
       LastOutsideTokenChange->TokenLength +=
           Changes[i - 1].TokenLength + Changes[i - 1].Spaces;
     else
@@ -208,12 +246,12 @@ AlignTokenSequence(unsigned Start, unsigned End, unsigned Column, F &&Matches,
 
   for (unsigned i = Start; i != End; ++i) {
     if (ScopeStack.size() != 0 &&
-        Changes[i].nestingAndIndentLevel() <
-            Changes[ScopeStack.back()].nestingAndIndentLevel())
+        Changes[i].indentAndNestingLevel() <
+            Changes[ScopeStack.back()].indentAndNestingLevel())
       ScopeStack.pop_back();
 
-    if (i != Start && Changes[i].nestingAndIndentLevel() >
-                          Changes[i - 1].nestingAndIndentLevel())
+    if (i != Start && Changes[i].indentAndNestingLevel() >
+                          Changes[i - 1].indentAndNestingLevel())
       ScopeStack.push_back(i);
 
     bool InsideNestedScope = ScopeStack.size() != 0;
@@ -289,8 +327,8 @@ static unsigned AlignTokens(const FormatStyle &Style, F &&Matches,
 
   // Measure the scope level (i.e. depth of (), [], {}) of the first token, and
   // abort when we hit any token in a higher scope than the starting one.
-  auto NestingAndIndentLevel = StartAt < Changes.size()
-                                   ? Changes[StartAt].nestingAndIndentLevel()
+  auto IndentAndNestingLevel = StartAt < Changes.size()
+                                   ? Changes[StartAt].indentAndNestingLevel()
                                   : std::pair<unsigned, unsigned>(0, 0);
 
   // Keep track of the number of commas before the matching tokens, we will only
@@ -321,7 +359,7 @@ static unsigned AlignTokens(const FormatStyle &Style, F &&Matches,
 
   unsigned i = StartAt;
   for (unsigned e = Changes.size(); i != e; ++i) {
-    if (Changes[i].nestingAndIndentLevel() < NestingAndIndentLevel)
+    if (Changes[i].indentAndNestingLevel() < IndentAndNestingLevel)
       break;
 
     if (Changes[i].NewlinesBefore != 0) {
@@ -337,7 +375,7 @@ static unsigned AlignTokens(const FormatStyle &Style, F &&Matches,
 
     if (Changes[i].Tok->is(tok::comma)) {
       ++CommasBeforeMatch;
-    } else if (Changes[i].nestingAndIndentLevel() > NestingAndIndentLevel) {
+    } else if (Changes[i].indentAndNestingLevel() > IndentAndNestingLevel) {
       // Call AlignTokens recursively, skipping over this scope block.
       unsigned StoppedAt = AlignTokens(Style, Matches, Changes, i);
       i = StoppedAt - 1;
@@ -434,7 +472,14 @@ void WhitespaceManager::alignTrailingComments() {
       continue;
 
     unsigned ChangeMinColumn = Changes[i].StartOfTokenColumn;
-    unsigned ChangeMaxColumn = Style.ColumnLimit - Changes[i].TokenLength;
+    unsigned ChangeMaxColumn;
+
+    if (Style.ColumnLimit == 0)
+      ChangeMaxColumn = UINT_MAX;
+    else if (Style.ColumnLimit >= Changes[i].TokenLength)
+      ChangeMaxColumn = Style.ColumnLimit - Changes[i].TokenLength;
+    else
+      ChangeMaxColumn = ChangeMinColumn;
 
     // If we don't create a replacement for this change, we have to consider
     // it to be immovable.
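
Note: the clamped computation above fixes two edge cases: with ColumnLimit: 0 there is no right bound (ChangeMaxColumn becomes UINT_MAX), and a trailing comment on a token that already exceeds the limit is pinned to its minimum column instead of underflowing the unsigned subtraction. For example:

    // With ColumnLimit: 0, trailing comments may still be aligned:
    int a;    // short
    int bbbb; // aligned with the comment above
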
diff --git a/interpreter/llvm/src/tools/clang/lib/Format/WhitespaceManager.h b/interpreter/llvm/src/tools/clang/lib/Format/WhitespaceManager.h
index 6be4af2622766..4e78ab43abaf7 100644
--- a/interpreter/llvm/src/tools/clang/lib/Format/WhitespaceManager.h
+++ b/interpreter/llvm/src/tools/clang/lib/Format/WhitespaceManager.h
@@ -43,6 +43,10 @@ class WhitespaceManager {
 
   /// \brief Replaces the whitespace in front of \p Tok. Only call once for
   /// each \c AnnotatedToken.
+  ///
+  /// \p StartOfTokenColumn is the column at which the token will start after
+  /// this replacement. It is needed for determining how \p Spaces is turned
+  /// into tabs and spaces for some format styles.
   void replaceWhitespace(FormatToken &Tok, unsigned Newlines, unsigned Spaces,
                          unsigned StartOfTokenColumn,
                          bool InPPDirective = false);
@@ -150,12 +154,11 @@ class WhitespaceManager {
     const Change *StartOfBlockComment;
     int IndentationOffset;
 
-    // A combination of nesting level and indent level, which are used in
+    // A combination of indent level and nesting level, which are used in
     // tandem to compute lexical scope, for the purposes of deciding
     // when to stop consecutive alignment runs.
-    std::pair<unsigned, unsigned>
-    nestingAndIndentLevel() const {
-      return std::make_pair(Tok->NestingLevel, Tok->IndentLevel);
+    std::pair<unsigned, unsigned> indentAndNestingLevel() const {
+      return std::make_pair(Tok->IndentLevel, Tok->NestingLevel);
     }
   };
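
Note: besides the rename, the pair order changed. std::pair compares lexicographically, so IndentLevel is now the primary key and NestingLevel only breaks ties when alignment runs decide scope boundaries. A runnable check:

    #include <cassert>
    #include <utility>

    int main() {
      auto Shallow = std::make_pair(1u, 9u); // indent 1, nesting 9
      auto Deep = std::make_pair(2u, 0u);    // indent 2, nesting 0
      assert(Shallow < Deep);                // indent level dominates
    }
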
 
diff --git a/interpreter/llvm/src/tools/clang/lib/Frontend/ASTConsumers.cpp b/interpreter/llvm/src/tools/clang/lib/Frontend/ASTConsumers.cpp
index 720baa5e0f7a4..7dc475e26f769 100644
--- a/interpreter/llvm/src/tools/clang/lib/Frontend/ASTConsumers.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Frontend/ASTConsumers.cpp
@@ -142,7 +142,7 @@ std::unique_ptr<ASTConsumer> clang::CreateASTDumper(StringRef FilterString,
                                                     bool DumpDecls,
                                                     bool Deserialize,
                                                     bool DumpLookups) {
-  assert((DumpDecls || DumpLookups) && "nothing to dump");
+  assert((DumpDecls || Deserialize || DumpLookups) && "nothing to dump");
   return llvm::make_unique<ASTPrinter>(nullptr,
                                        Deserialize ? ASTPrinter::DumpFull :
                                        DumpDecls ? ASTPrinter::Dump :
diff --git a/interpreter/llvm/src/tools/clang/lib/Frontend/ASTMerge.cpp b/interpreter/llvm/src/tools/clang/lib/Frontend/ASTMerge.cpp
index 51064da270cc9..354527db7badb 100644
--- a/interpreter/llvm/src/tools/clang/lib/Frontend/ASTMerge.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Frontend/ASTMerge.cpp
@@ -21,14 +21,13 @@ ASTMergeAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) {
   return AdaptedAction->CreateASTConsumer(CI, InFile);
 }
 
-bool ASTMergeAction::BeginSourceFileAction(CompilerInstance &CI,
-                                           StringRef Filename) {
+bool ASTMergeAction::BeginSourceFileAction(CompilerInstance &CI) {
   // FIXME: This is a hack. We need a better way to communicate the
   // AST file, compiler instance, and file name than member variables
   // of FrontendAction.
   AdaptedAction->setCurrentInput(getCurrentInput(), takeCurrentASTUnit());
   AdaptedAction->setCompilerInstance(&CI);
-  return AdaptedAction->BeginSourceFileAction(CI, Filename);
+  return AdaptedAction->BeginSourceFileAction(CI);
 }
 
 void ASTMergeAction::ExecuteAction() {
@@ -45,9 +44,9 @@ void ASTMergeAction::ExecuteAction() {
                                     new ForwardingDiagnosticConsumer(
                                           *CI.getDiagnostics().getClient()),
                                     /*ShouldOwnClient=*/true));
-    std::unique_ptr<ASTUnit> Unit =
-        ASTUnit::LoadFromASTFile(ASTFiles[I], CI.getPCHContainerReader(),
-                                 Diags, CI.getFileSystemOpts(), false);
+    std::unique_ptr<ASTUnit> Unit = ASTUnit::LoadFromASTFile(
+        ASTFiles[I], CI.getPCHContainerReader(), ASTUnit::LoadEverything, Diags,
+        CI.getFileSystemOpts(), false);
 
     if (!Unit)
       continue;
diff --git a/interpreter/llvm/src/tools/clang/lib/Frontend/ASTUnit.cpp b/interpreter/llvm/src/tools/clang/lib/Frontend/ASTUnit.cpp
index 32ee9d3e99611..1094e6d089a65 100644
--- a/interpreter/llvm/src/tools/clang/lib/Frontend/ASTUnit.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Frontend/ASTUnit.cpp
@@ -79,106 +79,83 @@ namespace {
       }
     }
   };
-  
-  struct OnDiskData {
-    /// \brief The file in which the precompiled preamble is stored.
-    std::string PreambleFile;
-
-    /// \brief Temporary files that should be removed when the ASTUnit is
-    /// destroyed.
-    SmallVector<std::string, 4> TemporaryFiles;
-
-    /// \brief Erase temporary files.
-    void CleanTemporaryFiles();
-
-    /// \brief Erase the preamble file.
-    void CleanPreambleFile();
-
-    /// \brief Erase temporary files and the preamble file.
-    void Cleanup();
-  };
-}
-
-static llvm::sys::SmartMutex<false> &getOnDiskMutex() {
-  static llvm::sys::SmartMutex<false> M(/* recursive = */ true);
-  return M;
-}
-
-static void cleanupOnDiskMapAtExit();
 
-typedef llvm::DenseMap<const ASTUnit *, std::unique_ptr<OnDiskData>> OnDiskDataMap;
-static OnDiskDataMap &getOnDiskDataMap() {
-  static OnDiskDataMap M;
-  static bool hasRegisteredAtExit = false;
-  if (!hasRegisteredAtExit) {
-    hasRegisteredAtExit = true;
-    atexit(cleanupOnDiskMapAtExit);
+  template <typename T>
+  std::unique_ptr<T> valueOrNull(llvm::ErrorOr<std::unique_ptr<T>> Val) {
+    if (!Val)
+      return nullptr;
+    return std::move(*Val);
   }
-  return M;
-}
 
-static void cleanupOnDiskMapAtExit() {
-  // Use the mutex because there can be an alive thread destroying an ASTUnit.
-  llvm::MutexGuard Guard(getOnDiskMutex());
-  for (const auto &I : getOnDiskDataMap()) {
-    // We don't worry about freeing the memory associated with OnDiskDataMap.
-    // All we care about is erasing stale files.
-    I.second->Cleanup();
+  template <typename T>
+  bool moveOnNoError(llvm::ErrorOr<T> Val, T &Output) {
+    if (!Val)
+      return false;
+    Output = std::move(*Val);
+    return true;
   }
-}
 
-static OnDiskData &getOnDiskData(const ASTUnit *AU) {
-  // We require the mutex since we are modifying the structure of the
-  // DenseMap.
-  llvm::MutexGuard Guard(getOnDiskMutex());
-  OnDiskDataMap &M = getOnDiskDataMap();
-  auto &D = M[AU];
-  if (!D)
-    D = llvm::make_unique();
-  return *D;
-}
-
-static void erasePreambleFile(const ASTUnit *AU) {
-  getOnDiskData(AU).CleanPreambleFile();
-}
+/// \brief Get a source buffer for \p MainFilePath, handling all file-to-file
+/// and file-to-buffer remappings inside \p Invocation.
+static std::unique_ptr<llvm::MemoryBuffer>
+getBufferForFileHandlingRemapping(const CompilerInvocation &Invocation,
+                                  vfs::FileSystem *VFS,
+                                  StringRef FilePath) {
+  const auto &PreprocessorOpts = Invocation.getPreprocessorOpts();
 
-static void removeOnDiskEntry(const ASTUnit *AU) {
-  // We require the mutex since we are modifying the structure of the
-  // DenseMap.
-  llvm::MutexGuard Guard(getOnDiskMutex());
-  OnDiskDataMap &M = getOnDiskDataMap();
-  OnDiskDataMap::iterator I = M.find(AU);
-  if (I != M.end()) {
-    I->second->Cleanup();
-    M.erase(I);
-  }
-}
-
-static void setPreambleFile(const ASTUnit *AU, StringRef preambleFile) {
-  getOnDiskData(AU).PreambleFile = preambleFile;
-}
+  // Try to determine if the main file has been remapped, either from the
+  // command line (to another file) or directly through the compiler
+  // invocation (to a memory buffer).
+  llvm::MemoryBuffer *Buffer = nullptr;
+  std::unique_ptr<llvm::MemoryBuffer> BufferOwner;
+  auto FileStatus = VFS->status(FilePath);
+  if (FileStatus) {
+    llvm::sys::fs::UniqueID MainFileID = FileStatus->getUniqueID();
 
-static const std::string &getPreambleFile(const ASTUnit *AU) {
-  return getOnDiskData(AU).PreambleFile;  
-}
+    // Check whether there is a file-file remapping of the main file
+    for (const auto &RF : PreprocessorOpts.RemappedFiles) {
+      std::string MPath(RF.first);
+      auto MPathStatus = VFS->status(MPath);
+      if (MPathStatus) {
+        llvm::sys::fs::UniqueID MID = MPathStatus->getUniqueID();
+        if (MainFileID == MID) {
+          // We found a remapping. Try to load the resulting, remapped source.
+          BufferOwner = valueOrNull(VFS->getBufferForFile(RF.second));
+          if (!BufferOwner)
+            return nullptr;
+        }
+      }
+    }
 
-void OnDiskData::CleanTemporaryFiles() {
-  for (StringRef File : TemporaryFiles)
-    llvm::sys::fs::remove(File);
-  TemporaryFiles.clear();
-}
+    // Check whether there is a file-buffer remapping. It supersedes the
+    // file-file remapping.
+    for (const auto &RB : PreprocessorOpts.RemappedFileBuffers) {
+      std::string MPath(RB.first);
+      auto MPathStatus = VFS->status(MPath);
+      if (MPathStatus) {
+        llvm::sys::fs::UniqueID MID = MPathStatus->getUniqueID();
+        if (MainFileID == MID) {
+          // We found a remapping.
+          BufferOwner.reset();
+          Buffer = const_cast<llvm::MemoryBuffer *>(RB.second);
+        }
+      }
+    }
+  }
 
-void OnDiskData::CleanPreambleFile() {
-  if (!PreambleFile.empty()) {
-    llvm::sys::fs::remove(PreambleFile);
-    PreambleFile.clear();
+  // If the main source file was not remapped, load it now.
+  if (!Buffer && !BufferOwner) {
+    BufferOwner = valueOrNull(VFS->getBufferForFile(FilePath));
+    if (!BufferOwner)
+      return nullptr;
   }
-}
 
-void OnDiskData::Cleanup() {
-  CleanTemporaryFiles();
-  CleanPreambleFile();
+  if (BufferOwner)
+    return BufferOwner;
+  if (!Buffer)
+    return nullptr;
+  return llvm::MemoryBuffer::getMemBufferCopy(Buffer->getBuffer(), FilePath);
+}
 }
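
Note: the helper above centralizes how ASTUnit finds the main-file buffer; the file-to-buffer map is scanned after the file-to-file map and therefore wins, matching the comments in the code. A hedged fragment showing the two remapping kinds it must honor (PreprocessorOptions::addRemappedFile as in clang; UnsavedBuffer is a hypothetical llvm::MemoryBuffer *):

    PreprocessorOptions &PPOpts = Invocation.getPreprocessorOpts();
    PPOpts.addRemappedFile("main.cpp", "main.fixed.cpp"); // file -> file
    PPOpts.addRemappedFile("main.cpp", UnsavedBuffer);    // file -> buffer; wins
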
 
 struct ASTUnit::ASTWriterData {
@@ -194,14 +171,6 @@ void ASTUnit::clearFileLevelDecls() {
   llvm::DeleteContainerSeconds(FileDecls);
 }
 
-void ASTUnit::CleanTemporaryFiles() {
-  getOnDiskData(this).CleanTemporaryFiles();
-}
-
-void ASTUnit::addTemporaryFile(StringRef TempFile) {
-  getOnDiskData(this).TemporaryFiles.push_back(TempFile);
-}
-
 /// \brief After failing to build a precompiled preamble (due to
 /// errors in the source that occurs in the preamble), the number of
 /// reparses during which we'll skip even trying to precompile the
@@ -240,9 +209,6 @@ ASTUnit::~ASTUnit() {
 
   clearFileLevelDecls();
 
-  // Clean up the temporary files and the preamble file.
-  removeOnDiskEntry(this);
-
   // Free the buffers associated with remapped files. We are required to
   // perform this operation here because we explicitly request that the
   // compiler instance *not* free these buffers for each invocation of the
@@ -492,7 +458,9 @@ namespace {
 /// a Preprocessor.
 class ASTInfoCollector : public ASTReaderListener {
   Preprocessor &PP;
-  ASTContext &Context;
+  ASTContext *Context;
+  HeaderSearchOptions &HSOpts;
+  PreprocessorOptions &PPOpts;
   LangOptions &LangOpt;
   std::shared_ptr<TargetOptions> &TargetOpts;
   IntrusiveRefCntPtr<TargetInfo> &Target;
@@ -500,11 +468,14 @@ class ASTInfoCollector : public ASTReaderListener {
 
   bool InitializedLanguage;
 public:
-  ASTInfoCollector(Preprocessor &PP, ASTContext &Context, LangOptions &LangOpt,
+  ASTInfoCollector(Preprocessor &PP, ASTContext *Context,
+                   HeaderSearchOptions &HSOpts, PreprocessorOptions &PPOpts,
+                   LangOptions &LangOpt,
+                   std::shared_ptr<TargetOptions> &TargetOpts,
+                   IntrusiveRefCntPtr<TargetInfo> &Target, unsigned &Counter)
-      : PP(PP), Context(Context), LangOpt(LangOpt), TargetOpts(TargetOpts),
-        Target(Target), Counter(Counter), InitializedLanguage(false) {}
+      : PP(PP), Context(Context), HSOpts(HSOpts), PPOpts(PPOpts),
+        LangOpt(LangOpt), TargetOpts(TargetOpts), Target(Target),
+        Counter(Counter), InitializedLanguage(false) {}
 
   bool ReadLanguageOptions(const LangOptions &LangOpts, bool Complain,
                            bool AllowCompatibleDifferences) override {
@@ -518,6 +489,20 @@ class ASTInfoCollector : public ASTReaderListener {
     return false;
   }
 
+  virtual bool ReadHeaderSearchOptions(const HeaderSearchOptions &HSOpts,
+                                       StringRef SpecificModuleCachePath,
+                                       bool Complain) override {
+    this->HSOpts = HSOpts;
+    return false;
+  }
+
+  virtual bool
+  ReadPreprocessorOptions(const PreprocessorOptions &PPOpts, bool Complain,
+                          std::string &SuggestedPredefines) override {
+    this->PPOpts = PPOpts;
+    return false;
+  }
+
   bool ReadTargetOptions(const TargetOptions &TargetOpts, bool Complain,
                          bool AllowCompatibleDifferences) override {
     // If we've already initialized the target, don't do it again.
@@ -551,28 +536,39 @@ class ASTInfoCollector : public ASTReaderListener {
     // Initialize the preprocessor.
     PP.Initialize(*Target);
 
+    if (!Context)
+      return;
+
     // Initialize the ASTContext
-    Context.InitBuiltinTypes(*Target);
+    Context->InitBuiltinTypes(*Target);
 
     // We didn't have access to the comment options when the ASTContext was
     // constructed, so register them now.
-    Context.getCommentCommandTraits().registerCommentOptions(
+    Context->getCommentCommandTraits().registerCommentOptions(
         LangOpt.CommentOpts);
   }
 };
 
   /// \brief Diagnostic consumer that saves each diagnostic it is given.
 class StoredDiagnosticConsumer : public DiagnosticConsumer {
-  SmallVectorImpl<StoredDiagnostic> &StoredDiags;
+  SmallVectorImpl<StoredDiagnostic> *StoredDiags;
+  SmallVectorImpl<ASTUnit::StandaloneDiagnostic> *StandaloneDiags;
+  const LangOptions *LangOpts;
   SourceManager *SourceMgr;
 
 public:
-  explicit StoredDiagnosticConsumer(
-                          SmallVectorImpl<StoredDiagnostic> &StoredDiags)
-    : StoredDiags(StoredDiags), SourceMgr(nullptr) {}
+  StoredDiagnosticConsumer(
+      SmallVectorImpl<StoredDiagnostic> *StoredDiags,
+      SmallVectorImpl<ASTUnit::StandaloneDiagnostic> *StandaloneDiags)
+      : StoredDiags(StoredDiags), StandaloneDiags(StandaloneDiags),
+        LangOpts(nullptr), SourceMgr(nullptr) {
+    assert((StoredDiags || StandaloneDiags) &&
+           "No output collections were passed to StoredDiagnosticConsumer.");
+  }
 
   void BeginSourceFile(const LangOptions &LangOpts,
                        const Preprocessor *PP = nullptr) override {
+    this->LangOpts = &LangOpts;
     if (PP)
       SourceMgr = &PP->getSourceManager();
   }
@@ -591,8 +587,9 @@ class CaptureDroppedDiagnostics {
 
 public:
   CaptureDroppedDiagnostics(bool RequestCapture, DiagnosticsEngine &Diags,
-                          SmallVectorImpl<StoredDiagnostic> &StoredDiags)
-    : Diags(Diags), Client(StoredDiags), PreviousClient(nullptr)
+                            SmallVectorImpl<StoredDiagnostic> *StoredDiags,
+                            SmallVectorImpl<ASTUnit::StandaloneDiagnostic> *StandaloneDiags)
+      : Diags(Diags), Client(StoredDiags, StandaloneDiags), PreviousClient(nullptr)
   {
     if (RequestCapture || Diags.getClient() == nullptr) {
       OwningPreviousClient = Diags.takeClient();
@@ -609,16 +606,35 @@ class CaptureDroppedDiagnostics {
 
 } // anonymous namespace
 
+static ASTUnit::StandaloneDiagnostic
+makeStandaloneDiagnostic(const LangOptions &LangOpts,
+                         const StoredDiagnostic &InDiag);
+
 void StoredDiagnosticConsumer::HandleDiagnostic(DiagnosticsEngine::Level Level,
-                                              const Diagnostic &Info) {
+                                                const Diagnostic &Info) {
   // Default implementation (Warnings/errors count).
   DiagnosticConsumer::HandleDiagnostic(Level, Info);
 
   // Only record the diagnostic if it's part of the source manager we know
   // about. This effectively drops diagnostics from modules we're building.
   // FIXME: In the long run, we don't want to drop source managers from modules.
-  if (!Info.hasSourceManager() || &Info.getSourceManager() == SourceMgr)
-    StoredDiags.emplace_back(Level, Info);
+  if (!Info.hasSourceManager() || &Info.getSourceManager() == SourceMgr) {
+    StoredDiagnostic *ResultDiag = nullptr;
+    if (StoredDiags) {
+      StoredDiags->emplace_back(Level, Info);
+      ResultDiag = &StoredDiags->back();
+    }
+
+    if (StandaloneDiags) {
+      llvm::Optional<StoredDiagnostic> StoredDiag = llvm::None;
+      if (!ResultDiag) {
+        StoredDiag.emplace(Level, Info);
+        ResultDiag = StoredDiag.getPointer();
+      }
+      StandaloneDiags->push_back(
+          makeStandaloneDiagnostic(*LangOpts, *ResultDiag));
+    }
+  }
 }
 
 IntrusiveRefCntPtr<ASTReader> ASTUnit::getASTReader() const {
@@ -653,12 +669,12 @@ void ASTUnit::ConfigureDiags(IntrusiveRefCntPtr<DiagnosticsEngine> Diags,
                              ASTUnit &AST, bool CaptureDiagnostics) {
   assert(Diags.get() && "no DiagnosticsEngine was provided");
   if (CaptureDiagnostics)
-    Diags->setClient(new StoredDiagnosticConsumer(AST.StoredDiagnostics));
+    Diags->setClient(new StoredDiagnosticConsumer(&AST.StoredDiagnostics, nullptr));
 }
 
 std::unique_ptr<ASTUnit> ASTUnit::LoadFromASTFile(
     const std::string &Filename, const PCHContainerReader &PCHContainerRdr,
-    IntrusiveRefCntPtr<DiagnosticsEngine> Diags,
+    WhatToLoad ToLoad, IntrusiveRefCntPtr<DiagnosticsEngine> Diags,
     const FileSystemOptions &FileSystemOpts, bool UseDebugInfo,
     bool OnlyLocalDecls, ArrayRef<RemappedFile> RemappedFiles,
     bool CaptureDiagnostics, bool AllowPCHWithCompilerErrors,
@@ -674,6 +690,7 @@ std::unique_ptr<ASTUnit> ASTUnit::LoadFromASTFile(
 
   ConfigureDiags(Diags, *AST, CaptureDiagnostics);
 
+  AST->LangOpts = std::make_shared<LangOptions>();
   AST->OnlyLocalDecls = OnlyLocalDecls;
   AST->CaptureDiagnostics = CaptureDiagnostics;
   AST->Diagnostics = Diags;
@@ -689,13 +706,12 @@ std::unique_ptr<ASTUnit> ASTUnit::LoadFromASTFile(
   AST->HeaderInfo.reset(new HeaderSearch(AST->HSOpts,
                                          AST->getSourceManager(),
                                          AST->getDiagnostics(),
-                                         AST->ASTFileLangOpts,
+                                         AST->getLangOpts(),
                                          /*Target=*/nullptr));
-
-  auto PPOpts = std::make_shared<PreprocessorOptions>();
+  AST->PPOpts = std::make_shared<PreprocessorOptions>();
 
   for (const auto &RemappedFile : RemappedFiles)
-    PPOpts->addRemappedFile(RemappedFile.first, RemappedFile.second);
+    AST->PPOpts->addRemappedFile(RemappedFile.first, RemappedFile.second);
 
   // Gather Info for preprocessor construction later on.
 
@@ -703,35 +719,36 @@ std::unique_ptr<ASTUnit> ASTUnit::LoadFromASTFile(
   unsigned Counter;
 
   AST->PP = std::make_shared<Preprocessor>(
-      std::move(PPOpts), AST->getDiagnostics(), AST->ASTFileLangOpts,
-      AST->getSourceManager(), *AST->PCMCache, HeaderInfo, *AST,
+      AST->PPOpts, AST->getDiagnostics(), *AST->LangOpts,
+      AST->getSourceManager(), *AST->PCMCache, HeaderInfo, AST->ModuleLoader,
       /*IILookup=*/nullptr,
       /*OwnsHeaderSearch=*/false);
   Preprocessor &PP = *AST->PP;
 
-  AST->Ctx = new ASTContext(AST->ASTFileLangOpts, AST->getSourceManager(),
-                            PP.getIdentifierTable(), PP.getSelectorTable(),
-                            PP.getBuiltinInfo());
-  ASTContext &Context = *AST->Ctx;
+  if (ToLoad >= LoadASTOnly)
+    AST->Ctx = new ASTContext(*AST->LangOpts, AST->getSourceManager(),
+                              PP.getIdentifierTable(), PP.getSelectorTable(),
+                              PP.getBuiltinInfo());
 
   bool disableValid = false;
   if (::getenv("LIBCLANG_DISABLE_PCH_VALIDATION"))
     disableValid = true;
-  AST->Reader = new ASTReader(PP, Context, PCHContainerRdr, { },
+  AST->Reader = new ASTReader(PP, AST->Ctx.get(), PCHContainerRdr, { },
                               /*isysroot=*/"",
                               /*DisableValidation=*/disableValid,
                               AllowPCHWithCompilerErrors);
 
   AST->Reader->setListener(llvm::make_unique<ASTInfoCollector>(
-      *AST->PP, Context, AST->ASTFileLangOpts, AST->TargetOpts, AST->Target,
-      Counter));
+      *AST->PP, AST->Ctx.get(), *AST->HSOpts, *AST->PPOpts, *AST->LangOpts,
+      AST->TargetOpts, AST->Target, Counter));
 
   // Attach the AST reader to the AST context as an external AST
   // source, so that declarations will be deserialized from the
   // AST file as needed.
   // We need the external source to be set up before we read the AST, because
   // eagerly-deserialized declarations may use it.
-  Context.setExternalSource(AST->Reader);
+  if (AST->Ctx)
+    AST->Ctx->setExternalSource(AST->Reader);
 
   switch (AST->Reader->ReadAST(Filename, serialization::MK_MainFile,
                           SourceLocation(), ASTReader::ARR_None)) {
@@ -753,21 +770,29 @@ std::unique_ptr<ASTUnit> ASTUnit::LoadFromASTFile(
   PP.setCounterValue(Counter);
 
   // Create an AST consumer, even though it isn't used.
-  AST->Consumer.reset(new ASTConsumer);
-  
+  if (ToLoad >= LoadASTOnly)
+    AST->Consumer.reset(new ASTConsumer);
+
   // Create a semantic analysis object and tell the AST reader about it.
-  AST->TheSema.reset(new Sema(PP, Context, *AST->Consumer));
-  AST->TheSema->Initialize();
-  AST->Reader->InitializeSema(*AST->TheSema);
+  if (ToLoad >= LoadEverything) {
+    AST->TheSema.reset(new Sema(PP, *AST->Ctx, *AST->Consumer));
+    AST->TheSema->Initialize();
+    AST->Reader->InitializeSema(*AST->TheSema);
+  }
 
   // Tell the diagnostic client that we have started a source file.
-  AST->getDiagnostics().getClient()->BeginSourceFile(Context.getLangOpts(),&PP);
+  AST->getDiagnostics().getClient()->BeginSourceFile(PP.getLangOpts(), &PP);
 
   return AST;
 }
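
Note: the ToLoad guards above implement a three-level loading depth compared with >=, so the enumerator order matters. For reference (assumption: declared in ASTUnit.h next to LoadFromASTFile):

    enum WhatToLoad {
      LoadPreprocessorOnly, // preprocessor state only; no ASTContext, no Sema
      LoadASTOnly,          // ASTContext and consumer, but no Sema
      LoadEverything        // full Sema, as before this change
    };
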
 
 namespace {
 
+/// \brief Add the given macro to the hash of all top-level entities.
+void AddDefinedMacroToHash(const Token &MacroNameTok, unsigned &Hash) {
+  Hash = llvm::HashString(MacroNameTok.getIdentifierInfo()->getName(), Hash);
+}
+
 /// \brief Preprocessor callback class that updates a hash value with the names 
 /// of all macros that have been defined by the translation unit.
 class MacroDefinitionTrackerPPCallbacks : public PPCallbacks {
@@ -778,7 +803,7 @@ class MacroDefinitionTrackerPPCallbacks : public PPCallbacks {
 
   void MacroDefined(const Token &MacroNameTok,
                     const MacroDirective *MD) override {
-    Hash = llvm::HashString(MacroNameTok.getIdentifierInfo()->getName(), Hash);
+    AddDefinedMacroToHash(MacroNameTok, Hash);
   }
 };
 
@@ -904,45 +929,27 @@ class TopLevelDeclTrackerAction : public ASTFrontendAction {
   }
 };
 
-class PrecompilePreambleAction : public ASTFrontendAction {
-  ASTUnit &Unit;
-  bool HasEmittedPreamblePCH;
-
+class ASTUnitPreambleCallbacks : public PreambleCallbacks {
 public:
-  explicit PrecompilePreambleAction(ASTUnit &Unit)
-      : Unit(Unit), HasEmittedPreamblePCH(false) {}
+  unsigned getHash() const { return Hash; }
 
-  std::unique_ptr<ASTConsumer> CreateASTConsumer(CompilerInstance &CI,
-                                                 StringRef InFile) override;
-  bool hasEmittedPreamblePCH() const { return HasEmittedPreamblePCH; }
-  void setHasEmittedPreamblePCH() { HasEmittedPreamblePCH = true; }
-  bool shouldEraseOutputFiles() override { return !hasEmittedPreamblePCH(); }
+  std::vector<Decl *> takeTopLevelDecls() { return std::move(TopLevelDecls); }
 
-  bool hasCodeCompletionSupport() const override { return false; }
-  bool hasASTFileSupport() const override { return false; }
-  TranslationUnitKind getTranslationUnitKind() override { return TU_Prefix; }
-};
-
-class PrecompilePreambleConsumer : public PCHGenerator {
-  ASTUnit &Unit;
-  unsigned &Hash;
-  std::vector<Decl *> TopLevelDecls;
-  PrecompilePreambleAction *Action;
-  std::unique_ptr<raw_ostream> Out;
+  std::vector<serialization::DeclID> takeTopLevelDeclIDs() {
+    return std::move(TopLevelDeclIDs);
+  }
 
-public:
-  PrecompilePreambleConsumer(ASTUnit &Unit, PrecompilePreambleAction *Action,
-                             const Preprocessor &PP, StringRef isysroot,
-                             std::unique_ptr<raw_ostream> Out)
-      : PCHGenerator(PP, "", isysroot, std::make_shared<PCHBuffer>(),
-                     ArrayRef<std::shared_ptr<ModuleFileExtension>>(),
-                     /*AllowASTWithErrors=*/true),
-        Unit(Unit), Hash(Unit.getCurrentTopLevelHashValue()), Action(Action),
-        Out(std::move(Out)) {
-    Hash = 0;
+  void AfterPCHEmitted(ASTWriter &Writer) override {
+    TopLevelDeclIDs.reserve(TopLevelDecls.size());
+    for (Decl *D : TopLevelDecls) {
+      // Invalid top-level decls may not have been serialized.
+      if (D->isInvalidDecl())
+        continue;
+      TopLevelDeclIDs.push_back(Writer.getDeclID(D));
+    }
   }
 
-  bool HandleTopLevelDecl(DeclGroupRef DG) override {
+  void HandleTopLevelDecl(DeclGroupRef DG) override {
     for (Decl *D : DG) {
       // FIXME: Currently ObjC method declarations are incorrectly being
       // reported as top-level declarations, even though their DeclContext
@@ -953,59 +960,22 @@ class PrecompilePreambleConsumer : public PCHGenerator {
       AddTopLevelDeclarationToHash(D, Hash);
       TopLevelDecls.push_back(D);
     }
-    return true;
   }
 
-  void HandleTranslationUnit(ASTContext &Ctx) override {
-    PCHGenerator::HandleTranslationUnit(Ctx);
-    if (hasEmittedPCH()) {
-      // Write the generated bitstream to "Out".
-      *Out << getPCH();
-      // Make sure it hits disk now.
-      Out->flush();
-      // Free the buffer.
-      llvm::SmallVector<char, 0> Empty;
-      getPCH() = std::move(Empty);
-
-      // Translate the top-level declarations we captured during
-      // parsing into declaration IDs in the precompiled
-      // preamble. This will allow us to deserialize those top-level
-      // declarations when requested.
-      for (Decl *D : TopLevelDecls) {
-        // Invalid top-level decls may not have been serialized.
-        if (D->isInvalidDecl())
-          continue;
-        Unit.addTopLevelDeclFromPreamble(getWriter().getDeclID(D));
-      }
-
-      Action->setHasEmittedPreamblePCH();
-    }
+  void HandleMacroDefined(const Token &MacroNameTok,
+                          const MacroDirective *MD) override {
+    AddDefinedMacroToHash(MacroNameTok, Hash);
   }
+
+private:
+  unsigned Hash = 0;
+  std::vector<Decl *> TopLevelDecls;
+  std::vector<serialization::DeclID> TopLevelDeclIDs;
+  llvm::SmallVector<StoredDiagnostic, 4> PreambleDiags;
 };
 
 } // anonymous namespace
 
-std::unique_ptr<ASTConsumer>
-PrecompilePreambleAction::CreateASTConsumer(CompilerInstance &CI,
-                                            StringRef InFile) {
-  std::string Sysroot;
-  std::string OutputFile;
-  std::unique_ptr<raw_ostream> OS =
-      GeneratePCHAction::ComputeASTConsumerArguments(CI, InFile, Sysroot,
-                                                     OutputFile);
-  if (!OS)
-    return nullptr;
-
-  if (!CI.getFrontendOpts().RelocatablePCH)
-    Sysroot.clear();
-
-  CI.getPreprocessor().addPPCallbacks(
-      llvm::make_unique<MacroDefinitionTrackerPPCallbacks>(
-                                           Unit.getCurrentTopLevelHashValue()));
-  return llvm::make_unique<PrecompilePreambleConsumer>(
-      Unit, this, CI.getPreprocessor(), Sysroot, std::move(OS));
-}
-
 static bool isNonDriverDiag(const StoredDiagnostic &StoredDiag) {
   return StoredDiag.getLocation().isValid();
 }
@@ -1041,15 +1011,20 @@ static void checkAndSanitizeDiags(SmallVectorImpl<StoredDiagnostic> &
 /// \returns True if a failure occurred that causes the ASTUnit not to
 /// contain any translation-unit information, false otherwise.
 bool ASTUnit::Parse(std::shared_ptr<PCHContainerOperations> PCHContainerOps,
-                    std::unique_ptr<llvm::MemoryBuffer> OverrideMainBuffer) {
-  SavedMainFileBuffer.reset();
-
+                    std::unique_ptr<llvm::MemoryBuffer> OverrideMainBuffer,
+                    IntrusiveRefCntPtr<vfs::FileSystem> VFS) {
   if (!Invocation)
     return true;
 
   // Create the compiler instance to use for building the AST.
   std::unique_ptr<CompilerInstance> Clang(
       new CompilerInstance(std::move(PCHContainerOps)));
+  if (FileMgr && VFS) {
+    assert(VFS == FileMgr->getVirtualFileSystem() &&
+           "VFS passed to Parse and VFS in FileMgr are different");
+  } else if (VFS) {
+    Clang->setVirtualFileSystem(VFS);
+  }
 
   // Recover resources if we crash before exiting this method.
   llvm::CrashRecoveryContextCleanupRegistrar<CompilerInstance>
@@ -1090,18 +1065,11 @@ bool ASTUnit::Parse(std::shared_ptr<PCHContainerOperations> PCHContainerOps,
     Clang->createFileManager();
     FileMgr = &Clang->getFileManager();
   }
-  SourceMgr = new SourceManager(getDiagnostics(), *FileMgr,
-                                UserFilesAreVolatile);
-  TheSema.reset();
-  Ctx = nullptr;
-  PP = nullptr;
-  Reader = nullptr;
 
-  // Clear out old caches and data.
-  TopLevelDecls.clear();
-  clearFileLevelDecls();
-  CleanTemporaryFiles();
+  ResetForParse();
 
+  SourceMgr = new SourceManager(getDiagnostics(), *FileMgr,
+                                UserFilesAreVolatile);
   if (!OverrideMainBuffer) {
     checkAndRemoveNonDriverDiags(StoredDiagnostics);
     TopLevelDeclsInPreamble.clear();
@@ -1115,15 +1083,9 @@ bool ASTUnit::Parse(std::shared_ptr<PCHContainerOperations> PCHContainerOps,
   
   // If the main file has been overridden due to the use of a preamble,
   // make that override happen and introduce the preamble.
-  PreprocessorOptions &PreprocessorOpts = Clang->getPreprocessorOpts();
   if (OverrideMainBuffer) {
-    PreprocessorOpts.addRemappedFile(OriginalSourceFile,
-                                     OverrideMainBuffer.get());
-    PreprocessorOpts.PrecompiledPreambleBytes.first = Preamble.size();
-    PreprocessorOpts.PrecompiledPreambleBytes.second
-                                                    = PreambleEndsAtStartOfLine;
-    PreprocessorOpts.ImplicitPCHInclude = getPreambleFile(this);
-    PreprocessorOpts.DisablePCHValidation = true;
+    assert(Preamble && "No preamble was built, but OverrideMainBuffer is not null");
+    Preamble->AddImplicitPreamble(Clang->getInvocation(), OverrideMainBuffer.get());
     
     // The stored diagnostic has the old source manager in it; update
     // the locations to refer into the new source manager. Since we've
@@ -1149,6 +1111,8 @@ bool ASTUnit::Parse(std::shared_ptr<PCHContainerOperations> PCHContainerOps,
   if (SavedMainFileBuffer)
     TranslateStoredDiagnostics(getFileManager(), getSourceManager(),
                                PreambleDiagnostics, StoredDiagnostics);
+  else
+    PreambleSrcLocCache.clear();
 
   if (!Act->Execute())
     goto error;
@@ -1174,111 +1138,6 @@ bool ASTUnit::Parse(std::shared_ptr<PCHContainerOperations> PCHContainerOps,
   return true;
 }
 
-/// \brief Simple function to retrieve a path for a preamble precompiled header.
-static std::string GetPreamblePCHPath() {
-  // FIXME: This is a hack so that we can override the preamble file during
-  // crash-recovery testing, which is the only case where the preamble files
-  // are not necessarily cleaned up.
-  const char *TmpFile = ::getenv("CINDEXTEST_PREAMBLE_FILE");
-  if (TmpFile)
-    return TmpFile;
-
-  SmallString<128> Path;
-  llvm::sys::fs::createTemporaryFile("preamble", "pch", Path);
-
-  return Path.str();
-}
-
-/// \brief Compute the preamble for the main file, providing the source buffer
-/// that corresponds to the main file along with a pair (bytes, start-of-line)
-/// that describes the preamble.
-ASTUnit::ComputedPreamble
-ASTUnit::ComputePreamble(CompilerInvocation &Invocation, unsigned MaxLines) {
-  FrontendOptions &FrontendOpts = Invocation.getFrontendOpts();
-  PreprocessorOptions &PreprocessorOpts = Invocation.getPreprocessorOpts();
-  
-  // Try to determine if the main file has been remapped, either from the 
-  // command line (to another file) or directly through the compiler invocation
-  // (to a memory buffer).
-  llvm::MemoryBuffer *Buffer = nullptr;
-  std::unique_ptr<llvm::MemoryBuffer> BufferOwner;
-  std::string MainFilePath(FrontendOpts.Inputs[0].getFile());
-  llvm::sys::fs::UniqueID MainFileID;
-  if (!llvm::sys::fs::getUniqueID(MainFilePath, MainFileID)) {
-    // Check whether there is a file-file remapping of the main file
-    for (const auto &RF : PreprocessorOpts.RemappedFiles) {
-      std::string MPath(RF.first);
-      llvm::sys::fs::UniqueID MID;
-      if (!llvm::sys::fs::getUniqueID(MPath, MID)) {
-        if (MainFileID == MID) {
-          // We found a remapping. Try to load the resulting, remapped source.
-          BufferOwner = getBufferForFile(RF.second);
-          if (!BufferOwner)
-            return ComputedPreamble(nullptr, nullptr, 0, true);
-        }
-      }
-    }
-    
-    // Check whether there is a file-buffer remapping. It supercedes the
-    // file-file remapping.
-    for (const auto &RB : PreprocessorOpts.RemappedFileBuffers) {
-      std::string MPath(RB.first);
-      llvm::sys::fs::UniqueID MID;
-      if (!llvm::sys::fs::getUniqueID(MPath, MID)) {
-        if (MainFileID == MID) {
-          // We found a remapping.
-          BufferOwner.reset();
-          Buffer = const_cast<llvm::MemoryBuffer *>(RB.second);
-        }
-      }
-    }
-  }
-  
-  // If the main source file was not remapped, load it now.
-  if (!Buffer && !BufferOwner) {
-    BufferOwner = getBufferForFile(FrontendOpts.Inputs[0].getFile());
-    if (!BufferOwner)
-      return ComputedPreamble(nullptr, nullptr, 0, true);
-  }
-
-  if (!Buffer)
-    Buffer = BufferOwner.get();
-  auto Pre = Lexer::ComputePreamble(Buffer->getBuffer(),
-                                    *Invocation.getLangOpts(), MaxLines);
-  return ComputedPreamble(Buffer, std::move(BufferOwner), Pre.first,
-                          Pre.second);
-}
-
-ASTUnit::PreambleFileHash
-ASTUnit::PreambleFileHash::createForFile(off_t Size, time_t ModTime) {
-  PreambleFileHash Result;
-  Result.Size = Size;
-  Result.ModTime = ModTime;
-  Result.MD5 = {};
-  return Result;
-}
-
-ASTUnit::PreambleFileHash ASTUnit::PreambleFileHash::createForMemoryBuffer(
-    const llvm::MemoryBuffer *Buffer) {
-  PreambleFileHash Result;
-  Result.Size = Buffer->getBufferSize();
-  Result.ModTime = 0;
-
-  llvm::MD5 MD5Ctx;
-  MD5Ctx.update(Buffer->getBuffer().data());
-  MD5Ctx.final(Result.MD5);
-
-  return Result;
-}
-
-namespace clang {
-bool operator==(const ASTUnit::PreambleFileHash &LHS,
-                const ASTUnit::PreambleFileHash &RHS) {
-  return LHS.Size == RHS.Size && LHS.ModTime == RHS.ModTime &&
-         LHS.MD5 == RHS.MD5;
-}
-} // namespace clang
-
 static std::pair<unsigned, unsigned>
 makeStandaloneRange(CharSourceRange Range, const SourceManager &SM,
                     const LangOptions &LangOpts) {
@@ -1347,135 +1206,44 @@ makeStandaloneDiagnostic(const LangOptions &LangOpts,
 std::unique_ptr<llvm::MemoryBuffer>
 ASTUnit::getMainBufferWithPrecompiledPreamble(
     std::shared_ptr<PCHContainerOperations> PCHContainerOps,
-    const CompilerInvocation &PreambleInvocationIn, bool AllowRebuild,
+    const CompilerInvocation &PreambleInvocationIn,
+    IntrusiveRefCntPtr<vfs::FileSystem> VFS, bool AllowRebuild,
     unsigned MaxLines) {
 
-  auto PreambleInvocation =
-      std::make_shared<CompilerInvocation>(PreambleInvocationIn);
-  FrontendOptions &FrontendOpts = PreambleInvocation->getFrontendOpts();
-  PreprocessorOptions &PreprocessorOpts
-    = PreambleInvocation->getPreprocessorOpts();
-
-  ComputedPreamble NewPreamble = ComputePreamble(*PreambleInvocation, MaxLines);
-
-  if (!NewPreamble.Size) {
-    // We couldn't find a preamble in the main source. Clear out the current
-    // preamble, if we have one. It's obviously no good any more.
-    Preamble.clear();
-    erasePreambleFile(this);
-
-    // The next time we actually see a preamble, precompile it.
-    PreambleRebuildCounter = 1;
+  auto MainFilePath =
+      PreambleInvocationIn.getFrontendOpts().Inputs[0].getFile();
+  std::unique_ptr<llvm::MemoryBuffer> MainFileBuffer =
+      getBufferForFileHandlingRemapping(PreambleInvocationIn, VFS.get(),
+                                        MainFilePath);
+  if (!MainFileBuffer)
     return nullptr;
-  }
-  
-  if (!Preamble.empty()) {
-    // We've previously computed a preamble. Check whether we have the same
-    // preamble now that we did before, and that there's enough space in
-    // the main-file buffer within the precompiled preamble to fit the
-    // new main file.
-    if (Preamble.size() == NewPreamble.Size &&
-        PreambleEndsAtStartOfLine == NewPreamble.PreambleEndsAtStartOfLine &&
-        memcmp(Preamble.getBufferStart(), NewPreamble.Buffer->getBufferStart(),
-               NewPreamble.Size) == 0) {
-      // The preamble has not changed. We may be able to re-use the precompiled
-      // preamble.
-
-      // Check that none of the files used by the preamble have changed.
-      bool AnyFileChanged = false;
-          
-      // First, make a record of those files that have been overridden via
-      // remapping or unsaved_files.
-      std::map<llvm::sys::fs::UniqueID, PreambleFileHash> OverriddenFiles;
-      for (const auto &R : PreprocessorOpts.RemappedFiles) {
-        if (AnyFileChanged)
-          break;
-
-        vfs::Status Status;
-        if (FileMgr->getNoncachedStatValue(R.second, Status)) {
-          // If we can't stat the file we're remapping to, assume that something
-          // horrible happened.
-          AnyFileChanged = true;
-          break;
-        }
 
-        OverriddenFiles[Status.getUniqueID()] = PreambleFileHash::createForFile(
-            Status.getSize(),
-            llvm::sys::toTimeT(Status.getLastModificationTime()));
-      }
+  PreambleBounds Bounds =
+      ComputePreambleBounds(*PreambleInvocationIn.getLangOpts(),
+                            MainFileBuffer.get(), MaxLines);
+  if (!Bounds.Size)
+    return nullptr;
 
-      for (const auto &RB : PreprocessorOpts.RemappedFileBuffers) {
-        if (AnyFileChanged)
-          break;
+  if (Preamble) {
+    if (Preamble->CanReuse(PreambleInvocationIn, MainFileBuffer.get(), Bounds,
+                           VFS.get())) {
+      // Okay! We can re-use the precompiled preamble.
 
-        vfs::Status Status;
-        if (FileMgr->getNoncachedStatValue(RB.first, Status)) {
-          AnyFileChanged = true;
-          break;
-        }
+      // Set the state of the diagnostic object to mimic its state
+      // after parsing the preamble.
+      getDiagnostics().Reset();
+      ProcessWarningOptions(getDiagnostics(),
+                            PreambleInvocationIn.getDiagnosticOpts());
+      getDiagnostics().setNumWarnings(NumWarningsInPreamble);
 
-        OverriddenFiles[Status.getUniqueID()] =
-            PreambleFileHash::createForMemoryBuffer(RB.second);
-      }
-       
-      // Check whether anything has changed.
-      for (llvm::StringMap<PreambleFileHash>::iterator
-             F = FilesInPreamble.begin(), FEnd = FilesInPreamble.end();
-           !AnyFileChanged && F != FEnd; 
-           ++F) {
-        vfs::Status Status;
-        if (FileMgr->getNoncachedStatValue(F->first(), Status)) {
-          // If we can't stat the file, assume that something horrible happened.
-          AnyFileChanged = true;
-          break;
-        }
-
-        std::map<llvm::sys::fs::UniqueID, PreambleFileHash>::iterator Overridden
-          = OverriddenFiles.find(Status.getUniqueID());
-        if (Overridden != OverriddenFiles.end()) {
-          // This file was remapped; check whether the newly-mapped file 
-          // matches up with the previous mapping.
-          if (Overridden->second != F->second)
-            AnyFileChanged = true;
-          continue;
-        }
-        
-        // The file was not remapped; check whether it has changed on disk.
-        if (Status.getSize() != uint64_t(F->second.Size) ||
-            llvm::sys::toTimeT(Status.getLastModificationTime()) !=
-                F->second.ModTime)
-          AnyFileChanged = true;
-      }
-          
-      if (!AnyFileChanged) {
-        // Okay! We can re-use the precompiled preamble.
-
-        // Set the state of the diagnostic object to mimic its state
-        // after parsing the preamble.
-        getDiagnostics().Reset();
-        ProcessWarningOptions(getDiagnostics(), 
-                              PreambleInvocation->getDiagnosticOpts());
-        getDiagnostics().setNumWarnings(NumWarningsInPreamble);
-
-        return llvm::MemoryBuffer::getMemBufferCopy(
-            NewPreamble.Buffer->getBuffer(), FrontendOpts.Inputs[0].getFile());
-      }
+      PreambleRebuildCounter = 1;
+      return MainFileBuffer;
+    } else {
+      Preamble.reset();
+      PreambleDiagnostics.clear();
+      TopLevelDeclsInPreamble.clear();
+      PreambleRebuildCounter = 1;
     }
-
-    // If we aren't allowed to rebuild the precompiled preamble, just
-    // return now.
-    if (!AllowRebuild)
-      return nullptr;
-
-    // We can't reuse the previously-computed preamble. Build a new one.
-    Preamble.clear();
-    PreambleDiagnostics.clear();
-    erasePreambleFile(this);
-    PreambleRebuildCounter = 1;
-  } else if (!AllowRebuild) {
-    // We aren't allowed to rebuild the precompiled preamble; just
-    // return now.
-    return nullptr;
   }
 
   // If the preamble rebuild counter > 1, it's because we previously
@@ -1486,164 +1254,61 @@ ASTUnit::getMainBufferWithPrecompiledPreamble(
     return nullptr;
   }
 
-  // Create a temporary file for the precompiled preamble. In rare 
-  // circumstances, this can fail.
-  std::string PreamblePCHPath = GetPreamblePCHPath();
-  if (PreamblePCHPath.empty()) {
-    // Try again next time.
-    PreambleRebuildCounter = 1;
+  assert(!Preamble && "No Preamble should be stored at this point");
+  // If we aren't allowed to rebuild the precompiled preamble, just
+  // return now.
+  if (!AllowRebuild)
     return nullptr;
-  }
-  
-  // We did not previously compute a preamble, or it can't be reused anyway.
-  SimpleTimer PreambleTimer(WantTiming);
-  PreambleTimer.setOutput("Precompiling preamble");
-
-  // Save the preamble text for later; we'll need to compare against it for
-  // subsequent reparses.
-  StringRef MainFilename = FrontendOpts.Inputs[0].getFile();
-  Preamble.assign(FileMgr->getFile(MainFilename),
-                  NewPreamble.Buffer->getBufferStart(),
-                  NewPreamble.Buffer->getBufferStart() + NewPreamble.Size);
-  PreambleEndsAtStartOfLine = NewPreamble.PreambleEndsAtStartOfLine;
-
-  PreambleBuffer = llvm::MemoryBuffer::getMemBufferCopy(
-      NewPreamble.Buffer->getBuffer().slice(0, Preamble.size()), MainFilename);
-
-  // Remap the main source file to the preamble buffer.
-  StringRef MainFilePath = FrontendOpts.Inputs[0].getFile();
-  PreprocessorOpts.addRemappedFile(MainFilePath, PreambleBuffer.get());
-
-  // Tell the compiler invocation to generate a temporary precompiled header.
-  FrontendOpts.ProgramAction = frontend::GeneratePCH;
-  // FIXME: Generate the precompiled header into memory?
-  FrontendOpts.OutputFile = PreamblePCHPath;
-  PreprocessorOpts.PrecompiledPreambleBytes.first = 0;
-  PreprocessorOpts.PrecompiledPreambleBytes.second = false;
-  
-  // Create the compiler instance to use for building the precompiled preamble.
-  std::unique_ptr<CompilerInstance> Clang(
-      new CompilerInstance(std::move(PCHContainerOps)));
 
-  // Recover resources if we crash before exiting this method.
-  llvm::CrashRecoveryContextCleanupRegistrar<CompilerInstance>
-    CICleanup(Clang.get());
-
-  Clang->setInvocation(std::move(PreambleInvocation));
-  OriginalSourceFile = Clang->getFrontendOpts().Inputs[0].getFile();
-  
-  // Set up diagnostics, capturing all of the diagnostics produced.
-  Clang->setDiagnostics(&getDiagnostics());
-  
-  // Create the target instance.
-  Clang->setTarget(TargetInfo::CreateTargetInfo(
-      Clang->getDiagnostics(), Clang->getInvocation().TargetOpts));
-  if (!Clang->hasTarget()) {
-    llvm::sys::fs::remove(FrontendOpts.OutputFile);
-    Preamble.clear();
-    PreambleRebuildCounter = DefaultPreambleRebuildInterval;
-    PreprocessorOpts.RemappedFileBuffers.pop_back();
-    return nullptr;
-  }
-  
-  // Inform the target of the language options.
-  //
-  // FIXME: We shouldn't need to do this, the target should be immutable once
-  // created. This complexity should be lifted elsewhere.
-  Clang->getTarget().adjust(Clang->getLangOpts());
-  
-  assert(Clang->getFrontendOpts().Inputs.size() == 1 &&
-         "Invocation must have exactly one source file!");
-  assert(Clang->getFrontendOpts().Inputs[0].getKind().getFormat() ==
-             InputKind::Source &&
-         "FIXME: AST inputs not yet supported here!");
-  assert(Clang->getFrontendOpts().Inputs[0].getKind().getLanguage() !=
-             InputKind::LLVM_IR &&
-         "IR inputs not support here!");
-  
-  // Clear out old caches and data.
-  getDiagnostics().Reset();
-  ProcessWarningOptions(getDiagnostics(), Clang->getDiagnosticOpts());
-  checkAndRemoveNonDriverDiags(StoredDiagnostics);
-  TopLevelDecls.clear();
-  TopLevelDeclsInPreamble.clear();
-  PreambleDiagnostics.clear();
-
-  IntrusiveRefCntPtr<vfs::FileSystem> VFS =
-      createVFSFromCompilerInvocation(Clang->getInvocation(), getDiagnostics());
-  if (!VFS)
-    return nullptr;
-
-  // Create a file manager object to provide access to and cache the filesystem.
-  Clang->setFileManager(new FileManager(Clang->getFileSystemOpts(), VFS));
-  
-  // Create the source manager.
-  Clang->setSourceManager(new SourceManager(getDiagnostics(),
-                                            Clang->getFileManager()));
-
-  auto PreambleDepCollector = std::make_shared<DependencyCollector>();
-  Clang->addDependencyCollector(PreambleDepCollector);
-
-  std::unique_ptr<PrecompilePreambleAction> Act;
-  Act.reset(new PrecompilePreambleAction(*this));
-  if (!Act->BeginSourceFile(*Clang.get(), Clang->getFrontendOpts().Inputs[0])) {
-    llvm::sys::fs::remove(FrontendOpts.OutputFile);
-    Preamble.clear();
-    PreambleRebuildCounter = DefaultPreambleRebuildInterval;
-    PreprocessorOpts.RemappedFileBuffers.pop_back();
-    return nullptr;
+  SmallVector<StandaloneDiagnostic, 4> NewPreambleDiagsStandalone;
+  SmallVector<StoredDiagnostic, 4> NewPreambleDiags;
+  ASTUnitPreambleCallbacks Callbacks;
+  {
+    llvm::Optional<CaptureDroppedDiagnostics> Capture;
+    if (CaptureDiagnostics)
+      Capture.emplace(/*RequestCapture=*/true, *Diagnostics, &NewPreambleDiags,
+                      &NewPreambleDiagsStandalone);
+
+    // We did not previously compute a preamble, or it can't be reused anyway.
+    SimpleTimer PreambleTimer(WantTiming);
+    PreambleTimer.setOutput("Precompiling preamble");
+
+    llvm::ErrorOr<PrecompiledPreamble> NewPreamble = PrecompiledPreamble::Build(
+        PreambleInvocationIn, MainFileBuffer.get(), Bounds, *Diagnostics, VFS,
+        PCHContainerOps, Callbacks);
+    if (NewPreamble) {
+      Preamble = std::move(*NewPreamble);
+      PreambleRebuildCounter = 1;
+    } else {
+      switch (static_cast<BuildPreambleError>(NewPreamble.getError().value())) {
+      case BuildPreambleError::CouldntCreateTempFile:
+      case BuildPreambleError::PreambleIsEmpty:
+        // Try again next time.
+        PreambleRebuildCounter = 1;
+        return nullptr;
+      case BuildPreambleError::CouldntCreateTargetInfo:
+      case BuildPreambleError::BeginSourceFileFailed:
+      case BuildPreambleError::CouldntEmitPCH:
+      case BuildPreambleError::CouldntCreateVFSOverlay:
+        // These errors are more likely to repeat; retry after some period.
+        PreambleRebuildCounter = DefaultPreambleRebuildInterval;
+        return nullptr;
+      }
+      llvm_unreachable("unexpected BuildPreambleError");
+    }
   }
-  
-  Act->Execute();
 
-  // Transfer any diagnostics generated when parsing the preamble into the set
-  // of preamble diagnostics.
-  for (stored_diag_iterator I = stored_diag_afterDriver_begin(),
-                            E = stored_diag_end();
-       I != E; ++I)
-    PreambleDiagnostics.push_back(
-        makeStandaloneDiagnostic(Clang->getLangOpts(), *I));
+  assert(Preamble && "Preamble wasn't built");
 
-  Act->EndSourceFile();
-
-  checkAndRemoveNonDriverDiags(StoredDiagnostics);
+  TopLevelDecls.clear();
+  TopLevelDeclsInPreamble = Callbacks.takeTopLevelDeclIDs();
+  PreambleTopLevelHashValue = Callbacks.getHash();
 
-  if (!Act->hasEmittedPreamblePCH()) {
-    // The preamble PCH failed (e.g. there was a module loading fatal error),
-    // so no precompiled header was generated. Forget that we even tried.
-    // FIXME: Should we leave a note for ourselves to try again?
-    llvm::sys::fs::remove(FrontendOpts.OutputFile);
-    Preamble.clear();
-    TopLevelDeclsInPreamble.clear();
-    PreambleRebuildCounter = DefaultPreambleRebuildInterval;
-    PreprocessorOpts.RemappedFileBuffers.pop_back();
-    return nullptr;
-  }
-  
-  // Keep track of the preamble we precompiled.
-  setPreambleFile(this, FrontendOpts.OutputFile);
   NumWarningsInPreamble = getDiagnostics().getNumWarnings();
-  
-  // Keep track of all of the files that the source manager knows about,
-  // so we can verify whether they have changed or not.
-  FilesInPreamble.clear();
-  SourceManager &SourceMgr = Clang->getSourceManager();
-  for (auto &Filename : PreambleDepCollector->getDependencies()) {
-    const FileEntry *File = Clang->getFileManager().getFile(Filename);
-    if (!File || File == SourceMgr.getFileEntryForID(SourceMgr.getMainFileID()))
-      continue;
-    if (time_t ModTime = File->getModificationTime()) {
-      FilesInPreamble[File->getName()] = PreambleFileHash::createForFile(
-          File->getSize(), ModTime);
-    } else {
-      llvm::MemoryBuffer *Buffer = SourceMgr.getMemoryBufferForFile(File);
-      FilesInPreamble[File->getName()] =
-          PreambleFileHash::createForMemoryBuffer(Buffer);
-    }
-  }
 
-  PreambleRebuildCounter = 1;
-  PreprocessorOpts.RemappedFileBuffers.pop_back();
+  checkAndRemoveNonDriverDiags(NewPreambleDiags);
+  StoredDiagnostics = std::move(NewPreambleDiags);
+  PreambleDiagnostics = std::move(NewPreambleDiagsStandalone);
 
   // If the hash of top-level entities differs from the hash of the top-level
   // entities the last time we rebuilt the preamble, clear out the completion
@@ -1653,11 +1318,12 @@ ASTUnit::getMainBufferWithPrecompiledPreamble(
     PreambleTopLevelHashValue = CurrentTopLevelHashValue;
   }
 
-  return llvm::MemoryBuffer::getMemBufferCopy(NewPreamble.Buffer->getBuffer(),
-                                              MainFilename);
+  return MainFileBuffer;
 }
 
 void ASTUnit::RealizeTopLevelDeclsFromPreamble() {
+  assert(Preamble && "Should only be called when preamble was built");
+
   std::vector<Decl *> Resolved;
   Resolved.reserve(TopLevelDeclsInPreamble.size());
   ExternalASTSource &Source = *getASTContext().getExternalSource();
@@ -1886,10 +1552,13 @@ ASTUnit *ASTUnit::LoadFromCompilerInvocationAction(
 
 bool ASTUnit::LoadFromCompilerInvocation(
     std::shared_ptr<PCHContainerOperations> PCHContainerOps,
-    unsigned PrecompilePreambleAfterNParses) {
+    unsigned PrecompilePreambleAfterNParses,
+    IntrusiveRefCntPtr<vfs::FileSystem> VFS) {
   if (!Invocation)
     return true;
-  
+
+  assert(VFS && "VFS is null");
+
   // We'll manage file buffers ourselves.
   Invocation->getPreprocessorOpts().RetainRemappedFileBuffers = true;
   Invocation->getFrontendOpts().DisableFree = false;
@@ -1900,19 +1569,19 @@ bool ASTUnit::LoadFromCompilerInvocation(
   if (PrecompilePreambleAfterNParses > 0) {
     PreambleRebuildCounter = PrecompilePreambleAfterNParses;
     OverrideMainBuffer =
-        getMainBufferWithPrecompiledPreamble(PCHContainerOps, *Invocation);
+        getMainBufferWithPrecompiledPreamble(PCHContainerOps, *Invocation, VFS);
     getDiagnostics().Reset();
     ProcessWarningOptions(getDiagnostics(), Invocation->getDiagnosticOpts());
   }
-  
+
   SimpleTimer ParsingTimer(WantTiming);
   ParsingTimer.setOutput("Parsing " + getMainFileName());
-  
+
   // Recover resources if we crash before exiting this method.
   llvm::CrashRecoveryContextCleanupRegistrar<llvm::MemoryBuffer>
     MemBufferCleanup(OverrideMainBuffer.get());
 
-  return Parse(std::move(PCHContainerOps), std::move(OverrideMainBuffer));
+  return Parse(std::move(PCHContainerOps), std::move(OverrideMainBuffer), VFS);
 }
 
 std::unique_ptr<ASTUnit> ASTUnit::LoadFromCompilerInvocation(
@@ -1946,7 +1615,8 @@ std::unique_ptr<ASTUnit> ASTUnit::LoadFromCompilerInvocation(
     DiagCleanup(Diags.get());
 
   if (AST->LoadFromCompilerInvocation(std::move(PCHContainerOps),
-                                      PrecompilePreambleAfterNParses))
+                                      PrecompilePreambleAfterNParses,
+                                      AST->FileMgr->getVirtualFileSystem()))
     return nullptr;
   return AST;
 }
@@ -1960,8 +1630,9 @@ ASTUnit *ASTUnit::LoadFromCommandLine(
     unsigned PrecompilePreambleAfterNParses, TranslationUnitKind TUKind,
     bool CacheCodeCompletionResults, bool IncludeBriefCommentsInCodeCompletion,
     bool AllowPCHWithCompilerErrors, bool SkipFunctionBodies,
-    bool UserFilesAreVolatile, bool ForSerialization,
-    llvm::Optional<StringRef> ModuleFormat, std::unique_ptr<ASTUnit> *ErrAST) {
+    bool SingleFileParse, bool UserFilesAreVolatile, bool ForSerialization,
+    llvm::Optional<StringRef> ModuleFormat, std::unique_ptr<ASTUnit> *ErrAST,
+    IntrusiveRefCntPtr<vfs::FileSystem> VFS) {
   assert(Diags.get() && "no DiagnosticsEngine was provided");
 
   SmallVector<StoredDiagnostic, 4> StoredDiagnostics;
@@ -1970,11 +1641,11 @@ ASTUnit *ASTUnit::LoadFromCommandLine(
 
   {
 
-    CaptureDroppedDiagnostics Capture(CaptureDiagnostics, *Diags, 
-                                      StoredDiagnostics);
+    CaptureDroppedDiagnostics Capture(CaptureDiagnostics, *Diags,
+                                      &StoredDiagnostics, nullptr);
 
     CI = clang::createInvocationFromCommandLine(
-        llvm::makeArrayRef(ArgBegin, ArgEnd), Diags);
+        llvm::makeArrayRef(ArgBegin, ArgEnd), Diags, VFS);
     if (!CI)
       return nullptr;
   }
@@ -1987,6 +1658,8 @@ ASTUnit *ASTUnit::LoadFromCommandLine(
   PreprocessorOptions &PPOpts = CI->getPreprocessorOpts();
   PPOpts.RemappedFilesKeepOriginalName = RemappedFilesKeepOriginalName;
   PPOpts.AllowPCHWithCompilerErrors = AllowPCHWithCompilerErrors;
+  PPOpts.GeneratePreamble = PrecompilePreambleAfterNParses != 0;
+  PPOpts.SingleFileParseMode = SingleFileParse;
   
   // Override the resources path.
   CI->getHeaderSearchOpts().ResourceDir = ResourceFilesPath;
@@ -2002,8 +1675,9 @@ ASTUnit *ASTUnit::LoadFromCommandLine(
   ConfigureDiags(Diags, *AST, CaptureDiagnostics);
   AST->Diagnostics = Diags;
   AST->FileSystemOpts = CI->getFileSystemOpts();
-  IntrusiveRefCntPtr<vfs::FileSystem> VFS =
-      createVFSFromCompilerInvocation(*CI, *Diags);
+  if (!VFS)
+    VFS = vfs::getRealFileSystem();
+  VFS = createVFSFromCompilerInvocation(*CI, *Diags, VFS);
   if (!VFS)
     return nullptr;
   AST->FileMgr = new FileManager(AST->FileSystemOpts, VFS);
@@ -2029,7 +1703,8 @@ ASTUnit *ASTUnit::LoadFromCommandLine(
     ASTUnitCleanup(AST.get());
 
   if (AST->LoadFromCompilerInvocation(std::move(PCHContainerOps),
-                                      PrecompilePreambleAfterNParses)) {
+                                      PrecompilePreambleAfterNParses,
+                                      VFS)) {
     // Some error occurred, if caller wants to examine diagnostics, pass it the
     // ASTUnit.
     if (ErrAST) {
@@ -2043,10 +1718,16 @@ ASTUnit *ASTUnit::LoadFromCommandLine(
 }
 
 bool ASTUnit::Reparse(std::shared_ptr<PCHContainerOperations> PCHContainerOps,
-                      ArrayRef<RemappedFile> RemappedFiles) {
+                      ArrayRef<RemappedFile> RemappedFiles,
+                      IntrusiveRefCntPtr<vfs::FileSystem> VFS) {
   if (!Invocation)
     return true;
 
+  if (!VFS) {
+    assert(FileMgr && "FileMgr is null on Reparse call");
+    VFS = FileMgr->getVirtualFileSystem();
+  }
+
   clearFileLevelDecls();
   
   SimpleTimer ParsingTimer(WantTiming);
@@ -2066,9 +1747,10 @@ bool ASTUnit::Reparse(std::shared_ptr<PCHContainerOperations> PCHContainerOps,
   // If we have a preamble file lying around, or if we might try to
   // build a precompiled preamble, do so now.
   std::unique_ptr<llvm::MemoryBuffer> OverrideMainBuffer;
-  if (!getPreambleFile(this).empty() || PreambleRebuildCounter > 0)
+  if (Preamble || PreambleRebuildCounter > 0)
     OverrideMainBuffer =
-        getMainBufferWithPrecompiledPreamble(PCHContainerOps, *Invocation);
+        getMainBufferWithPrecompiledPreamble(PCHContainerOps, *Invocation, VFS);
+
 
   // Clear out the diagnostics state.
   FileMgr.reset();
@@ -2079,7 +1761,7 @@ bool ASTUnit::Reparse(std::shared_ptr<PCHContainerOperations> PCHContainerOps,
 
   // Parse the sources
   bool Result =
-      Parse(std::move(PCHContainerOps), std::move(OverrideMainBuffer));
+      Parse(std::move(PCHContainerOps), std::move(OverrideMainBuffer), VFS);
 
   // If we're caching global code-completion results, and the top-level 
   // declarations have changed, clear out the code-completion cache.
@@ -2094,6 +1776,19 @@ bool ASTUnit::Reparse(std::shared_ptr<PCHContainerOperations> PCHContainerOps,
   return Result;
 }
 
+void ASTUnit::ResetForParse() {
+  SavedMainFileBuffer.reset();
+
+  SourceMgr.reset();
+  TheSema.reset();
+  Ctx.reset();
+  PP.reset();
+  Reader.reset();
+
+  TopLevelDecls.clear();
+  clearFileLevelDecls();
+}
+
 //----------------------------------------------------------------------------//
 // Code completion
 //----------------------------------------------------------------------------//
@@ -2386,7 +2081,7 @@ void ASTUnit::CodeComplete(
   Clang->setDiagnostics(&Diag);
   CaptureDroppedDiagnostics Capture(true, 
                                     Clang->getDiagnostics(), 
-                                    StoredDiagnostics);
+                                    &StoredDiagnostics, nullptr);
   ProcessWarningOptions(Diag, Inv.getDiagnosticOpts());
 
   // Create the target instance.
@@ -2435,17 +2130,21 @@ void ASTUnit::CodeComplete(
   // point is within the main file, after the end of the precompiled
   // preamble.
   std::unique_ptr<llvm::MemoryBuffer> OverrideMainBuffer;
-  if (!getPreambleFile(this).empty()) {
+  if (Preamble) {
     std::string CompleteFilePath(File);
-    llvm::sys::fs::UniqueID CompleteFileID;
 
-    if (!llvm::sys::fs::getUniqueID(CompleteFilePath, CompleteFileID)) {
+    auto VFS = FileMgr.getVirtualFileSystem();
+    auto CompleteFileStatus = VFS->status(CompleteFilePath);
+    if (CompleteFileStatus) {
+      llvm::sys::fs::UniqueID CompleteFileID = CompleteFileStatus->getUniqueID();
+
       std::string MainPath(OriginalSourceFile);
-      llvm::sys::fs::UniqueID MainID;
-      if (!llvm::sys::fs::getUniqueID(MainPath, MainID)) {
+      auto MainStatus = VFS->status(MainPath);
+      if (MainStatus) {
+        llvm::sys::fs::UniqueID MainID = MainStatus->getUniqueID();
         if (CompleteFileID == MainID && Line > 1)
           OverrideMainBuffer = getMainBufferWithPrecompiledPreamble(
-              PCHContainerOps, Inv, false, Line - 1);
+              PCHContainerOps, Inv, VFS, false, Line - 1);
       }
     }
   }
@@ -2453,14 +2152,8 @@ void ASTUnit::CodeComplete(
   // If the main file has been overridden due to the use of a preamble,
   // make that override happen and introduce the preamble.
   if (OverrideMainBuffer) {
-    PreprocessorOpts.addRemappedFile(OriginalSourceFile,
-                                     OverrideMainBuffer.get());
-    PreprocessorOpts.PrecompiledPreambleBytes.first = Preamble.size();
-    PreprocessorOpts.PrecompiledPreambleBytes.second
-                                                    = PreambleEndsAtStartOfLine;
-    PreprocessorOpts.ImplicitPCHInclude = getPreambleFile(this);
-    PreprocessorOpts.DisablePCHValidation = true;
-
+    assert(Preamble && "No preamble was built, but OverrideMainBuffer is not null");
+    Preamble->AddImplicitPreamble(Clang->getInvocation(), OverrideMainBuffer.get());
     OwnedBuffers.push_back(OverrideMainBuffer.release());
   } else {
     PreprocessorOpts.PrecompiledPreambleBytes.first = 0;
@@ -2551,11 +2244,9 @@ void ASTUnit::TranslateStoredDiagnostics(
   // remap all the locations to the new view. This includes the diag location,
   // any associated source ranges, and the source ranges of associated fix-its.
   // FIXME: There should be a cleaner way to do this.
-
   SmallVector<StoredDiagnostic, 4> Result;
   Result.reserve(Diags.size());
-  const FileEntry *PreviousFE = nullptr;
-  FileID FID;
+
   for (const StandaloneDiagnostic &SD : Diags) {
     // Rebuild the StoredDiagnostic.
     if (SD.Filename.empty())
@@ -2563,11 +2254,16 @@ void ASTUnit::TranslateStoredDiagnostics(
     const FileEntry *FE = FileMgr.getFile(SD.Filename);
     if (!FE)
       continue;
-    if (FE != PreviousFE) {
-      FID = SrcMgr.translateFile(FE);
-      PreviousFE = FE;
+    SourceLocation FileLoc;
+    auto ItFileID = PreambleSrcLocCache.find(SD.Filename);
+    if (ItFileID == PreambleSrcLocCache.end()) {
+      FileID FID = SrcMgr.translateFile(FE);
+      FileLoc = SrcMgr.getLocForStartOfFile(FID);
+      PreambleSrcLocCache[SD.Filename] = FileLoc;
+    } else {
+      FileLoc = ItFileID->getValue();
     }
-    SourceLocation FileLoc = SrcMgr.getLocForStartOfFile(FID);
+
     if (FileLoc.isInvalid())
       continue;
     SourceLocation L = FileLoc.getLocWithOffset(SD.LocOffset);
@@ -2704,11 +2400,11 @@ SourceLocation ASTUnit::mapLocationFromPreamble(SourceLocation Loc) {
   if (SourceMgr)
     PreambleID = SourceMgr->getPreambleFileID();
 
-  if (Loc.isInvalid() || Preamble.empty() || PreambleID.isInvalid())
+  if (Loc.isInvalid() || !Preamble || PreambleID.isInvalid())
     return Loc;
 
   unsigned Offs;
-  if (SourceMgr->isInFileID(Loc, PreambleID, &Offs) && Offs < Preamble.size()) {
+  if (SourceMgr->isInFileID(Loc, PreambleID, &Offs) && Offs < Preamble->getBounds().Size) {
     SourceLocation FileLoc
         = SourceMgr->getLocForStartOfFile(SourceMgr->getMainFileID());
     return FileLoc.getLocWithOffset(Offs);
@@ -2725,12 +2421,12 @@ SourceLocation ASTUnit::mapLocationToPreamble(SourceLocation Loc) {
   if (SourceMgr)
     PreambleID = SourceMgr->getPreambleFileID();
 
-  if (Loc.isInvalid() || Preamble.empty() || PreambleID.isInvalid())
+  if (Loc.isInvalid() || !Preamble || PreambleID.isInvalid())
     return Loc;
 
   unsigned Offs;
   if (SourceMgr->isInFileID(Loc, SourceMgr->getMainFileID(), &Offs) &&
-      Offs < Preamble.size()) {
+      Offs < Preamble->getBounds().Size) {
     SourceLocation FileLoc = SourceMgr->getLocForStartOfFile(PreambleID);
     return FileLoc.getLocWithOffset(Offs);
   }
@@ -2848,18 +2544,32 @@ const FileEntry *ASTUnit::getPCHFile() {
 }
 
 bool ASTUnit::isModuleFile() {
-  return isMainFileAST() && ASTFileLangOpts.isCompilingModule();
+  return isMainFileAST() && getLangOpts().isCompilingModule();
 }
 
-void ASTUnit::PreambleData::countLines() const {
-  NumLines = 0;
-  if (empty())
-    return;
+InputKind ASTUnit::getInputKind() const {
+  auto &LangOpts = getLangOpts();
+
+  InputKind::Language Lang;
+  if (LangOpts.OpenCL)
+    Lang = InputKind::OpenCL;
+  else if (LangOpts.CUDA)
+    Lang = InputKind::CUDA;
+  else if (LangOpts.RenderScript)
+    Lang = InputKind::RenderScript;
+  else if (LangOpts.CPlusPlus)
+    Lang = LangOpts.ObjC1 ? InputKind::ObjCXX : InputKind::CXX;
+  else
+    Lang = LangOpts.ObjC1 ? InputKind::ObjC : InputKind::C;
+
+  InputKind::Format Fmt = InputKind::Source;
+  if (LangOpts.getCompilingModule() == LangOptions::CMK_ModuleMap)
+    Fmt = InputKind::ModuleMap;
 
-  NumLines = std::count(Buffer.begin(), Buffer.end(), '\n');
+  // We don't know if input was preprocessed. Assume not.
+  bool PP = false;
 
-  if (Buffer.back() != '\n')
-    ++NumLines;
+  return InputKind(Lang, Fmt, PP);
 }
 
 #ifndef NDEBUG
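
Taken together, the ASTUnit changes above swap the hand-rolled preamble machinery (GetPreamblePCHPath, ComputePreamble, the PreambleFileHash dependency checks, and the PrecompilePreambleAction/PrecompilePreambleConsumer pair) for the new PrecompiledPreamble class that the CMake change below adds to clangFrontend. A minimal sketch of the resulting rebuild-or-reuse flow, using only the API surface visible in this patch; the helper name and the surrounding setup are illustrative, not part of the change:

    // Sketch only: assumes Inv, Diags, VFS, PCHOps and Callbacks are
    // configured elsewhere (declarations in clang/Frontend/PrecompiledPreamble.h).
    static void rebuildPreambleIfNeeded(
        llvm::Optional<PrecompiledPreamble> &Preamble,
        const CompilerInvocation &Inv, llvm::MemoryBuffer *MainFileBuffer,
        DiagnosticsEngine &Diags, IntrusiveRefCntPtr<vfs::FileSystem> VFS,
        std::shared_ptr<PCHContainerOperations> PCHOps,
        PreambleCallbacks &Callbacks) {
      PreambleBounds Bounds = ComputePreambleBounds(*Inv.getLangOpts(),
                                                    MainFileBuffer,
                                                    /*MaxLines=*/0);
      if (!Bounds.Size)
        return; // the main file has no preamble region
      if (Preamble && Preamble->CanReuse(Inv, MainFileBuffer, Bounds, VFS.get()))
        return; // same preamble text, unchanged dependencies: keep the PCH
      Preamble.reset();
      if (auto Built = PrecompiledPreamble::Build(Inv, MainFileBuffer, Bounds,
                                                  Diags, VFS, PCHOps, Callbacks))
        Preamble = std::move(*Built);
      // On failure, callers inspect the BuildPreambleError to choose a retry
      // interval, as getMainBufferWithPrecompiledPreamble does above.
    }

A subsequent parse then calls Preamble->AddImplicitPreamble(Invocation, MainFileBuffer) to remap the main file onto the generated PCH, exactly as ASTUnit::Parse and ASTUnit::CodeComplete now do.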
diff --git a/interpreter/llvm/src/tools/clang/lib/Frontend/CMakeLists.txt b/interpreter/llvm/src/tools/clang/lib/Frontend/CMakeLists.txt
index 18abecd2071cc..ba3bd7d28c703 100644
--- a/interpreter/llvm/src/tools/clang/lib/Frontend/CMakeLists.txt
+++ b/interpreter/llvm/src/tools/clang/lib/Frontend/CMakeLists.txt
@@ -38,6 +38,7 @@ add_clang_library(clangFrontend
   ModuleDependencyCollector.cpp
   MultiplexConsumer.cpp
   PCHContainerOperations.cpp
+  PrecompiledPreamble.cpp
   PrintPreprocessedOutput.cpp
   SerializedDiagnosticPrinter.cpp
   SerializedDiagnosticReader.cpp
diff --git a/interpreter/llvm/src/tools/clang/lib/Frontend/ChainedIncludesSource.cpp b/interpreter/llvm/src/tools/clang/lib/Frontend/ChainedIncludesSource.cpp
index b984c2ed0dd51..534c7587f48d8 100644
--- a/interpreter/llvm/src/tools/clang/lib/Frontend/ChainedIncludesSource.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Frontend/ChainedIncludesSource.cpp
@@ -83,7 +83,7 @@ createASTReader(CompilerInstance &CI, StringRef pchFile,
                 ASTDeserializationListener *deserialListener = nullptr) {
   Preprocessor &PP = CI.getPreprocessor();
   std::unique_ptr<ASTReader> Reader;
-  Reader.reset(new ASTReader(PP, CI.getASTContext(),
+  Reader.reset(new ASTReader(PP, &CI.getASTContext(),
                              CI.getPCHContainerReader(),
                              /*Extensions=*/{ },
                              /*isysroot=*/"", /*DisableValidation=*/true));
diff --git a/interpreter/llvm/src/tools/clang/lib/Frontend/CompilerInstance.cpp b/interpreter/llvm/src/tools/clang/lib/Frontend/CompilerInstance.cpp
index 6e13d8289d493..bb6a665cb4565 100644
--- a/interpreter/llvm/src/tools/clang/lib/Frontend/CompilerInstance.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Frontend/CompilerInstance.cpp
@@ -11,6 +11,7 @@
 #include "clang/AST/ASTConsumer.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/Decl.h"
+#include "clang/Basic/CharInfo.h"
 #include "clang/Basic/Diagnostic.h"
 #include "clang/Basic/FileManager.h"
 #include "clang/Basic/MemoryBufferCache.h"
@@ -299,16 +300,12 @@ CompilerInstance::createDiagnostics(DiagnosticOptions *Opts,
 
 // File Manager
 
-FileManager *CompilerInstance::createFileManager() {
+void CompilerInstance::createFileManager() {
   if (!hasVirtualFileSystem()) {
-    if (IntrusiveRefCntPtr<vfs::FileSystem> VFS =
-            createVFSFromCompilerInvocation(getInvocation(), getDiagnostics()))
-      setVirtualFileSystem(VFS);
-    else
-      return nullptr;
+    // TODO: choose the virtual file system based on the CompilerInvocation.
+    setVirtualFileSystem(vfs::getRealFileSystem());
   }
   FileMgr = new FileManager(getFileSystemOpts(), VirtualFileSystem);
-  return FileMgr.get();
 }
 
 // Source Manager
@@ -520,7 +517,7 @@ IntrusiveRefCntPtr<ASTReader> CompilerInstance::createPCHExternalASTSource(
   HeaderSearchOptions &HSOpts = PP.getHeaderSearchInfo().getHeaderSearchOpts();
 
   IntrusiveRefCntPtr<ASTReader> Reader(new ASTReader(
-      PP, Context, PCHContainerRdr, Extensions,
+      PP, &Context, PCHContainerRdr, Extensions,
       Sysroot.empty() ? "" : Sysroot.data(), DisablePCHValidation,
       AllowPCHWithCompilerErrors, /*AllowConfigurationMismatch*/ false,
       HSOpts.ModulesValidateSystemHeaders, UseGlobalModuleIndex));
@@ -671,6 +668,11 @@ void CompilerInstance::clearOutputFiles(bool EraseFiles) {
       llvm::sys::fs::remove(OF.Filename);
   }
   OutputFiles.clear();
+  if (DeleteBuiltModules) {
+    for (auto &Module : BuiltModules)
+      llvm::sys::fs::remove(Module.second);
+    BuiltModules.clear();
+  }
   NonSeekStream.reset();
 }
 
@@ -823,8 +825,11 @@ bool CompilerInstance::InitializeSourceManager(
     const FrontendInputFile &Input, DiagnosticsEngine &Diags,
     FileManager &FileMgr, SourceManager &SourceMgr, HeaderSearch *HS,
     DependencyOutputOptions &DepOpts, const FrontendOptions &Opts) {
-  SrcMgr::CharacteristicKind
-    Kind = Input.isSystem() ? SrcMgr::C_System : SrcMgr::C_User;
+  SrcMgr::CharacteristicKind Kind =
+      Input.getKind().getFormat() == InputKind::ModuleMap
+          ? Input.isSystem() ? SrcMgr::C_System_ModuleMap
+                             : SrcMgr::C_User_ModuleMap
+          : Input.isSystem() ? SrcMgr::C_System : SrcMgr::C_User;
 
   if (Input.isBuffer()) {
     SourceMgr.setMainFileID(SourceMgr.createFileID(
@@ -931,8 +936,9 @@ bool CompilerInstance::ExecuteAction(FrontendAction &Act) {
   if (!hasTarget())
     return false;
 
-  // Create TargetInfo for the other side of CUDA compilation.
-  if (getLangOpts().CUDA && !getFrontendOpts().AuxTriple.empty()) {
+  // Create TargetInfo for the other side of CUDA and OpenMP compilation.
+  if ((getLangOpts().CUDA || getLangOpts().OpenMPIsDevice) &&
+      !getFrontendOpts().AuxTriple.empty()) {
     auto TO = std::make_shared<TargetOptions>();
     TO->Triple = getFrontendOpts().AuxTriple;
     TO->HostTriple = getTarget().getTriple().str();
@@ -1033,13 +1039,14 @@ static InputKind::Language getLanguageFromOptions(const LangOptions &LangOpts) {
 /// \brief Compile a module file for the given module, using the options 
 /// provided by the importing compiler instance. Returns true if the module
 /// was built without errors.
-static bool compileModuleImpl(CompilerInstance &ImportingInstance,
-                              SourceLocation ImportLoc,
-                              Module *Module,
-                              StringRef ModuleFileName) {
-  ModuleMap &ModMap 
-    = ImportingInstance.getPreprocessor().getHeaderSearchInfo().getModuleMap();
-    
+static bool
+compileModuleImpl(CompilerInstance &ImportingInstance, SourceLocation ImportLoc,
+                  StringRef ModuleName, FrontendInputFile Input,
+                  StringRef OriginalModuleMapFile, StringRef ModuleFileName,
+                  llvm::function_ref<void(CompilerInstance &)> PreBuildStep =
+                      [](CompilerInstance &) {},
+                  llvm::function_ref<void(CompilerInstance &)> PostBuildStep =
+                      [](CompilerInstance &) {}) {
   // Construct a compiler invocation for creating this module.
   auto Invocation =
       std::make_shared<CompilerInvocation>(ImportingInstance.getInvocation());
@@ -1064,7 +1071,7 @@ static bool compileModuleImpl(CompilerInstance &ImportingInstance,
       PPOpts.Macros.end());
 
   // Note the name of the module we're building.
-  Invocation->getLangOpts()->CurrentModule = Module->getTopLevelModuleName();
+  Invocation->getLangOpts()->CurrentModule = ModuleName;
 
   // Make sure that the failed-module structure has been allocated in
   // the importing instance, and propagate the pointer to the newly-created
@@ -1084,13 +1091,10 @@ static bool compileModuleImpl(CompilerInstance &ImportingInstance,
   FrontendOpts.DisableFree = false;
   FrontendOpts.GenerateGlobalModuleIndex = false;
   FrontendOpts.BuildingImplicitModule = true;
-  FrontendOpts.OriginalModuleMap =
-      ModMap.getModuleMapFileForUniquing(Module)->getName();
+  FrontendOpts.OriginalModuleMap = OriginalModuleMapFile;
   // Force implicitly-built modules to hash the content of the module file.
   HSOpts.ModulesHashContent = true;
-  FrontendOpts.Inputs.clear();
-  InputKind IK(getLanguageFromOptions(*Invocation->getLangOpts()),
-               InputKind::ModuleMap);
+  FrontendOpts.Inputs = {Input};
 
   // Don't free the remapped file buffers; they are owned by our caller.
   PPOpts.RetainRemappedFileBuffers = true;
@@ -1121,7 +1125,7 @@ static bool compileModuleImpl(CompilerInstance &ImportingInstance,
   SourceManager &SourceMgr = Instance.getSourceManager();
   SourceMgr.setModuleBuildStack(
     ImportingInstance.getSourceManager().getModuleBuildStack());
-  SourceMgr.pushModuleBuildStack(Module->getTopLevelModuleName(),
+  SourceMgr.pushModuleBuildStack(ModuleName,
     FullSourceLoc(ImportLoc, ImportingInstance.getSourceManager()));
 
   // If we're collecting module dependencies, we need to share a collector
@@ -1130,32 +1134,11 @@ static bool compileModuleImpl(CompilerInstance &ImportingInstance,
   Instance.setModuleDepCollector(ImportingInstance.getModuleDepCollector());
   Inv.getDependencyOutputOpts() = DependencyOutputOptions();
 
-  // Get or create the module map that we'll use to build this module.
-  std::string InferredModuleMapContent;
-  if (const FileEntry *ModuleMapFile =
-          ModMap.getContainingModuleMapFile(Module)) {
-    // Use the module map where this module resides.
-    FrontendOpts.Inputs.emplace_back(ModuleMapFile->getName(), IK,
-                                     +Module->IsSystem);
-  } else {
-    SmallString<128> FakeModuleMapFile(Module->Directory->getName());
-    llvm::sys::path::append(FakeModuleMapFile, "__inferred_module.map");
-    FrontendOpts.Inputs.emplace_back(FakeModuleMapFile, IK, +Module->IsSystem);
-
-    llvm::raw_string_ostream OS(InferredModuleMapContent);
-    Module->print(OS);
-    OS.flush();
-
-    std::unique_ptr<llvm::MemoryBuffer> ModuleMapBuffer =
-        llvm::MemoryBuffer::getMemBuffer(InferredModuleMapContent);
-    ModuleMapFile = Instance.getFileManager().getVirtualFile(
-        FakeModuleMapFile, InferredModuleMapContent.size(), 0);
-    SourceMgr.overrideFileContents(ModuleMapFile, std::move(ModuleMapBuffer));
-  }
-
   ImportingInstance.getDiagnostics().Report(ImportLoc,
                                             diag::remark_module_build)
-    << Module->Name << ModuleFileName;
+    << ModuleName << ModuleFileName;
+
+  PreBuildStep(Instance);
 
   // Execute the action to actually build the module in-place. Use a separate
   // thread so that we get a stack large enough.
@@ -1168,9 +1151,11 @@ static bool compileModuleImpl(CompilerInstance &ImportingInstance,
       },
       ThreadStackSize);
 
+  PostBuildStep(Instance);
+
   ImportingInstance.getDiagnostics().Report(ImportLoc,
                                             diag::remark_module_build_done)
-    << Module->Name;
+    << ModuleName;
 
   // Delete the temporary module map file.
   // FIXME: Even though we're executing under crash protection, it would still
@@ -1178,13 +1163,66 @@ static bool compileModuleImpl(CompilerInstance &ImportingInstance,
   // doesn't make sense for all clients, so clean this up manually.
   Instance.clearOutputFiles(/*EraseFiles=*/true);
 
+  return !Instance.getDiagnostics().hasErrorOccurred();
+}
+
+/// \brief Compile a module file for the given module, using the options 
+/// provided by the importing compiler instance. Returns true if the module
+/// was built without errors.
+static bool compileModuleImpl(CompilerInstance &ImportingInstance,
+                              SourceLocation ImportLoc,
+                              Module *Module,
+                              StringRef ModuleFileName) {
+  InputKind IK(getLanguageFromOptions(ImportingInstance.getLangOpts()),
+               InputKind::ModuleMap);
+
+  // Get or create the module map that we'll use to build this module.
+  ModuleMap &ModMap 
+    = ImportingInstance.getPreprocessor().getHeaderSearchInfo().getModuleMap();
+  bool Result;
+  if (const FileEntry *ModuleMapFile =
+          ModMap.getContainingModuleMapFile(Module)) {
+    // Use the module map where this module resides.
+    Result = compileModuleImpl(
+        ImportingInstance, ImportLoc, Module->getTopLevelModuleName(),
+        FrontendInputFile(ModuleMapFile->getName(), IK, +Module->IsSystem),
+        ModMap.getModuleMapFileForUniquing(Module)->getName(),
+        ModuleFileName);
+  } else {
+    // FIXME: We only need to fake up an input file here as a way of
+    // transporting the module's directory to the module map parser. We should
+    // be able to do that more directly, and parse from a memory buffer without
+    // inventing this file.
+    SmallString<128> FakeModuleMapFile(Module->Directory->getName());
+    llvm::sys::path::append(FakeModuleMapFile, "__inferred_module.map");
+
+    std::string InferredModuleMapContent;
+    llvm::raw_string_ostream OS(InferredModuleMapContent);
+    Module->print(OS);
+    OS.flush();
+
+    Result = compileModuleImpl(
+        ImportingInstance, ImportLoc, Module->getTopLevelModuleName(),
+        FrontendInputFile(FakeModuleMapFile, IK, +Module->IsSystem),
+        ModMap.getModuleMapFileForUniquing(Module)->getName(),
+        ModuleFileName,
+        [&](CompilerInstance &Instance) {
+      std::unique_ptr<llvm::MemoryBuffer> ModuleMapBuffer =
+          llvm::MemoryBuffer::getMemBuffer(InferredModuleMapContent);
+      ModuleMapFile = Instance.getFileManager().getVirtualFile(
+          FakeModuleMapFile, InferredModuleMapContent.size(), 0);
+      Instance.getSourceManager().overrideFileContents(
+          ModuleMapFile, std::move(ModuleMapBuffer));
+    });
+  }
+
   // We've rebuilt a module. If we're allowed to generate or update the global
   // module index, record that fact in the importing compiler instance.
   if (ImportingInstance.getFrontendOpts().GenerateGlobalModuleIndex) {
     ImportingInstance.setBuildGlobalModuleIndex(true);
   }
 
-  return !Instance.getDiagnostics().hasErrorOccurred();
+  return Result;
 }
 
 static bool compileAndLoadModule(CompilerInstance &ImportingInstance,
@@ -1435,7 +1473,7 @@ void CompilerInstance::createModuleManager() {
                                                  "Reading modules",
                                                  *FrontendTimerGroup);
     ModuleManager = new ASTReader(
-        getPreprocessor(), getASTContext(), getPCHContainerReader(),
+        getPreprocessor(), &getASTContext(), getPCHContainerReader(),
         getFrontendOpts().ModuleFileExtensions,
         Sysroot.empty() ? "" : Sysroot.c_str(), PPOpts.DisablePCHValidation,
         /*AllowASTWithCompilerErrors=*/false,
@@ -1590,24 +1628,36 @@ CompilerInstance::loadModule(SourceLocation ImportLoc,
         PP->getHeaderSearchInfo().getHeaderSearchOpts();
 
     std::string ModuleFileName;
-    bool LoadFromPrebuiltModulePath = false;
-    // We try to load the module from the prebuilt module paths. If not
-    // successful, we then try to find it in the module cache.
-    if (!HSOpts.PrebuiltModulePaths.empty()) {
-      // Load the module from the prebuilt module path.
+    enum ModuleSource {
+      ModuleNotFound, ModuleCache, PrebuiltModulePath, ModuleBuildPragma
+    } Source = ModuleNotFound;
+
+    // Check to see if the module has been built as part of this compilation
+    // via a module build pragma.
+    auto BuiltModuleIt = BuiltModules.find(ModuleName);
+    if (BuiltModuleIt != BuiltModules.end()) {
+      ModuleFileName = BuiltModuleIt->second;
+      Source = ModuleBuildPragma;
+    }
+
+    // Try to load the module from the prebuilt module path.
+    if (Source == ModuleNotFound && !HSOpts.PrebuiltModulePaths.empty()) {
       ModuleFileName = PP->getHeaderSearchInfo().getModuleFileName(
           ModuleName, "", /*UsePrebuiltPath*/ true);
       if (!ModuleFileName.empty())
-        LoadFromPrebuiltModulePath = true;
+        Source = PrebuiltModulePath;
     }
-    if (!LoadFromPrebuiltModulePath && Module) {
-      // Load the module from the module cache.
+
+    // Try to load the module from the module cache.
+    if (Source == ModuleNotFound && Module) {
       ModuleFileName = PP->getHeaderSearchInfo().getModuleFileName(Module);
-    } else if (!LoadFromPrebuiltModulePath) {
+      Source = ModuleCache;
+    }
+
+    if (Source == ModuleNotFound) {
       // We can't find a module, error out here.
       getDiagnostics().Report(ModuleNameLoc, diag::err_module_not_found)
-      << ModuleName
-      << SourceRange(ImportLoc, ModuleNameLoc);
+          << ModuleName << SourceRange(ImportLoc, ModuleNameLoc);
       ModuleBuildFailed = true;
       return ModuleLoadResult();
     }
@@ -1635,20 +1685,20 @@ CompilerInstance::loadModule(SourceLocation ImportLoc,
                  *FrontendTimerGroup);
     llvm::TimeRegion TimeLoading(FrontendTimerGroup ? &Timer : nullptr);
 
-    // Try to load the module file. If we are trying to load from the prebuilt
-    // module path, we don't have the module map files and don't know how to
-    // rebuild modules.
-    unsigned ARRFlags = LoadFromPrebuiltModulePath ?
-                        ASTReader::ARR_ConfigurationMismatch :
-                        ASTReader::ARR_OutOfDate | ASTReader::ARR_Missing;
+    // Try to load the module file. If we are not trying to load from the
+    // module cache, we don't know how to rebuild modules.
+    unsigned ARRFlags = Source == ModuleCache ?
+                        ASTReader::ARR_OutOfDate | ASTReader::ARR_Missing :
+                        ASTReader::ARR_ConfigurationMismatch;
     switch (ModuleManager->ReadAST(ModuleFileName,
-                                   LoadFromPrebuiltModulePath ?
-                                   serialization::MK_PrebuiltModule :
-                                   serialization::MK_ImplicitModule,
-                                   ImportLoc,
-                                   ARRFlags)) {
+                                   Source == PrebuiltModulePath
+                                       ? serialization::MK_PrebuiltModule
+                                       : Source == ModuleBuildPragma
+                                             ? serialization::MK_ExplicitModule
+                                             : serialization::MK_ImplicitModule,
+                                   ImportLoc, ARRFlags)) {
     case ASTReader::Success: {
-      if (LoadFromPrebuiltModulePath && !Module) {
+      if (Source != ModuleCache && !Module) {
         Module = PP->getHeaderSearchInfo().lookupModule(ModuleName);
         if (!Module || !Module->getASTFile() ||
             FileMgr->getFile(ModuleFileName) != Module->getASTFile()) {
@@ -1666,10 +1716,10 @@ CompilerInstance::loadModule(SourceLocation ImportLoc,
 
     case ASTReader::OutOfDate:
     case ASTReader::Missing: {
-      if (LoadFromPrebuiltModulePath) {
-        // We can't rebuild the module without a module map. Since ReadAST
-        // already produces diagnostics for these two cases, we simply
-        // error out here.
+      if (Source != ModuleCache) {
+        // We don't know the desired configuration for this module and don't
+        // necessarily even have a module map. Since ReadAST already produces
+        // diagnostics for these two cases, we simply error out here.
         ModuleBuildFailed = true;
         KnownModules[Path[0].first] = nullptr;
         return ModuleLoadResult();
@@ -1726,11 +1776,14 @@ CompilerInstance::loadModule(SourceLocation ImportLoc,
     }
 
     case ASTReader::ConfigurationMismatch:
-      if (LoadFromPrebuiltModulePath)
+      if (Source == PrebuiltModulePath)
+        // FIXME: We shouldn't be setting HadFatalFailure below if we only
+        // produce a warning here!
         getDiagnostics().Report(SourceLocation(),
                                 diag::warn_module_config_mismatch)
             << ModuleFileName;
       // Fall through to error out.
+      LLVM_FALLTHROUGH;
     case ASTReader::VersionMismatch:
     case ASTReader::HadErrors:
       ModuleLoader::HadFatalFailure = true;
@@ -1754,7 +1807,7 @@ CompilerInstance::loadModule(SourceLocation ImportLoc,
   // If we never found the module, fail.
   if (!Module)
     return ModuleLoadResult();
-  
+
   // Verify that the rest of the module path actually corresponds to
   // a submodule.
   if (Path.size() > 1) {
@@ -1827,20 +1880,10 @@ CompilerInstance::loadModule(SourceLocation ImportLoc,
     }
 
     // Check whether this module is available.
-    clang::Module::Requirement Requirement;
-    clang::Module::UnresolvedHeaderDirective MissingHeader;
-    if (!Module->isAvailable(getLangOpts(), getTarget(), Requirement,
-                             MissingHeader)) {
-      if (MissingHeader.FileNameLoc.isValid()) {
-        getDiagnostics().Report(MissingHeader.FileNameLoc,
-                                diag::err_module_header_missing)
-          << MissingHeader.IsUmbrella << MissingHeader.FileName;
-      } else {
-        getDiagnostics().Report(ImportLoc, diag::err_module_unavailable)
-          << Module->getFullModuleName()
-          << Requirement.second << Requirement.first
-          << SourceRange(Path.front().second, Path.back().second);
-      }
+    if (Preprocessor::checkModuleIsAvailable(getLangOpts(), getTarget(),
+                                             getDiagnostics(), Module)) {
+      getDiagnostics().Report(ImportLoc, diag::note_module_import_here)
+        << SourceRange(Path.front().second, Path.back().second);
       LastModuleImportLoc = ImportLoc;
       LastModuleImportResult = ModuleLoadResult();
       return ModuleLoadResult();
@@ -1861,6 +1904,59 @@ CompilerInstance::loadModule(SourceLocation ImportLoc,
   return LastModuleImportResult;
 }
 
+void CompilerInstance::loadModuleFromSource(SourceLocation ImportLoc,
+                                            StringRef ModuleName,
+                                            StringRef Source) {
+  // Avoid creating filenames with special characters.
+  SmallString<128> CleanModuleName(ModuleName);
+  for (auto &C : CleanModuleName)
+    if (!isAlphanumeric(C))
+      C = '_';
+
+  // FIXME: Using a randomized filename here means that our intermediate .pcm
+  // output is nondeterministic (as .pcm files refer to each other by name).
+  // Can this affect the output in any way?
+  SmallString<128> ModuleFileName;
+  if (std::error_code EC = llvm::sys::fs::createTemporaryFile(
+          CleanModuleName, "pcm", ModuleFileName)) {
+    getDiagnostics().Report(ImportLoc, diag::err_fe_unable_to_open_output)
+        << ModuleFileName << EC.message();
+    return;
+  }
+  std::string ModuleMapFileName = (CleanModuleName + ".map").str();
+
+  FrontendInputFile Input(
+      ModuleMapFileName,
+      InputKind(getLanguageFromOptions(*Invocation->getLangOpts()),
+                InputKind::ModuleMap, /*Preprocessed*/true));
+
+  std::string NullTerminatedSource(Source.str());
+
+  auto PreBuildStep = [&](CompilerInstance &Other) {
+    // Create a virtual file containing our desired source.
+    // FIXME: We shouldn't need to do this.
+    const FileEntry *ModuleMapFile = Other.getFileManager().getVirtualFile(
+        ModuleMapFileName, NullTerminatedSource.size(), 0);
+    Other.getSourceManager().overrideFileContents(
+        ModuleMapFile,
+        llvm::MemoryBuffer::getMemBuffer(NullTerminatedSource.c_str()));
+
+    Other.BuiltModules = std::move(BuiltModules);
+    Other.DeleteBuiltModules = false;
+  };
+
+  auto PostBuildStep = [this](CompilerInstance &Other) {
+    BuiltModules = std::move(Other.BuiltModules);
+  };
+
+  // Build the module, inheriting any modules that we've built locally.
+  if (compileModuleImpl(*this, ImportLoc, ModuleName, Input, StringRef(),
+                        ModuleFileName, PreBuildStep, PostBuildStep)) {
+    BuiltModules[ModuleName] = ModuleFileName.str();
+    llvm::sys::RemoveFileOnSignal(ModuleFileName);
+  }
+}
+
 void CompilerInstance::makeModuleVisible(Module *Mod,
                                          Module::NameVisibilityKind Visibility,
                                          SourceLocation ImportLoc) {
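// ---- Editor's note (hedged sketch) ----
// loadModuleFromSource() above is the backend for '#pragma clang module
// build': the pragma's body becomes the Source string and is compiled as a
// preprocessed module map into a temporary .pcm recorded in BuiltModules.
// A translation unit exercising it might look roughly like:
//
//   #pragma clang module build Foo
//   module Foo {}
//   #pragma clang module contents
//   #pragma clang module begin Foo
//   extern int n;
//   #pragma clang module end
//   #pragma clang module endbuild
//
// after which an import of Foo resolves through BuiltModules["Foo"]
// (the ModuleBuildPragma case in loadModule above).
// ---------------------------------------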
diff --git a/interpreter/llvm/src/tools/clang/lib/Frontend/CompilerInvocation.cpp b/interpreter/llvm/src/tools/clang/lib/Frontend/CompilerInvocation.cpp
index 51147b6f94997..0d0869c815d3b 100644
--- a/interpreter/llvm/src/tools/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Frontend/CompilerInvocation.cpp
@@ -476,6 +476,10 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK,
       OPT_fexperimental_new_pass_manager, OPT_fno_experimental_new_pass_manager,
       /* Default */ false);
 
+  Opts.DebugPassManager =
+      Args.hasFlag(OPT_fdebug_pass_manager, OPT_fno_debug_pass_manager,
+                   /* Default */ false);
+
   if (Arg *A = Args.getLastArg(OPT_fveclib)) {
     StringRef Name = A->getValue();
     if (Name == "Accelerate")
@@ -534,6 +538,7 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK,
 
   Opts.DisableLLVMPasses = Args.hasArg(OPT_disable_llvm_passes);
   Opts.DisableLifetimeMarkers = Args.hasArg(OPT_disable_lifetimemarkers);
+  Opts.DisableO0ImplyOptNone = Args.hasArg(OPT_disable_O0_optnone);
   Opts.DisableRedZone = Args.hasArg(OPT_disable_red_zone);
   Opts.ForbidGuardVariables = Args.hasArg(OPT_fforbid_guard_variables);
   Opts.UseRegisterSizedBitfieldAccess = Args.hasArg(
@@ -568,6 +573,33 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK,
   if (!Opts.ProfileInstrumentUsePath.empty())
     setPGOUseInstrumentor(Opts, Opts.ProfileInstrumentUsePath);
 
+  if (Arg *A = Args.getLastArg(OPT_fclang_abi_compat_EQ)) {
+    Opts.setClangABICompat(CodeGenOptions::ClangABI::Latest);
+
+    StringRef Ver = A->getValue();
+    std::pair<StringRef, StringRef> VerParts = Ver.split('.');
+    unsigned Major, Minor = 0;
+
+    // Check the version number is valid: either 3.x (0 <= x <= 9) or
+    // y or y.0 (4 <= y <= current version).
+    if (!VerParts.first.startswith("0") &&
+        !VerParts.first.getAsInteger(10, Major) &&
+        3 <= Major && Major <= CLANG_VERSION_MAJOR &&
+        (Major == 3 ? VerParts.second.size() == 1 &&
+                      !VerParts.second.getAsInteger(10, Minor)
+                    : VerParts.first.size() == Ver.size() ||
+                      VerParts.second == "0")) {
+      // Got a valid version number.
+      if (Major == 3 && Minor <= 8)
+        Opts.setClangABICompat(CodeGenOptions::ClangABI::Ver3_8);
+      else if (Major <= 4)
+        Opts.setClangABICompat(CodeGenOptions::ClangABI::Ver4);
+    } else if (Ver != "latest") {
+      Diags.Report(diag::err_drv_invalid_value)
+          << A->getAsString(Args) << A->getValue();
+    }
+  }
+
   Opts.CoverageMapping =
       Args.hasFlag(OPT_fcoverage_mapping, OPT_fno_coverage_mapping, false);
   Opts.DumpCoverageMapping = Args.hasArg(OPT_dump_coverage_mapping);
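// ---- Editor's note ----
// Worked examples of the -fclang-abi-compat= validation above, assuming
// CLANG_VERSION_MAJOR == 5:
//   -fclang-abi-compat=3.8      -> ClangABI::Ver3_8
//   -fclang-abi-compat=3.9      -> ClangABI::Ver4   (Major == 3, Minor > 8)
//   -fclang-abi-compat=4 / =4.0 -> ClangABI::Ver4
//   -fclang-abi-compat=5        -> ClangABI::Latest
//   -fclang-abi-compat=latest   -> ClangABI::Latest
//   =3, =03.8, =4.1             -> err_drv_invalid_value
// ------------------------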
@@ -648,8 +680,14 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK,
   Opts.NoUseJumpTables = Args.hasArg(OPT_fno_jump_tables);
 
   Opts.PrepareForLTO = Args.hasArg(OPT_flto, OPT_flto_EQ);
-  const Arg *A = Args.getLastArg(OPT_flto, OPT_flto_EQ);
-  Opts.EmitSummaryIndex = A && A->containsValue("thin");
+  Opts.EmitSummaryIndex = false;
+  if (Arg *A = Args.getLastArg(OPT_flto_EQ)) {
+    StringRef S = A->getValue();
+    if (S == "thin")
+      Opts.EmitSummaryIndex = true;
+    else if (S != "full")
+      Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << S;
+  }
   Opts.LTOUnit = Args.hasFlag(OPT_flto_unit, OPT_fno_lto_unit, false);
   if (Arg *A = Args.getLastArg(OPT_fthinlto_index_EQ)) {
     if (IK.getLanguage() != InputKind::LLVM_IR)
@@ -661,7 +699,6 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK,
 
   Opts.MSVolatile = Args.hasArg(OPT_fms_volatile);
 
-  Opts.VectorizeBB = Args.hasArg(OPT_vectorize_slp_aggressive);
   Opts.VectorizeLoop = Args.hasArg(OPT_vectorize_loops);
   Opts.VectorizeSLP = Args.hasArg(OPT_vectorize_slp);
 
@@ -738,7 +775,22 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK,
   Opts.InstrumentForProfiling = Args.hasArg(OPT_pg);
   Opts.CallFEntry = Args.hasArg(OPT_mfentry);
   Opts.EmitOpenCLArgMetadata = Args.hasArg(OPT_cl_kernel_arg_info);
-  Opts.CompressDebugSections = Args.hasArg(OPT_compress_debug_sections);
+
+  if (const Arg *A = Args.getLastArg(OPT_compress_debug_sections,
+                                     OPT_compress_debug_sections_EQ)) {
+    if (A->getOption().getID() == OPT_compress_debug_sections) {
+      // TODO: be more clever about the compression type auto-detection
+      Opts.setCompressDebugSections(llvm::DebugCompressionType::GNU);
+    } else {
+      auto DCT = llvm::StringSwitch<llvm::DebugCompressionType>(A->getValue())
+                     .Case("none", llvm::DebugCompressionType::None)
+                     .Case("zlib", llvm::DebugCompressionType::Z)
+                     .Case("zlib-gnu", llvm::DebugCompressionType::GNU)
+                     .Default(llvm::DebugCompressionType::None);
+      Opts.setCompressDebugSections(DCT);
+    }
+  }
+
   Opts.RelaxELFRelocations = Args.hasArg(OPT_mrelax_relocations);
   Opts.DebugCompilationDir = Args.getLastArgValue(OPT_fdebug_compilation_dir);
   for (auto A : Args.filtered(OPT_mlink_bitcode_file, OPT_mlink_cuda_bitcode)) {
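// ---- Editor's note ----
// llvm::StringSwitch above is a compact first-match string mapper; the
// equivalent explicit chain for the same table would be:
//   if (V == "zlib")          DCT = llvm::DebugCompressionType::Z;
//   else if (V == "zlib-gnu") DCT = llvm::DebugCompressionType::GNU;
//   else                      DCT = llvm::DebugCompressionType::None;
// Note that .Default(None) means an unrecognized value is quietly treated
// as "none" rather than diagnosed here.
// ------------------------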
@@ -767,6 +819,8 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK,
   Opts.SanitizeCoverageTracePCGuard =
       Args.hasArg(OPT_fsanitize_coverage_trace_pc_guard);
   Opts.SanitizeCoverageNoPrune = Args.hasArg(OPT_fsanitize_coverage_no_prune);
+  Opts.SanitizeCoverageInline8bitCounters =
+      Args.hasArg(OPT_fsanitize_coverage_inline_8bit_counters);
   Opts.SanitizeMemoryTrackOrigins =
       getLastArgIntValue(Args, OPT_fsanitize_memory_track_origins_EQ, 0, Diags);
   Opts.SanitizeMemoryUseAfterDtor =
@@ -881,14 +935,24 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK,
 
   Opts.DiagnosticsWithHotness =
       Args.hasArg(options::OPT_fdiagnostics_show_hotness);
-  if (Opts.DiagnosticsWithHotness &&
-      Opts.getProfileUse() == CodeGenOptions::ProfileNone)
-    Diags.Report(diag::warn_drv_fdiagnostics_show_hotness_requires_pgo);
+  bool UsingSampleProfile = !Opts.SampleProfileFile.empty();
+  bool UsingProfile = UsingSampleProfile ||
+      (Opts.getProfileUse() != CodeGenOptions::ProfileNone);
+
+  if (Opts.DiagnosticsWithHotness && !UsingProfile)
+    Diags.Report(diag::warn_drv_diagnostics_hotness_requires_pgo)
+        << "-fdiagnostics-show-hotness";
+
+  Opts.DiagnosticsHotnessThreshold = getLastArgUInt64Value(
+      Args, options::OPT_fdiagnostics_hotness_threshold_EQ, 0);
+  if (Opts.DiagnosticsHotnessThreshold > 0 && !UsingProfile)
+    Diags.Report(diag::warn_drv_diagnostics_hotness_requires_pgo)
+        << "-fdiagnostics-hotness-threshold=";
 
   // If the user requested to use a sample profile for PGO, then the
   // backend will need to track source location information so the profile
   // can be incorporated into the IR.
-  if (!Opts.SampleProfileFile.empty())
+  if (UsingSampleProfile)
     NeedLocTracking = true;
 
   // If the user requested a flag that requires source locations available in
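// ---- Editor's note (diagnostic text paraphrased) ----
// Both hotness flags above now warn unless a profile is in use, where
// "profile" covers an instrumentation profile (-fprofile-instr-use=...) or
// a sample profile (-fprofile-sample-use=...). Roughly:
//   clang -fdiagnostics-show-hotness foo.c
//     -> warning: the argument requires profile-guided optimization information
//   clang -fprofile-sample-use=p.txt -fdiagnostics-show-hotness foo.c
//     -> accepted; remarks carry hotness, and location tracking is enabled
//        so the profile can be mapped back onto the IR.
// ------------------------------------------------------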
@@ -1087,6 +1151,9 @@ bool clang::ParseDiagnosticArgs(DiagnosticOptions &Opts, ArgList &Args,
   Opts.SpellCheckingLimit = getLastArgIntValue(
       Args, OPT_fspell_checking_limit,
       DiagnosticOptions::DefaultSpellCheckingLimit, Diags);
+  Opts.SnippetLineLimit = getLastArgIntValue(
+      Args, OPT_fcaret_diagnostics_max_lines,
+      DiagnosticOptions::DefaultSnippetLineLimit, Diags);
   Opts.TabStop = getLastArgIntValue(Args, OPT_ftabstop,
                                     DiagnosticOptions::DefaultTabStop, Diags);
   if (Opts.TabStop == 0 || Opts.TabStop > DiagnosticOptions::MaxTabStop) {
@@ -1643,6 +1710,7 @@ void CompilerInvocation::setLangDefaults(LangOptions &Opts, InputKind IK,
   Opts.CPlusPlus11 = Std.isCPlusPlus11();
   Opts.CPlusPlus14 = Std.isCPlusPlus14();
   Opts.CPlusPlus1z = Std.isCPlusPlus1z();
+  Opts.CPlusPlus2a = Std.isCPlusPlus2a();
   Opts.Digraphs = Std.hasDigraphs();
   Opts.GNUMode = Std.isGNUMode();
   Opts.GNUInline = !Opts.C99 && !Opts.CPlusPlus;
@@ -2075,6 +2143,8 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK,
   Opts.AlignedAllocation =
       Args.hasFlag(OPT_faligned_allocation, OPT_fno_aligned_allocation,
                    Opts.AlignedAllocation);
+  Opts.AlignedAllocationUnavailable =
+      Opts.AlignedAllocation && Args.hasArg(OPT_aligned_alloc_unavailable);
   Opts.NewAlignOverride =
       getLastArgIntValue(Args, OPT_fnew_alignment_EQ, 0, Diags);
   if (Opts.NewAlignOverride && !llvm::isPowerOf2_32(Opts.NewAlignOverride)) {
@@ -2210,8 +2280,8 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK,
     llvm::Triple T(TargetOpts.Triple);
     llvm::Triple::ArchType Arch = T.getArch();
     bool emitError = (DefaultCC == LangOptions::DCC_FastCall ||
-                  DefaultCC == LangOptions::DCC_StdCall) &&
-                 Arch != llvm::Triple::x86;
+                      DefaultCC == LangOptions::DCC_StdCall) &&
+                     Arch != llvm::Triple::x86;
     emitError |= DefaultCC == LangOptions::DCC_VectorCall &&
                  !(Arch == llvm::Triple::x86 || Arch == llvm::Triple::x86_64);
     if (emitError)
@@ -2365,9 +2435,51 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK,
   Opts.AllowEditorPlaceholders = Args.hasArg(OPT_fallow_editor_placeholders);
 }
 
+static bool isStrictlyPreprocessorAction(frontend::ActionKind Action) {
+  switch (Action) {
+  case frontend::ASTDeclList:
+  case frontend::ASTDump:
+  case frontend::ASTPrint:
+  case frontend::ASTView:
+  case frontend::EmitAssembly:
+  case frontend::EmitBC:
+  case frontend::EmitHTML:
+  case frontend::EmitLLVM:
+  case frontend::EmitLLVMOnly:
+  case frontend::EmitCodeGenOnly:
+  case frontend::EmitObj:
+  case frontend::FixIt:
+  case frontend::GenerateModule:
+  case frontend::GenerateModuleInterface:
+  case frontend::GeneratePCH:
+  case frontend::GeneratePTH:
+  case frontend::ParseSyntaxOnly:
+  case frontend::ModuleFileInfo:
+  case frontend::VerifyPCH:
+  case frontend::PluginAction:
+  case frontend::PrintDeclContext:
+  case frontend::RewriteObjC:
+  case frontend::RewriteTest:
+  case frontend::RunAnalysis:
+  case frontend::MigrateSource:
+    return false;
+
+  case frontend::DumpRawTokens:
+  case frontend::DumpTokens:
+  case frontend::InitOnly:
+  case frontend::PrintPreamble:
+  case frontend::PrintPreprocessedInput:
+  case frontend::RewriteMacros:
+  case frontend::RunPreprocessorOnly:
+    return true;
+  }
+  llvm_unreachable("invalid frontend action");
+}
+
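// ---- Editor's note ----
// Hoisting the classification into isStrictlyPreprocessorAction() lets two
// consumers share one switch: ParsePreprocessorArgs() below uses it to
// suppress editor-placeholder lexing, and ParsePreprocessorOutputArgs()
// reduces its old thirty-case switch to
//   Opts.ShowCPP = isStrictlyPreprocessorAction(Action) && !Args.hasArg(OPT_dM);
// Keeping the switch exhaustive (no default:) means the compiler will flag
// any newly added frontend::ActionKind that has not been classified.
// ------------------------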
 static void ParsePreprocessorArgs(PreprocessorOptions &Opts, ArgList &Args,
                                   FileManager &FileMgr,
-                                  DiagnosticsEngine &Diags) {
+                                  DiagnosticsEngine &Diags,
+                                  frontend::ActionKind Action) {
   using namespace options;
   Opts.ImplicitPCHInclude = Args.getLastArgValue(OPT_include_pch);
   Opts.ImplicitPTHInclude = Args.getLastArgValue(OPT_include_pth);
@@ -2440,6 +2552,12 @@ static void ParsePreprocessorArgs(PreprocessorOptions &Opts, ArgList &Args,
     else
       Opts.ObjCXXARCStandardLibrary = (ObjCXXARCStandardLibraryKind)Library;
   }
+
+  // Always avoid lexing editor placeholders when we're just running the
+  // preprocessor as we never want to emit the
+  // "editor placeholder in source file" error in PP only mode.
+  if (isStrictlyPreprocessorAction(Action))
+    Opts.LexEditorPlaceholders = false;
 }
 
 static void ParsePreprocessorOutputArgs(PreprocessorOutputOptions &Opts,
@@ -2447,45 +2565,10 @@ static void ParsePreprocessorOutputArgs(PreprocessorOutputOptions &Opts,
                                         frontend::ActionKind Action) {
   using namespace options;
 
-  switch (Action) {
-  case frontend::ASTDeclList:
-  case frontend::ASTDump:
-  case frontend::ASTPrint:
-  case frontend::ASTView:
-  case frontend::EmitAssembly:
-  case frontend::EmitBC:
-  case frontend::EmitHTML:
-  case frontend::EmitLLVM:
-  case frontend::EmitLLVMOnly:
-  case frontend::EmitCodeGenOnly:
-  case frontend::EmitObj:
-  case frontend::FixIt:
-  case frontend::GenerateModule:
-  case frontend::GenerateModuleInterface:
-  case frontend::GeneratePCH:
-  case frontend::GeneratePTH:
-  case frontend::ParseSyntaxOnly:
-  case frontend::ModuleFileInfo:
-  case frontend::VerifyPCH:
-  case frontend::PluginAction:
-  case frontend::PrintDeclContext:
-  case frontend::RewriteObjC:
-  case frontend::RewriteTest:
-  case frontend::RunAnalysis:
-  case frontend::MigrateSource:
-    Opts.ShowCPP = 0;
-    break;
-
-  case frontend::DumpRawTokens:
-  case frontend::DumpTokens:
-  case frontend::InitOnly:
-  case frontend::PrintPreamble:
-  case frontend::PrintPreprocessedInput:
-  case frontend::RewriteMacros:
-  case frontend::RunPreprocessorOnly:
+  if (isStrictlyPreprocessorAction(Action))
     Opts.ShowCPP = !Args.hasArg(OPT_dM);
-    break;
-  }
+  else
+    Opts.ShowCPP = 0;
 
   Opts.ShowComments = Args.hasArg(OPT_C);
   Opts.ShowLineMarkers = !Args.hasArg(OPT_P);
@@ -2493,6 +2576,7 @@ static void ParsePreprocessorOutputArgs(PreprocessorOutputOptions &Opts,
   Opts.ShowMacros = Args.hasArg(OPT_dM) || Args.hasArg(OPT_dD);
   Opts.ShowIncludeDirectives = Args.hasArg(OPT_dI);
   Opts.RewriteIncludes = Args.hasArg(OPT_frewrite_includes);
+  Opts.RewriteImports = Args.hasArg(OPT_frewrite_imports);
   Opts.UseLineDirectives = Args.hasArg(OPT_fuse_line_directives);
 }
 
@@ -2512,7 +2596,7 @@ static void ParseTargetArgs(TargetOptions &Opts, ArgList &Args,
       Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args)
                                                 << Value;
     else
-      Opts.EABIVersion = Value;
+      Opts.EABIVersion = EABIVersion;
   }
   Opts.CPU = Args.getLastArgValue(OPT_target_cpu);
   Opts.FPMath = Args.getLastArgValue(OPT_mfpmath);
@@ -2599,6 +2683,10 @@ bool CompilerInvocation::CreateFromArgs(CompilerInvocation &Res,
       Res.getTargetOpts().HostTriple = Res.getFrontendOpts().AuxTriple;
   }
 
+  // Set the triple of the host for OpenMP device compile.
+  if (LangOpts.OpenMPIsDevice)
+    Res.getTargetOpts().HostTriple = Res.getFrontendOpts().AuxTriple;
+
   // FIXME: Override value name discarding when asan or msan is used because the
   // backend passes depend on the name of the alloca in order to print out
   // names.
@@ -2611,7 +2699,8 @@ bool CompilerInvocation::CreateFromArgs(CompilerInvocation &Res,
   // ParsePreprocessorArgs and remove the FileManager
   // parameters from the function and the "FileManager.h" #include.
   FileManager FileMgr(Res.getFileSystemOpts());
-  ParsePreprocessorArgs(Res.getPreprocessorOpts(), Args, FileMgr, Diags);
+  ParsePreprocessorArgs(Res.getPreprocessorOpts(), Args, FileMgr, Diags,
+                        Res.getFrontendOpts().ProgramAction);
   ParsePreprocessorOutputArgs(Res.getPreprocessorOutputOpts(), Args,
                               Res.getFrontendOpts().ProgramAction);
 
@@ -2696,6 +2785,13 @@ std::string CompilerInvocation::getModuleHash() const {
     code = ext->hashExtension(code);
   }
 
+  // Extend the signature with the enabled sanitizers, if at least one is
+  // enabled. Sanitizers which cannot affect AST generation aren't hashed.
+  SanitizerSet SanHash = LangOpts->Sanitize;
+  SanHash.clear(getPPTransparentSanitizers());
+  if (!SanHash.empty())
+    code = hash_combine(code, SanHash.Mask);
+
   return llvm::APInt(64, code).toString(36, /*Signed=*/false);
 }
 
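// ---- Editor's note ----
// Effect of the getModuleHash() change above: only sanitizers that can
// influence AST generation contribute to the module hash. For example,
// -fsanitize=address flips __has_feature(address_sanitizer), so an ASan
// build gets its own .pcm; a sanitizer in the PP-transparent set reuses
// the existing cached module instead of forcing a rebuild.
// ------------------------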
@@ -2747,15 +2843,22 @@ void BuryPointer(const void *Ptr) {
 IntrusiveRefCntPtr<vfs::FileSystem>
 createVFSFromCompilerInvocation(const CompilerInvocation &CI,
                                 DiagnosticsEngine &Diags) {
+  return createVFSFromCompilerInvocation(CI, Diags, vfs::getRealFileSystem());
+}
+
+IntrusiveRefCntPtr<vfs::FileSystem>
+createVFSFromCompilerInvocation(const CompilerInvocation &CI,
+                                DiagnosticsEngine &Diags,
+                                IntrusiveRefCntPtr<vfs::FileSystem> BaseFS) {
   if (CI.getHeaderSearchOpts().VFSOverlayFiles.empty())
-    return vfs::getRealFileSystem();
+    return BaseFS;
 
-  IntrusiveRefCntPtr<vfs::OverlayFileSystem>
-    Overlay(new vfs::OverlayFileSystem(vfs::getRealFileSystem()));
+  IntrusiveRefCntPtr<vfs::OverlayFileSystem> Overlay(
+      new vfs::OverlayFileSystem(BaseFS));
   // earlier vfs files are on the bottom
   for (const std::string &File : CI.getHeaderSearchOpts().VFSOverlayFiles) {
     llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Buffer =
-        llvm::MemoryBuffer::getFile(File);
+        BaseFS->getBufferForFile(File);
     if (!Buffer) {
       Diags.Report(diag::err_missing_vfs_overlay_file) << File;
       return IntrusiveRefCntPtr<vfs::FileSystem>();
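// ---- Editor's sketch (hypothetical caller) ----
// The new overload threads a caller-supplied base filesystem underneath
// the -ivfsoverlay stack, so overlay YAML files can be read from somewhere
// other than the real filesystem, e.g. an in-memory FS:
//   IntrusiveRefCntPtr<vfs::InMemoryFileSystem> MemFS(
//       new vfs::InMemoryFileSystem);
//   MemFS->addFile("/vfs/overlay.yaml", /*ModificationTime=*/0,
//                  llvm::MemoryBuffer::getMemBuffer(
//                      "{ 'version': 0, 'roots': [] }"));
//   // with CI.getHeaderSearchOpts().VFSOverlayFiles == {"/vfs/overlay.yaml"}:
//   auto FS = createVFSFromCompilerInvocation(CI, Diags, MemFS);
// The overlay file is then looked up through MemFS, not the disk.
// ------------------------------------------------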
diff --git a/interpreter/llvm/src/tools/clang/lib/Frontend/CreateInvocationFromCommandLine.cpp b/interpreter/llvm/src/tools/clang/lib/Frontend/CreateInvocationFromCommandLine.cpp
index 16269064b6e1b..c3ce7ce2b7420 100644
--- a/interpreter/llvm/src/tools/clang/lib/Frontend/CreateInvocationFromCommandLine.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Frontend/CreateInvocationFromCommandLine.cpp
@@ -31,8 +31,8 @@ using namespace llvm::opt;
 /// \return A CompilerInvocation, or 0 if none was built for the given
 /// argument vector.
 std::unique_ptr<CompilerInvocation> clang::createInvocationFromCommandLine(
-    ArrayRef<const char *> ArgList,
-    IntrusiveRefCntPtr<DiagnosticsEngine> Diags) {
+    ArrayRef<const char *> ArgList, IntrusiveRefCntPtr<DiagnosticsEngine> Diags,
+    IntrusiveRefCntPtr<vfs::FileSystem> VFS) {
   if (!Diags.get()) {
     // No diagnostics engine was provided, so create our own diagnostics object
     // with the default options.
@@ -46,12 +46,14 @@ std::unique_ptr<CompilerInvocation> clang::createInvocationFromCommandLine(
 
   // FIXME: We shouldn't have to pass in the path info.
   driver::Driver TheDriver(Args[0], llvm::sys::getDefaultTargetTriple(),
-                           *Diags);
+                           *Diags, VFS);
 
   // Don't check that inputs exist, they may have been remapped.
   TheDriver.setCheckInputsExist(false);
 
   std::unique_ptr<driver::Compilation> C(TheDriver.BuildCompilation(Args));
+  if (!C)
+    return nullptr;
 
   // Just print the cc1 options if -### was present.
   if (C->getArgs().hasArg(driver::options::OPT__HASH_HASH_HASH)) {
diff --git a/interpreter/llvm/src/tools/clang/lib/Frontend/DependencyFile.cpp b/interpreter/llvm/src/tools/clang/lib/Frontend/DependencyFile.cpp
index bd14c53e4d15f..561eb9c4a3161 100644
--- a/interpreter/llvm/src/tools/clang/lib/Frontend/DependencyFile.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Frontend/DependencyFile.cpp
@@ -55,8 +55,8 @@ struct DepCollectorPPCallbacks : public PPCallbacks {
         llvm::sys::path::remove_leading_dotslash(FE->getName());
 
     DepCollector.maybeAddDependency(Filename, /*FromModule*/false,
-                                   FileType != SrcMgr::C_User,
-                                   /*IsModuleFile*/false, /*IsMissing*/false);
+                                    isSystem(FileType),
+                                    /*IsModuleFile*/false, /*IsMissing*/false);
   }
 
   void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
@@ -265,7 +265,7 @@ bool DFGImpl::FileMatchesDepCriteria(const char *Filename,
   if (IncludeSystemHeaders)
     return true;
 
-  return FileType == SrcMgr::C_User;
+  return !isSystem(FileType);
 }
 
 void DFGImpl::FileChanged(SourceLocation Loc,
diff --git a/interpreter/llvm/src/tools/clang/lib/Frontend/DiagnosticRenderer.cpp b/interpreter/llvm/src/tools/clang/lib/Frontend/DiagnosticRenderer.cpp
index 177feac974411..e3263843e29bf 100644
--- a/interpreter/llvm/src/tools/clang/lib/Frontend/DiagnosticRenderer.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Frontend/DiagnosticRenderer.cpp
@@ -76,20 +76,19 @@ static void mergeFixits(ArrayRef<FixItHint> FixItHints,
   }
 }
 
-void DiagnosticRenderer::emitDiagnostic(SourceLocation Loc,
+void DiagnosticRenderer::emitDiagnostic(FullSourceLoc Loc,
                                         DiagnosticsEngine::Level Level,
                                         StringRef Message,
                                         ArrayRef<CharSourceRange> Ranges,
                                         ArrayRef<FixItHint> FixItHints,
-                                        const SourceManager *SM,
                                         DiagOrStoredDiag D) {
-  assert(SM || Loc.isInvalid());
+  assert(Loc.hasManager() || Loc.isInvalid());
 
   beginDiagnostic(D, Level);
 
   if (!Loc.isValid())
     // If we have no source location, just emit the diagnostic message.
-    emitDiagnosticMessage(Loc, PresumedLoc(), Level, Message, Ranges, SM, D);
+    emitDiagnosticMessage(Loc, PresumedLoc(), Level, Message, Ranges, D);
   else {
     // Get the ranges into a local array we can hack on.
     SmallVector<CharSourceRange, 4> MutableRanges(Ranges.begin(),
@@ -97,7 +96,7 @@ void DiagnosticRenderer::emitDiagnostic(SourceLocation Loc,
 
       SmallVector<FixItHint, 8> MergedFixits;
     if (!FixItHints.empty()) {
-      mergeFixits(FixItHints, *SM, LangOpts, MergedFixits);
+      mergeFixits(FixItHints, Loc.getManager(), LangOpts, MergedFixits);
       FixItHints = MergedFixits;
     }
 
@@ -107,25 +106,25 @@ void DiagnosticRenderer::emitDiagnostic(SourceLocation Loc,
       if (I->RemoveRange.isValid())
         MutableRanges.push_back(I->RemoveRange);
 
-    SourceLocation UnexpandedLoc = Loc;
+    FullSourceLoc UnexpandedLoc = Loc;
 
     // Find the ultimate expansion location for the diagnostic.
-    Loc = SM->getFileLoc(Loc);
+    Loc = Loc.getFileLoc();
 
-    PresumedLoc PLoc = SM->getPresumedLoc(Loc, DiagOpts->ShowPresumedLoc);
+    PresumedLoc PLoc = Loc.getPresumedLoc(DiagOpts->ShowPresumedLoc);
 
     // First, if this diagnostic is not in the main file, print out the
     // "included from" lines.
-    emitIncludeStack(Loc, PLoc, Level, *SM);
+    emitIncludeStack(Loc, PLoc, Level);
 
     // Next, emit the actual diagnostic message and caret.
-    emitDiagnosticMessage(Loc, PLoc, Level, Message, Ranges, SM, D);
-    emitCaret(Loc, Level, MutableRanges, FixItHints, *SM);
+    emitDiagnosticMessage(Loc, PLoc, Level, Message, Ranges, D);
+    emitCaret(Loc, Level, MutableRanges, FixItHints);
 
     // If this location is within a macro, walk from UnexpandedLoc up to Loc
     // and produce a macro backtrace.
     if (UnexpandedLoc.isValid() && UnexpandedLoc.isMacroID()) {
-      emitMacroExpansions(UnexpandedLoc, Level, MutableRanges, FixItHints, *SM);
+      emitMacroExpansions(UnexpandedLoc, Level, MutableRanges, FixItHints);
     }
   }
 
@@ -139,15 +138,12 @@ void DiagnosticRenderer::emitDiagnostic(SourceLocation Loc,
 void DiagnosticRenderer::emitStoredDiagnostic(StoredDiagnostic &Diag) {
   emitDiagnostic(Diag.getLocation(), Diag.getLevel(), Diag.getMessage(),
                  Diag.getRanges(), Diag.getFixIts(),
-                 Diag.getLocation().isValid() ? &Diag.getLocation().getManager()
-                                              : nullptr,
                  &Diag);
 }
 
 void DiagnosticRenderer::emitBasicNote(StringRef Message) {
-  emitDiagnosticMessage(
-      SourceLocation(), PresumedLoc(), DiagnosticsEngine::Note, Message,
-      None, nullptr, DiagOrStoredDiag());
+  emitDiagnosticMessage(FullSourceLoc(), PresumedLoc(), DiagnosticsEngine::Note,
+                        Message, None, DiagOrStoredDiag());
 }
 
 /// \brief Prints an include stack when appropriate for a particular
@@ -161,12 +157,11 @@ void DiagnosticRenderer::emitBasicNote(StringRef Message) {
 /// \param Loc   The diagnostic location.
 /// \param PLoc  The presumed location of the diagnostic location.
 /// \param Level The diagnostic level of the message this stack pertains to.
-void DiagnosticRenderer::emitIncludeStack(SourceLocation Loc,
-                                          PresumedLoc PLoc,
-                                          DiagnosticsEngine::Level Level,
-                                          const SourceManager &SM) {
-  SourceLocation IncludeLoc =
-      PLoc.isInvalid() ? SourceLocation() : PLoc.getIncludeLoc();
+void DiagnosticRenderer::emitIncludeStack(FullSourceLoc Loc, PresumedLoc PLoc,
+                                          DiagnosticsEngine::Level Level) {
+  FullSourceLoc IncludeLoc =
+      PLoc.isInvalid() ? FullSourceLoc()
+                       : FullSourceLoc(PLoc.getIncludeLoc(), Loc.getManager());
 
   // Skip redundant include stacks altogether.
   if (LastIncludeLoc == IncludeLoc)
@@ -178,74 +173,70 @@ void DiagnosticRenderer::emitIncludeStack(SourceLocation Loc,
     return;
 
   if (IncludeLoc.isValid())
-    emitIncludeStackRecursively(IncludeLoc, SM);
+    emitIncludeStackRecursively(IncludeLoc);
   else {
-    emitModuleBuildStack(SM);
-    emitImportStack(Loc, SM);
+    emitModuleBuildStack(Loc.getManager());
+    emitImportStack(Loc);
   }
 }
 
 /// \brief Helper to recursively walk up the include stack and print each layer
 /// on the way back down.
-void DiagnosticRenderer::emitIncludeStackRecursively(SourceLocation Loc,
-                                                     const SourceManager &SM) {
+void DiagnosticRenderer::emitIncludeStackRecursively(FullSourceLoc Loc) {
   if (Loc.isInvalid()) {
-    emitModuleBuildStack(SM);
+    emitModuleBuildStack(Loc.getManager());
     return;
   }
-  
-  PresumedLoc PLoc = SM.getPresumedLoc(Loc, DiagOpts->ShowPresumedLoc);
+
+  PresumedLoc PLoc = Loc.getPresumedLoc(DiagOpts->ShowPresumedLoc);
   if (PLoc.isInvalid())
     return;
 
   // If this source location was imported from a module, print the module
   // import stack rather than the include stack.
   // FIXME: We want submodule granularity here.
-  std::pair<SourceLocation, StringRef> Imported = SM.getModuleImportLoc(Loc);
+  std::pair<FullSourceLoc, StringRef> Imported = Loc.getModuleImportLoc();
   if (!Imported.second.empty()) {
     // This location was imported by a module. Emit the module import stack.
-    emitImportStackRecursively(Imported.first, Imported.second, SM);
+    emitImportStackRecursively(Imported.first, Imported.second);
     return;
   }
 
   // Emit the other include frames first.
-  emitIncludeStackRecursively(PLoc.getIncludeLoc(), SM);
-  
+  emitIncludeStackRecursively(
+      FullSourceLoc(PLoc.getIncludeLoc(), Loc.getManager()));
+
   // Emit the inclusion text/note.
-  emitIncludeLocation(Loc, PLoc, SM);
+  emitIncludeLocation(Loc, PLoc);
 }
 
 /// \brief Emit the module import stack associated with the current location.
-void DiagnosticRenderer::emitImportStack(SourceLocation Loc,
-                                         const SourceManager &SM) {
+void DiagnosticRenderer::emitImportStack(FullSourceLoc Loc) {
   if (Loc.isInvalid()) {
-    emitModuleBuildStack(SM);
+    emitModuleBuildStack(Loc.getManager());
     return;
   }
 
-  std::pair<SourceLocation, StringRef> NextImportLoc
-    = SM.getModuleImportLoc(Loc);
-  emitImportStackRecursively(NextImportLoc.first, NextImportLoc.second, SM);
+  std::pair<FullSourceLoc, StringRef> NextImportLoc = Loc.getModuleImportLoc();
+  emitImportStackRecursively(NextImportLoc.first, NextImportLoc.second);
 }
 
 /// \brief Helper to recursively walk up the import stack and print each layer
 /// on the way back down.
-void DiagnosticRenderer::emitImportStackRecursively(SourceLocation Loc,
-                                                    StringRef ModuleName,
-                                                    const SourceManager &SM) {
+void DiagnosticRenderer::emitImportStackRecursively(FullSourceLoc Loc,
+                                                    StringRef ModuleName) {
   if (ModuleName.empty()) {
     return;
   }
 
-  PresumedLoc PLoc = SM.getPresumedLoc(Loc, DiagOpts->ShowPresumedLoc);
+  PresumedLoc PLoc = Loc.getPresumedLoc(DiagOpts->ShowPresumedLoc);
 
   // Emit the other import frames first.
-  std::pair<SourceLocation, StringRef> NextImportLoc
-    = SM.getModuleImportLoc(Loc);
-  emitImportStackRecursively(NextImportLoc.first, NextImportLoc.second, SM);
+  std::pair<FullSourceLoc, StringRef> NextImportLoc = Loc.getModuleImportLoc();
+  emitImportStackRecursively(NextImportLoc.first, NextImportLoc.second);
 
   // Emit the inclusion text/note.
-  emitImportLocation(Loc, PLoc, ModuleName, SM);
+  emitImportLocation(Loc, PLoc, ModuleName);
 }
 
 /// \brief Emit the module build stack, for cases where a module is (re-)built
@@ -253,13 +244,9 @@ void DiagnosticRenderer::emitImportStackRecursively(SourceLocation Loc,
 void DiagnosticRenderer::emitModuleBuildStack(const SourceManager &SM) {
   ModuleBuildStack Stack = SM.getModuleBuildStack();
   for (unsigned I = 0, N = Stack.size(); I != N; ++I) {
-    const SourceManager &CurSM = Stack[I].second.getManager();
-    SourceLocation CurLoc = Stack[I].second;
-    emitBuildingModuleLocation(CurLoc,
-                               CurSM.getPresumedLoc(CurLoc,
+    emitBuildingModuleLocation(Stack[I].second, Stack[I].second.getPresumedLoc(
                                                     DiagOpts->ShowPresumedLoc),
-                               Stack[I].first,
-                               CurSM);
+                               Stack[I].first);
   }
 }
 
@@ -348,12 +335,12 @@ static void computeCommonMacroArgExpansionFileIDs(
 // in the same expansion as the caret; otherwise, we crawl to the top of
 // each chain. Two locations are part of the same macro expansion
 // iff the FileID is the same.
-static void mapDiagnosticRanges(
-    SourceLocation CaretLoc,
-    ArrayRef<CharSourceRange> Ranges,
-    SmallVectorImpl<CharSourceRange> &SpellingRanges,
-    const SourceManager *SM) {
-  FileID CaretLocFileID = SM->getFileID(CaretLoc);
+static void
+mapDiagnosticRanges(FullSourceLoc CaretLoc, ArrayRef<CharSourceRange> Ranges,
+                    SmallVectorImpl<CharSourceRange> &SpellingRanges) {
+  FileID CaretLocFileID = CaretLoc.getFileID();
+
+  const SourceManager *SM = &CaretLoc.getManager();
 
   for (auto I = Ranges.begin(), E = Ranges.end(); I != E; ++I) {
     if (I->isInvalid()) continue;
@@ -404,42 +391,39 @@ static void mapDiagnosticRanges(
   }
 }
 
-void DiagnosticRenderer::emitCaret(SourceLocation Loc,
+void DiagnosticRenderer::emitCaret(FullSourceLoc Loc,
                                    DiagnosticsEngine::Level Level,
                                    ArrayRef<CharSourceRange> Ranges,
-                                   ArrayRef<FixItHint> Hints,
-                                   const SourceManager &SM) {
+                                   ArrayRef<FixItHint> Hints) {
   SmallVector<CharSourceRange, 4> SpellingRanges;
-  mapDiagnosticRanges(Loc, Ranges, SpellingRanges, &SM);
-  emitCodeContext(Loc, Level, SpellingRanges, Hints, SM);
+  mapDiagnosticRanges(Loc, Ranges, SpellingRanges);
+  emitCodeContext(Loc, Level, SpellingRanges, Hints);
 }
 
 /// \brief A helper function for emitMacroExpansion to print the
 /// macro expansion message
 void DiagnosticRenderer::emitSingleMacroExpansion(
-    SourceLocation Loc,
-    DiagnosticsEngine::Level Level,
-    ArrayRef<CharSourceRange> Ranges,
-    const SourceManager &SM) {
+    FullSourceLoc Loc, DiagnosticsEngine::Level Level,
+    ArrayRef<CharSourceRange> Ranges) {
   // Find the spelling location for the macro definition. We must use the
   // spelling location here to avoid emitting a macro backtrace for the note.
-  SourceLocation SpellingLoc = SM.getSpellingLoc(Loc);
+  FullSourceLoc SpellingLoc = Loc.getSpellingLoc();
 
   // Map the ranges into the FileID of the diagnostic location.
   SmallVector<CharSourceRange, 4> SpellingRanges;
-  mapDiagnosticRanges(Loc, Ranges, SpellingRanges, &SM);
+  mapDiagnosticRanges(Loc, Ranges, SpellingRanges);
 
   SmallString<100> MessageStorage;
   llvm::raw_svector_ostream Message(MessageStorage);
-  StringRef MacroName =
-      Lexer::getImmediateMacroNameForDiagnostics(Loc, SM, LangOpts);
+  StringRef MacroName = Lexer::getImmediateMacroNameForDiagnostics(
+      Loc, Loc.getManager(), LangOpts);
   if (MacroName.empty())
     Message << "expanded from here";
   else
     Message << "expanded from macro '" << MacroName << "'";
 
   emitDiagnostic(SpellingLoc, DiagnosticsEngine::Note, Message.str(),
-                 SpellingRanges, None, &SM);
+                 SpellingRanges, None);
 }
 
 /// Check that the macro argument location of Loc starts with ArgumentLoc.
@@ -473,13 +457,12 @@ static bool checkRangeForMacroArgExpansion(CharSourceRange Range,
 
 /// A helper function to check if the current ranges are all inside the same
 /// macro argument expansion as Loc.
-static bool checkRangesForMacroArgExpansion(SourceLocation Loc,
-                                            ArrayRef<CharSourceRange> Ranges,
-                                            const SourceManager &SM) {
+static bool checkRangesForMacroArgExpansion(FullSourceLoc Loc,
+                                            ArrayRef<CharSourceRange> Ranges) {
   assert(Loc.isMacroID() && "Must be a macro expansion!");
 
   SmallVector<CharSourceRange, 4> SpellingRanges;
-  mapDiagnosticRanges(Loc, Ranges, SpellingRanges, &SM);
+  mapDiagnosticRanges(Loc, Ranges, SpellingRanges);
 
   /// Count all valid ranges.
   unsigned ValidCount = 0;
@@ -490,15 +473,15 @@ static bool checkRangesForMacroArgExpansion(SourceLocation Loc,
     return false;
 
   /// To store the source location of the argument location.
-  SourceLocation ArgumentLoc;
+  FullSourceLoc ArgumentLoc;
 
   /// Set the ArgumentLoc to the beginning location of the expansion of Loc
   /// so to check if the ranges expands to the same beginning location.
-  if (!SM.isMacroArgExpansion(Loc,&ArgumentLoc))
+  if (!Loc.isMacroArgExpansion(&ArgumentLoc))
     return false;
 
   for (auto I = SpellingRanges.begin(), E = SpellingRanges.end(); I != E; ++I) {
-    if (!checkRangeForMacroArgExpansion(*I, SM, ArgumentLoc))
+    if (!checkRangeForMacroArgExpansion(*I, Loc.getManager(), ArgumentLoc))
       return false;
   }
 
@@ -516,34 +499,33 @@ static bool checkRangesForMacroArgExpansion(SourceLocation Loc,
 /// \param Level The diagnostic level currently being emitted.
 /// \param Ranges The underlined ranges for this code snippet.
 /// \param Hints The FixIt hints active for this diagnostic.
-void DiagnosticRenderer::emitMacroExpansions(SourceLocation Loc,
+void DiagnosticRenderer::emitMacroExpansions(FullSourceLoc Loc,
                                              DiagnosticsEngine::Level Level,
                                              ArrayRef<CharSourceRange> Ranges,
-                                             ArrayRef<FixItHint> Hints,
-                                             const SourceManager &SM) {
+                                             ArrayRef<FixItHint> Hints) {
   assert(Loc.isValid() && "must have a valid source location here");
 
   // Produce a stack of macro backtraces.
-  SmallVector<SourceLocation, 8> LocationStack;
+  SmallVector<FullSourceLoc, 8> LocationStack;
   unsigned IgnoredEnd = 0;
   while (Loc.isMacroID()) {
     // If this is the expansion of a macro argument, point the caret at the
     // use of the argument in the definition of the macro, not the expansion.
-    if (SM.isMacroArgExpansion(Loc))
-      LocationStack.push_back(SM.getImmediateExpansionRange(Loc).first);
+    if (Loc.isMacroArgExpansion())
+      LocationStack.push_back(Loc.getImmediateExpansionRange().first);
     else
       LocationStack.push_back(Loc);
 
-    if (checkRangesForMacroArgExpansion(Loc, Ranges, SM))
+    if (checkRangesForMacroArgExpansion(Loc, Ranges))
       IgnoredEnd = LocationStack.size();
 
-    Loc = SM.getImmediateMacroCallerLoc(Loc);
+    Loc = Loc.getImmediateMacroCallerLoc();
 
     // Once the location no longer points into a macro, try stepping through
     // the last found location.  This sometimes produces additional useful
     // backtraces.
     if (Loc.isFileID())
-      Loc = SM.getImmediateMacroCallerLoc(LocationStack.back());
+      Loc = LocationStack.back().getImmediateMacroCallerLoc();
     assert(Loc.isValid() && "must have a valid source location here");
   }
 
@@ -555,7 +537,7 @@ void DiagnosticRenderer::emitMacroExpansions(SourceLocation Loc,
   if (MacroDepth <= MacroLimit || MacroLimit == 0) {
     for (auto I = LocationStack.rbegin(), E = LocationStack.rend();
          I != E; ++I)
-      emitSingleMacroExpansion(*I, Level, Ranges, SM);
+      emitSingleMacroExpansion(*I, Level, Ranges);
     return;
   }
 
@@ -565,7 +547,7 @@ void DiagnosticRenderer::emitMacroExpansions(SourceLocation Loc,
   for (auto I = LocationStack.rbegin(),
             E = LocationStack.rbegin() + MacroStartMessages;
        I != E; ++I)
-    emitSingleMacroExpansion(*I, Level, Ranges, SM);
+    emitSingleMacroExpansion(*I, Level, Ranges);
 
   SmallString<200> MessageStorage;
   llvm::raw_svector_ostream Message(MessageStorage);
@@ -577,26 +559,24 @@ void DiagnosticRenderer::emitMacroExpansions(SourceLocation Loc,
   for (auto I = LocationStack.rend() - MacroEndMessages,
             E = LocationStack.rend();
        I != E; ++I)
-    emitSingleMacroExpansion(*I, Level, Ranges, SM);
+    emitSingleMacroExpansion(*I, Level, Ranges);
 }
 
 DiagnosticNoteRenderer::~DiagnosticNoteRenderer() {}
 
-void DiagnosticNoteRenderer::emitIncludeLocation(SourceLocation Loc,
-                                                 PresumedLoc PLoc,
-                                                 const SourceManager &SM) {
+void DiagnosticNoteRenderer::emitIncludeLocation(FullSourceLoc Loc,
+                                                 PresumedLoc PLoc) {
   // Generate a note indicating the include location.
   SmallString<200> MessageStorage;
   llvm::raw_svector_ostream Message(MessageStorage);
   Message << "in file included from " << PLoc.getFilename() << ':'
           << PLoc.getLine() << ":";
-  emitNote(Loc, Message.str(), &SM);
+  emitNote(Loc, Message.str());
 }
 
-void DiagnosticNoteRenderer::emitImportLocation(SourceLocation Loc,
+void DiagnosticNoteRenderer::emitImportLocation(FullSourceLoc Loc,
                                                 PresumedLoc PLoc,
-                                                StringRef ModuleName,
-                                                const SourceManager &SM) {
+                                                StringRef ModuleName) {
   // Generate a note indicating the include location.
   SmallString<200> MessageStorage;
   llvm::raw_svector_ostream Message(MessageStorage);
@@ -605,14 +585,12 @@ void DiagnosticNoteRenderer::emitImportLocation(SourceLocation Loc,
     Message << "' imported from " << PLoc.getFilename() << ':'
             << PLoc.getLine();
   Message << ":";
-  emitNote(Loc, Message.str(), &SM);
+  emitNote(Loc, Message.str());
 }
 
-void
-DiagnosticNoteRenderer::emitBuildingModuleLocation(SourceLocation Loc,
-                                                   PresumedLoc PLoc,
-                                                   StringRef ModuleName,
-                                                   const SourceManager &SM) {
+void DiagnosticNoteRenderer::emitBuildingModuleLocation(FullSourceLoc Loc,
+                                                        PresumedLoc PLoc,
+                                                        StringRef ModuleName) {
   // Generate a note indicating the include location.
   SmallString<200> MessageStorage;
   llvm::raw_svector_ostream Message(MessageStorage);
@@ -621,5 +599,5 @@ DiagnosticNoteRenderer::emitBuildingModuleLocation(SourceLocation Loc,
             << PLoc.getFilename() << ':' << PLoc.getLine() << ":";
   else
     Message << "while building module '" << ModuleName << "':";
-  emitNote(Loc, Message.str(), &SM);
+  emitNote(Loc, Message.str());
 }
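// ---- Editor's note ----
// Unifying theme of the DiagnosticRenderer changes above: FullSourceLoc
// bundles a SourceLocation with its SourceManager, so every renderer entry
// point drops its trailing 'const SourceManager &' parameter. Call sites
// migrate mechanically, e.g.:
//   before: emitIncludeStack(Loc, PLoc, Level, SM);  SM.getSpellingLoc(Loc);
//   after:  emitIncludeStack(FullSourceLoc(Loc, SM), PLoc, Level);
//           Loc.getSpellingLoc();
// The assert at the top of emitDiagnostic (Loc.hasManager() ||
// Loc.isInvalid()) preserves the old "SM may be null only for invalid
// locations" contract.
// ------------------------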
diff --git a/interpreter/llvm/src/tools/clang/lib/Frontend/FrontendAction.cpp b/interpreter/llvm/src/tools/clang/lib/Frontend/FrontendAction.cpp
index 248039611b46d..704d51509851f 100644
--- a/interpreter/llvm/src/tools/clang/lib/Frontend/FrontendAction.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Frontend/FrontendAction.cpp
@@ -200,12 +200,12 @@ FrontendAction::CreateWrappedASTConsumer(CompilerInstance &CI,
 ///
 /// \param CI The compiler instance.
 /// \param InputFile Populated with the filename from the line marker.
-/// \param AddLineNote If \c true, add a line note corresponding to this line
-///        directive. Only use this if the directive will not actually be
-///        visited by the preprocessor.
+/// \param IsModuleMap If \c true, add a line note corresponding to this line
+///        directive. (We need to do this because the directive will not be
+///        visited by the preprocessor.)
 static SourceLocation ReadOriginalFileName(CompilerInstance &CI,
                                            std::string &InputFile,
-                                           bool AddLineNote = false) {
+                                           bool IsModuleMap = false) {
   auto &SourceMgr = CI.getSourceManager();
   auto MainFileID = SourceMgr.getMainFileID();
 
@@ -231,7 +231,7 @@ static SourceLocation ReadOriginalFileName(CompilerInstance &CI,
 
   unsigned LineNo;
   SourceLocation LineNoLoc = T.getLocation();
-  if (AddLineNote) {
+  if (IsModuleMap) {
     llvm::SmallString<16> Buffer;
     if (Lexer::getSpelling(LineNoLoc, Buffer, SourceMgr, CI.getLangOpts())
             .getAsInteger(10, LineNo))
@@ -250,9 +250,10 @@ static SourceLocation ReadOriginalFileName(CompilerInstance &CI,
     return SourceLocation();
   InputFile = Literal.GetString().str();
 
-  if (AddLineNote)
+  if (IsModuleMap)
     CI.getSourceManager().AddLineNote(
-        LineNoLoc, LineNo, SourceMgr.getLineTableFilenameID(InputFile));
+        LineNoLoc, LineNo, SourceMgr.getLineTableFilenameID(InputFile), false,
+        false, SrcMgr::C_User_ModuleMap);
 
   return T.getLocation();
 }
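// ---- Editor's note ----
// ReadOriginalFileName() above consumes the GNU line marker that a
// preprocessed module map begins with, e.g.:
//   # 1 "/src/module.modulemap"
//   module Foo { header "foo.h" }
// With IsModuleMap=true it also registers a line note so diagnostics point
// at the presumed file, now tagged SrcMgr::C_User_ModuleMap instead of
// plain C_User.
// ------------------------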
@@ -288,14 +289,28 @@ static void addHeaderInclude(StringRef HeaderName,
 ///
 /// \param Includes Will be augmented with the set of \#includes or \#imports
 /// needed to load all of the named headers.
-static std::error_code
-collectModuleHeaderIncludes(const LangOptions &LangOpts, FileManager &FileMgr,
-                            ModuleMap &ModMap, clang::Module *Module,
-                            SmallVectorImpl<char> &Includes) {
+static std::error_code collectModuleHeaderIncludes(
+    const LangOptions &LangOpts, FileManager &FileMgr, DiagnosticsEngine &Diag,
+    ModuleMap &ModMap, clang::Module *Module, SmallVectorImpl<char> &Includes) {
   // Don't collect any headers for unavailable modules.
   if (!Module->isAvailable())
     return std::error_code();
 
+  // Resolve all lazy header directives to header files.
+  ModMap.resolveHeaderDirectives(Module);
+
+  // If any headers are missing, we can't build this module. In most cases,
+  // diagnostics for this should have already been produced; we only get here
+  // if explicit stat information was provided.
+  // FIXME: If the name resolves to a file with different stat information,
+  // produce a better diagnostic.
+  if (!Module->MissingHeaders.empty()) {
+    auto &MissingHeader = Module->MissingHeaders.front();
+    Diag.Report(MissingHeader.FileNameLoc, diag::err_module_header_missing)
+      << MissingHeader.IsUmbrella << MissingHeader.FileName;
+    return std::error_code();
+  }
+
   // Add includes for each of these headers.
   for (auto HK : {Module::HK_Normal, Module::HK_Private}) {
     for (Module::Header &H : Module->Headers[HK]) {
@@ -366,16 +381,16 @@ collectModuleHeaderIncludes(const LangOptions &LangOpts, FileManager &FileMgr,
                                       SubEnd = Module->submodule_end();
        Sub != SubEnd; ++Sub)
     if (std::error_code Err = collectModuleHeaderIncludes(
-            LangOpts, FileMgr, ModMap, *Sub, Includes))
+            LangOpts, FileMgr, Diag, ModMap, *Sub, Includes))
       return Err;
 
   return std::error_code();
 }
 
-static bool
-loadModuleMapForModuleBuild(CompilerInstance &CI, StringRef Filename,
-                            bool IsSystem, bool IsPreprocessed,
-                            unsigned &Offset) {
+static bool loadModuleMapForModuleBuild(CompilerInstance &CI, bool IsSystem,
+                                        bool IsPreprocessed,
+                                        std::string &PresumedModuleMapFile,
+                                        unsigned &Offset) {
   auto &SrcMgr = CI.getSourceManager();
   HeaderSearch &HS = CI.getPreprocessor().getHeaderSearchInfo();
 
@@ -387,16 +402,15 @@ loadModuleMapForModuleBuild(CompilerInstance &CI, StringRef Filename,
   // line directives are not part of the module map syntax in general.
   Offset = 0;
   if (IsPreprocessed) {
-    std::string PresumedModuleMapFile;
     SourceLocation EndOfLineMarker =
-        ReadOriginalFileName(CI, PresumedModuleMapFile, /*AddLineNote*/true);
+        ReadOriginalFileName(CI, PresumedModuleMapFile, /*IsModuleMap*/ true);
     if (EndOfLineMarker.isValid())
       Offset = CI.getSourceManager().getDecomposedLoc(EndOfLineMarker).second;
-    // FIXME: Use PresumedModuleMapFile as the MODULE_MAP_FILE in the PCM.
   }
 
   // Load the module map file.
-  if (HS.loadModuleMapFile(ModuleMap, IsSystem, ModuleMapID, &Offset))
+  if (HS.loadModuleMapFile(ModuleMap, IsSystem, ModuleMapID, &Offset,
+                           PresumedModuleMapFile))
     return true;
 
   if (SrcMgr.getBuffer(ModuleMapID)->getBufferSize() == Offset)
@@ -429,21 +443,9 @@ static Module *prepareToBuildModule(CompilerInstance &CI,
   }
 
   // Check whether we can build this module at all.
-  clang::Module::Requirement Requirement;
-  clang::Module::UnresolvedHeaderDirective MissingHeader;
-  if (!M->isAvailable(CI.getLangOpts(), CI.getTarget(), Requirement,
-                      MissingHeader)) {
-    if (MissingHeader.FileNameLoc.isValid()) {
-      CI.getDiagnostics().Report(MissingHeader.FileNameLoc,
-                                 diag::err_module_header_missing)
-        << MissingHeader.IsUmbrella << MissingHeader.FileName;
-    } else {
-      CI.getDiagnostics().Report(diag::err_module_unavailable)
-        << M->getFullModuleName() << Requirement.second << Requirement.first;
-    }
-
+  if (Preprocessor::checkModuleIsAvailable(CI.getLangOpts(), CI.getTarget(),
+                                           CI.getDiagnostics(), M))
     return nullptr;
-  }
 
   // Inform the preprocessor that includes from within the input buffer should
   // be resolved relative to the build directory of the module map file.
@@ -493,7 +495,7 @@ getInputBufferForModule(CompilerInstance &CI, Module *M) {
     addHeaderInclude(UmbrellaHeader.NameAsWritten, HeaderContents,
                      CI.getLangOpts(), M->IsExternC);
   Err = collectModuleHeaderIncludes(
-      CI.getLangOpts(), FileMgr,
+      CI.getLangOpts(), FileMgr, CI.getDiagnostics(),
       CI.getPreprocessor().getHeaderSearchInfo().getModuleMap(), M,
       HeaderContents);
 
@@ -508,7 +510,8 @@ getInputBufferForModule(CompilerInstance &CI, Module *M) {
 }
 
 bool FrontendAction::BeginSourceFile(CompilerInstance &CI,
-                                     const FrontendInputFile &Input) {
+                                     const FrontendInputFile &RealInput) {
+  FrontendInputFile Input(RealInput);
   assert(!Instance && "Already processing a source file!");
   assert(!Input.isEmpty() && "Unexpected empty filename!");
   setCurrentInput(Input);
@@ -516,23 +519,88 @@ bool FrontendAction::BeginSourceFile(CompilerInstance &CI,
 
   StringRef InputFile = Input.getFile();
   bool HasBegunSourceFile = false;
+  bool ReplayASTFile = Input.getKind().getFormat() == InputKind::Precompiled &&
+                       usesPreprocessorOnly();
   if (!BeginInvocation(CI))
     goto failure;
 
+  // If we're replaying the build of an AST file, import it and set up
+  // the initial state from its build.
+  if (ReplayASTFile) {
+    IntrusiveRefCntPtr<DiagnosticsEngine> Diags(&CI.getDiagnostics());
+
+    // The AST unit populates its own diagnostics engine rather than ours.
+    IntrusiveRefCntPtr<DiagnosticsEngine> ASTDiags(
+        new DiagnosticsEngine(Diags->getDiagnosticIDs(),
+                              &Diags->getDiagnosticOptions()));
+    ASTDiags->setClient(Diags->getClient(), /*OwnsClient*/false);
+
+    std::unique_ptr<ASTUnit> AST = ASTUnit::LoadFromASTFile(
+        InputFile, CI.getPCHContainerReader(), ASTUnit::LoadPreprocessorOnly,
+        ASTDiags, CI.getFileSystemOpts(), CI.getCodeGenOpts().DebugTypeExtRefs);
+    if (!AST)
+      goto failure;
+
+    // Options relating to how we treat the input (but not what we do with it)
+    // are inherited from the AST unit.
+    CI.getHeaderSearchOpts() = AST->getHeaderSearchOpts();
+    CI.getPreprocessorOpts() = AST->getPreprocessorOpts();
+    CI.getLangOpts() = AST->getLangOpts();
+
+    // Set the shared objects, these are reset when we finish processing the
+    // file, otherwise the CompilerInstance will happily destroy them.
+    CI.setFileManager(&AST->getFileManager());
+    CI.createSourceManager(CI.getFileManager());
+    CI.getSourceManager().initializeForReplay(AST->getSourceManager());
+
+    // Preload all the module files loaded transitively by the AST unit. Also
+    // load all module map files that were parsed as part of building the AST
+    // unit.
+    if (auto ASTReader = AST->getASTReader()) {
+      auto &MM = ASTReader->getModuleManager();
+      auto &PrimaryModule = MM.getPrimaryModule();
+
+      for (ModuleFile &MF : MM)
+        if (&MF != &PrimaryModule)
+          CI.getFrontendOpts().ModuleFiles.push_back(MF.FileName);
+
+      ASTReader->visitTopLevelModuleMaps(PrimaryModule,
+                                         [&](const FileEntry *FE) {
+        CI.getFrontendOpts().ModuleMapFiles.push_back(FE->getName());
+      });
+    }
+
+    // Set up the input file for replay purposes.
+    auto Kind = AST->getInputKind();
+    if (Kind.getFormat() == InputKind::ModuleMap) {
+      Module *ASTModule =
+          AST->getPreprocessor().getHeaderSearchInfo().lookupModule(
+              AST->getLangOpts().CurrentModule, /*AllowSearch*/ false);
+      assert(ASTModule && "module file does not define its own module");
+      Input = FrontendInputFile(ASTModule->PresumedModuleMapFile, Kind);
+    } else {
+      auto &SM = CI.getSourceManager();
+      FileID ID = SM.getMainFileID();
+      if (auto *File = SM.getFileEntryForID(ID))
+        Input = FrontendInputFile(File->getName(), Kind);
+      else
+        Input = FrontendInputFile(SM.getBuffer(ID), Kind);
+    }
+    setCurrentInput(Input, std::move(AST));
+  }
+
   // AST files follow a very different path, since they share objects via the
   // AST unit.
   if (Input.getKind().getFormat() == InputKind::Precompiled) {
-    // FIXME: We should not be asserting on bad command-line arguments.
-    assert(!usesPreprocessorOnly() &&
-           "Attempt to pass AST file to preprocessor only action!");
+    assert(!usesPreprocessorOnly() && "this case was handled above");
     assert(hasASTFileSupport() &&
            "This action does not have AST file support!");
 
     IntrusiveRefCntPtr<DiagnosticsEngine> Diags(&CI.getDiagnostics());
 
     std::unique_ptr<ASTUnit> AST = ASTUnit::LoadFromASTFile(
-        InputFile, CI.getPCHContainerReader(), Diags, CI.getFileSystemOpts(),
-        CI.getCodeGenOpts().DebugTypeExtRefs);
+        InputFile, CI.getPCHContainerReader(), ASTUnit::LoadEverything, Diags,
+        CI.getFileSystemOpts(), CI.getCodeGenOpts().DebugTypeExtRefs);
 
     if (!AST)
       goto failure;
@@ -554,7 +622,7 @@ bool FrontendAction::BeginSourceFile(CompilerInstance &CI,
     setCurrentInput(Input, std::move(AST));
 
     // Initialize the action.
-    if (!BeginSourceFileAction(CI, InputFile))
+    if (!BeginSourceFileAction(CI))
       goto failure;
 
     // Create the AST consumer.
@@ -565,12 +633,18 @@ bool FrontendAction::BeginSourceFile(CompilerInstance &CI,
     return true;
   }
 
-  // Set up the file and source managers, if needed.
-  if (!CI.hasFileManager()) {
-    if (!CI.createFileManager()) {
+  if (!CI.hasVirtualFileSystem()) {
+    if (IntrusiveRefCntPtr<vfs::FileSystem> VFS =
+          createVFSFromCompilerInvocation(CI.getInvocation(),
+                                          CI.getDiagnostics()))
+      CI.setVirtualFileSystem(VFS);
+    else
       goto failure;
-    }
   }
+
+  // Set up the file and source managers, if needed.
+  if (!CI.hasFileManager())
+    CI.createFileManager();
   if (!CI.hasSourceManager())
     CI.createSourceManager(CI.getFileManager());
 
@@ -595,7 +669,7 @@ bool FrontendAction::BeginSourceFile(CompilerInstance &CI,
     HasBegunSourceFile = true;
 
     // Initialize the action.
-    if (!BeginSourceFileAction(CI, InputFile))
+    if (!BeginSourceFileAction(CI))
       goto failure;
 
     // Initialize the main file entry.
@@ -657,15 +731,19 @@ bool FrontendAction::BeginSourceFile(CompilerInstance &CI,
   if (Input.getKind().getFormat() == InputKind::ModuleMap) {
     CI.getLangOpts().setCompilingModule(LangOptions::CMK_ModuleMap);
 
+    std::string PresumedModuleMapFile;
     unsigned OffsetToContents;
-    if (loadModuleMapForModuleBuild(CI, Input.getFile(), Input.isSystem(),
-                                    Input.isPreprocessed(), OffsetToContents))
+    if (loadModuleMapForModuleBuild(CI, Input.isSystem(),
+                                    Input.isPreprocessed(),
+                                    PresumedModuleMapFile, OffsetToContents))
       goto failure;
 
     auto *CurrentModule = prepareToBuildModule(CI, Input.getFile());
     if (!CurrentModule)
       goto failure;
 
+    CurrentModule->PresumedModuleMapFile = PresumedModuleMapFile;
+
     if (OffsetToContents)
       // If the module contents are in the same file, skip to them.
       CI.getPreprocessor().setSkipMainFilePreamble(OffsetToContents, true);
@@ -684,7 +762,7 @@ bool FrontendAction::BeginSourceFile(CompilerInstance &CI,
   }
 
   // Initialize the action.
-  if (!BeginSourceFileAction(CI, InputFile))
+  if (!BeginSourceFileAction(CI))
     goto failure;
 
   // Create the AST context and consumer unless this is a preprocessor only
@@ -804,14 +882,7 @@ bool FrontendAction::BeginSourceFile(CompilerInstance &CI,
 
   // If we failed, reset state since the client will not end up calling the
   // matching EndSourceFile().
-  failure:
-  if (isCurrentFileAST()) {
-    CI.setASTContext(nullptr);
-    CI.setPreprocessor(nullptr);
-    CI.setSourceManager(nullptr);
-    CI.setFileManager(nullptr);
-  }
-
+failure:
   if (HasBegunSourceFile)
     CI.getDiagnosticClient().EndSourceFile();
   CI.clearOutputFiles(/*EraseFiles=*/true);
@@ -889,6 +960,7 @@ void FrontendAction::EndSourceFile() {
       CI.resetAndLeakPreprocessor();
       CI.resetAndLeakSourceManager();
       CI.resetAndLeakFileManager();
+      BuryPointer(CurrentASTUnit.release());
     } else {
       CI.setPreprocessor(nullptr);
       CI.setSourceManager(nullptr);
@@ -948,11 +1020,10 @@ WrapperFrontendAction::CreateASTConsumer(CompilerInstance &CI,
 bool WrapperFrontendAction::BeginInvocation(CompilerInstance &CI) {
   return WrappedAction->BeginInvocation(CI);
 }
-bool WrapperFrontendAction::BeginSourceFileAction(CompilerInstance &CI,
-                                                  StringRef Filename) {
+bool WrapperFrontendAction::BeginSourceFileAction(CompilerInstance &CI) {
   WrappedAction->setCurrentInput(getCurrentInput());
   WrappedAction->setCompilerInstance(&CI);
-  auto Ret = WrappedAction->BeginSourceFileAction(CI, Filename);
+  auto Ret = WrappedAction->BeginSourceFileAction(CI);
   // BeginSourceFileAction may change CurrentInput, e.g. during module builds.
   setCurrentInput(WrappedAction->getCurrentInput());
   return Ret;
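With the Filename parameter gone from BeginSourceFileAction, subclasses obtain the input through the action itself. A minimal sketch, assuming a hypothetical out-of-tree action (MyAction and its trivial consumer are invented for illustration, not part of this patch):

    #include "clang/AST/ASTConsumer.h"
    #include "clang/Frontend/CompilerInstance.h"
    #include "clang/Frontend/FrontendAction.h"

    // Hypothetical action showing the new one-argument hook: the file name
    // now comes from getCurrentInput() instead of a Filename parameter.
    class MyAction : public clang::ASTFrontendAction {
      bool BeginSourceFileAction(clang::CompilerInstance &CI) override {
        llvm::StringRef Filename = getCurrentInput().getFile();
        return !Filename.empty();
      }
      std::unique_ptr<clang::ASTConsumer>
      CreateASTConsumer(clang::CompilerInstance &CI, llvm::StringRef) override {
        return llvm::make_unique<clang::ASTConsumer>();
      }
    };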
diff --git a/interpreter/llvm/src/tools/clang/lib/Frontend/FrontendActions.cpp b/interpreter/llvm/src/tools/clang/lib/Frontend/FrontendActions.cpp
index baaf93b167bc2..d42400183a433 100644
--- a/interpreter/llvm/src/tools/clang/lib/Frontend/FrontendActions.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Frontend/FrontendActions.cpp
@@ -134,8 +134,7 @@ bool GeneratePCHAction::shouldEraseOutputFiles() {
   return ASTFrontendAction::shouldEraseOutputFiles();
 }
 
-bool GeneratePCHAction::BeginSourceFileAction(CompilerInstance &CI,
-                                              StringRef Filename) {
+bool GeneratePCHAction::BeginSourceFileAction(CompilerInstance &CI) {
   CI.getLangOpts().CompilingPCH = true;
   return true;
 }
@@ -165,8 +164,13 @@ GenerateModuleAction::CreateASTConsumer(CompilerInstance &CI,
 }
 
 bool GenerateModuleFromModuleMapAction::BeginSourceFileAction(
-    CompilerInstance &CI, StringRef Filename) {
-  return GenerateModuleAction::BeginSourceFileAction(CI, Filename);
+    CompilerInstance &CI) {
+  if (!CI.getLangOpts().Modules) {
+    CI.getDiagnostics().Report(diag::err_module_build_requires_fmodules);
+    return false;
+  }
+
+  return GenerateModuleAction::BeginSourceFileAction(CI);
 }
 
 std::unique_ptr<raw_pwrite_stream>
@@ -194,8 +198,8 @@ GenerateModuleFromModuleMapAction::CreateOutputFile(CompilerInstance &CI,
                              /*CreateMissingDirectories=*/true);
 }
 
-bool GenerateModuleInterfaceAction::BeginSourceFileAction(CompilerInstance &CI,
-                                                          StringRef Filename) {
+bool GenerateModuleInterfaceAction::BeginSourceFileAction(
+    CompilerInstance &CI) {
   if (!CI.getLangOpts().ModulesTS) {
     CI.getDiagnostics().Report(diag::err_module_interface_requires_modules_ts);
     return false;
@@ -203,7 +207,7 @@ bool GenerateModuleInterfaceAction::BeginSourceFileAction(CompilerInstance &CI,
 
   CI.getLangOpts().setCompilingModule(LangOptions::CMK_ModuleInterface);
 
-  return GenerateModuleAction::BeginSourceFileAction(CI, Filename);
+  return GenerateModuleAction::BeginSourceFileAction(CI);
 }
 
 std::unique_ptr<raw_pwrite_stream>
@@ -236,7 +240,7 @@ void VerifyPCHAction::ExecuteAction() {
   bool Preamble = CI.getPreprocessorOpts().PrecompiledPreambleBytes.first != 0;
   const std::string &Sysroot = CI.getHeaderSearchOpts().Sysroot;
   std::unique_ptr<ASTReader> Reader(new ASTReader(
-      CI.getPreprocessor(), CI.getASTContext(), CI.getPCHContainerReader(),
+      CI.getPreprocessor(), &CI.getASTContext(), CI.getPCHContainerReader(),
       CI.getFrontendOpts().ModuleFileExtensions,
       Sysroot.empty() ? "" : Sysroot.c_str(),
       /*DisableValidation*/ false,
@@ -523,7 +527,7 @@ void PrintPreprocessedAction::ExecuteAction() {
     // file.  This is mostly a sanity check in case the file has no 
     // newlines whatsoever.
     if (end - cur > 256) end = cur + 256;
-	  
+
     while (next < end) {
       if (*cur == 0x0D) {  // CR
         if (*next == 0x0A)  // CRLF
@@ -546,8 +550,11 @@ void PrintPreprocessedAction::ExecuteAction() {
   // module itself before switching to the input buffer.
   auto &Input = getCurrentInput();
   if (Input.getKind().getFormat() == InputKind::ModuleMap) {
-    if (Input.isFile())
-      (*OS) << "# 1 \"" << Input.getFile() << "\"\n";
+    if (Input.isFile()) {
+      (*OS) << "# 1 \"";
+      OS->write_escaped(Input.getFile());
+      (*OS) << "\"\n";
+    }
     // FIXME: Include additional information here so that we don't need the
     // original source files to exist on disk.
     getCurrentModule()->print(*OS);
diff --git a/interpreter/llvm/src/tools/clang/lib/Frontend/InitHeaderSearch.cpp b/interpreter/llvm/src/tools/clang/lib/Frontend/InitHeaderSearch.cpp
index d50fb6d788a4d..1d7c8a0c871bd 100644
--- a/interpreter/llvm/src/tools/clang/lib/Frontend/InitHeaderSearch.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Frontend/InitHeaderSearch.cpp
@@ -221,6 +221,7 @@ void InitHeaderSearch::AddDefaultCIncludePaths(const llvm::Triple &triple,
     case llvm::Triple::Win32:
       if (triple.getEnvironment() != llvm::Triple::Cygnus)
         break;
+      LLVM_FALLTHROUGH;
     default:
       // FIXME: temporary hack: hard-coded paths.
       AddPath("/usr/local/include", System, false);
@@ -343,6 +344,7 @@ void InitHeaderSearch::AddDefaultCIncludePaths(const llvm::Triple &triple,
     AddPath(BaseSDKPath + "/target/include", System, false);
     if (triple.isPS4CPU())
       AddPath(BaseSDKPath + "/target/include_common", System, false);
+    LLVM_FALLTHROUGH;
   }
   default:
     AddPath("/usr/include", ExternCSystem, false);
diff --git a/interpreter/llvm/src/tools/clang/lib/Frontend/InitPreprocessor.cpp b/interpreter/llvm/src/tools/clang/lib/Frontend/InitPreprocessor.cpp
index 9257dcae84cd2..92d61369b40f0 100644
--- a/interpreter/llvm/src/tools/clang/lib/Frontend/InitPreprocessor.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Frontend/InitPreprocessor.cpp
@@ -374,10 +374,13 @@ static void InitializeStandardPredefinedMacros(const TargetInfo &TI,
     else if (!LangOpts.GNUMode && LangOpts.Digraphs)
       Builder.defineMacro("__STDC_VERSION__", "199409L");
   } else {
+    // FIXME: Use correct value for C++20.
+    if (LangOpts.CPlusPlus2a)
+      Builder.defineMacro("__cplusplus", "201707L");
     // C++17 [cpp.predefined]p1:
     //   The name __cplusplus is defined to the value 201703L when compiling a
     //   C++ translation unit.
-    if (LangOpts.CPlusPlus1z)
+    else if (LangOpts.CPlusPlus1z)
       Builder.defineMacro("__cplusplus", "201703L");
     // C++1y [cpp.predefined]p1:
     //   The name __cplusplus is defined to the value 201402L when compiling a
@@ -494,6 +497,8 @@ static void InitializeCPlusPlusFeatureTestMacros(const LangOptions &LangOpts,
     Builder.defineMacro("__cpp_ref_qualifiers", "200710");
     Builder.defineMacro("__cpp_alias_templates", "200704");
   }
+  if (LangOpts.ThreadsafeStatics)
+    Builder.defineMacro("__cpp_threadsafe_static_init", "200806");
 
   // C++14 features.
   if (LangOpts.CPlusPlus14) {
@@ -516,6 +521,7 @@ static void InitializeCPlusPlusFeatureTestMacros(const LangOptions &LangOpts,
     Builder.defineMacro("__cpp_noexcept_function_type", "201510");
     Builder.defineMacro("__cpp_capture_star_this", "201603");
     Builder.defineMacro("__cpp_if_constexpr", "201606");
+    Builder.defineMacro("__cpp_deduction_guides", "201611");
     Builder.defineMacro("__cpp_template_auto", "201606");
     Builder.defineMacro("__cpp_namespace_attributes", "201411");
     Builder.defineMacro("__cpp_enumerator_attributes", "201411");
@@ -525,8 +531,6 @@ static void InitializeCPlusPlusFeatureTestMacros(const LangOptions &LangOpts,
     Builder.defineMacro("__cpp_structured_bindings", "201606");
     Builder.defineMacro("__cpp_nontype_template_args", "201411");
     Builder.defineMacro("__cpp_fold_expressions", "201603");
-    // FIXME: This is not yet listed in SD-6.
-    Builder.defineMacro("__cpp_deduction_guides", "201611");
   }
   if (LangOpts.AlignedAllocation)
     Builder.defineMacro("__cpp_aligned_new", "201606");
@@ -535,7 +539,7 @@ static void InitializeCPlusPlusFeatureTestMacros(const LangOptions &LangOpts,
   if (LangOpts.ConceptsTS)
     Builder.defineMacro("__cpp_experimental_concepts", "1");
   if (LangOpts.CoroutinesTS)
-    Builder.defineMacro("__cpp_coroutines", "1");
+    Builder.defineMacro("__cpp_coroutines", "201703L");
 }
 
 static void InitializePredefinedMacros(const TargetInfo &TI,
@@ -1043,7 +1047,7 @@ void clang::InitializePreprocessor(
   if (InitOpts.UsePredefines) {
     // FIXME: This will create multiple definitions for most of the predefined
     // macros. This is not the right way to handle this.
-    if (LangOpts.CUDA && PP.getAuxTargetInfo())
+    if ((LangOpts.CUDA || LangOpts.OpenMPIsDevice) && PP.getAuxTargetInfo())
       InitializePredefinedMacros(*PP.getAuxTargetInfo(), LangOpts, FEOpts,
                                  Builder);
 
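User code can key off the macros introduced above. A short sketch of consuming them (the guarded declarations are illustrative only):

    // Guarded use of C++17 class template argument deduction; the guard is
    // exactly the feature-test macro defined above.
    #if defined(__cpp_deduction_guides) && __cpp_deduction_guides >= 201611
    template <typename T> struct Wrapper { T Value; };
    template <typename T> Wrapper(T) -> Wrapper<T>; // deduction guide
    #endif

    #if defined(__cpp_threadsafe_static_init)
    // Magic statics are thread-safe; a Meyers singleton needs no extra lock.
    #endif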
diff --git a/interpreter/llvm/src/tools/clang/lib/Frontend/ModuleDependencyCollector.cpp b/interpreter/llvm/src/tools/clang/lib/Frontend/ModuleDependencyCollector.cpp
index 9b34d42113532..ede12aab6e695 100644
--- a/interpreter/llvm/src/tools/clang/lib/Frontend/ModuleDependencyCollector.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Frontend/ModuleDependencyCollector.cpp
@@ -248,7 +248,7 @@ std::error_code ModuleDependencyCollector::copyToRoot(StringRef Src,
   // Always map a canonical src path to its real path into the YAML, by doing
   // this we map different virtual src paths to the same entry in the VFS
   // overlay, which is a way to emulate symlink inside the VFS; this is also
-  // needed for correctness, not doing that can lead to module redifinition
+  // needed for correctness, not doing that can lead to module redefinition
   // errors.
   addFileMapping(VirtualPath, CacheDst);
   return std::error_code();
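For example, if /project/a.h is a symlink to /real/a.h, both spellings now map to the same copy under the cache root, so a module built through either path sees one header instead of two conflicting definitions (paths invented for illustration).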
diff --git a/interpreter/llvm/src/tools/clang/lib/Frontend/PrecompiledPreamble.cpp b/interpreter/llvm/src/tools/clang/lib/Frontend/PrecompiledPreamble.cpp
new file mode 100644
index 0000000000000..15b24cbed4841
--- /dev/null
+++ b/interpreter/llvm/src/tools/clang/lib/Frontend/PrecompiledPreamble.cpp
@@ -0,0 +1,563 @@
+//===--- PrecompiledPreamble.cpp - Build precompiled preambles --*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Helper class to build precompiled preamble.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Frontend/PrecompiledPreamble.h"
+#include "clang/AST/DeclObjC.h"
+#include "clang/Basic/TargetInfo.h"
+#include "clang/Basic/VirtualFileSystem.h"
+#include "clang/Frontend/CompilerInstance.h"
+#include "clang/Frontend/CompilerInvocation.h"
+#include "clang/Frontend/FrontendActions.h"
+#include "clang/Frontend/FrontendOptions.h"
+#include "clang/Lex/Lexer.h"
+#include "clang/Lex/PreprocessorOptions.h"
+#include "clang/Serialization/ASTWriter.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/Support/CrashRecoveryContext.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/Support/MutexGuard.h"
+
+using namespace clang;
+
+namespace {
+
+/// Keeps track of files to be deleted in the destructor.
+class TemporaryFiles {
+public:
+  // A static instance to be used by all clients.
+  static TemporaryFiles &getInstance();
+
+private:
+  // Disallow constructing the class directly.
+  TemporaryFiles() = default;
+  // Disallow copy.
+  TemporaryFiles(const TemporaryFiles &) = delete;
+
+public:
+  ~TemporaryFiles();
+
+  /// Adds \p File to a set of tracked files.
+  void addFile(StringRef File);
+
+  /// Remove \p File from disk and from the set of tracked files.
+  void removeFile(StringRef File);
+
+private:
+  llvm::sys::SmartMutex<false> Mutex;
+  llvm::StringSet<> Files;
+};
+
+TemporaryFiles &TemporaryFiles::getInstance() {
+  static TemporaryFiles Instance;
+  return Instance;
+}
+
+TemporaryFiles::~TemporaryFiles() {
+  llvm::MutexGuard Guard(Mutex);
+  for (const auto &File : Files)
+    llvm::sys::fs::remove(File.getKey());
+}
+
+void TemporaryFiles::addFile(StringRef File) {
+  llvm::MutexGuard Guard(Mutex);
+  auto IsInserted = Files.insert(File).second;
+  (void)IsInserted;
+  assert(IsInserted && "File has already been added");
+}
+
+void TemporaryFiles::removeFile(StringRef File) {
+  llvm::MutexGuard Guard(Mutex);
+  auto WasPresent = Files.erase(File);
+  (void)WasPresent;
+  assert(WasPresent && "File was not tracked");
+  llvm::sys::fs::remove(File);
+}
+
+class PreambleMacroCallbacks : public PPCallbacks {
+public:
+  PreambleMacroCallbacks(PreambleCallbacks &Callbacks) : Callbacks(Callbacks) {}
+
+  void MacroDefined(const Token &MacroNameTok,
+                    const MacroDirective *MD) override {
+    Callbacks.HandleMacroDefined(MacroNameTok, MD);
+  }
+
+private:
+  PreambleCallbacks &Callbacks;
+};
+
+class PrecompilePreambleAction : public ASTFrontendAction {
+public:
+  PrecompilePreambleAction(PreambleCallbacks &Callbacks)
+      : Callbacks(Callbacks) {}
+
+  std::unique_ptr<ASTConsumer> CreateASTConsumer(CompilerInstance &CI,
+                                                 StringRef InFile) override;
+
+  bool hasEmittedPreamblePCH() const { return HasEmittedPreamblePCH; }
+
+  void setEmittedPreamblePCH(ASTWriter &Writer) {
+    this->HasEmittedPreamblePCH = true;
+    Callbacks.AfterPCHEmitted(Writer);
+  }
+
+  bool shouldEraseOutputFiles() override { return !hasEmittedPreamblePCH(); }
+  bool hasCodeCompletionSupport() const override { return false; }
+  bool hasASTFileSupport() const override { return false; }
+  TranslationUnitKind getTranslationUnitKind() override { return TU_Prefix; }
+
+private:
+  friend class PrecompilePreambleConsumer;
+
+  bool HasEmittedPreamblePCH = false;
+  PreambleCallbacks &Callbacks;
+};
+
+class PrecompilePreambleConsumer : public PCHGenerator {
+public:
+  PrecompilePreambleConsumer(PrecompilePreambleAction &Action,
+                             const Preprocessor &PP, StringRef isysroot,
+                             std::unique_ptr<raw_ostream> Out)
+      : PCHGenerator(PP, "", isysroot, std::make_shared(),
+                     ArrayRef>(),
+                     /*AllowASTWithErrors=*/true),
+        Action(Action), Out(std::move(Out)) {}
+
+  bool HandleTopLevelDecl(DeclGroupRef DG) override {
+    Action.Callbacks.HandleTopLevelDecl(DG);
+    return true;
+  }
+
+  void HandleTranslationUnit(ASTContext &Ctx) override {
+    PCHGenerator::HandleTranslationUnit(Ctx);
+    if (!hasEmittedPCH())
+      return;
+
+    // Write the generated bitstream to "Out".
+    *Out << getPCH();
+    // Make sure it hits disk now.
+    Out->flush();
+    // Free the buffer.
+    llvm::SmallVector<char, 0> Empty;
+    getPCH() = std::move(Empty);
+
+    Action.setEmittedPreamblePCH(getWriter());
+  }
+
+private:
+  PrecompilePreambleAction &Action;
+  std::unique_ptr Out;
+};
+
+std::unique_ptr<ASTConsumer>
+PrecompilePreambleAction::CreateASTConsumer(CompilerInstance &CI,
+                                            StringRef InFile) {
+  std::string Sysroot;
+  std::string OutputFile;
+  std::unique_ptr<raw_pwrite_stream> OS =
+      GeneratePCHAction::ComputeASTConsumerArguments(CI, InFile, Sysroot,
+                                                     OutputFile);
+  if (!OS)
+    return nullptr;
+
+  if (!CI.getFrontendOpts().RelocatablePCH)
+    Sysroot.clear();
+
+  CI.getPreprocessor().addPPCallbacks(
+      llvm::make_unique<PreambleMacroCallbacks>(Callbacks));
+  return llvm::make_unique<PrecompilePreambleConsumer>(
+      *this, CI.getPreprocessor(), Sysroot, std::move(OS));
+}
+
+template <class T> bool moveOnNoError(llvm::ErrorOr<T> Val, T &Output) {
+  if (!Val)
+    return false;
+  Output = std::move(*Val);
+  return true;
+}
+
+} // namespace
+
+PreambleBounds clang::ComputePreambleBounds(const LangOptions &LangOpts,
+                                            llvm::MemoryBuffer *Buffer,
+                                            unsigned MaxLines) {
+  auto Pre = Lexer::ComputePreamble(Buffer->getBuffer(), LangOpts, MaxLines);
+  return PreambleBounds(Pre.first, Pre.second);
+}
+
+llvm::ErrorOr<PrecompiledPreamble> PrecompiledPreamble::Build(
+    const CompilerInvocation &Invocation,
+    const llvm::MemoryBuffer *MainFileBuffer, PreambleBounds Bounds,
+    DiagnosticsEngine &Diagnostics, IntrusiveRefCntPtr<vfs::FileSystem> VFS,
+    std::shared_ptr<PCHContainerOperations> PCHContainerOps,
+    PreambleCallbacks &Callbacks) {
+  assert(VFS && "VFS is null");
+
+  if (!Bounds.Size)
+    return BuildPreambleError::PreambleIsEmpty;
+
+  auto PreambleInvocation = std::make_shared<CompilerInvocation>(Invocation);
+  FrontendOptions &FrontendOpts = PreambleInvocation->getFrontendOpts();
+  PreprocessorOptions &PreprocessorOpts =
+      PreambleInvocation->getPreprocessorOpts();
+
+  // Create a temporary file for the precompiled preamble. In rare
+  // circumstances, this can fail.
+  llvm::ErrorOr<PrecompiledPreamble::TempPCHFile> PreamblePCHFile =
+      PrecompiledPreamble::TempPCHFile::CreateNewPreamblePCHFile();
+  if (!PreamblePCHFile)
+    return BuildPreambleError::CouldntCreateTempFile;
+
+  // Save the preamble text for later; we'll need to compare against it for
+  // subsequent reparses.
+  std::vector<char> PreambleBytes(MainFileBuffer->getBufferStart(),
+                                  MainFileBuffer->getBufferStart() +
+                                      Bounds.Size);
+  bool PreambleEndsAtStartOfLine = Bounds.PreambleEndsAtStartOfLine;
+
+  // Tell the compiler invocation to generate a temporary precompiled header.
+  FrontendOpts.ProgramAction = frontend::GeneratePCH;
+  // FIXME: Generate the precompiled header into memory?
+  FrontendOpts.OutputFile = PreamblePCHFile->getFilePath();
+  PreprocessorOpts.PrecompiledPreambleBytes.first = 0;
+  PreprocessorOpts.PrecompiledPreambleBytes.second = false;
+
+  // Create the compiler instance to use for building the precompiled preamble.
+  std::unique_ptr<CompilerInstance> Clang(
+      new CompilerInstance(std::move(PCHContainerOps)));
+
+  // Recover resources if we crash before exiting this method.
+  llvm::CrashRecoveryContextCleanupRegistrar<CompilerInstance> CICleanup(
+      Clang.get());
+
+  Clang->setInvocation(std::move(PreambleInvocation));
+  Clang->setDiagnostics(&Diagnostics);
+
+  // Create the target instance.
+  Clang->setTarget(TargetInfo::CreateTargetInfo(
+      Clang->getDiagnostics(), Clang->getInvocation().TargetOpts));
+  if (!Clang->hasTarget())
+    return BuildPreambleError::CouldntCreateTargetInfo;
+
+  // Inform the target of the language options.
+  //
+  // FIXME: We shouldn't need to do this, the target should be immutable once
+  // created. This complexity should be lifted elsewhere.
+  Clang->getTarget().adjust(Clang->getLangOpts());
+
+  assert(Clang->getFrontendOpts().Inputs.size() == 1 &&
+         "Invocation must have exactly one source file!");
+  assert(Clang->getFrontendOpts().Inputs[0].getKind().getFormat() ==
+             InputKind::Source &&
+         "FIXME: AST inputs not yet supported here!");
+  assert(Clang->getFrontendOpts().Inputs[0].getKind().getLanguage() !=
+             InputKind::LLVM_IR &&
+         "IR inputs not support here!");
+
+  // Clear out old caches and data.
+  Diagnostics.Reset();
+  ProcessWarningOptions(Diagnostics, Clang->getDiagnosticOpts());
+
+  VFS =
+      createVFSFromCompilerInvocation(Clang->getInvocation(), Diagnostics, VFS);
+  if (!VFS)
+    return BuildPreambleError::CouldntCreateVFSOverlay;
+
+  // Create a file manager object to provide access to and cache the filesystem.
+  Clang->setFileManager(new FileManager(Clang->getFileSystemOpts(), VFS));
+
+  // Create the source manager.
+  Clang->setSourceManager(
+      new SourceManager(Diagnostics, Clang->getFileManager()));
+
+  auto PreambleDepCollector = std::make_shared<DependencyCollector>();
+  Clang->addDependencyCollector(PreambleDepCollector);
+
+  // Remap the main source file to the preamble buffer.
+  StringRef MainFilePath = FrontendOpts.Inputs[0].getFile();
+  auto PreambleInputBuffer = llvm::MemoryBuffer::getMemBufferCopy(
+      MainFileBuffer->getBuffer().slice(0, Bounds.Size), MainFilePath);
+  if (PreprocessorOpts.RetainRemappedFileBuffers) {
+    // MainFileBuffer will be deleted by unique_ptr after leaving the method.
+    PreprocessorOpts.addRemappedFile(MainFilePath, PreambleInputBuffer.get());
+  } else {
+    // Otherwise, the remapped buffer will be deleted by CompilerInstance on
+    // BeginSourceFile, so we call release() to avoid double deletion.
+    PreprocessorOpts.addRemappedFile(MainFilePath,
+                                     PreambleInputBuffer.release());
+  }
+
+  std::unique_ptr<PrecompilePreambleAction> Act;
+  Act.reset(new PrecompilePreambleAction(Callbacks));
+  if (!Act->BeginSourceFile(*Clang.get(), Clang->getFrontendOpts().Inputs[0]))
+    return BuildPreambleError::BeginSourceFileFailed;
+
+  Act->Execute();
+
+  // Run the callbacks.
+  Callbacks.AfterExecute(*Clang);
+
+  Act->EndSourceFile();
+
+  if (!Act->hasEmittedPreamblePCH())
+    return BuildPreambleError::CouldntEmitPCH;
+
+  // Keep track of all of the files that the source manager knows about,
+  // so we can verify whether they have changed or not.
+  llvm::StringMap<PrecompiledPreamble::PreambleFileHash> FilesInPreamble;
+
+  SourceManager &SourceMgr = Clang->getSourceManager();
+  for (auto &Filename : PreambleDepCollector->getDependencies()) {
+    const FileEntry *File = Clang->getFileManager().getFile(Filename);
+    if (!File || File == SourceMgr.getFileEntryForID(SourceMgr.getMainFileID()))
+      continue;
+    if (time_t ModTime = File->getModificationTime()) {
+      FilesInPreamble[File->getName()] =
+          PrecompiledPreamble::PreambleFileHash::createForFile(File->getSize(),
+                                                               ModTime);
+    } else {
+      llvm::MemoryBuffer *Buffer = SourceMgr.getMemoryBufferForFile(File);
+      FilesInPreamble[File->getName()] =
+          PrecompiledPreamble::PreambleFileHash::createForMemoryBuffer(Buffer);
+    }
+  }
+
+  return PrecompiledPreamble(
+      std::move(*PreamblePCHFile), std::move(PreambleBytes),
+      PreambleEndsAtStartOfLine, std::move(FilesInPreamble));
+}
+
+PreambleBounds PrecompiledPreamble::getBounds() const {
+  return PreambleBounds(PreambleBytes.size(), PreambleEndsAtStartOfLine);
+}
+
+bool PrecompiledPreamble::CanReuse(const CompilerInvocation &Invocation,
+                                   const llvm::MemoryBuffer *MainFileBuffer,
+                                   PreambleBounds Bounds,
+                                   vfs::FileSystem *VFS) const {
+
+  assert(
+      Bounds.Size <= MainFileBuffer->getBufferSize() &&
+      "Buffer is too large. Bounds were calculated from a different buffer?");
+
+  auto PreambleInvocation = std::make_shared<CompilerInvocation>(Invocation);
+  PreprocessorOptions &PreprocessorOpts =
+      PreambleInvocation->getPreprocessorOpts();
+
+  if (!Bounds.Size)
+    return false;
+
+  // We've previously computed a preamble. Check whether we have the same
+  // preamble now that we did before, and that there's enough space in
+  // the main-file buffer within the precompiled preamble to fit the
+  // new main file.
+  if (PreambleBytes.size() != Bounds.Size ||
+      PreambleEndsAtStartOfLine != Bounds.PreambleEndsAtStartOfLine ||
+      memcmp(PreambleBytes.data(), MainFileBuffer->getBufferStart(),
+             Bounds.Size) != 0)
+    return false;
+  // The preamble has not changed. We may be able to re-use the precompiled
+  // preamble.
+
+  // Check that none of the files used by the preamble have changed.
+  // First, make a record of those files that have been overridden via
+  // remapping or unsaved_files.
+  std::map<llvm::sys::fs::UniqueID, PreambleFileHash> OverriddenFiles;
+  for (const auto &R : PreprocessorOpts.RemappedFiles) {
+    vfs::Status Status;
+    if (!moveOnNoError(VFS->status(R.second), Status)) {
+      // If we can't stat the file we're remapping to, assume that something
+      // horrible happened.
+      return false;
+    }
+
+    OverriddenFiles[Status.getUniqueID()] = PreambleFileHash::createForFile(
+        Status.getSize(), llvm::sys::toTimeT(Status.getLastModificationTime()));
+  }
+
+  for (const auto &RB : PreprocessorOpts.RemappedFileBuffers) {
+    vfs::Status Status;
+    if (!moveOnNoError(VFS->status(RB.first), Status))
+      return false;
+
+    OverriddenFiles[Status.getUniqueID()] =
+        PreambleFileHash::createForMemoryBuffer(RB.second);
+  }
+
+  // Check whether anything has changed.
+  for (const auto &F : FilesInPreamble) {
+    vfs::Status Status;
+    if (!moveOnNoError(VFS->status(F.first()), Status)) {
+      // If we can't stat the file, assume that something horrible happened.
+      return false;
+    }
+
+    std::map<llvm::sys::fs::UniqueID, PreambleFileHash>::iterator Overridden =
+        OverriddenFiles.find(Status.getUniqueID());
+    if (Overridden != OverriddenFiles.end()) {
+      // This file was remapped; check whether the newly-mapped file
+      // matches up with the previous mapping.
+      if (Overridden->second != F.second)
+        return false;
+      continue;
+    }
+
+    // The file was not remapped; check whether it has changed on disk.
+    if (Status.getSize() != uint64_t(F.second.Size) ||
+        llvm::sys::toTimeT(Status.getLastModificationTime()) !=
+            F.second.ModTime)
+      return false;
+  }
+  return true;
+}
+
+void PrecompiledPreamble::AddImplicitPreamble(
+    CompilerInvocation &CI, llvm::MemoryBuffer *MainFileBuffer) const {
+  auto &PreprocessorOpts = CI.getPreprocessorOpts();
+
+  // Configure ImplicitPCHInclude.
+  PreprocessorOpts.PrecompiledPreambleBytes.first = PreambleBytes.size();
+  PreprocessorOpts.PrecompiledPreambleBytes.second = PreambleEndsAtStartOfLine;
+  PreprocessorOpts.ImplicitPCHInclude = PCHFile.getFilePath();
+  PreprocessorOpts.DisablePCHValidation = true;
+
+  // Remap main file to point to MainFileBuffer.
+  auto MainFilePath = CI.getFrontendOpts().Inputs[0].getFile();
+  PreprocessorOpts.addRemappedFile(MainFilePath, MainFileBuffer);
+}
+
+PrecompiledPreamble::PrecompiledPreamble(
+    TempPCHFile PCHFile, std::vector<char> PreambleBytes,
+    bool PreambleEndsAtStartOfLine,
+    llvm::StringMap<PreambleFileHash> FilesInPreamble)
+    : PCHFile(std::move(PCHFile)), FilesInPreamble(FilesInPreamble),
+      PreambleBytes(std::move(PreambleBytes)),
+      PreambleEndsAtStartOfLine(PreambleEndsAtStartOfLine) {}
+
+llvm::ErrorOr<PrecompiledPreamble::TempPCHFile>
+PrecompiledPreamble::TempPCHFile::CreateNewPreamblePCHFile() {
+  // FIXME: This is a hack so that we can override the preamble file during
+  // crash-recovery testing, which is the only case where the preamble files
+  // are not necessarily cleaned up.
+  const char *TmpFile = ::getenv("CINDEXTEST_PREAMBLE_FILE");
+  if (TmpFile)
+    return TempPCHFile::createFromCustomPath(TmpFile);
+  return TempPCHFile::createInSystemTempDir("preamble", "pch");
+}
+
+llvm::ErrorOr<PrecompiledPreamble::TempPCHFile>
+PrecompiledPreamble::TempPCHFile::createInSystemTempDir(const Twine &Prefix,
+                                                        StringRef Suffix) {
+  llvm::SmallString<64> File;
+  auto EC = llvm::sys::fs::createTemporaryFile(Prefix, Suffix, /*ref*/ File);
+  if (EC)
+    return EC;
+  return TempPCHFile(std::move(File).str());
+}
+
+llvm::ErrorOr<PrecompiledPreamble::TempPCHFile>
+PrecompiledPreamble::TempPCHFile::createFromCustomPath(const Twine &Path) {
+  return TempPCHFile(Path.str());
+}
+
+PrecompiledPreamble::TempPCHFile::TempPCHFile(std::string FilePath)
+    : FilePath(std::move(FilePath)) {
+  TemporaryFiles::getInstance().addFile(*this->FilePath);
+}
+
+PrecompiledPreamble::TempPCHFile::TempPCHFile(TempPCHFile &&Other) {
+  FilePath = std::move(Other.FilePath);
+  Other.FilePath = None;
+}
+
+PrecompiledPreamble::TempPCHFile &PrecompiledPreamble::TempPCHFile::
+operator=(TempPCHFile &&Other) {
+  RemoveFileIfPresent();
+
+  FilePath = std::move(Other.FilePath);
+  Other.FilePath = None;
+  return *this;
+}
+
+PrecompiledPreamble::TempPCHFile::~TempPCHFile() { RemoveFileIfPresent(); }
+
+void PrecompiledPreamble::TempPCHFile::RemoveFileIfPresent() {
+  if (FilePath) {
+    TemporaryFiles::getInstance().removeFile(*FilePath);
+    FilePath = None;
+  }
+}
+
+llvm::StringRef PrecompiledPreamble::TempPCHFile::getFilePath() const {
+  assert(FilePath && "TempPCHFile doesn't have a FilePath. Had it been moved?");
+  return *FilePath;
+}
+
+PrecompiledPreamble::PreambleFileHash
+PrecompiledPreamble::PreambleFileHash::createForFile(off_t Size,
+                                                     time_t ModTime) {
+  PreambleFileHash Result;
+  Result.Size = Size;
+  Result.ModTime = ModTime;
+  Result.MD5 = {};
+  return Result;
+}
+
+PrecompiledPreamble::PreambleFileHash
+PrecompiledPreamble::PreambleFileHash::createForMemoryBuffer(
+    const llvm::MemoryBuffer *Buffer) {
+  PreambleFileHash Result;
+  Result.Size = Buffer->getBufferSize();
+  Result.ModTime = 0;
+
+  llvm::MD5 MD5Ctx;
+  MD5Ctx.update(Buffer->getBuffer().data());
+  MD5Ctx.final(Result.MD5);
+
+  return Result;
+}
+
+void PreambleCallbacks::AfterExecute(CompilerInstance &CI) {}
+void PreambleCallbacks::AfterPCHEmitted(ASTWriter &Writer) {}
+void PreambleCallbacks::HandleTopLevelDecl(DeclGroupRef DG) {}
+void PreambleCallbacks::HandleMacroDefined(const Token &MacroNameTok,
+                                           const MacroDirective *MD) {}
+
+std::error_code clang::make_error_code(BuildPreambleError Error) {
+  return std::error_code(static_cast<int>(Error), BuildPreambleErrorCategory());
+}
+
+const char *BuildPreambleErrorCategory::name() const noexcept {
+  return "build-preamble.error";
+}
+
+std::string BuildPreambleErrorCategory::message(int condition) const {
+  switch (static_cast<BuildPreambleError>(condition)) {
+  case BuildPreambleError::PreambleIsEmpty:
+    return "Preamble is empty";
+  case BuildPreambleError::CouldntCreateTempFile:
+    return "Could not create temporary file for PCH";
+  case BuildPreambleError::CouldntCreateTargetInfo:
+    return "CreateTargetInfo() return null";
+  case BuildPreambleError::CouldntCreateVFSOverlay:
+    return "Could not create VFS Overlay";
+  case BuildPreambleError::BeginSourceFileFailed:
+    return "BeginSourceFile() return an error";
+  case BuildPreambleError::CouldntEmitPCH:
+    return "Could not emit PCH";
+  }
+  llvm_unreachable("unexpected BuildPreambleError");
+}
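The file above defines a small lifecycle: compute the preamble bounds, build the PCH once, then on each reparse test CanReuse and re-attach the PCH with AddImplicitPreamble. A hedged usage sketch, not code from this patch; Invocation, Buffer, NewBuffer, Diags, VFS and PCHOps are objects the caller is assumed to own already:

    using namespace clang;

    // Build the preamble once for the initial buffer.
    PreambleCallbacks CB;
    PreambleBounds Bounds =
        ComputePreambleBounds(*Invocation.getLangOpts(), Buffer.get(), 0);
    llvm::ErrorOr<PrecompiledPreamble> Preamble = PrecompiledPreamble::Build(
        Invocation, Buffer.get(), Bounds, Diags, VFS, PCHOps, CB);

    // On a later reparse, reuse it only if nothing relevant has changed.
    PreambleBounds NewBounds =
        ComputePreambleBounds(*Invocation.getLangOpts(), NewBuffer.get(), 0);
    if (Preamble &&
        Preamble->CanReuse(Invocation, NewBuffer.get(), NewBounds, VFS.get()))
      Preamble->AddImplicitPreamble(Invocation, NewBuffer.get());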
diff --git a/interpreter/llvm/src/tools/clang/lib/Frontend/PrintPreprocessedOutput.cpp b/interpreter/llvm/src/tools/clang/lib/Frontend/PrintPreprocessedOutput.cpp
index 832eaf2926f04..914039ad5bb1d 100644
--- a/interpreter/llvm/src/tools/clang/lib/Frontend/PrintPreprocessedOutput.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Frontend/PrintPreprocessedOutput.cpp
@@ -38,8 +38,8 @@ static void PrintMacroDefinition(const IdentifierInfo &II, const MacroInfo &MI,
 
   if (MI.isFunctionLike()) {
     OS << '(';
-    if (!MI.arg_empty()) {
-      MacroInfo::arg_iterator AI = MI.arg_begin(), E = MI.arg_end();
+    if (!MI.param_empty()) {
+      MacroInfo::param_iterator AI = MI.param_begin(), E = MI.param_end();
       for (; AI+1 != E; ++AI) {
         OS << (*AI)->getName();
         OS << ',';
@@ -349,7 +349,7 @@ void PrintPPOutputPPCallbacks::InclusionDirective(SourceLocation HashLoc,
     case tok::pp_include_next:
       startNewLineIfNeeded();
       MoveToLine(HashLoc);
-      OS << "#pragma clang module import " << Imported->getFullModuleName()
+      OS << "#pragma clang module import " << Imported->getFullModuleName(true)
          << " /* clang -E: implicit import for "
          << "#" << PP.getSpelling(IncludeTok) << " "
          << (IsAngled ? '<' : '"') << FileName << (IsAngled ? '>' : '"')
@@ -378,14 +378,14 @@ void PrintPPOutputPPCallbacks::InclusionDirective(SourceLocation HashLoc,
 /// Handle entering the scope of a module during a module compilation.
 void PrintPPOutputPPCallbacks::BeginModule(const Module *M) {
   startNewLineIfNeeded();
-  OS << "#pragma clang module begin " << M->getFullModuleName();
+  OS << "#pragma clang module begin " << M->getFullModuleName(true);
   setEmittedDirectiveOnThisLine();
 }
 
 /// Handle leaving the scope of a module during a module compilation.
 void PrintPPOutputPPCallbacks::EndModule(const Module *M) {
   startNewLineIfNeeded();
-  OS << "#pragma clang module end /*" << M->getFullModuleName() << "*/";
+  OS << "#pragma clang module end /*" << M->getFullModuleName(true) << "*/";
   setEmittedDirectiveOnThisLine();
 }
 
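Passing true to getFullModuleName asks for module-path components that are not valid identifiers to be printed as string literals, keeping the emitted pragmas re-lexable. Illustrative output for an invented submodule whose name contains a hyphen:

    #pragma clang module begin mylib."has-dash"
    #pragma clang module end /*mylib."has-dash"*/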
diff --git a/interpreter/llvm/src/tools/clang/lib/Frontend/Rewrite/CMakeLists.txt b/interpreter/llvm/src/tools/clang/lib/Frontend/Rewrite/CMakeLists.txt
index 924bf5d5ee284..61a22b5b13b4d 100644
--- a/interpreter/llvm/src/tools/clang/lib/Frontend/Rewrite/CMakeLists.txt
+++ b/interpreter/llvm/src/tools/clang/lib/Frontend/Rewrite/CMakeLists.txt
@@ -19,4 +19,5 @@ add_clang_library(clangRewriteFrontend
   clangFrontend
   clangLex
   clangRewrite
+  clangSerialization
   )
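The new clangSerialization dependency is pulled in by the rewriter changes in the next file, which now reach into ASTReader, serialization::ModuleFile and the ModuleManager to rewrite imported module files.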
diff --git a/interpreter/llvm/src/tools/clang/lib/Frontend/Rewrite/FrontendActions.cpp b/interpreter/llvm/src/tools/clang/lib/Frontend/Rewrite/FrontendActions.cpp
index 8c5eb161b5ab1..5efa6aeaf760a 100644
--- a/interpreter/llvm/src/tools/clang/lib/Frontend/Rewrite/FrontendActions.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Frontend/Rewrite/FrontendActions.cpp
@@ -9,6 +9,8 @@
 
 #include "clang/Rewrite/Frontend/FrontendActions.h"
 #include "clang/AST/ASTConsumer.h"
+#include "clang/Basic/CharInfo.h"
+#include "clang/Config/config.h"
 #include "clang/Frontend/CompilerInstance.h"
 #include "clang/Frontend/FrontendActions.h"
 #include "clang/Frontend/FrontendDiagnostic.h"
@@ -18,6 +20,11 @@
 #include "clang/Rewrite/Frontend/ASTConsumers.h"
 #include "clang/Rewrite/Frontend/FixItRewriter.h"
 #include "clang/Rewrite/Frontend/Rewriters.h"
+#include "clang/Serialization/ASTReader.h"
+#include "clang/Serialization/Module.h"
+#include "clang/Serialization/ModuleManager.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/Support/CrashRecoveryContext.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/raw_ostream.h"
@@ -86,8 +93,7 @@ class FixItRewriteToTemp : public FixItOptions {
 };
 } // end anonymous namespace
 
-bool FixItAction::BeginSourceFileAction(CompilerInstance &CI,
-                                        StringRef Filename) {
+bool FixItAction::BeginSourceFileAction(CompilerInstance &CI) {
   const FrontendOptions &FEOpts = getCompilerInstance().getFrontendOpts();
   if (!FEOpts.FixItSuffix.empty()) {
     FixItOpts.reset(new FixItActionSuffixInserter(FEOpts.FixItSuffix,
@@ -190,24 +196,123 @@ void RewriteTestAction::ExecuteAction() {
   DoRewriteTest(CI.getPreprocessor(), OS.get());
 }
 
-void RewriteIncludesAction::ExecuteAction() {
-  CompilerInstance &CI = getCompilerInstance();
-  std::unique_ptr<raw_ostream> OS =
-      CI.createDefaultOutputFile(true, getCurrentFile());
-  if (!OS) return;
+class RewriteIncludesAction::RewriteImportsListener : public ASTReaderListener {
+  CompilerInstance &CI;
+  std::weak_ptr<raw_ostream> Out;
+
+  llvm::DenseSet<const FileEntry *> Rewritten;
+
+public:
+  RewriteImportsListener(CompilerInstance &CI, std::shared_ptr<raw_ostream> Out)
+      : CI(CI), Out(Out) {}
+
+  void visitModuleFile(StringRef Filename,
+                       serialization::ModuleKind Kind) override {
+    auto *File = CI.getFileManager().getFile(Filename);
+    assert(File && "missing file for loaded module?");
+
+    // Only rewrite each module file once.
+    if (!Rewritten.insert(File).second)
+      return;
+
+    serialization::ModuleFile *MF =
+        CI.getModuleManager()->getModuleManager().lookup(File);
+    assert(File && "missing module file for loaded module?");
+
+    // Not interested in PCH / preambles.
+    if (!MF->isModule())
+      return;
+
+    auto OS = Out.lock();
+    assert(OS && "loaded module file after finishing rewrite action?");
+
+    (*OS) << "#pragma clang module build ";
+    if (isValidIdentifier(MF->ModuleName))
+      (*OS) << MF->ModuleName;
+    else {
+      (*OS) << '"';
+      OS->write_escaped(MF->ModuleName);
+      (*OS) << '"';
+    }
+    (*OS) << '\n';
+
+    // Rewrite the contents of the module in a separate compiler instance.
+    CompilerInstance Instance(CI.getPCHContainerOperations(),
+                              &CI.getPreprocessor().getPCMCache());
+    Instance.setInvocation(
+        std::make_shared<CompilerInvocation>(CI.getInvocation()));
+    Instance.createDiagnostics(
+        new ForwardingDiagnosticConsumer(CI.getDiagnosticClient()),
+        /*ShouldOwnClient=*/true);
+    Instance.getFrontendOpts().DisableFree = false;
+    Instance.getFrontendOpts().Inputs.clear();
+    Instance.getFrontendOpts().Inputs.emplace_back(
+        Filename, InputKind(InputKind::Unknown, InputKind::Precompiled));
+    Instance.getFrontendOpts().ModuleFiles.clear();
+    Instance.getFrontendOpts().ModuleMapFiles.clear();
+    // Don't recursively rewrite imports. We handle them all at the top level.
+    Instance.getPreprocessorOutputOpts().RewriteImports = false;
+
+    llvm::CrashRecoveryContext().RunSafelyOnThread([&]() {
+      RewriteIncludesAction Action;
+      Action.OutputStream = OS;
+      Instance.ExecuteAction(Action);
+    });
+
+    (*OS) << "#pragma clang module endbuild /*" << MF->ModuleName << "*/\n";
+  }
+};
+
+bool RewriteIncludesAction::BeginSourceFileAction(CompilerInstance &CI) {
+  if (!OutputStream) {
+    OutputStream = CI.createDefaultOutputFile(true, getCurrentFile());
+    if (!OutputStream)
+      return false;
+  }
+
+  auto &OS = *OutputStream;
 
   // If we're preprocessing a module map, start by dumping the contents of the
   // module itself before switching to the input buffer.
   auto &Input = getCurrentInput();
   if (Input.getKind().getFormat() == InputKind::ModuleMap) {
-    if (Input.isFile())
-      (*OS) << "# 1 \"" << Input.getFile() << "\"\n";
-    // FIXME: Include additional information here so that we don't need the
-    // original source files to exist on disk.
-    getCurrentModule()->print(*OS);
-    (*OS) << "#pragma clang module contents\n";
+    if (Input.isFile()) {
+      OS << "# 1 \"";
+      OS.write_escaped(Input.getFile());
+      OS << "\"\n";
+    }
+    getCurrentModule()->print(OS);
+    OS << "#pragma clang module contents\n";
+  }
+
+  // If we're rewriting imports, set up a listener to track when we import
+  // module files.
+  if (CI.getPreprocessorOutputOpts().RewriteImports) {
+    CI.createModuleManager();
+    CI.getModuleManager()->addListener(
+        llvm::make_unique<RewriteImportsListener>(CI, OutputStream));
+  }
+
+  return true;
+}
+
+void RewriteIncludesAction::ExecuteAction() {
+  CompilerInstance &CI = getCompilerInstance();
+
+  // If we're rewriting imports, emit the module build output first rather
+  // than switching back and forth (potentially in the middle of a line).
+  if (CI.getPreprocessorOutputOpts().RewriteImports) {
+    std::string Buffer;
+    llvm::raw_string_ostream OS(Buffer);
+
+    RewriteIncludesInInput(CI.getPreprocessor(), &OS,
+                           CI.getPreprocessorOutputOpts());
+
+    (*OutputStream) << OS.str();
+  } else {
+    RewriteIncludesInInput(CI.getPreprocessor(), OutputStream.get(),
+                           CI.getPreprocessorOutputOpts());
   }
 
-  RewriteIncludesInInput(CI.getPreprocessor(), OS.get(),
-                         CI.getPreprocessorOutputOpts());
+  OutputStream.reset();
 }
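With RewriteImports enabled (-frewrite-imports), each transitively loaded module file is replayed through a nested RewriteIncludesAction and emitted as a self-delimiting block, so the final -E output carries its modules inline. Schematically (module name invented):

    #pragma clang module build MyModule
    ...rewritten sources of MyModule...
    #pragma clang module endbuild /*MyModule*/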
diff --git a/interpreter/llvm/src/tools/clang/lib/Frontend/Rewrite/InclusionRewriter.cpp b/interpreter/llvm/src/tools/clang/lib/Frontend/Rewrite/InclusionRewriter.cpp
index d45cbc01df8ce..e0477069b3406 100644
--- a/interpreter/llvm/src/tools/clang/lib/Frontend/Rewrite/InclusionRewriter.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Frontend/Rewrite/InclusionRewriter.cpp
@@ -140,7 +140,7 @@ void InclusionRewriter::WriteLineInfo(StringRef Filename, int Line,
 }
 
 void InclusionRewriter::WriteImplicitModuleImport(const Module *Mod) {
-  OS << "#pragma clang module import " << Mod->getFullModuleName()
+  OS << "#pragma clang module import " << Mod->getFullModuleName(true)
      << " /* clang -frewrite-includes: implicit import */" << MainEOL;
 }
 
@@ -177,7 +177,9 @@ void InclusionRewriter::FileSkipped(const FileEntry &/*SkippedFile*/,
 /// directives. It does not say whether the file has been included, but it
 /// provides more information about the directive (hash location instead
 /// of location inside the included file). It is assumed that the matching
-/// FileChanged() or FileSkipped() is called after this.
+/// FileChanged() or FileSkipped() is called after this (or neither is
+/// called if this #include results in an error or does not textually include
+/// anything).
 void InclusionRewriter::InclusionDirective(SourceLocation HashLoc,
                                            const Token &/*IncludeTok*/,
                                            StringRef /*FileName*/,
@@ -187,9 +189,6 @@ void InclusionRewriter::InclusionDirective(SourceLocation HashLoc,
                                            StringRef /*SearchPath*/,
                                            StringRef /*RelativePath*/,
                                            const Module *Imported) {
-  assert(LastInclusionLocation.isInvalid() &&
-         "Another inclusion directive was found before the previous one "
-         "was processed");
   if (Imported) {
     auto P = ModuleIncludes.insert(
         std::make_pair(HashLoc.getRawEncoding(), Imported));
@@ -472,15 +471,15 @@ void InclusionRewriter::Process(FileID FileId,
             else if (const IncludedFile *Inc = FindIncludeAtLocation(Loc)) {
               const Module *Mod = FindEnteredModule(Loc);
               if (Mod)
-                OS << "#pragma clang module begin " << Mod->getFullModuleName()
-                   << "\n";
+                OS << "#pragma clang module begin "
+                   << Mod->getFullModuleName(true) << "\n";
 
               // Include and recursively process the file.
               Process(Inc->Id, Inc->FileType);
 
               if (Mod)
-                OS << "#pragma clang module end /*" << Mod->getFullModuleName()
-                   << "*/\n";
+                OS << "#pragma clang module end /*"
+                   << Mod->getFullModuleName(true) << "*/\n";
 
               // Add line marker to indicate we're returning from an included
               // file.
diff --git a/interpreter/llvm/src/tools/clang/lib/Frontend/Rewrite/RewriteModernObjC.cpp b/interpreter/llvm/src/tools/clang/lib/Frontend/Rewrite/RewriteModernObjC.cpp
index 83290a6fbc284..21686b8c78ea5 100644
--- a/interpreter/llvm/src/tools/clang/lib/Frontend/Rewrite/RewriteModernObjC.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Frontend/Rewrite/RewriteModernObjC.cpp
@@ -21,6 +21,7 @@
 #include "clang/Basic/IdentifierTable.h"
 #include "clang/Basic/SourceManager.h"
 #include "clang/Basic/TargetInfo.h"
+#include "clang/Config/config.h"
 #include "clang/Lex/Lexer.h"
 #include "clang/Rewrite/Core/Rewriter.h"
 #include "llvm/ADT/DenseSet.h"
@@ -146,7 +147,7 @@ namespace {
     
     llvm::DenseMap<BlockExpr *, std::string> RewrittenBlockExprs;
     llvm::DenseMap<ObjCInterfaceDecl *,
-                    llvm::SmallPtrSet<ObjCIvarDecl *, 8> > ReferencedIvars;
+                    llvm::SmallSetVector<ObjCIvarDecl *, 8> > ReferencedIvars;
     
     // ivar bitfield grouping containers
     llvm::DenseSet<ObjCInterfaceDecl *> ObjCInterefaceHasBitfieldGroups;
@@ -1013,7 +1014,7 @@ void RewriteModernObjC::RewritePropertyImplDecl(ObjCPropertyImplDecl *PID,
     Setr = "\nextern \"C\" __declspec(dllimport) "
     "void objc_setProperty (id, SEL, long, id, bool, bool);\n";
   }
-  
+
   RewriteObjCMethodDecl(OID->getContainingInterface(), 
                         PD->getSetterMethodDecl(), Setr);
   Setr += "{ ";
@@ -3965,10 +3966,11 @@ void RewriteModernObjC::RewriteIvarOffsetSymbols(ObjCInterfaceDecl *CDecl,
                                                   std::string &Result) {
   // write out ivar offset symbols which have been referenced in an ivar
   // access expression.
-  llvm::SmallPtrSet<ObjCIvarDecl *, 8> Ivars = ReferencedIvars[CDecl];
+  llvm::SmallSetVector<ObjCIvarDecl *, 8> Ivars = ReferencedIvars[CDecl];
+
   if (Ivars.empty())
     return;
-  
+
   llvm::DenseSet<std::pair<const ObjCInterfaceDecl*, unsigned> > GroupSymbolOutput;
   for (ObjCIvarDecl *IvarDecl : Ivars) {
     const ObjCInterfaceDecl *IDecl = IvarDecl->getContainingInterface();
@@ -6068,7 +6070,7 @@ void RewriteModernObjC::Initialize(ASTContext &context) {
   Preamble += "\n#define __OFFSETOFIVAR__(TYPE, MEMBER) ((long long) &((TYPE *)0)->MEMBER)\n";
 }
 
-/// RewriteIvarOffsetComputation - This rutine synthesizes computation of
+/// RewriteIvarOffsetComputation - This routine synthesizes computation of
 /// ivar offset.
 void RewriteModernObjC::RewriteIvarOffsetComputation(ObjCIvarDecl *ivar,
                                                          std::string &Result) {
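The container swap above is about determinism: SmallPtrSet iterates in pointer order, which varies from run to run, while SmallSetVector keeps elements unique and iterates in insertion order, so the synthesized ivar-offset symbols come out in a stable order. A tiny self-contained illustration of the property (values arbitrary):

    #include "llvm/ADT/SetVector.h"
    #include <cassert>

    void demo() {
      llvm::SmallSetVector<int, 8> SV;
      SV.insert(3);
      SV.insert(1);
      SV.insert(3);             // duplicate: ignored, order unchanged
      assert(SV.size() == 2);
      assert(*SV.begin() == 3); // iteration follows insertion order
    }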
diff --git a/interpreter/llvm/src/tools/clang/lib/Frontend/Rewrite/RewriteObjC.cpp b/interpreter/llvm/src/tools/clang/lib/Frontend/Rewrite/RewriteObjC.cpp
index 7d809c610c86c..e0d813df70f8a 100644
--- a/interpreter/llvm/src/tools/clang/lib/Frontend/Rewrite/RewriteObjC.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Frontend/Rewrite/RewriteObjC.cpp
@@ -20,6 +20,7 @@
 #include "clang/Basic/Diagnostic.h"
 #include "clang/Basic/IdentifierTable.h"
 #include "clang/Basic/SourceManager.h"
+#include "clang/Config/config.h"
 #include "clang/Lex/Lexer.h"
 #include "clang/Rewrite/Core/Rewriter.h"
 #include "llvm/ADT/DenseSet.h"
@@ -5052,7 +5053,7 @@ void RewriteObjCFragileABI::Initialize(ASTContext &context) {
   Preamble += "\n#define __OFFSETOFIVAR__(TYPE, MEMBER) ((long long) &((TYPE *)0)->MEMBER)\n";
 }
 
-/// RewriteIvarOffsetComputation - This rutine synthesizes computation of
+/// RewriteIvarOffsetComputation - This routine synthesizes computation of
 /// ivar offset.
 void RewriteObjCFragileABI::RewriteIvarOffsetComputation(ObjCIvarDecl *ivar,
                                                          std::string &Result) {
diff --git a/interpreter/llvm/src/tools/clang/lib/Frontend/SerializedDiagnosticPrinter.cpp b/interpreter/llvm/src/tools/clang/lib/Frontend/SerializedDiagnosticPrinter.cpp
index 7f88c919e24ac..7666fe10b3818 100644
--- a/interpreter/llvm/src/tools/clang/lib/Frontend/SerializedDiagnosticPrinter.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Frontend/SerializedDiagnosticPrinter.cpp
@@ -63,27 +63,20 @@ class SDiagsRenderer : public DiagnosticNoteRenderer {
   ~SDiagsRenderer() override {}
 
 protected:
-  void emitDiagnosticMessage(SourceLocation Loc,
-                             PresumedLoc PLoc,
-                             DiagnosticsEngine::Level Level,
-                             StringRef Message,
+  void emitDiagnosticMessage(FullSourceLoc Loc, PresumedLoc PLoc,
+                             DiagnosticsEngine::Level Level, StringRef Message,
                              ArrayRef<CharSourceRange> Ranges,
-                             const SourceManager *SM,
                              DiagOrStoredDiag D) override;
 
-  void emitDiagnosticLoc(SourceLocation Loc, PresumedLoc PLoc,
+  void emitDiagnosticLoc(FullSourceLoc Loc, PresumedLoc PLoc,
                          DiagnosticsEngine::Level Level,
-                         ArrayRef<CharSourceRange> Ranges,
-                         const SourceManager &SM) override {}
+                         ArrayRef<CharSourceRange> Ranges) override {}
 
-  void emitNote(SourceLocation Loc, StringRef Message,
-                const SourceManager *SM) override;
+  void emitNote(FullSourceLoc Loc, StringRef Message) override;
 
-  void emitCodeContext(SourceLocation Loc,
-                       DiagnosticsEngine::Level Level,
-                       SmallVectorImpl<CharSourceRange>& Ranges,
-                       ArrayRef<FixItHint> Hints,
-                       const SourceManager &SM) override;
+  void emitCodeContext(FullSourceLoc Loc, DiagnosticsEngine::Level Level,
+                       SmallVectorImpl<CharSourceRange> &Ranges,
+                       ArrayRef<FixItHint> Hints) override;
 
   void beginDiagnostic(DiagOrStoredDiag D,
                        DiagnosticsEngine::Level Level) override;
@@ -193,11 +186,8 @@ class SDiagsWriter : public DiagnosticConsumer {
   void ExitDiagBlock();
 
   /// \brief Emit a DIAG record.
-  void EmitDiagnosticMessage(SourceLocation Loc,
-                             PresumedLoc PLoc,
-                             DiagnosticsEngine::Level Level,
-                             StringRef Message,
-                             const SourceManager *SM,
+  void EmitDiagnosticMessage(FullSourceLoc Loc, PresumedLoc PLoc,
+                             DiagnosticsEngine::Level Level, StringRef Message,
                              DiagOrStoredDiag D);
 
   /// \brief Emit FIXIT and SOURCE_RANGE records for a diagnostic.
@@ -220,16 +210,14 @@ class SDiagsWriter : public DiagnosticConsumer {
   /// \brief Emit (lazily) the file string and retrieved the file identifier.
   unsigned getEmitFile(const char *Filename);
 
-  /// \brief Add SourceLocation information the specified record.  
-  void AddLocToRecord(SourceLocation Loc, const SourceManager *SM,
-                      PresumedLoc PLoc, RecordDataImpl &Record,
-                      unsigned TokSize = 0);
+  /// \brief Add SourceLocation information to the specified record.
+  void AddLocToRecord(FullSourceLoc Loc, PresumedLoc PLoc,
+                      RecordDataImpl &Record, unsigned TokSize = 0);
 
   /// \brief Add SourceLocation information to the specified record.
-  void AddLocToRecord(SourceLocation Loc, RecordDataImpl &Record,
-                      const SourceManager *SM,
+  void AddLocToRecord(FullSourceLoc Loc, RecordDataImpl &Record,
                       unsigned TokSize = 0) {
-    AddLocToRecord(Loc, SM, SM ? SM->getPresumedLoc(Loc) : PresumedLoc(),
+    AddLocToRecord(Loc, Loc.hasManager() ? Loc.getPresumedLoc() : PresumedLoc(),
                    Record, TokSize);
   }
 
@@ -350,11 +338,8 @@ static void EmitRecordID(unsigned ID, const char *Name,
   Stream.EmitRecord(llvm::bitc::BLOCKINFO_CODE_SETRECORDNAME, Record);
 }
 
-void SDiagsWriter::AddLocToRecord(SourceLocation Loc,
-                                  const SourceManager *SM,
-                                  PresumedLoc PLoc,
-                                  RecordDataImpl &Record,
-                                  unsigned TokSize) {
+void SDiagsWriter::AddLocToRecord(FullSourceLoc Loc, PresumedLoc PLoc,
+                                  RecordDataImpl &Record, unsigned TokSize) {
   if (PLoc.isInvalid()) {
     // Emit a "sentinel" location.
     Record.push_back((unsigned)0); // File.
@@ -367,19 +352,19 @@ void SDiagsWriter::AddLocToRecord(SourceLocation Loc,
   Record.push_back(getEmitFile(PLoc.getFilename()));
   Record.push_back(PLoc.getLine());
   Record.push_back(PLoc.getColumn()+TokSize);
-  Record.push_back(SM->getFileOffset(Loc));
+  Record.push_back(Loc.getFileOffset());
 }
 
 void SDiagsWriter::AddCharSourceRangeToRecord(CharSourceRange Range,
                                               RecordDataImpl &Record,
                                               const SourceManager &SM) {
-  AddLocToRecord(Range.getBegin(), Record, &SM);
+  AddLocToRecord(FullSourceLoc(Range.getBegin(), SM), Record);
   unsigned TokSize = 0;
   if (Range.isTokenRange())
     TokSize = Lexer::MeasureTokenLength(Range.getEnd(),
                                         SM, *LangOpts);
-  
-  AddLocToRecord(Range.getEnd(), Record, &SM, TokSize);
+
+  AddLocToRecord(FullSourceLoc(Range.getEnd(), SM), Record, TokSize);
 }
 
 unsigned SDiagsWriter::getEmitFile(const char *FileName){
@@ -506,7 +491,7 @@ void SDiagsWriter::EmitBlockInfoBlock() {
   Abbrev->Add(BitCodeAbbrevOp(RECORD_FILENAME));
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 10)); // Mapped file ID.
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // Size.
-  Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // Modifcation time.  
+  Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // Modification time.  
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 16)); // Text size.
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // File name text.
   Abbrevs.set(RECORD_FILENAME, Stream.EmitBlockInfoAbbrev(BLOCK_DIAG,
@@ -606,8 +591,8 @@ void SDiagsWriter::HandleDiagnostic(DiagnosticsEngine::Level DiagLevel,
     if (DiagLevel == DiagnosticsEngine::Note)
       EnterDiagBlock();
 
-    EmitDiagnosticMessage(SourceLocation(), PresumedLoc(), DiagLevel,
-                          State->diagBuf, nullptr, &Info);
+    EmitDiagnosticMessage(FullSourceLoc(), PresumedLoc(), DiagLevel,
+                          State->diagBuf, &Info);
 
     if (DiagLevel == DiagnosticsEngine::Note)
       ExitDiagBlock();
@@ -618,12 +603,9 @@ void SDiagsWriter::HandleDiagnostic(DiagnosticsEngine::Level DiagLevel,
   assert(Info.hasSourceManager() && LangOpts &&
          "Unexpected diagnostic with valid location outside of a source file");
   SDiagsRenderer Renderer(*this, *LangOpts, &*State->DiagOpts);
-  Renderer.emitDiagnostic(Info.getLocation(), DiagLevel,
-                          State->diagBuf,
-                          Info.getRanges(),
-                          Info.getFixItHints(),
-                          &Info.getSourceManager(),
-                          &Info);
+  Renderer.emitDiagnostic(
+      FullSourceLoc(Info.getLocation(), Info.getSourceManager()), DiagLevel,
+      State->diagBuf, Info.getRanges(), Info.getFixItHints(), &Info);
 }
 
 static serialized_diags::Level getStableLevel(DiagnosticsEngine::Level Level) {
@@ -641,11 +623,9 @@ static serialized_diags::Level getStableLevel(DiagnosticsEngine::Level Level) {
   llvm_unreachable("invalid diagnostic level");
 }
 
-void SDiagsWriter::EmitDiagnosticMessage(SourceLocation Loc,
-                                         PresumedLoc PLoc,
+void SDiagsWriter::EmitDiagnosticMessage(FullSourceLoc Loc, PresumedLoc PLoc,
                                          DiagnosticsEngine::Level Level,
                                          StringRef Message,
-                                         const SourceManager *SM,
                                          DiagOrStoredDiag D) {
   llvm::BitstreamWriter &Stream = State->Stream;
   RecordData &Record = State->Record;
@@ -655,7 +635,7 @@ void SDiagsWriter::EmitDiagnosticMessage(SourceLocation Loc,
   Record.clear();
   Record.push_back(RECORD_DIAG);
   Record.push_back(getStableLevel(Level));
-  AddLocToRecord(Loc, SM, PLoc, Record);
+  AddLocToRecord(Loc, PLoc, Record);
 
   if (const Diagnostic *Info = D.dyn_cast<const Diagnostic*>()) {
     // Emit the category string lazily and get the category ID.
@@ -672,15 +652,11 @@ void SDiagsWriter::EmitDiagnosticMessage(SourceLocation Loc,
   Stream.EmitRecordWithBlob(Abbrevs.get(RECORD_DIAG), Record, Message);
 }
 
-void
-SDiagsRenderer::emitDiagnosticMessage(SourceLocation Loc,
-                                      PresumedLoc PLoc,
-                                      DiagnosticsEngine::Level Level,
-                                      StringRef Message,
-                                      ArrayRef<clang::CharSourceRange> Ranges,
-                                      const SourceManager *SM,
-                                      DiagOrStoredDiag D) {
-  Writer.EmitDiagnosticMessage(Loc, PLoc, Level, Message, SM, D);
+void SDiagsRenderer::emitDiagnosticMessage(
+    FullSourceLoc Loc, PresumedLoc PLoc, DiagnosticsEngine::Level Level,
+    StringRef Message, ArrayRef<clang::CharSourceRange> Ranges,
+    DiagOrStoredDiag D) {
+  Writer.EmitDiagnosticMessage(Loc, PLoc, Level, Message, D);
 }
 
 void SDiagsWriter::EnterDiagBlock() {
@@ -733,20 +709,18 @@ void SDiagsWriter::EmitCodeContext(SmallVectorImpl<CharSourceRange> &Ranges,
   }
 }
 
-void SDiagsRenderer::emitCodeContext(SourceLocation Loc,
+void SDiagsRenderer::emitCodeContext(FullSourceLoc Loc,
                                      DiagnosticsEngine::Level Level,
                                      SmallVectorImpl<CharSourceRange> &Ranges,
-                                     ArrayRef<FixItHint> Hints,
-                                     const SourceManager &SM) {
-  Writer.EmitCodeContext(Ranges, Hints, SM);
+                                     ArrayRef<FixItHint> Hints) {
+  Writer.EmitCodeContext(Ranges, Hints, Loc.getManager());
 }
 
-void SDiagsRenderer::emitNote(SourceLocation Loc, StringRef Message,
-                              const SourceManager *SM) {
+void SDiagsRenderer::emitNote(FullSourceLoc Loc, StringRef Message) {
   Writer.EnterDiagBlock();
-  PresumedLoc PLoc = SM ? SM->getPresumedLoc(Loc) : PresumedLoc();
-  Writer.EmitDiagnosticMessage(Loc, PLoc, DiagnosticsEngine::Note,
-                               Message, SM, DiagOrStoredDiag());
+  PresumedLoc PLoc = Loc.hasManager() ? Loc.getPresumedLoc() : PresumedLoc();
+  Writer.EmitDiagnosticMessage(Loc, PLoc, DiagnosticsEngine::Note, Message,
+                               DiagOrStoredDiag());
   Writer.ExitDiagBlock();
 }
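These SDiags hunks follow clang's DiagnosticRenderer refactor: callees that used to take a SourceLocation plus a nullable const SourceManager* now take a single FullSourceLoc that carries its SourceManager with it, so the null checks collapse into hasManager()/getManager(). A minimal sketch of the pairing, with simplified stand-in types (the real clang::FullSourceLoc exposes far more of the SourceManager API):

    #include <cassert>

    struct SourceManager;                // stand-in for clang::SourceManager

    struct SourceLocation {
      unsigned ID = 0;                   // 0 encodes "invalid", as in clang
      bool isValid() const { return ID != 0; }
    };

    // A location that knows which SourceManager owns it, so callees no
    // longer thread a separate, possibly-null SourceManager parameter.
    class FullSourceLoc : public SourceLocation {
      const SourceManager *SrcMgr = nullptr;

    public:
      FullSourceLoc() = default;         // invalid location, no manager
      FullSourceLoc(SourceLocation L, const SourceManager &SM)
          : SourceLocation(L), SrcMgr(&SM) {}

      bool hasManager() const { return SrcMgr != nullptr; }
      const SourceManager &getManager() const {
        assert(SrcMgr && "no SourceManager for this location");
        return *SrcMgr;
      }
    };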
 
diff --git a/interpreter/llvm/src/tools/clang/lib/Frontend/SerializedDiagnosticReader.cpp b/interpreter/llvm/src/tools/clang/lib/Frontend/SerializedDiagnosticReader.cpp
index c4461d452e7b4..08b7087fbad62 100644
--- a/interpreter/llvm/src/tools/clang/lib/Frontend/SerializedDiagnosticReader.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Frontend/SerializedDiagnosticReader.cpp
@@ -27,6 +27,9 @@ std::error_code SerializedDiagnosticReader::readDiagnostics(StringRef File) {
   llvm::BitstreamCursor Stream(**Buffer);
   Optional<llvm::BitstreamBlockInfo> BlockInfo;
 
+  if (Stream.AtEndOfStream())
+    return SDError::InvalidSignature;
+
   // Sniff for the signature.
   if (Stream.Read(8) != 'D' ||
       Stream.Read(8) != 'I' ||
@@ -125,6 +128,7 @@ SerializedDiagnosticReader::readMetaBlock(llvm::BitstreamCursor &Stream) {
     case Cursor::BlockBegin:
       if (Stream.SkipBlock())
         return SDError::MalformedMetadataBlock;
+      LLVM_FALLTHROUGH;
     case Cursor::BlockEnd:
       if (!VersionChecked)
         return SDError::MissingVersion;
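Two independent hardening fixes in the reader: an empty or truncated buffer now fails with InvalidSignature instead of reading past the end of the bitstream while sniffing the 'DIAG' signature, and the deliberate BlockBegin-to-BlockEnd fallthrough is annotated for -Wimplicit-fallthrough. LLVM_FALLTHROUGH expands roughly as below (a simplified sketch; the real definition in llvm/Support/Compiler.h feature-tests the attributes):

    #if __cplusplus >= 201703L
    #define LLVM_FALLTHROUGH [[fallthrough]]        // standard attribute
    #elif defined(__clang__)
    #define LLVM_FALLTHROUGH [[clang::fallthrough]]
    #else
    #define LLVM_FALLTHROUGH                        // no-op elsewhere
    #endif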
diff --git a/interpreter/llvm/src/tools/clang/lib/Frontend/TextDiagnostic.cpp b/interpreter/llvm/src/tools/clang/lib/Frontend/TextDiagnostic.cpp
index a4937386b93f0..6a72b00c602b9 100644
--- a/interpreter/llvm/src/tools/clang/lib/Frontend/TextDiagnostic.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Frontend/TextDiagnostic.cpp
@@ -672,20 +672,16 @@ TextDiagnostic::TextDiagnostic(raw_ostream &OS,
 
 TextDiagnostic::~TextDiagnostic() {}
 
-void
-TextDiagnostic::emitDiagnosticMessage(SourceLocation Loc,
-                                      PresumedLoc PLoc,
-                                      DiagnosticsEngine::Level Level,
-                                      StringRef Message,
-                                      ArrayRef<clang::CharSourceRange> Ranges,
-                                      const SourceManager *SM,
-                                      DiagOrStoredDiag D) {
+void TextDiagnostic::emitDiagnosticMessage(
+    FullSourceLoc Loc, PresumedLoc PLoc, DiagnosticsEngine::Level Level,
+    StringRef Message, ArrayRef<clang::CharSourceRange> Ranges,
+    DiagOrStoredDiag D) {
   uint64_t StartOfLocationInfo = OS.tell();
 
   // Emit the location of this particular diagnostic.
   if (Loc.isValid())
-    emitDiagnosticLoc(Loc, PLoc, Level, Ranges, *SM);
-  
+    emitDiagnosticLoc(Loc, PLoc, Level, Ranges);
+
   if (DiagOpts->ShowColors)
     OS.resetColor();
   
@@ -787,17 +783,16 @@ void TextDiagnostic::emitFilename(StringRef Filename, const SourceManager &SM) {
 /// This includes extracting as much location information as is present for
 /// the diagnostic and printing it, as well as any include stack or source
 /// ranges necessary.
-void TextDiagnostic::emitDiagnosticLoc(SourceLocation Loc, PresumedLoc PLoc,
+void TextDiagnostic::emitDiagnosticLoc(FullSourceLoc Loc, PresumedLoc PLoc,
                                        DiagnosticsEngine::Level Level,
-                                       ArrayRef<CharSourceRange> Ranges,
-                                       const SourceManager &SM) {
+                                       ArrayRef<CharSourceRange> Ranges) {
   if (PLoc.isInvalid()) {
     // At least print the file name if available:
-    FileID FID = SM.getFileID(Loc);
+    FileID FID = Loc.getFileID();
     if (FID.isValid()) {
-      const FileEntry* FE = SM.getFileEntryForID(FID);
+      const FileEntry *FE = Loc.getFileEntry();
       if (FE && FE->isValid()) {
-        emitFilename(FE->getName(), SM);
+        emitFilename(FE->getName(), Loc.getManager());
         if (FE->isInPCH())
           OS << " (in PCH)";
         OS << ": ";
@@ -813,7 +808,7 @@ void TextDiagnostic::emitDiagnosticLoc(SourceLocation Loc, PresumedLoc PLoc,
   if (DiagOpts->ShowColors)
     OS.changeColor(savedColor, true);
 
-  emitFilename(PLoc.getFilename(), SM);
+  emitFilename(PLoc.getFilename(), Loc.getManager());
   switch (DiagOpts->getFormat()) {
   case DiagnosticOptions::Clang: OS << ':'  << LineNo; break;
   case DiagnosticOptions::MSVC:  OS << '('  << LineNo; break;
@@ -848,8 +843,7 @@ void TextDiagnostic::emitDiagnosticLoc(SourceLocation Loc, PresumedLoc PLoc,
   }
 
   if (DiagOpts->ShowSourceRanges && !Ranges.empty()) {
-    FileID CaretFileID =
-      SM.getFileID(SM.getExpansionLoc(Loc));
+    FileID CaretFileID = Loc.getExpansionLoc().getFileID();
     bool PrintedRange = false;
 
     for (ArrayRef<CharSourceRange>::const_iterator RI = Ranges.begin(),
@@ -858,8 +852,10 @@ void TextDiagnostic::emitDiagnosticLoc(SourceLocation Loc, PresumedLoc PLoc,
       // Ignore invalid ranges.
       if (!RI->isValid()) continue;
 
-      SourceLocation B = SM.getExpansionLoc(RI->getBegin());
-      SourceLocation E = SM.getExpansionLoc(RI->getEnd());
+      FullSourceLoc B =
+          FullSourceLoc(RI->getBegin(), Loc.getManager()).getExpansionLoc();
+      FullSourceLoc E =
+          FullSourceLoc(RI->getEnd(), Loc.getManager()).getExpansionLoc();
 
       // If the End location and the start location are the same and are a
       // macro location, then the range was something that came from a
@@ -867,10 +863,12 @@ void TextDiagnostic::emitDiagnosticLoc(SourceLocation Loc, PresumedLoc PLoc,
       // best we can do is to highlight the range.  If this is a
       // function-like macro, we'd also like to highlight the arguments.
       if (B == E && RI->getEnd().isMacroID())
-        E = SM.getExpansionRange(RI->getEnd()).second;
+        E = FullSourceLoc(RI->getEnd(), Loc.getManager())
+                .getExpansionRange()
+                .second;
 
-      std::pair<FileID, unsigned> BInfo = SM.getDecomposedLoc(B);
-      std::pair<FileID, unsigned> EInfo = SM.getDecomposedLoc(E);
+      std::pair<FileID, unsigned> BInfo = B.getDecomposedLoc();
+      std::pair<FileID, unsigned> EInfo = E.getDecomposedLoc();
 
       // If the start or end of the range is in another file, just discard
       // it.
@@ -881,13 +879,10 @@ void TextDiagnostic::emitDiagnosticLoc(SourceLocation Loc, PresumedLoc PLoc,
       // tokens.
       unsigned TokSize = 0;
       if (RI->isTokenRange())
-        TokSize = Lexer::MeasureTokenLength(E, SM, LangOpts);
+        TokSize = Lexer::MeasureTokenLength(E, E.getManager(), LangOpts);
 
-      OS << '{' << SM.getLineNumber(BInfo.first, BInfo.second) << ':'
-        << SM.getColumnNumber(BInfo.first, BInfo.second) << '-'
-        << SM.getLineNumber(EInfo.first, EInfo.second) << ':'
-        << (SM.getColumnNumber(EInfo.first, EInfo.second)+TokSize)
-        << '}';
+      OS << '{' << B.getLineNumber() << ':' << B.getColumnNumber() << '-'
+         << E.getLineNumber() << ':' << (E.getColumnNumber() + TokSize) << '}';
       PrintedRange = true;
     }
 
@@ -897,9 +892,7 @@ void TextDiagnostic::emitDiagnosticLoc(SourceLocation Loc, PresumedLoc PLoc,
   OS << ' ';
 }
 
-void TextDiagnostic::emitIncludeLocation(SourceLocation Loc,
-                                         PresumedLoc PLoc,
-                                         const SourceManager &SM) {
+void TextDiagnostic::emitIncludeLocation(FullSourceLoc Loc, PresumedLoc PLoc) {
   if (DiagOpts->ShowLocation && PLoc.isValid())
     OS << "In file included from " << PLoc.getFilename() << ':'
        << PLoc.getLine() << ":\n";
@@ -907,9 +900,8 @@ void TextDiagnostic::emitIncludeLocation(SourceLocation Loc,
     OS << "In included file:\n"; 
 }
 
-void TextDiagnostic::emitImportLocation(SourceLocation Loc, PresumedLoc PLoc,
-                                        StringRef ModuleName,
-                                        const SourceManager &SM) {
+void TextDiagnostic::emitImportLocation(FullSourceLoc Loc, PresumedLoc PLoc,
+                                        StringRef ModuleName) {
   if (DiagOpts->ShowLocation && PLoc.isValid())
     OS << "In module '" << ModuleName << "' imported from "
        << PLoc.getFilename() << ':' << PLoc.getLine() << ":\n";
@@ -917,10 +909,9 @@ void TextDiagnostic::emitImportLocation(SourceLocation Loc, PresumedLoc PLoc,
     OS << "In module '" << ModuleName << "':\n";
 }
 
-void TextDiagnostic::emitBuildingModuleLocation(SourceLocation Loc,
+void TextDiagnostic::emitBuildingModuleLocation(FullSourceLoc Loc,
                                                 PresumedLoc PLoc,
-                                                StringRef ModuleName,
-                                                const SourceManager &SM) {
+                                                StringRef ModuleName) {
   if (DiagOpts->ShowLocation && PLoc.isValid())
     OS << "While building module '" << ModuleName << "' imported from "
       << PLoc.getFilename() << ':' << PLoc.getLine() << ":\n";
@@ -928,6 +919,56 @@ void TextDiagnostic::emitBuildingModuleLocation(SourceLocation Loc,
     OS << "While building module '" << ModuleName << "':\n";
 }
 
+/// \brief Find the suitable set of lines to show to include a set of ranges.
+static llvm::Optional<std::pair<unsigned, unsigned>>
+findLinesForRange(const CharSourceRange &R, FileID FID,
+                  const SourceManager &SM) {
+  if (!R.isValid()) return None;
+
+  SourceLocation Begin = R.getBegin();
+  SourceLocation End = R.getEnd();
+  if (SM.getFileID(Begin) != FID || SM.getFileID(End) != FID)
+    return None;
+
+  return std::make_pair(SM.getExpansionLineNumber(Begin),
+                        SM.getExpansionLineNumber(End));
+}
+
+/// Add as much of range B into range A as possible without exceeding a maximum
+/// size of MaxRange. Ranges are inclusive.
+static std::pair<unsigned, unsigned>
+maybeAddRange(std::pair<unsigned, unsigned> A, std::pair<unsigned, unsigned> B,
+              unsigned MaxRange) {
+  // If A is already the maximum size, we're done.
+  unsigned Slack = MaxRange - (A.second - A.first + 1);
+  if (Slack == 0)
+    return A;
+
+  // Easy case: merge succeeds within MaxRange.
+  unsigned Min = std::min(A.first, B.first);
+  unsigned Max = std::max(A.second, B.second);
+  if (Max - Min + 1 <= MaxRange)
+    return {Min, Max};
+
+  // If we can't reach B from A within MaxRange, there's nothing to do.
+  // Don't add lines to the range that contain nothing interesting.
+  if ((B.first > A.first && B.first - A.first + 1 > MaxRange) ||
+      (B.second < A.second && A.second - B.second + 1 > MaxRange))
+    return A;
+
+  // Otherwise, expand A towards B to produce a range of size MaxRange. We
+  // attempt to expand by the same amount in both directions if B strictly
+  // contains A.
+
+  // Expand downwards by up to half the available amount, then upwards as
+  // much as possible, then downwards as much as possible.
+  A.second = std::min(A.second + (Slack + 1) / 2, Max);
+  Slack = MaxRange - (A.second - A.first + 1);
+  A.first = std::max(Min + Slack, A.first) - Slack;
+  A.second = std::min(A.first + MaxRange - 1, Max);
+  return A;
+}
+
 /// \brief Highlight a SourceRange (with ~'s) for any characters on LineNo.
 static void highlightRange(const CharSourceRange &R,
                            unsigned LineNo, FileID FID,
@@ -990,9 +1031,12 @@ static void highlightRange(const CharSourceRange &R,
       EndColNo = map.startOfPreviousColumn(EndColNo);
 
     // If the start/end passed each other, then we are trying to highlight a
-    // range that just exists in whitespace, which must be some sort of other
-    // bug.
-    assert(StartColNo <= EndColNo && "Trying to highlight whitespace??");
+    // range that just exists in whitespace. That most likely means we have
+    // a multi-line highlighting range that covers a blank line.
+    if (StartColNo > EndColNo) {
+      assert(StartLineNo != EndLineNo && "trying to highlight whitespace");
+      StartColNo = EndColNo;
+    }
   }
 
   assert(StartColNo <= map.getSourceLine().size() && "Invalid range!");
@@ -1008,7 +1052,8 @@ static void highlightRange(const CharSourceRange &R,
   std::fill(CaretLine.begin()+StartColNo,CaretLine.begin()+EndColNo,'~');
 }
 
-static std::string buildFixItInsertionLine(unsigned LineNo,
+static std::string buildFixItInsertionLine(FileID FID,
+                                           unsigned LineNo,
                                            const SourceColumnMap &map,
                                            ArrayRef<FixItHint> Hints,
                                            const SourceManager &SM,
@@ -1025,7 +1070,8 @@ static std::string buildFixItInsertionLine(unsigned LineNo,
       // code contains no newlines and is on the same line as the caret.
       std::pair<FileID, unsigned> HintLocInfo
         = SM.getDecomposedExpansionLoc(I->RemoveRange.getBegin());
-      if (LineNo == SM.getLineNumber(HintLocInfo.first, HintLocInfo.second) &&
+      if (FID == HintLocInfo.first &&
+          LineNo == SM.getLineNumber(HintLocInfo.first, HintLocInfo.second) &&
           StringRef(I->CodeToInsert).find_first_of("\n\r") == StringRef::npos) {
         // Insert the new code into the line just below the code
         // that the user wrote.
@@ -1061,9 +1107,6 @@ static std::string buildFixItInsertionLine(unsigned LineNo,
 
         PrevHintEndCol =
           HintCol + llvm::sys::locale::columnWidth(I->CodeToInsert);
-      } else {
-        FixItInsertionLine.clear();
-        break;
       }
     }
   }
@@ -1081,10 +1124,8 @@ static std::string buildFixItInsertionLine(unsigned LineNo,
 /// \param Ranges The underlined ranges for this code snippet.
 /// \param Hints The FixIt hints active for this diagnostic.
 void TextDiagnostic::emitSnippetAndCaret(
-    SourceLocation Loc, DiagnosticsEngine::Level Level,
-    SmallVectorImpl<CharSourceRange>& Ranges,
-    ArrayRef<FixItHint> Hints,
-    const SourceManager &SM) {
+    FullSourceLoc Loc, DiagnosticsEngine::Level Level,
+    SmallVectorImpl<CharSourceRange> &Ranges, ArrayRef<FixItHint> Hints) {
   assert(Loc.isValid() && "must have a valid source location here");
   assert(Loc.isFileID() && "must have a file location here");
 
@@ -1101,111 +1142,128 @@ void TextDiagnostic::emitSnippetAndCaret(
     return;
 
   // Decompose the location into a FID/Offset pair.
-  std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Loc);
+  std::pair<FileID, unsigned> LocInfo = Loc.getDecomposedLoc();
   FileID FID = LocInfo.first;
-  unsigned FileOffset = LocInfo.second;
+  const SourceManager &SM = Loc.getManager();
 
   // Get information about the buffer it points into.
   bool Invalid = false;
-  StringRef BufData = SM.getBufferData(FID, &Invalid);
+  StringRef BufData = Loc.getBufferData(&Invalid);
   if (Invalid)
     return;
 
-  const char *BufStart = BufData.data();
-  const char *BufEnd = BufStart + BufData.size();
+  unsigned CaretLineNo = Loc.getLineNumber();
+  unsigned CaretColNo = Loc.getColumnNumber();
 
-  unsigned LineNo = SM.getLineNumber(FID, FileOffset);
-  unsigned ColNo = SM.getColumnNumber(FID, FileOffset);
-  
   // Arbitrarily stop showing snippets when the line is too long.
   static const size_t MaxLineLengthToPrint = 4096;
-  if (ColNo > MaxLineLengthToPrint)
+  if (CaretColNo > MaxLineLengthToPrint)
     return;
 
-  // Rewind from the current position to the start of the line.
-  const char *TokPtr = BufStart+FileOffset;
-  const char *LineStart = TokPtr-ColNo+1; // Column # is 1-based.
-
-  // Compute the line end.  Scan forward from the error position to the end of
-  // the line.
-  const char *LineEnd = TokPtr;
-  while (*LineEnd != '\n' && *LineEnd != '\r' && LineEnd != BufEnd)
-    ++LineEnd;
-
-  // Arbitrarily stop showing snippets when the line is too long.
-  if (size_t(LineEnd - LineStart) > MaxLineLengthToPrint)
-    return;
-
-  // Trim trailing null-bytes.
-  StringRef Line(LineStart, LineEnd - LineStart);
-  while (Line.size() > ColNo && Line.back() == '\0')
-    Line = Line.drop_back();
+  // Find the set of lines to include.
+  const unsigned MaxLines = DiagOpts->SnippetLineLimit;
+  std::pair<unsigned, unsigned> Lines = {CaretLineNo, CaretLineNo};
+  for (SmallVectorImpl<CharSourceRange>::iterator I = Ranges.begin(),
+                                                  E = Ranges.end();
+       I != E; ++I)
+    if (auto OptionalRange = findLinesForRange(*I, FID, SM))
+      Lines = maybeAddRange(Lines, *OptionalRange, MaxLines);
+
+  for (unsigned LineNo = Lines.first; LineNo != Lines.second + 1; ++LineNo) {
+    const char *BufStart = BufData.data();
+    const char *BufEnd = BufStart + BufData.size();
+
+    // Rewind from the current position to the start of the line.
+    const char *LineStart =
+        BufStart +
+        SM.getDecomposedLoc(SM.translateLineCol(FID, LineNo, 1)).second;
+    if (LineStart == BufEnd)
+      break;
 
-  // Copy the line of code into an std::string for ease of manipulation.
-  std::string SourceLine(Line.begin(), Line.end());
+    // Compute the line end.
+    const char *LineEnd = LineStart;
+    while (*LineEnd != '\n' && *LineEnd != '\r' && LineEnd != BufEnd)
+      ++LineEnd;
 
-  // Build the byte to column map.
-  const SourceColumnMap sourceColMap(SourceLine, DiagOpts->TabStop);
+    // Arbitrarily stop showing snippets when the line is too long.
+    // FIXME: Don't print any lines in this case.
+    if (size_t(LineEnd - LineStart) > MaxLineLengthToPrint)
+      return;
 
-  // Create a line for the caret that is filled with spaces that is the same
-  // number of columns as the line of source code.
-  std::string CaretLine(sourceColMap.columns(), ' ');
+    // Trim trailing null-bytes.
+    StringRef Line(LineStart, LineEnd - LineStart);
+    while (!Line.empty() && Line.back() == '\0' &&
+           (LineNo != CaretLineNo || Line.size() > CaretColNo))
+      Line = Line.drop_back();
+
+    // Copy the line of code into an std::string for ease of manipulation.
+    std::string SourceLine(Line.begin(), Line.end());
+
+    // Build the byte to column map.
+    const SourceColumnMap sourceColMap(SourceLine, DiagOpts->TabStop);
+
+    // Create a line for the caret that is filled with spaces that is the same
+    // number of columns as the line of source code.
+    std::string CaretLine(sourceColMap.columns(), ' ');
+
+    // Highlight all of the characters covered by Ranges with ~ characters.
+    for (SmallVectorImpl<CharSourceRange>::iterator I = Ranges.begin(),
+                                                    E = Ranges.end();
+         I != E; ++I)
+      highlightRange(*I, LineNo, FID, sourceColMap, CaretLine, SM, LangOpts);
+
+    // Next, insert the caret itself.
+    if (CaretLineNo == LineNo) {
+      CaretColNo = sourceColMap.byteToContainingColumn(CaretColNo - 1);
+      if (CaretLine.size() < CaretColNo + 1)
+        CaretLine.resize(CaretColNo + 1, ' ');
+      CaretLine[CaretColNo] = '^';
+    }
 
-  // Highlight all of the characters covered by Ranges with ~ characters.
-  for (SmallVectorImpl<CharSourceRange>::iterator I = Ranges.begin(),
-                                                  E = Ranges.end();
-       I != E; ++I)
-    highlightRange(*I, LineNo, FID, sourceColMap, CaretLine, SM, LangOpts);
-
-  // Next, insert the caret itself.
-  ColNo = sourceColMap.byteToContainingColumn(ColNo-1);
-  if (CaretLine.size()<ColNo+1)
-    CaretLine.resize(ColNo+1, ' ');
-  CaretLine[ColNo] = '^';
-
-  std::string FixItInsertionLine = buildFixItInsertionLine(LineNo,
-                                                           sourceColMap,
-                                                           Hints, SM,
-                                                           DiagOpts.get());
-
-  // If the source line is too long for our terminal, select only the
-  // "interesting" source region within that line.
-  unsigned Columns = DiagOpts->MessageLength;
-  if (Columns)
-    selectInterestingSourceRegion(SourceLine, CaretLine, FixItInsertionLine,
-                                  Columns, sourceColMap);
-
-  // If we are in -fdiagnostics-print-source-range-info mode, we are trying
-  // to produce easily machine parsable output.  Add a space before the
-  // source line and the caret to make it trivial to tell the main diagnostic
-  // line from what the user is intended to see.
-  if (DiagOpts->ShowSourceRanges) {
-    SourceLine = ' ' + SourceLine;
-    CaretLine = ' ' + CaretLine;
-  }
+    std::string FixItInsertionLine = buildFixItInsertionLine(
+        FID, LineNo, sourceColMap, Hints, SM, DiagOpts.get());
+
+    // If the source line is too long for our terminal, select only the
+    // "interesting" source region within that line.
+    unsigned Columns = DiagOpts->MessageLength;
+    if (Columns)
+      selectInterestingSourceRegion(SourceLine, CaretLine, FixItInsertionLine,
+                                    Columns, sourceColMap);
+
+    // If we are in -fdiagnostics-print-source-range-info mode, we are trying
+    // to produce easily machine parsable output.  Add a space before the
+    // source line and the caret to make it trivial to tell the main diagnostic
+    // line from what the user is intended to see.
+    if (DiagOpts->ShowSourceRanges) {
+      SourceLine = ' ' + SourceLine;
+      CaretLine = ' ' + CaretLine;
+    }
 
-  // Finally, remove any blank spaces from the end of CaretLine.
-  while (CaretLine[CaretLine.size()-1] == ' ')
-    CaretLine.erase(CaretLine.end()-1);
+    // Finally, remove any blank spaces from the end of CaretLine.
+    while (!CaretLine.empty() && CaretLine[CaretLine.size() - 1] == ' ')
+      CaretLine.erase(CaretLine.end() - 1);
 
-  // Emit what we have computed.
-  emitSnippet(SourceLine);
+    // Emit what we have computed.
+    emitSnippet(SourceLine);
 
-  if (DiagOpts->ShowColors)
-    OS.changeColor(caretColor, true);
-  OS << CaretLine << '\n';
-  if (DiagOpts->ShowColors)
-    OS.resetColor();
+    if (!CaretLine.empty()) {
+      if (DiagOpts->ShowColors)
+        OS.changeColor(caretColor, true);
+      OS << CaretLine << '\n';
+      if (DiagOpts->ShowColors)
+        OS.resetColor();
+    }
 
-  if (!FixItInsertionLine.empty()) {
-    if (DiagOpts->ShowColors)
-      // Print fixit line in color
-      OS.changeColor(fixitColor, false);
-    if (DiagOpts->ShowSourceRanges)
-      OS << ' ';
-    OS << FixItInsertionLine << '\n';
-    if (DiagOpts->ShowColors)
-      OS.resetColor();
+    if (!FixItInsertionLine.empty()) {
+      if (DiagOpts->ShowColors)
+        // Print fixit line in color
+        OS.changeColor(fixitColor, false);
+      if (DiagOpts->ShowSourceRanges)
+        OS << ' ';
+      OS << FixItInsertionLine << '\n';
+      if (DiagOpts->ShowColors)
+        OS.resetColor();
+    }
   }
 
   // Print out any parseable fixit information requested by the options.
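The findLinesForRange/maybeAddRange pair is what turns the old single-line snippet into a bounded multi-line one: every highlight range in the caret's file proposes a line interval, and maybeAddRange folds each into the printed window without letting it exceed DiagOpts->SnippetLineLimit. A worked trace of the arithmetic, with illustrative values:

    // A = {10,12} (lines already shown), B = {11,30} (a highlight range),
    // MaxRange = 16 (the snippet cap). All ranges are inclusive.
    //
    //   Slack = 16 - (12 - 10 + 1) = 13        // A may grow by 13 lines
    //   naive merge {10,30} spans 21 > 16      // too big, but B is reachable
    //   A.second = min(12 + (13+1)/2, 30) = 19 // grow down by half the slack
    //   Slack    = 16 - (19 - 10 + 1)     = 6
    //   A.first  = max(10 + 6, 10) - 6    = 10 // nothing gained growing up
    //   A.second = min(10 + 16 - 1, 30)   = 25
    //
    // Result: {10,25} -- exactly MaxRange lines, expanded toward B.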
diff --git a/interpreter/llvm/src/tools/clang/lib/Frontend/TextDiagnosticPrinter.cpp b/interpreter/llvm/src/tools/clang/lib/Frontend/TextDiagnosticPrinter.cpp
index 17646b48e23dd..5dd3252d5b1ef 100644
--- a/interpreter/llvm/src/tools/clang/lib/Frontend/TextDiagnosticPrinter.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Frontend/TextDiagnosticPrinter.cpp
@@ -150,10 +150,9 @@ void TextDiagnosticPrinter::HandleDiagnostic(DiagnosticsEngine::Level Level,
          "Unexpected diagnostic with no source manager");
   assert(TextDiag && "Unexpected diagnostic outside source file processing");
 
-  TextDiag->emitDiagnostic(Info.getLocation(), Level, DiagMessageStream.str(),
-                           Info.getRanges(),
-                           Info.getFixItHints(),
-                           &Info.getSourceManager());
+  TextDiag->emitDiagnostic(
+      FullSourceLoc(Info.getLocation(), Info.getSourceManager()), Level,
+      DiagMessageStream.str(), Info.getRanges(), Info.getFixItHints());
 
   OS.flush();
 }
diff --git a/interpreter/llvm/src/tools/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp b/interpreter/llvm/src/tools/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp
index 1f7493c9e398e..1666315588065 100644
--- a/interpreter/llvm/src/tools/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp
@@ -15,6 +15,7 @@
 #include "clang/FrontendTool/Utils.h"
 #include "clang/ARCMigrate/ARCMTActions.h"
 #include "clang/CodeGen/CodeGenAction.h"
+#include "clang/Config/config.h"
 #include "clang/Driver/Options.h"
 #include "clang/Frontend/CompilerInstance.h"
 #include "clang/Frontend/CompilerInvocation.h"
@@ -85,7 +86,8 @@ CreateFrontendBaseAction(CompilerInstance &CI) {
   case PrintDeclContext:       return llvm::make_unique<DeclContextPrintAction>();
   case PrintPreamble:          return llvm::make_unique<PrintPreambleAction>();
   case PrintPreprocessedInput: {
-    if (CI.getPreprocessorOutputOpts().RewriteIncludes)
+    if (CI.getPreprocessorOutputOpts().RewriteIncludes ||
+        CI.getPreprocessorOutputOpts().RewriteImports)
       return llvm::make_unique<RewriteIncludesAction>();
     return llvm::make_unique<PrintPreprocessedAction>();
   }
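With this change -frewrite-imports joins -frewrite-includes in selecting the InclusionRewriter-based action for -E output, which splices included text inline instead of fully preprocessing it. Illustratively, the rewritten output keeps the original directive under a disabled block (marker comments reproduced from memory; treat the exact format as an assumption):

    #if 0 /* expanded by -frewrite-includes */
    #include "foo.h"
    #endif /* expanded by -frewrite-includes */
    /* ... the (recursively rewritten) contents of foo.h appear here ... */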
diff --git a/interpreter/llvm/src/tools/clang/lib/Headers/CMakeLists.txt b/interpreter/llvm/src/tools/clang/lib/Headers/CMakeLists.txt
index 6091db08a93be..a621c02644e38 100644
--- a/interpreter/llvm/src/tools/clang/lib/Headers/CMakeLists.txt
+++ b/interpreter/llvm/src/tools/clang/lib/Headers/CMakeLists.txt
@@ -7,6 +7,7 @@ set(files
   avx2intrin.h
   avx512bwintrin.h
   avx512cdintrin.h
+  avx512vpopcntdqintrin.h
   avx512dqintrin.h
   avx512erintrin.h
   avx512fintrin.h
diff --git a/interpreter/llvm/src/tools/clang/lib/Headers/altivec.h b/interpreter/llvm/src/tools/clang/lib/Headers/altivec.h
index 421e2a7754a51..90fd477d9b983 100644
--- a/interpreter/llvm/src/tools/clang/lib/Headers/altivec.h
+++ b/interpreter/llvm/src/tools/clang/lib/Headers/altivec.h
@@ -2887,87 +2887,79 @@ static __inline__ vector double __ATTRS_o_ai vec_cpsgn(vector double __a,
 
 /* vec_ctf */
 
-static __inline__ vector float __ATTRS_o_ai vec_ctf(vector int __a, int __b) {
-  return __builtin_altivec_vcfsx(__a, __b);
-}
-
-static __inline__ vector float __ATTRS_o_ai vec_ctf(vector unsigned int __a,
-                                                    int __b) {
-  return __builtin_altivec_vcfux((vector int)__a, __b);
-}
-
 #ifdef __VSX__
-static __inline__ vector double __ATTRS_o_ai
-vec_ctf(vector unsigned long long __a, int __b) {
-  vector double __ret = __builtin_convertvector(__a, vector double);
-  __ret *= (vector double)(vector unsigned long long)((0x3ffULL - __b) << 52);
-  return __ret;
-}
-
-static __inline__ vector double __ATTRS_o_ai
-vec_ctf(vector signed long long __a, int __b) {
-  vector double __ret = __builtin_convertvector(__a, vector double);
-  __ret *= (vector double)(vector unsigned long long)((0x3ffULL - __b) << 52);
-  return __ret;
-}
+#define vec_ctf(__a, __b)                                                      \
+  _Generic((__a), vector int                                                   \
+           : (vector float)__builtin_altivec_vcfsx((__a), (__b)),              \
+             vector unsigned int                                               \
+           : (vector float)__builtin_altivec_vcfux((vector int)(__a), (__b)),  \
+             vector unsigned long long                                         \
+           : (__builtin_convertvector((vector unsigned long long)(__a),        \
+                                      vector double) *                         \
+              (vector double)(vector unsigned long long)((0x3ffULL - (__b))    \
+                                                         << 52)),              \
+             vector signed long long                                           \
+           : (__builtin_convertvector((vector signed long long)(__a),          \
+                                      vector double) *                         \
+              (vector double)(vector unsigned long long)((0x3ffULL - (__b))    \
+                                                         << 52)))
+#else
+#define vec_ctf(__a, __b)                                                      \
+  _Generic((__a), vector int                                                   \
+           : (vector float)__builtin_altivec_vcfsx((__a), (__b)),              \
+             vector unsigned int                                               \
+           : (vector float)__builtin_altivec_vcfux((vector int)(__a), (__b)))
 #endif
 
 /* vec_vcfsx */
 
-static __inline__ vector float __attribute__((__always_inline__))
-vec_vcfsx(vector int __a, int __b) {
-  return __builtin_altivec_vcfsx(__a, __b);
-}
+#define vec_vcfux __builtin_altivec_vcfux
 
 /* vec_vcfux */
 
-static __inline__ vector float __attribute__((__always_inline__))
-vec_vcfux(vector unsigned int __a, int __b) {
-  return __builtin_altivec_vcfux((vector int)__a, __b);
-}
+#define vec_vcfsx(__a, __b) __builtin_altivec_vcfsx((vector int)(__a), (__b))
 
 /* vec_cts */
 
-static __inline__ vector int __ATTRS_o_ai vec_cts(vector float __a, int __b) {
-  return __builtin_altivec_vctsxs(__a, __b);
-}
-
 #ifdef __VSX__
-static __inline__ vector signed long long __ATTRS_o_ai
-vec_cts(vector double __a, int __b) {
-  __a *= (vector double)(vector unsigned long long)((0x3ffULL + __b) << 52);
-  return __builtin_convertvector(__a, vector signed long long);
-}
+#define vec_cts(__a, __b)                                                      \
+  _Generic((__a), vector float                                                 \
+           : __builtin_altivec_vctsxs((__a), (__b)), vector double             \
+           : __extension__({                                                   \
+             vector double __ret =                                             \
+                 (__a) *                                                       \
+                 (vector double)(vector unsigned long long)((0x3ffULL + (__b)) \
+                                                            << 52);            \
+             __builtin_convertvector(__ret, vector signed long long);          \
+           }))
+#else
+#define vec_cts __builtin_altivec_vctsxs
 #endif
 
 /* vec_vctsxs */
 
-static __inline__ vector int __attribute__((__always_inline__))
-vec_vctsxs(vector float __a, int __b) {
-  return __builtin_altivec_vctsxs(__a, __b);
-}
+#define vec_vctsxs __builtin_altivec_vctsxs
 
 /* vec_ctu */
 
-static __inline__ vector unsigned int __ATTRS_o_ai vec_ctu(vector float __a,
-                                                           int __b) {
-  return __builtin_altivec_vctuxs(__a, __b);
-}
-
 #ifdef __VSX__
-static __inline__ vector unsigned long long __ATTRS_o_ai
-vec_ctu(vector double __a, int __b) {
-  __a *= (vector double)(vector unsigned long long)((0x3ffULL + __b) << 52);
-  return __builtin_convertvector(__a, vector unsigned long long);
-}
+#define vec_ctu(__a, __b)                                                      \
+  _Generic((__a), vector float                                                 \
+           : __builtin_altivec_vctuxs((__a), (__b)), vector double             \
+           : __extension__({                                                   \
+             vector double __ret =                                             \
+                 (__a) *                                                       \
+                 (vector double)(vector unsigned long long)((0x3ffULL + __b)   \
+                                                            << 52);            \
+             __builtin_convertvector(__ret, vector unsigned long long);        \
+           }))
+#else
+#define vec_ctu __builtin_altivec_vctuxs
 #endif
 
 /* vec_vctuxs */
 
-static __inline__ vector unsigned int __attribute__((__always_inline__))
-vec_vctuxs(vector float __a, int __b) {
-  return __builtin_altivec_vctuxs(__a, __b);
-}
+#define vec_vctuxs __builtin_altivec_vctuxs
 
 /* vec_signed */
 
@@ -12156,6 +12148,11 @@ static __inline__ void __ATTRS_o_ai vec_vsx_st(vector unsigned char __a,
 
 #endif
 
+#ifdef __VSX__
+#define vec_xxpermdi __builtin_vsx_xxpermdi
+#define vec_xxsldwi __builtin_vsx_xxsldwi
+#endif
+
 /* vec_xor */
 
 #define __builtin_altivec_vxor vec_xor
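Rewriting vec_ctf/vec_cts/vec_ctu as _Generic macros keeps the scale argument a constant expression all the way into the AltiVec builtins, which range-check a small immediate; a parameter of an inline function would not qualify. Tracing one expansion makes this concrete (assuming a vector int argument):

    // vector int v = ...;
    // vec_ctf(v, 1) now expands to
    //     (vector float)__builtin_altivec_vcfsx((v), (1))
    // so the builtin still sees the literal 1 when it validates the 5-bit
    // immediate. With the old overloaded functions, __b arrived as a runtime
    // parameter and could not be checked or folded as a constant.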
diff --git a/interpreter/llvm/src/tools/clang/lib/Headers/avx2intrin.h b/interpreter/llvm/src/tools/clang/lib/Headers/avx2intrin.h
index 5d83a8db484b1..576f761b25426 100644
--- a/interpreter/llvm/src/tools/clang/lib/Headers/avx2intrin.h
+++ b/interpreter/llvm/src/tools/clang/lib/Headers/avx2intrin.h
@@ -832,7 +832,8 @@ _mm256_xor_si256(__m256i __a, __m256i __b)
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_stream_load_si256(__m256i const *__V)
 {
-  return (__m256i)__builtin_nontemporal_load((const __v4di *)__V);
+  typedef __v4di __v4di_aligned __attribute__((aligned(32)));
+  return (__m256i)__builtin_nontemporal_load((const __v4di_aligned *)__V);
 }
 
 static __inline__ __m128 __DEFAULT_FN_ATTRS
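__builtin_nontemporal_load derives the alignment of the memory operand from its pointee type, and the streaming instructions it lowers to (vmovntdqa and friends) fault on under-aligned addresses; the explicitly aligned typedef pins that guarantee independent of how the target ABI happens to align the bare vector type. The same pattern standalone (a sketch; the names are ours):

    typedef long long v4di __attribute__((vector_size(32)));
    typedef v4di v4di_aligned __attribute__((aligned(32)));

    // p must point to 32-byte-aligned storage; the typedef encodes that
    // requirement in the type the builtin sees.
    static inline v4di stream_load_256(const void *p) {
      return __builtin_nontemporal_load((const v4di_aligned *)p);
    }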
diff --git a/interpreter/llvm/src/tools/clang/lib/Headers/avx512fintrin.h b/interpreter/llvm/src/tools/clang/lib/Headers/avx512fintrin.h
index b556d04efbb7a..4ce694531100c 100644
--- a/interpreter/llvm/src/tools/clang/lib/Headers/avx512fintrin.h
+++ b/interpreter/llvm/src/tools/clang/lib/Headers/avx512fintrin.h
@@ -4289,7 +4289,7 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_maskz_cvtps_epu32 ( __mmask16 __U, __m512 __A)
 {
   return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
-                  (__v16si) 
+                  (__v16si)
                   _mm512_setzero_si512 (),
                   (__mmask16) __U ,
                   _MM_FROUND_CUR_DIRECTION);
@@ -9035,25 +9035,29 @@ _mm512_kxor (__mmask16 __A, __mmask16 __B)
 static __inline__ void __DEFAULT_FN_ATTRS
 _mm512_stream_si512 (__m512i * __P, __m512i __A)
 {
-  __builtin_nontemporal_store((__v8di)__A, (__v8di*)__P);
+  typedef __v8di __v8di_aligned __attribute__((aligned(64)));
+  __builtin_nontemporal_store((__v8di_aligned)__A, (__v8di_aligned*)__P);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_stream_load_si512 (void *__P)
 {
-  return (__m512i) __builtin_nontemporal_load((const __v8di *)__P);
+  typedef __v8di __v8di_aligned __attribute__((aligned(64)));
+  return (__m512i) __builtin_nontemporal_load((const __v8di_aligned *)__P);
 }
 
 static __inline__ void __DEFAULT_FN_ATTRS
 _mm512_stream_pd (double *__P, __m512d __A)
 {
-  __builtin_nontemporal_store((__v8df)__A, (__v8df*)__P);
+  typedef __v8df __v8df_aligned __attribute__((aligned(64)));
+  __builtin_nontemporal_store((__v8df_aligned)__A, (__v8df_aligned*)__P);
 }
 
 static __inline__ void __DEFAULT_FN_ATTRS
 _mm512_stream_ps (float *__P, __m512 __A)
 {
-  __builtin_nontemporal_store((__v16sf)__A, (__v16sf*)__P);
+  typedef __v16sf __v16sf_aligned __attribute__((aligned(64)));
+  __builtin_nontemporal_store((__v16sf_aligned)__A, (__v16sf_aligned*)__P);
 }
 
 static __inline__ __m512d __DEFAULT_FN_ATTRS
@@ -9217,39 +9221,39 @@ _mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A)
 static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
 {
-  __m128 res = __A; 
+  __m128 res = __A;
   res[0] = (__U & 1) ? __B[0] : __W[0];
-  return res; 
+  return res;
 }
 
 static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B)
 {
-  __m128 res = __A; 
-  res[0] = (__U & 1) ? __B[0] : 0; 
-  return res; 
+  __m128 res = __A;
+  res[0] = (__U & 1) ? __B[0] : 0;
+  return res;
 }
 
 static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_mask_move_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
 {
-  __m128d res = __A; 
+  __m128d res = __A;
   res[0] = (__U & 1) ? __B[0] : __W[0];
-  return res; 
+  return res;
 }
 
 static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B)
 {
-  __m128d res = __A; 
-  res[0] = (__U & 1) ? __B[0] : 0; 
-  return res; 
+  __m128d res = __A;
+  res[0] = (__U & 1) ? __B[0] : 0;
+  return res;
 }
 
 static __inline__ void __DEFAULT_FN_ATTRS
 _mm_mask_store_ss (float * __W, __mmask8 __U, __m128 __A)
 {
-  __builtin_ia32_storess128_mask ((__v16sf *)__W, 
+  __builtin_ia32_storess128_mask ((__v16sf *)__W,
                 (__v16sf) _mm512_castps128_ps512(__A),
                 (__mmask16) __U & (__mmask16)1);
 }
@@ -9257,7 +9261,7 @@ _mm_mask_store_ss (float * __W, __mmask8 __U, __m128 __A)
 static __inline__ void __DEFAULT_FN_ATTRS
 _mm_mask_store_sd (double * __W, __mmask8 __U, __m128d __A)
 {
-  __builtin_ia32_storesd128_mask ((__v8df *)__W, 
+  __builtin_ia32_storesd128_mask ((__v8df *)__W,
                 (__v8df) _mm512_castpd128_pd512(__A),
                 (__mmask8) __U & 1);
 }
@@ -9606,7 +9610,7 @@ _mm_mask_cvtsd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128d __B)
 {
   return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)(__A),
                                              (__v2df)(__B),
-                                             (__v4sf)(__W), 
+                                             (__v4sf)(__W),
                                              (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
 }
 
@@ -9615,7 +9619,7 @@ _mm_maskz_cvtsd_ss (__mmask8 __U, __m128 __A, __m128d __B)
 {
   return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)(__A),
                                              (__v2df)(__B),
-                                             (__v4sf)_mm_setzero_ps(), 
+                                             (__v4sf)_mm_setzero_ps(),
                                              (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
 }
 
@@ -9680,7 +9684,7 @@ _mm_mask_cvtss_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128 __B)
   return __builtin_ia32_cvtss2sd_round_mask((__v2df)(__A),
                                               (__v4sf)(__B),
                                               (__v2df)(__W),
-                                              (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION); 
+                                              (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
 }
 
 static __inline__ __m128d __DEFAULT_FN_ATTRS
@@ -9688,8 +9692,8 @@ _mm_maskz_cvtss_sd (__mmask8 __U, __m128d __A, __m128 __B)
 {
   return __builtin_ia32_cvtss2sd_round_mask((__v2df)(__A),
                                               (__v4sf)(__B),
-                                              (__v2df)_mm_setzero_pd(), 
-                                              (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION); 
+                                              (__v2df)_mm_setzero_pd(),
+                                              (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
 }
 
 static __inline__ __m128d __DEFAULT_FN_ATTRS
@@ -9935,7 +9939,7 @@ static __inline__ double __DEFAULT_FN_ATTRS _mm512_reduce_mul_pd(__m512d __W) {
 }
 
 // Vec512 - Vector with size 512.
-// Vec512Neutral - All vector elements set to the identity element. 
+// Vec512Neutral - All vector elements set to the identity element.
 // Identity element: {+,0},{*,1},{&,0xFFFFFFFFFFFFFFFF},{|,0}
 // Operator - Can be one of following: +,*,&,|
 // Mask - Intrinsic Mask
@@ -9965,19 +9969,19 @@ _mm512_mask_reduce_mul_epi64(__mmask8 __M, __m512i __W) {
 
 static __inline__ long long __DEFAULT_FN_ATTRS
 _mm512_mask_reduce_and_epi64(__mmask8 __M, __m512i __W) {
-  _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_epi64(0xFFFFFFFFFFFFFFFF), 
+  _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_epi64(0xFFFFFFFFFFFFFFFF),
                                     &, __M,  i, i, q);
 }
 
 static __inline__ long long __DEFAULT_FN_ATTRS
 _mm512_mask_reduce_or_epi64(__mmask8 __M, __m512i __W) {
-  _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_epi64(0), |, __M, 
+  _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_epi64(0), |, __M,
                                     i, i, q);
 }
 
 static __inline__ double __DEFAULT_FN_ATTRS
 _mm512_mask_reduce_add_pd(__mmask8 __M, __m512d __W) {
-  _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_pd(0), +, __M, 
+  _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_pd(0), +, __M,
                                     f, d, pd);
 }
 
@@ -10039,17 +10043,17 @@ _mm512_reduce_add_epi32(__m512i __W) {
   _mm512_reduce_operator_32bit(__W, +, i, i);
 }
 
-static __inline__ int __DEFAULT_FN_ATTRS 
+static __inline__ int __DEFAULT_FN_ATTRS
 _mm512_reduce_mul_epi32(__m512i __W) {
   _mm512_reduce_operator_32bit(__W, *, i, i);
 }
 
-static __inline__ int __DEFAULT_FN_ATTRS 
+static __inline__ int __DEFAULT_FN_ATTRS
 _mm512_reduce_and_epi32(__m512i __W) {
   _mm512_reduce_operator_32bit(__W, &, i, i);
 }
 
-static __inline__ int __DEFAULT_FN_ATTRS 
+static __inline__ int __DEFAULT_FN_ATTRS
 _mm512_reduce_or_epi32(__m512i __W) {
   _mm512_reduce_operator_32bit(__W, |, i, i);
 }
@@ -10065,7 +10069,7 @@ _mm512_reduce_mul_ps(__m512 __W) {
 }
 
 // Vec512 - Vector with size 512.
-// Vec512Neutral - All vector elements set to the identity element. 
+// Vec512Neutral - All vector elements set to the identity element.
 // Identity element: {+,0},{*,1},{&,0xFFFFFFFF},{|,0}
 // Operator - Can be one of following: +,*,&,|
 // Mask - Intrinsic Mask
@@ -10095,7 +10099,7 @@ _mm512_mask_reduce_mul_epi32( __mmask16 __M, __m512i __W) {
 
 static __inline__ int __DEFAULT_FN_ATTRS
 _mm512_mask_reduce_and_epi32( __mmask16 __M, __m512i __W) {
-  _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_epi32(0xFFFFFFFF), &, __M, 
+  _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_epi32(0xFFFFFFFF), &, __M,
                                     i, i, d);
 }
 
@@ -10158,7 +10162,7 @@ _mm512_mask_reduce_mul_ps(__mmask16 __M, __m512 __W) {
     return Vec512[0];                                                          \
   })
 
-static __inline__ long long __DEFAULT_FN_ATTRS 
+static __inline__ long long __DEFAULT_FN_ATTRS
 _mm512_reduce_max_epi64(__m512i __V) {
   _mm512_reduce_maxMin_64bit(__V, max_epi64, i, i);
 }
@@ -10168,7 +10172,7 @@ _mm512_reduce_max_epu64(__m512i __V) {
   _mm512_reduce_maxMin_64bit(__V, max_epu64, i, i);
 }
 
-static __inline__ double __DEFAULT_FN_ATTRS 
+static __inline__ double __DEFAULT_FN_ATTRS
 _mm512_reduce_max_pd(__m512d __V) {
   _mm512_reduce_maxMin_64bit(__V, max_pd, d, f);
 }
@@ -10183,7 +10187,7 @@ _mm512_reduce_min_epu64(__m512i __V) {
   _mm512_reduce_maxMin_64bit(__V, min_epu64, i, i);
 }
 
-static __inline__ double __DEFAULT_FN_ATTRS 
+static __inline__ double __DEFAULT_FN_ATTRS
 _mm512_reduce_min_pd(__m512d __V) {
   _mm512_reduce_maxMin_64bit(__V, min_pd, d, f);
 }
diff --git a/interpreter/llvm/src/tools/clang/lib/Headers/avx512vpopcntdqintrin.h b/interpreter/llvm/src/tools/clang/lib/Headers/avx512vpopcntdqintrin.h
new file mode 100644
index 0000000000000..34ab84932e7a9
--- /dev/null
+++ b/interpreter/llvm/src/tools/clang/lib/Headers/avx512vpopcntdqintrin.h
@@ -0,0 +1,70 @@
+/*===------------- avx512vpopcntdqintrin.h - AVX512VPOPCNTDQ intrinsics
+ *------------------===
+ *
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error                                                                         \
+    "Never use  directly; include  instead."
+#endif
+
+#ifndef __AVX512VPOPCNTDQINTRIN_H
+#define __AVX512VPOPCNTDQINTRIN_H
+
+/* Define the default attributes for the functions in this file. */
+#define __DEFAULT_FN_ATTRS                                                     \
+  __attribute__((__always_inline__, __nodebug__, __target__("avx512vpopcntd"   \
+                                                            "q")))
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_popcnt_epi64(__m512i __A) {
+  return (__m512i)__builtin_ia32_vpopcntq_512((__v8di)__A);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_popcnt_epi64(__m512i __W, __mmask8 __U, __m512i __A) {
+  return (__m512i)__builtin_ia32_selectq_512(
+      (__mmask8)__U, (__v8di)_mm512_popcnt_epi64(__A), (__v8di)__W);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_popcnt_epi64(__mmask8 __U, __m512i __A) {
+  return _mm512_mask_popcnt_epi64((__m512i)_mm512_setzero_si512(), __U, __A);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_popcnt_epi32(__m512i __A) {
+  return (__m512i)__builtin_ia32_vpopcntd_512((__v16si)__A);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_popcnt_epi32(__m512i __W, __mmask16 __U, __m512i __A) {
+  return (__m512i)__builtin_ia32_selectd_512(
+      (__mmask16)__U, (__v16si)_mm512_popcnt_epi32(__A), (__v16si)__W);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_popcnt_epi32(__mmask16 __U, __m512i __A) {
+  return _mm512_mask_popcnt_epi32((__m512i)_mm512_setzero_si512(), __U, __A);
+}
+
+#undef __DEFAULT_FN_ATTRS
+
+#endif
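A hypothetical use of the new header: accumulating per-element population counts across a buffer (compile with -mavx512vpopcntdq; the function and its names are ours, not clang's):

    #include <immintrin.h>
    #include <stddef.h>
    #include <stdint.h>

    // Sum of popcounts over n8 chunks of eight 64-bit words each.
    static inline uint64_t total_popcount(const uint64_t *p, size_t n8) {
      __m512i acc = _mm512_setzero_si512();
      for (size_t i = 0; i < n8; ++i) {
        __m512i v = _mm512_loadu_si512((const void *)(p + 8 * i));
        acc = _mm512_add_epi64(acc, _mm512_popcnt_epi64(v));
      }
      return (uint64_t)_mm512_reduce_add_epi64(acc);
    }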
diff --git a/interpreter/llvm/src/tools/clang/lib/Headers/avxintrin.h b/interpreter/llvm/src/tools/clang/lib/Headers/avxintrin.h
index cdb7aa4fb6268..dff5897b6bb6d 100644
--- a/interpreter/llvm/src/tools/clang/lib/Headers/avxintrin.h
+++ b/interpreter/llvm/src/tools/clang/lib/Headers/avxintrin.h
@@ -1458,12 +1458,13 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
 /// \brief Computes two dot products in parallel, using the lower and upper
 ///    halves of two [8 x float] vectors as input to the two computations, and
 ///    returning the two dot products in the lower and upper halves of the
-///    [8 x float] result. The immediate integer operand controls which input
-///    elements will contribute to the dot product, and where the final results
-///    are returned. In general, for each dot product, the four corresponding
-///    elements of the input vectors are multiplied; the first two and second
-///    two products are summed, then the two sums are added to form the final
-///    result.
+///    [8 x float] result.
+///
+///    The immediate integer operand controls which input elements will
+///    contribute to the dot product, and where the final results are returned.
+///    In general, for each dot product, the four corresponding elements of the
+///    input vectors are multiplied; the first two and second two products are
+///    summed, then the two sums are added to form the final result.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -1497,15 +1498,16 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
 
 /* Vector shuffle */
 /// \brief Selects 8 float values from the 256-bit operands of [8 x float], as
-///    specified by the immediate value operand. The four selected elements in
-///    each operand are copied to the destination according to the bits
-///    specified in the immediate operand. The selected elements from the first
-///    256-bit operand are copied to bits [63:0] and bits [191:128] of the
-///    destination, and the selected elements from the second 256-bit operand
-///    are copied to bits [127:64] and bits [255:192] of the destination. For
-///    example, if bits [7:0] of the immediate operand contain a value of 0xFF,
-///    the 256-bit destination vector would contain the following values: b[7],
-///    b[7], a[7], a[7], b[3], b[3], a[3], a[3].
+///    specified by the immediate value operand.
+///
+///    The four selected elements in each operand are copied to the destination
+///    according to the bits specified in the immediate operand. The selected
+///    elements from the first 256-bit operand are copied to bits [63:0] and
+///    bits [191:128] of the destination, and the selected elements from the
+///    second 256-bit operand are copied to bits [127:64] and bits [255:192] of
+///    the destination. For example, if bits [7:0] of the immediate operand
+///    contain a value of 0xFF, the 256-bit destination vector would contain the
+///    following values: b[7], b[7], a[7], a[7], b[3], b[3], a[3], a[3].
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -1557,13 +1559,14 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
                                   12 + (((mask) >> 6) & 0x3)); })
 
 /// \brief Selects four double-precision values from the 256-bit operands of
-///    [4 x double], as specified by the immediate value operand. The selected
-///    elements from the first 256-bit operand are copied to bits [63:0] and
-///    bits [191:128] in the destination, and the selected elements from the
-///    second 256-bit operand are copied to bits [127:64] and bits [255:192] in
-///    the destination. For example, if bits [3:0] of the immediate operand
-///    contain a value of 0xF, the 256-bit destination vector would contain the
-///    following values: b[3], a[3], b[1], a[1].
+///    [4 x double], as specified by the immediate value operand.
+///
+///    The selected elements from the first 256-bit operand are copied to bits
+///    [63:0] and bits [191:128] in the destination, and the selected elements
+///    from the second 256-bit operand are copied to bits [127:64] and bits
+///    [255:192] in the destination. For example, if bits [3:0] of the immediate
+///    operand contain a value of 0xF, the 256-bit destination vector would
+///    contain the following values: b[3], a[3], b[1], a[1].
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -1641,9 +1644,11 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
 
 /// \brief Compares each of the corresponding double-precision values of two
 ///    128-bit vectors of [2 x double], using the operation specified by the
-///    immediate integer operand. Returns a [2 x double] vector consisting of
-///    two doubles corresponding to the two comparison results: zero if the
-///    comparison is false, and all 1's if the comparison is true.
+///    immediate integer operand.
+///
+///    Returns a [2 x double] vector consisting of two doubles corresponding to
+///    the two comparison results: zero if the comparison is false, and all 1's
+///    if the comparison is true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -1699,9 +1704,11 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
 
 /// \brief Compares each of the corresponding values of two 128-bit vectors of
 ///    [4 x float], using the operation specified by the immediate integer
-///    operand. Returns a [4 x float] vector consisting of four floats
-///    corresponding to the four comparison results: zero if the comparison is
-///    false, and all 1's if the comparison is true.
+///    operand.
+///
+///    Returns a [4 x float] vector consisting of four floats corresponding to
+///    the four comparison results: zero if the comparison is false, and all 1's
+///    if the comparison is true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -1757,9 +1764,11 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
 
 /// \brief Compares each of the corresponding double-precision values of two
 ///    256-bit vectors of [4 x double], using the operation specified by the
-///    immediate integer operand. Returns a [4 x double] vector consisting of
-///    four doubles corresponding to the four comparison results: zero if the
-///    comparison is false, and all 1's if the comparison is true.
+///    immediate integer operand.
+///
+///    Returns a [4 x double] vector consisting of four doubles corresponding to
+///    the four comparison results: zero if the comparison is false, and all 1's
+///    if the comparison is true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -1815,9 +1824,11 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
 
 /// \brief Compares each of the corresponding values of two 256-bit vectors of
 ///    [8 x float], using the operation specified by the immediate integer
-///    operand. Returns a [8 x float] vector consisting of eight floats
-///    corresponding to the eight comparison results: zero if the comparison is
-///    false, and all 1's if the comparison is true.
+///    operand.
+///
+///    Returns a [8 x float] vector consisting of eight floats corresponding to
+///    the eight comparison results: zero if the comparison is false, and all
+///    1's if the comparison is true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -1873,8 +1884,10 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
 
 /// \brief Compares each of the corresponding scalar double-precision values of
 ///    two 128-bit vectors of [2 x double], using the operation specified by the
-///    immediate integer operand. If the result is true, all 64 bits of the
-///    destination vector are set; otherwise they are cleared.
+///    immediate integer operand.
+///
+///    If the result is true, all 64 bits of the destination vector are set;
+///    otherwise they are cleared.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -1930,8 +1943,10 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
 
 /// \brief Compares each of the corresponding scalar values of two 128-bit
 ///    vectors of [4 x float], using the operation specified by the immediate
-///    integer operand. If the result is true, all 32 bits of the destination
-///    vector are set; otherwise they are cleared.
+///    integer operand.
+///
+///    If the result is true, all 32 bits of the destination vector are set;
+///    otherwise they are cleared.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -2536,7 +2551,9 @@ _mm256_unpacklo_ps(__m256 __a, __m256 __b)
 /// \brief Given two 128-bit floating-point vectors of [2 x double], perform an
 ///    element-by-element comparison of the double-precision element in the
 ///    first source vector and the corresponding element in the second source
-///    vector. The EFLAGS register is updated as follows: \n
+///    vector.
+///
+///    The EFLAGS register is updated as follows: \n
 ///    If there is at least one pair of double-precision elements where the
 ///    sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the
 ///    ZF flag is set to 1. \n
@@ -2563,7 +2580,9 @@ _mm_testz_pd(__m128d __a, __m128d __b)
 /// \brief Given two 128-bit floating-point vectors of [2 x double], perform an
 ///    element-by-element comparison of the double-precision element in the
 ///    first source vector and the corresponding element in the second source
-///    vector. The EFLAGS register is updated as follows: \n
+///    vector.
+///
+///    The EFLAGS register is updated as follows: \n
 ///    If there is at least one pair of double-precision elements where the
 ///    sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the
 ///    ZF flag is set to 1. \n
@@ -2590,7 +2609,9 @@ _mm_testc_pd(__m128d __a, __m128d __b)
 /// \brief Given two 128-bit floating-point vectors of [2 x double], perform an
 ///    element-by-element comparison of the double-precision element in the
 ///    first source vector and the corresponding element in the second source
-///    vector. The EFLAGS register is updated as follows: \n
+///    vector.
+///
+///    The EFLAGS register is updated as follows: \n
 ///    If there is at least one pair of double-precision elements where the
 ///    sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the
 ///    ZF flag is set to 1. \n
@@ -2618,7 +2639,9 @@ _mm_testnzc_pd(__m128d __a, __m128d __b)
 /// \brief Given two 128-bit floating-point vectors of [4 x float], perform an
 ///    element-by-element comparison of the single-precision element in the
 ///    first source vector and the corresponding element in the second source
-///    vector. The EFLAGS register is updated as follows: \n
+///    vector.
+///
+///    The EFLAGS register is updated as follows: \n
 ///    If there is at least one pair of single-precision elements where the
 ///    sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the
 ///    ZF flag is set to 1. \n
@@ -2645,7 +2668,9 @@ _mm_testz_ps(__m128 __a, __m128 __b)
 /// \brief Given two 128-bit floating-point vectors of [4 x float], perform an
 ///    element-by-element comparison of the single-precision element in the
 ///    first source vector and the corresponding element in the second source
-///    vector. The EFLAGS register is updated as follows: \n
+///    vector.
+///
+///    The EFLAGS register is updated as follows: \n
 ///    If there is at least one pair of single-precision elements where the
 ///    sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the
 ///    ZF flag is set to 1. \n
@@ -2672,7 +2697,9 @@ _mm_testc_ps(__m128 __a, __m128 __b)
 /// \brief Given two 128-bit floating-point vectors of [4 x float], perform an
 ///    element-by-element comparison of the single-precision element in the
 ///    first source vector and the corresponding element in the second source
-///    vector. The EFLAGS register is updated as follows: \n
+///    vector.
+///
+///    The EFLAGS register is updated as follows: \n
 ///    If there is at least one pair of single-precision elements where the
 ///    sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the
 ///    ZF flag is set to 1. \n
@@ -2700,7 +2727,9 @@ _mm_testnzc_ps(__m128 __a, __m128 __b)
 /// \brief Given two 256-bit floating-point vectors of [4 x double], perform an
 ///    element-by-element comparison of the double-precision elements in the
 ///    first source vector and the corresponding elements in the second source
-///    vector. The EFLAGS register is updated as follows: \n
+///    vector.
+///
+///    The EFLAGS register is updated as follows: \n
 ///    If there is at least one pair of double-precision elements where the
 ///    sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the
 ///    ZF flag is set to 1. \n
@@ -2727,7 +2756,9 @@ _mm256_testz_pd(__m256d __a, __m256d __b)
 /// \brief Given two 256-bit floating-point vectors of [4 x double], perform an
 ///    element-by-element comparison of the double-precision elements in the
 ///    first source vector and the corresponding elements in the second source
-///    vector. The EFLAGS register is updated as follows: \n
+///    vector.
+///
+///    The EFLAGS register is updated as follows: \n
 ///    If there is at least one pair of double-precision elements where the
 ///    sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the
 ///    ZF flag is set to 1. \n
@@ -2754,7 +2785,9 @@ _mm256_testc_pd(__m256d __a, __m256d __b)
 /// \brief Given two 256-bit floating-point vectors of [4 x double], perform an
 ///    element-by-element comparison of the double-precision elements in the
 ///    first source vector and the corresponding elements in the second source
-///    vector. The EFLAGS register is updated as follows: \n
+///    vector.
+///
+///    The EFLAGS register is updated as follows: \n
 ///    If there is at least one pair of double-precision elements where the
 ///    sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the
 ///    ZF flag is set to 1. \n
@@ -2782,7 +2815,9 @@ _mm256_testnzc_pd(__m256d __a, __m256d __b)
 /// \brief Given two 256-bit floating-point vectors of [8 x float], perform an
 ///    element-by-element comparison of the single-precision element in the
 ///    first source vector and the corresponding element in the second source
-///    vector. The EFLAGS register is updated as follows: \n
+///    vector.
+///
+///    The EFLAGS register is updated as follows: \n
 ///    If there is at least one pair of single-precision elements where the
 ///    sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the
 ///    ZF flag is set to 1. \n
@@ -2809,7 +2844,9 @@ _mm256_testz_ps(__m256 __a, __m256 __b)
 /// \brief Given two 256-bit floating-point vectors of [8 x float], perform an
 ///    element-by-element comparison of the single-precision element in the
 ///    first source vector and the corresponding element in the second source
-///    vector. The EFLAGS register is updated as follows: \n
+///    vector.
+///
+///    The EFLAGS register is updated as follows: \n
 ///    If there is at least one pair of single-precision elements where the
 ///    sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the
 ///    ZF flag is set to 1. \n
@@ -2836,7 +2873,9 @@ _mm256_testc_ps(__m256 __a, __m256 __b)
 /// \brief Given two 256-bit floating-point vectors of [8 x float], perform an
 ///    element-by-element comparison of the single-precision elements in the
 ///    first source vector and the corresponding elements in the second source
-///    vector. The EFLAGS register is updated as follows: \n
+///    vector.
+///
+///    The EFLAGS register is updated as follows: \n
 ///    If there is at least one pair of single-precision elements where the
 ///    sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the
 ///    ZF flag is set to 1. \n
@@ -2862,7 +2901,9 @@ _mm256_testnzc_ps(__m256 __a, __m256 __b)
 }
 
 /// \brief Given two 256-bit integer vectors, perform a bit-by-bit comparison
-///    of the two source vectors and update the EFLAGS register as follows: \n
+///    of the two source vectors.
+///
+///    The EFLAGS register is updated as follows: \n
 ///    If there is at least one pair of bits where both bits are 1, the ZF flag
 ///    is set to 0. Otherwise the ZF flag is set to 1. \n
 ///    If there is at least one pair of bits where the bit from the first source
@@ -2886,7 +2927,9 @@ _mm256_testz_si256(__m256i __a, __m256i __b)
 }
 
 /// \brief Given two 256-bit integer vectors, perform a bit-by-bit comparison
-///    of the two source vectors and update the EFLAGS register as follows: \n
+///    of the two source vectors.
+///
+///    The EFLAGS register is updated as follows: \n
 ///    If there is at least one pair of bits where both bits are 1, the ZF flag
 ///    is set to 0. Otherwise the ZF flag is set to 1. \n
 ///    If there is at least one pair of bits where the bit from the first source
@@ -2910,7 +2953,9 @@ _mm256_testc_si256(__m256i __a, __m256i __b)
 }
 
 /// \brief Given two 256-bit integer vectors, perform a bit-by-bit comparison
-///    of the two source vectors and update the EFLAGS register as follows: \n
+///    of the two source vectors.
+///
+///    The EFLAGS register is updated as follows: \n
 ///    If there is at least one pair of bits where both bits are 1, the ZF flag
 ///    is set to 0. Otherwise the ZF flag is set to 1. \n
 ///    If there is at least one pair of bits where the bit from the first source
@@ -3545,7 +3590,8 @@ _mm_maskstore_ps(float *__p, __m128i __m, __m128 __a)
 static __inline void __DEFAULT_FN_ATTRS
 _mm256_stream_si256(__m256i *__a, __m256i __b)
 {
-  __builtin_nontemporal_store((__v4di)__b, (__v4di*)__a);
+  typedef __v4di __v4di_aligned __attribute__((aligned(32)));
+  __builtin_nontemporal_store((__v4di_aligned)__b, (__v4di_aligned*)__a);
 }
 
 /// \brief Moves double-precision values from a 256-bit vector of [4 x double]
@@ -3558,13 +3604,14 @@ _mm256_stream_si256(__m256i *__a, __m256i __b)
 ///
 /// \param __a
 ///    A pointer to a 32-byte aligned memory location that will receive the
-///    integer values.
+///    double-precision floating-point values.
 /// \param __b
 ///    A 256-bit vector of [4 x double] containing the values to be moved.
 static __inline void __DEFAULT_FN_ATTRS
 _mm256_stream_pd(double *__a, __m256d __b)
 {
-  __builtin_nontemporal_store((__v4df)__b, (__v4df*)__a);
+  typedef __v4df __v4df_aligned __attribute__((aligned(32)));
+  __builtin_nontemporal_store((__v4df_aligned)__b, (__v4df_aligned*)__a);
 }
 
 /// \brief Moves single-precision floating point values from a 256-bit vector
@@ -3584,7 +3631,8 @@ _mm256_stream_pd(double *__a, __m256d __b)
 static __inline void __DEFAULT_FN_ATTRS
 _mm256_stream_ps(float *__p, __m256 __a)
 {
-  __builtin_nontemporal_store((__v8sf)__a, (__v8sf*)__p);
+  typedef __v8sf __v8sf_aligned __attribute__((aligned(32)));
+  __builtin_nontemporal_store((__v8sf_aligned)__a, (__v8sf_aligned*)__p);
 }
 
 /* Create vectors */
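
(Editorial aside, not part of the patch: the __v4di_aligned-style typedefs
introduced above apparently exist so that __builtin_nontemporal_store sees an
explicit 32-byte alignment on the destination of these streaming stores. A
minimal user-level sketch of the intrinsics being touched; fill_stream and buf
are hypothetical names, buf must be 32-byte aligned, and the file must be
compiled with AVX enabled, e.g. -mavx.)

#include <immintrin.h>
#include <stddef.h>

/* Streams 8*n floats into a 32-byte-aligned buffer, bypassing the cache. */
void fill_stream(float *buf, float v, size_t n)
{
  __m256 vv = _mm256_set1_ps(v);
  for (size_t i = 0; i != n; ++i)
    _mm256_stream_ps(buf + 8 * i, vv);
  _mm_sfence();  /* order the non-temporal stores before later reads */
}
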
@@ -4466,9 +4514,10 @@ _mm256_castsi256_si128(__m256i __a)
 }
 
 /// \brief Constructs a 256-bit floating-point vector of [4 x double] from a
-///    128-bit floating-point vector of [2 x double]. The lower 128 bits
-///    contain the value of the source vector. The contents of the upper 128
-///    bits are undefined.
+///    128-bit floating-point vector of [2 x double].
+///
+///    The lower 128 bits contain the value of the source vector. The contents
+///    of the upper 128 bits are undefined.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -4486,9 +4535,10 @@ _mm256_castpd128_pd256(__m128d __a)
 }
 
 /// \brief Constructs a 256-bit floating-point vector of [8 x float] from a
-///    128-bit floating-point vector of [4 x float]. The lower 128 bits contain
-///    the value of the source vector. The contents of the upper 128 bits are
-///    undefined.
+///    128-bit floating-point vector of [4 x float].
+///
+///    The lower 128 bits contain the value of the source vector. The contents
+///    of the upper 128 bits are undefined.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -4506,6 +4556,7 @@ _mm256_castps128_ps256(__m128 __a)
 }
 
 /// \brief Constructs a 256-bit integer vector from a 128-bit integer vector.
+///
 ///    The lower 128 bits contain the value of the source vector. The contents
 ///    of the upper 128 bits are undefined.
 ///
@@ -4586,8 +4637,10 @@ _mm256_zextsi128_si256(__m128i __a)
 /// \brief Constructs a new 256-bit vector of [8 x float] by first duplicating
 ///    a 256-bit vector of [8 x float] given in the first parameter, and then
 ///    replacing either the upper or the lower 128 bits with the contents of a
-///    128-bit vector of [4 x float] in the second parameter. The immediate
-///    integer parameter determines between the upper or the lower 128 bits.
+///    128-bit vector of [4 x float] in the second parameter.
+///
+///    The immediate integer parameter selects between the upper and the lower
+///    128 bits.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -4631,8 +4684,10 @@ _mm256_zextsi128_si256(__m128i __a)
 /// \brief Constructs a new 256-bit vector of [4 x double] by first duplicating
 ///    a 256-bit vector of [4 x double] given in the first parameter, and then
 ///    replacing either the upper or the lower 128 bits with the contents of a
-///    128-bit vector of [2 x double] in the second parameter. The immediate
-///    integer parameter determines between the upper or the lower 128 bits.
+///    128-bit vector of [2 x double] in the second parameter.
+///
+///    The immediate integer parameter selects between the upper and the lower
+///    128 bits.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -4672,8 +4727,10 @@ _mm256_zextsi128_si256(__m128i __a)
 /// \brief Constructs a new 256-bit integer vector by first duplicating a
 ///    256-bit integer vector given in the first parameter, and then replacing
 ///    either the upper or the lower 128 bits with the contents of a 128-bit
-///    integer vector in the second parameter. The immediate integer parameter
-///    determines between the upper or the lower 128 bits.
+///    integer vector in the second parameter.
+///
+///    The immediate integer parameter selects between the upper and the lower
+///    128 bits.
 ///
 /// \headerfile <x86intrin.h>
 ///
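
(Editorial aside, not part of the patch: the reworded comments above document
the _mm256_insertf128_ps/pd/si256 family, where the immediate selects which
128-bit half is replaced. A short sketch of that behavior; replace_upper_half
is a hypothetical name.)

#include <immintrin.h>

/* Returns 'whole' with its upper 128 bits replaced by 'part'; an immediate
 * of 0 would replace the lower 128 bits instead. */
__m256 replace_upper_half(__m256 whole, __m128 part)
{
  return _mm256_insertf128_ps(whole, part, 1);
}
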
diff --git a/interpreter/llvm/src/tools/clang/lib/Headers/bmiintrin.h b/interpreter/llvm/src/tools/clang/lib/Headers/bmiintrin.h
index e590cf8bc1ae6..e812a1632b919 100644
--- a/interpreter/llvm/src/tools/clang/lib/Headers/bmiintrin.h
+++ b/interpreter/llvm/src/tools/clang/lib/Headers/bmiintrin.h
@@ -148,7 +148,7 @@ __blsi_u32(unsigned int __X)
 }
 
 /// \brief Creates a mask whose bits are set to 1, using bit 0 up to and
-///    including the least siginificant bit that is set to 1 in the source
+///    including the least significant bit that is set to 1 in the source
 ///    operand and returns the result.
 ///
 /// \headerfile <x86intrin.h>
@@ -164,7 +164,7 @@ __blsmsk_u32(unsigned int __X)
   return __X ^ (__X - 1);
 }
 
-/// \brief Clears the least siginificant bit that is set to 1 in the source
+/// \brief Clears the least significant bit that is set to 1 in the source
 ///    operand and returns the result.
 ///
 /// \headerfile <x86intrin.h>
@@ -309,7 +309,7 @@ __blsi_u64(unsigned long long __X)
 }
 
 /// \brief Creates a mask whose bits are set to 1, using bit 0 up to and
-///    including the least siginificant bit that is set to 1 in the source
+///    including the least significant bit that is set to 1 in the source
 ///    operand and returns the result.
 ///
 /// \headerfile <x86intrin.h>
@@ -318,14 +318,14 @@ __blsi_u64(unsigned long long __X)
 ///
 /// \param __X
 ///    An unsigned 64-bit integer used to create the mask.
-/// \returns A unsigned 64-bit integer containing the newly created mask.
+/// \returns An unsigned 64-bit integer containing the newly created mask.
 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
 __blsmsk_u64(unsigned long long __X)
 {
   return __X ^ (__X - 1);
 }
 
-/// \brief Clears the least siginificant bit that is set to 1 in the source
+/// \brief Clears the least significant bit that is set to 1 in the source
 ///    operand and returns the result.
 ///
 /// \headerfile <x86intrin.h>
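
(Editorial aside, not part of the patch: the comments corrected above document
__blsmsk_u32, whose body is __X ^ (__X - 1); the companion __blsr_u32 computes
__X & (__X - 1). A worked example with an arbitrary value; build with -mbmi.)

#include <x86intrin.h>
#include <assert.h>

int main(void)
{
  unsigned x = 0x58u;                /* binary 0101 1000 */
  assert(__blsmsk_u32(x) == 0x0Fu);  /* ones from bit 0 through lowest set bit */
  assert(__blsr_u32(x) == 0x50u);    /* lowest set bit cleared */
  return 0;
}
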
diff --git a/interpreter/llvm/src/tools/clang/lib/Headers/cpuid.h b/interpreter/llvm/src/tools/clang/lib/Headers/cpuid.h
index 400dcfacd552c..2dd0add236b8b 100644
--- a/interpreter/llvm/src/tools/clang/lib/Headers/cpuid.h
+++ b/interpreter/llvm/src/tools/clang/lib/Headers/cpuid.h
@@ -79,7 +79,7 @@
 #define signature_VORTEX_edx 0x36387865
 #define signature_VORTEX_ecx 0x436f5320
 
-/* Features in %ecx for level 1 */
+/* Features in %ecx for leaf 1 */
 #define bit_SSE3        0x00000001
 #define bit_PCLMULQDQ   0x00000002
 #define bit_PCLMUL      bit_PCLMULQDQ   /* for gcc compat */
@@ -114,7 +114,7 @@
 #define bit_F16C        0x20000000
 #define bit_RDRND       0x40000000
 
-/* Features in %edx for level 1 */
+/* Features in %edx for leaf 1 */
 #define bit_FPU         0x00000001
 #define bit_VME         0x00000002
 #define bit_DE          0x00000004
@@ -147,44 +147,95 @@
 #define bit_TM          0x20000000
 #define bit_PBE         0x80000000
 
-/* Features in %ebx for level 7 sub-leaf 0 */
+/* Features in %ebx for leaf 7 sub-leaf 0 */
 #define bit_FSGSBASE    0x00000001
+#define bit_SGX         0x00000004
+#define bit_BMI         0x00000008
+#define bit_HLE         0x00000010
+#define bit_AVX2        0x00000020
 #define bit_SMEP        0x00000080
+#define bit_BMI2        0x00000100
 #define bit_ENH_MOVSB   0x00000200
+#define bit_RTM         0x00000800
+#define bit_MPX         0x00004000
+#define bit_AVX512F     0x00010000
+#define bit_AVX512DQ    0x00020000
+#define bit_RDSEED      0x00040000
+#define bit_ADX         0x00080000
+#define bit_AVX512IFMA  0x00200000
+#define bit_CLFLUSHOPT  0x00800000
+#define bit_CLWB        0x01000000
+#define bit_AVX512PF    0x04000000
+#define bit_AVX512ER    0x08000000
+#define bit_AVX512CD    0x10000000
+#define bit_SHA         0x20000000
+#define bit_AVX512BW    0x40000000
+#define bit_AVX512VL    0x80000000
+
+/* Features in %ecx for leaf 7 sub-leaf 0 */
+#define bit_PREFTCHWT1  0x00000001
+#define bit_AVX512VBMI  0x00000002
+#define bit_PKU         0x00000004
+#define bit_OSPKE       0x00000010
+#define bit_AVX512VPOPCNTDQ  0x00004000
+#define bit_RDPID       0x00400000
+
+/* Features in %edx for leaf 7 sub-leaf 0 */
+#define bit_AVX5124VNNIW  0x00000004
+#define bit_AVX5124FMAPS  0x00000008
+
+/* Features in %eax for leaf 13 sub-leaf 1 */
+#define bit_XSAVEOPT    0x00000001
+#define bit_XSAVEC      0x00000002
+#define bit_XSAVES      0x00000008
+
+/* Features in %ecx for leaf 0x80000001 */
+#define bit_LAHF_LM     0x00000001
+#define bit_ABM         0x00000020
+#define bit_SSE4a       0x00000040
+#define bit_PRFCHW      0x00000100
+#define bit_XOP         0x00000800
+#define bit_LWP         0x00008000
+#define bit_FMA4        0x00010000
+#define bit_TBM         0x00200000
+#define bit_MWAITX      0x20000000
+
+/* Features in %edx for leaf 0x80000001 */
+#define bit_MMXEXT      0x00400000
+#define bit_LM          0x20000000
+#define bit_3DNOWP      0x40000000
+#define bit_3DNOW       0x80000000
+
+/* Features in %ebx for leaf 0x80000008 */
+#define bit_CLZERO      0x00000001
+
 
 #if __i386__
-#define __cpuid(__level, __eax, __ebx, __ecx, __edx) \
+#define __cpuid(__leaf, __eax, __ebx, __ecx, __edx) \
     __asm("cpuid" : "=a"(__eax), "=b" (__ebx), "=c"(__ecx), "=d"(__edx) \
-                  : "0"(__level))
+                  : "0"(__leaf))
 
-#define __cpuid_count(__level, __count, __eax, __ebx, __ecx, __edx) \
+#define __cpuid_count(__leaf, __count, __eax, __ebx, __ecx, __edx) \
     __asm("cpuid" : "=a"(__eax), "=b" (__ebx), "=c"(__ecx), "=d"(__edx) \
-                  : "0"(__level), "2"(__count))
+                  : "0"(__leaf), "2"(__count))
 #else
 /* x86-64 uses %rbx as the base register, so preserve it. */
-#define __cpuid(__level, __eax, __ebx, __ecx, __edx) \
+#define __cpuid(__leaf, __eax, __ebx, __ecx, __edx) \
     __asm("  xchgq  %%rbx,%q1\n" \
           "  cpuid\n" \
           "  xchgq  %%rbx,%q1" \
         : "=a"(__eax), "=r" (__ebx), "=c"(__ecx), "=d"(__edx) \
-        : "0"(__level))
+        : "0"(__leaf))
 
-#define __cpuid_count(__level, __count, __eax, __ebx, __ecx, __edx) \
+#define __cpuid_count(__leaf, __count, __eax, __ebx, __ecx, __edx) \
     __asm("  xchgq  %%rbx,%q1\n" \
           "  cpuid\n" \
           "  xchgq  %%rbx,%q1" \
         : "=a"(__eax), "=r" (__ebx), "=c"(__ecx), "=d"(__edx) \
-        : "0"(__level), "2"(__count))
+        : "0"(__leaf), "2"(__count))
 #endif
 
-static __inline int __get_cpuid (unsigned int __level, unsigned int *__eax,
-                                 unsigned int *__ebx, unsigned int *__ecx,
-                                 unsigned int *__edx) {
-    __cpuid(__level, *__eax, *__ebx, *__ecx, *__edx);
-    return 1;
-}
-
-static __inline int __get_cpuid_max (unsigned int __level, unsigned int *__sig)
+static __inline int __get_cpuid_max (unsigned int __leaf, unsigned int *__sig)
 {
     unsigned int __eax, __ebx, __ecx, __edx;
 #if __i386__
@@ -208,8 +259,35 @@ static __inline int __get_cpuid_max (unsigned int __level, unsigned int *__sig)
         return 0;
 #endif
 
-    __cpuid(__level, __eax, __ebx, __ecx, __edx);
+    __cpuid(__leaf, __eax, __ebx, __ecx, __edx);
     if (__sig)
         *__sig = __ebx;
     return __eax;
 }
+
+static __inline int __get_cpuid (unsigned int __leaf, unsigned int *__eax,
+                                 unsigned int *__ebx, unsigned int *__ecx,
+                                 unsigned int *__edx)
+{
+    unsigned int __max_leaf = __get_cpuid_max(__leaf & 0x80000000, 0);
+
+    if (__max_leaf == 0 || __max_leaf < __leaf)
+        return 0;
+
+    __cpuid(__leaf, *__eax, *__ebx, *__ecx, *__edx);
+    return 1;
+}
+
+static __inline int __get_cpuid_count (unsigned int __leaf,
+                                       unsigned int __subleaf,
+                                       unsigned int *__eax, unsigned int *__ebx,
+                                       unsigned int *__ecx, unsigned int *__edx)
+{
+    unsigned int __max_leaf = __get_cpuid_max(__leaf & 0x80000000, 0);
+
+    if (__max_leaf == 0 || __max_leaf < __leaf)
+        return 0;
+
+    __cpuid_count(__leaf, __subleaf, *__eax, *__ebx, *__ecx, *__edx);
+    return 1;
+}
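
(Editorial aside, not part of the patch: the reworked __get_cpuid now rejects
leaves above the maximum reported by __get_cpuid_max, and the new
__get_cpuid_count applies the same check before querying a sub-leaf, mirroring
GCC's helper of the same name. A sketch combining it with the leaf-7 feature
bits added above:)

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
  unsigned eax, ebx, ecx, edx;
  /* Leaf 7, sub-leaf 0: structured extended feature flags. */
  if (__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx))
    printf("AVX2: %s  SHA: %s\n",
           (ebx & bit_AVX2) ? "yes" : "no",
           (ebx & bit_SHA) ? "yes" : "no");
  return 0;
}
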
diff --git a/interpreter/llvm/src/tools/clang/lib/Headers/emmintrin.h b/interpreter/llvm/src/tools/clang/lib/Headers/emmintrin.h
index 13b0db22ec442..709815cbb4c2d 100644
--- a/interpreter/llvm/src/tools/clang/lib/Headers/emmintrin.h
+++ b/interpreter/llvm/src/tools/clang/lib/Headers/emmintrin.h
@@ -302,7 +302,7 @@ _mm_min_pd(__m128d __a, __m128d __b)
   return __builtin_ia32_minpd((__v2df)__a, (__v2df)__b);
 }
 
-/// \brief Compares lower 64-bits double-precision values of both operands, and
+/// \brief Compares the lower 64-bit double-precision values of both operands, and
 ///    returns the greater of the pair of values in the lower 64-bits of the
 ///    result. The upper 64 bits of the result are copied from the upper double-
 ///    precision value of the first operand.
@@ -462,8 +462,9 @@ _mm_cmplt_pd(__m128d __a, __m128d __b)
 
 /// \brief Compares each of the corresponding double-precision values of the
 ///    128-bit vectors of [2 x double] to determine if the values in the first
-///    operand are less than or equal to those in the second operand. Each
-///    comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
+///    operand are less than or equal to those in the second operand.
+///
+///    Each comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -482,8 +483,9 @@ _mm_cmple_pd(__m128d __a, __m128d __b)
 
 /// \brief Compares each of the corresponding double-precision values of the
 ///    128-bit vectors of [2 x double] to determine if the values in the first
-///    operand are greater than those in the second operand. Each comparison
-///    yields 0h for false, FFFFFFFFFFFFFFFFh for true.
+///    operand are greater than those in the second operand.
+///
+///    Each comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -502,8 +504,9 @@ _mm_cmpgt_pd(__m128d __a, __m128d __b)
 
 /// \brief Compares each of the corresponding double-precision values of the
 ///    128-bit vectors of [2 x double] to determine if the values in the first
-///    operand are greater than or equal to those in the second operand. Each
-///    comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
+///    operand are greater than or equal to those in the second operand.
+///
+///    Each comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -522,9 +525,10 @@ _mm_cmpge_pd(__m128d __a, __m128d __b)
 
 /// \brief Compares each of the corresponding double-precision values of the
 ///    128-bit vectors of [2 x double] to determine if the values in the first
-///    operand are ordered with respect to those in the second operand. A pair
-///    of double-precision values are "ordered" with respect to each other if
-///    neither value is a NaN. Each comparison yields 0h for false,
+///    operand are ordered with respect to those in the second operand.
+///
+///    A pair of double-precision values are "ordered" with respect to each
+///    other if neither value is a NaN. Each comparison yields 0h for false,
 ///    FFFFFFFFFFFFFFFFh for true.
 ///
 /// \headerfile <x86intrin.h>
@@ -544,9 +548,10 @@ _mm_cmpord_pd(__m128d __a, __m128d __b)
 
 /// \brief Compares each of the corresponding double-precision values of the
 ///    128-bit vectors of [2 x double] to determine if the values in the first
-///    operand are unordered with respect to those in the second operand. A pair
-///    of double-precision values are "unordered" with respect to each other if
-///    one or both values are NaN. Each comparison yields 0h for false,
+///    operand are unordered with respect to those in the second operand.
+///
+///    A pair of double-precision values are "unordered" with respect to each
+///    other if one or both values are NaN. Each comparison yields 0h for false,
 ///    FFFFFFFFFFFFFFFFh for true.
 ///
 /// \headerfile <x86intrin.h>
@@ -567,8 +572,9 @@ _mm_cmpunord_pd(__m128d __a, __m128d __b)
 
 /// \brief Compares each of the corresponding double-precision values of the
 ///    128-bit vectors of [2 x double] to determine if the values in the first
-///    operand are unequal to those in the second operand. Each comparison
-///    yields 0h for false, FFFFFFFFFFFFFFFFh for true.
+///    operand are unequal to those in the second operand.
+///
+///    Each comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -587,8 +593,9 @@ _mm_cmpneq_pd(__m128d __a, __m128d __b)
 
 /// \brief Compares each of the corresponding double-precision values of the
 ///    128-bit vectors of [2 x double] to determine if the values in the first
-///    operand are not less than those in the second operand. Each comparison
-///    yields 0h for false, FFFFFFFFFFFFFFFFh for true.
+///    operand are not less than those in the second operand.
+///
+///    Each comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -607,8 +614,9 @@ _mm_cmpnlt_pd(__m128d __a, __m128d __b)
 
 /// \brief Compares each of the corresponding double-precision values of the
 ///    128-bit vectors of [2 x double] to determine if the values in the first
-///    operand are not less than or equal to those in the second operand. Each
-///    comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
+///    operand are not less than or equal to those in the second operand.
+///
+///    Each comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -627,8 +635,9 @@ _mm_cmpnle_pd(__m128d __a, __m128d __b)
 
 /// \brief Compares each of the corresponding double-precision values of the
 ///    128-bit vectors of [2 x double] to determine if the values in the first
-///    operand are not greater than those in the second operand. Each
-///    comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
+///    operand are not greater than those in the second operand.
+///
+///    Each comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -648,6 +657,7 @@ _mm_cmpngt_pd(__m128d __a, __m128d __b)
 /// \brief Compares each of the corresponding double-precision values of the
 ///    128-bit vectors of [2 x double] to determine if the values in the first
 ///    operand are not greater than or equal to those in the second operand.
+///
 ///    Each comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
 ///
 /// \headerfile <x86intrin.h>
@@ -666,8 +676,9 @@ _mm_cmpnge_pd(__m128d __a, __m128d __b)
 }
 
 /// \brief Compares the lower double-precision floating-point values in each of
-///    the two 128-bit floating-point vectors of [2 x double] for equality. The
-///    comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
+///    the two 128-bit floating-point vectors of [2 x double] for equality.
+///
+///    The comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -690,8 +701,9 @@ _mm_cmpeq_sd(__m128d __a, __m128d __b)
 /// \brief Compares the lower double-precision floating-point values in each of
 ///    the two 128-bit floating-point vectors of [2 x double] to determine if
 ///    the value in the first parameter is less than the corresponding value in
-///    the second parameter. The comparison yields 0h for false,
-///    FFFFFFFFFFFFFFFFh for true.
+///    the second parameter.
+///
+///    The comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -714,8 +726,9 @@ _mm_cmplt_sd(__m128d __a, __m128d __b)
 /// \brief Compares the lower double-precision floating-point values in each of
 ///    the two 128-bit floating-point vectors of [2 x double] to determine if
 ///    the value in the first parameter is less than or equal to the
-///    corresponding value in the second parameter. The comparison yields 0h for
-///    false, FFFFFFFFFFFFFFFFh for true.
+///    corresponding value in the second parameter.
+///
+///    The comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -738,8 +751,9 @@ _mm_cmple_sd(__m128d __a, __m128d __b)
 /// \brief Compares the lower double-precision floating-point values in each of
 ///    the two 128-bit floating-point vectors of [2 x double] to determine if
 ///    the value in the first parameter is greater than the corresponding value
-///    in the second parameter. The comparison yields 0h for false,
-///    FFFFFFFFFFFFFFFFh for true.
+///    in the second parameter.
+///
+///    The comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -763,8 +777,9 @@ _mm_cmpgt_sd(__m128d __a, __m128d __b)
 /// \brief Compares the lower double-precision floating-point values in each of
 ///    the two 128-bit floating-point vectors of [2 x double] to determine if
 ///    the value in the first parameter is greater than or equal to the
-///    corresponding value in the second parameter. The comparison yields 0h for
-///    false, FFFFFFFFFFFFFFFFh for true.
+///    corresponding value in the second parameter.
+///
+///    The comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -788,9 +803,11 @@ _mm_cmpge_sd(__m128d __a, __m128d __b)
 /// \brief Compares the lower double-precision floating-point values in each of
 ///    the two 128-bit floating-point vectors of [2 x double] to determine if
 ///    the value in the first parameter is "ordered" with respect to the
-///    corresponding value in the second parameter. The comparison yields 0h for
-///    false, FFFFFFFFFFFFFFFFh for true. A pair of double-precision values are
-///    "ordered" with respect to each other if neither value is a NaN.
+///    corresponding value in the second parameter.
+///
+///    The comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true. A pair of
+///    double-precision values are "ordered" with respect to each other if
+///    neither value is a NaN.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -813,9 +830,11 @@ _mm_cmpord_sd(__m128d __a, __m128d __b)
 /// \brief Compares the lower double-precision floating-point values in each of
 ///    the two 128-bit floating-point vectors of [2 x double] to determine if
 ///    the value in the first parameter is "unordered" with respect to the
-///    corresponding value in the second parameter. The comparison yields 0h
-///    for false, FFFFFFFFFFFFFFFFh for true. A pair of double-precision values
-///    are "unordered" with respect to each other if one or both values are NaN.
+///    corresponding value in the second parameter.
+///
+///    The comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true. A pair of
+///    double-precision values are "unordered" with respect to each other if one
+///    or both values are NaN.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -839,8 +858,9 @@ _mm_cmpunord_sd(__m128d __a, __m128d __b)
 /// \brief Compares the lower double-precision floating-point values in each of
 ///    the two 128-bit floating-point vectors of [2 x double] to determine if
 ///    the value in the first parameter is unequal to the corresponding value in
-///    the second parameter. The comparison yields 0h for false,
-///    FFFFFFFFFFFFFFFFh for true.
+///    the second parameter.
+///
+///    The comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -863,8 +883,9 @@ _mm_cmpneq_sd(__m128d __a, __m128d __b)
 /// \brief Compares the lower double-precision floating-point values in each of
 ///    the two 128-bit floating-point vectors of [2 x double] to determine if
 ///    the value in the first parameter is not less than the corresponding
-///    value in the second parameter. The comparison yields 0h for false,
-///    FFFFFFFFFFFFFFFFh for true.
+///    value in the second parameter.
+///
+///    The comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -887,8 +908,9 @@ _mm_cmpnlt_sd(__m128d __a, __m128d __b)
 /// \brief Compares the lower double-precision floating-point values in each of
 ///    the two 128-bit floating-point vectors of [2 x double] to determine if
 ///    the value in the first parameter is not less than or equal to the
-///    corresponding value in the second parameter. The comparison yields 0h
-///    for false, FFFFFFFFFFFFFFFFh for true.
+///    corresponding value in the second parameter.
+///
+///    The comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -911,8 +933,9 @@ _mm_cmpnle_sd(__m128d __a, __m128d __b)
 /// \brief Compares the lower double-precision floating-point values in each of
 ///    the two 128-bit floating-point vectors of [2 x double] to determine if
 ///    the value in the first parameter is not greater than the corresponding
-///    value in the second parameter. The comparison yields 0h for false,
-///    FFFFFFFFFFFFFFFFh for true.
+///    value in the second parameter.
+///
+///    The comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -936,8 +959,9 @@ _mm_cmpngt_sd(__m128d __a, __m128d __b)
 /// \brief Compares the lower double-precision floating-point values in each of
 ///    the two 128-bit floating-point vectors of [2 x double] to determine if
 ///    the value in the first parameter is not greater than or equal to the
-///    corresponding value in the second parameter. The comparison yields 0h
-///    for false, FFFFFFFFFFFFFFFFh for true.
+///    corresponding value in the second parameter.
+///
+///    The comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -982,7 +1006,9 @@ _mm_comieq_sd(__m128d __a, __m128d __b)
 /// \brief Compares the lower double-precision floating-point values in each of
 ///    the two 128-bit floating-point vectors of [2 x double] to determine if
 ///    the value in the first parameter is less than the corresponding value in
-///    the second parameter. The comparison yields 0 for false, 1 for true.
+///    the second parameter.
+///
+///    The comparison yields 0 for false, 1 for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -1004,8 +1030,9 @@ _mm_comilt_sd(__m128d __a, __m128d __b)
 /// \brief Compares the lower double-precision floating-point values in each of
 ///    the two 128-bit floating-point vectors of [2 x double] to determine if
 ///    the value in the first parameter is less than or equal to the
-///    corresponding value in the second parameter. The comparison yields 0 for
-///    false, 1 for true.
+///    corresponding value in the second parameter.
+///
+///    The comparison yields 0 for false, 1 for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -1027,7 +1054,9 @@ _mm_comile_sd(__m128d __a, __m128d __b)
 /// \brief Compares the lower double-precision floating-point values in each of
 ///    the two 128-bit floating-point vectors of [2 x double] to determine if
 ///    the value in the first parameter is greater than the corresponding value
-///    in the second parameter. The comparison yields 0 for false, 1 for true.
+///    in the second parameter.
+///
+///    The comparison yields 0 for false, 1 for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -1049,8 +1078,9 @@ _mm_comigt_sd(__m128d __a, __m128d __b)
 /// \brief Compares the lower double-precision floating-point values in each of
 ///    the two 128-bit floating-point vectors of [2 x double] to determine if
 ///    the value in the first parameter is greater than or equal to the
-///    corresponding value in the second parameter. The comparison yields 0 for
-///    false, 1 for true.
+///    corresponding value in the second parameter.
+///
+///    The comparison yields 0 for false, 1 for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -1072,7 +1102,9 @@ _mm_comige_sd(__m128d __a, __m128d __b)
 /// \brief Compares the lower double-precision floating-point values in each of
 ///    the two 128-bit floating-point vectors of [2 x double] to determine if
 ///    the value in the first parameter is unequal to the corresponding value in
-///    the second parameter. The comparison yields 0 for false, 1 for true.
+///    the second parameter.
+///
+///    The comparison yields 0 for false, 1 for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -1093,8 +1125,9 @@ _mm_comineq_sd(__m128d __a, __m128d __b)
 
 /// \brief Compares the lower double-precision floating-point values in each of
 ///    the two 128-bit floating-point vectors of [2 x double] for equality. The
-///    comparison yields 0 for false, 1 for true. If either of the two lower
-///    double-precision values is NaN, 1 is returned.
+///    comparison yields 0 for false, 1 for true.
+///
+///    If either of the two lower double-precision values is NaN, 1 is returned.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -1117,8 +1150,10 @@ _mm_ucomieq_sd(__m128d __a, __m128d __b)
 /// \brief Compares the lower double-precision floating-point values in each of
 ///    the two 128-bit floating-point vectors of [2 x double] to determine if
 ///    the value in the first parameter is less than the corresponding value in
-///    the second parameter. The comparison yields 0 for false, 1 for true. If
-///    either of the two lower double-precision values is NaN, 1 is returned.
+///    the second parameter.
+///
+///    The comparison yields 0 for false, 1 for true. If either of the two lower
+///    double-precision values is NaN, 1 is returned.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -1141,9 +1176,10 @@ _mm_ucomilt_sd(__m128d __a, __m128d __b)
 /// \brief Compares the lower double-precision floating-point values in each of
 ///    the two 128-bit floating-point vectors of [2 x double] to determine if
 ///    the value in the first parameter is less than or equal to the
-///    corresponding value in the second parameter. The comparison yields 0 for
-///    false, 1 for true. If either of the two lower double-precision values is
-///    NaN, 1 is returned.
+///    corresponding value in the second parameter.
+///
+///    The comparison yields 0 for false, 1 for true. If either of the two lower
+///    double-precision values is NaN, 1 is returned.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -1166,8 +1202,10 @@ _mm_ucomile_sd(__m128d __a, __m128d __b)
 /// \brief Compares the lower double-precision floating-point values in each of
 ///    the two 128-bit floating-point vectors of [2 x double] to determine if
 ///    the value in the first parameter is greater than the corresponding value
-///    in the second parameter. The comparison yields 0 for false, 1 for true.
-///    If either of the two lower double-precision values is NaN, 0 is returned.
+///    in the second parameter.
+///
+///    The comparison yields 0 for false, 1 for true. If either of the two lower
+///    double-precision values is NaN, 0 is returned.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -1190,9 +1228,10 @@ _mm_ucomigt_sd(__m128d __a, __m128d __b)
 /// \brief Compares the lower double-precision floating-point values in each of
 ///    the two 128-bit floating-point vectors of [2 x double] to determine if
 ///    the value in the first parameter is greater than or equal to the
-///    corresponding value in the second parameter. The comparison yields 0 for
-///    false, 1 for true.  If either of the two lower double-precision values
-///    is NaN, 0 is returned.
+///    corresponding value in the second parameter.
+///
+///    The comparison yields 0 for false, 1 for true.  If either of the two
+///    lower double-precision values is NaN, 0 is returned.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -1215,8 +1254,10 @@ _mm_ucomige_sd(__m128d __a, __m128d __b)
 /// \brief Compares the lower double-precision floating-point values in each of
 ///    the two 128-bit floating-point vectors of [2 x double] to determine if
 ///    the value in the first parameter is unequal to the corresponding value in
-///    the second parameter. The comparison yields 0 for false, 1 for true. If
-///    either of the two lower double-precision values is NaN, 0 is returned.
+///    the second parameter.
+///
+///    The comparison yields 0 for false, 1 for true. If either of the two lower
+///    double-precision values is NaN, 0 is returned.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -1278,8 +1319,9 @@ _mm_cvtps_pd(__m128 __a)
 
 /// \brief Converts the lower two integer elements of a 128-bit vector of
 ///    [4 x i32] into two double-precision floating-point values, returned in a
-///    128-bit vector of [2 x double]. The upper two elements of the input
-///    vector are unused.
+///    128-bit vector of [2 x double].
+///
+///    The upper two elements of the input vector are unused.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -1287,7 +1329,9 @@ _mm_cvtps_pd(__m128 __a)
 ///
 /// \param __a
 ///    A 128-bit integer vector of [4 x i32]. The lower two integer elements are
-///    converted to double-precision values. The upper two elements are unused.
+///    converted to double-precision values.
+///
+///    The upper two elements are unused.
 /// \returns A 128-bit vector of [2 x double] containing the converted values.
 static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_cvtepi32_pd(__m128i __a)
@@ -1409,10 +1453,11 @@ _mm_cvtss_sd(__m128d __a, __m128 __b)
 
 /// \brief Converts the two double-precision floating-point elements of a
 ///    128-bit vector of [2 x double] into two signed 32-bit integer values,
-///    returned in the lower 64 bits of a 128-bit vector of [4 x i32]. If the
-///    result of either conversion is inexact, the result is truncated (rounded
-///    towards zero) regardless of the current MXCSR setting. The upper 64 bits
-///    of the result vector are set to zero.
+///    returned in the lower 64 bits of a 128-bit vector of [4 x i32].
+///
+///    If the result of either conversion is inexact, the result is truncated
+///    (rounded towards zero) regardless of the current MXCSR setting. The upper
+///    64 bits of the result vector are set to zero.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -1466,9 +1511,10 @@ _mm_cvtpd_pi32(__m128d __a)
 
 /// \brief Converts the two double-precision floating-point elements of a
 ///    128-bit vector of [2 x double] into two signed 32-bit integer values,
-///    returned in a 64-bit vector of [2 x i32]. If the result of either
-///    conversion is inexact, the result is truncated (rounded towards zero)
-///    regardless of the current MXCSR setting.
+///    returned in a 64-bit vector of [2 x i32].
+///
+///    If the result of either conversion is inexact, the result is truncated
+///    (rounded towards zero) regardless of the current MXCSR setting.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -1606,7 +1652,7 @@ _mm_loadu_pd(double const *__dp)
 ///
 /// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction.
 ///
-/// \param __dp
+/// \param __a
 ///    A pointer to a 64-bit memory location. The address of the memory
 ///    location does not have to be aligned.
 /// \returns A 128-bit vector of [2 x i64] containing the loaded value.
@@ -1628,7 +1674,7 @@ _mm_loadu_si64(void const *__a)
 /// This intrinsic corresponds to the <c> VMOVSD / MOVSD </c> instruction.
 ///
 /// \param __dp
-///    An pointer to a memory location containing a double-precision value.
+///    A pointer to a memory location containing a double-precision value.
 ///    The address of the memory location does not have to be aligned.
 /// \returns A 128-bit vector of [2 x double] containing the loaded value.
 static __inline__ __m128d __DEFAULT_FN_ATTRS
@@ -1865,12 +1911,38 @@ _mm_store_sd(double *__dp, __m128d __a)
   ((struct __mm_store_sd_struct*)__dp)->__u = __a[0];
 }
 
+/// \brief Moves packed double-precision values from a 128-bit vector of
+///    [2 x double] to a memory location.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the VMOVAPD / MOVAPS instruction.
+///
+/// \param __dp
+///    A pointer to an aligned memory location that can store two
+///    double-precision values.
+/// \param __a
+///    A packed 128-bit vector of [2 x double] containing the values to be
+///    moved.
 static __inline__ void __DEFAULT_FN_ATTRS
 _mm_store_pd(double *__dp, __m128d __a)
 {
   *(__m128d*)__dp = __a;
 }
 
+/// \brief Moves the lower 64 bits of a 128-bit vector of [2 x double] twice to
+///    the upper and lower 64 bits of a memory location.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the VMOVDDUP + VMOVAPD / MOVLHPS + MOVAPS instruction.
+///
+/// \param __dp
+///    A pointer to a memory location that can store two double-precision
+///    values.
+/// \param __a
+///    A 128-bit vector of [2 x double] whose lower 64 bits are copied to each
+///    of the values in \a dp.
 static __inline__ void __DEFAULT_FN_ATTRS
 _mm_store1_pd(double *__dp, __m128d __a)
 {
@@ -1980,8 +2052,9 @@ _mm_storel_pd(double *__dp, __m128d __a)
 
 /// \brief Adds the corresponding elements of two 128-bit vectors of [16 x i8],
 ///    saving the lower 8 bits of each sum in the corresponding element of a
-///    128-bit result vector of [16 x i8]. The integer elements of both
-///    parameters can be either signed or unsigned.
+///    128-bit result vector of [16 x i8].
+///
+///    The integer elements of both parameters can be either signed or unsigned.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -2001,8 +2074,9 @@ _mm_add_epi8(__m128i __a, __m128i __b)
 
 /// \brief Adds the corresponding elements of two 128-bit vectors of [8 x i16],
 ///    saving the lower 16 bits of each sum in the corresponding element of a
-///    128-bit result vector of [8 x i16]. The integer elements of both
-///    parameters can be either signed or unsigned.
+///    128-bit result vector of [8 x i16].
+///
+///    The integer elements of both parameters can be either signed or unsigned.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -2022,8 +2096,9 @@ _mm_add_epi16(__m128i __a, __m128i __b)
 
 /// \brief Adds the corresponding elements of two 128-bit vectors of [4 x i32],
 ///    saving the lower 32 bits of each sum in the corresponding element of a
-///    128-bit result vector of [4 x i32]. The integer elements of both
-///    parameters can be either signed or unsigned.
+///    128-bit result vector of [4 x i32].
+///
+///    The integer elements of both parameters can be either signed or unsigned.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -2061,8 +2136,9 @@ _mm_add_si64(__m64 __a, __m64 __b)
 
 /// \brief Adds the corresponding elements of two 128-bit vectors of [2 x i64],
 ///    saving the lower 64 bits of each sum in the corresponding element of a
-///    128-bit result vector of [2 x i64]. The integer elements of both
-///    parameters can be either signed or unsigned.
+///    128-bit result vector of [2 x i64].
+///
+///    The integer elements of both parameters can be either signed or unsigned.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -2208,10 +2284,12 @@ _mm_avg_epu16(__m128i __a, __m128i __b)
 /// \brief Multiplies the corresponding elements of two 128-bit signed [8 x i16]
 ///    vectors, producing eight intermediate 32-bit signed integer products, and
 ///    adds the consecutive pairs of 32-bit products to form a 128-bit signed
-///    [4 x i32] vector. For example, bits [15:0] of both parameters are
-///    multiplied producing a 32-bit product, bits [31:16] of both parameters
-///    are multiplied producing a 32-bit product, and the sum of those two
-///    products becomes bits [31:0] of the result.
+///    [4 x i32] vector.
+///
+///    For example, bits [15:0] of both parameters are multiplied producing a
+///    32-bit product, bits [31:16] of both parameters are multiplied producing
+///    a 32-bit product, and the sum of those two products becomes bits [31:0]
+///    of the result.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -3146,8 +3224,9 @@ _mm_cmpgt_epi8(__m128i __a, __m128i __b)
 
 /// \brief Compares each of the corresponding signed 16-bit values of the
 ///    128-bit integer vectors to determine if the values in the first operand
-///    are greater than those in the second operand. Each comparison yields 0h
-///    for false, FFFFh for true.
+///    are greater than those in the second operand.
+///
+///    Each comparison yields 0h for false, FFFFh for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -3166,8 +3245,9 @@ _mm_cmpgt_epi16(__m128i __a, __m128i __b)
 
 /// \brief Compares each of the corresponding signed 32-bit values of the
 ///    128-bit integer vectors to determine if the values in the first operand
-///    are greater than those in the second operand. Each comparison yields 0h
-///    for false, FFFFFFFFh for true.
+///    are greater than those in the second operand.
+///
+///    Each comparison yields 0h for false, FFFFFFFFh for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -3186,8 +3266,9 @@ _mm_cmpgt_epi32(__m128i __a, __m128i __b)
 
 /// \brief Compares each of the corresponding signed 8-bit values of the 128-bit
 ///    integer vectors to determine if the values in the first operand are less
-///    than those in the second operand. Each comparison yields 0h for false,
-///    FFh for true.
+///    than those in the second operand.
+///
+///    Each comparison yields 0h for false, FFh for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -3206,8 +3287,9 @@ _mm_cmplt_epi8(__m128i __a, __m128i __b)
 
 /// \brief Compares each of the corresponding signed 16-bit values of the
 ///    128-bit integer vectors to determine if the values in the first operand
-///    are less than those in the second operand. Each comparison yields 0h for
-///    false, FFFFh for true.
+///    are less than those in the second operand.
+///
+///    Each comparison yields 0h for false, FFFFh for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -3226,8 +3308,9 @@ _mm_cmplt_epi16(__m128i __a, __m128i __b)
 
 /// \brief Compares each of the corresponding signed 32-bit values of the
 ///    128-bit integer vectors to determine if the values in the first operand
-///    are less than those in the second operand. Each comparison yields 0h for
-///    false, FFFFFFFFh for true.
+///    are less than those in the second operand.
+///
+///    Each comparison yields 0h for false, FFFFFFFFh for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -3925,10 +4008,11 @@ _mm_storeu_si128(__m128i *__p, __m128i __b)
 
 /// \brief Moves bytes selected by the mask from the first operand to the
 ///    specified unaligned memory location. When a mask bit is 1, the
-///    corresponding byte is written, otherwise it is not written. To minimize
-///    caching, the date is flagged as non-temporal (unlikely to be used again
-///    soon). Exception and trap behavior for elements not selected for storage
-///    to memory are implementation dependent.
+///    corresponding byte is written; otherwise it is not written.
+///
+///    To minimize caching, the data is flagged as non-temporal (unlikely to be
+///    used again soon). Exception and trap behavior for elements not selected
+///    for storage to memory is implementation dependent.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -3972,8 +4056,10 @@ _mm_storel_epi64(__m128i *__p, __m128i __a)
 }
 
 /// \brief Stores a 128-bit floating point vector of [2 x double] to a 128-bit
-///    aligned memory location. To minimize caching, the data is flagged as
-///    non-temporal (unlikely to be used again soon).
+///    aligned memory location.
+///
+///    To minimize caching, the data is flagged as non-temporal (unlikely to be
+///    used again soon).
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -3990,6 +4076,7 @@ _mm_stream_pd(double *__p, __m128d __a)
 }
 
 /// \brief Stores a 128-bit integer vector to a 128-bit aligned memory location.
+///
 ///    To minimize caching, the data is flagged as non-temporal (unlikely to be
 ///    used again soon).
 ///
@@ -4007,8 +4094,9 @@ _mm_stream_si128(__m128i *__p, __m128i __a)
   __builtin_nontemporal_store((__v2di)__a, (__v2di*)__p);
 }
 
-/// \brief Stores a 32-bit integer value in the specified memory location. To
-///    minimize caching, the data is flagged as non-temporal (unlikely to be
+/// \brief Stores a 32-bit integer value in the specified memory location.
+///
+///    To minimize caching, the data is flagged as non-temporal (unlikely to be
 ///    used again soon).
 ///
 /// \headerfile <x86intrin.h>
@@ -4026,8 +4114,9 @@ _mm_stream_si32(int *__p, int __a)
 }
 
 #ifdef __x86_64__
-/// \brief Stores a 64-bit integer value in the specified memory location. To
-///    minimize caching, the data is flagged as non-temporal (unlikely to be
+/// \brief Stores a 64-bit integer value in the specified memory location.
+///
+///    To minimize caching, the data is flagged as non-temporal (unlikely to be
 ///    used again soon).
 ///
 /// \headerfile <x86intrin.h>
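
(Editorial aside, not part of the patch: the all-zero/all-ones masks described
throughout the comparison docs in this file (0h for false, FFFFFFFFFFFFFFFFh
for true) are designed to feed bitwise selects. A sketch of that idiom;
max_pd_via_mask is a hypothetical name.)

#include <emmintrin.h>

/* Per-element r = (a > b) ? a : b, built from a compare mask. */
__m128d max_pd_via_mask(__m128d a, __m128d b)
{
  __m128d m = _mm_cmpgt_pd(a, b);          /* all-ones where a > b */
  return _mm_or_pd(_mm_and_pd(m, a),       /* keep a where the mask is set */
                   _mm_andnot_pd(m, b));   /* keep b elsewhere */
}
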
diff --git a/interpreter/llvm/src/tools/clang/lib/Headers/float.h b/interpreter/llvm/src/tools/clang/lib/Headers/float.h
index 0f453d87cbcbd..502143d4e4813 100644
--- a/interpreter/llvm/src/tools/clang/lib/Headers/float.h
+++ b/interpreter/llvm/src/tools/clang/lib/Headers/float.h
@@ -33,6 +33,15 @@
  */
 #if (defined(__APPLE__) || (defined(__MINGW32__) || defined(_MSC_VER))) && \
     __STDC_HOSTED__ && __has_include_next(<float.h>)
+
+/* Prior to Apple's 10.7 SDK, the float.h SDK header applied an extra level
+ * of #include_next to keep Metrowerks compilers happy. Avoid this
+ * extra indirection.
+ */
+#ifdef __APPLE__
+#define _FLOAT_H_
+#endif
+
 #  include_next <float.h>
 
 /* Undefine anything that we'll be redefining below. */
diff --git a/interpreter/llvm/src/tools/clang/lib/Headers/immintrin.h b/interpreter/llvm/src/tools/clang/lib/Headers/immintrin.h
index 7f91d49fbcec0..c5f25bfcb5c14 100644
--- a/interpreter/llvm/src/tools/clang/lib/Headers/immintrin.h
+++ b/interpreter/llvm/src/tools/clang/lib/Headers/immintrin.h
@@ -146,6 +146,10 @@ _mm256_cvtph_ps(__m128i __a)
 #include 
 #endif
 
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VPOPCNTDQ__)
+#include <avx512vpopcntdqintrin.h>
+#endif
+
 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512DQ__)
 #include <avx512dqintrin.h>
 #endif
@@ -208,6 +212,15 @@ _rdrand32_step(unsigned int *__p)
   return __builtin_ia32_rdrand32_step(__p);
 }
 
+#ifdef __x86_64__
+static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
+_rdrand64_step(unsigned long long *__p)
+{
+  return __builtin_ia32_rdrand64_step(__p);
+}
+#endif
+#endif /* __RDRND__ */
+
 /* __bit_scan_forward */
 static __inline__ int __attribute__((__always_inline__, __nodebug__))
 _bit_scan_forward(int __A) {
@@ -220,15 +233,6 @@ _bit_scan_reverse(int __A) {
   return 31 - __builtin_clz(__A);
 }
 
-#ifdef __x86_64__
-static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
-_rdrand64_step(unsigned long long *__p)
-{
-  return __builtin_ia32_rdrand64_step(__p);
-}
-#endif
-#endif /* __RDRND__ */
-
 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__FSGSBASE__)
 #ifdef __x86_64__
 static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
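
(Editorial aside, not part of the patch: the two hunks above move
_rdrand64_step so that the #endif for __RDRND__ no longer swallows the
_bit_scan_* helpers. _rdrand64_step returns 1 on success and 0 when no random
value was available, so callers conventionally retry; a sketch assuming x86-64
and -mrdrnd, with rdrand64_retry a hypothetical name.)

#include <immintrin.h>

/* Retries RDRAND until it reports success or the attempts run out. */
int rdrand64_retry(unsigned long long *out, int attempts)
{
  while (attempts-- > 0)
    if (_rdrand64_step(out))
      return 1;
  return 0;
}
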
diff --git a/interpreter/llvm/src/tools/clang/lib/Headers/mmintrin.h b/interpreter/llvm/src/tools/clang/lib/Headers/mmintrin.h
index 2b3618398cbfa..4b38d51713d86 100644
--- a/interpreter/llvm/src/tools/clang/lib/Headers/mmintrin.h
+++ b/interpreter/llvm/src/tools/clang/lib/Headers/mmintrin.h
@@ -608,10 +608,11 @@ _mm_subs_pi16(__m64 __m1, __m64 __m2)
 
 /// \brief Subtracts each 8-bit unsigned integer element of the second 64-bit
 ///    integer vector of [8 x i8] from the corresponding 8-bit unsigned integer
-///    element of the first 64-bit integer vector of [8 x i8]. If an element of
-///    the first vector is less than the corresponding element of the second
-///    vector, the result is saturated to 0. The results are packed into a
-///    64-bit integer vector of [8 x i8].
+///    element of the first 64-bit integer vector of [8 x i8].
+///
+///    If an element of the first vector is less than the corresponding element
+///    of the second vector, the result is saturated to 0. The results are
+///    packed into a 64-bit integer vector of [8 x i8].
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -631,10 +632,11 @@ _mm_subs_pu8(__m64 __m1, __m64 __m2)
 
 /// \brief Subtracts each 16-bit unsigned integer element of the second 64-bit
 ///    integer vector of [4 x i16] from the corresponding 16-bit unsigned
-///    integer element of the first 64-bit integer vector of [4 x i16]. If an
-///    element of the first vector is less than the corresponding element of the
-///    second vector, the result is saturated to 0. The results are packed into
-///    a 64-bit integer vector of [4 x i16].
+///    integer element of the first 64-bit integer vector of [4 x i16].
+///
+///    If an element of the first vector is less than the corresponding element
+///    of the second vector, the result is saturated to 0. The results are
+///    packed into a 64-bit integer vector of [4 x i16].
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -657,9 +659,11 @@ _mm_subs_pu16(__m64 __m1, __m64 __m2)
 ///    element of the second 64-bit integer vector of [4 x i16] and get four
 ///    32-bit products. Adds adjacent pairs of products to get two 32-bit sums.
 ///    The lower 32 bits of these two sums are packed into a 64-bit integer
-///    vector of [2 x i32]. For example, bits [15:0] of both parameters are
-///    multiplied, bits [31:16] of both parameters are multiplied, and the sum
-///    of both results is written to bits [31:0] of the result.
+///    vector of [2 x i32].
+///
+///    For example, bits [15:0] of both parameters are multiplied, bits [31:16]
+///    of both parameters are multiplied, and the sum of both results is written
+///    to bits [31:0] of the result.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -851,10 +855,11 @@ _mm_slli_si64(__m64 __m, int __count)
 
 /// \brief Right-shifts each 16-bit integer element of the first parameter,
 ///    which is a 64-bit integer vector of [4 x i16], by the number of bits
-///    specified by the second parameter, which is a 64-bit integer. High-order
-///    bits are filled with the sign bit of the initial value of each 16-bit
-///    element. The 16-bit results are packed into a 64-bit integer vector of
-///    [4 x i16].
+///    specified by the second parameter, which is a 64-bit integer.
+///
+///    High-order bits are filled with the sign bit of the initial value of each
+///    16-bit element. The 16-bit results are packed into a 64-bit integer
+///    vector of [4 x i16].
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -874,6 +879,7 @@ _mm_sra_pi16(__m64 __m, __m64 __count)
 
 /// \brief Right-shifts each 16-bit integer element of a 64-bit integer vector
 ///    of [4 x i16] by the number of bits specified by a 32-bit integer.
+///
 ///    High-order bits are filled with the sign bit of the initial value of each
 ///    16-bit element. The 16-bit results are packed into a 64-bit integer
 ///    vector of [4 x i16].
@@ -896,10 +902,11 @@ _mm_srai_pi16(__m64 __m, int __count)
 
 /// \brief Right-shifts each 32-bit integer element of the first parameter,
 ///    which is a 64-bit integer vector of [2 x i32], by the number of bits
-///    specified by the second parameter, which is a 64-bit integer. High-order
-///    bits are filled with the sign bit of the initial value of each 32-bit
-///    element. The 32-bit results are packed into a 64-bit integer vector of
-///    [2 x i32].
+///    specified by the second parameter, which is a 64-bit integer.
+///
+///    High-order bits are filled with the sign bit of the initial value of each
+///    32-bit element. The 32-bit results are packed into a 64-bit integer
+///    vector of [2 x i32].
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -919,6 +926,7 @@ _mm_sra_pi32(__m64 __m, __m64 __count)
 
 /// \brief Right-shifts each 32-bit integer element of a 64-bit integer vector
 ///    of [2 x i32] by the number of bits specified by a 32-bit integer.
+///
 ///    High-order bits are filled with the sign bit of the initial value of each
 ///    32-bit element. The 32-bit results are packed into a 64-bit integer
 ///    vector of [2 x i32].
@@ -941,9 +949,10 @@ _mm_srai_pi32(__m64 __m, int __count)
 
 /// \brief Right-shifts each 16-bit integer element of the first parameter,
 ///    which is a 64-bit integer vector of [4 x i16], by the number of bits
-///    specified by the second parameter, which is a 64-bit integer. High-order
-///    bits are cleared. The 16-bit results are packed into a 64-bit integer
-///    vector of [4 x i16].
+///    specified by the second parameter, which is a 64-bit integer.
+///
+///    High-order bits are cleared. The 16-bit results are packed into a 64-bit
+///    integer vector of [4 x i16].
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -963,6 +972,7 @@ _mm_srl_pi16(__m64 __m, __m64 __count)
 
 /// \brief Right-shifts each 16-bit integer element of a 64-bit integer vector
 ///    of [4 x i16] by the number of bits specified by a 32-bit integer.
+///
 ///    High-order bits are cleared. The 16-bit results are packed into a 64-bit
 ///    integer vector of [4 x i16].
 ///
@@ -984,9 +994,10 @@ _mm_srli_pi16(__m64 __m, int __count)
 
 /// \brief Right-shifts each 32-bit integer element of the first parameter,
 ///    which is a 64-bit integer vector of [2 x i32], by the number of bits
-///    specified by the second parameter, which is a 64-bit integer. High-order
-///    bits are cleared. The 32-bit results are packed into a 64-bit integer
-///    vector of [2 x i32].
+///    specified by the second parameter, which is a 64-bit integer.
+///
+///    High-order bits are cleared. The 32-bit results are packed into a 64-bit
+///    integer vector of [2 x i32].
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -1006,6 +1017,7 @@ _mm_srl_pi32(__m64 __m, __m64 __count)
 
 /// \brief Right-shifts each 32-bit integer element of a 64-bit integer vector
 ///    of [2 x i32] by the number of bits specified by a 32-bit integer.
+///
 ///    High-order bits are cleared. The 32-bit results are packed into a 64-bit
 ///    integer vector of [2 x i32].
 ///
@@ -1026,8 +1038,9 @@ _mm_srli_pi32(__m64 __m, int __count)
 }
 
 /// \brief Right-shifts the first 64-bit integer parameter by the number of bits
-///    specified by the second 64-bit integer parameter. High-order bits are
-///    cleared.
+///    specified by the second 64-bit integer parameter.
+///
+///    High-order bits are cleared.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -1046,7 +1059,9 @@ _mm_srl_si64(__m64 __m, __m64 __count)
 
 /// \brief Right-shifts the first parameter, which is a 64-bit integer, by the
 ///    number of bits specified by the second parameter, which is a 32-bit
-///    integer. High-order bits are cleared.
+///    integer.
+///
+///    High-order bits are cleared.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -1140,8 +1155,9 @@ _mm_xor_si64(__m64 __m1, __m64 __m2)
 
 /// \brief Compares the 8-bit integer elements of two 64-bit integer vectors of
 ///    [8 x i8] to determine if the element of the first vector is equal to the
-///    corresponding element of the second vector. The comparison yields 0 for
-///    false, 0xFF for true.
+///    corresponding element of the second vector.
+///
+///    The comparison yields 0 for false, 0xFF for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -1161,8 +1177,9 @@ _mm_cmpeq_pi8(__m64 __m1, __m64 __m2)
 
 /// \brief Compares the 16-bit integer elements of two 64-bit integer vectors of
 ///    [4 x i16] to determine if the element of the first vector is equal to the
-///    corresponding element of the second vector. The comparison yields 0 for
-///    false, 0xFFFF for true.
+///    corresponding element of the second vector.
+///
+///    The comparison yields 0 for false, 0xFFFF for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -1182,8 +1199,9 @@ _mm_cmpeq_pi16(__m64 __m1, __m64 __m2)
 
 /// \brief Compares the 32-bit integer elements of two 64-bit integer vectors of
 ///    [2 x i32] to determine if the element of the first vector is equal to the
-///    corresponding element of the second vector. The comparison yields 0 for
-///    false, 0xFFFFFFFF for true.
+///    corresponding element of the second vector.
+///
+///    The comparison yields 0 for false, 0xFFFFFFFF for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -1203,8 +1221,9 @@ _mm_cmpeq_pi32(__m64 __m1, __m64 __m2)
 
 /// \brief Compares the 8-bit integer elements of two 64-bit integer vectors of
 ///    [8 x i8] to determine if the element of the first vector is greater than
-///    the corresponding element of the second vector. The comparison yields 0
-///    for false, 0xFF for true.
+///    the corresponding element of the second vector.
+///
+///    The comparison yields 0 for false, 0xFF for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -1224,8 +1243,9 @@ _mm_cmpgt_pi8(__m64 __m1, __m64 __m2)
 
 /// \brief Compares the 16-bit integer elements of two 64-bit integer vectors of
 ///    [4 x i16] to determine if the element of the first vector is greater than
-///    the corresponding element of the second vector. The comparison yields 0
-///    for false, 0xFFFF for true.
+///    the corresponding element of the second vector.
+///
+///    The comparison yields 0 for false, 0xFFFF for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -1245,8 +1265,9 @@ _mm_cmpgt_pi16(__m64 __m1, __m64 __m2)
 
 /// \brief Compares the 32-bit integer elements of two 64-bit integer vectors of
 ///    [2 x i32] to determine if the element of the first vector is greater than
-///    the corresponding element of the second vector. The comparison yields 0
-///    for false, 0xFFFFFFFF for true.
+///    the corresponding element of the second vector.
+///
+///    The comparison yields 0 for false, 0xFFFFFFFF for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -1268,7 +1289,7 @@ _mm_cmpgt_pi32(__m64 __m1, __m64 __m2)
 ///
 /// \headerfile <x86intrin.h>
 ///
-/// This intrinsic corresponds to the the <c> VXORPS / XORPS </c> instruction.
+/// This intrinsic corresponds to the <c> VXORPS / XORPS </c> instruction.
 ///
 /// \returns An initialized 64-bit integer vector with all elements set to zero.
 static __inline__ __m64 __DEFAULT_FN_ATTRS
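
Not part of the patch: a small demonstration of the saturating subtraction documented above, assuming an MMX-enabled x86 target (-mmmx):

#include <mmintrin.h>
#include <stdio.h>

int main(void) {
    __m64 a = _mm_set1_pi8(10);
    __m64 b = _mm_set1_pi8(20);
    __m64 r = _mm_subs_pu8(a, b);               /* each lane: 10 - 20 saturates to 0 */
    printf("%d\n", _mm_cvtsi64_si32(r) & 0xFF); /* prints 0 */
    _mm_empty();                                /* exit MMX state before using x87 */
    return 0;
}
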
diff --git a/interpreter/llvm/src/tools/clang/lib/Headers/pmmintrin.h b/interpreter/llvm/src/tools/clang/lib/Headers/pmmintrin.h
index a479d9ed2911f..559ece2e3974f 100644
--- a/interpreter/llvm/src/tools/clang/lib/Headers/pmmintrin.h
+++ b/interpreter/llvm/src/tools/clang/lib/Headers/pmmintrin.h
@@ -31,9 +31,11 @@
   __attribute__((__always_inline__, __nodebug__, __target__("sse3")))
 
 /// \brief Loads data from an unaligned memory location to elements in a 128-bit
-///    vector. If the address of the data is not 16-byte aligned, the
-///    instruction may read two adjacent aligned blocks of memory to retrieve
-///    the requested data.
+///    vector.
+///
+///    If the address of the data is not 16-byte aligned, the instruction may
+///    read two adjacent aligned blocks of memory to retrieve the requested
+///    data.
 ///
 /// \headerfile <x86intrin.h>
 ///
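
Not part of the patch: a sketch of the unaligned load described above, assuming SSE3 (-msse3). The +1 makes the source address deliberately misaligned:

#include <pmmintrin.h>
#include <stdio.h>

int main(void) {
    unsigned char buf[32];
    for (int i = 0; i < 32; ++i) buf[i] = (unsigned char)i;
    __m128i v = _mm_lddqu_si128((const __m128i *)(buf + 1)); /* unaligned load */
    unsigned char out[16];
    _mm_storeu_si128((__m128i *)out, v);
    printf("%d %d\n", out[0], out[15]);   /* prints 1 16 */
    return 0;
}
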
diff --git a/interpreter/llvm/src/tools/clang/lib/Headers/prfchwintrin.h b/interpreter/llvm/src/tools/clang/lib/Headers/prfchwintrin.h
index a3789126ef07e..b52f31da27061 100644
--- a/interpreter/llvm/src/tools/clang/lib/Headers/prfchwintrin.h
+++ b/interpreter/llvm/src/tools/clang/lib/Headers/prfchwintrin.h
@@ -50,8 +50,10 @@ _m_prefetch(void *__P)
 ///    the L1 data cache and sets the cache-coherency to modified. This
 ///    provides a hint to the processor that the cache line will be modified.
 ///    It is intended for use when the cache line will be written to shortly
-///    after the prefetch is performed. Note that the effect of this intrinsic
-///    is dependent on the processor implementation.
+///    after the prefetch is performed.
+///
+///    Note that the effect of this intrinsic is dependent on the processor
+///    implementation.
 ///
 /// \headerfile <x86intrin.h>
 ///
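
Not part of the patch: a sketch of the prefetch-for-write pattern the comment above describes, assuming -mprfchw. The hint never affects correctness, only timing, which is implementation dependent:

#include <x86intrin.h>

void scale_in_place(double *a, int n) {
    for (int i = 0; i < n; ++i) {
        if (i + 16 < n)
            _m_prefetchw(&a[i + 16]);  /* cache line will be written shortly */
        a[i] *= 2.0;
    }
}
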
diff --git a/interpreter/llvm/src/tools/clang/lib/Headers/smmintrin.h b/interpreter/llvm/src/tools/clang/lib/Headers/smmintrin.h
index 1c94aca693819..c2fa5a452bcec 100644
--- a/interpreter/llvm/src/tools/clang/lib/Headers/smmintrin.h
+++ b/interpreter/llvm/src/tools/clang/lib/Headers/smmintrin.h
@@ -586,7 +586,9 @@ _mm_mul_epi32 (__m128i __V1, __m128i __V2)
 /* SSE4 Floating Point Dot Product Instructions.  */
 /// \brief Computes the dot product of the two 128-bit vectors of [4 x float]
 ///    and returns it in the elements of the 128-bit result vector of
-///    [4 x float]. The immediate integer operand controls which input elements
+///    [4 x float].
+///
+///    The immediate integer operand controls which input elements
 ///    will contribute to the dot product, and where the final results are
 ///    returned.
 ///
@@ -620,7 +622,9 @@ _mm_mul_epi32 (__m128i __V1, __m128i __V2)
 
 /// \brief Computes the dot product of the two 128-bit vectors of [2 x double]
 ///    and returns it in the elements of the 128-bit result vector of
-///    [2 x double]. The immediate integer operand controls which input
+///    [2 x double].
+///
+///    The immediate integer operand controls which input
 ///    elements will contribute to the dot product, and where the final results
 ///    are returned.
 ///
@@ -875,7 +879,7 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2)
 /// int _mm_extract_ps(__m128 X, const int N);
 /// \endcode
 ///
-/// This intrinsic corresponds to the <c> VEXTRACTPS / EXTRACTPS </c> 
+/// This intrinsic corresponds to the <c> VEXTRACTPS / EXTRACTPS </c>
 /// instruction.
 ///
 /// \param X
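
Not part of the patch: a sketch of the dot-product immediate described above, assuming SSE4.1 (-msse4.1). Mask 0xF1 sums the products of all four lanes and writes the result to lane 0 only:

#include <smmintrin.h>
#include <stdio.h>

int main(void) {
    __m128 a = _mm_set_ps(4.0f, 3.0f, 2.0f, 1.0f);
    __m128 b = _mm_set1_ps(1.0f);
    __m128 d = _mm_dp_ps(a, b, 0xF1);  /* 1*1 + 2*1 + 3*1 + 4*1 -> lane 0 */
    printf("%f\n", _mm_cvtss_f32(d));  /* prints 10.000000 */
    return 0;
}
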
diff --git a/interpreter/llvm/src/tools/clang/lib/Headers/tmmintrin.h b/interpreter/llvm/src/tools/clang/lib/Headers/tmmintrin.h
index 80664043a06fb..042bfc7e3b0d1 100644
--- a/interpreter/llvm/src/tools/clang/lib/Headers/tmmintrin.h
+++ b/interpreter/llvm/src/tools/clang/lib/Headers/tmmintrin.h
@@ -469,10 +469,11 @@ _mm_hsubs_pi16(__m64 __a, __m64 __b)
 ///    values contained in the first source operand and packed 8-bit signed
 ///    integer values contained in the second source operand, adds pairs of
 ///    contiguous products with signed saturation, and writes the 16-bit sums to
-///    the corresponding bits in the destination. For example, bits [7:0] of
-///    both operands are multiplied, bits [15:8] of both operands are
-///    multiplied, and the sum of both results is written to bits [15:0] of the
-///    destination.
+///    the corresponding bits in the destination.
+///
+///    For example, bits [7:0] of both operands are multiplied, bits [15:8] of
+///    both operands are multiplied, and the sum of both results is written to
+///    bits [15:0] of the destination.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -502,10 +503,11 @@ _mm_maddubs_epi16(__m128i __a, __m128i __b)
 ///    values contained in the first source operand and packed 8-bit signed
 ///    integer values contained in the second source operand, adds pairs of
 ///    contiguous products with signed saturation, and writes the 16-bit sums to
-///    the corresponding bits in the destination. For example, bits [7:0] of
-///    both operands are multiplied, bits [15:8] of both operands are
-///    multiplied, and the sum of both results is written to bits [15:0] of the
-///    destination.
+///    the corresponding bits in the destination.
+///
+///    For example, bits [7:0] of both operands are multiplied, bits [15:8] of
+///    both operands are multiplied, and the sum of both results is written to
+///    bits [15:0] of the destination.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -619,13 +621,14 @@ _mm_shuffle_pi8(__m64 __a, __m64 __b)
 }
 
 /// \brief For each 8-bit integer in the first source operand, perform one of
-///    the following actions as specified by the second source operand: If the
-///    byte in the second source is negative, calculate the two's complement of
-///    the corresponding byte in the first source, and write that value to the
-///    destination. If the byte in the second source is positive, copy the
-///    corresponding byte from the first source to the destination. If the byte
-///    in the second source is zero, clear the corresponding byte in the
-///    destination.
+///    the following actions as specified by the second source operand.
+///
+///    If the byte in the second source is negative, calculate the two's
+///    complement of the corresponding byte in the first source, and write that
+///    value to the destination. If the byte in the second source is positive,
+///    copy the corresponding byte from the first source to the destination. If
+///    the byte in the second source is zero, clear the corresponding byte in
+///    the destination.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -644,13 +647,14 @@ _mm_sign_epi8(__m128i __a, __m128i __b)
 }
 
 /// \brief For each 16-bit integer in the first source operand, perform one of
-///    the following actions as specified by the second source operand: If the
-///    word in the second source is negative, calculate the two's complement of
-///    the corresponding word in the first source, and write that value to the
-///    destination. If the word in the second source is positive, copy the
-///    corresponding word from the first source to the destination. If the word
-///    in the second source is zero, clear the corresponding word in the
-///    destination.
+///    the following actions as specified by the second source operand.
+///
+///    If the word in the second source is negative, calculate the two's
+///    complement of the corresponding word in the first source, and write that
+///    value to the destination. If the word in the second source is positive,
+///    copy the corresponding word from the first source to the destination. If
+///    the word in the second source is zero, clear the corresponding word in
+///    the destination.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -669,8 +673,9 @@ _mm_sign_epi16(__m128i __a, __m128i __b)
 }
 
 /// \brief For each 32-bit integer in the first source operand, perform one of
-///    the following actions as specified by the second source operand: If the
-///    doubleword in the second source is negative, calculate the two's
+///    the following actions as specified by the second source operand.
+///
+///    If the doubleword in the second source is negative, calculate the two's
 ///    complement of the corresponding word in the first source, and write that
 ///    value to the destination. If the doubleword in the second source is
 ///    positive, copy the corresponding word from the first source to the
@@ -694,13 +699,14 @@ _mm_sign_epi32(__m128i __a, __m128i __b)
 }
 
 /// \brief For each 8-bit integer in the first source operand, perform one of
-///    the following actions as specified by the second source operand: If the
-///    byte in the second source is negative, calculate the two's complement of
-///    the corresponding byte in the first source, and write that value to the
-///    destination. If the byte in the second source is positive, copy the
-///    corresponding byte from the first source to the destination. If the byte
-///    in the second source is zero, clear the corresponding byte in the
-///    destination.
+///    the following actions as specified by the second source operand.
+///
+///    If the byte in the second source is negative, calculate the two's
+///    complement of the corresponding byte in the first source, and write that
+///    value to the destination. If the byte in the second source is positive,
+///    copy the corresponding byte from the first source to the destination. If
+///    the byte in the second source is zero, clear the corresponding byte in
+///    the destination.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -719,13 +725,14 @@ _mm_sign_pi8(__m64 __a, __m64 __b)
 }
 
 /// \brief For each 16-bit integer in the first source operand, perform one of
-///    the following actions as specified by the second source operand: If the
-///    word in the second source is negative, calculate the two's complement of
-///    the corresponding word in the first source, and write that value to the
-///    destination. If the word in the second source is positive, copy the
-///    corresponding word from the first source to the destination. If the word
-///    in the second source is zero, clear the corresponding word in the
-///    destination.
+///    the following actions as specified by the second source operand.
+///
+///    If the word in the second source is negative, calculate the two's
+///    complement of the corresponding word in the first source, and write that
+///    value to the destination. If the word in the second source is positive,
+///    copy the corresponding word from the first source to the destination. If
+///    the word in the second source is zero, clear the corresponding word in
+///    the destination.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -744,8 +751,9 @@ _mm_sign_pi16(__m64 __a, __m64 __b)
 }
 
 /// \brief For each 32-bit integer in the first source operand, perform one of
-///    the following actions as specified by the second source operand: If the
-///    doubleword in the second source is negative, calculate the two's
+///    the following actions as specified by the second source operand.
+///
+///    If the doubleword in the second source is negative, calculate the two's
 ///    complement of the corresponding doubleword in the first source, and
 ///    write that value to the destination. If the doubleword in the second
 ///    source is positive, copy the corresponding doubleword from the first
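
Not part of the patch: a sketch of the sign operation described above for 16-bit lanes, assuming SSSE3 (-mssse3):

#include <tmmintrin.h>
#include <stdio.h>

int main(void) {
    __m128i a = _mm_set1_epi16(5);
    __m128i b = _mm_setr_epi16(-1, 0, 1, -2, 2, 0, -3, 3);
    __m128i r = _mm_sign_epi16(a, b);  /* -5 0 5 -5 5 0 -5 5 */
    short out[8];
    _mm_storeu_si128((__m128i *)out, r);
    for (int i = 0; i < 8; ++i) printf("%d ", out[i]);
    printf("\n");
    return 0;
}
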
diff --git a/interpreter/llvm/src/tools/clang/lib/Headers/vecintrin.h b/interpreter/llvm/src/tools/clang/lib/Headers/vecintrin.h
index ca7acb4731f9e..f7061e88949fb 100644
--- a/interpreter/llvm/src/tools/clang/lib/Headers/vecintrin.h
+++ b/interpreter/llvm/src/tools/clang/lib/Headers/vecintrin.h
@@ -116,6 +116,13 @@ vec_extract(vector unsigned long long __vec, int __index) {
   return __vec[__index & 1];
 }
 
+#if __ARCH__ >= 12
+static inline __ATTRS_o_ai float
+vec_extract(vector float __vec, int __index) {
+  return __vec[__index & 3];
+}
+#endif
+
 static inline __ATTRS_o_ai double
 vec_extract(vector double __vec, int __index) {
   return __vec[__index & 1];
@@ -129,6 +136,7 @@ vec_insert(signed char __scalar, vector signed char __vec, int __index) {
   return __vec;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector unsigned char
 vec_insert(unsigned char __scalar, vector bool char __vec, int __index) {
   vector unsigned char __newvec = (vector unsigned char)__vec;
@@ -148,6 +156,7 @@ vec_insert(signed short __scalar, vector signed short __vec, int __index) {
   return __vec;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector unsigned short
 vec_insert(unsigned short __scalar, vector bool short __vec, int __index) {
   vector unsigned short __newvec = (vector unsigned short)__vec;
@@ -167,6 +176,7 @@ vec_insert(signed int __scalar, vector signed int __vec, int __index) {
   return __vec;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector unsigned int
 vec_insert(unsigned int __scalar, vector bool int __vec, int __index) {
   vector unsigned int __newvec = (vector unsigned int)__vec;
@@ -187,6 +197,7 @@ vec_insert(signed long long __scalar, vector signed long long __vec,
   return __vec;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector unsigned long long
 vec_insert(unsigned long long __scalar, vector bool long long __vec,
            int __index) {
@@ -202,6 +213,14 @@ vec_insert(unsigned long long __scalar, vector unsigned long long __vec,
   return __vec;
 }
 
+#if __ARCH__ >= 12
+static inline __ATTRS_o_ai vector float
+vec_insert(float __scalar, vector float __vec, int __index) {
+  __vec[__index & 3] = __scalar;
+  return __vec;
+}
+#endif
+
 static inline __ATTRS_o_ai vector double
 vec_insert(double __scalar, vector double __vec, int __index) {
   __vec[__index & 1] = __scalar;
@@ -282,6 +301,16 @@ vec_promote(unsigned long long __scalar, int __index) {
   return __vec;
 }
 
+#if __ARCH__ >= 12
+static inline __ATTRS_o_ai vector float
+vec_promote(float __scalar, int __index) {
+  const vector float __zero = (vector float)0;
+  vector float __vec = __builtin_shufflevector(__zero, __zero, -1, -1, -1, -1);
+  __vec[__index & 3] = __scalar;
+  return __vec;
+}
+#endif
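
Not part of the patch: a sketch of the new z14 float overloads above, assuming a SystemZ target with -march=z14 -mzvector. Only the lane written by vec_promote is defined, so broadcast it before relying on the whole vector:

#include <vecintrin.h>

vector float broadcast_lane(float x) {
  vector float v = vec_promote(x, 2);     /* lane 2 = x, other lanes undefined */
  return vec_splats(vec_extract(v, 2));   /* splat the one defined lane */
}
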
+
 static inline __ATTRS_o_ai vector double
 vec_promote(double __scalar, int __index) {
   const vector double __zero = (vector double)0;
@@ -348,6 +377,15 @@ vec_insert_and_zero(const unsigned long long *__ptr) {
   return __vec;
 }
 
+#if __ARCH__ >= 12
+static inline __ATTRS_o_ai vector float
+vec_insert_and_zero(const float *__ptr) {
+  vector float __vec = (vector float)0;
+  __vec[0] = *__ptr;
+  return __vec;
+}
+#endif
+
 static inline __ATTRS_o_ai vector double
 vec_insert_and_zero(const double *__ptr) {
   vector double __vec = (vector double)0;
@@ -441,6 +479,15 @@ vec_perm(vector bool long long __a, vector bool long long __b,
            (vector unsigned char)__a, (vector unsigned char)__b, __c);
 }
 
+#if __ARCH__ >= 12
+static inline __ATTRS_o_ai vector float
+vec_perm(vector float __a, vector float __b,
+         vector unsigned char __c) {
+  return (vector float)__builtin_s390_vperm(
+           (vector unsigned char)__a, (vector unsigned char)__b, __c);
+}
+#endif
+
 static inline __ATTRS_o_ai vector double
 vec_perm(vector double __a, vector double __b,
          vector unsigned char __c) {
@@ -450,18 +497,22 @@ vec_perm(vector double __a, vector double __b,
 
 /*-- vec_permi --------------------------------------------------------------*/
 
+// This prototype is deprecated.
 extern __ATTRS_o vector signed long long
 vec_permi(vector signed long long __a, vector signed long long __b, int __c)
   __constant_range(__c, 0, 3);
 
+// This prototype is deprecated.
 extern __ATTRS_o vector unsigned long long
 vec_permi(vector unsigned long long __a, vector unsigned long long __b, int __c)
   __constant_range(__c, 0, 3);
 
+// This prototype is deprecated.
 extern __ATTRS_o vector bool long long
 vec_permi(vector bool long long __a, vector bool long long __b, int __c)
   __constant_range(__c, 0, 3);
 
+// This prototype is deprecated.
 extern __ATTRS_o vector double
 vec_permi(vector double __a, vector double __b, int __c)
   __constant_range(__c, 0, 3);
@@ -471,6 +522,15 @@ vec_permi(vector double __a, vector double __b, int __c)
                       (vector unsigned long long)(Y), \
                       (((Z) & 2) << 1) | ((Z) & 1)))
 
+/*-- vec_bperm_u128 ---------------------------------------------------------*/
+
+#if __ARCH__ >= 12
+static inline __ATTRS_ai vector unsigned long long
+vec_bperm_u128(vector unsigned char __a, vector unsigned char __b) {
+  return __builtin_s390_vbperm(__a, __b);
+}
+#endif
+
 /*-- vec_sel ----------------------------------------------------------------*/
 
 static inline __ATTRS_o_ai vector signed char
@@ -614,6 +674,22 @@ vec_sel(vector unsigned long long __a, vector unsigned long long __b,
           (~(vector unsigned long long)__c & __a));
 }
 
+#if __ARCH__ >= 12
+static inline __ATTRS_o_ai vector float
+vec_sel(vector float __a, vector float __b, vector unsigned int __c) {
+  return (vector float)((__c & (vector unsigned int)__b) |
+                        (~__c & (vector unsigned int)__a));
+}
+
+static inline __ATTRS_o_ai vector float
+vec_sel(vector float __a, vector float __b, vector bool int __c) {
+  vector unsigned int __ac = (vector unsigned int)__a;
+  vector unsigned int __bc = (vector unsigned int)__b;
+  vector unsigned int __cc = (vector unsigned int)__c;
+  return (vector float)((__cc & __bc) | (~__cc & __ac));
+}
+#endif
+
 static inline __ATTRS_o_ai vector double
 vec_sel(vector double __a, vector double __b, vector unsigned long long __c) {
   return (vector double)((__c & (vector unsigned long long)__b) |
@@ -687,6 +763,17 @@ vec_gather_element(vector unsigned long long __vec,
   return __vec;
 }
 
+#if __ARCH__ >= 12
+static inline __ATTRS_o_ai vector float
+vec_gather_element(vector float __vec, vector unsigned int __offset,
+                   const float *__ptr, int __index)
+  __constant_range(__index, 0, 3) {
+  __vec[__index] = *(const float *)(
+    (__INTPTR_TYPE__)__ptr + (__INTPTR_TYPE__)__offset[__index]);
+  return __vec;
+}
+#endif
+
 static inline __ATTRS_o_ai vector double
 vec_gather_element(vector double __vec, vector unsigned long long __offset,
                    const double *__ptr, int __index)
@@ -749,6 +836,16 @@ vec_scatter_element(vector unsigned long long __vec,
     __vec[__index];
 }
 
+#if __ARCH__ >= 12
+static inline __ATTRS_o_ai void
+vec_scatter_element(vector float __vec, vector unsigned int __offset,
+                    float *__ptr, int __index)
+  __constant_range(__index, 0, 3) {
+  *(float *)((__INTPTR_TYPE__)__ptr + __offset[__index]) =
+    __vec[__index];
+}
+#endif
+
 static inline __ATTRS_o_ai void
 vec_scatter_element(vector double __vec, vector unsigned long long __offset,
                     double *__ptr, int __index)
@@ -757,48 +854,111 @@ vec_scatter_element(vector double __vec, vector unsigned long long __offset,
     __vec[__index];
 }
 
+/*-- vec_xl -----------------------------------------------------------------*/
+
+static inline __ATTRS_o_ai vector signed char
+vec_xl(long __offset, const signed char *__ptr) {
+  return *(const vector signed char *)((__INTPTR_TYPE__)__ptr + __offset);
+}
+
+static inline __ATTRS_o_ai vector unsigned char
+vec_xl(long __offset, const unsigned char *__ptr) {
+  return *(const vector unsigned char *)((__INTPTR_TYPE__)__ptr + __offset);
+}
+
+static inline __ATTRS_o_ai vector signed short
+vec_xl(long __offset, const signed short *__ptr) {
+  return *(const vector signed short *)((__INTPTR_TYPE__)__ptr + __offset);
+}
+
+static inline __ATTRS_o_ai vector unsigned short
+vec_xl(long __offset, const unsigned short *__ptr) {
+  return *(const vector unsigned short *)((__INTPTR_TYPE__)__ptr + __offset);
+}
+
+static inline __ATTRS_o_ai vector signed int
+vec_xl(long __offset, const signed int *__ptr) {
+  return *(const vector signed int *)((__INTPTR_TYPE__)__ptr + __offset);
+}
+
+static inline __ATTRS_o_ai vector unsigned int
+vec_xl(long __offset, const unsigned int *__ptr) {
+  return *(const vector unsigned int *)((__INTPTR_TYPE__)__ptr + __offset);
+}
+
+static inline __ATTRS_o_ai vector signed long long
+vec_xl(long __offset, const signed long long *__ptr) {
+  return *(const vector signed long long *)((__INTPTR_TYPE__)__ptr + __offset);
+}
+
+static inline __ATTRS_o_ai vector unsigned long long
+vec_xl(long __offset, const unsigned long long *__ptr) {
+  return *(const vector unsigned long long *)((__INTPTR_TYPE__)__ptr + __offset);
+}
+
+#if __ARCH__ >= 12
+static inline __ATTRS_o_ai vector float
+vec_xl(long __offset, const float *__ptr) {
+  return *(const vector float *)((__INTPTR_TYPE__)__ptr + __offset);
+}
+#endif
+
+static inline __ATTRS_o_ai vector double
+vec_xl(long __offset, const double *__ptr) {
+  return *(const vector double *)((__INTPTR_TYPE__)__ptr + __offset);
+}
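
Not part of the patch: a sketch of the new vec_xl overloads, together with vec_xst added further below, assuming a SystemZ target with -march=z13 -mzvector. Both take a byte offset plus a scalar pointer and move one 16-byte vector, replacing the deprecated vec_xld2 / vec_xstd2 names:

#include <vecintrin.h>

void add_one(signed int *dst, const signed int *src) {
  vector signed int v = vec_xl(0, src);   /* load 4 ints from src + 0 bytes */
  v = v + vec_splats(1);                  /* element-wise increment */
  vec_xst(v, 0, dst);                     /* store 4 ints to dst + 0 bytes */
}
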
+
 /*-- vec_xld2 ---------------------------------------------------------------*/
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector signed char
 vec_xld2(long __offset, const signed char *__ptr) {
   return *(const vector signed char *)((__INTPTR_TYPE__)__ptr + __offset);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector unsigned char
 vec_xld2(long __offset, const unsigned char *__ptr) {
   return *(const vector unsigned char *)((__INTPTR_TYPE__)__ptr + __offset);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector signed short
 vec_xld2(long __offset, const signed short *__ptr) {
   return *(const vector signed short *)((__INTPTR_TYPE__)__ptr + __offset);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector unsigned short
 vec_xld2(long __offset, const unsigned short *__ptr) {
   return *(const vector unsigned short *)((__INTPTR_TYPE__)__ptr + __offset);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector signed int
 vec_xld2(long __offset, const signed int *__ptr) {
   return *(const vector signed int *)((__INTPTR_TYPE__)__ptr + __offset);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector unsigned int
 vec_xld2(long __offset, const unsigned int *__ptr) {
   return *(const vector unsigned int *)((__INTPTR_TYPE__)__ptr + __offset);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector signed long long
 vec_xld2(long __offset, const signed long long *__ptr) {
   return *(const vector signed long long *)((__INTPTR_TYPE__)__ptr + __offset);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector unsigned long long
 vec_xld2(long __offset, const unsigned long long *__ptr) {
   return *(const vector unsigned long long *)((__INTPTR_TYPE__)__ptr + __offset);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector double
 vec_xld2(long __offset, const double *__ptr) {
   return *(const vector double *)((__INTPTR_TYPE__)__ptr + __offset);
@@ -806,74 +966,145 @@ vec_xld2(long __offset, const double *__ptr) {
 
 /*-- vec_xlw4 ---------------------------------------------------------------*/
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector signed char
 vec_xlw4(long __offset, const signed char *__ptr) {
   return *(const vector signed char *)((__INTPTR_TYPE__)__ptr + __offset);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector unsigned char
 vec_xlw4(long __offset, const unsigned char *__ptr) {
   return *(const vector unsigned char *)((__INTPTR_TYPE__)__ptr + __offset);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector signed short
 vec_xlw4(long __offset, const signed short *__ptr) {
   return *(const vector signed short *)((__INTPTR_TYPE__)__ptr + __offset);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector unsigned short
 vec_xlw4(long __offset, const unsigned short *__ptr) {
   return *(const vector unsigned short *)((__INTPTR_TYPE__)__ptr + __offset);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector signed int
 vec_xlw4(long __offset, const signed int *__ptr) {
   return *(const vector signed int *)((__INTPTR_TYPE__)__ptr + __offset);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector unsigned int
 vec_xlw4(long __offset, const unsigned int *__ptr) {
   return *(const vector unsigned int *)((__INTPTR_TYPE__)__ptr + __offset);
 }
 
+/*-- vec_xst ----------------------------------------------------------------*/
+
+static inline __ATTRS_o_ai void
+vec_xst(vector signed char __vec, long __offset, signed char *__ptr) {
+  *(vector signed char *)((__INTPTR_TYPE__)__ptr + __offset) = __vec;
+}
+
+static inline __ATTRS_o_ai void
+vec_xst(vector unsigned char __vec, long __offset, unsigned char *__ptr) {
+  *(vector unsigned char *)((__INTPTR_TYPE__)__ptr + __offset) = __vec;
+}
+
+static inline __ATTRS_o_ai void
+vec_xst(vector signed short __vec, long __offset, signed short *__ptr) {
+  *(vector signed short *)((__INTPTR_TYPE__)__ptr + __offset) = __vec;
+}
+
+static inline __ATTRS_o_ai void
+vec_xst(vector unsigned short __vec, long __offset, unsigned short *__ptr) {
+  *(vector unsigned short *)((__INTPTR_TYPE__)__ptr + __offset) = __vec;
+}
+
+static inline __ATTRS_o_ai void
+vec_xst(vector signed int __vec, long __offset, signed int *__ptr) {
+  *(vector signed int *)((__INTPTR_TYPE__)__ptr + __offset) = __vec;
+}
+
+static inline __ATTRS_o_ai void
+vec_xst(vector unsigned int __vec, long __offset, unsigned int *__ptr) {
+  *(vector unsigned int *)((__INTPTR_TYPE__)__ptr + __offset) = __vec;
+}
+
+static inline __ATTRS_o_ai void
+vec_xst(vector signed long long __vec, long __offset,
+          signed long long *__ptr) {
+  *(vector signed long long *)((__INTPTR_TYPE__)__ptr + __offset) = __vec;
+}
+
+static inline __ATTRS_o_ai void
+vec_xst(vector unsigned long long __vec, long __offset,
+          unsigned long long *__ptr) {
+  *(vector unsigned long long *)((__INTPTR_TYPE__)__ptr + __offset) =
+    __vec;
+}
+
+#if __ARCH__ >= 12
+static inline __ATTRS_o_ai void
+vec_xst(vector float __vec, long __offset, float *__ptr) {
+  *(vector float *)((__INTPTR_TYPE__)__ptr + __offset) = __vec;
+}
+#endif
+
+static inline __ATTRS_o_ai void
+vec_xst(vector double __vec, long __offset, double *__ptr) {
+  *(vector double *)((__INTPTR_TYPE__)__ptr + __offset) = __vec;
+}
+
 /*-- vec_xstd2 --------------------------------------------------------------*/
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai void
 vec_xstd2(vector signed char __vec, long __offset, signed char *__ptr) {
   *(vector signed char *)((__INTPTR_TYPE__)__ptr + __offset) = __vec;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai void
 vec_xstd2(vector unsigned char __vec, long __offset, unsigned char *__ptr) {
   *(vector unsigned char *)((__INTPTR_TYPE__)__ptr + __offset) = __vec;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai void
 vec_xstd2(vector signed short __vec, long __offset, signed short *__ptr) {
   *(vector signed short *)((__INTPTR_TYPE__)__ptr + __offset) = __vec;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai void
 vec_xstd2(vector unsigned short __vec, long __offset, unsigned short *__ptr) {
   *(vector unsigned short *)((__INTPTR_TYPE__)__ptr + __offset) = __vec;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai void
 vec_xstd2(vector signed int __vec, long __offset, signed int *__ptr) {
   *(vector signed int *)((__INTPTR_TYPE__)__ptr + __offset) = __vec;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai void
 vec_xstd2(vector unsigned int __vec, long __offset, unsigned int *__ptr) {
   *(vector unsigned int *)((__INTPTR_TYPE__)__ptr + __offset) = __vec;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai void
 vec_xstd2(vector signed long long __vec, long __offset,
           signed long long *__ptr) {
   *(vector signed long long *)((__INTPTR_TYPE__)__ptr + __offset) = __vec;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai void
 vec_xstd2(vector unsigned long long __vec, long __offset,
           unsigned long long *__ptr) {
@@ -881,6 +1112,7 @@ vec_xstd2(vector unsigned long long __vec, long __offset,
     __vec;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai void
 vec_xstd2(vector double __vec, long __offset, double *__ptr) {
   *(vector double *)((__INTPTR_TYPE__)__ptr + __offset) = __vec;
@@ -888,31 +1120,37 @@ vec_xstd2(vector double __vec, long __offset, double *__ptr) {
 
 /*-- vec_xstw4 --------------------------------------------------------------*/
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai void
 vec_xstw4(vector signed char __vec, long __offset, signed char *__ptr) {
   *(vector signed char *)((__INTPTR_TYPE__)__ptr + __offset) = __vec;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai void
 vec_xstw4(vector unsigned char __vec, long __offset, unsigned char *__ptr) {
   *(vector unsigned char *)((__INTPTR_TYPE__)__ptr + __offset) = __vec;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai void
 vec_xstw4(vector signed short __vec, long __offset, signed short *__ptr) {
   *(vector signed short *)((__INTPTR_TYPE__)__ptr + __offset) = __vec;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai void
 vec_xstw4(vector unsigned short __vec, long __offset, unsigned short *__ptr) {
   *(vector unsigned short *)((__INTPTR_TYPE__)__ptr + __offset) = __vec;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai void
 vec_xstw4(vector signed int __vec, long __offset, signed int *__ptr) {
   *(vector signed int *)((__INTPTR_TYPE__)__ptr + __offset) = __vec;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai void
 vec_xstw4(vector unsigned int __vec, long __offset, unsigned int *__ptr) {
   *(vector unsigned int *)((__INTPTR_TYPE__)__ptr + __offset) = __vec;
@@ -952,6 +1190,12 @@ extern __ATTRS_o vector unsigned long long
 vec_load_bndry(const unsigned long long *__ptr, unsigned short __len)
   __constant_pow2_range(__len, 64, 4096);
 
+#if __ARCH__ >= 12
+extern __ATTRS_o vector float
+vec_load_bndry(const float *__ptr, unsigned short __len)
+  __constant_pow2_range(__len, 64, 4096);
+#endif
+
 extern __ATTRS_o vector double
 vec_load_bndry(const double *__ptr, unsigned short __len)
   __constant_pow2_range(__len, 64, 4096);
@@ -1007,11 +1251,27 @@ vec_load_len(const unsigned long long *__ptr, unsigned int __len) {
   return (vector unsigned long long)__builtin_s390_vll(__len, __ptr);
 }
 
+#if __ARCH__ >= 12
+static inline __ATTRS_o_ai vector float
+vec_load_len(const float *__ptr, unsigned int __len) {
+  return (vector float)__builtin_s390_vll(__len, __ptr);
+}
+#endif
+
 static inline __ATTRS_o_ai vector double
 vec_load_len(const double *__ptr, unsigned int __len) {
   return (vector double)__builtin_s390_vll(__len, __ptr);
 }
 
+/*-- vec_load_len_r ---------------------------------------------------------*/
+
+#if __ARCH__ >= 12
+static inline __ATTRS_ai vector unsigned char
+vec_load_len_r(const unsigned char *__ptr, unsigned int __len) {
+  return (vector unsigned char)__builtin_s390_vlrl(__len, __ptr);
+}
+#endif
+
 /*-- vec_store_len ----------------------------------------------------------*/
 
 static inline __ATTRS_o_ai void
@@ -1062,12 +1322,30 @@ vec_store_len(vector unsigned long long __vec, unsigned long long *__ptr,
   __builtin_s390_vstl((vector signed char)__vec, __len, __ptr);
 }
 
+#if __ARCH__ >= 12
+static inline __ATTRS_o_ai void
+vec_store_len(vector float __vec, float *__ptr,
+              unsigned int __len) {
+  __builtin_s390_vstl((vector signed char)__vec, __len, __ptr);
+}
+#endif
+
 static inline __ATTRS_o_ai void
 vec_store_len(vector double __vec, double *__ptr,
               unsigned int __len) {
   __builtin_s390_vstl((vector signed char)__vec, __len, __ptr);
 }
 
+/*-- vec_store_len_r --------------------------------------------------------*/
+
+#if __ARCH__ >= 12
+static inline __ATTRS_ai void
+vec_store_len_r(vector unsigned char __vec, unsigned char *__ptr,
+                unsigned int __len) {
+  __builtin_s390_vstrl((vector signed char)__vec, __len, __ptr);
+}
+#endif
+
 /*-- vec_load_pair ----------------------------------------------------------*/
 
 static inline __ATTRS_o_ai vector signed long long
@@ -1232,6 +1510,14 @@ vec_splat(vector unsigned long long __vec, int __index)
   return (vector unsigned long long)__vec[__index];
 }
 
+#if __ARCH__ >= 12
+static inline __ATTRS_o_ai vector float
+vec_splat(vector float __vec, int __index)
+  __constant_range(__index, 0, 3) {
+  return (vector float)__vec[__index];
+}
+#endif
+
 static inline __ATTRS_o_ai vector double
 vec_splat(vector double __vec, int __index)
   __constant_range(__index, 0, 1) {
@@ -1332,6 +1618,13 @@ vec_splats(unsigned long long __scalar) {
   return (vector unsigned long long)__scalar;
 }
 
+#if __ARCH__ >= 12
+static inline __ATTRS_o_ai vector float
+vec_splats(float __scalar) {
+  return (vector float)__scalar;
+}
+#endif
+
 static inline __ATTRS_o_ai vector double
 vec_splats(double __scalar) {
   return (vector double)__scalar;
@@ -1425,6 +1718,13 @@ vec_mergeh(vector unsigned long long __a, vector unsigned long long __b) {
   return (vector unsigned long long)(__a[0], __b[0]);
 }
 
+#if __ARCH__ >= 12
+static inline __ATTRS_o_ai vector float
+vec_mergeh(vector float __a, vector float __b) {
+  return (vector float)(__a[0], __b[0], __a[1], __b[1]);
+}
+#endif
+
 static inline __ATTRS_o_ai vector double
 vec_mergeh(vector double __a, vector double __b) {
   return (vector double)(__a[0], __b[0]);
@@ -1501,6 +1801,13 @@ vec_mergel(vector unsigned long long __a, vector unsigned long long __b) {
   return (vector unsigned long long)(__a[1], __b[1]);
 }
 
+#if __ARCH__ >= 12
+static inline __ATTRS_o_ai vector float
+vec_mergel(vector float __a, vector float __b) {
+  return (vector float)(__a[2], __b[2], __a[3], __b[3]);
+}
+#endif
+
 static inline __ATTRS_o_ai vector double
 vec_mergel(vector double __a, vector double __b) {
   return (vector double)(__a[1], __b[1]);
@@ -1866,6 +2173,13 @@ vec_cmpeq(vector unsigned long long __a, vector unsigned long long __b) {
   return (vector bool long long)(__a == __b);
 }
 
+#if __ARCH__ >= 12
+static inline __ATTRS_o_ai vector bool int
+vec_cmpeq(vector float __a, vector float __b) {
+  return (vector bool int)(__a == __b);
+}
+#endif
+
 static inline __ATTRS_o_ai vector bool long long
 vec_cmpeq(vector double __a, vector double __b) {
   return (vector bool long long)(__a == __b);
@@ -1913,6 +2227,13 @@ vec_cmpge(vector unsigned long long __a, vector unsigned long long __b) {
   return (vector bool long long)(__a >= __b);
 }
 
+#if __ARCH__ >= 12
+static inline __ATTRS_o_ai vector bool int
+vec_cmpge(vector float __a, vector float __b) {
+  return (vector bool int)(__a >= __b);
+}
+#endif
+
 static inline __ATTRS_o_ai vector bool long long
 vec_cmpge(vector double __a, vector double __b) {
   return (vector bool long long)(__a >= __b);
@@ -1960,6 +2281,13 @@ vec_cmpgt(vector unsigned long long __a, vector unsigned long long __b) {
   return (vector bool long long)(__a > __b);
 }
 
+#if __ARCH__ >= 12
+static inline __ATTRS_o_ai vector bool int
+vec_cmpgt(vector float __a, vector float __b) {
+  return (vector bool int)(__a > __b);
+}
+#endif
+
 static inline __ATTRS_o_ai vector bool long long
 vec_cmpgt(vector double __a, vector double __b) {
   return (vector bool long long)(__a > __b);
@@ -2007,6 +2335,13 @@ vec_cmple(vector unsigned long long __a, vector unsigned long long __b) {
   return (vector bool long long)(__a <= __b);
 }
 
+#if __ARCH__ >= 12
+static inline __ATTRS_o_ai vector bool int
+vec_cmple(vector float __a, vector float __b) {
+  return (vector bool int)(__a <= __b);
+}
+#endif
+
 static inline __ATTRS_o_ai vector bool long long
 vec_cmple(vector double __a, vector double __b) {
   return (vector bool long long)(__a <= __b);
@@ -2054,6 +2389,13 @@ vec_cmplt(vector unsigned long long __a, vector unsigned long long __b) {
   return (vector bool long long)(__a < __b);
 }
 
+#if __ARCH__ >= 12
+static inline __ATTRS_o_ai vector bool int
+vec_cmplt(vector float __a, vector float __b) {
+  return (vector bool int)(__a < __b);
+}
+#endif
+
 static inline __ATTRS_o_ai vector bool long long
 vec_cmplt(vector double __a, vector double __b) {
   return (vector bool long long)(__a < __b);
@@ -2068,6 +2410,7 @@ vec_all_eq(vector signed char __a, vector signed char __b) {
   return __cc == 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_eq(vector signed char __a, vector bool char __b) {
   int __cc;
@@ -2075,6 +2418,7 @@ vec_all_eq(vector signed char __a, vector bool char __b) {
   return __cc == 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_eq(vector bool char __a, vector signed char __b) {
   int __cc;
@@ -2090,6 +2434,7 @@ vec_all_eq(vector unsigned char __a, vector unsigned char __b) {
   return __cc == 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_eq(vector unsigned char __a, vector bool char __b) {
   int __cc;
@@ -2098,6 +2443,7 @@ vec_all_eq(vector unsigned char __a, vector bool char __b) {
   return __cc == 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_eq(vector bool char __a, vector unsigned char __b) {
   int __cc;
@@ -2121,6 +2467,7 @@ vec_all_eq(vector signed short __a, vector signed short __b) {
   return __cc == 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_eq(vector signed short __a, vector bool short __b) {
   int __cc;
@@ -2128,6 +2475,7 @@ vec_all_eq(vector signed short __a, vector bool short __b) {
   return __cc == 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_eq(vector bool short __a, vector signed short __b) {
   int __cc;
@@ -2143,6 +2491,7 @@ vec_all_eq(vector unsigned short __a, vector unsigned short __b) {
   return __cc == 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_eq(vector unsigned short __a, vector bool short __b) {
   int __cc;
@@ -2151,6 +2500,7 @@ vec_all_eq(vector unsigned short __a, vector bool short __b) {
   return __cc == 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_eq(vector bool short __a, vector unsigned short __b) {
   int __cc;
@@ -2174,6 +2524,7 @@ vec_all_eq(vector signed int __a, vector signed int __b) {
   return __cc == 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_eq(vector signed int __a, vector bool int __b) {
   int __cc;
@@ -2181,6 +2532,7 @@ vec_all_eq(vector signed int __a, vector bool int __b) {
   return __cc == 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_eq(vector bool int __a, vector signed int __b) {
   int __cc;
@@ -2196,6 +2548,7 @@ vec_all_eq(vector unsigned int __a, vector unsigned int __b) {
   return __cc == 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_eq(vector unsigned int __a, vector bool int __b) {
   int __cc;
@@ -2204,6 +2557,7 @@ vec_all_eq(vector unsigned int __a, vector bool int __b) {
   return __cc == 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_eq(vector bool int __a, vector unsigned int __b) {
   int __cc;
@@ -2227,6 +2581,7 @@ vec_all_eq(vector signed long long __a, vector signed long long __b) {
   return __cc == 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_eq(vector signed long long __a, vector bool long long __b) {
   int __cc;
@@ -2234,6 +2589,7 @@ vec_all_eq(vector signed long long __a, vector bool long long __b) {
   return __cc == 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_eq(vector bool long long __a, vector signed long long __b) {
   int __cc;
@@ -2249,6 +2605,7 @@ vec_all_eq(vector unsigned long long __a, vector unsigned long long __b) {
   return __cc == 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_eq(vector unsigned long long __a, vector bool long long __b) {
   int __cc;
@@ -2257,6 +2614,7 @@ vec_all_eq(vector unsigned long long __a, vector bool long long __b) {
   return __cc == 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_eq(vector bool long long __a, vector unsigned long long __b) {
   int __cc;
@@ -2273,6 +2631,15 @@ vec_all_eq(vector bool long long __a, vector bool long long __b) {
   return __cc == 0;
 }
 
+#if __ARCH__ >= 12
+static inline __ATTRS_o_ai int
+vec_all_eq(vector float __a, vector float __b) {
+  int __cc;
+  __builtin_s390_vfcesbs(__a, __b, &__cc);
+  return __cc == 0;
+}
+#endif
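
Not part of the patch: a sketch of the new float comparison overloads, assuming -march=z14 -mzvector. vec_all_eq folds the comparison into a scalar predicate, while vec_cmpeq (added earlier) produces a per-lane mask of type vector bool int:

#include <vecintrin.h>

int floats_identical(vector float a, vector float b) {
  return vec_all_eq(a, b);   /* 1 iff all four lanes compare equal */
}
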
+
 static inline __ATTRS_o_ai int
 vec_all_eq(vector double __a, vector double __b) {
   int __cc;
@@ -2289,6 +2656,7 @@ vec_all_ne(vector signed char __a, vector signed char __b) {
   return __cc == 3;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_ne(vector signed char __a, vector bool char __b) {
   int __cc;
@@ -2296,6 +2664,7 @@ vec_all_ne(vector signed char __a, vector bool char __b) {
   return __cc == 3;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_ne(vector bool char __a, vector signed char __b) {
   int __cc;
@@ -2311,6 +2680,7 @@ vec_all_ne(vector unsigned char __a, vector unsigned char __b) {
   return __cc == 3;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_ne(vector unsigned char __a, vector bool char __b) {
   int __cc;
@@ -2319,6 +2689,7 @@ vec_all_ne(vector unsigned char __a, vector bool char __b) {
   return __cc == 3;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_ne(vector bool char __a, vector unsigned char __b) {
   int __cc;
@@ -2342,6 +2713,7 @@ vec_all_ne(vector signed short __a, vector signed short __b) {
   return __cc == 3;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_ne(vector signed short __a, vector bool short __b) {
   int __cc;
@@ -2349,6 +2721,7 @@ vec_all_ne(vector signed short __a, vector bool short __b) {
   return __cc == 3;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_ne(vector bool short __a, vector signed short __b) {
   int __cc;
@@ -2364,6 +2737,7 @@ vec_all_ne(vector unsigned short __a, vector unsigned short __b) {
   return __cc == 3;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_ne(vector unsigned short __a, vector bool short __b) {
   int __cc;
@@ -2372,6 +2746,7 @@ vec_all_ne(vector unsigned short __a, vector bool short __b) {
   return __cc == 3;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_ne(vector bool short __a, vector unsigned short __b) {
   int __cc;
@@ -2395,6 +2770,7 @@ vec_all_ne(vector signed int __a, vector signed int __b) {
   return __cc == 3;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_ne(vector signed int __a, vector bool int __b) {
   int __cc;
@@ -2402,6 +2778,7 @@ vec_all_ne(vector signed int __a, vector bool int __b) {
   return __cc == 3;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_ne(vector bool int __a, vector signed int __b) {
   int __cc;
@@ -2417,6 +2794,7 @@ vec_all_ne(vector unsigned int __a, vector unsigned int __b) {
   return __cc == 3;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_ne(vector unsigned int __a, vector bool int __b) {
   int __cc;
@@ -2425,6 +2803,7 @@ vec_all_ne(vector unsigned int __a, vector bool int __b) {
   return __cc == 3;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_ne(vector bool int __a, vector unsigned int __b) {
   int __cc;
@@ -2448,6 +2827,7 @@ vec_all_ne(vector signed long long __a, vector signed long long __b) {
   return __cc == 3;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_ne(vector signed long long __a, vector bool long long __b) {
   int __cc;
@@ -2455,6 +2835,7 @@ vec_all_ne(vector signed long long __a, vector bool long long __b) {
   return __cc == 3;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_ne(vector bool long long __a, vector signed long long __b) {
   int __cc;
@@ -2470,6 +2851,7 @@ vec_all_ne(vector unsigned long long __a, vector unsigned long long __b) {
   return __cc == 3;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_ne(vector unsigned long long __a, vector bool long long __b) {
   int __cc;
@@ -2478,6 +2860,7 @@ vec_all_ne(vector unsigned long long __a, vector bool long long __b) {
   return __cc == 3;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_ne(vector bool long long __a, vector unsigned long long __b) {
   int __cc;
@@ -2494,6 +2877,15 @@ vec_all_ne(vector bool long long __a, vector bool long long __b) {
   return __cc == 3;
 }
 
+#if __ARCH__ >= 12
+static inline __ATTRS_o_ai int
+vec_all_ne(vector float __a, vector float __b) {
+  int __cc;
+  __builtin_s390_vfcesbs(__a, __b, &__cc);
+  return __cc == 3;
+}
+#endif
+
 static inline __ATTRS_o_ai int
 vec_all_ne(vector double __a, vector double __b) {
   int __cc;
@@ -2510,6 +2902,7 @@ vec_all_ge(vector signed char __a, vector signed char __b) {
   return __cc == 3;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_ge(vector signed char __a, vector bool char __b) {
   int __cc;
@@ -2517,6 +2910,7 @@ vec_all_ge(vector signed char __a, vector bool char __b) {
   return __cc == 3;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_ge(vector bool char __a, vector signed char __b) {
   int __cc;
@@ -2531,6 +2925,7 @@ vec_all_ge(vector unsigned char __a, vector unsigned char __b) {
   return __cc == 3;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_ge(vector unsigned char __a, vector bool char __b) {
   int __cc;
@@ -2538,6 +2933,7 @@ vec_all_ge(vector unsigned char __a, vector bool char __b) {
   return __cc == 3;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_ge(vector bool char __a, vector unsigned char __b) {
   int __cc;
@@ -2545,6 +2941,7 @@ vec_all_ge(vector bool char __a, vector unsigned char __b) {
   return __cc == 3;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_ge(vector bool char __a, vector bool char __b) {
   int __cc;
@@ -2560,6 +2957,7 @@ vec_all_ge(vector signed short __a, vector signed short __b) {
   return __cc == 3;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_ge(vector signed short __a, vector bool short __b) {
   int __cc;
@@ -2567,6 +2965,7 @@ vec_all_ge(vector signed short __a, vector bool short __b) {
   return __cc == 3;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_ge(vector bool short __a, vector signed short __b) {
   int __cc;
@@ -2581,6 +2980,7 @@ vec_all_ge(vector unsigned short __a, vector unsigned short __b) {
   return __cc == 3;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_ge(vector unsigned short __a, vector bool short __b) {
   int __cc;
@@ -2588,6 +2988,7 @@ vec_all_ge(vector unsigned short __a, vector bool short __b) {
   return __cc == 3;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_ge(vector bool short __a, vector unsigned short __b) {
   int __cc;
@@ -2595,6 +2996,7 @@ vec_all_ge(vector bool short __a, vector unsigned short __b) {
   return __cc == 3;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_ge(vector bool short __a, vector bool short __b) {
   int __cc;
@@ -2610,6 +3012,7 @@ vec_all_ge(vector signed int __a, vector signed int __b) {
   return __cc == 3;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_ge(vector signed int __a, vector bool int __b) {
   int __cc;
@@ -2617,6 +3020,7 @@ vec_all_ge(vector signed int __a, vector bool int __b) {
   return __cc == 3;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_ge(vector bool int __a, vector signed int __b) {
   int __cc;
@@ -2631,6 +3035,7 @@ vec_all_ge(vector unsigned int __a, vector unsigned int __b) {
   return __cc == 3;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_ge(vector unsigned int __a, vector bool int __b) {
   int __cc;
@@ -2638,6 +3043,7 @@ vec_all_ge(vector unsigned int __a, vector bool int __b) {
   return __cc == 3;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_ge(vector bool int __a, vector unsigned int __b) {
   int __cc;
@@ -2645,6 +3051,7 @@ vec_all_ge(vector bool int __a, vector unsigned int __b) {
   return __cc == 3;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_ge(vector bool int __a, vector bool int __b) {
   int __cc;
@@ -2660,6 +3067,7 @@ vec_all_ge(vector signed long long __a, vector signed long long __b) {
   return __cc == 3;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_ge(vector signed long long __a, vector bool long long __b) {
   int __cc;
@@ -2667,6 +3075,7 @@ vec_all_ge(vector signed long long __a, vector bool long long __b) {
   return __cc == 3;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_ge(vector bool long long __a, vector signed long long __b) {
   int __cc;
@@ -2681,6 +3090,7 @@ vec_all_ge(vector unsigned long long __a, vector unsigned long long __b) {
   return __cc == 3;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_ge(vector unsigned long long __a, vector bool long long __b) {
   int __cc;
@@ -2688,6 +3098,7 @@ vec_all_ge(vector unsigned long long __a, vector bool long long __b) {
   return __cc == 3;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_ge(vector bool long long __a, vector unsigned long long __b) {
   int __cc;
@@ -2695,6 +3106,7 @@ vec_all_ge(vector bool long long __a, vector unsigned long long __b) {
   return __cc == 3;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_ge(vector bool long long __a, vector bool long long __b) {
   int __cc;
@@ -2703,6 +3115,15 @@ vec_all_ge(vector bool long long __a, vector bool long long __b) {
   return __cc == 3;
 }
 
+#if __ARCH__ >= 12
+static inline __ATTRS_o_ai int
+vec_all_ge(vector float __a, vector float __b) {
+  int __cc;
+  __builtin_s390_vfchesbs(__a, __b, &__cc);
+  return __cc == 0;
+}
+#endif
+
 static inline __ATTRS_o_ai int
 vec_all_ge(vector double __a, vector double __b) {
   int __cc;
@@ -2719,6 +3140,7 @@ vec_all_gt(vector signed char __a, vector signed char __b) {
   return __cc == 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_gt(vector signed char __a, vector bool char __b) {
   int __cc;
@@ -2726,6 +3148,7 @@ vec_all_gt(vector signed char __a, vector bool char __b) {
   return __cc == 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_gt(vector bool char __a, vector signed char __b) {
   int __cc;
@@ -2740,6 +3163,7 @@ vec_all_gt(vector unsigned char __a, vector unsigned char __b) {
   return __cc == 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_gt(vector unsigned char __a, vector bool char __b) {
   int __cc;
@@ -2747,6 +3171,7 @@ vec_all_gt(vector unsigned char __a, vector bool char __b) {
   return __cc == 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_gt(vector bool char __a, vector unsigned char __b) {
   int __cc;
@@ -2754,6 +3179,7 @@ vec_all_gt(vector bool char __a, vector unsigned char __b) {
   return __cc == 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_gt(vector bool char __a, vector bool char __b) {
   int __cc;
@@ -2769,6 +3195,7 @@ vec_all_gt(vector signed short __a, vector signed short __b) {
   return __cc == 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_gt(vector signed short __a, vector bool short __b) {
   int __cc;
@@ -2776,6 +3203,7 @@ vec_all_gt(vector signed short __a, vector bool short __b) {
   return __cc == 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_gt(vector bool short __a, vector signed short __b) {
   int __cc;
@@ -2790,6 +3218,7 @@ vec_all_gt(vector unsigned short __a, vector unsigned short __b) {
   return __cc == 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_gt(vector unsigned short __a, vector bool short __b) {
   int __cc;
@@ -2797,6 +3226,7 @@ vec_all_gt(vector unsigned short __a, vector bool short __b) {
   return __cc == 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_gt(vector bool short __a, vector unsigned short __b) {
   int __cc;
@@ -2804,6 +3234,7 @@ vec_all_gt(vector bool short __a, vector unsigned short __b) {
   return __cc == 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_gt(vector bool short __a, vector bool short __b) {
   int __cc;
@@ -2819,6 +3250,7 @@ vec_all_gt(vector signed int __a, vector signed int __b) {
   return __cc == 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_gt(vector signed int __a, vector bool int __b) {
   int __cc;
@@ -2826,6 +3258,7 @@ vec_all_gt(vector signed int __a, vector bool int __b) {
   return __cc == 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_gt(vector bool int __a, vector signed int __b) {
   int __cc;
@@ -2840,6 +3273,7 @@ vec_all_gt(vector unsigned int __a, vector unsigned int __b) {
   return __cc == 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_gt(vector unsigned int __a, vector bool int __b) {
   int __cc;
@@ -2847,6 +3281,7 @@ vec_all_gt(vector unsigned int __a, vector bool int __b) {
   return __cc == 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_gt(vector bool int __a, vector unsigned int __b) {
   int __cc;
@@ -2854,6 +3289,7 @@ vec_all_gt(vector bool int __a, vector unsigned int __b) {
   return __cc == 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_gt(vector bool int __a, vector bool int __b) {
   int __cc;
@@ -2869,6 +3305,7 @@ vec_all_gt(vector signed long long __a, vector signed long long __b) {
   return __cc == 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_gt(vector signed long long __a, vector bool long long __b) {
   int __cc;
@@ -2876,6 +3313,7 @@ vec_all_gt(vector signed long long __a, vector bool long long __b) {
   return __cc == 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_gt(vector bool long long __a, vector signed long long __b) {
   int __cc;
@@ -2890,6 +3328,7 @@ vec_all_gt(vector unsigned long long __a, vector unsigned long long __b) {
   return __cc == 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_gt(vector unsigned long long __a, vector bool long long __b) {
   int __cc;
@@ -2897,6 +3336,7 @@ vec_all_gt(vector unsigned long long __a, vector bool long long __b) {
   return __cc == 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_gt(vector bool long long __a, vector unsigned long long __b) {
   int __cc;
@@ -2904,6 +3344,7 @@ vec_all_gt(vector bool long long __a, vector unsigned long long __b) {
   return __cc == 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_gt(vector bool long long __a, vector bool long long __b) {
   int __cc;
@@ -2912,6 +3353,15 @@ vec_all_gt(vector bool long long __a, vector bool long long __b) {
   return __cc == 0;
 }
 
+#if __ARCH__ >= 12
+static inline __ATTRS_o_ai int
+vec_all_gt(vector float __a, vector float __b) {
+  int __cc;
+  __builtin_s390_vfchsbs(__a, __b, &__cc);
+  return __cc == 0;
+}
+#endif
+
 static inline __ATTRS_o_ai int
 vec_all_gt(vector double __a, vector double __b) {
   int __cc;
@@ -2928,6 +3378,7 @@ vec_all_le(vector signed char __a, vector signed char __b) {
   return __cc == 3;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_le(vector signed char __a, vector bool char __b) {
   int __cc;
@@ -2935,6 +3386,7 @@ vec_all_le(vector signed char __a, vector bool char __b) {
   return __cc == 3;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_le(vector bool char __a, vector signed char __b) {
   int __cc;
@@ -2949,6 +3401,7 @@ vec_all_le(vector unsigned char __a, vector unsigned char __b) {
   return __cc == 3;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_le(vector unsigned char __a, vector bool char __b) {
   int __cc;
@@ -2956,6 +3409,7 @@ vec_all_le(vector unsigned char __a, vector bool char __b) {
   return __cc == 3;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_le(vector bool char __a, vector unsigned char __b) {
   int __cc;
@@ -2963,6 +3417,7 @@ vec_all_le(vector bool char __a, vector unsigned char __b) {
   return __cc == 3;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_le(vector bool char __a, vector bool char __b) {
   int __cc;
@@ -2978,6 +3433,7 @@ vec_all_le(vector signed short __a, vector signed short __b) {
   return __cc == 3;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_le(vector signed short __a, vector bool short __b) {
   int __cc;
@@ -2985,6 +3441,7 @@ vec_all_le(vector signed short __a, vector bool short __b) {
   return __cc == 3;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_le(vector bool short __a, vector signed short __b) {
   int __cc;
@@ -2999,6 +3456,7 @@ vec_all_le(vector unsigned short __a, vector unsigned short __b) {
   return __cc == 3;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_le(vector unsigned short __a, vector bool short __b) {
   int __cc;
@@ -3006,6 +3464,7 @@ vec_all_le(vector unsigned short __a, vector bool short __b) {
   return __cc == 3;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_le(vector bool short __a, vector unsigned short __b) {
   int __cc;
@@ -3013,6 +3472,7 @@ vec_all_le(vector bool short __a, vector unsigned short __b) {
   return __cc == 3;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_le(vector bool short __a, vector bool short __b) {
   int __cc;
@@ -3028,6 +3488,7 @@ vec_all_le(vector signed int __a, vector signed int __b) {
   return __cc == 3;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_le(vector signed int __a, vector bool int __b) {
   int __cc;
@@ -3035,6 +3496,7 @@ vec_all_le(vector signed int __a, vector bool int __b) {
   return __cc == 3;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_le(vector bool int __a, vector signed int __b) {
   int __cc;
@@ -3049,6 +3511,7 @@ vec_all_le(vector unsigned int __a, vector unsigned int __b) {
   return __cc == 3;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_le(vector unsigned int __a, vector bool int __b) {
   int __cc;
@@ -3056,6 +3519,7 @@ vec_all_le(vector unsigned int __a, vector bool int __b) {
   return __cc == 3;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_le(vector bool int __a, vector unsigned int __b) {
   int __cc;
@@ -3063,6 +3527,7 @@ vec_all_le(vector bool int __a, vector unsigned int __b) {
   return __cc == 3;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_le(vector bool int __a, vector bool int __b) {
   int __cc;
@@ -3078,6 +3543,7 @@ vec_all_le(vector signed long long __a, vector signed long long __b) {
   return __cc == 3;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_le(vector signed long long __a, vector bool long long __b) {
   int __cc;
@@ -3085,6 +3551,7 @@ vec_all_le(vector signed long long __a, vector bool long long __b) {
   return __cc == 3;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_le(vector bool long long __a, vector signed long long __b) {
   int __cc;
@@ -3099,6 +3566,7 @@ vec_all_le(vector unsigned long long __a, vector unsigned long long __b) {
   return __cc == 3;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_le(vector unsigned long long __a, vector bool long long __b) {
   int __cc;
@@ -3106,6 +3574,7 @@ vec_all_le(vector unsigned long long __a, vector bool long long __b) {
   return __cc == 3;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_le(vector bool long long __a, vector unsigned long long __b) {
   int __cc;
@@ -3113,6 +3582,7 @@ vec_all_le(vector bool long long __a, vector unsigned long long __b) {
   return __cc == 3;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_le(vector bool long long __a, vector bool long long __b) {
   int __cc;
@@ -3121,6 +3591,15 @@ vec_all_le(vector bool long long __a, vector bool long long __b) {
   return __cc == 3;
 }
 
+#if __ARCH__ >= 12
+static inline __ATTRS_o_ai int
+vec_all_le(vector float __a, vector float __b) {
+  int __cc;
+  __builtin_s390_vfchesbs(__b, __a, &__cc);
+  return __cc == 0;
+}
+#endif
+
 static inline __ATTRS_o_ai int
 vec_all_le(vector double __a, vector double __b) {
   int __cc;
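
[Note] There are no dedicated "less" compare builtins here; vec_all_le and vec_all_lt simply swap the operands of the greater-or-equal/greater builtins, since a <= b holds element-wise exactly where b >= a does. For instance:

    vector float a, b;
    int cc;
    __builtin_s390_vfchesbs(b, a, &cc); /* b >= a holds exactly where a <= b holds */
    int all_le = (cc == 0);             /* cc 0: true for every element */
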
@@ -3137,6 +3616,7 @@ vec_all_lt(vector signed char __a, vector signed char __b) {
   return __cc == 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_lt(vector signed char __a, vector bool char __b) {
   int __cc;
@@ -3144,6 +3624,7 @@ vec_all_lt(vector signed char __a, vector bool char __b) {
   return __cc == 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_lt(vector bool char __a, vector signed char __b) {
   int __cc;
@@ -3158,6 +3639,7 @@ vec_all_lt(vector unsigned char __a, vector unsigned char __b) {
   return __cc == 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_lt(vector unsigned char __a, vector bool char __b) {
   int __cc;
@@ -3165,6 +3647,7 @@ vec_all_lt(vector unsigned char __a, vector bool char __b) {
   return __cc == 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_lt(vector bool char __a, vector unsigned char __b) {
   int __cc;
@@ -3172,6 +3655,7 @@ vec_all_lt(vector bool char __a, vector unsigned char __b) {
   return __cc == 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_lt(vector bool char __a, vector bool char __b) {
   int __cc;
@@ -3187,6 +3671,7 @@ vec_all_lt(vector signed short __a, vector signed short __b) {
   return __cc == 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_lt(vector signed short __a, vector bool short __b) {
   int __cc;
@@ -3194,6 +3679,7 @@ vec_all_lt(vector signed short __a, vector bool short __b) {
   return __cc == 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_lt(vector bool short __a, vector signed short __b) {
   int __cc;
@@ -3208,6 +3694,7 @@ vec_all_lt(vector unsigned short __a, vector unsigned short __b) {
   return __cc == 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_lt(vector unsigned short __a, vector bool short __b) {
   int __cc;
@@ -3215,6 +3702,7 @@ vec_all_lt(vector unsigned short __a, vector bool short __b) {
   return __cc == 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_lt(vector bool short __a, vector unsigned short __b) {
   int __cc;
@@ -3222,6 +3710,7 @@ vec_all_lt(vector bool short __a, vector unsigned short __b) {
   return __cc == 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_lt(vector bool short __a, vector bool short __b) {
   int __cc;
@@ -3237,6 +3726,7 @@ vec_all_lt(vector signed int __a, vector signed int __b) {
   return __cc == 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_lt(vector signed int __a, vector bool int __b) {
   int __cc;
@@ -3244,6 +3734,7 @@ vec_all_lt(vector signed int __a, vector bool int __b) {
   return __cc == 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_lt(vector bool int __a, vector signed int __b) {
   int __cc;
@@ -3258,6 +3749,7 @@ vec_all_lt(vector unsigned int __a, vector unsigned int __b) {
   return __cc == 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_lt(vector unsigned int __a, vector bool int __b) {
   int __cc;
@@ -3265,6 +3757,7 @@ vec_all_lt(vector unsigned int __a, vector bool int __b) {
   return __cc == 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_lt(vector bool int __a, vector unsigned int __b) {
   int __cc;
@@ -3272,6 +3765,7 @@ vec_all_lt(vector bool int __a, vector unsigned int __b) {
   return __cc == 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_lt(vector bool int __a, vector bool int __b) {
   int __cc;
@@ -3287,6 +3781,7 @@ vec_all_lt(vector signed long long __a, vector signed long long __b) {
   return __cc == 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_lt(vector signed long long __a, vector bool long long __b) {
   int __cc;
@@ -3294,6 +3789,7 @@ vec_all_lt(vector signed long long __a, vector bool long long __b) {
   return __cc == 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_lt(vector bool long long __a, vector signed long long __b) {
   int __cc;
@@ -3308,6 +3804,7 @@ vec_all_lt(vector unsigned long long __a, vector unsigned long long __b) {
   return __cc == 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_lt(vector unsigned long long __a, vector bool long long __b) {
   int __cc;
@@ -3315,6 +3812,7 @@ vec_all_lt(vector unsigned long long __a, vector bool long long __b) {
   return __cc == 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_lt(vector bool long long __a, vector unsigned long long __b) {
   int __cc;
@@ -3322,6 +3820,7 @@ vec_all_lt(vector bool long long __a, vector unsigned long long __b) {
   return __cc == 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_all_lt(vector bool long long __a, vector bool long long __b) {
   int __cc;
@@ -3330,6 +3829,15 @@ vec_all_lt(vector bool long long __a, vector bool long long __b) {
   return __cc == 0;
 }
 
+#if __ARCH__ >= 12
+static inline __ATTRS_o_ai int
+vec_all_lt(vector float __a, vector float __b) {
+  int __cc;
+  __builtin_s390_vfchsbs(__b, __a, &__cc);
+  return __cc == 0;
+}
+#endif
+
 static inline __ATTRS_o_ai int
 vec_all_lt(vector double __a, vector double __b) {
   int __cc;
@@ -3339,7 +3847,16 @@ vec_all_lt(vector double __a, vector double __b) {
 
 /*-- vec_all_nge ------------------------------------------------------------*/
 
-static inline __ATTRS_ai int
+#if __ARCH__ >= 12
+static inline __ATTRS_o_ai int
+vec_all_nge(vector float __a, vector float __b) {
+  int __cc;
+  __builtin_s390_vfchesbs(__a, __b, &__cc);
+  return __cc == 3;
+}
+#endif
+
+static inline __ATTRS_o_ai int
 vec_all_nge(vector double __a, vector double __b) {
   int __cc;
   __builtin_s390_vfchedbs(__a, __b, &__cc);
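
[Note] Observe the `-static inline __ATTRS_ai` / `+static inline __ATTRS_o_ai` flip on vec_all_nge and the predicates that follow: once a float prototype exists alongside the double one, the function has to carry clang's overloadable attribute. Presumably the header defines the two macros along these lines (an assumption; check the top of vecintrin.h):

    #define __ATTRS_ai   __attribute__((__always_inline__))
    #define __ATTRS_o_ai __attribute__((__overloadable__, __always_inline__))
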
@@ -3348,7 +3865,16 @@ vec_all_nge(vector double __a, vector double __b) {
 
 /*-- vec_all_ngt ------------------------------------------------------------*/
 
-static inline __ATTRS_ai int
+#if __ARCH__ >= 12
+static inline __ATTRS_o_ai int
+vec_all_ngt(vector float __a, vector float __b) {
+  int __cc;
+  __builtin_s390_vfchsbs(__a, __b, &__cc);
+  return __cc == 3;
+}
+#endif
+
+static inline __ATTRS_o_ai int
 vec_all_ngt(vector double __a, vector double __b) {
   int __cc;
   __builtin_s390_vfchdbs(__a, __b, &__cc);
@@ -3357,7 +3883,16 @@ vec_all_ngt(vector double __a, vector double __b) {
 
 /*-- vec_all_nle ------------------------------------------------------------*/
 
-static inline __ATTRS_ai int
+#if __ARCH__ >= 12
+static inline __ATTRS_o_ai int
+vec_all_nle(vector float __a, vector float __b) {
+  int __cc;
+  __builtin_s390_vfchesbs(__b, __a, &__cc);
+  return __cc == 3;
+}
+#endif
+
+static inline __ATTRS_o_ai int
 vec_all_nle(vector double __a, vector double __b) {
   int __cc;
   __builtin_s390_vfchedbs(__b, __a, &__cc);
@@ -3366,7 +3901,16 @@ vec_all_nle(vector double __a, vector double __b) {
 
 /*-- vec_all_nlt ------------------------------------------------------------*/
 
-static inline __ATTRS_ai int
+#if __ARCH__ >= 12
+static inline __ATTRS_o_ai int
+vec_all_nlt(vector float __a, vector float __b) {
+  int __cc;
+  __builtin_s390_vfchsbs(__b, __a, &__cc);
+  return __cc == 3;
+}
+#endif
+
+static inline __ATTRS_o_ai int
 vec_all_nlt(vector double __a, vector double __b) {
   int __cc;
   __builtin_s390_vfchdbs(__b, __a, &__cc);
@@ -3375,7 +3919,16 @@ vec_all_nlt(vector double __a, vector double __b) {
 
 /*-- vec_all_nan ------------------------------------------------------------*/
 
-static inline __ATTRS_ai int
+#if __ARCH__ >= 12
+static inline __ATTRS_o_ai int
+vec_all_nan(vector float __a) {
+  int __cc;
+  __builtin_s390_vftcisb(__a, 15, &__cc);
+  return __cc == 0;
+}
+#endif
+
+static inline __ATTRS_o_ai int
 vec_all_nan(vector double __a) {
   int __cc;
   __builtin_s390_vftcidb(__a, 15, &__cc);
@@ -3384,7 +3937,16 @@ vec_all_nan(vector double __a) {
 
 /*-- vec_all_numeric --------------------------------------------------------*/
 
-static inline __ATTRS_ai int
+#if __ARCH__ >= 12
+static inline __ATTRS_o_ai int
+vec_all_numeric(vector float __a) {
+  int __cc;
+  __builtin_s390_vftcisb(__a, 15, &__cc);
+  return __cc == 3;
+}
+#endif
+
+static inline __ATTRS_o_ai int
 vec_all_numeric(vector double __a) {
   int __cc;
   __builtin_s390_vftcidb(__a, 15, &__cc);
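
[Note] vec_all_nan and vec_all_numeric both use the test-data-class builtin (vftcisb for float, vftcidb for double) with mask 15, which, if the usual VFTCI mask layout applies, selects the four NaN classes (quiet and signaling, both signs): cc 0 then means every element is in a selected class, cc 3 means none is. A sketch under that assumption:

    vector float v;
    int cc;
    __builtin_s390_vftcisb(v, 15, &cc); /* test each element against the selected classes */
    int all_nan     = (cc == 0);        /* every element fell in a NaN class */
    int all_numeric = (cc == 3);        /* no element fell in a NaN class */
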
@@ -3400,6 +3962,7 @@ vec_any_eq(vector signed char __a, vector signed char __b) {
   return __cc <= 1;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_eq(vector signed char __a, vector bool char __b) {
   int __cc;
@@ -3407,6 +3970,7 @@ vec_any_eq(vector signed char __a, vector bool char __b) {
   return __cc <= 1;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_eq(vector bool char __a, vector signed char __b) {
   int __cc;
@@ -3422,6 +3986,7 @@ vec_any_eq(vector unsigned char __a, vector unsigned char __b) {
   return __cc <= 1;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_eq(vector unsigned char __a, vector bool char __b) {
   int __cc;
@@ -3430,6 +3995,7 @@ vec_any_eq(vector unsigned char __a, vector bool char __b) {
   return __cc <= 1;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_eq(vector bool char __a, vector unsigned char __b) {
   int __cc;
@@ -3453,6 +4019,7 @@ vec_any_eq(vector signed short __a, vector signed short __b) {
   return __cc <= 1;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_eq(vector signed short __a, vector bool short __b) {
   int __cc;
@@ -3460,6 +4027,7 @@ vec_any_eq(vector signed short __a, vector bool short __b) {
   return __cc <= 1;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_eq(vector bool short __a, vector signed short __b) {
   int __cc;
@@ -3475,6 +4043,7 @@ vec_any_eq(vector unsigned short __a, vector unsigned short __b) {
   return __cc <= 1;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_eq(vector unsigned short __a, vector bool short __b) {
   int __cc;
@@ -3483,6 +4052,7 @@ vec_any_eq(vector unsigned short __a, vector bool short __b) {
   return __cc <= 1;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_eq(vector bool short __a, vector unsigned short __b) {
   int __cc;
@@ -3506,6 +4076,7 @@ vec_any_eq(vector signed int __a, vector signed int __b) {
   return __cc <= 1;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_eq(vector signed int __a, vector bool int __b) {
   int __cc;
@@ -3513,6 +4084,7 @@ vec_any_eq(vector signed int __a, vector bool int __b) {
   return __cc <= 1;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_eq(vector bool int __a, vector signed int __b) {
   int __cc;
@@ -3528,6 +4100,7 @@ vec_any_eq(vector unsigned int __a, vector unsigned int __b) {
   return __cc <= 1;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_eq(vector unsigned int __a, vector bool int __b) {
   int __cc;
@@ -3536,6 +4109,7 @@ vec_any_eq(vector unsigned int __a, vector bool int __b) {
   return __cc <= 1;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_eq(vector bool int __a, vector unsigned int __b) {
   int __cc;
@@ -3559,6 +4133,7 @@ vec_any_eq(vector signed long long __a, vector signed long long __b) {
   return __cc <= 1;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_eq(vector signed long long __a, vector bool long long __b) {
   int __cc;
@@ -3566,6 +4141,7 @@ vec_any_eq(vector signed long long __a, vector bool long long __b) {
   return __cc <= 1;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_eq(vector bool long long __a, vector signed long long __b) {
   int __cc;
@@ -3581,6 +4157,7 @@ vec_any_eq(vector unsigned long long __a, vector unsigned long long __b) {
   return __cc <= 1;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_eq(vector unsigned long long __a, vector bool long long __b) {
   int __cc;
@@ -3589,6 +4166,7 @@ vec_any_eq(vector unsigned long long __a, vector bool long long __b) {
   return __cc <= 1;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_eq(vector bool long long __a, vector unsigned long long __b) {
   int __cc;
@@ -3605,6 +4183,15 @@ vec_any_eq(vector bool long long __a, vector bool long long __b) {
   return __cc <= 1;
 }
 
+#if __ARCH__ >= 12
+static inline __ATTRS_o_ai int
+vec_any_eq(vector float __a, vector float __b) {
+  int __cc;
+  __builtin_s390_vfcesbs(__a, __b, &__cc);
+  return __cc <= 1;
+}
+#endif
+
 static inline __ATTRS_o_ai int
 vec_any_eq(vector double __a, vector double __b) {
   int __cc;
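
[Note] The vec_any_* family reuses the same builtins with the complementary cc tests: `__cc <= 1` accepts cc 0 (all elements true) and cc 1 (mixed), i.e. at least one element satisfied the comparison, while vec_any_ne uses `__cc != 0` for "not all equal". Assuming the cc convention noted above:

    vector float a, b;
    int cc;
    __builtin_s390_vfcesbs(a, b, &cc);
    int any_eq = (cc <= 1);  /* cc 0 (all equal) or cc 1 (mixed): at least one match */
    int any_ne = (cc != 0);  /* anything but "all equal": at least one mismatch */
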
@@ -3621,6 +4208,7 @@ vec_any_ne(vector signed char __a, vector signed char __b) {
   return __cc != 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_ne(vector signed char __a, vector bool char __b) {
   int __cc;
@@ -3628,6 +4216,7 @@ vec_any_ne(vector signed char __a, vector bool char __b) {
   return __cc != 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_ne(vector bool char __a, vector signed char __b) {
   int __cc;
@@ -3643,6 +4232,7 @@ vec_any_ne(vector unsigned char __a, vector unsigned char __b) {
   return __cc != 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_ne(vector unsigned char __a, vector bool char __b) {
   int __cc;
@@ -3651,6 +4241,7 @@ vec_any_ne(vector unsigned char __a, vector bool char __b) {
   return __cc != 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_ne(vector bool char __a, vector unsigned char __b) {
   int __cc;
@@ -3674,6 +4265,7 @@ vec_any_ne(vector signed short __a, vector signed short __b) {
   return __cc != 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_ne(vector signed short __a, vector bool short __b) {
   int __cc;
@@ -3681,6 +4273,7 @@ vec_any_ne(vector signed short __a, vector bool short __b) {
   return __cc != 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_ne(vector bool short __a, vector signed short __b) {
   int __cc;
@@ -3696,6 +4289,7 @@ vec_any_ne(vector unsigned short __a, vector unsigned short __b) {
   return __cc != 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_ne(vector unsigned short __a, vector bool short __b) {
   int __cc;
@@ -3704,6 +4298,7 @@ vec_any_ne(vector unsigned short __a, vector bool short __b) {
   return __cc != 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_ne(vector bool short __a, vector unsigned short __b) {
   int __cc;
@@ -3727,6 +4322,7 @@ vec_any_ne(vector signed int __a, vector signed int __b) {
   return __cc != 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_ne(vector signed int __a, vector bool int __b) {
   int __cc;
@@ -3734,6 +4330,7 @@ vec_any_ne(vector signed int __a, vector bool int __b) {
   return __cc != 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_ne(vector bool int __a, vector signed int __b) {
   int __cc;
@@ -3749,6 +4346,7 @@ vec_any_ne(vector unsigned int __a, vector unsigned int __b) {
   return __cc != 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_ne(vector unsigned int __a, vector bool int __b) {
   int __cc;
@@ -3757,6 +4355,7 @@ vec_any_ne(vector unsigned int __a, vector bool int __b) {
   return __cc != 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_ne(vector bool int __a, vector unsigned int __b) {
   int __cc;
@@ -3780,6 +4379,7 @@ vec_any_ne(vector signed long long __a, vector signed long long __b) {
   return __cc != 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_ne(vector signed long long __a, vector bool long long __b) {
   int __cc;
@@ -3787,6 +4387,7 @@ vec_any_ne(vector signed long long __a, vector bool long long __b) {
   return __cc != 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_ne(vector bool long long __a, vector signed long long __b) {
   int __cc;
@@ -3802,6 +4403,7 @@ vec_any_ne(vector unsigned long long __a, vector unsigned long long __b) {
   return __cc != 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_ne(vector unsigned long long __a, vector bool long long __b) {
   int __cc;
@@ -3810,6 +4412,7 @@ vec_any_ne(vector unsigned long long __a, vector bool long long __b) {
   return __cc != 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_ne(vector bool long long __a, vector unsigned long long __b) {
   int __cc;
@@ -3826,6 +4429,15 @@ vec_any_ne(vector bool long long __a, vector bool long long __b) {
   return __cc != 0;
 }
 
+#if __ARCH__ >= 12
+static inline __ATTRS_o_ai int
+vec_any_ne(vector float __a, vector float __b) {
+  int __cc;
+  __builtin_s390_vfcesbs(__a, __b, &__cc);
+  return __cc != 0;
+}
+#endif
+
 static inline __ATTRS_o_ai int
 vec_any_ne(vector double __a, vector double __b) {
   int __cc;
@@ -3842,6 +4454,7 @@ vec_any_ge(vector signed char __a, vector signed char __b) {
   return __cc != 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_ge(vector signed char __a, vector bool char __b) {
   int __cc;
@@ -3849,6 +4462,7 @@ vec_any_ge(vector signed char __a, vector bool char __b) {
   return __cc != 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_ge(vector bool char __a, vector signed char __b) {
   int __cc;
@@ -3863,6 +4477,7 @@ vec_any_ge(vector unsigned char __a, vector unsigned char __b) {
   return __cc != 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_ge(vector unsigned char __a, vector bool char __b) {
   int __cc;
@@ -3870,6 +4485,7 @@ vec_any_ge(vector unsigned char __a, vector bool char __b) {
   return __cc != 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_ge(vector bool char __a, vector unsigned char __b) {
   int __cc;
@@ -3877,6 +4493,7 @@ vec_any_ge(vector bool char __a, vector unsigned char __b) {
   return __cc != 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_ge(vector bool char __a, vector bool char __b) {
   int __cc;
@@ -3892,6 +4509,7 @@ vec_any_ge(vector signed short __a, vector signed short __b) {
   return __cc != 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_ge(vector signed short __a, vector bool short __b) {
   int __cc;
@@ -3899,6 +4517,7 @@ vec_any_ge(vector signed short __a, vector bool short __b) {
   return __cc != 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_ge(vector bool short __a, vector signed short __b) {
   int __cc;
@@ -3913,6 +4532,7 @@ vec_any_ge(vector unsigned short __a, vector unsigned short __b) {
   return __cc != 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_ge(vector unsigned short __a, vector bool short __b) {
   int __cc;
@@ -3920,6 +4540,7 @@ vec_any_ge(vector unsigned short __a, vector bool short __b) {
   return __cc != 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_ge(vector bool short __a, vector unsigned short __b) {
   int __cc;
@@ -3927,6 +4548,7 @@ vec_any_ge(vector bool short __a, vector unsigned short __b) {
   return __cc != 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_ge(vector bool short __a, vector bool short __b) {
   int __cc;
@@ -3942,6 +4564,7 @@ vec_any_ge(vector signed int __a, vector signed int __b) {
   return __cc != 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_ge(vector signed int __a, vector bool int __b) {
   int __cc;
@@ -3949,6 +4572,7 @@ vec_any_ge(vector signed int __a, vector bool int __b) {
   return __cc != 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_ge(vector bool int __a, vector signed int __b) {
   int __cc;
@@ -3963,6 +4587,7 @@ vec_any_ge(vector unsigned int __a, vector unsigned int __b) {
   return __cc != 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_ge(vector unsigned int __a, vector bool int __b) {
   int __cc;
@@ -3970,6 +4595,7 @@ vec_any_ge(vector unsigned int __a, vector bool int __b) {
   return __cc != 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_ge(vector bool int __a, vector unsigned int __b) {
   int __cc;
@@ -3977,6 +4603,7 @@ vec_any_ge(vector bool int __a, vector unsigned int __b) {
   return __cc != 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_ge(vector bool int __a, vector bool int __b) {
   int __cc;
@@ -3992,6 +4619,7 @@ vec_any_ge(vector signed long long __a, vector signed long long __b) {
   return __cc != 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_ge(vector signed long long __a, vector bool long long __b) {
   int __cc;
@@ -3999,6 +4627,7 @@ vec_any_ge(vector signed long long __a, vector bool long long __b) {
   return __cc != 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_ge(vector bool long long __a, vector signed long long __b) {
   int __cc;
@@ -4013,6 +4642,7 @@ vec_any_ge(vector unsigned long long __a, vector unsigned long long __b) {
   return __cc != 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_ge(vector unsigned long long __a, vector bool long long __b) {
   int __cc;
@@ -4020,6 +4650,7 @@ vec_any_ge(vector unsigned long long __a, vector bool long long __b) {
   return __cc != 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_ge(vector bool long long __a, vector unsigned long long __b) {
   int __cc;
@@ -4027,6 +4658,7 @@ vec_any_ge(vector bool long long __a, vector unsigned long long __b) {
   return __cc != 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_ge(vector bool long long __a, vector bool long long __b) {
   int __cc;
@@ -4035,6 +4667,15 @@ vec_any_ge(vector bool long long __a, vector bool long long __b) {
   return __cc != 0;
 }
 
+#if __ARCH__ >= 12
+static inline __ATTRS_o_ai int
+vec_any_ge(vector float __a, vector float __b) {
+  int __cc;
+  __builtin_s390_vfchesbs(__a, __b, &__cc);
+  return __cc <= 1;
+}
+#endif
+
 static inline __ATTRS_o_ai int
 vec_any_ge(vector double __a, vector double __b) {
   int __cc;
@@ -4051,6 +4692,7 @@ vec_any_gt(vector signed char __a, vector signed char __b) {
   return __cc <= 1;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_gt(vector signed char __a, vector bool char __b) {
   int __cc;
@@ -4058,6 +4700,7 @@ vec_any_gt(vector signed char __a, vector bool char __b) {
   return __cc <= 1;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_gt(vector bool char __a, vector signed char __b) {
   int __cc;
@@ -4072,6 +4715,7 @@ vec_any_gt(vector unsigned char __a, vector unsigned char __b) {
   return __cc <= 1;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_gt(vector unsigned char __a, vector bool char __b) {
   int __cc;
@@ -4079,6 +4723,7 @@ vec_any_gt(vector unsigned char __a, vector bool char __b) {
   return __cc <= 1;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_gt(vector bool char __a, vector unsigned char __b) {
   int __cc;
@@ -4086,6 +4731,7 @@ vec_any_gt(vector bool char __a, vector unsigned char __b) {
   return __cc <= 1;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_gt(vector bool char __a, vector bool char __b) {
   int __cc;
@@ -4101,6 +4747,7 @@ vec_any_gt(vector signed short __a, vector signed short __b) {
   return __cc <= 1;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_gt(vector signed short __a, vector bool short __b) {
   int __cc;
@@ -4108,6 +4755,7 @@ vec_any_gt(vector signed short __a, vector bool short __b) {
   return __cc <= 1;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_gt(vector bool short __a, vector signed short __b) {
   int __cc;
@@ -4122,6 +4770,7 @@ vec_any_gt(vector unsigned short __a, vector unsigned short __b) {
   return __cc <= 1;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_gt(vector unsigned short __a, vector bool short __b) {
   int __cc;
@@ -4129,6 +4778,7 @@ vec_any_gt(vector unsigned short __a, vector bool short __b) {
   return __cc <= 1;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_gt(vector bool short __a, vector unsigned short __b) {
   int __cc;
@@ -4136,6 +4786,7 @@ vec_any_gt(vector bool short __a, vector unsigned short __b) {
   return __cc <= 1;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_gt(vector bool short __a, vector bool short __b) {
   int __cc;
@@ -4151,6 +4802,7 @@ vec_any_gt(vector signed int __a, vector signed int __b) {
   return __cc <= 1;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_gt(vector signed int __a, vector bool int __b) {
   int __cc;
@@ -4158,6 +4810,7 @@ vec_any_gt(vector signed int __a, vector bool int __b) {
   return __cc <= 1;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_gt(vector bool int __a, vector signed int __b) {
   int __cc;
@@ -4172,6 +4825,7 @@ vec_any_gt(vector unsigned int __a, vector unsigned int __b) {
   return __cc <= 1;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_gt(vector unsigned int __a, vector bool int __b) {
   int __cc;
@@ -4179,6 +4833,7 @@ vec_any_gt(vector unsigned int __a, vector bool int __b) {
   return __cc <= 1;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_gt(vector bool int __a, vector unsigned int __b) {
   int __cc;
@@ -4186,6 +4841,7 @@ vec_any_gt(vector bool int __a, vector unsigned int __b) {
   return __cc <= 1;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_gt(vector bool int __a, vector bool int __b) {
   int __cc;
@@ -4201,6 +4857,7 @@ vec_any_gt(vector signed long long __a, vector signed long long __b) {
   return __cc <= 1;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_gt(vector signed long long __a, vector bool long long __b) {
   int __cc;
@@ -4208,6 +4865,7 @@ vec_any_gt(vector signed long long __a, vector bool long long __b) {
   return __cc <= 1;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_gt(vector bool long long __a, vector signed long long __b) {
   int __cc;
@@ -4222,6 +4880,7 @@ vec_any_gt(vector unsigned long long __a, vector unsigned long long __b) {
   return __cc <= 1;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_gt(vector unsigned long long __a, vector bool long long __b) {
   int __cc;
@@ -4229,6 +4888,7 @@ vec_any_gt(vector unsigned long long __a, vector bool long long __b) {
   return __cc <= 1;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_gt(vector bool long long __a, vector unsigned long long __b) {
   int __cc;
@@ -4236,6 +4896,7 @@ vec_any_gt(vector bool long long __a, vector unsigned long long __b) {
   return __cc <= 1;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_gt(vector bool long long __a, vector bool long long __b) {
   int __cc;
@@ -4244,6 +4905,15 @@ vec_any_gt(vector bool long long __a, vector bool long long __b) {
   return __cc <= 1;
 }
 
+#if __ARCH__ >= 12
+static inline __ATTRS_o_ai int
+vec_any_gt(vector float __a, vector float __b) {
+  int __cc;
+  __builtin_s390_vfchsbs(__a, __b, &__cc);
+  return __cc <= 1;
+}
+#endif
+
 static inline __ATTRS_o_ai int
 vec_any_gt(vector double __a, vector double __b) {
   int __cc;
@@ -4260,6 +4930,7 @@ vec_any_le(vector signed char __a, vector signed char __b) {
   return __cc != 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_le(vector signed char __a, vector bool char __b) {
   int __cc;
@@ -4267,6 +4938,7 @@ vec_any_le(vector signed char __a, vector bool char __b) {
   return __cc != 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_le(vector bool char __a, vector signed char __b) {
   int __cc;
@@ -4281,6 +4953,7 @@ vec_any_le(vector unsigned char __a, vector unsigned char __b) {
   return __cc != 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_le(vector unsigned char __a, vector bool char __b) {
   int __cc;
@@ -4288,6 +4961,7 @@ vec_any_le(vector unsigned char __a, vector bool char __b) {
   return __cc != 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_le(vector bool char __a, vector unsigned char __b) {
   int __cc;
@@ -4295,6 +4969,7 @@ vec_any_le(vector bool char __a, vector unsigned char __b) {
   return __cc != 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_le(vector bool char __a, vector bool char __b) {
   int __cc;
@@ -4310,6 +4985,7 @@ vec_any_le(vector signed short __a, vector signed short __b) {
   return __cc != 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_le(vector signed short __a, vector bool short __b) {
   int __cc;
@@ -4317,6 +4993,7 @@ vec_any_le(vector signed short __a, vector bool short __b) {
   return __cc != 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_le(vector bool short __a, vector signed short __b) {
   int __cc;
@@ -4331,6 +5008,7 @@ vec_any_le(vector unsigned short __a, vector unsigned short __b) {
   return __cc != 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_le(vector unsigned short __a, vector bool short __b) {
   int __cc;
@@ -4338,6 +5016,7 @@ vec_any_le(vector unsigned short __a, vector bool short __b) {
   return __cc != 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_le(vector bool short __a, vector unsigned short __b) {
   int __cc;
@@ -4345,6 +5024,7 @@ vec_any_le(vector bool short __a, vector unsigned short __b) {
   return __cc != 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_le(vector bool short __a, vector bool short __b) {
   int __cc;
@@ -4360,6 +5040,7 @@ vec_any_le(vector signed int __a, vector signed int __b) {
   return __cc != 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_le(vector signed int __a, vector bool int __b) {
   int __cc;
@@ -4367,6 +5048,7 @@ vec_any_le(vector signed int __a, vector bool int __b) {
   return __cc != 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_le(vector bool int __a, vector signed int __b) {
   int __cc;
@@ -4381,6 +5063,7 @@ vec_any_le(vector unsigned int __a, vector unsigned int __b) {
   return __cc != 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_le(vector unsigned int __a, vector bool int __b) {
   int __cc;
@@ -4388,6 +5071,7 @@ vec_any_le(vector unsigned int __a, vector bool int __b) {
   return __cc != 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_le(vector bool int __a, vector unsigned int __b) {
   int __cc;
@@ -4395,6 +5079,7 @@ vec_any_le(vector bool int __a, vector unsigned int __b) {
   return __cc != 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_le(vector bool int __a, vector bool int __b) {
   int __cc;
@@ -4410,6 +5095,7 @@ vec_any_le(vector signed long long __a, vector signed long long __b) {
   return __cc != 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_le(vector signed long long __a, vector bool long long __b) {
   int __cc;
@@ -4417,6 +5103,7 @@ vec_any_le(vector signed long long __a, vector bool long long __b) {
   return __cc != 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_le(vector bool long long __a, vector signed long long __b) {
   int __cc;
@@ -4431,6 +5118,7 @@ vec_any_le(vector unsigned long long __a, vector unsigned long long __b) {
   return __cc != 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_le(vector unsigned long long __a, vector bool long long __b) {
   int __cc;
@@ -4438,6 +5126,7 @@ vec_any_le(vector unsigned long long __a, vector bool long long __b) {
   return __cc != 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_le(vector bool long long __a, vector unsigned long long __b) {
   int __cc;
@@ -4445,6 +5134,7 @@ vec_any_le(vector bool long long __a, vector unsigned long long __b) {
   return __cc != 0;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_le(vector bool long long __a, vector bool long long __b) {
   int __cc;
@@ -4453,6 +5143,15 @@ vec_any_le(vector bool long long __a, vector bool long long __b) {
   return __cc != 0;
 }
 
+#if __ARCH__ >= 12
+static inline __ATTRS_o_ai int
+vec_any_le(vector float __a, vector float __b) {
+  int __cc;
+  __builtin_s390_vfchesbs(__b, __a, &__cc);
+  return __cc <= 1;
+}
+#endif
+
 static inline __ATTRS_o_ai int
 vec_any_le(vector double __a, vector double __b) {
   int __cc;
@@ -4469,6 +5168,7 @@ vec_any_lt(vector signed char __a, vector signed char __b) {
   return __cc <= 1;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_lt(vector signed char __a, vector bool char __b) {
   int __cc;
@@ -4476,6 +5176,7 @@ vec_any_lt(vector signed char __a, vector bool char __b) {
   return __cc <= 1;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_lt(vector bool char __a, vector signed char __b) {
   int __cc;
@@ -4490,6 +5191,7 @@ vec_any_lt(vector unsigned char __a, vector unsigned char __b) {
   return __cc <= 1;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_lt(vector unsigned char __a, vector bool char __b) {
   int __cc;
@@ -4497,6 +5199,7 @@ vec_any_lt(vector unsigned char __a, vector bool char __b) {
   return __cc <= 1;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_lt(vector bool char __a, vector unsigned char __b) {
   int __cc;
@@ -4504,6 +5207,7 @@ vec_any_lt(vector bool char __a, vector unsigned char __b) {
   return __cc <= 1;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_lt(vector bool char __a, vector bool char __b) {
   int __cc;
@@ -4519,6 +5223,7 @@ vec_any_lt(vector signed short __a, vector signed short __b) {
   return __cc <= 1;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_lt(vector signed short __a, vector bool short __b) {
   int __cc;
@@ -4526,6 +5231,7 @@ vec_any_lt(vector signed short __a, vector bool short __b) {
   return __cc <= 1;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_lt(vector bool short __a, vector signed short __b) {
   int __cc;
@@ -4540,6 +5246,7 @@ vec_any_lt(vector unsigned short __a, vector unsigned short __b) {
   return __cc <= 1;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_lt(vector unsigned short __a, vector bool short __b) {
   int __cc;
@@ -4547,6 +5254,7 @@ vec_any_lt(vector unsigned short __a, vector bool short __b) {
   return __cc <= 1;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_lt(vector bool short __a, vector unsigned short __b) {
   int __cc;
@@ -4554,6 +5262,7 @@ vec_any_lt(vector bool short __a, vector unsigned short __b) {
   return __cc <= 1;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_lt(vector bool short __a, vector bool short __b) {
   int __cc;
@@ -4569,6 +5278,7 @@ vec_any_lt(vector signed int __a, vector signed int __b) {
   return __cc <= 1;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_lt(vector signed int __a, vector bool int __b) {
   int __cc;
@@ -4576,6 +5286,7 @@ vec_any_lt(vector signed int __a, vector bool int __b) {
   return __cc <= 1;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_lt(vector bool int __a, vector signed int __b) {
   int __cc;
@@ -4590,6 +5301,7 @@ vec_any_lt(vector unsigned int __a, vector unsigned int __b) {
   return __cc <= 1;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_lt(vector unsigned int __a, vector bool int __b) {
   int __cc;
@@ -4597,6 +5309,7 @@ vec_any_lt(vector unsigned int __a, vector bool int __b) {
   return __cc <= 1;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_lt(vector bool int __a, vector unsigned int __b) {
   int __cc;
@@ -4604,6 +5317,7 @@ vec_any_lt(vector bool int __a, vector unsigned int __b) {
   return __cc <= 1;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_lt(vector bool int __a, vector bool int __b) {
   int __cc;
@@ -4619,6 +5333,7 @@ vec_any_lt(vector signed long long __a, vector signed long long __b) {
   return __cc <= 1;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_lt(vector signed long long __a, vector bool long long __b) {
   int __cc;
@@ -4626,6 +5341,7 @@ vec_any_lt(vector signed long long __a, vector bool long long __b) {
   return __cc <= 1;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_lt(vector bool long long __a, vector signed long long __b) {
   int __cc;
@@ -4640,6 +5356,7 @@ vec_any_lt(vector unsigned long long __a, vector unsigned long long __b) {
   return __cc <= 1;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_lt(vector unsigned long long __a, vector bool long long __b) {
   int __cc;
@@ -4647,6 +5364,7 @@ vec_any_lt(vector unsigned long long __a, vector bool long long __b) {
   return __cc <= 1;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_lt(vector bool long long __a, vector unsigned long long __b) {
   int __cc;
@@ -4654,6 +5372,7 @@ vec_any_lt(vector bool long long __a, vector unsigned long long __b) {
   return __cc <= 1;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai int
 vec_any_lt(vector bool long long __a, vector bool long long __b) {
   int __cc;
@@ -4662,6 +5381,15 @@ vec_any_lt(vector bool long long __a, vector bool long long __b) {
   return __cc <= 1;
 }
 
+#if __ARCH__ >= 12
+static inline __ATTRS_o_ai int
+vec_any_lt(vector float __a, vector float __b) {
+  int __cc;
+  __builtin_s390_vfchsbs(__b, __a, &__cc);
+  return __cc <= 1;
+}
+#endif
+
 static inline __ATTRS_o_ai int
 vec_any_lt(vector double __a, vector double __b) {
   int __cc;
@@ -4671,7 +5399,16 @@ vec_any_lt(vector double __a, vector double __b) {
 
 /*-- vec_any_nge ------------------------------------------------------------*/
 
-static inline __ATTRS_ai int
+#if __ARCH__ >= 12
+static inline __ATTRS_o_ai int
+vec_any_nge(vector float __a, vector float __b) {
+  int __cc;
+  __builtin_s390_vfchesbs(__a, __b, &__cc);
+  return __cc != 0;
+}
+#endif
+
+static inline __ATTRS_o_ai int
 vec_any_nge(vector double __a, vector double __b) {
   int __cc;
   __builtin_s390_vfchedbs(__a, __b, &__cc);
@@ -4680,7 +5417,16 @@ vec_any_nge(vector double __a, vector double __b) {
 
 /*-- vec_any_ngt ------------------------------------------------------------*/
 
-static inline __ATTRS_ai int
+#if __ARCH__ >= 12
+static inline __ATTRS_o_ai int
+vec_any_ngt(vector float __a, vector float __b) {
+  int __cc;
+  __builtin_s390_vfchsbs(__a, __b, &__cc);
+  return __cc != 0;
+}
+#endif
+
+static inline __ATTRS_o_ai int
 vec_any_ngt(vector double __a, vector double __b) {
   int __cc;
   __builtin_s390_vfchdbs(__a, __b, &__cc);
@@ -4689,7 +5435,16 @@ vec_any_ngt(vector double __a, vector double __b) {
 
 /*-- vec_any_nle ------------------------------------------------------------*/
 
-static inline __ATTRS_ai int
+#if __ARCH__ >= 12
+static inline __ATTRS_o_ai int
+vec_any_nle(vector float __a, vector float __b) {
+  int __cc;
+  __builtin_s390_vfchesbs(__b, __a, &__cc);
+  return __cc != 0;
+}
+#endif
+
+static inline __ATTRS_o_ai int
 vec_any_nle(vector double __a, vector double __b) {
   int __cc;
   __builtin_s390_vfchedbs(__b, __a, &__cc);
@@ -4698,7 +5453,16 @@ vec_any_nle(vector double __a, vector double __b) {
 
 /*-- vec_any_nlt ------------------------------------------------------------*/
 
-static inline __ATTRS_ai int
+#if __ARCH__ >= 12
+static inline __ATTRS_o_ai int
+vec_any_nlt(vector float __a, vector float __b) {
+  int __cc;
+  __builtin_s390_vfchsbs(__b, __a, &__cc);
+  return __cc != 0;
+}
+#endif
+
+static inline __ATTRS_o_ai int
 vec_any_nlt(vector double __a, vector double __b) {
   int __cc;
   __builtin_s390_vfchdbs(__b, __a, &__cc);
@@ -4707,7 +5471,16 @@ vec_any_nlt(vector double __a, vector double __b) {
 
 /*-- vec_any_nan ------------------------------------------------------------*/
 
-static inline __ATTRS_ai int
+#if __ARCH__ >= 12
+static inline __ATTRS_o_ai int
+vec_any_nan(vector float __a) {
+  int __cc;
+  __builtin_s390_vftcisb(__a, 15, &__cc);
+  return __cc != 3;
+}
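+// The class mask 15 passed to vftcisb selects every NaN class (quiet and
+// signaling, both signs), mirroring the vftcidb pattern below; __cc == 3
+// means no element matched, so __cc != 3 reports whether any element is
+// a NaN.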
+#endif
+
+static inline __ATTRS_o_ai int
 vec_any_nan(vector double __a) {
   int __cc;
   __builtin_s390_vftcidb(__a, 15, &__cc);
@@ -4716,7 +5489,16 @@ vec_any_nan(vector double __a) {
 
 /*-- vec_any_numeric --------------------------------------------------------*/
 
-static inline __ATTRS_ai int
+#if __ARCH__ >= 12
+static inline __ATTRS_o_ai int
+vec_any_numeric(vector float __a) {
+  int __cc;
+  __builtin_s390_vftcisb(__a, 15, &__cc);
+  return __cc != 0;
+}
+#endif
+
+static inline __ATTRS_o_ai int
 vec_any_numeric(vector double __a) {
   int __cc;
   __builtin_s390_vftcidb(__a, 15, &__cc);
@@ -4735,11 +5517,13 @@ vec_andc(vector signed char __a, vector signed char __b) {
   return __a & ~__b;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector signed char
 vec_andc(vector bool char __a, vector signed char __b) {
   return __a & ~__b;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector signed char
 vec_andc(vector signed char __a, vector bool char __b) {
   return __a & ~__b;
@@ -4750,11 +5534,13 @@ vec_andc(vector unsigned char __a, vector unsigned char __b) {
   return __a & ~__b;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector unsigned char
 vec_andc(vector bool char __a, vector unsigned char __b) {
   return __a & ~__b;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector unsigned char
 vec_andc(vector unsigned char __a, vector bool char __b) {
   return __a & ~__b;
@@ -4770,11 +5556,13 @@ vec_andc(vector signed short __a, vector signed short __b) {
   return __a & ~__b;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector signed short
 vec_andc(vector bool short __a, vector signed short __b) {
   return __a & ~__b;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector signed short
 vec_andc(vector signed short __a, vector bool short __b) {
   return __a & ~__b;
@@ -4785,11 +5573,13 @@ vec_andc(vector unsigned short __a, vector unsigned short __b) {
   return __a & ~__b;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector unsigned short
 vec_andc(vector bool short __a, vector unsigned short __b) {
   return __a & ~__b;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector unsigned short
 vec_andc(vector unsigned short __a, vector bool short __b) {
   return __a & ~__b;
@@ -4805,11 +5595,13 @@ vec_andc(vector signed int __a, vector signed int __b) {
   return __a & ~__b;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector signed int
 vec_andc(vector bool int __a, vector signed int __b) {
   return __a & ~__b;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector signed int
 vec_andc(vector signed int __a, vector bool int __b) {
   return __a & ~__b;
@@ -4820,11 +5612,13 @@ vec_andc(vector unsigned int __a, vector unsigned int __b) {
   return __a & ~__b;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector unsigned int
 vec_andc(vector bool int __a, vector unsigned int __b) {
   return __a & ~__b;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector unsigned int
 vec_andc(vector unsigned int __a, vector bool int __b) {
   return __a & ~__b;
@@ -4840,11 +5634,13 @@ vec_andc(vector signed long long __a, vector signed long long __b) {
   return __a & ~__b;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector signed long long
 vec_andc(vector bool long long __a, vector signed long long __b) {
   return __a & ~__b;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector signed long long
 vec_andc(vector signed long long __a, vector bool long long __b) {
   return __a & ~__b;
@@ -4855,28 +5651,40 @@ vec_andc(vector unsigned long long __a, vector unsigned long long __b) {
   return __a & ~__b;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector unsigned long long
 vec_andc(vector bool long long __a, vector unsigned long long __b) {
   return __a & ~__b;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector unsigned long long
 vec_andc(vector unsigned long long __a, vector bool long long __b) {
   return __a & ~__b;
 }
 
+#if __ARCH__ >= 12
+static inline __ATTRS_o_ai vector float
+vec_andc(vector float __a, vector float __b) {
+  return (vector float)((vector unsigned int)__a &
+                        ~(vector unsigned int)__b);
+}
+#endif
+
 static inline __ATTRS_o_ai vector double
 vec_andc(vector double __a, vector double __b) {
   return (vector double)((vector unsigned long long)__a &
                          ~(vector unsigned long long)__b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector double
 vec_andc(vector bool long long __a, vector double __b) {
   return (vector double)((vector unsigned long long)__a &
                          ~(vector unsigned long long)__b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector double
 vec_andc(vector double __a, vector bool long long __b) {
   return (vector double)((vector unsigned long long)__a &
@@ -4895,11 +5703,13 @@ vec_nor(vector signed char __a, vector signed char __b) {
   return ~(__a | __b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector signed char
 vec_nor(vector bool char __a, vector signed char __b) {
   return ~(__a | __b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector signed char
 vec_nor(vector signed char __a, vector bool char __b) {
   return ~(__a | __b);
@@ -4910,11 +5720,13 @@ vec_nor(vector unsigned char __a, vector unsigned char __b) {
   return ~(__a | __b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector unsigned char
 vec_nor(vector bool char __a, vector unsigned char __b) {
   return ~(__a | __b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector unsigned char
 vec_nor(vector unsigned char __a, vector bool char __b) {
   return ~(__a | __b);
@@ -4930,11 +5742,13 @@ vec_nor(vector signed short __a, vector signed short __b) {
   return ~(__a | __b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector signed short
 vec_nor(vector bool short __a, vector signed short __b) {
   return ~(__a | __b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector signed short
 vec_nor(vector signed short __a, vector bool short __b) {
   return ~(__a | __b);
@@ -4945,11 +5759,13 @@ vec_nor(vector unsigned short __a, vector unsigned short __b) {
   return ~(__a | __b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector unsigned short
 vec_nor(vector bool short __a, vector unsigned short __b) {
   return ~(__a | __b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector unsigned short
 vec_nor(vector unsigned short __a, vector bool short __b) {
   return ~(__a | __b);
@@ -4965,11 +5781,13 @@ vec_nor(vector signed int __a, vector signed int __b) {
   return ~(__a | __b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector signed int
 vec_nor(vector bool int __a, vector signed int __b) {
   return ~(__a | __b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector signed int
 vec_nor(vector signed int __a, vector bool int __b) {
   return ~(__a | __b);
@@ -4980,11 +5798,13 @@ vec_nor(vector unsigned int __a, vector unsigned int __b) {
   return ~(__a | __b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector unsigned int
 vec_nor(vector bool int __a, vector unsigned int __b) {
   return ~(__a | __b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector unsigned int
 vec_nor(vector unsigned int __a, vector bool int __b) {
   return ~(__a | __b);
@@ -5000,11 +5820,13 @@ vec_nor(vector signed long long __a, vector signed long long __b) {
   return ~(__a | __b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector signed long long
 vec_nor(vector bool long long __a, vector signed long long __b) {
   return ~(__a | __b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector signed long long
 vec_nor(vector signed long long __a, vector bool long long __b) {
   return ~(__a | __b);
@@ -5015,34 +5837,274 @@ vec_nor(vector unsigned long long __a, vector unsigned long long __b) {
   return ~(__a | __b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector unsigned long long
 vec_nor(vector bool long long __a, vector unsigned long long __b) {
   return ~(__a | __b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector unsigned long long
 vec_nor(vector unsigned long long __a, vector bool long long __b) {
   return ~(__a | __b);
 }
 
+#if __ARCH__ >= 12
+static inline __ATTRS_o_ai vector float
+vec_nor(vector float __a, vector float __b) {
+  return (vector float)~((vector unsigned int)__a |
+                         (vector unsigned int)__b);
+}
+#endif
+
 static inline __ATTRS_o_ai vector double
 vec_nor(vector double __a, vector double __b) {
   return (vector double)~((vector unsigned long long)__a |
                           (vector unsigned long long)__b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector double
 vec_nor(vector bool long long __a, vector double __b) {
   return (vector double)~((vector unsigned long long)__a |
                           (vector unsigned long long)__b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector double
 vec_nor(vector double __a, vector bool long long __b) {
   return (vector double)~((vector unsigned long long)__a |
                           (vector unsigned long long)__b);
 }
 
+/*-- vec_orc ----------------------------------------------------------------*/
+
+#if __ARCH__ >= 12
+static inline __ATTRS_o_ai vector bool char
+vec_orc(vector bool char __a, vector bool char __b) {
+  return __a | ~__b;
+}
+
+static inline __ATTRS_o_ai vector signed char
+vec_orc(vector signed char __a, vector signed char __b) {
+  return __a | ~__b;
+}
+
+static inline __ATTRS_o_ai vector unsigned char
+vec_orc(vector unsigned char __a, vector unsigned char __b) {
+  return __a | ~__b;
+}
+
+static inline __ATTRS_o_ai vector bool short
+vec_orc(vector bool short __a, vector bool short __b) {
+  return __a | ~__b;
+}
+
+static inline __ATTRS_o_ai vector signed short
+vec_orc(vector signed short __a, vector signed short __b) {
+  return __a | ~__b;
+}
+
+static inline __ATTRS_o_ai vector unsigned short
+vec_orc(vector unsigned short __a, vector unsigned short __b) {
+  return __a | ~__b;
+}
+
+static inline __ATTRS_o_ai vector bool int
+vec_orc(vector bool int __a, vector bool int __b) {
+  return __a | ~__b;
+}
+
+static inline __ATTRS_o_ai vector signed int
+vec_orc(vector signed int __a, vector signed int __b) {
+  return __a | ~__b;
+}
+
+static inline __ATTRS_o_ai vector unsigned int
+vec_orc(vector unsigned int __a, vector unsigned int __b) {
+  return __a | ~__b;
+}
+
+static inline __ATTRS_o_ai vector bool long long
+vec_orc(vector bool long long __a, vector bool long long __b) {
+  return __a | ~__b;
+}
+
+static inline __ATTRS_o_ai vector signed long long
+vec_orc(vector signed long long __a, vector signed long long __b) {
+  return __a | ~__b;
+}
+
+static inline __ATTRS_o_ai vector unsigned long long
+vec_orc(vector unsigned long long __a, vector unsigned long long __b) {
+  return __a | ~__b;
+}
+
+static inline __ATTRS_o_ai vector float
+vec_orc(vector float __a, vector float __b) {
+  return (vector float)((vector unsigned int)__a |
+                        ~(vector unsigned int)__b);
+}
+
+static inline __ATTRS_o_ai vector double
+vec_orc(vector double __a, vector double __b) {
+  return (vector double)((vector unsigned long long)__a |
+                         ~(vector unsigned long long)__b);
+}
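+// Usage sketch: vec_orc returns the bitwise OR of the first operand with
+// the complement of the second, e.g.
+//   vector unsigned int __r = vec_orc(__x, __y);  // __r[i] == __x[i] | ~__y[i]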
+#endif
+
+/*-- vec_nand ---------------------------------------------------------------*/
+
+#if __ARCH__ >= 12
+static inline __ATTRS_o_ai vector bool char
+vec_nand(vector bool char __a, vector bool char __b) {
+  return ~(__a & __b);
+}
+
+static inline __ATTRS_o_ai vector signed char
+vec_nand(vector signed char __a, vector signed char __b) {
+  return ~(__a & __b);
+}
+
+static inline __ATTRS_o_ai vector unsigned char
+vec_nand(vector unsigned char __a, vector unsigned char __b) {
+  return ~(__a & __b);
+}
+
+static inline __ATTRS_o_ai vector bool short
+vec_nand(vector bool short __a, vector bool short __b) {
+  return ~(__a & __b);
+}
+
+static inline __ATTRS_o_ai vector signed short
+vec_nand(vector signed short __a, vector signed short __b) {
+  return ~(__a & __b);
+}
+
+static inline __ATTRS_o_ai vector unsigned short
+vec_nand(vector unsigned short __a, vector unsigned short __b) {
+  return ~(__a & __b);
+}
+
+static inline __ATTRS_o_ai vector bool int
+vec_nand(vector bool int __a, vector bool int __b) {
+  return ~(__a & __b);
+}
+
+static inline __ATTRS_o_ai vector signed int
+vec_nand(vector signed int __a, vector signed int __b) {
+  return ~(__a & __b);
+}
+
+static inline __ATTRS_o_ai vector unsigned int
+vec_nand(vector unsigned int __a, vector unsigned int __b) {
+  return ~(__a & __b);
+}
+
+static inline __ATTRS_o_ai vector bool long long
+vec_nand(vector bool long long __a, vector bool long long __b) {
+  return ~(__a & __b);
+}
+
+static inline __ATTRS_o_ai vector signed long long
+vec_nand(vector signed long long __a, vector signed long long __b) {
+  return ~(__a & __b);
+}
+
+static inline __ATTRS_o_ai vector unsigned long long
+vec_nand(vector unsigned long long __a, vector unsigned long long __b) {
+  return ~(__a & __b);
+}
+
+static inline __ATTRS_o_ai vector float
+vec_nand(vector float __a, vector float __b) {
+  return (vector float)~((vector unsigned int)__a &
+                         (vector unsigned int)__b);
+}
+
+static inline __ATTRS_o_ai vector double
+vec_nand(vector double __a, vector double __b) {
+  return (vector double)~((vector unsigned long long)__a &
+                          (vector unsigned long long)__b);
+}
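+// Usage sketch: vec_nand returns the complement of the bitwise AND, e.g.
+//   vector unsigned int __r = vec_nand(__x, __y);  // __r[i] == ~(__x[i] & __y[i])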
+#endif
+
+/*-- vec_eqv ----------------------------------------------------------------*/
+
+#if __ARCH__ >= 12
+static inline __ATTRS_o_ai vector bool char
+vec_eqv(vector bool char __a, vector bool char __b) {
+  return ~(__a ^ __b);
+}
+
+static inline __ATTRS_o_ai vector signed char
+vec_eqv(vector signed char __a, vector signed char __b) {
+  return ~(__a ^ __b);
+}
+
+static inline __ATTRS_o_ai vector unsigned char
+vec_eqv(vector unsigned char __a, vector unsigned char __b) {
+  return ~(__a ^ __b);
+}
+
+static inline __ATTRS_o_ai vector bool short
+vec_eqv(vector bool short __a, vector bool short __b) {
+  return ~(__a ^ __b);
+}
+
+static inline __ATTRS_o_ai vector signed short
+vec_eqv(vector signed short __a, vector signed short __b) {
+  return ~(__a ^ __b);
+}
+
+static inline __ATTRS_o_ai vector unsigned short
+vec_eqv(vector unsigned short __a, vector unsigned short __b) {
+  return ~(__a ^ __b);
+}
+
+static inline __ATTRS_o_ai vector bool int
+vec_eqv(vector bool int __a, vector bool int __b) {
+  return ~(__a ^ __b);
+}
+
+static inline __ATTRS_o_ai vector signed int
+vec_eqv(vector signed int __a, vector signed int __b) {
+  return ~(__a ^ __b);
+}
+
+static inline __ATTRS_o_ai vector unsigned int
+vec_eqv(vector unsigned int __a, vector unsigned int __b) {
+  return ~(__a ^ __b);
+}
+
+static inline __ATTRS_o_ai vector bool long long
+vec_eqv(vector bool long long __a, vector bool long long __b) {
+  return ~(__a ^ __b);
+}
+
+static inline __ATTRS_o_ai vector signed long long
+vec_eqv(vector signed long long __a, vector signed long long __b) {
+  return ~(__a ^ __b);
+}
+
+static inline __ATTRS_o_ai vector unsigned long long
+vec_eqv(vector unsigned long long __a, vector unsigned long long __b) {
+  return ~(__a ^ __b);
+}
+
+static inline __ATTRS_o_ai vector float
+vec_eqv(vector float __a, vector float __b) {
+  return (vector float)~((vector unsigned int)__a ^
+                         (vector unsigned int)__b);
+}
+
+static inline __ATTRS_o_ai vector double
+vec_eqv(vector double __a, vector double __b) {
+  return (vector double)~((vector unsigned long long)__a ^
+                          (vector unsigned long long)__b);
+}
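+// Usage sketch: vec_eqv returns the bitwise equivalence (complemented
+// XOR), e.g.
+//   vector unsigned int __r = vec_eqv(__x, __y);  // __r[i] == ~(__x[i] ^ __y[i])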
+#endif
+
 /*-- vec_cntlz --------------------------------------------------------------*/
 
 static inline __ATTRS_o_ai vector unsigned char
@@ -5323,30 +6385,35 @@ vec_sll(vector signed char __a, vector unsigned char __b) {
     (vector unsigned char)__a, __b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector signed char
 vec_sll(vector signed char __a, vector unsigned short __b) {
   return (vector signed char)__builtin_s390_vsl(
     (vector unsigned char)__a, (vector unsigned char)__b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector signed char
 vec_sll(vector signed char __a, vector unsigned int __b) {
   return (vector signed char)__builtin_s390_vsl(
     (vector unsigned char)__a, (vector unsigned char)__b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector bool char
 vec_sll(vector bool char __a, vector unsigned char __b) {
   return (vector bool char)__builtin_s390_vsl(
     (vector unsigned char)__a, __b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector bool char
 vec_sll(vector bool char __a, vector unsigned short __b) {
   return (vector bool char)__builtin_s390_vsl(
     (vector unsigned char)__a, (vector unsigned char)__b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector bool char
 vec_sll(vector bool char __a, vector unsigned int __b) {
   return (vector bool char)__builtin_s390_vsl(
@@ -5358,11 +6425,13 @@ vec_sll(vector unsigned char __a, vector unsigned char __b) {
   return __builtin_s390_vsl(__a, __b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector unsigned char
 vec_sll(vector unsigned char __a, vector unsigned short __b) {
   return __builtin_s390_vsl(__a, (vector unsigned char)__b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector unsigned char
 vec_sll(vector unsigned char __a, vector unsigned int __b) {
   return __builtin_s390_vsl(__a, (vector unsigned char)__b);
@@ -5374,30 +6443,35 @@ vec_sll(vector signed short __a, vector unsigned char __b) {
     (vector unsigned char)__a, __b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector signed short
 vec_sll(vector signed short __a, vector unsigned short __b) {
   return (vector signed short)__builtin_s390_vsl(
     (vector unsigned char)__a, (vector unsigned char)__b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector signed short
 vec_sll(vector signed short __a, vector unsigned int __b) {
   return (vector signed short)__builtin_s390_vsl(
     (vector unsigned char)__a, (vector unsigned char)__b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector bool short
 vec_sll(vector bool short __a, vector unsigned char __b) {
   return (vector bool short)__builtin_s390_vsl(
     (vector unsigned char)__a, __b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector bool short
 vec_sll(vector bool short __a, vector unsigned short __b) {
   return (vector bool short)__builtin_s390_vsl(
     (vector unsigned char)__a, (vector unsigned char)__b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector bool short
 vec_sll(vector bool short __a, vector unsigned int __b) {
   return (vector bool short)__builtin_s390_vsl(
@@ -5410,12 +6484,14 @@ vec_sll(vector unsigned short __a, vector unsigned char __b) {
     (vector unsigned char)__a, __b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector unsigned short
 vec_sll(vector unsigned short __a, vector unsigned short __b) {
   return (vector unsigned short)__builtin_s390_vsl(
     (vector unsigned char)__a, (vector unsigned char)__b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector unsigned short
 vec_sll(vector unsigned short __a, vector unsigned int __b) {
   return (vector unsigned short)__builtin_s390_vsl(
@@ -5428,30 +6504,35 @@ vec_sll(vector signed int __a, vector unsigned char __b) {
     (vector unsigned char)__a, __b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector signed int
 vec_sll(vector signed int __a, vector unsigned short __b) {
   return (vector signed int)__builtin_s390_vsl(
     (vector unsigned char)__a, (vector unsigned char)__b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector signed int
 vec_sll(vector signed int __a, vector unsigned int __b) {
   return (vector signed int)__builtin_s390_vsl(
     (vector unsigned char)__a, (vector unsigned char)__b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector bool int
 vec_sll(vector bool int __a, vector unsigned char __b) {
   return (vector bool int)__builtin_s390_vsl(
     (vector unsigned char)__a, __b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector bool int
 vec_sll(vector bool int __a, vector unsigned short __b) {
   return (vector bool int)__builtin_s390_vsl(
     (vector unsigned char)__a, (vector unsigned char)__b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector bool int
 vec_sll(vector bool int __a, vector unsigned int __b) {
   return (vector bool int)__builtin_s390_vsl(
@@ -5464,12 +6545,14 @@ vec_sll(vector unsigned int __a, vector unsigned char __b) {
     (vector unsigned char)__a, __b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector unsigned int
 vec_sll(vector unsigned int __a, vector unsigned short __b) {
   return (vector unsigned int)__builtin_s390_vsl(
     (vector unsigned char)__a, (vector unsigned char)__b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector unsigned int
 vec_sll(vector unsigned int __a, vector unsigned int __b) {
   return (vector unsigned int)__builtin_s390_vsl(
@@ -5482,30 +6565,35 @@ vec_sll(vector signed long long __a, vector unsigned char __b) {
     (vector unsigned char)__a, __b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector signed long long
 vec_sll(vector signed long long __a, vector unsigned short __b) {
   return (vector signed long long)__builtin_s390_vsl(
     (vector unsigned char)__a, (vector unsigned char)__b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector signed long long
 vec_sll(vector signed long long __a, vector unsigned int __b) {
   return (vector signed long long)__builtin_s390_vsl(
     (vector unsigned char)__a, (vector unsigned char)__b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector bool long long
 vec_sll(vector bool long long __a, vector unsigned char __b) {
   return (vector bool long long)__builtin_s390_vsl(
     (vector unsigned char)__a, __b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector bool long long
 vec_sll(vector bool long long __a, vector unsigned short __b) {
   return (vector bool long long)__builtin_s390_vsl(
     (vector unsigned char)__a, (vector unsigned char)__b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector bool long long
 vec_sll(vector bool long long __a, vector unsigned int __b) {
   return (vector bool long long)__builtin_s390_vsl(
@@ -5518,12 +6606,14 @@ vec_sll(vector unsigned long long __a, vector unsigned char __b) {
     (vector unsigned char)__a, __b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector unsigned long long
 vec_sll(vector unsigned long long __a, vector unsigned short __b) {
   return (vector unsigned long long)__builtin_s390_vsl(
     (vector unsigned char)__a, (vector unsigned char)__b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector unsigned long long
 vec_sll(vector unsigned long long __a, vector unsigned int __b) {
   return (vector unsigned long long)__builtin_s390_vsl(
@@ -5626,6 +6716,20 @@ vec_slb(vector unsigned long long __a, vector unsigned long long __b) {
     (vector unsigned char)__a, (vector unsigned char)__b);
 }
 
+#if __ARCH__ >= 12
+static inline __ATTRS_o_ai vector float
+vec_slb(vector float __a, vector signed int __b) {
+  return (vector float)__builtin_s390_vslb(
+    (vector unsigned char)__a, (vector unsigned char)__b);
+}
+
+static inline __ATTRS_o_ai vector float
+vec_slb(vector float __a, vector unsigned int __b) {
+  return (vector float)__builtin_s390_vslb(
+    (vector unsigned char)__a, (vector unsigned char)__b);
+}
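+// As with the existing double overloads, the float overloads of the
+// byte-wise shifts operate on the raw 128 bits of the vector; the element
+// type only affects the result's type, not the shift itself.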
+#endif
+
 static inline __ATTRS_o_ai vector double
 vec_slb(vector double __a, vector signed long long __b) {
   return (vector double)__builtin_s390_vslb(
@@ -5644,6 +6748,10 @@ extern __ATTRS_o vector signed char
 vec_sld(vector signed char __a, vector signed char __b, int __c)
   __constant_range(__c, 0, 15);
 
+extern __ATTRS_o vector bool char
+vec_sld(vector bool char __a, vector bool char __b, int __c)
+  __constant_range(__c, 0, 15);
+
 extern __ATTRS_o vector unsigned char
 vec_sld(vector unsigned char __a, vector unsigned char __b, int __c)
   __constant_range(__c, 0, 15);
@@ -5652,6 +6760,10 @@ extern __ATTRS_o vector signed short
 vec_sld(vector signed short __a, vector signed short __b, int __c)
   __constant_range(__c, 0, 15);
 
+extern __ATTRS_o vector bool short
+vec_sld(vector bool short __a, vector bool short __b, int __c)
+  __constant_range(__c, 0, 15);
+
 extern __ATTRS_o vector unsigned short
 vec_sld(vector unsigned short __a, vector unsigned short __b, int __c)
   __constant_range(__c, 0, 15);
@@ -5660,6 +6772,10 @@ extern __ATTRS_o vector signed int
 vec_sld(vector signed int __a, vector signed int __b, int __c)
   __constant_range(__c, 0, 15);
 
+extern __ATTRS_o vector bool int
+vec_sld(vector bool int __a, vector bool int __b, int __c)
+  __constant_range(__c, 0, 15);
+
 extern __ATTRS_o vector unsigned int
 vec_sld(vector unsigned int __a, vector unsigned int __b, int __c)
   __constant_range(__c, 0, 15);
@@ -5668,10 +6784,20 @@ extern __ATTRS_o vector signed long long
 vec_sld(vector signed long long __a, vector signed long long __b, int __c)
   __constant_range(__c, 0, 15);
 
+extern __ATTRS_o vector bool long long
+vec_sld(vector bool long long __a, vector bool long long __b, int __c)
+  __constant_range(__c, 0, 15);
+
 extern __ATTRS_o vector unsigned long long
 vec_sld(vector unsigned long long __a, vector unsigned long long __b, int __c)
   __constant_range(__c, 0, 15);
 
+#if __ARCH__ >= 12
+extern __ATTRS_o vector float
+vec_sld(vector float __a, vector float __b, int __c)
+  __constant_range(__c, 0, 15);
+#endif
+
 extern __ATTRS_o vector double
 vec_sld(vector double __a, vector double __b, int __c)
   __constant_range(__c, 0, 15);
@@ -5714,6 +6840,7 @@ extern __ATTRS_o vector unsigned long long
 vec_sldw(vector unsigned long long __a, vector unsigned long long __b, int __c)
   __constant_range(__c, 0, 3);
 
+// This prototype is deprecated.
 extern __ATTRS_o vector double
 vec_sldw(vector double __a, vector double __b, int __c)
   __constant_range(__c, 0, 3);
@@ -5730,30 +6857,35 @@ vec_sral(vector signed char __a, vector unsigned char __b) {
     (vector unsigned char)__a, __b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector signed char
 vec_sral(vector signed char __a, vector unsigned short __b) {
   return (vector signed char)__builtin_s390_vsra(
     (vector unsigned char)__a, (vector unsigned char)__b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector signed char
 vec_sral(vector signed char __a, vector unsigned int __b) {
   return (vector signed char)__builtin_s390_vsra(
     (vector unsigned char)__a, (vector unsigned char)__b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector bool char
 vec_sral(vector bool char __a, vector unsigned char __b) {
   return (vector bool char)__builtin_s390_vsra(
     (vector unsigned char)__a, __b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector bool char
 vec_sral(vector bool char __a, vector unsigned short __b) {
   return (vector bool char)__builtin_s390_vsra(
     (vector unsigned char)__a, (vector unsigned char)__b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector bool char
 vec_sral(vector bool char __a, vector unsigned int __b) {
   return (vector bool char)__builtin_s390_vsra(
@@ -5765,11 +6897,13 @@ vec_sral(vector unsigned char __a, vector unsigned char __b) {
   return __builtin_s390_vsra(__a, __b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector unsigned char
 vec_sral(vector unsigned char __a, vector unsigned short __b) {
   return __builtin_s390_vsra(__a, (vector unsigned char)__b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector unsigned char
 vec_sral(vector unsigned char __a, vector unsigned int __b) {
   return __builtin_s390_vsra(__a, (vector unsigned char)__b);
@@ -5781,30 +6915,35 @@ vec_sral(vector signed short __a, vector unsigned char __b) {
     (vector unsigned char)__a, __b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector signed short
 vec_sral(vector signed short __a, vector unsigned short __b) {
   return (vector signed short)__builtin_s390_vsra(
     (vector unsigned char)__a, (vector unsigned char)__b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector signed short
 vec_sral(vector signed short __a, vector unsigned int __b) {
   return (vector signed short)__builtin_s390_vsra(
     (vector unsigned char)__a, (vector unsigned char)__b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector bool short
 vec_sral(vector bool short __a, vector unsigned char __b) {
   return (vector bool short)__builtin_s390_vsra(
     (vector unsigned char)__a, __b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector bool short
 vec_sral(vector bool short __a, vector unsigned short __b) {
   return (vector bool short)__builtin_s390_vsra(
     (vector unsigned char)__a, (vector unsigned char)__b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector bool short
 vec_sral(vector bool short __a, vector unsigned int __b) {
   return (vector bool short)__builtin_s390_vsra(
@@ -5817,12 +6956,14 @@ vec_sral(vector unsigned short __a, vector unsigned char __b) {
     (vector unsigned char)__a, __b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector unsigned short
 vec_sral(vector unsigned short __a, vector unsigned short __b) {
   return (vector unsigned short)__builtin_s390_vsra(
     (vector unsigned char)__a, (vector unsigned char)__b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector unsigned short
 vec_sral(vector unsigned short __a, vector unsigned int __b) {
   return (vector unsigned short)__builtin_s390_vsra(
@@ -5835,30 +6976,35 @@ vec_sral(vector signed int __a, vector unsigned char __b) {
     (vector unsigned char)__a, __b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector signed int
 vec_sral(vector signed int __a, vector unsigned short __b) {
   return (vector signed int)__builtin_s390_vsra(
     (vector unsigned char)__a, (vector unsigned char)__b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector signed int
 vec_sral(vector signed int __a, vector unsigned int __b) {
   return (vector signed int)__builtin_s390_vsra(
     (vector unsigned char)__a, (vector unsigned char)__b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector bool int
 vec_sral(vector bool int __a, vector unsigned char __b) {
   return (vector bool int)__builtin_s390_vsra(
     (vector unsigned char)__a, __b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector bool int
 vec_sral(vector bool int __a, vector unsigned short __b) {
   return (vector bool int)__builtin_s390_vsra(
     (vector unsigned char)__a, (vector unsigned char)__b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector bool int
 vec_sral(vector bool int __a, vector unsigned int __b) {
   return (vector bool int)__builtin_s390_vsra(
@@ -5871,12 +7017,14 @@ vec_sral(vector unsigned int __a, vector unsigned char __b) {
     (vector unsigned char)__a, __b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector unsigned int
 vec_sral(vector unsigned int __a, vector unsigned short __b) {
   return (vector unsigned int)__builtin_s390_vsra(
     (vector unsigned char)__a, (vector unsigned char)__b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector unsigned int
 vec_sral(vector unsigned int __a, vector unsigned int __b) {
   return (vector unsigned int)__builtin_s390_vsra(
@@ -5889,30 +7037,35 @@ vec_sral(vector signed long long __a, vector unsigned char __b) {
     (vector unsigned char)__a, __b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector signed long long
 vec_sral(vector signed long long __a, vector unsigned short __b) {
   return (vector signed long long)__builtin_s390_vsra(
     (vector unsigned char)__a, (vector unsigned char)__b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector signed long long
 vec_sral(vector signed long long __a, vector unsigned int __b) {
   return (vector signed long long)__builtin_s390_vsra(
     (vector unsigned char)__a, (vector unsigned char)__b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector bool long long
 vec_sral(vector bool long long __a, vector unsigned char __b) {
   return (vector bool long long)__builtin_s390_vsra(
     (vector unsigned char)__a, __b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector bool long long
 vec_sral(vector bool long long __a, vector unsigned short __b) {
   return (vector bool long long)__builtin_s390_vsra(
     (vector unsigned char)__a, (vector unsigned char)__b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector bool long long
 vec_sral(vector bool long long __a, vector unsigned int __b) {
   return (vector bool long long)__builtin_s390_vsra(
@@ -5925,12 +7078,14 @@ vec_sral(vector unsigned long long __a, vector unsigned char __b) {
     (vector unsigned char)__a, __b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector unsigned long long
 vec_sral(vector unsigned long long __a, vector unsigned short __b) {
   return (vector unsigned long long)__builtin_s390_vsra(
     (vector unsigned char)__a, (vector unsigned char)__b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector unsigned long long
 vec_sral(vector unsigned long long __a, vector unsigned int __b) {
   return (vector unsigned long long)__builtin_s390_vsra(
@@ -6033,6 +7188,20 @@ vec_srab(vector unsigned long long __a, vector unsigned long long __b) {
     (vector unsigned char)__a, (vector unsigned char)__b);
 }
 
+#if __ARCH__ >= 12
+static inline __ATTRS_o_ai vector float
+vec_srab(vector float __a, vector signed int __b) {
+  return (vector float)__builtin_s390_vsrab(
+    (vector unsigned char)__a, (vector unsigned char)__b);
+}
+
+static inline __ATTRS_o_ai vector float
+vec_srab(vector float __a, vector unsigned int __b) {
+  return (vector float)__builtin_s390_vsrab(
+    (vector unsigned char)__a, (vector unsigned char)__b);
+}
+#endif
+
 static inline __ATTRS_o_ai vector double
 vec_srab(vector double __a, vector signed long long __b) {
   return (vector double)__builtin_s390_vsrab(
@@ -6053,30 +7222,35 @@ vec_srl(vector signed char __a, vector unsigned char __b) {
     (vector unsigned char)__a, __b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector signed char
 vec_srl(vector signed char __a, vector unsigned short __b) {
   return (vector signed char)__builtin_s390_vsrl(
     (vector unsigned char)__a, (vector unsigned char)__b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector signed char
 vec_srl(vector signed char __a, vector unsigned int __b) {
   return (vector signed char)__builtin_s390_vsrl(
     (vector unsigned char)__a, (vector unsigned char)__b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector bool char
 vec_srl(vector bool char __a, vector unsigned char __b) {
   return (vector bool char)__builtin_s390_vsrl(
     (vector unsigned char)__a, __b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector bool char
 vec_srl(vector bool char __a, vector unsigned short __b) {
   return (vector bool char)__builtin_s390_vsrl(
     (vector unsigned char)__a, (vector unsigned char)__b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector bool char
 vec_srl(vector bool char __a, vector unsigned int __b) {
   return (vector bool char)__builtin_s390_vsrl(
@@ -6088,11 +7262,13 @@ vec_srl(vector unsigned char __a, vector unsigned char __b) {
   return __builtin_s390_vsrl(__a, __b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector unsigned char
 vec_srl(vector unsigned char __a, vector unsigned short __b) {
   return __builtin_s390_vsrl(__a, (vector unsigned char)__b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector unsigned char
 vec_srl(vector unsigned char __a, vector unsigned int __b) {
   return __builtin_s390_vsrl(__a, (vector unsigned char)__b);
@@ -6104,30 +7280,35 @@ vec_srl(vector signed short __a, vector unsigned char __b) {
     (vector unsigned char)__a, __b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector signed short
 vec_srl(vector signed short __a, vector unsigned short __b) {
   return (vector signed short)__builtin_s390_vsrl(
     (vector unsigned char)__a, (vector unsigned char)__b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector signed short
 vec_srl(vector signed short __a, vector unsigned int __b) {
   return (vector signed short)__builtin_s390_vsrl(
     (vector unsigned char)__a, (vector unsigned char)__b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector bool short
 vec_srl(vector bool short __a, vector unsigned char __b) {
   return (vector bool short)__builtin_s390_vsrl(
     (vector unsigned char)__a, __b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector bool short
 vec_srl(vector bool short __a, vector unsigned short __b) {
   return (vector bool short)__builtin_s390_vsrl(
     (vector unsigned char)__a, (vector unsigned char)__b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector bool short
 vec_srl(vector bool short __a, vector unsigned int __b) {
   return (vector bool short)__builtin_s390_vsrl(
@@ -6140,12 +7321,14 @@ vec_srl(vector unsigned short __a, vector unsigned char __b) {
     (vector unsigned char)__a, __b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector unsigned short
 vec_srl(vector unsigned short __a, vector unsigned short __b) {
   return (vector unsigned short)__builtin_s390_vsrl(
     (vector unsigned char)__a, (vector unsigned char)__b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector unsigned short
 vec_srl(vector unsigned short __a, vector unsigned int __b) {
   return (vector unsigned short)__builtin_s390_vsrl(
@@ -6158,30 +7341,35 @@ vec_srl(vector signed int __a, vector unsigned char __b) {
     (vector unsigned char)__a, __b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector signed int
 vec_srl(vector signed int __a, vector unsigned short __b) {
   return (vector signed int)__builtin_s390_vsrl(
     (vector unsigned char)__a, (vector unsigned char)__b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector signed int
 vec_srl(vector signed int __a, vector unsigned int __b) {
   return (vector signed int)__builtin_s390_vsrl(
     (vector unsigned char)__a, (vector unsigned char)__b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector bool int
 vec_srl(vector bool int __a, vector unsigned char __b) {
   return (vector bool int)__builtin_s390_vsrl(
     (vector unsigned char)__a, __b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector bool int
 vec_srl(vector bool int __a, vector unsigned short __b) {
   return (vector bool int)__builtin_s390_vsrl(
     (vector unsigned char)__a, (vector unsigned char)__b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector bool int
 vec_srl(vector bool int __a, vector unsigned int __b) {
   return (vector bool int)__builtin_s390_vsrl(
@@ -6194,12 +7382,14 @@ vec_srl(vector unsigned int __a, vector unsigned char __b) {
     (vector unsigned char)__a, __b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector unsigned int
 vec_srl(vector unsigned int __a, vector unsigned short __b) {
   return (vector unsigned int)__builtin_s390_vsrl(
     (vector unsigned char)__a, (vector unsigned char)__b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector unsigned int
 vec_srl(vector unsigned int __a, vector unsigned int __b) {
   return (vector unsigned int)__builtin_s390_vsrl(
@@ -6212,30 +7402,35 @@ vec_srl(vector signed long long __a, vector unsigned char __b) {
     (vector unsigned char)__a, __b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector signed long long
 vec_srl(vector signed long long __a, vector unsigned short __b) {
   return (vector signed long long)__builtin_s390_vsrl(
     (vector unsigned char)__a, (vector unsigned char)__b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector signed long long
 vec_srl(vector signed long long __a, vector unsigned int __b) {
   return (vector signed long long)__builtin_s390_vsrl(
     (vector unsigned char)__a, (vector unsigned char)__b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector bool long long
 vec_srl(vector bool long long __a, vector unsigned char __b) {
   return (vector bool long long)__builtin_s390_vsrl(
     (vector unsigned char)__a, __b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector bool long long
 vec_srl(vector bool long long __a, vector unsigned short __b) {
   return (vector bool long long)__builtin_s390_vsrl(
     (vector unsigned char)__a, (vector unsigned char)__b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector bool long long
 vec_srl(vector bool long long __a, vector unsigned int __b) {
   return (vector bool long long)__builtin_s390_vsrl(
@@ -6248,12 +7443,14 @@ vec_srl(vector unsigned long long __a, vector unsigned char __b) {
     (vector unsigned char)__a, __b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector unsigned long long
 vec_srl(vector unsigned long long __a, vector unsigned short __b) {
   return (vector unsigned long long)__builtin_s390_vsrl(
     (vector unsigned char)__a, (vector unsigned char)__b);
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector unsigned long long
 vec_srl(vector unsigned long long __a, vector unsigned int __b) {
   return (vector unsigned long long)__builtin_s390_vsrl(
@@ -6356,6 +7553,20 @@ vec_srb(vector unsigned long long __a, vector unsigned long long __b) {
     (vector unsigned char)__a, (vector unsigned char)__b);
 }
 
+#if __ARCH__ >= 12
+static inline __ATTRS_o_ai vector float
+vec_srb(vector float __a, vector signed int __b) {
+  return (vector float)__builtin_s390_vsrlb(
+    (vector unsigned char)__a, (vector unsigned char)__b);
+}
+
+static inline __ATTRS_o_ai vector float
+vec_srb(vector float __a, vector unsigned int __b) {
+  return (vector float)__builtin_s390_vsrlb(
+    (vector unsigned char)__a, (vector unsigned char)__b);
+}
+#endif
+
 static inline __ATTRS_o_ai vector double
 vec_srb(vector double __a, vector signed long long __b) {
   return (vector double)__builtin_s390_vsrlb(
@@ -6390,6 +7601,13 @@ vec_abs(vector signed long long __a) {
   return vec_sel(__a, -__a, vec_cmplt(__a, (vector signed long long)0));
 }
 
+#if __ARCH__ >= 12
+static inline __ATTRS_o_ai vector float
+vec_abs(vector float __a) {
+  return __builtin_s390_vflpsb(__a);
+}
+#endif
+
 static inline __ATTRS_o_ai vector double
 vec_abs(vector double __a) {
   return __builtin_s390_vflpdb(__a);
@@ -6397,7 +7615,14 @@ vec_abs(vector double __a) {
 
 /*-- vec_nabs ---------------------------------------------------------------*/
 
-static inline __ATTRS_ai vector double
+#if __ARCH__ >= 12
+static inline __ATTRS_o_ai vector float
+vec_nabs(vector float __a) {
+  return __builtin_s390_vflnsb(__a);
+}
+#endif
+
+static inline __ATTRS_o_ai vector double
 vec_nabs(vector double __a) {
   return __builtin_s390_vflndb(__a);
 }
@@ -6409,12 +7634,14 @@ vec_max(vector signed char __a, vector signed char __b) {
   return vec_sel(__b, __a, vec_cmpgt(__a, __b));
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector signed char
 vec_max(vector signed char __a, vector bool char __b) {
   vector signed char __bc = (vector signed char)__b;
   return vec_sel(__bc, __a, vec_cmpgt(__a, __bc));
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector signed char
 vec_max(vector bool char __a, vector signed char __b) {
   vector signed char __ac = (vector signed char)__a;
@@ -6426,12 +7653,14 @@ vec_max(vector unsigned char __a, vector unsigned char __b) {
   return vec_sel(__b, __a, vec_cmpgt(__a, __b));
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector unsigned char
 vec_max(vector unsigned char __a, vector bool char __b) {
   vector unsigned char __bc = (vector unsigned char)__b;
   return vec_sel(__bc, __a, vec_cmpgt(__a, __bc));
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector unsigned char
 vec_max(vector bool char __a, vector unsigned char __b) {
   vector unsigned char __ac = (vector unsigned char)__a;
@@ -6443,12 +7672,14 @@ vec_max(vector signed short __a, vector signed short __b) {
   return vec_sel(__b, __a, vec_cmpgt(__a, __b));
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector signed short
 vec_max(vector signed short __a, vector bool short __b) {
   vector signed short __bc = (vector signed short)__b;
   return vec_sel(__bc, __a, vec_cmpgt(__a, __bc));
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector signed short
 vec_max(vector bool short __a, vector signed short __b) {
   vector signed short __ac = (vector signed short)__a;
@@ -6460,12 +7691,14 @@ vec_max(vector unsigned short __a, vector unsigned short __b) {
   return vec_sel(__b, __a, vec_cmpgt(__a, __b));
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector unsigned short
 vec_max(vector unsigned short __a, vector bool short __b) {
   vector unsigned short __bc = (vector unsigned short)__b;
   return vec_sel(__bc, __a, vec_cmpgt(__a, __bc));
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector unsigned short
 vec_max(vector bool short __a, vector unsigned short __b) {
   vector unsigned short __ac = (vector unsigned short)__a;
@@ -6477,12 +7710,14 @@ vec_max(vector signed int __a, vector signed int __b) {
   return vec_sel(__b, __a, vec_cmpgt(__a, __b));
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector signed int
 vec_max(vector signed int __a, vector bool int __b) {
   vector signed int __bc = (vector signed int)__b;
   return vec_sel(__bc, __a, vec_cmpgt(__a, __bc));
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector signed int
 vec_max(vector bool int __a, vector signed int __b) {
   vector signed int __ac = (vector signed int)__a;
@@ -6494,12 +7729,14 @@ vec_max(vector unsigned int __a, vector unsigned int __b) {
   return vec_sel(__b, __a, vec_cmpgt(__a, __b));
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector unsigned int
 vec_max(vector unsigned int __a, vector bool int __b) {
   vector unsigned int __bc = (vector unsigned int)__b;
   return vec_sel(__bc, __a, vec_cmpgt(__a, __bc));
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector unsigned int
 vec_max(vector bool int __a, vector unsigned int __b) {
   vector unsigned int __ac = (vector unsigned int)__a;
@@ -6511,12 +7748,14 @@ vec_max(vector signed long long __a, vector signed long long __b) {
   return vec_sel(__b, __a, vec_cmpgt(__a, __b));
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector signed long long
 vec_max(vector signed long long __a, vector bool long long __b) {
   vector signed long long __bc = (vector signed long long)__b;
   return vec_sel(__bc, __a, vec_cmpgt(__a, __bc));
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector signed long long
 vec_max(vector bool long long __a, vector signed long long __b) {
   vector signed long long __ac = (vector signed long long)__a;
@@ -6528,21 +7767,34 @@ vec_max(vector unsigned long long __a, vector unsigned long long __b) {
   return vec_sel(__b, __a, vec_cmpgt(__a, __b));
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector unsigned long long
 vec_max(vector unsigned long long __a, vector bool long long __b) {
   vector unsigned long long __bc = (vector unsigned long long)__b;
   return vec_sel(__bc, __a, vec_cmpgt(__a, __bc));
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector unsigned long long
 vec_max(vector bool long long __a, vector unsigned long long __b) {
   vector unsigned long long __ac = (vector unsigned long long)__a;
   return vec_sel(__b, __ac, vec_cmpgt(__ac, __b));
 }
 
+#if __ARCH__ >= 12
+static inline __ATTRS_o_ai vector float
+vec_max(vector float __a, vector float __b) {
+  return __builtin_s390_vfmaxsb(__a, __b, 0);
+}
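+// The trailing constant is the function code of the underlying vfmaxsb
+// instruction; 0 selects the IEEE-style maxNum behavior. vec_min below
+// uses the matching vfminsb/vfmindb function code.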
+#endif
+
 static inline __ATTRS_o_ai vector double
 vec_max(vector double __a, vector double __b) {
+#if __ARCH__ >= 12
+  return __builtin_s390_vfmaxdb(__a, __b, 0);
+#else
   return vec_sel(__b, __a, vec_cmpgt(__a, __b));
+#endif
 }
 
 /*-- vec_min ----------------------------------------------------------------*/
@@ -6552,12 +7804,14 @@ vec_min(vector signed char __a, vector signed char __b) {
   return vec_sel(__a, __b, vec_cmpgt(__a, __b));
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector signed char
 vec_min(vector signed char __a, vector bool char __b) {
   vector signed char __bc = (vector signed char)__b;
   return vec_sel(__a, __bc, vec_cmpgt(__a, __bc));
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector signed char
 vec_min(vector bool char __a, vector signed char __b) {
   vector signed char __ac = (vector signed char)__a;
@@ -6569,12 +7823,14 @@ vec_min(vector unsigned char __a, vector unsigned char __b) {
   return vec_sel(__a, __b, vec_cmpgt(__a, __b));
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector unsigned char
 vec_min(vector unsigned char __a, vector bool char __b) {
   vector unsigned char __bc = (vector unsigned char)__b;
   return vec_sel(__a, __bc, vec_cmpgt(__a, __bc));
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector unsigned char
 vec_min(vector bool char __a, vector unsigned char __b) {
   vector unsigned char __ac = (vector unsigned char)__a;
@@ -6586,12 +7842,14 @@ vec_min(vector signed short __a, vector signed short __b) {
   return vec_sel(__a, __b, vec_cmpgt(__a, __b));
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector signed short
 vec_min(vector signed short __a, vector bool short __b) {
   vector signed short __bc = (vector signed short)__b;
   return vec_sel(__a, __bc, vec_cmpgt(__a, __bc));
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector signed short
 vec_min(vector bool short __a, vector signed short __b) {
   vector signed short __ac = (vector signed short)__a;
@@ -6603,12 +7861,14 @@ vec_min(vector unsigned short __a, vector unsigned short __b) {
   return vec_sel(__a, __b, vec_cmpgt(__a, __b));
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector unsigned short
 vec_min(vector unsigned short __a, vector bool short __b) {
   vector unsigned short __bc = (vector unsigned short)__b;
   return vec_sel(__a, __bc, vec_cmpgt(__a, __bc));
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector unsigned short
 vec_min(vector bool short __a, vector unsigned short __b) {
   vector unsigned short __ac = (vector unsigned short)__a;
@@ -6620,12 +7880,14 @@ vec_min(vector signed int __a, vector signed int __b) {
   return vec_sel(__a, __b, vec_cmpgt(__a, __b));
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector signed int
 vec_min(vector signed int __a, vector bool int __b) {
   vector signed int __bc = (vector signed int)__b;
   return vec_sel(__a, __bc, vec_cmpgt(__a, __bc));
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector signed int
 vec_min(vector bool int __a, vector signed int __b) {
   vector signed int __ac = (vector signed int)__a;
@@ -6637,12 +7899,14 @@ vec_min(vector unsigned int __a, vector unsigned int __b) {
   return vec_sel(__a, __b, vec_cmpgt(__a, __b));
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector unsigned int
 vec_min(vector unsigned int __a, vector bool int __b) {
   vector unsigned int __bc = (vector unsigned int)__b;
   return vec_sel(__a, __bc, vec_cmpgt(__a, __bc));
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector unsigned int
 vec_min(vector bool int __a, vector unsigned int __b) {
   vector unsigned int __ac = (vector unsigned int)__a;
@@ -6654,12 +7918,14 @@ vec_min(vector signed long long __a, vector signed long long __b) {
   return vec_sel(__a, __b, vec_cmpgt(__a, __b));
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector signed long long
 vec_min(vector signed long long __a, vector bool long long __b) {
   vector signed long long __bc = (vector signed long long)__b;
   return vec_sel(__a, __bc, vec_cmpgt(__a, __bc));
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector signed long long
 vec_min(vector bool long long __a, vector signed long long __b) {
   vector signed long long __ac = (vector signed long long)__a;
@@ -6671,21 +7937,34 @@ vec_min(vector unsigned long long __a, vector unsigned long long __b) {
   return vec_sel(__a, __b, vec_cmpgt(__a, __b));
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector unsigned long long
 vec_min(vector unsigned long long __a, vector bool long long __b) {
   vector unsigned long long __bc = (vector unsigned long long)__b;
   return vec_sel(__a, __bc, vec_cmpgt(__a, __bc));
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector unsigned long long
 vec_min(vector bool long long __a, vector unsigned long long __b) {
   vector unsigned long long __ac = (vector unsigned long long)__a;
   return vec_sel(__ac, __b, vec_cmpgt(__ac, __b));
 }
 
+#if __ARCH__ >= 12
+static inline __ATTRS_o_ai vector float
+vec_min(vector float __a, vector float __b) {
+  return __builtin_s390_vfminsb(__a, __b, 0);
+}
+#endif
+
 static inline __ATTRS_o_ai vector double
 vec_min(vector double __a, vector double __b) {
+#if __ARCH__ >= 12
+  return __builtin_s390_vfmindb(__a, __b, 0);
+#else
   return vec_sel(__a, __b, vec_cmpgt(__a, __b));
+#endif
 }
 
 /*-- vec_add_u128 -----------------------------------------------------------*/
@@ -7126,6 +8405,13 @@ vec_mulo(vector unsigned int __a, vector unsigned int __b) {
   return __builtin_s390_vmlof(__a, __b);
 }
 
+/*-- vec_msum_u128 ----------------------------------------------------------*/
+
+#if __ARCH__ >= 12
+#define vec_msum_u128(X, Y, Z, W) \
+  ((vector unsigned char)__builtin_s390_vmslg((X), (Y), (Z), (W)))
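+// Usage sketch: sums the products of the doubleword elements of X and Y,
+// adds the 128-bit value Z, and returns the 128-bit result; W is a
+// constant modifier passed through to the vmslg instruction, e.g.
+//   vector unsigned long long __x, __y;  // doubleword multiplicands
+//   vector unsigned char __z, __r;       // 128-bit addend and result
+//   __r = vec_msum_u128(__x, __y, __z, 0);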
+#endif
+
 /*-- vec_sub_u128 -----------------------------------------------------------*/
 
 static inline __ATTRS_ai vector unsigned char
@@ -7263,6 +8549,14 @@ vec_test_mask(vector unsigned long long __a, vector unsigned long long __b) {
                             (vector unsigned char)__b);
 }
 
+#if __ARCH__ >= 12
+static inline __ATTRS_o_ai int
+vec_test_mask(vector float __a, vector unsigned int __b) {
+  return __builtin_s390_vtm((vector unsigned char)__a,
+                            (vector unsigned char)__b);
+}
+#endif
+
 static inline __ATTRS_o_ai int
 vec_test_mask(vector double __a, vector unsigned long long __b) {
   return __builtin_s390_vtm((vector unsigned char)__a,
@@ -7271,27 +8565,77 @@ vec_test_mask(vector double __a, vector unsigned long long __b) {
 
 /*-- vec_madd ---------------------------------------------------------------*/
 
-static inline __ATTRS_ai vector double
+#if __ARCH__ >= 12
+static inline __ATTRS_o_ai vector float
+vec_madd(vector float __a, vector float __b, vector float __c) {
+  return __builtin_s390_vfmasb(__a, __b, __c);
+}
+#endif
+
+static inline __ATTRS_o_ai vector double
 vec_madd(vector double __a, vector double __b, vector double __c) {
   return __builtin_s390_vfmadb(__a, __b, __c);
 }
 
 /*-- vec_msub ---------------------------------------------------------------*/
 
-static inline __ATTRS_ai vector double
+#if __ARCH__ >= 12
+static inline __ATTRS_o_ai vector float
+vec_msub(vector float __a, vector float __b, vector float __c) {
+  return __builtin_s390_vfmssb(__a, __b, __c);
+}
+#endif
+
+static inline __ATTRS_o_ai vector double
 vec_msub(vector double __a, vector double __b, vector double __c) {
   return __builtin_s390_vfmsdb(__a, __b, __c);
 }
 
+/*-- vec_nmadd ---------------------------------------------------------------*/
+
+#if __ARCH__ >= 12
+static inline __ATTRS_o_ai vector float
+vec_nmadd(vector float __a, vector float __b, vector float __c) {
+  return __builtin_s390_vfnmasb(__a, __b, __c);
+}
+
+static inline __ATTRS_o_ai vector double
+vec_nmadd(vector double __a, vector double __b, vector double __c) {
+  return __builtin_s390_vfnmadb(__a, __b, __c);
+}
+#endif
+
+/*-- vec_nmsub ---------------------------------------------------------------*/
+
+#if __ARCH__ >= 12
+static inline __ATTRS_o_ai vector float
+vec_nmsub(vector float __a, vector float __b, vector float __c) {
+  return __builtin_s390_vfnmssb(__a, __b, __c);
+}
+
+static inline __ATTRS_o_ai vector double
+vec_nmsub(vector double __a, vector double __b, vector double __c) {
+  return __builtin_s390_vfnmsdb(__a, __b, __c);
+}
+#endif
+
 /*-- vec_sqrt ---------------------------------------------------------------*/
 
-static inline __ATTRS_ai vector double
+#if __ARCH__ >= 12
+static inline __ATTRS_o_ai vector float
+vec_sqrt(vector float __a) {
+  return __builtin_s390_vfsqsb(__a);
+}
+#endif
+
+static inline __ATTRS_o_ai vector double
 vec_sqrt(vector double __a) {
   return __builtin_s390_vfsqdb(__a);
 }
 
 /*-- vec_ld2f ---------------------------------------------------------------*/
 
+// This prototype is deprecated.
 static inline __ATTRS_ai vector double
 vec_ld2f(const float *__ptr) {
   typedef float __v2f32 __attribute__((__vector_size__(8)));
@@ -7300,6 +8644,7 @@ vec_ld2f(const float *__ptr) {
 
 /*-- vec_st2f ---------------------------------------------------------------*/
 
+// This prototype is deprecated.
 static inline __ATTRS_ai void
 vec_st2f(vector double __a, float *__ptr) {
   typedef float __v2f32 __attribute__((__vector_size__(8)));
@@ -7308,6 +8653,7 @@ vec_st2f(vector double __a, float *__ptr) {
 
 /*-- vec_ctd ----------------------------------------------------------------*/
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector double
 vec_ctd(vector signed long long __a, int __b)
   __constant_range(__b, 0, 31) {
@@ -7316,6 +8662,7 @@ vec_ctd(vector signed long long __a, int __b)
   return __conv;
 }
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector double
 vec_ctd(vector unsigned long long __a, int __b)
   __constant_range(__b, 0, 31) {
@@ -7326,6 +8673,7 @@ vec_ctd(vector unsigned long long __a, int __b)
 
 /*-- vec_ctsl ---------------------------------------------------------------*/
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector signed long long
 vec_ctsl(vector double __a, int __b)
   __constant_range(__b, 0, 31) {
@@ -7335,6 +8683,7 @@ vec_ctsl(vector double __a, int __b)
 
 /*-- vec_ctul ---------------------------------------------------------------*/
 
+// This prototype is deprecated.
 static inline __ATTRS_o_ai vector unsigned long long
 vec_ctul(vector double __a, int __b)
   __constant_range(__b, 0, 31) {
@@ -7342,16 +8691,79 @@ vec_ctul(vector double __a, int __b)
   return __builtin_convertvector(__a, vector unsigned long long);
 }
 
-/*-- vec_roundp -------------------------------------------------------------*/
+/*-- vec_doublee ------------------------------------------------------------*/
 
+#if __ARCH__ >= 12
 static inline __ATTRS_ai vector double
+vec_doublee(vector float __a) {
+  typedef float __v2f32 __attribute__((__vector_size__(8)));
+  __v2f32 __pack = __builtin_shufflevector(__a, __a, 0, 2);
+  return __builtin_convertvector(__pack, vector double);
+}
+#endif
+
+/*-- vec_floate -------------------------------------------------------------*/
+
+#if __ARCH__ >= 12
+static inline __ATTRS_ai vector float
+vec_floate(vector double __a) {
+  typedef float __v2f32 __attribute__((__vector_size__(8)));
+  __v2f32 __pack = __builtin_convertvector(__a, __v2f32);
+  return __builtin_shufflevector(__pack, __pack, 0, -1, 1, -1);
+}
+#endif
+
+/*-- vec_double -------------------------------------------------------------*/
+
+static inline __ATTRS_o_ai vector double
+vec_double(vector signed long long __a) {
+  return __builtin_convertvector(__a, vector double);
+}
+
+static inline __ATTRS_o_ai vector double
+vec_double(vector unsigned long long __a) {
+  return __builtin_convertvector(__a, vector double);
+}
+
+/*-- vec_signed -------------------------------------------------------------*/
+
+static inline __ATTRS_o_ai vector signed long long
+vec_signed(vector double __a) {
+  return __builtin_convertvector(__a, vector signed long long);
+}
+
+/*-- vec_unsigned -----------------------------------------------------------*/
+
+static inline __ATTRS_o_ai vector unsigned long long
+vec_unsigned(vector double __a) {
+  return __builtin_convertvector(__a, vector unsigned long long);
+}
+
+/*-- vec_roundp -------------------------------------------------------------*/
+
+#if __ARCH__ >= 12
+static inline __ATTRS_o_ai vector float
+vec_roundp(vector float __a) {
+  return __builtin_s390_vfisb(__a, 4, 6);
+}
+#endif
+
+static inline __ATTRS_o_ai vector double
 vec_roundp(vector double __a) {
   return __builtin_s390_vfidb(__a, 4, 6);
 }
 
 /*-- vec_ceil ---------------------------------------------------------------*/
 
-static inline __ATTRS_ai vector double
+#if __ARCH__ >= 12
+static inline __ATTRS_o_ai vector float
+vec_ceil(vector float __a) {
+  // On this platform, vec_ceil never triggers the IEEE-inexact exception.
+  return __builtin_s390_vfisb(__a, 4, 6);
+}
+#endif
+
+static inline __ATTRS_o_ai vector double
 vec_ceil(vector double __a) {
   // On this platform, vec_ceil never triggers the IEEE-inexact exception.
   return __builtin_s390_vfidb(__a, 4, 6);
@@ -7359,14 +8771,29 @@ vec_ceil(vector double __a) {
 
 /*-- vec_roundm -------------------------------------------------------------*/
 
-static inline __ATTRS_ai vector double
+#if __ARCH__ >= 12
+static inline __ATTRS_o_ai vector float
+vec_roundm(vector float __a) {
+  return __builtin_s390_vfisb(__a, 4, 7);
+}
+#endif
+
+static inline __ATTRS_o_ai vector double
 vec_roundm(vector double __a) {
   return __builtin_s390_vfidb(__a, 4, 7);
 }
 
 /*-- vec_floor --------------------------------------------------------------*/
 
-static inline __ATTRS_ai vector double
+#if __ARCH__ >= 12
+static inline __ATTRS_o_ai vector float
+vec_floor(vector float __a) {
+  // On this platform, vec_floor never triggers the IEEE-inexact exception.
+  return __builtin_s390_vfisb(__a, 4, 7);
+}
+#endif
+
+static inline __ATTRS_o_ai vector double
 vec_floor(vector double __a) {
   // On this platform, vec_floor never triggers the IEEE-inexact exception.
   return __builtin_s390_vfidb(__a, 4, 7);
@@ -7374,14 +8801,29 @@ vec_floor(vector double __a) {
 
 /*-- vec_roundz -------------------------------------------------------------*/
 
-static inline __ATTRS_ai vector double
+#if __ARCH__ >= 12
+static inline __ATTRS_o_ai vector float
+vec_roundz(vector float __a) {
+  return __builtin_s390_vfisb(__a, 4, 5);
+}
+#endif
+
+static inline __ATTRS_o_ai vector double
 vec_roundz(vector double __a) {
   return __builtin_s390_vfidb(__a, 4, 5);
 }
 
 /*-- vec_trunc --------------------------------------------------------------*/
 
-static inline __ATTRS_ai vector double
+#if __ARCH__ >= 12
+static inline __ATTRS_o_ai vector float
+vec_trunc(vector float __a) {
+  // On this platform, vec_trunc never triggers the IEEE-inexact exception.
+  return __builtin_s390_vfisb(__a, 4, 5);
+}
+#endif
+
+static inline __ATTRS_o_ai vector double
 vec_trunc(vector double __a) {
   // On this platform, vec_trunc never triggers the IEEE-inexact exception.
   return __builtin_s390_vfidb(__a, 4, 5);
@@ -7389,22 +8831,104 @@ vec_trunc(vector double __a) {
 
 /*-- vec_roundc -------------------------------------------------------------*/
 
-static inline __ATTRS_ai vector double
+#if __ARCH__ >= 12
+static inline __ATTRS_o_ai vector float
+vec_roundc(vector float __a) {
+  return __builtin_s390_vfisb(__a, 4, 0);
+}
+#endif
+
+static inline __ATTRS_o_ai vector double
 vec_roundc(vector double __a) {
   return __builtin_s390_vfidb(__a, 4, 0);
 }
 
+/*-- vec_rint ---------------------------------------------------------------*/
+
+#if __ARCH__ >= 12
+static inline __ATTRS_o_ai vector float
+vec_rint(vector float __a) {
+  // vec_rint may trigger the IEEE-inexact exception.
+  return __builtin_s390_vfisb(__a, 0, 0);
+}
+#endif
+
+static inline __ATTRS_o_ai vector double
+vec_rint(vector double __a) {
+  // vec_rint may trigger the IEEE-inexact exception.
+  return __builtin_s390_vfidb(__a, 0, 0);
+}
+
 /*-- vec_round --------------------------------------------------------------*/
 
-static inline __ATTRS_ai vector double
+#if __ARCH__ >= 12
+static inline __ATTRS_o_ai vector float
+vec_round(vector float __a) {
+  return __builtin_s390_vfisb(__a, 4, 4);
+}
+#endif
+
+static inline __ATTRS_o_ai vector double
 vec_round(vector double __a) {
   return __builtin_s390_vfidb(__a, 4, 4);
 }
 
 /*-- vec_fp_test_data_class -------------------------------------------------*/
 
+#if __ARCH__ >= 12
+extern __ATTRS_o vector bool int
+vec_fp_test_data_class(vector float __a, int __b, int *__c)
+  __constant_range(__b, 0, 4095);
+
+extern __ATTRS_o vector bool long long
+vec_fp_test_data_class(vector double __a, int __b, int *__c)
+  __constant_range(__b, 0, 4095);
+
+#define vec_fp_test_data_class(X, Y, Z) \
+  ((__typeof__((vec_fp_test_data_class)((X), (Y), (Z)))) \
+   __extension__ ({ \
+     vector unsigned char __res; \
+     vector unsigned char __x = (vector unsigned char)(X); \
+     int *__z = (Z); \
+     switch (sizeof ((X)[0])) { \
+     case 4:  __res = (vector unsigned char) \
+                      __builtin_s390_vftcisb((vector float)__x, (Y), __z); \
+              break; \
+     default: __res = (vector unsigned char) \
+                      __builtin_s390_vftcidb((vector double)__x, (Y), __z); \
+              break; \
+     } __res; }))
+#else
 #define vec_fp_test_data_class(X, Y, Z) \
   ((vector bool long long)__builtin_s390_vftcidb((X), (Y), (Z)))
+#endif
+
+#define __VEC_CLASS_FP_ZERO_P (1 << 11)
+#define __VEC_CLASS_FP_ZERO_N (1 << 10)
+#define __VEC_CLASS_FP_ZERO (__VEC_CLASS_FP_ZERO_P | __VEC_CLASS_FP_ZERO_N)
+#define __VEC_CLASS_FP_NORMAL_P (1 << 9)
+#define __VEC_CLASS_FP_NORMAL_N (1 << 8)
+#define __VEC_CLASS_FP_NORMAL (__VEC_CLASS_FP_NORMAL_P | \
+                               __VEC_CLASS_FP_NORMAL_N)
+#define __VEC_CLASS_FP_SUBNORMAL_P (1 << 7)
+#define __VEC_CLASS_FP_SUBNORMAL_N (1 << 6)
+#define __VEC_CLASS_FP_SUBNORMAL (__VEC_CLASS_FP_SUBNORMAL_P | \
+                                  __VEC_CLASS_FP_SUBNORMAL_N)
+#define __VEC_CLASS_FP_INFINITY_P (1 << 5)
+#define __VEC_CLASS_FP_INFINITY_N (1 << 4)
+#define __VEC_CLASS_FP_INFINITY (__VEC_CLASS_FP_INFINITY_P | \
+                                 __VEC_CLASS_FP_INFINITY_N)
+#define __VEC_CLASS_FP_QNAN_P (1 << 3)
+#define __VEC_CLASS_FP_QNAN_N (1 << 2)
+#define __VEC_CLASS_FP_QNAN (__VEC_CLASS_FP_QNAN_P | __VEC_CLASS_FP_QNAN_N)
+#define __VEC_CLASS_FP_SNAN_P (1 << 1)
+#define __VEC_CLASS_FP_SNAN_N (1 << 0)
+#define __VEC_CLASS_FP_SNAN (__VEC_CLASS_FP_SNAN_P | __VEC_CLASS_FP_SNAN_N)
+#define __VEC_CLASS_FP_NAN (__VEC_CLASS_FP_QNAN | __VEC_CLASS_FP_SNAN)
+#define __VEC_CLASS_FP_NOT_NORMAL (__VEC_CLASS_FP_NAN | \
+                                   __VEC_CLASS_FP_SUBNORMAL | \
+                                   __VEC_CLASS_FP_ZERO | \
+                                   __VEC_CLASS_FP_INFINITY)
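A hedged sketch of the macro form together with the new class masks (again assuming -march=z14 -mzvector; the condition-code comment is an assumption about the underlying VFTCI instruction):

static vector bool int find_nans(vector float v) {
  int cc;  // assumed: 0 = all elements match, 3 = none match
  return vec_fp_test_data_class(v, __VEC_CLASS_FP_NAN, &cc);
}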
 
 /*-- vec_cp_until_zero ------------------------------------------------------*/
 
diff --git a/interpreter/llvm/src/tools/clang/lib/Headers/xmmintrin.h b/interpreter/llvm/src/tools/clang/lib/Headers/xmmintrin.h
index 5c312c08efb6e..bbc2117b4ea11 100644
--- a/interpreter/llvm/src/tools/clang/lib/Headers/xmmintrin.h
+++ b/interpreter/llvm/src/tools/clang/lib/Headers/xmmintrin.h
@@ -2099,7 +2099,7 @@ _mm_stream_pi(__m64 *__p, __m64 __a)
 ///
 /// \param __p
 ///    A pointer to a 128-bit aligned memory location that will receive the
-///    integer values.
+///    single-precision floating-point values.
 /// \param __a
 ///    A 128-bit vector of [4 x float] containing the values to be moved.
 static __inline__ void __DEFAULT_FN_ATTRS
@@ -2133,7 +2133,7 @@ void _mm_sfence(void);
 /// \headerfile <x86intrin.h>
 ///
 /// \code
-/// void _mm_extract_pi(__m64 a, int n);
+/// int _mm_extract_pi16(__m64 a, int n);
 /// \endcode
 ///
 /// This intrinsic corresponds to the <c> VPEXTRW / PEXTRW </c> instruction.
@@ -2157,7 +2157,7 @@ void _mm_sfence(void);
 /// \headerfile <x86intrin.h>
 ///
 /// \code
-/// void _mm_insert_pi(__m64 a, int d, int n);
+/// __m64 _mm_insert_pi16(__m64 a, int d, int n);
 /// \endcode
 ///
 /// This intrinsic corresponds to the <c> VPINSRW / PINSRW </c> instruction.
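A hedged sketch against the corrected prototypes (SSE and MMX assumed):

#include <xmmintrin.h>

static __m64 replace_word0(__m64 v, int d) {
  int w0 = _mm_extract_pi16(v, 0);   // returns int, as documented above
  (void)w0;
  return _mm_insert_pi16(v, d, 0);   // returns __m64, as documented above
}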
@@ -2331,8 +2331,10 @@ _mm_mulhi_pu16(__m64 __a, __m64 __b)
 /// \brief Conditionally copies the values from each 8-bit element in the first
 ///    64-bit integer vector operand to the specified memory location, as
 ///    specified by the most significant bit in the corresponding element in the
-///    second 64-bit integer vector operand. To minimize caching, the data is
-///    flagged as non-temporal (unlikely to be used again soon).
+///    second 64-bit integer vector operand.
+///
+///    To minimize caching, the data is flagged as non-temporal
+///    (unlikely to be used again soon).
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -2678,8 +2680,7 @@ _mm_movelh_ps(__m128 __a, __m128 __b)
 ///
 /// \headerfile <x86intrin.h>
 ///
-/// This intrinsic corresponds to the <c> CVTPI2PS + \c COMPOSITE </c>
-///   instruction.
+/// This intrinsic corresponds to the <c> CVTPI2PS + COMPOSITE </c> instruction.
 ///
 /// \param __a
 ///    A 64-bit vector of [4 x i16]. The elements of the destination are copied
@@ -2709,8 +2710,7 @@ _mm_cvtpi16_ps(__m64 __a)
 ///
 /// \headerfile <x86intrin.h>
 ///
-/// This intrinsic corresponds to the <c> CVTPI2PS + \c COMPOSITE </c>
-///   instruction.
+/// This intrinsic corresponds to the <c> CVTPI2PS + COMPOSITE </c> instruction.
 ///
 /// \param __a
 ///    A 64-bit vector of 16-bit unsigned integer values. The elements of the
@@ -2739,8 +2739,7 @@ _mm_cvtpu16_ps(__m64 __a)
 ///
 /// \headerfile <x86intrin.h>
 ///
-/// This intrinsic corresponds to the <c> CVTPI2PS + \c COMPOSITE </c>
-///   instruction.
+/// This intrinsic corresponds to the <c> CVTPI2PS + COMPOSITE </c> instruction.
 ///
 /// \param __a
 ///    A 64-bit vector of [8 x i8]. The elements of the destination are copied
@@ -2764,8 +2763,7 @@ _mm_cvtpi8_ps(__m64 __a)
 ///
 /// \headerfile <x86intrin.h>
 ///
-/// This intrinsic corresponds to the <c> CVTPI2PS + \c COMPOSITE </c>
-///   instruction.
+/// This intrinsic corresponds to the <c> CVTPI2PS + COMPOSITE </c> instruction.
 ///
 /// \param __a
 ///    A 64-bit vector of unsigned 8-bit integer values. The elements of the
@@ -2789,8 +2787,7 @@ _mm_cvtpu8_ps(__m64 __a)
 ///
 /// \headerfile <x86intrin.h>
 ///
-/// This intrinsic corresponds to the <c> CVTPI2PS + \c COMPOSITE </c>
-///   instruction.
+/// This intrinsic corresponds to the <c> CVTPI2PS + COMPOSITE </c> instruction.
 ///
 /// \param __a
 ///    A 64-bit vector of [2 x i32]. The lower elements of the destination are
@@ -2815,16 +2812,16 @@ _mm_cvtpi32x2_ps(__m64 __a, __m64 __b)
 
 /// \brief Converts each single-precision floating-point element of a 128-bit
 ///    floating-point vector of [4 x float] into a 16-bit signed integer, and
-///    packs the results into a 64-bit integer vector of [4 x i16]. If the
-///    floating-point element is NaN or infinity, or if the floating-point
-///    element is greater than 0x7FFFFFFF or less than -0x8000, it is converted
-///    to 0x8000. Otherwise if the floating-point element is greater than
-///    0x7FFF, it is converted to 0x7FFF.
+///    packs the results into a 64-bit integer vector of [4 x i16].
+///
+///    If the floating-point element is NaN or infinity, or if the
+///    floating-point element is greater than 0x7FFFFFFF or less than -0x8000,
+///    it is converted to 0x8000. Otherwise if the floating-point element is
+///    greater than 0x7FFF, it is converted to 0x7FFF.
 ///
 /// \headerfile <x86intrin.h>
 ///
-/// This intrinsic corresponds to the <c> CVTPS2PI + \c COMPOSITE </c>
-///   instruction.
+/// This intrinsic corresponds to the <c> CVTPS2PI + COMPOSITE </c> instruction.
 ///
 /// \param __a
 ///    A 128-bit floating-point vector of [4 x float].
@@ -2845,16 +2842,16 @@ _mm_cvtps_pi16(__m128 __a)
 /// \brief Converts each single-precision floating-point element of a 128-bit
 ///    floating-point vector of [4 x float] into an 8-bit signed integer, and
 ///    packs the results into the lower 32 bits of a 64-bit integer vector of
-///    [8 x i8]. The upper 32 bits of the vector are set to 0. If the
-///    floating-point element is NaN or infinity, or if the floating-point
-///    element is greater than 0x7FFFFFFF or less than -0x80, it is converted
-///    to 0x80. Otherwise if the floating-point element is greater than 0x7F,
-///    it is converted to 0x7F.
+///    [8 x i8]. The upper 32 bits of the vector are set to 0.
+///
+///    If the floating-point element is NaN or infinity, or if the
+///    floating-point element is greater than 0x7FFFFFFF or less than -0x80, it
+///    is converted to 0x80. Otherwise if the floating-point element is greater
+///    than 0x7F, it is converted to 0x7F.
 ///
 /// \headerfile <x86intrin.h>
 ///
-/// This intrinsic corresponds to the <c> CVTPS2PI + \c COMPOSITE </c>
-///   instruction.
+/// This intrinsic corresponds to the <c> CVTPS2PI + COMPOSITE </c> instruction.
 ///
 /// \param __a
 ///    128-bit floating-point vector of [4 x float].
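A hedged illustration of the saturation rules spelled out above:

#include <xmmintrin.h>

static __m64 saturating_convert(void) {
  __m128 v = _mm_setr_ps(1.5f, 70000.0f, -70000.0f, __builtin_nanf(""));
  // 1.5 -> 2 (default round-to-nearest-even), 70000 -> 0x7FFF,
  // -70000 -> 0x8000, NaN -> 0x8000.
  return _mm_cvtps_pi16(v);
}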
diff --git a/interpreter/llvm/src/tools/clang/lib/Index/IndexBody.cpp b/interpreter/llvm/src/tools/clang/lib/Index/IndexBody.cpp
index 9439c11dac07e..6bbd38102509f 100644
--- a/interpreter/llvm/src/tools/clang/lib/Index/IndexBody.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Index/IndexBody.cpp
@@ -165,6 +165,9 @@ class BodyIndexer : public RecursiveASTVisitor<BodyIndexer> {
     if (!TD)
       return true;
     CXXRecordDecl *RD = TD->getTemplatedDecl();
+    if (!RD->hasDefinition())
+      return true;
+    RD = RD->getDefinition();
     std::vector<const NamedDecl *> Symbols =
         RD->lookupDependentName(NameInfo.getName(), Filter);
     // FIXME: Improve overload handling.
@@ -227,7 +230,31 @@ class BodyIndexer : public RecursiveASTVisitor<BodyIndexer> {
       SmallVector<SymbolRelation, 2> Relations;
       addCallRole(Roles, Relations);
       Stmt *Containing = getParentStmt();
-      if (E->isImplicit() || (Containing && isa<PseudoObjectExpr>(Containing)))
+
+      auto IsImplicitProperty = [](const PseudoObjectExpr *POE) -> bool {
+        const auto *E = POE->getSyntacticForm();
+        if (const auto *BinOp = dyn_cast<BinaryOperator>(E))
+          E = BinOp->getLHS();
+        const auto *PRE = dyn_cast<ObjCPropertyRefExpr>(E);
+        if (!PRE)
+          return false;
+        if (PRE->isExplicitProperty())
+          return false;
+        if (const ObjCMethodDecl *Getter = PRE->getImplicitPropertyGetter()) {
+          // Class properties that are explicitly defined using @property
+          // declarations are represented implicitly as there is no ivar for
+          // class properties.
+          if (Getter->isClassMethod() &&
+              Getter->getCanonicalDecl()->findPropertyDecl())
+            return false;
+        }
+        return true;
+      };
+      bool IsPropCall = Containing && isa<PseudoObjectExpr>(Containing);
+      // Implicit property message sends are not 'implicit'.
+      if ((E->isImplicit() || IsPropCall) &&
+          !(IsPropCall &&
+            IsImplicitProperty(cast<PseudoObjectExpr>(Containing))))
         Roles |= (unsigned)SymbolRole::Implicit;
 
       if (isDynamic(E)) {
@@ -243,11 +270,26 @@ class BodyIndexer : public RecursiveASTVisitor<BodyIndexer> {
   }
 
   bool VisitObjCPropertyRefExpr(ObjCPropertyRefExpr *E) {
+    if (E->isClassReceiver())
+      IndexCtx.handleReference(E->getClassReceiver(), E->getReceiverLocation(),
+                               Parent, ParentDC);
     if (E->isExplicitProperty()) {
       SmallVector<SymbolRelation, 2> Relations;
       SymbolRoleSet Roles = getRolesForRef(E, Relations);
       return IndexCtx.handleReference(E->getExplicitProperty(), E->getLocation(),
                                       Parent, ParentDC, Roles, Relations, E);
+    } else if (const ObjCMethodDecl *Getter = E->getImplicitPropertyGetter()) {
+      // Class properties that are explicitly defined using @property
+      // declarations are represented implicitly as there is no ivar for class
+      // properties.
+      if (Getter->isClassMethod()) {
+        if (const auto *PD = Getter->getCanonicalDecl()->findPropertyDecl()) {
+          SmallVector<SymbolRelation, 2> Relations;
+          SymbolRoleSet Roles = getRolesForRef(E, Relations);
+          return IndexCtx.handleReference(PD, E->getLocation(), Parent,
+                                          ParentDC, Roles, Relations, E);
+        }
+      }
     }
 
     // No need to do a handleReference for the objc method, because there will
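A hedged C++ illustration of the hasDefinition() guard added above: member lookup through a class template that is declared but never defined used to run lookupDependentName() on a definition-less CXXRecordDecl.

template <typename T> struct Handle;     // declared, never defined

template <typename T> void poke(Handle<T> *h) {
  h->release();  // dependent member reference; the guard now bails out here
}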
diff --git a/interpreter/llvm/src/tools/clang/lib/Index/IndexDecl.cpp b/interpreter/llvm/src/tools/clang/lib/Index/IndexDecl.cpp
index e55dea8e7761b..c5230c0f9acf8 100644
--- a/interpreter/llvm/src/tools/clang/lib/Index/IndexDecl.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Index/IndexDecl.cpp
@@ -63,6 +63,17 @@ class IndexingDeclVisitor : public ConstDeclVisitor<IndexingDeclVisitor, bool> {
     case TemplateArgument::Type:
       IndexCtx.indexTypeSourceInfo(LocInfo.getAsTypeSourceInfo(), Parent, DC);
       break;
+    case TemplateArgument::Template:
+    case TemplateArgument::TemplateExpansion:
+      IndexCtx.indexNestedNameSpecifierLoc(TALoc.getTemplateQualifierLoc(),
+                                           Parent, DC);
+      if (const TemplateDecl *TD = TALoc.getArgument()
+                                       .getAsTemplateOrTemplatePattern()
+                                       .getAsTemplateDecl()) {
+        if (const NamedDecl *TTD = TD->getTemplatedDecl())
+          IndexCtx.handleReference(TTD, TALoc.getTemplateNameLoc(), Parent, DC);
+      }
+      break;
     default:
       break;
     }
@@ -98,6 +109,17 @@ class IndexingDeclVisitor : public ConstDeclVisitor<IndexingDeclVisitor, bool> {
           }
         }
       }
+    } else {
+      // Index the default parameter value for function definitions.
+      if (const auto *FD = dyn_cast<FunctionDecl>(D)) {
+        if (FD->isThisDeclarationADefinition()) {
+          for (const auto *PV : FD->parameters()) {
+            if (PV->hasDefaultArg() && !PV->hasUninstantiatedDefaultArg() &&
+                !PV->hasUnparsedDefaultArg())
+              IndexCtx.indexBody(PV->getDefaultArg(), D);
+          }
+        }
+      }
     }
   }
 
@@ -206,9 +228,6 @@ class IndexingDeclVisitor : public ConstDeclVisitor<IndexingDeclVisitor, bool> {
   }
 
   bool VisitFunctionDecl(const FunctionDecl *D) {
-    if (D->isDeleted())
-      return true;
-
     SymbolRoleSet Roles{};
     SmallVector<SymbolRelation, 4> Relations;
     if (auto *CXXMD = dyn_cast<CXXMethodDecl>(D)) {
@@ -274,6 +293,12 @@ class IndexingDeclVisitor : public ConstDeclVisitor<IndexingDeclVisitor, bool> {
     return true;
   }
 
+  bool VisitDecompositionDecl(const DecompositionDecl *D) {
+    for (const auto *Binding : D->bindings())
+      TRY_DECL(Binding, IndexCtx.handleDecl(Binding));
+    return Base::VisitDecompositionDecl(D);
+  }
+
   bool VisitFieldDecl(const FieldDecl *D) {
     SmallVector<SymbolRelation, 4> Relations;
     gatherTemplatePseudoOverrides(D, Relations);
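A hedged illustration for VisitDecompositionDecl above: each binding of a C++17 structured binding is reported as its own symbol (paired with the Decl::Binding -> SymbolKind::Variable mapping added in IndexSymbol.cpp below).

#include <cstdlib>
void bindings() {
  auto [quot, rem] = std::div(7, 2);  // 'quot' and 'rem' handled individually
  (void)quot; (void)rem;
}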
@@ -326,9 +351,11 @@ class IndexingDeclVisitor : public ConstDeclVisitor<IndexingDeclVisitor, bool> {
         IndexCtx.indexTagDecl(D, Relations);
       } else {
         auto *Parent = dyn_cast<NamedDecl>(D->getDeclContext());
+        SmallVector<SymbolRelation, 1> Relations;
+        gatherTemplatePseudoOverrides(D, Relations);
         return IndexCtx.handleReference(D, D->getLocation(), Parent,
                                         D->getLexicalDeclContext(),
-                                        SymbolRoleSet());
+                                        SymbolRoleSet(), Relations);
       }
     }
     return true;
@@ -544,6 +571,14 @@ class IndexingDeclVisitor : public ConstDeclVisitor<IndexingDeclVisitor, bool> {
     return true;
   }
 
+  bool VisitNamespaceAliasDecl(const NamespaceAliasDecl *D) {
+    TRY_DECL(D, IndexCtx.handleDecl(D));
+    IndexCtx.indexNestedNameSpecifierLoc(D->getQualifierLoc(), D);
+    IndexCtx.handleReference(D->getAliasedNamespace(), D->getTargetNameLoc(), D,
+                             D->getLexicalDeclContext());
+    return true;
+  }
+
   bool VisitUsingDecl(const UsingDecl *D) {
     const DeclContext *DC = D->getDeclContext()->getRedeclContext();
     const NamedDecl *Parent = dyn_cast<NamedDecl>(DC);
@@ -560,8 +595,12 @@ class IndexingDeclVisitor : public ConstDeclVisitor<IndexingDeclVisitor, bool> {
     const DeclContext *DC = D->getDeclContext()->getRedeclContext();
     const NamedDecl *Parent = dyn_cast<NamedDecl>(DC);
 
-    IndexCtx.indexNestedNameSpecifierLoc(D->getQualifierLoc(), Parent,
-                                         D->getLexicalDeclContext());
+    // NNS for the local 'using namespace' directives is visited by the body
+    // visitor.
+    if (!D->getParentFunctionOrMethod())
+      IndexCtx.indexNestedNameSpecifierLoc(D->getQualifierLoc(), Parent,
+                                           D->getLexicalDeclContext());
+
     return IndexCtx.handleReference(D->getNominatedNamespaceAsWritten(),
                                     D->getLocation(), Parent,
                                     D->getLexicalDeclContext(),
@@ -572,26 +611,61 @@ class IndexingDeclVisitor : public ConstDeclVisitor<IndexingDeclVisitor, bool> {
                                            ClassTemplateSpecializationDecl *D) {
     // FIXME: Notify subsequent callbacks if info comes from implicit
     // instantiation.
-    if (D->isThisDeclarationADefinition()) {
-      llvm::PointerUnion<ClassTemplateDecl *,
-                         ClassTemplatePartialSpecializationDecl *>
-          Template = D->getSpecializedTemplateOrPartial();
-      const Decl *SpecializationOf =
-          Template.is<ClassTemplateDecl *>()
-              ? (Decl *)Template.get<ClassTemplateDecl *>()
-              : Template.get<ClassTemplatePartialSpecializationDecl *>();
-      IndexCtx.indexTagDecl(
-          D, SymbolRelation(SymbolRoleSet(SymbolRole::RelationSpecializationOf),
-                            SpecializationOf));
-    }
+    llvm::PointerUnion<ClassTemplateDecl *,
+                       ClassTemplatePartialSpecializationDecl *>
+        Template = D->getSpecializedTemplateOrPartial();
+    const Decl *SpecializationOf =
+        Template.is<ClassTemplateDecl *>()
+            ? (Decl *)Template.get<ClassTemplateDecl *>()
+            : Template.get<ClassTemplatePartialSpecializationDecl *>();
+    if (!D->isThisDeclarationADefinition())
+      IndexCtx.indexNestedNameSpecifierLoc(D->getQualifierLoc(), D);
+    IndexCtx.indexTagDecl(
+        D, SymbolRelation(SymbolRoleSet(SymbolRole::RelationSpecializationOf),
+                          SpecializationOf));
     if (TypeSourceInfo *TSI = D->getTypeAsWritten())
       IndexCtx.indexTypeSourceInfo(TSI, /*Parent=*/nullptr,
                                    D->getLexicalDeclContext());
     return true;
   }
 
+  static bool shouldIndexTemplateParameterDefaultValue(const NamedDecl *D) {
+    if (!D)
+      return false;
+    // We want to index the template parameters only once when indexing the
+    // canonical declaration.
+    if (const auto *FD = dyn_cast<FunctionDecl>(D))
+      return FD->getCanonicalDecl() == FD;
+    else if (const auto *TD = dyn_cast<TagDecl>(D))
+      return TD->getCanonicalDecl() == TD;
+    else if (const auto *VD = dyn_cast<VarDecl>(D))
+      return VD->getCanonicalDecl() == VD;
+    return true;
+  }
+
   bool VisitTemplateDecl(const TemplateDecl *D) {
     // FIXME: Template parameters.
+
+    // Index the default values for the template parameters.
+    const NamedDecl *Parent = D->getTemplatedDecl();
+    if (D->getTemplateParameters() &&
+        shouldIndexTemplateParameterDefaultValue(Parent)) {
+      const TemplateParameterList *Params = D->getTemplateParameters();
+      for (const NamedDecl *TP : *Params) {
+        if (const auto *TTP = dyn_cast<TemplateTypeParmDecl>(TP)) {
+          if (TTP->hasDefaultArgument())
+            IndexCtx.indexTypeSourceInfo(TTP->getDefaultArgumentInfo(), Parent);
+        } else if (const auto *NTTP = dyn_cast<NonTypeTemplateParmDecl>(TP)) {
+          if (NTTP->hasDefaultArgument())
+            IndexCtx.indexBody(NTTP->getDefaultArgument(), Parent);
+        } else if (const auto *TTPD = dyn_cast<TemplateTemplateParmDecl>(TP)) {
+          if (TTPD->hasDefaultArgument())
+            handleTemplateArgumentLoc(TTPD->getDefaultArgument(), Parent,
+                                      /*DC=*/nullptr);
+        }
+      }
+    }
+
     return Visit(D->getTemplatedDecl());
   }
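A hedged illustration for VisitTemplateDecl above: both default arguments below are now indexed, and only once, on the canonical declaration.

struct DefaultAlloc {};
constexpr int kDefaultN = 4;
template <typename T = DefaultAlloc, int N = kDefaultN> struct SmallBuf {};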
 
@@ -616,6 +690,13 @@ class IndexingDeclVisitor : public ConstDeclVisitor<IndexingDeclVisitor, bool> {
   bool VisitImportDecl(const ImportDecl *D) {
     return IndexCtx.importedModule(D);
   }
+
+  bool VisitStaticAssertDecl(const StaticAssertDecl *D) {
+    IndexCtx.indexBody(D->getAssertExpr(),
+                       dyn_cast<NamedDecl>(D->getDeclContext()),
+                       D->getLexicalDeclContext());
+    return true;
+  }
 };
 
 } // anonymous namespace
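A hedged illustration for VisitStaticAssertDecl above: the reference to kHeaderSize inside the assertion expression is now indexed.

constexpr unsigned kHeaderSize = 16;
struct Header { char bytes[kHeaderSize]; };
static_assert(sizeof(Header) == kHeaderSize, "unexpected layout");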
diff --git a/interpreter/llvm/src/tools/clang/lib/Index/IndexSymbol.cpp b/interpreter/llvm/src/tools/clang/lib/Index/IndexSymbol.cpp
index 0bfa19346b4ef..0dc3720208caf 100644
--- a/interpreter/llvm/src/tools/clang/lib/Index/IndexSymbol.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Index/IndexSymbol.cpp
@@ -61,17 +61,21 @@ bool index::isFunctionLocalSymbol(const Decl *D) {
   if (isa<ParmVarDecl>(D))
     return true;
 
+  if (isa<UsingDirectiveDecl>(D))
+    return false;
   if (!D->getParentFunctionOrMethod())
     return false;
 
   if (const NamedDecl *ND = dyn_cast<NamedDecl>(D)) {
     switch (ND->getFormalLinkage()) {
       case NoLinkage:
-      case VisibleNoLinkage:
       case InternalLinkage:
         return true;
+      case VisibleNoLinkage:
       case UniqueExternalLinkage:
+      case ModuleInternalLinkage:
         llvm_unreachable("Not a sema linkage");
+      case ModuleLinkage:
       case ExternalLinkage:
         return false;
     }
@@ -299,6 +303,10 @@ SymbolInfo index::getSymbolInfo(const Decl *D) {
       Info.Kind = SymbolKind::TypeAlias;
       Info.Lang = SymbolLanguage::CXX;
       break;
+    case Decl::Binding:
+      Info.Kind = SymbolKind::Variable;
+      Info.Lang = SymbolLanguage::CXX;
+      break;
     default:
       break;
     }
@@ -318,16 +326,7 @@ SymbolInfo index::getSymbolInfo(const Decl *D) {
   if (Info.Properties & (unsigned)SymbolProperty::Generic)
     Info.Lang = SymbolLanguage::CXX;
 
-  auto getExternalSymAttr = [](const Decl *D) -> ExternalSourceSymbolAttr* {
-    if (auto *attr = D->getAttr<ExternalSourceSymbolAttr>())
-      return attr;
-    if (auto *dcd = dyn_cast<Decl>(D->getDeclContext())) {
-      if (auto *attr = dcd->getAttr<ExternalSourceSymbolAttr>())
-        return attr;
-    }
-    return nullptr;
-  };
-  if (auto *attr = getExternalSymAttr(D)) {
+  if (auto *attr = D->getExternalSourceSymbolAttr()) {
     if (attr->getLanguage() == "Swift")
       Info.Lang = SymbolLanguage::Swift;
   }
diff --git a/interpreter/llvm/src/tools/clang/lib/Index/IndexTypeSourceInfo.cpp b/interpreter/llvm/src/tools/clang/lib/Index/IndexTypeSourceInfo.cpp
index 7e2041c6cb0e7..ae27ebe6ea4c0 100644
--- a/interpreter/llvm/src/tools/clang/lib/Index/IndexTypeSourceInfo.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Index/IndexTypeSourceInfo.cpp
@@ -157,6 +157,9 @@ class TypeIndexer : public RecursiveASTVisitor<TypeIndexer> {
     if (!TD)
       return true;
     CXXRecordDecl *RD = TD->getTemplatedDecl();
+    if (!RD->hasDefinition())
+      return true;
+    RD = RD->getDefinition();
     DeclarationName Name(DNT->getIdentifier());
     std::vector<const NamedDecl *> Symbols = RD->lookupDependentName(
         Name, [](const NamedDecl *ND) { return isa<TypeDecl>(ND); });
@@ -209,7 +212,7 @@ void IndexingContext::indexNestedNameSpecifierLoc(NestedNameSpecifierLoc NNS,
 
   if (!DC)
     DC = Parent->getLexicalDeclContext();
-  SourceLocation Loc = NNS.getSourceRange().getBegin();
+  SourceLocation Loc = NNS.getLocalBeginLoc();
 
   switch (NNS.getNestedNameSpecifier()->getKind()) {
   case NestedNameSpecifier::Identifier:
diff --git a/interpreter/llvm/src/tools/clang/lib/Index/IndexingAction.cpp b/interpreter/llvm/src/tools/clang/lib/Index/IndexingAction.cpp
index cac24d4b9c4c1..84d31200bab43 100644
--- a/interpreter/llvm/src/tools/clang/lib/Index/IndexingAction.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Index/IndexingAction.cpp
@@ -177,6 +177,18 @@ void index::indexASTUnit(ASTUnit &Unit,
   DataConsumer->finish();
 }
 
+void index::indexTopLevelDecls(ASTContext &Ctx, ArrayRef<const Decl *> Decls,
+                               std::shared_ptr<IndexDataConsumer> DataConsumer,
+                               IndexingOptions Opts) {
+  IndexingContext IndexCtx(Opts, *DataConsumer);
+  IndexCtx.setASTContext(Ctx);
+
+  DataConsumer->initialize(Ctx);
+  for (const Decl *D : Decls)
+    IndexCtx.indexTopLevelDecl(D);
+  DataConsumer->finish();
+}
+
 void index::indexModuleFile(serialization::ModuleFile &Mod,
                             ASTReader &Reader,
                             std::shared_ptr<IndexDataConsumer> DataConsumer,
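A hedged usage sketch of the new indexTopLevelDecls() entry point; the consumer type is illustrative (IndexDataConsumer's callbacks have default implementations, so an empty subclass is valid).

#include "clang/AST/ASTContext.h"
#include "clang/Index/IndexDataConsumer.h"
#include "clang/Index/IndexingAction.h"
#include <memory>

class NoopConsumer : public clang::index::IndexDataConsumer {};

void indexDecls(clang::ASTContext &Ctx,
                llvm::ArrayRef<const clang::Decl *> Decls) {
  clang::index::IndexingOptions Opts;
  clang::index::indexTopLevelDecls(Ctx, Decls,
                                   std::make_shared<NoopConsumer>(), Opts);
}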
diff --git a/interpreter/llvm/src/tools/clang/lib/Index/IndexingContext.cpp b/interpreter/llvm/src/tools/clang/lib/Index/IndexingContext.cpp
index 709a23657b078..addee691e8046 100644
--- a/interpreter/llvm/src/tools/clang/lib/Index/IndexingContext.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Index/IndexingContext.cpp
@@ -124,6 +124,16 @@ bool IndexingContext::isTemplateImplicitInstantiation(const Decl *D) {
     TKind = FD->getTemplateSpecializationKind();
   } else if (auto *VD = dyn_cast<VarDecl>(D)) {
     TKind = VD->getTemplateSpecializationKind();
+  } else if (const auto *RD = dyn_cast<CXXRecordDecl>(D)) {
+    if (RD->getInstantiatedFromMemberClass())
+      TKind = RD->getTemplateSpecializationKind();
+  } else if (const auto *ED = dyn_cast<EnumDecl>(D)) {
+    if (ED->getInstantiatedFromMemberEnum())
+      TKind = ED->getTemplateSpecializationKind();
+  } else if (isa<FieldDecl>(D) || isa<TypedefNameDecl>(D) ||
+             isa<EnumConstantDecl>(D)) {
+    if (const auto *Parent = dyn_cast<Decl>(D->getDeclContext()))
+      return isTemplateImplicitInstantiation(Parent);
   }
   switch (TKind) {
     case TSK_Undeclared:
@@ -151,6 +161,16 @@ bool IndexingContext::shouldIgnoreIfImplicit(const Decl *D) {
   return true;
 }
 
+static const CXXRecordDecl *
+getDeclContextForTemplateInstationPattern(const Decl *D) {
+  if (const auto *CTSD =
+          dyn_cast<ClassTemplateSpecializationDecl>(D->getDeclContext()))
+    return CTSD->getTemplateInstantiationPattern();
+  else if (const auto *RD = dyn_cast<CXXRecordDecl>(D->getDeclContext()))
+    return RD->getInstantiatedFromMemberClass();
+  return nullptr;
+}
+
 static const Decl *adjustTemplateImplicitInstantiation(const Decl *D) {
   if (const ClassTemplateSpecializationDecl *
       SD = dyn_cast<ClassTemplateSpecializationDecl>(D)) {
@@ -159,6 +179,28 @@ static const Decl *adjustTemplateImplicitInstantiation(const Decl *D) {
     return FD->getTemplateInstantiationPattern();
   } else if (auto *VD = dyn_cast<VarDecl>(D)) {
     return VD->getTemplateInstantiationPattern();
+  } else if (const auto *RD = dyn_cast<CXXRecordDecl>(D)) {
+    return RD->getInstantiatedFromMemberClass();
+  } else if (const auto *ED = dyn_cast<EnumDecl>(D)) {
+    return ED->getInstantiatedFromMemberEnum();
+  } else if (isa<FieldDecl>(D) || isa<TypedefNameDecl>(D)) {
+    const auto *ND = cast<NamedDecl>(D);
+    if (const CXXRecordDecl *Pattern =
+            getDeclContextForTemplateInstationPattern(ND)) {
+      for (const NamedDecl *BaseND : Pattern->lookup(ND->getDeclName())) {
+        if (BaseND->isImplicit())
+          continue;
+        if (BaseND->getKind() == ND->getKind())
+          return BaseND;
+      }
+    }
+  } else if (const auto *ECD = dyn_cast<EnumConstantDecl>(D)) {
+    if (const auto *ED = dyn_cast<EnumDecl>(ECD->getDeclContext())) {
+      if (const EnumDecl *Pattern = ED->getInstantiatedFromMemberEnum()) {
+        for (const NamedDecl *BaseECD : Pattern->lookup(ECD->getDeclName()))
+          return BaseECD;
+      }
+    }
   }
   return nullptr;
 }
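A hedged illustration for the new cases above: occurrences inside an implicit class template instantiation are now mapped back to the member declarations of the template pattern.

template <typename T> struct Outer {
  struct Nested {};   // CXXRecordDecl: getInstantiatedFromMemberClass()
  enum E { eVal };    // EnumDecl and EnumConstantDecl
  typedef T type;     // TypedefNameDecl
  int field;          // FieldDecl
};
Outer<int> o;         // members of Outer<int> resolve to Outer<T>'s decls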
@@ -187,6 +229,12 @@ static bool isDeclADefinition(const Decl *D, const DeclContext *ContainerDC, AST
   return false;
 }
 
+/// Whether the given NamedDecl should be skipped because it has no name.
+static bool shouldSkipNamelessDecl(const NamedDecl *ND) {
+  return ND->getDeclName().isEmpty() && !isa<TagDecl>(ND) &&
+         !isa<NamespaceDecl>(ND);
+}
+
 static const Decl *adjustParent(const Decl *Parent) {
   if (!Parent)
     return nullptr;
@@ -201,8 +249,8 @@ static const Decl *adjustParent(const Decl *Parent) {
     } else if (auto RD = dyn_cast<RecordDecl>(Parent)) {
       if (RD->isAnonymousStructOrUnion())
         continue;
-    } else if (auto FD = dyn_cast<FieldDecl>(Parent)) {
-      if (FD->getDeclName().isEmpty())
+    } else if (auto ND = dyn_cast<NamedDecl>(Parent)) {
+      if (shouldSkipNamelessDecl(ND))
         continue;
     }
     return Parent;
@@ -212,8 +260,10 @@ static const Decl *adjustParent(const Decl *Parent) {
 static const Decl *getCanonicalDecl(const Decl *D) {
   D = D->getCanonicalDecl();
   if (auto TD = dyn_cast<TemplateDecl>(D)) {
-    D = TD->getTemplatedDecl();
-    assert(D->isCanonicalDecl());
+    if (auto TTD = TD->getTemplatedDecl()) {
+      D = TTD;
+      assert(D->isCanonicalDecl());
+    }
   }
 
   return D;
@@ -273,9 +323,7 @@ bool IndexingContext::handleDeclOccurrence(const Decl *D, SourceLocation Loc,
                                            const DeclContext *ContainerDC) {
   if (D->isImplicit() && !isa<ObjCMethodDecl>(D))
     return true;
-  if (!isa<NamedDecl>(D) ||
-      (cast<NamedDecl>(D)->getDeclName().isEmpty() &&
-       !isa<TagDecl>(D) && !isa<NamespaceDecl>(D)))
+  if (!isa<NamedDecl>(D) || shouldSkipNamelessDecl(cast<NamedDecl>(D)))
     return true;
 
   SourceManager &SM = Ctx->getSourceManager();
diff --git a/interpreter/llvm/src/tools/clang/lib/Index/USRGeneration.cpp b/interpreter/llvm/src/tools/clang/lib/Index/USRGeneration.cpp
index 044edf715fc2b..21054b099a8e7 100644
--- a/interpreter/llvm/src/tools/clang/lib/Index/USRGeneration.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Index/USRGeneration.cpp
@@ -49,7 +49,7 @@ static bool printLoc(llvm::raw_ostream &OS, SourceLocation Loc,
 static StringRef GetExternalSourceContainer(const NamedDecl *D) {
   if (!D)
     return StringRef();
-  if (auto *attr = D->getAttr<ExternalSourceSymbolAttr>()) {
+  if (auto *attr = D->getExternalSourceSymbolAttr()) {
     return attr->getDefinedIn();
   }
   return StringRef();
diff --git a/interpreter/llvm/src/tools/clang/lib/Lex/HeaderSearch.cpp b/interpreter/llvm/src/tools/clang/lib/Lex/HeaderSearch.cpp
index 0d706088083f2..4182720bfe93c 100644
--- a/interpreter/llvm/src/tools/clang/lib/Lex/HeaderSearch.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Lex/HeaderSearch.cpp
@@ -1120,6 +1120,8 @@ bool HeaderSearch::ShouldEnterIncludeFile(Preprocessor &PP,
   auto TryEnterImported = [&](void) -> bool {
     if (!ModulesEnabled)
       return false;
+    // Ensure FileInfo bits are up to date.
+    ModMap.resolveHeaderDirectives(File);
     // Modules with builtins are special; multiple modules use builtins as
     // modular headers, example:
     //
@@ -1332,14 +1334,27 @@ static const FileEntry *getPrivateModuleMap(const FileEntry *File,
 }
 
 bool HeaderSearch::loadModuleMapFile(const FileEntry *File, bool IsSystem,
-                                     FileID ID, unsigned *Offset) {
+                                     FileID ID, unsigned *Offset,
+                                     StringRef OriginalModuleMapFile) {
   // Find the directory for the module. For frameworks, that may require going
   // up from the 'Modules' directory.
   const DirectoryEntry *Dir = nullptr;
   if (getHeaderSearchOpts().ModuleMapFileHomeIsCwd)
     Dir = FileMgr.getDirectory(".");
   else {
-    Dir = File->getDir();
+    if (!OriginalModuleMapFile.empty()) {
+      // We're building a preprocessed module map. Find or invent the directory
+      // that it originally occupied.
+      Dir = FileMgr.getDirectory(
+          llvm::sys::path::parent_path(OriginalModuleMapFile));
+      if (!Dir) {
+        auto *FakeFile = FileMgr.getVirtualFile(OriginalModuleMapFile, 0, 0);
+        Dir = FakeFile->getDir();
+      }
+    } else {
+      Dir = File->getDir();
+    }
+
     StringRef DirName(Dir->getName());
     if (llvm::sys::path::filename(DirName) == "Modules") {
       DirName = llvm::sys::path::parent_path(DirName);
diff --git a/interpreter/llvm/src/tools/clang/lib/Lex/Lexer.cpp b/interpreter/llvm/src/tools/clang/lib/Lex/Lexer.cpp
index ada4bcd9393c1..dbe6b4db94e70 100644
--- a/interpreter/llvm/src/tools/clang/lib/Lex/Lexer.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Lex/Lexer.cpp
@@ -19,6 +19,7 @@
 #include "clang/Lex/LexDiagnostic.h"
 #include "clang/Lex/LiteralSupport.h"
 #include "clang/Lex/Preprocessor.h"
+#include "clang/Lex/PreprocessorOptions.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/Support/Compiler.h"
@@ -43,6 +44,8 @@ using namespace clang;
 
 /// isObjCAtKeyword - Return true if we have an ObjC keyword identifier.
 bool Token::isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const {
+  if (isAnnotation())
+    return false;
   if (IdentifierInfo *II = getIdentifierInfo())
     return II->getObjCKeywordID() == objcKey;
   return false;
@@ -50,6 +53,8 @@ bool Token::isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const {
 
 /// getObjCKeywordID - Return the ObjC keyword kind.
 tok::ObjCKeywordKind Token::getObjCKeywordID() const {
+  if (isAnnotation())
+    return tok::objc_not_keyword;
   IdentifierInfo *specId = getIdentifierInfo();
   return specId ? specId->getObjCKeywordID() : tok::objc_not_keyword;
 }
@@ -546,8 +551,6 @@ namespace {
 
   enum PreambleDirectiveKind {
     PDK_Skipped,
-    PDK_StartIf,
-    PDK_EndIf,
     PDK_Unknown
   };
 
@@ -570,8 +573,6 @@ std::pair<unsigned, bool> Lexer::ComputePreamble(StringRef Buffer,
 
   bool InPreprocessorDirective = false;
   Token TheTok;
-  Token IfStartTok;
-  unsigned IfCount = 0;
   SourceLocation ActiveCommentLoc;
 
   unsigned MaxLineOffset = 0;
@@ -654,33 +655,18 @@ std::pair<unsigned, bool> Lexer::ComputePreamble(StringRef Buffer,
               .Case("sccs", PDK_Skipped)
               .Case("assert", PDK_Skipped)
               .Case("unassert", PDK_Skipped)
-              .Case("if", PDK_StartIf)
-              .Case("ifdef", PDK_StartIf)
-              .Case("ifndef", PDK_StartIf)
+              .Case("if", PDK_Skipped)
+              .Case("ifdef", PDK_Skipped)
+              .Case("ifndef", PDK_Skipped)
               .Case("elif", PDK_Skipped)
               .Case("else", PDK_Skipped)
-              .Case("endif", PDK_EndIf)
+              .Case("endif", PDK_Skipped)
               .Default(PDK_Unknown);
 
         switch (PDK) {
         case PDK_Skipped:
           continue;
 
-        case PDK_StartIf:
-          if (IfCount == 0)
-            IfStartTok = HashTok;
-            
-          ++IfCount;
-          continue;
-            
-        case PDK_EndIf:
-          // Mismatched #endif. The preamble ends here.
-          if (IfCount == 0)
-            break;
-
-          --IfCount;
-          continue;
-            
         case PDK_Unknown:
           // We don't know what this directive is; stop at the '#'.
           break;
@@ -701,16 +687,13 @@ std::pair<unsigned, bool> Lexer::ComputePreamble(StringRef Buffer,
   } while (true);
   
   SourceLocation End;
-  if (IfCount)
-    End = IfStartTok.getLocation();
-  else if (ActiveCommentLoc.isValid())
+  if (ActiveCommentLoc.isValid())
     End = ActiveCommentLoc; // don't truncate a decl comment.
   else
     End = TheTok.getLocation();
 
   return std::make_pair(End.getRawEncoding() - StartLoc.getRawEncoding(),
-                        IfCount? IfStartTok.isAtStartOfLine()
-                               : TheTok.isAtStartOfLine());
+                        TheTok.isAtStartOfLine());
 }
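A hedged illustration of the behavior change above: the preamble may now end while conditionals are still open, as in an ordinary include guard:

  #ifndef UTIL_H        // previously the preamble was forced to stop here
  #define UTIL_H
  #include <string>
  ...                   // preamble can now extend into the guarded region

The open '#ifndef' is instead recorded at end-of-preamble by the LexEndOfFile change below (setRecordedPreambleConditionalStack).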
 
 /// AdvanceToTokenCharacter - Given a location that specifies the start of a
@@ -2520,6 +2503,7 @@ void Lexer::ReadToEndOfLine(SmallVectorImpl<char> *Result) {
         break;
       }
       // FALL THROUGH.
+      LLVM_FALLTHROUGH;
     case '\r':
     case '\n':
       // Okay, we found the end of the line. First, back up past the \0, \r, \n.
@@ -2570,6 +2554,11 @@ bool Lexer::LexEndOfFile(Token &Result, const char *CurPtr) {
     return true;
   }
   
+  if (PP->isRecordingPreamble() && PP->isInPrimaryFile()) {
+    PP->setRecordedPreambleConditionalStack(ConditionalStack);
+    ConditionalStack.clear();
+  }
+
   // Issue diagnostics for unterminated #if and missing newline.
 
   // If we are in a #if directive, emit an error.
@@ -2766,7 +2755,7 @@ static const char *findPlaceholderEnd(const char *CurPtr,
 
 bool Lexer::lexEditorPlaceholder(Token &Result, const char *CurPtr) {
   assert(CurPtr[-1] == '<' && CurPtr[0] == '#' && "Not a placeholder!");
-  if (!PP || LexingRawMode)
+  if (!PP || !PP->getPreprocessorOpts().LexEditorPlaceholders || LexingRawMode)
     return false;
   const char *End = findPlaceholderEnd(CurPtr + 1, BufferEnd);
   if (!End)
@@ -3264,6 +3253,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
       return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result),
                              tok::wide_char_constant);
     // FALL THROUGH, treating L like the start of an identifier.
+    LLVM_FALLTHROUGH;
 
   // C99 6.4.2: Identifiers.
   case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
diff --git a/interpreter/llvm/src/tools/clang/lib/Lex/LiteralSupport.cpp b/interpreter/llvm/src/tools/clang/lib/Lex/LiteralSupport.cpp
index fbfd3fe5cce0f..a598a467816a8 100644
--- a/interpreter/llvm/src/tools/clang/lib/Lex/LiteralSupport.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Lex/LiteralSupport.cpp
@@ -456,10 +456,17 @@ static void EncodeUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf,
   // Finally, we write the bytes into ResultBuf.
   ResultBuf += bytesToWrite;
   switch (bytesToWrite) { // note: everything falls through.
-  case 4: *--ResultBuf = (UTF8)((UcnVal | byteMark) & byteMask); UcnVal >>= 6;
-  case 3: *--ResultBuf = (UTF8)((UcnVal | byteMark) & byteMask); UcnVal >>= 6;
-  case 2: *--ResultBuf = (UTF8)((UcnVal | byteMark) & byteMask); UcnVal >>= 6;
-  case 1: *--ResultBuf = (UTF8) (UcnVal | firstByteMark[bytesToWrite]);
+  case 4:
+    *--ResultBuf = (UTF8)((UcnVal | byteMark) & byteMask); UcnVal >>= 6;
+    LLVM_FALLTHROUGH;
+  case 3:
+    *--ResultBuf = (UTF8)((UcnVal | byteMark) & byteMask); UcnVal >>= 6;
+    LLVM_FALLTHROUGH;
+  case 2:
+    *--ResultBuf = (UTF8)((UcnVal | byteMark) & byteMask); UcnVal >>= 6;
+    LLVM_FALLTHROUGH;
+  case 1:
+    *--ResultBuf = (UTF8) (UcnVal | firstByteMark[bytesToWrite]);
   }
   // Update the buffer.
   ResultBuf += bytesToWrite;
@@ -563,7 +570,6 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,
   // Parse the suffix.  At this point we can classify whether we have an FP or
   // integer constant.
   bool isFPConstant = isFloatingLiteral();
-  const char *ImaginarySuffixLoc = nullptr;
 
   // Loop over all of the characters of the suffix.  If we see something bad,
   // we break out of the loop.
@@ -660,7 +666,6 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,
     case 'J':
       if (isImaginary) break;   // Cannot be repeated.
       isImaginary = true;
-      ImaginarySuffixLoc = s;
       continue;  // Success.
     }
     // If we reached here, there was an error or a ud-suffix.
@@ -694,8 +699,7 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,
   }
 
   if (isImaginary) {
-    PP.Diag(PP.AdvanceToTokenCharacter(TokLoc,
-                                       ImaginarySuffixLoc - ThisTokBegin),
+    PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, SuffixBegin - ThisTokBegin),
             diag::ext_imaginary_constant);
   }
 }
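A hedged illustration: for a literal like 1.0fi, the GNU-imaginary-constant extension warning is now reported at SuffixBegin, i.e. at the 'f' that opens the suffix, instead of at the separately tracked 'i':

  double _Complex z = 1.0fi;  // diagnosed once, at the start of "fi"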
diff --git a/interpreter/llvm/src/tools/clang/lib/Lex/MacroArgs.cpp b/interpreter/llvm/src/tools/clang/lib/Lex/MacroArgs.cpp
index 1c1979d8e83df..f791d8d4bacc2 100644
--- a/interpreter/llvm/src/tools/clang/lib/Lex/MacroArgs.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Lex/MacroArgs.cpp
@@ -44,20 +44,22 @@ MacroArgs *MacroArgs::create(const MacroInfo *MI,
       // Otherwise, use the best fit.
       ClosestMatch = (*Entry)->NumUnexpArgTokens;
     }
-  
+
   MacroArgs *Result;
   if (!ResultEnt) {
     // Allocate memory for a MacroArgs object with the lexer tokens at the end.
-    Result = (MacroArgs*)malloc(sizeof(MacroArgs) + 
-                                UnexpArgTokens.size() * sizeof(Token));
+    Result = (MacroArgs *)malloc(sizeof(MacroArgs) +
+                                 UnexpArgTokens.size() * sizeof(Token));
     // Construct the MacroArgs object.
-    new (Result) MacroArgs(UnexpArgTokens.size(), VarargsElided);
+    new (Result)
+        MacroArgs(UnexpArgTokens.size(), VarargsElided, MI->getNumParams());
   } else {
     Result = *ResultEnt;
     // Unlink this node from the preprocessors singly linked list.
     *ResultEnt = Result->ArgCache;
     Result->NumUnexpArgTokens = UnexpArgTokens.size();
     Result->VarargsElided = VarargsElided;
+    Result->NumMacroArgs = MI->getNumParams();
   }
 
   // Copy the actual unexpanded tokens to immediately after the result ptr.
@@ -146,11 +148,11 @@ bool MacroArgs::ArgNeedsPreexpansion(const Token *ArgTok,
 const std::vector<Token> &
 MacroArgs::getPreExpArgument(unsigned Arg, const MacroInfo *MI, 
                              Preprocessor &PP) {
-  assert(Arg < MI->getNumArgs() && "Invalid argument number!");
+  assert(Arg < MI->getNumParams() && "Invalid argument number!");
 
   // If we have already computed this, return it.
-  if (PreExpArgTokens.size() < MI->getNumArgs())
-    PreExpArgTokens.resize(MI->getNumArgs());
+  if (PreExpArgTokens.size() < MI->getNumParams())
+    PreExpArgTokens.resize(MI->getNumParams());
   
   std::vector<Token> &Result = PreExpArgTokens[Arg];
   if (!Result.empty()) return Result;
@@ -298,12 +300,10 @@ const Token &MacroArgs::getStringifiedArgument(unsigned ArgNo,
                                                Preprocessor &PP,
                                                SourceLocation ExpansionLocStart,
                                                SourceLocation ExpansionLocEnd) {
-  assert(ArgNo < NumUnexpArgTokens && "Invalid argument number!");
-  if (StringifiedArgs.empty()) {
-    StringifiedArgs.resize(getNumArguments());
-    memset((void*)&StringifiedArgs[0], 0,
-           sizeof(StringifiedArgs[0])*getNumArguments());
-  }
+  assert(ArgNo < getNumMacroArguments() && "Invalid argument number!");
+  if (StringifiedArgs.empty())
+    StringifiedArgs.resize(getNumMacroArguments(), {});
+
   if (StringifiedArgs[ArgNo].isNot(tok::string_literal))
     StringifiedArgs[ArgNo] = StringifyArgument(getUnexpArgument(ArgNo), PP,
                                                /*Charify=*/false,
diff --git a/interpreter/llvm/src/tools/clang/lib/Lex/MacroInfo.cpp b/interpreter/llvm/src/tools/clang/lib/Lex/MacroInfo.cpp
index 1e5deeb1919b5..6dc7841bc160b 100644
--- a/interpreter/llvm/src/tools/clang/lib/Lex/MacroInfo.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Lex/MacroInfo.cpp
@@ -17,8 +17,8 @@ using namespace clang;
 
 MacroInfo::MacroInfo(SourceLocation DefLoc)
   : Location(DefLoc),
-    ArgumentList(nullptr),
-    NumArguments(0),
+    ParameterList(nullptr),
+    NumParameters(0),
     IsDefinitionLengthCached(false),
     IsFunctionLike(false),
     IsC99Varargs(false),
@@ -74,7 +74,7 @@ bool MacroInfo::isIdenticalTo(const MacroInfo &Other, Preprocessor &PP,
 
   // Check # tokens in replacement, number of args, and various flags all match.
   if (ReplacementTokens.size() != Other.ReplacementTokens.size() ||
-      getNumArgs() != Other.getNumArgs() ||
+      getNumParams() != Other.getNumParams() ||
       isFunctionLike() != Other.isFunctionLike() ||
       isC99Varargs() != Other.isC99Varargs() ||
       isGNUVarargs() != Other.isGNUVarargs())
@@ -82,7 +82,8 @@ bool MacroInfo::isIdenticalTo(const MacroInfo &Other, Preprocessor &PP,
 
   if (Lexically) {
     // Check arguments.
-    for (arg_iterator I = arg_begin(), OI = Other.arg_begin(), E = arg_end();
+    for (param_iterator I = param_begin(), OI = Other.param_begin(),
+                        E = param_end();
          I != E; ++I, ++OI)
       if (*I != *OI) return false;
   }
@@ -109,10 +110,10 @@ bool MacroInfo::isIdenticalTo(const MacroInfo &Other, Preprocessor &PP,
         return false;
       // With syntactic equivalence the parameter names can be different as long
       // as they are used in the same place.
-      int AArgNum = getArgumentNum(A.getIdentifierInfo());
+      int AArgNum = getParameterNum(A.getIdentifierInfo());
       if (AArgNum == -1)
         return false;
-      if (AArgNum != Other.getArgumentNum(B.getIdentifierInfo()))
+      if (AArgNum != Other.getParameterNum(B.getIdentifierInfo()))
         return false;
       continue;
     }
@@ -141,12 +142,12 @@ LLVM_DUMP_METHOD void MacroInfo::dump() const {
   Out << "\n    #define ";
   if (IsFunctionLike) {
     Out << "(";
-    for (unsigned I = 0; I != NumArguments; ++I) {
+    for (unsigned I = 0; I != NumParameters; ++I) {
       if (I) Out << ", ";
-      Out << ArgumentList[I]->getName();
+      Out << ParameterList[I]->getName();
     }
     if (IsC99Varargs || IsGNUVarargs) {
-      if (NumArguments && IsC99Varargs) Out << ", ";
+      if (NumParameters && IsC99Varargs) Out << ", ";
       Out << "...";
     }
     Out << ")";
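A hedged sketch of the renamed parameter accessors (getNumParams / param_begin / param_end, formerly getNumArgs / arg_begin / arg_end):

#include "clang/Lex/MacroInfo.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Support/raw_ostream.h"

void printParams(const clang::MacroInfo &MI, llvm::raw_ostream &OS) {
  OS << MI.getNumParams() << " parameter(s):";
  for (const clang::IdentifierInfo *II :
       llvm::make_range(MI.param_begin(), MI.param_end()))
    OS << ' ' << II->getName();
  OS << '\n';
}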
diff --git a/interpreter/llvm/src/tools/clang/lib/Lex/ModuleMap.cpp b/interpreter/llvm/src/tools/clang/lib/Lex/ModuleMap.cpp
index 6f44dc757e850..40f78ce25ceb3 100644
--- a/interpreter/llvm/src/tools/clang/lib/Lex/ModuleMap.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Lex/ModuleMap.cpp
@@ -36,6 +36,37 @@
 #endif
 using namespace clang;
 
+Module::HeaderKind ModuleMap::headerRoleToKind(ModuleHeaderRole Role) {
+  switch ((int)Role) {
+  default: llvm_unreachable("unknown header role");
+  case NormalHeader:
+    return Module::HK_Normal;
+  case PrivateHeader:
+    return Module::HK_Private;
+  case TextualHeader:
+    return Module::HK_Textual;
+  case PrivateHeader | TextualHeader:
+    return Module::HK_PrivateTextual;
+  }
+}
+
+ModuleMap::ModuleHeaderRole
+ModuleMap::headerKindToRole(Module::HeaderKind Kind) {
+  switch ((int)Kind) {
+  case Module::HK_Normal:
+    return NormalHeader;
+  case Module::HK_Private:
+    return PrivateHeader;
+  case Module::HK_Textual:
+    return TextualHeader;
+  case Module::HK_PrivateTextual:
+    return ModuleHeaderRole(PrivateHeader | TextualHeader);
+  case Module::HK_Excluded:
+    llvm_unreachable("unexpected header kind");
+  }
+  llvm_unreachable("unknown header kind");
+}
+
 Module::ExportDecl 
 ModuleMap::resolveExport(Module *Mod, 
                          const Module::UnresolvedExportDecl &Unresolved,
@@ -84,6 +115,143 @@ Module *ModuleMap::resolveModuleId(const ModuleId &Id, Module *Mod,
   return Context;
 }
 
+/// \brief Append to \p Paths the set of paths needed to get to the 
+/// subframework in which the given module lives.
+static void appendSubframeworkPaths(Module *Mod,
+                                    SmallVectorImpl<char> &Path) {
+  // Collect the framework names from the given module to the top-level module.
+  SmallVector<StringRef, 2> Paths;
+  for (; Mod; Mod = Mod->Parent) {
+    if (Mod->IsFramework)
+      Paths.push_back(Mod->Name);
+  }
+  
+  if (Paths.empty())
+    return;
+  
+  // Add Frameworks/Name.framework for each subframework.
+  for (unsigned I = Paths.size() - 1; I != 0; --I)
+    llvm::sys::path::append(Path, "Frameworks", Paths[I-1] + ".framework");
+}
+
+const FileEntry *
+ModuleMap::findHeader(Module *M,
+                      const Module::UnresolvedHeaderDirective &Header,
+                      SmallVectorImpl<char> &RelativePathName) {
+  auto GetFile = [&](StringRef Filename) -> const FileEntry * {
+    auto *File = SourceMgr.getFileManager().getFile(Filename);
+    if (!File ||
+        (Header.Size && File->getSize() != *Header.Size) ||
+        (Header.ModTime && File->getModificationTime() != *Header.ModTime))
+      return nullptr;
+    return File;
+  };
+
+  if (llvm::sys::path::is_absolute(Header.FileName)) {
+    RelativePathName.clear();
+    RelativePathName.append(Header.FileName.begin(), Header.FileName.end());
+    return GetFile(Header.FileName);
+  }
+
+  // Search for the header file within the module's home directory.
+  auto *Directory = M->Directory;
+  SmallString<128> FullPathName(Directory->getName());
+  unsigned FullPathLength = FullPathName.size();
+
+  if (M->isPartOfFramework()) {
+    appendSubframeworkPaths(M, RelativePathName);
+    unsigned RelativePathLength = RelativePathName.size();
+
+    // Check whether this file is in the public headers.
+    llvm::sys::path::append(RelativePathName, "Headers", Header.FileName);
+    llvm::sys::path::append(FullPathName, RelativePathName);
+    if (auto *File = GetFile(FullPathName))
+      return File;
+
+    // Check whether this file is in the private headers.
+    // Ideally, private modules in the form 'FrameworkName.Private' should
+    // be defined as 'module FrameworkName.Private', and not as
+    // 'framework module FrameworkName.Private', since a 'Private.Framework'
+    // does not usually exist. However, since both are currently widely used
+    // for private modules, make sure we find the right path in both cases.
+    if (M->IsFramework && M->Name == "Private")
+      RelativePathName.clear();
+    else
+      RelativePathName.resize(RelativePathLength);
+    FullPathName.resize(FullPathLength);
+    llvm::sys::path::append(RelativePathName, "PrivateHeaders",
+                            Header.FileName);
+    llvm::sys::path::append(FullPathName, RelativePathName);
+    return GetFile(FullPathName);
+  }
+
+  // Lookup for normal headers.
+  llvm::sys::path::append(RelativePathName, Header.FileName);
+  llvm::sys::path::append(FullPathName, RelativePathName);
+  return GetFile(FullPathName);
+}
+
+void ModuleMap::resolveHeader(Module *Mod,
+                              const Module::UnresolvedHeaderDirective &Header) {
+  SmallString<128> RelativePathName;
+  if (const FileEntry *File = findHeader(Mod, Header, RelativePathName)) {
+    if (Header.IsUmbrella) {
+      const DirectoryEntry *UmbrellaDir = File->getDir();
+      if (Module *UmbrellaMod = UmbrellaDirs[UmbrellaDir])
+        Diags.Report(Header.FileNameLoc, diag::err_mmap_umbrella_clash)
+          << UmbrellaMod->getFullModuleName();
+      else
+        // Record this umbrella header.
+        setUmbrellaHeader(Mod, File, RelativePathName.str());
+    } else {
+      Module::Header H = {RelativePathName.str(), File};
+      if (Header.Kind == Module::HK_Excluded)
+        excludeHeader(Mod, H);
+      else
+        addHeader(Mod, H, headerKindToRole(Header.Kind));
+    }
+  } else if (Header.HasBuiltinHeader && !Header.Size && !Header.ModTime) {
+    // There's a builtin header but no corresponding on-disk header. Assume
+    // this was supposed to modularize the builtin header alone.
+  } else if (Header.Kind == Module::HK_Excluded) {
+    // Ignore missing excluded header files. They're optional anyway.
+  } else {
+    // If we find a module that has a missing header, we mark this module as
+    // unavailable and store the header directive for displaying diagnostics.
+    Mod->MissingHeaders.push_back(Header);
+    // A missing header with stat information doesn't make the module
+    // unavailable; this keeps our behavior consistent as headers are lazily
+    // resolved. (Such a module still can't be built though, except from
+    // preprocessed source.)
+    if (!Header.Size && !Header.ModTime)
+      Mod->markUnavailable();
+  }
+}
+
+bool ModuleMap::resolveAsBuiltinHeader(
+    Module *Mod, const Module::UnresolvedHeaderDirective &Header) {
+  if (Header.Kind == Module::HK_Excluded ||
+      llvm::sys::path::is_absolute(Header.FileName) ||
+      Mod->isPartOfFramework() || !Mod->IsSystem || Header.IsUmbrella ||
+      !BuiltinIncludeDir || BuiltinIncludeDir == Mod->Directory ||
+      !isBuiltinHeader(Header.FileName))
+    return false;
+
+  // This is a system module with a top-level header. This header
+  // may have a counterpart (or replacement) in the set of headers
+  // supplied by Clang. Find that builtin header.
+  SmallString<128> Path;
+  llvm::sys::path::append(Path, BuiltinIncludeDir->getName(), Header.FileName);
+  auto *File = SourceMgr.getFileManager().getFile(Path);
+  if (!File)
+    return false;
+
+  auto Role = headerKindToRole(Header.Kind);
+  Module::Header H = {Path.str(), File};
+  addHeader(Mod, H, Role);
+  return true;
+}
+
 ModuleMap::ModuleMap(SourceManager &SourceMgr, DiagnosticsEngine &Diags,
                      const LangOptions &LangOpts, const TargetInfo *Target,
                      HeaderSearch &HeaderInfo)
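
appendSubframeworkPaths collects framework names from the innermost module outward and then replays all but the top-level one in reverse, so findHeader resolves a submodule C nested in framework B inside top-level framework A under "Frameworks/B.framework/Frameworks/C.framework/" relative to A's directory, before trying "Headers/" and then "PrivateHeaders/". A standalone sketch of that prefix construction, with plain string concatenation standing in for llvm::sys::path::append:

    #include <cassert>
    #include <string>
    #include <vector>

    // Given framework names from the innermost module out to the top level,
    // build the relative prefix prepended before "Headers/<name>" or
    // "PrivateHeaders/<name>".
    static std::string
    subframeworkPrefix(const std::vector<std::string> &InnerToOuter) {
      std::string Path;
      if (InnerToOuter.empty())
        return Path;
      // Skip the top-level framework (last element); it is the search root.
      for (unsigned I = InnerToOuter.size() - 1; I != 0; --I) {
        Path += "Frameworks/";
        Path += InnerToOuter[I - 1];
        Path += ".framework/";
      }
      return Path;
    }

    int main() {
      // Module C inside framework B inside top-level framework A:
      assert(subframeworkPrefix({"C", "B", "A"}) ==
             "Frameworks/B.framework/Frameworks/C.framework/");
    }
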
@@ -162,6 +330,7 @@ bool ModuleMap::isBuiltinHeader(StringRef FileName) {
 
 ModuleMap::HeadersMap::iterator
 ModuleMap::findKnownHeader(const FileEntry *File) {
+  resolveHeaderDirectives(File);
   HeadersMap::iterator Known = Headers.find(File);
   if (HeaderInfo.getHeaderSearchOpts().ImplicitModuleMaps &&
       Known == Headers.end() && File->getDir() == BuiltinIncludeDir &&
@@ -244,8 +413,10 @@ void ModuleMap::diagnoseHeaderInclusion(Module *RequestingModule,
   if (getTopLevelOrNull(RequestingModule) != getTopLevelOrNull(SourceModule))
     return;
 
-  if (RequestingModule)
+  if (RequestingModule) {
     resolveUses(RequestingModule, /*Complain=*/false);
+    resolveHeaderDirectives(RequestingModule);
+  }
 
   bool Excluded = false;
   Module *Private = nullptr;
@@ -427,6 +598,7 @@ ModuleMap::findOrCreateModuleForHeaderInUmbrellaDir(const FileEntry *File) {
 
 ArrayRef<ModuleMap::KnownHeader>
 ModuleMap::findAllModulesForHeader(const FileEntry *File) const {
+  resolveHeaderDirectives(File);
   auto It = Headers.find(File);
   if (It == Headers.end())
     return None;
@@ -440,6 +612,7 @@ bool ModuleMap::isHeaderInUnavailableModule(const FileEntry *Header) const {
 bool
 ModuleMap::isHeaderUnavailableInModule(const FileEntry *Header,
                                        const Module *RequestingModule) const {
+  resolveHeaderDirectives(Header);
   HeadersMap::const_iterator Known = Headers.find(Header);
   if (Known != Headers.end()) {
     for (SmallVectorImpl<KnownHeader>::const_iterator
@@ -812,18 +985,63 @@ void ModuleMap::setUmbrellaDir(Module *Mod, const DirectoryEntry *UmbrellaDir,
   UmbrellaDirs[UmbrellaDir] = Mod;
 }
 
-static Module::HeaderKind headerRoleToKind(ModuleMap::ModuleHeaderRole Role) {
-  switch ((int)Role) {
-  default: llvm_unreachable("unknown header role");
-  case ModuleMap::NormalHeader:
-    return Module::HK_Normal;
-  case ModuleMap::PrivateHeader:
-    return Module::HK_Private;
-  case ModuleMap::TextualHeader:
-    return Module::HK_Textual;
-  case ModuleMap::PrivateHeader | ModuleMap::TextualHeader:
-    return Module::HK_PrivateTextual;
+void ModuleMap::addUnresolvedHeader(Module *Mod,
+                                    Module::UnresolvedHeaderDirective Header) {
+  // If there is a builtin counterpart to this file, add it now so it can
+  // wrap the system header.
+  if (resolveAsBuiltinHeader(Mod, Header)) {
+    // If we have both a builtin and system version of the file, the
+    // builtin version may want to inject macros into the system header, so
+    // force the system header to be treated as a textual header in this
+    // case.
+    Header.Kind = headerRoleToKind(ModuleMap::ModuleHeaderRole(
+        headerKindToRole(Header.Kind) | ModuleMap::TextualHeader));
+    Header.HasBuiltinHeader = true;
+  }
+
+  // If possible, don't stat the header until we need to. This requires the
+  // user to have provided us with some stat information about the file.
+  // FIXME: Add support for lazily stat'ing umbrella headers and excluded
+  // headers.
+  if ((Header.Size || Header.ModTime) && !Header.IsUmbrella &&
+      Header.Kind != Module::HK_Excluded) {
+    // We expect more variation in mtime than size, so if we're given both,
+    // use the mtime as the key.
+    if (Header.ModTime)
+      LazyHeadersByModTime[*Header.ModTime].push_back(Mod);
+    else
+      LazyHeadersBySize[*Header.Size].push_back(Mod);
+    Mod->UnresolvedHeaders.push_back(Header);
+    return;
+  }
+
+  // We don't have stat information or can't defer looking this file up.
+  // Perform the lookup now.
+  resolveHeader(Mod, Header);
+}
+
+void ModuleMap::resolveHeaderDirectives(const FileEntry *File) const {
+  auto BySize = LazyHeadersBySize.find(File->getSize());
+  if (BySize != LazyHeadersBySize.end()) {
+    for (auto *M : BySize->second)
+      resolveHeaderDirectives(M);
+    LazyHeadersBySize.erase(BySize);
   }
+
+  auto ByModTime = LazyHeadersByModTime.find(File->getModificationTime());
+  if (ByModTime != LazyHeadersByModTime.end()) {
+    for (auto *M : ByModTime->second)
+      resolveHeaderDirectives(M);
+    LazyHeadersByModTime.erase(ByModTime);
+  }
+}
+
+void ModuleMap::resolveHeaderDirectives(Module *Mod) const {
+  for (auto &Header : Mod->UnresolvedHeaders)
+    // This operation is logically const; we're just changing how we represent
+    // the header information for this file.
+    const_cast<ModuleMap*>(this)->resolveHeader(Mod, Header);
+  Mod->UnresolvedHeaders.clear();
 }
 
 void ModuleMap::addHeader(Module *Mod, Module::Header Header,
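
The point of the two lazy maps is that a deferred header directive costs nothing until some query touches a file whose size or mtime matches; only then is each bucketed module's directive list resolved and the bucket dropped. A toy sketch of that keying scheme, with std::map standing in for the real containers and ints standing in for Module pointers:

    #include <cassert>
    #include <cstdint>
    #include <map>
    #include <vector>

    struct LazyIndex {
      std::map<uint64_t, std::vector<int>> BySize;    // size  -> modules
      std::map<uint64_t, std::vector<int>> ByModTime; // mtime -> modules
      std::vector<int> Resolved;

      void add(int Module, uint64_t Size, uint64_t ModTime, bool HaveModTime) {
        // Prefer mtime as the key; it varies more than size.
        if (HaveModTime)
          ByModTime[ModTime].push_back(Module);
        else
          BySize[Size].push_back(Module);
      }

      // Called the first time a file with this stat info is looked up.
      void onFileSeen(uint64_t Size, uint64_t ModTime) {
        if (auto It = BySize.find(Size); It != BySize.end()) {
          Resolved.insert(Resolved.end(), It->second.begin(), It->second.end());
          BySize.erase(It);
        }
        if (auto It = ByModTime.find(ModTime); It != ByModTime.end()) {
          Resolved.insert(Resolved.end(), It->second.begin(), It->second.end());
          ByModTime.erase(It);
        }
      }
    };

    int main() {
      LazyIndex Idx;
      Idx.add(/*Module=*/1, /*Size=*/120, /*ModTime=*/0, /*HaveModTime=*/false);
      Idx.add(/*Module=*/2, /*Size=*/0, /*ModTime=*/1700000000, true);
      Idx.onFileSeen(/*Size=*/120, /*ModTime=*/999); // resolves module 1 only
      assert(Idx.Resolved == std::vector<int>{1});
    }
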
@@ -950,39 +1168,6 @@ bool ModuleMap::resolveConflicts(Module *Mod, bool Complain) {
   return !Mod->UnresolvedConflicts.empty();
 }
 
-Module *ModuleMap::inferModuleFromLocation(FullSourceLoc Loc) {
-  if (Loc.isInvalid())
-    return nullptr;
-
-  if (UmbrellaDirs.empty() && Headers.empty())
-    return nullptr;
-
-  // Use the expansion location to determine which module we're in.
-  FullSourceLoc ExpansionLoc = Loc.getExpansionLoc();
-  if (!ExpansionLoc.isFileID())
-    return nullptr;
-
-  const SourceManager &SrcMgr = Loc.getManager();
-  FileID ExpansionFileID = ExpansionLoc.getFileID();
-  
-  while (const FileEntry *ExpansionFile
-           = SrcMgr.getFileEntryForID(ExpansionFileID)) {
-    // Find the module that owns this header (if any).
-    if (Module *Mod = findModuleForHeader(ExpansionFile).getModule())
-      return Mod;
-    
-    // No module owns this header, so look up the inclusion chain to see if
-    // any included header has an associated module.
-    SourceLocation IncludeLoc = SrcMgr.getIncludeLoc(ExpansionFileID);
-    if (IncludeLoc.isInvalid())
-      return nullptr;
-
-    ExpansionFileID = SrcMgr.getFileID(IncludeLoc);
-  }
-
-  return nullptr;
-}
-
 //----------------------------------------------------------------------------//
 // Module map file parser
 //----------------------------------------------------------------------------//
@@ -1012,6 +1197,7 @@ namespace clang {
       RequiresKeyword,
       Star,
       StringLiteral,
+      IntegerLiteral,
       TextualKeyword,
       LBrace,
       RBrace,
@@ -1021,7 +1207,12 @@ namespace clang {
     
     unsigned Location;
     unsigned StringLength;
-    const char *StringData;
+    union {
+      // If Kind != IntegerLiteral.
+      const char *StringData;
+      // If Kind == IntegerLiteral.
+      uint64_t IntegerValue;
+    };
     
     void clear() {
       Kind = EndOfFile;
@@ -1035,9 +1226,14 @@ namespace clang {
     SourceLocation getLocation() const {
       return SourceLocation::getFromRawEncoding(Location);
     }
+
+    uint64_t getInteger() const {
+      return Kind == IntegerLiteral ? IntegerValue : 0;
+    }
     
     StringRef getString() const {
-      return StringRef(StringData, StringLength);
+      return Kind == IntegerLiteral ? StringRef()
+                                    : StringRef(StringData, StringLength);
     }
   };
 
@@ -1059,9 +1255,6 @@ namespace clang {
     /// be resolved relative to.
     const DirectoryEntry *Directory;
 
-    /// \brief The directory containing Clang-supplied headers.
-    const DirectoryEntry *BuiltinIncludeDir;
-
     /// \brief Whether this module map is in a system header directory.
     bool IsSystem;
     
@@ -1120,12 +1313,10 @@ namespace clang {
                              ModuleMap &Map,
                              const FileEntry *ModuleMapFile,
                              const DirectoryEntry *Directory,
-                             const DirectoryEntry *BuiltinIncludeDir,
                              bool IsSystem)
       : L(L), SourceMgr(SourceMgr), Target(Target), Diags(Diags), Map(Map), 
         ModuleMapFile(ModuleMapFile), Directory(Directory),
-        BuiltinIncludeDir(BuiltinIncludeDir), IsSystem(IsSystem),
-        HadError(false), ActiveModule(nullptr)
+        IsSystem(IsSystem), HadError(false), ActiveModule(nullptr)
     {
       Tok.clear();
       consumeToken();
@@ -1232,6 +1423,25 @@ SourceLocation ModuleMapParser::consumeToken() {
     Tok.StringLength = Length;
     break;
   }
+
+  case tok::numeric_constant: {
+    // We don't support any suffixes or other complications.
+    SmallString<32> SpellingBuffer;
+    SpellingBuffer.resize(LToken.getLength() + 1);
+    const char *Start = SpellingBuffer.data();
+    unsigned Length =
+        Lexer::getSpelling(LToken, Start, SourceMgr, L.getLangOpts());
+    uint64_t Value;
+    if (StringRef(Start, Length).getAsInteger(0, Value)) {
+      Diags.Report(Tok.getLocation(), diag::err_mmap_unknown_token);
+      HadError = true;
+      goto retry;
+    }
+
+    Tok.Kind = MMToken::IntegerLiteral;
+    Tok.IntegerValue = Value;
+    break;
+  }
       
   case tok::comment:
     goto retry;
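
StringRef::getAsInteger with radix 0 auto-detects the base, so size/mtime values in a module map may be written in decimal, hex, or octal, while suffix characters make the whole token fail and fall into the err_mmap_unknown_token path above. A quick sketch of that behavior, assuming an LLVM checkout on the include path (getAsInteger returns true on failure):

    #include <cassert>
    #include <cstdint>
    #include "llvm/ADT/StringRef.h"

    int main() {
      uint64_t V;
      assert(!llvm::StringRef("1234").getAsInteger(0, V) && V == 1234); // decimal
      assert(!llvm::StringRef("0x20").getAsInteger(0, V) && V == 32);   // hex
      // A suffix fails the whole token, which the parser diagnoses.
      assert(llvm::StringRef("123ull").getAsInteger(0, V));
    }
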
@@ -1805,25 +2015,6 @@ void ModuleMapParser::parseRequiresDecl() {
   } while (true);
 }
 
-/// \brief Append to \p Paths the set of paths needed to get to the 
-/// subframework in which the given module lives.
-static void appendSubframeworkPaths(Module *Mod,
-                                    SmallVectorImpl<char> &Path) {
-  // Collect the framework names from the given module to the top-level module.
-  SmallVector<StringRef, 2> Paths;
-  for (; Mod; Mod = Mod->Parent) {
-    if (Mod->IsFramework)
-      Paths.push_back(Mod->Name);
-  }
-  
-  if (Paths.empty())
-    return;
-  
-  // Add Frameworks/Name.framework for each subframework.
-  for (unsigned I = Paths.size() - 1; I != 0; --I)
-    llvm::sys::path::append(Path, "Frameworks", Paths[I-1] + ".framework");
-}
-
 /// \brief Parse a header declaration.
 ///
 ///   header-declaration:
@@ -1876,127 +2067,75 @@ void ModuleMapParser::parseHeaderDecl(MMToken::TokenKind LeadingToken,
   Module::UnresolvedHeaderDirective Header;
   Header.FileName = Tok.getString();
   Header.FileNameLoc = consumeToken();
+  Header.IsUmbrella = LeadingToken == MMToken::UmbrellaKeyword;
+  Header.Kind =
+      (LeadingToken == MMToken::ExcludeKeyword ? Module::HK_Excluded
+                                               : Map.headerRoleToKind(Role));
 
   // Check whether we already have an umbrella.
-  if (LeadingToken == MMToken::UmbrellaKeyword && ActiveModule->Umbrella) {
+  if (Header.IsUmbrella && ActiveModule->Umbrella) {
     Diags.Report(Header.FileNameLoc, diag::err_mmap_umbrella_clash)
       << ActiveModule->getFullModuleName();
     HadError = true;
     return;
   }
 
-  // Look for this file.
-  const FileEntry *File = nullptr;
-  const FileEntry *BuiltinFile = nullptr;
-  SmallString<128> RelativePathName;
-  if (llvm::sys::path::is_absolute(Header.FileName)) {
-    RelativePathName = Header.FileName;
-    File = SourceMgr.getFileManager().getFile(RelativePathName);
-  } else {
-    // Search for the header file within the search directory.
-    SmallString<128> FullPathName(Directory->getName());
-    unsigned FullPathLength = FullPathName.size();
-
-    if (ActiveModule->isPartOfFramework()) {
-      appendSubframeworkPaths(ActiveModule, RelativePathName);
-      unsigned RelativePathLength = RelativePathName.size();
-
-      // Check whether this file is in the public headers.
-      llvm::sys::path::append(RelativePathName, "Headers", Header.FileName);
-      llvm::sys::path::append(FullPathName, RelativePathName);
-      File = SourceMgr.getFileManager().getFile(FullPathName);
-
-      // Check whether this file is in the private headers.
-      if (!File) {
-        // Ideally, private modules in the form 'FrameworkName.Private' should
-        // be defined as 'module FrameworkName.Private', and not as
-        // 'framework module FrameworkName.Private', since a 'Private.Framework'
-        // does not usually exist. However, since both are currently widely used
-        // for private modules, make sure we find the right path in both cases.
-        if (ActiveModule->IsFramework && ActiveModule->Name == "Private")
-          RelativePathName.clear();
-        else
-          RelativePathName.resize(RelativePathLength);
-        FullPathName.resize(FullPathLength);
-        llvm::sys::path::append(RelativePathName, "PrivateHeaders",
-                                Header.FileName);
-        llvm::sys::path::append(FullPathName, RelativePathName);
-        File = SourceMgr.getFileManager().getFile(FullPathName);
-      }
-    } else {
-      // Lookup for normal headers.
-      llvm::sys::path::append(RelativePathName, Header.FileName);
-      llvm::sys::path::append(FullPathName, RelativePathName);
-      File = SourceMgr.getFileManager().getFile(FullPathName);
-
-      // If this is a system module with a top-level header, this header
-      // may have a counterpart (or replacement) in the set of headers
-      // supplied by Clang. Find that builtin header.
-      if (ActiveModule->IsSystem && LeadingToken != MMToken::UmbrellaKeyword &&
-          BuiltinIncludeDir && BuiltinIncludeDir != Directory &&
-          ModuleMap::isBuiltinHeader(Header.FileName)) {
-        SmallString<128> BuiltinPathName(BuiltinIncludeDir->getName());
-        llvm::sys::path::append(BuiltinPathName, Header.FileName);
-        BuiltinFile = SourceMgr.getFileManager().getFile(BuiltinPathName);
-
-        // If Clang supplies this header but the underlying system does not,
-        // just silently swap in our builtin version. Otherwise, we'll end
-        // up adding both (later).
-        if (BuiltinFile && !File) {
-          File = BuiltinFile;
-          RelativePathName = BuiltinPathName;
-          BuiltinFile = nullptr;
+  // If we were given stat information, parse it so we can skip looking for
+  // the file.
+  if (Tok.is(MMToken::LBrace)) {
+    SourceLocation LBraceLoc = consumeToken();
+
+    while (!Tok.is(MMToken::RBrace) && !Tok.is(MMToken::EndOfFile)) {
+      enum Attribute { Size, ModTime, Unknown };
+      StringRef Str = Tok.getString();
+      SourceLocation Loc = consumeToken();
+      switch (llvm::StringSwitch<Attribute>(Str)
+                  .Case("size", Size)
+                  .Case("mtime", ModTime)
+                  .Default(Unknown)) {
+      case Size:
+        if (Header.Size)
+          Diags.Report(Loc, diag::err_mmap_duplicate_header_attribute) << Str;
+        if (!Tok.is(MMToken::IntegerLiteral)) {
+          Diags.Report(Tok.getLocation(),
+                       diag::err_mmap_invalid_header_attribute_value) << Str;
+          skipUntil(MMToken::RBrace);
+          break;
         }
-      }
-    }
-  }
+        Header.Size = Tok.getInteger();
+        consumeToken();
+        break;
 
-  // FIXME: We shouldn't be eagerly stat'ing every file named in a module map.
-  // Come up with a lazy way to do this.
-  if (File) {
-    if (LeadingToken == MMToken::UmbrellaKeyword) {
-      const DirectoryEntry *UmbrellaDir = File->getDir();
-      if (Module *UmbrellaModule = Map.UmbrellaDirs[UmbrellaDir]) {
-        Diags.Report(LeadingLoc, diag::err_mmap_umbrella_clash)
-          << UmbrellaModule->getFullModuleName();
-        HadError = true;
-      } else {
-        // Record this umbrella header.
-        Map.setUmbrellaHeader(ActiveModule, File, RelativePathName.str());
-      }
-    } else if (LeadingToken == MMToken::ExcludeKeyword) {
-      Module::Header H = {RelativePathName.str(), File};
-      Map.excludeHeader(ActiveModule, H);
-    } else {
-      // If there is a builtin counterpart to this file, add it now so it can
-      // wrap the system header.
-      if (BuiltinFile) {
-        // FIXME: Taking the name from the FileEntry is unstable and can give
-        // different results depending on how we've previously named that file
-        // in this build.
-        Module::Header H = { BuiltinFile->getName(), BuiltinFile };
-        Map.addHeader(ActiveModule, H, Role);
-
-        // If we have both a builtin and system version of the file, the
-        // builtin version may want to inject macros into the system header, so
-        // force the system header to be treated as a textual header in this
-        // case.
-        Role = ModuleMap::ModuleHeaderRole(Role | ModuleMap::TextualHeader);
-      }
+      case ModTime:
+        if (Header.ModTime)
+          Diags.Report(Loc, diag::err_mmap_duplicate_header_attribute) << Str;
+        if (!Tok.is(MMToken::IntegerLiteral)) {
+          Diags.Report(Tok.getLocation(),
+                       diag::err_mmap_invalid_header_attribute_value) << Str;
+          skipUntil(MMToken::RBrace);
+          break;
+        }
+        Header.ModTime = Tok.getInteger();
+        consumeToken();
+        break;
 
-      // Record this header.
-      Module::Header H = { RelativePathName.str(), File };
-      Map.addHeader(ActiveModule, H, Role);
+      case Unknown:
+        Diags.Report(Loc, diag::err_mmap_expected_header_attribute);
+        skipUntil(MMToken::RBrace);
+        break;
+      }
     }
-  } else if (LeadingToken != MMToken::ExcludeKeyword) {
-    // Ignore excluded header files. They're optional anyway.
 
-    // If we find a module that has a missing header, we mark this module as
-    // unavailable and store the header directive for displaying diagnostics.
-    Header.IsUmbrella = LeadingToken == MMToken::UmbrellaKeyword;
-    ActiveModule->markUnavailable();
-    ActiveModule->MissingHeaders.push_back(Header);
+    if (Tok.is(MMToken::RBrace))
+      consumeToken();
+    else {
+      Diags.Report(Tok.getLocation(), diag::err_mmap_expected_rbrace);
+      Diags.Report(LBraceLoc, diag::note_mmap_lbrace_match);
+      HadError = true;
+    }
   }
+
+  Map.addUnresolvedHeader(ActiveModule, std::move(Header));
 }
 
 static int compareModuleHeaders(const Module::Header *A,
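
With the attribute block above parsed, a module map can carry stat information inline, which is what later allows addUnresolvedHeader to defer the on-disk lookup entirely. A hypothetical module map using the new syntax; the module name, header name, and values are invented for illustration:

    module MyLib {
      header "MyLib.h" { size 2048 mtime 1533028800 }
      export *
    }
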
@@ -2547,6 +2686,7 @@ bool ModuleMapParser::parseModuleMapFile() {
     case MMToken::RequiresKeyword:
     case MMToken::Star:
     case MMToken::StringLiteral:
+    case MMToken::IntegerLiteral:
     case MMToken::TextualKeyword:
     case MMToken::UmbrellaKeyword:
     case MMToken::UseKeyword:
@@ -2570,7 +2710,8 @@ bool ModuleMap::parseModuleMapFile(const FileEntry *File, bool IsSystem,
 
   // If the module map file wasn't already entered, do so now.
   if (ID.isInvalid()) {
-    auto FileCharacter = IsSystem ? SrcMgr::C_System : SrcMgr::C_User;
+    auto FileCharacter =
+        IsSystem ? SrcMgr::C_System_ModuleMap : SrcMgr::C_User_ModuleMap;
     ID = SourceMgr.createFileID(File, ExternModuleLoc, FileCharacter);
   }
 
@@ -2588,7 +2729,7 @@ bool ModuleMap::parseModuleMapFile(const FileEntry *File, bool IsSystem,
           Buffer->getBufferEnd());
   SourceLocation Start = L.getSourceLocation();
   ModuleMapParser Parser(L, SourceMgr, Target, Diags, *this, File, Dir,
-                         BuiltinIncludeDir, IsSystem);
+                         IsSystem);
   bool Result = Parser.parseModuleMapFile();
   ParsedModuleMap[File] = Result;
 
diff --git a/interpreter/llvm/src/tools/clang/lib/Lex/PPDirectives.cpp b/interpreter/llvm/src/tools/clang/lib/Lex/PPDirectives.cpp
index f75f3c931b714..10cd821a94bf9 100644
--- a/interpreter/llvm/src/tools/clang/lib/Lex/PPDirectives.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Lex/PPDirectives.cpp
@@ -30,6 +30,7 @@
 #include "clang/Lex/PPCallbacks.h"
 #include "clang/Lex/Pragma.h"
 #include "clang/Lex/Preprocessor.h"
+#include "clang/Lex/PreprocessorOptions.h"
 #include "clang/Lex/PTHLexer.h"
 #include "clang/Lex/Token.h"
 #include "llvm/ADT/ArrayRef.h"
@@ -219,26 +220,18 @@ bool Preprocessor::CheckMacroName(Token &MacroNameTok, MacroUse isDefineUndef,
     return Diag(MacroNameTok, diag::err_pp_missing_macro_name);
 
   IdentifierInfo *II = MacroNameTok.getIdentifierInfo();
-  if (!II) {
-    bool Invalid = false;
-    std::string Spelling = getSpelling(MacroNameTok, &Invalid);
-    if (Invalid)
-      return Diag(MacroNameTok, diag::err_pp_macro_not_identifier);
-    II = getIdentifierInfo(Spelling);
-
-    if (!II->isCPlusPlusOperatorKeyword())
-      return Diag(MacroNameTok, diag::err_pp_macro_not_identifier);
+  if (!II)
+    return Diag(MacroNameTok, diag::err_pp_macro_not_identifier);
 
+  if (II->isCPlusPlusOperatorKeyword()) {
     // C++ 2.5p2: Alternative tokens behave the same as its primary token
     // except for their spellings.
     Diag(MacroNameTok, getLangOpts().MicrosoftExt
                            ? diag::ext_pp_operator_used_as_macro_name
                            : diag::err_pp_operator_used_as_macro_name)
         << II << MacroNameTok.getKind();
-
     // Allow #defining |and| and friends for Microsoft compatibility or
     // recovery when legacy C headers are included in C++.
-    MacroNameTok.setIdentifierInfo(II);
   }
 
   if ((isDefineUndef != MU_Other) && II->getPPKeywordID() == tok::pp_defined) {
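
The restructured check reaches the operator-keyword diagnostic through the ordinary IdentifierInfo instead of re-spelling the token, but what is accepted does not change: the <iso646.h>-style alternative tokens can still be #defined for Microsoft compatibility. For instance, a legacy C header doing the following compiles cleanly as C, is an error in standard C++ mode, and is only a warning under -fms-extensions:

    /* Legacy C header pattern: 'and' and 'bitor' are ordinary identifiers
       in C, but alternative tokens (C++ [lex.digraph]) once built as C++. */
    #define and &&
    #define bitor |

    int probe(int a, int b) { return (a and b) bitor 0; }
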
@@ -537,7 +530,7 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc,
           assert(CurPPLexer->LexingRawMode && "We have to be skipping here!");
           CurPPLexer->LexingRawMode = false;
           IdentifierInfo *IfNDefMacro = nullptr;
-          const bool CondValue = EvaluateDirectiveExpression(IfNDefMacro);
+          const bool CondValue = EvaluateDirectiveExpression(IfNDefMacro).Conditional;
           CurPPLexer->LexingRawMode = true;
           if (Callbacks) {
             const SourceLocation CondEnd = CurPPLexer->getSourceLocation();
@@ -634,7 +627,7 @@ void Preprocessor::PTHSkipExcludedConditionalBlock() {
     // Evaluate the condition of the #elif.
     IdentifierInfo *IfNDefMacro = nullptr;
     CurPTHLexer->ParsingPreprocessorDirective = true;
-    bool ShouldEnter = EvaluateDirectiveExpression(IfNDefMacro);
+    bool ShouldEnter = EvaluateDirectiveExpression(IfNDefMacro).Conditional;
     CurPTHLexer->ParsingPreprocessorDirective = false;
 
     // If this condition is true, enter it!
@@ -667,24 +660,17 @@ Module *Preprocessor::getModuleForLocation(SourceLocation Loc) {
              : HeaderInfo.lookupModule(getLangOpts().CurrentModule);
 }
 
-Module *Preprocessor::getModuleContainingLocation(SourceLocation Loc) {
-  return HeaderInfo.getModuleMap().inferModuleFromLocation(
-      FullSourceLoc(Loc, SourceMgr));
-}
-
 const FileEntry *
 Preprocessor::getModuleHeaderToIncludeForDiagnostics(SourceLocation IncLoc,
+                                                     Module *M,
                                                      SourceLocation Loc) {
+  assert(M && "no module to include");
+
   // If we have a module import syntax, we shouldn't include a header to
   // make a particular module visible.
   if (getLangOpts().ObjC2)
     return nullptr;
 
-  // Figure out which module we'd want to import.
-  Module *M = getModuleContainingLocation(Loc);
-  if (!M)
-    return nullptr;
-
   Module *TopM = M->getTopLevelModule();
   Module *IncM = getModuleForLocation(IncLoc);
 
@@ -696,6 +682,8 @@ Preprocessor::getModuleHeaderToIncludeForDiagnostics(SourceLocation IncLoc,
   while (!Loc.isInvalid() && !SM.isInMainFile(Loc)) {
     auto ID = SM.getFileID(SM.getExpansionLoc(Loc));
     auto *FE = SM.getFileEntryForID(ID);
+    if (!FE)
+      break;
 
     bool InTextualHeader = false;
     for (auto Header : HeaderInfo.getModuleMap().findAllModulesForHeader(FE)) {
@@ -1182,18 +1170,26 @@ void Preprocessor::HandleLineDirective() {
     CheckEndOfDirective("line", true);
   }
 
-  SourceMgr.AddLineNote(DigitTok.getLocation(), LineNo, FilenameID);
+  // Take the file kind of the file containing the #line directive. #line
+  // directives are often used for generated sources from the same codebase, so
+  // the new file should generally be classified the same way as the current
+  // file. This is visible in GCC's pre-processed output, which rewrites #line
+  // to GNU line markers.
+  SrcMgr::CharacteristicKind FileKind =
+      SourceMgr.getFileCharacteristic(DigitTok.getLocation());
+
+  SourceMgr.AddLineNote(DigitTok.getLocation(), LineNo, FilenameID, false,
+                        false, FileKind);
 
   if (Callbacks)
     Callbacks->FileChanged(CurPPLexer->getSourceLocation(),
-                           PPCallbacks::RenameFile,
-                           SrcMgr::C_User);
+                           PPCallbacks::RenameFile, FileKind);
 }
 
 /// ReadLineMarkerFlags - Parse and validate any flags at the end of a GNU line
 /// marker directive.
 static bool ReadLineMarkerFlags(bool &IsFileEntry, bool &IsFileExit,
-                                bool &IsSystemHeader, bool &IsExternCHeader,
+                                SrcMgr::CharacteristicKind &FileKind,
                                 Preprocessor &PP) {
   unsigned FlagVal;
   Token FlagTok;
@@ -1244,7 +1240,7 @@ static bool ReadLineMarkerFlags(bool &IsFileEntry, bool &IsFileExit,
     return true;
   }
 
-  IsSystemHeader = true;
+  FileKind = SrcMgr::C_System;
 
   PP.Lex(FlagTok);
   if (FlagTok.is(tok::eod)) return false;
@@ -1258,7 +1254,7 @@ static bool ReadLineMarkerFlags(bool &IsFileEntry, bool &IsFileExit,
     return true;
   }
 
-  IsExternCHeader = true;
+  FileKind = SrcMgr::C_ExternCSystem;
 
   PP.Lex(FlagTok);
   if (FlagTok.is(tok::eod)) return false;
@@ -1288,14 +1284,15 @@ void Preprocessor::HandleDigitDirective(Token &DigitTok) {
   Lex(StrTok);
 
   bool IsFileEntry = false, IsFileExit = false;
-  bool IsSystemHeader = false, IsExternCHeader = false;
   int FilenameID = -1;
+  SrcMgr::CharacteristicKind FileKind = SrcMgr::C_User;
 
   // If the StrTok is "eod", then it wasn't present.  Otherwise, it must be a
   // string followed by eod.
-  if (StrTok.is(tok::eod))
-    ; // ok
-  else if (StrTok.isNot(tok::string_literal)) {
+  if (StrTok.is(tok::eod)) {
+    // Treat this like "#line NN", which doesn't change file characteristics.
+    FileKind = SourceMgr.getFileCharacteristic(DigitTok.getLocation());
+  } else if (StrTok.isNot(tok::string_literal)) {
     Diag(StrTok, diag::err_pp_linemarker_invalid_filename);
     return DiscardUntilEndOfDirective();
   } else if (StrTok.hasUDSuffix()) {
@@ -1314,15 +1311,13 @@ void Preprocessor::HandleDigitDirective(Token &DigitTok) {
     FilenameID = SourceMgr.getLineTableFilenameID(Literal.GetString());
 
     // If a filename was present, read any flags that are present.
-    if (ReadLineMarkerFlags(IsFileEntry, IsFileExit,
-                            IsSystemHeader, IsExternCHeader, *this))
+    if (ReadLineMarkerFlags(IsFileEntry, IsFileExit, FileKind, *this))
       return;
   }
 
   // Create a line note with this information.
-  SourceMgr.AddLineNote(DigitTok.getLocation(), LineNo, FilenameID,
-                        IsFileEntry, IsFileExit,
-                        IsSystemHeader, IsExternCHeader);
+  SourceMgr.AddLineNote(DigitTok.getLocation(), LineNo, FilenameID, IsFileEntry,
+                        IsFileExit, FileKind);
 
   // If the preprocessor has callbacks installed, notify them of the #line
   // change.  This is used so that the line marker comes out in -E mode for
@@ -1333,11 +1328,6 @@ void Preprocessor::HandleDigitDirective(Token &DigitTok) {
       Reason = PPCallbacks::EnterFile;
     else if (IsFileExit)
       Reason = PPCallbacks::ExitFile;
-    SrcMgr::CharacteristicKind FileKind = SrcMgr::C_User;
-    if (IsExternCHeader)
-      FileKind = SrcMgr::C_ExternCSystem;
-    else if (IsSystemHeader)
-      FileKind = SrcMgr::C_System;
 
     Callbacks->FileChanged(CurPPLexer->getSourceLocation(), Reason, FileKind);
   }
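
Folding the two booleans into one SrcMgr::CharacteristicKind matches the cumulative way GNU line-marker flags work: flag 3 marks a system header and flag 4 (which presupposes 3) upgrades it to an extern "C" system header, which is why the code above simply overwrites FileKind as each flag is read. Typical preprocessed output driving this path (paths illustrative) looks like:

    # 1 "/usr/include/stdio.h" 1 3
    /* flag 1 = enter file, flag 3 = system header -> SrcMgr::C_System */
    # 31 "/usr/include/sys/cdefs.h" 1 3 4
    /* flags 3 and 4 together -> SrcMgr::C_ExternCSystem */
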
@@ -1661,6 +1651,26 @@ static bool trySimplifyPath(SmallVectorImpl &Components,
   return SuggestReplacement;
 }
 
+bool Preprocessor::checkModuleIsAvailable(const LangOptions &LangOpts,
+                                          const TargetInfo &TargetInfo,
+                                          DiagnosticsEngine &Diags, Module *M) {
+  Module::Requirement Requirement;
+  Module::UnresolvedHeaderDirective MissingHeader;
+  if (M->isAvailable(LangOpts, TargetInfo, Requirement, MissingHeader))
+    return false;
+
+  if (MissingHeader.FileNameLoc.isValid()) {
+    Diags.Report(MissingHeader.FileNameLoc, diag::err_module_header_missing)
+        << MissingHeader.IsUmbrella << MissingHeader.FileName;
+  } else {
+    // FIXME: Track the location at which the requirement was specified, and
+    // use it here.
+    Diags.Report(M->DefinitionLoc, diag::err_module_unavailable)
+        << M->getFullModuleName() << Requirement.second << Requirement.first;
+  }
+  return true;
+}
+
 /// HandleIncludeDirective - The "\#include" tokens have just been read, read
 /// the file to be included from the lexer, then include it!  This is a common
 /// routine with functionality shared between \#include, \#include_next and
@@ -1832,33 +1842,24 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
   // we've imported or already built.
   bool ShouldEnter = true;
 
+  if (PPOpts->SingleFileParseMode)
+    ShouldEnter = false;
+
   // Determine whether we should try to import the module for this #include, if
   // there is one. Don't do so if precompiled module support is disabled or we
   // are processing this module textually (because we're building the module).
-  if (File && SuggestedModule && getLangOpts().Modules &&
+  if (ShouldEnter && File && SuggestedModule && getLangOpts().Modules &&
       SuggestedModule.getModule()->getTopLevelModuleName() !=
           getLangOpts().CurrentModule) {
     // If this include corresponds to a module but that module is
     // unavailable, diagnose the situation and bail out.
     // FIXME: Remove this; loadModule does the same check (but produces
     // slightly worse diagnostics).
-    if (!SuggestedModule.getModule()->isAvailable()) {
-      Module::Requirement Requirement;
-      Module::UnresolvedHeaderDirective MissingHeader;
-      Module *M = SuggestedModule.getModule();
-      // Identify the cause.
-      (void)M->isAvailable(getLangOpts(), getTargetInfo(), Requirement,
-                           MissingHeader);
-      if (MissingHeader.FileNameLoc.isValid()) {
-        Diag(MissingHeader.FileNameLoc, diag::err_module_header_missing)
-            << MissingHeader.IsUmbrella << MissingHeader.FileName;
-      } else {
-        Diag(M->DefinitionLoc, diag::err_module_unavailable)
-            << M->getFullModuleName() << Requirement.second << Requirement.first;
-      }
+    if (checkModuleIsAvailable(getLangOpts(), getTargetInfo(), getDiagnostics(),
+                               SuggestedModule.getModule())) {
       Diag(FilenameTok.getLocation(),
            diag::note_implicit_top_level_module_import_here)
-          << M->getTopLevelModuleName();
+          << SuggestedModule.getModule()->getTopLevelModuleName();
       return;
     }
 
@@ -1915,6 +1916,25 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
     }
   }
 
+  // The #included file will be considered to be a system header if either it is
+  // in a system include directory, or if the #includer is a system include
+  // header.
+  SrcMgr::CharacteristicKind FileCharacter =
+      SourceMgr.getFileCharacteristic(FilenameTok.getLocation());
+  if (File)
+    FileCharacter = std::max(HeaderInfo.getFileDirFlavor(File), FileCharacter);
+
+  // Ask HeaderInfo if we should enter this #include file.  If not, #including
+  // this file will have no effect.
+  bool SkipHeader = false;
+  if (ShouldEnter && File &&
+      !HeaderInfo.ShouldEnterIncludeFile(*this, File, isImport,
+                                         getLangOpts().Modules,
+                                         SuggestedModule.getModule())) {
+    ShouldEnter = false;
+    SkipHeader = true;
+  }
+
   if (Callbacks) {
     // Notify the callback object that we've seen an inclusion directive.
     Callbacks->InclusionDirective(
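
The std::max above is sound because SrcMgr::CharacteristicKind is ordered from least to most system-like, so a file is classified by the stricter of its own directory flavor and its includer's kind. A minimal sketch of that ordering assumption, with an illustrative subset of enumerators rather than the real SourceManager.h definition:

    #include <algorithm>
    #include <cassert>

    // Illustrative subset; the real enum also has *_ModuleMap kinds.
    enum CharacteristicKind { C_User, C_System, C_ExternCSystem };

    int main() {
      // A user header included from a system header is treated as system.
      CharacteristicKind FileDirFlavor = C_User;
      CharacteristicKind IncluderKind  = C_System;
      assert(std::max(FileDirFlavor, IncluderKind) == C_System);
    }
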
@@ -1922,18 +1942,13 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
         LangOpts.MSVCCompat ? NormalizedPath.c_str() : Filename, isAngled,
         FilenameRange, File, SearchPath, RelativePath,
         ShouldEnter ? nullptr : SuggestedModule.getModule());
+    if (SkipHeader && !SuggestedModule.getModule())
+      Callbacks->FileSkipped(*File, FilenameTok, FileCharacter);
   }
 
   if (!File)
     return;
 
-  // The #included file will be considered to be a system header if either it is
-  // in a system include directory, or if the #includer is a system include
-  // header.
-  SrcMgr::CharacteristicKind FileCharacter =
-    std::max(HeaderInfo.getFileDirFlavor(File),
-             SourceMgr.getFileCharacteristic(FilenameTok.getLocation()));
-
   // FIXME: If we have a suggested module, and we've already visited this file,
   // don't bother entering it again. We know it has no further effect.
 
@@ -1973,19 +1988,6 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
     }
   }
 
-  // Ask HeaderInfo if we should enter this #include file.  If not, #including
-  // this file will have no effect.
-  bool SkipHeader = false;
-  if (ShouldEnter &&
-      !HeaderInfo.ShouldEnterIncludeFile(*this, File, isImport,
-                                         getLangOpts().Modules,
-                                         SuggestedModule.getModule())) {
-    ShouldEnter = false;
-    SkipHeader = true;
-    if (Callbacks)
-      Callbacks->FileSkipped(*File, FilenameTok, FileCharacter);
-  }
-
   // If we don't need to enter the file, stop now.
   if (!ShouldEnter) {
     // If this is a module import, make it visible if needed.
@@ -2137,11 +2139,11 @@ void Preprocessor::HandleIncludeMacrosDirective(SourceLocation HashLoc,
 // Preprocessor Macro Directive Handling.
 //===----------------------------------------------------------------------===//
 
-/// ReadMacroDefinitionArgList - The ( starting an argument list of a macro
+/// ReadMacroParameterList - The ( starting an argument list of a macro
 /// definition has just been read.  Lex the rest of the arguments and the
 /// closing ), updating MI with what we learn.  Return true if an error occurs
 /// parsing the arg list.
-bool Preprocessor::ReadMacroDefinitionArgList(MacroInfo *MI, Token &Tok) {
+bool Preprocessor::ReadMacroParameterList(MacroInfo *MI, Token &Tok) {
   SmallVector<IdentifierInfo*, 32> Arguments;
 
   while (true) {
@@ -2175,7 +2177,7 @@ bool Preprocessor::ReadMacroDefinitionArgList(MacroInfo *MI, Token &Tok) {
       // Add the __VA_ARGS__ identifier as an argument.
       Arguments.push_back(Ident__VA_ARGS__);
       MI->setIsC99Varargs();
-      MI->setArgumentList(Arguments, BP);
+      MI->setParameterList(Arguments, BP);
       return false;
     case tok::eod:  // #define X(
       Diag(Tok, diag::err_pp_missing_rparen_in_macro_def);
@@ -2209,7 +2211,7 @@ bool Preprocessor::ReadMacroDefinitionArgList(MacroInfo *MI, Token &Tok) {
         Diag(Tok, diag::err_pp_expected_comma_in_arg_list);
         return true;
       case tok::r_paren: // #define X(A)
-        MI->setArgumentList(Arguments, BP);
+        MI->setParameterList(Arguments, BP);
         return false;
       case tok::comma:  // #define X(A,
         break;
@@ -2225,7 +2227,7 @@ bool Preprocessor::ReadMacroDefinitionArgList(MacroInfo *MI, Token &Tok) {
         }
 
         MI->setIsGNUVarargs();
-        MI->setArgumentList(Arguments, BP);
+        MI->setParameterList(Arguments, BP);
         return false;
       }
     }
@@ -2274,28 +2276,20 @@ static bool isConfigurationPattern(Token &MacroName, MacroInfo *MI,
          MI->getNumTokens() == 0;
 }
 
-/// HandleDefineDirective - Implements \#define.  This consumes the entire macro
-/// line then lets the caller lex the next real token.
-void Preprocessor::HandleDefineDirective(Token &DefineTok,
-                                         bool ImmediatelyAfterHeaderGuard) {
-  ++NumDefined;
-
-  Token MacroNameTok;
-  bool MacroShadowsKeyword;
-  ReadMacroName(MacroNameTok, MU_Define, &MacroShadowsKeyword);
+// ReadOptionalMacroParameterListAndBody - This consumes all (i.e. the
+// entire line) of the macro's tokens and adds them to MacroInfo, and while
+// doing so performs certain validity checks including (but not limited to):
+//   - # (stringization) is followed by a macro parameter
+//
+//  Returns nullptr if an invalid sequence of tokens is encountered; otherwise
+//  returns a pointer to the new MacroInfo object.
 
-  // Error reading macro name?  If so, diagnostic already issued.
-  if (MacroNameTok.is(tok::eod))
-    return;
+MacroInfo *Preprocessor::ReadOptionalMacroParameterListAndBody(
+    const Token &MacroNameTok, const bool ImmediatelyAfterHeaderGuard) {
 
   Token LastTok = MacroNameTok;
-
-  // If we are supposed to keep comments in #defines, reenable comment saving
-  // mode.
-  if (CurLexer) CurLexer->SetCommentRetentionState(KeepMacroComments);
-
   // Create the new macro.
-  MacroInfo *MI = AllocateMacroInfo(MacroNameTok.getLocation());
+  MacroInfo *const MI = AllocateMacroInfo(MacroNameTok.getLocation());
 
   Token Tok;
   LexUnexpandedToken(Tok);
@@ -2317,11 +2311,11 @@ void Preprocessor::HandleDefineDirective(Token &DefineTok,
   } else if (Tok.is(tok::l_paren)) {
     // This is a function-like macro definition.  Read the argument list.
     MI->setIsFunctionLike();
-    if (ReadMacroDefinitionArgList(MI, LastTok)) {
+    if (ReadMacroParameterList(MI, LastTok)) {
       // Throw away the rest of the line.
       if (CurPPLexer->ParsingPreprocessorDirective)
         DiscardUntilEndOfDirective();
-      return;
+      return nullptr;
     }
 
     // If this is a definition of a variadic C99 function-like macro, not using
@@ -2428,7 +2422,7 @@ void Preprocessor::HandleDefineDirective(Token &DefineTok,
 
       // Check for a valid macro arg identifier.
       if (Tok.getIdentifierInfo() == nullptr ||
-          MI->getArgumentNum(Tok.getIdentifierInfo()) == -1) {
+          MI->getParameterNum(Tok.getIdentifierInfo()) == -1) {
 
         // If this is assembler-with-cpp mode, we accept random gibberish after
         // the '#' because '#' is often a comment character.  However, change
@@ -2444,7 +2438,7 @@ void Preprocessor::HandleDefineDirective(Token &DefineTok,
 
           // Disable __VA_ARGS__ again.
           Ident__VA_ARGS__->setIsPoisoned(true);
-          return;
+          return nullptr;
         }
       }
 
@@ -2457,15 +2451,39 @@ void Preprocessor::HandleDefineDirective(Token &DefineTok,
       LexUnexpandedToken(Tok);
     }
   }
+  MI->setDefinitionEndLoc(LastTok.getLocation());
+  // Disable __VA_ARGS__ again.
+  Ident__VA_ARGS__->setIsPoisoned(true);
+
+  return MI;
+}
+/// HandleDefineDirective - Implements \#define.  This consumes the entire macro
+/// line then lets the caller lex the next real token.
+void Preprocessor::HandleDefineDirective(
+    Token &DefineTok, const bool ImmediatelyAfterHeaderGuard) {
+  ++NumDefined;
+
+  Token MacroNameTok;
+  bool MacroShadowsKeyword;
+  ReadMacroName(MacroNameTok, MU_Define, &MacroShadowsKeyword);
+
+  // Error reading macro name?  If so, diagnostic already issued.
+  if (MacroNameTok.is(tok::eod))
+    return;
+
+  // If we are supposed to keep comments in #defines, reenable comment saving
+  // mode.
+  if (CurLexer) CurLexer->SetCommentRetentionState(KeepMacroComments);
+
+  MacroInfo *const MI = ReadOptionalMacroParameterListAndBody(
+      MacroNameTok, ImmediatelyAfterHeaderGuard);
+
+  if (!MI) return;
 
   if (MacroShadowsKeyword &&
       !isConfigurationPattern(MacroNameTok, MI, getLangOpts())) {
     Diag(MacroNameTok, diag::warn_pp_macro_hides_keyword);
-  }
-
-  // Disable __VA_ARGS__ again.
-  Ident__VA_ARGS__->setIsPoisoned(true);
-
+  }
   // Check that there is no paste (##) operator at the beginning or end of the
   // replacement list.
   unsigned NumTokens = MI->getNumTokens();
@@ -2480,7 +2498,7 @@ void Preprocessor::HandleDefineDirective(Token &DefineTok,
     }
   }
 
-  MI->setDefinitionEndLoc(LastTok.getLocation());
+
 
   // Finally, if this identifier already had a macro defined for it, verify that
   // the macro bodies are identical, and issue diagnostics if they are not.
@@ -2648,7 +2666,13 @@ void Preprocessor::HandleIfdefDirective(Token &Result, bool isIfndef,
   }
 
   // Should we include the stuff contained by this directive?
-  if (!MI == isIfndef) {
+  if (PPOpts->SingleFileParseMode && !MI) {
+    // In 'single-file-parse mode' undefined identifiers trigger parsing of all
+    // the directive blocks.
+    CurPPLexer->pushConditionalLevel(DirectiveTok.getLocation(),
+                                     /*wasskip*/false, /*foundnonskip*/false,
+                                     /*foundelse*/false);
+  } else if (!MI == isIfndef) {
     // Yes, remember that we are inside a conditional, then lex the next token.
     CurPPLexer->pushConditionalLevel(DirectiveTok.getLocation(),
                                      /*wasskip*/false, /*foundnonskip*/true,
@@ -2670,7 +2694,8 @@ void Preprocessor::HandleIfDirective(Token &IfToken,
   // Parse and evaluate the conditional expression.
   IdentifierInfo *IfNDefMacro = nullptr;
   const SourceLocation ConditionalBegin = CurPPLexer->getSourceLocation();
-  const bool ConditionalTrue = EvaluateDirectiveExpression(IfNDefMacro);
+  const DirectiveEvalResult DER = EvaluateDirectiveExpression(IfNDefMacro);
+  const bool ConditionalTrue = DER.Conditional;
   const SourceLocation ConditionalEnd = CurPPLexer->getSourceLocation();
 
   // If this condition is equivalent to #ifndef X, and if this is the first
@@ -2689,7 +2714,12 @@ void Preprocessor::HandleIfDirective(Token &IfToken,
                   (ConditionalTrue ? PPCallbacks::CVK_True : PPCallbacks::CVK_False));
 
   // Should we include the stuff contained by this directive?
-  if (ConditionalTrue) {
+  if (PPOpts->SingleFileParseMode && DER.IncludedUndefinedIds) {
+    // In 'single-file-parse mode' undefined identifiers trigger parsing of all
+    // the directive blocks.
+    CurPPLexer->pushConditionalLevel(IfToken.getLocation(), /*wasskip*/false,
+                                     /*foundnonskip*/false, /*foundelse*/false);
+  } else if (ConditionalTrue) {
     // Yes, remember that we are inside a conditional, then lex the next token.
     CurPPLexer->pushConditionalLevel(IfToken.getLocation(), /*wasskip*/false,
                                    /*foundnonskip*/true, /*foundelse*/false);
@@ -2750,6 +2780,14 @@ void Preprocessor::HandleElseDirective(Token &Result) {
   if (Callbacks)
     Callbacks->Else(Result.getLocation(), CI.IfLoc);
 
+  if (PPOpts->SingleFileParseMode && !CI.FoundNonSkip) {
+    // In 'single-file-parse mode' undefined identifiers trigger parsing of all
+    // the directive blocks.
+    CurPPLexer->pushConditionalLevel(CI.IfLoc, /*wasskip*/false,
+                                     /*foundnonskip*/false, /*foundelse*/true);
+    return;
+  }
+
   // Finally, skip the rest of the contents of this block.
   SkipExcludedConditionalBlock(CI.IfLoc, /*Foundnonskip*/true,
                                /*FoundElse*/true, Result.getLocation());
@@ -2785,6 +2823,14 @@ void Preprocessor::HandleElifDirective(Token &ElifToken) {
                     SourceRange(ConditionalBegin, ConditionalEnd),
                     PPCallbacks::CVK_NotEvaluated, CI.IfLoc);
 
+  if (PPOpts->SingleFileParseMode && !CI.FoundNonSkip) {
+    // In 'single-file-parse mode' undefined identifiers trigger parsing of all
+    // the directive blocks.
+    CurPPLexer->pushConditionalLevel(ElifToken.getLocation(), /*wasskip*/false,
+                                     /*foundnonskip*/false, /*foundelse*/false);
+    return;
+  }
+
   // Finally, skip the rest of the contents of this block.
   SkipExcludedConditionalBlock(CI.IfLoc, /*Foundnonskip*/true,
                                /*FoundElse*/CI.FoundElse,
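
Taken together, these hunks mean that in single-file-parse mode a conditional is only skipped when its outcome did not depend on anything undefined; otherwise every arm is entered, so a tool parsing one file in isolation still sees declarations from all configurations. A small input illustrating the intended behavior (PLATFORM_MACRO is an invented name):

    /* With SingleFileParseMode and PLATFORM_MACRO undefined, both arms
       below are parsed; a normal run would skip one of them. */
    #if PLATFORM_MACRO
    int configured_one_way;
    #else
    int configured_another_way;
    #endif

    /* No undefined identifiers here, so this block is still skipped. */
    #if 0
    int never_parsed;
    #endif
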
diff --git a/interpreter/llvm/src/tools/clang/lib/Lex/PPExpressions.cpp b/interpreter/llvm/src/tools/clang/lib/Lex/PPExpressions.cpp
index 862a4713e4bca..d8431827e9cde 100644
--- a/interpreter/llvm/src/tools/clang/lib/Lex/PPExpressions.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Lex/PPExpressions.cpp
@@ -73,6 +73,7 @@ class PPValue {
 
 static bool EvaluateDirectiveSubExpr(PPValue &LHS, unsigned MinPrec,
                                      Token &PeekTok, bool ValueLive,
+                                     bool &IncludedUndefinedIds,
                                      Preprocessor &PP);
 
 /// DefinedTracker - This struct is used while parsing expressions to keep track
@@ -93,6 +94,7 @@ struct DefinedTracker {
   /// TheMacro - When the state is DefinedMacro or NotDefinedMacro, this
   /// indicates the macro that was checked.
   IdentifierInfo *TheMacro;
+  bool IncludedUndefinedIds = false;
 };
 
 /// EvaluateDefined - Process a 'defined(sym)' expression.
@@ -128,6 +130,7 @@ static bool EvaluateDefined(PPValue &Result, Token &PeekTok, DefinedTracker &DT,
   MacroDefinition Macro = PP.getMacroDefinition(II);
   Result.Val = !!Macro;
   Result.Val.setIsUnsigned(false); // Result is signed intmax_t.
+  DT.IncludedUndefinedIds = !Macro;
 
   // If there is a macro, mark it used.
   if (Result.Val != 0 && ValueLive)
@@ -234,33 +237,32 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT,
     PP.setCodeCompletionReached();
     PP.LexNonComment(PeekTok);
   }
-      
-  // If this token's spelling is a pp-identifier, check to see if it is
-  // 'defined' or if it is a macro.  Note that we check here because many
-  // keywords are pp-identifiers, so we can't check the kind.
-  if (IdentifierInfo *II = PeekTok.getIdentifierInfo()) {
-    // Handle "defined X" and "defined(X)".
-    if (II->isStr("defined"))
-      return EvaluateDefined(Result, PeekTok, DT, ValueLive, PP);
-
-    // If this identifier isn't 'defined' or one of the special
-    // preprocessor keywords and it wasn't macro expanded, it turns
-    // into a simple 0, unless it is the C++ keyword "true", in which case it
-    // turns into "1".
-    if (ValueLive &&
-        II->getTokenID() != tok::kw_true &&
-        II->getTokenID() != tok::kw_false)
-      PP.Diag(PeekTok, diag::warn_pp_undef_identifier) << II;
-    Result.Val = II->getTokenID() == tok::kw_true;
-    Result.Val.setIsUnsigned(false);  // "0" is signed intmax_t 0.
-    Result.setIdentifier(II);
-    Result.setRange(PeekTok.getLocation());
-    PP.LexNonComment(PeekTok);
-    return false;
-  }
 
   switch (PeekTok.getKind()) {
-  default:  // Non-value token.
+  default:
+    // If this token's spelling is a pp-identifier, check to see if it is
+    // 'defined' or if it is a macro.  Note that we check here because many
+    // keywords are pp-identifiers, so we can't check the kind.
+    if (IdentifierInfo *II = PeekTok.getIdentifierInfo()) {
+      // Handle "defined X" and "defined(X)".
+      if (II->isStr("defined"))
+        return EvaluateDefined(Result, PeekTok, DT, ValueLive, PP);
+
+      if (!II->isCPlusPlusOperatorKeyword()) {
+        // If this identifier isn't 'defined' or one of the special
+        // preprocessor keywords and it wasn't macro expanded, it turns
+        // into a simple 0
+        if (ValueLive)
+          PP.Diag(PeekTok, diag::warn_pp_undef_identifier) << II;
+        Result.Val = 0;
+        Result.Val.setIsUnsigned(false); // "0" is signed intmax_t 0.
+        Result.setIdentifier(II);
+        Result.setRange(PeekTok.getLocation());
+        DT.IncludedUndefinedIds = true;
+        PP.LexNonComment(PeekTok);
+        return false;
+      }
+    }
     PP.Diag(PeekTok, diag::err_pp_expr_bad_token_start_expr);
     return true;
   case tok::eod:
@@ -400,7 +402,8 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT,
       // Just use DT unmodified as our result.
     } else {
       // Otherwise, we have something like (x+y), and we consumed '(x'.
-      if (EvaluateDirectiveSubExpr(Result, 1, PeekTok, ValueLive, PP))
+      if (EvaluateDirectiveSubExpr(Result, 1, PeekTok, ValueLive,
+                                   DT.IncludedUndefinedIds, PP))
         return true;
 
       if (PeekTok.isNot(tok::r_paren)) {
@@ -475,6 +478,14 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT,
       DT.State = DefinedTracker::DefinedMacro;
     return false;
   }
+  case tok::kw_true:
+  case tok::kw_false:
+    Result.Val = PeekTok.getKind() == tok::kw_true;
+    Result.Val.setIsUnsigned(false); // "0" is signed intmax_t 0.
+    Result.setIdentifier(PeekTok.getIdentifierInfo());
+    Result.setRange(PeekTok.getLocation());
+    PP.LexNonComment(PeekTok);
+    return false;
 
   // FIXME: Handle #assert
   }
@@ -532,6 +543,7 @@ static void diagnoseUnexpectedOperator(Preprocessor &PP, PPValue &LHS,
 /// evaluation, such as division by zero warnings.
 static bool EvaluateDirectiveSubExpr(PPValue &LHS, unsigned MinPrec,
                                      Token &PeekTok, bool ValueLive,
+                                     bool &IncludedUndefinedIds,
                                      Preprocessor &PP) {
   unsigned PeekPrec = getPrecedence(PeekTok.getKind());
   // If this token isn't valid, report the error.
@@ -571,6 +583,7 @@ static bool EvaluateDirectiveSubExpr(PPValue &LHS, unsigned MinPrec,
     // Parse the RHS of the operator.
     DefinedTracker DT;
     if (EvaluateValue(RHS, PeekTok, DT, RHSIsLive, PP)) return true;
+    IncludedUndefinedIds = DT.IncludedUndefinedIds;
 
     // Remember the precedence of this operator and get the precedence of the
     // operator immediately to the right of the RHS.
@@ -601,7 +614,8 @@ static bool EvaluateDirectiveSubExpr(PPValue &LHS, unsigned MinPrec,
       RHSPrec = ThisPrec+1;
 
     if (PeekPrec >= RHSPrec) {
-      if (EvaluateDirectiveSubExpr(RHS, RHSPrec, PeekTok, RHSIsLive, PP))
+      if (EvaluateDirectiveSubExpr(RHS, RHSPrec, PeekTok, RHSIsLive,
+                                   IncludedUndefinedIds, PP))
         return true;
       PeekPrec = getPrecedence(PeekTok.getKind());
     }
@@ -769,7 +783,8 @@ static bool EvaluateDirectiveSubExpr(PPValue &LHS, unsigned MinPrec,
       // Parse anything after the : with the same precedence as ?.  We allow
       // things of equal precedence because ?: is right associative.
       if (EvaluateDirectiveSubExpr(AfterColonVal, ThisPrec,
-                                   PeekTok, AfterColonLive, PP))
+                                   PeekTok, AfterColonLive,
+                                   IncludedUndefinedIds, PP))
         return true;
 
       // Now that we have the condition, the LHS and the RHS of the :, evaluate.
@@ -806,7 +821,8 @@ static bool EvaluateDirectiveSubExpr(PPValue &LHS, unsigned MinPrec,
 /// EvaluateDirectiveExpression - Evaluate an integer constant expression that
 /// may occur after a #if or #elif directive.  If the expression is equivalent
 /// to "!defined(X)" return X in IfNDefMacro.
-bool Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) {
+Preprocessor::DirectiveEvalResult
+Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) {
   SaveAndRestore<bool> PPDir(ParsingIfOrElifDirective, true);
   // Save the current state of 'DisableMacroExpansion' and reset it to false. If
   // 'DisableMacroExpansion' is true, then we must be in a macro argument list
@@ -833,7 +849,7 @@ bool Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) {
     
     // Restore 'DisableMacroExpansion'.
     DisableMacroExpansion = DisableMacroExpansionAtStartOfDirective;
-    return false;
+    return {false, DT.IncludedUndefinedIds};
   }
 
   // If we are at the end of the expression after just parsing a value, there
@@ -847,20 +863,20 @@ bool Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) {
 
     // Restore 'DisableMacroExpansion'.
     DisableMacroExpansion = DisableMacroExpansionAtStartOfDirective;
-    return ResVal.Val != 0;
+    return {ResVal.Val != 0, DT.IncludedUndefinedIds};
   }
 
   // Otherwise, we must have a binary operator (e.g. "#if 1 < 2"), so parse the
   // operator and the stuff after it.
   if (EvaluateDirectiveSubExpr(ResVal, getPrecedence(tok::question),
-                               Tok, true, *this)) {
+                               Tok, true, DT.IncludedUndefinedIds, *this)) {
     // Parse error, skip the rest of the macro line.
     if (Tok.isNot(tok::eod))
       DiscardUntilEndOfDirective();
     
     // Restore 'DisableMacroExpansion'.
     DisableMacroExpansion = DisableMacroExpansionAtStartOfDirective;
-    return false;
+    return {false, DT.IncludedUndefinedIds};
   }
 
   // If we aren't at the tok::eod token, something bad happened, like an extra
@@ -872,5 +888,5 @@ bool Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) {
 
   // Restore 'DisableMacroExpansion'.
   DisableMacroExpansion = DisableMacroExpansionAtStartOfDirective;
-  return ResVal.Val != 0;
+  return {ResVal.Val != 0, DT.IncludedUndefinedIds};
 }
diff --git a/interpreter/llvm/src/tools/clang/lib/Lex/PPLexerChange.cpp b/interpreter/llvm/src/tools/clang/lib/Lex/PPLexerChange.cpp
index 1938328c904d4..36d7028da6886 100644
--- a/interpreter/llvm/src/tools/clang/lib/Lex/PPLexerChange.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Lex/PPLexerChange.cpp
@@ -458,10 +458,16 @@ bool Preprocessor::HandleEndOfFile(Token &Result, bool isEndOfMacro) {
       SourceMgr.setNumCreatedFIDsForFileID(CurPPLexer->getFileID(), NumFIDs);
     }
 
+    bool ExitedFromPredefinesFile = false;
     FileID ExitedFID;
-    if (Callbacks && !isEndOfMacro && CurPPLexer)
+    if (!isEndOfMacro && CurPPLexer) {
       ExitedFID = CurPPLexer->getFileID();
 
+      assert(PredefinesFileID.isValid() &&
+             "HandleEndOfFile is called before PredefinesFileId is set");
+      ExitedFromPredefinesFile = (PredefinesFileID == ExitedFID);
+    }
+
     if (LeavingSubmodule) {
       // We're done with this submodule.
       Module *M = LeaveSubmodule(/*ForPragma*/false);
@@ -489,6 +495,11 @@ bool Preprocessor::HandleEndOfFile(Token &Result, bool isEndOfMacro) {
                              PPCallbacks::ExitFile, FileType, ExitedFID);
     }
 
+    // Restore conditional stack from the preamble right after exiting from the
+    // predefines file.
+    if (ExitedFromPredefinesFile)
+      replayPreambleConditionalStack();
+
     // Client should lex another token unless we generated an EOM.
     return LeavingSubmodule;
   }
@@ -731,7 +742,7 @@ Module *Preprocessor::LeaveSubmodule(bool ForPragma) {
   Module *LeavingMod = Info.M;
   SourceLocation ImportLoc = Info.ImportLoc;
 
-  if (!needModuleMacros() || 
+  if (!needModuleMacros() ||
       (!getLangOpts().ModulesLocalVisibility &&
        LeavingMod->getTopLevelModuleName() != getLangOpts().CurrentModule)) {
     // If we don't need module macros, or this is not a module for which we
@@ -777,17 +788,6 @@ Module *Preprocessor::LeaveSubmodule(bool ForPragma) {
     for (auto *MD = Macro.getLatest(); MD != OldMD; MD = MD->getPrevious()) {
       assert(MD && "broken macro directive chain");
 
-      // Stop on macros defined in other submodules of this module that we
-      // #included along the way. There's no point doing this if we're
-      // tracking local submodule visibility, since there can be no such
-      // directives in our list.
-      if (!getLangOpts().ModulesLocalVisibility) {
-        Module *Mod = getModuleContainingLocation(MD->getLocation());
-        if (Mod != LeavingMod &&
-            Mod->getTopLevelModule() == LeavingMod->getTopLevelModule())
-          break;
-      }
-
       if (auto *VisMD = dyn_cast<VisibilityMacroDirective>(MD)) {
         // The latest visibility directive for a name in a submodule affects
         // all the directives that come before it.
@@ -809,6 +809,13 @@ Module *Preprocessor::LeaveSubmodule(bool ForPragma) {
         if (Def || !Macro.getOverriddenMacros().empty())
           addModuleMacro(LeavingMod, II, Def,
                          Macro.getOverriddenMacros(), IsNew);
+
+        if (!getLangOpts().ModulesLocalVisibility) {
+          // This macro is exposed to the rest of this compilation as a
+          // ModuleMacro; we don't need to track its MacroDirective any more.
+          Macro.setLatest(nullptr);
+          Macro.setOverriddenMacros(*this, {});
+        }
         break;
       }
     }
diff --git a/interpreter/llvm/src/tools/clang/lib/Lex/PPMacroExpansion.cpp b/interpreter/llvm/src/tools/clang/lib/Lex/PPMacroExpansion.cpp
index e39c715eba75c..797f038cf05f3 100644
--- a/interpreter/llvm/src/tools/clang/lib/Lex/PPMacroExpansion.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Lex/PPMacroExpansion.cpp
@@ -426,7 +426,7 @@ static bool isTrivialSingleTokenExpansion(const MacroInfo *MI,
 
   // If this is a function-like macro invocation, it's safe to trivially expand
   // as long as the identifier is not a macro argument.
-  return std::find(MI->arg_begin(), MI->arg_end(), II) == MI->arg_end();
+  return std::find(MI->param_begin(), MI->param_end(), II) == MI->param_end();
 }
 
 /// isNextPPTokenLParen - Determine whether the next preprocessor token to be
@@ -506,7 +506,7 @@ bool Preprocessor::HandleMacroExpandedIdentifier(Token &Identifier,
     // Preprocessor directives used inside macro arguments are not portable, and
     // this enables the warning.
     InMacroArgs = true;
-    Args = ReadFunctionLikeMacroArgs(Identifier, MI, ExpansionEnd);
+    Args = ReadMacroCallArgumentList(Identifier, MI, ExpansionEnd);
 
     // Finished parsing args.
     InMacroArgs = false;
@@ -759,11 +759,11 @@ static bool GenerateNewArgTokens(Preprocessor &PP,
 /// token is the '(' of the macro, this method is invoked to read all of the
 /// actual arguments specified for the macro invocation.  This returns null on
 /// error.
-MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName,
+MacroArgs *Preprocessor::ReadMacroCallArgumentList(Token &MacroName,
                                                    MacroInfo *MI,
                                                    SourceLocation &MacroEnd) {
   // The number of fixed arguments to parse.
-  unsigned NumFixedArgsLeft = MI->getNumArgs();
+  unsigned NumFixedArgsLeft = MI->getNumParams();
   bool isVariadic = MI->isVariadic();
 
   // Outer loop, while there are more arguments, keep reading them.
@@ -903,7 +903,7 @@ MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName,
 
   // Okay, we either found the r_paren.  Check to see if we parsed too few
   // arguments.
-  unsigned MinArgsExpected = MI->getNumArgs();
+  unsigned MinArgsExpected = MI->getNumParams();
 
   // If this is not a variadic macro, and too many args were specified, emit
   // an error.
@@ -1139,6 +1139,7 @@ static bool HasFeature(const Preprocessor &PP, StringRef Feature) {
       .Case("attribute_overloadable", true)
       .Case("attribute_unavailable_with_message", true)
       .Case("attribute_unused_on_fields", true)
+      .Case("attribute_diagnose_if_objc", true)
       .Case("blocks", LangOpts.Blocks)
       .Case("c_thread_safety_attributes", true)
       .Case("cxx_exceptions", LangOpts.CXXExceptions)
@@ -1328,6 +1329,8 @@ static bool HasExtension(const Preprocessor &PP, StringRef Extension) {
            .Case("cxx_binary_literals", true)
            .Case("cxx_init_captures", LangOpts.CPlusPlus11)
            .Case("cxx_variable_templates", LangOpts.CPlusPlus)
+           // Miscellaneous language extensions
+           .Case("overloadable_unmarked", true)
            .Default(false);
 }
 
diff --git a/interpreter/llvm/src/tools/clang/lib/Lex/Pragma.cpp b/interpreter/llvm/src/tools/clang/lib/Lex/Pragma.cpp
index e93edd99db2fa..5c22a26d5b09b 100644
--- a/interpreter/llvm/src/tools/clang/lib/Lex/Pragma.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Lex/Pragma.cpp
@@ -20,6 +20,7 @@
 #include "clang/Basic/TokenKinds.h"
 #include "clang/Lex/HeaderSearch.h"
 #include "clang/Lex/LexDiagnostic.h"
+#include "clang/Lex/LiteralSupport.h"
 #include "clang/Lex/MacroInfo.h"
 #include "clang/Lex/PPCallbacks.h"
 #include "clang/Lex/Preprocessor.h"
@@ -477,9 +478,9 @@ void Preprocessor::HandlePragmaSystemHeader(Token &SysHeaderTok) {
   // Emit a line marker.  This will change any source locations from this point
   // forward to realize they are in a system header.
   // Create a line note with this information.
-  SourceMgr.AddLineNote(SysHeaderTok.getLocation(), PLoc.getLine()+1,
+  SourceMgr.AddLineNote(SysHeaderTok.getLocation(), PLoc.getLine() + 1,
                         FilenameID, /*IsEntry=*/false, /*IsExit=*/false,
-                        /*IsSystem=*/true, /*IsExternC=*/false);
+                        SrcMgr::C_System);
 }
 
 /// HandlePragmaDependency - Handle \#pragma GCC dependency "foo" blah.
@@ -756,6 +757,125 @@ void Preprocessor::HandlePragmaIncludeAlias(Token &Tok) {
   getHeaderSearchInfo().AddIncludeAlias(OriginalSource, ReplaceFileName);
 }
 
+// Lex a component of a module name: either an identifier or a string literal;
+// for components that can be expressed both ways, the two forms are equivalent.
+static bool LexModuleNameComponent(
+    Preprocessor &PP, Token &Tok,
+    std::pair<IdentifierInfo *, SourceLocation> &ModuleNameComponent,
+    bool First) {
+  PP.LexUnexpandedToken(Tok);
+  if (Tok.is(tok::string_literal) && !Tok.hasUDSuffix()) {
+    StringLiteralParser Literal(Tok, PP);
+    if (Literal.hadError)
+      return true;
+    ModuleNameComponent = std::make_pair(
+        PP.getIdentifierInfo(Literal.GetString()), Tok.getLocation());
+  } else if (!Tok.isAnnotation() && Tok.getIdentifierInfo()) {
+    ModuleNameComponent =
+        std::make_pair(Tok.getIdentifierInfo(), Tok.getLocation());
+  } else {
+    PP.Diag(Tok.getLocation(), diag::err_pp_expected_module_name) << First;
+    return true;
+  }
+  return false;
+}
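+// For illustration (hypothetical input): per the equivalence noted above,
+//   #pragma clang module build foo
+//   #pragma clang module build "foo"
+// both spell the same module-name component.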
+
+static bool LexModuleName(
+    Preprocessor &PP, Token &Tok,
+    llvm::SmallVectorImpl<std::pair<IdentifierInfo *, SourceLocation>>
+        &ModuleName) {
+  while (true) {
+    std::pair<IdentifierInfo *, SourceLocation> NameComponent;
+    if (LexModuleNameComponent(PP, Tok, NameComponent, ModuleName.empty()))
+      return true;
+    ModuleName.push_back(NameComponent);
+
+    PP.LexUnexpandedToken(Tok);
+    if (Tok.isNot(tok::period))
+      return false;
+  }
+}
+
+void Preprocessor::HandlePragmaModuleBuild(Token &Tok) {
+  SourceLocation Loc = Tok.getLocation();
+
+  std::pair<IdentifierInfo *, SourceLocation> ModuleNameLoc;
+  if (LexModuleNameComponent(*this, Tok, ModuleNameLoc, true))
+    return;
+  IdentifierInfo *ModuleName = ModuleNameLoc.first;
+
+  LexUnexpandedToken(Tok);
+  if (Tok.isNot(tok::eod)) {
+    Diag(Tok, diag::ext_pp_extra_tokens_at_eol) << "pragma";
+    DiscardUntilEndOfDirective();
+  }
+
+  if (CurPTHLexer) {
+    // FIXME: Support this somehow?
+    Diag(Loc, diag::err_pp_module_build_pth);
+    return;
+  }
+
+  CurLexer->LexingRawMode = true;
+
+  auto TryConsumeIdentifier = [&](StringRef Ident) -> bool {
+    if (Tok.getKind() != tok::raw_identifier ||
+        Tok.getRawIdentifier() != Ident)
+      return false;
+    CurLexer->Lex(Tok);
+    return true;
+  };
+
+  // Scan forward looking for the end of the module.
+  const char *Start = CurLexer->getBufferLocation();
+  const char *End = nullptr;
+  unsigned NestingLevel = 1;
+  while (true) {
+    End = CurLexer->getBufferLocation();
+    CurLexer->Lex(Tok);
+
+    if (Tok.is(tok::eof)) {
+      Diag(Loc, diag::err_pp_module_build_missing_end);
+      break;
+    }
+
+    if (Tok.isNot(tok::hash) || !Tok.isAtStartOfLine()) {
+      // Token was part of module; keep going.
+      continue;
+    }
+
+    // We hit something directive-shaped; check to see if this is the end
+    // of the module build.
+    CurLexer->ParsingPreprocessorDirective = true;
+    CurLexer->Lex(Tok);
+    if (TryConsumeIdentifier("pragma") && TryConsumeIdentifier("clang") &&
+        TryConsumeIdentifier("module")) {
+      if (TryConsumeIdentifier("build"))
+        // #pragma clang module build -> entering a nested module build.
+        ++NestingLevel;
+      else if (TryConsumeIdentifier("endbuild")) {
+        // #pragma clang module endbuild -> leaving a module build.
+        if (--NestingLevel == 0)
+          break;
+      }
+      // We should either be looking at the EOD or more of the current directive
+      // preceding the EOD. Either way we can ignore this token and keep going.
+      assert(Tok.getKind() != tok::eof && "missing EOD before EOF");
+    }
+  }
+
+  CurLexer->LexingRawMode = false;
+
+  // Load the extracted text as a preprocessed module.
+  assert(CurLexer->getBuffer().begin() <= Start &&
+         Start <= CurLexer->getBuffer().end() &&
+         CurLexer->getBuffer().begin() <= End &&
+         End <= CurLexer->getBuffer().end() &&
+         "module source range not contained within same file buffer");
+  TheModuleLoader.loadModuleFromSource(Loc, ModuleName->getName(),
+                                       StringRef(Start, End - Start));
+}
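+// Sketch of the input this handles (hypothetical module text; the actual
+// payload is whatever loadModuleFromSource accepts):
+//
+//   #pragma clang module build mymod
+//   module mymod {}
+//   #pragma clang module endbuild
+//
+// Everything between 'build' and the matching 'endbuild' (nesting is
+// tracked via NestingLevel above) is captured raw and handed to
+// TheModuleLoader.loadModuleFromSource.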
+
 /// AddPragmaHandler - Add the specified pragma handler to the preprocessor.
 /// If 'Namespace' is non-null, then it is a token required to exist on the
 /// pragma line before the pragma string starts, e.g. "STDC" or "GCC".
@@ -1303,26 +1423,6 @@ struct PragmaMessageHandler : public PragmaHandler {
   }
 };
 
-static bool LexModuleName(
-    Preprocessor &PP, Token &Tok,
-    llvm::SmallVectorImpl<std::pair<IdentifierInfo *, SourceLocation>>
-        &ModuleName) {
-  while (true) {
-    PP.LexUnexpandedToken(Tok);
-    if (Tok.isAnnotation() || !Tok.getIdentifierInfo()) {
-      PP.Diag(Tok.getLocation(), diag::err_pp_expected_module_name)
-        << ModuleName.empty();
-      return true;
-    }
-
-    ModuleName.emplace_back(Tok.getIdentifierInfo(), Tok.getLocation());
-
-    PP.LexUnexpandedToken(Tok);
-    if (Tok.isNot(tok::period))
-      return false;
-  }
-}
-
 /// Handle the clang \#pragma module import extension. The syntax is:
 /// \code
 ///   #pragma clang module import some.module.name
@@ -1393,7 +1493,7 @@ struct PragmaModuleBeginHandler : public PragmaHandler {
     // be loaded or implicitly loadable.
     // FIXME: We could create the submodule here. We'd need to know whether
     // it's supposed to be explicit, but not much else.
-    Module *M = PP.getHeaderSearchInfo().getModuleMap().findModule(Current);
+    Module *M = PP.getHeaderSearchInfo().lookupModule(Current);
     if (!M) {
       PP.Diag(ModuleName.front().second,
               diag::err_pp_module_begin_no_module_map) << Current;
@@ -1409,6 +1509,14 @@ struct PragmaModuleBeginHandler : public PragmaHandler {
       M = NewM;
     }
 
+    // If the module isn't available, it doesn't make sense to enter it.
+    if (Preprocessor::checkModuleIsAvailable(
+            PP.getLangOpts(), PP.getTargetInfo(), PP.getDiagnostics(), M)) {
+      PP.Diag(BeginLoc, diag::note_pp_module_begin_here)
+        << M->getTopLevelModuleName();
+      return;
+    }
+
     // Enter the scope of the submodule.
     PP.EnterSubmodule(M, BeginLoc, /*ForPragma*/true);
     PP.EnterAnnotationToken(SourceRange(BeginLoc, ModuleName.back().second),
@@ -1436,6 +1544,39 @@ struct PragmaModuleEndHandler : public PragmaHandler {
   }
 };
 
+/// Handle the clang \#pragma module build extension.
+struct PragmaModuleBuildHandler : public PragmaHandler {
+  PragmaModuleBuildHandler() : PragmaHandler("build") {}
+
+  void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer,
+                    Token &Tok) override {
+    PP.HandlePragmaModuleBuild(Tok);
+  }
+};
+
+/// Handle the clang \#pragma module load extension.
+struct PragmaModuleLoadHandler : public PragmaHandler {
+  PragmaModuleLoadHandler() : PragmaHandler("load") {}
+
+  void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer,
+                    Token &Tok) override {
+    SourceLocation Loc = Tok.getLocation();
+
+    // Read the module name.
+    llvm::SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 8>
+        ModuleName;
+    if (LexModuleName(PP, Tok, ModuleName))
+      return;
+
+    if (Tok.isNot(tok::eod))
+      PP.Diag(Tok, diag::ext_pp_extra_tokens_at_eol) << "pragma";
+
+    // Load the module, don't make it visible.
+    PP.getModuleLoader().loadModule(Loc, ModuleName, Module::Hidden,
+                                    /*IsIncludeDirective=*/false);
+  }
+};
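+// Assumed usage, mirroring the import handler above:
+//   #pragma clang module load some.module.name
+// The module is loaded via the ModuleLoader but stays Module::Hidden, so
+// nothing becomes visible until a later import.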
+
 /// PragmaPushMacroHandler - "\#pragma push_macro" saves the value of the
 /// macro on the top of the stack.
 struct PragmaPushMacroHandler : public PragmaHandler {
@@ -1665,6 +1806,8 @@ void Preprocessor::RegisterBuiltinPragmas() {
   ModuleHandler->AddPragma(new PragmaModuleImportHandler());
   ModuleHandler->AddPragma(new PragmaModuleBeginHandler());
   ModuleHandler->AddPragma(new PragmaModuleEndHandler());
+  ModuleHandler->AddPragma(new PragmaModuleBuildHandler());
+  ModuleHandler->AddPragma(new PragmaModuleLoadHandler());
 
   AddPragmaHandler("STDC", new PragmaSTDC_FENV_ACCESSHandler());
   AddPragmaHandler("STDC", new PragmaSTDC_CX_LIMITED_RANGEHandler());
diff --git a/interpreter/llvm/src/tools/clang/lib/Lex/Preprocessor.cpp b/interpreter/llvm/src/tools/clang/lib/Lex/Preprocessor.cpp
index a6f8bc8cd99e1..bc19c56fd3edc 100644
--- a/interpreter/llvm/src/tools/clang/lib/Lex/Preprocessor.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Lex/Preprocessor.cpp
@@ -150,6 +150,9 @@ Preprocessor::Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts,
     Ident_GetExceptionInfo = Ident_GetExceptionCode = nullptr;
     Ident_AbnormalTermination = nullptr;
   }
+
+  if (this->PPOpts->GeneratePreamble)
+    PreambleConditionalStack.startRecording();
 }
 
 Preprocessor::~Preprocessor() {
@@ -571,6 +574,16 @@ void Preprocessor::EnterMainSourceFile() {
   EnterSourceFile(FID, nullptr, SourceLocation());
 }
 
+void Preprocessor::replayPreambleConditionalStack() {
+  // Restore the conditional stack from the preamble, if there is one.
+  if (PreambleConditionalStack.isReplaying()) {
+    assert(CurPPLexer &&
+           "CurPPLexer is null when calling replayPreambleConditionalStack.");
+    CurPPLexer->setConditionalLevels(PreambleConditionalStack.getStack());
+    PreambleConditionalStack.doneReplaying();
+  }
+}
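+// Illustrative scenario: if the precompiled preamble ended inside an open
+// '#if GUARD' region, the recorded conditional levels are re-installed on
+// the main-file lexer here, so lexing resumes as though the '#if' block
+// were still open.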
+
 void Preprocessor::EndSourceFile() {
   // Notify the client that we reached the end of the source file.
   if (Callbacks)
@@ -608,7 +621,11 @@ IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const {
 
   // Update the token info (identifier info and appropriate token kind).
   Identifier.setIdentifierInfo(II);
-  Identifier.setKind(II->getTokenID());
+  if (getLangOpts().MSVCCompat && II->isCPlusPlusOperatorKeyword() &&
+      getSourceManager().isInSystemHeader(Identifier.getLocation()))
+    Identifier.setKind(clang::tok::identifier);
+  else
+    Identifier.setKind(II->getTokenID());
 
   return II;
 }
@@ -734,12 +751,6 @@ bool Preprocessor::HandleIdentifier(Token &Identifier) {
     II.setIsFutureCompatKeyword(false);
   }
 
-  // C++ 2.11p2: If this is an alternative representation of a C++ operator,
-  // then we act as if it is the actual operator and not the textual
-  // representation of it.
-  if (II.isCPlusPlusOperatorKeyword())
-    Identifier.setIdentifierInfo(nullptr);
-
   // If this is an extension token, diagnose its use.
   // We avoid diagnosing tokens that originate from macro definitions.
   // FIXME: This warning is disabled in cases where it shouldn't be,
diff --git a/interpreter/llvm/src/tools/clang/lib/Lex/ScratchBuffer.cpp b/interpreter/llvm/src/tools/clang/lib/Lex/ScratchBuffer.cpp
index cd8a27e76c286..e0f3966fce480 100644
--- a/interpreter/llvm/src/tools/clang/lib/Lex/ScratchBuffer.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Lex/ScratchBuffer.cpp
@@ -35,6 +35,14 @@ SourceLocation ScratchBuffer::getToken(const char *Buf, unsigned Len,
                                        const char *&DestPtr) {
   if (BytesUsed+Len+2 > ScratchBufSize)
     AllocScratchBuffer(Len+2);
+  else {
+    // Clear out the source line cache if it's already been computed.
+    // FIXME: Allow this to be incrementally extended.
+    auto *ContentCache = const_cast<SrcMgr::ContentCache *>(
+        SourceMgr.getSLocEntry(SourceMgr.getFileID(BufferStartLoc))
+                 .getFile().getContentCache());
+    ContentCache->SourceLineCache = nullptr;
+  }
 
   // Prefix the token with a \n, so that it looks like it is the first thing on
   // its own virtual line in caret diagnostics.
diff --git a/interpreter/llvm/src/tools/clang/lib/Lex/TokenLexer.cpp b/interpreter/llvm/src/tools/clang/lib/Lex/TokenLexer.cpp
index 049e046cece1b..c2e49ba919a93 100644
--- a/interpreter/llvm/src/tools/clang/lib/Lex/TokenLexer.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Lex/TokenLexer.cpp
@@ -67,7 +67,7 @@ void TokenLexer::Init(Token &Tok, SourceLocation ELEnd, MacroInfo *MI,
 
   // If this is a function-like macro, expand the arguments and change
   // Tokens to point to the expanded tokens.
-  if (Macro->isFunctionLike() && Macro->getNumArgs())
+  if (Macro->isFunctionLike() && Macro->getNumParams())
     ExpandFunctionArguments();
 
   // Mark the macro as currently disabled, so that it is not recursively
@@ -122,7 +122,7 @@ bool TokenLexer::MaybeRemoveCommaBeforeVaArgs(
    SmallVectorImpl<Token> &ResultToks, bool HasPasteOperator, MacroInfo *Macro,
     unsigned MacroArgNo, Preprocessor &PP) {
   // Is the macro argument __VA_ARGS__?
-  if (!Macro->isVariadic() || MacroArgNo != Macro->getNumArgs()-1)
+  if (!Macro->isVariadic() || MacroArgNo != Macro->getNumParams()-1)
     return false;
 
   // In Microsoft-compatibility mode, a comma is removed in the expansion
@@ -137,7 +137,7 @@ bool TokenLexer::MaybeRemoveCommaBeforeVaArgs(
   // with GNU extensions, it is removed regardless of named arguments.
   // Microsoft also appears to support this extension, unofficially.
   if (PP.getLangOpts().C99 && !PP.getLangOpts().GNUMode
-        && Macro->getNumArgs() < 2)
+        && Macro->getNumParams() < 2)
     return false;
 
   // Is a comma available to be removed?
@@ -193,7 +193,7 @@ void TokenLexer::ExpandFunctionArguments() {
       NextTokGetsSpace = true;
 
     if (CurTok.isOneOf(tok::hash, tok::hashat)) {
-      int ArgNo = Macro->getArgumentNum(Tokens[i+1].getIdentifierInfo());
+      int ArgNo = Macro->getParameterNum(Tokens[i+1].getIdentifierInfo());
       assert(ArgNo != -1 && "Token following # is not an argument?");
 
       SourceLocation ExpansionLocStart =
@@ -237,7 +237,7 @@ void TokenLexer::ExpandFunctionArguments() {
     // Otherwise, if this is not an argument token, just add the token to the
     // output buffer.
     IdentifierInfo *II = CurTok.getIdentifierInfo();
-    int ArgNo = II ? Macro->getArgumentNum(II) : -1;
+    int ArgNo = II ? Macro->getParameterNum(II) : -1;
     if (ArgNo == -1) {
       // This isn't an argument, just add it.
       ResultToks.push_back(CurTok);
@@ -330,7 +330,7 @@ void TokenLexer::ExpandFunctionArguments() {
       // expansion.
       if (NonEmptyPasteBefore && ResultToks.size() >= 2 &&
           ResultToks[ResultToks.size()-2].is(tok::comma) &&
-          (unsigned)ArgNo == Macro->getNumArgs()-1 &&
+          (unsigned)ArgNo == Macro->getNumParams()-1 &&
           Macro->isVariadic()) {
         VaArgsPseudoPaste = true;
         // Remove the paste operator, report use of the extension.
diff --git a/interpreter/llvm/src/tools/clang/lib/Parse/ParseCXXInlineMethods.cpp b/interpreter/llvm/src/tools/clang/lib/Parse/ParseCXXInlineMethods.cpp
index 172b0edce5e4d..27651c9ca85cb 100644
--- a/interpreter/llvm/src/tools/clang/lib/Parse/ParseCXXInlineMethods.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Parse/ParseCXXInlineMethods.cpp
@@ -166,20 +166,11 @@ NamedDecl *Parser::ParseCXXInlineMethodDef(AccessSpecifier AS,
   }
 
   if (FnD) {
-    // If this is a friend function, mark that it's late-parsed so that
-    // it's still known to be a definition even before we attach the
-    // parsed body.  Sema needs to treat friend function definitions
-    // differently during template instantiation, and it's possible for
-    // the containing class to be instantiated before all its member
-    // function definitions are parsed.
-    //
-    // If you remove this, you can remove the code that clears the flag
-    // after parsing the member.
-    if (D.getDeclSpec().isFriendSpecified()) {
-      FunctionDecl *FD = FnD->getAsFunction();
-      Actions.CheckForFunctionRedefinition(FD);
-      FD->setLateTemplateParsed(true);
-    }
+    FunctionDecl *FD = FnD->getAsFunction();
+    // Track that this function will eventually have a body; Sema needs
+    // to know this.
+    Actions.CheckForFunctionRedefinition(FD);
+    FD->setWillHaveBody(true);
   } else {
     // If semantic analysis could not build a function declaration,
     // just throw away the late-parsed declaration.
@@ -558,10 +549,6 @@ void Parser::ParseLexedMethodDef(LexedMethod &LM) {
 
   ParseFunctionStatementBody(LM.D, FnScope);
 
-  // Clear the late-template-parsed bit if we set it before.
-  if (LM.D)
-    LM.D->getAsFunction()->setLateTemplateParsed(false);
-
   while (Tok.isNot(tok::eof))
     ConsumeAnyToken();
 
@@ -731,19 +718,6 @@ bool Parser::ConsumeAndStoreUntil(tok::TokenKind T1, tok::TokenKind T2,
       ConsumeBrace();
       break;
 
-    case tok::code_completion:
-      Toks.push_back(Tok);
-      ConsumeCodeCompletionToken();
-      break;
-
-    case tok::string_literal:
-    case tok::wide_string_literal:
-    case tok::utf8_string_literal:
-    case tok::utf16_string_literal:
-    case tok::utf32_string_literal:
-      Toks.push_back(Tok);
-      ConsumeStringToken();
-      break;
     case tok::semi:
       if (StopAtSemi)
         return false;
@@ -751,7 +725,7 @@ bool Parser::ConsumeAndStoreUntil(tok::TokenKind T1, tok::TokenKind T2,
     default:
       // consume this token.
       Toks.push_back(Tok);
-      ConsumeToken();
+      ConsumeAnyToken(/*ConsumeCodeCompletionTok*/true);
       break;
     }
     isFirstTokenConsumed = false;
@@ -902,7 +876,7 @@ bool Parser::ConsumeAndStoreFunctionPrologue(CachedTokens &Toks) {
         // If the opening brace is not preceded by one of these tokens, we are
         // missing the mem-initializer-id. In order to recover better, we need
         // to use heuristics to determine if this '{' is most likely the
-        // begining of a brace-init-list or the function body.
+        // beginning of a brace-init-list or the function body.
         // Check the token after the corresponding '}'.
         TentativeParsingAction PA(*this);
         if (SkipUntil(tok::r_brace) &&
diff --git a/interpreter/llvm/src/tools/clang/lib/Parse/ParseDecl.cpp b/interpreter/llvm/src/tools/clang/lib/Parse/ParseDecl.cpp
index 4ccee74eaa905..a4610698c46d0 100644
--- a/interpreter/llvm/src/tools/clang/lib/Parse/ParseDecl.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Parse/ParseDecl.cpp
@@ -71,11 +71,18 @@ TypeResult Parser::ParseTypeName(SourceRange *Range,
   return Actions.ActOnTypeName(getCurScope(), DeclaratorInfo);
 }
 
+/// \brief Normalizes an attribute name by dropping prefixed and suffixed __.
+static StringRef normalizeAttrName(StringRef Name) {
+  if (Name.size() >= 4 && Name.startswith("__") && Name.endswith("__"))
+    return Name.drop_front(2).drop_back(2);
+  return Name;
+}
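+// For example: normalizeAttrName("__unused__") yields "unused"; "unused"
+// and "__unused" are returned unchanged, since only a matched pair of
+// leading and trailing "__" is dropped.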
+
 /// isAttributeLateParsed - Return true if the attribute has arguments that
 /// require late parsing.
 static bool isAttributeLateParsed(const IdentifierInfo &II) {
 #define CLANG_ATTR_LATE_PARSED_LIST
-    return llvm::StringSwitch<bool>(II.getName())
+    return llvm::StringSwitch<bool>(normalizeAttrName(II.getName()))
 #include "clang/Parse/AttrParserStringSwitches.inc"
         .Default(false);
 #undef CLANG_ATTR_LATE_PARSED_LIST
@@ -200,13 +207,6 @@ void Parser::ParseGNUAttributes(ParsedAttributes &attrs,
   }
 }
 
-/// \brief Normalizes an attribute name by dropping prefixed and suffixed __.
-static StringRef normalizeAttrName(StringRef Name) {
-  if (Name.size() >= 4 && Name.startswith("__") && Name.endswith("__"))
-    Name = Name.drop_front(2).drop_back(2);
-  return Name;
-}
-
 /// \brief Determine whether the given attribute has an identifier argument.
 static bool attributeHasIdentifierArg(const IdentifierInfo &II) {
 #define CLANG_ATTR_IDENTIFIER_ARG_LIST
@@ -2552,6 +2552,7 @@ bool Parser::ParseImplicitInt(DeclSpec &DS, CXXScopeSpec *SS,
         }
       }
       // Fall through.
+      LLVM_FALLTHROUGH;
     }
     case tok::comma:
     case tok::equal:
@@ -2628,6 +2629,8 @@ Parser::getDeclSpecContextFromDeclaratorContext(unsigned Context) {
     return DSC_class;
   if (Context == Declarator::FileContext)
     return DSC_top_level;
+  if (Context == Declarator::TemplateParamContext)
+    return DSC_template_param;
   if (Context == Declarator::TemplateTypeArgContext)
     return DSC_template_type_arg;
   if (Context == Declarator::TrailingReturnContext)
@@ -2989,7 +2992,7 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS,
         }
 
         DS.getTypeSpecScope() = SS;
-        ConsumeToken(); // The C++ scope.
+        ConsumeAnnotationToken(); // The C++ scope.
         assert(Tok.is(tok::annot_template_id) &&
                "ParseOptionalCXXScopeSpecifier not working");
         AnnotateTemplateIdTokenAsType();
@@ -2998,7 +3001,7 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS,
 
       if (Next.is(tok::annot_typename)) {
         DS.getTypeSpecScope() = SS;
-        ConsumeToken(); // The C++ scope.
+        ConsumeAnnotationToken(); // The C++ scope.
         if (Tok.getAnnotationValue()) {
           ParsedType T = getTypeAnnotation(Tok);
           isInvalid = DS.SetTypeSpecType(DeclSpec::TST_typename,
@@ -3010,7 +3013,7 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS,
         else
           DS.SetTypeSpecError();
         DS.SetRangeEnd(Tok.getAnnotationEndLoc());
-        ConsumeToken(); // The typename
+        ConsumeAnnotationToken(); // The typename
       }
 
       if (Next.isNot(tok::identifier))
@@ -3037,7 +3040,8 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS,
       // C++ doesn't have implicit int.  Diagnose it as a typo w.r.t. to the
       // typename.
       if (!TypeRep) {
-        ConsumeToken();   // Eat the scope spec so the identifier is current.
+        // Eat the scope spec so the identifier is current.
+        ConsumeAnnotationToken();
         ParsedAttributesWithRange Attrs(AttrFactory);
         if (ParseImplicitInt(DS, &SS, TemplateInfo, AS, DSContext, Attrs)) {
           if (!Attrs.empty()) {
@@ -3050,7 +3054,7 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS,
       }
 
       DS.getTypeSpecScope() = SS;
-      ConsumeToken(); // The C++ scope.
+      ConsumeAnnotationToken(); // The C++ scope.
 
       isInvalid = DS.SetTypeSpecType(DeclSpec::TST_typename, Loc, PrevSpec,
                                      DiagID, TypeRep, Policy);
@@ -3080,7 +3084,7 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS,
         break;
 
       DS.SetRangeEnd(Tok.getAnnotationEndLoc());
-      ConsumeToken(); // The typename
+      ConsumeAnnotationToken(); // The typename
 
       continue;
     }
@@ -3677,6 +3681,7 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS,
         isInvalid = true;
         break;
       };
+      LLVM_FALLTHROUGH;
     case tok::kw___private:
     case tok::kw___global:
     case tok::kw___local:
@@ -4258,7 +4263,9 @@ void Parser::ParseEnumSpecifier(SourceLocation StartLoc, DeclSpec &DS,
                                    AS, DS.getModulePrivateSpecLoc(), TParams,
                                    Owned, IsDependent, ScopedEnumKWLoc,
                                    IsScopedUsingClassTag, BaseType,
-                                   DSC == DSC_type_specifier, &SkipBody);
+                                   DSC == DSC_type_specifier,
+                                   DSC == DSC_template_param ||
+                                   DSC == DSC_template_type_arg, &SkipBody);
 
   if (SkipBody.ShouldSkip) {
     assert(TUK == Sema::TUK_Definition && "can only skip a definition");
@@ -4312,8 +4319,15 @@ void Parser::ParseEnumSpecifier(SourceLocation StartLoc, DeclSpec &DS,
     return;
   }
 
-  if (Tok.is(tok::l_brace) && TUK != Sema::TUK_Reference)
-    ParseEnumBody(StartLoc, TagDecl);
+  if (Tok.is(tok::l_brace) && TUK != Sema::TUK_Reference) {
+    Decl *D = SkipBody.CheckSameAsPrevious ? SkipBody.New : TagDecl;
+    ParseEnumBody(StartLoc, D);
+    if (SkipBody.CheckSameAsPrevious &&
+        !Actions.ActOnDuplicateDefinition(DS, TagDecl, SkipBody)) {
+      DS.SetTypeSpecError();
+      return;
+    }
+  }
 
   if (DS.SetTypeSpecType(DeclSpec::TST_enum, StartLoc,
                          NameLoc.isValid() ? NameLoc : StartLoc,
@@ -4385,11 +4399,9 @@ void Parser::ParseEnumBody(SourceLocation StartLoc, Decl *EnumDecl) {
     }
 
     // Install the enumerator constant into EnumDecl.
-    Decl *EnumConstDecl = Actions.ActOnEnumConstant(getCurScope(), EnumDecl,
-                                                    LastEnumConstDecl,
-                                                    IdentLoc, Ident,
-                                                    attrs.getList(), EqualLoc,
-                                                    AssignedVal.get());
+    Decl *EnumConstDecl = Actions.ActOnEnumConstant(
+        getCurScope(), EnumDecl, LastEnumConstDecl, IdentLoc, Ident,
+        attrs.getList(), EqualLoc, AssignedVal.get());
     EnumAvailabilityDiags.back().done();
 
     EnumConstantDecls.push_back(EnumConstDecl);
@@ -4836,10 +4848,12 @@ bool Parser::isConstructorDeclarator(bool IsUnqualified, bool DeductionGuide) {
   }
 
   // Parse the constructor name.
-  if (Tok.isOneOf(tok::identifier, tok::annot_template_id)) {
+  if (Tok.is(tok::identifier)) {
     // We already know that we have a constructor name; just consume
     // the token.
     ConsumeToken();
+  } else if (Tok.is(tok::annot_template_id)) {
+    ConsumeAnnotationToken();
   } else {
     TPA.Revert();
     return false;
@@ -4895,7 +4909,7 @@ bool Parser::isConstructorDeclarator(bool IsUnqualified, bool DeductionGuide) {
     // be a constructor declaration with an invalid argument type. Keep
     // looking.
     if (Tok.is(tok::annot_cxxscope))
-      ConsumeToken();
+      ConsumeAnnotationToken();
     ConsumeToken();
 
     // If this is not a constructor, we must be parsing a declarator,
@@ -5042,6 +5056,7 @@ void Parser::ParseTypeQualifierListOpt(
         if (TryKeywordIdentFallback(false))
           continue;
       }
+      LLVM_FALLTHROUGH;
     case tok::kw___sptr:
     case tok::kw___w64:
     case tok::kw___ptr64:
@@ -5091,6 +5106,7 @@ void Parser::ParseTypeQualifierListOpt(
         continue; // do *not* consume the next token!
       }
       // otherwise, FALL THROUGH!
+      LLVM_FALLTHROUGH;
     default:
       DoneWithTypeQuals:
       // If this is not a type-qualifier token, we're done reading type
@@ -5539,11 +5555,28 @@ void Parser::ParseDirectDeclarator(Declarator &D) {
     D.SetRangeEnd(Tok.getLocation());
     ConsumeToken();
     goto PastIdentifier;
-  } else if (Tok.is(tok::identifier) && D.diagnoseIdentifier()) {
-    // A virt-specifier isn't treated as an identifier if it appears after a
-    // trailing-return-type.
-    if (D.getContext() != Declarator::TrailingReturnContext ||
-        !isCXX11VirtSpecifier(Tok)) {
+  } else if (Tok.is(tok::identifier) && !D.mayHaveIdentifier()) {
+    // We're not allowed an identifier here, but we got one. Try to figure out
+    // if the user was trying to attach a name to the type, or whether the name
+    // is some unrelated trailing syntax.
+    bool DiagnoseIdentifier = false;
+    if (D.hasGroupingParens())
+      // An identifier within parens is unlikely to be intended to be anything
+      // other than a name being "declared".
+      DiagnoseIdentifier = true;
+    else if (D.getContext() == Declarator::TemplateTypeArgContext)
+      // T<int N> is an accidental identifier; T<int N indicates a missing '>'.
+      DiagnoseIdentifier =
+          NextToken().isOneOf(tok::comma, tok::greater, tok::greatergreater);
+    else if (D.getContext() == Declarator::AliasDeclContext ||
+             D.getContext() == Declarator::AliasTemplateContext)
+      // The most likely error is that the ';' was forgotten.
+      DiagnoseIdentifier = NextToken().isOneOf(tok::comma, tok::semi);
+    else if (D.getContext() == Declarator::TrailingReturnContext &&
+             !isCXX11VirtSpecifier(Tok))
+      DiagnoseIdentifier = NextToken().isOneOf(
+          tok::comma, tok::semi, tok::equal, tok::l_brace, tok::kw_try);
+    if (DiagnoseIdentifier) {
       Diag(Tok.getLocation(), diag::err_unexpected_unqualified_id)
         << FixItHint::CreateRemoval(Tok.getLocation());
       D.SetIdentifier(nullptr, Tok.getLocation());
@@ -6617,7 +6650,7 @@ void Parser::ParseTypeofSpecifier(DeclSpec &DS) {
     return;
   }
 
-  // If we get here, the operand to the typeof was an expresion.
+  // If we get here, the operand to the typeof was an expression.
   if (Operand.isInvalid()) {
     DS.SetTypeSpecError();
     return;
diff --git a/interpreter/llvm/src/tools/clang/lib/Parse/ParseDeclCXX.cpp b/interpreter/llvm/src/tools/clang/lib/Parse/ParseDeclCXX.cpp
index e6cf65e36cfa0..2301284b7f43b 100644
--- a/interpreter/llvm/src/tools/clang/lib/Parse/ParseDeclCXX.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Parse/ParseDeclCXX.cpp
@@ -840,7 +840,9 @@ Decl *Parser::ParseStaticAssertDeclaration(SourceLocation &DeclEnd){
     return nullptr;
   }
 
-  ExprResult AssertExpr(ParseConstantExpression());
+  EnterExpressionEvaluationContext ConstantEvaluated(
+      Actions, Sema::ExpressionEvaluationContext::ConstantEvaluated);
+  ExprResult AssertExpr(ParseConstantExpressionInExprEvalContext());
   if (AssertExpr.isInvalid()) {
     SkipMalformedDecl();
     return nullptr;
@@ -901,7 +903,7 @@ SourceLocation Parser::ParseDecltypeSpecifier(DeclSpec &DS) {
   if (Tok.is(tok::annot_decltype)) {
     Result = getExprAnnotation(Tok);
     EndLoc = Tok.getAnnotationEndLoc();
-    ConsumeToken();
+    ConsumeAnnotationToken();
     if (Result.isInvalid()) {
       DS.SetTypeSpecError();
       return EndLoc;
@@ -1105,7 +1107,7 @@ TypeResult Parser::ParseBaseTypeSpecifier(SourceLocation &BaseLoc,
       assert(Tok.is(tok::annot_typename) && "template-id -> type failed");
       ParsedType Type = getTypeAnnotation(Tok);
       EndLocation = Tok.getAnnotationEndLoc();
-      ConsumeToken();
+      ConsumeAnnotationToken();
 
       if (Type)
         return Type;
@@ -1162,7 +1164,7 @@ TypeResult Parser::ParseBaseTypeSpecifier(SourceLocation &BaseLoc,
     // return.
     EndLocation = Tok.getAnnotationEndLoc();
     ParsedType Type = getTypeAnnotation(Tok);
-    ConsumeToken();
+    ConsumeAnnotationToken();
     return Type;
   }
 
@@ -1565,7 +1567,7 @@ void Parser::ParseClassSpecifier(tok::TokenKind TagTokKind,
     }
   } else if (Tok.is(tok::annot_template_id)) {
     TemplateId = takeTemplateIdAnnotation(Tok);
-    NameLoc = ConsumeToken();
+    NameLoc = ConsumeAnnotationToken();
 
     if (TemplateId->Kind != TNK_Type_template &&
         TemplateId->Kind != TNK_Dependent_template_name) {
@@ -1885,7 +1887,8 @@ void Parser::ParseClassSpecifier(tok::TokenKind TagTokKind,
                                        SourceLocation(), false,
                                        clang::TypeResult(),
                                        DSC == DSC_type_specifier,
-                                       &SkipBody);
+                                       DSC == DSC_template_param ||
+                                       DSC == DSC_template_type_arg, &SkipBody);
 
     // If ActOnTag said the type was dependent, try again with the
     // less common call.
@@ -1907,12 +1910,22 @@ void Parser::ParseClassSpecifier(tok::TokenKind TagTokKind,
     else if (getLangOpts().CPlusPlus)
       ParseCXXMemberSpecification(StartLoc, AttrFixitLoc, attrs, TagType,
                                   TagOrTempResult.get());
-    else
-      ParseStructUnionBody(StartLoc, TagType, TagOrTempResult.get());
+    else {
+      Decl *D =
+          SkipBody.CheckSameAsPrevious ? SkipBody.New : TagOrTempResult.get();
+      // Parse the definition body.
+      ParseStructUnionBody(StartLoc, TagType, D);
+      if (SkipBody.CheckSameAsPrevious &&
+          !Actions.ActOnDuplicateDefinition(DS, TagOrTempResult.get(),
+                                            SkipBody)) {
+        DS.SetTypeSpecError();
+        return;
+      }
+    }
   }
 
   if (!TagOrTempResult.isInvalid())
-    // Delayed proccessing of attributes.
+    // Delayed processing of attributes.
     Actions.ProcessDeclAttributeDelayed(TagOrTempResult.get(), attrs.getList());
 
   const char *PrevSpec = nullptr;
@@ -3405,39 +3418,42 @@ MemInitResult Parser::ParseMemInitializer(Decl *ConstructorDecl) {
   // parse '::'[opt] nested-name-specifier[opt]
   CXXScopeSpec SS;
   ParseOptionalCXXScopeSpecifier(SS, nullptr, /*EnteringContext=*/false);
-  ParsedType TemplateTypeTy;
-  if (Tok.is(tok::annot_template_id)) {
-    TemplateIdAnnotation *TemplateId = takeTemplateIdAnnotation(Tok);
-    if (TemplateId->Kind == TNK_Type_template ||
-        TemplateId->Kind == TNK_Dependent_template_name) {
-      AnnotateTemplateIdTokenAsType(/*IsClassName*/true);
-      assert(Tok.is(tok::annot_typename) && "template-id -> type failed");
-      TemplateTypeTy = getTypeAnnotation(Tok);
-    }
-  }
-  // Uses of decltype will already have been converted to annot_decltype by
-  // ParseOptionalCXXScopeSpecifier at this point.
-  if (!TemplateTypeTy && Tok.isNot(tok::identifier)
-      && Tok.isNot(tok::annot_decltype)) {
-    Diag(Tok, diag::err_expected_member_or_base_name);
-    return true;
-  }
 
+  // : identifier
   IdentifierInfo *II = nullptr;
-  DeclSpec DS(AttrFactory);
   SourceLocation IdLoc = Tok.getLocation();
-  if (Tok.is(tok::annot_decltype)) {
+  // : decltype(...)
+  DeclSpec DS(AttrFactory);
+  // : template_name<...>
+  ParsedType TemplateTypeTy;
+
+  if (Tok.is(tok::identifier)) {
+    // Get the identifier. This may be a member name or a class name,
+    // but we'll let the semantic analysis determine which it is.
+    II = Tok.getIdentifierInfo();
+    ConsumeToken();
+  } else if (Tok.is(tok::annot_decltype)) {
     // Get the decltype expression, if there is one.
+    // Uses of decltype will already have been converted to annot_decltype by
+    // ParseOptionalCXXScopeSpecifier at this point.
+    // FIXME: Can we get here with a scope specifier?
     ParseDecltypeSpecifier(DS);
   } else {
-    if (Tok.is(tok::identifier))
-      // Get the identifier. This may be a member name or a class name,
-      // but we'll let the semantic analysis determine which it is.
-      II = Tok.getIdentifierInfo();
-    ConsumeToken();
+    TemplateIdAnnotation *TemplateId = Tok.is(tok::annot_template_id)
+                                           ? takeTemplateIdAnnotation(Tok)
+                                           : nullptr;
+    if (TemplateId && (TemplateId->Kind == TNK_Type_template ||
+                       TemplateId->Kind == TNK_Dependent_template_name)) {
+      AnnotateTemplateIdTokenAsType(/*IsClassName*/true);
+      assert(Tok.is(tok::annot_typename) && "template-id -> type failed");
+      TemplateTypeTy = getTypeAnnotation(Tok);
+      ConsumeAnnotationToken();
+    } else {
+      Diag(Tok, diag::err_expected_member_or_base_name);
+      return true;
+    }
   }
 
-
   // Parse the '('.
   if (getLangOpts().CPlusPlus11 && Tok.is(tok::l_brace)) {
     Diag(Tok, diag::warn_cxx98_compat_generalized_initializer_lists);
@@ -4212,6 +4228,7 @@ void Parser::ParseMicrosoftIfExistsClassDeclaration(DeclSpec::TST TagType,
     Diag(Result.KeywordLoc, diag::warn_microsoft_dependent_exists)
       << Result.IsIfExists;
     // Fall through to skip.
+    LLVM_FALLTHROUGH;
       
   case IEB_Skip:
     Braces.skipToEnd();
diff --git a/interpreter/llvm/src/tools/clang/lib/Parse/ParseExpr.cpp b/interpreter/llvm/src/tools/clang/lib/Parse/ParseExpr.cpp
index 727fd3500991e..44b87af01abd0 100644
--- a/interpreter/llvm/src/tools/clang/lib/Parse/ParseExpr.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Parse/ParseExpr.cpp
@@ -192,6 +192,16 @@ Parser::ParseAssignmentExprWithObjCMessageExprStart(SourceLocation LBracLoc,
   return ParseRHSOfBinaryExpression(R, prec::Assignment);
 }
 
+ExprResult
+Parser::ParseConstantExpressionInExprEvalContext(TypeCastState isTypeCast) {
+  assert(Actions.ExprEvalContexts.back().Context ==
+             Sema::ExpressionEvaluationContext::ConstantEvaluated &&
+         "Call this function only if your ExpressionEvaluationContext is "
+         "already ConstantEvaluated");
+  ExprResult LHS(ParseCastExpression(false, false, isTypeCast));
+  ExprResult Res(ParseRHSOfBinaryExpression(LHS, prec::Conditional));
+  return Actions.ActOnConstantExpression(Res);
+}
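+// Usage note: callers must already have entered a ConstantEvaluated
+// context, as ParseStaticAssertDeclaration now does before calling this;
+// the assert above enforces that precondition.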
 
 ExprResult Parser::ParseConstantExpression(TypeCastState isTypeCast) {
   // C++03 [basic.def.odr]p2:
@@ -200,10 +210,7 @@ ExprResult Parser::ParseConstantExpression(TypeCastState isTypeCast) {
   // C++98 and C++11 have no such rule, but this is only a defect in C++98.
   EnterExpressionEvaluationContext ConstantEvaluated(
       Actions, Sema::ExpressionEvaluationContext::ConstantEvaluated);
-
-  ExprResult LHS(ParseCastExpression(false, false, isTypeCast));
-  ExprResult Res(ParseRHSOfBinaryExpression(LHS, prec::Conditional));
-  return Actions.ActOnConstantExpression(Res);
+  return ParseConstantExpressionInExprEvalContext(isTypeCast);
 }
 
 /// \brief Parse a constraint-expression.
@@ -804,7 +811,7 @@ ExprResult Parser::ParseCastExpression(bool isUnaryExpression,
   case tok::annot_primary_expr:
     assert(Res.get() == nullptr && "Stray primary-expression annotation?");
     Res = getExprAnnotation(Tok);
-    ConsumeToken();
+    ConsumeAnnotationToken();
     break;
 
   case tok::kw___super:
@@ -1199,7 +1206,7 @@ ExprResult Parser::ParseCastExpression(bool isUnaryExpression,
       if (Ty.isInvalid())
         break;
 
-      ConsumeToken();
+      ConsumeAnnotationToken();
       Res = ParseObjCMessageExpressionBody(SourceLocation(), SourceLocation(),
                                            Ty.get(), nullptr);
       break;
@@ -1307,6 +1314,7 @@ ExprResult Parser::ParseCastExpression(bool isUnaryExpression,
     }
 
     // Fall through to treat the template-id as an id-expression.
+    LLVM_FALLTHROUGH;
   }
 
   case tok::kw_operator: // [C++] id-expression: operator/conversion-function-id
@@ -1477,9 +1485,9 @@ Parser::ParsePostfixExpressionSuffix(ExprResult LHS) {
                                              nullptr, LHS.get());
         break;
       }
-        
       // Fall through; this isn't a message send.
-                
+      LLVM_FALLTHROUGH;
+
     default:  // Not a postfix-expression suffix.
       return LHS;
     case tok::l_square: {  // postfix-expression: p-e '[' expression ']'
@@ -1858,7 +1866,7 @@ Parser::ParseExprAfterUnaryExprOrTypeTrait(const Token &OpTok,
     }
   }
 
-  // If we get here, the operand to the typeof/sizeof/alignof was an expresion.
+  // If we get here, the operand to the typeof/sizeof/alignof was an expression.
   isCastExpr = false;
   return Operand;
 }
@@ -1964,7 +1972,7 @@ ExprResult Parser::ParseUnaryExprOrTypeTraitExpression() {
   if (OpTok.isOneOf(tok::kw_alignof, tok::kw__Alignof))
     Diag(OpTok, diag::ext_alignof_expr) << OpTok.getIdentifierInfo();
 
-  // If we get here, the operand to the sizeof/alignof was an expresion.
+  // If we get here, the operand to the sizeof/alignof was an expression.
   if (!Operand.isInvalid())
     Operand = Actions.ActOnUnaryExprOrTypeTraitExpr(OpTok.getLocation(),
                                                     ExprKind,
diff --git a/interpreter/llvm/src/tools/clang/lib/Parse/ParseExprCXX.cpp b/interpreter/llvm/src/tools/clang/lib/Parse/ParseExprCXX.cpp
index 56093f685617b..dcafbadae5c08 100644
--- a/interpreter/llvm/src/tools/clang/lib/Parse/ParseExprCXX.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Parse/ParseExprCXX.cpp
@@ -160,7 +160,7 @@ bool Parser::ParseOptionalCXXScopeSpecifier(CXXScopeSpec &SS,
     Actions.RestoreNestedNameSpecifierAnnotation(Tok.getAnnotationValue(),
                                                  Tok.getAnnotationRange(),
                                                  SS);
-    ConsumeToken();
+    ConsumeAnnotationToken();
     return false;
   }
 
@@ -346,7 +346,7 @@ bool Parser::ParseOptionalCXXScopeSpecifier(CXXScopeSpec &SS,
         *LastII = TemplateId->Name;
 
       // Consume the template-id token.
-      ConsumeToken();
+      ConsumeAnnotationToken();
 
       assert(Tok.is(tok::coloncolon) && "NextToken() not working properly!");
       SourceLocation CCLoc = ConsumeToken();
@@ -920,7 +920,7 @@ Optional<unsigned> Parser::ParseLambdaIntroducer(LambdaIntroducer &Intro,
             PP.AnnotateCachedTokens(Tok);
 
             // Consume the annotated initializer.
-            ConsumeToken();
+            ConsumeAnnotationToken();
           }
         }
       } else
@@ -1528,7 +1528,7 @@ Parser::ParseCXXPseudoDestructor(Expr *Base, SourceLocation OpLoc,
     // store it in the pseudo-dtor node (to be used when instantiating it).
     FirstTypeName.setTemplateId(
                               (TemplateIdAnnotation *)Tok.getAnnotationValue());
-    ConsumeToken();
+    ConsumeAnnotationToken();
     assert(Tok.is(tok::coloncolon) &&"ParseOptionalCXXScopeSpecifier fail");
     CCLoc = ConsumeToken();
   } else {
@@ -1882,7 +1882,7 @@ void Parser::ParseCXXSimpleTypeSpecifier(DeclSpec &DS) {
       DS.SetTypeSpecError();
     
     DS.SetRangeEnd(Tok.getAnnotationEndLoc());
-    ConsumeToken();
+    ConsumeAnnotationToken();
     
     DS.Finish(Actions, Policy);
     return;
@@ -1951,11 +1951,8 @@ void Parser::ParseCXXSimpleTypeSpecifier(DeclSpec &DS) {
     DS.Finish(Actions, Policy);
     return;
   }
-  if (Tok.is(tok::annot_typename))
-    DS.SetRangeEnd(Tok.getAnnotationEndLoc());
-  else
-    DS.SetRangeEnd(Tok.getLocation());
-  ConsumeToken();
+  ConsumeAnyToken();
+  DS.SetRangeEnd(PrevTokLocation);
   DS.Finish(Actions, Policy);
 }
 
@@ -2123,31 +2120,18 @@ bool Parser::ParseUnqualifiedIdTemplateId(CXXScopeSpec &SS,
       Id.getKind() == UnqualifiedId::IK_LiteralOperatorId) {
     // Form a parsed representation of the template-id to be stored in the
     // UnqualifiedId.
-    TemplateIdAnnotation *TemplateId
-      = TemplateIdAnnotation::Allocate(TemplateArgs.size(), TemplateIds);
 
     // FIXME: Store name for literal operator too.
-    if (Id.getKind() == UnqualifiedId::IK_Identifier) {
-      TemplateId->Name = Id.Identifier;
-      TemplateId->Operator = OO_None;
-      TemplateId->TemplateNameLoc = Id.StartLocation;
-    } else {
-      TemplateId->Name = nullptr;
-      TemplateId->Operator = Id.OperatorFunctionId.Operator;
-      TemplateId->TemplateNameLoc = Id.StartLocation;
-    }
+    IdentifierInfo *TemplateII =
+        Id.getKind() == UnqualifiedId::IK_Identifier ? Id.Identifier : nullptr;
+    OverloadedOperatorKind OpKind = Id.getKind() == UnqualifiedId::IK_Identifier
+                                        ? OO_None
+                                        : Id.OperatorFunctionId.Operator;
+
+    TemplateIdAnnotation *TemplateId = TemplateIdAnnotation::Create(
+        SS, TemplateKWLoc, Id.StartLocation, TemplateII, OpKind, Template, TNK,
+        LAngleLoc, RAngleLoc, TemplateArgs, TemplateIds);
 
-    TemplateId->SS = SS;
-    TemplateId->TemplateKWLoc = TemplateKWLoc;
-    TemplateId->Template = Template;
-    TemplateId->Kind = TNK;
-    TemplateId->LAngleLoc = LAngleLoc;
-    TemplateId->RAngleLoc = RAngleLoc;
-    ParsedTemplateArgument *Args = TemplateId->getTemplateArgs();
-    for (unsigned Arg = 0, ArgEnd = TemplateArgs.size(); 
-         Arg != ArgEnd; ++Arg)
-      Args[Arg] = TemplateArgs[Arg];
-    
     Id.setTemplateId(TemplateId);
     return false;
   }
@@ -2529,12 +2513,12 @@ bool Parser::ParseUnqualifiedId(CXXScopeSpec &SS, bool EnteringContext,
                                 /*NontrivialTypeSourceInfo=*/true);
         Result.setConstructorName(Ty, TemplateId->TemplateNameLoc,
                                   TemplateId->RAngleLoc);
-        ConsumeToken();
+        ConsumeAnnotationToken();
         return false;
       }
 
       Result.setConstructorTemplateId(TemplateId);
-      ConsumeToken();
+      ConsumeAnnotationToken();
       return false;
     }
 
@@ -2542,7 +2526,7 @@ bool Parser::ParseUnqualifiedId(CXXScopeSpec &SS, bool EnteringContext,
     // our unqualified-id.
     Result.setTemplateId(TemplateId);
     TemplateKWLoc = TemplateId->TemplateKWLoc;
-    ConsumeToken();
+    ConsumeAnnotationToken();
     return false;
   }
   
diff --git a/interpreter/llvm/src/tools/clang/lib/Parse/ParseInit.cpp b/interpreter/llvm/src/tools/clang/lib/Parse/ParseInit.cpp
index f48d01e0f6300..90f3561cb9637 100644
--- a/interpreter/llvm/src/tools/clang/lib/Parse/ParseInit.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Parse/ParseInit.cpp
@@ -501,7 +501,8 @@ bool Parser::ParseMicrosoftIfExistsBraceInitializer(ExprVector &InitExprs,
     Diag(Result.KeywordLoc, diag::warn_microsoft_dependent_exists)
       << Result.IsIfExists;
     // Fall through to skip.
-      
+    LLVM_FALLTHROUGH;
+
   case IEB_Skip:
     Braces.skipToEnd();
     return false;
diff --git a/interpreter/llvm/src/tools/clang/lib/Parse/ParseObjc.cpp b/interpreter/llvm/src/tools/clang/lib/Parse/ParseObjc.cpp
index 77e63efc065eb..01b1bf48e4738 100644
--- a/interpreter/llvm/src/tools/clang/lib/Parse/ParseObjc.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Parse/ParseObjc.cpp
@@ -1007,6 +1007,10 @@ IdentifierInfo *Parser::ParseObjCSelectorPiece(SourceLocation &SelectorLoc) {
   switch (Tok.getKind()) {
   default:
     return nullptr;
+  case tok::colon:
+    // Empty selector piece uses the location of the ':'.
+    SelectorLoc = Tok.getLocation();
+    return nullptr;
   case tok::ampamp:
   case tok::ampequal:
   case tok::amp:
@@ -2255,7 +2259,7 @@ Parser::ObjCImplParsingDataRAII::~ObjCImplParsingDataRAII() {
 
 void Parser::ObjCImplParsingDataRAII::finish(SourceRange AtEnd) {
   assert(!Finished);
-  P.Actions.DefaultSynthesizeProperties(P.getCurScope(), Dcl);
+  P.Actions.DefaultSynthesizeProperties(P.getCurScope(), Dcl, AtEnd.getBegin());
   for (size_t i = 0; i < LateParsedObjCMethods.size(); ++i)
     P.ParseLexedObjCMethodDefs(*LateParsedObjCMethods[i], 
                                true/*Methods*/);
@@ -3627,6 +3631,14 @@ void Parser::ParseLexedObjCMethodDefs(LexedMethod &LM, bool parseMethod) {
   SourceLocation OrigLoc = Tok.getLocation();
 
   assert(!LM.Toks.empty() && "ParseLexedObjCMethodDef - Empty body!");
+  // Store an artificial EOF token to ensure that we don't run off the end of
+  // the method's body when we come to parse it.
+  Token Eof;
+  Eof.startToken();
+  Eof.setKind(tok::eof);
+  Eof.setEofData(MCDecl);
+  Eof.setLocation(OrigLoc);
+  LM.Toks.push_back(Eof);
   // Append the current token at the end of the new token stream so that it
   // doesn't get lost.
   LM.Toks.push_back(Tok);
@@ -3658,7 +3670,7 @@ void Parser::ParseLexedObjCMethodDefs(LexedMethod &LM, bool parseMethod) {
       Actions.ActOnDefaultCtorInitializers(MCDecl);
     ParseFunctionStatementBody(MCDecl, BodyScope);
   }
-  
+
   if (Tok.getLocation() != OrigLoc) {
     // Due to parsing error, we either went over the cached tokens or
     // there are still cached tokens left. If it's the latter case skip the
@@ -3670,4 +3682,6 @@ void Parser::ParseLexedObjCMethodDefs(LexedMethod &LM, bool parseMethod) {
       while (Tok.getLocation() != OrigLoc && Tok.isNot(tok::eof))
         ConsumeAnyToken();
   }
+  // Clean up the remaining EOF token.
+  ConsumeAnyToken();
 }
diff --git a/interpreter/llvm/src/tools/clang/lib/Parse/ParseOpenMP.cpp b/interpreter/llvm/src/tools/clang/lib/Parse/ParseOpenMP.cpp
index 86ac035f3c8c9..d9a088595ab73 100644
--- a/interpreter/llvm/src/tools/clang/lib/Parse/ParseOpenMP.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Parse/ParseOpenMP.cpp
@@ -192,6 +192,7 @@ static DeclarationName parseOpenMPReductionId(Parser &P) {
   case tok::identifier: // identifier
     if (!WithOperator)
       break;
+    LLVM_FALLTHROUGH;
   default:
     P.Diag(Tok.getLocation(), diag::err_omp_expected_reduction_identifier);
     P.SkipUntil(tok::colon, tok::r_paren, tok::annot_pragma_openmp_end,
@@ -532,7 +533,7 @@ Parser::ParseOMPDeclareSimdClauses(Parser::DeclGroupPtrTy Ptr,
       ConsumeAnyToken();
   }
   // Skip the last annot_pragma_openmp_end.
-  SourceLocation EndLoc = ConsumeToken();
+  SourceLocation EndLoc = ConsumeAnnotationToken();
   if (!IsError) {
     return Actions.ActOnOpenMPDeclareSimdDirective(
         Ptr, BS, Simdlen.get(), Uniforms, Aligneds, Alignments, Linears,
@@ -562,7 +563,7 @@ Parser::DeclGroupPtrTy Parser::ParseOpenMPDeclarativeDirectiveWithExtDecl(
   assert(Tok.is(tok::annot_pragma_openmp) && "Not an OpenMP directive!");
   ParenBraceBracketBalancer BalancerRAIIObj(*this);
 
-  SourceLocation Loc = ConsumeToken();
+  SourceLocation Loc = ConsumeAnnotationToken();
   auto DKind = ParseOpenMPDirectiveKind(*this);
 
   switch (DKind) {
@@ -578,7 +579,7 @@ Parser::DeclGroupPtrTy Parser::ParseOpenMPDeclarativeDirectiveWithExtDecl(
         SkipUntil(tok::annot_pragma_openmp_end, StopBeforeMatch);
       }
       // Skip the last annot_pragma_openmp_end.
-      ConsumeToken();
+      ConsumeAnnotationToken();
       return Actions.ActOnOpenMPThreadprivateDirective(Loc,
                                                        Helper.getIdentifiers());
     }
@@ -596,7 +597,7 @@ Parser::DeclGroupPtrTy Parser::ParseOpenMPDeclarativeDirectiveWithExtDecl(
           ConsumeAnyToken();
       }
       // Skip the last annot_pragma_openmp_end.
-      ConsumeToken();
+      ConsumeAnnotationToken();
       return Res;
     }
     break;
@@ -686,7 +687,7 @@ Parser::DeclGroupPtrTy Parser::ParseOpenMPDeclarativeDirectiveWithExtDecl(
       ParseExternalDeclaration(attrs);
       if (Tok.isAnnotation() && Tok.is(tok::annot_pragma_openmp)) {
         TentativeParsingAction TPA(*this);
-        ConsumeToken();
+        ConsumeAnnotationToken();
         DKind = ParseOpenMPDirectiveKind(*this);
         if (DKind != OMPD_end_declare_target)
           TPA.Revert();
@@ -814,7 +815,7 @@ StmtResult Parser::ParseOpenMPDeclarativeOrExecutableDirective(
   FirstClauses(OMPC_unknown + 1);
   unsigned ScopeFlags =
       Scope::FnScope | Scope::DeclScope | Scope::OpenMPDirectiveScope;
-  SourceLocation Loc = ConsumeToken(), EndLoc;
+  SourceLocation Loc = ConsumeAnnotationToken(), EndLoc;
   auto DKind = ParseOpenMPDirectiveKind(*this);
   OpenMPDirectiveKind CancelRegion = OMPD_unknown;
   // Name of critical directive.
@@ -869,6 +870,7 @@ StmtResult Parser::ParseOpenMPDeclarativeOrExecutableDirective(
       // pseudo-clause OMPFlushClause.
       PP.EnterToken(Tok);
     }
+    LLVM_FALLTHROUGH;
   case OMPD_taskyield:
   case OMPD_barrier:
   case OMPD_taskwait:
@@ -883,6 +885,7 @@ StmtResult Parser::ParseOpenMPDeclarativeOrExecutableDirective(
     }
     HasAssociatedStatement = false;
     // Fall through for further analysis.
+    LLVM_FALLTHROUGH;
   case OMPD_parallel:
   case OMPD_simd:
   case OMPD_for:
@@ -973,7 +976,7 @@ StmtResult Parser::ParseOpenMPDeclarativeOrExecutableDirective(
     // End location of the directive.
     EndLoc = Tok.getLocation();
     // Consume final annot_pragma_openmp_end.
-    ConsumeToken();
+    ConsumeAnnotationToken();
 
     // OpenMP [2.13.8, ordered Construct, Syntax]
     // If the depend clause is specified, the ordered construct is a stand-alone
@@ -1099,7 +1102,7 @@ bool Parser::ParseOpenMPSimpleVarList(
 ///       simdlen-clause | threads-clause | simd-clause | num_teams-clause |
 ///       thread_limit-clause | priority-clause | grainsize-clause |
 ///       nogroup-clause | num_tasks-clause | hint-clause | to-clause |
-///       from-clause | is_device_ptr-clause
+///       from-clause | is_device_ptr-clause | task_reduction-clause
 ///
 OMPClause *Parser::ParseOpenMPClause(OpenMPDirectiveKind DKind,
                                      OpenMPClauseKind CKind, bool FirstClause) {
@@ -1184,6 +1187,7 @@ OMPClause *Parser::ParseOpenMPClause(OpenMPDirectiveKind DKind,
           << getOpenMPDirectiveName(DKind) << getOpenMPClauseName(CKind) << 0;
       ErrorFound = true;
     }
+    LLVM_FALLTHROUGH;
 
   case OMPC_if:
     Clause = ParseOpenMPSingleExprWithArgClause(CKind);
@@ -1216,6 +1220,7 @@ OMPClause *Parser::ParseOpenMPClause(OpenMPDirectiveKind DKind,
   case OMPC_lastprivate:
   case OMPC_shared:
   case OMPC_reduction:
+  case OMPC_task_reduction:
   case OMPC_linear:
   case OMPC_aligned:
   case OMPC_copyin:
@@ -1581,7 +1586,7 @@ bool Parser::ParseOpenMPVarList(OpenMPDirectiveKind DKind,
   BalancedDelimiterTracker LinearT(*this, tok::l_paren,
                                   tok::annot_pragma_openmp_end);
   // Handle reduction-identifier for reduction clause.
-  if (Kind == OMPC_reduction) {
+  if (Kind == OMPC_reduction || Kind == OMPC_task_reduction) {
     ColonProtectionRAIIObject ColonRAII(*this);
     if (getLangOpts().CPlusPlus)
       ParseOptionalCXXScopeSpecifier(Data.ReductionIdScopeSpec,
@@ -1729,13 +1734,13 @@ bool Parser::ParseOpenMPVarList(OpenMPDirectiveKind DKind,
       Diag(Tok, diag::warn_pragma_expected_colon) << "map type";
   }
 
-  bool IsComma =
-      (Kind != OMPC_reduction && Kind != OMPC_depend && Kind != OMPC_map) ||
-      (Kind == OMPC_reduction && !InvalidReductionId) ||
-      (Kind == OMPC_map && Data.MapType != OMPC_MAP_unknown &&
-       (!MapTypeModifierSpecified ||
-        Data.MapTypeModifier == OMPC_MAP_always)) ||
-      (Kind == OMPC_depend && Data.DepKind != OMPC_DEPEND_unknown);
+  bool IsComma = (Kind != OMPC_reduction && Kind != OMPC_task_reduction &&
+                  Kind != OMPC_depend && Kind != OMPC_map) ||
+                 (Kind == OMPC_reduction && !InvalidReductionId) ||
+                 (Kind == OMPC_map && Data.MapType != OMPC_MAP_unknown &&
+                  (!MapTypeModifierSpecified ||
+                   Data.MapTypeModifier == OMPC_MAP_always)) ||
+                 (Kind == OMPC_depend && Data.DepKind != OMPC_DEPEND_unknown);
   const bool MayHaveTail = (Kind == OMPC_linear || Kind == OMPC_aligned);
   while (IsComma || (Tok.isNot(tok::r_paren) && Tok.isNot(tok::colon) &&
                      Tok.isNot(tok::annot_pragma_openmp_end))) {
@@ -1791,7 +1796,7 @@ bool Parser::ParseOpenMPVarList(OpenMPDirectiveKind DKind,
 }
 
 /// \brief Parsing of OpenMP clause 'private', 'firstprivate', 'lastprivate',
-/// 'shared', 'copyin', 'copyprivate', 'flush' or 'reduction'.
+/// 'shared', 'copyin', 'copyprivate', 'flush', 'reduction' or 'task_reduction'.
 ///
 ///    private-clause:
 ///       'private' '(' list ')'
@@ -1807,6 +1812,8 @@ bool Parser::ParseOpenMPVarList(OpenMPDirectiveKind DKind,
 ///       'aligned' '(' list [ ':' alignment ] ')'
 ///    reduction-clause:
 ///       'reduction' '(' reduction-identifier ':' list ')'
+///    task_reduction-clause:
+///       'task_reduction' '(' reduction-identifier ':' list ')'
 ///    copyprivate-clause:
 ///       'copyprivate' '(' list ')'
 ///    flush-clause:
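
For reference, the surface syntax that the new task_reduction parsing accepts;
a sketch only, with illustrative names, to be compiled with OpenMP support:

    int sumArray(const int *a, int n) {
      int sum = 0;
      #pragma omp taskgroup task_reduction(+ : sum)
      {
        for (int i = 0; i < n; ++i)
          sum += a[i];
      }
      return sum;
    }
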
diff --git a/interpreter/llvm/src/tools/clang/lib/Parse/ParsePragma.cpp b/interpreter/llvm/src/tools/clang/lib/Parse/ParsePragma.cpp
index e764f8d320292..ac44a856e8f9e 100644
--- a/interpreter/llvm/src/tools/clang/lib/Parse/ParsePragma.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Parse/ParsePragma.cpp
@@ -49,6 +49,15 @@ struct PragmaPackHandler : public PragmaHandler {
                     Token &FirstToken) override;
 };
 
+struct PragmaClangSectionHandler : public PragmaHandler {
+  explicit PragmaClangSectionHandler(Sema &S)
+             : PragmaHandler("section"), Actions(S) {}
+  void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer,
+                    Token &FirstToken) override;
+private:
+  Sema &Actions;
+};
+
 struct PragmaMSStructHandler : public PragmaHandler {
   explicit PragmaMSStructHandler() : PragmaHandler("ms_struct") {}
   void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer,
@@ -228,6 +237,9 @@ void Parser::initializePragmaHandlers() {
   FPContractHandler.reset(new PragmaFPContractHandler());
   PP.AddPragmaHandler("STDC", FPContractHandler.get());
 
+  PCSectionHandler.reset(new PragmaClangSectionHandler(Actions));
+  PP.AddPragmaHandler("clang", PCSectionHandler.get());
+
   if (getLangOpts().OpenCL) {
     OpenCLExtensionHandler.reset(new PragmaOpenCLExtensionHandler());
     PP.AddPragmaHandler("OPENCL", OpenCLExtensionHandler.get());
@@ -331,6 +343,9 @@ void Parser::resetPragmaHandlers() {
     MSCommentHandler.reset();
   }
 
+  PP.RemovePragmaHandler("clang", PCSectionHandler.get());
+  PCSectionHandler.reset();
+
   if (getLangOpts().MicrosoftExt) {
     PP.RemovePragmaHandler(MSDetectMismatchHandler.get());
     MSDetectMismatchHandler.reset();
@@ -390,7 +405,7 @@ void Parser::resetPragmaHandlers() {
 /// annot_pragma_unused 'x' annot_pragma_unused 'y'
 void Parser::HandlePragmaUnused() {
   assert(Tok.is(tok::annot_pragma_unused));
-  SourceLocation UnusedLoc = ConsumeToken();
+  SourceLocation UnusedLoc = ConsumeAnnotationToken();
   Actions.ActOnPragmaUnused(Tok, getCurScope(), UnusedLoc);
   ConsumeToken(); // The argument token.
 }
@@ -399,7 +414,7 @@ void Parser::HandlePragmaVisibility() {
   assert(Tok.is(tok::annot_pragma_vis));
   const IdentifierInfo *VisType =
     static_cast<IdentifierInfo *>(Tok.getAnnotationValue());
-  SourceLocation VisLoc = ConsumeToken();
+  SourceLocation VisLoc = ConsumeAnnotationToken();
   Actions.ActOnPragmaVisibility(VisType, VisLoc);
 }
 
@@ -415,7 +430,7 @@ void Parser::HandlePragmaPack() {
   assert(Tok.is(tok::annot_pragma_pack));
   PragmaPackInfo *Info =
     static_cast<PragmaPackInfo *>(Tok.getAnnotationValue());
-  SourceLocation PragmaLoc = ConsumeToken();
+  SourceLocation PragmaLoc = ConsumeAnnotationToken();
   ExprResult Alignment;
   if (Info->Alignment.is(tok::numeric_constant)) {
     Alignment = Actions.ActOnNumericConstant(Info->Alignment);
@@ -431,7 +446,7 @@ void Parser::HandlePragmaMSStruct() {
   PragmaMSStructKind Kind = static_cast<PragmaMSStructKind>(
       reinterpret_cast<uintptr_t>(Tok.getAnnotationValue()));
   Actions.ActOnPragmaMSStruct(Kind);
-  ConsumeToken(); // The annotation token.
+  ConsumeAnnotationToken();
 }
 
 void Parser::HandlePragmaAlign() {
@@ -439,7 +454,7 @@ void Parser::HandlePragmaAlign() {
   Sema::PragmaOptionsAlignKind Kind =
     static_cast<Sema::PragmaOptionsAlignKind>(
     reinterpret_cast<uintptr_t>(Tok.getAnnotationValue()));
-  SourceLocation PragmaLoc = ConsumeToken();
+  SourceLocation PragmaLoc = ConsumeAnnotationToken();
   Actions.ActOnPragmaOptionsAlign(Kind, PragmaLoc);
 }
 
@@ -448,12 +463,12 @@ void Parser::HandlePragmaDump() {
   IdentifierInfo *II =
       reinterpret_cast<IdentifierInfo *>(Tok.getAnnotationValue());
   Actions.ActOnPragmaDump(getCurScope(), Tok.getLocation(), II);
-  ConsumeToken();
+  ConsumeAnnotationToken();
 }
 
 void Parser::HandlePragmaWeak() {
   assert(Tok.is(tok::annot_pragma_weak));
-  SourceLocation PragmaLoc = ConsumeToken();
+  SourceLocation PragmaLoc = ConsumeAnnotationToken();
   Actions.ActOnPragmaWeakID(Tok.getIdentifierInfo(), PragmaLoc,
                             Tok.getLocation());
   ConsumeToken(); // The weak name.
@@ -461,7 +476,7 @@ void Parser::HandlePragmaWeak() {
 
 void Parser::HandlePragmaWeakAlias() {
   assert(Tok.is(tok::annot_pragma_weakalias));
-  SourceLocation PragmaLoc = ConsumeToken();
+  SourceLocation PragmaLoc = ConsumeAnnotationToken();
   IdentifierInfo *WeakName = Tok.getIdentifierInfo();
   SourceLocation WeakNameLoc = Tok.getLocation();
   ConsumeToken();
@@ -475,7 +490,7 @@ void Parser::HandlePragmaWeakAlias() {
 
 void Parser::HandlePragmaRedefineExtname() {
   assert(Tok.is(tok::annot_pragma_redefine_extname));
-  SourceLocation RedefLoc = ConsumeToken();
+  SourceLocation RedefLoc = ConsumeAnnotationToken();
   IdentifierInfo *RedefName = Tok.getIdentifierInfo();
   SourceLocation RedefNameLoc = Tok.getLocation();
   ConsumeToken();
@@ -506,13 +521,13 @@ void Parser::HandlePragmaFPContract() {
   }
 
   Actions.ActOnPragmaFPContract(FPC);
-  ConsumeToken(); // The annotation token.
+  ConsumeAnnotationToken();
 }
 
 StmtResult Parser::HandlePragmaCaptured()
 {
   assert(Tok.is(tok::annot_pragma_captured));
-  ConsumeToken();
+  ConsumeAnnotationToken();
 
   if (Tok.isNot(tok::l_brace)) {
     PP.Diag(Tok, diag::err_expected) << tok::l_brace;
@@ -549,7 +564,7 @@ void Parser::HandlePragmaOpenCLExtension() {
   auto State = Data->second;
   auto Ident = Data->first;
   SourceLocation NameLoc = Tok.getLocation();
-  ConsumeToken(); // The annotation token.
+  ConsumeAnnotationToken();
 
   auto &Opt = Actions.getOpenCLOptions();
   auto Name = Ident->getName();
@@ -588,7 +603,7 @@ void Parser::HandlePragmaMSPointersToMembers() {
   LangOptions::PragmaMSPointersToMembersKind RepresentationMethod =
       static_cast<LangOptions::PragmaMSPointersToMembersKind>(
           reinterpret_cast<uintptr_t>(Tok.getAnnotationValue()));
-  SourceLocation PragmaLoc = ConsumeToken(); // The annotation token.
+  SourceLocation PragmaLoc = ConsumeAnnotationToken();
   Actions.ActOnPragmaMSPointersToMembers(RepresentationMethod, PragmaLoc);
 }
 
@@ -598,7 +613,7 @@ void Parser::HandlePragmaMSVtorDisp() {
   Sema::PragmaMsStackAction Action =
       static_cast<Sema::PragmaMsStackAction>((Value >> 16) & 0xFFFF);
   MSVtorDispAttr::Mode Mode = MSVtorDispAttr::Mode(Value & 0xFFFF);
-  SourceLocation PragmaLoc = ConsumeToken(); // The annotation token.
+  SourceLocation PragmaLoc = ConsumeAnnotationToken();
   Actions.ActOnPragmaMSVtorDisp(Action, PragmaLoc, Mode);
 }
 
@@ -608,7 +623,7 @@ void Parser::HandlePragmaMSPragma() {
   auto TheTokens =
       (std::pair<std::unique_ptr<Token[]>, size_t> *)Tok.getAnnotationValue();
   PP.EnterTokenStream(std::move(TheTokens->first), TheTokens->second, true);
-  SourceLocation PragmaLocation = ConsumeToken(); // The annotation token.
+  SourceLocation PragmaLocation = ConsumeAnnotationToken();
   assert(Tok.isAnyIdentifier());
   StringRef PragmaName = Tok.getIdentifierInfo()->getName();
   PP.Lex(Tok); // pragma kind
@@ -904,7 +919,7 @@ bool Parser::HandlePragmaLoopHint(LoopHint &Hint) {
   bool PragmaUnroll = PragmaNameInfo->getName() == "unroll";
   bool PragmaNoUnroll = PragmaNameInfo->getName() == "nounroll";
   if (Toks.empty() && (PragmaUnroll || PragmaNoUnroll)) {
-    ConsumeToken(); // The annotation token.
+    ConsumeAnnotationToken();
     Hint.Range = Info->PragmaName.getLocation();
     return true;
   }
@@ -931,7 +946,7 @@ bool Parser::HandlePragmaLoopHint(LoopHint &Hint) {
   bool AssumeSafetyArg = !OptionUnroll && !OptionDistribute;
   // Verify loop hint has an argument.
   if (Toks[0].is(tok::eof)) {
-    ConsumeToken(); // The annotation token.
+    ConsumeAnnotationToken();
     Diag(Toks[0].getLocation(), diag::err_pragma_loop_missing_argument)
         << /*StateArgument=*/StateOption << /*FullKeyword=*/OptionUnroll
         << /*AssumeSafetyKeyword=*/AssumeSafetyArg;
@@ -940,7 +955,7 @@ bool Parser::HandlePragmaLoopHint(LoopHint &Hint) {
 
   // Validate the argument.
   if (StateOption) {
-    ConsumeToken(); // The annotation token.
+    ConsumeAnnotationToken();
     SourceLocation StateLoc = Toks[0].getLocation();
     IdentifierInfo *StateInfo = Toks[0].getIdentifierInfo();
 
@@ -963,7 +978,7 @@ bool Parser::HandlePragmaLoopHint(LoopHint &Hint) {
   } else {
     // Enter constant expression including eof terminator into token stream.
     PP.EnterTokenStream(Toks, /*DisableMacroExpansion=*/false);
-    ConsumeToken(); // The annotation token.
+    ConsumeAnnotationToken();
 
     ExprResult R = ParseConstantExpression();
 
@@ -1249,7 +1264,7 @@ void Parser::HandlePragmaAttribute() {
   SourceLocation PragmaLoc = Tok.getLocation();
   auto *Info = static_cast<PragmaAttributeInfo *>(Tok.getAnnotationValue());
   if (Info->Action == PragmaAttributeInfo::Pop) {
-    ConsumeToken();
+    ConsumeAnnotationToken();
     Actions.ActOnPragmaAttributePop(PragmaLoc);
     return;
   }
@@ -1257,7 +1272,7 @@ void Parser::HandlePragmaAttribute() {
   assert(Info->Action == PragmaAttributeInfo::Push &&
          "Unexpected #pragma attribute command");
   PP.EnterTokenStream(Info->Tokens, /*DisableMacroExpansion=*/false);
-  ConsumeToken();
+  ConsumeAnnotationToken();
 
   ParsedAttributes &Attrs = Info->Attributes;
   Attrs.clearListOnly();
@@ -1622,6 +1637,51 @@ void PragmaMSStructHandler::HandlePragma(Preprocessor &PP,
   PP.EnterTokenStream(Toks, /*DisableMacroExpansion=*/true);
 }
 
+// #pragma clang section bss="abc" data="" rodata="def" text=""
+void PragmaClangSectionHandler::HandlePragma(Preprocessor &PP,
+             PragmaIntroducerKind Introducer, Token &FirstToken) {
+
+  Token Tok;
+  auto SecKind = Sema::PragmaClangSectionKind::PCSK_Invalid;
+
+  PP.Lex(Tok); // eat 'section'
+  while (Tok.isNot(tok::eod)) {
+    if (Tok.isNot(tok::identifier)) {
+      PP.Diag(Tok.getLocation(), diag::err_pragma_expected_clang_section_name) << "clang section";
+      return;
+    }
+
+    const IdentifierInfo *SecType = Tok.getIdentifierInfo();
+    if (SecType->isStr("bss"))
+      SecKind = Sema::PragmaClangSectionKind::PCSK_BSS;
+    else if (SecType->isStr("data"))
+      SecKind = Sema::PragmaClangSectionKind::PCSK_Data;
+    else if (SecType->isStr("rodata"))
+      SecKind = Sema::PragmaClangSectionKind::PCSK_Rodata;
+    else if (SecType->isStr("text"))
+      SecKind = Sema::PragmaClangSectionKind::PCSK_Text;
+    else {
+      PP.Diag(Tok.getLocation(), diag::err_pragma_expected_clang_section_name) << "clang section";
+      return;
+    }
+
+    PP.Lex(Tok); // eat ['bss'|'data'|'rodata'|'text']
+    if (Tok.isNot(tok::equal)) {
+      PP.Diag(Tok.getLocation(), diag::err_pragma_clang_section_expected_equal) << SecKind;
+      return;
+    }
+
+    std::string SecName;
+    if (!PP.LexStringLiteral(Tok, SecName, "pragma clang section", false))
+      return;
+
+    Actions.ActOnPragmaClangSection(Tok.getLocation(),
+      (SecName.size()? Sema::PragmaClangSectionAction::PCSA_Set :
+                       Sema::PragmaClangSectionAction::PCSA_Clear),
+       SecKind, SecName);
+  }
+}
+
 // #pragma 'align' '=' {'native','natural','mac68k','power','reset'}
 // #pragma 'options 'align' '=' {'native','natural','mac68k','power','reset'}
 static void ParseAlignPragma(Preprocessor &PP, Token &FirstTok,
@@ -2534,7 +2594,7 @@ void Parser::HandlePragmaFP() {
   }
 
   Actions.ActOnPragmaFPContract(FPC);
-  ConsumeToken(); // The annotation token.
+  ConsumeAnnotationToken();
 }
 
 /// \brief Parses loop or unroll pragma hint value and fills in Info.
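
Putting the new handler together with Sema::ActOnPragmaClangSection below, the
pragma is used like this; a sketch with hypothetical section names:

    #pragma clang section bss = "my_bss" data = "my_data" rodata = "my_rodata"
    int zeroed;                  // emitted into "my_bss"
    int initialized = 42;        // emitted into "my_data"
    const int table[] = {1, 2};  // emitted into "my_rodata"
    // An empty name reverts to the default section (PCSA_Clear above):
    #pragma clang section bss = "" data = "" rodata = ""
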
diff --git a/interpreter/llvm/src/tools/clang/lib/Parse/ParseStmt.cpp b/interpreter/llvm/src/tools/clang/lib/Parse/ParseStmt.cpp
index 7d78046d06841..b1fbb20c721bd 100644
--- a/interpreter/llvm/src/tools/clang/lib/Parse/ParseStmt.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Parse/ParseStmt.cpp
@@ -203,6 +203,7 @@ Parser::ParseStatementOrDeclarationAfterAttributes(StmtVector &Stmts,
     }
 
     // Fall through
+    LLVM_FALLTHROUGH;
   }
 
   default: {
@@ -338,13 +339,13 @@ Parser::ParseStatementOrDeclarationAfterAttributes(StmtVector &Stmts,
   case tok::annot_pragma_fp_contract:
     ProhibitAttributes(Attrs);
     Diag(Tok, diag::err_pragma_fp_contract_scope);
-    ConsumeToken();
+    ConsumeAnnotationToken();
     return StmtError();
 
   case tok::annot_pragma_fp:
     ProhibitAttributes(Attrs);
     Diag(Tok, diag::err_pragma_fp_scope);
-    ConsumeToken();
+    ConsumeAnnotationToken();
     return StmtError();
 
   case tok::annot_pragma_opencl_extension:
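
The two diagnostics above reject an FP pragma that appears in statement
position after other statements; roughly, assuming the usual scope rule for
STDC FP_CONTRACT:

    float fma_like(float a, float b, float c) {
      float ab = a * b;
      #pragma STDC FP_CONTRACT ON   // diagnosed: only valid at file scope or
      return ab + c;                // at the start of a compound statement
    }
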
diff --git a/interpreter/llvm/src/tools/clang/lib/Parse/ParseTemplate.cpp b/interpreter/llvm/src/tools/clang/lib/Parse/ParseTemplate.cpp
index 6a81e14ed4966..7e4a486c7c0e2 100644
--- a/interpreter/llvm/src/tools/clang/lib/Parse/ParseTemplate.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Parse/ParseTemplate.cpp
@@ -674,7 +674,8 @@ Parser::ParseNonTypeTemplateParameter(unsigned Depth, unsigned Position) {
   // FIXME: The type should probably be restricted in some way... Not all
   // declarators (parts of declarators?) are accepted for parameters.
   DeclSpec DS(AttrFactory);
-  ParseDeclarationSpecifiers(DS);
+  ParseDeclarationSpecifiers(DS, ParsedTemplateInfo(), AS_none,
+                             DSC_template_param);
 
   // Parse this as a typename.
   Declarator ParamDecl(DS, Declarator::TemplateParamContext);
@@ -1011,25 +1012,21 @@ bool Parser::AnnotateTemplateIdToken(TemplateTy Template, TemplateNameKind TNK,
     // Build a template-id annotation token that can be processed
     // later.
     Tok.setKind(tok::annot_template_id);
-    TemplateIdAnnotation *TemplateId
-      = TemplateIdAnnotation::Allocate(TemplateArgs.size(), TemplateIds);
-    TemplateId->TemplateNameLoc = TemplateNameLoc;
-    if (TemplateName.getKind() == UnqualifiedId::IK_Identifier) {
-      TemplateId->Name = TemplateName.Identifier;
-      TemplateId->Operator = OO_None;
-    } else {
-      TemplateId->Name = nullptr;
-      TemplateId->Operator = TemplateName.OperatorFunctionId.Operator;
-    }
-    TemplateId->SS = SS;
-    TemplateId->TemplateKWLoc = TemplateKWLoc;
-    TemplateId->Template = Template;
-    TemplateId->Kind = TNK;
-    TemplateId->LAngleLoc = LAngleLoc;
-    TemplateId->RAngleLoc = RAngleLoc;
-    ParsedTemplateArgument *Args = TemplateId->getTemplateArgs();
-    for (unsigned Arg = 0, ArgEnd = TemplateArgs.size(); Arg != ArgEnd; ++Arg)
-      Args[Arg] = ParsedTemplateArgument(TemplateArgs[Arg]);
+    
+    IdentifierInfo *TemplateII =
+        TemplateName.getKind() == UnqualifiedId::IK_Identifier
+            ? TemplateName.Identifier
+            : nullptr;
+
+    OverloadedOperatorKind OpKind =
+        TemplateName.getKind() == UnqualifiedId::IK_Identifier
+            ? OO_None
+            : TemplateName.OperatorFunctionId.Operator;
+
+    TemplateIdAnnotation *TemplateId = TemplateIdAnnotation::Create(
+      SS, TemplateKWLoc, TemplateNameLoc, TemplateII, OpKind, Template, TNK,
+      LAngleLoc, RAngleLoc, TemplateArgs, TemplateIds);
+    
     Tok.setAnnotationValue(TemplateId);
     if (TemplateKWLoc.isValid())
       Tok.setLocation(TemplateKWLoc);
@@ -1186,7 +1183,13 @@ ParsedTemplateArgument Parser::ParseTemplateArgument() {
   //   expression is resolved to a type-id, regardless of the form of
   //   the corresponding template-parameter.
   //
-  // Therefore, we initially try to parse a type-id.  
+  // Therefore, we initially try to parse a type-id - and isCXXTypeId might look
+  // up and annotate an identifier as an id-expression during disambiguation,
+  // so enter the appropriate context for a constant expression template
+  // argument before trying to disambiguate.
+
+  EnterExpressionEvaluationContext EnterConstantEvaluated(
+      Actions, Sema::ExpressionEvaluationContext::ConstantEvaluated);
   if (isCXXTypeId(TypeIdAsTemplateArgument)) {
     SourceLocation Loc = Tok.getLocation();
     TypeResult TypeArg = ParseTypeName(/*Range=*/nullptr,
@@ -1216,7 +1219,7 @@ ParsedTemplateArgument Parser::ParseTemplateArgument() {
   
   // Parse a non-type template argument. 
   SourceLocation Loc = Tok.getLocation();
-  ExprResult ExprArg = ParseConstantExpression(MaybeTypeCast);
+  ExprResult ExprArg = ParseConstantExpressionInExprEvalContext(MaybeTypeCast);
   if (ExprArg.isInvalid() || !ExprArg.get())
     return ParsedTemplateArgument();
 
@@ -1234,7 +1237,7 @@ bool Parser::IsTemplateArgumentList(unsigned Skip) {
   } Tentative(*this);
   
   while (Skip) {
-    ConsumeToken();
+    ConsumeAnyToken();
     --Skip;
   }
   
@@ -1248,7 +1251,7 @@ bool Parser::IsTemplateArgumentList(unsigned Skip) {
   
   // See whether we have declaration specifiers, which indicate a type.
   while (isCXXDeclarationSpecifier() == TPResult::True)
-    ConsumeToken();
+    ConsumeAnyToken();
   
   // If we have a '>' or a ',' then this is a template argument list.
   return Tok.isOneOf(tok::greater, tok::comma);
@@ -1262,9 +1265,7 @@ bool Parser::IsTemplateArgumentList(unsigned Skip) {
 ///         template-argument-list ',' template-argument
 bool
 Parser::ParseTemplateArgumentList(TemplateArgList &TemplateArgs) {
-  // Template argument lists are constant-evaluation contexts.
-  EnterExpressionEvaluationContext EvalContext(
-      Actions, Sema::ExpressionEvaluationContext::ConstantEvaluated);
+  
   ColonProtectionRAIIObject ColonProtection(*this, false);
 
   do {
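
The net effect of moving the context from ParseTemplateArgumentList into
ParseTemplateArgument is that even the disambiguation lookahead for a single
argument now runs constant-evaluated. A sketch of the kind of argument this
covers, with hypothetical declarations:

    template <int N> struct Buffer { char data[N]; };
    constexpr int pageSize() { return 4096; }
    Buffer<pageSize()> page;  // argument parsed in a constant-evaluated context
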
diff --git a/interpreter/llvm/src/tools/clang/lib/Parse/ParseTentative.cpp b/interpreter/llvm/src/tools/clang/lib/Parse/ParseTentative.cpp
index 0ea3f8d951793..d6684c39aa734 100644
--- a/interpreter/llvm/src/tools/clang/lib/Parse/ParseTentative.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Parse/ParseTentative.cpp
@@ -208,17 +208,20 @@ Parser::TPResult Parser::TryConsumeDeclarationSpecifier() {
         TryAnnotateCXXScopeToken())
       return TPResult::Error;
     if (Tok.is(tok::annot_cxxscope))
+      ConsumeAnnotationToken();
+    if (Tok.is(tok::identifier))
       ConsumeToken();
-    if (Tok.isNot(tok::identifier) && Tok.isNot(tok::annot_template_id))
+    else if (Tok.is(tok::annot_template_id))
+      ConsumeAnnotationToken();
+    else
       return TPResult::Error;
-    ConsumeToken();
     break;
 
   case tok::annot_cxxscope:
-    ConsumeToken();
+    ConsumeAnnotationToken();
     // Fall through.
   default:
-    ConsumeToken();
+    ConsumeAnyToken();
 
     if (getLangOpts().ObjC1 && Tok.is(tok::less))
       return TryParseProtocolQualifiers();
@@ -478,10 +481,10 @@ Parser::isCXXConditionDeclarationOrInitStatement(bool CanBeInitStatement) {
   /// the corresponding ')'. If the context is
   /// TypeIdAsTemplateArgument, we've already parsed the '<' or ','
   /// before this template argument, and will cease lookahead when we
-  /// hit a '>', '>>' (in C++0x), or ','. Returns true for a type-id
-  /// and false for an expression.  If during the disambiguation
-  /// process a parsing error is encountered, the function returns
-  /// true to let the declaration parsing code handle it.
+  /// hit a '>', '>>' (in C++0x), or ','; or, in C++0x, an ellipsis immediately
+  /// preceding such. Returns true for a type-id and false for an expression.
+  /// If during the disambiguation process a parsing error is encountered,
+  /// the function returns true to let the declaration parsing code handle it.
   ///
   /// type-id:
   ///   type-specifier-seq abstract-declarator[opt]
@@ -530,10 +533,15 @@ bool Parser::isCXXTypeId(TentativeCXXTypeIdContext Context, bool &isAmbiguous) {
 
     // We are supposed to be inside a template argument, so if after
     // the abstract declarator we encounter a '>', '>>' (in C++0x), or
-    // ',', this is a type-id. Otherwise, it's an expression.
+    // ','; or, in C++0x, an ellipsis immediately preceding such, this
+    // is a type-id. Otherwise, it's an expression.
     } else if (Context == TypeIdAsTemplateArgument &&
                (Tok.isOneOf(tok::greater, tok::comma) ||
-                (getLangOpts().CPlusPlus11 && Tok.is(tok::greatergreater)))) {
+                (getLangOpts().CPlusPlus11 &&
+                 (Tok.is(tok::greatergreater) ||
+                  (Tok.is(tok::ellipsis) &&
+                   NextToken().isOneOf(tok::greater, tok::greatergreater,
+                                       tok::comma)))))) {
       TPR = TPResult::True;
       isAmbiguous = true;
 
@@ -706,7 +714,7 @@ Parser::TPResult Parser::TryParsePtrOperatorSeq() {
     if (Tok.isOneOf(tok::star, tok::amp, tok::caret, tok::ampamp) ||
         (Tok.is(tok::annot_cxxscope) && NextToken().is(tok::star))) {
       // ptr-operator
-      ConsumeToken();
+      ConsumeAnyToken();
       while (Tok.isOneOf(tok::kw_const, tok::kw_volatile, tok::kw_restrict,
                          tok::kw__Nonnull, tok::kw__Nullable,
                          tok::kw__Null_unspecified))
@@ -826,14 +834,14 @@ Parser::TPResult Parser::TryParseOperatorId() {
 ///         abstract-declarator:
 ///           ptr-operator abstract-declarator[opt]
 ///           direct-abstract-declarator
-///           ...
 ///
 ///         direct-abstract-declarator:
 ///           direct-abstract-declarator[opt]
-///           '(' parameter-declaration-clause ')' cv-qualifier-seq[opt]
+///                 '(' parameter-declaration-clause ')' cv-qualifier-seq[opt]
 ///                 exception-specification[opt]
 ///           direct-abstract-declarator[opt] '[' constant-expression[opt] ']'
 ///           '(' abstract-declarator ')'
+/// [C++0x]   ...
 ///
 ///         ptr-operator:
 ///           '*' cv-qualifier-seq[opt]
@@ -883,7 +891,7 @@ Parser::TPResult Parser::TryParseDeclarator(bool mayBeAbstract,
       mayHaveIdentifier) {
     // declarator-id
     if (Tok.is(tok::annot_cxxscope))
-      ConsumeToken();
+      ConsumeAnnotationToken();
     else if (Tok.is(tok::identifier))
       TentativelyDeclaredIdentifiers.push_back(Tok.getIdentifierInfo());
     if (Tok.is(tok::kw_operator)) {
@@ -925,10 +933,6 @@ Parser::TPResult Parser::TryParseDeclarator(bool mayBeAbstract,
   while (1) {
     TPResult TPR(TPResult::Ambiguous);
 
-    // abstract-declarator: ...
-    if (Tok.is(tok::ellipsis))
-      ConsumeToken();
-
     if (Tok.is(tok::l_paren)) {
       // Check whether we have a function declarator or a possible ctor-style
       // initializer that follows the declarator. Note that ctor-style
@@ -1399,7 +1403,7 @@ Parser::isCXXDeclarationSpecifier(Parser::TPResult BracedCastResult,
                                                      SS);
         if (SS.getScopeRep() && SS.getScopeRep()->isDependent()) {
           RevertingTentativeParsingAction PA(*this);
-          ConsumeToken();
+          ConsumeAnnotationToken();
           ConsumeToken();
           bool isIdentifier = Tok.is(tok::identifier);
           TPResult TPR = TPResult::False;
@@ -1446,6 +1450,7 @@ Parser::isCXXDeclarationSpecifier(Parser::TPResult BracedCastResult,
       return TPResult::False;
     }
     // If that succeeded, fallthrough into the generic simple-type-id case.
+    LLVM_FALLTHROUGH;
 
     // The ambiguity resides in a simple-type-specifier/typename-specifier
     // followed by a '('. The '(' could either be the start of:
@@ -1471,7 +1476,7 @@ Parser::isCXXDeclarationSpecifier(Parser::TPResult BracedCastResult,
     if (getLangOpts().ObjC1 && NextToken().is(tok::less)) {
       // Tentatively parse the protocol qualifiers.
       RevertingTentativeParsingAction PA(*this);
-      ConsumeToken(); // The type token
+      ConsumeAnyToken(); // The type token
       
       TPResult TPR = TryParseProtocolQualifiers();
       bool isFollowedByParen = Tok.is(tok::l_paren);
@@ -1488,6 +1493,7 @@ Parser::isCXXDeclarationSpecifier(Parser::TPResult BracedCastResult,
       
       return TPResult::True;
     }
+    LLVM_FALLTHROUGH;
       
   case tok::kw_char:
   case tok::kw_wchar_t:
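
The new ellipsis lookahead classifies a pack expansion of a type-id as a type
rather than an expression; a sketch of the construct it disambiguates, with
illustrative names:

    template <typename... T> struct Tuple {};
    // 'T * ...' is the type-id 'T*' followed by an ellipsis (pack expansion),
    // not a multiplication expression.
    template <typename... T> Tuple<T * ...> makePointers();
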
diff --git a/interpreter/llvm/src/tools/clang/lib/Parse/Parser.cpp b/interpreter/llvm/src/tools/clang/lib/Parse/Parser.cpp
index 1c8e53ddf684d..e4c216ee1fb73 100644
--- a/interpreter/llvm/src/tools/clang/lib/Parse/Parser.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Parse/Parser.cpp
@@ -341,21 +341,13 @@ bool Parser::SkipUntil(ArrayRef<tok::TokenKind> Toks, SkipUntilFlags Flags) {
       ConsumeBrace();
       break;
 
-    case tok::string_literal:
-    case tok::wide_string_literal:
-    case tok::utf8_string_literal:
-    case tok::utf16_string_literal:
-    case tok::utf32_string_literal:
-      ConsumeStringToken();
-      break;
-        
     case tok::semi:
       if (HasFlagsSet(Flags, StopAtSemi))
         return false;
       // FALL THROUGH.
     default:
       // Skip this token.
-      ConsumeToken();
+      ConsumeAnyToken();
       break;
     }
     isFirstTokenSkipped = false;
@@ -542,6 +534,8 @@ void Parser::LateTemplateParserCleanupCallback(void *P) {
 }
 
 bool Parser::ParseFirstTopLevelDecl(DeclGroupPtrTy &Result) {
+  Actions.ActOnStartOfTranslationUnit();
+
   // C11 6.9p1 says translation units must have at least one top-level
   // declaration. C++ doesn't have this restriction. We also don't want to
   // complain if we have a precompiled header, although technically if the PCH
@@ -586,19 +580,19 @@ bool Parser::ParseTopLevelDecl(DeclGroupPtrTy &Result) {
     Actions.ActOnModuleInclude(Tok.getLocation(),
                               reinterpret_cast<Module *>(
                                    Tok.getAnnotationValue()));
-    ConsumeToken();
+    ConsumeAnnotationToken();
     return false;
 
   case tok::annot_module_begin:
    Actions.ActOnModuleBegin(Tok.getLocation(), reinterpret_cast<Module *>(
                                                     Tok.getAnnotationValue()));
-    ConsumeToken();
+    ConsumeAnnotationToken();
     return false;
 
   case tok::annot_module_end:
    Actions.ActOnModuleEnd(Tok.getLocation(), reinterpret_cast<Module *>(
                                                   Tok.getAnnotationValue()));
-    ConsumeToken();
+    ConsumeAnnotationToken();
     return false;
 
   case tok::annot_pragma_attribute:
@@ -778,6 +772,7 @@ Parser::ParseExternalDeclaration(ParsedAttributesWithRange &attrs,
     }
     // This must be 'export template'. Parse it so we can diagnose our lack
     // of support.
+    LLVM_FALLTHROUGH;
   case tok::kw_using:
   case tok::kw_namespace:
   case tok::kw_typedef:
@@ -1890,6 +1885,7 @@ bool Parser::isTokenEqualOrEqualTypo() {
     Diag(Tok, diag::err_invalid_token_after_declarator_suggest_equal)
         << Kind
         << FixItHint::CreateReplacement(SourceRange(Tok.getLocation()), "=");
+    LLVM_FALLTHROUGH;
   case tok::equal:
     return true;
   }
@@ -2176,7 +2172,7 @@ bool Parser::parseMisplacedModuleImport() {
         Actions.ActOnModuleEnd(Tok.getLocation(),
                               reinterpret_cast<Module *>(
                                    Tok.getAnnotationValue()));
-        ConsumeToken();
+        ConsumeAnnotationToken();
         continue;
       }
       // Inform caller that recovery failed, the error must be handled at upper
@@ -2188,7 +2184,7 @@ bool Parser::parseMisplacedModuleImport() {
       Actions.ActOnModuleBegin(Tok.getLocation(),
                               reinterpret_cast<Module *>(
                                    Tok.getAnnotationValue()));
-      ConsumeToken();
+      ConsumeAnnotationToken();
       ++MisplacedModuleBeginCount;
       continue;
     case tok::annot_module_include:
@@ -2197,7 +2193,7 @@ bool Parser::parseMisplacedModuleImport() {
       Actions.ActOnModuleInclude(Tok.getLocation(),
                                 reinterpret_cast<Module *>(
                                      Tok.getAnnotationValue()));
-      ConsumeToken();
+      ConsumeAnnotationToken();
       // If there is another module import, process it.
       continue;
     default:
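
Throughout this patch, ConsumeToken calls on annotation tokens become
ConsumeAnnotationToken, so that the general-purpose consumer can reject special
tokens at the call site. A simplified sketch of that asserting-consumer
pattern, with hypothetical types:

    #include <cassert>

    enum class Kind { Ident, Annotation, Eof };

    struct Cursor {
      const Kind *tok;
      void consumeToken() {            // general-purpose consumer
        assert(*tok != Kind::Annotation && "use consumeAnnotationToken()");
        ++tok;
      }
      void consumeAnnotationToken() {  // annotation-only consumer
        assert(*tok == Kind::Annotation);
        ++tok;
      }
    };
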
diff --git a/interpreter/llvm/src/tools/clang/lib/Rewrite/HTMLRewrite.cpp b/interpreter/llvm/src/tools/clang/lib/Rewrite/HTMLRewrite.cpp
index 27bb976a6e1a9..9e307f31be11c 100644
--- a/interpreter/llvm/src/tools/clang/lib/Rewrite/HTMLRewrite.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Rewrite/HTMLRewrite.cpp
@@ -409,6 +409,7 @@ void html::SyntaxHighlight(Rewriter &R, FileID FID, const Preprocessor &PP) {
       ++TokOffs;
       --TokLen;
       // FALL THROUGH to chop the 8
+      LLVM_FALLTHROUGH;
     case tok::wide_string_literal:
     case tok::utf16_string_literal:
     case tok::utf32_string_literal:
diff --git a/interpreter/llvm/src/tools/clang/lib/Sema/AnalysisBasedWarnings.cpp b/interpreter/llvm/src/tools/clang/lib/Sema/AnalysisBasedWarnings.cpp
index 50ad113fc8802..f83baa790b497 100644
--- a/interpreter/llvm/src/tools/clang/lib/Sema/AnalysisBasedWarnings.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Sema/AnalysisBasedWarnings.cpp
@@ -278,6 +278,159 @@ static void checkRecursiveFunction(Sema &S, const FunctionDecl *FD,
     S.Diag(Body->getLocStart(), diag::warn_infinite_recursive_function);
 }
 
+//===----------------------------------------------------------------------===//
+// Check for throw in a non-throwing function.
+//===----------------------------------------------------------------------===//
+enum ThrowState {
+  FoundNoPathForThrow,
+  FoundPathForThrow,
+  FoundPathWithNoThrowOutFunction,
+};
+
+static bool isThrowCaught(const CXXThrowExpr *Throw,
+                          const CXXCatchStmt *Catch) {
+  const Type *ThrowType = nullptr;
+  if (Throw->getSubExpr())
+    ThrowType = Throw->getSubExpr()->getType().getTypePtrOrNull();
+  if (!ThrowType)
+    return false;
+  const Type *CaughtType = Catch->getCaughtType().getTypePtrOrNull();
+  if (!CaughtType)
+    return true;
+  if (ThrowType->isReferenceType())
+    ThrowType = ThrowType->castAs<ReferenceType>()
+                    ->getPointeeType()
+                    ->getUnqualifiedDesugaredType();
+  if (CaughtType->isReferenceType())
+    CaughtType = CaughtType->castAs<ReferenceType>()
+                     ->getPointeeType()
+                     ->getUnqualifiedDesugaredType();
+  if (ThrowType->isPointerType() && CaughtType->isPointerType()) {
+    ThrowType = ThrowType->getPointeeType()->getUnqualifiedDesugaredType();
+    CaughtType = CaughtType->getPointeeType()->getUnqualifiedDesugaredType();
+  }
+  if (CaughtType == ThrowType)
+    return true;
+  const CXXRecordDecl *CaughtAsRecordType =
+      CaughtType->getAsCXXRecordDecl();
+  const CXXRecordDecl *ThrowTypeAsRecordType = ThrowType->getAsCXXRecordDecl();
+  if (CaughtAsRecordType && ThrowTypeAsRecordType)
+    return ThrowTypeAsRecordType->isDerivedFrom(CaughtAsRecordType);
+  return false;
+}
+
+static bool isThrowCaughtByHandlers(const CXXThrowExpr *CE,
+                                    const CXXTryStmt *TryStmt) {
+  for (unsigned H = 0, E = TryStmt->getNumHandlers(); H < E; ++H) {
+    if (isThrowCaught(CE, TryStmt->getHandler(H)))
+      return true;
+  }
+  return false;
+}
+
+static bool doesThrowEscapePath(CFGBlock Block, SourceLocation &OpLoc) {
+  for (const auto &B : Block) {
+    if (B.getKind() != CFGElement::Statement)
+      continue;
+    const auto *CE = dyn_cast<CXXThrowExpr>(B.getAs<CFGStmt>()->getStmt());
+    if (!CE)
+      continue;
+
+    OpLoc = CE->getThrowLoc();
+    for (const auto &I : Block.succs()) {
+      if (!I.isReachable())
+        continue;
+      if (const auto *Terminator =
+              dyn_cast_or_null<CXXTryStmt>(I->getTerminator()))
+        if (isThrowCaughtByHandlers(CE, Terminator))
+          return false;
+    }
+    return true;
+  }
+  return false;
+}
+
+static bool hasThrowOutNonThrowingFunc(SourceLocation &OpLoc, CFG *BodyCFG) {
+
+  unsigned ExitID = BodyCFG->getExit().getBlockID();
+
+  SmallVector<ThrowState, 16> States(BodyCFG->getNumBlockIDs(),
+                                     FoundNoPathForThrow);
+  States[BodyCFG->getEntry().getBlockID()] = FoundPathWithNoThrowOutFunction;
+
+  SmallVector<CFGBlock *, 16> Stack;
+  Stack.push_back(&BodyCFG->getEntry());
+  while (!Stack.empty()) {
+    CFGBlock *CurBlock = Stack.back();
+    Stack.pop_back();
+
+    unsigned ID = CurBlock->getBlockID();
+    ThrowState CurState = States[ID];
+    if (CurState == FoundPathWithNoThrowOutFunction) {
+      if (ExitID == ID)
+        continue;
+
+      if (doesThrowEscapePath(*CurBlock, OpLoc))
+        CurState = FoundPathForThrow;
+    }
+
+    // Loop over successor blocks and add them to the Stack if their state
+    // changes.
+    for (const auto &I : CurBlock->succs())
+      if (I.isReachable()) {
+        unsigned NextID = I->getBlockID();
+        if (NextID == ExitID && CurState == FoundPathForThrow) {
+          States[NextID] = CurState;
+        } else if (States[NextID] < CurState) {
+          States[NextID] = CurState;
+          Stack.push_back(I);
+        }
+      }
+  }
+  // Return true if the exit node is reachable, and only reachable through
+  // a throw expression.
+  return States[ExitID] == FoundPathForThrow;
+}
+
+static void EmitDiagForCXXThrowInNonThrowingFunc(Sema &S, SourceLocation OpLoc,
+                                                 const FunctionDecl *FD) {
+  if (!S.getSourceManager().isInSystemHeader(OpLoc) &&
+      FD->getTypeSourceInfo()) {
+    S.Diag(OpLoc, diag::warn_throw_in_noexcept_func) << FD;
+    if (S.getLangOpts().CPlusPlus11 &&
+        (isa<CXXDestructorDecl>(FD) ||
+         FD->getDeclName().getCXXOverloadedOperator() == OO_Delete ||
+         FD->getDeclName().getCXXOverloadedOperator() == OO_Array_Delete)) {
+      if (const auto *Ty = FD->getTypeSourceInfo()->getType()->
+                                         getAs<FunctionProtoType>())
+        S.Diag(FD->getLocation(), diag::note_throw_in_dtor)
+            << !isa<CXXDestructorDecl>(FD) << !Ty->hasExceptionSpec()
+            << FD->getExceptionSpecSourceRange();
+    } else 
+      S.Diag(FD->getLocation(), diag::note_throw_in_function)
+          << FD->getExceptionSpecSourceRange();
+  }
+}
+
+static void checkThrowInNonThrowingFunc(Sema &S, const FunctionDecl *FD,
+                                        AnalysisDeclContext &AC) {
+  CFG *BodyCFG = AC.getCFG();
+  if (!BodyCFG)
+    return;
+  if (BodyCFG->getExit().pred_empty())
+    return;
+  SourceLocation OpLoc;
+  if (hasThrowOutNonThrowingFunc(OpLoc, BodyCFG))
+    EmitDiagForCXXThrowInNonThrowingFunc(S, OpLoc, FD);
+}
+
+static bool isNoexcept(const FunctionDecl *FD) {
+  const auto *FPT = FD->getType()->castAs<FunctionProtoType>();
+  if (FPT->isNothrow(FD->getASTContext()))
+    return true;
+  return false;
+}
+
 //===----------------------------------------------------------------------===//
 // Check for missing return value.
 //===----------------------------------------------------------------------===//
@@ -542,6 +695,7 @@ static void CheckFallThroughForBody(Sema &S, const Decl *D, const Stmt *Body,
 
   bool ReturnsVoid = false;
   bool HasNoReturn = false;
+  bool IsCoroutine = S.getCurFunction() && S.getCurFunction()->isCoroutine();
 
   if (const auto *FD = dyn_cast<FunctionDecl>(D)) {
     if (const auto *CBody = dyn_cast<CoroutineBodyStmt>(Body))
@@ -570,8 +724,13 @@ static void CheckFallThroughForBody(Sema &S, const Decl *D, const Stmt *Body,
   // Short circuit for compilation speed.
   if (CD.checkDiagnostics(Diags, ReturnsVoid, HasNoReturn))
       return;
-
   SourceLocation LBrace = Body->getLocStart(), RBrace = Body->getLocEnd();
+  auto EmitDiag = [&](SourceLocation Loc, unsigned DiagID) {
+    if (IsCoroutine)
+      S.Diag(Loc, DiagID) << S.getCurFunction()->CoroutinePromise->getType();
+    else
+      S.Diag(Loc, DiagID);
+  };
   // Either in a function body compound statement, or a function-try-block.
   switch (CheckFallThrough(AC)) {
     case UnknownFallThrough:
@@ -579,15 +738,15 @@ static void CheckFallThroughForBody(Sema &S, const Decl *D, const Stmt *Body,
 
     case MaybeFallThrough:
       if (HasNoReturn)
-        S.Diag(RBrace, CD.diag_MaybeFallThrough_HasNoReturn);
+        EmitDiag(RBrace, CD.diag_MaybeFallThrough_HasNoReturn);
       else if (!ReturnsVoid)
-        S.Diag(RBrace, CD.diag_MaybeFallThrough_ReturnsNonVoid);
+        EmitDiag(RBrace, CD.diag_MaybeFallThrough_ReturnsNonVoid);
       break;
     case AlwaysFallThrough:
       if (HasNoReturn)
-        S.Diag(RBrace, CD.diag_AlwaysFallThrough_HasNoReturn);
+        EmitDiag(RBrace, CD.diag_AlwaysFallThrough_HasNoReturn);
       else if (!ReturnsVoid)
-        S.Diag(RBrace, CD.diag_AlwaysFallThrough_ReturnsNonVoid);
+        EmitDiag(RBrace, CD.diag_AlwaysFallThrough_ReturnsNonVoid);
       break;
     case NeverFallThroughOrReturn:
       if (ReturnsVoid && !HasNoReturn && CD.diag_NeverFallThroughOrReturn) {
@@ -2027,12 +2186,6 @@ AnalysisBasedWarnings::IssueWarnings(sema::AnalysisBasedWarnings::Policy P,
   
   // Warning: check missing 'return'
   if (P.enableCheckFallThrough) {
-    auto IsCoro = [&]() {
-      if (auto *FD = dyn_cast<FunctionDecl>(D))
-        if (FD->getBody() && isa<CoroutineBodyStmt>(FD->getBody()))
-          return true;
-      return false;
-    };
     const CheckFallThroughDiagnostics &CD =
         (isa<BlockDecl>(D)
              ? CheckFallThroughDiagnostics::MakeForBlock()
@@ -2040,7 +2193,7 @@ AnalysisBasedWarnings::IssueWarnings(sema::AnalysisBasedWarnings::Policy P,
                cast<CXXMethodDecl>(D)->getOverloadedOperator() == OO_Call &&
                cast<CXXMethodDecl>(D)->getParent()->isLambda())
                    ? CheckFallThroughDiagnostics::MakeForLambda()
-                   : (IsCoro()
+                   : (fscope->isCoroutine()
                           ? CheckFallThroughDiagnostics::MakeForCoroutine(D)
                           : CheckFallThroughDiagnostics::MakeForFunction(D)));
     CheckFallThroughForBody(S, D, Body, blkExpr, CD, AC);
@@ -2127,6 +2280,12 @@ AnalysisBasedWarnings::IssueWarnings(sema::AnalysisBasedWarnings::Policy P,
     }
   }
 
+  // Check for throw out of non-throwing function.
+  if (!Diags.isIgnored(diag::warn_throw_in_noexcept_func, D->getLocStart()))
+    if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D))
+      if (S.getLangOpts().CPlusPlus && isNoexcept(FD))
+        checkThrowInNonThrowingFunc(S, FD, AC);
+
   // If none of the previous checks caused a CFG build, trigger one here
   // for -Wtautological-overlap-compare
   if (!Diags.isIgnored(diag::warn_tautological_overlap_comparison,
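
Code the new CFG-based check is intended to flag; a sketch keyed to the
warn_throw_in_noexcept_func and note_throw_in_dtor diagnostics above:

    struct S {
      ~S() { throw 42; }         // destructors are implicitly noexcept in
    };                           // C++11: warns
    void bad() noexcept {
      try {
        throw 42;
      } catch (const char *) {}  // the int escapes every handler: warns
    }
    void good() noexcept {
      try {
        throw 42;
      } catch (int) {}           // caught on all paths: no warning
    }
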
diff --git a/interpreter/llvm/src/tools/clang/lib/Sema/CoroutineStmtBuilder.h b/interpreter/llvm/src/tools/clang/lib/Sema/CoroutineStmtBuilder.h
index 4958576219e5f..33a368d92ff46 100644
--- a/interpreter/llvm/src/tools/clang/lib/Sema/CoroutineStmtBuilder.h
+++ b/interpreter/llvm/src/tools/clang/lib/Sema/CoroutineStmtBuilder.h
@@ -28,7 +28,6 @@ class CoroutineStmtBuilder : public CoroutineBodyStmt::CtorArgs {
   sema::FunctionScopeInfo &Fn;
   bool IsValid = true;
   SourceLocation Loc;
-  QualType RetType;
   SmallVector<Stmt *, 4> ParamMovesVector;
   const bool IsPromiseDependentType;
   CXXRecordDecl *PromiseRecordDecl = nullptr;
@@ -52,6 +51,9 @@ class CoroutineStmtBuilder : public CoroutineBodyStmt::CtorArgs {
   /// name lookup.
   bool buildDependentStatements();
 
+  /// \brief Build just the parameter moves, for use when rebuilding in
+  /// TreeTransform.
+  bool buildParameterMoves();
+
   bool isInvalid() const { return !this->IsValid; }
 
 private:
@@ -61,6 +63,7 @@ class CoroutineStmtBuilder : public CoroutineBodyStmt::CtorArgs {
   bool makeOnFallthrough();
   bool makeOnException();
   bool makeReturnObject();
+  bool makeGroDeclAndReturnStmt();
   bool makeReturnOnAllocFailure();
   bool makeParamMoves();
 };
diff --git a/interpreter/llvm/src/tools/clang/lib/Sema/DeclSpec.cpp b/interpreter/llvm/src/tools/clang/lib/Sema/DeclSpec.cpp
index a55cdcccee5df..e4e84fcec954b 100644
--- a/interpreter/llvm/src/tools/clang/lib/Sema/DeclSpec.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Sema/DeclSpec.cpp
@@ -1082,8 +1082,10 @@ void DeclSpec::Finish(Sema &S, const PrintingPolicy &Policy) {
                !S.getLangOpts().ZVector)
         S.Diag(TSTLoc, diag::err_invalid_vector_double_decl_spec);
     } else if (TypeSpecType == TST_float) {
-      // vector float is unsupported for ZVector.
-      if (S.getLangOpts().ZVector)
+      // vector float is unsupported for ZVector unless we have the
+      // vector-enhancements facility 1 (ISA revision 12).
+      if (S.getLangOpts().ZVector &&
+          !S.Context.getTargetInfo().hasFeature("arch12"))
         S.Diag(TSTLoc, diag::err_invalid_vector_float_decl_spec);
     } else if (TypeSpecWidth == TSW_long) {
       // vector long is unsupported for ZVector and deprecated for AltiVec.
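
Concretely, assuming "arch12" maps to z14's vector-enhancements facility 1,
this change accepts code such as:

    // Requires SystemZ with -mzvector; sketch only.
    vector float vf;   // accepted with -march=z14, still rejected otherwise
    vector double vd;  // already valid on earlier vector-capable models
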
diff --git a/interpreter/llvm/src/tools/clang/lib/Sema/DelayedDiagnostic.cpp b/interpreter/llvm/src/tools/clang/lib/Sema/DelayedDiagnostic.cpp
index 2fa5718d4e9b5..3d321d561e60b 100644
--- a/interpreter/llvm/src/tools/clang/lib/Sema/DelayedDiagnostic.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Sema/DelayedDiagnostic.cpp
@@ -22,7 +22,8 @@ using namespace sema;
 DelayedDiagnostic
 DelayedDiagnostic::makeAvailability(AvailabilityResult AR,
                                     SourceLocation Loc,
-                                    const NamedDecl *D,
+                                    const NamedDecl *ReferringDecl,
+                                    const NamedDecl *OffendingDecl,
                                     const ObjCInterfaceDecl *UnknownObjCClass,
                                     const ObjCPropertyDecl  *ObjCProperty,
                                     StringRef Msg,
@@ -31,7 +32,8 @@ DelayedDiagnostic::makeAvailability(AvailabilityResult AR,
   DD.Kind = Availability;
   DD.Triggered = false;
   DD.Loc = Loc;
-  DD.AvailabilityData.Decl = D;
+  DD.AvailabilityData.ReferringDecl = ReferringDecl;
+  DD.AvailabilityData.OffendingDecl = OffendingDecl;
   DD.AvailabilityData.UnknownObjCClass = UnknownObjCClass;
   DD.AvailabilityData.ObjCProperty = ObjCProperty;
   char *MessageData = nullptr;
diff --git a/interpreter/llvm/src/tools/clang/lib/Sema/Sema.cpp b/interpreter/llvm/src/tools/clang/lib/Sema/Sema.cpp
index add15ed695c53..429f44cc7246b 100644
--- a/interpreter/llvm/src/tools/clang/lib/Sema/Sema.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Sema/Sema.cpp
@@ -539,6 +539,9 @@ void Sema::getUndefinedButUsed(
     // __attribute__((weakref)) is basically a definition.
     if (ND->hasAttr<WeakRefAttr>()) continue;
 
+    if (isa<CXXDeductionGuideDecl>(ND))
+      continue;
+
     if (FunctionDecl *FD = dyn_cast<FunctionDecl>(ND)) {
       if (FD->isDefined())
         continue;
@@ -702,6 +705,18 @@ void Sema::emitAndClearUnusedLocalTypedefWarnings() {
   UnusedLocalTypedefNameCandidates.clear();
 }
 
+/// This is called before the very first declaration in the translation unit
+/// is parsed. Note that the ASTContext may have already injected some
+/// declarations.
+void Sema::ActOnStartOfTranslationUnit() {
+  if (getLangOpts().ModulesTS) {
+    // We start in the global module; all those declarations are implicitly
+    // module-private (though they do not have module linkage).
+    Context.getTranslationUnitDecl()->setModuleOwnershipKind(
+        Decl::ModuleOwnershipKind::ModulePrivate);
+  }
+}
+
 /// ActOnEndOfTranslationUnit - This is called at the very end of the
 /// translation unit when EOF is reached and all but the top-level scope is
 /// popped.
@@ -737,6 +752,9 @@ void Sema::ActOnEndOfTranslationUnit() {
       // Load pending instantiations from the external source.
       SmallVector<PendingImplicitInstantiation, 4> Pending;
       ExternalSource->ReadPendingInstantiations(Pending);
+      for (auto PII : Pending)
+        if (auto Func = dyn_cast<FunctionDecl>(PII.first))
+          Func->setInstantiationIsPending(true);
       PendingInstantiations.insert(PendingInstantiations.begin(),
                                    Pending.begin(), Pending.end());
     }
@@ -1684,7 +1702,8 @@ bool Sema::checkOpenCLDisabledTypeDeclSpec(const DeclSpec &DS, QualType QT) {
                                        QT, OpenCLTypeExtMap);
 }
 
-bool Sema::checkOpenCLDisabledDecl(const Decl &D, const Expr &E) {
-  return checkOpenCLDisabledTypeOrDecl(&D, E.getLocStart(), "",
+bool Sema::checkOpenCLDisabledDecl(const NamedDecl &D, const Expr &E) {
+  IdentifierInfo *FnName = D.getIdentifier();
+  return checkOpenCLDisabledTypeOrDecl(&D, E.getLocStart(), FnName,
                                        OpenCLDeclExtMap, 1, D.getSourceRange());
 }
diff --git a/interpreter/llvm/src/tools/clang/lib/Sema/SemaAttr.cpp b/interpreter/llvm/src/tools/clang/lib/Sema/SemaAttr.cpp
index 76ca65373dda8..8c13ead644574 100644
--- a/interpreter/llvm/src/tools/clang/lib/Sema/SemaAttr.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Sema/SemaAttr.cpp
@@ -126,6 +126,36 @@ void Sema::ActOnPragmaOptionsAlign(PragmaOptionsAlignKind Kind,
   PackStack.Act(PragmaLoc, Action, StringRef(), Alignment);
 }
 
+void Sema::ActOnPragmaClangSection(SourceLocation PragmaLoc, PragmaClangSectionAction Action,
+                                   PragmaClangSectionKind SecKind, StringRef SecName) {
+  PragmaClangSection *CSec;
+  switch (SecKind) {
+    case PragmaClangSectionKind::PCSK_BSS:
+      CSec = &PragmaClangBSSSection;
+      break;
+    case PragmaClangSectionKind::PCSK_Data:
+      CSec = &PragmaClangDataSection;
+      break;
+    case PragmaClangSectionKind::PCSK_Rodata:
+      CSec = &PragmaClangRodataSection;
+      break;
+    case PragmaClangSectionKind::PCSK_Text:
+      CSec = &PragmaClangTextSection;
+      break;
+    default:
+      llvm_unreachable("invalid clang section kind");
+  }
+
+  if (Action == PragmaClangSectionAction::PCSA_Clear) {
+    CSec->Valid = false;
+    return;
+  }
+
+  CSec->Valid = true;
+  CSec->SectionName = SecName;
+  CSec->PragmaLocation = PragmaLoc;
+}
+
 void Sema::ActOnPragmaPack(SourceLocation PragmaLoc, PragmaMsStackAction Action,
                            StringRef SlotLabel, Expr *alignment) {
   Expr *Alignment = static_cast<Expr *>(alignment);
diff --git a/interpreter/llvm/src/tools/clang/lib/Sema/SemaCUDA.cpp b/interpreter/llvm/src/tools/clang/lib/Sema/SemaCUDA.cpp
index b938ac387c4da..cac5f682275ed 100644
--- a/interpreter/llvm/src/tools/clang/lib/Sema/SemaCUDA.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Sema/SemaCUDA.cpp
@@ -629,12 +629,6 @@ static bool IsKnownEmitted(Sema &S, FunctionDecl *FD) {
   // emitted, because (say) the definition could include "inline".
   FunctionDecl *Def = FD->getDefinition();
 
-  // We may currently be parsing the body of FD, in which case
-  // FD->getDefinition() will be null, but we still want to treat FD as though
-  // it's a definition.
-  if (!Def && FD->willHaveBody())
-    Def = FD;
-
   if (Def &&
       !isDiscardableGVALinkage(S.getASTContext().GetGVALinkageForFunction(Def)))
     return true;
diff --git a/interpreter/llvm/src/tools/clang/lib/Sema/SemaCast.cpp b/interpreter/llvm/src/tools/clang/lib/Sema/SemaCast.cpp
index 7d534263f4681..d603101c3fd9c 100644
--- a/interpreter/llvm/src/tools/clang/lib/Sema/SemaCast.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Sema/SemaCast.cpp
@@ -143,6 +143,9 @@ namespace {
   };
 }
 
+static void DiagnoseCastQual(Sema &Self, const ExprResult &SrcExpr,
+                             QualType DestType);
+
 // The Try functions attempt a specific way of casting. If they succeed, they
 // return TC_Success. If their way of casting is not appropriate for the given
 // arguments, they return TC_NotApplicable and *may* set diag to a diagnostic
@@ -427,6 +430,10 @@ static void diagnoseBadCast(Sema &S, unsigned msg, CastType castType,
 /// the same kind of pointer (plain or to-member). Unlike the Sema function,
 /// this one doesn't care if the two pointers-to-member don't point into the
 /// same class. This is because CastsAwayConstness doesn't care.
+/// Additionally, it handles C++ references: if both types are references,
+/// their pointee types are returned; if only one of them is a reference,
+/// its pointee type is returned and the other type is returned as-is.
 static bool UnwrapDissimilarPointerTypes(QualType& T1, QualType& T2) {
   const PointerType *T1PtrType = T1->getAs<PointerType>(),
                     *T2PtrType = T2->getAs<PointerType>();
@@ -475,6 +482,26 @@ static bool UnwrapDissimilarPointerTypes(QualType& T1, QualType& T2) {
     return true;
   }
   
+  const LValueReferenceType *T1RefType = T1->getAs<LValueReferenceType>(),
+                            *T2RefType = T2->getAs<LValueReferenceType>();
+  if (T1RefType && T2RefType) {
+    T1 = T1RefType->getPointeeType();
+    T2 = T2RefType->getPointeeType();
+    return true;
+  }
+
+  if (T1RefType) {
+    T1 = T1RefType->getPointeeType();
+    // T2 = T2;
+    return true;
+  }
+
+  if (T2RefType) {
+    // T1 = T1;
+    T2 = T2RefType->getPointeeType();
+    return true;
+  }
+
   return false;
 }
 
@@ -503,11 +530,13 @@ CastsAwayConstness(Sema &Self, QualType SrcType, QualType DestType,
   // the rules are non-trivial. So first we construct Tcv *...cv* as described
   // in C++ 5.2.11p8.
   assert((SrcType->isAnyPointerType() || SrcType->isMemberPointerType() ||
-          SrcType->isBlockPointerType()) &&
+          SrcType->isBlockPointerType() ||
+          DestType->isLValueReferenceType()) &&
          "Source type is not pointer or pointer to member.");
   assert((DestType->isAnyPointerType() || DestType->isMemberPointerType() ||
-          DestType->isBlockPointerType()) &&
-         "Destination type is not pointer or pointer to member.");
+          DestType->isBlockPointerType() ||
+          DestType->isLValueReferenceType()) &&
+         "Destination type is not pointer or pointer to member, or reference.");
 
   QualType UnwrappedSrcType = Self.Context.getCanonicalType(SrcType), 
            UnwrappedDestType = Self.Context.getCanonicalType(DestType);
@@ -523,7 +552,14 @@ CastsAwayConstness(Sema &Self, QualType SrcType, QualType DestType,
     Qualifiers SrcQuals, DestQuals;
     Self.Context.getUnqualifiedArrayType(UnwrappedSrcType, SrcQuals);
     Self.Context.getUnqualifiedArrayType(UnwrappedDestType, DestQuals);
-    
+
+    // We do not meaningfully track object const-ness of Objective-C object
+    // types. Remove const from the source type if either the source or
+    // the destination is an Objective-C object type.
+    if (UnwrappedSrcType->isObjCObjectType() ||
+        UnwrappedDestType->isObjCObjectType())
+      SrcQuals.removeConst();
+
     Qualifiers RetainedSrcQuals, RetainedDestQuals;
     if (CheckCVR) {
       RetainedSrcQuals.setCVRQualifiers(SrcQuals.getCVRQualifiers());
@@ -2177,6 +2213,8 @@ static TryCastResult TryReinterpretCast(Sema &Self, ExprResult &SrcExpr,
 
 void CastOperation::CheckCXXCStyleCast(bool FunctionalStyle,
                                        bool ListInitialization) {
+  assert(Self.getLangOpts().CPlusPlus);
+
   // Handle placeholders.
   if (isPlaceholder()) {
     // C-style casts can resolve __unknown_any types.
@@ -2580,30 +2618,42 @@ void CastOperation::CheckCStyleCast() {
 
   if (Kind == CK_BitCast)
     checkCastAlign();
+}
+
+/// DiagnoseCastQual - Warn whenever a cast discards qualifiers, be it
+/// const, volatile or both.
+static void DiagnoseCastQual(Sema &Self, const ExprResult &SrcExpr,
+                             QualType DestType) {
+  if (SrcExpr.isInvalid())
+    return;
+
+  QualType SrcType = SrcExpr.get()->getType();
+  if (!((SrcType->isAnyPointerType() && DestType->isAnyPointerType()) ||
+        DestType->isLValueReferenceType()))
+    return;
 
-  // -Wcast-qual
   QualType TheOffendingSrcType, TheOffendingDestType;
   Qualifiers CastAwayQualifiers;
-  if (SrcType->isAnyPointerType() && DestType->isAnyPointerType() &&
-      CastsAwayConstness(Self, SrcType, DestType, true, false,
-                         &TheOffendingSrcType, &TheOffendingDestType,
-                         &CastAwayQualifiers)) {
-    int qualifiers = -1;
-    if (CastAwayQualifiers.hasConst() && CastAwayQualifiers.hasVolatile()) {
-      qualifiers = 0;
-    } else if (CastAwayQualifiers.hasConst()) {
-      qualifiers = 1;
-    } else if (CastAwayQualifiers.hasVolatile()) {
-      qualifiers = 2;
-    }
-    // This is a variant of int **x; const int **y = (const int **)x;
-    if (qualifiers == -1)
-      Self.Diag(SrcExpr.get()->getLocStart(), diag::warn_cast_qual2) <<
-        SrcType << DestType;
-    else
-      Self.Diag(SrcExpr.get()->getLocStart(), diag::warn_cast_qual) <<
-        TheOffendingSrcType << TheOffendingDestType << qualifiers;
-  }
+  if (!CastsAwayConstness(Self, SrcType, DestType, true, false,
+                          &TheOffendingSrcType, &TheOffendingDestType,
+                          &CastAwayQualifiers))
+    return;
+
+  int qualifiers = -1;
+  if (CastAwayQualifiers.hasConst() && CastAwayQualifiers.hasVolatile()) {
+    qualifiers = 0;
+  } else if (CastAwayQualifiers.hasConst()) {
+    qualifiers = 1;
+  } else if (CastAwayQualifiers.hasVolatile()) {
+    qualifiers = 2;
+  }
+  // This is a variant of int **x; const int **y = (const int **)x;
+  if (qualifiers == -1)
+    Self.Diag(SrcExpr.get()->getLocStart(), diag::warn_cast_qual2)
+        << SrcType << DestType;
+  else
+    Self.Diag(SrcExpr.get()->getLocStart(), diag::warn_cast_qual)
+        << TheOffendingSrcType << TheOffendingDestType << qualifiers;
 }
 
 ExprResult Sema::BuildCStyleCastExpr(SourceLocation LPLoc,
@@ -2624,6 +2674,9 @@ ExprResult Sema::BuildCStyleCastExpr(SourceLocation LPLoc,
   if (Op.SrcExpr.isInvalid())
     return ExprError();
 
+  // -Wcast-qual
+  DiagnoseCastQual(Op.Self, Op.SrcExpr, Op.DestType);
+
   return Op.complete(CStyleCastExpr::Create(Context, Op.ResultType,
                               Op.ValueKind, Op.Kind, Op.SrcExpr.get(),
                               &Op.BasePath, CastTypeInfo, LPLoc, RPLoc));
diff --git a/interpreter/llvm/src/tools/clang/lib/Sema/SemaChecking.cpp b/interpreter/llvm/src/tools/clang/lib/Sema/SemaChecking.cpp
index 14dd6267b8542..b2223b7550614 100644
--- a/interpreter/llvm/src/tools/clang/lib/Sema/SemaChecking.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Sema/SemaChecking.cpp
@@ -309,7 +309,8 @@ static bool SemaOpenCLBuiltinKernelWorkGroupSize(Sema &S, CallExpr *TheCall) {
   Expr *BlockArg = TheCall->getArg(0);
   if (!isBlockPointer(BlockArg)) {
     S.Diag(BlockArg->getLocStart(),
-           diag::err_opencl_enqueue_kernel_expected_type) << "block";
+           diag::err_opencl_builtin_expected_type)
+        << TheCall->getDirectCallee() << "block";
     return true;
   }
   return checkOpenCLBlockArgs(S, BlockArg);
@@ -394,24 +395,24 @@ static bool SemaOpenCLBuiltinEnqueueKernel(Sema &S, CallExpr *TheCall) {
   // First argument always needs to be a queue_t type.
   if (!Arg0->getType()->isQueueT()) {
     S.Diag(TheCall->getArg(0)->getLocStart(),
-           diag::err_opencl_enqueue_kernel_expected_type)
-        << S.Context.OCLQueueTy;
+           diag::err_opencl_builtin_expected_type)
+        << TheCall->getDirectCallee() << S.Context.OCLQueueTy;
     return true;
   }
 
   // Second argument always needs to be a kernel_enqueue_flags_t enum value.
   if (!Arg1->getType()->isIntegerType()) {
     S.Diag(TheCall->getArg(1)->getLocStart(),
-           diag::err_opencl_enqueue_kernel_expected_type)
-        << "'kernel_enqueue_flags_t' (i.e. uint)";
+           diag::err_opencl_builtin_expected_type)
+        << TheCall->getDirectCallee() << "'kernel_enqueue_flags_t' (i.e. uint)";
     return true;
   }
 
   // Third argument is always an ndrange_t type.
   if (Arg2->getType().getUnqualifiedType().getAsString() != "ndrange_t") {
     S.Diag(TheCall->getArg(2)->getLocStart(),
-           diag::err_opencl_enqueue_kernel_expected_type)
-        << "'ndrange_t'";
+           diag::err_opencl_builtin_expected_type)
+        << TheCall->getDirectCallee() << "'ndrange_t'";
     return true;
   }
 
@@ -420,8 +421,8 @@ static bool SemaOpenCLBuiltinEnqueueKernel(Sema &S, CallExpr *TheCall) {
   if (NumArgs == 4) {
     // check that the last argument is the right block type.
     if (!isBlockPointer(Arg3)) {
-      S.Diag(Arg3->getLocStart(), diag::err_opencl_enqueue_kernel_expected_type)
-          << "block";
+      S.Diag(Arg3->getLocStart(), diag::err_opencl_builtin_expected_type)
+          << TheCall->getDirectCallee() << "block";
       return true;
     }
     // we have a block type, check the prototype
@@ -443,8 +444,8 @@ static bool SemaOpenCLBuiltinEnqueueKernel(Sema &S, CallExpr *TheCall) {
     // check common block argument.
     Expr *Arg6 = TheCall->getArg(6);
     if (!isBlockPointer(Arg6)) {
-      S.Diag(Arg6->getLocStart(), diag::err_opencl_enqueue_kernel_expected_type)
-          << "block";
+      S.Diag(Arg6->getLocStart(), diag::err_opencl_builtin_expected_type)
+          << TheCall->getDirectCallee() << "block";
       return true;
     }
     if (checkOpenCLBlockArgs(S, Arg6))
@@ -453,8 +454,8 @@ static bool SemaOpenCLBuiltinEnqueueKernel(Sema &S, CallExpr *TheCall) {
    // Fourth argument has to be an integer type.
     if (!Arg3->getType()->isIntegerType()) {
       S.Diag(TheCall->getArg(3)->getLocStart(),
-             diag::err_opencl_enqueue_kernel_expected_type)
-          << "integer";
+             diag::err_opencl_builtin_expected_type)
+          << TheCall->getDirectCallee() << "integer";
       return true;
     }
     // check remaining common arguments.
@@ -466,7 +467,8 @@ static bool SemaOpenCLBuiltinEnqueueKernel(Sema &S, CallExpr *TheCall) {
                                      Expr::NPC_ValueDependentIsNotNull) &&
         !Arg4->getType()->getPointeeOrArrayElementType()->isClkEventT()) {
       S.Diag(TheCall->getArg(4)->getLocStart(),
-             diag::err_opencl_enqueue_kernel_expected_type)
+             diag::err_opencl_builtin_expected_type)
+          << TheCall->getDirectCallee()
           << S.Context.getPointerType(S.Context.OCLClkEventTy);
       return true;
     }
@@ -477,7 +479,8 @@ static bool SemaOpenCLBuiltinEnqueueKernel(Sema &S, CallExpr *TheCall) {
         !(Arg5->getType()->isPointerType() &&
           Arg5->getType()->getPointeeType()->isClkEventT())) {
       S.Diag(TheCall->getArg(5)->getLocStart(),
-             diag::err_opencl_enqueue_kernel_expected_type)
+             diag::err_opencl_builtin_expected_type)
+          << TheCall->getDirectCallee()
           << S.Context.getPointerType(S.Context.OCLClkEventTy);
       return true;
     }
@@ -757,6 +760,7 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
     if (CheckObjCString(TheCall->getArg(0)))
       return ExprError();
     break;
+  case Builtin::BI__builtin_ms_va_start:
   case Builtin::BI__builtin_stdarg_start:
   case Builtin::BI__builtin_va_start:
     if (SemaBuiltinVAStart(BuiltinID, TheCall))
@@ -1696,6 +1700,9 @@ bool Sema::CheckPPCBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
   case PPC::BI__builtin_tabortdci:
     return SemaBuiltinConstantArgRange(TheCall, 0, 0, 31) ||
            SemaBuiltinConstantArgRange(TheCall, 2, 0, 31);
+  case PPC::BI__builtin_vsx_xxpermdi:
+  case PPC::BI__builtin_vsx_xxsldwi:
+    return SemaBuiltinVSX(TheCall);
   }
   return SemaBuiltinConstantArgRange(TheCall, i, l, u);
 }
@@ -1733,9 +1740,11 @@ bool Sema::CheckSystemZBuiltinFunctionCall(unsigned BuiltinID,
   case SystemZ::BI__builtin_s390_vfaezbs:
   case SystemZ::BI__builtin_s390_vfaezhs:
   case SystemZ::BI__builtin_s390_vfaezfs: i = 2; l = 0; u = 15; break;
+  case SystemZ::BI__builtin_s390_vfisb:
   case SystemZ::BI__builtin_s390_vfidb:
     return SemaBuiltinConstantArgRange(TheCall, 1, 0, 15) ||
            SemaBuiltinConstantArgRange(TheCall, 2, 0, 15);
+  case SystemZ::BI__builtin_s390_vftcisb:
   case SystemZ::BI__builtin_s390_vftcidb: i = 1; l = 0; u = 4095; break;
   case SystemZ::BI__builtin_s390_vlbb: i = 1; l = 0; u = 15; break;
   case SystemZ::BI__builtin_s390_vpdi: i = 2; l = 0; u = 15; break;
@@ -1752,6 +1761,11 @@ bool Sema::CheckSystemZBuiltinFunctionCall(unsigned BuiltinID,
   case SystemZ::BI__builtin_s390_vstrczbs:
   case SystemZ::BI__builtin_s390_vstrczhs:
   case SystemZ::BI__builtin_s390_vstrczfs: i = 3; l = 0; u = 15; break;
+  case SystemZ::BI__builtin_s390_vmslg: i = 3; l = 0; u = 15; break;
+  case SystemZ::BI__builtin_s390_vfminsb:
+  case SystemZ::BI__builtin_s390_vfmaxsb:
+  case SystemZ::BI__builtin_s390_vfmindb:
+  case SystemZ::BI__builtin_s390_vfmaxdb: i = 2; l = 0; u = 15; break;
   }
   return SemaBuiltinConstantArgRange(TheCall, i, l, u);
 }
@@ -2089,9 +2103,6 @@ bool Sema::CheckX86BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
   if (BuiltinID == X86::BI__builtin_cpu_supports)
     return SemaBuiltinCpuSupports(*this, TheCall);
 
-  if (BuiltinID == X86::BI__builtin_ms_va_start)
-    return SemaBuiltinVAStart(BuiltinID, TheCall);
-
   // If the intrinsic has rounding or SAE make sure its valid.
   if (CheckX86BuiltinRoundingOrSAE(BuiltinID, TheCall))
     return true;
@@ -3616,24 +3627,25 @@ ExprResult Sema::CheckOSLogFormatStringArg(Expr *Arg) {
 static bool checkVAStartABI(Sema &S, unsigned BuiltinID, Expr *Fn) {
   const llvm::Triple &TT = S.Context.getTargetInfo().getTriple();
   bool IsX64 = TT.getArch() == llvm::Triple::x86_64;
+  bool IsAArch64 = TT.getArch() == llvm::Triple::aarch64;
   bool IsWindows = TT.isOSWindows();
-  bool IsMSVAStart = BuiltinID == X86::BI__builtin_ms_va_start;
-  if (IsX64) {
+  bool IsMSVAStart = BuiltinID == Builtin::BI__builtin_ms_va_start;
+  if (IsX64 || IsAArch64) {
     clang::CallingConv CC = CC_C;
     if (const FunctionDecl *FD = S.getCurFunctionDecl())
      CC = FD->getType()->getAs<FunctionType>()->getCallConv();
     if (IsMSVAStart) {
       // Don't allow this in System V ABI functions.
-      if (CC == CC_X86_64SysV || (!IsWindows && CC != CC_X86_64Win64))
+      if (CC == CC_X86_64SysV || (!IsWindows && CC != CC_Win64))
         return S.Diag(Fn->getLocStart(),
                       diag::err_ms_va_start_used_in_sysv_function);
     } else {
-      // On x86-64 Unix, don't allow this in Win64 ABI functions.
+      // On x86-64/AArch64 Unix, don't allow this in Win64 ABI functions.
       // On x64 Windows, don't allow this in System V ABI functions.
       // (Yes, that means there's no corresponding way to support variadic
       // System V ABI functions on Windows.)
       if ((IsWindows && CC == CC_X86_64SysV) ||
-          (!IsWindows && CC == CC_X86_64Win64))
+          (!IsWindows && CC == CC_Win64))
         return S.Diag(Fn->getLocStart(),
                       diag::err_va_start_used_in_wrong_abi_function)
                << !IsWindows;
@@ -3642,7 +3654,7 @@ static bool checkVAStartABI(Sema &S, unsigned BuiltinID, Expr *Fn) {
   }
 
   if (IsMSVAStart)
-    return S.Diag(Fn->getLocStart(), diag::err_x86_builtin_64_only);
+    return S.Diag(Fn->getLocStart(), diag::err_builtin_x64_aarch64_only);
   return false;
 }
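A sketch (not from the patch) of the behavior this relocated check enforces, now on AArch64 as well as x86-64:

    __attribute__((ms_abi)) void win64_fn(int n, ...) {
      __builtin_ms_va_list ap;
      __builtin_ms_va_start(ap, n);  // OK: Win64 ABI function
      __builtin_ms_va_end(ap);
    }
    void sysv_fn(int n, ...) {
      __builtin_ms_va_list ap;
      __builtin_ms_va_start(ap, n);  // error: 'ms_va_start' used in a System V ABI function
      __builtin_ms_va_end(ap);
    }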
 
@@ -3892,6 +3904,65 @@ bool Sema::SemaBuiltinFPClassification(CallExpr *TheCall, unsigned NumArgs) {
   return false;
 }
 
+// Customized Sema checking for VSX builtins that have the following signature:
+//   vector [...] builtinName(vector [...], vector [...], const int);
+// They take two vectors of the same (any legal) vector type as the first two
+// arguments and a compile-time constant as the third argument.
+// Example builtins:
+//   vector double vec_xxpermdi(vector double, vector double, int);
+//   vector short vec_xxsldwi(vector short, vector short, int);
+bool Sema::SemaBuiltinVSX(CallExpr *TheCall) {
+  unsigned ExpectedNumArgs = 3;
+  if (TheCall->getNumArgs() < ExpectedNumArgs)
+    return Diag(TheCall->getLocEnd(),
+                diag::err_typecheck_call_too_few_args_at_least)
+           << 0 /*function call*/ << ExpectedNumArgs << TheCall->getNumArgs()
+           << TheCall->getSourceRange();
+
+  if (TheCall->getNumArgs() > ExpectedNumArgs)
+    return Diag(TheCall->getLocEnd(),
+                diag::err_typecheck_call_too_many_args_at_most)
+           << 0 /*function call*/ << ExpectedNumArgs << TheCall->getNumArgs()
+           << TheCall->getSourceRange();
+
+  // Check that the third argument is a compile-time constant.
+  llvm::APSInt Value;
+  if (!TheCall->getArg(2)->isIntegerConstantExpr(Value, Context))
+    return Diag(TheCall->getLocStart(),
+                diag::err_vsx_builtin_nonconstant_argument)
+           << 3 /* argument index */ << TheCall->getDirectCallee()
+           << SourceRange(TheCall->getArg(2)->getLocStart(),
+                          TheCall->getArg(2)->getLocEnd());
+
+  QualType Arg1Ty = TheCall->getArg(0)->getType();
+  QualType Arg2Ty = TheCall->getArg(1)->getType();
+
+  // Check the type of argument 1 and argument 2 are vectors.
+  SourceLocation BuiltinLoc = TheCall->getLocStart();
+  if ((!Arg1Ty->isVectorType() && !Arg1Ty->isDependentType()) ||
+      (!Arg2Ty->isVectorType() && !Arg2Ty->isDependentType())) {
+    return Diag(BuiltinLoc, diag::err_vec_builtin_non_vector)
+           << TheCall->getDirectCallee()
+           << SourceRange(TheCall->getArg(0)->getLocStart(),
+                          TheCall->getArg(1)->getLocEnd());
+  }
+
+  // Check the first two arguments are the same type.
+  if (!Context.hasSameUnqualifiedType(Arg1Ty, Arg2Ty)) {
+    return Diag(BuiltinLoc, diag::err_vec_builtin_incompatible_vector)
+           << TheCall->getDirectCallee()
+           << SourceRange(TheCall->getArg(0)->getLocStart(),
+                          TheCall->getArg(1)->getLocEnd());
+  }
+
+  // When default clang type checking is turned off and the customized type
+  // checking is used, the returning type of the function must be explicitly
+  // set. Otherwise it is _Bool by default.
+  TheCall->setType(Arg1Ty);
+
+  return false;
+}
+
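An illustrative use of the new check, assuming a PowerPC target with VSX enabled:

    vector double mix(vector double a, vector double b, int n) {
      vector double ok = __builtin_vsx_xxpermdi(a, b, 2);  // accepted; result type is 'vector double'
      return __builtin_vsx_xxpermdi(a, b, n);              // rejected: argument 3 must be a constant
    }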
 /// SemaBuiltinShuffleVector - Handle __builtin_shufflevector.
 // This is declared to take (...), so we have to check everything.
 ExprResult Sema::SemaBuiltinShuffleVector(CallExpr *TheCall) {
@@ -3914,7 +3985,8 @@ ExprResult Sema::SemaBuiltinShuffleVector(CallExpr *TheCall) {
 
     if (!LHSType->isVectorType() || !RHSType->isVectorType())
       return ExprError(Diag(TheCall->getLocStart(),
-                            diag::err_shufflevector_non_vector)
+                            diag::err_vec_builtin_non_vector)
+                       << TheCall->getDirectCallee()
                        << SourceRange(TheCall->getArg(0)->getLocStart(),
                                       TheCall->getArg(1)->getLocEnd()));
 
@@ -3928,12 +4000,14 @@ ExprResult Sema::SemaBuiltinShuffleVector(CallExpr *TheCall) {
       if (!RHSType->hasIntegerRepresentation() ||
          RHSType->getAs<VectorType>()->getNumElements() != numElements)
         return ExprError(Diag(TheCall->getLocStart(),
-                              diag::err_shufflevector_incompatible_vector)
+                              diag::err_vec_builtin_incompatible_vector)
+                         << TheCall->getDirectCallee()
                          << SourceRange(TheCall->getArg(1)->getLocStart(),
                                         TheCall->getArg(1)->getLocEnd()));
     } else if (!Context.hasSameUnqualifiedType(LHSType, RHSType)) {
       return ExprError(Diag(TheCall->getLocStart(),
-                            diag::err_shufflevector_incompatible_vector)
+                            diag::err_vec_builtin_incompatible_vector)
+                       << TheCall->getDirectCallee()
                        << SourceRange(TheCall->getArg(0)->getLocStart(),
                                       TheCall->getArg(1)->getLocEnd()));
     } else if (numElements != numResElements) {
@@ -5935,6 +6009,7 @@ shouldNotPrintDirectly(const ASTContext &Context,
   while (const TypedefType *UserTy = TyTy->getAs<TypedefType>()) {
     StringRef Name = UserTy->getDecl()->getName();
     QualType CastTy = llvm::StringSwitch<QualType>(Name)
+      .Case("CFIndex", Context.LongTy)
       .Case("NSInteger", Context.LongTy)
       .Case("NSUInteger", Context.UnsignedLongTy)
       .Case("SInt32", Context.IntTy)
@@ -9870,6 +9945,28 @@ void Sema::CheckBoolLikeConversion(Expr *E, SourceLocation CC) {
   ::CheckBoolLikeConversion(*this, E, CC);
 }
 
+/// Diagnose when an expression is an integer constant expression and its
+/// evaluation results in integer overflow.
+void Sema::CheckForIntOverflow(Expr *E) {
+  // Use a work list to deal with nested struct initializers.
+  SmallVector<Expr *, 2> Exprs(1, E);
+
+  do {
+    Expr *E = Exprs.pop_back_val();
+
+    if (isa<BinaryOperator>(E->IgnoreParenCasts())) {
+      E->IgnoreParenCasts()->EvaluateForOverflow(Context);
+      continue;
+    }
+
+    if (auto InitList = dyn_cast<InitListExpr>(E))
+      Exprs.append(InitList->inits().begin(), InitList->inits().end());
+
+    if (isa<ObjCBoxedExpr>(E))
+      E->IgnoreParenCasts()->EvaluateForOverflow(Context);
+  } while (!Exprs.empty());
+}
+
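With the work list, overflow is now diagnosed inside (possibly nested) initializer lists as well as in plain expressions; a sketch:

    #include <limits.h>
    int plain = INT_MAX + 1;          // diagnosed: overflow in expression
    struct S { int a; int b; };
    struct S s = { 1, INT_MAX + 1 };  // now diagnosed as well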
 namespace {
 /// \brief Visitor for expressions which looks for unsequenced operations on the
 /// same object.
@@ -10371,7 +10468,7 @@ void Sema::CheckCompletedExpr(Expr *E, SourceLocation CheckLoc,
   if (!E->isInstantiationDependent())
     CheckUnsequencedOperations(E);
   if (!IsConstexpr && !E->isValueDependent())
-    E->EvaluateForOverflow(Context);
+    CheckForIntOverflow(E);
   DiagnoseMisalignedMembers();
 }
 
@@ -12006,6 +12103,8 @@ void Sema::RefersToMemberWithReducedAlignment(
     if (ME->isArrow())
       BaseType = BaseType->getPointeeType();
     RecordDecl *RD = BaseType->getAs<RecordType>()->getDecl();
+    if (RD->isInvalidDecl())
+      return;
 
     ValueDecl *MD = ME->getMemberDecl();
    auto *FD = dyn_cast<FieldDecl>(MD);
diff --git a/interpreter/llvm/src/tools/clang/lib/Sema/SemaCodeComplete.cpp b/interpreter/llvm/src/tools/clang/lib/Sema/SemaCodeComplete.cpp
index 8fb2f41392365..4de7d422072da 100644
--- a/interpreter/llvm/src/tools/clang/lib/Sema/SemaCodeComplete.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Sema/SemaCodeComplete.cpp
@@ -1860,12 +1860,14 @@ static void AddOrdinaryNameResults(Sema::ParserCompletionContext CCC,
 
     AddStaticAssertResult(Builder, Results, SemaRef.getLangOpts());
   }
+  LLVM_FALLTHROUGH;
 
   // Fall through (for statement expressions).
   case Sema::PCC_ForInit:
   case Sema::PCC_Condition:
     AddStorageSpecifiers(CCC, SemaRef.getLangOpts(), Results);
     // Fall through: conditions and statements can have expressions.
+    LLVM_FALLTHROUGH;
 
   case Sema::PCC_ParenthesizedExpression:
     if (SemaRef.getLangOpts().ObjCAutoRefCount &&
@@ -1895,6 +1897,7 @@ static void AddOrdinaryNameResults(Sema::ParserCompletionContext CCC,
       Results.AddResult(Result(Builder.TakeString()));      
     }
     // Fall through
+    LLVM_FALLTHROUGH;
 
   case Sema::PCC_Expression: {
     if (SemaRef.getLangOpts().CPlusPlus) {
@@ -2395,6 +2398,34 @@ formatBlockPlaceholder(const PrintingPolicy &Policy, const NamedDecl *BlockDecl,
   return Result;
 }
 
+static std::string GetDefaultValueString(const ParmVarDecl *Param,
+                                         const SourceManager &SM,
+                                         const LangOptions &LangOpts) {
+  const SourceRange SrcRange = Param->getDefaultArgRange();
+  CharSourceRange CharSrcRange = CharSourceRange::getTokenRange(SrcRange);
+  bool Invalid = CharSrcRange.isInvalid();
+  if (Invalid)
+    return "";
+  StringRef srcText = Lexer::getSourceText(CharSrcRange, SM, LangOpts, &Invalid);
+  if (Invalid)
+    return "";
+
+  if (srcText.empty() || srcText == "=") {
+    // The lexer can't determine the value. This happens if the code is
+    // incorrect (for example, the class is forward-declared).
+    return "";
+  }
+  std::string DefValue(srcText.str());
+  // FIXME: Remove this check if the Lexer::getSourceText value is fixed and
+  // this value always has (or always does not have) '=' in front of it.
+  if (DefValue.at(0) != '=') {
+    // The lexer returns built-in type values without '=' and user-defined
+    // type values with it, so prepend " = " when it is missing.
+    return " = " + DefValue;
+  }
+  return " " + DefValue;
+}
+
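A sketch of the intended effect (the declaration is illustrative): for

    void connect(int port = 8080, bool retry = true);

completing 'connect(' now shows the placeholders 'int port = 8080' and 'bool retry = true' instead of just the parameter types and names.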
 /// \brief Add function parameter chunks to the given code completion string.
 static void AddFunctionParameterChunks(Preprocessor &PP,
                                        const PrintingPolicy &Policy,
@@ -2428,6 +2459,8 @@ static void AddFunctionParameterChunks(Preprocessor &PP,
     
     // Format the placeholder string.
     std::string PlaceholderStr = FormatFunctionParameter(Policy, Param);
+    if (Param->hasDefaultArg())
+      PlaceholderStr += GetDefaultValueString(Param, PP.getSourceManager(), PP.getLangOpts());
 
     if (Function->isVariadic() && P == N - 1)
       PlaceholderStr += ", ...";
@@ -2735,7 +2768,7 @@ CodeCompletionResult::CreateCodeCompletionString(ASTContext &Ctx,
     
     // Format a function-like macro with placeholders for the arguments.
     Result.AddChunk(CodeCompletionString::CK_LeftParen);
-    MacroInfo::arg_iterator A = MI->arg_begin(), AEnd = MI->arg_end();
+    MacroInfo::param_iterator A = MI->param_begin(), AEnd = MI->param_end();
     
     // C99 variadic macros add __VA_ARGS__ at the end. Skip it.
     if (MI->isC99Varargs()) {
@@ -2746,8 +2779,8 @@ CodeCompletionResult::CreateCodeCompletionString(ASTContext &Ctx,
       }
     }
     
-    for (MacroInfo::arg_iterator A = MI->arg_begin(); A != AEnd; ++A) {
-      if (A != MI->arg_begin())
+    for (MacroInfo::param_iterator A = MI->param_begin(); A != AEnd; ++A) {
+      if (A != MI->param_begin())
         Result.AddChunk(CodeCompletionString::CK_Comma);
 
       if (MI->isVariadic() && (A+1) == AEnd) {
@@ -3009,10 +3042,14 @@ static void AddOverloadParameterChunks(ASTContext &Context,
 
     // Format the placeholder string.
     std::string Placeholder;
-    if (Function)
-      Placeholder = FormatFunctionParameter(Policy, Function->getParamDecl(P));
-    else
+    if (Function) {
+      const ParmVarDecl *Param = Function->getParamDecl(P);
+      Placeholder = FormatFunctionParameter(Policy, Param);
+      if (Param->hasDefaultArg())
+        Placeholder += GetDefaultValueString(Param, Context.getSourceManager(), Context.getLangOpts());
+    } else {
       Placeholder = Prototype->getParamType(P).getAsString(Policy);
+    }
 
     if (P == CurrentArg)
       Result.AddCurrentParameterChunk(
@@ -4539,8 +4576,10 @@ void Sema::CodeCompleteQualifiedId(Scope *S, CXXScopeSpec &SS,
                                    bool EnteringContext) {
   if (!SS.getScopeRep() || !CodeCompleter)
     return;
-  
-  DeclContext *Ctx = computeDeclContext(SS, EnteringContext);
+
+  // Always pretend to enter a context to ensure that a dependent type
+  // resolves to a dependent record.
+  DeclContext *Ctx = computeDeclContext(SS, /*EnteringContext=*/true);
   if (!Ctx)
     return;
 
@@ -4570,7 +4609,9 @@ void Sema::CodeCompleteQualifiedId(Scope *S, CXXScopeSpec &SS,
   Results.ExitScope();  
   
   CodeCompletionDeclConsumer Consumer(Results, CurContext);
-  LookupVisibleDecls(Ctx, LookupOrdinaryName, Consumer);
+  LookupVisibleDecls(Ctx, LookupOrdinaryName, Consumer,
+                     /*IncludeGlobalScope=*/true,
+                     /*IncludeDependentBases=*/true);
 
   HandleCodeCompleteResults(this, CodeCompleter, 
                             Results.getCompletionContext(),
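An illustration of the completion change (the types are made up): completing after a dependent nested-name-specifier now suggests members of dependent bases.

    template <typename T> struct Base { int value; };
    template <typename T> struct Derived : Base<T> {
      int get() { return Base<T>::/* completion now lists 'value' */value; }
    };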
diff --git a/interpreter/llvm/src/tools/clang/lib/Sema/SemaCoroutine.cpp b/interpreter/llvm/src/tools/clang/lib/Sema/SemaCoroutine.cpp
index c709a1a723d02..dc7d8e4e9cec3 100644
--- a/interpreter/llvm/src/tools/clang/lib/Sema/SemaCoroutine.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Sema/SemaCoroutine.cpp
@@ -23,21 +23,30 @@
 using namespace clang;
 using namespace sema;
 
-static bool lookupMember(Sema &S, const char *Name, CXXRecordDecl *RD,
-                         SourceLocation Loc) {
+static LookupResult lookupMember(Sema &S, const char *Name, CXXRecordDecl *RD,
+                                 SourceLocation Loc, bool &Res) {
   DeclarationName DN = S.PP.getIdentifierInfo(Name);
   LookupResult LR(S, DN, Loc, Sema::LookupMemberName);
   // Suppress diagnostics when a private member is selected. The same warnings
   // will be produced again when building the call.
   LR.suppressDiagnostics();
-  return S.LookupQualifiedName(LR, RD);
+  Res = S.LookupQualifiedName(LR, RD);
+  return LR;
+}
+
+static bool lookupMember(Sema &S, const char *Name, CXXRecordDecl *RD,
+                         SourceLocation Loc) {
+  bool Res;
+  lookupMember(S, Name, RD, Loc, Res);
+  return Res;
 }
 
 /// Look up the std::coroutine_traits<...>::promise_type for the given
 /// function type.
-static QualType lookupPromiseType(Sema &S, const FunctionProtoType *FnType,
-                                  SourceLocation KwLoc,
-                                  SourceLocation FuncLoc) {
+static QualType lookupPromiseType(Sema &S, const FunctionDecl *FD,
+                                  SourceLocation KwLoc) {
+  const FunctionProtoType *FnType = FD->getType()->castAs<FunctionProtoType>();
+  const SourceLocation FuncLoc = FD->getLocation();
   // FIXME: Cache std::coroutine_traits once we've found it.
   NamespaceDecl *StdExp = S.lookupStdExperimentalNamespace();
   if (!StdExp) {
@@ -63,16 +72,35 @@ static QualType lookupPromiseType(Sema &S, const FunctionProtoType *FnType,
     return QualType();
   }
 
-  // Form template argument list for coroutine_traits.
+  // Form template argument list for coroutine_traits according
+  // to [dcl.fct.def.coroutine]3
   TemplateArgumentListInfo Args(KwLoc, KwLoc);
-  Args.addArgument(TemplateArgumentLoc(
-      TemplateArgument(FnType->getReturnType()),
-      S.Context.getTrivialTypeSourceInfo(FnType->getReturnType(), KwLoc)));
-  // FIXME: If the function is a non-static member function, add the type
-  // of the implicit object parameter before the formal parameters.
-  for (QualType T : FnType->getParamTypes())
+  auto AddArg = [&](QualType T) {
     Args.addArgument(TemplateArgumentLoc(
         TemplateArgument(T), S.Context.getTrivialTypeSourceInfo(T, KwLoc)));
+  };
+  AddArg(FnType->getReturnType());
+  // If the function is a non-static member function, add the type
+  // of the implicit object parameter before the formal parameters.
+  if (auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
+    if (MD->isInstance()) {
+      // [over.match.funcs]4
+      // For non-static member functions, the type of the implicit object
+      // parameter is
+      //  -- "lvalue reference to cv X" for functions declared without a
+      //      ref-qualifier or with the & ref-qualifier
+      //  -- "rvalue reference to cv X" for functions declared with the &&
+      //      ref-qualifier
+      QualType T =
+          MD->getThisType(S.Context)->getAs<PointerType>()->getPointeeType();
+      T = FnType->getRefQualifier() == RQ_RValue
+              ? S.Context.getRValueReferenceType(T)
+              : S.Context.getLValueReferenceType(T, /*SpelledAsLValue*/ true);
+      AddArg(T);
+    }
+  }
+  for (QualType T : FnType->getParamTypes())
+    AddArg(T);
 
   // Build the template-id.
   QualType CoroTrait =
@@ -120,8 +148,7 @@ static QualType lookupPromiseType(Sema &S, const FunctionProtoType *FnType,
   return PromiseType;
 }
 
-/// Look up the std::coroutine_traits<...>::promise_type for the given
-/// function type.
+/// Look up the std::experimental::coroutine_handle<PromiseType>.
 static QualType lookupCoroutineHandleType(Sema &S, QualType PromiseType,
                                           SourceLocation Loc) {
   if (PromiseType.isNull())
@@ -314,6 +341,7 @@ static ExprResult buildCoroutineHandle(Sema &S, QualType PromiseType,
 }
 
 struct ReadySuspendResumeResult {
+  enum AwaitCallType { ACT_Ready, ACT_Suspend, ACT_Resume };
   Expr *Results[3];
   OpaqueValueExpr *OpaqueValue;
   bool IsInvalid;
@@ -359,7 +387,44 @@ static ReadySuspendResumeResult buildCoawaitCalls(Sema &S, VarDecl *CoroPromise,
     Calls.Results[I] = Result.get();
   }
 
+  // Assume the calls are valid; the checks below may mark them invalid.
   Calls.IsInvalid = false;
+
+  using ACT = ReadySuspendResumeResult::AwaitCallType;
+  CallExpr *AwaitReady = cast<CallExpr>(Calls.Results[ACT::ACT_Ready]);
+  if (!AwaitReady->getType()->isDependentType()) {
+    // [expr.await]p3 [...]
+    // — await-ready is the expression e.await_ready(), contextually converted
+    // to bool.
+    ExprResult Conv = S.PerformContextuallyConvertToBool(AwaitReady);
+    if (Conv.isInvalid()) {
+      S.Diag(AwaitReady->getDirectCallee()->getLocStart(),
+             diag::note_await_ready_no_bool_conversion);
+      S.Diag(Loc, diag::note_coroutine_promise_call_implicitly_required)
+          << AwaitReady->getDirectCallee() << E->getSourceRange();
+      Calls.IsInvalid = true;
+    }
+    Calls.Results[ACT::ACT_Ready] = Conv.get();
+  }
+  CallExpr *AwaitSuspend = cast<CallExpr>(Calls.Results[ACT::ACT_Suspend]);
+  if (!AwaitSuspend->getType()->isDependentType()) {
+    // [expr.await]p3 [...]
+    //   - await-suspend is the expression e.await_suspend(h), which shall be
+    //     a prvalue of type void or bool.
+    QualType RetType = AwaitSuspend->getCallReturnType(S.Context);
+    // non-class prvalues always have cv-unqualified types
+    QualType AdjRetType = RetType.getUnqualifiedType();
+    if (RetType->isReferenceType() ||
+        (AdjRetType != S.Context.BoolTy && AdjRetType != S.Context.VoidTy)) {
+      S.Diag(AwaitSuspend->getCalleeDecl()->getLocation(),
+             diag::err_await_suspend_invalid_return_type)
+          << RetType;
+      S.Diag(Loc, diag::note_coroutine_promise_call_implicitly_required)
+          << AwaitSuspend->getDirectCallee();
+      Calls.IsInvalid = true;
+    }
+  }
+
   return Calls;
 }
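A sketch of awaiter types the two new checks accept and reject (following the Coroutines TS):

    #include <experimental/coroutine>
    struct good_awaiter {
      bool await_ready();
      void await_suspend(std::experimental::coroutine_handle<>);  // 'bool' is also allowed
      void await_resume();
    };
    struct bad_awaiter {
      void await_ready();  // error: no contextual conversion to 'bool'
      int await_suspend(std::experimental::coroutine_handle<>);
                           // error: return type must be 'void' or 'bool'
      void await_resume();
    };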
 
@@ -373,19 +438,22 @@ static ExprResult buildPromiseCall(Sema &S, VarDecl *Promise,
   if (PromiseRef.isInvalid())
     return ExprError();
 
-  // Call 'yield_value', passing in E.
   return buildMemberCall(S, PromiseRef.get(), Loc, Name, Args);
 }
 
 VarDecl *Sema::buildCoroutinePromise(SourceLocation Loc) {
   assert(isa(CurContext) && "not in a function scope");
   auto *FD = cast(CurContext);
+  bool IsThisDependentType = [&] {
+    if (auto *MD = dyn_cast_or_null<CXXMethodDecl>(FD))
+      return MD->isInstance() && MD->getThisType(Context)->isDependentType();
+    else
+      return false;
+  }();
 
-  QualType T =
-      FD->getType()->isDependentType()
-          ? Context.DependentTy
-          : lookupPromiseType(*this, FD->getType()->castAs<FunctionProtoType>(),
-                              Loc, FD->getLocation());
+  QualType T = FD->getType()->isDependentType() || IsThisDependentType
+                   ? Context.DependentTy
+                   : lookupPromiseType(*this, FD, Loc);
   if (T.isNull())
     return nullptr;
 
@@ -396,6 +464,7 @@ VarDecl *Sema::buildCoroutinePromise(SourceLocation Loc) {
   if (VD->isInvalidDecl())
     return nullptr;
   ActOnUninitializedDecl(VD);
+  FD->addDecl(VD);
   assert(!VD->isInvalidDecl());
   return VD;
 }
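A consequence of the added implicit object parameter, sketched with an assumed user-defined 'task' coroutine type:

    struct widget {
      // The promise is now looked up as
      //   std::experimental::coroutine_traits<task, widget &, int>::promise_type
      // instead of coroutine_traits<task, int>::promise_type.
      task process(int n);
    };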
@@ -425,11 +494,11 @@ static FunctionScopeInfo *checkCoroutineContext(Sema &S, SourceLocation Loc,
   return ScopeInfo;
 }
 
-static bool actOnCoroutineBodyStart(Sema &S, Scope *SC, SourceLocation KWLoc,
-                                    StringRef Keyword) {
-  if (!checkCoroutineContext(S, KWLoc, Keyword))
+bool Sema::ActOnCoroutineBodyStart(Scope *SC, SourceLocation KWLoc,
+                                   StringRef Keyword) {
+  if (!checkCoroutineContext(*this, KWLoc, Keyword))
     return false;
-  auto *ScopeInfo = S.getCurFunction();
+  auto *ScopeInfo = getCurFunction();
   assert(ScopeInfo->CoroutinePromise);
 
   // If we have existing coroutine statements then we have already built
@@ -439,24 +508,24 @@ static bool actOnCoroutineBodyStart(Sema &S, Scope *SC, SourceLocation KWLoc,
 
   ScopeInfo->setNeedsCoroutineSuspends(false);
 
-  auto *Fn = cast<FunctionDecl>(S.CurContext);
+  auto *Fn = cast<FunctionDecl>(CurContext);
   SourceLocation Loc = Fn->getLocation();
   // Build the initial suspend point
   auto buildSuspends = [&](StringRef Name) mutable -> StmtResult {
     ExprResult Suspend =
-        buildPromiseCall(S, ScopeInfo->CoroutinePromise, Loc, Name, None);
+        buildPromiseCall(*this, ScopeInfo->CoroutinePromise, Loc, Name, None);
     if (Suspend.isInvalid())
       return StmtError();
-    Suspend = buildOperatorCoawaitCall(S, SC, Loc, Suspend.get());
+    Suspend = buildOperatorCoawaitCall(*this, SC, Loc, Suspend.get());
     if (Suspend.isInvalid())
       return StmtError();
-    Suspend = S.BuildResolvedCoawaitExpr(Loc, Suspend.get(),
-                                         /*IsImplicit*/ true);
-    Suspend = S.ActOnFinishFullExpr(Suspend.get());
+    Suspend = BuildResolvedCoawaitExpr(Loc, Suspend.get(),
+                                       /*IsImplicit*/ true);
+    Suspend = ActOnFinishFullExpr(Suspend.get());
     if (Suspend.isInvalid()) {
-      S.Diag(Loc, diag::note_coroutine_promise_suspend_implicitly_required)
+      Diag(Loc, diag::note_coroutine_promise_suspend_implicitly_required)
           << ((Name == "initial_suspend") ? 0 : 1);
-      S.Diag(KWLoc, diag::note_declared_coroutine_here) << Keyword;
+      Diag(KWLoc, diag::note_declared_coroutine_here) << Keyword;
       return StmtError();
     }
     return cast<Stmt>(Suspend.get());
@@ -476,7 +545,7 @@ static bool actOnCoroutineBodyStart(Sema &S, Scope *SC, SourceLocation KWLoc,
 }
 
 ExprResult Sema::ActOnCoawaitExpr(Scope *S, SourceLocation Loc, Expr *E) {
-  if (!actOnCoroutineBodyStart(*this, S, Loc, "co_await")) {
+  if (!ActOnCoroutineBodyStart(S, Loc, "co_await")) {
     CorrectDelayedTyposInExpr(E);
     return ExprError();
   }
@@ -568,7 +637,7 @@ ExprResult Sema::BuildResolvedCoawaitExpr(SourceLocation Loc, Expr *E,
 }
 
 ExprResult Sema::ActOnCoyieldExpr(Scope *S, SourceLocation Loc, Expr *E) {
-  if (!actOnCoroutineBodyStart(*this, S, Loc, "co_yield")) {
+  if (!ActOnCoroutineBodyStart(S, Loc, "co_yield")) {
     CorrectDelayedTyposInExpr(E);
     return ExprError();
   }
@@ -613,14 +682,15 @@ ExprResult Sema::BuildCoyieldExpr(SourceLocation Loc, Expr *E) {
   if (RSS.IsInvalid)
     return ExprError();
 
-  Expr *Res = new (Context) CoyieldExpr(Loc, E, RSS.Results[0], RSS.Results[1],
-                                        RSS.Results[2], RSS.OpaqueValue);
+  Expr *Res =
+      new (Context) CoyieldExpr(Loc, E, RSS.Results[0], RSS.Results[1],
+                                RSS.Results[2], RSS.OpaqueValue);
 
   return Res;
 }
 
 StmtResult Sema::ActOnCoreturnStmt(Scope *S, SourceLocation Loc, Expr *E) {
-  if (!actOnCoroutineBodyStart(*this, S, Loc, "co_return")) {
+  if (!ActOnCoroutineBodyStart(S, Loc, "co_return")) {
     CorrectDelayedTyposInExpr(E);
     return StmtError();
   }
@@ -675,8 +745,6 @@ static Expr *buildStdNoThrowDeclRef(Sema &S, SourceLocation Loc) {
     return nullptr;
   }
 
-  // FIXME: Mark the variable as ODR used. This currently does not work
-  // likely due to the scope at in which this function is called.
   auto *VD = Result.getAsSingle<VarDecl>();
   if (!VD) {
     Result.suppressDiagnostics();
@@ -721,17 +789,19 @@ static FunctionDecl *findDeleteForPromise(Sema &S, SourceLocation Loc,
 
 void Sema::CheckCompletedCoroutineBody(FunctionDecl *FD, Stmt *&Body) {
   FunctionScopeInfo *Fn = getCurFunction();
-  assert(Fn && Fn->CoroutinePromise && "not a coroutine");
-
+  assert(Fn && Fn->isCoroutine() && "not a coroutine");
   if (!Body) {
     assert(FD->isInvalidDecl() &&
            "a null body is only allowed for invalid declarations");
     return;
   }
+  // We have a function that uses coroutine keywords, but we failed to build
+  // the promise type.
+  if (!Fn->CoroutinePromise)
+    return FD->setInvalidDecl();
 
   if (isa<CoroutineBodyStmt>(Body)) {
-    // FIXME(EricWF): Nothing todo. the body is already a transformed coroutine
-    // body statement.
+    // Nothing to do; the body is already a transformed coroutine body
+    // statement.
     return;
   }
 
@@ -780,10 +850,17 @@ bool CoroutineStmtBuilder::buildDependentStatements() {
   assert(!this->IsPromiseDependentType &&
          "coroutine cannot have a dependent promise type");
   this->IsValid = makeOnException() && makeOnFallthrough() &&
-                  makeReturnOnAllocFailure() && makeNewAndDeleteExpr();
+                  makeGroDeclAndReturnStmt() && makeReturnOnAllocFailure() &&
+                  makeNewAndDeleteExpr();
   return this->IsValid;
 }
 
+bool CoroutineStmtBuilder::buildParameterMoves() {
+  assert(this->IsValid && "coroutine already invalid");
+  assert(this->ParamMoves.empty() && "param moves already built");
+  return this->IsValid = makeParamMoves();
+}
+
 bool CoroutineStmtBuilder::makePromiseStmt() {
   // Form a declaration statement for the promise declaration, so that AST
   // visitors can more easily find it.
@@ -857,15 +934,15 @@ bool CoroutineStmtBuilder::makeReturnOnAllocFailure() {
   if (ReturnObjectOnAllocationFailure.isInvalid())
     return false;
 
-  // FIXME: ActOnReturnStmt expects a scope that is inside of the function, due
-  //   to CheckJumpOutOfSEHFinally(*this, ReturnLoc, *CurScope->getFnParent());
-  //   S.getCurScope()->getFnParent() == nullptr at ActOnFinishFunctionBody when
-  //   CoroutineBodyStmt is built. Figure it out and fix it.
-  //   Use BuildReturnStmt here to unbreak sanitized tests. (Gor:3/27/2017)
   StmtResult ReturnStmt =
       S.BuildReturnStmt(Loc, ReturnObjectOnAllocationFailure.get());
-  if (ReturnStmt.isInvalid())
+  if (ReturnStmt.isInvalid()) {
+    S.Diag(Found.getFoundDecl()->getLocation(), diag::note_member_declared_here)
+        << DN;
+    S.Diag(Fn.FirstCoroutineStmtLoc, diag::note_declared_coroutine_here)
+        << Fn.getFirstCoroutineStmtKeyword();
     return false;
+  }
 
   this->ReturnStmtOnAllocFailure = ReturnStmt.get();
   return true;
@@ -991,13 +1068,32 @@ bool CoroutineStmtBuilder::makeOnFallthrough() {
   // [dcl.fct.def.coroutine]/4
   // The unqualified-ids 'return_void' and 'return_value' are looked up in
   // the scope of class P. If both are found, the program is ill-formed.
-  const bool HasRVoid = lookupMember(S, "return_void", PromiseRecordDecl, Loc);
-  const bool HasRValue = lookupMember(S, "return_value", PromiseRecordDecl, Loc);
+  bool HasRVoid, HasRValue;
+  LookupResult LRVoid =
+      lookupMember(S, "return_void", PromiseRecordDecl, Loc, HasRVoid);
+  LookupResult LRValue =
+      lookupMember(S, "return_value", PromiseRecordDecl, Loc, HasRValue);
 
   StmtResult Fallthrough;
   if (HasRVoid && HasRValue) {
     // FIXME Improve this diagnostic
-    S.Diag(FD.getLocation(), diag::err_coroutine_promise_return_ill_formed)
+    S.Diag(FD.getLocation(),
+           diag::err_coroutine_promise_incompatible_return_functions)
+        << PromiseRecordDecl;
+    S.Diag(LRVoid.getRepresentativeDecl()->getLocation(),
+           diag::note_member_first_declared_here)
+        << LRVoid.getLookupName();
+    S.Diag(LRValue.getRepresentativeDecl()->getLocation(),
+           diag::note_member_first_declared_here)
+        << LRValue.getLookupName();
+    return false;
+  } else if (!HasRVoid && !HasRValue) {
+    // FIXME: The PDTS currently specifies this case as UB, not ill-formed.
+    // However, we still diagnose this as an error until the PDTS is fixed.
+    S.Diag(FD.getLocation(),
+           diag::err_coroutine_promise_requires_return_function)
+        << PromiseRecordDecl;
+    S.Diag(PromiseRecordDecl->getLocation(), diag::note_defined_here)
         << PromiseRecordDecl;
     return false;
   } else if (HasRVoid) {
@@ -1029,6 +1125,8 @@ bool CoroutineStmtBuilder::makeOnException() {
             : diag::
                   warn_coroutine_promise_unhandled_exception_required_with_exceptions;
     S.Diag(Loc, DiagID) << PromiseRecordDecl;
+    S.Diag(PromiseRecordDecl->getLocation(), diag::note_defined_here)
+        << PromiseRecordDecl;
     return !RequireUnhandledException;
   }
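A sketch of promise types the new diagnostics reject:

    struct promise_both {
      void return_void();
      void return_value(int);  // error: promise declares both functions
      // ...
    };
    struct promise_neither {
      // error: declares neither 'return_void' nor 'return_value' (stricter
      // than the TS, per the FIXME above); a missing 'unhandled_exception'
      // additionally gets a note pointing at the promise definition.
    };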
 
@@ -1042,37 +1140,180 @@ bool CoroutineStmtBuilder::makeOnException() {
   if (UnhandledException.isInvalid())
     return false;
 
+  // Since the body of the coroutine will be wrapped in a try-catch, it is
+  // incompatible with any SEH __try that appears in the same function.
+  if (!S.getLangOpts().Borland && Fn.FirstSEHTryLoc.isValid()) {
+    S.Diag(Fn.FirstSEHTryLoc, diag::err_seh_in_a_coroutine_with_cxx_exceptions);
+    S.Diag(Fn.FirstCoroutineStmtLoc, diag::note_declared_coroutine_here)
+        << Fn.getFirstCoroutineStmtKeyword();
+    return false;
+  }
+
   this->OnException = UnhandledException.get();
   return true;
 }
 
 bool CoroutineStmtBuilder::makeReturnObject() {
-
   // Build implicit 'p.get_return_object()' expression and form initialization
   // of return type from it.
   ExprResult ReturnObject =
       buildPromiseCall(S, Fn.CoroutinePromise, Loc, "get_return_object", None);
   if (ReturnObject.isInvalid())
     return false;
-  QualType RetType = FD.getReturnType();
-  if (!RetType->isDependentType()) {
-    InitializedEntity Entity =
-        InitializedEntity::InitializeResult(Loc, RetType, false);
-    ReturnObject = S.PerformMoveOrCopyInitialization(Entity, nullptr, RetType,
-                                                   ReturnObject.get());
-    if (ReturnObject.isInvalid())
+
+  this->ReturnValue = ReturnObject.get();
+  return true;
+}
+
+static void noteMemberDeclaredHere(Sema &S, Expr *E, FunctionScopeInfo &Fn) {
+  if (auto *MbrRef = dyn_cast<CXXMemberCallExpr>(E)) {
+    auto *MethodDecl = MbrRef->getMethodDecl();
+    S.Diag(MethodDecl->getLocation(), diag::note_member_declared_here)
+        << MethodDecl;
+  }
+  S.Diag(Fn.FirstCoroutineStmtLoc, diag::note_declared_coroutine_here)
+      << Fn.getFirstCoroutineStmtKeyword();
+}
+
+bool CoroutineStmtBuilder::makeGroDeclAndReturnStmt() {
+  assert(!IsPromiseDependentType &&
+         "cannot make statement while the promise type is dependent");
+  assert(this->ReturnValue && "ReturnValue must be already formed");
+
+  QualType const GroType = this->ReturnValue->getType();
+  assert(!GroType->isDependentType() &&
+         "get_return_object type must no longer be dependent");
+
+  QualType const FnRetType = FD.getReturnType();
+  assert(!FnRetType->isDependentType() &&
+         "function return type must no longer be dependent");
+
+  if (FnRetType->isVoidType()) {
+    ExprResult Res = S.ActOnFinishFullExpr(this->ReturnValue, Loc);
+    if (Res.isInvalid())
       return false;
+
+    this->ResultDecl = Res.get();
+    return true;
   }
-  ReturnObject = S.ActOnFinishFullExpr(ReturnObject.get(), Loc);
-  if (ReturnObject.isInvalid())
+
+  if (GroType->isVoidType()) {
+    // Trigger a nice error message.
+    InitializedEntity Entity =
+        InitializedEntity::InitializeResult(Loc, FnRetType, false);
+    S.PerformMoveOrCopyInitialization(Entity, nullptr, FnRetType, ReturnValue);
+    noteMemberDeclaredHere(S, ReturnValue, Fn);
     return false;
+  }
 
-  this->ReturnValue = ReturnObject.get();
+  auto *GroDecl = VarDecl::Create(
+      S.Context, &FD, FD.getLocation(), FD.getLocation(),
+      &S.PP.getIdentifierTable().get("__coro_gro"), GroType,
+      S.Context.getTrivialTypeSourceInfo(GroType, Loc), SC_None);
+
+  S.CheckVariableDeclarationType(GroDecl);
+  if (GroDecl->isInvalidDecl())
+    return false;
+
+  InitializedEntity Entity = InitializedEntity::InitializeVariable(GroDecl);
+  ExprResult Res = S.PerformMoveOrCopyInitialization(Entity, nullptr, GroType,
+                                                     this->ReturnValue);
+  if (Res.isInvalid())
+    return false;
+
+  Res = S.ActOnFinishFullExpr(Res.get());
+  if (Res.isInvalid())
+    return false;
+
+  if (GroType == FnRetType) {
+    GroDecl->setNRVOVariable(true);
+  }
+
+  S.AddInitializerToDecl(GroDecl, Res.get(),
+                         /*DirectInit=*/false);
+
+  S.FinalizeDeclaration(GroDecl);
+
+  // Form a declaration statement for the return declaration, so that AST
+  // visitors can more easily find it.
+  StmtResult GroDeclStmt =
+      S.ActOnDeclStmt(S.ConvertDeclToDeclGroup(GroDecl), Loc, Loc);
+  if (GroDeclStmt.isInvalid())
+    return false;
+
+  this->ResultDecl = GroDeclStmt.get();
+
+  ExprResult declRef = S.BuildDeclRefExpr(GroDecl, GroType, VK_LValue, Loc);
+  if (declRef.isInvalid())
+    return false;
+
+  StmtResult ReturnStmt = S.BuildReturnStmt(Loc, declRef.get());
+  if (ReturnStmt.isInvalid()) {
+    noteMemberDeclaredHere(S, ReturnValue, Fn);
+    return false;
+  }
+
+  this->ReturnStmt = ReturnStmt.get();
   return true;
 }
 
+// Create a static_cast<T&&>(expr).
+static Expr *castForMoving(Sema &S, Expr *E, QualType T = QualType()) {
+  if (T.isNull())
+    T = E->getType();
+  QualType TargetType = S.BuildReferenceType(
+      T, /*SpelledAsLValue*/ false, SourceLocation(), DeclarationName());
+  SourceLocation ExprLoc = E->getLocStart();
+  TypeSourceInfo *TargetLoc =
+      S.Context.getTrivialTypeSourceInfo(TargetType, ExprLoc);
+
+  return S
+      .BuildCXXNamedCast(ExprLoc, tok::kw_static_cast, TargetLoc, E,
+                         SourceRange(ExprLoc, ExprLoc), E->getSourceRange())
+      .get();
+}
+
+/// \brief Build a variable declaration for a move parameter.
+static VarDecl *buildVarDecl(Sema &S, SourceLocation Loc, QualType Type,
+                             IdentifierInfo *II) {
+  TypeSourceInfo *TInfo = S.Context.getTrivialTypeSourceInfo(Type, Loc);
+  VarDecl *Decl =
+      VarDecl::Create(S.Context, S.CurContext, Loc, Loc, II, Type, TInfo, SC_None);
+  Decl->setImplicit();
+  return Decl;
+}
+
 bool CoroutineStmtBuilder::makeParamMoves() {
-  // FIXME: Perform move-initialization of parameters into frame-local copies.
+  for (auto *paramDecl : FD.parameters()) {
+    auto Ty = paramDecl->getType();
+    if (Ty->isDependentType())
+      continue;
+
+    // No need to copy scalars: LLVM will take care of them.
+    if (Ty->getAsCXXRecordDecl()) {
+      ExprResult ParamRef =
+          S.BuildDeclRefExpr(paramDecl, paramDecl->getType(),
+                             ExprValueKind::VK_LValue, Loc); // FIXME: scope?
+      if (ParamRef.isInvalid())
+        return false;
+
+      Expr *RCast = castForMoving(S, ParamRef.get());
+
+      auto D = buildVarDecl(S, Loc, Ty, paramDecl->getIdentifier());
+      S.AddInitializerToDecl(D, RCast, /*DirectInit=*/true);
+
+      // Convert decl to a statement.
+      StmtResult Stmt = S.ActOnDeclStmt(S.ConvertDeclToDeclGroup(D), Loc, Loc);
+      if (Stmt.isInvalid())
+        return false;
+
+      ParamMovesVector.push_back(Stmt.get());
+    }
+  }
+
+  // Convert to an ArrayRef in the CtorArgs structure that the builder
+  // inherits from.
+  ParamMoves = ParamMovesVector;
   return true;
 }
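The effect of makeParamMoves, sketched with an assumed 'task' coroutine type: each class-type parameter gets a frame-local copy that is move-initialized through a static_cast to an rvalue reference, roughly:

    task consume(std::string s) {
      // conceptually inserted at the top of the coroutine body:
      //   std::string __s_copy = static_cast<std::string &&>(s);
      co_return;
    }

Scalar parameters are skipped; LLVM already copies them into the coroutine frame.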
 
diff --git a/interpreter/llvm/src/tools/clang/lib/Sema/SemaDecl.cpp b/interpreter/llvm/src/tools/clang/lib/Sema/SemaDecl.cpp
index 8d47518bd9d63..d1a8c20419e15 100644
--- a/interpreter/llvm/src/tools/clang/lib/Sema/SemaDecl.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Sema/SemaDecl.cpp
@@ -409,6 +409,7 @@ ParsedType Sema::getTypeName(const IdentifierInfo &II, SourceLocation NameLoc,
       }
     }
     // If typo correction failed or was not performed, fall through
+    LLVM_FALLTHROUGH;
   case LookupResult::FoundOverloaded:
   case LookupResult::FoundUnresolvedValue:
     Result.suppressDiagnostics();
@@ -1331,15 +1332,17 @@ void Sema::ActOnExitFunctionContext() {
 /// overloaded function declaration or has the "overloadable"
 /// attribute.
 static bool AllowOverloadingOfFunction(LookupResult &Previous,
-                                       ASTContext &Context) {
+                                       ASTContext &Context,
+                                       const FunctionDecl *New) {
   if (Context.getLangOpts().CPlusPlus)
     return true;
 
   if (Previous.getResultKind() == LookupResult::FoundOverloaded)
     return true;
 
-  return (Previous.getResultKind() == LookupResult::Found
-          && Previous.getFoundDecl()->hasAttr<OverloadableAttr>());
+  return Previous.getResultKind() == LookupResult::Found &&
+         (Previous.getFoundDecl()->hasAttr<OverloadableAttr>() ||
+          New->hasAttr<OverloadableAttr>());
 }
 
 /// Add this decl to the scope shadowed decl chains.
@@ -2025,7 +2028,7 @@ bool Sema::isIncompatibleTypedef(TypeDecl *Old, TypedefNameDecl *New) {
     Diag(New->getLocation(), diag::err_redefinition_variably_modified_typedef)
       << Kind << NewType;
     if (Old->getLocation().isValid())
-      notePreviousDefinition(Old->getLocation(), New->getLocation());
+      notePreviousDefinition(Old, New->getLocation());
     New->setInvalidDecl();
     return true;
   }
@@ -2038,7 +2041,7 @@ bool Sema::isIncompatibleTypedef(TypeDecl *Old, TypedefNameDecl *New) {
     Diag(New->getLocation(), diag::err_redefinition_different_typedef)
       << Kind << NewType << OldType;
     if (Old->getLocation().isValid())
-      notePreviousDefinition(Old->getLocation(), New->getLocation());
+      notePreviousDefinition(Old, New->getLocation());
     New->setInvalidDecl();
     return true;
   }
@@ -2105,7 +2108,7 @@ void Sema::MergeTypedefNameDecl(Scope *S, TypedefNameDecl *New,
 
     NamedDecl *OldD = OldDecls.getRepresentativeDecl();
     if (OldD->getLocation().isValid())
-      notePreviousDefinition(OldD->getLocation(), New->getLocation());
+      notePreviousDefinition(OldD, New->getLocation());
 
     return New->setInvalidDecl();
   }
@@ -2197,7 +2200,7 @@ void Sema::MergeTypedefNameDecl(Scope *S, TypedefNameDecl *New,
 
     Diag(New->getLocation(), diag::err_redefinition)
       << New->getDeclName();
-    notePreviousDefinition(Old->getLocation(), New->getLocation());
+    notePreviousDefinition(Old, New->getLocation());
     return New->setInvalidDecl();
   }
 
@@ -2218,7 +2221,7 @@ void Sema::MergeTypedefNameDecl(Scope *S, TypedefNameDecl *New,
 
   Diag(New->getLocation(), diag::ext_redefinition_of_typedef)
     << New->getDeclName();
-  notePreviousDefinition(Old->getLocation(), New->getLocation());
+  notePreviousDefinition(Old, New->getLocation());
 }
 
 /// DeclhasAttr - returns true if decl Declaration already has the target
@@ -2452,7 +2455,7 @@ static bool mergeDeclAttribute(Sema &S, NamedDecl *D,
   return false;
 }
 
-static const Decl *getDefinition(const Decl *D) {
+static const NamedDecl *getDefinition(const Decl *D) {
   if (const TagDecl *TD = dyn_cast<TagDecl>(D))
     return TD->getDefinition();
   if (const VarDecl *VD = dyn_cast<VarDecl>(D)) {
@@ -2479,7 +2482,7 @@ static void checkNewAttributesAfterDef(Sema &S, Decl *New, const Decl *Old) {
   if (!New->hasAttrs())
     return;
 
-  const Decl *Def = getDefinition(Old);
+  const NamedDecl *Def = getDefinition(Old);
   if (!Def || Def == New)
     return;
 
@@ -2506,7 +2509,7 @@ static void checkNewAttributesAfterDef(Sema &S, Decl *New, const Decl *Old) {
                             : diag::err_redefinition;
         S.Diag(VD->getLocation(), Diag) << VD->getDeclName();
         if (Diag == diag::err_redefinition)
-          S.notePreviousDefinition(Def->getLocation(), VD->getLocation());
+          S.notePreviousDefinition(Def, VD->getLocation());
         else
           S.Diag(Def->getLocation(), diag::note_previous_definition);
         VD->setInvalidDecl();
@@ -2895,7 +2898,7 @@ bool Sema::MergeFunctionDecl(FunctionDecl *New, NamedDecl *&OldD,
     } else {
       Diag(New->getLocation(), diag::err_redefinition_different_kind)
         << New->getDeclName();
-      notePreviousDefinition(OldD->getLocation(), New->getLocation());
+      notePreviousDefinition(OldD, New->getLocation());
       return true;
     }
   }
@@ -2932,10 +2935,45 @@ bool Sema::MergeFunctionDecl(FunctionDecl *New, NamedDecl *&OldD,
      !Old->hasAttr<InternalLinkageAttr>()) {
     Diag(New->getLocation(), diag::err_internal_linkage_redeclaration)
         << New->getDeclName();
-    notePreviousDefinition(Old->getLocation(), New->getLocation());
+    notePreviousDefinition(Old, New->getLocation());
    New->dropAttr<InternalLinkageAttr>();
   }
 
+  if (!getLangOpts().CPlusPlus) {
+    bool OldOvl = Old->hasAttr<OverloadableAttr>();
+    if (OldOvl != New->hasAttr<OverloadableAttr>() && !Old->isImplicit()) {
+      Diag(New->getLocation(), diag::err_attribute_overloadable_mismatch)
+        << New << OldOvl;
+
+      // Try our best to find a decl that actually has the overloadable
+      // attribute for the note. In most cases (e.g. programs with only one
+      // broken declaration/definition), this won't matter.
+      //
+      // FIXME: We could do this if we juggled some extra state in
+      // OverloadableAttr, rather than just removing it.
+      const Decl *DiagOld = Old;
+      if (OldOvl) {
+        auto OldIter = llvm::find_if(Old->redecls(), [](const Decl *D) {
+          const auto *A = D->getAttr<OverloadableAttr>();
+          return A && !A->isImplicit();
+        });
+        // If we've implicitly added *all* of the overloadable attrs to this
+        // chain, emitting a "previous redecl" note is pointless.
+        DiagOld = OldIter == Old->redecls_end() ? nullptr : *OldIter;
+      }
+
+      if (DiagOld)
+        Diag(DiagOld->getLocation(),
+             diag::note_attribute_overloadable_prev_overload)
+          << OldOvl;
+
+      if (OldOvl)
+        New->addAttr(OverloadableAttr::CreateImplicit(Context));
+      else
+        New->dropAttr<OverloadableAttr>();
+    }
+  }
+
   // If a function is first declared with a calling convention, but is later
   // declared or defined without one, all following decls assume the calling
   // convention of the first.
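An illustration of the new C-mode rule above (C code, since overloading in C requires the attribute; the names are made up):

    void log_value(int) __attribute__((overloadable));
    void log_value(int);  // error: redeclaration of 'log_value' must have the
                          // 'overloadable' attribute; a note points at the first,
                          // explicitly attributed declaration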
@@ -3661,7 +3699,7 @@ void Sema::MergeVarDecl(VarDecl *New, LookupResult &Previous) {
   if (!Old) {
     Diag(New->getLocation(), diag::err_redefinition_different_kind)
         << New->getDeclName();
-    notePreviousDefinition(Previous.getRepresentativeDecl()->getLocation(),
+    notePreviousDefinition(Previous.getRepresentativeDecl(),
                            New->getLocation());
     return New->setInvalidDecl();
   }
@@ -3691,7 +3729,7 @@ void Sema::MergeVarDecl(VarDecl *New, LookupResult &Previous) {
       Old->getStorageClass() == SC_None &&
      !Old->hasAttr<WeakImportAttr>()) {
     Diag(New->getLocation(), diag::warn_weak_import) << New->getDeclName();
-    notePreviousDefinition(Old->getLocation(), New->getLocation());
+    notePreviousDefinition(Old, New->getLocation());
     // Remove weak_import attribute on new declaration.
    New->dropAttr<WeakImportAttr>();
   }
@@ -3700,7 +3738,7 @@ void Sema::MergeVarDecl(VarDecl *New, LookupResult &Previous) {
      !Old->hasAttr<InternalLinkageAttr>()) {
     Diag(New->getLocation(), diag::err_internal_linkage_redeclaration)
         << New->getDeclName();
-    notePreviousDefinition(Old->getLocation(), New->getLocation());
+    notePreviousDefinition(Old, New->getLocation());
    New->dropAttr<InternalLinkageAttr>();
   }
 
@@ -3857,29 +3895,22 @@ void Sema::MergeVarDecl(VarDecl *New, LookupResult &Previous) {
     New->setImplicitlyInline();
 }
 
-void Sema::notePreviousDefinition(SourceLocation Old, SourceLocation New) {
+void Sema::notePreviousDefinition(const NamedDecl *Old, SourceLocation New) {
   SourceManager &SrcMgr = getSourceManager();
   auto FNewDecLoc = SrcMgr.getDecomposedLoc(New);
-  auto FOldDecLoc = SrcMgr.getDecomposedLoc(Old);
+  auto FOldDecLoc = SrcMgr.getDecomposedLoc(Old->getLocation());
   auto *FNew = SrcMgr.getFileEntryForID(FNewDecLoc.first);
   auto *FOld = SrcMgr.getFileEntryForID(FOldDecLoc.first);
   auto &HSI = PP.getHeaderSearchInfo();
-  StringRef HdrFilename = SrcMgr.getFilename(SrcMgr.getSpellingLoc(Old));
+  StringRef HdrFilename =
+      SrcMgr.getFilename(SrcMgr.getSpellingLoc(Old->getLocation()));
 
-  auto noteFromModuleOrInclude = [&](SourceLocation &Loc,
-                                     SourceLocation &IncLoc) -> bool {
-    Module *Mod = nullptr;
+  auto noteFromModuleOrInclude = [&](Module *Mod,
+                                     SourceLocation IncLoc) -> bool {
     // Redefinition errors with modules are common with non modular mapped
     // headers, example: a non-modular header H in module A that also gets
     // included directly in a TU. Pointing twice to the same header/definition
     // is confusing, try to get better diagnostics when modules is on.
-    if (getLangOpts().Modules) {
-      auto ModLoc = SrcMgr.getModuleImportLoc(Old);
-      if (!ModLoc.first.isInvalid())
-        Mod = HSI.getModuleMap().inferModuleFromLocation(
-            FullSourceLoc(Loc, SrcMgr));
-    }
-
     if (IncLoc.isValid()) {
       if (Mod) {
         Diag(IncLoc, diag::note_redefinition_modules_same_file)
@@ -3903,19 +3934,19 @@ void Sema::notePreviousDefinition(SourceLocation Old, SourceLocation New) {
   if (FNew == FOld && FNewDecLoc.second == FOldDecLoc.second) {
     SourceLocation OldIncLoc = SrcMgr.getIncludeLoc(FOldDecLoc.first);
     SourceLocation NewIncLoc = SrcMgr.getIncludeLoc(FNewDecLoc.first);
-    EmittedDiag = noteFromModuleOrInclude(Old, OldIncLoc);
-    EmittedDiag |= noteFromModuleOrInclude(New, NewIncLoc);
+    EmittedDiag = noteFromModuleOrInclude(Old->getOwningModule(), OldIncLoc);
+    EmittedDiag |= noteFromModuleOrInclude(getCurrentModule(), NewIncLoc);
 
     // If the header has no guards, emit a note suggesting one.
     if (FOld && !HSI.isFileMultipleIncludeGuarded(FOld))
-      Diag(Old, diag::note_use_ifdef_guards);
+      Diag(Old->getLocation(), diag::note_use_ifdef_guards);
 
     if (EmittedDiag)
       return;
   }
 
   // Redefinition coming from different files or couldn't do better above.
-  Diag(Old, diag::note_previous_definition);
+  Diag(Old->getLocation(), diag::note_previous_definition);
 }
 
 /// We've just determined that \p Old and \p New both appear to be definitions
@@ -3938,7 +3969,7 @@ bool Sema::checkVarDeclRedefinition(VarDecl *Old, VarDecl *New) {
     return false;
   } else {
     Diag(New->getLocation(), diag::err_redefinition) << New;
-    notePreviousDefinition(Old->getLocation(), New->getLocation());
+    notePreviousDefinition(Old, New->getLocation());
     New->setInvalidDecl();
     return true;
   }
@@ -6171,7 +6202,7 @@ NamedDecl *Sema::ActOnVariableDeclarator(
     QualType NR = R;
     while (NR->isPointerType()) {
       if (NR->isFunctionPointerType()) {
-        Diag(D.getIdentifierLoc(), diag::err_opencl_function_pointer_variable);
+        Diag(D.getIdentifierLoc(), diag::err_opencl_function_pointer);
         D.setInvalidType();
         break;
       }
@@ -6527,7 +6558,7 @@ NamedDecl *Sema::ActOnVariableDeclarator(
            diag::err_thread_non_global)
         << DeclSpec::getSpecifierName(TSCS);
     else if (!Context.getTargetInfo().isTLSSupported()) {
-      if (getLangOpts().CUDA) {
+      if (getLangOpts().CUDA || getLangOpts().OpenMPIsDevice) {
         // Postpone error emission until we've collected attributes required to
         // figure out whether it's a host or device variable and whether the
         // error should be ignored.
@@ -6589,8 +6620,11 @@ NamedDecl *Sema::ActOnVariableDeclarator(
   // Handle attributes prior to checking for duplicates in MergeVarDecl
   ProcessDeclAttributes(S, NewVD, D);
 
-  if (getLangOpts().CUDA) {
-    if (EmitTLSUnsupportedError && DeclAttrsMatchCUDAMode(getLangOpts(), NewVD))
+  if (getLangOpts().CUDA || getLangOpts().OpenMPIsDevice) {
+    if (EmitTLSUnsupportedError &&
+        ((getLangOpts().CUDA && DeclAttrsMatchCUDAMode(getLangOpts(), NewVD)) ||
+         (getLangOpts().OpenMPIsDevice &&
+          NewVD->hasAttr<OMPDeclareTargetDeclAttr>())))
       Diag(D.getDeclSpec().getThreadStorageClassSpecLoc(),
            diag::err_thread_unsupported);
     // CUDA B.2.5: "__shared__ and __constant__ variables have implied static
@@ -6946,7 +6980,7 @@ void Sema::CheckShadow(NamedDecl *D, NamedDecl *ShadowedDecl,
         }
     }
 
-  DeclContext *OldDC = ShadowedDecl->getDeclContext();
+  DeclContext *OldDC = ShadowedDecl->getDeclContext()->getRedeclContext();
 
   unsigned WarningDiag = diag::warn_decl_shadow;
   SourceLocation CaptureLoc;
@@ -6970,6 +7004,21 @@ void Sema::CheckShadow(NamedDecl *D, NamedDecl *ShadowedDecl,
           return;
         }
       }
+
+      if (cast<VarDecl>(ShadowedDecl)->hasLocalStorage()) {
+        // A variable can't shadow a local variable in an enclosing scope, if
+        // they are separated by a non-capturing declaration context.
+        for (DeclContext *ParentDC = NewDC;
+             ParentDC && !ParentDC->Equals(OldDC);
+             ParentDC = getLambdaAwareParentOfDeclContext(ParentDC)) {
+          // Only block literals, captured statements, and lambda expressions
+          // can capture; other scopes don't.
+          if (!isa<BlockDecl>(ParentDC) && !isa<CapturedDecl>(ParentDC) &&
+              !isLambdaCallOperator(ParentDC)) {
+            return;
+          }
+        }
+      }
     }
   }
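Note: the walk above starts at the new declaration's context and moves outward; as soon as it crosses a context that cannot capture (anything other than a block, a captured statement, or a lambda call operator), the shadow warning is suppressed. A hypothetical -Wshadow test, not part of this patch:

    int f() {
      int a = 0;
      struct Local {
        int g() {
          int a = 1; // no -Wshadow: a member function cannot capture f()'s 'a'
          return a;
        }
      };
      return Local().g();
    }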
 
@@ -7202,7 +7251,7 @@ void Sema::CheckVariableDeclarationType(VarDecl *NewVD) {
   // ISO/IEC TR 18037 S5.1.2
   if (!getLangOpts().OpenCL
       && NewVD->hasLocalStorage() && T.getAddressSpace() != 0) {
-    Diag(NewVD->getLocation(), diag::err_as_qualified_auto_decl);
+    Diag(NewVD->getLocation(), diag::err_as_qualified_auto_decl) << 0;
     NewVD->setInvalidDecl();
     return;
   }
@@ -7267,11 +7316,11 @@ void Sema::CheckVariableDeclarationType(VarDecl *NewVD) {
         NewVD->setInvalidDecl();
         return;
       }
-      // OpenCL v1.1 s6.5.2 and s6.5.3 no local or constant variables
-      // in functions.
       if (T.getAddressSpace() == LangAS::opencl_constant ||
           T.getAddressSpace() == LangAS::opencl_local) {
         FunctionDecl *FD = getCurFunctionDecl();
+        // OpenCL v1.1 s6.5.2 and s6.5.3: no local or constant variables
+        // in functions.
        if (FD && !FD->hasAttr<OpenCLKernelAttr>()) {
           if (T.getAddressSpace() == LangAS::opencl_constant)
             Diag(NewVD->getLocation(), diag::err_opencl_function_variable)
@@ -7282,6 +7331,25 @@ void Sema::CheckVariableDeclarationType(VarDecl *NewVD) {
           NewVD->setInvalidDecl();
           return;
         }
+        // OpenCL v2.0 s6.5.2 and s6.5.3: local and constant variables must be
+        // in the outermost scope of a kernel function.
+        if (FD && FD->hasAttr<OpenCLKernelAttr>()) {
+          if (!getCurScope()->isFunctionScope()) {
+            if (T.getAddressSpace() == LangAS::opencl_constant)
+              Diag(NewVD->getLocation(), diag::err_opencl_addrspace_scope)
+                  << "constant";
+            else
+              Diag(NewVD->getLocation(), diag::err_opencl_addrspace_scope)
+                  << "local";
+            NewVD->setInvalidDecl();
+            return;
+          }
+        }
+      } else if (T.getAddressSpace() != LangAS::Default) {
+        // Do not allow other address spaces on an automatic variable.
+        Diag(NewVD->getLocation(), diag::err_as_qualified_auto_decl) << 1;
+        NewVD->setInvalidDecl();
+        return;
       }
     }
   }
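Note: a sketch of what the two added branches reject (hypothetical OpenCL test case, not from this patch):

    __kernel void k(int c) {
      __local int lws[64];   // OK: outermost scope of a kernel function
      if (c) {
        __local int tmp;     // error: "local" variable must be declared in
                             // the outermost scope of a kernel function
      }
      __global int g;        // error: wrong address space on an automatic
                             // variable (err_as_qualified_auto_decl)
    }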
@@ -7923,10 +7991,7 @@ static OpenCLParamType getOpenCLKernelParameterType(Sema &S, QualType PT) {
   if (PT->isImageType())
     return PtrKernelParam;
 
-  if (PT->isBooleanType())
-    return InvalidKernelParam;
-
-  if (PT->isEventT())
+  if (PT->isBooleanType() || PT->isEventT() || PT->isReserveIDT())
     return InvalidKernelParam;
 
   // OpenCL extension spec v1.2 s9.5:
@@ -8656,6 +8721,14 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC,
     NewFD->setInvalidDecl();
   }
 
+  // Apply an implicit SectionAttr if '#pragma clang section text' is active
+  if (PragmaClangTextSection.Valid && D.isFunctionDefinition() &&
+      !NewFD->hasAttr<SectionAttr>()) {
+    NewFD->addAttr(PragmaClangTextSectionAttr::CreateImplicit(Context,
+                                                 PragmaClangTextSection.SectionName,
+                                                 PragmaClangTextSection.PragmaLocation));
+  }
+
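Note: this wires function definitions into the new '#pragma clang section' machinery; a sketch with an illustrative section name:

    #pragma clang section text = ".fast"
    int isr(void) { return 0; }     // implicit PragmaClangTextSectionAttr -> ".fast"
    #pragma clang section text = "" // revert to the default text section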
   // Apply an implicit SectionAttr if #pragma code_seg is active.
   if (CodeSegStack.CurrentValue && D.isFunctionDefinition() &&
      !NewFD->hasAttr<SectionAttr>()) {
@@ -9162,6 +9235,7 @@ bool Sema::CheckFunctionDeclaration(Scope *S, FunctionDecl *NewFD,
 
   bool Redeclaration = false;
   NamedDecl *OldDecl = nullptr;
+  bool MayNeedOverloadableChecks = false;
 
   // Merge or overload the declaration with an existing declaration of
   // the same name, if appropriate.
@@ -9170,13 +9244,14 @@ bool Sema::CheckFunctionDeclaration(Scope *S, FunctionDecl *NewFD,
     // a declaration that requires merging. If it's an overload,
     // there's no more work to do here; we'll just add the new
     // function to the scope.
-    if (!AllowOverloadingOfFunction(Previous, Context)) {
+    if (!AllowOverloadingOfFunction(Previous, Context, NewFD)) {
       NamedDecl *Candidate = Previous.getRepresentativeDecl();
       if (shouldLinkPossiblyHiddenDecl(Candidate, NewFD)) {
         Redeclaration = true;
         OldDecl = Candidate;
       }
     } else {
+      MayNeedOverloadableChecks = true;
       switch (CheckOverload(S, NewFD, Previous, OldDecl,
                             /*NewIsUsingDecl*/ false)) {
       case Ovl_Match:
@@ -9191,18 +9266,6 @@ bool Sema::CheckFunctionDeclaration(Scope *S, FunctionDecl *NewFD,
         Redeclaration = false;
         break;
       }
-
-      if (!getLangOpts().CPlusPlus && !NewFD->hasAttr<OverloadableAttr>()) {
-        // If a function name is overloadable in C, then every function
-        // with that name must be marked "overloadable".
-        Diag(NewFD->getLocation(), diag::err_attribute_overloadable_missing)
-          << Redeclaration << NewFD;
-        NamedDecl *OverloadedDecl =
-            Redeclaration ? OldDecl : Previous.getRepresentativeDecl();
-        Diag(OverloadedDecl->getLocation(),
-             diag::note_attribute_overloadable_prev_overload);
-        NewFD->addAttr(OverloadableAttr::CreateImplicit(Context));
-      }
     }
   }
 
@@ -9217,15 +9280,10 @@ bool Sema::CheckFunctionDeclaration(Scope *S, FunctionDecl *NewFD,
       MergeTypeWithPrevious = false;
 
       // ... except in the presence of __attribute__((overloadable)).
-      if (OldDecl->hasAttr<OverloadableAttr>()) {
-        if (!getLangOpts().CPlusPlus && !NewFD->hasAttr<OverloadableAttr>()) {
-          Diag(NewFD->getLocation(), diag::err_attribute_overloadable_missing)
-            << Redeclaration << NewFD;
-          Diag(Previous.getFoundDecl()->getLocation(),
-               diag::note_attribute_overloadable_prev_overload);
-          NewFD->addAttr(OverloadableAttr::CreateImplicit(Context));
-        }
+      if (OldDecl->hasAttr<OverloadableAttr>() ||
+          NewFD->hasAttr<OverloadableAttr>()) {
        if (IsOverload(NewFD, cast<FunctionDecl>(OldDecl), false)) {
+          MayNeedOverloadableChecks = true;
           Redeclaration = false;
           OldDecl = nullptr;
         }
@@ -9320,6 +9378,29 @@ bool Sema::CheckFunctionDeclaration(Scope *S, FunctionDecl *NewFD,
           NewFD->setAccess(OldDecl->getAccess());
       }
     }
+  } else if (!getLangOpts().CPlusPlus && MayNeedOverloadableChecks &&
+             !NewFD->getAttr<OverloadableAttr>()) {
+    assert((Previous.empty() ||
+            llvm::any_of(Previous,
+                         [](const NamedDecl *ND) {
+                           return ND->hasAttr<OverloadableAttr>();
+                         })) &&
+           "Non-redecls shouldn't happen without overloadable present");
+
+    auto OtherUnmarkedIter = llvm::find_if(Previous, [](const NamedDecl *ND) {
+      const auto *FD = dyn_cast<FunctionDecl>(ND);
+      return FD && !FD->hasAttr<OverloadableAttr>();
+    });
+
+    if (OtherUnmarkedIter != Previous.end()) {
+      Diag(NewFD->getLocation(),
+           diag::err_attribute_overloadable_multiple_unmarked_overloads);
+      Diag((*OtherUnmarkedIter)->getLocation(),
+           diag::note_attribute_overloadable_prev_overload)
+          << false;
+
+      NewFD->addAttr(OverloadableAttr::CreateImplicit(Context));
+    }
   }
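Note: with MayNeedOverloadableChecks, a C overload set may now leave exactly one function unmarked; the new diagnostic fires only on a second unmarked overload. A hypothetical C test:

    __attribute__((overloadable)) int  len(int *p);
    __attribute__((overloadable)) long len(long *p);
    float len(float *p);      // OK: one unmarked overload is permitted
    // double len(double *p); // would be diagnosed: multiple overloads
                              // without __attribute__((overloadable))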
 
   // Semantic checking for this function declaration (in isolation).
@@ -10398,23 +10479,36 @@ void Sema::AddInitializerToDecl(Decl *RealDecl, Expr *Init, bool DirectInit) {
   VDecl->setInit(Init);
 
   if (VDecl->isLocalVarDecl()) {
+    // Don't check the initializer if the declaration is malformed.
+    if (VDecl->isInvalidDecl()) {
+      // do nothing
+
+    // OpenCL v1.2 s6.5.3: __constant locals must be constant-initialized.
+    // This is true even in OpenCL C++.
+    } else if (VDecl->getType().getAddressSpace() == LangAS::opencl_constant) {
+      CheckForConstantInitializer(Init, DclT);
+
+    // Otherwise, C++ does not restrict the initializer.
+    } else if (getLangOpts().CPlusPlus) {
+      // do nothing
+
     // C99 6.7.8p4: All the expressions in an initializer for an object that has
     // static storage duration shall be constant expressions or string literals.
-    // C++ does not have this restriction.
-    if (!getLangOpts().CPlusPlus && !VDecl->isInvalidDecl()) {
+    } else if (VDecl->getStorageClass() == SC_Static) {
+      CheckForConstantInitializer(Init, DclT);
+
+    // C89 is stricter than C99 for aggregate initializers.
+    // C89 6.5.7p3: All the expressions [...] in an initializer list
+    // for an object that has aggregate or union type shall be
+    // constant expressions.
+    } else if (!getLangOpts().C99 && VDecl->getType()->isAggregateType() &&
+               isa<InitListExpr>(Init)) {
       const Expr *Culprit;
-      if (VDecl->getStorageClass() == SC_Static)
-        CheckForConstantInitializer(Init, DclT);
-      // C89 is stricter than C99 for non-static aggregate types.
-      // C89 6.5.7p3: All the expressions [...] in an initializer list
-      // for an object that has aggregate or union type shall be
-      // constant expressions.
-      else if (!getLangOpts().C99 && VDecl->getType()->isAggregateType() &&
-               isa<InitListExpr>(Init) &&
-               !Init->isConstantInitializer(Context, false, &Culprit))
+      if (!Init->isConstantInitializer(Context, false, &Culprit)) {
         Diag(Culprit->getExprLoc(),
              diag::ext_aggregate_init_not_constant)
           << Culprit->getSourceRange();
+      }
     }
   } else if (VDecl->isStaticDataMember() && !VDecl->isInline() &&
              VDecl->getLexicalDeclContext()->isRecord()) {
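Note: the flattened if/else chain reaches the C89 aggregate case only for initializer lists; a sketch of the extension it diagnoses (C89 mode, -pedantic; not part of this patch):

    int n;
    void f(void) {
      int a[2] = { n, 1 }; /* ext_aggregate_init_not_constant: 'n' is not a
                              constant expression in a C89 initializer list */
    }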
@@ -11070,9 +11164,7 @@ void Sema::CheckCompleteVariableDeclaration(VarDecl *var) {
   bool IsGlobal = GlobalStorage && !var->isStaticLocal();
   QualType baseType = Context.getBaseElementType(type);
 
-  if (!var->getDeclContext()->isDependentContext() &&
-      Init && !Init->isValueDependent()) {
-
+  if (Init && !Init->isValueDependent()) {
     if (var->isConstexpr()) {
      SmallVector<PartialDiagnosticAt, 8> Notes;
       if (!var->evaluateValue(Notes) || !var->isInitICE()) {
@@ -11109,6 +11201,17 @@ void Sema::CheckCompleteVariableDeclaration(VarDecl *var) {
           << Init->getSourceRange();
         Diag(attr->getLocation(), diag::note_declared_required_constant_init_here)
           << attr->getRange();
+        if (getLangOpts().CPlusPlus11) {
+          APValue Value;
+          SmallVector<PartialDiagnosticAt, 8> Notes;
+          Init->EvaluateAsInitializer(Value, getASTContext(), var, Notes);
+          for (auto &it : Notes)
+            Diag(it.first, it.second);
+        } else {
+          Diag(CacheCulprit->getExprLoc(),
+               diag::note_invalid_subexpr_in_const_expr)
+              << CacheCulprit->getSourceRange();
+        }
       }
     }
     else if (!var->isConstexpr() && IsGlobal &&
@@ -11156,6 +11259,23 @@ void Sema::FinalizeDeclaration(Decl *ThisDecl) {
   if (!VD)
     return;
 
+  // Apply an implicit SectionAttr if '#pragma clang section bss|data|rodata' is active
+  if (VD->hasGlobalStorage() && VD->isThisDeclarationADefinition() &&
+      !inTemplateInstantiation() && !VD->hasAttr<SectionAttr>()) {
+    if (PragmaClangBSSSection.Valid)
+      VD->addAttr(PragmaClangBSSSectionAttr::CreateImplicit(Context,
+                                                            PragmaClangBSSSection.SectionName,
+                                                            PragmaClangBSSSection.PragmaLocation));
+    if (PragmaClangDataSection.Valid)
+      VD->addAttr(PragmaClangDataSectionAttr::CreateImplicit(Context,
+                                                             PragmaClangDataSection.SectionName,
+                                                             PragmaClangDataSection.PragmaLocation));
+    if (PragmaClangRodataSection.Valid)
+      VD->addAttr(PragmaClangRodataSectionAttr::CreateImplicit(Context,
+                                                               PragmaClangRodataSection.SectionName,
+                                                               PragmaClangRodataSection.PragmaLocation));
+  }
+
  if (auto *DD = dyn_cast<DecompositionDecl>(ThisDecl)) {
     for (auto *BD : DD->bindings()) {
       FinalizeDeclaration(BD);
@@ -11874,7 +11994,7 @@ Sema::CheckForFunctionRedefinition(FunctionDecl *FD,
   if (canRedefineFunction(Definition, getLangOpts()))
     return;
 
-  // Don't emit an error when this is redifinition of a typo-corrected
+  // Don't emit an error when this is a redefinition of a typo-corrected
   // definition.
   if (TypoCorrectedFunctionDefinitions.count(Definition))
     return;
@@ -11975,8 +12095,9 @@ Decl *Sema::ActOnStartOfFunctionDef(Scope *FnBodyScope, Decl *D,
     FD->setInvalidDecl();
   }
 
-  // See if this is a redefinition.
-  if (!FD->isLateTemplateParsed()) {
+  // See if this is a redefinition. If 'will have body' is already set, then
+  // these checks were already performed when it was set.
+  if (!FD->willHaveBody() && !FD->isLateTemplateParsed()) {
     CheckForFunctionRedefinition(FD, nullptr, SkipBody);
 
     // If we're skipping the body, we're done. Don't enter the scope.
@@ -12169,11 +12290,12 @@ Decl *Sema::ActOnFinishFunctionBody(Decl *dcl, Stmt *Body,
   sema::AnalysisBasedWarnings::Policy WP = AnalysisWarnings.getDefaultPolicy();
   sema::AnalysisBasedWarnings::Policy *ActivePolicy = nullptr;
 
-  if (getLangOpts().CoroutinesTS && getCurFunction()->CoroutinePromise)
+  if (getLangOpts().CoroutinesTS && getCurFunction()->isCoroutine())
     CheckCompletedCoroutineBody(FD, Body);
 
   if (FD) {
     FD->setBody(Body);
+    FD->setWillHaveBody(false);
 
     if (getLangOpts().CPlusPlus14) {
       if (!FD->isInvalidDecl() && Body && !FD->isDependentContext() &&
@@ -12291,7 +12413,7 @@ Decl *Sema::ActOnFinishFunctionBody(Decl *dcl, Stmt *Body,
         TypeSourceInfo *TI = FD->getTypeSourceInfo();
         TypeLoc TL = TI->getTypeLoc();
        FunctionTypeLoc FTL = TL.getAsAdjusted<FunctionTypeLoc>();
-        Diag(FTL.getLParenLoc(), diag::warn_strict_prototypes) << 1;
+        Diag(FTL.getLParenLoc(), diag::warn_strict_prototypes) << 2;
       }
     }
 
@@ -12502,6 +12624,9 @@ NamedDecl *Sema::ImplicitlyDefineFunction(SourceLocation Loc,
   unsigned diag_id;
   if (II.getName().startswith("__builtin_"))
     diag_id = diag::warn_builtin_unknown;
+  // OpenCL v2.0 s6.9.u - Implicit function declaration is not supported.
+  else if (getLangOpts().OpenCL)
+    diag_id = diag::err_opencl_implicit_function_decl;
   else if (getLangOpts().C99)
     diag_id = diag::ext_implicit_function_decl;
   else
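Note: an unknown callee now produces a hard error in OpenCL rather than the C99 extension warning; a hypothetical kernel:

    __kernel void k(__global int *out) {
      out[0] = undeclared(); // error: implicit declaration of function
                             // 'undeclared' is not supported in OpenCL
    }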
@@ -13030,7 +13155,8 @@ Decl *Sema::ActOnTag(Scope *S, unsigned TagSpec, TagUseKind TUK,
                      SourceLocation ScopedEnumKWLoc,
                      bool ScopedEnumUsesClassTag,
                      TypeResult UnderlyingType,
-                     bool IsTypeSpecifier, SkipBodyInfo *SkipBody) {
+                     bool IsTypeSpecifier, bool IsTemplateParamOrArg,
+                     SkipBodyInfo *SkipBody) {
   // If this is not a definition, it must have a name.
   IdentifierInfo *OrigName = Name;
   assert((Name != nullptr || TUK == TUK_Definition) &&
@@ -13129,6 +13255,56 @@ Decl *Sema::ActOnTag(Scope *S, unsigned TagSpec, TagUseKind TUK,
   if (TUK == TUK_Friend || TUK == TUK_Reference)
     Redecl = NotForRedeclaration;
 
+  /// Create a new tag decl in C/ObjC. Since the ODR-like semantics implemented
+  /// for ObjC/C require structural equivalence checking, the returned decl
+  /// here is passed back to the parser, allowing the tag body to be parsed.
+  auto createTagFromNewDecl = [&]() -> TagDecl * {
+    assert(!getLangOpts().CPlusPlus && "not meant for C++ usage");
+    // If there is an identifier, use the location of the identifier as the
+    // location of the decl, otherwise use the location of the struct/union
+    // keyword.
+    SourceLocation Loc = NameLoc.isValid() ? NameLoc : KWLoc;
+    TagDecl *New = nullptr;
+
+    if (Kind == TTK_Enum) {
+      New = EnumDecl::Create(Context, SearchDC, KWLoc, Loc, Name, nullptr,
+                             ScopedEnum, ScopedEnumUsesClassTag,
+                             !EnumUnderlying.isNull());
+      // If this is an undefined enum, bail.
+      if (TUK != TUK_Definition && !Invalid)
+        return nullptr;
+      if (EnumUnderlying) {
+        EnumDecl *ED = cast(New);
+        if (TypeSourceInfo *TI = EnumUnderlying.dyn_cast<TypeSourceInfo*>())
+          ED->setIntegerTypeSourceInfo(TI);
+        else
+          ED->setIntegerType(QualType(EnumUnderlying.get<const Type*>(), 0));
+        ED->setPromotionType(ED->getIntegerType());
+      }
+    } else { // struct/union
+      New = RecordDecl::Create(Context, Kind, SearchDC, KWLoc, Loc, Name,
+                               nullptr);
+    }
+
+    if (RecordDecl *RD = dyn_cast<RecordDecl>(New)) {
+      // Add alignment attributes if necessary; these attributes are checked
+      // when the ASTContext lays out the structure.
+      //
+      // It is important for implementing the correct semantics that this
+      // happen here (in ActOnTag). The #pragma pack stack is
+      // maintained as a result of parser callbacks which can occur at
+      // many points during the parsing of a struct declaration (because
+      // the #pragma tokens are effectively skipped over during the
+      // parsing of the struct).
+      if (TUK == TUK_Definition) {
+        AddAlignmentAttributesForRecord(RD);
+        AddMsStructLayoutForRecord(RD);
+      }
+    }
+    New->setLexicalDeclContext(CurContext);
+    return New;
+  };
+
   LookupResult Previous(*this, Name, NameLoc, LookupTagName, Redecl);
   if (Name && SS.isNotEmpty()) {
     // We have a nested-name tag ('struct foo::bar').
@@ -13300,11 +13476,11 @@ Decl *Sema::ActOnTag(Scope *S, unsigned TagSpec, TagUseKind TUK,
   // also need to do a redeclaration lookup there, just in case
   // there's a shadow friend decl.
   if (Name && Previous.empty() &&
-      (TUK == TUK_Reference || TUK == TUK_Friend)) {
+      (TUK == TUK_Reference || TUK == TUK_Friend || IsTemplateParamOrArg)) {
     if (Invalid) goto CreateNewDecl;
     assert(SS.isEmpty());
 
-    if (TUK == TUK_Reference) {
+    if (TUK == TUK_Reference || IsTemplateParamOrArg) {
       // C++ [basic.scope.pdecl]p5:
       //   -- for an elaborated-type-specifier of the form
       //
@@ -13496,9 +13672,7 @@ Decl *Sema::ActOnTag(Scope *S, unsigned TagSpec, TagUseKind TUK,
             } else if (TUK == TUK_Reference &&
                        (PrevTagDecl->getFriendObjectKind() ==
                             Decl::FOK_Undeclared ||
-                        PP.getModuleContainingLocation(
-                            PrevDecl->getLocation()) !=
-                            PP.getModuleContainingLocation(KWLoc)) &&
+                        PrevDecl->getOwningModule() != getCurrentModule()) &&
                        SS.isEmpty()) {
               // This declaration is a reference to an existing entity, but
               // has different visibility from that entity: it either makes
@@ -13536,16 +13710,28 @@ Decl *Sema::ActOnTag(Scope *S, unsigned TagSpec, TagUseKind TUK,
                     TSK_ExplicitSpecialization;
               }
 
+              // Note that clang allows ODR-like semantics for ObjC/C, i.e., do
+              // not keep more than one definition around (merge them). However,
+              // ensure the decl passes the structural compatibility check in
+              // C11 6.2.7/1 (or 6.1.2.6/1 in C89).
               NamedDecl *Hidden = nullptr;
-              if (SkipBody && getLangOpts().CPlusPlus &&
-                  !hasVisibleDefinition(Def, &Hidden)) {
+              if (SkipBody && !hasVisibleDefinition(Def, &Hidden)) {
                 // There is a definition of this tag, but it is not visible. We
                 // explicitly make use of C++'s one definition rule here, and
                 // assume that this definition is identical to the hidden one
                 // we already have. Make the existing definition visible and
                 // use it in place of this one.
-                SkipBody->ShouldSkip = true;
-                makeMergedDefinitionVisible(Hidden);
+                if (!getLangOpts().CPlusPlus) {
+                  // Postpone making the old definition visible until after we
+                  // complete parsing the new one and do the structural
+                  // comparison.
+                  SkipBody->CheckSameAsPrevious = true;
+                  SkipBody->New = createTagFromNewDecl();
+                  SkipBody->Previous = Hidden;
+                } else {
+                  SkipBody->ShouldSkip = true;
+                  makeMergedDefinitionVisible(Hidden);
+                }
                 return Def;
               } else if (!IsExplicitSpecializationAfterInstantiation) {
                 // A redeclaration in function prototype scope in C isn't
@@ -13554,7 +13740,7 @@ Decl *Sema::ActOnTag(Scope *S, unsigned TagSpec, TagUseKind TUK,
                   Diag(NameLoc, diag::warn_redefinition_in_param_list) << Name;
                 else
                   Diag(NameLoc, diag::err_redefinition) << Name;
-                notePreviousDefinition(Def->getLocation(),
+                notePreviousDefinition(Def,
                                        NameLoc.isValid() ? NameLoc : KWLoc);
                 // If this is a redefinition, recover by making this
                 // struct be anonymous, which will make any later
@@ -13645,7 +13831,7 @@ Decl *Sema::ActOnTag(Scope *S, unsigned TagSpec, TagUseKind TUK,
         // The tag name clashes with something else in the target scope,
         // issue an error and recover by making this tag be anonymous.
         Diag(NameLoc, diag::err_redefinition_different_kind) << Name;
-        notePreviousDefinition(PrevDecl->getLocation(), NameLoc);
+        notePreviousDefinition(PrevDecl, NameLoc);
         Name = nullptr;
         Invalid = true;
       }
@@ -13739,7 +13925,8 @@ Decl *Sema::ActOnTag(Scope *S, unsigned TagSpec, TagUseKind TUK,
 
   // C++11 [dcl.type]p3:
   //   A type-specifier-seq shall not define a class or enumeration [...].
-  if (getLangOpts().CPlusPlus && IsTypeSpecifier && TUK == TUK_Definition) {
+  if (getLangOpts().CPlusPlus && (IsTypeSpecifier || IsTemplateParamOrArg) &&
+      TUK == TUK_Definition) {
     Diag(New->getLocation(), diag::err_type_defined_in_type_specifier)
       << Context.getTagDeclType(New);
     Invalid = true;
@@ -13772,7 +13959,7 @@ Decl *Sema::ActOnTag(Scope *S, unsigned TagSpec, TagUseKind TUK,
     // the ASTContext lays out the structure.
     //
     // It is important for implementing the correct semantics that this
-    // happen here (in act on tag decl). The #pragma pack stack is
+    // happen here (in ActOnTag). The #pragma pack stack is
     // maintained as a result of parser callbacks which can occur at
     // many points during the parsing of a struct declaration (because
     // the #pragma tokens are effectively skipped over during the
@@ -13908,6 +14095,16 @@ void Sema::ActOnTagStartDefinition(Scope *S, Decl *TagD) {
   AddPushedVisibilityAttribute(Tag);
 }
 
+bool Sema::ActOnDuplicateDefinition(DeclSpec &DS, Decl *Prev,
+                                    SkipBodyInfo &SkipBody) {
+  if (!hasStructuralCompatLayout(Prev, SkipBody.New))
+    return false;
+
+  // Make the previous decl visible.
+  makeMergedDefinitionVisible(SkipBody.Previous);
+  return true;
+}
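Note: the end-to-end effect in C with modules enabled: the second definition of a tag is parsed into SkipBody.New, compared structurally against the hidden definition, and merged when compatible. A sketch:

    /* Seen once via a module and once via direct inclusion: */
    struct Point { int x, y; };  /* identical definitions merge per the
                                    C11 6.2.7/1 compatibility rule */
    /* struct Point { float x; }; would fail hasStructuralCompatLayout
       and be diagnosed as a redefinition */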
+
 Decl *Sema::ActOnObjCContainerStartDefinition(Decl *IDecl) {
  assert(isa<ObjCContainerDecl>(IDecl) &&
          "ActOnObjCContainerStartDefinition - Not ObjCContainerDecl");
@@ -15329,7 +15526,7 @@ Decl *Sema::ActOnEnumConstant(Scope *S, Decl *theEnumDecl, Decl *lastEnumConst,
   // different from T:
   // - every enumerator of every member of class T that is an unscoped
   // enumerated type
-  if (!TheEnumDecl->isScoped())
+  if (getLangOpts().CPlusPlus && !TheEnumDecl->isScoped())
     DiagnoseClassNameShadow(TheEnumDecl->getDeclContext(),
                             DeclarationNameInfo(Id, IdLoc));
 
@@ -15349,7 +15546,7 @@ Decl *Sema::ActOnEnumConstant(Scope *S, Decl *theEnumDecl, Decl *lastEnumConst,
         Diag(IdLoc, diag::err_redefinition_of_enumerator) << Id;
       else
         Diag(IdLoc, diag::err_redefinition) << Id;
-      notePreviousDefinition(PrevDecl->getLocation(), IdLoc);
+      notePreviousDefinition(PrevDecl, IdLoc);
       return nullptr;
     }
   }
@@ -15881,8 +16078,6 @@ Sema::DeclGroupPtrTy Sema::ActOnModuleDecl(SourceLocation StartLoc,
     return nullptr;
   }
 
-  // FIXME: Create a ModuleDecl and return it.
-
   // FIXME: Most of this work should be done by the preprocessor rather than
   // here, in order to support macro import.
 
@@ -15896,6 +16091,8 @@ Sema::DeclGroupPtrTy Sema::ActOnModuleDecl(SourceLocation StartLoc,
     ModuleName += Piece.first->getName();
   }
 
+  // FIXME: If we've already seen a module-declaration, report an error.
+
   // If a module name was explicitly specified on the command line, it must be
   // correct.
   if (!getLangOpts().CurrentModule.empty() &&
@@ -15908,6 +16105,7 @@ Sema::DeclGroupPtrTy Sema::ActOnModuleDecl(SourceLocation StartLoc,
  const_cast<LangOptions&>(getLangOpts()).CurrentModule = ModuleName;
 
   auto &Map = PP.getHeaderSearchInfo().getModuleMap();
+  Module *Mod;
 
   switch (MDK) {
   case ModuleDeclKind::Module: {
@@ -15926,12 +16124,9 @@ Sema::DeclGroupPtrTy Sema::ActOnModuleDecl(SourceLocation StartLoc,
     }
 
     // Create a Module for the module that we're defining.
-    Module *Mod = Map.createModuleForInterfaceUnit(ModuleLoc, ModuleName);
+    Mod = Map.createModuleForInterfaceUnit(ModuleLoc, ModuleName);
     assert(Mod && "module creation should not fail");
-
-    // Enter the semantic scope of the module.
-    ActOnModuleBegin(ModuleLoc, Mod);
-    return nullptr;
+    break;
   }
 
   case ModuleDeclKind::Partition:
@@ -15941,14 +16136,26 @@ Sema::DeclGroupPtrTy Sema::ActOnModuleDecl(SourceLocation StartLoc,
   case ModuleDeclKind::Implementation:
    std::pair<IdentifierInfo *, SourceLocation> ModuleNameLoc(
         PP.getIdentifierInfo(ModuleName), Path[0].second);
-
-    DeclResult Import = ActOnModuleImport(ModuleLoc, ModuleLoc, ModuleNameLoc);
-    if (Import.isInvalid())
+    Mod = getModuleLoader().loadModule(ModuleLoc, Path, Module::AllVisible,
+                                       /*IsIncludeDirective=*/false);
+    if (!Mod)
       return nullptr;
-    return ConvertDeclToDeclGroup(Import.get());
+    break;
   }
 
-  llvm_unreachable("unexpected module decl kind");
+  // Enter the semantic scope of the module.
+  ModuleScopes.push_back({});
+  ModuleScopes.back().Module = Mod;
+  ModuleScopes.back().OuterVisibleModules = std::move(VisibleModules);
+  VisibleModules.setVisible(Mod, ModuleLoc);
+
+  // From now on, we have an owning module for all declarations we see.
+  // However, those declarations are module-private unless explicitly
+  // exported.
+  Context.getTranslationUnitDecl()->setLocalOwningModule(Mod);
+
+  // FIXME: Create a ModuleDecl.
+  return nullptr;
 }
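Note: after this restructuring, interface and implementation units share the scope-push tail above; a sketch of a Modules TS interface unit (draft syntax of the period, not part of this patch):

    module M;        // handled by ActOnModuleDecl: creates M, pushes a ModuleScope
    export int f();  // owned by M and exported
    int detail();    // owned by M, module-private unless exported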
 
 DeclResult Sema::ActOnModuleImport(SourceLocation StartLoc,
@@ -16046,7 +16253,10 @@ void Sema::ActOnModuleBegin(SourceLocation DirectiveLoc, Module *Mod) {
   // lexically within the module.
   if (getLangOpts().trackLocalOwningModule()) {
     for (auto *DC = CurContext; DC; DC = DC->getLexicalParent()) {
-      cast<Decl>(DC)->setHidden(true);
+      cast<Decl>(DC)->setModuleOwnershipKind(
+          getLangOpts().ModulesLocalVisibility
+              ? Decl::ModuleOwnershipKind::VisibleWhenImported
+              : Decl::ModuleOwnershipKind::Visible);
      cast<Decl>(DC)->setLocalOwningModule(Mod);
     }
   }
@@ -16086,7 +16296,8 @@ void Sema::ActOnModuleEnd(SourceLocation EomLoc, Module *Mod) {
     for (auto *DC = CurContext; DC; DC = DC->getLexicalParent()) {
      cast<Decl>(DC)->setLocalOwningModule(getCurrentModule());
       if (!getCurrentModule())
-        cast<Decl>(DC)->setHidden(false);
+        cast<Decl>(DC)->setModuleOwnershipKind(
+            Decl::ModuleOwnershipKind::Unowned);
     }
   }
 }
@@ -16094,7 +16305,8 @@ void Sema::ActOnModuleEnd(SourceLocation EomLoc, Module *Mod) {
 void Sema::createImplicitModuleImportForErrorRecovery(SourceLocation Loc,
                                                       Module *Mod) {
   // Bail if we're not allowed to implicitly import a module here.
-  if (isSFINAEContext() || !getLangOpts().ModulesErrorRecovery)
+  if (isSFINAEContext() || !getLangOpts().ModulesErrorRecovery ||
+      VisibleModules.isVisible(Mod))
     return;
 
   // Create the implicit import declaration.
@@ -16132,6 +16344,7 @@ Decl *Sema::ActOnStartExportDecl(Scope *S, SourceLocation ExportLoc,
 
   CurContext->addDecl(D);
   PushDeclContext(S, D);
+  D->setModuleOwnershipKind(Decl::ModuleOwnershipKind::VisibleWhenImported);
   return D;
 }
 
diff --git a/interpreter/llvm/src/tools/clang/lib/Sema/SemaDeclAttr.cpp b/interpreter/llvm/src/tools/clang/lib/Sema/SemaDeclAttr.cpp
index 68639d8ded771..2a310bf41c703 100644
--- a/interpreter/llvm/src/tools/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Sema/SemaDeclAttr.cpp
@@ -238,7 +238,7 @@ static typename std::enable_if<std::is_base_of<Attr, AttrInfo>::value,
 getAttrName(const AttrInfo &Attr) {
   return &Attr;
 }
-const IdentifierInfo *getAttrName(const clang::AttributeList &Attr) {
+static const IdentifierInfo *getAttrName(const clang::AttributeList &Attr) {
   return Attr.getName();
 }
 
@@ -313,8 +313,8 @@ static bool checkAttrMutualExclusion(Sema &S, Decl *D, SourceRange Range,
 /// \returns true if IdxExpr is a valid index.
 template <typename AttrInfo>
 static bool checkFunctionOrMethodParameterIndex(
-    Sema &S, const Decl *D, const AttrInfo& Attr,
-    unsigned AttrArgNum, const Expr *IdxExpr, uint64_t &Idx) {
+    Sema &S, const Decl *D, const AttrInfo &Attr, unsigned AttrArgNum,
+    const Expr *IdxExpr, uint64_t &Idx, bool AllowImplicitThis = false) {
   assert(isFunctionOrMethodOrBlock(D));
 
   // In C++ the implicit 'this' function parameter also counts.
@@ -341,7 +341,7 @@ static bool checkFunctionOrMethodParameterIndex(
     return false;
   }
   Idx--; // Convert to zero-based.
-  if (HasImplicitThisParam) {
+  if (HasImplicitThisParam && !AllowImplicitThis) {
     if (Idx == 0) {
       S.Diag(getAttrLoc(Attr),
              diag::err_attribute_invalid_implicit_this_argument)
@@ -949,7 +949,7 @@ static bool checkFunctionConditionAttr(Sema &S, Decl *D,
     Msg = "";
 
  SmallVector<PartialDiagnosticAt, 8> Diags;
-  if (!Cond->isValueDependent() &&
+  if (isa<FunctionDecl>(D) && !Cond->isValueDependent() &&
       !Expr::isPotentialConstantExprUnevaluated(Cond, cast<FunctionDecl>(D),
                                                 Diags)) {
     S.Diag(Attr.getLoc(), diag::err_attr_cond_never_constant_expr)
@@ -1037,10 +1037,11 @@ static void handleDiagnoseIfAttr(Sema &S, Decl *D, const AttributeList &Attr) {
     return;
   }
 
-  auto *FD = cast<FunctionDecl>(D);
-  bool ArgDependent = ArgumentDependenceChecker(FD).referencesArgs(Cond);
+  bool ArgDependent = false;
+  if (const auto *FD = dyn_cast<FunctionDecl>(D))
+    ArgDependent = ArgumentDependenceChecker(FD).referencesArgs(Cond);
   D->addAttr(::new (S.Context) DiagnoseIfAttr(
-      Attr.getRange(), S.Context, Cond, Msg, DiagType, ArgDependent, FD,
+      Attr.getRange(), S.Context, Cond, Msg, DiagType, ArgDependent, cast<NamedDecl>(D),
       Attr.getAttributeSpellingListIndex()));
 }
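Note: casting to NamedDecl rather than FunctionDecl lets diagnose_if attach to declarations other than plain functions, with argument dependence computed only when a FunctionDecl is present. The classic form, for reference (hypothetical declaration):

    int safe_div(int x, int y)
        __attribute__((diagnose_if(y == 0, "division by zero", "error")));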
 
@@ -4279,7 +4280,7 @@ bool Sema::CheckCallingConvAttr(const AttributeList &attr, CallingConv &CC,
   case AttributeList::AT_RegCall: CC = CC_X86RegCall; break;
   case AttributeList::AT_MSABI:
     CC = Context.getTargetInfo().getTriple().isOSWindows() ? CC_C :
-                                                             CC_X86_64Win64;
+                                                             CC_Win64;
     break;
   case AttributeList::AT_SysVABI:
     CC = Context.getTargetInfo().getTriple().isOSWindows() ? CC_X86_64SysV :
@@ -4603,14 +4604,16 @@ static void handleTypeTagForDatatypeAttr(Sema &S, Decl *D,
 static void handleXRayLogArgsAttr(Sema &S, Decl *D,
                                   const AttributeList &Attr) {
   uint64_t ArgCount;
+
   if (!checkFunctionOrMethodParameterIndex(S, D, Attr, 1, Attr.getArgAsExpr(0),
-                                           ArgCount))
+                                           ArgCount,
+                                           true /* AllowImplicitThis*/))
     return;
 
   // ArgCount isn't a parameter index [0;n), it's a count [1;n] - hence + 1.
   D->addAttr(::new (S.Context)
-             XRayLogArgsAttr(Attr.getRange(), S.Context, ++ArgCount,
-             Attr.getAttributeSpellingListIndex()));
+                 XRayLogArgsAttr(Attr.getRange(), S.Context, ++ArgCount,
+                                 Attr.getAttributeSpellingListIndex()));
 }
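Note: with AllowImplicitThis, argument index 1 on a C++ member function may now denote the implicit 'this' instead of being rejected. A sketch:

    struct Connection {
      // Index 1 counts the implicit 'this' parameter under the new flag.
      void send(int id) __attribute__((xray_log_args(1)));
    };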
 
 //===----------------------------------------------------------------------===//
@@ -4676,6 +4679,16 @@ void Sema::AddNSConsumedAttr(SourceRange attrRange, Decl *D,
                    CFConsumedAttr(attrRange, Context, spellingIndex));
 }
 
+bool Sema::checkNSReturnsRetainedReturnType(SourceLocation loc,
+                                            QualType type) {
+  if (isValidSubjectOfNSReturnsRetainedAttribute(type))
+    return false;
+
+  Diag(loc, diag::warn_ns_attribute_wrong_return_type)
+    << "'ns_returns_retained'" << 0 << 0;
+  return true;
+}
+
 static void handleNSReturnsRetainedAttr(Sema &S, Decl *D,
                                         const AttributeList &Attr) {
   QualType returnType;
@@ -4697,6 +4710,8 @@ static void handleNSReturnsRetainedAttr(Sema &S, Decl *D,
           << Attr.getRange();
       return;
     }
+  } else if (Attr.isUsedAsTypeAttr()) {
+    return;
   } else {
     AttributeDeclKind ExpectedDeclKind;
     switch (Attr.getKind()) {
@@ -4740,6 +4755,9 @@ static void handleNSReturnsRetainedAttr(Sema &S, Decl *D,
   }
 
   if (!typeOK) {
+    if (Attr.isUsedAsTypeAttr())
+      return;
+
    if (isa<ParmVarDecl>(D)) {
       S.Diag(D->getLocStart(), diag::warn_ns_attribute_wrong_parameter_type)
           << Attr.getName() << /*pointer-to-CF*/2
@@ -5935,12 +5953,18 @@ static void ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D,
     handleDLLAttr(S, D, Attr);
     break;
   case AttributeList::AT_Mips16:
-    handleSimpleAttributeWithExclusions<Mips16Attr, MipsInterruptAttr>(S, D,
-                                                                       Attr);
+    handleSimpleAttributeWithExclusions<Mips16Attr, MicroMipsAttr,
+                                        MipsInterruptAttr>(S, D, Attr);
     break;
   case AttributeList::AT_NoMips16:
    handleSimpleAttribute<NoMips16Attr>(S, D, Attr);
     break;
+  case AttributeList::AT_MicroMips:
+    handleSimpleAttributeWithExclusions<MicroMipsAttr, Mips16Attr>(S, D, Attr);
+    break;
+  case AttributeList::AT_NoMicroMips:
+    handleSimpleAttribute<NoMicroMipsAttr>(S, D, Attr);
+    break;
   case AttributeList::AT_AMDGPUFlatWorkGroupSize:
     handleAMDGPUFlatWorkGroupSizeAttr(S, D, Attr);
     break;
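Note: the new MIPS attribute pair mirrors mips16/nomips16 (hypothetical usage):

    void fast(void) __attribute__((micromips));    // mutually exclusive with mips16
    void plain(void) __attribute__((nomicromips));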
@@ -6562,7 +6586,7 @@ void Sema::ProcessDeclAttributeList(Scope *S, Decl *D,
   }
 }
 
-// Helper for delayed proccessing TransparentUnion attribute.
+// Helper for delayed processing TransparentUnion attribute.
 void Sema::ProcessDeclAttributeDelayed(Decl *D, const AttributeList *AttrList) {
   for (const AttributeList *Attr = AttrList; Attr; Attr = Attr->getNext())
     if (Attr->getKind() == AttributeList::AT_TransparentUnion) {
@@ -6829,6 +6853,50 @@ static const AvailabilityAttr *getAttrForPlatform(ASTContext &Context,
   return nullptr;
 }
 
+/// The diagnostic we should emit for \c D, and the declaration that
+/// originated it, or \c AR_Available.
+///
+/// \param D The declaration to check.
+/// \param Message If non-null, this will be populated with the message from
+/// the availability attribute that is selected.
+static std::pair<AvailabilityResult, const NamedDecl *>
+ShouldDiagnoseAvailabilityOfDecl(const NamedDecl *D, std::string *Message) {
+  AvailabilityResult Result = D->getAvailability(Message);
+
+  // For typedefs, if the typedef declaration appears available look
+  // to the underlying type to see if it is more restrictive.
+  while (const TypedefNameDecl *TD = dyn_cast<TypedefNameDecl>(D)) {
+    if (Result == AR_Available) {
+      if (const TagType *TT = TD->getUnderlyingType()->getAs<TagType>()) {
+        D = TT->getDecl();
+        Result = D->getAvailability(Message);
+        continue;
+      }
+    }
+    break;
+  }
+
+  // Forward class declarations get their attributes from their definition.
+  if (const ObjCInterfaceDecl *IDecl = dyn_cast<ObjCInterfaceDecl>(D)) {
+    if (IDecl->getDefinition()) {
+      D = IDecl->getDefinition();
+      Result = D->getAvailability(Message);
+    }
+  }
+
+  if (const auto *ECD = dyn_cast<EnumConstantDecl>(D))
+    if (Result == AR_Available) {
+      const DeclContext *DC = ECD->getDeclContext();
+      if (const auto *TheEnumDecl = dyn_cast<EnumDecl>(DC)) {
+        Result = TheEnumDecl->getAvailability(Message);
+        D = TheEnumDecl;
+      }
+    }
+
+  return {Result, D};
+}
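Note: because the loop descends into the underlying tag, a typedef of a partially available struct now reports the struct's availability. A sketch:

    struct __attribute__((availability(macos, introduced=10.13))) NewAPI { int v; };
    typedef struct NewAPI NewAPIRef; // uses of NewAPIRef are checked against
                                     // the availability of 'struct NewAPI'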
+
+
 /// \brief whether we should emit a diagnostic for \c K and \c DeclVersion in
 /// the context of \c Ctx. For example, we should emit an unavailable diagnostic
 /// in a deprecated context, but not the other way around.
@@ -6894,8 +6962,60 @@ static bool ShouldDiagnoseAvailabilityInContext(Sema &S, AvailabilityResult K,
   return true;
 }
 
+static bool
+shouldDiagnoseAvailabilityByDefault(const ASTContext &Context,
+                                    const VersionTuple &DeploymentVersion,
+                                    const VersionTuple &DeclVersion) {
+  const auto &Triple = Context.getTargetInfo().getTriple();
+  VersionTuple ForceAvailabilityFromVersion;
+  switch (Triple.getOS()) {
+  case llvm::Triple::IOS:
+  case llvm::Triple::TvOS:
+    ForceAvailabilityFromVersion = VersionTuple(/*Major=*/11);
+    break;
+  case llvm::Triple::WatchOS:
+    ForceAvailabilityFromVersion = VersionTuple(/*Major=*/4);
+    break;
+  case llvm::Triple::Darwin:
+  case llvm::Triple::MacOSX:
+    ForceAvailabilityFromVersion = VersionTuple(/*Major=*/10, /*Minor=*/13);
+    break;
+  default:
+    // New targets should always warn about availability.
+    return Triple.getVendor() == llvm::Triple::Apple;
+  }
+  return DeploymentVersion >= ForceAvailabilityFromVersion ||
+         DeclVersion >= ForceAvailabilityFromVersion;
+}
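Note: in effect, with a macOS 10.12 deployment target a declaration introduced in 10.13 already triggers the on-by-default variant (DeclVersion >= 10.13), and with a 10.13 target every partially available declaration does (DeploymentVersion >= 10.13). Hypothetical:

    void newThing(void) __attribute__((availability(macos, introduced=10.13)));
    void use(void) {
      newThing(); // on-by-default partial-availability warning
    }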
+
+static NamedDecl *findEnclosingDeclToAnnotate(Decl *OrigCtx) {
+  for (Decl *Ctx = OrigCtx; Ctx;
+       Ctx = cast_or_null<Decl>(Ctx->getDeclContext())) {
+    if (isa<TagDecl>(Ctx) || isa<FunctionDecl>(Ctx) || isa<ObjCMethodDecl>(Ctx))
+      return cast<NamedDecl>(Ctx);
+    if (auto *CD = dyn_cast<ObjCContainerDecl>(Ctx)) {
+      if (auto *Imp = dyn_cast<ObjCImplDecl>(Ctx))
+        return Imp->getClassInterface();
+      return CD;
+    }
+  }
+
+  return dyn_cast<NamedDecl>(OrigCtx);
+}
+
+/// Actually emit an availability diagnostic for a reference to an unavailable
+/// decl.
+///
+/// \param Ctx The context that the reference occurred in
+/// \param ReferringDecl The exact declaration that was referenced.
+/// \param OffendingDecl A related decl to \c ReferringDecl that has an
+/// availability attribute corresponding to \c K attached to it. Note that this
+/// may not be the same as ReferringDecl, i.e. if an EnumDecl is annotated and
+/// we refer to a member EnumConstantDecl, ReferringDecl is the EnumConstantDecl
+/// and OffendingDecl is the EnumDecl.
 static void DoEmitAvailabilityWarning(Sema &S, AvailabilityResult K,
-                                      Decl *Ctx, const NamedDecl *D,
+                                      Decl *Ctx, const NamedDecl *ReferringDecl,
+                                      const NamedDecl *OffendingDecl,
                                       StringRef Message, SourceLocation Loc,
                                       const ObjCInterfaceDecl *UnknownObjCClass,
                                       const ObjCPropertyDecl *ObjCProperty,
@@ -6903,7 +7023,7 @@ static void DoEmitAvailabilityWarning(Sema &S, AvailabilityResult K,
   // Diagnostics for deprecated or unavailable.
   unsigned diag, diag_message, diag_fwdclass_message;
   unsigned diag_available_here = diag::note_availability_specified_here;
-  SourceLocation NoteLocation = D->getLocation();
+  SourceLocation NoteLocation = OffendingDecl->getLocation();
 
   // Matches 'diag::note_property_attribute' options.
   unsigned property_note_select;
@@ -6912,7 +7032,7 @@ static void DoEmitAvailabilityWarning(Sema &S, AvailabilityResult K,
   unsigned available_here_select_kind;
 
   VersionTuple DeclVersion;
-  if (const AvailabilityAttr *AA = getAttrForPlatform(S.Context, D))
+  if (const AvailabilityAttr *AA = getAttrForPlatform(S.Context, OffendingDecl))
     DeclVersion = AA->getIntroduced();
 
   if (!ShouldDiagnoseAvailabilityInContext(S, K, DeclVersion, Ctx))
@@ -6926,7 +7046,7 @@ static void DoEmitAvailabilityWarning(Sema &S, AvailabilityResult K,
     diag_fwdclass_message = diag::warn_deprecated_fwdclass_message;
     property_note_select = /* deprecated */ 0;
     available_here_select_kind = /* deprecated */ 2;
-    if (const auto *attr = D->getAttr<DeprecatedAttr>())
+    if (const auto *attr = OffendingDecl->getAttr<DeprecatedAttr>())
       NoteLocation = attr->getLocation();
     break;
 
@@ -6938,13 +7058,14 @@ static void DoEmitAvailabilityWarning(Sema &S, AvailabilityResult K,
     property_note_select = /* unavailable */ 1;
     available_here_select_kind = /* unavailable */ 0;
 
-    if (auto attr = D->getAttr<UnavailableAttr>()) {
+    if (auto attr = OffendingDecl->getAttr<UnavailableAttr>()) {
       if (attr->isImplicit() && attr->getImplicitReason()) {
         // Most of these failures are due to extra restrictions in ARC;
         // reflect that in the primary diagnostic when applicable.
         auto flagARCError = [&] {
           if (S.getLangOpts().ObjCAutoRefCount &&
-              S.getSourceManager().isInSystemHeader(D->getLocation()))
+              S.getSourceManager().isInSystemHeader(
+                  OffendingDecl->getLocation()))
             diag = diag::err_unavailable_in_arc;
         };
 
@@ -6982,13 +7103,27 @@ static void DoEmitAvailabilityWarning(Sema &S, AvailabilityResult K,
     }
     break;
 
-  case AR_NotYetIntroduced:
-    diag = diag::warn_partial_availability;
-    diag_message = diag::warn_partial_message;
-    diag_fwdclass_message = diag::warn_partial_fwdclass_message;
+  case AR_NotYetIntroduced: {
+    // We would like to emit the diagnostic even if -Wunguarded-availability is
+    // not specified for deployment targets >= to iOS 11 or equivalent or
+    // for declarations that were introduced in iOS 11 (macOS 10.13, ...) or
+    // later.
+    const AvailabilityAttr *AA =
+        getAttrForPlatform(S.getASTContext(), OffendingDecl);
+    VersionTuple Introduced = AA->getIntroduced();
+    bool NewWarning = shouldDiagnoseAvailabilityByDefault(
+        S.Context, S.Context.getTargetInfo().getPlatformMinVersion(),
+        Introduced);
+    diag = NewWarning ? diag::warn_partial_availability_new
+                      : diag::warn_partial_availability;
+    diag_message = NewWarning ? diag::warn_partial_message_new
+                              : diag::warn_partial_message;
+    diag_fwdclass_message = NewWarning ? diag::warn_partial_fwdclass_message_new
+                                       : diag::warn_partial_fwdclass_message;
     property_note_select = /* partial */ 2;
     available_here_select_kind = /* partial */ 3;
     break;
+  }
 
   case AR_Available:
     llvm_unreachable("Warning for availability of available declaration?");
@@ -6997,9 +7132,9 @@ static void DoEmitAvailabilityWarning(Sema &S, AvailabilityResult K,
   CharSourceRange UseRange;
   StringRef Replacement;
   if (K == AR_Deprecated) {
-    if (auto attr = D->getAttr<DeprecatedAttr>())
+    if (auto attr = OffendingDecl->getAttr<DeprecatedAttr>())
       Replacement = attr->getReplacement();
-    if (auto attr = getAttrForPlatform(S.Context, D))
+    if (auto attr = getAttrForPlatform(S.Context, OffendingDecl))
       Replacement = attr->getReplacement();
 
     if (!Replacement.empty())
@@ -7008,21 +7143,21 @@ static void DoEmitAvailabilityWarning(Sema &S, AvailabilityResult K,
   }
 
   if (!Message.empty()) {
-    S.Diag(Loc, diag_message) << D << Message
+    S.Diag(Loc, diag_message) << ReferringDecl << Message
       << (UseRange.isValid() ?
           FixItHint::CreateReplacement(UseRange, Replacement) : FixItHint());
     if (ObjCProperty)
       S.Diag(ObjCProperty->getLocation(), diag::note_property_attribute)
           << ObjCProperty->getDeclName() << property_note_select;
   } else if (!UnknownObjCClass) {
-    S.Diag(Loc, diag) << D
+    S.Diag(Loc, diag) << ReferringDecl
       << (UseRange.isValid() ?
           FixItHint::CreateReplacement(UseRange, Replacement) : FixItHint());
     if (ObjCProperty)
       S.Diag(ObjCProperty->getLocation(), diag::note_property_attribute)
           << ObjCProperty->getDeclName() << property_note_select;
   } else {
-    S.Diag(Loc, diag_fwdclass_message) << D
+    S.Diag(Loc, diag_fwdclass_message) << ReferringDecl
       << (UseRange.isValid() ?
           FixItHint::CreateReplacement(UseRange, Replacement) : FixItHint());
     S.Diag(UnknownObjCClass->getLocation(), diag::note_forward_class);
@@ -7030,16 +7165,16 @@ static void DoEmitAvailabilityWarning(Sema &S, AvailabilityResult K,
 
   // The declaration can have multiple availability attributes, we are looking
   // at one of them.
-  const AvailabilityAttr *A = getAttrForPlatform(S.Context, D);
+  const AvailabilityAttr *A = getAttrForPlatform(S.Context, OffendingDecl);
   if (A && A->isInherited()) {
-    for (const Decl *Redecl = D->getMostRecentDecl(); Redecl;
+    for (const Decl *Redecl = OffendingDecl->getMostRecentDecl(); Redecl;
          Redecl = Redecl->getPreviousDecl()) {
       const AvailabilityAttr *AForRedecl = getAttrForPlatform(S.Context,
                                                               Redecl);
       if (AForRedecl && !AForRedecl->isInherited()) {
         // If D is a declaration with inherited attributes, the note should
         // point to the declaration with actual attributes.
-        S.Diag(Redecl->getLocation(), diag_available_here) << D
+        S.Diag(Redecl->getLocation(), diag_available_here) << OffendingDecl
             << available_here_select_kind;
         break;
       }
@@ -7047,10 +7182,19 @@ static void DoEmitAvailabilityWarning(Sema &S, AvailabilityResult K,
   }
   else
     S.Diag(NoteLocation, diag_available_here)
-        << D << available_here_select_kind;
+        << OffendingDecl << available_here_select_kind;
 
   if (K == AR_NotYetIntroduced)
-    S.Diag(Loc, diag::note_partial_availability_silence) << D;
+    if (const auto *Enclosing = findEnclosingDeclToAnnotate(Ctx)) {
+      if (auto *TD = dyn_cast<TagDecl>(Enclosing))
+        if (TD->getDeclName().isEmpty()) {
+          S.Diag(TD->getLocation(), diag::note_partial_availability_silence)
+              << /*Anonymous*/1 << TD->getKindName();
+          return;
+        }
+      S.Diag(Enclosing->getLocation(), diag::note_partial_availability_silence)
+          << /*Named*/0 << Enclosing;
+    }
 }
 
 static void handleDelayedAvailabilityCheck(Sema &S, DelayedDiagnostic &DD,
@@ -7060,9 +7204,9 @@ static void handleDelayedAvailabilityCheck(Sema &S, DelayedDiagnostic &DD,
 
   DD.Triggered = true;
   DoEmitAvailabilityWarning(
-      S, DD.getAvailabilityResult(), Ctx, DD.getAvailabilityDecl(),
-      DD.getAvailabilityMessage(), DD.Loc, DD.getUnknownObjCClass(),
-      DD.getObjCProperty(), false);
+      S, DD.getAvailabilityResult(), Ctx, DD.getAvailabilityReferringDecl(),
+      DD.getAvailabilityOffendingDecl(), DD.getAvailabilityMessage(), DD.Loc,
+      DD.getUnknownObjCClass(), DD.getObjCProperty(), false);
 }
 
 void Sema::PopParsingDeclaration(ParsingDeclState state, Decl *decl) {
@@ -7120,23 +7264,26 @@ void Sema::redelayDiagnostics(DelayedDiagnosticPool &pool) {
   curPool->steal(pool);
 }
 
-void Sema::EmitAvailabilityWarning(AvailabilityResult AR,
-                                   NamedDecl *D, StringRef Message,
-                                   SourceLocation Loc,
-                                   const ObjCInterfaceDecl *UnknownObjCClass,
-                                   const ObjCPropertyDecl  *ObjCProperty,
-                                   bool ObjCPropertyAccess) {
+static void EmitAvailabilityWarning(Sema &S, AvailabilityResult AR,
+                                    const NamedDecl *ReferringDecl,
+                                    const NamedDecl *OffendingDecl,
+                                    StringRef Message, SourceLocation Loc,
+                                    const ObjCInterfaceDecl *UnknownObjCClass,
+                                    const ObjCPropertyDecl *ObjCProperty,
+                                    bool ObjCPropertyAccess) {
   // Delay if we're currently parsing a declaration.
-  if (DelayedDiagnostics.shouldDelayDiagnostics()) {
-    DelayedDiagnostics.add(DelayedDiagnostic::makeAvailability(
-        AR, Loc, D, UnknownObjCClass, ObjCProperty, Message,
-        ObjCPropertyAccess));
+  if (S.DelayedDiagnostics.shouldDelayDiagnostics()) {
+    S.DelayedDiagnostics.add(
+        DelayedDiagnostic::makeAvailability(
+            AR, Loc, ReferringDecl, OffendingDecl, UnknownObjCClass,
+            ObjCProperty, Message, ObjCPropertyAccess));
     return;
   }
 
-  Decl *Ctx = cast(getCurLexicalContext());
-  DoEmitAvailabilityWarning(*this, AR, Ctx, D, Message, Loc, UnknownObjCClass,
-                            ObjCProperty, ObjCPropertyAccess);
+  Decl *Ctx = cast(S.getCurLexicalContext());
+  DoEmitAvailabilityWarning(S, AR, Ctx, ReferringDecl, OffendingDecl,
+                            Message, Loc, UnknownObjCClass, ObjCProperty,
+                            ObjCPropertyAccess);
 }
 
 namespace {
@@ -7230,6 +7377,13 @@ class DiagnoseUnguardedAvailability
         SemaRef.Context.getTargetInfo().getPlatformMinVersion());
   }
 
+  bool TraverseDecl(Decl *D) {
+    // Avoid visiting nested functions to prevent duplicate warnings.
+    if (!D || isa<FunctionDecl>(D))
+      return true;
+    return Base::TraverseDecl(D);
+  }
+
   bool TraverseStmt(Stmt *S) {
     if (!S)
       return true;
@@ -7243,6 +7397,14 @@ class DiagnoseUnguardedAvailability
 
   bool TraverseIfStmt(IfStmt *If);
 
+  bool TraverseLambdaExpr(LambdaExpr *E) { return true; }
+
+  bool VisitObjCPropertyRefExpr(ObjCPropertyRefExpr *PRE) {
+    if (PRE->isClassReceiver())
+      DiagnoseDeclAvailability(PRE->getClassReceiver(), PRE->getReceiverLocation());
+    return true;
+  }
+
   bool VisitObjCMessageExpr(ObjCMessageExpr *Msg) {
     if (ObjCMethodDecl *D = Msg->getMethodDecl())
       DiagnoseDeclAvailability(
@@ -7262,24 +7424,32 @@ class DiagnoseUnguardedAvailability
     return true;
   }
 
+  bool VisitObjCAvailabilityCheckExpr(ObjCAvailabilityCheckExpr *E) {
+    SemaRef.Diag(E->getLocStart(), diag::warn_at_available_unchecked_use)
+        << (!SemaRef.getLangOpts().ObjC1);
+    return true;
+  }
+
   bool VisitTypeLoc(TypeLoc Ty);
 };
 
 void DiagnoseUnguardedAvailability::DiagnoseDeclAvailability(
     NamedDecl *D, SourceRange Range) {
-
-  VersionTuple ContextVersion = AvailabilityStack.back();
-  if (AvailabilityResult Result =
-          SemaRef.ShouldDiagnoseAvailabilityOfDecl(D, nullptr)) {
+  AvailabilityResult Result;
+  const NamedDecl *OffendingDecl;
+  std::tie(Result, OffendingDecl) =
+    ShouldDiagnoseAvailabilityOfDecl(D, nullptr);
+  if (Result != AR_Available) {
     // All other diagnostic kinds have already been handled in
     // DiagnoseAvailabilityOfDecl.
     if (Result != AR_NotYetIntroduced)
       return;
 
-    const AvailabilityAttr *AA = getAttrForPlatform(SemaRef.getASTContext(), D);
+    const AvailabilityAttr *AA =
+      getAttrForPlatform(SemaRef.getASTContext(), OffendingDecl);
     VersionTuple Introduced = AA->getIntroduced();
 
-    if (ContextVersion >= Introduced)
+    if (AvailabilityStack.back() >= Introduced)
       return;
 
     // If the context of this function is less available than D, we should not
@@ -7287,14 +7457,26 @@ void DiagnoseUnguardedAvailability::DiagnoseDeclAvailability(
     if (!ShouldDiagnoseAvailabilityInContext(SemaRef, Result, Introduced, Ctx))
       return;
 
-    SemaRef.Diag(Range.getBegin(), diag::warn_unguarded_availability)
+    // We would like to emit the diagnostic even if -Wunguarded-availability is
+    // not specified for deployment targets >= to iOS 11 or equivalent or
+    // for declarations that were introduced in iOS 11 (macOS 10.13, ...) or
+    // later.
+    unsigned DiagKind =
+        shouldDiagnoseAvailabilityByDefault(
+            SemaRef.Context,
+            SemaRef.Context.getTargetInfo().getPlatformMinVersion(), Introduced)
+            ? diag::warn_unguarded_availability_new
+            : diag::warn_unguarded_availability;
+
+    SemaRef.Diag(Range.getBegin(), DiagKind)
         << Range << D
         << AvailabilityAttr::getPrettyPlatformName(
                SemaRef.getASTContext().getTargetInfo().getPlatformName())
         << Introduced.getAsString();
 
-    SemaRef.Diag(D->getLocation(), diag::note_availability_specified_here)
-        << D << /* partial */ 3;
+    SemaRef.Diag(OffendingDecl->getLocation(),
+                 diag::note_availability_specified_here)
+        << OffendingDecl << /* partial */ 3;
 
     auto FixitDiag =
         SemaRef.Diag(Range.getBegin(), diag::note_unguarded_available_silence)
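Note: the suggested silencing is an availability guard; a sketch in C (not part of this patch):

    void use(void) {
      if (__builtin_available(macOS 10.13, *))
        newThing(); // guarded: no -Wunguarded-availability warning
    }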
@@ -7372,6 +7554,9 @@ bool DiagnoseUnguardedAvailability::VisitTypeLoc(TypeLoc Ty) {
   const Type *TyPtr = Ty.getTypePtr();
   SourceRange Range{Ty.getBeginLoc(), Ty.getEndLoc()};
 
+  if (Range.isInvalid())
+    return true;
+
   if (const TagType *TT = dyn_cast<TagType>(TyPtr)) {
     TagDecl *TD = TT->getDecl();
     DiagnoseDeclAvailability(TD, Range);
@@ -7431,3 +7616,44 @@ void Sema::DiagnoseUnguardedAvailabilityViolations(Decl *D) {
 
   DiagnoseUnguardedAvailability(*this, D).IssueDiagnostics(Body);
 }
+
+void Sema::DiagnoseAvailabilityOfDecl(NamedDecl *D, SourceLocation Loc,
+                                      const ObjCInterfaceDecl *UnknownObjCClass,
+                                      bool ObjCPropertyAccess,
+                                      bool AvoidPartialAvailabilityChecks) {
+  std::string Message;
+  AvailabilityResult Result;
+  const NamedDecl* OffendingDecl;
+  // See if this declaration is unavailable, deprecated, or partial.
+  std::tie(Result, OffendingDecl) = ShouldDiagnoseAvailabilityOfDecl(D, &Message);
+  if (Result == AR_Available)
+    return;
+
+  if (Result == AR_NotYetIntroduced) {
+    if (AvoidPartialAvailabilityChecks)
+      return;
+
+    // We need to know the @available context in the current function to
+    // diagnose this use, let DiagnoseUnguardedAvailabilityViolations do that
+    // when we're done parsing the current function.
+    if (getCurFunctionOrMethodDecl()) {
+      getEnclosingFunction()->HasPotentialAvailabilityViolations = true;
+      return;
+    } else if (getCurBlock() || getCurLambda()) {
+      getCurFunction()->HasPotentialAvailabilityViolations = true;
+      return;
+    }
+  }
+
+  const ObjCPropertyDecl *ObjCPDecl = nullptr;
+  if (const ObjCMethodDecl *MD = dyn_cast<ObjCMethodDecl>(D)) {
+    if (const ObjCPropertyDecl *PD = MD->findPropertyDecl()) {
+      AvailabilityResult PDeclResult = PD->getAvailability(nullptr);
+      if (PDeclResult == Result)
+        ObjCPDecl = PD;
+    }
+  }
+
+  EmitAvailabilityWarning(*this, Result, D, OffendingDecl, Message, Loc,
+                          UnknownObjCClass, ObjCPDecl, ObjCPropertyAccess);
+}
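
The net effect of the relocated availability logic above: a use of a partially
available declaration is deferred to the unguarded-availability walker instead
of being warned on immediately, and the walker now picks a default-on variant
of the warning for newer targets. A minimal sketch of the user-facing
behavior, assuming clang's availability attribute extension and a macOS 10.12
deployment target (diagnostic wording approximate):

    // Declared for macOS 10.13; built with -mmacosx-version-min=10.12.
    void newAPI() __attribute__((availability(macos, introduced = 10.13)));

    void caller() {
      newAPI();  // warn_unguarded_availability: 'newAPI' is only available
                 // on macOS 10.13 or newer; a fix-it suggests the guard below
      if (__builtin_available(macOS 10.13, *))
        newAPI();  // OK: the use is guarded
    }
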
diff --git a/interpreter/llvm/src/tools/clang/lib/Sema/SemaDeclCXX.cpp b/interpreter/llvm/src/tools/clang/lib/Sema/SemaDeclCXX.cpp
index 9c7c8290a5235..ef7ebaec8a5a9 100644
--- a/interpreter/llvm/src/tools/clang/lib/Sema/SemaDeclCXX.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Sema/SemaDeclCXX.cpp
@@ -550,17 +550,23 @@ bool Sema::MergeCXXFunctionDecl(FunctionDecl *New, FunctionDecl *Old,
         << OldParam->getDefaultArgRange();
 #endif
     } else if (OldParamHasDfl) {
-      // Merge the old default argument into the new parameter.
-      // It's important to use getInit() here;  getDefaultArg()
-      // strips off any top-level ExprWithCleanups.
-      NewParam->setHasInheritedDefaultArg();
-      if (OldParam->hasUnparsedDefaultArg())
-        NewParam->setUnparsedDefaultArg();
-      else if (OldParam->hasUninstantiatedDefaultArg())
-        NewParam->setUninstantiatedDefaultArg(
-                                      OldParam->getUninstantiatedDefaultArg());
-      else
-        NewParam->setDefaultArg(OldParam->getInit());
+      // Merge the old default argument into the new parameter unless the new
+      // function is a friend declaration in a template class. In the latter
+      // case the default arguments will be inherited when the friend
+      // declaration will be instantiated.
+      if (New->getFriendObjectKind() == Decl::FOK_None ||
+          !New->getLexicalDeclContext()->isDependentContext()) {
+        // It's important to use getInit() here;  getDefaultArg()
+        // strips off any top-level ExprWithCleanups.
+        NewParam->setHasInheritedDefaultArg();
+        if (OldParam->hasUnparsedDefaultArg())
+          NewParam->setUnparsedDefaultArg();
+        else if (OldParam->hasUninstantiatedDefaultArg())
+          NewParam->setUninstantiatedDefaultArg(
+                                       OldParam->getUninstantiatedDefaultArg());
+        else
+          NewParam->setDefaultArg(OldParam->getInit());
+      }
     } else if (NewParamHasDfl) {
       if (New->getDescribedFunctionTemplate()) {
         // Paragraph 4, quoted above, only applies to non-template functions.
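
The friend-in-template guard above avoids inheriting a default argument twice.
A hedged sketch of the pattern it targets, following the comment in the patch:

    void f(int a = 1);

    template <typename T> struct S {
      // Friend redeclaration in a dependent context: with this change the
      // default argument is inherited when S<T> is instantiated, not when
      // the redeclaration is first merged.
      friend void f(int a);
    };

    S<int> s;  // the instantiation inherits 'a = 1' exactly once
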
@@ -641,7 +647,12 @@ bool Sema::MergeCXXFunctionDecl(FunctionDecl *New, FunctionDecl *Old,
     Diag(Old->getLocation(), diag::note_previous_declaration);
     Invalid = true;
   } else if (!Old->getMostRecentDecl()->isInlined() && New->isInlined() &&
-             Old->isDefined(Def)) {
+             Old->isDefined(Def) &&
+             // If a friend function is inlined but does not have 'inline'
+             // specifier, it is a definition. Do not report attribute conflict
+             // in this case, redefinition will be diagnosed later.
+             (New->isInlineSpecified() ||
+              New->getFriendObjectKind() == Decl::FOK_None)) {
     // C++11 [dcl.fcn.spec]p4:
     //   If the definition of a function appears in a translation unit before its
     //   first declaration as inline, the program is ill-formed.
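
An in-class friend definition is implicitly inline yet carries no 'inline'
specifier, which is exactly the case the added condition carves out.
Illustrative sketch:

    void g() {}           // non-inline definition seen first

    struct T {
      friend void g() {}  // implicitly inline friend *definition*: now
    };                    // diagnosed as a redefinition of 'g' rather than
                          // as an inline declaration after the definition
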
@@ -3770,6 +3781,15 @@ Sema::BuildMemInitializer(Decl *ConstructorD,
           if (BaseType.isNull())
             return true;
 
+          TInfo = Context.CreateTypeSourceInfo(BaseType);
+          DependentNameTypeLoc TL =
+              TInfo->getTypeLoc().castAs<DependentNameTypeLoc>();
+          if (!TL.isNull()) {
+            TL.setNameLoc(IdLoc);
+            TL.setElaboratedKeywordLoc(SourceLocation());
+            TL.setQualifierLoc(SS.getWithLocInContext(Context));
+          }
+
           R.clear();
           R.setLookupName(MemberOrBase);
         }
@@ -5709,6 +5729,53 @@ static void DefineImplicitSpecialMember(Sema &S, CXXMethodDecl *MD,
   }
 }
 
+/// Determine whether a type is permitted to be passed or returned in
+/// registers, per C++ [class.temporary]p3.
+static bool computeCanPassInRegisters(Sema &S, CXXRecordDecl *D) {
+  if (D->isDependentType() || D->isInvalidDecl())
+    return false;
+
+  // Per C++ [class.temporary]p3, the relevant condition is:
+  //   each copy constructor, move constructor, and destructor of X is
+  //   either trivial or deleted, and X has at least one non-deleted copy
+  //   or move constructor
+  bool HasNonDeletedCopyOrMove = false;
+
+  if (D->needsImplicitCopyConstructor() &&
+      !D->defaultedCopyConstructorIsDeleted()) {
+    if (!D->hasTrivialCopyConstructor())
+      return false;
+    HasNonDeletedCopyOrMove = true; 
+  }
+
+  if (S.getLangOpts().CPlusPlus11 && D->needsImplicitMoveConstructor() &&
+      !D->defaultedMoveConstructorIsDeleted()) {
+    if (!D->hasTrivialMoveConstructor())
+      return false;
+    HasNonDeletedCopyOrMove = true;
+  }
+
+  if (D->needsImplicitDestructor() && !D->defaultedDestructorIsDeleted() &&
+      !D->hasTrivialDestructor())
+    return false;
+
+  for (const CXXMethodDecl *MD : D->methods()) {
+    if (MD->isDeleted())
+      continue;
+
+    auto *CD = dyn_cast<CXXConstructorDecl>(MD);
+    if (CD && CD->isCopyOrMoveConstructor())
+      HasNonDeletedCopyOrMove = true;
+    else if (!isa<CXXDestructorDecl>(MD))
+      continue;
+
+    if (!MD->isTrivial())
+      return false;
+  }
+
+  return HasNonDeletedCopyOrMove;
+}
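
A short sketch of what computeCanPassInRegisters accepts and rejects under
C++ [class.temporary]p3 (class names are illustrative):

    struct Trivial { int x; };          // trivial copy ctor and dtor:
                                        // eligible for register passing

    struct NonTrivial {
      NonTrivial(const NonTrivial &);   // non-trivial, non-deleted copy ctor
      ~NonTrivial();                    // and dtor: passed indirectly
    };

    struct AllDeleted {
      AllDeleted(const AllDeleted &) = delete;
      AllDeleted(AllDeleted &&) = delete;  // no non-deleted copy or move
    };                                     // ctor: not register-eligible
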
+
 /// \brief Perform semantic checks on a class definition that has been
 /// completing, introducing implicitly-declared members, checking for
 /// abstract types, etc.
@@ -5853,6 +5920,8 @@ void Sema::CheckCompletedCXXClass(CXXRecordDecl *Record) {
   }
 
   checkClassLevelDLLAttribute(Record);
+
+  Record->setCanPassInRegisters(computeCanPassInRegisters(*this, Record));
 }
 
 /// Look up the special member function that would be called by a special
@@ -7479,8 +7548,7 @@ void Sema::ActOnFinishCXXMemberSpecification(Scope* S, SourceLocation RLoc,
               reinterpret_cast<Decl**>(FieldCollector->getCurFields()),
               FieldCollector->getCurNumFields()), LBrac, RBrac, AttrList);
 
-  CheckCompletedCXXClass(
-                        dyn_cast_or_null<CXXRecordDecl>(TagDecl));
+  CheckCompletedCXXClass(dyn_cast_or_null<CXXRecordDecl>(TagDecl));
 }
 
 /// AddImplicitlyDeclaredMembersToClass - Adds any implicitly-declared
@@ -10351,32 +10419,33 @@ void Sema::DefineImplicitDefaultConstructor(SourceLocation CurrentLocation,
           !Constructor->doesThisDeclarationHaveABody() &&
           !Constructor->isDeleted()) &&
     "DefineImplicitDefaultConstructor - call it for implicit default ctor");
+  if (Constructor->willHaveBody() || Constructor->isInvalidDecl())
+    return;
 
   CXXRecordDecl *ClassDecl = Constructor->getParent();
   assert(ClassDecl && "DefineImplicitDefaultConstructor - invalid constructor");
 
   SynthesizedFunctionScope Scope(*this, Constructor);
-  DiagnosticErrorTrap Trap(Diags);
-  if (SetCtorInitializers(Constructor, /*AnyErrors=*/false) ||
-      Trap.hasErrorOccurred()) {
-    Diag(CurrentLocation, diag::note_member_synthesized_at) 
-      << CXXDefaultConstructor << Context.getTagDeclType(ClassDecl);
-    Constructor->setInvalidDecl();
-    return;
-  }
 
   // The exception specification is needed because we are defining the
   // function.
   ResolveExceptionSpec(CurrentLocation,
                        Constructor->getType()->castAs<FunctionProtoType>());
+  MarkVTableUsed(CurrentLocation, ClassDecl);
+
+  // Add a context note for diagnostics produced after this point.
+  Scope.addContextNote(CurrentLocation);
+
+  if (SetCtorInitializers(Constructor, /*AnyErrors=*/false)) {
+    Constructor->setInvalidDecl();
+    return;
+  }
 
   SourceLocation Loc = Constructor->getLocEnd().isValid()
                            ? Constructor->getLocEnd()
                            : Constructor->getLocation();
   Constructor->setBody(new (Context) CompoundStmt(Loc));
-
   Constructor->markUsed(Context);
-  MarkVTableUsed(CurrentLocation, ClassDecl);
 
   if (ASTMutationListener *L = getASTMutationListener()) {
     L->CompletedImplicitDefinition(Constructor);
@@ -10486,9 +10555,22 @@ void Sema::DefineInheritingConstructor(SourceLocation CurrentLocation,
   assert(Constructor->getInheritedConstructor() &&
          !Constructor->doesThisDeclarationHaveABody() &&
          !Constructor->isDeleted());
-  if (Constructor->isInvalidDecl())
+  if (Constructor->willHaveBody() || Constructor->isInvalidDecl())
     return;
 
+  // Initializations are performed "as if by a defaulted default constructor",
+  // so enter the appropriate scope.
+  SynthesizedFunctionScope Scope(*this, Constructor);
+
+  // The exception specification is needed because we are defining the
+  // function.
+  ResolveExceptionSpec(CurrentLocation,
+                       Constructor->getType()->castAs<FunctionProtoType>());
+  MarkVTableUsed(CurrentLocation, ClassDecl);
+
+  // Add a context note for diagnostics produced after this point.
+  Scope.addContextNote(CurrentLocation);
+
   ConstructorUsingShadowDecl *Shadow =
       Constructor->getInheritedConstructor().getShadowDecl();
   CXXConstructorDecl *InheritedCtor =
@@ -10503,11 +10585,6 @@ void Sema::DefineInheritingConstructor(SourceLocation CurrentLocation,
   CXXRecordDecl *RD = Shadow->getParent();
   SourceLocation InitLoc = Shadow->getLocation();
 
-  // Initializations are performed "as if by a defaulted default constructor",
-  // so enter the appropriate scope.
-  SynthesizedFunctionScope Scope(*this, Constructor);
-  DiagnosticErrorTrap Trap(Diags);
-
   // Build explicit initializers for all base classes from which the
   // constructor was inherited.
   SmallVector<CXXCtorInitializer*, 8> Inits;
@@ -10538,22 +10615,13 @@ void Sema::DefineInheritingConstructor(SourceLocation CurrentLocation,
   // We now proceed as if for a defaulted default constructor, with the relevant
   // initializers replaced.
 
-  bool HadError = SetCtorInitializers(Constructor, /*AnyErrors*/false, Inits);
-  if (HadError || Trap.hasErrorOccurred()) {
-    Diag(CurrentLocation, diag::note_inhctor_synthesized_at) << RD;
+  if (SetCtorInitializers(Constructor, /*AnyErrors*/false, Inits)) {
     Constructor->setInvalidDecl();
     return;
   }
 
-  // The exception specification is needed because we are defining the
-  // function.
-  ResolveExceptionSpec(CurrentLocation,
-                       Constructor->getType()->castAs<FunctionProtoType>());
-
   Constructor->setBody(new (Context) CompoundStmt(InitLoc));
-
   Constructor->markUsed(Context);
-  MarkVTableUsed(CurrentLocation, ClassDecl);
 
   if (ASTMutationListener *L = getASTMutationListener()) {
     L->CompletedImplicitDefinition(Constructor);
@@ -10629,37 +10697,36 @@ void Sema::DefineImplicitDestructor(SourceLocation CurrentLocation,
           !Destructor->doesThisDeclarationHaveABody() &&
           !Destructor->isDeleted()) &&
          "DefineImplicitDestructor - call it for implicit default dtor");
+  if (Destructor->willHaveBody() || Destructor->isInvalidDecl())
+    return;
+
   CXXRecordDecl *ClassDecl = Destructor->getParent();
   assert(ClassDecl && "DefineImplicitDestructor - invalid destructor");
 
-  if (Destructor->isInvalidDecl())
-    return;
-
   SynthesizedFunctionScope Scope(*this, Destructor);
 
-  DiagnosticErrorTrap Trap(Diags);
+  // The exception specification is needed because we are defining the
+  // function.
+  ResolveExceptionSpec(CurrentLocation,
+                       Destructor->getType()->castAs<FunctionProtoType>());
+  MarkVTableUsed(CurrentLocation, ClassDecl);
+
+  // Add a context note for diagnostics produced after this point.
+  Scope.addContextNote(CurrentLocation);
+
   MarkBaseAndMemberDestructorsReferenced(Destructor->getLocation(),
                                          Destructor->getParent());
 
-  if (CheckDestructor(Destructor) || Trap.hasErrorOccurred()) {
-    Diag(CurrentLocation, diag::note_member_synthesized_at) 
-      << CXXDestructor << Context.getTagDeclType(ClassDecl);
-
+  if (CheckDestructor(Destructor)) {
     Destructor->setInvalidDecl();
     return;
   }
 
-  // The exception specification is needed because we are defining the
-  // function.
-  ResolveExceptionSpec(CurrentLocation,
-                       Destructor->getType()->castAs<FunctionProtoType>());
-
   SourceLocation Loc = Destructor->getLocEnd().isValid()
                            ? Destructor->getLocEnd()
                            : Destructor->getLocation();
   Destructor->setBody(new (Context) CompoundStmt(Loc));
   Destructor->markUsed(Context);
-  MarkVTableUsed(CurrentLocation, ClassDecl);
 
   if (ASTMutationListener *L = getASTMutationListener()) {
     L->CompletedImplicitDefinition(Destructor);
@@ -11227,8 +11294,7 @@ CXXMethodDecl *Sema::DeclareImplicitCopyAssignment(CXXRecordDecl *ClassDecl) {
 /// Diagnose an implicit copy operation for a class which is odr-used, but
 /// which is deprecated because the class has a user-declared copy constructor,
 /// copy assignment operator, or destructor.
-static void diagnoseDeprecatedCopyOperation(Sema &S, CXXMethodDecl *CopyOp,
-                                            SourceLocation UseLoc) {
+static void diagnoseDeprecatedCopyOperation(Sema &S, CXXMethodDecl *CopyOp) {
   assert(CopyOp->isImplicit());
 
   CXXRecordDecl *RD = CopyOp->getParent();
@@ -11267,10 +11333,6 @@ static void diagnoseDeprecatedCopyOperation(Sema &S, CXXMethodDecl *CopyOp,
          diag::warn_deprecated_copy_operation)
       << RD << /*copy assignment*/!isa<CXXConstructorDecl>(CopyOp)
       << /*destructor*/isa<CXXDestructorDecl>(UserDeclaredOperation);
-    S.Diag(UseLoc, diag::note_member_synthesized_at)
-      << (isa<CXXConstructorDecl>(CopyOp) ? Sema::CXXCopyConstructor
-                                          : Sema::CXXCopyAssignment)
-      << RD;
   }
 }
 
@@ -11282,25 +11344,31 @@ void Sema::DefineImplicitCopyAssignment(SourceLocation CurrentLocation,
           !CopyAssignOperator->doesThisDeclarationHaveABody() &&
           !CopyAssignOperator->isDeleted()) &&
          "DefineImplicitCopyAssignment called for wrong function");
+  if (CopyAssignOperator->willHaveBody() || CopyAssignOperator->isInvalidDecl())
+    return;
 
   CXXRecordDecl *ClassDecl = CopyAssignOperator->getParent();
-
-  if (ClassDecl->isInvalidDecl() || CopyAssignOperator->isInvalidDecl()) {
+  if (ClassDecl->isInvalidDecl()) {
     CopyAssignOperator->setInvalidDecl();
     return;
   }
 
+  SynthesizedFunctionScope Scope(*this, CopyAssignOperator);
+
+  // The exception specification is needed because we are defining the
+  // function.
+  ResolveExceptionSpec(CurrentLocation,
+                       CopyAssignOperator->getType()->castAs<FunctionProtoType>());
+
+  // Add a context note for diagnostics produced after this point.
+  Scope.addContextNote(CurrentLocation);
+
   // C++11 [class.copy]p18:
   //   The [definition of an implicitly declared copy assignment operator] is
   //   deprecated if the class has a user-declared copy constructor or a
   //   user-declared destructor.
   if (getLangOpts().CPlusPlus11 && CopyAssignOperator->isImplicit())
-    diagnoseDeprecatedCopyOperation(*this, CopyAssignOperator, CurrentLocation);
-
-  CopyAssignOperator->markUsed(Context);
-
-  SynthesizedFunctionScope Scope(*this, CopyAssignOperator);
-  DiagnosticErrorTrap Trap(Diags);
+    diagnoseDeprecatedCopyOperation(*this, CopyAssignOperator);
 
   // C++0x [class.copy]p30:
   //   The implicitly-defined or explicitly-defaulted copy assignment operator
@@ -11366,8 +11434,6 @@ void Sema::DefineImplicitCopyAssignment(SourceLocation CurrentLocation,
                                             /*CopyingBaseSubobject=*/true,
                                             /*Copying=*/true);
     if (Copy.isInvalid()) {
-      Diag(CurrentLocation, diag::note_member_synthesized_at) 
-        << CXXCopyAssignment << Context.getTagDeclType(ClassDecl);
       CopyAssignOperator->setInvalidDecl();
       return;
     }
@@ -11393,8 +11459,6 @@ void Sema::DefineImplicitCopyAssignment(SourceLocation CurrentLocation,
       Diag(ClassDecl->getLocation(), diag::err_uninitialized_member_for_assign)
         << Context.getTagDeclType(ClassDecl) << 0 << Field->getDeclName();
       Diag(Field->getLocation(), diag::note_declared_at);
-      Diag(CurrentLocation, diag::note_member_synthesized_at) 
-        << CXXCopyAssignment << Context.getTagDeclType(ClassDecl);
       Invalid = true;
       continue;
     }
@@ -11405,8 +11469,6 @@ void Sema::DefineImplicitCopyAssignment(SourceLocation CurrentLocation,
       Diag(ClassDecl->getLocation(), diag::err_uninitialized_member_for_assign)
         << Context.getTagDeclType(ClassDecl) << 1 << Field->getDeclName();
       Diag(Field->getLocation(), diag::note_declared_at);
-      Diag(CurrentLocation, diag::note_member_synthesized_at) 
-        << CXXCopyAssignment << Context.getTagDeclType(ClassDecl);
       Invalid = true;      
       continue;
     }
@@ -11439,8 +11501,6 @@ void Sema::DefineImplicitCopyAssignment(SourceLocation CurrentLocation,
                                             /*CopyingBaseSubobject=*/false,
                                             /*Copying=*/true);
     if (Copy.isInvalid()) {
-      Diag(CurrentLocation, diag::note_member_synthesized_at) 
-        << CXXCopyAssignment << Context.getTagDeclType(ClassDecl);
       CopyAssignOperator->setInvalidDecl();
       return;
     }
@@ -11456,22 +11516,10 @@ void Sema::DefineImplicitCopyAssignment(SourceLocation CurrentLocation,
     StmtResult Return = BuildReturnStmt(Loc, ThisObj.get());
     if (Return.isInvalid())
       Invalid = true;
-    else {
+    else
       Statements.push_back(Return.getAs<Stmt>());
-
-      if (Trap.hasErrorOccurred()) {
-        Diag(CurrentLocation, diag::note_member_synthesized_at) 
-          << CXXCopyAssignment << Context.getTagDeclType(ClassDecl);
-        Invalid = true;
-      }
-    }
   }
 
-  // The exception specification is needed because we are defining the
-  // function.
-  ResolveExceptionSpec(CurrentLocation,
-                       CopyAssignOperator->getType()->castAs<FunctionProtoType>());
-
   if (Invalid) {
     CopyAssignOperator->setInvalidDecl();
     return;
@@ -11485,6 +11533,7 @@ void Sema::DefineImplicitCopyAssignment(SourceLocation CurrentLocation,
     assert(!Body.isInvalid() && "Compound statement creation cannot fail");
   }
   CopyAssignOperator->setBody(Body.getAs<Stmt>());
+  CopyAssignOperator->markUsed(Context);
 
   if (ASTMutationListener *L = getASTMutationListener()) {
     L->CompletedImplicitDefinition(CopyAssignOperator);
@@ -11657,19 +11706,15 @@ void Sema::DefineImplicitMoveAssignment(SourceLocation CurrentLocation,
           !MoveAssignOperator->doesThisDeclarationHaveABody() &&
           !MoveAssignOperator->isDeleted()) &&
          "DefineImplicitMoveAssignment called for wrong function");
+  if (MoveAssignOperator->willHaveBody() || MoveAssignOperator->isInvalidDecl())
+    return;
 
   CXXRecordDecl *ClassDecl = MoveAssignOperator->getParent();
-
-  if (ClassDecl->isInvalidDecl() || MoveAssignOperator->isInvalidDecl()) {
+  if (ClassDecl->isInvalidDecl()) {
     MoveAssignOperator->setInvalidDecl();
     return;
   }
   
-  MoveAssignOperator->markUsed(Context);
-
-  SynthesizedFunctionScope Scope(*this, MoveAssignOperator);
-  DiagnosticErrorTrap Trap(Diags);
-
   // C++0x [class.copy]p28:
   //   The implicitly-defined or move assignment operator for a non-union class
   //   X performs memberwise move assignment of its subobjects. The direct base
@@ -11682,6 +11727,16 @@ void Sema::DefineImplicitMoveAssignment(SourceLocation CurrentLocation,
   // from a virtual base more than once.
   checkMoveAssignmentForRepeatedMove(*this, ClassDecl, CurrentLocation);
 
+  SynthesizedFunctionScope Scope(*this, MoveAssignOperator);
+
+  // The exception specification is needed because we are defining the
+  // function.
+  ResolveExceptionSpec(CurrentLocation,
+                       MoveAssignOperator->getType()->castAs<FunctionProtoType>());
+
+  // Add a context note for diagnostics produced after this point.
+  Scope.addContextNote(CurrentLocation);
+
   // The statements that form the synthesized function body.
   SmallVector<Stmt*, 8> Statements;
 
@@ -11746,8 +11801,6 @@ void Sema::DefineImplicitMoveAssignment(SourceLocation CurrentLocation,
                                             /*CopyingBaseSubobject=*/true,
                                             /*Copying=*/false);
     if (Move.isInvalid()) {
-      Diag(CurrentLocation, diag::note_member_synthesized_at) 
-        << CXXMoveAssignment << Context.getTagDeclType(ClassDecl);
       MoveAssignOperator->setInvalidDecl();
       return;
     }
@@ -11773,8 +11826,6 @@ void Sema::DefineImplicitMoveAssignment(SourceLocation CurrentLocation,
       Diag(ClassDecl->getLocation(), diag::err_uninitialized_member_for_assign)
         << Context.getTagDeclType(ClassDecl) << 0 << Field->getDeclName();
       Diag(Field->getLocation(), diag::note_declared_at);
-      Diag(CurrentLocation, diag::note_member_synthesized_at) 
-        << CXXMoveAssignment << Context.getTagDeclType(ClassDecl);
       Invalid = true;
       continue;
     }
@@ -11785,8 +11836,6 @@ void Sema::DefineImplicitMoveAssignment(SourceLocation CurrentLocation,
       Diag(ClassDecl->getLocation(), diag::err_uninitialized_member_for_assign)
         << Context.getTagDeclType(ClassDecl) << 1 << Field->getDeclName();
       Diag(Field->getLocation(), diag::note_declared_at);
-      Diag(CurrentLocation, diag::note_member_synthesized_at) 
-        << CXXMoveAssignment << Context.getTagDeclType(ClassDecl);
       Invalid = true;      
       continue;
     }
@@ -11822,8 +11871,6 @@ void Sema::DefineImplicitMoveAssignment(SourceLocation CurrentLocation,
                                             /*CopyingBaseSubobject=*/false,
                                             /*Copying=*/false);
     if (Move.isInvalid()) {
-      Diag(CurrentLocation, diag::note_member_synthesized_at) 
-        << CXXMoveAssignment << Context.getTagDeclType(ClassDecl);
       MoveAssignOperator->setInvalidDecl();
       return;
     }
@@ -11840,22 +11887,10 @@ void Sema::DefineImplicitMoveAssignment(SourceLocation CurrentLocation,
     StmtResult Return = BuildReturnStmt(Loc, ThisObj.get());
     if (Return.isInvalid())
       Invalid = true;
-    else {
+    else
       Statements.push_back(Return.getAs<Stmt>());
-
-      if (Trap.hasErrorOccurred()) {
-        Diag(CurrentLocation, diag::note_member_synthesized_at) 
-          << CXXMoveAssignment << Context.getTagDeclType(ClassDecl);
-        Invalid = true;
-      }
-    }
   }
 
-  // The exception specification is needed because we are defining the
-  // function.
-  ResolveExceptionSpec(CurrentLocation,
-                       MoveAssignOperator->getType()->castAs<FunctionProtoType>());
-
   if (Invalid) {
     MoveAssignOperator->setInvalidDecl();
     return;
@@ -11869,6 +11904,7 @@ void Sema::DefineImplicitMoveAssignment(SourceLocation CurrentLocation,
     assert(!Body.isInvalid() && "Compound statement creation cannot fail");
   }
   MoveAssignOperator->setBody(Body.getAs<Stmt>());
+  MoveAssignOperator->markUsed(Context);
 
   if (ASTMutationListener *L = getASTMutationListener()) {
     L->CompletedImplicitDefinition(MoveAssignOperator);
@@ -11944,8 +11980,10 @@ CXXConstructorDecl *Sema::DeclareImplicitCopyConstructor(
   Scope *S = getScopeForContext(ClassDecl);
   CheckImplicitSpecialMemberDeclaration(S, CopyConstructor);
 
-  if (ShouldDeleteSpecialMember(CopyConstructor, CXXCopyConstructor))
+  if (ShouldDeleteSpecialMember(CopyConstructor, CXXCopyConstructor)) {
+    ClassDecl->setImplicitCopyConstructorIsDeleted();
     SetDeclDeleted(CopyConstructor, ClassLoc);
+  }
 
   if (S)
     PushOnScopeChains(CopyConstructor, S, false);
@@ -11955,30 +11993,37 @@ CXXConstructorDecl *Sema::DeclareImplicitCopyConstructor(
 }
 
 void Sema::DefineImplicitCopyConstructor(SourceLocation CurrentLocation,
-                                   CXXConstructorDecl *CopyConstructor) {
+                                         CXXConstructorDecl *CopyConstructor) {
   assert((CopyConstructor->isDefaulted() &&
           CopyConstructor->isCopyConstructor() &&
           !CopyConstructor->doesThisDeclarationHaveABody() &&
           !CopyConstructor->isDeleted()) &&
          "DefineImplicitCopyConstructor - call it for implicit copy ctor");
+  if (CopyConstructor->willHaveBody() || CopyConstructor->isInvalidDecl())
+    return;
 
   CXXRecordDecl *ClassDecl = CopyConstructor->getParent();
   assert(ClassDecl && "DefineImplicitCopyConstructor - invalid constructor");
 
+  SynthesizedFunctionScope Scope(*this, CopyConstructor);
+
+  // The exception specification is needed because we are defining the
+  // function.
+  ResolveExceptionSpec(CurrentLocation,
+                       CopyConstructor->getType()->castAs<FunctionProtoType>());
+  MarkVTableUsed(CurrentLocation, ClassDecl);
+
+  // Add a context note for diagnostics produced after this point.
+  Scope.addContextNote(CurrentLocation);
+
   // C++11 [class.copy]p7:
   //   The [definition of an implicitly declared copy constructor] is
   //   deprecated if the class has a user-declared copy assignment operator
   //   or a user-declared destructor.
   if (getLangOpts().CPlusPlus11 && CopyConstructor->isImplicit())
-    diagnoseDeprecatedCopyOperation(*this, CopyConstructor, CurrentLocation);
+    diagnoseDeprecatedCopyOperation(*this, CopyConstructor);
 
-  SynthesizedFunctionScope Scope(*this, CopyConstructor);
-  DiagnosticErrorTrap Trap(Diags);
-
-  if (SetCtorInitializers(CopyConstructor, /*AnyErrors=*/false) ||
-      Trap.hasErrorOccurred()) {
-    Diag(CurrentLocation, diag::note_member_synthesized_at) 
-      << CXXCopyConstructor << Context.getTagDeclType(ClassDecl);
+  if (SetCtorInitializers(CopyConstructor, /*AnyErrors=*/false)) {
     CopyConstructor->setInvalidDecl();
   }  else {
     SourceLocation Loc = CopyConstructor->getLocEnd().isValid()
@@ -11987,16 +12032,9 @@ void Sema::DefineImplicitCopyConstructor(SourceLocation CurrentLocation,
     Sema::CompoundScopeRAII CompoundScope(*this);
     CopyConstructor->setBody(
         ActOnCompoundStmt(Loc, Loc, None, /*isStmtExpr=*/false).getAs<Stmt>());
+    CopyConstructor->markUsed(Context);
   }
 
-  // The exception specification is needed because we are defining the
-  // function.
-  ResolveExceptionSpec(CurrentLocation,
-                       CopyConstructor->getType()->castAs<FunctionProtoType>());
-
-  CopyConstructor->markUsed(Context);
-  MarkVTableUsed(CurrentLocation, ClassDecl);
-
   if (ASTMutationListener *L = getASTMutationListener()) {
     L->CompletedImplicitDefinition(CopyConstructor);
   }
@@ -12078,41 +12116,41 @@ CXXConstructorDecl *Sema::DeclareImplicitMoveConstructor(
 }
 
 void Sema::DefineImplicitMoveConstructor(SourceLocation CurrentLocation,
-                                   CXXConstructorDecl *MoveConstructor) {
+                                         CXXConstructorDecl *MoveConstructor) {
   assert((MoveConstructor->isDefaulted() &&
           MoveConstructor->isMoveConstructor() &&
           !MoveConstructor->doesThisDeclarationHaveABody() &&
           !MoveConstructor->isDeleted()) &&
          "DefineImplicitMoveConstructor - call it for implicit move ctor");
+  if (MoveConstructor->willHaveBody() || MoveConstructor->isInvalidDecl())
+    return;
 
   CXXRecordDecl *ClassDecl = MoveConstructor->getParent();
   assert(ClassDecl && "DefineImplicitMoveConstructor - invalid constructor");
 
   SynthesizedFunctionScope Scope(*this, MoveConstructor);
-  DiagnosticErrorTrap Trap(Diags);
 
-  if (SetCtorInitializers(MoveConstructor, /*AnyErrors=*/false) ||
-      Trap.hasErrorOccurred()) {
-    Diag(CurrentLocation, diag::note_member_synthesized_at) 
-      << CXXMoveConstructor << Context.getTagDeclType(ClassDecl);
+  // The exception specification is needed because we are defining the
+  // function.
+  ResolveExceptionSpec(CurrentLocation,
+                       MoveConstructor->getType()->castAs<FunctionProtoType>());
+  MarkVTableUsed(CurrentLocation, ClassDecl);
+
+  // Add a context note for diagnostics produced after this point.
+  Scope.addContextNote(CurrentLocation);
+
+  if (SetCtorInitializers(MoveConstructor, /*AnyErrors=*/false)) {
     MoveConstructor->setInvalidDecl();
-  }  else {
+  } else {
     SourceLocation Loc = MoveConstructor->getLocEnd().isValid()
                              ? MoveConstructor->getLocEnd()
                              : MoveConstructor->getLocation();
     Sema::CompoundScopeRAII CompoundScope(*this);
     MoveConstructor->setBody(ActOnCompoundStmt(
         Loc, Loc, None, /*isStmtExpr=*/ false).getAs<Stmt>());
+    MoveConstructor->markUsed(Context);
   }
 
-  // The exception specification is needed because we are defining the
-  // function.
-  ResolveExceptionSpec(CurrentLocation,
-                       MoveConstructor->getType()->castAs<FunctionProtoType>());
-
-  MoveConstructor->markUsed(Context);
-  MarkVTableUsed(CurrentLocation, ClassDecl);
-
   if (ASTMutationListener *L = getASTMutationListener()) {
     L->CompletedImplicitDefinition(MoveConstructor);
   }
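
Across DefineImplicitDefaultConstructor, DefineInheritingConstructor,
DefineImplicitDestructor, both assignment operators, and both copy and move
constructors, the rewrite above follows one pattern: resolve the exception
specification and mark the vtable used up front, then let
SynthesizedFunctionScope::addContextNote attach context to later diagnostics
instead of a DiagnosticErrorTrap plus an explicit note_member_synthesized_at.
A sketch of code whose diagnostics pick up that context note (wording
approximate):

    struct NoDefault {
      NoDefault(int);  // no default constructor
    };

    struct Holder {
      NoDefault nd;    // the implicit Holder() must initialize 'nd'
    };

    Holder h;  // error: no matching constructor for 'NoDefault';
               // note: in implicit default constructor for 'Holder'
               //       first required here
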
@@ -12125,6 +12163,8 @@ bool Sema::isImplicitlyDeleted(FunctionDecl *FD) {
 void Sema::DefineImplicitLambdaToFunctionPointerConversion(
                             SourceLocation CurrentLocation,
                             CXXConversionDecl *Conv) {
+  SynthesizedFunctionScope Scope(*this, Conv);
+   
   CXXRecordDecl *Lambda = Conv->getParent();
   CXXMethodDecl *CallOp = Lambda->getLambdaCallOperator();
   // If we are defining a specialization of a conversion to function-ptr
@@ -12147,6 +12187,7 @@ void Sema::DefineImplicitLambdaToFunctionPointerConversion(
           "Conversion operator must have a corresponding call operator");
     CallOp = cast<CXXMethodDecl>(CallOpSpec);
   }
+
   // Mark the call operator referenced (and add to pending instantiations
   // if necessary).
   // For both the conversion and static-invoker template specializations
@@ -12154,9 +12195,6 @@ void Sema::DefineImplicitLambdaToFunctionPointerConversion(
   // to the PendingInstantiations.
   MarkFunctionReferenced(CurrentLocation, CallOp);
 
-  SynthesizedFunctionScope Scope(*this, Conv);
-  DiagnosticErrorTrap Trap(Diags);
-   
   // Retrieve the static invoker...
   CXXMethodDecl *Invoker = Lambda->getLambdaStaticInvoker();
   // ... and get the corresponding specialization for a generic lambda.
@@ -12194,7 +12232,7 @@ void Sema::DefineImplicitLambdaToFunctionPointerConversion(
   if (ASTMutationListener *L = getASTMutationListener()) {
     L->CompletedImplicitDefinition(Conv);
     L->CompletedImplicitDefinition(Invoker);
-   }
+  }
 }
 
 
@@ -12205,10 +12243,7 @@ void Sema::DefineImplicitLambdaToBlockPointerConversion(
 {
   assert(!Conv->getParent()->isGenericLambda());
 
-  Conv->markUsed(Context);
-  
   SynthesizedFunctionScope Scope(*this, Conv);
-  DiagnosticErrorTrap Trap(Diags);
   
   // Copy-initialize the lambda object as needed to capture it.
   Expr *This = ActOnCXXThis(CurrentLocation).get();
@@ -12247,6 +12282,7 @@ void Sema::DefineImplicitLambdaToBlockPointerConversion(
   Conv->setBody(new (Context) CompoundStmt(Context, ReturnS,
                                            Conv->getLocation(),
                                            Conv->getLocation()));
+  Conv->markUsed(Context);
   
   // We're done; notify the mutation listener, if any.
   if (ASTMutationListener *L = getASTMutationListener()) {
@@ -13269,6 +13305,14 @@ Decl *Sema::BuildStaticAssertDeclaration(SourceLocation StaticAssertLoc,
     }
   }
 
+  ExprResult FullAssertExpr = ActOnFinishFullExpr(AssertExpr, StaticAssertLoc,
+                                                  /*DiscardedValue*/false,
+                                                  /*IsConstexpr*/true);
+  if (FullAssertExpr.isInvalid())
+    Failed = true;
+  else
+    AssertExpr = FullAssertExpr.get();
+
   Decl *Decl = StaticAssertDecl::Create(Context, CurContext, StaticAssertLoc,
                                         AssertExpr, AssertMessage, RParenLoc,
                                         Failed);
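
Finishing the assertion condition as a constexpr full-expression before the
StaticAssertDecl is created attaches any needed cleanups and records failure
on the declaration itself. A minimal, purely behavioral illustration:

    constexpr int size() { return 4; }

    // The condition is completed and constant-evaluated before the
    // static_assert declaration is built; a condition that cannot be
    // evaluated now marks the declaration as failed rather than slipping
    // through.
    static_assert(size() == 4, "size must be 4");
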
@@ -13412,7 +13456,8 @@ Decl *Sema::ActOnTemplatedFriendTag(Scope *S, SourceLocation FriendLoc,
                       /*ScopedEnumKWLoc=*/SourceLocation(),
                       /*ScopedEnumUsesClassTag=*/false,
                       /*UnderlyingType=*/TypeResult(),
-                      /*IsTypeSpecifier=*/false);
+                      /*IsTypeSpecifier=*/false,
+                      /*IsTemplateParamOrArg=*/false);
     }
 
     NestedNameSpecifierLoc QualifierLoc = SS.getWithLocInContext(Context);
@@ -13896,6 +13941,9 @@ void Sema::SetDeclDeleted(Decl *Dcl, SourceLocation DelLoc) {
     return;
   }
 
+  // Deleted function does not have a body.
+  Fn->setWillHaveBody(false);
+
   if (const FunctionDecl *Prev = Fn->getPreviousDecl()) {
     // Don't consider the implicit declaration we generate for explicit
     // specializations. FIXME: Do not generate these implicit declarations.
@@ -13974,6 +14022,11 @@ void Sema::SetDeclDefaulted(Decl *Dcl, SourceLocation DefaultLoc) {
     MD->setDefaulted();
     MD->setExplicitlyDefaulted();
 
+    // Unset that we will have a body for this function. We might not,
+    // if it turns out to be trivial, and we don't need this marking now
+    // that we've marked it as defaulted.
+    MD->setWillHaveBody(false);
+
     // If this definition appears within the record, do the checking when
     // the record is complete.
     const FunctionDecl *Primary = MD;
@@ -14671,6 +14724,7 @@ bool Sema::checkThisInStaticMemberFunctionExceptionSpec(CXXMethodDecl *Method) {
   case EST_ComputedNoexcept:
     if (!Finder.TraverseStmt(Proto->getNoexceptExpr()))
       return true;
+    LLVM_FALLTHROUGH;
     
   case EST_Dynamic:
     for (const auto &E : Proto->exceptions()) {
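
The LLVM_FALLTHROUGH annotations added in this file (and in SemaExpr.cpp
below) mark intentional case fall-through for -Wimplicit-fallthrough; the
macro comes from llvm/Support/Compiler.h and expands to an attribute such as
[[clang::fallthrough]] when the compiler supports one. A generic sketch:

    #include "llvm/Support/Compiler.h"

    int classify(int k) {
      int score = 0;
      switch (k) {
      case 0:
        score += 1;
        LLVM_FALLTHROUGH;  // deliberate: case 0 also takes case 1's path
      case 1:
        score += 2;
        break;
      default:
        break;
      }
      return score;
    }
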
diff --git a/interpreter/llvm/src/tools/clang/lib/Sema/SemaDeclObjC.cpp b/interpreter/llvm/src/tools/clang/lib/Sema/SemaDeclObjC.cpp
index 370461c4a24e4..967573011d0dc 100644
--- a/interpreter/llvm/src/tools/clang/lib/Sema/SemaDeclObjC.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Sema/SemaDeclObjC.cpp
@@ -248,19 +248,41 @@ bool Sema::CheckARCMethodDecl(ObjCMethodDecl *method) {
   return false;
 }
 
-static void DiagnoseObjCImplementedDeprecations(Sema &S,
-                                                NamedDecl *ND,
-                                                SourceLocation ImplLoc,
-                                                int select) {
-  if (ND && ND->isDeprecated()) {
-    S.Diag(ImplLoc, diag::warn_deprecated_def) << select;
-    if (select == 0)
+static void DiagnoseObjCImplementedDeprecations(Sema &S, const NamedDecl *ND,
+                                                SourceLocation ImplLoc) {
+  if (!ND)
+    return;
+  bool IsCategory = false;
+  AvailabilityResult Availability = ND->getAvailability();
+  if (Availability != AR_Deprecated) {
+    if (isa<ObjCMethodDecl>(ND)) {
+      if (Availability != AR_Unavailable)
+        return;
+      // Warn about implementing unavailable methods.
+      S.Diag(ImplLoc, diag::warn_unavailable_def);
       S.Diag(ND->getLocation(), diag::note_method_declared_at)
-        << ND->getDeclName();
-    else
-      S.Diag(ND->getLocation(), diag::note_previous_decl)
-          << (isa(ND) ? "category" : "class");
+          << ND->getDeclName();
+      return;
+    }
+    if (const auto *CD = dyn_cast<ObjCCategoryDecl>(ND)) {
+      if (!CD->getClassInterface()->isDeprecated())
+        return;
+      ND = CD->getClassInterface();
+      IsCategory = true;
+    } else
+      return;
   }
+  S.Diag(ImplLoc, diag::warn_deprecated_def)
+      << (isa<ObjCMethodDecl>(ND)
+              ? /*Method*/ 0
+              : isa<ObjCCategoryDecl>(ND) || IsCategory ? /*Category*/ 2
+                                                        : /*Class*/ 1);
+  if (isa<ObjCMethodDecl>(ND))
+    S.Diag(ND->getLocation(), diag::note_method_declared_at)
+        << ND->getDeclName();
+  else
+    S.Diag(ND->getLocation(), diag::note_previous_decl)
+        << (isa(ND) ? "category" : "class");
 }
 
 /// AddAnyMethodToGlobalPool - Add any method, instance or factory to global
@@ -385,9 +407,7 @@ void Sema::ActOnStartOfObjCMethodDef(Scope *FnBodyScope, Decl *D) {
       // No need to issue deprecated warning if deprecated method in class/category
       // is being implemented in its own implementation (no overriding is involved).
       if (!ImplDeclOfMethodDecl || ImplDeclOfMethodDecl != ImplDeclOfMethodDef)
-        DiagnoseObjCImplementedDeprecations(*this, 
-                                          dyn_cast(IMD), 
-                                          MDecl->getLocation(), 0);
+        DiagnoseObjCImplementedDeprecations(*this, IMD, MDecl->getLocation());
     }
 
     if (MDecl->getMethodFamily() == OMF_init) {
@@ -458,7 +478,10 @@ static void diagnoseUseOfProtocols(Sema &TheSema,
   // Diagnose availability in the context of the ObjC container.
   Sema::ContextRAII SavedContext(TheSema, CD);
   for (unsigned i = 0; i < NumProtoRefs; ++i) {
-    (void)TheSema.DiagnoseUseOfDecl(ProtoRefs[i], ProtoLocs[i]);
+    (void)TheSema.DiagnoseUseOfDecl(ProtoRefs[i], ProtoLocs[i],
+                                    /*UnknownObjCClass=*/nullptr,
+                                    /*ObjCPropertyAccess=*/false,
+                                    /*AvoidPartialAvailabilityChecks=*/true);
   }
 }
 
@@ -1851,10 +1874,6 @@ Decl *Sema::ActOnStartCategoryImplementation(
   // FIXME: PushOnScopeChains?
   CurContext->addDecl(CDecl);
 
-  // If the interface is deprecated/unavailable, warn/error about it.
-  if (IDecl)
-    DiagnoseUseOfDecl(IDecl, ClassLoc);
-
   // If the interface has the objc_runtime_visible attribute, we
   // cannot implement a category for it.
   if (IDecl && IDecl->hasAttr<ObjCRuntimeVisibleAttr>()) {
@@ -1874,10 +1893,8 @@ Decl *Sema::ActOnStartCategoryImplementation(
       CatIDecl->setImplementation(CDecl);
       // Warn on implementing category of deprecated class under
       // -Wdeprecated-implementations flag.
-      DiagnoseObjCImplementedDeprecations(
-          *this,
-          CatIDecl->isDeprecated() ? CatIDecl : dyn_cast<NamedDecl>(IDecl),
-          CDecl->getLocation(), 2);
+      DiagnoseObjCImplementedDeprecations(*this, CatIDecl,
+                                          CDecl->getLocation());
     }
   }
 
@@ -1997,9 +2014,7 @@ Decl *Sema::ActOnStartClassImplementation(
     PushOnScopeChains(IMPDecl, TUScope);
     // Warn on implementing deprecated class under
     // -Wdeprecated-implementations flag.
-    DiagnoseObjCImplementedDeprecations(*this, 
-                                        dyn_cast<NamedDecl>(IDecl),
-                                        IMPDecl->getLocation(), 1);
+    DiagnoseObjCImplementedDeprecations(*this, IDecl, IMPDecl->getLocation());
   }
 
   // If the superclass has the objc_runtime_visible attribute, we
diff --git a/interpreter/llvm/src/tools/clang/lib/Sema/SemaExpr.cpp b/interpreter/llvm/src/tools/clang/lib/Sema/SemaExpr.cpp
index 57245356a0ea7..083cf538614f2 100644
--- a/interpreter/llvm/src/tools/clang/lib/Sema/SemaExpr.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Sema/SemaExpr.cpp
@@ -87,114 +87,6 @@ static void DiagnoseUnusedOfDecl(Sema &S, NamedDecl *D, SourceLocation Loc) {
   }
 }
 
-static bool HasRedeclarationWithoutAvailabilityInCategory(const Decl *D) {
-  const auto *OMD = dyn_cast(D);
-  if (!OMD)
-    return false;
-  const ObjCInterfaceDecl *OID = OMD->getClassInterface();
-  if (!OID)
-    return false;
-
-  for (const ObjCCategoryDecl *Cat : OID->visible_categories())
-    if (ObjCMethodDecl *CatMeth =
-            Cat->getMethod(OMD->getSelector(), OMD->isInstanceMethod()))
-      if (!CatMeth->hasAttr<AvailabilityAttr>())
-        return true;
-  return false;
-}
-
-AvailabilityResult
-Sema::ShouldDiagnoseAvailabilityOfDecl(NamedDecl *&D, std::string *Message) {
-  AvailabilityResult Result = D->getAvailability(Message);
-
-  // For typedefs, if the typedef declaration appears available look
-  // to the underlying type to see if it is more restrictive.
-  while (const TypedefNameDecl *TD = dyn_cast<TypedefNameDecl>(D)) {
-    if (Result == AR_Available) {
-      if (const TagType *TT = TD->getUnderlyingType()->getAs<TagType>()) {
-        D = TT->getDecl();
-        Result = D->getAvailability(Message);
-        continue;
-      }
-    }
-    break;
-  }
-
-  // Forward class declarations get their attributes from their definition.
-  if (ObjCInterfaceDecl *IDecl = dyn_cast<ObjCInterfaceDecl>(D)) {
-    if (IDecl->getDefinition()) {
-      D = IDecl->getDefinition();
-      Result = D->getAvailability(Message);
-    }
-  }
-
-  if (const EnumConstantDecl *ECD = dyn_cast<EnumConstantDecl>(D))
-    if (Result == AR_Available) {
-      const DeclContext *DC = ECD->getDeclContext();
-      if (const EnumDecl *TheEnumDecl = dyn_cast<EnumDecl>(DC))
-        Result = TheEnumDecl->getAvailability(Message);
-    }
-
-  if (Result == AR_NotYetIntroduced) {
-    // Don't do this for enums, they can't be redeclared.
-    if (isa<EnumConstantDecl>(D) || isa<EnumDecl>(D))
-      return AR_Available;
-
-    bool Warn = !D->getAttr<AvailabilityAttr>()->isInherited();
-    // Objective-C method declarations in categories are not modelled as
-    // redeclarations, so manually look for a redeclaration in a category
-    // if necessary.
-    if (Warn && HasRedeclarationWithoutAvailabilityInCategory(D))
-      Warn = false;
-    // In general, D will point to the most recent redeclaration. However,
-    // for `@class A;` decls, this isn't true -- manually go through the
-    // redecl chain in that case.
-    if (Warn && isa<ObjCInterfaceDecl>(D))
-      for (Decl *Redecl = D->getMostRecentDecl(); Redecl && Warn;
-           Redecl = Redecl->getPreviousDecl())
-        if (!Redecl->hasAttr<AvailabilityAttr>() ||
-            Redecl->getAttr<AvailabilityAttr>()->isInherited())
-          Warn = false;
-
-    return Warn ? AR_NotYetIntroduced : AR_Available;
-  }
-
-  return Result;
-}
-
-static void
-DiagnoseAvailabilityOfDecl(Sema &S, NamedDecl *D, SourceLocation Loc,
-                           const ObjCInterfaceDecl *UnknownObjCClass,
-                           bool ObjCPropertyAccess) {
-  std::string Message;
-  // See if this declaration is unavailable, deprecated, or partial.
-  if (AvailabilityResult Result =
-          S.ShouldDiagnoseAvailabilityOfDecl(D, &Message)) {
-
-    if (Result == AR_NotYetIntroduced) {
-      if (S.getCurFunctionOrMethodDecl()) {
-        S.getEnclosingFunction()->HasPotentialAvailabilityViolations = true;
-        return;
-      } else if (S.getCurBlock() || S.getCurLambda()) {
-        S.getCurFunction()->HasPotentialAvailabilityViolations = true;
-        return;
-      }
-    }
-
-    const ObjCPropertyDecl *ObjCPDecl = nullptr;
-    if (const ObjCMethodDecl *MD = dyn_cast<ObjCMethodDecl>(D)) {
-      if (const ObjCPropertyDecl *PD = MD->findPropertyDecl()) {
-        AvailabilityResult PDeclResult = PD->getAvailability(nullptr);
-        if (PDeclResult == Result)
-          ObjCPDecl = PD;
-      }
-    }
-
-    S.EmitAvailabilityWarning(Result, D, Message, Loc, UnknownObjCClass,
-                              ObjCPDecl, ObjCPropertyAccess);
-  }
-}
-
 /// \brief Emit a note explaining that this function is deleted.
 void Sema::NoteDeletedFunction(FunctionDecl *Decl) {
   assert(Decl->isDeleted());
@@ -310,7 +202,8 @@ void Sema::MaybeSuggestAddingStaticToDecl(const FunctionDecl *Cur) {
 ///
 bool Sema::DiagnoseUseOfDecl(NamedDecl *D, SourceLocation Loc,
                              const ObjCInterfaceDecl *UnknownObjCClass,
-                             bool ObjCPropertyAccess) {
+                             bool ObjCPropertyAccess,
+                             bool AvoidPartialAvailabilityChecks) {
   if (getLangOpts().CPlusPlus && isa<FunctionDecl>(D)) {
     // If there were any diagnostics suppressed by template argument deduction,
     // emit them now.
@@ -366,8 +259,18 @@ bool Sema::DiagnoseUseOfDecl(NamedDecl *D, SourceLocation Loc,
 
     if (getLangOpts().CUDA && !CheckCUDACall(Loc, FD))
       return true;
+  }
 
-    if (diagnoseArgIndependentDiagnoseIfAttrs(FD, Loc))
+  auto getReferencedObjCProp = [](const NamedDecl *D) ->
+                                      const ObjCPropertyDecl * {
+    if (const auto *MD = dyn_cast<ObjCMethodDecl>(D))
+      return MD->findPropertyDecl();
+    return nullptr;
+  };
+  if (const ObjCPropertyDecl *ObjCPDecl = getReferencedObjCProp(D)) {
+    if (diagnoseArgIndependentDiagnoseIfAttrs(ObjCPDecl, Loc))
+      return true;
+  } else if (diagnoseArgIndependentDiagnoseIfAttrs(D, Loc)) {
       return true;
   }
 
@@ -384,8 +287,8 @@ bool Sema::DiagnoseUseOfDecl(NamedDecl *D, SourceLocation Loc,
     return true;
   }
 
-  DiagnoseAvailabilityOfDecl(*this, D, Loc, UnknownObjCClass,
-                             ObjCPropertyAccess);
+  DiagnoseAvailabilityOfDecl(D, Loc, UnknownObjCClass, ObjCPropertyAccess,
+                             AvoidPartialAvailabilityChecks);
 
   DiagnoseUnusedOfDecl(*this, D, Loc);
 
@@ -8064,28 +7967,38 @@ QualType Sema::InvalidLogicalVectorOperands(SourceLocation Loc, ExprResult &LHS,
 /// rank; for C, Obj-C, and C++ we allow any real scalar conversion except
 /// for float->int.
 ///
+/// OpenCL V2.0 6.2.6.p2:
+/// An error shall occur if any scalar operand type has greater rank
+/// than the type of the vector element.
+///
 /// \param scalar - if non-null, actually perform the conversions
 /// \return true if the operation fails (but without diagnosing the failure)
 static bool tryVectorConvertAndSplat(Sema &S, ExprResult *scalar,
                                      QualType scalarTy,
                                      QualType vectorEltTy,
-                                     QualType vectorTy) {
+                                     QualType vectorTy,
+                                     unsigned &DiagID) {
   // The conversion to apply to the scalar before splatting it,
   // if necessary.
   CastKind scalarCast = CK_Invalid;
   
   if (vectorEltTy->isIntegralType(S.Context)) {
-    if (!scalarTy->isIntegralType(S.Context))
+    if (S.getLangOpts().OpenCL && (scalarTy->isRealFloatingType() ||
+        (scalarTy->isIntegerType() &&
+         S.Context.getIntegerTypeOrder(vectorEltTy, scalarTy) < 0))) {
+      DiagID = diag::err_opencl_scalar_type_rank_greater_than_vector_type;
       return true;
-    if (S.getLangOpts().OpenCL &&
-        S.Context.getIntegerTypeOrder(vectorEltTy, scalarTy) < 0)
+    }
+    if (!scalarTy->isIntegralType(S.Context))
       return true;
     scalarCast = CK_IntegralCast;
   } else if (vectorEltTy->isRealFloatingType()) {
     if (scalarTy->isRealFloatingType()) {
       if (S.getLangOpts().OpenCL &&
-          S.Context.getFloatingTypeOrder(vectorEltTy, scalarTy) < 0)
+          S.Context.getFloatingTypeOrder(vectorEltTy, scalarTy) < 0) {
+        DiagID = diag::err_opencl_scalar_type_rank_greater_than_vector_type;
         return true;
+      }
       scalarCast = CK_FloatingCast;
     }
     else if (scalarTy->isIntegralType(S.Context))
@@ -8204,7 +8117,7 @@ static bool tryGCCVectorConvertAndSplat(Sema &S, ExprResult *Scalar,
 
   // The conversion to apply to the scalar before splatting it,
   // if necessary.
-  CastKind ScalarCast = CK_Invalid;
+  CastKind ScalarCast = CK_NoOp;
 
   // Accept cases where the vector elements are integers and the scalar is
   // an integer.
@@ -8254,7 +8167,7 @@ static bool tryGCCVectorConvertAndSplat(Sema &S, ExprResult *Scalar,
 
   // Adjust scalar if desired.
   if (Scalar) {
-    if (ScalarCast != CK_Invalid)
+    if (ScalarCast != CK_NoOp)
       *Scalar = S.ImpCastExprToType(Scalar->get(), VectorEltTy, ScalarCast);
     *Scalar = S.ImpCastExprToType(Scalar->get(), VectorTy, CK_VectorSplat);
   }
@@ -8331,10 +8244,12 @@ QualType Sema::CheckVectorOperands(ExprResult &LHS, ExprResult &RHS,
 
   // If there's a vector type and a scalar, try to convert the scalar to
   // the vector element type and splat.
+  unsigned DiagID = diag::err_typecheck_vector_not_convertable;
   if (!RHSVecType) {
     if (isa<ExtVectorType>(LHSVecType)) {
       if (!tryVectorConvertAndSplat(*this, &RHS, RHSType,
-                                    LHSVecType->getElementType(), LHSType))
+                                    LHSVecType->getElementType(), LHSType,
+                                    DiagID))
         return LHSType;
     } else {
       if (!tryGCCVectorConvertAndSplat(*this, &RHS, &LHS))
@@ -8345,7 +8260,7 @@ QualType Sema::CheckVectorOperands(ExprResult &LHS, ExprResult &RHS,
     if (isa<ExtVectorType>(RHSVecType)) {
       if (!tryVectorConvertAndSplat(*this, (IsCompAssign ? nullptr : &LHS),
                                     LHSType, RHSVecType->getElementType(),
-                                    RHSType))
+                                    RHSType, DiagID))
         return RHSType;
     } else {
       if (LHS.get()->getValueKind() == VK_LValue ||
@@ -8373,7 +8288,7 @@ QualType Sema::CheckVectorOperands(ExprResult &LHS, ExprResult &RHS,
     // type. Note that this is already done by non-compound assignments in
     // CheckAssignmentConstraints. If it's a scalar type, only bitcast for
     // <1 x T> -> T. The result is also a vector type.
-    } else if (OtherType->isExtVectorType() ||
+    } else if (OtherType->isExtVectorType() || OtherType->isVectorType() ||
                (OtherType->isScalarType() && VT->getNumElements() == 1)) {
       ExprResult *RHSExpr = &RHS;
       *RHSExpr = ImpCastExprToType(RHSExpr->get(), LHSType, CK_BitCast);
@@ -8421,7 +8336,7 @@ QualType Sema::CheckVectorOperands(ExprResult &LHS, ExprResult &RHS,
   }
 
   // Otherwise, use the generic diagnostic.
-  Diag(Loc, diag::err_typecheck_vector_not_convertable)
+  Diag(Loc, DiagID)
     << LHSType << RHSType
     << LHS.get()->getSourceRange() << RHS.get()->getSourceRange();
   return QualType();
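
Threading DiagID through lets CheckVectorOperands surface the OpenCL-specific
rank rule instead of the generic convertibility error. A hedged example in
OpenCL C, the one non-C++ sketch here (assumes -cl-std=CL2.0; diagnostic text
approximate):

    // kernel.cl
    kernel void k(global float4 *out, double d) {
      float4 v = (float4)(1.0f);
      // err_opencl_scalar_type_rank_greater_than_vector_type: the scalar
      // 'double' outranks the vector element type 'float' (OpenCL v2.0
      // s6.2.6); previously this produced the generic "not convertible"
      // diagnostic.
      out[0] = v + d;
    }
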
@@ -11473,6 +11388,7 @@ ExprResult Sema::CreateBuiltinBinOp(SourceLocation OpLoc,
     break;
   case BO_And:
     checkObjCPointerIntrospection(*this, LHS, RHS, OpLoc);
+    LLVM_FALLTHROUGH;
   case BO_Xor:
   case BO_Or:
     ResultTy = CheckBitwiseOperands(LHS, RHS, OpLoc, Opc);
@@ -11515,6 +11431,7 @@ ExprResult Sema::CreateBuiltinBinOp(SourceLocation OpLoc,
   case BO_AndAssign:
   case BO_OrAssign: // fallthrough
     DiagnoseSelfAssignment(*this, LHS.get(), RHS.get(), OpLoc);
+    LLVM_FALLTHROUGH;
   case BO_XorAssign:
     CompResultTy = CheckBitwiseOperands(LHS, RHS, OpLoc, Opc);
     CompLHSTy = CompResultTy;
@@ -11837,6 +11754,28 @@ ExprResult Sema::BuildBinOp(Scope *S, SourceLocation OpLoc,
           RHSExpr->getType()->isOverloadableType())
         return BuildOverloadedBinOp(*this, S, OpLoc, Opc, LHSExpr, RHSExpr);
     }
+
+    // If we're instantiating "a.x < b" or "A::x < b" and 'x' names a function
+    // template, diagnose the missing 'template' keyword instead of diagnosing
+    // an invalid use of a bound member function.
+    //
+    // Note that "A::x < b" might be valid if 'b' has an overloadable type due
+    // to C++1z [over.over]/1.4, but we already checked for that case above.
+    if (Opc == BO_LT && inTemplateInstantiation() &&
+        (pty->getKind() == BuiltinType::BoundMember ||
+         pty->getKind() == BuiltinType::Overload)) {
+      auto *OE = dyn_cast<OverloadExpr>(LHSExpr);
+      if (OE && !OE->hasTemplateKeyword() && !OE->hasExplicitTemplateArgs() &&
+          std::any_of(OE->decls_begin(), OE->decls_end(), [](NamedDecl *ND) {
+            return isa<FunctionTemplateDecl>(ND);
+          })) {
+        Diag(OE->getQualifier() ? OE->getQualifierLoc().getBeginLoc()
+                                : OE->getNameLoc(),
+             diag::err_template_kw_missing)
+          << OE->getName().getAsString() << "";
+        return ExprError();
+      }
+    }
         
     ExprResult LHS = CheckPlaceholderExpr(LHSExpr);
     if (LHS.isInvalid()) return ExprError();
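
A sketch of the case the BO_LT special-casing above is aimed at: during
instantiation, 'a.f < b' where 'f' names a member function template now yields
err_template_kw_missing instead of an opaque bound-member-function error
(diagnostic wording approximate):

    struct A {
      template <typename T> void f(T);
    };

    template <typename T> void g(A a, T b) {
      a.f < b;             // error: missing 'template' keyword prior to 'f'
      a.template f<T>(b);  // the intended spelling
    }

    template void g<int>(A, int);  // diagnosed when 'g' is instantiated
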
@@ -11962,16 +11901,13 @@ ExprResult Sema::CreateBuiltinUnaryOp(SourceLocation OpLoc,
           << resultType << Input.get()->getSourceRange();
     else if (resultType->hasIntegerRepresentation())
       break;
-    else if (resultType->isExtVectorType()) {
-      if (Context.getLangOpts().OpenCL) {
-        // OpenCL v1.1 s6.3.f: The bitwise operator not (~) does not operate
-        // on vector float types.
-        QualType T = resultType->getAs<ExtVectorType>()->getElementType();
-        if (!T->isIntegerType())
-          return ExprError(Diag(OpLoc, diag::err_typecheck_unary_expr)
-                           << resultType << Input.get()->getSourceRange());
-      }
-      break;
+    else if (resultType->isExtVectorType() && Context.getLangOpts().OpenCL) {
+      // OpenCL v1.1 s6.3.f: The bitwise operator not (~) does not operate
+      // on vector float types.
+      QualType T = resultType->getAs<ExtVectorType>()->getElementType();
+      if (!T->isIntegerType())
+        return ExprError(Diag(OpLoc, diag::err_typecheck_unary_expr)
+                          << resultType << Input.get()->getSourceRange());
     } else {
       return ExprError(Diag(OpLoc, diag::err_typecheck_unary_expr)
                        << resultType << Input.get()->getSourceRange());
@@ -12047,11 +11983,17 @@ ExprResult Sema::CreateBuiltinUnaryOp(SourceLocation OpLoc,
     }
     break;
   case UO_Extension:
-  case UO_Coawait:
     resultType = Input.get()->getType();
     VK = Input.get()->getValueKind();
     OK = Input.get()->getObjectKind();
     break;
+  case UO_Coawait:
+    // It's unnecessary to represent the pass-through operator co_await in the
+    // AST; just return the input expression instead.
+    assert(!Input.get()->getType()->isDependentType() &&
+                   "the co_await expression must be non-dependent before "
+                   "building operator co_await");
+    return Input;
   }
   if (resultType.isNull() || Input.isInvalid())
     return ExprError();
@@ -13716,6 +13658,7 @@ void Sema::MarkFunctionReferenced(SourceLocation Loc, FunctionDecl *Func,
         // call to such a function.
         InstantiateFunctionDefinition(PointOfInstantiation, Func);
       else {
+        Func->setInstantiationIsPending(true);
         PendingInstantiations.push_back(std::make_pair(Func,
                                                        PointOfInstantiation));
         // Notify the consumer that a function was implicitly instantiated.
@@ -14678,24 +14621,24 @@ static void MarkExprReferenced(Sema &SemaRef, SourceLocation Loc,
                           ME->performsVirtualDispatch(SemaRef.getLangOpts());
   if (!IsVirtualCall)
     return;
-  const Expr *Base = ME->getBase();
-  const CXXRecordDecl *MostDerivedClassDecl = Base->getBestDynamicClassType();
-  if (!MostDerivedClassDecl)
-    return;
-  CXXMethodDecl *DM = MD->getCorrespondingMethodInClass(MostDerivedClassDecl);
-  if (!DM || DM->isPure())
-    return;
-  SemaRef.MarkAnyDeclReferenced(Loc, DM, MightBeOdrUse);
+
+  // If it's possible to devirtualize the call, mark the called function
+  // referenced.
+  CXXMethodDecl *DM = MD->getDevirtualizedMethod(
+      ME->getBase(), SemaRef.getLangOpts().AppleKext);
+  if (DM)
+    SemaRef.MarkAnyDeclReferenced(Loc, DM, MightBeOdrUse);
 } 
 
 /// \brief Perform reference-marking and odr-use handling for a DeclRefExpr.
-void Sema::MarkDeclRefReferenced(DeclRefExpr *E) {
+void Sema::MarkDeclRefReferenced(DeclRefExpr *E, const Expr *Base) {
   // TODO: update this with DR# once a defect report is filed.
   // C++11 defect. The address of a pure member should not be an ODR use, even
   // if it's a qualified reference.
   bool OdrUse = true;
-  if (CXXMethodDecl *Method = dyn_cast<CXXMethodDecl>(E->getDecl()))
-    if (Method->isVirtual())
+  if (const CXXMethodDecl *Method = dyn_cast<CXXMethodDecl>(E->getDecl()))
+    if (Method->isVirtual() &&
+        !Method->getDevirtualizedMethod(Base, getLangOpts().AppleKext))
       OdrUse = false;
   MarkExprReferenced(*this, E->getLocation(), E->getDecl(), E, OdrUse);
 }
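
A standalone illustration (assumed example, not part of the patch) of the call the devirtualization logic above resolves: when the object's dynamic type is known, for instance through a final class, the concrete override is the unique dispatch target and can be marked referenced.

struct Base {
  virtual int f() const { return 0; }
  virtual ~Base() = default;
};
struct Leaf final : Base {
  int f() const override { return 1; }
};
int callThroughLeaf(const Leaf &l) {
  // 'Leaf' is final, so the dispatch target is statically known to be
  // Leaf::f; Sema can mark that override (not just Base::f) as referenced.
  return l.f();
}
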
@@ -15775,6 +15718,13 @@ ExprResult Sema::ActOnObjCAvailabilityCheckExpr(
   if (Spec != AvailSpecs.end())
     Version = Spec->getVersion();
 
+  // The use of `@available` in the enclosing function should be analyzed to
+  // warn when it's used inappropriately (i.e. not if(@available)).
+  if (getCurFunctionOrMethodDecl())
+    getEnclosingFunction()->HasPotentialAvailabilityViolations = true;
+  else if (getCurBlock() || getCurLambda())
+    getCurFunction()->HasPotentialAvailabilityViolations = true;
+
   return new (Context)
       ObjCAvailabilityCheckExpr(Version, AtLoc, RParen, Context.BoolTy);
 }
diff --git a/interpreter/llvm/src/tools/clang/lib/Sema/SemaExprCXX.cpp b/interpreter/llvm/src/tools/clang/lib/Sema/SemaExprCXX.cpp
index 3293e335767ca..a9cf3ec7990b2 100644
--- a/interpreter/llvm/src/tools/clang/lib/Sema/SemaExprCXX.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Sema/SemaExprCXX.cpp
@@ -189,12 +189,15 @@ ParsedType Sema::getDestructorName(SourceLocation TildeLoc,
     // have one) and, if that fails to find a match, in the scope (if
     // we're allowed to look there).
     Found.clear();
-    if (Step == 0 && LookupCtx)
+    if (Step == 0 && LookupCtx) {
+      if (RequireCompleteDeclContext(SS, LookupCtx))
+        return nullptr;
       LookupQualifiedName(Found, LookupCtx);
-    else if (Step == 1 && LookInScope && S)
+    } else if (Step == 1 && LookInScope && S) {
       LookupName(Found, S);
-    else
+    } else {
       continue;
+    }
 
     // FIXME: Should we be suppressing ambiguities here?
     if (Found.isAmbiguous())
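
A minimal illustration (assumed example) of what the added RequireCompleteDeclContext check guards against: qualified destructor-name lookup must not be performed in an incomplete scope.

template <typename T> struct Wrap; // declared but never defined (incomplete)

void destroy(Wrap<int> *p) {
  // Ill-formed: the nested-name-specifier Wrap<int>:: denotes an incomplete
  // class, so the qualified destructor name cannot be looked up there; with
  // the change above this is diagnosed instead of proceeding with lookup.
  // p->Wrap<int>::~Wrap();
}
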
@@ -1643,6 +1646,27 @@ static bool isLegalArrayNewInitializer(CXXNewExpr::InitializationStyle Style,
   return false;
 }
 
+// Emit a diagnostic if an aligned allocation/deallocation function that is not
+// implemented in the standard library is selected.
+static void diagnoseUnavailableAlignedAllocation(const FunctionDecl &FD,
+                                                 SourceLocation Loc, bool IsDelete,
+                                                 Sema &S) {
+  if (!S.getLangOpts().AlignedAllocationUnavailable)
+    return;
+
+  // Return if there is a definition.
+  if (FD.isDefined())
+    return;
+
+  bool IsAligned = false;
+  if (FD.isReplaceableGlobalAllocationFunction(&IsAligned) && IsAligned) {
+    S.Diag(Loc, diag::warn_aligned_allocation_unavailable)
+         << IsDelete << FD.getType().getAsString()
+         << S.getASTContext().getTargetInfo().getTriple().str();
+    S.Diag(Loc, diag::note_silence_unligned_allocation_unavailable);
+  }
+}
+
 ExprResult
 Sema::BuildCXXNew(SourceRange Range, bool UseGlobal,
                   SourceLocation PlacementLParen,
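
A sketch of the situation the new diagnostic targets (assumed flags and deployment target; the triple printed in the warning comes from the TargetInfo): an over-aligned allocation selects the C++17 aligned operator new, which older system C++ runtimes do not provide.

struct alignas(64) Overaligned {
  double lanes[8];
};

Overaligned *make() {
  // Under -std=c++17 with aligned allocation enabled, this call selects
  // ::operator new(std::size_t, std::align_val_t). If the deployment
  // target's C++ standard library does not implement it, Sema now warns
  // here and notes how to silence the diagnostic.
  return new Overaligned;
}
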
@@ -2020,11 +2044,13 @@ Sema::BuildCXXNew(SourceRange Range, bool UseGlobal,
     if (DiagnoseUseOfDecl(OperatorNew, StartLoc))
       return ExprError();
     MarkFunctionReferenced(StartLoc, OperatorNew);
+    diagnoseUnavailableAlignedAllocation(*OperatorNew, StartLoc, false, *this);
   }
   if (OperatorDelete) {
     if (DiagnoseUseOfDecl(OperatorDelete, StartLoc))
       return ExprError();
     MarkFunctionReferenced(StartLoc, OperatorDelete);
+    diagnoseUnavailableAlignedAllocation(*OperatorDelete, StartLoc, true, *this);
   }
 
   // C++0x [expr.new]p17:
@@ -2627,7 +2653,7 @@ void Sema::DeclareGlobalAllocationFunction(DeclarationName Name,
           // Make the function visible to name lookup, even if we found it in
           // an unimported module. It either is an implicitly-declared global
           // allocation function, or is suppressing that function.
-          Func->setHidden(false);
+          Func->setVisibleDespiteOwningModule();
           return;
         }
       }
@@ -2659,7 +2685,7 @@ void Sema::DeclareGlobalAllocationFunction(DeclarationName Name,
         FnType, /*TInfo=*/nullptr, SC_None, false, true);
     Alloc->setImplicit();
     // Global allocation functions should always be visible.
-    Alloc->setHidden(false);
+    Alloc->setVisibleDespiteOwningModule();
 
     // Implicit sized deallocation functions always have default visibility.
     Alloc->addAttr(
@@ -3240,6 +3266,9 @@ Sema::ActOnCXXDelete(SourceLocation StartLoc, bool UseGlobal,
                       PDiag(diag::err_access_dtor) << PointeeElem);
       }
     }
+
+    diagnoseUnavailableAlignedAllocation(*OperatorDelete, StartLoc, true,
+                                         *this);
   }
 
   CXXDeleteExpr *Result = new (Context) CXXDeleteExpr(
@@ -4082,24 +4111,17 @@ static bool CheckUnaryTypeTraitTypeCompleteness(Sema &S, TypeTrait UTT,
           Loc, ArgTy, diag::err_incomplete_type_used_in_type_trait_expr);
     return true;
 
-  // C++0x [meta.unary.prop] Table 49 requires the following traits to be
-  // applied to a complete type.
+  // C++1z [meta.unary.prop]:
+  //   remove_all_extents_t shall be a complete type or cv void.
   case UTT_IsAggregate:
   case UTT_IsTrivial:
   case UTT_IsTriviallyCopyable:
   case UTT_IsStandardLayout:
   case UTT_IsPOD:
   case UTT_IsLiteral:
-
-  case UTT_IsDestructible:
-  case UTT_IsNothrowDestructible:
-    // Fall-through
-
-    // These trait expressions are designed to help implement predicates in
-    // [meta.unary.prop] despite not being named the same. They are specified
-    // by both GCC and the Embarcadero C++ compiler, and require the complete
-    // type due to the overarching C++0x type predicates being implemented
-    // requiring the complete type.
+  // Per the GCC type traits documentation, T shall be a complete type, cv void,
+  // or an array of unknown bound. But GCC actually imposes the same constraints
+  // as above.
   case UTT_HasNothrowAssign:
   case UTT_HasNothrowMoveAssign:
   case UTT_HasNothrowConstructor:
@@ -4111,17 +4133,19 @@ static bool CheckUnaryTypeTraitTypeCompleteness(Sema &S, TypeTrait UTT,
   case UTT_HasTrivialCopy:
   case UTT_HasTrivialDestructor:
   case UTT_HasVirtualDestructor:
-    // Arrays of unknown bound are expressly allowed.
-    QualType ElTy = ArgTy;
-    if (ArgTy->isIncompleteArrayType())
-      ElTy = S.Context.getAsArrayType(ArgTy)->getElementType();
+    ArgTy = QualType(ArgTy->getBaseElementTypeUnsafe(), 0);
+    LLVM_FALLTHROUGH;
 
-    // The void type is expressly allowed.
-    if (ElTy->isVoidType())
+  // C++1z [meta.unary.prop]:
+  //   T shall be a complete type, cv void, or an array of unknown bound.
+  case UTT_IsDestructible:
+  case UTT_IsNothrowDestructible:
+  case UTT_IsTriviallyDestructible:
+    if (ArgTy->isIncompleteArrayType() || ArgTy->isVoidType())
       return true;
 
     return !S.RequireCompleteType(
-      Loc, ElTy, diag::err_incomplete_type_used_in_type_trait_expr);
+        Loc, ArgTy, diag::err_incomplete_type_used_in_type_trait_expr);
   }
 }
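
Taken together with the evaluation change in the next hunk, the new trait behaves as sketched below (the builtin spelling __is_trivially_destructible is assumed from the UTT name):

struct Plain { int x; };
struct HasDtor { ~HasDtor() {} };

static_assert(__is_trivially_destructible(Plain), "no cleanup required");
static_assert(!__is_trivially_destructible(HasDtor), "user-provided dtor");
static_assert(__is_trivially_destructible(int &), "references need no destruction");
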
 
@@ -4358,6 +4382,7 @@ static bool EvaluateUnaryTypeTrait(Sema &Self, TypeTrait UTT,
              !RD->hasNonTrivialCopyAssignment();
     return false;
   case UTT_IsDestructible:
+  case UTT_IsTriviallyDestructible:
   case UTT_IsNothrowDestructible:
     // C++14 [meta.unary.prop]:
     //   For reference types, is_destructible::value is true.
@@ -4375,6 +4400,11 @@ static bool EvaluateUnaryTypeTrait(Sema &Self, TypeTrait UTT,
     if (T->isIncompleteType() || T->isFunctionType())
       return false;
 
+    // A type that requires destruction (via a non-trivial destructor or ARC
+    // lifetime semantics) is not trivially-destructible.
+    if (UTT == UTT_IsTriviallyDestructible && T.isDestructedType())
+      return false;
+
     // C++14 [meta.unary.prop]:
     //   For object types and given U equal to remove_all_extents_t, if the
     //   expression std::declval().~U() is well-formed when treated as an
@@ -5107,7 +5137,9 @@ QualType Sema::CheckPointerToMemberOperands(ExprResult &LHS, ExprResult &RHS,
       return QualType();
 
     // Cast LHS to type of use.
-    QualType UseType = isIndirect ? Context.getPointerType(Class) : Class;
+    QualType UseType = Context.getQualifiedType(Class, LHSType.getQualifiers());
+    if (isIndirect)
+      UseType = Context.getPointerType(UseType);
     ExprValueKind VK = isIndirect ? VK_RValue : LHS.get()->getValueKind();
     LHS = ImpCastExprToType(LHS.get(), UseType, CK_DerivedToBase, VK,
                             &BasePath);
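
A small example (assumed, not from the patch) of the qualifier bug fixed above: the implicit derived-to-base conversion applied to the object operand of .* previously dropped the cv-qualifiers of the LHS.

struct B { int m; };
struct D : B {};

int read(const D &d, int B::*pm) {
  // 'd' has type 'const D'; converting it to the class of 'pm' must yield
  // 'const B', not plain 'B', so const-ness survives the member access.
  return d.*pm;
}
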
@@ -5284,16 +5316,16 @@ static bool FindConditionalOverload(Sema &Self, ExprResult &LHS, ExprResult &RHS
   switch (CandidateSet.BestViableFunction(Self, QuestionLoc, Best)) {
     case OR_Success: {
       // We found a match. Perform the conversions on the arguments and move on.
-      ExprResult LHSRes =
-        Self.PerformImplicitConversion(LHS.get(), Best->BuiltinTypes.ParamTypes[0],
-                                       Best->Conversions[0], Sema::AA_Converting);
+      ExprResult LHSRes = Self.PerformImplicitConversion(
+          LHS.get(), Best->BuiltinParamTypes[0], Best->Conversions[0],
+          Sema::AA_Converting);
       if (LHSRes.isInvalid())
         break;
       LHS = LHSRes;
 
-      ExprResult RHSRes =
-        Self.PerformImplicitConversion(RHS.get(), Best->BuiltinTypes.ParamTypes[1],
-                                       Best->Conversions[1], Sema::AA_Converting);
+      ExprResult RHSRes = Self.PerformImplicitConversion(
+          RHS.get(), Best->BuiltinParamTypes[1], Best->Conversions[1],
+          Sema::AA_Converting);
       if (RHSRes.isInvalid())
         break;
       RHS = RHSRes;
diff --git a/interpreter/llvm/src/tools/clang/lib/Sema/SemaExprMember.cpp b/interpreter/llvm/src/tools/clang/lib/Sema/SemaExprMember.cpp
index b18de7e946865..c3d0e2db76b67 100644
--- a/interpreter/llvm/src/tools/clang/lib/Sema/SemaExprMember.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Sema/SemaExprMember.cpp
@@ -1842,10 +1842,6 @@ Sema::BuildFieldReferenceExpr(Expr *BaseExpr, bool IsArrow,
                                                   FoundDecl, Field);
   if (Base.isInvalid())
     return ExprError();
-  MemberExpr *ME =
-      BuildMemberExpr(*this, Context, Base.get(), IsArrow, OpLoc, SS,
-                      /*TemplateKWLoc=*/SourceLocation(), Field, FoundDecl,
-                      MemberNameInfo, MemberType, VK, OK);
 
   // Build a reference to a private copy for non-static data members in
   // non-static member functions, privatized by OpenMP constructs.
@@ -1855,7 +1851,10 @@ Sema::BuildFieldReferenceExpr(Expr *BaseExpr, bool IsArrow,
     if (auto *PrivateCopy = IsOpenMPCapturedDecl(Field))
       return getOpenMPCapturedExpr(PrivateCopy, VK, OK, OpLoc);
   }
-  return ME;
+
+  return BuildMemberExpr(*this, Context, Base.get(), IsArrow, OpLoc, SS,
+                         /*TemplateKWLoc=*/SourceLocation(), Field, FoundDecl,
+                         MemberNameInfo, MemberType, VK, OK);
 }
 
 /// Builds an implicit member access expression.  The current context
diff --git a/interpreter/llvm/src/tools/clang/lib/Sema/SemaInit.cpp b/interpreter/llvm/src/tools/clang/lib/Sema/SemaInit.cpp
index 8d19b82c3f9d5..32024cb335dc3 100644
--- a/interpreter/llvm/src/tools/clang/lib/Sema/SemaInit.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Sema/SemaInit.cpp
@@ -8296,8 +8296,46 @@ Sema::PerformCopyInitialization(const InitializedEntity &Entity,
                                                            AllowExplicit);
   InitializationSequence Seq(*this, Entity, Kind, InitE, TopLevelOfInitList);
 
+  // Prevent infinite recursion when performing parameter copy-initialization.
+  const bool ShouldTrackCopy =
+      Entity.isParameterKind() && Seq.isConstructorInitialization();
+  if (ShouldTrackCopy) {
+    if (llvm::find(CurrentParameterCopyTypes, Entity.getType()) !=
+        CurrentParameterCopyTypes.end()) {
+      Seq.SetOverloadFailure(
+          InitializationSequence::FK_ConstructorOverloadFailed,
+          OR_No_Viable_Function);
+
+      // Try to give a meaningful diagnostic note for the problematic
+      // constructor.
+      const auto LastStep = Seq.step_end() - 1;
+      assert(LastStep->Kind ==
+             InitializationSequence::SK_ConstructorInitialization);
+      const FunctionDecl *Function = LastStep->Function.Function;
+      auto Candidate =
+          llvm::find_if(Seq.getFailedCandidateSet(),
+                        [Function](const OverloadCandidate &Candidate) -> bool {
+                          return Candidate.Viable &&
+                                 Candidate.Function == Function &&
+                                 Candidate.Conversions.size() > 0;
+                        });
+      if (Candidate != Seq.getFailedCandidateSet().end() &&
+          Function->getNumParams() > 0) {
+        Candidate->Viable = false;
+        Candidate->FailureKind = ovl_fail_bad_conversion;
+        Candidate->Conversions[0].setBad(BadConversionSequence::no_conversion,
+                                         InitE,
+                                         Function->getParamDecl(0)->getType());
+      }
+    }
+    CurrentParameterCopyTypes.push_back(Entity.getType());
+  }
+
   ExprResult Result = Seq.Perform(*this, Entity, Kind, InitE);
 
+  if (ShouldTrackCopy)
+    CurrentParameterCopyTypes.pop_back();
+
   return Result;
 }
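
The guard added above is the usual cycle-detection pattern: record the parameter types whose copy-initialization is in flight, fail instead of recursing when the same type comes around again, and pop on the way out. A minimal sketch of the pattern with hypothetical names (not Clang API):

#include <algorithm>
#include <vector>

static std::vector<int> InFlight; // stands in for CurrentParameterCopyTypes

bool performParamCopyInit(int TypeID) {
  if (std::find(InFlight.begin(), InFlight.end(), TypeID) != InFlight.end())
    return false;            // cycle detected: fail instead of recursing
  InFlight.push_back(TypeID);
  bool OK = true;            // ...perform the actual initialization here...
  InFlight.pop_back();
  return OK;
}
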
 
diff --git a/interpreter/llvm/src/tools/clang/lib/Sema/SemaLambda.cpp b/interpreter/llvm/src/tools/clang/lib/Sema/SemaLambda.cpp
index 7a9a8ff911aa8..46f2ba3760068 100644
--- a/interpreter/llvm/src/tools/clang/lib/Sema/SemaLambda.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Sema/SemaLambda.cpp
@@ -337,6 +337,7 @@ Sema::getCurrentMangleNumberContext(const DeclContext *DC,
       return nullptr;
     }
     // Fall through to get the current context.
+    LLVM_FALLTHROUGH;
 
   case DataMember:
     //  -- the in-class initializers of class members
@@ -1491,6 +1492,7 @@ ExprResult Sema::BuildLambdaExpr(SourceLocation StartLoc, SourceLocation EndLoc,
   bool ExplicitResultType;
   CleanupInfo LambdaCleanup;
   bool ContainsUnexpandedParameterPack;
+  bool IsGenericLambda;
   {
     CallOperator = LSI->CallOperator;
     Class = LSI->Lambda;
@@ -1499,7 +1501,8 @@ ExprResult Sema::BuildLambdaExpr(SourceLocation StartLoc, SourceLocation EndLoc,
     ExplicitResultType = !LSI->HasImplicitReturnType;
     LambdaCleanup = LSI->Cleanup;
     ContainsUnexpandedParameterPack = LSI->ContainsUnexpandedParameterPack;
-    
+    IsGenericLambda = Class->isGenericLambda();
+
     CallOperator->setLexicalDeclContext(Class);
     Decl *TemplateOrNonTemplateCallOperatorDecl = 
         CallOperator->getDescribedFunctionTemplate()  
@@ -1519,8 +1522,13 @@ ExprResult Sema::BuildLambdaExpr(SourceLocation StartLoc, SourceLocation EndLoc,
       bool IsImplicit = I >= LSI->NumExplicitCaptures;
 
       // Warn about unused explicit captures.
-      if (!CurContext->isDependentContext() && !IsImplicit && !From.isODRUsed())
-        DiagnoseUnusedLambdaCapture(From);
+      if (!CurContext->isDependentContext() && !IsImplicit && !From.isODRUsed()) {
+        // Initialized captures that are non-ODR used may not be eliminated.
+        bool NonODRUsedInitCapture =
+            IsGenericLambda && From.isNonODRUsed() && From.getInitExpr();
+        if (!NonODRUsedInitCapture)
+          DiagnoseUnusedLambdaCapture(From);
+      }
 
       // Handle 'this' capture.
       if (From.isThisCapture()) {
@@ -1567,8 +1575,7 @@ ExprResult Sema::BuildLambdaExpr(SourceLocation StartLoc, SourceLocation EndLoc,
     //   same parameter and return types as the closure type's function call
     //   operator.
     // FIXME: Fix generic lambda to block conversions.
-    if (getLangOpts().Blocks && getLangOpts().ObjC1 && 
-                                              !Class->isGenericLambda())
+    if (getLangOpts().Blocks && getLangOpts().ObjC1 && !IsGenericLambda)
       addBlockPointerConversion(*this, IntroducerRange, Class, CallOperator);
     
     // Finalize the lambda class.
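
For the unused-capture change above, an assumed example: in a generic lambda, an init-capture named only in an unevaluated operand is non-ODR-used, yet it still has to exist in the closure object, so warning that it is unused would be misleading.

auto g = [x = 42](auto y) {
  // 'x' appears only inside sizeof (unevaluated), i.e. it is non-ODR-used,
  // but the init-capture cannot be eliminated: no unused-capture warning.
  return sizeof(x) + sizeof(y);
};
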
@@ -1588,9 +1595,10 @@ ExprResult Sema::BuildLambdaExpr(SourceLocation StartLoc, SourceLocation EndLoc,
                                           ContainsUnexpandedParameterPack);
   // If the lambda expression's call operator is not explicitly marked constexpr
   // and we are not in a dependent context, analyze the call operator to infer
-  // its constexpr-ness, supressing diagnostics while doing so.
+  // its constexpr-ness, suppressing diagnostics while doing so.
   if (getLangOpts().CPlusPlus1z && !CallOperator->isInvalidDecl() &&
       !CallOperator->isConstexpr() &&
+      !isa<CoroutineBodyStmt>(CallOperator->getBody()) &&
       !Class->getDeclContext()->isDependentContext()) {
     TentativeAnalysisScope DiagnosticScopeGuard(*this);
     CallOperator->setConstexpr(
diff --git a/interpreter/llvm/src/tools/clang/lib/Sema/SemaLookup.cpp b/interpreter/llvm/src/tools/clang/lib/Sema/SemaLookup.cpp
index 02702ad39be93..bc16bdded48db 100644
--- a/interpreter/llvm/src/tools/clang/lib/Sema/SemaLookup.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Sema/SemaLookup.cpp
@@ -862,6 +862,16 @@ static bool LookupDirect(Sema &S, LookupResult &R, const DeclContext *DC) {
   if (!Record->isCompleteDefinition())
     return Found;
 
+  // For conversion operators, 'operator auto' should only match
+  // 'operator auto'.  Since 'auto' is not a type, it shouldn't be considered
+  // as a candidate for template substitution.
+  auto *ContainedDeducedType =
+      R.getLookupName().getCXXNameType()->getContainedDeducedType();
+  if (R.getLookupName().getNameKind() ==
+          DeclarationName::CXXConversionFunctionName &&
+      ContainedDeducedType && ContainedDeducedType->isUndeducedType())
+    return Found;
+
   for (CXXRecordDecl::conversion_iterator U = Record->conversion_begin(),
          UEnd = Record->conversion_end(); U != UEnd; ++U) {
     FunctionTemplateDecl *ConvTemplate = dyn_cast<FunctionTemplateDecl>(*U);
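
A hypothetical pair of classes showing what the early return above distinguishes: a lookup for the name 'operator auto' should find only a conversion function spelled exactly that way, never a conversion function template whose parameter might otherwise be treated as a substitution candidate.

struct HasAuto {
  operator auto() { return 1; } // matched by a lookup for 'operator auto'
};

struct HasTemplate {
  template <typename T>
  operator T(); // must not be offered as a match for 'operator auto'
};
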
@@ -1331,7 +1341,7 @@ void Sema::makeMergedDefinitionVisible(NamedDecl *ND) {
     Context.mergeDefinitionIntoModule(ND, M);
   else
     // We're not building a module; just make the definition visible.
-    ND->setHidden(false);
+    ND->setVisibleDespiteOwningModule();
 
   // If ND is a template declaration, make the template parameters
   // visible too. They're not (necessarily) within a mergeable DeclContext.
@@ -1385,6 +1395,20 @@ bool Sema::hasVisibleMergedDefinition(NamedDecl *Def) {
   return false;
 }
 
+bool Sema::hasMergedDefinitionInCurrentModule(NamedDecl *Def) {
+  // FIXME: When not in local visibility mode, we can't tell the difference
+  // between a declaration being visible because we merged a local copy of
+  // the same declaration into it, and it being visible because its owning
+  // module is visible.
+  if (Def->getModuleOwnershipKind() == Decl::ModuleOwnershipKind::Visible &&
+      getLangOpts().ModulesLocalVisibility)
+    return true;
+  for (Module *Merged : Context.getModulesWithMergedDefinition(Def))
+    if (Merged->getTopLevelModuleName() == getLangOpts().CurrentModule)
+      return true;
+  return false;
+}
+
 template <typename ParmDecl>
 static bool
 hasVisibleDefaultArgument(Sema &S, const ParmDecl *D,
@@ -1420,11 +1444,46 @@ bool Sema::hasVisibleDefaultArgument(const NamedDecl *D,
                                      Modules);
 }
 
+template <typename Filter>
+static bool hasVisibleDeclarationImpl(Sema &S, const NamedDecl *D,
+                                      llvm::SmallVectorImpl<Module *> *Modules,
+                                      Filter F) {
+  for (auto *Redecl : D->redecls()) {
+    auto *R = cast<NamedDecl>(Redecl);
+    if (!F(R))
+      continue;
+
+    if (S.isVisible(R))
+      return true;
+
+    if (Modules) {
+      Modules->push_back(R->getOwningModule());
+      const auto &Merged = S.Context.getModulesWithMergedDefinition(R);
+      Modules->insert(Modules->end(), Merged.begin(), Merged.end());
+    }
+  }
+
+  return false;
+}
+
+bool Sema::hasVisibleExplicitSpecialization(
+    const NamedDecl *D, llvm::SmallVectorImpl<Module *> *Modules) {
+  return hasVisibleDeclarationImpl(*this, D, Modules, [](const NamedDecl *D) {
+    if (auto *RD = dyn_cast<CXXRecordDecl>(D))
+      return RD->getTemplateSpecializationKind() == TSK_ExplicitSpecialization;
+    if (auto *FD = dyn_cast<FunctionDecl>(D))
+      return FD->getTemplateSpecializationKind() == TSK_ExplicitSpecialization;
+    if (auto *VD = dyn_cast<VarDecl>(D))
+      return VD->getTemplateSpecializationKind() == TSK_ExplicitSpecialization;
+    llvm_unreachable("unknown explicit specialization kind");
+  });
+}
+
 bool Sema::hasVisibleMemberSpecialization(
     const NamedDecl *D, llvm::SmallVectorImpl<Module *> *Modules) {
   assert(isa(D->getDeclContext()) &&
          "not a member specialization");
-  for (auto *Redecl : D->redecls()) {
+  return hasVisibleDeclarationImpl(*this, D, Modules, [](const NamedDecl *D) {
     // If the specialization is declared at namespace scope, then it's a member
     // specialization declaration. If it's lexically inside the class
     // definition then it was instantiated.
@@ -1432,19 +1491,8 @@ bool Sema::hasVisibleMemberSpecialization(
     // FIXME: This is a hack. There should be a better way to determine this.
     // FIXME: What about MS-style explicit specializations declared within a
     //        class definition?
-    if (Redecl->getLexicalDeclContext()->isFileContext()) {
-      auto *NonConstR = const_cast<NamedDecl *>(cast<NamedDecl>(Redecl));
-
-      if (isVisible(NonConstR))
-        return true;
-
-      if (Modules) {
-        Modules->push_back(getOwningModule(NonConstR));
-        const auto &Merged = Context.getModulesWithMergedDefinition(NonConstR);
-        Modules->insert(Modules->end(), Merged.begin(), Merged.end());
-      }
-    }
-  }
+    return D->getLexicalDeclContext()->isFileContext();
+  });
 
   return false;
 }
@@ -1459,29 +1507,42 @@ bool Sema::hasVisibleMemberSpecialization(
 /// your module can see, including those later on in your module).
 bool LookupResult::isVisibleSlow(Sema &SemaRef, NamedDecl *D) {
   assert(D->isHidden() && "should not call this: not in slow case");
-  Module *DeclModule = nullptr;
-  
-  if (SemaRef.getLangOpts().ModulesLocalVisibility) {
-    DeclModule = SemaRef.getOwningModule(D);
-    if (!DeclModule) {
-      assert(!D->isHidden() && "hidden decl not from a module");
-      return true;
-    }
 
+  Module *DeclModule = SemaRef.getOwningModule(D);
+  if (!DeclModule) {
+    // A module-private declaration with no owning module means this is in the
+    // global module in the C++ Modules TS. This is visible within the same
+    // translation unit only.
+    // FIXME: Don't assume that "same translation unit" means the same thing
+    // as "not from an AST file".
+    assert(D->isModulePrivate() && "hidden decl has no module");
+    if (!D->isFromASTFile() || SemaRef.hasMergedDefinitionInCurrentModule(D))
+      return true;
+  } else {
     // If the owning module is visible, and the decl is not module private,
     // then the decl is visible too. (Module private is ignored within the same
     // top-level module.)
-    if ((!D->isFromASTFile() || !D->isModulePrivate()) &&
-        (SemaRef.isModuleVisible(DeclModule) ||
-         SemaRef.hasVisibleMergedDefinition(D)))
+    if (D->isModulePrivate()
+          ? DeclModule->getTopLevelModuleName() ==
+                    SemaRef.getLangOpts().CurrentModule ||
+            SemaRef.hasMergedDefinitionInCurrentModule(D)
+          : SemaRef.isModuleVisible(DeclModule) ||
+            SemaRef.hasVisibleMergedDefinition(D))
       return true;
   }
 
-  // If this declaration is not at namespace scope nor module-private,
+  // Determine whether a decl context is a file context for the purpose of
+  // visibility. This looks through some (export and linkage spec) transparent
+  // contexts, but not others (enums).
+  auto IsEffectivelyFileContext = [](const DeclContext *DC) {
+    return DC->isFileContext() || isa<LinkageSpecDecl>(DC) ||
+           isa<ExportDecl>(DC);
+  };
+
+  // If this declaration is not at namespace scope
   // then it is visible if its lexical parent has a visible definition.
   DeclContext *DC = D->getLexicalDeclContext();
-  if (!D->isModulePrivate() && DC && !DC->isFileContext() &&
-      !isa<LinkageSpecDecl>(DC) && !isa<ExportDecl>(DC)) {
+  if (DC && !IsEffectivelyFileContext(DC)) {
     // For a parameter, check whether our current template declaration's
     // lexical context is visible, not whether there's some other visible
     // definition of it, because parameters aren't "within" the definition.
@@ -1489,32 +1550,45 @@ bool LookupResult::isVisibleSlow(Sema &SemaRef, NamedDecl *D) {
     // In C++ we need to check for a visible definition due to ODR merging,
     // and in C we must not because each declaration of a function gets its own
     // set of declarations for tags in prototype scope.
-    if ((D->isTemplateParameter() || isa<ParmVarDecl>(D)
-         || (isa<FunctionDecl>(DC) && !SemaRef.getLangOpts().CPlusPlus))
-            ? isVisible(SemaRef, cast<NamedDecl>(DC))
-            : SemaRef.hasVisibleDefinition(cast<NamedDecl>(DC))) {
-      if (SemaRef.CodeSynthesisContexts.empty() &&
-          // FIXME: Do something better in this case.
-          !SemaRef.getLangOpts().ModulesLocalVisibility) {
-        // Cache the fact that this declaration is implicitly visible because
-        // its parent has a visible definition.
-        D->setHidden(false);
-      }
-      return true;
+    bool VisibleWithinParent;
+    if (D->isTemplateParameter() || isa<ParmVarDecl>(D) ||
+        (isa<FunctionDecl>(DC) && !SemaRef.getLangOpts().CPlusPlus))
+      VisibleWithinParent = isVisible(SemaRef, cast<NamedDecl>(DC));
+    else if (D->isModulePrivate()) {
+      // A module-private declaration is only visible if an enclosing lexical
+      // parent was merged with another definition in the current module.
+      VisibleWithinParent = false;
+      do {
+        if (SemaRef.hasMergedDefinitionInCurrentModule(cast<NamedDecl>(DC))) {
+          VisibleWithinParent = true;
+          break;
+        }
+        DC = DC->getLexicalParent();
+      } while (!IsEffectivelyFileContext(DC));
+    } else {
+      VisibleWithinParent = SemaRef.hasVisibleDefinition(cast<NamedDecl>(DC));
     }
-    return false;
+
+    if (VisibleWithinParent && SemaRef.CodeSynthesisContexts.empty() &&
+        // FIXME: Do something better in this case.
+        !SemaRef.getLangOpts().ModulesLocalVisibility) {
+      // Cache the fact that this declaration is implicitly visible because
+      // its parent has a visible definition.
+      D->setVisibleDespiteOwningModule();
+    }
+    return VisibleWithinParent;
   }
 
+  // FIXME: All uses of DeclModule below this point should also check merged
+  // modules.
+  if (!DeclModule)
+    return false;
+
   // Find the extra places where we need to look.
   llvm::DenseSet<Module *> &LookupModules = SemaRef.getLookupModules();
   if (LookupModules.empty())
     return false;
 
-  if (!DeclModule) {
-    DeclModule = SemaRef.getOwningModule(D);
-    assert(DeclModule && "hidden decl not from a module");
-  }
-
   // If our lookup set contains the decl's module, it's visible.
   if (LookupModules.count(DeclModule))
     return true;
@@ -1571,20 +1645,8 @@ static NamedDecl *findAcceptableDecl(Sema &SemaRef, NamedDecl *D) {
 bool Sema::hasVisibleDeclarationSlow(const NamedDecl *D,
                                      llvm::SmallVectorImpl<Module *> *Modules) {
   assert(!isVisible(D) && "not in slow case");
-
-  for (auto *Redecl : D->redecls()) {
-    auto *NonConstR = const_cast<NamedDecl *>(cast<NamedDecl>(Redecl));
-    if (isVisible(NonConstR))
-      return true;
-
-    if (Modules) {
-      Modules->push_back(getOwningModule(NonConstR));
-      const auto &Merged = Context.getModulesWithMergedDefinition(NonConstR);
-      Modules->insert(Modules->end(), Merged.begin(), Merged.end());
-    }
-  }
-
-  return false;
+  return hasVisibleDeclarationImpl(*this, D, Modules,
+                                   [](const NamedDecl *) { return true; });
 }
 
 NamedDecl *LookupResult::getAcceptableDeclSlow(NamedDecl *D) const {
@@ -2592,6 +2654,7 @@ addAssociatedClassesAndNamespaces(AssociatedLookup &Result, QualType Ty) {
       for (const auto &Arg : Proto->param_types())
         Queue.push_back(Arg.getTypePtr());
       // fallthrough
+      LLVM_FALLTHROUGH;
     }
     case Type::FunctionNoProto: {
     const FunctionType *FnType = cast<FunctionType>(T);
@@ -3744,20 +3807,19 @@ static void LookupPotentialTypoResult(Sema &SemaRef,
                                       bool FindHidden);
 
 /// \brief Check whether the declarations found for a typo correction are
-/// visible, and if none of them are, convert the correction to an 'import
-/// a module' correction.
+/// visible. Set the correction's RequiresImport flag to true if none of the
+/// declarations are visible, false otherwise.
 static void checkCorrectionVisibility(Sema &SemaRef, TypoCorrection &TC) {
-  if (TC.begin() == TC.end())
-    return;
-
   TypoCorrection::decl_iterator DI = TC.begin(), DE = TC.end();
 
   for (/**/; DI != DE; ++DI)
     if (!LookupResult::isVisible(SemaRef, *DI))
       break;
-  // Nothing to do if all decls are visible.
-  if (DI == DE)
+  // No filtering needed if all decls are visible.
+  if (DI == DE) {
+    TC.setRequiresImport(false);
     return;
+  }
 
   llvm::SmallVector<NamedDecl *, 4> NewDecls(TC.begin(), DI);
   bool AnyVisibleDecls = !NewDecls.empty();
@@ -4927,8 +4989,6 @@ static NamedDecl *getDefinitionToImport(NamedDecl *D) {
 
 void Sema::diagnoseMissingImport(SourceLocation Loc, NamedDecl *Decl,
                                  MissingImportKind MIK, bool Recover) {
-  assert(!isVisible(Decl) && "missing import for non-hidden decl?");
-
   // Suggest importing a module providing the definition of this entity, if
   // possible.
   NamedDecl *Def = getDefinitionToImport(Decl);
@@ -4963,6 +5023,14 @@ void Sema::diagnoseMissingImport(SourceLocation UseLoc, NamedDecl *Decl,
                                  MissingImportKind MIK, bool Recover) {
   assert(!Modules.empty());
 
+  // Weed out duplicates from module list.
+  llvm::SmallVector<Module *, 8> UniqueModules;
+  llvm::SmallDenseSet<Module *, 8> UniqueModuleSet;
+  for (auto *M : Modules)
+    if (UniqueModuleSet.insert(M).second)
+      UniqueModules.push_back(M);
+  Modules = UniqueModules;
+
   if (Modules.size() > 1) {
     std::string ModuleList;
     unsigned N = 0;
@@ -4977,8 +5045,8 @@ void Sema::diagnoseMissingImport(SourceLocation UseLoc, NamedDecl *Decl,
 
     Diag(UseLoc, diag::err_module_unimported_use_multiple)
       << (int)MIK << Decl << ModuleList;
-  } else if (const FileEntry *E =
-                 PP.getModuleHeaderToIncludeForDiagnostics(UseLoc, DeclLoc)) {
+  } else if (const FileEntry *E = PP.getModuleHeaderToIncludeForDiagnostics(
+                 UseLoc, Modules[0], DeclLoc)) {
     // The right way to make the declaration visible is to include a header;
     // suggest doing so.
     //
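
The de-duplication above is the standard insert-and-test idiom: keep the first occurrence of each element, in order. The same idiom in a self-contained sketch (standard containers in place of the LLVM ones used by the patch):

#include <unordered_set>
#include <vector>

template <typename T>
std::vector<T> uniqueStable(const std::vector<T> &In) {
  std::vector<T> Out;
  std::unordered_set<T> Seen;
  for (const T &V : In)
    if (Seen.insert(V).second) // true only the first time V is seen
      Out.push_back(V);
  return Out;
}
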
diff --git a/interpreter/llvm/src/tools/clang/lib/Sema/SemaObjCProperty.cpp b/interpreter/llvm/src/tools/clang/lib/Sema/SemaObjCProperty.cpp
index 6c57164548742..bfb0071a54f9f 100644
--- a/interpreter/llvm/src/tools/clang/lib/Sema/SemaObjCProperty.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Sema/SemaObjCProperty.cpp
@@ -814,53 +814,185 @@ static void setImpliedPropertyAttributeForReadOnlyProperty(
     property->setPropertyAttributes(ObjCPropertyDecl::OBJC_PR_weak);
 }
 
-/// DiagnosePropertyMismatchDeclInProtocols - diagnose properties declared
-/// in inherited protocols with mismatched types. Since any of them can
-/// be candidate for synthesis.
-static void
-DiagnosePropertyMismatchDeclInProtocols(Sema &S, SourceLocation AtLoc,
+static bool
+isIncompatiblePropertyAttribute(unsigned Attr1, unsigned Attr2,
+                                ObjCPropertyDecl::PropertyAttributeKind Kind) {
+  return (Attr1 & Kind) != (Attr2 & Kind);
+}
+
+static bool areIncompatiblePropertyAttributes(unsigned Attr1, unsigned Attr2,
+                                              unsigned Kinds) {
+  return ((Attr1 & Kinds) != 0) != ((Attr2 & Kinds) != 0);
+}
+
+/// SelectPropertyForSynthesisFromProtocols - Finds the most appropriate
+/// property declaration to synthesize from among those declared in the
+/// inherited protocols. It also diagnoses properties declared in inherited
+/// protocols with mismatched types or attributes, since any of them can be a
+/// candidate for synthesis.
+static ObjCPropertyDecl *
+SelectPropertyForSynthesisFromProtocols(Sema &S, SourceLocation AtLoc,
                                         ObjCInterfaceDecl *ClassDecl,
                                         ObjCPropertyDecl *Property) {
-  ObjCInterfaceDecl::ProtocolPropertyMap PropMap;
+  assert(isa<ObjCProtocolDecl>(Property->getDeclContext()) &&
+         "Expected a property from a protocol");
+  ObjCInterfaceDecl::ProtocolPropertySet ProtocolSet;
+  ObjCInterfaceDecl::PropertyDeclOrder Properties;
   for (const auto *PI : ClassDecl->all_referenced_protocols()) {
     if (const ObjCProtocolDecl *PDecl = PI->getDefinition())
-      PDecl->collectInheritedProtocolProperties(Property, PropMap);
+      PDecl->collectInheritedProtocolProperties(Property, ProtocolSet,
+                                                Properties);
   }
-  if (ObjCInterfaceDecl *SDecl = ClassDecl->getSuperClass())
+  if (ObjCInterfaceDecl *SDecl = ClassDecl->getSuperClass()) {
     while (SDecl) {
       for (const auto *PI : SDecl->all_referenced_protocols()) {
         if (const ObjCProtocolDecl *PDecl = PI->getDefinition())
-          PDecl->collectInheritedProtocolProperties(Property, PropMap);
+          PDecl->collectInheritedProtocolProperties(Property, ProtocolSet,
+                                                    Properties);
       }
       SDecl = SDecl->getSuperClass();
     }
-  
-  if (PropMap.empty())
-    return;
-  
+  }
+
+  if (Properties.empty())
+    return Property;
+
+  ObjCPropertyDecl *OriginalProperty = Property;
+  size_t SelectedIndex = 0;
+  for (const auto &Prop : llvm::enumerate(Properties)) {
+    // Select the 'readwrite' property if such property exists.
+    if (Property->isReadOnly() && !Prop.value()->isReadOnly()) {
+      Property = Prop.value();
+      SelectedIndex = Prop.index();
+    }
+  }
+  if (Property != OriginalProperty) {
+    // Check that the old property is compatible with the new one.
+    Properties[SelectedIndex] = OriginalProperty;
+  }
+
   QualType RHSType = S.Context.getCanonicalType(Property->getType());
-  bool FirsTime = true;
-  for (ObjCInterfaceDecl::ProtocolPropertyMap::iterator
-       I = PropMap.begin(), E = PropMap.end(); I != E; I++) {
-    ObjCPropertyDecl *Prop = I->second;
+  unsigned OriginalAttributes = Property->getPropertyAttributesAsWritten();
+  enum MismatchKind {
+    IncompatibleType = 0,
+    HasNoExpectedAttribute,
+    HasUnexpectedAttribute,
+    DifferentGetter,
+    DifferentSetter
+  };
+  // Represents a property from another protocol that conflicts with the
+  // selected declaration.
+  struct MismatchingProperty {
+    const ObjCPropertyDecl *Prop;
+    MismatchKind Kind;
+    StringRef AttributeName;
+  };
+  SmallVector<MismatchingProperty, 4> Mismatches;
+  for (ObjCPropertyDecl *Prop : Properties) {
+    // Verify the property attributes.
+    unsigned Attr = Prop->getPropertyAttributesAsWritten();
+    if (Attr != OriginalAttributes) {
+      auto Diag = [&](bool OriginalHasAttribute, StringRef AttributeName) {
+        MismatchKind Kind = OriginalHasAttribute ? HasNoExpectedAttribute
+                                                 : HasUnexpectedAttribute;
+        Mismatches.push_back({Prop, Kind, AttributeName});
+      };
+      if (isIncompatiblePropertyAttribute(OriginalAttributes, Attr,
+                                          ObjCPropertyDecl::OBJC_PR_copy)) {
+        Diag(OriginalAttributes & ObjCPropertyDecl::OBJC_PR_copy, "copy");
+        continue;
+      }
+      if (areIncompatiblePropertyAttributes(
+              OriginalAttributes, Attr, ObjCPropertyDecl::OBJC_PR_retain |
+                                            ObjCPropertyDecl::OBJC_PR_strong)) {
+        Diag(OriginalAttributes & (ObjCPropertyDecl::OBJC_PR_retain |
+                                   ObjCPropertyDecl::OBJC_PR_strong),
+             "retain (or strong)");
+        continue;
+      }
+      if (isIncompatiblePropertyAttribute(OriginalAttributes, Attr,
+                                          ObjCPropertyDecl::OBJC_PR_atomic)) {
+        Diag(OriginalAttributes & ObjCPropertyDecl::OBJC_PR_atomic, "atomic");
+        continue;
+      }
+    }
+    if (Property->getGetterName() != Prop->getGetterName()) {
+      Mismatches.push_back({Prop, DifferentGetter, ""});
+      continue;
+    }
+    if (!Property->isReadOnly() && !Prop->isReadOnly() &&
+        Property->getSetterName() != Prop->getSetterName()) {
+      Mismatches.push_back({Prop, DifferentSetter, ""});
+      continue;
+    }
     QualType LHSType = S.Context.getCanonicalType(Prop->getType());
     if (!S.Context.propertyTypesAreCompatible(LHSType, RHSType)) {
       bool IncompatibleObjC = false;
       QualType ConvertedType;
       if (!S.isObjCPointerConversion(RHSType, LHSType, ConvertedType, IncompatibleObjC)
           || IncompatibleObjC) {
-        if (FirsTime) {
-          S.Diag(Property->getLocation(), diag::warn_protocol_property_mismatch)
-            << Property->getType();
-          FirsTime = false;
-        }
-        S.Diag(Prop->getLocation(), diag::note_protocol_property_declare)
-          << Prop->getType();
+        Mismatches.push_back({Prop, IncompatibleType, ""});
+        continue;
       }
     }
   }
-  if (!FirsTime && AtLoc.isValid())
+
+  if (Mismatches.empty())
+    return Property;
+
+  // Diagnose incompatibility.
+  {
+    bool HasIncompatibleAttributes = false;
+    for (const auto &Note : Mismatches)
+      HasIncompatibleAttributes =
+          Note.Kind != IncompatibleType ? true : HasIncompatibleAttributes;
+    // Promote the warning to an error if there are incompatible attributes or
+    // incompatible types together with readwrite/readonly incompatibility.
+    auto Diag = S.Diag(Property->getLocation(),
+                       Property != OriginalProperty || HasIncompatibleAttributes
+                           ? diag::err_protocol_property_mismatch
+                           : diag::warn_protocol_property_mismatch);
+    Diag << Mismatches[0].Kind;
+    switch (Mismatches[0].Kind) {
+    case IncompatibleType:
+      Diag << Property->getType();
+      break;
+    case HasNoExpectedAttribute:
+    case HasUnexpectedAttribute:
+      Diag << Mismatches[0].AttributeName;
+      break;
+    case DifferentGetter:
+      Diag << Property->getGetterName();
+      break;
+    case DifferentSetter:
+      Diag << Property->getSetterName();
+      break;
+    }
+  }
+  for (const auto &Note : Mismatches) {
+    auto Diag =
+        S.Diag(Note.Prop->getLocation(), diag::note_protocol_property_declare)
+        << Note.Kind;
+    switch (Note.Kind) {
+    case IncompatibleType:
+      Diag << Note.Prop->getType();
+      break;
+    case HasNoExpectedAttribute:
+    case HasUnexpectedAttribute:
+      Diag << Note.AttributeName;
+      break;
+    case DifferentGetter:
+      Diag << Note.Prop->getGetterName();
+      break;
+    case DifferentSetter:
+      Diag << Note.Prop->getSetterName();
+      break;
+    }
+  }
+  if (AtLoc.isValid())
     S.Diag(AtLoc, diag::note_property_synthesize);
+
+  return Property;
 }
 
 /// Determine whether any storage attributes were written on the property.
@@ -996,8 +1128,9 @@ Decl *Sema::ActOnPropertyImplDecl(Scope *S,
       }
     }
     if (Synthesize && isa<ObjCProtocolDecl>(property->getDeclContext()))
-      DiagnosePropertyMismatchDeclInProtocols(*this, AtLoc, IDecl, property);
-        
+      property = SelectPropertyForSynthesisFromProtocols(*this, AtLoc, IDecl,
+                                                         property);
+
   } else if ((CatImplClass = dyn_cast<ObjCCategoryImplDecl>(ClassImpDecl))) {
     if (Synthesize) {
       Diag(AtLoc, diag::err_synthesize_category_decl);
@@ -1676,8 +1809,9 @@ static bool SuperClassImplementsProperty(ObjCInterfaceDecl *IDecl,
 
 /// \brief Default synthesizes all properties which must be synthesized
 /// in class's \@implementation.
-void Sema::DefaultSynthesizeProperties(Scope *S, ObjCImplDecl* IMPDecl,
-                                       ObjCInterfaceDecl *IDecl) {
+void Sema::DefaultSynthesizeProperties(Scope *S, ObjCImplDecl *IMPDecl,
+                                       ObjCInterfaceDecl *IDecl,
+                                       SourceLocation AtEnd) {
   ObjCInterfaceDecl::PropertyMap PropMap;
   ObjCInterfaceDecl::PropertyDeclOrder PropertyOrder;
   IDecl->collectPropertiesToImplement(PropMap, PropertyOrder);
@@ -1725,6 +1859,10 @@ void Sema::DefaultSynthesizeProperties(Scope *S, ObjCImplDecl* IMPDecl,
              diag::warn_auto_synthesizing_protocol_property)
           << Prop << Proto;
         Diag(Prop->getLocation(), diag::note_property_declare);
+        std::string FixIt =
+            (Twine("@synthesize ") + Prop->getName() + ";\n\n").str();
+        Diag(AtEnd, diag::note_add_synthesize_directive)
+            << FixItHint::CreateInsertion(AtEnd, FixIt);
       }
       continue;
     }
@@ -1764,7 +1902,8 @@ void Sema::DefaultSynthesizeProperties(Scope *S, ObjCImplDecl* IMPDecl,
   }
 }
 
-void Sema::DefaultSynthesizeProperties(Scope *S, Decl *D) {
+void Sema::DefaultSynthesizeProperties(Scope *S, Decl *D,
+                                       SourceLocation AtEnd) {
   if (!LangOpts.ObjCDefaultSynthProperties || LangOpts.ObjCRuntime.isFragile())
     return;
   ObjCImplementationDecl *IC=dyn_cast_or_null<ObjCImplementationDecl>(D);
@@ -1772,7 +1911,7 @@ void Sema::DefaultSynthesizeProperties(Scope *S, Decl *D) {
     return;
   if (ObjCInterfaceDecl* IDecl = IC->getClassInterface())
     if (!IDecl->isObjCRequiresPropertyDefs())
-      DefaultSynthesizeProperties(S, IC, IDecl);
+      DefaultSynthesizeProperties(S, IC, IDecl, AtEnd);
 }
 
 static void DiagnoseUnimplementedAccessor(
diff --git a/interpreter/llvm/src/tools/clang/lib/Sema/SemaOpenMP.cpp b/interpreter/llvm/src/tools/clang/lib/Sema/SemaOpenMP.cpp
index 43fd055bbc565..01f574b6aeeb4 100644
--- a/interpreter/llvm/src/tools/clang/lib/Sema/SemaOpenMP.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Sema/SemaOpenMP.cpp
@@ -412,6 +412,30 @@ class DSAStackTy final {
     return false;
   }
 
+  /// Apply the check specified in \a Check to all component lists at a given
+  /// level and return true if any issue is found.
+  bool checkMappableExprComponentListsForDeclAtLevel(
+      ValueDecl *VD, unsigned Level,
+      const llvm::function_ref<
+          bool(OMPClauseMappableExprCommon::MappableExprComponentListRef,
+               OpenMPClauseKind)> &Check) {
+    if (isStackEmpty())
+      return false;
+
+    auto StartI = Stack.back().first.begin();
+    auto EndI = Stack.back().first.end();
+    if (std::distance(StartI, EndI) <= (int)Level)
+      return false;
+    std::advance(StartI, Level);
+
+    auto MI = StartI->MappedExprComponents.find(VD);
+    if (MI != StartI->MappedExprComponents.end())
+      for (auto &L : MI->second.Components)
+        if (Check(L, MI->second.Kind))
+          return true;
+    return false;
+  }
+
   /// Create a new mappable expression component list associated with a given
   /// declaration and initialize it with the provided list of components.
   void addMappableExpressionComponents(
@@ -994,9 +1018,8 @@ bool Sema::IsOpenMPCapturedByRef(ValueDecl *D, unsigned Level) {
     bool IsVariableUsedInMapClause = false;
     bool IsVariableAssociatedWithSection = false;
 
-    DSAStack->checkMappableExprComponentListsForDecl(
-        D, /*CurrentRegionOnly=*/true,
-        [&](OMPClauseMappableExprCommon::MappableExprComponentListRef
+    DSAStack->checkMappableExprComponentListsForDeclAtLevel(
+        D, Level, [&](OMPClauseMappableExprCommon::MappableExprComponentListRef
                 MapExprComponents,
             OpenMPClauseKind WhereFoundClauseKind) {
           // Only the map clause information influences how a variable is
@@ -1784,6 +1807,8 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) {
         std::make_pair(".lb.", KmpUInt64Ty),
         std::make_pair(".ub.", KmpUInt64Ty), std::make_pair(".st.", KmpInt64Ty),
         std::make_pair(".liter.", KmpInt32Ty),
+        std::make_pair(".reductions.",
+                       Context.VoidPtrTy.withConst().withRestrict()),
         std::make_pair(StringRef(), QualType()) // __context with shared vars
     };
     ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP,
@@ -2475,9 +2500,8 @@ StmtResult Sema::ActOnOpenMPExecutableDirective(
     Res = ActOnOpenMPTaskwaitDirective(StartLoc, EndLoc);
     break;
   case OMPD_taskgroup:
-    assert(ClausesWithImplicit.empty() &&
-           "No clauses are allowed for 'omp taskgroup' directive");
-    Res = ActOnOpenMPTaskgroupDirective(AStmt, StartLoc, EndLoc);
+    Res = ActOnOpenMPTaskgroupDirective(ClausesWithImplicit, AStmt, StartLoc,
+                                        EndLoc);
     break;
   case OMPD_flush:
     assert(AStmt == nullptr &&
@@ -5044,7 +5068,8 @@ StmtResult Sema::ActOnOpenMPTaskwaitDirective(SourceLocation StartLoc,
   return OMPTaskwaitDirective::Create(Context, StartLoc, EndLoc);
 }
 
-StmtResult Sema::ActOnOpenMPTaskgroupDirective(Stmt *AStmt,
+StmtResult Sema::ActOnOpenMPTaskgroupDirective(ArrayRef<OMPClause *> Clauses,
+                                               Stmt *AStmt,
                                                SourceLocation StartLoc,
                                                SourceLocation EndLoc) {
   if (!AStmt)
@@ -5054,7 +5079,8 @@ StmtResult Sema::ActOnOpenMPTaskgroupDirective(Stmt *AStmt,
 
   getCurFunction()->setHasBranchProtectedScope();
 
-  return OMPTaskgroupDirective::Create(Context, StartLoc, EndLoc, AStmt);
+  return OMPTaskgroupDirective::Create(Context, StartLoc, EndLoc, Clauses,
+                                       AStmt);
 }
 
 StmtResult Sema::ActOnOpenMPFlushDirective(ArrayRef<OMPClause *> Clauses,
@@ -5929,16 +5955,17 @@ StmtResult Sema::ActOnOpenMPTargetParallelForDirective(
                                                B, DSAStack->isCancelRegion());
 }
 
-/// \brief Check for existence of a map clause in the list of clauses.
-static bool HasMapClause(ArrayRef<OMPClause *> Clauses) {
-  for (ArrayRef<OMPClause *>::iterator I = Clauses.begin(), E = Clauses.end();
-       I != E; ++I) {
-    if (*I != nullptr && (*I)->getClauseKind() == OMPC_map) {
-      return true;
-    }
-  }
+/// Check for existence of a clause of the given kind in the list of clauses.
+static bool hasClauses(ArrayRef<OMPClause *> Clauses,
+                       const OpenMPClauseKind K) {
+  return llvm::any_of(
+      Clauses, [K](const OMPClause *C) { return C->getClauseKind() == K; });
+}
 
-  return false;
+template <typename... Params>
+static bool hasClauses(ArrayRef<OMPClause *> Clauses, const OpenMPClauseKind K,
+                       const Params... ClauseTypes) {
+  return hasClauses(Clauses, K) || hasClauses(Clauses, ClauseTypes...);
 }
 
 StmtResult Sema::ActOnOpenMPTargetDataDirective(ArrayRef<OMPClause *> Clauses,
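
The variadic overload above peels one clause kind off the pack per step and bottoms out in the single-kind overload (the non-template overload is preferred once the pack is empty). The same shape in a self-contained sketch:

#include <algorithm>
#include <vector>

static bool hasKind(const std::vector<int> &Kinds, int K) {
  return std::any_of(Kinds.begin(), Kinds.end(),
                     [K](int C) { return C == K; });
}

template <typename... Rest>
static bool hasKind(const std::vector<int> &Kinds, int K, Rest... More) {
  return hasKind(Kinds, K) || hasKind(Kinds, More...); // recurse on the pack
}
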
@@ -5952,8 +5979,9 @@ StmtResult Sema::ActOnOpenMPTargetDataDirective(ArrayRef Clauses,
 
   // OpenMP [2.10.1, Restrictions, p. 97]
   // At least one map clause must appear on the directive.
-  if (!HasMapClause(Clauses)) {
-    Diag(StartLoc, diag::err_omp_no_map_for_directive)
+  if (!hasClauses(Clauses, OMPC_map, OMPC_use_device_ptr)) {
+    Diag(StartLoc, diag::err_omp_no_clause_for_directive)
+        << "'map' or 'use_device_ptr'"
         << getOpenMPDirectiveName(OMPD_target_data);
     return StmtError();
   }
 Sema::ActOnOpenMPTargetEnterDataDirective(ArrayRef<OMPClause *> Clauses,
                                           SourceLocation EndLoc) {
   // OpenMP [2.10.2, Restrictions, p. 99]
   // At least one map clause must appear on the directive.
-  if (!HasMapClause(Clauses)) {
-    Diag(StartLoc, diag::err_omp_no_map_for_directive)
-        << getOpenMPDirectiveName(OMPD_target_enter_data);
+  if (!hasClauses(Clauses, OMPC_map)) {
+    Diag(StartLoc, diag::err_omp_no_clause_for_directive)
+        << "'map'" << getOpenMPDirectiveName(OMPD_target_enter_data);
     return StmtError();
   }
 
 Sema::ActOnOpenMPTargetExitDataDirective(ArrayRef<OMPClause *> Clauses,
                                          SourceLocation EndLoc) {
   // OpenMP [2.10.3, Restrictions, p. 102]
   // At least one map clause must appear on the directive.
-  if (!HasMapClause(Clauses)) {
-    Diag(StartLoc, diag::err_omp_no_map_for_directive)
-        << getOpenMPDirectiveName(OMPD_target_exit_data);
+  if (!hasClauses(Clauses, OMPC_map)) {
+    Diag(StartLoc, diag::err_omp_no_clause_for_directive)
+        << "'map'" << getOpenMPDirectiveName(OMPD_target_exit_data);
     return StmtError();
   }
 
@@ -5998,12 +6026,7 @@ Sema::ActOnOpenMPTargetExitDataDirective(ArrayRef Clauses,
 StmtResult Sema::ActOnOpenMPTargetUpdateDirective(ArrayRef<OMPClause *> Clauses,
                                                   SourceLocation StartLoc,
                                                   SourceLocation EndLoc) {
-  bool seenMotionClause = false;
-  for (auto *C : Clauses) {
-    if (C->getClauseKind() == OMPC_to || C->getClauseKind() == OMPC_from)
-      seenMotionClause = true;
-  }
-  if (!seenMotionClause) {
+  if (!hasClauses(Clauses, OMPC_to, OMPC_from)) {
     Diag(StartLoc, diag::err_omp_at_least_one_motion_clause_required);
     return StmtError();
   }
@@ -6086,6 +6109,33 @@ static bool checkGrainsizeNumTasksClauses(Sema &S,
   return ErrorFound;
 }
 
+static bool checkReductionClauseWithNogroup(Sema &S,
+                                            ArrayRef<OMPClause *> Clauses) {
+  OMPClause *ReductionClause = nullptr;
+  OMPClause *NogroupClause = nullptr;
+  for (auto *C : Clauses) {
+    if (C->getClauseKind() == OMPC_reduction) {
+      ReductionClause = C;
+      if (NogroupClause)
+        break;
+      continue;
+    }
+    if (C->getClauseKind() == OMPC_nogroup) {
+      NogroupClause = C;
+      if (ReductionClause)
+        break;
+      continue;
+    }
+  }
+  if (ReductionClause && NogroupClause) {
+    S.Diag(ReductionClause->getLocStart(), diag::err_omp_reduction_with_nogroup)
+        << SourceRange(NogroupClause->getLocStart(),
+                       NogroupClause->getLocEnd());
+    return true;
+  }
+  return false;
+}
+
 StmtResult Sema::ActOnOpenMPTaskLoopDirective(
     ArrayRef<OMPClause *> Clauses, Stmt *AStmt, SourceLocation StartLoc,
     SourceLocation EndLoc,
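
What the new restriction check rejects, as a source-level example (assumes -fopenmp; the reduction clause on taskloop is what this patch series is wiring up):

void sumLoop(const int *a, int n) {
  int sum = 0;
  // Rejected by the check above: reduction combined with nogroup.
  //   #pragma omp taskloop reduction(+ : sum) nogroup
  #pragma omp taskloop reduction(+ : sum)
  for (int i = 0; i < n; ++i)
    sum += a[i];
}
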
@@ -6112,6 +6162,11 @@ StmtResult Sema::ActOnOpenMPTaskLoopDirective(
   // not appear on the same taskloop directive.
   if (checkGrainsizeNumTasksClauses(*this, Clauses))
     return StmtError();
+  // OpenMP, [2.9.2 taskloop Construct, Restrictions]
+  // If a reduction clause is present on the taskloop directive, the nogroup
+  // clause must not be specified.
+  if (checkReductionClauseWithNogroup(*this, Clauses))
+    return StmtError();
 
   getCurFunction()->setHasBranchProtectedScope();
   return OMPTaskLoopDirective::Create(Context, StartLoc, EndLoc,
@@ -6155,6 +6210,11 @@ StmtResult Sema::ActOnOpenMPTaskLoopSimdDirective(
   // not appear on the same taskloop directive.
   if (checkGrainsizeNumTasksClauses(*this, Clauses))
     return StmtError();
+  // OpenMP, [2.9.2 taskloop Construct, Restrictions]
+  // If a reduction clause is present on the taskloop directive, the nogroup
+  // clause must not be specified.
+  if (checkReductionClauseWithNogroup(*this, Clauses))
+    return StmtError();
 
   getCurFunction()->setHasBranchProtectedScope();
   return OMPTaskLoopSimdDirective::Create(Context, StartLoc, EndLoc,
@@ -6792,6 +6852,7 @@ OMPClause *Sema::ActOnOpenMPSingleExprClause(OpenMPClauseKind Kind, Expr *Expr,
   case OMPC_lastprivate:
   case OMPC_shared:
   case OMPC_reduction:
+  case OMPC_task_reduction:
   case OMPC_linear:
   case OMPC_aligned:
   case OMPC_copyin:
@@ -7095,6 +7156,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause(
   case OMPC_firstprivate:
   case OMPC_lastprivate:
   case OMPC_reduction:
+  case OMPC_task_reduction:
   case OMPC_linear:
   case OMPC_default:
   case OMPC_proc_bind:
@@ -7410,6 +7472,7 @@ OMPClause *Sema::ActOnOpenMPSimpleClause(
   case OMPC_lastprivate:
   case OMPC_shared:
   case OMPC_reduction:
+  case OMPC_task_reduction:
   case OMPC_linear:
   case OMPC_aligned:
   case OMPC_copyin:
@@ -7567,6 +7630,7 @@ OMPClause *Sema::ActOnOpenMPSingleExprWithArgClause(
   case OMPC_lastprivate:
   case OMPC_shared:
   case OMPC_reduction:
+  case OMPC_task_reduction:
   case OMPC_linear:
   case OMPC_aligned:
   case OMPC_copyin:
@@ -7764,6 +7828,7 @@ OMPClause *Sema::ActOnOpenMPClause(OpenMPClauseKind Kind,
   case OMPC_lastprivate:
   case OMPC_shared:
   case OMPC_reduction:
+  case OMPC_task_reduction:
   case OMPC_linear:
   case OMPC_aligned:
   case OMPC_copyin:
@@ -7876,6 +7941,11 @@ OMPClause *Sema::ActOnOpenMPVarListClause(
     Res = ActOnOpenMPReductionClause(VarList, StartLoc, LParenLoc, ColonLoc,
                                      EndLoc, ReductionIdScopeSpec, ReductionId);
     break;
+  case OMPC_task_reduction:
+    Res = ActOnOpenMPTaskReductionClause(VarList, StartLoc, LParenLoc, ColonLoc,
+                                         EndLoc, ReductionIdScopeSpec,
+                                         ReductionId);
+    break;
   case OMPC_linear:
     Res = ActOnOpenMPLinearClause(VarList, TailExpr, StartLoc, LParenLoc,
                                   LinKind, DepLinMapLoc, ColonLoc, EndLoc);
@@ -8844,15 +8914,66 @@ buildDeclareReductionRef(Sema &SemaRef, SourceLocation Loc, SourceRange Range,
   return ExprEmpty();
 }
 
-OMPClause *Sema::ActOnOpenMPReductionClause(
+namespace {
+/// Data for the reduction-based clauses.
+struct ReductionData {
+  /// List of original reduction items.
+  SmallVector<Expr *, 8> Vars;
+  /// List of private copies of the reduction items.
+  SmallVector<Expr *, 8> Privates;
+  /// LHS expressions for the reduction_op expressions.
+  SmallVector<Expr *, 8> LHSs;
+  /// RHS expressions for the reduction_op expressions.
+  SmallVector<Expr *, 8> RHSs;
+  /// Reduction operation expression.
+  SmallVector<Expr *, 8> ReductionOps;
+  /// List of captures for clause.
+  SmallVector<Decl *, 4> ExprCaptures;
+  /// List of postupdate expressions.
+  SmallVector<Expr *, 4> ExprPostUpdates;
+  ReductionData() = delete;
+  /// Reserves required memory for the reduction data.
+  ReductionData(unsigned Size) {
+    Vars.reserve(Size);
+    Privates.reserve(Size);
+    LHSs.reserve(Size);
+    RHSs.reserve(Size);
+    ReductionOps.reserve(Size);
+    ExprCaptures.reserve(Size);
+    ExprPostUpdates.reserve(Size);
+  }
+  /// Stores reduction item and reduction operation only (required for dependent
+  /// reduction item).
+  void push(Expr *Item, Expr *ReductionOp) {
+    Vars.emplace_back(Item);
+    Privates.emplace_back(nullptr);
+    LHSs.emplace_back(nullptr);
+    RHSs.emplace_back(nullptr);
+    ReductionOps.emplace_back(ReductionOp);
+  }
+  /// Stores reduction data.
+  void push(Expr *Item, Expr *Private, Expr *LHS, Expr *RHS,
+            Expr *ReductionOp) {
+    Vars.emplace_back(Item);
+    Privates.emplace_back(Private);
+    LHSs.emplace_back(LHS);
+    RHSs.emplace_back(RHS);
+    ReductionOps.emplace_back(ReductionOp);
+  }
+};
+} // namespace
+
+static bool ActOnOMPReductionKindClause(
+    Sema &S, DSAStackTy *Stack, OpenMPClauseKind ClauseKind,
     ArrayRef<Expr *> VarList, SourceLocation StartLoc, SourceLocation LParenLoc,
     SourceLocation ColonLoc, SourceLocation EndLoc,
     CXXScopeSpec &ReductionIdScopeSpec, const DeclarationNameInfo &ReductionId,
-    ArrayRef<Expr *> UnresolvedReductions) {
+    ArrayRef<Expr *> UnresolvedReductions, ReductionData &RD) {
   auto DN = ReductionId.getName();
   auto OOK = DN.getCXXOverloadedOperator();
   BinaryOperatorKind BOK = BO_Comma;
 
+  ASTContext &Context = S.Context;
   // OpenMP [2.14.3.6, reduction clause]
   // C
   // reduction-identifier is either an identifier or one of the following
@@ -8936,13 +9057,6 @@ OMPClause *Sema::ActOnOpenMPReductionClause(
     ReductionIdRange.setBegin(ReductionIdScopeSpec.getBeginLoc());
   ReductionIdRange.setEnd(ReductionId.getEndLoc());
 
-  SmallVector<Expr *, 8> Vars;
-  SmallVector<Expr *, 8> Privates;
-  SmallVector<Expr *, 8> LHSs;
-  SmallVector<Expr *, 8> RHSs;
-  SmallVector<Expr *, 8> ReductionOps;
-  SmallVector<Decl *, 4> ExprCaptures;
-  SmallVector<Expr *, 4> ExprPostUpdates;
   auto IR = UnresolvedReductions.begin(), ER = UnresolvedReductions.end();
   bool FirstIter = true;
   for (auto RefExpr : VarList) {
@@ -8960,27 +9074,23 @@ OMPClause *Sema::ActOnOpenMPReductionClause(
     SourceLocation ELoc;
     SourceRange ERange;
     Expr *SimpleRefExpr = RefExpr;
-    auto Res = getPrivateItem(*this, SimpleRefExpr, ELoc, ERange,
+    auto Res = getPrivateItem(S, SimpleRefExpr, ELoc, ERange,
                               /*AllowArraySection=*/true);
     if (Res.second) {
-      // It will be analyzed later.
-      Vars.push_back(RefExpr);
-      Privates.push_back(nullptr);
-      LHSs.push_back(nullptr);
-      RHSs.push_back(nullptr);
       // Try to find 'declare reduction' corresponding construct before using
       // builtin/overloaded operators.
       QualType Type = Context.DependentTy;
       CXXCastPath BasePath;
       ExprResult DeclareReductionRef = buildDeclareReductionRef(
-          *this, ELoc, ERange, DSAStack->getCurScope(), ReductionIdScopeSpec,
+          S, ELoc, ERange, Stack->getCurScope(), ReductionIdScopeSpec,
           ReductionId, Type, BasePath, IR == ER ? nullptr : *IR);
-      if (CurContext->isDependentContext() &&
+      Expr *ReductionOp = nullptr;
+      if (S.CurContext->isDependentContext() &&
           (DeclareReductionRef.isUnset() ||
           isa<UnresolvedLookupExpr>(DeclareReductionRef.get())))
-        ReductionOps.push_back(DeclareReductionRef.get());
-      else
-        ReductionOps.push_back(nullptr);
+        ReductionOp = DeclareReductionRef.get();
+      // It will be analyzed later.
+      RD.push(RefExpr, ReductionOp);
     }
     ValueDecl *D = Res.first;
     if (!D)
@@ -9005,21 +9115,19 @@ OMPClause *Sema::ActOnOpenMPReductionClause(
     // OpenMP [2.9.3.3, Restrictions, C/C++, p.3]
     //  A variable that appears in a private clause must not have an incomplete
     //  type or a reference type.
-    if (RequireCompleteType(ELoc, Type,
-                            diag::err_omp_reduction_incomplete_type))
+    if (S.RequireCompleteType(ELoc, Type,
+                              diag::err_omp_reduction_incomplete_type))
       continue;
     // OpenMP [2.14.3.6, reduction clause, Restrictions]
     // A list item that appears in a reduction clause must not be
     // const-qualified.
     if (Type.getNonReferenceType().isConstant(Context)) {
-      Diag(ELoc, diag::err_omp_const_reduction_list_item)
-          << getOpenMPClauseName(OMPC_reduction) << Type << ERange;
+      S.Diag(ELoc, diag::err_omp_const_reduction_list_item) << ERange;
       if (!ASE && !OASE) {
-        bool IsDecl = !VD ||
-                      VD->isThisDeclarationADefinition(Context) ==
-                          VarDecl::DeclarationOnly;
-        Diag(D->getLocation(),
-             IsDecl ? diag::note_previous_decl : diag::note_defined_here)
+        bool IsDecl = !VD || VD->isThisDeclarationADefinition(Context) ==
+                                 VarDecl::DeclarationOnly;
+        S.Diag(D->getLocation(),
+               IsDecl ? diag::note_previous_decl : diag::note_defined_here)
             << D;
       }
       continue;
@@ -9030,10 +9138,11 @@ OMPClause *Sema::ActOnOpenMPReductionClause(
     if (!ASE && !OASE && VD) {
       VarDecl *VDDef = VD->getDefinition();
       if (VD->getType()->isReferenceType() && VDDef && VDDef->hasInit()) {
-        DSARefChecker Check(DSAStack);
+        DSARefChecker Check(Stack);
         if (Check.Visit(VDDef->getInit())) {
-          Diag(ELoc, diag::err_omp_reduction_ref_type_arg) << ERange;
-          Diag(VDDef->getLocation(), diag::note_defined_here) << VDDef;
+          S.Diag(ELoc, diag::err_omp_reduction_ref_type_arg)
+              << getOpenMPClauseName(ClauseKind) << ERange;
+          S.Diag(VDDef->getLocation(), diag::note_defined_here) << VDDef;
           continue;
         }
       }
@@ -9051,17 +9160,17 @@ OMPClause *Sema::ActOnOpenMPReductionClause(
     //  but a list item can appear only once in the reduction clauses for that
     //  directive.
     DSAStackTy::DSAVarData DVar;
-    DVar = DSAStack->getTopDSA(D, false);
+    DVar = Stack->getTopDSA(D, false);
     if (DVar.CKind == OMPC_reduction) {
-      Diag(ELoc, diag::err_omp_once_referenced)
-          << getOpenMPClauseName(OMPC_reduction);
+      S.Diag(ELoc, diag::err_omp_once_referenced)
+          << getOpenMPClauseName(ClauseKind);
       if (DVar.RefExpr)
-        Diag(DVar.RefExpr->getExprLoc(), diag::note_omp_referenced);
+        S.Diag(DVar.RefExpr->getExprLoc(), diag::note_omp_referenced);
     } else if (DVar.CKind != OMPC_unknown) {
-      Diag(ELoc, diag::err_omp_wrong_dsa)
+      S.Diag(ELoc, diag::err_omp_wrong_dsa)
           << getOpenMPClauseName(DVar.CKind)
           << getOpenMPClauseName(OMPC_reduction);
-      ReportOriginalDSA(*this, DSAStack, D, DVar);
+      ReportOriginalDSA(S, Stack, D, DVar);
       continue;
     }
 
@@ -9069,16 +9178,16 @@ OMPClause *Sema::ActOnOpenMPReductionClause(
     //  A list item that appears in a reduction clause of a worksharing
     //  construct must be shared in the parallel regions to which any of the
     //  worksharing regions arising from the worksharing construct bind.
-    OpenMPDirectiveKind CurrDir = DSAStack->getCurrentDirective();
+    OpenMPDirectiveKind CurrDir = Stack->getCurrentDirective();
     if (isOpenMPWorksharingDirective(CurrDir) &&
         !isOpenMPParallelDirective(CurrDir) &&
         !isOpenMPTeamsDirective(CurrDir)) {
-      DVar = DSAStack->getImplicitDSA(D, true);
+      DVar = Stack->getImplicitDSA(D, true);
       if (DVar.CKind != OMPC_shared) {
-        Diag(ELoc, diag::err_omp_required_access)
+        S.Diag(ELoc, diag::err_omp_required_access)
             << getOpenMPClauseName(OMPC_reduction)
             << getOpenMPClauseName(OMPC_shared);
-        ReportOriginalDSA(*this, DSAStack, D, DVar);
+        ReportOriginalDSA(S, Stack, D, DVar);
         continue;
       }
     }
@@ -9087,24 +9196,20 @@ OMPClause *Sema::ActOnOpenMPReductionClause(
     // builtin/overloaded operators.
     CXXCastPath BasePath;
     ExprResult DeclareReductionRef = buildDeclareReductionRef(
-        *this, ELoc, ERange, DSAStack->getCurScope(), ReductionIdScopeSpec,
+        S, ELoc, ERange, Stack->getCurScope(), ReductionIdScopeSpec,
         ReductionId, Type, BasePath, IR == ER ? nullptr : *IR);
     if (DeclareReductionRef.isInvalid())
       continue;
-    if (CurContext->isDependentContext() &&
+    if (S.CurContext->isDependentContext() &&
         (DeclareReductionRef.isUnset() ||
         isa<UnresolvedLookupExpr>(DeclareReductionRef.get()))) {
-      Vars.push_back(RefExpr);
-      Privates.push_back(nullptr);
-      LHSs.push_back(nullptr);
-      RHSs.push_back(nullptr);
-      ReductionOps.push_back(DeclareReductionRef.get());
+      RD.push(RefExpr, DeclareReductionRef.get());
       continue;
     }
     if (BOK == BO_Comma && DeclareReductionRef.isUnset()) {
       // Not allowed reduction identifier is found.
-      Diag(ReductionId.getLocStart(),
-           diag::err_omp_unknown_reduction_identifier)
+      S.Diag(ReductionId.getLocStart(),
+             diag::err_omp_unknown_reduction_identifier)
           << Type << ReductionIdRange;
       continue;
     }
@@ -9120,28 +9225,27 @@ OMPClause *Sema::ActOnOpenMPReductionClause(
     if (DeclareReductionRef.isUnset()) {
       if ((BOK == BO_GT || BOK == BO_LT) &&
           !(Type->isScalarType() ||
-            (getLangOpts().CPlusPlus && Type->isArithmeticType()))) {
-        Diag(ELoc, diag::err_omp_clause_not_arithmetic_type_arg)
-            << getLangOpts().CPlusPlus;
+            (S.getLangOpts().CPlusPlus && Type->isArithmeticType()))) {
+        S.Diag(ELoc, diag::err_omp_clause_not_arithmetic_type_arg)
+            << getOpenMPClauseName(ClauseKind) << S.getLangOpts().CPlusPlus;
         if (!ASE && !OASE) {
-          bool IsDecl = !VD ||
-                        VD->isThisDeclarationADefinition(Context) ==
-                            VarDecl::DeclarationOnly;
-          Diag(D->getLocation(),
-               IsDecl ? diag::note_previous_decl : diag::note_defined_here)
+          bool IsDecl = !VD || VD->isThisDeclarationADefinition(Context) ==
+                                   VarDecl::DeclarationOnly;
+          S.Diag(D->getLocation(),
+                 IsDecl ? diag::note_previous_decl : diag::note_defined_here)
               << D;
         }
         continue;
       }
       if ((BOK == BO_OrAssign || BOK == BO_AndAssign || BOK == BO_XorAssign) &&
-          !getLangOpts().CPlusPlus && Type->isFloatingType()) {
-        Diag(ELoc, diag::err_omp_clause_floating_type_arg);
+          !S.getLangOpts().CPlusPlus && Type->isFloatingType()) {
+        S.Diag(ELoc, diag::err_omp_clause_floating_type_arg)
+            << getOpenMPClauseName(ClauseKind);
         if (!ASE && !OASE) {
-          bool IsDecl = !VD ||
-                        VD->isThisDeclarationADefinition(Context) ==
-                            VarDecl::DeclarationOnly;
-          Diag(D->getLocation(),
-               IsDecl ? diag::note_previous_decl : diag::note_defined_here)
+          bool IsDecl = !VD || VD->isThisDeclarationADefinition(Context) ==
+                                   VarDecl::DeclarationOnly;
+          S.Diag(D->getLocation(),
+                 IsDecl ? diag::note_previous_decl : diag::note_defined_here)
               << D;
         }
         continue;
@@ -9149,9 +9253,9 @@ OMPClause *Sema::ActOnOpenMPReductionClause(
     }
 
     Type = Type.getNonLValueExprType(Context).getUnqualifiedType();
-    auto *LHSVD = buildVarDecl(*this, ELoc, Type, ".reduction.lhs",
+    auto *LHSVD = buildVarDecl(S, ELoc, Type, ".reduction.lhs",
                                D->hasAttrs() ? &D->getAttrs() : nullptr);
-    auto *RHSVD = buildVarDecl(*this, ELoc, Type, D->getName(),
+    auto *RHSVD = buildVarDecl(S, ELoc, Type, D->getName(),
                                D->hasAttrs() ? &D->getAttrs() : nullptr);
     auto PrivateTy = Type;
     if (OASE ||
@@ -9163,19 +9267,20 @@ OMPClause *Sema::ActOnOpenMPReductionClause(
       // For array subscripts or single variables Private Ty is the same as Type
       // (type of the variable or single array element).
       PrivateTy = Context.getVariableArrayType(
-          Type, new (Context) OpaqueValueExpr(SourceLocation(),
-                                              Context.getSizeType(), VK_RValue),
+          Type,
+          new (Context) OpaqueValueExpr(SourceLocation(), Context.getSizeType(),
+                                        VK_RValue),
           ArrayType::Normal, /*IndexTypeQuals=*/0, SourceRange());
     } else if (!ASE && !OASE &&
                Context.getAsArrayType(D->getType().getNonReferenceType()))
       PrivateTy = D->getType().getNonReferenceType();
     // Private copy.
-    auto *PrivateVD = buildVarDecl(*this, ELoc, PrivateTy, D->getName(),
+    auto *PrivateVD = buildVarDecl(S, ELoc, PrivateTy, D->getName(),
                                    D->hasAttrs() ? &D->getAttrs() : nullptr);
     // Add initializer for private variable.
     Expr *Init = nullptr;
-    auto *LHSDRE = buildDeclRefExpr(*this, LHSVD, Type, ELoc);
-    auto *RHSDRE = buildDeclRefExpr(*this, RHSVD, Type, ELoc);
+    auto *LHSDRE = buildDeclRefExpr(S, LHSVD, Type, ELoc);
+    auto *RHSDRE = buildDeclRefExpr(S, RHSVD, Type, ELoc);
     if (DeclareReductionRef.isUsable()) {
       auto *DRDRef = DeclareReductionRef.getAs<DeclRefExpr>();
       auto *DRD = cast<OMPDeclareReductionDecl>(DRDRef->getDecl());
@@ -9192,13 +9297,13 @@ OMPClause *Sema::ActOnOpenMPReductionClause(
       case BO_LOr:
         // '+', '-', '^', '|', '||' reduction ops - initializer is '0'.
         if (Type->isScalarType() || Type->isAnyComplexType())
-          Init = ActOnIntegerConstant(ELoc, /*Val=*/0).get();
+          Init = S.ActOnIntegerConstant(ELoc, /*Val=*/0).get();
         break;
       case BO_Mul:
       case BO_LAnd:
         if (Type->isScalarType() || Type->isAnyComplexType()) {
           // '*' and '&&' reduction ops - initializer is '1'.
-          Init = ActOnIntegerConstant(ELoc, /*Val=*/1).get();
+          Init = S.ActOnIntegerConstant(ELoc, /*Val=*/1).get();
         }
         break;
       case BO_And: {
@@ -9221,7 +9326,7 @@ OMPClause *Sema::ActOnOpenMPReductionClause(
         if (Init && OrigType->isAnyComplexType()) {
           // Init = 0xFFFF + 0xFFFFi;
           auto *Im = new (Context) ImaginaryLiteral(Init, OrigType);
-          Init = CreateBuiltinBinOp(ELoc, BO_Add, Init, Im).get();
+          Init = S.CreateBuiltinBinOp(ELoc, BO_Add, Init, Im).get();
         }
         Type = OrigType;
         break;
@@ -9238,15 +9343,14 @@ OMPClause *Sema::ActOnOpenMPReductionClause(
           QualType IntTy =
               Context.getIntTypeForBitwidth(Size, /*Signed=*/IsSigned);
           llvm::APInt InitValue =
-              (BOK != BO_LT)
-                  ? IsSigned ? llvm::APInt::getSignedMinValue(Size)
-                             : llvm::APInt::getMinValue(Size)
-                  : IsSigned ? llvm::APInt::getSignedMaxValue(Size)
-                             : llvm::APInt::getMaxValue(Size);
+              (BOK != BO_LT) ? IsSigned ? llvm::APInt::getSignedMinValue(Size)
+                                        : llvm::APInt::getMinValue(Size)
+                             : IsSigned ? llvm::APInt::getSignedMaxValue(Size)
+                                        : llvm::APInt::getMaxValue(Size);
           Init = IntegerLiteral::Create(Context, InitValue, IntTy, ELoc);
           if (Type->isPointerType()) {
             // Cast to pointer type.
-            auto CastExpr = BuildCStyleCastExpr(
+            auto CastExpr = S.BuildCStyleCastExpr(
                 SourceLocation(), Context.getTrivialTypeSourceInfo(Type, ELoc),
                 SourceLocation(), Init);
             if (CastExpr.isInvalid())
@@ -9287,20 +9391,19 @@ OMPClause *Sema::ActOnOpenMPReductionClause(
         llvm_unreachable("Unexpected reduction operation");
       }
     }
-    if (Init && DeclareReductionRef.isUnset()) {
-      AddInitializerToDecl(RHSVD, Init, /*DirectInit=*/false);
-    } else if (!Init)
-      ActOnUninitializedDecl(RHSVD);
+    if (Init && DeclareReductionRef.isUnset())
+      S.AddInitializerToDecl(RHSVD, Init, /*DirectInit=*/false);
+    else if (!Init)
+      S.ActOnUninitializedDecl(RHSVD);
     if (RHSVD->isInvalidDecl())
       continue;
     if (!RHSVD->hasInit() && DeclareReductionRef.isUnset()) {
-      Diag(ELoc, diag::err_omp_reduction_id_not_compatible) << Type
-                                                            << ReductionIdRange;
-      bool IsDecl =
-          !VD ||
-          VD->isThisDeclarationADefinition(Context) == VarDecl::DeclarationOnly;
-      Diag(D->getLocation(),
-           IsDecl ? diag::note_previous_decl : diag::note_defined_here)
+      S.Diag(ELoc, diag::err_omp_reduction_id_not_compatible)
+          << Type << ReductionIdRange;
+      bool IsDecl = !VD || VD->isThisDeclarationADefinition(Context) ==
+                               VarDecl::DeclarationOnly;
+      S.Diag(D->getLocation(),
+             IsDecl ? diag::note_previous_decl : diag::note_defined_here)
           << D;
       continue;
     }
@@ -9308,16 +9411,16 @@ OMPClause *Sema::ActOnOpenMPReductionClause(
     // codegen.
     PrivateVD->setInit(RHSVD->getInit());
     PrivateVD->setInitStyle(RHSVD->getInitStyle());
-    auto *PrivateDRE = buildDeclRefExpr(*this, PrivateVD, PrivateTy, ELoc);
+    auto *PrivateDRE = buildDeclRefExpr(S, PrivateVD, PrivateTy, ELoc);
     ExprResult ReductionOp;
     if (DeclareReductionRef.isUsable()) {
       QualType RedTy = DeclareReductionRef.get()->getType();
       QualType PtrRedTy = Context.getPointerType(RedTy);
-      ExprResult LHS = CreateBuiltinUnaryOp(ELoc, UO_AddrOf, LHSDRE);
-      ExprResult RHS = CreateBuiltinUnaryOp(ELoc, UO_AddrOf, RHSDRE);
+      ExprResult LHS = S.CreateBuiltinUnaryOp(ELoc, UO_AddrOf, LHSDRE);
+      ExprResult RHS = S.CreateBuiltinUnaryOp(ELoc, UO_AddrOf, RHSDRE);
       if (!BasePath.empty()) {
-        LHS = DefaultLvalueConversion(LHS.get());
-        RHS = DefaultLvalueConversion(RHS.get());
+        LHS = S.DefaultLvalueConversion(LHS.get());
+        RHS = S.DefaultLvalueConversion(RHS.get());
         LHS = ImplicitCastExpr::Create(Context, PtrRedTy,
                                        CK_UncheckedDerivedToBase, LHS.get(),
                                        &BasePath, LHS.get()->getValueKind());
@@ -9330,27 +9433,27 @@ OMPClause *Sema::ActOnOpenMPReductionClause(
       QualType FnTy = Context.getFunctionType(Context.VoidTy, Params, EPI);
       auto *OVE = new (Context) OpaqueValueExpr(
           ELoc, Context.getPointerType(FnTy), VK_RValue, OK_Ordinary,
-          DefaultLvalueConversion(DeclareReductionRef.get()).get());
+          S.DefaultLvalueConversion(DeclareReductionRef.get()).get());
       Expr *Args[] = {LHS.get(), RHS.get()};
       ReductionOp = new (Context)
           CallExpr(Context, OVE, Args, Context.VoidTy, VK_RValue, ELoc);
     } else {
-      ReductionOp = BuildBinOp(DSAStack->getCurScope(),
-                               ReductionId.getLocStart(), BOK, LHSDRE, RHSDRE);
+      ReductionOp = S.BuildBinOp(
+          Stack->getCurScope(), ReductionId.getLocStart(), BOK, LHSDRE, RHSDRE);
       if (ReductionOp.isUsable()) {
         if (BOK != BO_LT && BOK != BO_GT) {
           ReductionOp =
-              BuildBinOp(DSAStack->getCurScope(), ReductionId.getLocStart(),
-                         BO_Assign, LHSDRE, ReductionOp.get());
+              S.BuildBinOp(Stack->getCurScope(), ReductionId.getLocStart(),
+                           BO_Assign, LHSDRE, ReductionOp.get());
         } else {
           auto *ConditionalOp = new (Context) ConditionalOperator(
               ReductionOp.get(), SourceLocation(), LHSDRE, SourceLocation(),
               RHSDRE, Type, VK_LValue, OK_Ordinary);
           ReductionOp =
-              BuildBinOp(DSAStack->getCurScope(), ReductionId.getLocStart(),
-                         BO_Assign, LHSDRE, ConditionalOp);
+              S.BuildBinOp(Stack->getCurScope(), ReductionId.getLocStart(),
+                           BO_Assign, LHSDRE, ConditionalOp);
         }
-        ReductionOp = ActOnFinishFullExpr(ReductionOp.get());
+        ReductionOp = S.ActOnFinishFullExpr(ReductionOp.get());
       }
       if (ReductionOp.isInvalid())
         continue;
@@ -9358,48 +9461,86 @@ OMPClause *Sema::ActOnOpenMPReductionClause(
 
     DeclRefExpr *Ref = nullptr;
     Expr *VarsExpr = RefExpr->IgnoreParens();
-    if (!VD && !CurContext->isDependentContext()) {
+    if (!VD && !S.CurContext->isDependentContext()) {
       if (ASE || OASE) {
-        TransformExprToCaptures RebuildToCapture(*this, D);
+        TransformExprToCaptures RebuildToCapture(S, D);
         VarsExpr =
             RebuildToCapture.TransformExpr(RefExpr->IgnoreParens()).get();
         Ref = RebuildToCapture.getCapturedExpr();
       } else {
-        VarsExpr = Ref =
-            buildCapture(*this, D, SimpleRefExpr, /*WithInit=*/false);
+        VarsExpr = Ref = buildCapture(S, D, SimpleRefExpr, /*WithInit=*/false);
       }
-      if (!IsOpenMPCapturedDecl(D)) {
-        ExprCaptures.push_back(Ref->getDecl());
+      if (!S.IsOpenMPCapturedDecl(D)) {
+        RD.ExprCaptures.emplace_back(Ref->getDecl());
         if (Ref->getDecl()->hasAttr<OMPCaptureNoInitAttr>()) {
-          ExprResult RefRes = DefaultLvalueConversion(Ref);
+          ExprResult RefRes = S.DefaultLvalueConversion(Ref);
           if (!RefRes.isUsable())
             continue;
           ExprResult PostUpdateRes =
-              BuildBinOp(DSAStack->getCurScope(), ELoc, BO_Assign,
-                         SimpleRefExpr, RefRes.get());
+              S.BuildBinOp(Stack->getCurScope(), ELoc, BO_Assign, SimpleRefExpr,
+                           RefRes.get());
           if (!PostUpdateRes.isUsable())
             continue;
-          ExprPostUpdates.push_back(
-              IgnoredValueConversions(PostUpdateRes.get()).get());
+          if (isOpenMPTaskingDirective(Stack->getCurrentDirective()) ||
+              Stack->getCurrentDirective() == OMPD_taskgroup) {
+            S.Diag(RefExpr->getExprLoc(),
+                   diag::err_omp_reduction_non_addressable_expression)
+                << RefExpr->getSourceRange();
+            continue;
+          }
+          RD.ExprPostUpdates.emplace_back(
+              S.IgnoredValueConversions(PostUpdateRes.get()).get());
         }
       }
     }
-    DSAStack->addDSA(D, RefExpr->IgnoreParens(), OMPC_reduction, Ref);
-    Vars.push_back(VarsExpr);
-    Privates.push_back(PrivateDRE);
-    LHSs.push_back(LHSDRE);
-    RHSs.push_back(RHSDRE);
-    ReductionOps.push_back(ReductionOp.get());
+    // All reduction items are still marked as reduction (so as not to
+    // increase code base size).
+    Stack->addDSA(D, RefExpr->IgnoreParens(), OMPC_reduction, Ref);
+    RD.push(VarsExpr, PrivateDRE, LHSDRE, RHSDRE, ReductionOp.get());
   }
+  return RD.Vars.empty();
+}
 
-  if (Vars.empty())
+OMPClause *Sema::ActOnOpenMPReductionClause(
+    ArrayRef<Expr *> VarList, SourceLocation StartLoc, SourceLocation LParenLoc,
+    SourceLocation ColonLoc, SourceLocation EndLoc,
+    CXXScopeSpec &ReductionIdScopeSpec, const DeclarationNameInfo &ReductionId,
+    ArrayRef<Expr *> UnresolvedReductions) {
+  ReductionData RD(VarList.size());
+
+  if (ActOnOMPReductionKindClause(*this, DSAStack, OMPC_reduction, VarList,
+                                  StartLoc, LParenLoc, ColonLoc, EndLoc,
+                                  ReductionIdScopeSpec, ReductionId,
+                                  UnresolvedReductions, RD))
     return nullptr;
 
   return OMPReductionClause::Create(
-      Context, StartLoc, LParenLoc, ColonLoc, EndLoc, Vars,
-      ReductionIdScopeSpec.getWithLocInContext(Context), ReductionId, Privates,
-      LHSs, RHSs, ReductionOps, buildPreInits(Context, ExprCaptures),
-      buildPostUpdate(*this, ExprPostUpdates));
+      Context, StartLoc, LParenLoc, ColonLoc, EndLoc, RD.Vars,
+      ReductionIdScopeSpec.getWithLocInContext(Context), ReductionId,
+      RD.Privates, RD.LHSs, RD.RHSs, RD.ReductionOps,
+      buildPreInits(Context, RD.ExprCaptures),
+      buildPostUpdate(*this, RD.ExprPostUpdates));
+}
+
+OMPClause *Sema::ActOnOpenMPTaskReductionClause(
+    ArrayRef<Expr *> VarList, SourceLocation StartLoc, SourceLocation LParenLoc,
+    SourceLocation ColonLoc, SourceLocation EndLoc,
+    CXXScopeSpec &ReductionIdScopeSpec, const DeclarationNameInfo &ReductionId,
+    ArrayRef<Expr *> UnresolvedReductions) {
+  ReductionData RD(VarList.size());
+
+  if (ActOnOMPReductionKindClause(*this, DSAStack, OMPC_task_reduction,
+                                  VarList, StartLoc, LParenLoc, ColonLoc,
+                                  EndLoc, ReductionIdScopeSpec, ReductionId,
+                                  UnresolvedReductions, RD))
+    return nullptr;
+
+  return OMPTaskReductionClause::Create(
+      Context, StartLoc, LParenLoc, ColonLoc, EndLoc, RD.Vars,
+      ReductionIdScopeSpec.getWithLocInContext(Context), ReductionId,
+      RD.Privates, RD.LHSs, RD.RHSs, RD.ReductionOps,
+      buildPreInits(Context, RD.ExprCaptures),
+      buildPostUpdate(*this, RD.ExprPostUpdates));
 }
 
 bool Sema::CheckOpenMPLinearModifier(OpenMPLinearClauseKind LinKind,
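
Both entry points above are now thin wrappers over the shared worker, parameterized only by the OpenMPClauseKind and the clause node they build. A hedged sketch of how a further reduction-flavored clause could reuse the same scaffolding (OMPC_in_reduction and OMPInReductionClause are hypothetical placeholders here, not added by this patch):

    OMPClause *Sema::ActOnOpenMPInReductionClause(/* same parameter list */) {
      ReductionData RD(VarList.size());
      // The worker returns true when no valid reduction items survive analysis.
      if (ActOnOMPReductionKindClause(*this, DSAStack, OMPC_in_reduction,
                                      VarList, StartLoc, LParenLoc, ColonLoc,
                                      EndLoc, ReductionIdScopeSpec, ReductionId,
                                      UnresolvedReductions, RD))
        return nullptr;
      return OMPInReductionClause::Create(/* Context, locations, RD lists */);
    }
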
diff --git a/interpreter/llvm/src/tools/clang/lib/Sema/SemaOverload.cpp b/interpreter/llvm/src/tools/clang/lib/Sema/SemaOverload.cpp
index 51794160278ca..36f24fd9c463a 100644
--- a/interpreter/llvm/src/tools/clang/lib/Sema/SemaOverload.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Sema/SemaOverload.cpp
@@ -48,13 +48,13 @@ static bool functionHasPassObjectSizeParams(const FunctionDecl *FD) {
 /// A convenience routine for creating a decayed reference to a function.
 static ExprResult
 CreateFunctionRefExpr(Sema &S, FunctionDecl *Fn, NamedDecl *FoundDecl,
-                      bool HadMultipleCandidates,
-                      SourceLocation Loc = SourceLocation(), 
+                      const Expr *Base, bool HadMultipleCandidates,
+                      SourceLocation Loc = SourceLocation(),
                       const DeclarationNameLoc &LocInfo = DeclarationNameLoc()){
   if (S.DiagnoseUseOfDecl(FoundDecl, Loc))
-    return ExprError(); 
+    return ExprError();
   // If FoundDecl is different from Fn (such as if one is a template
-  // and the other a specialization), make sure DiagnoseUseOfDecl is 
+  // and the other a specialization), make sure DiagnoseUseOfDecl is
   // called on both.
   // FIXME: This would be more comprehensively addressed by modifying
   // DiagnoseUseOfDecl to accept both the FoundDecl and the decl
@@ -68,7 +68,7 @@ CreateFunctionRefExpr(Sema &S, FunctionDecl *Fn, NamedDecl *FoundDecl,
   if (HadMultipleCandidates)
     DRE->setHadMultipleCandidates(true);
 
-  S.MarkDeclRefReferenced(DRE);
+  S.MarkDeclRefReferenced(DRE, Base);
   return S.ImpCastExprToType(DRE, S.Context.getPointerType(DRE->getType()),
                              CK_FunctionToPointerDecay);
 }
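
The new Base parameter threads the object expression of a member access through CreateFunctionRefExpr into MarkDeclRefReferenced, so reference marking can see the whole `base.fn()` expression rather than the bare callee. A sketch of an assumed call site (MemExpr is a hypothetical MemberExpr for the access being built):

    ExprResult FnRef = CreateFunctionRefExpr(
        S, Method, FoundDecl, /*Base=*/MemExpr->getBase(),
        HadMultipleCandidates, MemExpr->getMemberLoc());
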
@@ -79,7 +79,7 @@ static bool IsStandardConversion(Sema &S, Expr* From, QualType ToType,
                                  bool CStyle,
                                  bool AllowObjCWritebackConversion);
 
-static bool IsTransparentUnionStandardConversion(Sema &S, Expr* From, 
+static bool IsTransparentUnionStandardConversion(Sema &S, Expr* From,
                                                  QualType &ToType,
                                                  bool InOverloadResolution,
                                                  StandardConversionSequence &SCS,
@@ -330,13 +330,13 @@ StandardConversionSequence::getNarrowingKind(ASTContext &Ctx,
     } else if (FromType->isIntegralType(Ctx) && ToType->isRealFloatingType()) {
       llvm::APSInt IntConstantValue;
       const Expr *Initializer = IgnoreNarrowingConversion(Converted);
+      assert(Initializer && "Unknown conversion expression");
 
       // If it's value-dependent, we can't tell whether it's narrowing.
       if (Initializer->isValueDependent())
         return NK_Dependent_Narrowing;
 
-      if (Initializer &&
-          Initializer->isIntegerConstantExpr(IntConstantValue, Ctx)) {
+      if (Initializer->isIntegerConstantExpr(IntConstantValue, Ctx)) {
         // Convert the integer to the floating type.
         llvm::APFloat Result(Ctx.getFloatTypeSemantics(ToType));
         Result.convertFromAPInt(IntConstantValue, IntConstantValue.isSigned(),
@@ -852,7 +852,7 @@ namespace {
       Expr *Saved;
     };
     SmallVector<Entry, 2> Entries;
-    
+
   public:
     void save(Sema &S, Expr *&E) {
       assert(E->hasPlaceholderType(BuiltinType::ARCUnbridgedCast));
@@ -863,7 +863,7 @@ namespace {
 
     void restore() {
       for (SmallVectorImpl<Entry>::iterator
-             i = Entries.begin(), e = Entries.end(); i != e; ++i) 
+             i = Entries.begin(), e = Entries.end(); i != e; ++i)
         *i->Addr = i->Saved;
     }
   };
@@ -1368,9 +1368,9 @@ Sema::TryImplicitConversion(Expr *From, QualType ToType,
                             bool InOverloadResolution,
                             bool CStyle,
                             bool AllowObjCWritebackConversion) {
-  return ::TryImplicitConversion(*this, From, ToType, 
+  return ::TryImplicitConversion(*this, From, ToType,
                                  SuppressUserConversions, AllowExplicit,
-                                 InOverloadResolution, CStyle, 
+                                 InOverloadResolution, CStyle,
                                  AllowObjCWritebackConversion,
                                  /*AllowObjCConversionOnExplicit=*/false);
 }
@@ -1396,7 +1396,7 @@ Sema::PerformImplicitConversion(Expr *From, QualType ToType,
 
   // Objective-C ARC: Determine whether we will allow the writeback conversion.
   bool AllowObjCWritebackConversion
-    = getLangOpts().ObjCAutoRefCount && 
+    = getLangOpts().ObjCAutoRefCount &&
       (Action == AA_Passing || Action == AA_Sending);
   if (getLangOpts().ObjC1)
     CheckObjCBridgeRelatedConversions(From->getLocStart(),
@@ -1592,15 +1592,15 @@ static bool IsStandardConversion(Sema &S, Expr* From, QualType ToType,
         // if the function type matches except for [[noreturn]], it's ok
         if (!S.IsFunctionConversion(FromType,
               S.ExtractUnqualifiedFunctionType(ToType), resultTy))
-          // otherwise, only a boolean conversion is standard   
-          if (!ToType->isBooleanType()) 
-            return false; 
+          // otherwise, only a boolean conversion is standard
+          if (!ToType->isBooleanType())
+            return false;
       }
 
       // Check if the "from" expression is taking the address of an overloaded
       // function and recompute the FromType accordingly. Take advantage of the
       // fact that non-static member functions *must* have such an address-of
-      // expression. 
+      // expression.
       CXXMethodDecl *Method = dyn_cast<CXXMethodDecl>(Fn);
       if (Method && !Method->isStatic()) {
         assert(isa<UnaryOperator>(From->IgnoreParens()) &&
@@ -1638,7 +1638,7 @@ static bool IsStandardConversion(Sema &S, Expr* From, QualType ToType,
     SCS.First = ICK_Lvalue_To_Rvalue;
 
     // C11 6.3.2.1p2:
-    //   ... if the lvalue has atomic type, the value has the non-atomic version 
+    //   ... if the lvalue has atomic type, the value has the non-atomic version
     //   of the type of the lvalue ...
     if (const AtomicType *Atomic = FromType->getAs<AtomicType>())
       FromType = Atomic->getValueType();
@@ -1890,12 +1890,12 @@ static bool IsStandardConversion(Sema &S, Expr* From, QualType ToType,
 }
 
 static bool
-IsTransparentUnionStandardConversion(Sema &S, Expr* From, 
+IsTransparentUnionStandardConversion(Sema &S, Expr* From,
                                      QualType &ToType,
                                      bool InOverloadResolution,
                                      StandardConversionSequence &SCS,
                                      bool CStyle) {
-    
+
   const RecordType *UT = ToType->getAsUnionType();
   if (!UT || !UT->getDecl()->hasAttr<TransparentUnionAttr>())
     return false;
@@ -2129,7 +2129,7 @@ BuildSimilarlyQualifiedPointerType(const Type *FromPtr,
          "Invalid similarly-qualified pointer type");
 
   /// Conversions to 'id' subsume cv-qualifier conversions.
-  if (ToType->isObjCIdType() || ToType->isObjCQualifiedIdType()) 
+  if (ToType->isObjCIdType() || ToType->isObjCQualifiedIdType())
     return ToType.getUnqualifiedType();
 
   QualType CanonFromPointee
@@ -2139,7 +2139,7 @@ BuildSimilarlyQualifiedPointerType(const Type *FromPtr,
 
   if (StripObjCLifetime)
     Quals.removeObjCLifetime();
-  
+
   // Exact qualifier match -> return the pointer type we're converting to.
   if (CanonToPointee.getLocalQualifiers() == Quals) {
     // ToType is exactly what we need. Return it.
@@ -2323,21 +2323,21 @@ bool Sema::IsPointerConversion(Expr *From, QualType FromType, QualType ToType,
                                                        ToType, Context);
     return true;
   }
-  
+
   return false;
 }
- 
+
 /// \brief Adopt the given qualifiers for the given type.
 static QualType AdoptQualifiers(ASTContext &Context, QualType T, Qualifiers Qs){
   Qualifiers TQs = T.getQualifiers();
-  
+
   // Check whether qualifiers already match.
   if (TQs == Qs)
     return T;
-  
+
   if (Qs.compatiblyIncludes(TQs))
     return Context.getQualifiedType(T, Qs);
-  
+
   return Context.getQualifiedType(T.getUnqualifiedType(), Qs);
 }
 
@@ -2352,7 +2352,7 @@ bool Sema::isObjCPointerConversion(QualType FromType, QualType ToType,
 
   // The set of qualifiers on the type we're converting from.
   Qualifiers FromQualifiers = FromType.getQualifiers();
-  
+
   // First, we handle all conversions on ObjC object pointer types.
   const ObjCObjectPointerType* ToObjCPtr =
     ToType->getAs<ObjCObjectPointerType>();
@@ -2443,7 +2443,7 @@ bool Sema::isObjCPointerConversion(QualType FromType, QualType ToType,
       ToPointeeType->getAs<ObjCObjectPointerType>() &&
       isObjCPointerConversion(FromPointeeType, ToPointeeType, ConvertedType,
                               IncompatibleObjC)) {
-        
+
     ConvertedType = Context.getPointerType(ConvertedType);
     ConvertedType = AdoptQualifiers(Context, ConvertedType, FromQualifiers);
     return true;
@@ -2526,46 +2526,46 @@ bool Sema::isObjCPointerConversion(QualType FromType, QualType ToType,
 /// this conversion.
 bool Sema::isObjCWritebackConversion(QualType FromType, QualType ToType,
                                      QualType &ConvertedType) {
-  if (!getLangOpts().ObjCAutoRefCount || 
+  if (!getLangOpts().ObjCAutoRefCount ||
       Context.hasSameUnqualifiedType(FromType, ToType))
     return false;
-  
+
   // Parameter must be a pointer to __autoreleasing (with no other qualifiers).
   QualType ToPointee;
   if (const PointerType *ToPointer = ToType->getAs<PointerType>())
     ToPointee = ToPointer->getPointeeType();
   else
     return false;
-  
+
   Qualifiers ToQuals = ToPointee.getQualifiers();
-  if (!ToPointee->isObjCLifetimeType() || 
+  if (!ToPointee->isObjCLifetimeType() ||
       ToQuals.getObjCLifetime() != Qualifiers::OCL_Autoreleasing ||
       !ToQuals.withoutObjCLifetime().empty())
     return false;
-  
+
   // Argument must be a pointer to __strong or __weak.
   QualType FromPointee;
   if (const PointerType *FromPointer = FromType->getAs<PointerType>())
     FromPointee = FromPointer->getPointeeType();
   else
     return false;
-  
+
   Qualifiers FromQuals = FromPointee.getQualifiers();
   if (!FromPointee->isObjCLifetimeType() ||
       (FromQuals.getObjCLifetime() != Qualifiers::OCL_Strong &&
        FromQuals.getObjCLifetime() != Qualifiers::OCL_Weak))
     return false;
-  
+
   // Make sure that we have compatible qualifiers.
   FromQuals.setObjCLifetime(Qualifiers::OCL_Autoreleasing);
   if (!ToQuals.compatiblyIncludes(FromQuals))
     return false;
-  
+
   // Remove qualifiers from the pointee type we're converting from; they
   // aren't used in the compatibility check below, and we'll be adding back
   // qualifiers (with __autoreleasing) if the compatibility check succeeds.
   FromPointee = FromPointee.getUnqualifiedType();
-  
+
   // The unqualified form of the pointee types must be compatible.
   ToPointee = ToPointee.getUnqualifiedType();
   bool IncompatibleObjC;
@@ -2574,7 +2574,7 @@ bool Sema::isObjCWritebackConversion(QualType FromType, QualType ToType,
   else if (!isObjCPointerConversion(FromPointee, ToPointee, FromPointee,
                                     IncompatibleObjC))
     return false;
-  
+
   /// \brief Construct the type we're converting to, which is a pointer to
   /// __autoreleasing pointee.
   FromPointee = Context.getQualifiedType(FromPointee, FromQuals);
@@ -2590,7 +2590,7 @@ bool Sema::IsBlockPointerConversion(QualType FromType, QualType ToType,
     ToPointeeType = ToBlockPtr->getPointeeType();
   else
     return false;
-  
+
   QualType FromPointeeType;
   if (const BlockPointerType *FromBlockPtr =
       FromType->getAs<BlockPointerType>())
@@ -2600,24 +2600,24 @@ bool Sema::IsBlockPointerConversion(QualType FromType, QualType ToType,
   // We have pointer to blocks, check whether the only
   // differences in the argument and result types are in Objective-C
   // pointer conversions. If so, we permit the conversion.
-  
+
   const FunctionProtoType *FromFunctionType
     = FromPointeeType->getAs<FunctionProtoType>();
   const FunctionProtoType *ToFunctionType
     = ToPointeeType->getAs<FunctionProtoType>();
-  
+
   if (!FromFunctionType || !ToFunctionType)
     return false;
 
   if (Context.hasSameType(FromPointeeType, ToPointeeType))
     return true;
-    
+
   // Perform the quick checks that will tell us whether these
   // function types are obviously different.
   if (FromFunctionType->getNumParams() != ToFunctionType->getNumParams() ||
       FromFunctionType->isVariadic() != ToFunctionType->isVariadic())
     return false;
-    
+
   FunctionType::ExtInfo FromEInfo = FromFunctionType->getExtInfo();
   FunctionType::ExtInfo ToEInfo = ToFunctionType->getExtInfo();
   if (FromEInfo != ToEInfo)
@@ -2645,7 +2645,7 @@ bool Sema::IsBlockPointerConversion(QualType FromType, QualType ToType,
      else
        return false;
    }
-    
+
    // Check argument types.
    for (unsigned ArgIdx = 0, NumArgs = FromFunctionType->getNumParams();
         ArgIdx != NumArgs; ++ArgIdx) {
@@ -2666,7 +2666,7 @@ bool Sema::IsBlockPointerConversion(QualType FromType, QualType ToType,
    if (!Context.doFunctionTypesMatchOnExtParameterInfos(FromFunctionType,
                                                         ToFunctionType))
      return false;
-   
+
    ConvertedType = ToType;
    return true;
 }
@@ -3012,7 +3012,7 @@ bool Sema::CheckMemberPointerConversion(Expr *From, QualType ToType,
 static bool isNonTrivialObjCLifetimeConversion(Qualifiers FromQuals,
                                                Qualifiers ToQuals) {
   // Converting anything to const __unsafe_unretained is trivial.
-  if (ToQuals.hasConst() && 
+  if (ToQuals.hasConst() &&
       ToQuals.getObjCLifetime() == Qualifiers::OCL_ExplicitNone)
     return false;
 
@@ -3032,7 +3032,7 @@ Sema::IsQualificationConversion(QualType FromType, QualType ToType,
   FromType = Context.getCanonicalType(FromType);
   ToType = Context.getCanonicalType(ToType);
   ObjCLifetimeConversion = false;
-  
+
   // If FromType and ToType are the same type, this is not a
   // qualification conversion.
   if (FromType.getUnqualifiedType() == ToType.getUnqualifiedType())
@@ -3058,7 +3058,7 @@ Sema::IsQualificationConversion(QualType FromType, QualType ToType,
     // Ignore __unaligned qualifier if this type is void.
     if (ToType.getUnqualifiedType()->isVoidType())
       FromQuals.removeUnaligned();
-    
+
     // Objective-C ARC:
     //   Check Objective-C lifetime conversions.
     if (FromQuals.getObjCLifetime() != ToQuals.getObjCLifetime() &&
@@ -3074,14 +3074,14 @@ Sema::IsQualificationConversion(QualType FromType, QualType ToType,
         return false;
       }
     }
-    
+
     // Allow addition/removal of GC attributes but not changing GC attributes.
     if (FromQuals.getObjCGCAttr() != ToQuals.getObjCGCAttr() &&
         (!FromQuals.hasObjCGCAttr() || !ToQuals.hasObjCGCAttr())) {
       FromQuals.removeObjCGCAttr();
       ToQuals.removeObjCGCAttr();
     }
-    
+
     //   -- for every j > 0, if const is in cv 1,j then const is in cv
     //      2,j, and similarly for volatile.
     if (!CStyle && !ToQuals.compatiblyIncludes(FromQuals))
@@ -3119,13 +3119,13 @@ static bool tryAtomicConversion(Sema &S, Expr *From, QualType ToType,
   const AtomicType *ToAtomic = ToType->getAs<AtomicType>();
   if (!ToAtomic)
     return false;
-  
+
   StandardConversionSequence InnerSCS;
-  if (!IsStandardConversion(S, From, ToAtomic->getValueType(), 
+  if (!IsStandardConversion(S, From, ToAtomic->getValueType(),
                             InOverloadResolution, InnerSCS,
                             CStyle, /*AllowObjCWritebackConversion=*/false))
     return false;
-  
+
   SCS.Second = InnerSCS.Second;
   SCS.setToType(1, InnerSCS.getToType(1));
   SCS.Third = InnerSCS.Third;
@@ -3180,8 +3180,8 @@ IsInitializerListConstructorConversion(Sema &S, Expr *From, QualType ToType,
   bool HadMultipleCandidates = (CandidateSet.size() > 1);
 
   OverloadCandidateSet::iterator Best;
-  switch (auto Result = 
-            CandidateSet.BestViableFunction(S, From->getLocStart(), 
+  switch (auto Result =
+            CandidateSet.BestViableFunction(S, From->getLocStart(),
                                             Best, true)) {
   case OR_Deleted:
   case OR_Success: {
@@ -3552,7 +3552,7 @@ CompareImplicitConversionSequences(Sema &S, SourceLocation Loc,
   // Two implicit conversion sequences of the same form are
   // indistinguishable conversion sequences unless one of the
   // following rules apply: (C++ 13.3.3.2p3):
-  
+
   // List-initialization sequence L1 is a better conversion sequence than
   // list-initialization sequence L2 if:
   // - L1 converts to std::initializer_list<X> for some X and L2 does not, or,
@@ -3587,7 +3587,7 @@ CompareImplicitConversionSequences(Sema &S, SourceLocation Loc,
                                                   ICS1.UserDefined.After,
                                                   ICS2.UserDefined.After);
     else
-      Result = compareConversionFunctions(S, 
+      Result = compareConversionFunctions(S,
                                           ICS1.UserDefined.ConversionFunction,
                                           ICS2.UserDefined.ConversionFunction);
   }
@@ -3769,9 +3769,9 @@ CompareStandardConversionSequences(Sema &S, SourceLocation Loc,
     const ObjCObjectPointerType* FromObjCPtr2
       = FromType2->getAs<ObjCObjectPointerType>();
     if (FromObjCPtr1 && FromObjCPtr2) {
-      bool AssignLeft = S.Context.canAssignObjCInterfaces(FromObjCPtr1, 
+      bool AssignLeft = S.Context.canAssignObjCInterfaces(FromObjCPtr1,
                                                           FromObjCPtr2);
-      bool AssignRight = S.Context.canAssignObjCInterfaces(FromObjCPtr2, 
+      bool AssignRight = S.Context.canAssignObjCInterfaces(FromObjCPtr2,
                                                            FromObjCPtr1);
       if (AssignLeft != AssignRight) {
         return AssignLeft? ImplicitConversionSequence::Better
@@ -3809,13 +3809,13 @@ CompareStandardConversionSequences(Sema &S, SourceLocation Loc,
     if (UnqualT1 == UnqualT2) {
       // Objective-C++ ARC: If the references refer to objects with different
       // lifetimes, prefer bindings that don't change lifetime.
-      if (SCS1.ObjCLifetimeConversionBinding != 
+      if (SCS1.ObjCLifetimeConversionBinding !=
                                           SCS2.ObjCLifetimeConversionBinding) {
         return SCS1.ObjCLifetimeConversionBinding
                                            ? ImplicitConversionSequence::Worse
                                            : ImplicitConversionSequence::Better;
       }
-      
+
       // If the type is an array type, promote the element qualifiers to the
       // type for comparison.
       if (isa<ArrayType>(T1) && T1Quals)
@@ -3825,7 +3825,7 @@ CompareStandardConversionSequences(Sema &S, SourceLocation Loc,
       if (T2.isMoreQualifiedThan(T1))
         return ImplicitConversionSequence::Better;
       else if (T1.isMoreQualifiedThan(T2))
-        return ImplicitConversionSequence::Worse;      
+        return ImplicitConversionSequence::Worse;
     }
   }
 
@@ -3891,17 +3891,17 @@ CompareQualificationConversions(Sema &S,
 
   ImplicitConversionSequence::CompareKind Result
     = ImplicitConversionSequence::Indistinguishable;
-  
+
   // Objective-C++ ARC:
   //   Prefer qualification conversions not involving a change in lifetime
   //   to qualification conversions that do not change lifetime.
-  if (SCS1.QualificationIncludesObjCLifetime != 
+  if (SCS1.QualificationIncludesObjCLifetime !=
                                       SCS2.QualificationIncludesObjCLifetime) {
     Result = SCS1.QualificationIncludesObjCLifetime
                ? ImplicitConversionSequence::Worse
                : ImplicitConversionSequence::Better;
   }
-  
+
   while (S.Context.UnwrapSimilarPointerTypes(T1, T2)) {
     // Within each iteration of the loop, we check the qualifiers to
     // determine if this still looks like a qualification
@@ -4033,7 +4033,7 @@ CompareDerivedToBaseConversions(Sema &S, SourceLocation Loc,
       = ToType1->getAs<ObjCObjectPointerType>();
     const ObjCObjectPointerType *ToPtr2
       = ToType2->getAs<ObjCObjectPointerType>();
-    
+
     if (FromPtr1 && FromPtr2 && ToPtr1 && ToPtr2) {
       // Apply the same conversion ranking rules for Objective-C pointer types
       // that we do for C++ pointers to class types. However, we employ the
@@ -4048,7 +4048,7 @@ CompareDerivedToBaseConversions(Sema &S, SourceLocation Loc,
       bool ToAssignRight
         = S.Context.canAssignObjCInterfaces(ToPtr2, ToPtr1);
 
-      // A conversion to an a non-id object pointer type or qualified 'id' 
+      // A conversion to a non-id object pointer type or qualified 'id'
       // type is better than a conversion to 'id'.
       // type is better than a conversion to 'id'.
       if (ToPtr1->isObjCIdType() &&
           (ToPtr2->isObjCQualifiedIdType() || ToPtr2->getInterfaceDecl()))
@@ -4056,15 +4056,15 @@ CompareDerivedToBaseConversions(Sema &S, SourceLocation Loc,
       if (ToPtr2->isObjCIdType() &&
           (ToPtr1->isObjCQualifiedIdType() || ToPtr1->getInterfaceDecl()))
         return ImplicitConversionSequence::Better;
-      
-      // A conversion to a non-id object pointer type is better than a 
-      // conversion to a qualified 'id' type 
+
+      // A conversion to a non-id object pointer type is better than a
+      // conversion to a qualified 'id' type
       if (ToPtr1->isObjCQualifiedIdType() && ToPtr2->getInterfaceDecl())
         return ImplicitConversionSequence::Worse;
       if (ToPtr2->isObjCQualifiedIdType() && ToPtr1->getInterfaceDecl())
         return ImplicitConversionSequence::Better;
-  
-      // A conversion to an a non-Class object pointer type or qualified 'Class' 
+
+      // A conversion to a non-Class object pointer type or qualified 'Class'
       // type is better than a conversion to 'Class'.
       if (ToPtr1->isObjCClassType() &&
           (ToPtr2->isObjCQualifiedClassType() || ToPtr2->getInterfaceDecl()))
@@ -4072,8 +4072,8 @@ CompareDerivedToBaseConversions(Sema &S, SourceLocation Loc,
       if (ToPtr2->isObjCClassType() &&
           (ToPtr1->isObjCQualifiedClassType() || ToPtr1->getInterfaceDecl()))
         return ImplicitConversionSequence::Better;
-      
-      // A conversion to a non-Class object pointer type is better than a 
+
+      // A conversion to a non-Class object pointer type is better than a
       // conversion to a qualified 'Class' type.
       if (ToPtr1->isObjCQualifiedClassType() && ToPtr2->getInterfaceDecl())
         return ImplicitConversionSequence::Worse;
@@ -4108,7 +4108,7 @@ CompareDerivedToBaseConversions(Sema &S, SourceLocation Loc,
         : ImplicitConversionSequence::Worse;
     }
   }
-  
+
   // Ranking of member-pointer types.
   if (SCS1.Second == ICK_Pointer_Member && SCS2.Second == ICK_Pointer_Member &&
       FromType1->isMemberPointerType() && FromType2->isMemberPointerType() &&
@@ -4264,9 +4264,9 @@ Sema::CompareReferenceRelationship(SourceLocation Loc,
       ObjCLifetimeConversion = true;
 
     T1Quals.removeObjCLifetime();
-    T2Quals.removeObjCLifetime();    
+    T2Quals.removeObjCLifetime();
   }
-    
+
   // MS compiler ignores __unaligned qualifier for references; do the same.
   T1Quals.removeUnaligned();
   T2Quals.removeUnaligned();
@@ -4313,7 +4313,7 @@ FindConversionForRefInit(Sema &S, ImplicitConversionSequence &ICS,
       bool DerivedToBase = false;
       bool ObjCConversion = false;
       bool ObjCLifetimeConversion = false;
-      
+
       // If we are initializing an rvalue reference, don't permit conversion
       // functions that return lvalues.
       if (!ConvTemplate && DeclType->isRValueReferenceType()) {
@@ -4322,7 +4322,7 @@ FindConversionForRefInit(Sema &S, ImplicitConversionSequence &ICS,
         if (RefType && !RefType->getPointeeType()->isFunctionType())
           continue;
       }
-      
+
       if (!ConvTemplate &&
           S.CompareReferenceRelationship(
             DeclLoc,
@@ -6051,24 +6051,24 @@ Sema::SelectBestMethod(Selector Sel, MultiExprArg Args, bool IsInstance,
       NumNamedArgs = Method->param_size();
     if (Args.size() < NumNamedArgs)
       continue;
-            
+
     for (unsigned i = 0; i < NumNamedArgs; i++) {
       // We can't do any type-checking on a type-dependent argument.
       if (Args[i]->isTypeDependent()) {
         Match = false;
         break;
       }
-        
+
       ParmVarDecl *param = Method->parameters()[i];
       Expr *argExpr = Args[i];
       assert(argExpr && "SelectBestMethod(): missing expression");
-                
+
       // Strip the unbridged-cast placeholder expression off unless it's
       // a consumed argument.
       if (argExpr->hasPlaceholderType(BuiltinType::ARCUnbridgedCast) &&
           !param->hasAttr<CFConsumedAttr>())
         argExpr = stripARCUnbridgedCast(argExpr);
-                
+
       // If the parameter is __unknown_anytype, move on to the next method.
       if (param->getType() == Context.UnknownAnyTy) {
         Match = false;
@@ -6242,11 +6242,11 @@ EnableIfAttr *Sema::CheckEnableIf(FunctionDecl *Function, ArrayRef<Expr *> Args,
 }
 
 template <typename CheckFn>
-static bool diagnoseDiagnoseIfAttrsWith(Sema &S, const FunctionDecl *FD,
+static bool diagnoseDiagnoseIfAttrsWith(Sema &S, const NamedDecl *ND,
                                         bool ArgDependent, SourceLocation Loc,
                                         CheckFn &&IsSuccessful) {
   SmallVector<const DiagnoseIfAttr *, 8> Attrs;
-  for (const auto *DIA : FD->specific_attrs<DiagnoseIfAttr>()) {
+  for (const auto *DIA : ND->specific_attrs<DiagnoseIfAttr>()) {
     if (ArgDependent == DIA->getArgDependent())
       Attrs.push_back(DIA);
   }
@@ -6293,16 +6293,16 @@ bool Sema::diagnoseArgDependentDiagnoseIfAttrs(const FunctionDecl *Function,
         // EvaluateWithSubstitution only cares about the position of each
         // argument in the arg list, not the ParmVarDecl* it maps to.
         if (!DIA->getCond()->EvaluateWithSubstitution(
-                Result, Context, DIA->getParent(), Args, ThisArg))
+                Result, Context, cast<FunctionDecl>(DIA->getParent()), Args, ThisArg))
           return false;
         return Result.isInt() && Result.getInt().getBoolValue();
       });
 }
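
Loosening the parameter from FunctionDecl to NamedDecl means argument-independent diagnose_if checks can be evaluated for any named entity a lookup resolves to. A hedged sketch of a caller (ND and NameLoc are assumed to come from the surrounding lookup):

    // diagnoseArgIndependentDiagnoseIfAttrs returns true if an error-level
    // diagnose_if condition fired for this declaration.
    if (S.diagnoseArgIndependentDiagnoseIfAttrs(ND, NameLoc))
      return ExprError();
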
 
-bool Sema::diagnoseArgIndependentDiagnoseIfAttrs(const FunctionDecl *Function,
+bool Sema::diagnoseArgIndependentDiagnoseIfAttrs(const NamedDecl *ND,
                                                  SourceLocation Loc) {
   return diagnoseDiagnoseIfAttrsWith(
-      *this, Function, /*ArgDependent=*/false, Loc,
+      *this, ND, /*ArgDependent=*/false, Loc,
       [&](const DiagnoseIfAttr *DIA) {
         bool Result;
         return DIA->getCond()->EvaluateAsBooleanCondition(Result, Context) &&
@@ -6754,7 +6754,7 @@ static bool isAllowableExplicitConversion(Sema &S,
   return S.isObjCPointerConversion(ConvType, ToNonRefType, ConvertedType,
                                    IncompatibleObjC);
 }
-                                          
+
 /// AddConversionCandidate - Add a C++ conversion function as a
 /// candidate in the candidate set (C++ [over.match.conv],
 /// C++ [over.match.copy]). From is the expression we're converting from,
@@ -6785,8 +6785,8 @@ Sema::AddConversionCandidate(CXXConversionDecl *Conversion,
   // Per C++ [over.match.conv]p1, [over.match.ref]p1, an explicit conversion
   // operator is only a candidate if its return type is the target type or
   // can be converted to the target type with a qualification conversion.
-  if (Conversion->isExplicit() && 
-      !isAllowableExplicitConversion(*this, ConvType, ToType, 
+  if (Conversion->isExplicit() &&
+      !isAllowableExplicitConversion(*this, ConvType, ToType,
                                      AllowObjCConversionOnExplicit))
     return;
 
@@ -7136,8 +7136,7 @@ void Sema::AddMemberOperatorCandidates(OverloadedOperatorKind Op,
 /// operator. NumContextualBoolArguments is the number of arguments
 /// (at the beginning of the argument list) that will be contextually
 /// converted to bool.
-void Sema::AddBuiltinCandidate(QualType ResultTy, QualType *ParamTys,
-                               ArrayRef<Expr *> Args,
+void Sema::AddBuiltinCandidate(QualType *ParamTys, ArrayRef<Expr *> Args,
                                OverloadCandidateSet& CandidateSet,
                                bool IsAssignmentOperator,
                                unsigned NumContextualBoolArguments) {
@@ -7151,9 +7150,7 @@ void Sema::AddBuiltinCandidate(QualType ResultTy, QualType *ParamTys,
   Candidate.Function = nullptr;
   Candidate.IsSurrogate = false;
   Candidate.IgnoreObjectArgument = false;
-  Candidate.BuiltinTypes.ResultTy = ResultTy;
-  for (unsigned ArgIdx = 0, N = Args.size(); ArgIdx != N; ++ArgIdx)
-    Candidate.BuiltinTypes.ParamTypes[ArgIdx] = ParamTys[ArgIdx];
+  std::copy(ParamTys, ParamTys + Args.size(), Candidate.BuiltinParamTypes);
 
   // Determine the implicit conversion sequences for each of the
   // arguments.
@@ -7230,7 +7227,7 @@ class BuiltinCandidateTypeSet  {
   /// \brief A flag indicating whether the nullptr type was present in the
   /// candidate set.
   bool HasNullPtrType;
-  
+
   /// Sema - The semantic analysis instance where we are building the
   /// candidate type set.
   Sema &SemaRef;
@@ -7314,14 +7311,14 @@ BuiltinCandidateTypeSet::AddPointerWithMoreQualifiedTypeVariants(QualType Ty,
   } else {
     PointeeTy = PointerTy->getPointeeType();
   }
-  
+
   // Don't add qualified variants of arrays. For one, they're not allowed
   // (the qualifier would sink to the element type), and for another, the
   // only overload situation where it matters is subscript or pointer +- int,
   // and those shouldn't have qualifier variants anyway.
   if (PointeeTy->isArrayType())
     return true;
-  
+
   unsigned BaseCVR = PointeeTy.getCVRQualifiers();
   bool hasVolatile = VisibleQuals.hasVolatile();
   bool hasRestrict = VisibleQuals.hasRestrict();
@@ -7331,24 +7328,24 @@ BuiltinCandidateTypeSet::AddPointerWithMoreQualifiedTypeVariants(QualType Ty,
     if ((CVR | BaseCVR) != CVR) continue;
     // Skip over volatile if no volatile found anywhere in the types.
     if ((CVR & Qualifiers::Volatile) && !hasVolatile) continue;
-    
+
     // Skip over restrict if no restrict found anywhere in the types, or if
     // the type cannot be restrict-qualified.
     if ((CVR & Qualifiers::Restrict) &&
         (!hasRestrict ||
          (!(PointeeTy->isAnyPointerType() || PointeeTy->isReferenceType()))))
       continue;
-  
+
     // Build qualified pointee type.
     QualType QPointeeTy = Context.getCVRQualifiedType(PointeeTy, CVR);
-    
+
     // Build qualified pointer type.
     QualType QPointerTy;
     if (!buildObjCPtr)
       QPointerTy = Context.getPointerType(QPointeeTy);
     else
       QPointerTy = Context.getObjCObjectPointerType(QPointeeTy);
-    
+
     // Insert qualified pointer type.
     PointerTypes.insert(QPointerTy);
   }
@@ -7492,7 +7489,7 @@ static void AddBuiltinAssignmentOperatorCandidates(Sema &S,
   // T& operator=(T&, T)
   ParamTypes[0] = S.Context.getLValueReferenceType(T);
   ParamTypes[1] = T;
-  S.AddBuiltinCandidate(ParamTypes[0], ParamTypes, Args, CandidateSet,
+  S.AddBuiltinCandidate(ParamTypes, Args, CandidateSet,
                         /*IsAssignmentOperator=*/true);
 
   if (!S.Context.getCanonicalType(T).isVolatileQualified()) {
@@ -7500,7 +7497,7 @@ static void AddBuiltinAssignmentOperatorCandidates(Sema &S,
     ParamTypes[0]
       = S.Context.getLValueReferenceType(S.Context.getVolatileType(T));
     ParamTypes[1] = T;
-    S.AddBuiltinCandidate(ParamTypes[0], ParamTypes, Args, CandidateSet,
+    S.AddBuiltinCandidate(ParamTypes, Args, CandidateSet,
                           /*IsAssignmentOperator=*/true);
   }
 }
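
With the ResultTy parameter gone, builtin-candidate construction passes only the parameter types, which now land in Candidate.BuiltinParamTypes via std::copy. A minimal sketch of the updated call pattern, mirroring the assignment-operator case above:

    QualType ParamTypes[2] = {
        S.Context.getLValueReferenceType(T),  // T &operator=(T &, T)
        T};
    S.AddBuiltinCandidate(ParamTypes, Args, CandidateSet,
                          /*IsAssignmentOperator=*/true);
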
@@ -7620,64 +7617,6 @@ class BuiltinOperatorOverloadBuilder {
     return S.Context.*ArithmeticTypes[index];
   }
 
-  /// \brief Gets the canonical type resulting from the usual arithmetic
-  /// conversions for the given arithmetic types.
-  CanQualType getUsualArithmeticConversions(unsigned L, unsigned R) {
-    // Accelerator table for performing the usual arithmetic conversions.
-    // The rules are basically:
-    //   - if either is floating-point, use the wider floating-point
-    //   - if same signedness, use the higher rank
-    //   - if same size, use unsigned of the higher rank
-    //   - use the larger type
-    // These rules, together with the axiom that higher ranks are
-    // never smaller, are sufficient to precompute all of these results
-    // *except* when dealing with signed types of higher rank.
-    // (we could precompute SLL x UI for all known platforms, but it's
-    // better not to make any assumptions).
-    // We assume that int128 has a higher rank than long long on all platforms.
-    enum PromotedType : int8_t {
-            Dep=-1,
-            Flt,  Dbl, LDbl,   SI,   SL,  SLL, S128,   UI,   UL,  ULL, U128
-    };
-    static const PromotedType ConversionsTable[LastPromotedArithmeticType]
-                                        [LastPromotedArithmeticType] = {
-/* Flt*/ {  Flt,  Dbl, LDbl,  Flt,  Flt,  Flt,  Flt,  Flt,  Flt,  Flt,  Flt },
-/* Dbl*/ {  Dbl,  Dbl, LDbl,  Dbl,  Dbl,  Dbl,  Dbl,  Dbl,  Dbl,  Dbl,  Dbl },
-/*LDbl*/ { LDbl, LDbl, LDbl, LDbl, LDbl, LDbl, LDbl, LDbl, LDbl, LDbl, LDbl },
-/*  SI*/ {  Flt,  Dbl, LDbl,   SI,   SL,  SLL, S128,   UI,   UL,  ULL, U128 },
-/*  SL*/ {  Flt,  Dbl, LDbl,   SL,   SL,  SLL, S128,  Dep,   UL,  ULL, U128 },
-/* SLL*/ {  Flt,  Dbl, LDbl,  SLL,  SLL,  SLL, S128,  Dep,  Dep,  ULL, U128 },
-/*S128*/ {  Flt,  Dbl, LDbl, S128, S128, S128, S128, S128, S128, S128, U128 },
-/*  UI*/ {  Flt,  Dbl, LDbl,   UI,  Dep,  Dep, S128,   UI,   UL,  ULL, U128 },
-/*  UL*/ {  Flt,  Dbl, LDbl,   UL,   UL,  Dep, S128,   UL,   UL,  ULL, U128 },
-/* ULL*/ {  Flt,  Dbl, LDbl,  ULL,  ULL,  ULL, S128,  ULL,  ULL,  ULL, U128 },
-/*U128*/ {  Flt,  Dbl, LDbl, U128, U128, U128, U128, U128, U128, U128, U128 },
-    };
-
-    assert(L < LastPromotedArithmeticType);
-    assert(R < LastPromotedArithmeticType);
-    int Idx = ConversionsTable[L][R];
-
-    // Fast path: the table gives us a concrete answer.
-    if (Idx != Dep) return getArithmeticType(Idx);
-
-    // Slow path: we need to compare widths.
-    // An invariant is that the signed type has higher rank.
-    CanQualType LT = getArithmeticType(L),
-                RT = getArithmeticType(R);
-    unsigned LW = S.Context.getIntWidth(LT),
-             RW = S.Context.getIntWidth(RT);
-
-    // If they're different widths, use the signed type.
-    if (LW > RW) return LT;
-    else if (LW < RW) return RT;
-
-    // Otherwise, use the unsigned type of the signed type's rank.
-    if (L == SL || R == SL) return S.Context.UnsignedLongTy;
-    assert(L == SLL || R == SLL);
-    return S.Context.UnsignedLongLongTy;
-  }
-
   /// \brief Helper method to factor out the common pattern of adding overloads
   /// for '++' and '--' builtin operators.
   void addPlusPlusMinusMinusStyleOverloads(QualType CandidateTy,
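The deleted accelerator table simply precomputes the usual arithmetic conversions. The same rules can be checked directly in standard C++; a minimal, self-contained illustration (not patch code):

```cpp
#include <type_traits>

// Floating-point dominates, and the wider floating-point type wins.
static_assert(std::is_same<decltype(1 + 2.0f), float>::value, "Flt row");
static_assert(std::is_same<decltype(1.0f + 2.0), double>::value, "Dbl over Flt");
// Same signedness: the higher rank wins.
static_assert(std::is_same<decltype(1 + 2L), long>::value, "SI x SL -> SL");
// Mixed signedness at equal rank: the unsigned type wins.
static_assert(std::is_same<decltype(1 + 2u), unsigned>::value, "SI x UI -> UI");
// The table's 'Dep' cells (e.g. long x unsigned int) depend on the target's
// integer widths, which is exactly why they could not be precomputed.

int main() {}
```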
@@ -7689,10 +7628,7 @@ class BuiltinOperatorOverloadBuilder {
     };
 
     // Non-volatile version.
-    if (Args.size() == 1)
-      S.AddBuiltinCandidate(ParamTypes[0], ParamTypes, Args, CandidateSet);
-    else
-      S.AddBuiltinCandidate(CandidateTy, ParamTypes, Args, CandidateSet);
+    S.AddBuiltinCandidate(ParamTypes, Args, CandidateSet);
 
     // Use a heuristic to reduce number of builtin candidates in the set:
     // add volatile version only if there are conversions to a volatile type.
@@ -7700,12 +7636,9 @@ class BuiltinOperatorOverloadBuilder {
       ParamTypes[0] =
         S.Context.getLValueReferenceType(
           S.Context.getVolatileType(CandidateTy));
-      if (Args.size() == 1)
-        S.AddBuiltinCandidate(ParamTypes[0], ParamTypes, Args, CandidateSet);
-      else
-        S.AddBuiltinCandidate(CandidateTy, ParamTypes, Args, CandidateSet);
+      S.AddBuiltinCandidate(ParamTypes, Args, CandidateSet);
     }
-    
+
     // Add restrict version only if there are conversions to a restrict type
     // and our candidate type is a non-restrict-qualified pointer.
     if (HasRestrict && CandidateTy->isAnyPointerType() &&
@@ -7713,21 +7646,15 @@ class BuiltinOperatorOverloadBuilder {
       ParamTypes[0]
         = S.Context.getLValueReferenceType(
             S.Context.getCVRQualifiedType(CandidateTy, Qualifiers::Restrict));
-      if (Args.size() == 1)
-        S.AddBuiltinCandidate(ParamTypes[0], ParamTypes, Args, CandidateSet);
-      else
-        S.AddBuiltinCandidate(CandidateTy, ParamTypes, Args, CandidateSet);
-      
+      S.AddBuiltinCandidate(ParamTypes, Args, CandidateSet);
+
       if (HasVolatile) {
         ParamTypes[0]
           = S.Context.getLValueReferenceType(
               S.Context.getCVRQualifiedType(CandidateTy,
                                             (Qualifiers::Volatile |
                                              Qualifiers::Restrict)));
-        if (Args.size() == 1)
-          S.AddBuiltinCandidate(ParamTypes[0], ParamTypes, Args, CandidateSet);
-        else
-          S.AddBuiltinCandidate(CandidateTy, ParamTypes, Args, CandidateSet);
+        S.AddBuiltinCandidate(ParamTypes, Args, CandidateSet);
       }
     }
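For reference, the overload forms this helper materializes come from [over.built]; for a candidate type T and a qualifier VQ drawn from the visible conversions they are, schematically:

```cpp
// Candidates added by addPlusPlusMinusMinusStyleOverloads for a type T and
// VQ in {empty, volatile} (plus restrict variants for pointer types):
//   VQ T& operator++(VQ T&);        // prefix:  Args.size() == 1
//   T     operator++(VQ T&, int);   // postfix: Args.size() == 2
// The removed if/else chose between those two result types; with the new
// AddBuiltinCandidate the result type is no longer stored at this point.
```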
 
@@ -7841,8 +7768,7 @@ class BuiltinOperatorOverloadBuilder {
         if (Proto->getTypeQuals() || Proto->getRefQualifier())
           continue;
 
-      S.AddBuiltinCandidate(S.Context.getLValueReferenceType(PointeeTy),
-                            &ParamTy, Args, CandidateSet);
+      S.AddBuiltinCandidate(&ParamTy, Args, CandidateSet);
     }
   }
 
@@ -7859,7 +7785,7 @@ class BuiltinOperatorOverloadBuilder {
     for (unsigned Arith = FirstPromotedArithmeticType;
          Arith < LastPromotedArithmeticType; ++Arith) {
       QualType ArithTy = getArithmeticType(Arith);
-      S.AddBuiltinCandidate(ArithTy, &ArithTy, Args, CandidateSet);
+      S.AddBuiltinCandidate(&ArithTy, Args, CandidateSet);
     }
 
     // Extension: We also add these operators for vector types.
@@ -7868,7 +7794,7 @@ class BuiltinOperatorOverloadBuilder {
            VecEnd = CandidateTypes[0].vector_end();
          Vec != VecEnd; ++Vec) {
       QualType VecTy = *Vec;
-      S.AddBuiltinCandidate(VecTy, &VecTy, Args, CandidateSet);
+      S.AddBuiltinCandidate(&VecTy, Args, CandidateSet);
     }
   }
 
@@ -7883,7 +7809,7 @@ class BuiltinOperatorOverloadBuilder {
            PtrEnd = CandidateTypes[0].pointer_end();
          Ptr != PtrEnd; ++Ptr) {
       QualType ParamTy = *Ptr;
-      S.AddBuiltinCandidate(ParamTy, &ParamTy, Args, CandidateSet);
+      S.AddBuiltinCandidate(&ParamTy, Args, CandidateSet);
     }
   }
 
@@ -7899,7 +7825,7 @@ class BuiltinOperatorOverloadBuilder {
     for (unsigned Int = FirstPromotedIntegralType;
          Int < LastPromotedIntegralType; ++Int) {
       QualType IntTy = getArithmeticType(Int);
-      S.AddBuiltinCandidate(IntTy, &IntTy, Args, CandidateSet);
+      S.AddBuiltinCandidate(&IntTy, Args, CandidateSet);
     }
 
     // Extension: We also add this operator for vector types.
@@ -7908,7 +7834,7 @@ class BuiltinOperatorOverloadBuilder {
            VecEnd = CandidateTypes[0].vector_end();
          Vec != VecEnd; ++Vec) {
       QualType VecTy = *Vec;
-      S.AddBuiltinCandidate(VecTy, &VecTy, Args, CandidateSet);
+      S.AddBuiltinCandidate(&VecTy, Args, CandidateSet);
     }
   }
 
@@ -7933,15 +7859,14 @@ class BuiltinOperatorOverloadBuilder {
           continue;
 
         QualType ParamTypes[2] = { *MemPtr, *MemPtr };
-        S.AddBuiltinCandidate(S.Context.BoolTy, ParamTypes, Args, CandidateSet);
+        S.AddBuiltinCandidate(ParamTypes, Args, CandidateSet);
       }
 
       if (CandidateTypes[ArgIdx].hasNullPtrType()) {
         CanQualType NullPtrTy = S.Context.getCanonicalType(S.Context.NullPtrTy);
         if (AddedTypes.insert(NullPtrTy).second) {
           QualType ParamTypes[2] = { NullPtrTy, NullPtrTy };
-          S.AddBuiltinCandidate(S.Context.BoolTy, ParamTypes, Args,
-                                CandidateSet);
+          S.AddBuiltinCandidate(ParamTypes, Args, CandidateSet);
         }
       }
     }
@@ -8017,7 +7942,7 @@ class BuiltinOperatorOverloadBuilder {
           continue;
 
         QualType ParamTypes[2] = { *Ptr, *Ptr };
-        S.AddBuiltinCandidate(S.Context.BoolTy, ParamTypes, Args, CandidateSet);
+        S.AddBuiltinCandidate(ParamTypes, Args, CandidateSet);
       }
       for (BuiltinCandidateTypeSet::iterator
                 Enum = CandidateTypes[ArgIdx].enumeration_begin(),
@@ -8033,7 +7958,7 @@ class BuiltinOperatorOverloadBuilder {
           continue;
 
         QualType ParamTypes[2] = { *Enum, *Enum };
-        S.AddBuiltinCandidate(S.Context.BoolTy, ParamTypes, Args, CandidateSet);
+        S.AddBuiltinCandidate(ParamTypes, Args, CandidateSet);
       }
     }
   }
@@ -8076,7 +8001,7 @@ class BuiltinOperatorOverloadBuilder {
         if (Arg == 0 || Op == OO_Plus) {
           // operator+(T*, ptrdiff_t) or operator-(T*, ptrdiff_t)
           // T* operator+(ptrdiff_t, T*);
-          S.AddBuiltinCandidate(*Ptr, AsymmetricParamTypes, Args, CandidateSet);
+          S.AddBuiltinCandidate(AsymmetricParamTypes, Args, CandidateSet);
         }
         if (Op == OO_Minus) {
           // ptrdiff_t operator-(T, T);
@@ -8084,8 +8009,7 @@ class BuiltinOperatorOverloadBuilder {
             continue;
 
           QualType ParamTypes[2] = { *Ptr, *Ptr };
-          S.AddBuiltinCandidate(S.Context.getPointerDiffType(), ParamTypes,
-                                Args, CandidateSet);
+          S.AddBuiltinCandidate(ParamTypes, Args, CandidateSet);
         }
       }
     }
@@ -8120,7 +8044,7 @@ class BuiltinOperatorOverloadBuilder {
   //   where LR is the result of the usual arithmetic conversions
   //   between types L and R.
   // Our candidates ignore the first parameter.
-  void addGenericBinaryArithmeticOverloads(bool isComparison) {
+  void addGenericBinaryArithmeticOverloads() {
     if (!HasArithmeticOrEnumeralCandidateType)
       return;
 
@@ -8130,10 +8054,7 @@ class BuiltinOperatorOverloadBuilder {
            Right < LastPromotedArithmeticType; ++Right) {
         QualType LandR[2] = { getArithmeticType(Left),
                               getArithmeticType(Right) };
-        QualType Result =
-          isComparison ? S.Context.BoolTy
-                       : getUsualArithmeticConversions(Left, Right);
-        S.AddBuiltinCandidate(Result, LandR, Args, CandidateSet);
+        S.AddBuiltinCandidate(LandR, Args, CandidateSet);
       }
     }
 
@@ -8148,15 +8069,7 @@ class BuiltinOperatorOverloadBuilder {
              Vec2End = CandidateTypes[1].vector_end();
            Vec2 != Vec2End; ++Vec2) {
         QualType LandR[2] = { *Vec1, *Vec2 };
-        QualType Result = S.Context.BoolTy;
-        if (!isComparison) {
-          if ((*Vec1)->isExtVectorType() || !(*Vec2)->isExtVectorType())
-            Result = *Vec1;
-          else
-            Result = *Vec2;
-        }
-
-        S.AddBuiltinCandidate(Result, LandR, Args, CandidateSet);
+        S.AddBuiltinCandidate(LandR, Args, CandidateSet);
       }
     }
   }
@@ -8185,10 +8098,7 @@ class BuiltinOperatorOverloadBuilder {
            Right < LastPromotedIntegralType; ++Right) {
         QualType LandR[2] = { getArithmeticType(Left),
                               getArithmeticType(Right) };
-        QualType Result = (Op == OO_LessLess || Op == OO_GreaterGreater)
-            ? LandR[0]
-            : getUsualArithmeticConversions(Left, Right);
-        S.AddBuiltinCandidate(Result, LandR, Args, CandidateSet);
+        S.AddBuiltinCandidate(LandR, Args, CandidateSet);
       }
     }
   }
@@ -8262,7 +8172,7 @@ class BuiltinOperatorOverloadBuilder {
         S.Context.getLValueReferenceType(*Ptr),
         isEqualOp ? *Ptr : S.Context.getPointerDiffType(),
       };
-      S.AddBuiltinCandidate(ParamTypes[0], ParamTypes, Args, CandidateSet,
+      S.AddBuiltinCandidate(ParamTypes, Args, CandidateSet,
                            /*IsAssignmentOperator=*/ isEqualOp);
 
       bool NeedVolatile = !(*Ptr).isVolatileQualified() &&
@@ -8271,18 +8181,18 @@ class BuiltinOperatorOverloadBuilder {
         // volatile version
         ParamTypes[0] =
           S.Context.getLValueReferenceType(S.Context.getVolatileType(*Ptr));
-        S.AddBuiltinCandidate(ParamTypes[0], ParamTypes, Args, CandidateSet,
+        S.AddBuiltinCandidate(ParamTypes, Args, CandidateSet,
                               /*IsAssigmentOperator=*/isEqualOp);
       }
-      
+
       if (!(*Ptr).isRestrictQualified() &&
           VisibleTypeConversionsQuals.hasRestrict()) {
         // restrict version
         ParamTypes[0]
           = S.Context.getLValueReferenceType(S.Context.getRestrictType(*Ptr));
-        S.AddBuiltinCandidate(ParamTypes[0], ParamTypes, Args, CandidateSet,
+        S.AddBuiltinCandidate(ParamTypes, Args, CandidateSet,
                               /*IsAssigmentOperator=*/isEqualOp);
-        
+
         if (NeedVolatile) {
           // volatile restrict version
           ParamTypes[0]
@@ -8290,7 +8200,7 @@ class BuiltinOperatorOverloadBuilder {
                 S.Context.getCVRQualifiedType(*Ptr,
                                               (Qualifiers::Volatile |
                                                Qualifiers::Restrict)));
-          S.AddBuiltinCandidate(ParamTypes[0], ParamTypes, Args, CandidateSet,
+          S.AddBuiltinCandidate(ParamTypes, Args, CandidateSet,
                                 /*IsAssigmentOperator=*/isEqualOp);
         }
       }
@@ -8311,7 +8221,7 @@ class BuiltinOperatorOverloadBuilder {
         };
 
         // non-volatile version
-        S.AddBuiltinCandidate(ParamTypes[0], ParamTypes, Args, CandidateSet,
+        S.AddBuiltinCandidate(ParamTypes, Args, CandidateSet,
                               /*IsAssigmentOperator=*/true);
 
         bool NeedVolatile = !(*Ptr).isVolatileQualified() &&
@@ -8320,18 +8230,18 @@ class BuiltinOperatorOverloadBuilder {
           // volatile version
           ParamTypes[0] =
             S.Context.getLValueReferenceType(S.Context.getVolatileType(*Ptr));
-          S.AddBuiltinCandidate(ParamTypes[0], ParamTypes, Args, CandidateSet,
+          S.AddBuiltinCandidate(ParamTypes, Args, CandidateSet,
                                 /*IsAssigmentOperator=*/true);
         }
-      
+
         if (!(*Ptr).isRestrictQualified() &&
             VisibleTypeConversionsQuals.hasRestrict()) {
           // restrict version
           ParamTypes[0]
             = S.Context.getLValueReferenceType(S.Context.getRestrictType(*Ptr));
-          S.AddBuiltinCandidate(ParamTypes[0], ParamTypes, Args, CandidateSet,
+          S.AddBuiltinCandidate(ParamTypes, Args, CandidateSet,
                                 /*IsAssigmentOperator=*/true);
-          
+
           if (NeedVolatile) {
             // volatile restrict version
             ParamTypes[0]
@@ -8339,7 +8249,7 @@ class BuiltinOperatorOverloadBuilder {
                   S.Context.getCVRQualifiedType(*Ptr,
                                                 (Qualifiers::Volatile |
                                                  Qualifiers::Restrict)));
-            S.AddBuiltinCandidate(ParamTypes[0], ParamTypes, Args, CandidateSet,
+            S.AddBuiltinCandidate(ParamTypes, Args, CandidateSet,
                                   /*IsAssigmentOperator=*/true);
           }
         }
@@ -8372,7 +8282,7 @@ class BuiltinOperatorOverloadBuilder {
         // Add this built-in operator as a candidate (VQ is empty).
         ParamTypes[0] =
           S.Context.getLValueReferenceType(getArithmeticType(Left));
-        S.AddBuiltinCandidate(ParamTypes[0], ParamTypes, Args, CandidateSet,
+        S.AddBuiltinCandidate(ParamTypes, Args, CandidateSet,
                               /*IsAssigmentOperator=*/isEqualOp);
 
         // Add this built-in operator as a candidate (VQ is 'volatile').
@@ -8380,7 +8290,7 @@ class BuiltinOperatorOverloadBuilder {
           ParamTypes[0] =
             S.Context.getVolatileType(getArithmeticType(Left));
           ParamTypes[0] = S.Context.getLValueReferenceType(ParamTypes[0]);
-          S.AddBuiltinCandidate(ParamTypes[0], ParamTypes, Args, CandidateSet,
+          S.AddBuiltinCandidate(ParamTypes, Args, CandidateSet,
                                 /*IsAssigmentOperator=*/isEqualOp);
         }
       }
@@ -8399,14 +8309,14 @@ class BuiltinOperatorOverloadBuilder {
         ParamTypes[1] = *Vec2;
         // Add this built-in operator as a candidate (VQ is empty).
         ParamTypes[0] = S.Context.getLValueReferenceType(*Vec1);
-        S.AddBuiltinCandidate(ParamTypes[0], ParamTypes, Args, CandidateSet,
+        S.AddBuiltinCandidate(ParamTypes, Args, CandidateSet,
                               /*IsAssigmentOperator=*/isEqualOp);
 
         // Add this built-in operator as a candidate (VQ is 'volatile').
         if (VisibleTypeConversionsQuals.hasVolatile()) {
           ParamTypes[0] = S.Context.getVolatileType(*Vec1);
           ParamTypes[0] = S.Context.getLValueReferenceType(ParamTypes[0]);
-          S.AddBuiltinCandidate(ParamTypes[0], ParamTypes, Args, CandidateSet,
+          S.AddBuiltinCandidate(ParamTypes, Args, CandidateSet,
                                 /*IsAssigmentOperator=*/isEqualOp);
         }
       }
@@ -8438,13 +8348,13 @@ class BuiltinOperatorOverloadBuilder {
         // Add this built-in operator as a candidate (VQ is empty).
         ParamTypes[0] =
           S.Context.getLValueReferenceType(getArithmeticType(Left));
-        S.AddBuiltinCandidate(ParamTypes[0], ParamTypes, Args, CandidateSet);
+        S.AddBuiltinCandidate(ParamTypes, Args, CandidateSet);
         if (VisibleTypeConversionsQuals.hasVolatile()) {
           // Add this built-in operator as a candidate (VQ is 'volatile').
           ParamTypes[0] = getArithmeticType(Left);
           ParamTypes[0] = S.Context.getVolatileType(ParamTypes[0]);
           ParamTypes[0] = S.Context.getLValueReferenceType(ParamTypes[0]);
-          S.AddBuiltinCandidate(ParamTypes[0], ParamTypes, Args, CandidateSet);
+          S.AddBuiltinCandidate(ParamTypes, Args, CandidateSet);
         }
       }
     }
@@ -8459,13 +8369,13 @@ class BuiltinOperatorOverloadBuilder {
   //        bool        operator||(bool, bool);
   void addExclaimOverload() {
     QualType ParamTy = S.Context.BoolTy;
-    S.AddBuiltinCandidate(ParamTy, &ParamTy, Args, CandidateSet,
+    S.AddBuiltinCandidate(&ParamTy, Args, CandidateSet,
                           /*IsAssignmentOperator=*/false,
                           /*NumContextualBoolArguments=*/1);
   }
   void addAmpAmpOrPipePipeOverload() {
     QualType ParamTypes[2] = { S.Context.BoolTy, S.Context.BoolTy };
-    S.AddBuiltinCandidate(S.Context.BoolTy, ParamTypes, Args, CandidateSet,
+    S.AddBuiltinCandidate(ParamTypes, Args, CandidateSet,
                           /*IsAssignmentOperator=*/false,
                           /*NumContextualBoolArguments=*/2);
   }
@@ -8490,10 +8400,8 @@ class BuiltinOperatorOverloadBuilder {
       if (!PointeeType->isObjectType())
         continue;
 
-      QualType ResultTy = S.Context.getLValueReferenceType(PointeeType);
-
       // T& operator[](T*, ptrdiff_t)
-      S.AddBuiltinCandidate(ResultTy, ParamTypes, Args, CandidateSet);
+      S.AddBuiltinCandidate(ParamTypes, Args, CandidateSet);
     }
 
     for (BuiltinCandidateTypeSet::iterator
@@ -8505,10 +8413,8 @@ class BuiltinOperatorOverloadBuilder {
       if (!PointeeType->isObjectType())
         continue;
 
-      QualType ResultTy = S.Context.getLValueReferenceType(PointeeType);
-
       // T& operator[](ptrdiff_t, T*)
-      S.AddBuiltinCandidate(ResultTy, ParamTypes, Args, CandidateSet);
+      S.AddBuiltinCandidate(ParamTypes, Args, CandidateSet);
     }
   }
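Both loops above exist because the builtin subscript candidates are symmetric in their operands; a quick standalone illustration:

```cpp
// Builtin operator[] comes in both operand orders, so these are equivalent:
int demo() {
  int a[4] = {0, 1, 2, 3};
  int x = a[1];   // matches T& operator[](T*, ptrdiff_t)
  int y = 1[a];   // matches T& operator[](ptrdiff_t, T*)
  return x + y;   // both read a[1], so this returns 2
}
```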
 
@@ -8558,8 +8464,7 @@ class BuiltinOperatorOverloadBuilder {
             T.isRestrictQualified())
           continue;
         T = Q1.apply(S.Context, T);
-        QualType ResultTy = S.Context.getLValueReferenceType(T);
-        S.AddBuiltinCandidate(ResultTy, ParamTypes, Args, CandidateSet);
+        S.AddBuiltinCandidate(ParamTypes, Args, CandidateSet);
       }
     }
   }
@@ -8587,7 +8492,7 @@ class BuiltinOperatorOverloadBuilder {
           continue;
 
         QualType ParamTypes[2] = { *Ptr, *Ptr };
-        S.AddBuiltinCandidate(*Ptr, ParamTypes, Args, CandidateSet);
+        S.AddBuiltinCandidate(ParamTypes, Args, CandidateSet);
       }
 
       for (BuiltinCandidateTypeSet::iterator
@@ -8598,7 +8503,7 @@ class BuiltinOperatorOverloadBuilder {
           continue;
 
         QualType ParamTypes[2] = { *MemPtr, *MemPtr };
-        S.AddBuiltinCandidate(*MemPtr, ParamTypes, Args, CandidateSet);
+        S.AddBuiltinCandidate(ParamTypes, Args, CandidateSet);
       }
 
       if (S.getLangOpts().CPlusPlus11) {
@@ -8613,7 +8518,7 @@ class BuiltinOperatorOverloadBuilder {
             continue;
 
           QualType ParamTypes[2] = { *Enum, *Enum };
-          S.AddBuiltinCandidate(*Enum, ParamTypes, Args, CandidateSet);
+          S.AddBuiltinCandidate(ParamTypes, Args, CandidateSet);
         }
       }
     }
@@ -8707,7 +8612,7 @@ void Sema::AddBuiltinOperatorCandidates(OverloadedOperatorKind Op,
       OpBuilder.addUnaryPlusOrMinusArithmeticOverloads();
     } else {
       OpBuilder.addBinaryPlusOrMinusPointerOverloads(Op);
-      OpBuilder.addGenericBinaryArithmeticOverloads(/*isComparison=*/false);
+      OpBuilder.addGenericBinaryArithmeticOverloads();
     }
     break;
 
@@ -8715,11 +8620,11 @@ void Sema::AddBuiltinOperatorCandidates(OverloadedOperatorKind Op,
     if (Args.size() == 1)
       OpBuilder.addUnaryStarPointerOverloads();
     else
-      OpBuilder.addGenericBinaryArithmeticOverloads(/*isComparison=*/false);
+      OpBuilder.addGenericBinaryArithmeticOverloads();
     break;
 
   case OO_Slash:
-    OpBuilder.addGenericBinaryArithmeticOverloads(/*isComparison=*/false);
+    OpBuilder.addGenericBinaryArithmeticOverloads();
     break;
 
   case OO_PlusPlus:
@@ -8738,7 +8643,7 @@ void Sema::AddBuiltinOperatorCandidates(OverloadedOperatorKind Op,
   case OO_LessEqual:
   case OO_GreaterEqual:
     OpBuilder.addRelationalPointerOrEnumeralOverloads();
-    OpBuilder.addGenericBinaryArithmeticOverloads(/*isComparison=*/true);
+    OpBuilder.addGenericBinaryArithmeticOverloads();
     break;
 
   case OO_Percent:
@@ -8805,7 +8710,7 @@ void Sema::AddBuiltinOperatorCandidates(OverloadedOperatorKind Op,
 
   case OO_Conditional:
     OpBuilder.addConditionalOperatorOverloads();
-    OpBuilder.addGenericBinaryArithmeticOverloads(/*isComparison=*/false);
+    OpBuilder.addGenericBinaryArithmeticOverloads();
     break;
   }
 }
@@ -9424,13 +9329,13 @@ void Sema::NoteAllOverloadCandidates(Expr *OverloadedExpr, QualType DestType,
   OverloadExpr *OvlExpr = Ovl.Expression;
 
   for (UnresolvedSetIterator I = OvlExpr->decls_begin(),
-                            IEnd = OvlExpr->decls_end(); 
+                            IEnd = OvlExpr->decls_end();
        I != IEnd; ++I) {
-    if (FunctionTemplateDecl *FunTmpl = 
+    if (FunctionTemplateDecl *FunTmpl =
                dyn_cast<FunctionTemplateDecl>((*I)->getUnderlyingDecl()) ) {
       NoteOverloadCandidate(*I, FunTmpl->getTemplatedDecl(), DestType,
                             TakingAddress);
-    } else if (FunctionDecl *Fun 
+    } else if (FunctionDecl *Fun
                      = dyn_cast<FunctionDecl>((*I)->getUnderlyingDecl()) ) {
       NoteOverloadCandidate(*I, Fun, DestType, TakingAddress);
     }
@@ -9608,7 +9513,7 @@ static void DiagnoseBadConversion(Sema &S, OverloadCandidate *Cand,
       << (FromExpr ? FromExpr->getSourceRange() : SourceRange())
       << FromTy << ToTy << (unsigned) isObjectArgument << I+1
       << (unsigned) (Cand->Fix.Kind);
-      
+
     MaybeEmitInheritedConstructorNote(S, Cand->FoundDecl);
     return;
   }
@@ -9711,7 +9616,7 @@ static bool CheckArityMismatch(Sema &S, OverloadCandidate *Cand,
   // right number of arguments, because only overloaded operators have
   // the weird behavior of overloading member and non-member functions.
   // Just don't report anything.
-  if (Fn->isInvalidDecl() && 
+  if (Fn->isInvalidDecl() &&
       Fn->getDeclName().getNameKind() == DeclarationName::CXXOperatorName)
     return true;
 
@@ -9735,9 +9640,9 @@ static void DiagnoseArityMismatch(Sema &S, NamedDecl *Found, Decl *D,
       "The templated declaration should at least be a function"
       " when diagnosing bad template argument deduction due to too many"
       " or too few arguments");
-  
+
  FunctionDecl *Fn = cast<FunctionDecl>(D);
-  
+
   // TODO: treat calls to a missing default constructor as a special case
  const FunctionProtoType *FnTy = Fn->getType()->getAs<FunctionProtoType>();
   unsigned MinParams = Fn->getMinRequiredArguments();
@@ -9925,6 +9830,15 @@ static void DiagnoseBadDeduction(Sema &S, NamedDecl *Found, Decl *Templated,
       return;
     }
 
+    // We found a specific requirement that disabled the enable_if.
+    if (PDiag && PDiag->second.getDiagID() ==
+        diag::err_typename_nested_not_found_requirement) {
+      S.Diag(Templated->getLocation(),
+             diag::note_ovl_candidate_disabled_by_requirement)
+        << PDiag->second.getStringArg(0) << TemplateArgString;
+      return;
+    }
+
     // Format the SFINAE diagnostic into the argument string.
     // FIXME: Add a general mechanism to include a PartialDiagnostic *'s
     //        formatted message in another diagnostic.
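The new branch above reports a candidate disabled because a nested 'type' member vanished during substitution. A hand-rolled sketch of the candidate shape this targets (the helper names and trait are illustrative, not from the patch):

```cpp
// Minimal enable_if-style gate: 'type' only exists when the condition holds.
template <bool B, typename T = void> struct my_enable_if {};
template <typename T> struct my_enable_if<true, T> { using type = T; };

// If sizeof(T) != 4, '::type' is missing, deduction fails, and the candidate
// can now be noted as disabled by its requirement instead of with a raw
// substitution-failure dump.
template <typename T,
          typename my_enable_if<sizeof(T) == 4, int>::type = 0>
void only_four_bytes(T) {}
```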
@@ -9993,8 +9907,8 @@ static void DiagnoseBadDeduction(Sema &S, NamedDecl *Found, Decl *Templated,
       return;
 
     // FIXME: For generic lambda parameters, check if the function is a lambda
-    // call operator, and if so, emit a prettier and more informative 
-    // diagnostic that mentions 'auto' and lambda in addition to 
+    // call operator, and if so, emit a prettier and more informative
+    // diagnostic that mentions 'auto' and lambda in addition to
     // (or instead of?) the canonical template type parameters.
     S.Diag(Templated->getLocation(),
            diag::note_ovl_candidate_non_deduced_mismatch)
@@ -10237,13 +10151,13 @@ static void NoteBuiltinOperatorCandidate(Sema &S, StringRef Opc,
   std::string TypeStr("operator");
   TypeStr += Opc;
   TypeStr += "(";
-  TypeStr += Cand->BuiltinTypes.ParamTypes[0].getAsString();
+  TypeStr += Cand->BuiltinParamTypes[0].getAsString();
   if (Cand->Conversions.size() == 1) {
     TypeStr += ")";
     S.Diag(OpLoc, diag::note_ovl_builtin_unary_candidate) << TypeStr;
   } else {
     TypeStr += ", ";
-    TypeStr += Cand->BuiltinTypes.ParamTypes[1].getAsString();
+    TypeStr += Cand->BuiltinParamTypes[1].getAsString();
     TypeStr += ")";
     S.Diag(OpLoc, diag::note_ovl_builtin_binary_candidate) << TypeStr;
   }
@@ -10480,7 +10394,7 @@ static void CompleteNonViableCandidate(Sema &S, OverloadCandidate *Cand,
   } else {
     // Builtin operator.
     assert(ConvCount <= 3);
-    ParamTypes = Cand->BuiltinTypes.ParamTypes;
+    ParamTypes = Cand->BuiltinParamTypes;
   }
 
   // Fill in the rest of the conversions.
@@ -10692,16 +10606,16 @@ void TemplateSpecCandidateSet::NoteCandidates(Sema &S, SourceLocation Loc) {
 // R (S::*)(A) --> R (A)
 QualType Sema::ExtractUnqualifiedFunctionType(QualType PossiblyAFunctionType) {
   QualType Ret = PossiblyAFunctionType;
-  if (const PointerType *ToTypePtr = 
+  if (const PointerType *ToTypePtr =
    PossiblyAFunctionType->getAs<PointerType>())
     Ret = ToTypePtr->getPointeeType();
-  else if (const ReferenceType *ToTypeRef = 
+  else if (const ReferenceType *ToTypeRef =
    PossiblyAFunctionType->getAs<ReferenceType>())
     Ret = ToTypeRef->getPointeeType();
   else if (const MemberPointerType *MemTypePtr =
-    PossiblyAFunctionType->getAs<MemberPointerType>()) 
-    Ret = MemTypePtr->getPointeeType();   
-  Ret = 
+    PossiblyAFunctionType->getAs<MemberPointerType>())
+    Ret = MemTypePtr->getPointeeType();
+  Ret =
     Context.getCanonicalType(Ret).getUnqualifiedType();
   return Ret;
 }
@@ -10727,9 +10641,9 @@ namespace {
 class AddressOfFunctionResolver {
   Sema& S;
   Expr* SourceExpr;
-  const QualType& TargetType; 
-  QualType TargetFunctionType; // Extracted function type from target type 
-   
+  const QualType& TargetType;
+  QualType TargetFunctionType; // Extracted function type from target type
+
   bool Complain;
   //DeclAccessPair& ResultFunctionAccessPair;
   ASTContext& Context;
@@ -10739,7 +10653,7 @@ class AddressOfFunctionResolver {
   bool StaticMemberFunctionFromBoundPointer;
   bool HasComplained;
 
-  OverloadExpr::FindResult OvlExprInfo; 
+  OverloadExpr::FindResult OvlExprInfo;
   OverloadExpr *OvlExpr;
   TemplateArgumentListInfo OvlExplicitTemplateArgs;
  SmallVector<std::pair<DeclAccessPair, FunctionDecl*>, 4> Matches;
@@ -10786,7 +10700,7 @@ class AddressOfFunctionResolver {
       }
       return;
     }
-    
+
     if (OvlExpr->hasExplicitTemplateArgs())
       OvlExpr->copyTemplateArgumentsInto(OvlExplicitTemplateArgs);
 
@@ -10864,7 +10778,7 @@ class AddressOfFunctionResolver {
   }
 
   // return true if any matching specializations were found
-  bool AddMatchingTemplateFunction(FunctionTemplateDecl* FunctionTemplate, 
+  bool AddMatchingTemplateFunction(FunctionTemplateDecl* FunctionTemplate,
                                    const DeclAccessPair& CurAccessFunPair) {
     if (CXXMethodDecl *Method
              = dyn_cast<CXXMethodDecl>(FunctionTemplate->getTemplatedDecl())) {
@@ -10872,7 +10786,7 @@ class AddressOfFunctionResolver {
       // static when converting to member pointer.
       if (Method->isStatic() == TargetTypeIsNonStaticMemberFunction)
         return false;
-    } 
+    }
     else if (TargetTypeIsNonStaticMemberFunction)
       return false;
 
@@ -10885,17 +10799,17 @@ class AddressOfFunctionResolver {
     FunctionDecl *Specialization = nullptr;
     TemplateDeductionInfo Info(FailedCandidates.getLocation());
     if (Sema::TemplateDeductionResult Result
-          = S.DeduceTemplateArguments(FunctionTemplate, 
+          = S.DeduceTemplateArguments(FunctionTemplate,
                                       &OvlExplicitTemplateArgs,
-                                      TargetFunctionType, Specialization, 
+                                      TargetFunctionType, Specialization,
                                       Info, /*IsAddressOfFunction*/true)) {
       // Make a note of the failed deduction for diagnostics.
       FailedCandidates.addCandidate()
           .set(CurAccessFunPair, FunctionTemplate->getTemplatedDecl(),
                MakeDeductionFailureInfo(Context, Result, Info));
       return false;
-    } 
-    
+    }
+
     // Template argument deduction ensures that we have an exact match or
     // compatible pointer-to-function arguments that would be adjusted by ICS.
      // This function template specialization works.
@@ -10909,15 +10823,15 @@ class AddressOfFunctionResolver {
     Matches.push_back(std::make_pair(CurAccessFunPair, Specialization));
     return true;
   }
-  
-  bool AddMatchingNonTemplateFunction(NamedDecl* Fn, 
+
+  bool AddMatchingNonTemplateFunction(NamedDecl* Fn,
                                       const DeclAccessPair& CurAccessFunPair) {
    if (CXXMethodDecl *Method = dyn_cast<CXXMethodDecl>(Fn)) {
       // Skip non-static functions when converting to pointer, and static
       // when converting to member pointer.
       if (Method->isStatic() == TargetTypeIsNonStaticMemberFunction)
         return false;
-    } 
+    }
     else if (TargetTypeIsNonStaticMemberFunction)
       return false;
 
@@ -10947,20 +10861,20 @@ class AddressOfFunctionResolver {
         return true;
       }
     }
-    
+
     return false;
   }
-  
+
   bool FindAllFunctionsThatMatchTargetTypeExactly() {
     bool Ret = false;
-    
+
     // If the overload expression doesn't have the form of a pointer to
     // member, don't try to convert it to a pointer-to-member type.
     if (IsInvalidFormOfPointerToMemberFunction())
       return false;
 
     for (UnresolvedSetIterator I = OvlExpr->decls_begin(),
-                               E = OvlExpr->decls_end(); 
+                               E = OvlExpr->decls_end();
          I != E; ++I) {
       // Look through any using declarations to find the underlying function.
       NamedDecl *Fn = (*I)->getUnderlyingDecl();
@@ -11103,12 +11017,12 @@ class AddressOfFunctionResolver {
   bool hadMultipleCandidates() const { return (OvlExpr->getNumDecls() > 1); }
 
   int getNumMatches() const { return Matches.size(); }
-  
+
   FunctionDecl* getMatchingFunctionDecl() const {
     if (Matches.size() != 1) return nullptr;
     return Matches[0].second;
   }
-  
+
   const DeclAccessPair* getMatchingFunctionAccessPair() const {
     if (Matches.size() != 1) return nullptr;
     return &Matches[0].first;
@@ -11248,7 +11162,7 @@ bool Sema::resolveAndFixAddressOfOnlyViableOverloadCandidate(
 /// If no template-ids are found, no diagnostics are emitted and NULL is
 /// returned.
 FunctionDecl *
-Sema::ResolveSingleFunctionTemplateSpecialization(OverloadExpr *ovl, 
+Sema::ResolveSingleFunctionTemplateSpecialization(OverloadExpr *ovl,
                                                   bool Complain,
                                                   DeclAccessPair *FoundResult) {
   // C++ [over.over]p1:
@@ -11311,9 +11225,9 @@ Sema::ResolveSingleFunctionTemplateSpecialization(OverloadExpr *ovl,
       }
       return nullptr;
     }
-    
+
     Matched = Specialization;
-    if (FoundResult) *FoundResult = I.getPair();    
+    if (FoundResult) *FoundResult = I.getPair();
   }
 
   if (Matched &&
@@ -11336,8 +11250,8 @@ Sema::ResolveSingleFunctionTemplateSpecialization(OverloadExpr *ovl,
 // returns true if 'complain' is set.
 bool Sema::ResolveAndFixSingleFunctionTemplateSpecialization(
                       ExprResult &SrcExpr, bool doFunctionPointerConverion,
-                      bool complain, SourceRange OpRangeForComplaining, 
-                                           QualType DestTypeForComplaining, 
+                      bool complain, SourceRange OpRangeForComplaining,
+                                           QualType DestTypeForComplaining,
                                             unsigned DiagIDForComplaining) {
   assert(SrcExpr.get()->getType() == Context.OverloadTy);
 
@@ -11394,7 +11308,7 @@ bool Sema::ResolveAndFixSingleFunctionTemplateSpecialization(
       Diag(OpRangeForComplaining.getBegin(), DiagIDForComplaining)
         << ovl.Expression->getName()
         << DestTypeForComplaining
-        << OpRangeForComplaining 
+        << OpRangeForComplaining
         << ovl.Expression->getQualifierLoc().getSourceRange();
       NoteAllOverloadCandidates(SrcExpr.get());
 
@@ -12032,6 +11946,7 @@ Sema::CreateOverloadedUnaryOp(SourceLocation OpLoc, UnaryOperatorKind Opc,
     FunctionDecl *FnDecl = Best->Function;
 
     if (FnDecl) {
+      Expr *Base = nullptr;
       // We matched an overloaded operator. Build a call to that
       // operator.
 
@@ -12044,7 +11959,7 @@ Sema::CreateOverloadedUnaryOp(SourceLocation OpLoc, UnaryOperatorKind Opc,
                                               Best->FoundDecl, Method);
         if (InputRes.isInvalid())
           return ExprError();
-        Input = InputRes.get();
+        Base = Input = InputRes.get();
       } else {
         // Convert the arguments.
         ExprResult InputInit
@@ -12060,7 +11975,8 @@ Sema::CreateOverloadedUnaryOp(SourceLocation OpLoc, UnaryOperatorKind Opc,
 
       // Build the actual expression node.
       ExprResult FnExpr = CreateFunctionRefExpr(*this, FnDecl, Best->FoundDecl,
-                                                HadMultipleCandidates, OpLoc);
+                                                Base, HadMultipleCandidates,
+                                                OpLoc);
       if (FnExpr.isInvalid())
         return ExprError();
 
@@ -12086,9 +12002,8 @@ Sema::CreateOverloadedUnaryOp(SourceLocation OpLoc, UnaryOperatorKind Opc,
       // We matched a built-in operator. Convert the arguments, then
       // break out so that we will build the appropriate built-in
       // operator node.
-      ExprResult InputRes =
-        PerformImplicitConversion(Input, Best->BuiltinTypes.ParamTypes[0],
-                                  Best->Conversions[0], AA_Passing);
+      ExprResult InputRes = PerformImplicitConversion(
+          Input, Best->BuiltinParamTypes[0], Best->Conversions[0], AA_Passing);
       if (InputRes.isInvalid())
         return ExprError();
       Input = InputRes.get();
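Throughout these hunks CreateFunctionRefExpr gains an Expr *Base argument: for a member operator the converted object argument is passed along, while free functions and literal operators pass null. An illustrative source-level pairing (not compiler code):

```cpp
struct Counter {
  int n;
  Counter operator-() const { return Counter{-n}; }      // member operator
};
Counter operator+(Counter a, Counter b) { return Counter{a.n + b.n}; }

void use(Counter c) {
  (void)-c;       // member: the converted 'c' is threaded through as Base
  (void)(c + c);  // free operator+: Base stays nullptr
}
```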
@@ -12183,8 +12098,8 @@ Sema::CreateOverloadedBinOp(SourceLocation OpLoc,
     // TODO: provide better source location info in DNLoc component.
     DeclarationNameInfo OpNameInfo(OpName, OpLoc);
     UnresolvedLookupExpr *Fn
-      = UnresolvedLookupExpr::Create(Context, NamingClass, 
-                                     NestedNameSpecifierLoc(), OpNameInfo, 
+      = UnresolvedLookupExpr::Create(Context, NamingClass,
+                                     NestedNameSpecifierLoc(), OpNameInfo,
                                      /*ADL*/ true, IsOverloaded(Fns),
                                      Fns.begin(), Fns.end());
     return new (Context)
@@ -12246,6 +12161,7 @@ Sema::CreateOverloadedBinOp(SourceLocation OpLoc,
       FunctionDecl *FnDecl = Best->Function;
 
       if (FnDecl) {
+        Expr *Base = nullptr;
         // We matched an overloaded operator. Build a call to that
         // operator.
 
@@ -12267,7 +12183,7 @@ Sema::CreateOverloadedBinOp(SourceLocation OpLoc,
                                                 Best->FoundDecl, Method);
           if (Arg0.isInvalid())
             return ExprError();
-          Args[0] = Arg0.getAs<Expr>();
+          Base = Args[0] = Arg0.getAs<Expr>();
          Args[1] = RHS = Arg1.getAs<Expr>();
         } else {
           // Convert the arguments.
@@ -12291,7 +12207,7 @@ Sema::CreateOverloadedBinOp(SourceLocation OpLoc,
 
         // Build the actual expression node.
         ExprResult FnExpr = CreateFunctionRefExpr(*this, FnDecl,
-                                                  Best->FoundDecl,
+                                                  Best->FoundDecl, Base,
                                                   HadMultipleCandidates, OpLoc);
         if (FnExpr.isInvalid())
           return ExprError();
@@ -12332,15 +12248,15 @@ Sema::CreateOverloadedBinOp(SourceLocation OpLoc,
         // break out so that we will build the appropriate built-in
         // operator node.
         ExprResult ArgsRes0 =
-          PerformImplicitConversion(Args[0], Best->BuiltinTypes.ParamTypes[0],
-                                    Best->Conversions[0], AA_Passing);
+            PerformImplicitConversion(Args[0], Best->BuiltinParamTypes[0],
+                                      Best->Conversions[0], AA_Passing);
         if (ArgsRes0.isInvalid())
           return ExprError();
         Args[0] = ArgsRes0.get();
 
         ExprResult ArgsRes1 =
-          PerformImplicitConversion(Args[1], Best->BuiltinTypes.ParamTypes[1],
-                                    Best->Conversions[1], AA_Passing);
+            PerformImplicitConversion(Args[1], Best->BuiltinParamTypes[1],
+                                      Best->Conversions[1], AA_Passing);
         if (ArgsRes1.isInvalid())
           return ExprError();
         Args[1] = ArgsRes1.get();
@@ -12513,6 +12429,7 @@ Sema::CreateOverloadedArraySubscriptExpr(SourceLocation LLoc,
         OpLocInfo.setCXXOperatorNameRange(SourceRange(LLoc, RLoc));
         ExprResult FnExpr = CreateFunctionRefExpr(*this, FnDecl,
                                                   Best->FoundDecl,
+                                                  Base,
                                                   HadMultipleCandidates,
                                                   OpLocInfo.getLoc(),
                                                   OpLocInfo.getInfo());
@@ -12543,15 +12460,15 @@ Sema::CreateOverloadedArraySubscriptExpr(SourceLocation LLoc,
         // break out so that we will build the appropriate built-in
         // operator node.
         ExprResult ArgsRes0 =
-          PerformImplicitConversion(Args[0], Best->BuiltinTypes.ParamTypes[0],
-                                    Best->Conversions[0], AA_Passing);
+            PerformImplicitConversion(Args[0], Best->BuiltinParamTypes[0],
+                                      Best->Conversions[0], AA_Passing);
         if (ArgsRes0.isInvalid())
           return ExprError();
         Args[0] = ArgsRes0.get();
 
         ExprResult ArgsRes1 =
-          PerformImplicitConversion(Args[1], Best->BuiltinTypes.ParamTypes[1],
-                                    Best->Conversions[1], AA_Passing);
+            PerformImplicitConversion(Args[1], Best->BuiltinParamTypes[1],
+                                      Best->Conversions[1], AA_Passing);
         if (ArgsRes1.isInvalid())
           return ExprError();
         Args[1] = ArgsRes1.get();
@@ -12747,12 +12664,12 @@ Sema::BuildCallToMemberFunction(Scope *S, Expr *MemExprE,
       if (DiagnoseUseOfDecl(Best->FoundDecl, UnresExpr->getNameLoc()))
         return ExprError();
       // If FoundDecl is different from Method (such as if one is a template
-      // and the other a specialization), make sure DiagnoseUseOfDecl is 
+      // and the other a specialization), make sure DiagnoseUseOfDecl is
       // called on both.
       // FIXME: This would be more comprehensively addressed by modifying
       // DiagnoseUseOfDecl to accept both the FoundDecl and the decl
       // being used.
-      if (Method != FoundDecl.getDecl() && 
+      if (Method != FoundDecl.getDecl() &&
                       DiagnoseUseOfDecl(Method, UnresExpr->getNameLoc()))
         return ExprError();
       break;
@@ -12775,7 +12692,7 @@ Sema::BuildCallToMemberFunction(Scope *S, Expr *MemExprE,
     case OR_Deleted:
       Diag(UnresExpr->getMemberLoc(), diag::err_ovl_deleted_member_call)
         << Best->Function->isDeleted()
-        << DeclName 
+        << DeclName
         << getDeletedOrUnavailableSuffix(Best->Function)
         << MemExprE->getSourceRange();
       CandidateSet.NoteCandidates(*this, OCD_AllCandidates, Args);
@@ -12848,8 +12765,8 @@ Sema::BuildCallToMemberFunction(Scope *S, Expr *MemExprE,
     }
   }
 
-  if ((isa<CXXConstructorDecl>(CurContext) || 
-       isa<CXXDestructorDecl>(CurContext)) && 
+  if ((isa<CXXConstructorDecl>(CurContext) ||
+       isa<CXXDestructorDecl>(CurContext)) &&
       TheCall->getMethodDecl()->isPure()) {
     const CXXMethodDecl *MD = TheCall->getMethodDecl();
 
@@ -12929,7 +12846,7 @@ Sema::BuildCallToObjectOfClassType(Scope *S, Expr *Obj,
   }
 
   // C++ [over.call.object]p2:
-  //   In addition, for each (non-explicit in C++0x) conversion function 
+  //   In addition, for each (non-explicit in C++0x) conversion function
   //   declared in T of the form
   //
   //        operator conversion-type-id () cv-qualifier;
@@ -13008,7 +12925,7 @@ Sema::BuildCallToObjectOfClassType(Scope *S, Expr *Obj,
     Diag(Object.get()->getLocStart(),
          diag::err_ovl_deleted_object_call)
       << Best->Function->isDeleted()
-      << Object.get()->getType() 
+      << Object.get()->getType()
       << getDeletedOrUnavailableSuffix(Best->Function)
       << Object.get()->getSourceRange();
     CandidateSet.NoteCandidates(*this, OCD_AllCandidates, Args);
@@ -13031,7 +12948,7 @@ Sema::BuildCallToObjectOfClassType(Scope *S, Expr *Obj,
                               Best->FoundDecl);
     if (DiagnoseUseOfDecl(Best->FoundDecl, LParenLoc))
       return ExprError();
-    assert(Conv == Best->FoundDecl.getDecl() && 
+    assert(Conv == Best->FoundDecl.getDecl() &&
              "Found Decl & conversion-to-functionptr should be same, right?!");
     // We selected one of the surrogate functions that converts the
     // object parameter to a function pointer. Perform the conversion
@@ -13071,7 +12988,7 @@ Sema::BuildCallToObjectOfClassType(Scope *S, Expr *Obj,
                Context.DeclarationNames.getCXXOperatorName(OO_Call), LParenLoc);
   OpLocInfo.setCXXOperatorNameRange(SourceRange(LParenLoc, RParenLoc));
   ExprResult NewFn = CreateFunctionRefExpr(*this, Method, Best->FoundDecl,
-                                           HadMultipleCandidates,
+                                           Obj, HadMultipleCandidates,
                                            OpLocInfo.getLoc(),
                                            OpLocInfo.getInfo());
   if (NewFn.isInvalid())
@@ -13242,7 +13159,7 @@ Sema::BuildOverloadedArrowExpr(Scope *S, Expr *Base, SourceLocation OpLoc,
   case OR_Deleted:
     Diag(OpLoc,  diag::err_ovl_deleted_oper)
       << Best->Function->isDeleted()
-      << "->" 
+      << "->"
       << getDeletedOrUnavailableSuffix(Best->Function)
       << Base->getSourceRange();
     CandidateSet.NoteCandidates(*this, OCD_AllCandidates, Base);
@@ -13262,7 +13179,7 @@ Sema::BuildOverloadedArrowExpr(Scope *S, Expr *Base, SourceLocation OpLoc,
 
   // Build the operator call.
   ExprResult FnExpr = CreateFunctionRefExpr(*this, Method, Best->FoundDecl,
-                                            HadMultipleCandidates, OpLoc);
+                                            Base, HadMultipleCandidates, OpLoc);
   if (FnExpr.isInvalid())
     return ExprError();
 
@@ -13321,7 +13238,7 @@ ExprResult Sema::BuildLiteralOperatorCall(LookupResult &R,
 
   FunctionDecl *FD = Best->Function;
   ExprResult Fn = CreateFunctionRefExpr(*this, FD, Best->FoundDecl,
-                                        HadMultipleCandidates,
+                                        nullptr, HadMultipleCandidates,
                                         SuffixInfo.getLoc(),
                                         SuffixInfo.getInfo());
   if (Fn.isInvalid())
diff --git a/interpreter/llvm/src/tools/clang/lib/Sema/SemaPseudoObject.cpp b/interpreter/llvm/src/tools/clang/lib/Sema/SemaPseudoObject.cpp
index b6b429d1f25c4..d159172a69908 100644
--- a/interpreter/llvm/src/tools/clang/lib/Sema/SemaPseudoObject.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Sema/SemaPseudoObject.cpp
@@ -1176,8 +1176,6 @@ bool ObjCSubscriptOpBuilder::findAtIndexGetter() {
   
   AtIndexGetter = S.LookupMethodInObjectType(AtIndexGetterSelector, ResultType, 
                                              true /*instance*/);
-  bool receiverIdType = (BaseT->isObjCIdType() ||
-                         BaseT->isObjCQualifiedIdType());
   
   if (!AtIndexGetter && S.getLangOpts().DebuggerObjCLiteral) {
     AtIndexGetter = ObjCMethodDecl::Create(S.Context, SourceLocation(), 
@@ -1203,7 +1201,7 @@ bool ObjCSubscriptOpBuilder::findAtIndexGetter() {
   }
 
   if (!AtIndexGetter) {
-    if (!receiverIdType) {
+    if (!BaseT->isObjCIdType()) {
       S.Diag(BaseExpr->getExprLoc(), diag::err_objc_subscript_method_not_found)
       << BaseExpr->getType() << 0 << arrayRef;
       return false;
@@ -1284,9 +1282,6 @@ bool ObjCSubscriptOpBuilder::findAtIndexSetter() {
   }
   AtIndexSetter = S.LookupMethodInObjectType(AtIndexSetterSelector, ResultType, 
                                              true /*instance*/);
-  
-  bool receiverIdType = (BaseT->isObjCIdType() ||
-                         BaseT->isObjCQualifiedIdType());
 
   if (!AtIndexSetter && S.getLangOpts().DebuggerObjCLiteral) {
     TypeSourceInfo *ReturnTInfo = nullptr;
@@ -1321,7 +1316,7 @@ bool ObjCSubscriptOpBuilder::findAtIndexSetter() {
   }
   
   if (!AtIndexSetter) {
-    if (!receiverIdType) {
+    if (!BaseT->isObjCIdType()) {
       S.Diag(BaseExpr->getExprLoc(), 
              diag::err_objc_subscript_method_not_found)
       << BaseExpr->getType() << 1 << arrayRef;
diff --git a/interpreter/llvm/src/tools/clang/lib/Sema/SemaStmt.cpp b/interpreter/llvm/src/tools/clang/lib/Sema/SemaStmt.cpp
index 5d7eada287173..2a38a1f8e1d87 100644
--- a/interpreter/llvm/src/tools/clang/lib/Sema/SemaStmt.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Sema/SemaStmt.cpp
@@ -1288,17 +1288,22 @@ Sema::ActOnDoStmt(SourceLocation DoLoc, Stmt *Body,
 }
 
 namespace {
+  // Use SetVector since the diagnostic cares about the ordering of the Decls.
+  using DeclSetVector =
+      llvm::SetVector<VarDecl *, llvm::SmallVector<VarDecl *, 8>,
+                      llvm::SmallPtrSet<VarDecl *, 8>>;
+
   // This visitor will traverse a conditional statement and store all
   // the evaluated decls into a vector.  Simple is set to true if none
   // of the excluded constructs are used.
  class DeclExtractor : public EvaluatedExprVisitor<DeclExtractor> {
-    llvm::SmallPtrSetImpl<VarDecl *> &Decls;
+    DeclSetVector &Decls;
    SmallVectorImpl<SourceRange> &Ranges;
    bool Simple;
  public:
    typedef EvaluatedExprVisitor<DeclExtractor> Inherited;

-    DeclExtractor(Sema &S, llvm::SmallPtrSetImpl<VarDecl *> &Decls,
+    DeclExtractor(Sema &S, DeclSetVector &Decls,
                  SmallVectorImpl<SourceRange> &Ranges) :
         Inherited(S.Context),
         Decls(Decls),
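llvm::SetVector keeps set semantics while iterating in insertion order, which is what makes the diagnostic's wording deterministic. A minimal sketch of that behavior, assuming only the documented LLVM ADT interface:

```cpp
#include "llvm/ADT/SetVector.h"
#include "llvm/Support/raw_ostream.h"

void demo() {
  llvm::SetVector<int> SV;
  SV.insert(3);
  SV.insert(1);
  SV.insert(3);                // duplicate insertion is a no-op
  for (int V : SV)
    llvm::errs() << V << ' ';  // prints "3 1": deduplicated, insertion order
}
```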
@@ -1370,14 +1375,13 @@ namespace {
   // DeclMatcher checks to see if the decls are used in a non-evaluated
   // context.
  class DeclMatcher : public EvaluatedExprVisitor<DeclMatcher> {
-    llvm::SmallPtrSetImpl<VarDecl *> &Decls;
+    DeclSetVector &Decls;
    bool FoundDecl;

  public:
    typedef EvaluatedExprVisitor<DeclMatcher> Inherited;

-    DeclMatcher(Sema &S, llvm::SmallPtrSetImpl<VarDecl *> &Decls,
-                Stmt *Statement) :
+    DeclMatcher(Sema &S, DeclSetVector &Decls, Stmt *Statement) :
         Inherited(S.Context), Decls(Decls), FoundDecl(false) {
       if (!Statement) return;
 
@@ -1459,7 +1463,7 @@ namespace {
       return;
 
     PartialDiagnostic PDiag = S.PDiag(diag::warn_variables_not_in_loop_body);
-    llvm::SmallPtrSet<VarDecl *, 8> Decls;
+    DeclSetVector Decls;
    SmallVector<SourceRange, 10> Ranges;
     DeclExtractor DE(S, Decls, Ranges);
     DE.Visit(Second);
@@ -1471,11 +1475,9 @@ namespace {
     if (Decls.size() == 0) return;
 
     // Don't warn on volatile, static, or global variables.
-    for (llvm::SmallPtrSetImpl<VarDecl *>::iterator I = Decls.begin(),
-                                                   E = Decls.end();
-         I != E; ++I)
-      if ((*I)->getType().isVolatileQualified() ||
-          (*I)->hasGlobalStorage()) return;
+    for (auto *VD : Decls)
+      if (VD->getType().isVolatileQualified() || VD->hasGlobalStorage())
+        return;
 
     if (DeclMatcher(S, Decls, Second).FoundDeclInUse() ||
         DeclMatcher(S, Decls, Third).FoundDeclInUse() ||
@@ -1483,25 +1485,16 @@ namespace {
       return;
 
     // Load decl names into diagnostic.
-    if (Decls.size() > 4)
+    if (Decls.size() > 4) {
       PDiag << 0;
-    else {
-      PDiag << Decls.size();
-      for (llvm::SmallPtrSetImpl<VarDecl *>::iterator I = Decls.begin(),
-                                                     E = Decls.end();
-           I != E; ++I)
-        PDiag << (*I)->getDeclName();
-    }
-
-    // Load SourceRanges into diagnostic if there is room.
-    // Otherwise, load the SourceRange of the conditional expression.
-    if (Ranges.size() <= PartialDiagnostic::MaxArguments)
-      for (SmallVectorImpl<SourceRange>::iterator I = Ranges.begin(),
-                                                  E = Ranges.end();
-           I != E; ++I)
-        PDiag << *I;
-    else
-      PDiag << Second->getSourceRange();
+    } else {
+      PDiag << (unsigned)Decls.size();
+      for (auto *VD : Decls)
+        PDiag << VD->getDeclName();
+    }
+
+    for (auto Range : Ranges)
+      PDiag << Range;
 
     S.Diag(Ranges.begin()->getBegin(), PDiag);
   }
@@ -1551,23 +1544,78 @@ namespace {
 
   // A visitor to determine if a continue or break statement is a
   // subexpression.
-  class BreakContinueFinder : public EvaluatedExprVisitor<BreakContinueFinder> {
+  class BreakContinueFinder : public ConstEvaluatedExprVisitor<BreakContinueFinder> {
     SourceLocation BreakLoc;
     SourceLocation ContinueLoc;
+    bool InSwitch = false;
+
   public:
-    BreakContinueFinder(Sema &S, Stmt* Body) :
+    BreakContinueFinder(Sema &S, const Stmt* Body) :
         Inherited(S.Context) {
       Visit(Body);
     }
 
-    typedef EvaluatedExprVisitor<BreakContinueFinder> Inherited;
+    typedef ConstEvaluatedExprVisitor<BreakContinueFinder> Inherited;
 
-    void VisitContinueStmt(ContinueStmt* E) {
+    void VisitContinueStmt(const ContinueStmt* E) {
       ContinueLoc = E->getContinueLoc();
     }
 
-    void VisitBreakStmt(BreakStmt* E) {
-      BreakLoc = E->getBreakLoc();
+    void VisitBreakStmt(const BreakStmt* E) {
+      if (!InSwitch)
+        BreakLoc = E->getBreakLoc();
+    }
+
+    void VisitSwitchStmt(const SwitchStmt* S) {
+      if (const Stmt *Init = S->getInit())
+        Visit(Init);
+      if (const Stmt *CondVar = S->getConditionVariableDeclStmt())
+        Visit(CondVar);
+      if (const Stmt *Cond = S->getCond())
+        Visit(Cond);
+
+      // Don't return break statements from the body of a switch.
+      InSwitch = true;
+      if (const Stmt *Body = S->getBody())
+        Visit(Body);
+      InSwitch = false;
+    }
+
+    void VisitForStmt(const ForStmt *S) {
+      // Only visit the init statement of a for loop; the body
+      // has a different break/continue scope.
+      if (const Stmt *Init = S->getInit())
+        Visit(Init);
+    }
+
+    void VisitWhileStmt(const WhileStmt *) {
+      // Do nothing; the children of a while loop have a different
+      // break/continue scope.
+    }
+
+    void VisitDoStmt(const DoStmt *) {
+      // Do nothing; the children of a do-while loop have a different
+      // break/continue scope.
+    }
+
+    void VisitCXXForRangeStmt(const CXXForRangeStmt *S) {
+      // Only visit the initialization of a range-based for loop; the body
+      // has a different break/continue scope.
+      if (const Stmt *Range = S->getRangeStmt())
+        Visit(Range);
+      if (const Stmt *Begin = S->getBeginStmt())
+        Visit(Begin);
+      if (const Stmt *End = S->getEndStmt())
+        Visit(End);
+    }
+
+    void VisitObjCForCollectionStmt(const ObjCForCollectionStmt *S) {
+      // Only visit the initialization of a collection loop; the body
+      // has a different break/continue scope.
+      if (const Stmt *Element = S->getElement())
+        Visit(Element);
+      if (const Stmt *Collection = S->getCollection())
+        Visit(Collection);
     }
 
     bool ContinueFound() { return ContinueLoc.isValid(); }
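The InSwitch flag exists because a break inside a nested switch terminates the switch, not the surrounding loop being analyzed; for example (illustrative):

```cpp
void f(int n) {
  for (int i = 0; i < n; ++i) {
    switch (i) {
    case 0:
      break;   // exits the switch only; must not count as a loop break
    default:
      break;
    }
  }
}
```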
@@ -1996,11 +2044,11 @@ StmtResult Sema::ActOnCXXForRangeStmt(Scope *S, SourceLocation ForLoc,
     return StmtError();
   }
 
-  // Coroutines: 'for co_await' implicitly co_awaits its range.
-  if (CoawaitLoc.isValid()) {
-    ExprResult Coawait = ActOnCoawaitExpr(S, CoawaitLoc, Range);
-    if (Coawait.isInvalid()) return StmtError();
-    Range = Coawait.get();
+  // Build the coroutine state immediately, rather than later during template
+  // instantiation.
+  if (!CoawaitLoc.isInvalid()) {
+    if (!ActOnCoroutineBodyStart(S, CoawaitLoc, "co_await"))
+      return StmtError();
   }
 
   // Build  auto && __range = range-init
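With this change the implicit await in a 'for co_await' loop attaches to the begin-expr rather than the range initializer, in line with the Coroutines TS desugaring; roughly (a sketch of the lowering, not code from this file):

```cpp
// for co_await (auto v : r) { ... }   now lowers roughly to:
//   auto &&__range = r;                  // no implicit co_await here anymore
//   auto __begin   = co_await /*begin-expr on __range*/;
//   auto __end     = /*end-expr on __range*/;
//   for (; __begin != __end; co_await ++__begin) {
//     auto v = *__begin;
//     ...
//   }
```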
@@ -2038,16 +2086,12 @@ StmtResult Sema::ActOnCXXForRangeStmt(Scope *S, SourceLocation ForLoc,
 /// BeginExpr and EndExpr are set and FRS_Success is returned on success;
 /// CandidateSet and BEF are set and some non-success value is returned on
 /// failure.
-static Sema::ForRangeStatus BuildNonArrayForRange(Sema &SemaRef,
-                                            Expr *BeginRange, Expr *EndRange,
-                                            QualType RangeType,
-                                            VarDecl *BeginVar,
-                                            VarDecl *EndVar,
-                                            SourceLocation ColonLoc,
-                                            OverloadCandidateSet *CandidateSet,
-                                            ExprResult *BeginExpr,
-                                            ExprResult *EndExpr,
-                                            BeginEndFunction *BEF) {
+static Sema::ForRangeStatus
+BuildNonArrayForRange(Sema &SemaRef, Expr *BeginRange, Expr *EndRange,
+                      QualType RangeType, VarDecl *BeginVar, VarDecl *EndVar,
+                      SourceLocation ColonLoc, SourceLocation CoawaitLoc,
+                      OverloadCandidateSet *CandidateSet, ExprResult *BeginExpr,
+                      ExprResult *EndExpr, BeginEndFunction *BEF) {
   DeclarationNameInfo BeginNameInfo(
       &SemaRef.PP.getIdentifierTable().get("begin"), ColonLoc);
   DeclarationNameInfo EndNameInfo(&SemaRef.PP.getIdentifierTable().get("end"),
@@ -2094,6 +2138,15 @@ static Sema::ForRangeStatus BuildNonArrayForRange(Sema &SemaRef,
           << ColonLoc << BEF_begin << BeginRange->getType();
     return RangeStatus;
   }
+  if (!CoawaitLoc.isInvalid()) {
+    // FIXME: getCurScope() should not be used during template instantiation.
+    // We should pick up the set of unqualified lookup results for operator
+    // co_await during the initial parse.
+    *BeginExpr = SemaRef.ActOnCoawaitExpr(SemaRef.getCurScope(), ColonLoc,
+                                          BeginExpr->get());
+    if (BeginExpr->isInvalid())
+      return Sema::FRS_DiagnosticIssued;
+  }
   if (FinishForRangeVarDecl(SemaRef, BeginVar, BeginExpr->get(), ColonLoc,
                             diag::err_for_range_iter_deduction_failure)) {
     NoteForRangeBeginEndFunction(SemaRef, BeginExpr->get(), *BEF);
@@ -2213,8 +2266,12 @@ Sema::BuildCXXForRangeStmt(SourceLocation ForLoc, SourceLocation CoawaitLoc,
 
     // Deduce any 'auto's in the loop variable as 'DependentTy'. We'll fill
     // them in properly when we instantiate the loop.
-    if (!LoopVar->isInvalidDecl() && Kind != BFRK_Check)
+    if (!LoopVar->isInvalidDecl() && Kind != BFRK_Check) {
+      if (auto *DD = dyn_cast<DecompositionDecl>(LoopVar))
+        for (auto *Binding : DD->bindings())
+          Binding->setType(Context.DependentTy);
       LoopVar->setType(SubstAutoType(LoopVar->getType(), Context.DependentTy));
+    }
   } else if (!BeginDeclStmt.get()) {
     SourceLocation RangeLoc = RangeVar->getLocation();
 
@@ -2256,6 +2313,11 @@ Sema::BuildCXXForRangeStmt(SourceLocation ForLoc, SourceLocation CoawaitLoc,
 
       // begin-expr is __range.
       BeginExpr = BeginRangeRef;
+      if (!CoawaitLoc.isInvalid()) {
+        BeginExpr = ActOnCoawaitExpr(S, ColonLoc, BeginExpr.get());
+        if (BeginExpr.isInvalid())
+          return StmtError();
+      }
       if (FinishForRangeVarDecl(*this, BeginVar, BeginRangeRef.get(), ColonLoc,
                                 diag::err_for_range_iter_deduction_failure)) {
         NoteForRangeBeginEndFunction(*this, BeginExpr.get(), BEF_begin);
@@ -2268,9 +2330,57 @@ Sema::BuildCXXForRangeStmt(SourceLocation ForLoc, SourceLocation CoawaitLoc,
         BoundExpr = IntegerLiteral::Create(
             Context, CAT->getSize(), Context.getPointerDiffType(), RangeLoc);
       else if (const VariableArrayType *VAT =
-               dyn_cast<VariableArrayType>(UnqAT))
-        BoundExpr = VAT->getSizeExpr();
-      else {
+               dyn_cast<VariableArrayType>(UnqAT)) {
+        // For a variably modified type we can't just use the expression within
+        // the array bounds, since we don't want that to be re-evaluated here.
+        // Rather, we need to determine what it was when the array was first
+        // created - so we resort to using sizeof(vla)/sizeof(element).
+        // For example:
+        //  void f(int b) {
+        //    int vla[b];
+        //    b = -1;   <-- This should not affect the number of iterations below
+        //    for (int &c : vla) { .. }
+        //  }
+
+        // FIXME: This results in codegen generating IR that recalculates the
+        // run-time number of elements (as opposed to just using the IR Value
+        // that corresponds to the run-time value of each bound that was
+        // generated when the array was created.) If this proves too embarrassing
+        // even for unoptimized IR, consider passing a magic-value/cookie to
+        // codegen that then knows to simply use that initial llvm::Value (that
+        // corresponds to the bound at time of array creation) within
+        // getelementptr.  But be prepared to pay the price of increasing a
+        // customized form of coupling between the two components - which could
+        // be hard to maintain as the codebase evolves.
+
+        ExprResult SizeOfVLAExprR = ActOnUnaryExprOrTypeTraitExpr(
+            EndVar->getLocation(), UETT_SizeOf,
+            /*isType=*/true,
+            CreateParsedType(VAT->desugar(), Context.getTrivialTypeSourceInfo(
+                                                 VAT->desugar(), RangeLoc))
+                .getAsOpaquePtr(),
+            EndVar->getSourceRange());
+        if (SizeOfVLAExprR.isInvalid())
+          return StmtError();
+
+        ExprResult SizeOfEachElementExprR = ActOnUnaryExprOrTypeTraitExpr(
+            EndVar->getLocation(), UETT_SizeOf,
+            /*isType=*/true,
+            CreateParsedType(VAT->desugar(),
+                             Context.getTrivialTypeSourceInfo(
+                                 VAT->getElementType(), RangeLoc))
+                .getAsOpaquePtr(),
+            EndVar->getSourceRange());
+        if (SizeOfEachElementExprR.isInvalid())
+          return StmtError();
+
+        BoundExpr =
+            ActOnBinOp(S, EndVar->getLocation(), tok::slash,
+                       SizeOfVLAExprR.get(), SizeOfEachElementExprR.get());
+        if (BoundExpr.isInvalid())
+          return StmtError();
+
+      } else {
         // Can't be a DependentSizedArrayType or an IncompleteArrayType since
         // UnqAT is not incomplete and Range is not type-dependent.
         llvm_unreachable("Unexpected array type in for-range");
@@ -2290,11 +2400,10 @@ Sema::BuildCXXForRangeStmt(SourceLocation ForLoc, SourceLocation CoawaitLoc,
       OverloadCandidateSet CandidateSet(RangeLoc,
                                         OverloadCandidateSet::CSK_Normal);
       BeginEndFunction BEFFailure;
-      ForRangeStatus RangeStatus =
-          BuildNonArrayForRange(*this, BeginRangeRef.get(),
-                                EndRangeRef.get(), RangeType,
-                                BeginVar, EndVar, ColonLoc, &CandidateSet,
-                                &BeginExpr, &EndExpr, &BEFFailure);
+      ForRangeStatus RangeStatus = BuildNonArrayForRange(
+          *this, BeginRangeRef.get(), EndRangeRef.get(), RangeType, BeginVar,
+          EndVar, ColonLoc, CoawaitLoc, &CandidateSet, &BeginExpr, &EndExpr,
+          &BEFFailure);
 
       if (Kind == BFRK_Build && RangeStatus == FRS_NoViableFunction &&
           BEFFailure == BEF_begin) {
@@ -2391,6 +2500,9 @@ Sema::BuildCXXForRangeStmt(SourceLocation ForLoc, SourceLocation CoawaitLoc,
 
     IncrExpr = ActOnUnaryOp(S, ColonLoc, tok::plusplus, BeginRef.get());
     if (!IncrExpr.isInvalid() && CoawaitLoc.isValid())
+      // FIXME: getCurScope() should not be used during template instantiation.
+      // We should pick up the set of unqualified lookup results for operator
+      // co_await during the initial parse.
       IncrExpr = ActOnCoawaitExpr(S, CoawaitLoc, IncrExpr.get());
     if (!IncrExpr.isInvalid())
       IncrExpr = ActOnFinishFullExpr(IncrExpr.get());
@@ -3915,8 +4027,9 @@ void Sema::ActOnCapturedRegionStart(SourceLocation Loc, Scope *CurScope,
   DeclContext *DC = CapturedDecl::castToDeclContext(CD);
   IdentifierInfo *ParamName = &Context.Idents.get("__context");
   QualType ParamType = Context.getPointerType(Context.getTagDeclType(RD));
-  ImplicitParamDecl *Param
-    = ImplicitParamDecl::Create(Context, DC, Loc, ParamName, ParamType);
+  auto *Param =
+      ImplicitParamDecl::Create(Context, DC, Loc, ParamName, ParamType,
+                                ImplicitParamDecl::CapturedContext);
   DC->addDecl(Param);
 
   CD->setContextParam(0, Param);
@@ -3951,15 +4064,17 @@ void Sema::ActOnCapturedRegionStart(SourceLocation Loc, Scope *CurScope,
              "null type has been found already for '__context' parameter");
       IdentifierInfo *ParamName = &Context.Idents.get("__context");
       QualType ParamType = Context.getPointerType(Context.getTagDeclType(RD));
-      ImplicitParamDecl *Param
-        = ImplicitParamDecl::Create(Context, DC, Loc, ParamName, ParamType);
+      auto *Param =
+          ImplicitParamDecl::Create(Context, DC, Loc, ParamName, ParamType,
+                                    ImplicitParamDecl::CapturedContext);
       DC->addDecl(Param);
       CD->setContextParam(ParamNum, Param);
       ContextIsFound = true;
     } else {
       IdentifierInfo *ParamName = &Context.Idents.get(I->first);
-      ImplicitParamDecl *Param
-        = ImplicitParamDecl::Create(Context, DC, Loc, ParamName, I->second);
+      auto *Param =
+          ImplicitParamDecl::Create(Context, DC, Loc, ParamName, I->second,
+                                    ImplicitParamDecl::CapturedContext);
       DC->addDecl(Param);
       CD->setParam(ParamNum, Param);
     }
@@ -3969,8 +4084,9 @@ void Sema::ActOnCapturedRegionStart(SourceLocation Loc, Scope *CurScope,
     // Add __context implicitly if it is not specified.
     IdentifierInfo *ParamName = &Context.Idents.get("__context");
     QualType ParamType = Context.getPointerType(Context.getTagDeclType(RD));
-    ImplicitParamDecl *Param =
-        ImplicitParamDecl::Create(Context, DC, Loc, ParamName, ParamType);
+    auto *Param =
+        ImplicitParamDecl::Create(Context, DC, Loc, ParamName, ParamType,
+                                  ImplicitParamDecl::CapturedContext);
     DC->addDecl(Param);
     CD->setContextParam(ParamNum, Param);
   }
diff --git a/interpreter/llvm/src/tools/clang/lib/Sema/SemaStmtAsm.cpp b/interpreter/llvm/src/tools/clang/lib/Sema/SemaStmtAsm.cpp
index 5f91cac14a383..c182b35bfad4c 100644
--- a/interpreter/llvm/src/tools/clang/lib/Sema/SemaStmtAsm.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Sema/SemaStmtAsm.cpp
@@ -277,6 +277,7 @@ StmtResult Sema::ActOnGCCAsmStmt(SourceLocation AsmLoc, bool IsSimple,
       if (RequireCompleteType(OutputExpr->getLocStart(), Exprs[i]->getType(),
                               diag::err_dereference_incomplete_type))
         return StmtError();
+      LLVM_FALLTHROUGH;
     default:
       return StmtError(Diag(OutputExpr->getLocStart(),
                             diag::err_asm_invalid_lvalue_in_output)
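Several hunks in this patch insert LLVM_FALLTHROUGH before a deliberately shared case label; the macro expands to the [[fallthrough]] attribute where the compiler supports it and to nothing otherwise, silencing -Wimplicit-fallthrough. A minimal sketch (the enum and helpers are hypothetical):

    #include "llvm/Support/Compiler.h"  // defines LLVM_FALLTHROUGH

    enum class Kind { Checked, Unchecked };
    void verify(int);
    void emit(int);

    void dispatch(Kind K, int V) {
      switch (K) {
      case Kind::Checked:
        verify(V);
        LLVM_FALLTHROUGH;  // intentional: checked values are also emitted
      case Kind::Unchecked:
        emit(V);
        break;
      }
    }
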
diff --git a/interpreter/llvm/src/tools/clang/lib/Sema/SemaTemplate.cpp b/interpreter/llvm/src/tools/clang/lib/Sema/SemaTemplate.cpp
index 754e710845a55..123ebb652149f 100644
--- a/interpreter/llvm/src/tools/clang/lib/Sema/SemaTemplate.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Sema/SemaTemplate.cpp
@@ -2856,6 +2856,101 @@ checkBuiltinTemplateIdType(Sema &SemaRef, BuiltinTemplateDecl *BTD,
   llvm_unreachable("unexpected BuiltinTemplateDecl!");
 }
 
+/// Determine whether this alias template is "enable_if_t".
+static bool isEnableIfAliasTemplate(TypeAliasTemplateDecl *AliasTemplate) {
+  return AliasTemplate->getName().equals("enable_if_t");
+}
+
+/// Collect all of the separable terms in the given condition, which
+/// might be a conjunction.
+///
+/// FIXME: The right answer is to convert the logical expression into
+/// disjunctive normal form, so we can find the first failed term
+/// within each possible clause.
+static void collectConjunctionTerms(Expr *Clause,
+                                    SmallVectorImpl<Expr *> &Terms) {
+  if (auto BinOp = dyn_cast<BinaryOperator>(Clause->IgnoreParenImpCasts())) {
+    if (BinOp->getOpcode() == BO_LAnd) {
+      collectConjunctionTerms(BinOp->getLHS(), Terms);
+      collectConjunctionTerms(BinOp->getRHS(), Terms);
+    }
+
+    return;
+  }
+
+  Terms.push_back(Clause);
+}
+
+// The ranges-v3 library uses an odd pattern of a top-level "||" with
+// a left-hand side that is value-dependent but never true. Identify
+// the idiom and ignore that term.
+static Expr *lookThroughRangesV3Condition(Preprocessor &PP, Expr *Cond) {
+  // Top-level '||'.
+  auto *BinOp = dyn_cast<BinaryOperator>(Cond->IgnoreParenImpCasts());
+  if (!BinOp) return Cond;
+
+  if (BinOp->getOpcode() != BO_LOr) return Cond;
+
+  // With an inner '==' that has a literal on the right-hand side.
+  Expr *LHS = BinOp->getLHS();
+  auto *InnerBinOp = dyn_cast<BinaryOperator>(LHS->IgnoreParenImpCasts());
+  if (!InnerBinOp) return Cond;
+
+  if (InnerBinOp->getOpcode() != BO_EQ ||
+      !isa<IntegerLiteral>(InnerBinOp->getRHS()))
+    return Cond;
+
+  // If the inner binary operation came from a macro expansion named
+  // CONCEPT_REQUIRES or CONCEPT_REQUIRES_, return the right-hand side
+  // of the '||', which is the real, user-provided condition.
+  SourceLocation Loc = InnerBinOp->getExprLoc();
+  if (!Loc.isMacroID()) return Cond;
+
+  StringRef MacroName = PP.getImmediateMacroName(Loc);
+  if (MacroName == "CONCEPT_REQUIRES" || MacroName == "CONCEPT_REQUIRES_")
+    return BinOp->getRHS();
+
+  return Cond;
+}
+
+/// Find the failed subexpression within enable_if, and describe it
+/// with a string.
+static std::pair<Expr *, std::string>
+findFailedEnableIfCondition(Sema &S, Expr *Cond) {
+  Cond = lookThroughRangesV3Condition(S.PP, Cond);
+
+  // Separate out all of the terms in a conjunction.
+  SmallVector<Expr *, 4> Terms;
+  collectConjunctionTerms(Cond, Terms);
+
+  // Determine which term failed.
+  Expr *FailedCond = nullptr;
+  for (Expr *Term : Terms) {
+    // The initialization of the parameter from the argument is
+    // a constant-evaluated context.
+    EnterExpressionEvaluationContext ConstantEvaluated(
+      S, Sema::ExpressionEvaluationContext::ConstantEvaluated);
+
+    bool Succeeded;
+    if (Term->EvaluateAsBooleanCondition(Succeeded, S.Context) &&
+        !Succeeded) {
+      FailedCond = Term->IgnoreParenImpCasts();
+      break;
+    }
+  }
+
+  if (!FailedCond)
+    FailedCond = Cond->IgnoreParenImpCasts();
+
+  std::string Description;
+  {
+    llvm::raw_string_ostream Out(Description);
+    FailedCond->printPretty(Out, nullptr,
+                            PrintingPolicy(S.Context.getLangOpts()));
+  }
+  return { FailedCond, Description };
+}
+
 QualType Sema::CheckTemplateIdType(TemplateName Name,
                                    SourceLocation TemplateLoc,
                                    TemplateArgumentListInfo &TemplateArgs) {
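The helpers above turn the generic "no type named 'type' in std::enable_if" error into a note naming the specific conjunct that failed. A sketch of code that benefits:

    #include <type_traits>

    template <typename T,
              typename = std::enable_if_t<std::is_integral<T>::value &&
                                          (sizeof(T) > 4)>>
    void only_big_ints(T);

    // only_big_ints(1);  // ill-formed; the diagnostic can now single out
    //                    // 'sizeof(T) > 4' as the unsatisfied term.
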
@@ -2902,12 +2997,12 @@ QualType Sema::CheckTemplateIdType(TemplateName Name,
     if (Pattern->isInvalidDecl())
       return QualType();
 
-    TemplateArgumentList TemplateArgs(TemplateArgumentList::OnStack,
-                                      Converted);
+    TemplateArgumentList StackTemplateArgs(TemplateArgumentList::OnStack,
+                                           Converted);
 
     // Only substitute for the innermost template argument list.
     MultiLevelTemplateArgumentList TemplateArgLists;
-    TemplateArgLists.addOuterTemplateArguments(&TemplateArgs);
+    TemplateArgLists.addOuterTemplateArguments(&StackTemplateArgs);
     unsigned Depth = AliasTemplate->getTemplateParameters()->getDepth();
     for (unsigned I = 0; I < Depth; ++I)
       TemplateArgLists.addOuterTemplateArguments(None);
@@ -2920,8 +3015,42 @@ QualType Sema::CheckTemplateIdType(TemplateName Name,
     CanonType = SubstType(Pattern->getUnderlyingType(),
                           TemplateArgLists, AliasTemplate->getLocation(),
                           AliasTemplate->getDeclName());
-    if (CanonType.isNull())
+    if (CanonType.isNull()) {
+      // If this was enable_if and we failed to find the nested type
+      // within enable_if in a SFINAE context, dig out the specific
+      // enable_if condition that failed and present that instead.
+      if (isEnableIfAliasTemplate(AliasTemplate)) {
+        if (auto DeductionInfo = isSFINAEContext()) {
+          if (*DeductionInfo &&
+              (*DeductionInfo)->hasSFINAEDiagnostic() &&
+              (*DeductionInfo)->peekSFINAEDiagnostic().second.getDiagID() ==
+                diag::err_typename_nested_not_found_enable_if &&
+              TemplateArgs[0].getArgument().getKind()
+                == TemplateArgument::Expression) {
+            Expr *FailedCond;
+            std::string FailedDescription;
+            std::tie(FailedCond, FailedDescription) =
+              findFailedEnableIfCondition(
+                *this, TemplateArgs[0].getSourceExpression());
+
+            // Remove the old SFINAE diagnostic.
+            PartialDiagnosticAt OldDiag =
+              {SourceLocation(), PartialDiagnostic::NullDiagnostic()};
+            (*DeductionInfo)->takeSFINAEDiagnostic(OldDiag);
+
+            // Add a new SFINAE diagnostic specifying which condition
+            // failed.
+            (*DeductionInfo)->addSFINAEDiagnostic(
+              OldDiag.first,
+              PDiag(diag::err_typename_nested_not_found_requirement)
+                << FailedDescription
+                << FailedCond->getSourceRange());
+          }
+        }
+      }
+
       return QualType();
+    }
   } else if (Name.isDependent() ||
              TemplateSpecializationType::anyDependentTemplateArguments(
                TemplateArgs, InstantiationDependent)) {
@@ -4070,6 +4199,7 @@ bool Sema::CheckTemplateTypeArgument(TemplateTypeParmDecl *Param,
       }
     }
     // fallthrough
+    LLVM_FALLTHROUGH;
   }
   default: {
     // We have a template type parameter but the template argument
@@ -5254,10 +5384,16 @@ enum NullPointerValueKind {
 /// value of the appropriate type.
 static NullPointerValueKind
 isNullPointerValueTemplateArgument(Sema &S, NonTypeTemplateParmDecl *Param,
-                                   QualType ParamType, Expr *Arg) {
+                                   QualType ParamType, Expr *Arg,
+                                   Decl *Entity = nullptr) {
   if (Arg->isValueDependent() || Arg->isTypeDependent())
     return NPV_NotNullPointer;
 
+  // dllimport'd entities aren't constant but are available inside of template
+  // arguments.
+  if (Entity && Entity->hasAttr<DLLImportAttr>())
+    return NPV_NotNullPointer;
+
   if (!S.isCompleteType(Arg->getExprLoc(), ParamType))
     llvm_unreachable(
         "Incomplete parameter type in isNullPointerValueTemplateArgument!");
@@ -5501,14 +5637,8 @@ CheckTemplateArgumentAddressOfObjectOrFunction(Sema &S,
 
   // If our parameter has pointer type, check for a null template value.
   if (ParamType->isPointerType() || ParamType->isNullPtrType()) {
-    NullPointerValueKind NPV;
-    // dllimport'd entities aren't constant but are available inside of template
-    // arguments.
-    if (Entity && Entity->hasAttr<DLLImportAttr>())
-      NPV = NPV_NotNullPointer;
-    else
-      NPV = isNullPointerValueTemplateArgument(S, Param, ParamType, ArgIn);
-    switch (NPV) {
+    switch (isNullPointerValueTemplateArgument(S, Param, ParamType, ArgIn,
+                                               Entity)) {
     case NPV_NullPointer:
       S.Diag(Arg->getExprLoc(), diag::warn_cxx98_compat_template_arg_null);
       Converted = TemplateArgument(S.Context.getCanonicalType(ParamType),
@@ -5700,39 +5830,8 @@ static bool CheckTemplateArgumentPointerToMember(Sema &S,
                                                  TemplateArgument &Converted) {
   bool Invalid = false;
 
-  // Check for a null pointer value.
   Expr *Arg = ResultArg;
-  switch (isNullPointerValueTemplateArgument(S, Param, ParamType, Arg)) {
-  case NPV_Error:
-    return true;
-  case NPV_NullPointer:
-    S.Diag(Arg->getExprLoc(), diag::warn_cxx98_compat_template_arg_null);
-    Converted = TemplateArgument(S.Context.getCanonicalType(ParamType),
-                                 /*isNullPtr*/true);
-    return false;
-  case NPV_NotNullPointer:
-    break;
-  }
-
   bool ObjCLifetimeConversion;
-  if (S.IsQualificationConversion(Arg->getType(),
-                                  ParamType.getNonReferenceType(),
-                                  false, ObjCLifetimeConversion)) {
-    Arg = S.ImpCastExprToType(Arg, ParamType, CK_NoOp,
-                              Arg->getValueKind()).get();
-    ResultArg = Arg;
-  } else if (!S.Context.hasSameUnqualifiedType(Arg->getType(),
-                ParamType.getNonReferenceType())) {
-    // We can't perform this conversion.
-    S.Diag(Arg->getLocStart(), diag::err_template_arg_not_convertible)
-      << Arg->getType() << ParamType << Arg->getSourceRange();
-    S.Diag(Param->getLocation(), diag::note_template_param_here);
-    return true;
-  }
-
-  // See through any implicit casts we added to fix the type.
-  while (ImplicitCastExpr *Cast = dyn_cast<ImplicitCastExpr>(Arg))
-    Arg = Cast->getSubExpr();
 
   // C++ [temp.arg.nontype]p1:
   //
@@ -5789,6 +5888,37 @@ static bool CheckTemplateArgumentPointerToMember(Sema &S,
     DRE = nullptr;
   }
 
+  ValueDecl *Entity = DRE ? DRE->getDecl() : nullptr;
+
+  // Check for a null pointer value.
+  switch (isNullPointerValueTemplateArgument(S, Param, ParamType, ResultArg,
+                                             Entity)) {
+  case NPV_Error:
+    return true;
+  case NPV_NullPointer:
+    S.Diag(ResultArg->getExprLoc(), diag::warn_cxx98_compat_template_arg_null);
+    Converted = TemplateArgument(S.Context.getCanonicalType(ParamType),
+                                 /*isNullPtr*/true);
+    return false;
+  case NPV_NotNullPointer:
+    break;
+  }
+
+  if (S.IsQualificationConversion(ResultArg->getType(),
+                                  ParamType.getNonReferenceType(), false,
+                                  ObjCLifetimeConversion)) {
+    ResultArg = S.ImpCastExprToType(ResultArg, ParamType, CK_NoOp,
+                                    ResultArg->getValueKind())
+                    .get();
+  } else if (!S.Context.hasSameUnqualifiedType(
+                 ResultArg->getType(), ParamType.getNonReferenceType())) {
+    // We can't perform this conversion.
+    S.Diag(ResultArg->getLocStart(), diag::err_template_arg_not_convertible)
+        << ResultArg->getType() << ParamType << ResultArg->getSourceRange();
+    S.Diag(Param->getLocation(), diag::note_template_param_here);
+    return true;
+  }
+
   if (!DRE)
     return S.Diag(Arg->getLocStart(),
                   diag::err_template_arg_not_pointer_to_member_form)
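Both null-pointer checks now go through isNullPointerValueTemplateArgument with the referenced entity, so a dllimport'd object (whose address is not a constant in the usual sense) is not misclassified as a null template argument. An illustration, assuming a Windows target with MS extensions enabled:

    struct S { int m; };
    __declspec(dllimport) extern S imported;

    template <S *P> struct ByAddress {};
    ByAddress<&imported> use;  // '&imported' is treated as non-null even
                               // though it is not a compile-time constant
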
@@ -7659,6 +7789,7 @@ Sema::CheckSpecializationInstantiationRedecl(SourceLocation NewLoc,
         return false;
       }
       // Fall through
+      LLVM_FALLTHROUGH;
 
     case TSK_ExplicitInstantiationDeclaration:
     case TSK_ExplicitInstantiationDefinition:
@@ -7685,6 +7816,7 @@ Sema::CheckSpecializationInstantiationRedecl(SourceLocation NewLoc,
 
       return true;
     }
+    llvm_unreachable("The switch over PrevTSK must be exhaustive.");
 
   case TSK_ExplicitInstantiationDeclaration:
     switch (PrevTSK) {
@@ -7966,6 +8098,7 @@ bool Sema::CheckFunctionTemplateSpecialization(
   TemplateSpecializationKind TSK = SpecInfo->getTemplateSpecializationKind();
   if (TSK == TSK_Undeclared || TSK == TSK_ImplicitInstantiation) {
     Specialization->setLocation(FD->getLocation());
+    Specialization->setLexicalDeclContext(FD->getLexicalDeclContext());
     // C++11 [dcl.constexpr]p1: An explicit specialization of a constexpr
     // function can differ from the template declaration with respect to
     // the constexpr specifier.
@@ -8026,6 +8159,7 @@ bool Sema::CheckFunctionTemplateSpecialization(
       // FIXME: We need an update record for this AST mutation.
       Specialization->setDeletedAsWritten(false);
     }
+    // FIXME: We need an update record for this AST mutation.
     SpecInfo->setTemplateSpecializationKind(TSK_ExplicitSpecialization);
     MarkUnusedFileScopedDecl(Specialization);
   }
@@ -8672,7 +8806,8 @@ Sema::ActOnExplicitInstantiation(Scope *S,
                         /*ModulePrivateLoc=*/SourceLocation(),
                         MultiTemplateParamsArg(), Owned, IsDependent,
                         SourceLocation(), false, TypeResult(),
-                        /*IsTypeSpecifier*/false);
+                        /*IsTypeSpecifier*/false,
+                        /*IsTemplateParamOrArg*/false);
   assert(!IsDependent && "explicit instantiation of dependent name not yet handled");
 
   if (!TagD)
@@ -9018,7 +9153,8 @@ DeclResult Sema::ActOnExplicitInstantiation(Scope *S,
   //   A member function [...] of a class template can be explicitly
   //  instantiated from the member definition associated with its class
   //  template.
-  UnresolvedSet<8> Matches;
+  UnresolvedSet<8> TemplateMatches;
+  FunctionDecl *NonTemplateMatch = nullptr;
   AttributeList *Attr = D.getDeclSpec().getAttributes().getList();
   TemplateSpecCandidateSet FailedCandidates(D.getIdentifierLoc());
   for (LookupResult::iterator P = Previous.begin(), PEnd = Previous.end();
@@ -9029,11 +9165,13 @@ DeclResult Sema::ActOnExplicitInstantiation(Scope *S,
         QualType Adjusted = adjustCCAndNoReturn(R, Method->getType(),
                                                 /*AdjustExceptionSpec*/true);
         if (Context.hasSameUnqualifiedType(Method->getType(), Adjusted)) {
-          Matches.clear();
-
-          Matches.addDecl(Method, P.getAccess());
-          if (Method->getTemplateSpecializationKind() == TSK_Undeclared)
-            break;
+          if (Method->getPrimaryTemplate()) {
+            TemplateMatches.addDecl(Method, P.getAccess());
+          } else {
+            // FIXME: Can this assert ever happen?  Needs a test.
+            assert(!NonTemplateMatch && "Multiple NonTemplateMatches");
+            NonTemplateMatch = Method;
+          }
         }
       }
     }
@@ -9072,22 +9210,25 @@ DeclResult Sema::ActOnExplicitInstantiation(Scope *S,
       continue;
     }
 
-    Matches.addDecl(Specialization, P.getAccess());
+    TemplateMatches.addDecl(Specialization, P.getAccess());
   }
 
-  // Find the most specialized function template specialization.
-  UnresolvedSetIterator Result = getMostSpecialized(
-      Matches.begin(), Matches.end(), FailedCandidates,
-      D.getIdentifierLoc(),
-      PDiag(diag::err_explicit_instantiation_not_known) << Name,
-      PDiag(diag::err_explicit_instantiation_ambiguous) << Name,
-      PDiag(diag::note_explicit_instantiation_candidate));
-
-  if (Result == Matches.end())
-    return true;
+  FunctionDecl *Specialization = NonTemplateMatch;
+  if (!Specialization) {
+    // Find the most specialized function template specialization.
+    UnresolvedSetIterator Result = getMostSpecialized(
+        TemplateMatches.begin(), TemplateMatches.end(), FailedCandidates,
+        D.getIdentifierLoc(),
+        PDiag(diag::err_explicit_instantiation_not_known) << Name,
+        PDiag(diag::err_explicit_instantiation_ambiguous) << Name,
+        PDiag(diag::note_explicit_instantiation_candidate));
+
+    if (Result == TemplateMatches.end())
+      return true;
 
-  // Ignore access control bits, we don't need them for redeclaration checking.
-  FunctionDecl *Specialization = cast<FunctionDecl>(*Result);
+    // Ignore access control bits, we don't need them for redeclaration checking.
+    Specialization = cast<FunctionDecl>(*Result);
+  }
 
   // C++11 [except.spec]p4
   // In an explicit instantiation an exception-specification may be specified,
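The TemplateMatches/NonTemplateMatch split above makes explicit instantiation pick a matching non-template member directly instead of running most-specialized selection over member-template candidates. A sketch:

    template <typename T> struct Holder {
      void get() {}                          // non-template member
      template <typename U> void get(U) {}   // member template
    };

    // Selects the non-template member; getMostSpecialized() is not consulted.
    template void Holder<int>::get();
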
@@ -9343,7 +9484,7 @@ Sema::ActOnTypenameType(Scope *S,
 /// Determine whether this failed name lookup should be treated as being
 /// disabled by a usage of std::enable_if.
 static bool isEnableIf(NestedNameSpecifierLoc NNS, const IdentifierInfo &II,
-                       SourceRange &CondRange) {
+                       SourceRange &CondRange, Expr *&Cond) {
   // We must be looking for a ::type...
   if (!II.isStr("type"))
     return false;
@@ -9373,6 +9514,19 @@ static bool isEnableIf(NestedNameSpecifierLoc NNS, const IdentifierInfo &II,
 
   // Assume the first template argument is the condition.
   CondRange = EnableIfTSTLoc.getArgLoc(0).getSourceRange();
+
+  // Dig out the condition.
+  Cond = nullptr;
+  if (EnableIfTSTLoc.getArgLoc(0).getArgument().getKind()
+        != TemplateArgument::Expression)
+    return true;
+
+  Cond = EnableIfTSTLoc.getArgLoc(0).getSourceExpression();
+
+  // Ignore Boolean literals; they add no value.
+  if (isa<CXXBoolLiteralExpr>(Cond->IgnoreParenCasts()))
+    Cond = nullptr;
+
   return true;
 }
 
@@ -9416,9 +9570,25 @@ Sema::CheckTypenameType(ElaboratedTypeKeyword Keyword,
     // If we're looking up 'type' within a template named 'enable_if', produce
     // a more specific diagnostic.
     SourceRange CondRange;
-    if (isEnableIf(QualifierLoc, II, CondRange)) {
+    Expr *Cond = nullptr;
+    if (isEnableIf(QualifierLoc, II, CondRange, Cond)) {
+      // If we have a condition, narrow it down to the specific failed
+      // condition.
+      if (Cond) {
+        Expr *FailedCond;
+        std::string FailedDescription;
+        std::tie(FailedCond, FailedDescription) =
+          findFailedEnableIfCondition(*this, Cond);
+
+        Diag(FailedCond->getExprLoc(),
+             diag::err_typename_nested_not_found_requirement)
+          << FailedDescription
+          << FailedCond->getSourceRange();
+        return QualType();
+      }
+
       Diag(CondRange.getBegin(), diag::err_typename_nested_not_found_enable_if)
-        << Ctx << CondRange;
+          << Ctx << CondRange;
       return QualType();
     }
 
@@ -9442,6 +9612,7 @@ Sema::CheckTypenameType(ElaboratedTypeKeyword Keyword,
   }
   // Fall through to create a dependent typename type, from which we can recover
   // better.
+  LLVM_FALLTHROUGH;
 
   case LookupResult::NotFoundInCurrentInstantiation:
     // Okay, it's a member of an unknown instantiation.
@@ -9810,7 +9981,7 @@ class ExplicitSpecializationVisibilityChecker {
       IsHiddenExplicitSpecialization =
           Spec->getMemberSpecializationInfo()
               ? !S.hasVisibleMemberSpecialization(Spec, &Modules)
-              : !S.hasVisibleDeclaration(Spec);
+              : !S.hasVisibleExplicitSpecialization(Spec, &Modules);
     } else {
       checkInstantiated(Spec);
     }
diff --git a/interpreter/llvm/src/tools/clang/lib/Sema/SemaTemplateDeduction.cpp b/interpreter/llvm/src/tools/clang/lib/Sema/SemaTemplateDeduction.cpp
index ebdf6dd57fc54..983b1ea795dda 100644
--- a/interpreter/llvm/src/tools/clang/lib/Sema/SemaTemplateDeduction.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Sema/SemaTemplateDeduction.cpp
@@ -56,8 +56,12 @@ namespace clang {
     TDF_TopLevelParameterTypeList = 0x10,
     /// \brief Within template argument deduction from overload resolution per
     /// C++ [over.over] allow matching function types that are compatible in
-    /// terms of noreturn and default calling convention adjustments.
-    TDF_InOverloadResolution = 0x20
+    /// terms of noreturn and default calling convention adjustments, or
+    /// similarly matching a declared template specialization against a
+    /// possible template, per C++ [temp.deduct.decl]. In either case, permit
+    /// deduction where the parameter is a function type that can be converted
+    /// to the argument type.
+    TDF_AllowCompatibleFunctionType = 0x20,
   };
 }
 
@@ -1306,9 +1310,10 @@ DeduceTemplateArgumentsByTypeMatch(Sema &S,
     // If the parameter type is not dependent, there is nothing to deduce.
     if (!Param->isDependentType()) {
       if (!(TDF & TDF_SkipNonDependent)) {
-        bool NonDeduced = (TDF & TDF_InOverloadResolution)?
-                          !S.isSameOrCompatibleFunctionType(CanParam, CanArg) :
-                          Param != Arg;
+        bool NonDeduced =
+            (TDF & TDF_AllowCompatibleFunctionType)
+                ? !S.isSameOrCompatibleFunctionType(CanParam, CanArg)
+                : Param != Arg;
         if (NonDeduced) {
           return Sema::TDK_NonDeducedMismatch;
         }
@@ -1318,10 +1323,10 @@ DeduceTemplateArgumentsByTypeMatch(Sema &S,
   } else if (!Param->isDependentType()) {
     CanQualType ParamUnqualType = CanParam.getUnqualifiedType(),
                 ArgUnqualType = CanArg.getUnqualifiedType();
-    bool Success = (TDF & TDF_InOverloadResolution)?
-                   S.isSameOrCompatibleFunctionType(ParamUnqualType,
-                                                    ArgUnqualType) :
-                   ParamUnqualType == ArgUnqualType;
+    bool Success =
+        (TDF & TDF_AllowCompatibleFunctionType)
+            ? S.isSameOrCompatibleFunctionType(ParamUnqualType, ArgUnqualType)
+            : ParamUnqualType == ArgUnqualType;
     if (Success)
       return Sema::TDK_Success;
   }
@@ -1524,17 +1529,56 @@ DeduceTemplateArgumentsByTypeMatch(Sema &S,
         return Sema::TDK_NonDeducedMismatch;
 
       // Check return types.
-      if (Sema::TemplateDeductionResult Result =
-              DeduceTemplateArgumentsByTypeMatch(
-                  S, TemplateParams, FunctionProtoParam->getReturnType(),
-                  FunctionProtoArg->getReturnType(), Info, Deduced, 0))
+      if (auto Result = DeduceTemplateArgumentsByTypeMatch(
+              S, TemplateParams, FunctionProtoParam->getReturnType(),
+              FunctionProtoArg->getReturnType(), Info, Deduced, 0))
         return Result;
 
-      return DeduceTemplateArguments(
-          S, TemplateParams, FunctionProtoParam->param_type_begin(),
-          FunctionProtoParam->getNumParams(),
-          FunctionProtoArg->param_type_begin(),
-          FunctionProtoArg->getNumParams(), Info, Deduced, SubTDF);
+      // Check parameter types.
+      if (auto Result = DeduceTemplateArguments(
+              S, TemplateParams, FunctionProtoParam->param_type_begin(),
+              FunctionProtoParam->getNumParams(),
+              FunctionProtoArg->param_type_begin(),
+              FunctionProtoArg->getNumParams(), Info, Deduced, SubTDF))
+        return Result;
+
+      if (TDF & TDF_AllowCompatibleFunctionType)
+        return Sema::TDK_Success;
+
+      // FIXME: Per core-2016/10/1019 (no corresponding core issue yet), permit
+      // deducing through the noexcept-specifier if it's part of the canonical
+      // type. libstdc++ relies on this.
+      Expr *NoexceptExpr = FunctionProtoParam->getNoexceptExpr();
+      if (NonTypeTemplateParmDecl *NTTP =
+          NoexceptExpr ? getDeducedParameterFromExpr(Info, NoexceptExpr)
+                       : nullptr) {
+        assert(NTTP->getDepth() == Info.getDeducedDepth() &&
+               "saw non-type template parameter with wrong depth");
+
+        llvm::APSInt Noexcept(1);
+        switch (FunctionProtoArg->canThrow(S.Context)) {
+        case CT_Cannot:
+          Noexcept = 1;
+          LLVM_FALLTHROUGH;
+
+        case CT_Can:
+          // We give E in noexcept(E) the "deduced from array bound" treatment.
+          // FIXME: Should we?
+          return DeduceNonTypeTemplateArgument(
+              S, TemplateParams, NTTP, Noexcept, S.Context.BoolTy,
+              /*ArrayBound*/true, Info, Deduced);
+
+        case CT_Dependent:
+          if (Expr *ArgNoexceptExpr = FunctionProtoArg->getNoexceptExpr())
+            return DeduceNonTypeTemplateArgument(
+                S, TemplateParams, NTTP, ArgNoexceptExpr, Info, Deduced);
+          // Can't deduce anything from throw(T...).
+          break;
+        }
+      }
+      // FIXME: Detect non-deduced exception specification mismatches?
+
+      return Sema::TDK_Success;
     }
 
     case Type::InjectedClassName: {
@@ -1544,7 +1588,7 @@ DeduceTemplateArgumentsByTypeMatch(Sema &S,
         ->getInjectedSpecializationType();
       assert(isa(Param) &&
              "injected class name is not a template specialization type");
-      // fall through
+      LLVM_FALLTHROUGH;
     }
 
     //     template-name (where template-name refers to a class template)
@@ -2383,7 +2427,8 @@ static Sema::TemplateDeductionResult ConvertDeducedTemplateArguments(
     bool HasDefaultArg = false;
     TemplateDecl *TD = dyn_cast<TemplateDecl>(Template);
     if (!TD) {
-      assert(isa<ClassTemplatePartialSpecializationDecl>(Template));
+      assert(isa<ClassTemplatePartialSpecializationDecl>(Template) ||
+             isa<VarTemplatePartialSpecializationDecl>(Template));
       return Sema::TDK_Incomplete;
     }
 
@@ -2819,6 +2864,17 @@ Sema::SubstituteExplicitTemplateArguments(
   if (FunctionType) {
     auto EPI = Proto->getExtProtoInfo();
     EPI.ExtParameterInfos = ExtParamInfos.getPointerOrNull(ParamTypes.size());
+
+    // In C++1z onwards, exception specifications are part of the function type,
+    // so substitution into the type must also substitute into the exception
+    // specification.
+    SmallVector<QualType, 4> ExceptionStorage;
+    if (getLangOpts().CPlusPlus1z &&
+        SubstExceptionSpec(
+            Function->getLocation(), EPI.ExceptionSpec, ExceptionStorage,
+            MultiLevelTemplateArgumentList(*ExplicitArgumentList)))
+      return TDK_SubstitutionFailure;
+
     *FunctionType = BuildFunctionType(ResultType, ParamTypes,
                                       Function->getLocation(),
                                       Function->getDeclName(),
@@ -3713,13 +3769,6 @@ Sema::TemplateDeductionResult Sema::DeduceTemplateArguments(
     = FunctionTemplate->getTemplateParameters();
   QualType FunctionType = Function->getType();
 
-  // When taking the address of a function, we require convertibility of
-  // the resulting function type. Otherwise, we allow arbitrary mismatches
-  // of calling convention, noreturn, and noexcept.
-  if (!IsAddressOfFunction)
-    ArgFunctionType = adjustCCAndNoReturn(ArgFunctionType, FunctionType,
-                                          /*AdjustExceptionSpec*/true);
-
   // Substitute any explicit template arguments.
   LocalInstantiationScope InstScope(*this);
   SmallVector<DeducedTemplateArgument, 4> Deduced;
@@ -3736,6 +3785,13 @@ Sema::TemplateDeductionResult Sema::DeduceTemplateArguments(
     NumExplicitlySpecified = Deduced.size();
   }
 
+  // When taking the address of a function, we require convertibility of
+  // the resulting function type. Otherwise, we allow arbitrary mismatches
+  // of calling convention and noreturn.
+  if (!IsAddressOfFunction)
+    ArgFunctionType = adjustCCAndNoReturn(ArgFunctionType, FunctionType,
+                                          /*AdjustExceptionSpec*/false);
+
   // Unevaluated SFINAE context.
   EnterExpressionEvaluationContext Unevaluated(
       *this, Sema::ExpressionEvaluationContext::Unevaluated);
@@ -3755,9 +3811,8 @@ Sema::TemplateDeductionResult Sema::DeduceTemplateArguments(
   }
 
   if (!ArgFunctionType.isNull()) {
-    unsigned TDF = TDF_TopLevelParameterTypeList;
-    if (IsAddressOfFunction)
-      TDF |= TDF_InOverloadResolution;
+    unsigned TDF =
+        TDF_TopLevelParameterTypeList | TDF_AllowCompatibleFunctionType;
     // Deduce template arguments from the function type.
     if (TemplateDeductionResult Result
           = DeduceTemplateArgumentsByTypeMatch(*this, TemplateParams,
@@ -3788,7 +3843,7 @@ Sema::TemplateDeductionResult Sema::DeduceTemplateArguments(
       !ResolveExceptionSpec(Info.getLocation(), SpecializationFPT))
     return TDK_MiscellaneousDeductionFailure;
 
-  // Adjust the exception specification of the argument again to match the
+  // Adjust the exception specification of the argument to match the
   // substituted and resolved type we just formed. (Calling convention and
   // noreturn can't be dependent, so we don't actually need this for them
   // right now.)
@@ -5093,6 +5148,7 @@ MarkUsedTemplateParameters(ASTContext &Ctx, QualType T,
                               cast<DependentSizedArrayType>(T)->getSizeExpr(),
                                OnlyDeduced, Depth, Used);
     // Fall through to check the element type
+    LLVM_FALLTHROUGH;
 
   case Type::ConstantArray:
   case Type::IncompleteArray:
@@ -5125,6 +5181,8 @@ MarkUsedTemplateParameters(ASTContext &Ctx, QualType T,
     for (unsigned I = 0, N = Proto->getNumParams(); I != N; ++I)
       MarkUsedTemplateParameters(Ctx, Proto->getParamType(I), OnlyDeduced,
                                  Depth, Used);
+    if (auto *E = Proto->getNoexceptExpr())
+      MarkUsedTemplateParameters(Ctx, E, OnlyDeduced, Depth, Used);
     break;
   }
 
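In C++17 the exception specification is part of the function type, so the deduction changes above let a bool non-type parameter be deduced through noexcept(E). A sketch (compile with -std=c++17):

    template <bool B> void callee() noexcept(B) {}

    template <bool B> void take(void (*)() noexcept(B)) {}

    void test() {
      take(&callee<true>);   // deduces B = true from the argument's type
      take(&callee<false>);  // deduces B = false
    }
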
diff --git a/interpreter/llvm/src/tools/clang/lib/Sema/SemaTemplateInstantiate.cpp b/interpreter/llvm/src/tools/clang/lib/Sema/SemaTemplateInstantiate.cpp
index a5108e039dedb..ce5e9c50a80f6 100644
--- a/interpreter/llvm/src/tools/clang/lib/Sema/SemaTemplateInstantiate.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Sema/SemaTemplateInstantiate.cpp
@@ -197,6 +197,7 @@ bool Sema::CodeSynthesisContext::isInstantiationRecord() const {
 
   case DefaultTemplateArgumentChecking:
   case DeclaringSpecialMember:
+  case DefiningSynthesizedFunction:
     return false;
   }
 
@@ -624,6 +625,17 @@ void Sema::PrintInstantiationStack() {
                    diag::note_in_declaration_of_implicit_special_member)
        << cast<CXXRecordDecl>(Active->Entity) << Active->SpecialMember;
       break;
+
+    case CodeSynthesisContext::DefiningSynthesizedFunction:
+      // FIXME: For synthesized members other than special members, produce a note.
+      auto *MD = dyn_cast<CXXMethodDecl>(Active->Entity);
+      auto CSM = MD ? getSpecialMember(MD) : CXXInvalid;
+      if (CSM != CXXInvalid) {
+        Diags.Report(Active->PointOfInstantiation,
+                     diag::note_member_synthesized_at)
+          << CSM << Context.getTagDeclType(MD->getParent());
+      }
+      break;
     }
   }
 }
@@ -666,6 +678,7 @@ Optional<TemplateDeductionInfo *> Sema::isSFINAEContext() const {
       return Active->DeductionInfo;
 
     case CodeSynthesisContext::DeclaringSpecialMember:
+    case CodeSynthesisContext::DefiningSynthesizedFunction:
       // This happens in a context unrelated to template instantiation, so
       // there is no SFINAE.
       return None;
@@ -1681,20 +1694,26 @@ TypeSourceInfo *Sema::SubstFunctionDeclType(TypeSourceInfo *T,
   return TLB.getTypeSourceInfo(Context, Result);
 }
 
+bool Sema::SubstExceptionSpec(SourceLocation Loc,
+                              FunctionProtoType::ExceptionSpecInfo &ESI,
+                              SmallVectorImpl<QualType> &ExceptionStorage,
+                              const MultiLevelTemplateArgumentList &Args) {
+  assert(ESI.Type != EST_Uninstantiated);
+
+  bool Changed = false;
+  TemplateInstantiator Instantiator(*this, Args, Loc, DeclarationName());
+  return Instantiator.TransformExceptionSpec(Loc, ESI, ExceptionStorage,
+                                             Changed);
+}
+
 void Sema::SubstExceptionSpec(FunctionDecl *New, const FunctionProtoType *Proto,
                               const MultiLevelTemplateArgumentList &Args) {
   FunctionProtoType::ExceptionSpecInfo ESI =
       Proto->getExtProtoInfo().ExceptionSpec;
-  assert(ESI.Type != EST_Uninstantiated);
-
-  TemplateInstantiator Instantiator(*this, Args, New->getLocation(),
-                                    New->getDeclName());
 
   SmallVector<QualType, 4> ExceptionStorage;
-  bool Changed = false;
-  if (Instantiator.TransformExceptionSpec(
-          New->getTypeSourceInfo()->getTypeLoc().getLocEnd(), ESI,
-          ExceptionStorage, Changed))
+  if (SubstExceptionSpec(New->getTypeSourceInfo()->getTypeLoc().getLocEnd(),
+                         ESI, ExceptionStorage, Args))
     // On error, recover by dropping the exception specification.
     ESI.Type = EST_None;
 
@@ -2028,7 +2047,7 @@ Sema::InstantiateClass(SourceLocation PointOfInstantiation,
 
   // The instantiation is visible here, even if it was first declared in an
   // unimported module.
-  Instantiation->setHidden(false);
+  Instantiation->setVisibleDespiteOwningModule();
 
   // FIXME: This loses the as-written tag kind for an explicit instantiation.
   Instantiation->setTagKind(Pattern->getTagKind());
@@ -2232,7 +2251,7 @@ bool Sema::InstantiateEnum(SourceLocation PointOfInstantiation,
 
   // The instantiation is visible here, even if it was first declared in an
   // unimported module.
-  Instantiation->setHidden(false);
+  Instantiation->setVisibleDespiteOwningModule();
 
   // Enter the scope of this instantiation. We don't use
   // PushDeclContext because we don't have a scope.
@@ -2341,6 +2360,25 @@ namespace {
   };
 }
 
+bool Sema::usesPartialOrExplicitSpecialization(
+    SourceLocation Loc, ClassTemplateSpecializationDecl *ClassTemplateSpec) {
+  if (ClassTemplateSpec->getTemplateSpecializationKind() ==
+      TSK_ExplicitSpecialization)
+    return true;
+
+  SmallVector<ClassTemplatePartialSpecializationDecl *, 4> PartialSpecs;
+  ClassTemplateSpec->getSpecializedTemplate()
+                   ->getPartialSpecializations(PartialSpecs);
+  for (unsigned I = 0, N = PartialSpecs.size(); I != N; ++I) {
+    TemplateDeductionInfo Info(Loc);
+    if (!DeduceTemplateArguments(PartialSpecs[I],
+                                 ClassTemplateSpec->getTemplateArgs(), Info))
+      return true;
+  }
+
+  return false;
+}
+
 /// Get the instantiation pattern to use to instantiate the definition of a
 /// given ClassTemplateSpecializationDecl (either the pattern of the primary
 /// template or of a partial specialization).
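usesPartialOrExplicitSpecialization answers whether a specialization's definition would come from something other than the primary template, i.e. the distinction below:

    template <typename T> struct S { void f(); };  // primary template
    template <typename T> struct S<T *> {};        // partial specialization
    template <> struct S<int> {};                  // explicit specialization

    S<char>  a;  // instantiated from the primary template
    S<char *> b; // matches the partial specialization
    S<int>   c;  // uses the explicit specialization
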
diff --git a/interpreter/llvm/src/tools/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/interpreter/llvm/src/tools/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
index 03df6fde6c802..6fee23aa8bc10 100644
--- a/interpreter/llvm/src/tools/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
@@ -1540,8 +1540,7 @@ Decl *TemplateDeclInstantiator::VisitCXXRecordDecl(CXXRecordDecl *D) {
   // DR1484 clarifies that the members of a local class are instantiated as part
   // of the instantiation of their enclosing entity.
   if (D->isCompleteDefinition() && D->isLocalClass()) {
-    Sema::SavePendingLocalImplicitInstantiationsRAII
-        SavedPendingLocalImplicitInstantiations(SemaRef);
+    Sema::LocalEagerInstantiationScope LocalInstantiations(SemaRef);
 
     SemaRef.InstantiateClass(D->getLocation(), Record, D, TemplateArgs,
                              TSK_ImplicitInstantiation,
@@ -1555,7 +1554,7 @@ Decl *TemplateDeclInstantiator::VisitCXXRecordDecl(CXXRecordDecl *D) {
 
     // This class may have local implicit instantiations that need to be
     // performed within this scope.
-    SemaRef.PerformPendingInstantiations(/*LocalOnly=*/true);
+    LocalInstantiations.perform();
   }
 
   SemaRef.DiagnoseUnusedNestedTypedefs(Record);
@@ -1783,6 +1782,9 @@ Decl *TemplateDeclInstantiator::VisitFunctionDecl(FunctionDecl *D,
       Previous.clear();
   }
 
+  if (isFriend)
+    Function->setObjectOfFriendDecl();
+
   SemaRef.CheckFunctionDeclaration(/*Scope*/ nullptr, Function, Previous,
                                    isExplicitSpecialization);
 
@@ -3769,6 +3771,8 @@ void Sema::InstantiateFunctionDefinition(SourceLocation PointOfInstantiation,
   if (PatternDef) {
     Pattern = PatternDef->getBody(PatternDef);
     PatternDecl = PatternDef;
+    if (PatternDef->willHaveBody())
+      PatternDef = nullptr;
   }
 
   // FIXME: We need to track the instantiation stack in order to know which
@@ -3783,6 +3787,7 @@ void Sema::InstantiateFunctionDefinition(SourceLocation PointOfInstantiation,
       // Try again at the end of the translation unit (at which point a
       // definition will be required).
       assert(!Recursive);
+      Function->setInstantiationIsPending(true);
       PendingInstantiations.push_back(
         std::make_pair(Function, PointOfInstantiation));
     } else if (TSK == TSK_ImplicitInstantiation) {
@@ -3802,6 +3807,7 @@ void Sema::InstantiateFunctionDefinition(SourceLocation PointOfInstantiation,
   // Postpone late parsed template instantiations.
   if (PatternDecl->isLateTemplateParsed() &&
       !LateTemplateParser) {
+    Function->setInstantiationIsPending(true);
     PendingInstantiations.push_back(
       std::make_pair(Function, PointOfInstantiation));
     return;
@@ -3812,10 +3818,9 @@ void Sema::InstantiateFunctionDefinition(SourceLocation PointOfInstantiation,
   // while we're still within our own instantiation context.
   // This has to happen before LateTemplateParser below is called, so that
   // it marks vtables used in late parsed templates as used.
-  SavePendingLocalImplicitInstantiationsRAII
-      SavedPendingLocalImplicitInstantiations(*this);
-  SavePendingInstantiationsAndVTableUsesRAII
-      SavePendingInstantiationsAndVTableUses(*this, /*Enabled=*/Recursive);
+  GlobalEagerInstantiationScope GlobalInstantiations(*this,
+                                                     /*Enabled=*/Recursive);
+  LocalEagerInstantiationScope LocalInstantiations(*this);
 
   // Call the LateTemplateParser callback if there is a need to late parse
   // a templated function definition.
@@ -3865,7 +3870,7 @@ void Sema::InstantiateFunctionDefinition(SourceLocation PointOfInstantiation,
 
   // The instantiation is visible here, even if it was first declared in an
   // unimported module.
-  Function->setHidden(false);
+  Function->setVisibleDespiteOwningModule();
 
   // Copy the inner loc start from the pattern.
   Function->setInnerLocStart(PatternDecl->getInnerLocStart());
@@ -3942,20 +3947,9 @@ void Sema::InstantiateFunctionDefinition(SourceLocation PointOfInstantiation,
 
   // This class may have local implicit instantiations that need to be
   // instantiation within this scope.
-  PerformPendingInstantiations(/*LocalOnly=*/true);
+  LocalInstantiations.perform();
   Scope.Exit();
-
-  if (Recursive) {
-    // Define any pending vtables.
-    DefineUsedVTables();
-
-    // Instantiate any pending implicit instantiations found during the
-    // instantiation of this template.
-    PerformPendingInstantiations();
-
-    // PendingInstantiations and VTableUses are restored through
-    // SavePendingInstantiationsAndVTableUses's destructor.
-  }
+  GlobalInstantiations.perform();
 }
 
 VarTemplateSpecializationDecl *Sema::BuildVarTemplateInstantiation(
@@ -4282,15 +4276,15 @@ void Sema::InstantiateVariableDefinition(SourceLocation PointOfInstantiation,
 
       // The instantiation is visible here, even if it was first declared in an
       // unimported module.
-      Var->setHidden(false);
+      Var->setVisibleDespiteOwningModule();
 
       // If we're performing recursive template instantiation, create our own
       // queue of pending implicit instantiations that we will instantiate
       // later, while we're still within our own instantiation context.
-      SavePendingInstantiationsAndVTableUsesRAII
-          SavePendingInstantiationsAndVTableUses(*this, /*Enabled=*/Recursive);
-
+      GlobalEagerInstantiationScope GlobalInstantiations(*this,
+                                                         /*Enabled=*/Recursive);
       LocalInstantiationScope Local(*this);
+      LocalEagerInstantiationScope LocalInstantiations(*this);
 
       // Enter the scope of this instantiation. We don't use
       // PushDeclContext because we don't have a scope.
@@ -4298,26 +4292,11 @@ void Sema::InstantiateVariableDefinition(SourceLocation PointOfInstantiation,
       InstantiateVariableInitializer(Var, PatternDecl, TemplateArgs);
       PreviousContext.pop();
 
-      // FIXME: Need to inform the ASTConsumer that we instantiated the
-      // initializer?
-
       // This variable may have local implicit instantiations that need to be
       // instantiated within this scope.
-      PerformPendingInstantiations(/*LocalOnly=*/true);
-
+      LocalInstantiations.perform();
       Local.Exit();
-
-      if (Recursive) {
-        // Define any newly required vtables.
-        DefineUsedVTables();
-
-        // Instantiate any pending implicit instantiations found during the
-        // instantiation of this template.
-        PerformPendingInstantiations();
-
-        // PendingInstantiations and VTableUses are restored through
-        // SavePendingInstantiationsAndVTableUses's destructor.
-      }
+      GlobalInstantiations.perform();
     }
 
     // Find actual definition
@@ -4408,21 +4387,20 @@ void Sema::InstantiateVariableDefinition(SourceLocation PointOfInstantiation,
   // If we're performing recursive template instantiation, create our own
   // queue of pending implicit instantiations that we will instantiate later,
   // while we're still within our own instantiation context.
-  SavePendingLocalImplicitInstantiationsRAII
-      SavedPendingLocalImplicitInstantiations(*this);
-  SavePendingInstantiationsAndVTableUsesRAII
-      SavePendingInstantiationsAndVTableUses(*this, /*Enabled=*/Recursive);
+  GlobalEagerInstantiationScope GlobalInstantiations(*this,
+                                                     /*Enabled=*/Recursive);
 
   // Enter the scope of this instantiation. We don't use
   // PushDeclContext because we don't have a scope.
   ContextRAII PreviousContext(*this, Var->getDeclContext());
   LocalInstantiationScope Local(*this);
 
+  LocalEagerInstantiationScope LocalInstantiations(*this);
+
   VarDecl *OldVar = Var;
   if (Def->isStaticDataMember() && !Def->isOutOfLine()) {
     // We're instantiating an inline static data member whose definition was
     // provided inside the class.
-    // FIXME: Update record?
     InstantiateVariableInitializer(Var, Def, TemplateArgs);
   } else if (!VarSpec) {
+    Var = cast_or_null<VarDecl>(SubstDecl(Def, Var->getDeclContext(),
@@ -4470,21 +4448,9 @@ void Sema::InstantiateVariableDefinition(SourceLocation PointOfInstantiation,
 
   // This variable may have local implicit instantiations that need to be
   // instantiated within this scope.
-  PerformPendingInstantiations(/*LocalOnly=*/true);
-
+  LocalInstantiations.perform();
   Local.Exit();
-  
-  if (Recursive) {
-    // Define any newly required vtables.
-    DefineUsedVTables();
-
-    // Instantiate any pending implicit instantiations found during the
-    // instantiation of this template.
-    PerformPendingInstantiations();
-
-    // PendingInstantiations and VTableUses are restored through
-    // SavePendingInstantiationsAndVTableUses's destructor.
-  }
+  GlobalInstantiations.perform();
 }
 
 void
@@ -4844,7 +4810,7 @@ static NamedDecl *findInstantiationOf(ASTContext &Ctx,
 DeclContext *Sema::FindInstantiatedContext(SourceLocation Loc, DeclContext* DC,
                           const MultiLevelTemplateArgumentList &TemplateArgs) {
  if (NamedDecl *D = dyn_cast<NamedDecl>(DC)) {
-    Decl* ID = FindInstantiatedDecl(Loc, D, TemplateArgs);
+    Decl* ID = FindInstantiatedDecl(Loc, D, TemplateArgs, true);
     return cast_or_null<DeclContext>(ID);
   } else return DC;
 }
@@ -4876,7 +4842,8 @@ DeclContext *Sema::FindInstantiatedContext(SourceLocation Loc, DeclContext* DC,
 /// (X<int>::<Kind>::KnownValue). \p FindInstantiatedDecl performs
 /// this mapping from within the instantiation of X<int>.
 NamedDecl *Sema::FindInstantiatedDecl(SourceLocation Loc, NamedDecl *D,
-                          const MultiLevelTemplateArgumentList &TemplateArgs) {
+                          const MultiLevelTemplateArgumentList &TemplateArgs,
+                          bool FindingInstantiatedContext) {
   DeclContext *ParentDC = D->getDeclContext();
  // FIXME: Parameters of pointer to functions (y below) that are themselves
   // parameters (p below) can have their ParentDC set to the translation-unit
@@ -5037,7 +5004,22 @@ NamedDecl *Sema::FindInstantiatedDecl(SourceLocation Loc, NamedDecl *D,
           QualType T = CheckTemplateIdType(TemplateName(TD), Loc, Args);
           if (T.isNull())
             return nullptr;
-          DC = T->getAsCXXRecordDecl();
+          auto *SubstRecord = T->getAsCXXRecordDecl();
+          assert(SubstRecord && "class template id not a class type?");
+          // Check that this template-id names the primary template and not a
+          // partial or explicit specialization. (In the latter cases, it's
+          // meaningless to attempt to find an instantiation of D within the
+          // specialization.)
+          // FIXME: The standard doesn't say what should happen here.
+          if (FindingInstantiatedContext &&
+              usesPartialOrExplicitSpecialization(
+                  Loc, cast<ClassTemplateSpecializationDecl>(SubstRecord))) {
+            Diag(Loc, diag::err_specialization_not_primary_template)
+              << T << (SubstRecord->getTemplateSpecializationKind() ==
+                           TSK_ExplicitSpecialization);
+            return nullptr;
+          }
+          DC = SubstRecord;
           continue;
         }
       }
@@ -5167,6 +5149,8 @@ void Sema::PerformPendingInstantiations(bool LocalOnly) {
                                 TSK_ExplicitInstantiationDefinition;
       InstantiateFunctionDefinition(/*FIXME:*/Inst.second, Function, true,
                                     DefinitionRequired, true);
+      if (Function->isDefined())
+        Function->setInstantiationIsPending(false);
       continue;
     }
 
diff --git a/interpreter/llvm/src/tools/clang/lib/Sema/SemaType.cpp b/interpreter/llvm/src/tools/clang/lib/Sema/SemaType.cpp
index 3992179fabae3..598a11300b876 100644
--- a/interpreter/llvm/src/tools/clang/lib/Sema/SemaType.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Sema/SemaType.cpp
@@ -15,6 +15,7 @@
 #include "clang/AST/ASTConsumer.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/ASTMutationListener.h"
+#include "clang/AST/ASTStructuralEquivalence.h"
 #include "clang/AST/CXXInheritance.h"
 #include "clang/AST/DeclObjC.h"
 #include "clang/AST/DeclTemplate.h"
@@ -119,6 +120,7 @@ static void diagnoseBadTypeAttribute(Sema &S, const AttributeList &attr,
 
 // Function type attributes.
 #define FUNCTION_TYPE_ATTRS_CASELIST \
+  case AttributeList::AT_NSReturnsRetained: \
   case AttributeList::AT_NoReturn: \
   case AttributeList::AT_Regparm: \
   case AttributeList::AT_AnyX86NoCallerSavedRegisters: \
@@ -639,11 +641,6 @@ static void distributeTypeAttrsFromDeclarator(TypeProcessingState &state,
       distributeObjCPointerTypeAttrFromDeclarator(state, *attr, declSpecType);
       break;
 
-    case AttributeList::AT_NSReturnsRetained:
-      if (!state.getSema().getLangOpts().ObjCAutoRefCount)
-        break;
-      // fallthrough
-
     FUNCTION_TYPE_ATTRS_CASELIST:
       distributeFunctionTypeAttrFromDeclarator(state, *attr, declSpecType);
       break;
@@ -1881,6 +1878,11 @@ QualType Sema::BuildPointerType(QualType T,
     return QualType();
   }
 
+  if (T->isFunctionType() && getLangOpts().OpenCL) {
+    Diag(Loc, diag::err_opencl_function_pointer);
+    return QualType();
+  }
+
   if (checkQualifiedFunction(*this, T, Loc, QFK_Pointer))
     return QualType();
 
@@ -2378,6 +2380,11 @@ QualType Sema::BuildFunctionType(QualType T,
                            [=](unsigned i) { return Loc; });
   }
 
+  if (EPI.ExtInfo.getProducesResult()) {
+    // This is just a warning, so we can't fail to build if we see it.
+    checkNSReturnsRetainedReturnType(Loc, T);
+  }
+
   if (Invalid)
     return QualType();
 
@@ -3175,11 +3182,7 @@ getCCForDeclaratorChunk(Sema &S, Declarator &D,
     for (const AttributeList *Attr = D.getDeclSpec().getAttributes().getList();
          Attr; Attr = Attr->getNext()) {
       if (Attr->getKind() == AttributeList::AT_OpenCLKernel) {
-        llvm::Triple::ArchType arch = S.Context.getTargetInfo().getTriple().getArch();
-        if (arch == llvm::Triple::spir || arch == llvm::Triple::spir64 ||
-            arch == llvm::Triple::amdgcn || arch == llvm::Triple::r600) {
-          CC = CC_OpenCLKernel;
-        }
+        CC = CC_OpenCLKernel;
         break;
       }
     }
@@ -3365,7 +3368,7 @@ classifyPointerDeclarator(Sema &S, QualType type, Declarator &declarator,
     if (auto objcClass = type->getAs<ObjCInterfaceType>()) {
       if (objcClass->getInterface()->getIdentifier() == S.getNSErrorIdent()) {
         if (numNormalPointers == 2 && numTypeSpecifierPointers < 2)
-          return PointerDeclaratorKind::NSErrorPointerPointer;;
+          return PointerDeclaratorKind::NSErrorPointerPointer;
       }
 
       break;
@@ -3735,16 +3738,8 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state,
     // inner pointers.
     complainAboutMissingNullability = CAMN_InnerPointers;
 
-    auto isDependentNonPointerType = [](QualType T) -> bool {
-      // Note: This is intended to be the same check as Type::canHaveNullability
-      // except with all of the ambiguous cases being treated as 'false' rather
-      // than 'true'.
-      return T->isDependentType() && !T->isAnyPointerType() &&
-        !T->isBlockPointerType() && !T->isMemberPointerType();
-    };
-
-    if (T->canHaveNullability() && !T->getNullability(S.Context) &&
-        !isDependentNonPointerType(T)) {
+    if (T->canHaveNullability(/*ResultIfUnknown*/false) &&
+        !T->getNullability(S.Context)) {
       // Note that we allow but don't require nullability on dependent types.
       ++NumPointersRemaining;
     }
@@ -3962,7 +3957,8 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state,
   // If the type itself could have nullability but does not, infer pointer
   // nullability and perform consistency checking.
   if (S.CodeSynthesisContexts.empty()) {
-    if (T->canHaveNullability() && !T->getNullability(S.Context)) {
+    if (T->canHaveNullability(/*ResultIfUnknown*/false) &&
+        !T->getNullability(S.Context)) {
       if (isVaList(T)) {
         // Record that we've seen a pointer, but do nothing else.
         if (NumPointersRemaining > 0)
@@ -4358,19 +4354,6 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state,
       if (FTI.isAmbiguous)
         warnAboutAmbiguousFunction(S, D, DeclType, T);
 
-      // GNU warning -Wstrict-prototypes
-      //   Warn if a function declaration is without a prototype.
-      //   This warning is issued for all kinds of unprototyped function
-      //   declarations (i.e. function type typedef, function pointer etc.)
-      //   C99 6.7.5.3p14:
-      //   The empty list in a function declarator that is not part of a
-      //   definition of that function specifies that no information
-      //   about the number or types of the parameters is supplied.
-      if (D.getFunctionDefinitionKind() == FDK_Declaration &&
-          FTI.NumParams == 0 && !LangOpts.CPlusPlus)
-        S.Diag(DeclType.Loc, diag::warn_strict_prototypes)
-            << 0 << FixItHint::CreateInsertion(FTI.getRParenLoc(), "void");
-
       FunctionType::ExtInfo EI(getCCForDeclaratorChunk(S, D, FTI, chunkIndex));
 
       if (!FTI.NumParams && !FTI.isVariadic && !LangOpts.CPlusPlus) {
@@ -4613,6 +4596,36 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state,
                      const_cast<AttributeList *>(DeclType.getAttrs()));
   }
 
+  // GNU warning -Wstrict-prototypes
+  //   Warn if a function declaration is without a prototype.
+  //   This warning is issued for all kinds of unprototyped function
+  //   declarations (i.e. function type typedef, function pointer etc.)
+  //   C99 6.7.5.3p14:
+  //   The empty list in a function declarator that is not part of a definition
+  //   of that function specifies that no information about the number or types
+  //   of the parameters is supplied.
+  if (!LangOpts.CPlusPlus && D.getFunctionDefinitionKind() == FDK_Declaration) {
+    bool IsBlock = false;
+    for (const DeclaratorChunk &DeclType : D.type_objects()) {
+      switch (DeclType.Kind) {
+      case DeclaratorChunk::BlockPointer:
+        IsBlock = true;
+        break;
+      case DeclaratorChunk::Function: {
+        const DeclaratorChunk::FunctionTypeInfo &FTI = DeclType.Fun;
+        if (FTI.NumParams == 0)
+          S.Diag(DeclType.Loc, diag::warn_strict_prototypes)
+              << IsBlock
+              << FixItHint::CreateInsertion(FTI.getRParenLoc(), "void");
+        IsBlock = false;
+        break;
+      }
+      default:
+        break;
+      }
+    }
+  }
+
   assert(!T.isNull() && "T must not be null after this point");
 
   if (LangOpts.CPlusPlus && T->isFunctionType()) {
@@ -5004,6 +5017,8 @@ static AttributeList::Kind getAttrListKind(AttributedType::Kind kind) {
     return AttributeList::AT_TypeNullUnspecified;
   case AttributedType::attr_objc_kindof:
     return AttributeList::AT_ObjCKindOf;
+  case AttributedType::attr_ns_returns_retained:
+    return AttributeList::AT_NSReturnsRetained;
   }
   llvm_unreachable("unexpected attribute kind!");
 }
@@ -5531,14 +5546,15 @@ static void HandleAddressSpaceTypeAttribute(QualType &Type,
       addrSpace.setIsSigned(false);
     }
     llvm::APSInt max(addrSpace.getBitWidth());
-    max = Qualifiers::MaxAddressSpace - LangAS::Count;
+    max = Qualifiers::MaxAddressSpace - LangAS::FirstTargetAddressSpace;
     if (addrSpace > max) {
       S.Diag(Attr.getLoc(), diag::err_attribute_address_space_too_high)
         << (unsigned)max.getZExtValue() << ASArgExpr->getSourceRange();
       Attr.setInvalid();
       return;
     }
-    ASIdx = static_cast<unsigned>(addrSpace.getZExtValue()) + LangAS::Count;
+    ASIdx = static_cast<unsigned>(addrSpace.getZExtValue()) +
+            LangAS::FirstTargetAddressSpace;
   } else {
     // The keyword-based type attributes imply which address space to use.
     switch (Attr.getKind()) {
@@ -6359,17 +6375,26 @@ static bool handleFunctionTypeAttr(TypeProcessingState &state,
   // ns_returns_retained is not always a type attribute, but if we got
   // here, we're treating it as one right now.
   if (attr.getKind() == AttributeList::AT_NSReturnsRetained) {
-    assert(S.getLangOpts().ObjCAutoRefCount &&
-           "ns_returns_retained treated as type attribute in non-ARC");
     if (attr.getNumArgs()) return true;
 
     // Delay if this is not a function type.
     if (!unwrapped.isFunctionType())
       return false;
 
-    FunctionType::ExtInfo EI
-      = unwrapped.get()->getExtInfo().withProducesResult(true);
-    type = unwrapped.wrap(S, S.Context.adjustFunctionType(unwrapped.get(), EI));
+    // Check whether the return type is reasonable.
+    if (S.checkNSReturnsRetainedReturnType(attr.getLoc(),
+                                           unwrapped.get()->getReturnType()))
+      return true;
+
+    // Only actually change the underlying type in ARC builds.
+    QualType origType = type;
+    if (state.getSema().getLangOpts().ObjCAutoRefCount) {
+      FunctionType::ExtInfo EI
+        = unwrapped.get()->getExtInfo().withProducesResult(true);
+      type = unwrapped.wrap(S, S.Context.adjustFunctionType(unwrapped.get(), EI));
+    }
+    type = S.Context.getAttributedType(AttributedType::attr_ns_returns_retained,
+                                       origType, type);
     return true;
   }
 
@@ -6931,11 +6956,6 @@ static void processTypeAttrs(TypeProcessingState &state, QualType &type,
       attr.setUsedAsTypeAttr();
       break;
 
-    case AttributeList::AT_NSReturnsRetained:
-      if (!state.getSema().getLangOpts().ObjCAutoRefCount)
-        break;
-      // fallthrough into the function attrs
-
     FUNCTION_TYPE_ATTRS_CASELIST:
       attr.setUsedAsTypeAttr();
 
@@ -7097,6 +7117,20 @@ bool Sema::RequireCompleteType(SourceLocation Loc, QualType T,
   return false;
 }
 
+bool Sema::hasStructuralCompatLayout(Decl *D, Decl *Suggested) {
+  llvm::DenseSet<std::pair<Decl *, Decl *>> NonEquivalentDecls;
+  if (!Suggested)
+    return false;
+
+  // FIXME: Add a specific mode for C11 6.2.7/1 in StructuralEquivalenceContext
+  // and isolate from other C++ specific checks.
+  StructuralEquivalenceContext Ctx(
+      D->getASTContext(), Suggested->getASTContext(), NonEquivalentDecls,
+      false /*StrictTypeSpelling*/, true /*Complain*/,
+      true /*ErrorOnTagTypeMismatch*/);
+  return Ctx.IsStructurallyEquivalent(D, Suggested);
+}
+
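The new helper wraps StructuralEquivalenceContext to test C11 6.2.7/1-style layout compatibility between two declarations that may live in different ASTContexts. A hedged usage sketch, with FirstDecl and SecondDecl as placeholders for two definitions of the same tag type:

    // Sketch only: compare two definitions for structural layout equality.
    if (!S.hasStructuralCompatLayout(FirstDecl, SecondDecl)) {
      // The definitions differ; the caller can diagnose an incompatible
      // redefinition instead of silently picking one of them.
    }
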
 /// \brief Determine whether there is any declaration of \p D that was ever a
 ///        definition (perhaps before module merging) and is currently visible.
 /// \param D The definition of the entity.
diff --git a/interpreter/llvm/src/tools/clang/lib/Sema/TreeTransform.h b/interpreter/llvm/src/tools/clang/lib/Sema/TreeTransform.h
index 895cf18eef8ea..1a11678b76c54 100644
--- a/interpreter/llvm/src/tools/clang/lib/Sema/TreeTransform.h
+++ b/interpreter/llvm/src/tools/clang/lib/Sema/TreeTransform.h
@@ -1653,6 +1653,21 @@ class TreeTransform {
         ReductionId, UnresolvedReductions);
   }
 
+  /// Build a new OpenMP 'task_reduction' clause.
+  ///
+  /// By default, performs semantic analysis to build the new statement.
+  /// Subclasses may override this routine to provide different behavior.
+  OMPClause *RebuildOMPTaskReductionClause(
+      ArrayRef<Expr *> VarList, SourceLocation StartLoc,
+      SourceLocation LParenLoc, SourceLocation ColonLoc, SourceLocation EndLoc,
+      CXXScopeSpec &ReductionIdScopeSpec,
+      const DeclarationNameInfo &ReductionId,
+      ArrayRef<Expr *> UnresolvedReductions) {
+    return getSema().ActOnOpenMPTaskReductionClause(
+        VarList, StartLoc, LParenLoc, ColonLoc, EndLoc, ReductionIdScopeSpec,
+        ReductionId, UnresolvedReductions);
+  }
+
   /// \brief Build a new OpenMP 'linear' clause.
   ///
   /// By default, performs semantic analysis to build the new OpenMP clause.
@@ -6956,7 +6971,22 @@ TreeTransform::TransformCoroutineBodyStmt(CoroutineBodyStmt *S) {
     if (DeallocRes.isInvalid())
       return StmtError();
     Builder.Deallocate = DeallocRes.get();
+
+    assert(S->getResultDecl() && "ResultDecl must already be built");
+    StmtResult ResultDecl = getDerived().TransformStmt(S->getResultDecl());
+    if (ResultDecl.isInvalid())
+      return StmtError();
+    Builder.ResultDecl = ResultDecl.get();
+
+    if (auto *ReturnStmt = S->getReturnStmt()) {
+      StmtResult Res = getDerived().TransformStmt(ReturnStmt);
+      if (Res.isInvalid())
+        return StmtError();
+      Builder.ReturnStmt = Res.get();
+    }
   }
+  if (!Builder.buildParameterMoves())
+    return StmtError();
 
   return getDerived().RebuildCoroutineBodyStmt(Builder);
 }
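With this change, instantiating a templated coroutine also rebuilds the ResultDecl, the ReturnStmt when one exists, and the parameter moves (via Builder.buildParameterMoves()). A hedged illustration of code that exercises the path; task<T> is a hypothetical awaitable type and -fcoroutines-ts is assumed:

    template <typename T>
    task<T> identity(T v) {
      // ResultDecl and ReturnStmt are re-transformed when identity<int>
      // is instantiated from this dependent coroutine body.
      co_return v;
    }
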
@@ -8395,6 +8425,51 @@ TreeTransform::TransformOMPReductionClause(OMPReductionClause *C) {
       C->getLocEnd(), ReductionIdScopeSpec, NameInfo, UnresolvedReductions);
 }
 
+template <typename Derived>
+OMPClause *TreeTransform<Derived>::TransformOMPTaskReductionClause(
+    OMPTaskReductionClause *C) {
+  llvm::SmallVector<Expr *, 16> Vars;
+  Vars.reserve(C->varlist_size());
+  for (auto *VE : C->varlists()) {
+    ExprResult EVar = getDerived().TransformExpr(cast<Expr>(VE));
+    if (EVar.isInvalid())
+      return nullptr;
+    Vars.push_back(EVar.get());
+  }
+  CXXScopeSpec ReductionIdScopeSpec;
+  ReductionIdScopeSpec.Adopt(C->getQualifierLoc());
+
+  DeclarationNameInfo NameInfo = C->getNameInfo();
+  if (NameInfo.getName()) {
+    NameInfo = getDerived().TransformDeclarationNameInfo(NameInfo);
+    if (!NameInfo.getName())
+      return nullptr;
+  }
+  // Build a list of all UDR decls with the same names ranged by the Scopes.
+  // The Scope boundary is a duplication of the previous decl.
+  llvm::SmallVector<Expr *, 16> UnresolvedReductions;
+  for (auto *E : C->reduction_ops()) {
+    // Transform all the decls.
+    if (E) {
+      auto *ULE = cast<UnresolvedLookupExpr>(E);
+      UnresolvedSet<8> Decls;
+      for (auto *D : ULE->decls()) {
+        NamedDecl *InstD =
+            cast<NamedDecl>(getDerived().TransformDecl(E->getExprLoc(), D));
+        Decls.addDecl(InstD, InstD->getAccess());
+      }
+      UnresolvedReductions.push_back(UnresolvedLookupExpr::Create(
+          SemaRef.Context, /*NamingClass=*/nullptr,
+          ReductionIdScopeSpec.getWithLocInContext(SemaRef.Context), NameInfo,
+          /*ADL=*/true, ULE->isOverloaded(), Decls.begin(), Decls.end()));
+    } else
+      UnresolvedReductions.push_back(nullptr);
+  }
+  return getDerived().RebuildOMPTaskReductionClause(
+      Vars, C->getLocStart(), C->getLParenLoc(), C->getColonLoc(),
+      C->getLocEnd(), ReductionIdScopeSpec, NameInfo, UnresolvedReductions);
+}
+
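The new transform mirrors TransformOMPReductionClause: the variable list, the reduction identifier, and any unresolved user-defined reductions are re-resolved at instantiation time. A minimal sketch of source that reaches it (OpenMP task_reduction syntax, assuming -fopenmp with support for the clause):

    template <typename T>
    void accumulate(T &x) {
      // Instantiating accumulate<int> runs TransformOMPTaskReductionClause
      // over the clause's variable list and reduction identifier.
    #pragma omp taskgroup task_reduction(+: x)
      { }
    }
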
 template <typename Derived>
 OMPClause *
 TreeTransform<Derived>::TransformOMPLinearClause(OMPLinearClause *C) {
diff --git a/interpreter/llvm/src/tools/clang/lib/Serialization/ASTReader.cpp b/interpreter/llvm/src/tools/clang/lib/Serialization/ASTReader.cpp
index 631f6af2be403..4bbec6d6830c1 100644
--- a/interpreter/llvm/src/tools/clang/lib/Serialization/ASTReader.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Serialization/ASTReader.cpp
@@ -292,6 +292,33 @@ static bool checkLanguageOptions(const LangOptions &LangOpts,
     return true;
   }
 
+  // Sanitizer feature mismatches are treated as compatible differences. If
+  // compatible differences aren't allowed, we still only want to check for
+  // mismatches of non-modular sanitizers (the only ones which can affect AST
+  // generation).
+  if (!AllowCompatibleDifferences) {
+    SanitizerMask ModularSanitizers = getPPTransparentSanitizers();
+    SanitizerSet ExistingSanitizers = ExistingLangOpts.Sanitize;
+    SanitizerSet ImportedSanitizers = LangOpts.Sanitize;
+    ExistingSanitizers.clear(ModularSanitizers);
+    ImportedSanitizers.clear(ModularSanitizers);
+    if (ExistingSanitizers.Mask != ImportedSanitizers.Mask) {
+      const std::string Flag = "-fsanitize=";
+      if (Diags) {
+#define SANITIZER(NAME, ID)                                                    \
+  {                                                                            \
+    bool InExistingModule = ExistingSanitizers.has(SanitizerKind::ID);         \
+    bool InImportedModule = ImportedSanitizers.has(SanitizerKind::ID);         \
+    if (InExistingModule != InImportedModule)                                  \
+      Diags->Report(diag::err_pch_targetopt_feature_mismatch)                  \
+          << InExistingModule << (Flag + NAME);                                \
+  }
+#include "clang/Basic/Sanitizers.def"
+      }
+      return true;
+    }
+  }
+
   return false;
 }
 
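The rule above separates sanitizers that are transparent to AST generation from those that change the produced AST; only the latter participate in the compatibility check. A condensed restatement using the same helpers the hunk introduces:

    // Only non-modular sanitizers participate in the mismatch test.
    SanitizerSet Existing = ExistingLangOpts.Sanitize;
    SanitizerSet Imported = LangOpts.Sanitize;
    const SanitizerMask Transparent = getPPTransparentSanitizers();
    Existing.clear(Transparent);
    Imported.clear(Transparent);
    // A difference here reports err_pch_targetopt_feature_mismatch per flag.
    bool Mismatch = Existing.Mask != Imported.Mask;
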
@@ -829,7 +856,7 @@ static bool isInterestingIdentifier(ASTReader &Reader, IdentifierInfo &II,
          II.isPoisoned() ||
          (IsModule ? II.hasRevertedBuiltin() : II.getObjCOrBuiltinID()) ||
          II.hasRevertedTokenIDToIdentifier() ||
-         (!(IsModule && Reader.getContext().getLangOpts().CPlusPlus) &&
+         (!(IsModule && Reader.getPreprocessor().getLangOpts().CPlusPlus) &&
           II.getFETokenInfo());
 }
 
@@ -1121,7 +1148,7 @@ bool ASTReader::ReadVisibleDeclContextStorage(ModuleFile &M,
 
 void ASTReader::Error(StringRef Msg) const {
   Error(diag::err_fe_pch_malformed, Msg);
-  if (Context.getLangOpts().Modules && !Diags.isDiagnosticInFlight() &&
+  if (PP.getLangOpts().Modules && !Diags.isDiagnosticInFlight() &&
       !PP.getHeaderSearchInfo().getModuleCachePath().empty()) {
     Diag(diag::note_module_cache_path)
       << PP.getHeaderSearchInfo().getModuleCachePath();
@@ -1364,15 +1391,14 @@ bool ASTReader::ReadSLocEntry(int ID) {
 
     const DeclID *FirstDecl = F->FileSortedDecls + Record[6];
     unsigned NumFileDecls = Record[7];
-    if (NumFileDecls) {
+    if (NumFileDecls && ContextObj) {
       assert(F->FileSortedDecls && "FILE_SORTED_DECLS not encountered yet ?");
       FileDeclIDs[FID] = FileDeclsInfo(F, llvm::makeArrayRef(FirstDecl,
                                                              NumFileDecls));
     }
 
     const SrcMgr::ContentCache *ContentCache
-      = SourceMgr.getOrCreateContentCache(File,
-                              /*isSystemFile=*/FileCharacter != SrcMgr::C_User);
+      = SourceMgr.getOrCreateContentCache(File, isSystem(FileCharacter));
     if (OverriddenBuffer && !ContentCache->BufferOverridden &&
         ContentCache->ContentsEntry == ContentCache->OrigEntry &&
         !ContentCache->getRawBuffer()) {
@@ -1500,7 +1526,7 @@ MacroInfo *ASTReader::ReadMacroRecord(ModuleFile &F, uint64_t Offset) {
 
   Stream.JumpToBit(Offset);
   RecordData Record;
-  SmallVector<IdentifierInfo*, 16> MacroArgs;
+  SmallVector<IdentifierInfo*, 16> MacroParams;
   MacroInfo *Macro = nullptr;
 
   while (true) {
@@ -1551,17 +1577,17 @@ MacroInfo *ASTReader::ReadMacroRecord(ModuleFile &F, uint64_t Offset) {
         bool isC99VarArgs = Record[NextIndex++];
         bool isGNUVarArgs = Record[NextIndex++];
         bool hasCommaPasting = Record[NextIndex++];
-        MacroArgs.clear();
+        MacroParams.clear();
         unsigned NumArgs = Record[NextIndex++];
         for (unsigned i = 0; i != NumArgs; ++i)
-          MacroArgs.push_back(getLocalIdentifier(F, Record[NextIndex++]));
+          MacroParams.push_back(getLocalIdentifier(F, Record[NextIndex++]));
 
         // Install function-like macro info.
         MI->setIsFunctionLike();
         if (isC99VarArgs) MI->setIsC99Varargs();
         if (isGNUVarArgs) MI->setIsGNUVarargs();
         if (hasCommaPasting) MI->setHasCommaPasting();
-        MI->setArgumentList(MacroArgs, PP.getPreprocessorAllocator());
+        MI->setParameterList(MacroParams, PP.getPreprocessorAllocator());
       }
 
       // Remember that we saw this macro last so that we add the tokens that
@@ -1633,6 +1659,9 @@ bool HeaderFileInfoTrait::EqualKey(internal_key_ref a, internal_key_ref b) {
   if (llvm::sys::path::is_absolute(a.Filename) && a.Filename == b.Filename)
     return true;
 
+  if (StringRef(b.Filename).endswith(a.Filename))
+    return true;
+
   // Determine whether the actual files are equivalent.
   FileManager &FileMgr = Reader.getFileManager();
   auto GetFile = [&](const internal_key_type &Key) -> const FileEntry* {
@@ -1676,9 +1705,9 @@ HeaderFileInfoTrait::ReadData(internal_key_ref key, const unsigned char *d,
   HeaderFileInfo HFI;
   unsigned Flags = *d++;
   // FIXME: Refactor with mergeHeaderFileInfo in HeaderSearch.cpp.
-  HFI.isImport |= (Flags >> 4) & 0x01;
-  HFI.isPragmaOnce |= (Flags >> 3) & 0x01;
-  HFI.DirInfo = (Flags >> 1) & 0x03;
+  HFI.isImport |= (Flags >> 5) & 0x01;
+  HFI.isPragmaOnce |= (Flags >> 4) & 0x01;
+  HFI.DirInfo = (Flags >> 1) & 0x07;
   HFI.IndexHeaderMapHeader = Flags & 0x01;
   // FIXME: Find a better way to handle this. Maybe just store a
   // "has been included" flag?
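The widened shifts reflect a serialized flag byte that gained a bit; the layout implied by the masks above (a restatement, not an independent spec) is:

    unsigned isImport             = (Flags >> 5) & 0x01;  // was bit 4
    unsigned isPragmaOnce         = (Flags >> 4) & 0x01;  // was bit 3
    unsigned DirInfo              = (Flags >> 1) & 0x07;  // was 2 bits, now 3
    unsigned IndexHeaderMapHeader = Flags & 0x01;         // unchanged, bit 0
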
@@ -2008,6 +2037,7 @@ ASTReader::readInputFileInfo(ModuleFile &F, unsigned ID) {
   R.StoredTime = static_cast<time_t>(Record[2]);
   R.Overridden = static_cast<bool>(Record[3]);
   R.Transient = static_cast<bool>(Record[4]);
+  R.TopLevelModuleMap = static_cast<bool>(Record[5]);
   R.Filename = Blob;
   ResolveImportedPath(F, R.Filename);
   return R;
@@ -2597,10 +2627,11 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) {
       // contains any declarations lexically within it (which it always does!).
       // This usually has no cost, since we very rarely need the lookup map for
       // the translation unit outside C++.
-      DeclContext *DC = Context.getTranslationUnitDecl();
-      if (DC->hasExternalLexicalStorage() &&
-          !getContext().getLangOpts().CPlusPlus)
-        DC->setMustBuildLookupTable();
+      if (ASTContext *Ctx = ContextObj) {
+        DeclContext *DC = Ctx->getTranslationUnitDecl();
+        if (DC->hasExternalLexicalStorage() && !Ctx->getLangOpts().CPlusPlus)
+          DC->setMustBuildLookupTable();
+      }
 
       return Success;
     }
@@ -2689,7 +2720,33 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) {
     // Read and process a record.
     Record.clear();
     StringRef Blob;
-    switch ((ASTRecordTypes)Stream.readRecord(Entry.ID, Record, &Blob)) {
+    auto RecordType =
+        (ASTRecordTypes)Stream.readRecord(Entry.ID, Record, &Blob);
+
+    // If we're not loading an AST context, we don't care about most records.
+    if (!ContextObj) {
+      switch (RecordType) {
+      case IDENTIFIER_TABLE:
+      case IDENTIFIER_OFFSET:
+      case INTERESTING_IDENTIFIERS:
+      case STATISTICS:
+      case PP_CONDITIONAL_STACK:
+      case PP_COUNTER_VALUE:
+      case SOURCE_LOCATION_OFFSETS:
+      case MODULE_OFFSET_MAP:
+      case SOURCE_MANAGER_LINE_TABLE:
+      case SOURCE_LOCATION_PRELOADS:
+      case PPD_ENTITIES_OFFSETS:
+      case HEADER_SEARCH_TABLE:
+      case IMPORTED_MODULES:
+      case MACRO_OFFSET:
+        break;
+      default:
+        continue;
+      }
+    }
+
+    switch (RecordType) {
     default:  // Default behavior: ignore.
       break;
 
@@ -2748,7 +2805,7 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) {
     }
 
     case TU_UPDATE_LEXICAL: {
-      DeclContext *TU = Context.getTranslationUnitDecl();
+      DeclContext *TU = ContextObj->getTranslationUnitDecl();
       LexicalContents Contents(
           reinterpret_cast<const llvm::support::unaligned_uint32_t *>(
               Blob.data()),
@@ -2766,7 +2823,8 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) {
       // If we've already loaded the decl, perform the updates when we finish
       // loading this block.
       if (Decl *D = GetExistingDecl(ID))
-        PendingUpdateRecords.push_back(std::make_pair(ID, D));
+        PendingUpdateRecords.push_back(
+            PendingUpdateRecord(ID, D, /*JustLoaded=*/false));
       break;
     }
 
@@ -2934,6 +2992,21 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) {
       }
       break;
 
+    case PP_CONDITIONAL_STACK:
+      if (!Record.empty()) {
+        SmallVector<PPConditionalInfo, 4> ConditionalStack;
+        for (unsigned Idx = 0, N = Record.size() - 1; Idx < N; /* in loop */) {
+          auto Loc = ReadSourceLocation(F, Record, Idx);
+          bool WasSkipping = Record[Idx++];
+          bool FoundNonSkip = Record[Idx++];
+          bool FoundElse = Record[Idx++];
+          ConditionalStack.push_back(
+              {Loc, WasSkipping, FoundNonSkip, FoundElse});
+        }
+        PP.setReplayablePreambleConditionalStack(ConditionalStack);
+      }
+      break;
+
     case PP_COUNTER_VALUE:
       if (!Record.empty() && Listener)
         Listener->ReadCounter(F, Record[0]);
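The PP_CONDITIONAL_STACK record exists so a preamble PCH can end while a preprocessor conditional is still open and replay that state on reuse. A hedged illustration of a file that needs it:

    // Hypothetical main file whose preamble region ends inside a conditional:
    #ifndef UTIL_H            // still open when the preamble ends
    #define UTIL_H
    // Each open conditional is serialized as {IfLoc, WasSkipping,
    // FoundNonSkip, FoundElse} and restored through
    // PP.setReplayablePreambleConditionalStack() when the preamble is reused.
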
@@ -3096,7 +3169,8 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) {
         // If we've already loaded the decl, perform the updates when we finish
         // loading this block.
         if (Decl *D = GetExistingDecl(ID))
-          PendingUpdateRecords.push_back(std::make_pair(ID, D));
+          PendingUpdateRecords.push_back(
+              PendingUpdateRecord(ID, D, /*JustLoaded=*/false));
       }
       break;
     }
@@ -3421,12 +3495,6 @@ ASTReader::ReadModuleMapFileBlock(RecordData &Record, ModuleFile &F,
   unsigned Idx = 0;
   F.ModuleMapPath = ReadPath(F, Record, Idx);
 
-  if (F.Kind == MK_ExplicitModule || F.Kind == MK_PrebuiltModule) {
-    // For an explicitly-loaded module, we don't care whether the original
-    // module map file exists or matches.
-    return Success;
-  }
-
   // Try to resolve ModuleName in the current header search context and
   // verify that it is found in the same module map file as we saved. If the
   // top-level AST file is a main file, skip this check because there is no
@@ -3546,8 +3614,8 @@ static void moveMethodToBackOfGlobalList(Sema &S, ObjCMethodDecl *Method) {
 void ASTReader::makeNamesVisible(const HiddenNames &Names, Module *Owner) {
   assert(Owner->NameVisibility != Module::Hidden && "nothing to make visible?");
   for (Decl *D : Names) {
-    bool wasHidden = D->Hidden;
-    D->Hidden = false;
+    bool wasHidden = D->isHidden();
+    D->setVisibleDespiteOwningModule();
 
     if (wasHidden && SemaObj) {
       if (ObjCMethodDecl *Method = dyn_cast<ObjCMethodDecl>(D)) {
@@ -3614,7 +3682,7 @@ void ASTReader::mergeDefinitionVisibility(NamedDecl *Def,
   if (Def->isHidden()) {
     // If MergedDef is visible or becomes visible, make the definition visible.
     if (!MergedDef->isHidden())
-      Def->Hidden = false;
+      Def->setVisibleDespiteOwningModule();
     else if (getContext().getLangOpts().ModulesLocalVisibility) {
       getContext().mergeDefinitionIntoModule(
           Def, MergedDef->getImportedOwningModule(),
@@ -3633,7 +3701,7 @@ bool ASTReader::loadGlobalIndex() {
     return false;
 
   if (TriedLoadingGlobalIndex || !UseGlobalIndex ||
-      !Context.getLangOpts().Modules)
+      !PP.getLangOpts().Modules)
     return true;
 
   // Try to load the global index.
@@ -3651,7 +3719,7 @@ bool ASTReader::loadGlobalIndex() {
 }
 
 bool ASTReader::isGlobalIndexUnavailable() const {
-  return Context.getLangOpts().Modules && UseGlobalIndex &&
+  return PP.getLangOpts().Modules && UseGlobalIndex &&
          !hasGlobalIndex() && TriedLoadingGlobalIndex;
 }
 
@@ -3663,6 +3731,8 @@ static void updateModuleTimestamp(ModuleFile &MF) {
   if (EC)
     return;
   OS << "Timestamp file\n";
+  OS.close();
+  OS.clear_error(); // Avoid triggering a fatal error.
 }
 
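Without the explicit close(), a failed write would surface in raw_fd_ostream's destructor and abort via report_fatal_error; module timestamp files are best-effort, so the error is swallowed instead. The pattern in isolation (LLVM 5 API; TimestampFile is a placeholder path):

    std::error_code EC;
    llvm::raw_fd_ostream OS(TimestampFile, EC, llvm::sys::fs::F_None);
    if (EC)
      return;
    OS << "Timestamp file\n";
    OS.close();
    OS.clear_error();  // a write failure here must not kill the compiler
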
 /// \brief Given a cursor at the start of an AST file, scan ahead and drop the
@@ -3707,7 +3777,9 @@ ASTReader::ASTReadResult ASTReader::ReadAST(StringRef FileName,
   Deserializing AnASTFile(this);
 
   // Bump the generation number.
-  unsigned PreviousGeneration = incrementGeneration(Context);
+  unsigned PreviousGeneration = 0;
+  if (ContextObj)
+    PreviousGeneration = incrementGeneration(*ContextObj);
 
   unsigned NumModules = ModuleMgr.size();
   SmallVector<ImportedModule, 4> Loaded;
@@ -3726,7 +3798,7 @@ ASTReader::ASTReadResult ASTReader::ReadAST(StringRef FileName,
       LoadedSet.insert(IM.Mod);
 
     ModuleMgr.removeModules(ModuleMgr.begin() + NumModules, LoadedSet,
-                            Context.getLangOpts().Modules
+                            PP.getLangOpts().Modules
                                 ? &PP.getHeaderSearchInfo().getModuleMap()
                                 : nullptr);
 
@@ -3822,7 +3894,7 @@ ASTReader::ASTReadResult ASTReader::ReadAST(StringRef FileName,
       F.ImportLoc = TranslateSourceLocation(*M->ImportedBy, M->ImportLoc);
   }
 
-  if (!Context.getLangOpts().CPlusPlus ||
+  if (!PP.getLangOpts().CPlusPlus ||
       (Type != MK_ImplicitModule && Type != MK_ExplicitModule &&
        Type != MK_PrebuiltModule)) {
     // Mark all of the identifiers in the identifier table as being out of date,
@@ -3879,7 +3951,8 @@ ASTReader::ASTReadResult ASTReader::ReadAST(StringRef FileName,
   // Might be unnecessary as use declarations are only used to build the
   // module itself.
 
-  InitializeContext();
+  if (ContextObj)
+    InitializeContext();
 
   if (SemaObj)
     UpdateSema();
@@ -3901,10 +3974,12 @@ ASTReader::ASTReadResult ASTReader::ReadAST(StringRef FileName,
 
   // For any Objective-C class definitions we have already loaded, make sure
   // that we load any additional categories.
-  for (unsigned I = 0, N = ObjCClassesLoaded.size(); I != N; ++I) {
-    loadObjCCategories(ObjCClassesLoaded[I]->getGlobalID(),
-                       ObjCClassesLoaded[I],
-                       PreviousGeneration);
+  if (ContextObj) {
+    for (unsigned I = 0, N = ObjCClassesLoaded.size(); I != N; ++I) {
+      loadObjCCategories(ObjCClassesLoaded[I]->getGlobalID(),
+                         ObjCClassesLoaded[I],
+                         PreviousGeneration);
+    }
   }
 
   if (PP.getHeaderSearchInfo()
@@ -4284,6 +4359,9 @@ ASTReader::ASTReadResult ASTReader::ReadExtensionBlock(ModuleFile &F) {
 }
 
 void ASTReader::InitializeContext() {
+  assert(ContextObj && "no context to initialize");
+  ASTContext &Context = *ContextObj;
+
   // If there's a listener, notify them that we "read" the translation unit.
   if (DeserializationListener)
     DeserializationListener->DeclRead(PREDEF_DECL_TRANSLATION_UNIT_ID,
@@ -4882,6 +4960,7 @@ ASTReader::ReadSubmoduleBlock(ModuleFile &F, unsigned ClientLoadCapabilities) {
         }
 
         CurrentModule->setASTFile(F.File);
+        CurrentModule->PresumedModuleMapFile = F.ModuleMapPath;
       }
 
       CurrentModule->Kind = ModuleKind;
@@ -5017,8 +5096,8 @@ ASTReader::ReadSubmoduleBlock(ModuleFile &F, unsigned ClientLoadCapabilities) {
       break;
     }
     case SUBMODULE_REQUIRES: {
-      CurrentModule->addRequirement(Blob, Record[0], Context.getLangOpts(),
-                                    Context.getTargetInfo());
+      CurrentModule->addRequirement(Blob, Record[0], PP.getLangOpts(),
+                                    PP.getTargetInfo());
       break;
     }
 
@@ -5044,10 +5123,12 @@ ASTReader::ReadSubmoduleBlock(ModuleFile &F, unsigned ClientLoadCapabilities) {
     }
 
     case SUBMODULE_INITIALIZERS:
+      if (!ContextObj)
+        break;
+      SmallVector<uint32_t, 16> Inits;
       for (auto &ID : Record)
         Inits.push_back(getGlobalDeclID(F, ID));
-      Context.addLazyModuleInitializers(CurrentModule, Inits);
+      ContextObj->addLazyModuleInitializers(CurrentModule, Inits);
       break;
     }
   }
@@ -5175,6 +5256,8 @@ bool ASTReader::ParseHeaderSearchOptions(const RecordData &Record,
   HSOpts.ModuleCachePath = ReadString(Record, Idx);
   HSOpts.ModuleUserBuildPath = ReadString(Record, Idx);
   HSOpts.DisableModuleHash = Record[Idx++];
+  HSOpts.ImplicitModuleMaps = Record[Idx++];
+  HSOpts.ModuleMapFileHomeIsCwd = Record[Idx++];
   HSOpts.UseBuiltinIncludes = Record[Idx++];
   HSOpts.UseStandardSystemIncludes = Record[Idx++];
   HSOpts.UseStandardCXXIncludes = Record[Idx++];
@@ -5666,6 +5749,8 @@ ASTReader::RecordLocation ASTReader::TypeCursorForIndex(unsigned Index) {
 /// location. It is a helper routine for GetType, which deals with reading type
 /// IDs.
 QualType ASTReader::readTypeRecord(unsigned Index) {
+  assert(ContextObj && "reading type with no AST context");
+  ASTContext &Context = *ContextObj;
   RecordLocation Loc = TypeCursorForIndex(Index);
   BitstreamCursor &DeclsCursor = Loc.F->DeclsCursor;
 
@@ -6522,6 +6607,9 @@ ASTReader::GetTypeSourceInfo(ModuleFile &F, const ASTReader::RecordData &Record,
 }
 
 QualType ASTReader::GetType(TypeID ID) {
+  assert(ContextObj && "reading type with no AST context");
+  ASTContext &Context = *ContextObj;
+
   unsigned FastQuals = ID & Qualifiers::FastMask;
   unsigned Index = ID >> Qualifiers::FastWidth;
 
@@ -6853,6 +6941,9 @@ ASTReader::GetExternalCXXCtorInitializers(uint64_t Offset) {
 }
 
 CXXBaseSpecifier *ASTReader::GetExternalCXXBaseSpecifiers(uint64_t Offset) {
+  assert(ContextObj && "reading base specifiers with no AST context");
+  ASTContext &Context = *ContextObj;
+
   RecordLocation Loc = getLocalBitOffset(Offset);
   BitstreamCursor &Cursor = Loc.F->DeclsCursor;
   SavedStreamPosition SavedPosition(Cursor);
@@ -6984,8 +7075,9 @@ static Decl *getPredefinedDecl(ASTContext &Context, PredefinedDeclIDs ID) {
 }
 
 Decl *ASTReader::GetExistingDecl(DeclID ID) {
+  assert(ContextObj && "reading decl with no AST context");
   if (ID < NUM_PREDEF_DECL_IDS) {
-    Decl *D = getPredefinedDecl(Context, (PredefinedDeclIDs)ID);
+    Decl *D = getPredefinedDecl(*ContextObj, (PredefinedDeclIDs)ID);
     if (D) {
       // Track that we have merged the declaration with ID \p ID into the
       // pre-existing predefined declaration \p D.
@@ -7530,7 +7622,7 @@ IdentifierInfo *ASTReader::get(StringRef Name) {
   // all interesting declarations, and don't need to use the scope for name
   // lookups). Perform the lookup in PCH files, though, since we don't build
   // a complete initial identifier table if we're carrying on from a PCH.
-  if (Context.getLangOpts().CPlusPlus) {
+  if (PP.getLangOpts().CPlusPlus) {
     for (auto F : ModuleMgr.pch_modules())
       if (Visitor(*F))
         break;
@@ -8184,7 +8276,7 @@ ASTReader::getSourceDescriptor(unsigned ID) {
     return ExternalASTSource::ASTSourceDescriptor(*M);
 
   // If there is only a single PCH, return it instead.
-  // Chained PCH are not suported.
+  // Chained PCH are not supported.
   const auto &PCHChain = ModuleMgr.pch_modules();
   if (std::distance(std::begin(PCHChain), std::end(PCHChain))) {
     ModuleFile &MF = ModuleMgr.getPrimaryModule();
@@ -8260,6 +8352,7 @@ ASTReader::getGlobalSelectorID(ModuleFile &M, unsigned LocalID) const {
 DeclarationName
 ASTReader::ReadDeclarationName(ModuleFile &F,
                                const RecordData &Record, unsigned &Idx) {
+  ASTContext &Context = getContext();
   DeclarationName::NameKind Kind = (DeclarationName::NameKind)Record[Idx++];
   switch (Kind) {
   case DeclarationName::Identifier:
@@ -8350,7 +8443,8 @@ void ASTReader::ReadQualifierInfo(ModuleFile &F, QualifierInfo &Info,
   unsigned NumTPLists = Record[Idx++];
   Info.NumTemplParamLists = NumTPLists;
   if (NumTPLists) {
-    Info.TemplParamLists = new (Context) TemplateParameterList*[NumTPLists];
+    Info.TemplParamLists =
+        new (getContext()) TemplateParameterList *[NumTPLists];
     for (unsigned i = 0; i != NumTPLists; ++i)
       Info.TemplParamLists[i] = ReadTemplateParameterList(F, Record, Idx);
   }
@@ -8359,6 +8453,7 @@ void ASTReader::ReadQualifierInfo(ModuleFile &F, QualifierInfo &Info,
 TemplateName
 ASTReader::ReadTemplateName(ModuleFile &F, const RecordData &Record,
                             unsigned &Idx) {
+  ASTContext &Context = getContext();
   TemplateName::NameKind Kind = (TemplateName::NameKind)Record[Idx++];
   switch (Kind) {
   case TemplateName::Template:
@@ -8419,6 +8514,7 @@ TemplateArgument ASTReader::ReadTemplateArgument(ModuleFile &F,
                                                  const RecordData &Record,
                                                  unsigned &Idx,
                                                  bool Canonicalize) {
+  ASTContext &Context = getContext();
   if (Canonicalize) {
     // The caller wants a canonical template argument. Sometimes the AST only
     // wants template arguments in canonical form (particularly as the template
@@ -8482,9 +8578,8 @@ ASTReader::ReadTemplateParameterList(ModuleFile &F,
     Params.push_back(ReadDeclAs<NamedDecl>(F, Record, Idx));
 
   // TODO: Concepts
-  TemplateParameterList* TemplateParams =
-    TemplateParameterList::Create(Context, TemplateLoc, LAngleLoc,
-                                  Params, RAngleLoc, nullptr);
+  TemplateParameterList *TemplateParams = TemplateParameterList::Create(
+      getContext(), TemplateLoc, LAngleLoc, Params, RAngleLoc, nullptr);
   return TemplateParams;
 }
 
@@ -8503,11 +8598,11 @@ ReadTemplateArgumentList(SmallVectorImpl &TemplArgs,
 void ASTReader::ReadUnresolvedSet(ModuleFile &F, LazyASTUnresolvedSet &Set,
                                   const RecordData &Record, unsigned &Idx) {
   unsigned NumDecls = Record[Idx++];
-  Set.reserve(Context, NumDecls);
+  Set.reserve(getContext(), NumDecls);
   while (NumDecls--) {
     DeclID ID = ReadDeclID(F, Record, Idx);
     AccessSpecifier AS = (AccessSpecifier)Record[Idx++];
-    Set.addLazyDecl(Context, ID, AS);
+    Set.addLazyDecl(getContext(), ID, AS);
   }
 }
 
@@ -8530,6 +8625,7 @@ ASTReader::ReadCXXBaseSpecifier(ModuleFile &F,
 CXXCtorInitializer **
 ASTReader::ReadCXXCtorInitializers(ModuleFile &F, const RecordData &Record,
                                    unsigned &Idx) {
+  ASTContext &Context = getContext();
   unsigned NumInitializers = Record[Idx++];
   assert(NumInitializers && "wrote ctor initializers but have no inits");
   auto **CtorInitializers = new (Context) CXXCtorInitializer*[NumInitializers];
@@ -8595,6 +8691,7 @@ ASTReader::ReadCXXCtorInitializers(ModuleFile &F, const RecordData &Record,
 NestedNameSpecifier *
 ASTReader::ReadNestedNameSpecifier(ModuleFile &F,
                                    const RecordData &Record, unsigned &Idx) {
+  ASTContext &Context = getContext();
   unsigned N = Record[Idx++];
   NestedNameSpecifier *NNS = nullptr, *Prev = nullptr;
   for (unsigned I = 0; I != N; ++I) {
@@ -8650,6 +8747,7 @@ ASTReader::ReadNestedNameSpecifier(ModuleFile &F,
 NestedNameSpecifierLoc
 ASTReader::ReadNestedNameSpecifierLoc(ModuleFile &F, const RecordData &Record,
                                       unsigned &Idx) {
+  ASTContext &Context = getContext();
   unsigned N = Record[Idx++];
   NestedNameSpecifierLocBuilder Builder;
   for (unsigned I = 0; I != N; ++I) {
@@ -8771,7 +8869,7 @@ CXXTemporary *ASTReader::ReadCXXTemporary(ModuleFile &F,
                                           const RecordData &Record,
                                           unsigned &Idx) {
   CXXDestructorDecl *Decl = ReadDeclAs<CXXDestructorDecl>(F, Record, Idx);
-  return CXXTemporary::Create(Context, Decl);
+  return CXXTemporary::Create(getContext(), Decl);
 }
 
 DiagnosticBuilder ASTReader::Diag(unsigned DiagID) const {
@@ -8807,6 +8905,7 @@ void ASTReader::ClearSwitchCaseIDs() {
 }
 
 void ASTReader::ReadComments() {
+  ASTContext &Context = getContext();
   std::vector<RawComment *> Comments;
   for (SmallVectorImpl<std::pair<BitstreamCursor,
                                  serialization::ModuleFile *>>::iterator
@@ -8876,6 +8975,19 @@ void ASTReader::visitInputFiles(serialization::ModuleFile &MF,
   }
 }
 
+void ASTReader::visitTopLevelModuleMaps(
+    serialization::ModuleFile &MF,
+    llvm::function_ref<void(const FileEntry *FE)> Visitor) {
+  unsigned NumInputs = MF.InputFilesLoaded.size();
+  for (unsigned I = 0; I < NumInputs; ++I) {
+    InputFileInfo IFI = readInputFileInfo(MF, I + 1);
+    if (IFI.TopLevelModuleMap)
+      // FIXME: This unnecessarily re-reads the InputFileInfo.
+      if (auto *FE = getInputFile(MF, I + 1).getFile())
+        Visitor(FE);
+  }
+}
+
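A hedged usage sketch for the new visitor; Reader and MF are placeholders for an ASTReader and a loaded serialization::ModuleFile:

    Reader.visitTopLevelModuleMaps(MF, [](const FileEntry *FE) {
      // Called once per input file flagged as a top-level module map.
      llvm::errs() << "module map: " << FE->getName() << "\n";
    });
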
 std::string ASTReader::getOwningModuleNameForDiagnostic(const Decl *D) {
   // If we know the owning module, use it.
   if (Module *M = D->getImportedOwningModule())
@@ -8966,7 +9078,7 @@ void ASTReader::finishPendingActions() {
     while (!PendingUpdateRecords.empty()) {
       auto Update = PendingUpdateRecords.pop_back_val();
       ReadingKindTracker ReadingKind(Read_Decl, *this);
-      loadDeclUpdateRecords(Update.first, Update.second);
+      loadDeclUpdateRecords(Update);
     }
   }
 
@@ -9203,6 +9315,7 @@ void ASTReader::diagnoseOdrViolations() {
 
       // Used with err_module_odr_violation_mismatch_decl and
       // note_module_odr_violation_mismatch_decl
+      // This list should contain the same Decls as ODRHash::isWhiteListedDecl
       enum {
         EndOfClass,
         PublicSpecifer,
@@ -9211,6 +9324,10 @@ void ASTReader::diagnoseOdrViolations() {
         StaticAssert,
         Field,
         CXXMethod,
+        TypeAlias,
+        TypeDef,
+        Var,
+        Friend,
         Other
       } FirstDiffType = Other,
         SecondDiffType = Other;
@@ -9237,7 +9354,17 @@ void ASTReader::diagnoseOdrViolations() {
         case Decl::Field:
           return Field;
         case Decl::CXXMethod:
+        case Decl::CXXConstructor:
+        case Decl::CXXDestructor:
           return CXXMethod;
+        case Decl::TypeAlias:
+          return TypeAlias;
+        case Decl::Typedef:
+          return TypeDef;
+        case Decl::Var:
+          return Var;
+        case Decl::Friend:
+          return Friend;
         }
       };
 
@@ -9275,9 +9402,20 @@ void ASTReader::diagnoseOdrViolations() {
              diag::err_module_odr_violation_different_definitions)
             << FirstRecord << FirstModule.empty() << FirstModule;
 
+        if (FirstDecl) {
+          Diag(FirstDecl->getLocation(), diag::note_first_module_difference)
+              << FirstRecord << FirstDecl->getSourceRange();
+        }
+
         Diag(SecondRecord->getLocation(),
              diag::note_module_odr_violation_different_definitions)
             << SecondModule;
+
+        if (SecondDecl) {
+          Diag(SecondDecl->getLocation(), diag::note_second_module_difference)
+              << SecondDecl->getSourceRange();
+        }
+
         Diagnosed = true;
         break;
       }
@@ -9334,6 +9472,18 @@ void ASTReader::diagnoseOdrViolations() {
         MethodNumberParameters,
         MethodParameterType,
         MethodParameterName,
+        MethodParameterSingleDefaultArgument,
+        MethodParameterDifferentDefaultArgument,
+        TypedefName,
+        TypedefType,
+        VarName,
+        VarType,
+        VarSingleInitializer,
+        VarDifferentInitializer,
+        VarConstexpr,
+        FriendTypeFunction,
+        FriendType,
+        FriendFunction,
       };
 
       // These lambdas have the common portions of the ODR diagnostics.  This
@@ -9358,12 +9508,6 @@ void ASTReader::diagnoseOdrViolations() {
         return Hash.CalculateHash();
       };
 
-      auto ComputeDeclNameODRHash = [&Hash](const DeclarationName Name) {
-        Hash.clear();
-        Hash.AddDeclarationName(Name);
-        return Hash.CalculateHash();
-      };
-
       auto ComputeQualTypeODRHash = [&Hash](QualType Ty) {
         Hash.clear();
         Hash.AddQualType(Ty);
@@ -9451,16 +9595,13 @@ void ASTReader::diagnoseOdrViolations() {
           break;
         }
 
-        assert(
-            Context.hasSameType(FirstField->getType(), SecondField->getType()));
+        assert(getContext().hasSameType(FirstField->getType(),
+                                        SecondField->getType()));
 
         QualType FirstType = FirstField->getType();
         QualType SecondType = SecondField->getType();
-        const TypedefType *FirstTypedef = dyn_cast<TypedefType>(FirstType);
-        const TypedefType *SecondTypedef = dyn_cast<TypedefType>(SecondType);
-
-        if ((FirstTypedef && !SecondTypedef) ||
-            (!FirstTypedef && SecondTypedef)) {
+        if (ComputeQualTypeODRHash(FirstType) !=
+            ComputeQualTypeODRHash(SecondType)) {
           ODRDiagError(FirstField->getLocation(), FirstField->getSourceRange(),
                        FieldTypeName)
               << FirstII << FirstType;
@@ -9472,24 +9613,6 @@ void ASTReader::diagnoseOdrViolations() {
           break;
         }
 
-        if (FirstTypedef && SecondTypedef) {
-          unsigned FirstHash = ComputeDeclNameODRHash(
-              FirstTypedef->getDecl()->getDeclName());
-          unsigned SecondHash = ComputeDeclNameODRHash(
-              SecondTypedef->getDecl()->getDeclName());
-          if (FirstHash != SecondHash) {
-            ODRDiagError(FirstField->getLocation(),
-                         FirstField->getSourceRange(), FieldTypeName)
-                << FirstII << FirstType;
-            ODRDiagNote(SecondField->getLocation(),
-                        SecondField->getSourceRange(), FieldTypeName)
-                << SecondII << SecondType;
-
-            Diagnosed = true;
-            break;
-          }
-        }
-
         const bool IsFirstBitField = FirstField->isBitField();
         const bool IsSecondBitField = SecondField->isBitField();
         if (IsFirstBitField != IsSecondBitField) {
@@ -9561,17 +9684,30 @@ void ASTReader::diagnoseOdrViolations() {
         break;
       }
       case CXXMethod: {
+        enum {
+          DiagMethod,
+          DiagConstructor,
+          DiagDestructor,
+        } FirstMethodType,
+            SecondMethodType;
+        auto GetMethodTypeForDiagnostics = [](const CXXMethodDecl* D) {
+          if (isa<CXXConstructorDecl>(D)) return DiagConstructor;
+          if (isa<CXXDestructorDecl>(D)) return DiagDestructor;
+          return DiagMethod;
+        };
         const CXXMethodDecl *FirstMethod = cast<CXXMethodDecl>(FirstDecl);
         const CXXMethodDecl *SecondMethod = cast<CXXMethodDecl>(SecondDecl);
+        FirstMethodType = GetMethodTypeForDiagnostics(FirstMethod);
+        SecondMethodType = GetMethodTypeForDiagnostics(SecondMethod);
         auto FirstName = FirstMethod->getDeclName();
         auto SecondName = SecondMethod->getDeclName();
-        if (FirstName != SecondName) {
+        if (FirstMethodType != SecondMethodType || FirstName != SecondName) {
           ODRDiagError(FirstMethod->getLocation(),
                        FirstMethod->getSourceRange(), MethodName)
-              << FirstName;
+              << FirstMethodType << FirstName;
           ODRDiagNote(SecondMethod->getLocation(),
                       SecondMethod->getSourceRange(), MethodName)
-              << SecondName;
+              << SecondMethodType << SecondName;
 
           Diagnosed = true;
           break;
@@ -9582,11 +9718,11 @@ void ASTReader::diagnoseOdrViolations() {
         if (FirstDeleted != SecondDeleted) {
           ODRDiagError(FirstMethod->getLocation(),
                        FirstMethod->getSourceRange(), MethodDeleted)
-              << FirstName << FirstDeleted;
+              << FirstMethodType << FirstName << FirstDeleted;
 
           ODRDiagNote(SecondMethod->getLocation(),
                       SecondMethod->getSourceRange(), MethodDeleted)
-              << SecondName << SecondDeleted;
+              << SecondMethodType << SecondName << SecondDeleted;
           Diagnosed = true;
           break;
         }
@@ -9599,10 +9735,10 @@ void ASTReader::diagnoseOdrViolations() {
             (FirstVirtual != SecondVirtual || FirstPure != SecondPure)) {
           ODRDiagError(FirstMethod->getLocation(),
                        FirstMethod->getSourceRange(), MethodVirtual)
-              << FirstName << FirstPure << FirstVirtual;
+              << FirstMethodType << FirstName << FirstPure << FirstVirtual;
           ODRDiagNote(SecondMethod->getLocation(),
                       SecondMethod->getSourceRange(), MethodVirtual)
-              << SecondName << SecondPure << SecondVirtual;
+              << SecondMethodType << SecondName << SecondPure << SecondVirtual;
           Diagnosed = true;
           break;
         }
@@ -9617,10 +9753,10 @@ void ASTReader::diagnoseOdrViolations() {
         if (FirstStatic != SecondStatic) {
           ODRDiagError(FirstMethod->getLocation(),
                        FirstMethod->getSourceRange(), MethodStatic)
-              << FirstName << FirstStatic;
+              << FirstMethodType << FirstName << FirstStatic;
           ODRDiagNote(SecondMethod->getLocation(),
                       SecondMethod->getSourceRange(), MethodStatic)
-              << SecondName << SecondStatic;
+              << SecondMethodType << SecondName << SecondStatic;
           Diagnosed = true;
           break;
         }
@@ -9630,10 +9766,10 @@ void ASTReader::diagnoseOdrViolations() {
         if (FirstVolatile != SecondVolatile) {
           ODRDiagError(FirstMethod->getLocation(),
                        FirstMethod->getSourceRange(), MethodVolatile)
-              << FirstName << FirstVolatile;
+              << FirstMethodType << FirstName << FirstVolatile;
           ODRDiagNote(SecondMethod->getLocation(),
                       SecondMethod->getSourceRange(), MethodVolatile)
-              << SecondName << SecondVolatile;
+              << SecondMethodType << SecondName << SecondVolatile;
           Diagnosed = true;
           break;
         }
@@ -9643,10 +9779,10 @@ void ASTReader::diagnoseOdrViolations() {
         if (FirstConst != SecondConst) {
           ODRDiagError(FirstMethod->getLocation(),
                        FirstMethod->getSourceRange(), MethodConst)
-              << FirstName << FirstConst;
+              << FirstMethodType << FirstName << FirstConst;
           ODRDiagNote(SecondMethod->getLocation(),
                       SecondMethod->getSourceRange(), MethodConst)
-              << SecondName << SecondConst;
+              << SecondMethodType << SecondName << SecondConst;
           Diagnosed = true;
           break;
         }
@@ -9656,10 +9792,10 @@ void ASTReader::diagnoseOdrViolations() {
         if (FirstInline != SecondInline) {
           ODRDiagError(FirstMethod->getLocation(),
                        FirstMethod->getSourceRange(), MethodInline)
-              << FirstName << FirstInline;
+              << FirstMethodType << FirstName << FirstInline;
           ODRDiagNote(SecondMethod->getLocation(),
                       SecondMethod->getSourceRange(), MethodInline)
-              << SecondName << SecondInline;
+              << SecondMethodType << SecondName << SecondInline;
           Diagnosed = true;
           break;
         }
@@ -9669,10 +9805,10 @@ void ASTReader::diagnoseOdrViolations() {
         if (FirstNumParameters != SecondNumParameters) {
           ODRDiagError(FirstMethod->getLocation(),
                        FirstMethod->getSourceRange(), MethodNumberParameters)
-              << FirstName << FirstNumParameters;
+              << FirstMethodType << FirstName << FirstNumParameters;
           ODRDiagNote(SecondMethod->getLocation(),
                       SecondMethod->getSourceRange(), MethodNumberParameters)
-              << SecondName << SecondNumParameters;
+              << SecondMethodType << SecondName << SecondNumParameters;
           Diagnosed = true;
           break;
         }
@@ -9692,24 +9828,27 @@ void ASTReader::diagnoseOdrViolations() {
                     FirstParamType->getAs<DecayedType>()) {
               ODRDiagError(FirstMethod->getLocation(),
                            FirstMethod->getSourceRange(), MethodParameterType)
-                  << FirstName << (I + 1) << FirstParamType << true
-                  << ParamDecayedType->getOriginalType();
+                  << FirstMethodType << FirstName << (I + 1) << FirstParamType
+                  << true << ParamDecayedType->getOriginalType();
             } else {
               ODRDiagError(FirstMethod->getLocation(),
                            FirstMethod->getSourceRange(), MethodParameterType)
-                  << FirstName << (I + 1) << FirstParamType << false;
+                  << FirstMethodType << FirstName << (I + 1) << FirstParamType
+                  << false;
             }
 
             if (const DecayedType *ParamDecayedType =
                     SecondParamType->getAs<DecayedType>()) {
               ODRDiagNote(SecondMethod->getLocation(),
                           SecondMethod->getSourceRange(), MethodParameterType)
-                  << SecondName << (I + 1) << SecondParamType << true
+                  << SecondMethodType << SecondName << (I + 1)
+                  << SecondParamType << true
                   << ParamDecayedType->getOriginalType();
             } else {
               ODRDiagNote(SecondMethod->getLocation(),
                           SecondMethod->getSourceRange(), MethodParameterType)
-                  << SecondName << (I + 1) << SecondParamType << false;
+                  << SecondMethodType << SecondName << (I + 1)
+                  << SecondParamType << false;
             }
             ParameterMismatch = true;
             break;
@@ -9720,12 +9859,48 @@ void ASTReader::diagnoseOdrViolations() {
           if (FirstParamName != SecondParamName) {
             ODRDiagError(FirstMethod->getLocation(),
                          FirstMethod->getSourceRange(), MethodParameterName)
-                << FirstName << (I + 1) << FirstParamName;
+                << FirstMethodType << FirstName << (I + 1) << FirstParamName;
             ODRDiagNote(SecondMethod->getLocation(),
                         SecondMethod->getSourceRange(), MethodParameterName)
-                << SecondName << (I + 1) << SecondParamName;
+                << SecondMethodType << SecondName << (I + 1) << SecondParamName;
+            ParameterMismatch = true;
+            break;
+          }
+
+          const Expr *FirstInit = FirstParam->getInit();
+          const Expr *SecondInit = SecondParam->getInit();
+          if ((FirstInit == nullptr) != (SecondInit == nullptr)) {
+            ODRDiagError(FirstMethod->getLocation(),
+                         FirstMethod->getSourceRange(),
+                         MethodParameterSingleDefaultArgument)
+                << FirstMethodType << FirstName << (I + 1)
+                << (FirstInit == nullptr)
+                << (FirstInit ? FirstInit->getSourceRange() : SourceRange());
+            ODRDiagNote(SecondMethod->getLocation(),
+                        SecondMethod->getSourceRange(),
+                        MethodParameterSingleDefaultArgument)
+                << SecondMethodType << SecondName << (I + 1)
+                << (SecondInit == nullptr)
+                << (SecondInit ? SecondInit->getSourceRange() : SourceRange());
+            ParameterMismatch = true;
+            break;
+          }
+
+          if (FirstInit && SecondInit &&
+              ComputeODRHash(FirstInit) != ComputeODRHash(SecondInit)) {
+            ODRDiagError(FirstMethod->getLocation(),
+                         FirstMethod->getSourceRange(),
+                         MethodParameterDifferentDefaultArgument)
+                << FirstMethodType << FirstName << (I + 1)
+                << FirstInit->getSourceRange();
+            ODRDiagNote(SecondMethod->getLocation(),
+                        SecondMethod->getSourceRange(),
+                        MethodParameterDifferentDefaultArgument)
+                << SecondMethodType << SecondName << (I + 1)
+                << SecondInit->getSourceRange();
             ParameterMismatch = true;
             break;
+
           }
         }
 
@@ -9736,18 +9911,168 @@ void ASTReader::diagnoseOdrViolations() {
 
         break;
       }
+      case TypeAlias:
+      case TypeDef: {
+        TypedefNameDecl *FirstTD = cast<TypedefNameDecl>(FirstDecl);
+        TypedefNameDecl *SecondTD = cast<TypedefNameDecl>(SecondDecl);
+        auto FirstName = FirstTD->getDeclName();
+        auto SecondName = SecondTD->getDeclName();
+        if (FirstName != SecondName) {
+          ODRDiagError(FirstTD->getLocation(), FirstTD->getSourceRange(),
+                       TypedefName)
+              << (FirstDiffType == TypeAlias) << FirstName;
+          ODRDiagNote(SecondTD->getLocation(), SecondTD->getSourceRange(),
+                      TypedefName)
+              << (FirstDiffType == TypeAlias) << SecondName;
+          Diagnosed = true;
+          break;
+        }
+
+        QualType FirstType = FirstTD->getUnderlyingType();
+        QualType SecondType = SecondTD->getUnderlyingType();
+        if (ComputeQualTypeODRHash(FirstType) !=
+            ComputeQualTypeODRHash(SecondType)) {
+          ODRDiagError(FirstTD->getLocation(), FirstTD->getSourceRange(),
+                       TypedefType)
+              << (FirstDiffType == TypeAlias) << FirstName << FirstType;
+          ODRDiagNote(SecondTD->getLocation(), SecondTD->getSourceRange(),
+                      TypedefType)
+              << (FirstDiffType == TypeAlias) << SecondName << SecondType;
+          Diagnosed = true;
+          break;
+        }
+        break;
+      }
+      case Var: {
+        VarDecl *FirstVD = cast<VarDecl>(FirstDecl);
+        VarDecl *SecondVD = cast<VarDecl>(SecondDecl);
+        auto FirstName = FirstVD->getDeclName();
+        auto SecondName = SecondVD->getDeclName();
+        if (FirstName != SecondName) {
+          ODRDiagError(FirstVD->getLocation(), FirstVD->getSourceRange(),
+                       VarName)
+              << FirstName;
+          ODRDiagNote(SecondVD->getLocation(), SecondVD->getSourceRange(),
+                      VarName)
+              << SecondName;
+          Diagnosed = true;
+          break;
+        }
+
+        QualType FirstType = FirstVD->getType();
+        QualType SecondType = SecondVD->getType();
+        if (ComputeQualTypeODRHash(FirstType) !=
+                        ComputeQualTypeODRHash(SecondType)) {
+          ODRDiagError(FirstVD->getLocation(), FirstVD->getSourceRange(),
+                       VarType)
+              << FirstName << FirstType;
+          ODRDiagNote(SecondVD->getLocation(), SecondVD->getSourceRange(),
+                      VarType)
+              << SecondName << SecondType;
+          Diagnosed = true;
+          break;
+        }
+
+        const Expr *FirstInit = FirstVD->getInit();
+        const Expr *SecondInit = SecondVD->getInit();
+        if ((FirstInit == nullptr) != (SecondInit == nullptr)) {
+          ODRDiagError(FirstVD->getLocation(), FirstVD->getSourceRange(),
+                       VarSingleInitializer)
+              << FirstName << (FirstInit == nullptr)
+              << (FirstInit ? FirstInit->getSourceRange() : SourceRange());
+          ODRDiagNote(SecondVD->getLocation(), SecondVD->getSourceRange(),
+                      VarSingleInitializer)
+              << SecondName << (SecondInit == nullptr)
+              << (SecondInit ? SecondInit->getSourceRange() : SourceRange());
+          Diagnosed = true;
+          break;
+        }
+
+        if (FirstInit && SecondInit &&
+            ComputeODRHash(FirstInit) != ComputeODRHash(SecondInit)) {
+          ODRDiagError(FirstVD->getLocation(), FirstVD->getSourceRange(),
+                       VarDifferentInitializer)
+              << FirstName << FirstInit->getSourceRange();
+          ODRDiagNote(SecondVD->getLocation(), SecondVD->getSourceRange(),
+                      VarDifferentInitializer)
+              << SecondName << SecondInit->getSourceRange();
+          Diagnosed = true;
+          break;
+        }
+
+        const bool FirstIsConstexpr = FirstVD->isConstexpr();
+        const bool SecondIsConstexpr = SecondVD->isConstexpr();
+        if (FirstIsConstexpr != SecondIsConstexpr) {
+          ODRDiagError(FirstVD->getLocation(), FirstVD->getSourceRange(),
+                       VarConstexpr)
+              << FirstName << FirstIsConstexpr;
+          ODRDiagNote(SecondVD->getLocation(), SecondVD->getSourceRange(),
+                      VarConstexpr)
+              << SecondName << SecondIsConstexpr;
+          Diagnosed = true;
+          break;
+        }
+        break;
+      }
+      case Friend: {
+        FriendDecl *FirstFriend = cast<FriendDecl>(FirstDecl);
+        FriendDecl *SecondFriend = cast<FriendDecl>(SecondDecl);
+
+        NamedDecl *FirstND = FirstFriend->getFriendDecl();
+        NamedDecl *SecondND = SecondFriend->getFriendDecl();
+
+        TypeSourceInfo *FirstTSI = FirstFriend->getFriendType();
+        TypeSourceInfo *SecondTSI = SecondFriend->getFriendType();
+
+        if (FirstND && SecondND) {
+          ODRDiagError(FirstFriend->getFriendLoc(),
+                       FirstFriend->getSourceRange(), FriendFunction)
+              << FirstND;
+          ODRDiagNote(SecondFriend->getFriendLoc(),
+                      SecondFriend->getSourceRange(), FriendFunction)
+              << SecondND;
+
+          Diagnosed = true;
+          break;
+        }
+
+        if (FirstTSI && SecondTSI) {
+          QualType FirstFriendType = FirstTSI->getType();
+          QualType SecondFriendType = SecondTSI->getType();
+          assert(ComputeQualTypeODRHash(FirstFriendType) !=
+                 ComputeQualTypeODRHash(SecondFriendType));
+          ODRDiagError(FirstFriend->getFriendLoc(),
+                       FirstFriend->getSourceRange(), FriendType)
+              << FirstFriendType;
+          ODRDiagNote(SecondFriend->getFriendLoc(),
+                      SecondFriend->getSourceRange(), FriendType)
+              << SecondFriendType;
+          Diagnosed = true;
+          break;
+        }
+
+        ODRDiagError(FirstFriend->getFriendLoc(), FirstFriend->getSourceRange(),
+                     FriendTypeFunction)
+            << (FirstTSI == nullptr);
+        ODRDiagNote(SecondFriend->getFriendLoc(),
+                    SecondFriend->getSourceRange(), FriendTypeFunction)
+            << (SecondTSI == nullptr);
+
+        Diagnosed = true;
+        break;
+      }
       }
 
       if (Diagnosed == true)
         continue;
 
-      Diag(FirstRecord->getLocation(),
-           diag::err_module_odr_violation_different_definitions)
-          << FirstRecord << FirstModule.empty() << FirstModule;
-
-      Diag(SecondRecord->getLocation(),
-           diag::note_module_odr_violation_different_definitions)
-          << SecondModule;
+      Diag(FirstDecl->getLocation(),
+           diag::err_module_odr_violation_mismatch_decl_unknown)
+          << FirstRecord << FirstModule.empty() << FirstModule << FirstDiffType
+          << FirstDecl->getSourceRange();
+      Diag(SecondDecl->getLocation(),
+           diag::note_module_odr_violation_mismatch_decl_unknown)
+          << SecondModule << FirstDiffType << SecondDecl->getSourceRange();
       Diagnosed = true;
     }
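
The pattern running through this hunk: for each decl kind the reader hashes
corresponding properties of the two declarations (via ODRHash) and emits a
paired error/note on the first mismatch; if no kind-specific check fires, it
now falls back to the generic err_module_odr_violation_mismatch_decl_unknown
diagnostic rather than the old "different definitions" pair. A standalone
sketch of that compare-and-bail shape — std::hash stands in for ODRHash, and
all names here are illustrative, not clang API:

#include <functional>
#include <iostream>
#include <string>

struct DeclView { std::string Name, Type, Init; };

// Report the first differing property, mirroring the error/note pairing.
bool diagnoseOdrMismatch(const DeclView &First, const DeclView &Second) {
  auto Hash = [](const std::string &S) { return std::hash<std::string>{}(S); };
  if (First.Name != Second.Name)
    std::cout << "error: names differ\n";
  else if (Hash(First.Type) != Hash(Second.Type))
    std::cout << "error: types differ\n";
  else if (Hash(First.Init) != Hash(Second.Init))
    std::cout << "error: initializers differ\n";
  else
    std::cout << "error: declarations differ in an unmodeled way\n"; // fallback
  return true; // Diagnosed either way, as in the loop above.
}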
 
@@ -9789,10 +10114,10 @@ void ASTReader::FinishedDeserializing() {
         ProcessingUpdatesRAIIObj ProcessingUpdates(*this);
        auto *FPT = Update.second->getType()->castAs<FunctionProtoType>();
         auto ESI = FPT->getExtProtoInfo().ExceptionSpec;
-        if (auto *Listener = Context.getASTMutationListener())
+        if (auto *Listener = getContext().getASTMutationListener())
          Listener->ResolvedExceptionSpec(cast<FunctionDecl>(Update.second));
         for (auto *Redecl : Update.second->redecls())
-          Context.adjustExceptionSpec(cast<FunctionDecl>(Redecl), ESI);
+          getContext().adjustExceptionSpec(cast<FunctionDecl>(Redecl), ESI);
       }
     }
 
@@ -9834,7 +10159,7 @@ void ASTReader::pushExternalDeclIntoScope(NamedDecl *D, DeclarationName Name) {
   }
 }
 
-ASTReader::ASTReader(Preprocessor &PP, ASTContext &Context,
+ASTReader::ASTReader(Preprocessor &PP, ASTContext *Context,
                      const PCHContainerReader &PCHContainerRdr,
                     ArrayRef<std::shared_ptr<ModuleFileExtension>> Extensions,
                      StringRef isysroot, bool DisableValidation,
@@ -9847,7 +10172,7 @@ ASTReader::ASTReader(Preprocessor &PP, ASTContext &Context,
                   : cast<ASTReaderListener>(new PCHValidator(PP, *this))),
       SourceMgr(PP.getSourceManager()), FileMgr(PP.getFileManager()),
       PCHContainerRdr(PCHContainerRdr), Diags(PP.getDiagnostics()), PP(PP),
-      Context(Context),
+      ContextObj(Context),
       ModuleMgr(PP.getFileManager(), PP.getPCMCache(), PCHContainerRdr),
       PCMCache(PP.getPCMCache()), DummyIdResolver(PP),
       ReadTimer(std::move(ReadTimer)), isysroot(isysroot),
diff --git a/interpreter/llvm/src/tools/clang/lib/Serialization/ASTReaderDecl.cpp b/interpreter/llvm/src/tools/clang/lib/Serialization/ASTReaderDecl.cpp
index e0304d22fe50c..085341571ced4 100644
--- a/interpreter/llvm/src/tools/clang/lib/Serialization/ASTReaderDecl.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Serialization/ASTReaderDecl.cpp
@@ -126,6 +126,9 @@ namespace clang {
     void ReadObjCDefinitionData(struct ObjCInterfaceDecl::DefinitionData &Data);
     void MergeDefinitionData(ObjCInterfaceDecl *D,
                              struct ObjCInterfaceDecl::DefinitionData &&NewDD);
+    void ReadObjCDefinitionData(struct ObjCProtocolDecl::DefinitionData &Data);
+    void MergeDefinitionData(ObjCProtocolDecl *D,
+                             struct ObjCProtocolDecl::DefinitionData &&NewDD);
 
     static NamedDecl *getAnonymousDeclForMerging(ASTReader &Reader,
                                                  DeclContext *DC,
@@ -216,6 +219,30 @@ namespace clang {
           TypedefNameForLinkage(nullptr), HasPendingBody(false),
           IsDeclMarkedUsed(false) {}
 
+    template <typename T> static
+    void AddLazySpecializations(T *D,
+                                SmallVectorImpl<serialization::DeclID>& IDs) {
+      if (IDs.empty())
+        return;
+
+      // FIXME: We should avoid this pattern of getting the ASTContext.
+      ASTContext &C = D->getASTContext();
+
+      auto *&LazySpecializations = D->getCommonPtr()->LazySpecializations;
+
+      if (auto &Old = LazySpecializations) {
+        IDs.insert(IDs.end(), Old + 1, Old + 1 + Old[0]);
+        std::sort(IDs.begin(), IDs.end());
+        IDs.erase(std::unique(IDs.begin(), IDs.end()), IDs.end());
+      }
+
+      auto *Result = new (C) serialization::DeclID[1 + IDs.size()];
+      *Result = IDs.size();
+      std::copy(IDs.begin(), IDs.end(), Result + 1);
+
+      LazySpecializations = Result;
+    }
+
     template <typename DeclT>
     static Decl *getMostRecentDeclImpl(Redeclarable<DeclT> *D);
     static Decl *getMostRecentDeclImpl(...);
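
AddLazySpecializations above replaces the free function newDeclIDList
(deleted further down in this file): it merges freshly read specialization
IDs into a count-prefixed array ([count, id0, id1, ...]) allocated on the
ASTContext, deduplicating with sort+unique. A self-contained model of that
layout, with std::vector standing in for context-allocated memory
(illustrative only, not the clang types):

#include <algorithm>
#include <cstdint>
#include <vector>

using DeclID = uint32_t;

// Old, if non-empty, is a previous [count, ids...] array; returns the merge.
std::vector<DeclID> addLazySpecializations(const std::vector<DeclID> &Old,
                                           std::vector<DeclID> IDs) {
  if (!Old.empty()) // Old[0] holds the element count.
    IDs.insert(IDs.end(), Old.begin() + 1, Old.begin() + 1 + Old[0]);
  std::sort(IDs.begin(), IDs.end());
  IDs.erase(std::unique(IDs.begin(), IDs.end()), IDs.end());
  std::vector<DeclID> Result;
  Result.push_back(static_cast<DeclID>(IDs.size())); // leading count
  Result.insert(Result.end(), IDs.begin(), IDs.end());
  return Result;
}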
@@ -244,7 +271,7 @@ namespace clang {
     void ReadFunctionDefinition(FunctionDecl *FD);
     void Visit(Decl *D);
 
-    void UpdateDecl(Decl *D);
+    void UpdateDecl(Decl *D, llvm::SmallVectorImpl<serialization::DeclID>&);
 
     static void setNextObjCCategory(ObjCCategoryDecl *Cat,
                                     ObjCCategoryDecl *Next) {
@@ -522,32 +549,32 @@ void ASTDeclReader::VisitDecl(Decl *D) {
   D->setTopLevelDeclInObjCContainer(Record.readInt());
   D->setAccess((AccessSpecifier)Record.readInt());
   D->FromASTFile = true;
-  D->setModulePrivate(Record.readInt());
-  D->Hidden = D->isModulePrivate();
+  bool ModulePrivate = Record.readInt();
 
   // Determine whether this declaration is part of a (sub)module. If so, it
   // may not yet be visible.
   if (unsigned SubmoduleID = readSubmoduleID()) {
     // Store the owning submodule ID in the declaration.
+    D->setModuleOwnershipKind(
+        ModulePrivate ? Decl::ModuleOwnershipKind::ModulePrivate
+                      : Decl::ModuleOwnershipKind::VisibleWhenImported);
     D->setOwningModuleID(SubmoduleID);
 
-    if (D->Hidden) {
-      // Module-private declarations are never visible, so there is no work to do.
+    if (ModulePrivate) {
+      // Module-private declarations are never visible, so there is no work to
+      // do.
     } else if (Reader.getContext().getLangOpts().ModulesLocalVisibility) {
       // If local visibility is being tracked, this declaration will become
-      // hidden and visible as the owning module does. Inform Sema that this
-      // declaration might not be visible.
-      D->Hidden = true;
+      // hidden and visible as the owning module does.
     } else if (Module *Owner = Reader.getSubmodule(SubmoduleID)) {
-      if (Owner->NameVisibility != Module::AllVisible) {
-        // The owning module is not visible. Mark this declaration as hidden.
-        D->Hidden = true;
-
-        // Note that this declaration was hidden because its owning module is 
-        // not yet visible.
+      // Mark the declaration as visible when its owning module becomes visible.
+      if (Owner->NameVisibility == Module::AllVisible)
+        D->setVisibleDespiteOwningModule();
+      else
         Reader.HiddenNamesMap[Owner].push_back(D);
-      }
     }
+  } else if (ModulePrivate) {
+    D->setModuleOwnershipKind(Decl::ModuleOwnershipKind::ModulePrivate);
   }
 }
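
This VisitDecl rewrite retires the single Decl::Hidden bit in favor of the
ModuleOwnershipKind machinery: module-private declarations stay invisible,
module-owned ones become VisibleWhenImported, and declarations whose owner is
already fully visible get setVisibleDespiteOwningModule(). A rough standalone
model of the resulting state (names mirror the patch, but the enum and layout
here are an illustration, not clang's):

#include <cstdint>

enum class OwnershipKind : uint8_t { Unowned, VisibleWhenImported,
                                     ModulePrivate };

struct DeclState {
  OwnershipKind Kind = OwnershipKind::Unowned;
  bool VisibleOverride = false; // setVisibleDespiteOwningModule()

  bool isHidden(bool OwningModuleVisible) const {
    if (VisibleOverride)
      return false;
    switch (Kind) {
    case OwnershipKind::ModulePrivate:       return true;  // never visible
    case OwnershipKind::VisibleWhenImported: return !OwningModuleVisible;
    case OwnershipKind::Unowned:             return false;
    }
    return false;
  }
};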
 
@@ -1047,18 +1074,8 @@ void ASTDeclReader::VisitObjCIvarDecl(ObjCIvarDecl *IVD) {
   IVD->setSynthesize(synth);
 }
 
-void ASTDeclReader::VisitObjCProtocolDecl(ObjCProtocolDecl *PD) {
-  RedeclarableResult Redecl = VisitRedeclarable(PD);
-  VisitObjCContainerDecl(PD);
-  mergeRedeclarable(PD, Redecl);
-
-  if (Record.readInt()) {
-    // Read the definition.
-    PD->allocateDefinitionData();
-
-    // Set the definition data of the canonical declaration, so other
-    // redeclarations will see it.
-    PD->getCanonicalDecl()->Data = PD->Data;
+void ASTDeclReader::ReadObjCDefinitionData(
+         struct ObjCProtocolDecl::DefinitionData &Data) {
 
     unsigned NumProtoRefs = Record.readInt();
     SmallVector<ObjCProtocolDecl *, 16> ProtoRefs;
@@ -1069,9 +1086,37 @@ void ASTDeclReader::VisitObjCProtocolDecl(ObjCProtocolDecl *PD) {
     ProtoLocs.reserve(NumProtoRefs);
     for (unsigned I = 0; I != NumProtoRefs; ++I)
       ProtoLocs.push_back(ReadSourceLocation());
-    PD->setProtocolList(ProtoRefs.data(), NumProtoRefs, ProtoLocs.data(),
-                        Reader.getContext());
+    Data.ReferencedProtocols.set(ProtoRefs.data(), NumProtoRefs,
+                                 ProtoLocs.data(), Reader.getContext());
+}
+
+void ASTDeclReader::MergeDefinitionData(ObjCProtocolDecl *D,
+         struct ObjCProtocolDecl::DefinitionData &&NewDD) {
+  // FIXME: odr checking?
+}
+
+void ASTDeclReader::VisitObjCProtocolDecl(ObjCProtocolDecl *PD) {
+  RedeclarableResult Redecl = VisitRedeclarable(PD);
+  VisitObjCContainerDecl(PD);
+  mergeRedeclarable(PD, Redecl);
+
+  if (Record.readInt()) {
+    // Read the definition.
+    PD->allocateDefinitionData();
 
+    ReadObjCDefinitionData(PD->data());
+
+    ObjCProtocolDecl *Canon = PD->getCanonicalDecl();
+    if (Canon->Data.getPointer()) {
+      // If we already have a definition, keep the definition invariant and
+      // merge the data.
+      MergeDefinitionData(Canon, std::move(PD->data()));
+      PD->Data = Canon->Data;
+    } else {
+      // Set the definition data of the canonical declaration, so other
+      // redeclarations will see it.
+      PD->getCanonicalDecl()->Data = PD->Data;
+    }
     // Note that we have deserialized a definition.
     Reader.PendingDefinitions.insert(PD);
   } else {
@@ -1229,6 +1274,7 @@ ASTDeclReader::RedeclarableResult ASTDeclReader::VisitVarDeclImpl(VarDecl *VD) {
     VD->NonParmVarDeclBits.IsConstexpr = Record.readInt();
     VD->NonParmVarDeclBits.IsInitCapture = Record.readInt();
     VD->NonParmVarDeclBits.PreviousDeclInSameBlockScope = Record.readInt();
+    VD->NonParmVarDeclBits.ImplicitParamKind = Record.readInt();
   }
   Linkage VarLinkage = Linkage(Record.readInt());
   VD->setCachedLinkage(VarLinkage);
@@ -1513,9 +1559,11 @@ void ASTDeclReader::ReadCXXDefinitionData(
   Data.HasUninitializedFields = Record.readInt();
   Data.HasInheritedConstructor = Record.readInt();
   Data.HasInheritedAssignment = Record.readInt();
+  Data.NeedOverloadResolutionForCopyConstructor = Record.readInt();
   Data.NeedOverloadResolutionForMoveConstructor = Record.readInt();
   Data.NeedOverloadResolutionForMoveAssignment = Record.readInt();
   Data.NeedOverloadResolutionForDestructor = Record.readInt();
+  Data.DefaultedCopyConstructorIsDeleted = Record.readInt();
   Data.DefaultedMoveConstructorIsDeleted = Record.readInt();
   Data.DefaultedMoveAssignmentIsDeleted = Record.readInt();
   Data.DefaultedDestructorIsDeleted = Record.readInt();
@@ -1524,6 +1572,7 @@ void ASTDeclReader::ReadCXXDefinitionData(
   Data.HasIrrelevantDestructor = Record.readInt();
   Data.HasConstexprNonCopyMoveConstructor = Record.readInt();
   Data.HasDefaultedDefaultConstructor = Record.readInt();
+  Data.CanPassInRegisters = Record.readInt();
   Data.DefaultedDefaultConstructorIsConstexpr = Record.readInt();
   Data.HasConstexprDefaultConstructor = Record.readInt();
   Data.HasNonLiteralTypeFieldsOrBases = Record.readInt();
@@ -1567,8 +1616,8 @@ void ASTDeclReader::ReadCXXDefinitionData(
     Lambda.NumExplicitCaptures = Record.readInt();
     Lambda.ManglingNumber = Record.readInt();
     Lambda.ContextDecl = ReadDeclID();
-    Lambda.Captures 
-      = (Capture*)Reader.Context.Allocate(sizeof(Capture)*Lambda.NumCaptures);
+    Lambda.Captures = (Capture *)Reader.getContext().Allocate(
+        sizeof(Capture) * Lambda.NumCaptures);
     Capture *ToCapture = Lambda.Captures;
     Lambda.MethodTyInfo = GetTypeSourceInfo();
     for (unsigned I = 0, N = Lambda.NumCaptures; I != N; ++I) {
@@ -1651,9 +1700,11 @@ void ASTDeclReader::MergeDefinitionData(
   MATCH_FIELD(HasUninitializedFields)
   MATCH_FIELD(HasInheritedConstructor)
   MATCH_FIELD(HasInheritedAssignment)
+  MATCH_FIELD(NeedOverloadResolutionForCopyConstructor)
   MATCH_FIELD(NeedOverloadResolutionForMoveConstructor)
   MATCH_FIELD(NeedOverloadResolutionForMoveAssignment)
   MATCH_FIELD(NeedOverloadResolutionForDestructor)
+  MATCH_FIELD(DefaultedCopyConstructorIsDeleted)
   MATCH_FIELD(DefaultedMoveConstructorIsDeleted)
   MATCH_FIELD(DefaultedMoveAssignmentIsDeleted)
   MATCH_FIELD(DefaultedDestructorIsDeleted)
@@ -1662,6 +1713,7 @@ void ASTDeclReader::MergeDefinitionData(
   MATCH_FIELD(HasIrrelevantDestructor)
   OR_FIELD(HasConstexprNonCopyMoveConstructor)
   OR_FIELD(HasDefaultedDefaultConstructor)
+  MATCH_FIELD(CanPassInRegisters)
   MATCH_FIELD(DefaultedDefaultConstructorIsConstexpr)
   OR_FIELD(HasConstexprDefaultConstructor)
   MATCH_FIELD(HasNonLiteralTypeFieldsOrBases)
@@ -1951,21 +2003,6 @@ ASTDeclReader::VisitRedeclarableTemplateDecl(RedeclarableTemplateDecl *D) {
   return Redecl;
 }
 
-static DeclID *newDeclIDList(ASTContext &Context, DeclID *Old,
-                             SmallVectorImpl<DeclID> &IDs) {
-  assert(!IDs.empty() && "no IDs to add to list");
-  if (Old) {
-    IDs.insert(IDs.end(), Old + 1, Old + 1 + Old[0]);
-    std::sort(IDs.begin(), IDs.end());
-    IDs.erase(std::unique(IDs.begin(), IDs.end()), IDs.end());
-  }
-
-  auto *Result = new (Context) DeclID[1 + IDs.size()];
-  *Result = IDs.size();
-  std::copy(IDs.begin(), IDs.end(), Result + 1);
-  return Result;
-}
-
 void ASTDeclReader::VisitClassTemplateDecl(ClassTemplateDecl *D) {
   RedeclarableResult Redecl = VisitRedeclarableTemplateDecl(D);
 
@@ -1974,19 +2011,14 @@ void ASTDeclReader::VisitClassTemplateDecl(ClassTemplateDecl *D) {
     // the specializations.
     SmallVector<serialization::DeclID, 32> SpecIDs;
     ReadDeclIDList(SpecIDs);
-
-    if (!SpecIDs.empty()) {
-      auto *CommonPtr = D->getCommonPtr();
-      CommonPtr->LazySpecializations = newDeclIDList(
-          Reader.getContext(), CommonPtr->LazySpecializations, SpecIDs);
-    }
+    ASTDeclReader::AddLazySpecializations(D, SpecIDs);
   }
 
   if (D->getTemplatedDecl()->TemplateOrInstantiation) {
     // We were loaded before our templated declaration was. We've not set up
     // its corresponding type yet (see VisitCXXRecordDeclImpl), so reconstruct
     // it now.
-    Reader.Context.getInjectedClassNameType(
+    Reader.getContext().getInjectedClassNameType(
         D->getTemplatedDecl(), D->getInjectedClassNameSpecialization());
   }
 }
@@ -2006,12 +2038,7 @@ void ASTDeclReader::VisitVarTemplateDecl(VarTemplateDecl *D) {
     // the specializations.
     SmallVector<serialization::DeclID, 32> SpecIDs;
     ReadDeclIDList(SpecIDs);
-
-    if (!SpecIDs.empty()) {
-      auto *CommonPtr = D->getCommonPtr();
-      CommonPtr->LazySpecializations = newDeclIDList(
-          Reader.getContext(), CommonPtr->LazySpecializations, SpecIDs);
-    }
+    ASTDeclReader::AddLazySpecializations(D, SpecIDs);
   }
 }
 
@@ -2117,12 +2144,7 @@ void ASTDeclReader::VisitFunctionTemplateDecl(FunctionTemplateDecl *D) {
     // This FunctionTemplateDecl owns a CommonPtr; read it.
     SmallVector<serialization::DeclID, 32> SpecIDs;
     ReadDeclIDList(SpecIDs);
-
-    if (!SpecIDs.empty()) {
-      auto *CommonPtr = D->getCommonPtr();
-      CommonPtr->LazySpecializations = newDeclIDList(
-          Reader.getContext(), CommonPtr->LazySpecializations, SpecIDs);
-    }
+    ASTDeclReader::AddLazySpecializations(D, SpecIDs);
   }
 }
 
@@ -2473,8 +2495,8 @@ void ASTDeclReader::mergeMergeable(Mergeable *D) {
 
   if (FindExistingResult ExistingRes = findExisting(static_cast<T*>(D)))
     if (T *Existing = ExistingRes)
-      Reader.Context.setPrimaryMergedDecl(static_cast<T*>(D),
-                                          Existing->getCanonicalDecl());
+      Reader.getContext().setPrimaryMergedDecl(static_cast<T*>(D),
+                                               Existing->getCanonicalDecl());
 }
 
 void ASTDeclReader::VisitOMPThreadPrivateDecl(OMPThreadPrivateDecl *D) {
@@ -2510,6 +2532,7 @@ void ASTReader::ReadAttributes(ASTRecordReader &Record, AttrVec &Attrs) {
     Attr *New = nullptr;
     attr::Kind Kind = (attr::Kind)Record.readInt();
     SourceRange Range = Record.readSourceRange();
+    ASTContext &Context = getContext();
 
 #include "clang/Serialization/AttrPCHRead.inc"
 
@@ -2908,7 +2931,7 @@ DeclContext *ASTDeclReader::getPrimaryContextForMerging(ASTReader &Reader,
     // commit to DC being the canonical definition now, and will fix this when
     // we load the update record.
     if (!DD) {
-      DD = new (Reader.Context) struct CXXRecordDecl::DefinitionData(RD);
+      DD = new (Reader.getContext()) struct CXXRecordDecl::DefinitionData(RD);
       RD->IsCompleteDefinition = true;
       RD->DefinitionData = DD;
       RD->getCanonicalDecl()->DefinitionData = DD;
@@ -3353,6 +3376,7 @@ Decl *ASTReader::ReadDeclRecord(DeclID ID) {
   ASTDeclReader Reader(*this, Record, Loc, ID, DeclLoc);
   unsigned Code = DeclsCursor.ReadCode();
 
+  ASTContext &Context = getContext();
   Decl *D = nullptr;
   switch ((DeclCode)Record.readRecord(DeclsCursor, Code)) {
   case DECL_CONTEXT_LEXICAL:
@@ -3612,7 +3636,8 @@ Decl *ASTReader::ReadDeclRecord(DeclID ID) {
   assert(Record.getIdx() == Record.size());
 
   // Load any relevant update records.
-  PendingUpdateRecords.push_back(std::make_pair(ID, D));
+  PendingUpdateRecords.push_back(
+      PendingUpdateRecord(ID, D, /*JustLoaded=*/true));
 
   // Load the categories after recursive loading is finished.
   if (ObjCInterfaceDecl *Class = dyn_cast<ObjCInterfaceDecl>(D))
@@ -3652,25 +3677,32 @@ void ASTReader::PassInterestingDeclsToConsumer() {
   while (!PotentiallyInterestingDecls.empty()) {
     InterestingDecl D = PotentiallyInterestingDecls.front();
     PotentiallyInterestingDecls.pop_front();
-    if (isConsumerInterestedIn(Context, D.getDecl(), D.hasPendingBody()))
+    if (isConsumerInterestedIn(getContext(), D.getDecl(), D.hasPendingBody()))
       PassInterestingDeclToConsumer(D.getDecl());
   }
 }
 
-void ASTReader::loadDeclUpdateRecords(serialization::DeclID ID, Decl *D) {
+void ASTReader::loadDeclUpdateRecords(PendingUpdateRecord &Record) {
   // The declaration may have been modified by files later in the chain.
   // If this is the case, read the record containing the updates from each file
   // and pass it to ASTDeclReader to make the modifications.
+  serialization::GlobalDeclID ID = Record.ID;
+  Decl *D = Record.D;
   ProcessingUpdatesRAIIObj ProcessingUpdates(*this);
   DeclUpdateOffsetsMap::iterator UpdI = DeclUpdateOffsets.find(ID);
+
+  llvm::SmallVector<serialization::DeclID, 16> PendingLazySpecializationIDs;
+
   if (UpdI != DeclUpdateOffsets.end()) {
     auto UpdateOffsets = std::move(UpdI->second);
     DeclUpdateOffsets.erase(UpdI);
 
-    // FIXME: This call to isConsumerInterestedIn is not safe because
-    // we could be deserializing declarations at the moment. We should
-    // delay calling this in the same way as done in D30793.
-    bool WasInteresting = isConsumerInterestedIn(Context, D, false);
+    // Check if this decl was interesting to the consumer. If we just loaded
+    // the declaration, then we know it was interesting and we skip the call
+    // to isConsumerInterestedIn because it is unsafe to call in the
+    // current ASTReader state.
+    bool WasInteresting =
+        Record.JustLoaded || isConsumerInterestedIn(getContext(), D, false);
     for (auto &FileAndOffset : UpdateOffsets) {
       ModuleFile *F = FileAndOffset.first;
       uint64_t Offset = FileAndOffset.second;
@@ -3685,18 +3717,29 @@ void ASTReader::loadDeclUpdateRecords(serialization::DeclID ID, Decl *D) {
 
       ASTDeclReader Reader(*this, Record, RecordLocation(F, Offset), ID,
                            SourceLocation());
-      Reader.UpdateDecl(D);
+      Reader.UpdateDecl(D, PendingLazySpecializationIDs);
 
       // We might have made this declaration interesting. If so, remember that
       // we need to hand it off to the consumer.
       if (!WasInteresting &&
-          isConsumerInterestedIn(Context, D, Reader.hasPendingBody())) {
+          isConsumerInterestedIn(getContext(), D, Reader.hasPendingBody())) {
         PotentiallyInterestingDecls.push_back(
             InterestingDecl(D, Reader.hasPendingBody()));
         WasInteresting = true;
       }
     }
   }
+  // Add the lazy specializations to the template.
+  assert((PendingLazySpecializationIDs.empty() || isa<ClassTemplateDecl>(D) ||
+          isa<FunctionTemplateDecl>(D) || isa<VarTemplateDecl>(D)) &&
+         "Must not have pending specializations");
+  if (auto *CTD = dyn_cast<ClassTemplateDecl>(D))
+    ASTDeclReader::AddLazySpecializations(CTD, PendingLazySpecializationIDs);
+  else if (auto *FTD = dyn_cast<FunctionTemplateDecl>(D))
+    ASTDeclReader::AddLazySpecializations(FTD, PendingLazySpecializationIDs);
+  else if (auto *VTD = dyn_cast<VarTemplateDecl>(D))
+    ASTDeclReader::AddLazySpecializations(VTD, PendingLazySpecializationIDs);
+  PendingLazySpecializationIDs.clear();
 
   // Load the pending visible updates for this decl context, if it has any.
   auto I = PendingVisibleUpdates.find(ID);
@@ -3893,7 +3936,8 @@ static void forAllLaterRedecls(DeclT *D, Fn F) {
   }
 }
 
-void ASTDeclReader::UpdateDecl(Decl *D) {
+void ASTDeclReader::UpdateDecl(Decl *D,
+   llvm::SmallVectorImpl<serialization::DeclID> &PendingLazySpecializationIDs) {
   while (Record.getIdx() < Record.size()) {
     switch ((DeclUpdateKind)Record.readInt()) {
     case UPD_CXX_ADDED_IMPLICIT_MEMBER: {
@@ -3909,8 +3953,8 @@ void ASTDeclReader::UpdateDecl(Decl *D) {
     }
 
     case UPD_CXX_ADDED_TEMPLATE_SPECIALIZATION:
-      // It will be added to the template's specializations set when loaded.
-      (void)Record.readDecl();
+      // It will be added to the template's lazy specialization set.
+      PendingLazySpecializationIDs.push_back(ReadDeclID());
       break;
 
     case UPD_CXX_ADDED_ANONYMOUS_NAMESPACE: {
@@ -3928,10 +3972,21 @@ void ASTDeclReader::UpdateDecl(Decl *D) {
       break;
     }
 
-    case UPD_CXX_INSTANTIATED_STATIC_DATA_MEMBER:
-      cast<VarDecl>(D)->getMemberSpecializationInfo()->setPointOfInstantiation(
+    case UPD_CXX_INSTANTIATED_STATIC_DATA_MEMBER: {
+      VarDecl *VD = cast<VarDecl>(D);
+      VD->getMemberSpecializationInfo()->setPointOfInstantiation(
           ReadSourceLocation());
+      uint64_t Val = Record.readInt();
+      if (Val && !VD->getInit()) {
+        VD->setInit(Record.readExpr());
+        if (Val > 1) { // IsInitKnownICE = 1, IsInitNotICE = 2, IsInitICE = 3
+          EvaluatedStmt *Eval = VD->ensureEvaluatedStmt();
+          Eval->CheckedICE = true;
+          Eval->IsICE = Val == 3;
+        }
+      }
       break;
+    }
 
     case UPD_CXX_INSTANTIATED_DEFAULT_ARGUMENT: {
       auto Param = cast<ParmVarDecl>(D);
@@ -4068,7 +4123,7 @@ void ASTDeclReader::UpdateDecl(Decl *D) {
       // FIXME: If the exception specification is already present, check that it
       // matches.
       if (isUnresolvedExceptionSpec(FPT->getExceptionSpecType())) {
-        FD->setType(Reader.Context.getFunctionType(
+        FD->setType(Reader.getContext().getFunctionType(
             FPT->getReturnType(), FPT->getParamTypes(),
             FPT->getExtProtoInfo().withExceptionSpec(ESI)));
 
@@ -4086,28 +4141,31 @@ void ASTDeclReader::UpdateDecl(Decl *D) {
       for (auto *Redecl : merged_redecls(D)) {
         // FIXME: If the return type is already deduced, check that it matches.
         FunctionDecl *FD = cast<FunctionDecl>(Redecl);
-        Reader.Context.adjustDeducedFunctionResultType(FD, DeducedResultType);
+        Reader.getContext().adjustDeducedFunctionResultType(FD,
+                                                            DeducedResultType);
       }
       break;
     }
 
     case UPD_DECL_MARKED_USED: {
       // Maintain AST consistency: any later redeclarations are used too.
-      D->markUsed(Reader.Context);
+      D->markUsed(Reader.getContext());
       break;
     }
 
     case UPD_MANGLING_NUMBER:
-      Reader.Context.setManglingNumber(cast<NamedDecl>(D), Record.readInt());
+      Reader.getContext().setManglingNumber(cast<NamedDecl>(D),
+                                            Record.readInt());
       break;
 
     case UPD_STATIC_LOCAL_NUMBER:
-      Reader.Context.setStaticLocalNumber(cast<VarDecl>(D), Record.readInt());
+      Reader.getContext().setStaticLocalNumber(cast<VarDecl>(D),
+                                               Record.readInt());
       break;
 
     case UPD_DECL_MARKED_OPENMP_THREADPRIVATE:
-      D->addAttr(OMPThreadPrivateDeclAttr::CreateImplicit(
-          Reader.Context, ReadSourceRange()));
+      D->addAttr(OMPThreadPrivateDeclAttr::CreateImplicit(Reader.getContext(),
+                                                          ReadSourceRange()));
       break;
 
     case UPD_DECL_EXPORTED: {
@@ -4127,7 +4185,7 @@ void ASTDeclReader::UpdateDecl(Decl *D) {
         Reader.HiddenNamesMap[Owner].push_back(Exported);
       } else {
         // The declaration is now visible.
-        Exported->Hidden = false;
+        Exported->setVisibleDespiteOwningModule();
       }
       break;
     }
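
Among the UpdateDecl changes above, UPD_CXX_INSTANTIATED_STATIC_DATA_MEMBER
now carries the instantiated initializer plus a small integer encoding
whether it is an integral constant expression: 0 = no initializer,
1 = initializer present but ICE-ness unknown, 2 = known not an ICE,
3 = known ICE (the writer side is in the ASTWriter.cpp hunk below). A
minimal sketch of decoding that value, with a plain struct standing in for
VarDecl's EvaluatedStmt:

#include <cstdint>

struct EvaluatedInit {
  bool HasInit = false;
  bool CheckedICE = false;
  bool IsICE = false;
};

EvaluatedInit decodeInitVal(uint64_t Val) {
  EvaluatedInit E;
  E.HasInit = Val != 0;
  if (Val > 1) {        // 2 or 3: ICE-ness has been computed
    E.CheckedICE = true;
    E.IsICE = (Val == 3);
  }
  return E;
}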
diff --git a/interpreter/llvm/src/tools/clang/lib/Serialization/ASTReaderStmt.cpp b/interpreter/llvm/src/tools/clang/lib/Serialization/ASTReaderStmt.cpp
index 3d314a85ff17f..3f5da029947c5 100644
--- a/interpreter/llvm/src/tools/clang/lib/Serialization/ASTReaderStmt.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Serialization/ASTReaderStmt.cpp
@@ -367,28 +367,45 @@ void ASTStmtReader::VisitMSAsmStmt(MSAsmStmt *S) {
 }
 
 void ASTStmtReader::VisitCoroutineBodyStmt(CoroutineBodyStmt *S) {
-  // FIXME: Implement coroutine serialization.
-  llvm_unreachable("unimplemented");
+  VisitStmt(S);
+  assert(Record.peekInt() == S->NumParams);
+  Record.skipInts(1);
+  auto *StoredStmts = S->getStoredStmts();
+  for (unsigned i = 0;
+       i < CoroutineBodyStmt::SubStmt::FirstParamMove + S->NumParams; ++i)
+    StoredStmts[i] = Record.readSubStmt();
 }
 
 void ASTStmtReader::VisitCoreturnStmt(CoreturnStmt *S) {
-  // FIXME: Implement coroutine serialization.
-  llvm_unreachable("unimplemented");
+  VisitStmt(S);
+  S->CoreturnLoc = Record.readSourceLocation();
+  for (auto &SubStmt: S->SubStmts)
+    SubStmt = Record.readSubStmt();
+  S->IsImplicit = Record.readInt() != 0;
 }
 
-void ASTStmtReader::VisitCoawaitExpr(CoawaitExpr *S) {
-  // FIXME: Implement coroutine serialization.
-  llvm_unreachable("unimplemented");
+void ASTStmtReader::VisitCoawaitExpr(CoawaitExpr *E) {
+  VisitExpr(E);
+  E->KeywordLoc = ReadSourceLocation();
+  for (auto &SubExpr: E->SubExprs)
+    SubExpr = Record.readSubStmt();
+  E->OpaqueValue = cast_or_null<OpaqueValueExpr>(Record.readSubStmt());
+  E->setIsImplicit(Record.readInt() != 0);
 }
 
-void ASTStmtReader::VisitDependentCoawaitExpr(DependentCoawaitExpr *S) {
-  // FIXME: Implement coroutine serialization.
-  llvm_unreachable("unimplemented");
+void ASTStmtReader::VisitCoyieldExpr(CoyieldExpr *E) {
+  VisitExpr(E);
+  E->KeywordLoc = ReadSourceLocation();
+  for (auto &SubExpr: E->SubExprs)
+    SubExpr = Record.readSubStmt();
+  E->OpaqueValue = cast_or_null<OpaqueValueExpr>(Record.readSubStmt());
 }
 
-void ASTStmtReader::VisitCoyieldExpr(CoyieldExpr *S) {
-  // FIXME: Implement coroutine serialization.
-  llvm_unreachable("unimplemented");
+void ASTStmtReader::VisitDependentCoawaitExpr(DependentCoawaitExpr *E) {
+  VisitExpr(E);
+  E->KeywordLoc = ReadSourceLocation();
+  for (auto &SubExpr: E->SubExprs)
+    SubExpr = Record.readSubStmt();
 }
 
 void ASTStmtReader::VisitCapturedStmt(CapturedStmt *S) {
@@ -1834,6 +1851,9 @@ OMPClause *OMPClauseReader::readClause() {
   case OMPC_reduction:
     C = OMPReductionClause::CreateEmpty(Context, Reader->Record.readInt());
     break;
+  case OMPC_task_reduction:
+    C = OMPTaskReductionClause::CreateEmpty(Context, Reader->Record.readInt());
+    break;
   case OMPC_linear:
     C = OMPLinearClause::CreateEmpty(Context, Reader->Record.readInt());
     break;
@@ -2138,6 +2158,40 @@ void OMPClauseReader::VisitOMPReductionClause(OMPReductionClause *C) {
   C->setReductionOps(Vars);
 }
 
+void OMPClauseReader::VisitOMPTaskReductionClause(OMPTaskReductionClause *C) {
+  VisitOMPClauseWithPostUpdate(C);
+  C->setLParenLoc(Reader->ReadSourceLocation());
+  C->setColonLoc(Reader->ReadSourceLocation());
+  NestedNameSpecifierLoc NNSL = Reader->Record.readNestedNameSpecifierLoc();
+  DeclarationNameInfo DNI;
+  Reader->ReadDeclarationNameInfo(DNI);
+  C->setQualifierLoc(NNSL);
+  C->setNameInfo(DNI);
+
+  unsigned NumVars = C->varlist_size();
+  SmallVector<Expr *, 16> Vars;
+  Vars.reserve(NumVars);
+  for (unsigned I = 0; I != NumVars; ++I)
+    Vars.push_back(Reader->Record.readSubExpr());
+  C->setVarRefs(Vars);
+  Vars.clear();
+  for (unsigned I = 0; I != NumVars; ++I)
+    Vars.push_back(Reader->Record.readSubExpr());
+  C->setPrivates(Vars);
+  Vars.clear();
+  for (unsigned I = 0; I != NumVars; ++I)
+    Vars.push_back(Reader->Record.readSubExpr());
+  C->setLHSExprs(Vars);
+  Vars.clear();
+  for (unsigned I = 0; I != NumVars; ++I)
+    Vars.push_back(Reader->Record.readSubExpr());
+  C->setRHSExprs(Vars);
+  Vars.clear();
+  for (unsigned I = 0; I != NumVars; ++I)
+    Vars.push_back(Reader->Record.readSubExpr());
+  C->setReductionOps(Vars);
+}
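
The new task_reduction reader mirrors the writer further below: after the
common clause fields come five expression lists (variable references,
privates, LHS exprs, RHS exprs, reduction ops), each varlist_size() entries
long, read back-to-back in a fixed order. A compact stand-in showing why the
reader and writer must agree on that order (plain ints model Expr*):

#include <array>
#include <cstddef>
#include <vector>

std::array<std::vector<int>, 5>
readReductionLists(const std::vector<int> &Stream, unsigned NumVars) {
  std::array<std::vector<int>, 5> Lists; // refs, privates, lhs, rhs, ops
  std::size_t Pos = 0;
  for (auto &List : Lists)
    for (unsigned I = 0; I != NumVars; ++I)
      List.push_back(Stream.at(Pos++)); // same order the writer emitted
  return Lists;
}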
+
 void OMPClauseReader::VisitOMPLinearClause(OMPLinearClause *C) {
   VisitOMPClauseWithPostUpdate(C);
   C->setLParenLoc(Reader->ReadSourceLocation());
@@ -2709,6 +2763,8 @@ void ASTStmtReader::VisitOMPTaskwaitDirective(OMPTaskwaitDirective *D) {
 
 void ASTStmtReader::VisitOMPTaskgroupDirective(OMPTaskgroupDirective *D) {
   VisitStmt(D);
+  // The NumClauses field was read in ReadStmtFromStream.
+  Record.skipInts(1);
   VisitOMPExecutableDirective(D);
 }
 
@@ -2954,6 +3010,7 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) {
       break;
     }
 
+    ASTContext &Context = getContext();
     Stmt *S = nullptr;
     bool Finished = false;
     bool IsStmtReference = false;
@@ -3478,7 +3535,8 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) {
       break;
 
     case STMT_OMP_TASKGROUP_DIRECTIVE:
-      S = OMPTaskgroupDirective::CreateEmpty(Context, Empty);
+      S = OMPTaskgroupDirective::CreateEmpty(
+          Context, Record[ASTStmtReader::NumStmtFields], Empty);
       break;
 
     case STMT_OMP_FLUSH_DIRECTIVE:
@@ -3906,6 +3964,29 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) {
       S = LambdaExpr::CreateDeserialized(Context, NumCaptures);
       break;
     }
+
+    case STMT_COROUTINE_BODY: {
+      unsigned NumParams = Record[ASTStmtReader::NumStmtFields];
+      S = CoroutineBodyStmt::Create(Context, Empty, NumParams);
+      break;
+    }
+
+    case STMT_CORETURN:
+      S = new (Context) CoreturnStmt(Empty);
+      break;
+
+    case EXPR_COAWAIT:
+      S = new (Context) CoawaitExpr(Empty);
+      break;
+
+    case EXPR_COYIELD:
+      S = new (Context) CoyieldExpr(Empty);
+      break;
+
+    case EXPR_DEPENDENT_COAWAIT:
+      S = new (Context) DependentCoawaitExpr(Empty);
+      break;
+
     }
 
     // We hit a STMT_STOP, so we're done with this expression.
diff --git a/interpreter/llvm/src/tools/clang/lib/Serialization/ASTWriter.cpp b/interpreter/llvm/src/tools/clang/lib/Serialization/ASTWriter.cpp
index 202e0003b1e59..7a26726022eed 100644
--- a/interpreter/llvm/src/tools/clang/lib/Serialization/ASTWriter.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Serialization/ASTWriter.cpp
@@ -1093,6 +1093,7 @@ void ASTWriter::WriteBlockInfoBlock() {
   RECORD(UNUSED_LOCAL_TYPEDEF_NAME_CANDIDATES);
   RECORD(DELETE_EXPRS_TO_ANALYZE);
   RECORD(CUDA_PRAGMA_FORCE_HOST_DEVICE_DEPTH);
+  RECORD(PP_CONDITIONAL_STACK);
 
   // SourceManager Block.
   BLOCK(SOURCE_MANAGER_BLOCK);
@@ -1421,8 +1422,8 @@ void ASTWriter::WriteControlBlock(Preprocessor &PP, ASTContext &Context,
     Stream.EmitRecordWithBlob(MetadataAbbrevCode, Record,
                               getClangFullRepositoryVersion());
   }
-  if (WritingModule) {
 
+  if (WritingModule) {
     // Module name
     auto Abbrev = std::make_shared<BitCodeAbbrev>();
     Abbrev->Add(BitCodeAbbrevOp(MODULE_NAME));
@@ -1461,13 +1462,14 @@ void ASTWriter::WriteControlBlock(Preprocessor &PP, ASTContext &Context,
   }
 
   // Module map file
-  if (WritingModule) {
+  if (WritingModule && WritingModule->Kind == Module::ModuleMapModule) {
     Record.clear();
 
     auto &Map = PP.getHeaderSearchInfo().getModuleMap();
-
-    // Primary module map file.
-    AddPath(Map.getModuleMapFileForUniquing(WritingModule)->getName(), Record);
+    AddPath(WritingModule->PresumedModuleMapFile.empty()
+                ? Map.getModuleMapFileForUniquing(WritingModule)->getName()
+                : StringRef(WritingModule->PresumedModuleMapFile),
+            Record);
 
     // Additional module map files.
     if (auto *AdditionalModMaps =
@@ -1599,6 +1601,8 @@ void ASTWriter::WriteControlBlock(Preprocessor &PP, ASTContext &Context,
   AddString(HSOpts.ModuleCachePath, Record);
   AddString(HSOpts.ModuleUserBuildPath, Record);
   Record.push_back(HSOpts.DisableModuleHash);
+  Record.push_back(HSOpts.ImplicitModuleMaps);
+  Record.push_back(HSOpts.ModuleMapFileHomeIsCwd);
   Record.push_back(HSOpts.UseBuiltinIncludes);
   Record.push_back(HSOpts.UseStandardSystemIncludes);
   Record.push_back(HSOpts.UseStandardCXXIncludes);
@@ -1688,6 +1692,7 @@ namespace  {
     bool IsSystemFile;
     bool IsTransient;
     bool BufferOverridden;
+    bool IsTopLevelModuleMap;
   };
 
 } // end anonymous namespace
@@ -1706,6 +1711,7 @@ void ASTWriter::WriteInputFiles(SourceManager &SourceMgr,
   IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 32)); // Modification time
   IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Overridden
   IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Transient
+  IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Module map
   IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // File name
   unsigned IFAbbrevCode = Stream.EmitAbbrev(std::move(IFAbbrev));
 
@@ -1720,7 +1726,8 @@ void ASTWriter::WriteInputFiles(SourceManager &SourceMgr,
     // We only care about file entries that were not overridden.
     if (!SLoc->isFile())
       continue;
-    const SrcMgr::ContentCache *Cache = SLoc->getFile().getContentCache();
+    const SrcMgr::FileInfo &File = SLoc->getFile();
+    const SrcMgr::ContentCache *Cache = File.getContentCache();
     if (!Cache->OrigEntry)
       continue;
 
@@ -1729,6 +1736,8 @@ void ASTWriter::WriteInputFiles(SourceManager &SourceMgr,
     Entry.IsSystemFile = Cache->IsSystemFile;
     Entry.IsTransient = Cache->IsTransient;
     Entry.BufferOverridden = Cache->BufferOverridden;
+    Entry.IsTopLevelModuleMap = isModuleMap(File.getFileCharacteristic()) &&
+                                File.getIncludeLoc().isInvalid();
     if (Cache->IsSystemFile)
       SortedFiles.push_back(Entry);
     else
@@ -1759,7 +1768,8 @@ void ASTWriter::WriteInputFiles(SourceManager &SourceMgr,
         (uint64_t)Entry.File->getSize(),
         (uint64_t)getTimestampForOutput(Entry.File),
         Entry.BufferOverridden,
-        Entry.IsTransient};
+        Entry.IsTransient,
+        Entry.IsTopLevelModuleMap};
 
     EmitRecordWithPath(IFAbbrevCode, Record, Entry.File->getName());
   }
@@ -1794,7 +1804,7 @@ static unsigned CreateSLocFileAbbrev(llvm::BitstreamWriter &Stream) {
   Abbrev->Add(BitCodeAbbrevOp(SM_SLOC_FILE_ENTRY));
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // Offset
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // Include location
-  Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 2)); // Characteristic
+  Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 3)); // Characteristic
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Line directives
   // FileEntry fields.
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Input File ID
@@ -1813,7 +1823,7 @@ static unsigned CreateSLocBufferAbbrev(llvm::BitstreamWriter &Stream) {
   Abbrev->Add(BitCodeAbbrevOp(SM_SLOC_BUFFER_ENTRY));
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // Offset
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // Include location
-  Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 2)); // Characteristic
+  Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 3)); // Characteristic
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Line directives
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Buffer name blob
   return Stream.EmitAbbrev(std::move(Abbrev));
@@ -1854,24 +1864,31 @@ namespace {
   // Trait used for the on-disk hash table of header search information.
   class HeaderFileInfoTrait {
     ASTWriter &Writer;
-    const HeaderSearch &HS;
     
     // Keep track of the framework names we've used during serialization.
     SmallVector<char, 32> FrameworkStringData;
     llvm::StringMap<unsigned> FrameworkNameOffset;
     
   public:
-    HeaderFileInfoTrait(ASTWriter &Writer, const HeaderSearch &HS)
-      : Writer(Writer), HS(HS) { }
-    
+    HeaderFileInfoTrait(ASTWriter &Writer) : Writer(Writer) {}
+
     struct key_type {
-      const FileEntry *FE;
       StringRef Filename;
+      off_t Size;
+      time_t ModTime;
     };
     typedef const key_type &key_type_ref;
+
+    using UnresolvedModule =
+        llvm::PointerIntPair<Module *, 2, ModuleMap::ModuleHeaderRole>;
     
-    typedef HeaderFileInfo data_type;
+    struct data_type {
+      const HeaderFileInfo &HFI;
+      ArrayRef<ModuleMap::KnownHeader> KnownHeaders;
+      UnresolvedModule Unresolved;
+    };
     typedef const data_type &data_type_ref;
+
     typedef unsigned hash_value_type;
     typedef unsigned offset_type;
     
@@ -1879,8 +1896,7 @@ namespace {
       // The hash is based only on size/time of the file, so that the reader can
       // match even when symlinking or excess path elements ("foo/../", "../")
       // change the form of the name. However, complete path is still the key.
-      return llvm::hash_combine(key.FE->getSize(),
-                                Writer.getTimestampForOutput(key.FE));
+      return llvm::hash_combine(key.Size, key.ModTime);
     }
     
      std::pair<unsigned, unsigned>
@@ -1890,68 +1906,74 @@ namespace {
       unsigned KeyLen = key.Filename.size() + 1 + 8 + 8;
       LE.write<uint16_t>(KeyLen);
       unsigned DataLen = 1 + 2 + 4 + 4;
-      for (auto ModInfo : HS.getModuleMap().findAllModulesForHeader(key.FE))
+      for (auto ModInfo : Data.KnownHeaders)
         if (Writer.getLocalOrImportedSubmoduleID(ModInfo.getModule()))
           DataLen += 4;
+      if (Data.Unresolved.getPointer())
+        DataLen += 4;
       LE.write<uint8_t>(DataLen);
       return std::make_pair(KeyLen, DataLen);
     }
-    
+
     void EmitKey(raw_ostream& Out, key_type_ref key, unsigned KeyLen) {
       using namespace llvm::support;
       endian::Writer<little> LE(Out);
-      LE.write<uint64_t>(key.FE->getSize());
+      LE.write<uint64_t>(key.Size);
       KeyLen -= 8;
-      LE.write<uint64_t>(Writer.getTimestampForOutput(key.FE));
+      LE.write<uint64_t>(key.ModTime);
       KeyLen -= 8;
       Out.write(key.Filename.data(), KeyLen);
     }
-    
+
     void EmitData(raw_ostream &Out, key_type_ref key,
                   data_type_ref Data, unsigned DataLen) {
       using namespace llvm::support;
       endian::Writer<little> LE(Out);
       uint64_t Start = Out.tell(); (void)Start;
       
-      unsigned char Flags = (Data.isImport << 4)
-                          | (Data.isPragmaOnce << 3)
-                          | (Data.DirInfo << 1)
-                          | Data.IndexHeaderMapHeader;
+      unsigned char Flags = (Data.HFI.isImport << 5)
+                          | (Data.HFI.isPragmaOnce << 4)
+                          | (Data.HFI.DirInfo << 1)
+                          | Data.HFI.IndexHeaderMapHeader;
       LE.write<uint8_t>(Flags);
-      LE.write<uint16_t>(Data.NumIncludes);
+      LE.write<uint16_t>(Data.HFI.NumIncludes);
       
-      if (!Data.ControllingMacro)
-        LE.write<uint32_t>(Data.ControllingMacroID);
+      if (!Data.HFI.ControllingMacro)
+        LE.write<uint32_t>(Data.HFI.ControllingMacroID);
       else
-        LE.write<uint32_t>(Writer.getIdentifierRef(Data.ControllingMacro));
-      
+        LE.write<uint32_t>(Writer.getIdentifierRef(Data.HFI.ControllingMacro));
+
       unsigned Offset = 0;
-      if (!Data.Framework.empty()) {
+      if (!Data.HFI.Framework.empty()) {
         // If this header refers into a framework, save the framework name.
         llvm::StringMap<unsigned>::iterator Pos
-          = FrameworkNameOffset.find(Data.Framework);
+          = FrameworkNameOffset.find(Data.HFI.Framework);
         if (Pos == FrameworkNameOffset.end()) {
           Offset = FrameworkStringData.size() + 1;
-          FrameworkStringData.append(Data.Framework.begin(), 
-                                     Data.Framework.end());
+          FrameworkStringData.append(Data.HFI.Framework.begin(), 
+                                     Data.HFI.Framework.end());
           FrameworkStringData.push_back(0);
           
-          FrameworkNameOffset[Data.Framework] = Offset;
+          FrameworkNameOffset[Data.HFI.Framework] = Offset;
         } else
           Offset = Pos->second;
       }
       LE.write<uint32_t>(Offset);
 
-      // FIXME: If the header is excluded, we should write out some
-      // record of that fact.
-      for (auto ModInfo : HS.getModuleMap().findAllModulesForHeader(key.FE)) {
-        if (uint32_t ModID =
-                Writer.getLocalOrImportedSubmoduleID(ModInfo.getModule())) {
-          uint32_t Value = (ModID << 2) | (unsigned)ModInfo.getRole();
+      auto EmitModule = [&](Module *M, ModuleMap::ModuleHeaderRole Role) {
+        if (uint32_t ModID = Writer.getLocalOrImportedSubmoduleID(M)) {
+          uint32_t Value = (ModID << 2) | (unsigned)Role;
           assert((Value >> 2) == ModID && "overflow in header module info");
           LE.write<uint32_t>(Value);
         }
-      }
+      };
+
+      // FIXME: If the header is excluded, we should write out some
+      // record of that fact.
+      for (auto ModInfo : Data.KnownHeaders)
+        EmitModule(ModInfo.getModule(), ModInfo.getRole());
+      if (Data.Unresolved.getPointer())
+        EmitModule(Data.Unresolved.getPointer(), Data.Unresolved.getInt());
 
       assert(Out.tell() - Start == DataLen && "Wrong data length");
     }
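
EmitData packs the header flags into a single byte, and note that isImport
and isPragmaOnce each moved up one bit (<< 5 and << 4 instead of << 4 and
<< 3), apparently to give DirInfo a third bit for the new module-map file
characteristics — consistent with the SLoc abbrevs earlier in this file
growing the Characteristic field from Fixed,2 to Fixed,3. The packing as a
standalone helper (a sketch; field widths inferred from the hunk above):

#include <cstdint>

uint8_t packHeaderFlags(bool IsImport, bool IsPragmaOnce, uint8_t DirInfo,
                        bool IndexHeaderMapHeader) {
  // Bit layout: [5] isImport, [4] isPragmaOnce, [3:1] DirInfo, [0] index map.
  return static_cast<uint8_t>((IsImport << 5) | (IsPragmaOnce << 4) |
                              ((DirInfo & 0x7) << 1) | IndexHeaderMapHeader);
}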
@@ -1966,16 +1988,72 @@ namespace {
 ///
 /// \param HS The header search structure to save.
 void ASTWriter::WriteHeaderSearch(const HeaderSearch &HS) {
+  HeaderFileInfoTrait GeneratorTrait(*this);
+  llvm::OnDiskChainedHashTableGenerator<HeaderFileInfoTrait> Generator;
+  SmallVector<const char *, 4> SavedStrings;
+  unsigned NumHeaderSearchEntries = 0;
+
+  // Find all unresolved headers for the current module. We generally will
+  // have resolved them before we get here, but not necessarily: we might be
+  // compiling a preprocessed module, where there is no requirement for the
+  // original files to exist any more.
+  const HeaderFileInfo Empty; // So we can take a reference.
+  if (WritingModule) {
+    llvm::SmallVector<Module *, 16> Worklist(1, WritingModule);
+    while (!Worklist.empty()) {
+      Module *M = Worklist.pop_back_val();
+      if (!M->isAvailable())
+        continue;
+
+      // Map to disk files where possible, to pick up any missing stat
+      // information. This also means we don't need to check the unresolved
+      // headers list when emitting resolved headers in the first loop below.
+      // FIXME: It'd be preferable to avoid doing this if we were given
+      // sufficient stat information in the module map.
+      HS.getModuleMap().resolveHeaderDirectives(M);
+
+      // If the file didn't exist, we can still create a module if we were given
+      // enough information in the module map.
+      for (auto U : M->MissingHeaders) {
+        // Check that we were given enough information to build a module
+        // without this file existing on disk.
+        if (!U.Size || (!U.ModTime && IncludeTimestamps)) {
+          PP->Diag(U.FileNameLoc, diag::err_module_no_size_mtime_for_header)
+            << WritingModule->getFullModuleName() << U.Size.hasValue()
+            << U.FileName;
+          continue;
+        }
+
+        // Form the effective relative pathname for the file.
+        SmallString<128> Filename(M->Directory->getName());
+        llvm::sys::path::append(Filename, U.FileName);
+        PreparePathForOutput(Filename);
+
+        StringRef FilenameDup = strdup(Filename.c_str());
+        SavedStrings.push_back(FilenameDup.data());
+
+        HeaderFileInfoTrait::key_type Key = {
+          FilenameDup, *U.Size, IncludeTimestamps ? *U.ModTime : 0
+        };
+        HeaderFileInfoTrait::data_type Data = {
+          Empty, {}, {M, ModuleMap::headerKindToRole(U.Kind)}
+        };
+        // FIXME: Deal with cases where there are multiple unresolved header
+        // directives in different submodules for the same header.
+        Generator.insert(Key, Data, GeneratorTrait);
+        ++NumHeaderSearchEntries;
+      }
+
+      Worklist.append(M->submodule_begin(), M->submodule_end());
+    }
+  }
+
   SmallVector<const FileEntry *, 16> FilesByUID;
   HS.getFileMgr().GetUniqueIDMapping(FilesByUID);
   
   if (FilesByUID.size() > HS.header_file_size())
     FilesByUID.resize(HS.header_file_size());
-  
-  HeaderFileInfoTrait GeneratorTrait(*this, HS);
-  llvm::OnDiskChainedHashTableGenerator<HeaderFileInfoTrait> Generator;
-  SmallVector<const char *, 4> SavedStrings;
-  unsigned NumHeaderSearchEntries = 0;
+
   for (unsigned UID = 0, LastUID = FilesByUID.size(); UID != LastUID; ++UID) {
     const FileEntry *File = FilesByUID[UID];
     if (!File)
@@ -2002,11 +2080,16 @@ void ASTWriter::WriteHeaderSearch(const HeaderSearch &HS) {
       SavedStrings.push_back(Filename.data());
     }
 
-    HeaderFileInfoTrait::key_type key = { File, Filename };
-    Generator.insert(key, *HFI, GeneratorTrait);
+    HeaderFileInfoTrait::key_type Key = {
+      Filename, File->getSize(), getTimestampForOutput(File)
+    };
+    HeaderFileInfoTrait::data_type Data = {
+      *HFI, HS.getModuleMap().findAllModulesForHeader(File), {}
+    };
+    Generator.insert(Key, Data, GeneratorTrait);
     ++NumHeaderSearchEntries;
   }
-  
+
   // Create the on-disk hash table in a buffer.
   SmallString<4096> TableData;
   uint32_t BucketOffset;
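
The table key is now (filename, size, mtime) instead of a FileEntry*, which
is what lets the unresolved-header loop above insert entries for files that
no longer exist on disk. Per the ComputeHash comment earlier, the hash
deliberately covers only size and mtime, so symlinked or "foo/../"-mangled
paths still land in the same bucket; the full key disambiguates on lookup.
A minimal stand-in for that scheme (hash combination here is illustrative):

#include <cstdint>
#include <functional>
#include <string>

struct HeaderKey {
  std::string Filename; // compared on lookup, but not hashed
  uint64_t Size;
  uint64_t ModTime;
};

std::size_t hashHeaderKey(const HeaderKey &K) {
  // Combine size and mtime only, as HeaderFileInfoTrait::ComputeHash does.
  return std::hash<uint64_t>{}(K.Size) * 31u ^
         std::hash<uint64_t>{}(K.ModTime);
}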
@@ -2302,6 +2385,18 @@ void ASTWriter::WritePreprocessor(const Preprocessor &PP, bool IsModule) {
     Stream.EmitRecord(PP_COUNTER_VALUE, Record);
   }
 
+  if (PP.isRecordingPreamble() && PP.hasRecordedPreamble()) {
+    assert(!IsModule);
+    for (const auto &Cond : PP.getPreambleConditionalStack()) {
+      AddSourceLocation(Cond.IfLoc, Record);
+      Record.push_back(Cond.WasSkipping);
+      Record.push_back(Cond.FoundNonSkip);
+      Record.push_back(Cond.FoundElse);
+    }
+    Stream.EmitRecord(PP_CONDITIONAL_STACK, Record);
+    Record.clear();
+  }
+
   // Enter the preprocessor block.
   Stream.EnterSubblock(PREPROCESSOR_BLOCK_ID, 3);
 
@@ -2426,9 +2521,9 @@ void ASTWriter::WritePreprocessor(const Preprocessor &PP, bool IsModule) {
       Record.push_back(MI->isC99Varargs());
       Record.push_back(MI->isGNUVarargs());
       Record.push_back(MI->hasCommaPasting());
-      Record.push_back(MI->getNumArgs());
-      for (const IdentifierInfo *Arg : MI->args())
-        AddIdentifierRef(Arg, Record);
+      Record.push_back(MI->getNumParams());
+      for (const IdentifierInfo *Param : MI->params())
+        AddIdentifierRef(Param, Record);
     }
 
     // If we have a detailed preprocessing record, record the macro definition
@@ -2841,25 +2936,6 @@ void ASTWriter::WriteSubmodules(Module *WritingModule) {
          "non-imported submodule?");
 }
 
-serialization::SubmoduleID 
-ASTWriter::inferSubmoduleIDFromLocation(SourceLocation Loc) {
-  if (Loc.isInvalid() || !WritingModule)
-    return 0; // No submodule
-    
-  // Find the module that owns this location.
-  ModuleMap &ModMap = PP->getHeaderSearchInfo().getModuleMap();
-  Module *OwningMod 
-    = ModMap.inferModuleFromLocation(FullSourceLoc(Loc,PP->getSourceManager()));
-  if (!OwningMod)
-    return 0;
-  
-  // Check whether this submodule is part of our own module.
-  if (WritingModule != OwningMod && !OwningMod->isSubModuleOf(WritingModule))
-    return 0;
-  
-  return getSubmoduleID(OwningMod);
-}
-
 void ASTWriter::WritePragmaDiagnosticMappings(const DiagnosticsEngine &Diag,
                                               bool isModule) {
   llvm::SmallDenseMap
@@ -4965,9 +5041,18 @@ void ASTWriter::WriteDeclUpdatesBlocks(RecordDataImpl &OffsetsRecord) {
       case UPD_CXX_ADDED_FUNCTION_DEFINITION:
         break;
 
-      case UPD_CXX_INSTANTIATED_STATIC_DATA_MEMBER:
+      case UPD_CXX_INSTANTIATED_STATIC_DATA_MEMBER: {
+        const VarDecl *VD = cast<VarDecl>(D);
         Record.AddSourceLocation(Update.getLoc());
+        if (VD->getInit()) {
+          Record.push_back(!VD->isInitKnownICE() ? 1
+                                                 : (VD->isInitICE() ? 3 : 2));
+          Record.AddStmt(const_cast<Expr *>(VD->getInit()));
+        } else {
+          Record.push_back(0);
+        }
         break;
+      }
 
       case UPD_CXX_INSTANTIATED_DEFAULT_ARGUMENT:
        Record.AddStmt(const_cast<Expr *>(
@@ -5791,9 +5876,11 @@ void ASTRecordWriter::AddCXXDefinitionData(const CXXRecordDecl *D) {
   Record->push_back(Data.HasUninitializedFields);
   Record->push_back(Data.HasInheritedConstructor);
   Record->push_back(Data.HasInheritedAssignment);
+  Record->push_back(Data.NeedOverloadResolutionForCopyConstructor);
   Record->push_back(Data.NeedOverloadResolutionForMoveConstructor);
   Record->push_back(Data.NeedOverloadResolutionForMoveAssignment);
   Record->push_back(Data.NeedOverloadResolutionForDestructor);
+  Record->push_back(Data.DefaultedCopyConstructorIsDeleted);
   Record->push_back(Data.DefaultedMoveConstructorIsDeleted);
   Record->push_back(Data.DefaultedMoveAssignmentIsDeleted);
   Record->push_back(Data.DefaultedDestructorIsDeleted);
@@ -5802,6 +5889,7 @@ void ASTRecordWriter::AddCXXDefinitionData(const CXXRecordDecl *D) {
   Record->push_back(Data.HasIrrelevantDestructor);
   Record->push_back(Data.HasConstexprNonCopyMoveConstructor);
   Record->push_back(Data.HasDefaultedDefaultConstructor);
+  Record->push_back(Data.CanPassInRegisters);
   Record->push_back(Data.DefaultedDefaultConstructorIsConstexpr);
   Record->push_back(Data.HasConstexprDefaultConstructor);
   Record->push_back(Data.HasNonLiteralTypeFieldsOrBases);
diff --git a/interpreter/llvm/src/tools/clang/lib/Serialization/ASTWriterDecl.cpp b/interpreter/llvm/src/tools/clang/lib/Serialization/ASTWriterDecl.cpp
index 812cd9e916d91..ec21ca2276c11 100644
--- a/interpreter/llvm/src/tools/clang/lib/Serialization/ASTWriterDecl.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Serialization/ASTWriterDecl.cpp
@@ -299,7 +299,7 @@ void ASTDeclWriter::VisitDecl(Decl *D) {
   Record.push_back(D->isTopLevelDeclInObjCContainer());
   Record.push_back(D->getAccess());
   Record.push_back(D->isModulePrivate());
-  Record.push_back(Writer.inferSubmoduleIDFromLocation(D->getLocation()));
+  Record.push_back(Writer.getSubmoduleID(D->getOwningModule()));
 
   // If this declaration injected a name into a context different from its
   // lexical context, and that context is an imported namespace, we need to
@@ -915,6 +915,10 @@ void ASTDeclWriter::VisitVarDecl(VarDecl *D) {
     Record.push_back(D->isConstexpr());
     Record.push_back(D->isInitCapture());
     Record.push_back(D->isPreviousDeclInSameBlockScope());
+    if (const auto *IPD = dyn_cast<ImplicitParamDecl>(D))
+      Record.push_back(static_cast<unsigned>(IPD->getParameterKind()));
+    else
+      Record.push_back(0);
   }
   Record.push_back(D->getLinkageInternal());
 
@@ -1989,6 +1993,7 @@ void ASTWriter::WriteDeclAbbrevs() {
   Abv->Add(BitCodeAbbrevOp(0));                         // isConstexpr
   Abv->Add(BitCodeAbbrevOp(0));                         // isInitCapture
   Abv->Add(BitCodeAbbrevOp(0));                         // isPrevDeclInSameScope
+  Abv->Add(BitCodeAbbrevOp(0));                         // ImplicitParamKind
   Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 3)); // Linkage
   Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 2)); // IsInitICE (local)
   Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 2)); // VarKind (local enum)
@@ -2228,8 +2233,18 @@ void ASTRecordWriter::AddFunctionDefinition(const FunctionDecl *FD) {
   Writer->ClearSwitchCaseIDs();
 
   assert(FD->doesThisDeclarationHaveABody());
-  bool ModulesCodegen = Writer->Context->getLangOpts().ModulesCodegen &&
-                        Writer->WritingModule && !FD->isDependentContext();
+  bool ModulesCodegen = false;
+  if (Writer->WritingModule && !FD->isDependentContext()) {
+    // Under -fmodules-codegen, codegen is performed for all defined functions.
+    // When building a C++ Modules TS module interface unit, a strong definition
+    // in the module interface is provided by the compilation of that module
+    // interface unit, not by its users. (Inline functions are still emitted
+    // in module users.)
+    ModulesCodegen =
+        Writer->Context->getLangOpts().ModulesCodegen ||
+        (Writer->WritingModule->Kind == Module::ModuleInterfaceUnit &&
+         Writer->Context->GetGVALinkageForFunction(FD) == GVA_StrongExternal);
+  }
   Record->push_back(ModulesCodegen);
   if (ModulesCodegen)
     Writer->ModularCodegenDecls.push_back(Writer->GetDeclRef(FD));
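
The comment block added in this hunk is the whole story: a function body is
marked for module-level codegen either when -fmodules-codegen requests all
defined functions, or when building a Modules TS module interface unit and
the function has strong external linkage (inline functions are still emitted
in each importer). Condensed into a predicate — a sketch of the decision,
not the clang API:

bool wantModulesCodegen(bool WritingModule, bool IsDependentContext,
                        bool ModulesCodegenFlag, bool IsInterfaceUnit,
                        bool HasStrongExternalLinkage) {
  if (!WritingModule || IsDependentContext)
    return false; // dependent contexts have no concrete body to emit
  return ModulesCodegenFlag ||
         (IsInterfaceUnit && HasStrongExternalLinkage);
}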
diff --git a/interpreter/llvm/src/tools/clang/lib/Serialization/ASTWriterStmt.cpp b/interpreter/llvm/src/tools/clang/lib/Serialization/ASTWriterStmt.cpp
index 90a732e575e2e..6971339663f09 100644
--- a/interpreter/llvm/src/tools/clang/lib/Serialization/ASTWriterStmt.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Serialization/ASTWriterStmt.cpp
@@ -286,7 +286,7 @@ void ASTStmtWriter::VisitMSAsmStmt(MSAsmStmt *S) {
   }
 
   // Outputs
-  for (unsigned I = 0, N = S->getNumOutputs(); I != N; ++I) {      
+  for (unsigned I = 0, N = S->getNumOutputs(); I != N; ++I) {
     Record.AddStmt(S->getOutputExpr(I));
     Record.AddString(S->getOutputConstraint(I));
   }
@@ -300,29 +300,48 @@ void ASTStmtWriter::VisitMSAsmStmt(MSAsmStmt *S) {
   Code = serialization::STMT_MSASM;
 }
 
-void ASTStmtWriter::VisitCoroutineBodyStmt(CoroutineBodyStmt *S) {
-  // FIXME: Implement coroutine serialization.
-  llvm_unreachable("unimplemented");
+void ASTStmtWriter::VisitCoroutineBodyStmt(CoroutineBodyStmt *CoroStmt) {
+  VisitStmt(CoroStmt);
+  Record.push_back(CoroStmt->getParamMoves().size());
+  for (Stmt *S : CoroStmt->children())
+    Record.AddStmt(S);
+  Code = serialization::STMT_COROUTINE_BODY;
 }
 
 void ASTStmtWriter::VisitCoreturnStmt(CoreturnStmt *S) {
-  // FIXME: Implement coroutine serialization.
-  llvm_unreachable("unimplemented");
+  VisitStmt(S);
+  Record.AddSourceLocation(S->getKeywordLoc());
+  Record.AddStmt(S->getOperand());
+  Record.AddStmt(S->getPromiseCall());
+  Record.push_back(S->isImplicit());
+  Code = serialization::STMT_CORETURN;
+}
+
+void ASTStmtWriter::VisitCoroutineSuspendExpr(CoroutineSuspendExpr *E) {
+  VisitExpr(E);
+  Record.AddSourceLocation(E->getKeywordLoc());
+  for (Stmt *S : E->children())
+    Record.AddStmt(S);
+  Record.AddStmt(E->getOpaqueValue());
 }
 
-void ASTStmtWriter::VisitCoawaitExpr(CoawaitExpr *S) {
-  // FIXME: Implement coroutine serialization.
-  llvm_unreachable("unimplemented");
+void ASTStmtWriter::VisitCoawaitExpr(CoawaitExpr *E) {
+  VisitCoroutineSuspendExpr(E);
+  Record.push_back(E->isImplicit());
+  Code = serialization::EXPR_COAWAIT;
 }
 
-void ASTStmtWriter::VisitDependentCoawaitExpr(DependentCoawaitExpr *S) {
-  // FIXME: Implement coroutine serialization.
-  llvm_unreachable("unimplemented");
+void ASTStmtWriter::VisitCoyieldExpr(CoyieldExpr *E) {
+  VisitCoroutineSuspendExpr(E);
+  Code = serialization::EXPR_COYIELD;
 }
 
-void ASTStmtWriter::VisitCoyieldExpr(CoyieldExpr *S) {
-  // FIXME: Implement coroutine serialization.
-  llvm_unreachable("unimplemented");
+void ASTStmtWriter::VisitDependentCoawaitExpr(DependentCoawaitExpr *E) {
+  VisitExpr(E);
+  Record.AddSourceLocation(E->getKeywordLoc());
+  for (Stmt *S : E->children())
+    Record.AddStmt(S);
+  Code = serialization::EXPR_DEPENDENT_COAWAIT;
 }
 
 void ASTStmtWriter::VisitCapturedStmt(CapturedStmt *S) {
@@ -1963,6 +1982,25 @@ void OMPClauseWriter::VisitOMPReductionClause(OMPReductionClause *C) {
     Record.AddStmt(E);
 }
 
+void OMPClauseWriter::VisitOMPTaskReductionClause(OMPTaskReductionClause *C) {
+  Record.push_back(C->varlist_size());
+  VisitOMPClauseWithPostUpdate(C);
+  Record.AddSourceLocation(C->getLParenLoc());
+  Record.AddSourceLocation(C->getColonLoc());
+  Record.AddNestedNameSpecifierLoc(C->getQualifierLoc());
+  Record.AddDeclarationNameInfo(C->getNameInfo());
+  for (auto *VE : C->varlists())
+    Record.AddStmt(VE);
+  for (auto *VE : C->privates())
+    Record.AddStmt(VE);
+  for (auto *E : C->lhs_exprs())
+    Record.AddStmt(E);
+  for (auto *E : C->rhs_exprs())
+    Record.AddStmt(E);
+  for (auto *E : C->reduction_ops())
+    Record.AddStmt(E);
+}
+
 void OMPClauseWriter::VisitOMPLinearClause(OMPLinearClause *C) {
   Record.push_back(C->varlist_size());
   VisitOMPClauseWithPostUpdate(C);
@@ -2440,6 +2478,7 @@ void ASTStmtWriter::VisitOMPTaskwaitDirective(OMPTaskwaitDirective *D) {
 
 void ASTStmtWriter::VisitOMPTaskgroupDirective(OMPTaskgroupDirective *D) {
   VisitStmt(D);
+  Record.push_back(D->getNumClauses());
   VisitOMPExecutableDirective(D);
   Code = serialization::STMT_OMP_TASKGROUP_DIRECTIVE;
 }
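With the coroutine visitors implemented above, a translation unit using coroutines can now round-trip through a PCH or module file instead of hitting llvm_unreachable. A minimal sketch of code exercising the new record kinds, assuming 'generator' is some coroutine return type with the usual promise boilerplate:

    #include <experimental/coroutine>

    generator<int> counter(int n) {  // function body -> STMT_COROUTINE_BODY
      for (int i = 0; i < n; ++i)
        co_yield i;                  // -> EXPR_COYIELD (implicit suspends
      co_return;                     //    serialize via the suspend-expr path)
    }                                // co_return -> STMT_CORETURN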
diff --git a/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Checkers/BuiltinFunctionChecker.cpp b/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Checkers/BuiltinFunctionChecker.cpp
index 48d6cd8a527c9..097d4198800d4 100644
--- a/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Checkers/BuiltinFunctionChecker.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Checkers/BuiltinFunctionChecker.cpp
@@ -50,8 +50,10 @@ bool BuiltinFunctionChecker::evalCall(const CallExpr *CE,
     state = state->assume(ArgSVal.castAs<DefinedOrUnknownSVal>(), true);
     // FIXME: do we want to warn here? Not right now. The most reports might
     // come from infeasible paths, thus being false positives.
-    if (!state)
+    if (!state) {
+      C.generateSink(C.getState(), C.getPredecessor());
       return true;
+    }
 
     C.addTransition(state);
     return true;
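The added generateSink call matters for code like the following sketch: when the assumed condition contradicts the path constraints, the analyzer now abandons the path instead of continuing from the pre-assumption state.

    void use(int x) {
      if (x > 0)
        return;
      __builtin_assume(x > 0);  // infeasible here: 'state' is null, and the
      // node is now sunk, so this contradictory path is not explored further.
    }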
diff --git a/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Checkers/CMakeLists.txt b/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Checkers/CMakeLists.txt
index 60d60bc074eb0..2759240dd2768 100644
--- a/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Checkers/CMakeLists.txt
+++ b/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Checkers/CMakeLists.txt
@@ -39,7 +39,7 @@ add_clang_library(clangStaticAnalyzerCheckers
   GenericTaintChecker.cpp
   GTestChecker.cpp
   IdenticalExprChecker.cpp
-  IteratorPastEndChecker.cpp
+  IteratorChecker.cpp
   IvarInvalidationChecker.cpp
   LLVMConventionsChecker.cpp
   LocalizationChecker.cpp
diff --git a/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp b/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
index 32e3ce9270aa9..77c24629d71e5 100644
--- a/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
@@ -120,6 +120,7 @@ class CStringChecker : public Checker< eval::Call,
   void evalStdCopy(CheckerContext &C, const CallExpr *CE) const;
   void evalStdCopyBackward(CheckerContext &C, const CallExpr *CE) const;
   void evalStdCopyCommon(CheckerContext &C, const CallExpr *CE) const;
+  void evalMemset(CheckerContext &C, const CallExpr *CE) const;
 
   // Utility methods
   std::pair<ProgramStateRef, ProgramStateRef>
@@ -1999,6 +2000,54 @@ void CStringChecker::evalStdCopyCommon(CheckerContext &C,
   C.addTransition(State);
 }
 
+void CStringChecker::evalMemset(CheckerContext &C, const CallExpr *CE) const {
+  if (CE->getNumArgs() != 3)
+    return;
+
+  CurrentFunctionDescription = "memory set function";
+
+  const Expr *Mem = CE->getArg(0);
+  const Expr *Size = CE->getArg(2);
+  ProgramStateRef State = C.getState();
+
+  // See if the size argument is zero.
+  const LocationContext *LCtx = C.getLocationContext();
+  SVal SizeVal = State->getSVal(Size, LCtx);
+  QualType SizeTy = Size->getType();
+
+  ProgramStateRef StateZeroSize, StateNonZeroSize;
+  std::tie(StateZeroSize, StateNonZeroSize) =
+    assumeZero(C, State, SizeVal, SizeTy);
+
+  // Get the value of the memory area.
+  SVal MemVal = State->getSVal(Mem, LCtx);
+
+  // If the size is zero, there won't be any actual memory access, so
+  // just bind the return value to the Mem buffer and return.
+  if (StateZeroSize && !StateNonZeroSize) {
+    StateZeroSize = StateZeroSize->BindExpr(CE, LCtx, MemVal);
+    C.addTransition(StateZeroSize);
+    return;
+  }
+
+  // Ensure the memory area is not null.
+  // If it is NULL there will be a NULL pointer dereference.
+  State = checkNonNull(C, StateNonZeroSize, Mem, MemVal);
+  if (!State)
+    return;
+
+  State = CheckBufferAccess(C, State, Size, Mem);
+  if (!State)
+    return;
+  State = InvalidateBuffer(C, State, Mem, C.getSVal(Mem),
+                           /*IsSourceBuffer*/false, Size);
+  if (!State)
+    return;
+
+  State = State->BindExpr(CE, LCtx, MemVal);
+  C.addTransition(State);
+}
+
 static bool isCPPStdLibraryFunction(const FunctionDecl *FD, StringRef Name) {
   IdentifierInfo *II = FD->getIdentifier();
   if (!II)
@@ -2032,6 +2081,8 @@ bool CStringChecker::evalCall(const CallExpr *CE, CheckerContext &C) const {
     evalFunction =  &CStringChecker::evalMemcmp;
   else if (C.isCLibraryFunction(FDecl, "memmove"))
     evalFunction =  &CStringChecker::evalMemmove;
+  else if (C.isCLibraryFunction(FDecl, "memset"))
+    evalFunction =  &CStringChecker::evalMemset;
   else if (C.isCLibraryFunction(FDecl, "strcpy"))
     evalFunction =  &CStringChecker::evalStrcpy;
   else if (C.isCLibraryFunction(FDecl, "strncpy"))
diff --git a/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Checkers/CloneChecker.cpp b/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Checkers/CloneChecker.cpp
index 1885b0e39203c..83955c586b688 100644
--- a/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Checkers/CloneChecker.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Checkers/CloneChecker.cpp
@@ -73,12 +73,17 @@ void CloneChecker::checkEndOfTranslationUnit(const TranslationUnitDecl *TU,
   bool ReportNormalClones = Mgr.getAnalyzerOptions().getBooleanOption(
       "ReportNormalClones", true, this);
 
+  StringRef IgnoredFilesPattern = Mgr.getAnalyzerOptions().getOptionAsString(
+      "IgnoredFilesPattern", "", this);
+
   // Let the CloneDetector create a list of clones from all the analyzed
   // statements. We don't filter for matching variable patterns at this point
   // because reportSuspiciousClones() wants to search them for errors.
   std::vector<CloneDetector::CloneGroup> AllCloneGroups;
 
-  Detector.findClones(AllCloneGroups, RecursiveCloneTypeIIConstraint(),
+  Detector.findClones(AllCloneGroups,
+                      FilenamePatternConstraint(IgnoredFilesPattern),
+                      RecursiveCloneTypeIIConstraint(),
                       MinComplexityConstraint(MinComplexity),
                       MinGroupSizeConstraint(2), OnlyLargestCloneConstraint());
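The new option travels through the usual analyzer-config channel. An invocation would look roughly like this (checker name alpha.clone.CloneChecker and option spelling assumed from this tree; treat as illustrative):

    clang --analyze \
      -Xclang -analyzer-checker=alpha.clone.CloneChecker \
      -Xclang -analyzer-config \
      -Xclang 'alpha.clone.CloneChecker:IgnoredFilesPattern=.*_test\.cpp$' \
      foo.cpp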
 
diff --git a/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Checkers/DeadStoresChecker.cpp b/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Checkers/DeadStoresChecker.cpp
index 8ca2a24cffe74..f7b5f61cfb8aa 100644
--- a/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Checkers/DeadStoresChecker.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Checkers/DeadStoresChecker.cpp
@@ -189,6 +189,7 @@ class DeadStoreObs : public LiveVariables::Observer {
 
       case DeadIncrement:
         BugType = "Dead increment";
+        LLVM_FALLTHROUGH;
       case Standard:
         if (!BugType) BugType = "Dead assignment";
         os << "Value stored to '" << *V << "' is never read";
diff --git a/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp b/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
index b1a54e77951bf..883c6a663291d 100644
--- a/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
@@ -65,21 +65,8 @@ class GenericTaintChecker : public Checker< check::PostStmt,
   /// and thus, is tainted.
   static bool isStdin(const Expr *E, CheckerContext &C);
 
-  /// This is called from getPointedToSymbol() to resolve symbol references for
-  /// the region underlying a LazyCompoundVal. This is the default binding
-  /// for the LCV, which could be a conjured symbol from a function call that
-  /// initialized the region. It only returns the conjured symbol if the LCV
-  /// covers the entire region, e.g. we avoid false positives by not returning
-/// a default binding for an entire struct if the symbol for only a single
-  /// field or element within it is requested.
-  // TODO: Return an appropriate symbol for sub-fields/elements of an LCV so
-  // that they are also appropriately tainted.
-  static SymbolRef getLCVSymbol(CheckerContext &C,
-                                nonloc::LazyCompoundVal &LCV);
-
-  /// \brief Given a pointer argument, get the symbol of the value it contains
-  /// (points to).
-  static SymbolRef getPointedToSymbol(CheckerContext &C, const Expr *Arg);
+  /// \brief Given a pointer argument, return the value it points to.
+  static Optional<SVal> getPointedToSVal(CheckerContext &C, const Expr *Arg);
 
   /// Functions defining the attack surface.
   typedef ProgramStateRef (GenericTaintChecker::*FnCheck)(const CallExpr *,
@@ -186,9 +173,14 @@ class GenericTaintChecker : public Checker< check::PostStmt,
     static inline bool isTaintedOrPointsToTainted(const Expr *E,
                                                   ProgramStateRef State,
                                                   CheckerContext &C) {
-      return (State->isTainted(E, C.getLocationContext()) || isStdin(E, C) ||
-              (E->getType().getTypePtr()->isPointerType() &&
-               State->isTainted(getPointedToSymbol(C, E))));
+      if (State->isTainted(E, C.getLocationContext()) || isStdin(E, C))
+        return true;
+
+      if (!E->getType().getTypePtr()->isPointerType())
+        return false;
+
+      Optional<SVal> V = getPointedToSVal(C, E);
+      return (V && State->isTainted(*V));
     }
 
     /// \brief Pre-process a function which propagates taint according to the
@@ -400,9 +392,9 @@ bool GenericTaintChecker::propagateFromPre(const CallExpr *CE,
     if (CE->getNumArgs() < (ArgNum + 1))
       return false;
     const Expr* Arg = CE->getArg(ArgNum);
-    SymbolRef Sym = getPointedToSymbol(C, Arg);
-    if (Sym)
-      State = State->addTaint(Sym);
+    Optional<SVal> V = getPointedToSVal(C, Arg);
+    if (V)
+      State = State->addTaint(*V);
   }
 
   // Clear up the taint info from the state.
@@ -473,47 +465,20 @@ bool GenericTaintChecker::checkPre(const CallExpr *CE, CheckerContext &C) const{
   return false;
 }
 
-SymbolRef GenericTaintChecker::getLCVSymbol(CheckerContext &C,
-                                            nonloc::LazyCompoundVal &LCV) {
-  StoreManager &StoreMgr = C.getStoreManager();
-
-  // getLCVSymbol() is reached in a PostStmt so we can always expect a default
-  // binding to exist if one is present.
-  if (Optional<SVal> binding = StoreMgr.getDefaultBinding(LCV)) {
-    SymbolRef Sym = binding->getAsSymbol();
-    if (!Sym)
-      return nullptr;
-
-    // If the LCV covers an entire base region return the default conjured symbol.
-    if (LCV.getRegion() == LCV.getRegion()->getBaseRegion())
-      return Sym;
-  }
-
-  // Otherwise, return a nullptr as there's not yet a functional way to taint
-  // sub-regions of LCVs.
-  return nullptr;
-}
-
-SymbolRef GenericTaintChecker::getPointedToSymbol(CheckerContext &C,
-                                                  const Expr* Arg) {
+Optional<SVal> GenericTaintChecker::getPointedToSVal(CheckerContext &C,
+                                            const Expr* Arg) {
   ProgramStateRef State = C.getState();
   SVal AddrVal = State->getSVal(Arg->IgnoreParens(), C.getLocationContext());
   if (AddrVal.isUnknownOrUndef())
-    return nullptr;
+    return None;
 
   Optional<Loc> AddrLoc = AddrVal.getAs<Loc>();
   if (!AddrLoc)
-    return nullptr;
+    return None;
 
   const PointerType *ArgTy =
     dyn_cast<PointerType>(Arg->getType().getCanonicalType().getTypePtr());
-  SVal Val = State->getSVal(*AddrLoc,
-                            ArgTy ? ArgTy->getPointeeType(): QualType());
-
-  if (auto LCV = Val.getAs<nonloc::LazyCompoundVal>())
-    return getLCVSymbol(C, *LCV);
-
-  return Val.getAsSymbol();
+  return State->getSVal(*AddrLoc, ArgTy ? ArgTy->getPointeeType(): QualType());
 }
 
 ProgramStateRef
@@ -633,9 +598,9 @@ ProgramStateRef GenericTaintChecker::postScanf(const CallExpr *CE,
     // The arguments are pointer arguments. The data they are pointing at is
     // tainted after the call.
     const Expr* Arg = CE->getArg(i);
-        SymbolRef Sym = getPointedToSymbol(C, Arg);
-    if (Sym)
-      State = State->addTaint(Sym);
+    Optional<SVal> V = getPointedToSVal(C, Arg);
+    if (V)
+      State = State->addTaint(*V);
   }
   return State;
 }
@@ -710,10 +675,10 @@ bool GenericTaintChecker::generateReportIfTainted(const Expr *E,
 
   // Check for taint.
   ProgramStateRef State = C.getState();
-  const SymbolRef PointedToSym = getPointedToSymbol(C, E);
+  Optional<SVal> PointedToSVal = getPointedToSVal(C, E);
   SVal TaintedSVal;
-  if (State->isTainted(PointedToSym))
-    TaintedSVal = nonloc::SymbolVal(PointedToSym);
+  if (PointedToSVal && State->isTainted(*PointedToSVal))
+    TaintedSVal = *PointedToSVal;
   else if (State->isTainted(E, C.getLocationContext()))
     TaintedSVal = C.getSVal(E);
   else
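The practical effect of returning SVals instead of symbols: taint can survive on values that have no single symbol, such as the LazyCompoundVal of a copied struct. A sketch of the kind of flow this enables (illustrative example):

    struct P { int a, b; };

    void f(void) {
      struct P p;
      scanf("%d%d", &p.a, &p.b);  // postScanf taints the pointed-to values
      struct P q = p;             // q's rvalue is a LazyCompoundVal: an SVal
      (void)q.a;                  // with no symbol of its own, yet the taint
    }                             // can now be carried along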
diff --git a/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Checkers/IteratorChecker.cpp b/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Checkers/IteratorChecker.cpp
new file mode 100644
index 0000000000000..0f9b749506fa2
--- /dev/null
+++ b/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Checkers/IteratorChecker.cpp
@@ -0,0 +1,833 @@
+//===-- IteratorChecker.cpp ---------------------------------------*- C++ -*--//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Defines a checker for using iterators outside their range (past end). Usage
+// means here dereferencing, incrementing etc.
+//
+//===----------------------------------------------------------------------===//
+//
+// In the code, iterator can be represented as a:
+// * type-I: typedef-ed pointer. Operations over such iterator, such as
+//           comparisons or increments, are modeled straightforwardly by the
+//           analyzer.
+// * type-II: structure with its method bodies available.  Operations over such
+//            iterators are inlined by the analyzer, and modeling these
+//            operations exposes implementation details of the iterators,
+//            which is not necessarily helpful.
+// * type-III: completely opaque structure. Operations over such iterator are
+//             modeled conservatively, producing conjured symbols everywhere.
+//
+// To handle all these types in a common way we introduce a structure called
+// IteratorPosition which is an abstraction of the position the iterator
+// represents using symbolic expressions. The checker handles all the
+// operations on this structure.
+//
+// Additionally, depending on the circumstances, iterators of types II and III
+// can be represented as:
+// * type-IIa, type-IIIa: conjured structure symbols - when returned by value
+//                        from conservatively evaluated methods such as
+//                        `.begin()`.
+// * type-IIb, type-IIIb: memory regions of iterator-typed objects, such as
+//                        variables or temporaries, when the iterator object is
+//                        currently treated as an lvalue.
+// * type-IIc, type-IIIc: compound values of iterator-typed objects, when the
+//                        iterator object is treated as an rvalue taken from a
+//                        particular lvalue, e.g. a copy of a "type-a" iterator
+//                        object, or an iterator that existed before the
+//                        analysis started.
+//
+// To handle all three representations stored in an SVal in a uniform way, we
+// use setter and getter functions that dispatch over the three cases. To store
+// them we use a pointer union of symbol and memory region.
+//
+// The checker works the following way: We record the past-end iterator for
+// all containers whenever their `.end()` is called. Since the Constraint
+// Manager cannot handle SVals we need to take over its role. We post-check
+// equality and non-equality comparisons and propagate the position of the
+// iterator to the other side of the comparison if it is past-end and we are in
+// the 'equal' branch (true-branch for `==` and false-branch for `!=`).
+//
+// In case of type-I or type-II iterators we get a concrete integer as a result
+// of the comparison (1 or 0) but in case of type-III we only get a Symbol. In
+// this latter case we record the symbol and reload it in evalAssume() and do
+// the propagation there. We also handle (maybe double) negated comparisons
+// which are represented in the form of (x == 0 or x != 0) where x is the
+// comparison itself.
+
+#include "ClangSACheckers.h"
+#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
+#include "clang/StaticAnalyzer/Core/Checker.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
+
+using namespace clang;
+using namespace ento;
+
+namespace {
+
+// Abstract position of an iterator. This helps to handle all three kinds
+// of iterators in a common way by using a symbolic position.
+struct IteratorPosition {
+private:
+
+  // Container the iterator belongs to
+  const MemRegion *Cont;
+
+  // Abstract offset
+  SymbolRef Offset;
+
+  IteratorPosition(const MemRegion *C, SymbolRef Of)
+      : Cont(C), Offset(Of) {}
+
+public:
+  const MemRegion *getContainer() const { return Cont; }
+  SymbolRef getOffset() const { return Offset; }
+
+  static IteratorPosition getPosition(const MemRegion *C, SymbolRef Of) {
+    return IteratorPosition(C, Of);
+  }
+
+  IteratorPosition setTo(SymbolRef NewOf) const {
+    return IteratorPosition(Cont, NewOf);
+  }
+
+  bool operator==(const IteratorPosition &X) const {
+    return Cont == X.Cont && Offset == X.Offset;
+  }
+
+  bool operator!=(const IteratorPosition &X) const {
+    return Cont != X.Cont || Offset != X.Offset;
+  }
+
+  void Profile(llvm::FoldingSetNodeID &ID) const {
+    ID.AddPointer(Cont);
+    ID.Add(Offset);
+  }
+};
+
+typedef llvm::PointerUnion<const MemRegion *, SymbolRef> RegionOrSymbol;
+
+// Structure to record the symbolic end position of a container
+struct ContainerData {
+private:
+  SymbolRef End;
+
+  ContainerData(SymbolRef E) : End(E) {}
+
+public:
+  static ContainerData fromEnd(SymbolRef E) {
+    return ContainerData(E);
+  }
+
+  SymbolRef getEnd() const { return End; }
+
+  ContainerData newEnd(SymbolRef E) const { return ContainerData(E); }
+
+  bool operator==(const ContainerData &X) const {
+    return End == X.End;
+  }
+
+  bool operator!=(const ContainerData &X) const {
+    return End != X.End;
+  }
+
+  void Profile(llvm::FoldingSetNodeID &ID) const {
+    ID.Add(End);
+  }
+};
+
+// Structure for recording iterator comparisons. We need it to retrieve the
+// original comparison expression in assumptions.
+struct IteratorComparison {
+private:
+  RegionOrSymbol Left, Right;
+  bool Equality;
+
+public:
+  IteratorComparison(RegionOrSymbol L, RegionOrSymbol R, bool Eq)
+      : Left(L), Right(R), Equality(Eq) {}
+
+  RegionOrSymbol getLeft() const { return Left; }
+  RegionOrSymbol getRight() const { return Right; }
+  bool isEquality() const { return Equality; }
+  bool operator==(const IteratorComparison &X) const {
+    return Left == X.Left && Right == X.Right && Equality == X.Equality;
+  }
+  bool operator!=(const IteratorComparison &X) const {
+    return Left != X.Left || Right != X.Right || Equality != X.Equality;
+  }
+  void Profile(llvm::FoldingSetNodeID &ID) const { ID.AddInteger(Equality); }
+};
+
+class IteratorChecker
+    : public Checker<check::PreCall, check::PostCall,
+                     check::PostStmt<MaterializeTemporaryExpr>,
+                     check::DeadSymbols,
+                     eval::Assume> {
+
+  std::unique_ptr<BugType> OutOfRangeBugType;
+
+  void handleComparison(CheckerContext &C, const SVal &RetVal, const SVal &LVal,
+                        const SVal &RVal, OverloadedOperatorKind Op) const;
+  void verifyDereference(CheckerContext &C, const SVal &Val) const;
+  void handleEnd(CheckerContext &C, const Expr *CE, const SVal &RetVal,
+                 const SVal &Cont) const;
+  void assignToContainer(CheckerContext &C, const Expr *CE, const SVal &RetVal,
+                         const MemRegion *Cont) const;
+  void reportOutOfRangeBug(const StringRef &Message, const SVal &Val,
+                           CheckerContext &C, ExplodedNode *ErrNode) const;
+
+public:
+  IteratorChecker();
+
+  enum CheckKind {
+    CK_IteratorRangeChecker,
+    CK_NumCheckKinds
+  };
+
+  DefaultBool ChecksEnabled[CK_NumCheckKinds];
+  CheckName CheckNames[CK_NumCheckKinds];
+
+  void checkPreCall(const CallEvent &Call, CheckerContext &C) const;
+  void checkPostCall(const CallEvent &Call, CheckerContext &C) const;
+  void checkPostStmt(const MaterializeTemporaryExpr *MTE,
+                     CheckerContext &C) const;
+  void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const;
+  ProgramStateRef evalAssume(ProgramStateRef State, SVal Cond,
+                             bool Assumption) const;
+};
+} // namespace
+
+REGISTER_MAP_WITH_PROGRAMSTATE(IteratorSymbolMap, SymbolRef, IteratorPosition)
+REGISTER_MAP_WITH_PROGRAMSTATE(IteratorRegionMap, const MemRegion *,
+                               IteratorPosition)
+
+REGISTER_MAP_WITH_PROGRAMSTATE(ContainerMap, const MemRegion *, ContainerData)
+
+REGISTER_MAP_WITH_PROGRAMSTATE(IteratorComparisonMap, const SymExpr *,
+                               IteratorComparison)
+
+namespace {
+
+bool isIteratorType(const QualType &Type);
+bool isIterator(const CXXRecordDecl *CRD);
+bool isEndCall(const FunctionDecl *Func);
+bool isSimpleComparisonOperator(OverloadedOperatorKind OK);
+bool isDereferenceOperator(OverloadedOperatorKind OK);
+BinaryOperator::Opcode getOpcode(const SymExpr *SE);
+const RegionOrSymbol getRegionOrSymbol(const SVal &Val);
+const ProgramStateRef processComparison(ProgramStateRef State,
+                                        RegionOrSymbol LVal,
+                                        RegionOrSymbol RVal, bool Equal);
+const ProgramStateRef saveComparison(ProgramStateRef State,
+                                     const SymExpr *Condition, const SVal &LVal,
+                                     const SVal &RVal, bool Eq);
+const IteratorComparison *loadComparison(ProgramStateRef State,
+                                         const SymExpr *Condition);
+SymbolRef getContainerEnd(ProgramStateRef State, const MemRegion *Cont);
+ProgramStateRef createContainerEnd(ProgramStateRef State, const MemRegion *Cont,
+                                   const SymbolRef Sym);
+const IteratorPosition *getIteratorPosition(ProgramStateRef State,
+                                            const SVal &Val);
+const IteratorPosition *getIteratorPosition(ProgramStateRef State,
+                                            RegionOrSymbol RegOrSym);
+ProgramStateRef setIteratorPosition(ProgramStateRef State, const SVal &Val,
+                                    const IteratorPosition &Pos);
+ProgramStateRef setIteratorPosition(ProgramStateRef State,
+                                    RegionOrSymbol RegOrSym,
+                                    const IteratorPosition &Pos);
+ProgramStateRef removeIteratorPosition(ProgramStateRef State, const SVal &Val);
+ProgramStateRef adjustIteratorPosition(ProgramStateRef State,
+                                       RegionOrSymbol RegOrSym,
+                                       const IteratorPosition &Pos, bool Equal);
+ProgramStateRef relateIteratorPositions(ProgramStateRef State,
+                                        const IteratorPosition &Pos1,
+                                        const IteratorPosition &Pos2,
+                                        bool Equal);
+const ContainerData *getContainerData(ProgramStateRef State,
+                                      const MemRegion *Cont);
+ProgramStateRef setContainerData(ProgramStateRef State, const MemRegion *Cont,
+                                 const ContainerData &CData);
+bool isOutOfRange(ProgramStateRef State, const IteratorPosition &Pos);
+} // namespace
+
+IteratorChecker::IteratorChecker() {
+  OutOfRangeBugType.reset(
+      new BugType(this, "Iterator out of range", "Misuse of STL APIs"));
+  OutOfRangeBugType->setSuppressOnSink(true);
+}
+
+void IteratorChecker::checkPreCall(const CallEvent &Call,
+                                   CheckerContext &C) const {
+  // Check for out of range access
+  const auto *Func = dyn_cast_or_null<FunctionDecl>(Call.getDecl());
+  if (!Func)
+    return;
+
+  if (Func->isOverloadedOperator()) {
+    if (ChecksEnabled[CK_IteratorRangeChecker] &&
+               isDereferenceOperator(Func->getOverloadedOperator())) {
+      // Check for dereference of out-of-range iterators
+      if (const auto *InstCall = dyn_cast<CXXInstanceCall>(&Call)) {
+        verifyDereference(C, InstCall->getCXXThisVal());
+      } else {
+        verifyDereference(C, Call.getArgSVal(0));
+      }
+    }
+  }
+}
+
+void IteratorChecker::checkPostCall(const CallEvent &Call,
+                                    CheckerContext &C) const {
+  // Record new iterator positions and iterator position changes
+  const auto *Func = dyn_cast_or_null<FunctionDecl>(Call.getDecl());
+  if (!Func)
+    return;
+
+  if (Func->isOverloadedOperator()) {
+    const auto Op = Func->getOverloadedOperator();
+    if (isSimpleComparisonOperator(Op)) {
+      if (const auto *InstCall = dyn_cast<CXXInstanceCall>(&Call)) {
+        handleComparison(C, Call.getReturnValue(), InstCall->getCXXThisVal(),
+                         Call.getArgSVal(0), Op);
+      } else {
+        handleComparison(C, Call.getReturnValue(), Call.getArgSVal(0),
+                         Call.getArgSVal(1), Op);
+      }
+    }
+  } else {
+    const auto *OrigExpr = Call.getOriginExpr();
+    if (!OrigExpr)
+      return;
+
+    if (!isIteratorType(Call.getResultType()))
+      return;
+
+    auto State = C.getState();
+    // Already bound to container?
+    if (getIteratorPosition(State, Call.getReturnValue()))
+      return;
+
+    if (const auto *InstCall = dyn_cast<CXXInstanceCall>(&Call)) {
+      if (isEndCall(Func)) {
+        handleEnd(C, OrigExpr, Call.getReturnValue(),
+                  InstCall->getCXXThisVal());
+        return;
+      }
+    }
+
+    // Copy-like and move constructors
+    if (isa<CXXConstructorCall>(&Call) && Call.getNumArgs() == 1) {
+      if (const auto *Pos = getIteratorPosition(State, Call.getArgSVal(0))) {
+        State = setIteratorPosition(State, Call.getReturnValue(), *Pos);
+        if (cast<CXXConstructorDecl>(Func)->isMoveConstructor()) {
+          State = removeIteratorPosition(State, Call.getArgSVal(0));
+        }
+        C.addTransition(State);
+        return;
+      }
+    }
+
+    // Assumption: if return value is an iterator which is not yet bound to a
+    //             container, then look for the first iterator argument, and
+    //             bind the return value to the same container. This approach
+    //             works for STL algorithms.
+    // FIXME: Add a more conservative mode
+    for (unsigned i = 0; i < Call.getNumArgs(); ++i) {
+      if (isIteratorType(Call.getArgExpr(i)->getType())) {
+        if (const auto *Pos = getIteratorPosition(State, Call.getArgSVal(i))) {
+          assignToContainer(C, OrigExpr, Call.getReturnValue(),
+                            Pos->getContainer());
+          return;
+        }
+      }
+    }
+  }
+}
+
+void IteratorChecker::checkPostStmt(const MaterializeTemporaryExpr *MTE,
+                                    CheckerContext &C) const {
+  /* Transfer iterator state to temporary objects */
+  auto State = C.getState();
+  const auto *LCtx = C.getLocationContext();
+  const auto *Pos =
+      getIteratorPosition(State, State->getSVal(MTE->GetTemporaryExpr(), LCtx));
+  if (!Pos)
+    return;
+  State = setIteratorPosition(State, State->getSVal(MTE, LCtx), *Pos);
+  C.addTransition(State);
+}
+
+void IteratorChecker::checkDeadSymbols(SymbolReaper &SR,
+                                       CheckerContext &C) const {
+  // Cleanup
+  auto State = C.getState();
+
+  auto RegionMap = State->get<IteratorRegionMap>();
+  for (const auto Reg : RegionMap) {
+    if (!SR.isLiveRegion(Reg.first)) {
+      State = State->remove<IteratorRegionMap>(Reg.first);
+    }
+  }
+
+  auto SymbolMap = State->get<IteratorSymbolMap>();
+  for (const auto Sym : SymbolMap) {
+    if (!SR.isLive(Sym.first)) {
+      State = State->remove<IteratorSymbolMap>(Sym.first);
+    }
+  }
+
+  auto ContMap = State->get<ContainerMap>();
+  for (const auto Cont : ContMap) {
+    if (!SR.isLiveRegion(Cont.first)) {
+      State = State->remove<ContainerMap>(Cont.first);
+    }
+  }
+
+  auto ComparisonMap = State->get<IteratorComparisonMap>();
+  for (const auto Comp : ComparisonMap) {
+    if (!SR.isLive(Comp.first)) {
+      State = State->remove<IteratorComparisonMap>(Comp.first);
+    }
+  }
+}
+
+ProgramStateRef IteratorChecker::evalAssume(ProgramStateRef State, SVal Cond,
+                                            bool Assumption) const {
+  // Load recorded comparison and transfer iterator state between sides
+  // according to comparison operator and assumption
+  const auto *SE = Cond.getAsSymExpr();
+  if (!SE)
+    return State;
+
+  auto Opc = getOpcode(SE);
+  if (Opc != BO_EQ && Opc != BO_NE)
+    return State;
+
+  bool Negated = false;
+  const auto *Comp = loadComparison(State, SE);
+  if (!Comp) {
+    // Try negated comparison, which is a SymExpr to 0 integer comparison
+    const auto *SIE = dyn_cast<SymIntExpr>(SE);
+    if (!SIE)
+      return State;
+
+    if (SIE->getRHS() != 0)
+      return State;
+
+    SE = SIE->getLHS();
+    Negated = SIE->getOpcode() == BO_EQ; // Equal to zero means negation
+    Opc = getOpcode(SE);
+    if (Opc != BO_EQ && Opc != BO_NE)
+      return State;
+
+    Comp = loadComparison(State, SE);
+    if (!Comp)
+      return State;
+  }
+
+  return processComparison(State, Comp->getLeft(), Comp->getRight(),
+                           (Comp->isEquality() == Assumption) != Negated);
+}
+
+void IteratorChecker::handleComparison(CheckerContext &C, const SVal &RetVal,
+                                       const SVal &LVal, const SVal &RVal,
+                                       OverloadedOperatorKind Op) const {
+  // Record the operands and the operator of the comparison for the next
+  // evalAssume, if the result is a symbolic expression. If it is a concrete
+  // value (only one branch is possible), then transfer the state between
+  // the operands according to the operator and the result
+  auto State = C.getState();
+  if (const auto *Condition = RetVal.getAsSymbolicExpression()) {
+    const auto *LPos = getIteratorPosition(State, LVal);
+    const auto *RPos = getIteratorPosition(State, RVal);
+    if (!LPos && !RPos)
+      return;
+    State = saveComparison(State, Condition, LVal, RVal, Op == OO_EqualEqual);
+    C.addTransition(State);
+  } else if (const auto TruthVal = RetVal.getAs<nonloc::ConcreteInt>()) {
+    if ((State = processComparison(
+             State, getRegionOrSymbol(LVal), getRegionOrSymbol(RVal),
+             (Op == OO_EqualEqual) == (TruthVal->getValue() != 0)))) {
+      C.addTransition(State);
+    } else {
+      C.generateSink(State, C.getPredecessor());
+    }
+  }
+}
+
+void IteratorChecker::verifyDereference(CheckerContext &C,
+                                        const SVal &Val) const {
+  auto State = C.getState();
+  const auto *Pos = getIteratorPosition(State, Val);
+  if (Pos && isOutOfRange(State, *Pos)) {
+    // If we do not put a tag here, some range tests will fail.
+    static CheckerProgramPointTag Tag("IteratorRangeChecker",
+                                      "IteratorOutOfRange");
+    auto *N = C.generateNonFatalErrorNode(State, &Tag);
+    if (!N) {
+      return;
+    }
+    reportOutOfRangeBug("Iterator accessed outside of its range.", Val, C, N);
+  }
+}
+
+void IteratorChecker::handleEnd(CheckerContext &C, const Expr *CE,
+                                const SVal &RetVal, const SVal &Cont) const {
+  const auto *ContReg = Cont.getAsRegion();
+  if (!ContReg)
+    return;
+
+  while (const auto *CBOR = ContReg->getAs<CXXBaseObjectRegion>()) {
+    ContReg = CBOR->getSuperRegion();
+  }
+
+  // If the container already has an end symbol then use it. Otherwise first
+  // create a new one.
+  auto State = C.getState();
+  auto EndSym = getContainerEnd(State, ContReg);
+  if (!EndSym) {
+    auto &SymMgr = C.getSymbolManager();
+    EndSym = SymMgr.conjureSymbol(CE, C.getLocationContext(),
+                                  C.getASTContext().LongTy, C.blockCount());
+    State = createContainerEnd(State, ContReg, EndSym);
+  }
+  State = setIteratorPosition(State, RetVal,
+                              IteratorPosition::getPosition(ContReg, EndSym));
+  C.addTransition(State);
+}
+
+void IteratorChecker::assignToContainer(CheckerContext &C, const Expr *CE,
+                                        const SVal &RetVal,
+                                        const MemRegion *Cont) const {
+  while (const auto *CBOR = Cont->getAs<CXXBaseObjectRegion>()) {
+    Cont = CBOR->getSuperRegion();
+  }
+
+  auto State = C.getState();
+  auto &SymMgr = C.getSymbolManager();
+  auto Sym = SymMgr.conjureSymbol(CE, C.getLocationContext(),
+                                  C.getASTContext().LongTy, C.blockCount());
+  State = setIteratorPosition(State, RetVal,
+                              IteratorPosition::getPosition(Cont, Sym));
+  C.addTransition(State);
+}
+
+void IteratorChecker::reportOutOfRangeBug(const StringRef &Message,
+                                          const SVal &Val, CheckerContext &C,
+                                          ExplodedNode *ErrNode) const {
+  auto R = llvm::make_unique<BugReport>(*OutOfRangeBugType, Message, ErrNode);
+  R->markInteresting(Val);
+  C.emitReport(std::move(R));
+}
+
+namespace {
+
+bool isGreaterOrEqual(ProgramStateRef State, SymbolRef Sym1, SymbolRef Sym2);
+bool compare(ProgramStateRef State, SymbolRef Sym1, SymbolRef Sym2,
+             BinaryOperator::Opcode Opc);
+
+bool isIteratorType(const QualType &Type) {
+  if (Type->isPointerType())
+    return true;
+
+  const auto *CRD = Type->getUnqualifiedDesugaredType()->getAsCXXRecordDecl();
+  return isIterator(CRD);
+}
+
+bool isIterator(const CXXRecordDecl *CRD) {
+  if (!CRD)
+    return false;
+
+  const auto Name = CRD->getName();
+  if (!(Name.endswith_lower("iterator") || Name.endswith_lower("iter") ||
+        Name.endswith_lower("it")))
+    return false;
+
+  bool HasCopyCtor = false, HasCopyAssign = true, HasDtor = false,
+       HasPreIncrOp = false, HasPostIncrOp = false, HasDerefOp = false;
+  for (const auto *Method : CRD->methods()) {
+    if (const auto *Ctor = dyn_cast<CXXConstructorDecl>(Method)) {
+      if (Ctor->isCopyConstructor()) {
+        HasCopyCtor = !Ctor->isDeleted() && Ctor->getAccess() == AS_public;
+      }
+      continue;
+    }
+    if (const auto *Dtor = dyn_cast<CXXDestructorDecl>(Method)) {
+      HasDtor = !Dtor->isDeleted() && Dtor->getAccess() == AS_public;
+      continue;
+    }
+    if (Method->isCopyAssignmentOperator()) {
+      HasCopyAssign = !Method->isDeleted() && Method->getAccess() == AS_public;
+      continue;
+    }
+    if (!Method->isOverloadedOperator())
+      continue;
+    const auto OPK = Method->getOverloadedOperator();
+    if (OPK == OO_PlusPlus) {
+      HasPreIncrOp = HasPreIncrOp || (Method->getNumParams() == 0);
+      HasPostIncrOp = HasPostIncrOp || (Method->getNumParams() == 1);
+      continue;
+    }
+    if (OPK == OO_Star) {
+      HasDerefOp = (Method->getNumParams() == 0);
+      continue;
+    }
+  }
+
+  return HasCopyCtor && HasCopyAssign && HasDtor && HasPreIncrOp &&
+         HasPostIncrOp && HasDerefOp;
+}
+
+bool isEndCall(const FunctionDecl *Func) {
+  const auto *IdInfo = Func->getIdentifier();
+  if (!IdInfo)
+    return false;
+  return IdInfo->getName().endswith_lower("end");
+}
+
+bool isSimpleComparisonOperator(OverloadedOperatorKind OK) {
+  return OK == OO_EqualEqual || OK == OO_ExclaimEqual;
+}
+
+bool isDereferenceOperator(OverloadedOperatorKind OK) {
+  return OK == OO_Star || OK == OO_Arrow || OK == OO_ArrowStar ||
+         OK == OO_Subscript;
+}
+
+BinaryOperator::Opcode getOpcode(const SymExpr *SE) {
+  if (const auto *BSE = dyn_cast<BinarySymExpr>(SE)) {
+    return BSE->getOpcode();
+  } else if (const auto *SC = dyn_cast<SymbolConjured>(SE)) {
+    const auto *COE = dyn_cast<CXXOperatorCallExpr>(SC->getStmt());
+    if (!COE)
+      return BO_Comma; // Extremal value, neither EQ nor NE
+    if (COE->getOperator() == OO_EqualEqual) {
+      return BO_EQ;
+    } else if (COE->getOperator() == OO_ExclaimEqual) {
+      return BO_NE;
+    }
+    return BO_Comma; // Extremal value, neither EQ nor NE
+  }
+  return BO_Comma; // Extremal value, neither EQ nor NE
+}
+
+const RegionOrSymbol getRegionOrSymbol(const SVal &Val) {
+  if (const auto Reg = Val.getAsRegion()) {
+    return Reg;
+  } else if (const auto Sym = Val.getAsSymbol()) {
+    return Sym;
+  } else if (const auto LCVal = Val.getAs<nonloc::LazyCompoundVal>()) {
+    return LCVal->getRegion();
+  }
+  return RegionOrSymbol();
+}
+
+const ProgramStateRef processComparison(ProgramStateRef State,
+                                        RegionOrSymbol LVal,
+                                        RegionOrSymbol RVal, bool Equal) {
+  const auto *LPos = getIteratorPosition(State, LVal);
+  const auto *RPos = getIteratorPosition(State, RVal);
+  if (LPos && !RPos) {
+    State = adjustIteratorPosition(State, RVal, *LPos, Equal);
+  } else if (!LPos && RPos) {
+    State = adjustIteratorPosition(State, LVal, *RPos, Equal);
+  } else if (LPos && RPos) {
+    State = relateIteratorPositions(State, *LPos, *RPos, Equal);
+  }
+  return State;
+}
+
+const ProgramStateRef saveComparison(ProgramStateRef State,
+                                     const SymExpr *Condition, const SVal &LVal,
+                                     const SVal &RVal, bool Eq) {
+  const auto Left = getRegionOrSymbol(LVal);
+  const auto Right = getRegionOrSymbol(RVal);
+  if (!Left || !Right)
+    return State;
+  return State->set<IteratorComparisonMap>(Condition,
+                                           IteratorComparison(Left, Right, Eq));
+}
+
+const IteratorComparison *loadComparison(ProgramStateRef State,
+                                         const SymExpr *Condition) {
+  return State->get<IteratorComparisonMap>(Condition);
+}
+
+SymbolRef getContainerEnd(ProgramStateRef State, const MemRegion *Cont) {
+  const auto *CDataPtr = getContainerData(State, Cont);
+  if (!CDataPtr)
+    return nullptr;
+
+  return CDataPtr->getEnd();
+}
+
+ProgramStateRef createContainerEnd(ProgramStateRef State, const MemRegion *Cont,
+                                   const SymbolRef Sym) {
+  // Only create if it does not exist
+  const auto *CDataPtr = getContainerData(State, Cont);
+  if (CDataPtr) {
+    if (CDataPtr->getEnd()) {
+      return State;
+    } else {
+      const auto CData = CDataPtr->newEnd(Sym);
+      return setContainerData(State, Cont, CData);
+    }
+  } else {
+    const auto CData = ContainerData::fromEnd(Sym);
+    return setContainerData(State, Cont, CData);
+  }
+}
+
+const ContainerData *getContainerData(ProgramStateRef State,
+                                      const MemRegion *Cont) {
+  return State->get<ContainerMap>(Cont);
+}
+
+ProgramStateRef setContainerData(ProgramStateRef State, const MemRegion *Cont,
+                                 const ContainerData &CData) {
+  return State->set<ContainerMap>(Cont, CData);
+}
+
+const IteratorPosition *getIteratorPosition(ProgramStateRef State,
+                                            const SVal &Val) {
+  if (const auto Reg = Val.getAsRegion()) {
+    return State->get<IteratorRegionMap>(Reg);
+  } else if (const auto Sym = Val.getAsSymbol()) {
+    return State->get<IteratorSymbolMap>(Sym);
+  } else if (const auto LCVal = Val.getAs<nonloc::LazyCompoundVal>()) {
+    return State->get<IteratorRegionMap>(LCVal->getRegion());
+  }
+  return nullptr;
+}
+
+const IteratorPosition *getIteratorPosition(ProgramStateRef State,
+                                            RegionOrSymbol RegOrSym) {
+  if (RegOrSym.is<const MemRegion *>()) {
+    return State->get<IteratorRegionMap>(RegOrSym.get<const MemRegion *>());
+  } else if (RegOrSym.is<SymbolRef>()) {
+    return State->get<IteratorSymbolMap>(RegOrSym.get<SymbolRef>());
+  }
+  return nullptr;
+}
+
+ProgramStateRef setIteratorPosition(ProgramStateRef State, const SVal &Val,
+                                    const IteratorPosition &Pos) {
+  if (const auto Reg = Val.getAsRegion()) {
+    return State->set<IteratorRegionMap>(Reg, Pos);
+  } else if (const auto Sym = Val.getAsSymbol()) {
+    return State->set<IteratorSymbolMap>(Sym, Pos);
+  } else if (const auto LCVal = Val.getAs<nonloc::LazyCompoundVal>()) {
+    return State->set<IteratorRegionMap>(LCVal->getRegion(), Pos);
+  }
+  return nullptr;
+}
+
+ProgramStateRef setIteratorPosition(ProgramStateRef State,
+                                    RegionOrSymbol RegOrSym,
+                                    const IteratorPosition &Pos) {
+  if (RegOrSym.is<const MemRegion *>()) {
+    return State->set<IteratorRegionMap>(RegOrSym.get<const MemRegion *>(),
+                                         Pos);
+  } else if (RegOrSym.is<SymbolRef>()) {
+    return State->set<IteratorSymbolMap>(RegOrSym.get<SymbolRef>(), Pos);
+  }
+  return nullptr;
+}
+
+ProgramStateRef removeIteratorPosition(ProgramStateRef State, const SVal &Val) {
+  if (const auto Reg = Val.getAsRegion()) {
+    return State->remove<IteratorRegionMap>(Reg);
+  } else if (const auto Sym = Val.getAsSymbol()) {
+    return State->remove<IteratorSymbolMap>(Sym);
+  } else if (const auto LCVal = Val.getAs<nonloc::LazyCompoundVal>()) {
+    return State->remove<IteratorRegionMap>(LCVal->getRegion());
+  }
+  return nullptr;
+}
+
+ProgramStateRef adjustIteratorPosition(ProgramStateRef State,
+                                       RegionOrSymbol RegOrSym,
+                                       const IteratorPosition &Pos,
+                                       bool Equal) {
+  if (Equal) {
+    return setIteratorPosition(State, RegOrSym, Pos);
+  } else {
+    return State;
+  }
+}
+
+ProgramStateRef relateIteratorPositions(ProgramStateRef State,
+                                        const IteratorPosition &Pos1,
+                                        const IteratorPosition &Pos2,
+                                        bool Equal) {
+  // Try to compare them and get a defined value
+  auto &SVB = State->getStateManager().getSValBuilder();
+  const auto comparison =
+      SVB.evalBinOp(State, BO_EQ, nonloc::SymbolVal(Pos1.getOffset()),
+                    nonloc::SymbolVal(Pos2.getOffset()), SVB.getConditionType())
+          .getAs<DefinedSVal>();
+  if (comparison) {
+    return State->assume(*comparison, Equal);
+  }
+
+  return State;
+}
+
+bool isOutOfRange(ProgramStateRef State, const IteratorPosition &Pos) {
+  const auto *Cont = Pos.getContainer();
+  const auto *CData = getContainerData(State, Cont);
+  if (!CData)
+    return false;
+
+  // Out of range means less than the begin symbol or greater or equal to the
+  // end symbol.
+
+  const auto End = CData->getEnd();
+  if (End) {
+    if (isGreaterOrEqual(State, Pos.getOffset(), End)) {
+      return true;
+    }
+  }
+
+  return false;
+}
+
+bool isGreaterOrEqual(ProgramStateRef State, SymbolRef Sym1, SymbolRef Sym2) {
+  return compare(State, Sym1, Sym2, BO_GE);
+}
+
+bool compare(ProgramStateRef State, SymbolRef Sym1, SymbolRef Sym2,
+             BinaryOperator::Opcode Opc) {
+  auto &SMgr = State->getStateManager();
+  auto &SVB = SMgr.getSValBuilder();
+
+  const auto comparison =
+      SVB.evalBinOp(State, Opc, nonloc::SymbolVal(Sym1),
+                    nonloc::SymbolVal(Sym2), SVB.getConditionType())
+          .getAs<DefinedSVal>();
+
+  if (comparison) {
+    return !!State->assume(*comparison, true);
+  }
+
+  return false;
+}
+
+} // namespace
+
+#define REGISTER_CHECKER(name)                                                 \
+  void ento::register##name(CheckerManager &Mgr) {                             \
+    auto *checker = Mgr.registerChecker<IteratorChecker>();                   \
+    checker->ChecksEnabled[IteratorChecker::CK_##name] = true;                 \
+    checker->CheckNames[IteratorChecker::CK_##name] =                          \
+        Mgr.getCurrentCheckName();                                             \
+  }
+
+REGISTER_CHECKER(IteratorRangeChecker)
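What the new checker reports in this initial, range-only configuration; a minimal sketch:

    #include <vector>

    int broken(const std::vector<int> &v) {
      auto it = v.end();  // handleEnd records the past-the-end position
      return *it;         // checkPreCall sees operator*, position >= end:
    }                     // "Iterator accessed outside of its range."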
diff --git a/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Checkers/LocalizationChecker.cpp b/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Checkers/LocalizationChecker.cpp
index af35c2b0e9914..655ce33390c9e 100644
--- a/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Checkers/LocalizationChecker.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Checkers/LocalizationChecker.cpp
@@ -57,7 +57,7 @@ struct LocalizedState {
 };
 
 class NonLocalizedStringChecker
-    : public Checker<check::PostCall, check::PreObjCMessage,
+    : public Checker<check::PreCall, check::PostCall, check::PreObjCMessage,
                      check::PostObjCMessage,
                      check::PostStmt<ObjCStringLiteral>> {
 
@@ -79,9 +79,10 @@ class NonLocalizedStringChecker
   void setNonLocalizedState(SVal S, CheckerContext &C) const;
   void setLocalizedState(SVal S, CheckerContext &C) const;
 
-  bool isAnnotatedAsLocalized(const Decl *D) const;
-  void reportLocalizationError(SVal S, const ObjCMethodCall &M,
-                               CheckerContext &C, int argumentNumber = 0) const;
+  bool isAnnotatedAsReturningLocalized(const Decl *D) const;
+  bool isAnnotatedAsTakingLocalized(const Decl *D) const;
+  void reportLocalizationError(SVal S, const CallEvent &M, CheckerContext &C,
+                               int argumentNumber = 0) const;
 
   int getLocalizedArgumentForSelector(const IdentifierInfo *Receiver,
                                       Selector S) const;
@@ -97,6 +98,7 @@ class NonLocalizedStringChecker
   void checkPreObjCMessage(const ObjCMethodCall &msg, CheckerContext &C) const;
   void checkPostObjCMessage(const ObjCMethodCall &msg, CheckerContext &C) const;
   void checkPostStmt(const ObjCStringLiteral *SL, CheckerContext &C) const;
+  void checkPreCall(const CallEvent &Call, CheckerContext &C) const;
   void checkPostCall(const CallEvent &Call, CheckerContext &C) const;
 };
 
@@ -281,6 +283,9 @@ void NonLocalizedStringChecker::initUIMethods(ASTContext &Ctx) const {
   IdentifierInfo *setLabelNSSegmentedControl[] = {
       &Ctx.Idents.get("setLabel"), &Ctx.Idents.get("forSegment")};
   ADD_METHOD(NSSegmentedControl, setLabelNSSegmentedControl, 2, 0)
+  IdentifierInfo *setToolTipNSSegmentedControl[] = {
+      &Ctx.Idents.get("setToolTip"), &Ctx.Idents.get("forSegment")};
+  ADD_METHOD(NSSegmentedControl, setToolTipNSSegmentedControl, 2, 0)
 
   NEW_RECEIVER(NSButtonCell)
   ADD_UNARY_METHOD(NSButtonCell, setTitle, 0)
@@ -562,6 +567,46 @@ void NonLocalizedStringChecker::initUIMethods(ASTContext &Ctx) const {
   IdentifierInfo *setTitleUISegmentedControl[] = {
       &Ctx.Idents.get("setTitle"), &Ctx.Idents.get("forSegmentAtIndex")};
   ADD_METHOD(UISegmentedControl, setTitleUISegmentedControl, 2, 0)
+
+  NEW_RECEIVER(NSAccessibilityCustomRotorItemResult)
+  IdentifierInfo
+      *initWithItemLoadingTokenNSAccessibilityCustomRotorItemResult[] = {
+          &Ctx.Idents.get("initWithItemLoadingToken"),
+          &Ctx.Idents.get("customLabel")};
+  ADD_METHOD(NSAccessibilityCustomRotorItemResult,
+             initWithItemLoadingTokenNSAccessibilityCustomRotorItemResult, 2, 1)
+  ADD_UNARY_METHOD(NSAccessibilityCustomRotorItemResult, setCustomLabel, 0)
+
+  NEW_RECEIVER(UIContextualAction)
+  IdentifierInfo *contextualActionWithStyleUIContextualAction[] = {
+      &Ctx.Idents.get("contextualActionWithStyle"), &Ctx.Idents.get("title"),
+      &Ctx.Idents.get("handler")};
+  ADD_METHOD(UIContextualAction, contextualActionWithStyleUIContextualAction, 3,
+             1)
+  ADD_UNARY_METHOD(UIContextualAction, setTitle, 0)
+
+  NEW_RECEIVER(NSAccessibilityCustomRotor)
+  IdentifierInfo *initWithLabelNSAccessibilityCustomRotor[] = {
+      &Ctx.Idents.get("initWithLabel"), &Ctx.Idents.get("itemSearchDelegate")};
+  ADD_METHOD(NSAccessibilityCustomRotor,
+             initWithLabelNSAccessibilityCustomRotor, 2, 0)
+  ADD_UNARY_METHOD(NSAccessibilityCustomRotor, setLabel, 0)
+
+  NEW_RECEIVER(NSWindowTab)
+  ADD_UNARY_METHOD(NSWindowTab, setTitle, 0)
+  ADD_UNARY_METHOD(NSWindowTab, setToolTip, 0)
+
+  NEW_RECEIVER(NSAccessibilityCustomAction)
+  IdentifierInfo *initWithNameNSAccessibilityCustomAction[] = {
+      &Ctx.Idents.get("initWithName"), &Ctx.Idents.get("handler")};
+  ADD_METHOD(NSAccessibilityCustomAction,
+             initWithNameNSAccessibilityCustomAction, 2, 0)
+  IdentifierInfo *initWithNameTargetNSAccessibilityCustomAction[] = {
+      &Ctx.Idents.get("initWithName"), &Ctx.Idents.get("target"),
+      &Ctx.Idents.get("selector")};
+  ADD_METHOD(NSAccessibilityCustomAction,
+             initWithNameTargetNSAccessibilityCustomAction, 3, 0)
+  ADD_UNARY_METHOD(NSAccessibilityCustomAction, setName, 0)
 }
 
 #define LSF_INSERT(function_name) LSF.insert(&Ctx.Idents.get(function_name));
@@ -601,7 +646,8 @@ void NonLocalizedStringChecker::initLocStringsMethods(ASTContext &Ctx) const {
 
 /// Checks to see if the method / function declaration includes
 /// __attribute__((annotate("returns_localized_nsstring")))
-bool NonLocalizedStringChecker::isAnnotatedAsLocalized(const Decl *D) const {
+bool NonLocalizedStringChecker::isAnnotatedAsReturningLocalized(
+    const Decl *D) const {
   if (!D)
     return false;
   return std::any_of(
@@ -611,6 +657,19 @@ bool NonLocalizedStringChecker::isAnnotatedAsLocalized(const Decl *D) const {
       });
 }
 
+/// Checks to see if the method / function declaration includes
+/// __attribute__((annotate("takes_localized_nsstring")))
+bool NonLocalizedStringChecker::isAnnotatedAsTakingLocalized(
+    const Decl *D) const {
+  if (!D)
+    return false;
+  return std::any_of(
+      D->specific_attr_begin(),
+      D->specific_attr_end(), [](const AnnotateAttr *Ann) {
+        return Ann->getAnnotation() == "takes_localized_nsstring";
+      });
+}
+
 /// Returns true if the given SVal is marked as Localized in the program state
 bool NonLocalizedStringChecker::hasLocalizedState(SVal S,
                                                   CheckerContext &C) const {
@@ -690,8 +749,7 @@ static bool isDebuggingContext(CheckerContext &C) {
 
 /// Reports a localization error for the passed in method call and SVal
 void NonLocalizedStringChecker::reportLocalizationError(
-    SVal S, const ObjCMethodCall &M, CheckerContext &C,
-    int argumentNumber) const {
+    SVal S, const CallEvent &M, CheckerContext &C, int argumentNumber) const {
 
   // Don't warn about localization errors in classes and methods that
   // may be debug code.
@@ -789,7 +847,21 @@ void NonLocalizedStringChecker::checkPreObjCMessage(const ObjCMethodCall &msg,
     }
   }
 
-  if (argumentNumber < 0) // There was no match in UIMethods
+  if (argumentNumber < 0) { // There was no match in UIMethods
+    if (const Decl *D = msg.getDecl()) {
+      if (const ObjCMethodDecl *OMD = dyn_cast_or_null<ObjCMethodDecl>(D)) {
+        auto formals = OMD->parameters();
+        for (unsigned i = 0, ei = formals.size(); i != ei; ++i) {
+          if (isAnnotatedAsTakingLocalized(formals[i])) {
+            argumentNumber = i;
+            break;
+          }
+        }
+      }
+    }
+  }
+
+  if (argumentNumber < 0) // Still no match
     return;
 
   SVal svTitle = msg.getArgSVal(argumentNumber);
@@ -812,6 +884,25 @@ void NonLocalizedStringChecker::checkPreObjCMessage(const ObjCMethodCall &msg,
   }
 }
 
+void NonLocalizedStringChecker::checkPreCall(const CallEvent &Call,
+                                             CheckerContext &C) const {
+  const Decl *D = Call.getDecl();
+  if (D && isa<FunctionDecl>(D)) {
+    const FunctionDecl *FD = dyn_cast<FunctionDecl>(D);
+    auto formals = FD->parameters();
+    for (unsigned i = 0,
+                  ei = std::min(unsigned(formals.size()), Call.getNumArgs());
+         i != ei; ++i) {
+      if (isAnnotatedAsTakingLocalized(formals[i])) {
+        auto actual = Call.getArgSVal(i);
+        if (hasNonLocalizedState(actual, C)) {
+          reportLocalizationError(actual, Call, C, i + 1);
+        }
+      }
+    }
+  }
+}
+
 static inline bool isNSStringType(QualType T, ASTContext &Ctx) {
 
   const ObjCObjectPointerType *PT = T->getAs<ObjCObjectPointerType>();
@@ -863,7 +954,7 @@ void NonLocalizedStringChecker::checkPostCall(const CallEvent &Call,
   const IdentifierInfo *Identifier = Call.getCalleeIdentifier();
 
   SVal sv = Call.getReturnValue();
-  if (isAnnotatedAsLocalized(D) || LSF.count(Identifier) != 0) {
+  if (isAnnotatedAsReturningLocalized(D) || LSF.count(Identifier) != 0) {
     setLocalizedState(sv, C);
   } else if (isNSStringType(RT, C.getASTContext()) &&
              !hasLocalizedState(sv, C)) {
@@ -897,7 +988,8 @@ void NonLocalizedStringChecker::checkPostObjCMessage(const ObjCMethodCall &msg,
 
   std::pair<const IdentifierInfo *, Selector> MethodDescription = {odInfo, S};
 
-  if (LSM.count(MethodDescription) || isAnnotatedAsLocalized(msg.getDecl())) {
+  if (LSM.count(MethodDescription) ||
+      isAnnotatedAsReturningLocalized(msg.getDecl())) {
     SVal sv = msg.getReturnValue();
     setLocalizedState(sv, C);
   }
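The takes_localized_nsstring side mirrors the existing returns annotation, and via checkPreCall it now also covers plain C/C++ functions, not only ObjC methods. An Objective-C sketch, assuming the annotation string matched above:

    void setHeader(NSString *title
                   __attribute__((annotate("takes_localized_nsstring"))));

    void update(void) {
      setHeader(@"Raw string");                      // reported: non-localized
      setHeader(NSLocalizedString(@"Header", nil));  // localized: no report
    }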
diff --git a/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Checkers/NullabilityChecker.cpp b/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Checkers/NullabilityChecker.cpp
index 41999d2527639..fa9a317683ba4 100644
--- a/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Checkers/NullabilityChecker.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Checkers/NullabilityChecker.cpp
@@ -326,7 +326,7 @@ NullabilityChecker::NullabilityBugVisitor::VisitNode(const ExplodedNode *N,
 
   // Retrieve the associated statement.
   const Stmt *S = TrackedNullab->getNullabilitySource();
-  if (!S) {
+  if (!S || S->getLocStart().isInvalid()) {
     S = PathDiagnosticLocation::getStmt(N);
   }
 
diff --git a/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Checkers/PthreadLockChecker.cpp b/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Checkers/PthreadLockChecker.cpp
index 7ef79c683c49e..0e3a649e88f78 100644
--- a/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Checkers/PthreadLockChecker.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Checkers/PthreadLockChecker.cpp
@@ -25,7 +25,13 @@ using namespace ento;
 namespace {
 
 struct LockState {
-  enum Kind { Destroyed, Locked, Unlocked } K;
+  enum Kind {
+    Destroyed,
+    Locked,
+    Unlocked,
+    UntouchedAndPossiblyDestroyed,
+    UnlockedAndPossiblyDestroyed
+  } K;
 
 private:
   LockState(Kind K) : K(K) {}
@@ -34,6 +40,12 @@ struct LockState {
   static LockState getLocked() { return LockState(Locked); }
   static LockState getUnlocked() { return LockState(Unlocked); }
   static LockState getDestroyed() { return LockState(Destroyed); }
+  static LockState getUntouchedAndPossiblyDestroyed() {
+    return LockState(UntouchedAndPossiblyDestroyed);
+  }
+  static LockState getUnlockedAndPossiblyDestroyed() {
+    return LockState(UnlockedAndPossiblyDestroyed);
+  }
 
   bool operator==(const LockState &X) const {
     return K == X.K;
@@ -42,13 +54,20 @@ struct LockState {
   bool isLocked() const { return K == Locked; }
   bool isUnlocked() const { return K == Unlocked; }
   bool isDestroyed() const { return K == Destroyed; }
+  bool isUntouchedAndPossiblyDestroyed() const {
+    return K == UntouchedAndPossiblyDestroyed;
+  }
+  bool isUnlockedAndPossiblyDestroyed() const {
+    return K == UnlockedAndPossiblyDestroyed;
+  }
 
   void Profile(llvm::FoldingSetNodeID &ID) const {
     ID.AddInteger(K);
   }
 };
 
-class PthreadLockChecker : public Checker< check::PostStmt<CallExpr> > {
+class PthreadLockChecker
+    : public Checker<check::PostStmt<CallExpr>, check::DeadSymbols> {
   mutable std::unique_ptr<BugType> BT_doublelock;
   mutable std::unique_ptr<BugType> BT_doubleunlock;
   mutable std::unique_ptr<BugType> BT_destroylock;
@@ -61,22 +80,31 @@ class PthreadLockChecker : public Checker< check::PostStmt > {
   };
 public:
   void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
+  void checkDeadSymbols(SymbolReaper &SymReaper, CheckerContext &C) const;
 
   void AcquireLock(CheckerContext &C, const CallExpr *CE, SVal lock,
                    bool isTryLock, enum LockingSemantics semantics) const;
 
   void ReleaseLock(CheckerContext &C, const CallExpr *CE, SVal lock) const;
-  void DestroyLock(CheckerContext &C, const CallExpr *CE, SVal Lock) const;
+  void DestroyLock(CheckerContext &C, const CallExpr *CE, SVal Lock,
+                   enum LockingSemantics semantics) const;
   void InitLock(CheckerContext &C, const CallExpr *CE, SVal Lock) const;
   void reportUseDestroyedBug(CheckerContext &C, const CallExpr *CE) const;
+  ProgramStateRef resolvePossiblyDestroyedMutex(ProgramStateRef state,
+                                                const MemRegion *lockR,
+                                                const SymbolRef *sym) const;
 };
 } // end anonymous namespace
 
-// GDM Entry for tracking lock state.
+// A stack of locks for tracking lock-unlock order.
 REGISTER_LIST_WITH_PROGRAMSTATE(LockSet, const MemRegion *)
 
+// An entry for tracking lock states.
 REGISTER_MAP_WITH_PROGRAMSTATE(LockMap, const MemRegion *, LockState)
 
+// Return values for unresolved calls to pthread_mutex_destroy().
+REGISTER_MAP_WITH_PROGRAMSTATE(DestroyRetVal, const MemRegion *, SymbolRef)
+
 void PthreadLockChecker::checkPostStmt(const CallExpr *CE,
                                        CheckerContext &C) const {
   ProgramStateRef state = C.getState();
@@ -113,13 +141,49 @@ void PthreadLockChecker::checkPostStmt(const CallExpr *CE,
            FName == "lck_mtx_unlock" ||
            FName == "lck_rw_done")
     ReleaseLock(C, CE, state->getSVal(CE->getArg(0), LCtx));
-  else if (FName == "pthread_mutex_destroy" ||
-           FName == "lck_mtx_destroy")
-    DestroyLock(C, CE, state->getSVal(CE->getArg(0), LCtx));
+  else if (FName == "pthread_mutex_destroy")
+    DestroyLock(C, CE, state->getSVal(CE->getArg(0), LCtx), PthreadSemantics);
+  else if (FName == "lck_mtx_destroy")
+    DestroyLock(C, CE, state->getSVal(CE->getArg(0), LCtx), XNUSemantics);
   else if (FName == "pthread_mutex_init")
     InitLock(C, CE, state->getSVal(CE->getArg(0), LCtx));
 }
 
+// When a lock is destroyed, under some semantics (like PthreadSemantics) we
+// cannot be sure whether the destroy call succeeded or failed, so the lock
+// enters one of the 'possibly destroyed' states. There is a short time frame
+// for the programmer to check the return value to see if the lock was
+// successfully destroyed. Before we model the next operation over that lock,
+// we call this function to see if the return value has been checked by now,
+// and set the lock state accordingly - either to the destroyed state or back
+// to its previous state.
+
+// In PthreadSemantics, pthread_mutex_destroy() returns zero if the lock is
+// successfully destroyed and a non-zero value otherwise.
+ProgramStateRef PthreadLockChecker::resolvePossiblyDestroyedMutex(
+    ProgramStateRef state, const MemRegion *lockR, const SymbolRef *sym) const {
+  const LockState *lstate = state->get<LockMap>(lockR);
+  // Existence in DestroyRetVal ensures existence in LockMap.
+  // Existence in DestroyRetVal also ensures that the lock state for lockR is
+  // either UntouchedAndPossiblyDestroyed or UnlockedAndPossiblyDestroyed.
+  assert(lstate->isUntouchedAndPossiblyDestroyed() ||
+         lstate->isUnlockedAndPossiblyDestroyed());
+
+  ConstraintManager &CMgr = state->getConstraintManager();
+  ConditionTruthVal retZero = CMgr.isNull(state, *sym);
+  if (retZero.isConstrainedFalse()) {
+    if (lstate->isUntouchedAndPossiblyDestroyed())
+      state = state->remove<LockMap>(lockR);
+    else if (lstate->isUnlockedAndPossiblyDestroyed())
+      state = state->set<LockMap>(lockR, LockState::getUnlocked());
+  } else
+    state = state->set<LockMap>(lockR, LockState::getDestroyed());
+
+  // Remove the map entry (lockR, sym) from DestroyRetVal as the lock state is
+  // now resolved.
+  state = state->remove<DestroyRetVal>(lockR);
+  return state;
+}
+
 void PthreadLockChecker::AcquireLock(CheckerContext &C, const CallExpr *CE,
                                      SVal lock, bool isTryLock,
                                      enum LockingSemantics semantics) const {
@@ -129,6 +193,9 @@ void PthreadLockChecker::AcquireLock(CheckerContext &C, const CallExpr *CE,
     return;
 
   ProgramStateRef state = C.getState();
+  const SymbolRef *sym = state->get<DestroyRetVal>(lockR);
+  if (sym)
+    state = resolvePossiblyDestroyedMutex(state, lockR, sym);
 
   SVal X = state->getSVal(CE, C.getLocationContext());
   if (X.isUnknownOrUndef())
@@ -197,6 +264,9 @@ void PthreadLockChecker::ReleaseLock(CheckerContext &C, const CallExpr *CE,
     return;
 
   ProgramStateRef state = C.getState();
+  const SymbolRef *sym = state->get<DestroyRetVal>(lockR);
+  if (sym)
+    state = resolvePossiblyDestroyedMutex(state, lockR, sym);
 
   if (const LockState *LState = state->get<LockMap>(lockR)) {
     if (LState->isUnlocked()) {
@@ -245,7 +315,8 @@ void PthreadLockChecker::ReleaseLock(CheckerContext &C, const CallExpr *CE,
 }
 
 void PthreadLockChecker::DestroyLock(CheckerContext &C, const CallExpr *CE,
-                                     SVal Lock) const {
+                                     SVal Lock,
+                                     enum LockingSemantics semantics) const {
 
   const MemRegion *LockR = Lock.getAsRegion();
   if (!LockR)
@@ -253,13 +324,38 @@ void PthreadLockChecker::DestroyLock(CheckerContext &C, const CallExpr *CE,
 
   ProgramStateRef State = C.getState();
 
+  const SymbolRef *sym = State->get<DestroyRetVal>(LockR);
+  if (sym)
+    State = resolvePossiblyDestroyedMutex(State, LockR, sym);
+
   const LockState *LState = State->get<LockMap>(LockR);
-  if (!LState || LState->isUnlocked()) {
-    State = State->set(LockR, LockState::getDestroyed());
-    C.addTransition(State);
-    return;
+  // We check the return value of the destroy method only in the case of
+  // PthreadSemantics.
+  if (semantics == PthreadSemantics) {
+    if (!LState || LState->isUnlocked()) {
+      SymbolRef sym = C.getSVal(CE).getAsSymbol();
+      if (!sym) {
+        State = State->remove<LockMap>(LockR);
+        C.addTransition(State);
+        return;
+      }
+      State = State->set<DestroyRetVal>(LockR, sym);
+      if (LState && LState->isUnlocked())
+        State = State->set<LockMap>(
+            LockR, LockState::getUnlockedAndPossiblyDestroyed());
+      else
+        State = State->set<LockMap>(
+            LockR, LockState::getUntouchedAndPossiblyDestroyed());
+      C.addTransition(State);
+      return;
+    }
+  } else {
+    if (!LState || LState->isUnlocked()) {
+      State = State->set<LockMap>(LockR, LockState::getDestroyed());
+      C.addTransition(State);
+      return;
+    }
   }
-
   StringRef Message;
 
   if (LState->isLocked()) {
@@ -288,6 +384,10 @@ void PthreadLockChecker::InitLock(CheckerContext &C, const CallExpr *CE,
 
   ProgramStateRef State = C.getState();
 
+  const SymbolRef *sym = State->get<DestroyRetVal>(LockR);
+  if (sym)
+    State = resolvePossiblyDestroyedMutex(State, LockR, sym);
+
   const struct LockState *LState = State->get<LockMap>(LockR);
   if (!LState || LState->isDestroyed()) {
     State = State->set(LockR, LockState::getUnlocked());
@@ -328,6 +428,26 @@ void PthreadLockChecker::reportUseDestroyedBug(CheckerContext &C,
   C.emitReport(std::move(Report));
 }
 
+void PthreadLockChecker::checkDeadSymbols(SymbolReaper &SymReaper,
+                                          CheckerContext &C) const {
+  ProgramStateRef State = C.getState();
+
+  // TODO: Clean LockMap when a mutex region dies.
+
+  DestroyRetValTy TrackedSymbols = State->get<DestroyRetVal>();
+  for (DestroyRetValTy::iterator I = TrackedSymbols.begin(),
+                                 E = TrackedSymbols.end();
+       I != E; ++I) {
+    const SymbolRef Sym = I->second;
+    const MemRegion *lockR = I->first;
+    bool IsSymDead = SymReaper.isDead(Sym);
+    // Remove the dead symbol from the return value symbols map.
+    if (IsSymDead)
+      State = resolvePossiblyDestroyedMutex(State, lockR, &Sym);
+  }
+  C.addTransition(State);
+}
+
 void ento::registerPthreadLockChecker(CheckerManager &mgr) {
   mgr.registerChecker<PthreadLockChecker>();
 }
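
To make the new "possibly destroyed" modelling concrete, here is a minimal sketch (ours, not part of the patch) of the user-code pattern the checker now understands under PthreadSemantics: the mutex state stays unresolved until the return value of pthread_mutex_destroy() is checked.

```cpp
#include <pthread.h>

void shutdown_mutex(pthread_mutex_t *m) {
  if (pthread_mutex_destroy(m) != 0) {
    // Non-zero return: destruction failed, so the checker resolves the lock
    // back to its previous state and further use is allowed.
    pthread_mutex_lock(m);
    pthread_mutex_unlock(m);
    return;
  }
  // Zero return: the lock is resolved to Destroyed; locking it here would be
  // reported as use of a destroyed mutex.
}
```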
diff --git a/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker.cpp b/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker.cpp
index 89b1291c4f46a..21ccf21515b3a 100644
--- a/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker.cpp
@@ -1304,6 +1304,21 @@ RetainSummaryManager::getCFSummaryGetRule(const FunctionDecl *FD) {
                               DoNothing, DoNothing);
 }
 
+/// Returns true if the declaration 'D' is annotated with 'rcAnnotation'.
+static bool hasRCAnnotation(const Decl *D, StringRef rcAnnotation) {
+  for (const auto *Ann : D->specific_attrs<AnnotateAttr>()) {
+    if (Ann->getAnnotation() == rcAnnotation)
+      return true;
+  }
+  return false;
+}
+
+/// Returns true if the function declaration 'FD' contains
+/// 'rc_ownership_trusted_implementation' annotate attribute.
+static bool isTrustedReferenceCountImplementation(const FunctionDecl *FD) {
+  return hasRCAnnotation(FD, "rc_ownership_trusted_implementation");
+}
+
 //===----------------------------------------------------------------------===//
 // Summary creation for Selectors.
 //===----------------------------------------------------------------------===//
@@ -3380,6 +3395,9 @@ bool RetainCountChecker::evalCall(const CallExpr *CE, CheckerContext &C) const {
 
   // See if it's one of the specific functions we know how to eval.
   bool canEval = false;
+  // See if the function has 'rc_ownership_trusted_implementation'
+  // annotate attribute. If it does, we will not inline it.
+  bool hasTrustedImplementationAnnotation = false;
 
   QualType ResultTy = CE->getCallReturnType(C.getASTContext());
   if (ResultTy->isObjCIdType()) {
@@ -3395,6 +3413,11 @@ bool RetainCountChecker::evalCall(const CallExpr *CE, CheckerContext &C) const {
         cocoa::isRefType(ResultTy, "CV", FName)) {
       canEval = isRetain(FD, FName) || isAutorelease(FD, FName) ||
                 isMakeCollectable(FD, FName);
+    } else {
+      if (FD->getDefinition()) {
+        canEval = isTrustedReferenceCountImplementation(FD->getDefinition());
+        hasTrustedImplementationAnnotation = canEval;
+      }
     }
   }
 
@@ -3404,8 +3427,11 @@ bool RetainCountChecker::evalCall(const CallExpr *CE, CheckerContext &C) const {
   // Bind the return value.
   const LocationContext *LCtx = C.getLocationContext();
   SVal RetVal = state->getSVal(CE->getArg(0), LCtx);
-  if (RetVal.isUnknown()) {
-    // If the receiver is unknown, conjure a return value.
+  if (RetVal.isUnknown() ||
+      (hasTrustedImplementationAnnotation && !ResultTy.isNull())) {
+    // If the receiver is unknown or the function has
+    // 'rc_ownership_trusted_implementation' annotate attribute, conjure a
+    // return value.
     SValBuilder &SVB = C.getSValBuilder();
     RetVal = SVB.conjureSymbolVal(nullptr, CE, LCtx, ResultTy, C.blockCount());
   }
@@ -3421,8 +3447,9 @@ bool RetainCountChecker::evalCall(const CallExpr *CE, CheckerContext &C) const {
       Binding = getRefBinding(state, Sym);
 
     // Invalidate the argument region.
-    state = state->invalidateRegions(ArgRegion, CE, C.blockCount(), LCtx,
-                                     /*CausesPointerEscape*/ false);
+    state = state->invalidateRegions(
+        ArgRegion, CE, C.blockCount(), LCtx,
+        /*CausesPointerEscape*/ hasTrustedImplementationAnnotation);
 
     // Restore the refcount status of the argument.
     if (Binding)
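
As an illustration of the annotation this change looks for, a project can mark a hand-written retain wrapper as trusted so that the analyzer conjures a fresh return value instead of inlining the definition. This is a sketch under our own names, not code from the patch; only the annotation string is meaningful to the checker:

```cpp
// Hypothetical CF-style wrapper.
typedef const void *CFTypeRef;
extern "C" CFTypeRef CFRetain(CFTypeRef cf);

__attribute__((annotate("rc_ownership_trusted_implementation")))
CFTypeRef MyRetain(CFTypeRef object) {
  // The body may do arbitrary bookkeeping; because the definition carries
  // the annotation, RetainCountChecker::evalCall() skips inlining it.
  return CFRetain(object);
}
```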
diff --git a/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Checkers/UnixAPIChecker.cpp b/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Checkers/UnixAPIChecker.cpp
index 26bf597bd950c..7f9a00ff876d9 100644
--- a/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Checkers/UnixAPIChecker.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Checkers/UnixAPIChecker.cpp
@@ -45,6 +45,8 @@ class UnixAPIChecker : public Checker< check::PreStmt<CallExpr> > {
   mutable Optional<uint64_t> Val_O_CREAT;
 
 public:
+  DefaultBool CheckMisuse, CheckPortability;
+
   void checkPreStmt(const CallExpr *CE, CheckerContext &C) const;
 
   void CheckOpen(CheckerContext &C, const CallExpr *CE) const;
@@ -437,29 +439,42 @@ void UnixAPIChecker::checkPreStmt(const CallExpr *CE,
   if (FName.empty())
     return;
 
-  SubChecker SC =
-    llvm::StringSwitch<SubChecker>(FName)
-      .Case("open", &UnixAPIChecker::CheckOpen)
-      .Case("openat", &UnixAPIChecker::CheckOpenAt)
-      .Case("pthread_once", &UnixAPIChecker::CheckPthreadOnce)
-      .Case("calloc", &UnixAPIChecker::CheckCallocZero)
-      .Case("malloc", &UnixAPIChecker::CheckMallocZero)
-      .Case("realloc", &UnixAPIChecker::CheckReallocZero)
-      .Case("reallocf", &UnixAPIChecker::CheckReallocfZero)
-      .Cases("alloca", "__builtin_alloca", &UnixAPIChecker::CheckAllocaZero)
-      .Case("__builtin_alloca_with_align",
-            &UnixAPIChecker::CheckAllocaWithAlignZero)
-      .Case("valloc", &UnixAPIChecker::CheckVallocZero)
-      .Default(nullptr);
-
-  if (SC)
-    (this->*SC)(C, CE);
+  if (CheckMisuse) {
+    if (SubChecker SC =
+            llvm::StringSwitch<SubChecker>(FName)
+                .Case("open", &UnixAPIChecker::CheckOpen)
+                .Case("openat", &UnixAPIChecker::CheckOpenAt)
+                .Case("pthread_once", &UnixAPIChecker::CheckPthreadOnce)
+                .Default(nullptr)) {
+      (this->*SC)(C, CE);
+    }
+  }
+  if (CheckPortability) {
+    if (SubChecker SC =
+                llvm::StringSwitch<SubChecker>(FName)
+                .Case("calloc", &UnixAPIChecker::CheckCallocZero)
+                .Case("malloc", &UnixAPIChecker::CheckMallocZero)
+                .Case("realloc", &UnixAPIChecker::CheckReallocZero)
+                .Case("reallocf", &UnixAPIChecker::CheckReallocfZero)
+                .Cases("alloca", "__builtin_alloca",
+                       &UnixAPIChecker::CheckAllocaZero)
+                .Case("__builtin_alloca_with_align",
+                      &UnixAPIChecker::CheckAllocaWithAlignZero)
+                .Case("valloc", &UnixAPIChecker::CheckVallocZero)
+                .Default(nullptr)) {
+      (this->*SC)(C, CE);
+    }
+  }
 }
 
 //===----------------------------------------------------------------------===//
 // Registration.
 //===----------------------------------------------------------------------===//
 
-void ento::registerUnixAPIChecker(CheckerManager &mgr) {
-  mgr.registerChecker<UnixAPIChecker>();
-}
+#define REGISTER_CHECKER(Name)                                                 \
+  void ento::registerUnixAPI##Name##Checker(CheckerManager &mgr) {             \
+    mgr.registerChecker<UnixAPIChecker>()->Check##Name = true;                \
+  }
+
+REGISTER_CHECKER(Misuse)
+REGISTER_CHECKER(Portability)
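
The split means the two halves can now be registered and silenced independently. A hypothetical test file (ours, for illustration) showing what each sub-checker flags:

```cpp
#include <fcntl.h>
#include <stdlib.h>

void misuse(const char *path) {
  // Flagged by the Misuse sub-checker: open() with O_CREAT requires a third
  // (mode) argument.
  int fd = open(path, O_CREAT);
  (void)fd;
}

void portability() {
  // Flagged by the Portability sub-checker: zero-byte allocations are
  // implementation-defined.
  void *p = malloc(0);
  free(p);
}
```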
diff --git a/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Core/AnalysisManager.cpp b/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Core/AnalysisManager.cpp
index 54634fdffeb5f..83e67662e614c 100644
--- a/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Core/AnalysisManager.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Core/AnalysisManager.cpp
@@ -23,9 +23,10 @@ AnalysisManager::AnalysisManager(ASTContext &ctx, DiagnosticsEngine &diags,
                                  AnalyzerOptions &Options,
                                  CodeInjector *injector)
   : AnaCtxMgr(Options.UnoptimizedCFG,
-              /*AddImplicitDtors=*/true,
+              Options.includeImplicitDtorsInCFG(),
               /*AddInitializers=*/true,
               Options.includeTemporaryDtorsInCFG(),
+              Options.includeLifetimeInCFG(),
               Options.shouldSynthesizeBodies(),
               Options.shouldConditionalizeStaticInitializers(),
               /*addCXXNewAllocator=*/true,
diff --git a/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Core/AnalyzerOptions.cpp b/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Core/AnalyzerOptions.cpp
index 45ef612ee1d58..6f48fcb9e20ce 100644
--- a/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Core/AnalyzerOptions.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Core/AnalyzerOptions.cpp
@@ -172,6 +172,17 @@ bool AnalyzerOptions::includeTemporaryDtorsInCFG() {
                           /* Default = */ false);
 }
 
+bool AnalyzerOptions::includeImplicitDtorsInCFG() {
+  return getBooleanOption(IncludeImplicitDtorsInCFG,
+                          "cfg-implicit-dtors",
+                          /* Default = */ true);
+}
+
+bool AnalyzerOptions::includeLifetimeInCFG() {
+  return getBooleanOption(IncludeLifetimeInCFG, "cfg-lifetime",
+                          /* Default = */ false);
+}
+
 bool AnalyzerOptions::mayInlineCXXStandardLibrary() {
   return getBooleanOption(InlineCXXStandardLibrary,
                           "c++-stdlib-inlining",
@@ -293,7 +304,7 @@ unsigned AnalyzerOptions::getMaxInlinableSize() {
         DefaultValue = 4;
         break;
       case UMK_Deep:
-        DefaultValue = 50;
+        DefaultValue = 100;
         break;
     }
 
@@ -332,7 +343,7 @@ unsigned AnalyzerOptions::getMaxNodesPerTopLevelFunction() {
         DefaultValue = 75000;
         break;
       case UMK_Deep:
-        DefaultValue = 150000;
+        DefaultValue = 225000;
         break;
     }
     MaxNodesPerTopLevelFunction = getOptionAsInteger("max-nodes", DefaultValue);
diff --git a/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Core/BugReporter.cpp b/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Core/BugReporter.cpp
index 2114033ba8b53..d8fca00681b4d 100644
--- a/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Core/BugReporter.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Core/BugReporter.cpp
@@ -1671,9 +1671,15 @@ static bool GenerateAlternateExtensivePathDiagnostic(
         // Add an edge to the start of the function.
         const StackFrameContext *CalleeLC = CE->getCalleeContext();
         const Decl *D = CalleeLC->getDecl();
-        addEdgeToPath(PD.getActivePath(), PrevLoc,
-                      PathDiagnosticLocation::createBegin(D, SM),
-                      CalleeLC);
+        // Add the edge only when the callee has a body. We jump to the
+        // beginning of the *declaration*, however we expect it to be followed
+        // by the body. This isn't the case for autosynthesized property
+        // accessors in Objective-C. There is no need for a similar extra check
+        // for CallExit points because the exit edge comes from a statement
+        // (i.e. return), not from a declaration.
+        if (D->hasBody())
+          addEdgeToPath(PD.getActivePath(), PrevLoc,
+                        PathDiagnosticLocation::createBegin(D, SM), CalleeLC);
 
         // Did we visit an entire call?
         bool VisitedEntireCall = PD.isWithinCall();
@@ -3448,14 +3454,12 @@ void BugReporter::FlushReport(BugReport *exampleReport,
     // the BugReporterVisitors may mark this bug as a false positive.
     assert(!bugReports.empty());
 
-    MaxBugClassSize =
-        std::max(bugReports.size(), static_cast<size_t>(MaxBugClassSize));
+    MaxBugClassSize.updateMax(bugReports.size());
 
     if (!generatePathDiagnostic(*D.get(), PD, bugReports))
       return;
 
-    MaxValidBugClassSize =
-        std::max(bugReports.size(), static_cast<size_t>(MaxValidBugClassSize));
+    MaxValidBugClassSize.updateMax(bugReports.size());
 
     // Examine the report and see if the last piece is in a header. Reset the
     // report location to the last piece in the main source file.
diff --git a/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Core/CallEvent.cpp b/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Core/CallEvent.cpp
index ee761689f479b..1858bfd89637e 100644
--- a/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Core/CallEvent.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Core/CallEvent.cpp
@@ -957,6 +957,12 @@ RuntimeDefinition ObjCMethodCall::getRuntimeDefinition() const {
         return RuntimeDefinition();
 
       DynamicTypeInfo DTI = getDynamicTypeInfo(getState(), Receiver);
+      if (!DTI.isValid()) {
+        assert(isa<AllocaRegion>(Receiver) &&
+               "Unhandled untyped region class!");
+        return RuntimeDefinition();
+      }
+
       QualType DynType = DTI.getType();
       CanBeSubClassed = DTI.canBeASubClass();
       ReceiverT = dyn_cast<ObjCObjectPointerType>(DynType.getCanonicalType());
diff --git a/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
index 8ee34190891ad..eee5400fe1771 100644
--- a/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
@@ -362,6 +362,8 @@ void ExprEngine::processCFGElement(const CFGElement E, ExplodedNode *Pred,
     case CFGElement::TemporaryDtor:
       ProcessImplicitDtor(E.castAs<CFGImplicitDtor>(), Pred);
       return;
+    case CFGElement::LifetimeEnds:
+      return;
   }
 }
 
@@ -1176,6 +1178,7 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred,
         }
       }
       // FALLTHROUGH
+      LLVM_FALLTHROUGH;
     }
     case Stmt::CallExprClass:
     case Stmt::CXXMemberCallExprClass:
diff --git a/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp b/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp
index 8f720a2067b15..6f1e8391e67cf 100644
--- a/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp
@@ -980,10 +980,9 @@ void ExprEngine::VisitUnaryOperator(const UnaryOperator* U, ExplodedNode *Pred,
           //    transfer functions as "0 == E".
           SVal Result;
           if (Optional<Loc> LV = V.getAs<Loc>()) {
-            Loc X = svalBuilder.makeNull();
+            Loc X = svalBuilder.makeNullWithType(Ex->getType());
             Result = evalBinOp(state, BO_EQ, *LV, X, U->getType());
-          }
-          else if (Ex->getType()->isFloatingType()) {
+          } else if (Ex->getType()->isFloatingType()) {
             // FIXME: handle floating point types.
             Result = UnknownVal();
           } else {
diff --git a/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Core/ExprEngineCallAndReturn.cpp b/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Core/ExprEngineCallAndReturn.cpp
index 39d88bfda1486..caf86b26b66d3 100644
--- a/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Core/ExprEngineCallAndReturn.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Core/ExprEngineCallAndReturn.cpp
@@ -447,6 +447,7 @@ bool ExprEngine::inlineCall(const CallEvent &Call, const Decl *D,
   Bldr.takeNodes(Pred);
 
   NumInlinedCalls++;
+  Engine.FunctionSummaries->bumpNumTimesInlined(D);
 
   // Mark the decl as visited.
   if (VisitedCallees)
@@ -868,8 +869,6 @@ bool ExprEngine::shouldInlineCall(const CallEvent &Call, const Decl *D,
       || IsRecursive))
     return false;
 
-  Engine.FunctionSummaries->bumpNumTimesInlined(D);
-
   return true;
 }
 
diff --git a/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Core/PathDiagnostic.cpp b/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Core/PathDiagnostic.cpp
index 7c5ee3b259440..d91786f749198 100644
--- a/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Core/PathDiagnostic.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Core/PathDiagnostic.cpp
@@ -578,6 +578,7 @@ getLocationForCaller(const StackFrameContext *SFC,
   }
   case CFGElement::TemporaryDtor:
   case CFGElement::NewAllocator:
+  case CFGElement::LifetimeEnds:
     llvm_unreachable("not yet implemented!");
   }
 
@@ -694,7 +695,30 @@ PathDiagnosticLocation::create(const ProgramPoint& P,
   return PathDiagnosticLocation(S, SMng, P.getLocationContext());
 }
 
+static const LocationContext *
+findTopAutosynthesizedParentContext(const LocationContext *LC) {
+  assert(LC->getAnalysisDeclContext()->isBodyAutosynthesized());
+  const LocationContext *ParentLC = LC->getParent();
+  assert(ParentLC && "We don't start analysis from autosynthesized code");
+  while (ParentLC->getAnalysisDeclContext()->isBodyAutosynthesized()) {
+    LC = ParentLC;
+    ParentLC = LC->getParent();
+    assert(ParentLC && "We don't start analysis from autosynthesized code");
+  }
+  return LC;
+}
+
 const Stmt *PathDiagnosticLocation::getStmt(const ExplodedNode *N) {
+  // We cannot place diagnostics on autosynthesized code.
+  // Put them onto the call site through which we jumped into autosynthesized
+  // code for the first time.
+  const LocationContext *LC = N->getLocationContext();
+  if (LC->getAnalysisDeclContext()->isBodyAutosynthesized()) {
+    // It must be a stack frame because we only autosynthesize functions.
+    return cast(findTopAutosynthesizedParentContext(LC))
+        ->getCallSite();
+  }
+  // Otherwise, see if the node's program point directly points to a statement.
   ProgramPoint P = N->getLocation();
   if (Optional<StmtPoint> SP = P.getAs<StmtPoint>())
     return SP->getStmt();
@@ -912,6 +936,17 @@ void PathDiagnosticCallPiece::setCallee(const CallEnter &CE,
 
   callEnterWithin = PathDiagnosticLocation::createBegin(Callee, SM);
   callEnter = getLocationForCaller(CalleeCtx, CE.getLocationContext(), SM);
+
+  // Autosynthesized property accessors are special because we'd never
+  // pop back up to non-autosynthesized code until we leave them.
+  // This is not generally true for autosynthesized callees, which may call
+  // non-autosynthesized callbacks.
+  // Unless set here, the IsCalleeAnAutosynthesizedPropertyAccessor flag
+  // defaults to false.
+  if (const ObjCMethodDecl *MD = dyn_cast<ObjCMethodDecl>(Callee))
+    IsCalleeAnAutosynthesizedPropertyAccessor = (
+        MD->isPropertyAccessor() &&
+        CalleeCtx->getAnalysisDeclContext()->isBodyAutosynthesized());
 }
 
 static inline void describeClass(raw_ostream &Out, const CXXRecordDecl *D,
@@ -986,7 +1021,11 @@ static bool describeCodeDecl(raw_ostream &Out, const Decl *D,
 
 std::shared_ptr<PathDiagnosticEventPiece>
 PathDiagnosticCallPiece::getCallEnterEvent() const {
-  if (!Callee)
+  // We do not produce call enters and call exits for autosynthesized property
+  // accessors. We do generally produce them for other functions coming from
+  // the body farm because they may call callbacks that bring us back into
+  // visible code.
+  if (!Callee || IsCalleeAnAutosynthesizedPropertyAccessor)
     return nullptr;
 
   SmallString<256> buf;
@@ -1020,7 +1059,11 @@ PathDiagnosticCallPiece::getCallEnterWithinCallerEvent() const {
 
 std::shared_ptr<PathDiagnosticEventPiece>
 PathDiagnosticCallPiece::getCallExitEvent() const {
-  if (NoExit)
+  // We do not produce call enters and call exits for autosynthesized property
+  // accessors. We do generally produce them for other functions coming from
+  // the body farm because they may call callbacks that bring us back into
+  // visible code.
+  if (NoExit || IsCalleeAnAutosynthesizedPropertyAccessor)
     return nullptr;
 
   SmallString<256> buf;
diff --git a/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Core/ProgramState.cpp b/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Core/ProgramState.cpp
index 31556c792fc5b..3215c3ccd21e9 100644
--- a/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Core/ProgramState.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Core/ProgramState.cpp
@@ -644,15 +644,33 @@ ProgramStateRef ProgramState::addTaint(const Stmt *S,
   if (const Expr *E = dyn_cast_or_null<Expr>(S))
     S = E->IgnoreParens();
 
-  SymbolRef Sym = getSVal(S, LCtx).getAsSymbol();
+  return addTaint(getSVal(S, LCtx), Kind);
+}
+
+ProgramStateRef ProgramState::addTaint(SVal V,
+                                       TaintTagType Kind) const {
+  SymbolRef Sym = V.getAsSymbol();
   if (Sym)
     return addTaint(Sym, Kind);
 
-  const MemRegion *R = getSVal(S, LCtx).getAsRegion();
-  addTaint(R, Kind);
+  // If the SVal represents a structure, try to mass-taint all values within the
+  // structure. For now it only works efficiently on lazy compound values that
+  // were conjured during a conservative evaluation of a function - either as
+  // return values of functions that return structures or arrays by value, or as
+  // values of structures or arrays passed into the function by reference,
+  // directly or through pointer aliasing. Such lazy compound values are
+  // characterized by having exactly one binding in their captured store within
+  // their parent region, which is a conjured symbol default-bound to the base
+  // region of the parent region.
+  if (auto LCV = V.getAs<nonloc::LazyCompoundVal>()) {
+    if (Optional<SVal> binding =
+            getStateManager().StoreMgr->getDefaultBinding(*LCV)) {
+      if (SymbolRef Sym = binding->getAsSymbol())
+        return addPartialTaint(Sym, LCV->getRegion(), Kind);
+    }
+  }
 
-  // Cannot add taint, so just return the state.
-  return this;
+  const MemRegion *R = V.getAsRegion();
+  return addTaint(R, Kind);
 }
 
 ProgramStateRef ProgramState::addTaint(const MemRegion *R,
@@ -674,6 +692,27 @@ ProgramStateRef ProgramState::addTaint(SymbolRef Sym,
   return NewState;
 }
 
+ProgramStateRef ProgramState::addPartialTaint(SymbolRef ParentSym,
+                                              const SubRegion *SubRegion,
+                                              TaintTagType Kind) const {
+  // Ignore partial taint if the entire parent symbol is already tainted.
+  if (contains<TaintMap>(ParentSym) && *get<TaintMap>(ParentSym) == Kind)
+    return this;
+
+  // Partial taint applies if only a portion of the symbol is tainted.
+  if (SubRegion == SubRegion->getBaseRegion())
+    return addTaint(ParentSym, Kind);
+
+  const TaintedSubRegions *SavedRegs = get<DerivedSymTaint>(ParentSym);
+  TaintedSubRegions Regs =
+      SavedRegs ? *SavedRegs : stateMgr->TSRFactory.getEmptyMap();
+
+  Regs = stateMgr->TSRFactory.add(Regs, SubRegion, Kind);
+  ProgramStateRef NewState = set<DerivedSymTaint>(ParentSym, Regs);
+  assert(NewState);
+  return NewState;
+}
+
 bool ProgramState::isTainted(const Stmt *S, const LocationContext *LCtx,
                              TaintTagType Kind) const {
   if (const Expr *E = dyn_cast_or_null(S))
@@ -714,31 +753,52 @@ bool ProgramState::isTainted(SymbolRef Sym, TaintTagType Kind) const {
     return false;
 
   // Traverse all the symbols this symbol depends on to see if any are tainted.
-  bool Tainted = false;
   for (SymExpr::symbol_iterator SI = Sym->symbol_begin(), SE =Sym->symbol_end();
        SI != SE; ++SI) {
     if (!isa<SymbolData>(*SI))
       continue;
 
-    const TaintTagType *Tag = get<TaintMap>(*SI);
-    Tainted = (Tag && *Tag == Kind);
+    if (const TaintTagType *Tag = get<TaintMap>(*SI)) {
+      if (*Tag == Kind)
+        return true;
+    }
 
-    // If this is a SymbolDerived with a tainted parent, it's also tainted.
-    if (const SymbolDerived *SD = dyn_cast<SymbolDerived>(*SI))
-      Tainted = Tainted || isTainted(SD->getParentSymbol(), Kind);
+    if (const SymbolDerived *SD = dyn_cast<SymbolDerived>(*SI)) {
+      // If this is a SymbolDerived with a tainted parent, it's also tainted.
+      if (isTainted(SD->getParentSymbol(), Kind))
+        return true;
+
+      // If this is a SymbolDerived with the same parent symbol as another
+      // tainted SymbolDerived and a region that's a sub-region of that tainted
+      // symbol, it's also tainted.
+      if (const TaintedSubRegions *Regs =
+              get<DerivedSymTaint>(SD->getParentSymbol())) {
+        const TypedValueRegion *R = SD->getRegion();
+        for (auto I : *Regs) {
+          // FIXME: The logic to identify tainted regions could be more
+          // complete. For example, this would not currently identify
+          // overlapping fields in a union as tainted. To identify this we can
+          // check for overlapping/nested byte offsets.
+          if (Kind == I.second &&
+              (R == I.first || R->isSubRegionOf(I.first)))
+            return true;
+        }
+      }
+    }
 
     // If memory region is tainted, data is also tainted.
-    if (const SymbolRegionValue *SRV = dyn_cast<SymbolRegionValue>(*SI))
-      Tainted = Tainted || isTainted(SRV->getRegion(), Kind);
-
-    // If If this is a SymbolCast from a tainted value, it's also tainted.
-    if (const SymbolCast *SC = dyn_cast<SymbolCast>(*SI))
-      Tainted = Tainted || isTainted(SC->getOperand(), Kind);
+    if (const SymbolRegionValue *SRV = dyn_cast<SymbolRegionValue>(*SI)) {
+      if (isTainted(SRV->getRegion(), Kind))
+        return true;
+    }
 
-    if (Tainted)
-      return true;
+    // If this is a SymbolCast from a tainted value, it's also tainted.
+    if (const SymbolCast *SC = dyn_cast<SymbolCast>(*SI)) {
+      if (isTainted(SC->getOperand(), Kind))
+        return true;
+    }
   }
 
-  return Tainted;
+  return false;
 }
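
A rough sketch (our code, not from the patch) of the pattern the partial-taint machinery targets: once a taint source marks a struct bound to a single conjured symbol, reads of its individual fields - symbols derived from sub-regions - now also report as tainted.

```cpp
#include <unistd.h>

struct Packet { int id; char payload[16]; };

int handle(int sock) {
  struct Packet p;
  // Hypothetical taint source: the bytes of 'p' become tainted.
  read(sock, &p, sizeof(p));
  // Both field reads produce SymbolDerived values whose parent symbol (or a
  // covering sub-region entry in DerivedSymTaint) is tainted.
  return p.id + p.payload[0];
}
```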
 
diff --git a/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Core/RegionStore.cpp b/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Core/RegionStore.cpp
index 3000e13d32c6f..11902f66df914 100644
--- a/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Core/RegionStore.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Core/RegionStore.cpp
@@ -409,6 +409,19 @@ class RegionStoreManager : public StoreManager {
 
   // BindDefault is only used to initialize a region with a default value.
   StoreRef BindDefault(Store store, const MemRegion *R, SVal V) override {
+    // FIXME: The offsets of empty bases can be tricky because of
+    // the so-called "empty base class optimization".
+    // If a base class has been optimized out
+    // we should not try to create a binding, otherwise we should.
+    // Unfortunately, at the moment ASTRecordLayout doesn't expose
+    // the actual sizes of the empty bases
+    // and trying to infer them from offsets/alignments
+    // seems to be error-prone and non-trivial because of the trailing padding.
+    // As a temporary mitigation we don't create bindings for empty bases.
+    if (R->getKind() == MemRegion::CXXBaseObjectRegionKind &&
+        cast<CXXBaseObjectRegion>(R)->getDecl()->isEmpty())
+      return StoreRef(store, *this);
+
     RegionBindingsRef B = getRegionBindings(store);
     assert(!B.lookup(R, BindingKey::Direct));
 
@@ -496,7 +509,10 @@ class RegionStoreManager : public StoreManager {
 
   Optional<SVal> getDefaultBinding(Store S, const MemRegion *R) override {
     RegionBindingsRef B = getRegionBindings(S);
-    return B.getDefaultBinding(R);
+    // Default bindings are always applied over a base region, so look up the
+    // base region's default binding; otherwise the lookup will fail when R
+    // is at an offset from R->getBaseRegion().
+    return B.getDefaultBinding(R->getBaseRegion());
   }
 
   SVal getBinding(RegionBindingsConstRef B, Loc L, QualType T = QualType());
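
The BindDefault mitigation above concerns C++ layouts like the following sketch (ours), where the empty base subobject occupies no storage of its own:

```cpp
struct Empty {};                 // empty base class
struct Derived : Empty {
  int x;                         // under the empty-base optimization,
};                               // sizeof(Derived) == sizeof(int) on common ABIs

// Default-initializing a Derived must not create a store binding for the
// Empty subobject: it has been optimized away and has no distinct offset.
Derived d{};
```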
diff --git a/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp b/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp
index ffaa0eda918aa..04452e3e7cc22 100644
--- a/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp
@@ -325,6 +325,7 @@ Optional<SVal> SValBuilder::getConstantVal(const Expr *E) {
     }
     }
     // FALLTHROUGH
+    LLVM_FALLTHROUGH;
   }
 
   // If we don't have a special case, fall back to the AST's constant evaluator.
diff --git a/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp b/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp
index 82ce8b45fe781..f09f9696f5add 100644
--- a/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp
@@ -71,18 +71,15 @@ SVal SimpleSValBuilder::dispatchCast(SVal Val, QualType CastTy) {
 }
 
 SVal SimpleSValBuilder::evalCastFromNonLoc(NonLoc val, QualType castTy) {
-
   bool isLocType = Loc::isLocType(castTy);
-
   if (val.getAs())
     return val;
 
   if (Optional<nonloc::LocAsInteger> LI = val.getAs<nonloc::LocAsInteger>()) {
     if (isLocType)
       return LI->getLoc();
-
     // FIXME: Correctly support promotions/truncations.
-    unsigned castSize = Context.getTypeSize(castTy);
+    unsigned castSize = Context.getIntWidth(castTy);
     if (castSize == LI->getNumBits())
       return val;
     return makeLocAsInteger(LI->getLoc(), castSize);
@@ -163,6 +160,7 @@ SVal SimpleSValBuilder::evalCastFromLoc(Loc val, QualType castTy) {
           return nonloc::SymbolVal(SymR->getSymbol());
 
         // FALL-THROUGH
+        LLVM_FALLTHROUGH;
       }
 
       case loc::GotoLabelKind:
@@ -172,7 +170,7 @@ SVal SimpleSValBuilder::evalCastFromLoc(Loc val, QualType castTy) {
   }
 
   if (castTy->isIntegralOrEnumerationType()) {
-    unsigned BitWidth = Context.getTypeSize(castTy);
+    unsigned BitWidth = Context.getIntWidth(castTy);
 
     if (!val.getAs<loc::ConcreteInt>())
       return makeLocAsInteger(val, BitWidth);
diff --git a/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp b/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp
index 0fe0f3a6ed58d..c47edc7d21256 100644
--- a/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp
@@ -674,10 +674,8 @@ void AnalysisConsumer::HandleCode(Decl *D, AnalysisMode Mode,
 
   DisplayFunction(D, Mode, IMode);
   CFG *DeclCFG = Mgr->getCFG(D);
-  if (DeclCFG) {
-    unsigned CFGSize = DeclCFG->size();
-    MaxCFGSize = MaxCFGSize < CFGSize ? CFGSize : MaxCFGSize;
-  }
+  if (DeclCFG)
+    MaxCFGSize.updateMax(DeclCFG->size());
 
   BugReporter BR(*Mgr);
 
diff --git a/interpreter/llvm/src/tools/clang/lib/Tooling/ArgumentsAdjusters.cpp b/interpreter/llvm/src/tools/clang/lib/Tooling/ArgumentsAdjusters.cpp
index 48b925c698a7c..ac9fd3c5cade4 100644
--- a/interpreter/llvm/src/tools/clang/lib/Tooling/ArgumentsAdjusters.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Tooling/ArgumentsAdjusters.cpp
@@ -42,7 +42,7 @@ ArgumentsAdjuster getClangStripOutputAdjuster() {
         AdjustedArgs.push_back(Args[i]);
 
       if (Arg == "-o") {
-        // Output is specified as -o foo. Skip the next argument also.
+        // Output is specified as -o foo. Skip the next argument too.
         ++i;
       }
       // Else, the output is specified as -ofoo. Just do nothing.
@@ -51,6 +51,26 @@ ArgumentsAdjuster getClangStripOutputAdjuster() {
   };
 }
 
+ArgumentsAdjuster getClangStripDependencyFileAdjuster() {
+  return [](const CommandLineArguments &Args, StringRef /*unused*/) {
+    CommandLineArguments AdjustedArgs;
+    for (size_t i = 0, e = Args.size(); i < e; ++i) {
+      StringRef Arg = Args[i];
+      // All dependency-file options begin with -M. These include -MM,
+      // -MF, -MG, -MP, -MT, -MQ, -MD, and -MMD.
+      if (!Arg.startswith("-M"))
+        AdjustedArgs.push_back(Args[i]);
+
+      if ((Arg == "-MF") || (Arg == "-MT") || (Arg == "-MQ") ||
+          (Arg == "-MD") || (Arg == "-MMD")) {
+        // Output is specified as -MX foo. Skip the next argument too.
+        ++i;
+      }
+    }
+    return AdjustedArgs;
+  };
+}
+
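
A minimal sketch of how a tool might chain the new adjuster (the configure() wrapper is ours; appendArgumentsAdjuster is the existing ClangTool hook):

```cpp
#include "clang/Tooling/ArgumentsAdjusters.h"
#include "clang/Tooling/Tooling.h"

void configure(clang::tooling::ClangTool &Tool) {
  // Drop -M* flags so tool invocations don't try to emit dependency files,
  // and -o so they don't write output files.
  Tool.appendArgumentsAdjuster(
      clang::tooling::getClangStripDependencyFileAdjuster());
  Tool.appendArgumentsAdjuster(
      clang::tooling::getClangStripOutputAdjuster());
}
```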
 ArgumentsAdjuster getInsertArgumentAdjuster(const CommandLineArguments &Extra,
                                             ArgumentInsertPosition Pos) {
   return [Extra, Pos](const CommandLineArguments &Args, StringRef /*unused*/) {
@@ -83,4 +103,3 @@ ArgumentsAdjuster combineAdjusters(ArgumentsAdjuster First,
 
 } // end namespace tooling
 } // end namespace clang
-
diff --git a/interpreter/llvm/src/tools/clang/lib/Tooling/CommonOptionsParser.cpp b/interpreter/llvm/src/tools/clang/lib/Tooling/CommonOptionsParser.cpp
index 5a44061cbd4c4..9e9689e6b2524 100644
--- a/interpreter/llvm/src/tools/clang/lib/Tooling/CommonOptionsParser.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Tooling/CommonOptionsParser.cpp
@@ -116,7 +116,11 @@ CommonOptionsParser::CommonOptionsParser(
 
   cl::HideUnrelatedOptions(Category);
 
-  Compilations.reset(FixedCompilationDatabase::loadFromCommandLine(argc, argv));
+  std::string ErrorMessage;
+  Compilations =
+      FixedCompilationDatabase::loadFromCommandLine(argc, argv, ErrorMessage);
+  if (!Compilations && !ErrorMessage.empty())
+    llvm::errs() << ErrorMessage;
   cl::ParseCommandLineOptions(argc, argv, Overview);
   cl::PrintOptionValues();
 
@@ -125,7 +129,6 @@ CommonOptionsParser::CommonOptionsParser(
       SourcePathList.empty())
     return;
   if (!Compilations) {
-    std::string ErrorMessage;
     if (!BuildPath.empty()) {
       Compilations =
           CompilationDatabase::autoDetectFromDirectory(BuildPath, ErrorMessage);
diff --git a/interpreter/llvm/src/tools/clang/lib/Tooling/CompilationDatabase.cpp b/interpreter/llvm/src/tools/clang/lib/Tooling/CompilationDatabase.cpp
index 8ca0b2df70130..0e835579e04ed 100644
--- a/interpreter/llvm/src/tools/clang/lib/Tooling/CompilationDatabase.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Tooling/CompilationDatabase.cpp
@@ -27,6 +27,7 @@
 #include "llvm/Option/Arg.h"
 #include "llvm/Support/Host.h"
 #include "llvm/Support/Path.h"
+#include "llvm/Support/raw_ostream.h"
 #include <sstream>
 #include <system_error>
 using namespace clang;
@@ -150,23 +151,21 @@ struct CompileJobAnalyzer {
 // options.
 class UnusedInputDiagConsumer : public DiagnosticConsumer {
 public:
-  UnusedInputDiagConsumer() : Other(nullptr) {}
-
-  // Useful for debugging, chain diagnostics to another consumer after
-  // recording for our own purposes.
-  UnusedInputDiagConsumer(DiagnosticConsumer *Other) : Other(Other) {}
+  UnusedInputDiagConsumer(DiagnosticConsumer &Other) : Other(Other) {}
 
   void HandleDiagnostic(DiagnosticsEngine::Level DiagLevel,
                         const Diagnostic &Info) override {
     if (Info.getID() == clang::diag::warn_drv_input_file_unused) {
       // Arg 1 for this diagnostic is the option that didn't get used.
       UnusedInputs.push_back(Info.getArgStdStr(0));
+    } else if (DiagLevel >= DiagnosticsEngine::Error) {
+      // If the driver failed to create a compilation object, show the
+      // diagnostics to the user.
+      Other.HandleDiagnostic(DiagLevel, Info);
     }
-    if (Other)
-      Other->HandleDiagnostic(DiagLevel, Info);
   }
 
-  DiagnosticConsumer *Other;
+  DiagnosticConsumer &Other;
   SmallVector<std::string, 2> UnusedInputs;
 };
 
@@ -205,9 +204,12 @@ struct MatchesAny {
 ///          \li false if \c Args cannot be used for compilation jobs (e.g.
 ///          contains an option like -E or -version).
 static bool stripPositionalArgs(std::vector<const char *> Args,
-                                std::vector<std::string> &Result) {
+                                std::vector<std::string> &Result,
+                                std::string &ErrorMsg) {
   IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts = new DiagnosticOptions();
-  UnusedInputDiagConsumer DiagClient;
+  llvm::raw_string_ostream Output(ErrorMsg);
+  TextDiagnosticPrinter DiagnosticPrinter(Output, &*DiagOpts);
+  UnusedInputDiagConsumer DiagClient(DiagnosticPrinter);
   DiagnosticsEngine Diagnostics(
       IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs()),
       &*DiagOpts, &DiagClient, false);
@@ -245,21 +247,24 @@ static bool stripPositionalArgs(std::vector<const char *> Args,
 
   const std::unique_ptr<driver::Compilation> Compilation(
       NewDriver->BuildCompilation(Args));
+  if (!Compilation)
+    return false;
 
   const driver::JobList &Jobs = Compilation->getJobs();
 
   CompileJobAnalyzer CompileAnalyzer;
 
   for (const auto &Cmd : Jobs) {
-    // Collect only for Assemble jobs. If we do all jobs we get duplicates
-    // since Link jobs point to Assemble jobs as inputs.
-    if (Cmd.getSource().getKind() == driver::Action::AssembleJobClass)
+    // Collect only for Assemble and Compile jobs. If we do all jobs we get
+    // duplicates since Link jobs point to Assemble jobs as inputs.
+    if (Cmd.getSource().getKind() == driver::Action::AssembleJobClass ||
+        Cmd.getSource().getKind() == driver::Action::CompileJobClass) {
       CompileAnalyzer.run(&Cmd.getSource());
+    }
   }
 
   if (CompileAnalyzer.Inputs.empty()) {
-    // No compile jobs found.
-    // FIXME: Emit a warning of some kind?
+    ErrorMsg = "warning: no compile jobs found\n";
     return false;
   }
 
@@ -280,8 +285,14 @@ static bool stripPositionalArgs(std::vector<const char *> Args,
   return true;
 }
 
-FixedCompilationDatabase *FixedCompilationDatabase::loadFromCommandLine(
-    int &Argc, const char *const *Argv, Twine Directory) {
+std::unique_ptr<FixedCompilationDatabase>
+FixedCompilationDatabase::loadFromCommandLine(int &Argc,
+                                              const char *const *Argv,
+                                              std::string &ErrorMsg,
+                                              Twine Directory) {
+  ErrorMsg.clear();
+  if (Argc == 0)
+    return nullptr;
   const char *const *DoubleDash = std::find(Argv, Argv + Argc, StringRef("--"));
   if (DoubleDash == Argv + Argc)
     return nullptr;
@@ -289,9 +300,10 @@ FixedCompilationDatabase *FixedCompilationDatabase::loadFromCommandLine(
   Argc = DoubleDash - Argv;
 
   std::vector<std::string> StrippedArgs;
-  if (!stripPositionalArgs(CommandLine, StrippedArgs))
+  if (!stripPositionalArgs(CommandLine, StrippedArgs, ErrorMsg))
     return nullptr;
-  return new FixedCompilationDatabase(Directory, StrippedArgs);
+  return std::unique_ptr<FixedCompilationDatabase>(
+      new FixedCompilationDatabase(Directory, StrippedArgs));
 }
 
 FixedCompilationDatabase::
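
For callers outside CommonOptionsParser, the updated signature is used along these lines (a sketch; the wrapper name is ours):

```cpp
#include "clang/Tooling/CompilationDatabase.h"
#include "llvm/Support/raw_ostream.h"

std::unique_ptr<clang::tooling::FixedCompilationDatabase>
loadFixedDB(int &Argc, const char **Argv) {
  std::string ErrorMessage;
  auto Compilations =
      clang::tooling::FixedCompilationDatabase::loadFromCommandLine(
          Argc, Argv, ErrorMessage);
  // A nullptr with a non-empty message now distinguishes a real driver error
  // from the benign "no -- on the command line" case.
  if (!Compilations && !ErrorMessage.empty())
    llvm::errs() << ErrorMessage;
  return Compilations;
}
```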
diff --git a/interpreter/llvm/src/tools/clang/lib/Tooling/Core/Diagnostic.cpp b/interpreter/llvm/src/tools/clang/lib/Tooling/Core/Diagnostic.cpp
index 3bbc2b901e383..9e4833f2eff4d 100644
--- a/interpreter/llvm/src/tools/clang/lib/Tooling/Core/Diagnostic.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Tooling/Core/Diagnostic.cpp
@@ -35,9 +35,9 @@ Diagnostic::Diagnostic(llvm::StringRef DiagnosticName,
       BuildDirectory(BuildDirectory) {}
 
 Diagnostic::Diagnostic(llvm::StringRef DiagnosticName,
-                       DiagnosticMessage &Message,
-                       llvm::StringMap<Replacements> &Fix,
-                       SmallVector<DiagnosticMessage, 1> &Notes,
+                       const DiagnosticMessage &Message,
+                       const llvm::StringMap<Replacements> &Fix,
+                       const SmallVector<DiagnosticMessage, 1> &Notes,
                        Level DiagLevel, llvm::StringRef BuildDirectory)
     : DiagnosticName(DiagnosticName), Message(Message), Fix(Fix), Notes(Notes),
       DiagLevel(DiagLevel), BuildDirectory(BuildDirectory) {}
diff --git a/interpreter/llvm/src/tools/clang/lib/Tooling/Refactoring/AtomicChange.cpp b/interpreter/llvm/src/tools/clang/lib/Tooling/Refactoring/AtomicChange.cpp
index 321bbfa2866ae..79dd346acf72f 100644
--- a/interpreter/llvm/src/tools/clang/lib/Tooling/Refactoring/AtomicChange.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Tooling/Refactoring/AtomicChange.cpp
@@ -12,7 +12,6 @@
 #include "llvm/Support/YAMLTraits.h"
 #include <string>
 
-LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(std::string)
 LLVM_YAML_IS_SEQUENCE_VECTOR(clang::tooling::AtomicChange)
 
 namespace {
diff --git a/interpreter/llvm/src/tools/clang/lib/Tooling/Refactoring/CMakeLists.txt b/interpreter/llvm/src/tools/clang/lib/Tooling/Refactoring/CMakeLists.txt
index b2f9b4f4c0cdd..288582fc1b6bd 100644
--- a/interpreter/llvm/src/tools/clang/lib/Tooling/Refactoring/CMakeLists.txt
+++ b/interpreter/llvm/src/tools/clang/lib/Tooling/Refactoring/CMakeLists.txt
@@ -5,8 +5,16 @@ set(LLVM_LINK_COMPONENTS
 
 add_clang_library(clangToolingRefactor
   AtomicChange.cpp
+  Rename/RenamingAction.cpp
+  Rename/USRFinder.cpp
+  Rename/USRFindingAction.cpp
+  Rename/USRLocFinder.cpp
 
   LINK_LIBS
+  clangAST
+  clangASTMatchers
   clangBasic
+  clangIndex
+  clangLex
   clangToolingCore
   )
diff --git a/interpreter/llvm/src/tools/clang/lib/Tooling/Refactoring/Rename/RenamingAction.cpp b/interpreter/llvm/src/tools/clang/lib/Tooling/Refactoring/Rename/RenamingAction.cpp
new file mode 100644
index 0000000000000..de6aba944a4aa
--- /dev/null
+++ b/interpreter/llvm/src/tools/clang/lib/Tooling/Refactoring/Rename/RenamingAction.cpp
@@ -0,0 +1,134 @@
+//===--- RenamingAction.cpp - Clang refactoring library -------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief Provides an action to rename every symbol at a point.
+///
+//===----------------------------------------------------------------------===//
+
+#include "clang/Tooling/Refactoring/Rename/RenamingAction.h"
+#include "clang/AST/ASTConsumer.h"
+#include "clang/AST/ASTContext.h"
+#include "clang/Basic/FileManager.h"
+#include "clang/Frontend/CompilerInstance.h"
+#include "clang/Frontend/FrontendAction.h"
+#include "clang/Lex/Lexer.h"
+#include "clang/Lex/Preprocessor.h"
+#include "clang/Tooling/CommonOptionsParser.h"
+#include "clang/Tooling/Refactoring.h"
+#include "clang/Tooling/Refactoring/Rename/USRLocFinder.h"
+#include "clang/Tooling/Tooling.h"
+#include <string>
+#include <vector>
+
+using namespace llvm;
+
+namespace clang {
+namespace tooling {
+
+class RenamingASTConsumer : public ASTConsumer {
+public:
+  RenamingASTConsumer(
+      const std::vector<std::string> &NewNames,
+      const std::vector<std::string> &PrevNames,
+      const std::vector<std::vector<std::string>> &USRList,
+      std::map<std::string, tooling::Replacements> &FileToReplaces,
+      bool PrintLocations)
+      : NewNames(NewNames), PrevNames(PrevNames), USRList(USRList),
+        FileToReplaces(FileToReplaces), PrintLocations(PrintLocations) {}
+
+  void HandleTranslationUnit(ASTContext &Context) override {
+    for (unsigned I = 0; I < NewNames.size(); ++I)
+      HandleOneRename(Context, NewNames[I], PrevNames[I], USRList[I]);
+  }
+
+  void HandleOneRename(ASTContext &Context, const std::string &NewName,
+                       const std::string &PrevName,
+                       const std::vector<std::string> &USRs) {
+    const SourceManager &SourceMgr = Context.getSourceManager();
+    std::vector<SourceLocation> RenamingCandidates;
+    std::vector<SourceLocation> NewCandidates;
+
+    NewCandidates = tooling::getLocationsOfUSRs(
+        USRs, PrevName, Context.getTranslationUnitDecl());
+    RenamingCandidates.insert(RenamingCandidates.end(), NewCandidates.begin(),
+                              NewCandidates.end());
+
+    unsigned PrevNameLen = PrevName.length();
+    for (const auto &Loc : RenamingCandidates) {
+      if (PrintLocations) {
+        FullSourceLoc FullLoc(Loc, SourceMgr);
+        errs() << "clang-rename: renamed at: " << SourceMgr.getFilename(Loc)
+               << ":" << FullLoc.getSpellingLineNumber() << ":"
+               << FullLoc.getSpellingColumnNumber() << "\n";
+      }
+      // FIXME: better error handling.
+      tooling::Replacement Replace(SourceMgr, Loc, PrevNameLen, NewName);
+      llvm::Error Err = FileToReplaces[Replace.getFilePath()].add(Replace);
+      if (Err)
+        llvm::errs() << "Renaming failed in " << Replace.getFilePath() << "! "
+                     << llvm::toString(std::move(Err)) << "\n";
+    }
+  }
+
+private:
+  const std::vector<std::string> &NewNames, &PrevNames;
+  const std::vector<std::vector<std::string>> &USRList;
+  std::map<std::string, tooling::Replacements> &FileToReplaces;
+  bool PrintLocations;
+};
+
+// A renamer to rename symbols which are identified by a given USRList to a
+// new name.
+//
+// FIXME: Merge with the above RenamingASTConsumer.
+class USRSymbolRenamer : public ASTConsumer {
+public:
+  USRSymbolRenamer(const std::vector<std::string> &NewNames,
+                   const std::vector<std::vector<std::string>> &USRList,
+                   std::map<std::string, tooling::Replacements> &FileToReplaces)
+      : NewNames(NewNames), USRList(USRList), FileToReplaces(FileToReplaces) {
+    assert(USRList.size() == NewNames.size());
+  }
+
+  void HandleTranslationUnit(ASTContext &Context) override {
+    for (unsigned I = 0; I < NewNames.size(); ++I) {
+      // FIXME: Apply AtomicChanges directly once the refactoring APIs are
+      // ready.
+      auto AtomicChanges = tooling::createRenameAtomicChanges(
+          USRList[I], NewNames[I], Context.getTranslationUnitDecl());
+      for (const auto AtomicChange : AtomicChanges) {
+        for (const auto &Replace : AtomicChange.getReplacements()) {
+          llvm::Error Err = FileToReplaces[Replace.getFilePath()].add(Replace);
+          if (Err) {
+            llvm::errs() << "Renaming failed in " << Replace.getFilePath()
+                         << "! " << llvm::toString(std::move(Err)) << "\n";
+          }
+        }
+      }
+    }
+  }
+
+private:
+  const std::vector<std::string> &NewNames;
+  const std::vector<std::vector<std::string>> &USRList;
+  std::map<std::string, tooling::Replacements> &FileToReplaces;
+};
+
+std::unique_ptr<ASTConsumer> RenamingAction::newASTConsumer() {
+  return llvm::make_unique<RenamingASTConsumer>(NewNames, PrevNames, USRList,
+                                                FileToReplaces, PrintLocations);
+}
+
+std::unique_ptr<ASTConsumer> QualifiedRenamingAction::newASTConsumer() {
+  return llvm::make_unique<USRSymbolRenamer>(NewNames, USRList, FileToReplaces);
+}
+
+} // end namespace tooling
+} // end namespace clang
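
Used from a tool, the consumer typically runs via the newFrontendActionFactory(&Action) overload that accepts any factory exposing newASTConsumer(). A rough sketch with our own variable names, assuming NewNames/PrevNames/USRList were populated (e.g. by USRFindingAction) and Tool is a configured clang::tooling::ClangTool:

```cpp
std::map<std::string, clang::tooling::Replacements> FileToReplaces;
clang::tooling::RenamingAction Action(NewNames, PrevNames, USRList,
                                      FileToReplaces, /*PrintLocations=*/true);
auto Factory = clang::tooling::newFrontendActionFactory(&Action);
int ExitCode = Tool.run(Factory.get());
```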
diff --git a/interpreter/llvm/src/tools/clang/lib/Tooling/Refactoring/Rename/USRFinder.cpp b/interpreter/llvm/src/tools/clang/lib/Tooling/Refactoring/Rename/USRFinder.cpp
new file mode 100644
index 0000000000000..3bfb5bbe35e4a
--- /dev/null
+++ b/interpreter/llvm/src/tools/clang/lib/Tooling/Refactoring/Rename/USRFinder.cpp
@@ -0,0 +1,146 @@
+//===--- USRFinder.cpp - Clang refactoring library ------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file Implements a recursive AST visitor that finds the USR of a symbol at a
+/// point.
+///
+//===----------------------------------------------------------------------===//
+
+#include "clang/Tooling/Refactoring/Rename/USRFinder.h"
+#include "clang/AST/AST.h"
+#include "clang/AST/ASTContext.h"
+#include "clang/AST/RecursiveASTVisitor.h"
+#include "clang/Index/USRGeneration.h"
+#include "clang/Lex/Lexer.h"
+#include "clang/Tooling/Refactoring/RecursiveSymbolVisitor.h"
+#include "llvm/ADT/SmallVector.h"
+
+using namespace llvm;
+
+namespace clang {
+namespace tooling {
+
+namespace {
+
+/// Recursively visits each AST node to find the symbol underneath the cursor.
+class NamedDeclOccurrenceFindingVisitor
+    : public RecursiveSymbolVisitor<NamedDeclOccurrenceFindingVisitor> {
+public:
+  // \brief Finds the NamedDecl at a point in the source.
+  // \param Point the location in the source to search for the NamedDecl.
+  explicit NamedDeclOccurrenceFindingVisitor(const SourceLocation Point,
+                                             const ASTContext &Context)
+      : RecursiveSymbolVisitor(Context.getSourceManager(),
+                               Context.getLangOpts()),
+        Point(Point), Context(Context) {}
+
+  bool visitSymbolOccurrence(const NamedDecl *ND,
+                             ArrayRef<SourceRange> NameRanges) {
+    if (!ND)
+      return true;
+    for (const auto &Range : NameRanges) {
+      SourceLocation Start = Range.getBegin();
+      SourceLocation End = Range.getEnd();
+      if (!Start.isValid() || !Start.isFileID() || !End.isValid() ||
+          !End.isFileID() || !isPointWithin(Start, End))
+        return true;
+    }
+    Result = ND;
+    return false;
+  }
+
+  const NamedDecl *getNamedDecl() const { return Result; }
+
+private:
+  // \brief Determines if the Point is within Start and End.
+  bool isPointWithin(const SourceLocation Start, const SourceLocation End) {
+    // FIXME: Add tests for Point == End.
+    return Point == Start || Point == End ||
+           (Context.getSourceManager().isBeforeInTranslationUnit(Start,
+                                                                 Point) &&
+            Context.getSourceManager().isBeforeInTranslationUnit(Point, End));
+  }
+
+  const NamedDecl *Result = nullptr;
+  const SourceLocation Point; // The location to find the NamedDecl.
+  const ASTContext &Context;
+};
+
+} // end anonymous namespace
+
+const NamedDecl *getNamedDeclAt(const ASTContext &Context,
+                                const SourceLocation Point) {
+  const SourceManager &SM = Context.getSourceManager();
+  NamedDeclOccurrenceFindingVisitor Visitor(Point, Context);
+
+  // Try to be clever about pruning down the number of top-level declarations
+  // we see. If both start and end are either before or after the point we're
+  // looking for, the point cannot be inside this decl. Don't even look at it.
+  for (auto *CurrDecl : Context.getTranslationUnitDecl()->decls()) {
+    SourceLocation StartLoc = CurrDecl->getLocStart();
+    SourceLocation EndLoc = CurrDecl->getLocEnd();
+    if (StartLoc.isValid() && EndLoc.isValid() &&
+        SM.isBeforeInTranslationUnit(StartLoc, Point) !=
+            SM.isBeforeInTranslationUnit(EndLoc, Point))
+      Visitor.TraverseDecl(CurrDecl);
+  }
+
+  return Visitor.getNamedDecl();
+}
+
+namespace {
+
+/// Recursively visits each NamedDecl node to find the declaration with a
+/// specific name.
+class NamedDeclFindingVisitor
+    : public RecursiveASTVisitor<NamedDeclFindingVisitor> {
+public:
+  explicit NamedDeclFindingVisitor(StringRef Name) : Name(Name) {}
+
+  // We don't have to traverse the uses to find some declaration with a
+  // specific name, so just visit the named declarations.
+  bool VisitNamedDecl(const NamedDecl *ND) {
+    if (!ND)
+      return true;
+    // Fully qualified name is used to find the declaration.
+    if (Name != ND->getQualifiedNameAsString() &&
+        Name != "::" + ND->getQualifiedNameAsString())
+      return true;
+    Result = ND;
+    return false;
+  }
+
+  const NamedDecl *getNamedDecl() const { return Result; }
+
+private:
+  const NamedDecl *Result = nullptr;
+  StringRef Name;
+};
+
+} // end anonymous namespace
+
+const NamedDecl *getNamedDeclFor(const ASTContext &Context,
+                                 const std::string &Name) {
+  NamedDeclFindingVisitor Visitor(Name);
+  Visitor.TraverseDecl(Context.getTranslationUnitDecl());
+  return Visitor.getNamedDecl();
+}
+
+std::string getUSRForDecl(const Decl *Decl) {
+  llvm::SmallVector<char, 128> Buff;
+
+  // FIXME: Add test for the nullptr case.
+  if (Decl == nullptr || index::generateUSRForDecl(Decl, Buff))
+    return "";
+
+  return std::string(Buff.data(), Buff.size());
+}
+
+} // end namespace tooling
+} // end namespace clang
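
Aside: a minimal usage sketch of the two lookup entry points this file adds (not part of the patch). It assumes the clang development headers and a link against clangTooling; the in-memory snippet and the offset are illustrative only.

#include "clang/Frontend/ASTUnit.h"
#include "clang/Tooling/Refactoring/Rename/USRFinder.h"
#include "clang/Tooling/Tooling.h"
#include "llvm/Support/raw_ostream.h"

int main() {
  // Parse a one-liner in memory; offset 6 is the 'F' of "Foo".
  auto AST = clang::tooling::buildASTFromCode("class Foo {}; Foo f;");
  if (!AST)
    return 1;
  clang::ASTContext &Ctx = AST->getASTContext();
  const clang::SourceManager &SM = Ctx.getSourceManager();
  clang::SourceLocation Point =
      SM.getLocForStartOfFile(SM.getMainFileID()).getLocWithOffset(6);
  if (const clang::NamedDecl *ND = clang::tooling::getNamedDeclAt(Ctx, Point))
    llvm::outs() << ND->getQualifiedNameAsString() << " -> "
                 << clang::tooling::getUSRForDecl(ND) << "\n";
  return 0;
}
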
diff --git a/interpreter/llvm/src/tools/clang/lib/Tooling/Refactoring/Rename/USRFindingAction.cpp b/interpreter/llvm/src/tools/clang/lib/Tooling/Refactoring/Rename/USRFindingAction.cpp
new file mode 100644
index 0000000000000..2769802ad2bc6
--- /dev/null
+++ b/interpreter/llvm/src/tools/clang/lib/Tooling/Refactoring/Rename/USRFindingAction.cpp
@@ -0,0 +1,236 @@
+//===--- USRFindingAction.cpp - Clang refactoring library -----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief Provides an action to find USR for the symbol at <offset>, as well as
+/// all additional USRs.
+///
+//===----------------------------------------------------------------------===//
+
+#include "clang/Tooling/Refactoring/Rename/USRFindingAction.h"
+#include "clang/AST/AST.h"
+#include "clang/AST/ASTConsumer.h"
+#include "clang/AST/ASTContext.h"
+#include "clang/AST/Decl.h"
+#include "clang/AST/RecursiveASTVisitor.h"
+#include "clang/Basic/FileManager.h"
+#include "clang/Frontend/CompilerInstance.h"
+#include "clang/Frontend/FrontendAction.h"
+#include "clang/Lex/Lexer.h"
+#include "clang/Lex/Preprocessor.h"
+#include "clang/Tooling/CommonOptionsParser.h"
+#include "clang/Tooling/Refactoring.h"
+#include "clang/Tooling/Refactoring/Rename/USRFinder.h"
+#include "clang/Tooling/Tooling.h"
+
+#include <algorithm>
+#include <set>
+#include <string>
+#include <vector>
+
+using namespace llvm;
+
+namespace clang {
+namespace tooling {
+
+namespace {
+// \brief NamedDeclFindingConsumer should delegate finding USRs of the given
+// Decl to AdditionalUSRFinder. AdditionalUSRFinder adds USRs of the ctor and
+// dtor if the given Decl refers to a class, and adds USRs of all overridden
+// methods if the Decl refers to a virtual method.
+class AdditionalUSRFinder : public RecursiveASTVisitor<AdditionalUSRFinder> {
+public:
+  AdditionalUSRFinder(const Decl *FoundDecl, ASTContext &Context)
+      : FoundDecl(FoundDecl), Context(Context) {}
+
+  std::vector<std::string> Find() {
+    // Fill OverriddenMethods and PartialSpecs storages.
+    TraverseDecl(Context.getTranslationUnitDecl());
+    if (const auto *MethodDecl = dyn_cast<CXXMethodDecl>(FoundDecl)) {
+      addUSRsOfOverridenFunctions(MethodDecl);
+      for (const auto &OverriddenMethod : OverriddenMethods) {
+        if (checkIfOverriddenFunctionAscends(OverriddenMethod))
+          USRSet.insert(getUSRForDecl(OverriddenMethod));
+      }
+    } else if (const auto *RecordDecl = dyn_cast<CXXRecordDecl>(FoundDecl)) {
+      handleCXXRecordDecl(RecordDecl);
+    } else if (const auto *TemplateDecl =
+                   dyn_cast<ClassTemplateDecl>(FoundDecl)) {
+      handleClassTemplateDecl(TemplateDecl);
+    } else {
+      USRSet.insert(getUSRForDecl(FoundDecl));
+    }
+    return std::vector<std::string>(USRSet.begin(), USRSet.end());
+  }
+
+  bool VisitCXXMethodDecl(const CXXMethodDecl *MethodDecl) {
+    if (MethodDecl->isVirtual())
+      OverriddenMethods.push_back(MethodDecl);
+    return true;
+  }
+
+  bool VisitClassTemplatePartialSpecializationDecl(
+      const ClassTemplatePartialSpecializationDecl *PartialSpec) {
+    PartialSpecs.push_back(PartialSpec);
+    return true;
+  }
+
+private:
+  void handleCXXRecordDecl(const CXXRecordDecl *RecordDecl) {
+    RecordDecl = RecordDecl->getDefinition();
+    if (const auto *ClassTemplateSpecDecl =
+            dyn_cast<ClassTemplateSpecializationDecl>(RecordDecl))
+      handleClassTemplateDecl(ClassTemplateSpecDecl->getSpecializedTemplate());
+    addUSRsOfCtorDtors(RecordDecl);
+  }
+
+  void handleClassTemplateDecl(const ClassTemplateDecl *TemplateDecl) {
+    for (const auto *Specialization : TemplateDecl->specializations())
+      addUSRsOfCtorDtors(Specialization);
+
+    for (const auto *PartialSpec : PartialSpecs) {
+      if (PartialSpec->getSpecializedTemplate() == TemplateDecl)
+        addUSRsOfCtorDtors(PartialSpec);
+    }
+    addUSRsOfCtorDtors(TemplateDecl->getTemplatedDecl());
+  }
+
+  void addUSRsOfCtorDtors(const CXXRecordDecl *RecordDecl) {
+    RecordDecl = RecordDecl->getDefinition();
+
+    // Skip if the CXXRecordDecl doesn't have a definition.
+    if (!RecordDecl)
+      return;
+
+    for (const auto *CtorDecl : RecordDecl->ctors())
+      USRSet.insert(getUSRForDecl(CtorDecl));
+
+    USRSet.insert(getUSRForDecl(RecordDecl->getDestructor()));
+    USRSet.insert(getUSRForDecl(RecordDecl));
+  }
+
+  void addUSRsOfOverridenFunctions(const CXXMethodDecl *MethodDecl) {
+    USRSet.insert(getUSRForDecl(MethodDecl));
+    // Recursively visit each OverridenMethod.
+    for (const auto &OverriddenMethod : MethodDecl->overridden_methods())
+      addUSRsOfOverridenFunctions(OverriddenMethod);
+  }
+
+  bool checkIfOverriddenFunctionAscends(const CXXMethodDecl *MethodDecl) {
+    for (const auto &OverriddenMethod : MethodDecl->overridden_methods()) {
+      if (USRSet.find(getUSRForDecl(OverriddenMethod)) != USRSet.end())
+        return true;
+      return checkIfOverriddenFunctionAscends(OverriddenMethod);
+    }
+    return false;
+  }
+
+  const Decl *FoundDecl;
+  ASTContext &Context;
+  std::set<std::string> USRSet;
+  std::vector<const CXXMethodDecl *> OverriddenMethods;
+  std::vector<const ClassTemplatePartialSpecializationDecl *> PartialSpecs;
+};
+} // namespace
+
+class NamedDeclFindingConsumer : public ASTConsumer {
+public:
+  NamedDeclFindingConsumer(ArrayRef<unsigned> SymbolOffsets,
+                           ArrayRef<std::string> QualifiedNames,
+                           std::vector<std::string> &SpellingNames,
+                           std::vector<std::vector<std::string>> &USRList,
+                           bool Force, bool &ErrorOccurred)
+      : SymbolOffsets(SymbolOffsets), QualifiedNames(QualifiedNames),
+        SpellingNames(SpellingNames), USRList(USRList), Force(Force),
+        ErrorOccurred(ErrorOccurred) {}
+
+private:
+  bool FindSymbol(ASTContext &Context, const SourceManager &SourceMgr,
+                  unsigned SymbolOffset, const std::string &QualifiedName) {
+    DiagnosticsEngine &Engine = Context.getDiagnostics();
+    const FileID MainFileID = SourceMgr.getMainFileID();
+
+    if (SymbolOffset >= SourceMgr.getFileIDSize(MainFileID)) {
+      ErrorOccurred = true;
+      unsigned InvalidOffset = Engine.getCustomDiagID(
+          DiagnosticsEngine::Error,
+          "SourceLocation in file %0 at offset %1 is invalid");
+      Engine.Report(SourceLocation(), InvalidOffset)
+          << SourceMgr.getFileEntryForID(MainFileID)->getName() << SymbolOffset;
+      return false;
+    }
+
+    const SourceLocation Point = SourceMgr.getLocForStartOfFile(MainFileID)
+                                     .getLocWithOffset(SymbolOffset);
+    const NamedDecl *FoundDecl = QualifiedName.empty()
+                                     ? getNamedDeclAt(Context, Point)
+                                     : getNamedDeclFor(Context, QualifiedName);
+
+    if (FoundDecl == nullptr) {
+      if (QualifiedName.empty()) {
+        FullSourceLoc FullLoc(Point, SourceMgr);
+        unsigned CouldNotFindSymbolAt = Engine.getCustomDiagID(
+            DiagnosticsEngine::Error,
+            "clang-rename could not find symbol (offset %0)");
+        Engine.Report(Point, CouldNotFindSymbolAt) << SymbolOffset;
+        ErrorOccurred = true;
+        return false;
+      }
+
+      if (Force)
+        return true;
+
+      unsigned CouldNotFindSymbolNamed = Engine.getCustomDiagID(
+          DiagnosticsEngine::Error, "clang-rename could not find symbol %0");
+      Engine.Report(CouldNotFindSymbolNamed) << QualifiedName;
+      ErrorOccurred = true;
+      return false;
+    }
+
+    // If FoundDecl is a constructor or destructor, we want to instead take
+    // the Decl of the corresponding class.
+    if (const auto *CtorDecl = dyn_cast<CXXConstructorDecl>(FoundDecl))
+      FoundDecl = CtorDecl->getParent();
+    else if (const auto *DtorDecl = dyn_cast<CXXDestructorDecl>(FoundDecl))
+      FoundDecl = DtorDecl->getParent();
+
+    SpellingNames.push_back(FoundDecl->getNameAsString());
+    AdditionalUSRFinder Finder(FoundDecl, Context);
+    USRList.push_back(Finder.Find());
+    return true;
+  }
+
+  void HandleTranslationUnit(ASTContext &Context) override {
+    const SourceManager &SourceMgr = Context.getSourceManager();
+    for (unsigned Offset : SymbolOffsets) {
+      if (!FindSymbol(Context, SourceMgr, Offset, ""))
+        return;
+    }
+    for (const std::string &QualifiedName : QualifiedNames) {
+      if (!FindSymbol(Context, SourceMgr, 0, QualifiedName))
+        return;
+    }
+  }
+
+  ArrayRef<unsigned> SymbolOffsets;
+  ArrayRef<std::string> QualifiedNames;
+  std::vector<std::string> &SpellingNames;
+  std::vector<std::vector<std::string>> &USRList;
+  bool Force;
+  bool &ErrorOccurred;
+};
+
+std::unique_ptr<ASTConsumer> USRFindingAction::newASTConsumer() {
+  return llvm::make_unique<NamedDeclFindingConsumer>(
+      SymbolOffsets, QualifiedNames, SpellingNames, USRList, Force,
+      ErrorOccurred);
+}
+
+} // end namespace tooling
+} // end namespace clang
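
Aside: the override handling above is the subtle part: renaming a virtual method must rename the entire override web, including siblings reachable only through a common base (hence checkIfOverriddenFunctionAscends). Below is a self-contained toy model of that closure over a made-up USR graph; all names and USR strings are illustrative, not clang API.

#include <iostream>
#include <map>
#include <set>
#include <string>
#include <vector>

// Edges point from a method's USR to the USRs of the methods it overrides.
using OverrideGraph = std::map<std::string, std::vector<std::string>>;

// Mirrors addUSRsOfOverridenFunctions: insert a method and, transitively,
// everything it overrides.
void addOverridden(const OverrideGraph &G, const std::string &USR,
                   std::set<std::string> &USRSet) {
  USRSet.insert(USR);
  for (const auto &Base : G.at(USR))
    addOverridden(G, Base, USRSet);
}

// Mirrors checkIfOverriddenFunctionAscends: does this method's override
// chain reach something already in the set?
bool ascendsIntoSet(const OverrideGraph &G, const std::string &USR,
                    const std::set<std::string> &USRSet) {
  for (const auto &Base : G.at(USR)) {
    if (USRSet.count(Base))
      return true;
    return ascendsIntoSet(G, Base, USRSet);
  }
  return false;
}

int main() {
  // A::f is overridden by B::f and by the sibling C::f.
  OverrideGraph G{
      {"c:A::f", {}}, {"c:B::f", {"c:A::f"}}, {"c:C::f", {"c:A::f"}}};
  std::set<std::string> USRSet;
  addOverridden(G, "c:B::f", USRSet); // the decl found under the cursor
  if (ascendsIntoSet(G, "c:C::f", USRSet)) // sibling joins via c:A::f
    USRSet.insert("c:C::f");
  for (const auto &USR : USRSet)
    std::cout << USR << "\n"; // prints all three USRs
}
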
diff --git a/interpreter/llvm/src/tools/clang/lib/Tooling/Refactoring/Rename/USRLocFinder.cpp b/interpreter/llvm/src/tools/clang/lib/Tooling/Refactoring/Rename/USRLocFinder.cpp
new file mode 100644
index 0000000000000..dc21a94610cbb
--- /dev/null
+++ b/interpreter/llvm/src/tools/clang/lib/Tooling/Refactoring/Rename/USRLocFinder.cpp
@@ -0,0 +1,451 @@
+//===--- USRLocFinder.cpp - Clang refactoring library ---------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief Methods for finding all instances of a USR. Our strategy is very
+/// simple; we just compare the USR at every relevant AST node with the one
+/// provided.
+///
+//===----------------------------------------------------------------------===//
+
+#include "clang/Tooling/Refactoring/Rename/USRLocFinder.h"
+#include "clang/AST/ASTContext.h"
+#include "clang/AST/RecursiveASTVisitor.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Lex/Lexer.h"
+#include "clang/Tooling/Core/Lookup.h"
+#include "clang/Tooling/Refactoring/RecursiveSymbolVisitor.h"
+#include "clang/Tooling/Refactoring/Rename/USRFinder.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Casting.h"
+#include <cstddef>
+#include <set>
+#include <string>
+#include <vector>
+
+using namespace llvm;
+
+namespace clang {
+namespace tooling {
+
+namespace {
+
+// \brief This visitor recursively searches for all instances of a USR in a
+// translation unit and stores them for later usage.
+class USRLocFindingASTVisitor
+    : public RecursiveSymbolVisitor<USRLocFindingASTVisitor> {
+public:
+  explicit USRLocFindingASTVisitor(const std::vector<std::string> &USRs,
+                                   StringRef PrevName,
+                                   const ASTContext &Context)
+      : RecursiveSymbolVisitor(Context.getSourceManager(),
+                               Context.getLangOpts()),
+        USRSet(USRs.begin(), USRs.end()), PrevName(PrevName), Context(Context) {
+  }
+
+  bool visitSymbolOccurrence(const NamedDecl *ND,
+                             ArrayRef<SourceRange> NameRanges) {
+    if (USRSet.find(getUSRForDecl(ND)) != USRSet.end()) {
+      assert(NameRanges.size() == 1 &&
+             "Multiple name pieces are not supported yet!");
+      SourceLocation Loc = NameRanges[0].getBegin();
+      const SourceManager &SM = Context.getSourceManager();
+      // TODO: Deal with macro occurrences correctly.
+      if (Loc.isMacroID())
+        Loc = SM.getSpellingLoc(Loc);
+      checkAndAddLocation(Loc);
+    }
+    return true;
+  }
+
+  // Non-visitors:
+
+  // \brief Returns a list of unique locations. Duplicate or overlapping
+  // locations are erroneous and should be reported!
+  const std::vector<clang::SourceLocation> &getLocationsFound() const {
+    return LocationsFound;
+  }
+
+private:
+  void checkAndAddLocation(SourceLocation Loc) {
+    const SourceLocation BeginLoc = Loc;
+    const SourceLocation EndLoc = Lexer::getLocForEndOfToken(
+        BeginLoc, 0, Context.getSourceManager(), Context.getLangOpts());
+    StringRef TokenName =
+        Lexer::getSourceText(CharSourceRange::getTokenRange(BeginLoc, EndLoc),
+                             Context.getSourceManager(), Context.getLangOpts());
+    size_t Offset = TokenName.find(PrevName);
+
+    // The token of the source location we find actually has the old
+    // name.
+    if (Offset != StringRef::npos)
+      LocationsFound.push_back(BeginLoc.getLocWithOffset(Offset));
+  }
+
+  const std::set<std::string> USRSet;
+  const std::string PrevName;
+  std::vector<clang::SourceLocation> LocationsFound;
+  const ASTContext &Context;
+};
+
+SourceLocation StartLocationForType(TypeLoc TL) {
+  // For elaborated types (e.g. `struct a::A`) we want the portion after the
+  // `struct` but including the namespace qualifier, `a::`.
+  if (auto ElaboratedTypeLoc = TL.getAs<ElaboratedTypeLoc>()) {
+    NestedNameSpecifierLoc NestedNameSpecifier =
+        ElaboratedTypeLoc.getQualifierLoc();
+    if (NestedNameSpecifier.getNestedNameSpecifier())
+      return NestedNameSpecifier.getBeginLoc();
+    TL = TL.getNextTypeLoc();
+  }
+  return TL.getLocStart();
+}
+
+SourceLocation EndLocationForType(TypeLoc TL) {
+  // Dig past any namespace or keyword qualifications.
+  while (TL.getTypeLocClass() == TypeLoc::Elaborated ||
+         TL.getTypeLocClass() == TypeLoc::Qualified)
+    TL = TL.getNextTypeLoc();
+
+  // The location for template specializations (e.g. Foo<int>) includes the
+  // templated types in its location range.  We want to restrict this to just
+  // before the `<` character.
+  if (TL.getTypeLocClass() == TypeLoc::TemplateSpecialization) {
+    return TL.castAs<TemplateSpecializationTypeLoc>()
+        .getLAngleLoc()
+        .getLocWithOffset(-1);
+  }
+  return TL.getEndLoc();
+}
+
+NestedNameSpecifier *GetNestedNameForType(TypeLoc TL) {
+  // Dig past any keyword qualifications.
+  while (TL.getTypeLocClass() == TypeLoc::Qualified)
+    TL = TL.getNextTypeLoc();
+
+  // For elaborated types (e.g. `struct a::A`) we want the portion after the
+  // `struct` but including the namespace qualifier, `a::`.
+  if (auto ElaboratedTypeLoc = TL.getAs<ElaboratedTypeLoc>())
+    return ElaboratedTypeLoc.getQualifierLoc().getNestedNameSpecifier();
+  return nullptr;
+}
+
+// Find all locations identified by the given USRs for rename.
+//
+// This class will traverse the AST and find every AST node whose USR is in the
+// given USRs' set.
+class RenameLocFinder : public RecursiveASTVisitor<RenameLocFinder> {
+public:
+  RenameLocFinder(llvm::ArrayRef<std::string> USRs, ASTContext &Context)
+      : USRSet(USRs.begin(), USRs.end()), Context(Context) {}
+
+  // A structure recording all information about a symbol reference being
+  // renamed. We try to add as few prefix qualifiers as possible.
+  struct RenameInfo {
+    // The begin location of a symbol being renamed.
+    SourceLocation Begin;
+    // The end location of a symbol being renamed.
+    SourceLocation End;
+    // The declaration of a symbol being renamed (can be nullptr).
+    const NamedDecl *FromDecl;
+    // The declaration in which the nested name is contained (can be nullptr).
+    const Decl *Context;
+    // The nested name being replaced (can be nullptr).
+    const NestedNameSpecifier *Specifier;
+  };
+
+  // FIXME: Currently, prefix qualifiers will be added to the renamed symbol
+  // definition (e.g. "class Foo {};" => "class b::Bar {};" when renaming
+  // "a::Foo" to "b::Bar").
+  // For renaming declarations/definitions, prefix qualifiers should be filtered
+  // out.
+  bool VisitNamedDecl(const NamedDecl *Decl) {
+    // UsingDecls are handled elsewhere.
+    if (llvm::isa<UsingDecl>(Decl))
+      return true;
+
+    // CXXDestructorDecls are handled when visiting the TypeLoc.
+    if (llvm::isa<CXXDestructorDecl>(Decl))
+      return true;
+
+    if (Decl->isImplicit())
+      return true;
+
+    if (isInUSRSet(Decl)) {
+      RenameInfo Info = {Decl->getLocation(), Decl->getLocation(), nullptr,
+                         nullptr, nullptr};
+      RenameInfos.push_back(Info);
+    }
+    return true;
+  }
+
+  bool VisitDeclRefExpr(const DeclRefExpr *Expr) {
+    const NamedDecl *Decl = Expr->getFoundDecl();
+    if (isInUSRSet(Decl)) {
+      RenameInfo Info = {Expr->getSourceRange().getBegin(),
+                         Expr->getSourceRange().getEnd(), Decl,
+                         getClosestAncestorDecl(*Expr), Expr->getQualifier()};
+      RenameInfos.push_back(Info);
+    }
+
+    return true;
+  }
+
+  bool VisitUsingDecl(const UsingDecl *Using) {
+    for (const auto *UsingShadow : Using->shadows()) {
+      if (isInUSRSet(UsingShadow->getTargetDecl())) {
+        UsingDecls.push_back(Using);
+        break;
+      }
+    }
+    return true;
+  }
+
+  bool VisitNestedNameSpecifierLocations(NestedNameSpecifierLoc NestedLoc) {
+    if (!NestedLoc.getNestedNameSpecifier()->getAsType())
+      return true;
+    if (IsTypeAliasWhichWillBeRenamedElsewhere(NestedLoc.getTypeLoc()))
+      return true;
+
+    if (const auto *TargetDecl =
+            getSupportedDeclFromTypeLoc(NestedLoc.getTypeLoc())) {
+      if (isInUSRSet(TargetDecl)) {
+        RenameInfo Info = {NestedLoc.getBeginLoc(),
+                           EndLocationForType(NestedLoc.getTypeLoc()),
+                           TargetDecl, getClosestAncestorDecl(NestedLoc),
+                           NestedLoc.getNestedNameSpecifier()->getPrefix()};
+        RenameInfos.push_back(Info);
+      }
+    }
+    return true;
+  }
+
+  bool VisitTypeLoc(TypeLoc Loc) {
+    if (IsTypeAliasWhichWillBeRenamedElsewhere(Loc))
+      return true;
+
+    auto Parents = Context.getParents(Loc);
+    TypeLoc ParentTypeLoc;
+    if (!Parents.empty()) {
+      // Handle cases of nested name specifier locations.
+      //
+      // The VisitNestedNameSpecifierLoc interface is not implemented in
+      // RecursiveASTVisitor; we have to handle it explicitly.
+      if (const auto *NSL = Parents[0].get<NestedNameSpecifierLoc>()) {
+        VisitNestedNameSpecifierLocations(*NSL);
+        return true;
+      }
+
+      if (const auto *TL = Parents[0].get<TypeLoc>())
+        ParentTypeLoc = *TL;
+    }
+
+    // Handle the outermost TypeLoc which is directly linked to the interesting
+    // declaration and don't handle nested name specifier locations.
+    if (const auto *TargetDecl = getSupportedDeclFromTypeLoc(Loc)) {
+      if (isInUSRSet(TargetDecl)) {
+        // Only handle the outermost typeLoc.
+        //
+        // For a type like "a::Foo", there will be two typeLocs for it.
+        // One ElaboratedType, the other is RecordType:
+        //
+        //   ElaboratedType 0x33b9390 'a::Foo' sugar
+        //   `-RecordType 0x338fef0 'class a::Foo'
+        //     `-CXXRecord 0x338fe58 'Foo'
+        //
+        // Skip if this is an inner typeLoc.
+        if (!ParentTypeLoc.isNull() &&
+            isInUSRSet(getSupportedDeclFromTypeLoc(ParentTypeLoc)))
+          return true;
+        RenameInfo Info = {StartLocationForType(Loc), EndLocationForType(Loc),
+                           TargetDecl, getClosestAncestorDecl(Loc),
+                           GetNestedNameForType(Loc)};
+        RenameInfos.push_back(Info);
+        return true;
+      }
+    }
+
+    // Handle specific template class specialization cases.
+    if (const auto *TemplateSpecType =
+            dyn_cast<TemplateSpecializationType>(Loc.getType())) {
+      TypeLoc TargetLoc = Loc;
+      if (!ParentTypeLoc.isNull()) {
+        if (llvm::isa<ElaboratedType>(ParentTypeLoc.getType()))
+          TargetLoc = ParentTypeLoc;
+      }
+
+      if (isInUSRSet(TemplateSpecType->getTemplateName().getAsTemplateDecl())) {
+        TypeLoc TargetLoc = Loc;
+        // FIXME: Find a better way to handle this case.
+        // For a qualified template class specialization type like
+        // "ns::Foo<int>" in "ns::Foo<int>& f();", we want the parent typeLoc
+        // (ElaboratedType) of the TemplateSpecializationType in order to
+        // catch the prefix qualifiers "ns::".
+        if (!ParentTypeLoc.isNull() &&
+            llvm::isa<ElaboratedType>(ParentTypeLoc.getType()))
+          TargetLoc = ParentTypeLoc;
+        RenameInfo Info = {
+            StartLocationForType(TargetLoc), EndLocationForType(TargetLoc),
+            TemplateSpecType->getTemplateName().getAsTemplateDecl(),
+            getClosestAncestorDecl(
+                ast_type_traits::DynTypedNode::create(TargetLoc)),
+            GetNestedNameForType(TargetLoc)};
+        RenameInfos.push_back(Info);
+      }
+    }
+    return true;
+  }
+
+  // Returns a list of RenameInfo.
+  const std::vector<RenameInfo> &getRenameInfos() const { return RenameInfos; }
+
+  // Returns a list of using declarations that need to be updated.
+  const std::vector<const UsingDecl *> &getUsingDecls() const {
+    return UsingDecls;
+  }
+
+private:
+  // FIXME: This method may not be suitable for renaming other types like alias
+  // types. Need to figure out a way to handle it.
+  bool IsTypeAliasWhichWillBeRenamedElsewhere(TypeLoc TL) const {
+    while (!TL.isNull()) {
+      // SubstTemplateTypeParm is the TypeLocation class for a substituted type
+      // inside a template expansion so we ignore these.  For example:
+      //
+      // template <typename T> struct S {
+      //   T t;  // <-- this T becomes a TypeLoc(int) with class
+      //         //     SubstTemplateTypeParm when S is instantiated
+      // }
+      if (TL.getTypeLocClass() == TypeLoc::SubstTemplateTypeParm)
+        return true;
+
+      // Typedef is the TypeLocation class for a type which is a typedef to the
+      // type we want to replace.  We ignore the use of the typedef as we will
+      // replace the definition of it.  For example:
+      //
+      // typedef int T;
+      // T a;  // <---  This T is a TypeLoc(int) with class Typedef.
+      if (TL.getTypeLocClass() == TypeLoc::Typedef)
+        return true;
+      TL = TL.getNextTypeLoc();
+    }
+    return false;
+  }
+
+  // Get the supported declaration from a given typeLoc. If the declaration type
+  // is not supported, returns nullptr.
+  //
+  // FIXME: support more types, e.g. enum, type alias.
+  const NamedDecl *getSupportedDeclFromTypeLoc(TypeLoc Loc) {
+    if (const auto *RD = Loc.getType()->getAsCXXRecordDecl())
+      return RD;
+    return nullptr;
+  }
+
+  // Get the closest ancestor of a given AST node that is a declaration.
+  template <typename ASTNodeType>
+  const Decl *getClosestAncestorDecl(const ASTNodeType &Node) {
+    auto Parents = Context.getParents(Node);
+    // FIXME: figure out how to handle it when there are multiple parents.
+    if (Parents.size() != 1)
+      return nullptr;
+    if (ast_type_traits::ASTNodeKind::getFromNodeKind<Decl>().isBaseOf(
+            Parents[0].getNodeKind()))
+      return Parents[0].template get<Decl>();
+    return getClosestAncestorDecl(Parents[0]);
+  }
+
+  // Get the parent typeLoc of a given typeLoc. If there is no such parent,
+  // return nullptr.
+  const TypeLoc *getParentTypeLoc(TypeLoc Loc) const {
+    auto Parents = Context.getParents(Loc);
+    // FIXME: figure out how to handle it when there are multiple parents.
+    if (Parents.size() != 1)
+      return nullptr;
+    return Parents[0].get<TypeLoc>();
+  }
+
+  // Check whether the USR of a given Decl is in the USRSet.
+  bool isInUSRSet(const Decl *Decl) const {
+    auto USR = getUSRForDecl(Decl);
+    if (USR.empty())
+      return false;
+    return llvm::is_contained(USRSet, USR);
+  }
+
+  const std::set<std::string> USRSet;
+  ASTContext &Context;
+  std::vector<RenameInfo> RenameInfos;
+  // Records the using declarations that contain using-shadow declarations of
+  // the symbols being renamed.
+  std::vector<const UsingDecl *> UsingDecls;
+};
+
+} // namespace
+
+std::vector<SourceLocation>
+getLocationsOfUSRs(const std::vector<std::string> &USRs, StringRef PrevName,
+                   Decl *Decl) {
+  USRLocFindingASTVisitor Visitor(USRs, PrevName, Decl->getASTContext());
+  Visitor.TraverseDecl(Decl);
+  return Visitor.getLocationsFound();
+}
+
+std::vector<tooling::AtomicChange>
+createRenameAtomicChanges(llvm::ArrayRef<std::string> USRs,
+                          llvm::StringRef NewName, Decl *TranslationUnitDecl) {
+  RenameLocFinder Finder(USRs, TranslationUnitDecl->getASTContext());
+  Finder.TraverseDecl(TranslationUnitDecl);
+
+  const SourceManager &SM =
+      TranslationUnitDecl->getASTContext().getSourceManager();
+
+  std::vector<tooling::AtomicChange> AtomicChanges;
+  auto Replace = [&](SourceLocation Start, SourceLocation End,
+                     llvm::StringRef Text) {
+    tooling::AtomicChange ReplaceChange = tooling::AtomicChange(SM, Start);
+    llvm::Error Err = ReplaceChange.replace(
+        SM, CharSourceRange::getTokenRange(Start, End), Text);
+    if (Err) {
+      llvm::errs() << "Faile to add replacement to AtomicChange: "
+                   << llvm::toString(std::move(Err)) << "\n";
+      return;
+    }
+    AtomicChanges.push_back(std::move(ReplaceChange));
+  };
+
+  for (const auto &RenameInfo : Finder.getRenameInfos()) {
+    std::string ReplacedName = NewName.str();
+    if (RenameInfo.FromDecl && RenameInfo.Context) {
+      if (!llvm::isa<clang::TranslationUnitDecl>(
+              RenameInfo.Context->getDeclContext())) {
+        ReplacedName = tooling::replaceNestedName(
+            RenameInfo.Specifier, RenameInfo.Context->getDeclContext(),
+            RenameInfo.FromDecl,
+            NewName.startswith("::") ? NewName.str() : ("::" + NewName).str());
+      }
+    }
+    // If the NewName contains leading "::", add it back.
+    if (NewName.startswith("::") && NewName.substr(2) == ReplacedName)
+      ReplacedName = NewName.str();
+    Replace(RenameInfo.Begin, RenameInfo.End, ReplacedName);
+  }
+
+  // Hanlde using declarations explicitly as "using a::Foo" don't trigger
+  // typeLoc for "a::Foo".
+  for (const auto *Using : Finder.getUsingDecls())
+    Replace(Using->getLocStart(), Using->getLocEnd(), "using " + NewName.str());
+
+  return AtomicChanges;
+}
+
+} // end namespace tooling
+} // end namespace clang
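
Aside: a sketch (not part of the patch) of driving createRenameAtomicChanges end to end. A real caller would collect the full USR set via USRFindingAction; here a single USR from getUSRForDecl stands in, and the code snippet and names are illustrative.

#include "clang/Frontend/ASTUnit.h"
#include "clang/Tooling/Refactoring/AtomicChange.h"
#include "clang/Tooling/Refactoring/Rename/USRFinder.h"
#include "clang/Tooling/Refactoring/Rename/USRLocFinder.h"
#include "clang/Tooling/Tooling.h"
#include "llvm/Support/raw_ostream.h"

int main() {
  auto AST = clang::tooling::buildASTFromCode(
      "namespace a { class Foo {}; } a::Foo f;");
  if (!AST)
    return 1;
  clang::ASTContext &Ctx = AST->getASTContext();
  // Look the class up by qualified name, then rename it to "b::Bar".
  const clang::NamedDecl *ND = clang::tooling::getNamedDeclFor(Ctx, "a::Foo");
  if (!ND)
    return 1;
  std::vector<std::string> USRs = {clang::tooling::getUSRForDecl(ND)};
  auto Changes = clang::tooling::createRenameAtomicChanges(
      USRs, "b::Bar", Ctx.getTranslationUnitDecl());
  for (auto &Change : Changes)
    llvm::outs() << Change.toYAMLString() << "\n";
  return 0;
}
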
diff --git a/interpreter/llvm/src/tools/clang/lib/Tooling/Tooling.cpp b/interpreter/llvm/src/tools/clang/lib/Tooling/Tooling.cpp
index 9e1181281f139..662f02dca2a66 100644
--- a/interpreter/llvm/src/tools/clang/lib/Tooling/Tooling.cpp
+++ b/interpreter/llvm/src/tools/clang/lib/Tooling/Tooling.cpp
@@ -100,7 +100,6 @@ clang::CompilerInvocation *newInvocation(
       *Diagnostics);
   Invocation->getFrontendOpts().DisableFree = false;
   Invocation->getCodeGenOpts().DisableFree = false;
-  Invocation->getDependencyOutputOpts() = DependencyOutputOptions();
   return Invocation;
 }
 
@@ -140,9 +139,11 @@ bool runToolOnCodeWithArgs(
   OverlayFileSystem->pushOverlay(InMemoryFileSystem);
   llvm::IntrusiveRefCntPtr<FileManager> Files(
       new FileManager(FileSystemOptions(), OverlayFileSystem));
-  ToolInvocation Invocation(getSyntaxOnlyToolArgs(ToolName, Args, FileNameRef),
-                            ToolAction, Files.get(),
-                            std::move(PCHContainerOps));
+  ArgumentsAdjuster Adjuster = getClangStripDependencyFileAdjuster();
+  ToolInvocation Invocation(
+      getSyntaxOnlyToolArgs(ToolName, Adjuster(Args, FileNameRef), FileNameRef),
+      ToolAction, Files.get(),
+      std::move(PCHContainerOps));
 
   SmallString<1024> CodeStorage;
   InMemoryFileSystem->addFile(FileNameRef, 0,
@@ -260,6 +261,8 @@ bool ToolInvocation::run() {
   Driver->setCheckInputsExist(false);
   const std::unique_ptr<clang::driver::Compilation> Compilation(
       Driver->BuildCompilation(llvm::makeArrayRef(Argv)));
+  if (!Compilation)
+    return false;
   const llvm::opt::ArgStringList *const CC1Args = getCC1Arguments(
       &Diagnostics, Compilation.get());
   if (!CC1Args) {
@@ -333,6 +336,7 @@ ClangTool::ClangTool(const CompilationDatabase &Compilations,
   OverlayFileSystem->pushOverlay(InMemoryFileSystem);
   appendArgumentsAdjuster(getClangStripOutputAdjuster());
   appendArgumentsAdjuster(getClangSyntaxOnlyAdjuster());
+  appendArgumentsAdjuster(getClangStripDependencyFileAdjuster());
 }
 
 ClangTool::~ClangTool() {}
@@ -508,7 +512,8 @@ buildASTFromCode(const Twine &Code, const Twine &FileName,
 std::unique_ptr<ASTUnit> buildASTFromCodeWithArgs(
     const Twine &Code, const std::vector<std::string> &Args,
     const Twine &FileName, const Twine &ToolName,
-    std::shared_ptr<PCHContainerOperations> PCHContainerOps) {
+    std::shared_ptr<PCHContainerOperations> PCHContainerOps,
+    ArgumentsAdjuster Adjuster) {
   SmallString<16> FileNameStorage;
   StringRef FileNameRef = FileName.toNullTerminatedStringRef(FileNameStorage);
 
@@ -521,8 +526,10 @@ std::unique_ptr<ASTUnit> buildASTFromCodeWithArgs(
   OverlayFileSystem->pushOverlay(InMemoryFileSystem);
   llvm::IntrusiveRefCntPtr Files(
       new FileManager(FileSystemOptions(), OverlayFileSystem));
-  ToolInvocation Invocation(getSyntaxOnlyToolArgs(ToolName, Args, FileNameRef),
-                            &Action, Files.get(), std::move(PCHContainerOps));
+
+  ToolInvocation Invocation(
+      getSyntaxOnlyToolArgs(ToolName, Adjuster(Args, FileNameRef), FileNameRef),
+      &Action, Files.get(), std::move(PCHContainerOps));
 
   SmallString<1024> CodeStorage;
   InMemoryFileSystem->addFile(FileNameRef, 0,
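
Aside: the new trailing Adjuster parameter lets a caller rewrite the argument list before the invocation is built; its default is the dependency-file adjuster wired in above. A hedged sketch, assuming the clang tooling headers; the -DEXAMPLE flag and the "example-tool" name are invented for illustration.

#include "clang/Frontend/ASTUnit.h"
#include "clang/Frontend/PCHContainerOperations.h"
#include "clang/Tooling/ArgumentsAdjusters.h"
#include "clang/Tooling/Tooling.h"
#include <memory>

int main() {
  using namespace clang::tooling;
  // Strip dependency-file flags such as -MD, then append an extra define.
  ArgumentsAdjuster Adjuster = combineAdjusters(
      getClangStripDependencyFileAdjuster(),
      getInsertArgumentAdjuster("-DEXAMPLE", ArgumentInsertPosition::END));
  auto AST = buildASTFromCodeWithArgs(
      "int x;", {"-MD"}, "input.cc", "example-tool",
      std::make_shared<clang::PCHContainerOperations>(), Adjuster);
  return AST ? 0 : 1;
}
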
diff --git a/interpreter/llvm/src/tools/clang/tools/CMakeLists.txt b/interpreter/llvm/src/tools/clang/tools/CMakeLists.txt
index b0c97f0f1e4ca..4976332b7dbc2 100644
--- a/interpreter/llvm/src/tools/clang/tools/CMakeLists.txt
+++ b/interpreter/llvm/src/tools/clang/tools/CMakeLists.txt
@@ -10,6 +10,8 @@ add_clang_subdirectory(clang-offload-bundler)
 
 add_clang_subdirectory(c-index-test)
 
+add_clang_subdirectory(clang-rename)
+
 if(CLANG_ENABLE_ARCMT)
   add_clang_subdirectory(arcmt-test)
   add_clang_subdirectory(c-arcmt-test)
diff --git a/interpreter/llvm/src/tools/clang/tools/c-index-test/c-index-test.c b/interpreter/llvm/src/tools/clang/tools/c-index-test/c-index-test.c
index 1f5d604431978..cf3581e259f7e 100644
--- a/interpreter/llvm/src/tools/clang/tools/c-index-test/c-index-test.c
+++ b/interpreter/llvm/src/tools/clang/tools/c-index-test/c-index-test.c
@@ -804,11 +804,44 @@ static void PrintCursor(CXCursor Cursor, const char *CommentSchemaFile) {
       printf(" (const)");
     if (clang_CXXMethod_isPureVirtual(Cursor))
       printf(" (pure)");
+    if (clang_EnumDecl_isScoped(Cursor))
+      printf(" (scoped)");
     if (clang_Cursor_isVariadic(Cursor))
       printf(" (variadic)");
     if (clang_Cursor_isObjCOptional(Cursor))
       printf(" (@optional)");
 
+    switch (clang_getCursorExceptionSpecificationType(Cursor))
+    {
+      case CXCursor_ExceptionSpecificationKind_None:
+        break;
+
+      case CXCursor_ExceptionSpecificationKind_DynamicNone:
+        printf(" (noexcept dynamic none)");
+        break;
+
+      case CXCursor_ExceptionSpecificationKind_Dynamic:
+        printf(" (noexcept dynamic)");
+        break;
+
+      case CXCursor_ExceptionSpecificationKind_MSAny:
+        printf(" (noexcept dynamic any)");
+        break;
+
+      case CXCursor_ExceptionSpecificationKind_BasicNoexcept:
+        printf(" (noexcept)");
+        break;
+
+      case CXCursor_ExceptionSpecificationKind_ComputedNoexcept:
+        printf(" (computed-noexcept)");
+        break;
+
+      case CXCursor_ExceptionSpecificationKind_Unevaluated:
+      case CXCursor_ExceptionSpecificationKind_Uninstantiated:
+      case CXCursor_ExceptionSpecificationKind_Unparsed:
+        break;
+    }
+
     {
       CXString language;
       CXString definedIn;
@@ -1742,6 +1775,8 @@ int perform_test_load_source(int argc, const char **argv,
       return -1;
 
     if (Repeats > 1) {
+      clang_suspendTranslationUnit(TU);
+
       Err = clang_reparseTranslationUnit(TU, num_unsaved_files, unsaved_files,
                                          clang_defaultReparseOptions(TU));
       if (Err != CXError_Success) {
@@ -1848,6 +1883,34 @@ int perform_test_reparse_source(int argc, const char **argv, int trials,
   return result;
 }
 
+static int perform_single_file_parse(const char *filename) {
+  CXIndex Idx;
+  CXTranslationUnit TU;
+  enum CXErrorCode Err;
+  int result;
+
+  Idx = clang_createIndex(/* excludeDeclsFromPCH */1,
+                          /* displayDiagnostics=*/1);
+
+  Err = clang_parseTranslationUnit2(Idx, filename,
+                                    /*command_line_args=*/NULL,
+                                    /*num_command_line_args=*/0,
+                                    /*unsaved_files=*/NULL,
+                                    /*num_unsaved_files=*/0,
+                                    CXTranslationUnit_SingleFileParse, &TU);
+  if (Err != CXError_Success) {
+    fprintf(stderr, "Unable to load translation unit!\n");
+    describeLibclangFailure(Err);
+    clang_disposeIndex(Idx);
+    return 1;
+  }
+
+  result = perform_test_load(Idx, TU, /*filter=*/"all", /*prefix=*/NULL, FilteredPrintingVisitor, /*PostVisit=*/NULL,
+                             /*CommentSchemaFile=*/NULL);
+  clang_disposeIndex(Idx);
+  return result;
+}
+
 /******************************************************************************/
 /* Logic for testing clang_getCursor().                                       */
 /******************************************************************************/
@@ -4437,6 +4500,8 @@ int cindextest_main(int argc, const char **argv) {
       return perform_test_load_source(argc - 3, argv + 3, argv[2], I,
                                       postVisit);
   }
+  else if (argc >= 3 && strcmp(argv[1], "-single-file-parse") == 0)
+    return perform_single_file_parse(argv[2]);
   else if (argc >= 4 && strcmp(argv[1], "-test-file-scan") == 0)
     return perform_file_scan(argv[2], argv[3],
                              argc >= 5 ? argv[4] : 0);
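
Aside: outside of c-index-test, the new single-file mode is just another parse option. A minimal libclang sketch (not part of the patch; written as C++ for consistency with the other examples, with the file name taken from argv):

#include <clang-c/Index.h>
#include <cstdio>

int main(int argc, const char **argv) {
  if (argc < 2)
    return 1;
  CXIndex Idx = clang_createIndex(/*excludeDeclarationsFromPCH=*/1,
                                  /*displayDiagnostics=*/1);
  CXTranslationUnit TU;
  // SingleFileParse skips all #includes, so a header parses in isolation.
  enum CXErrorCode Err = clang_parseTranslationUnit2(
      Idx, argv[1], /*command_line_args=*/nullptr, 0,
      /*unsaved_files=*/nullptr, 0, CXTranslationUnit_SingleFileParse, &TU);
  if (Err == CXError_Success) {
    // A suspended TU drops its AST to save memory; a reparse revives it.
    clang_suspendTranslationUnit(TU);
    clang_disposeTranslationUnit(TU);
  } else {
    fprintf(stderr, "parse failed: %d\n", (int)Err);
  }
  clang_disposeIndex(Idx);
  return Err == CXError_Success ? 0 : 1;
}
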
diff --git a/interpreter/llvm/src/tools/clang/tools/c-index-test/core_main.cpp b/interpreter/llvm/src/tools/clang/tools/c-index-test/core_main.cpp
index 4f2c3cb34a9ba..c255f54ba68c9 100644
--- a/interpreter/llvm/src/tools/clang/tools/c-index-test/core_main.cpp
+++ b/interpreter/llvm/src/tools/clang/tools/c-index-test/core_main.cpp
@@ -217,7 +217,7 @@ static bool printSourceSymbolsFromModule(StringRef modulePath,
   IntrusiveRefCntPtr<DiagnosticsEngine> Diags =
       CompilerInstance::createDiagnostics(new DiagnosticOptions());
   std::unique_ptr<ASTUnit> AU = ASTUnit::LoadFromASTFile(
-      modulePath, *pchRdr, Diags,
+      modulePath, *pchRdr, ASTUnit::LoadASTOnly, Diags,
       FileSystemOpts, /*UseDebugInfo=*/false,
       /*OnlyLocalDecls=*/true, None,
       /*CaptureDiagnostics=*/false,
diff --git a/interpreter/llvm/src/tools/clang/tools/clang-check/ClangCheck.cpp b/interpreter/llvm/src/tools/clang/tools/clang-check/ClangCheck.cpp
index dac11ff077718..e190c0721afb1 100644
--- a/interpreter/llvm/src/tools/clang/tools/clang-check/ClangCheck.cpp
+++ b/interpreter/llvm/src/tools/clang/tools/clang-check/ClangCheck.cpp
@@ -124,8 +124,7 @@ class FixItRewriter : public clang::FixItRewriter {
 /// \c FixItRewriter.
 class FixItAction : public clang::FixItAction {
 public:
-  bool BeginSourceFileAction(clang::CompilerInstance& CI,
-                             StringRef Filename) override {
+  bool BeginSourceFileAction(clang::CompilerInstance& CI) override {
     FixItOpts.reset(new FixItOptions);
     Rewriter.reset(new FixItRewriter(CI.getDiagnostics(), CI.getSourceManager(),
                                      CI.getLangOpts(), FixItOpts.get()));
diff --git a/interpreter/llvm/src/tools/clang/tools/clang-format/clang-format.py b/interpreter/llvm/src/tools/clang/tools/clang-format/clang-format.py
index ae8a6ebf74e92..2412566346f24 100644
--- a/interpreter/llvm/src/tools/clang/tools/clang-format/clang-format.py
+++ b/interpreter/llvm/src/tools/clang/tools/clang-format/clang-format.py
@@ -63,8 +63,19 @@ def main():
   # Determine range to format.
   if vim.eval('exists("l:lines")') == '1':
     lines = vim.eval('l:lines')
+  elif vim.eval('exists("l:formatdiff")') == '1':
+    with open(vim.current.buffer.name, 'r') as f:
+      ondisk = f.read().splitlines()
+    sequence = difflib.SequenceMatcher(None, ondisk, vim.current.buffer)
+    lines = []
+    for op in reversed(sequence.get_opcodes()):
+      if op[0] not in ['equal', 'delete']:
+        lines += ['-lines', '%s:%s' % (op[3] + 1, op[4])]
+    if lines == []:
+      return
   else:
-    lines = '%s:%s' % (vim.current.range.start + 1, vim.current.range.end + 1)
+    lines = ['-lines', '%s:%s' % (vim.current.range.start + 1,
+                                  vim.current.range.end + 1)]
 
   # Determine the cursor position.
   cursor = int(vim.eval('line2byte(line("."))+col(".")')) - 2
@@ -82,7 +93,7 @@ def main():
   # Call formatter.
   command = [binary, '-style', style, '-cursor', str(cursor)]
   if lines != 'all':
-    command.extend(['-lines', lines])
+    command += lines
   if fallback_style:
     command.extend(['-fallback-style', fallback_style])
   if vim.current.buffer.name:
diff --git a/interpreter/llvm/src/tools/clang/tools/clang-format/git-clang-format b/interpreter/llvm/src/tools/clang/tools/clang-format/git-clang-format
index 3d1ba8a3c1070..60cd4fb25b635 100755
--- a/interpreter/llvm/src/tools/clang/tools/clang-format/git-clang-format
+++ b/interpreter/llvm/src/tools/clang/tools/clang-format/git-clang-format
@@ -20,7 +20,7 @@ clang-format on the changes in current files or a specific commit.
 For further details, run:                                                        
 git clang-format -h                                                              
                                                                                  
-Requires Python 2.7                                                              
+Requires Python 2.7 or Python 3                                                  
 """               
 
 from __future__ import print_function
@@ -258,7 +258,7 @@ def get_object_type(value):
   stdout, stderr = p.communicate()
   if p.returncode != 0:
     return None
-  return stdout.strip()
+  return convert_string(stdout.strip())
 
 
 def compute_diff_and_extract_lines(commits, files):
@@ -301,6 +301,7 @@ def extract_lines(patch_file):
   list of line `Range`s."""
   matches = {}
   for line in patch_file:
+    line = convert_string(line)
     match = re.search(r'^\+\+\+\ [^/]+/(.*)', line)
     if match:
       filename = match.group(1).rstrip('\r\n')
@@ -323,6 +324,8 @@ def filter_by_extension(dictionary, allowed_extensions):
   allowed_extensions = frozenset(allowed_extensions)
   for filename in list(dictionary.keys()):
     base_ext = filename.rsplit('.', 1)
+    if len(base_ext) == 1 and '' in allowed_extensions:
+        continue
     if len(base_ext) == 1 or base_ext[1].lower() not in allowed_extensions:
       del dictionary[filename]
 
@@ -385,7 +388,7 @@ def create_tree(input_lines, mode):
   with temporary_index_file():
     p = subprocess.Popen(cmd, stdin=subprocess.PIPE)
     for line in input_lines:
-      p.stdin.write('%s\0' % line)
+      p.stdin.write(to_bytes('%s\0' % line))
     p.stdin.close()
     if p.wait() != 0:
       die('`%s` failed' % ' '.join(cmd))
@@ -440,7 +443,7 @@ def clang_format_to_blob(filename, line_ranges, revision=None,
     die('`%s` failed' % ' '.join(clang_format_cmd))
   if git_show and git_show.wait() != 0:
     die('`%s` failed' % ' '.join(git_show_cmd))
-  return stdout.rstrip('\r\n')
+  return convert_string(stdout).rstrip('\r\n')
 
 
 @contextlib.contextmanager
@@ -527,6 +530,10 @@ def run(*args, **kwargs):
   p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                        stdin=subprocess.PIPE)
   stdout, stderr = p.communicate(input=stdin)
+
+  stdout = convert_string(stdout)
+  stderr = convert_string(stderr)
+
   if p.returncode == 0:
     if stderr:
       if verbose:
@@ -547,5 +554,26 @@ def die(message):
   sys.exit(2)
 
 
+def to_bytes(str_input):
+    # Encode to UTF-8 to get binary data.
+    if isinstance(str_input, bytes):
+        return str_input
+    return str_input.encode('utf-8')
+
+
+def to_string(bytes_input):
+    if isinstance(bytes_input, str):
+        return bytes_input
+    return bytes_input.encode('utf-8')
+
+
+def convert_string(bytes_input):
+    try:
+        return to_string(bytes_input.decode('utf-8'))
+    except AttributeError: # 'str' object has no attribute 'decode'.
+        return str(bytes_input)
+    except UnicodeError:
+        return str(bytes_input)
+
 if __name__ == '__main__':
   main()
diff --git a/interpreter/llvm/src/tools/clang/tools/clang-fuzzer/CMakeLists.txt b/interpreter/llvm/src/tools/clang/tools/clang-fuzzer/CMakeLists.txt
index 87d21c6bf116b..a4ea4ca19cdd1 100644
--- a/interpreter/llvm/src/tools/clang/tools/clang-fuzzer/CMakeLists.txt
+++ b/interpreter/llvm/src/tools/clang/tools/clang-fuzzer/CMakeLists.txt
@@ -1,5 +1,5 @@
 if( LLVM_USE_SANITIZE_COVERAGE )
-  set(LLVM_LINK_COMPONENTS support)
+  set(LLVM_LINK_COMPONENTS ${LLVM_TARGETS_TO_BUILD})
 
   add_clang_executable(clang-fuzzer
     EXCLUDE_FROM_ALL
@@ -10,6 +10,7 @@ if( LLVM_USE_SANITIZE_COVERAGE )
     ${CLANG_FORMAT_LIB_DEPS}
     clangAST
     clangBasic
+    clangCodeGen
     clangDriver
     clangFrontend
     clangRewriteFrontend
diff --git a/interpreter/llvm/src/tools/clang/tools/clang-fuzzer/ClangFuzzer.cpp b/interpreter/llvm/src/tools/clang/tools/clang-fuzzer/ClangFuzzer.cpp
index 1692882c0b5f1..9eceb843e581c 100644
--- a/interpreter/llvm/src/tools/clang/tools/clang-fuzzer/ClangFuzzer.cpp
+++ b/interpreter/llvm/src/tools/clang/tools/clang-fuzzer/ClangFuzzer.cpp
@@ -14,18 +14,25 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang/Tooling/Tooling.h"
-#include "clang/Frontend/FrontendActions.h"
+#include "clang/CodeGen/CodeGenAction.h"
 #include "clang/Frontend/CompilerInstance.h"
 #include "clang/Lex/PreprocessorOptions.h"
 #include "llvm/Option/Option.h"
+#include "llvm/Support/TargetSelect.h"
 
 using namespace clang;
 
 extern "C" int LLVMFuzzerTestOneInput(uint8_t *data, size_t size) {
   std::string s((const char *)data, size);
+  llvm::InitializeAllTargets();
+  llvm::InitializeAllTargetMCs();
+  llvm::InitializeAllAsmPrinters();
+  llvm::InitializeAllAsmParsers();
+
   llvm::opt::ArgStringList CC1Args;
   CC1Args.push_back("-cc1");
   CC1Args.push_back("./test.cc");
+  CC1Args.push_back("-O2");
   llvm::IntrusiveRefCntPtr<FileManager> Files(
       new FileManager(FileSystemOptions()));
   IgnoringDiagConsumer Diags;
@@ -39,7 +46,7 @@ extern "C" int LLVMFuzzerTestOneInput(uint8_t *data, size_t size) {
       llvm::MemoryBuffer::getMemBuffer(s);
   Invocation->getPreprocessorOpts().addRemappedFile("./test.cc", Input.release());
   std::unique_ptr<tooling::FrontendActionFactory> action(
-      tooling::newFrontendActionFactory<clang::SyntaxOnlyAction>());
+      tooling::newFrontendActionFactory<clang::EmitObjAction>());
   std::shared_ptr<PCHContainerOperations> PCHContainerOps =
       std::make_shared<PCHContainerOperations>();
   action->runInvocation(std::move(Invocation), Files.get(), PCHContainerOps,
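
Aside: backend registration only has to happen once per process, so a fuzz target could hoist it out of the per-input path. A sketch of that alternative (not the committed code) using std::call_once:

#include <cstddef>
#include <cstdint>
#include <mutex>

#include "llvm/Support/TargetSelect.h"

static void initTargetsOnce() {
  static std::once_flag Flag;
  std::call_once(Flag, [] {
    llvm::InitializeAllTargets();
    llvm::InitializeAllTargetMCs();
    llvm::InitializeAllAsmPrinters();
    llvm::InitializeAllAsmParsers();
  });
}

extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
  initTargetsOnce();
  // ... build the CompilerInvocation and run EmitObjAction as above ...
  (void)Data;
  (void)Size;
  return 0; // libFuzzer targets must return 0.
}
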
diff --git a/interpreter/llvm/src/tools/clang/tools/clang-import-test/clang-import-test.cpp b/interpreter/llvm/src/tools/clang/tools/clang-import-test/clang-import-test.cpp
index 567a4bb4f0a26..6b724e9cf5fae 100644
--- a/interpreter/llvm/src/tools/clang/tools/clang-import-test/clang-import-test.cpp
+++ b/interpreter/llvm/src/tools/clang/tools/clang-import-test/clang-import-test.cpp
@@ -17,7 +17,9 @@
 #include "clang/Basic/TargetInfo.h"
 #include "clang/Basic/TargetOptions.h"
 #include "clang/CodeGen/ModuleBuilder.h"
+#include "clang/Frontend/ASTConsumers.h"
 #include "clang/Frontend/CompilerInstance.h"
+#include "clang/Frontend/MultiplexConsumer.h"
 #include "clang/Frontend/TextDiagnosticBuffer.h"
 #include "clang/Lex/Lexer.h"
 #include "clang/Lex/Preprocessor.h"
@@ -51,6 +53,10 @@ static llvm::cl::list<std::string>
               llvm::cl::desc("Argument to pass to the CompilerInvocation"),
               llvm::cl::CommaSeparated);
 
+static llvm::cl::opt<bool>
+DumpAST("dump-ast", llvm::cl::init(false),
+        llvm::cl::desc("Dump combined AST"));
+
 namespace init_convenience {
 class TestDiagnosticConsumer : public DiagnosticConsumer {
 private:
@@ -176,14 +182,6 @@ BuildCompilerInstance(ArrayRef<const char *> ClangArgv) {
   return Ins;
 }
 
-std::unique_ptr<CompilerInstance>
-BuildCompilerInstance(ArrayRef<std::string> ClangArgs) {
-  std::vector<const char *> ClangArgv(ClangArgs.size());
-  std::transform(ClangArgs.begin(), ClangArgs.end(), ClangArgv.begin(),
-                 [](const std::string &s) -> const char * { return s.data(); });
-  return init_convenience::BuildCompilerInstance(ClangArgv);
-}
-
 std::unique_ptr<ASTContext>
 BuildASTContext(CompilerInstance &CI, SelectorTable &ST, Builtin::Context &BC) {
   auto AST = llvm::make_unique<ASTContext>(
@@ -233,7 +231,7 @@ std::unique_ptr<CompilerInstance> BuildIndirect(std::unique_ptr<CompilerInstanc
 llvm::Expected<std::unique_ptr<CompilerInstance>>
 Parse(const std::string &Path,
-      llvm::ArrayRef<std::unique_ptr<CompilerInstance>> Imports) {
+      llvm::ArrayRef<std::unique_ptr<CompilerInstance>> Imports,
+      bool ShouldDumpAST) {
   std::vector<const char *> ClangArgv(ClangArgs.size());
   std::transform(ClangArgs.begin(), ClangArgs.end(), ClangArgv.begin(),
                  [](const std::string &s) -> const char * { return s.data(); });
@@ -261,14 +260,20 @@ Parse(const std::string &Path,
   if (Imports.size())
     AddExternalSource(*CI, Imports);
 
+  std::vector<std::unique_ptr<ASTConsumer>> ASTConsumers;
+
   auto LLVMCtx = llvm::make_unique<llvm::LLVMContext>();
-  std::unique_ptr<CodeGenerator> CG =
-      init_convenience::BuildCodeGen(*CI, *LLVMCtx);
-  CG->Initialize(CI->getASTContext());
+  ASTConsumers.push_back(init_convenience::BuildCodeGen(*CI, *LLVMCtx));
+
+  if (ShouldDumpAST)
+    ASTConsumers.push_back(CreateASTDumper("", true, false, false));
 
   CI->getDiagnosticClient().BeginSourceFile(CI->getLangOpts(),
                                             &CI->getPreprocessor());
-  if (llvm::Error PE = ParseSource(Path, *CI, *CG)) {
+  MultiplexConsumer Consumers(std::move(ASTConsumers));
+  Consumers.Initialize(CI->getASTContext());
+
+  if (llvm::Error PE = ParseSource(Path, *CI, Consumers)) {
     return std::move(PE);
   }
   CI->getDiagnosticClient().EndSourceFile();
@@ -288,7 +293,8 @@ int main(int argc, const char **argv) {
   llvm::cl::ParseCommandLineOptions(argc, argv);
   std::vector<std::unique_ptr<CompilerInstance>> ImportCIs;
   for (auto I : Imports) {
-    llvm::Expected<std::unique_ptr<CompilerInstance>> ImportCI = Parse(I, {});
+    llvm::Expected<std::unique_ptr<CompilerInstance>> ImportCI =
+      Parse(I, {}, false);
     if (auto E = ImportCI.takeError()) {
       llvm::errs() << llvm::toString(std::move(E));
       exit(-1);
@@ -299,18 +305,12 @@ int main(int argc, const char **argv) {
   std::vector<std::unique_ptr<CompilerInstance>> IndirectCIs;
   if (!Direct) {
     for (auto &ImportCI : ImportCIs) {
-      llvm::Expected<std::unique_ptr<CompilerInstance>> IndirectCI =
-          BuildIndirect(ImportCI);
-      if (auto E = IndirectCI.takeError()) {
-        llvm::errs() << llvm::toString(std::move(E));
-        exit(-1);
-      } else {
-        IndirectCIs.push_back(std::move(*IndirectCI));
-      }
+      std::unique_ptr<CompilerInstance> IndirectCI = BuildIndirect(ImportCI);
+      IndirectCIs.push_back(std::move(IndirectCI));
     }
   }
   llvm::Expected<std::unique_ptr<CompilerInstance>> ExpressionCI =
-      Parse(Expression, Direct ? ImportCIs : IndirectCIs);
+      Parse(Expression, Direct ? ImportCIs : IndirectCIs, DumpAST);
   if (auto E = ExpressionCI.takeError()) {
     llvm::errs() << llvm::toString(std::move(E));
     exit(-1);
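
Aside: the MultiplexConsumer pattern above generalizes to any set of consumers. A sketch (not part of the patch) of a helper that tees an existing consumer through the AST dumper, using the same CreateASTDumper arguments as the patch; the helper name is invented.

#include "clang/AST/ASTConsumer.h"
#include "clang/Frontend/ASTConsumers.h"
#include "clang/Frontend/MultiplexConsumer.h"
#include "llvm/ADT/STLExtras.h"
#include <memory>
#include <utility>
#include <vector>

// Wrap Primary so every AST it sees is also dumped.
std::unique_ptr<clang::ASTConsumer>
withASTDump(std::unique_ptr<clang::ASTConsumer> Primary) {
  std::vector<std::unique_ptr<clang::ASTConsumer>> Consumers;
  Consumers.push_back(std::move(Primary));
  Consumers.push_back(clang::CreateASTDumper(/*FilterString=*/"",
                                             /*DumpDecls=*/true,
                                             /*Deserialize=*/false,
                                             /*DumpLookups=*/false));
  return llvm::make_unique<clang::MultiplexConsumer>(std::move(Consumers));
}
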
diff --git a/interpreter/llvm/src/tools/clang/tools/clang-rename/CMakeLists.txt b/interpreter/llvm/src/tools/clang/tools/clang-rename/CMakeLists.txt
new file mode 100644
index 0000000000000..771e3bdea6f01
--- /dev/null
+++ b/interpreter/llvm/src/tools/clang/tools/clang-rename/CMakeLists.txt
@@ -0,0 +1,24 @@
+set(LLVM_LINK_COMPONENTS
+  Option
+  Support
+  )
+
+add_clang_executable(clang-rename ClangRename.cpp)
+
+target_link_libraries(clang-rename
+  clangBasic
+  clangFrontend
+  clangRewrite
+  clangTooling
+  clangToolingCore
+  clangToolingRefactor
+  )
+
+install(TARGETS clang-rename RUNTIME DESTINATION bin)
+
+install(PROGRAMS clang-rename.py
+  DESTINATION share/clang
+  COMPONENT clang-rename)
+install(PROGRAMS clang-rename.el
+  DESTINATION share/clang
+  COMPONENT clang-rename)
diff --git a/interpreter/llvm/src/tools/clang/tools/clang-rename/ClangRename.cpp b/interpreter/llvm/src/tools/clang/tools/clang-rename/ClangRename.cpp
new file mode 100644
index 0000000000000..cc18a05bcdbe6
--- /dev/null
+++ b/interpreter/llvm/src/tools/clang/tools/clang-rename/ClangRename.cpp
@@ -0,0 +1,239 @@
+//===--- tools/extra/clang-rename/ClangRename.cpp - Clang rename tool -----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements a clang-rename tool that automatically finds and
+/// renames symbols in C++ code.
+///
+//===----------------------------------------------------------------------===//
+
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/DiagnosticOptions.h"
+#include "clang/Basic/FileManager.h"
+#include "clang/Basic/IdentifierTable.h"
+#include "clang/Basic/LangOptions.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Basic/TokenKinds.h"
+#include "clang/Frontend/TextDiagnosticPrinter.h"
+#include "clang/Rewrite/Core/Rewriter.h"
+#include "clang/Tooling/CommonOptionsParser.h"
+#include "clang/Tooling/Refactoring.h"
+#include "clang/Tooling/Refactoring/Rename/RenamingAction.h"
+#include "clang/Tooling/Refactoring/Rename/USRFindingAction.h"
+#include "clang/Tooling/ReplacementsYaml.h"
+#include "clang/Tooling/Tooling.h"
+#include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/YAMLTraits.h"
+#include "llvm/Support/raw_ostream.h"
+#include <string>
+#include <system_error>
+
+using namespace llvm;
+using namespace clang;
+
+/// \brief An oldname -> newname rename.
+struct RenameAllInfo {
+  unsigned Offset = 0;
+  std::string QualifiedName;
+  std::string NewName;
+};
+
+LLVM_YAML_IS_SEQUENCE_VECTOR(RenameAllInfo)
+
+namespace llvm {
+namespace yaml {
+
+/// \brief Specialized MappingTraits to describe how a RenameAllInfo is
+/// (de)serialized.
+template <> struct MappingTraits<RenameAllInfo> {
+  static void mapping(IO &IO, RenameAllInfo &Info) {
+    IO.mapOptional("Offset", Info.Offset);
+    IO.mapOptional("QualifiedName", Info.QualifiedName);
+    IO.mapRequired("NewName", Info.NewName);
+  }
+};
+
+} // end namespace yaml
+} // end namespace llvm
+
+static cl::OptionCategory ClangRenameOptions("clang-rename common options");
+
+static cl::list<unsigned> SymbolOffsets(
+    "offset",
+    cl::desc("Locates the symbol by offset as opposed to <line>:<column>."),
+    cl::ZeroOrMore, cl::cat(ClangRenameOptions));
+static cl::opt Inplace("i", cl::desc("Overwrite edited s."),
+                             cl::cat(ClangRenameOptions));
+static cl::list<std::string>
+    QualifiedNames("qualified-name",
+                   cl::desc("The fully qualified name of the symbol."),
+                   cl::ZeroOrMore, cl::cat(ClangRenameOptions));
+
+static cl::list<std::string>
+    NewNames("new-name", cl::desc("The new name to change the symbol to."),
+             cl::ZeroOrMore, cl::cat(ClangRenameOptions));
+static cl::opt<bool> PrintName(
+    "pn",
+    cl::desc("Print the found symbol's name prior to renaming to stderr."),
+    cl::cat(ClangRenameOptions));
+static cl::opt<bool> PrintLocations(
+    "pl", cl::desc("Print the locations affected by renaming to stderr."),
+    cl::cat(ClangRenameOptions));
+static cl::opt<std::string>
+    ExportFixes("export-fixes",
+                cl::desc("YAML file to store suggested fixes in."),
+                cl::value_desc("filename"), cl::cat(ClangRenameOptions));
+static cl::opt<std::string>
+    Input("input", cl::desc("YAML file to load oldname-newname pairs from."),
+          cl::Optional, cl::cat(ClangRenameOptions));
+static cl::opt Force("force",
+                           cl::desc("Ignore nonexistent qualified names."),
+                           cl::cat(ClangRenameOptions));
+
+int main(int argc, const char **argv) {
+  tooling::CommonOptionsParser OP(argc, argv, ClangRenameOptions);
+
+  if (!Input.empty()) {
+    // Populate QualifiedNames and NewNames from a YAML file.
+    ErrorOr<std::unique_ptr<MemoryBuffer>> Buffer =
+        llvm::MemoryBuffer::getFile(Input);
+    if (!Buffer) {
+      errs() << "clang-rename: failed to read " << Input << ": "
+             << Buffer.getError().message() << "\n";
+      return 1;
+    }
+
+    std::vector<RenameAllInfo> Infos;
+    llvm::yaml::Input YAML(Buffer.get()->getBuffer());
+    YAML >> Infos;
+    for (const auto &Info : Infos) {
+      if (!Info.QualifiedName.empty())
+        QualifiedNames.push_back(Info.QualifiedName);
+      else
+        SymbolOffsets.push_back(Info.Offset);
+      NewNames.push_back(Info.NewName);
+    }
+  }
+
+  // Check the arguments for correctness.
+  if (NewNames.empty()) {
+    errs() << "clang-rename: -new-name must be specified.\n\n";
+    return 1;
+  }
+
+  if (SymbolOffsets.empty() == QualifiedNames.empty()) {
+    errs() << "clang-rename: -offset and -qualified-name can't be present at "
+              "the same time.\n";
+    return 1;
+  }
+
+  // Check that each NewName is a valid identifier in C++17.
+  LangOptions Options;
+  Options.CPlusPlus = true;
+  Options.CPlusPlus1z = true;
+  IdentifierTable Table(Options);
+  for (const auto &NewName : NewNames) {
+    auto NewNameTokKind = Table.get(NewName).getTokenID();
+    if (!tok::isAnyIdentifier(NewNameTokKind)) {
+      errs() << "ERROR: new name is not a valid identifier in C++17.\n\n";
+      return 1;
+    }
+  }
+
+  if (SymbolOffsets.size() + QualifiedNames.size() != NewNames.size()) {
+    errs() << "clang-rename: number of symbol offsets(" << SymbolOffsets.size()
+           << ") + number of qualified names (" << QualifiedNames.size()
+           << ") must be equal to number of new names(" << NewNames.size()
+           << ").\n\n";
+    cl::PrintHelpMessage();
+    return 1;
+  }
+
+  auto Files = OP.getSourcePathList();
+  tooling::RefactoringTool Tool(OP.getCompilations(), Files);
+  tooling::USRFindingAction FindingAction(SymbolOffsets, QualifiedNames, Force);
+  Tool.run(tooling::newFrontendActionFactory(&FindingAction).get());
+  const std::vector<std::vector<std::string>> &USRList =
+      FindingAction.getUSRList();
+  const std::vector<std::string> &PrevNames = FindingAction.getUSRSpellings();
+  if (PrintName) {
+    for (const auto &PrevName : PrevNames) {
+      outs() << "clang-rename found name: " << PrevName << '\n';
+    }
+  }
+
+  if (FindingAction.errorOccurred()) {
+    // Diagnostics are already issued at this point.
+    return 1;
+  }
+
+  if (Force && PrevNames.size() < NewNames.size()) {
+    // No matching PrevName for all NewNames. Without Force this is an error
+    // above already.
+    return 0;
+  }
+
+  // Perform the renaming.
+  tooling::RenamingAction RenameAction(NewNames, PrevNames, USRList,
+                                       Tool.getReplacements(), PrintLocations);
+  std::unique_ptr<tooling::FrontendActionFactory> Factory =
+      tooling::newFrontendActionFactory(&RenameAction);
+  int ExitCode;
+
+  if (Inplace) {
+    ExitCode = Tool.runAndSave(Factory.get());
+  } else {
+    ExitCode = Tool.run(Factory.get());
+
+    if (!ExportFixes.empty()) {
+      std::error_code EC;
+      llvm::raw_fd_ostream OS(ExportFixes, EC, llvm::sys::fs::F_None);
+      if (EC) {
+        llvm::errs() << "Error opening output file: " << EC.message() << '\n';
+        return 1;
+      }
+
+      // Export replacements.
+      tooling::TranslationUnitReplacements TUR;
+      const auto &FileToReplacements = Tool.getReplacements();
+      for (const auto &Entry : FileToReplacements)
+        TUR.Replacements.insert(TUR.Replacements.end(), Entry.second.begin(),
+                                Entry.second.end());
+
+      yaml::Output YAML(OS);
+      YAML << TUR;
+      OS.close();
+      return 0;
+    }
+
+    // Write every file to stdout. Right now we just barf the files without any
+    // indication of which files start where, other than that we print the files
+    // in the same order we see them.
+    LangOptions DefaultLangOptions;
+    IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts = new DiagnosticOptions();
+    TextDiagnosticPrinter DiagnosticPrinter(errs(), &*DiagOpts);
+    DiagnosticsEngine Diagnostics(
+        IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs()), &*DiagOpts,
+        &DiagnosticPrinter, false);
+    auto &FileMgr = Tool.getFiles();
+    SourceManager Sources(Diagnostics, FileMgr);
+    Rewriter Rewrite(Sources, DefaultLangOptions);
+
+    Tool.applyAllReplacements(Rewrite);
+    for (const auto &File : Files) {
+      const auto *Entry = FileMgr.getFile(File);
+      const auto ID = Sources.getOrCreateFileID(Entry, SrcMgr::C_User);
+      Rewrite.getEditBuffer(ID).write(outs());
+    }
+  }
+
+  return ExitCode;
+}
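
For reference, the -export-fixes file written above via yaml::Output can be
read back through the same YAML traits. A minimal sketch, assuming clang's
Tooling headers are available; loadFixes is an illustrative name, not part of
the patch:

    // Deserialize a -export-fixes YAML file back into
    // TranslationUnitReplacements using the traits from ReplacementsYaml.h.
    #include "clang/Tooling/ReplacementsYaml.h"
    #include "llvm/Support/MemoryBuffer.h"
    #include "llvm/Support/YAMLTraits.h"

    bool loadFixes(llvm::StringRef Path,
                   clang::tooling::TranslationUnitReplacements &TUR) {
      auto Buf = llvm::MemoryBuffer::getFile(Path);
      if (!Buf)
        return false; // could not open the file
      llvm::yaml::Input YAML((*Buf)->getBuffer());
      YAML >> TUR;
      return !YAML.error(); // false on malformed YAML
    }
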
diff --git a/interpreter/llvm/src/tools/clang/tools/clang-rename/clang-rename.el b/interpreter/llvm/src/tools/clang/tools/clang-rename/clang-rename.el
new file mode 100644
index 0000000000000..b6c3ed4c686b7
--- /dev/null
+++ b/interpreter/llvm/src/tools/clang/tools/clang-rename/clang-rename.el
@@ -0,0 +1,79 @@
+;;; clang-rename.el --- Renames every occurrence of a symbol found at <point>.  -*- lexical-binding: t; -*-
+
+;; Keywords: tools, c
+
+;;; Commentary:
+
+;; To install clang-rename.el make sure the directory of this file is in your
+;; `load-path' and add
+;;
+;;   (require 'clang-rename)
+;;
+;; to your .emacs configuration.
+
+;;; Code:
+
+(defgroup clang-rename nil
+  "Integration with clang-rename"
+  :group 'c)
+
+(defcustom clang-rename-binary "clang-rename"
+  "Path to clang-rename executable."
+  :type '(file :must-match t)
+  :group 'clang-rename)
+
+;;;###autoload
+(defun clang-rename (new-name)
+  "Rename all instances of the symbol at point to NEW-NAME using clang-rename."
+  (interactive "sEnter a new name: ")
+  (save-some-buffers :all)
+  ;; clang-rename should not be combined with other operations when undoing.
+  (undo-boundary)
+  (let ((output-buffer (get-buffer-create "*clang-rename*")))
+    (with-current-buffer output-buffer (erase-buffer))
+    (let ((exit-code (call-process
+                      clang-rename-binary nil output-buffer nil
+                      (format "-offset=%d"
+                              ;; clang-rename wants file (byte) offsets, not
+                              ;; buffer (character) positions.
+                              (clang-rename--bufferpos-to-filepos
+                               ;; Emacs treats one character after a symbol as
+                               ;; part of the symbol, but clang-rename doesn’t.
+                               ;; Use the beginning of the current symbol, if
+                               ;; available, to resolve the inconsistency.
+                               (or (car (bounds-of-thing-at-point 'symbol))
+                                   (point))
+                               'exact))
+                      (format "-new-name=%s" new-name)
+                      "-i" (buffer-file-name))))
+      (if (and (integerp exit-code) (zerop exit-code))
+          ;; Success; revert current buffer so it gets the modifications.
+          (progn
+            (kill-buffer output-buffer)
+            (revert-buffer :ignore-auto :noconfirm :preserve-modes))
+        ;; Failure; append exit code to output buffer and display it.
+        (let ((message (clang-rename--format-message
+                        "clang-rename failed with %s %s"
+                        (if (integerp exit-code) "exit status" "signal")
+                        exit-code)))
+          (with-current-buffer output-buffer
+            (insert ?\n message ?\n))
+          (message "%s" message)
+          (display-buffer output-buffer))))))
+
+(defalias 'clang-rename--bufferpos-to-filepos
+  (if (fboundp 'bufferpos-to-filepos)
+      'bufferpos-to-filepos
+    ;; Emacs 24 doesn’t have ‘bufferpos-to-filepos’, simulate it using
+    ;; ‘position-bytes’.
+    (lambda (position &optional _quality _coding-system)
+      (1- (position-bytes position)))))
+
+;; ‘format-message’ is new in Emacs 25.1.  Provide a fallback for older
+;; versions.
+(defalias 'clang-rename--format-message
+  (if (fboundp 'format-message) 'format-message 'format))
+
+(provide 'clang-rename)
+
+;;; clang-rename.el ends here
diff --git a/interpreter/llvm/src/tools/clang/tools/clang-rename/clang-rename.py b/interpreter/llvm/src/tools/clang/tools/clang-rename/clang-rename.py
new file mode 100644
index 0000000000000..3cc6644ff8f0a
--- /dev/null
+++ b/interpreter/llvm/src/tools/clang/tools/clang-rename/clang-rename.py
@@ -0,0 +1,61 @@
+'''
+Minimal clang-rename integration with Vim.
+
+Before installing make sure one of the following is satisfied:
+
+* clang-rename is in your PATH
+* `g:clang_rename_path` in ~/.vimrc points to a valid clang-rename executable
+* `binary` in clang-rename.py points to a valid clang-rename executable
+
+To install, simply put this into your ~/.vimrc
+
+    noremap <leader>cr :pyf <path-to-this-file>/clang-rename.py<cr>
+
+IMPORTANT NOTE: Before running the tool, make sure you saved the file.
+
+All you have to do now is to place a cursor on a variable/function/class which
+you would like to rename and press 'cr'. You will be prompted for a new
+name if the cursor points to a valid symbol.
+'''
+
+import vim
+import subprocess
+import sys
+
+def main():
+    binary = 'clang-rename'
+    if vim.eval('exists("g:clang_rename_path")') == "1":
+        binary = vim.eval('g:clang_rename_path')
+
+    # Get arguments for clang-rename binary.
+    offset = int(vim.eval('line2byte(line("."))+col(".")')) - 2
+    if offset < 0:
+        print >> sys.stderr, '''Couldn\'t determine cursor position.
+                                Is your file empty?'''
+        return
+    filename = vim.current.buffer.name
+
+    new_name_request_message = 'type new name:'
+    new_name = vim.eval("input('{}\n')".format(new_name_request_message))
+
+    # Call clang-rename.
+    command = [binary,
+               filename,
+               '-i',
+               '-offset', str(offset),
+               '-new-name', str(new_name)]
+    # FIXME: make it possible to run the tool on an unsaved file.
+    p = subprocess.Popen(command,
+                         stdout=subprocess.PIPE,
+                         stderr=subprocess.PIPE)
+    stdout, stderr = p.communicate()
+
+    if stderr:
+        print stderr
+
+    # Reload all buffers in Vim.
+    vim.command("checktime")
+
+
+if __name__ == '__main__':
+    main()
diff --git a/interpreter/llvm/src/tools/clang/tools/diagtool/CMakeLists.txt b/interpreter/llvm/src/tools/clang/tools/diagtool/CMakeLists.txt
index e88c2ab6e8c3f..3f7d80385a82c 100644
--- a/interpreter/llvm/src/tools/clang/tools/diagtool/CMakeLists.txt
+++ b/interpreter/llvm/src/tools/clang/tools/diagtool/CMakeLists.txt
@@ -6,6 +6,7 @@ add_clang_executable(diagtool
   diagtool_main.cpp
   DiagTool.cpp
   DiagnosticNames.cpp
+  FindDiagnosticID.cpp
   ListWarnings.cpp
   ShowEnabledWarnings.cpp
   TreeView.cpp
diff --git a/interpreter/llvm/src/tools/clang/tools/diagtool/FindDiagnosticID.cpp b/interpreter/llvm/src/tools/clang/tools/diagtool/FindDiagnosticID.cpp
new file mode 100644
index 0000000000000..167b9925eedc0
--- /dev/null
+++ b/interpreter/llvm/src/tools/clang/tools/diagtool/FindDiagnosticID.cpp
@@ -0,0 +1,58 @@
+//===- FindDiagnosticID.cpp - diagtool tool for finding diagnostic id -----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DiagTool.h"
+#include "DiagnosticNames.h"
+#include "clang/Basic/AllDiagnostics.h"
+#include "llvm/Support/CommandLine.h"
+
+DEF_DIAGTOOL("find-diagnostic-id", "Print the id of the given diagnostic",
+             FindDiagnosticID)
+
+using namespace clang;
+using namespace diagtool;
+
+static Optional<DiagnosticRecord>
+findDiagnostic(ArrayRef<DiagnosticRecord> Diagnostics, StringRef Name) {
+  for (const auto &Diag : Diagnostics) {
+    StringRef DiagName = Diag.getName();
+    if (DiagName == Name)
+      return Diag;
+  }
+  return None;
+}
+
+int FindDiagnosticID::run(unsigned int argc, char **argv,
+                          llvm::raw_ostream &OS) {
+  static llvm::cl::OptionCategory FindDiagnosticIDOptions(
+      "diagtool find-diagnostic-id options");
+
+  static llvm::cl::opt<std::string> DiagnosticName(
+      llvm::cl::Positional, llvm::cl::desc("<diagnostic-name>"),
+      llvm::cl::Required, llvm::cl::cat(FindDiagnosticIDOptions));
+
+  std::vector<const char *> Args;
+  Args.push_back("find-diagnostic-id");
+  for (const char *A : llvm::makeArrayRef(argv, argc))
+    Args.push_back(A);
+
+  llvm::cl::HideUnrelatedOptions(FindDiagnosticIDOptions);
+  llvm::cl::ParseCommandLineOptions((int)Args.size(), Args.data(),
+                                    "Diagnostic ID mapping utility");
+
+  ArrayRef<DiagnosticRecord> AllDiagnostics = getBuiltinDiagnosticsByName();
+  Optional<DiagnosticRecord> Diag =
+      findDiagnostic(AllDiagnostics, DiagnosticName);
+  if (!Diag) {
+    llvm::errs() << "error: invalid diagnostic '" << DiagnosticName << "'\n";
+    return 1;
+  }
+  OS << Diag->DiagID << "\n";
+  return 0;
+}
diff --git a/interpreter/llvm/src/tools/clang/tools/driver/cc1as_main.cpp b/interpreter/llvm/src/tools/clang/tools/driver/cc1as_main.cpp
index 33d957658cf05..2fc2b508ef21b 100644
--- a/interpreter/llvm/src/tools/clang/tools/driver/cc1as_main.cpp
+++ b/interpreter/llvm/src/tools/clang/tools/driver/cc1as_main.cpp
@@ -88,12 +88,13 @@ struct AssemblerInvocation {
   unsigned NoInitialTextSection : 1;
   unsigned SaveTemporaryLabels : 1;
   unsigned GenDwarfForAssembly : 1;
-  unsigned CompressDebugSections : 1;
   unsigned RelaxELFRelocations : 1;
   unsigned DwarfVersion;
   std::string DwarfDebugFlags;
   std::string DwarfDebugProducer;
   std::string DebugCompilationDir;
+  llvm::DebugCompressionType CompressDebugSections =
+      llvm::DebugCompressionType::None;
   std::string MainFileName;
 
   /// @}
@@ -201,7 +202,22 @@ bool AssemblerInvocation::CreateFromArgs(AssemblerInvocation &Opts,
   Opts.SaveTemporaryLabels = Args.hasArg(OPT_msave_temp_labels);
   // Any DebugInfoKind implies GenDwarfForAssembly.
   Opts.GenDwarfForAssembly = Args.hasArg(OPT_debug_info_kind_EQ);
-  Opts.CompressDebugSections = Args.hasArg(OPT_compress_debug_sections);
+
+  if (const Arg *A = Args.getLastArg(OPT_compress_debug_sections,
+                                     OPT_compress_debug_sections_EQ)) {
+    if (A->getOption().getID() == OPT_compress_debug_sections) {
+      // TODO: be more clever about the compression type auto-detection
+      Opts.CompressDebugSections = llvm::DebugCompressionType::GNU;
+    } else {
+      Opts.CompressDebugSections =
+          llvm::StringSwitch<llvm::DebugCompressionType>(A->getValue())
+              .Case("none", llvm::DebugCompressionType::None)
+              .Case("zlib", llvm::DebugCompressionType::Z)
+              .Case("zlib-gnu", llvm::DebugCompressionType::GNU)
+              .Default(llvm::DebugCompressionType::None);
+    }
+  }
+
   Opts.RelaxELFRelocations = Args.hasArg(OPT_mrelax_relocations);
   Opts.DwarfVersion = getLastArgIntValue(Args, OPT_dwarf_version_EQ, 2, Diags);
   Opts.DwarfDebugFlags = Args.getLastArgValue(OPT_dwarf_debug_flags);
@@ -314,8 +330,7 @@ static bool ExecuteAssembler(AssemblerInvocation &Opts,
 
   // Ensure MCAsmInfo initialization occurs before any use, otherwise sections
   // may be created with a combination of default and explicit settings.
-  if (Opts.CompressDebugSections)
-    MAI->setCompressDebugSections(DebugCompressionType::DCT_ZlibGnu);
+  MAI->setCompressDebugSections(Opts.CompressDebugSections);
 
   MAI->setRelaxELFRelocations(Opts.RelaxELFRelocations);
 
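
The new -compress-debug-sections= handling above maps the flag's value onto
llvm::DebugCompressionType through llvm::StringSwitch. The same pattern in
isolation, with a local stand-in enum so the sketch is self-contained:

    #include "llvm/ADT/StringSwitch.h"

    // Local stand-in for llvm::DebugCompressionType.
    enum class Compression { None, Z, GNU };

    static Compression parseCompression(llvm::StringRef Value) {
      return llvm::StringSwitch<Compression>(Value)
          .Case("none", Compression::None)
          .Case("zlib", Compression::Z)
          .Case("zlib-gnu", Compression::GNU)
          .Default(Compression::None); // unknown values fall back to None
    }
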
diff --git a/interpreter/llvm/src/tools/clang/tools/driver/driver.cpp b/interpreter/llvm/src/tools/clang/tools/driver/driver.cpp
index 626d006ac0d84..9f37c428ff932 100644
--- a/interpreter/llvm/src/tools/clang/tools/driver/driver.cpp
+++ b/interpreter/llvm/src/tools/clang/tools/driver/driver.cpp
@@ -53,8 +53,15 @@ using namespace clang::driver;
 using namespace llvm::opt;
 
 std::string GetExecutablePath(const char *Argv0, bool CanonicalPrefixes) {
-  if (!CanonicalPrefixes)
-    return Argv0;
+  if (!CanonicalPrefixes) {
+    SmallString<128> ExecutablePath(Argv0);
+    // Do a PATH lookup if Argv0 isn't a valid path.
+    if (!llvm::sys::fs::exists(ExecutablePath))
+      if (llvm::ErrorOr<std::string> P =
+              llvm::sys::findProgramByName(ExecutablePath))
+        ExecutablePath = *P;
+    return ExecutablePath.str();
+  }
 
   // This just needs to be some symbol in the binary; C++ doesn't
   // allow taking the address of ::main however.
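
The GetExecutablePath change keeps argv[0] when it names an existing file and
otherwise falls back to a PATH search. llvm::sys::findProgramByName returns
llvm::ErrorOr<std::string>, which tests false on failure; the core pattern as
a standalone sketch (resolveArgv0 is an illustrative name):

    #include "llvm/Support/Program.h"
    #include <string>

    static std::string resolveArgv0(const char *Argv0) {
      // A failed lookup converts to false, so the literal argv[0] is kept.
      if (llvm::ErrorOr<std::string> P = llvm::sys::findProgramByName(Argv0))
        return *P;
      return Argv0;
    }
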
@@ -454,40 +461,41 @@ int main(int argc_, const char **argv_) {
   SetBackdoorDriverOutputsFromEnvVars(TheDriver);
 
   std::unique_ptr<Compilation> C(TheDriver.BuildCompilation(argv));
-  int Res = 0;
-  SmallVector<std::pair<int, const Command *>, 4> FailingCommands;
-  if (C.get())
+  int Res = 1;
+  if (C && !C->containsError()) {
+    SmallVector<std::pair<int, const Command *>, 4> FailingCommands;
     Res = TheDriver.ExecuteCompilation(*C, FailingCommands);
 
-  // Force a crash to test the diagnostics.
-  if (TheDriver.GenReproducer) {
-    Diags.Report(diag::err_drv_force_crash)
+    // Force a crash to test the diagnostics.
+    if (TheDriver.GenReproducer) {
+      Diags.Report(diag::err_drv_force_crash)
         << !::getenv("FORCE_CLANG_DIAGNOSTICS_CRASH");
 
-    // Pretend that every command failed.
-    FailingCommands.clear();
-    for (const auto &J : C->getJobs())
-      if (const Command *C = dyn_cast<Command>(&J))
-        FailingCommands.push_back(std::make_pair(-1, C));
-  }
+      // Pretend that every command failed.
+      FailingCommands.clear();
+      for (const auto &J : C->getJobs())
+        if (const Command *C = dyn_cast<Command>(&J))
+          FailingCommands.push_back(std::make_pair(-1, C));
+    }
 
-  for (const auto &P : FailingCommands) {
-    int CommandRes = P.first;
-    const Command *FailingCommand = P.second;
-    if (!Res)
-      Res = CommandRes;
-
-    // If result status is < 0, then the driver command signalled an error.
-    // If result status is 70, then the driver command reported a fatal error.
-    // On Windows, abort will return an exit code of 3.  In these cases,
-    // generate additional diagnostic information if possible.
-    bool DiagnoseCrash = CommandRes < 0 || CommandRes == 70;
+    for (const auto &P : FailingCommands) {
+      int CommandRes = P.first;
+      const Command *FailingCommand = P.second;
+      if (!Res)
+        Res = CommandRes;
+
+      // If result status is < 0, then the driver command signalled an error.
+      // If result status is 70, then the driver command reported a fatal error.
+      // On Windows, abort will return an exit code of 3.  In these cases,
+      // generate additional diagnostic information if possible.
+      bool DiagnoseCrash = CommandRes < 0 || CommandRes == 70;
 #ifdef LLVM_ON_WIN32
-    DiagnoseCrash |= CommandRes == 3;
+      DiagnoseCrash |= CommandRes == 3;
 #endif
-    if (DiagnoseCrash) {
-      TheDriver.generateCompilationDiagnostics(*C, *FailingCommand);
-      break;
+      if (DiagnoseCrash) {
+        TheDriver.generateCompilationDiagnostics(*C, *FailingCommand);
+        break;
+      }
     }
   }
 
diff --git a/interpreter/llvm/src/tools/clang/tools/libclang/ARCMigrate.cpp b/interpreter/llvm/src/tools/clang/tools/libclang/ARCMigrate.cpp
index 44a60c4e3e2c1..0f2bd06db4b49 100644
--- a/interpreter/llvm/src/tools/clang/tools/libclang/ARCMigrate.cpp
+++ b/interpreter/llvm/src/tools/clang/tools/libclang/ARCMigrate.cpp
@@ -14,6 +14,7 @@
 #include "clang-c/Index.h"
 #include "CXString.h"
 #include "clang/ARCMigrate/ARCMT.h"
+#include "clang/Config/config.h"
 #include "clang/Frontend/TextDiagnosticBuffer.h"
 #include "llvm/Support/FileSystem.h"
 
diff --git a/interpreter/llvm/src/tools/clang/tools/libclang/CIndex.cpp b/interpreter/llvm/src/tools/clang/tools/libclang/CIndex.cpp
index 9c795ae9c5b76..d527535a17c17 100644
--- a/interpreter/llvm/src/tools/clang/tools/libclang/CIndex.cpp
+++ b/interpreter/llvm/src/tools/clang/tools/libclang/CIndex.cpp
@@ -2264,6 +2264,23 @@ void OMPClauseEnqueue::VisitOMPReductionClause(const OMPReductionClause *C) {
     Visitor->AddStmt(E);
   }
 }
+void OMPClauseEnqueue::VisitOMPTaskReductionClause(
+    const OMPTaskReductionClause *C) {
+  VisitOMPClauseList(C);
+  VisitOMPClauseWithPostUpdate(C);
+  for (auto *E : C->privates()) {
+    Visitor->AddStmt(E);
+  }
+  for (auto *E : C->lhs_exprs()) {
+    Visitor->AddStmt(E);
+  }
+  for (auto *E : C->rhs_exprs()) {
+    Visitor->AddStmt(E);
+  }
+  for (auto *E : C->reduction_ops()) {
+    Visitor->AddStmt(E);
+  }
+}
 void OMPClauseEnqueue::VisitOMPLinearClause(const OMPLinearClause *C) {
   VisitOMPClauseList(C);
   VisitOMPClauseWithPostUpdate(C);
@@ -3247,7 +3264,8 @@ enum CXErrorCode clang_createTranslationUnit2(CXIndex CIdx,
   IntrusiveRefCntPtr<DiagnosticsEngine> Diags =
       CompilerInstance::createDiagnostics(new DiagnosticOptions());
   std::unique_ptr<ASTUnit> AU = ASTUnit::LoadFromASTFile(
-      ast_filename, CXXIdx->getPCHContainerOperations()->getRawReader(), Diags,
+      ast_filename, CXXIdx->getPCHContainerOperations()->getRawReader(),
+      ASTUnit::LoadEverything, Diags,
       FileSystemOpts, /*UseDebugInfo=*/false,
       CXXIdx->getOnlyLocalDecls(), None,
       /*CaptureDiagnostics=*/true,
@@ -3300,12 +3318,14 @@ clang_parseTranslationUnit_Impl(CXIndex CIdx, const char *source_filename,
       options & CXTranslationUnit_CreatePreambleOnFirstParse;
   // FIXME: Add a flag for modules.
   TranslationUnitKind TUKind
-    = (options & CXTranslationUnit_Incomplete)? TU_Prefix : TU_Complete;
+    = (options & (CXTranslationUnit_Incomplete |
+                  CXTranslationUnit_SingleFileParse))? TU_Prefix : TU_Complete;
   bool CacheCodeCompletionResults
     = options & CXTranslationUnit_CacheCompletionResults;
   bool IncludeBriefCommentsInCodeCompletion
     = options & CXTranslationUnit_IncludeBriefCommentsInCodeCompletion;
   bool SkipFunctionBodies = options & CXTranslationUnit_SkipFunctionBodies;
+  bool SingleFileParse = options & CXTranslationUnit_SingleFileParse;
   bool ForSerialization = options & CXTranslationUnit_ForSerialization;
 
   // Configure the diagnostics.
@@ -3390,7 +3410,7 @@ clang_parseTranslationUnit_Impl(CXIndex CIdx, const char *source_filename,
       /*CaptureDiagnostics=*/true, *RemappedFiles.get(),
       /*RemappedFilesKeepOriginalName=*/true, PrecompilePreambleAfterNParses,
       TUKind, CacheCodeCompletionResults, IncludeBriefCommentsInCodeCompletion,
-      /*AllowPCHWithCompilerErrors=*/true, SkipFunctionBodies,
+      /*AllowPCHWithCompilerErrors=*/true, SkipFunctionBodies, SingleFileParse,
       /*UserFilesAreVolatile=*/true, ForSerialization,
       CXXIdx->getPCHContainerOperations()->getRawReader().getFormat(),
       &ErrUnit));
@@ -3918,6 +3938,20 @@ void clang_disposeTranslationUnit(CXTranslationUnit CTUnit) {
   }
 }
 
+unsigned clang_suspendTranslationUnit(CXTranslationUnit CTUnit) {
+  if (CTUnit) {
+    ASTUnit *Unit = cxtu::getASTUnit(CTUnit);
+
+    if (Unit && Unit->isUnsafeToFree())
+      return false;
+
+    Unit->ResetForParse();
+    return true;
+  }
+
+  return false;
+}
+
 unsigned clang_defaultReparseOptions(CXTranslationUnit TU) {
   return CXReparse_None;
 }
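
clang_suspendTranslationUnit releases most of the memory a translation unit
holds while keeping it reparseable. A hedged usage sketch of the new entry
point (paths and arguments are placeholders, error handling elided):

    #include <clang-c/Index.h>

    void suspendAndReparse(const char *Path, const char *const *Args, int N) {
      CXIndex Idx = clang_createIndex(/*excludeDeclarationsFromPCH=*/0,
                                      /*displayDiagnostics=*/0);
      CXTranslationUnit TU = clang_parseTranslationUnit(
          Idx, Path, Args, N, /*unsaved_files=*/nullptr, 0,
          clang_defaultEditingTranslationUnitOptions());
      // Returns 0 when the TU is unsafe to free (e.g. still in use).
      if (TU && clang_suspendTranslationUnit(TU))
        clang_reparseTranslationUnit(TU, 0, nullptr,
                                     clang_defaultReparseOptions(TU));
      clang_disposeTranslationUnit(TU);
      clang_disposeIndex(Idx);
    }
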
@@ -7069,8 +7103,10 @@ CXLinkageKind clang_getCursorLinkage(CXCursor cursor) {
     switch (ND->getLinkageInternal()) {
       case NoLinkage:
       case VisibleNoLinkage: return CXLinkage_NoLinkage;
+      case ModuleInternalLinkage:
       case InternalLinkage: return CXLinkage_Internal;
       case UniqueExternalLinkage: return CXLinkage_UniqueExternal;
+      case ModuleLinkage:
       case ExternalLinkage: return CXLinkage_External;
     };
 
@@ -7200,15 +7236,11 @@ static CXVersion convertVersion(VersionTuple In) {
   return Out;
 }
 
-static int getCursorPlatformAvailabilityForDecl(const Decl *D,
-                                                int *always_deprecated,
-                                                CXString *deprecated_message,
-                                                int *always_unavailable,
-                                                CXString *unavailable_message,
-                                           CXPlatformAvailability *availability,
-                                                int availability_size) {
+static void getCursorPlatformAvailabilityForDecl(
+    const Decl *D, int *always_deprecated, CXString *deprecated_message,
+    int *always_unavailable, CXString *unavailable_message,
+    SmallVectorImpl<AvailabilityAttr *> &AvailabilityAttrs) {
   bool HadAvailAttr = false;
-  int N = 0;
   for (auto A : D->attrs()) {
     if (DeprecatedAttr *Deprecated = dyn_cast<DeprecatedAttr>(A)) {
       HadAvailAttr = true;
@@ -7220,7 +7252,7 @@ static int getCursorPlatformAvailabilityForDecl(const Decl *D,
       }
       continue;
     }
-    
+
     if (UnavailableAttr *Unavailable = dyn_cast<UnavailableAttr>(A)) {
       HadAvailAttr = true;
       if (always_unavailable)
@@ -7231,38 +7263,71 @@ static int getCursorPlatformAvailabilityForDecl(const Decl *D,
       }
       continue;
     }
-    
+
     if (AvailabilityAttr *Avail = dyn_cast<AvailabilityAttr>(A)) {
+      AvailabilityAttrs.push_back(Avail);
       HadAvailAttr = true;
-      if (N < availability_size) {
-        availability[N].Platform
-          = cxstring::createDup(Avail->getPlatform()->getName());
-        availability[N].Introduced = convertVersion(Avail->getIntroduced());
-        availability[N].Deprecated = convertVersion(Avail->getDeprecated());
-        availability[N].Obsoleted = convertVersion(Avail->getObsoleted());
-        availability[N].Unavailable = Avail->getUnavailable();
-        availability[N].Message = cxstring::createDup(Avail->getMessage());
-      }
-      ++N;
     }
   }
 
   if (!HadAvailAttr)
     if (const EnumConstantDecl *EnumConst = dyn_cast<EnumConstantDecl>(D))
       return getCursorPlatformAvailabilityForDecl(
-                                        cast<Decl>(EnumConst->getDeclContext()),
-                                                  always_deprecated,
-                                                  deprecated_message,
-                                                  always_unavailable,
-                                                  unavailable_message,
-                                                  availability,
-                                                  availability_size);
-  
-  return N;
+          cast<Decl>(EnumConst->getDeclContext()), always_deprecated,
+          deprecated_message, always_unavailable, unavailable_message,
+          AvailabilityAttrs);
+
+  if (AvailabilityAttrs.empty())
+    return;
+
+  std::sort(AvailabilityAttrs.begin(), AvailabilityAttrs.end(),
+            [](AvailabilityAttr *LHS, AvailabilityAttr *RHS) {
+              return LHS->getPlatform() > RHS->getPlatform();
+            });
+  ASTContext &Ctx = D->getASTContext();
+  auto It = std::unique(
+      AvailabilityAttrs.begin(), AvailabilityAttrs.end(),
+      [&Ctx](AvailabilityAttr *LHS, AvailabilityAttr *RHS) {
+        if (LHS->getPlatform() != RHS->getPlatform())
+          return false;
+
+        if (LHS->getIntroduced() == RHS->getIntroduced() &&
+            LHS->getDeprecated() == RHS->getDeprecated() &&
+            LHS->getObsoleted() == RHS->getObsoleted() &&
+            LHS->getMessage() == RHS->getMessage() &&
+            LHS->getReplacement() == RHS->getReplacement())
+          return true;
+
+        if ((!LHS->getIntroduced().empty() && !RHS->getIntroduced().empty()) ||
+            (!LHS->getDeprecated().empty() && !RHS->getDeprecated().empty()) ||
+            (!LHS->getObsoleted().empty() && !RHS->getObsoleted().empty()))
+          return false;
+
+        if (LHS->getIntroduced().empty() && !RHS->getIntroduced().empty())
+          LHS->setIntroduced(Ctx, RHS->getIntroduced());
+
+        if (LHS->getDeprecated().empty() && !RHS->getDeprecated().empty()) {
+          LHS->setDeprecated(Ctx, RHS->getDeprecated());
+          if (LHS->getMessage().empty())
+            LHS->setMessage(Ctx, RHS->getMessage());
+          if (LHS->getReplacement().empty())
+            LHS->setReplacement(Ctx, RHS->getReplacement());
+        }
+
+        if (LHS->getObsoleted().empty() && !RHS->getObsoleted().empty()) {
+          LHS->setObsoleted(Ctx, RHS->getObsoleted());
+          if (LHS->getMessage().empty())
+            LHS->setMessage(Ctx, RHS->getMessage());
+          if (LHS->getReplacement().empty())
+            LHS->setReplacement(Ctx, RHS->getReplacement());
+        }
+
+        return true;
+      });
+  AvailabilityAttrs.erase(It, AvailabilityAttrs.end());
 }
 
-int clang_getCursorPlatformAvailability(CXCursor cursor,
-                                        int *always_deprecated,
+int clang_getCursorPlatformAvailability(CXCursor cursor, int *always_deprecated,
                                         CXString *deprecated_message,
                                         int *always_unavailable,
                                         CXString *unavailable_message,
@@ -7284,14 +7349,29 @@ int clang_getCursorPlatformAvailability(CXCursor cursor,
   if (!D)
     return 0;
 
-  return getCursorPlatformAvailabilityForDecl(D, always_deprecated,
-                                              deprecated_message,
-                                              always_unavailable,
-                                              unavailable_message,
-                                              availability,
-                                              availability_size);
+  SmallVector<AvailabilityAttr *, 8> AvailabilityAttrs;
+  getCursorPlatformAvailabilityForDecl(D, always_deprecated, deprecated_message,
+                                       always_unavailable, unavailable_message,
+                                       AvailabilityAttrs);
+  for (const auto &Avail :
+       llvm::enumerate(llvm::makeArrayRef(AvailabilityAttrs)
+                           .take_front(availability_size))) {
+    availability[Avail.index()].Platform =
+        cxstring::createDup(Avail.value()->getPlatform()->getName());
+    availability[Avail.index()].Introduced =
+        convertVersion(Avail.value()->getIntroduced());
+    availability[Avail.index()].Deprecated =
+        convertVersion(Avail.value()->getDeprecated());
+    availability[Avail.index()].Obsoleted =
+        convertVersion(Avail.value()->getObsoleted());
+    availability[Avail.index()].Unavailable = Avail.value()->getUnavailable();
+    availability[Avail.index()].Message =
+        cxstring::createDup(Avail.value()->getMessage());
+  }
+
+  return AvailabilityAttrs.size();
 }
-  
+
 void clang_disposeCXPlatformAvailability(CXPlatformAvailability *availability) {
   clang_disposeString(availability->Platform);
   clang_disposeString(availability->Message);
@@ -7487,16 +7567,7 @@ unsigned clang_Cursor_isExternalSymbol(CXCursor C,
 
   const Decl *D = getCursorDecl(C);
 
-  auto getExternalSymAttr = [](const Decl *D) -> ExternalSourceSymbolAttr* {
-    if (auto *attr = D->getAttr<ExternalSourceSymbolAttr>())
-      return attr;
-    if (auto *dcd = dyn_cast<Decl>(D->getDeclContext())) {
-      if (auto *attr = dcd->getAttr<ExternalSourceSymbolAttr>())
-        return attr;
-    }
-    return nullptr;
-  };
-  if (auto *attr = getExternalSymAttr(D)) {
+  if (auto *attr = D->getExternalSourceSymbolAttr()) {
     if (language)
       *language = cxstring::createDup(attr->getLanguage());
     if (definedIn)
@@ -7753,6 +7824,15 @@ unsigned clang_CXXMethod_isVirtual(CXCursor C) {
   return (Method && Method->isVirtual()) ? 1 : 0;
 }
 
+unsigned clang_EnumDecl_isScoped(CXCursor C) {
+  if (!clang_isDeclaration(C.kind))
+    return 0;
+
+  const Decl *D = cxcursor::getCursorDecl(C);
+  auto *Enum = dyn_cast_or_null<EnumDecl>(D);
+  return (Enum && Enum->isScoped()) ? 1 : 0;
+}
+
 //===----------------------------------------------------------------------===//
 // Attribute introspection.
 //===----------------------------------------------------------------------===//
@@ -8132,7 +8212,7 @@ cxindex::checkForMacroInMacroDefinition(const MacroInfo *MI, const Token &Tok,
     return nullptr;
 
   // Check that the identifier is not one of the macro arguments.
-  if (std::find(MI->arg_begin(), MI->arg_end(), &II) != MI->arg_end())
+  if (std::find(MI->param_begin(), MI->param_end(), &II) != MI->param_end())
     return nullptr;
 
   MacroDirective *InnerMD = PP.getLocalMacroDirectiveHistory(&II);
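
One detail of the CIndex.cpp changes above: availability attributes are
sorted by platform and then deduplicated with std::unique, whose predicate
merges version and message fields from the discarded duplicate into the
surviving attribute before reporting the pair equal. The same merge-on-unique
idea on a toy type (a sketch; as in the patch, the predicate mutates its left
operand):

    #include <algorithm>
    #include <string>
    #include <vector>

    struct Entry {
      std::string Key;
      std::string Note; // may be empty; filled in from duplicates
    };

    void mergeDuplicates(std::vector<Entry> &Es) {
      std::sort(Es.begin(), Es.end(),
                [](const Entry &L, const Entry &R) { return L.Key < R.Key; });
      auto It = std::unique(Es.begin(), Es.end(),
                            [](Entry &L, const Entry &R) {
                              if (L.Key != R.Key)
                                return false; // different keys: keep both
                              if (L.Note.empty())
                                L.Note = R.Note; // salvage data, then drop R
                              return true;
                            });
      Es.erase(It, Es.end());
    }
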
diff --git a/interpreter/llvm/src/tools/clang/tools/libclang/CIndexDiagnostic.cpp b/interpreter/llvm/src/tools/clang/tools/libclang/CIndexDiagnostic.cpp
index de223d3043a31..4e47b25a4bf01 100644
--- a/interpreter/llvm/src/tools/clang/tools/libclang/CIndexDiagnostic.cpp
+++ b/interpreter/llvm/src/tools/clang/tools/libclang/CIndexDiagnostic.cpp
@@ -110,40 +110,34 @@ class CXDiagnosticRenderer : public DiagnosticNoteRenderer {
       CurrentSet = &CD.getChildDiagnostics();
   }
 
-  void emitDiagnosticMessage(SourceLocation Loc, PresumedLoc PLoc,
-                             DiagnosticsEngine::Level Level,
-                             StringRef Message,
+  void emitDiagnosticMessage(FullSourceLoc Loc, PresumedLoc PLoc,
+                             DiagnosticsEngine::Level Level, StringRef Message,
                             ArrayRef<CharSourceRange> Ranges,
-                             const SourceManager *SM,
                              DiagOrStoredDiag D) override {
     if (!D.isNull())
       return;
     
     CXSourceLocation L;
-    if (SM)
-      L = translateSourceLocation(*SM, LangOpts, Loc);
+    if (Loc.hasManager())
+      L = translateSourceLocation(Loc.getManager(), LangOpts, Loc);
     else
       L = clang_getNullLocation();
     CurrentSet->appendDiagnostic(
         llvm::make_unique<CXDiagnosticCustomNoteImpl>(Message, L));
   }
 
-  void emitDiagnosticLoc(SourceLocation Loc, PresumedLoc PLoc,
+  void emitDiagnosticLoc(FullSourceLoc Loc, PresumedLoc PLoc,
                          DiagnosticsEngine::Level Level,
-                         ArrayRef<CharSourceRange> Ranges,
-                         const SourceManager &SM) override {}
+                         ArrayRef<CharSourceRange> Ranges) override {}
 
-  void emitCodeContext(SourceLocation Loc,
-                       DiagnosticsEngine::Level Level,
-                       SmallVectorImpl<CharSourceRange>& Ranges,
-                       ArrayRef<FixItHint> Hints,
-                       const SourceManager &SM) override {}
+  void emitCodeContext(FullSourceLoc Loc, DiagnosticsEngine::Level Level,
+                       SmallVectorImpl<CharSourceRange> &Ranges,
+                       ArrayRef<FixItHint> Hints) override {}
 
-  void emitNote(SourceLocation Loc, StringRef Message,
-                const SourceManager *SM) override {
+  void emitNote(FullSourceLoc Loc, StringRef Message) override {
     CXSourceLocation L;
-    if (SM)
-      L = translateSourceLocation(*SM, LangOpts, Loc);
+    if (Loc.hasManager())
+      L = translateSourceLocation(Loc.getManager(), LangOpts, Loc);
     else
       L = clang_getNullLocation();
     CurrentSet->appendDiagnostic(
diff --git a/interpreter/llvm/src/tools/clang/tools/libclang/CXCompilationDatabase.cpp b/interpreter/llvm/src/tools/clang/tools/libclang/CXCompilationDatabase.cpp
index c122ec8a6db4e..2ca532659d378 100644
--- a/interpreter/llvm/src/tools/clang/tools/libclang/CXCompilationDatabase.cpp
+++ b/interpreter/llvm/src/tools/clang/tools/libclang/CXCompilationDatabase.cpp
@@ -145,36 +145,23 @@ clang_CompileCommand_getArg(CXCompileCommand CCmd, unsigned Arg)
 unsigned
 clang_CompileCommand_getNumMappedSources(CXCompileCommand CCmd)
 {
-  if (!CCmd)
-    return 0;
-
-  return static_cast<CompileCommand *>(CCmd)->MappedSources.size();
+  // Left here for backward compatibility. No mapped sources exists in the C++
+  // backend anymore.
+  return 0;
 }
 
 CXString
 clang_CompileCommand_getMappedSourcePath(CXCompileCommand CCmd, unsigned I)
 {
-  if (!CCmd)
-    return cxstring::createNull();
-
-  CompileCommand *Cmd = static_cast<CompileCommand *>(CCmd);
-
-  if (I >= Cmd->MappedSources.size())
-    return cxstring::createNull();
-
-  return cxstring::createRef(Cmd->MappedSources[I].first.c_str());
+  // Left here for backward compatibility. No mapped sources exists in the C++
+  // backend anymore.
+  return cxstring::createNull();
 }
 
 CXString
 clang_CompileCommand_getMappedSourceContent(CXCompileCommand CCmd, unsigned I)
 {
-  if (!CCmd)
-    return cxstring::createNull();
-
-  CompileCommand *Cmd = static_cast<CompileCommand *>(CCmd);
-
-  if (I >= Cmd->MappedSources.size())
-    return cxstring::createNull();
-
-  return cxstring::createRef(Cmd->MappedSources[I].second.c_str());
+  // Left here for backward compatibility. No mapped sources exists in the C++
+  // backend anymore.
+  return cxstring::createNull();
 }
diff --git a/interpreter/llvm/src/tools/clang/tools/libclang/CXIndexDataConsumer.cpp b/interpreter/llvm/src/tools/clang/tools/libclang/CXIndexDataConsumer.cpp
index 9cd5ff4f505f6..a2ef68be49dec 100644
--- a/interpreter/llvm/src/tools/clang/tools/libclang/CXIndexDataConsumer.cpp
+++ b/interpreter/llvm/src/tools/clang/tools/libclang/CXIndexDataConsumer.cpp
@@ -423,11 +423,13 @@ bool CXIndexDataConsumer::isFunctionLocalDecl(const Decl *D) {
   if (const NamedDecl *ND = dyn_cast<NamedDecl>(D)) {
     switch (ND->getFormalLinkage()) {
     case NoLinkage:
-    case VisibleNoLinkage:
     case InternalLinkage:
       return true;
+    case VisibleNoLinkage:
+    case ModuleInternalLinkage:
     case UniqueExternalLinkage:
       llvm_unreachable("Not a sema linkage");
+    case ModuleLinkage:
     case ExternalLinkage:
       return false;
     }
diff --git a/interpreter/llvm/src/tools/clang/tools/libclang/CXType.cpp b/interpreter/llvm/src/tools/clang/tools/libclang/CXType.cpp
index 16e993e2ac013..d2cb509059156 100644
--- a/interpreter/llvm/src/tools/clang/tools/libclang/CXType.cpp
+++ b/interpreter/llvm/src/tools/clang/tools/libclang/CXType.cpp
@@ -21,6 +21,7 @@
 #include "clang/AST/DeclTemplate.h"
 #include "clang/AST/Expr.h"
 #include "clang/AST/Type.h"
+#include "clang/Basic/AddressSpaces.h"
 #include "clang/Frontend/ASTUnit.h"
 
 using namespace clang;
@@ -59,6 +60,13 @@ static CXTypeKind GetBuiltinTypeKind(const BuiltinType *BT) {
     BTCASE(ObjCId);
     BTCASE(ObjCClass);
     BTCASE(ObjCSel);
+#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) BTCASE(Id);
+#include "clang/Basic/OpenCLImageTypes.def"
+#undef IMAGE_TYPE
+    BTCASE(OCLSampler);
+    BTCASE(OCLEvent);
+    BTCASE(OCLQueue);
+    BTCASE(OCLReserveID);
   default:
     return CXType_Unexposed;
   }
@@ -94,6 +102,7 @@ static CXTypeKind GetTypeKind(QualType T) {
     TKCASE(MemberPointer);
     TKCASE(Auto);
     TKCASE(Elaborated);
+    TKCASE(Pipe);
     default:
       return CXType_Unexposed;
   }
@@ -386,6 +395,27 @@ unsigned clang_isRestrictQualifiedType(CXType CT) {
   return T.isLocalRestrictQualified();
 }
 
+unsigned clang_getAddressSpace(CXType CT) {
+  QualType T = GetQualType(CT);
+
+  // For non language-specific address space, use separate helper function.
+  if (T.getAddressSpace() >= LangAS::FirstTargetAddressSpace) {
+    return T.getQualifiers().getAddressSpaceAttributePrintValue();
+  }
+  return T.getAddressSpace();
+}
+
+CXString clang_getTypedefName(CXType CT) {
+  QualType T = GetQualType(CT);
+  const TypedefType *TT = T->getAs<TypedefType>();
+  if (TT) {
+    TypedefNameDecl *TD = TT->getDecl();
+    if (TD)
+      return cxstring::createDup(TD->getNameAsString().c_str());
+  }
+  return cxstring::createEmpty();
+}
+
 CXType clang_getPointeeType(CXType CT) {
   QualType T = GetQualType(CT);
   const Type *TP = T.getTypePtrOrNull();
@@ -535,6 +565,14 @@ CXString clang_getTypeKindSpelling(enum CXTypeKind K) {
     TKIND(MemberPointer);
     TKIND(Auto);
     TKIND(Elaborated);
+    TKIND(Pipe);
+#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) TKIND(Id);
+#include "clang/Basic/OpenCLImageTypes.def"
+#undef IMAGE_TYPE
+    TKIND(OCLSampler);
+    TKIND(OCLEvent);
+    TKIND(OCLQueue);
+    TKIND(OCLReserveID);
   }
 #undef TKIND
   return cxstring::createRef(s);
@@ -573,7 +611,7 @@ CXCallingConv clang_getFunctionTypeCallingConv(CXType X) {
       TCALLINGCONV(X86Pascal);
       TCALLINGCONV(X86RegCall);
       TCALLINGCONV(X86VectorCall);
-      TCALLINGCONV(X86_64Win64);
+      TCALLINGCONV(Win64);
       TCALLINGCONV(X86_64SysV);
       TCALLINGCONV(AAPCS);
       TCALLINGCONV(AAPCS_VFP);
@@ -646,6 +684,24 @@ CXType clang_getCursorResultType(CXCursor C) {
   return MakeCXType(QualType(), cxcursor::getCursorTU(C));
 }
 
+int clang_getExceptionSpecificationType(CXType X) {
+  QualType T = GetQualType(X);
+  if (T.isNull())
+    return -1;
+
+  if (const auto *FD = T->getAs<FunctionProtoType>())
+    return static_cast<int>(FD->getExceptionSpecType());
+
+  return -1;
+}
+
+int clang_getCursorExceptionSpecificationType(CXCursor C) {
+  if (clang_isDeclaration(C.kind))
+    return clang_getExceptionSpecificationType(clang_getCursorType(C));
+
+  return -1;
+}
+
 unsigned clang_isPODType(CXType X) {
   QualType T = GetQualType(X);
   if (T.isNull())
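
A short sketch exercising the three CXType entry points added above
(describeType is an illustrative name; the CXType is assumed to come from an
existing cursor):

    #include <clang-c/Index.h>
    #include <stdio.h>

    void describeType(CXType T) {
      CXString Name = clang_getTypedefName(T); // empty unless T is a typedef
      printf("typedef name: %s\n", clang_getCString(Name));
      clang_disposeString(Name);

      // Language address space of the type's qualifiers.
      printf("address space: %u\n", clang_getAddressSpace(T));

      // -1 unless T is a function type; otherwise the value corresponds to
      // clang's ExceptionSpecificationType enumeration.
      printf("exception spec: %d\n", clang_getExceptionSpecificationType(T));
    }
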
diff --git a/interpreter/llvm/src/tools/clang/tools/libclang/libclang.exports b/interpreter/llvm/src/tools/clang/tools/libclang/libclang.exports
index d9a406e5741b1..e0d178a5291a3 100644
--- a/interpreter/llvm/src/tools/clang/tools/libclang/libclang.exports
+++ b/interpreter/llvm/src/tools/clang/tools/libclang/libclang.exports
@@ -12,6 +12,7 @@ clang_CXXMethod_isConst
 clang_CXXMethod_isPureVirtual
 clang_CXXMethod_isStatic
 clang_CXXMethod_isVirtual
+clang_EnumDecl_isScoped
 clang_Cursor_getArgument
 clang_Cursor_getNumTemplateArguments
 clang_Cursor_getTemplateArgumentKind
@@ -147,6 +148,7 @@ clang_findReferencesInFile
 clang_findReferencesInFileWithBlock
 clang_formatDiagnostic
 clang_free
+clang_getAddressSpace
 clang_getAllSkippedRanges
 clang_getArgType
 clang_getArrayElementType
@@ -174,6 +176,7 @@ clang_getCursorCompletionString
 clang_getCursorDefinition
 clang_getCursorDisplayName
 clang_getCursorExtent
+clang_getCursorExceptionSpecificationType
 clang_getCursorKind
 clang_getCursorKindSpelling
 clang_getCursorLanguage
@@ -209,6 +212,7 @@ clang_getElementType
 clang_getEnumConstantDeclUnsignedValue
 clang_getEnumConstantDeclValue
 clang_getEnumDeclIntegerType
+clang_getExceptionSpecificationType
 clang_getFieldDeclBitWidth
 clang_getExpansionLocation
 clang_getFile
@@ -259,6 +263,7 @@ clang_getTypeDeclaration
 clang_getTypeKindSpelling
 clang_getTypeSpelling
 clang_getTypedefDeclUnderlyingType
+clang_getTypedefName
 clang_hashCursor
 clang_indexLoc_getCXSourceLocation
 clang_indexLoc_getFileLocation
@@ -305,6 +310,7 @@ clang_remap_getFilenames
 clang_remap_getNumFiles
 clang_reparseTranslationUnit
 clang_saveTranslationUnit
+clang_suspendTranslationUnit
 clang_sortCodeCompletionResults
 clang_toggleCrashRecovery
 clang_tokenize
diff --git a/interpreter/llvm/src/tools/clang/tools/scan-build-py/libscanbuild/analyze.py b/interpreter/llvm/src/tools/clang/tools/scan-build-py/libscanbuild/analyze.py
index a09c72389d762..b5614b5b6da05 100644
--- a/interpreter/llvm/src/tools/clang/tools/scan-build-py/libscanbuild/analyze.py
+++ b/interpreter/llvm/src/tools/clang/tools/scan-build-py/libscanbuild/analyze.py
@@ -249,7 +249,7 @@ def prefix_with(constant, pieces):
     if args.output_format:
         result.append('-analyzer-output={0}'.format(args.output_format))
     if args.analyzer_config:
-        result.append(args.analyzer_config)
+        result.extend(['-analyzer-config', args.analyzer_config])
     if args.verbose >= 4:
         result.append('-analyzer-display-progress')
     if args.plugins:
diff --git a/interpreter/llvm/src/tools/clang/tools/scan-build/libexec/ccc-analyzer b/interpreter/llvm/src/tools/clang/tools/scan-build/libexec/ccc-analyzer
index bfda1d326f904..b0ec7e7e7487e 100755
--- a/interpreter/llvm/src/tools/clang/tools/scan-build/libexec/ccc-analyzer
+++ b/interpreter/llvm/src/tools/clang/tools/scan-build/libexec/ccc-analyzer
@@ -385,7 +385,8 @@ my %CompilerLinkerOptionMap = (
   '-target' => 1,
   '-v' => 0,
   '-mmacosx-version-min' => 0, # This is really a 1 argument, but always has '='
-  '-miphoneos-version-min' => 0 # This is really a 1 argument, but always has '='
+  '-miphoneos-version-min' => 0, # This is really a 1 argument, but always has '='
+  '--target' => 0
 );
 
 my %IgnoredOptionMap = (
diff --git a/interpreter/llvm/src/tools/clang/utils/TableGen/ClangAttrEmitter.cpp b/interpreter/llvm/src/tools/clang/utils/TableGen/ClangAttrEmitter.cpp
index c326292e7d074..08f76f65d1518 100644
--- a/interpreter/llvm/src/tools/clang/utils/TableGen/ClangAttrEmitter.cpp
+++ b/interpreter/llvm/src/tools/clang/utils/TableGen/ClangAttrEmitter.cpp
@@ -312,7 +312,7 @@ namespace {
     }
 
     void writeDump(raw_ostream &OS) const override {
-      if (type == "FunctionDecl *") {
+      if (type == "FunctionDecl *" || type == "NamedDecl *") {
         OS << "    OS << \" \";\n";
         OS << "    dumpBareDeclRef(SA->get" << getUpperName() << "());\n"; 
       } else if (type == "IdentifierInfo *") {
@@ -719,9 +719,9 @@ namespace {
   };
 
   // Unique the enums, but maintain the original declaration ordering.
-  std::vector<std::string>
-  uniqueEnumsInOrder(const std::vector<std::string> &enums) {
-    std::vector<std::string> uniques;
+  std::vector<StringRef>
+  uniqueEnumsInOrder(const std::vector<StringRef> &enums) {
+    std::vector<StringRef> uniques;
     SmallDenseSet<StringRef, 8> unique_set;
     for (const auto &i : enums) {
       if (unique_set.insert(i).second)
@@ -732,7 +732,8 @@ namespace {
 
   class EnumArgument : public Argument {
     std::string type;
-    std::vector<std::string> values, enums, uniques;
+    std::vector<StringRef> values, enums, uniques;
+
   public:
     EnumArgument(const Record &Arg, StringRef Attr)
       : Argument(Arg, Attr), type(Arg.getValueAsString("Type")),
@@ -851,7 +852,7 @@ namespace {
   
   class VariadicEnumArgument: public VariadicArgument {
     std::string type, QualifiedTypeName;
-    std::vector<std::string> values, enums, uniques;
+    std::vector<StringRef> values, enums, uniques;
 
   protected:
     void writeValueImpl(raw_ostream &OS) const override {
@@ -1182,6 +1183,8 @@ createArgument(const Record &Arg, StringRef Attr,
     Ptr = llvm::make_unique<ExprArgument>(Arg, Attr);
   else if (ArgName == "FunctionArgument")
     Ptr = llvm::make_unique<SimpleArgument>(Arg, Attr, "FunctionDecl *");
+  else if (ArgName == "NamedArgument")
+    Ptr = llvm::make_unique<SimpleArgument>(Arg, Attr, "NamedDecl *");
   else if (ArgName == "IdentifierArgument")
     Ptr = llvm::make_unique<SimpleArgument>(Arg, Attr, "IdentifierInfo *");
   else if (ArgName == "DefaultBoolArgument")
@@ -1590,8 +1593,9 @@ struct AttributeSubjectMatchRule {
   }
 
   std::string getEnumValueName() const {
-    std::string Result =
-        "SubjectMatchRule_" + MetaSubject->getValueAsString("Name");
+    SmallString<128> Result;
+    Result += "SubjectMatchRule_";
+    Result += MetaSubject->getValueAsString("Name");
     if (isSubRule()) {
       Result += "_";
       if (isNegatedSubRule())
@@ -1600,7 +1604,7 @@ struct AttributeSubjectMatchRule {
     }
     if (isAbstractRule())
       Result += "_abstract";
-    return Result;
+    return Result.str();
   }
 
   std::string getEnumValue() const { return "attr::" + getEnumValueName(); }
@@ -2602,7 +2606,7 @@ void EmitClangAttrPCHWrite(RecordKeeper &Records, raw_ostream &OS) {
 // append a unique suffix to distinguish this set of target checks from other
 // TargetSpecificAttr records.
 static void GenerateTargetSpecificAttrChecks(const Record *R,
-                                             std::vector<std::string> &Arches,
+                                             std::vector<StringRef> &Arches,
                                              std::string &Test,
                                              std::string *FnName) {
   // It is assumed that there will be an llvm::Triple object
@@ -2612,8 +2616,9 @@ static void GenerateTargetSpecificAttrChecks(const Record *R,
   Test += "(";
 
   for (auto I = Arches.begin(), E = Arches.end(); I != E; ++I) {
-    std::string Part = *I;
-    Test += "T.getArch() == llvm::Triple::" + Part;
+    StringRef Part = *I;
+    Test += "T.getArch() == llvm::Triple::";
+    Test += Part;
     if (I + 1 != E)
       Test += " || ";
     if (FnName)
@@ -2626,11 +2631,12 @@ static void GenerateTargetSpecificAttrChecks(const Record *R,
     // We know that there was at least one arch test, so we need to and in the
     // OS tests.
     Test += " && (";
-    std::vector<std::string> OSes = R->getValueAsListOfStrings("OSes");
+    std::vector<StringRef> OSes = R->getValueAsListOfStrings("OSes");
     for (auto I = OSes.begin(), E = OSes.end(); I != E; ++I) {
-      std::string Part = *I;
+      StringRef Part = *I;
 
-      Test += "T.getOS() == llvm::Triple::" + Part;
+      Test += "T.getOS() == llvm::Triple::";
+      Test += Part;
       if (I + 1 != E)
         Test += " || ";
       if (FnName)
@@ -2642,10 +2648,11 @@ static void GenerateTargetSpecificAttrChecks(const Record *R,
   // If one or more CXX ABIs are specified, check those as well.
   if (!R->isValueUnset("CXXABIs")) {
     Test += " && (";
-    std::vector<std::string> CXXABIs = R->getValueAsListOfStrings("CXXABIs");
+    std::vector<StringRef> CXXABIs = R->getValueAsListOfStrings("CXXABIs");
     for (auto I = CXXABIs.begin(), E = CXXABIs.end(); I != E; ++I) {
-      std::string Part = *I;
-      Test += "Target.getCXXABI().getKind() == TargetCXXABI::" + Part;
+      StringRef Part = *I;
+      Test += "Target.getCXXABI().getKind() == TargetCXXABI::";
+      Test += Part;
       if (I + 1 != E)
         Test += " || ";
       if (FnName)
@@ -2683,7 +2690,7 @@ static void GenerateHasAttrSpellingStringSwitch(
     std::string Test;
     if (Attr->isSubClassOf("TargetSpecificAttr")) {
       const Record *R = Attr->getValueAsDef("Target");
-      std::vector<std::string> Arches = R->getValueAsListOfStrings("Arches");
+      std::vector<StringRef> Arches = R->getValueAsListOfStrings("Arches");
       GenerateTargetSpecificAttrChecks(R, Arches, Test, nullptr);
 
       // If this is the C++11 variety, also add in the LangOpts test.
@@ -3104,6 +3111,8 @@ static std::string CalculateDiagnostic(const Record &S) {
              "            : ExpectedVariableOrFunction))";
 
     case ObjCMethod | ObjCProp: return "ExpectedMethodOrProperty";
+    case Func | ObjCMethod | ObjCProp:
+      return "ExpectedFunctionOrMethodOrProperty";
     case ObjCProtocol | ObjCInterface:
       return "ExpectedObjectiveCInterfaceOrProtocol";
     case Field | Var: return "ExpectedFieldOrGlobalVar";
@@ -3320,7 +3329,7 @@ static std::string GenerateTargetRequirements(const Record &Attr,
 
   // Get the list of architectures to be tested for.
   const Record *R = Attr.getValueAsDef("Target");
-  std::vector<std::string> Arches = R->getValueAsListOfStrings("Arches");
+  std::vector<StringRef> Arches = R->getValueAsListOfStrings("Arches");
   if (Arches.empty()) {
     PrintError(Attr.getLoc(), "Empty list of target architectures for a "
                               "target-specific attr");
@@ -3337,9 +3346,10 @@ static std::string GenerateTargetRequirements(const Record &Attr,
     std::string APK = Attr.getValueAsString("ParseKind");
     for (const auto &I : Dupes) {
       if (I.first == APK) {
-        std::vector<std::string> DA = I.second->getValueAsDef("Target")
-                                          ->getValueAsListOfStrings("Arches");
-        std::move(DA.begin(), DA.end(), std::back_inserter(Arches));
+        std::vector<StringRef> DA =
+            I.second->getValueAsDef("Target")->getValueAsListOfStrings(
+                "Arches");
+        Arches.insert(Arches.end(), DA.begin(), DA.end());
       }
     }
   }
diff --git a/interpreter/llvm/src/tools/clang/utils/TableGen/ClangDiagnosticsEmitter.cpp b/interpreter/llvm/src/tools/clang/utils/TableGen/ClangDiagnosticsEmitter.cpp
index cad08afd846f2..d9d99e0bb0029 100644
--- a/interpreter/llvm/src/tools/clang/utils/TableGen/ClangDiagnosticsEmitter.cpp
+++ b/interpreter/llvm/src/tools/clang/utils/TableGen/ClangDiagnosticsEmitter.cpp
@@ -1277,8 +1277,8 @@ void EmitClangDiagDocs(RecordKeeper &Records, raw_ostream &OS) {
     bool IsSynonym = GroupInfo.DiagsInGroup.empty() &&
                      GroupInfo.SubGroups.size() == 1;
 
-    writeHeader((IsRemarkGroup ? "-R" : "-W") +
-                    G->getValueAsString("GroupName"),
+    writeHeader(((IsRemarkGroup ? "-R" : "-W") +
+                    G->getValueAsString("GroupName")).str(),
                 OS);
 
     if (!IsSynonym) {
diff --git a/interpreter/llvm/src/tools/clang/utils/TableGen/ClangOptionDocEmitter.cpp b/interpreter/llvm/src/tools/clang/utils/TableGen/ClangOptionDocEmitter.cpp
index aa7502e2c8506..59314510e0ad3 100644
--- a/interpreter/llvm/src/tools/clang/utils/TableGen/ClangOptionDocEmitter.cpp
+++ b/interpreter/llvm/src/tools/clang/utils/TableGen/ClangOptionDocEmitter.cpp
@@ -83,7 +83,7 @@ Documentation extractDocumentation(RecordKeeper &Records) {
     }
 
     // Pretend no-X and Xno-Y options are aliases of X and XY.
-    auto Name = R->getValueAsString("Name");
+    std::string Name = R->getValueAsString("Name");
     if (Name.size() >= 4) {
       if (Name.substr(0, 3) == "no-" && OptionsByName[Name.substr(3)]) {
         Aliases[OptionsByName[Name.substr(3)]].push_back(R);
@@ -229,7 +229,7 @@ std::string getRSTStringWithTextFallback(const Record *R, StringRef Primary,
 }
 
 void emitOptionWithArgs(StringRef Prefix, const Record *Option,
-                        ArrayRef<std::string> Args, raw_ostream &OS) {
+                        ArrayRef<StringRef> Args, raw_ostream &OS) {
   OS << Prefix << escapeRST(Option->getValueAsString("Name"));
 
   std::pair<StringRef, StringRef> Separators =
@@ -261,14 +261,15 @@ void emitOptionName(StringRef Prefix, const Record *Option, raw_ostream &OS) {
     }
   }
 
-  emitOptionWithArgs(Prefix, Option, Args, OS);
+  emitOptionWithArgs(Prefix, Option, std::vector<StringRef>(Args.begin(), Args.end()), OS);
 
   auto AliasArgs = Option->getValueAsListOfStrings("AliasArgs");
   if (!AliasArgs.empty()) {
     Record *Alias = Option->getValueAsDef("Alias");
     OS << " (equivalent to ";
-    emitOptionWithArgs(Alias->getValueAsListOfStrings("Prefixes").front(),
-                       Alias, Option->getValueAsListOfStrings("AliasArgs"), OS);
+    emitOptionWithArgs(
+        Alias->getValueAsListOfStrings("Prefixes").front(), Alias,
+        AliasArgs, OS);
     OS << ")";
   }
 }
@@ -310,7 +311,7 @@ void emitOption(const DocumentedOption &Option, const Record *DocInfo,
   forEachOptionName(Option, DocInfo, [&](const Record *Option) {
     for (auto &Prefix : Option->getValueAsListOfStrings("Prefixes"))
       SphinxOptionIDs.push_back(
-          getSphinxOptionID(Prefix + Option->getValueAsString("Name")));
+          getSphinxOptionID((Prefix + Option->getValueAsString("Name")).str()));
   });
   assert(!SphinxOptionIDs.empty() && "no flags for option");
   static std::map<std::string, int> NextSuffix;
diff --git a/interpreter/llvm/src/tools/clang/utils/TableGen/ClangSACheckersEmitter.cpp b/interpreter/llvm/src/tools/clang/utils/TableGen/ClangSACheckersEmitter.cpp
index 115527ae33036..8f3de0b67d77d 100644
--- a/interpreter/llvm/src/tools/clang/utils/TableGen/ClangSACheckersEmitter.cpp
+++ b/interpreter/llvm/src/tools/clang/utils/TableGen/ClangSACheckersEmitter.cpp
@@ -51,7 +51,8 @@ static std::string getParentPackageFullName(const Record *R) {
 static std::string getPackageFullName(const Record *R) {
   std::string name = getParentPackageFullName(R);
   if (!name.empty()) name += ".";
-  return name + R->getValueAsString("PackageName");
+  name += R->getValueAsString("PackageName");
+  return name;
 }
 
 static std::string getCheckerFullName(const Record *R) {
diff --git a/interpreter/llvm/src/tools/clang/utils/bash-autocomplete.sh b/interpreter/llvm/src/tools/clang/utils/bash-autocomplete.sh
new file mode 100644
index 0000000000000..ed815d2d7ad10
--- /dev/null
+++ b/interpreter/llvm/src/tools/clang/utils/bash-autocomplete.sh
@@ -0,0 +1,83 @@
+# Please add "source /path/to/bash-autocomplete.sh" to your .bashrc to use this.
+
+_clang_filedir()
+{
+  # _filedir function provided by recent versions of bash-completion package is
+  # better than "compgen -f" because the former honors spaces in pathnames while
+  # the latter doesn't. So we use compgen only when _filedir is not provided.
+  _filedir 2> /dev/null || COMPREPLY=( $( compgen -f ) )
+}
+
+_clang()
+{
+  local cur prev words cword arg flags w1 w2
+  # If the latest bash-completion is not available, just initialize COMPREPLY
+  # and set the variables manually.
+  _init_completion -n 2> /dev/null
+  if [[ "$?" != 0 ]]; then
+    COMPREPLY=()
+    cword=$COMP_CWORD
+    cur="${COMP_WORDS[$cword]}"
+  fi
+
+  w1="${COMP_WORDS[$cword - 1]}"
+  if [[ $cword > 1 ]]; then
+    w2="${COMP_WORDS[$cword - 2]}"
+  fi
+
+  # Clang wants to know whether -cc1 or -Xclang is specified, because we
+  # don't want to show cc1 options otherwise.
+  if [[ "${COMP_WORDS[1]}" == "-cc1" || "$w1" == "-Xclang" ]]; then
+    arg="#"
+  fi
+
+  # bash always separates '=' as a token even if there's no space before/after '='.
+  # On the other hand, '=' is just a regular character for clang options that
+  # contain '='. For example, "-stdlib=" is defined as is, instead of "-stdlib" and "=".
+  # So, we need to partially undo bash tokenization here for integrity.
+  if [[ "$cur" == -* ]]; then
+    # -foo
+    arg="$arg$cur"
+  elif [[ "$w1" == -*  && "$cur" == '=' ]]; then
+    # -foo=
+    arg="$arg$w1=,"
+  elif [[ "$cur" == -*= ]]; then
+    # -foo=
+    arg="$arg$cur,"
+  elif [[ "$w1" == -* ]]; then
+    # -foo  or -foo bar
+    arg="$arg$w1,$cur"
+  elif [[ "$w2" == -* && "$w1" == '=' ]]; then
+    # -foo=bar
+    arg="$arg$w2=,$cur"
+  elif [[ ${cur: -1} != '=' && ${cur/=} != $cur ]]; then
+    # -foo=bar
+    arg="$arg${cur%=*}=,${cur#*=}"
+  fi
+
+  # expand ~ to $HOME
+  eval local path=${COMP_WORDS[0]}
+  flags=$( "$path" --autocomplete="$arg" 2>/dev/null | sed -e 's/\t.*//' )
+  # If clang is too old to support --autocomplete,
+  # fall back to filename completion.
+  if [[ "$?" != 0 ]]; then
+    _clang_filedir
+    return
+  fi
+
+  # When clang does not emit any possible completions, or the user pressed tab after " ",
+  # just autocomplete files.
+  if [[ "$flags" == "$(echo -e '\n')" || "$arg" == "" ]]; then
+    # If -foo= and there was no possible values, autocomplete files.
+    [[ "$cur" == '=' || "$cur" == -*= ]] && cur=""
+    _clang_filedir
+  elif [[ "$cur" == '=' ]]; then
+    COMPREPLY=( $( compgen -W "$flags" -- "") )
+  else
+    # Bash automatically appends a space after '=' by default.
+    # Disable it so that it works nicely for options in the form of -foo=bar.
+    [[ "${flags: -1}" == '=' ]] && compopt -o nospace 2> /dev/null
+    COMPREPLY=( $( compgen -W "$flags" -- "$cur" ) )
+  fi
+}
+complete -F _clang clang
diff --git a/interpreter/llvm/src/tools/clang/utils/perf-training/lit.cfg b/interpreter/llvm/src/tools/clang/utils/perf-training/lit.cfg
index edae551bed3fa..671d44f83b948 100644
--- a/interpreter/llvm/src/tools/clang/utils/perf-training/lit.cfg
+++ b/interpreter/llvm/src/tools/clang/utils/perf-training/lit.cfg
@@ -3,13 +3,14 @@
 from lit import Test
 import lit.formats
 import lit.util
+import subprocess
 
 def getSysrootFlagsOnDarwin(config, lit_config):
     # On Darwin, support relocatable SDKs by providing Clang with a
     # default system root path.
     if 'darwin' in config.target_triple:
         try:
-            out = lit.util.capture(['xcrun', '--show-sdk-path']).strip()
+            out = subprocess.check_output(['xcrun', '--show-sdk-path']).strip()
             res = 0
         except OSError:
             res = -1
diff --git a/interpreter/llvm/src/tools/clang/utils/perf-training/order-files.lit.cfg b/interpreter/llvm/src/tools/clang/utils/perf-training/order-files.lit.cfg
index a4fd81232a46c..93904ec84a419 100644
--- a/interpreter/llvm/src/tools/clang/utils/perf-training/order-files.lit.cfg
+++ b/interpreter/llvm/src/tools/clang/utils/perf-training/order-files.lit.cfg
@@ -4,13 +4,14 @@ from lit import Test
 import lit.formats
 import lit.util
 import os
+import subprocess
 
 def getSysrootFlagsOnDarwin(config, lit_config):
     # On Darwin, support relocatable SDKs by providing Clang with a
     # default system root path.
     if 'darwin' in config.target_triple:
         try:
-            out = lit.util.capture(['xcrun', '--show-sdk-path']).strip()
+            out = subprocess.check_output(['xcrun', '--show-sdk-path']).strip()
             res = 0
         except OSError:
             res = -1
diff --git a/interpreter/llvm/src/tools/clang/www/analyzer/alpha_checks.html b/interpreter/llvm/src/tools/clang/www/analyzer/alpha_checks.html
index ce9392b9960c6..7d84d23343f9b 100644
--- a/interpreter/llvm/src/tools/clang/www/analyzer/alpha_checks.html
+++ b/interpreter/llvm/src/tools/clang/www/analyzer/alpha_checks.html
  [HTML table hunks omitted: the page's contents list and tables gain a new
  "Clone Alpha Checkers" section (Name/Description and Example columns), with
  further additions and removals in the Core, C++, and Unix alpha checker
  tables.]
diff --git a/interpreter/llvm/src/tools/clang/www/analyzer/available_checks.html b/interpreter/llvm/src/tools/clang/www/analyzer/available_checks.html
index 7707fc0150d5e..eca8dca616d27 100644
--- a/interpreter/llvm/src/tools/clang/www/analyzer/available_checks.html
+++ b/interpreter/llvm/src/tools/clang/www/analyzer/available_checks.html
  [HTML table hunks omitted: the "Default Checkers" contents list gains
  Nullability and Optin entries, new "Nullability Checkers" and "Optin
  Checkers" sections are added, and rows change in the C++, Dead Code, OS X,
  Security, and Unix checker tables.]
  [Remaining HTML hunks omitted: a further hunk (@@ -809,6 +871,11 @@) updates
  a www status page's "Technical specifications and standing documents" table,
  touching the "SD-6: SG10 feature test recommendations" row (Clang 3.4
  (N3745), Clang 4 (P0096R3)) and adding a Clang 5 (P0096R4) entry.]